diff --git a/0001-Update-kernel-headers.patch b/0001-Update-kernel-headers.patch new file mode 100644 index 0000000000000000000000000000000000000000..41d772bbf24dd99658830ecd686f00cc6147cecb --- /dev/null +++ b/0001-Update-kernel-headers.patch @@ -0,0 +1,42 @@ +From b8814f1da5d2e3fd9be301ba761d7313a82b3cd1 Mon Sep 17 00:00:00 2001 +From: Yixing Liu +Date: Thu, 22 Feb 2024 15:55:23 +0800 +Subject: [PATCH 1/2] Update kernel headers + +To commit ?? ("RDMA/hns: Support DSCP of userspace"). + +Signed-off-by: Yixing Liu +--- + kernel-headers/rdma/hns-abi.h | 9 ++++++++- + 1 file changed, 8 insertions(+), 1 deletion(-) + +diff --git a/kernel-headers/rdma/hns-abi.h b/kernel-headers/rdma/hns-abi.h +index c996e15..f77697c 100644 +--- a/kernel-headers/rdma/hns-abi.h ++++ b/kernel-headers/rdma/hns-abi.h +@@ -95,6 +95,12 @@ struct hns_roce_ib_create_qp_resp { + __aligned_u64 dwqe_mmap_key; + }; + ++struct hns_roce_ib_modify_qp_resp { ++ __u8 tc_mode; ++ __u8 priority; ++ __u8 reserved[6]; ++}; ++ + enum { + HNS_ROCE_EXSGE_FLAGS = 1 << 0, + HNS_ROCE_RQ_INLINE_FLAGS = 1 << 1, +@@ -127,7 +133,8 @@ struct hns_roce_ib_alloc_pd_resp { + + struct hns_roce_ib_create_ah_resp { + __u8 dmac[6]; +- __u8 reserved[2]; ++ __u8 priority; ++ __u8 tc_mode; + }; + + #endif /* HNS_ABI_USER_H */ +-- +2.25.1 + diff --git a/0002-libhns-Support-DSCP.patch b/0002-libhns-Support-DSCP.patch new file mode 100644 index 0000000000000000000000000000000000000000..7f8fa76aa0ff35809f771d4def5ab7d72279fec9 --- /dev/null +++ b/0002-libhns-Support-DSCP.patch @@ -0,0 +1,136 @@ +From f457a4648d8705a563be72ac736f65639de11d52 Mon Sep 17 00:00:00 2001 +From: Yixing Liu +Date: Thu, 22 Feb 2024 15:55:24 +0800 +Subject: [PATCH 2/2] libhns: Support DSCP + +driver inclusion +category: feature +bugzilla: https://gitee.com/openeuler/kernel/issues/I92J5Q + +------------------------------------------------------------------ + +This patch adds user mode DSCP function through +the mapping of dscp-tc configured in kernel mode. 
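+
+From userspace the feature is transparent: the application still
+programs the path with ibv_modify_qp(), and when the kernel reports
+HNS_ROCE_TC_MAP_MODE_DSCP the provider records the kernel-returned
+priority as the SL instead of the value passed in ah_attr. A minimal
+sketch of the application side using plain libibverbs (illustrative
+only; qp, remote_qpn, remote_psn and remote_gid are assumed to exist):
+
+    struct ibv_qp_attr attr = {
+        .qp_state = IBV_QPS_RTR,
+        .path_mtu = IBV_MTU_1024,
+        .dest_qp_num = remote_qpn,
+        .rq_psn = remote_psn,
+        .max_dest_rd_atomic = 1,
+        .min_rnr_timer = 12,
+        .ah_attr = {
+            .is_global = 1,
+            .port_num = 1,
+            .sl = 0, /* may be overridden by the dscp-tc mapping */
+            .grh = { .dgid = remote_gid, .sgid_index = 0, .hop_limit = 1 },
+        },
+    };
+    int attr_mask = IBV_QP_STATE | IBV_QP_AV | IBV_QP_PATH_MTU |
+                    IBV_QP_DEST_QPN | IBV_QP_RQ_PSN |
+                    IBV_QP_MAX_DEST_RD_ATOMIC | IBV_QP_MIN_RNR_TIMER;
+    int ret = ibv_modify_qp(qp, &attr, attr_mask);
+    if (ret)
+        return ret;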
+ +Signed-off-by: Yixing Liu +Signed-off-by: Ran Zhou +--- + providers/hns/hns_roce_u.h | 7 +++++++ + providers/hns/hns_roce_u_abi.h | 3 +++ + providers/hns/hns_roce_u_hw_v2.c | 24 ++++++++++++++++++------ + providers/hns/hns_roce_u_verbs.c | 3 +++ + 4 files changed, 31 insertions(+), 6 deletions(-) + +diff --git a/providers/hns/hns_roce_u.h b/providers/hns/hns_roce_u.h +index afb68fe..5ec2734 100644 +--- a/providers/hns/hns_roce_u.h ++++ b/providers/hns/hns_roce_u.h +@@ -182,6 +182,11 @@ enum hns_roce_pktype { + HNS_ROCE_PKTYPE_ROCE_V2_IPV4, + }; + ++enum hns_roce_tc_map_mode { ++ HNS_ROCE_TC_MAP_MODE_PRIO, ++ HNS_ROCE_TC_MAP_MODE_DSCP, ++}; ++ + struct hns_roce_db_page { + struct hns_roce_db_page *prev, *next; + struct hns_roce_buf buf; +@@ -323,6 +328,8 @@ struct hns_roce_qp { + unsigned int next_sge; + int port_num; + uint8_t sl; ++ uint8_t tc_mode; ++ uint8_t priority; + unsigned int qkey; + enum ibv_mtu path_mtu; + +diff --git a/providers/hns/hns_roce_u_abi.h b/providers/hns/hns_roce_u_abi.h +index 3f98eb3..ec47c4b 100644 +--- a/providers/hns/hns_roce_u_abi.h ++++ b/providers/hns/hns_roce_u_abi.h +@@ -64,4 +64,7 @@ DECLARE_DRV_CMD(hns_roce_create_srq_ex, IB_USER_VERBS_CMD_CREATE_XSRQ, + DECLARE_DRV_CMD(hns_roce_create_ah, IB_USER_VERBS_CMD_CREATE_AH, empty, + hns_roce_ib_create_ah_resp); + ++DECLARE_DRV_CMD(hns_roce_modify_qp_ex, IB_USER_VERBS_EX_CMD_MODIFY_QP, ++ empty, hns_roce_ib_modify_qp_resp); ++ + #endif /* _HNS_ROCE_U_ABI_H */ +diff --git a/providers/hns/hns_roce_u_hw_v2.c b/providers/hns/hns_roce_u_hw_v2.c +index daef17a..dd13049 100644 +--- a/providers/hns/hns_roce_u_hw_v2.c ++++ b/providers/hns/hns_roce_u_hw_v2.c +@@ -1523,8 +1523,12 @@ static void record_qp_attr(struct ibv_qp *qp, struct ibv_qp_attr *attr, + if (attr_mask & IBV_QP_PORT) + hr_qp->port_num = attr->port_num; + +- if (attr_mask & IBV_QP_AV) +- hr_qp->sl = attr->ah_attr.sl; ++ if (hr_qp->tc_mode == HNS_ROCE_TC_MAP_MODE_DSCP) ++ hr_qp->sl = hr_qp->priority; ++ else { ++ if (attr_mask & IBV_QP_AV) ++ hr_qp->sl = attr->ah_attr.sl; ++ } + + if (attr_mask & IBV_QP_QKEY) + hr_qp->qkey = attr->qkey; +@@ -1538,10 +1542,11 @@ static void record_qp_attr(struct ibv_qp *qp, struct ibv_qp_attr *attr, + static int hns_roce_u_v2_modify_qp(struct ibv_qp *qp, struct ibv_qp_attr *attr, + int attr_mask) + { +- int ret; +- struct ibv_modify_qp cmd; ++ struct hns_roce_modify_qp_ex_resp resp_ex = {}; ++ struct hns_roce_modify_qp_ex cmd_ex = {}; + struct hns_roce_qp *hr_qp = to_hr_qp(qp); + bool flag = false; /* modify qp to error */ ++ int ret; + + if ((attr_mask & IBV_QP_STATE) && (attr->qp_state == IBV_QPS_ERR)) { + pthread_spin_lock(&hr_qp->sq.lock); +@@ -1549,7 +1554,9 @@ static int hns_roce_u_v2_modify_qp(struct ibv_qp *qp, struct ibv_qp_attr *attr, + flag = true; + } + +- ret = ibv_cmd_modify_qp(qp, attr, attr_mask, &cmd, sizeof(cmd)); ++ ret = ibv_cmd_modify_qp_ex(qp, attr, attr_mask, &cmd_ex.ibv_cmd, ++ sizeof(cmd_ex), &resp_ex.ibv_resp, ++ sizeof(resp_ex)); + + if (flag) { + if (!ret) +@@ -1561,8 +1568,13 @@ static int hns_roce_u_v2_modify_qp(struct ibv_qp *qp, struct ibv_qp_attr *attr, + if (ret) + return ret; + +- if (attr_mask & IBV_QP_STATE) ++ if (attr_mask & IBV_QP_STATE) { + qp->state = attr->qp_state; ++ if (attr->qp_state == IBV_QPS_RTR) { ++ hr_qp->tc_mode = resp_ex.drv_payload.tc_mode; ++ hr_qp->priority = resp_ex.drv_payload.priority; ++ } ++ } + + if ((attr_mask & IBV_QP_STATE) && attr->qp_state == IBV_QPS_RESET) { + if (qp->recv_cq) +diff --git a/providers/hns/hns_roce_u_verbs.c 
b/providers/hns/hns_roce_u_verbs.c +index 34f7ee4..d081bb3 100644 +--- a/providers/hns/hns_roce_u_verbs.c ++++ b/providers/hns/hns_roce_u_verbs.c +@@ -1486,6 +1486,9 @@ struct ibv_ah *hns_roce_u_create_ah(struct ibv_pd *pd, struct ibv_ah_attr *attr) + ah->av.mac, NULL)) + goto err; + ++ if (resp.tc_mode == HNS_ROCE_TC_MAP_MODE_DSCP) ++ ah->av.sl = resp.priority; ++ + ah->av.udp_sport = get_ah_udp_sport(attr); + + return &ah->ibv_ah; +-- +2.25.1 + diff --git a/0003-Update-kernel-headers.patch b/0003-Update-kernel-headers.patch new file mode 100644 index 0000000000000000000000000000000000000000..2bae99845031c7ffefd9b8b3c3b795c763c50c24 --- /dev/null +++ b/0003-Update-kernel-headers.patch @@ -0,0 +1,57 @@ +From 75fdc5c9a0cce5cc6859189eeb91ce1846edc4b9 Mon Sep 17 00:00:00 2001 +From: Junxian Huang +Date: Tue, 5 Mar 2024 13:57:20 +0800 +Subject: [PATCH 3/7] Update kernel headers + +To commit: 6ec429d5887a ("RDMA/hns: Support userspace configuring +congestion control algorithm with QP granularity"). + +Signed-off-by: Junxian Huang +--- + kernel-headers/rdma/hns-abi.h | 16 ++++++++++++++++ + 1 file changed, 16 insertions(+) + +diff --git a/kernel-headers/rdma/hns-abi.h b/kernel-headers/rdma/hns-abi.h +index f77697c..39ed8a4 100644 +--- a/kernel-headers/rdma/hns-abi.h ++++ b/kernel-headers/rdma/hns-abi.h +@@ -73,6 +73,17 @@ struct hns_roce_ib_create_srq_resp { + __u32 cap_flags; /* Use enum hns_roce_srq_cap_flags */ + }; + ++enum hns_roce_congest_type_flags { ++ HNS_ROCE_CREATE_QP_FLAGS_DCQCN, ++ HNS_ROCE_CREATE_QP_FLAGS_LDCP, ++ HNS_ROCE_CREATE_QP_FLAGS_HC3, ++ HNS_ROCE_CREATE_QP_FLAGS_DIP, ++}; ++ ++enum hns_roce_create_qp_comp_mask { ++ HNS_ROCE_CREATE_QP_MASK_CONGEST_TYPE = 1 << 0, ++}; ++ + struct hns_roce_ib_create_qp { + __aligned_u64 buf_addr; + __aligned_u64 db_addr; +@@ -81,6 +92,9 @@ struct hns_roce_ib_create_qp { + __u8 sq_no_prefetch; + __u8 reserved[5]; + __aligned_u64 sdb_addr; ++ __aligned_u64 comp_mask; /* Use enum hns_roce_create_qp_comp_mask */ ++ __aligned_u64 create_flags; ++ __aligned_u64 cong_type_flags; + }; + + enum hns_roce_qp_cap_flags { +@@ -120,6 +134,8 @@ struct hns_roce_ib_alloc_ucontext_resp { + __u32 reserved; + __u32 config; + __u32 max_inline_data; ++ __u8 congest_type; ++ __u8 reserved0[7]; + }; + + struct hns_roce_ib_alloc_ucontext { +-- +2.33.0 + diff --git a/0004-libhns-Introduce-hns-direct-verbs.patch b/0004-libhns-Introduce-hns-direct-verbs.patch new file mode 100644 index 0000000000000000000000000000000000000000..0a044920491f73b487f56555c2a68ab9bb5c6e2f --- /dev/null +++ b/0004-libhns-Introduce-hns-direct-verbs.patch @@ -0,0 +1,466 @@ +From 4822f5d7166996c1a619f7c51d156a029e85dd53 Mon Sep 17 00:00:00 2001 +From: Junxian Huang +Date: Tue, 5 Mar 2024 13:57:22 +0800 +Subject: [PATCH 4/7] libhns: Introduce hns direct verbs + +driver inclusion +category: feature +bugzilla: https://gitee.com/openeuler/kernel/issues/I95UWO + +------------------------------------------------------------------ +Introduce the frame of hns direct verbs, including hnsdv_is_supported(), +hnsdv_create_qp() and hnsdv_query_device(). 
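+
+A minimal usage sketch of the new direct verbs (illustrative only, not
+part of this patch; ctx, pd and cq are assumed to be an already opened
+verbs context, protection domain and CQ):
+
+    #include <infiniband/hnsdv.h>
+
+    if (!hnsdv_is_supported(ctx->device))
+        return -EOPNOTSUPP; /* fall back to plain verbs */
+
+    struct hnsdv_context dv_attrs = {};
+    int ret = hnsdv_query_device(ctx, &dv_attrs);
+    if (ret)
+        return ret;
+
+    struct ibv_qp_init_attr_ex attr_ex = {
+        .send_cq = cq,
+        .recv_cq = cq,
+        .cap = { .max_send_wr = 64, .max_recv_wr = 64,
+                 .max_send_sge = 1, .max_recv_sge = 1 },
+        .qp_type = IBV_QPT_RC,
+        .comp_mask = IBV_QP_INIT_ATTR_PD,
+        .pd = pd,
+    };
+    /* No hns-specific attribute is used yet, so comp_mask stays 0. */
+    struct hnsdv_qp_init_attr hns_attr = {};
+
+    struct ibv_qp *qp = hnsdv_create_qp(ctx, &attr_ex, &hns_attr);
+    if (!qp)
+        return -errno;
+
+Passing a NULL hns_attr is also accepted, in which case
+hnsdv_create_qp() behaves like ibv_create_qp_ex().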
+ +Signed-off-by: Junxian Huang +Signed-off-by: Ran Zhou +--- + debian/ibverbs-providers.install | 1 + + debian/ibverbs-providers.lintian-overrides | 4 +- + debian/ibverbs-providers.symbols | 6 ++ + debian/libibverbs-dev.install | 4 ++ + providers/hns/CMakeLists.txt | 9 ++- + providers/hns/hns_roce_u.c | 13 ++++ + providers/hns/hns_roce_u.h | 2 + + providers/hns/hns_roce_u_abi.h | 1 + + providers/hns/hns_roce_u_verbs.c | 69 ++++++++++++++++++++-- + providers/hns/hnsdv.h | 37 ++++++++++++ + providers/hns/libhns.map | 9 +++ + redhat/rdma-core.spec | 5 +- + suse/rdma-core.spec | 21 ++++++- + 13 files changed, 171 insertions(+), 10 deletions(-) + create mode 100644 providers/hns/hnsdv.h + create mode 100644 providers/hns/libhns.map + +diff --git a/debian/ibverbs-providers.install b/debian/ibverbs-providers.install +index a003a30..fea15e0 100644 +--- a/debian/ibverbs-providers.install ++++ b/debian/ibverbs-providers.install +@@ -1,6 +1,7 @@ + etc/libibverbs.d/ + usr/lib/*/libefa.so.* + usr/lib/*/libibverbs/lib*-rdmav*.so ++usr/lib/*/libhns.so.* + usr/lib/*/libmana.so.* + usr/lib/*/libmlx4.so.* + usr/lib/*/libmlx5.so.* +diff --git a/debian/ibverbs-providers.lintian-overrides b/debian/ibverbs-providers.lintian-overrides +index 5815058..fd73a76 100644 +--- a/debian/ibverbs-providers.lintian-overrides ++++ b/debian/ibverbs-providers.lintian-overrides +@@ -1,2 +1,2 @@ +-# libefa, libmana, libmlx4 and libmlx5 are ibverbs provider that provides more functions. +-ibverbs-providers: package-name-doesnt-match-sonames libefa1 libmana1 libmlx4-1 libmlx5-1 ++# libefa, libhns, libmana, libmlx4 and libmlx5 are ibverbs provider that provides more functions. ++ibverbs-providers: package-name-doesnt-match-sonames libefa1 libhns-1 libmana1 libmlx4-1 libmlx5-1 +diff --git a/debian/ibverbs-providers.symbols b/debian/ibverbs-providers.symbols +index 72361bd..d2c0989 100644 +--- a/debian/ibverbs-providers.symbols ++++ b/debian/ibverbs-providers.symbols +@@ -174,6 +174,12 @@ libefa.so.1 ibverbs-providers #MINVER# + efadv_cq_from_ibv_cq_ex@EFA_1.2 43 + efadv_create_cq@EFA_1.2 43 + efadv_query_mr@EFA_1.3 50 ++libhns.so.1 ibverbs-providers #MINVER# ++* Build-Depends-Package: libibverbs-dev ++ HNS_1.0@HNS_1.0 51 ++ hnsdv_is_supported@HNS_1.0 51 ++ hnsdv_create_qp@HNS_1.0 51 ++ hnsdv_query_device@HNS_1.0 51 + libmana.so.1 ibverbs-providers #MINVER# + * Build-Depends-Package: libibverbs-dev + MANA_1.0@MANA_1.0 41 +diff --git a/debian/libibverbs-dev.install b/debian/libibverbs-dev.install +index 5f2ffd5..ef5b9a4 100644 +--- a/debian/libibverbs-dev.install ++++ b/debian/libibverbs-dev.install +@@ -1,5 +1,6 @@ + usr/include/infiniband/arch.h + usr/include/infiniband/efadv.h ++usr/include/infiniband/hnsdv.h + usr/include/infiniband/ib_user_ioctl_verbs.h + usr/include/infiniband/manadv.h + usr/include/infiniband/mlx4dv.h +@@ -15,6 +16,8 @@ usr/include/infiniband/verbs_api.h + usr/lib/*/lib*-rdmav*.a + usr/lib/*/libefa.a + usr/lib/*/libefa.so ++usr/lib/*/libhns.a ++usr/lib/*/libhns.so + usr/lib/*/libibverbs*.so + usr/lib/*/libibverbs.a + usr/lib/*/libmana.a +@@ -24,6 +27,7 @@ usr/lib/*/libmlx4.so + usr/lib/*/libmlx5.a + usr/lib/*/libmlx5.so + usr/lib/*/pkgconfig/libefa.pc ++usr/lib/*/pkgconfig/libhns.pc + usr/lib/*/pkgconfig/libibverbs.pc + usr/lib/*/pkgconfig/libmana.pc + usr/lib/*/pkgconfig/libmlx4.pc +diff --git a/providers/hns/CMakeLists.txt b/providers/hns/CMakeLists.txt +index 7aaca75..58139ae 100644 +--- a/providers/hns/CMakeLists.txt ++++ b/providers/hns/CMakeLists.txt +@@ -1,7 +1,14 @@ +-rdma_provider(hns 
++rdma_shared_provider(hns libhns.map ++ 1 1.0.${PACKAGE_VERSION} + hns_roce_u.c + hns_roce_u_buf.c + hns_roce_u_db.c + hns_roce_u_hw_v2.c + hns_roce_u_verbs.c + ) ++ ++publish_headers(infiniband ++ hnsdv.h ++) ++ ++rdma_pkg_config("hns" "libibverbs" "${CMAKE_THREAD_LIBS_INIT}") +diff --git a/providers/hns/hns_roce_u.c b/providers/hns/hns_roce_u.c +index 266e73e..0b254fb 100644 +--- a/providers/hns/hns_roce_u.c ++++ b/providers/hns/hns_roce_u.c +@@ -216,4 +216,17 @@ static const struct verbs_device_ops hns_roce_dev_ops = { + .uninit_device = hns_uninit_device, + .alloc_context = hns_roce_alloc_context, + }; ++ ++bool is_hns_dev(struct ibv_device *device) ++{ ++ struct verbs_device *verbs_device = verbs_get_device(device); ++ ++ return verbs_device->ops == &hns_roce_dev_ops; ++} ++ ++bool hnsdv_is_supported(struct ibv_device *device) ++{ ++ return is_hns_dev(device); ++} ++ + PROVIDER_DRIVER(hns, hns_roce_dev_ops); +diff --git a/providers/hns/hns_roce_u.h b/providers/hns/hns_roce_u.h +index 5ec2734..99fa23f 100644 +--- a/providers/hns/hns_roce_u.h ++++ b/providers/hns/hns_roce_u.h +@@ -501,6 +501,8 @@ void hns_roce_free_qp_buf(struct hns_roce_qp *qp, struct hns_roce_context *ctx); + + void hns_roce_init_qp_indices(struct hns_roce_qp *qp); + ++bool is_hns_dev(struct ibv_device *device); ++ + extern const struct hns_roce_u_hw hns_roce_u_hw_v2; + + #endif /* _HNS_ROCE_U_H */ +diff --git a/providers/hns/hns_roce_u_abi.h b/providers/hns/hns_roce_u_abi.h +index ec47c4b..7e9bbc1 100644 +--- a/providers/hns/hns_roce_u_abi.h ++++ b/providers/hns/hns_roce_u_abi.h +@@ -36,6 +36,7 @@ + #include + #include + #include ++#include "hnsdv.h" + + DECLARE_DRV_CMD(hns_roce_alloc_pd, IB_USER_VERBS_CMD_ALLOC_PD, + empty, hns_roce_ib_alloc_pd_resp); +diff --git a/providers/hns/hns_roce_u_verbs.c b/providers/hns/hns_roce_u_verbs.c +index d081bb3..997b7e0 100644 +--- a/providers/hns/hns_roce_u_verbs.c ++++ b/providers/hns/hns_roce_u_verbs.c +@@ -785,6 +785,25 @@ int hns_roce_u_destroy_srq(struct ibv_srq *ibv_srq) + return 0; + } + ++enum { ++ HNSDV_QP_SUP_COMP_MASK = 0, ++}; ++ ++static int check_hnsdv_qp_attr(struct hns_roce_context *ctx, ++ struct hnsdv_qp_init_attr *hns_attr) ++{ ++ if (!hns_attr) ++ return 0; ++ ++ if (!check_comp_mask(hns_attr->comp_mask, HNSDV_QP_SUP_COMP_MASK)) { ++ verbs_err(&ctx->ibv_ctx, "invalid hnsdv comp_mask 0x%x.\n", ++ hns_attr->comp_mask); ++ return EINVAL; ++ } ++ ++ return 0; ++} ++ + enum { + CREATE_QP_SUP_COMP_MASK = IBV_QP_INIT_ATTR_PD | IBV_QP_INIT_ATTR_XRCD | + IBV_QP_INIT_ATTR_SEND_OPS_FLAGS, +@@ -866,7 +885,8 @@ static int verify_qp_create_cap(struct hns_roce_context *ctx, + } + + static int verify_qp_create_attr(struct hns_roce_context *ctx, +- struct ibv_qp_init_attr_ex *attr) ++ struct ibv_qp_init_attr_ex *attr, ++ struct hnsdv_qp_init_attr *hns_attr) + { + int ret; + +@@ -874,6 +894,10 @@ static int verify_qp_create_attr(struct hns_roce_context *ctx, + if (ret) + return ret; + ++ ret = check_hnsdv_qp_attr(ctx, hns_attr); ++ if (ret) ++ return ret; ++ + return verify_qp_create_cap(ctx, attr); + } + +@@ -1274,14 +1298,15 @@ static int mmap_dwqe(struct ibv_context *ibv_ctx, struct hns_roce_qp *qp, + } + + static struct ibv_qp *create_qp(struct ibv_context *ibv_ctx, +- struct ibv_qp_init_attr_ex *attr) ++ struct ibv_qp_init_attr_ex *attr, ++ struct hnsdv_qp_init_attr *hns_attr) + { + struct hns_roce_context *context = to_hr_ctx(ibv_ctx); + struct hns_roce_qp *qp; + uint64_t dwqe_mmap_key; + int ret; + +- ret = verify_qp_create_attr(context, attr); ++ ret = 
verify_qp_create_attr(context, attr, hns_attr); + if (ret) + goto err; + +@@ -1345,7 +1370,7 @@ struct ibv_qp *hns_roce_u_create_qp(struct ibv_pd *pd, + attrx.comp_mask = IBV_QP_INIT_ATTR_PD; + attrx.pd = pd; + +- qp = create_qp(pd->context, &attrx); ++ qp = create_qp(pd->context, &attrx, NULL); + if (qp) + memcpy(attr, &attrx, sizeof(*attr)); + +@@ -1355,7 +1380,41 @@ struct ibv_qp *hns_roce_u_create_qp(struct ibv_pd *pd, + struct ibv_qp *hns_roce_u_create_qp_ex(struct ibv_context *context, + struct ibv_qp_init_attr_ex *attr) + { +- return create_qp(context, attr); ++ return create_qp(context, attr, NULL); ++} ++ ++struct ibv_qp *hnsdv_create_qp(struct ibv_context *context, ++ struct ibv_qp_init_attr_ex *qp_attr, ++ struct hnsdv_qp_init_attr *hns_attr) ++{ ++ if (!context || !qp_attr) { ++ errno = EINVAL; ++ return NULL; ++ } ++ ++ if (!is_hns_dev(context->device)) { ++ errno = EOPNOTSUPP; ++ return NULL; ++ } ++ ++ return create_qp(context, qp_attr, hns_attr); ++} ++ ++int hnsdv_query_device(struct ibv_context *context, ++ struct hnsdv_context *attrs_out) ++{ ++ struct hns_roce_context *ctx = context ? to_hr_ctx(context) : NULL; ++ ++ if (!ctx || !attrs_out) ++ return EINVAL; ++ ++ if (!is_hns_dev(context->device)) { ++ verbs_err(verbs_get_ctx(context), "not a HNS RoCE device!\n"); ++ return EOPNOTSUPP; ++ } ++ memset(attrs_out, 0, sizeof(*attrs_out)); ++ ++ return 0; + } + + struct ibv_qp *hns_roce_u_open_qp(struct ibv_context *context, +diff --git a/providers/hns/hnsdv.h b/providers/hns/hnsdv.h +new file mode 100644 +index 0000000..49ba08a +--- /dev/null ++++ b/providers/hns/hnsdv.h +@@ -0,0 +1,37 @@ ++/* SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause */ ++/* ++ * Copyright (c) 2024 Hisilicon Limited. ++ */ ++ ++#ifndef __HNSDV_H__ ++#define __HNSDV_H__ ++ ++#include ++#include ++#include ++#include ++ ++#ifdef __cplusplus ++extern "C" { ++#endif ++ ++struct hnsdv_qp_init_attr { ++ uint64_t comp_mask; ++}; ++ ++struct hnsdv_context { ++ uint64_t comp_mask; ++}; ++ ++bool hnsdv_is_supported(struct ibv_device *device); ++int hnsdv_query_device(struct ibv_context *ctx_in, ++ struct hnsdv_context *attrs_out); ++struct ibv_qp *hnsdv_create_qp(struct ibv_context *context, ++ struct ibv_qp_init_attr_ex *qp_attr, ++ struct hnsdv_qp_init_attr *hns_qp_attr); ++ ++#ifdef __cplusplus ++} ++#endif ++ ++#endif /* __HNSDV_H__ */ +diff --git a/providers/hns/libhns.map b/providers/hns/libhns.map +new file mode 100644 +index 0000000..e9bf417 +--- /dev/null ++++ b/providers/hns/libhns.map +@@ -0,0 +1,9 @@ ++/* Export symbols should be added below according to ++ Documentation/versioning.md document. 
*/ ++HNS_1.0 { ++ global: ++ hnsdv_is_supported; ++ hnsdv_create_qp; ++ hnsdv_query_device; ++ local: *; ++}; +diff --git a/redhat/rdma-core.spec b/redhat/rdma-core.spec +index c6ddcfd..c347195 100644 +--- a/redhat/rdma-core.spec ++++ b/redhat/rdma-core.spec +@@ -158,6 +158,8 @@ Provides: liberdma = %{version}-%{release} + Obsoletes: liberdma < %{version}-%{release} + Provides: libhfi1 = %{version}-%{release} + Obsoletes: libhfi1 < %{version}-%{release} ++Provides: libhns = %{version}-%{release} ++Obsoletes: libhns < %{version}-%{release} + Provides: libipathverbs = %{version}-%{release} + Obsoletes: libipathverbs < %{version}-%{release} + Provides: libirdma = %{version}-%{release} +@@ -188,7 +190,7 @@ Device-specific plug-in ibverbs userspace drivers are included: + - libefa: Amazon Elastic Fabric Adapter + - liberdma: Alibaba Elastic RDMA (iWarp) Adapter + - libhfi1: Intel Omni-Path HFI +-- libhns: HiSilicon Hip06 SoC ++- libhns: HiSilicon Hip08+ SoC + - libipathverbs: QLogic InfiniPath HCA + - libirdma: Intel Ethernet Connection RDMA + - libmana: Microsoft Azure Network Adapter +@@ -575,6 +577,7 @@ fi + %dir %{_sysconfdir}/libibverbs.d + %dir %{_libdir}/libibverbs + %{_libdir}/libefa.so.* ++%{_libdir}/libhns.so.* + %{_libdir}/libibverbs*.so.* + %{_libdir}/libibverbs/*.so + %{_libdir}/libmana.so.* +diff --git a/suse/rdma-core.spec b/suse/rdma-core.spec +index d534dbc..f2bd0c0 100644 +--- a/suse/rdma-core.spec ++++ b/suse/rdma-core.spec +@@ -35,6 +35,7 @@ License: BSD-2-Clause OR GPL-2.0-only + Group: Productivity/Networking/Other + + %define efa_so_major 1 ++%define hns_so_major 1 + %define verbs_so_major 1 + %define rdmacm_so_major 1 + %define umad_so_major 3 +@@ -45,6 +46,7 @@ Group: Productivity/Networking/Other + %define mad_major 5 + + %define efa_lname libefa%{efa_so_major} ++%define hns_lname libhns%{hns_so_major} + %define verbs_lname libibverbs%{verbs_so_major} + %define rdmacm_lname librdmacm%{rdmacm_so_major} + %define umad_lname libibumad%{umad_so_major} +@@ -159,6 +161,7 @@ Requires: %{umad_lname} = %{version}-%{release} + Requires: %{verbs_lname} = %{version}-%{release} + %if 0%{?dma_coherent} + Requires: %{efa_lname} = %{version}-%{release} ++Requires: %{hns_lname} = %{version}-%{release} + Requires: %{mana_lname} = %{version}-%{release} + Requires: %{mlx4_lname} = %{version}-%{release} + Requires: %{mlx5_lname} = %{version}-%{release} +@@ -200,6 +203,7 @@ Requires: %{name}%{?_isa} = %{version}-%{release} + Obsoletes: libcxgb4-rdmav2 < %{version}-%{release} + Obsoletes: libefa-rdmav2 < %{version}-%{release} + Obsoletes: libhfi1verbs-rdmav2 < %{version}-%{release} ++Obsoletes: libhns-rdmav2 < %{version}-%{release} + Obsoletes: libipathverbs-rdmav2 < %{version}-%{release} + Obsoletes: libmana-rdmav2 < %{version}-%{release} + Obsoletes: libmlx4-rdmav2 < %{version}-%{release} +@@ -209,6 +213,7 @@ Obsoletes: libocrdma-rdmav2 < %{version}-%{release} + Obsoletes: librxe-rdmav2 < %{version}-%{release} + %if 0%{?dma_coherent} + Requires: %{efa_lname} = %{version}-%{release} ++Requires: %{hns_lname} = %{version}-%{release} + Requires: %{mana_lname} = %{version}-%{release} + Requires: %{mlx4_lname} = %{version}-%{release} + Requires: %{mlx5_lname} = %{version}-%{release} +@@ -228,7 +233,7 @@ Device-specific plug-in ibverbs userspace drivers are included: + - libcxgb4: Chelsio T4 iWARP HCA + - libefa: Amazon Elastic Fabric Adapter + - libhfi1: Intel Omni-Path HFI +-- libhns: HiSilicon Hip06 SoC ++- libhns: HiSilicon Hip08+ SoC + - libipathverbs: QLogic InfiniPath HCA + - libirdma: 
Intel Ethernet Connection RDMA + - libmana: Microsoft Azure Network Adapter +@@ -256,6 +261,13 @@ Group: System/Libraries + %description -n %efa_lname + This package contains the efa runtime library. + ++%package -n %hns_lname ++Summary: HNS runtime library ++Group: System/Libraries ++ ++%description -n %hns_lname ++This package contains the hns runtime library. ++ + %package -n %mana_lname + Summary: MANA runtime library + Group: System/Libraries +@@ -508,6 +520,9 @@ rm -rf %{buildroot}/%{_sbindir}/srp_daemon.sh + %post -n %efa_lname -p /sbin/ldconfig + %postun -n %efa_lname -p /sbin/ldconfig + ++%post -n %hns_lname -p /sbin/ldconfig ++%postun -n %hns_lname -p /sbin/ldconfig ++ + %post -n %mana_lname -p /sbin/ldconfig + %postun -n %mana_lname -p /sbin/ldconfig + +@@ -700,6 +715,10 @@ done + %files -n %efa_lname + %{_libdir}/libefa*.so.* + ++%files -n %hns_lname ++%defattr(-,root,root) ++%{_libdir}/libhns*.so.* ++ + %files -n %mana_lname + %{_libdir}/libmana*.so.* + +-- +2.33.0 + diff --git a/0005-libhns-Encapsulate-context-attribute-setting-into-a-.patch b/0005-libhns-Encapsulate-context-attribute-setting-into-a-.patch new file mode 100644 index 0000000000000000000000000000000000000000..a111ed5fbe1498867397d92951d95e3c22625199 --- /dev/null +++ b/0005-libhns-Encapsulate-context-attribute-setting-into-a-.patch @@ -0,0 +1,139 @@ +From 4deb1a1a9b181d481f51a989b5c173857da87c44 Mon Sep 17 00:00:00 2001 +From: Junxian Huang +Date: Tue, 5 Mar 2024 13:57:23 +0800 +Subject: [PATCH] libhns: Encapsulate context attribute setting into a single + function + +driver inclusion +category: feature +bugzilla: https://gitee.com/openeuler/kernel/issues/I95UWO + +------------------------------------------------------------------ +This patch doesn't involve functional changes. Just encapsulate context +attribute setting into a single function set_context_attr() to make +hns_roce_alloc_context() more readable. + +Signed-off-by: Junxian Huang +Signed-off-by: Ran Zhou +--- + providers/hns/hns_roce_u.c | 69 ++++++++++++++++++++++---------------- + 1 file changed, 40 insertions(+), 29 deletions(-) + +diff --git a/providers/hns/hns_roce_u.c b/providers/hns/hns_roce_u.c +index 0b254fb..69f7d3f 100644 +--- a/providers/hns/hns_roce_u.c ++++ b/providers/hns/hns_roce_u.c +@@ -97,50 +97,33 @@ static uint32_t calc_table_shift(uint32_t entry_count, uint32_t size_shift) + return count_shift > size_shift ? 
count_shift - size_shift : 0; + } + +-static struct verbs_context *hns_roce_alloc_context(struct ibv_device *ibdev, +- int cmd_fd, +- void *private_data) ++static int set_context_attr(struct hns_roce_device *hr_dev, ++ struct hns_roce_context *context, ++ struct hns_roce_alloc_ucontext_resp *resp) + { +- struct hns_roce_device *hr_dev = to_hr_dev(ibdev); +- struct hns_roce_alloc_ucontext_resp resp = {}; +- struct hns_roce_alloc_ucontext cmd = {}; + struct ibv_device_attr dev_attrs; +- struct hns_roce_context *context; + int i; + +- context = verbs_init_and_alloc_context(ibdev, cmd_fd, context, ibv_ctx, +- RDMA_DRIVER_HNS); +- if (!context) +- return NULL; +- +- cmd.config |= HNS_ROCE_EXSGE_FLAGS | HNS_ROCE_RQ_INLINE_FLAGS | +- HNS_ROCE_CQE_INLINE_FLAGS; +- if (ibv_cmd_get_context(&context->ibv_ctx, &cmd.ibv_cmd, sizeof(cmd), +- &resp.ibv_resp, sizeof(resp))) +- goto err_free; +- +- if (!resp.cqe_size) ++ if (!resp->cqe_size) + context->cqe_size = HNS_ROCE_CQE_SIZE; +- else if (resp.cqe_size <= HNS_ROCE_V3_CQE_SIZE) +- context->cqe_size = resp.cqe_size; ++ else if (resp->cqe_size <= HNS_ROCE_V3_CQE_SIZE) ++ context->cqe_size = resp->cqe_size; + else + context->cqe_size = HNS_ROCE_V3_CQE_SIZE; + +- context->config = resp.config; +- if (resp.config & HNS_ROCE_RSP_EXSGE_FLAGS) +- context->max_inline_data = resp.max_inline_data; ++ context->config = resp->config; ++ if (resp->config & HNS_ROCE_RSP_EXSGE_FLAGS) ++ context->max_inline_data = resp->max_inline_data; + +- context->qp_table_shift = calc_table_shift(resp.qp_tab_size, ++ context->qp_table_shift = calc_table_shift(resp->qp_tab_size, + HNS_ROCE_QP_TABLE_BITS); + context->qp_table_mask = (1 << context->qp_table_shift) - 1; +- pthread_mutex_init(&context->qp_table_mutex, NULL); + for (i = 0; i < HNS_ROCE_QP_TABLE_SIZE; ++i) + context->qp_table[i].refcnt = 0; + +- context->srq_table_shift = calc_table_shift(resp.srq_tab_size, ++ context->srq_table_shift = calc_table_shift(resp->srq_tab_size, + HNS_ROCE_SRQ_TABLE_BITS); + context->srq_table_mask = (1 << context->srq_table_shift) - 1; +- pthread_mutex_init(&context->srq_table_mutex, NULL); + for (i = 0; i < HNS_ROCE_SRQ_TABLE_SIZE; ++i) + context->srq_table[i].refcnt = 0; + +@@ -149,7 +132,7 @@ static struct verbs_context *hns_roce_alloc_context(struct ibv_device *ibdev, + struct ibv_device_attr_ex, + orig_attr), + sizeof(dev_attrs))) +- goto err_free; ++ return EIO; + + hr_dev->hw_version = dev_attrs.hw_ver; + context->max_qp_wr = dev_attrs.max_qp_wr; +@@ -158,11 +141,39 @@ static struct verbs_context *hns_roce_alloc_context(struct ibv_device *ibdev, + context->max_srq_wr = dev_attrs.max_srq_wr; + context->max_srq_sge = dev_attrs.max_srq_sge; + ++ return 0; ++} ++ ++static struct verbs_context *hns_roce_alloc_context(struct ibv_device *ibdev, ++ int cmd_fd, ++ void *private_data) ++{ ++ struct hns_roce_device *hr_dev = to_hr_dev(ibdev); ++ struct hns_roce_alloc_ucontext_resp resp = {}; ++ struct hns_roce_alloc_ucontext cmd = {}; ++ struct hns_roce_context *context; ++ ++ context = verbs_init_and_alloc_context(ibdev, cmd_fd, context, ibv_ctx, ++ RDMA_DRIVER_HNS); ++ if (!context) ++ return NULL; ++ ++ cmd.config |= HNS_ROCE_EXSGE_FLAGS | HNS_ROCE_RQ_INLINE_FLAGS | ++ HNS_ROCE_CQE_INLINE_FLAGS; ++ if (ibv_cmd_get_context(&context->ibv_ctx, &cmd.ibv_cmd, sizeof(cmd), ++ &resp.ibv_resp, sizeof(resp))) ++ goto err_free; ++ ++ if (set_context_attr(hr_dev, context, &resp)) ++ goto err_free; ++ + context->uar = mmap(NULL, hr_dev->page_size, PROT_READ | PROT_WRITE, + MAP_SHARED, cmd_fd, 0); + if 
(context->uar == MAP_FAILED) + goto err_free; + ++ pthread_mutex_init(&context->qp_table_mutex, NULL); ++ pthread_mutex_init(&context->srq_table_mutex, NULL); + pthread_spin_init(&context->uar_lock, PTHREAD_PROCESS_PRIVATE); + + verbs_set_ops(&context->ibv_ctx, &hns_common_ops); +-- +2.33.0 + diff --git a/0006-libhns-Support-congestion-control-algorithm-configur.patch b/0006-libhns-Support-congestion-control-algorithm-configur.patch new file mode 100644 index 0000000000000000000000000000000000000000..7af97c46d6d1eea5833dfa6d0fb1e453e12ebf61 --- /dev/null +++ b/0006-libhns-Support-congestion-control-algorithm-configur.patch @@ -0,0 +1,185 @@ +From 4cc15f4ef3dadb3219719376822cf427df338f2a Mon Sep 17 00:00:00 2001 +From: Junxian Huang +Date: Tue, 5 Mar 2024 13:57:24 +0800 +Subject: [PATCH 7/7] libhns: Support congestion control algorithm + configuration with direct verbs + +driver inclusion +category: feature +bugzilla: https://gitee.com/openeuler/kernel/issues/I95UWO + +------------------------------------------------------------------ +Add support for configuration of congestion control algorithms in QP +granularity with direct verbs hnsdv_create_qp(). + +Signed-off-by: Junxian Huang +Signed-off-by: Ran Zhou +--- + providers/hns/hns_roce_u.c | 1 + + providers/hns/hns_roce_u.h | 1 + + providers/hns/hns_roce_u_verbs.c | 45 ++++++++++++++++++++++++++++---- + providers/hns/hnsdv.h | 25 ++++++++++++++++-- + 4 files changed, 65 insertions(+), 7 deletions(-) + +diff --git a/providers/hns/hns_roce_u.c b/providers/hns/hns_roce_u.c +index 69f7d3f..90f250e 100644 +--- a/providers/hns/hns_roce_u.c ++++ b/providers/hns/hns_roce_u.c +@@ -135,6 +135,7 @@ static int set_context_attr(struct hns_roce_device *hr_dev, + return EIO; + + hr_dev->hw_version = dev_attrs.hw_ver; ++ hr_dev->congest_cap = resp->congest_type; + context->max_qp_wr = dev_attrs.max_qp_wr; + context->max_sge = dev_attrs.max_sge; + context->max_cqe = dev_attrs.max_cqe; +diff --git a/providers/hns/hns_roce_u.h b/providers/hns/hns_roce_u.h +index 99fa23f..c73e5c0 100644 +--- a/providers/hns/hns_roce_u.h ++++ b/providers/hns/hns_roce_u.h +@@ -158,6 +158,7 @@ struct hns_roce_device { + int page_size; + const struct hns_roce_u_hw *u_hw; + int hw_version; ++ uint8_t congest_cap; + }; + + struct hns_roce_buf { +diff --git a/providers/hns/hns_roce_u_verbs.c b/providers/hns/hns_roce_u_verbs.c +index 997b7e0..dcdc722 100644 +--- a/providers/hns/hns_roce_u_verbs.c ++++ b/providers/hns/hns_roce_u_verbs.c +@@ -786,7 +786,7 @@ int hns_roce_u_destroy_srq(struct ibv_srq *ibv_srq) + } + + enum { +- HNSDV_QP_SUP_COMP_MASK = 0, ++ HNSDV_QP_SUP_COMP_MASK = HNSDV_QP_INIT_ATTR_MASK_QP_CONGEST_TYPE, + }; + + static int check_hnsdv_qp_attr(struct hns_roce_context *ctx, +@@ -1209,10 +1209,33 @@ static int hns_roce_store_qp(struct hns_roce_context *ctx, + return 0; + } + ++static int to_cmd_cong_type(uint8_t cong_type, __u64 *cmd_cong_type) ++{ ++ switch (cong_type) { ++ case HNSDV_QP_CREATE_ENABLE_DCQCN: ++ *cmd_cong_type = HNS_ROCE_CREATE_QP_FLAGS_DCQCN; ++ break; ++ case HNSDV_QP_CREATE_ENABLE_LDCP: ++ *cmd_cong_type = HNS_ROCE_CREATE_QP_FLAGS_LDCP; ++ break; ++ case HNSDV_QP_CREATE_ENABLE_HC3: ++ *cmd_cong_type = HNS_ROCE_CREATE_QP_FLAGS_HC3; ++ break; ++ case HNSDV_QP_CREATE_ENABLE_DIP: ++ *cmd_cong_type = HNS_ROCE_CREATE_QP_FLAGS_DIP; ++ break; ++ default: ++ return EINVAL; ++ } ++ ++ return 0; ++} ++ + static int qp_exec_create_cmd(struct ibv_qp_init_attr_ex *attr, + struct hns_roce_qp *qp, + struct hns_roce_context *ctx, +- uint64_t *dwqe_mmap_key) ++ 
uint64_t *dwqe_mmap_key, ++ struct hnsdv_qp_init_attr *hns_attr) + { + struct hns_roce_create_qp_ex_resp resp_ex = {}; + struct hns_roce_create_qp_ex cmd_ex = {}; +@@ -1224,6 +1247,15 @@ static int qp_exec_create_cmd(struct ibv_qp_init_attr_ex *attr, + cmd_ex.log_sq_stride = qp->sq.wqe_shift; + cmd_ex.log_sq_bb_count = hr_ilog32(qp->sq.wqe_cnt); + ++ if (hns_attr && ++ hns_attr->comp_mask & HNSDV_QP_INIT_ATTR_MASK_QP_CONGEST_TYPE) { ++ ret = to_cmd_cong_type(hns_attr->congest_type, ++ &cmd_ex.cong_type_flags); ++ if (ret) ++ return ret; ++ cmd_ex.comp_mask |= HNS_ROCE_CREATE_QP_MASK_CONGEST_TYPE; ++ } ++ + ret = ibv_cmd_create_qp_ex2(&ctx->ibv_ctx.context, &qp->verbs_qp, attr, + &cmd_ex.ibv_cmd, sizeof(cmd_ex), + &resp_ex.ibv_resp, sizeof(resp_ex)); +@@ -1322,7 +1354,7 @@ static struct ibv_qp *create_qp(struct ibv_context *ibv_ctx, + if (ret) + goto err_buf; + +- ret = qp_exec_create_cmd(attr, qp, context, &dwqe_mmap_key); ++ ret = qp_exec_create_cmd(attr, qp, context, &dwqe_mmap_key, hns_attr); + if (ret) + goto err_cmd; + +@@ -1403,9 +1435,9 @@ struct ibv_qp *hnsdv_create_qp(struct ibv_context *context, + int hnsdv_query_device(struct ibv_context *context, + struct hnsdv_context *attrs_out) + { +- struct hns_roce_context *ctx = context ? to_hr_ctx(context) : NULL; ++ struct hns_roce_device *hr_dev = to_hr_dev(context->device); + +- if (!ctx || !attrs_out) ++ if (!hr_dev || !attrs_out) + return EINVAL; + + if (!is_hns_dev(context->device)) { +@@ -1414,6 +1446,9 @@ int hnsdv_query_device(struct ibv_context *context, + } + memset(attrs_out, 0, sizeof(*attrs_out)); + ++ attrs_out->comp_mask |= HNSDV_CONTEXT_MASK_CONGEST_TYPE; ++ attrs_out->congest_type = hr_dev->congest_cap; ++ + return 0; + } + +diff --git a/providers/hns/hnsdv.h b/providers/hns/hnsdv.h +index 49ba08a..451b26e 100644 +--- a/providers/hns/hnsdv.h ++++ b/providers/hns/hnsdv.h +@@ -15,12 +15,33 @@ + extern "C" { + #endif + ++enum hnsdv_qp_congest_ctrl_type { ++ HNSDV_QP_CREATE_ENABLE_DCQCN = 1 << 0, ++ HNSDV_QP_CREATE_ENABLE_LDCP = 1 << 1, ++ HNSDV_QP_CREATE_ENABLE_HC3 = 1 << 2, ++ HNSDV_QP_CREATE_ENABLE_DIP = 1 << 3, ++}; ++ ++enum hnsdv_qp_init_attr_mask { ++ HNSDV_QP_INIT_ATTR_MASK_QP_CONGEST_TYPE = 1 << 1, ++}; ++ + struct hnsdv_qp_init_attr { +- uint64_t comp_mask; ++ uint64_t comp_mask; /* Use enum hnsdv_qp_init_attr_mask */ ++ uint32_t create_flags; ++ uint8_t congest_type; /* Use enum hnsdv_qp_congest_ctrl_type */ ++ uint8_t reserved[3]; ++}; ++ ++enum hnsdv_query_context_comp_mask { ++ HNSDV_CONTEXT_MASK_CONGEST_TYPE = 1 << 0, + }; + + struct hnsdv_context { +- uint64_t comp_mask; ++ uint64_t comp_mask; /* Use enum hnsdv_query_context_comp_mask */ ++ uint64_t flags; ++ uint8_t congest_type; /* Use enum hnsdv_qp_congest_ctrl_type */ ++ uint8_t reserved[7]; + }; + + bool hnsdv_is_supported(struct ibv_device *device); +-- +2.33.0 + diff --git a/0007-libhns-Add-support-for-thread-domain-and-parent-doma.patch b/0007-libhns-Add-support-for-thread-domain-and-parent-doma.patch new file mode 100644 index 0000000000000000000000000000000000000000..137e4a3e14bcb57608227e1009303fb9f38a7884 --- /dev/null +++ b/0007-libhns-Add-support-for-thread-domain-and-parent-doma.patch @@ -0,0 +1,372 @@ +From 510ebb10167a964ddb02bc1a6df90ea767d611e9 Mon Sep 17 00:00:00 2001 +From: zzry <1245464216@qq.com> +Date: Fri, 8 Mar 2024 15:05:55 +0800 +Subject: [PATCH 07/10] libhns: Add support for thread domain and parent domain + +driver inclusion +category: feature +bugzilla: https://gitee.com/src-openeuler/rdma-core/issues/I97WST + 
+------------------------------------------------------------------ + +Add support for thread domain (TD) and parent domain (PAD). +Extend the orginal hns_roce_pd struct to hns_roce_pad by +adding the new hns_roce_td struct. When a parent domain +holds a thread domain, the associated data path will be set +to lock-free mode to improve performance. + +Signed-off-by: Yixing Liu +Signed-off-by: Junxian Huang +--- + providers/hns/hns_roce_u.c | 5 +- + providers/hns/hns_roce_u.h | 69 +++++++++++++- + providers/hns/hns_roce_u_verbs.c | 156 ++++++++++++++++++++++++++++--- + 3 files changed, 215 insertions(+), 15 deletions(-) + +diff --git a/providers/hns/hns_roce_u.c b/providers/hns/hns_roce_u.c +index 90f250e..e1c2659 100644 +--- a/providers/hns/hns_roce_u.c ++++ b/providers/hns/hns_roce_u.c +@@ -67,7 +67,7 @@ static const struct verbs_context_ops hns_common_ops = { + .create_qp = hns_roce_u_create_qp, + .create_qp_ex = hns_roce_u_create_qp_ex, + .dealloc_mw = hns_roce_u_dealloc_mw, +- .dealloc_pd = hns_roce_u_free_pd, ++ .dealloc_pd = hns_roce_u_dealloc_pd, + .dereg_mr = hns_roce_u_dereg_mr, + .destroy_cq = hns_roce_u_destroy_cq, + .modify_cq = hns_roce_u_modify_cq, +@@ -88,6 +88,9 @@ static const struct verbs_context_ops hns_common_ops = { + .close_xrcd = hns_roce_u_close_xrcd, + .open_qp = hns_roce_u_open_qp, + .get_srq_num = hns_roce_u_get_srq_num, ++ .alloc_td = hns_roce_u_alloc_td, ++ .dealloc_td = hns_roce_u_dealloc_td, ++ .alloc_parent_domain = hns_roce_u_alloc_pad, + }; + + static uint32_t calc_table_shift(uint32_t entry_count, uint32_t size_shift) +diff --git a/providers/hns/hns_roce_u.h b/providers/hns/hns_roce_u.h +index c73e5c0..5d3f480 100644 +--- a/providers/hns/hns_roce_u.h ++++ b/providers/hns/hns_roce_u.h +@@ -196,6 +196,11 @@ struct hns_roce_db_page { + unsigned long *bitmap; + }; + ++struct hns_roce_spinlock { ++ pthread_spinlock_t lock; ++ int need_lock; ++}; ++ + struct hns_roce_context { + struct verbs_context ibv_ctx; + void *uar; +@@ -230,9 +235,21 @@ struct hns_roce_context { + unsigned int max_inline_data; + }; + ++struct hns_roce_td { ++ struct ibv_td ibv_td; ++ atomic_int refcount; ++}; ++ + struct hns_roce_pd { + struct ibv_pd ibv_pd; + unsigned int pdn; ++ atomic_int refcount; ++ struct hns_roce_pd *protection_domain; ++}; ++ ++struct hns_roce_pad { ++ struct hns_roce_pd pd; ++ struct hns_roce_td *td; + }; + + struct hns_roce_cq { +@@ -406,9 +423,35 @@ static inline struct hns_roce_context *to_hr_ctx(struct ibv_context *ibv_ctx) + return container_of(ibv_ctx, struct hns_roce_context, ibv_ctx.context); + } + ++static inline struct hns_roce_td *to_hr_td(struct ibv_td *ibv_td) ++{ ++ return container_of(ibv_td, struct hns_roce_td, ibv_td); ++} ++ ++/* to_hr_pd always returns the real hns_roce_pd obj. */ + static inline struct hns_roce_pd *to_hr_pd(struct ibv_pd *ibv_pd) + { +- return container_of(ibv_pd, struct hns_roce_pd, ibv_pd); ++ struct hns_roce_pd *pd = ++ container_of(ibv_pd, struct hns_roce_pd, ibv_pd); ++ ++ if (pd->protection_domain) ++ return pd->protection_domain; ++ ++ return pd; ++} ++ ++static inline struct hns_roce_pad *to_hr_pad(struct ibv_pd *ibv_pd) ++{ ++ struct hns_roce_pad *pad = ++ ibv_pd ? 
++ container_of(ibv_pd, struct hns_roce_pad, pd.ibv_pd) : ++ NULL; ++ ++ if (pad && pad->pd.protection_domain) ++ return pad; ++ ++ /* Otherwise ibv_pd isn't a parent_domain */ ++ return NULL; + } + + static inline struct hns_roce_cq *to_hr_cq(struct ibv_cq *ibv_cq) +@@ -431,14 +474,35 @@ static inline struct hns_roce_ah *to_hr_ah(struct ibv_ah *ibv_ah) + return container_of(ibv_ah, struct hns_roce_ah, ibv_ah); + } + ++static inline int hns_roce_spin_lock(struct hns_roce_spinlock *hr_lock) ++{ ++ if (hr_lock->need_lock) ++ return pthread_spin_lock(&hr_lock->lock); ++ ++ return 0; ++} ++ ++static inline int hns_roce_spin_unlock(struct hns_roce_spinlock *hr_lock) ++{ ++ if (hr_lock->need_lock) ++ return pthread_spin_unlock(&hr_lock->lock); ++ ++ return 0; ++} ++ + int hns_roce_u_query_device(struct ibv_context *context, + const struct ibv_query_device_ex_input *input, + struct ibv_device_attr_ex *attr, size_t attr_size); + int hns_roce_u_query_port(struct ibv_context *context, uint8_t port, + struct ibv_port_attr *attr); + ++struct ibv_td *hns_roce_u_alloc_td(struct ibv_context *context, ++ struct ibv_td_init_attr *attr); ++int hns_roce_u_dealloc_td(struct ibv_td *ibv_td); ++struct ibv_pd *hns_roce_u_alloc_pad(struct ibv_context *context, ++ struct ibv_parent_domain_init_attr *attr); + struct ibv_pd *hns_roce_u_alloc_pd(struct ibv_context *context); +-int hns_roce_u_free_pd(struct ibv_pd *pd); ++int hns_roce_u_dealloc_pd(struct ibv_pd *pd); + + struct ibv_mr *hns_roce_u_reg_mr(struct ibv_pd *pd, void *addr, size_t length, + uint64_t hca_va, int access); +@@ -497,6 +561,7 @@ int hns_roce_u_close_xrcd(struct ibv_xrcd *ibv_xrcd); + int hns_roce_alloc_buf(struct hns_roce_buf *buf, unsigned int size, + int page_size); + void hns_roce_free_buf(struct hns_roce_buf *buf); ++void hns_roce_qp_spinlock_destroy(struct hns_roce_qp *qp); + + void hns_roce_free_qp_buf(struct hns_roce_qp *qp, struct hns_roce_context *ctx); + +diff --git a/providers/hns/hns_roce_u_verbs.c b/providers/hns/hns_roce_u_verbs.c +index dcdc722..ecf8666 100644 +--- a/providers/hns/hns_roce_u_verbs.c ++++ b/providers/hns/hns_roce_u_verbs.c +@@ -33,6 +33,7 @@ + #include + #include + #include ++#include + #include + #include + #include +@@ -42,6 +43,37 @@ + #include "hns_roce_u_db.h" + #include "hns_roce_u_hw_v2.h" + ++static bool hns_roce_whether_need_lock(struct ibv_pd *pd) ++{ ++ struct hns_roce_pad *pad; ++ ++ pad = to_hr_pad(pd); ++ if (pad && pad->td) ++ return false; ++ ++ return true; ++} ++ ++static int hns_roce_spinlock_init(struct hns_roce_spinlock *hr_lock, ++ bool need_lock) ++{ ++ hr_lock->need_lock = need_lock; ++ ++ if (need_lock) ++ return pthread_spin_init(&hr_lock->lock, ++ PTHREAD_PROCESS_PRIVATE); ++ ++ return 0; ++} ++ ++static int hns_roce_spinlock_destroy(struct hns_roce_spinlock *hr_lock) ++{ ++ if (hr_lock->need_lock) ++ return pthread_spin_destroy(&hr_lock->lock); ++ ++ return 0; ++} ++ + void hns_roce_init_qp_indices(struct hns_roce_qp *qp) + { + qp->sq.head = 0; +@@ -85,38 +117,138 @@ int hns_roce_u_query_port(struct ibv_context *context, uint8_t port, + return ibv_cmd_query_port(context, port, attr, &cmd, sizeof(cmd)); + } + ++struct ibv_td *hns_roce_u_alloc_td(struct ibv_context *context, ++ struct ibv_td_init_attr *attr) ++{ ++ struct hns_roce_td *td; ++ ++ if (attr->comp_mask) { ++ errno = EOPNOTSUPP; ++ return NULL; ++ } ++ ++ td = calloc(1, sizeof(*td)); ++ if (!td) { ++ errno = ENOMEM; ++ return NULL; ++ } ++ ++ td->ibv_td.context = context; ++ atomic_init(&td->refcount, 1); ++ ++ return 
&td->ibv_td; ++} ++ ++int hns_roce_u_dealloc_td(struct ibv_td *ibv_td) ++{ ++ struct hns_roce_td *td; ++ ++ td = to_hr_td(ibv_td); ++ if (atomic_load(&td->refcount) > 1) ++ return EBUSY; ++ ++ free(td); ++ ++ return 0; ++} ++ + struct ibv_pd *hns_roce_u_alloc_pd(struct ibv_context *context) + { ++ struct hns_roce_alloc_pd_resp resp = {}; + struct ibv_alloc_pd cmd; + struct hns_roce_pd *pd; +- struct hns_roce_alloc_pd_resp resp = {}; +- +- pd = malloc(sizeof(*pd)); +- if (!pd) +- return NULL; + +- if (ibv_cmd_alloc_pd(context, &pd->ibv_pd, &cmd, sizeof(cmd), +- &resp.ibv_resp, sizeof(resp))) { +- free(pd); ++ pd = calloc(1, sizeof(*pd)); ++ if (!pd) { ++ errno = ENOMEM; + return NULL; + } ++ errno = ibv_cmd_alloc_pd(context, &pd->ibv_pd, &cmd, sizeof(cmd), ++ &resp.ibv_resp, sizeof(resp)); + ++ if (errno) ++ goto err; ++ ++ atomic_init(&pd->refcount, 1); + pd->pdn = resp.pdn; + + return &pd->ibv_pd; ++ ++err: ++ free(pd); ++ return NULL; + } + +-int hns_roce_u_free_pd(struct ibv_pd *pd) ++struct ibv_pd *hns_roce_u_alloc_pad(struct ibv_context *context, ++ struct ibv_parent_domain_init_attr *attr) ++{ ++ struct hns_roce_pad *pad; ++ ++ if (ibv_check_alloc_parent_domain(attr)) ++ return NULL; ++ ++ if (attr->comp_mask) { ++ errno = EOPNOTSUPP; ++ return NULL; ++ } ++ ++ pad = calloc(1, sizeof(*pad)); ++ if (!pad) { ++ errno = ENOMEM; ++ return NULL; ++ } ++ ++ if (attr->td) { ++ pad->td = to_hr_td(attr->td); ++ atomic_fetch_add(&pad->td->refcount, 1); ++ } ++ ++ pad->pd.protection_domain = to_hr_pd(attr->pd); ++ atomic_fetch_add(&pad->pd.protection_domain->refcount, 1); ++ ++ atomic_init(&pad->pd.refcount, 1); ++ ibv_initialize_parent_domain(&pad->pd.ibv_pd, ++ &pad->pd.protection_domain->ibv_pd); ++ ++ return &pad->pd.ibv_pd; ++} ++ ++static void hns_roce_free_pad(struct hns_roce_pad *pad) ++{ ++ atomic_fetch_sub(&pad->pd.protection_domain->refcount, 1); ++ ++ if (pad->td) ++ atomic_fetch_sub(&pad->td->refcount, 1); ++ ++ free(pad); ++} ++ ++static int hns_roce_free_pd(struct hns_roce_pd *pd) + { + int ret; + +- ret = ibv_cmd_dealloc_pd(pd); ++ if (atomic_load(&pd->refcount) > 1) ++ return EBUSY; ++ ++ ret = ibv_cmd_dealloc_pd(&pd->ibv_pd); + if (ret) + return ret; + +- free(to_hr_pd(pd)); ++ free(pd); ++ return 0; ++} + +- return ret; ++int hns_roce_u_dealloc_pd(struct ibv_pd *ibv_pd) ++{ ++ struct hns_roce_pad *pad = to_hr_pad(ibv_pd); ++ struct hns_roce_pd *pd = to_hr_pd(ibv_pd); ++ ++ if (pad) { ++ hns_roce_free_pad(pad); ++ return 0; ++ } ++ ++ return hns_roce_free_pd(pd); + } + + struct ibv_xrcd *hns_roce_u_open_xrcd(struct ibv_context *context, +-- +2.33.0 + diff --git a/0008-libhns-Add-support-for-lock-free-QP.patch b/0008-libhns-Add-support-for-lock-free-QP.patch new file mode 100644 index 0000000000000000000000000000000000000000..b2d6afe5630d15d2fb0f3c4e079dd7b4d3771226 --- /dev/null +++ b/0008-libhns-Add-support-for-lock-free-QP.patch @@ -0,0 +1,232 @@ +From f5f54bf889825da254b2a5df859da1c471a40314 Mon Sep 17 00:00:00 2001 +From: zzry <1245464216@qq.com> +Date: Fri, 8 Mar 2024 15:56:09 +0800 +Subject: [PATCH 08/10] libhns: Add support for lock-free QP + +driver inclusion +category: feature +bugzilla: https://gitee.com/src-openeuler/rdma-core/issues/I97WST + +------------------------------------------------------------------ + +Drop QP locks when associated to a PAD holding a TD. 
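+
+A minimal sketch of how an application opts in (illustrative only; ctx,
+pd and attr_ex are assumed to be an opened verbs context, a regular
+protection domain and an already filled struct ibv_qp_init_attr_ex):
+
+    struct ibv_td_init_attr td_attr = { .comp_mask = 0 };
+    struct ibv_td *td = ibv_alloc_td(ctx, &td_attr);
+    if (!td)
+        return -errno;
+
+    struct ibv_parent_domain_init_attr pad_attr = {
+        .pd = pd,
+        .td = td, /* a PAD holding a TD -> lock-free data path */
+    };
+    struct ibv_pd *pad = ibv_alloc_parent_domain(ctx, &pad_attr);
+    if (!pad)
+        return -errno;
+
+    attr_ex.comp_mask |= IBV_QP_INIT_ATTR_PD;
+    attr_ex.pd = pad; /* use the PAD instead of the plain PD */
+    struct ibv_qp *qp = ibv_create_qp_ex(ctx, &attr_ex);
+
+With the QP bound to such a PAD the provider skips the SQ/RQ spinlocks,
+so the application itself must guarantee that the QP is never posted to
+from more than one thread at a time.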
+ +Signed-off-by: Yixing Liu +Signed-off-by: Junxian Huang +--- + providers/hns/hns_roce_u.h | 2 +- + providers/hns/hns_roce_u_hw_v2.c | 26 ++++++++++------- + providers/hns/hns_roce_u_verbs.c | 49 +++++++++++++++++++++++++++++--- + 3 files changed, 61 insertions(+), 16 deletions(-) + +diff --git a/providers/hns/hns_roce_u.h b/providers/hns/hns_roce_u.h +index 5d3f480..5732e39 100644 +--- a/providers/hns/hns_roce_u.h ++++ b/providers/hns/hns_roce_u.h +@@ -305,7 +305,7 @@ struct hns_roce_srq { + + struct hns_roce_wq { + unsigned long *wrid; +- pthread_spinlock_t lock; ++ struct hns_roce_spinlock hr_lock; + unsigned int wqe_cnt; + int max_post; + unsigned int head; +diff --git a/providers/hns/hns_roce_u_hw_v2.c b/providers/hns/hns_roce_u_hw_v2.c +index dd13049..90a76e2 100644 +--- a/providers/hns/hns_roce_u_hw_v2.c ++++ b/providers/hns/hns_roce_u_hw_v2.c +@@ -1270,7 +1270,7 @@ int hns_roce_u_v2_post_send(struct ibv_qp *ibvqp, struct ibv_send_wr *wr, + return ret; + } + +- pthread_spin_lock(&qp->sq.lock); ++ hns_roce_spin_lock(&qp->sq.hr_lock); + + sge_info.start_idx = qp->next_sge; /* start index of extend sge */ + +@@ -1331,7 +1331,7 @@ out: + *(qp->sdb) = qp->sq.head & 0xffff; + } + +- pthread_spin_unlock(&qp->sq.lock); ++ hns_roce_spin_unlock(&qp->sq.hr_lock); + + if (ibvqp->state == IBV_QPS_ERR) { + attr.qp_state = IBV_QPS_ERR; +@@ -1420,7 +1420,7 @@ static int hns_roce_u_v2_post_recv(struct ibv_qp *ibvqp, struct ibv_recv_wr *wr, + return ret; + } + +- pthread_spin_lock(&qp->rq.lock); ++ hns_roce_spin_lock(&qp->rq.hr_lock); + + max_sge = qp->rq.max_gs - qp->rq.rsv_sge; + for (nreq = 0; wr; ++nreq, wr = wr->next) { +@@ -1454,7 +1454,7 @@ out: + hns_roce_update_rq_db(ctx, ibvqp->qp_num, qp->rq.head); + } + +- pthread_spin_unlock(&qp->rq.lock); ++ hns_roce_spin_unlock(&qp->rq.hr_lock); + + if (ibvqp->state == IBV_QPS_ERR) { + attr.qp_state = IBV_QPS_ERR; +@@ -1549,8 +1549,8 @@ static int hns_roce_u_v2_modify_qp(struct ibv_qp *qp, struct ibv_qp_attr *attr, + int ret; + + if ((attr_mask & IBV_QP_STATE) && (attr->qp_state == IBV_QPS_ERR)) { +- pthread_spin_lock(&hr_qp->sq.lock); +- pthread_spin_lock(&hr_qp->rq.lock); ++ hns_roce_spin_lock(&hr_qp->sq.hr_lock); ++ hns_roce_spin_lock(&hr_qp->rq.hr_lock); + flag = true; + } + +@@ -1561,8 +1561,8 @@ static int hns_roce_u_v2_modify_qp(struct ibv_qp *qp, struct ibv_qp_attr *attr, + if (flag) { + if (!ret) + qp->state = IBV_QPS_ERR; +- pthread_spin_unlock(&hr_qp->rq.lock); +- pthread_spin_unlock(&hr_qp->sq.lock); ++ hns_roce_spin_unlock(&hr_qp->sq.hr_lock); ++ hns_roce_spin_unlock(&hr_qp->rq.hr_lock); + } + + if (ret) +@@ -1640,6 +1640,7 @@ static void hns_roce_unlock_cqs(struct ibv_qp *qp) + static int hns_roce_u_v2_destroy_qp(struct ibv_qp *ibqp) + { + struct hns_roce_context *ctx = to_hr_ctx(ibqp->context); ++ struct hns_roce_pad *pad = to_hr_pad(ibqp->pd); + struct hns_roce_qp *qp = to_hr_qp(ibqp); + int ret; + +@@ -1666,6 +1667,9 @@ static int hns_roce_u_v2_destroy_qp(struct ibv_qp *ibqp) + + hns_roce_free_qp_buf(qp, ctx); + ++ if (pad) ++ atomic_fetch_sub(&pad->pd.refcount, 1); ++ + free(qp); + + return ret; +@@ -2555,7 +2559,7 @@ static void wr_start(struct ibv_qp_ex *ibv_qp) + return; + } + +- pthread_spin_lock(&qp->sq.lock); ++ hns_roce_spin_lock(&qp->sq.hr_lock); + qp->sge_info.start_idx = qp->next_sge; + qp->rb_sq_head = qp->sq.head; + qp->err = 0; +@@ -2588,7 +2592,7 @@ static int wr_complete(struct ibv_qp_ex *ibv_qp) + } + + out: +- pthread_spin_unlock(&qp->sq.lock); ++ hns_roce_spin_unlock(&qp->sq.hr_lock); + if (ibv_qp->qp_base.state == 
IBV_QPS_ERR) { + attr.qp_state = IBV_QPS_ERR; + hns_roce_u_v2_modify_qp(&ibv_qp->qp_base, &attr, IBV_QP_STATE); +@@ -2603,7 +2607,7 @@ static void wr_abort(struct ibv_qp_ex *ibv_qp) + + qp->sq.head = qp->rb_sq_head; + +- pthread_spin_unlock(&qp->sq.lock); ++ hns_roce_spin_unlock(&qp->sq.hr_lock); + } + + enum { +diff --git a/providers/hns/hns_roce_u_verbs.c b/providers/hns/hns_roce_u_verbs.c +index ecf8666..d503031 100644 +--- a/providers/hns/hns_roce_u_verbs.c ++++ b/providers/hns/hns_roce_u_verbs.c +@@ -1033,6 +1033,41 @@ static int verify_qp_create_attr(struct hns_roce_context *ctx, + return verify_qp_create_cap(ctx, attr); + } + ++static int hns_roce_qp_spinlock_init(struct hns_roce_context *ctx, ++ struct ibv_qp_init_attr_ex *attr, ++ struct hns_roce_qp *qp) ++{ ++ bool sq_need_lock; ++ bool rq_need_lock; ++ int ret; ++ ++ sq_need_lock = hns_roce_whether_need_lock(attr->pd); ++ if (!sq_need_lock) ++ verbs_info(&ctx->ibv_ctx, "configure sq as no lock.\n"); ++ ++ rq_need_lock = hns_roce_whether_need_lock(attr->pd); ++ if (!rq_need_lock) ++ verbs_info(&ctx->ibv_ctx, "configure rq as no lock.\n"); ++ ++ ret = hns_roce_spinlock_init(&qp->sq.hr_lock, sq_need_lock); ++ if (ret) ++ return ret; ++ ++ ret = hns_roce_spinlock_init(&qp->rq.hr_lock, rq_need_lock); ++ if (ret) { ++ hns_roce_spinlock_destroy(&qp->sq.hr_lock); ++ return ret; ++ } ++ ++ return 0; ++} ++ ++void hns_roce_qp_spinlock_destroy(struct hns_roce_qp *qp) ++{ ++ hns_roce_spinlock_destroy(&qp->rq.hr_lock); ++ hns_roce_spinlock_destroy(&qp->sq.hr_lock); ++} ++ + static int alloc_recv_rinl_buf(uint32_t max_sge, + struct hns_roce_rinl_buf *rinl_buf) + { +@@ -1435,10 +1470,6 @@ static int hns_roce_alloc_qp_buf(struct ibv_qp_init_attr_ex *attr, + { + int ret; + +- if (pthread_spin_init(&qp->sq.lock, PTHREAD_PROCESS_PRIVATE) || +- pthread_spin_init(&qp->rq.lock, PTHREAD_PROCESS_PRIVATE)) +- return -ENOMEM; +- + ret = qp_alloc_wqe(&attr->cap, qp, ctx); + if (ret) + return ret; +@@ -1466,6 +1497,7 @@ static struct ibv_qp *create_qp(struct ibv_context *ibv_ctx, + struct hnsdv_qp_init_attr *hns_attr) + { + struct hns_roce_context *context = to_hr_ctx(ibv_ctx); ++ struct hns_roce_pad *pad = to_hr_pad(attr->pd); + struct hns_roce_qp *qp; + uint64_t dwqe_mmap_key; + int ret; +@@ -1482,6 +1514,13 @@ static struct ibv_qp *create_qp(struct ibv_context *ibv_ctx, + + hns_roce_set_qp_params(attr, qp, context); + ++ if (pad) ++ atomic_fetch_add(&pad->pd.refcount, 1); ++ ++ ret = hns_roce_qp_spinlock_init(context, attr, qp); ++ if (ret) ++ goto err_spinlock; ++ + ret = hns_roce_alloc_qp_buf(attr, qp, context); + if (ret) + goto err_buf; +@@ -1515,6 +1554,8 @@ err_ops: + err_cmd: + hns_roce_free_qp_buf(qp, context); + err_buf: ++ hns_roce_qp_spinlock_destroy(qp); ++err_spinlock: + free(qp); + err: + if (ret < 0) +-- +2.33.0 + diff --git a/0009-libhns-Add-support-for-lock-free-CQ.patch b/0009-libhns-Add-support-for-lock-free-CQ.patch new file mode 100644 index 0000000000000000000000000000000000000000..12ddd86d4dbfb126b9d8d8d65d6b22289be9db83 --- /dev/null +++ b/0009-libhns-Add-support-for-lock-free-CQ.patch @@ -0,0 +1,282 @@ +From cac8fdd87cd6e222ab5184f3d91dfc99bb922627 Mon Sep 17 00:00:00 2001 +From: zzry <1245464216@qq.com> +Date: Fri, 8 Mar 2024 16:29:34 +0800 +Subject: [PATCH 09/10] libhns: Add support for lock-free CQ + +driver inclusion +category: feature +bugzilla: https://gitee.com/src-openeuler/rdma-core/issues/I97WST + +------------------------------------------------------------------ + +Drop CQ locks when associated to a PAD holding a TD. 
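+
+A minimal sketch (illustrative only; ctx is an opened verbs context and
+pad is a parent domain allocated with a thread domain, as in the
+previous patches):
+
+    struct ibv_cq_init_attr_ex cq_attr = {
+        .cqe = 256,
+        .comp_mask = IBV_CQ_INIT_ATTR_MASK_PD,
+        .parent_domain = pad, /* PAD with a TD -> no CQ spinlock */
+    };
+    struct ibv_cq_ex *cq_ex = ibv_create_cq_ex(ctx, &cq_attr);
+    if (!cq_ex)
+        return -errno;
+    struct ibv_cq *cq = ibv_cq_ex_to_cq(cq_ex);
+
+As with the lock-free QP, the application must make sure such a CQ is
+only polled from one thread at a time.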
+ +Signed-off-by: Yixing Liu +Signed-off-by: Junxian Huang +--- + providers/hns/hns_roce_u.h | 3 +- + providers/hns/hns_roce_u_hw_v2.c | 46 +++++++++++++------------- + providers/hns/hns_roce_u_verbs.c | 56 ++++++++++++++++++++++++++++---- + 3 files changed, 74 insertions(+), 31 deletions(-) + +diff --git a/providers/hns/hns_roce_u.h b/providers/hns/hns_roce_u.h +index 5732e39..0035e36 100644 +--- a/providers/hns/hns_roce_u.h ++++ b/providers/hns/hns_roce_u.h +@@ -255,7 +255,7 @@ struct hns_roce_pad { + struct hns_roce_cq { + struct verbs_cq verbs_cq; + struct hns_roce_buf buf; +- pthread_spinlock_t lock; ++ struct hns_roce_spinlock hr_lock; + unsigned int cqn; + unsigned int cq_depth; + unsigned int cons_index; +@@ -265,6 +265,7 @@ struct hns_roce_cq { + unsigned long flags; + unsigned int cqe_size; + struct hns_roce_v2_cqe *cqe; ++ struct ibv_pd *parent_domain; + }; + + struct hns_roce_idx_que { +diff --git a/providers/hns/hns_roce_u_hw_v2.c b/providers/hns/hns_roce_u_hw_v2.c +index 90a76e2..2fb4d72 100644 +--- a/providers/hns/hns_roce_u_hw_v2.c ++++ b/providers/hns/hns_roce_u_hw_v2.c +@@ -268,9 +268,9 @@ static int hns_roce_v2_wq_overflow(struct hns_roce_wq *wq, unsigned int nreq, + if (cur + nreq < wq->max_post) + return 0; + +- pthread_spin_lock(&cq->lock); ++ hns_roce_spin_lock(&cq->hr_lock); + cur = wq->head - wq->tail; +- pthread_spin_unlock(&cq->lock); ++ hns_roce_spin_unlock(&cq->hr_lock); + + return cur + nreq >= wq->max_post; + } +@@ -724,7 +724,7 @@ static int hns_roce_u_v2_poll_cq(struct ibv_cq *ibvcq, int ne, + int err = V2_CQ_OK; + int npolled; + +- pthread_spin_lock(&cq->lock); ++ hns_roce_spin_lock(&cq->hr_lock); + + for (npolled = 0; npolled < ne; ++npolled) { + err = hns_roce_poll_one(ctx, &qp, cq, wc + npolled); +@@ -739,7 +739,7 @@ static int hns_roce_u_v2_poll_cq(struct ibv_cq *ibvcq, int ne, + update_cq_db(ctx, cq); + } + +- pthread_spin_unlock(&cq->lock); ++ hns_roce_spin_unlock(&cq->hr_lock); + + return err == V2_CQ_POLL_ERR ? 
err : npolled; + } +@@ -1510,9 +1510,9 @@ static void __hns_roce_v2_cq_clean(struct hns_roce_cq *cq, uint32_t qpn, + static void hns_roce_v2_cq_clean(struct hns_roce_cq *cq, unsigned int qpn, + struct hns_roce_srq *srq) + { +- pthread_spin_lock(&cq->lock); ++ hns_roce_spin_lock(&cq->hr_lock); + __hns_roce_v2_cq_clean(cq, qpn, srq); +- pthread_spin_unlock(&cq->lock); ++ hns_roce_spin_unlock(&cq->hr_lock); + } + + static void record_qp_attr(struct ibv_qp *qp, struct ibv_qp_attr *attr, +@@ -1600,18 +1600,18 @@ static void hns_roce_lock_cqs(struct ibv_qp *qp) + + if (send_cq && recv_cq) { + if (send_cq == recv_cq) { +- pthread_spin_lock(&send_cq->lock); ++ hns_roce_spin_lock(&send_cq->hr_lock); + } else if (send_cq->cqn < recv_cq->cqn) { +- pthread_spin_lock(&send_cq->lock); +- pthread_spin_lock(&recv_cq->lock); ++ hns_roce_spin_lock(&send_cq->hr_lock); ++ hns_roce_spin_lock(&recv_cq->hr_lock); + } else { +- pthread_spin_lock(&recv_cq->lock); +- pthread_spin_lock(&send_cq->lock); ++ hns_roce_spin_lock(&recv_cq->hr_lock); ++ hns_roce_spin_lock(&send_cq->hr_lock); + } + } else if (send_cq) { +- pthread_spin_lock(&send_cq->lock); ++ hns_roce_spin_lock(&send_cq->hr_lock); + } else if (recv_cq) { +- pthread_spin_lock(&recv_cq->lock); ++ hns_roce_spin_lock(&recv_cq->hr_lock); + } + } + +@@ -1622,18 +1622,18 @@ static void hns_roce_unlock_cqs(struct ibv_qp *qp) + + if (send_cq && recv_cq) { + if (send_cq == recv_cq) { +- pthread_spin_unlock(&send_cq->lock); ++ hns_roce_spin_unlock(&send_cq->hr_lock); + } else if (send_cq->cqn < recv_cq->cqn) { +- pthread_spin_unlock(&recv_cq->lock); +- pthread_spin_unlock(&send_cq->lock); ++ hns_roce_spin_unlock(&recv_cq->hr_lock); ++ hns_roce_spin_unlock(&send_cq->hr_lock); + } else { +- pthread_spin_unlock(&send_cq->lock); +- pthread_spin_unlock(&recv_cq->lock); ++ hns_roce_spin_unlock(&send_cq->hr_lock); ++ hns_roce_spin_unlock(&recv_cq->hr_lock); + } + } else if (send_cq) { +- pthread_spin_unlock(&send_cq->lock); ++ hns_roce_spin_unlock(&send_cq->hr_lock); + } else if (recv_cq) { +- pthread_spin_unlock(&recv_cq->lock); ++ hns_roce_spin_unlock(&recv_cq->hr_lock); + } + } + +@@ -1811,11 +1811,11 @@ static int wc_start_poll_cq(struct ibv_cq_ex *current, + if (attr->comp_mask) + return EINVAL; + +- pthread_spin_lock(&cq->lock); ++ hns_roce_spin_lock(&cq->hr_lock); + + err = hns_roce_poll_one(ctx, &qp, cq, NULL); + if (err != V2_CQ_OK) +- pthread_spin_unlock(&cq->lock); ++ hns_roce_spin_unlock(&cq->hr_lock); + + return err; + } +@@ -1849,7 +1849,7 @@ static void wc_end_poll_cq(struct ibv_cq_ex *current) + else + update_cq_db(ctx, cq); + +- pthread_spin_unlock(&cq->lock); ++ hns_roce_spin_unlock(&cq->hr_lock); + } + + static enum ibv_wc_opcode wc_read_opcode(struct ibv_cq_ex *current) +diff --git a/providers/hns/hns_roce_u_verbs.c b/providers/hns/hns_roce_u_verbs.c +index d503031..afde313 100644 +--- a/providers/hns/hns_roce_u_verbs.c ++++ b/providers/hns/hns_roce_u_verbs.c +@@ -407,6 +407,11 @@ int hns_roce_u_dealloc_mw(struct ibv_mw *mw) + return 0; + } + ++enum { ++ CREATE_CQ_SUPPORTED_COMP_MASK = IBV_CQ_INIT_ATTR_MASK_FLAGS | ++ IBV_CQ_INIT_ATTR_MASK_PD, ++}; ++ + enum { + CREATE_CQ_SUPPORTED_WC_FLAGS = IBV_WC_STANDARD_FLAGS | + IBV_WC_EX_WITH_CVLAN, +@@ -415,21 +420,47 @@ enum { + static int verify_cq_create_attr(struct ibv_cq_init_attr_ex *attr, + struct hns_roce_context *context) + { ++ struct hns_roce_pad *pad = to_hr_pad(attr->parent_domain); ++ + if (!attr->cqe || attr->cqe > context->max_cqe) + return EINVAL; + +- if (attr->comp_mask) ++ if 
(!check_comp_mask(attr->comp_mask, CREATE_CQ_SUPPORTED_COMP_MASK)) { ++ verbs_err(&context->ibv_ctx, "unsupported cq comps 0x%x\n", ++ attr->comp_mask); + return EOPNOTSUPP; ++ } + + if (!check_comp_mask(attr->wc_flags, CREATE_CQ_SUPPORTED_WC_FLAGS)) + return EOPNOTSUPP; + ++ if (attr->comp_mask & IBV_CQ_INIT_ATTR_MASK_PD) { ++ if (!pad) { ++ verbs_err(&context->ibv_ctx, "failed to check the pad of cq.\n"); ++ return EINVAL; ++ } ++ atomic_fetch_add(&pad->pd.refcount, 1); ++ } ++ + attr->cqe = max_t(uint32_t, HNS_ROCE_MIN_CQE_NUM, + roundup_pow_of_two(attr->cqe)); + + return 0; + } + ++static int hns_roce_cq_spinlock_init(struct ibv_context *context, ++ struct hns_roce_cq *cq, ++ struct ibv_cq_init_attr_ex *attr) ++{ ++ bool need_lock; ++ ++ need_lock = hns_roce_whether_need_lock(attr->parent_domain); ++ if (!need_lock) ++ verbs_info(verbs_get_ctx(context), "configure cq as no lock.\n"); ++ ++ return hns_roce_spinlock_init(&cq->hr_lock, need_lock); ++} ++ + static int hns_roce_alloc_cq_buf(struct hns_roce_cq *cq) + { + int buf_size = hr_hw_page_align(cq->cq_depth * cq->cqe_size); +@@ -486,7 +517,10 @@ static struct ibv_cq_ex *create_cq(struct ibv_context *context, + goto err; + } + +- ret = pthread_spin_init(&cq->lock, PTHREAD_PROCESS_PRIVATE); ++ if (attr->comp_mask & IBV_CQ_INIT_ATTR_MASK_PD) ++ cq->parent_domain = attr->parent_domain; ++ ++ ret = hns_roce_cq_spinlock_init(context, cq, attr); + if (ret) + goto err_lock; + +@@ -517,8 +551,9 @@ err_cmd: + hns_roce_free_db(hr_ctx, cq->db, HNS_ROCE_CQ_TYPE_DB); + err_db: + hns_roce_free_buf(&cq->buf); +-err_lock: + err_buf: ++ hns_roce_spinlock_destroy(&cq->hr_lock); ++err_lock: + free(cq); + err: + if (ret < 0) +@@ -569,16 +604,23 @@ int hns_roce_u_modify_cq(struct ibv_cq *cq, struct ibv_modify_cq_attr *attr) + + int hns_roce_u_destroy_cq(struct ibv_cq *cq) + { ++ struct hns_roce_cq *hr_cq = to_hr_cq(cq); ++ struct hns_roce_pad *pad = to_hr_pad(hr_cq->parent_domain); + int ret; + + ret = ibv_cmd_destroy_cq(cq); + if (ret) + return ret; + +- hns_roce_free_db(to_hr_ctx(cq->context), to_hr_cq(cq)->db, +- HNS_ROCE_CQ_TYPE_DB); +- hns_roce_free_buf(&to_hr_cq(cq)->buf); +- free(to_hr_cq(cq)); ++ hns_roce_free_db(to_hr_ctx(cq->context), hr_cq->db, HNS_ROCE_CQ_TYPE_DB); ++ hns_roce_free_buf(&hr_cq->buf); ++ ++ hns_roce_spinlock_destroy(&hr_cq->hr_lock); ++ ++ if (pad) ++ atomic_fetch_sub(&pad->pd.refcount, 1); ++ ++ free(hr_cq); + + return ret; + } +-- +2.33.0 + diff --git a/0010-libhns-Add-support-for-lock-free-SRQ.patch b/0010-libhns-Add-support-for-lock-free-SRQ.patch new file mode 100644 index 0000000000000000000000000000000000000000..bf7a392a74b95aed1b1f64660f6b327800cf4153 --- /dev/null +++ b/0010-libhns-Add-support-for-lock-free-SRQ.patch @@ -0,0 +1,157 @@ +From 19f2857b3bb6b5b6992ae7314b52c7b84e08780d Mon Sep 17 00:00:00 2001 +From: zzry <1245464216@qq.com> +Date: Fri, 8 Mar 2024 16:33:48 +0800 +Subject: [PATCH 10/10] libhns: Add support for lock-free SRQ + +driver inclusion +category: feature +bugzilla: https://gitee.com/src-openeuler/rdma-core/issues/I97WST + +------------------------------------------------------------------ + +Drop SRQ locks when associated to a PAD holding a TD. 
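+
+For reference, a minimal usage sketch of how an application opts into this
+path (hedged: it assumes a device context "ctx" and a protection domain "pd"
+already exist, and all variable names are illustrative). The SRQ only runs
+lock-free when it is created on a parent domain (PAD) that carries a thread
+domain (TD):
+
+```c
+struct ibv_td_init_attr td_attr = {};
+struct ibv_td *td = ibv_alloc_td(ctx, &td_attr);
+
+/* Wrap the ordinary PD and the TD into a parent domain. */
+struct ibv_parent_domain_init_attr pad_attr = {
+	.pd = pd,
+	.td = td,
+};
+struct ibv_pd *pad = ibv_alloc_parent_domain(ctx, &pad_attr);
+
+/* Create the SRQ on the PAD; the driver then skips the SRQ spinlock. */
+struct ibv_srq_init_attr_ex srq_attr = {
+	.attr = { .max_wr = 64, .max_sge = 1 },
+	.comp_mask = IBV_SRQ_INIT_ATTR_TYPE | IBV_SRQ_INIT_ATTR_PD,
+	.srq_type = IBV_SRQT_BASIC,
+	.pd = pad,
+};
+struct ibv_srq *srq = ibv_create_srq_ex(ctx, &srq_attr);
+```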
+ +Signed-off-by: Yixing Liu +Signed-off-by: Junxian Huang +--- + providers/hns/hns_roce_u.h | 2 +- + providers/hns/hns_roce_u_hw_v2.c | 8 ++++---- + providers/hns/hns_roce_u_verbs.c | 31 +++++++++++++++++++++++++++++-- + 3 files changed, 34 insertions(+), 7 deletions(-) + +diff --git a/providers/hns/hns_roce_u.h b/providers/hns/hns_roce_u.h +index 0035e36..21a6e28 100644 +--- a/providers/hns/hns_roce_u.h ++++ b/providers/hns/hns_roce_u.h +@@ -292,7 +292,7 @@ struct hns_roce_srq { + struct hns_roce_idx_que idx_que; + struct hns_roce_buf wqe_buf; + struct hns_roce_rinl_buf srq_rinl_buf; +- pthread_spinlock_t lock; ++ struct hns_roce_spinlock hr_lock; + unsigned long *wrid; + unsigned int srqn; + unsigned int wqe_cnt; +diff --git a/providers/hns/hns_roce_u_hw_v2.c b/providers/hns/hns_roce_u_hw_v2.c +index 2fb4d72..1d7a304 100644 +--- a/providers/hns/hns_roce_u_hw_v2.c ++++ b/providers/hns/hns_roce_u_hw_v2.c +@@ -229,14 +229,14 @@ static void hns_roce_free_srq_wqe(struct hns_roce_srq *srq, uint16_t ind) + uint32_t bitmap_num; + int bit_num; + +- pthread_spin_lock(&srq->lock); ++ hns_roce_spin_lock(&srq->hr_lock); + + bitmap_num = ind / BIT_CNT_PER_LONG; + bit_num = ind % BIT_CNT_PER_LONG; + srq->idx_que.bitmap[bitmap_num] |= (1ULL << bit_num); + srq->idx_que.tail++; + +- pthread_spin_unlock(&srq->lock); ++ hns_roce_spin_unlock(&srq->hr_lock); + } + + static int get_srq_from_cqe(struct hns_roce_v2_cqe *cqe, +@@ -1756,7 +1756,7 @@ static int hns_roce_u_v2_post_srq_recv(struct ibv_srq *ib_srq, + int ret = 0; + void *wqe; + +- pthread_spin_lock(&srq->lock); ++ hns_roce_spin_lock(&srq->hr_lock); + + max_sge = srq->max_gs - srq->rsv_sge; + for (nreq = 0; wr; ++nreq, wr = wr->next) { +@@ -1795,7 +1795,7 @@ static int hns_roce_u_v2_post_srq_recv(struct ibv_srq *ib_srq, + update_srq_db(ctx, &srq_db, srq); + } + +- pthread_spin_unlock(&srq->lock); ++ hns_roce_spin_unlock(&srq->hr_lock); + + return ret; + } +diff --git a/providers/hns/hns_roce_u_verbs.c b/providers/hns/hns_roce_u_verbs.c +index afde313..00e59dc 100644 +--- a/providers/hns/hns_roce_u_verbs.c ++++ b/providers/hns/hns_roce_u_verbs.c +@@ -461,6 +461,19 @@ static int hns_roce_cq_spinlock_init(struct ibv_context *context, + return hns_roce_spinlock_init(&cq->hr_lock, need_lock); + } + ++static int hns_roce_srq_spinlock_init(struct ibv_context *context, ++ struct hns_roce_srq *srq, ++ struct ibv_srq_init_attr_ex *attr) ++{ ++ bool need_lock; ++ ++ need_lock = hns_roce_whether_need_lock(attr->pd); ++ if (!need_lock) ++ verbs_info(verbs_get_ctx(context), "configure srq as no lock.\n"); ++ ++ return hns_roce_spinlock_init(&srq->hr_lock, need_lock); ++} ++ + static int hns_roce_alloc_cq_buf(struct hns_roce_cq *cq) + { + int buf_size = hr_hw_page_align(cq->cq_depth * cq->cqe_size); +@@ -830,6 +843,7 @@ static struct ibv_srq *create_srq(struct ibv_context *context, + struct ibv_srq_init_attr_ex *init_attr) + { + struct hns_roce_context *hr_ctx = to_hr_ctx(context); ++ struct hns_roce_pad *pad = to_hr_pad(init_attr->pd); + struct hns_roce_srq *srq; + int ret; + +@@ -843,12 +857,15 @@ static struct ibv_srq *create_srq(struct ibv_context *context, + goto err; + } + +- if (pthread_spin_init(&srq->lock, PTHREAD_PROCESS_PRIVATE)) ++ if (pad) ++ atomic_fetch_add(&pad->pd.refcount, 1); ++ ++ if (hns_roce_srq_spinlock_init(context, srq, init_attr)) + goto err_free_srq; + + set_srq_param(context, srq, init_attr); + if (alloc_srq_buf(srq)) +- goto err_free_srq; ++ goto err_destroy_lock; + + srq->rdb = hns_roce_alloc_db(hr_ctx, HNS_ROCE_SRQ_TYPE_DB); + if 
(!srq->rdb) +@@ -879,6 +896,9 @@ err_srq_db: + err_srq_buf: + free_srq_buf(srq); + ++err_destroy_lock: ++ hns_roce_spinlock_destroy(&srq->hr_lock); ++ + err_free_srq: + free(srq); + +@@ -943,6 +963,7 @@ int hns_roce_u_query_srq(struct ibv_srq *srq, struct ibv_srq_attr *srq_attr) + int hns_roce_u_destroy_srq(struct ibv_srq *ibv_srq) + { + struct hns_roce_context *ctx = to_hr_ctx(ibv_srq->context); ++ struct hns_roce_pad *pad = to_hr_pad(ibv_srq->pd); + struct hns_roce_srq *srq = to_hr_srq(ibv_srq); + int ret; + +@@ -954,6 +975,12 @@ int hns_roce_u_destroy_srq(struct ibv_srq *ibv_srq) + + hns_roce_free_db(ctx, srq->rdb, HNS_ROCE_SRQ_TYPE_DB); + free_srq_buf(srq); ++ ++ hns_roce_spinlock_destroy(&srq->hr_lock); ++ ++ if (pad) ++ atomic_fetch_sub(&pad->pd.refcount, 1); ++ + free(srq); + + return 0; +-- +2.33.0 + diff --git a/0011-libhns-Support-flexible-WQE-buffer-page-size.patch b/0011-libhns-Support-flexible-WQE-buffer-page-size.patch new file mode 100644 index 0000000000000000000000000000000000000000..f16e317cb4f055506f19846ff21d6411fc433d48 --- /dev/null +++ b/0011-libhns-Support-flexible-WQE-buffer-page-size.patch @@ -0,0 +1,186 @@ +From b05879f0287aa5b4bd315fea3ef0e0b82238e935 Mon Sep 17 00:00:00 2001 +From: Chengchang Tang +Date: Mon, 23 Oct 2023 21:13:03 +0800 +Subject: [PATCH 11/18] libhns: Support flexible WQE buffer page size + +driver inclusion +category: bugfix +bugzilla: https://gitee.com/openeuler/kernel/issues/I98HIN + +-------------------------------------------------------------------------- + +Currently, driver fixedly allocates 4K pages for user space WQE buffer +even in a 64K system. This results in HW reading WQE with a granularity +of 4K even in a 64K system. Considering that we support 1024-byte inline, +in the scenario of using SQ inline, HW will switch pages every 4 WQEs. +This will introduce a delay of about 400ns, which is an average delay of +100ns per packet. + +In order to improve performance, we allow user-mode drivers to use a +larger page size to allocate WQE buffers, thereby reducing the latency +introduced by HW page switching. User-mode drivers will be allowed to +allocate WQE buffers between 4K to system page size. During +ibv_create_qp(), the driver will dynamically select the appropriate page +size based on ibv_qp_cap, thus reducing memory consumption while improving +performance. + +This feature needs to be used in conjunction with the kernel-mode driver. +In order to ensure forward compatibility, if the kernel-mode driver does +not support this feature, the user-mode driver will continue to use a +fixed 4K pagesize to allocate WQE buffer. 
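+
+A standalone sketch of the selection rule described above (hedged: the helper
+and variable names here are illustrative, not the driver's; the real code
+also honours the HNS_ROCE_UCTX_RSP_DYN_QP_PGSZ negotiation flag):
+
+```c
+/* Pick the smallest power-of-two page that covers the largest of the
+ * SQ, extended-SGE and RQ regions, clamped to [4K, system page size].
+ */
+static unsigned int pick_wqe_pageshift(unsigned int sq_bytes,
+					unsigned int sge_bytes,
+					unsigned int rq_bytes,
+					unsigned int sys_page_size)
+{
+	unsigned int regions[] = { sq_bytes, sge_bytes, rq_bytes };
+	unsigned int shift = 12;	/* 4K floor */
+	unsigned int i, need;
+
+	for (i = 0; i < 3; i++) {
+		for (need = 0; regions[i] && (1U << need) < regions[i]; need++)
+			;		/* ceil(log2(region size)) */
+		if (need > shift)
+			shift = need;
+	}
+
+	while ((1U << shift) > sys_page_size)
+		shift--;		/* never exceed the system page */
+
+	return shift;
+}
+```
+
+For example, a QP with an 8K SQ, a 4K extended-SGE area and a 16K RQ on a
+64K-page system gets a 16K WQE page, while the same QP on a 4K-page system
+stays at 4K.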
+ +Signed-off-by: Chengchang Tang +--- + kernel-headers/rdma/hns-abi.h | 5 +++- + providers/hns/hns_roce_u.h | 1 + + providers/hns/hns_roce_u_verbs.c | 51 ++++++++++++++++++++++++++++---- + 3 files changed, 50 insertions(+), 7 deletions(-) + +diff --git a/kernel-headers/rdma/hns-abi.h b/kernel-headers/rdma/hns-abi.h +index 39ed8a4..f33d876 100644 +--- a/kernel-headers/rdma/hns-abi.h ++++ b/kernel-headers/rdma/hns-abi.h +@@ -90,7 +90,8 @@ struct hns_roce_ib_create_qp { + __u8 log_sq_bb_count; + __u8 log_sq_stride; + __u8 sq_no_prefetch; +- __u8 reserved[5]; ++ __u8 pageshift; ++ __u8 reserved[4]; + __aligned_u64 sdb_addr; + __aligned_u64 comp_mask; /* Use enum hns_roce_create_qp_comp_mask */ + __aligned_u64 create_flags; +@@ -119,12 +120,14 @@ enum { + HNS_ROCE_EXSGE_FLAGS = 1 << 0, + HNS_ROCE_RQ_INLINE_FLAGS = 1 << 1, + HNS_ROCE_CQE_INLINE_FLAGS = 1 << 2, ++ HNS_ROCE_UCTX_DYN_QP_PGSZ = 1 << 4, + }; + + enum { + HNS_ROCE_RSP_EXSGE_FLAGS = 1 << 0, + HNS_ROCE_RSP_RQ_INLINE_FLAGS = 1 << 1, + HNS_ROCE_RSP_CQE_INLINE_FLAGS = 1 << 2, ++ HNS_ROCE_UCTX_RSP_DYN_QP_PGSZ = HNS_ROCE_UCTX_DYN_QP_PGSZ, + }; + + struct hns_roce_ib_alloc_ucontext_resp { +diff --git a/providers/hns/hns_roce_u.h b/providers/hns/hns_roce_u.h +index 21a6e28..56851b0 100644 +--- a/providers/hns/hns_roce_u.h ++++ b/providers/hns/hns_roce_u.h +@@ -349,6 +349,7 @@ struct hns_roce_qp { + uint8_t sl; + uint8_t tc_mode; + uint8_t priority; ++ uint8_t pageshift; + unsigned int qkey; + enum ibv_mtu path_mtu; + +diff --git a/providers/hns/hns_roce_u_verbs.c b/providers/hns/hns_roce_u_verbs.c +index 00e59dc..fc255ed 100644 +--- a/providers/hns/hns_roce_u_verbs.c ++++ b/providers/hns/hns_roce_u_verbs.c +@@ -1178,31 +1178,69 @@ static void free_recv_rinl_buf(struct hns_roce_rinl_buf *rinl_buf) + } + } + ++static void get_best_multi_region_pg_shift(struct hns_roce_device *hr_dev, ++ struct hns_roce_context *ctx, ++ struct hns_roce_qp *qp) ++{ ++ uint32_t ext_sge_size; ++ uint32_t sq_size; ++ uint32_t rq_size; ++ uint8_t pg_shift; ++ ++ if (!(ctx->config & HNS_ROCE_UCTX_RSP_DYN_QP_PGSZ)) { ++ qp->pageshift = HNS_HW_PAGE_SHIFT; ++ return; ++ } ++ ++ /* ++ * The larger the pagesize used, the better the performance, but it ++ * may waste more memory. Therefore, we use the least common multiple ++ * (aligned to power of 2) of sq wqe buffer size, rq wqe buffer size, ++ * and ext_sge buffer size as the pagesize. Additionally, since the ++ * kernel cannot guarantee the allocation of contiguous memory larger ++ * than the system page, the pagesize must be smaller than the system ++ * page. ++ */ ++ sq_size = qp->sq.wqe_cnt << qp->sq.wqe_shift; ++ ext_sge_size = qp->ex_sge.sge_cnt << qp->ex_sge.sge_shift; ++ rq_size = qp->rq.wqe_cnt << qp->rq.wqe_shift; ++ ++ pg_shift = max_t(uint8_t, sq_size ? hr_ilog32(sq_size) : 0, ++ ext_sge_size ? hr_ilog32(ext_sge_size) : 0); ++ pg_shift = max_t(uint8_t, pg_shift, rq_size ? 
hr_ilog32(rq_size) : 0); ++ pg_shift = max_t(uint8_t, pg_shift, HNS_HW_PAGE_SHIFT); ++ qp->pageshift = min_t(uint8_t, pg_shift, hr_ilog32(hr_dev->page_size)); ++} ++ + static int calc_qp_buff_size(struct hns_roce_device *hr_dev, ++ struct hns_roce_context *ctx, + struct hns_roce_qp *qp) + { + struct hns_roce_wq *sq = &qp->sq; + struct hns_roce_wq *rq = &qp->rq; ++ unsigned int page_size; + unsigned int size; + + qp->buf_size = 0; ++ get_best_multi_region_pg_shift(hr_dev, ctx, qp); ++ page_size = 1 << qp->pageshift; + + /* SQ WQE */ + sq->offset = 0; +- size = to_hr_hem_entries_size(sq->wqe_cnt, sq->wqe_shift); ++ size = align(sq->wqe_cnt << sq->wqe_shift, page_size); + qp->buf_size += size; + + /* extend SGE WQE in SQ */ + qp->ex_sge.offset = qp->buf_size; + if (qp->ex_sge.sge_cnt > 0) { +- size = to_hr_hem_entries_size(qp->ex_sge.sge_cnt, +- qp->ex_sge.sge_shift); ++ size = align(qp->ex_sge.sge_cnt << qp->ex_sge.sge_shift, ++ page_size); + qp->buf_size += size; + } + + /* RQ WQE */ + rq->offset = qp->buf_size; +- size = to_hr_hem_entries_size(rq->wqe_cnt, rq->wqe_shift); ++ size = align(rq->wqe_cnt << rq->wqe_shift, page_size); + qp->buf_size += size; + + if (qp->buf_size < 1) +@@ -1227,7 +1265,7 @@ static int qp_alloc_wqe(struct ibv_qp_cap *cap, struct hns_roce_qp *qp, + { + struct hns_roce_device *hr_dev = to_hr_dev(ctx->ibv_ctx.context.device); + +- if (calc_qp_buff_size(hr_dev, qp)) ++ if (calc_qp_buff_size(hr_dev, ctx, qp)) + return -EINVAL; + + qp->sq.wrid = malloc(qp->sq.wqe_cnt * sizeof(uint64_t)); +@@ -1245,7 +1283,7 @@ static int qp_alloc_wqe(struct ibv_qp_cap *cap, struct hns_roce_qp *qp, + goto err_alloc; + } + +- if (hns_roce_alloc_buf(&qp->buf, qp->buf_size, HNS_HW_PAGE_SIZE)) ++ if (hns_roce_alloc_buf(&qp->buf, qp->buf_size, 1 << qp->pageshift)) + goto err_alloc; + + return 0; +@@ -1482,6 +1520,7 @@ static int qp_exec_create_cmd(struct ibv_qp_init_attr_ex *attr, + cmd_ex.buf_addr = (uintptr_t)qp->buf.buf; + cmd_ex.log_sq_stride = qp->sq.wqe_shift; + cmd_ex.log_sq_bb_count = hr_ilog32(qp->sq.wqe_cnt); ++ cmd_ex.pageshift = qp->pageshift; + + if (hns_attr && + hns_attr->comp_mask & HNSDV_QP_INIT_ATTR_MASK_QP_CONGEST_TYPE) { +-- +2.33.0 + diff --git a/0012-Update-kernel-headers.patch b/0012-Update-kernel-headers.patch new file mode 100644 index 0000000000000000000000000000000000000000..43c7af3352239ce88d293d32437d5fbe2c504831 --- /dev/null +++ b/0012-Update-kernel-headers.patch @@ -0,0 +1,41 @@ +From 1c2b95fe3fc64075178935bb3e1bf2086694fba3 Mon Sep 17 00:00:00 2001 +From: Yixing Liu +Date: Wed, 14 Dec 2022 16:37:26 +0800 +Subject: [PATCH 12/18] Update kernel headers + +To commit ?? ("RDMA/hns: Kernel notify usr space to stop ring db"). 
+ +Signed-off-by: Yixing Liu +--- + kernel-headers/rdma/hns-abi.h | 8 +++++--- + 1 file changed, 5 insertions(+), 3 deletions(-) + +diff --git a/kernel-headers/rdma/hns-abi.h b/kernel-headers/rdma/hns-abi.h +index f33d876..1d51612 100644 +--- a/kernel-headers/rdma/hns-abi.h ++++ b/kernel-headers/rdma/hns-abi.h +@@ -111,9 +111,9 @@ struct hns_roce_ib_create_qp_resp { + }; + + struct hns_roce_ib_modify_qp_resp { +- __u8 tc_mode; +- __u8 priority; +- __u8 reserved[6]; ++ __u8 tc_mode; ++ __u8 priority; ++ __u8 reserved[6]; + }; + + enum { +@@ -139,6 +139,8 @@ struct hns_roce_ib_alloc_ucontext_resp { + __u32 max_inline_data; + __u8 congest_type; + __u8 reserved0[7]; ++ __aligned_u64 rsv_for_dca[2]; ++ __aligned_u64 reset_mmap_key; + }; + + struct hns_roce_ib_alloc_ucontext { +-- +2.33.0 + diff --git a/0013-libhns-Add-reset-stop-flow-mechanism.patch b/0013-libhns-Add-reset-stop-flow-mechanism.patch new file mode 100644 index 0000000000000000000000000000000000000000..518957500873f82910c40971c81f673940f0c784 --- /dev/null +++ b/0013-libhns-Add-reset-stop-flow-mechanism.patch @@ -0,0 +1,189 @@ +From 13d5c1bd7192d75f27aba97e556fb83bd182c561 Mon Sep 17 00:00:00 2001 +From: Guofeng Yue +Date: Mon, 9 May 2022 16:03:38 +0800 +Subject: [PATCH 13/18] libhns: Add reset stop flow mechanism + +driver inclusion +category: bugfix +bugzilla: https://gitee.com/src-openeuler/rdma-core/issues/I65WI7 + +------------------------------------------------------------------ + +Add an interface to the user space, which is used to receive +the kernel reset state. After receiving the reset flag, the +user space stops sending db. + +Signed-off-by: Yixing Liu +Signed-off-by: Guofeng Yue +Reviewed-by: Yangyang Li +--- + providers/hns/hns_roce_u.c | 25 +++++++++++++++++++++++++ + providers/hns/hns_roce_u.h | 5 +++++ + providers/hns/hns_roce_u_db.h | 8 +++++++- + providers/hns/hns_roce_u_hw_v2.c | 19 ++++++++++++++----- + 4 files changed, 51 insertions(+), 6 deletions(-) + +diff --git a/providers/hns/hns_roce_u.c b/providers/hns/hns_roce_u.c +index e1c2659..0e4f4c1 100644 +--- a/providers/hns/hns_roce_u.c ++++ b/providers/hns/hns_roce_u.c +@@ -100,6 +100,24 @@ static uint32_t calc_table_shift(uint32_t entry_count, uint32_t size_shift) + return count_shift > size_shift ? count_shift - size_shift : 0; + } + ++static int init_reset_context(struct hns_roce_context *ctx, int cmd_fd, ++ struct hns_roce_alloc_ucontext_resp *resp, ++ int page_size) ++{ ++ uint64_t reset_mmap_key = resp->reset_mmap_key; ++ ++ /* The reset mmap key is 0, which means it is not supported. 
*/ ++ if (reset_mmap_key == 0) ++ return 0; ++ ++ ctx->reset_state = mmap(NULL, page_size, PROT_READ, MAP_SHARED, ++ cmd_fd, reset_mmap_key); ++ if (ctx->reset_state == MAP_FAILED) ++ return -ENOMEM; ++ ++ return 0; ++} ++ + static int set_context_attr(struct hns_roce_device *hr_dev, + struct hns_roce_context *context, + struct hns_roce_alloc_ucontext_resp *resp) +@@ -176,6 +194,9 @@ static struct verbs_context *hns_roce_alloc_context(struct ibv_device *ibdev, + if (context->uar == MAP_FAILED) + goto err_free; + ++ if (init_reset_context(context, cmd_fd, &resp, hr_dev->page_size)) ++ goto reset_free; ++ + pthread_mutex_init(&context->qp_table_mutex, NULL); + pthread_mutex_init(&context->srq_table_mutex, NULL); + pthread_spin_init(&context->uar_lock, PTHREAD_PROCESS_PRIVATE); +@@ -185,6 +206,8 @@ static struct verbs_context *hns_roce_alloc_context(struct ibv_device *ibdev, + + return &context->ibv_ctx; + ++reset_free: ++ munmap(context->uar, hr_dev->page_size); + err_free: + verbs_uninit_context(&context->ibv_ctx); + free(context); +@@ -197,6 +220,8 @@ static void hns_roce_free_context(struct ibv_context *ibctx) + struct hns_roce_context *context = to_hr_ctx(ibctx); + + munmap(context->uar, hr_dev->page_size); ++ if (context->reset_state) ++ munmap(context->reset_state, hr_dev->page_size); + verbs_uninit_context(&context->ibv_ctx); + free(context); + } +diff --git a/providers/hns/hns_roce_u.h b/providers/hns/hns_roce_u.h +index 56851b0..49de0f9 100644 +--- a/providers/hns/hns_roce_u.h ++++ b/providers/hns/hns_roce_u.h +@@ -201,9 +201,14 @@ struct hns_roce_spinlock { + int need_lock; + }; + ++struct hns_roce_v2_reset_state { ++ uint32_t is_reset; ++}; ++ + struct hns_roce_context { + struct verbs_context ibv_ctx; + void *uar; ++ void *reset_state; + pthread_spinlock_t uar_lock; + + struct { +diff --git a/providers/hns/hns_roce_u_db.h b/providers/hns/hns_roce_u_db.h +index 8c47a53..de288de 100644 +--- a/providers/hns/hns_roce_u_db.h ++++ b/providers/hns/hns_roce_u_db.h +@@ -40,8 +40,14 @@ + + #define HNS_ROCE_WORD_NUM 2 + +-static inline void hns_roce_write64(void *dest, __le32 val[HNS_ROCE_WORD_NUM]) ++static inline void hns_roce_write64(struct hns_roce_context *ctx, void *dest, ++ __le32 val[HNS_ROCE_WORD_NUM]) + { ++ struct hns_roce_v2_reset_state *state = ctx->reset_state; ++ ++ if (state && state->is_reset) ++ return; ++ + mmio_write64_le(dest, *(__le64 *)val); + } + +diff --git a/providers/hns/hns_roce_u_hw_v2.c b/providers/hns/hns_roce_u_hw_v2.c +index 1d7a304..1855d83 100644 +--- a/providers/hns/hns_roce_u_hw_v2.c ++++ b/providers/hns/hns_roce_u_hw_v2.c +@@ -284,7 +284,8 @@ static void hns_roce_update_rq_db(struct hns_roce_context *ctx, + hr_reg_write(&rq_db, DB_CMD, HNS_ROCE_V2_RQ_DB); + hr_reg_write(&rq_db, DB_PI, rq_head); + +- hns_roce_write64(ctx->uar + ROCEE_VF_DB_CFG0_OFFSET, (__le32 *)&rq_db); ++ hns_roce_write64(ctx, ctx->uar + ROCEE_VF_DB_CFG0_OFFSET, ++ (__le32 *)&rq_db); + } + + static void hns_roce_update_sq_db(struct hns_roce_context *ctx, +@@ -298,7 +299,7 @@ static void hns_roce_update_sq_db(struct hns_roce_context *ctx, + hr_reg_write(&sq_db, DB_PI, qp->sq.head); + hr_reg_write(&sq_db, DB_SL, qp->sl); + +- hns_roce_write64(qp->sq.db_reg, (__le32 *)&sq_db); ++ hns_roce_write64(ctx, qp->sq.db_reg, (__le32 *)&sq_db); + } + + static void hns_roce_write512(uint64_t *dest, uint64_t *val) +@@ -309,6 +310,12 @@ static void hns_roce_write512(uint64_t *dest, uint64_t *val) + static void hns_roce_write_dwqe(struct hns_roce_qp *qp, void *wqe) + { + struct hns_roce_rc_sq_wqe 
*rc_sq_wqe = wqe; ++ struct ibv_qp *ibvqp = &qp->verbs_qp.qp; ++ struct hns_roce_context *ctx = to_hr_ctx(ibvqp->context); ++ struct hns_roce_v2_reset_state *state = ctx->reset_state; ++ ++ if (state && state->is_reset) ++ return; + + /* All kinds of DirectWQE have the same header field layout */ + hr_reg_enable(rc_sq_wqe, RCWQE_FLAG); +@@ -328,7 +335,8 @@ static void update_cq_db(struct hns_roce_context *ctx, struct hns_roce_cq *cq) + hr_reg_write(&cq_db, DB_CQ_CI, cq->cons_index); + hr_reg_write(&cq_db, DB_CQ_CMD_SN, 1); + +- hns_roce_write64(ctx->uar + ROCEE_VF_DB_CFG0_OFFSET, (__le32 *)&cq_db); ++ hns_roce_write64(ctx, ctx->uar + ROCEE_VF_DB_CFG0_OFFSET, ++ (__le32 *)&cq_db); + } + + static struct hns_roce_qp *hns_roce_v2_find_qp(struct hns_roce_context *ctx, +@@ -762,7 +770,8 @@ static int hns_roce_u_v2_arm_cq(struct ibv_cq *ibvcq, int solicited) + hr_reg_write(&cq_db, DB_CQ_CMD_SN, cq->arm_sn); + hr_reg_write(&cq_db, DB_CQ_NOTIFY, solicited_flag); + +- hns_roce_write64(ctx->uar + ROCEE_VF_DB_CFG0_OFFSET, (__le32 *)&cq_db); ++ hns_roce_write64(ctx, ctx->uar + ROCEE_VF_DB_CFG0_OFFSET, ++ (__le32 *)&cq_db); + + return 0; + } +@@ -1741,7 +1750,7 @@ static void update_srq_db(struct hns_roce_context *ctx, struct hns_roce_db *db, + hr_reg_write(db, DB_CMD, HNS_ROCE_V2_SRQ_DB); + hr_reg_write(db, DB_PI, srq->idx_que.head); + +- hns_roce_write64(ctx->uar + ROCEE_VF_DB_CFG0_OFFSET, ++ hns_roce_write64(ctx, ctx->uar + ROCEE_VF_DB_CFG0_OFFSET, + (__le32 *)db); + } + +-- +2.33.0 + diff --git a/0014-libhns-Support-reporting-wc-as-software-mode.patch b/0014-libhns-Support-reporting-wc-as-software-mode.patch new file mode 100644 index 0000000000000000000000000000000000000000..5734965a4ea8922c1f48df23f26fca0d4b995ba3 --- /dev/null +++ b/0014-libhns-Support-reporting-wc-as-software-mode.patch @@ -0,0 +1,542 @@ +From 3344ba5dc2240ae4ce43b6df2cbef78539c38e0c Mon Sep 17 00:00:00 2001 +From: Chengchang Tang +Date: Tue, 26 Sep 2023 19:19:06 +0800 +Subject: [PATCH 14/18] libhns: Support reporting wc as software mode + +driver inclusion +category: feature +bugzilla: https://gitee.com/src-openeuler/rdma-core/issues/I83BP0 + +---------------------------------------------------------- + +When HW is in resetting stage, we could not poll back all the expected +work completions as the HW won't generate cqe anymore. + +This patch allows driver to compose the expected wc instead of the HW +during resetting stage. Once the hardware finished resetting, we can +poll cq from hardware again. 
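+
+From the application's point of view the drain loop does not change; during
+a reset the outstanding WRs simply complete with IBV_WC_WR_FLUSH_ERR from
+the software path (hedged sketch; release_wr() is an illustrative
+application helper):
+
+```c
+struct ibv_wc wc[16];
+int i, n;
+
+do {
+	n = ibv_poll_cq(cq, 16, wc);
+	for (i = 0; i < n; i++) {
+		/* software-generated completions carry a flush status */
+		if (wc[i].status == IBV_WC_WR_FLUSH_ERR)
+			release_wr(wc[i].wr_id);
+	}
+} while (n > 0);
+```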
+ +Signed-off-by: Chengchang Tang +--- + providers/hns/hns_roce_u.h | 12 ++ + providers/hns/hns_roce_u_hw_v2.c | 217 +++++++++++++++++++++++++++++-- + providers/hns/hns_roce_u_hw_v2.h | 2 + + providers/hns/hns_roce_u_verbs.c | 91 +++++++++++++ + 4 files changed, 310 insertions(+), 12 deletions(-) + +diff --git a/providers/hns/hns_roce_u.h b/providers/hns/hns_roce_u.h +index 49de0f9..5adf6bd 100644 +--- a/providers/hns/hns_roce_u.h ++++ b/providers/hns/hns_roce_u.h +@@ -238,6 +238,8 @@ struct hns_roce_context { + unsigned int cqe_size; + uint32_t config; + unsigned int max_inline_data; ++ ++ bool reseted; + }; + + struct hns_roce_td { +@@ -271,6 +273,11 @@ struct hns_roce_cq { + unsigned int cqe_size; + struct hns_roce_v2_cqe *cqe; + struct ibv_pd *parent_domain; ++ struct list_head list_sq; ++ struct list_head list_rq; ++ struct list_head list_srq; ++ struct list_head list_xrc_srq; ++ struct hns_roce_v2_cqe *sw_cqe; + }; + + struct hns_roce_idx_que { +@@ -307,6 +314,7 @@ struct hns_roce_srq { + unsigned int *rdb; + unsigned int cap_flags; + unsigned short counter; ++ struct list_node xrc_srcq_node; + }; + + struct hns_roce_wq { +@@ -368,6 +376,10 @@ struct hns_roce_qp { + void *cur_wqe; + unsigned int rb_sq_head; /* roll back sq head */ + struct hns_roce_sge_info sge_info; ++ ++ struct list_node rcq_node; ++ struct list_node scq_node; ++ struct list_node srcq_node; + }; + + struct hns_roce_av { +diff --git a/providers/hns/hns_roce_u_hw_v2.c b/providers/hns/hns_roce_u_hw_v2.c +index 1855d83..2119c4c 100644 +--- a/providers/hns/hns_roce_u_hw_v2.c ++++ b/providers/hns/hns_roce_u_hw_v2.c +@@ -723,6 +723,180 @@ static int hns_roce_poll_one(struct hns_roce_context *ctx, + return hns_roce_flush_cqe(*cur_qp, status); + } + ++static void hns_roce_fill_swc(struct hns_roce_cq *cq, struct ibv_wc *wc, ++ uint64_t wr_id, uint32_t qp_num) ++{ ++ if (!wc) { ++ cq->verbs_cq.cq_ex.status = IBV_WC_WR_FLUSH_ERR; ++ cq->verbs_cq.cq_ex.wr_id = wr_id; ++ hr_reg_write(cq->sw_cqe, CQE_LCL_QPN, qp_num); ++ return; ++ } ++ ++ wc->wr_id = wr_id; ++ wc->status = IBV_WC_WR_FLUSH_ERR; ++ wc->vendor_err = 0; ++ wc->qp_num = qp_num; ++} ++ ++static int hns_roce_get_wq_swc(struct hns_roce_cq *cq, struct hns_roce_qp *qp, ++ struct ibv_wc *wc, bool is_sq) ++{ ++ struct hns_roce_wq *wq = is_sq ? &qp->sq : &qp->rq; ++ unsigned int left_wr; ++ uint64_t wr_id; ++ ++ left_wr = wq->head - wq->tail; ++ if (left_wr == 0) { ++ if (is_sq) ++ list_del_init(&qp->scq_node); ++ else ++ list_del_init(&qp->rcq_node); ++ ++ return ENOENT; ++ } ++ ++ wr_id = wq->wrid[wq->tail & (wq->wqe_cnt - 1)]; ++ hns_roce_fill_swc(cq, wc, wr_id, qp->verbs_qp.qp.qp_num); ++ wq->tail++; ++ return V2_CQ_OK; ++} ++ ++static int hns_roce_gen_sq_swc(struct hns_roce_cq *cq, struct ibv_wc *wc) ++{ ++ struct hns_roce_qp *next, *qp = NULL; ++ ++ list_for_each_safe(&cq->list_sq, qp, next, scq_node) { ++ if (hns_roce_get_wq_swc(cq, qp, wc, true) == ENOENT) ++ continue; ++ ++ return V2_CQ_OK; ++ } ++ ++ return wc ? V2_CQ_EMPTY : ENOENT; ++} ++ ++static int hns_roce_gen_rq_swc(struct hns_roce_cq *cq, struct ibv_wc *wc) ++{ ++ struct hns_roce_qp *next, *qp = NULL; ++ ++ list_for_each_safe(&cq->list_rq, qp, next, rcq_node) { ++ if (hns_roce_get_wq_swc(cq, qp, wc, false) == ENOENT) ++ continue; ++ ++ return V2_CQ_OK; ++ } ++ ++ return wc ? 
V2_CQ_EMPTY : ENOENT; ++} ++ ++static int hns_roce_get_srq_swc(struct hns_roce_cq *cq, struct hns_roce_qp *qp, ++ struct hns_roce_srq *srq, struct ibv_wc *wc) ++{ ++ unsigned int left_wr; ++ uint64_t wr_id; ++ ++ hns_roce_spin_lock(&srq->hr_lock); ++ left_wr = srq->idx_que.head - srq->idx_que.tail; ++ if (left_wr == 0) { ++ if (qp) ++ list_del_init(&qp->srcq_node); ++ else ++ list_del_init(&srq->xrc_srcq_node); ++ ++ hns_roce_spin_unlock(&srq->hr_lock); ++ return ENOENT; ++ } ++ ++ wr_id = srq->wrid[srq->idx_que.tail & (srq->wqe_cnt - 1)]; ++ hns_roce_fill_swc(cq, wc, wr_id, srq->srqn); ++ srq->idx_que.tail++; ++ hns_roce_spin_unlock(&srq->hr_lock); ++ ++ return V2_CQ_OK; ++} ++ ++static int hns_roce_gen_common_srq_swc(struct hns_roce_cq *cq, ++ struct ibv_wc *wc) ++{ ++ struct hns_roce_qp *next, *qp = NULL; ++ struct hns_roce_srq *srq; ++ ++ list_for_each_safe(&cq->list_srq, qp, next, srcq_node) { ++ srq = to_hr_srq(qp->verbs_qp.qp.srq); ++ if (hns_roce_get_srq_swc(cq, qp, srq, wc) == ENOENT) ++ continue; ++ ++ return V2_CQ_OK; ++ } ++ ++ return wc ? V2_CQ_EMPTY : ENOENT; ++} ++ ++static int hns_roce_gen_xrc_srq_swc(struct hns_roce_cq *cq, struct ibv_wc *wc) ++{ ++ struct hns_roce_srq *next, *srq = NULL; ++ ++ list_for_each_safe(&cq->list_xrc_srq, srq, next, xrc_srcq_node) { ++ if (hns_roce_get_srq_swc(cq, NULL, srq, wc) == ENOENT) ++ continue; ++ ++ return V2_CQ_OK; ++ } ++ ++ return wc ? V2_CQ_EMPTY : ENOENT; ++} ++ ++static int hns_roce_gen_srq_swc(struct hns_roce_cq *cq, struct ibv_wc *wc) ++{ ++ int err; ++ ++ err = hns_roce_gen_common_srq_swc(cq, wc); ++ if (err == V2_CQ_OK) ++ return err; ++ ++ return hns_roce_gen_xrc_srq_swc(cq, wc); ++} ++ ++static int hns_roce_poll_one_swc(struct hns_roce_cq *cq, struct ibv_wc *wc) ++{ ++ int err; ++ ++ err = hns_roce_gen_sq_swc(cq, wc); ++ if (err == V2_CQ_OK) ++ return err; ++ ++ err = hns_roce_gen_rq_swc(cq, wc); ++ if (err == V2_CQ_OK) ++ return err; ++ ++ return hns_roce_gen_srq_swc(cq, wc); ++} ++ ++static int hns_roce_poll_swc(struct hns_roce_cq *cq, int ne, struct ibv_wc *wc) ++{ ++ int npolled; ++ int err; ++ ++ for (npolled = 0; npolled < ne; npolled++) { ++ err = hns_roce_poll_one_swc(cq, wc + npolled); ++ if (err == V2_CQ_EMPTY) ++ break; ++ } ++ ++ return npolled; ++} ++ ++static bool hns_roce_reseted(struct hns_roce_context *ctx) ++{ ++ struct hns_roce_v2_reset_state *state = ctx->reset_state; ++ ++ if (state && state->is_reset) ++ ctx->reseted = true; ++ ++ return ctx->reseted; ++} ++ + static int hns_roce_u_v2_poll_cq(struct ibv_cq *ibvcq, int ne, + struct ibv_wc *wc) + { +@@ -734,6 +908,12 @@ static int hns_roce_u_v2_poll_cq(struct ibv_cq *ibvcq, int ne, + + hns_roce_spin_lock(&cq->hr_lock); + ++ if (unlikely(hns_roce_reseted(ctx))) { ++ npolled = hns_roce_poll_swc(cq, ne, wc); ++ hns_roce_spin_unlock(&cq->hr_lock); ++ return npolled; ++ } ++ + for (npolled = 0; npolled < ne; ++npolled) { + err = hns_roce_poll_one(ctx, &qp, cq, wc + npolled); + if (err != V2_CQ_OK) +@@ -1602,11 +1782,8 @@ static int hns_roce_u_v2_modify_qp(struct ibv_qp *qp, struct ibv_qp_attr *attr, + return ret; + } + +-static void hns_roce_lock_cqs(struct ibv_qp *qp) ++void hns_roce_lock_cqs(struct hns_roce_cq *send_cq, struct hns_roce_cq *recv_cq) + { +- struct hns_roce_cq *send_cq = to_hr_cq(qp->send_cq); +- struct hns_roce_cq *recv_cq = to_hr_cq(qp->recv_cq); +- + if (send_cq && recv_cq) { + if (send_cq == recv_cq) { + hns_roce_spin_lock(&send_cq->hr_lock); +@@ -1624,11 +1801,8 @@ static void hns_roce_lock_cqs(struct ibv_qp *qp) + } + } + +-static void 
hns_roce_unlock_cqs(struct ibv_qp *qp) ++void hns_roce_unlock_cqs(struct hns_roce_cq *send_cq, struct hns_roce_cq *recv_cq) + { +- struct hns_roce_cq *send_cq = to_hr_cq(qp->send_cq); +- struct hns_roce_cq *recv_cq = to_hr_cq(qp->recv_cq); +- + if (send_cq && recv_cq) { + if (send_cq == recv_cq) { + hns_roce_spin_unlock(&send_cq->hr_lock); +@@ -1662,17 +1836,22 @@ static int hns_roce_u_v2_destroy_qp(struct ibv_qp *ibqp) + + hns_roce_v2_clear_qp(ctx, qp); + +- hns_roce_lock_cqs(ibqp); ++ hns_roce_lock_cqs(to_hr_cq(ibqp->send_cq), to_hr_cq(ibqp->recv_cq)); + +- if (ibqp->recv_cq) ++ if (ibqp->recv_cq) { + __hns_roce_v2_cq_clean(to_hr_cq(ibqp->recv_cq), ibqp->qp_num, + ibqp->srq ? to_hr_srq(ibqp->srq) : NULL); ++ list_del(&qp->srcq_node); ++ list_del(&qp->rcq_node); ++ } + +- if (ibqp->send_cq && ibqp->send_cq != ibqp->recv_cq) ++ if (ibqp->send_cq && ibqp->send_cq != ibqp->recv_cq) { + __hns_roce_v2_cq_clean(to_hr_cq(ibqp->send_cq), ibqp->qp_num, + NULL); ++ list_del(&qp->scq_node); ++ } + +- hns_roce_unlock_cqs(ibqp); ++ hns_roce_unlock_cqs(to_hr_cq(ibqp->send_cq), to_hr_cq(ibqp->recv_cq)); + + hns_roce_free_qp_buf(qp, ctx); + +@@ -1822,7 +2001,14 @@ static int wc_start_poll_cq(struct ibv_cq_ex *current, + + hns_roce_spin_lock(&cq->hr_lock); + ++ if (unlikely(hns_roce_reseted(ctx))) { ++ err = hns_roce_poll_one_swc(cq, NULL); ++ goto start_poll_done; ++ } ++ + err = hns_roce_poll_one(ctx, &qp, cq, NULL); ++ ++start_poll_done: + if (err != V2_CQ_OK) + hns_roce_spin_unlock(&cq->hr_lock); + +@@ -1836,6 +2022,9 @@ static int wc_next_poll_cq(struct ibv_cq_ex *current) + struct hns_roce_qp *qp = NULL; + int err; + ++ if (unlikely(hns_roce_reseted(ctx))) ++ return hns_roce_poll_one_swc(cq, NULL); ++ + err = hns_roce_poll_one(ctx, &qp, cq, NULL); + if (err != V2_CQ_OK) + return err; +@@ -1853,11 +2042,15 @@ static void wc_end_poll_cq(struct ibv_cq_ex *current) + struct hns_roce_cq *cq = to_hr_cq(ibv_cq_ex_to_cq(current)); + struct hns_roce_context *ctx = to_hr_ctx(current->context); + ++ if (unlikely(hns_roce_reseted(ctx))) ++ goto end_poll_done; ++ + if (cq->flags & HNS_ROCE_CQ_FLAG_RECORD_DB) + *cq->db = cq->cons_index & RECORD_DB_CI_MASK; + else + update_cq_db(ctx, cq); + ++end_poll_done: + hns_roce_spin_unlock(&cq->hr_lock); + } + +diff --git a/providers/hns/hns_roce_u_hw_v2.h b/providers/hns/hns_roce_u_hw_v2.h +index abf9467..1a7b828 100644 +--- a/providers/hns/hns_roce_u_hw_v2.h ++++ b/providers/hns/hns_roce_u_hw_v2.h +@@ -344,5 +344,7 @@ void hns_roce_v2_clear_qp(struct hns_roce_context *ctx, struct hns_roce_qp *qp); + void hns_roce_attach_cq_ex_ops(struct ibv_cq_ex *cq_ex, uint64_t wc_flags); + int hns_roce_attach_qp_ex_ops(struct ibv_qp_init_attr_ex *attr, + struct hns_roce_qp *qp); ++void hns_roce_lock_cqs(struct hns_roce_cq *send_cq, struct hns_roce_cq *recv_cq); ++void hns_roce_unlock_cqs(struct hns_roce_cq *send_cq, struct hns_roce_cq *recv_cq); + + #endif /* _HNS_ROCE_U_HW_V2_H */ +diff --git a/providers/hns/hns_roce_u_verbs.c b/providers/hns/hns_roce_u_verbs.c +index fc255ed..3f23715 100644 +--- a/providers/hns/hns_roce_u_verbs.c ++++ b/providers/hns/hns_roce_u_verbs.c +@@ -513,6 +513,32 @@ static int exec_cq_create_cmd(struct ibv_context *context, + return 0; + } + ++static int hns_roce_init_cq_swc(struct hns_roce_cq *cq, ++ struct ibv_cq_init_attr_ex *attr) ++{ ++ list_head_init(&cq->list_sq); ++ list_head_init(&cq->list_rq); ++ list_head_init(&cq->list_srq); ++ list_head_init(&cq->list_xrc_srq); ++ ++ if (!(attr->wc_flags & CREATE_CQ_SUPPORTED_WC_FLAGS)) ++ return 0; ++ ++ 
cq->sw_cqe = calloc(1, sizeof(struct hns_roce_v2_cqe)); ++ if (!cq->sw_cqe) ++ return -ENOMEM; ++ ++ return 0; ++} ++ ++static void hns_roce_uninit_cq_swc(struct hns_roce_cq *cq) ++{ ++ if (cq->sw_cqe) { ++ free(cq->sw_cqe); ++ cq->sw_cqe = NULL; ++ } ++} ++ + static struct ibv_cq_ex *create_cq(struct ibv_context *context, + struct ibv_cq_init_attr_ex *attr) + { +@@ -552,6 +578,10 @@ static struct ibv_cq_ex *create_cq(struct ibv_context *context, + + *cq->db = 0; + ++ ret = hns_roce_init_cq_swc(cq, attr); ++ if (ret) ++ goto err_swc; ++ + ret = exec_cq_create_cmd(context, cq, attr); + if (ret) + goto err_cmd; +@@ -561,6 +591,8 @@ static struct ibv_cq_ex *create_cq(struct ibv_context *context, + return &cq->verbs_cq.cq_ex; + + err_cmd: ++ hns_roce_uninit_cq_swc(cq); ++err_swc: + hns_roce_free_db(hr_ctx, cq->db, HNS_ROCE_CQ_TYPE_DB); + err_db: + hns_roce_free_buf(&cq->buf); +@@ -625,6 +657,8 @@ int hns_roce_u_destroy_cq(struct ibv_cq *cq) + if (ret) + return ret; + ++ hns_roce_uninit_cq_swc(to_hr_cq(cq)); ++ + hns_roce_free_db(to_hr_ctx(cq->context), hr_cq->db, HNS_ROCE_CQ_TYPE_DB); + hns_roce_free_buf(&hr_cq->buf); + +@@ -839,6 +873,22 @@ static int exec_srq_create_cmd(struct ibv_context *context, + return 0; + } + ++static void init_srq_cq_list(struct hns_roce_srq *srq, ++ struct ibv_srq_init_attr_ex *init_attr) ++{ ++ struct hns_roce_cq *srq_cq; ++ ++ list_node_init(&srq->xrc_srcq_node); ++ ++ if (!init_attr->cq) ++ return; ++ ++ srq_cq = to_hr_cq(init_attr->cq); ++ hns_roce_spin_lock(&srq_cq->hr_lock); ++ list_add_tail(&srq_cq->list_xrc_srq, &srq->xrc_srcq_node); ++ hns_roce_spin_unlock(&srq_cq->hr_lock); ++} ++ + static struct ibv_srq *create_srq(struct ibv_context *context, + struct ibv_srq_init_attr_ex *init_attr) + { +@@ -885,6 +935,8 @@ static struct ibv_srq *create_srq(struct ibv_context *context, + init_attr->attr.max_sge = + min(init_attr->attr.max_sge - srq->rsv_sge, hr_ctx->max_srq_sge); + ++ init_srq_cq_list(srq, init_attr); ++ + return &srq->verbs_srq.srq; + + err_destroy_srq: +@@ -960,6 +1012,18 @@ int hns_roce_u_query_srq(struct ibv_srq *srq, struct ibv_srq_attr *srq_attr) + return ret; + } + ++static void del_srq_from_cq_list(struct hns_roce_srq *srq) ++{ ++ struct hns_roce_cq *srq_cq = to_hr_cq(srq->verbs_srq.cq); ++ ++ if (!srq_cq) ++ return; ++ ++ hns_roce_spin_lock(&srq_cq->hr_lock); ++ list_del(&srq->xrc_srcq_node); ++ hns_roce_spin_unlock(&srq_cq->hr_lock); ++} ++ + int hns_roce_u_destroy_srq(struct ibv_srq *ibv_srq) + { + struct hns_roce_context *ctx = to_hr_ctx(ibv_srq->context); +@@ -967,6 +1031,8 @@ int hns_roce_u_destroy_srq(struct ibv_srq *ibv_srq) + struct hns_roce_srq *srq = to_hr_srq(ibv_srq); + int ret; + ++ del_srq_from_cq_list(srq); ++ + ret = ibv_cmd_destroy_srq(ibv_srq); + if (ret) + return ret; +@@ -1600,6 +1666,30 @@ static int mmap_dwqe(struct ibv_context *ibv_ctx, struct hns_roce_qp *qp, + return 0; + } + ++static void add_qp_to_cq_list(struct ibv_qp_init_attr_ex *attr, ++ struct hns_roce_qp *qp) ++{ ++ struct hns_roce_cq *send_cq, *recv_cq; ++ ++ send_cq = attr->send_cq ? to_hr_cq(attr->send_cq) : NULL; ++ recv_cq = attr->recv_cq ? 
to_hr_cq(attr->recv_cq) : NULL; ++ ++ list_node_init(&qp->scq_node); ++ list_node_init(&qp->rcq_node); ++ list_node_init(&qp->srcq_node); ++ ++ hns_roce_lock_cqs(send_cq, recv_cq); ++ if (send_cq) ++ list_add_tail(&send_cq->list_sq, &qp->scq_node); ++ if (recv_cq) { ++ if (attr->srq) ++ list_add_tail(&recv_cq->list_srq, &qp->srcq_node); ++ else ++ list_add_tail(&recv_cq->list_rq, &qp->rcq_node); ++ } ++ hns_roce_unlock_cqs(send_cq, recv_cq); ++} ++ + static struct ibv_qp *create_qp(struct ibv_context *ibv_ctx, + struct ibv_qp_init_attr_ex *attr, + struct hnsdv_qp_init_attr *hns_attr) +@@ -1652,6 +1742,7 @@ static struct ibv_qp *create_qp(struct ibv_context *ibv_ctx, + } + + qp_setup_config(attr, qp, context); ++ add_qp_to_cq_list(attr, qp); + + return &qp->verbs_qp.qp; + +-- +2.33.0 + diff --git a/0015-libhns-return-error-when-post-send-in-reset-state.patch b/0015-libhns-return-error-when-post-send-in-reset-state.patch new file mode 100644 index 0000000000000000000000000000000000000000..7b5067bad478ff3c8cd4a35ad90a5df5a5dcaca8 --- /dev/null +++ b/0015-libhns-return-error-when-post-send-in-reset-state.patch @@ -0,0 +1,155 @@ +From 0b33b387d5b806804ae9278d3911289f8619dfd2 Mon Sep 17 00:00:00 2001 +From: Chengchang Tang +Date: Tue, 26 Sep 2023 19:19:07 +0800 +Subject: [PATCH 15/18] libhns: return error when post send in reset state + +driver inclusion +category: bugfix +bugzilla: https://gitee.com/openeuler/kernel/issues/I98HQV + +-------------------------------------------------------------------------- + +If the device has been resetted, the original business will not be able +to continue. The current design is to allow users to continue issuing IO. +Such a design is meaningless, the user should perceive the exception and +restore the business as soon as possible. + +The current kernel mode directly returns an error when device has been +resetted, and this patch can unify the behavior of the kernel mode and +user mode. 
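+
+Caller-side sketch of the behaviour this enables (hedged: the EIO return
+value reflects this series only, and recover_qp() is an illustrative
+application helper, not a verbs API):
+
+```c
+struct ibv_send_wr *bad_wr;
+int ret = ibv_post_send(qp, &wr, &bad_wr);
+
+if (ret == EIO) {
+	/* device has been reset: stop issuing IO and rebuild resources */
+	recover_qp(qp);
+} else if (ret) {
+	fprintf(stderr, "post_send failed: %d\n", ret);
+}
+```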
+ +Signed-off-by: Chengchang Tang +--- + providers/hns/hns_roce_u_hw_v2.c | 66 ++++++++++++++++++++++++-------- + 1 file changed, 51 insertions(+), 15 deletions(-) + +diff --git a/providers/hns/hns_roce_u_hw_v2.c b/providers/hns/hns_roce_u_hw_v2.c +index 2119c4c..fe22b43 100644 +--- a/providers/hns/hns_roce_u_hw_v2.c ++++ b/providers/hns/hns_roce_u_hw_v2.c +@@ -956,14 +956,24 @@ static int hns_roce_u_v2_arm_cq(struct ibv_cq *ibvcq, int solicited) + return 0; + } + +-static inline int check_qp_send(struct ibv_qp *qp) ++static int check_qp_send(struct hns_roce_qp *qp, struct hns_roce_context *ctx) + { +- if (unlikely(qp->state == IBV_QPS_RESET || +- qp->state == IBV_QPS_INIT || +- qp->state == IBV_QPS_RTR)) ++ struct ibv_qp *ibvqp = &qp->verbs_qp.qp; ++ int ret = 0; ++ ++ if (unlikely(ibvqp->state == IBV_QPS_RESET || ++ ibvqp->state == IBV_QPS_INIT || ++ ibvqp->state == IBV_QPS_RTR)){ ++ verbs_err(verbs_get_ctx(qp->verbs_qp.qp.context), ++ "unsupported qp state, state = %d.\n", ibvqp->state); + return EINVAL; ++ } else if (unlikely(hns_roce_reseted(ctx))) { ++ verbs_err_datapath(&ctx->ibv_ctx, ++ "failed to send, device has been reseted!\n"); ++ return EIO; ++ } + +- return 0; ++ return ret; + } + + static void set_rc_sge(struct hns_roce_v2_wqe_data_seg *dseg, +@@ -1453,7 +1463,7 @@ int hns_roce_u_v2_post_send(struct ibv_qp *ibvqp, struct ibv_send_wr *wr, + unsigned int wqe_idx, nreq; + int ret; + +- ret = check_qp_send(ibvqp); ++ ret = check_qp_send(qp, ctx); + if (unlikely(ret)) { + *bad_wr = wr; + return ret; +@@ -1531,12 +1541,22 @@ out: + return ret; + } + +-static inline int check_qp_recv(struct ibv_qp *qp) ++static int check_qp_recv(struct hns_roce_qp *qp, struct hns_roce_context *ctx) + { +- if (qp->state == IBV_QPS_RESET) ++ struct ibv_qp *ibvqp = &qp->verbs_qp.qp; ++ int ret = 0; ++ ++ if (ibvqp->state == IBV_QPS_RESET) { ++ verbs_err(verbs_get_ctx(qp->verbs_qp.qp.context), ++ "unsupported qp state, state = %d.\n", ibvqp->state); + return EINVAL; ++ } else if (unlikely(hns_roce_reseted(ctx))) { ++ verbs_err_datapath(&ctx->ibv_ctx, ++ "fail to recv, device has been reseted!\n"); ++ return EIO; ++ } + +- return 0; ++ return ret; + } + + static void fill_recv_sge_to_wqe(struct ibv_recv_wr *wr, void *wqe, +@@ -1603,7 +1623,7 @@ static int hns_roce_u_v2_post_recv(struct ibv_qp *ibvqp, struct ibv_recv_wr *wr, + struct ibv_qp_attr attr = {}; + int ret; + +- ret = check_qp_recv(ibvqp); ++ ret = check_qp_recv(qp, ctx); + if (unlikely(ret)) { + *bad_wr = wr; + return ret; +@@ -1933,6 +1953,16 @@ static void update_srq_db(struct hns_roce_context *ctx, struct hns_roce_db *db, + (__le32 *)db); + } + ++static int check_srq_recv(struct hns_roce_context *ctx) ++{ ++ if (hns_roce_reseted(ctx)) { ++ verbs_err_datapath(&ctx->ibv_ctx, ++ "srq failed to recv, device has been reseted!\n"); ++ return EIO; ++ } ++ return 0; ++} ++ + static int hns_roce_u_v2_post_srq_recv(struct ibv_srq *ib_srq, + struct ibv_recv_wr *wr, + struct ibv_recv_wr **bad_wr) +@@ -1944,6 +1974,12 @@ static int hns_roce_u_v2_post_srq_recv(struct ibv_srq *ib_srq, + int ret = 0; + void *wqe; + ++ ret = check_srq_recv(ctx); ++ if (ret) { ++ *bad_wr = wr; ++ return ret; ++ } ++ + hns_roce_spin_lock(&srq->hr_lock); + + max_sge = srq->max_gs - srq->rsv_sge; +@@ -2751,13 +2787,13 @@ static void wr_set_inline_data_list_ud(struct ibv_qp_ex *ibv_qp, size_t num_buf, + + static void wr_start(struct ibv_qp_ex *ibv_qp) + { ++ struct hns_roce_context *ctx = to_hr_ctx(ibv_qp->qp_base.context); + struct hns_roce_qp *qp = to_hr_qp(&ibv_qp->qp_base); 
+- enum ibv_qp_state state = ibv_qp->qp_base.state; ++ int ret; + +- if (state == IBV_QPS_RESET || +- state == IBV_QPS_INIT || +- state == IBV_QPS_RTR) { +- qp->err = EINVAL; ++ ret = check_qp_send(qp, ctx); ++ if (ret) { ++ qp->err = ret; + return; + } + +-- +2.33.0 + diff --git a/0016-libhns-Assign-doorbell-to-zero-when-allocate-it.patch b/0016-libhns-Assign-doorbell-to-zero-when-allocate-it.patch new file mode 100644 index 0000000000000000000000000000000000000000..dcb3ea88920e3feb2293731b1e5fa1b747ed40d5 --- /dev/null +++ b/0016-libhns-Assign-doorbell-to-zero-when-allocate-it.patch @@ -0,0 +1,83 @@ +From 62e56376912213cab92a4378a719d037fef61cd4 Mon Sep 17 00:00:00 2001 +From: Chengchang Tang +Date: Thu, 18 Apr 2024 13:49:32 +0800 +Subject: [PATCH] libhns: Assign doorbell to zero when allocate it + +mainline inclusion +from mainline-master +commit 2af6b0f3262c432f35cb6a92de50c4e93b63b6af +category: feature +bugzilla: https://gitee.com/openeuler/kernel/issues/I9NZME +CVE: NA + +Reference: https://github.com/linux-rdma/rdma-core/pull/1450/commits/2af6b0f3262c432f35cb6a92de50c4e93b63b6af + +---------------------------------------------------------------------- + +Clear the doorbell when getting it to avoid clearing it in each +function that uses hns_roce_alloc_db() + +Signed-off-by: Chengchang Tang +Signed-off-by: Junxian Huang +Signed-off-by: Juan Zhou +--- + providers/hns/hns_roce_u_db.c | 2 ++ + providers/hns/hns_roce_u_verbs.c | 8 -------- + 2 files changed, 2 insertions(+), 8 deletions(-) + +diff --git a/providers/hns/hns_roce_u_db.c b/providers/hns/hns_roce_u_db.c +index 0314254..bbef988 100644 +--- a/providers/hns/hns_roce_u_db.c ++++ b/providers/hns/hns_roce_u_db.c +@@ -116,6 +116,8 @@ found: + + out: + pthread_mutex_unlock((pthread_mutex_t *)&ctx->db_list_mutex); ++ if (db) ++ *((unsigned int *)db) = 0; + + return db; + } +diff --git a/providers/hns/hns_roce_u_verbs.c b/providers/hns/hns_roce_u_verbs.c +index 3f23715..69bcc13 100644 +--- a/providers/hns/hns_roce_u_verbs.c ++++ b/providers/hns/hns_roce_u_verbs.c +@@ -576,8 +576,6 @@ static struct ibv_cq_ex *create_cq(struct ibv_context *context, + goto err_db; + } + +- *cq->db = 0; +- + ret = hns_roce_init_cq_swc(cq, attr); + if (ret) + goto err_swc; +@@ -921,8 +919,6 @@ static struct ibv_srq *create_srq(struct ibv_context *context, + if (!srq->rdb) + goto err_srq_buf; + +- *srq->rdb = 0; +- + ret = exec_srq_create_cmd(context, srq, init_attr); + if (ret) + goto err_srq_db; +@@ -1505,8 +1501,6 @@ static int qp_alloc_db(struct ibv_qp_init_attr_ex *attr, struct hns_roce_qp *qp, + qp->sdb = hns_roce_alloc_db(ctx, HNS_ROCE_QP_TYPE_DB); + if (!qp->sdb) + return -ENOMEM; +- +- *qp->sdb = 0; + } + + if (attr->cap.max_recv_sge) { +@@ -1518,8 +1512,6 @@ static int qp_alloc_db(struct ibv_qp_init_attr_ex *attr, struct hns_roce_qp *qp, + + return -ENOMEM; + } +- +- *qp->rdb = 0; + } + + return 0; +-- +2.33.0 + diff --git a/0017-libhns-Fix-missing-reset-notification.patch b/0017-libhns-Fix-missing-reset-notification.patch new file mode 100644 index 0000000000000000000000000000000000000000..0b3e2c6512b1dbf03dc942679338215d200232fb --- /dev/null +++ b/0017-libhns-Fix-missing-reset-notification.patch @@ -0,0 +1,92 @@ +From 8b922418b18fefe2a60e122374b3bc8096672661 Mon Sep 17 00:00:00 2001 +From: Chengchang Tang +Date: Tue, 26 Sep 2023 19:19:10 +0800 +Subject: [PATCH 17/18] libhns: Fix missing reset notification. 
+ +driver inclusion +category: feature +bugzilla: https://gitee.com/openeuler/kernel/issues/I98HQV + +-------------------------------------------------------------------------- + +Currently, userspace driver get the reset notification by reading a +a shared variable which would be set to non-zero during reset. However, +if the user does not call driver's IO interface during reset, the reset +notification will be ignored. because this variable will be clear after +completes the reset. + +This patch use a new reset flag to get whether the driver has been reset +at any time. A non-zero value will be assigned to this new reset +flag by default, which will permanently become 0 once a reset occurs. +During reset, the kernel space driver will assign 0 to this variable. +After reset, this variable will be remapped to a page of all zeros. The +userspace driver can judge whether the driver has been reset by whether +this variable is 0. + +Fixes: 34f2ad8085c2 ("libhns: Add reset stop flow mechanism") +Signed-off-by: Chengchang Tang +--- + providers/hns/hns_roce_u.c | 4 ++++ + providers/hns/hns_roce_u.h | 2 ++ + providers/hns/hns_roce_u_hw_v2.c | 3 +++ + 3 files changed, 9 insertions(+) + +diff --git a/providers/hns/hns_roce_u.c b/providers/hns/hns_roce_u.c +index 0e4f4c1..810b650 100644 +--- a/providers/hns/hns_roce_u.c ++++ b/providers/hns/hns_roce_u.c +@@ -105,6 +105,7 @@ static int init_reset_context(struct hns_roce_context *ctx, int cmd_fd, + int page_size) + { + uint64_t reset_mmap_key = resp->reset_mmap_key; ++ struct hns_roce_v2_reset_state *state; + + /* The reset mmap key is 0, which means it is not supported. */ + if (reset_mmap_key == 0) +@@ -115,6 +116,9 @@ static int init_reset_context(struct hns_roce_context *ctx, int cmd_fd, + if (ctx->reset_state == MAP_FAILED) + return -ENOMEM; + ++ state = ctx->reset_state; ++ ctx->use_new_reset_flag = state->hw_ready; ++ + return 0; + } + +diff --git a/providers/hns/hns_roce_u.h b/providers/hns/hns_roce_u.h +index 5adf6bd..024932a 100644 +--- a/providers/hns/hns_roce_u.h ++++ b/providers/hns/hns_roce_u.h +@@ -203,6 +203,7 @@ struct hns_roce_spinlock { + + struct hns_roce_v2_reset_state { + uint32_t is_reset; ++ uint32_t hw_ready; + }; + + struct hns_roce_context { +@@ -239,6 +240,7 @@ struct hns_roce_context { + uint32_t config; + unsigned int max_inline_data; + ++ bool use_new_reset_flag; + bool reseted; + }; + +diff --git a/providers/hns/hns_roce_u_hw_v2.c b/providers/hns/hns_roce_u_hw_v2.c +index fe22b43..a0dce1c 100644 +--- a/providers/hns/hns_roce_u_hw_v2.c ++++ b/providers/hns/hns_roce_u_hw_v2.c +@@ -891,6 +891,9 @@ static bool hns_roce_reseted(struct hns_roce_context *ctx) + { + struct hns_roce_v2_reset_state *state = ctx->reset_state; + ++ if (ctx->use_new_reset_flag) ++ return !state->hw_ready; ++ + if (state && state->is_reset) + ctx->reseted = true; + +-- +2.33.0 + diff --git a/0018-libhns-Fix-owner-bit-when-SQ-wraps-around-in-new-IO.patch b/0018-libhns-Fix-owner-bit-when-SQ-wraps-around-in-new-IO.patch new file mode 100644 index 0000000000000000000000000000000000000000..cefb81142f851cdbb7f5144a2ba3758e47f36ba7 --- /dev/null +++ b/0018-libhns-Fix-owner-bit-when-SQ-wraps-around-in-new-IO.patch @@ -0,0 +1,103 @@ +From 26cd3b3f19a019cf0bc17915af179de6193fe56c Mon Sep 17 00:00:00 2001 +From: Chengchang Tang +Date: Thu, 18 Apr 2024 13:49:33 +0800 +Subject: [PATCH] libhns: Fix owner bit when SQ wraps around in new IO + +mainline inclusion +from mainline-master +commit 0067aad0a3a9a46d6c150e089b30bc9246dfe663 +category: bugfix +bugzilla: 
https://gitee.com/openeuler/kernel/issues/I9NZME +CVE: NA + +Reference: https://github.com/linux-rdma/rdma-core/pull/1450/commits/0067aad0a3a9a46d6c150e089b30bc9246dfe663 + +---------------------------------------------------------------------- + +Commit c292b7809f38 ("libhns: Fix the owner bit error of sq in new io") +fixed a bug that the SQ head was updated before the owner bit was filled +in WQE, but only when using ibv_wr_set_sge(). Actually this bug still +exists in other ibv_wr_set_*(). + +For example, in the flow below, the driver will fill the owner bit in +ibv_wr_rdma_write(), but mistakenly overwrite it again in +ibv_wr_set_sge_list() or ibv_wr_set_inline_data_list(). + +```c +ibv_wr_start(); +ibv_wr_rdma_write(); +if (inline) + ibv_wr_set_inline_data_list(); +else + ibv_wr_set_sge_list(); +ibv_wr_complete(); +``` + +When the SQ wraps around, the overwritten value will be incorrect. +Remove all the incorrect owner bit filling in ibv_wr_set_*(). + +Fixes: 36446a56eea5 ("libhns: Extended QP supports the new post send mechanism") +Fixes: c292b7809f38 ("libhns: Fix the owner bit error of sq in new io") +Signed-off-by: Chengchang Tang +Signed-off-by: Junxian Huang +Signed-off-by: Juan Zhou +--- + providers/hns/hns_roce_u_hw_v2.c | 7 ------- + 1 file changed, 7 deletions(-) + +diff --git a/providers/hns/hns_roce_u_hw_v2.c b/providers/hns/hns_roce_u_hw_v2.c +index a0dce1c..9016978 100644 +--- a/providers/hns/hns_roce_u_hw_v2.c ++++ b/providers/hns/hns_roce_u_hw_v2.c +@@ -2353,8 +2353,6 @@ static void wr_set_sge_list_rc(struct ibv_qp_ex *ibv_qp, size_t num_sge, + + wqe->msg_len = htole32(qp->sge_info.total_len); + hr_reg_write(wqe, RCWQE_SGE_NUM, qp->sge_info.valid_num); +- +- enable_wqe(qp, wqe, qp->sq.head); + } + + static void wr_send_rc(struct ibv_qp_ex *ibv_qp) +@@ -2546,7 +2544,6 @@ static void wr_set_inline_data_rc(struct ibv_qp_ex *ibv_qp, void *addr, + + qp->sge_info.total_len = length; + set_inline_data_list_rc(qp, wqe, 1, &buff); +- enable_wqe(qp, wqe, qp->sq.head); + } + + static void wr_set_inline_data_list_rc(struct ibv_qp_ex *ibv_qp, size_t num_buf, +@@ -2564,7 +2561,6 @@ static void wr_set_inline_data_list_rc(struct ibv_qp_ex *ibv_qp, size_t num_buf, + qp->sge_info.total_len += buf_list[i].length; + + set_inline_data_list_rc(qp, wqe, num_buf, buf_list); +- enable_wqe(qp, wqe, qp->sq.head); + } + + static struct hns_roce_ud_sq_wqe * +@@ -2701,7 +2697,6 @@ static void wr_set_sge_list_ud(struct ibv_qp_ex *ibv_qp, size_t num_sge, + hr_reg_write(wqe, UDWQE_SGE_NUM, cnt); + + qp->sge_info.start_idx += cnt; +- enable_wqe(qp, wqe, qp->sq.head); + } + + static void set_inline_data_list_ud(struct hns_roce_qp *qp, +@@ -2767,7 +2762,6 @@ static void wr_set_inline_data_ud(struct ibv_qp_ex *ibv_qp, void *addr, + + qp->sge_info.total_len = length; + set_inline_data_list_ud(qp, wqe, 1, &buff); +- enable_wqe(qp, wqe, qp->sq.head); + } + + static void wr_set_inline_data_list_ud(struct ibv_qp_ex *ibv_qp, size_t num_buf, +@@ -2785,7 +2779,6 @@ static void wr_set_inline_data_list_ud(struct ibv_qp_ex *ibv_qp, size_t num_buf, + qp->sge_info.total_len += buf_list[i].length; + + set_inline_data_list_ud(qp, wqe, num_buf, buf_list); +- enable_wqe(qp, wqe, qp->sq.head); + } + + static void wr_start(struct ibv_qp_ex *ibv_qp) +-- +2.33.0 + diff --git a/0019-Update-kernel-headers.patch b/0019-Update-kernel-headers.patch new file mode 100644 index 0000000000000000000000000000000000000000..a2f881e23632439d13351f6f239c5bd4e4d0eb46 --- /dev/null +++ b/0019-Update-kernel-headers.patch @@ -0,0 +1,145 @@ 
+From 12067eedd348988f882f707555239d692f6c13c4 Mon Sep 17 00:00:00 2001 +From: Chengchang Tang +Date: Mon, 28 Nov 2022 21:52:20 +0800 +Subject: [PATCH 19/25] Update kernel headers + +driver inclusion +category: feature +bugzilla: https://gitee.com/src-openeuler/rdma-core/issues/I9C2AQ + +------------------------------------------------------------------ + +To commit ?? ("RDMA/hns: Fixes concurrent ressetting and post_recv in DCA +mode"). + +Signed-off-by: Chengchang Tang +Reviewed-by: Yangyang Li +--- + kernel-headers/rdma/hns-abi.h | 73 ++++++++++++++++++++++++++++++++++- + 1 file changed, 72 insertions(+), 1 deletion(-) + +diff --git a/kernel-headers/rdma/hns-abi.h b/kernel-headers/rdma/hns-abi.h +index 1d51612..8a8f2e4 100644 +--- a/kernel-headers/rdma/hns-abi.h ++++ b/kernel-headers/rdma/hns-abi.h +@@ -102,7 +102,9 @@ enum hns_roce_qp_cap_flags { + HNS_ROCE_QP_CAP_RQ_RECORD_DB = 1 << 0, + HNS_ROCE_QP_CAP_SQ_RECORD_DB = 1 << 1, + HNS_ROCE_QP_CAP_OWNER_DB = 1 << 2, ++ HNS_ROCE_QP_CAP_DYNAMIC_CTX_ATTACH = 1 << 4, + HNS_ROCE_QP_CAP_DIRECT_WQE = 1 << 5, ++ HNS_ROCE_QP_CAP_DYNAMIC_CTX_DETACH = 1 << 6, + }; + + struct hns_roce_ib_create_qp_resp { +@@ -114,12 +116,15 @@ struct hns_roce_ib_modify_qp_resp { + __u8 tc_mode; + __u8 priority; + __u8 reserved[6]; ++ __u32 dcan; ++ __u32 rsv2; + }; + + enum { + HNS_ROCE_EXSGE_FLAGS = 1 << 0, + HNS_ROCE_RQ_INLINE_FLAGS = 1 << 1, + HNS_ROCE_CQE_INLINE_FLAGS = 1 << 2, ++ HNS_ROCE_UCTX_CONFIG_DCA = 1 << 3, + HNS_ROCE_UCTX_DYN_QP_PGSZ = 1 << 4, + }; + +@@ -127,6 +132,7 @@ enum { + HNS_ROCE_RSP_EXSGE_FLAGS = 1 << 0, + HNS_ROCE_RSP_RQ_INLINE_FLAGS = 1 << 1, + HNS_ROCE_RSP_CQE_INLINE_FLAGS = 1 << 2, ++ HNS_ROCE_UCTX_RSP_DCA_FLAGS = HNS_ROCE_UCTX_CONFIG_DCA, + HNS_ROCE_UCTX_RSP_DYN_QP_PGSZ = HNS_ROCE_UCTX_DYN_QP_PGSZ, + }; + +@@ -139,12 +145,20 @@ struct hns_roce_ib_alloc_ucontext_resp { + __u32 max_inline_data; + __u8 congest_type; + __u8 reserved0[7]; +- __aligned_u64 rsv_for_dca[2]; ++ __u32 dca_qps; ++ __u32 dca_mmap_size; ++ __aligned_u64 dca_mmap_key; + __aligned_u64 reset_mmap_key; + }; + ++enum hns_roce_uctx_comp_mask { ++ HNS_ROCE_ALLOC_UCTX_COMP_DCA_MAX_QPS = 1 << 0, ++}; ++ + struct hns_roce_ib_alloc_ucontext { + __u32 config; ++ __u32 comp; /* use hns_roce_uctx_comp_mask */ ++ __u32 dca_max_qps; + __u32 reserved; + }; + +@@ -158,4 +172,61 @@ struct hns_roce_ib_create_ah_resp { + __u8 tc_mode; + }; + ++#define UVERBS_ID_NS_MASK 0xF000 ++#define UVERBS_ID_NS_SHIFT 12 ++ ++enum hns_ib_objects { ++ HNS_IB_OBJECT_DCA_MEM = (1U << UVERBS_ID_NS_SHIFT), ++}; ++ ++enum hns_ib_dca_mem_methods { ++ HNS_IB_METHOD_DCA_MEM_REG = (1U << UVERBS_ID_NS_SHIFT), ++ HNS_IB_METHOD_DCA_MEM_DEREG, ++ HNS_IB_METHOD_DCA_MEM_SHRINK, ++ HNS_IB_METHOD_DCA_MEM_ATTACH, ++ HNS_IB_METHOD_DCA_MEM_DETACH, ++ HNS_IB_METHOD_DCA_MEM_QUERY, ++}; ++ ++enum hns_ib_dca_mem_reg_attrs { ++ HNS_IB_ATTR_DCA_MEM_REG_HANDLE = (1U << UVERBS_ID_NS_SHIFT), ++ HNS_IB_ATTR_DCA_MEM_REG_FLAGS, ++ HNS_IB_ATTR_DCA_MEM_REG_LEN, ++ HNS_IB_ATTR_DCA_MEM_REG_ADDR, ++ HNS_IB_ATTR_DCA_MEM_REG_KEY, ++}; ++ ++enum hns_ib_dca_mem_dereg_attrs { ++ HNS_IB_ATTR_DCA_MEM_DEREG_HANDLE = (1U << UVERBS_ID_NS_SHIFT), ++}; ++ ++enum hns_ib_dca_mem_shrink_attrs { ++ HNS_IB_ATTR_DCA_MEM_SHRINK_HANDLE = (1U << UVERBS_ID_NS_SHIFT), ++ HNS_IB_ATTR_DCA_MEM_SHRINK_RESERVED_SIZE, ++ HNS_IB_ATTR_DCA_MEM_SHRINK_OUT_FREE_KEY, ++ HNS_IB_ATTR_DCA_MEM_SHRINK_OUT_FREE_MEMS, ++}; ++ ++enum hns_ib_dca_mem_attach_attrs { ++ HNS_IB_ATTR_DCA_MEM_ATTACH_HANDLE = (1U << UVERBS_ID_NS_SHIFT), ++ HNS_IB_ATTR_DCA_MEM_ATTACH_SQ_OFFSET, ++ 
HNS_IB_ATTR_DCA_MEM_ATTACH_SGE_OFFSET, ++ HNS_IB_ATTR_DCA_MEM_ATTACH_RQ_OFFSET, ++ HNS_IB_ATTR_DCA_MEM_ATTACH_OUT_ALLOC_FLAGS, ++ HNS_IB_ATTR_DCA_MEM_ATTACH_OUT_ALLOC_PAGES, ++}; ++ ++enum hns_ib_dca_mem_detach_attrs { ++ HNS_IB_ATTR_DCA_MEM_DETACH_HANDLE = (1U << UVERBS_ID_NS_SHIFT), ++ HNS_IB_ATTR_DCA_MEM_DETACH_SQ_INDEX, ++}; ++ ++enum hns_ib_dca_mem_query_attrs { ++ HNS_IB_ATTR_DCA_MEM_QUERY_HANDLE = (1U << UVERBS_ID_NS_SHIFT), ++ HNS_IB_ATTR_DCA_MEM_QUERY_PAGE_INDEX, ++ HNS_IB_ATTR_DCA_MEM_QUERY_OUT_KEY, ++ HNS_IB_ATTR_DCA_MEM_QUERY_OUT_OFFSET, ++ HNS_IB_ATTR_DCA_MEM_QUERY_OUT_PAGE_COUNT, ++}; ++ + #endif /* HNS_ABI_USER_H */ +-- +2.33.0 + diff --git a/0020-libhns-Introduce-DCA-for-RC-QP.patch b/0020-libhns-Introduce-DCA-for-RC-QP.patch new file mode 100644 index 0000000000000000000000000000000000000000..721d39146482b55993f3464f5643d28909d3a441 --- /dev/null +++ b/0020-libhns-Introduce-DCA-for-RC-QP.patch @@ -0,0 +1,346 @@ +From f0d70762b8c69e735a1d15f8379b649bcad3929c Mon Sep 17 00:00:00 2001 +From: Chengchang Tang +Date: Mon, 10 May 2021 17:13:09 +0800 +Subject: [PATCH 20/25] libhns: Introduce DCA for RC QP + +driver inclusion +category: feature +bugzilla: https://gitee.com/src-openeuler/rdma-core/issues/I9C2AQ + +------------------------------------------------------------------ + +The HIP09 introduces the DCA(Dynamic context attachment) feature which +supports many RC QPs to share the WQE buffer in a memory pool, this will +reduce the memory consumption when there are too many QPs inactive. + +Two functions are defined for adding buffers to memory pool and removing +buffers from memory pool by calling ib cmd implemented in hns kernelspace +driver. + +If a QP enables DCA feature, the WQE's buffer will be attached to the +memory pool when the users start to post WRs and be detached when all CQEs +has been polled. + +Signed-off-by: Chengchang Tang +Reviewed-by: Yangyang Li +--- + providers/hns/hns_roce_u.c | 61 +++++++++++++- + providers/hns/hns_roce_u.h | 21 ++++- + providers/hns/hns_roce_u_buf.c | 147 +++++++++++++++++++++++++++++++++ + 3 files changed, 226 insertions(+), 3 deletions(-) + +diff --git a/providers/hns/hns_roce_u.c b/providers/hns/hns_roce_u.c +index 810b650..2272431 100644 +--- a/providers/hns/hns_roce_u.c ++++ b/providers/hns/hns_roce_u.c +@@ -100,6 +100,53 @@ static uint32_t calc_table_shift(uint32_t entry_count, uint32_t size_shift) + return count_shift > size_shift ? 
count_shift - size_shift : 0; + } + ++static int hns_roce_mmap(struct hns_roce_device *hr_dev, ++ struct hns_roce_context *context, int cmd_fd) ++{ ++ int page_size = hr_dev->page_size; ++ ++ context->uar = mmap(NULL, page_size, PROT_READ | PROT_WRITE, ++ MAP_SHARED, cmd_fd, 0); ++ if (context->uar == MAP_FAILED) ++ return -ENOMEM; ++ ++ return 0; ++} ++ ++static int init_dca_context(struct hns_roce_context *ctx, int page_size) ++{ ++ struct hns_roce_dca_ctx *dca_ctx = &ctx->dca_ctx; ++ int ret; ++ ++ if (!(ctx->config & HNS_ROCE_UCTX_RSP_DCA_FLAGS)) ++ return 0; ++ ++ list_head_init(&dca_ctx->mem_list); ++ ret = pthread_spin_init(&dca_ctx->lock, PTHREAD_PROCESS_PRIVATE); ++ if (ret) ++ return ret; ++ ++ dca_ctx->unit_size = page_size * HNS_DCA_DEFAULT_UNIT_PAGES; ++ dca_ctx->max_size = HNS_DCA_MAX_MEM_SIZE; ++ dca_ctx->mem_cnt = 0; ++ ++ return 0; ++} ++ ++static void uninit_dca_context(struct hns_roce_context *ctx) ++{ ++ struct hns_roce_dca_ctx *dca_ctx = &ctx->dca_ctx; ++ ++ if (!(ctx->config & HNS_ROCE_UCTX_RSP_DCA_FLAGS)) ++ return; ++ ++ pthread_spin_lock(&dca_ctx->lock); ++ hns_roce_cleanup_dca_mem(ctx); ++ pthread_spin_unlock(&dca_ctx->lock); ++ ++ pthread_spin_destroy(&dca_ctx->lock); ++} ++ + static int init_reset_context(struct hns_roce_context *ctx, int cmd_fd, + struct hns_roce_alloc_ucontext_resp *resp, + int page_size) +@@ -185,7 +232,7 @@ static struct verbs_context *hns_roce_alloc_context(struct ibv_device *ibdev, + return NULL; + + cmd.config |= HNS_ROCE_EXSGE_FLAGS | HNS_ROCE_RQ_INLINE_FLAGS | +- HNS_ROCE_CQE_INLINE_FLAGS; ++ HNS_ROCE_CQE_INLINE_FLAGS | HNS_ROCE_UCTX_CONFIG_DCA; + if (ibv_cmd_get_context(&context->ibv_ctx, &cmd.ibv_cmd, sizeof(cmd), + &resp.ibv_resp, sizeof(resp))) + goto err_free; +@@ -198,9 +245,15 @@ static struct verbs_context *hns_roce_alloc_context(struct ibv_device *ibdev, + if (context->uar == MAP_FAILED) + goto err_free; + ++ if (init_dca_context(context, hr_dev->page_size)) ++ goto err_free; ++ + if (init_reset_context(context, cmd_fd, &resp, hr_dev->page_size)) + goto reset_free; + ++ if (hns_roce_mmap(hr_dev, context, cmd_fd)) ++ goto uar_free; ++ + pthread_mutex_init(&context->qp_table_mutex, NULL); + pthread_mutex_init(&context->srq_table_mutex, NULL); + pthread_spin_init(&context->uar_lock, PTHREAD_PROCESS_PRIVATE); +@@ -210,8 +263,11 @@ static struct verbs_context *hns_roce_alloc_context(struct ibv_device *ibdev, + + return &context->ibv_ctx; + ++uar_free: ++ if (context->reset_state) ++ munmap(context->reset_state, hr_dev->page_size); + reset_free: +- munmap(context->uar, hr_dev->page_size); ++ uninit_dca_context(context); + err_free: + verbs_uninit_context(&context->ibv_ctx); + free(context); +@@ -226,6 +282,7 @@ static void hns_roce_free_context(struct ibv_context *ibctx) + munmap(context->uar, hr_dev->page_size); + if (context->reset_state) + munmap(context->reset_state, hr_dev->page_size); ++ uninit_dca_context(context); + verbs_uninit_context(&context->ibv_ctx); + free(context); + } +diff --git a/providers/hns/hns_roce_u.h b/providers/hns/hns_roce_u.h +index 024932a..90b2205 100644 +--- a/providers/hns/hns_roce_u.h ++++ b/providers/hns/hns_roce_u.h +@@ -147,6 +147,10 @@ + + #define hr_reg_read(ptr, field) _hr_reg_read(ptr, field) + ++enum { ++ HNS_ROCE_CAP_FLAG_DCA_MODE = BIT(15), ++}; ++ + #define HNS_ROCE_QP_TABLE_BITS 8 + #define HNS_ROCE_QP_TABLE_SIZE BIT(HNS_ROCE_QP_TABLE_BITS) + +@@ -201,6 +205,18 @@ struct hns_roce_spinlock { + int need_lock; + }; + ++#define HNS_DCA_MAX_MEM_SIZE ~0UL ++#define HNS_DCA_DEFAULT_UNIT_PAGES 16 
++ ++struct hns_roce_dca_ctx { ++ struct list_head mem_list; ++ pthread_spinlock_t lock; ++ int mem_cnt; ++ unsigned int unit_size; ++ uint64_t max_size; ++ uint64_t curr_size; ++}; ++ + struct hns_roce_v2_reset_state { + uint32_t is_reset; + uint32_t hw_ready; +@@ -239,7 +255,7 @@ struct hns_roce_context { + unsigned int cqe_size; + uint32_t config; + unsigned int max_inline_data; +- ++ struct hns_roce_dca_ctx dca_ctx; + bool use_new_reset_flag; + bool reseted; + }; +@@ -586,6 +602,9 @@ void hns_roce_qp_spinlock_destroy(struct hns_roce_qp *qp); + + void hns_roce_free_qp_buf(struct hns_roce_qp *qp, struct hns_roce_context *ctx); + ++void hns_roce_cleanup_dca_mem(struct hns_roce_context *ctx); ++int hns_roce_add_dca_mem(struct hns_roce_context *ctx, uint32_t size); ++ + void hns_roce_init_qp_indices(struct hns_roce_qp *qp); + + bool is_hns_dev(struct ibv_device *device); +diff --git a/providers/hns/hns_roce_u_buf.c b/providers/hns/hns_roce_u_buf.c +index 471dd9c..02c43ae 100644 +--- a/providers/hns/hns_roce_u_buf.c ++++ b/providers/hns/hns_roce_u_buf.c +@@ -60,3 +60,150 @@ void hns_roce_free_buf(struct hns_roce_buf *buf) + + munmap(buf->buf, buf->length); + } ++ ++struct hns_roce_dca_mem { ++ uint32_t handle; ++ struct list_node entry; ++ struct hns_roce_buf buf; ++ struct hns_roce_context *ctx; ++}; ++ ++static void free_dca_mem(struct hns_roce_context *ctx, ++ struct hns_roce_dca_mem *mem) ++{ ++ hns_roce_free_buf(&mem->buf); ++ free(mem); ++} ++ ++static struct hns_roce_dca_mem *alloc_dca_mem(uint32_t size) ++{ ++ struct hns_roce_dca_mem *mem = NULL; ++ int ret; ++ ++ mem = malloc(sizeof(struct hns_roce_dca_mem)); ++ if (!mem) { ++ errno = ENOMEM; ++ return NULL; ++ } ++ ++ ret = hns_roce_alloc_buf(&mem->buf, size, HNS_HW_PAGE_SIZE); ++ if (ret) { ++ errno = ENOMEM; ++ free(mem); ++ return NULL; ++ } ++ ++ return mem; ++} ++ ++static inline uint64_t dca_mem_to_key(struct hns_roce_dca_mem *dca_mem) ++{ ++ return (uintptr_t)dca_mem; ++} ++ ++static inline void *dca_mem_addr(struct hns_roce_dca_mem *dca_mem, int offset) ++{ ++ return dca_mem->buf.buf + offset; ++} ++ ++static int register_dca_mem(struct hns_roce_context *ctx, uint64_t key, ++ void *addr, uint32_t size, uint32_t *handle) ++{ ++ struct ib_uverbs_attr *attr; ++ int ret; ++ ++ DECLARE_COMMAND_BUFFER(cmd, HNS_IB_OBJECT_DCA_MEM, ++ HNS_IB_METHOD_DCA_MEM_REG, 4); ++ fill_attr_in_uint32(cmd, HNS_IB_ATTR_DCA_MEM_REG_LEN, size); ++ fill_attr_in_uint64(cmd, HNS_IB_ATTR_DCA_MEM_REG_ADDR, ++ ioctl_ptr_to_u64(addr)); ++ fill_attr_in_uint64(cmd, HNS_IB_ATTR_DCA_MEM_REG_KEY, key); ++ attr = fill_attr_out_obj(cmd, HNS_IB_ATTR_DCA_MEM_REG_HANDLE); ++ ++ ret = execute_ioctl(&ctx->ibv_ctx.context, cmd); ++ if (ret) { ++ verbs_err(&ctx->ibv_ctx, "failed to reg DCA mem, ret = %d.\n", ++ ret); ++ return ret; ++ } ++ ++ *handle = read_attr_obj(HNS_IB_ATTR_DCA_MEM_REG_HANDLE, attr); ++ ++ return 0; ++} ++ ++static void deregister_dca_mem(struct hns_roce_context *ctx, uint32_t handle) ++{ ++ int ret; ++ ++ DECLARE_COMMAND_BUFFER(cmd, HNS_IB_OBJECT_DCA_MEM, ++ HNS_IB_METHOD_DCA_MEM_DEREG, 1); ++ fill_attr_in_obj(cmd, HNS_IB_ATTR_DCA_MEM_DEREG_HANDLE, handle); ++ ret = execute_ioctl(&ctx->ibv_ctx.context, cmd); ++ if (ret) ++ verbs_warn(&ctx->ibv_ctx, ++ "failed to dereg DCA mem-%u, ret = %d.\n", ++ handle, ret); ++} ++ ++void hns_roce_cleanup_dca_mem(struct hns_roce_context *ctx) ++{ ++ struct hns_roce_dca_ctx *dca_ctx = &ctx->dca_ctx; ++ struct hns_roce_dca_mem *mem; ++ struct hns_roce_dca_mem *tmp; ++ ++ list_for_each_safe(&dca_ctx->mem_list, mem, 
tmp, entry) ++ deregister_dca_mem(ctx, mem->handle); ++} ++ ++static bool add_dca_mem_enabled(struct hns_roce_dca_ctx *ctx, ++ uint32_t alloc_size) ++{ ++ bool enable; ++ ++ pthread_spin_lock(&ctx->lock); ++ ++ if (ctx->unit_size == 0) /* Pool size can't be increased */ ++ enable = false; ++ else if (ctx->max_size == HNS_DCA_MAX_MEM_SIZE) /* Pool size no limit */ ++ enable = true; ++ else /* Pool size doesn't exceed max size */ ++ enable = (ctx->curr_size + alloc_size) < ctx->max_size; ++ ++ pthread_spin_unlock(&ctx->lock); ++ ++ return enable; ++} ++ ++int hns_roce_add_dca_mem(struct hns_roce_context *ctx, uint32_t size) ++{ ++ struct hns_roce_dca_ctx *dca_ctx = &ctx->dca_ctx; ++ struct hns_roce_dca_mem *mem; ++ int ret; ++ ++ if (!add_dca_mem_enabled(&ctx->dca_ctx, size)) ++ return -ENOMEM; ++ ++ /* Step 1: Alloc DCA mem address */ ++ mem = alloc_dca_mem( ++ DIV_ROUND_UP(size, dca_ctx->unit_size) * dca_ctx->unit_size); ++ if (!mem) ++ return -ENOMEM; ++ ++ /* Step 2: Register DCA mem uobject to pin user address */ ++ ret = register_dca_mem(ctx, dca_mem_to_key(mem), dca_mem_addr(mem, 0), ++ mem->buf.length, &mem->handle); ++ if (ret) { ++ free_dca_mem(ctx, mem); ++ return ret; ++ } ++ ++ /* Step 3: Add DCA mem node to pool */ ++ pthread_spin_lock(&dca_ctx->lock); ++ list_add_tail(&dca_ctx->mem_list, &mem->entry); ++ dca_ctx->mem_cnt++; ++ dca_ctx->curr_size += mem->buf.length; ++ pthread_spin_unlock(&dca_ctx->lock); ++ ++ return 0; ++} +-- +2.33.0 + diff --git a/0021-libhns-Add-support-for-shrinking-DCA-memory-pool.patch b/0021-libhns-Add-support-for-shrinking-DCA-memory-pool.patch new file mode 100644 index 0000000000000000000000000000000000000000..aa6797e53e0e5f5d32cc8f3d90241bc5f2cf5ddd --- /dev/null +++ b/0021-libhns-Add-support-for-shrinking-DCA-memory-pool.patch @@ -0,0 +1,204 @@ +From c104e33f0c4466f0c4b163984736eac18e9c8357 Mon Sep 17 00:00:00 2001 +From: Chengchang Tang +Date: Mon, 10 May 2021 17:13:13 +0800 +Subject: [PATCH 21/25] libhns: Add support for shrinking DCA memory pool + +driver inclusion +category: feature +bugzilla: https://gitee.com/src-openeuler/rdma-core/issues/I9C2AQ + +------------------------------------------------------------------ + +The QP's WQE buffer may be detached after QP is modified or CQE is polled, +and the state of DCA mem object may be changed as clean for no QP is using +it. So shrink the clean DCA mem from the memory pool and destroy the DCA +mem's buffer to reduce the memory consumption. 
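+
+A simplified view of the shrink flow added in this patch (an illustrative
+sketch only: shrink_one_dca_mem() is a hypothetical wrapper, the helpers
+and fields it calls are the ones introduced below, and locking plus the
+retry loop of hns_roce_shrink_dca_mem() are omitted):
+
+    static void shrink_one_dca_mem(struct hns_roce_context *ctx)
+    {
+        struct hns_roce_dca_ctx *dca_ctx = &ctx->dca_ctx;
+        struct hns_dca_mem_shrink_resp resp = {};
+        struct hns_roce_dca_mem *mem;
+
+        /* Step 1: use any registered DCA mem uobject to ask the kernel
+         * to free clean memory, keeping at least min_size bytes.
+         */
+        mem = list_tail(&dca_ctx->mem_list, struct hns_roce_dca_mem, entry);
+        if (!mem || shrink_dca_mem(ctx, mem->handle,
+                                   dca_ctx->min_size, &resp))
+            return;
+
+        /* Step 2: the kernel reports one freeable mem object by its key;
+         * drop it from the userspace pool accounting.
+         */
+        if (resp.free_mems < 1)
+            return;
+        mem = key_to_dca_mem(dca_ctx, resp.free_key);
+        if (!mem)
+            return;
+        list_del(&mem->entry);
+        dca_ctx->mem_cnt--;
+        dca_ctx->curr_size -= mem->buf.length;
+
+        /* Step 3: destroy the uobject and free the user buffer. */
+        deregister_dca_mem(ctx, mem->handle);
+        free_dca_mem(ctx, mem);
+    }
+
+hns_roce_shrink_dca_mem() below repeats this until the kernel reports
+nothing left to free, and is invoked from poll_cq() and destroy_qp()
+whenever the pool still holds memory (mem_cnt > 0).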
+ +Signed-off-by: Chengchang Tang +Reviewed-by: Yangyang Li +--- + providers/hns/hns_roce_u.h | 2 + + providers/hns/hns_roce_u_buf.c | 103 +++++++++++++++++++++++++++++++ + providers/hns/hns_roce_u_hw_v2.c | 7 +++ + 3 files changed, 112 insertions(+) + +diff --git a/providers/hns/hns_roce_u.h b/providers/hns/hns_roce_u.h +index 90b2205..e3fa24d 100644 +--- a/providers/hns/hns_roce_u.h ++++ b/providers/hns/hns_roce_u.h +@@ -214,6 +214,7 @@ struct hns_roce_dca_ctx { + int mem_cnt; + unsigned int unit_size; + uint64_t max_size; ++ uint64_t min_size; + uint64_t curr_size; + }; + +@@ -602,6 +603,7 @@ void hns_roce_qp_spinlock_destroy(struct hns_roce_qp *qp); + + void hns_roce_free_qp_buf(struct hns_roce_qp *qp, struct hns_roce_context *ctx); + ++void hns_roce_shrink_dca_mem(struct hns_roce_context *ctx); + void hns_roce_cleanup_dca_mem(struct hns_roce_context *ctx); + int hns_roce_add_dca_mem(struct hns_roce_context *ctx, uint32_t size); + +diff --git a/providers/hns/hns_roce_u_buf.c b/providers/hns/hns_roce_u_buf.c +index 02c43ae..c0f86e9 100644 +--- a/providers/hns/hns_roce_u_buf.c ++++ b/providers/hns/hns_roce_u_buf.c +@@ -101,6 +101,20 @@ static inline uint64_t dca_mem_to_key(struct hns_roce_dca_mem *dca_mem) + return (uintptr_t)dca_mem; + } + ++static struct hns_roce_dca_mem *key_to_dca_mem(struct hns_roce_dca_ctx *ctx, ++ uint64_t key) ++{ ++ struct hns_roce_dca_mem *mem; ++ struct hns_roce_dca_mem *tmp; ++ ++ list_for_each_safe(&ctx->mem_list, mem, tmp, entry) { ++ if (dca_mem_to_key(mem) == key) ++ return mem; ++ } ++ ++ return NULL; ++} ++ + static inline void *dca_mem_addr(struct hns_roce_dca_mem *dca_mem, int offset) + { + return dca_mem->buf.buf + offset; +@@ -156,6 +170,32 @@ void hns_roce_cleanup_dca_mem(struct hns_roce_context *ctx) + deregister_dca_mem(ctx, mem->handle); + } + ++struct hns_dca_mem_shrink_resp { ++ uint32_t free_mems; ++ uint64_t free_key; ++}; ++ ++static int shrink_dca_mem(struct hns_roce_context *ctx, uint32_t handle, ++ uint64_t size, struct hns_dca_mem_shrink_resp *resp) ++{ ++ int ret; ++ ++ DECLARE_COMMAND_BUFFER(cmd, HNS_IB_OBJECT_DCA_MEM, ++ HNS_IB_METHOD_DCA_MEM_SHRINK, 4); ++ fill_attr_in_obj(cmd, HNS_IB_ATTR_DCA_MEM_SHRINK_HANDLE, handle); ++ fill_attr_in_uint64(cmd, HNS_IB_ATTR_DCA_MEM_SHRINK_RESERVED_SIZE, size); ++ fill_attr_out(cmd, HNS_IB_ATTR_DCA_MEM_SHRINK_OUT_FREE_KEY, ++ &resp->free_key, sizeof(resp->free_key)); ++ fill_attr_out(cmd, HNS_IB_ATTR_DCA_MEM_SHRINK_OUT_FREE_MEMS, ++ &resp->free_mems, sizeof(resp->free_mems)); ++ ++ ret = execute_ioctl(&ctx->ibv_ctx.context, cmd); ++ if (ret) ++ verbs_err(&ctx->ibv_ctx, "failed to shrink DCA mem, ret = %d.\n", ++ ret); ++ ++ return ret; ++} + static bool add_dca_mem_enabled(struct hns_roce_dca_ctx *ctx, + uint32_t alloc_size) + { +@@ -175,6 +215,17 @@ static bool add_dca_mem_enabled(struct hns_roce_dca_ctx *ctx, + return enable; + } + ++static bool shrink_dca_mem_enabled(struct hns_roce_dca_ctx *ctx) ++{ ++ bool enable; ++ ++ pthread_spin_lock(&ctx->lock); ++ enable = ctx->mem_cnt > 0 && ctx->min_size < ctx->max_size; ++ pthread_spin_unlock(&ctx->lock); ++ ++ return enable; ++} ++ + int hns_roce_add_dca_mem(struct hns_roce_context *ctx, uint32_t size) + { + struct hns_roce_dca_ctx *dca_ctx = &ctx->dca_ctx; +@@ -207,3 +258,55 @@ int hns_roce_add_dca_mem(struct hns_roce_context *ctx, uint32_t size) + + return 0; + } ++ ++void hns_roce_shrink_dca_mem(struct hns_roce_context *ctx) ++{ ++ struct hns_roce_dca_ctx *dca_ctx = &ctx->dca_ctx; ++ struct hns_dca_mem_shrink_resp resp = {}; ++ struct 
hns_roce_dca_mem *mem; ++ int dca_mem_cnt; ++ uint32_t handle; ++ int ret; ++ ++ pthread_spin_lock(&dca_ctx->lock); ++ dca_mem_cnt = ctx->dca_ctx.mem_cnt; ++ pthread_spin_unlock(&dca_ctx->lock); ++ while (dca_mem_cnt > 0 && shrink_dca_mem_enabled(dca_ctx)) { ++ resp.free_mems = 0; ++ /* Step 1: Use any DCA mem uobject to shrink pool */ ++ pthread_spin_lock(&dca_ctx->lock); ++ mem = list_tail(&dca_ctx->mem_list, ++ struct hns_roce_dca_mem, entry); ++ handle = mem ? mem->handle : 0; ++ pthread_spin_unlock(&dca_ctx->lock); ++ if (!mem) ++ break; ++ ++ ret = shrink_dca_mem(ctx, handle, dca_ctx->min_size, &resp); ++ if (ret || likely(resp.free_mems < 1)) ++ break; ++ ++ /* Step 2: Remove shrunk DCA mem node from pool */ ++ pthread_spin_lock(&dca_ctx->lock); ++ mem = key_to_dca_mem(dca_ctx, resp.free_key); ++ if (mem) { ++ list_del(&mem->entry); ++ dca_ctx->mem_cnt--; ++ dca_ctx->curr_size -= mem->buf.length; ++ } ++ ++ handle = mem ? mem->handle : 0; ++ pthread_spin_unlock(&dca_ctx->lock); ++ if (!mem) ++ break; ++ ++ /* Step 3: Destroy DCA mem uobject */ ++ deregister_dca_mem(ctx, handle); ++ free_dca_mem(ctx, mem); ++ /* No any free memory after deregister 1 DCA mem */ ++ if (resp.free_mems <= 1) ++ break; ++ ++ dca_mem_cnt--; ++ } ++} +diff --git a/providers/hns/hns_roce_u_hw_v2.c b/providers/hns/hns_roce_u_hw_v2.c +index 9016978..0a100b8 100644 +--- a/providers/hns/hns_roce_u_hw_v2.c ++++ b/providers/hns/hns_roce_u_hw_v2.c +@@ -932,6 +932,10 @@ static int hns_roce_u_v2_poll_cq(struct ibv_cq *ibvcq, int ne, + + hns_roce_spin_unlock(&cq->hr_lock); + ++ /* Try to shrink the DCA mem */ ++ if (ctx->dca_ctx.mem_cnt > 0) ++ hns_roce_shrink_dca_mem(ctx); ++ + return err == V2_CQ_POLL_ERR ? err : npolled; + } + +@@ -1883,6 +1887,9 @@ static int hns_roce_u_v2_destroy_qp(struct ibv_qp *ibqp) + + free(qp); + ++ if (ctx->dca_ctx.mem_cnt > 0) ++ hns_roce_shrink_dca_mem(ctx); ++ + return ret; + } + +-- +2.33.0 + diff --git a/0022-libhns-Add-support-for-attaching-QP-s-WQE-buffer.patch b/0022-libhns-Add-support-for-attaching-QP-s-WQE-buffer.patch new file mode 100644 index 0000000000000000000000000000000000000000..123877030d090d526239738e24c3afcce01072fe --- /dev/null +++ b/0022-libhns-Add-support-for-attaching-QP-s-WQE-buffer.patch @@ -0,0 +1,575 @@ +From a1a5d42a2c48660c040695bd8316538a9ce83ab2 Mon Sep 17 00:00:00 2001 +From: Chengchang Tang +Date: Mon, 10 May 2021 17:13:17 +0800 +Subject: [PATCH 22/25] libhns: Add support for attaching QP's WQE buffer + +driver inclusion +category: feature +bugzilla: https://gitee.com/src-openeuler/rdma-core/issues/I9C2AQ + +------------------------------------------------------------------ + +If a uQP works in DCA mode, the WQE's buffer will be split as many blocks +and be stored into a list. The blocks are allocated from the DCA's memory +pool before posting WRs and are dropped when the QP's CI is equal to PI +after polling CQ. 
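+
+With DCA enabled the QP no longer maps one contiguous WQE buffer; it only
+keeps an array of page pointers (dca_wqe.bufs) that is refilled on attach.
+A WQE offset is then resolved in two steps, as the reworked get_wqe() in
+this patch does. Illustrative sketch, assuming 4K hardware pages so that
+dca_wqe.shift == HNS_HW_PAGE_SHIFT == 12 (dca_page_addr() is a hypothetical
+name for the lookup):
+
+    static void *dca_page_addr(struct hns_roce_dca_buf *dca_wqe,
+                               unsigned int offset)
+    {
+        /* e.g. offset 0x5010: page index 0x5010 >> 12 = 5 */
+        unsigned int page_idx = offset >> dca_wqe->shift;
+        /* in-page offset 0x5010 & 0xfff = 0x10 */
+        unsigned int page_off = offset & ((1 << dca_wqe->shift) - 1);
+
+        return dca_wqe->bufs[page_idx] + page_off;
+    }
+
+The bufs[] entries are only valid while the QP is attached, which is why
+post_send()/post_recv() go through dca_attach_qp_buf() first and the pages
+are given back once the CI catches up with the PI after polling the CQ.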
+ +Signed-off-by: Chengchang Tang +Reviewed-by: Yangyang Li +--- + providers/hns/hns_roce_u.h | 26 ++++- + providers/hns/hns_roce_u_buf.c | 173 ++++++++++++++++++++++++++++++- + providers/hns/hns_roce_u_hw_v2.c | 125 +++++++++++++++++++++- + providers/hns/hns_roce_u_hw_v2.h | 2 + + providers/hns/hns_roce_u_verbs.c | 32 ++++-- + 5 files changed, 345 insertions(+), 13 deletions(-) + +diff --git a/providers/hns/hns_roce_u.h b/providers/hns/hns_roce_u.h +index e3fa24d..ba646d3 100644 +--- a/providers/hns/hns_roce_u.h ++++ b/providers/hns/hns_roce_u.h +@@ -365,11 +365,18 @@ struct hns_roce_sge_ex { + unsigned int sge_shift; + }; + ++struct hns_roce_dca_buf { ++ void **bufs; ++ unsigned int max_cnt; ++ unsigned int shift; ++}; ++ + struct hns_roce_qp { + struct verbs_qp verbs_qp; + struct hns_roce_buf buf; ++ struct hns_roce_dca_buf dca_wqe; + int max_inline_data; +- int buf_size; ++ unsigned int buf_size; + unsigned int sq_signal_bits; + struct hns_roce_wq sq; + struct hns_roce_wq rq; +@@ -423,11 +430,22 @@ struct hns_roce_u_hw { + struct verbs_context_ops hw_ops; + }; + ++struct hns_roce_dca_attach_attr { ++ uint32_t sq_offset; ++ uint32_t sge_offset; ++ uint32_t rq_offset; ++}; ++ ++struct hns_roce_dca_detach_attr { ++ uint32_t sq_index; ++}; ++ + /* + * The entries's buffer should be aligned to a multiple of the hardware's + * minimum page size. + */ + #define hr_hw_page_align(x) align(x, HNS_HW_PAGE_SIZE) ++#define hr_hw_page_count(x) (hr_hw_page_align(x) / HNS_HW_PAGE_SIZE) + + static inline unsigned int to_hr_hem_entries_size(int count, int buf_shift) + { +@@ -603,9 +621,13 @@ void hns_roce_qp_spinlock_destroy(struct hns_roce_qp *qp); + + void hns_roce_free_qp_buf(struct hns_roce_qp *qp, struct hns_roce_context *ctx); + ++int hns_roce_attach_dca_mem(struct hns_roce_context *ctx, uint32_t handle, ++ struct hns_roce_dca_attach_attr *attr, ++ uint32_t size, struct hns_roce_dca_buf *buf); ++void hns_roce_detach_dca_mem(struct hns_roce_context *ctx, uint32_t handle, ++ struct hns_roce_dca_detach_attr *attr); + void hns_roce_shrink_dca_mem(struct hns_roce_context *ctx); + void hns_roce_cleanup_dca_mem(struct hns_roce_context *ctx); +-int hns_roce_add_dca_mem(struct hns_roce_context *ctx, uint32_t size); + + void hns_roce_init_qp_indices(struct hns_roce_qp *qp); + +diff --git a/providers/hns/hns_roce_u_buf.c b/providers/hns/hns_roce_u_buf.c +index c0f86e9..3d41b89 100644 +--- a/providers/hns/hns_roce_u_buf.c ++++ b/providers/hns/hns_roce_u_buf.c +@@ -196,6 +196,88 @@ static int shrink_dca_mem(struct hns_roce_context *ctx, uint32_t handle, + + return ret; + } ++ ++struct hns_dca_mem_query_resp { ++ uint64_t key; ++ uint32_t offset; ++ uint32_t page_count; ++}; ++ ++static int query_dca_mem(struct hns_roce_context *ctx, uint32_t handle, ++ uint32_t index, struct hns_dca_mem_query_resp *resp) ++{ ++ int ret; ++ ++ DECLARE_COMMAND_BUFFER(cmd, HNS_IB_OBJECT_DCA_MEM, ++ HNS_IB_METHOD_DCA_MEM_QUERY, 5); ++ fill_attr_in_obj(cmd, HNS_IB_ATTR_DCA_MEM_QUERY_HANDLE, handle); ++ fill_attr_in_uint32(cmd, HNS_IB_ATTR_DCA_MEM_QUERY_PAGE_INDEX, index); ++ fill_attr_out(cmd, HNS_IB_ATTR_DCA_MEM_QUERY_OUT_KEY, ++ &resp->key, sizeof(resp->key)); ++ fill_attr_out(cmd, HNS_IB_ATTR_DCA_MEM_QUERY_OUT_OFFSET, ++ &resp->offset, sizeof(resp->offset)); ++ fill_attr_out(cmd, HNS_IB_ATTR_DCA_MEM_QUERY_OUT_PAGE_COUNT, ++ &resp->page_count, sizeof(resp->page_count)); ++ ret = execute_ioctl(&ctx->ibv_ctx.context, cmd); ++ if (ret) ++ verbs_err(&ctx->ibv_ctx, ++ "failed to query DCA mem-%u, ret = %d.\n", ++ handle, ret); ++ ++ 
return ret; ++} ++ ++void hns_roce_detach_dca_mem(struct hns_roce_context *ctx, uint32_t handle, ++ struct hns_roce_dca_detach_attr *attr) ++{ ++ int ret; ++ ++ DECLARE_COMMAND_BUFFER(cmd, HNS_IB_OBJECT_DCA_MEM, ++ HNS_IB_METHOD_DCA_MEM_DETACH, 4); ++ fill_attr_in_obj(cmd, HNS_IB_ATTR_DCA_MEM_DETACH_HANDLE, handle); ++ fill_attr_in_uint32(cmd, HNS_IB_ATTR_DCA_MEM_DETACH_SQ_INDEX, ++ attr->sq_index); ++ ret = execute_ioctl(&ctx->ibv_ctx.context, cmd); ++ if (ret) ++ verbs_warn(&ctx->ibv_ctx, ++ "failed to detach DCA mem-%u, ret = %d.\n", ++ handle, ret); ++} ++ ++struct hns_dca_mem_attach_resp { ++#define HNS_DCA_ATTACH_OUT_FLAGS_NEW_BUFFER BIT(0) ++ uint32_t alloc_flags; ++ uint32_t alloc_pages; ++}; ++ ++static int attach_dca_mem(struct hns_roce_context *ctx, uint32_t handle, ++ struct hns_roce_dca_attach_attr *attr, ++ struct hns_dca_mem_attach_resp *resp) ++{ ++ int ret; ++ ++ DECLARE_COMMAND_BUFFER(cmd, HNS_IB_OBJECT_DCA_MEM, ++ HNS_IB_METHOD_DCA_MEM_ATTACH, 6); ++ fill_attr_in_obj(cmd, HNS_IB_ATTR_DCA_MEM_ATTACH_HANDLE, handle); ++ fill_attr_in_uint32(cmd, HNS_IB_ATTR_DCA_MEM_ATTACH_SQ_OFFSET, ++ attr->sq_offset); ++ fill_attr_in_uint32(cmd, HNS_IB_ATTR_DCA_MEM_ATTACH_SGE_OFFSET, ++ attr->sge_offset); ++ fill_attr_in_uint32(cmd, HNS_IB_ATTR_DCA_MEM_ATTACH_RQ_OFFSET, ++ attr->rq_offset); ++ fill_attr_out(cmd, HNS_IB_ATTR_DCA_MEM_ATTACH_OUT_ALLOC_FLAGS, ++ &resp->alloc_flags, sizeof(resp->alloc_flags)); ++ fill_attr_out(cmd, HNS_IB_ATTR_DCA_MEM_ATTACH_OUT_ALLOC_PAGES, ++ &resp->alloc_pages, sizeof(resp->alloc_pages)); ++ ret = execute_ioctl(&ctx->ibv_ctx.context, cmd); ++ if (ret) ++ verbs_err(&ctx->ibv_ctx, ++ "failed to attach DCA mem-%u, ret = %d.\n", ++ handle, ret); ++ ++ return ret; ++} ++ + static bool add_dca_mem_enabled(struct hns_roce_dca_ctx *ctx, + uint32_t alloc_size) + { +@@ -226,7 +308,7 @@ static bool shrink_dca_mem_enabled(struct hns_roce_dca_ctx *ctx) + return enable; + } + +-int hns_roce_add_dca_mem(struct hns_roce_context *ctx, uint32_t size) ++static int add_dca_mem(struct hns_roce_context *ctx, uint32_t size) + { + struct hns_roce_dca_ctx *dca_ctx = &ctx->dca_ctx; + struct hns_roce_dca_mem *mem; +@@ -310,3 +392,92 @@ void hns_roce_shrink_dca_mem(struct hns_roce_context *ctx) + dca_mem_cnt--; + } + } ++ ++static void config_dca_pages(void *addr, struct hns_roce_dca_buf *buf, ++ uint32_t page_index, int page_count) ++{ ++ void **pages = &buf->bufs[page_index]; ++ int page_size = 1 << buf->shift; ++ int i; ++ ++ for (i = 0; i < page_count; i++) { ++ pages[i] = addr; ++ addr += page_size; ++ } ++} ++ ++static int setup_dca_buf(struct hns_roce_context *ctx, uint32_t handle, ++ struct hns_roce_dca_buf *buf, uint32_t page_count) ++{ ++ struct hns_roce_dca_ctx *dca_ctx = &ctx->dca_ctx; ++ struct hns_dca_mem_query_resp resp = {}; ++ struct hns_roce_dca_mem *mem; ++ uint32_t idx = 0; ++ int ret; ++ ++ while (idx < page_count && idx < buf->max_cnt) { ++ resp.page_count = 0; ++ ret = query_dca_mem(ctx, handle, idx, &resp); ++ if (ret) ++ return -ENOMEM; ++ if (resp.page_count < 1) ++ break; ++ ++ pthread_spin_lock(&dca_ctx->lock); ++ mem = key_to_dca_mem(dca_ctx, resp.key); ++ if (mem && resp.offset < mem->buf.length) { ++ config_dca_pages(dca_mem_addr(mem, resp.offset), ++ buf, idx, resp.page_count); ++ } else { ++ pthread_spin_unlock(&dca_ctx->lock); ++ break; ++ } ++ pthread_spin_unlock(&dca_ctx->lock); ++ ++ idx += resp.page_count; ++ } ++ ++ return (idx >= page_count) ? 
0 : -ENOMEM; ++} ++ ++#define DCA_EXPAND_MEM_TRY_TIMES 3 ++int hns_roce_attach_dca_mem(struct hns_roce_context *ctx, uint32_t handle, ++ struct hns_roce_dca_attach_attr *attr, ++ uint32_t size, struct hns_roce_dca_buf *buf) ++{ ++ uint32_t buf_pages = size >> buf->shift; ++ struct hns_dca_mem_attach_resp resp = {}; ++ bool is_new_buf = true; ++ int try_times = 0; ++ int ret = 0; ++ ++ do { ++ resp.alloc_pages = 0; ++ ret = attach_dca_mem(ctx, handle, attr, &resp); ++ if (ret) ++ break; ++ ++ if (resp.alloc_pages >= buf_pages) { ++ is_new_buf = !!(resp.alloc_flags & ++ HNS_DCA_ATTACH_OUT_FLAGS_NEW_BUFFER); ++ break; ++ } ++ ++ ret = add_dca_mem(ctx, size); ++ if (ret) ++ break; ++ } while (try_times++ < DCA_EXPAND_MEM_TRY_TIMES); ++ ++ if (ret || resp.alloc_pages < buf_pages) { ++ verbs_err(&ctx->ibv_ctx, ++ "failed to attach, size %u count %u != %u, ret = %d.\n", ++ size, buf_pages, resp.alloc_pages, ret); ++ return -ENOMEM; ++ } ++ ++ /* No need config user address if DCA config not changed */ ++ if (!is_new_buf && buf->bufs[0]) ++ return 0; ++ ++ return setup_dca_buf(ctx, handle, buf, buf_pages); ++} +diff --git a/providers/hns/hns_roce_u_hw_v2.c b/providers/hns/hns_roce_u_hw_v2.c +index 0a100b8..7a93456 100644 +--- a/providers/hns/hns_roce_u_hw_v2.c ++++ b/providers/hns/hns_roce_u_hw_v2.c +@@ -199,19 +199,35 @@ static struct hns_roce_v2_cqe *next_cqe_sw_v2(struct hns_roce_cq *cq) + return get_sw_cqe_v2(cq, cq->cons_index); + } + ++static inline bool check_qp_dca_enable(struct hns_roce_qp *qp) ++{ ++ return !!qp->dca_wqe.bufs; ++} ++ ++static inline void *get_wqe(struct hns_roce_qp *qp, unsigned int offset) ++{ ++ if (likely(qp->buf.buf)) ++ return qp->buf.buf + offset; ++ else if (unlikely(check_qp_dca_enable(qp))) ++ return qp->dca_wqe.bufs[offset >> qp->dca_wqe.shift] + ++ (offset & ((1 << qp->dca_wqe.shift) - 1)); ++ else ++ return NULL; ++} ++ + static void *get_recv_wqe_v2(struct hns_roce_qp *qp, unsigned int n) + { +- return qp->buf.buf + qp->rq.offset + (n << qp->rq.wqe_shift); ++ return get_wqe(qp, qp->rq.offset + (n << qp->rq.wqe_shift)); + } + + static void *get_send_wqe(struct hns_roce_qp *qp, unsigned int n) + { +- return qp->buf.buf + qp->sq.offset + (n << qp->sq.wqe_shift); ++ return get_wqe(qp, qp->sq.offset + (n << qp->sq.wqe_shift)); + } + + static void *get_send_sge_ex(struct hns_roce_qp *qp, unsigned int n) + { +- return qp->buf.buf + qp->ex_sge.offset + (n << qp->ex_sge.sge_shift); ++ return get_wqe(qp, qp->ex_sge.offset + (n << qp->ex_sge.sge_shift)); + } + + static void *get_srq_wqe(struct hns_roce_srq *srq, unsigned int n) +@@ -580,6 +596,73 @@ static void parse_cqe_for_req(struct hns_roce_v2_cqe *cqe, struct ibv_wc *wc, + wc->opcode = wc_send_op_map[opcode]; + } + ++static bool check_dca_attach_enable(struct hns_roce_qp *qp) ++{ ++ return check_qp_dca_enable(qp) && ++ (qp->flags & HNS_ROCE_QP_CAP_DYNAMIC_CTX_ATTACH); ++} ++ ++static bool check_dca_detach_enable(struct hns_roce_qp *qp) ++{ ++ return check_qp_dca_enable(qp) && ++ (qp->flags & HNS_ROCE_QP_CAP_DYNAMIC_CTX_DETACH); ++} ++ ++static int dca_attach_qp_buf(struct hns_roce_context *ctx, ++ struct hns_roce_qp *qp) ++{ ++ struct hns_roce_dca_attach_attr attr = {}; ++ uint32_t idx; ++ int ret; ++ ++ hns_roce_spin_lock(&qp->sq.hr_lock); ++ hns_roce_spin_lock(&qp->rq.hr_lock); ++ ++ if (qp->sq.wqe_cnt > 0) { ++ idx = qp->sq.head & (qp->sq.wqe_cnt - 1); ++ attr.sq_offset = idx << qp->sq.wqe_shift; ++ } ++ ++ if (qp->ex_sge.sge_cnt > 0) { ++ idx = qp->next_sge & (qp->ex_sge.sge_cnt - 1); ++ attr.sge_offset = idx 
<< qp->ex_sge.sge_shift; ++ } ++ ++ if (qp->rq.wqe_cnt > 0) { ++ idx = qp->rq.head & (qp->rq.wqe_cnt - 1); ++ attr.rq_offset = idx << qp->rq.wqe_shift; ++ } ++ ++ ++ ret = hns_roce_attach_dca_mem(ctx, qp->verbs_qp.qp.handle, &attr, ++ qp->buf_size, &qp->dca_wqe); ++ ++ hns_roce_spin_unlock(&qp->rq.hr_lock); ++ hns_roce_spin_unlock(&qp->sq.hr_lock); ++ ++ return ret; ++} ++ ++static void dca_detach_qp_buf(struct hns_roce_context *ctx, ++ struct hns_roce_qp *qp) ++{ ++ struct hns_roce_dca_detach_attr attr; ++ bool is_empty; ++ ++ hns_roce_spin_lock(&qp->sq.hr_lock); ++ hns_roce_spin_lock(&qp->rq.hr_lock); ++ ++ is_empty = qp->sq.head == qp->sq.tail && qp->rq.head == qp->rq.tail; ++ if (is_empty && qp->sq.wqe_cnt > 0) ++ attr.sq_index = qp->sq.head & (qp->sq.wqe_cnt - 1); ++ ++ hns_roce_spin_unlock(&qp->rq.hr_lock); ++ hns_roce_spin_unlock(&qp->sq.hr_lock); ++ ++ if (is_empty && qp->sq.wqe_cnt > 0) ++ hns_roce_detach_dca_mem(ctx, qp->verbs_qp.qp.handle, &attr); ++} ++ + static void cqe_proc_sq(struct hns_roce_qp *hr_qp, uint32_t wqe_idx, + struct hns_roce_cq *cq) + { +@@ -919,6 +1002,9 @@ static int hns_roce_u_v2_poll_cq(struct ibv_cq *ibvcq, int ne, + + for (npolled = 0; npolled < ne; ++npolled) { + err = hns_roce_poll_one(ctx, &qp, cq, wc + npolled); ++ if (qp && check_dca_detach_enable(qp)) ++ dca_detach_qp_buf(ctx, qp); ++ + if (err != V2_CQ_OK) + break; + } +@@ -970,7 +1056,7 @@ static int check_qp_send(struct hns_roce_qp *qp, struct hns_roce_context *ctx) + + if (unlikely(ibvqp->state == IBV_QPS_RESET || + ibvqp->state == IBV_QPS_INIT || +- ibvqp->state == IBV_QPS_RTR)){ ++ ibvqp->state == IBV_QPS_RTR)) { + verbs_err(verbs_get_ctx(qp->verbs_qp.qp.context), + "unsupported qp state, state = %d.\n", ibvqp->state); + return EINVAL; +@@ -980,6 +1066,14 @@ static int check_qp_send(struct hns_roce_qp *qp, struct hns_roce_context *ctx) + return EIO; + } + ++ if (check_dca_attach_enable(qp)) { ++ ret = dca_attach_qp_buf(ctx, qp); ++ if (ret) ++ verbs_err_datapath(&ctx->ibv_ctx, ++ "failed to attach QP-%u send, ret = %d.\n", ++ qp->verbs_qp.qp.qp_num, ret); ++ } ++ + return ret; + } + +@@ -1347,6 +1441,13 @@ static int set_rc_inl(struct hns_roce_qp *qp, const struct ibv_send_wr *wr, + return 0; + } + ++static inline void fill_rc_dca_fields(uint32_t qp_num, ++ struct hns_roce_rc_sq_wqe *wqe) ++{ ++ hr_reg_write(wqe, RCWQE_SQPN_L, qp_num); ++ hr_reg_write(wqe, RCWQE_SQPN_H, qp_num >> RCWQE_SQPN_L_WIDTH); ++} ++ + static void set_bind_mw_seg(struct hns_roce_rc_sq_wqe *wqe, + const struct ibv_send_wr *wr) + { +@@ -1454,6 +1555,9 @@ static int set_rc_wqe(void *wqe, struct hns_roce_qp *qp, struct ibv_send_wr *wr, + return ret; + + wqe_valid: ++ if (check_qp_dca_enable(qp)) ++ fill_rc_dca_fields(qp->verbs_qp.qp.qp_num, rc_sq_wqe); ++ + enable_wqe(qp, rc_sq_wqe, qp->sq.head + nreq); + + return 0; +@@ -1563,6 +1667,14 @@ static int check_qp_recv(struct hns_roce_qp *qp, struct hns_roce_context *ctx) + return EIO; + } + ++ if (check_dca_attach_enable(qp)) { ++ ret = dca_attach_qp_buf(ctx, qp); ++ if (ret) ++ verbs_err_datapath(&ctx->ibv_ctx, ++ "failed to attach QP-%u recv, ret = %d.\n", ++ qp->verbs_qp.qp.qp_num, ret); ++ } ++ + return ret; + } + +@@ -1758,6 +1870,7 @@ static void record_qp_attr(struct ibv_qp *qp, struct ibv_qp_attr *attr, + static int hns_roce_u_v2_modify_qp(struct ibv_qp *qp, struct ibv_qp_attr *attr, + int attr_mask) + { ++ struct hns_roce_context *ctx = to_hr_ctx(qp->context); + struct hns_roce_modify_qp_ex_resp resp_ex = {}; + struct hns_roce_modify_qp_ex cmd_ex = {}; + struct 
hns_roce_qp *hr_qp = to_hr_qp(qp); +@@ -1804,6 +1917,10 @@ static int hns_roce_u_v2_modify_qp(struct ibv_qp *qp, struct ibv_qp_attr *attr, + hns_roce_init_qp_indices(to_hr_qp(qp)); + } + ++ /* Try to shrink the DCA mem */ ++ if (ctx->dca_ctx.mem_cnt > 0) ++ hns_roce_shrink_dca_mem(ctx); ++ + record_qp_attr(qp, attr, attr_mask); + + return ret; +diff --git a/providers/hns/hns_roce_u_hw_v2.h b/providers/hns/hns_roce_u_hw_v2.h +index 1a7b828..50a920f 100644 +--- a/providers/hns/hns_roce_u_hw_v2.h ++++ b/providers/hns/hns_roce_u_hw_v2.h +@@ -237,6 +237,8 @@ struct hns_roce_rc_sq_wqe { + #define RCWQE_MW_RR_EN RCWQE_FIELD_LOC(259, 259) + #define RCWQE_MW_RW_EN RCWQE_FIELD_LOC(260, 260) + ++#define RCWQE_SQPN_L_WIDTH 2 ++ + struct hns_roce_v2_wqe_data_seg { + __le32 len; + __le32 lkey; +diff --git a/providers/hns/hns_roce_u_verbs.c b/providers/hns/hns_roce_u_verbs.c +index 69bcc13..248d862 100644 +--- a/providers/hns/hns_roce_u_verbs.c ++++ b/providers/hns/hns_roce_u_verbs.c +@@ -1311,6 +1311,14 @@ static int calc_qp_buff_size(struct hns_roce_device *hr_dev, + return 0; + } + ++static inline bool check_qp_support_dca(bool pool_en, enum ibv_qp_type qp_type) ++{ ++ if (pool_en && (qp_type == IBV_QPT_RC || qp_type == IBV_QPT_XRC_SEND)) ++ return true; ++ ++ return false; ++} ++ + static void qp_free_wqe(struct hns_roce_qp *qp) + { + free_recv_rinl_buf(&qp->rq_rinl_buf); +@@ -1322,8 +1330,8 @@ static void qp_free_wqe(struct hns_roce_qp *qp) + hns_roce_free_buf(&qp->buf); + } + +-static int qp_alloc_wqe(struct ibv_qp_cap *cap, struct hns_roce_qp *qp, +- struct hns_roce_context *ctx) ++static int qp_alloc_wqe(struct ibv_qp_init_attr_ex *attr, ++ struct hns_roce_qp *qp, struct hns_roce_context *ctx) + { + struct hns_roce_device *hr_dev = to_hr_dev(ctx->ibv_ctx.context.device); + +@@ -1341,12 +1349,24 @@ static int qp_alloc_wqe(struct ibv_qp_cap *cap, struct hns_roce_qp *qp, + } + + if (qp->rq_rinl_buf.wqe_cnt) { +- if (alloc_recv_rinl_buf(cap->max_recv_sge, &qp->rq_rinl_buf)) ++ if (alloc_recv_rinl_buf(attr->cap.max_recv_sge, ++ &qp->rq_rinl_buf)) + goto err_alloc; + } + +- if (hns_roce_alloc_buf(&qp->buf, qp->buf_size, 1 << qp->pageshift)) +- goto err_alloc; ++ if (check_qp_support_dca(ctx->dca_ctx.max_size != 0, attr->qp_type)) { ++ /* when DCA is enabled, use a buffer list to store page addr */ ++ qp->buf.buf = NULL; ++ qp->dca_wqe.max_cnt = hr_hw_page_count(qp->buf_size); ++ qp->dca_wqe.shift = HNS_HW_PAGE_SHIFT; ++ qp->dca_wqe.bufs = calloc(qp->dca_wqe.max_cnt, sizeof(void *)); ++ if (!qp->dca_wqe.bufs) ++ goto err_alloc; ++ } else { ++ if (hns_roce_alloc_buf(&qp->buf, qp->buf_size, ++ HNS_HW_PAGE_SIZE)) ++ goto err_alloc; ++ } + + return 0; + +@@ -1636,7 +1656,7 @@ static int hns_roce_alloc_qp_buf(struct ibv_qp_init_attr_ex *attr, + { + int ret; + +- ret = qp_alloc_wqe(&attr->cap, qp, ctx); ++ ret = qp_alloc_wqe(attr, qp, ctx); + if (ret) + return ret; + +-- +2.33.0 + diff --git a/0023-libhns-Use-shared-memory-to-sync-DCA-status.patch b/0023-libhns-Use-shared-memory-to-sync-DCA-status.patch new file mode 100644 index 0000000000000000000000000000000000000000..9cf722ed053e71c1941c9ed9459b8899216fdf3a --- /dev/null +++ b/0023-libhns-Use-shared-memory-to-sync-DCA-status.patch @@ -0,0 +1,167 @@ +From 831683cc6bb077ab409cb6a1b7252a6e1762bc11 Mon Sep 17 00:00:00 2001 +From: Chengchang Tang +Date: Tue, 29 Jun 2021 20:06:47 +0800 +Subject: [PATCH 23/25] libhns: Use shared memory to sync DCA status + +driver inclusion +category: feature +bugzilla: https://gitee.com/src-openeuler/rdma-core/issues/I9C2AQ + 
+------------------------------------------------------------------ + +The user DCA needs to check the QP attaching state before filling wqe +buffer by the response from uverbs 'HNS_IB_METHOD_DCA_MEM_ATTACH', but +this will result in too much time being wasted on system calls, so use a +shared table between user driver and kernel driver to sync DCA status. + +Signed-off-by: Chengchang Tang +Reviewed-by: Yangyang Li +--- + providers/hns/hns_roce_u.c | 51 +++++++++++++++++++++++++++++++++++--- + providers/hns/hns_roce_u.h | 10 ++++++++ + 2 files changed, 57 insertions(+), 4 deletions(-) + +diff --git a/providers/hns/hns_roce_u.c b/providers/hns/hns_roce_u.c +index 2272431..56ff201 100644 +--- a/providers/hns/hns_roce_u.c ++++ b/providers/hns/hns_roce_u.c +@@ -113,9 +113,33 @@ static int hns_roce_mmap(struct hns_roce_device *hr_dev, + return 0; + } + +-static int init_dca_context(struct hns_roce_context *ctx, int page_size) ++static int mmap_dca(struct hns_roce_context *ctx, int cmd_fd, ++ int page_size, size_t size, uint64_t mmap_key) + { + struct hns_roce_dca_ctx *dca_ctx = &ctx->dca_ctx; ++ void *addr; ++ ++ addr = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED, cmd_fd, ++ mmap_key); ++ if (addr == MAP_FAILED) { ++ verbs_err(&ctx->ibv_ctx, "failed to mmap() dca prime qp.\n"); ++ return -EINVAL; ++ } ++ ++ dca_ctx->buf_status = addr; ++ dca_ctx->sync_status = addr + size / 2; ++ ++ return 0; ++} ++ ++static int init_dca_context(struct hns_roce_context *ctx, int cmd_fd, ++ struct hns_roce_alloc_ucontext_resp *resp, ++ int page_size) ++{ ++ struct hns_roce_dca_ctx *dca_ctx = &ctx->dca_ctx; ++ uint64_t mmap_key = resp->dca_mmap_key; ++ int mmap_size = resp->dca_mmap_size; ++ int max_qps = resp->dca_qps; + int ret; + + if (!(ctx->config & HNS_ROCE_UCTX_RSP_DCA_FLAGS)) +@@ -130,6 +154,16 @@ static int init_dca_context(struct hns_roce_context *ctx, int page_size) + dca_ctx->max_size = HNS_DCA_MAX_MEM_SIZE; + dca_ctx->mem_cnt = 0; + ++ if (mmap_key) { ++ const unsigned int bits_per_qp = 2 * HNS_DCA_BITS_PER_STATUS; ++ ++ if (!mmap_dca(ctx, cmd_fd, page_size, mmap_size, mmap_key)) { ++ dca_ctx->status_size = mmap_size; ++ dca_ctx->max_qps = min_t(int, max_qps, ++ mmap_size * 8 / bits_per_qp); ++ } ++ } ++ + return 0; + } + +@@ -143,6 +177,8 @@ static void uninit_dca_context(struct hns_roce_context *ctx) + pthread_spin_lock(&dca_ctx->lock); + hns_roce_cleanup_dca_mem(ctx); + pthread_spin_unlock(&dca_ctx->lock); ++ if (dca_ctx->buf_status) ++ munmap(dca_ctx->buf_status, dca_ctx->status_size); + + pthread_spin_destroy(&dca_ctx->lock); + } +@@ -217,6 +253,14 @@ static int set_context_attr(struct hns_roce_device *hr_dev, + return 0; + } + ++static void ucontext_set_cmd(struct hns_roce_alloc_ucontext *cmd, int page_size) ++{ ++ cmd->config |= HNS_ROCE_EXSGE_FLAGS | HNS_ROCE_RQ_INLINE_FLAGS | ++ HNS_ROCE_CQE_INLINE_FLAGS | HNS_ROCE_UCTX_CONFIG_DCA; ++ cmd->comp = HNS_ROCE_ALLOC_UCTX_COMP_DCA_MAX_QPS; ++ cmd->dca_max_qps = page_size * 8 / 2 * HNS_DCA_BITS_PER_STATUS; ++} ++ + static struct verbs_context *hns_roce_alloc_context(struct ibv_device *ibdev, + int cmd_fd, + void *private_data) +@@ -231,8 +275,7 @@ static struct verbs_context *hns_roce_alloc_context(struct ibv_device *ibdev, + if (!context) + return NULL; + +- cmd.config |= HNS_ROCE_EXSGE_FLAGS | HNS_ROCE_RQ_INLINE_FLAGS | +- HNS_ROCE_CQE_INLINE_FLAGS | HNS_ROCE_UCTX_CONFIG_DCA; ++ ucontext_set_cmd(&cmd, hr_dev->page_size); + if (ibv_cmd_get_context(&context->ibv_ctx, &cmd.ibv_cmd, sizeof(cmd), + &resp.ibv_resp, sizeof(resp))) + goto 
err_free; +@@ -245,7 +288,7 @@ static struct verbs_context *hns_roce_alloc_context(struct ibv_device *ibdev, + if (context->uar == MAP_FAILED) + goto err_free; + +- if (init_dca_context(context, hr_dev->page_size)) ++ if (init_dca_context(context, cmd_fd, &resp, hr_dev->page_size)) + goto err_free; + + if (init_reset_context(context, cmd_fd, &resp, hr_dev->page_size)) +diff --git a/providers/hns/hns_roce_u.h b/providers/hns/hns_roce_u.h +index ba646d3..e808ff3 100644 +--- a/providers/hns/hns_roce_u.h ++++ b/providers/hns/hns_roce_u.h +@@ -35,6 +35,7 @@ + + #include + #include ++#include + #include + + #include +@@ -44,6 +45,7 @@ + #include + #include + #include ++#include + #include + #include "hns_roce_u_abi.h" + +@@ -52,6 +54,8 @@ + + #define PFX "hns: " + ++typedef _Atomic(uint64_t) atomic_bitmap_t; ++ + /* The minimum page size is 4K for hardware */ + #define HNS_HW_PAGE_SHIFT 12 + #define HNS_HW_PAGE_SIZE (1 << HNS_HW_PAGE_SHIFT) +@@ -216,6 +220,12 @@ struct hns_roce_dca_ctx { + uint64_t max_size; + uint64_t min_size; + uint64_t curr_size; ++ ++#define HNS_DCA_BITS_PER_STATUS 1 ++ unsigned int max_qps; ++ unsigned int status_size; ++ atomic_bitmap_t *buf_status; ++ atomic_bitmap_t *sync_status; + }; + + struct hns_roce_v2_reset_state { +-- +2.33.0 + diff --git a/0024-libhns-Sync-DCA-status-by-shared-memory.patch b/0024-libhns-Sync-DCA-status-by-shared-memory.patch new file mode 100644 index 0000000000000000000000000000000000000000..3005ba43f5397fd2023ab9fd1a10dd886aff2ebd --- /dev/null +++ b/0024-libhns-Sync-DCA-status-by-shared-memory.patch @@ -0,0 +1,223 @@ +From 5b151e86c6004c11913fc9a8086f0fc63902af45 Mon Sep 17 00:00:00 2001 +From: Chengchang Tang +Date: Tue, 29 Jun 2021 21:01:27 +0800 +Subject: [PATCH 24/25] libhns: Sync DCA status by shared memory + +driver inclusion +category: feature +bugzilla: https://gitee.com/src-openeuler/rdma-core/issues/I9C2AQ + +------------------------------------------------------------------ + +Use DCA num from the resp of modify_qp() and indicate the DCA status bit in +the shared memory, if the num is valid, the user DCA can get the DCA status +by testing the bit in the shared memory for each QP, othewise invoke the +verbs 'HNS_IB_METHOD_DCA_MEM_ATTACH' to check the DCA status. + +Each QP has 2 bits in shared memory, 1 bit is used to lock the DCA status +changing by kernel driver or user driver, another bit is used to indicate +the DCA attaching status. 
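+
+The resulting fast path looks roughly like the sketch below (illustrative
+only; the helpers are the ones added by this patch and "fill the WQEs"
+stands for the unchanged post_send()/post_recv() body):
+
+    /* dcan: per-QP DCA num returned by modify_qp() at the RTR step. */
+    uint32_t dcan = qp->dca_wqe.dcan;
+    struct hns_roce_dca_attach_attr attr = {};
+
+    /* Grab the QP's sync bit so the kernel will not detach the WQE
+     * buffer while it is being filled; give up after a few tries and
+     * fall back to a forced attach.
+     */
+    if (!hns_roce_dca_start_post(&ctx->dca_ctx, dcan))
+        attr.force = true;
+
+    /* If the attach bit is already set in the shared table, the
+     * HNS_IB_METHOD_DCA_MEM_ATTACH ioctl is skipped entirely.
+     */
+    hns_roce_attach_dca_mem(ctx, qp->verbs_qp.qp.handle, &attr,
+                            qp->buf_size, &qp->dca_wqe);
+
+    /* ... fill the WQEs and ring the doorbell ... */
+
+    hns_roce_dca_stop_post(&ctx->dca_ctx, dcan); /* release the sync bit */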
+ +Signed-off-by: Chengchang Tang +Reviewed-by: Yangyang Li +--- + providers/hns/hns_roce_u.h | 31 +++++++++++++++++++++++ + providers/hns/hns_roce_u_buf.c | 42 ++++++++++++++++++++++++++++++++ + providers/hns/hns_roce_u_hw_v2.c | 21 +++++++++++++++- + 3 files changed, 93 insertions(+), 1 deletion(-) + +diff --git a/providers/hns/hns_roce_u.h b/providers/hns/hns_roce_u.h +index e808ff3..5bddb00 100644 +--- a/providers/hns/hns_roce_u.h ++++ b/providers/hns/hns_roce_u.h +@@ -379,6 +379,7 @@ struct hns_roce_dca_buf { + void **bufs; + unsigned int max_cnt; + unsigned int shift; ++ unsigned int dcan; + }; + + struct hns_roce_qp { +@@ -444,6 +445,7 @@ struct hns_roce_dca_attach_attr { + uint32_t sq_offset; + uint32_t sge_offset; + uint32_t rq_offset; ++ bool force; + }; + + struct hns_roce_dca_detach_attr { +@@ -556,6 +558,32 @@ static inline int hns_roce_spin_unlock(struct hns_roce_spinlock *hr_lock) + return 0; + } + ++#define HNS_ROCE_BIT_MASK(nr) (1UL << ((nr) % 64)) ++#define HNS_ROCE_BIT_WORD(nr) ((nr) / 64) ++ ++static inline bool atomic_test_bit(atomic_bitmap_t *p, uint32_t nr) ++{ ++ p += HNS_ROCE_BIT_WORD(nr); ++ return !!(atomic_load(p) & HNS_ROCE_BIT_MASK(nr)); ++} ++ ++static inline bool test_and_set_bit_lock(atomic_bitmap_t *p, uint32_t nr) ++{ ++ uint64_t mask = HNS_ROCE_BIT_MASK(nr); ++ ++ p += HNS_ROCE_BIT_WORD(nr); ++ if (atomic_load(p) & mask) ++ return true; ++ ++ return (atomic_fetch_or(p, mask) & mask) != 0; ++} ++ ++static inline void clear_bit_unlock(atomic_bitmap_t *p, uint32_t nr) ++{ ++ p += HNS_ROCE_BIT_WORD(nr); ++ atomic_fetch_and(p, ~HNS_ROCE_BIT_MASK(nr)); ++} ++ + int hns_roce_u_query_device(struct ibv_context *context, + const struct ibv_query_device_ex_input *input, + struct ibv_device_attr_ex *attr, size_t attr_size); +@@ -636,6 +664,9 @@ int hns_roce_attach_dca_mem(struct hns_roce_context *ctx, uint32_t handle, + uint32_t size, struct hns_roce_dca_buf *buf); + void hns_roce_detach_dca_mem(struct hns_roce_context *ctx, uint32_t handle, + struct hns_roce_dca_detach_attr *attr); ++bool hns_roce_dca_start_post(struct hns_roce_dca_ctx *ctx, uint32_t dcan); ++void hns_roce_dca_stop_post(struct hns_roce_dca_ctx *ctx, uint32_t dcan); ++ + void hns_roce_shrink_dca_mem(struct hns_roce_context *ctx); + void hns_roce_cleanup_dca_mem(struct hns_roce_context *ctx); + +diff --git a/providers/hns/hns_roce_u_buf.c b/providers/hns/hns_roce_u_buf.c +index 3d41b89..08c0fbc 100644 +--- a/providers/hns/hns_roce_u_buf.c ++++ b/providers/hns/hns_roce_u_buf.c +@@ -440,6 +440,45 @@ static int setup_dca_buf(struct hns_roce_context *ctx, uint32_t handle, + return (idx >= page_count) ? 
0 : -ENOMEM; + } + ++#define DCAN_TO_SYNC_BIT(n) ((n) * HNS_DCA_BITS_PER_STATUS) ++#define DCAN_TO_STAT_BIT(n) DCAN_TO_SYNC_BIT(n) ++ ++#define MAX_DCA_TRY_LOCK_TIMES 10 ++bool hns_roce_dca_start_post(struct hns_roce_dca_ctx *ctx, uint32_t dcan) ++{ ++ atomic_bitmap_t *st = ctx->sync_status; ++ int try_times = 0; ++ ++ if (!st || dcan >= ctx->max_qps) ++ return true; ++ ++ while (test_and_set_bit_lock(st, DCAN_TO_SYNC_BIT(dcan))) ++ if (try_times++ > MAX_DCA_TRY_LOCK_TIMES) ++ return false; ++ ++ return true; ++} ++ ++void hns_roce_dca_stop_post(struct hns_roce_dca_ctx *ctx, uint32_t dcan) ++{ ++ atomic_bitmap_t *st = ctx->sync_status; ++ ++ if (!st || dcan >= ctx->max_qps) ++ return; ++ ++ clear_bit_unlock(st, DCAN_TO_SYNC_BIT(dcan)); ++} ++ ++static bool check_dca_is_attached(struct hns_roce_dca_ctx *ctx, uint32_t dcan) ++{ ++ atomic_bitmap_t *st = ctx->buf_status; ++ ++ if (!st || dcan >= ctx->max_qps) ++ return false; ++ ++ return atomic_test_bit(st, DCAN_TO_STAT_BIT(dcan)); ++} ++ + #define DCA_EXPAND_MEM_TRY_TIMES 3 + int hns_roce_attach_dca_mem(struct hns_roce_context *ctx, uint32_t handle, + struct hns_roce_dca_attach_attr *attr, +@@ -451,6 +490,9 @@ int hns_roce_attach_dca_mem(struct hns_roce_context *ctx, uint32_t handle, + int try_times = 0; + int ret = 0; + ++ if (!attr->force && check_dca_is_attached(&ctx->dca_ctx, buf->dcan)) ++ return 0; ++ + do { + resp.alloc_pages = 0; + ret = attach_dca_mem(ctx, handle, attr, &resp); +diff --git a/providers/hns/hns_roce_u_hw_v2.c b/providers/hns/hns_roce_u_hw_v2.c +index 7a93456..15d9108 100644 +--- a/providers/hns/hns_roce_u_hw_v2.c ++++ b/providers/hns/hns_roce_u_hw_v2.c +@@ -612,6 +612,7 @@ static int dca_attach_qp_buf(struct hns_roce_context *ctx, + struct hns_roce_qp *qp) + { + struct hns_roce_dca_attach_attr attr = {}; ++ bool enable_detach; + uint32_t idx; + int ret; + +@@ -633,9 +634,16 @@ static int dca_attach_qp_buf(struct hns_roce_context *ctx, + attr.rq_offset = idx << qp->rq.wqe_shift; + } + ++ enable_detach = check_dca_detach_enable(qp); ++ if (enable_detach && ++ !hns_roce_dca_start_post(&ctx->dca_ctx, qp->dca_wqe.dcan)) ++ /* Force attach if failed to sync dca status */ ++ attr.force = true; + + ret = hns_roce_attach_dca_mem(ctx, qp->verbs_qp.qp.handle, &attr, +- qp->buf_size, &qp->dca_wqe); ++ qp->buf_size, &qp->dca_wqe); ++ if (ret && enable_detach) ++ hns_roce_dca_stop_post(&ctx->dca_ctx, qp->dca_wqe.dcan); + + hns_roce_spin_unlock(&qp->rq.hr_lock); + hns_roce_spin_unlock(&qp->sq.hr_lock); +@@ -1643,6 +1651,9 @@ out: + + hns_roce_spin_unlock(&qp->sq.hr_lock); + ++ if (check_dca_detach_enable(qp)) ++ hns_roce_dca_stop_post(&ctx->dca_ctx, qp->dca_wqe.dcan); ++ + if (ibvqp->state == IBV_QPS_ERR) { + attr.qp_state = IBV_QPS_ERR; + +@@ -1784,6 +1795,9 @@ out: + + hns_roce_spin_unlock(&qp->rq.hr_lock); + ++ if (check_dca_detach_enable(qp)) ++ hns_roce_dca_stop_post(&ctx->dca_ctx, qp->dca_wqe.dcan); ++ + if (ibvqp->state == IBV_QPS_ERR) { + attr.qp_state = IBV_QPS_ERR; + hns_roce_u_v2_modify_qp(ibvqp, &attr, IBV_QP_STATE); +@@ -1902,6 +1916,7 @@ static int hns_roce_u_v2_modify_qp(struct ibv_qp *qp, struct ibv_qp_attr *attr, + if (attr->qp_state == IBV_QPS_RTR) { + hr_qp->tc_mode = resp_ex.drv_payload.tc_mode; + hr_qp->priority = resp_ex.drv_payload.priority; ++ hr_qp->dca_wqe.dcan = resp_ex.drv_payload.dcan; + } + } + +@@ -2951,6 +2966,10 @@ static int wr_complete(struct ibv_qp_ex *ibv_qp) + + out: + hns_roce_spin_unlock(&qp->sq.hr_lock); ++ ++ if (check_dca_detach_enable(qp)) ++ hns_roce_dca_stop_post(&ctx->dca_ctx, 
qp->dca_wqe.dcan); ++ + if (ibv_qp->qp_base.state == IBV_QPS_ERR) { + attr.qp_state = IBV_QPS_ERR; + hns_roce_u_v2_modify_qp(&ibv_qp->qp_base, &attr, IBV_QP_STATE); +-- +2.33.0 + diff --git a/0025-libhns-Add-direct-verbs-support-to-config-DCA.patch b/0025-libhns-Add-direct-verbs-support-to-config-DCA.patch new file mode 100644 index 0000000000000000000000000000000000000000..b5a4c71c5cbf616e5c4a5367ab6691ce63c49318 --- /dev/null +++ b/0025-libhns-Add-direct-verbs-support-to-config-DCA.patch @@ -0,0 +1,386 @@ +From 08b80f6450477832b1a194f18fbed60367da46de Mon Sep 17 00:00:00 2001 +From: Chengchang Tang +Date: Mon, 10 May 2021 17:13:49 +0800 +Subject: [PATCH 25/25] libhns: Add direct verbs support to config DCA + +driver inclusion +category: feature +bugzilla: https://gitee.com/src-openeuler/rdma-core/issues/I9C2AQ + +------------------------------------------------------------------ + +Add two direct verbs to config DCA: +1. hnsdv_open_device() is used to config DCA memory pool. +2. hnsdv_create_qp() is used to create a DCA QP. + +Signed-off-by: Chengchang Tang +Reviewed-by: Yangyang Li +--- + debian/control | 2 +- + providers/hns/hns_roce_u.c | 80 ++++++++++++++++++++++++++++---- + providers/hns/hns_roce_u.h | 4 +- + providers/hns/hns_roce_u_buf.c | 3 ++ + providers/hns/hns_roce_u_verbs.c | 39 ++++++++++++++-- + providers/hns/hnsdv.h | 29 +++++++++++- + providers/hns/libhns.map | 1 + + 7 files changed, 140 insertions(+), 18 deletions(-) + +diff --git a/debian/control b/debian/control +index 160824f..2a55372 100644 +--- a/debian/control ++++ b/debian/control +@@ -87,7 +87,7 @@ Description: User space provider drivers for libibverbs + - efa: Amazon Elastic Fabric Adapter + - erdma: Alibaba Elastic RDMA (iWarp) Adapter + - hfi1verbs: Intel Omni-Path HFI +- - hns: HiSilicon Hip06 SoC ++ - hns: HiSilicon Hip08+ SoC + - ipathverbs: QLogic InfiniPath HCAs + - irdma: Intel Ethernet Connection RDMA + - mana: Microsoft Azure Network Adapter +diff --git a/providers/hns/hns_roce_u.c b/providers/hns/hns_roce_u.c +index 56ff201..93a0312 100644 +--- a/providers/hns/hns_roce_u.c ++++ b/providers/hns/hns_roce_u.c +@@ -132,8 +132,55 @@ static int mmap_dca(struct hns_roce_context *ctx, int cmd_fd, + return 0; + } + ++struct ibv_context *hnsdv_open_device(struct ibv_device *device, ++ struct hnsdv_context_attr *attr) ++{ ++ if (!is_hns_dev(device)) { ++ errno = EOPNOTSUPP; ++ return NULL; ++ } ++ ++ return verbs_open_device(device, attr); ++} ++ ++static void set_dca_pool_param(struct hns_roce_context *ctx, ++ struct hnsdv_context_attr *attr, int page_size) ++{ ++ struct hns_roce_dca_ctx *dca_ctx = &ctx->dca_ctx; ++ ++ if (attr->comp_mask & HNSDV_CONTEXT_MASK_DCA_UNIT_SIZE) ++ dca_ctx->unit_size = align(attr->dca_unit_size, page_size); ++ else ++ dca_ctx->unit_size = page_size * HNS_DCA_DEFAULT_UNIT_PAGES; ++ ++ /* The memory pool cannot be expanded, only init the DCA context. */ ++ if (dca_ctx->unit_size == 0) ++ return; ++ ++ /* If not set, the memory pool can be expanded unlimitedly. */ ++ if (attr->comp_mask & HNSDV_CONTEXT_MASK_DCA_MAX_SIZE) ++ dca_ctx->max_size = DIV_ROUND_UP(attr->dca_max_size, ++ dca_ctx->unit_size) * ++ dca_ctx->unit_size; ++ else ++ dca_ctx->max_size = HNS_DCA_MAX_MEM_SIZE; ++ ++ /* If not set, the memory pool cannot be shrunk. 
*/ ++ if (attr->comp_mask & HNSDV_CONTEXT_MASK_DCA_MIN_SIZE) ++ dca_ctx->min_size = DIV_ROUND_UP(attr->dca_min_size, ++ dca_ctx->unit_size) * ++ dca_ctx->unit_size; ++ else ++ dca_ctx->min_size = HNS_DCA_MAX_MEM_SIZE; ++ ++ verbs_debug(&ctx->ibv_ctx, ++ "Support DCA, unit %u, max %lu, min %lu Bytes.\n", ++ dca_ctx->unit_size, dca_ctx->max_size, dca_ctx->min_size); ++} ++ + static int init_dca_context(struct hns_roce_context *ctx, int cmd_fd, + struct hns_roce_alloc_ucontext_resp *resp, ++ struct hnsdv_context_attr *attr, + int page_size) + { + struct hns_roce_dca_ctx *dca_ctx = &ctx->dca_ctx; +@@ -145,14 +192,18 @@ static int init_dca_context(struct hns_roce_context *ctx, int cmd_fd, + if (!(ctx->config & HNS_ROCE_UCTX_RSP_DCA_FLAGS)) + return 0; + ++ dca_ctx->unit_size = 0; ++ dca_ctx->mem_cnt = 0; ++ + list_head_init(&dca_ctx->mem_list); + ret = pthread_spin_init(&dca_ctx->lock, PTHREAD_PROCESS_PRIVATE); + if (ret) + return ret; + +- dca_ctx->unit_size = page_size * HNS_DCA_DEFAULT_UNIT_PAGES; +- dca_ctx->max_size = HNS_DCA_MAX_MEM_SIZE; +- dca_ctx->mem_cnt = 0; ++ if (!attr || !(attr->flags & HNSDV_CONTEXT_FLAGS_DCA)) ++ return 0; ++ ++ set_dca_pool_param(ctx, attr, page_size); + + if (mmap_key) { + const unsigned int bits_per_qp = 2 * HNS_DCA_BITS_PER_STATUS; +@@ -253,18 +304,28 @@ static int set_context_attr(struct hns_roce_device *hr_dev, + return 0; + } + +-static void ucontext_set_cmd(struct hns_roce_alloc_ucontext *cmd, int page_size) ++static void ucontext_set_cmd(struct hns_roce_alloc_ucontext *cmd, ++ struct hnsdv_context_attr *attr) + { + cmd->config |= HNS_ROCE_EXSGE_FLAGS | HNS_ROCE_RQ_INLINE_FLAGS | +- HNS_ROCE_CQE_INLINE_FLAGS | HNS_ROCE_UCTX_CONFIG_DCA; +- cmd->comp = HNS_ROCE_ALLOC_UCTX_COMP_DCA_MAX_QPS; +- cmd->dca_max_qps = page_size * 8 / 2 * HNS_DCA_BITS_PER_STATUS; ++ HNS_ROCE_CQE_INLINE_FLAGS; ++ ++ if (!attr || !(attr->flags & HNSDV_CONTEXT_FLAGS_DCA)) ++ return; ++ ++ cmd->config |= HNS_ROCE_UCTX_CONFIG_DCA; ++ ++ if (attr->comp_mask & HNSDV_CONTEXT_MASK_DCA_PRIME_QPS) { ++ cmd->comp |= HNS_ROCE_ALLOC_UCTX_COMP_DCA_MAX_QPS; ++ cmd->dca_max_qps = attr->dca_prime_qps; ++ } + } + + static struct verbs_context *hns_roce_alloc_context(struct ibv_device *ibdev, + int cmd_fd, + void *private_data) + { ++ struct hnsdv_context_attr *ctx_attr = private_data; + struct hns_roce_device *hr_dev = to_hr_dev(ibdev); + struct hns_roce_alloc_ucontext_resp resp = {}; + struct hns_roce_alloc_ucontext cmd = {}; +@@ -275,7 +336,7 @@ static struct verbs_context *hns_roce_alloc_context(struct ibv_device *ibdev, + if (!context) + return NULL; + +- ucontext_set_cmd(&cmd, hr_dev->page_size); ++ ucontext_set_cmd(&cmd, ctx_attr); + if (ibv_cmd_get_context(&context->ibv_ctx, &cmd.ibv_cmd, sizeof(cmd), + &resp.ibv_resp, sizeof(resp))) + goto err_free; +@@ -288,7 +349,8 @@ static struct verbs_context *hns_roce_alloc_context(struct ibv_device *ibdev, + if (context->uar == MAP_FAILED) + goto err_free; + +- if (init_dca_context(context, cmd_fd, &resp, hr_dev->page_size)) ++ if (init_dca_context(context, cmd_fd, ++ &resp, ctx_attr, hr_dev->page_size)) + goto err_free; + + if (init_reset_context(context, cmd_fd, &resp, hr_dev->page_size)) +diff --git a/providers/hns/hns_roce_u.h b/providers/hns/hns_roce_u.h +index 5bddb00..691bf61 100644 +--- a/providers/hns/hns_roce_u.h ++++ b/providers/hns/hns_roce_u.h +@@ -584,6 +584,8 @@ static inline void clear_bit_unlock(atomic_bitmap_t *p, uint32_t nr) + atomic_fetch_and(p, ~HNS_ROCE_BIT_MASK(nr)); + } + ++bool is_hns_dev(struct ibv_device *device); ++ + int 
hns_roce_u_query_device(struct ibv_context *context, + const struct ibv_query_device_ex_input *input, + struct ibv_device_attr_ex *attr, size_t attr_size); +@@ -672,8 +674,6 @@ void hns_roce_cleanup_dca_mem(struct hns_roce_context *ctx); + + void hns_roce_init_qp_indices(struct hns_roce_qp *qp); + +-bool is_hns_dev(struct ibv_device *device); +- + extern const struct hns_roce_u_hw hns_roce_u_hw_v2; + + #endif /* _HNS_ROCE_U_H */ +diff --git a/providers/hns/hns_roce_u_buf.c b/providers/hns/hns_roce_u_buf.c +index 08c0fbc..780683e 100644 +--- a/providers/hns/hns_roce_u_buf.c ++++ b/providers/hns/hns_roce_u_buf.c +@@ -56,6 +56,9 @@ int hns_roce_alloc_buf(struct hns_roce_buf *buf, unsigned int size, + + void hns_roce_free_buf(struct hns_roce_buf *buf) + { ++ if (!buf->buf) ++ return; ++ + ibv_dofork_range(buf->buf, buf->length); + + munmap(buf->buf, buf->length); +diff --git a/providers/hns/hns_roce_u_verbs.c b/providers/hns/hns_roce_u_verbs.c +index 248d862..8964d53 100644 +--- a/providers/hns/hns_roce_u_verbs.c ++++ b/providers/hns/hns_roce_u_verbs.c +@@ -1072,6 +1072,15 @@ enum { + IBV_QP_INIT_ATTR_SEND_OPS_FLAGS, + }; + ++enum { ++ SEND_OPS_FLAG_MASK = ++ IBV_QP_EX_WITH_RDMA_WRITE | IBV_QP_EX_WITH_RDMA_WRITE_WITH_IMM | ++ IBV_QP_EX_WITH_SEND | IBV_QP_EX_WITH_SEND_WITH_IMM | ++ IBV_QP_EX_WITH_RDMA_READ | IBV_QP_EX_WITH_ATOMIC_CMP_AND_SWP | ++ IBV_QP_EX_WITH_ATOMIC_FETCH_AND_ADD | IBV_QP_EX_WITH_LOCAL_INV | ++ IBV_QP_EX_WITH_SEND_WITH_INV, ++}; ++ + static int check_qp_create_mask(struct hns_roce_context *ctx, + struct ibv_qp_init_attr_ex *attr) + { +@@ -1080,6 +1089,10 @@ static int check_qp_create_mask(struct hns_roce_context *ctx, + if (!check_comp_mask(attr->comp_mask, CREATE_QP_SUP_COMP_MASK)) + return EOPNOTSUPP; + ++ if (attr->comp_mask & IBV_QP_INIT_ATTR_SEND_OPS_FLAGS && ++ !check_comp_mask(attr->send_ops_flags, SEND_OPS_FLAG_MASK)) ++ return -EOPNOTSUPP; ++ + switch (attr->qp_type) { + case IBV_QPT_UD: + if (hr_dev->hw_version == HNS_ROCE_HW_VER2) +@@ -1311,9 +1324,21 @@ static int calc_qp_buff_size(struct hns_roce_device *hr_dev, + return 0; + } + +-static inline bool check_qp_support_dca(bool pool_en, enum ibv_qp_type qp_type) ++static inline bool check_qp_support_dca(struct hns_roce_dca_ctx *dca_ctx, ++ struct ibv_qp_init_attr_ex *attr, ++ struct hnsdv_qp_init_attr *hns_attr) + { +- if (pool_en && (qp_type == IBV_QPT_RC || qp_type == IBV_QPT_XRC_SEND)) ++ /* DCA pool disable */ ++ if (!dca_ctx->unit_size) ++ return false; ++ ++ /* Unsupport type */ ++ if (attr->qp_type != IBV_QPT_RC && attr->qp_type != IBV_QPT_XRC_SEND) ++ return false; ++ ++ if (hns_attr && ++ (hns_attr->comp_mask & HNSDV_QP_INIT_ATTR_MASK_QP_CREATE_FLAGS) && ++ (hns_attr->create_flags & HNSDV_QP_CREATE_ENABLE_DCA_MODE)) + return true; + + return false; +@@ -1331,6 +1356,7 @@ static void qp_free_wqe(struct hns_roce_qp *qp) + } + + static int qp_alloc_wqe(struct ibv_qp_init_attr_ex *attr, ++ struct hnsdv_qp_init_attr *hns_attr, + struct hns_roce_qp *qp, struct hns_roce_context *ctx) + { + struct hns_roce_device *hr_dev = to_hr_dev(ctx->ibv_ctx.context.device); +@@ -1354,7 +1380,8 @@ static int qp_alloc_wqe(struct ibv_qp_init_attr_ex *attr, + goto err_alloc; + } + +- if (check_qp_support_dca(ctx->dca_ctx.max_size != 0, attr->qp_type)) { ++ if (check_qp_support_dca(&ctx->dca_ctx, attr, hns_attr) && ++ ctx->dca_ctx.max_size > 0) { + /* when DCA is enabled, use a buffer list to store page addr */ + qp->buf.buf = NULL; + qp->dca_wqe.max_cnt = hr_hw_page_count(qp->buf_size); +@@ -1362,6 +1389,7 @@ static int 
qp_alloc_wqe(struct ibv_qp_init_attr_ex *attr, + qp->dca_wqe.bufs = calloc(qp->dca_wqe.max_cnt, sizeof(void *)); + if (!qp->dca_wqe.bufs) + goto err_alloc; ++ verbs_debug(&ctx->ibv_ctx, "alloc DCA buf.\n"); + } else { + if (hns_roce_alloc_buf(&qp->buf, qp->buf_size, + HNS_HW_PAGE_SIZE)) +@@ -1651,12 +1679,13 @@ void hns_roce_free_qp_buf(struct hns_roce_qp *qp, struct hns_roce_context *ctx) + } + + static int hns_roce_alloc_qp_buf(struct ibv_qp_init_attr_ex *attr, ++ struct hnsdv_qp_init_attr *hns_attr, + struct hns_roce_qp *qp, + struct hns_roce_context *ctx) + { + int ret; + +- ret = qp_alloc_wqe(attr, qp, ctx); ++ ret = qp_alloc_wqe(attr, hns_attr, qp, ctx); + if (ret) + return ret; + +@@ -1731,7 +1760,7 @@ static struct ibv_qp *create_qp(struct ibv_context *ibv_ctx, + if (ret) + goto err_spinlock; + +- ret = hns_roce_alloc_qp_buf(attr, qp, context); ++ ret = hns_roce_alloc_qp_buf(attr, hns_attr, qp, context); + if (ret) + goto err_buf; + +diff --git a/providers/hns/hnsdv.h b/providers/hns/hnsdv.h +index 451b26e..68bf001 100644 +--- a/providers/hns/hnsdv.h ++++ b/providers/hns/hnsdv.h +@@ -22,17 +22,42 @@ enum hnsdv_qp_congest_ctrl_type { + HNSDV_QP_CREATE_ENABLE_DIP = 1 << 3, + }; + ++enum hnsdv_qp_create_flags { ++ HNSDV_QP_CREATE_ENABLE_DCA_MODE = 1 << 0, ++}; ++ ++enum hnsdv_context_comp_mask { ++ HNSDV_CONTEXT_MASK_DCA_PRIME_QPS = 1 << 0, ++ HNSDV_CONTEXT_MASK_DCA_UNIT_SIZE = 1 << 1, ++ HNSDV_CONTEXT_MASK_DCA_MAX_SIZE = 1 << 2, ++ HNSDV_CONTEXT_MASK_DCA_MIN_SIZE = 1 << 3, ++}; ++ + enum hnsdv_qp_init_attr_mask { ++ HNSDV_QP_INIT_ATTR_MASK_QP_CREATE_FLAGS = 1 << 0, + HNSDV_QP_INIT_ATTR_MASK_QP_CONGEST_TYPE = 1 << 1, + }; + ++struct hnsdv_context_attr { ++ uint64_t flags; /* Use enum hnsdv_context_attr_flags */ ++ uint64_t comp_mask; /* Use enum hnsdv_context_comp_mask */ ++ uint32_t dca_prime_qps; ++ uint32_t dca_unit_size; ++ uint64_t dca_max_size; ++ uint64_t dca_min_size; ++}; ++ + struct hnsdv_qp_init_attr { + uint64_t comp_mask; /* Use enum hnsdv_qp_init_attr_mask */ +- uint32_t create_flags; ++ uint32_t create_flags; /* Use enum hnsdv_qp_create_flags */ + uint8_t congest_type; /* Use enum hnsdv_qp_congest_ctrl_type */ + uint8_t reserved[3]; + }; + ++enum hnsdv_context_attr_flags { ++ HNSDV_CONTEXT_FLAGS_DCA = 1 << 0, ++}; ++ + enum hnsdv_query_context_comp_mask { + HNSDV_CONTEXT_MASK_CONGEST_TYPE = 1 << 0, + }; +@@ -50,6 +75,8 @@ int hnsdv_query_device(struct ibv_context *ctx_in, + struct ibv_qp *hnsdv_create_qp(struct ibv_context *context, + struct ibv_qp_init_attr_ex *qp_attr, + struct hnsdv_qp_init_attr *hns_qp_attr); ++struct ibv_context *hnsdv_open_device(struct ibv_device *device, ++ struct hnsdv_context_attr *attr); + + #ifdef __cplusplus + } +diff --git a/providers/hns/libhns.map b/providers/hns/libhns.map +index e9bf417..a955346 100644 +--- a/providers/hns/libhns.map ++++ b/providers/hns/libhns.map +@@ -5,5 +5,6 @@ HNS_1.0 { + hnsdv_is_supported; + hnsdv_create_qp; + hnsdv_query_device; ++ hnsdv_open_device; + local: *; + }; +-- +2.33.0 + diff --git a/0026-libhns-Add-RoH-device-IDs.patch b/0026-libhns-Add-RoH-device-IDs.patch new file mode 100644 index 0000000000000000000000000000000000000000..9c7d9481cb1db5c4e13c15366018b6dc828a8719 --- /dev/null +++ b/0026-libhns-Add-RoH-device-IDs.patch @@ -0,0 +1,36 @@ +From 8c9305fce0941a6660582ed9aaf62c5a8367bc0f Mon Sep 17 00:00:00 2001 +From: Chengchang Tang +Date: Fri, 9 Oct 2020 11:14:39 +0800 +Subject: [PATCH 1/2] libhns: Add RoH device IDs + +driver inclusion +category: feature +bugzilla: 
https://gitee.com/src-openeuler/rdma-core/issues/I9FFIU + +------------------------------------------------------------------ + +Add RoH device IDs. +0xA22C is a 200Gb/s RoH device. +0xA22D is a 400Gb/s RoH device. + +Signed-off-by: Chengchang Tang +--- + providers/hns/hns_roce_u.c | 2 ++ + 1 file changed, 2 insertions(+) + +diff --git a/providers/hns/hns_roce_u.c b/providers/hns/hns_roce_u.c +index 93a0312..f9abe2f 100644 +--- a/providers/hns/hns_roce_u.c ++++ b/providers/hns/hns_roce_u.c +@@ -53,6 +53,8 @@ static const struct verbs_match_ent hca_table[] = { + VERBS_PCI_MATCH(PCI_VENDOR_ID_HUAWEI, 0xA226, &hns_roce_u_hw_v2), + VERBS_PCI_MATCH(PCI_VENDOR_ID_HUAWEI, 0xA227, &hns_roce_u_hw_v2), + VERBS_PCI_MATCH(PCI_VENDOR_ID_HUAWEI, 0xA228, &hns_roce_u_hw_v2), ++ VERBS_PCI_MATCH(PCI_VENDOR_ID_HUAWEI, 0xA22C, &hns_roce_u_hw_v2), ++ VERBS_PCI_MATCH(PCI_VENDOR_ID_HUAWEI, 0xA22D, &hns_roce_u_hw_v2), + VERBS_PCI_MATCH(PCI_VENDOR_ID_HUAWEI, 0xA22F, &hns_roce_u_hw_v2), + {} + }; +-- +2.33.0 + diff --git a/0027-libhns-Add-the-parsing-of-mac-type-in-RoH-mode.patch b/0027-libhns-Add-the-parsing-of-mac-type-in-RoH-mode.patch new file mode 100644 index 0000000000000000000000000000000000000000..d8e377ab5191d091384e6c4241b1a6e8e8267349 --- /dev/null +++ b/0027-libhns-Add-the-parsing-of-mac-type-in-RoH-mode.patch @@ -0,0 +1,125 @@ +From 928442c4184ceb115665da2040accc7e6c716b79 Mon Sep 17 00:00:00 2001 +From: Yangyang Li +Date: Tue, 13 Sep 2022 20:09:27 +0800 +Subject: [PATCH 2/2] libhns: Add the parsing of mac type in RoH mode + +driver inclusion +category: feature +bugzilla: https://gitee.com/src-openeuler/rdma-core/issues/I9FFIU + +------------------------------------------------------------------ + +After parsing the mac type as RoH mode, the user driver +needs to set the dmac field of ud wqe to 0xFF, the hardware +will recognize this field, and increase the recognition of +the IP field in RoH mode, which is used for the CM link +building function in user mode. 
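+
+A brief illustration of the lookup this patch adds (get_link_type() and the
+hns_device_link_type enum both come from the hunks below, so this is only a
+sketch of how the mapping behaves, not extra driver code):
+
+	enum hns_device_link_type link_type;
+
+	/* 0xA22C is one of the RoH device IDs added by the previous patch. */
+	if (!get_link_type(0xA22C, &link_type) &&
+	    link_type == HNS_DEV_LINK_TYPE_HCCS) {
+		/* RoH/HCCS link: set_ud_wqe() below marks ud_sq_wqe->dmac[0]
+		 * so the hardware resolves the destination from the IP headers.
+		 */
+	}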
+ +Signed-off-by: Yangyang Li +Signed-off-by: Guofeng Yue +--- + providers/hns/hns_roce_u.c | 34 +++++++++++++++++++++++++++++++- + providers/hns/hns_roce_u.h | 6 ++++++ + providers/hns/hns_roce_u_hw_v2.c | 4 ++++ + 3 files changed, 43 insertions(+), 1 deletion(-) + +diff --git a/providers/hns/hns_roce_u.c b/providers/hns/hns_roce_u.c +index f9abe2f..7a3d1a2 100644 +--- a/providers/hns/hns_roce_u.c ++++ b/providers/hns/hns_roce_u.c +@@ -95,6 +95,38 @@ static const struct verbs_context_ops hns_common_ops = { + .alloc_parent_domain = hns_roce_u_alloc_pad, + }; + ++static struct { ++ uint32_t device_id; ++ enum hns_device_link_type link_type; ++} device_link_types[] = { ++ {0xA222, HNS_DEV_LINK_TYPE_ETH}, ++ {0xA223, HNS_DEV_LINK_TYPE_ETH}, ++ {0xA224, HNS_DEV_LINK_TYPE_ETH}, ++ {0xA225, HNS_DEV_LINK_TYPE_ETH}, ++ {0xA226, HNS_DEV_LINK_TYPE_ETH}, ++ {0xA228, HNS_DEV_LINK_TYPE_ETH}, ++ {0xA22F, HNS_DEV_LINK_TYPE_ETH}, ++ {0xA227, HNS_DEV_LINK_TYPE_HCCS}, ++ {0xA22C, HNS_DEV_LINK_TYPE_HCCS}, ++ {0xA22D, HNS_DEV_LINK_TYPE_HCCS} ++}; ++ ++static int get_link_type(uint32_t device_id, ++ enum hns_device_link_type *link_type) ++{ ++ int i; ++ ++ for (i = 0; i < ARRAY_SIZE(device_link_types); i++) { ++ if (device_id == device_link_types[i].device_id) { ++ *link_type = device_link_types[i].link_type; ++ return 0; ++ } ++ } ++ ++ return ENOENT; ++} ++ ++ + static uint32_t calc_table_shift(uint32_t entry_count, uint32_t size_shift) + { + uint32_t count_shift = hr_ilog32(entry_count); +@@ -303,7 +335,7 @@ static int set_context_attr(struct hns_roce_device *hr_dev, + context->max_srq_wr = dev_attrs.max_srq_wr; + context->max_srq_sge = dev_attrs.max_srq_sge; + +- return 0; ++ return get_link_type(dev_attrs.vendor_part_id, &hr_dev->link_type); + } + + static void ucontext_set_cmd(struct hns_roce_alloc_ucontext *cmd, +diff --git a/providers/hns/hns_roce_u.h b/providers/hns/hns_roce_u.h +index 691bf61..5eedb81 100644 +--- a/providers/hns/hns_roce_u.h ++++ b/providers/hns/hns_roce_u.h +@@ -161,12 +161,18 @@ enum { + #define HNS_ROCE_SRQ_TABLE_BITS 8 + #define HNS_ROCE_SRQ_TABLE_SIZE BIT(HNS_ROCE_SRQ_TABLE_BITS) + ++enum hns_device_link_type { ++ HNS_DEV_LINK_TYPE_ETH, ++ HNS_DEV_LINK_TYPE_HCCS, ++}; ++ + struct hns_roce_device { + struct verbs_device ibv_dev; + int page_size; + const struct hns_roce_u_hw *u_hw; + int hw_version; + uint8_t congest_cap; ++ enum hns_device_link_type link_type; + }; + + struct hns_roce_buf { +diff --git a/providers/hns/hns_roce_u_hw_v2.c b/providers/hns/hns_roce_u_hw_v2.c +index 15d9108..b2d452b 100644 +--- a/providers/hns/hns_roce_u_hw_v2.c ++++ b/providers/hns/hns_roce_u_hw_v2.c +@@ -1377,6 +1377,7 @@ static inline void enable_wqe(struct hns_roce_qp *qp, void *sq_wqe, + static int set_ud_wqe(void *wqe, struct hns_roce_qp *qp, struct ibv_send_wr *wr, + unsigned int nreq, struct hns_roce_sge_info *sge_info) + { ++ struct hns_roce_device *hr_dev = to_hr_dev(qp->verbs_qp.qp.context->device); + struct hns_roce_ah *ah = to_hr_ah(wr->wr.ud.ah); + struct hns_roce_ud_sq_wqe *ud_sq_wqe = wqe; + int ret = 0; +@@ -1401,6 +1402,9 @@ static int set_ud_wqe(void *wqe, struct hns_roce_qp *qp, struct ibv_send_wr *wr, + if (ret) + return ret; + ++ if (hr_dev->link_type == HNS_DEV_LINK_TYPE_HCCS) ++ ud_sq_wqe->dmac[0] = 0xF0; ++ + ret = fill_ud_data_seg(ud_sq_wqe, qp, wr, sge_info); + if (ret) + return ret; +-- +2.33.0 + diff --git a/0028-libhns-Fix-missing-flexible-WQE-buffer-page-flag.patch b/0028-libhns-Fix-missing-flexible-WQE-buffer-page-flag.patch new file mode 100644 index 
0000000000000000000000000000000000000000..dd27179ef04ffb9d9092739850d0e7f896ada8a6 --- /dev/null +++ b/0028-libhns-Fix-missing-flexible-WQE-buffer-page-flag.patch @@ -0,0 +1,35 @@ +From 6ee60b49872472129e26fbb4d19061e7a864a32a Mon Sep 17 00:00:00 2001 +From: Chengchang Tang +Date: Tue, 30 Jan 2024 20:57:02 +0800 +Subject: [PATCH] libhns: Fix missing flexible WQE buffer page flag + +driver inclusion +category: bugfix +bugzilla: https://gitee.com/openeuler/kernel/issues/I98HIN + +-------------------------------------------------------------------------- + +Due to the lack of this flag, this feature cannot actually take effect. + +Fixes: bf57963e729c ("libhns: Support flexible WQE buffer page size") +Signed-off-by: Chengchang Tang +--- + providers/hns/hns_roce_u.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/providers/hns/hns_roce_u.c b/providers/hns/hns_roce_u.c +index 7a3d1a2..c4a3ba5 100644 +--- a/providers/hns/hns_roce_u.c ++++ b/providers/hns/hns_roce_u.c +@@ -342,7 +342,7 @@ static void ucontext_set_cmd(struct hns_roce_alloc_ucontext *cmd, + struct hnsdv_context_attr *attr) + { + cmd->config |= HNS_ROCE_EXSGE_FLAGS | HNS_ROCE_RQ_INLINE_FLAGS | +- HNS_ROCE_CQE_INLINE_FLAGS; ++ HNS_ROCE_CQE_INLINE_FLAGS | HNS_ROCE_UCTX_DYN_QP_PGSZ; + + if (!attr || !(attr->flags & HNSDV_CONTEXT_FLAGS_DCA)) + return; +-- +2.33.0 + diff --git a/0029-libhns-Fix-ext_sge-page-size.patch b/0029-libhns-Fix-ext_sge-page-size.patch new file mode 100644 index 0000000000000000000000000000000000000000..3ae1da38adfbf09f533f1e155b56759c8eb1424e --- /dev/null +++ b/0029-libhns-Fix-ext_sge-page-size.patch @@ -0,0 +1,52 @@ +From 85b6e60f3ec97d2479373111b2a4bceb6ef286cd Mon Sep 17 00:00:00 2001 +From: Chengchang Tang +Date: Fri, 19 Apr 2024 12:06:12 +0800 +Subject: [PATCH] libhns: Fix ext_sge page size + +driver inclusion +category: bugfix +bugzilla: https://gitee.com/openeuler/kernel/issues/I98HIN + +-------------------------------------------------------------------------- + +The buffer for extending SGE must be equal to the WQE buffer page +size. + +Fixes: bf57963e729c ("libhns: Support flexible WQE buffer page size") +Signed-off-by: Chengchang Tang +--- + providers/hns/hns_roce_u_verbs.c | 10 +++++++--- + 1 file changed, 7 insertions(+), 3 deletions(-) + +diff --git a/providers/hns/hns_roce_u_verbs.c b/providers/hns/hns_roce_u_verbs.c +index 8964d53..a680339 100644 +--- a/providers/hns/hns_roce_u_verbs.c ++++ b/providers/hns/hns_roce_u_verbs.c +@@ -1270,8 +1270,10 @@ static void get_best_multi_region_pg_shift(struct hns_roce_device *hr_dev, + /* + * The larger the pagesize used, the better the performance, but it + * may waste more memory. Therefore, we use the least common multiple +- * (aligned to power of 2) of sq wqe buffer size, rq wqe buffer size, + * and ext_sge buffer size as the pagesize. Additionally, since the ++ * (aligned to power of 2) of sq wqe buffer size and rq wqe buffer ++ * size as the pagesize. And the wqe buffer page cannot be larger ++ * than the buffer size used by extend sge. Additionally, since the + * kernel cannot guarantee the allocation of contiguous memory larger + * than the system page, the pagesize must be smaller than the system + * page. +@@ -1281,8 +1283,10 @@ static void get_best_multi_region_pg_shift(struct hns_roce_device *hr_dev, + rq_size = qp->rq.wqe_cnt << qp->rq.wqe_shift; + + pg_shift = max_t(uint8_t, sq_size ? hr_ilog32(sq_size) : 0, +- ext_sge_size ? hr_ilog32(ext_sge_size) : 0); +- pg_shift = max_t(uint8_t, pg_shift, rq_size ? 
hr_ilog32(rq_size) : 0); ++ rq_size ? hr_ilog32(rq_size) : 0); ++ pg_shift = ext_sge_size ? ++ min_t(uint8_t, pg_shift, hr_ilog32(ext_sge_size)) : ++ pg_shift; + pg_shift = max_t(uint8_t, pg_shift, HNS_HW_PAGE_SHIFT); + qp->pageshift = min_t(uint8_t, pg_shift, hr_ilog32(hr_dev->page_size)); + } +-- +2.33.0 + diff --git a/0030-libhns-Remove-unused-return-value.patch b/0030-libhns-Remove-unused-return-value.patch new file mode 100644 index 0000000000000000000000000000000000000000..0263254d16767c85ea204b5128ab308aeddb141e --- /dev/null +++ b/0030-libhns-Remove-unused-return-value.patch @@ -0,0 +1,51 @@ +From 953cb41219fbbb5ccb4cadd2524adc4d6927d996 Mon Sep 17 00:00:00 2001 +From: Junxian Huang +Date: Thu, 18 Apr 2024 13:49:29 +0800 +Subject: [PATCH 30/33] libhns: Remove unused return value + +mainline inclusion +from mainline-master +commit 9e1847c96356c452b3ed2330ddf4c484508c6f10 +category: bugfix +bugzilla: https://gitee.com/openeuler/kernel/issues/I9NZME +CVE: NA + +Reference: https://github.com/linux-rdma/rdma-core/pull/1450/commits/9e1847c96356c452b3ed2330ddf4c484508c6f10 + +---------------------------------------------------------------------- + +parse_cqe_for_resp() will never fail and always returns 0. Remove the +unused return value. + +Fixes: 061f7e1757ca ("libhns: Refactor the poll one interface") +Signed-off-by: Junxian Huang +Signed-off-by: Juan Zhou +--- + providers/hns/hns_roce_u_hw_v2.c | 4 +--- + 1 file changed, 1 insertion(+), 3 deletions(-) + +diff --git a/providers/hns/hns_roce_u_hw_v2.c b/providers/hns/hns_roce_u_hw_v2.c +index b2d452b..a0b8655 100644 +--- a/providers/hns/hns_roce_u_hw_v2.c ++++ b/providers/hns/hns_roce_u_hw_v2.c +@@ -531,7 +531,7 @@ static void parse_cqe_for_srq(struct hns_roce_v2_cqe *cqe, struct ibv_wc *wc, + handle_recv_cqe_inl_from_srq(cqe, srq); + } + +-static int parse_cqe_for_resp(struct hns_roce_v2_cqe *cqe, struct ibv_wc *wc, ++static void parse_cqe_for_resp(struct hns_roce_v2_cqe *cqe, struct ibv_wc *wc, + struct hns_roce_qp *hr_qp) + { + struct hns_roce_wq *wq; +@@ -547,8 +547,6 @@ static int parse_cqe_for_resp(struct hns_roce_v2_cqe *cqe, struct ibv_wc *wc, + handle_recv_cqe_inl_from_rq(cqe, hr_qp); + else if (hr_reg_read(cqe, CQE_RQ_INLINE)) + handle_recv_rq_inl(cqe, hr_qp); +- +- return 0; + } + + static void parse_cqe_for_req(struct hns_roce_v2_cqe *cqe, struct ibv_wc *wc, +-- +2.33.0 + diff --git a/0031-libhns-Fix-several-context-locks-issue.patch b/0031-libhns-Fix-several-context-locks-issue.patch new file mode 100644 index 0000000000000000000000000000000000000000..539b04b0e8de5438d194c1f027289a9e41e407ca --- /dev/null +++ b/0031-libhns-Fix-several-context-locks-issue.patch @@ -0,0 +1,148 @@ +From 4030d141751c6fb73270fdb8e8c46854df307865 Mon Sep 17 00:00:00 2001 +From: Junxian Huang +Date: Thu, 18 Apr 2024 13:49:30 +0800 +Subject: [PATCH 31/33] libhns: Fix several context locks issue + +mainline inclusion +from mainline-master +commit 6772962084dd1ee0ec277d79c63673f8736aa94f +category: bugfix +bugzilla: https://gitee.com/openeuler/kernel/issues/I9NZME +CVE: NA + +Reference: https://github.com/linux-rdma/rdma-core/pull/1450/commits/6772962084dd1ee0ec277d79c63673f8736aa94f + +---------------------------------------------------------------------- + +Fix several context lock issue: + +1. db_list_mutex is used without init currently. Add its init to + hns_roce_alloc_context(). + +2. pthread_mutex_init()/pthread_spin_init() may return error value. + Check the return value in hns_roce_alloc_context(). + +3. 
Add destruction for these context locks. + +4. Encapsulate init and destruction functions for these context locks. + +Fixes: 13eae8889690 ("libhns: Support rq record doorbell") +Fixes: 887b78c80224 ("libhns: Add initial main frame") +Signed-off-by: Junxian Huang +Signed-off-by: Juan Zhou +--- + providers/hns/hns_roce_u.c | 61 ++++++++++++++++++++++++++++++++------ + 1 file changed, 52 insertions(+), 9 deletions(-) + +diff --git a/providers/hns/hns_roce_u.c b/providers/hns/hns_roce_u.c +index c4a3ba5..e219b9e 100644 +--- a/providers/hns/hns_roce_u.c ++++ b/providers/hns/hns_roce_u.c +@@ -355,6 +355,47 @@ static void ucontext_set_cmd(struct hns_roce_alloc_ucontext *cmd, + } + } + ++static int hns_roce_init_context_lock(struct hns_roce_context *context) ++{ ++ int ret; ++ ++ ret = pthread_spin_init(&context->uar_lock, PTHREAD_PROCESS_PRIVATE); ++ if (ret) ++ return ret; ++ ++ ret = pthread_mutex_init(&context->qp_table_mutex, NULL); ++ if (ret) ++ goto destroy_uar_lock; ++ ++ ret = pthread_mutex_init(&context->srq_table_mutex, NULL); ++ if (ret) ++ goto destroy_qp_mutex; ++ ++ ret = pthread_mutex_init(&context->db_list_mutex, NULL); ++ if (ret) ++ goto destroy_srq_mutex; ++ ++ return 0; ++ ++destroy_srq_mutex: ++ pthread_mutex_destroy(&context->srq_table_mutex); ++ ++destroy_qp_mutex: ++ pthread_mutex_destroy(&context->qp_table_mutex); ++ ++destroy_uar_lock: ++ pthread_spin_destroy(&context->uar_lock); ++ return ret; ++} ++ ++static void hns_roce_destroy_context_lock(struct hns_roce_context *context) ++{ ++ pthread_spin_destroy(&context->uar_lock); ++ pthread_mutex_destroy(&context->qp_table_mutex); ++ pthread_mutex_destroy(&context->srq_table_mutex); ++ pthread_mutex_destroy(&context->db_list_mutex); ++} ++ + static struct verbs_context *hns_roce_alloc_context(struct ibv_device *ibdev, + int cmd_fd, + void *private_data) +@@ -373,19 +414,22 @@ static struct verbs_context *hns_roce_alloc_context(struct ibv_device *ibdev, + ucontext_set_cmd(&cmd, ctx_attr); + if (ibv_cmd_get_context(&context->ibv_ctx, &cmd.ibv_cmd, sizeof(cmd), + &resp.ibv_resp, sizeof(resp))) +- goto err_free; ++ goto err_ibv_cmd; ++ ++ if (hns_roce_init_context_lock(context)) ++ goto err_ibv_cmd; + + if (set_context_attr(hr_dev, context, &resp)) +- goto err_free; ++ goto err_set_attr; + + context->uar = mmap(NULL, hr_dev->page_size, PROT_READ | PROT_WRITE, + MAP_SHARED, cmd_fd, 0); + if (context->uar == MAP_FAILED) +- goto err_free; ++ goto err_set_attr; + + if (init_dca_context(context, cmd_fd, + &resp, ctx_attr, hr_dev->page_size)) +- goto err_free; ++ goto err_set_attr; + + if (init_reset_context(context, cmd_fd, &resp, hr_dev->page_size)) + goto reset_free; +@@ -393,10 +437,6 @@ static struct verbs_context *hns_roce_alloc_context(struct ibv_device *ibdev, + if (hns_roce_mmap(hr_dev, context, cmd_fd)) + goto uar_free; + +- pthread_mutex_init(&context->qp_table_mutex, NULL); +- pthread_mutex_init(&context->srq_table_mutex, NULL); +- pthread_spin_init(&context->uar_lock, PTHREAD_PROCESS_PRIVATE); +- + verbs_set_ops(&context->ibv_ctx, &hns_common_ops); + verbs_set_ops(&context->ibv_ctx, &hr_dev->u_hw->hw_ops); + +@@ -407,7 +447,9 @@ uar_free: + munmap(context->reset_state, hr_dev->page_size); + reset_free: + uninit_dca_context(context); +-err_free: ++err_set_attr: ++ hns_roce_destroy_context_lock(context); ++err_ibv_cmd: + verbs_uninit_context(&context->ibv_ctx); + free(context); + return NULL; +@@ -422,6 +464,7 @@ static void hns_roce_free_context(struct ibv_context *ibctx) + if (context->reset_state) + 
munmap(context->reset_state, hr_dev->page_size); + uninit_dca_context(context); ++ hns_roce_destroy_context_lock(context); + verbs_uninit_context(&context->ibv_ctx); + free(context); + } +-- +2.33.0 + diff --git a/0032-libhns-Clean-up-signed-unsigned-mix-with-relational-.patch b/0032-libhns-Clean-up-signed-unsigned-mix-with-relational-.patch new file mode 100644 index 0000000000000000000000000000000000000000..b0a6e14fc2974e7c5f869728377b045d15c1fe58 --- /dev/null +++ b/0032-libhns-Clean-up-signed-unsigned-mix-with-relational-.patch @@ -0,0 +1,80 @@ +From 146a980b0669d6db58ac4b5e83efa951ea48ddae Mon Sep 17 00:00:00 2001 +From: Junxian Huang +Date: Thu, 18 Apr 2024 13:49:34 +0800 +Subject: [PATCH 32/33] libhns: Clean up signed-unsigned mix with relational + issue + +mainline inclusion +from mainline-master +commit 79475124d39a92819030a854b7ea94fb73d9bc39 +category: bugfix +bugzilla: https://gitee.com/openeuler/kernel/issues/I9NZME +CVE: NA + +Reference: https://github.com/linux-rdma/rdma-core/pull/1450/commits/79475124d39a92819030a854b7ea94fb73d9bc39 + +---------------------------------------------------------------------- + +Clean up signed-unsigned mix with relational issue. + +Fixes: 36446a56eea5 ("libhns: Extended QP supports the new post send mechanism") +Signed-off-by: Junxian Huang +Signed-off-by: Juan Zhou +--- + providers/hns/hns_roce_u_hw_v2.c | 10 +++++----- + 1 file changed, 5 insertions(+), 5 deletions(-) + +diff --git a/providers/hns/hns_roce_u_hw_v2.c b/providers/hns/hns_roce_u_hw_v2.c +index a0b8655..9371150 100644 +--- a/providers/hns/hns_roce_u_hw_v2.c ++++ b/providers/hns/hns_roce_u_hw_v2.c +@@ -2632,8 +2632,8 @@ static void set_inline_data_list_rc(struct hns_roce_qp *qp, + { + unsigned int msg_len = qp->sge_info.total_len; + void *dseg; ++ size_t i; + int ret; +- int i; + + hr_reg_enable(wqe, RCWQE_INLINE); + +@@ -2692,7 +2692,7 @@ static void wr_set_inline_data_list_rc(struct ibv_qp_ex *ibv_qp, size_t num_buf, + { + struct hns_roce_qp *qp = to_hr_qp(&ibv_qp->qp_base); + struct hns_roce_rc_sq_wqe *wqe = qp->cur_wqe; +- int i; ++ size_t i; + + if (!wqe) + return; +@@ -2822,7 +2822,7 @@ static void wr_set_sge_list_ud(struct ibv_qp_ex *ibv_qp, size_t num_sge, + } + + hr_reg_write(wqe, UDWQE_MSG_START_SGE_IDX, sge_idx & mask); +- for (int i = 0; i < num_sge; i++) { ++ for (size_t i = 0; i < num_sge; i++) { + if (!sg_list[i].length) + continue; + +@@ -2848,8 +2848,8 @@ static void set_inline_data_list_ud(struct hns_roce_qp *qp, + uint8_t data[HNS_ROCE_MAX_UD_INL_INN_SZ] = {}; + unsigned int msg_len = qp->sge_info.total_len; + void *tmp; ++ size_t i; + int ret; +- int i; + + if (!check_inl_data_len(qp, msg_len)) { + qp->err = EINVAL; +@@ -2910,7 +2910,7 @@ static void wr_set_inline_data_list_ud(struct ibv_qp_ex *ibv_qp, size_t num_buf, + { + struct hns_roce_qp *qp = to_hr_qp(&ibv_qp->qp_base); + struct hns_roce_ud_sq_wqe *wqe = qp->cur_wqe; +- int i; ++ size_t i; + + if (!wqe) + return; +-- +2.33.0 + diff --git a/0033-libhns-Fix-missing-flag-when-creating-qp-by-hnsdv_cr.patch b/0033-libhns-Fix-missing-flag-when-creating-qp-by-hnsdv_cr.patch new file mode 100644 index 0000000000000000000000000000000000000000..f52033fb7de51de513f4d77b23498f347bb92ff6 --- /dev/null +++ b/0033-libhns-Fix-missing-flag-when-creating-qp-by-hnsdv_cr.patch @@ -0,0 +1,38 @@ +From bd53382cfbc0f1b1b5636dd9b425d546d98079b1 Mon Sep 17 00:00:00 2001 +From: Juan Zhou +Date: Sat, 11 May 2024 14:23:19 +0800 +Subject: [PATCH 33/33] libhns: Fix missing flag when creating qp by + hnsdv_create_qp() + +driver 
inclusion +category: bugfix +bugzilla: https://gitee.com/src-openeuler/rdma-core/issues/I9NZME + +-------------------------------------------------------------------------- + +This flag will be used when the DCA mode is enabled. Without this flag, +the QP fails to be created in DCA mode. + +Fixes: c7bf0dbf472d ("libhns: Introduce DCA for RC QP") +Signed-off-by: Juan Zhou +--- + providers/hns/hns_roce_u_verbs.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +diff --git a/providers/hns/hns_roce_u_verbs.c b/providers/hns/hns_roce_u_verbs.c +index a680339..e30880c 100644 +--- a/providers/hns/hns_roce_u_verbs.c ++++ b/providers/hns/hns_roce_u_verbs.c +@@ -1049,7 +1049,8 @@ int hns_roce_u_destroy_srq(struct ibv_srq *ibv_srq) + } + + enum { +- HNSDV_QP_SUP_COMP_MASK = HNSDV_QP_INIT_ATTR_MASK_QP_CONGEST_TYPE, ++ HNSDV_QP_SUP_COMP_MASK = HNSDV_QP_INIT_ATTR_MASK_QP_CREATE_FLAGS | ++ HNSDV_QP_INIT_ATTR_MASK_QP_CONGEST_TYPE, + }; + + static int check_hnsdv_qp_attr(struct hns_roce_context *ctx, +-- +2.33.0 + diff --git a/0034-librdmacm-Fix-an-overflow-bug-in-qsort-comparison-function.patch b/0034-librdmacm-Fix-an-overflow-bug-in-qsort-comparison-function.patch new file mode 100644 index 0000000000000000000000000000000000000000..787c73f1f33ee9af25f700ed6312b0db43bdfb88 --- /dev/null +++ b/0034-librdmacm-Fix-an-overflow-bug-in-qsort-comparison-function.patch @@ -0,0 +1,36 @@ +From c4a5ac8bba611206e062c0955fb605bfc0f48b0f Mon Sep 17 00:00:00 2001 +From: Mark Zhang +Date: Fri, 26 Apr 2024 14:17:55 +0300 +Subject: [PATCH] librdmacm: Fix an overflow bug in qsort comparison function + +Reference: https://github.com/linux-rdma/rdma-core/commit/c4a5ac8bba611206e062c0955fb605bfc0f48b0f + +The comparison function dev_cmp() doesn't work with 64b pointers in some +cases, as it casts the pointer to int. For example it's not able to sort +this list: + {0xfffe0c2f0b00, 0xaaac741b4a90, 0xaaac741b4d70} + +Fixes: e5d371cb0af0 ("librdmacm: Globally store and sort IB device list") +Signed-off-by: Mark Zhang +Reviewed-by: Leon Romanovsky +Signed-off-by: Yishai Hadas +--- + librdmacm/cma.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/librdmacm/cma.c b/librdmacm/cma.c +index 7b924bd..0a631bd 100644 +--- a/librdmacm/cma.c ++++ b/librdmacm/cma.c +@@ -311,7 +311,7 @@ static void remove_cma_dev(struct cma_device *cma_dev) + + static int dev_cmp(const void *a, const void *b) + { +- return (int)(*(char *const *)a - *(char *const *)b); ++ return (*(uintptr_t *)a > *(uintptr_t *)b) - (*(uintptr_t *)a < *(uintptr_t *)b); + } + + static int sync_devices_list(void) +-- +2.27.0 + diff --git a/0035-Fix-the-stride-calculation-for-MSN-PSN-area.patch b/0035-Fix-the-stride-calculation-for-MSN-PSN-area.patch new file mode 100644 index 0000000000000000000000000000000000000000..bf0ee9f1455a315b625d5c8d1a332cd40e82f990 --- /dev/null +++ b/0035-Fix-the-stride-calculation-for-MSN-PSN-area.patch @@ -0,0 +1,31 @@ +From 05b65d96296f07baca079acef1a5bbb3c71fffb6 Mon Sep 17 00:00:00 2001 +From: Selvin Xavier +Date: Wed, 17 Jul 2024 10:34:23 +0800 +Subject: [PATCH] Fix the stride calculation for MSN/PSN area [ Upstream commit + 65197a4 ] Library expects ilog2 of psn_size while calculating the stride. + ilog32 returns log2(v) + 1 and the calculation fails since the psn size is a + power of 2 value. Fix by passing psn_size - 1. 
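+
+A small worked example of the arithmetic, assuming ilog32() is the usual
+"index of the highest set bit, plus one" helper (so ilog32(v) == log2(v) + 1
+whenever v is a power of two); the hypothetical ilog32_sketch() below merely
+stands in for it to show why psn_size - 1 is the right argument:
+
+	static int ilog32_sketch(unsigned int v)
+	{
+		return v ? 32 - __builtin_clz(v) : 0;
+	}
+
+	/* psn_size is a power of two, e.g. 8. */
+	unsigned int psn_size = 8;
+
+	ilog32_sketch(psn_size);     /* 4: log2(8) + 1, one too large for the stride */
+	ilog32_sketch(psn_size - 1); /* 3: equals log2(8), the value the stride needs */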
+ +Fixes: 0a0e0d0 ("bnxt_re/lib: Adds MSN table capability for Gen P7 adapters") +Signed-off-by: Selvin Xavier +Signed-off-by: Nicolas Morey +--- + providers/bnxt_re/verbs.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/providers/bnxt_re/verbs.c b/providers/bnxt_re/verbs.c +index 55d5284..a74d32c 100644 +--- a/providers/bnxt_re/verbs.c ++++ b/providers/bnxt_re/verbs.c +@@ -1233,7 +1233,7 @@ static int bnxt_re_alloc_queues(struct bnxt_re_context *cntx, + /* psn_depth extra entries of size que->stride */ + psn_size = bnxt_re_get_psne_size(qp->cntx); + psn_depth = (nswr * psn_size) / que->stride; +- que->pad_stride_log2 = (uint32_t)ilog32(psn_size); ++ que->pad_stride_log2 = ilog32(psn_size - 1); + if ((nswr * psn_size) % que->stride) + psn_depth++; + que->depth += psn_depth; +-- +2.27.0 + diff --git a/0036-add-ZTE-Dinghai-rdma-driver.patch b/0036-add-ZTE-Dinghai-rdma-driver.patch new file mode 100644 index 0000000000000000000000000000000000000000..6b63965373c48d920c79732153abd128df3a5d24 --- /dev/null +++ b/0036-add-ZTE-Dinghai-rdma-driver.patch @@ -0,0 +1,8250 @@ +From e4eff3b4ead0430772c249d3a1a3fd734c8d3832 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?=E6=9D=8E=E5=AF=8C=E8=89=B3?= +Date: Wed, 4 Sep 2024 15:49:06 +0800 +Subject: [PATCH] add ZTE Dinghai RDMA driver +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +Signed-off-by: 李富艳 +--- + CMakeLists.txt | 1 + + MAINTAINERS | 5 + + README.md | 1 + + debian/control | 1 + + debian/copyright | 4 + + debian/ibverbs-providers.install | 1 + + debian/libibverbs-dev.install | 5 + + kernel-headers/CMakeLists.txt | 4 + + kernel-headers/rdma/ib_user_ioctl_verbs.h | 1 + + kernel-headers/rdma/zxdh-abi.h | 143 + + kernel-headers/rdma/zxdh_user_ioctl_cmds.h | 56 + + kernel-headers/rdma/zxdh_user_ioctl_verbs.h | 34 + + libibverbs/verbs.h | 1 + + providers/zrdma/CMakeLists.txt | 17 + + providers/zrdma/libzrdma.map | 16 + + providers/zrdma/main.c | 200 ++ + providers/zrdma/main.h | 223 ++ + providers/zrdma/private_verbs_cmd.c | 201 ++ + providers/zrdma/private_verbs_cmd.h | 24 + + providers/zrdma/zxdh_abi.h | 36 + + providers/zrdma/zxdh_defs.h | 399 +++ + providers/zrdma/zxdh_devids.h | 17 + + providers/zrdma/zxdh_dv.h | 75 + + providers/zrdma/zxdh_hw.c | 2596 +++++++++++++++ + providers/zrdma/zxdh_status.h | 75 + + providers/zrdma/zxdh_verbs.c | 3185 +++++++++++++++++++ + providers/zrdma/zxdh_verbs.h | 611 ++++ + redhat/rdma-core.spec | 4 + + 28 files changed, 7936 insertions(+) + create mode 100644 kernel-headers/rdma/zxdh-abi.h + create mode 100644 kernel-headers/rdma/zxdh_user_ioctl_cmds.h + create mode 100644 kernel-headers/rdma/zxdh_user_ioctl_verbs.h + create mode 100644 providers/zrdma/CMakeLists.txt + create mode 100644 providers/zrdma/libzrdma.map + create mode 100644 providers/zrdma/main.c + create mode 100644 providers/zrdma/main.h + create mode 100644 providers/zrdma/private_verbs_cmd.c + create mode 100644 providers/zrdma/private_verbs_cmd.h + create mode 100644 providers/zrdma/zxdh_abi.h + create mode 100644 providers/zrdma/zxdh_defs.h + create mode 100644 providers/zrdma/zxdh_devids.h + create mode 100644 providers/zrdma/zxdh_dv.h + create mode 100644 providers/zrdma/zxdh_hw.c + create mode 100644 providers/zrdma/zxdh_status.h + create mode 100644 providers/zrdma/zxdh_verbs.c + create mode 100644 providers/zrdma/zxdh_verbs.h + +diff --git a/CMakeLists.txt b/CMakeLists.txt +index 98985e7..432a650 100644 +--- a/CMakeLists.txt ++++ b/CMakeLists.txt +@@ -748,6 +748,7 @@ 
add_subdirectory(providers/mthca) + add_subdirectory(providers/ocrdma) + add_subdirectory(providers/qedr) + add_subdirectory(providers/vmw_pvrdma) ++add_subdirectory(providers/zrdma) + endif() + + add_subdirectory(providers/hfi1verbs) +diff --git a/MAINTAINERS b/MAINTAINERS +index 4b24117..aa41217 100644 +--- a/MAINTAINERS ++++ b/MAINTAINERS +@@ -185,6 +185,11 @@ L: pv-drivers@vmware.com + S: Supported + F: providers/vmw_pvrdma/ + ++ZRDMA USERSPACE PROVIDER (for zrdma.ko) ++M: Li Fuyan ++S: Supported ++F: providers/zrdma/ ++ + PYVERBS + M: Edward Srouji + S: Supported +diff --git a/README.md b/README.md +index 928bdc4..8f47d3c 100644 +--- a/README.md ++++ b/README.md +@@ -31,6 +31,7 @@ is included: + - rdma_rxe.ko + - siw.ko + - vmw_pvrdma.ko ++ - zrdma.ko + + Additional service daemons are provided for: + - srp_daemon (ib_srp.ko) +diff --git a/debian/control b/debian/control +index 2a55372..f86cc77 100644 +--- a/debian/control ++++ b/debian/control +@@ -99,6 +99,7 @@ Description: User space provider drivers for libibverbs + - rxe: A software implementation of the RoCE protocol + - siw: A software implementation of the iWarp protocol + - vmw_pvrdma: VMware paravirtual RDMA device ++ - zrdma: ZTE Connection RDMA + + Package: ibverbs-utils + Architecture: linux-any +diff --git a/debian/copyright b/debian/copyright +index 36ac71e..7e435b5 100644 +--- a/debian/copyright ++++ b/debian/copyright +@@ -228,6 +228,10 @@ Files: providers/vmw_pvrdma/* + Copyright: 2012-2016 VMware, Inc. + License: BSD-2-clause or GPL-2 + ++Files: providers/zrdma/* ++Copyright: 2024 ZTE Corporation. ++License: BSD-MIT or GPL-2 ++ + Files: rdma-ndd/* + Copyright: 2004-2016, Intel Corporation. + License: BSD-MIT or GPL-2 +diff --git a/debian/ibverbs-providers.install b/debian/ibverbs-providers.install +index fea15e0..360516f 100644 +--- a/debian/ibverbs-providers.install ++++ b/debian/ibverbs-providers.install +@@ -5,3 +5,4 @@ usr/lib/*/libhns.so.* + usr/lib/*/libmana.so.* + usr/lib/*/libmlx4.so.* + usr/lib/*/libmlx5.so.* ++usr/lib/*/libzrdma.so.* +diff --git a/debian/libibverbs-dev.install b/debian/libibverbs-dev.install +index ef5b9a4..73dd8c7 100644 +--- a/debian/libibverbs-dev.install ++++ b/debian/libibverbs-dev.install +@@ -13,6 +13,8 @@ usr/include/infiniband/sa.h + usr/include/infiniband/tm_types.h + usr/include/infiniband/verbs.h + usr/include/infiniband/verbs_api.h ++usr/include/infiniband/zxdh_dv.h ++usr/include/infiniband/zxdh_devids.h + usr/lib/*/lib*-rdmav*.a + usr/lib/*/libefa.a + usr/lib/*/libefa.so +@@ -26,12 +28,15 @@ usr/lib/*/libmlx4.a + usr/lib/*/libmlx4.so + usr/lib/*/libmlx5.a + usr/lib/*/libmlx5.so ++usr/lib/*/libzrdma.a ++usr/lib/*/libzrdma.so + usr/lib/*/pkgconfig/libefa.pc + usr/lib/*/pkgconfig/libhns.pc + usr/lib/*/pkgconfig/libibverbs.pc + usr/lib/*/pkgconfig/libmana.pc + usr/lib/*/pkgconfig/libmlx4.pc + usr/lib/*/pkgconfig/libmlx5.pc ++usr/lib/*/pkgconfig/libzrdma.pc + usr/share/man/man3/efadv_*.3 + usr/share/man/man3/ibv_* + usr/share/man/man3/mbps_to_ibv_rate.3 +diff --git a/kernel-headers/CMakeLists.txt b/kernel-headers/CMakeLists.txt +index 82c191c..9ceac31 100644 +--- a/kernel-headers/CMakeLists.txt ++++ b/kernel-headers/CMakeLists.txt +@@ -26,6 +26,9 @@ publish_internal_headers(rdma + rdma/rvt-abi.h + rdma/siw-abi.h + rdma/vmw_pvrdma-abi.h ++ rdma/zxdh-abi.h ++ rdma/zxdh_user_ioctl_cmds.h ++ rdma/zxdh_user_ioctl_verbs.h + ) + + publish_internal_headers(rdma/hfi +@@ -80,6 +83,7 @@ rdma_kernel_provider_abi( + rdma/rdma_user_rxe.h + rdma/siw-abi.h + rdma/vmw_pvrdma-abi.h ++ 
rdma/zxdh-abi.h + ) + + publish_headers(infiniband +diff --git a/kernel-headers/rdma/ib_user_ioctl_verbs.h b/kernel-headers/rdma/ib_user_ioctl_verbs.h +index fe15bc7..a31f330 100644 +--- a/kernel-headers/rdma/ib_user_ioctl_verbs.h ++++ b/kernel-headers/rdma/ib_user_ioctl_verbs.h +@@ -255,6 +255,7 @@ enum rdma_driver_id { + RDMA_DRIVER_SIW, + RDMA_DRIVER_ERDMA, + RDMA_DRIVER_MANA, ++ RDMA_DRIVER_ZXDH = 50, + }; + + enum ib_uverbs_gid_type { +diff --git a/kernel-headers/rdma/zxdh-abi.h b/kernel-headers/rdma/zxdh-abi.h +new file mode 100644 +index 0000000..665f874 +--- /dev/null ++++ b/kernel-headers/rdma/zxdh-abi.h +@@ -0,0 +1,143 @@ ++/* SPDX-License-Identifier: GPL-2.0 or Linux-OpenIB */ ++/* Copyright (c) 2024 ZTE Corporation. All rights reserved. */ ++ ++#ifndef ZXDH_ABI_H ++#define ZXDH_ABI_H ++ ++#include ++ ++/* zxdh must support legacy GEN_1 i40iw kernel ++ * and user-space whose last ABI ver is 5 ++ */ ++#define ZXDH_ABI_VER 5 ++ ++enum zxdh_memreg_type { ++ ZXDH_MEMREG_TYPE_MEM = 0, ++ ZXDH_MEMREG_TYPE_QP = 1, ++ ZXDH_MEMREG_TYPE_CQ = 2, ++ ZXDH_MEMREG_TYPE_SRQ = 3, ++}; ++ ++enum zxdh_db_addr_type { ++ ZXDH_DB_ADDR_PHY = 0, ++ ZXDH_DB_ADDR_BAR = 1, ++}; ++ ++struct zxdh_alloc_ucontext_req { ++ __u32 rsvd32; ++ __u8 userspace_ver; ++ __u8 rsvd8[3]; ++}; ++ ++struct zxdh_alloc_ucontext_resp { ++ __u32 max_pds; ++ __u32 max_qps; ++ __u32 wq_size; /* size of the WQs (SQ+RQ) in the mmaped area */ ++ __u8 kernel_ver; ++ __u8 db_addr_type; ++ __u8 rsvd[2]; ++ __aligned_u64 feature_flags; ++ __aligned_u64 sq_db_mmap_key; ++ __aligned_u64 cq_db_mmap_key; ++ __aligned_u64 sq_db_pa; ++ __aligned_u64 cq_db_pa; ++ __u32 max_hw_wq_frags; ++ __u32 max_hw_read_sges; ++ __u32 max_hw_inline; ++ __u32 max_hw_rq_quanta; ++ __u32 max_hw_srq_quanta; ++ __u32 max_hw_wq_quanta; ++ __u32 max_hw_srq_wr; ++ __u32 min_hw_cq_size; ++ __u32 max_hw_cq_size; ++ __u16 max_hw_sq_chunk; ++ __u8 hw_rev; ++ __u8 rsvd2; ++}; ++ ++struct zxdh_alloc_pd_resp { ++ __u32 pd_id; ++ __u8 rsvd[4]; ++}; ++ ++struct zxdh_resize_cq_req { ++ __aligned_u64 user_cq_buffer; ++}; ++ ++struct zxdh_create_cq_req { ++ __aligned_u64 user_cq_buf; ++ __aligned_u64 user_shadow_area; ++}; ++ ++struct zxdh_create_qp_req { ++ __aligned_u64 user_wqe_bufs; ++ __aligned_u64 user_compl_ctx; ++}; ++ ++struct zxdh_create_srq_req { ++ __aligned_u64 user_wqe_bufs; ++ __aligned_u64 user_compl_ctx; ++ __aligned_u64 user_wqe_list; ++ __aligned_u64 user_wqe_db; ++}; ++ ++struct zxdh_mem_reg_req { ++ __u16 reg_type; /* enum zxdh_memreg_type */ ++ __u16 cq_pages; ++ __u16 rq_pages; ++ __u16 sq_pages; ++ __u16 srq_pages; ++ __u16 srq_list_pages; ++ __u8 rsvd[4]; ++}; ++ ++struct zxdh_reg_mr_resp { ++ __u32 mr_pa_low; ++ __u32 mr_pa_hig; ++ __u16 host_page_size; ++ __u16 leaf_pbl_size; ++ __u8 rsvd[4]; ++}; ++ ++struct zxdh_modify_qp_req { ++ __u8 sq_flush; ++ __u8 rq_flush; ++ __u8 rsvd[6]; ++}; ++ ++struct zxdh_create_cq_resp { ++ __u32 cq_id; ++ __u32 cq_size; ++}; ++ ++struct zxdh_create_qp_resp { ++ __u32 qp_id; ++ __u32 actual_sq_size; ++ __u32 actual_rq_size; ++ __u32 zxdh_drv_opt; ++ __u16 push_idx; ++ __u8 lsmm; ++ __u8 rsvd; ++ __u32 qp_caps; ++}; ++ ++struct zxdh_create_srq_resp { ++ __u32 srq_id; ++ __u32 actual_srq_size; ++ __u32 actual_srq_list_size; ++ __u8 rsvd[4]; ++}; ++ ++struct zxdh_modify_qp_resp { ++ __aligned_u64 push_wqe_mmap_key; ++ __aligned_u64 push_db_mmap_key; ++ __u16 push_offset; ++ __u8 push_valid; ++ __u8 rsvd[5]; ++}; ++ ++struct zxdh_create_ah_resp { ++ __u32 ah_id; ++ __u8 rsvd[4]; ++}; ++#endif /* ZXDH_ABI_H */ +diff --git 
a/kernel-headers/rdma/zxdh_user_ioctl_cmds.h b/kernel-headers/rdma/zxdh_user_ioctl_cmds.h +new file mode 100644 +index 0000000..96d2eb4 +--- /dev/null ++++ b/kernel-headers/rdma/zxdh_user_ioctl_cmds.h +@@ -0,0 +1,56 @@ ++/* SPDX-License-Identifier: GPL-2.0 or Linux-OpenIB */ ++/* Copyright (c) 2024 ZTE Corporation. All rights reserved. */ ++ ++#ifndef ZXDH_USER_IOCTL_CMDS_H ++#define ZXDH_USER_IOCTL_CMDS_H ++ ++#include ++#include ++ ++enum zxdh_ib_dev_get_log_trace_attrs { ++ ZXDH_IB_ATTR_DEV_GET_LOG_TARCE_SWITCH = (1U << UVERBS_ID_NS_SHIFT), ++}; ++ ++enum zxdh_ib_dev_set_log_trace_attrs { ++ ZXDH_IB_ATTR_DEV_SET_LOG_TARCE_SWITCH = (1U << UVERBS_ID_NS_SHIFT), ++}; ++ ++enum zxdh_ib_dev_methods { ++ ZXDH_IB_METHOD_DEV_GET_LOG_TRACE = (1U << UVERBS_ID_NS_SHIFT), ++ ZXDH_IB_METHOD_DEV_SET_LOG_TRACE, ++}; ++ ++enum zxdh_ib_qp_modify_udp_sport_attrs { ++ ZXDH_IB_ATTR_QP_UDP_PORT = (1U << UVERBS_ID_NS_SHIFT), ++ ZXDH_IB_ATTR_QP_QPN, ++}; ++ ++enum zxdh_ib_qp_query_qpc_attrs { ++ ZXDH_IB_ATTR_QP_QUERY_HANDLE = (1U << UVERBS_ID_NS_SHIFT), ++ ZXDH_IB_ATTR_QP_QUERY_RESP, ++}; ++ ++enum zxdh_ib_qp_modify_qpc_attrs { ++ ZXDH_IB_ATTR_QP_MODIFY_QPC_HANDLE = (1U << UVERBS_ID_NS_SHIFT), ++ ZXDH_IB_ATTR_QP_MODIFY_QPC_REQ, ++ ZXDH_IB_ATTR_QP_MODIFY_QPC_MASK, ++}; ++ ++enum zxdh_ib_qp_reset_qp_attrs { ++ ZXDH_IB_ATTR_QP_RESET_QP_HANDLE = (1U << UVERBS_ID_NS_SHIFT), ++ ZXDH_IB_ATTR_QP_RESET_OP_CODE, ++}; ++ ++enum zxdh_ib_qp_methods { ++ ZXDH_IB_METHOD_QP_MODIFY_UDP_SPORT = (1U << UVERBS_ID_NS_SHIFT), ++ ZXDH_IB_METHOD_QP_QUERY_QPC, ++ ZXDH_IB_METHOD_QP_MODIFY_QPC, ++ ZXDH_IB_METHOD_QP_RESET_QP, ++}; ++ ++enum zxdh_ib_objects { ++ ZXDH_IB_OBJECT_DEV = (1U << UVERBS_ID_NS_SHIFT), ++ ZXDH_IB_OBJECT_QP_OBJ, ++}; ++ ++#endif +diff --git a/kernel-headers/rdma/zxdh_user_ioctl_verbs.h b/kernel-headers/rdma/zxdh_user_ioctl_verbs.h +new file mode 100644 +index 0000000..bc0e812 +--- /dev/null ++++ b/kernel-headers/rdma/zxdh_user_ioctl_verbs.h +@@ -0,0 +1,34 @@ ++/* SPDX-License-Identifier: GPL-2.0 or Linux-OpenIB */ ++/* Copyright (c) 2024 ZTE Corporation. All rights reserved. 
*/ ++#ifndef ZXDH_USER_IOCTL_VERBS_H ++#define ZXDH_USER_IOCTL_VERBS_H ++ ++#include ++ ++//todo ailgn ++struct zxdh_query_qpc_resp { ++ __u8 retry_flag; ++ __u8 rnr_retry_flag; ++ __u8 read_retry_flag; ++ __u8 cur_retry_count; ++ __u8 retry_cqe_sq_opcode; ++ __u8 err_flag; ++ __u8 ack_err_flag; ++ __u8 package_err_flag; ++ __u8 recv_err_flag; ++ __u8 retry_count; ++ __u32 tx_last_ack_psn; ++}; ++ ++struct zxdh_modify_qpc_req { ++ __u8 retry_flag; ++ __u8 rnr_retry_flag; ++ __u8 read_retry_flag; ++ __u8 cur_retry_count; ++ __u8 retry_cqe_sq_opcode; ++ __u8 err_flag; ++ __u8 ack_err_flag; ++ __u8 package_err_flag; ++}; ++ ++#endif +diff --git a/libibverbs/verbs.h b/libibverbs/verbs.h +index 78129fd..be0e76b 100644 +--- a/libibverbs/verbs.h ++++ b/libibverbs/verbs.h +@@ -2275,6 +2275,7 @@ extern const struct verbs_device_ops verbs_provider_qedr; + extern const struct verbs_device_ops verbs_provider_rxe; + extern const struct verbs_device_ops verbs_provider_siw; + extern const struct verbs_device_ops verbs_provider_vmw_pvrdma; ++extern const struct verbs_device_ops verbs_provider_zrdma; + extern const struct verbs_device_ops verbs_provider_all; + extern const struct verbs_device_ops verbs_provider_none; + void ibv_static_providers(void *unused, ...); +diff --git a/providers/zrdma/CMakeLists.txt b/providers/zrdma/CMakeLists.txt +new file mode 100644 +index 0000000..1af572a +--- /dev/null ++++ b/providers/zrdma/CMakeLists.txt +@@ -0,0 +1,17 @@ ++# SPDX-License-Identifier: GPL-2.0 or Linux-OpenIB */ ++# Copyright (c) 2024 ZTE Corporation. All rights reserved. */ ++set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -Werror") ++rdma_shared_provider(zrdma libzrdma.map ++ 1 1.1.${PACKAGE_VERSION} ++ zxdh_hw.c ++ main.c ++ zxdh_verbs.c ++ private_verbs_cmd.c ++) ++ ++publish_headers(infiniband ++ zxdh_dv.h ++) ++ ++ ++rdma_pkg_config("zrdma" "libibverbs" "${CMAKE_THREAD_LIBS_INIT}") +diff --git a/providers/zrdma/libzrdma.map b/providers/zrdma/libzrdma.map +new file mode 100644 +index 0000000..f95de4b +--- /dev/null ++++ b/providers/zrdma/libzrdma.map +@@ -0,0 +1,16 @@ ++/* Export symbols should be added below according to ++ Documentation/versioning.md document. */ ++ZRDMA_1.0 { ++ global: ++ zxdh_get_log_trace_switch; ++ local: *; ++}; ++ ++ZRDMA_1.1 { ++ global: ++ zxdh_set_log_trace_switch; ++ zxdh_modify_qp_udp_sport; ++ zxdh_query_qpc; ++ zxdh_modify_qpc; ++ zxdh_reset_qp; ++} ZRDMA_1.0; +diff --git a/providers/zrdma/main.c b/providers/zrdma/main.c +new file mode 100644 +index 0000000..e25a1a2 +--- /dev/null ++++ b/providers/zrdma/main.c +@@ -0,0 +1,200 @@ ++// SPDX-License-Identifier: GPL-2.0 or Linux-OpenIB ++/* Copyright (c) 2024 ZTE Corporation. All rights reserved. 
*/ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include "zxdh_devids.h" ++#include "main.h" ++#include "zxdh_abi.h" ++#include "private_verbs_cmd.h" ++ ++#define ZXDH_HCA(v, d) VERBS_PCI_MATCH(v, d, NULL) ++static const struct verbs_match_ent hca_table[] = { ++ VERBS_DRIVER_ID(RDMA_DRIVER_ZXDH), ++ ZXDH_HCA(PCI_VENDOR_ID_ZXDH_EVB, ZXDH_DEV_ID_ADAPTIVE_EVB_PF), ++ ZXDH_HCA(PCI_VENDOR_ID_ZXDH_EVB, ZXDH_DEV_ID_ADAPTIVE_EVB_VF), ++ ZXDH_HCA(PCI_VENDOR_ID_ZXDH_E312, ZXDH_DEV_ID_ADAPTIVE_E312_PF), ++ ZXDH_HCA(PCI_VENDOR_ID_ZXDH_E312, ZXDH_DEV_ID_ADAPTIVE_E312_VF), ++ ZXDH_HCA(PCI_VENDOR_ID_ZXDH_X512, ZXDH_DEV_ID_ADAPTIVE_X512_PF), ++ ZXDH_HCA(PCI_VENDOR_ID_ZXDH_X512, ZXDH_DEV_ID_ADAPTIVE_X512_VF), ++ {} ++}; ++ ++/** ++ * zxdh_ufree_context - free context that was allocated ++ * @ibctx: context allocated ptr ++ */ ++static void zxdh_ufree_context(struct ibv_context *ibctx) ++{ ++ struct zxdh_uvcontext *iwvctx; ++ ++ iwvctx = container_of(ibctx, struct zxdh_uvcontext, ibv_ctx.context); ++ ++ zxdh_ufree_pd(&iwvctx->iwupd->ibv_pd); ++ zxdh_munmap(iwvctx->sq_db); ++ zxdh_munmap(iwvctx->cq_db); ++ verbs_uninit_context(&iwvctx->ibv_ctx); ++ free(iwvctx); ++} ++ ++static const struct verbs_context_ops zxdh_uctx_ops = { ++ .alloc_mw = zxdh_ualloc_mw, ++ .alloc_pd = zxdh_ualloc_pd, ++ .attach_mcast = zxdh_uattach_mcast, ++ .bind_mw = zxdh_ubind_mw, ++ .cq_event = zxdh_cq_event, ++ .create_ah = zxdh_ucreate_ah, ++ .create_cq = zxdh_ucreate_cq, ++ .create_cq_ex = zxdh_ucreate_cq_ex, ++ .create_qp = zxdh_ucreate_qp, ++ .create_qp_ex = zxdh_ucreate_qp_ex, ++ .create_srq = zxdh_ucreate_srq, ++ .dealloc_mw = zxdh_udealloc_mw, ++ .dealloc_pd = zxdh_ufree_pd, ++ .dereg_mr = zxdh_udereg_mr, ++ .destroy_ah = zxdh_udestroy_ah, ++ .destroy_cq = zxdh_udestroy_cq, ++ .modify_cq = zxdh_umodify_cq, ++ .destroy_qp = zxdh_udestroy_qp, ++ .destroy_srq = zxdh_udestroy_srq, ++ .detach_mcast = zxdh_udetach_mcast, ++ .modify_qp = zxdh_umodify_qp, ++ .modify_srq = zxdh_umodify_srq, ++ .poll_cq = zxdh_upoll_cq, ++ .post_recv = zxdh_upost_recv, ++ .post_send = zxdh_upost_send, ++ .post_srq_recv = zxdh_upost_srq_recv, ++ .query_device_ex = zxdh_uquery_device_ex, ++ .query_port = zxdh_uquery_port, ++ .query_qp = zxdh_uquery_qp, ++ .query_srq = zxdh_uquery_srq, ++ .reg_mr = zxdh_ureg_mr, ++ .rereg_mr = zxdh_urereg_mr, ++ .req_notify_cq = zxdh_uarm_cq, ++ .resize_cq = zxdh_uresize_cq, ++ .free_context = zxdh_ufree_context, ++ .get_srq_num = zxdh_uget_srq_num, ++}; ++ ++/** ++ * zxdh_ualloc_context - allocate context for user app ++ * @ibdev: ib device created during zxdh_driver_init ++ * @cmd_fd: save fd for the device ++ * @private_data: device private data ++ * ++ * Returns callback routine table and calls driver for allocating ++ * context and getting back resource information to return as ibv_context. 
++ */ ++static struct verbs_context *zxdh_ualloc_context(struct ibv_device *ibdev, ++ int cmd_fd, void *private_data) ++{ ++ struct ibv_pd *ibv_pd; ++ struct zxdh_uvcontext *iwvctx; ++ struct zxdh_get_context cmd; ++ struct zxdh_get_context_resp resp = {}; ++ __u64 sq_db_mmap_key, cq_db_mmap_key; ++ __u8 user_ver = ZXDH_ABI_VER; ++ ++ iwvctx = verbs_init_and_alloc_context(ibdev, cmd_fd, iwvctx, ibv_ctx, ++ RDMA_DRIVER_ZXDH); ++ if (!iwvctx) ++ return NULL; ++ ++ zxdh_set_debug_mask(); ++ iwvctx->zxdh_write_imm_split_switch = zxdh_get_write_imm_split_switch(); ++ cmd.userspace_ver = user_ver; ++ if (ibv_cmd_get_context(&iwvctx->ibv_ctx, ++ (struct ibv_get_context *)&cmd, sizeof(cmd), ++ &resp.ibv_resp, sizeof(resp))) { ++ cmd.userspace_ver = 4; ++ if (ibv_cmd_get_context( ++ &iwvctx->ibv_ctx, (struct ibv_get_context *)&cmd, ++ sizeof(cmd), &resp.ibv_resp, sizeof(resp))) ++ goto err_free; ++ user_ver = cmd.userspace_ver; ++ } ++ ++ verbs_set_ops(&iwvctx->ibv_ctx, &zxdh_uctx_ops); ++ ++ iwvctx->dev_attrs.feature_flags = resp.feature_flags; ++ iwvctx->dev_attrs.hw_rev = resp.hw_rev; ++ iwvctx->dev_attrs.max_hw_wq_frags = resp.max_hw_wq_frags; ++ iwvctx->dev_attrs.max_hw_read_sges = resp.max_hw_read_sges; ++ iwvctx->dev_attrs.max_hw_inline = resp.max_hw_inline; ++ iwvctx->dev_attrs.max_hw_rq_quanta = resp.max_hw_rq_quanta; ++ iwvctx->dev_attrs.max_hw_srq_quanta = resp.max_hw_srq_quanta; ++ iwvctx->dev_attrs.max_hw_wq_quanta = resp.max_hw_wq_quanta; ++ iwvctx->dev_attrs.max_hw_srq_wr = resp.max_hw_srq_wr; ++ iwvctx->dev_attrs.max_hw_sq_chunk = resp.max_hw_sq_chunk; ++ iwvctx->dev_attrs.max_hw_cq_size = resp.max_hw_cq_size; ++ iwvctx->dev_attrs.min_hw_cq_size = resp.min_hw_cq_size; ++ iwvctx->abi_ver = user_ver; ++ ++ sq_db_mmap_key = resp.sq_db_mmap_key; ++ cq_db_mmap_key = resp.cq_db_mmap_key; ++ ++ iwvctx->sq_db = zxdh_mmap(cmd_fd, sq_db_mmap_key); ++ if (iwvctx->sq_db == MAP_FAILED) ++ goto err_free; ++ ++ iwvctx->cq_db = zxdh_mmap(cmd_fd, cq_db_mmap_key); ++ if (iwvctx->cq_db == MAP_FAILED) { ++ zxdh_munmap(iwvctx->sq_db); ++ goto err_free; ++ } ++ ibv_pd = zxdh_ualloc_pd(&iwvctx->ibv_ctx.context); ++ if (!ibv_pd) { ++ zxdh_munmap(iwvctx->sq_db); ++ zxdh_munmap(iwvctx->cq_db); ++ goto err_free; ++ } ++ ++ ibv_pd->context = &iwvctx->ibv_ctx.context; ++ iwvctx->iwupd = container_of(ibv_pd, struct zxdh_upd, ibv_pd); ++ add_private_ops(iwvctx); ++ return &iwvctx->ibv_ctx; ++ ++err_free: ++ free(iwvctx); ++ ++ return NULL; ++} ++ ++static void zxdh_uninit_device(struct verbs_device *verbs_device) ++{ ++ struct zxdh_udevice *dev; ++ ++ dev = container_of(&verbs_device->device, struct zxdh_udevice, ++ ibv_dev.device); ++ free(dev); ++} ++ ++static struct verbs_device *zxdh_device_alloc(struct verbs_sysfs_dev *sysfs_dev) ++{ ++ struct zxdh_udevice *dev; ++ ++ dev = calloc(1, sizeof(*dev)); ++ if (!dev) ++ return NULL; ++ ++ return &dev->ibv_dev; ++} ++ ++static const struct verbs_device_ops zxdh_udev_ops = { ++ .alloc_context = zxdh_ualloc_context, ++ .alloc_device = zxdh_device_alloc, ++ .match_max_abi_version = ZXDH_MAX_ABI_VERSION, ++ .match_min_abi_version = ZXDH_MIN_ABI_VERSION, ++ .match_table = hca_table, ++ .name = "zxdh", ++ .uninit_device = zxdh_uninit_device, ++}; ++ ++PROVIDER_DRIVER(zxdh, zxdh_udev_ops); +diff --git a/providers/zrdma/main.h b/providers/zrdma/main.h +new file mode 100644 +index 0000000..e28c77b +--- /dev/null ++++ b/providers/zrdma/main.h +@@ -0,0 +1,223 @@ ++/* SPDX-License-Identifier: GPL-2.0 or Linux-OpenIB */ ++/* Copyright (c) 2024 ZTE Corporation. 
All rights reserved. */ ++#ifndef ZXDH_UMAIN_H ++#define ZXDH_UMAIN_H ++ ++#include ++#include ++#include ++#include ++#include ++ ++#include "zxdh_defs.h" ++#include "zxdh_status.h" ++#include "zxdh_verbs.h" ++ ++#define ZXDH_BASE_PUSH_PAGE 1 ++#define ZXDH_U_MINCQ_SIZE 4 ++#define ZXDH_DB_SHADOW_AREA_SIZE 8 ++#define ZXDH_DB_SQ_OFFSET 0x404 ++#define ZXDH_DB_CQ_OFFSET 0x588 ++ ++#define MIN_UDP_SPORT 1024 ++#define MIN_QP_QPN 1 ++ ++enum zxdh_supported_wc_flags { ++ ZXDH_CQ_SUPPORTED_WC_FLAGS = ++ IBV_WC_EX_WITH_BYTE_LEN | IBV_WC_EX_WITH_IMM | ++ IBV_WC_EX_WITH_QP_NUM | IBV_WC_EX_WITH_SRC_QP | ++ IBV_WC_EX_WITH_SLID | IBV_WC_EX_WITH_SL | ++ IBV_WC_EX_WITH_DLID_PATH_BITS | ++ IBV_WC_EX_WITH_COMPLETION_TIMESTAMP_WALLCLOCK | ++ IBV_WC_EX_WITH_COMPLETION_TIMESTAMP, ++}; ++ ++enum { ++ ZXDH_DBG_QP = 1 << 0, ++ ZXDH_DBG_CQ = 1 << 1, ++ ZXDH_DBG_SRQ = 1 << 2, ++}; ++extern uint32_t zxdh_debug_mask; ++#define zxdh_dbg(ctx, mask, format, arg...) \ ++ do { \ ++ if (mask & zxdh_debug_mask) { \ ++ int zxdh_dbg_tmp = errno; \ ++ verbs_debug(ctx, format, ##arg); \ ++ errno = zxdh_dbg_tmp; \ ++ } \ ++ } while (0) ++ ++struct zxdh_udevice { ++ struct verbs_device ibv_dev; ++}; ++ ++struct zxdh_uah { ++ struct ibv_ah ibv_ah; ++ uint32_t ah_id; ++ struct ibv_global_route grh; ++}; ++ ++struct zxdh_upd { ++ struct ibv_pd ibv_pd; ++ void *arm_cq_page; ++ void *arm_cq; ++ uint32_t pd_id; ++}; ++ ++struct zxdh_uvcontext { ++ struct verbs_context ibv_ctx; ++ struct zxdh_upd *iwupd; ++ struct zxdh_dev_attrs dev_attrs; ++ void *db; ++ void *sq_db; ++ void *cq_db; ++ int abi_ver; ++ bool legacy_mode; ++ uint8_t zxdh_write_imm_split_switch; ++ struct zxdh_uvcontext_ops *cxt_ops; ++}; ++ ++struct zxdh_uqp; ++ ++struct zxdh_cq_buf { ++ struct list_node list; ++ struct zxdh_cq cq; ++ struct verbs_mr vmr; ++}; ++ ++struct zxdh_ucq { ++ struct verbs_cq verbs_cq; ++ struct verbs_mr vmr; ++ struct verbs_mr vmr_shadow_area; ++ pthread_spinlock_t lock; ++ size_t buf_size; ++ bool is_armed; ++ enum zxdh_cmpl_notify last_notify; ++ int comp_vector; ++ uint32_t report_rtt; ++ struct zxdh_uqp *uqp; ++ struct zxdh_cq cq; ++ struct list_head resize_list; ++ /* for extended CQ completion fields */ ++ struct zxdh_cq_poll_info cur_cqe; ++ bool resize_enable; ++}; ++ ++struct zxdh_usrq { ++ struct ibv_srq ibv_srq; ++ struct verbs_mr vmr; ++ struct verbs_mr list_vmr; ++ struct verbs_mr db_vmr; ++ size_t total_buf_size; ++ size_t buf_size; ++ size_t list_buf_size; ++ size_t db_buf_size; ++ size_t srq_size; ++ size_t srq_list_size; ++ uint32_t srq_id; ++ uint32_t max_wr; ++ uint32_t max_sge; ++ uint32_t srq_limit; ++ pthread_spinlock_t lock; ++ uint32_t wq_size; ++ struct ibv_recv_wr *pend_rx_wr; ++ struct zxdh_srq srq; ++}; ++ ++struct zxdh_uqp { ++ struct verbs_qp vqp; ++ struct zxdh_ucq *send_cq; ++ struct zxdh_ucq *recv_cq; ++ struct zxdh_usrq *srq; ++ struct verbs_mr vmr; ++ size_t buf_size; ++ uint32_t zxdh_drv_opt; ++ pthread_spinlock_t lock; ++ uint16_t sq_sig_all; ++ uint16_t qperr; ++ uint16_t rsvd; ++ uint32_t pending_rcvs; ++ uint32_t wq_size; ++ struct ibv_recv_wr *pend_rx_wr; ++ struct zxdh_qp qp; ++ enum ibv_qp_type qp_type; ++ struct zxdh_sge *recv_sges; ++ uint8_t is_srq; ++ uint8_t inline_data[ZXDH_MAX_INLINE_DATA_SIZE]; ++}; ++ ++struct zxdh_umr { ++ struct verbs_mr vmr; ++ uint32_t acc_flags; ++ uint8_t leaf_pbl_size; ++ uint8_t host_page_size; ++ uint64_t mr_pa_pble_index; ++}; ++ ++/* zxdh_verbs.c */ ++int zxdh_uquery_device_ex(struct ibv_context *context, ++ const struct ibv_query_device_ex_input *input, ++ 
struct ibv_device_attr_ex *attr, size_t attr_size); ++int zxdh_uquery_port(struct ibv_context *context, uint8_t port, ++ struct ibv_port_attr *attr); ++struct ibv_pd *zxdh_ualloc_pd(struct ibv_context *context); ++int zxdh_ufree_pd(struct ibv_pd *pd); ++struct ibv_mr *zxdh_ureg_mr(struct ibv_pd *pd, void *addr, size_t length, ++ uint64_t hca_va, int access); ++int zxdh_udereg_mr(struct verbs_mr *vmr); ++ ++int zxdh_urereg_mr(struct verbs_mr *mr, int flags, struct ibv_pd *pd, ++ void *addr, size_t length, int access); ++ ++struct ibv_mw *zxdh_ualloc_mw(struct ibv_pd *pd, enum ibv_mw_type type); ++int zxdh_ubind_mw(struct ibv_qp *qp, struct ibv_mw *mw, ++ struct ibv_mw_bind *mw_bind); ++int zxdh_udealloc_mw(struct ibv_mw *mw); ++struct ibv_cq *zxdh_ucreate_cq(struct ibv_context *context, int cqe, ++ struct ibv_comp_channel *channel, ++ int comp_vector); ++struct ibv_cq_ex *zxdh_ucreate_cq_ex(struct ibv_context *context, ++ struct ibv_cq_init_attr_ex *attr_ex); ++void zxdh_ibvcq_ex_fill_priv_funcs(struct zxdh_ucq *iwucq, ++ struct ibv_cq_init_attr_ex *attr_ex); ++int zxdh_uresize_cq(struct ibv_cq *cq, int cqe); ++int zxdh_udestroy_cq(struct ibv_cq *cq); ++int zxdh_umodify_cq(struct ibv_cq *cq, struct ibv_modify_cq_attr *attr); ++int zxdh_upoll_cq(struct ibv_cq *cq, int entries, struct ibv_wc *entry); ++int zxdh_uarm_cq(struct ibv_cq *cq, int solicited); ++void zxdh_cq_event(struct ibv_cq *cq); ++struct ibv_qp *zxdh_ucreate_qp(struct ibv_pd *pd, ++ struct ibv_qp_init_attr *attr); ++struct ibv_qp *zxdh_ucreate_qp_ex(struct ibv_context *context, ++ struct ibv_qp_init_attr_ex *attr); ++int zxdh_uquery_qp(struct ibv_qp *qp, struct ibv_qp_attr *attr, int attr_mask, ++ struct ibv_qp_init_attr *init_attr); ++int zxdh_umodify_qp(struct ibv_qp *qp, struct ibv_qp_attr *attr, int attr_mask); ++int zxdh_udestroy_qp(struct ibv_qp *qp); ++int zxdh_upost_send(struct ibv_qp *ib_qp, struct ibv_send_wr *ib_wr, ++ struct ibv_send_wr **bad_wr); ++int zxdh_upost_recv(struct ibv_qp *ib_qp, struct ibv_recv_wr *ib_wr, ++ struct ibv_recv_wr **bad_wr); ++struct ibv_srq *zxdh_ucreate_srq(struct ibv_pd *pd, ++ struct ibv_srq_init_attr *srq_init_attr); ++int zxdh_udestroy_srq(struct ibv_srq *srq); ++int zxdh_umodify_srq(struct ibv_srq *srq, struct ibv_srq_attr *srq_attr, ++ int srq_attr_mask); ++int zxdh_uquery_srq(struct ibv_srq *srq, struct ibv_srq_attr *srq_attr); ++int zxdh_upost_srq_recv(struct ibv_srq *srq, struct ibv_recv_wr *recv_wr, ++ struct ibv_recv_wr **bad_recv_wr); ++int zxdh_uget_srq_num(struct ibv_srq *srq, uint32_t *srq_num); ++struct ibv_ah *zxdh_ucreate_ah(struct ibv_pd *ibpd, struct ibv_ah_attr *attr); ++int zxdh_udestroy_ah(struct ibv_ah *ibah); ++int zxdh_uattach_mcast(struct ibv_qp *qp, const union ibv_gid *gid, ++ uint16_t lid); ++int zxdh_udetach_mcast(struct ibv_qp *qp, const union ibv_gid *gid, ++ uint16_t lid); ++void zxdh_async_event(struct ibv_context *context, ++ struct ibv_async_event *event); ++void zxdh_set_hw_attrs(struct zxdh_hw_attrs *attrs); ++void *zxdh_mmap(int fd, off_t offset); ++void zxdh_munmap(void *map); ++void zxdh_set_debug_mask(void); ++int zxdh_get_write_imm_split_switch(void); ++#endif /* ZXDH_UMAIN_H */ +diff --git a/providers/zrdma/private_verbs_cmd.c b/providers/zrdma/private_verbs_cmd.c +new file mode 100644 +index 0000000..68bba23 +--- /dev/null ++++ b/providers/zrdma/private_verbs_cmd.c +@@ -0,0 +1,201 @@ ++// SPDX-License-Identifier: GPL-2.0 or Linux-OpenIB ++/* Copyright (c) 2024 ZTE Corporation. All rights reserved. 
*/ ++#include ++#include ++#include "private_verbs_cmd.h" ++#include "zxdh_dv.h" ++ ++static void copy_query_qpc(struct zxdh_query_qpc_resp *resp, ++ struct zxdh_rdma_qpc *qpc) ++{ ++ qpc->ack_err_flag = resp->ack_err_flag; ++ qpc->retry_flag = resp->retry_flag; ++ qpc->rnr_retry_flag = resp->rnr_retry_flag; ++ qpc->cur_retry_count = resp->cur_retry_count; ++ qpc->retry_cqe_sq_opcode = resp->retry_cqe_sq_opcode; ++ qpc->err_flag = resp->err_flag; ++ qpc->package_err_flag = resp->package_err_flag; ++ qpc->recv_err_flag = resp->recv_err_flag; ++ qpc->tx_last_ack_psn = resp->tx_last_ack_psn; ++ qpc->retry_count = resp->retry_count; ++ qpc->read_retry_flag = resp->read_retry_flag; ++} ++ ++static int _zxdh_query_qpc(struct ibv_qp *qp, struct zxdh_rdma_qpc *qpc) ++{ ++ DECLARE_COMMAND_BUFFER(cmd, ZXDH_IB_OBJECT_QP_OBJ, ++ ZXDH_IB_METHOD_QP_QUERY_QPC, 2); ++ int ret; ++ struct zxdh_query_qpc_resp resp_ex = { 0 }; ++ ++ fill_attr_in_obj(cmd, ZXDH_IB_ATTR_QP_QUERY_HANDLE, qp->handle); ++ fill_attr_out_ptr(cmd, ZXDH_IB_ATTR_QP_QUERY_RESP, &resp_ex); ++ ++ ret = execute_ioctl(qp->context, cmd); ++ if (ret) ++ return ret; ++ ++ copy_query_qpc(&resp_ex, qpc); ++ return 0; ++} ++ ++static void copy_modify_qpc_fields(struct zxdh_modify_qpc_req *req_cmd, ++ uint64_t attr_mask, ++ struct zxdh_rdma_qpc *qpc) ++{ ++ if (attr_mask & ZXDH_TX_READ_RETRY_FLAG_SET) { ++ req_cmd->retry_flag = qpc->retry_flag; ++ req_cmd->rnr_retry_flag = qpc->rnr_retry_flag; ++ req_cmd->read_retry_flag = qpc->read_retry_flag; ++ req_cmd->cur_retry_count = qpc->cur_retry_count; ++ } ++ if (attr_mask & ZXDH_RETRY_CQE_SQ_OPCODE) ++ req_cmd->retry_cqe_sq_opcode = qpc->retry_cqe_sq_opcode; ++ ++ if (attr_mask & ZXDH_ERR_FLAG_SET) { ++ req_cmd->err_flag = qpc->err_flag; ++ req_cmd->ack_err_flag = qpc->ack_err_flag; ++ } ++ if (attr_mask & ZXDH_PACKAGE_ERR_FLAG) ++ req_cmd->package_err_flag = qpc->package_err_flag; ++} ++ ++static int _zxdh_reset_qp(struct ibv_qp *qp, uint64_t opcode) ++{ ++ DECLARE_COMMAND_BUFFER(cmd, ZXDH_IB_OBJECT_QP_OBJ, ++ ZXDH_IB_METHOD_QP_RESET_QP, 2); ++ ++ fill_attr_in_obj(cmd, ZXDH_IB_ATTR_QP_RESET_QP_HANDLE, qp->handle); ++ fill_attr_in_uint64(cmd, ZXDH_IB_ATTR_QP_RESET_OP_CODE, opcode); ++ return execute_ioctl(qp->context, cmd); ++} ++ ++static int _zxdh_modify_qpc(struct ibv_qp *qp, struct zxdh_rdma_qpc *qpc, ++ uint64_t qpc_mask) ++{ ++ DECLARE_COMMAND_BUFFER(cmd, ZXDH_IB_OBJECT_QP_OBJ, ++ ZXDH_IB_METHOD_QP_MODIFY_QPC, 3); ++ struct zxdh_modify_qpc_req req = { 0 }; ++ ++ copy_modify_qpc_fields(&req, qpc_mask, qpc); ++ fill_attr_in_obj(cmd, ZXDH_IB_ATTR_QP_QUERY_HANDLE, qp->handle); ++ fill_attr_in_uint64(cmd, ZXDH_IB_ATTR_QP_MODIFY_QPC_MASK, qpc_mask); ++ fill_attr_in_ptr(cmd, ZXDH_IB_ATTR_QP_MODIFY_QPC_REQ, &req); ++ return execute_ioctl(qp->context, cmd); ++} ++ ++static int _zxdh_modify_qp_udp_sport(struct ibv_context *ibctx, ++ uint16_t udp_sport, uint32_t qpn) ++{ ++ if (udp_sport <= MIN_UDP_SPORT || qpn <= MIN_QP_QPN) ++ return -EINVAL; ++ ++ DECLARE_COMMAND_BUFFER(cmd, ZXDH_IB_OBJECT_QP_OBJ, ++ ZXDH_IB_METHOD_QP_MODIFY_UDP_SPORT, 2); ++ fill_attr_in(cmd, ZXDH_IB_ATTR_QP_UDP_PORT, &udp_sport, ++ sizeof(udp_sport)); ++ fill_attr_in_uint32(cmd, ZXDH_IB_ATTR_QP_QPN, qpn); ++ return execute_ioctl(ibctx, cmd); ++} ++ ++static int _zxdh_get_log_trace_switch(struct ibv_context *ibctx, ++ uint8_t *switch_status) ++{ ++ DECLARE_COMMAND_BUFFER(cmd, ZXDH_IB_OBJECT_DEV, ++ ZXDH_IB_METHOD_DEV_GET_LOG_TRACE, 1); ++ ++ fill_attr_out_ptr(cmd, ZXDH_IB_ATTR_DEV_GET_LOG_TARCE_SWITCH, ++ switch_status); ++ return 
execute_ioctl(ibctx, cmd); ++} ++ ++static int _zxdh_set_log_trace_switch(struct ibv_context *ibctx, ++ uint8_t switch_status) ++{ ++ DECLARE_COMMAND_BUFFER(cmd, ZXDH_IB_OBJECT_DEV, ++ ZXDH_IB_METHOD_DEV_SET_LOG_TRACE, 1); ++ fill_attr_in(cmd, ZXDH_IB_ATTR_DEV_SET_LOG_TARCE_SWITCH, &switch_status, ++ sizeof(switch_status)); ++ return execute_ioctl(ibctx, cmd); ++} ++ ++static struct zxdh_uvcontext_ops zxdh_ctx_ops = { ++ .modify_qp_udp_sport = _zxdh_modify_qp_udp_sport, ++ .get_log_trace_switch = _zxdh_get_log_trace_switch, ++ .set_log_trace_switch = _zxdh_set_log_trace_switch, ++ .query_qpc = _zxdh_query_qpc, ++ .modify_qpc = _zxdh_modify_qpc, ++ .reset_qp = _zxdh_reset_qp, ++}; ++ ++static inline struct zxdh_uvcontext *to_zxdhtx(struct ibv_context *ibctx) ++{ ++ return container_of(ibctx, struct zxdh_uvcontext, ibv_ctx.context); ++} ++ ++int zxdh_reset_qp(struct ibv_qp *qp, uint64_t opcode) ++{ ++ struct zxdh_uvcontext_ops *dvops = to_zxdhtx(qp->context)->cxt_ops; ++ ++ if (!dvops || !dvops->reset_qp) ++ return -EOPNOTSUPP; ++ return dvops->reset_qp(qp, opcode); ++} ++ ++int zxdh_modify_qpc(struct ibv_qp *qp, struct zxdh_rdma_qpc *qpc, ++ uint64_t qpc_mask) ++{ ++ struct zxdh_uvcontext_ops *dvops = to_zxdhtx(qp->context)->cxt_ops; ++ ++ if (!dvops || !dvops->modify_qpc) ++ return -EOPNOTSUPP; ++ return dvops->modify_qpc(qp, qpc, qpc_mask); ++} ++ ++int zxdh_query_qpc(struct ibv_qp *qp, struct zxdh_rdma_qpc *qpc) ++{ ++ struct zxdh_uvcontext_ops *dvops = to_zxdhtx(qp->context)->cxt_ops; ++ ++ if (!dvops || !dvops->query_qpc) ++ return -EOPNOTSUPP; ++ ++ return dvops->query_qpc(qp, qpc); ++} ++ ++int zxdh_modify_qp_udp_sport(struct ibv_context *context, uint16_t udp_sport, ++ uint32_t qpn) ++{ ++ struct zxdh_uvcontext_ops *dvops = to_zxdhtx(context)->cxt_ops; ++ ++ if (!dvops || !dvops->modify_qp_udp_sport) ++ return -EOPNOTSUPP; ++ ++ return dvops->modify_qp_udp_sport(context, udp_sport, qpn); ++} ++ ++int zxdh_get_log_trace_switch(struct ibv_context *context, ++ enum switch_status *switch_status) ++{ ++ struct zxdh_uvcontext_ops *dvops = to_zxdhtx(context)->cxt_ops; ++ ++ if (!dvops || !dvops->get_log_trace_switch) ++ return -EOPNOTSUPP; ++ ++ return dvops->get_log_trace_switch(context, (uint8_t *)switch_status); ++} ++ ++int zxdh_set_log_trace_switch(struct ibv_context *context, ++ enum switch_status switch_status) ++{ ++ struct zxdh_uvcontext_ops *dvops = to_zxdhtx(context)->cxt_ops; ++ ++ if (!dvops || !dvops->set_log_trace_switch) ++ return -EOPNOTSUPP; ++ ++ return dvops->set_log_trace_switch(context, switch_status); ++} ++ ++void add_private_ops(struct zxdh_uvcontext *iwvctx) ++{ ++ iwvctx->cxt_ops = &zxdh_ctx_ops; ++} +diff --git a/providers/zrdma/private_verbs_cmd.h b/providers/zrdma/private_verbs_cmd.h +new file mode 100644 +index 0000000..32d0d68 +--- /dev/null ++++ b/providers/zrdma/private_verbs_cmd.h +@@ -0,0 +1,24 @@ ++/* SPDX-License-Identifier: GPL-2.0 or Linux-OpenIB */ ++/* Copyright (c) 2024 ZTE Corporation. All rights reserved. 
*/ ++#ifndef ZXDH_RDMA_PRIVATE_VERBS_CMD_H ++#define ZXDH_RDMA_PRIVATE_VERBS_CMD_H ++ ++#include "main.h" ++#include "zxdh_dv.h" ++ ++struct zxdh_uvcontext_ops { ++ int (*modify_qp_udp_sport)(struct ibv_context *ibctx, ++ uint16_t udp_sport, uint32_t qpn); ++ int (*set_log_trace_switch)(struct ibv_context *ibctx, ++ uint8_t switch_status); ++ int (*get_log_trace_switch)(struct ibv_context *ibctx, ++ uint8_t *switch_status); ++ int (*query_qpc)(struct ibv_qp *qp, struct zxdh_rdma_qpc *qpc); ++ int (*modify_qpc)(struct ibv_qp *qp, struct zxdh_rdma_qpc *qpc, ++ uint64_t qpc_mask); ++ int (*reset_qp)(struct ibv_qp *qp, uint64_t opcode); ++}; ++ ++void add_private_ops(struct zxdh_uvcontext *iwvctx); ++ ++#endif +diff --git a/providers/zrdma/zxdh_abi.h b/providers/zrdma/zxdh_abi.h +new file mode 100644 +index 0000000..f3cff03 +--- /dev/null ++++ b/providers/zrdma/zxdh_abi.h +@@ -0,0 +1,36 @@ ++/* SPDX-License-Identifier: GPL-2.0 or Linux-OpenIB */ ++/* Copyright (c) 2024 ZTE Corporation. All rights reserved. */ ++#ifndef PROVIDER_ZXDH_ABI_H ++#define PROVIDER_ZXDH_ABI_H ++ ++#include ++#include ++#include ++#include "zxdh_verbs.h" ++ ++#define ZXDH_MIN_ABI_VERSION 0 ++#define ZXDH_MAX_ABI_VERSION 5 ++ ++DECLARE_DRV_CMD(zxdh_ualloc_pd, IB_USER_VERBS_CMD_ALLOC_PD, empty, ++ zxdh_alloc_pd_resp); ++DECLARE_DRV_CMD(zxdh_ucreate_cq, IB_USER_VERBS_CMD_CREATE_CQ, ++ zxdh_create_cq_req, zxdh_create_cq_resp); ++DECLARE_DRV_CMD(zxdh_ucreate_cq_ex, IB_USER_VERBS_EX_CMD_CREATE_CQ, ++ zxdh_create_cq_req, zxdh_create_cq_resp); ++DECLARE_DRV_CMD(zxdh_uresize_cq, IB_USER_VERBS_CMD_RESIZE_CQ, ++ zxdh_resize_cq_req, empty); ++DECLARE_DRV_CMD(zxdh_ucreate_qp, IB_USER_VERBS_CMD_CREATE_QP, ++ zxdh_create_qp_req, zxdh_create_qp_resp); ++DECLARE_DRV_CMD(zxdh_umodify_qp, IB_USER_VERBS_EX_CMD_MODIFY_QP, ++ zxdh_modify_qp_req, zxdh_modify_qp_resp); ++DECLARE_DRV_CMD(zxdh_get_context, IB_USER_VERBS_CMD_GET_CONTEXT, ++ zxdh_alloc_ucontext_req, zxdh_alloc_ucontext_resp); ++DECLARE_DRV_CMD(zxdh_ureg_mr, IB_USER_VERBS_CMD_REG_MR, zxdh_mem_reg_req, ++ zxdh_reg_mr_resp); ++DECLARE_DRV_CMD(zxdh_urereg_mr, IB_USER_VERBS_CMD_REREG_MR, zxdh_mem_reg_req, ++ empty); ++DECLARE_DRV_CMD(zxdh_ucreate_ah, IB_USER_VERBS_CMD_CREATE_AH, empty, ++ zxdh_create_ah_resp); ++DECLARE_DRV_CMD(zxdh_ucreate_srq, IB_USER_VERBS_CMD_CREATE_SRQ, ++ zxdh_create_srq_req, zxdh_create_srq_resp); ++#endif /* PROVIDER_ZXDH_ABI_H */ +diff --git a/providers/zrdma/zxdh_defs.h b/providers/zrdma/zxdh_defs.h +new file mode 100644 +index 0000000..eaf73ca +--- /dev/null ++++ b/providers/zrdma/zxdh_defs.h +@@ -0,0 +1,399 @@ ++/* SPDX-License-Identifier: GPL-2.0 or Linux-OpenIB */ ++/* Copyright (c) 2024 ZTE Corporation. All rights reserved. 
*/ ++#ifndef ZXDH_DEFS_H ++#define ZXDH_DEFS_H ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#define ZXDH_RECV_ERR_FLAG_NAK_RNR_NAK 1 ++#define ZXDH_RECV_ERR_FLAG_READ_RESP 2 ++#define ZXDH_RETRY_CQE_SQ_OPCODE_ERR 32 ++#define ZXDH_QP_RETRY_COUNT 2 ++#define ZXDH_RESET_RETRY_CQE_SQ_OPCODE_ERR 0x1f ++ ++#define ZXDH_QP_TYPE_ROCE_RC 1 ++#define ZXDH_QP_TYPE_ROCE_UD 2 ++ ++#define ZXDH_HW_PAGE_SIZE 4096 ++#define ZXDH_HW_PAGE_SHIFT 12 ++#define ZXDH_CQE_QTYPE_RQ 0 ++#define ZXDH_CQE_QTYPE_SQ 1 ++ ++#define ZXDH_MAX_SQ_WQES_PER_PAGE 128 ++#define ZXDH_MAX_SQ_DEPTH 32768 ++ ++#define ZXDH_QP_SW_MIN_WQSIZE 64u /* in WRs*/ ++#define ZXDH_QP_WQE_MIN_SIZE 32 ++#define ZXDH_QP_SQE_MIN_SIZE 32 ++#define ZXDH_QP_RQE_MIN_SIZE 16 ++#define ZXDH_QP_WQE_MAX_SIZE 256 ++#define ZXDH_QP_WQE_MIN_QUANTA 1 ++#define ZXDH_MAX_RQ_WQE_SHIFT_GEN1 2 ++#define ZXDH_MAX_RQ_WQE_SHIFT_GEN2 3 ++#define ZXDH_SRQ_FRAG_BYTESIZE 16 ++#define ZXDH_QP_FRAG_BYTESIZE 16 ++#define ZXDH_SQ_WQE_BYTESIZE 32 ++#define ZXDH_SRQ_WQE_MIN_SIZE 16 ++ ++#define ZXDH_SQ_RSVD 258 ++#define ZXDH_RQ_RSVD 1 ++#define ZXDH_SRQ_RSVD 1 ++ ++#define ZXDH_FEATURE_RTS_AE 1ULL ++#define ZXDH_FEATURE_CQ_RESIZE 2ULL ++#define ZXDHQP_OP_RDMA_WRITE 0x00 ++#define ZXDHQP_OP_RDMA_READ 0x01 ++#define ZXDHQP_OP_RDMA_SEND 0x03 ++#define ZXDHQP_OP_RDMA_SEND_INV 0x04 ++#define ZXDHQP_OP_RDMA_SEND_SOL_EVENT 0x05 ++#define ZXDHQP_OP_RDMA_SEND_SOL_EVENT_INV 0x06 ++#define ZXDHQP_OP_BIND_MW 0x08 ++#define ZXDHQP_OP_FAST_REGISTER 0x09 ++#define ZXDHQP_OP_LOCAL_INVALIDATE 0x0a ++#define ZXDHQP_OP_RDMA_READ_LOC_INV 0x0b ++#define ZXDHQP_OP_NOP 0x0c ++ ++#define ZXDH_CQPHC_QPCTX GENMASK_ULL(63, 0) ++#define ZXDH_QP_DBSA_HW_SQ_TAIL GENMASK_ULL(14, 0) ++#define ZXDH_CQ_DBSA_CQEIDX GENMASK_ULL(22, 0) ++#define ZXDH_CQ_DBSA_SW_CQ_SELECT GENMASK_ULL(28, 23) ++#define ZXDH_CQ_DBSA_ARM_NEXT BIT_ULL(31) ++// #define ZXDH_CQ_DBSA_ARM_NEXT_SE BIT_ULL(15) ++#define ZXDH_CQ_DBSA_ARM_SEQ_NUM GENMASK_ULL(30, 29) ++#define ZXDH_CQ_ARM_CQ_ID_S 10 ++#define ZXDH_CQ_ARM_CQ_ID GENMASK_ULL(29, 10) ++#define ZXDH_CQ_ARM_DBSA_VLD_S 30 ++#define ZXDH_CQ_ARM_DBSA_VLD BIT_ULL(30) ++ ++/* CQP and iWARP Completion Queue */ ++#define ZXDH_CQ_QPCTX ZXDH_CQPHC_QPCTX ++ ++#define ZXDH_CQ_MINERR GENMASK_ULL(22, 7) ++#define ZXDH_CQ_MAJERR GENMASK_ULL(38, 23) ++#define ZXDH_CQ_WQEIDX GENMASK_ULL(54, 40) ++#define ZXDH_CQ_EXTCQE BIT_ULL(50) ++#define ZXDH_OOO_CMPL BIT_ULL(54) ++#define ZXDH_CQ_ERROR BIT_ULL(39) ++#define ZXDH_CQ_SQ BIT_ULL(4) ++ ++#define ZXDH_CQ_VALID BIT_ULL(5) ++#define ZXDH_CQ_IMMVALID BIT_ULL(0) ++#define ZXDH_CQ_UDSMACVALID BIT_ULL(26) ++#define ZXDH_CQ_UDVLANVALID BIT_ULL(27) ++#define ZXDH_CQ_IMMDATA GENMASK_ULL(31, 0) ++#define ZXDH_CQ_UDSMAC GENMASK_ULL(47, 0) ++#define ZXDH_CQ_UDVLAN GENMASK_ULL(63, 48) ++ ++#define ZXDH_CQ_IMMDATA_S 0 ++#define ZXDH_CQ_IMMDATA_M (0xffffffffffffffffULL << ZXDH_CQ_IMMVALID_S) ++#define ZXDH_CQ_IMMDATALOW32 GENMASK_ULL(31, 0) ++#define ZXDH_CQ_IMMDATAUP32 GENMASK_ULL(63, 32) ++#define ZXDHCQ_PAYLDLEN GENMASK_ULL(63, 32) ++#define ZXDHCQ_TCPSEQNUMRTT GENMASK_ULL(63, 32) ++#define ZXDHCQ_INVSTAG_S 11 ++#define ZXDHCQ_INVSTAG GENMASK_ULL(42, 11) ++#define ZXDHCQ_QPID GENMASK_ULL(63, 44) ++ ++#define ZXDHCQ_UDSRCQPN GENMASK_ULL(24, 1) ++#define ZXDHCQ_PSHDROP BIT_ULL(51) ++#define ZXDHCQ_STAG_S 43 ++#define ZXDHCQ_STAG BIT_ULL(43) ++#define ZXDHCQ_IPV4 BIT_ULL(25) ++#define ZXDHCQ_SOEVENT BIT_ULL(6) ++#define ZXDHCQ_OP GENMASK_ULL(63, 58) ++ ++/* Manage Push Page - MPP */ ++#define 
ZXDH_INVALID_PUSH_PAGE_INDEX_GEN_1 0xffff ++#define ZXDH_INVALID_PUSH_PAGE_INDEX 0xffffffff ++ ++#define ZXDHQPSQ_OPCODE GENMASK_ULL(62, 57) ++#define ZXDHQPSQ_COPY_HOST_PBL BIT_ULL(43) ++#define ZXDHQPSQ_ADDFRAGCNT GENMASK_ULL(39, 32) ++#define ZXDHQPSQ_PUSHWQE BIT_ULL(56) ++#define ZXDHQPSQ_STREAMMODE BIT_ULL(58) ++#define ZXDHQPSQ_WAITFORRCVPDU BIT_ULL(59) ++#define ZXDHQPSQ_READFENCE BIT_ULL(54) ++#define ZXDHQPSQ_LOCALFENCE BIT_ULL(55) ++#define ZXDHQPSQ_UDPHEADER BIT_ULL(61) ++#define ZXDHQPSQ_L4LEN GENMASK_ULL(45, 42) ++#define ZXDHQPSQ_SIGCOMPL BIT_ULL(56) ++#define ZXDHQPSQ_SOLICITED BIT_ULL(53) ++#define ZXDHQPSQ_VALID BIT_ULL(63) ++ ++#define ZXDHQPSQ_FIRST_FRAG_VALID BIT_ULL(0) ++#define ZXDHQPSQ_FIRST_FRAG_LEN GENMASK_ULL(31, 1) ++#define ZXDHQPSQ_FIRST_FRAG_STAG GENMASK_ULL(63, 32) ++#define ZXDHQPSQ_FRAG_TO ZXDH_CQPHC_QPCTX ++#define ZXDHQPSQ_FRAG_VALID BIT_ULL(63) ++#define ZXDHQPSQ_FRAG_LEN GENMASK_ULL(62, 32) ++#define ZXDHQPSQ_FRAG_STAG GENMASK_ULL(31, 0) ++#define ZXDHQPSQ_GEN1_FRAG_LEN GENMASK_ULL(31, 0) ++#define ZXDHQPSQ_GEN1_FRAG_STAG GENMASK_ULL(63, 32) ++#define ZXDHQPSQ_REMSTAGINV GENMASK_ULL(31, 0) ++#define ZXDHQPSQ_DESTQKEY GENMASK_ULL(31, 0) ++#define ZXDHQPSQ_DESTQPN GENMASK_ULL(55, 32) ++#define ZXDHQPSQ_AHID GENMASK_ULL(18, 0) ++#define ZXDHQPSQ_INLINEDATAFLAG BIT_ULL(63) ++#define ZXDHQPSQ_UD_INLINEDATAFLAG BIT_ULL(50) ++#define ZXDHQPSQ_UD_INLINEDATALEN GENMASK_ULL(49, 42) ++#define ZXDHQPSQ_UD_ADDFRAGCNT GENMASK_ULL(36, 29) ++#define ZXDHQPSQ_WRITE_INLINEDATAFLAG BIT_ULL(48) ++#define ZXDHQPSQ_WRITE_INLINEDATALEN GENMASK_ULL(47, 40) ++ ++#define ZXDH_INLINE_VALID_S 7 ++#define ZXDHQPSQ_INLINE_VALID BIT_ULL(63) ++#define ZXDHQPSQ_INLINEDATALEN GENMASK_ULL(62, 55) ++#define ZXDHQPSQ_IMMDATAFLAG BIT_ULL(52) ++#define ZXDHQPSQ_REPORTRTT BIT_ULL(46) ++ ++#define ZXDHQPSQ_IMMDATA GENMASK_ULL(31, 0) ++#define ZXDHQPSQ_REMSTAG_S 0 ++#define ZXDHQPSQ_REMSTAG GENMASK_ULL(31, 0) ++ ++#define ZXDHQPSQ_REMTO ZXDH_CQPHC_QPCTX ++ ++#define ZXDHQPSQ_IMMDATA_VALID BIT_ULL(63) ++#define ZXDHQPSQ_STAGRIGHTS GENMASK_ULL(50, 46) ++#define ZXDHQPSQ_VABASEDTO BIT_ULL(51) ++#define ZXDHQPSQ_MEMWINDOWTYPE BIT_ULL(52) ++ ++#define ZXDHQPSQ_MWLEN ZXDH_CQPHC_QPCTX ++#define ZXDHQPSQ_PARENTMRSTAG GENMASK_ULL(31, 0) ++#define ZXDHQPSQ_MWSTAG GENMASK_ULL(31, 0) ++#define ZXDHQPSQ_MW_PA_PBLE_ONE GENMASK_ULL(63, 46) ++#define ZXDHQPSQ_MW_PA_PBLE_TWO GENMASK_ULL(63, 32) ++#define ZXDHQPSQ_MW_PA_PBLE_THREE GENMASK_ULL(33, 32) ++#define ZXDHQPSQ_MW_HOST_PAGE_SIZE GENMASK_ULL(40, 36) ++#define ZXDHQPSQ_MW_LEAF_PBL_SIZE GENMASK_ULL(35, 34) ++#define ZXDHQPSQ_MW_LEVLE2_FIRST_PBLE_INDEX GENMASK_ULL(41, 32) ++#define ZXDHQPSQ_MW_LEVLE2_ROOT_PBLE_INDEX GENMASK_ULL(50, 42) ++ ++#define ZXDHQPSQ_BASEVA_TO_FBO ZXDH_CQPHC_QPCTX ++ ++#define ZXDHQPSQ_LOCSTAG GENMASK_ULL(31, 0) ++ ++#define ZXDHQPSRQ_RSV GENMASK_ULL(63, 40) ++#define ZXDHQPSRQ_VALID_SGE_NUM GENMASK_ULL(39, 32) ++#define ZXDHQPSRQ_SIGNATURE GENMASK_ULL(31, 24) ++#define ZXDHQPSRQ_NEXT_WQE_INDEX GENMASK_ULL(15, 0) ++#define ZXDHQPSRQ_START_PADDING BIT_ULL(63) ++#define ZXDHQPSRQ_FRAG_LEN GENMASK_ULL(62, 32) ++#define ZXDHQPSRQ_FRAG_STAG GENMASK_ULL(31, 0) ++ ++/* QP RQ WQE common fields */ ++#define ZXDHQPRQ_SIGNATURE GENMASK_ULL(31, 16) ++#define ZXDHQPRQ_ADDFRAGCNT ZXDHQPSQ_ADDFRAGCNT ++#define ZXDHQPRQ_VALID ZXDHQPSQ_VALID ++#define ZXDHQPRQ_COMPLCTX ZXDH_CQPHC_QPCTX ++#define ZXDHQPRQ_FRAG_LEN ZXDHQPSQ_FRAG_LEN ++#define ZXDHQPRQ_STAG ZXDHQPSQ_FRAG_STAG ++#define ZXDHQPRQ_TO ZXDHQPSQ_FRAG_TO ++ ++//QP RQ DBSA fields ++#define 
ZXDHQPDBSA_RQ_POLARITY_S 15 ++#define ZXDHQPDBSA_RQ_POLARITY BIT_ULL(15) ++#define ZXDHQPDBSA_RQ_SW_HEAD_S 0 ++#define ZXDHQPDBSA_RQ_SW_HEAD GENMASK_ULL(14, 0) ++ ++#define ZXDHPFINT_OICR_HMC_ERR_M BIT(26) ++#define ZXDHPFINT_OICR_PE_PUSH_M BIT(27) ++#define ZXDHPFINT_OICR_PE_CRITERR_M BIT(28) ++ ++#define ZXDH_SRQ_PARITY_SIGN_S 15 ++#define ZXDH_SRQ_PARITY_SIGN BIT_ULL(15) ++#define ZXDH_SRQ_SW_SRQ_HEAD_S 0 ++#define ZXDH_SRQ_SW_SRQ_HEAD GENMASK_ULL(14, 0) ++#define ZXDH_CQE_SQ_OPCODE_RESET BIT(5) ++ ++#define ZXDH_CQP_INIT_WQE(wqe) memset(wqe, 0, 64) ++ ++#define ZXDH_GET_CURRENT_CQ_ELEM(_cq) \ ++ ((_cq)->cq_base[ZXDH_RING_CURRENT_HEAD((_cq)->cq_ring)].buf) ++#define ZXDH_GET_CURRENT_EXTENDED_CQ_ELEM(_cq) \ ++ (((struct zxdh_extended_cqe \ ++ *)((_cq)->cq_base))[ZXDH_RING_CURRENT_HEAD((_cq)->cq_ring)] \ ++ .buf) ++ ++#define ZXDH_RING_INIT(_ring, _size) \ ++ { \ ++ (_ring).head = 0; \ ++ (_ring).tail = 0; \ ++ (_ring).size = (_size); \ ++ } ++#define ZXDH_RING_SIZE(_ring) ((_ring).size) ++#define ZXDH_RING_CURRENT_HEAD(_ring) ((_ring).head) ++#define ZXDH_RING_CURRENT_TAIL(_ring) ((_ring).tail) ++ ++#define ZXDH_RING_MOVE_HEAD(_ring, _retcode) \ ++ { \ ++ register __u32 size; \ ++ size = (_ring).size; \ ++ if (!ZXDH_RING_FULL_ERR(_ring)) { \ ++ (_ring).head = ((_ring).head + 1) % size; \ ++ (_retcode) = 0; \ ++ } else { \ ++ (_retcode) = ZXDH_ERR_RING_FULL; \ ++ } \ ++ } ++#define ZXDH_RING_MOVE_HEAD_BY_COUNT(_ring, _count, _retcode) \ ++ { \ ++ register __u32 size; \ ++ size = (_ring).size; \ ++ if ((ZXDH_RING_USED_QUANTA(_ring) + (_count)) < size) { \ ++ (_ring).head = ((_ring).head + (_count)) % size; \ ++ (_retcode) = 0; \ ++ } else { \ ++ (_retcode) = ZXDH_ERR_RING_FULL; \ ++ } \ ++ } ++#define ZXDH_SQ_RING_MOVE_HEAD(_ring, _retcode) \ ++ { \ ++ register __u32 size; \ ++ size = (_ring).size; \ ++ if (!ZXDH_SQ_RING_FULL_ERR(_ring)) { \ ++ (_ring).head = ((_ring).head + 1) % size; \ ++ (_retcode) = 0; \ ++ } else { \ ++ (_retcode) = ZXDH_ERR_RING_FULL; \ ++ } \ ++ } ++#define ZXDH_SQ_RING_MOVE_HEAD_BY_COUNT(_ring, _count, _retcode) \ ++ { \ ++ register __u32 size; \ ++ size = (_ring).size; \ ++ if ((ZXDH_RING_USED_QUANTA(_ring) + (_count)) < \ ++ (size - 256)) { \ ++ (_ring).head = ((_ring).head + (_count)) % size; \ ++ (_retcode) = 0; \ ++ } else { \ ++ (_retcode) = ZXDH_ERR_RING_FULL; \ ++ } \ ++ } ++#define ZXDH_RING_MOVE_HEAD_BY_COUNT_NOCHECK(_ring, _count) \ ++ (_ring).head = ((_ring).head + (_count)) % (_ring).size ++ ++#define ZXDH_RING_MOVE_TAIL(_ring) \ ++ (_ring).tail = ((_ring).tail + 1) % (_ring).size ++ ++#define ZXDH_RING_MOVE_HEAD_NOCHECK(_ring) \ ++ (_ring).head = ((_ring).head + 1) % (_ring).size ++ ++#define ZXDH_RING_MOVE_TAIL_BY_COUNT(_ring, _count) \ ++ (_ring).tail = ((_ring).tail + (_count)) % (_ring).size ++ ++#define ZXDH_RING_SET_TAIL(_ring, _pos) (_ring).tail = (_pos) % (_ring).size ++ ++#define ZXDH_RING_FULL_ERR(_ring) \ ++ ((ZXDH_RING_USED_QUANTA(_ring) == ((_ring).size - 1))) ++ ++#define ZXDH_ERR_RING_FULL2(_ring) \ ++ ((ZXDH_RING_USED_QUANTA(_ring) == ((_ring).size - 2))) ++ ++#define ZXDH_ERR_RING_FULL3(_ring) \ ++ ((ZXDH_RING_USED_QUANTA(_ring) == ((_ring).size - 3))) ++ ++#define ZXDH_SQ_RING_FULL_ERR(_ring) \ ++ ((ZXDH_RING_USED_QUANTA(_ring) == ((_ring).size - 257))) ++ ++#define ZXDH_ERR_SQ_RING_FULL2(_ring) \ ++ ((ZXDH_RING_USED_QUANTA(_ring) == ((_ring).size - 258))) ++#define ZXDH_ERR_SQ_RING_FULL3(_ring) \ ++ ((ZXDH_RING_USED_QUANTA(_ring) == ((_ring).size - 259))) ++#define ZXDH_RING_MORE_WORK(_ring) ((ZXDH_RING_USED_QUANTA(_ring) != 0)) ++ 
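/*
 * Annotation (reader's note, not part of the original patch): a worked
 * example of the ring accounting implemented by the macros in this header.
 * With size = 1024, head = 10, tail = 1000:
 *
 *   ZXDH_RING_USED_QUANTA    = (10 + 1024 - 1000) % 1024 = 34
 *   ZXDH_RING_FREE_QUANTA    = 1024 - 34 - 1   = 989
 *   ZXDH_SQ_RING_FREE_QUANTA = 1024 - 34 - 257 = 733
 *
 * i.e. the generic ring keeps a single slot in reserve, while the SQ
 * variants hold back 257 quanta (presumably ZXDH_SQ_RSVD - 1), matching
 * the "- 257" in ZXDH_SQ_RING_FULL_ERR above.
 */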
++#define ZXDH_RING_USED_QUANTA(_ring) \ ++ ((((_ring).head + (_ring).size - (_ring).tail) % (_ring).size)) ++ ++#define ZXDH_RING_FREE_QUANTA(_ring) \ ++ (((_ring).size - ZXDH_RING_USED_QUANTA(_ring) - 1)) ++ ++#define ZXDH_SQ_RING_FREE_QUANTA(_ring) \ ++ (((_ring).size - ZXDH_RING_USED_QUANTA(_ring) - 257)) ++ ++#define ZXDH_ATOMIC_RING_MOVE_HEAD(_ring, index, _retcode) \ ++ { \ ++ index = ZXDH_RING_CURRENT_HEAD(_ring); \ ++ ZXDH_RING_MOVE_HEAD(_ring, _retcode); \ ++ } ++ ++enum zxdh_qp_wqe_size { ++ ZXDH_WQE_SIZE_32 = 32, ++ ZXDH_WQE_SIZE_64 = 64, ++ ZXDH_WQE_SIZE_96 = 96, ++ ZXDH_WQE_SIZE_128 = 128, ++ ZXDH_WQE_SIZE_256 = 256, ++}; ++ ++/** ++ * set_64bit_val - set 64 bit value to hw wqe ++ * @wqe_words: wqe addr to write ++ * @byte_index: index in wqe ++ * @val: value to write ++ **/ ++static inline void set_64bit_val(__le64 *wqe_words, __u32 byte_index, __u64 val) ++{ ++ wqe_words[byte_index >> 3] = htole64(val); ++} ++ ++/** ++ * set_32bit_val - set 32 bit value to hw wqe ++ * @wqe_words: wqe addr to write ++ * @byte_index: index in wqe ++ * @val: value to write ++ **/ ++static inline void set_32bit_val(__le32 *wqe_words, __u32 byte_index, __u32 val) ++{ ++ wqe_words[byte_index >> 2] = htole32(val); ++} ++ ++/** ++ * set_16bit_val - set 16 bit value to hw wqe ++ * @wqe_words: wqe addr to write ++ * @byte_index: index in wqe ++ * @val: value to write ++ **/ ++static inline void set_16bit_val(__le16 *wqe_words, __u32 byte_index, __u16 val) ++{ ++ wqe_words[byte_index >> 1] = htole16(val); ++} ++ ++/** ++ * get_64bit_val - read 64 bit value from wqe ++ * @wqe_words: wqe addr ++ * @byte_index: index to read from ++ * @val: read value ++ **/ ++static inline void get_64bit_val(__le64 *wqe_words, __u32 byte_index, ++ __u64 *val) ++{ ++ *val = le64toh(wqe_words[byte_index >> 3]); ++} ++ ++/** ++ * get_32bit_val - read 32 bit value from wqe ++ * @wqe_words: wqe addr ++ * @byte_index: index to reaad from ++ * @val: return 32 bit value ++ **/ ++static inline void get_32bit_val(__le32 *wqe_words, __u32 byte_index, ++ __u32 *val) ++{ ++ *val = le32toh(wqe_words[byte_index >> 2]); ++} ++ ++static inline void db_wr32(__u32 val, __u32 *wqe_word) ++{ ++ *wqe_word = val; ++} ++ ++#define read_wqe_need_split(pre_cal_psn, next_psn) \ ++ (((pre_cal_psn < next_psn) && (pre_cal_psn != 0)) || \ ++ ((next_psn <= 0x7FFFFF) && (pre_cal_psn > 0x800000))) ++ ++#endif /* ZXDH_DEFS_H */ +diff --git a/providers/zrdma/zxdh_devids.h b/providers/zrdma/zxdh_devids.h +new file mode 100644 +index 0000000..ac23124 +--- /dev/null ++++ b/providers/zrdma/zxdh_devids.h +@@ -0,0 +1,17 @@ ++/* SPDX-License-Identifier: GPL-2.0 or Linux-OpenIB */ ++/* Copyright (c) 2024 ZTE Corporation. All rights reserved. 
*/ ++#ifndef ZXDH_DEVIDS_H ++#define ZXDH_DEVIDS_H ++ ++/* ZXDH VENDOR ID */ ++#define PCI_VENDOR_ID_ZXDH_EVB 0x16c3 ++#define PCI_VENDOR_ID_ZXDH_E312 0x1cf2 ++#define PCI_VENDOR_ID_ZXDH_X512 0x1cf2 ++/* ZXDH Devices ID */ ++#define ZXDH_DEV_ID_ADAPTIVE_EVB_PF 0x8040 /* ZXDH EVB PF DEVICE ID*/ ++#define ZXDH_DEV_ID_ADAPTIVE_EVB_VF 0x8041 /* ZXDH EVB VF DEVICE ID*/ ++#define ZXDH_DEV_ID_ADAPTIVE_E312_PF 0x8049 /* ZXDH E312 PF DEVICE ID*/ ++#define ZXDH_DEV_ID_ADAPTIVE_E312_VF 0x8060 /* ZXDH E312 VF DEVICE ID*/ ++#define ZXDH_DEV_ID_ADAPTIVE_X512_PF 0x806B /* ZXDH X512 PF DEVICE ID*/ ++#define ZXDH_DEV_ID_ADAPTIVE_X512_VF 0x806C /* ZXDH X512 VF DEVICE ID*/ ++#endif /* ZXDH_DEVIDS_H */ +diff --git a/providers/zrdma/zxdh_dv.h b/providers/zrdma/zxdh_dv.h +new file mode 100644 +index 0000000..bb7a845 +--- /dev/null ++++ b/providers/zrdma/zxdh_dv.h +@@ -0,0 +1,75 @@ ++/* SPDX-License-Identifier: GPL-2.0 or Linux-OpenIB */ ++/* Copyright (c) 2024 ZTE Corporation. All rights reserved. */ ++#ifndef _ZXDH_DV_H_ ++#define _ZXDH_DV_H_ ++ ++#include ++#include ++#include /* For the __be64 type */ ++#include ++#include ++#if defined(__SSE3__) ++#include ++#include ++#include ++#endif /* defined(__SSE3__) */ ++ ++#include ++#include ++ ++#ifdef __cplusplus ++extern "C" { ++#endif ++ ++enum switch_status { ++ SWITCH_CLOSE = 0, ++ SWITCH_OPEN = 1, ++ SWITCH_ERROR, ++}; ++ ++enum zxdh_qp_reset_qp_code { ++ ZXDH_RESET_RETRY_TX_ITEM_FLAG = 1, ++}; ++ ++enum zxdh_qp_modify_qpc_mask { ++ ZXDH_RETRY_CQE_SQ_OPCODE = 1 << 0, ++ ZXDH_ERR_FLAG_SET = 1 << 1, ++ ZXDH_PACKAGE_ERR_FLAG = 1 << 2, ++ ZXDH_TX_LAST_ACK_PSN = 1 << 3, ++ ZXDH_TX_LAST_ACK_WQE_OFFSET_SET = 1 << 4, ++ ZXDH_TX_READ_RETRY_FLAG_SET = 1 << 5, ++ ZXDH_TX_RDWQE_PYLD_LENGTH = 1 << 6, ++ ZXDH_TX_RECV_READ_FLAG_SET = 1 << 7, ++ ZXDH_TX_RD_MSG_LOSS_ERR_FLAG_SET = 1 << 8, ++}; ++ ++struct zxdh_rdma_qpc { ++ uint8_t retry_flag; ++ uint8_t rnr_retry_flag; ++ uint8_t read_retry_flag; ++ uint8_t cur_retry_count; ++ uint8_t retry_cqe_sq_opcode; ++ uint8_t err_flag; ++ uint8_t ack_err_flag; ++ uint8_t package_err_flag; ++ uint8_t recv_err_flag; ++ uint32_t tx_last_ack_psn; ++ uint8_t retry_count; ++}; ++ ++int zxdh_get_log_trace_switch(struct ibv_context *context, ++ enum switch_status *status); ++int zxdh_set_log_trace_switch(struct ibv_context *context, ++ enum switch_status status); ++int zxdh_modify_qp_udp_sport(struct ibv_context *context, uint16_t udp_sport, ++ uint32_t qpn); ++int zxdh_query_qpc(struct ibv_qp *qp, struct zxdh_rdma_qpc *qpc); ++int zxdh_modify_qpc(struct ibv_qp *qp, struct zxdh_rdma_qpc *qpc, ++ uint64_t qpc_mask); ++int zxdh_reset_qp(struct ibv_qp *qp, uint64_t opcode); ++ ++#ifdef __cplusplus ++} ++#endif ++ ++#endif +diff --git a/providers/zrdma/zxdh_hw.c b/providers/zrdma/zxdh_hw.c +new file mode 100644 +index 0000000..ed577a9 +--- /dev/null ++++ b/providers/zrdma/zxdh_hw.c +@@ -0,0 +1,2596 @@ ++// SPDX-License-Identifier: GPL-2.0 or Linux-OpenIB ++/* Copyright (c) 2024 ZTE Corporation. All rights reserved. */ ++#include "zxdh_status.h" ++#include "zxdh_defs.h" ++#include "zxdh_verbs.h" ++#include "main.h" ++#include ++#include ++#include "private_verbs_cmd.h" ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#define ERROR_CODE_VALUE 65 ++ ++static void qp_tx_psn_add(__u32 *x, __u32 y, __u16 mtu) ++{ ++ if (y == 0) { ++ *x = (*x + 1) & 0xffffff; ++ return; ++ } ++ *x = (*x + ((y % mtu) ? 
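/* annotation (reader's note): advance the PSN by ceil(y / mtu), i.e. one PSN
 * per MTU-sized segment of the y-byte payload, wrapping modulo 2^24 via the
 * 0xffffff mask; the y == 0 case above still consumes a single PSN */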
(y / mtu + 1) : y / mtu)) & 0xffffff; ++} ++ ++int zxdh_get_write_imm_split_switch(void) ++{ ++ char *env; ++ env = getenv("ZXDH_WRITE_IMM_SPILT_ENABLE"); ++ return (env != NULL) ? atoi(env) : 0; ++} ++ ++/** ++ * zxdh_set_fragment - set fragment in wqe ++ * @wqe: wqe for setting fragment ++ * @offset: offset value ++ * @sge: sge length and stag ++ * @valid: The wqe valid ++ */ ++static void zxdh_set_fragment(__le64 *wqe, __u32 offset, struct zxdh_sge *sge, ++ __u8 valid) ++{ ++ if (sge) { ++ set_64bit_val(wqe, offset + 8, ++ FIELD_PREP(ZXDHQPSQ_FRAG_TO, sge->tag_off)); ++ set_64bit_val(wqe, offset, ++ FIELD_PREP(ZXDHQPSQ_VALID, valid) | ++ FIELD_PREP(ZXDHQPSQ_FRAG_LEN, sge->len) | ++ FIELD_PREP(ZXDHQPSQ_FRAG_STAG, ++ sge->stag)); ++ } else { ++ set_64bit_val(wqe, offset + 8, 0); ++ set_64bit_val(wqe, offset, FIELD_PREP(ZXDHQPSQ_VALID, valid)); ++ } ++} ++ ++/** ++ * zxdh_nop_1 - insert a NOP wqe ++ * @qp: hw qp ptr ++ */ ++static enum zxdh_status_code zxdh_nop_1(struct zxdh_qp *qp) ++{ ++ __u64 hdr; ++ __le64 *wqe; ++ __u32 wqe_idx; ++ bool signaled = false; ++ ++ if (!qp->sq_ring.head) ++ return ZXDH_ERR_PARAM; ++ ++ wqe_idx = ZXDH_RING_CURRENT_HEAD(qp->sq_ring); ++ wqe = qp->sq_base[wqe_idx].elem; ++ ++ qp->sq_wrtrk_array[wqe_idx].quanta = ZXDH_QP_WQE_MIN_QUANTA; ++ ++ set_64bit_val(wqe, 8, 0); ++ set_64bit_val(wqe, 16, 0); ++ set_64bit_val(wqe, 24, 0); ++ ++ hdr = FIELD_PREP(ZXDHQPSQ_OPCODE, ZXDH_OP_TYPE_NOP) | ++ FIELD_PREP(ZXDHQPSQ_SIGCOMPL, signaled) | ++ FIELD_PREP(ZXDHQPSQ_VALID, qp->swqe_polarity); ++ ++ /* make sure WQE is written before valid bit is set */ ++ udma_to_device_barrier(); ++ ++ set_64bit_val(wqe, 0, hdr); ++ ++ return 0; ++} ++ ++/** ++ * zxdh_clr_wqes - clear next 128 sq entries ++ * @qp: hw qp ptr ++ * @qp_wqe_idx: wqe_idx ++ */ ++void zxdh_clr_wqes(struct zxdh_qp *qp, __u32 qp_wqe_idx) ++{ ++ __le64 *wqe; ++ __u32 wqe_idx; ++ ++ if (!(qp_wqe_idx & 0x7F)) { ++ wqe_idx = (qp_wqe_idx + 128) % qp->sq_ring.size; ++ wqe = qp->sq_base[wqe_idx].elem; ++ if (wqe_idx) ++ memset(wqe, qp->swqe_polarity ? 0 : 0xFF, 0x1000); ++ else ++ memset(wqe, qp->swqe_polarity ? 0xFF : 0, 0x1000); ++ } ++} ++ ++/** ++ * zxdh_qp_post_wr - ring doorbell ++ * @qp: hw qp ptr ++ */ ++void zxdh_qp_post_wr(struct zxdh_qp *qp) ++{ ++ /* valid bit is written before ringing doorbell */ ++ udma_to_device_barrier(); ++ ++ db_wr32(qp->qp_id, qp->wqe_alloc_db); ++ qp->initial_ring.head = qp->sq_ring.head; ++} ++ ++/** ++ * zxdh_qp_set_shadow_area - fill SW_RQ_Head ++ * @qp: hw qp ptr ++ */ ++void zxdh_qp_set_shadow_area(struct zxdh_qp *qp) ++{ ++ __u8 polarity = 0; ++ ++ polarity = ((ZXDH_RING_CURRENT_HEAD(qp->rq_ring) == 0) ? 
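/* annotation (reader's inference): when the RQ head has wrapped to index 0,
 * the shadow area reports the inverted polarity; zxdh_qp_get_next_recv_wqe()
 * below toggles rwqe_polarity each time it hands out index 0, so this keeps
 * the doorbell shadow area consistent with the next WQE to be posted */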
++ !qp->rwqe_polarity : ++ qp->rwqe_polarity); ++ set_64bit_val(qp->shadow_area, 0, ++ FIELD_PREP(ZXDHQPDBSA_RQ_POLARITY, polarity) | ++ FIELD_PREP(ZXDHQPDBSA_RQ_SW_HEAD, ++ ZXDH_RING_CURRENT_HEAD(qp->rq_ring))); ++} ++ ++/** ++ * zxdh_qp_ring_push_db - ring qp doorbell ++ * @qp: hw qp ptr ++ * @wqe_idx: wqe index ++ */ ++static void zxdh_qp_ring_push_db(struct zxdh_qp *qp, __u32 wqe_idx) ++{ ++ set_32bit_val(qp->push_db, 0, ++ FIELD_PREP(ZXDH_WQEALLOC_WQE_DESC_INDEX, wqe_idx >> 3) | ++ qp->qp_id); ++ qp->initial_ring.head = qp->sq_ring.head; ++ qp->push_mode = true; ++ qp->push_dropped = false; ++} ++ ++void zxdh_qp_push_wqe(struct zxdh_qp *qp, __le64 *wqe, __u16 quanta, ++ __u32 wqe_idx, bool post_sq) ++{ ++ __le64 *push; ++ ++ if (ZXDH_RING_CURRENT_HEAD(qp->initial_ring) != ++ ZXDH_RING_CURRENT_TAIL(qp->sq_ring) && ++ !qp->push_mode) { ++ if (post_sq) ++ zxdh_qp_post_wr(qp); ++ } else { ++ push = (__le64 *)((uintptr_t)qp->push_wqe + ++ (wqe_idx & 0x7) * 0x20); ++ memcpy(push, wqe, quanta * ZXDH_QP_WQE_MIN_SIZE); ++ zxdh_qp_ring_push_db(qp, wqe_idx); ++ } ++} ++ ++/** ++ * zxdh_qp_get_next_send_wqe - pad with NOP if needed, return where next WR should go ++ * @qp: hw qp ptr ++ * @wqe_idx: return wqe index ++ * @quanta: size of WR in quanta ++ * @total_size: size of WR in bytes ++ * @info: info on WR ++ */ ++__le64 *zxdh_qp_get_next_send_wqe(struct zxdh_qp *qp, __u32 *wqe_idx, ++ __u16 quanta, __u32 total_size, ++ struct zxdh_post_sq_info *info) ++{ ++ __le64 *wqe; ++ __u16 avail_quanta; ++ __u16 i; ++ ++ avail_quanta = ZXDH_MAX_SQ_WQES_PER_PAGE - ++ (ZXDH_RING_CURRENT_HEAD(qp->sq_ring) % ++ ZXDH_MAX_SQ_WQES_PER_PAGE); ++ if (quanta <= avail_quanta) { ++ /* WR fits in current chunk */ ++ if (quanta > ZXDH_SQ_RING_FREE_QUANTA(qp->sq_ring)) ++ return NULL; ++ } else { ++ /* Need to pad with NOP */ ++ if (quanta + avail_quanta > ++ ZXDH_SQ_RING_FREE_QUANTA(qp->sq_ring)) ++ return NULL; ++ ++ for (i = 0; i < avail_quanta; i++) { ++ zxdh_nop_1(qp); ++ ZXDH_RING_MOVE_HEAD_NOCHECK(qp->sq_ring); ++ } ++ } ++ ++ *wqe_idx = ZXDH_RING_CURRENT_HEAD(qp->sq_ring); ++ if (!*wqe_idx) ++ qp->swqe_polarity = !qp->swqe_polarity; ++ ++ ZXDH_RING_MOVE_HEAD_BY_COUNT_NOCHECK(qp->sq_ring, quanta); ++ ++ wqe = qp->sq_base[*wqe_idx].elem; ++ qp->sq_wrtrk_array[*wqe_idx].wrid = info->wr_id; ++ qp->sq_wrtrk_array[*wqe_idx].wr_len = total_size; ++ qp->sq_wrtrk_array[*wqe_idx].quanta = quanta; ++ ++ return wqe; ++} ++ ++/** ++ * zxdh_qp_get_next_recv_wqe - get next qp's rcv wqe ++ * @qp: hw qp ptr ++ * @wqe_idx: return wqe index ++ */ ++__le64 *zxdh_qp_get_next_recv_wqe(struct zxdh_qp *qp, __u32 *wqe_idx) ++{ ++ __le64 *wqe; ++ enum zxdh_status_code ret_code; ++ ++ if (ZXDH_RING_FULL_ERR(qp->rq_ring)) ++ return NULL; ++ ++ ZXDH_ATOMIC_RING_MOVE_HEAD(qp->rq_ring, *wqe_idx, ret_code); ++ if (ret_code) ++ return NULL; ++ ++ if (!*wqe_idx) ++ qp->rwqe_polarity = !qp->rwqe_polarity; ++ /* rq_wqe_size_multiplier is no of 16 byte quanta in one rq wqe */ ++ wqe = qp->rq_base[*wqe_idx * qp->rq_wqe_size_multiplier].elem; ++ ++ return wqe; ++} ++ ++static enum zxdh_status_code ++zxdh_post_rdma_write(struct zxdh_qp *qp, struct zxdh_post_sq_info *info, ++ bool post_sq, __u32 total_size) ++{ ++ enum zxdh_status_code ret_code; ++ struct zxdh_rdma_write *op_info; ++ __u32 i, byte_off = 0; ++ __u32 frag_cnt, addl_frag_cnt; ++ __le64 *wqe; ++ __u32 wqe_idx; ++ __u16 quanta; ++ __u64 hdr; ++ bool read_fence = false; ++ bool imm_data_flag; ++ ++ op_info = &info->op.rdma_write; ++ imm_data_flag = info->imm_data_valid ? 
1 : 0; ++ read_fence |= info->read_fence; ++ ++ if (imm_data_flag) ++ frag_cnt = ++ op_info->num_lo_sges ? (op_info->num_lo_sges + 1) : 2; ++ else ++ frag_cnt = op_info->num_lo_sges; ++ addl_frag_cnt = ++ op_info->num_lo_sges > 1 ? (op_info->num_lo_sges - 1) : 0; ++ ++ ret_code = zxdh_fragcnt_to_quanta_sq(frag_cnt, &quanta); ++ if (ret_code) ++ return ret_code; ++ ++ wqe = zxdh_qp_get_next_send_wqe(qp, &wqe_idx, quanta, total_size, info); ++ if (!wqe) ++ return ZXDH_ERR_QP_TOOMANY_WRS_POSTED; ++ ++ zxdh_clr_wqes(qp, wqe_idx); ++ ++ if (op_info->num_lo_sges) { ++ set_64bit_val( ++ wqe, 16, ++ FIELD_PREP(ZXDHQPSQ_FIRST_FRAG_VALID, ++ op_info->lo_sg_list->len == ++ ZXDH_MAX_SQ_PAYLOAD_SIZE ? ++ 1 : ++ 0) | ++ FIELD_PREP(ZXDHQPSQ_FIRST_FRAG_LEN, ++ op_info->lo_sg_list->len) | ++ FIELD_PREP(ZXDHQPSQ_FIRST_FRAG_STAG, ++ op_info->lo_sg_list->stag)); ++ set_64bit_val(wqe, 8, ++ FIELD_PREP(ZXDHQPSQ_FRAG_TO, ++ op_info->lo_sg_list->tag_off)); ++ } else { ++ /*if zero sge,post a special sge with zero lenth*/ ++ set_64bit_val(wqe, 16, ++ FIELD_PREP(ZXDHQPSQ_FIRST_FRAG_VALID, 0) | ++ FIELD_PREP(ZXDHQPSQ_FIRST_FRAG_LEN, 0) | ++ FIELD_PREP(ZXDHQPSQ_FIRST_FRAG_STAG, ++ 0x100)); ++ set_64bit_val(wqe, 8, FIELD_PREP(ZXDHQPSQ_FRAG_TO, 0)); ++ } ++ ++ if (imm_data_flag) { ++ byte_off = ZXDH_SQ_WQE_BYTESIZE + ZXDH_QP_FRAG_BYTESIZE; ++ if (op_info->num_lo_sges > 1) { ++ qp->wqe_ops.iw_set_fragment(wqe, byte_off, ++ &op_info->lo_sg_list[1], ++ qp->swqe_polarity); ++ byte_off += ZXDH_QP_FRAG_BYTESIZE; ++ } ++ set_64bit_val( ++ wqe, ZXDH_SQ_WQE_BYTESIZE, ++ FIELD_PREP(ZXDHQPSQ_IMMDATA_VALID, qp->swqe_polarity) | ++ FIELD_PREP(ZXDHQPSQ_IMMDATA, info->imm_data)); ++ i = 2; ++ if (i < op_info->num_lo_sges) { ++ for (byte_off = ZXDH_SQ_WQE_BYTESIZE + ++ 2 * ZXDH_QP_FRAG_BYTESIZE; ++ i < op_info->num_lo_sges; i += 2) { ++ if (i == addl_frag_cnt) { ++ qp->wqe_ops.iw_set_fragment( ++ wqe, byte_off, ++ &op_info->lo_sg_list[i], ++ qp->swqe_polarity); ++ byte_off += ZXDH_QP_FRAG_BYTESIZE; ++ break; ++ } ++ byte_off += ZXDH_QP_FRAG_BYTESIZE; ++ qp->wqe_ops.iw_set_fragment( ++ wqe, byte_off, ++ &op_info->lo_sg_list[i + 1], ++ qp->swqe_polarity); ++ byte_off -= ZXDH_QP_FRAG_BYTESIZE; ++ qp->wqe_ops.iw_set_fragment( ++ wqe, byte_off, &op_info->lo_sg_list[i], ++ qp->swqe_polarity); ++ byte_off += 2 * ZXDH_QP_FRAG_BYTESIZE; ++ } ++ } ++ } else { ++ i = 1; ++ for (byte_off = ZXDH_SQ_WQE_BYTESIZE; i < op_info->num_lo_sges; ++ i += 2) { ++ if (i == addl_frag_cnt) { ++ qp->wqe_ops.iw_set_fragment( ++ wqe, byte_off, &op_info->lo_sg_list[i], ++ qp->swqe_polarity); ++ byte_off += ZXDH_QP_FRAG_BYTESIZE; ++ break; ++ } ++ byte_off += ZXDH_QP_FRAG_BYTESIZE; ++ qp->wqe_ops.iw_set_fragment(wqe, byte_off, ++ &op_info->lo_sg_list[i + 1], ++ qp->swqe_polarity); ++ byte_off -= ZXDH_QP_FRAG_BYTESIZE; ++ qp->wqe_ops.iw_set_fragment(wqe, byte_off, ++ &op_info->lo_sg_list[i], ++ qp->swqe_polarity); ++ byte_off += 2 * ZXDH_QP_FRAG_BYTESIZE; ++ } ++ } ++ /* if not an odd number set valid bit in next fragment */ ++ if (!(frag_cnt & 0x01) && frag_cnt) { ++ qp->wqe_ops.iw_set_fragment(wqe, byte_off, NULL, ++ qp->swqe_polarity); ++ } ++ ++ hdr = FIELD_PREP(ZXDHQPSQ_VALID, qp->swqe_polarity) | ++ FIELD_PREP(ZXDHQPSQ_OPCODE, info->op_type) | ++ FIELD_PREP(ZXDHQPSQ_SIGCOMPL, info->signaled) | ++ FIELD_PREP(ZXDHQPSQ_LOCALFENCE, info->local_fence) | ++ FIELD_PREP(ZXDHQPSQ_READFENCE, read_fence) | ++ FIELD_PREP(ZXDHQPSQ_SOLICITED, info->solicited) | ++ FIELD_PREP(ZXDHQPSQ_IMMDATAFLAG, imm_data_flag) | ++ FIELD_PREP(ZXDHQPSQ_ADDFRAGCNT, addl_frag_cnt) | ++ 
FIELD_PREP(ZXDHQPSQ_REMSTAG, op_info->rem_addr.stag); ++ set_64bit_val(wqe, 24, ++ FIELD_PREP(ZXDHQPSQ_FRAG_TO, op_info->rem_addr.tag_off)); ++ ++ udma_to_device_barrier(); /* make sure WQE is populated before valid bit is set */ ++ ++ set_64bit_val(wqe, 0, hdr); ++ if (post_sq) ++ zxdh_qp_post_wr(qp); ++ qp_tx_psn_add(&qp->next_psn, total_size, qp->mtu); ++ return 0; ++} ++ ++static void split_write_imm_wqe(struct zxdh_qp *qp, ++ struct zxdh_post_sq_info *info, ++ struct zxdh_post_sq_info *split_part1_info, ++ struct zxdh_post_sq_info *split_part2_info) ++{ ++ __u32 total_size = 0; ++ struct zxdh_rdma_write *op_info; ++ ++ op_info = &info->op.rdma_write; ++ total_size = op_info->rem_addr.len; ++ split_part1_info->op.rdma_write.lo_sg_list = ++ info->op.rdma_write.lo_sg_list; ++ split_part2_info->op.rdma_write.lo_sg_list = NULL; ++ ++ split_part1_info->op_type = ZXDH_OP_TYPE_WRITE; ++ split_part1_info->signaled = false; ++ split_part1_info->local_fence = info->local_fence; ++ split_part1_info->read_fence = info->read_fence; ++ split_part1_info->solicited = info->solicited; ++ split_part1_info->imm_data_valid = false; ++ split_part1_info->wr_id = info->wr_id; ++ split_part1_info->op.rdma_write.num_lo_sges = ++ info->op.rdma_write.num_lo_sges; ++ split_part1_info->op.rdma_write.rem_addr.stag = op_info->rem_addr.stag; ++ split_part1_info->op.rdma_write.rem_addr.tag_off = ++ op_info->rem_addr.tag_off; ++ ++ split_part2_info->op_type = info->op_type; ++ split_part2_info->signaled = info->signaled; ++ split_part2_info->local_fence = info->local_fence; ++ split_part2_info->read_fence = info->read_fence; ++ split_part2_info->solicited = info->solicited; ++ split_part2_info->imm_data_valid = info->imm_data_valid; ++ split_part2_info->wr_id = info->wr_id; ++ split_part2_info->imm_data = info->imm_data; ++ split_part2_info->op.rdma_write.num_lo_sges = 0; ++ split_part2_info->op.rdma_write.rem_addr.stag = op_info->rem_addr.stag; ++ split_part2_info->op.rdma_write.rem_addr.tag_off = ++ op_info->rem_addr.tag_off + total_size; ++} ++ ++/** ++ * zxdh_rdma_write - rdma write operation ++ * @qp: hw qp ptr ++ * @info: post sq information ++ * @post_sq: flag to post sq ++ */ ++enum zxdh_status_code zxdh_rdma_write(struct zxdh_qp *qp, ++ struct zxdh_post_sq_info *info, ++ bool post_sq) ++{ ++ struct zxdh_post_sq_info split_part1_info = { 0 }; ++ struct zxdh_post_sq_info split_part2_info = { 0 }; ++ struct zxdh_rdma_write *op_info; ++ struct zxdh_uqp *iwuqp; ++ struct zxdh_uvcontext *iwvctx; ++ __u32 i; ++ __u32 total_size = 0; ++ enum zxdh_status_code ret_code; ++ bool imm_data_flag = info->imm_data_valid ? 
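/* annotation (reader's note): when zxdh_write_imm_split_switch is enabled via
 * the ZXDH_WRITE_IMM_SPILT_ENABLE environment variable and an RDMA write with
 * immediate exceeds the path MTU, split_write_imm_wqe() turns it into a plain
 * write carrying the payload followed by a zero-length write-with-immediate,
 * as handled further down in this function */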
1 : 0; ++ iwuqp = container_of(qp, struct zxdh_uqp, qp); ++ iwvctx = container_of(iwuqp->vqp.qp.context, struct zxdh_uvcontext, ++ ibv_ctx.context); ++ op_info = &info->op.rdma_write; ++ if (op_info->num_lo_sges > qp->max_sq_frag_cnt) ++ return ZXDH_ERR_INVALID_FRAG_COUNT; ++ ++ for (i = 0; i < op_info->num_lo_sges; i++) { ++ total_size += op_info->lo_sg_list[i].len; ++ if (0 != i && 0 == op_info->lo_sg_list[i].len) ++ return ZXDH_ERR_INVALID_FRAG_LEN; ++ } ++ ++ if (total_size > ZXDH_MAX_SQ_PAYLOAD_SIZE) ++ return ZXDH_ERR_QP_INVALID_MSG_SIZE; ++ ++ op_info->rem_addr.len = total_size; ++ if (iwvctx->zxdh_write_imm_split_switch == 0) { ++ ret_code = zxdh_post_rdma_write(qp, info, post_sq, total_size); ++ if (ret_code) ++ return ret_code; ++ } else { ++ if (imm_data_flag && total_size > qp->mtu) { ++ split_write_imm_wqe(qp, info, &split_part1_info, ++ &split_part2_info); ++ ++ ret_code = zxdh_post_rdma_write(qp, &split_part1_info, ++ post_sq, total_size); ++ if (ret_code) ++ return ret_code; ++ ret_code = zxdh_post_rdma_write(qp, &split_part2_info, ++ post_sq, 0); ++ if (ret_code) ++ return ret_code; ++ } else { ++ ret_code = zxdh_post_rdma_write(qp, info, post_sq, ++ total_size); ++ if (ret_code) ++ return ret_code; ++ } ++ } ++ ++ return 0; ++} ++ ++static void split_two_part_info(struct zxdh_qp *qp, ++ struct zxdh_post_sq_info *info, __u32 ori_psn, ++ __u32 pre_cal_psn, ++ struct zxdh_post_sq_info *split_part1_info, ++ struct zxdh_post_sq_info *split_part2_info) ++{ ++ __u32 total_size = 0; ++ __u32 remain_size = 0; ++ __u32 split_size = 0; ++ struct zxdh_rdma_read *op_info; ++ ++ op_info = &info->op.rdma_read; ++ total_size = op_info->rem_addr.len; ++ split_part1_info->op.rdma_read.lo_sg_list = qp->split_sg_list; ++ split_part2_info->op.rdma_read.lo_sg_list = ++ qp->split_sg_list + op_info->num_lo_sges; ++ ++ memset(split_part1_info->op.rdma_read.lo_sg_list, 0, ++ 2 * op_info->num_lo_sges * sizeof(struct zxdh_sge)); ++ if (pre_cal_psn < ori_psn && pre_cal_psn != 0) ++ remain_size = (0xffffff - ori_psn + 1) * qp->mtu; ++ else ++ remain_size = (0x800000 - ori_psn) * qp->mtu; ++ ++ split_size = total_size - remain_size; ++ ++ split_part1_info->signaled = false; ++ split_part1_info->local_fence = info->local_fence; ++ split_part1_info->read_fence = info->read_fence; ++ split_part1_info->solicited = false; ++ split_part1_info->wr_id = info->wr_id; ++ split_part1_info->op.rdma_read.rem_addr.stag = op_info->rem_addr.stag; ++ split_part1_info->op.rdma_read.rem_addr.tag_off = ++ op_info->rem_addr.tag_off; ++ ++ split_part2_info->signaled = info->signaled; ++ split_part2_info->local_fence = info->local_fence; ++ split_part2_info->read_fence = info->read_fence; ++ split_part2_info->solicited = info->solicited; ++ split_part2_info->wr_id = info->wr_id; ++ split_part2_info->op.rdma_read.rem_addr.stag = op_info->rem_addr.stag; ++ split_part2_info->op.rdma_read.rem_addr.tag_off = ++ op_info->rem_addr.tag_off + remain_size; ++ ++ for (int i = 0; i < op_info->num_lo_sges; i++) { ++ if (op_info->lo_sg_list[i].len + ++ split_part1_info->op.rdma_read.rem_addr.len < ++ remain_size) { ++ split_part1_info->op.rdma_read.rem_addr.len += ++ op_info->lo_sg_list[i].len; ++ split_part1_info->op.rdma_read.num_lo_sges += 1; ++ memcpy(split_part1_info->op.rdma_read.lo_sg_list + i, ++ op_info->lo_sg_list + i, ++ sizeof(struct zxdh_sge)); ++ continue; ++ } else if (op_info->lo_sg_list[i].len + ++ split_part1_info->op.rdma_read.rem_addr.len == ++ remain_size) { ++ split_part1_info->op.rdma_read.rem_addr.len += ++ 
op_info->lo_sg_list[i].len; ++ split_part1_info->op.rdma_read.num_lo_sges += 1; ++ memcpy(split_part1_info->op.rdma_read.lo_sg_list + i, ++ op_info->lo_sg_list + i, ++ sizeof(struct zxdh_sge)); ++ split_part2_info->op.rdma_read.rem_addr.len = ++ split_size; ++ split_part2_info->op.rdma_read.num_lo_sges = ++ op_info->num_lo_sges - ++ split_part1_info->op.rdma_read.num_lo_sges; ++ memcpy(split_part2_info->op.rdma_read.lo_sg_list, ++ op_info->lo_sg_list + i + 1, ++ split_part2_info->op.rdma_read.num_lo_sges * ++ sizeof(struct zxdh_sge)); ++ break; ++ } ++ ++ split_part1_info->op.rdma_read.lo_sg_list[i].len = ++ remain_size - ++ split_part1_info->op.rdma_read.rem_addr.len; ++ split_part1_info->op.rdma_read.lo_sg_list[i].tag_off = ++ op_info->lo_sg_list[i].tag_off; ++ split_part1_info->op.rdma_read.lo_sg_list[i].stag = ++ op_info->lo_sg_list[i].stag; ++ split_part1_info->op.rdma_read.rem_addr.len = remain_size; ++ split_part1_info->op.rdma_read.num_lo_sges += 1; ++ split_part2_info->op.rdma_read.lo_sg_list[0].len = ++ op_info->lo_sg_list[i].len - ++ split_part1_info->op.rdma_read.lo_sg_list[i].len; ++ split_part2_info->op.rdma_read.lo_sg_list[0].tag_off = ++ op_info->lo_sg_list[i].tag_off + ++ split_part1_info->op.rdma_read.lo_sg_list[i].len; ++ split_part2_info->op.rdma_read.lo_sg_list[0].stag = ++ op_info->lo_sg_list[i].stag; ++ split_part2_info->op.rdma_read.rem_addr.len = split_size; ++ split_part2_info->op.rdma_read.num_lo_sges = ++ op_info->num_lo_sges - ++ split_part1_info->op.rdma_read.num_lo_sges + 1; ++ if (split_part2_info->op.rdma_read.num_lo_sges - 1 > 0) { ++ memcpy(split_part2_info->op.rdma_read.lo_sg_list + 1, ++ op_info->lo_sg_list + i + 1, ++ (split_part2_info->op.rdma_read.num_lo_sges - ++ 1) * sizeof(struct zxdh_sge)); ++ } ++ break; ++ } ++} ++ ++static enum zxdh_status_code zxdh_post_rdma_read(struct zxdh_qp *qp, ++ struct zxdh_post_sq_info *info, ++ bool post_sq, __u32 total_size) ++{ ++ enum zxdh_status_code ret_code; ++ struct zxdh_rdma_read *op_info; ++ __u32 i, byte_off = 0; ++ bool local_fence = false; ++ __u32 addl_frag_cnt; ++ __le64 *wqe; ++ __u32 wqe_idx; ++ __u16 quanta; ++ __u64 hdr; ++ ++ op_info = &info->op.rdma_read; ++ ret_code = zxdh_fragcnt_to_quanta_sq(op_info->num_lo_sges, &quanta); ++ if (ret_code) ++ return ret_code; ++ ++ wqe = zxdh_qp_get_next_send_wqe(qp, &wqe_idx, quanta, total_size, info); ++ if (!wqe) ++ return ZXDH_ERR_QP_TOOMANY_WRS_POSTED; ++ ++ zxdh_clr_wqes(qp, wqe_idx); ++ ++ addl_frag_cnt = ++ op_info->num_lo_sges > 1 ? (op_info->num_lo_sges - 1) : 0; ++ local_fence |= info->local_fence; ++ ++ if (op_info->num_lo_sges) { ++ set_64bit_val( ++ wqe, 16, ++ FIELD_PREP(ZXDHQPSQ_FIRST_FRAG_VALID, ++ op_info->lo_sg_list->len == ++ ZXDH_MAX_SQ_PAYLOAD_SIZE ? 
++ 1 : ++ 0) | ++ FIELD_PREP(ZXDHQPSQ_FIRST_FRAG_LEN, ++ op_info->lo_sg_list->len) | ++ FIELD_PREP(ZXDHQPSQ_FIRST_FRAG_STAG, ++ op_info->lo_sg_list->stag)); ++ set_64bit_val(wqe, 8, ++ FIELD_PREP(ZXDHQPSQ_FRAG_TO, ++ op_info->lo_sg_list->tag_off)); ++ } else { ++ /*if zero sge,post a special sge with zero lenth*/ ++ set_64bit_val(wqe, 16, ++ FIELD_PREP(ZXDHQPSQ_FIRST_FRAG_VALID, 0) | ++ FIELD_PREP(ZXDHQPSQ_FIRST_FRAG_LEN, 0) | ++ FIELD_PREP(ZXDHQPSQ_FIRST_FRAG_STAG, ++ 0x100)); ++ set_64bit_val(wqe, 8, FIELD_PREP(ZXDHQPSQ_FRAG_TO, 0)); ++ } ++ ++ i = 1; ++ for (byte_off = ZXDH_SQ_WQE_BYTESIZE; i < op_info->num_lo_sges; ++ i += 2) { ++ if (i == addl_frag_cnt) { ++ qp->wqe_ops.iw_set_fragment(wqe, byte_off, ++ &op_info->lo_sg_list[i], ++ qp->swqe_polarity); ++ byte_off += ZXDH_QP_FRAG_BYTESIZE; ++ break; ++ } ++ byte_off += ZXDH_QP_FRAG_BYTESIZE; ++ qp->wqe_ops.iw_set_fragment(wqe, byte_off, ++ &op_info->lo_sg_list[i + 1], ++ qp->swqe_polarity); ++ byte_off -= ZXDH_QP_FRAG_BYTESIZE; ++ qp->wqe_ops.iw_set_fragment(wqe, byte_off, ++ &op_info->lo_sg_list[i], ++ qp->swqe_polarity); ++ byte_off += 2 * ZXDH_QP_FRAG_BYTESIZE; ++ } ++ ++ /* if not an odd number set valid bit in next fragment */ ++ if (!(op_info->num_lo_sges & 0x01) && op_info->num_lo_sges) { ++ qp->wqe_ops.iw_set_fragment(wqe, byte_off, NULL, ++ qp->swqe_polarity); ++ } ++ ++ hdr = FIELD_PREP(ZXDHQPSQ_VALID, qp->swqe_polarity) | ++ FIELD_PREP(ZXDHQPSQ_OPCODE, ZXDH_OP_TYPE_READ) | ++ FIELD_PREP(ZXDHQPSQ_SIGCOMPL, info->signaled) | ++ FIELD_PREP(ZXDHQPSQ_LOCALFENCE, info->local_fence) | ++ FIELD_PREP(ZXDHQPSQ_READFENCE, info->read_fence) | ++ FIELD_PREP(ZXDHQPSQ_SOLICITED, info->solicited) | ++ FIELD_PREP(ZXDHQPSQ_ADDFRAGCNT, addl_frag_cnt) | ++ FIELD_PREP(ZXDHQPSQ_REMSTAG, op_info->rem_addr.stag); ++ set_64bit_val(wqe, 24, ++ FIELD_PREP(ZXDHQPSQ_FRAG_TO, op_info->rem_addr.tag_off)); ++ ++ udma_to_device_barrier(); /* make sure WQE is populated before valid bit is set */ ++ ++ set_64bit_val(wqe, 0, hdr); ++ if (post_sq) ++ zxdh_qp_post_wr(qp); ++ return 0; ++} ++ ++/** ++ * zxdh_rdma_read - rdma read command ++ * @qp: hw qp ptr ++ * @info: post sq information ++ * @inv_stag: flag for inv_stag ++ * @post_sq: flag to post sq ++ */ ++enum zxdh_status_code zxdh_rdma_read(struct zxdh_qp *qp, ++ struct zxdh_post_sq_info *info, ++ bool inv_stag, bool post_sq) ++{ ++ struct zxdh_post_sq_info split_part1_info = { 0 }; ++ struct zxdh_post_sq_info split_part2_info = { 0 }; ++ struct zxdh_rdma_read *op_info; ++ enum zxdh_status_code ret_code; ++ __u32 i, total_size = 0, pre_cal_psn = 0; ++ ++ op_info = &info->op.rdma_read; ++ if (qp->max_sq_frag_cnt < op_info->num_lo_sges) ++ return ZXDH_ERR_INVALID_FRAG_COUNT; ++ ++ for (i = 0; i < op_info->num_lo_sges; i++) { ++ total_size += op_info->lo_sg_list[i].len; ++ if (0 != i && 0 == op_info->lo_sg_list[i].len) ++ return ZXDH_ERR_INVALID_FRAG_LEN; ++ } ++ ++ if (total_size > ZXDH_MAX_SQ_PAYLOAD_SIZE) ++ return ZXDH_ERR_QP_INVALID_MSG_SIZE; ++ op_info->rem_addr.len = total_size; ++ pre_cal_psn = qp->next_psn; ++ qp_tx_psn_add(&pre_cal_psn, total_size, qp->mtu); ++ if (read_wqe_need_split(pre_cal_psn, qp->next_psn)) { ++ split_two_part_info(qp, info, qp->next_psn, pre_cal_psn, ++ &split_part1_info, &split_part2_info); ++ ret_code = zxdh_post_rdma_read(qp, &split_part1_info, post_sq, ++ total_size); ++ if (ret_code) ++ return ret_code; ++ ++ qp_tx_psn_add(&qp->next_psn, ++ split_part1_info.op.rdma_read.rem_addr.len, ++ qp->mtu); ++ ret_code = zxdh_post_rdma_read(qp, &split_part2_info, post_sq, ++ 
total_size); ++ if (ret_code) ++ return ret_code; ++ ++ qp_tx_psn_add(&qp->next_psn, ++ split_part2_info.op.rdma_read.rem_addr.len, ++ qp->mtu); ++ } else { ++ ret_code = zxdh_post_rdma_read(qp, info, post_sq, total_size); ++ if (ret_code) ++ return ret_code; ++ ++ qp_tx_psn_add(&qp->next_psn, total_size, qp->mtu); ++ } ++ return 0; ++} ++ ++/** ++ * zxdh_rc_send - rdma send command ++ * @qp: hw qp ptr ++ * @info: post sq information ++ * @post_sq: flag to post sq ++ */ ++enum zxdh_status_code zxdh_rc_send(struct zxdh_qp *qp, ++ struct zxdh_post_sq_info *info, bool post_sq) ++{ ++ __le64 *wqe; ++ struct zxdh_post_send *op_info; ++ __u64 hdr; ++ __u32 i, wqe_idx, total_size = 0, byte_off; ++ enum zxdh_status_code ret_code; ++ __u32 frag_cnt, addl_frag_cnt; ++ bool read_fence = false; ++ __u16 quanta; ++ bool imm_data_flag = info->imm_data_valid ? 1 : 0; ++ ++ op_info = &info->op.send; ++ if (qp->max_sq_frag_cnt < op_info->num_sges) ++ return ZXDH_ERR_INVALID_FRAG_COUNT; ++ ++ for (i = 0; i < op_info->num_sges; i++) { ++ total_size += op_info->sg_list[i].len; ++ if (0 != i && 0 == op_info->sg_list[i].len) ++ return ZXDH_ERR_INVALID_FRAG_LEN; ++ } ++ ++ if (total_size > ZXDH_MAX_SQ_PAYLOAD_SIZE) ++ return ZXDH_ERR_QP_INVALID_MSG_SIZE; ++ ++ if (imm_data_flag) ++ frag_cnt = op_info->num_sges ? (op_info->num_sges + 1) : 2; ++ else ++ frag_cnt = op_info->num_sges; ++ ret_code = zxdh_fragcnt_to_quanta_sq(frag_cnt, &quanta); ++ if (ret_code) ++ return ret_code; ++ ++ wqe = zxdh_qp_get_next_send_wqe(qp, &wqe_idx, quanta, total_size, info); ++ if (!wqe) ++ return ZXDH_ERR_QP_TOOMANY_WRS_POSTED; ++ ++ zxdh_clr_wqes(qp, wqe_idx); ++ ++ read_fence |= info->read_fence; ++ addl_frag_cnt = op_info->num_sges > 1 ? (op_info->num_sges - 1) : 0; ++ if (op_info->num_sges) { ++ set_64bit_val( ++ wqe, 16, ++ FIELD_PREP(ZXDHQPSQ_FIRST_FRAG_VALID, ++ op_info->sg_list->len == ++ ZXDH_MAX_SQ_PAYLOAD_SIZE ? 
++ 1 : ++ 0) | ++ FIELD_PREP(ZXDHQPSQ_FIRST_FRAG_LEN, ++ op_info->sg_list->len) | ++ FIELD_PREP(ZXDHQPSQ_FIRST_FRAG_STAG, ++ op_info->sg_list->stag)); ++ set_64bit_val(wqe, 8, ++ FIELD_PREP(ZXDHQPSQ_FRAG_TO, ++ op_info->sg_list->tag_off)); ++ } else { ++ /*if zero sge,post a special sge with zero lenth*/ ++ set_64bit_val(wqe, 16, ++ FIELD_PREP(ZXDHQPSQ_FIRST_FRAG_VALID, 0) | ++ FIELD_PREP(ZXDHQPSQ_FIRST_FRAG_LEN, 0) | ++ FIELD_PREP(ZXDHQPSQ_FIRST_FRAG_STAG, ++ 0x100)); ++ set_64bit_val(wqe, 8, FIELD_PREP(ZXDHQPSQ_FRAG_TO, 0)); ++ } ++ ++ if (imm_data_flag) { ++ byte_off = ZXDH_SQ_WQE_BYTESIZE + ZXDH_QP_FRAG_BYTESIZE; ++ if (op_info->num_sges > 1) { ++ qp->wqe_ops.iw_set_fragment(wqe, byte_off, ++ &op_info->sg_list[1], ++ qp->swqe_polarity); ++ byte_off += ZXDH_QP_FRAG_BYTESIZE; ++ } ++ set_64bit_val( ++ wqe, ZXDH_SQ_WQE_BYTESIZE, ++ FIELD_PREP(ZXDHQPSQ_IMMDATA_VALID, qp->swqe_polarity) | ++ FIELD_PREP(ZXDHQPSQ_IMMDATA, info->imm_data)); ++ i = 2; ++ if (i < op_info->num_sges) { ++ for (byte_off = ZXDH_SQ_WQE_BYTESIZE + ++ 2 * ZXDH_QP_FRAG_BYTESIZE; ++ i < op_info->num_sges; i += 2) { ++ if (i == addl_frag_cnt) { ++ qp->wqe_ops.iw_set_fragment( ++ wqe, byte_off, ++ &op_info->sg_list[i], ++ qp->swqe_polarity); ++ byte_off += ZXDH_QP_FRAG_BYTESIZE; ++ break; ++ } ++ byte_off += ZXDH_QP_FRAG_BYTESIZE; ++ qp->wqe_ops.iw_set_fragment( ++ wqe, byte_off, &op_info->sg_list[i + 1], ++ qp->swqe_polarity); ++ byte_off -= ZXDH_QP_FRAG_BYTESIZE; ++ qp->wqe_ops.iw_set_fragment( ++ wqe, byte_off, &op_info->sg_list[i], ++ qp->swqe_polarity); ++ byte_off += 2 * ZXDH_QP_FRAG_BYTESIZE; ++ } ++ } ++ } else { ++ i = 1; ++ for (byte_off = ZXDH_SQ_WQE_BYTESIZE; i < op_info->num_sges; ++ i += 2) { ++ if (i == addl_frag_cnt) { ++ qp->wqe_ops.iw_set_fragment( ++ wqe, byte_off, &op_info->sg_list[i], ++ qp->swqe_polarity); ++ byte_off += ZXDH_QP_FRAG_BYTESIZE; ++ break; ++ } ++ byte_off += ZXDH_QP_FRAG_BYTESIZE; ++ qp->wqe_ops.iw_set_fragment(wqe, byte_off, ++ &op_info->sg_list[i + 1], ++ qp->swqe_polarity); ++ byte_off -= ZXDH_QP_FRAG_BYTESIZE; ++ qp->wqe_ops.iw_set_fragment(wqe, byte_off, ++ &op_info->sg_list[i], ++ qp->swqe_polarity); ++ byte_off += 2 * ZXDH_QP_FRAG_BYTESIZE; ++ } ++ } ++ ++ /* if not an odd number set valid bit in next fragment */ ++ if (!(frag_cnt & 0x01) && frag_cnt) { ++ qp->wqe_ops.iw_set_fragment(wqe, byte_off, NULL, ++ qp->swqe_polarity); ++ } ++ ++ hdr = FIELD_PREP(ZXDHQPSQ_VALID, qp->swqe_polarity) | ++ FIELD_PREP(ZXDHQPSQ_OPCODE, info->op_type) | ++ FIELD_PREP(ZXDHQPSQ_SIGCOMPL, info->signaled) | ++ FIELD_PREP(ZXDHQPSQ_LOCALFENCE, info->local_fence) | ++ FIELD_PREP(ZXDHQPSQ_READFENCE, read_fence) | ++ FIELD_PREP(ZXDHQPSQ_SOLICITED, info->solicited) | ++ FIELD_PREP(ZXDHQPSQ_IMMDATAFLAG, imm_data_flag) | ++ FIELD_PREP(ZXDHQPSQ_ADDFRAGCNT, addl_frag_cnt) | ++ FIELD_PREP(ZXDHQPSQ_REMSTAG, info->stag_to_inv); ++ set_64bit_val(wqe, 24, ++ FIELD_PREP(ZXDHQPSQ_INLINEDATAFLAG, 0) | ++ FIELD_PREP(ZXDHQPSQ_INLINEDATALEN, 0)); ++ ++ udma_to_device_barrier(); /* make sure WQE is populated before valid bit is set */ ++ ++ set_64bit_val(wqe, 0, hdr); ++ if (post_sq) ++ zxdh_qp_post_wr(qp); ++ qp_tx_psn_add(&qp->next_psn, total_size, qp->mtu); ++ ++ return 0; ++} ++ ++/** ++ * zxdh_ud_send - rdma send command ++ * @qp: hw qp ptr ++ * @info: post sq information ++ * @post_sq: flag to post sq ++ */ ++enum zxdh_status_code zxdh_ud_send(struct zxdh_qp *qp, ++ struct zxdh_post_sq_info *info, bool post_sq) ++{ ++ __le64 *wqe_base; ++ __le64 *wqe_ex = NULL; ++ struct zxdh_post_send *op_info; ++ __u64 hdr; ++ 
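/* annotation (reader's note): unlike zxdh_rc_send() above, the UD path
 * reserves its quanta directly on sq_ring here, writes the destination
 * QPN/QKEY at byte offset 24 of the base WQE, and spills extra SGEs and any
 * immediate data into the following ring entries (wqe_ex), toggling
 * swqe_polarity whenever the index wraps to 0 */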
__u32 i, wqe_idx, total_size = 0, byte_off; ++ enum zxdh_status_code ret_code; ++ __u32 frag_cnt, addl_frag_cnt; ++ bool read_fence = false; ++ __u16 quanta; ++ bool imm_data_flag = info->imm_data_valid ? 1 : 0; ++ ++ op_info = &info->op.send; ++ if (qp->max_sq_frag_cnt < op_info->num_sges) ++ return ZXDH_ERR_INVALID_FRAG_COUNT; ++ ++ for (i = 0; i < op_info->num_sges; i++) { ++ total_size += op_info->sg_list[i].len; ++ if (0 != i && 0 == op_info->sg_list[i].len) ++ return ZXDH_ERR_INVALID_FRAG_LEN; ++ } ++ ++ if (total_size > ZXDH_MAX_SQ_PAYLOAD_SIZE) ++ return ZXDH_ERR_QP_INVALID_MSG_SIZE; ++ ++ if (imm_data_flag) ++ frag_cnt = op_info->num_sges ? (op_info->num_sges + 1) : 2; ++ else ++ frag_cnt = op_info->num_sges; ++ ret_code = zxdh_fragcnt_to_quanta_sq(frag_cnt, &quanta); ++ if (ret_code) ++ return ret_code; ++ ++ if (quanta > ZXDH_SQ_RING_FREE_QUANTA(qp->sq_ring)) ++ return ZXDH_ERR_QP_TOOMANY_WRS_POSTED; ++ ++ wqe_idx = ZXDH_RING_CURRENT_HEAD(qp->sq_ring); ++ if (!wqe_idx) ++ qp->swqe_polarity = !qp->swqe_polarity; ++ ++ ZXDH_RING_MOVE_HEAD_BY_COUNT_NOCHECK(qp->sq_ring, quanta); ++ ++ wqe_base = qp->sq_base[wqe_idx].elem; ++ qp->sq_wrtrk_array[wqe_idx].wrid = info->wr_id; ++ qp->sq_wrtrk_array[wqe_idx].wr_len = total_size; ++ qp->sq_wrtrk_array[wqe_idx].quanta = quanta; ++ ++ zxdh_clr_wqes(qp, wqe_idx); ++ ++ read_fence |= info->read_fence; ++ addl_frag_cnt = op_info->num_sges > 1 ? (op_info->num_sges - 1) : 0; ++ hdr = FIELD_PREP(ZXDHQPSQ_VALID, qp->swqe_polarity) | ++ FIELD_PREP(ZXDHQPSQ_OPCODE, info->op_type) | ++ FIELD_PREP(ZXDHQPSQ_SIGCOMPL, info->signaled) | ++ FIELD_PREP(ZXDHQPSQ_SOLICITED, info->solicited) | ++ FIELD_PREP(ZXDHQPSQ_IMMDATAFLAG, imm_data_flag) | ++ FIELD_PREP(ZXDHQPSQ_UD_INLINEDATAFLAG, 0) | ++ FIELD_PREP(ZXDHQPSQ_UD_INLINEDATALEN, 0) | ++ FIELD_PREP(ZXDHQPSQ_UD_ADDFRAGCNT, addl_frag_cnt) | ++ FIELD_PREP(ZXDHQPSQ_AHID, op_info->ah_id); ++ ++ if (op_info->num_sges) { ++ set_64bit_val( ++ wqe_base, 16, ++ FIELD_PREP(ZXDHQPSQ_FIRST_FRAG_VALID, ++ op_info->sg_list->len == ++ ZXDH_MAX_SQ_PAYLOAD_SIZE ? 
++ 1 : ++ 0) | ++ FIELD_PREP(ZXDHQPSQ_FIRST_FRAG_LEN, ++ op_info->sg_list->len) | ++ FIELD_PREP(ZXDHQPSQ_FIRST_FRAG_STAG, ++ op_info->sg_list->stag)); ++ set_64bit_val(wqe_base, 8, ++ FIELD_PREP(ZXDHQPSQ_FRAG_TO, ++ op_info->sg_list->tag_off)); ++ } else { ++ /*if zero sge,post a special sge with zero lenth*/ ++ set_64bit_val(wqe_base, 16, ++ FIELD_PREP(ZXDHQPSQ_FIRST_FRAG_VALID, 0) | ++ FIELD_PREP(ZXDHQPSQ_FIRST_FRAG_LEN, 0) | ++ FIELD_PREP(ZXDHQPSQ_FIRST_FRAG_STAG, ++ 0x100)); ++ set_64bit_val(wqe_base, 8, FIELD_PREP(ZXDHQPSQ_FRAG_TO, 0)); ++ } ++ ++ if (imm_data_flag) { ++ wqe_idx = (wqe_idx + 1) % qp->sq_ring.size; ++ if (!wqe_idx) ++ qp->swqe_polarity = !qp->swqe_polarity; ++ wqe_ex = qp->sq_base[wqe_idx].elem; ++ if (op_info->num_sges > 1) { ++ qp->wqe_ops.iw_set_fragment(wqe_ex, ++ ZXDH_QP_FRAG_BYTESIZE, ++ &op_info->sg_list[1], ++ qp->swqe_polarity); ++ } ++ set_64bit_val( ++ wqe_ex, 0, ++ FIELD_PREP(ZXDHQPSQ_IMMDATA_VALID, qp->swqe_polarity) | ++ FIELD_PREP(ZXDHQPSQ_IMMDATA, info->imm_data)); ++ i = 2; ++ for (byte_off = ZXDH_QP_FRAG_BYTESIZE; i < op_info->num_sges; ++ i += 2) { ++ if (!(i & 0x1)) { ++ wqe_idx = (wqe_idx + 1) % qp->sq_ring.size; ++ if (!wqe_idx) ++ qp->swqe_polarity = !qp->swqe_polarity; ++ wqe_ex = qp->sq_base[wqe_idx].elem; ++ } ++ if (i == addl_frag_cnt) { ++ qp->wqe_ops.iw_set_fragment( ++ wqe_ex, 0, &op_info->sg_list[i], ++ qp->swqe_polarity); ++ break; ++ } ++ qp->wqe_ops.iw_set_fragment( ++ wqe_ex, byte_off % ZXDH_SQ_WQE_BYTESIZE, ++ &op_info->sg_list[i + 1], qp->swqe_polarity); ++ byte_off -= ZXDH_QP_FRAG_BYTESIZE; ++ qp->wqe_ops.iw_set_fragment( ++ wqe_ex, byte_off % ZXDH_SQ_WQE_BYTESIZE, ++ &op_info->sg_list[i], qp->swqe_polarity); ++ byte_off += ZXDH_QP_FRAG_BYTESIZE; ++ } ++ } else { ++ i = 1; ++ for (byte_off = 0; i < op_info->num_sges; i += 2) { ++ if (i & 0x1) { ++ wqe_idx = (wqe_idx + 1) % qp->sq_ring.size; ++ if (!wqe_idx) ++ qp->swqe_polarity = !qp->swqe_polarity; ++ wqe_ex = qp->sq_base[wqe_idx].elem; ++ } ++ if (i == addl_frag_cnt) { ++ qp->wqe_ops.iw_set_fragment( ++ wqe_ex, 0, &op_info->sg_list[i], ++ qp->swqe_polarity); ++ break; ++ } ++ byte_off += ZXDH_QP_FRAG_BYTESIZE; ++ qp->wqe_ops.iw_set_fragment( ++ wqe_ex, byte_off % ZXDH_SQ_WQE_BYTESIZE, ++ &op_info->sg_list[i + 1], qp->swqe_polarity); ++ byte_off -= ZXDH_QP_FRAG_BYTESIZE; ++ qp->wqe_ops.iw_set_fragment( ++ wqe_ex, byte_off % ZXDH_SQ_WQE_BYTESIZE, ++ &op_info->sg_list[i], qp->swqe_polarity); ++ } ++ } ++ ++ /* if not an odd number set valid bit in next fragment */ ++ if (!(frag_cnt & 0x01) && frag_cnt && wqe_ex) { ++ qp->wqe_ops.iw_set_fragment(wqe_ex, ZXDH_QP_FRAG_BYTESIZE, NULL, ++ qp->swqe_polarity); ++ } ++ ++ set_64bit_val(wqe_base, 24, ++ FIELD_PREP(ZXDHQPSQ_DESTQPN, op_info->dest_qp) | ++ FIELD_PREP(ZXDHQPSQ_DESTQKEY, op_info->qkey)); ++ ++ udma_to_device_barrier(); /* make sure WQE is populated before valid bit is set */ ++ ++ set_64bit_val(wqe_base, 0, hdr); ++ if (post_sq) ++ zxdh_qp_post_wr(qp); ++ ++ return 0; ++} ++ ++/** ++ * zxdh_set_mw_bind_wqe - set mw bind in wqe ++ * @wqe: wqe for setting mw bind ++ * @op_info: info for setting wqe values ++ */ ++static void zxdh_set_mw_bind_wqe(__le64 *wqe, struct zxdh_bind_window *op_info) ++{ ++ __u32 value = 0; ++ __u8 leaf_pbl_size = op_info->leaf_pbl_size; ++ ++ set_64bit_val(wqe, 8, (uintptr_t)op_info->va); ++ ++ if (leaf_pbl_size == 0) { ++ value = (__u32)(op_info->mw_pa_pble_index >> 12); ++ value = (value & 0x03FFFFFFFC0000) >> 18; ++ set_64bit_val( ++ wqe, 16, ++ FIELD_PREP(ZXDHQPSQ_PARENTMRSTAG, op_info->mr_stag) | ++ 
FIELD_PREP(ZXDHQPSQ_MW_PA_PBLE_TWO, value)); ++ } else if (leaf_pbl_size == 1) { ++ value = (__u32)((op_info->mw_pa_pble_index & 0x0FFC0000) >> 18); ++ set_64bit_val( ++ wqe, 16, ++ FIELD_PREP(ZXDHQPSQ_PARENTMRSTAG, op_info->mr_stag) | ++ FIELD_PREP(ZXDHQPSQ_MW_PA_PBLE_TWO, value)); ++ } else { ++ value = (__u32)((op_info->mw_pa_pble_index & 0x0FFC0000) >> 18); ++ set_64bit_val( ++ wqe, 16, ++ FIELD_PREP(ZXDHQPSQ_PARENTMRSTAG, op_info->mr_stag) | ++ FIELD_PREP(ZXDHQPSQ_MW_LEVLE2_FIRST_PBLE_INDEX, ++ value) | ++ FIELD_PREP(ZXDHQPSQ_MW_LEVLE2_ROOT_PBLE_INDEX, ++ op_info->root_leaf_offset)); ++ } ++ ++ if (leaf_pbl_size == 0) { ++ value = (__u32)(op_info->mw_pa_pble_index >> 12); ++ value = value & 0x3FFFF; ++ } else { ++ value = (__u32)(op_info->mw_pa_pble_index & 0x3FFFF); ++ } ++ ++ set_64bit_val(wqe, 24, ++ op_info->bind_len | ++ FIELD_PREP(ZXDHQPSQ_MW_PA_PBLE_ONE, value)); ++} ++ ++/** ++ * zxdh_copy_inline_data - Copy inline data to wqe ++ * @dest: pointer to wqe ++ * @src: pointer to inline data ++ * @len: length of inline data to copy ++ * @polarity: polarity of wqe valid bit ++ */ ++static void zxdh_copy_inline_data(__u8 *dest, __u8 *src, __u32 len, ++ __u8 polarity, bool imm_data_flag) ++{ ++ __u8 inline_valid = polarity << ZXDH_INLINE_VALID_S; ++ __u32 copy_size; ++ __u8 *inline_valid_addr; ++ ++ dest += ZXDH_WQE_SIZE_32; /* point to additional 32 byte quanta */ ++ if (len) { ++ inline_valid_addr = dest + WQE_OFFSET_7BYTES; ++ if (imm_data_flag) { ++ copy_size = len < INLINE_DATASIZE_24BYTES ? ++ len : ++ INLINE_DATASIZE_24BYTES; ++ dest += WQE_OFFSET_8BYTES; ++ memcpy(dest, src, copy_size); ++ len -= copy_size; ++ dest += WQE_OFFSET_24BYTES; ++ src += copy_size; ++ } else { ++ if (len <= INLINE_DATASIZE_7BYTES) { ++ copy_size = len; ++ memcpy(dest, src, copy_size); ++ *inline_valid_addr = inline_valid; ++ return; ++ } ++ memcpy(dest, src, INLINE_DATASIZE_7BYTES); ++ len -= INLINE_DATASIZE_7BYTES; ++ dest += WQE_OFFSET_8BYTES; ++ src += INLINE_DATA_OFFSET_7BYTES; ++ copy_size = len < INLINE_DATASIZE_24BYTES ? ++ len : ++ INLINE_DATASIZE_24BYTES; ++ memcpy(dest, src, copy_size); ++ len -= copy_size; ++ dest += WQE_OFFSET_24BYTES; ++ src += copy_size; ++ } ++ *inline_valid_addr = inline_valid; ++ } ++ ++ while (len) { ++ inline_valid_addr = dest + WQE_OFFSET_7BYTES; ++ if (len <= INLINE_DATASIZE_7BYTES) { ++ copy_size = len; ++ memcpy(dest, src, copy_size); ++ *inline_valid_addr = inline_valid; ++ return; ++ } ++ memcpy(dest, src, INLINE_DATASIZE_7BYTES); ++ len -= INLINE_DATASIZE_7BYTES; ++ dest += WQE_OFFSET_8BYTES; ++ src += INLINE_DATA_OFFSET_7BYTES; ++ copy_size = len < INLINE_DATASIZE_24BYTES ? ++ len : ++ INLINE_DATASIZE_24BYTES; ++ memcpy(dest, src, copy_size); ++ len -= copy_size; ++ dest += WQE_OFFSET_24BYTES; ++ src += copy_size; ++ ++ *inline_valid_addr = inline_valid; ++ } ++} ++ ++/** ++ * zxdh_inline_data_size_to_quanta - based on inline data, quanta ++ * @data_size: data size for inline ++ * @imm_data_flag: flag for immediate data ++ * ++ * Gets the quanta based on inline and immediate data. ++ */ ++static __u16 zxdh_inline_data_size_to_quanta(__u32 data_size, ++ bool imm_data_flag) ++{ ++ if (imm_data_flag) ++ data_size += INLINE_DATASIZE_7BYTES; ++ ++ return data_size % 31 ? 
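/* annotation (reader's note): effectively 1 + ceil(data_size / 31) -- one
 * 32-byte quantum for the WQE header plus one per 31 bytes of inline payload,
 * since zxdh_copy_inline_data() packs 7 + 24 data bytes around the valid byte
 * of each additional quantum */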
data_size / 31 + 2 : data_size / 31 + 1; ++} ++ ++/** ++ * zxdh_inline_rdma_write - inline rdma write operation ++ * @qp: hw qp ptr ++ * @info: post sq information ++ * @post_sq: flag to post sq ++ */ ++enum zxdh_status_code zxdh_inline_rdma_write(struct zxdh_qp *qp, ++ struct zxdh_post_sq_info *info, ++ bool post_sq) ++{ ++ __le64 *wqe; ++ __u8 imm_valid; ++ struct zxdh_inline_rdma_write *op_info; ++ __u64 hdr = 0; ++ __u32 wqe_idx; ++ bool read_fence = false; ++ __u16 quanta; ++ bool imm_data_flag = info->imm_data_valid ? 1 : 0; ++ ++ op_info = &info->op.inline_rdma_write; ++ ++ if (op_info->len > qp->max_inline_data) ++ return ZXDH_ERR_INVALID_INLINE_DATA_SIZE; ++ if (imm_data_flag && op_info->len > ZXDH_MAX_SQ_INLINE_DATELEN_WITH_IMM) ++ return ZXDH_ERR_INVALID_INLINE_DATA_SIZE; ++ ++ quanta = qp->wqe_ops.iw_inline_data_size_to_quanta(op_info->len, ++ imm_data_flag); ++ wqe = zxdh_qp_get_next_send_wqe(qp, &wqe_idx, quanta, op_info->len, ++ info); ++ if (!wqe) ++ return ZXDH_ERR_QP_TOOMANY_WRS_POSTED; ++ ++ zxdh_clr_wqes(qp, wqe_idx); ++ ++ read_fence |= info->read_fence; ++ hdr = FIELD_PREP(ZXDHQPSQ_VALID, qp->swqe_polarity) | ++ FIELD_PREP(ZXDHQPSQ_OPCODE, info->op_type) | ++ FIELD_PREP(ZXDHQPSQ_SIGCOMPL, info->signaled) | ++ FIELD_PREP(ZXDHQPSQ_LOCALFENCE, info->local_fence) | ++ FIELD_PREP(ZXDHQPSQ_READFENCE, read_fence) | ++ FIELD_PREP(ZXDHQPSQ_SOLICITED, info->solicited) | ++ FIELD_PREP(ZXDHQPSQ_IMMDATAFLAG, imm_data_flag) | ++ FIELD_PREP(ZXDHQPSQ_WRITE_INLINEDATAFLAG, 1) | ++ FIELD_PREP(ZXDHQPSQ_WRITE_INLINEDATALEN, op_info->len) | ++ FIELD_PREP(ZXDHQPSQ_ADDFRAGCNT, quanta - 1) | ++ FIELD_PREP(ZXDHQPSQ_REMSTAG, op_info->rem_addr.stag); ++ set_64bit_val(wqe, 24, ++ FIELD_PREP(ZXDHQPSQ_FRAG_TO, op_info->rem_addr.tag_off)); ++ ++ if (imm_data_flag) { ++ /* if inline exist, not update imm valid */ ++ imm_valid = (op_info->len == 0) ? qp->swqe_polarity : ++ (!qp->swqe_polarity); ++ ++ set_64bit_val(wqe, 32, ++ FIELD_PREP(ZXDHQPSQ_IMMDATA_VALID, imm_valid) | ++ FIELD_PREP(ZXDHQPSQ_IMMDATA, ++ info->imm_data)); ++ } ++ qp->wqe_ops.iw_copy_inline_data((__u8 *)wqe, op_info->data, ++ op_info->len, qp->swqe_polarity, ++ imm_data_flag); ++ ++ udma_to_device_barrier(); /* make sure WQE is populated before valid bit is set */ ++ ++ set_64bit_val(wqe, 0, hdr); ++ ++ if (post_sq) ++ zxdh_qp_post_wr(qp); ++ qp_tx_psn_add(&qp->next_psn, op_info->len, qp->mtu); ++ return 0; ++} ++ ++/** ++ * zxdh_rc_inline_send - inline send operation ++ * @qp: hw qp ptr ++ * @info: post sq information ++ * @post_sq: flag to post sq ++ */ ++enum zxdh_status_code zxdh_rc_inline_send(struct zxdh_qp *qp, ++ struct zxdh_post_sq_info *info, ++ bool post_sq) ++{ ++ __le64 *wqe; ++ __u8 imm_valid; ++ struct zxdh_inline_rdma_send *op_info; ++ __u64 hdr; ++ __u32 wqe_idx; ++ bool read_fence = false; ++ __u16 quanta; ++ bool imm_data_flag = info->imm_data_valid ? 
1 : 0; ++ ++ op_info = &info->op.inline_rdma_send; ++ ++ if (op_info->len > qp->max_inline_data) ++ return ZXDH_ERR_INVALID_INLINE_DATA_SIZE; ++ if (imm_data_flag && op_info->len > ZXDH_MAX_SQ_INLINE_DATELEN_WITH_IMM) ++ return ZXDH_ERR_INVALID_INLINE_DATA_SIZE; ++ ++ quanta = qp->wqe_ops.iw_inline_data_size_to_quanta(op_info->len, ++ imm_data_flag); ++ wqe = zxdh_qp_get_next_send_wqe(qp, &wqe_idx, quanta, op_info->len, ++ info); ++ if (!wqe) ++ return ZXDH_ERR_QP_TOOMANY_WRS_POSTED; ++ ++ zxdh_clr_wqes(qp, wqe_idx); ++ ++ read_fence |= info->read_fence; ++ hdr = FIELD_PREP(ZXDHQPSQ_VALID, qp->swqe_polarity) | ++ FIELD_PREP(ZXDHQPSQ_OPCODE, info->op_type) | ++ FIELD_PREP(ZXDHQPSQ_SIGCOMPL, info->signaled) | ++ FIELD_PREP(ZXDHQPSQ_LOCALFENCE, info->local_fence) | ++ FIELD_PREP(ZXDHQPSQ_READFENCE, read_fence) | ++ FIELD_PREP(ZXDHQPSQ_SOLICITED, info->solicited) | ++ FIELD_PREP(ZXDHQPSQ_ADDFRAGCNT, quanta - 1) | ++ FIELD_PREP(ZXDHQPSQ_IMMDATAFLAG, imm_data_flag) | ++ FIELD_PREP(ZXDHQPSQ_REMSTAG, info->stag_to_inv); ++ set_64bit_val(wqe, 24, ++ FIELD_PREP(ZXDHQPSQ_INLINEDATAFLAG, 1) | ++ FIELD_PREP(ZXDHQPSQ_INLINEDATALEN, op_info->len)); ++ ++ if (imm_data_flag) { ++ /* if inline exist, not update imm valid */ ++ imm_valid = (op_info->len == 0) ? qp->swqe_polarity : ++ (!qp->swqe_polarity); ++ set_64bit_val(wqe, 32, ++ FIELD_PREP(ZXDHQPSQ_IMMDATA_VALID, imm_valid) | ++ FIELD_PREP(ZXDHQPSQ_IMMDATA, ++ info->imm_data)); ++ } ++ ++ qp->wqe_ops.iw_copy_inline_data((__u8 *)wqe, op_info->data, ++ op_info->len, qp->swqe_polarity, ++ imm_data_flag); ++ ++ udma_to_device_barrier(); /* make sure WQE is populated before valid bit is set */ ++ ++ set_64bit_val(wqe, 0, hdr); ++ ++ if (post_sq) ++ zxdh_qp_post_wr(qp); ++ ++ qp_tx_psn_add(&qp->next_psn, op_info->len, qp->mtu); ++ return 0; ++} ++ ++/** ++ * zxdh_ud_inline_send - inline send operation ++ * @qp: hw qp ptr ++ * @info: post sq information ++ * @post_sq: flag to post sq ++ */ ++enum zxdh_status_code zxdh_ud_inline_send(struct zxdh_qp *qp, ++ struct zxdh_post_sq_info *info, ++ bool post_sq) ++{ ++ __le64 *wqe_base; ++ __le64 *wqe_ex; ++ struct zxdh_inline_rdma_send *op_info; ++ __u64 hdr; ++ __u32 wqe_idx; ++ bool read_fence = false; ++ __u16 quanta; ++ bool imm_data_flag = info->imm_data_valid ? 
1 : 0; ++ __u8 *inline_dest; ++ __u8 *inline_src; ++ __u32 inline_len; ++ __u32 copy_size; ++ __u8 *inline_valid_addr; ++ ++ op_info = &info->op.inline_rdma_send; ++ inline_len = op_info->len; ++ ++ if (op_info->len > qp->max_inline_data) ++ return ZXDH_ERR_INVALID_INLINE_DATA_SIZE; ++ if (imm_data_flag && op_info->len > ZXDH_MAX_SQ_INLINE_DATELEN_WITH_IMM) ++ return ZXDH_ERR_INVALID_INLINE_DATA_SIZE; ++ ++ quanta = qp->wqe_ops.iw_inline_data_size_to_quanta(op_info->len, ++ imm_data_flag); ++ if (quanta > ZXDH_SQ_RING_FREE_QUANTA(qp->sq_ring)) ++ return ZXDH_ERR_QP_TOOMANY_WRS_POSTED; ++ ++ wqe_idx = ZXDH_RING_CURRENT_HEAD(qp->sq_ring); ++ if (!wqe_idx) ++ qp->swqe_polarity = !qp->swqe_polarity; ++ ++ ZXDH_RING_MOVE_HEAD_BY_COUNT_NOCHECK(qp->sq_ring, quanta); ++ ++ wqe_base = qp->sq_base[wqe_idx].elem; ++ qp->sq_wrtrk_array[wqe_idx].wrid = info->wr_id; ++ qp->sq_wrtrk_array[wqe_idx].wr_len = op_info->len; ++ qp->sq_wrtrk_array[wqe_idx].quanta = quanta; ++ ++ zxdh_clr_wqes(qp, wqe_idx); ++ ++ read_fence |= info->read_fence; ++ hdr = FIELD_PREP(ZXDHQPSQ_VALID, qp->swqe_polarity) | ++ FIELD_PREP(ZXDHQPSQ_OPCODE, info->op_type) | ++ FIELD_PREP(ZXDHQPSQ_SIGCOMPL, info->signaled) | ++ FIELD_PREP(ZXDHQPSQ_SOLICITED, info->solicited) | ++ FIELD_PREP(ZXDHQPSQ_IMMDATAFLAG, imm_data_flag) | ++ FIELD_PREP(ZXDHQPSQ_UD_INLINEDATAFLAG, 1) | ++ FIELD_PREP(ZXDHQPSQ_UD_INLINEDATALEN, op_info->len) | ++ FIELD_PREP(ZXDHQPSQ_UD_ADDFRAGCNT, quanta - 1) | ++ FIELD_PREP(ZXDHQPSQ_AHID, op_info->ah_id); ++ set_64bit_val(wqe_base, 24, ++ FIELD_PREP(ZXDHQPSQ_DESTQPN, op_info->dest_qp) | ++ FIELD_PREP(ZXDHQPSQ_DESTQKEY, op_info->qkey)); ++ ++ if (imm_data_flag) { ++ wqe_idx = (wqe_idx + 1) % qp->sq_ring.size; ++ if (!wqe_idx) ++ qp->swqe_polarity = !qp->swqe_polarity; ++ wqe_ex = qp->sq_base[wqe_idx].elem; ++ ++ if (inline_len) { ++ /* imm and inline use the same valid, valid set after inline data updated*/ ++ copy_size = inline_len < INLINE_DATASIZE_24BYTES ? ++ inline_len : ++ INLINE_DATASIZE_24BYTES; ++ inline_dest = (__u8 *)wqe_ex + WQE_OFFSET_8BYTES; ++ inline_src = (__u8 *)op_info->data; ++ memcpy(inline_dest, inline_src, copy_size); ++ inline_len -= copy_size; ++ inline_src += copy_size; ++ } ++ set_64bit_val( ++ wqe_ex, 0, ++ FIELD_PREP(ZXDHQPSQ_IMMDATA_VALID, qp->swqe_polarity) | ++ FIELD_PREP(ZXDHQPSQ_IMMDATA, info->imm_data)); ++ ++ } else if (inline_len) { ++ wqe_idx = (wqe_idx + 1) % qp->sq_ring.size; ++ if (!wqe_idx) ++ qp->swqe_polarity = !qp->swqe_polarity; ++ wqe_ex = qp->sq_base[wqe_idx].elem; ++ inline_dest = (__u8 *)wqe_ex; ++ inline_src = (__u8 *)op_info->data; ++ ++ if (inline_len <= INLINE_DATASIZE_7BYTES) { ++ copy_size = inline_len; ++ memcpy(inline_dest, inline_src, copy_size); ++ inline_len = 0; ++ } else { ++ copy_size = INLINE_DATASIZE_7BYTES; ++ memcpy(inline_dest, inline_src, copy_size); ++ inline_len -= copy_size; ++ inline_src += copy_size; ++ inline_dest += WQE_OFFSET_8BYTES; ++ copy_size = inline_len < INLINE_DATASIZE_24BYTES ? 
++ inline_len : ++ INLINE_DATASIZE_24BYTES; ++ memcpy(inline_dest, inline_src, copy_size); ++ inline_len -= copy_size; ++ inline_src += copy_size; ++ } ++ inline_valid_addr = (__u8 *)wqe_ex + WQE_OFFSET_7BYTES; ++ *inline_valid_addr = qp->swqe_polarity << ZXDH_INLINE_VALID_S; ++ } ++ ++ while (inline_len) { ++ wqe_idx = (wqe_idx + 1) % qp->sq_ring.size; ++ if (!wqe_idx) ++ qp->swqe_polarity = !qp->swqe_polarity; ++ wqe_ex = qp->sq_base[wqe_idx].elem; ++ inline_dest = (__u8 *)wqe_ex; ++ ++ if (inline_len <= INLINE_DATASIZE_7BYTES) { ++ copy_size = inline_len; ++ memcpy(inline_dest, inline_src, copy_size); ++ inline_len = 0; ++ } else { ++ copy_size = INLINE_DATASIZE_7BYTES; ++ memcpy(inline_dest, inline_src, copy_size); ++ inline_len -= copy_size; ++ inline_src += copy_size; ++ inline_dest += WQE_OFFSET_8BYTES; ++ copy_size = inline_len < INLINE_DATASIZE_24BYTES ? ++ inline_len : ++ INLINE_DATASIZE_24BYTES; ++ memcpy(inline_dest, inline_src, copy_size); ++ inline_len -= copy_size; ++ inline_src += copy_size; ++ } ++ inline_valid_addr = (__u8 *)wqe_ex + WQE_OFFSET_7BYTES; ++ *inline_valid_addr = qp->swqe_polarity << ZXDH_INLINE_VALID_S; ++ } ++ ++ udma_to_device_barrier(); /* make sure WQE is populated before valid bit is set */ ++ ++ set_64bit_val(wqe_base, 0, hdr); ++ ++ if (post_sq) ++ zxdh_qp_post_wr(qp); ++ ++ return 0; ++} ++ ++/** ++ * zxdh_stag_local_invalidate - stag invalidate operation ++ * @qp: hw qp ptr ++ * @info: post sq information ++ * @post_sq: flag to post sq ++ */ ++enum zxdh_status_code zxdh_stag_local_invalidate(struct zxdh_qp *qp, ++ struct zxdh_post_sq_info *info, ++ bool post_sq) ++{ ++ __le64 *wqe; ++ struct zxdh_inv_local_stag *op_info; ++ __u64 hdr; ++ __u32 wqe_idx; ++ bool local_fence = true; ++ ++ op_info = &info->op.inv_local_stag; ++ ++ wqe = zxdh_qp_get_next_send_wqe(qp, &wqe_idx, ZXDH_QP_WQE_MIN_QUANTA, 0, ++ info); ++ if (!wqe) ++ return ZXDH_ERR_QP_TOOMANY_WRS_POSTED; ++ ++ zxdh_clr_wqes(qp, wqe_idx); ++ ++ set_64bit_val(wqe, 16, 0); ++ ++ hdr = FIELD_PREP(ZXDHQPSQ_VALID, qp->swqe_polarity) | ++ FIELD_PREP(ZXDHQPSQ_OPCODE, ZXDH_OP_TYPE_LOCAL_INV) | ++ FIELD_PREP(ZXDHQPSQ_SIGCOMPL, info->signaled) | ++ FIELD_PREP(ZXDHQPSQ_LOCALFENCE, local_fence) | ++ FIELD_PREP(ZXDHQPSQ_READFENCE, info->read_fence) | ++ FIELD_PREP(ZXDHQPSQ_REMSTAG, op_info->target_stag); ++ ++ udma_to_device_barrier(); /* make sure WQE is populated before valid bit is set */ ++ ++ set_64bit_val(wqe, 0, hdr); ++ ++ if (post_sq) ++ zxdh_qp_post_wr(qp); ++ ++ return 0; ++} ++ ++/** ++ * zxdh_mw_bind - bind Memory Window ++ * @qp: hw qp ptr ++ * @info: post sq information ++ * @post_sq: flag to post sq ++ */ ++enum zxdh_status_code zxdh_mw_bind(struct zxdh_qp *qp, ++ struct zxdh_post_sq_info *info, bool post_sq) ++{ ++ __le64 *wqe; ++ struct zxdh_bind_window *op_info; ++ __u64 hdr; ++ __u32 wqe_idx; ++ bool local_fence = true; ++ __u8 access = 1; ++ __u16 value = 0; ++ ++ op_info = &info->op.bind_window; ++ local_fence |= info->local_fence; ++ ++ wqe = zxdh_qp_get_next_send_wqe(qp, &wqe_idx, ZXDH_QP_WQE_MIN_QUANTA, 0, ++ info); ++ if (!wqe) ++ return ZXDH_ERR_QP_TOOMANY_WRS_POSTED; ++ ++ zxdh_clr_wqes(qp, wqe_idx); ++ ++ if (op_info->ena_writes) { ++ access = (op_info->ena_reads << 2) | ++ (op_info->ena_writes << 3) | (1 << 1) | access; ++ } else { ++ access = (op_info->ena_reads << 2) | ++ (op_info->ena_writes << 3) | access; ++ } ++ ++ qp->wqe_ops.iw_set_mw_bind_wqe(wqe, op_info); ++ ++ value = (__u16)((op_info->mw_pa_pble_index >> 12) & 0xC000000000000); ++ ++ hdr = 
FIELD_PREP(ZXDHQPSQ_OPCODE, ZXDH_OP_TYPE_BIND_MW) | ++ FIELD_PREP(ZXDHQPSQ_MWSTAG, op_info->mw_stag) | ++ FIELD_PREP(ZXDHQPSQ_STAGRIGHTS, access) | ++ FIELD_PREP(ZXDHQPSQ_VABASEDTO, ++ (op_info->addressing_type == ZXDH_ADDR_TYPE_VA_BASED ? ++ 1 : ++ 0)) | ++ FIELD_PREP(ZXDHQPSQ_MEMWINDOWTYPE, ++ (op_info->mem_window_type_1 ? 1 : 0)) | ++ FIELD_PREP(ZXDHQPSQ_READFENCE, info->read_fence) | ++ FIELD_PREP(ZXDHQPSQ_LOCALFENCE, local_fence) | ++ FIELD_PREP(ZXDHQPSQ_SIGCOMPL, info->signaled) | ++ FIELD_PREP(ZXDHQPSQ_MW_HOST_PAGE_SIZE, op_info->host_page_size) | ++ FIELD_PREP(ZXDHQPSQ_MW_LEAF_PBL_SIZE, op_info->leaf_pbl_size) | ++ FIELD_PREP(ZXDHQPSQ_MW_PA_PBLE_THREE, value) | ++ FIELD_PREP(ZXDHQPSQ_VALID, qp->swqe_polarity); ++ ++ udma_to_device_barrier(); /* make sure WQE is populated before valid bit is set */ ++ ++ set_64bit_val(wqe, 0, hdr); ++ ++ if (post_sq) ++ zxdh_qp_post_wr(qp); ++ ++ return 0; ++} ++ ++static void zxdh_sleep_ns(unsigned int nanoseconds) ++{ ++ struct timespec req; ++ ++ req.tv_sec = 0; ++ req.tv_nsec = nanoseconds; ++ nanosleep(&req, NULL); ++} ++ ++/** ++ * zxdh_post_receive - post receive wqe ++ * @qp: hw qp ptr ++ * @info: post rq information ++ */ ++enum zxdh_status_code zxdh_post_receive(struct zxdh_qp *qp, ++ struct zxdh_post_rq_info *info) ++{ ++ __u32 wqe_idx, i, byte_off; ++ __le64 *wqe; ++ struct zxdh_sge *sge; ++ ++ if (qp->max_rq_frag_cnt < info->num_sges) ++ return ZXDH_ERR_INVALID_FRAG_COUNT; ++ ++ wqe = zxdh_qp_get_next_recv_wqe(qp, &wqe_idx); ++ if (unlikely(!wqe)) ++ return ZXDH_ERR_QP_TOOMANY_WRS_POSTED; ++ ++ qp->rq_wrid_array[wqe_idx] = info->wr_id; ++ ++ for (i = 0, byte_off = ZXDH_QP_FRAG_BYTESIZE; i < info->num_sges; i++) { ++ sge = &info->sg_list[i]; ++ set_64bit_val(wqe, byte_off, sge->tag_off); ++ set_64bit_val(wqe, byte_off + 8, ++ FIELD_PREP(ZXDHQPRQ_FRAG_LEN, sge->len) | ++ FIELD_PREP(ZXDHQPRQ_STAG, sge->stag)); ++ byte_off += ZXDH_QP_FRAG_BYTESIZE; ++ } ++ ++ /** ++ * while info->num_sges < qp->max_rq_frag_cnt, or 0 == info->num_sges, ++ * fill next fragment with FRAG_LEN=0, FRAG_STAG=0x00000100, ++ * witch indicates a invalid fragment ++ */ ++ if (info->num_sges < qp->max_rq_frag_cnt || 0 == info->num_sges) { ++ set_64bit_val(wqe, byte_off, 0); ++ set_64bit_val(wqe, byte_off + 8, ++ FIELD_PREP(ZXDHQPRQ_FRAG_LEN, 0) | ++ FIELD_PREP(ZXDHQPRQ_STAG, 0x00000100)); ++ } ++ ++ set_64bit_val(wqe, 0, ++ FIELD_PREP(ZXDHQPRQ_ADDFRAGCNT, info->num_sges) | ++ FIELD_PREP(ZXDHQPRQ_SIGNATURE, ++ qp->rwqe_signature)); ++ ++ udma_to_device_barrier(); /* make sure WQE is populated before valid bit is set */ ++ if (info->num_sges > 3) ++ zxdh_sleep_ns(1000); ++ ++ set_64bit_val(wqe, 8, FIELD_PREP(ZXDHQPRQ_VALID, qp->rwqe_polarity)); ++ ++ return 0; ++} ++ ++/** ++ * zxdh_cq_resize - reset the cq buffer info ++ * @cq: cq to resize ++ * @cq_base: new cq buffer addr ++ * @cq_size: number of cqes ++ */ ++void zxdh_cq_resize(struct zxdh_cq *cq, void *cq_base, int cq_size) ++{ ++ cq->cq_base = cq_base; ++ cq->cq_size = cq_size; ++ ZXDH_RING_INIT(cq->cq_ring, cq->cq_size); ++ cq->polarity = 1; ++} ++ ++/** ++ * zxdh_cq_set_resized_cnt - record the count of the resized buffers ++ * @cq: cq to resize ++ * @cq_cnt: the count of the resized cq buffers ++ */ ++void zxdh_cq_set_resized_cnt(struct zxdh_cq *cq, __u16 cq_cnt) ++{ ++ __u64 temp_val; ++ __u16 sw_cq_sel; ++ __u8 arm_next; ++ __u8 arm_seq_num; ++ ++ get_64bit_val(cq->shadow_area, 0, &temp_val); ++ ++ sw_cq_sel = (__u16)FIELD_GET(ZXDH_CQ_DBSA_SW_CQ_SELECT, temp_val); ++ sw_cq_sel += cq_cnt; ++ ++ arm_seq_num = 
(__u8)FIELD_GET(ZXDH_CQ_DBSA_ARM_SEQ_NUM, temp_val); ++ arm_next = (__u8)FIELD_GET(ZXDH_CQ_DBSA_ARM_NEXT, temp_val); ++ cq->cqe_rd_cnt = 0; ++ ++ temp_val = FIELD_PREP(ZXDH_CQ_DBSA_ARM_SEQ_NUM, arm_seq_num) | ++ FIELD_PREP(ZXDH_CQ_DBSA_SW_CQ_SELECT, sw_cq_sel) | ++ FIELD_PREP(ZXDH_CQ_DBSA_ARM_NEXT, arm_next) | ++ FIELD_PREP(ZXDH_CQ_DBSA_CQEIDX, cq->cqe_rd_cnt); ++ ++ set_64bit_val(cq->shadow_area, 0, temp_val); ++} ++ ++/** ++ * zxdh_cq_request_notification - cq notification request (door bell) ++ * @cq: hw cq ++ * @cq_notify: notification type ++ */ ++void zxdh_cq_request_notification(struct zxdh_cq *cq, ++ enum zxdh_cmpl_notify cq_notify) ++{ ++ __u64 temp_val; ++ __u16 sw_cq_sel; ++ __u8 arm_next = 0; ++ __u8 arm_seq_num; ++ __u32 cqe_index; ++ __u32 hdr; ++ ++ get_64bit_val(cq->shadow_area, 0, &temp_val); ++ arm_seq_num = (__u8)FIELD_GET(ZXDH_CQ_DBSA_ARM_SEQ_NUM, temp_val); ++ arm_seq_num++; ++ sw_cq_sel = (__u16)FIELD_GET(ZXDH_CQ_DBSA_SW_CQ_SELECT, temp_val); ++ cqe_index = (__u32)FIELD_GET(ZXDH_CQ_DBSA_CQEIDX, temp_val); ++ ++ if (cq_notify == ZXDH_CQ_COMPL_SOLICITED) ++ arm_next = 1; ++ temp_val = FIELD_PREP(ZXDH_CQ_DBSA_ARM_SEQ_NUM, arm_seq_num) | ++ FIELD_PREP(ZXDH_CQ_DBSA_SW_CQ_SELECT, sw_cq_sel) | ++ FIELD_PREP(ZXDH_CQ_DBSA_ARM_NEXT, arm_next) | ++ FIELD_PREP(ZXDH_CQ_DBSA_CQEIDX, cqe_index); ++ ++ set_64bit_val(cq->shadow_area, 0, temp_val); ++ ++ hdr = FIELD_PREP(ZXDH_CQ_ARM_DBSA_VLD, 0) | ++ FIELD_PREP(ZXDH_CQ_ARM_CQ_ID, cq->cq_id); ++ ++ udma_to_device_barrier(); /* make sure WQE is populated before valid bit is set */ ++ ++ db_wr32(hdr, cq->cqe_alloc_db); ++} ++ ++static inline void build_comp_status(__u32 cq_type, ++ struct zxdh_cq_poll_info *info) ++{ ++ if (!info->error) { ++ info->comp_status = ZXDH_COMPL_STATUS_SUCCESS; ++ if (cq_type == ZXDH_CQE_QTYPE_RQ) { ++ if (info->major_err != ERROR_CODE_VALUE && ++ info->minor_err != ERROR_CODE_VALUE) { ++ info->comp_status = ZXDH_COMPL_STATUS_UNKNOWN; ++ } ++ } ++ return; ++ } ++ if (info->major_err == ZXDH_RETRY_ACK_MAJOR_ERR && ++ info->minor_err == ZXDH_RETRY_ACK_MINOR_ERR) { ++ info->comp_status = ZXDH_COMPL_STATUS_RETRY_ACK_ERR; ++ return; ++ } ++ if (info->major_err == ZXDH_RETRY_ACK_MAJOR_ERR && ++ info->minor_err == ZXDH_TX_WINDOW_QUERY_ITEM_MINOR_ERR) { ++ info->comp_status = ZXDH_COMPL_STATUS_TX_WINDOW_QUERY_ITEM_ERR; ++ return; ++ } ++ info->comp_status = (info->major_err == ZXDH_FLUSH_MAJOR_ERR) ? 
++ ZXDH_COMPL_STATUS_FLUSHED : ++ ZXDH_COMPL_STATUS_UNKNOWN; ++} ++ ++__le64 *get_current_cqe(struct zxdh_cq *cq) ++{ ++ return ZXDH_GET_CURRENT_EXTENDED_CQ_ELEM(cq); ++} ++ ++static inline void zxdh_get_cq_poll_info(struct zxdh_qp *qp, ++ struct zxdh_cq_poll_info *info, ++ __u64 qword2, __u64 qword3) ++{ ++ __u8 qp_type; ++ ++ qp_type = qp->qp_type; ++ ++ info->imm_valid = (bool)FIELD_GET(ZXDH_CQ_IMMVALID, qword2); ++ if (info->imm_valid) { ++ info->imm_data = (__u32)FIELD_GET(ZXDH_CQ_IMMDATA, qword3); ++ info->op_type = ZXDH_OP_TYPE_REC_IMM; ++ } else { ++ info->op_type = ZXDH_OP_TYPE_REC; ++ } ++ ++ info->bytes_xfered = (__u32)FIELD_GET(ZXDHCQ_PAYLDLEN, qword3); ++ ++ if (likely(qp_type == ZXDH_QP_TYPE_ROCE_RC)) { ++ if (qword2 & ZXDHCQ_STAG) { ++ info->stag_invalid_set = true; ++ info->inv_stag = ++ (__u32)FIELD_GET(ZXDHCQ_INVSTAG, qword2); ++ } else { ++ info->stag_invalid_set = false; ++ } ++ } else if (qp_type == ZXDH_QP_TYPE_ROCE_UD) { ++ info->ipv4 = (bool)FIELD_GET(ZXDHCQ_IPV4, qword2); ++ info->ud_src_qpn = (__u32)FIELD_GET(ZXDHCQ_UDSRCQPN, qword2); ++ } ++} ++ ++static void update_cq_poll_info(struct zxdh_qp *qp, ++ struct zxdh_cq_poll_info *info, __u32 wqe_idx, ++ __u64 qword0) ++{ ++ info->wr_id = qp->sq_wrtrk_array[wqe_idx].wrid; ++ if (!info->comp_status) ++ info->bytes_xfered = qp->sq_wrtrk_array[wqe_idx].wr_len; ++ info->op_type = (__u8)FIELD_GET(ZXDHCQ_OP, qword0); ++ ZXDH_RING_SET_TAIL(qp->sq_ring, ++ wqe_idx + qp->sq_wrtrk_array[wqe_idx].quanta); ++} ++ ++static enum zxdh_status_code ++process_tx_window_query_item_err(struct zxdh_qp *qp, ++ struct zxdh_cq_poll_info *info) ++{ ++ int ret; ++ struct ibv_qp *ib_qp; ++ struct zxdh_uqp *iwuqp; ++ struct zxdh_rdma_qpc qpc = { 0 }; ++ ++ iwuqp = container_of(qp, struct zxdh_uqp, qp); ++ ib_qp = &iwuqp->vqp.qp; ++ ret = zxdh_query_qpc(ib_qp, &qpc); ++ if (ret) { ++ verbs_err(verbs_get_ctx(ib_qp->context), ++ "process tx window query item query qpc failed:%d\n", ++ ret); ++ return ZXDH_ERR_RETRY_ACK_ERR; ++ } ++ if (qpc.tx_last_ack_psn != qp->qp_last_ack_qsn) ++ qp->qp_reset_cnt = 0; ++ ++ qp->qp_last_ack_qsn = qpc.tx_last_ack_psn; ++ if (qp->qp_reset_cnt >= ZXDH_QP_RETRY_COUNT) ++ return ZXDH_ERR_RETRY_ACK_ERR; ++ ++ ret = zxdh_reset_qp(ib_qp, ZXDH_RESET_RETRY_TX_ITEM_FLAG); ++ if (ret) { ++ verbs_err(verbs_get_ctx(ib_qp->context), ++ "process tx window query item reset qp failed:%d\n", ++ ret); ++ return ZXDH_ERR_RETRY_ACK_ERR; ++ } ++ qp->qp_reset_cnt++; ++ return ZXDH_ERR_RETRY_ACK_NOT_EXCEED_ERR; ++} ++ ++static enum zxdh_status_code ++process_retry_ack_err(struct zxdh_qp *qp, struct zxdh_cq_poll_info *info) ++{ ++ int ret; ++ struct ibv_qp *ib_qp; ++ struct zxdh_uqp *iwuqp; ++ struct zxdh_rdma_qpc qpc = { 0 }; ++ struct zxdh_rdma_qpc qpc_req_cmd = { 0 }; ++ ++ iwuqp = container_of(qp, struct zxdh_uqp, qp); ++ ++ ib_qp = &iwuqp->vqp.qp; ++ ret = zxdh_query_qpc(ib_qp, &qpc); ++ if (ret) { ++ verbs_err(verbs_get_ctx(ib_qp->context), ++ "process retry ack query qpc failed:%d\n", ret); ++ return ZXDH_ERR_RETRY_ACK_ERR; ++ } ++ if (!(qpc.retry_cqe_sq_opcode >= ZXDH_RETRY_CQE_SQ_OPCODE_ERR && ++ (qpc.recv_err_flag == ZXDH_RECV_ERR_FLAG_NAK_RNR_NAK || ++ qpc.recv_err_flag == ZXDH_RECV_ERR_FLAG_READ_RESP))) { ++ return ZXDH_ERR_RETRY_ACK_ERR; ++ } ++ if (qpc.tx_last_ack_psn != qp->cqe_last_ack_qsn) ++ qp->cqe_retry_cnt = 0; ++ ++ qp->cqe_last_ack_qsn = qpc.tx_last_ack_psn; ++ if (qp->cqe_retry_cnt >= ZXDH_QP_RETRY_COUNT) ++ return ZXDH_ERR_RETRY_ACK_ERR; ++ ++ memcpy(&qpc_req_cmd, &qpc, sizeof(qpc)); ++ 
qpc_req_cmd.package_err_flag = 0; ++ qpc_req_cmd.ack_err_flag = 0; ++ qpc_req_cmd.err_flag = 0; ++ qpc_req_cmd.retry_cqe_sq_opcode &= ZXDH_RESET_RETRY_CQE_SQ_OPCODE_ERR; ++ qpc_req_cmd.cur_retry_count = qpc.retry_count; ++ ret = zxdh_modify_qpc(ib_qp, &qpc_req_cmd, ++ ZXDH_PACKAGE_ERR_FLAG | ZXDH_ERR_FLAG_SET | ++ ZXDH_RETRY_CQE_SQ_OPCODE | ++ ZXDH_TX_READ_RETRY_FLAG_SET); ++ if (ret) { ++ verbs_err(verbs_get_ctx(ib_qp->context), ++ "process retry ack modify qpc failed:%d\n", ret); ++ return ZXDH_ERR_RETRY_ACK_ERR; ++ } ++ qp->cqe_retry_cnt++; ++ return ZXDH_ERR_RETRY_ACK_NOT_EXCEED_ERR; ++} ++ ++/** ++ * zxdh_cq_poll_cmpl - get cq completion info ++ * @cq: hw cq ++ * @info: cq poll information returned ++ */ ++enum zxdh_status_code zxdh_cq_poll_cmpl(struct zxdh_cq *cq, ++ struct zxdh_cq_poll_info *info) ++{ ++ enum zxdh_status_code status_code; ++ __u64 comp_ctx, qword0, qword2, qword3; ++ __le64 *cqe; ++ struct zxdh_qp *qp; ++ struct zxdh_ring *pring = NULL; ++ __u32 wqe_idx, q_type; ++ int ret_code; ++ bool move_cq_head = true; ++ __u8 polarity; ++ struct zxdh_usrq *iwusrq = NULL; ++ struct zxdh_srq *srq = NULL; ++ struct zxdh_uqp *iwuqp; ++ ++ cqe = get_current_cqe(cq); ++ ++ get_64bit_val(cqe, 0, &qword0); ++ polarity = (__u8)FIELD_GET(ZXDH_CQ_VALID, qword0); ++ if (polarity != cq->polarity) ++ return ZXDH_ERR_Q_EMPTY; ++ ++ /* Ensure CQE contents are read after valid bit is checked */ ++ udma_from_device_barrier(); ++ get_64bit_val(cqe, 8, &comp_ctx); ++ get_64bit_val(cqe, 16, &qword2); ++ get_64bit_val(cqe, 24, &qword3); ++ ++ qp = (struct zxdh_qp *)(unsigned long)comp_ctx; ++ if (unlikely(!qp || qp->destroy_pending)) { ++ ret_code = ZXDH_ERR_Q_DESTROYED; ++ goto exit; ++ } ++ iwuqp = container_of(qp, struct zxdh_uqp, qp); ++ info->qp_handle = (zxdh_qp_handle)(unsigned long)qp; ++ q_type = (__u8)FIELD_GET(ZXDH_CQ_SQ, qword0); ++ info->solicited_event = (bool)FIELD_GET(ZXDHCQ_SOEVENT, qword0); ++ wqe_idx = (__u32)FIELD_GET(ZXDH_CQ_WQEIDX, qword0); ++ info->error = (bool)FIELD_GET(ZXDH_CQ_ERROR, qword0); ++ info->major_err = FIELD_GET(ZXDH_CQ_MAJERR, qword0); ++ info->minor_err = FIELD_GET(ZXDH_CQ_MINERR, qword0); ++ ++ /* Set the min error to standard flush error code for remaining cqes */ ++ if (unlikely(info->error && info->major_err == ZXDH_FLUSH_MAJOR_ERR && ++ info->minor_err != FLUSH_GENERAL_ERR)) { ++ qword0 &= ~ZXDH_CQ_MINERR; ++ qword0 |= FIELD_PREP(ZXDH_CQ_MINERR, FLUSH_GENERAL_ERR); ++ set_64bit_val(cqe, 0, qword0); ++ } ++ build_comp_status(q_type, info); ++ ++ info->qp_id = (__u32)FIELD_GET(ZXDHCQ_QPID, qword2); ++ info->imm_valid = false; ++ ++ info->qp_handle = (zxdh_qp_handle)(unsigned long)qp; ++ switch (q_type) { ++ case ZXDH_CQE_QTYPE_RQ: ++ if (qp->is_srq) { ++ iwusrq = iwuqp->srq; ++ srq = &iwusrq->srq; ++ zxdh_free_srq_wqe(srq, wqe_idx); ++ info->wr_id = srq->srq_wrid_array[wqe_idx]; ++ zxdh_get_cq_poll_info(qp, info, qword2, qword3); ++ } else { ++ if (unlikely(info->comp_status == ++ ZXDH_COMPL_STATUS_FLUSHED || ++ info->comp_status == ++ ZXDH_COMPL_STATUS_UNKNOWN)) { ++ if (!ZXDH_RING_MORE_WORK(qp->rq_ring)) { ++ ret_code = ZXDH_ERR_Q_EMPTY; ++ goto exit; ++ } ++ wqe_idx = qp->rq_ring.tail; ++ } ++ info->wr_id = qp->rq_wrid_array[wqe_idx]; ++ zxdh_get_cq_poll_info(qp, info, qword2, qword3); ++ ZXDH_RING_SET_TAIL(qp->rq_ring, wqe_idx + 1); ++ if (info->comp_status == ZXDH_COMPL_STATUS_FLUSHED) { ++ qp->rq_flush_seen = true; ++ if (!ZXDH_RING_MORE_WORK(qp->rq_ring)) ++ qp->rq_flush_complete = true; ++ else ++ move_cq_head = false; ++ } ++ pring = &qp->rq_ring; 
++ } ++ ret_code = ZXDH_SUCCESS; ++ break; ++ case ZXDH_CQE_QTYPE_SQ: ++ if (info->comp_status == ZXDH_COMPL_STATUS_RETRY_ACK_ERR && ++ qp->qp_type == ZXDH_QP_TYPE_ROCE_RC) { ++ status_code = process_retry_ack_err(qp, info); ++ if (status_code == ZXDH_ERR_RETRY_ACK_ERR) { ++ update_cq_poll_info(qp, info, wqe_idx, qword0); ++ ret_code = ZXDH_SUCCESS; ++ } else { ++ ret_code = status_code; ++ } ++ } else if (info->comp_status == ++ ZXDH_COMPL_STATUS_TX_WINDOW_QUERY_ITEM_ERR && ++ qp->qp_type == ZXDH_QP_TYPE_ROCE_RC) { ++ status_code = ++ process_tx_window_query_item_err(qp, info); ++ if (status_code == ZXDH_ERR_RETRY_ACK_ERR) { ++ update_cq_poll_info(qp, info, wqe_idx, qword0); ++ ret_code = ZXDH_SUCCESS; ++ } else { ++ ret_code = status_code; ++ } ++ } else if (info->comp_status == ZXDH_COMPL_STATUS_FLUSHED) { ++ info->wr_id = qp->sq_wrtrk_array[wqe_idx].wrid; ++ ZXDH_RING_INIT(qp->sq_ring, qp->sq_ring.size); ++ ret_code = ZXDH_SUCCESS; ++ } else { ++ update_cq_poll_info(qp, info, wqe_idx, qword0); ++ ret_code = ZXDH_SUCCESS; ++ } ++ break; ++ default: ++ zxdh_dbg(verbs_get_ctx(iwuqp->vqp.qp.context), ZXDH_DBG_CQ, ++ "zxdh get cqe type unknow!\n"); ++ ret_code = ZXDH_ERR_Q_DESTROYED; ++ break; ++ } ++exit: ++ if (move_cq_head) { ++ __u64 cq_shadow_temp; ++ ++ ZXDH_RING_MOVE_HEAD_NOCHECK(cq->cq_ring); ++ if (!ZXDH_RING_CURRENT_HEAD(cq->cq_ring)) ++ cq->polarity ^= 1; ++ ++ ZXDH_RING_MOVE_TAIL(cq->cq_ring); ++ cq->cqe_rd_cnt++; ++ get_64bit_val(cq->shadow_area, 0, &cq_shadow_temp); ++ cq_shadow_temp &= ~ZXDH_CQ_DBSA_CQEIDX; ++ cq_shadow_temp |= ++ FIELD_PREP(ZXDH_CQ_DBSA_CQEIDX, cq->cqe_rd_cnt); ++ set_64bit_val(cq->shadow_area, 0, cq_shadow_temp); ++ } else { ++ qword0 &= ~ZXDH_CQ_WQEIDX; ++ qword0 |= FIELD_PREP(ZXDH_CQ_WQEIDX, pring->tail); ++ set_64bit_val(cqe, 0, qword0); ++ } ++ ++ return ret_code; ++} ++ ++/** ++ * zxdh_qp_round_up - return round up qp wq depth ++ * @wqdepth: wq depth in quanta to round up ++ */ ++int zxdh_qp_round_up(__u32 wqdepth) ++{ ++ int scount = 1; ++ ++ for (wqdepth--; scount <= 16; scount *= 2) ++ wqdepth |= wqdepth >> scount; ++ ++ return ++wqdepth; ++} ++ ++/** ++ * zxdh_cq_round_up - return round up cq wq depth ++ * @wqdepth: wq depth in quanta to round up ++ */ ++int zxdh_cq_round_up(__u32 wqdepth) ++{ ++ int scount = 1; ++ ++ for (wqdepth--; scount <= 16; scount *= 2) ++ wqdepth |= wqdepth >> scount; ++ ++ return ++wqdepth; ++} ++ ++/** ++ * zxdh_get_rq_wqe_shift - get shift count for maximum rq wqe size ++ * @sge: Maximum Scatter Gather Elements wqe ++ * @shift: Returns the shift needed based on sge ++ * ++ * Shift can be used to left shift the rq wqe size based on number of SGEs. ++ * For 1 SGE, shift = 1 (wqe size of 2*16 bytes). ++ * For 2 or 3 SGEs, shift = 2 (wqe size of 4*16 bytes). ++ * For 4-7 SGE's Shift of 3. ++ * For 8-15 SGE's Shift of 4 otherwise (wqe size of 512 bytes). ++ */ ++void zxdh_get_rq_wqe_shift(__u32 sge, __u8 *shift) ++{ ++ *shift = 0; //16bytes RQE, need to confirm configuration ++ if (sge < 2) ++ *shift = 1; ++ else if (sge < 4) ++ *shift = 2; ++ else if (sge < 8) ++ *shift = 3; ++ else if (sge < 16) ++ *shift = 4; ++ else ++ *shift = 5; ++} ++ ++/** ++ * zxdh_get_sq_wqe_shift - get shift count for maximum wqe size ++ * @sge: Maximum Scatter Gather Elements wqe ++ * @inline_data: Maximum inline data size ++ * @shift: Returns the shift needed based on sge ++ * ++ * Shift can be used to left shift the wqe size based on number of SGEs and inlind data size. ++ * To surport WR with imm_data,shift = 1 (wqe size of 2*32 bytes). 
++ * For 2-7 SGEs or 24 < inline data <= 86, shift = 2 (wqe size of 4*32 bytes). ++ * Otherwise (wqe size of 256 bytes). ++ */ ++void zxdh_get_sq_wqe_shift(__u32 sge, __u32 inline_data, __u8 *shift) ++{ ++ *shift = 1; ++ ++ if (sge > 1 || inline_data > 24) { ++ if (sge < 8 && inline_data <= 86) ++ *shift = 2; ++ else ++ *shift = 3; ++ } ++} ++ ++/* ++ * zxdh_get_sqdepth - get SQ depth (quanta) ++ * @dev_attrs: qp HW attributes ++ * @sq_size: SQ size ++ * @shift: shift which determines size of WQE ++ * @sqdepth: depth of SQ ++ * ++ */ ++enum zxdh_status_code zxdh_get_sqdepth(struct zxdh_dev_attrs *dev_attrs, ++ __u32 sq_size, __u8 shift, ++ __u32 *sqdepth) ++{ ++ if (sq_size > ZXDH_MAX_SQ_DEPTH) ++ return ZXDH_ERR_INVALID_SIZE; ++ ++ *sqdepth = zxdh_qp_round_up((sq_size << shift) + ZXDH_SQ_RSVD); ++ ++ if (*sqdepth < (ZXDH_QP_SW_MIN_WQSIZE << shift)) ++ *sqdepth = ZXDH_QP_SW_MIN_WQSIZE << shift; ++ else if (*sqdepth > dev_attrs->max_hw_wq_quanta) ++ return ZXDH_ERR_INVALID_SIZE; ++ ++ return 0; ++} ++ ++/* ++ * zxdh_get_rqdepth - get RQ depth (quanta) ++ * @dev_attrs: qp HW attributes ++ * @rq_size: RQ size ++ * @shift: shift which determines size of WQE ++ * @rqdepth: depth of RQ ++ */ ++enum zxdh_status_code zxdh_get_rqdepth(struct zxdh_dev_attrs *dev_attrs, ++ __u32 rq_size, __u8 shift, ++ __u32 *rqdepth) ++{ ++ *rqdepth = zxdh_qp_round_up((rq_size << shift) + ZXDH_RQ_RSVD); ++ ++ if (*rqdepth < (ZXDH_QP_SW_MIN_WQSIZE << shift)) ++ *rqdepth = ZXDH_QP_SW_MIN_WQSIZE << shift; ++ else if (*rqdepth > dev_attrs->max_hw_rq_quanta) ++ return ZXDH_ERR_INVALID_SIZE; ++ ++ return 0; ++} ++ ++static const struct zxdh_wqe_ops iw_wqe_ops = { ++ .iw_copy_inline_data = zxdh_copy_inline_data, ++ .iw_inline_data_size_to_quanta = zxdh_inline_data_size_to_quanta, ++ .iw_set_fragment = zxdh_set_fragment, ++ .iw_set_mw_bind_wqe = zxdh_set_mw_bind_wqe, ++}; ++ ++/** ++ * zxdh_qp_init - initialize shared qp ++ * @qp: hw qp (user and kernel) ++ * @info: qp initialization info ++ * ++ * initializes the vars used in both user and kernel mode. ++ * size of the wqe depends on numbers of max. fragements ++ * allowed. Then size of wqe * the number of wqes should be the ++ * amount of memory allocated for sq and rq. 
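++ * For example, a caller passing max_sq_frag_cnt = 4 and max_inline_data = 0
++ * gets sqshift = 2 from zxdh_get_sq_wqe_shift(), so the SQ ring below is
++ * sized to sq_size << 2 quanta.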
++ */ ++enum zxdh_status_code zxdh_qp_init(struct zxdh_qp *qp, ++ struct zxdh_qp_init_info *info) ++{ ++ enum zxdh_status_code ret_code = 0; ++ __u32 sq_ring_size; ++ __u8 sqshift, rqshift; ++ ++ qp->dev_attrs = info->dev_attrs; ++ if (info->max_sq_frag_cnt > qp->dev_attrs->max_hw_wq_frags || ++ info->max_rq_frag_cnt > qp->dev_attrs->max_hw_wq_frags) ++ return ZXDH_ERR_INVALID_FRAG_COUNT; ++ ++ zxdh_get_rq_wqe_shift(info->max_rq_frag_cnt, &rqshift); ++ zxdh_get_sq_wqe_shift(info->max_sq_frag_cnt, info->max_inline_data, ++ &sqshift); ++ ++ qp->qp_caps = info->qp_caps; ++ qp->sq_base = info->sq; ++ qp->rq_base = info->rq; ++ qp->qp_type = info->type; ++ qp->shadow_area = info->shadow_area; ++ set_64bit_val(qp->shadow_area, 0, 0x8000); ++ qp->sq_wrtrk_array = info->sq_wrtrk_array; ++ ++ qp->rq_wrid_array = info->rq_wrid_array; ++ qp->wqe_alloc_db = info->wqe_alloc_db; ++ qp->qp_id = info->qp_id; ++ qp->sq_size = info->sq_size; ++ qp->push_mode = false; ++ qp->max_sq_frag_cnt = info->max_sq_frag_cnt; ++ sq_ring_size = qp->sq_size << sqshift; ++ ZXDH_RING_INIT(qp->sq_ring, sq_ring_size); ++ ZXDH_RING_INIT(qp->initial_ring, sq_ring_size); ++ qp->swqe_polarity = 0; ++ qp->swqe_polarity_deferred = 1; ++ qp->rwqe_polarity = 0; ++ qp->rwqe_signature = 0; ++ qp->rq_size = info->rq_size; ++ qp->max_rq_frag_cnt = info->max_rq_frag_cnt; ++ qp->max_inline_data = (info->max_inline_data == 0) ? ++ ZXDH_MAX_INLINE_DATA_SIZE : ++ info->max_inline_data; ++ qp->rq_wqe_size = rqshift; ++ ZXDH_RING_INIT(qp->rq_ring, qp->rq_size); ++ qp->rq_wqe_size_multiplier = 1 << rqshift; ++ qp->wqe_ops = iw_wqe_ops; ++ return ret_code; ++} ++ ++/** ++ * zxdh_cq_init - initialize shared cq (user and kernel) ++ * @cq: hw cq ++ * @info: hw cq initialization info ++ */ ++enum zxdh_status_code zxdh_cq_init(struct zxdh_cq *cq, ++ struct zxdh_cq_init_info *info) ++{ ++ cq->cq_base = info->cq_base; ++ cq->cq_id = info->cq_id; ++ cq->cq_size = info->cq_size; ++ cq->cqe_alloc_db = info->cqe_alloc_db; ++ cq->cq_ack_db = info->cq_ack_db; ++ cq->shadow_area = info->shadow_area; ++ cq->cqe_size = info->cqe_size; ++ ZXDH_RING_INIT(cq->cq_ring, cq->cq_size); ++ cq->polarity = 1; ++ cq->cqe_rd_cnt = 0; ++ ++ return 0; ++} ++ ++/** ++ * zxdh_clean_cq - clean cq entries ++ * @q: completion context ++ * @cq: cq to clean ++ */ ++void zxdh_clean_cq(void *q, struct zxdh_cq *cq) ++{ ++ __le64 *cqe; ++ __u64 qword3, comp_ctx; ++ __u32 cq_head; ++ __u8 polarity, temp; ++ ++ cq_head = cq->cq_ring.head; ++ temp = cq->polarity; ++ do { ++ if (cq->cqe_size) ++ cqe = ((struct zxdh_extended_cqe ++ *)(cq->cq_base))[cq_head] ++ .buf; ++ else ++ cqe = cq->cq_base[cq_head].buf; ++ get_64bit_val(cqe, 24, &qword3); ++ polarity = (__u8)FIELD_GET(ZXDH_CQ_VALID, qword3); ++ ++ if (polarity != temp) ++ break; ++ ++ get_64bit_val(cqe, 8, &comp_ctx); ++ if ((void *)(uintptr_t)comp_ctx == q) ++ set_64bit_val(cqe, 8, 0); ++ ++ cq_head = (cq_head + 1) % cq->cq_ring.size; ++ if (!cq_head) ++ temp ^= 1; ++ } while (true); ++} ++ ++/** ++ * zxdh_nop - post a nop ++ * @qp: hw qp ptr ++ * @wr_id: work request id ++ * @signaled: signaled for completion ++ * @post_sq: ring doorbell ++ */ ++enum zxdh_status_code zxdh_nop(struct zxdh_qp *qp, __u64 wr_id, bool signaled, ++ bool post_sq) ++{ ++ __le64 *wqe; ++ __u64 hdr; ++ __u32 wqe_idx; ++ struct zxdh_post_sq_info info = {}; ++ ++ info.push_wqe = false; ++ info.wr_id = wr_id; ++ wqe = zxdh_qp_get_next_send_wqe(qp, &wqe_idx, ZXDH_QP_WQE_MIN_QUANTA, 0, ++ &info); ++ if (!wqe) ++ return ZXDH_ERR_QP_TOOMANY_WRS_POSTED; ++ ++ 
zxdh_clr_wqes(qp, wqe_idx); ++ ++ set_64bit_val(wqe, 0, 0); ++ set_64bit_val(wqe, 8, 0); ++ set_64bit_val(wqe, 16, 0); ++ ++ hdr = FIELD_PREP(ZXDHQPSQ_OPCODE, ZXDHQP_OP_NOP) | ++ FIELD_PREP(ZXDHQPSQ_SIGCOMPL, signaled) | ++ FIELD_PREP(ZXDHQPSQ_VALID, qp->swqe_polarity); ++ ++ udma_to_device_barrier(); /* make sure WQE is populated before valid bit is set */ ++ ++ set_64bit_val(wqe, 24, hdr); ++ if (post_sq) ++ zxdh_qp_post_wr(qp); ++ ++ return 0; ++} ++ ++/** ++ * zxdh_fragcnt_to_quanta_sq - calculate quanta based on fragment count for SQ ++ * @frag_cnt: number of fragments ++ * @quanta: quanta for frag_cnt ++ */ ++enum zxdh_status_code zxdh_fragcnt_to_quanta_sq(__u32 frag_cnt, __u16 *quanta) ++{ ++ if (frag_cnt > ZXDH_MAX_SQ_FRAG) ++ return ZXDH_ERR_INVALID_FRAG_COUNT; ++ *quanta = frag_cnt / 2 + 1; ++ return 0; ++} ++ ++/** ++ * zxdh_fragcnt_to_wqesize_rq - calculate wqe size based on fragment count for RQ ++ * @frag_cnt: number of fragments ++ * @wqe_size: size in bytes given frag_cnt ++ */ ++enum zxdh_status_code zxdh_fragcnt_to_wqesize_rq(__u32 frag_cnt, ++ __u16 *wqe_size) ++{ ++ switch (frag_cnt) { ++ case 0: ++ case 1: ++ *wqe_size = 32; ++ break; ++ case 2: ++ case 3: ++ *wqe_size = 64; ++ break; ++ case 4: ++ case 5: ++ case 6: ++ case 7: ++ *wqe_size = 128; ++ break; ++ case 8: ++ case 9: ++ case 10: ++ case 11: ++ case 12: ++ case 13: ++ case 14: ++ *wqe_size = 256; ++ break; ++ default: ++ return ZXDH_ERR_INVALID_FRAG_COUNT; ++ } ++ ++ return 0; ++} ++ ++/** ++ * zxdh_get_srq_wqe_shift - get shift count for maximum srq wqe size ++ * @dev_attrs: srq HW attributes ++ * @sge: Maximum Scatter Gather Elements wqe ++ * @shift: Returns the shift needed based on sge ++ * ++ * Shift can be used to left shift the srq wqe size based on number of SGEs. ++ * For 1 SGE, shift = 1 (wqe size of 2*16 bytes). ++ * For 2 or 3 SGEs, shift = 2 (wqe size of 4*16 bytes). ++ * For 4-7 SGE's Shift of 3. ++ * For 8-15 SGE's Shift of 4 otherwise (wqe size of 512 bytes). 
++ */ ++void zxdh_get_srq_wqe_shift(struct zxdh_dev_attrs *dev_attrs, __u32 sge, ++ __u8 *shift) ++{ ++ *shift = 0; //16bytes RQE, need to confirm configuration ++ if (sge < 2) ++ *shift = 1; ++ else if (sge < 4) ++ *shift = 2; ++ else if (sge < 8) ++ *shift = 3; ++ else if (sge < 16) ++ *shift = 4; ++ else ++ *shift = 5; ++} ++ ++/* ++ * zxdh_get_srqdepth - get SRQ depth (quanta) ++ * @max_hw_rq_quanta: HW SRQ size limit ++ * @srq_size: SRQ size ++ * @shift: shift which determines size of WQE ++ * @srqdepth: depth of SRQ ++ */ ++int zxdh_get_srqdepth(__u32 max_hw_srq_quanta, __u32 srq_size, __u8 shift, ++ __u32 *srqdepth) ++{ ++ *srqdepth = zxdh_qp_round_up((srq_size << shift) + ZXDH_SRQ_RSVD); ++ ++ if (*srqdepth < (ZXDH_QP_SW_MIN_WQSIZE << shift)) ++ *srqdepth = ZXDH_QP_SW_MIN_WQSIZE << shift; ++ else if ((*srqdepth >> shift) > max_hw_srq_quanta) ++ return ZXDH_ERR_INVALID_SIZE; ++ ++ return 0; ++} ++ ++__le64 *zxdh_get_srq_wqe(struct zxdh_srq *srq, int wqe_index) ++{ ++ __le64 *wqe; ++ ++ wqe = srq->srq_base[wqe_index * srq->srq_wqe_size_multiplier].elem; ++ return wqe; ++} ++ ++__le16 *zxdh_get_srq_list_wqe(struct zxdh_srq *srq, __u16 *idx) ++{ ++ __le16 *wqe; ++ __u16 wqe_idx; ++ ++ wqe_idx = srq->srq_list_ring.tail; ++ srq->srq_list_ring.tail++; ++ srq->srq_list_ring.tail %= srq->srq_list_ring.size; ++ *idx = srq->srq_list_ring.tail; ++ ++ if (!(*idx)) ++ srq->srq_list_polarity = !srq->srq_list_polarity; ++ ++ wqe = &srq->srq_list_base[wqe_idx]; ++ ++ return wqe; ++} ++ ++/** ++ * zxdh_srq_init - initialize srq ++ * @srq: hw srq (user and kernel) ++ * @info: srq initialization info ++ * ++ * initializes the vars used in both user and kernel mode. ++ * size of the wqe depends on numbers of max. fragements ++ * allowed. Then size of wqe * the number of wqes should be the ++ * amount of memory allocated for srq. ++ */ ++enum zxdh_status_code zxdh_srq_init(struct zxdh_srq *srq, ++ struct zxdh_srq_init_info *info) ++{ ++ __u32 srq_ring_size; ++ __u8 srqshift; ++ ++ srq->dev_attrs = info->dev_attrs; ++ if (info->max_srq_frag_cnt > srq->dev_attrs->max_hw_wq_frags) ++ return -ZXDH_ERR_INVALID_FRAG_COUNT; ++ zxdh_get_srq_wqe_shift(srq->dev_attrs, info->max_srq_frag_cnt, ++ &srqshift); ++ srq->srq_base = info->srq_base; ++ srq->srq_list_base = info->srq_list_base; ++ srq->srq_db_base = info->srq_db_base; ++ srq->srq_wrid_array = info->srq_wrid_array; ++ srq->srq_id = info->srq_id; ++ srq->srq_size = info->srq_size; ++ srq->log2_srq_size = info->log2_srq_size; ++ srq->srq_list_size = info->srq_list_size; ++ srq->max_srq_frag_cnt = info->max_srq_frag_cnt; ++ srq_ring_size = srq->srq_size; ++ srq->srq_wqe_size = srqshift; ++ srq->srq_wqe_size_multiplier = 1 << srqshift; ++ ZXDH_RING_INIT(srq->srq_ring, srq_ring_size); ++ ZXDH_RING_INIT(srq->srq_list_ring, srq->srq_list_size); ++ srq->srq_ring.tail = srq->srq_size - 1; ++ srq->srq_list_polarity = 1; ++ return 0; ++} ++ ++void zxdh_free_srq_wqe(struct zxdh_srq *srq, int wqe_index) ++{ ++ struct zxdh_usrq *iwusrq; ++ __le64 *wqe; ++ __u64 hdr; ++ ++ iwusrq = container_of(srq, struct zxdh_usrq, srq); ++ /* always called with interrupts disabled. 
*/ ++ pthread_spin_lock(&iwusrq->lock); ++ wqe = zxdh_get_srq_wqe(srq, srq->srq_ring.tail); ++ srq->srq_ring.tail = wqe_index; ++ hdr = FIELD_PREP(ZXDHQPSRQ_NEXT_WQE_INDEX, wqe_index); ++ ++ udma_to_device_barrier(); /* make sure WQE is populated before valid bit is set */ ++ set_64bit_val(wqe, 0, hdr); ++ ++ pthread_spin_unlock(&iwusrq->lock); ++} +diff --git a/providers/zrdma/zxdh_status.h b/providers/zrdma/zxdh_status.h +new file mode 100644 +index 0000000..d9e9f04 +--- /dev/null ++++ b/providers/zrdma/zxdh_status.h +@@ -0,0 +1,75 @@ ++/* SPDX-License-Identifier: GPL-2.0 or Linux-OpenIB */ ++/* Copyright (c) 2024 ZTE Corporation. All rights reserved. */ ++#ifndef ZXDH_STATUS_H ++#define ZXDH_STATUS_H ++ ++/* Error Codes */ ++enum zxdh_status_code { ++ ZXDH_SUCCESS = 0, ++ ZXDH_ERR_NVM = -1, ++ ZXDH_ERR_NVM_CHECKSUM = -2, ++ ZXDH_ERR_CFG = -4, ++ ZXDH_ERR_PARAM = -5, ++ ZXDH_ERR_DEVICE_NOT_SUPPORTED = -6, ++ ZXDH_ERR_RESET_FAILED = -7, ++ ZXDH_ERR_SWFW_SYNC = -8, ++ ZXDH_ERR_NO_MEMORY = -9, ++ ZXDH_ERR_BAD_PTR = -10, ++ ZXDH_ERR_INVALID_PD_ID = -11, ++ ZXDH_ERR_INVALID_QP_ID = -12, ++ ZXDH_ERR_INVALID_CQ_ID = -13, ++ ZXDH_ERR_INVALID_CEQ_ID = -14, ++ ZXDH_ERR_INVALID_AEQ_ID = -15, ++ ZXDH_ERR_INVALID_SIZE = -16, ++ ZXDH_ERR_INVALID_ARP_INDEX = -17, ++ ZXDH_ERR_INVALID_FPM_FUNC_ID = -18, ++ ZXDH_ERR_QP_INVALID_MSG_SIZE = -19, ++ ZXDH_ERR_QP_TOOMANY_WRS_POSTED = -20, ++ ZXDH_ERR_INVALID_FRAG_COUNT = -21, ++ ZXDH_ERR_Q_EMPTY = -22, ++ ZXDH_ERR_INVALID_ALIGNMENT = -23, ++ ZXDH_ERR_FLUSHED_Q = -24, ++ ZXDH_ERR_INVALID_PUSH_PAGE_INDEX = -25, ++ ZXDH_ERR_INVALID_INLINE_DATA_SIZE = -26, ++ ZXDH_ERR_TIMEOUT = -27, ++ ZXDH_ERR_OPCODE_MISMATCH = -28, ++ ZXDH_ERR_CQP_COMPL_ERROR = -29, ++ ZXDH_ERR_INVALID_VF_ID = -30, ++ ZXDH_ERR_INVALID_HMCFN_ID = -31, ++ ZXDH_ERR_BACKING_PAGE_ERROR = -32, ++ ZXDH_ERR_NO_PBLCHUNKS_AVAILABLE = -33, ++ ZXDH_ERR_INVALID_PBLE_INDEX = -34, ++ ZXDH_ERR_INVALID_SD_INDEX = -35, ++ ZXDH_ERR_INVALID_PAGE_DESC_INDEX = -36, ++ ZXDH_ERR_INVALID_SD_TYPE = -37, ++ ZXDH_ERR_MEMCPY_FAILED = -38, ++ ZXDH_ERR_INVALID_HMC_OBJ_INDEX = -39, ++ ZXDH_ERR_INVALID_HMC_OBJ_COUNT = -40, ++ ZXDH_ERR_BUF_TOO_SHORT = -43, ++ ZXDH_ERR_BAD_IWARP_CQE = -44, ++ ZXDH_ERR_NVM_BLANK_MODE = -45, ++ ZXDH_ERR_NOT_IMPL = -46, ++ ZXDH_ERR_PE_DOORBELL_NOT_ENA = -47, ++ ZXDH_ERR_NOT_READY = -48, ++ ZXDH_NOT_SUPPORTED = -49, ++ ZXDH_ERR_FIRMWARE_API_VER = -50, ++ ZXDH_ERR_RING_FULL = -51, ++ ZXDH_ERR_MPA_CRC = -61, ++ ZXDH_ERR_NO_TXBUFS = -62, ++ ZXDH_ERR_SEQ_NUM = -63, ++ ZXDH_ERR_LIST_EMPTY = -64, ++ ZXDH_ERR_INVALID_MAC_ADDR = -65, ++ ZXDH_ERR_BAD_STAG = -66, ++ ZXDH_ERR_CQ_COMPL_ERROR = -67, ++ ZXDH_ERR_Q_DESTROYED = -68, ++ ZXDH_ERR_INVALID_FEAT_CNT = -69, ++ ZXDH_ERR_REG_CQ_FULL = -70, ++ ZXDH_ERR_VF_MSG_ERROR = -71, ++ ZXDH_ERR_NO_INTR = -72, ++ ZXDH_ERR_REG_QSET = -73, ++ ZXDH_ERR_FEATURES_OP = -74, ++ ZXDH_ERR_INVALID_FRAG_LEN = -75, ++ ZXDH_ERR_RETRY_ACK_ERR = -76, ++ ZXDH_ERR_RETRY_ACK_NOT_EXCEED_ERR = -77, ++}; ++#endif /* ZXDH_STATUS_H */ +diff --git a/providers/zrdma/zxdh_verbs.c b/providers/zrdma/zxdh_verbs.c +new file mode 100644 +index 0000000..93cf705 +--- /dev/null ++++ b/providers/zrdma/zxdh_verbs.c +@@ -0,0 +1,3185 @@ ++// SPDX-License-Identifier: GPL-2.0 or Linux-OpenIB ++/* Copyright (c) 2024 ZTE Corporation. All rights reserved. 
*/ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include "main.h" ++#include "zxdh_abi.h" ++ ++uint32_t zxdh_debug_mask; ++ ++static const unsigned int zxdh_roce_mtu[] = { ++ [IBV_MTU_256] = 256, [IBV_MTU_512] = 512, [IBV_MTU_1024] = 1024, ++ [IBV_MTU_2048] = 2048, [IBV_MTU_4096] = 4096, ++}; ++ ++static inline unsigned int mtu_enum_to_int(enum ibv_mtu mtu) ++{ ++ return zxdh_roce_mtu[mtu]; ++} ++ ++static inline void print_fw_ver(uint64_t fw_ver, char *str, size_t len) ++{ ++ uint16_t major, minor, sub_minor, sub_major; ++ ++ major = (fw_ver >> 48) & 0xffff; ++ sub_major = (fw_ver >> 32) & 0xffff; ++ minor = (fw_ver >> 16) & 0xffff; ++ sub_minor = fw_ver & 0xffff; ++ snprintf(str, len, "%d.%02d.%02d.%02d", major, sub_major, minor, ++ sub_minor); ++} ++ ++/** ++ * zxdh_get_inline_data - get inline_multi_sge data ++ * @inline_data: uint8_t* ++ * @ib_wr: work request ptr ++ * @len: sge total length ++ */ ++static int zxdh_get_inline_data(uint8_t *inline_data, struct ibv_send_wr *ib_wr, ++ __u32 *len) ++{ ++ int num = 0; ++ int offset = 0; ++ ++ while (num < ib_wr->num_sge) { ++ *len += ib_wr->sg_list[num].length; ++ if (*len > ZXDH_MAX_INLINE_DATA_SIZE) { ++ return -EINVAL; ++ } ++ memcpy(inline_data + offset, ++ (void *)(uintptr_t)ib_wr->sg_list[num].addr, ++ ib_wr->sg_list[num].length); ++ offset += ib_wr->sg_list[num].length; ++ num++; ++ } ++ return 0; ++} ++ ++/** ++ * zxdh_uquery_device_ex - query device attributes including extended properties ++ * @context: user context for the device ++ * @input: extensible input struct for ibv_query_device_ex verb ++ * @attr: extended device attribute struct ++ * @attr_size: size of extended device attribute struct ++ **/ ++int zxdh_uquery_device_ex(struct ibv_context *context, ++ const struct ibv_query_device_ex_input *input, ++ struct ibv_device_attr_ex *attr, size_t attr_size) ++{ ++ struct ib_uverbs_ex_query_device_resp resp = {}; ++ size_t resp_size = sizeof(resp); ++ int ret; ++ ++ ret = ibv_cmd_query_device_any(context, input, attr, attr_size, &resp, ++ &resp_size); ++ if (ret) ++ return ret; ++ ++ print_fw_ver(resp.base.fw_ver, attr->orig_attr.fw_ver, ++ sizeof(attr->orig_attr.fw_ver)); ++ ++ return 0; ++} ++ ++/** ++ * zxdh_uquery_port - get port attributes (msg size, lnk, mtu...) 
++ * @context: user context of the device ++ * @port: port for the attributes ++ * @attr: to return port attributes ++ **/ ++int zxdh_uquery_port(struct ibv_context *context, uint8_t port, ++ struct ibv_port_attr *attr) ++{ ++ struct ibv_query_port cmd; ++ ++ return ibv_cmd_query_port(context, port, attr, &cmd, sizeof(cmd)); ++} ++ ++/** ++ * zxdh_ualloc_pd - allocates protection domain and return pd ptr ++ * @context: user context of the device ++ **/ ++struct ibv_pd *zxdh_ualloc_pd(struct ibv_context *context) ++{ ++ struct ibv_alloc_pd cmd; ++ struct zxdh_ualloc_pd_resp resp = {}; ++ struct zxdh_upd *iwupd; ++ int err; ++ ++ iwupd = malloc(sizeof(*iwupd)); ++ if (!iwupd) ++ return NULL; ++ ++ err = ibv_cmd_alloc_pd(context, &iwupd->ibv_pd, &cmd, sizeof(cmd), ++ &resp.ibv_resp, sizeof(resp)); ++ if (err) ++ goto err_free; ++ ++ iwupd->pd_id = resp.pd_id; ++ ++ return &iwupd->ibv_pd; ++ ++err_free: ++ free(iwupd); ++ errno = err; ++ return NULL; ++} ++ ++/** ++ * zxdh_ufree_pd - free pd resources ++ * @pd: pd to free resources ++ */ ++int zxdh_ufree_pd(struct ibv_pd *pd) ++{ ++ struct zxdh_upd *iwupd; ++ int ret; ++ ++ iwupd = container_of(pd, struct zxdh_upd, ibv_pd); ++ ret = ibv_cmd_dealloc_pd(pd); ++ if (ret) ++ return ret; ++ ++ free(iwupd); ++ ++ return 0; ++} ++ ++/** ++ * zxdh_ureg_mr - register user memory region ++ * @pd: pd for the mr ++ * @addr: user address of the memory region ++ * @length: length of the memory ++ * @hca_va: hca_va ++ * @access: access allowed on this mr ++ */ ++struct ibv_mr *zxdh_ureg_mr(struct ibv_pd *pd, void *addr, size_t length, ++ uint64_t hca_va, int access) ++{ ++ struct zxdh_umr *umr; ++ struct zxdh_ureg_mr cmd; ++ struct zxdh_ureg_mr_resp resp = {}; ++ int err; ++ ++ umr = malloc(sizeof(*umr)); ++ if (!umr) ++ return NULL; ++ ++ cmd.reg_type = ZXDH_MEMREG_TYPE_MEM; ++ err = ibv_cmd_reg_mr(pd, addr, length, hca_va, access, &umr->vmr, ++ &cmd.ibv_cmd, sizeof(cmd), &resp.ibv_resp, ++ sizeof(resp)); ++ if (err) { ++ free(umr); ++ errno = err; ++ return NULL; ++ } ++ umr->acc_flags = access; ++ umr->host_page_size = resp.host_page_size; ++ umr->leaf_pbl_size = resp.leaf_pbl_size; ++ umr->mr_pa_pble_index = resp.mr_pa_hig; ++ umr->mr_pa_pble_index = (umr->mr_pa_pble_index << 32) | resp.mr_pa_low; ++ ++ return &umr->vmr.ibv_mr; ++} ++ ++/* ++ * zxdh_urereg_mr - re-register memory region ++ * @vmr: mr that was allocated ++ * @flags: bit mask to indicate which of the attr's of MR modified ++ * @pd: pd of the mr ++ * @addr: user address of the memory region ++ * @length: length of the memory ++ * @access: access allowed on this mr ++ */ ++int zxdh_urereg_mr(struct verbs_mr *vmr, int flags, struct ibv_pd *pd, ++ void *addr, size_t length, int access) ++{ ++ struct zxdh_urereg_mr cmd = {}; ++ struct ib_uverbs_rereg_mr_resp resp; ++ ++ cmd.reg_type = ZXDH_MEMREG_TYPE_MEM; ++ return ibv_cmd_rereg_mr(vmr, flags, addr, length, (uintptr_t)addr, ++ access, pd, &cmd.ibv_cmd, sizeof(cmd), &resp, ++ sizeof(resp)); ++} ++ ++/** ++ * zxdh_udereg_mr - re-register memory region ++ * @vmr: mr that was allocated ++ */ ++int zxdh_udereg_mr(struct verbs_mr *vmr) ++{ ++ int ret; ++ ++ ret = ibv_cmd_dereg_mr(vmr); ++ if (ret) ++ return ret; ++ ++ free(vmr); ++ ++ return 0; ++} ++ ++/** ++ * zxdh_ualloc_mw - allocate memory window ++ * @pd: protection domain ++ * @type: memory window type ++ */ ++struct ibv_mw *zxdh_ualloc_mw(struct ibv_pd *pd, enum ibv_mw_type type) ++{ ++ struct ibv_mw *mw; ++ struct ibv_alloc_mw cmd; ++ struct ib_uverbs_alloc_mw_resp resp; ++ ++ mw = 
calloc(1, sizeof(*mw)); ++ if (!mw) ++ return NULL; ++ ++ if (ibv_cmd_alloc_mw(pd, type, mw, &cmd, sizeof(cmd), &resp, ++ sizeof(resp))) { ++ free(mw); ++ return NULL; ++ } ++ ++ return mw; ++} ++ ++/** ++ * zxdh_ubind_mw - bind a memory window ++ * @qp: qp to post WR ++ * @mw: memory window to bind ++ * @mw_bind: bind info ++ */ ++int zxdh_ubind_mw(struct ibv_qp *qp, struct ibv_mw *mw, ++ struct ibv_mw_bind *mw_bind) ++{ ++ struct ibv_mw_bind_info *bind_info = &mw_bind->bind_info; ++ struct verbs_mr *vmr = verbs_get_mr(bind_info->mr); ++ struct zxdh_umr *umr = container_of(vmr, struct zxdh_umr, vmr); ++ struct ibv_send_wr wr = {}; ++ struct ibv_send_wr *bad_wr; ++ int err; ++ ++ if (vmr->mr_type != IBV_MR_TYPE_MR) ++ return -ENOTSUP; ++ ++ if (umr->acc_flags & IBV_ACCESS_ZERO_BASED) ++ return -EINVAL; ++ ++ if (mw->type != IBV_MW_TYPE_1) ++ return -EINVAL; ++ ++ wr.opcode = IBV_WR_BIND_MW; ++ wr.bind_mw.bind_info = mw_bind->bind_info; ++ wr.bind_mw.mw = mw; ++ wr.bind_mw.rkey = ibv_inc_rkey(mw->rkey); ++ ++ wr.wr_id = mw_bind->wr_id; ++ wr.send_flags = mw_bind->send_flags; ++ ++ err = zxdh_upost_send(qp, &wr, &bad_wr); ++ if (!err) ++ mw->rkey = wr.bind_mw.rkey; ++ ++ return err; ++} ++ ++/** ++ * zxdh_udealloc_mw - deallocate memory window ++ * @mw: memory window to dealloc ++ */ ++int zxdh_udealloc_mw(struct ibv_mw *mw) ++{ ++ int ret; ++ ++ ret = ibv_cmd_dealloc_mw(mw); ++ if (ret) ++ return ret; ++ free(mw); ++ ++ return 0; ++} ++ ++static void *zxdh_alloc_hw_buf(size_t size) ++{ ++ void *buf; ++ ++ buf = memalign(ZXDH_HW_PAGE_SIZE, size); ++ ++ if (!buf) ++ return NULL; ++ if (ibv_dontfork_range(buf, size)) { ++ free(buf); ++ return NULL; ++ } ++ ++ return buf; ++} ++ ++static void zxdh_free_hw_buf(void *buf, size_t size) ++{ ++ ibv_dofork_range(buf, size); ++ free(buf); ++} ++ ++/** ++ * get_cq_size - returns actual cqe needed by HW ++ * @ncqe: minimum cqes requested by application ++ */ ++static inline int get_cq_size(int ncqe) ++{ ++ ncqe++; ++ ++ /* Completions with immediate require 1 extra entry */ ++ if (ncqe < ZXDH_U_MINCQ_SIZE) ++ ncqe = ZXDH_U_MINCQ_SIZE; ++ ++ return ncqe; ++} ++ ++static inline size_t get_cq_total_bytes(__u32 cq_size) ++{ ++ return roundup(cq_size * sizeof(struct zxdh_cqe), ZXDH_HW_PAGE_SIZE); ++} ++ ++/** ++ * ucreate_cq - zxdh util function to create a CQ ++ * @context: ibv context ++ * @attr_ex: CQ init attributes ++ * @ext_cq: flag to create an extendable or normal CQ ++ */ ++static struct ibv_cq_ex *ucreate_cq(struct ibv_context *context, ++ struct ibv_cq_init_attr_ex *attr_ex, ++ bool ext_cq) ++{ ++ struct zxdh_cq_init_info info = {}; ++ struct zxdh_ureg_mr reg_mr_cmd = {}; ++ struct zxdh_ucreate_cq_ex cmd = {}; ++ struct zxdh_ucreate_cq_ex_resp resp = {}; ++ struct ib_uverbs_reg_mr_resp reg_mr_resp = {}; ++ struct zxdh_ureg_mr reg_mr_shadow_cmd = {}; ++ struct ib_uverbs_reg_mr_resp reg_mr_shadow_resp = {}; ++ struct zxdh_dev_attrs *dev_attrs; ++ struct zxdh_uvcontext *iwvctx; ++ struct zxdh_ucq *iwucq; ++ size_t total_size; ++ __u32 cq_pages; ++ int ret, ncqe; ++ ++ iwvctx = container_of(context, struct zxdh_uvcontext, ibv_ctx.context); ++ dev_attrs = &iwvctx->dev_attrs; ++ ++ if (attr_ex->cqe < ZXDH_MIN_CQ_SIZE || ++ attr_ex->cqe > dev_attrs->max_hw_cq_size) { ++ errno = EINVAL; ++ return NULL; ++ } ++ ++ /* save the cqe requested by application */ ++ ncqe = attr_ex->cqe; ++ iwucq = calloc(1, sizeof(*iwucq)); ++ if (!iwucq) ++ return NULL; ++ ++ ret = pthread_spin_init(&iwucq->lock, PTHREAD_PROCESS_PRIVATE); ++ if (ret) { ++ errno = ret; ++ 
free(iwucq); ++ return NULL; ++ } ++ ++ iwucq->resize_enable = false; ++ info.cq_size = get_cq_size(attr_ex->cqe); ++ info.cq_size = zxdh_cq_round_up(info.cq_size); ++ iwucq->comp_vector = attr_ex->comp_vector; ++ list_head_init(&iwucq->resize_list); ++ total_size = get_cq_total_bytes(info.cq_size); ++ cq_pages = total_size >> ZXDH_HW_PAGE_SHIFT; ++ ++ if (!(dev_attrs->feature_flags & ZXDH_FEATURE_CQ_RESIZE)) ++ total_size = (cq_pages << ZXDH_HW_PAGE_SHIFT) + ++ ZXDH_DB_SHADOW_AREA_SIZE; ++ ++ iwucq->buf_size = total_size; ++ info.cq_base = zxdh_alloc_hw_buf(total_size); ++ if (!info.cq_base) ++ goto err_cq_base; ++ ++ memset(info.cq_base, 0, total_size); ++ reg_mr_cmd.reg_type = ZXDH_MEMREG_TYPE_CQ; ++ reg_mr_cmd.cq_pages = cq_pages; ++ ++ ret = ibv_cmd_reg_mr(&iwvctx->iwupd->ibv_pd, info.cq_base, total_size, ++ (uintptr_t)info.cq_base, IBV_ACCESS_LOCAL_WRITE, ++ &iwucq->vmr, ®_mr_cmd.ibv_cmd, ++ sizeof(reg_mr_cmd), ®_mr_resp, ++ sizeof(reg_mr_resp)); ++ if (ret) { ++ errno = ret; ++ goto err_dereg_mr; ++ } ++ ++ iwucq->vmr.ibv_mr.pd = &iwvctx->iwupd->ibv_pd; ++ ++ if (dev_attrs->feature_flags & ZXDH_FEATURE_CQ_RESIZE) { ++ info.shadow_area = zxdh_alloc_hw_buf(ZXDH_DB_SHADOW_AREA_SIZE); ++ if (!info.shadow_area) ++ goto err_dereg_mr; ++ ++ memset(info.shadow_area, 0, ZXDH_DB_SHADOW_AREA_SIZE); ++ reg_mr_shadow_cmd.reg_type = ZXDH_MEMREG_TYPE_CQ; ++ reg_mr_shadow_cmd.cq_pages = 1; ++ ++ ret = ibv_cmd_reg_mr( ++ &iwvctx->iwupd->ibv_pd, info.shadow_area, ++ ZXDH_DB_SHADOW_AREA_SIZE, (uintptr_t)info.shadow_area, ++ IBV_ACCESS_LOCAL_WRITE, &iwucq->vmr_shadow_area, ++ ®_mr_shadow_cmd.ibv_cmd, sizeof(reg_mr_shadow_cmd), ++ ®_mr_shadow_resp, sizeof(reg_mr_shadow_resp)); ++ if (ret) { ++ errno = ret; ++ goto err_dereg_shadow; ++ } ++ ++ iwucq->vmr_shadow_area.ibv_mr.pd = &iwvctx->iwupd->ibv_pd; ++ ++ } else { ++ info.shadow_area = (__le64 *)((__u8 *)info.cq_base + ++ (cq_pages << ZXDH_HW_PAGE_SHIFT)); ++ } ++ ++ attr_ex->cqe = info.cq_size; ++ cmd.user_cq_buf = (__u64)((uintptr_t)info.cq_base); ++ cmd.user_shadow_area = (__u64)((uintptr_t)info.shadow_area); ++ ++ ret = ibv_cmd_create_cq_ex(context, attr_ex, &iwucq->verbs_cq, ++ &cmd.ibv_cmd, sizeof(cmd), &resp.ibv_resp, ++ sizeof(resp), 0); ++ if (ret) { ++ errno = ret; ++ goto err_dereg_shadow; ++ } ++ ++ if (ext_cq) ++ zxdh_ibvcq_ex_fill_priv_funcs(iwucq, attr_ex); ++ info.cq_id = resp.cq_id; ++ /* Do not report the cqe's burned by HW */ ++ iwucq->verbs_cq.cq.cqe = ncqe; ++ ++ info.cqe_alloc_db = ++ (__u32 *)((__u8 *)iwvctx->cq_db + ZXDH_DB_CQ_OFFSET); ++ zxdh_cq_init(&iwucq->cq, &info); ++ ++ return &iwucq->verbs_cq.cq_ex; ++ ++err_dereg_shadow: ++ ibv_cmd_dereg_mr(&iwucq->vmr); ++ if (iwucq->vmr_shadow_area.ibv_mr.handle) { ++ ibv_cmd_dereg_mr(&iwucq->vmr_shadow_area); ++ zxdh_free_hw_buf(info.shadow_area, ZXDH_DB_SHADOW_AREA_SIZE); ++ } ++err_dereg_mr: ++ zxdh_free_hw_buf(info.cq_base, total_size); ++err_cq_base: ++ pthread_spin_destroy(&iwucq->lock); ++ ++ free(iwucq); ++ ++ return NULL; ++} ++ ++struct ibv_cq *zxdh_ucreate_cq(struct ibv_context *context, int cqe, ++ struct ibv_comp_channel *channel, ++ int comp_vector) ++{ ++ struct ibv_cq_init_attr_ex attr_ex = { ++ .cqe = cqe, ++ .channel = channel, ++ .comp_vector = comp_vector, ++ }; ++ struct ibv_cq_ex *ibvcq_ex; ++ ++ ibvcq_ex = ucreate_cq(context, &attr_ex, false); ++ ++ return ibvcq_ex ? 
ibv_cq_ex_to_cq(ibvcq_ex) : NULL; ++} ++ ++struct ibv_cq_ex *zxdh_ucreate_cq_ex(struct ibv_context *context, ++ struct ibv_cq_init_attr_ex *attr_ex) ++{ ++ if (attr_ex->wc_flags & ~ZXDH_CQ_SUPPORTED_WC_FLAGS) { ++ errno = EOPNOTSUPP; ++ return NULL; ++ } ++ ++ return ucreate_cq(context, attr_ex, true); ++} ++ ++/** ++ * zxdh_free_cq_buf - free memory for cq buffer ++ * @cq_buf: cq buf to free ++ */ ++static void zxdh_free_cq_buf(struct zxdh_cq_buf *cq_buf) ++{ ++ ibv_cmd_dereg_mr(&cq_buf->vmr); ++ zxdh_free_hw_buf(cq_buf->cq.cq_base, ++ get_cq_total_bytes(cq_buf->cq.cq_size)); ++ free(cq_buf); ++} ++ ++/** ++ * zxdh_process_resize_list - process the cq list to remove buffers ++ * @iwucq: cq which owns the list ++ * @lcqe_buf: cq buf where the last cqe is found ++ */ ++static int zxdh_process_resize_list(struct zxdh_ucq *iwucq, ++ struct zxdh_cq_buf *lcqe_buf) ++{ ++ struct zxdh_cq_buf *cq_buf, *next; ++ int cq_cnt = 0; ++ ++ list_for_each_safe (&iwucq->resize_list, cq_buf, next, list) { ++ if (cq_buf == lcqe_buf) ++ return cq_cnt; ++ ++ list_del(&cq_buf->list); ++ zxdh_free_cq_buf(cq_buf); ++ cq_cnt++; ++ } ++ ++ return cq_cnt; ++} ++ ++/** ++ * zxdh_udestroy_cq - destroys cq ++ * @cq: ptr to cq to be destroyed ++ */ ++int zxdh_udestroy_cq(struct ibv_cq *cq) ++{ ++ struct zxdh_dev_attrs *dev_attrs; ++ struct zxdh_uvcontext *iwvctx; ++ struct zxdh_ucq *iwucq; ++ __u64 cq_shadow_temp; ++ int ret; ++ ++ iwucq = container_of(cq, struct zxdh_ucq, verbs_cq.cq); ++ iwvctx = container_of(cq->context, struct zxdh_uvcontext, ++ ibv_ctx.context); ++ dev_attrs = &iwvctx->dev_attrs; ++ ++ ret = pthread_spin_destroy(&iwucq->lock); ++ if (ret) ++ goto err; ++ ++ get_64bit_val(iwucq->cq.shadow_area, 0, &cq_shadow_temp); ++ ++ zxdh_process_resize_list(iwucq, NULL); ++ ret = ibv_cmd_destroy_cq(cq); ++ if (ret) ++ goto err; ++ ++ ibv_cmd_dereg_mr(&iwucq->vmr); ++ zxdh_free_hw_buf(iwucq->cq.cq_base, iwucq->buf_size); ++ ++ if (dev_attrs->feature_flags & ZXDH_FEATURE_CQ_RESIZE) { ++ ibv_cmd_dereg_mr(&iwucq->vmr_shadow_area); ++ zxdh_free_hw_buf(iwucq->cq.shadow_area, ++ ZXDH_DB_SHADOW_AREA_SIZE); ++ } ++ free(iwucq); ++ return 0; ++ ++err: ++ return ret; ++} ++ ++int zxdh_umodify_cq(struct ibv_cq *cq, struct ibv_modify_cq_attr *attr) ++{ ++ struct ibv_modify_cq cmd = {}; ++ ++ return ibv_cmd_modify_cq(cq, attr, &cmd, sizeof(cmd)); ++} ++ ++static enum ibv_wc_status ++zxdh_flush_err_to_ib_wc_status(enum zxdh_flush_opcode opcode) ++{ ++ switch (opcode) { ++ case FLUSH_PROT_ERR: ++ return IBV_WC_LOC_PROT_ERR; ++ case FLUSH_REM_ACCESS_ERR: ++ return IBV_WC_REM_ACCESS_ERR; ++ case FLUSH_LOC_QP_OP_ERR: ++ return IBV_WC_LOC_QP_OP_ERR; ++ case FLUSH_REM_OP_ERR: ++ return IBV_WC_REM_OP_ERR; ++ case FLUSH_LOC_LEN_ERR: ++ return IBV_WC_LOC_LEN_ERR; ++ case FLUSH_GENERAL_ERR: ++ return IBV_WC_WR_FLUSH_ERR; ++ case FLUSH_RETRY_EXC_ERR: ++ return IBV_WC_RETRY_EXC_ERR; ++ case FLUSH_MW_BIND_ERR: ++ return IBV_WC_MW_BIND_ERR; ++ case FLUSH_REM_INV_REQ_ERR: ++ return IBV_WC_REM_INV_REQ_ERR; ++ case FLUSH_FATAL_ERR: ++ default: ++ return IBV_WC_FATAL_ERR; ++ } ++} ++ ++/** ++ * zxdh_process_cqe_ext - process current cqe for extended CQ ++ * @cur_cqe - current cqe info ++ */ ++static inline void zxdh_process_cqe_ext(struct zxdh_cq_poll_info *cur_cqe) ++{ ++ struct zxdh_ucq *iwucq = ++ container_of(cur_cqe, struct zxdh_ucq, cur_cqe); ++ struct ibv_cq_ex *ibvcq_ex = &iwucq->verbs_cq.cq_ex; ++ ++ ibvcq_ex->wr_id = cur_cqe->wr_id; ++ if (cur_cqe->error) ++ ibvcq_ex->status = ++ (cur_cqe->comp_status == ZXDH_COMPL_STATUS_FLUSHED) ? 
++ zxdh_flush_err_to_ib_wc_status( ++ cur_cqe->minor_err) : ++ IBV_WC_GENERAL_ERR; ++ else ++ ibvcq_ex->status = IBV_WC_SUCCESS; ++} ++ ++/** ++ * zxdh_process_cqe - process current cqe info ++ * @entry - ibv_wc object to fill in for non-extended CQ ++ * @cur_cqe - current cqe info ++ */ ++static inline void zxdh_process_cqe(struct ibv_wc *entry, ++ struct zxdh_cq_poll_info *cur_cqe) ++{ ++ struct zxdh_qp *qp; ++ struct ibv_qp *ib_qp; ++ ++ entry->wc_flags = 0; ++ entry->wr_id = cur_cqe->wr_id; ++ entry->qp_num = cur_cqe->qp_id; ++ qp = cur_cqe->qp_handle; ++ ib_qp = qp->back_qp; ++ ++ if (cur_cqe->error) { ++ entry->status = ++ (cur_cqe->comp_status == ZXDH_COMPL_STATUS_FLUSHED) ? ++ zxdh_flush_err_to_ib_wc_status( ++ cur_cqe->minor_err) : ++ IBV_WC_GENERAL_ERR; ++ entry->vendor_err = ++ cur_cqe->major_err << 16 | cur_cqe->minor_err; ++ } else { ++ entry->status = IBV_WC_SUCCESS; ++ } ++ ++ if (cur_cqe->imm_valid) { ++ entry->imm_data = htonl(cur_cqe->imm_data); ++ entry->wc_flags |= IBV_WC_WITH_IMM; ++ } ++ ++ switch (cur_cqe->op_type) { ++ case ZXDH_OP_TYPE_SEND: ++ case ZXDH_OP_TYPE_SEND_WITH_IMM: ++ case ZXDH_OP_TYPE_SEND_INV: ++ case ZXDH_OP_TYPE_UD_SEND: ++ case ZXDH_OP_TYPE_UD_SEND_WITH_IMM: ++ entry->opcode = IBV_WC_SEND; ++ break; ++ case ZXDH_OP_TYPE_WRITE: ++ case ZXDH_OP_TYPE_WRITE_WITH_IMM: ++ entry->opcode = IBV_WC_RDMA_WRITE; ++ break; ++ case ZXDH_OP_TYPE_READ: ++ entry->opcode = IBV_WC_RDMA_READ; ++ break; ++ case ZXDH_OP_TYPE_BIND_MW: ++ entry->opcode = IBV_WC_BIND_MW; ++ break; ++ case ZXDH_OP_TYPE_LOCAL_INV: ++ entry->opcode = IBV_WC_LOCAL_INV; ++ break; ++ case ZXDH_OP_TYPE_REC: ++ entry->opcode = IBV_WC_RECV; ++ if (ib_qp->qp_type != IBV_QPT_UD && cur_cqe->stag_invalid_set) { ++ entry->invalidated_rkey = cur_cqe->inv_stag; ++ entry->wc_flags |= IBV_WC_WITH_INV; ++ } ++ break; ++ case ZXDH_OP_TYPE_REC_IMM: ++ entry->opcode = IBV_WC_RECV_RDMA_WITH_IMM; ++ if (ib_qp->qp_type != IBV_QPT_UD && cur_cqe->stag_invalid_set) { ++ entry->invalidated_rkey = cur_cqe->inv_stag; ++ entry->wc_flags |= IBV_WC_WITH_INV; ++ } ++ break; ++ default: ++ entry->status = IBV_WC_GENERAL_ERR; ++ return; ++ } ++ ++ if (ib_qp->qp_type == IBV_QPT_UD) { ++ entry->src_qp = cur_cqe->ud_src_qpn; ++ entry->wc_flags |= IBV_WC_GRH; ++ entry->sl = cur_cqe->ipv4 ? 2 : 1; ++ } else { ++ entry->src_qp = cur_cqe->qp_id; ++ } ++ entry->byte_len = cur_cqe->bytes_xfered; ++} ++ ++/** ++ * zxdh_poll_one - poll one entry of the CQ ++ * @cq: cq to poll ++ * @cur_cqe: current CQE info to be filled in ++ * @entry: ibv_wc object to be filled for non-extended CQ or NULL for extended CQ ++ * ++ * Returns the internal zxdh device error code or 0 on success ++ */ ++static int zxdh_poll_one(struct zxdh_cq *cq, struct zxdh_cq_poll_info *cur_cqe, ++ struct ibv_wc *entry) ++{ ++ int ret = zxdh_cq_poll_cmpl(cq, cur_cqe); ++ ++ if (ret) ++ return ret; ++ ++ if (entry) ++ zxdh_process_cqe(entry, cur_cqe); ++ else ++ zxdh_process_cqe_ext(cur_cqe); ++ ++ return 0; ++} ++ ++/** ++ * __zxdh_upoll_resize_cq - zxdh util function to poll device CQ ++ * @iwucq: zxdh cq to poll ++ * @num_entries: max cq entries to poll ++ * @entry: pointer to array of ibv_wc objects to be filled in for each completion or NULL if ext CQ ++ * ++ * Returns non-negative value equal to the number of completions ++ * found. 
On failure, -EINVAL ++ */ ++static int __zxdh_upoll_resize_cq(struct zxdh_ucq *iwucq, int num_entries, ++ struct ibv_wc *entry) ++{ ++ struct zxdh_cq_buf *cq_buf, *next; ++ struct zxdh_cq_buf *last_buf = NULL; ++ struct zxdh_cq_poll_info *cur_cqe = &iwucq->cur_cqe; ++ bool cq_new_cqe = false; ++ int resized_bufs = 0; ++ int npolled = 0; ++ int ret; ++ ++ /* go through the list of previously resized CQ buffers */ ++ list_for_each_safe (&iwucq->resize_list, cq_buf, next, list) { ++ while (npolled < num_entries) { ++ ret = zxdh_poll_one(&cq_buf->cq, cur_cqe, ++ entry ? entry + npolled : NULL); ++ if (ret == ZXDH_SUCCESS) { ++ ++npolled; ++ cq_new_cqe = true; ++ continue; ++ } ++ if (ret == ZXDH_ERR_Q_EMPTY) ++ break; ++ if (ret == ZXDH_ERR_RETRY_ACK_NOT_EXCEED_ERR) ++ break; ++ /* QP using the CQ is destroyed. Skip reporting this CQE */ ++ if (ret == ZXDH_ERR_Q_DESTROYED) { ++ cq_new_cqe = true; ++ continue; ++ } ++ goto error; ++ } ++ ++ /* save the resized CQ buffer which received the last cqe */ ++ if (cq_new_cqe) ++ last_buf = cq_buf; ++ cq_new_cqe = false; ++ } ++ ++ /* check the current CQ for new cqes */ ++ while (npolled < num_entries) { ++ ret = zxdh_poll_one(&iwucq->cq, cur_cqe, ++ entry ? entry + npolled : NULL); ++ if (ret == ZXDH_SUCCESS) { ++ ++npolled; ++ cq_new_cqe = true; ++ continue; ++ } ++ if (ret == ZXDH_ERR_Q_EMPTY) ++ break; ++ if (ret == ZXDH_ERR_RETRY_ACK_NOT_EXCEED_ERR) ++ break; ++ /* QP using the CQ is destroyed. Skip reporting this CQE */ ++ if (ret == ZXDH_ERR_Q_DESTROYED) { ++ cq_new_cqe = true; ++ continue; ++ } ++ goto error; ++ } ++ if (cq_new_cqe) ++ /* all previous CQ resizes are complete */ ++ resized_bufs = zxdh_process_resize_list(iwucq, NULL); ++ else if (last_buf) ++ /* only CQ resizes up to the last_buf are complete */ ++ resized_bufs = zxdh_process_resize_list(iwucq, last_buf); ++ if (resized_bufs) ++ /* report to the HW the number of complete CQ resizes */ ++ zxdh_cq_set_resized_cnt(&iwucq->cq, resized_bufs); ++ ++ return npolled; ++ ++error: ++ ++ return -EINVAL; ++} ++ ++/** ++ * __zxdh_upoll_current_cq - zxdh util function to poll device CQ ++ * @iwucq: zxdh cq to poll ++ * @num_entries: max cq entries to poll ++ * @entry: pointer to array of ibv_wc objects to be filled in for each completion or NULL if ext CQ ++ * ++ * Returns non-negative value equal to the number of completions ++ * found. On failure, -EINVAL ++ */ ++static int __zxdh_upoll_curent_cq(struct zxdh_ucq *iwucq, int num_entries, ++ struct ibv_wc *entry) ++{ ++ struct zxdh_cq_poll_info *cur_cqe = &iwucq->cur_cqe; ++ int npolled = 0; ++ int ret; ++ ++ /* check the current CQ for new cqes */ ++ while (npolled < num_entries) { ++ ret = zxdh_poll_one(&iwucq->cq, cur_cqe, ++ entry ? 
entry + npolled : NULL); ++ if (unlikely(ret != ZXDH_SUCCESS)) ++ break; ++ ++npolled; ++ } ++ return npolled; ++} ++ ++/** ++ * zxdh_upoll_cq - verb API callback to poll device CQ ++ * @cq: ibv_cq to poll ++ * @num_entries: max cq entries to poll ++ * @entry: pointer to array of ibv_wc objects to be filled in for each completion ++ * ++ * Returns non-negative value equal to the number of completions ++ * found and a negative error code on failure ++ */ ++int zxdh_upoll_cq(struct ibv_cq *cq, int num_entries, struct ibv_wc *entry) ++{ ++ struct zxdh_ucq *iwucq; ++ int ret; ++ ++ iwucq = container_of(cq, struct zxdh_ucq, verbs_cq.cq); ++ ret = pthread_spin_lock(&iwucq->lock); ++ if (ret) ++ return -ret; ++ ++ if (likely(!iwucq->resize_enable)) ++ ret = __zxdh_upoll_curent_cq(iwucq, num_entries, entry); ++ else ++ ret = __zxdh_upoll_resize_cq(iwucq, num_entries, entry); ++ ++ pthread_spin_unlock(&iwucq->lock); ++ ++ return ret; ++} ++ ++/** ++ * zxdh_start_poll - verb_ex API callback to poll batch of WC's ++ * @ibvcq_ex: ibv extended CQ ++ * @attr: attributes (not used) ++ * ++ * Start polling batch of work completions. Return 0 on success, ENONENT when ++ * no completions are available on CQ. And an error code on errors ++ */ ++static int zxdh_start_poll(struct ibv_cq_ex *ibvcq_ex, ++ struct ibv_poll_cq_attr *attr) ++{ ++ struct zxdh_ucq *iwucq; ++ int ret; ++ ++ iwucq = container_of(ibvcq_ex, struct zxdh_ucq, verbs_cq.cq_ex); ++ ret = pthread_spin_lock(&iwucq->lock); ++ if (ret) ++ return ret; ++ ++ if (!iwucq->resize_enable) { ++ ret = __zxdh_upoll_curent_cq(iwucq, 1, NULL); ++ if (ret == 1) ++ return 0; ++ } else { ++ ret = __zxdh_upoll_resize_cq(iwucq, 1, NULL); ++ if (ret == 1) ++ return 0; ++ } ++ ++ /* No Completions on CQ */ ++ if (!ret) ++ ret = ENOENT; ++ ++ pthread_spin_unlock(&iwucq->lock); ++ ++ return ret; ++} ++ ++/** ++ * zxdh_next_poll - verb_ex API callback to get next WC ++ * @ibvcq_ex: ibv extended CQ ++ * ++ * Return 0 on success, ENONENT when no completions are available on CQ. 
++ * And an error code on errors ++ */ ++static int zxdh_next_poll(struct ibv_cq_ex *ibvcq_ex) ++{ ++ struct zxdh_ucq *iwucq; ++ int ret; ++ ++ iwucq = container_of(ibvcq_ex, struct zxdh_ucq, verbs_cq.cq_ex); ++ if (!iwucq->resize_enable) { ++ ret = __zxdh_upoll_curent_cq(iwucq, 1, NULL); ++ if (ret == 1) ++ return 0; ++ } else { ++ ret = __zxdh_upoll_resize_cq(iwucq, 1, NULL); ++ if (ret == 1) ++ return 0; ++ } ++ ++ /* No Completions on CQ */ ++ if (!ret) ++ ret = ENOENT; ++ ++ return ret; ++} ++ ++/** ++ * zxdh_end_poll - verb_ex API callback to end polling of WC's ++ * @ibvcq_ex: ibv extended CQ ++ */ ++static void zxdh_end_poll(struct ibv_cq_ex *ibvcq_ex) ++{ ++ struct zxdh_ucq *iwucq = ++ container_of(ibvcq_ex, struct zxdh_ucq, verbs_cq.cq_ex); ++ ++ pthread_spin_unlock(&iwucq->lock); ++} ++ ++/** ++ * zxdh_wc_read_completion_ts - Get completion timestamp ++ * @ibvcq_ex: ibv extended CQ ++ * ++ * Get completion timestamp in HCA clock units ++ */ ++static uint64_t zxdh_wc_read_completion_ts(struct ibv_cq_ex *ibvcq_ex) ++{ ++ struct zxdh_ucq *iwucq = ++ container_of(ibvcq_ex, struct zxdh_ucq, verbs_cq.cq_ex); ++#define HCA_CORE_CLOCK_800_MHZ 800 ++ ++ return iwucq->cur_cqe.tcp_seq_num_rtt / HCA_CORE_CLOCK_800_MHZ; ++} ++ ++/** ++ * zxdh_wc_read_completion_wallclock_ns - Get completion timestamp in ns ++ * @ibvcq_ex: ibv extended CQ ++ * ++ * Get completion timestamp from current completion in wall clock nanoseconds ++ */ ++static uint64_t zxdh_wc_read_completion_wallclock_ns(struct ibv_cq_ex *ibvcq_ex) ++{ ++ struct zxdh_ucq *iwucq = ++ container_of(ibvcq_ex, struct zxdh_ucq, verbs_cq.cq_ex); ++ ++ /* RTT is in usec */ ++ return iwucq->cur_cqe.tcp_seq_num_rtt * 1000; ++} ++ ++static enum ibv_wc_opcode zxdh_wc_read_opcode(struct ibv_cq_ex *ibvcq_ex) ++{ ++ struct zxdh_ucq *iwucq = ++ container_of(ibvcq_ex, struct zxdh_ucq, verbs_cq.cq_ex); ++ ++ switch (iwucq->cur_cqe.op_type) { ++ case ZXDH_OP_TYPE_WRITE: ++ case ZXDH_OP_TYPE_WRITE_WITH_IMM: ++ return IBV_WC_RDMA_WRITE; ++ case ZXDH_OP_TYPE_READ: ++ return IBV_WC_RDMA_READ; ++ case ZXDH_OP_TYPE_SEND: ++ case ZXDH_OP_TYPE_SEND_WITH_IMM: ++ case ZXDH_OP_TYPE_SEND_INV: ++ case ZXDH_OP_TYPE_UD_SEND: ++ case ZXDH_OP_TYPE_UD_SEND_WITH_IMM: ++ return IBV_WC_SEND; ++ case ZXDH_OP_TYPE_BIND_MW: ++ return IBV_WC_BIND_MW; ++ case ZXDH_OP_TYPE_REC: ++ return IBV_WC_RECV; ++ case ZXDH_OP_TYPE_REC_IMM: ++ return IBV_WC_RECV_RDMA_WITH_IMM; ++ case ZXDH_OP_TYPE_LOCAL_INV: ++ return IBV_WC_LOCAL_INV; ++ } ++ ++ return 0; ++} ++ ++static uint32_t zxdh_wc_read_vendor_err(struct ibv_cq_ex *ibvcq_ex) ++{ ++ struct zxdh_cq_poll_info *cur_cqe; ++ struct zxdh_ucq *iwucq; ++ ++ iwucq = container_of(ibvcq_ex, struct zxdh_ucq, verbs_cq.cq_ex); ++ cur_cqe = &iwucq->cur_cqe; ++ ++ return cur_cqe->error ? 
cur_cqe->major_err << 16 | cur_cqe->minor_err : ++ 0; ++} ++ ++static unsigned int zxdh_wc_read_wc_flags(struct ibv_cq_ex *ibvcq_ex) ++{ ++ struct zxdh_cq_poll_info *cur_cqe; ++ struct zxdh_ucq *iwucq; ++ struct zxdh_qp *qp; ++ struct ibv_qp *ib_qp; ++ unsigned int wc_flags = 0; ++ ++ iwucq = container_of(ibvcq_ex, struct zxdh_ucq, verbs_cq.cq_ex); ++ cur_cqe = &iwucq->cur_cqe; ++ qp = cur_cqe->qp_handle; ++ ib_qp = qp->back_qp; ++ ++ if (cur_cqe->imm_valid) ++ wc_flags |= IBV_WC_WITH_IMM; ++ ++ if (ib_qp->qp_type == IBV_QPT_UD) { ++ wc_flags |= IBV_WC_GRH; ++ } else { ++ if (cur_cqe->stag_invalid_set) { ++ switch (cur_cqe->op_type) { ++ case ZXDH_OP_TYPE_REC: ++ wc_flags |= IBV_WC_WITH_INV; ++ break; ++ case ZXDH_OP_TYPE_REC_IMM: ++ wc_flags |= IBV_WC_WITH_INV; ++ break; ++ } ++ } ++ } ++ ++ return wc_flags; ++} ++ ++static uint32_t zxdh_wc_read_byte_len(struct ibv_cq_ex *ibvcq_ex) ++{ ++ struct zxdh_ucq *iwucq = ++ container_of(ibvcq_ex, struct zxdh_ucq, verbs_cq.cq_ex); ++ ++ return iwucq->cur_cqe.bytes_xfered; ++} ++ ++static __be32 zxdh_wc_read_imm_data(struct ibv_cq_ex *ibvcq_ex) ++{ ++ struct zxdh_cq_poll_info *cur_cqe; ++ struct zxdh_ucq *iwucq; ++ ++ iwucq = container_of(ibvcq_ex, struct zxdh_ucq, verbs_cq.cq_ex); ++ cur_cqe = &iwucq->cur_cqe; ++ ++ return cur_cqe->imm_valid ? htonl(cur_cqe->imm_data) : 0; ++} ++ ++static uint32_t zxdh_wc_read_qp_num(struct ibv_cq_ex *ibvcq_ex) ++{ ++ struct zxdh_ucq *iwucq = ++ container_of(ibvcq_ex, struct zxdh_ucq, verbs_cq.cq_ex); ++ ++ return iwucq->cur_cqe.qp_id; ++} ++ ++static uint32_t zxdh_wc_read_src_qp(struct ibv_cq_ex *ibvcq_ex) ++{ ++ struct zxdh_cq_poll_info *cur_cqe; ++ struct zxdh_ucq *iwucq; ++ struct zxdh_qp *qp; ++ struct ibv_qp *ib_qp; ++ ++ iwucq = container_of(ibvcq_ex, struct zxdh_ucq, verbs_cq.cq_ex); ++ cur_cqe = &iwucq->cur_cqe; ++ qp = cur_cqe->qp_handle; ++ ib_qp = qp->back_qp; ++ ++ return ib_qp->qp_type == IBV_QPT_UD ? 
cur_cqe->ud_src_qpn : ++ cur_cqe->qp_id; ++} ++ ++static uint32_t zxdh_wc_read_slid(struct ibv_cq_ex *ibvcq_ex) ++{ ++ return 0; ++} ++ ++static uint8_t zxdh_wc_read_sl(struct ibv_cq_ex *ibvcq_ex) ++{ ++ return 0; ++} ++ ++static uint8_t zxdh_wc_read_dlid_path_bits(struct ibv_cq_ex *ibvcq_ex) ++{ ++ return 0; ++} ++ ++void zxdh_ibvcq_ex_fill_priv_funcs(struct zxdh_ucq *iwucq, ++ struct ibv_cq_init_attr_ex *attr_ex) ++{ ++ struct ibv_cq_ex *ibvcq_ex = &iwucq->verbs_cq.cq_ex; ++ ++ ibvcq_ex->start_poll = zxdh_start_poll; ++ ibvcq_ex->end_poll = zxdh_end_poll; ++ ibvcq_ex->next_poll = zxdh_next_poll; ++ ++ if (attr_ex->wc_flags & IBV_WC_EX_WITH_COMPLETION_TIMESTAMP) { ++ ibvcq_ex->read_completion_ts = zxdh_wc_read_completion_ts; ++ iwucq->report_rtt = true; ++ } ++ if (attr_ex->wc_flags & IBV_WC_EX_WITH_COMPLETION_TIMESTAMP_WALLCLOCK) { ++ ibvcq_ex->read_completion_wallclock_ns = ++ zxdh_wc_read_completion_wallclock_ns; ++ iwucq->report_rtt = true; ++ } ++ ++ ibvcq_ex->read_opcode = zxdh_wc_read_opcode; ++ ibvcq_ex->read_vendor_err = zxdh_wc_read_vendor_err; ++ ibvcq_ex->read_wc_flags = zxdh_wc_read_wc_flags; ++ ++ if (attr_ex->wc_flags & IBV_WC_EX_WITH_BYTE_LEN) ++ ibvcq_ex->read_byte_len = zxdh_wc_read_byte_len; ++ if (attr_ex->wc_flags & IBV_WC_EX_WITH_IMM) ++ ibvcq_ex->read_imm_data = zxdh_wc_read_imm_data; ++ if (attr_ex->wc_flags & IBV_WC_EX_WITH_QP_NUM) ++ ibvcq_ex->read_qp_num = zxdh_wc_read_qp_num; ++ if (attr_ex->wc_flags & IBV_WC_EX_WITH_SRC_QP) ++ ibvcq_ex->read_src_qp = zxdh_wc_read_src_qp; ++ if (attr_ex->wc_flags & IBV_WC_EX_WITH_SLID) ++ ibvcq_ex->read_slid = zxdh_wc_read_slid; ++ if (attr_ex->wc_flags & IBV_WC_EX_WITH_SL) ++ ibvcq_ex->read_sl = zxdh_wc_read_sl; ++ if (attr_ex->wc_flags & IBV_WC_EX_WITH_DLID_PATH_BITS) ++ ibvcq_ex->read_dlid_path_bits = zxdh_wc_read_dlid_path_bits; ++} ++ ++/** ++ * zxdh_arm_cq - arm of cq ++ * @iwucq: cq to which arm ++ * @cq_notify: notification params ++ */ ++static void zxdh_arm_cq(struct zxdh_ucq *iwucq, enum zxdh_cmpl_notify cq_notify) ++{ ++ iwucq->is_armed = true; ++ iwucq->last_notify = cq_notify; ++ ++ zxdh_cq_request_notification(&iwucq->cq, cq_notify); ++} ++ ++/** ++ * zxdh_uarm_cq - callback for arm of cq ++ * @cq: cq to arm ++ * @solicited: to get notify params ++ */ ++int zxdh_uarm_cq(struct ibv_cq *cq, int solicited) ++{ ++ struct zxdh_ucq *iwucq; ++ enum zxdh_cmpl_notify cq_notify = ZXDH_CQ_COMPL_EVENT; ++ bool promo_event = false; ++ int ret; ++ ++ iwucq = container_of(cq, struct zxdh_ucq, verbs_cq.cq); ++ if (solicited) { ++ cq_notify = ZXDH_CQ_COMPL_SOLICITED; ++ } else { ++ if (iwucq->last_notify == ZXDH_CQ_COMPL_SOLICITED) ++ promo_event = true; ++ } ++ ++ ret = pthread_spin_lock(&iwucq->lock); ++ if (ret) ++ return ret; ++ ++ if (!iwucq->is_armed || promo_event) ++ zxdh_arm_cq(iwucq, cq_notify); ++ ++ pthread_spin_unlock(&iwucq->lock); ++ ++ return 0; ++} ++ ++/** ++ * zxdh_cq_event - cq to do completion event ++ * @cq: cq to arm ++ */ ++void zxdh_cq_event(struct ibv_cq *cq) ++{ ++ struct zxdh_ucq *iwucq; ++ ++ iwucq = container_of(cq, struct zxdh_ucq, verbs_cq.cq); ++ if (pthread_spin_lock(&iwucq->lock)) ++ return; ++ ++ iwucq->is_armed = false; ++ ++ pthread_spin_unlock(&iwucq->lock); ++} ++ ++void *zxdh_mmap(int fd, off_t offset) ++{ ++ void *map; ++ ++ map = mmap(NULL, ZXDH_HW_PAGE_SIZE, PROT_WRITE | PROT_READ, MAP_SHARED, ++ fd, offset); ++ if (map == MAP_FAILED) ++ return map; ++ ++ if (ibv_dontfork_range(map, ZXDH_HW_PAGE_SIZE)) { ++ munmap(map, ZXDH_HW_PAGE_SIZE); ++ return MAP_FAILED; ++ } ++ ++ return map; ++} 
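The extended-CQ callbacks installed above in zxdh_ibvcq_ex_fill_priv_funcs() (zxdh_start_poll/zxdh_next_poll/zxdh_end_poll and the read_* accessors) are not called directly by applications; they are reached through the generic libibverbs wrappers. The sketch below is illustrative only and not part of this patch: it assumes a CQ created via ibv_create_cq_ex()/zxdh_ucreate_cq_ex() with IBV_WC_EX_WITH_BYTE_LEN and IBV_WC_EX_WITH_QP_NUM set in wc_flags, and the helper name poll_cq_ex_once is hypothetical.

#include <errno.h>
#include <stdio.h>
#include <infiniband/verbs.h>

/* Drain up to 'budget' completions from an extended CQ. */
static int poll_cq_ex_once(struct ibv_cq_ex *cq, int budget)
{
	struct ibv_poll_cq_attr attr = {};
	int n = 0;
	int ret;

	ret = ibv_start_poll(cq, &attr);	/* dispatches to zxdh_start_poll() */
	if (ret == ENOENT)
		return 0;			/* CQ currently empty */
	if (ret)
		return -ret;

	do {
		if (cq->status != IBV_WC_SUCCESS)
			fprintf(stderr, "wr_id %llu failed, vendor_err 0x%x\n",
				(unsigned long long)cq->wr_id,
				ibv_wc_read_vendor_err(cq));
		else if (ibv_wc_read_opcode(cq) == IBV_WC_RECV)
			printf("recv %u bytes on qp 0x%x\n",
			       ibv_wc_read_byte_len(cq),
			       ibv_wc_read_qp_num(cq));
		n++;
	} while (n < budget && ibv_next_poll(cq) == 0);	/* zxdh_next_poll() */

	ibv_end_poll(cq);	/* zxdh_end_poll() releases the CQ spinlock */
	return n;
}

Note that, as implemented above, zxdh_start_poll() keeps the CQ lock held for the whole polling batch, so ibv_end_poll() must be called whenever ibv_start_poll() returned 0, even if ibv_next_poll() has already drained the queue.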
++ ++void zxdh_munmap(void *map) ++{ ++ ibv_dofork_range(map, ZXDH_HW_PAGE_SIZE); ++ munmap(map, ZXDH_HW_PAGE_SIZE); ++} ++ ++/** ++ * zxdh_destroy_vmapped_qp - destroy resources for qp ++ * @iwuqp: qp struct for resources ++ */ ++static int zxdh_destroy_vmapped_qp(struct zxdh_uqp *iwuqp) ++{ ++ int ret; ++ ++ ret = ibv_cmd_destroy_qp(&iwuqp->vqp.qp); ++ if (ret) ++ return ret; ++ ++ ibv_cmd_dereg_mr(&iwuqp->vmr); ++ ++ return 0; ++} ++ ++/** ++ * zxdh_vmapped_qp - create resources for qp ++ * @iwuqp: qp struct for resources ++ * @pd: pd for the qp ++ * @attr: attributes of qp passed ++ * @resp: response back from create qp ++ * @sqdepth: depth of sq ++ * @rqdepth: depth of rq ++ * @info: info for initializing user level qp ++ * @abi_ver: abi version of the create qp command ++ */ ++static int zxdh_vmapped_qp(struct zxdh_uqp *iwuqp, struct ibv_pd *pd, ++ struct ibv_qp_init_attr *attr, int sqdepth, ++ int rqdepth, struct zxdh_qp_init_info *info, ++ bool legacy_mode) ++{ ++ struct zxdh_ucreate_qp cmd = {}; ++ size_t sqsize, rqsize, totalqpsize; ++ struct zxdh_ucreate_qp_resp resp = {}; ++ struct zxdh_ureg_mr reg_mr_cmd = {}; ++ struct ib_uverbs_reg_mr_resp reg_mr_resp = {}; ++ int ret; ++ ++ rqsize = 0; ++ sqsize = roundup(sqdepth * ZXDH_QP_SQE_MIN_SIZE, ZXDH_HW_PAGE_SIZE); ++ if (iwuqp->is_srq == false) { ++ rqsize = roundup(rqdepth * ZXDH_QP_RQE_MIN_SIZE, ++ ZXDH_HW_PAGE_SIZE); ++ totalqpsize = rqsize + sqsize + ZXDH_DB_SHADOW_AREA_SIZE; ++ } else { ++ totalqpsize = sqsize + ZXDH_DB_SHADOW_AREA_SIZE; ++ } ++ info->sq = zxdh_alloc_hw_buf(totalqpsize); ++ iwuqp->buf_size = totalqpsize; ++ ++ if (!info->sq) ++ return -ENOMEM; ++ ++ memset(info->sq, 0, totalqpsize); ++ if (iwuqp->is_srq == false) { ++ info->rq = (struct zxdh_qp_rq_quanta *)&info ++ ->sq[sqsize / ZXDH_QP_SQE_MIN_SIZE]; ++ info->shadow_area = ++ info->rq[rqsize / ZXDH_QP_RQE_MIN_SIZE].elem; ++ reg_mr_cmd.rq_pages = rqsize >> ZXDH_HW_PAGE_SHIFT; ++ } else { ++ info->shadow_area = ++ (__le64 *)&info->sq[sqsize / ZXDH_QP_SQE_MIN_SIZE]; ++ } ++ reg_mr_cmd.reg_type = ZXDH_MEMREG_TYPE_QP; ++ reg_mr_cmd.sq_pages = sqsize >> ZXDH_HW_PAGE_SHIFT; ++ ++ ret = ibv_cmd_reg_mr(pd, info->sq, totalqpsize, (uintptr_t)info->sq, ++ IBV_ACCESS_LOCAL_WRITE, &iwuqp->vmr, ++ ®_mr_cmd.ibv_cmd, sizeof(reg_mr_cmd), ++ ®_mr_resp, sizeof(reg_mr_resp)); ++ if (ret) ++ goto err_dereg_mr; ++ ++ cmd.user_wqe_bufs = (__u64)((uintptr_t)info->sq); ++ cmd.user_compl_ctx = (__u64)(uintptr_t)&iwuqp->qp; ++ ret = ibv_cmd_create_qp(pd, &iwuqp->vqp.qp, attr, &cmd.ibv_cmd, ++ sizeof(cmd), &resp.ibv_resp, ++ sizeof(struct zxdh_ucreate_qp_resp)); ++ if (ret) ++ goto err_qp; ++ ++ info->sq_size = resp.actual_sq_size; ++ info->rq_size = resp.actual_rq_size; ++ info->qp_caps = resp.qp_caps; ++ info->qp_id = resp.qp_id; ++ iwuqp->zxdh_drv_opt = resp.zxdh_drv_opt; ++ iwuqp->vqp.qp.qp_num = resp.qp_id; ++ ++ iwuqp->send_cq = ++ container_of(attr->send_cq, struct zxdh_ucq, verbs_cq.cq); ++ iwuqp->recv_cq = ++ container_of(attr->recv_cq, struct zxdh_ucq, verbs_cq.cq); ++ iwuqp->send_cq->uqp = iwuqp; ++ iwuqp->recv_cq->uqp = iwuqp; ++ ++ return 0; ++err_qp: ++ ibv_cmd_dereg_mr(&iwuqp->vmr); ++err_dereg_mr: ++ zxdh_free_hw_buf(info->sq, iwuqp->buf_size); ++ return ret; ++} ++ ++static void zxdh_wr_local_inv(struct ibv_qp_ex *ibqp, uint32_t invalidate_rkey) ++{ ++ struct zxdh_uqp *qp = container_of(ibqp, struct zxdh_uqp, vqp.qp_ex); ++ struct ibv_send_wr wr = {}; ++ struct ibv_send_wr *bad_wr = NULL; ++ ++ wr.opcode = IBV_WR_LOCAL_INV; ++ wr.invalidate_rkey = invalidate_rkey; ++ ++ 
zxdh_upost_send(&qp->vqp.qp, &wr, &bad_wr); ++} ++ ++static void zxdh_send_wr_send_inv(struct ibv_qp_ex *ibqp, ++ uint32_t invalidate_rkey) ++{ ++ struct zxdh_uqp *qp = container_of(ibqp, struct zxdh_uqp, vqp.qp_ex); ++ struct ibv_send_wr wr = {}; ++ struct ibv_send_wr *bad_wr = NULL; ++ ++ wr.opcode = IBV_WR_SEND_WITH_INV; ++ wr.invalidate_rkey = invalidate_rkey; ++ ++ zxdh_upost_send(&qp->vqp.qp, &wr, &bad_wr); ++} ++ ++static void zxdh_wr_bind_mw(struct ibv_qp_ex *ibqp, struct ibv_mw *ibmw, ++ uint32_t rkey, const struct ibv_mw_bind_info *info) ++{ ++ struct zxdh_uqp *qp = container_of(ibqp, struct zxdh_uqp, vqp.qp_ex); ++ struct ibv_send_wr wr = {}; ++ struct ibv_send_wr *bad_wr = NULL; ++ ++ if (ibmw->type != IBV_MW_TYPE_2) ++ return; ++ ++ wr.opcode = IBV_WR_BIND_MW; ++ wr.bind_mw.bind_info = *info; ++ wr.bind_mw.mw = ibmw; ++ wr.bind_mw.rkey = rkey; ++ ++ zxdh_upost_send(&qp->vqp.qp, &wr, &bad_wr); ++} ++ ++static struct ibv_qp *create_qp(struct ibv_context *ibv_ctx, ++ struct ibv_qp_init_attr_ex *attr_ex) ++{ ++ struct zxdh_qp_init_info info = {}; ++ struct zxdh_dev_attrs *dev_attrs; ++ struct zxdh_uvcontext *iwvctx; ++ struct zxdh_uqp *iwuqp; ++ struct zxdh_usrq *iwusrq; ++ struct ibv_pd *pd = attr_ex->pd; ++ struct ibv_qp_init_attr *attr; ++ __u32 sqdepth, rqdepth; ++ __u8 sqshift, rqshift; ++ int status; ++ ++ attr = calloc(1, sizeof(*attr)); ++ if (!attr) ++ return NULL; ++ ++ memcpy(attr, attr_ex, sizeof(*attr)); ++ ++ if (attr->qp_type != IBV_QPT_RC && attr->qp_type != IBV_QPT_UD) { ++ errno = EOPNOTSUPP; ++ free(attr); ++ return NULL; ++ } ++ ++ iwvctx = container_of(ibv_ctx, struct zxdh_uvcontext, ibv_ctx.context); ++ dev_attrs = &iwvctx->dev_attrs; ++ ++ if (attr->cap.max_send_sge > dev_attrs->max_hw_wq_frags || ++ attr->cap.max_recv_sge > dev_attrs->max_hw_wq_frags) { ++ errno = EINVAL; ++ free(attr); ++ return NULL; ++ } ++ ++ if (attr->cap.max_inline_data > dev_attrs->max_hw_inline) { ++ zxdh_dbg(&iwvctx->ibv_ctx, ZXDH_DBG_QP, ++ "max_inline_data over max_hw_inline\n"); ++ attr->cap.max_inline_data = dev_attrs->max_hw_inline; ++ } ++ ++ zxdh_get_sq_wqe_shift(attr->cap.max_send_sge, attr->cap.max_inline_data, ++ &sqshift); ++ status = zxdh_get_sqdepth(dev_attrs, attr->cap.max_send_wr, sqshift, ++ &sqdepth); ++ if (status) { ++ errno = EINVAL; ++ free(attr); ++ return NULL; ++ } ++ ++ zxdh_get_rq_wqe_shift(attr->cap.max_recv_sge, &rqshift); ++ status = zxdh_get_rqdepth(dev_attrs, attr->cap.max_recv_wr, rqshift, ++ &rqdepth); ++ if (status) { ++ errno = EINVAL; ++ free(attr); ++ return NULL; ++ } ++ ++ iwuqp = memalign(1024, sizeof(*iwuqp)); ++ if (!iwuqp) { ++ free(attr); ++ return NULL; ++ } ++ ++ memset(iwuqp, 0, sizeof(*iwuqp)); ++ ++ if (attr_ex->comp_mask & IBV_QP_INIT_ATTR_SEND_OPS_FLAGS) { ++ if (attr_ex->send_ops_flags & ~IBV_QP_EX_WITH_BIND_MW) { ++ errno = EOPNOTSUPP; ++ free(iwuqp); ++ free(attr); ++ return NULL; ++ } ++ ++ iwuqp->vqp.comp_mask |= VERBS_QP_EX; ++ if (attr_ex->send_ops_flags & IBV_QP_EX_WITH_BIND_MW) ++ iwuqp->vqp.qp_ex.wr_bind_mw = zxdh_wr_bind_mw; ++ ++ if (attr_ex->send_ops_flags & IBV_QP_EX_WITH_SEND_WITH_INV) ++ iwuqp->vqp.qp_ex.wr_send_inv = zxdh_send_wr_send_inv; ++ ++ if (attr_ex->send_ops_flags & IBV_QP_EX_WITH_LOCAL_INV) ++ iwuqp->vqp.qp_ex.wr_local_inv = zxdh_wr_local_inv; ++ } ++ ++ if (pthread_spin_init(&iwuqp->lock, PTHREAD_PROCESS_PRIVATE)) ++ goto err_free_qp; ++ ++ info.sq_size = sqdepth >> sqshift; ++ info.rq_size = rqdepth >> rqshift; ++ attr->cap.max_send_wr = info.sq_size; ++ attr->cap.max_recv_wr = info.rq_size; ++ ++ 
info.dev_attrs = dev_attrs; ++ info.max_sq_frag_cnt = attr->cap.max_send_sge; ++ info.max_rq_frag_cnt = attr->cap.max_recv_sge; ++ ++ if (attr->srq != NULL) { ++ iwuqp->is_srq = true; ++ iwusrq = container_of(attr->srq, struct zxdh_usrq, ibv_srq); ++ iwuqp->srq = iwusrq; ++ iwuqp->qp.is_srq = true; ++ } ++ ++ if (iwuqp->is_srq == false) { ++ iwuqp->recv_sges = calloc(attr->cap.max_recv_sge, ++ sizeof(*iwuqp->recv_sges)); ++ if (!iwuqp->recv_sges) ++ goto err_destroy_lock; ++ } ++ ++ info.wqe_alloc_db = ++ (__u32 *)((__u8 *)iwvctx->sq_db + ZXDH_DB_SQ_OFFSET); ++ info.abi_ver = iwvctx->abi_ver; ++ info.legacy_mode = iwvctx->legacy_mode; ++ info.sq_wrtrk_array = calloc(sqdepth, sizeof(*info.sq_wrtrk_array)); ++ if (!info.sq_wrtrk_array) ++ goto err_free_rsges; ++ ++ if (iwuqp->is_srq == false) { ++ info.rq_wrid_array = ++ calloc(info.rq_size, sizeof(*info.rq_wrid_array)); ++ if (!info.rq_wrid_array) ++ goto err_free_sq_wrtrk; ++ } ++ ++ iwuqp->sq_sig_all = attr->sq_sig_all; ++ iwuqp->qp_type = attr->qp_type; ++ if (attr->qp_type == IBV_QPT_UD) ++ info.type = ZXDH_QP_TYPE_ROCE_UD; ++ else ++ info.type = ZXDH_QP_TYPE_ROCE_RC; ++ status = zxdh_vmapped_qp(iwuqp, pd, attr, sqdepth, rqdepth, &info, ++ iwvctx->legacy_mode); ++ if (status) { ++ errno = status; ++ goto err_free_rq_wrid; ++ } ++ ++ iwuqp->qp.back_qp = iwuqp; ++ iwuqp->qp.lock = &iwuqp->lock; ++ info.max_sq_frag_cnt = attr->cap.max_send_sge; ++ info.max_rq_frag_cnt = attr->cap.max_recv_sge; ++ info.max_inline_data = attr->cap.max_inline_data; ++ if (info.type == ZXDH_QP_TYPE_ROCE_RC) { ++ iwuqp->qp.split_sg_list = ++ calloc(2 * dev_attrs->max_hw_read_sges, ++ sizeof(*iwuqp->qp.split_sg_list)); ++ if (!iwuqp->qp.split_sg_list) ++ goto err_free_vmap_qp; ++ } ++ status = zxdh_qp_init(&iwuqp->qp, &info); ++ if (status) { ++ errno = EINVAL; ++ goto err_free_sg_list; ++ } ++ iwuqp->qp.mtu = mtu_enum_to_int(IBV_MTU_1024); ++ attr->cap.max_send_wr = (sqdepth - ZXDH_SQ_RSVD) >> sqshift; ++ attr->cap.max_recv_wr = (rqdepth - ZXDH_RQ_RSVD) >> rqshift; ++ memcpy(attr_ex, attr, sizeof(*attr)); ++ free(attr); ++ return &iwuqp->vqp.qp; ++ ++err_free_sg_list: ++ if (iwuqp->qp.split_sg_list) ++ free(iwuqp->qp.split_sg_list); ++err_free_vmap_qp: ++ zxdh_destroy_vmapped_qp(iwuqp); ++ zxdh_free_hw_buf(info.sq, iwuqp->buf_size); ++err_free_rq_wrid: ++ free(info.rq_wrid_array); ++err_free_sq_wrtrk: ++ free(info.sq_wrtrk_array); ++err_free_rsges: ++ free(iwuqp->recv_sges); ++err_destroy_lock: ++ pthread_spin_destroy(&iwuqp->lock); ++err_free_qp: ++ free(iwuqp); ++ free(attr); ++ ++ return NULL; ++} ++ ++/** ++ * zxdh_ucreate_qp - create qp on user app ++ * @pd: pd for the qp ++ * @attr: attributes of the qp to be created (sizes, sge, cq) ++ */ ++struct ibv_qp *zxdh_ucreate_qp(struct ibv_pd *pd, struct ibv_qp_init_attr *attr) ++{ ++ struct ibv_qp_init_attr_ex attrx = {}; ++ struct ibv_qp *qp; ++ ++ memcpy(&attrx, attr, sizeof(*attr)); ++ attrx.comp_mask = IBV_QP_INIT_ATTR_PD; ++ attrx.pd = pd; ++ ++ qp = create_qp(pd->context, &attrx); ++ if (qp) ++ memcpy(attr, &attrx, sizeof(*attr)); ++ ++ return qp; ++} ++ ++/** ++ * zxdh_ucreate_qp_ex - create qp_ex on user app ++ * @context: user context of the device ++ * @attr: attributes of the qp_ex to be created ++ */ ++struct ibv_qp *zxdh_ucreate_qp_ex(struct ibv_context *context, ++ struct ibv_qp_init_attr_ex *attr) ++{ ++ return create_qp(context, attr); ++} ++ ++/** ++ * zxdh_uquery_qp - query qp for some attribute ++ * @qp: qp for the attributes query ++ * @attr: to return the attributes ++ * @attr_mask: mask of 
what is query for ++ * @init_attr: initial attributes during create_qp ++ */ ++int zxdh_uquery_qp(struct ibv_qp *qp, struct ibv_qp_attr *attr, int attr_mask, ++ struct ibv_qp_init_attr *init_attr) ++{ ++ struct ibv_query_qp cmd; ++ ++ return ibv_cmd_query_qp(qp, attr, attr_mask, init_attr, &cmd, ++ sizeof(cmd)); ++} ++ ++/** ++ * zxdh_umodify_qp - send qp modify to driver ++ * @qp: qp to modify ++ * @attr: attribute to modify ++ * @attr_mask: mask of the attribute ++ */ ++int zxdh_umodify_qp(struct ibv_qp *qp, struct ibv_qp_attr *attr, int attr_mask) ++{ ++ struct zxdh_uqp *iwuqp; ++ struct zxdh_umodify_qp_resp resp = {}; ++ struct ibv_modify_qp cmd = {}; ++ struct zxdh_umodify_qp cmd_ex = {}; ++ int ret; ++ __u16 mtu = 0; ++ ++ iwuqp = container_of(qp, struct zxdh_uqp, vqp.qp); ++ if (attr_mask & IBV_QP_STATE || attr_mask & IBV_QP_RATE_LIMIT) { ++ ret = ibv_cmd_modify_qp_ex(qp, attr, attr_mask, &cmd_ex.ibv_cmd, ++ sizeof(cmd_ex), &resp.ibv_resp, ++ sizeof(resp)); ++ } else { ++ ret = ibv_cmd_modify_qp(qp, attr, attr_mask, &cmd, sizeof(cmd)); ++ } ++ if (!ret && (attr_mask & IBV_QP_PATH_MTU) && ++ qp->qp_type == IBV_QPT_RC) { ++ mtu = mtu_enum_to_int(attr->path_mtu); ++ if (mtu == 0) ++ return -EINVAL; ++ iwuqp->qp.mtu = mtu; ++ } ++ if (!ret && (attr_mask & IBV_QP_SQ_PSN) && qp->qp_type == IBV_QPT_RC) { ++ iwuqp->qp.next_psn = attr->sq_psn; ++ iwuqp->qp.cqe_last_ack_qsn = attr->sq_psn - 1; ++ iwuqp->qp.qp_last_ack_qsn = attr->sq_psn - 1; ++ iwuqp->qp.cqe_retry_cnt = 0; ++ iwuqp->qp.qp_reset_cnt = 0; ++ } ++ return ret; ++} ++ ++static void zxdh_issue_flush(struct ibv_qp *qp, bool sq_flush, bool rq_flush) ++{ ++ struct ib_uverbs_ex_modify_qp_resp resp = {}; ++ struct zxdh_umodify_qp cmd_ex = {}; ++ struct ibv_qp_attr attr = {}; ++ ++ attr.qp_state = IBV_QPS_ERR; ++ cmd_ex.sq_flush = sq_flush; ++ cmd_ex.rq_flush = rq_flush; ++ ++ ibv_cmd_modify_qp_ex(qp, &attr, IBV_QP_STATE, &cmd_ex.ibv_cmd, ++ sizeof(cmd_ex), &resp, sizeof(resp)); ++} ++ ++/** ++ * zxdh_clean_cqes - clean cq entries for qp ++ * @qp: qp for which completions are cleaned ++ * @iwcq: cq to be cleaned ++ */ ++static void zxdh_clean_cqes(struct zxdh_qp *qp, struct zxdh_ucq *iwucq) ++{ ++ struct zxdh_cq *cq = &iwucq->cq; ++ int ret; ++ ++ ret = pthread_spin_lock(&iwucq->lock); ++ if (ret) ++ return; ++ ++ zxdh_clean_cq(qp, cq); ++ pthread_spin_unlock(&iwucq->lock); ++} ++ ++/** ++ * zxdh_udestroy_qp - destroy qp ++ * @qp: qp to destroy ++ */ ++int zxdh_udestroy_qp(struct ibv_qp *qp) ++{ ++ struct zxdh_uqp *iwuqp; ++ int ret; ++ ++ iwuqp = container_of(qp, struct zxdh_uqp, vqp.qp); ++ ret = pthread_spin_destroy(&iwuqp->lock); ++ if (ret) ++ goto err; ++ ++ iwuqp->qp.destroy_pending = true; ++ ++ ret = zxdh_destroy_vmapped_qp(iwuqp); ++ if (ret) ++ goto err; ++ ++ /* Clean any pending completions from the cq(s) */ ++ if (iwuqp->send_cq) ++ zxdh_clean_cqes(&iwuqp->qp, iwuqp->send_cq); ++ ++ if (iwuqp->recv_cq && iwuqp->recv_cq != iwuqp->send_cq) ++ zxdh_clean_cqes(&iwuqp->qp, iwuqp->recv_cq); ++ ++ if (iwuqp->qp.sq_wrtrk_array) ++ free(iwuqp->qp.sq_wrtrk_array); ++ if (iwuqp->qp.rq_wrid_array) ++ free(iwuqp->qp.rq_wrid_array); ++ if (iwuqp->qp.split_sg_list) ++ free(iwuqp->qp.split_sg_list); ++ ++ zxdh_free_hw_buf(iwuqp->qp.sq_base, iwuqp->buf_size); ++ free(iwuqp->recv_sges); ++ free(iwuqp); ++ return 0; ++ ++err: ++ return ret; ++} ++ ++/** ++ * zxdh_copy_sg_list - copy sg list for qp ++ * @sg_list: copied into sg_list ++ * @sgl: copy from sgl ++ * @num_sges: count of sg entries ++ * @max_sges: count of max supported sg entries 
++ */ ++static void zxdh_copy_sg_list(struct zxdh_sge *sg_list, struct ibv_sge *sgl, ++ int num_sges) ++{ ++ int i; ++ ++ for (i = 0; i < num_sges; i++) { ++ sg_list[i].tag_off = sgl[i].addr; ++ sg_list[i].len = sgl[i].length; ++ sg_list[i].stag = sgl[i].lkey; ++ } ++} ++ ++/** ++ * calc_type2_mw_stag - calculate type 2 MW stag ++ * @rkey: desired rkey of the MW ++ * @mw_rkey: type2 memory window rkey ++ * ++ * compute type2 memory window stag by taking lower 8 bits ++ * of the desired rkey and leaving 24 bits if mw->rkey unchanged ++ */ ++static inline __u32 calc_type2_mw_stag(__u32 rkey, __u32 mw_rkey) ++{ ++ const __u32 mask = 0xff; ++ ++ return (rkey & mask) | (mw_rkey & ~mask); ++} ++ ++/** ++ * zxdh_post_send - post send wr for user application ++ * @ib_qp: qp to post wr ++ * @ib_wr: work request ptr ++ * @bad_wr: return of bad wr if err ++ */ ++int zxdh_upost_send(struct ibv_qp *ib_qp, struct ibv_send_wr *ib_wr, ++ struct ibv_send_wr **bad_wr) ++{ ++ struct zxdh_post_sq_info info; ++ struct zxdh_uvcontext *iwvctx; ++ struct zxdh_dev_attrs *dev_attrs; ++ enum zxdh_status_code ret = 0; ++ struct zxdh_uqp *iwuqp; ++ bool reflush = false; ++ int err = 0; ++ struct verbs_mr *vmr = NULL; ++ struct zxdh_umr *umr = NULL; ++ __u64 mr_va = 0, mw_va = 0, value_dffer = 0, mw_pa_pble_index = 0; ++ __u16 mr_offset = 0; ++ iwvctx = container_of(ib_qp->context, struct zxdh_uvcontext, ++ ibv_ctx.context); ++ if (ib_qp->state != IBV_QPS_RTS) { ++ *bad_wr = ib_wr; ++ verbs_err(&iwvctx->ibv_ctx, "zrdma: post send at state:%d\n", ++ ib_qp->state); ++ return -EINVAL; ++ } ++ ++ iwuqp = container_of(ib_qp, struct zxdh_uqp, vqp.qp); ++ dev_attrs = &iwvctx->dev_attrs; ++ ++ err = pthread_spin_lock(&iwuqp->lock); ++ if (err) ++ return err; ++ ++ if (!ZXDH_RING_MORE_WORK(iwuqp->qp.sq_ring) && ++ ib_qp->state == IBV_QPS_ERR) ++ reflush = true; ++ ++ while (ib_wr) { ++ memset(&info, 0, sizeof(info)); ++ info.wr_id = (__u64)(ib_wr->wr_id); ++ if ((ib_wr->send_flags & IBV_SEND_SIGNALED) || ++ iwuqp->sq_sig_all) ++ info.signaled = true; ++ if (ib_wr->send_flags & IBV_SEND_FENCE) ++ info.read_fence = true; ++ ++ switch (ib_wr->opcode) { ++ case IBV_WR_SEND_WITH_IMM: ++ if (iwuqp->qp.qp_caps & ZXDH_SEND_WITH_IMM) { ++ info.imm_data_valid = true; ++ info.imm_data = ntohl(ib_wr->imm_data); ++ } else { ++ err = EINVAL; ++ break; ++ } ++ SWITCH_FALLTHROUGH; ++ case IBV_WR_SEND: ++ case IBV_WR_SEND_WITH_INV: ++ if (ib_wr->send_flags & IBV_SEND_SOLICITED) ++ info.solicited = 1; ++ ++ if (ib_wr->opcode == IBV_WR_SEND) { ++ if (ib_qp->qp_type == IBV_QPT_UD) ++ info.op_type = ZXDH_OP_TYPE_UD_SEND; ++ else ++ info.op_type = ZXDH_OP_TYPE_SEND; ++ } else if (ib_wr->opcode == IBV_WR_SEND_WITH_IMM) { ++ if (ib_qp->qp_type == IBV_QPT_UD) ++ info.op_type = ++ ZXDH_OP_TYPE_UD_SEND_WITH_IMM; ++ else ++ info.op_type = ++ ZXDH_OP_TYPE_SEND_WITH_IMM; ++ } else { ++ info.op_type = ZXDH_OP_TYPE_SEND_INV; ++ info.stag_to_inv = ib_wr->invalidate_rkey; ++ } ++ ++ if ((ib_wr->send_flags & IBV_SEND_INLINE) && ++ (ib_wr->num_sge != 0)) { ++ ret = zxdh_get_inline_data( ++ iwuqp->inline_data, ib_wr, ++ &info.op.inline_rdma_send.len); ++ if (ret) { ++ verbs_err( ++ &iwvctx->ibv_ctx, ++ "zrdma: get inline data fail\n"); ++ pthread_spin_unlock(&iwuqp->lock); ++ return -EINVAL; ++ } ++ info.op.inline_rdma_send.data = ++ iwuqp->inline_data; ++ if (ib_qp->qp_type == IBV_QPT_UD) { ++ struct zxdh_uah *ah = ++ container_of(ib_wr->wr.ud.ah, ++ struct zxdh_uah, ++ ibv_ah); ++ info.op.inline_rdma_send.ah_id = ++ ah->ah_id; ++ info.op.inline_rdma_send.qkey = 
++ ib_wr->wr.ud.remote_qkey; ++ info.op.inline_rdma_send.dest_qp = ++ ib_wr->wr.ud.remote_qpn; ++ ret = zxdh_ud_inline_send(&iwuqp->qp, ++ &info, false); ++ } else { ++ ret = zxdh_rc_inline_send(&iwuqp->qp, ++ &info, false); ++ } ++ } else { ++ info.op.send.num_sges = ib_wr->num_sge; ++ info.op.send.sg_list = ++ (struct zxdh_sge *)ib_wr->sg_list; ++ if (ib_qp->qp_type == IBV_QPT_UD) { ++ struct zxdh_uah *ah = ++ container_of(ib_wr->wr.ud.ah, ++ struct zxdh_uah, ++ ibv_ah); ++ ++ info.op.inline_rdma_send.ah_id = ++ ah->ah_id; ++ info.op.inline_rdma_send.qkey = ++ ib_wr->wr.ud.remote_qkey; ++ info.op.inline_rdma_send.dest_qp = ++ ib_wr->wr.ud.remote_qpn; ++ ret = zxdh_ud_send(&iwuqp->qp, &info, ++ false); ++ } else { ++ ret = zxdh_rc_send(&iwuqp->qp, &info, ++ false); ++ } ++ } ++ if (ret) ++ err = (ret == ZXDH_ERR_QP_TOOMANY_WRS_POSTED) ? ++ ENOMEM : ++ EINVAL; ++ break; ++ case IBV_WR_RDMA_WRITE_WITH_IMM: ++ if (iwuqp->qp.qp_caps & ZXDH_WRITE_WITH_IMM) { ++ info.imm_data_valid = true; ++ info.imm_data = ntohl(ib_wr->imm_data); ++ } else { ++ err = -EINVAL; ++ break; ++ } ++ SWITCH_FALLTHROUGH; ++ case IBV_WR_RDMA_WRITE: ++ if (ib_wr->send_flags & IBV_SEND_SOLICITED) ++ info.solicited = 1; ++ ++ if (ib_wr->opcode == IBV_WR_RDMA_WRITE) ++ info.op_type = ZXDH_OP_TYPE_WRITE; ++ else ++ info.op_type = ZXDH_OP_TYPE_WRITE_WITH_IMM; ++ ++ if ((ib_wr->send_flags & IBV_SEND_INLINE) && ++ (ib_wr->num_sge != 0)) { ++ ret = zxdh_get_inline_data( ++ iwuqp->inline_data, ib_wr, ++ &info.op.inline_rdma_write.len); ++ if (ret) { ++ verbs_err( ++ &iwvctx->ibv_ctx, ++ "zrdma: get inline data fail\n"); ++ pthread_spin_unlock(&iwuqp->lock); ++ return -EINVAL; ++ } ++ info.op.inline_rdma_write.data = ++ iwuqp->inline_data; ++ info.op.inline_rdma_write.rem_addr.tag_off = ++ ib_wr->wr.rdma.remote_addr; ++ info.op.inline_rdma_write.rem_addr.stag = ++ ib_wr->wr.rdma.rkey; ++ ret = zxdh_inline_rdma_write(&iwuqp->qp, &info, ++ false); ++ } else { ++ info.op.rdma_write.lo_sg_list = ++ (void *)ib_wr->sg_list; ++ info.op.rdma_write.num_lo_sges = ib_wr->num_sge; ++ info.op.rdma_write.rem_addr.tag_off = ++ ib_wr->wr.rdma.remote_addr; ++ info.op.rdma_write.rem_addr.stag = ++ ib_wr->wr.rdma.rkey; ++ ret = zxdh_rdma_write(&iwuqp->qp, &info, false); ++ } ++ if (ret) ++ err = (ret == ZXDH_ERR_QP_TOOMANY_WRS_POSTED) ? ++ ENOMEM : ++ EINVAL; ++ break; ++ case IBV_WR_RDMA_READ: ++ if (ib_wr->num_sge > dev_attrs->max_hw_read_sges) { ++ err = EINVAL; ++ break; ++ } ++ info.op_type = ZXDH_OP_TYPE_READ; ++ info.op.rdma_read.rem_addr.tag_off = ++ ib_wr->wr.rdma.remote_addr; ++ info.op.rdma_read.rem_addr.stag = ib_wr->wr.rdma.rkey; ++ ++ info.op.rdma_read.lo_sg_list = (void *)ib_wr->sg_list; ++ info.op.rdma_read.num_lo_sges = ib_wr->num_sge; ++ ret = zxdh_rdma_read(&iwuqp->qp, &info, false, false); ++ if (ret) ++ err = (ret == ZXDH_ERR_QP_TOOMANY_WRS_POSTED) ? 
++ ENOMEM : ++ EINVAL; ++ break; ++ case IBV_WR_BIND_MW: ++ vmr = verbs_get_mr(ib_wr->bind_mw.bind_info.mr); ++ umr = container_of(vmr, struct zxdh_umr, vmr); ++ mr_va = (uintptr_t)ib_wr->bind_mw.bind_info.mr->addr; ++ mw_va = ib_wr->bind_mw.bind_info.addr; ++ mr_offset = 0; ++ value_dffer = 0; ++ mw_pa_pble_index = 0; ++ ++ if (ib_qp->qp_type != IBV_QPT_RC) { ++ err = EINVAL; ++ break; ++ } ++ info.op_type = ZXDH_OP_TYPE_BIND_MW; ++ info.op.bind_window.mr_stag = ++ ib_wr->bind_mw.bind_info.mr->rkey; ++ ++ if (ib_wr->bind_mw.mw->type == IBV_MW_TYPE_1) { ++ info.op.bind_window.mem_window_type_1 = true; ++ info.op.bind_window.mw_stag = ++ ib_wr->bind_mw.rkey; ++ } else { ++ info.op.bind_window.mem_window_type_1 = false; ++ info.op.bind_window.mw_stag = ++ calc_type2_mw_stag( ++ ib_wr->bind_mw.rkey, ++ ib_wr->bind_mw.mw->rkey); ++ ib_wr->bind_mw.mw->rkey = ++ info.op.bind_window.mw_stag; ++ } ++ ++ if (ib_wr->bind_mw.bind_info.mw_access_flags & ++ IBV_ACCESS_ZERO_BASED) { ++ info.op.bind_window.addressing_type = ++ ZXDH_ADDR_TYPE_ZERO_BASED; ++ if (ib_wr->bind_mw.mw->type == IBV_MW_TYPE_1) { ++ err = EINVAL; ++ break; ++ } ++ ++ info.op.bind_window.addressing_type = ++ ZXDH_ADDR_TYPE_ZERO_BASED; ++ info.op.bind_window.host_page_size = ++ umr->host_page_size; ++ if (umr->host_page_size == ZXDH_PAGE_SIZE_4K) { ++ mr_offset = mr_va & 0x0fff; ++ value_dffer = mw_va - mr_va; ++ if (umr->leaf_pbl_size == 3) { ++ mw_pa_pble_index = ++ (mr_offset + ++ value_dffer) / ++ (4096 * 512); ++ info.op.bind_window ++ .mw_pa_pble_index = ++ umr->mr_pa_pble_index + ++ mw_pa_pble_index; ++ mw_pa_pble_index = ++ ((mr_offset + ++ value_dffer) / ++ 4096) % ++ 512; ++ ++ info.op.bind_window ++ .root_leaf_offset = ++ (__u16)mw_pa_pble_index; ++ info.op.bind_window.va = ++ (void *)(uintptr_t)(mw_va & ++ 0x0fff); ++ info.op.bind_window ++ .leaf_pbl_size = 3; ++ ++ } else if (umr->leaf_pbl_size == 1) { ++ mw_pa_pble_index = ++ (mr_offset + ++ value_dffer) / ++ 4096; ++ info.op.bind_window ++ .mw_pa_pble_index = ++ umr->mr_pa_pble_index + ++ mw_pa_pble_index; ++ info.op.bind_window ++ .leaf_pbl_size = 1; ++ info.op.bind_window.va = ++ (void *)(uintptr_t)(mw_va & ++ 0x0fff); ++ info.op.bind_window ++ .root_leaf_offset = 0; ++ } else { ++ mw_pa_pble_index = ++ umr->mr_pa_pble_index + ++ mr_offset + value_dffer; ++ info.op.bind_window.va = ++ (void *)(uintptr_t)(mw_va & ++ 0x0fff); ++ info.op.bind_window ++ .mw_pa_pble_index = ++ mw_pa_pble_index; ++ info.op.bind_window ++ .leaf_pbl_size = 0; ++ info.op.bind_window ++ .root_leaf_offset = 0; ++ } ++ ++ } else if (umr->host_page_size == ++ ZXDH_PAGE_SIZE_2M) { ++ mr_offset = mr_va & 0x1FFFFF; ++ value_dffer = mw_va - mr_va; ++ if (umr->leaf_pbl_size == 3) { ++ mw_pa_pble_index = ++ (mr_offset + ++ value_dffer) / ++ ((4096 * 512) * 512); ++ info.op.bind_window ++ .mw_pa_pble_index = ++ umr->mr_pa_pble_index + ++ mw_pa_pble_index; ++ mw_pa_pble_index = ++ ((mr_offset + ++ value_dffer) / ++ (4096 * 512)) % ++ 512; ++ ++ info.op.bind_window ++ .root_leaf_offset = ++ (__u16)mw_pa_pble_index; ++ info.op.bind_window.va = ++ (void *)(uintptr_t)(mw_va & ++ 0x1FFFFF); ++ info.op.bind_window ++ .leaf_pbl_size = 3; ++ ++ } else if (umr->leaf_pbl_size == 1) { ++ mw_pa_pble_index = ++ (mr_offset + ++ value_dffer) / ++ (4096 * 512); ++ info.op.bind_window ++ .mw_pa_pble_index = ++ umr->mr_pa_pble_index + ++ mw_pa_pble_index; ++ info.op.bind_window ++ .leaf_pbl_size = 1; ++ info.op.bind_window.va = ++ (void *)(uintptr_t)(mw_va & ++ 0x1FFFFF); ++ info.op.bind_window ++ .root_leaf_offset = 0; 
++ } else { ++ mw_pa_pble_index = ++ umr->mr_pa_pble_index + ++ mr_offset + value_dffer; ++ info.op.bind_window.va = ++ (void *)(uintptr_t)(mw_va & ++ 0x1FFFFF); ++ info.op.bind_window ++ .mw_pa_pble_index = ++ mw_pa_pble_index; ++ info.op.bind_window ++ .leaf_pbl_size = 0; ++ info.op.bind_window ++ .root_leaf_offset = 0; ++ } ++ } else if (umr->host_page_size == ++ ZXDH_PAGE_SIZE_1G) { ++ mr_offset = mr_va & 0x3FFFFFFF; ++ value_dffer = mw_va - mr_va; ++ if (umr->leaf_pbl_size == 1) { ++ mw_pa_pble_index = ++ (mr_offset + ++ value_dffer) / ++ (1024 * 1024 * 1024); ++ info.op.bind_window ++ .mw_pa_pble_index = ++ umr->mr_pa_pble_index + ++ mw_pa_pble_index; ++ info.op.bind_window ++ .leaf_pbl_size = 1; ++ info.op.bind_window.va = ++ (void *)(uintptr_t)(mw_va & ++ 0x3FFFFFFF); ++ info.op.bind_window ++ .root_leaf_offset = 0; ++ } else if (umr->leaf_pbl_size == 0) { ++ mw_pa_pble_index = ++ umr->mr_pa_pble_index + ++ mr_offset + value_dffer; ++ info.op.bind_window.va = ++ (void *)(uintptr_t)(mw_va & ++ 0x3FFFFFFF); ++ info.op.bind_window ++ .mw_pa_pble_index = ++ mw_pa_pble_index; ++ info.op.bind_window ++ .leaf_pbl_size = 0; ++ info.op.bind_window ++ .root_leaf_offset = 0; ++ } ++ } ++ ++ } else { ++ info.op.bind_window.addressing_type = ++ ZXDH_ADDR_TYPE_VA_BASED; ++ info.op.bind_window.va = ++ (void *)(uintptr_t) ++ ib_wr->bind_mw.bind_info.addr; ++ info.op.bind_window.host_page_size = ++ umr->host_page_size; ++ ++ if (umr->host_page_size == ZXDH_PAGE_SIZE_4K) { ++ mr_offset = mr_va & 0x0fff; ++ value_dffer = mw_va - mr_va; ++ if (umr->leaf_pbl_size == 3) { ++ mw_pa_pble_index = ++ (mr_offset + ++ value_dffer) / ++ (4096 * 512); ++ info.op.bind_window ++ .mw_pa_pble_index = ++ umr->mr_pa_pble_index + ++ mw_pa_pble_index; ++ mw_pa_pble_index = ++ ((mr_offset + ++ value_dffer) / ++ 4096) % ++ 512; ++ info.op.bind_window ++ .root_leaf_offset = ++ (__u16)mw_pa_pble_index; ++ info.op.bind_window ++ .leaf_pbl_size = 3; ++ } else if (umr->leaf_pbl_size == 1) { ++ info.op.bind_window ++ .mw_pa_pble_index = ++ umr->mr_pa_pble_index + ++ ((mr_offset + ++ value_dffer) / ++ 4096); ++ info.op.bind_window ++ .leaf_pbl_size = 1; ++ info.op.bind_window ++ .root_leaf_offset = 0; ++ } else { ++ info.op.bind_window ++ .leaf_pbl_size = 0; ++ info.op.bind_window ++ .mw_pa_pble_index = ++ umr->mr_pa_pble_index + ++ (mr_va & 0x0fff) + ++ (mw_va - mr_va); ++ info.op.bind_window ++ .root_leaf_offset = 0; ++ } ++ } else if (umr->host_page_size == ++ ZXDH_PAGE_SIZE_2M) { ++ mr_offset = mr_va & 0x1FFFFF; ++ value_dffer = mw_va - mr_va; ++ if (umr->leaf_pbl_size == 3) { ++ mw_pa_pble_index = ++ (mr_offset + ++ value_dffer) / ++ ((4096 * 512) * 512); ++ info.op.bind_window ++ .mw_pa_pble_index = ++ umr->mr_pa_pble_index + ++ mw_pa_pble_index; ++ mw_pa_pble_index = ++ ((mr_offset + ++ value_dffer) / ++ (4096 * 512)) % ++ 512; ++ info.op.bind_window ++ .root_leaf_offset = ++ (__u16)mw_pa_pble_index; ++ info.op.bind_window ++ .leaf_pbl_size = 3; ++ } else if (umr->leaf_pbl_size == 1) { ++ info.op.bind_window ++ .mw_pa_pble_index = ++ umr->mr_pa_pble_index + ++ ((mr_offset + ++ value_dffer) / ++ (4096 * 512)); ++ info.op.bind_window ++ .leaf_pbl_size = 1; ++ info.op.bind_window ++ .root_leaf_offset = 0; ++ } else { ++ info.op.bind_window ++ .leaf_pbl_size = 0; ++ info.op.bind_window ++ .mw_pa_pble_index = ++ umr->mr_pa_pble_index + ++ (mr_va & 0x1FFFFF) + ++ (mw_va - mr_va); ++ info.op.bind_window ++ .root_leaf_offset = 0; ++ } ++ } else if (umr->host_page_size == ++ ZXDH_PAGE_SIZE_1G) { ++ mr_offset = mr_va & 0x3FFFFFFF; 
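++				/*
++				 * 1G host pages: the low 30 bits of the VA are the
++				 * in-page offset; the MR-to-MW VA delta computed
++				 * next selects the PBLE index for the window.
++				 */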
++ value_dffer = mw_va - mr_va; ++ if (umr->leaf_pbl_size == 1) { ++ info.op.bind_window ++ .mw_pa_pble_index = ++ umr->mr_pa_pble_index + ++ ((mr_offset + ++ value_dffer) / ++ (1024 * 1024 * 1024)); ++ info.op.bind_window ++ .leaf_pbl_size = 1; ++ info.op.bind_window ++ .root_leaf_offset = 0; ++ } else if (umr->leaf_pbl_size == 0) { ++ info.op.bind_window ++ .leaf_pbl_size = 0; ++ info.op.bind_window ++ .mw_pa_pble_index = ++ umr->mr_pa_pble_index + ++ (mr_va & 0x3FFFFFFF) + ++ (mw_va - mr_va); ++ info.op.bind_window ++ .root_leaf_offset = 0; ++ } ++ } ++ } ++ ++ info.op.bind_window.bind_len = ++ ib_wr->bind_mw.bind_info.length; ++ info.op.bind_window.ena_reads = ++ (ib_wr->bind_mw.bind_info.mw_access_flags & ++ IBV_ACCESS_REMOTE_READ) ? ++ 1 : ++ 0; ++ info.op.bind_window.ena_writes = ++ (ib_wr->bind_mw.bind_info.mw_access_flags & ++ IBV_ACCESS_REMOTE_WRITE) ? ++ 1 : ++ 0; ++ ++ ret = zxdh_mw_bind(&iwuqp->qp, &info, false); ++ if (ret) ++ err = (ret == ZXDH_ERR_QP_TOOMANY_WRS_POSTED) ? ++ ENOMEM : ++ EINVAL; ++ break; ++ case IBV_WR_LOCAL_INV: ++ info.op_type = ZXDH_OP_TYPE_LOCAL_INV; ++ info.op.inv_local_stag.target_stag = ++ ib_wr->invalidate_rkey; ++ ret = zxdh_stag_local_invalidate(&iwuqp->qp, &info, ++ true); ++ if (ret) ++ err = (ret == ZXDH_ERR_QP_TOOMANY_WRS_POSTED) ? ++ ENOMEM : ++ EINVAL; ++ break; ++ default: ++ /* error */ ++ err = EINVAL; ++ break; ++ } ++ if (err) ++ break; ++ ++ ib_wr = ib_wr->next; ++ } ++ ++ if (err) ++ *bad_wr = ib_wr; ++ ++ zxdh_qp_post_wr(&iwuqp->qp); ++ if (reflush) ++ zxdh_issue_flush(ib_qp, 1, 0); ++ ++ pthread_spin_unlock(&iwuqp->lock); ++ ++ return err; ++} ++ ++/** ++ * zxdh_post_recv - post receive wr for user application ++ * @ib_wr: work request for receive ++ * @bad_wr: bad wr caused an error ++ */ ++int zxdh_upost_recv(struct ibv_qp *ib_qp, struct ibv_recv_wr *ib_wr, ++ struct ibv_recv_wr **bad_wr) ++{ ++ struct zxdh_post_rq_info post_recv = {}; ++ enum zxdh_status_code ret = 0; ++ struct zxdh_sge *sg_list; ++ struct zxdh_uqp *iwuqp; ++ bool reflush = false; ++ int err = 0; ++ ++ iwuqp = container_of(ib_qp, struct zxdh_uqp, vqp.qp); ++ sg_list = iwuqp->recv_sges; ++ ++ if (unlikely(ib_qp->state == IBV_QPS_RESET || ib_qp->srq)) { ++ *bad_wr = ib_wr; ++ return -EINVAL; ++ } ++ ++ err = pthread_spin_lock(&iwuqp->lock); ++ if (err) ++ return err; ++ ++ if (unlikely(!ZXDH_RING_MORE_WORK(iwuqp->qp.rq_ring)) && ++ ib_qp->state == IBV_QPS_ERR) ++ reflush = true; ++ ++ while (ib_wr) { ++ if (unlikely(ib_wr->num_sge > iwuqp->qp.max_rq_frag_cnt)) { ++ *bad_wr = ib_wr; ++ err = EINVAL; ++ goto error; ++ } ++ post_recv.num_sges = ib_wr->num_sge; ++ post_recv.wr_id = ib_wr->wr_id; ++ zxdh_copy_sg_list(sg_list, ib_wr->sg_list, ib_wr->num_sge); ++ post_recv.sg_list = sg_list; ++ ret = zxdh_post_receive(&iwuqp->qp, &post_recv); ++ if (unlikely(ret)) { ++ err = (ret == ZXDH_ERR_QP_TOOMANY_WRS_POSTED) ? 
ENOMEM : ++ EINVAL; ++ *bad_wr = ib_wr; ++ goto error; ++ } ++ ++ if (reflush) ++ zxdh_issue_flush(ib_qp, 0, 1); ++ ++ ib_wr = ib_wr->next; ++ } ++error: ++ zxdh_qp_set_shadow_area(&iwuqp->qp); ++ pthread_spin_unlock(&iwuqp->lock); ++ ++ return err; ++} ++ ++/** ++ * zxdh_ucreate_ah - create address handle associated with a pd ++ * @ibpd: pd for the address handle ++ * @attr: attributes of address handle ++ */ ++struct ibv_ah *zxdh_ucreate_ah(struct ibv_pd *ibpd, struct ibv_ah_attr *attr) ++{ ++ struct zxdh_uah *ah; ++ struct zxdh_ucreate_ah_resp resp; ++ int err; ++ ++ ah = calloc(1, sizeof(*ah)); ++ if (!ah) ++ return NULL; ++ ++ err = ibv_cmd_create_ah(ibpd, &ah->ibv_ah, attr, &resp.ibv_resp, ++ sizeof(resp)); ++ if (err) { ++ free(ah); ++ errno = err; ++ return NULL; ++ } ++ ++ ah->ah_id = resp.ah_id; ++ ++ return &ah->ibv_ah; ++} ++ ++/** ++ * zxdh_udestroy_ah - destroy the address handle ++ * @ibah: address handle ++ */ ++int zxdh_udestroy_ah(struct ibv_ah *ibah) ++{ ++ struct zxdh_uah *ah; ++ int ret; ++ ++ ah = container_of(ibah, struct zxdh_uah, ibv_ah); ++ ++ ret = ibv_cmd_destroy_ah(ibah); ++ if (ret) ++ return ret; ++ ++ free(ah); ++ ++ return 0; ++} ++ ++/** ++ * zxdh_uattach_mcast - Attach qp to multicast group implemented ++ * @qp: The queue pair ++ * @gid:The Global ID for multicast group ++ * @lid: The Local ID ++ */ ++int zxdh_uattach_mcast(struct ibv_qp *qp, const union ibv_gid *gid, ++ uint16_t lid) ++{ ++ return ibv_cmd_attach_mcast(qp, gid, lid); ++} ++ ++/** ++ * zxdh_udetach_mcast - Detach qp from multicast group ++ * @qp: The queue pair ++ * @gid:The Global ID for multicast group ++ * @lid: The Local ID ++ */ ++int zxdh_udetach_mcast(struct ibv_qp *qp, const union ibv_gid *gid, ++ uint16_t lid) ++{ ++ return ibv_cmd_detach_mcast(qp, gid, lid); ++} ++ ++/** ++ * zxdh_uresize_cq - resizes a cq ++ * @cq: cq to resize ++ * @cqe: the number of cqes of the new cq ++ */ ++int zxdh_uresize_cq(struct ibv_cq *cq, int cqe) ++{ ++ struct zxdh_uvcontext *iwvctx; ++ struct zxdh_dev_attrs *dev_attrs; ++ struct zxdh_uresize_cq cmd = {}; ++ struct ib_uverbs_resize_cq_resp resp = {}; ++ struct zxdh_ureg_mr reg_mr_cmd = {}; ++ struct ib_uverbs_reg_mr_resp reg_mr_resp = {}; ++ struct zxdh_cq_buf *cq_buf = NULL; ++ struct zxdh_cqe *cq_base = NULL; ++ struct verbs_mr new_mr = {}; ++ struct zxdh_ucq *iwucq; ++ size_t cq_size; ++ __u32 cq_pages; ++ int cqe_needed; ++ int ret = 0; ++ ++ iwucq = container_of(cq, struct zxdh_ucq, verbs_cq.cq); ++ iwvctx = container_of(cq->context, struct zxdh_uvcontext, ++ ibv_ctx.context); ++ dev_attrs = &iwvctx->dev_attrs; ++ ++ if (!(dev_attrs->feature_flags & ZXDH_FEATURE_CQ_RESIZE)) ++ return -EOPNOTSUPP; ++ ++ if (cqe > ZXDH_MAX_CQ_SIZE) ++ return -EINVAL; ++ ++ cqe_needed = zxdh_cq_round_up(cqe + 1); ++ ++ if (cqe_needed < ZXDH_U_MINCQ_SIZE) ++ cqe_needed = ZXDH_U_MINCQ_SIZE; ++ ++ if (cqe_needed == iwucq->cq.cq_size) ++ return 0; ++ ++ cq_size = get_cq_total_bytes(cqe_needed); ++ cq_pages = cq_size >> ZXDH_HW_PAGE_SHIFT; ++ cq_base = zxdh_alloc_hw_buf(cq_size); ++ if (!cq_base) ++ return -ENOMEM; ++ ++ memset(cq_base, 0, cq_size); ++ ++ cq_buf = malloc(sizeof(*cq_buf)); ++ if (!cq_buf) { ++ ret = -ENOMEM; ++ goto err_buf; ++ } ++ ++ new_mr.ibv_mr.pd = iwucq->vmr.ibv_mr.pd; ++ reg_mr_cmd.reg_type = ZXDH_MEMREG_TYPE_CQ; ++ reg_mr_cmd.cq_pages = cq_pages; ++ ++ ret = ibv_cmd_reg_mr(new_mr.ibv_mr.pd, cq_base, cq_size, ++ (uintptr_t)cq_base, IBV_ACCESS_LOCAL_WRITE, ++ &new_mr, ®_mr_cmd.ibv_cmd, sizeof(reg_mr_cmd), ++ ®_mr_resp, sizeof(reg_mr_resp)); ++ if 
(ret) ++ goto err_dereg_mr; ++ ++ ret = pthread_spin_lock(&iwucq->lock); ++ if (ret) ++ goto err_lock; ++ ++ cmd.user_cq_buffer = (__u64)((uintptr_t)cq_base); ++ ret = ibv_cmd_resize_cq(&iwucq->verbs_cq.cq, cqe_needed, &cmd.ibv_cmd, ++ sizeof(cmd), &resp, sizeof(resp)); ++ if (ret) ++ goto err_resize; ++ ++ memcpy(&cq_buf->cq, &iwucq->cq, sizeof(cq_buf->cq)); ++ cq_buf->vmr = iwucq->vmr; ++ iwucq->vmr = new_mr; ++ zxdh_cq_resize(&iwucq->cq, cq_base, cqe_needed); ++ iwucq->verbs_cq.cq.cqe = cqe; ++ list_add_tail(&iwucq->resize_list, &cq_buf->list); ++ iwucq->resize_enable = true; ++ pthread_spin_unlock(&iwucq->lock); ++ ++ return ret; ++ ++err_resize: ++ pthread_spin_unlock(&iwucq->lock); ++err_lock: ++ ibv_cmd_dereg_mr(&new_mr); ++err_dereg_mr: ++ free(cq_buf); ++err_buf: ++ zxdh_free_hw_buf(cq_base, cq_size); ++ return ret; ++} ++ ++static void zxdh_srq_wqe_init(struct zxdh_usrq *iwusrq) ++{ ++ uint32_t i; ++ struct zxdh_srq *srq; ++ __le64 *wqe; ++ __u64 hdr; ++ ++ srq = &iwusrq->srq; ++ zxdh_dbg(verbs_get_ctx(iwusrq->ibv_srq.context), ZXDH_DBG_SRQ, ++ "%s head:%d tail:%d\n", __func__, srq->srq_ring.head, ++ srq->srq_ring.tail); ++ for (i = srq->srq_ring.head; i < srq->srq_ring.tail; i++) { ++ wqe = zxdh_get_srq_wqe(srq, i); ++ ++ hdr = FIELD_PREP(ZXDHQPSRQ_NEXT_WQE_INDEX, (uint32_t)(i + 1)); ++ ++ udma_to_device_barrier(); /* make sure WQE is populated before valid bit is set */ ++ set_64bit_val(wqe, 0, hdr); ++ } ++} ++ ++static size_t zxdh_get_srq_queue_size(int srqdepth) ++{ ++ return roundup(srqdepth * ZXDH_SRQ_WQE_MIN_SIZE, ZXDH_HW_PAGE_SIZE); ++} ++ ++static size_t zxdh_get_srq_list_size(size_t srq_size) ++{ ++ return roundup(srq_size * sizeof(__u16), ZXDH_HW_PAGE_SIZE); ++} ++ ++static size_t zxdh_get_srq_db_size(void) ++{ ++ return 8 * sizeof(char); ++} ++ ++static size_t zxdh_get_total_srq_size(struct zxdh_usrq *iwusrq, int srqdepth, ++ size_t srq_size) ++{ ++ size_t total_srq_queue_size; ++ size_t total_srq_list_size; ++ size_t total_srq_db_size; ++ size_t total_srq_size; ++ ++ total_srq_queue_size = zxdh_get_srq_queue_size(srqdepth); ++ iwusrq->buf_size = total_srq_queue_size; ++ total_srq_list_size = zxdh_get_srq_list_size(srq_size); ++ iwusrq->list_buf_size = total_srq_list_size; ++ total_srq_db_size = zxdh_get_srq_db_size(); ++ iwusrq->db_buf_size = total_srq_db_size; ++ total_srq_size = ++ total_srq_queue_size + total_srq_list_size + total_srq_db_size; ++ iwusrq->total_buf_size = total_srq_size; ++ zxdh_dbg( ++ verbs_get_ctx(iwusrq->ibv_srq.context), ZXDH_DBG_SRQ, ++ "%s total_srq_queue_size:%ld total_srq_list_size:%ld total_srq_db_size:%ld srqdepth:%d\n", ++ __func__, total_srq_queue_size, total_srq_list_size, ++ total_srq_db_size, srqdepth); ++ ++ return total_srq_size; ++} ++ ++static int zxdh_alloc_srq_buf(struct zxdh_usrq *iwusrq, ++ struct zxdh_srq_init_info *info, ++ size_t total_srq_size) ++{ ++ info->srq_base = zxdh_alloc_hw_buf(total_srq_size); ++ if (!info->srq_base) ++ return -ENOMEM; ++ memset(info->srq_base, 0, total_srq_size); ++ info->srq_list_base = ++ (__le16 *)&info ++ ->srq_base[iwusrq->buf_size / ZXDH_SRQ_WQE_MIN_SIZE]; ++ info->srq_db_base = ++ (__le64 *)&info->srq_list_base[iwusrq->list_buf_size / ++ (sizeof(__u16))]; ++ *(__le64 *)info->srq_db_base = ZXDH_SRQ_DB_INIT_VALUE; ++ zxdh_dbg(verbs_get_ctx(iwusrq->ibv_srq.context), ZXDH_DBG_SRQ, ++ "%s srq_base:0x%p srq_list_base:0x%p srq_db_base:0x%p\n", ++ __func__, info->srq_base, info->srq_list_base, ++ info->srq_db_base); ++ return 0; ++} ++ ++static int zxdh_reg_srq_mr(struct ibv_pd *pd, struct 
zxdh_srq_init_info *info, ++ size_t total_srq_size, uint16_t srq_pages, ++ uint16_t srq_list_pages, struct zxdh_usrq *iwusrq) ++{ ++ struct zxdh_ureg_mr reg_mr_cmd = {}; ++ struct ib_uverbs_reg_mr_resp reg_mr_resp = {}; ++ int ret; ++ ++ reg_mr_cmd.reg_type = ZXDH_MEMREG_TYPE_SRQ; ++ reg_mr_cmd.srq_pages = srq_pages; ++ reg_mr_cmd.srq_list_pages = srq_list_pages; ++ ret = ibv_cmd_reg_mr(pd, info->srq_base, total_srq_size, ++ (uintptr_t)info->srq_base, IBV_ACCESS_LOCAL_WRITE, ++ &iwusrq->vmr, ®_mr_cmd.ibv_cmd, ++ sizeof(reg_mr_cmd), ®_mr_resp, ++ sizeof(reg_mr_resp)); ++ if (ret) ++ return ret; ++ ++ return 0; ++} ++ ++static int create_srq(struct ibv_pd *pd, struct zxdh_usrq *iwusrq, ++ struct ibv_srq_init_attr *attr, ++ struct zxdh_srq_init_info *info) ++{ ++ struct zxdh_ucreate_srq cmd = {}; ++ struct zxdh_ucreate_srq_resp resp = {}; ++ int ret; ++ ++ cmd.user_wqe_bufs = (__u64)((uintptr_t)info->srq_base); ++ cmd.user_compl_ctx = (__u64)(uintptr_t)&iwusrq->srq; ++ cmd.user_wqe_list = (__u64)((uintptr_t)info->srq_list_base); ++ cmd.user_wqe_db = (__u64)((uintptr_t)info->srq_db_base); ++ ret = ibv_cmd_create_srq(pd, &iwusrq->ibv_srq, attr, &cmd.ibv_cmd, ++ sizeof(cmd), &resp.ibv_resp, ++ sizeof(struct zxdh_ucreate_srq_resp)); ++ if (ret) ++ return ret; ++ ++ iwusrq->srq_id = resp.srq_id; ++ info->srq_id = resp.srq_id; ++ info->srq_size = resp.actual_srq_size; ++ info->srq_list_size = resp.actual_srq_list_size; ++ zxdh_dbg( ++ verbs_get_ctx(iwusrq->ibv_srq.context), ZXDH_DBG_SRQ, ++ "%s info->srq_id:%d info->srq_size:%d info->srq_list_size:%d\n", ++ __func__, info->srq_id, info->srq_size, info->srq_list_size); ++ ++ return 0; ++} ++ ++/** ++ * zxdh_vmapped_srq - create resources for srq ++ * @iwusrq: srq struct for resources ++ * @pd: pd for the srq ++ * @attr: attributes of srq passed ++ * @resp: response back from create srq ++ * @srqdepth: depth of sq ++ * @info: info for initializing user level srq ++ */ ++static int zxdh_vmapped_srq(struct zxdh_usrq *iwusrq, struct ibv_pd *pd, ++ struct ibv_srq_init_attr *attr, int srqdepth, ++ struct zxdh_srq_init_info *info) ++{ ++ size_t total_srq_size; ++ size_t srq_pages = 0; ++ size_t srq_list_pages = 0; ++ int ret; ++ ++ total_srq_size = ++ zxdh_get_total_srq_size(iwusrq, srqdepth, info->srq_size); ++ srq_pages = iwusrq->buf_size >> ZXDH_HW_PAGE_SHIFT; ++ srq_list_pages = iwusrq->list_buf_size >> ZXDH_HW_PAGE_SHIFT; ++ ret = zxdh_alloc_srq_buf(iwusrq, info, total_srq_size); ++ if (ret) ++ return -ENOMEM; ++ zxdh_dbg(verbs_get_ctx(iwusrq->ibv_srq.context), ZXDH_DBG_SRQ, ++ "%s srq_pages:%ld srq_list_pages:%ld\n", __func__, srq_pages, ++ srq_list_pages); ++ ++ ret = zxdh_reg_srq_mr(pd, info, total_srq_size, srq_pages, ++ srq_list_pages, iwusrq); ++ if (ret) { ++ errno = ret; ++ goto err_dereg_srq_mr; ++ } ++ ret = create_srq(pd, iwusrq, attr, info); ++ if (ret) ++ goto err_srq; ++ return 0; ++err_srq: ++ ibv_cmd_dereg_mr(&iwusrq->vmr); ++err_dereg_srq_mr: ++ zxdh_free_hw_buf(info->srq_base, total_srq_size); ++ ++ return ret; ++} ++ ++/** ++ * zxdh_destroy_vmapped_srq - destroy resources for srq ++ * @iwusrq: srq struct for resources ++ */ ++static int zxdh_destroy_vmapped_srq(struct zxdh_usrq *iwusrq) ++{ ++ int ret; ++ ++ ret = ibv_cmd_destroy_srq(&iwusrq->ibv_srq); ++ if (ret) ++ return ret; ++ ++ ibv_cmd_dereg_mr(&iwusrq->vmr); ++ return 0; ++} ++ ++static int zxdh_check_srq_init_attr(struct ibv_srq_init_attr *srq_init_attr, ++ struct zxdh_dev_attrs *dev_attrs) ++{ ++ if ((srq_init_attr->attr.srq_limit > srq_init_attr->attr.max_wr) || ++ 
(srq_init_attr->attr.max_sge > dev_attrs->max_hw_wq_frags) || ++ (srq_init_attr->attr.max_wr > dev_attrs->max_hw_srq_wr)) { ++ return 1; ++ } ++ return 0; ++} ++ ++static int zxdh_init_iwusrq(struct zxdh_usrq *iwusrq, ++ struct ibv_srq_init_attr *srq_init_attr, ++ __u32 srqdepth, __u8 srqshift, ++ struct zxdh_srq_init_info *info, ++ struct zxdh_dev_attrs *dev_attrs) ++{ ++ info->srq_size = srqdepth >> srqshift; ++ iwusrq->max_wr = info->srq_size; ++ iwusrq->max_sge = srq_init_attr->attr.max_sge; ++ iwusrq->srq_limit = srq_init_attr->attr.srq_limit; ++ ++ srq_init_attr->attr.max_wr = info->srq_size; ++ info->dev_attrs = dev_attrs; ++ info->max_srq_frag_cnt = srq_init_attr->attr.max_sge; ++ info->srq_wrid_array = ++ calloc(info->srq_size, sizeof(*info->srq_wrid_array)); ++ if (info->srq_wrid_array == NULL) ++ return 1; ++ ++ return 0; ++} ++ ++/** ++ * zxdh_ucreate_srq - create srq on user app ++ * @pd: pd for the srq ++ * @srq_init_attr: attributes of the srq to be created (sizes, sge) ++ */ ++struct ibv_srq *zxdh_ucreate_srq(struct ibv_pd *pd, ++ struct ibv_srq_init_attr *srq_init_attr) ++{ ++ struct zxdh_srq_init_info info = {}; ++ struct zxdh_dev_attrs *dev_attrs; ++ struct zxdh_uvcontext *iwvctx; ++ __u32 srqdepth; ++ __u8 srqshift; ++ int status; ++ int ret; ++ struct zxdh_usrq *iwusrq; ++ ++ iwvctx = container_of(pd->context, struct zxdh_uvcontext, ++ ibv_ctx.context); ++ dev_attrs = &iwvctx->dev_attrs; ++ ++ if ((zxdh_check_srq_init_attr(srq_init_attr, dev_attrs)) != 0) { ++ verbs_err(&iwvctx->ibv_ctx, ++ "zxdh_check_srq_init_attr failed\n"); ++ errno = EINVAL; ++ return NULL; ++ } ++ ++ /* get shift count for maximum wqe size */ ++ zxdh_get_srq_wqe_shift(dev_attrs, srq_init_attr->attr.max_sge, ++ &srqshift); ++ ++ /* get RQ/SRQ depth (quanta),minimum number of units in srq */ ++ status = zxdh_get_srqdepth(dev_attrs->max_hw_srq_quanta, ++ srq_init_attr->attr.max_wr, srqshift, ++ &srqdepth); ++ zxdh_dbg( ++ &iwvctx->ibv_ctx, ZXDH_DBG_SRQ, ++ "%s %d status:%d srqshift:%d srqdepth:%d dev_attrs->max_hw_srq_quanta:%d srq_init_attr->attr.max_wr:%d\n", ++ __func__, __LINE__, status, srqshift, srqdepth, ++ dev_attrs->max_hw_srq_quanta, srq_init_attr->attr.max_wr); ++ if (status != 0) { ++ verbs_err(&iwvctx->ibv_ctx, "zxdh_get_srqdepth failed\n"); ++ errno = EINVAL; ++ return NULL; ++ } ++ iwusrq = memalign(1024, sizeof(*iwusrq)); ++ if (!iwusrq) ++ return NULL; ++ memset(iwusrq, 0, sizeof(*iwusrq)); ++ if (pthread_spin_init(&iwusrq->lock, PTHREAD_PROCESS_PRIVATE) != 0) ++ goto err_free_srq; ++ ++ if (zxdh_init_iwusrq(iwusrq, srq_init_attr, srqdepth, srqshift, &info, ++ dev_attrs)) { ++ verbs_err(&iwvctx->ibv_ctx, "calloc srq_wrid_array failed\n"); ++ goto err_srq_wrid_array; ++ } ++ status = zxdh_vmapped_srq(iwusrq, pd, srq_init_attr, srqdepth, &info); ++ if (status) { ++ verbs_err(&iwvctx->ibv_ctx, "zxdh_vmapped_srq failed\n"); ++ errno = status; ++ goto err_vmapped_srq; ++ } ++ ++ status = zxdh_srq_init(&iwusrq->srq, &info); ++ if (status) { ++ verbs_err(&iwvctx->ibv_ctx, "zxdh_srq_init failed\n"); ++ errno = EINVAL; ++ goto err_free_srq_init; ++ } ++ zxdh_srq_wqe_init(iwusrq); ++ ++ srq_init_attr->attr.max_wr = (srqdepth - ZXDH_SRQ_RSVD) >> srqshift; ++ ++ zxdh_dbg(&iwvctx->ibv_ctx, ZXDH_DBG_SRQ, ++ "iwusrq->srq_id:%d info.srq_size:%d\n", iwusrq->srq_id, ++ info.srq_size); ++ return &iwusrq->ibv_srq; ++ ++err_free_srq_init: ++ zxdh_destroy_vmapped_srq(iwusrq); ++ zxdh_free_hw_buf(info.srq_base, iwusrq->total_buf_size); ++err_vmapped_srq: ++ free(info.srq_wrid_array); ++err_srq_wrid_array: 
++ ret = pthread_spin_destroy(&iwusrq->lock); ++ if (ret) ++ errno = EINVAL; ++err_free_srq: ++ free(iwusrq); ++ return NULL; ++} ++ ++/** ++ * zxdh_udestroy_srq - destroy srq on user app ++ * @srq: srq to destroy ++ */ ++int zxdh_udestroy_srq(struct ibv_srq *srq) ++{ ++ struct zxdh_usrq *iwusrq; ++ int ret; ++ ++ iwusrq = container_of(srq, struct zxdh_usrq, ibv_srq); ++ ret = pthread_spin_destroy(&iwusrq->lock); ++ if (ret) ++ goto err; ++ ++ ret = zxdh_destroy_vmapped_srq(iwusrq); ++ if (ret) ++ goto err; ++ zxdh_dbg(verbs_get_ctx(iwusrq->ibv_srq.context), ZXDH_DBG_SRQ, ++ "iwusrq->srq_id:%d\n", iwusrq->srq_id); ++ zxdh_free_hw_buf(iwusrq->srq.srq_base, iwusrq->total_buf_size); ++ free(iwusrq->srq.srq_wrid_array); ++ free(iwusrq); ++ ++ return 0; ++ ++err: ++ return ret; ++} ++ ++/** ++ * zxdh_umodify_srq - modify srq on user app ++ * @srq: srq to destroy ++ */ ++int zxdh_umodify_srq(struct ibv_srq *srq, struct ibv_srq_attr *srq_attr, ++ int srq_attr_mask) ++{ ++ struct ibv_modify_srq cmd; ++ struct zxdh_usrq *iwusrq; ++ int ret; ++ ++ iwusrq = container_of(srq, struct zxdh_usrq, ibv_srq); ++ ret = ibv_cmd_modify_srq(srq, srq_attr, srq_attr_mask, &cmd, ++ sizeof(cmd)); ++ if (ret == 0) ++ iwusrq->srq_limit = srq_attr->srq_limit; ++ zxdh_dbg(verbs_get_ctx(iwusrq->ibv_srq.context), ZXDH_DBG_SRQ, ++ "iwusrq->srq_id:%d srq_attr->srq_limit:%d\n", iwusrq->srq_id, ++ srq_attr->srq_limit); ++ return ret; ++} ++ ++/** ++ * zxdh_uquery_srq - query srq on user app ++ * @srq: srq to query ++ * @srq_attr: attributes of the srq to be query ++ */ ++int zxdh_uquery_srq(struct ibv_srq *srq, struct ibv_srq_attr *srq_attr) ++{ ++ struct ibv_query_srq cmd; ++ ++ return ibv_cmd_query_srq(srq, srq_attr, &cmd, sizeof(cmd)); ++} ++ ++static int zxdh_check_srq_valid(struct ibv_recv_wr *recv_wr, ++ struct zxdh_usrq *iwusrq, struct zxdh_srq *srq) ++{ ++ if (unlikely(recv_wr->num_sge > iwusrq->max_sge)) ++ return -EINVAL; ++ ++ if (unlikely(srq->srq_ring.head == srq->srq_ring.tail)) ++ return -ENOMEM; ++ ++ return 0; ++} ++ ++static void zxdh_fill_srq_wqe(struct zxdh_usrq *iwusrq, struct zxdh_srq *srq, ++ __le64 *wqe_64, struct ibv_recv_wr *recv_wr) ++{ ++ __u32 byte_off; ++ int i; ++ ++ for (i = 0, byte_off = ZXDH_SRQ_FRAG_BYTESIZE; ++ i < recv_wr->num_sge && ++ byte_off + ZXDH_SRQ_FRAG_BYTESIZE < UINT32_MAX; ++ i++) { ++ set_64bit_val(wqe_64, byte_off, recv_wr->sg_list[i].addr); ++ set_64bit_val(wqe_64, byte_off + 8, ++ FIELD_PREP(ZXDHQPSRQ_FRAG_LEN, ++ recv_wr->sg_list[i].length) | ++ FIELD_PREP(ZXDHQPSRQ_FRAG_STAG, ++ recv_wr->sg_list[i].lkey)); ++ byte_off += ZXDH_SRQ_FRAG_BYTESIZE; ++ } ++ ++ if ((recv_wr->num_sge < iwusrq->max_sge) || (recv_wr->num_sge == 0)) { ++ set_64bit_val(wqe_64, byte_off, 0); ++ set_64bit_val(wqe_64, byte_off + 8, ++ FIELD_PREP(ZXDHQPSRQ_FRAG_LEN, 0) | ++ FIELD_PREP(ZXDHQPSRQ_FRAG_STAG, ++ ZXDH_SRQ_INVALID_LKEY)); ++ } ++ ++ set_64bit_val(wqe_64, 8, ((uint64_t)iwusrq->srq_id) << 32); ++ ++ __u64 hdr = FIELD_PREP(ZXDHQPSRQ_RSV, 0) | ++ FIELD_PREP(ZXDHQPSRQ_VALID_SGE_NUM, recv_wr->num_sge) | ++ FIELD_PREP(ZXDHQPSRQ_SIGNATURE, 0) | ++ FIELD_PREP(ZXDHQPSRQ_NEXT_WQE_INDEX, srq->srq_ring.head); ++ ++ udma_to_device_barrier(); /* make sure WQE is populated before valid bit is set */ ++ set_64bit_val(wqe_64, 0, hdr); ++} ++ ++static void zxdh_get_wqe_index(struct zxdh_srq *srq, __le16 *wqe_16, __u16 *buf, ++ __u16 nreq, __u16 *idx) ++{ ++ int i; ++ ++ for (i = 0; i < nreq; i++) { ++ wqe_16 = zxdh_get_srq_list_wqe(srq, idx); ++ udma_to_device_barrier(); /* make sure WQE is populated before 
valid bit is set */ ++ set_16bit_val(wqe_16, 0, buf[i]); ++ } ++} ++ ++static void zxdh_update_srq_db_base(struct zxdh_usrq *iwusrq, __u16 idx) ++{ ++ __u64 hdr = FIELD_PREP(ZXDH_SRQ_PARITY_SIGN, ++ iwusrq->srq.srq_list_polarity) | ++ FIELD_PREP(ZXDH_SRQ_SW_SRQ_HEAD, idx); ++ ++ udma_to_device_barrier(); /* make sure WQE is populated before valid bit is set */ ++ set_64bit_val(iwusrq->srq.srq_db_base, 0, hdr); ++} ++ ++/** ++ * zxdh_upost_srq_recv - post srq recv on user app ++ * @srq: srq to post recv ++ * @recv_wr: a list of work requests to post on the receive queue ++ * @bad_recv_wr: pointer to first rejected wr ++ */ ++int zxdh_upost_srq_recv(struct ibv_srq *srq, struct ibv_recv_wr *recv_wr, ++ struct ibv_recv_wr **bad_recv_wr) ++{ ++ struct zxdh_usrq *iwusrq; ++ struct zxdh_srq *hw_srq; ++ __le16 *wqe_16; ++ __le64 *wqe_64; ++ __u64 temp_val; ++ int err = 0; ++ int nreq; ++ __u16 *buf; ++ size_t buf_size; ++ __u16 idx = 0; ++ ++ iwusrq = container_of(srq, struct zxdh_usrq, ibv_srq); ++ hw_srq = &iwusrq->srq; ++ pthread_spin_lock(&iwusrq->lock); ++ buf_size = iwusrq->max_wr * sizeof(__u16); ++ buf = malloc(buf_size); ++ if (buf == NULL) { ++ verbs_err(verbs_get_ctx(iwusrq->ibv_srq.context), ++ "malloc buf_size failed\n"); ++ err = -ENOMEM; ++ goto out; ++ } ++ ++ for (nreq = 0; recv_wr; nreq++, recv_wr = recv_wr->next) { ++ err = zxdh_check_srq_valid(recv_wr, iwusrq, hw_srq); ++ if (err) ++ break; ++ ++ iwusrq->srq.srq_wrid_array[hw_srq->srq_ring.head] = ++ recv_wr->wr_id; ++ buf[nreq] = hw_srq->srq_ring.head; ++ wqe_64 = zxdh_get_srq_wqe(hw_srq, hw_srq->srq_ring.head); ++ get_64bit_val(wqe_64, 0, &temp_val); ++ hw_srq->srq_ring.head = ++ (__u16)FIELD_GET(ZXDHQPSRQ_NEXT_WQE_INDEX, temp_val); ++ zxdh_fill_srq_wqe(iwusrq, hw_srq, wqe_64, recv_wr); ++ } ++ ++ zxdh_dbg(verbs_get_ctx(iwusrq->ibv_srq.context), ZXDH_DBG_SRQ, ++ "nreq:%d err:%d iwusrq->srq_id:%d\n", nreq, err, ++ iwusrq->srq_id); ++ ++ if (err == 0) { ++ zxdh_get_wqe_index(hw_srq, wqe_16, buf, nreq, &idx); ++ zxdh_update_srq_db_base(iwusrq, idx); ++ } ++out: ++ pthread_spin_unlock(&iwusrq->lock); ++ if (err) ++ *bad_recv_wr = recv_wr; ++ if (buf) ++ free(buf); ++ return err; ++} ++ ++/** ++ * zxdh_uget_srq_num - get srq num on user app ++ * @srq: srq to get num ++ * @srq_num: to get srq num ++ */ ++int zxdh_uget_srq_num(struct ibv_srq *srq, uint32_t *srq_num) ++{ ++ struct zxdh_usrq *iwusrq; ++ ++ iwusrq = container_of(srq, struct zxdh_usrq, ibv_srq); ++ ++ *srq_num = iwusrq->srq_id; ++ return 0; ++} ++ ++void zxdh_set_debug_mask(void) ++{ ++ char *env; ++ ++ env = getenv("ZXDH_DEBUG_MASK"); ++ if (env) ++ zxdh_debug_mask = strtol(env, NULL, 0); ++} +diff --git a/providers/zrdma/zxdh_verbs.h b/providers/zrdma/zxdh_verbs.h +new file mode 100644 +index 0000000..69a98cc +--- /dev/null ++++ b/providers/zrdma/zxdh_verbs.h +@@ -0,0 +1,611 @@ ++// SPDX-License-Identifier: GPL-2.0 or Linux-OpenIB ++/* Copyright (c) 2024 ZTE Corporation. All rights reserved. 
*/ ++#ifndef ZXDH_VERBS_H ++#define ZXDH_VERBS_H ++#include "zxdh_defs.h" ++ ++#define zxdh_handle void * ++#define zxdh_adapter_handle zxdh_handle ++#define zxdh_qp_handle zxdh_handle ++#define zxdh_cq_handle zxdh_handle ++#define zxdh_pd_id zxdh_handle ++#define zxdh_stag_handle zxdh_handle ++#define zxdh_stag_index __u32 ++#define zxdh_stag __u32 ++#define zxdh_stag_key __u8 ++#define zxdh_tagged_offset __u64 ++#define zxdh_access_privileges __u32 ++#define zxdh_physical_fragment __u64 ++#define zxdh_address_list __u64 * ++#define zxdh_sgl struct zxdh_sge * ++ ++#define ZXDH_MAX_MR_SIZE 0x200000000000ULL ++ ++#define ZXDH_ACCESS_FLAGS_LOCALREAD 0x01 ++#define ZXDH_ACCESS_FLAGS_LOCALWRITE 0x02 ++#define ZXDH_ACCESS_FLAGS_REMOTEREAD_ONLY 0x04 ++#define ZXDH_ACCESS_FLAGS_REMOTEREAD 0x05 ++#define ZXDH_ACCESS_FLAGS_REMOTEWRITE_ONLY 0x08 ++#define ZXDH_ACCESS_FLAGS_REMOTEWRITE 0x0a ++#define ZXDH_ACCESS_FLAGS_BIND_WINDOW 0x10 ++#define ZXDH_ACCESS_FLAGS_ZERO_BASED 0x20 ++#define ZXDH_ACCESS_FLAGS_ALL 0x3f ++ ++#define ZXDH_OP_TYPE_NOP 0x00 ++#define ZXDH_OP_TYPE_SEND 0x01 ++#define ZXDH_OP_TYPE_SEND_WITH_IMM 0x02 ++#define ZXDH_OP_TYPE_SEND_INV 0x03 ++#define ZXDH_OP_TYPE_WRITE 0x04 ++#define ZXDH_OP_TYPE_WRITE_WITH_IMM 0x05 ++#define ZXDH_OP_TYPE_READ 0x06 ++#define ZXDH_OP_TYPE_BIND_MW 0x07 ++#define ZXDH_OP_TYPE_FAST_REG_MR 0x08 ++#define ZXDH_OP_TYPE_LOCAL_INV 0x09 ++#define ZXDH_OP_TYPE_UD_SEND 0x0a ++#define ZXDH_OP_TYPE_UD_SEND_WITH_IMM 0x0b ++#define ZXDH_OP_TYPE_REC 0x3e ++#define ZXDH_OP_TYPE_REC_IMM 0x3f ++ ++#define ZXDH_FLUSH_MAJOR_ERR 1 ++#define ZXDH_RETRY_ACK_MAJOR_ERR 0x8 ++#define ZXDH_RETRY_ACK_MINOR_ERR 0xf3 ++#define ZXDH_TX_WINDOW_QUERY_ITEM_MINOR_ERR 0xf5 ++ ++#define ZXDH_MAX_SQ_FRAG 31 ++#define ZXDH_MAX_SQ_INLINE_DATELEN_WITH_IMM 210 ++ ++#define INLINE_DATASIZE_7BYTES 7 ++#define INLINE_DATASIZE_24BYTES 24 ++#define INLINE_FRAG_DATASIZE_31BYTES 31 ++ ++#define INLINE_DATA_OFFSET_7BYTES 7 ++#define WQE_OFFSET_7BYTES 7 ++#define WQE_OFFSET_8BYTES 8 ++#define WQE_OFFSET_24BYTES 24 ++ ++#define ZXDH_SQE_SIZE 4 ++#define ZXDH_RQE_SIZE 2 ++ ++#define ZXDH_SRQ_INVALID_LKEY 0x100 ++#define ZXDH_SRQ_DB_INIT_VALUE 0x8000 ++ ++#define ZXDH_WQEALLOC_WQE_DESC_INDEX GENMASK(31, 20) ++ ++enum zxdh_device_caps_const { ++ ZXDH_WQE_SIZE = 4, ++ ZXDH_SRQE_SIZE = 2, ++ ZXDH_CQP_WQE_SIZE = 8, ++ ZXDH_CQE_SIZE = 8, ++ ZXDH_EXTENDED_CQE_SIZE = 8, ++ ZXDH_AEQE_SIZE = 2, ++ ZXDH_CEQE_SIZE = 1, ++ ZXDH_CQP_CTX_SIZE = 8, ++ ZXDH_SHADOW_AREA_SIZE = 8, ++ ZXDH_GATHER_STATS_BUF_SIZE = 1024, ++ ZXDH_MIN_IW_QP_ID = 0, ++ ZXDH_QUERY_FPM_BUF_SIZE = 176, ++ ZXDH_COMMIT_FPM_BUF_SIZE = 176, ++ ZXDH_MAX_IW_QP_ID = 262143, ++ ZXDH_MIN_CEQID = 0, ++ ZXDH_MAX_CEQID = 1023, ++ ZXDH_CEQ_MAX_COUNT = ZXDH_MAX_CEQID + 1, ++ ZXDH_MIN_CQID = 0, ++ ZXDH_MAX_CQID = 524287, ++ ZXDH_MIN_AEQ_ENTRIES = 1, ++ ZXDH_MAX_AEQ_ENTRIES = 524287, ++ ZXDH_MIN_CEQ_ENTRIES = 1, ++ ZXDH_MAX_CEQ_ENTRIES = 262143, ++ ZXDH_MIN_CQ_SIZE = 1, ++ ZXDH_MAX_CQ_SIZE = 1048575, ++ ZXDH_DB_ID_ZERO = 0, ++ ZXDH_MAX_WQ_FRAGMENT_COUNT = 13, ++ ZXDH_MAX_SGE_RD = 13, ++ ZXDH_MAX_OUTBOUND_MSG_SIZE = 2147483647, ++ ZXDH_MAX_INBOUND_MSG_SIZE = 2147483647, ++ ZXDH_MAX_PUSH_PAGE_COUNT = 1024, ++ ZXDH_MAX_PE_ENA_VF_COUNT = 32, ++ ZXDH_MAX_VF_FPM_ID = 47, ++ ZXDH_MAX_SQ_PAYLOAD_SIZE = 2147483648, ++ ZXDH_MAX_INLINE_DATA_SIZE = 217, ++ ZXDH_MAX_WQ_ENTRIES = 32768, ++ ZXDH_Q2_BUF_SIZE = 256, ++ ZXDH_QP_CTX_SIZE = 256, ++ ZXDH_MAX_PDS = 262144, ++}; ++ ++enum zxdh_addressing_type { ++ ZXDH_ADDR_TYPE_ZERO_BASED = 0, ++ ZXDH_ADDR_TYPE_VA_BASED = 1, ++}; ++ 
++enum zxdh_flush_opcode { ++ FLUSH_INVALID = 0, ++ FLUSH_GENERAL_ERR, ++ FLUSH_PROT_ERR, ++ FLUSH_REM_ACCESS_ERR, ++ FLUSH_LOC_QP_OP_ERR, ++ FLUSH_REM_OP_ERR, ++ FLUSH_LOC_LEN_ERR, ++ FLUSH_FATAL_ERR, ++ FLUSH_RETRY_EXC_ERR, ++ FLUSH_MW_BIND_ERR, ++ FLUSH_REM_INV_REQ_ERR, ++}; ++ ++enum zxdh_cmpl_status { ++ ZXDH_COMPL_STATUS_SUCCESS = 0, ++ ZXDH_COMPL_STATUS_FLUSHED, ++ ZXDH_COMPL_STATUS_INVALID_WQE, ++ ZXDH_COMPL_STATUS_QP_CATASTROPHIC, ++ ZXDH_COMPL_STATUS_REMOTE_TERMINATION, ++ ZXDH_COMPL_STATUS_INVALID_STAG, ++ ZXDH_COMPL_STATUS_BASE_BOUND_VIOLATION, ++ ZXDH_COMPL_STATUS_ACCESS_VIOLATION, ++ ZXDH_COMPL_STATUS_INVALID_PD_ID, ++ ZXDH_COMPL_STATUS_WRAP_ERROR, ++ ZXDH_COMPL_STATUS_STAG_INVALID_PDID, ++ ZXDH_COMPL_STATUS_RDMA_READ_ZERO_ORD, ++ ZXDH_COMPL_STATUS_QP_NOT_PRIVLEDGED, ++ ZXDH_COMPL_STATUS_STAG_NOT_INVALID, ++ ZXDH_COMPL_STATUS_INVALID_PHYS_BUF_SIZE, ++ ZXDH_COMPL_STATUS_INVALID_PHYS_BUF_ENTRY, ++ ZXDH_COMPL_STATUS_INVALID_FBO, ++ ZXDH_COMPL_STATUS_INVALID_LEN, ++ ZXDH_COMPL_STATUS_INVALID_ACCESS, ++ ZXDH_COMPL_STATUS_PHYS_BUF_LIST_TOO_LONG, ++ ZXDH_COMPL_STATUS_INVALID_VIRT_ADDRESS, ++ ZXDH_COMPL_STATUS_INVALID_REGION, ++ ZXDH_COMPL_STATUS_INVALID_WINDOW, ++ ZXDH_COMPL_STATUS_INVALID_TOTAL_LEN, ++ ZXDH_COMPL_STATUS_RETRY_ACK_ERR, ++ ZXDH_COMPL_STATUS_TX_WINDOW_QUERY_ITEM_ERR, ++ ZXDH_COMPL_STATUS_UNKNOWN, ++}; ++ ++enum zxdh_cmpl_notify { ++ ZXDH_CQ_COMPL_EVENT = 0, ++ ZXDH_CQ_COMPL_SOLICITED = 1, ++}; ++ ++enum zxdh_qp_caps { ++ ZXDH_WRITE_WITH_IMM = 1, ++ ZXDH_SEND_WITH_IMM = 2, ++ ZXDH_ROCE = 4, ++ ZXDH_PUSH_MODE = 8, ++}; ++ ++enum zxdh_page_size { ++ ZXDH_PAGE_SIZE_4K = 0, ++ ZXDH_PAGE_SIZE_2M = 9, ++ ZXDH_PAGE_SIZE_1G = 18, ++}; ++ ++struct zxdh_qp; ++struct zxdh_cq; ++struct zxdh_qp_init_info; ++struct zxdh_cq_init_info; ++ ++struct zxdh_sge { ++ zxdh_tagged_offset tag_off; ++ __u32 len; ++ zxdh_stag stag; ++}; ++ ++struct zxdh_ring { ++ __u32 head; ++ __u32 tail; ++ __u32 size; ++}; ++ ++struct zxdh_cqe { ++ __le64 buf[ZXDH_CQE_SIZE]; ++}; ++ ++struct zxdh_extended_cqe { ++ __le64 buf[ZXDH_EXTENDED_CQE_SIZE]; ++}; ++ ++struct zxdh_post_send { ++ zxdh_sgl sg_list; ++ __u32 num_sges; ++ __u32 qkey; ++ __u32 dest_qp; ++ __u32 ah_id; ++}; ++ ++struct zxdh_inline_rdma_send { ++ void *data; ++ __u32 len; ++ __u32 qkey; ++ __u32 dest_qp; ++ __u32 ah_id; ++}; ++ ++struct zxdh_post_rq_info { ++ __u64 wr_id; ++ zxdh_sgl sg_list; ++ __u32 num_sges; ++}; ++ ++struct zxdh_rdma_write { ++ zxdh_sgl lo_sg_list; ++ __u32 num_lo_sges; ++ struct zxdh_sge rem_addr; ++}; ++ ++struct zxdh_inline_rdma_write { ++ void *data; ++ __u32 len; ++ struct zxdh_sge rem_addr; ++}; ++ ++struct zxdh_rdma_read { ++ zxdh_sgl lo_sg_list; ++ __u32 num_lo_sges; ++ struct zxdh_sge rem_addr; ++}; ++ ++struct zxdh_bind_window { ++ zxdh_stag mr_stag; ++ __u64 bind_len; ++ void *va; ++ enum zxdh_addressing_type addressing_type; ++ __u8 ena_reads : 1; ++ __u8 ena_writes : 1; ++ zxdh_stag mw_stag; ++ __u8 mem_window_type_1 : 1; ++ __u8 host_page_size; ++ __u8 leaf_pbl_size; ++ __u16 root_leaf_offset; ++ __u64 mw_pa_pble_index; ++}; ++ ++struct zxdh_inv_local_stag { ++ zxdh_stag target_stag; ++}; ++ ++struct zxdh_post_sq_info { ++ __u64 wr_id; ++ __u8 op_type; ++ __u8 l4len; ++ __u8 signaled : 1; ++ __u8 read_fence : 1; ++ __u8 local_fence : 1; ++ __u8 inline_data : 1; ++ __u8 imm_data_valid : 1; ++ __u8 push_wqe : 1; ++ __u8 report_rtt : 1; ++ __u8 udp_hdr : 1; ++ __u8 defer_flag : 1; ++ __u8 solicited : 1; ++ __u32 imm_data; ++ __u32 stag_to_inv; ++ union { ++ struct zxdh_post_send send; ++ struct zxdh_rdma_write 
rdma_write; ++ struct zxdh_rdma_read rdma_read; ++ struct zxdh_bind_window bind_window; ++ struct zxdh_inv_local_stag inv_local_stag; ++ struct zxdh_inline_rdma_write inline_rdma_write; ++ struct zxdh_inline_rdma_send inline_rdma_send; ++ } op; ++}; ++ ++struct zxdh_cq_poll_info { ++ __u64 wr_id; ++ zxdh_qp_handle qp_handle; ++ __u32 bytes_xfered; ++ __u32 tcp_seq_num_rtt; ++ __u32 qp_id; ++ __u32 ud_src_qpn; ++ __u32 imm_data; ++ zxdh_stag inv_stag; /* or L_R_Key */ ++ enum zxdh_cmpl_status comp_status; ++ __u16 major_err; ++ __u16 minor_err; ++ __u8 op_type; ++ __u8 stag_invalid_set : 1; /* or L_R_Key set */ ++ __u8 push_dropped : 1; ++ __u8 error : 1; ++ __u8 solicited_event : 1; ++ __u8 ipv4 : 1; ++ __u8 imm_valid : 1; ++}; ++ ++enum zxdh_status_code zxdh_inline_rdma_write(struct zxdh_qp *qp, ++ struct zxdh_post_sq_info *info, ++ bool post_sq); ++enum zxdh_status_code zxdh_rc_inline_send(struct zxdh_qp *qp, ++ struct zxdh_post_sq_info *info, ++ bool post_sq); ++enum zxdh_status_code zxdh_ud_inline_send(struct zxdh_qp *qp, ++ struct zxdh_post_sq_info *info, ++ bool post_sq); ++enum zxdh_status_code ++zxdh_mw_bind(struct zxdh_qp *qp, struct zxdh_post_sq_info *info, bool post_sq); ++enum zxdh_status_code zxdh_post_nop(struct zxdh_qp *qp, __u64 wr_id, ++ bool signaled, bool post_sq); ++enum zxdh_status_code zxdh_post_receive(struct zxdh_qp *qp, ++ struct zxdh_post_rq_info *info); ++void zxdh_qp_post_wr(struct zxdh_qp *qp); ++void zxdh_qp_set_shadow_area(struct zxdh_qp *qp); ++enum zxdh_status_code zxdh_rdma_read(struct zxdh_qp *qp, ++ struct zxdh_post_sq_info *info, ++ bool inv_stag, bool post_sq); ++enum zxdh_status_code zxdh_rdma_write(struct zxdh_qp *qp, ++ struct zxdh_post_sq_info *info, ++ bool post_sq); ++enum zxdh_status_code ++zxdh_rc_send(struct zxdh_qp *qp, struct zxdh_post_sq_info *info, bool post_sq); ++enum zxdh_status_code ++zxdh_ud_send(struct zxdh_qp *qp, struct zxdh_post_sq_info *info, bool post_sq); ++enum zxdh_status_code zxdh_stag_local_invalidate(struct zxdh_qp *qp, ++ struct zxdh_post_sq_info *info, ++ bool post_sq); ++ ++struct zxdh_wqe_ops { ++ void (*iw_copy_inline_data)(__u8 *dest, __u8 *src, __u32 len, ++ __u8 polarity, bool imm_data_flag); ++ __u16 (*iw_inline_data_size_to_quanta)(__u32 data_size, ++ bool imm_data_flag); ++ void (*iw_set_fragment)(__le64 *wqe, __u32 offset, struct zxdh_sge *sge, ++ __u8 valid); ++ void (*iw_set_mw_bind_wqe)(__le64 *wqe, ++ struct zxdh_bind_window *op_info); ++}; ++ ++__le64 *get_current_cqe(struct zxdh_cq *cq); ++enum zxdh_status_code zxdh_cq_poll_cmpl(struct zxdh_cq *cq, ++ struct zxdh_cq_poll_info *info); ++void zxdh_cq_request_notification(struct zxdh_cq *cq, ++ enum zxdh_cmpl_notify cq_notify); ++void zxdh_cq_resize(struct zxdh_cq *cq, void *cq_base, int size); ++void zxdh_cq_set_resized_cnt(struct zxdh_cq *qp, __u16 cnt); ++enum zxdh_status_code zxdh_cq_init(struct zxdh_cq *cq, ++ struct zxdh_cq_init_info *info); ++enum zxdh_status_code zxdh_qp_init(struct zxdh_qp *qp, ++ struct zxdh_qp_init_info *info); ++struct zxdh_sq_wr_trk_info { ++ __u64 wrid; ++ __u32 wr_len; ++ __u16 quanta; ++ __u8 reserved[2]; ++}; ++ ++struct zxdh_qp_sq_quanta { ++ __le64 elem[ZXDH_SQE_SIZE]; ++}; ++ ++struct zxdh_qp_rq_quanta { ++ __le64 elem[ZXDH_RQE_SIZE]; ++}; ++ ++struct zxdh_dev_attrs { ++ __u64 feature_flags; ++ __aligned_u64 sq_db_pa; ++ __aligned_u64 cq_db_pa; ++ __u32 max_hw_wq_frags; ++ __u32 max_hw_read_sges; ++ __u32 max_hw_inline; ++ __u32 max_hw_rq_quanta; ++ __u32 max_hw_srq_quanta; ++ __u32 max_hw_wq_quanta; ++ __u32 
min_hw_cq_size; ++ __u32 max_hw_cq_size; ++ __u16 max_hw_sq_chunk; ++ __u32 max_hw_srq_wr; ++ __u8 hw_rev; ++ __u8 db_addr_type; ++}; ++ ++struct zxdh_hw_attrs { ++ struct zxdh_dev_attrs dev_attrs; ++ __u64 max_hw_outbound_msg_size; ++ __u64 max_hw_inbound_msg_size; ++ __u64 max_mr_size; ++ __u32 min_hw_qp_id; ++ __u32 min_hw_aeq_size; ++ __u32 max_hw_aeq_size; ++ __u32 min_hw_ceq_size; ++ __u32 max_hw_ceq_size; ++ __u32 max_hw_device_pages; ++ __u32 max_hw_vf_fpm_id; ++ __u32 first_hw_vf_fpm_id; ++ __u32 max_hw_ird; ++ __u32 max_hw_ord; ++ __u32 max_hw_wqes; ++ __u32 max_hw_pds; ++ __u32 max_hw_ena_vf_count; ++ __u32 max_qp_wr; ++ __u32 max_pe_ready_count; ++ __u32 max_done_count; ++ __u32 max_sleep_count; ++ __u32 max_cqp_compl_wait_time_ms; ++ __u16 max_stat_inst; ++}; ++ ++struct zxdh_qp { ++ struct zxdh_qp_sq_quanta *sq_base; ++ struct zxdh_qp_rq_quanta *rq_base; ++ struct zxdh_dev_attrs *dev_attrs; ++ __u32 *wqe_alloc_db; ++ struct zxdh_sq_wr_trk_info *sq_wrtrk_array; ++ __u64 *rq_wrid_array; ++ __le64 *shadow_area; ++ __le32 *push_db; ++ __le64 *push_wqe; ++ struct zxdh_ring sq_ring; ++ struct zxdh_ring rq_ring; ++ struct zxdh_ring initial_ring; ++ __u32 qp_id; ++ __u32 qp_caps; ++ __u32 sq_size; ++ __u32 rq_size; ++ __u32 max_sq_frag_cnt; ++ __u32 max_rq_frag_cnt; ++ __u32 max_inline_data; ++ struct zxdh_wqe_ops wqe_ops; ++ __u16 conn_wqes; ++ __u8 qp_type; ++ __u8 swqe_polarity; ++ __u8 swqe_polarity_deferred; ++ __u8 rwqe_polarity; ++ __u8 rq_wqe_size; ++ __u8 rq_wqe_size_multiplier; ++ __u8 deferred_flag : 1; ++ __u8 push_mode : 1; /* whether the last post wqe was pushed */ ++ __u8 push_dropped : 1; ++ __u8 sq_flush_complete : 1; /* Indicates flush was seen and SQ was empty after the flush */ ++ __u8 rq_flush_complete : 1; /* Indicates flush was seen and RQ was empty after the flush */ ++ __u8 destroy_pending : 1; /* Indicates the QP is being destroyed */ ++ void *back_qp; ++ zxdh_sgl split_sg_list; ++ pthread_spinlock_t *lock; ++ __u16 rwqe_signature; ++ __u8 dbg_rq_flushed; ++ __u8 sq_flush_seen; ++ __u8 rq_flush_seen; ++ __u8 is_srq; ++ __u16 mtu; ++ __u32 next_psn; ++ __u32 cqe_last_ack_qsn; ++ __u32 qp_last_ack_qsn; ++ __u8 cqe_retry_cnt; ++ __u8 qp_reset_cnt; ++}; ++ ++struct zxdh_cq { ++ struct zxdh_cqe *cq_base; ++ __u32 *cqe_alloc_db; ++ __u32 *cq_ack_db; ++ __le64 *shadow_area; ++ __u32 cq_id; ++ __u32 cq_size; ++ __u32 cqe_rd_cnt; ++ struct zxdh_ring cq_ring; ++ __u8 polarity; ++ __u8 cqe_size; ++}; ++ ++struct zxdh_srq { ++ struct zxdh_srq_wqe *srq_base; ++ struct zxdh_dev_attrs *dev_attrs; ++ __le16 *srq_list_base; ++ __le64 *srq_db_base; ++ __u32 srq_id; ++ __u32 srq_size; ++ __u32 log2_srq_size; ++ __u32 srq_list_size; ++ struct zxdh_ring srq_ring; ++ struct zxdh_ring srq_list_ring; ++ __u8 srq_list_polarity; ++ __u64 *srq_wrid_array; ++ __u8 srq_wqe_size; ++ __u8 srq_wqe_size_multiplier; ++ __u32 srq_caps; ++ __u32 max_srq_frag_cnt; ++ __u32 srq_type; ++ pthread_spinlock_t *lock; ++ __u8 srq_flush_complete : 1; /* Indicates flush was seen and SQ was empty after the flush */ ++ __u8 destroy_pending : 1; /* Indicates the QP is being destroyed */ ++ __u8 srq_flush_seen; ++}; ++ ++struct zxdh_qp_init_info { ++ struct zxdh_qp_sq_quanta *sq; ++ struct zxdh_qp_rq_quanta *rq; ++ struct zxdh_dev_attrs *dev_attrs; ++ __u32 *wqe_alloc_db; ++ __le64 *shadow_area; ++ struct zxdh_sq_wr_trk_info *sq_wrtrk_array; ++ __u64 *rq_wrid_array; ++ __u32 qp_id; ++ __u32 qp_caps; ++ __u32 sq_size; ++ __u32 rq_size; ++ __u32 max_sq_frag_cnt; ++ __u32 max_rq_frag_cnt; ++ __u32 
max_inline_data; ++ __u8 type; ++ int abi_ver; ++ bool legacy_mode; ++}; ++ ++struct zxdh_cq_init_info { ++ __u32 *cqe_alloc_db; ++ __u32 *cq_ack_db; ++ struct zxdh_cqe *cq_base; ++ __le64 *shadow_area; ++ __u32 cq_size; ++ __u32 cq_id; ++ __u8 cqe_size; ++}; ++ ++struct zxdh_srq_init_info { ++ struct zxdh_srq_wqe *srq_base; ++ struct zxdh_dev_attrs *dev_attrs; ++ __le16 *srq_list_base; ++ __le64 *srq_db_base; ++ __u64 *srq_wrid_array; ++ __u32 srq_id; ++ __u32 srq_caps; ++ __u32 srq_size; ++ __u32 log2_srq_size; ++ __u32 srq_list_size; ++ __u32 srq_db_size; ++ __u32 max_srq_frag_cnt; ++ __u32 srq_limit; ++}; ++ ++struct zxdh_wqe_srq_next_sge { ++ __le16 next_wqe_index; ++ __le16 signature; ++ __u8 valid_sge_num; ++ __u8 rsvd[11]; ++}; ++ ++struct zxdh_srq_sge { ++ __le64 addr; ++ __le32 length; ++ __le32 lkey; ++}; ++ ++struct zxdh_srq_wqe { ++ __le64 elem[ZXDH_SRQE_SIZE]; ++}; ++ ++__le64 *zxdh_qp_get_next_send_wqe(struct zxdh_qp *qp, __u32 *wqe_idx, ++ __u16 quanta, __u32 total_size, ++ struct zxdh_post_sq_info *info); ++__le64 *zxdh_qp_get_next_recv_wqe(struct zxdh_qp *qp, __u32 *wqe_idx); ++void zxdh_clean_cq(void *q, struct zxdh_cq *cq); ++enum zxdh_status_code zxdh_nop(struct zxdh_qp *qp, __u64 wr_id, bool signaled, ++ bool post_sq); ++enum zxdh_status_code zxdh_fragcnt_to_quanta_sq(__u32 frag_cnt, __u16 *quanta); ++enum zxdh_status_code zxdh_fragcnt_to_wqesize_rq(__u32 frag_cnt, ++ __u16 *wqe_size); ++void zxdh_get_sq_wqe_shift(__u32 sge, __u32 inline_data, __u8 *shift); ++void zxdh_get_rq_wqe_shift(__u32 sge, __u8 *shift); ++enum zxdh_status_code zxdh_get_sqdepth(struct zxdh_dev_attrs *dev_attrs, ++ __u32 sq_size, __u8 shift, ++ __u32 *wqdepth); ++enum zxdh_status_code zxdh_get_rqdepth(struct zxdh_dev_attrs *dev_attrs, ++ __u32 rq_size, __u8 shift, ++ __u32 *wqdepth); ++int zxdh_qp_round_up(__u32 wqdepth); ++int zxdh_cq_round_up(__u32 wqdepth); ++void zxdh_qp_push_wqe(struct zxdh_qp *qp, __le64 *wqe, __u16 quanta, ++ __u32 wqe_idx, bool post_sq); ++void zxdh_clr_wqes(struct zxdh_qp *qp, __u32 qp_wqe_idx); ++ ++void zxdh_get_srq_wqe_shift(struct zxdh_dev_attrs *dev_attrs, __u32 sge, ++ __u8 *shift); ++int zxdh_get_srqdepth(__u32 max_hw_srq_quanta, __u32 srq_size, __u8 shift, ++ __u32 *srqdepth); ++__le64 *zxdh_get_srq_wqe(struct zxdh_srq *srq, int wqe_index); ++__le16 *zxdh_get_srq_list_wqe(struct zxdh_srq *srq, __u16 *idx); ++ ++enum zxdh_status_code zxdh_srq_init(struct zxdh_srq *srq, ++ struct zxdh_srq_init_info *info); ++void zxdh_free_srq_wqe(struct zxdh_srq *srq, int wqe_index); ++#endif /* ZXDH_USER_H */ +diff --git a/redhat/rdma-core.spec b/redhat/rdma-core.spec +index c347195..6d82a64 100644 +--- a/redhat/rdma-core.spec ++++ b/redhat/rdma-core.spec +@@ -176,6 +176,8 @@ Provides: libocrdma = %{version}-%{release} + Obsoletes: libocrdma < %{version}-%{release} + Provides: librxe = %{version}-%{release} + Obsoletes: librxe < %{version}-%{release} ++Provides: libzrdma = %{version}-%{release} ++Obsoletes: libzrdma < %{version}-%{release} + + %description -n libibverbs + libibverbs is a library that allows userspace processes to use RDMA +@@ -202,6 +204,7 @@ Device-specific plug-in ibverbs userspace drivers are included: + - librxe: A software implementation of the RoCE protocol + - libsiw: A software implementation of the iWarp protocol + - libvmw_pvrdma: VMware paravirtual RDMA device ++- libzrdma: ZTE Connection RDMA + + %package -n libibverbs-utils + Summary: Examples for the libibverbs library +@@ -583,6 +586,7 @@ fi + %{_libdir}/libmana.so.* + %{_libdir}/libmlx5.so.* + 
%{_libdir}/libmlx4.so.* ++%{_libdir}/libzrdma.so.* + %config(noreplace) %{_sysconfdir}/libibverbs.d/*.driver + %doc %{_docdir}/%{name}/libibverbs.md + +-- +2.27.0 + diff --git a/0037-libhns-Fix-out-of-order-issue-of-requester-when-sett.patch b/0037-libhns-Fix-out-of-order-issue-of-requester-when-sett.patch new file mode 100644 index 0000000000000000000000000000000000000000..79e3fd325f0ebb98ba835a00adda09958ad5b55c --- /dev/null +++ b/0037-libhns-Fix-out-of-order-issue-of-requester-when-sett.patch @@ -0,0 +1,50 @@ +From f13f4391bb2c0b5a1c876f36b99242615bdae88b Mon Sep 17 00:00:00 2001 +From: Junxian Huang +Date: Fri, 8 Nov 2024 17:04:09 +0800 +Subject: [PATCH] libhns: Fix out-of-order issue of requester when setting + FENCE + +mainline inclusion +from mainline-master +commit c4119911c212aaa552c9cb928fba0a696640c9b5 +category: bugfix +bugzilla: https://gitee.com/src-openeuler/rdma-core/issues/IB3ZHQ +CVE: NA +Reference: https://github.com/linux-rdma/rdma-core/pull/1513/commits/c4119911c212aaa552c9cb928fba0a696640c9b5 + +---------------------------------------------------------------------- + +The FENCE indicator in hns WQE doesn't ensure that response data from +a previous Read/Atomic operation has been written to the requester's +memory before the subsequent Send/Write operation is processed. This +may result in the subsequent Send/Write operation accessing the original +data in memory instead of the expected response data. + +Unlike FENCE, the SO (Strong Order) indicator blocks the subsequent +operation until the previous response data is written to memory and a +bresp is returned. Set the SO indicator instead of FENCE to maintain +strict order. + +Fixes: cbdf5e32a855 ("libhns: Reimplement verbs of post_send and post_recv for hip08 RoCE") +Signed-off-by: Junxian Huang +Signed-off-by: Xinghai Cen +--- + providers/hns/hns_roce_u_hw_v2.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/providers/hns/hns_roce_u_hw_v2.c b/providers/hns/hns_roce_u_hw_v2.c +index 9371150..2debcb3 100644 +--- a/providers/hns/hns_roce_u_hw_v2.c ++++ b/providers/hns/hns_roce_u_hw_v2.c +@@ -1527,7 +1527,7 @@ static int set_rc_wqe(void *wqe, struct hns_roce_qp *qp, struct ibv_send_wr *wr, + + hr_reg_write_bool(wqe, RCWQE_CQE, + !!(wr->send_flags & IBV_SEND_SIGNALED)); +- hr_reg_write_bool(wqe, RCWQE_FENCE, ++ hr_reg_write_bool(wqe, RCWQE_SO, + !!(wr->send_flags & IBV_SEND_FENCE)); + hr_reg_write_bool(wqe, RCWQE_SE, + !!(wr->send_flags & IBV_SEND_SOLICITED)); +-- +2.25.1 + diff --git a/0038-libhns-Fix-reference-to-uninitialized-cq-pointer.patch b/0038-libhns-Fix-reference-to-uninitialized-cq-pointer.patch new file mode 100644 index 0000000000000000000000000000000000000000..d595a48dc7622c7214b3ef65127e7fec6f9cb3ff --- /dev/null +++ b/0038-libhns-Fix-reference-to-uninitialized-cq-pointer.patch @@ -0,0 +1,68 @@ +From ccd9858cdf31573c447b43f7f7977e6b61869b82 Mon Sep 17 00:00:00 2001 +From: Chengchang Tang +Date: Fri, 8 Nov 2024 17:04:08 +0800 +Subject: [PATCH] libhns: Fix reference to uninitialized cq pointer +MIME-Version: 1.0 +Content-Type: text/plain; charset=utf-8 +Content-Transfer-Encoding: 8bit + +mainline inclusion +from mainline-master +commit 18e3117cdd161a3f40b8a917f24cfb5227a1d75a +category: bugfix +bugzilla: https://gitee.com/src-openeuler/rdma-core/issues/IB3ZHQ +CVE: NA +Reference: https://github.com/linux-rdma/rdma-core/pull/1513/commits/18e3117cdd161a3f40b8a917f24cfb5227a1d75a + +---------------------------------------------------------------------- + +For QPs which do not have an SQ, 
such as XRC TGT,the send_cq +pointer will not be initailized. Since the supported max_gs +will be 0 in this case, check it and return before referencing +the send_cq pointer. + +Fixes: cbdf5e32a855 ("libhns: Reimplement verbs of post_send and post_recv for hip08 RoCE") +Signed-off-by: Chengchang Tang +Signed-off-by: Junxian Huang +Signed-off-by: Xinghai Cen +--- + providers/hns/hns_roce_u_hw_v2.c | 12 ++++++------ + 1 file changed, 6 insertions(+), 6 deletions(-) + +diff --git a/providers/hns/hns_roce_u_hw_v2.c b/providers/hns/hns_roce_u_hw_v2.c +index 2debcb3..465ef1e 100644 +--- a/providers/hns/hns_roce_u_hw_v2.c ++++ b/providers/hns/hns_roce_u_hw_v2.c +@@ -1579,7 +1579,7 @@ int hns_roce_u_v2_post_send(struct ibv_qp *ibvqp, struct ibv_send_wr *wr, + struct hns_roce_context *ctx = to_hr_ctx(ibvqp->context); + struct hns_roce_qp *qp = to_hr_qp(ibvqp); + struct hns_roce_sge_info sge_info = {}; +- struct hns_roce_rc_sq_wqe *wqe; ++ struct hns_roce_rc_sq_wqe *wqe = NULL; + struct ibv_qp_attr attr = {}; + unsigned int wqe_idx, nreq; + int ret; +@@ -1595,15 +1595,15 @@ int hns_roce_u_v2_post_send(struct ibv_qp *ibvqp, struct ibv_send_wr *wr, + sge_info.start_idx = qp->next_sge; /* start index of extend sge */ + + for (nreq = 0; wr; ++nreq, wr = wr->next) { +- if (hns_roce_v2_wq_overflow(&qp->sq, nreq, +- to_hr_cq(qp->verbs_qp.qp.send_cq))) { +- ret = ENOMEM; ++ if (wr->num_sge > (int)qp->sq.max_gs) { ++ ret = qp->sq.max_gs > 0 ? EINVAL : EOPNOTSUPP; + *bad_wr = wr; + goto out; + } + +- if (wr->num_sge > qp->sq.max_gs) { +- ret = EINVAL; ++ if (hns_roce_v2_wq_overflow(&qp->sq, nreq, ++ to_hr_cq(qp->verbs_qp.qp.send_cq))) { ++ ret = ENOMEM; + *bad_wr = wr; + goto out; + } +-- +2.25.1 + diff --git a/0039-libhns-Fix-the-exception-branch-of-wr_start-is-not-l.patch b/0039-libhns-Fix-the-exception-branch-of-wr_start-is-not-l.patch new file mode 100644 index 0000000000000000000000000000000000000000..300832ba9716240038b22205b63fed6935a1c0c1 --- /dev/null +++ b/0039-libhns-Fix-the-exception-branch-of-wr_start-is-not-l.patch @@ -0,0 +1,42 @@ +From e7613ae249465ebca5434421199fe97aee845a90 Mon Sep 17 00:00:00 2001 +From: wenglianfa +Date: Wed, 12 Jun 2024 17:11:13 +0800 +Subject: [PATCH] libhns: Fix the exception branch of wr_start() is not locked + +driver inclusion +category: feature +bugzilla: https://gitee.com/src-openeuler/rdma-core/issues/IB66RT + +------------------------------------------------------------------ + +The provider should provide locking to ensure that ibv_wr_start() +and ibv_wr_complete()/abort() form a per-QP critical section +where no other threads can enter. + +The exception branch of wr_start() is not locked, fix it here. +Because check_qp_send () does not require lock protection, +hns_roce_spin_lock () is placed after check_qp_send (). 
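+
+For reference, the critical section that this lock protects looks like the
+following on the application side (a minimal sketch of the libibverbs
+extended-QP API; it assumes a QP created with IBV_QP_INIT_ATTR_SEND_OPS_FLAGS
+and IBV_QP_EX_WITH_SEND, and a registered buffer described by mr/buf/len):
+
+	struct ibv_qp_ex *qpx = ibv_qp_to_qp_ex(qp);
+	int ret;
+
+	ibv_wr_start(qpx);			/* provider takes the per-QP SQ lock */
+	qpx->wr_id = 1;
+	qpx->wr_flags = IBV_SEND_SIGNALED;
+	ibv_wr_send(qpx);			/* queue one send WR */
+	ibv_wr_set_sge(qpx, mr->lkey, (uintptr_t)buf, len);
+	ret = ibv_wr_complete(qpx);		/* provider releases the lock */
+
+No other thread may post to the same QP between ibv_wr_start() and
+ibv_wr_complete()/ibv_wr_abort(), so every branch of wr_start(), including
+the failure path, has to leave the QP in the locked state that the later
+calls expect to unwind.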
+ +Fixes: 36446a56eea5 ("libhns: Extended QP supports the new post send mechanism") + +Signed-off-by: wenglianfa +Signed-off-by: Xinghai Cen +--- + providers/hns/hns_roce_u_hw_v2.c | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/providers/hns/hns_roce_u_hw_v2.c b/providers/hns/hns_roce_u_hw_v2.c +index 465ef1e..e4232ea 100644 +--- a/providers/hns/hns_roce_u_hw_v2.c ++++ b/providers/hns/hns_roce_u_hw_v2.c +@@ -2930,6 +2930,7 @@ static void wr_start(struct ibv_qp_ex *ibv_qp) + + ret = check_qp_send(qp, ctx); + if (ret) { ++ hns_roce_spin_lock(&qp->sq.hr_lock); + qp->err = ret; + return; + } +-- +2.25.1 + diff --git a/0040-libhns-Fix-memory-leakage-when-DCA-is-enabled.patch b/0040-libhns-Fix-memory-leakage-when-DCA-is-enabled.patch new file mode 100644 index 0000000000000000000000000000000000000000..58cd8ec4abb96b5b9d714933446331957fb055b2 --- /dev/null +++ b/0040-libhns-Fix-memory-leakage-when-DCA-is-enabled.patch @@ -0,0 +1,41 @@ +From f8e29f955dd5399bd227c4de532f6d09872a254a Mon Sep 17 00:00:00 2001 +From: wenglianfa +Date: Thu, 25 Jul 2024 11:06:01 +0800 +Subject: [PATCH] libhns: Fix memory leakage when DCA is enabled + +driver inclusion +category: feature +bugzilla: https://gitee.com/src-openeuler/rdma-core/issues/IB66RT + +------------------------------------------------------------------ + +After DCA is enabled and a QP is created, the memory block +applied for DCA is not free when the QP is destroyed. Here +fix it. + +Fixes: 2783884a97e7 ("libhns: Add support for attaching QP's WQE buffer") +Signed-off-by: wenglianfa +Signed-off-by: Xinghai Cen +--- + providers/hns/hns_roce_u_verbs.c | 5 ++++- + 1 file changed, 4 insertions(+), 1 deletion(-) + +diff --git a/providers/hns/hns_roce_u_verbs.c b/providers/hns/hns_roce_u_verbs.c +index e30880c..154e800 100644 +--- a/providers/hns/hns_roce_u_verbs.c ++++ b/providers/hns/hns_roce_u_verbs.c +@@ -1357,7 +1357,10 @@ static void qp_free_wqe(struct hns_roce_qp *qp) + + if (qp->rq.wqe_cnt) + free(qp->rq.wrid); +- hns_roce_free_buf(&qp->buf); ++ if (qp->dca_wqe.bufs) ++ free(qp->dca_wqe.bufs); ++ else ++ hns_roce_free_buf(&qp->buf); + } + + static int qp_alloc_wqe(struct ibv_qp_init_attr_ex *attr, +-- +2.33.0 + diff --git a/0041-libhns-Fix-coredump-during-QP-destruction-when-send_.patch b/0041-libhns-Fix-coredump-during-QP-destruction-when-send_.patch new file mode 100644 index 0000000000000000000000000000000000000000..2e9f546ad367c3d7afafd38edbb840a2e18405e0 --- /dev/null +++ b/0041-libhns-Fix-coredump-during-QP-destruction-when-send_.patch @@ -0,0 +1,54 @@ +From 83784fc2538d24f3f06f023c21cc045d5b7f44ce Mon Sep 17 00:00:00 2001 +From: Yuyu Li +Date: Mon, 25 Nov 2024 16:13:48 +0800 +Subject: [PATCH] libhns: Fix coredump during QP destruction when send_cq == + recv_cq + +driver inclusion +category: feature +bugzilla: https://gitee.com/src-openeuler/rdma-core/issues/IB7JZL + +------------------------------------------------------------------ + +If the specified send CQ and recv CQ are both +the same CQ, the QP node in SCQ is not deleted. +which causes a segfault to occur when recreating +the QP. Here fix it. 
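+
+A minimal sketch of the trigger (illustrative only; pd and cq are assumed to
+have been obtained earlier via ibv_alloc_pd() and ibv_create_cq()) is a QP
+whose send_cq and recv_cq refer to the same CQ:
+
+	struct ibv_qp_init_attr attr = {
+		.send_cq = cq,		/* the same CQ serves both directions */
+		.recv_cq = cq,
+		.qp_type = IBV_QPT_RC,
+		.cap = {
+			.max_send_wr = 8, .max_recv_wr = 8,
+			.max_send_sge = 1, .max_recv_sge = 1,
+		},
+	};
+	struct ibv_qp *qp = ibv_create_qp(pd, &attr);
+
+	ibv_destroy_qp(qp);		/* QP node stayed on the shared CQ's list */
+	qp = ibv_create_qp(pd, &attr);	/* recreating a QP then hit the stale node */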
+ +coredump info: +0x0000ffff8fbc37d4 in list_add_before_ +0x0000ffff8fbc381c in list_add_tail_ +0x0000ffff8fbc9d9c in add_qp_to_cq_list +0x0000ffff8fbca008 in create_qp +0x0000ffff8fbca110 in hns_roce_u_create_qp +0x0000ffff8feae39c in __ibv_create_qp_1_1 +0x0000000000401420 in test_ctrl_path + +Fixes: 5494e44cf97e ("Support reporting wc as software mode.") +Signed-off-by: Yuyu Li +Signed-off-by: Xinghai Cen +--- + providers/hns/hns_roce_u_hw_v2.c | 7 ++++--- + 1 file changed, 4 insertions(+), 3 deletions(-) + +diff --git a/providers/hns/hns_roce_u_hw_v2.c b/providers/hns/hns_roce_u_hw_v2.c +index e4232ea..c746e03 100644 +--- a/providers/hns/hns_roce_u_hw_v2.c ++++ b/providers/hns/hns_roce_u_hw_v2.c +@@ -2006,9 +2006,10 @@ static int hns_roce_u_v2_destroy_qp(struct ibv_qp *ibqp) + list_del(&qp->rcq_node); + } + +- if (ibqp->send_cq && ibqp->send_cq != ibqp->recv_cq) { +- __hns_roce_v2_cq_clean(to_hr_cq(ibqp->send_cq), ibqp->qp_num, +- NULL); ++ if (ibqp->send_cq) { ++ if (ibqp->send_cq != ibqp->recv_cq) ++ __hns_roce_v2_cq_clean(to_hr_cq(ibqp->send_cq), ibqp->qp_num, ++ NULL); + list_del(&qp->scq_node); + } + +-- +2.33.0 + diff --git a/0042-libhns-Add-error-logs-to-help-diagnosis.patch b/0042-libhns-Add-error-logs-to-help-diagnosis.patch new file mode 100644 index 0000000000000000000000000000000000000000..9d880ea4a3ed859d0a486f74d864654e94d21693 --- /dev/null +++ b/0042-libhns-Add-error-logs-to-help-diagnosis.patch @@ -0,0 +1,242 @@ +From 60c45b5f7c2cd0c2e7139d472406f071f327bb91 Mon Sep 17 00:00:00 2001 +From: Junxian Huang +Date: Fri, 27 Dec 2024 14:02:29 +0800 +Subject: [PATCH] libhns: Add error logs to help diagnosis + +mainline inclusion +from mainline-master +commit 7849f1b17f89b8baa0065adaf9cd04204698ea82 +category: feature +bugzilla: https://gitee.com/src-openeuler/rdma-core/issues/IBFGPH +CVE: NA + +Reference: https://github.com/linux-rdma/rdma-core/pull/1533/commits/7849f1b17f89b8baa0065adaf9cd04204698ea82 + +---------------------------------------------------------------------- + +Add error logs to help diagnosis. 
+ +Signed-off-by: Junxian Huang +--- + providers/hns/hns_roce_u.c | 4 +- + providers/hns/hns_roce_u_hw_v2.c | 3 ++ + providers/hns/hns_roce_u_verbs.c | 79 ++++++++++++++++++++++++++------ + 3 files changed, 70 insertions(+), 16 deletions(-) + +diff --git a/providers/hns/hns_roce_u.c b/providers/hns/hns_roce_u.c +index e219b9e..ec995e7 100644 +--- a/providers/hns/hns_roce_u.c ++++ b/providers/hns/hns_roce_u.c +@@ -424,8 +424,10 @@ static struct verbs_context *hns_roce_alloc_context(struct ibv_device *ibdev, + + context->uar = mmap(NULL, hr_dev->page_size, PROT_READ | PROT_WRITE, + MAP_SHARED, cmd_fd, 0); +- if (context->uar == MAP_FAILED) ++ if (context->uar == MAP_FAILED) { ++ verbs_err(&context->ibv_ctx, "failed to mmap uar page.\n"); + goto err_set_attr; ++ } + + if (init_dca_context(context, cmd_fd, + &resp, ctx_attr, hr_dev->page_size)) +diff --git a/providers/hns/hns_roce_u_hw_v2.c b/providers/hns/hns_roce_u_hw_v2.c +index c746e03..0628646 100644 +--- a/providers/hns/hns_roce_u_hw_v2.c ++++ b/providers/hns/hns_roce_u_hw_v2.c +@@ -3057,6 +3057,9 @@ static int fill_send_wr_ops(const struct ibv_qp_init_attr_ex *attr, + fill_send_wr_ops_ud(qp_ex); + break; + default: ++ verbs_err(verbs_get_ctx(qp_ex->qp_base.context), ++ "QP type %d not supported for qp_ex send ops.\n", ++ attr->qp_type); + return -EOPNOTSUPP; + } + +diff --git a/providers/hns/hns_roce_u_verbs.c b/providers/hns/hns_roce_u_verbs.c +index c733b21..e9acfab 100644 +--- a/providers/hns/hns_roce_u_verbs.c ++++ b/providers/hns/hns_roce_u_verbs.c +@@ -422,8 +422,11 @@ static int verify_cq_create_attr(struct ibv_cq_init_attr_ex *attr, + { + struct hns_roce_pad *pad = to_hr_pad(attr->parent_domain); + +- if (!attr->cqe || attr->cqe > context->max_cqe) ++ if (!attr->cqe || attr->cqe > context->max_cqe) { ++ verbs_err(&context->ibv_ctx, "unsupported cq depth %u.\n", ++ attr->cqe); + return EINVAL; ++ } + + if (!check_comp_mask(attr->comp_mask, CREATE_CQ_SUPPORTED_COMP_MASK)) { + verbs_err(&context->ibv_ctx, "unsupported cq comps 0x%x\n", +@@ -431,8 +434,11 @@ static int verify_cq_create_attr(struct ibv_cq_init_attr_ex *attr, + return EOPNOTSUPP; + } + +- if (!check_comp_mask(attr->wc_flags, CREATE_CQ_SUPPORTED_WC_FLAGS)) ++ if (!check_comp_mask(attr->wc_flags, CREATE_CQ_SUPPORTED_WC_FLAGS)) { ++ verbs_err(&context->ibv_ctx, "unsupported wc flags 0x%llx.\n", ++ attr->wc_flags); + return EOPNOTSUPP; ++ } + + if (attr->comp_mask & IBV_CQ_INIT_ATTR_MASK_PD) { + if (!pad) { +@@ -504,8 +510,11 @@ static int exec_cq_create_cmd(struct ibv_context *context, + ret = ibv_cmd_create_cq_ex(context, attr, &cq->verbs_cq, + &cmd_ex.ibv_cmd, sizeof(cmd_ex), + &resp_ex.ibv_resp, sizeof(resp_ex), 0); +- if (ret) ++ if (ret) { ++ verbs_err(verbs_get_ctx(context), ++ "failed to exec create cq cmd, ret = %d.\n", ret); + return ret; ++ } + + cq->cqn = resp_drv->cqn; + cq->flags = resp_drv->cap_flags; +@@ -724,13 +733,20 @@ static int verify_srq_create_attr(struct hns_roce_context *context, + struct ibv_srq_init_attr_ex *attr) + { + if (attr->srq_type != IBV_SRQT_BASIC && +- attr->srq_type != IBV_SRQT_XRC) ++ attr->srq_type != IBV_SRQT_XRC) { ++ verbs_err(&context->ibv_ctx, ++ "unsupported srq type, type = %d.\n", attr->srq_type); + return -EINVAL; ++ } + + if (!attr->attr.max_sge || + attr->attr.max_wr > context->max_srq_wr || +- attr->attr.max_sge > context->max_srq_sge) ++ attr->attr.max_sge > context->max_srq_sge) { ++ verbs_err(&context->ibv_ctx, ++ "invalid srq attr size, max_wr = %u, max_sge = %u.\n", ++ attr->attr.max_wr, attr->attr.max_sge); + 
return -EINVAL; ++ } + + attr->attr.max_wr = max_t(uint32_t, attr->attr.max_wr, + HNS_ROCE_MIN_SRQ_WQE_NUM); +@@ -862,8 +878,12 @@ static int exec_srq_create_cmd(struct ibv_context *context, + ret = ibv_cmd_create_srq_ex(context, &srq->verbs_srq, init_attr, + &cmd_ex.ibv_cmd, sizeof(cmd_ex), + &resp_ex.ibv_resp, sizeof(resp_ex)); +- if (ret) ++ if (ret) { ++ verbs_err(verbs_get_ctx(context), ++ "failed to exec create srq cmd, ret = %d.\n", ++ ret); + return ret; ++ } + + srq->srqn = resp_ex.srqn; + srq->cap_flags = resp_ex.cap_flags; +@@ -1086,9 +1106,12 @@ static int check_qp_create_mask(struct hns_roce_context *ctx, + struct ibv_qp_init_attr_ex *attr) + { + struct hns_roce_device *hr_dev = to_hr_dev(ctx->ibv_ctx.context.device); ++ int ret = 0; + +- if (!check_comp_mask(attr->comp_mask, CREATE_QP_SUP_COMP_MASK)) +- return EOPNOTSUPP; ++ if (!check_comp_mask(attr->comp_mask, CREATE_QP_SUP_COMP_MASK)) { ++ ret = EOPNOTSUPP; ++ goto out; ++ } + + if (attr->comp_mask & IBV_QP_INIT_ATTR_SEND_OPS_FLAGS && + !check_comp_mask(attr->send_ops_flags, SEND_OPS_FLAG_MASK)) +@@ -1102,17 +1125,21 @@ static int check_qp_create_mask(struct hns_roce_context *ctx, + case IBV_QPT_RC: + case IBV_QPT_XRC_SEND: + if (!(attr->comp_mask & IBV_QP_INIT_ATTR_PD)) +- return EINVAL; ++ ret = EINVAL; + break; + case IBV_QPT_XRC_RECV: + if (!(attr->comp_mask & IBV_QP_INIT_ATTR_XRCD)) +- return EINVAL; ++ ret = EINVAL; + break; + default: + return EOPNOTSUPP; + } + +- return 0; ++out: ++ if (ret) ++ verbs_err(&ctx->ibv_ctx, "invalid comp_mask 0x%x.\n", ++ attr->comp_mask); ++ return ret; + } + + static int hns_roce_qp_has_rq(struct ibv_qp_init_attr_ex *attr) +@@ -1137,8 +1164,13 @@ static int verify_qp_create_cap(struct hns_roce_context *ctx, + if (cap->max_send_wr > ctx->max_qp_wr || + cap->max_recv_wr > ctx->max_qp_wr || + cap->max_send_sge > ctx->max_sge || +- cap->max_recv_sge > ctx->max_sge) ++ cap->max_recv_sge > ctx->max_sge) { ++ verbs_err(&ctx->ibv_ctx, ++ "invalid qp cap size, max_send/recv_wr = {%u, %u}, max_send/recv_sge = {%u, %u}.\n", ++ cap->max_send_wr, cap->max_recv_wr, ++ cap->max_send_sge, cap->max_recv_sge); + return -EINVAL; ++ } + + has_rq = hns_roce_qp_has_rq(attr); + if (!has_rq) { +@@ -1147,12 +1179,20 @@ static int verify_qp_create_cap(struct hns_roce_context *ctx, + } + + min_wqe_num = HNS_ROCE_V2_MIN_WQE_NUM; +- if (cap->max_send_wr < min_wqe_num) ++ if (cap->max_send_wr < min_wqe_num) { ++ verbs_debug(&ctx->ibv_ctx, ++ "change sq depth from %u to minimum %u.\n", ++ cap->max_send_wr, min_wqe_num); + cap->max_send_wr = min_wqe_num; ++ } + + if (cap->max_recv_wr) { +- if (cap->max_recv_wr < min_wqe_num) ++ if (cap->max_recv_wr < min_wqe_num) { ++ verbs_debug(&ctx->ibv_ctx, ++ "change rq depth from %u to minimum %u.\n", ++ cap->max_recv_wr, min_wqe_num); + cap->max_recv_wr = min_wqe_num; ++ } + + if (!cap->max_recv_sge) + return -EINVAL; +@@ -1646,6 +1686,11 @@ static int qp_exec_create_cmd(struct ibv_qp_init_attr_ex *attr, + ret = ibv_cmd_create_qp_ex2(&ctx->ibv_ctx.context, &qp->verbs_qp, attr, + &cmd_ex.ibv_cmd, sizeof(cmd_ex), + &resp_ex.ibv_resp, sizeof(resp_ex)); ++ if (ret) { ++ verbs_err(&ctx->ibv_ctx, ++ "failed to exec create qp cmd, ret = %d.\n", ret); ++ return ret; ++ } + + qp->flags = resp_ex.drv_payload.cap_flags; + *dwqe_mmap_key = resp_ex.drv_payload.dwqe_mmap_key; +@@ -1707,8 +1752,12 @@ static int mmap_dwqe(struct ibv_context *ibv_ctx, struct hns_roce_qp *qp, + { + qp->dwqe_page = mmap(NULL, HNS_ROCE_DWQE_PAGE_SIZE, PROT_WRITE, + MAP_SHARED, ibv_ctx->cmd_fd, dwqe_mmap_key); +- 
if (qp->dwqe_page == MAP_FAILED) ++ if (qp->dwqe_page == MAP_FAILED) { ++ verbs_err(verbs_get_ctx(ibv_ctx), ++ "failed to mmap direct wqe page, QPN = %u.\n", ++ qp->verbs_qp.qp.qp_num); + return -EINVAL; ++ } + + return 0; + } +-- +2.33.0 + diff --git a/0043-libhns-Fix-missing-fields-for-SRQ-WC.patch b/0043-libhns-Fix-missing-fields-for-SRQ-WC.patch new file mode 100644 index 0000000000000000000000000000000000000000..4058094b8ec2deb786a38e8f4e3eec1dad0842d3 --- /dev/null +++ b/0043-libhns-Fix-missing-fields-for-SRQ-WC.patch @@ -0,0 +1,82 @@ +From b52618371517527ce8ea4b8f5bd2571c7f69a2ba Mon Sep 17 00:00:00 2001 +From: wenglianfa +Date: Wed, 15 Jan 2025 15:55:29 +0800 +Subject: [PATCH] libhns: Fix missing fields for SRQ WC + +mainline inclusion +from mainline-master +commit 65a7ce99cf4bfd6748346206f546e51c0a82c993 +category: bugfix +bugzilla: https://gitee.com/src-openeuler/rdma-core/issues/IBIEA4 +CVE: NA +Reference: https://github.com/linux-rdma/rdma-core/pull/1543/commits/65a7ce99cf4bfd6748346206f546e51c0a82c993 + +---------------------------------------------------------------------- + +The sl and src_qpn fields in recv-WC are not filled when the QP is UD +and has an SRQ. Here fix it. + +In addition, UD QP does not support RQ INLINE and CQE INLINE features. +Reorder the related if-else statements to reduce the number of +conditional checks in IO path. + +Fixes: 061f7e1757ca ("libhns: Refactor the poll one interface") +Signed-off-by: wenglianfa +Signed-off-by: Junxian Huang +Signed-off-by: Xinghai Cen +--- + providers/hns/hns_roce_u_hw_v2.c | 13 ++++++++----- + 1 file changed, 8 insertions(+), 5 deletions(-) + +diff --git a/providers/hns/hns_roce_u_hw_v2.c b/providers/hns/hns_roce_u_hw_v2.c +index 0628646..aadea7a 100644 +--- a/providers/hns/hns_roce_u_hw_v2.c ++++ b/providers/hns/hns_roce_u_hw_v2.c +@@ -519,7 +519,8 @@ static void parse_for_ud_qp(struct hns_roce_v2_cqe *cqe, struct ibv_wc *wc) + } + + static void parse_cqe_for_srq(struct hns_roce_v2_cqe *cqe, struct ibv_wc *wc, +- struct hns_roce_srq *srq) ++ struct hns_roce_srq *srq, ++ struct hns_roce_qp *hr_qp) + { + uint32_t wqe_idx; + +@@ -529,6 +530,8 @@ static void parse_cqe_for_srq(struct hns_roce_v2_cqe *cqe, struct ibv_wc *wc, + + if (hr_reg_read(cqe, CQE_CQE_INLINE)) + handle_recv_cqe_inl_from_srq(cqe, srq); ++ else if (hr_qp->verbs_qp.qp.qp_type == IBV_QPT_UD) ++ parse_for_ud_qp(cqe, wc); + } + + static void parse_cqe_for_resp(struct hns_roce_v2_cqe *cqe, struct ibv_wc *wc, +@@ -540,13 +543,13 @@ static void parse_cqe_for_resp(struct hns_roce_v2_cqe *cqe, struct ibv_wc *wc, + wc->wr_id = wq->wrid[wq->tail & (wq->wqe_cnt - 1)]; + ++wq->tail; + +- if (hr_qp->verbs_qp.qp.qp_type == IBV_QPT_UD) +- parse_for_ud_qp(cqe, wc); +- + if (hr_reg_read(cqe, CQE_CQE_INLINE)) + handle_recv_cqe_inl_from_rq(cqe, hr_qp); + else if (hr_reg_read(cqe, CQE_RQ_INLINE)) + handle_recv_rq_inl(cqe, hr_qp); ++ else if (hr_qp->verbs_qp.qp.qp_type == IBV_QPT_UD) ++ parse_for_ud_qp(cqe, wc); ++ + } + + static void parse_cqe_for_req(struct hns_roce_v2_cqe *cqe, struct ibv_wc *wc, +@@ -753,7 +756,7 @@ static int parse_cqe_for_cq(struct hns_roce_context *ctx, struct hns_roce_cq *cq + return V2_CQ_POLL_ERR; + + if (srq) +- parse_cqe_for_srq(cqe, wc, srq); ++ parse_cqe_for_srq(cqe, wc, srq, cur_qp); + else + parse_cqe_for_resp(cqe, wc, cur_qp); + } +-- +2.33.0 + diff --git a/0044-libxscale-Add-Yunsilicon-User-Space-RDMA-Driver.patch b/0044-libxscale-Add-Yunsilicon-User-Space-RDMA-Driver.patch new file mode 100644 index 
0000000000000000000000000000000000000000..f1a2f74d3e3ad756583c15a01ec1005141e3e9b1 --- /dev/null +++ b/0044-libxscale-Add-Yunsilicon-User-Space-RDMA-Driver.patch @@ -0,0 +1,10916 @@ +From da5f427f7ae7bf0ad0e0941c6e6f4427a0fd323e Mon Sep 17 00:00:00 2001 +From: Xin Tian +Date: Wed, 26 Feb 2025 10:29:59 +0800 +Subject: [PATCH] libxscale: Add Yunsilicon User Space RDMA Driver + +Introduce xscale provider for Yunsilicon devices. + +Signed-off-by: Xin Tian +--- + CMakeLists.txt | 1 + + MAINTAINERS | 6 + + README.md | 1 + + debian/control | 1 + + debian/copyright | 4 + + debian/ibverbs-providers.install | 1 + + debian/libibverbs-dev.install | 2 + + kernel-headers/CMakeLists.txt | 4 + + kernel-headers/rdma/ib_user_ioctl_verbs.h | 1 + + kernel-headers/rdma/xsc-abi.h | 333 +++ + kernel-headers/rdma/xsc_user_ioctl_cmds.h | 163 ++ + kernel-headers/rdma/xsc_user_ioctl_verbs.h | 27 + + libibverbs/verbs.h | 1 + + providers/xscale/CMakeLists.txt | 18 + + providers/xscale/bitmap.h | 84 + + providers/xscale/buf.c | 594 +++++ + providers/xscale/cq.c | 1410 ++++++++++ + providers/xscale/cqm_csr_defines.h | 180 ++ + providers/xscale/dbrec.c | 131 + + providers/xscale/libxsc.map | 59 + + providers/xscale/qp.c | 678 +++++ + providers/xscale/rqm_csr_defines.h | 200 ++ + providers/xscale/sqm_csr_defines.h | 204 ++ + providers/xscale/verbs.c | 2816 ++++++++++++++++++++ + providers/xscale/wqe.h | 72 + + providers/xscale/xsc-abi.h | 56 + + providers/xscale/xsc_api.h | 29 + + providers/xscale/xsc_hsi.h | 252 ++ + providers/xscale/xsc_hw.h | 584 ++++ + providers/xscale/xscale.c | 948 +++++++ + providers/xscale/xscale.h | 834 ++++++ + providers/xscale/xscdv.h | 876 ++++++ + redhat/rdma-core.spec | 4 + + 33 files changed, 10576 insertions(+) + create mode 100644 kernel-headers/rdma/xsc-abi.h + create mode 100644 kernel-headers/rdma/xsc_user_ioctl_cmds.h + create mode 100644 kernel-headers/rdma/xsc_user_ioctl_verbs.h + create mode 100644 providers/xscale/CMakeLists.txt + create mode 100644 providers/xscale/bitmap.h + create mode 100644 providers/xscale/buf.c + create mode 100644 providers/xscale/cq.c + create mode 100644 providers/xscale/cqm_csr_defines.h + create mode 100644 providers/xscale/dbrec.c + create mode 100644 providers/xscale/libxsc.map + create mode 100644 providers/xscale/qp.c + create mode 100644 providers/xscale/rqm_csr_defines.h + create mode 100644 providers/xscale/sqm_csr_defines.h + create mode 100644 providers/xscale/verbs.c + create mode 100644 providers/xscale/wqe.h + create mode 100644 providers/xscale/xsc-abi.h + create mode 100644 providers/xscale/xsc_api.h + create mode 100644 providers/xscale/xsc_hsi.h + create mode 100755 providers/xscale/xsc_hw.h + create mode 100644 providers/xscale/xscale.c + create mode 100644 providers/xscale/xscale.h + create mode 100644 providers/xscale/xscdv.h + +diff --git a/CMakeLists.txt b/CMakeLists.txt +index 432a650..feb338a 100644 +--- a/CMakeLists.txt ++++ b/CMakeLists.txt +@@ -748,6 +748,7 @@ add_subdirectory(providers/mthca) + add_subdirectory(providers/ocrdma) + add_subdirectory(providers/qedr) + add_subdirectory(providers/vmw_pvrdma) ++add_subdirectory(providers/xscale) + add_subdirectory(providers/zrdma) + endif() + +diff --git a/MAINTAINERS b/MAINTAINERS +index aa41217..fdacc6a 100644 +--- a/MAINTAINERS ++++ b/MAINTAINERS +@@ -185,6 +185,12 @@ L: pv-drivers@vmware.com + S: Supported + F: providers/vmw_pvrdma/ + ++XSCALE USERSPACE PROVIDER (for xsc_ib.ko) ++M: Honggang Wei ++M: Xin Tianx ++S: Supported ++F: providers/xscale/ ++ + ZRDMA USERSPACE PROVIDER 
(for zrdma.ko) + M: Li Fuyan + S: Supported +diff --git a/README.md b/README.md +index 8f47d3c..611654c 100644 +--- a/README.md ++++ b/README.md +@@ -31,6 +31,7 @@ is included: + - rdma_rxe.ko + - siw.ko + - vmw_pvrdma.ko ++ - xsc_ib.ko + - zrdma.ko + + Additional service daemons are provided for: +diff --git a/debian/control b/debian/control +index f86cc77..fb0538a 100644 +--- a/debian/control ++++ b/debian/control +@@ -99,6 +99,7 @@ Description: User space provider drivers for libibverbs + - rxe: A software implementation of the RoCE protocol + - siw: A software implementation of the iWarp protocol + - vmw_pvrdma: VMware paravirtual RDMA device ++ - xscale: Yunsilicon RDMA device + - zrdma: ZTE Connection RDMA + + Package: ibverbs-utils +diff --git a/debian/copyright b/debian/copyright +index 7e435b5..98867b8 100644 +--- a/debian/copyright ++++ b/debian/copyright +@@ -228,6 +228,10 @@ Files: providers/vmw_pvrdma/* + Copyright: 2012-2016 VMware, Inc. + License: BSD-2-clause or GPL-2 + ++Files: providers/xscale/* ++Copyright: 2021-2025, Yunsilicon Technology Co., Ltd. ++License: GPL-2 ++ + Files: providers/zrdma/* + Copyright: 2024 ZTE Corporation. + License: BSD-MIT or GPL-2 +diff --git a/debian/ibverbs-providers.install b/debian/ibverbs-providers.install +index 360516f..816a4a5 100644 +--- a/debian/ibverbs-providers.install ++++ b/debian/ibverbs-providers.install +@@ -5,4 +5,5 @@ usr/lib/*/libhns.so.* + usr/lib/*/libmana.so.* + usr/lib/*/libmlx4.so.* + usr/lib/*/libmlx5.so.* ++usr/lib/*/libxscale.so.* + usr/lib/*/libzrdma.so.* +diff --git a/debian/libibverbs-dev.install b/debian/libibverbs-dev.install +index 73dd8c7..4e34968 100644 +--- a/debian/libibverbs-dev.install ++++ b/debian/libibverbs-dev.install +@@ -28,6 +28,8 @@ usr/lib/*/libmlx4.a + usr/lib/*/libmlx4.so + usr/lib/*/libmlx5.a + usr/lib/*/libmlx5.so ++usr/lib/*/libxscale.a ++usr/lib/*/libxscale.so + usr/lib/*/libzrdma.a + usr/lib/*/libzrdma.so + usr/lib/*/pkgconfig/libefa.pc +diff --git a/kernel-headers/CMakeLists.txt b/kernel-headers/CMakeLists.txt +index 9ceac31..76b5e7b 100644 +--- a/kernel-headers/CMakeLists.txt ++++ b/kernel-headers/CMakeLists.txt +@@ -26,6 +26,9 @@ publish_internal_headers(rdma + rdma/rvt-abi.h + rdma/siw-abi.h + rdma/vmw_pvrdma-abi.h ++ rdma/xsc-abi.h ++ rdma/xsc_user_ioctl_cmds.h ++ rdma/xsc_user_ioctl_verbs.h + rdma/zxdh-abi.h + rdma/zxdh_user_ioctl_cmds.h + rdma/zxdh_user_ioctl_verbs.h +@@ -83,6 +86,7 @@ rdma_kernel_provider_abi( + rdma/rdma_user_rxe.h + rdma/siw-abi.h + rdma/vmw_pvrdma-abi.h ++ rdma/xsc-abi.h + rdma/zxdh-abi.h + ) + +diff --git a/kernel-headers/rdma/ib_user_ioctl_verbs.h b/kernel-headers/rdma/ib_user_ioctl_verbs.h +index a31f330..e8b2cc1 100644 +--- a/kernel-headers/rdma/ib_user_ioctl_verbs.h ++++ b/kernel-headers/rdma/ib_user_ioctl_verbs.h +@@ -255,6 +255,7 @@ enum rdma_driver_id { + RDMA_DRIVER_SIW, + RDMA_DRIVER_ERDMA, + RDMA_DRIVER_MANA, ++ RDMA_DRIVER_XSC = 1, + RDMA_DRIVER_ZXDH = 50, + }; + +diff --git a/kernel-headers/rdma/xsc-abi.h b/kernel-headers/rdma/xsc-abi.h +new file mode 100644 +index 0000000..4af6408 +--- /dev/null ++++ b/kernel-headers/rdma/xsc-abi.h +@@ -0,0 +1,333 @@ ++/* ++ * Copyright (c) 2021 - 2022, Shanghai Yunsilicon Technology Co., Ltd. ++ * All rights reserved. ++ */ ++ ++#ifndef XSC_ABI_USER_H ++#define XSC_ABI_USER_H ++ ++#include ++#include /* For ETH_ALEN. 
*/ ++#include ++ ++enum { ++ XSC_WQ_FLAG_SIGNATURE = 1 << 0, ++}; ++ ++/* Make sure that all structs defined in this file remain laid out so ++ * that they pack the same way on 32-bit and 64-bit architectures (to ++ * avoid incompatibility between 32-bit userspace and 64-bit kernels). ++ * In particular do not use pointer types -- pass pointers in __u64 ++ * instead. ++ */ ++ ++struct xsc_ib_alloc_ucontext_req { ++ __u32 rsvd0; ++ __u32 rsvd1; ++}; ++ ++enum xsc_user_cmds_supp_uhw { ++ XSC_USER_CMDS_SUPP_UHW_QUERY_DEVICE = 1 << 0, ++ XSC_USER_CMDS_SUPP_UHW_CREATE_AH = 1 << 1, ++}; ++ ++struct xsc_ib_alloc_ucontext_resp { ++ __u32 qp_tab_size; ++ __u32 cache_line_size; ++ __u16 max_sq_desc_sz; ++ __u16 max_rq_desc_sz; ++ __u32 max_send_wqebb; ++ __u32 max_recv_wr; ++ __u16 num_ports; ++ __u16 device_id; ++ __u64 qpm_tx_db; ++ __u64 qpm_rx_db; ++ __u64 cqm_next_cid_reg; ++ __u64 cqm_armdb; ++ __u32 send_ds_num; ++ __u32 recv_ds_num; ++ __u32 cmds_supp_uhw; ++}; ++ ++struct xsc_ib_alloc_pd_resp { ++ __u32 pdn; ++}; ++ ++struct xsc_ib_tso_caps { ++ __u32 max_tso; /* Maximum tso payload size in bytes */ ++ ++ /* Corresponding bit will be set if qp type from ++ * 'enum ib_qp_type' is supported, e.g. ++ * supported_qpts |= 1 << IB_QPT_UD ++ */ ++ __u32 supported_qpts; ++}; ++ ++struct xsc_ib_rss_caps { ++ __aligned_u64 rx_hash_fields_mask; /* enum xsc_rx_hash_fields */ ++ __u8 rx_hash_function; /* enum xsc_rx_hash_function_flags */ ++ __u8 reserved[7]; ++}; ++ ++enum xsc_ib_cqe_comp_res_format { ++ XSC_IB_CQE_RES_FORMAT_HASH = 1 << 0, ++ XSC_IB_CQE_RES_FORMAT_CSUM = 1 << 1, ++ XSC_IB_CQE_RES_FORMAT_CSUM_STRIDX = 1 << 2, ++}; ++ ++struct xsc_ib_cqe_comp_caps { ++ __u32 max_num; ++ __u32 supported_format; /* enum xsc_ib_cqe_comp_res_format */ ++}; ++ ++enum xsc_ib_packet_pacing_cap_flags { ++ XSC_IB_PP_SUPPORT_BURST = 1 << 0, ++}; ++ ++struct xsc_packet_pacing_caps { ++ __u32 qp_rate_limit_min; ++ __u32 qp_rate_limit_max; /* In kpbs */ ++ ++ /* Corresponding bit will be set if qp type from ++ * 'enum ib_qp_type' is supported, e.g. ++ * supported_qpts |= 1 << IB_QPT_RAW_PACKET ++ */ ++ __u32 supported_qpts; ++ __u8 cap_flags; /* enum xsc_ib_packet_pacing_cap_flags */ ++ __u8 reserved[3]; ++}; ++ ++enum xsc_ib_mpw_caps { ++ MPW_RESERVED = 1 << 0, ++ XSC_IB_ALLOW_MPW = 1 << 1, ++ XSC_IB_SUPPORT_EMPW = 1 << 2, ++}; ++ ++enum xsc_ib_sw_parsing_offloads { ++ XSC_IB_SW_PARSING = 1 << 0, ++ XSC_IB_SW_PARSING_CSUM = 1 << 1, ++ XSC_IB_SW_PARSING_LSO = 1 << 2, ++}; ++ ++struct xsc_ib_sw_parsing_caps { ++ __u32 sw_parsing_offloads; /* enum xsc_ib_sw_parsing_offloads */ ++ ++ /* Corresponding bit will be set if qp type from ++ * 'enum ib_qp_type' is supported, e.g. ++ * supported_qpts |= 1 << IB_QPT_RAW_PACKET ++ */ ++ __u32 supported_qpts; ++}; ++ ++struct xsc_ib_striding_rq_caps { ++ __u32 min_single_stride_log_num_of_bytes; ++ __u32 max_single_stride_log_num_of_bytes; ++ __u32 min_single_wqe_log_num_of_strides; ++ __u32 max_single_wqe_log_num_of_strides; ++ ++ /* Corresponding bit will be set if qp type from ++ * 'enum ib_qp_type' is supported, e.g. 
++ * supported_qpts |= 1 << IB_QPT_RAW_PACKET ++ */ ++ __u32 supported_qpts; ++ __u32 reserved; ++}; ++ ++enum xsc_ib_query_dev_resp_flags { ++ /* Support 128B CQE compression */ ++ XSC_IB_QUERY_DEV_RESP_FLAGS_CQE_128B_COMP = 1 << 0, ++ XSC_IB_QUERY_DEV_RESP_FLAGS_CQE_128B_PAD = 1 << 1, ++}; ++ ++enum xsc_ib_tunnel_offloads { ++ XSC_IB_TUNNELED_OFFLOADS_VXLAN = 1 << 0, ++ XSC_IB_TUNNELED_OFFLOADS_GRE = 1 << 1, ++ XSC_IB_TUNNELED_OFFLOADS_GENEVE = 1 << 2, ++ XSC_IB_TUNNELED_OFFLOADS_MPLS_GRE = 1 << 3, ++ XSC_IB_TUNNELED_OFFLOADS_MPLS_UDP = 1 << 4, ++}; ++ ++struct xsc_ib_query_device_resp { ++ __u32 comp_mask; ++ __u32 response_length; ++ struct xsc_ib_tso_caps tso_caps; ++ struct xsc_ib_rss_caps rss_caps; ++ struct xsc_ib_cqe_comp_caps cqe_comp_caps; ++ struct xsc_packet_pacing_caps packet_pacing_caps; ++ __u32 xsc_ib_support_multi_pkt_send_wqes; ++ __u32 flags; /* Use enum xsc_ib_query_dev_resp_flags */ ++ struct xsc_ib_sw_parsing_caps sw_parsing_caps; ++ struct xsc_ib_striding_rq_caps striding_rq_caps; ++ __u32 tunnel_offloads_caps; /* enum xsc_ib_tunnel_offloads */ ++ __u32 reserved; ++}; ++ ++struct xsc_ib_create_cq { ++ __aligned_u64 buf_addr; ++ __aligned_u64 db_addr; ++ __u32 cqe_size; ++}; ++ ++struct xsc_ib_create_cq_resp { ++ __u32 cqn; ++ __u32 reserved; ++}; ++ ++struct xsc_ib_resize_cq { ++ __aligned_u64 buf_addr; ++ __u16 cqe_size; ++ __u16 reserved0; ++ __u32 reserved1; ++}; ++ ++struct xsc_ib_create_qp { ++ __aligned_u64 buf_addr; ++ __aligned_u64 db_addr; ++ __u32 sq_wqe_count; ++ __u32 rq_wqe_count; ++ __u32 rq_wqe_shift; ++ __u32 flags; ++}; ++ ++/* RX Hash function flags */ ++enum xsc_rx_hash_function_flags { ++ XSC_RX_HASH_FUNC_TOEPLITZ = 1 << 0, ++}; ++ ++/* ++ * RX Hash flags, these flags allows to set which incoming packet's field should ++ * participates in RX Hash. Each flag represent certain packet's field, ++ * when the flag is set the field that is represented by the flag will ++ * participate in RX Hash calculation. ++ * Note: *IPV4 and *IPV6 flags can't be enabled together on the same QP ++ * and *TCP and *UDP flags can't be enabled together on the same QP. 
++*/ ++enum xsc_rx_hash_fields { ++ XSC_RX_HASH_SRC_IPV4 = 1 << 0, ++ XSC_RX_HASH_DST_IPV4 = 1 << 1, ++ XSC_RX_HASH_SRC_IPV6 = 1 << 2, ++ XSC_RX_HASH_DST_IPV6 = 1 << 3, ++ XSC_RX_HASH_SRC_PORT_TCP = 1 << 4, ++ XSC_RX_HASH_DST_PORT_TCP = 1 << 5, ++ XSC_RX_HASH_SRC_PORT_UDP = 1 << 6, ++ XSC_RX_HASH_DST_PORT_UDP = 1 << 7, ++ XSC_RX_HASH_IPSEC_SPI = 1 << 8, ++ /* Save bits for future fields */ ++ XSC_RX_HASH_INNER = (1UL << 31), ++}; ++ ++struct xsc_ib_create_qp_rss { ++ __aligned_u64 rx_hash_fields_mask; /* enum xscd_rx_hash_fields */ ++ __u8 rx_hash_function; /* enum xsc_rx_hash_function_flags */ ++ __u8 rx_key_len; /* valid only for Toeplitz */ ++ __u8 reserved[6]; ++ __u8 rx_hash_key[128]; /* valid only for Toeplitz */ ++ __u32 comp_mask; ++ __u32 flags; ++}; ++ ++struct xsc_ib_create_qp_resp { ++ __u32 bfreg_index; ++ __u32 resv; ++}; ++ ++enum xsc_ib_create_wq_mask { ++ XSC_IB_CREATE_WQ_STRIDING_RQ = (1 << 0), ++}; ++ ++struct xsc_ib_create_wq { ++ __aligned_u64 buf_addr; ++ __aligned_u64 db_addr; ++ __u32 rq_wqe_count; ++ __u32 rq_wqe_shift; ++ __u32 user_index; ++ __u32 flags; ++ __u32 comp_mask; ++ __u32 single_stride_log_num_of_bytes; ++ __u32 single_wqe_log_num_of_strides; ++ __u32 two_byte_shift_en; ++}; ++ ++struct xsc_ib_create_ah_resp { ++ __u32 response_length; ++ __u8 dmac[ETH_ALEN]; ++ __u8 reserved[6]; ++}; ++ ++struct xsc_ib_burst_info { ++ __u32 max_burst_sz; ++ __u16 typical_pkt_sz; ++ __u16 reserved; ++}; ++ ++struct xsc_ib_modify_qp { ++ __u32 comp_mask; ++ struct xsc_ib_burst_info burst_info; ++ __u32 reserved; ++}; ++ ++struct xsc_ib_modify_qp_resp { ++ __u32 response_length; ++ __u32 dctn; ++}; ++ ++struct xsc_ib_create_wq_resp { ++ __u32 response_length; ++ __u32 reserved; ++}; ++ ++struct xsc_ib_modify_wq { ++ __u32 comp_mask; ++ __u32 reserved; ++}; ++ ++struct xsc_ib_clock_info { ++ __u32 sign; ++ __u32 resv; ++ __aligned_u64 nsec; ++ __aligned_u64 cycles; ++ __aligned_u64 frac; ++ __u32 mult; ++ __u32 shift; ++ __aligned_u64 mask; ++ __aligned_u64 overflow_period; ++}; ++ ++enum xsc_ib_mmap_cmd { ++ XSC_IB_MMAP_REGULAR_PAGE = 0, ++ XSC_IB_MMAP_GET_CONTIGUOUS_PAGES = 1, ++ XSC_IB_MMAP_WC_PAGE = 2, ++ XSC_IB_MMAP_NC_PAGE = 3, ++ XSC_IB_MMAP_CORE_CLOCK = 5, ++ XSC_IB_MMAP_ALLOC_WC = 6, ++ XSC_IB_MMAP_CLOCK_INFO = 7, ++ XSC_IB_MMAP_DEVICE_MEM = 8, ++}; ++ ++enum { ++ XSC_IB_CLOCK_INFO_KERNEL_UPDATING = 1, ++}; ++ ++struct xsc_ib_flow_counters_desc { ++ __u32 description; ++ __u32 index; ++}; ++ ++struct xsc_ib_flow_counters_data { ++ RDMA_UAPI_PTR(struct xsc_ib_flow_counters_desc *, counters_data); ++ __u32 ncounters; ++ __u32 reserved; ++}; ++ ++struct xsc_ib_create_flow { ++ __u32 ncounters_data; ++ __u32 reserved; ++ /* ++ * Following are counters data based on ncounters_data, each ++ * entry in the data[] should match a corresponding counter object ++ * that was pointed by a counters spec upon the flow creation ++ */ ++ struct xsc_ib_flow_counters_data data[]; ++}; ++ ++#endif /* XSC_ABI_USER_H */ +diff --git a/kernel-headers/rdma/xsc_user_ioctl_cmds.h b/kernel-headers/rdma/xsc_user_ioctl_cmds.h +new file mode 100644 +index 0000000..590a061 +--- /dev/null ++++ b/kernel-headers/rdma/xsc_user_ioctl_cmds.h +@@ -0,0 +1,163 @@ ++/* ++ * Copyright (c) 2021 - 2022, Shanghai Yunsilicon Technology Co., Ltd. ++ * All rights reserved. 
++ */ ++ ++#ifndef XSC_USER_IOCTL_CMDS_H ++#define XSC_USER_IOCTL_CMDS_H ++ ++#include ++#include ++ ++enum xsc_ib_create_flow_action_attrs { ++ /* This attribute belong to the driver namespace */ ++ XSC_IB_ATTR_CREATE_FLOW_ACTION_FLAGS = (1U << UVERBS_ID_NS_SHIFT), ++}; ++ ++enum xsc_ib_alloc_dm_attrs { ++ XSC_IB_ATTR_ALLOC_DM_RESP_START_OFFSET = (1U << UVERBS_ID_NS_SHIFT), ++ XSC_IB_ATTR_ALLOC_DM_RESP_PAGE_INDEX, ++}; ++ ++enum xsc_ib_devx_methods { ++ XSC_IB_METHOD_DEVX_OTHER = (1U << UVERBS_ID_NS_SHIFT), ++ XSC_IB_METHOD_DEVX_QUERY_UAR, ++ XSC_IB_METHOD_DEVX_QUERY_EQN, ++}; ++ ++enum xsc_ib_devx_other_attrs { ++ XSC_IB_ATTR_DEVX_OTHER_CMD_IN = (1U << UVERBS_ID_NS_SHIFT), ++ XSC_IB_ATTR_DEVX_OTHER_CMD_OUT, ++}; ++ ++enum xsc_ib_devx_obj_create_attrs { ++ XSC_IB_ATTR_DEVX_OBJ_CREATE_HANDLE = (1U << UVERBS_ID_NS_SHIFT), ++ XSC_IB_ATTR_DEVX_OBJ_CREATE_CMD_IN, ++ XSC_IB_ATTR_DEVX_OBJ_CREATE_CMD_OUT, ++}; ++ ++enum xsc_ib_devx_query_uar_attrs { ++ XSC_IB_ATTR_DEVX_QUERY_UAR_USER_IDX = (1U << UVERBS_ID_NS_SHIFT), ++ XSC_IB_ATTR_DEVX_QUERY_UAR_DEV_IDX, ++}; ++ ++enum xsc_ib_devx_obj_destroy_attrs { ++ XSC_IB_ATTR_DEVX_OBJ_DESTROY_HANDLE = (1U << UVERBS_ID_NS_SHIFT), ++}; ++ ++enum xsc_ib_devx_obj_modify_attrs { ++ XSC_IB_ATTR_DEVX_OBJ_MODIFY_HANDLE = (1U << UVERBS_ID_NS_SHIFT), ++ XSC_IB_ATTR_DEVX_OBJ_MODIFY_CMD_IN, ++ XSC_IB_ATTR_DEVX_OBJ_MODIFY_CMD_OUT, ++}; ++ ++enum xsc_ib_devx_obj_query_attrs { ++ XSC_IB_ATTR_DEVX_OBJ_QUERY_HANDLE = (1U << UVERBS_ID_NS_SHIFT), ++ XSC_IB_ATTR_DEVX_OBJ_QUERY_CMD_IN, ++ XSC_IB_ATTR_DEVX_OBJ_QUERY_CMD_OUT, ++}; ++ ++enum xsc_ib_devx_query_eqn_attrs { ++ XSC_IB_ATTR_DEVX_QUERY_EQN_USER_VEC = (1U << UVERBS_ID_NS_SHIFT), ++ XSC_IB_ATTR_DEVX_QUERY_EQN_DEV_EQN, ++}; ++ ++enum xsc_ib_devx_obj_methods { ++ XSC_IB_METHOD_DEVX_OBJ_CREATE = (1U << UVERBS_ID_NS_SHIFT), ++ XSC_IB_METHOD_DEVX_OBJ_DESTROY, ++ XSC_IB_METHOD_DEVX_OBJ_MODIFY, ++ XSC_IB_METHOD_DEVX_OBJ_QUERY, ++}; ++ ++enum xsc_ib_devx_umem_reg_attrs { ++ XSC_IB_ATTR_DEVX_UMEM_REG_HANDLE = (1U << UVERBS_ID_NS_SHIFT), ++ XSC_IB_ATTR_DEVX_UMEM_REG_ADDR, ++ XSC_IB_ATTR_DEVX_UMEM_REG_LEN, ++ XSC_IB_ATTR_DEVX_UMEM_REG_ACCESS, ++ XSC_IB_ATTR_DEVX_UMEM_REG_OUT_ID, ++}; ++ ++enum xsc_ib_devx_umem_dereg_attrs { ++ XSC_IB_ATTR_DEVX_UMEM_DEREG_HANDLE = (1U << UVERBS_ID_NS_SHIFT), ++}; ++ ++enum xsc_ib_devx_umem_methods { ++ XSC_IB_METHOD_DEVX_UMEM_REG = (1U << UVERBS_ID_NS_SHIFT), ++ XSC_IB_METHOD_DEVX_UMEM_DEREG, ++}; ++ ++enum xsc_ib_objects { ++ XSC_IB_OBJECT_DEVX = (1U << UVERBS_ID_NS_SHIFT), ++ XSC_IB_OBJECT_DEVX_OBJ, ++ XSC_IB_OBJECT_DEVX_UMEM, ++ XSC_IB_OBJECT_FLOW_MATCHER, ++}; ++ ++enum xsc_ib_flow_matcher_create_attrs { ++ XSC_IB_ATTR_FLOW_MATCHER_CREATE_HANDLE = (1U << UVERBS_ID_NS_SHIFT), ++ XSC_IB_ATTR_FLOW_MATCHER_MATCH_MASK, ++ XSC_IB_ATTR_FLOW_MATCHER_FLOW_TYPE, ++ XSC_IB_ATTR_FLOW_MATCHER_MATCH_CRITERIA, ++ XSC_IB_ATTR_FLOW_MATCHER_FLOW_FLAGS, ++}; ++ ++enum xsc_ib_flow_matcher_destroy_attrs { ++ XSC_IB_ATTR_FLOW_MATCHER_DESTROY_HANDLE = (1U << UVERBS_ID_NS_SHIFT), ++}; ++ ++enum xsc_ib_flow_matcher_methods { ++ XSC_IB_METHOD_FLOW_MATCHER_CREATE = (1U << UVERBS_ID_NS_SHIFT), ++ XSC_IB_METHOD_FLOW_MATCHER_DESTROY, ++}; ++ ++#define XSC_IB_DW_MATCH_PARAM 0x80 ++ ++struct xsc_ib_match_params { ++ __u32 match_params[XSC_IB_DW_MATCH_PARAM]; ++}; ++ ++enum xsc_ib_flow_type { ++ XSC_IB_FLOW_TYPE_NORMAL, ++ XSC_IB_FLOW_TYPE_SNIFFER, ++ XSC_IB_FLOW_TYPE_ALL_DEFAULT, ++ XSC_IB_FLOW_TYPE_MC_DEFAULT, ++}; ++ ++enum xsc_ib_create_flow_attrs { ++ XSC_IB_ATTR_CREATE_FLOW_HANDLE = (1U << UVERBS_ID_NS_SHIFT), ++ 
XSC_IB_ATTR_CREATE_FLOW_MATCH_VALUE, ++ XSC_IB_ATTR_CREATE_FLOW_DEST_QP, ++ XSC_IB_ATTR_CREATE_FLOW_DEST_DEVX, ++ XSC_IB_ATTR_CREATE_FLOW_MATCHER, ++ XSC_IB_ATTR_CREATE_FLOW_ARR_FLOW_ACTIONS, ++ XSC_IB_ATTR_CREATE_FLOW_TAG, ++}; ++ ++enum xsc_ib_destoy_flow_attrs { ++ XSC_IB_ATTR_DESTROY_FLOW_HANDLE = (1U << UVERBS_ID_NS_SHIFT), ++}; ++ ++enum xsc_ib_flow_methods { ++ XSC_IB_METHOD_CREATE_FLOW = (1U << UVERBS_ID_NS_SHIFT), ++ XSC_IB_METHOD_DESTROY_FLOW, ++}; ++ ++enum xsc_ib_flow_action_methods { ++ XSC_IB_METHOD_FLOW_ACTION_CREATE_MODIFY_HEADER = (1U << UVERBS_ID_NS_SHIFT), ++ XSC_IB_METHOD_FLOW_ACTION_CREATE_PACKET_REFORMAT, ++}; ++ ++enum xsc_ib_create_flow_action_create_modify_header_attrs { ++ XSC_IB_ATTR_CREATE_MODIFY_HEADER_HANDLE = (1U << UVERBS_ID_NS_SHIFT), ++ XSC_IB_ATTR_CREATE_MODIFY_HEADER_ACTIONS_PRM, ++ XSC_IB_ATTR_CREATE_MODIFY_HEADER_FT_TYPE, ++}; ++ ++enum xsc_ib_create_flow_action_create_packet_reformat_attrs { ++ XSC_IB_ATTR_CREATE_PACKET_REFORMAT_HANDLE = (1U << UVERBS_ID_NS_SHIFT), ++ XSC_IB_ATTR_CREATE_PACKET_REFORMAT_TYPE, ++ XSC_IB_ATTR_CREATE_PACKET_REFORMAT_FT_TYPE, ++ XSC_IB_ATTR_CREATE_PACKET_REFORMAT_DATA_BUF, ++}; ++ ++#endif +diff --git a/kernel-headers/rdma/xsc_user_ioctl_verbs.h b/kernel-headers/rdma/xsc_user_ioctl_verbs.h +new file mode 100644 +index 0000000..ba84f57 +--- /dev/null ++++ b/kernel-headers/rdma/xsc_user_ioctl_verbs.h +@@ -0,0 +1,27 @@ ++/* ++ * Copyright (c) 2021 - 2022, Shanghai Yunsilicon Technology Co., Ltd. ++ * All rights reserved. ++ */ ++ ++#ifndef XSC_USER_IOCTL_VERBS_H ++#define XSC_USER_IOCTL_VERBS_H ++ ++#include ++ ++enum xsc_ib_uapi_flow_action_flags { ++ XSC_IB_UAPI_FLOW_ACTION_FLAGS_REQUIRE_METADATA = 1 << 0, ++}; ++ ++enum xsc_ib_uapi_flow_table_type { ++ XSC_IB_UAPI_FLOW_TABLE_TYPE_NIC_RX = 0x0, ++ XSC_IB_UAPI_FLOW_TABLE_TYPE_NIC_TX = 0x1, ++}; ++ ++enum xsc_ib_uapi_flow_action_packet_reformat_type { ++ XSC_IB_UAPI_FLOW_ACTION_PACKET_REFORMAT_TYPE_L2_TUNNEL_TO_L2 = 0x0, ++ XSC_IB_UAPI_FLOW_ACTION_PACKET_REFORMAT_TYPE_L2_TO_L2_TUNNEL = 0x1, ++ XSC_IB_UAPI_FLOW_ACTION_PACKET_REFORMAT_TYPE_L3_TUNNEL_TO_L2 = 0x2, ++ XSC_IB_UAPI_FLOW_ACTION_PACKET_REFORMAT_TYPE_L2_TO_L3_TUNNEL = 0x3, ++}; ++ ++#endif +diff --git a/libibverbs/verbs.h b/libibverbs/verbs.h +index be0e76b..fd10643 100644 +--- a/libibverbs/verbs.h ++++ b/libibverbs/verbs.h +@@ -2275,6 +2275,7 @@ extern const struct verbs_device_ops verbs_provider_qedr; + extern const struct verbs_device_ops verbs_provider_rxe; + extern const struct verbs_device_ops verbs_provider_siw; + extern const struct verbs_device_ops verbs_provider_vmw_pvrdma; ++extern const struct verbs_device_ops verbs_provider_xscale; + extern const struct verbs_device_ops verbs_provider_zrdma; + extern const struct verbs_device_ops verbs_provider_all; + extern const struct verbs_device_ops verbs_provider_none; +diff --git a/providers/xscale/CMakeLists.txt b/providers/xscale/CMakeLists.txt +new file mode 100644 +index 0000000..1188db1 +--- /dev/null ++++ b/providers/xscale/CMakeLists.txt +@@ -0,0 +1,18 @@ ++rdma_shared_provider(xscale libxsc.map ++ 1 1.24.${PACKAGE_VERSION} ++ buf.c ++ cq.c ++ dbrec.c ++ xscale.c ++ qp.c ++ verbs.c ++) ++ ++publish_headers(infiniband ++ ../../kernel-headers/rdma/xsc_user_ioctl_verbs.h ++ ../../kernel-headers/rdma/xsc_user_ioctl_cmds.h ++ xsc_api.h ++ xscdv.h ++) ++ ++rdma_pkg_config("xscale" "libibverbs" "${CMAKE_THREAD_LIBS_INIT}") +diff --git a/providers/xscale/bitmap.h b/providers/xscale/bitmap.h +new file mode 100644 +index 0000000..ef7f202 +--- /dev/null ++++ 
b/providers/xscale/bitmap.h +@@ -0,0 +1,84 @@ ++/* ++ * Copyright (c) 2021 - 2022, Shanghai Yunsilicon Technology Co., Ltd. ++ * All rights reserved. ++ */ ++ ++#ifndef BITMAP_H ++#define BITMAP_H ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include "xscale.h" ++ ++/* Only ia64 requires this */ ++#ifdef __ia64__ ++#define XSC_SHM_ADDR ((void *)0x8000000000000000UL) ++#define XSC_SHMAT_FLAGS (SHM_RND) ++#else ++#define XSC_SHM_ADDR NULL ++#define XSC_SHMAT_FLAGS 0 ++#endif ++ ++#define BITS_PER_LONG (8 * sizeof(long)) ++#define BITS_TO_LONGS(nr) DIV_ROUND_UP(nr, BITS_PER_LONG) ++ ++#ifndef HPAGE_SIZE ++#define HPAGE_SIZE (2UL * 1024 * 1024) ++#endif ++ ++#define XSC_SHM_LENGTH HPAGE_SIZE ++#define XSC_Q_CHUNK_SIZE 32768 ++#define XSC_SHM_NUM_REGION 64 ++ ++static inline unsigned long xsc_ffz(uint32_t word) ++{ ++ return __builtin_ffs(~word) - 1; ++} ++ ++static inline uint32_t xsc_find_first_zero_bit(const unsigned long *addr, ++ uint32_t size) ++{ ++ const unsigned long *p = addr; ++ uint32_t result = 0; ++ unsigned long tmp; ++ ++ while (size & ~(BITS_PER_LONG - 1)) { ++ tmp = *(p++); ++ if (~tmp) ++ goto found; ++ result += BITS_PER_LONG; ++ size -= BITS_PER_LONG; ++ } ++ if (!size) ++ return result; ++ ++ tmp = (*p) | (~0UL << size); ++ if (tmp == (uint32_t)~0UL) /* Are any bits zero? */ ++ return result + size; /* Nope. */ ++found: ++ return result + xsc_ffz(tmp); ++} ++ ++static inline void xsc_set_bit(unsigned int nr, unsigned long *addr) ++{ ++ addr[(nr / BITS_PER_LONG)] |= (1 << (nr % BITS_PER_LONG)); ++} ++ ++static inline void xsc_clear_bit(unsigned int nr, unsigned long *addr) ++{ ++ addr[(nr / BITS_PER_LONG)] &= ~(1 << (nr % BITS_PER_LONG)); ++} ++ ++static inline int xsc_test_bit(unsigned int nr, const unsigned long *addr) ++{ ++ return !!(addr[(nr / BITS_PER_LONG)] & (1 << (nr % BITS_PER_LONG))); ++} ++ ++#endif +diff --git a/providers/xscale/buf.c b/providers/xscale/buf.c +new file mode 100644 +index 0000000..61daf6d +--- /dev/null ++++ b/providers/xscale/buf.c +@@ -0,0 +1,594 @@ ++/* ++ * Copyright (c) 2021 - 2022, Shanghai Yunsilicon Technology Co., Ltd. ++ * All rights reserved. ++ */ ++ ++#include ++ ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include "xscale.h" ++#include "bitmap.h" ++ ++static int xsc_bitmap_init(struct xsc_bitmap *bitmap, uint32_t num, ++ uint32_t mask) ++{ ++ bitmap->last = 0; ++ bitmap->top = 0; ++ bitmap->max = num; ++ bitmap->avail = num; ++ bitmap->mask = mask; ++ bitmap->avail = bitmap->max; ++ bitmap->table = calloc(BITS_TO_LONGS(bitmap->max), sizeof(*bitmap->table)); ++ if (!bitmap->table) ++ return -ENOMEM; ++ ++ return 0; ++} ++ ++static void bitmap_free_range(struct xsc_bitmap *bitmap, uint32_t obj, ++ int cnt) ++{ ++ int i; ++ ++ obj &= bitmap->max - 1; ++ ++ for (i = 0; i < cnt; i++) ++ xsc_clear_bit(obj + i, bitmap->table); ++ bitmap->last = min(bitmap->last, obj); ++ bitmap->top = (bitmap->top + bitmap->max) & bitmap->mask; ++ bitmap->avail += cnt; ++} ++ ++static int bitmap_empty(struct xsc_bitmap *bitmap) ++{ ++ return (bitmap->avail == bitmap->max) ? 
1 : 0; ++} ++ ++static int bitmap_avail(struct xsc_bitmap *bitmap) ++{ ++ return bitmap->avail; ++} ++ ++static void xsc_bitmap_cleanup(struct xsc_bitmap *bitmap) ++{ ++ if (bitmap->table) ++ free(bitmap->table); ++} ++ ++static void free_huge_mem(struct xsc_hugetlb_mem *hmem) ++{ ++ xsc_bitmap_cleanup(&hmem->bitmap); ++ if (shmdt(hmem->shmaddr) == -1) ++ xsc_dbg(stderr, XSC_DBG_CONTIG, "%s\n", strerror(errno)); ++ shmctl(hmem->shmid, IPC_RMID, NULL); ++ free(hmem); ++} ++ ++static int xsc_bitmap_alloc(struct xsc_bitmap *bitmap) ++{ ++ uint32_t obj; ++ int ret; ++ ++ obj = xsc_find_first_zero_bit(bitmap->table, bitmap->max); ++ if (obj < bitmap->max) { ++ xsc_set_bit(obj, bitmap->table); ++ bitmap->last = (obj + 1); ++ if (bitmap->last == bitmap->max) ++ bitmap->last = 0; ++ obj |= bitmap->top; ++ ret = obj; ++ } else ++ ret = -1; ++ ++ if (ret != -1) ++ --bitmap->avail; ++ ++ return ret; ++} ++ ++static uint32_t find_aligned_range(unsigned long *bitmap, ++ uint32_t start, uint32_t nbits, ++ int len, int alignment) ++{ ++ uint32_t end, i; ++ ++again: ++ start = align(start, alignment); ++ ++ while ((start < nbits) && xsc_test_bit(start, bitmap)) ++ start += alignment; ++ ++ if (start >= nbits) ++ return -1; ++ ++ end = start + len; ++ if (end > nbits) ++ return -1; ++ ++ for (i = start + 1; i < end; i++) { ++ if (xsc_test_bit(i, bitmap)) { ++ start = i + 1; ++ goto again; ++ } ++ } ++ ++ return start; ++} ++ ++static int bitmap_alloc_range(struct xsc_bitmap *bitmap, int cnt, ++ int align) ++{ ++ uint32_t obj; ++ int ret, i; ++ ++ if (cnt == 1 && align == 1) ++ return xsc_bitmap_alloc(bitmap); ++ ++ if (cnt > bitmap->max) ++ return -1; ++ ++ obj = find_aligned_range(bitmap->table, bitmap->last, ++ bitmap->max, cnt, align); ++ if (obj >= bitmap->max) { ++ bitmap->top = (bitmap->top + bitmap->max) & bitmap->mask; ++ obj = find_aligned_range(bitmap->table, 0, bitmap->max, ++ cnt, align); ++ } ++ ++ if (obj < bitmap->max) { ++ for (i = 0; i < cnt; i++) ++ xsc_set_bit(obj + i, bitmap->table); ++ if (obj == bitmap->last) { ++ bitmap->last = (obj + cnt); ++ if (bitmap->last >= bitmap->max) ++ bitmap->last = 0; ++ } ++ obj |= bitmap->top; ++ ret = obj; ++ } else ++ ret = -1; ++ ++ if (ret != -1) ++ bitmap->avail -= cnt; ++ ++ return obj; ++} ++ ++static struct xsc_hugetlb_mem *alloc_huge_mem(size_t size) ++{ ++ struct xsc_hugetlb_mem *hmem; ++ size_t shm_len; ++ ++ hmem = malloc(sizeof(*hmem)); ++ if (!hmem) ++ return NULL; ++ ++ shm_len = align(size, XSC_SHM_LENGTH); ++ hmem->shmid = shmget(IPC_PRIVATE, shm_len, SHM_HUGETLB | SHM_R | SHM_W); ++ if (hmem->shmid == -1) { ++ xsc_dbg(stderr, XSC_DBG_CONTIG, "%s\n", strerror(errno)); ++ goto out_free; ++ } ++ ++ hmem->shmaddr = shmat(hmem->shmid, XSC_SHM_ADDR, XSC_SHMAT_FLAGS); ++ if (hmem->shmaddr == (void *)-1) { ++ xsc_dbg(stderr, XSC_DBG_CONTIG, "%s\n", strerror(errno)); ++ goto out_rmid; ++ } ++ ++ if (xsc_bitmap_init(&hmem->bitmap, shm_len / XSC_Q_CHUNK_SIZE, ++ shm_len / XSC_Q_CHUNK_SIZE - 1)) { ++ xsc_dbg(stderr, XSC_DBG_CONTIG, "%s\n", strerror(errno)); ++ goto out_shmdt; ++ } ++ ++ /* ++ * Marked to be destroyed when process detaches from shmget segment ++ */ ++ shmctl(hmem->shmid, IPC_RMID, NULL); ++ ++ return hmem; ++ ++out_shmdt: ++ if (shmdt(hmem->shmaddr) == -1) ++ xsc_dbg(stderr, XSC_DBG_CONTIG, "%s\n", strerror(errno)); ++ ++out_rmid: ++ shmctl(hmem->shmid, IPC_RMID, NULL); ++ ++out_free: ++ free(hmem); ++ return NULL; ++} ++ ++static int alloc_huge_buf(struct xsc_context *xctx, struct xsc_buf *buf, ++ size_t size, int page_size) ++{ 
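++ /*
++ * Round the request up to whole XSC_Q_CHUNK_SIZE chunks, carve the
++ * range out of an already attached hugetlb (shmget) segment on
++ * xctx->hugetlb_list when one has enough free chunks, and otherwise
++ * attach a fresh segment before retrying the bitmap allocation.
++ */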
++ int found = 0; ++ int nchunk; ++ struct xsc_hugetlb_mem *hmem; ++ int ret; ++ ++ buf->length = align(size, XSC_Q_CHUNK_SIZE); ++ nchunk = buf->length / XSC_Q_CHUNK_SIZE; ++ ++ if (!nchunk) ++ return 0; ++ ++ xsc_spin_lock(&xctx->hugetlb_lock); ++ list_for_each(&xctx->hugetlb_list, hmem, entry) { ++ if (bitmap_avail(&hmem->bitmap)) { ++ buf->base = bitmap_alloc_range(&hmem->bitmap, nchunk, 1); ++ if (buf->base != -1) { ++ buf->hmem = hmem; ++ found = 1; ++ break; ++ } ++ } ++ } ++ xsc_spin_unlock(&xctx->hugetlb_lock); ++ ++ if (!found) { ++ hmem = alloc_huge_mem(buf->length); ++ if (!hmem) ++ return -1; ++ ++ buf->base = bitmap_alloc_range(&hmem->bitmap, nchunk, 1); ++ if (buf->base == -1) { ++ free_huge_mem(hmem); ++ /* TBD: remove after proven stability */ ++ fprintf(stderr, "BUG: huge allocation\n"); ++ return -1; ++ } ++ ++ buf->hmem = hmem; ++ ++ xsc_spin_lock(&xctx->hugetlb_lock); ++ if (bitmap_avail(&hmem->bitmap)) ++ list_add(&xctx->hugetlb_list, &hmem->entry); ++ else ++ list_add_tail(&xctx->hugetlb_list, &hmem->entry); ++ xsc_spin_unlock(&xctx->hugetlb_lock); ++ } ++ ++ buf->buf = hmem->shmaddr + buf->base * XSC_Q_CHUNK_SIZE; ++ ++ ret = ibv_dontfork_range(buf->buf, buf->length); ++ if (ret) { ++ goto out_fork; ++ } ++ buf->type = XSC_ALLOC_TYPE_HUGE; ++ ++ return 0; ++ ++out_fork: ++ xsc_spin_lock(&xctx->hugetlb_lock); ++ bitmap_free_range(&hmem->bitmap, buf->base, nchunk); ++ if (bitmap_empty(&hmem->bitmap)) { ++ list_del(&hmem->entry); ++ xsc_spin_unlock(&xctx->hugetlb_lock); ++ free_huge_mem(hmem); ++ } else ++ xsc_spin_unlock(&xctx->hugetlb_lock); ++ ++ return -1; ++} ++ ++static void free_huge_buf(struct xsc_context *ctx, struct xsc_buf *buf) ++{ ++ int nchunk; ++ ++ nchunk = buf->length / XSC_Q_CHUNK_SIZE; ++ if (!nchunk) ++ return; ++ ++ xsc_spin_lock(&ctx->hugetlb_lock); ++ bitmap_free_range(&buf->hmem->bitmap, buf->base, nchunk); ++ if (bitmap_empty(&buf->hmem->bitmap)) { ++ list_del(&buf->hmem->entry); ++ xsc_spin_unlock(&ctx->hugetlb_lock); ++ free_huge_mem(buf->hmem); ++ } else ++ xsc_spin_unlock(&ctx->hugetlb_lock); ++} ++ ++void xsc_free_buf_extern(struct xsc_context *ctx, struct xsc_buf *buf) ++{ ++ ibv_dofork_range(buf->buf, buf->length); ++ ctx->extern_alloc.free(buf->buf, ctx->extern_alloc.data); ++} ++ ++int xsc_alloc_buf_extern(struct xsc_context *ctx, struct xsc_buf *buf, ++ size_t size) ++{ ++ void *addr; ++ ++ addr = ctx->extern_alloc.alloc(size, ctx->extern_alloc.data); ++ if (addr || size == 0) { ++ if (ibv_dontfork_range(addr, size)) { ++ xsc_err("External mode dontfork_range failed\n"); ++ ctx->extern_alloc.free(addr, ++ ctx->extern_alloc.data); ++ return -1; ++ } ++ buf->buf = addr; ++ buf->length = size; ++ buf->type = XSC_ALLOC_TYPE_EXTERNAL; ++ return 0; ++ } ++ ++ xsc_err("External alloc failed\n"); ++ return -1; ++} ++ ++int xsc_alloc_prefered_buf(struct xsc_context *xctx, ++ struct xsc_buf *buf, ++ size_t size, int page_size, ++ enum xsc_alloc_type type, ++ const char *component) ++{ ++ int ret; ++ ++ /* ++ * Fallback mechanism priority: ++ * huge pages ++ * contig pages ++ * default ++ */ ++ if (type == XSC_ALLOC_TYPE_HUGE || ++ type == XSC_ALLOC_TYPE_PREFER_HUGE || ++ type == XSC_ALLOC_TYPE_ALL) { ++ ret = alloc_huge_buf(xctx, buf, size, page_size); ++ if (!ret) ++ return 0; ++ ++ if (type == XSC_ALLOC_TYPE_HUGE) ++ return -1; ++ ++ xsc_dbg(xctx->dbg_fp, XSC_DBG_CONTIG, "Huge mode allocation failed, fallback to %s mode\n", ++ XSC_ALLOC_TYPE_ALL ? 
"contig" : "default"); ++ } ++ ++ if (type == XSC_ALLOC_TYPE_CONTIG || ++ type == XSC_ALLOC_TYPE_PREFER_CONTIG || ++ type == XSC_ALLOC_TYPE_ALL) { ++ ret = xsc_alloc_buf_contig(xctx, buf, size, page_size, component); ++ if (!ret) ++ return 0; ++ ++ if (type == XSC_ALLOC_TYPE_CONTIG) ++ return -1; ++ xsc_dbg(xctx->dbg_fp, XSC_DBG_CONTIG, "Contig allocation failed, fallback to default mode\n"); ++ } ++ ++ if (type == XSC_ALLOC_TYPE_EXTERNAL) ++ return xsc_alloc_buf_extern(xctx, buf, size); ++ ++ return xsc_alloc_buf(buf, size, page_size); ++ ++} ++ ++int xsc_free_actual_buf(struct xsc_context *ctx, struct xsc_buf *buf) ++{ ++ int err = 0; ++ ++ switch (buf->type) { ++ case XSC_ALLOC_TYPE_ANON: ++ xsc_free_buf(buf); ++ break; ++ ++ case XSC_ALLOC_TYPE_HUGE: ++ free_huge_buf(ctx, buf); ++ break; ++ ++ case XSC_ALLOC_TYPE_CONTIG: ++ xsc_free_buf_contig(ctx, buf); ++ break; ++ ++ case XSC_ALLOC_TYPE_EXTERNAL: ++ xsc_free_buf_extern(ctx, buf); ++ break; ++ ++ default: ++ fprintf(stderr, "Bad allocation type\n"); ++ } ++ ++ return err; ++} ++ ++/* This function computes log2(v) rounded up. ++ We don't want to have a dependency to libm which exposes ceil & log2 APIs. ++ Code was written based on public domain code: ++ URL: http://graphics.stanford.edu/~seander/bithacks.html#IntegerLog. ++*/ ++static uint32_t xsc_get_block_order(uint32_t v) ++{ ++ static const uint32_t bits_arr[] = {0x2, 0xC, 0xF0, 0xFF00, 0xFFFF0000}; ++ static const uint32_t shift_arr[] = {1, 2, 4, 8, 16}; ++ int i; ++ uint32_t input_val = v; ++ ++ register uint32_t r = 0;/* result of log2(v) will go here */ ++ for (i = 4; i >= 0; i--) { ++ if (v & bits_arr[i]) { ++ v >>= shift_arr[i]; ++ r |= shift_arr[i]; ++ } ++ } ++ /* Rounding up if required */ ++ r += !!(input_val & ((1 << r) - 1)); ++ ++ return r; ++} ++ ++bool xsc_is_extern_alloc(struct xsc_context *context) ++{ ++ return context->extern_alloc.alloc && context->extern_alloc.free; ++} ++ ++void xsc_get_alloc_type(struct xsc_context *context, ++ const char *component, ++ enum xsc_alloc_type *alloc_type, ++ enum xsc_alloc_type default_type) ++ ++{ ++ char *env_value; ++ char name[128]; ++ ++ if (xsc_is_extern_alloc(context)) { ++ *alloc_type = XSC_ALLOC_TYPE_EXTERNAL; ++ return; ++ } ++ ++ snprintf(name, sizeof(name), "%s_ALLOC_TYPE", component); ++ ++ *alloc_type = default_type; ++ ++ env_value = getenv(name); ++ if (env_value) { ++ if (!strcasecmp(env_value, "ANON")) ++ *alloc_type = XSC_ALLOC_TYPE_ANON; ++ else if (!strcasecmp(env_value, "HUGE")) ++ *alloc_type = XSC_ALLOC_TYPE_HUGE; ++ else if (!strcasecmp(env_value, "CONTIG")) ++ *alloc_type = XSC_ALLOC_TYPE_CONTIG; ++ else if (!strcasecmp(env_value, "PREFER_CONTIG")) ++ *alloc_type = XSC_ALLOC_TYPE_PREFER_CONTIG; ++ else if (!strcasecmp(env_value, "PREFER_HUGE")) ++ *alloc_type = XSC_ALLOC_TYPE_PREFER_HUGE; ++ else if (!strcasecmp(env_value, "ALL")) ++ *alloc_type = XSC_ALLOC_TYPE_ALL; ++ } ++} ++ ++static void xsc_alloc_get_env_info(int *max_block_log, ++ int *min_block_log, ++ const char *component) ++ ++{ ++ char *env; ++ int value; ++ char name[128]; ++ ++ /* First set defaults */ ++ *max_block_log = XSC_MAX_LOG2_CONTIG_BLOCK_SIZE; ++ *min_block_log = XSC_MIN_LOG2_CONTIG_BLOCK_SIZE; ++ ++ snprintf(name, sizeof(name), "%s_MAX_LOG2_CONTIG_BSIZE", component); ++ env = getenv(name); ++ if (env) { ++ value = atoi(env); ++ if (value <= XSC_MAX_LOG2_CONTIG_BLOCK_SIZE && ++ value >= XSC_MIN_LOG2_CONTIG_BLOCK_SIZE) ++ *max_block_log = value; ++ else ++ fprintf(stderr, "Invalid value %d for %s\n", ++ value, name); ++ } ++ 
sprintf(name, "%s_MIN_LOG2_CONTIG_BSIZE", component); ++ env = getenv(name); ++ if (env) { ++ value = atoi(env); ++ if (value >= XSC_MIN_LOG2_CONTIG_BLOCK_SIZE && ++ value <= *max_block_log) ++ *min_block_log = value; ++ else ++ fprintf(stderr, "Invalid value %d for %s\n", ++ value, name); ++ } ++} ++ ++int xsc_alloc_buf_contig(struct xsc_context *xctx, ++ struct xsc_buf *buf, size_t size, ++ int page_size, ++ const char *component) ++{ ++ void *addr = MAP_FAILED; ++ int block_size_exp; ++ int max_block_log; ++ int min_block_log; ++ struct ibv_context *context = &xctx->ibv_ctx.context; ++ off_t offset; ++ ++ xsc_alloc_get_env_info(&max_block_log, ++ &min_block_log, ++ component); ++ ++ block_size_exp = xsc_get_block_order(size); ++ ++ if (block_size_exp > max_block_log) ++ block_size_exp = max_block_log; ++ ++ do { ++ offset = 0; ++ set_command(XSC_IB_MMAP_GET_CONTIGUOUS_PAGES, &offset); ++ set_order(block_size_exp, &offset); ++ addr = mmap(NULL , size, PROT_WRITE | PROT_READ, MAP_SHARED, ++ context->cmd_fd, page_size * offset); ++ if (addr != MAP_FAILED) ++ break; ++ ++ /* ++ * The kernel returns EINVAL if not supported ++ */ ++ if (errno == EINVAL) ++ return -1; ++ ++ block_size_exp -= 1; ++ } while (block_size_exp >= min_block_log); ++ xsc_dbg(xctx->dbg_fp, XSC_DBG_CONTIG, "block order %d, addr %p\n", block_size_exp, addr); ++ ++ if (addr == MAP_FAILED) ++ return -1; ++ ++ if (ibv_dontfork_range(addr, size)) { ++ munmap(addr, size); ++ return -1; ++ } ++ ++ buf->buf = addr; ++ buf->length = size; ++ buf->type = XSC_ALLOC_TYPE_CONTIG; ++ ++ return 0; ++} ++ ++void xsc_free_buf_contig(struct xsc_context *xctx, struct xsc_buf *buf) ++{ ++ ibv_dofork_range(buf->buf, buf->length); ++ munmap(buf->buf, buf->length); ++} ++ ++int xsc_alloc_buf(struct xsc_buf *buf, size_t size, int page_size) ++{ ++ int ret; ++ int al_size; ++ ++ al_size = align(size, page_size); ++ ret = posix_memalign(&buf->buf, page_size, al_size); ++ if (ret) ++ return ret; ++ ++ ret = ibv_dontfork_range(buf->buf, al_size); ++ if (ret) ++ free(buf->buf); ++ ++ if (!ret) { ++ buf->length = al_size; ++ buf->type = XSC_ALLOC_TYPE_ANON; ++ } ++ ++ return ret; ++} ++ ++void xsc_free_buf(struct xsc_buf *buf) ++{ ++ ibv_dofork_range(buf->buf, buf->length); ++ free(buf->buf); ++} +diff --git a/providers/xscale/cq.c b/providers/xscale/cq.c +new file mode 100644 +index 0000000..e2619f0 +--- /dev/null ++++ b/providers/xscale/cq.c +@@ -0,0 +1,1410 @@ ++/* ++ * Copyright (c) 2021 - 2022, Shanghai Yunsilicon Technology Co., Ltd. ++ * All rights reserved. 
++ */ ++ ++#include ++ ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include ++#include ++#include ++ ++#include "xscale.h" ++#include "wqe.h" ++#include "xsc_hsi.h" ++ ++enum { ++ CQ_OK = 0, ++ CQ_EMPTY = -1, ++ CQ_POLL_ERR = -2 ++}; ++ ++enum { ++ XSC_CQE_APP_TAG_MATCHING = 1, ++}; ++ ++enum { ++ XSC_CQE_APP_OP_TM_CONSUMED = 0x1, ++ XSC_CQE_APP_OP_TM_EXPECTED = 0x2, ++ XSC_CQE_APP_OP_TM_UNEXPECTED = 0x3, ++ XSC_CQE_APP_OP_TM_NO_TAG = 0x4, ++ XSC_CQE_APP_OP_TM_APPEND = 0x5, ++ XSC_CQE_APP_OP_TM_REMOVE = 0x6, ++ XSC_CQE_APP_OP_TM_NOOP = 0x7, ++ XSC_CQE_APP_OP_TM_CONSUMED_SW_RDNV = 0x9, ++ XSC_CQE_APP_OP_TM_CONSUMED_MSG = 0xA, ++ XSC_CQE_APP_OP_TM_CONSUMED_MSG_SW_RDNV = 0xB, ++ XSC_CQE_APP_OP_TM_MSG_COMPLETION_CANCELED = 0xC, ++}; ++ ++static const uint32_t xsc_msg_opcode[][2][2] = { ++ [XSC_MSG_OPCODE_SEND][XSC_REQ][XSC_WITHOUT_IMMDT] = XSC_OPCODE_RDMA_REQ_SEND, ++ [XSC_MSG_OPCODE_SEND][XSC_REQ][XSC_WITH_IMMDT] = XSC_OPCODE_RDMA_REQ_SEND_IMMDT, ++ [XSC_MSG_OPCODE_SEND][XSC_RSP][XSC_WITHOUT_IMMDT] = XSC_OPCODE_RDMA_RSP_RECV, ++ [XSC_MSG_OPCODE_SEND][XSC_RSP][XSC_WITH_IMMDT] = XSC_OPCODE_RDMA_RSP_RECV_IMMDT, ++ [XSC_MSG_OPCODE_RDMA_WRITE][XSC_REQ][XSC_WITHOUT_IMMDT] = XSC_OPCODE_RDMA_REQ_WRITE, ++ [XSC_MSG_OPCODE_RDMA_WRITE][XSC_REQ][XSC_WITH_IMMDT] = XSC_OPCODE_RDMA_REQ_WRITE_IMMDT, ++ [XSC_MSG_OPCODE_RDMA_WRITE][XSC_RSP][XSC_WITHOUT_IMMDT] = XSC_OPCODE_RDMA_CQE_ERROR, ++ [XSC_MSG_OPCODE_RDMA_WRITE][XSC_RSP][XSC_WITH_IMMDT] = XSC_OPCODE_RDMA_RSP_WRITE_IMMDT, ++ [XSC_MSG_OPCODE_RDMA_READ][XSC_REQ][XSC_WITHOUT_IMMDT] = XSC_OPCODE_RDMA_REQ_READ, ++ [XSC_MSG_OPCODE_RDMA_READ][XSC_REQ][XSC_WITH_IMMDT] = XSC_OPCODE_RDMA_CQE_ERROR, ++ [XSC_MSG_OPCODE_RDMA_READ][XSC_RSP][XSC_WITHOUT_IMMDT] = XSC_OPCODE_RDMA_CQE_ERROR, ++ [XSC_MSG_OPCODE_RDMA_READ][XSC_RSP][XSC_WITH_IMMDT] = XSC_OPCODE_RDMA_CQE_ERROR, ++}; ++ ++static const uint32_t xsc_cqe_opcode[] = { ++ [XSC_OPCODE_RDMA_REQ_SEND] = IBV_WC_SEND, ++ [XSC_OPCODE_RDMA_REQ_SEND_IMMDT] = IBV_WC_SEND, ++ [XSC_OPCODE_RDMA_RSP_RECV] = IBV_WC_RECV, ++ [XSC_OPCODE_RDMA_RSP_RECV_IMMDT] = IBV_WC_RECV, ++ [XSC_OPCODE_RDMA_REQ_WRITE] = IBV_WC_RDMA_WRITE, ++ [XSC_OPCODE_RDMA_REQ_WRITE_IMMDT] = IBV_WC_RDMA_WRITE, ++ [XSC_OPCODE_RDMA_RSP_WRITE_IMMDT] = IBV_WC_RECV_RDMA_WITH_IMM, ++ [XSC_OPCODE_RDMA_REQ_READ] = IBV_WC_RDMA_READ, ++}; ++ ++int xsc_stall_num_loop = 60; ++int xsc_stall_cq_poll_min = 60; ++int xsc_stall_cq_poll_max = 100000; ++int xsc_stall_cq_inc_step = 100; ++int xsc_stall_cq_dec_step = 10; ++ ++static inline uint8_t xsc_get_cqe_opcode(struct xsc_cqe *cqe) ALWAYS_INLINE; ++static inline uint8_t xsc_get_cqe_opcode(struct xsc_cqe *cqe) ++{ ++ if (cqe->is_error) ++ return cqe->type ? 
XSC_OPCODE_RDMA_RSP_ERROR : XSC_OPCODE_RDMA_REQ_ERROR; ++ if (cqe->msg_opcode > XSC_MSG_OPCODE_RDMA_READ) { ++ printf("rdma cqe msg code should be send/write/read\n"); ++ return XSC_OPCODE_RDMA_CQE_ERROR; ++ } ++ return xsc_msg_opcode[cqe->msg_opcode][cqe->type][cqe->with_immdt]; ++} ++ ++static inline uint8_t get_cqe_l3_hdr_type(struct xsc_cqe64 *cqe) ++{ ++ return (cqe->l4_hdr_type_etc >> 2) & 0x3; ++} ++ ++static void *get_cqe(struct xsc_cq *cq, int n) ++{ ++ return cq->active_buf->buf + n * cq->cqe_sz; ++} ++ ++static void *get_sw_cqe(struct xsc_cq *cq, int n) ++{ ++ int cid = n & (cq->verbs_cq.cq_ex.cqe - 1); ++ struct xsc_cqe *cqe = get_cqe(cq, cid); ++ if (likely(xsc_get_cqe_sw_own(cqe, n, cq->log2_cq_ring_sz))) ++ return cqe; ++ else ++ return NULL; ++} ++ ++static void *next_cqe_sw(struct xsc_cq *cq) ++{ ++ return get_sw_cqe(cq, cq->cons_index); ++} ++ ++static void update_cons_index(struct xsc_cq *cq) ++{ ++ union xsc_db_data db; ++ ++ db.raw_data = cq->cons_index; ++ db.cqn = cq->cqn; ++ WR_REG(cq->db, db.raw_data); ++} ++ ++static inline void handle_good_req( ++ struct ibv_wc *wc, struct xsc_cqe *cqe, struct xsc_qp *qp, struct xsc_wq *wq, uint8_t opcode) ++{ ++ int idx; ++ struct xsc_send_wqe_ctrl_seg *ctrl; ++ ++ wc->opcode = xsc_cqe_opcode[opcode]; ++ wc->status = IBV_WC_SUCCESS; ++ idx = RD_LE_16(cqe->wqe_id); ++ idx >>= (qp->sq.wqe_shift - XSC_BASE_WQE_SHIFT); ++ idx &= (wq->wqe_cnt -1); ++ wc->wr_id = wq->wrid[idx]; ++ wq->tail = wq->wqe_head[idx] + 1; ++ if (opcode == XSC_OPCODE_RDMA_REQ_READ) { ++ ctrl = xsc_get_send_wqe(qp, idx); ++ wc->byte_len = ctrl->msg_len; ++ } ++ wq->flush_wqe_cnt--; ++ ++ xsc_dbg(to_xctx(qp->ibv_qp->context)->dbg_fp, XSC_DBG_CQ_CQE, ++ "wqeid:%u, wq tail:%u\n", idx, wq->tail); ++} ++ ++/* Returns IBV_WC_IP_CSUM_OK or 0 */ ++static inline int get_csum_ok(struct xsc_cqe64 *cqe) ++{ ++ return (((cqe->hds_ip_ext & (XSC_CQE_L4_OK | XSC_CQE_L3_OK)) == ++ (XSC_CQE_L4_OK | XSC_CQE_L3_OK)) & ++ (get_cqe_l3_hdr_type(cqe) == XSC_CQE_L3_HDR_TYPE_IPV4)) ++ << IBV_WC_IP_CSUM_OK_SHIFT; ++} ++ ++static inline void handle_good_responder( ++ struct ibv_wc *wc, struct xsc_cqe *cqe, struct xsc_wq *wq, uint8_t opcode) ++{ ++ uint16_t idx; ++ struct xsc_qp *qp = container_of(wq, struct xsc_qp, rq); ++ ++ wc->byte_len = RD_LE_32(cqe->msg_len); ++ wc->opcode = xsc_cqe_opcode[opcode]; ++ wc->status = IBV_WC_SUCCESS; ++ ++ idx = wq->tail & (wq->wqe_cnt - 1); ++ wc->wr_id = wq->wrid[idx]; ++ ++wq->tail; ++ wq->flush_wqe_cnt--; ++ ++ xsc_dbg(to_xctx(qp->ibv_qp->context)->dbg_fp, XSC_DBG_CQ_CQE, ++ "recv cqe idx:%u, len:%u\n", idx, wc->byte_len); ++} ++ ++static void dump_cqe(void *buf) ++{ ++ __le32 *p = buf; ++ int i; ++ ++ for (i = 0; i < 8; i += 4) ++ printf("0x%08x 0x%08x 0x%08x 0x%08x\n", p[i], p[i+1], p[i+2], p[i+3]); ++} ++ ++static enum ibv_wc_status xsc_cqe_error_code(struct xsc_cqe *cqe) ++{ ++ switch (cqe->error_code) { ++ case XSC_ERR_CODE_NAK_RETRY: ++ return IBV_WC_RETRY_EXC_ERR; ++ case XSC_ERR_CODE_NAK_OPCODE: ++ return IBV_WC_BAD_RESP_ERR; ++ case XSC_ERR_CODE_NAK_MR: ++ return IBV_WC_REM_ACCESS_ERR; ++ case XSC_ERR_CODE_NAK_OPERATION: ++ return IBV_WC_REM_OP_ERR; ++ case XSC_ERR_CODE_NAK_RNR: ++ return IBV_WC_RNR_RETRY_EXC_ERR; ++ case XSC_ERR_CODE_LOCAL_MR: ++ return IBV_WC_LOC_PROT_ERR; ++ case XSC_ERR_CODE_LOCAL_LEN: ++ return IBV_WC_LOC_LEN_ERR; ++ case XSC_ERR_CODE_LEN_GEN_CQE: ++ return IBV_WC_LOC_LEN_ERR; ++ case XSC_ERR_CODE_OPERATION: ++ return IBV_WC_LOC_ACCESS_ERR; ++ case XSC_ERR_CODE_FLUSH: ++ return IBV_WC_WR_FLUSH_ERR; ++ case 
XSC_ERR_CODE_MALF_WQE_HOST: ++ case XSC_ERR_CODE_STRG_ACC_GEN_CQE: ++ return IBV_WC_FATAL_ERR; ++ case XSC_ERR_CODE_OPCODE_GEN_CQE: ++ case XSC_ERR_CODE_LOCAL_OPCODE: ++ default: ++ return IBV_WC_GENERAL_ERR; ++ } ++} ++ ++ ++static inline bool xsc_qp_need_cqe(struct xsc_qp *qp, int *type, int *wqe_id) ++{ ++ struct xsc_wq *wq; ++ struct xsc_send_wqe_ctrl_seg *ctrl; ++ int idx = 0; ++ ++ /* check recv queue work request */ ++ wq = &qp->rq; ++ if (wq->head - wq->tail > 0) { ++ *type = 1; ++ return true; ++ } ++ /* check send queue work request */ ++ wq = &qp->sq; ++ while (wq->head - wq->tail > 0) { ++ idx = wq->tail & (wq->wqe_cnt - 1); ++ ++wq->tail; ++ ctrl = xsc_get_send_wqe(qp, idx); ++ if (ctrl->ce) { ++ *type = 0; ++ *wqe_id = idx << (qp->sq.wqe_shift - XSC_BASE_WQE_SHIFT); ++ return true; ++ } ++ } ++ return false; ++} ++ ++static inline void handle_bad_req( ++ struct xsc_context *xctx, ++ struct ibv_wc *wc, struct xsc_cqe *cqe, struct xsc_qp *qp, struct xsc_wq *wq) ++{ ++ int idx; ++ wc->status = xsc_cqe_error_code(cqe); ++ wc->vendor_err = cqe->error_code; ++ idx = RD_LE_16(cqe->wqe_id); ++ idx >>= (qp->sq.wqe_shift - XSC_BASE_WQE_SHIFT); ++ idx &= (wq->wqe_cnt -1); ++ wq->tail = wq->wqe_head[idx] + 1; ++ wc->wr_id = wq->wrid[idx]; ++ wq->flush_wqe_cnt--; ++ ++ if (cqe->error_code != XSC_ERR_CODE_FLUSH) { ++ printf("%s: got completion with error:\n", xctx->hostname); ++ dump_cqe(cqe); ++ } ++} ++ ++static inline void handle_bad_responder( ++ struct xsc_context *xctx, ++ struct ibv_wc *wc, struct xsc_cqe *cqe, struct xsc_wq *wq) ++{ ++ wc->status = xsc_cqe_error_code(cqe); ++ wc->vendor_err = cqe->error_code; ++ ++ ++wq->tail; ++ wq->flush_wqe_cnt--; ++ ++ if (cqe->error_code != XSC_ERR_CODE_FLUSH) { ++ printf("%s: got completion with error:\n", xctx->hostname); ++ dump_cqe(cqe); ++ } ++} ++ ++#if defined(__x86_64__) || defined (__i386__) ++static inline unsigned long get_cycles(void) ++{ ++ uint32_t low, high; ++ uint64_t val; ++ asm volatile ("rdtsc" : "=a" (low), "=d" (high)); ++ val = high; ++ val = (val << 32) | low; ++ return val; ++} ++ ++static void xsc_stall_poll_cq(void) ++{ ++ int i; ++ ++ for (i = 0; i < xsc_stall_num_loop; i++) ++ (void)get_cycles(); ++} ++static void xsc_stall_cycles_poll_cq(uint64_t cycles) ++{ ++ while (get_cycles() < cycles) ++ ; /* Nothing */ ++} ++static void xsc_get_cycles(uint64_t *cycles) ++{ ++ *cycles = get_cycles(); ++} ++#else ++static void xsc_stall_poll_cq(void) ++{ ++} ++static void xsc_stall_cycles_poll_cq(uint64_t cycles) ++{ ++} ++static void xsc_get_cycles(uint64_t *cycles) ++{ ++} ++#endif ++ ++static inline int get_qp_ctx(struct xsc_context *xctx, ++ struct xsc_resource **cur_rsc, ++ uint32_t qpn) ++ ALWAYS_INLINE; ++static inline int get_qp_ctx(struct xsc_context *xctx, ++ struct xsc_resource **cur_rsc, ++ uint32_t qpn) ++{ ++ if (!*cur_rsc || (qpn != (*cur_rsc)->rsn)) { ++ /* ++ * We do not have to take the QP table lock here, ++ * because CQs will be locked while QPs are removed ++ * from the table. 
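++ * Also note that *cur_rsc caches the QP resolved for the previous
++ * CQE, so back-to-back completions for the same QPN skip the lookup.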
++ */ ++ *cur_rsc = (struct xsc_resource *)xsc_find_qp(xctx, qpn); ++ if (unlikely(!*cur_rsc)) ++ return CQ_POLL_ERR; ++ } ++ ++ return CQ_OK; ++} ++ ++static inline int xsc_get_next_cqe(struct xsc_cq *cq, ++ struct xsc_cqe64 **pcqe64, ++ void **pcqe) ++ ALWAYS_INLINE; ++static inline int xsc_get_next_cqe(struct xsc_cq *cq, ++ struct xsc_cqe64 **pcqe64, ++ void **pcqe) ++{ ++ void *cqe = next_cqe_sw(cq); ++ if (!cqe) ++ return CQ_EMPTY; ++ ++ ++cq->cons_index; ++ ++ /* ++ * Make sure we read CQ entry contents after we've checked the ++ * ownership bit. ++ */ ++ udma_from_device_barrier(); ++ ++ *pcqe = cqe; ++ ++ return CQ_OK; ++} ++ ++static inline int xsc_parse_cqe(struct xsc_cq *cq, ++ struct xsc_cqe *cqe, ++ struct xsc_resource **cur_rsc, ++ struct ibv_wc *wc, ++ int lazy) ++{ ++ struct xsc_wq *wq; ++ uint32_t qp_id; ++ uint8_t opcode; ++ int err = 0; ++ struct xsc_qp *xqp = NULL; ++ struct xsc_context *xctx; ++ ++ xctx = to_xctx(ibv_cq_ex_to_cq(&cq->verbs_cq.cq_ex)->context); ++ qp_id = cqe->qp_id; ++ qp_id = RD_LE_16(qp_id); ++ wc->wc_flags = 0; ++ wc->qp_num = qp_id; ++ opcode = xsc_get_cqe_opcode(cqe); ++ ++ xsc_dbg(xctx->dbg_fp, XSC_DBG_CQ_CQE, "opcode:0x%x qp_num:%u\n", opcode, qp_id); ++ switch (opcode) { ++ case XSC_OPCODE_RDMA_REQ_SEND_IMMDT: ++ case XSC_OPCODE_RDMA_REQ_WRITE_IMMDT: ++ wc->wc_flags |= IBV_WC_WITH_IMM; ++ SWITCH_FALLTHROUGH; ++ case XSC_OPCODE_RDMA_REQ_SEND: ++ case XSC_OPCODE_RDMA_REQ_WRITE: ++ case XSC_OPCODE_RDMA_REQ_READ: ++ err = get_qp_ctx(xctx, cur_rsc, qp_id); ++ if (unlikely(err)) ++ return CQ_EMPTY; ++ xqp = rsc_to_xqp(*cur_rsc); ++ wq = &xqp->sq; ++ handle_good_req(wc, cqe, xqp, wq, opcode); ++ break; ++ case XSC_OPCODE_RDMA_RSP_RECV_IMMDT: ++ case XSC_OPCODE_RDMA_RSP_WRITE_IMMDT: ++ wc->wc_flags |= IBV_WC_WITH_IMM; ++ wc->imm_data = cqe->imm_data; ++ SWITCH_FALLTHROUGH; ++ case XSC_OPCODE_RDMA_RSP_RECV: ++ err = get_qp_ctx(xctx, cur_rsc, qp_id); ++ if (unlikely(err)) ++ return CQ_EMPTY; ++ xqp = rsc_to_xqp(*cur_rsc); ++ wq = &xqp->rq; ++ handle_good_responder(wc, cqe, wq, opcode); ++ break; ++ case XSC_OPCODE_RDMA_REQ_ERROR: ++ err = get_qp_ctx(xctx, cur_rsc, qp_id); ++ if (unlikely(err)) ++ return CQ_POLL_ERR; ++ xqp = rsc_to_xqp(*cur_rsc); ++ wq = &xqp->sq; ++ handle_bad_req(xctx, wc, cqe, xqp, wq); ++ break; ++ case XSC_OPCODE_RDMA_RSP_ERROR: ++ err = get_qp_ctx(xctx, cur_rsc, qp_id); ++ if (unlikely(err)) ++ return CQ_POLL_ERR; ++ xqp = rsc_to_xqp(*cur_rsc); ++ wq = &xqp->rq; ++ handle_bad_responder(xctx, wc, cqe, wq); ++ break; ++ case XSC_OPCODE_RDMA_CQE_ERROR: ++ printf("%s: got completion with cqe format error:\n", xctx->hostname); ++ dump_cqe(cqe); ++ SWITCH_FALLTHROUGH; ++ default: ++ return CQ_POLL_ERR; ++ } ++ return CQ_OK; ++} ++ ++static inline int xsc_parse_lazy_cqe(struct xsc_cq *cq, ++ struct xsc_cqe64 *cqe64, ++ void *cqe, int cqe_ver) ++ ALWAYS_INLINE; ++static inline int xsc_parse_lazy_cqe(struct xsc_cq *cq, ++ struct xsc_cqe64 *cqe64, ++ void *cqe, int cqe_ver) ++{ ++ return xsc_parse_cqe(cq, cqe, &cq->cur_rsc, NULL, 1); ++} ++ ++static inline int xsc_poll_one(struct xsc_cq *cq, ++ struct xsc_resource **cur_rsc, ++ struct ibv_wc *wc) ++ ALWAYS_INLINE; ++static inline int xsc_poll_one(struct xsc_cq *cq, ++ struct xsc_resource **cur_rsc, ++ struct ibv_wc *wc) ++{ ++ struct xsc_cqe *cqe = get_sw_cqe(cq, cq->cons_index); ++ if (cqe == NULL) { ++ return CQ_EMPTY; ++ } ++ memset(wc, 0, sizeof(*wc)); ++ ++ ++cq->cons_index; ++ ++ /* ++ * Make sure we read CQ entry contents after we've checked the ++ * ownership bit. 
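++ * The udma_from_device_barrier() call below provides that ordering.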
++ */ ++ udma_from_device_barrier(); ++ return xsc_parse_cqe(cq, cqe, cur_rsc, wc, 0); ++} ++ ++static inline void gen_flush_err_cqe(struct xsc_err_state_qp_node *err_node, ++ uint32_t qp_id, struct xsc_wq *wq, uint32_t idx, ++ struct ibv_wc *wc) ++{ ++ memset(wc, 0, sizeof(*wc)); ++ if (err_node->is_sq) { ++ switch (wq->wr_opcode[idx]){ ++ case IBV_WR_SEND: ++ case IBV_WR_SEND_WITH_IMM: ++ case IBV_WR_SEND_WITH_INV: ++ wc->opcode = IBV_WC_SEND; ++ break; ++ case IBV_WR_RDMA_WRITE: ++ case IBV_WR_RDMA_WRITE_WITH_IMM: ++ wc->opcode = IBV_WC_RDMA_WRITE; ++ break; ++ case IBV_WR_RDMA_READ: ++ wc->opcode = IBV_WC_RDMA_READ; ++ } ++ } else { ++ wc->opcode = IBV_WC_RECV; ++ } ++ ++ wc->qp_num = qp_id; ++ wc->status = IBV_WC_WR_FLUSH_ERR; ++ wc->vendor_err = XSC_ERR_CODE_FLUSH; ++ wc->wr_id = wq->wrid[idx]; ++ wq->tail++; ++ wq->flush_wqe_cnt--; ++} ++ ++static inline int xsc_generate_flush_err_cqe(struct ibv_cq *ibcq, ++ int ne, int *npolled, struct ibv_wc *wc) ++{ ++ uint32_t qp_id = 0; ++ uint32_t flush_wqe_cnt = 0; ++ int sw_npolled = 0; ++ int ret = 0; ++ uint32_t idx = 0; ++ struct xsc_err_state_qp_node *err_qp_node, *tmp; ++ struct xsc_resource *res = NULL; ++ struct xsc_context *xctx = to_xctx(ibcq->context); ++ struct xsc_cq *cq = to_xcq(ibcq); ++ struct xsc_wq *wq; ++ ++ list_for_each_safe(&cq->err_state_qp_list, err_qp_node, tmp, entry) { ++ if (!err_qp_node) ++ break; ++ ++ sw_npolled = 0; ++ qp_id = err_qp_node->qp_id; ++ ret = get_qp_ctx(xctx, &res, qp_id); ++ if (unlikely(ret)) ++ continue; ++ wq = err_qp_node->is_sq ? &(rsc_to_xqp(res)->sq):&(rsc_to_xqp(res)->rq); ++ flush_wqe_cnt = wq->flush_wqe_cnt; ++ xsc_dbg(xctx->dbg_fp, XSC_DBG_CQ_CQE, "is_sq %d, flush_wq_cnt %d, ne %d, npolled %d, qp_id %d\n", ++ err_qp_node->is_sq, wq->flush_wqe_cnt, ne, *npolled, qp_id); ++ ++ if (flush_wqe_cnt <= (ne - *npolled)) { ++ while (sw_npolled < flush_wqe_cnt) { ++ idx = wq->tail & (wq->wqe_cnt - 1); ++ if (err_qp_node->is_sq && !wq->need_flush[idx]) { ++ wq->tail++; ++ continue; ++ } else { ++ gen_flush_err_cqe(err_qp_node, err_qp_node->qp_id, wq, ++ idx, wc + *npolled + sw_npolled); ++ ++sw_npolled; ++ } ++ } ++ list_del(&err_qp_node->entry); ++ free(err_qp_node); ++ *npolled += sw_npolled; ++ } else { ++ while (sw_npolled < (ne - *npolled)) { ++ idx = wq->tail & (wq->wqe_cnt - 1); ++ if (err_qp_node->is_sq && !wq->need_flush[idx]) { ++ wq->tail++; ++ continue; ++ } else { ++ gen_flush_err_cqe(err_qp_node, err_qp_node->qp_id, wq, ++ idx, wc + *npolled + sw_npolled); ++ ++sw_npolled; ++ } ++ } ++ *npolled = ne; ++ break; ++ } ++ } ++ ++ return 0; ++} ++ ++static inline int poll_cq(struct ibv_cq *ibcq, int ne, struct ibv_wc *wc) ALWAYS_INLINE; ++static inline int poll_cq(struct ibv_cq *ibcq, int ne, struct ibv_wc *wc) ++{ ++ struct xsc_cq *cq = to_xcq(ibcq); ++ struct xsc_resource *rsc = NULL; ++ int npolled = 0; ++ int err = CQ_OK; ++ uint32_t next_cid = cq->cons_index; ++ ++ xsc_spin_lock(&cq->lock); ++ for (npolled = 0; npolled < ne; ++npolled) { ++ err = xsc_poll_one(cq, &rsc, wc + npolled); ++ if (err != CQ_OK) ++ break; ++ } ++ ++ if (err == CQ_EMPTY) { ++ if (npolled < ne && !(list_empty(&cq->err_state_qp_list))) { ++ xsc_generate_flush_err_cqe(ibcq, ne, &npolled, wc); ++ } ++ } ++ ++ udma_to_device_barrier(); ++ if (next_cid != cq->cons_index) ++ update_cons_index(cq); ++ xsc_spin_unlock(&cq->lock); ++ ++ return err == CQ_POLL_ERR ? 
err : npolled; ++} ++ ++enum polling_mode { ++ POLLING_MODE_NO_STALL, ++ POLLING_MODE_STALL, ++ POLLING_MODE_STALL_ADAPTIVE ++}; ++ ++static inline void _xsc_end_poll(struct ibv_cq_ex *ibcq, ++ int lock, enum polling_mode stall) ++ ALWAYS_INLINE; ++static inline void _xsc_end_poll(struct ibv_cq_ex *ibcq, ++ int lock, enum polling_mode stall) ++{ ++ struct xsc_cq *cq = to_xcq(ibv_cq_ex_to_cq(ibcq)); ++ ++ update_cons_index(cq); ++ ++ if (lock) ++ xsc_spin_unlock(&cq->lock); ++ ++ if (stall) { ++ if (stall == POLLING_MODE_STALL_ADAPTIVE) { ++ if (!(cq->flags & XSC_CQ_FLAGS_FOUND_CQES)) { ++ cq->stall_cycles = max(cq->stall_cycles - xsc_stall_cq_dec_step, ++ xsc_stall_cq_poll_min); ++ xsc_get_cycles(&cq->stall_last_count); ++ } else if (cq->flags & XSC_CQ_FLAGS_EMPTY_DURING_POLL) { ++ cq->stall_cycles = min(cq->stall_cycles + xsc_stall_cq_inc_step, ++ xsc_stall_cq_poll_max); ++ xsc_get_cycles(&cq->stall_last_count); ++ } else { ++ cq->stall_cycles = max(cq->stall_cycles - xsc_stall_cq_dec_step, ++ xsc_stall_cq_poll_min); ++ cq->stall_last_count = 0; ++ } ++ } else if (!(cq->flags & XSC_CQ_FLAGS_FOUND_CQES)) { ++ cq->stall_next_poll = 1; ++ } ++ ++ cq->flags &= ~(XSC_CQ_FLAGS_FOUND_CQES | XSC_CQ_FLAGS_EMPTY_DURING_POLL); ++ } ++} ++ ++static inline int xsc_start_poll(struct ibv_cq_ex *ibcq, struct ibv_poll_cq_attr *attr, ++ int lock, enum polling_mode stall, ++ int cqe_version, int clock_update) ++ ALWAYS_INLINE; ++static inline int xsc_start_poll(struct ibv_cq_ex *ibcq, struct ibv_poll_cq_attr *attr, ++ int lock, enum polling_mode stall, ++ int cqe_version, int clock_update) ++{ ++ struct xsc_cq *cq = to_xcq(ibv_cq_ex_to_cq(ibcq)); ++ struct xsc_cqe64 *cqe64; ++ void *cqe; ++ int err; ++ ++ if (unlikely(attr->comp_mask)) ++ return EINVAL; ++ ++ if (stall) { ++ if (stall == POLLING_MODE_STALL_ADAPTIVE) { ++ if (cq->stall_last_count) ++ xsc_stall_cycles_poll_cq(cq->stall_last_count + cq->stall_cycles); ++ } else if (cq->stall_next_poll) { ++ cq->stall_next_poll = 0; ++ xsc_stall_poll_cq(); ++ } ++ } ++ ++ if (lock) ++ xsc_spin_lock(&cq->lock); ++ ++ cq->cur_rsc = NULL; ++ ++ err = xsc_get_next_cqe(cq, &cqe64, &cqe); ++ if (err == CQ_EMPTY) { ++ if (lock) ++ xsc_spin_unlock(&cq->lock); ++ ++ if (stall) { ++ if (stall == POLLING_MODE_STALL_ADAPTIVE) { ++ cq->stall_cycles = max(cq->stall_cycles - xsc_stall_cq_dec_step, ++ xsc_stall_cq_poll_min); ++ xsc_get_cycles(&cq->stall_last_count); ++ } else { ++ cq->stall_next_poll = 1; ++ } ++ } ++ ++ return ENOENT; ++ } ++ ++ if (stall) ++ cq->flags |= XSC_CQ_FLAGS_FOUND_CQES; ++ ++ err = xsc_parse_lazy_cqe(cq, cqe64, cqe, cqe_version); ++ if (lock && err) ++ xsc_spin_unlock(&cq->lock); ++ ++ if (stall && err) { ++ if (stall == POLLING_MODE_STALL_ADAPTIVE) { ++ cq->stall_cycles = max(cq->stall_cycles - xsc_stall_cq_dec_step, ++ xsc_stall_cq_poll_min); ++ cq->stall_last_count = 0; ++ } ++ ++ cq->flags &= ~(XSC_CQ_FLAGS_FOUND_CQES); ++ ++ goto out; ++ } ++ ++ if (clock_update && !err) ++ err = xscdv_get_clock_info(ibcq->context, &cq->last_clock_info); ++ ++out: ++ return err; ++} ++ ++static inline int xsc_next_poll(struct ibv_cq_ex *ibcq, ++ enum polling_mode stall, int cqe_version) ++ ALWAYS_INLINE; ++static inline int xsc_next_poll(struct ibv_cq_ex *ibcq, ++ enum polling_mode stall, ++ int cqe_version) ++{ ++ struct xsc_cq *cq = to_xcq(ibv_cq_ex_to_cq(ibcq)); ++ struct xsc_cqe64 *cqe64; ++ void *cqe; ++ int err; ++ ++ err = xsc_get_next_cqe(cq, &cqe64, &cqe); ++ if (err == CQ_EMPTY) { ++ if (stall == POLLING_MODE_STALL_ADAPTIVE) ++ cq->flags |= 
XSC_CQ_FLAGS_EMPTY_DURING_POLL; ++ ++ return ENOENT; ++ } ++ ++ return xsc_parse_lazy_cqe(cq, cqe64, cqe, cqe_version); ++} ++ ++static inline int xsc_next_poll_adaptive_v0(struct ibv_cq_ex *ibcq) ++{ ++ return xsc_next_poll(ibcq, POLLING_MODE_STALL_ADAPTIVE, 0); ++} ++ ++static inline int xsc_next_poll_adaptive_v1(struct ibv_cq_ex *ibcq) ++{ ++ return xsc_next_poll(ibcq, POLLING_MODE_STALL_ADAPTIVE, 1); ++} ++ ++static inline int xsc_next_poll_v0(struct ibv_cq_ex *ibcq) ++{ ++ return xsc_next_poll(ibcq, 0, 0); ++} ++ ++static inline int xsc_next_poll_v1(struct ibv_cq_ex *ibcq) ++{ ++ return xsc_next_poll(ibcq, 0, 1); ++} ++ ++static inline int xsc_start_poll_v0(struct ibv_cq_ex *ibcq, ++ struct ibv_poll_cq_attr *attr) ++{ ++ return xsc_start_poll(ibcq, attr, 0, 0, 0, 0); ++} ++ ++static inline int xsc_start_poll_v1(struct ibv_cq_ex *ibcq, ++ struct ibv_poll_cq_attr *attr) ++{ ++ return xsc_start_poll(ibcq, attr, 0, 0, 1, 0); ++} ++ ++static inline int xsc_start_poll_v0_lock(struct ibv_cq_ex *ibcq, ++ struct ibv_poll_cq_attr *attr) ++{ ++ return xsc_start_poll(ibcq, attr, 1, 0, 0, 0); ++} ++ ++static inline int xsc_start_poll_v1_lock(struct ibv_cq_ex *ibcq, ++ struct ibv_poll_cq_attr *attr) ++{ ++ return xsc_start_poll(ibcq, attr, 1, 0, 1, 0); ++} ++ ++static inline int xsc_start_poll_adaptive_stall_v0_lock(struct ibv_cq_ex *ibcq, ++ struct ibv_poll_cq_attr *attr) ++{ ++ return xsc_start_poll(ibcq, attr, 1, POLLING_MODE_STALL_ADAPTIVE, 0, 0); ++} ++ ++static inline int xsc_start_poll_stall_v0_lock(struct ibv_cq_ex *ibcq, ++ struct ibv_poll_cq_attr *attr) ++{ ++ return xsc_start_poll(ibcq, attr, 1, POLLING_MODE_STALL, 0, 0); ++} ++ ++static inline int xsc_start_poll_adaptive_stall_v1_lock(struct ibv_cq_ex *ibcq, ++ struct ibv_poll_cq_attr *attr) ++{ ++ return xsc_start_poll(ibcq, attr, 1, POLLING_MODE_STALL_ADAPTIVE, 1, 0); ++} ++ ++static inline int xsc_start_poll_stall_v1_lock(struct ibv_cq_ex *ibcq, ++ struct ibv_poll_cq_attr *attr) ++{ ++ return xsc_start_poll(ibcq, attr, 1, POLLING_MODE_STALL, 1, 0); ++} ++ ++static inline int xsc_start_poll_stall_v0(struct ibv_cq_ex *ibcq, ++ struct ibv_poll_cq_attr *attr) ++{ ++ return xsc_start_poll(ibcq, attr, 0, POLLING_MODE_STALL, 0, 0); ++} ++ ++static inline int xsc_start_poll_adaptive_stall_v0(struct ibv_cq_ex *ibcq, ++ struct ibv_poll_cq_attr *attr) ++{ ++ return xsc_start_poll(ibcq, attr, 0, POLLING_MODE_STALL_ADAPTIVE, 0, 0); ++} ++ ++static inline int xsc_start_poll_adaptive_stall_v1(struct ibv_cq_ex *ibcq, ++ struct ibv_poll_cq_attr *attr) ++{ ++ return xsc_start_poll(ibcq, attr, 0, POLLING_MODE_STALL_ADAPTIVE, 1, 0); ++} ++ ++static inline int xsc_start_poll_stall_v1(struct ibv_cq_ex *ibcq, ++ struct ibv_poll_cq_attr *attr) ++{ ++ return xsc_start_poll(ibcq, attr, 0, POLLING_MODE_STALL, 1, 0); ++} ++ ++static inline int xsc_start_poll_v0_lock_clock_update(struct ibv_cq_ex *ibcq, ++ struct ibv_poll_cq_attr *attr) ++{ ++ return xsc_start_poll(ibcq, attr, 1, 0, 0, 1); ++} ++ ++static inline int xsc_start_poll_v1_lock_clock_update(struct ibv_cq_ex *ibcq, ++ struct ibv_poll_cq_attr *attr) ++{ ++ return xsc_start_poll(ibcq, attr, 1, 0, 1, 1); ++} ++ ++static inline int xsc_start_poll_v1_clock_update(struct ibv_cq_ex *ibcq, ++ struct ibv_poll_cq_attr *attr) ++{ ++ return xsc_start_poll(ibcq, attr, 0, 0, 1, 1); ++} ++ ++static inline int xsc_start_poll_v0_clock_update(struct ibv_cq_ex *ibcq, ++ struct ibv_poll_cq_attr *attr) ++{ ++ return xsc_start_poll(ibcq, attr, 0, 0, 0, 1); ++} ++ ++static inline int 
xsc_start_poll_stall_v1_lock_clock_update(struct ibv_cq_ex *ibcq, ++ struct ibv_poll_cq_attr *attr) ++{ ++ return xsc_start_poll(ibcq, attr, 1, POLLING_MODE_STALL, 1, 1); ++} ++ ++static inline int xsc_start_poll_stall_v0_lock_clock_update(struct ibv_cq_ex *ibcq, ++ struct ibv_poll_cq_attr *attr) ++{ ++ return xsc_start_poll(ibcq, attr, 1, POLLING_MODE_STALL, 0, 1); ++} ++ ++static inline int xsc_start_poll_stall_v1_clock_update(struct ibv_cq_ex *ibcq, ++ struct ibv_poll_cq_attr *attr) ++{ ++ return xsc_start_poll(ibcq, attr, 0, POLLING_MODE_STALL, 1, 1); ++} ++ ++static inline int xsc_start_poll_stall_v0_clock_update(struct ibv_cq_ex *ibcq, ++ struct ibv_poll_cq_attr *attr) ++{ ++ return xsc_start_poll(ibcq, attr, 0, POLLING_MODE_STALL, 0, 1); ++} ++ ++static inline int xsc_start_poll_adaptive_stall_v0_lock_clock_update(struct ibv_cq_ex *ibcq, ++ struct ibv_poll_cq_attr *attr) ++{ ++ return xsc_start_poll(ibcq, attr, 1, POLLING_MODE_STALL_ADAPTIVE, 0, 1); ++} ++ ++static inline int xsc_start_poll_adaptive_stall_v1_lock_clock_update(struct ibv_cq_ex *ibcq, ++ struct ibv_poll_cq_attr *attr) ++{ ++ return xsc_start_poll(ibcq, attr, 1, POLLING_MODE_STALL_ADAPTIVE, 1, 1); ++} ++ ++static inline int xsc_start_poll_adaptive_stall_v0_clock_update(struct ibv_cq_ex *ibcq, ++ struct ibv_poll_cq_attr *attr) ++{ ++ return xsc_start_poll(ibcq, attr, 0, POLLING_MODE_STALL_ADAPTIVE, 0, 1); ++} ++ ++static inline int xsc_start_poll_adaptive_stall_v1_clock_update(struct ibv_cq_ex *ibcq, ++ struct ibv_poll_cq_attr *attr) ++{ ++ return xsc_start_poll(ibcq, attr, 0, POLLING_MODE_STALL_ADAPTIVE, 1, 1); ++} ++ ++static inline void xsc_end_poll_adaptive_stall_lock(struct ibv_cq_ex *ibcq) ++{ ++ _xsc_end_poll(ibcq, 1, POLLING_MODE_STALL_ADAPTIVE); ++} ++ ++static inline void xsc_end_poll_stall_lock(struct ibv_cq_ex *ibcq) ++{ ++ _xsc_end_poll(ibcq, 1, POLLING_MODE_STALL); ++} ++ ++static inline void xsc_end_poll_adaptive_stall(struct ibv_cq_ex *ibcq) ++{ ++ _xsc_end_poll(ibcq, 0, POLLING_MODE_STALL_ADAPTIVE); ++} ++ ++static inline void xsc_end_poll_stall(struct ibv_cq_ex *ibcq) ++{ ++ _xsc_end_poll(ibcq, 0, POLLING_MODE_STALL); ++} ++ ++static inline void xsc_end_poll(struct ibv_cq_ex *ibcq) ++{ ++ _xsc_end_poll(ibcq, 0, 0); ++} ++ ++static inline void xsc_end_poll_lock(struct ibv_cq_ex *ibcq) ++{ ++ _xsc_end_poll(ibcq, 1, 0); ++} ++ ++int xsc_poll_cq(struct ibv_cq *ibcq, int ne, struct ibv_wc *wc) ++{ ++ return poll_cq(ibcq, ne, wc); ++} ++ ++static inline enum ibv_wc_opcode xsc_cq_read_wc_opcode(struct ibv_cq_ex *ibcq) ++{ ++ struct xsc_cq *cq = to_xcq(ibv_cq_ex_to_cq(ibcq)); ++ ++ switch (xscdv_get_cqe_opcode(cq->cqe64)) { ++ case XSC_CQE_RESP_WR_IMM: ++ return IBV_WC_RECV_RDMA_WITH_IMM; ++ case XSC_CQE_RESP_SEND: ++ case XSC_CQE_RESP_SEND_IMM: ++ case XSC_CQE_RESP_SEND_INV: ++ if (unlikely(cq->cqe64->app == XSC_CQE_APP_TAG_MATCHING)) { ++ switch (cq->cqe64->app_op) { ++ case XSC_CQE_APP_OP_TM_CONSUMED_MSG_SW_RDNV: ++ case XSC_CQE_APP_OP_TM_CONSUMED_MSG: ++ case XSC_CQE_APP_OP_TM_CONSUMED_SW_RDNV: ++ case XSC_CQE_APP_OP_TM_EXPECTED: ++ case XSC_CQE_APP_OP_TM_UNEXPECTED: ++ return IBV_WC_TM_RECV; ++ case XSC_CQE_APP_OP_TM_NO_TAG: ++ return IBV_WC_TM_NO_TAG; ++ } ++ } ++ return IBV_WC_RECV; ++ case XSC_CQE_NO_PACKET: ++ switch (cq->cqe64->app_op) { ++ case XSC_CQE_APP_OP_TM_REMOVE: ++ return IBV_WC_TM_DEL; ++ case XSC_CQE_APP_OP_TM_APPEND: ++ return IBV_WC_TM_ADD; ++ case XSC_CQE_APP_OP_TM_NOOP: ++ return IBV_WC_TM_SYNC; ++ case XSC_CQE_APP_OP_TM_CONSUMED: ++ return IBV_WC_TM_RECV; ++ } ++ break; ++ case 
XSC_CQE_REQ: ++ switch (be32toh(cq->cqe64->sop_drop_qpn) >> 24) { ++ case XSC_OPCODE_RDMA_WRITE_IMM: ++ case XSC_OPCODE_RDMA_WRITE: ++ return IBV_WC_RDMA_WRITE; ++ case XSC_OPCODE_SEND_IMM: ++ case XSC_OPCODE_SEND: ++ case XSC_OPCODE_SEND_INVAL: ++ return IBV_WC_SEND; ++ case XSC_OPCODE_RDMA_READ: ++ return IBV_WC_RDMA_READ; ++ case XSC_OPCODE_ATOMIC_CS: ++ return IBV_WC_COMP_SWAP; ++ case XSC_OPCODE_ATOMIC_FA: ++ return IBV_WC_FETCH_ADD; ++ case XSC_OPCODE_UMR: ++ return cq->umr_opcode; ++ case XSC_OPCODE_TSO: ++ return IBV_WC_TSO; ++ } ++ } ++ ++ return 0; ++} ++ ++static inline uint32_t xsc_cq_read_wc_qp_num(struct ibv_cq_ex *ibcq) ++{ ++ struct xsc_cq *cq = to_xcq(ibv_cq_ex_to_cq(ibcq)); ++ ++ return be32toh(cq->cqe64->sop_drop_qpn) & 0xffffff; ++} ++ ++static inline unsigned int xsc_cq_read_wc_flags(struct ibv_cq_ex *ibcq) ++{ ++ struct xsc_cq *cq = to_xcq(ibv_cq_ex_to_cq(ibcq)); ++ int wc_flags = 0; ++ ++ if (cq->flags & XSC_CQ_FLAGS_RX_CSUM_VALID) ++ wc_flags = get_csum_ok(cq->cqe64); ++ ++ switch (xscdv_get_cqe_opcode(cq->cqe64)) { ++ case XSC_CQE_RESP_WR_IMM: ++ case XSC_CQE_RESP_SEND_IMM: ++ wc_flags |= IBV_WC_WITH_IMM; ++ break; ++ case XSC_CQE_RESP_SEND_INV: ++ wc_flags |= IBV_WC_WITH_INV; ++ break; ++ } ++ ++ if (cq->flags & XSC_CQ_FLAGS_TM_SYNC_REQ) ++ wc_flags |= IBV_WC_TM_SYNC_REQ; ++ ++ if (unlikely(cq->cqe64->app == XSC_CQE_APP_TAG_MATCHING)) { ++ switch (cq->cqe64->app_op) { ++ case XSC_CQE_APP_OP_TM_CONSUMED_MSG_SW_RDNV: ++ case XSC_CQE_APP_OP_TM_CONSUMED_MSG: ++ case XSC_CQE_APP_OP_TM_MSG_COMPLETION_CANCELED: ++ /* Full completion */ ++ wc_flags |= (IBV_WC_TM_MATCH | IBV_WC_TM_DATA_VALID); ++ break; ++ case XSC_CQE_APP_OP_TM_CONSUMED_SW_RDNV: ++ case XSC_CQE_APP_OP_TM_CONSUMED: /* First completion */ ++ wc_flags |= IBV_WC_TM_MATCH; ++ break; ++ case XSC_CQE_APP_OP_TM_EXPECTED: /* Second completion */ ++ wc_flags |= IBV_WC_TM_DATA_VALID; ++ break; ++ } ++ } ++ ++ wc_flags |= ((be32toh(cq->cqe64->flags_rqpn) >> 28) & 3) ? 
IBV_WC_GRH : 0; ++ return wc_flags; ++} ++ ++static inline uint32_t xsc_cq_read_wc_byte_len(struct ibv_cq_ex *ibcq) ++{ ++ struct xsc_cq *cq = to_xcq(ibv_cq_ex_to_cq(ibcq)); ++ ++ return be32toh(cq->cqe64->byte_cnt); ++} ++ ++static inline uint32_t xsc_cq_read_wc_vendor_err(struct ibv_cq_ex *ibcq) ++{ ++ struct xsc_cq *cq = to_xcq(ibv_cq_ex_to_cq(ibcq)); ++ struct xsc_err_cqe *ecqe = (struct xsc_err_cqe *)cq->cqe64; ++ ++ return ecqe->vendor_err_synd; ++} ++ ++static inline __be32 xsc_cq_read_wc_imm_data(struct ibv_cq_ex *ibcq) ++{ ++ struct xsc_cq *cq = to_xcq(ibv_cq_ex_to_cq(ibcq)); ++ ++ switch (xscdv_get_cqe_opcode(cq->cqe64)) { ++ case XSC_CQE_RESP_SEND_INV: ++ /* This is returning invalidate_rkey which is in host order, see ++ * ibv_wc_read_invalidated_rkey ++ */ ++ return (__force __be32)be32toh(cq->cqe64->imm_inval_pkey); ++ default: ++ return cq->cqe64->imm_inval_pkey; ++ } ++} ++ ++static inline uint32_t xsc_cq_read_wc_slid(struct ibv_cq_ex *ibcq) ++{ ++ struct xsc_cq *cq = to_xcq(ibv_cq_ex_to_cq(ibcq)); ++ ++ return (uint32_t)be16toh(cq->cqe64->slid); ++} ++ ++static inline uint8_t xsc_cq_read_wc_sl(struct ibv_cq_ex *ibcq) ++{ ++ struct xsc_cq *cq = to_xcq(ibv_cq_ex_to_cq(ibcq)); ++ ++ return (be32toh(cq->cqe64->flags_rqpn) >> 24) & 0xf; ++} ++ ++static inline uint32_t xsc_cq_read_wc_src_qp(struct ibv_cq_ex *ibcq) ++{ ++ struct xsc_cq *cq = to_xcq(ibv_cq_ex_to_cq(ibcq)); ++ ++ return be32toh(cq->cqe64->flags_rqpn) & 0xffffff; ++} ++ ++static inline uint8_t xsc_cq_read_wc_dlid_path_bits(struct ibv_cq_ex *ibcq) ++{ ++ struct xsc_cq *cq = to_xcq(ibv_cq_ex_to_cq(ibcq)); ++ ++ return cq->cqe64->ml_path & 0x7f; ++} ++ ++static inline uint64_t xsc_cq_read_wc_completion_ts(struct ibv_cq_ex *ibcq) ++{ ++ struct xsc_cq *cq = to_xcq(ibv_cq_ex_to_cq(ibcq)); ++ ++ return be64toh(cq->cqe64->timestamp); ++} ++ ++static inline uint64_t ++xsc_cq_read_wc_completion_wallclock_ns(struct ibv_cq_ex *ibcq) ++{ ++ struct xsc_cq *cq = to_xcq(ibv_cq_ex_to_cq(ibcq)); ++ ++ return xscdv_ts_to_ns(&cq->last_clock_info, ++ xsc_cq_read_wc_completion_ts(ibcq)); ++} ++ ++static inline uint16_t xsc_cq_read_wc_cvlan(struct ibv_cq_ex *ibcq) ++{ ++ struct xsc_cq *cq = to_xcq(ibv_cq_ex_to_cq(ibcq)); ++ ++ return be16toh(cq->cqe64->vlan_info); ++} ++ ++static inline uint32_t xsc_cq_read_flow_tag(struct ibv_cq_ex *ibcq) ++{ ++ struct xsc_cq *cq = to_xcq(ibv_cq_ex_to_cq(ibcq)); ++ ++ return be32toh(cq->cqe64->sop_drop_qpn) & XSC_FLOW_TAG_MASK; ++} ++ ++static inline void xsc_cq_read_wc_tm_info(struct ibv_cq_ex *ibcq, ++ struct ibv_wc_tm_info *tm_info) ++{ ++ struct xsc_cq *cq = to_xcq(ibv_cq_ex_to_cq(ibcq)); ++ ++ tm_info->tag = be64toh(cq->cqe64->tmh.tag); ++ tm_info->priv = be32toh(cq->cqe64->tmh.app_ctx); ++} ++ ++#define BIT(i) (1UL << (i)) ++ ++#define SINGLE_THREADED BIT(0) ++#define STALL BIT(1) ++#define V1 BIT(2) ++#define ADAPTIVE BIT(3) ++#define CLOCK_UPDATE BIT(4) ++ ++#define xsc_start_poll_name(cqe_ver, lock, stall, adaptive, clock_update) \ ++ xsc_start_poll##adaptive##stall##cqe_ver##lock##clock_update ++#define xsc_next_poll_name(cqe_ver, adaptive) \ ++ xsc_next_poll##adaptive##cqe_ver ++#define xsc_end_poll_name(lock, stall, adaptive) \ ++ xsc_end_poll##adaptive##stall##lock ++ ++#define POLL_FN_ENTRY(cqe_ver, lock, stall, adaptive, clock_update) { \ ++ .start_poll = &xsc_start_poll_name(cqe_ver, lock, stall, adaptive, clock_update), \ ++ .next_poll = &xsc_next_poll_name(cqe_ver, adaptive), \ ++ .end_poll = &xsc_end_poll_name(lock, stall, adaptive), \ ++ } ++ ++static const struct op ++{ ++ int 
(*start_poll)(struct ibv_cq_ex *ibcq, struct ibv_poll_cq_attr *attr); ++ int (*next_poll)(struct ibv_cq_ex *ibcq); ++ void (*end_poll)(struct ibv_cq_ex *ibcq); ++} ops[ADAPTIVE + V1 + STALL + SINGLE_THREADED + CLOCK_UPDATE + 1] = { ++ [V1] = POLL_FN_ENTRY(_v1, _lock, , ,), ++ [0] = POLL_FN_ENTRY(_v0, _lock, , ,), ++ [V1 | SINGLE_THREADED] = POLL_FN_ENTRY(_v1, , , , ), ++ [SINGLE_THREADED] = POLL_FN_ENTRY(_v0, , , , ), ++ [V1 | STALL] = POLL_FN_ENTRY(_v1, _lock, _stall, , ), ++ [STALL] = POLL_FN_ENTRY(_v0, _lock, _stall, , ), ++ [V1 | SINGLE_THREADED | STALL] = POLL_FN_ENTRY(_v1, , _stall, , ), ++ [SINGLE_THREADED | STALL] = POLL_FN_ENTRY(_v0, , _stall, , ), ++ [V1 | STALL | ADAPTIVE] = POLL_FN_ENTRY(_v1, _lock, _stall, _adaptive, ), ++ [STALL | ADAPTIVE] = POLL_FN_ENTRY(_v0, _lock, _stall, _adaptive, ), ++ [V1 | SINGLE_THREADED | STALL | ADAPTIVE] = POLL_FN_ENTRY(_v1, , _stall, _adaptive, ), ++ [SINGLE_THREADED | STALL | ADAPTIVE] = POLL_FN_ENTRY(_v0, , _stall, _adaptive, ), ++ [V1 | CLOCK_UPDATE] = POLL_FN_ENTRY(_v1, _lock, , , _clock_update), ++ [0 | CLOCK_UPDATE] = POLL_FN_ENTRY(_v0, _lock, , , _clock_update), ++ [V1 | SINGLE_THREADED | CLOCK_UPDATE] = POLL_FN_ENTRY(_v1, , , , _clock_update), ++ [SINGLE_THREADED | CLOCK_UPDATE] = POLL_FN_ENTRY(_v0, , , , _clock_update), ++ [V1 | STALL | CLOCK_UPDATE] = POLL_FN_ENTRY(_v1, _lock, _stall, , _clock_update), ++ [STALL | CLOCK_UPDATE] = POLL_FN_ENTRY(_v0, _lock, _stall, , _clock_update), ++ [V1 | SINGLE_THREADED | STALL | CLOCK_UPDATE] = POLL_FN_ENTRY(_v1, , _stall, , _clock_update), ++ [SINGLE_THREADED | STALL | CLOCK_UPDATE] = POLL_FN_ENTRY(_v0, , _stall, , _clock_update), ++ [V1 | STALL | ADAPTIVE | CLOCK_UPDATE] = POLL_FN_ENTRY(_v1, _lock, _stall, _adaptive, _clock_update), ++ [STALL | ADAPTIVE | CLOCK_UPDATE] = POLL_FN_ENTRY(_v0, _lock, _stall, _adaptive, _clock_update), ++ [V1 | SINGLE_THREADED | STALL | ADAPTIVE | CLOCK_UPDATE] = POLL_FN_ENTRY(_v1, , _stall, _adaptive, _clock_update), ++ [SINGLE_THREADED | STALL | ADAPTIVE | CLOCK_UPDATE] = POLL_FN_ENTRY(_v0, , _stall, _adaptive, _clock_update), ++}; ++ ++int xsc_cq_fill_pfns(struct xsc_cq *cq, ++ const struct ibv_cq_init_attr_ex *cq_attr, ++ struct xsc_context *xctx) ++{ ++ const struct op *poll_ops = &ops[((cq->stall_enable && cq->stall_adaptive_enable) ? ADAPTIVE : 0) | ++ (xctx->cqe_version ? V1 : 0) | ++ (cq->flags & XSC_CQ_FLAGS_SINGLE_THREADED ? ++ SINGLE_THREADED : 0) | ++ (cq->stall_enable ? STALL : 0) | ++ ((cq_attr->wc_flags & IBV_WC_EX_WITH_COMPLETION_TIMESTAMP_WALLCLOCK) ? 
++ CLOCK_UPDATE : 0)]; ++ ++ cq->verbs_cq.cq_ex.start_poll = poll_ops->start_poll; ++ cq->verbs_cq.cq_ex.next_poll = poll_ops->next_poll; ++ cq->verbs_cq.cq_ex.end_poll = poll_ops->end_poll; ++ ++ cq->verbs_cq.cq_ex.read_opcode = xsc_cq_read_wc_opcode; ++ cq->verbs_cq.cq_ex.read_vendor_err = xsc_cq_read_wc_vendor_err; ++ cq->verbs_cq.cq_ex.read_wc_flags = xsc_cq_read_wc_flags; ++ if (cq_attr->wc_flags & IBV_WC_EX_WITH_BYTE_LEN) ++ cq->verbs_cq.cq_ex.read_byte_len = xsc_cq_read_wc_byte_len; ++ if (cq_attr->wc_flags & IBV_WC_EX_WITH_IMM) ++ cq->verbs_cq.cq_ex.read_imm_data = xsc_cq_read_wc_imm_data; ++ if (cq_attr->wc_flags & IBV_WC_EX_WITH_QP_NUM) ++ cq->verbs_cq.cq_ex.read_qp_num = xsc_cq_read_wc_qp_num; ++ if (cq_attr->wc_flags & IBV_WC_EX_WITH_SRC_QP) ++ cq->verbs_cq.cq_ex.read_src_qp = xsc_cq_read_wc_src_qp; ++ if (cq_attr->wc_flags & IBV_WC_EX_WITH_SLID) ++ cq->verbs_cq.cq_ex.read_slid = xsc_cq_read_wc_slid; ++ if (cq_attr->wc_flags & IBV_WC_EX_WITH_SL) ++ cq->verbs_cq.cq_ex.read_sl = xsc_cq_read_wc_sl; ++ if (cq_attr->wc_flags & IBV_WC_EX_WITH_DLID_PATH_BITS) ++ cq->verbs_cq.cq_ex.read_dlid_path_bits = xsc_cq_read_wc_dlid_path_bits; ++ if (cq_attr->wc_flags & IBV_WC_EX_WITH_COMPLETION_TIMESTAMP) ++ cq->verbs_cq.cq_ex.read_completion_ts = xsc_cq_read_wc_completion_ts; ++ if (cq_attr->wc_flags & IBV_WC_EX_WITH_CVLAN) ++ cq->verbs_cq.cq_ex.read_cvlan = xsc_cq_read_wc_cvlan; ++ if (cq_attr->wc_flags & IBV_WC_EX_WITH_FLOW_TAG) ++ cq->verbs_cq.cq_ex.read_flow_tag = xsc_cq_read_flow_tag; ++ if (cq_attr->wc_flags & IBV_WC_EX_WITH_TM_INFO) ++ cq->verbs_cq.cq_ex.read_tm_info = xsc_cq_read_wc_tm_info; ++ if (cq_attr->wc_flags & IBV_WC_EX_WITH_COMPLETION_TIMESTAMP_WALLCLOCK) { ++ if (!xctx->clock_info_page) ++ return EOPNOTSUPP; ++ cq->verbs_cq.cq_ex.read_completion_wallclock_ns = ++ xsc_cq_read_wc_completion_wallclock_ns; ++ } ++ ++ return 0; ++} ++ ++int xsc_arm_cq(struct ibv_cq *ibvcq, int solicited) ++{ ++ struct xsc_cq *cq = to_xcq(ibvcq); ++ union xsc_db_data doorbell; ++ ++ doorbell.cqn = cq->cqn; ++ doorbell.cq_next_cid = cq->cons_index; ++ doorbell.solicited = !!solicited; ++ ++ /* ++ * Make sure that the doorbell record in host memory is ++ * written before ringing the doorbell via PCI WC MMIO. 
++ */ ++ mmio_wc_start(); ++ ++ WR_REG(cq->armdb, doorbell.raw_data); ++ ++ mmio_flush_writes(); ++ ++ return 0; ++} ++ ++void xsc_cq_event(struct ibv_cq *cq) ++{ ++ to_xcq(cq)->arm_sn++; ++} ++ ++static int is_equal_rsn(struct xsc_cqe64 *cqe64, uint32_t rsn) ++{ ++ return rsn == (be32toh(cqe64->sop_drop_qpn) & 0xffffff); ++} ++ ++static inline int is_equal_uidx(struct xsc_cqe64 *cqe64, uint32_t uidx) ++{ ++ return uidx == (be32toh(cqe64->srqn_uidx) & 0xffffff); ++} ++ ++static inline int is_responder(uint8_t opcode) ++{ ++ switch (opcode) { ++ case XSC_CQE_RESP_WR_IMM: ++ case XSC_CQE_RESP_SEND: ++ case XSC_CQE_RESP_SEND_IMM: ++ case XSC_CQE_RESP_SEND_INV: ++ case XSC_CQE_RESP_ERR: ++ return 1; ++ } ++ ++ return 0; ++} ++ ++static inline int free_res_cqe(struct xsc_cqe64 *cqe64, uint32_t rsn, int cqe_version) ++{ ++ if (cqe_version) { ++ if (is_equal_uidx(cqe64, rsn)) { ++ return 1; ++ } ++ } else { ++ if (is_equal_rsn(cqe64, rsn)) { ++ return 1; ++ } ++ } ++ ++ return 0; ++} ++ ++void __xsc_cq_clean(struct xsc_cq *cq, uint32_t rsn) ++{ ++ uint32_t prod_index; ++ int nfreed = 0; ++ struct xsc_cqe64 *cqe64, *dest64; ++ void *cqe, *dest; ++ uint8_t owner_bit; ++ int cqe_version; ++ ++ if (!cq || cq->flags & XSC_CQ_FLAGS_DV_OWNED) ++ return; ++ xsc_dbg(to_xctx(cq->verbs_cq.cq_ex.context)->dbg_fp, XSC_DBG_CQ, "\n"); ++ ++ /* ++ * First we need to find the current producer index, so we ++ * know where to start cleaning from. It doesn't matter if HW ++ * adds new entries after this loop -- the QP we're worried ++ * about is already in RESET, so the new entries won't come ++ * from our QP and therefore don't need to be checked. ++ */ ++ for (prod_index = cq->cons_index; get_sw_cqe(cq, prod_index); ++prod_index) ++ if (prod_index == cq->cons_index + cq->verbs_cq.cq_ex.cqe) ++ break; ++ ++ /* ++ * Now sweep backwards through the CQ, removing CQ entries ++ * that match our QP by copying older entries on top of them. ++ */ ++ cqe_version = (to_xctx(cq->verbs_cq.cq_ex.context))->cqe_version; ++ while ((int) --prod_index - (int) cq->cons_index >= 0) { ++ cqe = get_cqe(cq, prod_index & (cq->verbs_cq.cq_ex.cqe - 1)); ++ cqe64 = (cq->cqe_sz == 64) ? cqe : cqe + 64; ++ if (free_res_cqe(cqe64, rsn, cqe_version)) { ++ ++nfreed; ++ } else if (nfreed) { ++ dest = get_cqe(cq, (prod_index + nfreed) & (cq->verbs_cq.cq_ex.cqe - 1)); ++ dest64 = (cq->cqe_sz == 64) ? dest : dest + 64; ++ owner_bit = dest64->op_own & XSC_CQE_OWNER_MASK; ++ memcpy(dest, cqe, cq->cqe_sz); ++ dest64->op_own = owner_bit | ++ (dest64->op_own & ~XSC_CQE_OWNER_MASK); ++ } ++ } ++ ++ if (nfreed) { ++ cq->cons_index += nfreed; ++ /* ++ * Make sure update of buffer contents is done before ++ * updating consumer index. 
++ */ ++ udma_to_device_barrier(); ++ update_cons_index(cq); ++ } ++} ++ ++void xsc_cq_clean(struct xsc_cq *cq, uint32_t qpn) ++{ ++ xsc_spin_lock(&cq->lock); ++ __xsc_cq_clean(cq, qpn); ++ xsc_spin_unlock(&cq->lock); ++} ++ ++int xsc_alloc_cq_buf(struct xsc_context *xctx, struct xsc_cq *cq, ++ struct xsc_buf *buf, int nent, int cqe_sz) ++{ ++ struct xsc_device *xdev = to_xdev(xctx->ibv_ctx.context.device); ++ int ret; ++ enum xsc_alloc_type type; ++ enum xsc_alloc_type default_type = XSC_ALLOC_TYPE_ANON; ++ ++ if (xsc_use_huge("HUGE_CQ")) ++ default_type = XSC_ALLOC_TYPE_HUGE; ++ ++ xsc_get_alloc_type(xctx, XSC_CQ_PREFIX, &type, default_type); ++ ++ ret = xsc_alloc_prefered_buf(xctx, buf, ++ align(nent * cqe_sz, xdev->page_size), ++ xdev->page_size, ++ type, ++ XSC_CQ_PREFIX); ++ ++ if (ret) ++ return -1; ++ ++ memset(buf->buf, 0, nent * cqe_sz); ++ ++ return 0; ++} ++ ++int xsc_free_cq_buf(struct xsc_context *ctx, struct xsc_buf *buf) ++{ ++ return xsc_free_actual_buf(ctx, buf); ++} +diff --git a/providers/xscale/cqm_csr_defines.h b/providers/xscale/cqm_csr_defines.h +new file mode 100644 +index 0000000..9d87438 +--- /dev/null ++++ b/providers/xscale/cqm_csr_defines.h +@@ -0,0 +1,180 @@ ++#ifndef _CQM_CSR_DEFINES_H_ ++#define _CQM_CSR_DEFINES_H_ ++ ++#define CQM_SOFT_RESET_REG_ADDR 0x6000 ++#define CQM_SOFT_RESET_MASK 0x1 ++#define CQM_SOFT_RESET_SHIFT 0 ++ ++#define CQM_COUNTER_CONFIG_REG_ADDR 0x6020 ++#define CQM_CFG_CNT_WRAP_MASK 0x1 ++#define CQM_CFG_CNT_WRAP_SHIFT 0 ++#define CQM_CFG_CNT_RC_MASK 0x2 ++#define CQM_CFG_CNT_RC_SHIFT 1 ++ ++#define CQM_SCRATCH_PAD_REG_ADDR 0x6040 ++#define CQM_SCRATCH_PAD_MASK 0xffffffffffffffff ++#define CQM_SCRATCH_PAD_SHIFT 0 ++ ++#define CQM_CQM_CONFIG_REG_RING_ADDR_ARRAY_ADDR 0x6060 ++#define CQM_CQM_CONFIG_REG_RING_ADDR_ARRAY_SIZE 16 ++#define CQM_CQM_CONFIG_REG_RING_ADDR_ARRAY_STRIDE 0x20 ++#define CQM_CFG_CPU2CQM_RING_ADDR_MASK 0xffffffffffffffff ++#define CQM_CFG_CPU2CQM_RING_ADDR_SHIFT 0 ++ ++#define CQM_CQM_CONFIG_REG_RING_SIZE_ARRAY_ADDR 0x6260 ++#define CQM_CQM_CONFIG_REG_RING_SIZE_ARRAY_SIZE 16 ++#define CQM_CQM_CONFIG_REG_RING_SIZE_ARRAY_STRIDE 0x20 ++#define CQM_CFG_CPU2CQM_RING_SIZE_MASK 0xffff ++#define CQM_CFG_CPU2CQM_RING_SIZE_SHIFT 0 ++ ++#define CQM_CQM_CONFIG_REG_NEXT_CID_ARRAY_ADDR 0x6460 ++#define CQM_CQM_CONFIG_REG_NEXT_CID_ARRAY_SIZE 16 ++#define CQM_CQM_CONFIG_REG_NEXT_CID_ARRAY_STRIDE 0x20 ++#define CQM_CFG_CPU2CQM_NEXT_CID_MASK 0xffff ++#define CQM_CFG_CPU2CQM_NEXT_CID_SHIFT 0 ++ ++#define CQM_CQM_CONFIG_REG_CFG_EN_ARRAY_ADDR 0x6660 ++#define CQM_CQM_CONFIG_REG_CFG_EN_ARRAY_SIZE 16 ++#define CQM_CQM_CONFIG_REG_CFG_EN_ARRAY_STRIDE 0x20 ++#define CQM_CFG_CPU2CQM_CFG_EN_MASK 0x1 ++#define CQM_CFG_CPU2CQM_CFG_EN_SHIFT 0 ++ ++#define CQM_CQM_CONFIG_CQE_FIFO_TH_ADDR 0x6860 ++#define CQM_CFG_CPU2CQM_CQE_FIFO_AFULL_TH_MASK 0xff ++#define CQM_CFG_CPU2CQM_CQE_FIFO_AFULL_TH_SHIFT 0 ++#define CQM_CFG_CPU2CQM_CQE_FIFO_AMTY_TH_MASK 0xff00 ++#define CQM_CFG_CPU2CQM_CQE_FIFO_AMTY_TH_SHIFT 8 ++ ++#define CQM_CQM_CONFIG_CID_FIFO_TH_ADDR 0x6880 ++#define CQM_CFG_CPU2CQM_CID_FIFO_AFULL_TH_MASK 0xff ++#define CQM_CFG_CPU2CQM_CID_FIFO_AFULL_TH_SHIFT 0 ++#define CQM_CFG_CPU2CQM_CID_FIFO_AMTY_TH_MASK 0xff00 ++#define CQM_CFG_CPU2CQM_CID_FIFO_AMTY_TH_SHIFT 8 ++ ++#define CQM_CQM_STATUS_REG_ARRAY_ADDR 0x68a0 ++#define CQM_CQM_STATUS_REG_ARRAY_SIZE 16 ++#define CQM_CQM_STATUS_REG_ARRAY_STRIDE 0x20 ++#define CQM_CFG_CQM2CPU_DONE_PID_MASK 0xffff ++#define CQM_CFG_CQM2CPU_DONE_PID_SHIFT 0 ++ ++#define CQM_CQM_STATUS_LOCAL_NEXT_PID_REG_ARRAY_ADDR 0x6aa0 
++#define CQM_CQM_STATUS_LOCAL_NEXT_PID_REG_ARRAY_SIZE 16 ++#define CQM_CQM_STATUS_LOCAL_NEXT_PID_REG_ARRAY_STRIDE 0x20 ++#define CQM_CQM_LOCAL_NEXT_PID_MASK 0xffff ++#define CQM_CQM_LOCAL_NEXT_PID_SHIFT 0 ++ ++#define CQM_CQM_DMA_REQ_LEN_STATE_REG_ADDR 0x6ca0 ++#define CQM_CQM_DMA_REQ_LEN_MASK 0x3ff ++#define CQM_CQM_DMA_REQ_LEN_SHIFT 0 ++ ++#define CQM_CQM_DMA_REQ_ADDR_STATE_REG_ADDR 0x6cc0 ++#define CQM_CQM_DMA_REQ_ADDR_MASK 0xffffffffffffffff ++#define CQM_CQM_DMA_REQ_ADDR_SHIFT 0 ++ ++#define CQM_CQM_CQE_L_QPID_STATE_REG_ADDR 0x6ce0 ++#define CQM_CQM_CQE_L_QP_ID_MASK 0xffffff ++#define CQM_CQM_CQE_L_QP_ID_SHIFT 0 ++ ++#define CQM_CQM_CQE_MSG_LEN_STATE_REG_ADDR 0x6d00 ++#define CQM_CQM_CQE_MSG_LEN_MASK 0xffffffff ++#define CQM_CQM_CQE_MSG_LEN_SHIFT 0 ++ ++#define CQM_CQM_CQE_ERR_CODE_STATE_REG_ADDR 0x6d20 ++#define CQM_CQM_CQE_ERR_CODE_MASK 0xff ++#define CQM_CQM_CQE_ERR_CODE_SHIFT 0 ++ ++#define CQM_CQM_CQE_MSG_OPCODE_STATE_REG_ADDR 0x6d40 ++#define CQM_CQM_CQE_MSG_OPCODE_MASK 0xff ++#define CQM_CQM_CQE_MSG_OPCODE_SHIFT 0 ++ ++#define CQM_CQM_CQE_WQEID_STATE_REG_ADDR 0x6d60 ++#define CQM_CQM_CQE_WQEID_MASK 0xffff ++#define CQM_CQM_CQE_WQEID_SHIFT 0 ++ ++#define CQM_CQM_CQE_TX0RX1_STATE_REG_ADDR 0x6d80 ++#define CQM_CQM_CQE_TX0RX1_MASK 0x1 ++#define CQM_CQM_CQE_TX0RX1_SHIFT 0 ++ ++#define CQM_CQM_CQE_CQ_ID_STATE_REG_ADDR 0x6da0 ++#define CQM_CQM_CQE_CQ_ID_MASK 0xf ++#define CQM_CQM_CQE_CQ_ID_SHIFT 0 ++ ++#define CQM_CQM_WR_ACK_CNT_STATE_REG_ADDR 0x6dc0 ++#define CQM_CQM_DMA_WR_ACK_MASK 0xff ++#define CQM_CQM_DMA_WR_ACK_SHIFT 0 ++ ++#define CQM_CQM_RD_ACK_CNT_STATE_REG_ADDR 0x6de0 ++#define CQM_CQM_DMA_RD_ACK_MASK 0xff ++#define CQM_CQM_DMA_RD_ACK_SHIFT 0 ++ ++#define CQM_CQM_CQE_ACK_CNT_STATE_REG_ADDR 0x6e00 ++#define CQM_CQM_DMA_CQE_ACK_MASK 0xff ++#define CQM_CQM_DMA_CQE_ACK_SHIFT 0 ++ ++#define CQM_CQM_CMD_FIFO_STATE_REG_ADDR 0x6e20 ++#define CQM_CQM_FIFO_OVFL_INT_MASK 0x3 ++#define CQM_CQM_FIFO_OVFL_INT_SHIFT 0 ++#define CQM_CQM_FIFO_UNFL_INT_MASK 0xc ++#define CQM_CQM_FIFO_UNFL_INT_SHIFT 2 ++#define CQM_CQM_FIFO_MTY_MASK 0x30 ++#define CQM_CQM_FIFO_MTY_SHIFT 4 ++#define CQM_CQM_FIFO_FUL_MASK 0xc0 ++#define CQM_CQM_FIFO_FUL_SHIFT 6 ++#define CQM_CQM_RING_FULL_INT_MASK 0xffff00 ++#define CQM_CQM_RING_FULL_INT_SHIFT 8 ++#define CQM_CQM_DEFINE_ERR_INT_MASK 0x1000000 ++#define CQM_CQM_DEFINE_ERR_INT_SHIFT 24 ++#define CQM_CQM_SOP_EOP_NO_EQUAL_MASK 0x2000000 ++#define CQM_CQM_SOP_EOP_NO_EQUAL_SHIFT 25 ++ ++#define CQM_CQM_FIFO_USED_CNT_REG_ADDR 0x6e40 ++#define CQM_CQM_FIFO_USED_CNT_REG_SIZE 2 ++#define CQM_CQM_FIFO_USED_CNT_REG_STRIDE 0x20 ++#define CQM_CQM_FIFO_USED_CNT_MASK 0x7f ++#define CQM_CQM_FIFO_USED_CNT_SHIFT 0 ++ ++#define CQM_CQM_DEBUG_INFO_STATE_REG_0_ADDR 0x6e80 ++#define CQM_CQM2CSR_DBG_OPCODE_MASK 0xff ++#define CQM_CQM2CSR_DBG_OPCODE_SHIFT 0 ++#define CQM_CQM2CSR_DBG_TX0_RX1_MASK 0x100 ++#define CQM_CQM2CSR_DBG_TX0_RX1_SHIFT 8 ++#define CQM_CQM2CSR_DBG_CAP_MASK 0x200 ++#define CQM_CQM2CSR_DBG_CAP_SHIFT 9 ++#define CQM_CQM2CSR_DBG_L_QPID_MASK 0x1c00 ++#define CQM_CQM2CSR_DBG_L_QPID_SHIFT 10 ++#define CQM_CQM2CSR_DBG_SN_MASK 0x1fffffe000 ++#define CQM_CQM2CSR_DBG_SN_SHIFT 13 ++ ++#define CQM_CQM_DEBUG_INFO_STATE_REG_1_ADDR 0x6ea0 ++#define CQM_CQM2CSR_DBG_MOD_IF_BM_MASK 0xffffffffffffffff ++#define CQM_CQM2CSR_DBG_MOD_IF_BM_SHIFT 0 ++ ++#define CQM_CQM_DMA_IN_SOP_CNT_REG_ADDR 0x6ec0 ++#define CQM_CQM_DMA_IN_SOP_CNT_MASK 0xffffffffffffffff ++#define CQM_CQM_DMA_IN_SOP_CNT_SHIFT 0 ++ ++#define CQM_CQM_DMA_IN_EOP_CNT_REG_ADDR 0x6ee0 ++#define CQM_CQM_DMA_IN_EOP_CNT_MASK 
0xffffffffffffffff ++#define CQM_CQM_DMA_IN_EOP_CNT_SHIFT 0 ++ ++#define CQM_CQM_DMA_IN_VLD_CNT_REG_ADDR 0x6f00 ++#define CQM_CQM_DMA_IN_VLD_CNT_MASK 0xffffffffffffffff ++#define CQM_CQM_DMA_IN_VLD_CNT_SHIFT 0 ++ ++#define CQM_CQM_DMA_REQ_CNT_REG_ADDR 0x6f20 ++#define CQM_CQM_DMA_REQ_CNT_MASK 0xffffffffffffffff ++#define CQM_CQM_DMA_REQ_CNT_SHIFT 0 ++ ++#define CQM_CQM_DMA_GNT_CNT_REG_ADDR 0x6f40 ++#define CQM_CQM_DMA_GNT_CNT_MASK 0xffffffffffffffff ++#define CQM_CQM_DMA_GNT_CNT_SHIFT 0 ++ ++#define CQM_CQM_DMA_ACK_VLD_CNT_REG_ADDR 0x6f60 ++#define CQM_CQM_DMA_ACK_VLD_CNT_MASK 0xffffffffffffffff ++#define CQM_CQM_DMA_ACK_VLD_CNT_SHIFT 0 ++ ++#define CQM_CQM_MER2CQM_VLD_CNT_REG_ADDR 0x6f80 ++#define CQM_CQM_MER2CQM_VLD_CNT_MASK 0xffffffffffffffff ++#define CQM_CQM_MER2CQM_VLD_CNT_SHIFT 0 ++ ++#endif +diff --git a/providers/xscale/dbrec.c b/providers/xscale/dbrec.c +new file mode 100644 +index 0000000..3987b88 +--- /dev/null ++++ b/providers/xscale/dbrec.c +@@ -0,0 +1,131 @@ ++/* ++ * Copyright (c) 2021 - 2022, Shanghai Yunsilicon Technology Co., Ltd. ++ * All rights reserved. ++ */ ++ ++#define _GNU_SOURCE ++#include ++ ++#include ++#include ++#include ++ ++#include "xscale.h" ++ ++struct xsc_db_page { ++ struct xsc_db_page *prev, *next; ++ struct xsc_buf buf; ++ int num_db; ++ int use_cnt; ++ unsigned long free[0]; ++}; ++ ++static struct xsc_db_page *__add_page(struct xsc_context *context) ++{ ++ struct xsc_db_page *page; ++ int ps = to_xdev(context->ibv_ctx.context.device)->page_size; ++ int pp; ++ int i; ++ int nlong; ++ int ret; ++ ++ pp = ps / context->cache_line_size; ++ nlong = (pp + 8 * sizeof(long) - 1) / (8 * sizeof(long)); ++ ++ page = malloc(sizeof *page + nlong * sizeof(long)); ++ if (!page) ++ return NULL; ++ ++ if (xsc_is_extern_alloc(context)) ++ ret = xsc_alloc_buf_extern(context, &page->buf, ps); ++ else ++ ret = xsc_alloc_buf(&page->buf, ps, ps); ++ if (ret) { ++ free(page); ++ return NULL; ++ } ++ ++ page->num_db = pp; ++ page->use_cnt = 0; ++ for (i = 0; i < nlong; ++i) ++ page->free[i] = ~0; ++ ++ page->prev = NULL; ++ page->next = context->db_list; ++ context->db_list = page; ++ if (page->next) ++ page->next->prev = page; ++ ++ return page; ++} ++ ++__be32 *xsc_alloc_dbrec(struct xsc_context *context) ++{ ++ struct xsc_db_page *page; ++ __be32 *db = NULL; ++ int i, j; ++ ++ pthread_mutex_lock(&context->db_list_mutex); ++ ++ for (page = context->db_list; page; page = page->next) ++ if (page->use_cnt < page->num_db) ++ goto found; ++ ++ page = __add_page(context); ++ if (!page) ++ goto out; ++ ++found: ++ ++page->use_cnt; ++ ++ for (i = 0; !page->free[i]; ++i) ++ /* nothing */; ++ ++ j = ffsl(page->free[i]); ++ --j; ++ page->free[i] &= ~(1UL << j); ++ db = page->buf.buf + (i * 8 * sizeof(long) + j) * context->cache_line_size; ++ ++out: ++ pthread_mutex_unlock(&context->db_list_mutex); ++ ++ return db; ++} ++ ++void xsc_free_db(struct xsc_context *context, __be32 *db) ++{ ++ struct xsc_db_page *page; ++ uintptr_t ps = to_xdev(context->ibv_ctx.context.device)->page_size; ++ int i; ++ ++ pthread_mutex_lock(&context->db_list_mutex); ++ ++ for (page = context->db_list; page; page = page->next) ++ if (((uintptr_t) db & ~(ps - 1)) == (uintptr_t) page->buf.buf) ++ break; ++ ++ if (!page) ++ goto out; ++ ++ i = ((void *) db - page->buf.buf) / context->cache_line_size; ++ page->free[i / (8 * sizeof(long))] |= 1UL << (i % (8 * sizeof(long))); ++ ++ if (!--page->use_cnt) { ++ if (page->prev) ++ page->prev->next = page->next; ++ else ++ context->db_list = page->next; ++ if 
(page->next) ++ page->next->prev = page->prev; ++ ++ if (page->buf.type == XSC_ALLOC_TYPE_EXTERNAL) ++ xsc_free_buf_extern(context, &page->buf); ++ else ++ xsc_free_buf(&page->buf); ++ ++ free(page); ++ } ++ ++out: ++ pthread_mutex_unlock(&context->db_list_mutex); ++} +diff --git a/providers/xscale/libxsc.map b/providers/xscale/libxsc.map +new file mode 100644 +index 0000000..005c161 +--- /dev/null ++++ b/providers/xscale/libxsc.map +@@ -0,0 +1,59 @@ ++/* Export symbols should be added below according to ++ Documentation/versioning.md document. */ ++XSC_1.0 { ++ global: ++ xscdv_query_device; ++ xscdv_init_obj; ++ local: *; ++}; ++ ++XSC_1.1 { ++ global: ++ xscdv_create_cq; ++} XSC_1.0; ++ ++XSC_1.2 { ++ global: ++ xscdv_init_obj; ++ xscdv_set_context_attr; ++} XSC_1.1; ++ ++XSC_1.3 { ++ global: ++ xscdv_create_qp; ++ xscdv_create_wq; ++} XSC_1.2; ++ ++XSC_1.4 { ++ global: ++ xscdv_get_clock_info; ++} XSC_1.3; ++ ++XSC_1.5 { ++ global: ++ xscdv_create_flow_action_esp; ++} XSC_1.4; ++ ++XSC_1.6 { ++ global: ++ xscdv_create_flow_matcher; ++ xscdv_destroy_flow_matcher; ++ xscdv_create_flow; ++} XSC_1.5; ++ ++XSC_1.7 { ++ global: ++ xscdv_create_flow_action_modify_header; ++ xscdv_create_flow_action_packet_reformat; ++ xscdv_devx_alloc_uar; ++ xscdv_devx_free_uar; ++ xscdv_devx_general_cmd; ++ xscdv_devx_obj_create; ++ xscdv_devx_obj_destroy; ++ xscdv_devx_obj_modify; ++ xscdv_devx_obj_query; ++ xscdv_devx_query_eqn; ++ xscdv_devx_umem_dereg; ++ xscdv_devx_umem_reg; ++ xscdv_open_device; ++} XSC_1.6; +diff --git a/providers/xscale/qp.c b/providers/xscale/qp.c +new file mode 100644 +index 0000000..c18fef7 +--- /dev/null ++++ b/providers/xscale/qp.c +@@ -0,0 +1,678 @@ ++/* ++ * Copyright (c) 2021 - 2022, Shanghai Yunsilicon Technology Co., Ltd. ++ * All rights reserved. 
++ */ ++ ++#include ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include "xscale.h" ++#include "wqe.h" ++#include "xsc_hsi.h" ++ ++static const uint32_t xsc_ib_opcode[] = { ++ [IBV_WR_SEND] = XSC_MSG_OPCODE_SEND, ++ [IBV_WR_SEND_WITH_IMM] = XSC_MSG_OPCODE_SEND, ++ [IBV_WR_RDMA_WRITE] = XSC_MSG_OPCODE_RDMA_WRITE, ++ [IBV_WR_RDMA_WRITE_WITH_IMM] = XSC_MSG_OPCODE_RDMA_WRITE, ++ [IBV_WR_RDMA_READ] = XSC_MSG_OPCODE_RDMA_READ, ++ [IBV_WR_SEND_WITH_INV] = XSC_MSG_OPCODE_SEND, ++}; ++ ++static void *get_recv_wqe(struct xsc_qp *qp, int n) ++{ ++ return qp->rq_start + (n << qp->rq.wqe_shift); ++} ++ ++static void *get_wq_recv_wqe(struct xsc_rwq *rwq, int n) ++{ ++ return rwq->pbuff + (n << rwq->rq.wqe_shift); ++} ++ ++static void *get_seg_wqe(void *first, int n) ++{ ++ return first + (n << XSC_BASE_WQE_SHIFT); ++} ++ ++void *xsc_get_send_wqe(struct xsc_qp *qp, int n) ++{ ++ return qp->sq_start + (n << qp->sq.wqe_shift); ++} ++ ++void xsc_init_rwq_indices(struct xsc_rwq *rwq) ++{ ++ rwq->rq.head = 0; ++ rwq->rq.tail = 0; ++} ++ ++void xsc_init_qp_indices(struct xsc_qp *qp) ++{ ++ qp->sq.head = 0; ++ qp->sq.tail = 0; ++ qp->rq.head = 0; ++ qp->rq.tail = 0; ++ qp->sq.cur_post = 0; ++} ++ ++static int xsc_wq_overflow(struct xsc_wq *wq, int nreq, struct xsc_cq *cq) ++{ ++ unsigned cur; ++ ++ cur = wq->head - wq->tail; ++ if (cur + nreq < wq->max_post) ++ return 0; ++ ++ xsc_spin_lock(&cq->lock); ++ cur = wq->head - wq->tail; ++ xsc_spin_unlock(&cq->lock); ++ ++ return cur + nreq >= wq->max_post; ++} ++ ++static inline void set_remote_addr_seg(struct xsc_wqe_data_seg *remote_seg, ++ uint32_t msg_len, uint64_t remote_addr, uint32_t rkey) ++{ ++ WR_LE_32(remote_seg->seg_len, msg_len); ++ WR_LE_32(remote_seg->mkey, rkey); ++ WR_LE_64(remote_seg->va, remote_addr); ++} ++ ++static void set_local_data_seg(struct xsc_wqe_data_seg *data_seg, struct ibv_sge *sg) ++{ ++ WR_LE_32(data_seg->seg_len, sg->length); ++ WR_LE_32(data_seg->mkey, sg->lkey); ++ WR_LE_64(data_seg->va, sg->addr); ++} ++ ++static __be32 send_ieth(struct ibv_send_wr *wr) ++{ ++ switch (wr->opcode) { ++ case IBV_WR_SEND_WITH_IMM: ++ case IBV_WR_RDMA_WRITE_WITH_IMM: ++ return wr->imm_data; ++ default: ++ return 0; ++ } ++} ++ ++static int set_data_inl_seg(struct xsc_qp *qp, struct ibv_send_wr *wr, ++ struct xsc_send_wqe_ctrl_seg *ctrl) ++{ ++ void *data_seg; ++ unsigned seg_index; ++ void *addr; ++ int len = 0; ++ int i; ++ const int ds_len = sizeof(struct xsc_wqe_data_seg); ++ int left_len = 0; ++ int msg_len = ctrl->msg_len; ++ ++ if (wr->opcode == IBV_WR_SEND || wr->opcode == IBV_WR_SEND_WITH_IMM) ++ seg_index = 1; ++ else ++ seg_index = 2; ++ ++ if (unlikely(msg_len > qp->max_inline_data)) ++ return ENOMEM; ++ ++ for (i = 0; i < wr->num_sge; ++i) { ++ if (likely(wr->sg_list[i].length)) { ++ addr = (void*)wr->sg_list[i].addr; ++ len = wr->sg_list[i].length; ++ if (left_len > 0) { ++ int copy_len = min_t(int, len, left_len); ++ memcpy(data_seg, addr, copy_len); ++ addr += copy_len; ++ len -= copy_len; ++ } ++ ++ while (len >= ds_len) { ++ data_seg = get_seg_wqe(ctrl, seg_index); ++ seg_index++; ++ memcpy(data_seg, addr, ds_len); ++ addr += ds_len; ++ len -= ds_len; ++ } ++ ++ if (len > 0) { ++ data_seg = get_seg_wqe(ctrl, seg_index); ++ seg_index++; ++ memcpy(data_seg, addr, len); ++ data_seg += len; ++ left_len = ds_len - len; ++ } else { ++ left_len = 0; ++ } ++ } ++ } ++ ++ ctrl->ds_data_num = seg_index - 1; ++ ++ return 0; ++} ++ ++static void zero_send_ds(int idx, struct xsc_qp *qp) ++{ ++ void *seg; 
++ uint64_t *uninitialized_var(p); ++ int i; ++ ++ seg = (void*)xsc_get_send_wqe(qp, idx); ++ for (i = 1; i < qp->sq.seg_cnt; i++) { ++ p = get_seg_wqe(seg, i); ++ p[0] = p[1] = 0; ++ } ++} ++ ++static void zero_recv_ds(int idx, struct xsc_qp *qp) ++{ ++ void *seg; ++ uint64_t *uninitialized_var(p); ++ int i; ++ ++ seg = (void*)get_recv_wqe(qp, idx); ++ for (i = 1; i < qp->rq.seg_cnt; i++) { ++ p = get_seg_wqe(seg, i); ++ p[0] = p[1] = 0; ++ } ++} ++ ++#ifdef XSC_DEBUG ++static void dump_wqe(int type, int idx, struct xsc_qp *qp) ++{ ++ /* type0 send type1 recv */ ++ uint32_t *uninitialized_var(p); ++ int i; ++ void *seg; ++ ++ if (type == 0) { ++ seg = (void*)xsc_get_send_wqe(qp, idx); ++ xsc_dbg(to_xctx(qp->ibv_qp->context)->dbg_fp, XSC_DBG_QP, ++ "dump send wqe at %p\n", seg); ++ for (i = 0; i < qp->sq.seg_cnt; i++) { ++ p = get_seg_wqe(seg, i); ++ xsc_dbg(to_xctx(qp->ibv_qp->context)->dbg_fp, XSC_DBG_QP, ++ "0x%08x 0x%08x 0x%08x 0x%08x\n", p[0], p[1], p[2], p[3]); ++ } ++ } else if (type == 1) { ++ seg = (void*)get_recv_wqe(qp, idx); ++ xsc_dbg(to_xctx(qp->ibv_qp->context)->dbg_fp, XSC_DBG_QP, ++ "dump recv wqe at %p\n", seg); ++ for (i = 0; i < qp->rq.seg_cnt; i++) { ++ p = get_seg_wqe(seg, i); ++ xsc_dbg(to_xctx(qp->ibv_qp->context)->dbg_fp, XSC_DBG_QP, ++ "0x%08x 0x%08x 0x%08x 0x%08x\n", p[0], p[1], p[2], p[3]); ++ } ++ } else { ++ xsc_dbg(to_xctx(qp->ibv_qp->context)->dbg_fp, XSC_DBG_QP, ++ "unknown type %d\n", type); ++ } ++} ++#else ++static inline void dump_wqe(int type, int idx, struct xsc_qp *qp) {}; ++#endif ++ ++static inline void xsc_post_send_db(struct xsc_qp *qp, int nreq) ++{ ++ uint16_t next_pid; ++ union xsc_db_data db; ++ ++ if (unlikely(!nreq)) ++ return; ++ ++ qp->sq.head += nreq; ++ next_pid = qp->sq.head << (qp->sq.wqe_shift - XSC_BASE_WQE_SHIFT); ++ db.sq_next_pid = next_pid; ++ db.sqn = qp->sqn; ++ /* ++ * Make sure that descriptors are written before ++ * updating doorbell record and ringing the doorbell ++ */ ++ xsc_dbg(to_xctx(qp->ibv_qp->context)->dbg_fp, XSC_DBG_QP_SEND, "nreq:%d\n", nreq); ++ udma_to_device_barrier(); ++ WR_REG(qp->sq.db, db.raw_data); ++} ++ ++static inline int _xsc_post_send(struct ibv_qp *ibqp, struct ibv_send_wr *wr, ++ struct ibv_send_wr **bad_wr) ++{ ++ struct xsc_qp *qp = to_xqp(ibqp); ++ void *seg; ++ struct xsc_send_wqe_ctrl_seg *ctrl; ++ struct xsc_wqe_data_seg *data_seg; ++ ++ int nreq; ++ int err = 0; ++ int i; ++ unsigned idx; ++ unsigned seg_index = 1; ++ unsigned msg_len = 0; ++ ++ if (unlikely(ibqp->state < IBV_QPS_RTS)) { ++ xsc_dbg(to_xctx(ibqp->context)->dbg_fp, XSC_DBG_QP_SEND, ++ "qp state is %u, should not post send\n", ibqp->state); ++ err = EINVAL; ++ *bad_wr = wr; ++ return err; ++ } ++ ++ xsc_spin_lock(&qp->sq.lock); ++ ++ for (nreq = 0; wr; ++nreq, wr = wr->next) { ++ seg_index = 1; ++ msg_len = 0; ++ if (unlikely(wr->opcode < 0 || ++ wr->opcode >= sizeof(xsc_ib_opcode) / sizeof(xsc_ib_opcode[0]))) { ++ xsc_dbg(to_xctx(ibqp->context)->dbg_fp, XSC_DBG_QP_SEND, ++ "bad opcode %d\n", wr->opcode); ++ err = EINVAL; ++ *bad_wr = wr; ++ goto out; ++ } ++ ++ if (unlikely(xsc_wq_overflow(&qp->sq, nreq, ++ to_xcq(qp->ibv_qp->send_cq)))) { ++ xsc_dbg(to_xctx(ibqp->context)->dbg_fp, XSC_DBG_QP_SEND, ++ "send work queue overflow\n"); ++ err = ENOMEM; ++ *bad_wr = wr; ++ goto out; ++ } ++ ++ if (unlikely(wr->num_sge > qp->sq.max_gs)) { ++ xsc_dbg(to_xctx(ibqp->context)->dbg_fp, XSC_DBG_QP_SEND, ++ "max gs exceeded %d (max = %d)\n", ++ wr->num_sge, qp->sq.max_gs); ++ err = ENOMEM; ++ *bad_wr = wr; ++ goto out; ++ } ++ ++ if 
(unlikely(wr->opcode == IBV_WR_RDMA_READ && wr->num_sge > 1)) { ++ xsc_dbg(to_xctx(ibqp->context)->dbg_fp, XSC_DBG_QP_SEND, ++ "rdma read, max gs exceeded %d (max = 1)\n", ++ wr->num_sge); ++ err = ENOMEM; ++ *bad_wr = wr; ++ goto out; ++ } ++ ++ idx = qp->sq.cur_post & (qp->sq.wqe_cnt - 1); ++ zero_send_ds(idx, qp); ++ ctrl = seg = xsc_get_send_wqe(qp, idx); ++ ctrl->ds_data_num = 0; ++ WR_LE_16(ctrl->wqe_id, ++ qp->sq.cur_post << (qp->sq.wqe_shift - XSC_BASE_WQE_SHIFT)); ++ ctrl->se = wr->send_flags & IBV_SEND_SOLICITED ? 1 : 0; ++ ctrl->ce = qp->sq_signal_bits ? 1 : (wr->send_flags & IBV_SEND_SIGNALED ? 1 : 0); ++ ctrl->in_line = wr->send_flags & IBV_SEND_INLINE ? 1 : 0; ++ for (i = 0; i < wr->num_sge; ++i) { ++ if (likely(wr->sg_list[i].length)) { ++ msg_len += wr->sg_list[i].length; ++ } ++ } ++ ctrl->msg_len = msg_len; ++ ctrl->with_immdt = 0; ++ ++ if (unlikely(wr->opcode == IBV_WR_RDMA_READ && msg_len == 0)) { ++ xsc_dbg(to_xctx(ibqp->context)->dbg_fp, XSC_DBG_QP_SEND, ++ "rdma read, msg len should not be 0\n"); ++ /* workaround, return success for posting zero-length read */ ++ err = 0; ++ goto out; ++ } ++ ++ switch (ibqp->qp_type) { ++ case IBV_QPT_RC: ++ switch (wr->opcode) { ++ case IBV_WR_SEND_WITH_INV: ++ case IBV_WR_SEND: ++ break; ++ case IBV_WR_SEND_WITH_IMM: ++ ctrl->with_immdt = 1; ++ ctrl->opcode_data = send_ieth(wr); ++ break; ++ case IBV_WR_RDMA_WRITE_WITH_IMM: ++ ctrl->with_immdt = 1; ++ ctrl->opcode_data = send_ieth(wr); ++ SWITCH_FALLTHROUGH; ++ case IBV_WR_RDMA_READ: ++ case IBV_WR_RDMA_WRITE: ++ if (ctrl->msg_len == 0) ++ break; ++ ctrl->ds_data_num++; ++ data_seg = get_seg_wqe(ctrl, seg_index); ++ set_remote_addr_seg( ++ data_seg, ++ msg_len, ++ wr->wr.rdma.remote_addr, ++ wr->wr.rdma.rkey); ++ seg_index++; ++ break; ++ default: ++ printf("debug: opcode:%u NOT supported\n", wr->opcode); ++ err = EPERM; ++ *bad_wr = wr; ++ goto out; ++ } ++ break; ++ default: ++ xsc_dbg(to_xctx(ibqp->context)->dbg_fp, XSC_DBG_QP_SEND, ++ "qp type:%u NOT supported\n", ibqp->qp_type); ++ err = EPERM; ++ *bad_wr = wr; ++ goto out; ++ } ++ ++ if (wr->send_flags & IBV_SEND_INLINE && wr->num_sge) { ++ err = set_data_inl_seg(qp, wr, ctrl); ++ if (unlikely(err)) { ++ *bad_wr = wr; ++ xsc_dbg(to_xctx(ibqp->context)->dbg_fp, XSC_DBG_QP_SEND, ++ "inline layout failed, err %d\n", err); ++ goto out; ++ } ++ } else { ++ for (i = 0; i < wr->num_sge; ++i, ++seg_index) { ++ if (likely(wr->sg_list[i].length)) { ++ data_seg = get_seg_wqe(ctrl, seg_index); ++ set_local_data_seg(data_seg, &wr->sg_list[i]); ++ ctrl->ds_data_num++; ++ } ++ } ++ } ++ ++ ctrl->msg_opcode = xsc_ib_opcode[wr->opcode]; ++ if (ctrl->msg_len == 0) { ++ ctrl->ds_data_num = 0; ++ zero_send_ds(idx, qp); ++ } ++ qp->sq.wrid[idx] = wr->wr_id; ++ qp->sq.wqe_head[idx] = qp->sq.head + nreq; ++ qp->sq.cur_post += 1; ++ if (ctrl->ce) { ++ qp->sq.flush_wqe_cnt++; ++ qp->sq.need_flush[idx] = 1; ++ } ++ qp->sq.wr_opcode[idx] = wr->opcode; ++ ++ if (xsc_debug_mask & XSC_DBG_QP_SEND) ++ dump_wqe(0, idx, qp); ++ } ++ ++out: ++ xsc_post_send_db(qp, nreq); ++ xsc_spin_unlock(&qp->sq.lock); ++ ++ return err; ++} ++ ++int xsc_post_send(struct ibv_qp *ibqp, struct ibv_send_wr *wr, ++ struct ibv_send_wr **bad_wr) ++{ ++ return _xsc_post_send(ibqp, wr, bad_wr); ++} ++ ++static void set_wq_sig_seg(struct xsc_rwq *rwq, struct xsc_rwqe_sig *sig, ++ int size, uint16_t idx) ++{ ++ uint8_t sign; ++ uint32_t qpn = rwq->wq.wq_num; ++ ++ sign = calc_sig(sig, size); ++ sign ^= calc_sig(&qpn, 4); ++ sign ^= calc_sig(&idx, 2); ++ sig->signature = sign; ++} ++ 
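For reference, a minimal sketch of how an application would exercise the send path implemented above, using only the standard libibverbs API; post_one_send() and the qp, mr, buf and len it takes are hypothetical and not part of this patch:

#include <stdint.h>
#include <infiniband/verbs.h>

/*
 * Illustrative only, not part of the xscale patch: post a single signaled
 * SEND through the generic verbs entry point, which dispatches to
 * _xsc_post_send() above for an xscale QP.  The QP, MR and buffer are
 * assumed to have been created elsewhere.
 */
static int post_one_send(struct ibv_qp *qp, struct ibv_mr *mr,
			 void *buf, uint32_t len)
{
	struct ibv_sge sge = {
		.addr   = (uintptr_t)buf,
		.length = len,
		.lkey   = mr->lkey,
	};
	struct ibv_send_wr wr = {
		.wr_id      = 1,
		.sg_list    = &sge,
		.num_sge    = 1,
		.opcode     = IBV_WR_SEND,
		.send_flags = IBV_SEND_SIGNALED,
	};
	struct ibv_send_wr *bad_wr;

	/* Returns 0 on success; on failure bad_wr points at the failed WR. */
	return ibv_post_send(qp, &wr, &bad_wr);
}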
++int xsc_post_wq_recv(struct ibv_wq *ibwq, struct ibv_recv_wr *wr, ++ struct ibv_recv_wr **bad_wr) ++{ ++ struct xsc_rwq *rwq = to_xrwq(ibwq); ++ struct xsc_wqe_data_seg *scat; ++ int err = 0; ++ int nreq; ++ int ind; ++ int i, j; ++ struct xsc_rwqe_sig *sig; ++ ++ xsc_spin_lock(&rwq->rq.lock); ++ ++ ind = rwq->rq.head & (rwq->rq.wqe_cnt - 1); ++ ++ for (nreq = 0; wr; ++nreq, wr = wr->next) { ++ if (unlikely(xsc_wq_overflow(&rwq->rq, nreq, ++ to_xcq(rwq->wq.cq)))) { ++ err = ENOMEM; ++ *bad_wr = wr; ++ goto out; ++ } ++ ++ if (unlikely(wr->num_sge > rwq->rq.max_gs)) { ++ err = EINVAL; ++ *bad_wr = wr; ++ goto out; ++ } ++ ++ scat = get_wq_recv_wqe(rwq, ind); ++ sig = (struct xsc_rwqe_sig *)scat; ++ if (unlikely(rwq->wq_sig)) { ++ memset(sig, 0, 1 << rwq->rq.wqe_shift); ++ ++scat; ++ } ++ ++ for (i = 0, j = 0; i < wr->num_sge; ++i) { ++ if (unlikely(!wr->sg_list[i].length)) ++ continue; ++ //set_data_ptr_seg(scat + j++, wr->sg_list + i); ++ } ++ ++ if (j < rwq->rq.max_gs) { ++ scat[j].seg_len = 0; ++ scat[j].mkey = htole32(XSC_INVALID_LKEY); ++ scat[j].va = 0; ++ } ++ ++ if (unlikely(rwq->wq_sig)) ++ set_wq_sig_seg(rwq, sig, (wr->num_sge + 1) << 4, ++ rwq->rq.head & 0xffff); ++ ++ rwq->rq.wrid[ind] = wr->wr_id; ++ ++ ind = (ind + 1) & (rwq->rq.wqe_cnt - 1); ++ rwq->rq.flush_wqe_cnt++; ++ } ++ ++out: ++ if (likely(nreq)) { ++ rwq->rq.head += nreq; ++ /* ++ * Make sure that descriptors are written before ++ * doorbell record. ++ */ ++ udma_to_device_barrier(); ++ *(rwq->recv_db) = htobe32(rwq->rq.head & 0xffff); ++ } ++ ++ xsc_spin_unlock(&rwq->rq.lock); ++ ++ return err; ++} ++ ++int xsc_post_recv(struct ibv_qp *ibqp, struct ibv_recv_wr *wr, ++ struct ibv_recv_wr **bad_wr) ++{ ++ struct xsc_qp *qp = to_xqp(ibqp); ++ struct xsc_wqe_data_seg *recv_head; ++ struct xsc_wqe_data_seg *data_seg; ++ int err = 0; ++ uint16_t next_pid = 0; ++ union xsc_db_data db; ++ int nreq; ++ uint16_t idx; ++ int i; ++ ++ xsc_spin_lock(&qp->rq.lock); ++ ++ idx = qp->rq.head & (qp->rq.wqe_cnt - 1); ++ ++ zero_recv_ds(idx, qp); ++ for (nreq = 0; wr; ++nreq, wr = wr->next) { ++ if (unlikely(xsc_wq_overflow(&qp->rq, nreq, ++ to_xcq(qp->ibv_qp->recv_cq)))) { ++ printf("recv work queue overflow\n"); ++ err = ENOMEM; ++ *bad_wr = wr; ++ goto out; ++ } ++ ++ if (unlikely(wr->num_sge > qp->rq.max_gs)) { ++ printf("max gs exceeded %d (max = %d)\n", ++ wr->num_sge, qp->rq.max_gs); ++ err = EINVAL; ++ *bad_wr = wr; ++ goto out; ++ } ++ ++ recv_head = get_recv_wqe(qp, idx); ++ ++ for (i = 0; i < wr->num_sge; ++i) { ++ if (unlikely(!wr->sg_list[i].length)) ++ continue; ++ data_seg = get_seg_wqe(recv_head, i); ++ WR_LE_32(data_seg->seg_len, wr->sg_list[i].length); ++ WR_LE_32(data_seg->mkey, wr->sg_list[i].lkey); ++ WR_LE_64(data_seg->va, wr->sg_list[i].addr); ++ } ++ ++ qp->rq.wrid[idx] = wr->wr_id; ++ ++ dump_wqe(1, idx, qp); ++ idx = (idx + 1) & (qp->rq.wqe_cnt - 1); ++ qp->rq.flush_wqe_cnt++; ++ } ++ ++out: ++ if (likely(nreq)) { ++ qp->rq.head += nreq; ++ next_pid = qp->rq.head << (qp->rq.wqe_shift - XSC_BASE_WQE_SHIFT); ++ db.rq_next_pid = next_pid; ++ db.rqn = qp->rqn; ++ ++ /* ++ * Make sure that descriptors are written before ++ * doorbell record. 
++ */ ++ udma_to_device_barrier(); ++ WR_REG(qp->rq.db, db.raw_data); ++ } ++ ++ xsc_spin_unlock(&qp->rq.lock); ++ ++ return err; ++} ++ ++int xsc_use_huge(const char *key) ++{ ++ char *e; ++ e = getenv(key); ++ if (e && !strcmp(e, "y")) ++ return 1; ++ ++ return 0; ++} ++ ++struct xsc_qp *xsc_find_qp(struct xsc_context *ctx, uint32_t qpn) ++{ ++ int tind = qpn >> XSC_QP_TABLE_SHIFT; ++ ++ if (ctx->qp_table[tind].refcnt) ++ return ctx->qp_table[tind].table[qpn & XSC_QP_TABLE_MASK]; ++ else ++ return NULL; ++} ++ ++int xsc_store_qp(struct xsc_context *ctx, uint32_t qpn, struct xsc_qp *qp) ++{ ++ int tind = qpn >> XSC_QP_TABLE_SHIFT; ++ ++ if (!ctx->qp_table[tind].refcnt) { ++ ctx->qp_table[tind].table = calloc(XSC_QP_TABLE_MASK + 1, ++ sizeof(struct xsc_qp *)); ++ if (!ctx->qp_table[tind].table) ++ return -1; ++ } ++ ++ ++ctx->qp_table[tind].refcnt; ++ ctx->qp_table[tind].table[qpn & XSC_QP_TABLE_MASK] = qp; ++ return 0; ++} ++ ++void xsc_clear_qp(struct xsc_context *ctx, uint32_t qpn) ++{ ++ int tind = qpn >> XSC_QP_TABLE_SHIFT; ++ ++ if (!--ctx->qp_table[tind].refcnt) ++ free(ctx->qp_table[tind].table); ++ else ++ ctx->qp_table[tind].table[qpn & XSC_QP_TABLE_MASK] = NULL; ++} ++ ++int xsc_err_state_qp(struct ibv_qp *qp, enum ibv_qp_state cur_state, ++ enum ibv_qp_state state) ++{ ++ struct xsc_err_state_qp_node *tmp, *err_rq_node, *err_sq_node; ++ struct xsc_qp *xqp = to_xqp(qp); ++ int ret = 0; ++ ++ xsc_dbg(to_xctx(qp->context)->dbg_fp, XSC_DBG_QP, ++ "modify qp: qpid %d, cur_qp_state %d, qp_state %d\n", xqp->rsc.rsn, cur_state, state); ++ if (cur_state == IBV_QPS_ERR && state != IBV_QPS_ERR) { ++ if(qp->recv_cq) { ++ list_for_each_safe(&to_xcq(qp->recv_cq)->err_state_qp_list, err_rq_node, tmp, entry) { ++ if (err_rq_node->qp_id == xqp->rsc.rsn) { ++ list_del(&err_rq_node->entry); ++ free(err_rq_node); ++ } ++ } ++ } ++ ++ if(qp->send_cq) { ++ list_for_each_safe(&to_xcq(qp->send_cq)->err_state_qp_list, err_sq_node, tmp, entry) { ++ if (err_sq_node->qp_id == xqp->rsc.rsn) { ++ list_del(&err_sq_node->entry); ++ free(err_sq_node); ++ } ++ } ++ } ++ return ret; ++ } ++ ++ if (cur_state != IBV_QPS_ERR && state == IBV_QPS_ERR) { ++ if(qp->recv_cq) { ++ err_rq_node = calloc(1, sizeof(*err_rq_node)); ++ if (!err_rq_node) ++ return ENOMEM; ++ err_rq_node->qp_id = xqp->rsc.rsn; ++ err_rq_node->is_sq = false; ++ list_add_tail(&to_xcq(qp->recv_cq)->err_state_qp_list, &err_rq_node->entry); ++ } ++ ++ if(qp->send_cq) { ++ err_sq_node = calloc(1, sizeof(*err_sq_node)); ++ if (!err_sq_node) ++ return ENOMEM; ++ err_sq_node->qp_id = xqp->rsc.rsn; ++ err_sq_node->is_sq = true; ++ list_add_tail(&to_xcq(qp->send_cq)->err_state_qp_list, &err_sq_node->entry); ++ } ++ } ++ return ret; ++} +diff --git a/providers/xscale/rqm_csr_defines.h b/providers/xscale/rqm_csr_defines.h +new file mode 100644 +index 0000000..9552855 +--- /dev/null ++++ b/providers/xscale/rqm_csr_defines.h +@@ -0,0 +1,200 @@ ++#ifndef _RQM_CSR_DEFINES_H_ ++#define _RQM_CSR_DEFINES_H_ ++ ++#define RQM_SOFT_RESET_REG_ADDR 0x5000 ++#define RQM_SOFT_RESET_MASK 0x1 ++#define RQM_SOFT_RESET_SHIFT 0 ++ ++#define RQM_COUNTER_CONFIG_REG_ADDR 0x5020 ++#define RQM_CFG_CNT_WRAP_MASK 0x1 ++#define RQM_CFG_CNT_WRAP_SHIFT 0 ++#define RQM_CFG_CNT_RC_MASK 0x2 ++#define RQM_CFG_CNT_RC_SHIFT 1 ++ ++#define RQM_SCRATCH_PAD_REG_ADDR 0x5040 ++#define RQM_SCRATCH_PAD_MASK 0xffffffffffffffff ++#define RQM_SCRATCH_PAD_SHIFT 0 ++ ++#define RQM_RQM_CONFIG_REG_RING_ADDR_ARRAY_ADDR 0x5060 ++#define RQM_RQM_CONFIG_REG_RING_ADDR_ARRAY_SIZE 8 ++#define 
RQM_RQM_CONFIG_REG_RING_ADDR_ARRAY_STRIDE 0x20 ++#define RQM_CFG_CPU2RQM_RING_ADDR_MASK 0xffffffffffffffff ++#define RQM_CFG_CPU2RQM_RING_ADDR_SHIFT 0 ++ ++#define RQM_RQM_CONFIG_REG_RING_SIZE_ARRAY_ADDR 0x5160 ++#define RQM_RQM_CONFIG_REG_RING_SIZE_ARRAY_SIZE 8 ++#define RQM_RQM_CONFIG_REG_RING_SIZE_ARRAY_STRIDE 0x20 ++#define RQM_CFG_CPU2RQM_RING_SIZE_MASK 0xffff ++#define RQM_CFG_CPU2RQM_RING_SIZE_SHIFT 0 ++ ++#define RQM_RQM_CONFIG_REG_NEXT_PID_ARRAY_ADDR 0x5260 ++#define RQM_RQM_CONFIG_REG_NEXT_PID_ARRAY_SIZE 8 ++#define RQM_RQM_CONFIG_REG_NEXT_PID_ARRAY_STRIDE 0x20 ++#define RQM_CFG_CPU2RQM_NEXT_PID_MASK 0xffff ++#define RQM_CFG_CPU2RQM_NEXT_PID_SHIFT 0 ++ ++#define RQM_RQM_CONFIG_REG_CFG_EN_ARRAY_ADDR 0x5360 ++#define RQM_RQM_CONFIG_REG_CFG_EN_ARRAY_SIZE 8 ++#define RQM_RQM_CONFIG_REG_CFG_EN_ARRAY_STRIDE 0x20 ++#define RQM_CFG_CPU2RQM_CFG_EN_MASK 0x1 ++#define RQM_CFG_CPU2RQM_CFG_EN_SHIFT 0 ++ ++#define RQM_RQM_STATUS_REG_ARRAY_ADDR 0x5460 ++#define RQM_RQM_STATUS_REG_ARRAY_SIZE 8 ++#define RQM_RQM_STATUS_REG_ARRAY_STRIDE 0x20 ++#define RQM_STS_RQM2CPU_DONE_CID_MASK 0xffff ++#define RQM_STS_RQM2CPU_DONE_CID_SHIFT 0 ++ ++#define RQM_RQM_CONFIG_MER_QPID_FIFO_TH_ADDR 0x5560 ++#define RQM_CFG_CPU2RQM_MER_QPID_FIFO_AMTY_TH_MASK 0x1f ++#define RQM_CFG_CPU2RQM_MER_QPID_FIFO_AMTY_TH_SHIFT 0 ++#define RQM_CFG_CPU2RQM_MER_QPID_FIFO_AFUL_TH_MASK 0x3e0 ++#define RQM_CFG_CPU2RQM_MER_QPID_FIFO_AFUL_TH_SHIFT 5 ++ ++#define RQM_RQM_CONFIG_DMA_QPID_FIFO_TH_ADDR 0x5580 ++#define RQM_CFG_CPU2RQM_DMA_QPID_FIFO_AMTY_TH_MASK 0x1f ++#define RQM_CFG_CPU2RQM_DMA_QPID_FIFO_AMTY_TH_SHIFT 0 ++#define RQM_CFG_CPU2RQM_DMA_QPID_FIFO_AFUL_TH_MASK 0x3e0 ++#define RQM_CFG_CPU2RQM_DMA_QPID_FIFO_AFUL_TH_SHIFT 5 ++ ++#define RQM_RQM_CONFIG_PTR_QPID_FIFO_TH_ADDR 0x55a0 ++#define RQM_CFG_CPU2RQM_PTR_QPID_FIFO_AMTY_TH_MASK 0x1f ++#define RQM_CFG_CPU2RQM_PTR_QPID_FIFO_AMTY_TH_SHIFT 0 ++#define RQM_CFG_CPU2RQM_PTR_QPID_FIFO_AFUL_TH_MASK 0x3e0 ++#define RQM_CFG_CPU2RQM_PTR_QPID_FIFO_AFUL_TH_SHIFT 5 ++ ++#define RQM_RQM_CONFIG_WQE_FIFO_AMTY_TH_ADDR 0x55c0 ++#define RQM_RQM_CONFIG_WQE_FIFO_AMTY_TH_SIZE 8 ++#define RQM_RQM_CONFIG_WQE_FIFO_AMTY_TH_STRIDE 0x20 ++#define RQM_CFG_CPU2RQM_WQE_FIFO_AMTY_TH_MASK 0x1f ++#define RQM_CFG_CPU2RQM_WQE_FIFO_AMTY_TH_SHIFT 0 ++ ++#define RQM_RQM_CONFIG_WQE_FIFO_AFUL_TH_ADDR 0x56c0 ++#define RQM_RQM_CONFIG_WQE_FIFO_AFUL_TH_SIZE 8 ++#define RQM_RQM_CONFIG_WQE_FIFO_AFUL_TH_STRIDE 0x20 ++#define RQM_CFG_CPU2RQM_WQE_FIFO_AFUL_TH_MASK 0x1f ++#define RQM_CFG_CPU2RQM_WQE_FIFO_AFUL_TH_SHIFT 0 ++ ++#define RQM_RQM_INT_STATE_REG_ADDR 0x57c0 ++#define RQM_RQM_WQE_FIFO_OVFL_ERR_MASK 0xff ++#define RQM_RQM_WQE_FIFO_OVFL_ERR_SHIFT 0 ++#define RQM_RQM_WQE_FIFO_UNFL_ERR_MASK 0xff00 ++#define RQM_RQM_WQE_FIFO_UNFL_ERR_SHIFT 8 ++#define RQM_RQM_NO_WQE_ERR_MASK 0xff0000 ++#define RQM_RQM_NO_WQE_ERR_SHIFT 16 ++ ++#define RQM_RQM_FIFO_USED_CNT_REG_ADDR 0x57e0 ++#define RQM_RQM_FIFO_USED_CNT_REG_SIZE 8 ++#define RQM_RQM_FIFO_USED_CNT_REG_STRIDE 0x20 ++#define RQM_RQM_WQE_FIFO_USED_CNT_MASK 0xf ++#define RQM_RQM_WQE_FIFO_USED_CNT_SHIFT 0 ++ ++#define RQM_RQM_CMD_FIFO_STATE_REG_ADDR 0x58e0 ++#define RQM_RQM_WQE_FIFO_MTY_MASK 0xff ++#define RQM_RQM_WQE_FIFO_MTY_SHIFT 0 ++#define RQM_RQM_WQE_FIFO_FUL_MASK 0xff00 ++#define RQM_RQM_WQE_FIFO_FUL_SHIFT 8 ++ ++#define RQM_RQM_OTH_FIFO_STATE_REG_ADDR 0x5900 ++#define RQM_RQM_OTH_FIFO_MTY_MASK 0x7 ++#define RQM_RQM_OTH_FIFO_MTY_SHIFT 0 ++#define RQM_RQM_OTH_FIFO_AFUL_MASK 0x38 ++#define RQM_RQM_OTH_FIFO_AFUL_SHIFT 3 ++#define RQM_RQM_OTH_FIFO_OVFL_ERR_MASK 0x1c0 ++#define 
RQM_RQM_OTH_FIFO_OVFL_ERR_SHIFT 6 ++#define RQM_RQM_OTH_FIFO_UNFL_ERR_MASK 0xe00 ++#define RQM_RQM_OTH_FIFO_UNFL_ERR_SHIFT 9 ++ ++#define RQM_RQM_OTHERS_FIFO_USED_CNT_REG_ADDR 0x5920 ++#define RQM_RQM_MER_REQ_FIFO_USED_CNT_MASK 0xf ++#define RQM_RQM_MER_REQ_FIFO_USED_CNT_SHIFT 0 ++#define RQM_RQM_DMA_REQ_FIFO_USED_CNT_MASK 0xf0 ++#define RQM_RQM_DMA_REQ_FIFO_USED_CNT_SHIFT 4 ++#define RQM_RQM_PTR_REQ_FIFO_USED_CNT_MASK 0xf00 ++#define RQM_RQM_PTR_REQ_FIFO_USED_CNT_SHIFT 8 ++ ++#define RQM_RQM_DEBUG_INFO_STATE_REG_0_ADDR 0x5940 ++#define RQM_RQM2MER_DBG_OPCODE_MASK 0xff ++#define RQM_RQM2MER_DBG_OPCODE_SHIFT 0 ++#define RQM_RQM2MER_DBG_TX0_RX1_MASK 0x100 ++#define RQM_RQM2MER_DBG_TX0_RX1_SHIFT 8 ++#define RQM_RQM2MER_DBG_CAP_MASK 0x200 ++#define RQM_RQM2MER_DBG_CAP_SHIFT 9 ++#define RQM_RQM2MER_DBG_L_QPID_MASK 0x1c00 ++#define RQM_RQM2MER_DBG_L_QPID_SHIFT 10 ++#define RQM_RQM2MER_DBG_SN_MASK 0x1fffffe000 ++#define RQM_RQM2MER_DBG_SN_SHIFT 13 ++ ++#define RQM_RQM_DEBUG_INFO_STATE_REG_1_ADDR 0x5960 ++#define RQM_RQM2MER_DBG_MOD_IF_BM_MASK 0xffffffffffffffff ++#define RQM_RQM2MER_DBG_MOD_IF_BM_SHIFT 0 ++ ++#define RQM_RQM_DEBUG_INFO_STATE_REG_2_ADDR 0x5980 ++#define RQM_RQM2MER_DBG_RQM2MER_VLD_CNT_MASK 0xffffffff ++#define RQM_RQM2MER_DBG_RQM2MER_VLD_CNT_SHIFT 0 ++#define RQM_RQM2MER_DBG_DD2RQM_DATA_VLD_CNT_MASK 0xffffffff00000000 ++#define RQM_RQM2MER_DBG_DD2RQM_DATA_VLD_CNT_SHIFT 32 ++ ++#define RQM_RQM_DMA_REQ_LEN_STATE_REG_ADDR 0x59a0 ++#define RQM_RQM_DMA_REQ_LEN_MASK 0x3ff ++#define RQM_RQM_DMA_REQ_LEN_SHIFT 0 ++ ++#define RQM_RQM_DMA_REQ_ADDR_STATE_REG_ADDR 0x59c0 ++#define RQM_RQM_DMA_REQ_ADDR_MASK 0xffffffffffffffff ++#define RQM_RQM_DMA_REQ_ADDR_SHIFT 0 ++ ++#define RQM_RQM_WQE_WQEID_ADDR 0x59e0 ++#define RQM_RQM_WQE_WQEID_MASK 0xffff ++#define RQM_RQM_WQE_WQEID_SHIFT 0 ++ ++#define RQM_RQM_WQE_RECV_LEN_ADDR 0x5a00 ++#define RQM_RQM_WQE_REC_LEN_MASK 0x7fffffff ++#define RQM_RQM_WQE_REC_LEN_SHIFT 0 ++ ++#define RQM_RQM_WQE_LOCAL_VA_ADDR 0x5a20 ++#define RQM_RQM_WQE_L_VA_MASK 0xffffffffffffffff ++#define RQM_RQM_WQE_L_VA_SHIFT 0 ++ ++#define RQM_RQM_WQE_LOCAL_KEY_ADDR 0x5a40 ++#define RQM_RQM_WQE_L_KEY_MASK 0xffffffff ++#define RQM_RQM_WQE_L_KEY_SHIFT 0 ++ ++#define RQM_MER_RQM_WQE_QPID_ADDR 0x5a60 ++#define RQM_RQM_WQE_QPID_MASK 0x7 ++#define RQM_RQM_WQE_QPID_SHIFT 0 ++ ++#define RQM_RQM_STATUS_NEXT_CID_REG_ARRAY_ADDR 0x5a80 ++#define RQM_RQM_STATUS_NEXT_CID_REG_ARRAY_SIZE 8 ++#define RQM_RQM_STATUS_NEXT_CID_REG_ARRAY_STRIDE 0x20 ++#define RQM_RQM_NEXT_CID_MASK 0xffff ++#define RQM_RQM_NEXT_CID_SHIFT 0 ++ ++#define RQM_RQM_DMA_IN_SOP_CNT_REG_ADDR 0x5b80 ++#define RQM_RQM_DMA_IN_SOP_CNT_MASK 0xffffffffffffffff ++#define RQM_RQM_DMA_IN_SOP_CNT_SHIFT 0 ++ ++#define RQM_RQM_DMA_IN_EOP_CNT_REG_ADDR 0x5ba0 ++#define RQM_RQM_DMA_IN_EOP_CNT_MASK 0xffffffffffffffff ++#define RQM_RQM_DMA_IN_EOP_CNT_SHIFT 0 ++ ++#define RQM_RQM_DMA_IN_VLD_CNT_REG_ADDR 0x5bc0 ++#define RQM_RQM_DMA_IN_VLD_CNT_MASK 0xffffffffffffffff ++#define RQM_RQM_DMA_IN_VLD_CNT_SHIFT 0 ++ ++#define RQM_RQM_DMA_REQ_CNT_REG_ADDR 0x5be0 ++#define RQM_RQM_DMA_REQ_CNT_MASK 0xffffffffffffffff ++#define RQM_RQM_DMA_REQ_CNT_SHIFT 0 ++ ++#define RQM_RQM_DMA_GNT_CNT_REG_ADDR 0x5c00 ++#define RQM_RQM_DMA_GNT_CNT_MASK 0xffffffffffffffff ++#define RQM_RQM_DMA_GNT_CNT_SHIFT 0 ++ ++#define RQM_RQM_MER_VLD_CNT_REG_ADDR 0x5c20 ++#define RQM_RQM_MER_VLD_CNT_MASK 0xffffffffffffffff ++#define RQM_RQM_MER_VLD_CNT_SHIFT 0 ++ ++#define RQM_RQM_MER_REQ_CNT_REG_ADDR 0x5c40 ++#define RQM_RQM_MER_REQ_CNT_MASK 0xffffffffffffffff ++#define 
RQM_RQM_MER_REQ_CNT_SHIFT 0 ++ ++#endif +diff --git a/providers/xscale/sqm_csr_defines.h b/providers/xscale/sqm_csr_defines.h +new file mode 100644 +index 0000000..e0dc6e9 +--- /dev/null ++++ b/providers/xscale/sqm_csr_defines.h +@@ -0,0 +1,204 @@ ++#ifndef _SQM_CSR_DEFINES_H_ ++#define _SQM_CSR_DEFINES_H_ ++ ++#define SQM_SOFT_RESET_REG_ADDR 0x4000 ++#define SQM_SOFT_RESET_MASK 0x1 ++#define SQM_SOFT_RESET_SHIFT 0 ++ ++#define SQM_COUNTER_CONFIG_REG_ADDR 0x4020 ++#define SQM_CFG_CNT_WRAP_MASK 0x1 ++#define SQM_CFG_CNT_WRAP_SHIFT 0 ++#define SQM_CFG_CNT_RC_MASK 0x2 ++#define SQM_CFG_CNT_RC_SHIFT 1 ++ ++#define SQM_SCRATCH_PAD_REG_ADDR 0x4040 ++#define SQM_SCRATCH_PAD_MASK 0xffffffffffffffff ++#define SQM_SCRATCH_PAD_SHIFT 0 ++ ++#define SQM_SQM_CONFIG_REG_RING_ADDR_ARRAY_ADDR 0x4060 ++#define SQM_SQM_CONFIG_REG_RING_ADDR_ARRAY_SIZE 8 ++#define SQM_SQM_CONFIG_REG_RING_ADDR_ARRAY_STRIDE 0x20 ++#define SQM_CFG_CPU2SQM_RING_ADDR_MASK 0xffffffffffffffff ++#define SQM_CFG_CPU2SQM_RING_ADDR_SHIFT 0 ++ ++#define SQM_SQM_CONFIG_REG_RING_SIZE_ARRAY_ADDR 0x4160 ++#define SQM_SQM_CONFIG_REG_RING_SIZE_ARRAY_SIZE 8 ++#define SQM_SQM_CONFIG_REG_RING_SIZE_ARRAY_STRIDE 0x20 ++#define SQM_CFG_CPU2SQM_RING_SIZE_MASK 0xffff ++#define SQM_CFG_CPU2SQM_RING_SIZE_SHIFT 0 ++ ++#define SQM_SQM_CONFIG_REG_ARRAY_ADDR 0x4260 ++#define SQM_SQM_CONFIG_REG_ARRAY_SIZE 8 ++#define SQM_SQM_CONFIG_REG_ARRAY_STRIDE 0x20 ++#define SQM_CFG_CPU2SQM_NEXT_PID_MASK 0xffff ++#define SQM_CFG_CPU2SQM_NEXT_PID_SHIFT 0 ++ ++#define SQM_SQM_CONFIG_REG_CFG_EN_ARRAY_ADDR 0x4360 ++#define SQM_SQM_CONFIG_REG_CFG_EN_ARRAY_SIZE 8 ++#define SQM_SQM_CONFIG_REG_CFG_EN_ARRAY_STRIDE 0x20 ++#define SQM_CFG_CPU2SQM_CFG_EN_MASK 0x1 ++#define SQM_CFG_CPU2SQM_CFG_EN_SHIFT 0 ++ ++#define SQM_SQM_STATUS_REG_DONE_CID_ARRAY_ADDR 0x4460 ++#define SQM_SQM_STATUS_REG_DONE_CID_ARRAY_SIZE 8 ++#define SQM_SQM_STATUS_REG_DONE_CID_ARRAY_STRIDE 0x20 ++#define SQM_STS_SQM2CPU_DONE_CID_MASK 0xffff ++#define SQM_STS_SQM2CPU_DONE_CID_SHIFT 0 ++ ++#define SQM_SQM_CFG_WQE_FIFO_TH_ADDR 0x4560 ++#define SQM_CFG_CPU2SQM_WQE_FIFO_AFUL_TH_MASK 0xff ++#define SQM_CFG_CPU2SQM_WQE_FIFO_AFUL_TH_SHIFT 0 ++#define SQM_CFG_CPU2SQM_WQE_FIFO_AMTY_TH_MASK 0xff00 ++#define SQM_CFG_CPU2SQM_WQE_FIFO_AMTY_TH_SHIFT 8 ++ ++#define SQM_SQM_CONFIG_DBG_FIFO_REG_CFG_ADDR 0x4580 ++#define SQM_CFG_CPU2SQM_DBG_FIFO_AFUL_TH_MASK 0xff ++#define SQM_CFG_CPU2SQM_DBG_FIFO_AFUL_TH_SHIFT 0 ++#define SQM_CFG_CPU2SQM_DBG_FIFO_AMTY_TH_MASK 0xff00 ++#define SQM_CFG_CPU2SQM_DBG_FIFO_AMTY_TH_SHIFT 8 ++ ++#define SQM_SQM_CONFIG_QPID_W_FIFO_REG_CFG_ADDR 0x45a0 ++#define SQM_CFG_CPU2SQM_QPID_W_FIFO_AFUL_TH_MASK 0xff ++#define SQM_CFG_CPU2SQM_QPID_W_FIFO_AFUL_TH_SHIFT 0 ++#define SQM_CFG_CPU2SQM_QPID_W_FIFO_AMTY_TH_MASK 0xff00 ++#define SQM_CFG_CPU2SQM_QPID_W_FIFO_AMTY_TH_SHIFT 8 ++ ++#define SQM_SQM_CONFIG_QPID_R_FIFO_REG_CFG_ADDR 0x45c0 ++#define SQM_CFG_CPU2SQM_QPID_R_FIFO_AFUL_TH_MASK 0xff ++#define SQM_CFG_CPU2SQM_QPID_R_FIFO_AFUL_TH_SHIFT 0 ++#define SQM_CFG_CPU2SQM_QPID_R_FIFO_AMTY_TH_MASK 0xff00 ++#define SQM_CFG_CPU2SQM_QPID_R_FIFO_AMTY_TH_SHIFT 8 ++ ++#define SQM_SQM_INT_STATE_REG_ADDR 0x45e0 ++#define SQM_SQM_FIFO_OVFL_ERR_MASK 0xf ++#define SQM_SQM_FIFO_OVFL_ERR_SHIFT 0 ++#define SQM_SQM_FIFO_UNFL_ERR_MASK 0xf0 ++#define SQM_SQM_FIFO_UNFL_ERR_SHIFT 4 ++#define SQM_SQM_FIFO_MTY_MASK 0xf00 ++#define SQM_SQM_FIFO_MTY_SHIFT 8 ++#define SQM_SQM_FIFO_AFUL_MASK 0xf000 ++#define SQM_SQM_FIFO_AFUL_SHIFT 12 ++#define SQM_SQM_SOP_EOP_NO_EQUAL_MASK 0x10000 ++#define SQM_SQM_SOP_EOP_NO_EQUAL_SHIFT 16 ++ ++#define 
SQM_SQM_FIFO_USED_CNT_REG_ADDR 0x4600 ++#define SQM_SQM_WQE_FIFO_USED_CNT_MASK 0x7f ++#define SQM_SQM_WQE_FIFO_USED_CNT_SHIFT 0 ++#define SQM_SQM_HEAD_FIFO_USED_CNT_MASK 0x3f80 ++#define SQM_SQM_HEAD_FIFO_USED_CNT_SHIFT 7 ++#define SQM_SQM_PTR_FIFO_USED_CNT_MASK 0x1fc000 ++#define SQM_SQM_PTR_FIFO_USED_CNT_SHIFT 14 ++#define SQM_SQM_DBG_FIFO_USED_CNT_MASK 0xfe00000 ++#define SQM_SQM_DBG_FIFO_USED_CNT_SHIFT 21 ++ ++#define SQM_SQM_DMA_REQUEST_LEN_REG_ADDR 0x4620 ++#define SQM_SQM_DMA_REQ_LEN_MASK 0x3ff ++#define SQM_SQM_DMA_REQ_LEN_SHIFT 0 ++ ++#define SQM_SQM_DMA_REQUEST_ADDR_REG_ADDR 0x4640 ++#define SQM_SQM_DMA_REQ_ADDR_MASK 0xffffffffffffffff ++#define SQM_SQM_DMA_REQ_ADDR_SHIFT 0 ++ ++#define SQM_SQM_STATUS_REG_NEXT_CID_ARRAY_ADDR 0x4660 ++#define SQM_SQM_STATUS_REG_NEXT_CID_ARRAY_SIZE 8 ++#define SQM_SQM_STATUS_REG_NEXT_CID_ARRAY_STRIDE 0x20 ++#define SQM_SQM_NEXT_CID_MASK 0xffff ++#define SQM_SQM_NEXT_CID_SHIFT 0 ++ ++#define SQM_SQM_WQE_OPCODE_ADDR 0x4760 ++#define SQM_SQM_WQE_OPCODE_MASK 0xff ++#define SQM_SQM_WQE_OPCODE_SHIFT 0 ++ ++#define SQM_SQM_WQE_WQEID_ADDR 0x4780 ++#define SQM_SQM_WQE_WQEID_MASK 0xffff ++#define SQM_SQM_WQE_WQEID_SHIFT 0 ++ ++#define SQM_SQM_WQE_R_VA_ADDR 0x47a0 ++#define SQM_SQM_WQE_R_VA_MASK 0xffffffffffffffff ++#define SQM_SQM_WQE_R_VA_SHIFT 0 ++ ++#define SQM_SQM_WQE_R_KEY_ADDR 0x47c0 ++#define SQM_SQM_WQE_R_KEY_MASK 0xffffffff ++#define SQM_SQM_WQE_R_KEY_SHIFT 0 ++ ++#define SQM_SQM_WQE_L_LEN_ADDR 0x47e0 ++#define SQM_SQM_WQE_L_LEN_MASK 0x7fffffff ++#define SQM_SQM_WQE_L_LEN_SHIFT 0 ++ ++#define SQM_SQM_WQE_L_VA_ADDR 0x4800 ++#define SQM_SQM_WQE_L_VA_MASK 0xffffffffffffffff ++#define SQM_SQM_WQE_L_VA_SHIFT 0 ++ ++#define SQM_SQM_WQE_L_KEY_ADDR 0x4820 ++#define SQM_SQM_WQE_L_KEY_MASK 0xffffffff ++#define SQM_SQM_WQE_L_KEY_SHIFT 0 ++ ++#define SQM_SQM_WQE_QPID_ADDR 0x4840 ++#define SQM_SQM_WQE_QPID_MASK 0x7 ++#define SQM_SQM_WQE_QPID_SHIFT 0 ++ ++#define SQM_SQM_DMA_IN_SOP_CNT_REG_ADDR 0x4860 ++#define SQM_SQM_DMA_IN_SOP_CNT_MASK 0xffffffffffffffff ++#define SQM_SQM_DMA_IN_SOP_CNT_SHIFT 0 ++ ++#define SQM_SQM_DMA_IN_EOP_CNT_REG_ADDR 0x4880 ++#define SQM_SQM_DMA_IN_EOP_CNT_MASK 0xffffffffffffffff ++#define SQM_SQM_DMA_IN_EOP_CNT_SHIFT 0 ++ ++#define SQM_SQM_DMA_IN_VLD_CNT_REG_ADDR 0x48a0 ++#define SQM_SQM_DMA_IN_VLD_CNT_MASK 0xffffffffffffffff ++#define SQM_SQM_DMA_IN_VLD_CNT_SHIFT 0 ++ ++#define SQM_SQM_DMA_REQ_CNT_REG_ADDR 0x48c0 ++#define SQM_SQM_DMA_REQ_CNT_MASK 0xffffffffffffffff ++#define SQM_SQM_DMA_REQ_CNT_SHIFT 0 ++ ++#define SQM_SQM_DMA_GNT_CNT_REG_ADDR 0x48e0 ++#define SQM_SQM_DMA_GNT_CNT_MASK 0xffffffffffffffff ++#define SQM_SQM_DMA_GNT_CNT_SHIFT 0 ++ ++#define SQM_SQM_MET_VLD_CNT_REG_ADDR 0x4900 ++#define SQM_SQM_MET_CNT_MASK 0xffffffffffffffff ++#define SQM_SQM_MET_CNT_SHIFT 0 ++ ++#define SQM_SQM_CONFIG_CAP_CFG_EN_ADDR 0x4920 ++#define SQM_CFG_CPU2SQM_CAP_EN_CLR_MASK 0x1 ++#define SQM_CFG_CPU2SQM_CAP_EN_CLR_SHIFT 0 ++#define SQM_CFG_CPU2SQM_CAP_QPID_EN_MASK 0x2 ++#define SQM_CFG_CPU2SQM_CAP_QPID_EN_SHIFT 1 ++#define SQM_CFG_CPU2SQM_CAP_OPCODE_EN_MASK 0x4 ++#define SQM_CFG_CPU2SQM_CAP_OPCODE_EN_SHIFT 2 ++#define SQM_CFG_CPU2SQM_CAP_QPID_MASK 0x38 ++#define SQM_CFG_CPU2SQM_CAP_QPID_SHIFT 3 ++#define SQM_CFG_CPU2SQM_CAP_OPCODE_MASK 0x3fc0 ++#define SQM_CFG_CPU2SQM_CAP_OPCODE_SHIFT 6 ++ ++#define SQM_SQM_DEBUG_INFO_STATE_REG_0_ADDR 0x4940 ++#define SQM_SQM2MET_DBG_OPCODE_MASK 0xff ++#define SQM_SQM2MET_DBG_OPCODE_SHIFT 0 ++#define SQM_SQM2MET_DBG_TX0_RX1_MASK 0x100 ++#define SQM_SQM2MET_DBG_TX0_RX1_SHIFT 8 ++#define SQM_SQM2MET_DBG_CAP_MASK 
0x200 ++#define SQM_SQM2MET_DBG_CAP_SHIFT 9 ++#define SQM_SQM2MET_DBG_L_QPID_MASK 0x1c00 ++#define SQM_SQM2MET_DBG_L_QPID_SHIFT 10 ++#define SQM_SQM2MET_DBG_SN_MASK 0x1fffffe000 ++#define SQM_SQM2MET_DBG_SN_SHIFT 13 ++ ++#define SQM_SQM_DEBUG_INFO_STATE_REG_1_ADDR 0x4960 ++#define SQM_SQM2MET_DBG_MOD_IF_BM_MASK 0xffffffffffffffff ++#define SQM_SQM2MET_DBG_MOD_IF_BM_SHIFT 0 ++ ++#define SQM_SQM_DMA_REQ_COUNTER_REG_ADDR 0x4980 ++#define SQM_SQM_DMA_REQ_COUNTER_MASK 0xff ++#define SQM_SQM_DMA_REQ_COUNTER_SHIFT 0 ++ ++#define SQM_SQM_DMA_GNT_COUNTER_REG_ADDR 0x49a0 ++#define SQM_SQM_DMA_GNT_COUNTER_MASK 0xff ++#define SQM_SQM_DMA_GNT_COUNTER_SHIFT 0 ++ ++#define SQM_SQM_SQM2MET_COUNTER_REG_ADDR 0x49c0 ++#define SQM_SQM_SQM2MET_CNT_MASK 0xff ++#define SQM_SQM_SQM2MET_CNT_SHIFT 0 ++ ++#endif +diff --git a/providers/xscale/verbs.c b/providers/xscale/verbs.c +new file mode 100644 +index 0000000..937bed1 +--- /dev/null ++++ b/providers/xscale/verbs.c +@@ -0,0 +1,2816 @@ ++/* ++ * Copyright (c) 2021 - 2022, Shanghai Yunsilicon Technology Co., Ltd. ++ * All rights reserved. ++ */ ++ ++#include ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include ++#include ++#include ++#include ++#include ++ ++#include "xscale.h" ++#include "xsc-abi.h" ++#include "wqe.h" ++#include "xsc_hsi.h" ++ ++int xsc_single_threaded = 0; ++ ++static inline int is_xrc_tgt(int type) ++{ ++ return type == IBV_QPT_XRC_RECV; ++} ++ ++static void xsc_set_fw_version(struct ibv_device_attr *attr, union xsc_ib_fw_ver *fw_ver) ++{ ++ uint8_t ver_major = fw_ver->s.ver_major; ++ uint8_t ver_minor = fw_ver->s.ver_minor; ++ uint16_t ver_patch = fw_ver->s.ver_patch; ++ uint32_t ver_tweak = fw_ver->s.ver_tweak; ++ ++ if (ver_tweak == 0) { ++ snprintf(attr->fw_ver, sizeof(attr->fw_ver), "v%u.%u.%u", ++ ver_major, ver_minor, ver_patch); ++ } else { ++ snprintf(attr->fw_ver, sizeof(attr->fw_ver), "v%u.%u.%u+%u", ++ ver_major, ver_minor, ver_patch, ver_tweak); ++ } ++} ++ ++static int xsc_read_clock(struct ibv_context *context, uint64_t *cycles) ++{ ++ unsigned int clockhi, clocklo, clockhi1; ++ int i; ++ struct xsc_context *ctx = to_xctx(context); ++ ++ if (!ctx->hca_core_clock) ++ return EOPNOTSUPP; ++ ++ /* Handle wraparound */ ++ for (i = 0; i < 2; i++) { ++ clockhi = be32toh(mmio_read32_be(ctx->hca_core_clock)); ++ clocklo = be32toh(mmio_read32_be(ctx->hca_core_clock + 4)); ++ clockhi1 = be32toh(mmio_read32_be(ctx->hca_core_clock)); ++ if (clockhi == clockhi1) ++ break; ++ } ++ ++ *cycles = (uint64_t)clockhi << 32 | (uint64_t)clocklo; ++ ++ return 0; ++} ++ ++int xsc_query_rt_values(struct ibv_context *context, ++ struct ibv_values_ex *values) ++{ ++ uint32_t comp_mask = 0; ++ int err = 0; ++ ++ if (!check_comp_mask(values->comp_mask, IBV_VALUES_MASK_RAW_CLOCK)) ++ return EINVAL; ++ ++ if (values->comp_mask & IBV_VALUES_MASK_RAW_CLOCK) { ++ uint64_t cycles; ++ ++ err = xsc_read_clock(context, &cycles); ++ if (!err) { ++ values->raw_clock.tv_sec = 0; ++ values->raw_clock.tv_nsec = cycles; ++ comp_mask |= IBV_VALUES_MASK_RAW_CLOCK; ++ } ++ } ++ ++ values->comp_mask = comp_mask; ++ ++ return err; ++} ++ ++int xsc_query_port(struct ibv_context *context, uint8_t port, ++ struct ibv_port_attr *attr) ++{ ++ struct ibv_query_port cmd; ++ ++ return ibv_cmd_query_port(context, port, attr, &cmd, sizeof cmd); ++} ++ ++struct ibv_pd *xsc_alloc_pd(struct ibv_context *context) ++{ ++ struct ibv_alloc_pd cmd; ++ struct xsc_alloc_pd_resp resp; ++ struct 
xsc_pd *pd; ++ ++ pd = calloc(1, sizeof *pd); ++ if (!pd) ++ return NULL; ++ ++ if (ibv_cmd_alloc_pd(context, &pd->ibv_pd, &cmd, sizeof cmd, ++ &resp.ibv_resp, sizeof resp)) { ++ free(pd); ++ return NULL; ++ } ++ ++ atomic_init(&pd->refcount, 1); ++ pd->pdn = resp.pdn; ++ xsc_dbg(to_xctx(context)->dbg_fp, XSC_DBG_PD, "pd number:%u\n", pd->pdn); ++ ++ return &pd->ibv_pd; ++} ++ ++struct ibv_pd * ++xsc_alloc_parent_domain(struct ibv_context *context, ++ struct ibv_parent_domain_init_attr *attr) ++{ ++ struct xsc_parent_domain *xparent_domain; ++ ++ if (ibv_check_alloc_parent_domain(attr)) ++ return NULL; ++ ++ if (attr->comp_mask) { ++ errno = EINVAL; ++ return NULL; ++ } ++ ++ xparent_domain = calloc(1, sizeof(*xparent_domain)); ++ if (!xparent_domain) { ++ errno = ENOMEM; ++ return NULL; ++ } ++ ++ xparent_domain->xpd.xprotection_domain = to_xpd(attr->pd); ++ atomic_fetch_add(&xparent_domain->xpd.xprotection_domain->refcount, 1); ++ atomic_init(&xparent_domain->xpd.refcount, 1); ++ ++ ibv_initialize_parent_domain( ++ &xparent_domain->xpd.ibv_pd, ++ &xparent_domain->xpd.xprotection_domain->ibv_pd); ++ ++ return &xparent_domain->xpd.ibv_pd; ++} ++ ++static int xsc_dealloc_parent_domain(struct xsc_parent_domain *xparent_domain) ++{ ++ if (atomic_load(&xparent_domain->xpd.refcount) > 1) ++ return EBUSY; ++ ++ atomic_fetch_sub(&xparent_domain->xpd.xprotection_domain->refcount, 1); ++ ++ free(xparent_domain); ++ return 0; ++} ++ ++int xsc_free_pd(struct ibv_pd *pd) ++{ ++ int ret; ++ struct xsc_parent_domain *xparent_domain = to_xparent_domain(pd); ++ struct xsc_pd *xpd = to_xpd(pd); ++ ++ if (xparent_domain) ++ return xsc_dealloc_parent_domain(xparent_domain); ++ ++ if (atomic_load(&xpd->refcount) > 1) ++ return EBUSY; ++ ++ ret = ibv_cmd_dealloc_pd(pd); ++ if (ret) ++ return ret; ++ ++ xsc_dbg(to_xctx(pd->context)->dbg_fp, XSC_DBG_PD, "dealloc pd\n"); ++ free(xpd); ++ ++ return 0; ++} ++ ++struct ibv_mr *xsc_reg_mr(struct ibv_pd *pd, void *addr, size_t length, ++ uint64_t hca_va, int acc) ++{ ++ struct xsc_mr *mr; ++ struct ibv_reg_mr cmd; ++ int ret; ++ enum ibv_access_flags access = (enum ibv_access_flags)acc; ++ struct ib_uverbs_reg_mr_resp resp; ++ ++ mr = calloc(1, sizeof(*mr)); ++ if (!mr) ++ return NULL; ++ ++ ret = ibv_cmd_reg_mr(pd, addr, length, hca_va, access, ++ &mr->vmr, &cmd, sizeof(cmd), &resp, ++ sizeof resp); ++ if (ret) { ++ xsc_free_buf(&(mr->buf)); ++ free(mr); ++ return NULL; ++ } ++ mr->alloc_flags = acc; ++ ++ xsc_dbg(to_xctx(pd->context)->dbg_fp, XSC_DBG_MR, "lkey:%u, rkey:%u\n", ++ mr->vmr.ibv_mr.lkey, mr->vmr.ibv_mr.rkey); ++ ++ return &mr->vmr.ibv_mr; ++} ++ ++struct ibv_mr *xsc_alloc_null_mr(struct ibv_pd *pd) ++{ ++ struct xsc_mr *mr; ++ struct xsc_context *ctx = to_xctx(pd->context); ++ ++ if (ctx->dump_fill_mkey == XSC_INVALID_LKEY) { ++ errno = ENOTSUP; ++ return NULL; ++ } ++ ++ mr = calloc(1, sizeof(*mr)); ++ if (!mr) { ++ errno = ENOMEM; ++ return NULL; ++ } ++ ++ mr->vmr.ibv_mr.lkey = ctx->dump_fill_mkey; ++ ++ mr->vmr.ibv_mr.context = pd->context; ++ mr->vmr.ibv_mr.pd = pd; ++ mr->vmr.ibv_mr.addr = NULL; ++ mr->vmr.ibv_mr.length = SIZE_MAX; ++ mr->vmr.mr_type = IBV_MR_TYPE_NULL_MR; ++ ++ return &mr->vmr.ibv_mr; ++} ++ ++enum { ++ XSC_DM_ALLOWED_ACCESS = IBV_ACCESS_LOCAL_WRITE | ++ IBV_ACCESS_REMOTE_WRITE | ++ IBV_ACCESS_REMOTE_READ | ++ IBV_ACCESS_REMOTE_ATOMIC | ++ IBV_ACCESS_ZERO_BASED ++}; ++ ++struct ibv_mr *xsc_reg_dm_mr(struct ibv_pd *pd, struct ibv_dm *ibdm, ++ uint64_t dm_offset, size_t length, ++ unsigned int acc) ++{ ++ struct xsc_dm *dm = 
to_xdm(ibdm); ++ struct xsc_mr *mr; ++ int ret; ++ ++ if (acc & ~XSC_DM_ALLOWED_ACCESS) { ++ errno = EINVAL; ++ return NULL; ++ } ++ ++ mr = calloc(1, sizeof(*mr)); ++ if (!mr) { ++ errno = ENOMEM; ++ return NULL; ++ } ++ ++ ret = ibv_cmd_reg_dm_mr(pd, &dm->verbs_dm, dm_offset, length, acc, ++ &mr->vmr, NULL); ++ if (ret) { ++ free(mr); ++ return NULL; ++ } ++ ++ mr->alloc_flags = acc; ++ ++ return &mr->vmr.ibv_mr; ++} ++ ++int xsc_rereg_mr(struct verbs_mr *vmr, int flags, struct ibv_pd *pd, ++ void *addr, size_t length, int access) ++{ ++ struct ibv_rereg_mr cmd; ++ struct ib_uverbs_rereg_mr_resp resp; ++ ++ return ibv_cmd_rereg_mr(vmr, flags, addr, length, (uintptr_t)addr, ++ access, pd, &cmd, sizeof(cmd), &resp, ++ sizeof(resp)); ++} ++ ++int xsc_dereg_mr(struct verbs_mr *vmr) ++{ ++ int ret; ++ ++ if (vmr->mr_type == IBV_MR_TYPE_NULL_MR) ++ goto free; ++ ++ ret = ibv_cmd_dereg_mr(vmr); ++ if (ret) ++ return ret; ++ ++free: ++ free(vmr); ++ return 0; ++} ++ ++int xsc_round_up_power_of_two(long long sz) ++{ ++ long long ret; ++ ++ for (ret = 1; ret < sz; ret <<= 1) ++ ; /* nothing */ ++ ++ if (ret > INT_MAX) { ++ fprintf(stderr, "%s: roundup overflow\n", __func__); ++ return -ENOMEM; ++ } ++ ++ return (int)ret; ++} ++ ++static int align_queue_size(long long req) ++{ ++ return xsc_round_up_power_of_two(req); ++} ++ ++enum { ++ CREATE_CQ_SUPPORTED_WC_FLAGS = IBV_WC_STANDARD_FLAGS | ++ IBV_WC_EX_WITH_COMPLETION_TIMESTAMP | ++ IBV_WC_EX_WITH_CVLAN | ++ IBV_WC_EX_WITH_FLOW_TAG | ++ IBV_WC_EX_WITH_TM_INFO | ++ IBV_WC_EX_WITH_COMPLETION_TIMESTAMP_WALLCLOCK ++}; ++ ++enum { ++ CREATE_CQ_SUPPORTED_COMP_MASK = IBV_CQ_INIT_ATTR_MASK_FLAGS ++}; ++ ++enum { ++ CREATE_CQ_SUPPORTED_FLAGS = ++ IBV_CREATE_CQ_ATTR_SINGLE_THREADED | ++ IBV_CREATE_CQ_ATTR_IGNORE_OVERRUN ++}; ++ ++enum { ++ XSC_DV_CREATE_CQ_SUP_COMP_MASK = ++ (XSCDV_CQ_INIT_ATTR_MASK_COMPRESSED_CQE | ++ XSCDV_CQ_INIT_ATTR_MASK_FLAGS | ++ XSCDV_CQ_INIT_ATTR_MASK_CQE_SIZE), ++}; ++ ++static int xsc_cqe_depth_check(void) ++{ ++ char *e; ++ ++ e = getenv("XSC_CQE_DEPTH_CHECK"); ++ if (e && !strcmp(e, "n")) ++ return 0; ++ ++ return 1; ++} ++ ++static struct ibv_cq_ex *create_cq(struct ibv_context *context, ++ const struct ibv_cq_init_attr_ex *cq_attr, ++ int cq_alloc_flags, ++ struct xscdv_cq_init_attr *xcq_attr) ++{ ++ struct xsc_create_cq cmd = {}; ++ struct xsc_create_cq_resp resp = {}; ++ struct xsc_create_cq_ex cmd_ex = {}; ++ struct xsc_create_cq_ex_resp resp_ex = {}; ++ struct xsc_ib_create_cq *cmd_drv; ++ struct xsc_ib_create_cq_resp *resp_drv; ++ struct xsc_cq *cq; ++ int cqe_sz; ++ int ret; ++ int ncqe; ++ struct xsc_context *xctx = to_xctx(context); ++ bool use_ex = false; ++ char *env; ++ int i; ++ ++ if (!cq_attr->cqe) { ++ xsc_err("CQE invalid\n"); ++ errno = EINVAL; ++ return NULL; ++ } ++ ++ xsc_dbg(xctx->dbg_fp, XSC_DBG_CQ, "CQE number:%u\n", cq_attr->cqe); ++ ++ if (cq_attr->comp_mask & ~CREATE_CQ_SUPPORTED_COMP_MASK) { ++ xsc_err("Unsupported comp_mask for create cq\n"); ++ errno = EINVAL; ++ return NULL; ++ } ++ ++ if (cq_attr->comp_mask & IBV_CQ_INIT_ATTR_MASK_FLAGS && ++ cq_attr->flags & ~CREATE_CQ_SUPPORTED_FLAGS) { ++ xsc_err("Unsupported creation flags requested for create cq\n"); ++ errno = EINVAL; ++ return NULL; ++ } ++ ++ if (cq_attr->wc_flags & ~CREATE_CQ_SUPPORTED_WC_FLAGS) { ++ xsc_err("unsupported flags:0x%lx\n", cq_attr->wc_flags); ++ errno = ENOTSUP; ++ return NULL; ++ } ++ ++ cq = calloc(1, sizeof *cq); ++ if (!cq) { ++ xsc_err("Alloc CQ failed\n"); ++ errno = ENOMEM; ++ return NULL; ++ } ++ ++ if 
(cq_attr->comp_mask & IBV_CQ_INIT_ATTR_MASK_FLAGS) { ++ if (cq_attr->flags & IBV_CREATE_CQ_ATTR_SINGLE_THREADED) ++ cq->flags |= XSC_CQ_FLAGS_SINGLE_THREADED; ++ if (cq_attr->flags & IBV_CREATE_CQ_ATTR_IGNORE_OVERRUN) ++ use_ex = true; ++ } ++ ++ xsc_dbg(xctx->dbg_fp, XSC_DBG_CQ, "use_ex:%u\n", use_ex); ++ ++ cmd_drv = use_ex ? &cmd_ex.drv_payload : &cmd.drv_payload; ++ resp_drv = use_ex ? &resp_ex.drv_payload : &resp.drv_payload; ++ ++ cq->cons_index = 0; ++ ++ if (xsc_spinlock_init(&cq->lock, !xsc_single_threaded)) ++ goto err; ++ ++ ncqe = align_queue_size(cq_attr->cqe); ++ if (ncqe < XSC_CQE_RING_DEPTH_MIN) { ++ xsc_dbg(xctx->dbg_fp, XSC_DBG_CQ, "CQE ring size %u is not enough, set it as %u\n", ++ ncqe, XSC_CQE_RING_DEPTH_MIN); ++ ncqe = XSC_CQE_RING_DEPTH_MIN; ++ } ++ ++ if (ncqe > XSC_CQE_RING_DEPTH_MAX) { ++ if (xsc_cqe_depth_check()) { ++ xsc_err("CQE ring size %u exceeds CQE ring depth %u, abort!\n", ++ ncqe, XSC_CQE_RING_DEPTH_MAX); ++ errno = EINVAL; ++ goto err_spl; ++ } else { ++ xsc_dbg(xctx->dbg_fp, XSC_DBG_CQ, "CQE ring size %u exceeds the MAX ring size, set it as %u\n", ++ ncqe, XSC_CQE_RING_DEPTH_MAX); ++ ncqe = XSC_CQE_RING_DEPTH_MAX; ++ } ++ } ++ ++ cqe_sz = XSC_CQE_SIZE; ++ xsc_dbg(xctx->dbg_fp, XSC_DBG_CQ, "CQE number:%u, size:%u\n", ncqe, cqe_sz); ++ ++ if (xsc_alloc_cq_buf(to_xctx(context), cq, &cq->buf_a, ncqe, cqe_sz)) { ++ xsc_err("Alloc cq buffer failed.\n"); ++ errno = ENOMEM; ++ goto err_spl; ++ } ++ ++ cq->arm_sn = 0; ++ cq->cqe_sz = cqe_sz; ++ cq->flags |= cq_alloc_flags; ++ ++ cmd_drv->buf_addr = (uintptr_t) cq->buf_a.buf; ++ cmd_drv->db_addr = (uintptr_t) cq->dbrec; ++ cmd_drv->cqe_size = cqe_sz; ++ ++ xsc_dbg(xctx->dbg_fp, XSC_DBG_CQ, "buf_addr:%p\n", cq->buf_a.buf); ++ ++ if (use_ex) { ++ struct ibv_cq_init_attr_ex cq_attr_ex = *cq_attr; ++ ++ cq_attr_ex.cqe = ncqe; ++ ret = ibv_cmd_create_cq_ex(context, &cq_attr_ex, &cq->verbs_cq, ++ &cmd_ex.ibv_cmd, sizeof(cmd_ex), ++ &resp_ex.ibv_resp, sizeof(resp_ex), ++ 0); ++ } else { ++ ret = ibv_cmd_create_cq(context, ncqe, cq_attr->channel, ++ cq_attr->comp_vector, ++ ibv_cq_ex_to_cq(&cq->verbs_cq.cq_ex), ++ &cmd.ibv_cmd, sizeof(cmd), ++ &resp.ibv_resp, sizeof(resp)); ++ } ++ ++ if (ret) { ++ xsc_err("ibv_cmd_create_cq failed, ret %d\n", ret); ++ goto err_buf; ++ } ++ ++ cq->active_buf = &cq->buf_a; ++ cq->resize_buf = NULL; ++ cq->cqn = resp_drv->cqn; ++ cq->stall_enable = to_xctx(context)->stall_enable; ++ cq->stall_adaptive_enable = to_xctx(context)->stall_adaptive_enable; ++ cq->stall_cycles = to_xctx(context)->stall_cycles; ++ ++ cq->db = xctx->cqm_reg_va + ++ (xctx->cqm_next_cid_reg & (xctx->page_size - 1)); ++ cq->armdb = xctx->cqm_armdb_va + ++ (xctx->cqm_armdb & (xctx->page_size - 1)); ++ cq->cqe_cnt = ncqe; ++ cq->log2_cq_ring_sz = xsc_ilog2(ncqe); ++ ++ for (i = 0; i < ncqe; i++) { ++ struct xsc_cqe *cqe = (struct xsc_cqe *)(cq->active_buf->buf + i * cq->cqe_sz); ++ cqe->owner = 1; ++ } ++ ++ env = getenv("XSC_DISABLE_FLUSH_ERROR"); ++ cq->disable_flush_error_cqe = env ? 
true : false; ++ xsc_dbg(xctx->dbg_fp, XSC_DBG_CQ, "cqe count:%u cqn:%u\n", cq->cqe_cnt, cq->cqn); ++ list_head_init(&cq->err_state_qp_list); ++ return &cq->verbs_cq.cq_ex; ++ ++ ++err_buf: ++ xsc_free_cq_buf(to_xctx(context), &cq->buf_a); ++ ++err_spl: ++ xsc_spinlock_destroy(&cq->lock); ++ ++err: ++ free(cq); ++ ++ return NULL; ++} ++ ++struct ibv_cq *xsc_create_cq(struct ibv_context *context, int cqe, ++ struct ibv_comp_channel *channel, ++ int comp_vector) ++{ ++ struct ibv_cq_ex *cq; ++ struct ibv_cq_init_attr_ex cq_attr = {.cqe = cqe, .channel = channel, ++ .comp_vector = comp_vector, ++ .wc_flags = IBV_WC_STANDARD_FLAGS}; ++ ++ if (cqe <= 0) { ++ errno = EINVAL; ++ return NULL; ++ } ++ ++ cq = create_cq(context, &cq_attr, 0, NULL); ++ return cq ? ibv_cq_ex_to_cq(cq) : NULL; ++} ++ ++struct ibv_cq_ex *xsc_create_cq_ex(struct ibv_context *context, ++ struct ibv_cq_init_attr_ex *cq_attr) ++{ ++ return create_cq(context, cq_attr, XSC_CQ_FLAGS_EXTENDED, NULL); ++} ++ ++struct ibv_cq_ex *xscdv_create_cq(struct ibv_context *context, ++ struct ibv_cq_init_attr_ex *cq_attr, ++ struct xscdv_cq_init_attr *xcq_attr) ++{ ++ struct ibv_cq_ex *cq; ++ ++ cq = create_cq(context, cq_attr, XSC_CQ_FLAGS_EXTENDED, xcq_attr); ++ if (!cq) ++ return NULL; ++ ++ verbs_init_cq(ibv_cq_ex_to_cq(cq), context, ++ cq_attr->channel, cq_attr->cq_context); ++ return cq; ++} ++ ++int xsc_resize_cq(struct ibv_cq *ibcq, int cqe) ++{ ++ struct xsc_cq *cq = to_xcq(ibcq); ++ ++ if (cqe < 0) { ++ errno = EINVAL; ++ return errno; ++ } ++ ++ xsc_spin_lock(&cq->lock); ++ cq->active_cqes = cq->verbs_cq.cq_ex.cqe; ++ /* currently we don't change cqe size */ ++ cq->resize_cqe_sz = cq->cqe_sz; ++ cq->resize_cqes = cq->verbs_cq.cq_ex.cqe; ++ xsc_spin_unlock(&cq->lock); ++ cq->resize_buf = NULL; ++ return 0; ++} ++ ++int xsc_destroy_cq(struct ibv_cq *cq) ++{ ++ int ret; ++ struct xsc_err_state_qp_node *tmp, *err_qp_node; ++ ++ xsc_dbg(to_xctx(cq->context)->dbg_fp, XSC_DBG_CQ, "\n"); ++ ret = ibv_cmd_destroy_cq(cq); ++ if (ret) ++ return ret; ++ ++ list_for_each_safe(&to_xcq(cq)->err_state_qp_list, err_qp_node, tmp, entry) { ++ list_del(&err_qp_node->entry); ++ free(err_qp_node); ++ } ++ ++ xsc_free_cq_buf(to_xctx(cq->context), to_xcq(cq)->active_buf); ++ free(to_xcq(cq)); ++ ++ return 0; ++} ++ ++static int xsc_calc_sq_size(struct xsc_context *ctx, ++ struct ibv_qp_init_attr_ex *attr, ++ struct xsc_qp *qp) ++{ ++ int wqe_size; ++ int wq_size; ++ int wq_size_min = 0; ++ ++ if (!attr->cap.max_send_wr) ++ return 0; ++ ++ wqe_size = 1 << (XSC_BASE_WQE_SHIFT + ctx->send_ds_shift); ++ ++ wq_size = xsc_round_up_power_of_two(attr->cap.max_send_wr); ++ ++ if (attr->qp_type != IBV_QPT_RAW_PACKET) ++ wq_size_min = XSC_SEND_WQE_RING_DEPTH_MIN; ++ if (wq_size < wq_size_min) { ++ xsc_dbg(ctx->dbg_fp, XSC_DBG_QP, "WQE size %u is not enough, set it as %u\n", ++ wq_size, wq_size_min); ++ wq_size = wq_size_min; ++ } ++ ++ if (wq_size > XSC_SEND_WQE_RING_DEPTH_MAX) { ++ xsc_dbg(ctx->dbg_fp, XSC_DBG_QP, ++ "WQE size %u exceeds WQE ring depth, set it as %u\n", ++ wq_size, XSC_SEND_WQE_RING_DEPTH_MAX); ++ wq_size = XSC_SEND_WQE_RING_DEPTH_MAX; ++ } ++ ++ qp->max_inline_data = attr->cap.max_inline_data; ++ qp->sq.wqe_cnt = wq_size; ++ qp->sq.ds_cnt = wq_size << ctx->send_ds_shift; ++ qp->sq.seg_cnt = 1 << ctx->send_ds_shift; ++ qp->sq.wqe_shift = XSC_BASE_WQE_SHIFT + ctx->send_ds_shift; ++ qp->sq.max_gs = attr->cap.max_send_sge; ++ qp->sq.max_post = qp->sq.wqe_cnt; ++ if (attr->cap.max_inline_data > ++ (qp->sq.seg_cnt - 2) * sizeof(struct 
xsc_wqe_data_seg)) ++ return -EINVAL; ++ ++ xsc_dbg(ctx->dbg_fp, XSC_DBG_QP, "Send WQE count:%u, max post:%u wqe shift:%u\n", ++ qp->sq.wqe_cnt, qp->sq.max_post, qp->sq.wqe_shift); ++ ++ return wqe_size * qp->sq.wqe_cnt; ++} ++ ++enum { ++ DV_CREATE_WQ_SUPPORTED_COMP_MASK = XSCDV_WQ_INIT_ATTR_MASK_STRIDING_RQ ++}; ++ ++static int xsc_calc_rwq_size(struct xsc_context *ctx, ++ struct xsc_rwq *rwq, ++ struct ibv_wq_init_attr *attr, ++ struct xscdv_wq_init_attr *xwq_attr) ++{ ++ size_t wqe_size; ++ int wq_size; ++ uint32_t num_scatter; ++ int is_mprq = 0; ++ int scat_spc; ++ ++ if (!attr->max_wr) ++ return -EINVAL; ++ if (xwq_attr) { ++ if (!check_comp_mask(xwq_attr->comp_mask, ++ DV_CREATE_WQ_SUPPORTED_COMP_MASK)) ++ return -EINVAL; ++ ++ is_mprq = !!(xwq_attr->comp_mask & ++ XSCDV_WQ_INIT_ATTR_MASK_STRIDING_RQ); ++ } ++ ++ /* TBD: check caps for RQ */ ++ num_scatter = max_t(uint32_t, attr->max_sge, 1); ++ wqe_size = sizeof(struct xsc_wqe_data_seg) * num_scatter + ++ sizeof(struct xsc_wqe_srq_next_seg) * is_mprq; ++ ++ if (rwq->wq_sig) ++ wqe_size += sizeof(struct xsc_rwqe_sig); ++ ++ if (wqe_size <= 0 || wqe_size > ctx->max_rq_desc_sz) ++ return -EINVAL; ++ ++ wqe_size = xsc_round_up_power_of_two(wqe_size); ++ wq_size = xsc_round_up_power_of_two(attr->max_wr) * wqe_size; ++ wq_size = max(wq_size, XSC_SEND_WQE_BB); ++ rwq->rq.wqe_cnt = wq_size / wqe_size; ++ rwq->rq.wqe_shift = xsc_ilog2(wqe_size); ++ rwq->rq.max_post = 1 << xsc_ilog2(wq_size / wqe_size); ++ scat_spc = wqe_size - ++ ((rwq->wq_sig) ? sizeof(struct xsc_rwqe_sig) : 0) - ++ is_mprq * sizeof(struct xsc_wqe_srq_next_seg); ++ rwq->rq.max_gs = scat_spc / sizeof(struct xsc_wqe_data_seg); ++ return wq_size; ++} ++ ++static int xsc_calc_rq_size(struct xsc_context *ctx, ++ struct ibv_qp_init_attr_ex *attr, ++ struct xsc_qp *qp) ++{ ++ int wqe_size; ++ int wq_size; ++ int wq_size_min = 0; ++ ++ if (!attr->cap.max_recv_wr) ++ return 0; ++ ++ wqe_size = 1 << (XSC_BASE_WQE_SHIFT + ctx->recv_ds_shift); ++ ++ wq_size = xsc_round_up_power_of_two(attr->cap.max_recv_wr); ++ /* due to hardware limit, rdma rq depth should be one send wqe ds num at least*/ ++ if (attr->qp_type != IBV_QPT_RAW_PACKET) ++ wq_size_min = ctx->send_ds_num; ++ if (wq_size < wq_size_min) { ++ xsc_dbg(ctx->dbg_fp, XSC_DBG_QP, "WQE size %u is not enough, set it as %u\n", ++ wq_size, wq_size_min); ++ wq_size = wq_size_min; ++ } ++ ++ if (wq_size > XSC_RECV_WQE_RING_DEPTH_MAX) { ++ xsc_dbg(ctx->dbg_fp, XSC_DBG_QP, ++ "WQE size %u exceeds WQE ring depth, set it as %u\n", ++ wq_size, XSC_RECV_WQE_RING_DEPTH_MAX); ++ wq_size = XSC_RECV_WQE_RING_DEPTH_MAX; ++ } ++ ++ qp->rq.wqe_cnt = wq_size; ++ qp->rq.ds_cnt = qp->rq.wqe_cnt << ctx->recv_ds_shift; ++ qp->rq.seg_cnt = 1 << ctx->recv_ds_shift; ++ qp->rq.wqe_shift = XSC_BASE_WQE_SHIFT + ctx->recv_ds_shift; ++ qp->rq.max_post = qp->rq.wqe_cnt; ++ qp->rq.max_gs = attr->cap.max_recv_sge; ++ ++ xsc_dbg(ctx->dbg_fp, XSC_DBG_QP, "Recv WQE count:%u, max post:%u wqe shift:%u\n", ++ qp->rq.wqe_cnt, qp->rq.max_post, qp->rq.wqe_shift); ++ return wqe_size * qp->rq.wqe_cnt; ++} ++ ++static int xsc_calc_wq_size(struct xsc_context *ctx, ++ struct ibv_qp_init_attr_ex *attr, ++ struct xsc_qp *qp) ++{ ++ int ret; ++ int result; ++ ++ ret = xsc_calc_sq_size(ctx, attr, qp); ++ if (ret < 0) ++ return ret; ++ ++ result = ret; ++ ++ ret = xsc_calc_rq_size(ctx, attr, qp); ++ if (ret < 0) ++ return ret; ++ ++ result += ret; ++ ++ qp->sq.offset = ret; ++ qp->rq.offset = 0; ++ ++ return result; ++} ++ ++static const char *qptype2key(enum ibv_qp_type type) ++{ 
++ switch (type) { ++ case IBV_QPT_RC: return "HUGE_RC"; ++ case IBV_QPT_UC: return "HUGE_UC"; ++ case IBV_QPT_UD: return "HUGE_UD"; ++ case IBV_QPT_RAW_PACKET: return "HUGE_RAW_ETH"; ++ default: return "HUGE_NA"; ++ } ++} ++ ++static int xsc_alloc_qp_buf(struct ibv_context *context, ++ struct ibv_qp_init_attr_ex *attr, ++ struct xsc_qp *qp, ++ int size) ++{ ++ int err; ++ enum xsc_alloc_type alloc_type; ++ enum xsc_alloc_type default_alloc_type = XSC_ALLOC_TYPE_ANON; ++ const char *qp_huge_key; ++ ++ if (qp->sq.wqe_cnt) { ++ qp->sq.wrid = malloc(qp->sq.wqe_cnt * sizeof(*qp->sq.wrid)); ++ if (!qp->sq.wrid) { ++ errno = ENOMEM; ++ err = -1; ++ return err; ++ } ++ ++ qp->sq.wr_data = malloc(qp->sq.wqe_cnt * sizeof(*qp->sq.wr_data)); ++ if (!qp->sq.wr_data) { ++ errno = ENOMEM; ++ err = -1; ++ goto ex_wrid; ++ } ++ ++ qp->sq.wqe_head = malloc(qp->sq.wqe_cnt * sizeof(*qp->sq.wqe_head)); ++ if (!qp->sq.wqe_head) { ++ errno = ENOMEM; ++ err = -1; ++ goto ex_wrid; ++ } ++ ++ qp->sq.need_flush = malloc(qp->sq.wqe_cnt * sizeof(*qp->sq.need_flush)); ++ if (!qp->sq.need_flush) { ++ errno = ENOMEM; ++ err = -1; ++ goto ex_wrid; ++ } ++ memset(qp->sq.need_flush, 0, qp->sq.wqe_cnt); ++ ++ qp->sq.wr_opcode = malloc(qp->sq.wqe_cnt * sizeof(*qp->sq.wr_opcode)); ++ if (!qp->sq.wr_opcode) { ++ errno = ENOMEM; ++ err = -1; ++ goto ex_wrid; ++ } ++ } ++ ++ if (qp->rq.wqe_cnt) { ++ qp->rq.wrid = malloc(qp->rq.wqe_cnt * sizeof(uint64_t)); ++ if (!qp->rq.wrid) { ++ errno = ENOMEM; ++ err = -1; ++ goto ex_wrid; ++ } ++ } ++ ++ /* compatibility support */ ++ qp_huge_key = qptype2key(qp->ibv_qp->qp_type); ++ if (xsc_use_huge(qp_huge_key)) ++ default_alloc_type = XSC_ALLOC_TYPE_HUGE; ++ ++ xsc_get_alloc_type(to_xctx(context), XSC_QP_PREFIX, &alloc_type, ++ default_alloc_type); ++ ++ err = xsc_alloc_prefered_buf(to_xctx(context), &qp->buf, ++ align(qp->buf_size, to_xdev ++ (context->device)->page_size), ++ to_xdev(context->device)->page_size, ++ alloc_type, ++ XSC_QP_PREFIX); ++ ++ if (err) { ++ err = -ENOMEM; ++ goto ex_wrid; ++ } ++ ++ memset(qp->buf.buf, 0, qp->buf_size); ++ ++ if (attr->qp_type == IBV_QPT_RAW_PACKET || ++ qp->flags & XSC_QP_FLAGS_USE_UNDERLAY) { ++ size_t aligned_sq_buf_size = align(qp->sq_buf_size, ++ to_xdev(context->device)->page_size); ++ /* For Raw Packet QP, allocate a separate buffer for the SQ */ ++ err = xsc_alloc_prefered_buf(to_xctx(context), &qp->sq_buf, ++ aligned_sq_buf_size, ++ to_xdev(context->device)->page_size, ++ alloc_type, ++ XSC_QP_PREFIX); ++ if (err) { ++ err = -ENOMEM; ++ goto rq_buf; ++ } ++ ++ memset(qp->sq_buf.buf, 0, aligned_sq_buf_size); ++ } ++ ++ return 0; ++rq_buf: ++ xsc_free_actual_buf(to_xctx(context), &qp->buf); ++ex_wrid: ++ if (qp->rq.wrid) ++ free(qp->rq.wrid); ++ ++ if (qp->sq.wqe_head) ++ free(qp->sq.wqe_head); ++ ++ if (qp->sq.wr_data) ++ free(qp->sq.wr_data); ++ if (qp->sq.wrid) ++ free(qp->sq.wrid); ++ ++ if (qp->sq.need_flush) ++ free(qp->sq.need_flush); ++ ++ if (qp->sq.wr_opcode) ++ free(qp->sq.wr_opcode); ++ ++ return err; ++} ++ ++static void xsc_free_qp_buf(struct xsc_context *ctx, struct xsc_qp *qp) ++{ ++ xsc_free_actual_buf(ctx, &qp->buf); ++ ++ if (qp->sq_buf.buf) ++ xsc_free_actual_buf(ctx, &qp->sq_buf); ++ ++ if (qp->rq.wrid) ++ free(qp->rq.wrid); ++ ++ if (qp->sq.wqe_head) ++ free(qp->sq.wqe_head); ++ ++ if (qp->sq.wrid) ++ free(qp->sq.wrid); ++ ++ if (qp->sq.wr_data) ++ free(qp->sq.wr_data); ++ ++ if (qp->sq.need_flush) ++ free(qp->sq.need_flush); ++ ++ if (qp->sq.wr_opcode) ++ free(qp->sq.wr_opcode); ++} ++ ++enum { ++ 
XSC_CREATE_QP_SUP_COMP_MASK = IBV_QP_INIT_ATTR_PD | ++ IBV_QP_INIT_ATTR_CREATE_FLAGS ++}; ++ ++enum { ++ XSC_DV_CREATE_QP_SUP_COMP_MASK = XSCDV_QP_INIT_ATTR_MASK_QP_CREATE_FLAGS | ++ XSCDV_QP_INIT_ATTR_MASK_DC ++}; ++ ++enum { ++ XSC_CREATE_QP_EX2_COMP_MASK = (IBV_QP_INIT_ATTR_CREATE_FLAGS | ++ IBV_QP_INIT_ATTR_MAX_TSO_HEADER | ++ IBV_QP_INIT_ATTR_IND_TABLE | ++ IBV_QP_INIT_ATTR_RX_HASH), ++}; ++ ++enum { ++ XSCDV_QP_CREATE_SUP_FLAGS = ++ (XSCDV_QP_CREATE_TUNNEL_OFFLOADS | ++ XSCDV_QP_CREATE_TIR_ALLOW_SELF_LOOPBACK_UC | ++ XSCDV_QP_CREATE_TIR_ALLOW_SELF_LOOPBACK_MC | ++ XSCDV_QP_CREATE_DISABLE_SCATTER_TO_CQE | ++ XSCDV_QP_CREATE_ALLOW_SCATTER_TO_CQE), ++}; ++ ++static struct ibv_qp *create_qp(struct ibv_context *context, ++ struct ibv_qp_init_attr_ex *attr, ++ struct xscdv_qp_init_attr *xqp_attr) ++{ ++ struct xsc_create_qp cmd; ++ struct xsc_create_qp_resp resp; ++ struct xsc_create_qp_ex_resp resp_ex; ++ struct xsc_qp *qp; ++ int ret; ++ struct xsc_context *ctx = to_xctx(context); ++ struct ibv_qp *ibqp; ++ struct xsc_parent_domain *xparent_domain; ++ struct xsc_device *xdev = to_xdev(context->device); ++ ++ xsc_dbg(ctx->dbg_fp, XSC_DBG_QP, "comp_mask=0x%x.\n", attr->comp_mask); ++ ++ if (attr->comp_mask & ~XSC_CREATE_QP_SUP_COMP_MASK) { ++ xsc_err("Not supported comp_mask:0x%x\n", attr->comp_mask); ++ return NULL; ++ } ++ ++ qp = calloc(1, sizeof(*qp)); ++ if (!qp) { ++ xsc_err("QP calloc failed\n"); ++ return NULL; ++ } ++ ++ ibqp = (struct ibv_qp *)&qp->verbs_qp; ++ qp->ibv_qp = ibqp; ++ ++ memset(&cmd, 0, sizeof(cmd)); ++ memset(&resp, 0, sizeof(resp)); ++ memset(&resp_ex, 0, sizeof(resp_ex)); ++ ++ ret = xsc_calc_wq_size(ctx, attr, qp); ++ if (ret < 0) { ++ xsc_err("Calculate WQ size failed\n"); ++ errno = EINVAL; ++ goto err; ++ } ++ ++ qp->buf_size = ret; ++ qp->sq_buf_size = 0; ++ ++ if (xsc_alloc_qp_buf(context, attr, qp, ret)) { ++ xsc_err("Alloc QP buffer failed\n"); ++ errno = ENOMEM; ++ goto err; ++ } ++ ++ qp->sq_start = qp->buf.buf + qp->sq.offset; ++ qp->rq_start = qp->buf.buf + qp->rq.offset; ++ qp->sq.qend = qp->buf.buf + qp->sq.offset + ++ (qp->sq.wqe_cnt << qp->sq.wqe_shift); ++ ++ xsc_dbg(ctx->dbg_fp, XSC_DBG_QP, "sq start:%p, sq qend:%p, buffer size:%u\n", ++ qp->sq_start, qp->sq.qend, qp->buf_size); ++ ++ xsc_init_qp_indices(qp); ++ ++ if (xsc_spinlock_init_pd(&qp->sq.lock, attr->pd) || ++ xsc_spinlock_init_pd(&qp->rq.lock, attr->pd)) ++ goto err_free_qp_buf; ++ ++ cmd.buf_addr = (uintptr_t) qp->buf.buf; ++ cmd.db_addr = (uintptr_t) qp->db; ++ cmd.sq_wqe_count = qp->sq.ds_cnt; ++ cmd.rq_wqe_count = qp->rq.ds_cnt; ++ cmd.rq_wqe_shift = qp->rq.wqe_shift; ++ ++ if (attr->qp_type == IBV_QPT_RAW_PACKET) { ++ if (attr->comp_mask & IBV_QP_INIT_ATTR_CREATE_FLAGS) { ++ if (attr->create_flags & XSC_QP_CREATE_RAWPACKET_TSO) { ++ cmd.flags |= XSC_QP_FLAG_RAWPACKET_TSO;/*revert to command flags*/ ++ xsc_dbg(ctx->dbg_fp, XSC_DBG_QP, ++ "revert create_flags(0x%x) to cmd_flags(0x%x)\n", ++ attr->create_flags, cmd.flags); ++ } ++ ++ if (attr->create_flags & XSC_QP_CREATE_RAWPACKET_TX) { ++ cmd.flags |= XSC_QP_FLAG_RAWPACKET_TX;/*revert to command flags*/ ++ xsc_dbg(ctx->dbg_fp, XSC_DBG_QP, ++ "revert create_flags(0x%x) to cmd_flags(0x%x)\n", ++ attr->create_flags, cmd.flags); ++ } ++ attr->comp_mask &= ~IBV_QP_INIT_ATTR_CREATE_FLAGS; ++ } ++ } ++ ++ pthread_mutex_lock(&ctx->qp_table_mutex); ++ ++ xparent_domain = to_xparent_domain(attr->pd); ++ ++ ret = ibv_cmd_create_qp_ex(context, &qp->verbs_qp, attr, ++ &cmd.ibv_cmd, sizeof(cmd), ++ &resp.ibv_resp, sizeof(resp)); ++ if (ret) { 
++ xsc_err("ibv_cmd_create_qp_ex failed,ret %d\n", ret); ++ errno = ret; ++ goto err_free_uidx; ++ } ++ ++ if (qp->sq.wqe_cnt || qp->rq.wqe_cnt) { ++ ret = xsc_store_qp(ctx, ibqp->qp_num, qp); ++ if (ret) { ++ xsc_err("xsc_store_qp failed,ret %d\n", ret); ++ errno = EINVAL; ++ goto err_destroy; ++ } ++ } ++ ++ pthread_mutex_unlock(&ctx->qp_table_mutex); ++ ++ qp->rq.max_post = qp->rq.wqe_cnt; ++ ++ if (attr->sq_sig_all) ++ qp->sq_signal_bits = 1; ++ else ++ qp->sq_signal_bits = 0; ++ ++ attr->cap.max_send_wr = qp->sq.max_post; ++ attr->cap.max_recv_wr = qp->rq.max_post; ++ attr->cap.max_recv_sge = qp->rq.max_gs; ++ ++ qp->rsc.type = XSC_RSC_TYPE_QP; ++ qp->rsc.rsn = ibqp->qp_num; ++ ++ if (xparent_domain) ++ atomic_fetch_add(&xparent_domain->xpd.refcount, 1); ++ ++ qp->rqn = ibqp->qp_num; ++ qp->sqn = ibqp->qp_num; ++ ++ xsc_dbg(ctx->dbg_fp, XSC_DBG_QP, "qp rqn:%u, sqn:%u\n", qp->rqn, qp->sqn); ++ qp->sq.db = ctx->sqm_reg_va + (ctx->qpm_tx_db & (xdev->page_size - 1)); ++ qp->rq.db = ctx->rqm_reg_va + (ctx->qpm_rx_db & (xdev->page_size - 1)); ++ ++ return ibqp; ++ ++err_destroy: ++ ibv_cmd_destroy_qp(ibqp); ++ ++err_free_uidx: ++ pthread_mutex_unlock(&to_xctx(context)->qp_table_mutex); ++ ++err_free_qp_buf: ++ xsc_free_qp_buf(ctx, qp); ++ ++err: ++ free(qp); ++ ++ return NULL; ++} ++ ++struct ibv_qp *xsc_create_qp(struct ibv_pd *pd, ++ struct ibv_qp_init_attr *attr) ++{ ++ struct ibv_qp *qp; ++ struct ibv_qp_init_attr_ex attrx; ++ ++ memset(&attrx, 0, sizeof(attrx)); ++ memcpy(&attrx, attr, sizeof(*attr)); ++ attrx.comp_mask = IBV_QP_INIT_ATTR_PD; ++ attrx.pd = pd; ++ qp = create_qp(pd->context, &attrx, NULL); ++ if (qp) ++ memcpy(attr, &attrx, sizeof(*attr)); ++ ++ return qp; ++} ++ ++static void xsc_lock_cqs(struct ibv_qp *qp) ++{ ++ struct xsc_cq *send_cq = to_xcq(qp->send_cq); ++ struct xsc_cq *recv_cq = to_xcq(qp->recv_cq); ++ ++ if (send_cq && recv_cq) { ++ if (send_cq == recv_cq) { ++ xsc_spin_lock(&send_cq->lock); ++ } else if (send_cq->cqn < recv_cq->cqn) { ++ xsc_spin_lock(&send_cq->lock); ++ xsc_spin_lock(&recv_cq->lock); ++ } else { ++ xsc_spin_lock(&recv_cq->lock); ++ xsc_spin_lock(&send_cq->lock); ++ } ++ } else if (send_cq) { ++ xsc_spin_lock(&send_cq->lock); ++ } else if (recv_cq) { ++ xsc_spin_lock(&recv_cq->lock); ++ } ++} ++ ++static void xsc_unlock_cqs(struct ibv_qp *qp) ++{ ++ struct xsc_cq *send_cq = to_xcq(qp->send_cq); ++ struct xsc_cq *recv_cq = to_xcq(qp->recv_cq); ++ ++ if (send_cq && recv_cq) { ++ if (send_cq == recv_cq) { ++ xsc_spin_unlock(&send_cq->lock); ++ } else if (send_cq->cqn < recv_cq->cqn) { ++ xsc_spin_unlock(&recv_cq->lock); ++ xsc_spin_unlock(&send_cq->lock); ++ } else { ++ xsc_spin_unlock(&send_cq->lock); ++ xsc_spin_unlock(&recv_cq->lock); ++ } ++ } else if (send_cq) { ++ xsc_spin_unlock(&send_cq->lock); ++ } else if (recv_cq) { ++ xsc_spin_unlock(&recv_cq->lock); ++ } ++} ++ ++int xsc_destroy_qp(struct ibv_qp *ibqp) ++{ ++ struct xsc_qp *qp = to_xqp(ibqp); ++ struct xsc_context *ctx = to_xctx(ibqp->context); ++ int ret; ++ struct xsc_parent_domain *xparent_domain = to_xparent_domain(ibqp->pd); ++ struct xsc_err_state_qp_node *tmp, *err_rq_node, *err_sq_node; ++ ++ xsc_dbg(ctx->dbg_fp, XSC_DBG_QP, "\n"); ++ ++ pthread_mutex_lock(&ctx->qp_table_mutex); ++ ++ ret = ibv_cmd_destroy_qp(ibqp); ++ if (ret) { ++ pthread_mutex_unlock(&ctx->qp_table_mutex); ++ return ret; ++ } ++ ++ xsc_lock_cqs(ibqp); ++ ++ list_for_each_safe(&to_xcq(ibqp->recv_cq)->err_state_qp_list, err_rq_node, tmp, entry) { ++ if (err_rq_node->qp_id == qp->rsc.rsn) { ++ 
list_del(&err_rq_node->entry); ++ free(err_rq_node); ++ } ++ } ++ ++ list_for_each_safe(&to_xcq(ibqp->send_cq)->err_state_qp_list, err_sq_node, tmp, entry) { ++ if (err_sq_node->qp_id == qp->rsc.rsn) { ++ list_del(&err_sq_node->entry); ++ free(err_sq_node); ++ } ++ } ++ ++ __xsc_cq_clean(to_xcq(ibqp->recv_cq), qp->rsc.rsn); ++ if (ibqp->send_cq != ibqp->recv_cq) ++ __xsc_cq_clean(to_xcq(ibqp->send_cq), qp->rsc.rsn); ++ ++ if (qp->sq.wqe_cnt || qp->rq.wqe_cnt) ++ xsc_clear_qp(ctx, ibqp->qp_num); ++ ++ xsc_unlock_cqs(ibqp); ++ pthread_mutex_unlock(&ctx->qp_table_mutex); ++ ++ xsc_free_qp_buf(ctx, qp); ++ ++ if (xparent_domain) ++ atomic_fetch_sub(&xparent_domain->xpd.refcount, 1); ++ ++ free(qp); ++ ++ return 0; ++} ++ ++int xsc_query_qp(struct ibv_qp *ibqp, struct ibv_qp_attr *attr, ++ int attr_mask, struct ibv_qp_init_attr *init_attr) ++{ ++ struct ibv_query_qp cmd; ++ struct xsc_qp *qp = to_xqp(ibqp); ++ int ret; ++ ++ xsc_dbg(to_xctx(ibqp->context)->dbg_fp, XSC_DBG_QP, "\n"); ++ ++ if (qp->rss_qp) ++ return ENOSYS; ++ ++ ret = ibv_cmd_query_qp(ibqp, attr, attr_mask, init_attr, &cmd, sizeof(cmd)); ++ if (ret) ++ return ret; ++ ++ init_attr->cap.max_send_wr = qp->sq.max_post; ++ init_attr->cap.max_send_sge = qp->sq.max_gs; ++ init_attr->cap.max_inline_data = qp->max_inline_data; ++ ++ attr->cap = init_attr->cap; ++ ++ return 0; ++} ++ ++enum { ++ XSC_MODIFY_QP_EX_ATTR_MASK = IBV_QP_RATE_LIMIT, ++}; ++ ++int xsc_modify_qp(struct ibv_qp *qp, struct ibv_qp_attr *attr, ++ int attr_mask) ++{ ++ struct ibv_modify_qp cmd = {}; ++ struct xsc_qp *xqp = to_xqp(qp); ++ int ret; ++ ++ xsc_dbg(to_xctx(qp->context)->dbg_fp, XSC_DBG_QP, "\n"); ++ ret = ibv_cmd_modify_qp(qp, attr, attr_mask, ++ &cmd, sizeof(cmd)); ++ ++ if (!ret && (attr_mask & IBV_QP_STATE) && ++ attr->qp_state == IBV_QPS_RESET) { ++ if (qp->recv_cq) { ++ xsc_cq_clean(to_xcq(qp->recv_cq), xqp->rsc.rsn); ++ } ++ if (qp->send_cq != qp->recv_cq && qp->send_cq) ++ xsc_cq_clean(to_xcq(qp->send_cq), ++ to_xqp(qp)->rsc.rsn); ++ ++ xsc_init_qp_indices(xqp); ++ } ++ ++ if (!ret && (attr_mask & IBV_QP_STATE)) ++ qp->state = attr->qp_state; ++ ++ /*workaround: generate flush err cqe if qp status turns to ERR*/ ++ if (!ret && (attr_mask & IBV_QP_STATE)) ++ ret = xsc_err_state_qp(qp, attr->cur_qp_state, attr->qp_state); ++ ++ return ret; ++} ++ ++int xsc_modify_qp_rate_limit(struct ibv_qp *qp, ++ struct ibv_qp_rate_limit_attr *attr) ++{ ++ struct ibv_qp_attr qp_attr = {}; ++ struct ib_uverbs_ex_modify_qp_resp resp = {}; ++ struct xsc_modify_qp cmd = {}; ++ struct xsc_context *xctx = to_xctx(qp->context); ++ int ret; ++ ++ if (attr->comp_mask) ++ return EINVAL; ++ ++ if ((attr->max_burst_sz || ++ attr->typical_pkt_sz) && ++ (!attr->rate_limit || ++ !(xctx->packet_pacing_caps.cap_flags & ++ XSC_IB_PP_SUPPORT_BURST))) ++ return EINVAL; ++ ++ cmd.burst_info.max_burst_sz = attr->max_burst_sz; ++ cmd.burst_info.typical_pkt_sz = attr->typical_pkt_sz; ++ qp_attr.rate_limit = attr->rate_limit; ++ ++ ret = ibv_cmd_modify_qp_ex(qp, &qp_attr, IBV_QP_RATE_LIMIT, ++ &cmd.ibv_cmd, ++ sizeof(cmd), ++ &resp, ++ sizeof(resp)); ++ ++ return ret; ++} ++ ++/* ++ * IB spec version 1.3. Table 224 Rate to xsc rate ++ * conversion table on best effort basis. 
++ */ ++static const uint8_t ib_to_xsc_rate_table[] = { ++ 0, /* Invalid to unlimited */ ++ 0, /* Invalid to unlimited */ ++ 7, /* 2.5 Gbps */ ++ 8, /* 10Gbps */ ++ 9, /* 30Gbps */ ++ 10, /* 5 Gbps */ ++ 11, /* 20 Gbps */ ++ 12, /* 40 Gbps */ ++ 13, /* 60 Gbps */ ++ 14, /* 80 Gbps */ ++ 15, /* 120 Gbps */ ++ 11, /* 14 Gbps to 20 Gbps */ ++ 13, /* 56 Gbps to 60 Gbps */ ++ 15, /* 112 Gbps to 120 Gbps */ ++ 0, /* 168 Gbps to unlimited */ ++ 9, /* 25 Gbps to 30 Gbps */ ++ 15, /* 100 Gbps to 120 Gbps */ ++ 0, /* 200 Gbps to unlimited */ ++ 0, /* 300 Gbps to unlimited */ ++}; ++ ++static uint8_t ah_attr_to_xsc_rate(enum ibv_rate ah_static_rate) ++{ ++ if (ah_static_rate >= ARRAY_SIZE(ib_to_xsc_rate_table)) ++ return 0; ++ return ib_to_xsc_rate_table[ah_static_rate]; ++} ++ ++#define RROCE_UDP_SPORT_MIN 0xC000 ++#define RROCE_UDP_SPORT_MAX 0xFFFF ++struct ibv_ah *xsc_create_ah(struct ibv_pd *pd, struct ibv_ah_attr *attr) ++{ ++ struct xsc_context *ctx = to_xctx(pd->context); ++ struct ibv_port_attr port_attr; ++ struct xsc_ah *ah; ++ uint8_t static_rate; ++ uint32_t gid_type; ++ __be32 tmp; ++ uint8_t grh; ++ bool is_eth; ++ bool grh_req; ++ ++ if (attr->port_num < 1 || attr->port_num > ctx->num_ports) ++ return NULL; ++ ++ if (ctx->cached_link_layer[attr->port_num - 1]) { ++ is_eth = ctx->cached_link_layer[attr->port_num - 1] == ++ IBV_LINK_LAYER_ETHERNET; ++ grh_req = ctx->cached_port_flags[attr->port_num - 1] & ++ IBV_QPF_GRH_REQUIRED; ++ } else { ++ if (ibv_query_port(pd->context, attr->port_num, &port_attr)) ++ return NULL; ++ ++ is_eth = port_attr.link_layer == IBV_LINK_LAYER_ETHERNET; ++ grh_req = port_attr.flags & IBV_QPF_GRH_REQUIRED; ++ } ++ ++ if (unlikely((!attr->is_global) && (is_eth || grh_req))) { ++ errno = EINVAL; ++ return NULL; ++ } ++ ++ ah = calloc(1, sizeof *ah); ++ if (!ah) ++ return NULL; ++ ++ static_rate = ah_attr_to_xsc_rate(attr->static_rate); ++ if (is_eth) { ++ if (ibv_query_gid_type(pd->context, attr->port_num, ++ attr->grh.sgid_index, &gid_type)) ++ goto err; ++ ++ if (gid_type == IBV_GID_TYPE_ROCE_V2) ++ ah->av.rlid = htobe16(rand() % (RROCE_UDP_SPORT_MAX + 1 ++ - RROCE_UDP_SPORT_MIN) ++ + RROCE_UDP_SPORT_MIN); ++ /* Since RoCE packets must contain GRH, this bit is reserved ++ * for RoCE and shouldn't be set. 
++ */ ++ grh = 0; ++ ah->av.stat_rate_sl = (static_rate << 4) | ((attr->sl & 0x7) << 1); ++ } else { ++ ah->av.fl_mlid = attr->src_path_bits & 0x7f; ++ ah->av.rlid = htobe16(attr->dlid); ++ grh = 1; ++ ah->av.stat_rate_sl = (static_rate << 4) | (attr->sl & 0xf); ++ } ++ if (attr->is_global) { ++ ah->av.tclass = attr->grh.traffic_class; ++ ah->av.hop_limit = attr->grh.hop_limit; ++ tmp = htobe32((grh << 30) | ++ ((attr->grh.sgid_index & 0xff) << 20) | ++ (attr->grh.flow_label & 0xfffff)); ++ ah->av.grh_gid_fl = tmp; ++ memcpy(ah->av.rgid, attr->grh.dgid.raw, 16); ++ } ++ ++ if (is_eth) { ++ if (ctx->cmds_supp_uhw & XSC_USER_CMDS_SUPP_UHW_CREATE_AH) { ++ struct xsc_create_ah_resp resp = {}; ++ ++ if (ibv_cmd_create_ah(pd, &ah->ibv_ah, attr, &resp.ibv_resp, sizeof(resp))) ++ goto err; ++ ++ ah->kern_ah = true; ++ memcpy(ah->av.rmac, resp.dmac, ETHERNET_LL_SIZE); ++ } else { ++ uint16_t vid; ++ ++ if (ibv_resolve_eth_l2_from_gid(pd->context, attr, ++ ah->av.rmac, &vid)) ++ goto err; ++ } ++ } ++ ++ return &ah->ibv_ah; ++err: ++ free(ah); ++ return NULL; ++} ++ ++int xsc_destroy_ah(struct ibv_ah *ah) ++{ ++ struct xsc_ah *xah = to_xah(ah); ++ int err; ++ ++ if (xah->kern_ah) { ++ err = ibv_cmd_destroy_ah(ah); ++ if (err) ++ return err; ++ } ++ ++ free(xah); ++ return 0; ++} ++ ++int xsc_attach_mcast(struct ibv_qp *qp, const union ibv_gid *gid, uint16_t lid) ++{ ++ return ibv_cmd_attach_mcast(qp, gid, lid); ++} ++ ++int xsc_detach_mcast(struct ibv_qp *qp, const union ibv_gid *gid, uint16_t lid) ++{ ++ return ibv_cmd_detach_mcast(qp, gid, lid); ++} ++ ++struct ibv_qp *xsc_create_qp_ex(struct ibv_context *context, ++ struct ibv_qp_init_attr_ex *attr) ++{ ++ return create_qp(context, attr, NULL); ++} ++ ++struct ibv_qp *xscdv_create_qp(struct ibv_context *context, ++ struct ibv_qp_init_attr_ex *qp_attr, ++ struct xscdv_qp_init_attr *xqp_attr) ++{ ++ return create_qp(context, qp_attr, xqp_attr); ++} ++ ++struct ibv_xrcd * ++xsc_open_xrcd(struct ibv_context *context, ++ struct ibv_xrcd_init_attr *xrcd_init_attr) ++{ ++ int err; ++ struct verbs_xrcd *xrcd; ++ struct ibv_open_xrcd cmd = {}; ++ struct ib_uverbs_open_xrcd_resp resp = {}; ++ ++ xrcd = calloc(1, sizeof(*xrcd)); ++ if (!xrcd) ++ return NULL; ++ ++ err = ibv_cmd_open_xrcd(context, xrcd, sizeof(*xrcd), xrcd_init_attr, ++ &cmd, sizeof(cmd), &resp, sizeof(resp)); ++ if (err) { ++ free(xrcd); ++ return NULL; ++ } ++ ++ return &xrcd->xrcd; ++} ++ ++int xsc_close_xrcd(struct ibv_xrcd *ib_xrcd) ++{ ++ struct verbs_xrcd *xrcd = container_of(ib_xrcd, struct verbs_xrcd, xrcd); ++ int ret; ++ ++ ret = ibv_cmd_close_xrcd(xrcd); ++ if (!ret) ++ free(xrcd); ++ ++ return ret; ++} ++ ++int xsc_query_device_ex(struct ibv_context *context, ++ const struct ibv_query_device_ex_input *input, ++ struct ibv_device_attr_ex *attr, ++ size_t attr_size) ++{ ++ struct xsc_context *xctx = to_xctx(context); ++ struct xsc_query_device_ex_resp resp = {}; ++ size_t resp_size = ++ (xctx->cmds_supp_uhw & XSC_USER_CMDS_SUPP_UHW_QUERY_DEVICE) ? 
++ sizeof(resp) : ++ sizeof(resp.ibv_resp); ++ struct ibv_device_attr *a; ++ union xsc_ib_fw_ver raw_fw_ver; ++ int err; ++ ++ raw_fw_ver.data = 0; ++ err = ibv_cmd_query_device_any(context, input, attr, attr_size, ++ &resp.ibv_resp, &resp_size); ++ if (err) ++ return err; ++ ++ if (attr_size >= offsetofend(struct ibv_device_attr_ex, tso_caps)) { ++ attr->tso_caps.max_tso = resp.tso_caps.max_tso; ++ attr->tso_caps.supported_qpts = resp.tso_caps.supported_qpts; ++ } ++ if (attr_size >= offsetofend(struct ibv_device_attr_ex, rss_caps)) { ++ attr->rss_caps.rx_hash_fields_mask = ++ resp.rss_caps.rx_hash_fields_mask; ++ attr->rss_caps.rx_hash_function = ++ resp.rss_caps.rx_hash_function; ++ } ++ if (attr_size >= ++ offsetofend(struct ibv_device_attr_ex, packet_pacing_caps)) { ++ attr->packet_pacing_caps.qp_rate_limit_min = ++ resp.packet_pacing_caps.qp_rate_limit_min; ++ attr->packet_pacing_caps.qp_rate_limit_max = ++ resp.packet_pacing_caps.qp_rate_limit_max; ++ attr->packet_pacing_caps.supported_qpts = ++ resp.packet_pacing_caps.supported_qpts; ++ } ++ ++ if (resp.xsc_ib_support_multi_pkt_send_wqes & XSC_IB_ALLOW_MPW) ++ xctx->vendor_cap_flags |= XSC_VENDOR_CAP_FLAGS_MPW_ALLOWED; ++ ++ if (resp.xsc_ib_support_multi_pkt_send_wqes & XSC_IB_SUPPORT_EMPW) ++ xctx->vendor_cap_flags |= XSC_VENDOR_CAP_FLAGS_ENHANCED_MPW; ++ ++ xctx->cqe_comp_caps.max_num = resp.cqe_comp_caps.max_num; ++ xctx->cqe_comp_caps.supported_format = resp.cqe_comp_caps.supported_format; ++ xctx->sw_parsing_caps.sw_parsing_offloads = ++ resp.sw_parsing_caps.sw_parsing_offloads; ++ xctx->sw_parsing_caps.supported_qpts = ++ resp.sw_parsing_caps.supported_qpts; ++ xctx->striding_rq_caps.min_single_stride_log_num_of_bytes = ++ resp.striding_rq_caps.min_single_stride_log_num_of_bytes; ++ xctx->striding_rq_caps.max_single_stride_log_num_of_bytes = ++ resp.striding_rq_caps.max_single_stride_log_num_of_bytes; ++ xctx->striding_rq_caps.min_single_wqe_log_num_of_strides = ++ resp.striding_rq_caps.min_single_wqe_log_num_of_strides; ++ xctx->striding_rq_caps.max_single_wqe_log_num_of_strides = ++ resp.striding_rq_caps.max_single_wqe_log_num_of_strides; ++ xctx->striding_rq_caps.supported_qpts = ++ resp.striding_rq_caps.supported_qpts; ++ xctx->tunnel_offloads_caps = resp.tunnel_offloads_caps; ++ xctx->packet_pacing_caps = resp.packet_pacing_caps; ++ ++ if (resp.flags & XSC_IB_QUERY_DEV_RESP_FLAGS_CQE_128B_COMP) ++ xctx->vendor_cap_flags |= XSC_VENDOR_CAP_FLAGS_CQE_128B_COMP; ++ ++ if (resp.flags & XSC_IB_QUERY_DEV_RESP_FLAGS_CQE_128B_PAD) ++ xctx->vendor_cap_flags |= XSC_VENDOR_CAP_FLAGS_CQE_128B_PAD; ++ ++ raw_fw_ver.data = resp.ibv_resp.base.fw_ver; ++ a = &attr->orig_attr; ++ xsc_set_fw_version(a, &raw_fw_ver); ++ ++ return 0; ++} ++ ++static int rwq_sig_enabled(struct ibv_context *context) ++{ ++ char *env; ++ ++ env = getenv("XSC_RWQ_SIGNATURE"); ++ if (env) ++ return 1; ++ ++ return 0; ++} ++ ++static void xsc_free_rwq_buf(struct xsc_rwq *rwq, struct ibv_context *context) ++{ ++ struct xsc_context *ctx = to_xctx(context); ++ ++ xsc_free_actual_buf(ctx, &rwq->buf); ++ free(rwq->rq.wrid); ++} ++ ++static int xsc_alloc_rwq_buf(struct ibv_context *context, ++ struct xsc_rwq *rwq, ++ int size) ++{ ++ int err; ++ enum xsc_alloc_type alloc_type; ++ ++ xsc_get_alloc_type(to_xctx(context), XSC_RWQ_PREFIX, ++ &alloc_type, XSC_ALLOC_TYPE_ANON); ++ ++ rwq->rq.wrid = malloc(rwq->rq.wqe_cnt * sizeof(uint64_t)); ++ if (!rwq->rq.wrid) { ++ errno = ENOMEM; ++ return -1; ++ } ++ ++ err = xsc_alloc_prefered_buf(to_xctx(context), &rwq->buf, ++ 
align(rwq->buf_size, to_xdev ++ (context->device)->page_size), ++ to_xdev(context->device)->page_size, ++ alloc_type, ++ XSC_RWQ_PREFIX); ++ ++ if (err) { ++ free(rwq->rq.wrid); ++ errno = ENOMEM; ++ return -1; ++ } ++ ++ return 0; ++} ++ ++static struct ibv_wq *create_wq(struct ibv_context *context, ++ struct ibv_wq_init_attr *attr, ++ struct xscdv_wq_init_attr *xwq_attr) ++{ ++ struct xsc_create_wq cmd; ++ struct xsc_create_wq_resp resp; ++ int err; ++ struct xsc_rwq *rwq; ++ struct xsc_context *ctx = to_xctx(context); ++ int ret; ++ int32_t usr_idx = 0; ++ ++ if (attr->wq_type != IBV_WQT_RQ) ++ return NULL; ++ ++ memset(&cmd, 0, sizeof(cmd)); ++ memset(&resp, 0, sizeof(resp)); ++ ++ rwq = calloc(1, sizeof(*rwq)); ++ if (!rwq) ++ return NULL; ++ ++ rwq->wq_sig = rwq_sig_enabled(context); ++ if (rwq->wq_sig) ++ cmd.flags = XSC_WQ_FLAG_SIGNATURE; ++ ++ ret = xsc_calc_rwq_size(ctx, rwq, attr, xwq_attr); ++ if (ret < 0) { ++ errno = -ret; ++ goto err; ++ } ++ ++ rwq->buf_size = ret; ++ if (xsc_alloc_rwq_buf(context, rwq, ret)) ++ goto err; ++ ++ xsc_init_rwq_indices(rwq); ++ ++ if (xsc_spinlock_init_pd(&rwq->rq.lock, attr->pd)) ++ goto err_free_rwq_buf; ++ ++ rwq->db = xsc_alloc_dbrec(ctx); ++ if (!rwq->db) ++ goto err_free_rwq_buf; ++ ++ rwq->db[XSC_RCV_DBR] = 0; ++ rwq->db[XSC_SND_DBR] = 0; ++ rwq->pbuff = rwq->buf.buf + rwq->rq.offset; ++ rwq->recv_db = &rwq->db[XSC_RCV_DBR]; ++ cmd.buf_addr = (uintptr_t)rwq->buf.buf; ++ cmd.db_addr = (uintptr_t)rwq->db; ++ cmd.rq_wqe_count = rwq->rq.wqe_cnt; ++ cmd.rq_wqe_shift = rwq->rq.wqe_shift; ++ usr_idx = xsc_store_uidx(ctx, rwq); ++ if (usr_idx < 0) { ++ xsc_dbg(ctx->dbg_fp, XSC_DBG_QP, "Couldn't find free user index\n"); ++ goto err_free_db_rec; ++ } ++ ++ cmd.user_index = usr_idx; ++ ++ if (xwq_attr) { ++ if (xwq_attr->comp_mask & XSCDV_WQ_INIT_ATTR_MASK_STRIDING_RQ) { ++ if ((xwq_attr->striding_rq_attrs.single_stride_log_num_of_bytes < ++ ctx->striding_rq_caps.min_single_stride_log_num_of_bytes) || ++ (xwq_attr->striding_rq_attrs.single_stride_log_num_of_bytes > ++ ctx->striding_rq_caps.max_single_stride_log_num_of_bytes)) { ++ errno = EINVAL; ++ goto err_create; ++ } ++ ++ if ((xwq_attr->striding_rq_attrs.single_wqe_log_num_of_strides < ++ ctx->striding_rq_caps.min_single_wqe_log_num_of_strides) || ++ (xwq_attr->striding_rq_attrs.single_wqe_log_num_of_strides > ++ ctx->striding_rq_caps.max_single_wqe_log_num_of_strides)) { ++ errno = EINVAL; ++ goto err_create; ++ } ++ ++ cmd.single_stride_log_num_of_bytes = ++ xwq_attr->striding_rq_attrs.single_stride_log_num_of_bytes; ++ cmd.single_wqe_log_num_of_strides = ++ xwq_attr->striding_rq_attrs.single_wqe_log_num_of_strides; ++ cmd.two_byte_shift_en = ++ xwq_attr->striding_rq_attrs.two_byte_shift_en; ++ cmd.comp_mask |= XSC_IB_CREATE_WQ_STRIDING_RQ; ++ } ++ } ++ ++ err = ibv_cmd_create_wq(context, attr, &rwq->wq, &cmd.ibv_cmd, ++ sizeof(cmd), &resp.ibv_resp, sizeof(resp)); ++ if (err) ++ goto err_create; ++ ++ rwq->rsc.type = XSC_RSC_TYPE_RWQ; ++ rwq->rsc.rsn = cmd.user_index; ++ ++ rwq->wq.post_recv = xsc_post_wq_recv; ++ return &rwq->wq; ++ ++err_create: ++ xsc_clear_uidx(ctx, cmd.user_index); ++err_free_db_rec: ++ xsc_free_db(to_xctx(context), rwq->db); ++err_free_rwq_buf: ++ xsc_free_rwq_buf(rwq, context); ++err: ++ free(rwq); ++ return NULL; ++} ++ ++struct ibv_wq *xsc_create_wq(struct ibv_context *context, ++ struct ibv_wq_init_attr *attr) ++{ ++ return create_wq(context, attr, NULL); ++} ++ ++struct ibv_wq *xscdv_create_wq(struct ibv_context *context, ++ struct ibv_wq_init_attr *attr, ++ 
struct xscdv_wq_init_attr *xwq_attr) ++{ ++ return create_wq(context, attr, xwq_attr); ++} ++ ++int xsc_modify_wq(struct ibv_wq *wq, struct ibv_wq_attr *attr) ++{ ++ struct xsc_modify_wq cmd = {}; ++ struct xsc_rwq *rwq = to_xrwq(wq); ++ ++ if ((attr->attr_mask & IBV_WQ_ATTR_STATE) && ++ attr->wq_state == IBV_WQS_RDY) { ++ if ((attr->attr_mask & IBV_WQ_ATTR_CURR_STATE) && ++ attr->curr_wq_state != wq->state) ++ return -EINVAL; ++ ++ if (wq->state == IBV_WQS_RESET) { ++ xsc_spin_lock(&to_xcq(wq->cq)->lock); ++ __xsc_cq_clean(to_xcq(wq->cq), ++ rwq->rsc.rsn); ++ xsc_spin_unlock(&to_xcq(wq->cq)->lock); ++ xsc_init_rwq_indices(rwq); ++ rwq->db[XSC_RCV_DBR] = 0; ++ rwq->db[XSC_SND_DBR] = 0; ++ } ++ } ++ ++ return ibv_cmd_modify_wq(wq, attr, &cmd.ibv_cmd, sizeof(cmd)); ++} ++ ++int xsc_destroy_wq(struct ibv_wq *wq) ++{ ++ struct xsc_rwq *rwq = to_xrwq(wq); ++ int ret; ++ ++ ret = ibv_cmd_destroy_wq(wq); ++ if (ret) ++ return ret; ++ ++ xsc_spin_lock(&to_xcq(wq->cq)->lock); ++ __xsc_cq_clean(to_xcq(wq->cq), rwq->rsc.rsn); ++ xsc_spin_unlock(&to_xcq(wq->cq)->lock); ++ xsc_clear_uidx(to_xctx(wq->context), rwq->rsc.rsn); ++ xsc_free_db(to_xctx(wq->context), rwq->db); ++ xsc_free_rwq_buf(rwq, wq->context); ++ free(rwq); ++ ++ return 0; ++} ++ ++static void free_flow_counters_descriptions(struct xsc_ib_create_flow *cmd) ++{ ++ int i; ++ ++ for (i = 0; i < cmd->ncounters_data; i++) ++ free(cmd->data[i].counters_data); ++} ++ ++static int get_flow_mcounters(struct xsc_flow *mflow, ++ struct ibv_flow_attr *flow_attr, ++ struct xsc_counters **mcounters, ++ uint32_t *data_size) ++{ ++ struct ibv_flow_spec *ib_spec; ++ uint32_t ncounters_used = 0; ++ int i; ++ ++ ib_spec = (struct ibv_flow_spec *)(flow_attr + 1); ++ for (i = 0; i < flow_attr->num_of_specs; i++, ib_spec = (void *)ib_spec + ib_spec->hdr.size) { ++ if (ib_spec->hdr.type != IBV_FLOW_SPEC_ACTION_COUNT) ++ continue; ++ ++ /* currently support only one counters data */ ++ if (ncounters_used > 0) ++ return EINVAL; ++ ++ *mcounters = to_mcounters(ib_spec->flow_count.counters); ++ ncounters_used++; ++ } ++ ++ *data_size = ncounters_used * sizeof(struct xsc_ib_flow_counters_data); ++ return 0; ++} ++ ++static int allocate_flow_counters_descriptions(struct xsc_counters *mcounters, ++ struct xsc_ib_create_flow *cmd) ++{ ++ struct xsc_ib_flow_counters_data *mcntrs_data; ++ struct xsc_ib_flow_counters_desc *cntrs_data; ++ struct xsc_counter_node *cntr_node; ++ uint32_t ncounters; ++ int j = 0; ++ ++ mcntrs_data = cmd->data; ++ ncounters = mcounters->ncounters; ++ ++ /* xsc_attach_counters_point_flow was never called */ ++ if (!ncounters) ++ return EINVAL; ++ ++ /* each counter has both index and description */ ++ cntrs_data = calloc(ncounters, sizeof(*cntrs_data)); ++ if (!cntrs_data) ++ return ENOMEM; ++ ++ list_for_each(&mcounters->counters_list, cntr_node, entry) { ++ cntrs_data[j].description = cntr_node->desc; ++ cntrs_data[j].index = cntr_node->index; ++ j++; ++ } ++ ++ scrub_ptr_attr(cntrs_data); ++ mcntrs_data[cmd->ncounters_data].counters_data = cntrs_data; ++ mcntrs_data[cmd->ncounters_data].ncounters = ncounters; ++ cmd->ncounters_data++; ++ ++ return 0; ++} ++ ++struct ibv_flow *xsc_create_flow(struct ibv_qp *qp, struct ibv_flow_attr *flow_attr) ++{ ++ struct xsc_ib_create_flow *cmd; ++ uint32_t required_cmd_size = 0; ++ struct ibv_flow *flow_id; ++ struct xsc_flow *mflow; ++ int ret; ++ ++ mflow = calloc(1, sizeof(*mflow)); ++ if (!mflow) { ++ errno = ENOMEM; ++ return NULL; ++ } ++ ++ ret = get_flow_mcounters(mflow, flow_attr, 
&mflow->mcounters, &required_cmd_size); ++ if (ret) { ++ errno = ret; ++ goto err_get_mcounters; ++ } ++ ++ required_cmd_size += sizeof(*cmd); ++ cmd = calloc(1, required_cmd_size); ++ if (!cmd) { ++ errno = ENOMEM; ++ goto err_get_mcounters; ++ } ++ ++ if (mflow->mcounters) { ++ pthread_mutex_lock(&mflow->mcounters->lock); ++ /* if the counters already bound no need to pass its description */ ++ if (!mflow->mcounters->refcount) { ++ ret = allocate_flow_counters_descriptions(mflow->mcounters, cmd); ++ if (ret) { ++ errno = ret; ++ goto err_desc_alloc; ++ } ++ } ++ } ++ ++ flow_id = &mflow->flow_id; ++ ret = ibv_cmd_create_flow(qp, flow_id, flow_attr, ++ cmd, required_cmd_size); ++ if (ret) ++ goto err_create_flow; ++ ++ if (mflow->mcounters) { ++ free_flow_counters_descriptions(cmd); ++ mflow->mcounters->refcount++; ++ pthread_mutex_unlock(&mflow->mcounters->lock); ++ } ++ ++ free(cmd); ++ ++ return flow_id; ++ ++err_create_flow: ++ if (mflow->mcounters) { ++ free_flow_counters_descriptions(cmd); ++ pthread_mutex_unlock(&mflow->mcounters->lock); ++ } ++err_desc_alloc: ++ free(cmd); ++err_get_mcounters: ++ free(mflow); ++ return NULL; ++} ++ ++int xsc_destroy_flow(struct ibv_flow *flow_id) ++{ ++ struct xsc_flow *mflow = to_mflow(flow_id); ++ int ret; ++ ++ ret = ibv_cmd_destroy_flow(flow_id); ++ if (ret) ++ return ret; ++ ++ if (mflow->mcounters) { ++ pthread_mutex_lock(&mflow->mcounters->lock); ++ mflow->mcounters->refcount--; ++ pthread_mutex_unlock(&mflow->mcounters->lock); ++ } ++ ++ free(mflow); ++ return 0; ++} ++ ++struct ibv_rwq_ind_table *xsc_create_rwq_ind_table(struct ibv_context *context, ++ struct ibv_rwq_ind_table_init_attr *init_attr) ++{ ++ struct ibv_create_rwq_ind_table *cmd; ++ struct xsc_create_rwq_ind_table_resp resp; ++ struct ibv_rwq_ind_table *ind_table; ++ uint32_t required_tbl_size; ++ int num_tbl_entries; ++ int cmd_size; ++ int err; ++ ++ num_tbl_entries = 1 << init_attr->log_ind_tbl_size; ++ /* Data must be u64 aligned */ ++ required_tbl_size = (num_tbl_entries * sizeof(uint32_t)) < sizeof(uint64_t) ? 
++ sizeof(uint64_t) : (num_tbl_entries * sizeof(uint32_t)); ++ ++ cmd_size = required_tbl_size + sizeof(*cmd); ++ cmd = calloc(1, cmd_size); ++ if (!cmd) ++ return NULL; ++ ++ memset(&resp, 0, sizeof(resp)); ++ ind_table = calloc(1, sizeof(*ind_table)); ++ if (!ind_table) ++ goto free_cmd; ++ ++ err = ibv_cmd_create_rwq_ind_table(context, init_attr, ind_table, ++ &resp.ibv_resp, sizeof(resp)); ++ if (err) ++ goto err; ++ ++ free(cmd); ++ return ind_table; ++ ++err: ++ free(ind_table); ++free_cmd: ++ free(cmd); ++ return NULL; ++} ++ ++int xsc_destroy_rwq_ind_table(struct ibv_rwq_ind_table *rwq_ind_table) ++{ ++ int ret; ++ ++ ret = ibv_cmd_destroy_rwq_ind_table(rwq_ind_table); ++ ++ if (ret) ++ return ret; ++ ++ free(rwq_ind_table); ++ return 0; ++} ++ ++int xsc_modify_cq(struct ibv_cq *cq, struct ibv_modify_cq_attr *attr) ++{ ++ struct ibv_modify_cq cmd = {}; ++ ++ return ibv_cmd_modify_cq(cq, attr, &cmd, sizeof(cmd)); ++} ++ ++static struct ibv_flow_action *_xsc_create_flow_action_esp(struct ibv_context *ctx, ++ struct ibv_flow_action_esp_attr *attr, ++ struct ibv_command_buffer *driver_attr) ++{ ++ struct verbs_flow_action *action; ++ int ret; ++ ++ if (!check_comp_mask(attr->comp_mask, IBV_FLOW_ACTION_ESP_MASK_ESN)) { ++ errno = EOPNOTSUPP; ++ return NULL; ++ } ++ ++ action = calloc(1, sizeof(*action)); ++ if (!action) { ++ errno = ENOMEM; ++ return NULL; ++ } ++ ++ ret = ibv_cmd_create_flow_action_esp(ctx, attr, action, driver_attr); ++ if (ret) { ++ free(action); ++ return NULL; ++ } ++ ++ return &action->action; ++} ++ ++struct ibv_flow_action *xsc_create_flow_action_esp(struct ibv_context *ctx, ++ struct ibv_flow_action_esp_attr *attr) ++{ ++ return _xsc_create_flow_action_esp(ctx, attr, NULL); ++} ++ ++struct ibv_flow_action *xscdv_create_flow_action_esp(struct ibv_context *ctx, ++ struct ibv_flow_action_esp_attr *esp, ++ struct xscdv_flow_action_esp *xattr) ++{ ++ DECLARE_COMMAND_BUFFER_LINK(driver_attr, UVERBS_OBJECT_FLOW_ACTION, ++ UVERBS_METHOD_FLOW_ACTION_ESP_CREATE, 1, ++ NULL); ++ ++ if (!check_comp_mask(xattr->comp_mask, ++ XSCDV_FLOW_ACTION_ESP_MASK_FLAGS)) { ++ errno = EOPNOTSUPP; ++ return NULL; ++ } ++ ++ if (xattr->comp_mask & XSCDV_FLOW_ACTION_ESP_MASK_FLAGS) { ++ if (!check_comp_mask(xattr->action_flags, ++ XSC_IB_UAPI_FLOW_ACTION_FLAGS_REQUIRE_METADATA)) { ++ errno = EOPNOTSUPP; ++ return NULL; ++ } ++ fill_attr_in_uint64(driver_attr, XSC_IB_ATTR_CREATE_FLOW_ACTION_FLAGS, ++ xattr->action_flags); ++ } ++ ++ return _xsc_create_flow_action_esp(ctx, esp, driver_attr); ++} ++ ++int xsc_modify_flow_action_esp(struct ibv_flow_action *action, ++ struct ibv_flow_action_esp_attr *attr) ++{ ++ struct verbs_flow_action *vaction = ++ container_of(action, struct verbs_flow_action, action); ++ ++ if (!check_comp_mask(attr->comp_mask, IBV_FLOW_ACTION_ESP_MASK_ESN)) ++ return EOPNOTSUPP; ++ ++ return ibv_cmd_modify_flow_action_esp(vaction, attr, NULL); ++} ++ ++struct ibv_flow_action *xscdv_create_flow_action_modify_header(struct ibv_context *ctx, ++ size_t actions_sz, ++ uint64_t actions[], ++ enum xscdv_flow_table_type ft_type) ++{ ++ DECLARE_COMMAND_BUFFER(cmd, UVERBS_OBJECT_FLOW_ACTION, ++ XSC_IB_METHOD_FLOW_ACTION_CREATE_MODIFY_HEADER, ++ 3); ++ struct ib_uverbs_attr *handle = fill_attr_out_obj(cmd, ++ XSC_IB_ATTR_CREATE_MODIFY_HEADER_HANDLE); ++ struct verbs_flow_action *action; ++ int ret; ++ ++ fill_attr_in(cmd, XSC_IB_ATTR_CREATE_MODIFY_HEADER_ACTIONS_PRM, ++ actions, actions_sz); ++ fill_attr_const_in(cmd, XSC_IB_ATTR_CREATE_MODIFY_HEADER_FT_TYPE, ++ ft_type); ++ ++ 
action = calloc(1, sizeof(*action)); ++ if (!action) { ++ errno = ENOMEM; ++ return NULL; ++ } ++ ++ ret = execute_ioctl(ctx, cmd); ++ if (ret) { ++ free(action); ++ return NULL; ++ } ++ ++ action->action.context = ctx; ++ action->type = IBV_FLOW_ACTION_UNSPECIFIED; ++ action->handle = read_attr_obj(XSC_IB_ATTR_CREATE_MODIFY_HEADER_HANDLE, ++ handle); ++ ++ return &action->action; ++} ++ ++struct ibv_flow_action * ++xscdv_create_flow_action_packet_reformat(struct ibv_context *ctx, ++ size_t data_sz, ++ void *data, ++ enum xscdv_flow_action_packet_reformat_type reformat_type, ++ enum xscdv_flow_table_type ft_type) ++{ ++ DECLARE_COMMAND_BUFFER(cmd, UVERBS_OBJECT_FLOW_ACTION, ++ XSC_IB_METHOD_FLOW_ACTION_CREATE_PACKET_REFORMAT, 4); ++ struct ib_uverbs_attr *handle = fill_attr_out_obj(cmd, ++ XSC_IB_ATTR_CREATE_PACKET_REFORMAT_HANDLE); ++ struct verbs_flow_action *action; ++ int ret; ++ ++ if ((!data && data_sz) || (data && !data_sz)) { ++ errno = EINVAL; ++ return NULL; ++ } ++ ++ if (data && data_sz) ++ fill_attr_in(cmd, ++ XSC_IB_ATTR_CREATE_PACKET_REFORMAT_DATA_BUF, ++ data, data_sz); ++ ++ fill_attr_const_in(cmd, XSC_IB_ATTR_CREATE_PACKET_REFORMAT_TYPE, ++ reformat_type); ++ ++ fill_attr_const_in(cmd, XSC_IB_ATTR_CREATE_PACKET_REFORMAT_FT_TYPE, ++ ft_type); ++ ++ action = calloc(1, sizeof(*action)); ++ if (!action) { ++ errno = ENOMEM; ++ return NULL; ++ } ++ ++ ret = execute_ioctl(ctx, cmd); ++ if (ret) { ++ free(action); ++ return NULL; ++ } ++ ++ action->action.context = ctx; ++ action->type = IBV_FLOW_ACTION_UNSPECIFIED; ++ action->handle = read_attr_obj(XSC_IB_ATTR_CREATE_PACKET_REFORMAT_HANDLE, ++ handle); ++ ++ return &action->action; ++} ++ ++int xsc_destroy_flow_action(struct ibv_flow_action *action) ++{ ++ struct verbs_flow_action *vaction = ++ container_of(action, struct verbs_flow_action, action); ++ int ret = ibv_cmd_destroy_flow_action(vaction); ++ ++ if (!ret) ++ free(action); ++ ++ return ret; ++} ++ ++static inline int xsc_access_dm(struct ibv_dm *ibdm, uint64_t dm_offset, ++ void *host_addr, size_t length, ++ uint32_t read) ++{ ++ struct xsc_dm *dm = to_xdm(ibdm); ++ atomic_uint32_t *dm_ptr = ++ (atomic_uint32_t *)dm->start_va + dm_offset / 4; ++ uint32_t *host_ptr = host_addr; ++ const uint32_t *host_end = host_ptr + length / 4; ++ ++ if (dm_offset + length > dm->length) ++ return EFAULT; ++ ++ /* Due to HW limitation, DM access address and length must be aligned ++ * to 4 bytes. ++ */ ++ if ((length & 3) || (dm_offset & 3)) ++ return EINVAL; ++ ++ /* Copy granularity should be 4 Bytes since we enforce copy size to be ++ * a multiple of 4 bytes. 
++ */ ++ if (read) { ++ while (host_ptr != host_end) { ++ *host_ptr = atomic_load_explicit(dm_ptr, ++ memory_order_relaxed); ++ host_ptr++; ++ dm_ptr++; ++ } ++ } else { ++ while (host_ptr != host_end) { ++ atomic_store_explicit(dm_ptr, *host_ptr, ++ memory_order_relaxed); ++ host_ptr++; ++ dm_ptr++; ++ } ++ } ++ ++ return 0; ++} ++static inline int xsc_memcpy_to_dm(struct ibv_dm *ibdm, uint64_t dm_offset, ++ const void *host_addr, size_t length) ++{ ++ return xsc_access_dm(ibdm, dm_offset, (void *)host_addr, length, 0); ++} ++ ++static inline int xsc_memcpy_from_dm(void *host_addr, struct ibv_dm *ibdm, ++ uint64_t dm_offset, size_t length) ++{ ++ return xsc_access_dm(ibdm, dm_offset, host_addr, length, 1); ++} ++ ++struct ibv_dm *xsc_alloc_dm(struct ibv_context *context, ++ struct ibv_alloc_dm_attr *dm_attr) ++{ ++ DECLARE_COMMAND_BUFFER(cmdb, UVERBS_OBJECT_DM, UVERBS_METHOD_DM_ALLOC, ++ 2); ++ int page_size = to_xdev(context->device)->page_size; ++ struct xsc_context *xctx = to_xctx(context); ++ uint64_t act_size, start_offset; ++ struct xsc_dm *dm; ++ uint16_t page_idx; ++ off_t offset = 0; ++ void *va; ++ ++ if (!check_comp_mask(dm_attr->comp_mask, 0)) { ++ errno = EINVAL; ++ return NULL; ++ } ++ ++ if (dm_attr->length > xctx->max_dm_size) { ++ errno = EINVAL; ++ return NULL; ++ } ++ ++ dm = calloc(1, sizeof(*dm)); ++ if (!dm) { ++ errno = ENOMEM; ++ return NULL; ++ } ++ ++ ++ fill_attr_out(cmdb, XSC_IB_ATTR_ALLOC_DM_RESP_START_OFFSET, ++ &start_offset, sizeof(start_offset)); ++ fill_attr_out(cmdb, XSC_IB_ATTR_ALLOC_DM_RESP_PAGE_INDEX, ++ &page_idx, sizeof(page_idx)); ++ ++ if (ibv_cmd_alloc_dm(context, dm_attr, &dm->verbs_dm, cmdb)) ++ goto err_free_mem; ++ ++ act_size = align(dm_attr->length, page_size); ++ set_command(XSC_IB_MMAP_DEVICE_MEM, &offset); ++ set_extended_index(page_idx, &offset); ++ va = mmap(NULL, act_size, PROT_READ | PROT_WRITE, ++ MAP_SHARED, context->cmd_fd, ++ page_size * offset); ++ if (va == MAP_FAILED) ++ goto err_free_dm; ++ ++ dm->mmap_va = va; ++ dm->length = dm_attr->length; ++ dm->start_va = va + (start_offset & (page_size - 1)); ++ dm->verbs_dm.dm.memcpy_to_dm = xsc_memcpy_to_dm; ++ dm->verbs_dm.dm.memcpy_from_dm = xsc_memcpy_from_dm; ++ ++ return &dm->verbs_dm.dm; ++ ++err_free_dm: ++ ibv_cmd_free_dm(&dm->verbs_dm); ++ ++err_free_mem: ++ free(dm); ++ ++ return NULL; ++} ++ ++int xsc_free_dm(struct ibv_dm *ibdm) ++{ ++ struct xsc_device *xdev = to_xdev(ibdm->context->device); ++ struct xsc_dm *dm = to_xdm(ibdm); ++ size_t act_size = align(dm->length, xdev->page_size); ++ int ret; ++ ++ ret = ibv_cmd_free_dm(&dm->verbs_dm); ++ ++ if (ret) ++ return ret; ++ ++ munmap(dm->mmap_va, act_size); ++ free(dm); ++ return 0; ++} ++ ++struct ibv_counters *xsc_create_counters(struct ibv_context *context, ++ struct ibv_counters_init_attr *init_attr) ++{ ++ struct xsc_counters *mcntrs; ++ int ret; ++ ++ if (!check_comp_mask(init_attr->comp_mask, 0)) { ++ errno = EOPNOTSUPP; ++ return NULL; ++ } ++ ++ mcntrs = calloc(1, sizeof(*mcntrs)); ++ if (!mcntrs) { ++ errno = ENOMEM; ++ return NULL; ++ } ++ ++ pthread_mutex_init(&mcntrs->lock, NULL); ++ ret = ibv_cmd_create_counters(context, ++ init_attr, ++ &mcntrs->vcounters, ++ NULL); ++ if (ret) ++ goto err_create; ++ ++ list_head_init(&mcntrs->counters_list); ++ ++ return &mcntrs->vcounters.counters; ++ ++err_create: ++ free(mcntrs); ++ return NULL; ++} ++ ++int xsc_destroy_counters(struct ibv_counters *counters) ++{ ++ struct xsc_counters *mcntrs = to_mcounters(counters); ++ struct xsc_counter_node *tmp, *cntrs_node; ++ int 
ret; ++ ++ ret = ibv_cmd_destroy_counters(&mcntrs->vcounters); ++ if (ret) ++ return ret; ++ ++ list_for_each_safe(&mcntrs->counters_list, cntrs_node, tmp, entry) { ++ list_del(&cntrs_node->entry); ++ free(cntrs_node); ++ } ++ ++ free(mcntrs); ++ return 0; ++} ++ ++int xsc_attach_counters_point_flow(struct ibv_counters *counters, ++ struct ibv_counter_attach_attr *attr, ++ struct ibv_flow *flow) ++{ ++ struct xsc_counters *mcntrs = to_mcounters(counters); ++ struct xsc_counter_node *cntrs_node; ++ int ret; ++ ++ /* The driver supports only the static binding mode as part of ibv_create_flow */ ++ if (flow) ++ return ENOTSUP; ++ ++ if (!check_comp_mask(attr->comp_mask, 0)) ++ return EOPNOTSUPP; ++ ++ /* Check whether the attached counter is supported */ ++ if (attr->counter_desc < IBV_COUNTER_PACKETS || ++ attr->counter_desc > IBV_COUNTER_BYTES) ++ return ENOTSUP; ++ ++ cntrs_node = calloc(1, sizeof(*cntrs_node)); ++ if (!cntrs_node) ++ return ENOMEM; ++ ++ pthread_mutex_lock(&mcntrs->lock); ++ /* The counter is bound to a flow, attach is not allowed */ ++ if (mcntrs->refcount) { ++ ret = EBUSY; ++ goto err_already_bound; ++ } ++ ++ cntrs_node->index = attr->index; ++ cntrs_node->desc = attr->counter_desc; ++ list_add(&mcntrs->counters_list, &cntrs_node->entry); ++ mcntrs->ncounters++; ++ pthread_mutex_unlock(&mcntrs->lock); ++ ++ return 0; ++ ++err_already_bound: ++ pthread_mutex_unlock(&mcntrs->lock); ++ free(cntrs_node); ++ return ret; ++} ++ ++int xsc_read_counters(struct ibv_counters *counters, ++ uint64_t *counters_value, ++ uint32_t ncounters, ++ uint32_t flags) ++{ ++ struct xsc_counters *mcntrs = to_mcounters(counters); ++ ++ return ibv_cmd_read_counters(&mcntrs->vcounters, ++ counters_value, ++ ncounters, ++ flags, ++ NULL); ++ ++} ++ ++struct xscdv_flow_matcher * ++xscdv_create_flow_matcher(struct ibv_context *context, ++ struct xscdv_flow_matcher_attr *attr) ++{ ++ DECLARE_COMMAND_BUFFER(cmd, XSC_IB_OBJECT_FLOW_MATCHER, ++ XSC_IB_METHOD_FLOW_MATCHER_CREATE, ++ 5); ++ struct xscdv_flow_matcher *flow_matcher; ++ struct ib_uverbs_attr *handle; ++ int ret; ++ ++ if (!check_comp_mask(attr->comp_mask, 0)) { ++ errno = EOPNOTSUPP; ++ return NULL; ++ } ++ ++ flow_matcher = calloc(1, sizeof(*flow_matcher)); ++ if (!flow_matcher) { ++ errno = ENOMEM; ++ return NULL; ++ } ++ ++ if (attr->type != IBV_FLOW_ATTR_NORMAL) { ++ errno = EOPNOTSUPP; ++ goto err; ++ } ++ ++ handle = fill_attr_out_obj(cmd, XSC_IB_ATTR_FLOW_MATCHER_CREATE_HANDLE); ++ fill_attr_in(cmd, XSC_IB_ATTR_FLOW_MATCHER_MATCH_MASK, ++ attr->match_mask->match_buf, ++ attr->match_mask->match_sz); ++ fill_attr_in(cmd, XSC_IB_ATTR_FLOW_MATCHER_MATCH_CRITERIA, ++ &attr->match_criteria_enable, sizeof(attr->match_criteria_enable)); ++ fill_attr_in_enum(cmd, XSC_IB_ATTR_FLOW_MATCHER_FLOW_TYPE, ++ IBV_FLOW_ATTR_NORMAL, &attr->priority, ++ sizeof(attr->priority)); ++ if (attr->flags) ++ fill_attr_const_in(cmd, XSC_IB_ATTR_FLOW_MATCHER_FLOW_FLAGS, ++ attr->flags); ++ ++ ret = execute_ioctl(context, cmd); ++ if (ret) ++ goto err; ++ ++ flow_matcher->context = context; ++ flow_matcher->handle = read_attr_obj(XSC_IB_ATTR_FLOW_MATCHER_CREATE_HANDLE, handle); ++ ++ return flow_matcher; ++ ++err: ++ free(flow_matcher); ++ return NULL; ++} ++ ++int xscdv_destroy_flow_matcher(struct xscdv_flow_matcher *flow_matcher) ++{ ++ DECLARE_COMMAND_BUFFER(cmd, XSC_IB_OBJECT_FLOW_MATCHER, ++ XSC_IB_METHOD_FLOW_MATCHER_DESTROY, ++ 1); ++ int ret; ++ ++ fill_attr_in_obj(cmd, XSC_IB_ATTR_FLOW_MATCHER_DESTROY_HANDLE, flow_matcher->handle); ++ ret = 
execute_ioctl(flow_matcher->context, cmd); ++ verbs_is_destroy_err(&ret); ++ ++ if (ret) ++ return ret; ++ ++ free(flow_matcher); ++ return 0; ++} ++ ++#define CREATE_FLOW_MAX_FLOW_ACTIONS_SUPPORTED 8 ++struct ibv_flow * ++xscdv_create_flow(struct xscdv_flow_matcher *flow_matcher, ++ struct xscdv_flow_match_parameters *match_value, ++ size_t num_actions, ++ struct xscdv_flow_action_attr actions_attr[]) ++{ ++ uint32_t flow_actions[CREATE_FLOW_MAX_FLOW_ACTIONS_SUPPORTED]; ++ struct verbs_flow_action *vaction; ++ int num_flow_actions = 0; ++ struct xsc_flow *mflow; ++ bool have_qp = false; ++ bool have_dest_devx = false; ++ bool have_flow_tag = false; ++ int ret; ++ int i; ++ DECLARE_COMMAND_BUFFER(cmd, UVERBS_OBJECT_FLOW, ++ XSC_IB_METHOD_CREATE_FLOW, ++ 6); ++ struct ib_uverbs_attr *handle; ++ enum xscdv_flow_action_type type; ++ ++ mflow = calloc(1, sizeof(*mflow)); ++ if (!mflow) { ++ errno = ENOMEM; ++ return NULL; ++ } ++ ++ handle = fill_attr_out_obj(cmd, XSC_IB_ATTR_CREATE_FLOW_HANDLE); ++ fill_attr_in(cmd, XSC_IB_ATTR_CREATE_FLOW_MATCH_VALUE, ++ match_value->match_buf, ++ match_value->match_sz); ++ fill_attr_in_obj(cmd, XSC_IB_ATTR_CREATE_FLOW_MATCHER, flow_matcher->handle); ++ ++ for (i = 0; i < num_actions; i++) { ++ type = actions_attr[i].type; ++ switch (type) { ++ case XSCDV_FLOW_ACTION_DEST_IBV_QP: ++ if (have_qp || have_dest_devx) { ++ errno = EOPNOTSUPP; ++ goto err; ++ } ++ fill_attr_in_obj(cmd, XSC_IB_ATTR_CREATE_FLOW_DEST_QP, ++ actions_attr[i].qp->handle); ++ have_qp = true; ++ break; ++ case XSCDV_FLOW_ACTION_IBV_FLOW_ACTION: ++ if (num_flow_actions == ++ CREATE_FLOW_MAX_FLOW_ACTIONS_SUPPORTED) { ++ errno = EOPNOTSUPP; ++ goto err; ++ } ++ vaction = container_of(actions_attr[i].action, ++ struct verbs_flow_action, ++ action); ++ ++ flow_actions[num_flow_actions] = vaction->handle; ++ num_flow_actions++; ++ break; ++ case XSCDV_FLOW_ACTION_DEST_DEVX: ++ if (have_dest_devx || have_qp) { ++ errno = EOPNOTSUPP; ++ goto err; ++ } ++ fill_attr_in_obj(cmd, XSC_IB_ATTR_CREATE_FLOW_DEST_DEVX, ++ actions_attr[i].obj->handle); ++ have_dest_devx = true; ++ break; ++ case XSCDV_FLOW_ACTION_TAG: ++ if (have_flow_tag) { ++ errno = EINVAL; ++ goto err; ++ } ++ fill_attr_in_uint32(cmd, ++ XSC_IB_ATTR_CREATE_FLOW_TAG, ++ actions_attr[i].tag_value); ++ have_flow_tag = true; ++ break; ++ default: ++ errno = EOPNOTSUPP; ++ goto err; ++ } ++ } ++ ++ if (num_flow_actions) ++ fill_attr_in_objs_arr(cmd, ++ XSC_IB_ATTR_CREATE_FLOW_ARR_FLOW_ACTIONS, ++ flow_actions, ++ num_flow_actions); ++ ret = execute_ioctl(flow_matcher->context, cmd); ++ if (ret) ++ goto err; ++ ++ mflow->flow_id.handle = read_attr_obj(XSC_IB_ATTR_CREATE_FLOW_HANDLE, handle); ++ mflow->flow_id.context = flow_matcher->context; ++ return &mflow->flow_id; ++err: ++ free(mflow); ++ return NULL; ++} ++ ++struct xscdv_devx_umem * ++xscdv_devx_umem_reg(struct ibv_context *context, void *addr, size_t size, uint32_t access) ++{ ++ DECLARE_COMMAND_BUFFER(cmd, ++ XSC_IB_OBJECT_DEVX_UMEM, ++ XSC_IB_METHOD_DEVX_UMEM_REG, ++ 5); ++ struct ib_uverbs_attr *handle; ++ struct xsc_devx_umem *umem; ++ int ret; ++ ++ umem = calloc(1, sizeof(*umem)); ++ if (!umem) { ++ errno = ENOMEM; ++ return NULL; ++ } ++ ++ if (ibv_dontfork_range(addr, size)) ++ goto err; ++ ++ fill_attr_in_uint64(cmd, XSC_IB_ATTR_DEVX_UMEM_REG_ADDR, (intptr_t)addr); ++ fill_attr_in_uint64(cmd, XSC_IB_ATTR_DEVX_UMEM_REG_LEN, size); ++ fill_attr_in_uint32(cmd, XSC_IB_ATTR_DEVX_UMEM_REG_ACCESS, access); ++ fill_attr_out(cmd, XSC_IB_ATTR_DEVX_UMEM_REG_OUT_ID, ++ 
&umem->dv_devx_umem.umem_id, ++ sizeof(umem->dv_devx_umem.umem_id)); ++ handle = fill_attr_out_obj(cmd, XSC_IB_ATTR_DEVX_UMEM_REG_HANDLE); ++ ++ ret = execute_ioctl(context, cmd); ++ if (ret) ++ goto err_umem_reg_cmd; ++ ++ umem->handle = read_attr_obj(XSC_IB_ATTR_DEVX_UMEM_REG_HANDLE, handle); ++ umem->context = context; ++ umem->addr = addr; ++ umem->size = size; ++ ++ return &umem->dv_devx_umem; ++ ++err_umem_reg_cmd: ++ ibv_dofork_range(addr, size); ++err: ++ free(umem); ++ return NULL; ++} ++ ++int xscdv_devx_umem_dereg(struct xscdv_devx_umem *dv_devx_umem) ++{ ++ DECLARE_COMMAND_BUFFER(cmd, ++ XSC_IB_OBJECT_DEVX_UMEM, ++ XSC_IB_METHOD_DEVX_UMEM_DEREG, ++ 1); ++ int ret; ++ struct xsc_devx_umem *umem = container_of(dv_devx_umem, struct xsc_devx_umem, ++ dv_devx_umem); ++ ++ fill_attr_in_obj(cmd, XSC_IB_ATTR_DEVX_UMEM_DEREG_HANDLE, umem->handle); ++ ret = execute_ioctl(umem->context, cmd); ++ if (ret) ++ return ret; ++ ++ ibv_dofork_range(umem->addr, umem->size); ++ free(umem); ++ return 0; ++} ++ ++struct xscdv_devx_obj * ++xscdv_devx_obj_create(struct ibv_context *context, const void *in, size_t inlen, ++ void *out, size_t outlen) ++{ ++ DECLARE_COMMAND_BUFFER(cmd, ++ XSC_IB_OBJECT_DEVX_OBJ, ++ XSC_IB_METHOD_DEVX_OBJ_CREATE, ++ 3); ++ struct ib_uverbs_attr *handle; ++ struct xscdv_devx_obj *obj; ++ int ret; ++ ++ obj = calloc(1, sizeof(*obj)); ++ if (!obj) { ++ errno = ENOMEM; ++ return NULL; ++ } ++ ++ handle = fill_attr_out_obj(cmd, XSC_IB_ATTR_DEVX_OBJ_CREATE_HANDLE); ++ fill_attr_in(cmd, XSC_IB_ATTR_DEVX_OBJ_CREATE_CMD_IN, in, inlen); ++ fill_attr_out(cmd, XSC_IB_ATTR_DEVX_OBJ_CREATE_CMD_OUT, out, outlen); ++ ++ ret = execute_ioctl(context, cmd); ++ if (ret) ++ goto err; ++ ++ obj->handle = read_attr_obj(XSC_IB_ATTR_DEVX_OBJ_CREATE_HANDLE, handle); ++ obj->context = context; ++ return obj; ++err: ++ free(obj); ++ return NULL; ++} ++ ++int xscdv_devx_obj_query(struct xscdv_devx_obj *obj, const void *in, size_t inlen, ++ void *out, size_t outlen) ++{ ++ DECLARE_COMMAND_BUFFER(cmd, ++ XSC_IB_OBJECT_DEVX_OBJ, ++ XSC_IB_METHOD_DEVX_OBJ_QUERY, ++ 3); ++ ++ fill_attr_in_obj(cmd, XSC_IB_ATTR_DEVX_OBJ_QUERY_HANDLE, obj->handle); ++ fill_attr_in(cmd, XSC_IB_ATTR_DEVX_OBJ_QUERY_CMD_IN, in, inlen); ++ fill_attr_out(cmd, XSC_IB_ATTR_DEVX_OBJ_QUERY_CMD_OUT, out, outlen); ++ ++ return execute_ioctl(obj->context, cmd); ++} ++ ++int xscdv_devx_obj_modify(struct xscdv_devx_obj *obj, const void *in, size_t inlen, ++ void *out, size_t outlen) ++{ ++ DECLARE_COMMAND_BUFFER(cmd, ++ XSC_IB_OBJECT_DEVX_OBJ, ++ XSC_IB_METHOD_DEVX_OBJ_MODIFY, ++ 3); ++ ++ fill_attr_in_obj(cmd, XSC_IB_ATTR_DEVX_OBJ_MODIFY_HANDLE, obj->handle); ++ fill_attr_in(cmd, XSC_IB_ATTR_DEVX_OBJ_MODIFY_CMD_IN, in, inlen); ++ fill_attr_out(cmd, XSC_IB_ATTR_DEVX_OBJ_MODIFY_CMD_OUT, out, outlen); ++ ++ return execute_ioctl(obj->context, cmd); ++} ++ ++int xscdv_devx_obj_destroy(struct xscdv_devx_obj *obj) ++{ ++ DECLARE_COMMAND_BUFFER(cmd, ++ XSC_IB_OBJECT_DEVX_OBJ, ++ XSC_IB_METHOD_DEVX_OBJ_DESTROY, ++ 1); ++ int ret; ++ ++ fill_attr_in_obj(cmd, XSC_IB_ATTR_DEVX_OBJ_DESTROY_HANDLE, obj->handle); ++ ret = execute_ioctl(obj->context, cmd); ++ ++ if (ret) ++ return ret; ++ free(obj); ++ return 0; ++} ++ ++int xscdv_devx_general_cmd(struct ibv_context *context, const void *in, size_t inlen, ++ void *out, size_t outlen) ++{ ++ DECLARE_COMMAND_BUFFER(cmd, ++ XSC_IB_OBJECT_DEVX, ++ XSC_IB_METHOD_DEVX_OTHER, ++ 2); ++ ++ fill_attr_in(cmd, XSC_IB_ATTR_DEVX_OTHER_CMD_IN, in, inlen); ++ fill_attr_out(cmd, XSC_IB_ATTR_DEVX_OTHER_CMD_OUT, out, 
outlen); ++ ++ return execute_ioctl(context, cmd); ++} ++ ++int xscdv_devx_query_eqn(struct ibv_context *context, uint32_t vector, ++ uint32_t *eqn) ++{ ++ DECLARE_COMMAND_BUFFER(cmd, ++ XSC_IB_OBJECT_DEVX, ++ XSC_IB_METHOD_DEVX_QUERY_EQN, ++ 2); ++ ++ fill_attr_in_uint32(cmd, XSC_IB_ATTR_DEVX_QUERY_EQN_USER_VEC, vector); ++ fill_attr_out_ptr(cmd, XSC_IB_ATTR_DEVX_QUERY_EQN_DEV_EQN, eqn); ++ ++ return execute_ioctl(context, cmd); ++} +diff --git a/providers/xscale/wqe.h b/providers/xscale/wqe.h +new file mode 100644 +index 0000000..4b7f327 +--- /dev/null ++++ b/providers/xscale/wqe.h +@@ -0,0 +1,72 @@ ++/* ++ * Copyright (c) 2021 - 2022, Shanghai Yunsilicon Technology Co., Ltd. ++ * All rights reserved. ++ */ ++ ++#ifndef WQE_H ++#define WQE_H ++ ++#include ++ ++struct xsc_wqe_eth_pad { ++ uint8_t rsvd0[16]; ++}; ++ ++struct xsc_wqe_xrc_seg { ++ __be32 xrc_srqn; ++ uint8_t rsvd[12]; ++}; ++ ++enum { ++ XSC_IPOIB_INLINE_MIN_HEADER_SIZE = 4, ++ XSC_SOURCE_QPN_INLINE_MAX_HEADER_SIZE = 18, ++ XSC_ETH_L2_INLINE_HEADER_SIZE = 18, ++ XSC_ETH_L2_MIN_HEADER_SIZE = 14, ++}; ++ ++struct xsc_wqe_umr_ctrl_seg { ++ uint8_t flags; ++ uint8_t rsvd0[3]; ++ __be16 klm_octowords; ++ __be16 translation_offset; ++ __be64 mkey_mask; ++ uint8_t rsvd1[32]; ++}; ++ ++struct xsc_wqe_umr_klm_seg { ++ /* up to 2GB */ ++ __be32 byte_count; ++ __be32 mkey; ++ __be64 address; ++}; ++ ++union xsc_wqe_umr_inline_seg { ++ struct xsc_wqe_umr_klm_seg klm; ++}; ++ ++struct xsc_wqe_mkey_context_seg { ++ uint8_t free; ++ uint8_t reserved1; ++ uint8_t access_flags; ++ uint8_t sf; ++ __be32 qpn_mkey; ++ __be32 reserved2; ++ __be32 flags_pd; ++ __be64 start_addr; ++ __be64 len; ++ __be32 bsf_octword_size; ++ __be32 reserved3[4]; ++ __be32 translations_octword_size; ++ uint8_t reserved4[3]; ++ uint8_t log_page_size; ++ __be32 reserved; ++ union xsc_wqe_umr_inline_seg inseg[0]; ++}; ++ ++struct xsc_rwqe_sig { ++ uint8_t rsvd0[4]; ++ uint8_t signature; ++ uint8_t rsvd1[11]; ++}; ++ ++#endif /* WQE_H */ +diff --git a/providers/xscale/xsc-abi.h b/providers/xscale/xsc-abi.h +new file mode 100644 +index 0000000..7eab95c +--- /dev/null ++++ b/providers/xscale/xsc-abi.h +@@ -0,0 +1,56 @@ ++/* ++ * Copyright (c) 2021 - 2022, Shanghai Yunsilicon Technology Co., Ltd. ++ * All rights reserved. 
++ */ ++ ++#ifndef XSC_ABI_H ++#define XSC_ABI_H ++ ++#include ++#include ++#include ++#include ++#include "xscdv.h" ++ ++#define XSC_UVERBS_MIN_ABI_VERSION 1 ++#define XSC_UVERBS_MAX_ABI_VERSION 1 ++ ++DECLARE_DRV_CMD(xsc_alloc_ucontext, IB_USER_VERBS_CMD_GET_CONTEXT, ++ xsc_ib_alloc_ucontext_req, xsc_ib_alloc_ucontext_resp); ++DECLARE_DRV_CMD(xsc_create_ah, IB_USER_VERBS_CMD_CREATE_AH, ++ empty, xsc_ib_create_ah_resp); ++DECLARE_DRV_CMD(xsc_alloc_pd, IB_USER_VERBS_CMD_ALLOC_PD, ++ empty, xsc_ib_alloc_pd_resp); ++DECLARE_DRV_CMD(xsc_create_cq, IB_USER_VERBS_CMD_CREATE_CQ, ++ xsc_ib_create_cq, xsc_ib_create_cq_resp); ++DECLARE_DRV_CMD(xsc_create_cq_ex, IB_USER_VERBS_EX_CMD_CREATE_CQ, ++ xsc_ib_create_cq, xsc_ib_create_cq_resp); ++DECLARE_DRV_CMD(xsc_create_qp_ex, IB_USER_VERBS_EX_CMD_CREATE_QP, ++ xsc_ib_create_qp, xsc_ib_create_qp_resp); ++DECLARE_DRV_CMD(xsc_create_qp_ex_rss, IB_USER_VERBS_EX_CMD_CREATE_QP, ++ xsc_ib_create_qp_rss, xsc_ib_create_qp_resp); ++DECLARE_DRV_CMD(xsc_create_qp, IB_USER_VERBS_CMD_CREATE_QP, ++ xsc_ib_create_qp, xsc_ib_create_qp_resp); ++DECLARE_DRV_CMD(xsc_create_wq, IB_USER_VERBS_EX_CMD_CREATE_WQ, ++ xsc_ib_create_wq, xsc_ib_create_wq_resp); ++DECLARE_DRV_CMD(xsc_modify_wq, IB_USER_VERBS_EX_CMD_MODIFY_WQ, ++ xsc_ib_modify_wq, empty); ++DECLARE_DRV_CMD(xsc_create_rwq_ind_table, IB_USER_VERBS_EX_CMD_CREATE_RWQ_IND_TBL, ++ empty, empty); ++DECLARE_DRV_CMD(xsc_destroy_rwq_ind_table, IB_USER_VERBS_EX_CMD_DESTROY_RWQ_IND_TBL, ++ empty, empty); ++DECLARE_DRV_CMD(xsc_resize_cq, IB_USER_VERBS_CMD_RESIZE_CQ, ++ xsc_ib_resize_cq, empty); ++DECLARE_DRV_CMD(xsc_query_device_ex, IB_USER_VERBS_EX_CMD_QUERY_DEVICE, ++ empty, xsc_ib_query_device_resp); ++DECLARE_DRV_CMD(xsc_modify_qp_ex, IB_USER_VERBS_EX_CMD_MODIFY_QP, ++ empty, xsc_ib_modify_qp_resp); ++ ++struct xsc_modify_qp { ++ struct ibv_modify_qp_ex ibv_cmd; ++ __u32 comp_mask; ++ struct xsc_ib_burst_info burst_info; ++ __u32 reserved; ++}; ++ ++#endif /* XSC_ABI_H */ +diff --git a/providers/xscale/xsc_api.h b/providers/xscale/xsc_api.h +new file mode 100644 +index 0000000..c533019 +--- /dev/null ++++ b/providers/xscale/xsc_api.h +@@ -0,0 +1,29 @@ ++/* ++ * Copyright (c) 2021 - 2022, Shanghai Yunsilicon Technology Co., Ltd. ++ * All rights reserved. 
++ */ ++ ++#ifndef XSC_API_H ++#define XSC_API_H ++ ++#include ++ ++#define xscdv_flow_action_flags xsc_ib_uapi_flow_action_flags ++#define XSCDV_FLOW_ACTION_FLAGS_REQUIRE_METADATA XSC_IB_UAPI_FLOW_ACTION_FLAGS_REQUIRE_METADATA ++#define xscdv_flow_table_type xsc_ib_uapi_flow_table_type ++#define XSCDV_FLOW_TABLE_TYPE_NIC_RX XSC_IB_UAPI_FLOW_TABLE_TYPE_NIC_RX ++#define XSCDV_FLOW_TABLE_TYPE_NIC_TX XSC_IB_UAPI_FLOW_TABLE_TYPE_NIC_TX ++#define xscdv_flow_action_packet_reformat_type xsc_ib_uapi_flow_action_packet_reformat_type ++#define XSCDV_FLOW_ACTION_PACKET_REFORMAT_TYPE_L2_TUNNEL_TO_L2 XSC_IB_UAPI_FLOW_ACTION_PACKET_REFORMAT_TYPE_L2_TUNNEL_TO_L2 ++#define XSCDV_FLOW_ACTION_PACKET_REFORMAT_TYPE_L2_TO_L2_TUNNEL XSC_IB_UAPI_FLOW_ACTION_PACKET_REFORMAT_TYPE_L2_TO_L2_TUNNEL ++#define XSCDV_FLOW_ACTION_PACKET_REFORMAT_TYPE_L3_TUNNEL_TO_L2 XSC_IB_UAPI_FLOW_ACTION_PACKET_REFORMAT_TYPE_L3_TUNNEL_TO_L2 ++#define XSCDV_FLOW_ACTION_PACKET_REFORMAT_TYPE_L2_TO_L3_TUNNEL XSC_IB_UAPI_FLOW_ACTION_PACKET_REFORMAT_TYPE_L2_TO_L3_TUNNEL ++ ++enum xsc_qp_create_flags { ++ XSC_QP_CREATE_RAWPACKE_TSO = 1 << 0, ++ XSC_QP_CREATE_RAWPACKET_TSO = 1 << 0, ++ XSC_QP_CREATE_RAWPACKET_TX = 1 << 1, ++}; ++ ++ ++#endif +diff --git a/providers/xscale/xsc_hsi.h b/providers/xscale/xsc_hsi.h +new file mode 100644 +index 0000000..53fe552 +--- /dev/null ++++ b/providers/xscale/xsc_hsi.h +@@ -0,0 +1,252 @@ ++/* ++ * Copyright (c) 2021 - 2022, Shanghai Yunsilicon Technology Co., Ltd. ++ * All rights reserved. ++ */ ++ ++#ifndef __XSC_HSI_H__ ++#define __XSC_HSI_H__ ++ ++#include ++#include ++#include "sqm_csr_defines.h" ++#include "rqm_csr_defines.h" ++#include "cqm_csr_defines.h" ++ ++#define upper_32_bits(n) ((uint32_t)(((n) >> 16) >> 16)) ++#define lower_32_bits(n) ((uint32_t)(n)) ++ ++#define DMA_LO_LE(x) __cpu_to_le32(lower_32_bits(x)) ++#define DMA_HI_LE(x) __cpu_to_le32(upper_32_bits(x)) ++#define DMA_REGPAIR_LE(x, val) do { \ ++ (x).hi = DMA_HI_LE((val)); \ ++ (x).lo = DMA_LO_LE((val)); \ ++ } while (0) ++ ++#define WR_LE_16(x, val) x = __cpu_to_le16(val) ++#define WR_LE_32(x, val) x = __cpu_to_le32(val) ++#define WR_LE_64(x, val) x = __cpu_to_le64(val) ++#define WR_LE_R64(x, val) DMA_REGPAIR_LE(x, val) ++#define WR_BE_32(x, val) x = __cpu_to_be32(val) ++ ++#define RD_LE_16(x) __le16_to_cpu(x) ++#define RD_LE_32(x) __le32_to_cpu(x) ++#define RD_BE_32(x) __be32_to_cpu(x) ++ ++#define WR_REG(addr, val) mmio_write32_le(addr, val) ++#define RD_REG(addr) mmio_read32_le(addr) ++ ++/* message opcode */ ++enum { ++ XSC_MSG_OPCODE_SEND = 0, ++ XSC_MSG_OPCODE_RDMA_WRITE = 1, ++ XSC_MSG_OPCODE_RDMA_READ = 2, ++ XSC_MSG_OPCODE_MAD = 3, ++ XSC_MSG_OPCODE_RDMA_ACK = 4, ++ XSC_MSG_OPCODE_RDMA_ACK_READ = 5, ++ XSC_MSG_OPCODE_RDMA_CNP = 6, ++ XSC_MSG_OPCODE_RAW = 7, ++ XSC_MSG_OPCODE_VIRTIO_NET = 8, ++ XSC_MSG_OPCODE_VIRTIO_BLK = 9, ++ XSC_MSG_OPCODE_RAW_TPE = 10, ++ XSC_MSG_OPCODE_INIT_QP_REQ = 11, ++ XSC_MSG_OPCODE_INIT_QP_RSP = 12, ++ XSC_MSG_OPCODE_INIT_PATH_REQ = 13, ++ XSC_MSG_OPCODE_INIT_PATH_RSP = 14, ++}; ++ ++enum { ++ XSC_REQ = 0, ++ XSC_RSP = 1, ++}; ++ ++enum { ++ XSC_WITHOUT_IMMDT = 0, ++ XSC_WITH_IMMDT = 1, ++}; ++ ++enum { ++ XSC_ERR_CODE_NAK_RETRY = 0x40, ++ XSC_ERR_CODE_NAK_OPCODE = 0x41, ++ XSC_ERR_CODE_NAK_MR = 0x42, ++ XSC_ERR_CODE_NAK_OPERATION = 0x43, ++ XSC_ERR_CODE_NAK_RNR = 0x44, ++ XSC_ERR_CODE_LOCAL_MR = 0x45, ++ XSC_ERR_CODE_LOCAL_LEN = 0x46, ++ XSC_ERR_CODE_LOCAL_OPCODE = 0x47, ++ XSC_ERR_CODE_CQ_OVER_FLOW = 0x48, ++ XSC_ERR_CODE_STRG_ACC_GEN_CQE = 0x4c, ++ XSC_ERR_CODE_CQE_ACC = 0x4d, ++ XSC_ERR_CODE_FLUSH 
= 0x4e, ++ XSC_ERR_CODE_MALF_WQE_HOST = 0x50, ++ XSC_ERR_CODE_MALF_WQE_INFO = 0x51, ++ XSC_ERR_CODE_MR_NON_NAK = 0x52, ++ XSC_ERR_CODE_OPCODE_GEN_CQE = 0x61, ++ XSC_ERR_CODE_MANY_READ = 0x62, ++ XSC_ERR_CODE_LEN_GEN_CQE = 0x63, ++ XSC_ERR_CODE_MR = 0x65, ++ XSC_ERR_CODE_MR_GEN_CQE = 0x66, ++ XSC_ERR_CODE_OPERATION = 0x67, ++ XSC_ERR_CODE_MALF_WQE_INFO_GEN_NAK = 0x68, ++}; ++ ++/* TODO: sw cqe opcode*/ ++enum { ++ XSC_OPCODE_RDMA_REQ_SEND = 0, ++ XSC_OPCODE_RDMA_REQ_SEND_IMMDT = 1, ++ XSC_OPCODE_RDMA_RSP_RECV = 2, ++ XSC_OPCODE_RDMA_RSP_RECV_IMMDT = 3, ++ XSC_OPCODE_RDMA_REQ_WRITE = 4, ++ XSC_OPCODE_RDMA_REQ_WRITE_IMMDT = 5, ++ XSC_OPCODE_RDMA_RSP_WRITE_IMMDT = 6, ++ XSC_OPCODE_RDMA_REQ_READ = 7, ++ XSC_OPCODE_RDMA_REQ_ERROR = 8, ++ XSC_OPCODE_RDMA_RSP_ERROR = 9, ++ XSC_OPCODE_RDMA_CQE_ERROR = 10, ++}; ++ ++enum { ++ XSC_BASE_WQE_SHIFT = 4, ++}; ++ ++/* ++ * Descriptors that are allocated by SW and accessed by HW, 32-byte aligned ++ */ ++/* this is to keep descriptor structures packed */ ++struct regpair { ++ __le32 lo; ++ __le32 hi; ++}; ++ ++struct xsc_send_wqe_ctrl_seg { ++ uint8_t msg_opcode; ++ uint8_t with_immdt:1; ++ uint8_t :2; ++ uint8_t ds_data_num:5; ++ __le16 wqe_id; ++ __le32 msg_len; ++ __le32 opcode_data; ++ uint8_t se:1; ++ uint8_t ce:1; ++ uint8_t in_line:1; ++ __le32 :29; ++}; ++ ++ ++struct xsc_wqe_data_seg { ++ union { ++ struct { ++ uint8_t :1; ++ __le32 seg_len:31; ++ __le32 mkey; ++ __le64 va; ++ }; ++ struct { ++ uint8_t in_line_data[16]; ++ }; ++ }; ++}; ++ ++struct xsc_cqe { ++ union { ++ uint8_t msg_opcode; ++ struct { ++ uint8_t error_code:7; ++ uint8_t is_error:1; ++ }; ++ }; ++ __le32 qp_id:15; ++ uint8_t :1; ++ uint8_t se:1; ++ uint8_t has_pph:1; ++ uint8_t type:1; ++ uint8_t with_immdt:1; ++ uint8_t csum_err:4; ++ __le32 imm_data; ++ __le32 msg_len; ++ __le32 vni; ++ __le64 ts:48; ++ __le16 wqe_id; ++ __le16 rsv[3]; ++ __le16 rsv1:15; ++ uint8_t owner:1; ++}; ++ ++/* Size of CQE */ ++#define XSC_CQE_SIZE sizeof(struct xsc_cqe) ++ ++union xsc_db_data { ++ struct { ++ __le32 sq_next_pid:16; ++ __le32 sqn:15; ++ __le32 :1; ++ }; ++ struct { ++ __le32 rq_next_pid:13; ++ __le32 rqn:15; ++ __le32 :4; ++ }; ++ struct { ++ __le32 cq_next_cid:16; ++ __le32 cqn:15; ++ __le32 solicited:1; ++ }; ++ __le32 raw_data; ++}; ++ ++#define CQM_DB_NEXT_CID_OFFSET(n) (4 * (n)) ++ ++#define XSC_SEND_WQE_RING_DEPTH_MIN 16 ++#define XSC_CQE_RING_DEPTH_MIN 2 ++#define XSC_SEND_WQE_RING_DEPTH_MAX 1024 ++#define XSC_RECV_WQE_RING_DEPTH_MAX 1024 ++#define XSC_CQE_RING_DEPTH_MAX (1024 * 32) ++ ++/* ++ * Registers that are allocated by HW and accessed by SW in 4-byte granularity ++ */ ++/* MMT table (32 bytes) */ ++struct xsc_mmt_tbl { ++ struct regpair pa; ++ struct regpair va; ++ __le32 size; ++#define XSC_MMT_TBL_PD_MASK 0x00FFFFFF ++#define XSC_MMT_TBL_KEY_MASK 0xFF000000 ++ __le32 key_pd; ++#define XSC_MMT_TBL_ACC_MASK 0x0000000F ++ __le32 acc; ++ uint8_t padding[4]; ++}; ++ ++/* QP Context (16 bytes) */ ++struct xsc_qp_context { ++#define XSC_QP_CONTEXT_STATE_MASK 0x00000007 ++#define XSC_QP_CONTEXT_FUNC_MASK 0x00000018 ++#define XSC_QP_CONTEXT_DSTID_MASK 0x000000E0 ++#define XSC_QP_CONTEXT_PD_MASK 0xFFFFFF00 ++ __le32 pd_dstid_func_state; ++#define XSC_QP_CONTEXT_DSTQP_MASK 0x00FFFFFF ++#define XSC_QP_CONTEXT_RCQIDL_MASK 0xFF000000 ++ __le32 rcqidl_dstqp; ++#define XSC_QP_CONTEXT_RCQIDH_MASK 0x0000FFFF ++#define XSC_QP_CONTEXT_SCQIDL_MASK 0xFFFF0000 ++ __le32 scqidl_rcqidh; ++#define XSC_QP_CONTEXT_SCQIDH_MASK 0x000000FF ++ __le32 scqidh; ++}; ++ ++/* TODO: EPP Table and others */ 
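++
++/*
++ * Note on the owner-bit helpers below (inferred from their arithmetic):
++ * ring_sz is the log2 of the CQE ring depth, so (index >> ring_sz) & 1 is a
++ * phase bit that flips every time the index wraps around the ring.  A CQE is
++ * treated as software-owned while its owner bit matches the phase of the
++ * current consumer index; e.g. with a 256-entry ring (ring_sz = 8), indexes
++ * 0..255 expect owner == 0 and indexes 256..511 expect owner == 1.
++ */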
++ ++static inline bool xsc_get_cqe_sw_own(struct xsc_cqe *cqe, int cid, int ring_sz) ALWAYS_INLINE; ++ ++static inline void xsc_set_cqe_sw_own(struct xsc_cqe *cqe, int pid, int ring_sz) ALWAYS_INLINE; ++ ++static inline bool xsc_get_cqe_sw_own(struct xsc_cqe *cqe, int cid, int ring_sz) ++{ ++ return cqe->owner == ((cid >> ring_sz) & 1); ++} ++ ++static inline void xsc_set_cqe_sw_own(struct xsc_cqe *cqe, int pid, int ring_sz) ++{ ++ cqe->owner = ((pid >> ring_sz) & 1); ++} ++#endif /* __XSC_HSI_H__ */ +diff --git a/providers/xscale/xsc_hw.h b/providers/xscale/xsc_hw.h +new file mode 100755 +index 0000000..f2b0ce3 +--- /dev/null ++++ b/providers/xscale/xsc_hw.h +@@ -0,0 +1,584 @@ ++#ifndef _XSC_HW_H_ ++#define _XSC_HW_H_ ++ ++#include ++ ++#include "xscale.h" ++ ++struct xsc_andes_cqe { ++ union { ++ uint8_t msg_opcode; ++ struct { ++ uint8_t error_code:7; ++ uint8_t is_error:1; ++ }; ++ }; ++ __le32 qp_id:15; ++ uint8_t :1; ++ uint8_t se:1; ++ uint8_t has_pph:1; ++ uint8_t type:1; ++ uint8_t with_immdt:1; ++ uint8_t csum_err:4; ++ __le32 imm_data; ++ __le32 msg_len; ++ __le32 vni; ++ __le64 ts:48; ++ __le16 wqe_id; ++ __le16 rsv[3]; ++ __le16 rsv1:15; ++ uint8_t owner:1; ++}; ++ ++union xsc_andes_cq_doorbell { ++ struct { ++ uint32_t cq_next_cid:16; ++ uint32_t cq_id:15; ++ uint32_t arm:1; ++ }; ++ uint32_t val; ++}; ++ ++union xsc_andes_send_doorbell { ++ struct { ++ uint32_t next_pid:16; ++ uint32_t qp_id:15; ++ }; ++ uint32_t raw; ++}; ++ ++union xsc_andes_recv_doorbell { ++ struct { ++ uint32_t next_pid:13; ++ uint32_t qp_id:15; ++ }; ++ uint32_t raw; ++}; ++ ++struct xsc_andes_data_seg { ++ uint32_t :1; ++ uint32_t length:31; ++ uint32_t key; ++ uint64_t addr; ++}; ++ ++struct xsc_diamond_cqe { ++ uint8_t error_code; ++ __le32 qp_id:15; ++ uint8_t :1; ++ uint8_t se:1; ++ uint8_t has_pph:1; ++ uint8_t type:1; ++ uint8_t with_immdt:1; ++ uint8_t csum_err:4; ++ __le32 imm_data; ++ __le32 msg_len; ++ __le32 vni; ++ __le64 ts:48; ++ __le16 wqe_id; ++ uint8_t msg_opcode; ++ uint8_t rsv; ++ __le16 rsv1[2]; ++ __le16 rsv2:15; ++ uint8_t owner:1; ++}; ++ ++union xsc_diamond_cq_doorbell { ++ struct { ++ uint64_t cq_next_cid:23; ++ uint64_t cq_id:14; ++ uint64_t cq_sta:2; ++ }; ++ uint64_t raw; ++}; ++ ++union xsc_diamond_recv_doorbell { ++ struct { ++ uint64_t next_pid:14; ++ uint64_t qp_id:14; ++ }; ++ uint64_t raw; ++}; ++ ++union xsc_diamond_send_doorbell { ++ struct { ++ uint64_t next_pid:17; ++ uint64_t qp_id:14; ++ }; ++ uint64_t raw; ++}; ++ ++struct xsc_diamond_data_seg { ++ uint32_t length; ++ uint32_t key; ++ uint64_t addr; ++}; ++ ++union xsc_diamond_next_cq_doorbell { ++ struct { ++ uint64_t cq_next_cid:23; ++ uint64_t cq_id:10; ++ uint64_t cq_sta:2; ++ }; ++ uint64_t raw; ++}; ++ ++union xsc_diamond_next_send_doorbell { ++ struct { ++ uint64_t next_pid:17; ++ uint64_t qp_id:10; ++ }; ++ uint64_t raw; ++}; ++ ++union xsc_diamond_next_recv_doorbell { ++ struct { ++ uint64_t next_pid:14; ++ uint64_t qp_id:10; ++ }; ++ uint64_t raw; ++}; ++ ++enum { ++ XSC_CQ_STAT_FIRED, ++ XSC_CQ_STAT_KEEP, ++ XSC_CQ_STAT_ARM_NEXT, ++ XSC_CQ_STAT_ARM_SOLICITED, ++}; ++ ++#define XSC_HW_ALWAYS_INLINE inline __attribute__((always_inline)) ++ ++static XSC_HW_ALWAYS_INLINE uint8_t xsc_diamond_get_cqe_msg_opcode(void *cqe) ++{ ++ return ((struct xsc_diamond_cqe *)cqe)->msg_opcode; ++} ++ ++static XSC_HW_ALWAYS_INLINE uint8_t xsc_andes_get_cqe_msg_opcode(void *cqe) ++{ ++ return ((struct xsc_andes_cqe *)cqe)->msg_opcode; ++} ++ ++static XSC_HW_ALWAYS_INLINE uint8_t xsc_hw_get_cqe_msg_opcode(uint16_t 
device_id, void *cqe) ++{ ++ switch (device_id) { ++ case XSC_MS_PF_DEV_ID: ++ case XSC_MS_VF_DEV_ID: ++ return xsc_andes_get_cqe_msg_opcode(cqe); ++ case XSC_MC_PF_DEV_ID_DIAMOND: ++ case XSC_MC_PF_DEV_ID_DIAMOND_NEXT: ++ return xsc_diamond_get_cqe_msg_opcode(cqe); ++ default: ++ return xsc_andes_get_cqe_msg_opcode(cqe); ++ } ++} ++ ++static XSC_HW_ALWAYS_INLINE bool xsc_diamond_is_err_cqe(void *cqe) ++{ ++ return !!((struct xsc_diamond_cqe *)cqe)->error_code; ++} ++ ++static XSC_HW_ALWAYS_INLINE bool xsc_andes_is_err_cqe(void *cqe) ++{ ++ return ((struct xsc_andes_cqe *)cqe)->is_error; ++} ++ ++static XSC_HW_ALWAYS_INLINE bool xsc_hw_is_err_cqe(uint16_t device_id, void *cqe) ++{ ++ switch (device_id) { ++ case XSC_MS_PF_DEV_ID: ++ case XSC_MS_VF_DEV_ID: ++ return xsc_andes_is_err_cqe(cqe); ++ case XSC_MC_PF_DEV_ID_DIAMOND: ++ case XSC_MC_PF_DEV_ID_DIAMOND_NEXT: ++ return xsc_diamond_is_err_cqe(cqe); ++ default: ++ return xsc_andes_is_err_cqe(cqe); ++ } ++} ++ ++static XSC_HW_ALWAYS_INLINE uint8_t xsc_diamond_get_cqe_err_code(void *cqe) ++{ ++ return ((struct xsc_diamond_cqe *)cqe)->error_code; ++} ++ ++static XSC_HW_ALWAYS_INLINE uint8_t xsc_andes_get_cqe_err_code(void *cqe) ++{ ++ return ((struct xsc_andes_cqe *)cqe)->error_code; ++} ++ ++static XSC_HW_ALWAYS_INLINE uint8_t xsc_hw_get_cqe_err_code(uint16_t device_id, void *cqe) ++{ ++ switch (device_id) { ++ case XSC_MS_PF_DEV_ID: ++ case XSC_MS_VF_DEV_ID: ++ return xsc_andes_get_cqe_err_code(cqe); ++ case XSC_MC_PF_DEV_ID_DIAMOND: ++ case XSC_MC_PF_DEV_ID_DIAMOND_NEXT: ++ return xsc_diamond_get_cqe_err_code(cqe); ++ default: ++ return xsc_andes_get_cqe_err_code(cqe); ++ } ++} ++ ++static inline enum ibv_wc_status xsc_andes_cqe_err_code(uint8_t error_code) ++{ ++ switch (error_code) { ++ case XSC_ANDES_ERR_CODE_NAK_RETRY: ++ return IBV_WC_RETRY_EXC_ERR; ++ case XSC_ANDES_ERR_CODE_NAK_OPCODE: ++ return IBV_WC_REM_INV_REQ_ERR; ++ case XSC_ANDES_ERR_CODE_NAK_MR: ++ return IBV_WC_REM_ACCESS_ERR; ++ case XSC_ANDES_ERR_CODE_NAK_OPERATION: ++ return IBV_WC_REM_OP_ERR; ++ case XSC_ANDES_ERR_CODE_NAK_RNR: ++ return IBV_WC_RNR_RETRY_EXC_ERR; ++ case XSC_ANDES_ERR_CODE_LOCAL_MR: ++ return IBV_WC_LOC_PROT_ERR; ++ case XSC_ANDES_ERR_CODE_LOCAL_LEN: ++ return IBV_WC_LOC_LEN_ERR; ++ case XSC_ANDES_ERR_CODE_LEN_GEN_CQE: ++ return IBV_WC_LOC_LEN_ERR; ++ case XSC_ANDES_ERR_CODE_OPERATION: ++ return IBV_WC_LOC_ACCESS_ERR; ++ case XSC_ANDES_ERR_CODE_FLUSH: ++ return IBV_WC_WR_FLUSH_ERR; ++ case XSC_ANDES_ERR_CODE_MALF_WQE_HOST: ++ case XSC_ANDES_ERR_CODE_STRG_ACC_GEN_CQE: ++ case XSC_ANDES_ERR_CODE_STRG_ACC: ++ return IBV_WC_FATAL_ERR; ++ case XSC_ANDES_ERR_CODE_MR_GEN_CQE: ++ return IBV_WC_LOC_PROT_ERR; ++ case XSC_ANDES_ERR_CODE_LOCAL_OPERATION_WQE: ++ return IBV_WC_LOC_QP_OP_ERR; ++ case XSC_ANDES_ERR_CODE_OPCODE_GEN_CQE: ++ case XSC_ANDES_ERR_CODE_LOCAL_OPCODE: ++ default: ++ return IBV_WC_GENERAL_ERR; ++ } ++} ++ ++static inline enum ibv_wc_status xsc_diamond_cqe_err_code(uint8_t error_code) ++{ ++ switch (error_code) { ++ case XSC_DIAMOND_ERR_CODE_NAK_SEQ_ERR: ++ case XSC_DIAMOND_ERR_CODE_RTO_REQ: ++ return IBV_WC_RETRY_EXC_ERR; ++ case XSC_DIAMOND_ERR_CODE_NAK_INV_REQ: ++ return IBV_WC_REM_INV_REQ_ERR; ++ case XSC_DIAMOND_ERR_CODE_NAK_MR: ++ return IBV_WC_REM_ACCESS_ERR; ++ case XSC_DIAMOND_ERR_CODE_NAK_REMOTE_OPER_ERR: ++ return IBV_WC_REM_OP_ERR; ++ case XSC_DIAMOND_ERR_CODE_LOCAL_MR_REQ: ++ case XSC_DIAMOND_ERR_CODE_REMOTE_MR: ++ case XSC_DIAMOND_ERR_CODE_REMOTE_MR_GEN_CQE: ++ case XSC_DIAMOND_ERR_CODE_LOCAL_MR_RSP: ++ return IBV_WC_LOC_PROT_ERR; 
++ case XSC_DIAMOND_ERR_CODE_LEN: ++ case XSC_DIAMOND_ERR_CODE_LEN_GEN_CQE: ++ return IBV_WC_LOC_LEN_ERR; ++ case XSC_DIAMOND_ERR_CODE_FLUSH: ++ return IBV_WC_WR_FLUSH_ERR; ++ case XSC_DIAMOND_ERR_CODE_RCV_WQE_DMA: ++ case XSC_DIAMOND_ERR_CODE_DATA_DMA_RD_REQ: ++ case XSC_DIAMOND_ERR_CODE_DATA_DMA_WR_RSP_GEN_CQE: ++ case XSC_DIAMOND_ERR_CODE_DATA_DMA_WR_RSP: ++ return IBV_WC_FATAL_ERR; ++ case XSC_DIAMOND_ERR_CODE_SND_WQE_FORMAT: ++ return IBV_WC_LOC_QP_OP_ERR; ++ default: ++ return IBV_WC_GENERAL_ERR; ++ } ++} ++ ++static XSC_HW_ALWAYS_INLINE enum ibv_wc_status xsc_hw_cqe_err_status(uint16_t device_id, ++ void *cqe) ++{ ++ switch (device_id) { ++ case XSC_MS_PF_DEV_ID: ++ case XSC_MS_VF_DEV_ID: ++ return xsc_andes_cqe_err_code(xsc_andes_get_cqe_err_code(cqe)); ++ case XSC_MC_PF_DEV_ID_DIAMOND: ++ case XSC_MC_PF_DEV_ID_DIAMOND_NEXT: ++ return xsc_diamond_cqe_err_code(xsc_diamond_get_cqe_err_code(cqe)); ++ default: ++ return xsc_andes_cqe_err_code(xsc_andes_get_cqe_err_code(cqe)); ++ } ++} ++ ++static XSC_HW_ALWAYS_INLINE void xsc_diamond_set_data_seg(void *data_seg, ++ uint64_t addr, uint32_t key, ++ uint32_t length) ++{ ++ struct xsc_diamond_data_seg *seg = data_seg; ++ ++ seg->length = length; ++ seg->key = key; ++ seg->addr = addr; ++} ++ ++static XSC_HW_ALWAYS_INLINE void xsc_andes_set_data_seg(void *data_seg, ++ uint64_t addr, uint32_t key, ++ uint32_t length) ++{ ++ struct xsc_andes_data_seg *seg = data_seg; ++ ++ seg->length = length; ++ seg->key = key; ++ seg->addr = addr; ++} ++ ++static XSC_HW_ALWAYS_INLINE void xsc_hw_set_data_seg(uint16_t device_id, void *data_seg, ++ uint64_t addr, uint32_t key, uint32_t length) ++{ ++ switch (device_id) { ++ case XSC_MS_PF_DEV_ID: ++ case XSC_MS_VF_DEV_ID: ++ xsc_andes_set_data_seg(data_seg, addr, key, length); ++ break; ++ case XSC_MC_PF_DEV_ID_DIAMOND: ++ case XSC_MC_PF_DEV_ID_DIAMOND_NEXT: ++ xsc_diamond_set_data_seg(data_seg, addr, key, length); ++ break; ++ default: ++ xsc_andes_set_data_seg(data_seg, addr, key, length); ++ } ++} ++ ++static XSC_HW_ALWAYS_INLINE void xsc_diamond_set_cq_ci(void *db_addr, ++ uint32_t cqn, uint32_t next_cid) ++{ ++ union xsc_diamond_cq_doorbell db; ++ ++ db.cq_id = cqn; ++ db.cq_next_cid = next_cid; ++ db.cq_sta = XSC_CQ_STAT_FIRED; ++ udma_to_device_barrier(); ++ mmio_write64_le(db_addr, db.raw); ++} ++ ++static XSC_HW_ALWAYS_INLINE void xsc_diamond_next_set_cq_ci(void *db_addr, ++ uint32_t cqn, uint32_t next_cid) ++{ ++ union xsc_diamond_next_cq_doorbell db; ++ ++ db.cq_id = cqn; ++ db.cq_next_cid = next_cid; ++ db.cq_sta = XSC_CQ_STAT_FIRED; ++ udma_to_device_barrier(); ++ mmio_write64_le(db_addr, db.raw); ++} ++ ++static XSC_HW_ALWAYS_INLINE void xsc_andes_set_cq_ci(void *db_addr, ++ uint32_t cqn, uint32_t next_cid) ++{ ++ union xsc_andes_cq_doorbell db; ++ ++ db.cq_id = cqn; ++ db.cq_next_cid = next_cid; ++ db.arm = XSC_CQ_STAT_FIRED; ++ udma_to_device_barrier(); ++ mmio_write32_le(db_addr, db.val); ++} ++ ++ ++static XSC_HW_ALWAYS_INLINE void xsc_hw_set_cq_ci(uint16_t device_id, void *db_addr, ++ uint32_t cqn, uint32_t next_cid) ++{ ++ switch (device_id) { ++ case XSC_MS_PF_DEV_ID: ++ case XSC_MS_VF_DEV_ID: ++ xsc_andes_set_cq_ci(db_addr, cqn, next_cid); ++ break; ++ case XSC_MC_PF_DEV_ID_DIAMOND: ++ xsc_diamond_set_cq_ci(db_addr, cqn, next_cid); ++ break; ++ case XSC_MC_PF_DEV_ID_DIAMOND_NEXT: ++ xsc_diamond_next_set_cq_ci(db_addr, cqn, next_cid); ++ break; ++ default: ++ xsc_andes_set_cq_ci(db_addr, cqn, next_cid); ++ } ++} ++ ++static XSC_HW_ALWAYS_INLINE void xsc_diamond_update_cq_db(void 
*db_addr, ++ uint32_t cqn, uint32_t next_cid, ++ uint8_t solicited) ++{ ++ union xsc_diamond_cq_doorbell db; ++ ++ db.cq_id = cqn; ++ db.cq_next_cid = next_cid; ++ db.cq_sta = solicited ? XSC_CQ_STAT_ARM_SOLICITED : XSC_CQ_STAT_ARM_NEXT; ++ udma_to_device_barrier(); ++ mmio_wc_start(); ++ mmio_write64_le(db_addr, db.raw); ++ mmio_flush_writes(); ++} ++ ++static XSC_HW_ALWAYS_INLINE void xsc_diamond_next_update_cq_db(void *db_addr, ++ uint32_t cqn, uint32_t next_cid, ++ uint8_t solicited) ++{ ++ union xsc_diamond_next_cq_doorbell db; ++ ++ db.cq_id = cqn; ++ db.cq_next_cid = next_cid; ++ db.cq_sta = solicited ? XSC_CQ_STAT_ARM_SOLICITED : XSC_CQ_STAT_ARM_NEXT; ++ udma_to_device_barrier(); ++ mmio_wc_start(); ++ mmio_write64_le(db_addr, db.raw); ++ mmio_flush_writes(); ++} ++ ++static XSC_HW_ALWAYS_INLINE void xsc_andes_update_cq_db(void *db_addr, ++ uint32_t cqn, uint32_t next_cid, ++ uint8_t solicited) ++{ ++ union xsc_andes_cq_doorbell db; ++ ++ db.cq_id = cqn; ++ db.cq_next_cid = next_cid; ++ db.arm = solicited; ++ udma_to_device_barrier(); ++ mmio_wc_start(); ++ mmio_write32_le(db_addr, db.val); ++ mmio_flush_writes(); ++} ++ ++static XSC_HW_ALWAYS_INLINE void xsc_hw_update_cq_db(uint16_t device_id, void *db_addr, ++ uint32_t cqn, uint32_t next_cid, ++ uint8_t solicited) ++{ ++ switch (device_id) { ++ case XSC_MS_PF_DEV_ID: ++ case XSC_MS_VF_DEV_ID: ++ xsc_andes_update_cq_db(db_addr, cqn, next_cid, solicited); ++ break; ++ case XSC_MC_PF_DEV_ID_DIAMOND: ++ xsc_diamond_update_cq_db(db_addr, cqn, next_cid, solicited); ++ break; ++ case XSC_MC_PF_DEV_ID_DIAMOND_NEXT: ++ xsc_diamond_next_update_cq_db(db_addr, cqn, next_cid, solicited); ++ break; ++ default: ++ xsc_andes_update_cq_db(db_addr, cqn, next_cid, solicited); ++ } ++} ++ ++static XSC_HW_ALWAYS_INLINE void xsc_diamond_ring_rx_doorbell(void *db_addr, ++ uint32_t rqn, uint32_t next_pid) ++{ ++ union xsc_diamond_recv_doorbell db; ++ ++ db.qp_id = rqn; ++ db.next_pid = next_pid; ++ ++ udma_to_device_barrier(); ++ mmio_write64_le(db_addr, db.raw); ++} ++ ++ ++static XSC_HW_ALWAYS_INLINE void xsc_diamond_next_ring_rx_doorbell(void *db_addr, ++ uint32_t rqn, uint32_t next_pid) ++{ ++ union xsc_diamond_next_recv_doorbell db; ++ ++ db.qp_id = rqn; ++ db.next_pid = next_pid; ++ ++ udma_to_device_barrier(); ++ mmio_write64_le(db_addr, db.raw); ++} ++ ++static XSC_HW_ALWAYS_INLINE void xsc_andes_ring_rx_doorbell(void *db_addr, ++ uint32_t rqn, uint32_t next_pid) ++{ ++ union xsc_andes_recv_doorbell db; ++ ++ db.qp_id = rqn; ++ db.next_pid = next_pid; ++ ++ udma_to_device_barrier(); ++ mmio_write32_le(db_addr, db.raw); ++} ++ ++static XSC_HW_ALWAYS_INLINE void xsc_hw_ring_rx_doorbell(uint16_t device_id, ++ void *db_addr, ++ uint32_t rqn, uint32_t next_pid) ++{ ++ switch (device_id) { ++ case XSC_MS_PF_DEV_ID: ++ case XSC_MS_VF_DEV_ID: ++ xsc_andes_ring_rx_doorbell(db_addr, rqn, next_pid); ++ break; ++ case XSC_MC_PF_DEV_ID_DIAMOND: ++ xsc_diamond_ring_rx_doorbell(db_addr, rqn, next_pid); ++ break; ++ case XSC_MC_PF_DEV_ID_DIAMOND_NEXT: ++ xsc_diamond_next_ring_rx_doorbell(db_addr, rqn, next_pid); ++ break; ++ default: ++ xsc_andes_ring_rx_doorbell(db_addr, rqn, next_pid); ++ } ++} ++ ++static XSC_HW_ALWAYS_INLINE void xsc_diamond_ring_tx_doorbell(void *db_addr, ++ uint32_t rqn, uint32_t next_pid) ++{ ++ union xsc_diamond_send_doorbell db; ++ ++ db.qp_id = rqn; ++ db.next_pid = next_pid; ++ ++ udma_to_device_barrier(); ++ mmio_write64_le(db_addr, db.raw); ++} ++ ++ ++static XSC_HW_ALWAYS_INLINE void xsc_diamond_next_ring_tx_doorbell(void 
*db_addr, ++ uint32_t rqn, uint32_t next_pid) ++{ ++ union xsc_diamond_next_send_doorbell db; ++ ++ db.qp_id = rqn; ++ db.next_pid = next_pid; ++ ++ udma_to_device_barrier(); ++ mmio_write64_le(db_addr, db.raw); ++} ++ ++static XSC_HW_ALWAYS_INLINE void xsc_andes_ring_tx_doorbell(void *db_addr, ++ uint32_t rqn, uint32_t next_pid) ++{ ++ union xsc_andes_send_doorbell db; ++ ++ db.qp_id = rqn; ++ db.next_pid = next_pid; ++ ++ udma_to_device_barrier(); ++ mmio_write32_le(db_addr, db.raw); ++} ++ ++static XSC_HW_ALWAYS_INLINE void xsc_hw_ring_tx_doorbell(uint16_t device_id, ++ void *db_addr, ++ uint32_t sqn, uint32_t next_pid) ++{ ++ switch (device_id) { ++ case XSC_MS_PF_DEV_ID: ++ case XSC_MS_VF_DEV_ID: ++ xsc_andes_ring_tx_doorbell(db_addr, sqn, next_pid); ++ break; ++ case XSC_MC_PF_DEV_ID_DIAMOND: ++ xsc_diamond_ring_tx_doorbell(db_addr, sqn, next_pid); ++ break; ++ case XSC_MC_PF_DEV_ID_DIAMOND_NEXT: ++ xsc_diamond_next_ring_tx_doorbell(db_addr, sqn, next_pid); ++ break; ++ default: ++ xsc_andes_ring_tx_doorbell(db_addr, sqn, next_pid); ++ } ++} ++ ++#endif /* _XSC_HW_H_ */ +diff --git a/providers/xscale/xscale.c b/providers/xscale/xscale.c +new file mode 100644 +index 0000000..e6792b9 +--- /dev/null ++++ b/providers/xscale/xscale.c +@@ -0,0 +1,948 @@ ++/* ++ * Copyright (c) 2021 - 2022, Shanghai Yunsilicon Technology Co., Ltd. ++ * All rights reserved. ++ */ ++ ++#define _GNU_SOURCE ++#include ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include ++ ++#include "xscale.h" ++#include "xsc-abi.h" ++#include "wqe.h" ++#include "xsc_hsi.h" ++ ++#ifndef PCI_VENDOR_ID_MELLANOX ++#define PCI_VENDOR_ID_MELLANOX 0x15b3 ++#endif ++ ++#ifndef CPU_OR ++#define CPU_OR(x, y, z) do {} while (0) ++#endif ++ ++#ifndef CPU_EQUAL ++#define CPU_EQUAL(x, y) 1 ++#endif ++ ++#define HCA(v, d) VERBS_PCI_MATCH(PCI_VENDOR_ID_##v, d, NULL) ++static const struct verbs_match_ent hca_table[] = { ++ VERBS_MODALIAS_MATCH("*xscale*", NULL), ++ {} ++}; ++ ++uint32_t xsc_debug_mask = 0; ++int xsc_freeze_on_error_cqe; ++static void xsc_free_context(struct ibv_context *ibctx); ++ ++static const struct verbs_context_ops xsc_ctx_common_ops = { ++ .query_port = xsc_query_port, ++ .alloc_pd = xsc_alloc_pd, ++ .dealloc_pd = xsc_free_pd, ++ .reg_mr = xsc_reg_mr, ++ .rereg_mr = xsc_rereg_mr, ++ .dereg_mr = xsc_dereg_mr, ++ .alloc_mw = NULL, ++ .dealloc_mw = NULL, ++ .bind_mw = NULL, ++ .create_cq = xsc_create_cq, ++ .poll_cq = xsc_poll_cq, ++ .req_notify_cq = xsc_arm_cq, ++ .cq_event = xsc_cq_event, ++ .resize_cq = xsc_resize_cq, ++ .destroy_cq = xsc_destroy_cq, ++ .create_srq = NULL, ++ .modify_srq = NULL, ++ .query_srq = NULL, ++ .destroy_srq = NULL, ++ .post_srq_recv = NULL, ++ .create_qp = xsc_create_qp, ++ .query_qp = xsc_query_qp, ++ .modify_qp = xsc_modify_qp, ++ .destroy_qp = xsc_destroy_qp, ++ .post_send = xsc_post_send, ++ .post_recv = xsc_post_recv, ++ .create_ah = xsc_create_ah, ++ .destroy_ah = xsc_destroy_ah, ++ .attach_mcast = xsc_attach_mcast, ++ .detach_mcast = xsc_detach_mcast, ++ ++ .alloc_dm = xsc_alloc_dm, ++ .alloc_parent_domain = xsc_alloc_parent_domain, ++ .alloc_td = NULL, ++ .attach_counters_point_flow = xsc_attach_counters_point_flow, ++ .close_xrcd = xsc_close_xrcd, ++ .create_counters = xsc_create_counters, ++ .create_cq_ex = xsc_create_cq_ex, ++ .create_flow = xsc_create_flow, ++ .create_flow_action_esp = xsc_create_flow_action_esp, ++ .create_qp_ex = xsc_create_qp_ex, ++ .create_rwq_ind_table = xsc_create_rwq_ind_table, ++ .create_srq_ex = 
NULL, ++ .create_wq = xsc_create_wq, ++ .dealloc_td = NULL, ++ .destroy_counters = xsc_destroy_counters, ++ .destroy_flow = xsc_destroy_flow, ++ .destroy_flow_action = xsc_destroy_flow_action, ++ .destroy_rwq_ind_table = xsc_destroy_rwq_ind_table, ++ .destroy_wq = xsc_destroy_wq, ++ .free_dm = xsc_free_dm, ++ .get_srq_num = NULL, ++ .modify_cq = xsc_modify_cq, ++ .modify_flow_action_esp = xsc_modify_flow_action_esp, ++ .modify_qp_rate_limit = xsc_modify_qp_rate_limit, ++ .modify_wq = xsc_modify_wq, ++ .open_xrcd = xsc_open_xrcd, ++ .post_srq_ops = NULL, ++ .query_device_ex = xsc_query_device_ex, ++ .query_rt_values = xsc_query_rt_values, ++ .read_counters = xsc_read_counters, ++ .reg_dm_mr = xsc_reg_dm_mr, ++ .alloc_null_mr = xsc_alloc_null_mr, ++ .free_context = xsc_free_context, ++}; ++ ++static int read_number_from_line(const char *line, int *value) ++{ ++ const char *ptr; ++ ++ ptr = strchr(line, ':'); ++ if (!ptr) ++ return 1; ++ ++ ++ptr; ++ ++ *value = atoi(ptr); ++ return 0; ++} ++/** ++ * The function looks for the first free user-index in all the ++ * user-index tables. If all are used, returns -1, otherwise ++ * a valid user-index. ++ * In case the reference count of the table is zero, it means the ++ * table is not in use and wasn't allocated yet, therefore the ++ * xsc_store_uidx allocates the table, and increment the reference ++ * count on the table. ++ */ ++static int32_t get_free_uidx(struct xsc_context *ctx) ++{ ++ int32_t tind; ++ int32_t i; ++ ++ for (tind = 0; tind < XSC_UIDX_TABLE_SIZE; tind++) { ++ if (ctx->uidx_table[tind].refcnt < XSC_UIDX_TABLE_MASK) ++ break; ++ } ++ ++ if (tind == XSC_UIDX_TABLE_SIZE) ++ return -1; ++ ++ if (!ctx->uidx_table[tind].refcnt) ++ return tind << XSC_UIDX_TABLE_SHIFT; ++ ++ for (i = 0; i < XSC_UIDX_TABLE_MASK + 1; i++) { ++ if (!ctx->uidx_table[tind].table[i]) ++ break; ++ } ++ ++ return (tind << XSC_UIDX_TABLE_SHIFT) | i; ++} ++ ++int32_t xsc_store_uidx(struct xsc_context *ctx, void *rsc) ++{ ++ int32_t tind; ++ int32_t ret = -1; ++ int32_t uidx; ++ ++ pthread_mutex_lock(&ctx->uidx_table_mutex); ++ uidx = get_free_uidx(ctx); ++ if (uidx < 0) ++ goto out; ++ ++ tind = uidx >> XSC_UIDX_TABLE_SHIFT; ++ ++ if (!ctx->uidx_table[tind].refcnt) { ++ ctx->uidx_table[tind].table = calloc(XSC_UIDX_TABLE_MASK + 1, ++ sizeof(struct xsc_resource *)); ++ if (!ctx->uidx_table[tind].table) ++ goto out; ++ } ++ ++ ++ctx->uidx_table[tind].refcnt; ++ ctx->uidx_table[tind].table[uidx & XSC_UIDX_TABLE_MASK] = rsc; ++ ret = uidx; ++ ++out: ++ pthread_mutex_unlock(&ctx->uidx_table_mutex); ++ return ret; ++} ++ ++void xsc_clear_uidx(struct xsc_context *ctx, uint32_t uidx) ++{ ++ int tind = uidx >> XSC_UIDX_TABLE_SHIFT; ++ ++ pthread_mutex_lock(&ctx->uidx_table_mutex); ++ ++ if (!--ctx->uidx_table[tind].refcnt) ++ free(ctx->uidx_table[tind].table); ++ else ++ ctx->uidx_table[tind].table[uidx & XSC_UIDX_TABLE_MASK] = NULL; ++ ++ pthread_mutex_unlock(&ctx->uidx_table_mutex); ++} ++ ++static int xsc_is_sandy_bridge(int *num_cores) ++{ ++ char line[128]; ++ FILE *fd; ++ int rc = 0; ++ int cur_cpu_family = -1; ++ int cur_cpu_model = -1; ++ ++ fd = fopen("/proc/cpuinfo", "r"); ++ if (!fd) ++ return 0; ++ ++ *num_cores = 0; ++ ++ while (fgets(line, 128, fd)) { ++ int value; ++ ++ /* if this is information on new processor */ ++ if (!strncmp(line, "processor", 9)) { ++ ++*num_cores; ++ ++ cur_cpu_family = -1; ++ cur_cpu_model = -1; ++ } else if (!strncmp(line, "cpu family", 10)) { ++ if ((cur_cpu_family < 0) && (!read_number_from_line(line, &value))) ++ cur_cpu_family 
= value; ++ } else if (!strncmp(line, "model", 5)) { ++ if ((cur_cpu_model < 0) && (!read_number_from_line(line, &value))) ++ cur_cpu_model = value; ++ } ++ ++ /* if this is a Sandy Bridge CPU */ ++ if ((cur_cpu_family == 6) && ++ (cur_cpu_model == 0x2A || (cur_cpu_model == 0x2D) )) ++ rc = 1; ++ } ++ ++ fclose(fd); ++ return rc; ++} ++ ++/* ++man cpuset ++ ++ This format displays each 32-bit word in hexadecimal (using ASCII characters "0" - "9" and "a" - "f"); words ++ are filled with leading zeros, if required. For masks longer than one word, a comma separator is used between ++ words. Words are displayed in big-endian order, which has the most significant bit first. The hex digits ++ within a word are also in big-endian order. ++ ++ The number of 32-bit words displayed is the minimum number needed to display all bits of the bitmask, based on ++ the size of the bitmask. ++ ++ Examples of the Mask Format: ++ ++ 00000001 # just bit 0 set ++ 40000000,00000000,00000000 # just bit 94 set ++ 000000ff,00000000 # bits 32-39 set ++ 00000000,000E3862 # 1,5,6,11-13,17-19 set ++ ++ A mask with bits 0, 1, 2, 4, 8, 16, 32, and 64 set displays as: ++ ++ 00000001,00000001,00010117 ++ ++ The first "1" is for bit 64, the second for bit 32, the third for bit 16, the fourth for bit 8, the fifth for ++ bit 4, and the "7" is for bits 2, 1, and 0. ++*/ ++static void xsc_local_cpu_set(struct ibv_device *ibdev, cpu_set_t *cpu_set) ++{ ++ char *p, buf[1024] = {}; ++ char *env_value; ++ uint32_t word; ++ int i, k; ++ ++ env_value = getenv("XSC_LOCAL_CPUS"); ++ if (env_value) ++ strncpy(buf, env_value, sizeof(buf) - 1); ++ else { ++ char fname[MAXPATHLEN]; ++ FILE *fp; ++ ++ snprintf(fname, MAXPATHLEN, "/sys/class/infiniband/%s/device/local_cpus", ++ ibv_get_device_name(ibdev)); ++ ++ fp = fopen(fname, "r"); ++ if (!fp) { ++ fprintf(stderr, PFX "Warning: can not get local cpu set: failed to open %s\n", fname); ++ return; ++ } ++ if (!fgets(buf, sizeof(buf), fp)) { ++ fprintf(stderr, PFX "Warning: can not get local cpu set: failed to read cpu mask\n"); ++ fclose(fp); ++ return; ++ } ++ fclose(fp); ++ } ++ ++ p = strrchr(buf, ','); ++ if (!p) ++ p = buf; ++ ++ i = 0; ++ do { ++ if (*p == ',') { ++ *p = 0; ++ p ++; ++ } ++ ++ word = strtoul(p, NULL, 16); ++ ++ for (k = 0; word; ++k, word >>= 1) ++ if (word & 1) ++ CPU_SET(k+i, cpu_set); ++ ++ if (p == buf) ++ break; ++ ++ p = strrchr(buf, ','); ++ if (!p) ++ p = buf; ++ ++ i += 32; ++ } while (i < CPU_SETSIZE); ++} ++ ++static int xsc_enable_sandy_bridge_fix(struct ibv_device *ibdev) ++{ ++ cpu_set_t my_cpus, dev_local_cpus, result_set; ++ int stall_enable; ++ int ret; ++ int num_cores; ++ ++ if (!xsc_is_sandy_bridge(&num_cores)) ++ return 0; ++ ++ /* by default enable stall on sandy bridge arch */ ++ stall_enable = 1; ++ ++ /* ++ * check if app is bound to cpu set that is inside ++ * of device local cpu set. Disable stalling if true ++ */ ++ ++ /* use static cpu set - up to CPU_SETSIZE (1024) cpus/node */ ++ CPU_ZERO(&my_cpus); ++ CPU_ZERO(&dev_local_cpus); ++ CPU_ZERO(&result_set); ++ ret = sched_getaffinity(0, sizeof(my_cpus), &my_cpus); ++ if (ret == -1) { ++ if (errno == EINVAL) ++ fprintf(stderr, PFX "Warning: my cpu set is too small\n"); ++ else ++ fprintf(stderr, PFX "Warning: failed to get my cpu set\n"); ++ goto out; ++ } ++ ++ /* get device local cpu set */ ++ xsc_local_cpu_set(ibdev, &dev_local_cpus); ++ ++ /* check if my cpu set is in dev cpu */ ++ CPU_OR(&result_set, &my_cpus, &dev_local_cpus); ++ stall_enable = CPU_EQUAL(&result_set, &dev_local_cpus) ? 
0 : 1; ++ ++out: ++ return stall_enable; ++} ++ ++static void xsc_read_env(struct ibv_device *ibdev, struct xsc_context *ctx) ++{ ++ char *env_value; ++ ++ env_value = getenv("XSC_STALL_CQ_POLL"); ++ if (env_value) ++ /* check if cq stall is enforced by user */ ++ ctx->stall_enable = (strcmp(env_value, "0")) ? 1 : 0; ++ else ++ /* autodetect if we need to do cq polling */ ++ ctx->stall_enable = xsc_enable_sandy_bridge_fix(ibdev); ++ ++ env_value = getenv("XSC_STALL_NUM_LOOP"); ++ if (env_value) ++ xsc_stall_num_loop = atoi(env_value); ++ ++ env_value = getenv("XSC_STALL_CQ_POLL_MIN"); ++ if (env_value) ++ xsc_stall_cq_poll_min = atoi(env_value); ++ ++ env_value = getenv("XSC_STALL_CQ_POLL_MAX"); ++ if (env_value) ++ xsc_stall_cq_poll_max = atoi(env_value); ++ ++ env_value = getenv("XSC_STALL_CQ_INC_STEP"); ++ if (env_value) ++ xsc_stall_cq_inc_step = atoi(env_value); ++ ++ env_value = getenv("XSC_STALL_CQ_DEC_STEP"); ++ if (env_value) ++ xsc_stall_cq_dec_step = atoi(env_value); ++ ++ ctx->stall_adaptive_enable = 0; ++ ctx->stall_cycles = 0; ++ ++ if (xsc_stall_num_loop < 0) { ++ ctx->stall_adaptive_enable = 1; ++ ctx->stall_cycles = xsc_stall_cq_poll_min; ++ } ++ ++} ++ ++static void open_debug_file(struct xsc_context *ctx) ++{ ++ char *env; ++ ++ env = getenv("XSC_DEBUG_FILE"); ++ if (!env) { ++ ctx->dbg_fp = stderr; ++ return; ++ } ++ ++ ctx->dbg_fp = fopen(env, "aw+"); ++ if (!ctx->dbg_fp) { ++ fprintf(stderr, "Failed opening debug file %s, using stderr\n", env); ++ ctx->dbg_fp = stderr; ++ return; ++ } ++} ++ ++static void close_debug_file(struct xsc_context *ctx) ++{ ++ if (ctx->dbg_fp && ctx->dbg_fp != stderr) ++ fclose(ctx->dbg_fp); ++} ++ ++static void set_debug_mask(void) ++{ ++ char *env; ++ ++ env = getenv("XSC_DEBUG_MASK"); ++ if (env) ++ xsc_debug_mask = strtol(env, NULL, 0); ++} ++ ++static void set_freeze_on_error(void) ++{ ++ char *env; ++ ++ env = getenv("XSC_FREEZE_ON_ERROR_CQE"); ++ if (env) ++ xsc_freeze_on_error_cqe = strtol(env, NULL, 0); ++} ++ ++static int single_threaded_app(void) ++{ ++ ++ char *env; ++ ++ env = getenv("XSC_SINGLE_THREADED"); ++ if (env) ++ return strcmp(env, "1") ? 
0 : 1; ++ ++ return 0; ++} ++ ++static int xsc_cmd_get_context(struct xsc_context *context, ++ struct xsc_alloc_ucontext *req, ++ size_t req_len, ++ struct xsc_alloc_ucontext_resp *resp, ++ size_t resp_len) ++{ ++ struct verbs_context *verbs_ctx = &context->ibv_ctx; ++ ++ return ibv_cmd_get_context(verbs_ctx, &req->ibv_cmd, ++ req_len, &resp->ibv_resp, resp_len); ++} ++ ++int xscdv_query_device(struct ibv_context *ctx_in, ++ struct xscdv_context *attrs_out) ++{ ++ struct xsc_context *xctx = to_xctx(ctx_in); ++ uint64_t comp_mask_out = 0; ++ ++ attrs_out->version = 0; ++ attrs_out->flags = 0; ++ ++ if (xctx->cqe_version == XSC_CQE_VERSION_V1) ++ attrs_out->flags |= XSCDV_CONTEXT_FLAGS_CQE_V1; ++ ++ if (xctx->vendor_cap_flags & XSC_VENDOR_CAP_FLAGS_MPW_ALLOWED) ++ attrs_out->flags |= XSCDV_CONTEXT_FLAGS_MPW_ALLOWED; ++ ++ if (xctx->vendor_cap_flags & XSC_VENDOR_CAP_FLAGS_CQE_128B_COMP) ++ attrs_out->flags |= XSCDV_CONTEXT_FLAGS_CQE_128B_COMP; ++ ++ if (xctx->vendor_cap_flags & XSC_VENDOR_CAP_FLAGS_CQE_128B_PAD) ++ attrs_out->flags |= XSCDV_CONTEXT_FLAGS_CQE_128B_PAD; ++ ++ if (attrs_out->comp_mask & XSCDV_CONTEXT_MASK_CQE_COMPRESION) { ++ attrs_out->cqe_comp_caps = xctx->cqe_comp_caps; ++ comp_mask_out |= XSCDV_CONTEXT_MASK_CQE_COMPRESION; ++ } ++ ++ if (xctx->vendor_cap_flags & XSC_VENDOR_CAP_FLAGS_ENHANCED_MPW) ++ attrs_out->flags |= XSCDV_CONTEXT_FLAGS_ENHANCED_MPW; ++ ++ if (attrs_out->comp_mask & XSCDV_CONTEXT_MASK_SWP) { ++ attrs_out->sw_parsing_caps = xctx->sw_parsing_caps; ++ comp_mask_out |= XSCDV_CONTEXT_MASK_SWP; ++ } ++ ++ if (attrs_out->comp_mask & XSCDV_CONTEXT_MASK_STRIDING_RQ) { ++ attrs_out->striding_rq_caps = xctx->striding_rq_caps; ++ comp_mask_out |= XSCDV_CONTEXT_MASK_STRIDING_RQ; ++ } ++ ++ if (attrs_out->comp_mask & XSCDV_CONTEXT_MASK_TUNNEL_OFFLOADS) { ++ attrs_out->tunnel_offloads_caps = xctx->tunnel_offloads_caps; ++ comp_mask_out |= XSCDV_CONTEXT_MASK_TUNNEL_OFFLOADS; ++ } ++ ++ if (attrs_out->comp_mask & XSCDV_CONTEXT_MASK_CLOCK_INFO_UPDATE) { ++ if (xctx->clock_info_page) { ++ attrs_out->max_clock_info_update_nsec = ++ xctx->clock_info_page->overflow_period; ++ comp_mask_out |= XSCDV_CONTEXT_MASK_CLOCK_INFO_UPDATE; ++ } ++ } ++ ++ if (attrs_out->comp_mask & XSCDV_CONTEXT_MASK_FLOW_ACTION_FLAGS) { ++ attrs_out->flow_action_flags = xctx->flow_action_flags; ++ comp_mask_out |= XSCDV_CONTEXT_MASK_FLOW_ACTION_FLAGS; ++ } ++ ++ attrs_out->comp_mask = comp_mask_out; ++ ++ return 0; ++} ++ ++static int xscdv_get_qp(struct ibv_qp *qp_in, ++ struct xscdv_qp *qp_out) ++{ ++ struct xsc_qp *xqp = to_xqp(qp_in); ++ uint64_t mask_out = 0; ++ ++ qp_out->dbrec = xqp->db; ++ qp_out->sq.db = xqp->sq.db; ++ qp_out->rq.db = xqp->rq.db; ++ ++ if (xqp->sq_buf_size) ++ /* IBV_QPT_RAW_PACKET */ ++ qp_out->sq.buf = (void *)((uintptr_t)xqp->sq_buf.buf); ++ else ++ qp_out->sq.buf = (void *)((uintptr_t)xqp->buf.buf + xqp->sq.offset); ++ qp_out->sq.wqe_cnt = xqp->sq.wqe_cnt; ++ qp_out->sq.stride = 1 << xqp->sq.wqe_shift; ++ ++ qp_out->rq.buf = (void *)((uintptr_t)xqp->buf.buf + xqp->rq.offset); ++ qp_out->rq.wqe_cnt = xqp->rq.wqe_cnt; ++ qp_out->rq.stride = 1 << xqp->rq.wqe_shift; ++ ++ if (qp_out->comp_mask & XSCDV_QP_MASK_RAW_QP_HANDLES) { ++ qp_out->tirn = xqp->tirn; ++ qp_out->tisn = xqp->tisn; ++ qp_out->rqn = xqp->rqn; ++ qp_out->sqn = xqp->sqn; ++ mask_out |= XSCDV_QP_MASK_RAW_QP_HANDLES; ++ } ++ ++ qp_out->comp_mask = mask_out; ++ ++ return 0; ++} ++ ++static int xscdv_get_cq(struct ibv_cq *cq_in, ++ struct xscdv_cq *cq_out) ++{ ++ struct xsc_cq *xcq = to_xcq(cq_in); ++ ++ 
cq_out->comp_mask = 0; ++ cq_out->cqn = xcq->cqn; ++ cq_out->cqe_cnt = xcq->verbs_cq.cq_ex.cqe; ++ cq_out->cqe_size = xcq->cqe_sz; ++ cq_out->buf = xcq->active_buf->buf; ++ cq_out->dbrec = xcq->dbrec; ++ cq_out->db = xcq->db; ++ xcq->flags |= XSC_CQ_FLAGS_DV_OWNED; ++ ++ return 0; ++} ++ ++static int xscdv_get_rwq(struct ibv_wq *wq_in, ++ struct xscdv_rwq *rwq_out) ++{ ++ struct xsc_rwq *xrwq = to_xrwq(wq_in); ++ ++ rwq_out->comp_mask = 0; ++ rwq_out->buf = xrwq->pbuff; ++ rwq_out->dbrec = xrwq->recv_db; ++ rwq_out->wqe_cnt = xrwq->rq.wqe_cnt; ++ rwq_out->stride = 1 << xrwq->rq.wqe_shift; ++ rwq_out->db = xrwq->rq.db; ++ ++ return 0; ++} ++ ++static int xscdv_get_dm(struct ibv_dm *dm_in, ++ struct xscdv_dm *dm_out) ++{ ++ struct xsc_dm *xdm = to_xdm(dm_in); ++ ++ dm_out->comp_mask = 0; ++ dm_out->buf = xdm->start_va; ++ dm_out->length = xdm->length; ++ ++ return 0; ++} ++ ++static int xscdv_get_av(struct ibv_ah *ah_in, ++ struct xscdv_ah *ah_out) ++{ ++ struct xsc_ah *xah = to_xah(ah_in); ++ ++ ah_out->comp_mask = 0; ++ ah_out->av = &xah->av; ++ ++ return 0; ++} ++ ++static int xscdv_get_pd(struct ibv_pd *pd_in, ++ struct xscdv_pd *pd_out) ++{ ++ struct xsc_pd *xpd = to_xpd(pd_in); ++ ++ pd_out->comp_mask = 0; ++ pd_out->pdn = xpd->pdn; ++ ++ return 0; ++} ++ ++ int xscdv_init_obj(struct xscdv_obj *obj, uint64_t obj_type) ++{ ++ int ret = 0; ++ ++ if (obj_type & XSCDV_OBJ_QP) ++ ret = xscdv_get_qp(obj->qp.in, obj->qp.out); ++ if (!ret && (obj_type & XSCDV_OBJ_CQ)) ++ ret = xscdv_get_cq(obj->cq.in, obj->cq.out); ++ if (!ret && (obj_type & XSCDV_OBJ_RWQ)) ++ ret = xscdv_get_rwq(obj->rwq.in, obj->rwq.out); ++ if (!ret && (obj_type & XSCDV_OBJ_DM)) ++ ret = xscdv_get_dm(obj->dm.in, obj->dm.out); ++ if (!ret && (obj_type & XSCDV_OBJ_AH)) ++ ret = xscdv_get_av(obj->ah.in, obj->ah.out); ++ if (!ret && (obj_type & XSCDV_OBJ_PD)) ++ ret = xscdv_get_pd(obj->pd.in, obj->pd.out); ++ ++ return ret; ++} ++ ++int xscdv_set_context_attr(struct ibv_context *ibv_ctx, ++ enum xscdv_set_ctx_attr_type type, void *attr) ++{ ++ struct xsc_context *ctx = to_xctx(ibv_ctx); ++ ++ switch (type) { ++ case XSCDV_CTX_ATTR_BUF_ALLOCATORS: ++ ctx->extern_alloc = *((struct xscdv_ctx_allocators *)attr); ++ break; ++ default: ++ return ENOTSUP; ++ } ++ ++ return 0; ++} ++ ++int xscdv_get_clock_info(struct ibv_context *ctx_in, ++ struct xscdv_clock_info *clock_info) ++{ ++ struct xsc_context *ctx = to_xctx(ctx_in); ++ const struct xsc_ib_clock_info *ci = ctx->clock_info_page; ++ uint32_t retry, tmp_sig; ++ atomic_uint32_t *sig; ++ ++ if (!ci) ++ return EINVAL; ++ ++ sig = (atomic_uint32_t *)&ci->sign; ++ ++ do { ++ retry = 10; ++repeat: ++ tmp_sig = atomic_load(sig); ++ if (unlikely(tmp_sig & ++ XSC_IB_CLOCK_INFO_KERNEL_UPDATING)) { ++ if (--retry) ++ goto repeat; ++ return EBUSY; ++ } ++ clock_info->nsec = ci->nsec; ++ clock_info->last_cycles = ci->cycles; ++ clock_info->frac = ci->frac; ++ clock_info->mult = ci->mult; ++ clock_info->shift = ci->shift; ++ clock_info->mask = ci->mask; ++ } while (unlikely(tmp_sig != atomic_load(sig))); ++ ++ return 0; ++} ++ ++struct ibv_context * ++xscdv_open_device(struct ibv_device *device, struct xscdv_context_attr *attr) ++{ ++ return verbs_open_device(device, attr); ++} ++ ++static int xsc_mmap(struct xsc_device *xdev, struct xsc_context *context, ++ int cmd_fd, int size) ++{ ++ uint64_t page_mask; ++ ++ page_mask = (~(xdev->page_size - 1)); ++ xsc_dbg(context->dbg_fp, XSC_DBG_CTX, "page size:%d\n", size); ++ context->sqm_reg_va = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED, ++ 
cmd_fd, context->qpm_tx_db & page_mask); ++ if (context->sqm_reg_va == MAP_FAILED) { ++ return -1; ++ } ++ xsc_dbg(context->dbg_fp, XSC_DBG_CTX, "qpm reg va:%p\n", context->sqm_reg_va); ++ ++ context->rqm_reg_va = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED, ++ cmd_fd, context->qpm_rx_db & page_mask); ++ if (context->rqm_reg_va == MAP_FAILED) { ++ goto free_sqm; ++ } ++ xsc_dbg(context->dbg_fp, XSC_DBG_CTX, "qpm reg va:%p\n", context->rqm_reg_va); ++ ++ context->cqm_reg_va = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED, ++ cmd_fd, context->cqm_next_cid_reg & page_mask); ++ if (context->cqm_reg_va == MAP_FAILED) { ++ goto free_rqm; ++ } ++ xsc_dbg(context->dbg_fp, XSC_DBG_CTX, "cqm ci va:%p\n", context->cqm_reg_va); ++ context->db_mmap_size = size; ++ ++ context->cqm_armdb_va = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED, ++ cmd_fd, context->cqm_armdb & page_mask); ++ if (context->cqm_armdb_va == MAP_FAILED) { ++ goto free_cqm; ++ } ++ xsc_dbg(context->dbg_fp, XSC_DBG_CTX, "cqm armdb va:%p\n", context->cqm_armdb_va); ++ ++ return 0; ++ ++free_cqm: ++ munmap(context->cqm_reg_va, size); ++free_rqm: ++ munmap(context->rqm_reg_va, size); ++free_sqm: ++ munmap(context->sqm_reg_va, size); ++ ++ return -1; ++ ++} ++static void xsc_munmap(struct xsc_context *context) ++{ ++ if (context->sqm_reg_va) ++ munmap(context->sqm_reg_va, context->db_mmap_size); ++ ++ if (context->rqm_reg_va) ++ munmap(context->rqm_reg_va, context->db_mmap_size); ++ ++ if (context->cqm_reg_va) ++ munmap(context->cqm_reg_va, context->db_mmap_size); ++ ++ if (context->cqm_armdb_va) ++ munmap(context->cqm_armdb_va, context->db_mmap_size); ++ ++} ++static struct verbs_context *xsc_alloc_context(struct ibv_device *ibdev, ++ int cmd_fd, ++ void *private_data) ++{ ++ struct xsc_context *context; ++ struct xsc_alloc_ucontext req; ++ struct xsc_alloc_ucontext_resp resp; ++ int i; ++ int page_size; ++ int j; ++ struct xsc_device *xdev = to_xdev(ibdev); ++ struct verbs_context *v_ctx; ++ struct ibv_port_attr port_attr; ++ struct ibv_device_attr_ex device_attr; ++ struct xscdv_context_attr *ctx_attr = private_data; ++ ++ if (ctx_attr && ctx_attr->comp_mask) { ++ errno = EINVAL; ++ return NULL; ++ } ++ ++ context = verbs_init_and_alloc_context(ibdev, cmd_fd, context, ibv_ctx, ++ RDMA_DRIVER_XSC); ++ if (!context) ++ return NULL; ++ ++ v_ctx = &context->ibv_ctx; ++ page_size = xdev->page_size; ++ xsc_single_threaded = single_threaded_app(); ++ ++ open_debug_file(context); ++ set_debug_mask(); ++ set_freeze_on_error(); ++ if (gethostname(context->hostname, sizeof(context->hostname))) ++ strcpy(context->hostname, "host_unknown"); ++ ++ memset(&req, 0, sizeof(req)); ++ memset(&resp, 0, sizeof(resp)); ++ ++ if (xsc_cmd_get_context(context, &req, sizeof(req), &resp, ++ sizeof(resp))) ++ goto err_free; ++ ++ context->max_num_qps = resp.qp_tab_size; ++ context->cache_line_size = resp.cache_line_size; ++ context->max_sq_desc_sz = resp.max_sq_desc_sz; ++ context->max_rq_desc_sz = resp.max_rq_desc_sz; ++ context->max_send_wqebb = resp.max_send_wqebb; ++ context->num_ports = resp.num_ports; ++ context->max_recv_wr = resp.max_recv_wr; ++ context->qpm_tx_db = resp.qpm_tx_db; ++ context->qpm_rx_db = resp.qpm_rx_db; ++ context->cqm_next_cid_reg = resp.cqm_next_cid_reg; ++ context->cqm_armdb = resp.cqm_armdb; ++ context->send_ds_num = resp.send_ds_num; ++ context->send_ds_shift = xsc_ilog2(resp.send_ds_num); ++ context->recv_ds_num = resp.recv_ds_num; ++ context->recv_ds_shift = xsc_ilog2(resp.recv_ds_num); ++ ++ 
xsc_dbg(context->dbg_fp, XSC_DBG_CTX, ++ "max_num_qps:%u, max_sq_desc_sz:%u max_rq_desc_sz:%u " \ ++ "max_send_wqebb:%u, num_ports:%u, max_recv_wr:%u\n", ++ context->max_num_qps, context->max_sq_desc_sz, ++ context->max_rq_desc_sz, context->max_send_wqebb, ++ context->num_ports, context->max_recv_wr); ++ ++ xsc_dbg(context->dbg_fp, XSC_DBG_CTX, "send_ds_num:%u shift:%u recv_ds_num:%u shift:%u\n", ++ context->send_ds_num, context->send_ds_shift, ++ context->recv_ds_num, context->recv_ds_shift); ++ context->dump_fill_mkey = XSC_INVALID_LKEY; ++ context->dump_fill_mkey_be = htobe32(XSC_INVALID_LKEY); ++ context->eth_min_inline_size = XSC_ETH_L2_INLINE_HEADER_SIZE; ++ context->cmds_supp_uhw = resp.cmds_supp_uhw; ++ ++ pthread_mutex_init(&context->qp_table_mutex, NULL); ++ pthread_mutex_init(&context->uidx_table_mutex, NULL); ++ for (i = 0; i < XSC_QP_TABLE_SIZE; ++i) ++ context->qp_table[i].refcnt = 0; ++ ++ for (i = 0; i < XSC_QP_TABLE_SIZE; ++i) ++ context->uidx_table[i].refcnt = 0; ++ ++ context->db_list = NULL; ++ context->page_size = page_size; ++ if (xsc_mmap(xdev, context, cmd_fd, page_size)) ++ goto err_free; ++ ++ pthread_mutex_init(&context->db_list_mutex, NULL); ++ ++ context->hca_core_clock = NULL; ++ context->clock_info_page = NULL; ++ ++ xsc_read_env(ibdev, context); ++ ++ xsc_spinlock_init(&context->hugetlb_lock, !xsc_single_threaded); ++ list_head_init(&context->hugetlb_list); ++ ++ verbs_set_ops(v_ctx, &xsc_ctx_common_ops); ++ ++ memset(&device_attr, 0, sizeof(device_attr)); ++ if (!xsc_query_device_ex(&v_ctx->context, NULL, &device_attr, ++ sizeof(struct ibv_device_attr_ex))) { ++ context->cached_device_cap_flags = ++ device_attr.orig_attr.device_cap_flags; ++ context->atomic_cap = device_attr.orig_attr.atomic_cap; ++ context->cached_tso_caps = device_attr.tso_caps; ++ context->max_dm_size = device_attr.max_dm_size; ++ } ++ ++ for (j = 0; j < min(XSC_MAX_PORTS_NUM, context->num_ports); ++j) { ++ memset(&port_attr, 0, sizeof(port_attr)); ++ if (!xsc_query_port(&v_ctx->context, j + 1, &port_attr)) { ++ context->cached_link_layer[j] = port_attr.link_layer; ++ context->cached_port_flags[j] = port_attr.flags; ++ } ++ } ++ ++ return v_ctx; ++ ++err_free: ++ verbs_uninit_context(&context->ibv_ctx); ++ close_debug_file(context); ++ free(context); ++ return NULL; ++} ++ ++static void xsc_free_context(struct ibv_context *ibctx) ++{ ++ struct xsc_context *context = to_xctx(ibctx); ++ ++ xsc_dbg(context->dbg_fp, XSC_DBG_CTX, "\n"); ++ xsc_munmap(context); ++ ++ verbs_uninit_context(&context->ibv_ctx); ++ close_debug_file(context); ++ free(context); ++} ++ ++static void xsc_uninit_device(struct verbs_device *verbs_device) ++{ ++ struct xsc_device *xdev = to_xdev(&verbs_device->device); ++ ++ free(xdev); ++} ++ ++static struct verbs_device *xsc_device_alloc(struct verbs_sysfs_dev *sysfs_dev) ++{ ++ struct xsc_device *xdev; ++ ++ xdev = calloc(1, sizeof *xdev); ++ if (!xdev) ++ return NULL; ++ ++ xdev->page_size = sysconf(_SC_PAGESIZE); ++ xdev->driver_abi_ver = sysfs_dev->abi_ver; ++ ++ return &xdev->verbs_dev; ++} ++ ++static const struct verbs_device_ops xsc_dev_ops = { ++ .name = "xscale", ++ .match_min_abi_version = XSC_UVERBS_MIN_ABI_VERSION, ++ .match_max_abi_version = XSC_UVERBS_MAX_ABI_VERSION, ++ .match_table = hca_table, ++ .alloc_device = xsc_device_alloc, ++ .uninit_device = xsc_uninit_device, ++ .alloc_context = xsc_alloc_context, ++}; ++PROVIDER_DRIVER(xscale, xsc_dev_ops); +diff --git a/providers/xscale/xscale.h b/providers/xscale/xscale.h +new file mode 100644 +index 
0000000..0aee472 +--- /dev/null ++++ b/providers/xscale/xscale.h +@@ -0,0 +1,834 @@ ++/* ++ * Copyright (c) 2021 - 2022, Shanghai Yunsilicon Technology Co., Ltd. ++ * All rights reserved. ++ */ ++ ++#ifndef XSCALE_H ++#define XSCALE_H ++ ++#include ++#include ++#include ++#include ++#include ++ ++#include ++#include ++#include "xsc-abi.h" ++#include ++#include "bitmap.h" ++#include ++#include "xscdv.h" ++ ++#include ++ ++#define PFX "xsc: " ++#define offsetofend(_type, _member) \ ++ (offsetof(_type, _member) + sizeof(((_type *)0)->_member)) ++ ++typedef _Atomic(uint32_t) atomic_uint32_t; ++ ++enum { ++ XSC_IB_MMAP_CMD_SHIFT = 8, ++ XSC_IB_MMAP_CMD_MASK = 0xff, ++}; ++ ++enum { ++ XSC_CQE_VERSION_V0 = 0, ++ XSC_CQE_VERSION_V1 = 1, ++}; ++ ++enum { ++ XSC_ADAPTER_PAGE_SIZE = 4096, ++}; ++ ++enum { ++ XSC_QP_FLAG_RAWPACKET_TSO = 1 << 9, ++ XSC_QP_FLAG_RAWPACKET_TX = 1 << 10, ++}; ++ ++ ++#define XSC_CQ_PREFIX "XSC_CQ" ++#define XSC_QP_PREFIX "XSC_QP" ++#define XSC_MR_PREFIX "XSC_MR" ++#define XSC_RWQ_PREFIX "XSC_RWQ" ++#define XSC_MAX_LOG2_CONTIG_BLOCK_SIZE 23 ++#define XSC_MIN_LOG2_CONTIG_BLOCK_SIZE 12 ++ ++enum { ++ XSC_DBG_QP = 1 << 0, ++ XSC_DBG_CQ = 1 << 1, ++ XSC_DBG_QP_SEND = 1 << 2, ++ XSC_DBG_QP_SEND_ERR = 1 << 3, ++ XSC_DBG_CQ_CQE = 1 << 4, ++ XSC_DBG_CONTIG = 1 << 5, ++ XSC_DBG_DR = 1 << 6, ++ XSC_DBG_CTX = 1 << 7, ++ XSC_DBG_PD = 1 << 8, ++ XSC_DBG_MR = 1 << 9, ++}; ++ ++extern uint32_t xsc_debug_mask; ++extern int xsc_freeze_on_error_cqe; ++ ++#define XSC_DEBUG ++#ifdef XSC_DEBUG ++#define xsc_dbg(fp, mask, fmt, args...) \ ++do { \ ++ if (xsc_debug_mask & mask) { \ ++ char host[256]; \ ++ char timestr[32]; \ ++ struct tm now_tm; \ ++ time_t now_time; \ ++ time(&now_time); \ ++ localtime_r(&now_time, &now_tm); \ ++ strftime(timestr, sizeof(timestr), "%Y-%m-%d %X", &now_tm); \ ++ gethostname(host, 256); \ ++ fprintf(fp, "[%s %s %s %d] " fmt, timestr, host, __func__, __LINE__, ##args); \ ++ } \ ++} while (0) ++#else ++static inline void xsc_dbg(FILE *fp, uint32_t mask, const char *fmt, ...) ++{ ++} ++#endif ++ ++#define xsc_err(fmt, args...) 
\ ++do { \ ++ char host[256]; \ ++ char timestr[32]; \ ++ struct tm now_tm; \ ++ time_t now_time; \ ++ time(&now_time); \ ++ localtime_r(&now_time, &now_tm); \ ++ strftime(timestr, sizeof(timestr), "%Y-%m-%d %X", &now_tm); \ ++ gethostname(host, 256); \ ++ printf("[%s %s %s %d] " fmt, timestr, host, __func__, __LINE__, ##args); \ ++} while (0) ++ ++enum { ++ XSC_QP_TABLE_SHIFT = 12, ++ XSC_QP_TABLE_MASK = (1 << XSC_QP_TABLE_SHIFT) - 1, ++ XSC_QP_TABLE_SIZE = 1 << (24 - XSC_QP_TABLE_SHIFT), ++}; ++ ++enum { ++ XSC_UIDX_TABLE_SHIFT = 12, ++ XSC_UIDX_TABLE_MASK = (1 << XSC_UIDX_TABLE_SHIFT) - 1, ++ XSC_UIDX_TABLE_SIZE = 1 << (24 - XSC_UIDX_TABLE_SHIFT), ++}; ++ ++enum { ++ XSC_MAX_PORTS_NUM = 2, ++}; ++ ++enum xsc_alloc_type { ++ XSC_ALLOC_TYPE_ANON, ++ XSC_ALLOC_TYPE_HUGE, ++ XSC_ALLOC_TYPE_CONTIG, ++ XSC_ALLOC_TYPE_PREFER_HUGE, ++ XSC_ALLOC_TYPE_PREFER_CONTIG, ++ XSC_ALLOC_TYPE_EXTERNAL, ++ XSC_ALLOC_TYPE_ALL ++}; ++ ++enum xsc_rsc_type { ++ XSC_RSC_TYPE_QP, ++ XSC_RSC_TYPE_XSRQ, ++ XSC_RSC_TYPE_SRQ, ++ XSC_RSC_TYPE_RWQ, ++ XSC_RSC_TYPE_INVAL, ++}; ++ ++enum xsc_vendor_cap_flags { ++ XSC_VENDOR_CAP_FLAGS_MPW = 1 << 0, /* Obsoleted */ ++ XSC_VENDOR_CAP_FLAGS_MPW_ALLOWED = 1 << 1, ++ XSC_VENDOR_CAP_FLAGS_ENHANCED_MPW = 1 << 2, ++ XSC_VENDOR_CAP_FLAGS_CQE_128B_COMP = 1 << 3, ++ XSC_VENDOR_CAP_FLAGS_CQE_128B_PAD = 1 << 4, ++}; ++ ++enum { ++ XSC_FLOW_TAG_MASK = 0x00ffffff, ++}; ++ ++struct xsc_resource { ++ enum xsc_rsc_type type; ++ uint32_t rsn; ++}; ++ ++struct xsc_device { ++ struct verbs_device verbs_dev; ++ int page_size; ++ int driver_abi_ver; ++}; ++ ++struct xsc_db_page; ++ ++struct xsc_spinlock { ++ pthread_spinlock_t lock; ++ int in_use; ++ int need_lock; ++}; ++ ++/* PAGE_SHIFT determines the page size */ ++ ++#define PAGE_SHIFT 12 ++#define PAGE_SIZE (1UL << PAGE_SHIFT) ++#define PAGE_MASK (~(PAGE_SIZE-1)) ++ ++struct xsc_context { ++ struct verbs_context ibv_ctx; ++ int max_num_qps; ++ struct { ++ struct xsc_qp **table; ++ int refcnt; ++ } qp_table[XSC_QP_TABLE_SIZE]; ++ pthread_mutex_t qp_table_mutex; ++ ++ struct { ++ struct xsc_resource **table; ++ int refcnt; ++ } uidx_table[XSC_UIDX_TABLE_SIZE]; ++ pthread_mutex_t uidx_table_mutex; ++ ++ struct xsc_db_page *db_list; ++ pthread_mutex_t db_list_mutex; ++ int cache_line_size; ++ int max_sq_desc_sz; ++ int max_rq_desc_sz; ++ int max_send_wqebb; ++ int max_recv_wr; ++ int num_ports; ++ int stall_enable; ++ int stall_adaptive_enable; ++ int stall_cycles; ++ char hostname[40]; ++ struct xsc_spinlock hugetlb_lock; ++ struct list_head hugetlb_list; ++ int cqe_version; ++ uint8_t cached_link_layer[XSC_MAX_PORTS_NUM]; ++ uint8_t cached_port_flags[XSC_MAX_PORTS_NUM]; ++ unsigned int cached_device_cap_flags; ++ enum ibv_atomic_cap atomic_cap; ++ struct { ++ uint64_t offset; ++ uint64_t mask; ++ } core_clock; ++ void *hca_core_clock; ++ const struct xsc_ib_clock_info *clock_info_page; ++ struct ibv_tso_caps cached_tso_caps; ++ int cmds_supp_uhw; ++ uint64_t vendor_cap_flags; /* Use enum xsc_vendor_cap_flags */ ++ struct xscdv_cqe_comp_caps cqe_comp_caps; ++ struct xscdv_ctx_allocators extern_alloc; ++ struct xscdv_sw_parsing_caps sw_parsing_caps; ++ struct xscdv_striding_rq_caps striding_rq_caps; ++ uint32_t tunnel_offloads_caps; ++ struct xsc_packet_pacing_caps packet_pacing_caps; ++ uint16_t flow_action_flags; ++ uint64_t max_dm_size; ++ uint32_t eth_min_inline_size; ++ uint32_t dump_fill_mkey; ++ __be32 dump_fill_mkey_be; ++ void *sqm_reg_va; ++ void *rqm_reg_va; ++ void *cqm_reg_va; ++ void *cqm_armdb_va; ++ int db_mmap_size; ++ 
uint32_t page_size; ++ uint64_t qpm_tx_db; ++ uint64_t qpm_rx_db; ++ uint64_t cqm_next_cid_reg; ++ uint64_t cqm_armdb; ++ uint32_t send_ds_num; ++ uint32_t recv_ds_num; ++ uint32_t send_ds_shift; ++ uint32_t recv_ds_shift; ++ FILE *dbg_fp; ++}; ++ ++struct xsc_bitmap { ++ uint32_t last; ++ uint32_t top; ++ uint32_t max; ++ uint32_t avail; ++ uint32_t mask; ++ unsigned long *table; ++}; ++ ++struct xsc_hugetlb_mem { ++ int shmid; ++ void *shmaddr; ++ struct xsc_bitmap bitmap; ++ struct list_node entry; ++}; ++ ++struct xsc_buf { ++ void *buf; ++ size_t length; ++ int base; ++ struct xsc_hugetlb_mem *hmem; ++ enum xsc_alloc_type type; ++}; ++ ++struct xsc_pd { ++ struct ibv_pd ibv_pd; ++ uint32_t pdn; ++ atomic_int refcount; ++ struct xsc_pd *xprotection_domain; ++}; ++ ++struct xsc_parent_domain { ++ struct xsc_pd xpd; ++}; ++ ++enum { ++ XSC_CQ_FLAGS_RX_CSUM_VALID = 1 << 0, ++ XSC_CQ_FLAGS_EMPTY_DURING_POLL = 1 << 1, ++ XSC_CQ_FLAGS_FOUND_CQES = 1 << 2, ++ XSC_CQ_FLAGS_EXTENDED = 1 << 3, ++ XSC_CQ_FLAGS_SINGLE_THREADED = 1 << 4, ++ XSC_CQ_FLAGS_DV_OWNED = 1 << 5, ++ XSC_CQ_FLAGS_TM_SYNC_REQ = 1 << 6, ++}; ++ ++struct xsc_err_state_qp_node { ++ struct list_node entry; ++ uint32_t qp_id; ++ bool is_sq; ++}; ++ ++struct xsc_cq { ++ /* ibv_cq should always be subset of ibv_cq_ex */ ++ struct verbs_cq verbs_cq; ++ struct xsc_buf buf_a; ++ struct xsc_buf buf_b; ++ struct xsc_buf *active_buf; ++ struct xsc_buf *resize_buf; ++ int resize_cqes; ++ int active_cqes; ++ struct xsc_spinlock lock; ++ uint32_t cqn; ++ uint32_t cons_index; ++ __le32 *dbrec; ++ __le32 *db; ++ __le32 *armdb; ++ uint32_t cqe_cnt; ++ int log2_cq_ring_sz; ++ int arm_sn; ++ int cqe_sz; ++ int resize_cqe_sz; ++ int stall_next_poll; ++ int stall_enable; ++ uint64_t stall_last_count; ++ int stall_adaptive_enable; ++ int stall_cycles; ++ struct xsc_resource *cur_rsc; ++ struct xsc_cqe64 *cqe64; ++ uint32_t flags; ++ int umr_opcode; ++ struct xscdv_clock_info last_clock_info; ++ bool disable_flush_error_cqe; ++ struct list_head err_state_qp_list; ++}; ++ ++struct wr_list { ++ uint16_t opcode; ++ uint16_t next; ++}; ++ ++struct xsc_wq { ++ uint64_t *wrid; ++ unsigned *wqe_head; ++ struct xsc_spinlock lock; ++ unsigned wqe_cnt; ++ unsigned max_post; ++ unsigned head; ++ unsigned tail; ++ unsigned cur_post; ++ int max_gs; ++ int wqe_shift; ++ int offset; ++ void *qend; ++ uint32_t *wr_data; ++ __le32 *db; ++ unsigned ds_cnt; ++ unsigned seg_cnt; ++ unsigned *wr_opcode; ++ unsigned *need_flush; ++ unsigned flush_wqe_cnt; ++}; ++ ++struct xsc_dm { ++ struct verbs_dm verbs_dm; ++ size_t length; ++ void *mmap_va; ++ void *start_va; ++}; ++ ++struct xsc_mr { ++ struct verbs_mr vmr; ++ struct xsc_buf buf; ++ uint32_t alloc_flags; ++}; ++ ++enum xsc_qp_flags { ++ XSC_QP_FLAGS_USE_UNDERLAY = 0x01, ++}; ++ ++struct xsc_qp { ++ struct xsc_resource rsc; /* This struct must be first */ ++ struct verbs_qp verbs_qp; ++ struct ibv_qp *ibv_qp; ++ struct xsc_buf buf; ++ void *sq_start; ++ void *rq_start; ++ int max_inline_data; ++ int buf_size; ++ /* For Raw Packet QP, use different buffers for the SQ and RQ */ ++ struct xsc_buf sq_buf; ++ int sq_buf_size; ++ ++ uint8_t fm_cache; ++ uint8_t sq_signal_bits; ++ struct xsc_wq sq; ++ ++ __le32 *db; ++ struct xsc_wq rq; ++ int wq_sig; ++ uint32_t qp_cap_cache; ++ int atomics_enabled; ++ uint32_t max_tso; ++ uint16_t max_tso_header; ++ int rss_qp; ++ uint32_t flags; /* Use enum xsc_qp_flags */ ++ enum xscdv_dc_type dc_type; ++ uint32_t tirn; ++ uint32_t tisn; ++ uint32_t rqn; ++ uint32_t sqn; ++}; ++ 
++struct xsc_ah { ++ struct ibv_ah ibv_ah; ++ struct xsc_wqe_av av; ++ bool kern_ah; ++}; ++ ++struct xsc_rwq { ++ struct xsc_resource rsc; ++ struct ibv_wq wq; ++ struct xsc_buf buf; ++ int buf_size; ++ struct xsc_wq rq; ++ __le32 *db; ++ void *pbuff; ++ __le32 *recv_db; ++ int wq_sig; ++}; ++ ++struct xsc_counter_node { ++ uint32_t index; ++ struct list_node entry; ++ enum ibv_counter_description desc; ++}; ++ ++struct xsc_counters { ++ struct verbs_counters vcounters; ++ struct list_head counters_list; ++ pthread_mutex_t lock; ++ uint32_t ncounters; ++ /* number of bounded objects */ ++ int refcount; ++}; ++ ++struct xsc_flow { ++ struct ibv_flow flow_id; ++ struct xsc_counters *mcounters; ++}; ++ ++struct xscdv_flow_matcher { ++ struct ibv_context *context; ++ uint32_t handle; ++}; ++ ++struct xscdv_devx_obj { ++ struct ibv_context *context; ++ uint32_t handle; ++}; ++ ++struct xsc_devx_umem { ++ struct xscdv_devx_umem dv_devx_umem; ++ struct ibv_context *context; ++ uint32_t handle; ++ void *addr; ++ size_t size; ++}; ++ ++union xsc_ib_fw_ver { ++ uint64_t data; ++ struct { ++ uint8_t ver_major; ++ uint8_t ver_minor; ++ uint16_t ver_patch; ++ uint32_t ver_tweak; ++ } s; ++}; ++ ++static inline int xsc_ilog2(int n) ++{ ++ int t; ++ ++ if (n <= 0) ++ return -1; ++ ++ t = 0; ++ while ((1 << t) < n) ++ ++t; ++ ++ return t; ++} ++ ++extern int xsc_stall_num_loop; ++extern int xsc_stall_cq_poll_min; ++extern int xsc_stall_cq_poll_max; ++extern int xsc_stall_cq_inc_step; ++extern int xsc_stall_cq_dec_step; ++extern int xsc_single_threaded; ++ ++static inline unsigned DIV_ROUND_UP(unsigned n, unsigned d) ++{ ++ return (n + d - 1u) / d; ++} ++ ++static inline unsigned long align(unsigned long val, unsigned long align) ++{ ++ return (val + align - 1) & ~(align - 1); ++} ++ ++static inline struct xsc_device *to_xdev(struct ibv_device *ibdev) ++{ ++ return container_of(ibdev, struct xsc_device, verbs_dev.device); ++} ++ ++static inline struct xsc_context *to_xctx(struct ibv_context *ibctx) ++{ ++ return container_of(ibctx, struct xsc_context, ibv_ctx.context); ++} ++ ++/* to_xpd always returns the real xsc_pd object ie the protection domain. */ ++static inline struct xsc_pd *to_xpd(struct ibv_pd *ibpd) ++{ ++ struct xsc_pd *xpd = container_of(ibpd, struct xsc_pd, ibv_pd); ++ ++ if (xpd->xprotection_domain) ++ return xpd->xprotection_domain; ++ ++ return xpd; ++} ++ ++static inline struct xsc_parent_domain *to_xparent_domain(struct ibv_pd *ibpd) ++{ ++ struct xsc_parent_domain *xparent_domain = ++ ibpd ? 
container_of(ibpd, struct xsc_parent_domain, xpd.ibv_pd) : NULL; ++ ++ if (xparent_domain && xparent_domain->xpd.xprotection_domain) ++ return xparent_domain; ++ ++ /* Otherwise ibpd isn't a parent_domain */ ++ return NULL; ++} ++ ++static inline struct xsc_cq *to_xcq(struct ibv_cq *ibcq) ++{ ++ return container_of((struct ibv_cq_ex *)ibcq, struct xsc_cq, verbs_cq.cq_ex); ++} ++ ++static inline struct xsc_qp *to_xqp(struct ibv_qp *ibqp) ++{ ++ struct verbs_qp *vqp = (struct verbs_qp *)ibqp; ++ ++ return container_of(vqp, struct xsc_qp, verbs_qp); ++} ++ ++static inline struct xsc_rwq *to_xrwq(struct ibv_wq *ibwq) ++{ ++ return container_of(ibwq, struct xsc_rwq, wq); ++} ++ ++static inline struct xsc_dm *to_xdm(struct ibv_dm *ibdm) ++{ ++ return container_of(ibdm, struct xsc_dm, verbs_dm.dm); ++} ++ ++static inline struct xsc_mr *to_xmr(struct ibv_mr *ibmr) ++{ ++ return container_of(ibmr, struct xsc_mr, vmr.ibv_mr); ++} ++ ++static inline struct xsc_ah *to_xah(struct ibv_ah *ibah) ++{ ++ return container_of(ibah, struct xsc_ah, ibv_ah); ++} ++ ++static inline int max_int(int a, int b) ++{ ++ return a > b ? a : b; ++} ++ ++static inline struct xsc_qp *rsc_to_xqp(struct xsc_resource *rsc) ++{ ++ return (struct xsc_qp *)rsc; ++} ++ ++static inline struct xsc_rwq *rsc_to_mrwq(struct xsc_resource *rsc) ++{ ++ return (struct xsc_rwq *)rsc; ++} ++ ++static inline struct xsc_counters *to_mcounters(struct ibv_counters *ibcounters) ++{ ++ return container_of(ibcounters, struct xsc_counters, vcounters.counters); ++} ++ ++static inline struct xsc_flow *to_mflow(struct ibv_flow *flow_id) ++{ ++ return container_of(flow_id, struct xsc_flow, flow_id); ++} ++ ++int xsc_alloc_buf(struct xsc_buf *buf, size_t size, int page_size); ++void xsc_free_buf(struct xsc_buf *buf); ++int xsc_alloc_buf_contig(struct xsc_context *xctx, struct xsc_buf *buf, ++ size_t size, int page_size, const char *component); ++void xsc_free_buf_contig(struct xsc_context *xctx, struct xsc_buf *buf); ++int xsc_alloc_prefered_buf(struct xsc_context *xctx, ++ struct xsc_buf *buf, ++ size_t size, int page_size, ++ enum xsc_alloc_type alloc_type, ++ const char *component); ++int xsc_free_actual_buf(struct xsc_context *ctx, struct xsc_buf *buf); ++void xsc_get_alloc_type(struct xsc_context *context, ++ const char *component, ++ enum xsc_alloc_type *alloc_type, ++ enum xsc_alloc_type default_alloc_type); ++int xsc_use_huge(const char *key); ++bool xsc_is_extern_alloc(struct xsc_context *context); ++int xsc_alloc_buf_extern(struct xsc_context *ctx, struct xsc_buf *buf, ++ size_t size); ++void xsc_free_buf_extern(struct xsc_context *ctx, struct xsc_buf *buf); ++ ++__le32 *xsc_alloc_dbrec(struct xsc_context *context); ++void xsc_free_db(struct xsc_context *context, __le32 *db); ++ ++int xsc_query_device(struct ibv_context *context, ++ struct ibv_device_attr *attr); ++int xsc_query_device_ex(struct ibv_context *context, ++ const struct ibv_query_device_ex_input *input, ++ struct ibv_device_attr_ex *attr, ++ size_t attr_size); ++int xsc_query_rt_values(struct ibv_context *context, ++ struct ibv_values_ex *values); ++struct ibv_qp *xsc_create_qp_ex(struct ibv_context *context, ++ struct ibv_qp_init_attr_ex *attr); ++int xsc_query_port(struct ibv_context *context, uint8_t port, ++ struct ibv_port_attr *attr); ++ ++struct ibv_pd *xsc_alloc_pd(struct ibv_context *context); ++int xsc_free_pd(struct ibv_pd *pd); ++ ++struct ibv_mr *xsc_alloc_null_mr(struct ibv_pd *pd); ++struct ibv_mr *xsc_reg_mr(struct ibv_pd *pd, void *addr, ++ size_t length, uint64_t 
hca_va, int access); ++int xsc_rereg_mr(struct verbs_mr *mr, int flags, struct ibv_pd *pd, void *addr, ++ size_t length, int access); ++int xsc_dereg_mr(struct verbs_mr *mr); ++struct ibv_cq *xsc_create_cq(struct ibv_context *context, int cqe, ++ struct ibv_comp_channel *channel, ++ int comp_vector); ++struct ibv_cq_ex *xsc_create_cq_ex(struct ibv_context *context, ++ struct ibv_cq_init_attr_ex *cq_attr); ++int xsc_cq_fill_pfns(struct xsc_cq *cq, ++ const struct ibv_cq_init_attr_ex *cq_attr, ++ struct xsc_context *xctx); ++int xsc_alloc_cq_buf(struct xsc_context *xctx, struct xsc_cq *cq, ++ struct xsc_buf *buf, int nent, int cqe_sz); ++int xsc_free_cq_buf(struct xsc_context *ctx, struct xsc_buf *buf); ++int xsc_resize_cq(struct ibv_cq *cq, int cqe); ++int xsc_modify_cq(struct ibv_cq *cq, struct ibv_modify_cq_attr *attr); ++int xsc_destroy_cq(struct ibv_cq *cq); ++int xsc_poll_cq(struct ibv_cq *cq, int ne, struct ibv_wc *wc); ++int xsc_arm_cq(struct ibv_cq *cq, int solicited); ++void xsc_cq_event(struct ibv_cq *cq); ++void __xsc_cq_clean(struct xsc_cq *cq, uint32_t qpn); ++void xsc_cq_clean(struct xsc_cq *cq, uint32_t qpn); ++ ++struct ibv_qp *xsc_create_qp(struct ibv_pd *pd, struct ibv_qp_init_attr *attr); ++int xsc_query_qp(struct ibv_qp *qp, struct ibv_qp_attr *attr, ++ int attr_mask, ++ struct ibv_qp_init_attr *init_attr); ++int xsc_modify_qp(struct ibv_qp *qp, struct ibv_qp_attr *attr, ++ int attr_mask); ++int xsc_modify_qp_rate_limit(struct ibv_qp *qp, ++ struct ibv_qp_rate_limit_attr *attr); ++int xsc_destroy_qp(struct ibv_qp *qp); ++void xsc_init_qp_indices(struct xsc_qp *qp); ++void xsc_init_rwq_indices(struct xsc_rwq *rwq); ++int xsc_post_send(struct ibv_qp *ibqp, struct ibv_send_wr *wr, ++ struct ibv_send_wr **bad_wr); ++int xsc_post_recv(struct ibv_qp *ibqp, struct ibv_recv_wr *wr, ++ struct ibv_recv_wr **bad_wr); ++int xsc_post_wq_recv(struct ibv_wq *ibwq, struct ibv_recv_wr *wr, ++ struct ibv_recv_wr **bad_wr); ++struct xsc_qp *xsc_find_qp(struct xsc_context *ctx, uint32_t qpn); ++int xsc_store_qp(struct xsc_context *ctx, uint32_t qpn, struct xsc_qp *qp); ++void xsc_clear_qp(struct xsc_context *ctx, uint32_t qpn); ++int xsc_err_state_qp(struct ibv_qp *qp, enum ibv_qp_state cur_state, ++ enum ibv_qp_state state); ++int32_t xsc_store_uidx(struct xsc_context *ctx, void *rsc); ++void xsc_clear_uidx(struct xsc_context *ctx, uint32_t uidx); ++struct ibv_ah *xsc_create_ah(struct ibv_pd *pd, struct ibv_ah_attr *attr); ++int xsc_destroy_ah(struct ibv_ah *ah); ++int xsc_attach_mcast(struct ibv_qp *qp, const union ibv_gid *gid, uint16_t lid); ++int xsc_detach_mcast(struct ibv_qp *qp, const union ibv_gid *gid, uint16_t lid); ++int xsc_round_up_power_of_two(long long sz); ++void *xsc_get_send_wqe(struct xsc_qp *qp, int n); ++struct ibv_xrcd *xsc_open_xrcd(struct ibv_context *context, ++ struct ibv_xrcd_init_attr *xrcd_init_attr); ++int xsc_close_xrcd(struct ibv_xrcd *ib_xrcd); ++struct ibv_wq *xsc_create_wq(struct ibv_context *context, ++ struct ibv_wq_init_attr *attr); ++int xsc_modify_wq(struct ibv_wq *wq, struct ibv_wq_attr *attr); ++int xsc_destroy_wq(struct ibv_wq *wq); ++struct ibv_rwq_ind_table *xsc_create_rwq_ind_table(struct ibv_context *context, ++ struct ibv_rwq_ind_table_init_attr *init_attr); ++int xsc_destroy_rwq_ind_table(struct ibv_rwq_ind_table *rwq_ind_table); ++struct ibv_flow *xsc_create_flow(struct ibv_qp *qp, struct ibv_flow_attr *flow_attr); ++int xsc_destroy_flow(struct ibv_flow *flow_id); ++struct ibv_flow_action *xsc_create_flow_action_esp(struct ibv_context *ctx, 
++ struct ibv_flow_action_esp_attr *attr); ++int xsc_destroy_flow_action(struct ibv_flow_action *action); ++int xsc_modify_flow_action_esp(struct ibv_flow_action *action, ++ struct ibv_flow_action_esp_attr *attr); ++ ++struct ibv_dm *xsc_alloc_dm(struct ibv_context *context, ++ struct ibv_alloc_dm_attr *dm_attr); ++int xsc_free_dm(struct ibv_dm *ibdm); ++struct ibv_mr *xsc_reg_dm_mr(struct ibv_pd *pd, struct ibv_dm *ibdm, ++ uint64_t dm_offset, size_t length, ++ unsigned int acc); ++ ++struct ibv_pd *xsc_alloc_parent_domain(struct ibv_context *context, ++ struct ibv_parent_domain_init_attr *attr); ++ ++ ++struct ibv_counters *xsc_create_counters(struct ibv_context *context, ++ struct ibv_counters_init_attr *init_attr); ++int xsc_destroy_counters(struct ibv_counters *counters); ++int xsc_attach_counters_point_flow(struct ibv_counters *counters, ++ struct ibv_counter_attach_attr *attr, ++ struct ibv_flow *flow); ++int xsc_read_counters(struct ibv_counters *counters, ++ uint64_t *counters_value, ++ uint32_t ncounters, ++ uint32_t flags); ++ ++static inline void *xsc_find_uidx(struct xsc_context *ctx, uint32_t uidx) ++{ ++ int tind = uidx >> XSC_UIDX_TABLE_SHIFT; ++ ++ if (likely(ctx->uidx_table[tind].refcnt)) ++ return ctx->uidx_table[tind].table[uidx & XSC_UIDX_TABLE_MASK]; ++ ++ return NULL; ++} ++ ++static inline int xsc_spin_lock(struct xsc_spinlock *lock) ++{ ++ if (lock->need_lock) ++ return pthread_spin_lock(&lock->lock); ++ ++ if (unlikely(lock->in_use)) { ++ fprintf(stderr, "*** ERROR: multithreading vilation ***\n" ++ "You are running a multithreaded application but\n" ++ "you set XSC_SINGLE_THREADED=1. Please unset it.\n"); ++ abort(); ++ } else { ++ lock->in_use = 1; ++ /* ++ * This fence is not at all correct, but it increases the ++ * chance that in_use is detected by another thread without ++ * much runtime cost. 
*/ ++ atomic_thread_fence(memory_order_acq_rel); ++ } ++ ++ return 0; ++} ++ ++static inline int xsc_spin_unlock(struct xsc_spinlock *lock) ++{ ++ if (lock->need_lock) ++ return pthread_spin_unlock(&lock->lock); ++ ++ lock->in_use = 0; ++ ++ return 0; ++} ++ ++static inline int xsc_spinlock_init(struct xsc_spinlock *lock, int need_lock) ++{ ++ lock->in_use = 0; ++ lock->need_lock = need_lock; ++ return pthread_spin_init(&lock->lock, PTHREAD_PROCESS_PRIVATE); ++} ++ ++static inline int xsc_spinlock_init_pd(struct xsc_spinlock *lock, struct ibv_pd *pd) ++{ ++ int thread_safe = xsc_single_threaded; ++ ++ return xsc_spinlock_init(lock, !thread_safe); ++} ++ ++static inline int xsc_spinlock_destroy(struct xsc_spinlock *lock) ++{ ++ return pthread_spin_destroy(&lock->lock); ++} ++ ++static inline void set_command(int command, off_t *offset) ++{ ++ *offset |= (command << XSC_IB_MMAP_CMD_SHIFT); ++} ++ ++static inline void set_arg(int arg, off_t *offset) ++{ ++ *offset |= arg; ++} ++ ++static inline void set_order(int order, off_t *offset) ++{ ++ set_arg(order, offset); ++} ++ ++static inline void set_index(int index, off_t *offset) ++{ ++ set_arg(index, offset); ++} ++ ++static inline void set_extended_index(int index, off_t *offset) ++{ ++ *offset |= (index & 0xff) | ((index >> 8) << 16); ++} ++ ++static inline uint8_t calc_sig(void *wqe, int size) ++{ ++ int i; ++ uint8_t *p = wqe; ++ uint8_t res = 0; ++ ++ for (i = 0; i < size; ++i) ++ res ^= p[i]; ++ ++ return ~res; ++} ++ ++#endif /* XSC_H */ +diff --git a/providers/xscale/xscdv.h b/providers/xscale/xscdv.h +new file mode 100644 +index 0000000..98d2daf +--- /dev/null ++++ b/providers/xscale/xscdv.h +@@ -0,0 +1,876 @@ ++/* ++ * Copyright (c) 2021 - 2022, Shanghai Yunsilicon Technology Co., Ltd. ++ * All rights reserved. 
++ */ ++ ++#ifndef _XSCDV_H_ ++#define _XSCDV_H_ ++ ++#include ++#include /* For the __be64 type */ ++#include ++#include ++#if defined(__SSE3__) ++#include ++#include ++#include ++#endif /* defined(__SSE3__) */ ++ ++#include ++#include ++#include ++ ++#ifdef __cplusplus ++extern "C" { ++#endif ++ ++/* Always inline the functions */ ++#ifdef __GNUC__ ++#define XSCDV_ALWAYS_INLINE inline __attribute__((always_inline)) ++#else ++#define XSCDV_ALWAYS_INLINE inline ++#endif ++ ++enum { ++ XSC_RCV_DBR = 0, ++ XSC_SND_DBR = 1, ++}; ++ ++enum xscdv_context_comp_mask { ++ XSCDV_CONTEXT_MASK_CQE_COMPRESION = 1 << 0, ++ XSCDV_CONTEXT_MASK_SWP = 1 << 1, ++ XSCDV_CONTEXT_MASK_STRIDING_RQ = 1 << 2, ++ XSCDV_CONTEXT_MASK_TUNNEL_OFFLOADS = 1 << 3, ++ XSCDV_CONTEXT_MASK_DYN_BFREGS = 1 << 4, ++ XSCDV_CONTEXT_MASK_CLOCK_INFO_UPDATE = 1 << 5, ++ XSCDV_CONTEXT_MASK_FLOW_ACTION_FLAGS = 1 << 6, ++}; ++ ++struct xscdv_cqe_comp_caps { ++ uint32_t max_num; ++ uint32_t supported_format; /* enum xscdv_cqe_comp_res_format */ ++}; ++ ++struct xscdv_sw_parsing_caps { ++ uint32_t sw_parsing_offloads; /* Use enum xscdv_sw_parsing_offloads */ ++ uint32_t supported_qpts; ++}; ++ ++struct xscdv_striding_rq_caps { ++ uint32_t min_single_stride_log_num_of_bytes; ++ uint32_t max_single_stride_log_num_of_bytes; ++ uint32_t min_single_wqe_log_num_of_strides; ++ uint32_t max_single_wqe_log_num_of_strides; ++ uint32_t supported_qpts; ++}; ++ ++/* ++ * Direct verbs device-specific attributes ++ */ ++struct xscdv_context { ++ uint8_t version; ++ uint64_t flags; ++ uint64_t comp_mask; ++ struct xscdv_cqe_comp_caps cqe_comp_caps; ++ struct xscdv_sw_parsing_caps sw_parsing_caps; ++ struct xscdv_striding_rq_caps striding_rq_caps; ++ uint32_t tunnel_offloads_caps; ++ uint64_t max_clock_info_update_nsec; ++ uint32_t flow_action_flags; ++}; ++ ++enum xscsdv_context_flags { ++ /* ++ * This flag indicates if CQE version 0 or 1 is needed. 
++ */ ++ XSCDV_CONTEXT_FLAGS_CQE_V1 = (1 << 0), ++ XSCDV_CONTEXT_FLAGS_OBSOLETE = (1 << 1), /* Obsoleted, don't use */ ++ XSCDV_CONTEXT_FLAGS_MPW_ALLOWED = (1 << 2), ++ XSCDV_CONTEXT_FLAGS_ENHANCED_MPW = (1 << 3), ++ XSCDV_CONTEXT_FLAGS_CQE_128B_COMP = (1 << 4), /* Support CQE 128B compression */ ++ XSCDV_CONTEXT_FLAGS_CQE_128B_PAD = (1 << 5), /* Support CQE 128B padding */ ++}; ++ ++enum xscdv_cq_init_attr_mask { ++ XSCDV_CQ_INIT_ATTR_MASK_COMPRESSED_CQE = 1 << 0, ++ XSCDV_CQ_INIT_ATTR_MASK_FLAGS = 1 << 1, ++ XSCDV_CQ_INIT_ATTR_MASK_CQE_SIZE = 1 << 2, ++}; ++ ++struct xscdv_cq_init_attr { ++ uint64_t comp_mask; /* Use enum xscdv_cq_init_attr_mask */ ++ uint8_t cqe_comp_res_format; /* Use enum xscdv_cqe_comp_res_format */ ++ uint32_t flags; ++ uint16_t cqe_size; /* when XSCDV_CQ_INIT_ATTR_MASK_CQE_SIZE set */ ++}; ++ ++struct ibv_cq_ex *xscdv_create_cq(struct ibv_context *context, ++ struct ibv_cq_init_attr_ex *cq_attr, ++ struct xscdv_cq_init_attr *xcq_attr); ++ ++enum xscdv_qp_create_flags { ++ XSCDV_QP_CREATE_TUNNEL_OFFLOADS = 1 << 0, ++ XSCDV_QP_CREATE_TIR_ALLOW_SELF_LOOPBACK_UC = 1 << 1, ++ XSCDV_QP_CREATE_TIR_ALLOW_SELF_LOOPBACK_MC = 1 << 2, ++ XSCDV_QP_CREATE_DISABLE_SCATTER_TO_CQE = 1 << 3, ++ XSCDV_QP_CREATE_ALLOW_SCATTER_TO_CQE = 1 << 4, ++}; ++ ++enum xscdv_qp_init_attr_mask { ++ XSCDV_QP_INIT_ATTR_MASK_QP_CREATE_FLAGS = 1 << 0, ++ XSCDV_QP_INIT_ATTR_MASK_DC = 1 << 1, ++}; ++ ++enum xscdv_dc_type { ++ XSCDV_DCTYPE_DCT = 1, ++ XSCDV_DCTYPE_DCI, ++}; ++ ++struct xscdv_dc_init_attr { ++ enum xscdv_dc_type dc_type; ++ uint64_t dct_access_key; ++}; ++ ++struct xscdv_qp_init_attr { ++ uint64_t comp_mask; /* Use enum xscdv_qp_init_attr_mask */ ++ uint32_t create_flags; /* Use enum xsc_qp_create_flags */ ++ struct xscdv_dc_init_attr dc_init_attr; ++}; ++ ++struct ibv_qp *xscdv_create_qp(struct ibv_context *context, ++ struct ibv_qp_init_attr_ex *qp_attr, ++ struct xscdv_qp_init_attr *xqp_attr); ++ ++enum xscdv_flow_action_esp_mask { ++ XSCDV_FLOW_ACTION_ESP_MASK_FLAGS = 1 << 0, ++}; ++ ++struct xscdv_flow_action_esp { ++ uint64_t comp_mask; /* Use enum xscdv_flow_action_esp_mask */ ++ uint32_t action_flags; /* Use enum xscdv_flow_action_flags */ ++}; ++ ++struct xscdv_flow_match_parameters { ++ size_t match_sz; ++ uint64_t match_buf[]; /* Device spec format */ ++}; ++ ++struct xscdv_flow_matcher_attr { ++ enum ibv_flow_attr_type type; ++ uint32_t flags; /* From enum ibv_flow_flags */ ++ uint16_t priority; ++ uint8_t match_criteria_enable; /* Device spec format */ ++ struct xscdv_flow_match_parameters *match_mask; ++ uint64_t comp_mask; ++}; ++ ++struct xscdv_flow_matcher; ++ ++struct xscdv_flow_matcher * ++xscdv_create_flow_matcher(struct ibv_context *context, ++ struct xscdv_flow_matcher_attr *matcher_attr); ++ ++int xscdv_destroy_flow_matcher(struct xscdv_flow_matcher *matcher); ++ ++enum xscdv_flow_action_type { ++ XSCDV_FLOW_ACTION_DEST_IBV_QP, ++ XSCDV_FLOW_ACTION_DROP, ++ XSCDV_FLOW_ACTION_IBV_COUNTER, ++ XSCDV_FLOW_ACTION_IBV_FLOW_ACTION, ++ XSCDV_FLOW_ACTION_TAG, ++ XSCDV_FLOW_ACTION_DEST_DEVX, ++}; ++ ++struct xscdv_flow_action_attr { ++ enum xscdv_flow_action_type type; ++ union { ++ struct ibv_qp *qp; ++ struct ibv_counters *counter; ++ struct ibv_flow_action *action; ++ uint32_t tag_value; ++ struct xscdv_devx_obj *obj; ++ }; ++}; ++ ++struct ibv_flow * ++xscdv_create_flow(struct xscdv_flow_matcher *matcher, ++ struct xscdv_flow_match_parameters *match_value, ++ size_t num_actions, ++ struct xscdv_flow_action_attr actions_attr[]); ++ ++struct ibv_flow_action 
*xscdv_create_flow_action_esp(struct ibv_context *ctx, ++ struct ibv_flow_action_esp_attr *esp, ++ struct xscdv_flow_action_esp *xattr); ++ ++/* ++ * xscdv_create_flow_action_modify_header - Create a flow action which mutates ++ * a packet. The flow action can be attached to steering rules via ++ * ibv_create_flow(). ++ * ++ * @ctx: RDMA device context to create the action on. ++ * @actions_sz: The size of *actions* buffer in bytes. ++ * @actions: A buffer which contains modify actions provided in device spec ++ * format. ++ * @ft_type: Defines the flow table type to which the modify ++ * header action will be attached. ++ * ++ * Return a valid ibv_flow_action if successful, NULL otherwise. ++ */ ++struct ibv_flow_action * ++xscdv_create_flow_action_modify_header(struct ibv_context *ctx, ++ size_t actions_sz, ++ uint64_t actions[], ++ enum xscdv_flow_table_type ft_type); ++ ++/* ++ * xscdv_create_flow_action_packet_reformat - Create flow action which can ++ * encap/decap packets. ++ */ ++struct ibv_flow_action * ++xscdv_create_flow_action_packet_reformat(struct ibv_context *ctx, ++ size_t data_sz, ++ void *data, ++ enum xscdv_flow_action_packet_reformat_type reformat_type, ++ enum xscdv_flow_table_type ft_type); ++/* ++ * Most device capabilities are exported by ibv_query_device(...), ++ * but there is HW device-specific information which is important ++ * for data-path, but isn't provided. ++ * ++ * Return 0 on success. ++ */ ++int xscdv_query_device(struct ibv_context *ctx_in, ++ struct xscdv_context *attrs_out); ++ ++enum xscdv_qp_comp_mask { ++ XSCDV_QP_MASK_UAR_MMAP_OFFSET = 1 << 0, ++ XSCDV_QP_MASK_RAW_QP_HANDLES = 1 << 1, ++}; ++ ++struct xscdv_qp { ++ __le32 *dbrec; ++ struct { ++ void *buf; ++ uint32_t wqe_cnt; ++ uint32_t stride; ++ __le32 *db; ++ } sq; ++ struct { ++ void *buf; ++ uint32_t wqe_cnt; ++ uint32_t stride; ++ __le32 *db; ++ } rq; ++ uint64_t comp_mask; ++ uint32_t tirn; ++ uint32_t tisn; ++ uint32_t rqn; ++ uint32_t sqn; ++}; ++ ++struct xscdv_cq { ++ void *buf; ++ __le32 *dbrec; ++ __le32 *db; ++ uint32_t cqe_cnt; ++ uint32_t cqe_size; ++ uint32_t cqn; ++ uint64_t comp_mask; ++}; ++ ++struct xscdv_rwq { ++ void *buf; ++ __le32 *dbrec; ++ uint32_t wqe_cnt; ++ uint32_t stride; ++ uint64_t comp_mask; ++ __le32 *db; ++}; ++ ++struct xscdv_dm { ++ void *buf; ++ uint64_t length; ++ uint64_t comp_mask; ++}; ++ ++struct xsc_wqe_av; ++ ++struct xscdv_ah { ++ struct xsc_wqe_av *av; ++ uint64_t comp_mask; ++}; ++ ++struct xscdv_pd { ++ uint32_t pdn; ++ uint64_t comp_mask; ++}; ++ ++struct xscdv_obj { ++ struct { ++ struct ibv_qp *in; ++ struct xscdv_qp *out; ++ } qp; ++ struct { ++ struct ibv_cq *in; ++ struct xscdv_cq *out; ++ } cq; ++ struct { ++ struct ibv_wq *in; ++ struct xscdv_rwq *out; ++ } rwq; ++ struct { ++ struct ibv_dm *in; ++ struct xscdv_dm *out; ++ } dm; ++ struct { ++ struct ibv_ah *in; ++ struct xscdv_ah *out; ++ } ah; ++ struct { ++ struct ibv_pd *in; ++ struct xscdv_pd *out; ++ } pd; ++}; ++ ++enum xscdv_obj_type { ++ XSCDV_OBJ_QP = 1 << 0, ++ XSCDV_OBJ_CQ = 1 << 1, ++ XSCDV_OBJ_SRQ = 1 << 2, ++ XSCDV_OBJ_RWQ = 1 << 3, ++ XSCDV_OBJ_DM = 1 << 4, ++ XSCDV_OBJ_AH = 1 << 5, ++ XSCDV_OBJ_PD = 1 << 6, ++}; ++ ++enum xscdv_wq_init_attr_mask { ++ XSCDV_WQ_INIT_ATTR_MASK_STRIDING_RQ = 1 << 0, ++}; ++ ++struct xscdv_striding_rq_init_attr { ++ uint32_t single_stride_log_num_of_bytes; ++ uint32_t single_wqe_log_num_of_strides; ++ uint8_t two_byte_shift_en; ++}; ++ ++struct xscdv_wq_init_attr { ++ uint64_t comp_mask; /* Use enum xscdv_wq_init_attr_mask */ ++ struct 
xscdv_striding_rq_init_attr striding_rq_attrs; ++}; ++ ++/* ++ * This function creates a work queue object with extra properties ++ * defined by xscdv_wq_init_attr struct. ++ * ++ * For each bit in the comp_mask, a field in xscdv_wq_init_attr ++ * should follow. ++ * ++ * XSCDV_WQ_INIT_ATTR_MASK_STRIDING_RQ: Create a work queue with ++ * striding RQ capabilities. ++ * - single_stride_log_num_of_bytes represents the size of each stride in the ++ * WQE and its value should be between min_single_stride_log_num_of_bytes ++ * and max_single_stride_log_num_of_bytes that are reported in ++ * xscdv_query_device. ++ * - single_wqe_log_num_of_strides represents the number of strides in each WQE. ++ * Its value should be between min_single_wqe_log_num_of_strides and ++ * max_single_wqe_log_num_of_strides that are reported in xscdv_query_device. ++ * - two_byte_shift_en: When enabled, hardware pads 2 bytes of zeroes ++ * before writing the message to memory (e.g. for IP alignment) ++ */ ++struct ibv_wq *xscdv_create_wq(struct ibv_context *context, ++ struct ibv_wq_init_attr *wq_init_attr, ++ struct xscdv_wq_init_attr *xwq_attr); ++/* ++ * This function will initialize xscdv_xxx structs based on supplied type. ++ * The information for initialization is taken from ibv_xx structs supplied ++ * as part of input. ++ * ++ * Request information of CQ marks its owned by DV for all consumer index ++ * related actions. ++ * ++ * The initialization type can be combination of several types together. ++ * ++ * Return: 0 in case of success. ++ */ ++int xscdv_init_obj(struct xscdv_obj *obj, uint64_t obj_type); ++ ++enum { ++ XSC_OPCODE_NOP = 0x00, ++ XSC_OPCODE_SEND_INVAL = 0x01, ++ XSC_OPCODE_RDMA_WRITE = 0x08, ++ XSC_OPCODE_RDMA_WRITE_IMM = 0x09, ++ XSC_OPCODE_SEND = 0x0a, ++ XSC_OPCODE_SEND_IMM = 0x0b, ++ XSC_OPCODE_TSO = 0x0e, ++ XSC_OPCODE_RDMA_READ = 0x10, ++ XSC_OPCODE_ATOMIC_CS = 0x11, ++ XSC_OPCODE_ATOMIC_FA = 0x12, ++ XSC_OPCODE_ATOMIC_MASKED_CS = 0x14, ++ XSC_OPCODE_ATOMIC_MASKED_FA = 0x15, ++ XSC_OPCODE_FMR = 0x19, ++ XSC_OPCODE_LOCAL_INVAL = 0x1b, ++ XSC_OPCODE_CONFIG_CMD = 0x1f, ++ XSC_OPCODE_UMR = 0x25, ++ XSC_OPCODE_TAG_MATCHING = 0x28 ++}; ++ ++enum { ++ XSC_CQE_L2_OK = 1 << 0, ++ XSC_CQE_L3_OK = 1 << 1, ++ XSC_CQE_L4_OK = 1 << 2, ++}; ++ ++enum { ++ XSC_CQE_L3_HDR_TYPE_NONE = 0x0, ++ XSC_CQE_L3_HDR_TYPE_IPV6 = 0x1, ++ XSC_CQE_L3_HDR_TYPE_IPV4 = 0x2, ++}; ++ ++enum { ++ XSC_CQE_OWNER_MASK = 1, ++ XSC_CQE_REQ = 0, ++ XSC_CQE_RESP_WR_IMM = 1, ++ XSC_CQE_RESP_SEND = 2, ++ XSC_CQE_RESP_SEND_IMM = 3, ++ XSC_CQE_RESP_SEND_INV = 4, ++ XSC_CQE_RESIZE_CQ = 5, ++ XSC_CQE_NO_PACKET = 6, ++ XSC_CQE_REQ_ERR = 13, ++ XSC_CQE_RESP_ERR = 14, ++ XSC_CQE_INVALID = 15, ++}; ++ ++struct xsc_err_cqe { ++ uint8_t rsvd0[32]; ++ uint32_t srqn; ++ uint8_t rsvd1[18]; ++ uint8_t vendor_err_synd; ++ uint8_t syndrome; ++ uint32_t s_wqe_opcode_qpn; ++ uint16_t wqe_counter; ++ uint8_t signature; ++ uint8_t op_own; ++}; ++ ++struct xsc_tm_cqe { ++ __be32 success; ++ __be16 hw_phase_cnt; ++ uint8_t rsvd0[12]; ++}; ++ ++struct xsc_cqe64 { ++ union { ++ struct { ++ uint8_t rsvd0[2]; ++ __be16 wqe_id; ++ uint8_t rsvd4[13]; ++ uint8_t ml_path; ++ uint8_t rsvd20[4]; ++ __be16 slid; ++ __be32 flags_rqpn; ++ uint8_t hds_ip_ext; ++ uint8_t l4_hdr_type_etc; ++ __be16 vlan_info; ++ }; ++ struct xsc_tm_cqe tm_cqe; ++ /* TMH is scattered to CQE upon match */ ++ struct ibv_tmh tmh; ++ }; ++ __be32 srqn_uidx; ++ __be32 imm_inval_pkey; ++ uint8_t app; ++ uint8_t app_op; ++ __be16 app_info; ++ __be32 byte_cnt; ++ __be64 timestamp; ++ __be32 
sop_drop_qpn; ++ __be16 wqe_counter; ++ uint8_t signature; ++ uint8_t op_own; ++}; ++ ++enum xscdv_cqe_comp_res_format { ++ XSCDV_CQE_RES_FORMAT_HASH = 1 << 0, ++ XSCDV_CQE_RES_FORMAT_CSUM = 1 << 1, ++ XSCDV_CQE_RES_FORMAT_CSUM_STRIDX = 1 << 2, ++}; ++ ++enum xscdv_sw_parsing_offloads { ++ XSCDV_SW_PARSING = 1 << 0, ++ XSCDV_SW_PARSING_CSUM = 1 << 1, ++ XSCDV_SW_PARSING_LSO = 1 << 2, ++}; ++ ++static XSCDV_ALWAYS_INLINE ++uint8_t xscdv_get_cqe_owner(struct xsc_cqe64 *cqe) ++{ ++ return cqe->op_own & 0x1; ++} ++ ++static XSCDV_ALWAYS_INLINE ++void xscdv_set_cqe_owner(struct xsc_cqe64 *cqe, uint8_t val) ++{ ++ cqe->op_own = (val & 0x1) | (cqe->op_own & ~0x1); ++} ++ ++/* Solicited event */ ++static XSCDV_ALWAYS_INLINE ++uint8_t xscdv_get_cqe_se(struct xsc_cqe64 *cqe) ++{ ++ return (cqe->op_own >> 1) & 0x1; ++} ++ ++static XSCDV_ALWAYS_INLINE ++uint8_t xscdv_get_cqe_format(struct xsc_cqe64 *cqe) ++{ ++ return (cqe->op_own >> 2) & 0x3; ++} ++ ++static XSCDV_ALWAYS_INLINE ++uint8_t xscdv_get_cqe_opcode(struct xsc_cqe64 *cqe) ++{ ++ return cqe->op_own >> 4; ++} ++ ++/* ++ * WQE related part ++ */ ++enum { ++ XSC_INVALID_LKEY = 0x100, ++}; ++ ++enum { ++ XSC_SEND_WQE_BB = 64, ++ XSC_SEND_WQE_SHIFT = 6, ++}; ++ ++struct xsc_wqe_srq_next_seg { ++ uint8_t rsvd0[2]; ++ __be16 next_wqe_index; ++ uint8_t signature; ++ uint8_t rsvd1[11]; ++}; ++ ++struct xsc_wqe_ctrl_seg { ++ __be32 opmod_idx_opcode; ++ __be32 qpn_ds; ++ uint8_t signature; ++ uint8_t rsvd[2]; ++ uint8_t fm_ce_se; ++ __be32 imm; ++}; ++ ++struct xsc_wqe_av { ++ union { ++ struct { ++ __be32 qkey; ++ __be32 reserved; ++ } qkey; ++ __be64 dc_key; ++ } key; ++ __be32 dqp_dct; ++ uint8_t stat_rate_sl; ++ uint8_t fl_mlid; ++ __be16 rlid; ++ uint8_t reserved0[4]; ++ uint8_t rmac[6]; ++ uint8_t tclass; ++ uint8_t hop_limit; ++ __be32 grh_gid_fl; ++ uint8_t rgid[16]; ++}; ++ ++struct xsc_wqe_datagram_seg { ++ struct xsc_wqe_av av; ++}; ++ ++struct xsc_wqe_raddr_seg { ++ __be64 raddr; ++ __be32 rkey; ++ __be32 reserved; ++}; ++ ++struct xsc_wqe_atomic_seg { ++ __be64 swap_add; ++ __be64 compare; ++}; ++ ++struct xsc_wqe_inl_data_seg { ++ uint32_t byte_count; ++}; ++ ++struct xsc_wqe_eth_seg { ++ __be32 rsvd0; ++ uint8_t cs_flags; ++ uint8_t rsvd1; ++ __be16 mss; ++ __be32 rsvd2; ++ __be16 inline_hdr_sz; ++ uint8_t inline_hdr_start[2]; ++ uint8_t inline_hdr[16]; ++}; ++ ++/* ++ * Control segment - contains some control information for the current WQE. ++ * ++ * Output: ++ * seg - control segment to be filled ++ * Input: ++ * pi - WQEBB number of the first block of this WQE. ++ * This number should wrap at 0xffff, regardless of ++ * size of the WQ. ++ * opcode - Opcode of this WQE. Encodes the type of operation ++ * to be executed on the QP. ++ * opmod - Opcode modifier. ++ * qp_num - QP/SQ number this WQE is posted to. ++ * fm_ce_se - FM (fence mode), CE (completion and event mode) ++ * and SE (solicited event). ++ * ds - WQE size in octowords (16-byte units). DS accounts for all ++ * the segments in the WQE as summarized in WQE construction. ++ * signature - WQE signature. ++ * imm - Immediate data/Invalidation key/UMR mkey. 
++ */ ++static XSCDV_ALWAYS_INLINE ++void xscdv_set_ctrl_seg(struct xsc_wqe_ctrl_seg *seg, uint16_t pi, ++ uint8_t opcode, uint8_t opmod, uint32_t qp_num, ++ uint8_t fm_ce_se, uint8_t ds, ++ uint8_t signature, uint32_t imm) ++{ ++ seg->opmod_idx_opcode = htobe32(((uint32_t)opmod << 24) | ((uint32_t)pi << 8) | opcode); ++ seg->qpn_ds = htobe32((qp_num << 8) | ds); ++ seg->fm_ce_se = fm_ce_se; ++ seg->signature = signature; ++ /* ++ * The caller should prepare "imm" in advance based on WR opcode. ++ * For IBV_WR_SEND_WITH_IMM and IBV_WR_RDMA_WRITE_WITH_IMM, ++ * the "imm" should be assigned as is. ++ * For the IBV_WR_SEND_WITH_INV, it should be htobe32(imm). ++ */ ++ seg->imm = imm; ++} ++ ++/* x86 optimized version of xscdv_set_ctrl_seg() ++ * ++ * This is useful when doing calculations on large data sets ++ * for parallel calculations. ++ * ++ * It doesn't suit for serialized algorithms. ++ */ ++#if defined(__SSE3__) ++static XSCDV_ALWAYS_INLINE ++void xscdv_x86_set_ctrl_seg(struct xsc_wqe_ctrl_seg *seg, uint16_t pi, ++ uint8_t opcode, uint8_t opmod, uint32_t qp_num, ++ uint8_t fm_ce_se, uint8_t ds, ++ uint8_t signature, uint32_t imm) ++{ ++ __m128i val = _mm_set_epi32(imm, qp_num, (ds << 16) | pi, ++ (signature << 24) | (opcode << 16) | (opmod << 8) | fm_ce_se); ++ __m128i mask = _mm_set_epi8(15, 14, 13, 12, /* immediate */ ++ 0, /* signal/fence_mode */ ++#if CHAR_MIN ++ -128, -128, /* reserved */ ++#else ++ 0x80, 0x80, /* reserved */ ++#endif ++ 3, /* signature */ ++ 6, /* data size */ ++ 8, 9, 10, /* QP num */ ++ 2, /* opcode */ ++ 4, 5, /* sw_pi in BE */ ++ 1 /* opmod */ ++ ); ++ *(__m128i *) seg = _mm_shuffle_epi8(val, mask); ++} ++#endif /* defined(__SSE3__) */ ++ ++/* ++ * Datagram Segment - contains address information required in order ++ * to form a datagram message. ++ * ++ * Output: ++ * seg - datagram segment to be filled. ++ * Input: ++ * key - Q_key/access key. ++ * dqp_dct - Destination QP number for UD and DCT for DC. ++ * ext - Address vector extension. ++ * stat_rate_sl - Maximum static rate control, SL/ethernet priority. ++ * fl_mlid - Force loopback and source LID for IB. ++ * rlid - Remote LID ++ * rmac - Remote MAC ++ * tclass - GRH tclass/IPv6 tclass/IPv4 ToS ++ * hop_limit - GRH hop limit/IPv6 hop limit/IPv4 TTL ++ * grh_gid_fi - GRH, source GID address and IPv6 flow label. ++ * rgid - Remote GID/IP address. ++ */ ++static XSCDV_ALWAYS_INLINE ++void xscdv_set_dgram_seg(struct xsc_wqe_datagram_seg *seg, ++ uint64_t key, uint32_t dqp_dct, ++ uint8_t ext, uint8_t stat_rate_sl, ++ uint8_t fl_mlid, uint16_t rlid, ++ uint8_t *rmac, uint8_t tclass, ++ uint8_t hop_limit, uint32_t grh_gid_fi, ++ uint8_t *rgid) ++{ ++ ++ /* Always put 64 bits, in q_key, the reserved part will be 0 */ ++ seg->av.key.dc_key = htobe64(key); ++ seg->av.dqp_dct = htobe32(((uint32_t)ext << 31) | dqp_dct); ++ seg->av.stat_rate_sl = stat_rate_sl; ++ seg->av.fl_mlid = fl_mlid; ++ seg->av.rlid = htobe16(rlid); ++ memcpy(seg->av.rmac, rmac, 6); ++ seg->av.tclass = tclass; ++ seg->av.hop_limit = hop_limit; ++ seg->av.grh_gid_fl = htobe32(grh_gid_fi); ++ memcpy(seg->av.rgid, rgid, 16); ++} ++ ++/* ++ * Eth Segment - contains packet headers and information for stateless L2, L3, L4 offloading. ++ * ++ * Output: ++ * seg - Eth segment to be filled. ++ * Input: ++ * cs_flags - l3cs/l3cs_inner/l4cs/l4cs_inner. ++ * mss - Maximum segment size. For TSO WQEs, the number of bytes ++ * in the TCP payload to be transmitted in each packet. Must ++ * be 0 on non TSO WQEs. 
++ * inline_hdr_sz - Length of the inlined packet headers. ++ * inline_hdr_start - Inlined packet header. ++ */ ++static XSCDV_ALWAYS_INLINE ++void xscdv_set_eth_seg(struct xsc_wqe_eth_seg *seg, uint8_t cs_flags, ++ uint16_t mss, uint16_t inline_hdr_sz, ++ uint8_t *inline_hdr_start) ++{ ++ seg->cs_flags = cs_flags; ++ seg->mss = htobe16(mss); ++ seg->inline_hdr_sz = htobe16(inline_hdr_sz); ++ memcpy(seg->inline_hdr_start, inline_hdr_start, inline_hdr_sz); ++} ++ ++enum xscdv_set_ctx_attr_type { ++ XSCDV_CTX_ATTR_BUF_ALLOCATORS = 1, ++}; ++ ++enum { ++ XSC_MMAP_GET_REGULAR_PAGES_CMD = 0, ++ XSC_MMAP_GET_NC_PAGES_CMD = 3, ++}; ++ ++struct xscdv_ctx_allocators { ++ void *(*alloc)(size_t size, void *priv_data); ++ void (*free)(void *ptr, void *priv_data); ++ void *data; ++}; ++ ++/* ++ * Generic context attributes set API ++ * ++ * Returns 0 on success, or the value of errno on failure ++ * (which indicates the failure reason). ++ */ ++int xscdv_set_context_attr(struct ibv_context *context, ++ enum xscdv_set_ctx_attr_type type, void *attr); ++ ++struct xscdv_clock_info { ++ uint64_t nsec; ++ uint64_t last_cycles; ++ uint64_t frac; ++ uint32_t mult; ++ uint32_t shift; ++ uint64_t mask; ++}; ++ ++/* ++ * Get xsc core clock info ++ * ++ * Output: ++ * clock_info - clock info to be filled ++ * Input: ++ * context - device context ++ * ++ * Return: 0 on success, or the value of errno on failure ++ */ ++int xscdv_get_clock_info(struct ibv_context *context, ++ struct xscdv_clock_info *clock_info); ++ ++/* ++ * Translate device timestamp to nano-sec ++ * ++ * Input: ++ * clock_info - clock info to be filled ++ * device_timestamp - timestamp to translate ++ * ++ * Return: nano-sec ++ */ ++static inline uint64_t xscdv_ts_to_ns(struct xscdv_clock_info *clock_info, ++ uint64_t device_timestamp) ++{ ++ uint64_t delta, nsec; ++ ++ /* ++ * device_timestamp & cycles are the free running 'mask' bit counters ++ * from the hardware hca_core_clock clock. ++ */ ++ delta = (device_timestamp - clock_info->last_cycles) & clock_info->mask; ++ nsec = clock_info->nsec; ++ ++ /* ++ * Guess if the device_timestamp is more recent than ++ * clock_info->last_cycles, if not (too far in the future) treat ++ * it as old time stamp. This will break every max_clock_info_update_nsec. 
++ */ ++ ++ if (delta > clock_info->mask / 2) { ++ delta = (clock_info->last_cycles - device_timestamp) & ++ clock_info->mask; ++ nsec -= ((delta * clock_info->mult) - clock_info->frac) >> ++ clock_info->shift; ++ } else { ++ nsec += ((delta * clock_info->mult) + clock_info->frac) >> ++ clock_info->shift; ++ } ++ ++ return nsec; ++} ++ ++enum xscdv_context_attr_flags { ++ XSCDV_CONTEXT_FLAGS_DEVX = 1 << 0, ++}; ++ ++struct xscdv_context_attr { ++ uint32_t flags; /* Use enum xscdv_context_attr_flags */ ++ uint64_t comp_mask; ++}; ++ ++struct ibv_context * ++xscdv_open_device(struct ibv_device *device, struct xscdv_context_attr *attr); ++ ++struct xscdv_devx_obj; ++ ++struct xscdv_devx_obj * ++xscdv_devx_obj_create(struct ibv_context *context, const void *in, size_t inlen, ++ void *out, size_t outlen); ++int xscdv_devx_obj_query(struct xscdv_devx_obj *obj, const void *in, size_t inlen, ++ void *out, size_t outlen); ++int xscdv_devx_obj_modify(struct xscdv_devx_obj *obj, const void *in, size_t inlen, ++ void *out, size_t outlen); ++int xscdv_devx_obj_destroy(struct xscdv_devx_obj *obj); ++int xscdv_devx_general_cmd(struct ibv_context *context, const void *in, size_t inlen, ++ void *out, size_t outlen); ++ ++struct xscdv_devx_umem { ++ uint32_t umem_id; ++}; ++ ++struct xscdv_devx_umem * ++xscdv_devx_umem_reg(struct ibv_context *ctx, void *addr, size_t size, uint32_t access); ++int xscdv_devx_umem_dereg(struct xscdv_devx_umem *umem); ++int xscdv_devx_query_eqn(struct ibv_context *context, uint32_t vector, ++ uint32_t *eqn); ++ ++#ifdef __cplusplus ++} ++#endif ++ ++#endif /* _XSCDV_H_ */ +diff --git a/redhat/rdma-core.spec b/redhat/rdma-core.spec +index 6d82a64..867c896 100644 +--- a/redhat/rdma-core.spec ++++ b/redhat/rdma-core.spec +@@ -176,6 +176,8 @@ Provides: libocrdma = %{version}-%{release} + Obsoletes: libocrdma < %{version}-%{release} + Provides: librxe = %{version}-%{release} + Obsoletes: librxe < %{version}-%{release} ++Provides: libxscale = %{version}-%{release} ++Obsoletes: libxscale < %{version}-%{release} + Provides: libzrdma = %{version}-%{release} + Obsoletes: libzrdma < %{version}-%{release} + +@@ -204,6 +206,7 @@ Device-specific plug-in ibverbs userspace drivers are included: + - librxe: A software implementation of the RoCE protocol + - libsiw: A software implementation of the iWarp protocol + - libvmw_pvrdma: VMware paravirtual RDMA device ++- libxscale: Yunsilicon RDMA device + - libzrdma: ZTE Connection RDMA + + %package -n libibverbs-utils +@@ -586,6 +589,7 @@ fi + %{_libdir}/libmana.so.* + %{_libdir}/libmlx5.so.* + %{_libdir}/libmlx4.so.* ++%{_libdir}/libxscale.so.* + %{_libdir}/libzrdma.so.* + %config(noreplace) %{_sysconfdir}/libibverbs.d/*.driver + %doc %{_docdir}/%{name}/libibverbs.md +-- +2.18.4 + + diff --git a/0045-libhns-fix-incorrectly-using-fixed-pagesize.patch b/0045-libhns-fix-incorrectly-using-fixed-pagesize.patch new file mode 100644 index 0000000000000000000000000000000000000000..717b340f69bd074856a455effb9506969bb9a076 --- /dev/null +++ b/0045-libhns-fix-incorrectly-using-fixed-pagesize.patch @@ -0,0 +1,110 @@ +From 7bd22fed52a1828b0d44a990b52266e9e1d92b5d Mon Sep 17 00:00:00 2001 +From: Chengchang Tang +Date: Tue, 30 Jan 2024 21:00:46 +0800 +Subject: [PATCH 45/46] libhns: fix incorrectly using fixed pagesize + +driver inclusion +category: bugfix +bugzilla: https://gitee.com/src-openeuler/rdma-core/issues/IB66RT + +------------------------------------------------------------------ + +Currently, actually used page size is fixed, causing the flexible wqe 
+buffer size feature to not take effect. + +Fixes: 9ab7600d832b ("libhns: Add support for attaching QP's WQE buffer") +Signed-off-by: Chengchang Tang +Signed-off-by: Xinghai Cen +--- + providers/hns/hns_roce_u_verbs.c | 24 +++++++++++++----------- + 1 file changed, 13 insertions(+), 11 deletions(-) + +diff --git a/providers/hns/hns_roce_u_verbs.c b/providers/hns/hns_roce_u_verbs.c +index bce215e..848f836 100644 +--- a/providers/hns/hns_roce_u_verbs.c ++++ b/providers/hns/hns_roce_u_verbs.c +@@ -1296,14 +1296,14 @@ static void free_recv_rinl_buf(struct hns_roce_rinl_buf *rinl_buf) + + static void get_best_multi_region_pg_shift(struct hns_roce_device *hr_dev, + struct hns_roce_context *ctx, +- struct hns_roce_qp *qp) ++ struct hns_roce_qp *qp, bool dca_en) + { + uint32_t ext_sge_size; + uint32_t sq_size; + uint32_t rq_size; + uint8_t pg_shift; + +- if (!(ctx->config & HNS_ROCE_UCTX_RSP_DYN_QP_PGSZ)) { ++ if (!(ctx->config & HNS_ROCE_UCTX_RSP_DYN_QP_PGSZ || dca_en)) { + qp->pageshift = HNS_HW_PAGE_SHIFT; + return; + } +@@ -1334,7 +1334,7 @@ static void get_best_multi_region_pg_shift(struct hns_roce_device *hr_dev, + + static int calc_qp_buff_size(struct hns_roce_device *hr_dev, + struct hns_roce_context *ctx, +- struct hns_roce_qp *qp) ++ struct hns_roce_qp *qp, bool dca_en) + { + struct hns_roce_wq *sq = &qp->sq; + struct hns_roce_wq *rq = &qp->rq; +@@ -1342,7 +1342,7 @@ static int calc_qp_buff_size(struct hns_roce_device *hr_dev, + unsigned int size; + + qp->buf_size = 0; +- get_best_multi_region_pg_shift(hr_dev, ctx, qp); ++ get_best_multi_region_pg_shift(hr_dev, ctx, qp, dca_en); + page_size = 1 << qp->pageshift; + + /* SQ WQE */ +@@ -1384,7 +1384,7 @@ static inline bool check_qp_support_dca(struct hns_roce_dca_ctx *dca_ctx, + if (hns_attr && + (hns_attr->comp_mask & HNSDV_QP_INIT_ATTR_MASK_QP_CREATE_FLAGS) && + (hns_attr->create_flags & HNSDV_QP_CREATE_ENABLE_DCA_MODE)) +- return true; ++ return dca_ctx->max_size > 0; + + return false; + } +@@ -1408,9 +1408,12 @@ static int qp_alloc_wqe(struct ibv_qp_init_attr_ex *attr, + struct hns_roce_qp *qp, struct hns_roce_context *ctx) + { + struct hns_roce_device *hr_dev = to_hr_dev(ctx->ibv_ctx.context.device); ++ bool dca_en = check_qp_support_dca(&ctx->dca_ctx, attr, hns_attr); ++ int ret; + +- if (calc_qp_buff_size(hr_dev, ctx, qp)) +- return -EINVAL; ++ ret = calc_qp_buff_size(hr_dev, ctx, qp, dca_en); ++ if (ret) ++ return ret; + + qp->sq.wrid = malloc(qp->sq.wqe_cnt * sizeof(uint64_t)); + if (!qp->sq.wrid) +@@ -1428,19 +1431,18 @@ static int qp_alloc_wqe(struct ibv_qp_init_attr_ex *attr, + goto err_alloc; + } + +- if (check_qp_support_dca(&ctx->dca_ctx, attr, hns_attr) && +- ctx->dca_ctx.max_size > 0) { ++ if (check_qp_support_dca(&ctx->dca_ctx, attr, hns_attr)) { + /* when DCA is enabled, use a buffer list to store page addr */ + qp->buf.buf = NULL; + qp->dca_wqe.max_cnt = hr_hw_page_count(qp->buf_size); +- qp->dca_wqe.shift = HNS_HW_PAGE_SHIFT; ++ qp->dca_wqe.shift = qp->pageshift; + qp->dca_wqe.bufs = calloc(qp->dca_wqe.max_cnt, sizeof(void *)); + if (!qp->dca_wqe.bufs) + goto err_alloc; + verbs_debug(&ctx->ibv_ctx, "alloc DCA buf.\n"); + } else { + if (hns_roce_alloc_buf(&qp->buf, qp->buf_size, +- HNS_HW_PAGE_SIZE)) ++ 1 << qp->pageshift)) + goto err_alloc; + } + +-- +2.33.0 + diff --git a/0046-libhns-fix-missing-new-IO-support-for-DCA.patch b/0046-libhns-fix-missing-new-IO-support-for-DCA.patch new file mode 100644 index 0000000000000000000000000000000000000000..1b98200e694134f738912597ce7cb53a439ec862 --- /dev/null +++ 
b/0046-libhns-fix-missing-new-IO-support-for-DCA.patch @@ -0,0 +1,55 @@ +From 199b2f78ff9eeeb25acc78f9da495ae58877807a Mon Sep 17 00:00:00 2001 +From: Chengchang Tang +Date: Tue, 30 Jan 2024 21:28:44 +0800 +Subject: [PATCH 46/46] libhns: fix missing new IO support for DCA + +driver inclusion +category: bugfix +bugzilla: https://gitee.com/src-openeuler/rdma-core/issues/IBSL67 + +------------------------------------------------------------------ + +New IO related support has been missed for DCA. + +Fixes: 9ab7600d832b ("libhns: Add support for attaching QP's WQE buffer") +Signed-off-by: Chengchang Tang +Signed-off-by: Xinghai Cen +--- + providers/hns/hns_roce_u_hw_v2.c | 7 +++++++ + 1 file changed, 7 insertions(+) + +diff --git a/providers/hns/hns_roce_u_hw_v2.c b/providers/hns/hns_roce_u_hw_v2.c +index aadea7a..3137111 100644 +--- a/providers/hns/hns_roce_u_hw_v2.c ++++ b/providers/hns/hns_roce_u_hw_v2.c +@@ -2191,6 +2191,8 @@ static int wc_start_poll_cq(struct ibv_cq_ex *current, + } + + err = hns_roce_poll_one(ctx, &qp, cq, NULL); ++ if (qp && check_dca_detach_enable(qp)) ++ dca_detach_qp_buf(ctx, qp); + + start_poll_done: + if (err != V2_CQ_OK) +@@ -2210,6 +2212,8 @@ static int wc_next_poll_cq(struct ibv_cq_ex *current) + return hns_roce_poll_one_swc(cq, NULL); + + err = hns_roce_poll_one(ctx, &qp, cq, NULL); ++ if (qp && check_dca_detach_enable(qp)) ++ dca_detach_qp_buf(ctx, qp); + if (err != V2_CQ_OK) + return err; + +@@ -2408,6 +2412,9 @@ init_rc_wqe(struct hns_roce_qp *qp, uint64_t wr_id, unsigned int opcode) + hr_reg_write_bool(wqe, RCWQE_SE, send_flags & IBV_SEND_SOLICITED); + hr_reg_clear(wqe, RCWQE_INLINE); + ++ if (check_qp_dca_enable(qp)) ++ fill_rc_dca_fields(qp->verbs_qp.qp.qp_num, wqe); ++ + qp->sq.wrid[wqe_idx] = wr_id; + qp->cur_wqe = wqe; + +-- +2.33.0 + diff --git a/0047-libzrdma-Fix-wqe-polarity-set-error.patch b/0047-libzrdma-Fix-wqe-polarity-set-error.patch new file mode 100644 index 0000000000000000000000000000000000000000..19439ff91c99dbe4607346716e0df71d5e46f727 --- /dev/null +++ b/0047-libzrdma-Fix-wqe-polarity-set-error.patch @@ -0,0 +1,245 @@ +From 232ae986da9c9995be0ae39bdf5b2145250c22c9 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?=E6=9D=8E=E5=AF=8C=E8=89=B3?= +Date: Fri, 28 Mar 2025 11:04:33 +0800 +Subject: [PATCH] Fix: wqe polarity set error +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +Signed-off-by: 李富艳 +--- + providers/zrdma/zxdh_defs.h | 3 -- + providers/zrdma/zxdh_hw.c | 62 +++++++----------------------------- + providers/zrdma/zxdh_verbs.h | 1 - + 3 files changed, 11 insertions(+), 55 deletions(-) + +diff --git a/providers/zrdma/zxdh_defs.h b/providers/zrdma/zxdh_defs.h +index eaf73ca..3863fb9 100644 +--- a/providers/zrdma/zxdh_defs.h ++++ b/providers/zrdma/zxdh_defs.h +@@ -313,9 +313,6 @@ + #define ZXDH_RING_FREE_QUANTA(_ring) \ + (((_ring).size - ZXDH_RING_USED_QUANTA(_ring) - 1)) + +-#define ZXDH_SQ_RING_FREE_QUANTA(_ring) \ +- (((_ring).size - ZXDH_RING_USED_QUANTA(_ring) - 257)) +- + #define ZXDH_ATOMIC_RING_MOVE_HEAD(_ring, index, _retcode) \ + { \ + index = ZXDH_RING_CURRENT_HEAD(_ring); \ +diff --git a/providers/zrdma/zxdh_hw.c b/providers/zrdma/zxdh_hw.c +index ed577a9..073b198 100644 +--- a/providers/zrdma/zxdh_hw.c ++++ b/providers/zrdma/zxdh_hw.c +@@ -20,13 +20,14 @@ + #include + #define ERROR_CODE_VALUE 65 + +-static void qp_tx_psn_add(__u32 *x, __u32 y, __u16 mtu) ++static inline void qp_tx_psn_add(__u32 *x, __u32 y, __u16 mtu) + { + if (y == 0) { + *x = (*x + 1) & 0xffffff; + return; + } +- 
*x = (*x + ((y % mtu) ? (y / mtu + 1) : y / mtu)) & 0xffffff; ++ __u32 chunks = (y + mtu - 1) / mtu; ++ *x = (*x + chunks) & 0xffffff; + } + + int zxdh_get_write_imm_split_switch(void) +@@ -95,26 +96,6 @@ static enum zxdh_status_code zxdh_nop_1(struct zxdh_qp *qp) + return 0; + } + +-/** +- * zxdh_clr_wqes - clear next 128 sq entries +- * @qp: hw qp ptr +- * @qp_wqe_idx: wqe_idx +- */ +-void zxdh_clr_wqes(struct zxdh_qp *qp, __u32 qp_wqe_idx) +-{ +- __le64 *wqe; +- __u32 wqe_idx; +- +- if (!(qp_wqe_idx & 0x7F)) { +- wqe_idx = (qp_wqe_idx + 128) % qp->sq_ring.size; +- wqe = qp->sq_base[wqe_idx].elem; +- if (wqe_idx) +- memset(wqe, qp->swqe_polarity ? 0 : 0xFF, 0x1000); +- else +- memset(wqe, qp->swqe_polarity ? 0xFF : 0, 0x1000); +- } +-} +- + /** + * zxdh_qp_post_wr - ring doorbell + * @qp: hw qp ptr +@@ -197,14 +178,13 @@ __le64 *zxdh_qp_get_next_send_wqe(struct zxdh_qp *qp, __u32 *wqe_idx, + avail_quanta = ZXDH_MAX_SQ_WQES_PER_PAGE - + (ZXDH_RING_CURRENT_HEAD(qp->sq_ring) % + ZXDH_MAX_SQ_WQES_PER_PAGE); +- if (quanta <= avail_quanta) { ++ if (likely(quanta <= avail_quanta)) { + /* WR fits in current chunk */ +- if (quanta > ZXDH_SQ_RING_FREE_QUANTA(qp->sq_ring)) ++ if (unlikely(quanta > ZXDH_RING_FREE_QUANTA(qp->sq_ring))) + return NULL; + } else { + /* Need to pad with NOP */ +- if (quanta + avail_quanta > +- ZXDH_SQ_RING_FREE_QUANTA(qp->sq_ring)) ++ if (quanta + avail_quanta > ZXDH_RING_FREE_QUANTA(qp->sq_ring)) + return NULL; + + for (i = 0; i < avail_quanta; i++) { +@@ -287,8 +267,6 @@ zxdh_post_rdma_write(struct zxdh_qp *qp, struct zxdh_post_sq_info *info, + if (!wqe) + return ZXDH_ERR_QP_TOOMANY_WRS_POSTED; + +- zxdh_clr_wqes(qp, wqe_idx); +- + if (op_info->num_lo_sges) { + set_64bit_val( + wqe, 16, +@@ -635,8 +613,6 @@ static enum zxdh_status_code zxdh_post_rdma_read(struct zxdh_qp *qp, + if (!wqe) + return ZXDH_ERR_QP_TOOMANY_WRS_POSTED; + +- zxdh_clr_wqes(qp, wqe_idx); +- + addl_frag_cnt = + op_info->num_lo_sges > 1 ? (op_info->num_lo_sges - 1) : 0; + local_fence |= info->local_fence; +@@ -817,8 +793,6 @@ enum zxdh_status_code zxdh_rc_send(struct zxdh_qp *qp, + if (!wqe) + return ZXDH_ERR_QP_TOOMANY_WRS_POSTED; + +- zxdh_clr_wqes(qp, wqe_idx); +- + read_fence |= info->read_fence; + addl_frag_cnt = op_info->num_sges > 1 ? (op_info->num_sges - 1) : 0; + if (op_info->num_sges) { +@@ -975,7 +949,7 @@ enum zxdh_status_code zxdh_ud_send(struct zxdh_qp *qp, + if (ret_code) + return ret_code; + +- if (quanta > ZXDH_SQ_RING_FREE_QUANTA(qp->sq_ring)) ++ if (quanta > ZXDH_RING_FREE_QUANTA(qp->sq_ring)) + return ZXDH_ERR_QP_TOOMANY_WRS_POSTED; + + wqe_idx = ZXDH_RING_CURRENT_HEAD(qp->sq_ring); +@@ -989,8 +963,6 @@ enum zxdh_status_code zxdh_ud_send(struct zxdh_qp *qp, + qp->sq_wrtrk_array[wqe_idx].wr_len = total_size; + qp->sq_wrtrk_array[wqe_idx].quanta = quanta; + +- zxdh_clr_wqes(qp, wqe_idx); +- + read_fence |= info->read_fence; + addl_frag_cnt = op_info->num_sges > 1 ? 
(op_info->num_sges - 1) : 0; + hdr = FIELD_PREP(ZXDHQPSQ_VALID, qp->swqe_polarity) | +@@ -1281,8 +1253,6 @@ enum zxdh_status_code zxdh_inline_rdma_write(struct zxdh_qp *qp, + if (!wqe) + return ZXDH_ERR_QP_TOOMANY_WRS_POSTED; + +- zxdh_clr_wqes(qp, wqe_idx); +- + read_fence |= info->read_fence; + hdr = FIELD_PREP(ZXDHQPSQ_VALID, qp->swqe_polarity) | + FIELD_PREP(ZXDHQPSQ_OPCODE, info->op_type) | +@@ -1293,7 +1263,7 @@ enum zxdh_status_code zxdh_inline_rdma_write(struct zxdh_qp *qp, + FIELD_PREP(ZXDHQPSQ_IMMDATAFLAG, imm_data_flag) | + FIELD_PREP(ZXDHQPSQ_WRITE_INLINEDATAFLAG, 1) | + FIELD_PREP(ZXDHQPSQ_WRITE_INLINEDATALEN, op_info->len) | +- FIELD_PREP(ZXDHQPSQ_ADDFRAGCNT, quanta - 1) | ++ FIELD_PREP(ZXDHQPSQ_ADDFRAGCNT, (__u16)(quanta - 1)) | + FIELD_PREP(ZXDHQPSQ_REMSTAG, op_info->rem_addr.stag); + set_64bit_val(wqe, 24, + FIELD_PREP(ZXDHQPSQ_FRAG_TO, op_info->rem_addr.tag_off)); +@@ -1355,8 +1325,6 @@ enum zxdh_status_code zxdh_rc_inline_send(struct zxdh_qp *qp, + if (!wqe) + return ZXDH_ERR_QP_TOOMANY_WRS_POSTED; + +- zxdh_clr_wqes(qp, wqe_idx); +- + read_fence |= info->read_fence; + hdr = FIELD_PREP(ZXDHQPSQ_VALID, qp->swqe_polarity) | + FIELD_PREP(ZXDHQPSQ_OPCODE, info->op_type) | +@@ -1364,7 +1332,7 @@ enum zxdh_status_code zxdh_rc_inline_send(struct zxdh_qp *qp, + FIELD_PREP(ZXDHQPSQ_LOCALFENCE, info->local_fence) | + FIELD_PREP(ZXDHQPSQ_READFENCE, read_fence) | + FIELD_PREP(ZXDHQPSQ_SOLICITED, info->solicited) | +- FIELD_PREP(ZXDHQPSQ_ADDFRAGCNT, quanta - 1) | ++ FIELD_PREP(ZXDHQPSQ_ADDFRAGCNT, (__u16)(quanta - 1)) | + FIELD_PREP(ZXDHQPSQ_IMMDATAFLAG, imm_data_flag) | + FIELD_PREP(ZXDHQPSQ_REMSTAG, info->stag_to_inv); + set_64bit_val(wqe, 24, +@@ -1430,7 +1398,7 @@ enum zxdh_status_code zxdh_ud_inline_send(struct zxdh_qp *qp, + + quanta = qp->wqe_ops.iw_inline_data_size_to_quanta(op_info->len, + imm_data_flag); +- if (quanta > ZXDH_SQ_RING_FREE_QUANTA(qp->sq_ring)) ++ if (quanta > ZXDH_RING_FREE_QUANTA(qp->sq_ring)) + return ZXDH_ERR_QP_TOOMANY_WRS_POSTED; + + wqe_idx = ZXDH_RING_CURRENT_HEAD(qp->sq_ring); +@@ -1444,8 +1412,6 @@ enum zxdh_status_code zxdh_ud_inline_send(struct zxdh_qp *qp, + qp->sq_wrtrk_array[wqe_idx].wr_len = op_info->len; + qp->sq_wrtrk_array[wqe_idx].quanta = quanta; + +- zxdh_clr_wqes(qp, wqe_idx); +- + read_fence |= info->read_fence; + hdr = FIELD_PREP(ZXDHQPSQ_VALID, qp->swqe_polarity) | + FIELD_PREP(ZXDHQPSQ_OPCODE, info->op_type) | +@@ -1454,7 +1420,7 @@ enum zxdh_status_code zxdh_ud_inline_send(struct zxdh_qp *qp, + FIELD_PREP(ZXDHQPSQ_IMMDATAFLAG, imm_data_flag) | + FIELD_PREP(ZXDHQPSQ_UD_INLINEDATAFLAG, 1) | + FIELD_PREP(ZXDHQPSQ_UD_INLINEDATALEN, op_info->len) | +- FIELD_PREP(ZXDHQPSQ_UD_ADDFRAGCNT, quanta - 1) | ++ FIELD_PREP(ZXDHQPSQ_UD_ADDFRAGCNT, (__u16)(quanta - 1)) | + FIELD_PREP(ZXDHQPSQ_AHID, op_info->ah_id); + set_64bit_val(wqe_base, 24, + FIELD_PREP(ZXDHQPSQ_DESTQPN, op_info->dest_qp) | +@@ -1572,8 +1538,6 @@ enum zxdh_status_code zxdh_stag_local_invalidate(struct zxdh_qp *qp, + if (!wqe) + return ZXDH_ERR_QP_TOOMANY_WRS_POSTED; + +- zxdh_clr_wqes(qp, wqe_idx); +- + set_64bit_val(wqe, 16, 0); + + hdr = FIELD_PREP(ZXDHQPSQ_VALID, qp->swqe_polarity) | +@@ -1618,8 +1582,6 @@ enum zxdh_status_code zxdh_mw_bind(struct zxdh_qp *qp, + if (!wqe) + return ZXDH_ERR_QP_TOOMANY_WRS_POSTED; + +- zxdh_clr_wqes(qp, wqe_idx); +- + if (op_info->ena_writes) { + access = (op_info->ena_reads << 2) | + (op_info->ena_writes << 3) | (1 << 1) | access; +@@ -2391,8 +2353,6 @@ enum zxdh_status_code zxdh_nop(struct zxdh_qp *qp, __u64 wr_id, bool signaled, + if (!wqe) 
+ return ZXDH_ERR_QP_TOOMANY_WRS_POSTED; + +- zxdh_clr_wqes(qp, wqe_idx); +- + set_64bit_val(wqe, 0, 0); + set_64bit_val(wqe, 8, 0); + set_64bit_val(wqe, 16, 0); +diff --git a/providers/zrdma/zxdh_verbs.h b/providers/zrdma/zxdh_verbs.h +index 69a98cc..40aa7bb 100644 +--- a/providers/zrdma/zxdh_verbs.h ++++ b/providers/zrdma/zxdh_verbs.h +@@ -596,7 +596,6 @@ int zxdh_qp_round_up(__u32 wqdepth); + int zxdh_cq_round_up(__u32 wqdepth); + void zxdh_qp_push_wqe(struct zxdh_qp *qp, __le64 *wqe, __u16 quanta, + __u32 wqe_idx, bool post_sq); +-void zxdh_clr_wqes(struct zxdh_qp *qp, __u32 qp_wqe_idx); + + void zxdh_get_srq_wqe_shift(struct zxdh_dev_attrs *dev_attrs, __u32 sge, + __u8 *shift); +-- +2.27.0 + diff --git a/0048-libzrdma-Add-interface-aligned-with-kernel.patch b/0048-libzrdma-Add-interface-aligned-with-kernel.patch new file mode 100644 index 0000000000000000000000000000000000000000..ffe84a9e210e411dc7aa6f1a893193e7abf59738 --- /dev/null +++ b/0048-libzrdma-Add-interface-aligned-with-kernel.patch @@ -0,0 +1,304 @@ +From da8370c2360deb73af7a211bec2be76b025cb5d3 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?=E6=9D=8E=E5=AF=8C=E8=89=B3?= +Date: Fri, 28 Mar 2025 11:36:50 +0800 +Subject: [PATCH] libzrdma:Add interface aligned with kernel +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +Signed-off-by: 李富艳 +--- + kernel-headers/rdma/zxdh-abi.h | 23 ++++++++------- + providers/zrdma/main.c | 34 +++++++++++++---------- + providers/zrdma/zxdh_defs.h | 8 +++--- + providers/zrdma/zxdh_devids.h | 9 ++++++ + providers/zrdma/zxdh_hw.c | 9 ++++-- + providers/zrdma/zxdh_verbs.h | 51 ++++++---------------------------- + 6 files changed, 60 insertions(+), 74 deletions(-) + +diff --git a/kernel-headers/rdma/zxdh-abi.h b/kernel-headers/rdma/zxdh-abi.h +index 665f874..59c0160 100644 +--- a/kernel-headers/rdma/zxdh-abi.h ++++ b/kernel-headers/rdma/zxdh-abi.h +@@ -6,10 +6,9 @@ + + #include + +-/* zxdh must support legacy GEN_1 i40iw kernel +- * and user-space whose last ABI ver is 5 +- */ ++/* user-space whose last ABI ver is 5 */ + #define ZXDH_ABI_VER 5 ++#define ZXDH_CONTEXT_VER_V1 5 + + enum zxdh_memreg_type { + ZXDH_MEMREG_TYPE_MEM = 0, +@@ -35,7 +34,7 @@ struct zxdh_alloc_ucontext_resp { + __u32 wq_size; /* size of the WQs (SQ+RQ) in the mmaped area */ + __u8 kernel_ver; + __u8 db_addr_type; +- __u8 rsvd[2]; ++ __u16 rdma_tool_flags; + __aligned_u64 feature_flags; + __aligned_u64 sq_db_mmap_key; + __aligned_u64 cq_db_mmap_key; +@@ -51,8 +50,8 @@ struct zxdh_alloc_ucontext_resp { + __u32 min_hw_cq_size; + __u32 max_hw_cq_size; + __u16 max_hw_sq_chunk; +- __u8 hw_rev; +- __u8 rsvd2; ++ __u8 rsvd; ++ __u8 chip_rev; + }; + + struct zxdh_alloc_pd_resp { +@@ -82,13 +81,13 @@ struct zxdh_create_srq_req { + }; + + struct zxdh_mem_reg_req { +- __u16 reg_type; /* enum zxdh_memreg_type */ +- __u16 cq_pages; +- __u16 rq_pages; +- __u16 sq_pages; +- __u16 srq_pages; ++ __u32 reg_type; /* enum zxdh_memreg_type */ ++ __u32 cq_pages; ++ __u32 rq_pages; ++ __u32 sq_pages; ++ __u32 srq_pages; + __u16 srq_list_pages; +- __u8 rsvd[4]; ++ __u8 rsvd[2]; + }; + + struct zxdh_reg_mr_resp { +diff --git a/providers/zrdma/main.c b/providers/zrdma/main.c +index e25a1a2..4626a21 100644 +--- a/providers/zrdma/main.c ++++ b/providers/zrdma/main.c +@@ -22,6 +22,12 @@ static const struct verbs_match_ent hca_table[] = { + ZXDH_HCA(PCI_VENDOR_ID_ZXDH_EVB, ZXDH_DEV_ID_ADAPTIVE_EVB_VF), + ZXDH_HCA(PCI_VENDOR_ID_ZXDH_E312, ZXDH_DEV_ID_ADAPTIVE_E312_PF), + ZXDH_HCA(PCI_VENDOR_ID_ZXDH_E312, 
ZXDH_DEV_ID_ADAPTIVE_E312_VF), ++ ZXDH_HCA(PCI_VENDOR_ID_ZXDH_E310, ZXDH_DEV_ID_ADAPTIVE_E310_PF), ++ ZXDH_HCA(PCI_VENDOR_ID_ZXDH_E310, ZXDH_DEV_ID_ADAPTIVE_E310_VF), ++ ZXDH_HCA(PCI_VENDOR_ID_ZXDH_E310_RDMA, ZXDH_DEV_ID_ADAPTIVE_E310_RDMA_PF), ++ ZXDH_HCA(PCI_VENDOR_ID_ZXDH_E310_RDMA, ZXDH_DEV_ID_ADAPTIVE_E310_RDMA_VF), ++ ZXDH_HCA(PCI_VENDOR_ID_ZXDH_E316, ZXDH_DEV_ID_ADAPTIVE_E316_PF), ++ ZXDH_HCA(PCI_VENDOR_ID_ZXDH_E316, ZXDH_DEV_ID_ADAPTIVE_E316_VF), + ZXDH_HCA(PCI_VENDOR_ID_ZXDH_X512, ZXDH_DEV_ID_ADAPTIVE_X512_PF), + ZXDH_HCA(PCI_VENDOR_ID_ZXDH_X512, ZXDH_DEV_ID_ADAPTIVE_X512_VF), + {} +@@ -100,7 +106,6 @@ static struct verbs_context *zxdh_ualloc_context(struct ibv_device *ibdev, + struct zxdh_get_context cmd; + struct zxdh_get_context_resp resp = {}; + __u64 sq_db_mmap_key, cq_db_mmap_key; +- __u8 user_ver = ZXDH_ABI_VER; + + iwvctx = verbs_init_and_alloc_context(ibdev, cmd_fd, iwvctx, ibv_ctx, + RDMA_DRIVER_ZXDH); +@@ -109,22 +114,16 @@ static struct verbs_context *zxdh_ualloc_context(struct ibv_device *ibdev, + + zxdh_set_debug_mask(); + iwvctx->zxdh_write_imm_split_switch = zxdh_get_write_imm_split_switch(); +- cmd.userspace_ver = user_ver; ++ ++ cmd.userspace_ver = ZXDH_CONTEXT_VER_V1; + if (ibv_cmd_get_context(&iwvctx->ibv_ctx, + (struct ibv_get_context *)&cmd, sizeof(cmd), +- &resp.ibv_resp, sizeof(resp))) { +- cmd.userspace_ver = 4; +- if (ibv_cmd_get_context( +- &iwvctx->ibv_ctx, (struct ibv_get_context *)&cmd, +- sizeof(cmd), &resp.ibv_resp, sizeof(resp))) +- goto err_free; +- user_ver = cmd.userspace_ver; +- } ++ &resp.ibv_resp, sizeof(resp))) ++ goto err_free; + + verbs_set_ops(&iwvctx->ibv_ctx, &zxdh_uctx_ops); + + iwvctx->dev_attrs.feature_flags = resp.feature_flags; +- iwvctx->dev_attrs.hw_rev = resp.hw_rev; + iwvctx->dev_attrs.max_hw_wq_frags = resp.max_hw_wq_frags; + iwvctx->dev_attrs.max_hw_read_sges = resp.max_hw_read_sges; + iwvctx->dev_attrs.max_hw_inline = resp.max_hw_inline; +@@ -135,11 +134,20 @@ static struct verbs_context *zxdh_ualloc_context(struct ibv_device *ibdev, + iwvctx->dev_attrs.max_hw_sq_chunk = resp.max_hw_sq_chunk; + iwvctx->dev_attrs.max_hw_cq_size = resp.max_hw_cq_size; + iwvctx->dev_attrs.min_hw_cq_size = resp.min_hw_cq_size; +- iwvctx->abi_ver = user_ver; ++ iwvctx->abi_ver = ZXDH_ABI_VER; ++ iwvctx->dev_attrs.chip_rev = resp.chip_rev; ++ iwvctx->dev_attrs.rdma_tool_flags = resp.rdma_tool_flags; + + sq_db_mmap_key = resp.sq_db_mmap_key; + cq_db_mmap_key = resp.cq_db_mmap_key; + ++ iwvctx->dev_attrs.db_addr_type = resp.db_addr_type; ++ iwvctx->dev_attrs.sq_db_pa = resp.sq_db_pa; ++ iwvctx->dev_attrs.cq_db_pa = resp.cq_db_pa; ++ ++ if (iwvctx->dev_attrs.db_addr_type != ZXDH_DB_ADDR_BAR) ++ goto err_free; ++ + iwvctx->sq_db = zxdh_mmap(cmd_fd, sq_db_mmap_key); + if (iwvctx->sq_db == MAP_FAILED) + goto err_free; +@@ -160,10 +168,8 @@ static struct verbs_context *zxdh_ualloc_context(struct ibv_device *ibdev, + iwvctx->iwupd = container_of(ibv_pd, struct zxdh_upd, ibv_pd); + add_private_ops(iwvctx); + return &iwvctx->ibv_ctx; +- + err_free: + free(iwvctx); +- + return NULL; + } + +diff --git a/providers/zrdma/zxdh_defs.h b/providers/zrdma/zxdh_defs.h +index 3863fb9..8772e7b 100644 +--- a/providers/zrdma/zxdh_defs.h ++++ b/providers/zrdma/zxdh_defs.h +@@ -389,8 +389,8 @@ static inline void db_wr32(__u32 val, __u32 *wqe_word) + *wqe_word = val; + } + +-#define read_wqe_need_split(pre_cal_psn, next_psn) \ +- (((pre_cal_psn < next_psn) && (pre_cal_psn != 0)) || \ +- ((next_psn <= 0x7FFFFF) && (pre_cal_psn > 0x800000))) +- ++#define 
read_wqe_need_split(pre_cal_psn, next_psn, chip_rev) \ ++ (!(chip_rev == 2) && \ ++ (((pre_cal_psn < next_psn) && (pre_cal_psn != 0)) || \ ++ ((next_psn <= 0x7FFFFF) && (pre_cal_psn > 0x800000)))) + #endif /* ZXDH_DEFS_H */ +diff --git a/providers/zrdma/zxdh_devids.h b/providers/zrdma/zxdh_devids.h +index ac23124..3430f5f 100644 +--- a/providers/zrdma/zxdh_devids.h ++++ b/providers/zrdma/zxdh_devids.h +@@ -6,12 +6,21 @@ + /* ZXDH VENDOR ID */ + #define PCI_VENDOR_ID_ZXDH_EVB 0x16c3 + #define PCI_VENDOR_ID_ZXDH_E312 0x1cf2 ++#define PCI_VENDOR_ID_ZXDH_E310 0x1cf2 ++#define PCI_VENDOR_ID_ZXDH_E310_RDMA 0x1cf2 ++#define PCI_VENDOR_ID_ZXDH_E316 0x1cf2 + #define PCI_VENDOR_ID_ZXDH_X512 0x1cf2 + /* ZXDH Devices ID */ + #define ZXDH_DEV_ID_ADAPTIVE_EVB_PF 0x8040 /* ZXDH EVB PF DEVICE ID*/ + #define ZXDH_DEV_ID_ADAPTIVE_EVB_VF 0x8041 /* ZXDH EVB VF DEVICE ID*/ + #define ZXDH_DEV_ID_ADAPTIVE_E312_PF 0x8049 /* ZXDH E312 PF DEVICE ID*/ + #define ZXDH_DEV_ID_ADAPTIVE_E312_VF 0x8060 /* ZXDH E312 VF DEVICE ID*/ ++#define ZXDH_DEV_ID_ADAPTIVE_E310_PF 0x8061 /* ZXDH E310 PF DEVICE ID*/ ++#define ZXDH_DEV_ID_ADAPTIVE_E310_VF 0x8062 /* ZXDH E310 VF DEVICE ID*/ ++#define ZXDH_DEV_ID_ADAPTIVE_E310_RDMA_PF 0x8084 /* ZXDH E310_RDMA PF DEVICE ID*/ ++#define ZXDH_DEV_ID_ADAPTIVE_E310_RDMA_VF 0x8085 /* ZXDH E310_RDMA VF DEVICE ID*/ ++#define ZXDH_DEV_ID_ADAPTIVE_E316_PF 0x807e /* ZXDH E316 PF DEVICE ID*/ ++#define ZXDH_DEV_ID_ADAPTIVE_E316_VF 0x807f /* ZXDH E316 VF DEVICE ID*/ + #define ZXDH_DEV_ID_ADAPTIVE_X512_PF 0x806B /* ZXDH X512 PF DEVICE ID*/ + #define ZXDH_DEV_ID_ADAPTIVE_X512_VF 0x806C /* ZXDH X512 VF DEVICE ID*/ + #endif /* ZXDH_DEVIDS_H */ +diff --git a/providers/zrdma/zxdh_hw.c b/providers/zrdma/zxdh_hw.c +index 073b198..99489dc 100644 +--- a/providers/zrdma/zxdh_hw.c ++++ b/providers/zrdma/zxdh_hw.c +@@ -703,8 +703,12 @@ enum zxdh_status_code zxdh_rdma_read(struct zxdh_qp *qp, + struct zxdh_post_sq_info split_part2_info = { 0 }; + struct zxdh_rdma_read *op_info; + enum zxdh_status_code ret_code; ++ struct zxdh_uqp *iwuqp; ++ struct zxdh_uvcontext *iwvctx; + __u32 i, total_size = 0, pre_cal_psn = 0; +- ++ iwuqp = container_of(qp, struct zxdh_uqp, qp); ++ iwvctx = container_of(iwuqp->vqp.qp.context, struct zxdh_uvcontext, ++ ibv_ctx.context); + op_info = &info->op.rdma_read; + if (qp->max_sq_frag_cnt < op_info->num_lo_sges) + return ZXDH_ERR_INVALID_FRAG_COUNT; +@@ -720,7 +724,8 @@ enum zxdh_status_code zxdh_rdma_read(struct zxdh_qp *qp, + op_info->rem_addr.len = total_size; + pre_cal_psn = qp->next_psn; + qp_tx_psn_add(&pre_cal_psn, total_size, qp->mtu); +- if (read_wqe_need_split(pre_cal_psn, qp->next_psn)) { ++ if (read_wqe_need_split(pre_cal_psn, qp->next_psn, ++ iwvctx->dev_attrs.chip_rev)) { + split_two_part_info(qp, info, qp->next_psn, pre_cal_psn, + &split_part1_info, &split_part2_info); + ret_code = zxdh_post_rdma_read(qp, &split_part1_info, post_sq, +diff --git a/providers/zrdma/zxdh_verbs.h b/providers/zrdma/zxdh_verbs.h +index 40aa7bb..1a26cf4 100644 +--- a/providers/zrdma/zxdh_verbs.h ++++ b/providers/zrdma/zxdh_verbs.h +@@ -71,47 +71,13 @@ + + #define ZXDH_WQEALLOC_WQE_DESC_INDEX GENMASK(31, 20) + +-enum zxdh_device_caps_const { +- ZXDH_WQE_SIZE = 4, +- ZXDH_SRQE_SIZE = 2, +- ZXDH_CQP_WQE_SIZE = 8, +- ZXDH_CQE_SIZE = 8, +- ZXDH_EXTENDED_CQE_SIZE = 8, +- ZXDH_AEQE_SIZE = 2, +- ZXDH_CEQE_SIZE = 1, +- ZXDH_CQP_CTX_SIZE = 8, +- ZXDH_SHADOW_AREA_SIZE = 8, +- ZXDH_GATHER_STATS_BUF_SIZE = 1024, +- ZXDH_MIN_IW_QP_ID = 0, +- ZXDH_QUERY_FPM_BUF_SIZE = 176, +- ZXDH_COMMIT_FPM_BUF_SIZE = 176, +- 
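Written out as a function, the reworked read_wqe_need_split() test used by zxdh_rdma_read() above decides whether a READ must be issued as two WQEs based on where the pre-calculated end PSN lands relative to next_psn, and skips the split entirely on chip revision 2 (the only thing the new chip_rev argument is checked against). A sketch under that reading (function name is illustrative):

#include <stdbool.h>
#include <stdint.h>

/* Mirror of the read_wqe_need_split() macro: chip revision 2 never splits;
 * otherwise split when the pre-calculated PSN has wrapped behind next_psn,
 * or when next_psn sits in the lower half of the 24-bit space while the
 * pre-calculated PSN sits in the upper half. */
static inline bool read_needs_split(uint32_t pre_cal_psn, uint32_t next_psn,
				    uint8_t chip_rev)
{
	if (chip_rev == 2)
		return false;

	return (pre_cal_psn < next_psn && pre_cal_psn != 0) ||
	       (next_psn <= 0x7FFFFF && pre_cal_psn > 0x800000);
}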
ZXDH_MAX_IW_QP_ID = 262143, +- ZXDH_MIN_CEQID = 0, +- ZXDH_MAX_CEQID = 1023, +- ZXDH_CEQ_MAX_COUNT = ZXDH_MAX_CEQID + 1, +- ZXDH_MIN_CQID = 0, +- ZXDH_MAX_CQID = 524287, +- ZXDH_MIN_AEQ_ENTRIES = 1, +- ZXDH_MAX_AEQ_ENTRIES = 524287, +- ZXDH_MIN_CEQ_ENTRIES = 1, +- ZXDH_MAX_CEQ_ENTRIES = 262143, +- ZXDH_MIN_CQ_SIZE = 1, +- ZXDH_MAX_CQ_SIZE = 1048575, +- ZXDH_DB_ID_ZERO = 0, +- ZXDH_MAX_WQ_FRAGMENT_COUNT = 13, +- ZXDH_MAX_SGE_RD = 13, +- ZXDH_MAX_OUTBOUND_MSG_SIZE = 2147483647, +- ZXDH_MAX_INBOUND_MSG_SIZE = 2147483647, +- ZXDH_MAX_PUSH_PAGE_COUNT = 1024, +- ZXDH_MAX_PE_ENA_VF_COUNT = 32, +- ZXDH_MAX_VF_FPM_ID = 47, +- ZXDH_MAX_SQ_PAYLOAD_SIZE = 2147483648, +- ZXDH_MAX_INLINE_DATA_SIZE = 217, +- ZXDH_MAX_WQ_ENTRIES = 32768, +- ZXDH_Q2_BUF_SIZE = 256, +- ZXDH_QP_CTX_SIZE = 256, +- ZXDH_MAX_PDS = 262144, +-}; ++#define ZXDH_SRQE_SIZE 2 ++#define ZXDH_CQE_SIZE 8 ++#define ZXDH_EXTENDED_CQE_SIZE 8 ++#define ZXDH_MAX_INLINE_DATA_SIZE 217 ++#define ZXDH_MAX_SQ_PAYLOAD_SIZE 2147483648 ++#define ZXDH_MIN_CQ_SIZE 1 ++#define ZXDH_MAX_CQ_SIZE 2097152 + + enum zxdh_addressing_type { + ZXDH_ADDR_TYPE_ZERO_BASED = 0, +@@ -394,8 +360,9 @@ struct zxdh_dev_attrs { + __u32 max_hw_cq_size; + __u16 max_hw_sq_chunk; + __u32 max_hw_srq_wr; +- __u8 hw_rev; + __u8 db_addr_type; ++ __u8 chip_rev; ++ __u16 rdma_tool_flags; + }; + + struct zxdh_hw_attrs { +-- +2.27.0 + diff --git a/0049-libzrdma-Add-poll-cqe-error-to-Failed-status.patch b/0049-libzrdma-Add-poll-cqe-error-to-Failed-status.patch new file mode 100644 index 0000000000000000000000000000000000000000..1d60e3473ccfccea989052c9c0fa93b405178370 --- /dev/null +++ b/0049-libzrdma-Add-poll-cqe-error-to-Failed-status.patch @@ -0,0 +1,241 @@ +From 3a0b295e98d9557c65274424dc23b74de4aef8d2 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?=E6=9D=8E=E5=AF=8C=E8=89=B3?= +Date: Fri, 28 Mar 2025 15:13:32 +0800 +Subject: [PATCH] libzrdma:Add poll cqe error to Failed status +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +Signed-off-by: 李富艳 +--- + providers/zrdma/zxdh_verbs.c | 68 +++++++++++++-------- + providers/zrdma/zxdh_verbs.h | 113 +++++++++++++++++++++++++++++++++++ + 2 files changed, 156 insertions(+), 25 deletions(-) + +diff --git a/providers/zrdma/zxdh_verbs.c b/providers/zrdma/zxdh_verbs.c +index 93cf705..f67f8c7 100644 +--- a/providers/zrdma/zxdh_verbs.c ++++ b/providers/zrdma/zxdh_verbs.c +@@ -616,30 +616,50 @@ int zxdh_umodify_cq(struct ibv_cq *cq, struct ibv_modify_cq_attr *attr) + } + + static enum ibv_wc_status +-zxdh_flush_err_to_ib_wc_status(enum zxdh_flush_opcode opcode) ++zxdh_err_to_ib_wc_status(__u32 opcode) + { + switch (opcode) { +- case FLUSH_PROT_ERR: ++ case ZXDH_RX_WQE_LEN_ERR: ++ return IBV_WC_LOC_LEN_ERR; ++ case ZXDH_TX_ACK_SYS_TOP_VADDR_LEN_CHECK_ERR: ++ case ZXDH_TX_ACK_SYS_TOP_LKEY_CHECK_ERR: ++ case ZXDH_TX_ACK_SYS_TOP_ACCESS_RIGHT_CHECK_ERR: ++ case ZXDH_RX_MR_MW_STATE_FREE_ERR: ++ case ZXDH_RX_MR_MW_STATE_INVALID_ERR: ++ case ZXDH_RX_MR_MW_PD_CHECK_ERR: ++ case ZXDH_RX_MR_MW_KEY_CHECK_ERR: ++ case ZXDH_RX_MR_MW_STAG_INDEX_CHECK_ERR: ++ case ZXDH_RX_MR_MW_BOUNDARY_CHECK_ERR: ++ case ZXDH_RX_MR_MW_0STAG_INDEX_CHECK_ERR: ++ case ZXDH_RX_MW_STATE_INVALID_ERR: ++ case ZXDH_RX_MW_PD_CHECK_ERR: ++ case ZXDH_RX_MW_STAG_INDEX_CHECK_ERR: ++ case ZXDH_RX_MW_SHARE_MR_CHECK_ERR: ++ case ZXDH_RX_MR_PD_CHECK_ERR: ++ case ZXDH_RX_MR_SHARE_MR_CHECK_ERR: ++ case ZXDH_RX_MR_MW_ACCESS_CHECK_ERR: + return IBV_WC_LOC_PROT_ERR; +- case FLUSH_REM_ACCESS_ERR: ++ case ZXDH_TX_PARSE_TOP_WQE_FLUSH: ++ return 
IBV_WC_WR_FLUSH_ERR; ++ case ZXDH_TX_ACK_SYS_TOP_NAK_INVALID_REQ: ++ return IBV_WC_REM_INV_REQ_ERR; ++ case ZXDH_TX_ACK_SYS_TOP_NAK_REMOTE_ACCESS_ERR: ++ case ZXDH_RX_MW_RKEY_CHECK_ERR: ++ case ZXDH_RX_MR_RKEY_CHECK_ERR: + return IBV_WC_REM_ACCESS_ERR; +- case FLUSH_LOC_QP_OP_ERR: +- return IBV_WC_LOC_QP_OP_ERR; +- case FLUSH_REM_OP_ERR: ++ case ZXDH_TX_ACK_SYS_TOP_NAK_REMOTE_OPERATIONAL_ERR: + return IBV_WC_REM_OP_ERR; +- case FLUSH_LOC_LEN_ERR: +- return IBV_WC_LOC_LEN_ERR; +- case FLUSH_GENERAL_ERR: +- return IBV_WC_WR_FLUSH_ERR; +- case FLUSH_RETRY_EXC_ERR: ++ case ZXDH_TX_ACK_SYS_TOP_NAK_RETRY_LIMIT: ++ case ZXDH_TX_ACK_SYS_TOP_READ_RETRY_LIMIT: ++ case ZXDH_TX_ACK_SYS_TOP_TIMEOUT_RETRY_LIMIT: + return IBV_WC_RETRY_EXC_ERR; +- case FLUSH_MW_BIND_ERR: +- return IBV_WC_MW_BIND_ERR; +- case FLUSH_REM_INV_REQ_ERR: +- return IBV_WC_REM_INV_REQ_ERR; +- case FLUSH_FATAL_ERR: +- default: ++ case ZXDH_TX_ACK_SYS_TOP_RNR_RETRY_LIMIT: ++ return IBV_WC_RNR_RETRY_EXC_ERR; ++ case ZXDH_TX_PARSE_TOP_AXI_ERR: ++ case ZXDH_RX_AXI_RESP_ERR: + return IBV_WC_FATAL_ERR; ++ default: ++ return IBV_WC_GENERAL_ERR; + } + } + +@@ -656,10 +676,9 @@ static inline void zxdh_process_cqe_ext(struct zxdh_cq_poll_info *cur_cqe) + ibvcq_ex->wr_id = cur_cqe->wr_id; + if (cur_cqe->error) + ibvcq_ex->status = +- (cur_cqe->comp_status == ZXDH_COMPL_STATUS_FLUSHED) ? +- zxdh_flush_err_to_ib_wc_status( +- cur_cqe->minor_err) : +- IBV_WC_GENERAL_ERR; ++ zxdh_err_to_ib_wc_status( ++ cur_cqe->major_err << 16 | ++ cur_cqe->minor_err); + else + ibvcq_ex->status = IBV_WC_SUCCESS; + } +@@ -683,10 +702,9 @@ static inline void zxdh_process_cqe(struct ibv_wc *entry, + + if (cur_cqe->error) { + entry->status = +- (cur_cqe->comp_status == ZXDH_COMPL_STATUS_FLUSHED) ? +- zxdh_flush_err_to_ib_wc_status( +- cur_cqe->minor_err) : +- IBV_WC_GENERAL_ERR; ++ zxdh_err_to_ib_wc_status( ++ cur_cqe->major_err << 16 | ++ cur_cqe->minor_err); + entry->vendor_err = + cur_cqe->major_err << 16 | cur_cqe->minor_err; + } else { +diff --git a/providers/zrdma/zxdh_verbs.h b/providers/zrdma/zxdh_verbs.h +index 1a26cf4..e3974c1 100644 +--- a/providers/zrdma/zxdh_verbs.h ++++ b/providers/zrdma/zxdh_verbs.h +@@ -146,6 +146,119 @@ enum zxdh_page_size { + ZXDH_PAGE_SIZE_1G = 18, + }; + ++enum zxdh_rdmatx_parse_top_err { ++ ZXDH_TX_PARSE_TOP_AXI_ERR = 0x1, ++ ZXDH_TX_PARSE_TOP_WQE_FLUSH = 0x10001, ++ ZXDH_TX_PARSE_TOP_ORD_ERR = 0x20020, ++ ZXDH_TX_PARSE_TOP_OPCODE_ERR_FLAG = 0x20021, ++ ZXDH_TX_PARSE_TOP_CQP_STATE_AXI_ERR = 0x20022, ++ ZXDH_TX_PARSE_TOP_WQE_LEN_ERR = 0x20023, ++ ZXDH_TX_PARSE_TOP_DATA_LEN_ERR = 0x20024, ++ ZXDH_TX_PARSE_TOP_AH_VALID0_ERR = 0x20025, ++ ZXDH_TX_PARSE_TOP_UD_PDINDEX_ERR = 0x20026, ++ ZXDH_TX_PARSE_TOP_QP_STATE_ERR = 0x20027, ++ ZXDH_TX_PARSE_TOP_SERVICE_TYPE_ERR = 0x20028, ++ ZXDH_TX_PARSE_TOP_UD_PAYLOAD_ERR = 0x20029, ++ ZXDH_TX_PARSE_TOP_WQE_LEN0_ERR = 0x2002a, ++ ZXDH_TX_PARSE_TOP_WQE_DEFICIENT_CLR_ERR = 0x2002b, ++ ZXDH_TX_PARSE_TOP_IMMDT_ERR = 0x2002c, ++ ZXDH_TX_PARSE_TOP_FRAGMENT_LENGTH_ERR = 0x2009f, ++ ZXDH_TX_PARSE_TOP_MRTE_STATE_ERR = 0x90091, ++ ZXDH_TX_PARSE_TOP_QP_CHECK_ERR = 0x90092, ++ ZXDH_TX_PARSE_TOP_PD_CHECK_ERR = 0x90093, ++ ZXDH_TX_PARSE_TOP_LKEY_CHECK_ERR = 0x90094, ++ ZXDH_TX_PARSE_TOP_STAG_INDEX_CHECK_ERR = 0x90095, ++ ZXDH_TX_PARSE_TOP_VADDR_LEN_CHECK_ERR = 0x90096, ++ ZXDH_TX_PARSE_TOP_ACCESS_RIGHT_CHECK_ERR = 0x90097, ++ ZXDH_TX_PARSE_TOP_STAG_INDEX_CHECK_ZERO_ERR = 0x90098, ++}; ++ ++enum zxdh_rdmatx_ack_sys_top_err { ++ ZXDH_TX_ACK_SYS_TOP_NVME_INDEX_ERR = 0x30030, ++ 
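The new zxdh_err_to_ib_wc_status() is keyed on the same 32-bit value that zxdh_process_cqe() already exposes as vendor_err: the CQE's major error code in the upper 16 bits and the minor code in the lower 16 (assuming 16-bit major/minor fields, as the << 16 packing implies). A small sketch of how the two relate (helper name is illustrative; the field layout mirrors struct zxdh_cq_poll_info):

#include <stdint.h>

/* Combine the CQE error fields the same way zxdh_process_cqe() does
 * when it fills entry->vendor_err. */
static inline uint32_t zxdh_cqe_error_code(uint16_t major_err, uint16_t minor_err)
{
	return (uint32_t)major_err << 16 | minor_err;
}

/* Example: major 0x8, minor 0x00f1 gives 0x800f1, i.e.
 * ZXDH_TX_ACK_SYS_TOP_NAK_RETRY_LIMIT, which the mapping above reports
 * as IBV_WC_RETRY_EXC_ERR. */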
ZXDH_TX_ACK_SYS_TOP_NVME_NOF_QID_ERR = 0x30031, ++ ZXDH_TX_ACK_SYS_TOP_NVME_NOF_PD_INDEX_ERR = 0x30032, ++ ZXDH_TX_ACK_SYS_TOP_NVME_LENGTH_ERR = 0x30033, ++ ZXDH_TX_ACK_SYS_TOP_NVME_KEY_ERR = 0x30034, ++ ZXDH_TX_ACK_SYS_TOP_NVME_ACCESS_ERR = 0x30035, ++ ZXDH_TX_ACK_SYS_TOP_MRTE_STATE_ERR = 0x50091, ++ ZXDH_TX_ACK_SYS_TOP_QP_CHECK_ERR = 0x50092, ++ ZXDH_TX_ACK_SYS_TOP_PD_CHECK_ERR = 0x50093, ++ ZXDH_TX_ACK_SYS_TOP_LKEY_CHECK_ERR = 0x50094, ++ ZXDH_TX_ACK_SYS_TOP_STAG_INDEX_CHECK_ERR = 0x50095, ++ ZXDH_TX_ACK_SYS_TOP_VADDR_LEN_CHECK_ERR = 0x50096, ++ ZXDH_TX_ACK_SYS_TOP_ACCESS_RIGHT_CHECK_ERR = 0x50097, ++ ZXDH_TX_ACK_SYS_TOP_STAG_INDEX_CHECK_ZERO_ERR = 0x50098, ++ ZXDH_TX_ACK_SYS_TOP_LOC_LEN_ERR = 0x600c0, ++ ZXDH_TX_ACK_SYS_TOP_NAK_INVALID_REQ = 0x700d0, ++ ZXDH_TX_ACK_SYS_TOP_NAK_REMOTE_ACCESS_ERR = 0x700d1, ++ ZXDH_TX_ACK_SYS_TOP_NAK_REMOTE_OPERATIONAL_ERR = 0x700d2, ++ ZXDH_TX_ACK_SYS_TOP_NAK_RETRY_LIMIT = 0x800f1, ++ ZXDH_TX_ACK_SYS_TOP_READ_RETRY_LIMIT = 0x800f2, ++ ZXDH_TX_ACK_SYS_TOP_TIMEOUT_RETRY_LIMIT = 0x800f3, ++ ZXDH_TX_ACK_SYS_TOP_RNR_RETRY_LIMIT = 0x800f4, ++}; ++ ++enum zxdh_rdmatx_window_top_err { ++ ZXDH_TX_WINDOW_TOP_WINDOW_NO_ENTRY = 0x800f5, ++ ZXDH_TX_WINDOW_TOP_WINDOW_BACK_MSN = 0x800f6, ++ ZXDH_TX_WINDOW_TOP_WINDOW_SMALL_MSN = 0x800f7, ++}; ++ ++enum zxdh_rdmatx_doorbell_mgr_err { ++ ZXDH_TX_DOORBELL_MGR_INDEX_CHECK_ERROR = 0x30036, ++ ZXDH_TX_DOORBELL_MGR_QID_CHECK_ERROR = 0x30037, ++ ZXDH_TX_DOORBELL_MGR_PD_INDEX_CHECK_ERROR = 0x30038, ++ ZXDH_TX_DOORBELL_MGR_LENGTH_CHECK_ERROR = 0x30039, ++ ZXDH_TX_DOORBELL_MGR_KEY_CHECK_ERROR = 0x3003a, ++ ZXDH_TX_DOORBELL_MGR_ACCESS_CHECK_ERROR = 0x3003b, ++}; ++ ++enum zxdh_rdmarx_err { ++ ZXDH_RX_CQP_FLUSH = 0x12, ++ ZXDH_RX_FIRST_PACKET_ERR = 0x4f, ++ ZXDH_RX_INVALID_OPCODE = 0x50, ++ ZXDH_RX_ORDER_ERR = 0x51, ++ ZXDH_RX_LEN_ERR = 0x52, ++ ZXDH_RX_SQR_STATE_ERR = 0x53, ++ ZXDH_RX_WQE_SIGN_ERR = 0x54, ++ ZXDH_RX_WQE_LEN_ERR = 0x55, ++ ZXDH_RX_SQR_WATER_LEVEL_ERR = 0x80, ++ ZXDH_RX_SRQ_AXI_RESP_ERR = 0xb1, ++ ZXDH_RX_CQ_OVERFLOW_ERR = 0x76, ++ ZXDH_RX_QP_CQ_OVERFLOW_ERR = 0x78, ++ ZXDH_RX_CQ_STATE_ERR = 0x7a, ++ ZXDH_RX_CQ_AXI_ERR = 0x7b, ++ ZXDH_RX_QP_CQ_AXI_ERR = 0x7c, ++ ZXDH_RX_NOF_IOQ_ERR = 0x70, ++ ZXDH_RX_NOF_PDNUM_ERR = 0x71, ++ ZXDH_RX_NOF_LEN_ERR = 0x72, ++ ZXDH_RX_NOF_RKEY_ERR = 0x73, ++ ZXDH_RX_NOF_ACC_ERR = 0x74, ++ ZXDH_RX_IRD_OVF = 0x77, ++ ZXDH_RX_MR_MW_STATE_FREE_ERR = 0x90, ++ ZXDH_RX_MR_MW_STATE_INVALID_ERR = 0x91, ++ ZXDH_RX_TYPE2B_MW_QPN_CHECK_ERR = 0x92, ++ ZXDH_RX_MR_MW_PD_CHECK_ERR = 0x93, ++ ZXDH_RX_MR_MW_KEY_CHECK_ERR = 0x94, ++ ZXDH_RX_MR_MW_STAG_INDEX_CHECK_ERR = 0x95, ++ ZXDH_RX_MR_MW_BOUNDARY_CHECK_ERR = 0x96, ++ ZXDH_RX_MR_MW_ACCESS_CHECK_ERR = 0x97, ++ ZXDH_RX_MR_MW_0STAG_INDEX_CHECK_ERR = 0x98, ++ ZXDH_RX_MW_STATE_INVALID_ERR = 0x99, ++ ZXDH_RX_MW_PD_CHECK_ERR = 0x9a, ++ ZXDH_RX_MW_RKEY_CHECK_ERR = 0x9b, ++ ZXDH_RX_TYPE2BMW_QPN_CHECK_ERR = 0x9c, ++ ZXDH_RX_MW_STAG_INDEX_CHECK_ERR = 0x9d, ++ ZXDH_RX_MW_SHARE_MR_CHECK_ERR = 0x9e, ++ ZXDH_RX_MW_TYPE1_CHECK_ERR = 0x9f, ++ ZXDH_RX_MR_PD_CHECK_ERR = 0xa0, ++ ZXDH_RX_MR_RKEY_CHECK_ERR = 0xa1, ++ ZXDH_RX_MR_SHARE_MR_CHECK_ERR = 0xa4, ++ ZXDH_RX_MR_BOND_MW_NUM_CHECK_ERR = 0xa5, ++ ZXDH_RX_MR_CANBE_R_INVALID_CHECK_ERR = 0xa6, ++ ZXDH_RX_AXI_RESP_ERR = 0xb0, ++}; ++ + struct zxdh_qp; + struct zxdh_cq; + struct zxdh_qp_init_info; +-- +2.27.0 + diff --git a/0050-libzrdma-Add-sq-rq-flush-cqe-and-log-optimization.patch b/0050-libzrdma-Add-sq-rq-flush-cqe-and-log-optimization.patch new file mode 100644 index 
0000000000000000000000000000000000000000..b65ed3f8aeacd4302ba43beddfa3c61d7d6083a1 --- /dev/null +++ b/0050-libzrdma-Add-sq-rq-flush-cqe-and-log-optimization.patch @@ -0,0 +1,625 @@ +From 9f440ac471dce7fa44f0ecb6df4ca2ac0509d105 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?=E6=9D=8E=E5=AF=8C=E8=89=B3?= +Date: Fri, 28 Mar 2025 15:30:41 +0800 +Subject: [PATCH] libzrdma:Add sq/rq flush cqe and log optimization +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +Signed-off-by: 李富艳 +--- + providers/zrdma/main.h | 10 +- + providers/zrdma/zxdh_hw.c | 253 ++++++++++++++++++++++------------- + providers/zrdma/zxdh_verbs.c | 72 +++++----- + 3 files changed, 206 insertions(+), 129 deletions(-) + +diff --git a/providers/zrdma/main.h b/providers/zrdma/main.h +index e28c77b..7c78fed 100644 +--- a/providers/zrdma/main.h ++++ b/providers/zrdma/main.h +@@ -38,15 +38,17 @@ enum { + ZXDH_DBG_SRQ = 1 << 2, + }; + extern uint32_t zxdh_debug_mask; +-#define zxdh_dbg(ctx, mask, format, arg...) \ ++#define zxdh_dbg(mask, format, arg...) \ + do { \ + if (mask & zxdh_debug_mask) { \ +- int zxdh_dbg_tmp = errno; \ +- verbs_debug(ctx, format, ##arg); \ +- errno = zxdh_dbg_tmp; \ ++ int tmp = errno; \ ++ fprintf(stdout, "%s:%d: " format, __func__, __LINE__, \ ++ ##arg); \ ++ errno = tmp; \ + } \ + } while (0) + ++ + struct zxdh_udevice { + struct verbs_device ibv_dev; + }; +diff --git a/providers/zrdma/zxdh_hw.c b/providers/zrdma/zxdh_hw.c +index 99489dc..fb8f016 100644 +--- a/providers/zrdma/zxdh_hw.c ++++ b/providers/zrdma/zxdh_hw.c +@@ -1785,19 +1785,26 @@ static inline void build_comp_status(__u32 cq_type, + } + return; + } +- if (info->major_err == ZXDH_RETRY_ACK_MAJOR_ERR && +- info->minor_err == ZXDH_RETRY_ACK_MINOR_ERR) { +- info->comp_status = ZXDH_COMPL_STATUS_RETRY_ACK_ERR; ++ ++ switch (info->major_err) { ++ case ZXDH_RETRY_ACK_MAJOR_ERR: ++ if (info->minor_err == ZXDH_RETRY_ACK_MINOR_ERR) { ++ info->comp_status = ZXDH_COMPL_STATUS_RETRY_ACK_ERR; ++ return; ++ } ++ if (info->minor_err == ZXDH_TX_WINDOW_QUERY_ITEM_MINOR_ERR) { ++ info->comp_status = ++ ZXDH_COMPL_STATUS_TX_WINDOW_QUERY_ITEM_ERR; ++ return; ++ } ++ break; ++ case ZXDH_FLUSH_MAJOR_ERR: ++ info->comp_status = ZXDH_COMPL_STATUS_FLUSHED; + return; +- } +- if (info->major_err == ZXDH_RETRY_ACK_MAJOR_ERR && +- info->minor_err == ZXDH_TX_WINDOW_QUERY_ITEM_MINOR_ERR) { +- info->comp_status = ZXDH_COMPL_STATUS_TX_WINDOW_QUERY_ITEM_ERR; ++ default: ++ info->comp_status = ZXDH_COMPL_STATUS_UNKNOWN; + return; + } +- info->comp_status = (info->major_err == ZXDH_FLUSH_MAJOR_ERR) ? 
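Both the old and the new zxdh_dbg() wrap the print in an errno save/restore, since stdio may clobber errno and callers often log immediately before returning a failure code. A generic restatement of that pattern (macro name and output stream are illustrative only, not what the patch installs):

#include <errno.h>
#include <stdio.h>

/* Log without disturbing errno, so the caller's error path still sees
 * the original failure code after the message is emitted. */
#define DBG_PRINT(fmt, ...)                                              \
	do {                                                             \
		int saved_errno_ = errno;                                \
		fprintf(stderr, "%s:%d: " fmt, __func__, __LINE__,       \
			##__VA_ARGS__);                                  \
		errno = saved_errno_;                                    \
	} while (0)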
+- ZXDH_COMPL_STATUS_FLUSHED : +- ZXDH_COMPL_STATUS_UNKNOWN; + } + + __le64 *get_current_cqe(struct zxdh_cq *cq) +@@ -1837,9 +1844,9 @@ static inline void zxdh_get_cq_poll_info(struct zxdh_qp *qp, + } + } + +-static void update_cq_poll_info(struct zxdh_qp *qp, +- struct zxdh_cq_poll_info *info, __u32 wqe_idx, +- __u64 qword0) ++static enum zxdh_status_code update_cq_poll_info(struct zxdh_qp *qp, ++ struct zxdh_cq_poll_info *info, ++ __u32 wqe_idx, __u64 qword0) + { + info->wr_id = qp->sq_wrtrk_array[wqe_idx].wrid; + if (!info->comp_status) +@@ -1847,6 +1854,7 @@ static void update_cq_poll_info(struct zxdh_qp *qp, + info->op_type = (__u8)FIELD_GET(ZXDHCQ_OP, qword0); + ZXDH_RING_SET_TAIL(qp->sq_ring, + wqe_idx + qp->sq_wrtrk_array[wqe_idx].quanta); ++ return ZXDH_SUCCESS; + } + + static enum zxdh_status_code +@@ -1862,9 +1870,9 @@ process_tx_window_query_item_err(struct zxdh_qp *qp, + ib_qp = &iwuqp->vqp.qp; + ret = zxdh_query_qpc(ib_qp, &qpc); + if (ret) { +- verbs_err(verbs_get_ctx(ib_qp->context), +- "process tx window query item query qpc failed:%d\n", +- ret); ++ zxdh_dbg(ZXDH_DBG_QP, ++ "process tx window query item query qpc failed:%d\n", ++ ret); + return ZXDH_ERR_RETRY_ACK_ERR; + } + if (qpc.tx_last_ack_psn != qp->qp_last_ack_qsn) +@@ -1876,9 +1884,9 @@ process_tx_window_query_item_err(struct zxdh_qp *qp, + + ret = zxdh_reset_qp(ib_qp, ZXDH_RESET_RETRY_TX_ITEM_FLAG); + if (ret) { +- verbs_err(verbs_get_ctx(ib_qp->context), +- "process tx window query item reset qp failed:%d\n", +- ret); ++ zxdh_dbg(ZXDH_DBG_QP, ++ "process tx window query item reset qp failed:%d\n", ++ ret); + return ZXDH_ERR_RETRY_ACK_ERR; + } + qp->qp_reset_cnt++; +@@ -1899,8 +1907,8 @@ process_retry_ack_err(struct zxdh_qp *qp, struct zxdh_cq_poll_info *info) + ib_qp = &iwuqp->vqp.qp; + ret = zxdh_query_qpc(ib_qp, &qpc); + if (ret) { +- verbs_err(verbs_get_ctx(ib_qp->context), +- "process retry ack query qpc failed:%d\n", ret); ++ zxdh_dbg(ZXDH_DBG_QP, "process retry ack query qpc failed:%d\n", ++ ret); + return ZXDH_ERR_RETRY_ACK_ERR; + } + if (!(qpc.retry_cqe_sq_opcode >= ZXDH_RETRY_CQE_SQ_OPCODE_ERR && +@@ -1926,14 +1934,122 @@ process_retry_ack_err(struct zxdh_qp *qp, struct zxdh_cq_poll_info *info) + ZXDH_RETRY_CQE_SQ_OPCODE | + ZXDH_TX_READ_RETRY_FLAG_SET); + if (ret) { +- verbs_err(verbs_get_ctx(ib_qp->context), +- "process retry ack modify qpc failed:%d\n", ret); ++ zxdh_dbg(ZXDH_DBG_QP, ++ "process retry ack modify qpc failed:%d\n", ret); + return ZXDH_ERR_RETRY_ACK_ERR; + } + qp->cqe_retry_cnt++; + return ZXDH_ERR_RETRY_ACK_NOT_EXCEED_ERR; + } + ++static enum zxdh_status_code ++zxdh_flush_sq_comp_info(struct zxdh_qp *qp, struct zxdh_cq_poll_info *info, ++ bool *move_cq_head) ++{ ++ if (!ZXDH_RING_MORE_WORK(qp->sq_ring)) { ++ ZXDH_RING_INIT(qp->sq_ring, qp->sq_ring.size) ++ return ZXDH_ERR_Q_EMPTY; ++ } ++ do { ++ __le64 *sw_wqe; ++ __u64 wqe_qword; ++ __u64 wqe_idx; ++ wqe_idx = qp->sq_ring.tail; ++ sw_wqe = qp->sq_base[wqe_idx].elem; ++ get_64bit_val(sw_wqe, 0, &wqe_qword); ++ info->op_type = (__u8)FIELD_GET(ZXDHQPSQ_OPCODE, wqe_qword); ++ ZXDH_RING_SET_TAIL(qp->sq_ring, ++ wqe_idx + ++ qp->sq_wrtrk_array[wqe_idx].quanta); ++ ++ if (info->op_type != ZXDH_OP_TYPE_NOP) { ++ info->wr_id = qp->sq_wrtrk_array[wqe_idx].wrid; ++ break; ++ } ++ } while (1); ++ qp->sq_flush_seen = true; ++ if (!ZXDH_RING_MORE_WORK(qp->sq_ring)) { ++ qp->sq_flush_complete = true; ++ ZXDH_RING_INIT(qp->sq_ring, qp->sq_ring.size) ++ } else ++ *move_cq_head = false; ++ return ZXDH_SUCCESS; ++} ++ ++static enum zxdh_status_code 
zxdh_sq_comp_info(struct zxdh_qp *qp, ++ struct zxdh_cq_poll_info *info, ++ __u32 wqe_idx, __u64 qword0, ++ bool *move_cq_head) ++{ ++ enum zxdh_status_code status_code; ++ switch (info->comp_status) { ++ case ZXDH_COMPL_STATUS_SUCCESS: ++ case ZXDH_COMPL_STATUS_UNKNOWN: ++ break; ++ case ZXDH_COMPL_STATUS_RETRY_ACK_ERR: ++ if (qp->qp_type == ZXDH_QP_TYPE_ROCE_RC) { ++ status_code = process_retry_ack_err(qp, info); ++ return (status_code == ZXDH_ERR_RETRY_ACK_ERR) ? ++ update_cq_poll_info(qp, info, wqe_idx, ++ qword0) : ++ status_code; ++ } ++ break; ++ case ZXDH_COMPL_STATUS_TX_WINDOW_QUERY_ITEM_ERR: ++ if (qp->qp_type == ZXDH_QP_TYPE_ROCE_RC) { ++ status_code = ++ process_tx_window_query_item_err(qp, info); ++ return (status_code == ZXDH_ERR_RETRY_ACK_ERR) ? ++ update_cq_poll_info(qp, info, wqe_idx, ++ qword0) : ++ status_code; ++ } ++ break; ++ case ZXDH_COMPL_STATUS_FLUSHED: ++ return zxdh_flush_sq_comp_info(qp, info, move_cq_head); ++ default: ++ break; ++ } ++ return update_cq_poll_info(qp, info, wqe_idx, qword0); ++} ++ ++static enum zxdh_status_code zxdh_rq_comp_info(struct zxdh_qp *qp, ++ struct zxdh_cq_poll_info *info, ++ __u32 wqe_idx, __u64 qword2, ++ __u64 qword3, bool *move_cq_head) ++{ ++ struct zxdh_uqp *iwuqp = NULL; ++ struct zxdh_usrq *iwusrq = NULL; ++ struct zxdh_srq *srq = NULL; ++ if (qp->is_srq) { ++ iwuqp = container_of(qp, struct zxdh_uqp, qp); ++ iwusrq = iwuqp->srq; ++ srq = &iwusrq->srq; ++ zxdh_free_srq_wqe(srq, wqe_idx); ++ info->wr_id = srq->srq_wrid_array[wqe_idx]; ++ zxdh_get_cq_poll_info(qp, info, qword2, qword3); ++ } else { ++ if (unlikely(info->comp_status == ZXDH_COMPL_STATUS_FLUSHED || ++ info->comp_status == ZXDH_COMPL_STATUS_UNKNOWN)) { ++ if (!ZXDH_RING_MORE_WORK(qp->rq_ring)) { ++ return ZXDH_ERR_Q_EMPTY; ++ } ++ wqe_idx = qp->rq_ring.tail; ++ } ++ info->wr_id = qp->rq_wrid_array[wqe_idx]; ++ zxdh_get_cq_poll_info(qp, info, qword2, qword3); ++ ZXDH_RING_SET_TAIL(qp->rq_ring, wqe_idx + 1); ++ if (info->comp_status == ZXDH_COMPL_STATUS_FLUSHED) { ++ qp->rq_flush_seen = true; ++ if (!ZXDH_RING_MORE_WORK(qp->rq_ring)) ++ qp->rq_flush_complete = true; ++ else ++ *move_cq_head = false; ++ } ++ } ++ return ZXDH_SUCCESS; ++} ++ + /** + * zxdh_cq_poll_cmpl - get cq completion info + * @cq: hw cq +@@ -1942,7 +2058,6 @@ process_retry_ack_err(struct zxdh_qp *qp, struct zxdh_cq_poll_info *info) + enum zxdh_status_code zxdh_cq_poll_cmpl(struct zxdh_cq *cq, + struct zxdh_cq_poll_info *info) + { +- enum zxdh_status_code status_code; + __u64 comp_ctx, qword0, qword2, qword3; + __le64 *cqe; + struct zxdh_qp *qp; +@@ -1951,9 +2066,6 @@ enum zxdh_status_code zxdh_cq_poll_cmpl(struct zxdh_cq *cq, + int ret_code; + bool move_cq_head = true; + __u8 polarity; +- struct zxdh_usrq *iwusrq = NULL; +- struct zxdh_srq *srq = NULL; +- struct zxdh_uqp *iwuqp; + + cqe = get_current_cqe(cq); + +@@ -1973,7 +2085,7 @@ enum zxdh_status_code zxdh_cq_poll_cmpl(struct zxdh_cq *cq, + ret_code = ZXDH_ERR_Q_DESTROYED; + goto exit; + } +- iwuqp = container_of(qp, struct zxdh_uqp, qp); ++ + info->qp_handle = (zxdh_qp_handle)(unsigned long)qp; + q_type = (__u8)FIELD_GET(ZXDH_CQ_SQ, qword0); + info->solicited_event = (bool)FIELD_GET(ZXDHCQ_SOEVENT, qword0); +@@ -1993,74 +2105,19 @@ enum zxdh_status_code zxdh_cq_poll_cmpl(struct zxdh_cq *cq, + + info->qp_id = (__u32)FIELD_GET(ZXDHCQ_QPID, qword2); + info->imm_valid = false; +- +- info->qp_handle = (zxdh_qp_handle)(unsigned long)qp; + switch (q_type) { +- case ZXDH_CQE_QTYPE_RQ: +- if (qp->is_srq) { +- iwusrq = iwuqp->srq; +- srq = 
&iwusrq->srq; +- zxdh_free_srq_wqe(srq, wqe_idx); +- info->wr_id = srq->srq_wrid_array[wqe_idx]; +- zxdh_get_cq_poll_info(qp, info, qword2, qword3); +- } else { +- if (unlikely(info->comp_status == +- ZXDH_COMPL_STATUS_FLUSHED || +- info->comp_status == +- ZXDH_COMPL_STATUS_UNKNOWN)) { +- if (!ZXDH_RING_MORE_WORK(qp->rq_ring)) { +- ret_code = ZXDH_ERR_Q_EMPTY; +- goto exit; +- } +- wqe_idx = qp->rq_ring.tail; +- } +- info->wr_id = qp->rq_wrid_array[wqe_idx]; +- zxdh_get_cq_poll_info(qp, info, qword2, qword3); +- ZXDH_RING_SET_TAIL(qp->rq_ring, wqe_idx + 1); +- if (info->comp_status == ZXDH_COMPL_STATUS_FLUSHED) { +- qp->rq_flush_seen = true; +- if (!ZXDH_RING_MORE_WORK(qp->rq_ring)) +- qp->rq_flush_complete = true; +- else +- move_cq_head = false; +- } +- pring = &qp->rq_ring; +- } +- ret_code = ZXDH_SUCCESS; +- break; + case ZXDH_CQE_QTYPE_SQ: +- if (info->comp_status == ZXDH_COMPL_STATUS_RETRY_ACK_ERR && +- qp->qp_type == ZXDH_QP_TYPE_ROCE_RC) { +- status_code = process_retry_ack_err(qp, info); +- if (status_code == ZXDH_ERR_RETRY_ACK_ERR) { +- update_cq_poll_info(qp, info, wqe_idx, qword0); +- ret_code = ZXDH_SUCCESS; +- } else { +- ret_code = status_code; +- } +- } else if (info->comp_status == +- ZXDH_COMPL_STATUS_TX_WINDOW_QUERY_ITEM_ERR && +- qp->qp_type == ZXDH_QP_TYPE_ROCE_RC) { +- status_code = +- process_tx_window_query_item_err(qp, info); +- if (status_code == ZXDH_ERR_RETRY_ACK_ERR) { +- update_cq_poll_info(qp, info, wqe_idx, qword0); +- ret_code = ZXDH_SUCCESS; +- } else { +- ret_code = status_code; +- } +- } else if (info->comp_status == ZXDH_COMPL_STATUS_FLUSHED) { +- info->wr_id = qp->sq_wrtrk_array[wqe_idx].wrid; +- ZXDH_RING_INIT(qp->sq_ring, qp->sq_ring.size); +- ret_code = ZXDH_SUCCESS; +- } else { +- update_cq_poll_info(qp, info, wqe_idx, qword0); +- ret_code = ZXDH_SUCCESS; +- } ++ ret_code = zxdh_sq_comp_info(qp, info, wqe_idx, qword0, ++ &move_cq_head); ++ pring = &qp->sq_ring; ++ break; ++ case ZXDH_CQE_QTYPE_RQ: ++ ret_code = zxdh_rq_comp_info(qp, info, wqe_idx, qword2, qword3, ++ &move_cq_head); ++ pring = &qp->rq_ring; + break; + default: +- zxdh_dbg(verbs_get_ctx(iwuqp->vqp.qp.context), ZXDH_DBG_CQ, +- "zxdh get cqe type unknow!\n"); ++ zxdh_dbg(ZXDH_DBG_CQ, "zxdh get cqe type unknow!\n"); + ret_code = ZXDH_ERR_Q_DESTROYED; + break; + } +@@ -2538,6 +2595,16 @@ enum zxdh_status_code zxdh_srq_init(struct zxdh_srq *srq, + ZXDH_RING_INIT(srq->srq_list_ring, srq->srq_list_size); + srq->srq_ring.tail = srq->srq_size - 1; + srq->srq_list_polarity = 1; ++ zxdh_dbg(ZXDH_DBG_SRQ, "%s srq_wqe_size_multiplier:%d srqshift:%d\n", ++ __func__, srq->srq_wqe_size_multiplier, srqshift); ++ zxdh_dbg( ++ ZXDH_DBG_SRQ, ++ "%s srq->srq_id:%d srq_base:0x%p srq_list_base:0x%p srq_db_base:0x%p\n", ++ __func__, srq->srq_id, srq->srq_base, srq->srq_list_base, ++ srq->srq_db_base); ++ zxdh_dbg(ZXDH_DBG_SRQ, ++ "%s srq->srq_id:%d srq_ring_size:%d srq->srq_list_size:%d\n", ++ __func__, srq->srq_id, srq_ring_size, srq->srq_list_size); + return 0; + } + +@@ -2558,4 +2625,6 @@ void zxdh_free_srq_wqe(struct zxdh_srq *srq, int wqe_index) + set_64bit_val(wqe, 0, hdr); + + pthread_spin_unlock(&iwusrq->lock); ++ zxdh_dbg(ZXDH_DBG_SRQ, "%s srq->srq_id:%d wqe_index:%d\n", __func__, ++ srq->srq_id, wqe_index); + } +diff --git a/providers/zrdma/zxdh_verbs.c b/providers/zrdma/zxdh_verbs.c +index f67f8c7..9cf1240 100644 +--- a/providers/zrdma/zxdh_verbs.c ++++ b/providers/zrdma/zxdh_verbs.c +@@ -1485,13 +1485,12 @@ static struct ibv_qp *create_qp(struct ibv_context *ibv_ctx, + } + + if 
(attr->cap.max_inline_data > dev_attrs->max_hw_inline) { +- zxdh_dbg(&iwvctx->ibv_ctx, ZXDH_DBG_QP, +- "max_inline_data over max_hw_inline\n"); ++ zxdh_dbg(ZXDH_DBG_QP, "max_inline_data over max_hw_inline\n"); + attr->cap.max_inline_data = dev_attrs->max_hw_inline; + } + +- zxdh_get_sq_wqe_shift(attr->cap.max_send_sge, attr->cap.max_inline_data, +- &sqshift); ++ zxdh_get_sq_wqe_shift(attr->cap.max_send_sge, ++ attr->cap.max_inline_data, &sqshift); + status = zxdh_get_sqdepth(dev_attrs, attr->cap.max_send_wr, sqshift, + &sqdepth); + if (status) { +@@ -2661,9 +2660,8 @@ static void zxdh_srq_wqe_init(struct zxdh_usrq *iwusrq) + __u64 hdr; + + srq = &iwusrq->srq; +- zxdh_dbg(verbs_get_ctx(iwusrq->ibv_srq.context), ZXDH_DBG_SRQ, +- "%s head:%d tail:%d\n", __func__, srq->srq_ring.head, +- srq->srq_ring.tail); ++ zxdh_dbg(ZXDH_DBG_SRQ, "%s head:%d tail:%d\n", __func__, ++ srq->srq_ring.head, srq->srq_ring.tail); + for (i = srq->srq_ring.head; i < srq->srq_ring.tail; i++) { + wqe = zxdh_get_srq_wqe(srq, i); + +@@ -2707,7 +2705,7 @@ static size_t zxdh_get_total_srq_size(struct zxdh_usrq *iwusrq, int srqdepth, + total_srq_queue_size + total_srq_list_size + total_srq_db_size; + iwusrq->total_buf_size = total_srq_size; + zxdh_dbg( +- verbs_get_ctx(iwusrq->ibv_srq.context), ZXDH_DBG_SRQ, ++ ZXDH_DBG_SRQ, + "%s total_srq_queue_size:%ld total_srq_list_size:%ld total_srq_db_size:%ld srqdepth:%d\n", + __func__, total_srq_queue_size, total_srq_list_size, + total_srq_db_size, srqdepth); +@@ -2730,7 +2728,7 @@ static int zxdh_alloc_srq_buf(struct zxdh_usrq *iwusrq, + (__le64 *)&info->srq_list_base[iwusrq->list_buf_size / + (sizeof(__u16))]; + *(__le64 *)info->srq_db_base = ZXDH_SRQ_DB_INIT_VALUE; +- zxdh_dbg(verbs_get_ctx(iwusrq->ibv_srq.context), ZXDH_DBG_SRQ, ++ zxdh_dbg(ZXDH_DBG_SRQ, + "%s srq_base:0x%p srq_list_base:0x%p srq_db_base:0x%p\n", + __func__, info->srq_base, info->srq_list_base, + info->srq_db_base); +@@ -2782,7 +2780,7 @@ static int create_srq(struct ibv_pd *pd, struct zxdh_usrq *iwusrq, + info->srq_size = resp.actual_srq_size; + info->srq_list_size = resp.actual_srq_list_size; + zxdh_dbg( +- verbs_get_ctx(iwusrq->ibv_srq.context), ZXDH_DBG_SRQ, ++ ZXDH_DBG_SRQ, + "%s info->srq_id:%d info->srq_size:%d info->srq_list_size:%d\n", + __func__, info->srq_id, info->srq_size, info->srq_list_size); + +@@ -2814,19 +2812,21 @@ static int zxdh_vmapped_srq(struct zxdh_usrq *iwusrq, struct ibv_pd *pd, + ret = zxdh_alloc_srq_buf(iwusrq, info, total_srq_size); + if (ret) + return -ENOMEM; +- zxdh_dbg(verbs_get_ctx(iwusrq->ibv_srq.context), ZXDH_DBG_SRQ, +- "%s srq_pages:%ld srq_list_pages:%ld\n", __func__, srq_pages, +- srq_list_pages); ++ zxdh_dbg(ZXDH_DBG_SRQ, "%s srq_pages:%ld srq_list_pages:%ld\n", ++ __func__, srq_pages, srq_list_pages); + + ret = zxdh_reg_srq_mr(pd, info, total_srq_size, srq_pages, + srq_list_pages, iwusrq); ++ zxdh_dbg(ZXDH_DBG_SRQ, "%s %d ret:%d\n", __func__, __LINE__, ret); + if (ret) { + errno = ret; + goto err_dereg_srq_mr; + } + ret = create_srq(pd, iwusrq, attr, info); ++ zxdh_dbg(ZXDH_DBG_SRQ, "%s %d ret:%d\n", __func__, __LINE__, ret); + if (ret) + goto err_srq; ++ + return 0; + err_srq: + ibv_cmd_dereg_mr(&iwusrq->vmr); +@@ -2907,8 +2907,7 @@ struct ibv_srq *zxdh_ucreate_srq(struct ibv_pd *pd, + dev_attrs = &iwvctx->dev_attrs; + + if ((zxdh_check_srq_init_attr(srq_init_attr, dev_attrs)) != 0) { +- verbs_err(&iwvctx->ibv_ctx, +- "zxdh_check_srq_init_attr failed\n"); ++ zxdh_dbg(ZXDH_DBG_SRQ, "zxdh_check_srq_init_attr failed\n"); + errno = EINVAL; + return NULL; + } +@@ 
-2922,12 +2921,12 @@ struct ibv_srq *zxdh_ucreate_srq(struct ibv_pd *pd, + srq_init_attr->attr.max_wr, srqshift, + &srqdepth); + zxdh_dbg( +- &iwvctx->ibv_ctx, ZXDH_DBG_SRQ, ++ ZXDH_DBG_SRQ, + "%s %d status:%d srqshift:%d srqdepth:%d dev_attrs->max_hw_srq_quanta:%d srq_init_attr->attr.max_wr:%d\n", + __func__, __LINE__, status, srqshift, srqdepth, + dev_attrs->max_hw_srq_quanta, srq_init_attr->attr.max_wr); + if (status != 0) { +- verbs_err(&iwvctx->ibv_ctx, "zxdh_get_srqdepth failed\n"); ++ zxdh_dbg(ZXDH_DBG_SRQ, "zxdh_get_srqdepth failed\n"); + errno = EINVAL; + return NULL; + } +@@ -2940,19 +2939,21 @@ struct ibv_srq *zxdh_ucreate_srq(struct ibv_pd *pd, + + if (zxdh_init_iwusrq(iwusrq, srq_init_attr, srqdepth, srqshift, &info, + dev_attrs)) { +- verbs_err(&iwvctx->ibv_ctx, "calloc srq_wrid_array failed\n"); ++ zxdh_dbg(ZXDH_DBG_SRQ, "calloc srq_wrid_array failed\n"); + goto err_srq_wrid_array; + } + status = zxdh_vmapped_srq(iwusrq, pd, srq_init_attr, srqdepth, &info); ++ zxdh_dbg(ZXDH_DBG_SRQ, "%s %d status:%d\n", __func__, __LINE__, status); + if (status) { +- verbs_err(&iwvctx->ibv_ctx, "zxdh_vmapped_srq failed\n"); ++ zxdh_dbg(ZXDH_DBG_SRQ, "zxdh_vmapped_srq failed\n"); + errno = status; + goto err_vmapped_srq; + } + + status = zxdh_srq_init(&iwusrq->srq, &info); ++ zxdh_dbg(ZXDH_DBG_SRQ, "%s %d status:%d\n", __func__, __LINE__, status); + if (status) { +- verbs_err(&iwvctx->ibv_ctx, "zxdh_srq_init failed\n"); ++ zxdh_dbg(ZXDH_DBG_SRQ, "zxdh_srq_init failed\n"); + errno = EINVAL; + goto err_free_srq_init; + } +@@ -2960,9 +2961,8 @@ struct ibv_srq *zxdh_ucreate_srq(struct ibv_pd *pd, + + srq_init_attr->attr.max_wr = (srqdepth - ZXDH_SRQ_RSVD) >> srqshift; + +- zxdh_dbg(&iwvctx->ibv_ctx, ZXDH_DBG_SRQ, +- "iwusrq->srq_id:%d info.srq_size:%d\n", iwusrq->srq_id, +- info.srq_size); ++ zxdh_dbg(ZXDH_DBG_SRQ, "%s iwusrq->srq_id:%d info.srq_size:%d\n", ++ __func__, iwusrq->srq_id, info.srq_size); + return &iwusrq->ibv_srq; + + err_free_srq_init: +@@ -2976,6 +2976,7 @@ err_srq_wrid_array: + errno = EINVAL; + err_free_srq: + free(iwusrq); ++ zxdh_dbg(ZXDH_DBG_SRQ, "%s %d\n", __func__, __LINE__); + return NULL; + } + +@@ -2996,8 +2997,8 @@ int zxdh_udestroy_srq(struct ibv_srq *srq) + ret = zxdh_destroy_vmapped_srq(iwusrq); + if (ret) + goto err; +- zxdh_dbg(verbs_get_ctx(iwusrq->ibv_srq.context), ZXDH_DBG_SRQ, +- "iwusrq->srq_id:%d\n", iwusrq->srq_id); ++ zxdh_dbg(ZXDH_DBG_SRQ, "%s iwusrq->srq_id:%d\n", __func__, ++ iwusrq->srq_id); + zxdh_free_hw_buf(iwusrq->srq.srq_base, iwusrq->total_buf_size); + free(iwusrq->srq.srq_wrid_array); + free(iwusrq); +@@ -3024,9 +3025,8 @@ int zxdh_umodify_srq(struct ibv_srq *srq, struct ibv_srq_attr *srq_attr, + sizeof(cmd)); + if (ret == 0) + iwusrq->srq_limit = srq_attr->srq_limit; +- zxdh_dbg(verbs_get_ctx(iwusrq->ibv_srq.context), ZXDH_DBG_SRQ, +- "iwusrq->srq_id:%d srq_attr->srq_limit:%d\n", iwusrq->srq_id, +- srq_attr->srq_limit); ++ zxdh_dbg(ZXDH_DBG_SRQ, "%s iwusrq->srq_id:%d srq_attr->srq_limit:%d\n", ++ __func__, iwusrq->srq_id, srq_attr->srq_limit); + return ret; + } + +@@ -3090,6 +3090,13 @@ static void zxdh_fill_srq_wqe(struct zxdh_usrq *iwusrq, struct zxdh_srq *srq, + + udma_to_device_barrier(); /* make sure WQE is populated before valid bit is set */ + set_64bit_val(wqe_64, 0, hdr); ++ ++ zxdh_dbg(ZXDH_DBG_SRQ, "%s wqe_64[0]:0x%llx\n", __func__, wqe_64[0]); ++ zxdh_dbg(ZXDH_DBG_SRQ, "%s wqe_64[1]:0x%llx\n", __func__, wqe_64[1]); ++ zxdh_dbg(ZXDH_DBG_SRQ, "%s wqe_64[2]:0x%llx\n", __func__, wqe_64[2]); ++ zxdh_dbg(ZXDH_DBG_SRQ, "%s 
wqe_64[3]:0x%llx\n", __func__, wqe_64[3]); ++ zxdh_dbg(ZXDH_DBG_SRQ, "%s wqe_64[4]:0x%llx\n", __func__, wqe_64[4]); ++ zxdh_dbg(ZXDH_DBG_SRQ, "%s wqe_64[5]:0x%llx\n", __func__, wqe_64[5]); + } + + static void zxdh_get_wqe_index(struct zxdh_srq *srq, __le16 *wqe_16, __u16 *buf, +@@ -3112,6 +3119,7 @@ static void zxdh_update_srq_db_base(struct zxdh_usrq *iwusrq, __u16 idx) + + udma_to_device_barrier(); /* make sure WQE is populated before valid bit is set */ + set_64bit_val(iwusrq->srq.srq_db_base, 0, hdr); ++ zxdh_dbg(ZXDH_DBG_SRQ, "%s srq_db_base(hdr):0x%llx\n", __func__, hdr); + } + + /** +@@ -3140,8 +3148,7 @@ int zxdh_upost_srq_recv(struct ibv_srq *srq, struct ibv_recv_wr *recv_wr, + buf_size = iwusrq->max_wr * sizeof(__u16); + buf = malloc(buf_size); + if (buf == NULL) { +- verbs_err(verbs_get_ctx(iwusrq->ibv_srq.context), +- "malloc buf_size failed\n"); ++ zxdh_dbg(ZXDH_DBG_SRQ, "malloc buf_size failed\n"); + err = -ENOMEM; + goto out; + } +@@ -3161,9 +3168,8 @@ int zxdh_upost_srq_recv(struct ibv_srq *srq, struct ibv_recv_wr *recv_wr, + zxdh_fill_srq_wqe(iwusrq, hw_srq, wqe_64, recv_wr); + } + +- zxdh_dbg(verbs_get_ctx(iwusrq->ibv_srq.context), ZXDH_DBG_SRQ, +- "nreq:%d err:%d iwusrq->srq_id:%d\n", nreq, err, +- iwusrq->srq_id); ++ zxdh_dbg(ZXDH_DBG_SRQ, "%s nreq:%d err:%d iwusrq->srq_id:%d\n", ++ __func__, nreq, err, iwusrq->srq_id); + + if (err == 0) { + zxdh_get_wqe_index(hw_srq, wqe_16, buf, nreq, &idx); +-- +2.27.0 + diff --git a/0051-libzrdma-Fix-capability-related-bugs.patch b/0051-libzrdma-Fix-capability-related-bugs.patch new file mode 100644 index 0000000000000000000000000000000000000000..643e186f409bda4028e9181f106ca1a06663bdb6 --- /dev/null +++ b/0051-libzrdma-Fix-capability-related-bugs.patch @@ -0,0 +1,676 @@ +From 2db3c164aea36d297eb3db7c54804037c2754c80 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?=E6=9D=8E=E5=AF=8C=E8=89=B3?= +Date: Fri, 28 Mar 2025 15:56:16 +0800 +Subject: [PATCH] libzrdma:Fix capability related bugs +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +Signed-off-by: 李富艳 +--- + providers/zrdma/zxdh_defs.h | 33 +----- + providers/zrdma/zxdh_hw.c | 47 ++++----- + providers/zrdma/zxdh_verbs.c | 196 +++++++++++++++++++++-------------- + providers/zrdma/zxdh_verbs.h | 2 +- + 4 files changed, 141 insertions(+), 137 deletions(-) + +diff --git a/providers/zrdma/zxdh_defs.h b/providers/zrdma/zxdh_defs.h +index 8772e7b..ec0bebe 100644 +--- a/providers/zrdma/zxdh_defs.h ++++ b/providers/zrdma/zxdh_defs.h +@@ -41,7 +41,7 @@ + #define ZXDH_SQ_WQE_BYTESIZE 32 + #define ZXDH_SRQ_WQE_MIN_SIZE 16 + +-#define ZXDH_SQ_RSVD 258 ++#define ZXDH_SQ_RSVD 1 + #define ZXDH_RQ_RSVD 1 + #define ZXDH_SRQ_RSVD 1 + +@@ -252,29 +252,7 @@ + (_retcode) = ZXDH_ERR_RING_FULL; \ + } \ + } +-#define ZXDH_SQ_RING_MOVE_HEAD(_ring, _retcode) \ +- { \ +- register __u32 size; \ +- size = (_ring).size; \ +- if (!ZXDH_SQ_RING_FULL_ERR(_ring)) { \ +- (_ring).head = ((_ring).head + 1) % size; \ +- (_retcode) = 0; \ +- } else { \ +- (_retcode) = ZXDH_ERR_RING_FULL; \ +- } \ +- } +-#define ZXDH_SQ_RING_MOVE_HEAD_BY_COUNT(_ring, _count, _retcode) \ +- { \ +- register __u32 size; \ +- size = (_ring).size; \ +- if ((ZXDH_RING_USED_QUANTA(_ring) + (_count)) < \ +- (size - 256)) { \ +- (_ring).head = ((_ring).head + (_count)) % size; \ +- (_retcode) = 0; \ +- } else { \ +- (_retcode) = ZXDH_ERR_RING_FULL; \ +- } \ +- } ++ + #define ZXDH_RING_MOVE_HEAD_BY_COUNT_NOCHECK(_ring, _count) \ + (_ring).head = ((_ring).head + (_count)) % (_ring).size + +@@ -298,13 +276,6 
@@ + #define ZXDH_ERR_RING_FULL3(_ring) \ + ((ZXDH_RING_USED_QUANTA(_ring) == ((_ring).size - 3))) + +-#define ZXDH_SQ_RING_FULL_ERR(_ring) \ +- ((ZXDH_RING_USED_QUANTA(_ring) == ((_ring).size - 257))) +- +-#define ZXDH_ERR_SQ_RING_FULL2(_ring) \ +- ((ZXDH_RING_USED_QUANTA(_ring) == ((_ring).size - 258))) +-#define ZXDH_ERR_SQ_RING_FULL3(_ring) \ +- ((ZXDH_RING_USED_QUANTA(_ring) == ((_ring).size - 259))) + #define ZXDH_RING_MORE_WORK(_ring) ((ZXDH_RING_USED_QUANTA(_ring) != 0)) + + #define ZXDH_RING_USED_QUANTA(_ring) \ +diff --git a/providers/zrdma/zxdh_hw.c b/providers/zrdma/zxdh_hw.c +index fb8f016..0ea5a85 100644 +--- a/providers/zrdma/zxdh_hw.c ++++ b/providers/zrdma/zxdh_hw.c +@@ -10,14 +10,6 @@ + #include + #include + #include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include + #define ERROR_CODE_VALUE 65 + + static inline void qp_tx_psn_add(__u32 *x, __u32 y, __u16 mtu) +@@ -30,11 +22,17 @@ static inline void qp_tx_psn_add(__u32 *x, __u32 y, __u16 mtu) + *x = (*x + chunks) & 0xffffff; + } + +-int zxdh_get_write_imm_split_switch(void) ++/** ++ * zxdh_fragcnt_to_quanta_sq - calculate quanta based on fragment count for SQ ++ * @frag_cnt: number of fragments ++ * @quanta: quanta for frag_cnt ++ */ ++static inline enum zxdh_status_code zxdh_fragcnt_to_quanta_sq(__u32 frag_cnt, __u16 *quanta) + { +- char *env; +- env = getenv("ZXDH_WRITE_IMM_SPILT_ENABLE"); +- return (env != NULL) ? atoi(env) : 0; ++ if (unlikely(frag_cnt > ZXDH_MAX_SQ_FRAG)) ++ return ZXDH_ERR_INVALID_FRAG_COUNT; ++ *quanta = (frag_cnt >> 1) + 1; ++ return 0; + } + + /** +@@ -2153,6 +2151,9 @@ int zxdh_qp_round_up(__u32 wqdepth) + { + int scount = 1; + ++ if (wqdepth == 0) ++ return 0; ++ + for (wqdepth--; scount <= 16; scount *= 2) + wqdepth |= wqdepth >> scount; + +@@ -2167,6 +2168,9 @@ int zxdh_cq_round_up(__u32 wqdepth) + { + int scount = 1; + ++ if (wqdepth == 0) ++ return 0; ++ + for (wqdepth--; scount <= 16; scount *= 2) + wqdepth |= wqdepth >> scount; + +@@ -2364,7 +2368,7 @@ enum zxdh_status_code zxdh_cq_init(struct zxdh_cq *cq, + void zxdh_clean_cq(void *q, struct zxdh_cq *cq) + { + __le64 *cqe; +- __u64 qword3, comp_ctx; ++ __u64 qword0, comp_ctx; + __u32 cq_head; + __u8 polarity, temp; + +@@ -2377,8 +2381,8 @@ void zxdh_clean_cq(void *q, struct zxdh_cq *cq) + .buf; + else + cqe = cq->cq_base[cq_head].buf; +- get_64bit_val(cqe, 24, &qword3); +- polarity = (__u8)FIELD_GET(ZXDH_CQ_VALID, qword3); ++ get_64bit_val(cqe, 0, &qword0); ++ polarity = (__u8)FIELD_GET(ZXDH_CQ_VALID, qword0); + + if (polarity != temp) + break; +@@ -2432,19 +2436,6 @@ enum zxdh_status_code zxdh_nop(struct zxdh_qp *qp, __u64 wr_id, bool signaled, + return 0; + } + +-/** +- * zxdh_fragcnt_to_quanta_sq - calculate quanta based on fragment count for SQ +- * @frag_cnt: number of fragments +- * @quanta: quanta for frag_cnt +- */ +-enum zxdh_status_code zxdh_fragcnt_to_quanta_sq(__u32 frag_cnt, __u16 *quanta) +-{ +- if (frag_cnt > ZXDH_MAX_SQ_FRAG) +- return ZXDH_ERR_INVALID_FRAG_COUNT; +- *quanta = frag_cnt / 2 + 1; +- return 0; +-} +- + /** + * zxdh_fragcnt_to_wqesize_rq - calculate wqe size based on fragment count for RQ + * @frag_cnt: number of fragments +diff --git a/providers/zrdma/zxdh_verbs.c b/providers/zrdma/zxdh_verbs.c +index 9cf1240..39ff401 100644 +--- a/providers/zrdma/zxdh_verbs.c ++++ b/providers/zrdma/zxdh_verbs.c +@@ -59,6 +59,7 @@ static int zxdh_get_inline_data(uint8_t *inline_data, struct ibv_send_wr *ib_wr, + while (num < ib_wr->num_sge) { + *len += ib_wr->sg_list[num].length; + if 
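zxdh_qp_round_up() and zxdh_cq_round_up() use the classic bit-smearing next-power-of-two idiom; the added wqdepth == 0 guard short-circuits the degenerate case instead of letting the initial decrement wrap the unsigned value. A standalone sketch of the same idiom, assuming the final increment that the routine returns (not visible in this hunk):

#include <stdint.h>

/* Round up to the next power of two for a 32-bit depth. */
static inline uint32_t round_up_pow2(uint32_t depth)
{
	if (depth == 0)
		return 0;

	depth--;
	for (uint32_t shift = 1; shift <= 16; shift *= 2)
		depth |= depth >> shift;

	return depth + 1;
}

/* Example: round_up_pow2(300) == 512, round_up_pow2(512) == 512. */

The relocated zxdh_fragcnt_to_quanta_sq() in the same hunk is arithmetically unchanged: (frag_cnt >> 1) + 1 equals the old frag_cnt / 2 + 1; only the inlining and the unlikely() hint are new.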
(*len > ZXDH_MAX_INLINE_DATA_SIZE) { ++ printf("err:inline bytes over max inline length\n"); + return -EINVAL; + } + memcpy(inline_data + offset, +@@ -343,12 +344,8 @@ static void zxdh_free_hw_buf(void *buf, size_t size) + */ + static inline int get_cq_size(int ncqe) + { +- ncqe++; +- +- /* Completions with immediate require 1 extra entry */ + if (ncqe < ZXDH_U_MINCQ_SIZE) + ncqe = ZXDH_U_MINCQ_SIZE; +- + return ncqe; + } + +@@ -380,6 +377,7 @@ static struct ibv_cq_ex *ucreate_cq(struct ibv_context *context, + size_t total_size; + __u32 cq_pages; + int ret, ncqe; ++ __u64 resize_supported; + + iwvctx = container_of(context, struct zxdh_uvcontext, ibv_ctx.context); + dev_attrs = &iwvctx->dev_attrs; +@@ -390,6 +388,13 @@ static struct ibv_cq_ex *ucreate_cq(struct ibv_context *context, + return NULL; + } + ++ info.cq_size = get_cq_size(attr_ex->cqe); ++ info.cq_size = zxdh_cq_round_up(info.cq_size); ++ if (info.cq_size > dev_attrs->max_hw_cq_size) { ++ errno = EINVAL; ++ return NULL; ++ } ++ + /* save the cqe requested by application */ + ncqe = attr_ex->cqe; + iwucq = calloc(1, sizeof(*iwucq)); +@@ -404,14 +409,13 @@ static struct ibv_cq_ex *ucreate_cq(struct ibv_context *context, + } + + iwucq->resize_enable = false; +- info.cq_size = get_cq_size(attr_ex->cqe); +- info.cq_size = zxdh_cq_round_up(info.cq_size); + iwucq->comp_vector = attr_ex->comp_vector; + list_head_init(&iwucq->resize_list); + total_size = get_cq_total_bytes(info.cq_size); + cq_pages = total_size >> ZXDH_HW_PAGE_SHIFT; ++ resize_supported = dev_attrs->feature_flags & ZXDH_FEATURE_CQ_RESIZE; + +- if (!(dev_attrs->feature_flags & ZXDH_FEATURE_CQ_RESIZE)) ++ if (!resize_supported) + total_size = (cq_pages << ZXDH_HW_PAGE_SHIFT) + + ZXDH_DB_SHADOW_AREA_SIZE; + +@@ -436,7 +440,7 @@ static struct ibv_cq_ex *ucreate_cq(struct ibv_context *context, + + iwucq->vmr.ibv_mr.pd = &iwvctx->iwupd->ibv_pd; + +- if (dev_attrs->feature_flags & ZXDH_FEATURE_CQ_RESIZE) { ++ if (resize_supported) { + info.shadow_area = zxdh_alloc_hw_buf(ZXDH_DB_SHADOW_AREA_SIZE); + if (!info.shadow_area) + goto err_dereg_mr; +@@ -457,7 +461,6 @@ static struct ibv_cq_ex *ucreate_cq(struct ibv_context *context, + } + + iwucq->vmr_shadow_area.ibv_mr.pd = &iwvctx->iwupd->ibv_pd; +- + } else { + info.shadow_area = (__le64 *)((__u8 *)info.cq_base + + (cq_pages << ZXDH_HW_PAGE_SHIFT)); +@@ -491,7 +494,9 @@ err_dereg_shadow: + ibv_cmd_dereg_mr(&iwucq->vmr); + if (iwucq->vmr_shadow_area.ibv_mr.handle) { + ibv_cmd_dereg_mr(&iwucq->vmr_shadow_area); +- zxdh_free_hw_buf(info.shadow_area, ZXDH_DB_SHADOW_AREA_SIZE); ++ if (resize_supported) ++ zxdh_free_hw_buf(info.shadow_area, ++ ZXDH_DB_SHADOW_AREA_SIZE); + } + err_dereg_mr: + zxdh_free_hw_buf(info.cq_base, total_size); +@@ -553,7 +558,7 @@ static int zxdh_process_resize_list(struct zxdh_ucq *iwucq, + struct zxdh_cq_buf *cq_buf, *next; + int cq_cnt = 0; + +- list_for_each_safe (&iwucq->resize_list, cq_buf, next, list) { ++ list_for_each_safe(&iwucq->resize_list, cq_buf, next, list) { + if (cq_buf == lcqe_buf) + return cq_cnt; + +@@ -774,7 +779,8 @@ static inline void zxdh_process_cqe(struct ibv_wc *entry, + * + * Returns the internal zxdh device error code or 0 on success + */ +-static int zxdh_poll_one(struct zxdh_cq *cq, struct zxdh_cq_poll_info *cur_cqe, ++static int zxdh_poll_one(struct zxdh_cq *cq, ++ struct zxdh_cq_poll_info *cur_cqe, + struct ibv_wc *entry) + { + int ret = zxdh_cq_poll_cmpl(cq, cur_cqe); +@@ -811,7 +817,7 @@ static int __zxdh_upoll_resize_cq(struct zxdh_ucq *iwucq, int num_entries, + int ret; + + 
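With the zxdh_umodify_qp() change above, a transition to IBV_QPS_RESET now also drains the QP's stale CQEs (zxdh_clean_cqes on the send and receive CQs) and re-zeroes the SQ/RQ ring indices via zxdh_init_qp_indices(). Seen from the verbs API, the sequence a consumer drives is just the standard state change; a minimal sketch (wrapper name is illustrative):

#include <infiniband/verbs.h>

/* After this returns successfully, the provider has dropped this QP's
 * leftover CQEs and reset its SQ/RQ indices, so a later INIT/RTR/RTS
 * cycle does not observe completions from the previous run. */
static int reset_qp_for_reuse(struct ibv_qp *qp)
{
	struct ibv_qp_attr attr = { .qp_state = IBV_QPS_RESET };

	return ibv_modify_qp(qp, &attr, IBV_QP_STATE);
}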
/* go through the list of previously resized CQ buffers */ +- list_for_each_safe (&iwucq->resize_list, cq_buf, next, list) { ++ list_for_each_safe(&iwucq->resize_list, cq_buf, next, list) { + while (npolled < num_entries) { + ret = zxdh_poll_one(&cq_buf->cq, cur_cqe, + entry ? entry + npolled : NULL); +@@ -829,6 +835,7 @@ static int __zxdh_upoll_resize_cq(struct zxdh_ucq *iwucq, int num_entries, + cq_new_cqe = true; + continue; + } ++ printf("__zrdma_upoll_cq resize goto error failed\n"); + goto error; + } + +@@ -856,6 +863,7 @@ static int __zxdh_upoll_resize_cq(struct zxdh_ucq *iwucq, int num_entries, + cq_new_cqe = true; + continue; + } ++ printf("__zrdma_upoll_cq goto error failed\n"); + goto error; + } + if (cq_new_cqe) +@@ -1038,7 +1046,7 @@ static uint64_t zxdh_wc_read_completion_wallclock_ns(struct ibv_cq_ex *ibvcq_ex) + container_of(ibvcq_ex, struct zxdh_ucq, verbs_cq.cq_ex); + + /* RTT is in usec */ +- return iwucq->cur_cqe.tcp_seq_num_rtt * 1000; ++ return (uint64_t)iwucq->cur_cqe.tcp_seq_num_rtt * 1000; + } + + static enum ibv_wc_opcode zxdh_wc_read_opcode(struct ibv_cq_ex *ibvcq_ex) +@@ -1682,6 +1690,37 @@ int zxdh_uquery_qp(struct ibv_qp *qp, struct ibv_qp_attr *attr, int attr_mask, + sizeof(cmd)); + } + ++/** ++ * zxdh_clean_cqes - clean cq entries for qp ++ * @qp: qp for which completions are cleaned ++ * @iwcq: cq to be cleaned ++ */ ++static void zxdh_clean_cqes(struct zxdh_qp *qp, struct zxdh_ucq *iwucq) ++{ ++ struct zxdh_cq *ukcq = &iwucq->cq; ++ int ret; ++ ++ ret = pthread_spin_lock(&iwucq->lock); ++ if (ret) ++ return; ++ ++ zxdh_clean_cq(qp, ukcq); ++ pthread_spin_unlock(&iwucq->lock); ++} ++ ++static void zxdh_init_qp_indices(struct zxdh_qp *qp) ++{ ++ __u32 sq_ring_size; ++ sq_ring_size = ZXDH_RING_SIZE(qp->sq_ring); ++ ZXDH_RING_INIT(qp->sq_ring, sq_ring_size); ++ ZXDH_RING_INIT(qp->initial_ring, sq_ring_size); ++ qp->swqe_polarity = 0; ++ qp->swqe_polarity_deferred = 1; ++ qp->rwqe_polarity = 0; ++ qp->rwqe_signature = 0; ++ ZXDH_RING_INIT(qp->rq_ring, qp->rq_size); ++} ++ + /** + * zxdh_umodify_qp - send qp modify to driver + * @qp: qp to modify +@@ -1705,6 +1744,18 @@ int zxdh_umodify_qp(struct ibv_qp *qp, struct ibv_qp_attr *attr, int attr_mask) + } else { + ret = ibv_cmd_modify_qp(qp, attr, attr_mask, &cmd, sizeof(cmd)); + } ++ ++ if (!ret && ++ (attr_mask & IBV_QP_STATE) && ++ attr->qp_state == IBV_QPS_RESET) { ++ if (iwuqp->send_cq) ++ zxdh_clean_cqes(&iwuqp->qp, iwuqp->send_cq); ++ ++ if (iwuqp->recv_cq && iwuqp->recv_cq != iwuqp->send_cq) ++ zxdh_clean_cqes(&iwuqp->qp, iwuqp->recv_cq); ++ zxdh_init_qp_indices(&iwuqp->qp); ++ } ++ + if (!ret && (attr_mask & IBV_QP_PATH_MTU) && + qp->qp_type == IBV_QPT_RC) { + mtu = mtu_enum_to_int(attr->path_mtu); +@@ -1736,24 +1787,6 @@ static void zxdh_issue_flush(struct ibv_qp *qp, bool sq_flush, bool rq_flush) + sizeof(cmd_ex), &resp, sizeof(resp)); + } + +-/** +- * zxdh_clean_cqes - clean cq entries for qp +- * @qp: qp for which completions are cleaned +- * @iwcq: cq to be cleaned +- */ +-static void zxdh_clean_cqes(struct zxdh_qp *qp, struct zxdh_ucq *iwucq) +-{ +- struct zxdh_cq *cq = &iwucq->cq; +- int ret; +- +- ret = pthread_spin_lock(&iwucq->lock); +- if (ret) +- return; +- +- zxdh_clean_cq(qp, cq); +- pthread_spin_unlock(&iwucq->lock); +-} +- + /** + * zxdh_udestroy_qp - destroy qp + * @qp: qp to destroy +@@ -1851,16 +1884,10 @@ int zxdh_upost_send(struct ibv_qp *ib_qp, struct ibv_send_wr *ib_wr, + struct zxdh_umr *umr = NULL; + __u64 mr_va = 0, mw_va = 0, value_dffer = 0, mw_pa_pble_index = 0; + __u16 
mr_offset = 0; +- iwvctx = container_of(ib_qp->context, struct zxdh_uvcontext, +- ibv_ctx.context); +- if (ib_qp->state != IBV_QPS_RTS) { +- *bad_wr = ib_wr; +- verbs_err(&iwvctx->ibv_ctx, "zrdma: post send at state:%d\n", +- ib_qp->state); +- return -EINVAL; +- } + + iwuqp = container_of(ib_qp, struct zxdh_uqp, vqp.qp); ++ iwvctx = container_of(ib_qp->context, struct zxdh_uvcontext, ++ ibv_ctx.context); + dev_attrs = &iwvctx->dev_attrs; + + err = pthread_spin_lock(&iwuqp->lock); +@@ -1918,9 +1945,7 @@ int zxdh_upost_send(struct ibv_qp *ib_qp, struct ibv_send_wr *ib_wr, + iwuqp->inline_data, ib_wr, + &info.op.inline_rdma_send.len); + if (ret) { +- verbs_err( +- &iwvctx->ibv_ctx, +- "zrdma: get inline data fail\n"); ++ printf("err:zxdh_get_inline_data fail\n"); + pthread_spin_unlock(&iwuqp->lock); + return -EINVAL; + } +@@ -1937,11 +1962,11 @@ int zxdh_upost_send(struct ibv_qp *ib_qp, struct ibv_send_wr *ib_wr, + ib_wr->wr.ud.remote_qkey; + info.op.inline_rdma_send.dest_qp = + ib_wr->wr.ud.remote_qpn; +- ret = zxdh_ud_inline_send(&iwuqp->qp, +- &info, false); ++ ret = zxdh_ud_inline_send( ++ &iwuqp->qp, &info, false); + } else { +- ret = zxdh_rc_inline_send(&iwuqp->qp, +- &info, false); ++ ret = zxdh_rc_inline_send( ++ &iwuqp->qp, &info, false); + } + } else { + info.op.send.num_sges = ib_wr->num_sge; +@@ -1960,10 +1985,10 @@ int zxdh_upost_send(struct ibv_qp *ib_qp, struct ibv_send_wr *ib_wr, + info.op.inline_rdma_send.dest_qp = + ib_wr->wr.ud.remote_qpn; + ret = zxdh_ud_send(&iwuqp->qp, &info, +- false); ++ false); + } else { + ret = zxdh_rc_send(&iwuqp->qp, &info, +- false); ++ false); + } + } + if (ret) +@@ -1995,9 +2020,7 @@ int zxdh_upost_send(struct ibv_qp *ib_qp, struct ibv_send_wr *ib_wr, + iwuqp->inline_data, ib_wr, + &info.op.inline_rdma_write.len); + if (ret) { +- verbs_err( +- &iwvctx->ibv_ctx, +- "zrdma: get inline data fail\n"); ++ printf("err:zxdh_get_inline_data fail\n"); + pthread_spin_unlock(&iwuqp->lock); + return -EINVAL; + } +@@ -2007,8 +2030,8 @@ int zxdh_upost_send(struct ibv_qp *ib_qp, struct ibv_send_wr *ib_wr, + ib_wr->wr.rdma.remote_addr; + info.op.inline_rdma_write.rem_addr.stag = + ib_wr->wr.rdma.rkey; +- ret = zxdh_inline_rdma_write(&iwuqp->qp, &info, +- false); ++ ret = zxdh_inline_rdma_write(&iwuqp->qp, ++ &info, false); + } else { + info.op.rdma_write.lo_sg_list = + (void *)ib_wr->sg_list; +@@ -2017,7 +2040,8 @@ int zxdh_upost_send(struct ibv_qp *ib_qp, struct ibv_send_wr *ib_wr, + ib_wr->wr.rdma.remote_addr; + info.op.rdma_write.rem_addr.stag = + ib_wr->wr.rdma.rkey; +- ret = zxdh_rdma_write(&iwuqp->qp, &info, false); ++ ret = zxdh_rdma_write(&iwuqp->qp, &info, ++ false); + } + if (ret) + err = (ret == ZXDH_ERR_QP_TOOMANY_WRS_POSTED) ? +@@ -2036,7 +2060,8 @@ int zxdh_upost_send(struct ibv_qp *ib_qp, struct ibv_send_wr *ib_wr, + + info.op.rdma_read.lo_sg_list = (void *)ib_wr->sg_list; + info.op.rdma_read.num_lo_sges = ib_wr->num_sge; +- ret = zxdh_rdma_read(&iwuqp->qp, &info, false, false); ++ ret = zxdh_rdma_read(&iwuqp->qp, &info, false, ++ false); + if (ret) + err = (ret == ZXDH_ERR_QP_TOOMANY_WRS_POSTED) ? + ENOMEM : +@@ -2383,20 +2408,17 @@ int zxdh_upost_send(struct ibv_qp *ib_qp, struct ibv_send_wr *ib_wr, + + ret = zxdh_mw_bind(&iwuqp->qp, &info, false); + if (ret) +- err = (ret == ZXDH_ERR_QP_TOOMANY_WRS_POSTED) ? 
+- ENOMEM : +- EINVAL; ++ err = ENOMEM; ++ + break; + case IBV_WR_LOCAL_INV: + info.op_type = ZXDH_OP_TYPE_LOCAL_INV; + info.op.inv_local_stag.target_stag = + ib_wr->invalidate_rkey; + ret = zxdh_stag_local_invalidate(&iwuqp->qp, &info, +- true); ++ true); + if (ret) +- err = (ret == ZXDH_ERR_QP_TOOMANY_WRS_POSTED) ? +- ENOMEM : +- EINVAL; ++ err = ENOMEM; + break; + default: + /* error */ +@@ -2441,6 +2463,7 @@ int zxdh_upost_recv(struct ibv_qp *ib_qp, struct ibv_recv_wr *ib_wr, + + if (unlikely(ib_qp->state == IBV_QPS_RESET || ib_qp->srq)) { + *bad_wr = ib_wr; ++ printf("err:post recv at reset or using srq\n"); + return -EINVAL; + } + +@@ -2490,9 +2513,18 @@ error: + struct ibv_ah *zxdh_ucreate_ah(struct ibv_pd *ibpd, struct ibv_ah_attr *attr) + { + struct zxdh_uah *ah; ++ union ibv_gid sgid; + struct zxdh_ucreate_ah_resp resp; + int err; + ++ memset(&resp, 0, sizeof(resp)); ++ err = ibv_query_gid(ibpd->context, attr->port_num, attr->grh.sgid_index, ++ &sgid); ++ if (err) { ++ errno = err; ++ return NULL; ++ } ++ + ah = calloc(1, sizeof(*ah)); + if (!ah) + return NULL; +@@ -2584,10 +2616,10 @@ int zxdh_uresize_cq(struct ibv_cq *cq, int cqe) + if (!(dev_attrs->feature_flags & ZXDH_FEATURE_CQ_RESIZE)) + return -EOPNOTSUPP; + +- if (cqe > ZXDH_MAX_CQ_SIZE) ++ if (cqe > dev_attrs->max_hw_cq_size) + return -EINVAL; + +- cqe_needed = zxdh_cq_round_up(cqe + 1); ++ cqe_needed = zxdh_cq_round_up(cqe); + + if (cqe_needed < ZXDH_U_MINCQ_SIZE) + cqe_needed = ZXDH_U_MINCQ_SIZE; +@@ -2609,6 +2641,10 @@ int zxdh_uresize_cq(struct ibv_cq *cq, int cqe) + goto err_buf; + } + ++ ret = pthread_spin_lock(&iwucq->lock); ++ if (ret) ++ goto err_lock; ++ + new_mr.ibv_mr.pd = iwucq->vmr.ibv_mr.pd; + reg_mr_cmd.reg_type = ZXDH_MEMREG_TYPE_CQ; + reg_mr_cmd.cq_pages = cq_pages; +@@ -2620,10 +2656,6 @@ int zxdh_uresize_cq(struct ibv_cq *cq, int cqe) + if (ret) + goto err_dereg_mr; + +- ret = pthread_spin_lock(&iwucq->lock); +- if (ret) +- goto err_lock; +- + cmd.user_cq_buffer = (__u64)((uintptr_t)cq_base); + ret = ibv_cmd_resize_cq(&iwucq->verbs_cq.cq, cqe_needed, &cmd.ibv_cmd, + sizeof(cmd), &resp, sizeof(resp)); +@@ -2642,10 +2674,10 @@ int zxdh_uresize_cq(struct ibv_cq *cq, int cqe) + return ret; + + err_resize: +- pthread_spin_unlock(&iwucq->lock); +-err_lock: + ibv_cmd_dereg_mr(&new_mr); + err_dereg_mr: ++ pthread_spin_unlock(&iwucq->lock); ++err_lock: + free(cq_buf); + err_buf: + zxdh_free_hw_buf(cq_base, cq_size); +@@ -2735,7 +2767,8 @@ static int zxdh_alloc_srq_buf(struct zxdh_usrq *iwusrq, + return 0; + } + +-static int zxdh_reg_srq_mr(struct ibv_pd *pd, struct zxdh_srq_init_info *info, ++static int zxdh_reg_srq_mr(struct ibv_pd *pd, ++ struct zxdh_srq_init_info *info, + size_t total_srq_size, uint16_t srq_pages, + uint16_t srq_list_pages, struct zxdh_usrq *iwusrq) + { +@@ -3043,7 +3076,8 @@ int zxdh_uquery_srq(struct ibv_srq *srq, struct ibv_srq_attr *srq_attr) + } + + static int zxdh_check_srq_valid(struct ibv_recv_wr *recv_wr, +- struct zxdh_usrq *iwusrq, struct zxdh_srq *srq) ++ struct zxdh_usrq *iwusrq, ++ struct zxdh_srq *srq) + { + if (unlikely(recv_wr->num_sge > iwusrq->max_sge)) + return -EINVAL; +@@ -3054,8 +3088,9 @@ static int zxdh_check_srq_valid(struct ibv_recv_wr *recv_wr, + return 0; + } + +-static void zxdh_fill_srq_wqe(struct zxdh_usrq *iwusrq, struct zxdh_srq *srq, +- __le64 *wqe_64, struct ibv_recv_wr *recv_wr) ++static void zxdh_fill_srq_wqe(struct zxdh_usrq *iwusrq, ++ struct zxdh_srq *srq, __le64 *wqe_64, ++ struct ibv_recv_wr *recv_wr) + { + __u32 byte_off; + int i; +@@ -3099,8 
+3134,8 @@ static void zxdh_fill_srq_wqe(struct zxdh_usrq *iwusrq, struct zxdh_srq *srq, + zxdh_dbg(ZXDH_DBG_SRQ, "%s wqe_64[5]:0x%llx\n", __func__, wqe_64[5]); + } + +-static void zxdh_get_wqe_index(struct zxdh_srq *srq, __le16 *wqe_16, __u16 *buf, +- __u16 nreq, __u16 *idx) ++static void zxdh_get_wqe_index(struct zxdh_srq *srq, __le16 *wqe_16, ++ __u16 *buf, __u16 nreq, __u16 *idx) + { + int i; + +@@ -3207,3 +3242,10 @@ void zxdh_set_debug_mask(void) + if (env) + zxdh_debug_mask = strtol(env, NULL, 0); + } ++ ++int zxdh_get_write_imm_split_switch(void) ++{ ++ char *env; ++ env = getenv("ZXDH_WRITE_IMM_SPILT_ENABLE"); ++ return (env != NULL) ? atoi(env) : 0; ++} +diff --git a/providers/zrdma/zxdh_verbs.h b/providers/zrdma/zxdh_verbs.h +index e3974c1..b72fa74 100644 +--- a/providers/zrdma/zxdh_verbs.h ++++ b/providers/zrdma/zxdh_verbs.h +@@ -661,10 +661,10 @@ __le64 *zxdh_qp_get_next_recv_wqe(struct zxdh_qp *qp, __u32 *wqe_idx); + void zxdh_clean_cq(void *q, struct zxdh_cq *cq); + enum zxdh_status_code zxdh_nop(struct zxdh_qp *qp, __u64 wr_id, bool signaled, + bool post_sq); +-enum zxdh_status_code zxdh_fragcnt_to_quanta_sq(__u32 frag_cnt, __u16 *quanta); + enum zxdh_status_code zxdh_fragcnt_to_wqesize_rq(__u32 frag_cnt, + __u16 *wqe_size); + void zxdh_get_sq_wqe_shift(__u32 sge, __u32 inline_data, __u8 *shift); ++ + void zxdh_get_rq_wqe_shift(__u32 sge, __u8 *shift); + enum zxdh_status_code zxdh_get_sqdepth(struct zxdh_dev_attrs *dev_attrs, + __u32 sq_size, __u8 shift, +-- +2.27.0 + diff --git a/0052-libxscale-Match-dev-by-vid-and-did.patch b/0052-libxscale-Match-dev-by-vid-and-did.patch new file mode 100644 index 0000000000000000000000000000000000000000..4e561c7ac9093b4ed2fc2d2373008b866768ca0a --- /dev/null +++ b/0052-libxscale-Match-dev-by-vid-and-did.patch @@ -0,0 +1,85 @@ +From c51e33b6a7b21ea3cc1230838d95d5428396be3b Mon Sep 17 00:00:00 2001 +From: Xin Tian +Date: Thu, 20 Mar 2025 15:33:54 +0800 +Subject: [PATCH] libxscale: Match dev by vid and did + +Match dev by vid and did. 
+ +Signed-off-by: Xin Tian +--- + providers/xscale/xscale.c | 20 +++++++++++++++----- + providers/xscale/xscale.h | 18 ++++++++++++++++++ + 2 files changed, 33 insertions(+), 5 deletions(-) + +diff --git a/providers/xscale/xscale.c b/providers/xscale/xscale.c +index e6792b9..e24cfd2 100644 +--- a/providers/xscale/xscale.c ++++ b/providers/xscale/xscale.c +@@ -23,10 +23,6 @@ + #include "wqe.h" + #include "xsc_hsi.h" + +-#ifndef PCI_VENDOR_ID_MELLANOX +-#define PCI_VENDOR_ID_MELLANOX 0x15b3 +-#endif +- + #ifndef CPU_OR + #define CPU_OR(x, y, z) do {} while (0) + #endif +@@ -35,9 +31,23 @@ + #define CPU_EQUAL(x, y) 1 + #endif + +-#define HCA(v, d) VERBS_PCI_MATCH(PCI_VENDOR_ID_##v, d, NULL) + static const struct verbs_match_ent hca_table[] = { + VERBS_MODALIAS_MATCH("*xscale*", NULL), ++ VERBS_PCI_MATCH(XSC_PCI_VENDOR_ID, XSC_MC_PF_DEV_ID, NULL), ++ VERBS_PCI_MATCH(XSC_PCI_VENDOR_ID, XSC_MC_VF_DEV_ID, NULL), ++ VERBS_PCI_MATCH(XSC_PCI_VENDOR_ID, XSC_MC_PF_DEV_ID_DIAMOND, NULL), ++ VERBS_PCI_MATCH(XSC_PCI_VENDOR_ID, XSC_MC_PF_DEV_ID_DIAMOND_NEXT, NULL), ++ ++ VERBS_PCI_MATCH(XSC_PCI_VENDOR_ID, XSC_MF_HOST_PF_DEV_ID, NULL), ++ VERBS_PCI_MATCH(XSC_PCI_VENDOR_ID, XSC_MF_HOST_VF_DEV_ID, NULL), ++ VERBS_PCI_MATCH(XSC_PCI_VENDOR_ID, XSC_MF_SOC_PF_DEV_ID, NULL), ++ ++ VERBS_PCI_MATCH(XSC_PCI_VENDOR_ID, XSC_MS_PF_DEV_ID, NULL), ++ VERBS_PCI_MATCH(XSC_PCI_VENDOR_ID, XSC_MS_VF_DEV_ID, NULL), ++ ++ VERBS_PCI_MATCH(XSC_PCI_VENDOR_ID, XSC_MV_HOST_PF_DEV_ID, NULL), ++ VERBS_PCI_MATCH(XSC_PCI_VENDOR_ID, XSC_MV_HOST_VF_DEV_ID, NULL), ++ VERBS_PCI_MATCH(XSC_PCI_VENDOR_ID, XSC_MV_SOC_PF_DEV_ID, NULL), + {} + }; + +diff --git a/providers/xscale/xscale.h b/providers/xscale/xscale.h +index 0aee472..c6cc9f7 100644 +--- a/providers/xscale/xscale.h ++++ b/providers/xscale/xscale.h +@@ -171,6 +171,24 @@ struct xsc_spinlock { + int need_lock; + }; + ++#define XSC_PCI_VENDOR_ID 0x1f67 ++ ++#define XSC_MC_PF_DEV_ID 0x1011 ++#define XSC_MC_VF_DEV_ID 0x1012 ++#define XSC_MC_PF_DEV_ID_DIAMOND 0x1021 ++#define XSC_MC_PF_DEV_ID_DIAMOND_NEXT 0x1023 ++ ++#define XSC_MF_HOST_PF_DEV_ID 0x1051 ++#define XSC_MF_HOST_VF_DEV_ID 0x1052 ++#define XSC_MF_SOC_PF_DEV_ID 0x1053 ++ ++#define XSC_MS_PF_DEV_ID 0x1111 ++#define XSC_MS_VF_DEV_ID 0x1112 ++ ++#define XSC_MV_HOST_PF_DEV_ID 0x1151 ++#define XSC_MV_HOST_VF_DEV_ID 0x1152 ++#define XSC_MV_SOC_PF_DEV_ID 0x1153 ++ + /* PAGE_SHIFT determines the page size */ + + #define PAGE_SHIFT 12 +-- +2.25.1 + diff --git a/0053-libhns-Clean-up-data-type-issues.patch b/0053-libhns-Clean-up-data-type-issues.patch new file mode 100644 index 0000000000000000000000000000000000000000..dfea4d28e29cb4d0728282b50e988c5376ae5b80 --- /dev/null +++ b/0053-libhns-Clean-up-data-type-issues.patch @@ -0,0 +1,152 @@ +From 8f95635c359ca3c36f5b1b48889719b6840c07cc Mon Sep 17 00:00:00 2001 +From: Junxian Huang +Date: Thu, 13 Mar 2025 17:26:50 +0800 +Subject: [PATCH 53/55] libhns: Clean up data type issues + +mainline inclusion +from mainline-v56.0-65 +commit fbe8827f270d0aff4a28bb645b826fa98fe00c9d +category: bugfix +bugzilla: https://gitee.com/src-openeuler/rdma-core/issues/IC1V44 +CVE: NA + +Reference: https://github.com/linux-rdma/rdma-core/pull/1579/commits/fbe8827f270d0aff4a... + +--------------------------------------------------------------------- + +Clean up mixed signed/unsigned type issues. Fix a wrong format +character as well. 
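+
+For illustration only (not part of the upstream diff), the typical
+pattern being cleaned up is a signed loop counter compared against an
+unsigned bound such as ARRAY_SIZE(), which trips -Wsign-compare:
+
+  #define ARRAY_SIZE(a) (sizeof(a) / sizeof((a)[0]))
+
+  static const int codes[] = { 1, 2, 4, 8 };
+
+  static unsigned int count_codes(void)
+  {
+          unsigned int n = 0;
+
+          /* "unsigned int i", not "int i": ARRAY_SIZE() yields a size_t */
+          for (unsigned int i = 0; i < ARRAY_SIZE(codes); i++)
+                  n += (codes[i] != 0);
+
+          return n;
+  }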
+ +Fixes: cf6d9149f8f5 ("libhns: Introduce hns direct verbs") +Signed-off-by: Junxian Huang +Signed-off-by: Xinghai Cen +--- + providers/hns/hns_roce_u.h | 4 ++-- + providers/hns/hns_roce_u_hw_v2.c | 15 ++++++++------- + providers/hns/hns_roce_u_verbs.c | 6 +++--- + 3 files changed, 13 insertions(+), 12 deletions(-) + +diff --git a/providers/hns/hns_roce_u.h b/providers/hns/hns_roce_u.h +index 5eedb81..e7e3f01 100644 +--- a/providers/hns/hns_roce_u.h ++++ b/providers/hns/hns_roce_u.h +@@ -356,7 +356,7 @@ struct hns_roce_wq { + unsigned long *wrid; + struct hns_roce_spinlock hr_lock; + unsigned int wqe_cnt; +- int max_post; ++ unsigned int max_post; + unsigned int head; + unsigned int tail; + unsigned int max_gs; +@@ -392,7 +392,7 @@ struct hns_roce_qp { + struct verbs_qp verbs_qp; + struct hns_roce_buf buf; + struct hns_roce_dca_buf dca_wqe; +- int max_inline_data; ++ unsigned int max_inline_data; + unsigned int buf_size; + unsigned int sq_signal_bits; + struct hns_roce_wq sq; +diff --git a/providers/hns/hns_roce_u_hw_v2.c b/providers/hns/hns_roce_u_hw_v2.c +index 3137111..cea3043 100644 +--- a/providers/hns/hns_roce_u_hw_v2.c ++++ b/providers/hns/hns_roce_u_hw_v2.c +@@ -173,7 +173,7 @@ static enum ibv_wc_status get_wc_status(uint8_t status) + { HNS_ROCE_V2_CQE_XRC_VIOLATION_ERR, IBV_WC_REM_INV_RD_REQ_ERR }, + }; + +- for (int i = 0; i < ARRAY_SIZE(map); i++) { ++ for (unsigned int i = 0; i < ARRAY_SIZE(map); i++) { + if (status == map[i].cqe_status) + return map[i].wc_status; + } +@@ -1189,7 +1189,7 @@ static int fill_ext_sge_inl_data(struct hns_roce_qp *qp, + unsigned int sge_mask = qp->ex_sge.sge_cnt - 1; + void *dst_addr, *src_addr, *tail_bound_addr; + uint32_t src_len, tail_len; +- int i; ++ uint32_t i; + + if (sge_info->total_len > qp->sq.ext_sge_cnt * HNS_ROCE_SGE_SIZE) + return EINVAL; +@@ -1259,7 +1259,7 @@ static void fill_ud_inn_inl_data(const struct ibv_send_wr *wr, + + static bool check_inl_data_len(struct hns_roce_qp *qp, unsigned int len) + { +- int mtu = mtu_enum_to_int(qp->path_mtu); ++ unsigned int mtu = mtu_enum_to_int(qp->path_mtu); + + return (len <= qp->max_inline_data && len <= mtu); + } +@@ -1698,7 +1698,8 @@ static void fill_recv_sge_to_wqe(struct ibv_recv_wr *wr, void *wqe, + unsigned int max_sge, bool rsv) + { + struct hns_roce_v2_wqe_data_seg *dseg = wqe; +- unsigned int i, cnt; ++ unsigned int cnt; ++ int i; + + for (i = 0, cnt = 0; i < wr->num_sge; i++) { + /* Skip zero-length sge */ +@@ -1726,7 +1727,7 @@ static void fill_recv_inl_buf(struct hns_roce_rinl_buf *rinl_buf, + unsigned int wqe_idx, struct ibv_recv_wr *wr) + { + struct ibv_sge *sge_list; +- unsigned int i; ++ int i; + + if (!rinl_buf->wqe_cnt) + return; +@@ -2053,7 +2054,7 @@ static int check_post_srq_valid(struct hns_roce_srq *srq, + static int get_wqe_idx(struct hns_roce_srq *srq, unsigned int *wqe_idx) + { + struct hns_roce_idx_que *idx_que = &srq->idx_que; +- int bit_num; ++ unsigned int bit_num; + int i; + + /* bitmap[i] is set zero if all bits are allocated */ +@@ -2451,7 +2452,7 @@ static void set_sgl_rc(struct hns_roce_v2_wqe_data_seg *dseg, + unsigned int mask = qp->ex_sge.sge_cnt - 1; + unsigned int msg_len = 0; + unsigned int cnt = 0; +- int i; ++ unsigned int i; + + for (i = 0; i < num_sge; i++) { + if (!sge[i].length) +diff --git a/providers/hns/hns_roce_u_verbs.c b/providers/hns/hns_roce_u_verbs.c +index 848f836..f0098ed 100644 +--- a/providers/hns/hns_roce_u_verbs.c ++++ b/providers/hns/hns_roce_u_verbs.c +@@ -422,7 +422,7 @@ static int verify_cq_create_attr(struct 
ibv_cq_init_attr_ex *attr, + { + struct hns_roce_pad *pad = to_hr_pad(attr->parent_domain); + +- if (!attr->cqe || attr->cqe > context->max_cqe) { ++ if (!attr->cqe || attr->cqe > (uint32_t)context->max_cqe) { + verbs_err(&context->ibv_ctx, "unsupported cq depth %u.\n", + attr->cqe); + return EINVAL; +@@ -1080,7 +1080,7 @@ static int check_hnsdv_qp_attr(struct hns_roce_context *ctx, + return 0; + + if (!check_comp_mask(hns_attr->comp_mask, HNSDV_QP_SUP_COMP_MASK)) { +- verbs_err(&ctx->ibv_ctx, "invalid hnsdv comp_mask 0x%x.\n", ++ verbs_err(&ctx->ibv_ctx, "invalid hnsdv comp_mask 0x%llx.\n", + hns_attr->comp_mask); + return EINVAL; + } +@@ -1257,7 +1257,7 @@ static int alloc_recv_rinl_buf(uint32_t max_sge, + struct hns_roce_rinl_buf *rinl_buf) + { + unsigned int cnt; +- int i; ++ unsigned int i; + + cnt = rinl_buf->wqe_cnt; + rinl_buf->wqe_list = calloc(cnt, sizeof(struct hns_roce_rinl_wqe)); +-- +2.33.0 + diff --git a/0054-libhns-Fix-wrong-max-inline-data-value.patch b/0054-libhns-Fix-wrong-max-inline-data-value.patch new file mode 100644 index 0000000000000000000000000000000000000000..c911b875deb73b907c44eec2b7732331ccd3a982 --- /dev/null +++ b/0054-libhns-Fix-wrong-max-inline-data-value.patch @@ -0,0 +1,63 @@ +From 10534f0ef2ca73e8e59a38e51969cae864f9fbbf Mon Sep 17 00:00:00 2001 +From: wenglianfa +Date: Thu, 13 Mar 2025 17:26:51 +0800 +Subject: [PATCH 54/55] libhns: Fix wrong max inline data value + +mainline inclusion +from mainline-v56.0-65 +commit 8307b7c54ed81c343ec874e2066de79260b666d2 +category: bugfix +bugzilla: https://gitee.com/src-openeuler/rdma-core/issues/IC1V44 +CVE: NA + +Reference: https://github.com/linux-rdma/rdma-core/pull/1579/commits/8307b7c54ed81c343e... + +--------------------------------------------------------------------- + +When cap.max_inline_data is 0, it will be modified to 1 since +roundup_pow_of_two(0) == 1, which violates users' expectations. +Here fix it. 
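+
+Worked example of the old vs. new behaviour for a few requested sizes,
+assuming the device limit ctx->max_inline_data is large enough not to
+clamp the result:
+
+  requested 0   -> old: roundup_pow_of_two(0) == 1, reports 1;  new: 0
+  requested 24  -> old: 32;                                     new: 32
+  requested 256 -> old: 256;                                    new: 256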
+ +Fixes: 2aff0d55098c ("libhns: Fix the problem of sge nums") +Signed-off-by: wenglianfa +Signed-off-by: Junxian Huang +Signed-off-by: Xinghai Cen +--- + providers/hns/hns_roce_u_verbs.c | 14 +++++++++++--- + 1 file changed, 11 insertions(+), 3 deletions(-) + +diff --git a/providers/hns/hns_roce_u_verbs.c b/providers/hns/hns_roce_u_verbs.c +index f0098ed..5fe169e 100644 +--- a/providers/hns/hns_roce_u_verbs.c ++++ b/providers/hns/hns_roce_u_verbs.c +@@ -1494,6 +1494,16 @@ static unsigned int get_sge_num_from_max_inl_data(bool is_ud, + return inline_sge; + } + ++static uint32_t get_max_inline_data(struct hns_roce_context *ctx, ++ struct ibv_qp_cap *cap) ++{ ++ if (cap->max_inline_data) ++ return min_t(uint32_t, roundup_pow_of_two(cap->max_inline_data), ++ ctx->max_inline_data); ++ ++ return 0; ++} ++ + static void set_ext_sge_param(struct hns_roce_context *ctx, + struct ibv_qp_init_attr_ex *attr, + struct hns_roce_qp *qp, unsigned int wr_cnt) +@@ -1510,9 +1520,7 @@ static void set_ext_sge_param(struct hns_roce_context *ctx, + attr->cap.max_send_sge); + + if (ctx->config & HNS_ROCE_RSP_EXSGE_FLAGS) { +- attr->cap.max_inline_data = min_t(uint32_t, roundup_pow_of_two( +- attr->cap.max_inline_data), +- ctx->max_inline_data); ++ attr->cap.max_inline_data = get_max_inline_data(ctx, &attr->cap); + + inline_ext_sge = max(ext_wqe_sge_cnt, + get_sge_num_from_max_inl_data(is_ud, +-- +2.33.0 + diff --git a/0055-libhns-Fix-wrong-order-of-spin-unlock-in-modify-qp.patch b/0055-libhns-Fix-wrong-order-of-spin-unlock-in-modify-qp.patch new file mode 100644 index 0000000000000000000000000000000000000000..3f2a8f613d4f20e80f7d1384836b020e658df568 --- /dev/null +++ b/0055-libhns-Fix-wrong-order-of-spin-unlock-in-modify-qp.patch @@ -0,0 +1,42 @@ +From d1409106e1323c54fbbb0618c071efb024f58130 Mon Sep 17 00:00:00 2001 +From: Junxian Huang +Date: Thu, 13 Mar 2025 17:26:52 +0800 +Subject: [PATCH 55/55] libhns: Fix wrong order of spin unlock in modify qp + +mainline inclusion +from mainline-v56.0-65 +commit d2b41c86c49335b3c6ab638abb1c0e31f5ba0e8f +category: bugfix +bugzilla: https://gitee.com/src-openeuler/rdma-core/issues/IC1V44 +CVE: NA + +Reference: https://github.com/linux-rdma/rdma-core/pull/1579/commits/d2b41c86c49335b3c6... + +--------------------------------------------------------------------- + +The spin_unlock order should be the reverse of spin_lock order. 
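+
+Illustrative pattern only (the real change is in
+hns_roce_u_v2_modify_qp() below); with plain pthread spinlocks the
+rule reads:
+
+  pthread_spin_lock(&sq_lock);    /* taken first */
+  pthread_spin_lock(&rq_lock);    /* taken second */
+  /* ... flush the queues ... */
+  pthread_spin_unlock(&rq_lock);  /* released first */
+  pthread_spin_unlock(&sq_lock);  /* released second */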
+ +Fixes: 179f015e090d ("libhns: Add support for lock-free QP") +Signed-off-by: Junxian Huang +Signed-off-by: Xinghai Cen +--- + providers/hns/hns_roce_u_hw_v2.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/providers/hns/hns_roce_u_hw_v2.c b/providers/hns/hns_roce_u_hw_v2.c +index cea3043..3a1249f 100644 +--- a/providers/hns/hns_roce_u_hw_v2.c ++++ b/providers/hns/hns_roce_u_hw_v2.c +@@ -1910,8 +1910,8 @@ static int hns_roce_u_v2_modify_qp(struct ibv_qp *qp, struct ibv_qp_attr *attr, + if (flag) { + if (!ret) + qp->state = IBV_QPS_ERR; +- hns_roce_spin_unlock(&hr_qp->sq.hr_lock); + hns_roce_spin_unlock(&hr_qp->rq.hr_lock); ++ hns_roce_spin_unlock(&hr_qp->sq.hr_lock); + } + + if (ret) +-- +2.33.0 + diff --git a/0056-libhns-Add-initial-support-for-HNS-LTTng-tracing.patch b/0056-libhns-Add-initial-support-for-HNS-LTTng-tracing.patch new file mode 100644 index 0000000000000000000000000000000000000000..cb06c7b1223eb23952b348f6a07cb8c420a52812 --- /dev/null +++ b/0056-libhns-Add-initial-support-for-HNS-LTTng-tracing.patch @@ -0,0 +1,112 @@ +From dfcef98e85b947dd38738436c769926f66438a7d Mon Sep 17 00:00:00 2001 +From: wenglianfa +Date: Tue, 22 Apr 2025 16:18:44 +0800 +Subject: [PATCH 56/57] libhns: Add initial support for HNS LTTng tracing + +mainline inclusion +from mainline-v56.0-65 +commit 5d96d96c822323a1c9b0a6b98ce58a17a8f165c1 +category: feature +bugzilla: https://gitee.com/src-openeuler/rdma-core/issues/IC3E67 +CVE: NA + +Reference: https://github.com/linux-rdma/rdma-core/pull/1587/commits/5d96d96c822323a1c9... + +--------------------------------------------------------------------- + +Add initial support for HNS LTTng tracing. + +Signed-off-by: wenglianfa +Signed-off-by: Junxian Huang +Signed-off-by: Xinghai Cen +--- + providers/hns/CMakeLists.txt | 10 +++++++++ + providers/hns/hns_roce_u_trace.c | 9 ++++++++ + providers/hns/hns_roce_u_trace.h | 35 ++++++++++++++++++++++++++++++++ + 3 files changed, 54 insertions(+) + create mode 100644 providers/hns/hns_roce_u_trace.c + create mode 100644 providers/hns/hns_roce_u_trace.h + +diff --git a/providers/hns/CMakeLists.txt b/providers/hns/CMakeLists.txt +index 58139ae..36ebfac 100644 +--- a/providers/hns/CMakeLists.txt ++++ b/providers/hns/CMakeLists.txt +@@ -1,5 +1,10 @@ ++if (ENABLE_LTTNG AND LTTNGUST_FOUND) ++ set(TRACE_FILE hns_roce_u_trace.c) ++endif() ++ + rdma_shared_provider(hns libhns.map + 1 1.0.${PACKAGE_VERSION} ++ ${TRACE_FILE} + hns_roce_u.c + hns_roce_u_buf.c + hns_roce_u_db.c +@@ -12,3 +17,8 @@ publish_headers(infiniband + ) + + rdma_pkg_config("hns" "libibverbs" "${CMAKE_THREAD_LIBS_INIT}") ++ ++if (ENABLE_LTTNG AND LTTNGUST_FOUND) ++ target_include_directories(hns PUBLIC ".") ++ target_link_libraries(hns LINK_PRIVATE LTTng::UST) ++endif() +diff --git a/providers/hns/hns_roce_u_trace.c b/providers/hns/hns_roce_u_trace.c +new file mode 100644 +index 0000000..812f54c +--- /dev/null ++++ b/providers/hns/hns_roce_u_trace.c +@@ -0,0 +1,9 @@ ++// SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause ++/* ++ * Copyright (c) 2025 Hisilicon Limited. ++ */ ++ ++#define LTTNG_UST_TRACEPOINT_CREATE_PROBES ++#define LTTNG_UST_TRACEPOINT_DEFINE ++ ++#include "hns_roce_u_trace.h" +diff --git a/providers/hns/hns_roce_u_trace.h b/providers/hns/hns_roce_u_trace.h +new file mode 100644 +index 0000000..9b9485c +--- /dev/null ++++ b/providers/hns/hns_roce_u_trace.h +@@ -0,0 +1,35 @@ ++/* SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause */ ++/* ++ * Copyright (c) 2025 Hisilicon Limited. 
++ */ ++ ++#if defined(LTTNG_ENABLED) ++ ++#undef LTTNG_UST_TRACEPOINT_PROVIDER ++#define LTTNG_UST_TRACEPOINT_PROVIDER rdma_core_hns ++ ++#undef LTTNG_UST_TRACEPOINT_INCLUDE ++#define LTTNG_UST_TRACEPOINT_INCLUDE "hns_roce_u_trace.h" ++ ++#if !defined(__HNS_TRACE_H__) || defined(LTTNG_UST_TRACEPOINT_HEADER_MULTI_READ) ++#define __HNS_TRACE_H__ ++ ++#include ++#include ++ ++#define rdma_tracepoint(arg...) lttng_ust_tracepoint(arg) ++ ++#endif /* __HNS_TRACE_H__*/ ++ ++#include ++ ++#else ++ ++#ifndef __HNS_TRACE_H__ ++#define __HNS_TRACE_H__ ++ ++#define rdma_tracepoint(arg...) ++ ++#endif /* __HNS_TRACE_H__*/ ++ ++#endif /* defined(LTTNG_ENABLED) */ +-- +2.33.0 + diff --git a/0057-libhns-Add-tracepoint-for-HNS-RoCE-I-O.patch b/0057-libhns-Add-tracepoint-for-HNS-RoCE-I-O.patch new file mode 100644 index 0000000000000000000000000000000000000000..80561aa6db31eb6d64edb97337838d14c25c6c7a --- /dev/null +++ b/0057-libhns-Add-tracepoint-for-HNS-RoCE-I-O.patch @@ -0,0 +1,382 @@ +From feec8deebf58cf6faaf9f70eda49b929eb674f72 Mon Sep 17 00:00:00 2001 +From: wenglianfa +Date: Tue, 22 Apr 2025 16:18:45 +0800 +Subject: [PATCH 57/57] libhns: Add tracepoint for HNS RoCE I/O + +mainline inclusion +from mainline-v56.0-65 +commit 19cb51c73029b593608f0c5d41a4ace8d1f1e334 +category: feature +bugzilla: https://gitee.com/src-openeuler/rdma-core/issues/IC3E67 +CVE: NA + +Reference: https://github.com/linux-rdma/rdma-core/pull/1587/commits/19cb51c73029b59360... + +--------------------------------------------------------------------- + +Add tracepoint for HNS RoCE I/O, including post_send, post_recv and +poll_cq. + +Signed-off-by: wenglianfa +Signed-off-by: Junxian Huang +Signed-off-by: Xinghai Cen +--- + providers/hns/hns_roce_u_hw_v2.c | 153 +++++++++++++++++++++++++++++++ + providers/hns/hns_roce_u_trace.h | 98 ++++++++++++++++++++ + 2 files changed, 251 insertions(+) + +diff --git a/providers/hns/hns_roce_u_hw_v2.c b/providers/hns/hns_roce_u_hw_v2.c +index 3a1249f..b80c574 100644 +--- a/providers/hns/hns_roce_u_hw_v2.c ++++ b/providers/hns/hns_roce_u_hw_v2.c +@@ -38,6 +38,7 @@ + #include "hns_roce_u.h" + #include "hns_roce_u_db.h" + #include "hns_roce_u_hw_v2.h" ++#include "hns_roce_u_trace.h" + + #define HR_IBV_OPC_MAP(ib_key, hr_key) \ + [IBV_WR_ ## ib_key] = HNS_ROCE_WQE_OP_ ## hr_key +@@ -764,6 +765,80 @@ static int parse_cqe_for_cq(struct hns_roce_context *ctx, struct hns_roce_cq *cq + return 0; + } + ++#ifdef LTTNG_ENABLED ++static uint8_t read_wc_sl(struct hns_roce_qp *hr_qp, ++ struct hns_roce_v2_cqe *cqe, ++ struct ibv_wc *wc) ++{ ++ return hr_qp->verbs_qp.qp.qp_type == IBV_QPT_UD && ++ hr_reg_read(cqe, CQE_S_R) == CQE_FOR_RQ ? ++ wc->sl : UINT8_MAX; ++} ++ ++static uint32_t read_wc_rqpn(struct hns_roce_qp *hr_qp, ++ struct hns_roce_v2_cqe *cqe, ++ struct ibv_wc *wc) ++{ ++ return hr_qp->verbs_qp.qp.qp_type == IBV_QPT_UD && ++ hr_reg_read(cqe, CQE_S_R) == CQE_FOR_RQ ? ++ wc->src_qp : UINT32_MAX; ++} ++ ++static uint32_t read_wc_byte_len(struct hns_roce_v2_cqe *cqe, ++ struct ibv_wc *wc) ++{ ++ if (hr_reg_read(cqe, CQE_S_R) == CQE_FOR_RQ) ++ return wc->byte_len; ++ ++ switch (hr_reg_read(cqe, CQE_OPCODE)) { ++ case HNS_ROCE_SQ_OP_RDMA_READ: ++ case HNS_ROCE_SQ_OP_ATOMIC_COMP_AND_SWAP: ++ case HNS_ROCE_SQ_OP_ATOMIC_FETCH_AND_ADD: ++ case HNS_ROCE_SQ_OP_ATOMIC_MASK_COMP_AND_SWAP: ++ case HNS_ROCE_SQ_OP_ATOMIC_MASK_FETCH_AND_ADD: ++ return wc->byte_len; ++ default: ++ return UINT32_MAX; ++ } ++} ++ ++static uint8_t trace_wc_read_sl(struct ibv_cq_ex *cq_ex) ++{ ++ return cq_ex->read_sl ? 
cq_ex->read_sl(cq_ex) : UINT8_MAX; ++} ++ ++static uint32_t trace_wc_read_qp_num(struct ibv_cq_ex *cq_ex) ++{ ++ return cq_ex->read_qp_num ? ++ cq_ex->read_qp_num(cq_ex) : UINT32_MAX; ++} ++ ++static uint32_t trace_wc_read_src_qp(struct ibv_cq_ex *cq_ex) ++{ ++ return cq_ex->read_src_qp ? ++ cq_ex->read_src_qp(cq_ex) : UINT32_MAX; ++} ++ ++static uint32_t trace_wc_read_byte_len(struct ibv_cq_ex *cq_ex) ++{ ++ return cq_ex->read_byte_len ? ++ cq_ex->read_byte_len(cq_ex) : UINT32_MAX; ++} ++ ++static uint32_t get_send_wr_rqpn(struct ibv_send_wr *wr, ++ uint8_t qp_type) ++{ ++ return qp_type == IBV_QPT_UD ? wr->wr.ud.remote_qpn : UINT32_MAX; ++} ++ ++static uint8_t get_send_wr_tclass(struct ibv_send_wr *wr, ++ uint8_t qp_type) ++{ ++ return qp_type == IBV_QPT_UD ? ++ to_hr_ah(wr->wr.ud.ah)->av.tclass : UINT8_MAX; ++} ++#endif ++ + static int hns_roce_poll_one(struct hns_roce_context *ctx, + struct hns_roce_qp **cur_qp, struct hns_roce_cq *cq, + struct ibv_wc *wc) +@@ -800,8 +875,27 @@ static int hns_roce_poll_one(struct hns_roce_context *ctx, + wc->status = wc_status; + wc->vendor_err = hr_reg_read(cqe, CQE_SUB_STATUS); + wc->qp_num = qpn; ++ ++ rdma_tracepoint(rdma_core_hns, poll_cq, ++ cq->verbs_cq.cq.context->device->name, ++ wc->wr_id, wc_status, wc->opcode, ++ wc->wc_flags, wc->vendor_err, ++ read_wc_sl(*cur_qp, cqe, wc), ++ wc->qp_num, read_wc_rqpn(*cur_qp, cqe, wc), ++ read_wc_byte_len(cqe, wc)); + } else { + cq->verbs_cq.cq_ex.status = wc_status; ++ ++ rdma_tracepoint(rdma_core_hns, poll_cq, ++ cq->verbs_cq.cq.context->device->name, ++ cq->verbs_cq.cq_ex.wr_id, wc_status, ++ ibv_wc_read_opcode(&cq->verbs_cq.cq_ex), ++ ibv_wc_read_wc_flags(&cq->verbs_cq.cq_ex), ++ ibv_wc_read_vendor_err(&cq->verbs_cq.cq_ex), ++ trace_wc_read_sl(&cq->verbs_cq.cq_ex), ++ trace_wc_read_qp_num(&cq->verbs_cq.cq_ex), ++ trace_wc_read_src_qp(&cq->verbs_cq.cq_ex), ++ trace_wc_read_byte_len(&cq->verbs_cq.cq_ex)); + } + + if (status == HNS_ROCE_V2_CQE_SUCCESS || +@@ -1635,6 +1729,14 @@ int hns_roce_u_v2_post_send(struct ibv_qp *ibvqp, struct ibv_send_wr *wr, + *bad_wr = wr; + goto out; + } ++ ++ rdma_tracepoint(rdma_core_hns, post_send, ++ ibvqp->context->device->name, wr->wr_id, ++ sge_info.valid_num, ibvqp->qp_num, ++ get_send_wr_rqpn(wr, ibvqp->qp_type), ++ wr->send_flags, sge_info.total_len, ++ wr->opcode, qp->sl, ++ get_send_wr_tclass(wr, ibvqp->qp_type)); + } + + out: +@@ -1785,6 +1887,10 @@ static int hns_roce_u_v2_post_recv(struct ibv_qp *ibvqp, struct ibv_recv_wr *wr, + wqe_idx = (qp->rq.head + nreq) & (qp->rq.wqe_cnt - 1); + fill_rq_wqe(qp, wr, wqe_idx, max_sge); + qp->rq.wrid[wqe_idx] = wr->wr_id; ++ ++ rdma_tracepoint(rdma_core_hns, post_recv, ++ ibvqp->context->device->name, wr->wr_id, ++ wr->num_sge, ibvqp->qp_num, 0); + } + + out: +@@ -2153,6 +2259,10 @@ static int hns_roce_u_v2_post_srq_recv(struct ibv_srq *ib_srq, + fill_wqe_idx(srq, wqe_idx); + + srq->wrid[wqe_idx] = wr->wr_id; ++ ++ rdma_tracepoint(rdma_core_hns, post_recv, ++ ib_srq->context->device->name, wr->wr_id, ++ wr->num_sge, srq->srqn, 1); + } + + if (nreq) { +@@ -2442,6 +2552,12 @@ static void wr_set_sge_rc(struct ibv_qp_ex *ibv_qp, uint32_t lkey, + wqe->msg_len = htole32(length); + hr_reg_write(wqe, RCWQE_LEN0, length); + hr_reg_write(wqe, RCWQE_SGE_NUM, !!length); ++ ++ rdma_tracepoint(rdma_core_hns, post_send, ++ ibv_qp->qp_base.context->device->name, ibv_qp->wr_id, ++ !!length, ibv_qp->qp_base.qp_num, UINT32_MAX, ++ ibv_qp->wr_flags, length, ++ hr_reg_read(wqe, RCWQE_OPCODE), qp->sl, UINT8_MAX); + } + + static void 
set_sgl_rc(struct hns_roce_v2_wqe_data_seg *dseg, +@@ -2506,6 +2622,12 @@ static void wr_set_sge_list_rc(struct ibv_qp_ex *ibv_qp, size_t num_sge, + + wqe->msg_len = htole32(qp->sge_info.total_len); + hr_reg_write(wqe, RCWQE_SGE_NUM, qp->sge_info.valid_num); ++ ++ rdma_tracepoint(rdma_core_hns, post_send, ++ ibv_qp->qp_base.context->device->name, ibv_qp->wr_id, ++ qp->sge_info.valid_num, ibv_qp->qp_base.qp_num, ++ UINT32_MAX, ibv_qp->wr_flags, qp->sge_info.total_len, ++ opcode, qp->sl, UINT8_MAX); + } + + static void wr_send_rc(struct ibv_qp_ex *ibv_qp) +@@ -2680,6 +2802,14 @@ static void set_inline_data_list_rc(struct hns_roce_qp *qp, + + hr_reg_write(wqe, RCWQE_SGE_NUM, qp->sge_info.valid_num); + } ++ ++ rdma_tracepoint(rdma_core_hns, post_send, ++ qp->verbs_qp.qp.context->device->name, ++ qp->verbs_qp.qp_ex.wr_id, ++ hr_reg_read(wqe, RCWQE_SGE_NUM), ++ qp->verbs_qp.qp.qp_num, UINT32_MAX, ++ qp->verbs_qp.qp_ex.wr_flags, msg_len, ++ hr_reg_read(wqe, RCWQE_OPCODE), qp->sl, UINT8_MAX); + } + + static void wr_set_inline_data_rc(struct ibv_qp_ex *ibv_qp, void *addr, +@@ -2812,6 +2942,13 @@ static void wr_set_sge_ud(struct ibv_qp_ex *ibv_qp, uint32_t lkey, + dseg->len = htole32(length); + + qp->sge_info.start_idx++; ++ ++ rdma_tracepoint(rdma_core_hns, post_send, ++ ibv_qp->qp_base.context->device->name, ibv_qp->wr_id, ++ 1, ibv_qp->qp_base.qp_num, ++ hr_reg_read(wqe, UDWQE_DQPN), ibv_qp->wr_flags, ++ length, hr_reg_read(wqe, UDWQE_OPCODE), ++ qp->sl, hr_reg_read(wqe, UDWQE_TCLASS)); + } + + static void wr_set_sge_list_ud(struct ibv_qp_ex *ibv_qp, size_t num_sge, +@@ -2850,6 +2987,13 @@ static void wr_set_sge_list_ud(struct ibv_qp_ex *ibv_qp, size_t num_sge, + hr_reg_write(wqe, UDWQE_SGE_NUM, cnt); + + qp->sge_info.start_idx += cnt; ++ ++ rdma_tracepoint(rdma_core_hns, post_send, ++ ibv_qp->qp_base.context->device->name, ibv_qp->wr_id, ++ cnt, ibv_qp->qp_base.qp_num, ++ hr_reg_read(wqe, UDWQE_DQPN), ibv_qp->wr_flags, ++ msg_len, hr_reg_read(wqe, UDWQE_OPCODE), ++ qp->sl, hr_reg_read(wqe, UDWQE_TCLASS)); + } + + static void set_inline_data_list_ud(struct hns_roce_qp *qp, +@@ -2898,6 +3042,15 @@ static void set_inline_data_list_ud(struct hns_roce_qp *qp, + + hr_reg_write(wqe, UDWQE_SGE_NUM, qp->sge_info.valid_num); + } ++ ++ rdma_tracepoint(rdma_core_hns, post_send, ++ qp->verbs_qp.qp.context->device->name, ++ qp->verbs_qp.qp_ex.wr_id, ++ hr_reg_read(wqe, UDWQE_SGE_NUM), ++ qp->verbs_qp.qp.qp_num, hr_reg_read(wqe, UDWQE_DQPN), ++ qp->verbs_qp.qp_ex.wr_flags, msg_len, ++ hr_reg_read(wqe, UDWQE_OPCODE), qp->sl, ++ hr_reg_read(wqe, UDWQE_TCLASS)); + } + + static void wr_set_inline_data_ud(struct ibv_qp_ex *ibv_qp, void *addr, +diff --git a/providers/hns/hns_roce_u_trace.h b/providers/hns/hns_roce_u_trace.h +index 9b9485c..4654985 100644 +--- a/providers/hns/hns_roce_u_trace.h ++++ b/providers/hns/hns_roce_u_trace.h +@@ -17,6 +17,104 @@ + #include + #include + ++LTTNG_UST_TRACEPOINT_EVENT( ++ /* Tracepoint provider name */ ++ rdma_core_hns, ++ ++ /* Tracepoint name */ ++ post_send, ++ ++ /* Input arguments */ ++ LTTNG_UST_TP_ARGS( ++ char *, dev_name, ++ uint64_t, wr_id, ++ int32_t, num_sge, ++ uint32_t, lqpn, ++ uint32_t, rqpn, ++ uint32_t, send_flags, ++ uint32_t, msg_len, ++ uint8_t, opcode, ++ uint8_t, sl, ++ uint8_t, t_class ++ ), ++ ++ /* Output event fields */ ++ LTTNG_UST_TP_FIELDS( ++ lttng_ust_field_string(dev_name, dev_name) ++ lttng_ust_field_integer_hex(uint64_t, wr_id, wr_id) ++ lttng_ust_field_integer_hex(int32_t, num_sge, num_sge) ++ lttng_ust_field_integer_hex(uint32_t, lqpn, lqpn) 
++ lttng_ust_field_integer_hex(uint32_t, rqpn, rqpn) ++ lttng_ust_field_integer_hex(uint32_t, send_flags, send_flags) ++ lttng_ust_field_integer_hex(uint32_t, msg_len, msg_len) ++ lttng_ust_field_integer_hex(uint8_t, opcode, opcode) ++ lttng_ust_field_integer_hex(uint8_t, sl, sl) ++ lttng_ust_field_integer_hex(uint8_t, t_class, t_class) ++ ) ++) ++ ++LTTNG_UST_TRACEPOINT_EVENT( ++ /* Tracepoint provider name */ ++ rdma_core_hns, ++ ++ /* Tracepoint name */ ++ post_recv, ++ ++ /* Input arguments */ ++ LTTNG_UST_TP_ARGS( ++ char *, dev_name, ++ uint64_t, wr_id, ++ int32_t, num_sge, ++ uint32_t, rqn, ++ uint8_t, is_srq ++ ), ++ ++ /* Output event fields */ ++ LTTNG_UST_TP_FIELDS( ++ lttng_ust_field_string(dev_name, dev_name) ++ lttng_ust_field_integer_hex(uint64_t, wr_id, wr_id) ++ lttng_ust_field_integer_hex(int32_t, num_sge, num_sge) ++ lttng_ust_field_integer_hex(uint32_t, rqn, rqn) ++ lttng_ust_field_integer_hex(uint8_t, is_srq, is_srq) ++ ) ++) ++ ++LTTNG_UST_TRACEPOINT_EVENT( ++ /* Tracepoint provider name */ ++ rdma_core_hns, ++ ++ /* Tracepoint name */ ++ poll_cq, ++ ++ /* Input arguments */ ++ LTTNG_UST_TP_ARGS( ++ char *, dev_name, ++ uint64_t, wr_id, ++ uint8_t, status, ++ uint8_t, opcode, ++ uint8_t, wc_flags, ++ uint8_t, vendor_err, ++ uint8_t, pktype, ++ uint32_t, lqpn, ++ uint32_t, rqpn, ++ uint32_t, byte_len ++ ), ++ ++ /* Output event fields */ ++ LTTNG_UST_TP_FIELDS( ++ lttng_ust_field_string(dev_name, dev_name) ++ lttng_ust_field_integer_hex(uint64_t, wr_id, wr_id) ++ lttng_ust_field_integer_hex(uint8_t, status, status) ++ lttng_ust_field_integer_hex(uint8_t, opcode, opcode) ++ lttng_ust_field_integer_hex(uint8_t, wc_flags, wc_flags) ++ lttng_ust_field_integer_hex(uint8_t, vendor_err, vendor_err) ++ lttng_ust_field_integer_hex(uint8_t, pktype, pktype) ++ lttng_ust_field_integer_hex(uint32_t, lqpn, lqpn) ++ lttng_ust_field_integer_hex(uint32_t, rqpn, rqpn) ++ lttng_ust_field_integer_hex(uint32_t, byte_len, byte_len) ++ ) ++) ++ + #define rdma_tracepoint(arg...) lttng_ust_tracepoint(arg) + + #endif /* __HNS_TRACE_H__*/ +-- +2.33.0 + diff --git a/0058-libhns-Add-debug-log-for-lock-free-mode.patch b/0058-libhns-Add-debug-log-for-lock-free-mode.patch new file mode 100644 index 0000000000000000000000000000000000000000..98c8e1675c39c545f1afa517145d9f1659f9b69c --- /dev/null +++ b/0058-libhns-Add-debug-log-for-lock-free-mode.patch @@ -0,0 +1,59 @@ +From 40c7b406829bc1250d93af527d70836e02c1fbac Mon Sep 17 00:00:00 2001 +From: Junxian Huang +Date: Thu, 24 Apr 2025 20:32:12 +0800 +Subject: [PATCH 58/62] libhns: Add debug log for lock-free mode + +mainline inclusion +from mainline-v56.0-65 +commit fb96940fcf6f96185d407d57bcaf775ccf8f1762 +category: cheanup +bugzilla: https://gitee.com/src-openeuler/rdma-core/issues/IC3X57 +CVE: NA + +Reference: +https://github.com/linux-rdma/rdma-core/pull/1599/commits/fb96940fcf6f96185d... + +--------------------------------------------------------------------- + +Currently there is no way to observe whether the lock-free mode is +configured from the driver's perspective. Add debug log for this. 
+ +Signed-off-by: Junxian Huang +Signed-off-by: Xinghai Cen +--- + providers/hns/hns_roce_u_verbs.c | 7 ++++++- + 1 file changed, 6 insertions(+), 1 deletion(-) + +diff --git a/providers/hns/hns_roce_u_verbs.c b/providers/hns/hns_roce_u_verbs.c +index 5fe169e..3efc2f4 100644 +--- a/providers/hns/hns_roce_u_verbs.c ++++ b/providers/hns/hns_roce_u_verbs.c +@@ -182,6 +182,7 @@ err: + struct ibv_pd *hns_roce_u_alloc_pad(struct ibv_context *context, + struct ibv_parent_domain_init_attr *attr) + { ++ struct hns_roce_pd *protection_domain; + struct hns_roce_pad *pad; + + if (ibv_check_alloc_parent_domain(attr)) +@@ -198,12 +199,16 @@ struct ibv_pd *hns_roce_u_alloc_pad(struct ibv_context *context, + return NULL; + } + ++ protection_domain = to_hr_pd(attr->pd); + if (attr->td) { + pad->td = to_hr_td(attr->td); + atomic_fetch_add(&pad->td->refcount, 1); ++ verbs_debug(verbs_get_ctx(context), ++ "set PAD(0x%x) to lock-free mode.\n", ++ protection_domain->pdn); + } + +- pad->pd.protection_domain = to_hr_pd(attr->pd); ++ pad->pd.protection_domain = protection_domain; + atomic_fetch_add(&pad->pd.protection_domain->refcount, 1); + + atomic_init(&pad->pd.refcount, 1); +-- +2.25.1 + diff --git a/0059-libhns-Fix-ret-not-assigned-in-create-srq.patch b/0059-libhns-Fix-ret-not-assigned-in-create-srq.patch new file mode 100644 index 0000000000000000000000000000000000000000..10ca441b0a1e606ccb462af7be1c9db0dd3015b6 --- /dev/null +++ b/0059-libhns-Fix-ret-not-assigned-in-create-srq.patch @@ -0,0 +1,58 @@ +From 478e5fd1d8e1a0b04fe6638c163951a0892eab44 Mon Sep 17 00:00:00 2001 +From: Junxian Huang +Date: Wed, 23 Apr 2025 16:55:14 +0800 +Subject: [PATCH 59/62] libhns: Fix ret not assigned in create srq() + +mainline inclusion +from mainline-v56.0-65 +commit 2034b1860c5a8b0cc3879315259462c04e53a98d +category: bugfix +bugzilla: https://gitee.com/src-openeuler/rdma-core/issues/IC3X57 +CVE: NA + +Reference: +https://github.com/linux-rdma/rdma-core/pull/1599/commits/2034b1860c5a8b0cc3... + +--------------------------------------------------------------------- + +Fix the problem that ret may not be assigned in the error flow +of create_srq(). 
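+
+A minimal sketch of the bug pattern (step_one()/step_two() are
+hypothetical placeholders, not the driver code):
+
+  static int step_one(void) { return -1; }  /* placeholder */
+  static int step_two(void) { return 0; }   /* placeholder */
+
+  static int create_thing(void)
+  {
+          int ret;
+
+          if (step_one())         /* old code: status not saved in ret */
+                  goto err;       /* ret is still uninitialized here */
+          if (step_two())
+                  goto err;
+          return 0;
+  err:
+          return ret;             /* may return garbage instead of an errno */
+  }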
+ +Fixes: aa7bcf7f7e44 ("libhns: Add support for lock-free SRQ") +Signed-off-by: Junxian Huang +Signed-off-by: Xinghai Cen +--- + providers/hns/hns_roce_u_verbs.c | 10 +++++++--- + 1 file changed, 7 insertions(+), 3 deletions(-) + +diff --git a/providers/hns/hns_roce_u_verbs.c b/providers/hns/hns_roce_u_verbs.c +index 3efc2f4..e0bafe3 100644 +--- a/providers/hns/hns_roce_u_verbs.c ++++ b/providers/hns/hns_roce_u_verbs.c +@@ -933,16 +933,20 @@ static struct ibv_srq *create_srq(struct ibv_context *context, + if (pad) + atomic_fetch_add(&pad->pd.refcount, 1); + +- if (hns_roce_srq_spinlock_init(context, srq, init_attr)) ++ ret = hns_roce_srq_spinlock_init(context, srq, init_attr); ++ if (ret) + goto err_free_srq; + + set_srq_param(context, srq, init_attr); +- if (alloc_srq_buf(srq)) ++ ret = alloc_srq_buf(srq); ++ if (ret) + goto err_destroy_lock; + + srq->rdb = hns_roce_alloc_db(hr_ctx, HNS_ROCE_SRQ_TYPE_DB); +- if (!srq->rdb) ++ if (!srq->rdb) { ++ ret = ENOMEM; + goto err_srq_buf; ++ } + + ret = exec_srq_create_cmd(context, srq, init_attr); + if (ret) +-- +2.25.1 + diff --git a/0060-libhns-Fix-pad-refcnt-leaking-in-error-flow-of-creat.patch b/0060-libhns-Fix-pad-refcnt-leaking-in-error-flow-of-creat.patch new file mode 100644 index 0000000000000000000000000000000000000000..4f0288ddcfd2c1c468ed2ca79633d38a5d510c1c --- /dev/null +++ b/0060-libhns-Fix-pad-refcnt-leaking-in-error-flow-of-creat.patch @@ -0,0 +1,99 @@ +From e45b9c648476b1b56592a873fa71699cb7f32ffd Mon Sep 17 00:00:00 2001 +From: Junxian Huang +Date: Wed, 23 Apr 2025 16:55:15 +0800 +Subject: [PATCH 60/62] libhns: Fix pad refcnt leaking in error flow of create + qp/cq/srq + +mainline inclusion +from mainline-v56.0-65 +commit f877d6e610e438515e1535c9ec7a3a3ef37c58e0 +category: bugfix +bugzilla: https://gitee.com/src-openeuler/rdma-core/issues/IC3X57 +CVE: NA + +Reference: +https://github.com/linux-rdma/rdma-core/pull/1599/commits/f877d6e610e438515e... + +--------------------------------------------------------------------- + +Decrease pad refcnt by 1 in error flow of create qp/cq/srq. 
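+
+In short (a sketch of the pattern, not the literal diff): the reference
+taken at the start of each create path now has a matching drop on the
+failure exits:
+
+  atomic_fetch_add(&pad->pd.refcount, 1);   /* on entry */
+  /* ... on any error path before success: */
+  atomic_fetch_sub(&pad->pd.refcount, 1);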
+ +Fixes: f8b4f622b1c5 ("libhns: Add support for lock-free QP") +Fixes: 95225025e24c ("libhns: Add support for lock-free CQ") +Fixes: aa7bcf7f7e44 ("libhns: Add support for lock-free SRQ") +Signed-off-by: Junxian Huang +Signed-off-by: Xinghai Cen +--- + providers/hns/hns_roce_u_verbs.c | 20 +++++++++++++------- + 1 file changed, 13 insertions(+), 7 deletions(-) + +diff --git a/providers/hns/hns_roce_u_verbs.c b/providers/hns/hns_roce_u_verbs.c +index e0bafe3..70f516a 100644 +--- a/providers/hns/hns_roce_u_verbs.c ++++ b/providers/hns/hns_roce_u_verbs.c +@@ -445,12 +445,9 @@ static int verify_cq_create_attr(struct ibv_cq_init_attr_ex *attr, + return EOPNOTSUPP; + } + +- if (attr->comp_mask & IBV_CQ_INIT_ATTR_MASK_PD) { +- if (!pad) { +- verbs_err(&context->ibv_ctx, "failed to check the pad of cq.\n"); +- return EINVAL; +- } +- atomic_fetch_add(&pad->pd.refcount, 1); ++ if (attr->comp_mask & IBV_CQ_INIT_ATTR_MASK_PD && !pad) { ++ verbs_err(&context->ibv_ctx, "failed to check the pad of cq.\n"); ++ return EINVAL; + } + + attr->cqe = max_t(uint32_t, HNS_ROCE_MIN_CQE_NUM, +@@ -556,6 +553,7 @@ static void hns_roce_uninit_cq_swc(struct hns_roce_cq *cq) + static struct ibv_cq_ex *create_cq(struct ibv_context *context, + struct ibv_cq_init_attr_ex *attr) + { ++ struct hns_roce_pad *pad = to_hr_pad(attr->parent_domain); + struct hns_roce_context *hr_ctx = to_hr_ctx(context); + struct hns_roce_cq *cq; + int ret; +@@ -570,8 +568,10 @@ static struct ibv_cq_ex *create_cq(struct ibv_context *context, + goto err; + } + +- if (attr->comp_mask & IBV_CQ_INIT_ATTR_MASK_PD) ++ if (attr->comp_mask & IBV_CQ_INIT_ATTR_MASK_PD) { + cq->parent_domain = attr->parent_domain; ++ atomic_fetch_add(&pad->pd.refcount, 1); ++ } + + ret = hns_roce_cq_spinlock_init(context, cq, attr); + if (ret) +@@ -611,6 +611,8 @@ err_db: + err_buf: + hns_roce_spinlock_destroy(&cq->hr_lock); + err_lock: ++ if (attr->comp_mask & IBV_CQ_INIT_ATTR_MASK_PD) ++ atomic_fetch_sub(&pad->pd.refcount, 1); + free(cq); + err: + if (ret < 0) +@@ -977,6 +979,8 @@ err_destroy_lock: + hns_roce_spinlock_destroy(&srq->hr_lock); + + err_free_srq: ++ if (pad) ++ atomic_fetch_sub(&pad->pd.refcount, 1); + free(srq); + + err: +@@ -1872,6 +1876,8 @@ err_cmd: + err_buf: + hns_roce_qp_spinlock_destroy(qp); + err_spinlock: ++ if (pad) ++ atomic_fetch_sub(&pad->pd.refcount, 1); + free(qp); + err: + if (ret < 0) +-- +2.25.1 + diff --git a/0061-libhns-Fix-freeing-pad-without-checking-refcnt.patch b/0061-libhns-Fix-freeing-pad-without-checking-refcnt.patch new file mode 100644 index 0000000000000000000000000000000000000000..2d57b7ab385a9a3f6653ffc721b0a1f993f0753f --- /dev/null +++ b/0061-libhns-Fix-freeing-pad-without-checking-refcnt.patch @@ -0,0 +1,69 @@ +From 59108bf3e452fa7701a3972c78d22352598891be Mon Sep 17 00:00:00 2001 +From: Junxian Huang +Date: Wed, 23 Apr 2025 16:55:16 +0800 +Subject: [PATCH 61/62] libhns: Fix freeing pad without checking refcnt + +mainline inclusion +from mainline-v56.0-65 +commit 234d135276ea8ef83633113e224e0cd735ebeca8 +category: bugfix +bugzilla: https://gitee.com/src-openeuler/rdma-core/issues/IC3X57 +CVE: NA + +Reference: +https://github.com/linux-rdma/rdma-core/pull/1599/commits/234d135276ea8ef836... + +--------------------------------------------------------------------- + +Currently pad refcnt will be added when creating qp/cq/srq, but it is +not checked when freeing pad. Add a check to prevent freeing pad when +it is still used by any qp/cq/srq. 
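+
+The essence of the added check (the full version is in
+hns_roce_free_pad() below):
+
+  if (atomic_load(&pad->pd.refcount) > 1)
+          return EBUSY;   /* PAD still referenced by some qp/cq/srq */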
+ +Fixes: 7b6b3dae328f ("libhns: Add support for thread domain and parent +domain") +Signed-off-by: Junxian Huang +Signed-off-by: Xinghai Cen +--- + providers/hns/hns_roce_u_verbs.c | 12 +++++++----- + 1 file changed, 7 insertions(+), 5 deletions(-) + +diff --git a/providers/hns/hns_roce_u_verbs.c b/providers/hns/hns_roce_u_verbs.c +index 70f516a..edd8e3d 100644 +--- a/providers/hns/hns_roce_u_verbs.c ++++ b/providers/hns/hns_roce_u_verbs.c +@@ -218,14 +218,18 @@ struct ibv_pd *hns_roce_u_alloc_pad(struct ibv_context *context, + return &pad->pd.ibv_pd; + } + +-static void hns_roce_free_pad(struct hns_roce_pad *pad) ++static int hns_roce_free_pad(struct hns_roce_pad *pad) + { ++ if (atomic_load(&pad->pd.refcount) > 1) ++ return EBUSY; ++ + atomic_fetch_sub(&pad->pd.protection_domain->refcount, 1); + + if (pad->td) + atomic_fetch_sub(&pad->td->refcount, 1); + + free(pad); ++ return 0; + } + + static int hns_roce_free_pd(struct hns_roce_pd *pd) +@@ -248,10 +252,8 @@ int hns_roce_u_dealloc_pd(struct ibv_pd *ibv_pd) + struct hns_roce_pad *pad = to_hr_pad(ibv_pd); + struct hns_roce_pd *pd = to_hr_pd(ibv_pd); + +- if (pad) { +- hns_roce_free_pad(pad); +- return 0; +- } ++ if (pad) ++ return hns_roce_free_pad(pad); + + return hns_roce_free_pd(pd); + } +-- +2.25.1 + diff --git a/0062-verbs-Assign-ibv-srq-pd-when-creating-SRQ.patch b/0062-verbs-Assign-ibv-srq-pd-when-creating-SRQ.patch new file mode 100644 index 0000000000000000000000000000000000000000..72eb91e78c9df1e2d9d867acf8cfa53407935baf --- /dev/null +++ b/0062-verbs-Assign-ibv-srq-pd-when-creating-SRQ.patch @@ -0,0 +1,43 @@ +From 387d76c0046b4fb6fbd8d70389b335661d099683 Mon Sep 17 00:00:00 2001 +From: Junxian Huang +Date: Wed, 23 Apr 2025 16:55:17 +0800 +Subject: [PATCH 62/62] verbs: Assign ibv srq->pd when creating SRQ + +mainline inclusion +from mainline-v56.0-65 +commit bf1e427141fde2651bab4860e77a432bb7e26094 +category: bugfix +bugzilla: https://gitee.com/src-openeuler/rdma-core/issues/IC3X57 +CVE: NA + +Reference: +https://github.com/linux-rdma/rdma-core/pull/1599/commits/bf1e427141fde2651b... + +--------------------------------------------------------------------- + +Some providers need to access ibv_srq->pd during SRQ destruction, but +it may not be assigned currently when using ibv_create_srq_ex(). This +may lead to some SRQ-related resource leaks. Assign ibv_srq->pd when +creating SRQ to ensure pd can be obtained correctly. 
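+
+A hedged sketch (hypothetical provider code, made-up name) of why a
+missing srq->pd hurts at destroy time:
+
+  static void example_provider_destroy_srq_cleanup(struct ibv_srq *srq)
+  {
+          /* Before this fix, srq->pd could still be NULL here when the
+           * SRQ was created through ibv_create_srq_ex(), so per-PD
+           * bookkeeping keyed on it was silently skipped and leaked.
+           */
+          if (!srq->pd)
+                  return;
+          /* ... drop per-PD references held for this SRQ ... */
+  }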
+ +Signed-off-by: Junxian Huang +Signed-off-by: Xinghai Cen +--- + libibverbs/cmd_srq.c | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/libibverbs/cmd_srq.c b/libibverbs/cmd_srq.c +index dfaaa6a..259ea0d 100644 +--- a/libibverbs/cmd_srq.c ++++ b/libibverbs/cmd_srq.c +@@ -63,6 +63,7 @@ static int ibv_icmd_create_srq(struct ibv_pd *pd, struct verbs_srq *vsrq, + struct verbs_xrcd *vxrcd = NULL; + enum ibv_srq_type srq_type; + ++ srq->pd = pd; + srq->context = pd->context; + pthread_mutex_init(&srq->mutex, NULL); + pthread_cond_init(&srq->cond, NULL); +-- +2.25.1 + diff --git a/0063-libxscale-update-to-version-2412GA.patch b/0063-libxscale-update-to-version-2412GA.patch new file mode 100644 index 0000000000000000000000000000000000000000..18e9c12c2e3fd30031008a198b84754caadb1229 --- /dev/null +++ b/0063-libxscale-update-to-version-2412GA.patch @@ -0,0 +1,2742 @@ +From 81a2efc28f60ab26398c45236678cc08518b1e41 Mon Sep 17 00:00:00 2001 +From: Xin Tian +Date: Thu, 8 May 2025 12:10:40 +0800 +Subject: [PATCH] libxscale: update to version 2412GA + +new feature: +- support diamond products +- support ibv_wr apis +- support extended CQ poll apis + +bugfix: +- imm data endian error + +Signed-off-by: Xin Tian +--- + providers/xscale/cq.c | 1047 ++++++++++-------------------------- + providers/xscale/qp.c | 516 ++++++++++++++---- + providers/xscale/verbs.c | 175 ++++-- + providers/xscale/xsc_api.h | 4 +- + providers/xscale/xsc_hsi.h | 103 ++-- + providers/xscale/xscale.c | 12 +- + providers/xscale/xscale.h | 37 +- + 7 files changed, 923 insertions(+), 971 deletions(-) + +diff --git a/providers/xscale/cq.c b/providers/xscale/cq.c +index e2619f0..609ce2e 100644 +--- a/providers/xscale/cq.c ++++ b/providers/xscale/cq.c +@@ -13,12 +13,12 @@ + #include + + #include +-#include + #include + + #include "xscale.h" + #include "wqe.h" + #include "xsc_hsi.h" ++#include "xsc_hw.h" + + enum { + CQ_OK = 0, +@@ -68,6 +68,7 @@ static const uint32_t xsc_cqe_opcode[] = { + [XSC_OPCODE_RDMA_REQ_WRITE_IMMDT] = IBV_WC_RDMA_WRITE, + [XSC_OPCODE_RDMA_RSP_WRITE_IMMDT] = IBV_WC_RECV_RDMA_WITH_IMM, + [XSC_OPCODE_RDMA_REQ_READ] = IBV_WC_RDMA_READ, ++ [XSC_OPCODE_RDMA_CQE_RAW_SNF] = IBV_WC_RECV, + }; + + int xsc_stall_num_loop = 60; +@@ -76,16 +77,64 @@ int xsc_stall_cq_poll_max = 100000; + int xsc_stall_cq_inc_step = 100; + int xsc_stall_cq_dec_step = 10; + +-static inline uint8_t xsc_get_cqe_opcode(struct xsc_cqe *cqe) ALWAYS_INLINE; +-static inline uint8_t xsc_get_cqe_opcode(struct xsc_cqe *cqe) ++static void xsc_stall_poll_cq(void) ++{ ++ int i; ++ ++ for (i = 0; i < xsc_stall_num_loop; i++) ++ __asm__ volatile ("nop"); ++} ++ ++static inline int get_qp_ctx(struct xsc_context *xctx, ++ struct xsc_resource **cur_rsc, ++ uint32_t qpn) ++ ALWAYS_INLINE; ++static inline int get_qp_ctx(struct xsc_context *xctx, ++ struct xsc_resource **cur_rsc, ++ uint32_t qpn) ++{ ++ if (!*cur_rsc || (qpn != (*cur_rsc)->rsn)) { ++ /* ++ * We do not have to take the QP table lock here, ++ * because CQs will be locked while QPs are removed ++ * from the table. 
++ */ ++ *cur_rsc = (struct xsc_resource *)xsc_find_qp(xctx, qpn); ++ if (unlikely(!*cur_rsc)) ++ return CQ_POLL_ERR; ++ } ++ ++ return CQ_OK; ++} ++ ++static inline uint8_t xsc_get_cqe_opcode(struct xsc_context *ctx, ++ struct xsc_resource **cur_rsc, ++ struct xsc_cqe *cqe) ALWAYS_INLINE; ++static inline uint8_t xsc_get_cqe_opcode(struct xsc_context *ctx, ++ struct xsc_resource **cur_rsc, ++ struct xsc_cqe *cqe) + { +- if (cqe->is_error) ++ uint8_t msg_opcode = xsc_hw_get_cqe_msg_opcode(ctx->device_id, cqe); ++ struct xsc_qp *qp; ++ int err; ++ ++ if (xsc_hw_is_err_cqe(ctx->device_id, cqe)) + return cqe->type ? XSC_OPCODE_RDMA_RSP_ERROR : XSC_OPCODE_RDMA_REQ_ERROR; +- if (cqe->msg_opcode > XSC_MSG_OPCODE_RDMA_READ) { ++ ++ err = get_qp_ctx(ctx, cur_rsc, RD_LE_16(cqe->qp_id)); ++ if (unlikely(err)) ++ goto msg_opcode_err_check; ++ qp = rsc_to_xqp(*cur_rsc); ++ if (qp->flags & XSC_QP_FLAG_RAWPACKET_SNIFFER) ++ return XSC_OPCODE_RDMA_CQE_RAW_SNF; ++ ++msg_opcode_err_check: ++ if (msg_opcode > XSC_MSG_OPCODE_RDMA_READ) { + printf("rdma cqe msg code should be send/write/read\n"); + return XSC_OPCODE_RDMA_CQE_ERROR; + } +- return xsc_msg_opcode[cqe->msg_opcode][cqe->type][cqe->with_immdt]; ++ ++ return xsc_msg_opcode[msg_opcode][cqe->type][cqe->with_immdt]; + } + + static inline uint8_t get_cqe_l3_hdr_type(struct xsc_cqe64 *cqe) +@@ -108,18 +157,11 @@ static void *get_sw_cqe(struct xsc_cq *cq, int n) + return NULL; + } + +-static void *next_cqe_sw(struct xsc_cq *cq) +-{ +- return get_sw_cqe(cq, cq->cons_index); +-} +- + static void update_cons_index(struct xsc_cq *cq) + { +- union xsc_db_data db; ++ struct xsc_context *ctx = to_xctx(ibv_cq_ex_to_cq(&cq->verbs_cq.cq_ex)->context); + +- db.raw_data = cq->cons_index; +- db.cqn = cq->cqn; +- WR_REG(cq->db, db.raw_data); ++ xsc_hw_set_cq_ci(ctx->device_id, cq->db, cq->cqn, cq->cons_index); + } + + static inline void handle_good_req( +@@ -140,6 +182,7 @@ static inline void handle_good_req( + wc->byte_len = ctrl->msg_len; + } + wq->flush_wqe_cnt--; ++ wq->need_flush[idx] = 0; + + xsc_dbg(to_xctx(qp->ibv_qp->context)->dbg_fp, XSC_DBG_CQ_CQE, + "wqeid:%u, wq tail:%u\n", idx, wq->tail); +@@ -182,40 +225,6 @@ static void dump_cqe(void *buf) + printf("0x%08x 0x%08x 0x%08x 0x%08x\n", p[i], p[i+1], p[i+2], p[i+3]); + } + +-static enum ibv_wc_status xsc_cqe_error_code(struct xsc_cqe *cqe) +-{ +- switch (cqe->error_code) { +- case XSC_ERR_CODE_NAK_RETRY: +- return IBV_WC_RETRY_EXC_ERR; +- case XSC_ERR_CODE_NAK_OPCODE: +- return IBV_WC_BAD_RESP_ERR; +- case XSC_ERR_CODE_NAK_MR: +- return IBV_WC_REM_ACCESS_ERR; +- case XSC_ERR_CODE_NAK_OPERATION: +- return IBV_WC_REM_OP_ERR; +- case XSC_ERR_CODE_NAK_RNR: +- return IBV_WC_RNR_RETRY_EXC_ERR; +- case XSC_ERR_CODE_LOCAL_MR: +- return IBV_WC_LOC_PROT_ERR; +- case XSC_ERR_CODE_LOCAL_LEN: +- return IBV_WC_LOC_LEN_ERR; +- case XSC_ERR_CODE_LEN_GEN_CQE: +- return IBV_WC_LOC_LEN_ERR; +- case XSC_ERR_CODE_OPERATION: +- return IBV_WC_LOC_ACCESS_ERR; +- case XSC_ERR_CODE_FLUSH: +- return IBV_WC_WR_FLUSH_ERR; +- case XSC_ERR_CODE_MALF_WQE_HOST: +- case XSC_ERR_CODE_STRG_ACC_GEN_CQE: +- return IBV_WC_FATAL_ERR; +- case XSC_ERR_CODE_OPCODE_GEN_CQE: +- case XSC_ERR_CODE_LOCAL_OPCODE: +- default: +- return IBV_WC_GENERAL_ERR; +- } +-} +- +- + static inline bool xsc_qp_need_cqe(struct xsc_qp *qp, int *type, int *wqe_id) + { + struct xsc_wq *wq; +@@ -248,128 +257,49 @@ static inline void handle_bad_req( + struct ibv_wc *wc, struct xsc_cqe *cqe, struct xsc_qp *qp, struct xsc_wq *wq) + { + int idx; +- wc->status = 
xsc_cqe_error_code(cqe); +- wc->vendor_err = cqe->error_code; ++ ++ wc->status = xsc_hw_cqe_err_status(xctx->device_id, cqe); ++ wc->vendor_err = xsc_hw_get_cqe_err_code(xctx->device_id, cqe); + idx = RD_LE_16(cqe->wqe_id); + idx >>= (qp->sq.wqe_shift - XSC_BASE_WQE_SHIFT); + idx &= (wq->wqe_cnt -1); + wq->tail = wq->wqe_head[idx] + 1; + wc->wr_id = wq->wrid[idx]; +- wq->flush_wqe_cnt--; +- +- if (cqe->error_code != XSC_ERR_CODE_FLUSH) { ++ if (wq->need_flush[idx]) ++ wq->flush_wqe_cnt--; ++ wq->need_flush[idx] = 0; ++ if (wc->status != IBV_WC_WR_FLUSH_ERR) { + printf("%s: got completion with error:\n", xctx->hostname); + dump_cqe(cqe); + } ++ qp->err_occurred = 1; + } + + static inline void handle_bad_responder( + struct xsc_context *xctx, +- struct ibv_wc *wc, struct xsc_cqe *cqe, struct xsc_wq *wq) ++ struct ibv_wc *wc, struct xsc_cqe *cqe, struct xsc_qp *qp, struct xsc_wq *wq) + { +- wc->status = xsc_cqe_error_code(cqe); +- wc->vendor_err = cqe->error_code; ++ wc->status = xsc_hw_cqe_err_status(xctx->device_id, cqe); ++ wc->vendor_err = xsc_hw_get_cqe_err_code(xctx->device_id, cqe); + + ++wq->tail; + wq->flush_wqe_cnt--; +- +- if (cqe->error_code != XSC_ERR_CODE_FLUSH) { ++ if (wc->status != IBV_WC_WR_FLUSH_ERR) { + printf("%s: got completion with error:\n", xctx->hostname); + dump_cqe(cqe); + } +-} +- +-#if defined(__x86_64__) || defined (__i386__) +-static inline unsigned long get_cycles(void) +-{ +- uint32_t low, high; +- uint64_t val; +- asm volatile ("rdtsc" : "=a" (low), "=d" (high)); +- val = high; +- val = (val << 32) | low; +- return val; +-} +- +-static void xsc_stall_poll_cq(void) +-{ +- int i; +- +- for (i = 0; i < xsc_stall_num_loop; i++) +- (void)get_cycles(); +-} +-static void xsc_stall_cycles_poll_cq(uint64_t cycles) +-{ +- while (get_cycles() < cycles) +- ; /* Nothing */ +-} +-static void xsc_get_cycles(uint64_t *cycles) +-{ +- *cycles = get_cycles(); +-} +-#else +-static void xsc_stall_poll_cq(void) +-{ +-} +-static void xsc_stall_cycles_poll_cq(uint64_t cycles) +-{ +-} +-static void xsc_get_cycles(uint64_t *cycles) +-{ +-} +-#endif +- +-static inline int get_qp_ctx(struct xsc_context *xctx, +- struct xsc_resource **cur_rsc, +- uint32_t qpn) +- ALWAYS_INLINE; +-static inline int get_qp_ctx(struct xsc_context *xctx, +- struct xsc_resource **cur_rsc, +- uint32_t qpn) +-{ +- if (!*cur_rsc || (qpn != (*cur_rsc)->rsn)) { +- /* +- * We do not have to take the QP table lock here, +- * because CQs will be locked while QPs are removed +- * from the table. +- */ +- *cur_rsc = (struct xsc_resource *)xsc_find_qp(xctx, qpn); +- if (unlikely(!*cur_rsc)) +- return CQ_POLL_ERR; +- } +- +- return CQ_OK; +-} +- +-static inline int xsc_get_next_cqe(struct xsc_cq *cq, +- struct xsc_cqe64 **pcqe64, +- void **pcqe) +- ALWAYS_INLINE; +-static inline int xsc_get_next_cqe(struct xsc_cq *cq, +- struct xsc_cqe64 **pcqe64, +- void **pcqe) +-{ +- void *cqe = next_cqe_sw(cq); +- if (!cqe) +- return CQ_EMPTY; +- +- ++cq->cons_index; +- +- /* +- * Make sure we read CQ entry contents after we've checked the +- * ownership bit. 
+- */ +- udma_from_device_barrier(); +- +- *pcqe = cqe; +- +- return CQ_OK; ++ qp->err_occurred = 1; + } + + static inline int xsc_parse_cqe(struct xsc_cq *cq, +- struct xsc_cqe *cqe, +- struct xsc_resource **cur_rsc, +- struct ibv_wc *wc, +- int lazy) ++ struct xsc_cqe *cqe, ++ struct xsc_resource **cur_rsc, ++ struct ibv_wc *wc) ++ ALWAYS_INLINE; ++static inline int xsc_parse_cqe(struct xsc_cq *cq, ++ struct xsc_cqe *cqe, ++ struct xsc_resource **cur_rsc, ++ struct ibv_wc *wc) + { + struct xsc_wq *wq; + uint32_t qp_id; +@@ -378,12 +308,14 @@ static inline int xsc_parse_cqe(struct xsc_cq *cq, + struct xsc_qp *xqp = NULL; + struct xsc_context *xctx; + ++ memset(wc, 0, sizeof(*wc)); ++ wc->wc_flags = 0; ++ + xctx = to_xctx(ibv_cq_ex_to_cq(&cq->verbs_cq.cq_ex)->context); + qp_id = cqe->qp_id; + qp_id = RD_LE_16(qp_id); +- wc->wc_flags = 0; + wc->qp_num = qp_id; +- opcode = xsc_get_cqe_opcode(cqe); ++ opcode = xsc_get_cqe_opcode(xctx, cur_rsc, cqe); + + xsc_dbg(xctx->dbg_fp, XSC_DBG_CQ_CQE, "opcode:0x%x qp_num:%u\n", opcode, qp_id); + switch (opcode) { +@@ -404,8 +336,9 @@ static inline int xsc_parse_cqe(struct xsc_cq *cq, + case XSC_OPCODE_RDMA_RSP_RECV_IMMDT: + case XSC_OPCODE_RDMA_RSP_WRITE_IMMDT: + wc->wc_flags |= IBV_WC_WITH_IMM; +- wc->imm_data = cqe->imm_data; ++ WR_BE_32(wc->imm_data, RD_LE_32(cqe->imm_data)); + SWITCH_FALLTHROUGH; ++ case XSC_OPCODE_RDMA_CQE_RAW_SNF: + case XSC_OPCODE_RDMA_RSP_RECV: + err = get_qp_ctx(xctx, cur_rsc, qp_id); + if (unlikely(err)) +@@ -428,7 +361,7 @@ static inline int xsc_parse_cqe(struct xsc_cq *cq, + return CQ_POLL_ERR; + xqp = rsc_to_xqp(*cur_rsc); + wq = &xqp->rq; +- handle_bad_responder(xctx, wc, cqe, wq); ++ handle_bad_responder(xctx, wc, cqe, xqp, wq); + break; + case XSC_OPCODE_RDMA_CQE_ERROR: + printf("%s: got completion with cqe format error:\n", xctx->hostname); +@@ -440,30 +373,121 @@ static inline int xsc_parse_cqe(struct xsc_cq *cq, + return CQ_OK; + } + +-static inline int xsc_parse_lazy_cqe(struct xsc_cq *cq, +- struct xsc_cqe64 *cqe64, +- void *cqe, int cqe_ver) +- ALWAYS_INLINE; +-static inline int xsc_parse_lazy_cqe(struct xsc_cq *cq, +- struct xsc_cqe64 *cqe64, +- void *cqe, int cqe_ver) ++static inline int xsc_parse_cqe_lazy(struct xsc_cq *cq, struct xsc_cqe *cqe) ALWAYS_INLINE; ++static inline int xsc_parse_cqe_lazy(struct xsc_cq *cq, struct xsc_cqe *cqe) + { +- return xsc_parse_cqe(cq, cqe, &cq->cur_rsc, NULL, 1); ++ struct xsc_resource *cur_rsc = NULL; ++ struct xsc_qp *xqp = NULL; ++ struct xsc_context *xctx; ++ struct xsc_wq *wq; ++ uint32_t qp_id; ++ uint8_t opcode; ++ int err = 0; ++ int idx; ++ ++ cq->cqe = cqe; ++ xctx = to_xctx(ibv_cq_ex_to_cq(&cq->verbs_cq.cq_ex)->context); ++ qp_id = cqe->qp_id; ++ qp_id = RD_LE_16(qp_id); ++ opcode = xsc_get_cqe_opcode(xctx, &cur_rsc, cqe); ++ ++ xsc_dbg(xctx->dbg_fp, XSC_DBG_CQ_CQE, "opcode:0x%x qp_num:%u\n", opcode, qp_id); ++ switch (opcode) { ++ case XSC_OPCODE_RDMA_REQ_SEND_IMMDT: ++ case XSC_OPCODE_RDMA_REQ_WRITE_IMMDT: ++ case XSC_OPCODE_RDMA_REQ_SEND: ++ case XSC_OPCODE_RDMA_REQ_WRITE: ++ case XSC_OPCODE_RDMA_REQ_READ: ++ cq->verbs_cq.cq_ex.status = IBV_WC_SUCCESS; ++ err = get_qp_ctx(xctx, &cur_rsc, qp_id); ++ if (unlikely(err)) ++ return CQ_EMPTY; ++ xqp = rsc_to_xqp(cur_rsc); ++ wq = &xqp->sq; ++ idx = RD_LE_16(cqe->wqe_id); ++ idx >>= (wq->wqe_shift - XSC_BASE_WQE_SHIFT); ++ idx &= (wq->wqe_cnt - 1); ++ cq->verbs_cq.cq_ex.wr_id = wq->wrid[idx]; ++ wq->tail = wq->wqe_head[idx] + 1; ++ wq->flush_wqe_cnt--; ++ wq->need_flush[idx] = 0; ++ break; ++ case 
XSC_OPCODE_RDMA_RSP_RECV_IMMDT: ++ case XSC_OPCODE_RDMA_RSP_WRITE_IMMDT: ++ case XSC_OPCODE_RDMA_RSP_RECV: ++ cq->verbs_cq.cq_ex.status = IBV_WC_SUCCESS; ++ err = get_qp_ctx(xctx, &cur_rsc, qp_id); ++ if (unlikely(err)) ++ return CQ_EMPTY; ++ xqp = rsc_to_xqp(cur_rsc); ++ wq = &xqp->rq; ++ idx = wq->tail & (wq->wqe_cnt - 1); ++ cq->verbs_cq.cq_ex.wr_id = wq->wrid[idx]; ++ ++wq->tail; ++ wq->flush_wqe_cnt--; ++ break; ++ case XSC_OPCODE_RDMA_REQ_ERROR: ++ cq->verbs_cq.cq_ex.status = xsc_hw_cqe_err_status(xctx->device_id, cqe); ++ err = get_qp_ctx(xctx, &cur_rsc, qp_id); ++ if (unlikely(err)) ++ return CQ_POLL_ERR; ++ xqp = rsc_to_xqp(cur_rsc); ++ wq = &xqp->sq; ++ idx = RD_LE_16(cqe->wqe_id); ++ idx >>= (wq->wqe_shift - XSC_BASE_WQE_SHIFT); ++ idx &= (wq->wqe_cnt - 1); ++ wq->tail = wq->wqe_head[idx] + 1; ++ cq->verbs_cq.cq_ex.wr_id = wq->wrid[idx]; ++ if (wq->need_flush[idx]) ++ wq->flush_wqe_cnt--; ++ wq->need_flush[idx] = 0; ++ if (cq->verbs_cq.cq_ex.status != IBV_WC_WR_FLUSH_ERR) { ++ printf("%s: got completion with error:\n", xctx->hostname); ++ dump_cqe(cqe); ++ } ++ xqp->ibv_qp->state = IBV_QPS_ERR; ++ break; ++ case XSC_OPCODE_RDMA_RSP_ERROR: ++ cq->verbs_cq.cq_ex.status = xsc_hw_cqe_err_status(xctx->device_id, cqe); ++ err = get_qp_ctx(xctx, &cur_rsc, qp_id); ++ if (unlikely(err)) ++ return CQ_POLL_ERR; ++ xqp = rsc_to_xqp(cur_rsc); ++ wq = &xqp->rq; ++ ++ ++wq->tail; ++ wq->flush_wqe_cnt--; ++ if (cq->verbs_cq.cq_ex.status != IBV_WC_WR_FLUSH_ERR) { ++ printf("%s: got completion with error:\n", xctx->hostname); ++ dump_cqe(cqe); ++ } ++ xqp->ibv_qp->state = IBV_QPS_ERR; ++ break; ++ case XSC_OPCODE_RDMA_CQE_ERROR: ++ printf("%s: got completion with cqe format error:\n", xctx->hostname); ++ dump_cqe(cqe); ++ SWITCH_FALLTHROUGH; ++ default: ++ return CQ_POLL_ERR; ++ } ++ return CQ_OK; + } + + static inline int xsc_poll_one(struct xsc_cq *cq, + struct xsc_resource **cur_rsc, +- struct ibv_wc *wc) ++ struct ibv_wc *wc, ++ int lazy) + ALWAYS_INLINE; + static inline int xsc_poll_one(struct xsc_cq *cq, + struct xsc_resource **cur_rsc, +- struct ibv_wc *wc) ++ struct ibv_wc *wc, ++ int lazy) + { + struct xsc_cqe *cqe = get_sw_cqe(cq, cq->cons_index); +- if (cqe == NULL) { ++ int err = 0; ++ ++ if (!cqe) + return CQ_EMPTY; +- } +- memset(wc, 0, sizeof(*wc)); + + ++cq->cons_index; + +@@ -472,7 +496,12 @@ static inline int xsc_poll_one(struct xsc_cq *cq, + * ownership bit. 
+ */ + udma_from_device_barrier(); +- return xsc_parse_cqe(cq, cqe, cur_rsc, wc, 0); ++ if (!lazy) ++ err = xsc_parse_cqe(cq, cqe, cur_rsc, wc); ++ else ++ err = xsc_parse_cqe_lazy(cq, cqe); ++ ++ return err; + } + + static inline void gen_flush_err_cqe(struct xsc_err_state_qp_node *err_node, +@@ -500,10 +529,12 @@ static inline void gen_flush_err_cqe(struct xsc_err_state_qp_node *err_node, + + wc->qp_num = qp_id; + wc->status = IBV_WC_WR_FLUSH_ERR; +- wc->vendor_err = XSC_ERR_CODE_FLUSH; ++ wc->vendor_err = XSC_ANDES_ERR_CODE_FLUSH; + wc->wr_id = wq->wrid[idx]; + wq->tail++; + wq->flush_wqe_cnt--; ++ if (err_node->is_sq) ++ wq->need_flush[idx] = 0; + } + + static inline int xsc_generate_flush_err_cqe(struct ibv_cq *ibcq, +@@ -578,9 +609,14 @@ static inline int poll_cq(struct ibv_cq *ibcq, int ne, struct ibv_wc *wc) + int err = CQ_OK; + uint32_t next_cid = cq->cons_index; + ++ if (cq->stall_enable && cq->stall_next_poll) { ++ cq->stall_next_poll = 0; ++ xsc_stall_poll_cq(); ++ } ++ + xsc_spin_lock(&cq->lock); + for (npolled = 0; npolled < ne; ++npolled) { +- err = xsc_poll_one(cq, &rsc, wc + npolled); ++ err = xsc_poll_one(cq, &rsc, wc + npolled, 0); + if (err != CQ_OK) + break; + } +@@ -596,677 +632,148 @@ static inline int poll_cq(struct ibv_cq *ibcq, int ne, struct ibv_wc *wc) + update_cons_index(cq); + xsc_spin_unlock(&cq->lock); + +- return err == CQ_POLL_ERR ? err : npolled; +-} +- +-enum polling_mode { +- POLLING_MODE_NO_STALL, +- POLLING_MODE_STALL, +- POLLING_MODE_STALL_ADAPTIVE +-}; ++ if (cq->stall_enable && err == CQ_EMPTY) ++ cq->stall_next_poll = 1; + +-static inline void _xsc_end_poll(struct ibv_cq_ex *ibcq, +- int lock, enum polling_mode stall) +- ALWAYS_INLINE; +-static inline void _xsc_end_poll(struct ibv_cq_ex *ibcq, +- int lock, enum polling_mode stall) +-{ +- struct xsc_cq *cq = to_xcq(ibv_cq_ex_to_cq(ibcq)); +- +- update_cons_index(cq); +- +- if (lock) +- xsc_spin_unlock(&cq->lock); +- +- if (stall) { +- if (stall == POLLING_MODE_STALL_ADAPTIVE) { +- if (!(cq->flags & XSC_CQ_FLAGS_FOUND_CQES)) { +- cq->stall_cycles = max(cq->stall_cycles - xsc_stall_cq_dec_step, +- xsc_stall_cq_poll_min); +- xsc_get_cycles(&cq->stall_last_count); +- } else if (cq->flags & XSC_CQ_FLAGS_EMPTY_DURING_POLL) { +- cq->stall_cycles = min(cq->stall_cycles + xsc_stall_cq_inc_step, +- xsc_stall_cq_poll_max); +- xsc_get_cycles(&cq->stall_last_count); +- } else { +- cq->stall_cycles = max(cq->stall_cycles - xsc_stall_cq_dec_step, +- xsc_stall_cq_poll_min); +- cq->stall_last_count = 0; +- } +- } else if (!(cq->flags & XSC_CQ_FLAGS_FOUND_CQES)) { +- cq->stall_next_poll = 1; +- } +- +- cq->flags &= ~(XSC_CQ_FLAGS_FOUND_CQES | XSC_CQ_FLAGS_EMPTY_DURING_POLL); +- } ++ return err == CQ_POLL_ERR ? 
err : npolled; + } + +-static inline int xsc_start_poll(struct ibv_cq_ex *ibcq, struct ibv_poll_cq_attr *attr, +- int lock, enum polling_mode stall, +- int cqe_version, int clock_update) +- ALWAYS_INLINE; +-static inline int xsc_start_poll(struct ibv_cq_ex *ibcq, struct ibv_poll_cq_attr *attr, +- int lock, enum polling_mode stall, +- int cqe_version, int clock_update) ++int xsc_poll_cq(struct ibv_cq *ibcq, int ne, struct ibv_wc *wc) + { +- struct xsc_cq *cq = to_xcq(ibv_cq_ex_to_cq(ibcq)); +- struct xsc_cqe64 *cqe64; +- void *cqe; +- int err; +- +- if (unlikely(attr->comp_mask)) +- return EINVAL; +- +- if (stall) { +- if (stall == POLLING_MODE_STALL_ADAPTIVE) { +- if (cq->stall_last_count) +- xsc_stall_cycles_poll_cq(cq->stall_last_count + cq->stall_cycles); +- } else if (cq->stall_next_poll) { +- cq->stall_next_poll = 0; +- xsc_stall_poll_cq(); +- } +- } +- +- if (lock) +- xsc_spin_lock(&cq->lock); +- +- cq->cur_rsc = NULL; +- +- err = xsc_get_next_cqe(cq, &cqe64, &cqe); +- if (err == CQ_EMPTY) { +- if (lock) +- xsc_spin_unlock(&cq->lock); +- +- if (stall) { +- if (stall == POLLING_MODE_STALL_ADAPTIVE) { +- cq->stall_cycles = max(cq->stall_cycles - xsc_stall_cq_dec_step, +- xsc_stall_cq_poll_min); +- xsc_get_cycles(&cq->stall_last_count); +- } else { +- cq->stall_next_poll = 1; +- } +- } +- +- return ENOENT; +- } +- +- if (stall) +- cq->flags |= XSC_CQ_FLAGS_FOUND_CQES; +- +- err = xsc_parse_lazy_cqe(cq, cqe64, cqe, cqe_version); +- if (lock && err) +- xsc_spin_unlock(&cq->lock); +- +- if (stall && err) { +- if (stall == POLLING_MODE_STALL_ADAPTIVE) { +- cq->stall_cycles = max(cq->stall_cycles - xsc_stall_cq_dec_step, +- xsc_stall_cq_poll_min); +- cq->stall_last_count = 0; +- } +- +- cq->flags &= ~(XSC_CQ_FLAGS_FOUND_CQES); +- +- goto out; +- } +- +- if (clock_update && !err) +- err = xscdv_get_clock_info(ibcq->context, &cq->last_clock_info); +- +-out: +- return err; ++ return poll_cq(ibcq, ne, wc); + } + +-static inline int xsc_next_poll(struct ibv_cq_ex *ibcq, +- enum polling_mode stall, int cqe_version) ++static inline int xsc_start_poll(struct ibv_cq_ex *ibcq, ++ struct ibv_poll_cq_attr *attr) + ALWAYS_INLINE; +-static inline int xsc_next_poll(struct ibv_cq_ex *ibcq, +- enum polling_mode stall, +- int cqe_version) ++static inline int xsc_start_poll(struct ibv_cq_ex *ibcq, ++ struct ibv_poll_cq_attr *attr) + { + struct xsc_cq *cq = to_xcq(ibv_cq_ex_to_cq(ibcq)); +- struct xsc_cqe64 *cqe64; +- void *cqe; + int err; + +- err = xsc_get_next_cqe(cq, &cqe64, &cqe); +- if (err == CQ_EMPTY) { +- if (stall == POLLING_MODE_STALL_ADAPTIVE) +- cq->flags |= XSC_CQ_FLAGS_EMPTY_DURING_POLL; +- +- return ENOENT; +- } +- +- return xsc_parse_lazy_cqe(cq, cqe64, cqe, cqe_version); +-} +- +-static inline int xsc_next_poll_adaptive_v0(struct ibv_cq_ex *ibcq) +-{ +- return xsc_next_poll(ibcq, POLLING_MODE_STALL_ADAPTIVE, 0); +-} +- +-static inline int xsc_next_poll_adaptive_v1(struct ibv_cq_ex *ibcq) +-{ +- return xsc_next_poll(ibcq, POLLING_MODE_STALL_ADAPTIVE, 1); +-} +- +-static inline int xsc_next_poll_v0(struct ibv_cq_ex *ibcq) +-{ +- return xsc_next_poll(ibcq, 0, 0); +-} +- +-static inline int xsc_next_poll_v1(struct ibv_cq_ex *ibcq) +-{ +- return xsc_next_poll(ibcq, 0, 1); +-} +- +-static inline int xsc_start_poll_v0(struct ibv_cq_ex *ibcq, +- struct ibv_poll_cq_attr *attr) +-{ +- return xsc_start_poll(ibcq, attr, 0, 0, 0, 0); +-} +- +-static inline int xsc_start_poll_v1(struct ibv_cq_ex *ibcq, +- struct ibv_poll_cq_attr *attr) +-{ +- return xsc_start_poll(ibcq, attr, 0, 0, 1, 0); +-} +- +-static 
inline int xsc_start_poll_v0_lock(struct ibv_cq_ex *ibcq, +- struct ibv_poll_cq_attr *attr) +-{ +- return xsc_start_poll(ibcq, attr, 1, 0, 0, 0); +-} +- +-static inline int xsc_start_poll_v1_lock(struct ibv_cq_ex *ibcq, +- struct ibv_poll_cq_attr *attr) +-{ +- return xsc_start_poll(ibcq, attr, 1, 0, 1, 0); +-} +- +-static inline int xsc_start_poll_adaptive_stall_v0_lock(struct ibv_cq_ex *ibcq, +- struct ibv_poll_cq_attr *attr) +-{ +- return xsc_start_poll(ibcq, attr, 1, POLLING_MODE_STALL_ADAPTIVE, 0, 0); +-} +- +-static inline int xsc_start_poll_stall_v0_lock(struct ibv_cq_ex *ibcq, +- struct ibv_poll_cq_attr *attr) +-{ +- return xsc_start_poll(ibcq, attr, 1, POLLING_MODE_STALL, 0, 0); +-} +- +-static inline int xsc_start_poll_adaptive_stall_v1_lock(struct ibv_cq_ex *ibcq, +- struct ibv_poll_cq_attr *attr) +-{ +- return xsc_start_poll(ibcq, attr, 1, POLLING_MODE_STALL_ADAPTIVE, 1, 0); +-} +- +-static inline int xsc_start_poll_stall_v1_lock(struct ibv_cq_ex *ibcq, +- struct ibv_poll_cq_attr *attr) +-{ +- return xsc_start_poll(ibcq, attr, 1, POLLING_MODE_STALL, 1, 0); +-} +- +-static inline int xsc_start_poll_stall_v0(struct ibv_cq_ex *ibcq, +- struct ibv_poll_cq_attr *attr) +-{ +- return xsc_start_poll(ibcq, attr, 0, POLLING_MODE_STALL, 0, 0); +-} +- +-static inline int xsc_start_poll_adaptive_stall_v0(struct ibv_cq_ex *ibcq, +- struct ibv_poll_cq_attr *attr) +-{ +- return xsc_start_poll(ibcq, attr, 0, POLLING_MODE_STALL_ADAPTIVE, 0, 0); +-} +- +-static inline int xsc_start_poll_adaptive_stall_v1(struct ibv_cq_ex *ibcq, +- struct ibv_poll_cq_attr *attr) +-{ +- return xsc_start_poll(ibcq, attr, 0, POLLING_MODE_STALL_ADAPTIVE, 1, 0); +-} +- +-static inline int xsc_start_poll_stall_v1(struct ibv_cq_ex *ibcq, +- struct ibv_poll_cq_attr *attr) +-{ +- return xsc_start_poll(ibcq, attr, 0, POLLING_MODE_STALL, 1, 0); +-} +- +-static inline int xsc_start_poll_v0_lock_clock_update(struct ibv_cq_ex *ibcq, +- struct ibv_poll_cq_attr *attr) +-{ +- return xsc_start_poll(ibcq, attr, 1, 0, 0, 1); +-} +- +-static inline int xsc_start_poll_v1_lock_clock_update(struct ibv_cq_ex *ibcq, +- struct ibv_poll_cq_attr *attr) +-{ +- return xsc_start_poll(ibcq, attr, 1, 0, 1, 1); +-} +- +-static inline int xsc_start_poll_v1_clock_update(struct ibv_cq_ex *ibcq, +- struct ibv_poll_cq_attr *attr) +-{ +- return xsc_start_poll(ibcq, attr, 0, 0, 1, 1); +-} +- +-static inline int xsc_start_poll_v0_clock_update(struct ibv_cq_ex *ibcq, +- struct ibv_poll_cq_attr *attr) +-{ +- return xsc_start_poll(ibcq, attr, 0, 0, 0, 1); +-} +- +-static inline int xsc_start_poll_stall_v1_lock_clock_update(struct ibv_cq_ex *ibcq, +- struct ibv_poll_cq_attr *attr) +-{ +- return xsc_start_poll(ibcq, attr, 1, POLLING_MODE_STALL, 1, 1); +-} +- +-static inline int xsc_start_poll_stall_v0_lock_clock_update(struct ibv_cq_ex *ibcq, +- struct ibv_poll_cq_attr *attr) +-{ +- return xsc_start_poll(ibcq, attr, 1, POLLING_MODE_STALL, 0, 1); +-} +- +-static inline int xsc_start_poll_stall_v1_clock_update(struct ibv_cq_ex *ibcq, +- struct ibv_poll_cq_attr *attr) +-{ +- return xsc_start_poll(ibcq, attr, 0, POLLING_MODE_STALL, 1, 1); +-} +- +-static inline int xsc_start_poll_stall_v0_clock_update(struct ibv_cq_ex *ibcq, +- struct ibv_poll_cq_attr *attr) +-{ +- return xsc_start_poll(ibcq, attr, 0, POLLING_MODE_STALL, 0, 1); +-} +- +-static inline int xsc_start_poll_adaptive_stall_v0_lock_clock_update(struct ibv_cq_ex *ibcq, +- struct ibv_poll_cq_attr *attr) +-{ +- return xsc_start_poll(ibcq, attr, 1, POLLING_MODE_STALL_ADAPTIVE, 0, 1); +-} +- +-static inline int 
xsc_start_poll_adaptive_stall_v1_lock_clock_update(struct ibv_cq_ex *ibcq, +- struct ibv_poll_cq_attr *attr) +-{ +- return xsc_start_poll(ibcq, attr, 1, POLLING_MODE_STALL_ADAPTIVE, 1, 1); +-} +- +-static inline int xsc_start_poll_adaptive_stall_v0_clock_update(struct ibv_cq_ex *ibcq, +- struct ibv_poll_cq_attr *attr) +-{ +- return xsc_start_poll(ibcq, attr, 0, POLLING_MODE_STALL_ADAPTIVE, 0, 1); +-} +- +-static inline int xsc_start_poll_adaptive_stall_v1_clock_update(struct ibv_cq_ex *ibcq, +- struct ibv_poll_cq_attr *attr) +-{ +- return xsc_start_poll(ibcq, attr, 0, POLLING_MODE_STALL_ADAPTIVE, 1, 1); +-} +- +-static inline void xsc_end_poll_adaptive_stall_lock(struct ibv_cq_ex *ibcq) +-{ +- _xsc_end_poll(ibcq, 1, POLLING_MODE_STALL_ADAPTIVE); +-} +- +-static inline void xsc_end_poll_stall_lock(struct ibv_cq_ex *ibcq) +-{ +- _xsc_end_poll(ibcq, 1, POLLING_MODE_STALL); +-} +- +-static inline void xsc_end_poll_adaptive_stall(struct ibv_cq_ex *ibcq) +-{ +- _xsc_end_poll(ibcq, 0, POLLING_MODE_STALL_ADAPTIVE); +-} ++ xsc_spin_lock(&cq->lock); ++ err = xsc_poll_one(cq, NULL, NULL, 1); ++ if (err == CQ_EMPTY) ++ xsc_spin_unlock(&cq->lock); + +-static inline void xsc_end_poll_stall(struct ibv_cq_ex *ibcq) +-{ +- _xsc_end_poll(ibcq, 0, POLLING_MODE_STALL); ++ return (err == CQ_EMPTY) ? ENOENT : err; + } + + static inline void xsc_end_poll(struct ibv_cq_ex *ibcq) +-{ +- _xsc_end_poll(ibcq, 0, 0); +-} +- +-static inline void xsc_end_poll_lock(struct ibv_cq_ex *ibcq) +-{ +- _xsc_end_poll(ibcq, 1, 0); +-} +- +-int xsc_poll_cq(struct ibv_cq *ibcq, int ne, struct ibv_wc *wc) +-{ +- return poll_cq(ibcq, ne, wc); +-} +- +-static inline enum ibv_wc_opcode xsc_cq_read_wc_opcode(struct ibv_cq_ex *ibcq) +-{ +- struct xsc_cq *cq = to_xcq(ibv_cq_ex_to_cq(ibcq)); +- +- switch (xscdv_get_cqe_opcode(cq->cqe64)) { +- case XSC_CQE_RESP_WR_IMM: +- return IBV_WC_RECV_RDMA_WITH_IMM; +- case XSC_CQE_RESP_SEND: +- case XSC_CQE_RESP_SEND_IMM: +- case XSC_CQE_RESP_SEND_INV: +- if (unlikely(cq->cqe64->app == XSC_CQE_APP_TAG_MATCHING)) { +- switch (cq->cqe64->app_op) { +- case XSC_CQE_APP_OP_TM_CONSUMED_MSG_SW_RDNV: +- case XSC_CQE_APP_OP_TM_CONSUMED_MSG: +- case XSC_CQE_APP_OP_TM_CONSUMED_SW_RDNV: +- case XSC_CQE_APP_OP_TM_EXPECTED: +- case XSC_CQE_APP_OP_TM_UNEXPECTED: +- return IBV_WC_TM_RECV; +- case XSC_CQE_APP_OP_TM_NO_TAG: +- return IBV_WC_TM_NO_TAG; +- } +- } +- return IBV_WC_RECV; +- case XSC_CQE_NO_PACKET: +- switch (cq->cqe64->app_op) { +- case XSC_CQE_APP_OP_TM_REMOVE: +- return IBV_WC_TM_DEL; +- case XSC_CQE_APP_OP_TM_APPEND: +- return IBV_WC_TM_ADD; +- case XSC_CQE_APP_OP_TM_NOOP: +- return IBV_WC_TM_SYNC; +- case XSC_CQE_APP_OP_TM_CONSUMED: +- return IBV_WC_TM_RECV; +- } +- break; +- case XSC_CQE_REQ: +- switch (be32toh(cq->cqe64->sop_drop_qpn) >> 24) { +- case XSC_OPCODE_RDMA_WRITE_IMM: +- case XSC_OPCODE_RDMA_WRITE: +- return IBV_WC_RDMA_WRITE; +- case XSC_OPCODE_SEND_IMM: +- case XSC_OPCODE_SEND: +- case XSC_OPCODE_SEND_INVAL: +- return IBV_WC_SEND; +- case XSC_OPCODE_RDMA_READ: +- return IBV_WC_RDMA_READ; +- case XSC_OPCODE_ATOMIC_CS: +- return IBV_WC_COMP_SWAP; +- case XSC_OPCODE_ATOMIC_FA: +- return IBV_WC_FETCH_ADD; +- case XSC_OPCODE_UMR: +- return cq->umr_opcode; +- case XSC_OPCODE_TSO: +- return IBV_WC_TSO; +- } +- } +- +- return 0; +-} +- +-static inline uint32_t xsc_cq_read_wc_qp_num(struct ibv_cq_ex *ibcq) ++ ALWAYS_INLINE; ++static inline void xsc_end_poll(struct ibv_cq_ex *ibcq) + { + struct xsc_cq *cq = to_xcq(ibv_cq_ex_to_cq(ibcq)); + +- return be32toh(cq->cqe64->sop_drop_qpn) & 0xffffff; ++ 
udma_to_device_barrier(); ++ update_cons_index(cq); ++ xsc_spin_unlock(&cq->lock); + } + +-static inline unsigned int xsc_cq_read_wc_flags(struct ibv_cq_ex *ibcq) ++static inline int xsc_next_poll(struct ibv_cq_ex *ibcq) ++ ALWAYS_INLINE; ++static inline int xsc_next_poll(struct ibv_cq_ex *ibcq) + { + struct xsc_cq *cq = to_xcq(ibv_cq_ex_to_cq(ibcq)); +- int wc_flags = 0; +- +- if (cq->flags & XSC_CQ_FLAGS_RX_CSUM_VALID) +- wc_flags = get_csum_ok(cq->cqe64); +- +- switch (xscdv_get_cqe_opcode(cq->cqe64)) { +- case XSC_CQE_RESP_WR_IMM: +- case XSC_CQE_RESP_SEND_IMM: +- wc_flags |= IBV_WC_WITH_IMM; +- break; +- case XSC_CQE_RESP_SEND_INV: +- wc_flags |= IBV_WC_WITH_INV; +- break; +- } +- +- if (cq->flags & XSC_CQ_FLAGS_TM_SYNC_REQ) +- wc_flags |= IBV_WC_TM_SYNC_REQ; ++ int err; + +- if (unlikely(cq->cqe64->app == XSC_CQE_APP_TAG_MATCHING)) { +- switch (cq->cqe64->app_op) { +- case XSC_CQE_APP_OP_TM_CONSUMED_MSG_SW_RDNV: +- case XSC_CQE_APP_OP_TM_CONSUMED_MSG: +- case XSC_CQE_APP_OP_TM_MSG_COMPLETION_CANCELED: +- /* Full completion */ +- wc_flags |= (IBV_WC_TM_MATCH | IBV_WC_TM_DATA_VALID); +- break; +- case XSC_CQE_APP_OP_TM_CONSUMED_SW_RDNV: +- case XSC_CQE_APP_OP_TM_CONSUMED: /* First completion */ +- wc_flags |= IBV_WC_TM_MATCH; +- break; +- case XSC_CQE_APP_OP_TM_EXPECTED: /* Second completion */ +- wc_flags |= IBV_WC_TM_DATA_VALID; +- break; +- } +- } ++ err = xsc_poll_one(cq, NULL, NULL, 1); + +- wc_flags |= ((be32toh(cq->cqe64->flags_rqpn) >> 28) & 3) ? IBV_WC_GRH : 0; +- return wc_flags; ++ return (err == CQ_EMPTY) ? ENOENT : err; + } + +-static inline uint32_t xsc_cq_read_wc_byte_len(struct ibv_cq_ex *ibcq) ++static inline enum ibv_wc_opcode xsc_wc_read_opcode(struct ibv_cq_ex *ibcq) + { +- struct xsc_cq *cq = to_xcq(ibv_cq_ex_to_cq(ibcq)); ++ struct xsc_cqe *cqe = to_xcq(ibv_cq_ex_to_cq(ibcq))->cqe; ++ struct xsc_context *xctx = to_xctx(ibv_cq_ex_to_cq(ibcq)->context); ++ uint8_t opcode = xsc_hw_get_cqe_msg_opcode(xctx->device_id, cqe); + +- return be32toh(cq->cqe64->byte_cnt); ++ return xsc_cqe_opcode[opcode]; + } + +-static inline uint32_t xsc_cq_read_wc_vendor_err(struct ibv_cq_ex *ibcq) ++static inline uint32_t xsc_wc_read_qp_num(struct ibv_cq_ex *ibcq) + { +- struct xsc_cq *cq = to_xcq(ibv_cq_ex_to_cq(ibcq)); +- struct xsc_err_cqe *ecqe = (struct xsc_err_cqe *)cq->cqe64; ++ struct xsc_cqe *cqe = to_xcq(ibv_cq_ex_to_cq(ibcq))->cqe; + +- return ecqe->vendor_err_synd; ++ return le32toh(cqe->qp_id); + } + +-static inline __be32 xsc_cq_read_wc_imm_data(struct ibv_cq_ex *ibcq) ++static inline unsigned int xsc_wc_read_flags(struct ibv_cq_ex *ibcq) + { +- struct xsc_cq *cq = to_xcq(ibv_cq_ex_to_cq(ibcq)); ++ struct xsc_cqe *cqe = to_xcq(ibv_cq_ex_to_cq(ibcq))->cqe; ++ struct xsc_context *xctx = to_xctx(ibv_cq_ex_to_cq(ibcq)->context); ++ uint8_t opcode = xsc_hw_get_cqe_msg_opcode(xctx->device_id, cqe); + +- switch (xscdv_get_cqe_opcode(cq->cqe64)) { +- case XSC_CQE_RESP_SEND_INV: +- /* This is returning invalidate_rkey which is in host order, see +- * ibv_wc_read_invalidated_rkey +- */ +- return (__force __be32)be32toh(cq->cqe64->imm_inval_pkey); ++ switch (opcode) { ++ case XSC_OPCODE_RDMA_REQ_SEND_IMMDT: ++ case XSC_OPCODE_RDMA_REQ_WRITE_IMMDT: ++ case XSC_OPCODE_RDMA_RSP_RECV_IMMDT: ++ case XSC_OPCODE_RDMA_RSP_WRITE_IMMDT: ++ return IBV_WC_WITH_IMM; + default: +- return cq->cqe64->imm_inval_pkey; ++ return 0; + } + } + +-static inline uint32_t xsc_cq_read_wc_slid(struct ibv_cq_ex *ibcq) ++static inline uint32_t xsc_wc_read_byte_len(struct ibv_cq_ex *ibcq) + { +- struct xsc_cq *cq = 
to_xcq(ibv_cq_ex_to_cq(ibcq)); ++ struct xsc_cqe *cqe = to_xcq(ibv_cq_ex_to_cq(ibcq))->cqe; + +- return (uint32_t)be16toh(cq->cqe64->slid); ++ return le32toh(cqe->msg_len); + } + +-static inline uint8_t xsc_cq_read_wc_sl(struct ibv_cq_ex *ibcq) ++static inline uint32_t xsc_wc_read_vendor_err(struct ibv_cq_ex *ibcq) + { +- struct xsc_cq *cq = to_xcq(ibv_cq_ex_to_cq(ibcq)); ++ struct xsc_cqe *cqe = to_xcq(ibv_cq_ex_to_cq(ibcq))->cqe; ++ struct xsc_context *xctx = to_xctx(ibv_cq_ex_to_cq(ibcq)->context); + +- return (be32toh(cq->cqe64->flags_rqpn) >> 24) & 0xf; ++ return xsc_hw_get_cqe_err_code(xctx->device_id, cqe); + } + +-static inline uint32_t xsc_cq_read_wc_src_qp(struct ibv_cq_ex *ibcq) ++static inline __be32 xsc_wc_read_imm_data(struct ibv_cq_ex *ibcq) + { +- struct xsc_cq *cq = to_xcq(ibv_cq_ex_to_cq(ibcq)); ++ struct xsc_cqe *cqe = to_xcq(ibv_cq_ex_to_cq(ibcq))->cqe; ++ __be32 imm_data; + +- return be32toh(cq->cqe64->flags_rqpn) & 0xffffff; +-} +- +-static inline uint8_t xsc_cq_read_wc_dlid_path_bits(struct ibv_cq_ex *ibcq) +-{ +- struct xsc_cq *cq = to_xcq(ibv_cq_ex_to_cq(ibcq)); ++ WR_BE_32(imm_data, RD_LE_32(cqe->imm_data)); + +- return cq->cqe64->ml_path & 0x7f; ++ return imm_data; + } + +-static inline uint64_t xsc_cq_read_wc_completion_ts(struct ibv_cq_ex *ibcq) ++static inline uint64_t xsc_wc_read_completion_ts(struct ibv_cq_ex *ibcq) + { +- struct xsc_cq *cq = to_xcq(ibv_cq_ex_to_cq(ibcq)); ++ struct xsc_cqe *cqe = to_xcq(ibv_cq_ex_to_cq(ibcq))->cqe; + +- return be64toh(cq->cqe64->timestamp); ++ return le64toh(cqe->ts); + } + +-static inline uint64_t +-xsc_cq_read_wc_completion_wallclock_ns(struct ibv_cq_ex *ibcq) ++void xsc_cq_fill_pfns(struct xsc_cq *cq, const struct ibv_cq_init_attr_ex *cq_attr) + { +- struct xsc_cq *cq = to_xcq(ibv_cq_ex_to_cq(ibcq)); +- +- return xscdv_ts_to_ns(&cq->last_clock_info, +- xsc_cq_read_wc_completion_ts(ibcq)); +-} + +-static inline uint16_t xsc_cq_read_wc_cvlan(struct ibv_cq_ex *ibcq) +-{ +- struct xsc_cq *cq = to_xcq(ibv_cq_ex_to_cq(ibcq)); ++ cq->verbs_cq.cq_ex.start_poll = xsc_start_poll; ++ cq->verbs_cq.cq_ex.next_poll = xsc_next_poll; ++ cq->verbs_cq.cq_ex.end_poll = xsc_end_poll; + +- return be16toh(cq->cqe64->vlan_info); +-} +- +-static inline uint32_t xsc_cq_read_flow_tag(struct ibv_cq_ex *ibcq) +-{ +- struct xsc_cq *cq = to_xcq(ibv_cq_ex_to_cq(ibcq)); +- +- return be32toh(cq->cqe64->sop_drop_qpn) & XSC_FLOW_TAG_MASK; +-} +- +-static inline void xsc_cq_read_wc_tm_info(struct ibv_cq_ex *ibcq, +- struct ibv_wc_tm_info *tm_info) +-{ +- struct xsc_cq *cq = to_xcq(ibv_cq_ex_to_cq(ibcq)); +- +- tm_info->tag = be64toh(cq->cqe64->tmh.tag); +- tm_info->priv = be32toh(cq->cqe64->tmh.app_ctx); +-} +- +-#define BIT(i) (1UL << (i)) +- +-#define SINGLE_THREADED BIT(0) +-#define STALL BIT(1) +-#define V1 BIT(2) +-#define ADAPTIVE BIT(3) +-#define CLOCK_UPDATE BIT(4) +- +-#define xsc_start_poll_name(cqe_ver, lock, stall, adaptive, clock_update) \ +- xsc_start_poll##adaptive##stall##cqe_ver##lock##clock_update +-#define xsc_next_poll_name(cqe_ver, adaptive) \ +- xsc_next_poll##adaptive##cqe_ver +-#define xsc_end_poll_name(lock, stall, adaptive) \ +- xsc_end_poll##adaptive##stall##lock +- +-#define POLL_FN_ENTRY(cqe_ver, lock, stall, adaptive, clock_update) { \ +- .start_poll = &xsc_start_poll_name(cqe_ver, lock, stall, adaptive, clock_update), \ +- .next_poll = &xsc_next_poll_name(cqe_ver, adaptive), \ +- .end_poll = &xsc_end_poll_name(lock, stall, adaptive), \ +- } +- +-static const struct op +-{ +- int (*start_poll)(struct ibv_cq_ex *ibcq, struct 
ibv_poll_cq_attr *attr); +- int (*next_poll)(struct ibv_cq_ex *ibcq); +- void (*end_poll)(struct ibv_cq_ex *ibcq); +-} ops[ADAPTIVE + V1 + STALL + SINGLE_THREADED + CLOCK_UPDATE + 1] = { +- [V1] = POLL_FN_ENTRY(_v1, _lock, , ,), +- [0] = POLL_FN_ENTRY(_v0, _lock, , ,), +- [V1 | SINGLE_THREADED] = POLL_FN_ENTRY(_v1, , , , ), +- [SINGLE_THREADED] = POLL_FN_ENTRY(_v0, , , , ), +- [V1 | STALL] = POLL_FN_ENTRY(_v1, _lock, _stall, , ), +- [STALL] = POLL_FN_ENTRY(_v0, _lock, _stall, , ), +- [V1 | SINGLE_THREADED | STALL] = POLL_FN_ENTRY(_v1, , _stall, , ), +- [SINGLE_THREADED | STALL] = POLL_FN_ENTRY(_v0, , _stall, , ), +- [V1 | STALL | ADAPTIVE] = POLL_FN_ENTRY(_v1, _lock, _stall, _adaptive, ), +- [STALL | ADAPTIVE] = POLL_FN_ENTRY(_v0, _lock, _stall, _adaptive, ), +- [V1 | SINGLE_THREADED | STALL | ADAPTIVE] = POLL_FN_ENTRY(_v1, , _stall, _adaptive, ), +- [SINGLE_THREADED | STALL | ADAPTIVE] = POLL_FN_ENTRY(_v0, , _stall, _adaptive, ), +- [V1 | CLOCK_UPDATE] = POLL_FN_ENTRY(_v1, _lock, , , _clock_update), +- [0 | CLOCK_UPDATE] = POLL_FN_ENTRY(_v0, _lock, , , _clock_update), +- [V1 | SINGLE_THREADED | CLOCK_UPDATE] = POLL_FN_ENTRY(_v1, , , , _clock_update), +- [SINGLE_THREADED | CLOCK_UPDATE] = POLL_FN_ENTRY(_v0, , , , _clock_update), +- [V1 | STALL | CLOCK_UPDATE] = POLL_FN_ENTRY(_v1, _lock, _stall, , _clock_update), +- [STALL | CLOCK_UPDATE] = POLL_FN_ENTRY(_v0, _lock, _stall, , _clock_update), +- [V1 | SINGLE_THREADED | STALL | CLOCK_UPDATE] = POLL_FN_ENTRY(_v1, , _stall, , _clock_update), +- [SINGLE_THREADED | STALL | CLOCK_UPDATE] = POLL_FN_ENTRY(_v0, , _stall, , _clock_update), +- [V1 | STALL | ADAPTIVE | CLOCK_UPDATE] = POLL_FN_ENTRY(_v1, _lock, _stall, _adaptive, _clock_update), +- [STALL | ADAPTIVE | CLOCK_UPDATE] = POLL_FN_ENTRY(_v0, _lock, _stall, _adaptive, _clock_update), +- [V1 | SINGLE_THREADED | STALL | ADAPTIVE | CLOCK_UPDATE] = POLL_FN_ENTRY(_v1, , _stall, _adaptive, _clock_update), +- [SINGLE_THREADED | STALL | ADAPTIVE | CLOCK_UPDATE] = POLL_FN_ENTRY(_v0, , _stall, _adaptive, _clock_update), +-}; +- +-int xsc_cq_fill_pfns(struct xsc_cq *cq, +- const struct ibv_cq_init_attr_ex *cq_attr, +- struct xsc_context *xctx) +-{ +- const struct op *poll_ops = &ops[((cq->stall_enable && cq->stall_adaptive_enable) ? ADAPTIVE : 0) | +- (xctx->cqe_version ? V1 : 0) | +- (cq->flags & XSC_CQ_FLAGS_SINGLE_THREADED ? +- SINGLE_THREADED : 0) | +- (cq->stall_enable ? STALL : 0) | +- ((cq_attr->wc_flags & IBV_WC_EX_WITH_COMPLETION_TIMESTAMP_WALLCLOCK) ? 
+- CLOCK_UPDATE : 0)]; +- +- cq->verbs_cq.cq_ex.start_poll = poll_ops->start_poll; +- cq->verbs_cq.cq_ex.next_poll = poll_ops->next_poll; +- cq->verbs_cq.cq_ex.end_poll = poll_ops->end_poll; +- +- cq->verbs_cq.cq_ex.read_opcode = xsc_cq_read_wc_opcode; +- cq->verbs_cq.cq_ex.read_vendor_err = xsc_cq_read_wc_vendor_err; +- cq->verbs_cq.cq_ex.read_wc_flags = xsc_cq_read_wc_flags; ++ cq->verbs_cq.cq_ex.read_opcode = xsc_wc_read_opcode; ++ cq->verbs_cq.cq_ex.read_vendor_err = xsc_wc_read_vendor_err; ++ cq->verbs_cq.cq_ex.read_wc_flags = xsc_wc_read_flags; + if (cq_attr->wc_flags & IBV_WC_EX_WITH_BYTE_LEN) +- cq->verbs_cq.cq_ex.read_byte_len = xsc_cq_read_wc_byte_len; ++ cq->verbs_cq.cq_ex.read_byte_len = xsc_wc_read_byte_len; + if (cq_attr->wc_flags & IBV_WC_EX_WITH_IMM) +- cq->verbs_cq.cq_ex.read_imm_data = xsc_cq_read_wc_imm_data; ++ cq->verbs_cq.cq_ex.read_imm_data = xsc_wc_read_imm_data; + if (cq_attr->wc_flags & IBV_WC_EX_WITH_QP_NUM) +- cq->verbs_cq.cq_ex.read_qp_num = xsc_cq_read_wc_qp_num; +- if (cq_attr->wc_flags & IBV_WC_EX_WITH_SRC_QP) +- cq->verbs_cq.cq_ex.read_src_qp = xsc_cq_read_wc_src_qp; +- if (cq_attr->wc_flags & IBV_WC_EX_WITH_SLID) +- cq->verbs_cq.cq_ex.read_slid = xsc_cq_read_wc_slid; +- if (cq_attr->wc_flags & IBV_WC_EX_WITH_SL) +- cq->verbs_cq.cq_ex.read_sl = xsc_cq_read_wc_sl; +- if (cq_attr->wc_flags & IBV_WC_EX_WITH_DLID_PATH_BITS) +- cq->verbs_cq.cq_ex.read_dlid_path_bits = xsc_cq_read_wc_dlid_path_bits; ++ cq->verbs_cq.cq_ex.read_qp_num = xsc_wc_read_qp_num; + if (cq_attr->wc_flags & IBV_WC_EX_WITH_COMPLETION_TIMESTAMP) +- cq->verbs_cq.cq_ex.read_completion_ts = xsc_cq_read_wc_completion_ts; +- if (cq_attr->wc_flags & IBV_WC_EX_WITH_CVLAN) +- cq->verbs_cq.cq_ex.read_cvlan = xsc_cq_read_wc_cvlan; +- if (cq_attr->wc_flags & IBV_WC_EX_WITH_FLOW_TAG) +- cq->verbs_cq.cq_ex.read_flow_tag = xsc_cq_read_flow_tag; +- if (cq_attr->wc_flags & IBV_WC_EX_WITH_TM_INFO) +- cq->verbs_cq.cq_ex.read_tm_info = xsc_cq_read_wc_tm_info; +- if (cq_attr->wc_flags & IBV_WC_EX_WITH_COMPLETION_TIMESTAMP_WALLCLOCK) { +- if (!xctx->clock_info_page) +- return EOPNOTSUPP; +- cq->verbs_cq.cq_ex.read_completion_wallclock_ns = +- xsc_cq_read_wc_completion_wallclock_ns; +- } +- +- return 0; ++ cq->verbs_cq.cq_ex.read_completion_ts = xsc_wc_read_completion_ts; + } + + int xsc_arm_cq(struct ibv_cq *ibvcq, int solicited) + { + struct xsc_cq *cq = to_xcq(ibvcq); +- union xsc_db_data doorbell; +- +- doorbell.cqn = cq->cqn; +- doorbell.cq_next_cid = cq->cons_index; +- doorbell.solicited = !!solicited; +- +- /* +- * Make sure that the doorbell record in host memory is +- * written before ringing the doorbell via PCI WC MMIO. 
+- */ +- mmio_wc_start(); +- +- WR_REG(cq->armdb, doorbell.raw_data); ++ struct xsc_context *ctx = to_xctx(ibvcq->context); + +- mmio_flush_writes(); ++ xsc_hw_update_cq_db(ctx->device_id, cq->armdb, cq->cqn, cq->cons_index, solicited); + + return 0; + } +diff --git a/providers/xscale/qp.c b/providers/xscale/qp.c +index 04e87e2..ea9ecb5 100644 +--- a/providers/xscale/qp.c ++++ b/providers/xscale/qp.c +@@ -10,12 +10,12 @@ + #include + #include + #include +-#include + #include + + #include "xscale.h" + #include "wqe.h" + #include "xsc_hsi.h" ++#include "xsc_hw.h" + + static const uint32_t xsc_ib_opcode[] = { + [IBV_WR_SEND] = XSC_MSG_OPCODE_SEND, +@@ -26,26 +26,21 @@ static const uint32_t xsc_ib_opcode[] = { + [IBV_WR_SEND_WITH_INV] = XSC_MSG_OPCODE_SEND, + }; + +-static void *get_recv_wqe(struct xsc_qp *qp, int n) ++static inline void *get_recv_wqe(struct xsc_qp *qp, int n) + { + return qp->rq_start + (n << qp->rq.wqe_shift); + } + +-static void *get_wq_recv_wqe(struct xsc_rwq *rwq, int n) ++static inline void *get_wq_recv_wqe(struct xsc_rwq *rwq, int n) + { + return rwq->pbuff + (n << rwq->rq.wqe_shift); + } + +-static void *get_seg_wqe(void *first, int n) ++static inline void *get_seg_wqe(void *first, int n) + { + return first + (n << XSC_BASE_WQE_SHIFT); + } + +-void *xsc_get_send_wqe(struct xsc_qp *qp, int n) +-{ +- return qp->sq_start + (n << qp->sq.wqe_shift); +-} +- + void xsc_init_rwq_indices(struct xsc_rwq *rwq) + { + rwq->rq.head = 0; +@@ -61,7 +56,7 @@ void xsc_init_qp_indices(struct xsc_qp *qp) + qp->sq.cur_post = 0; + } + +-static int xsc_wq_overflow(struct xsc_wq *wq, int nreq, struct xsc_cq *cq) ++static inline int xsc_wq_overflow(struct xsc_wq *wq, int nreq, struct xsc_cq *cq) + { + unsigned cur; + +@@ -76,65 +71,72 @@ static int xsc_wq_overflow(struct xsc_wq *wq, int nreq, struct xsc_cq *cq) + return cur + nreq >= wq->max_post; + } + +-static inline void set_remote_addr_seg(struct xsc_wqe_data_seg *remote_seg, +- uint32_t msg_len, uint64_t remote_addr, uint32_t rkey) ++static inline void set_data_seg_with_value(struct xsc_qp *qp, struct xsc_wqe_data_seg *data_seg, ++ uint64_t addr, uint32_t key, uint32_t length) + { +- WR_LE_32(remote_seg->seg_len, msg_len); +- WR_LE_32(remote_seg->mkey, rkey); +- WR_LE_64(remote_seg->va, remote_addr); ++ struct xsc_context *ctx = to_xctx(qp->ibv_qp->context); ++ ++ xsc_hw_set_data_seg(ctx->device_id, data_seg, addr, key, length); + } + +-static void set_local_data_seg(struct xsc_wqe_data_seg *data_seg, struct ibv_sge *sg) ++static inline void set_local_data_seg_from_sge(struct xsc_qp *qp, struct xsc_wqe_data_seg *data_seg, ++ const struct ibv_sge *sg) + { +- WR_LE_32(data_seg->seg_len, sg->length); +- WR_LE_32(data_seg->mkey, sg->lkey); +- WR_LE_64(data_seg->va, sg->addr); ++ struct xsc_context *ctx = to_xctx(qp->ibv_qp->context); ++ ++ xsc_hw_set_data_seg(ctx->device_id, data_seg, sg->addr, sg->lkey, sg->length); + } + +-static __be32 send_ieth(struct ibv_send_wr *wr) ++static void *get_addr_from_wr(const void *list, int idx) + { +- switch (wr->opcode) { +- case IBV_WR_SEND_WITH_IMM: +- case IBV_WR_RDMA_WRITE_WITH_IMM: +- return wr->imm_data; +- default: +- return 0; +- } ++ const struct ibv_send_wr *wr = list; ++ ++ return (void *)wr->sg_list[idx].addr; + } + +-static int set_data_inl_seg(struct xsc_qp *qp, struct ibv_send_wr *wr, +- struct xsc_send_wqe_ctrl_seg *ctrl) ++static int get_len_from_wr(const void *list, int idx) + { +- void *data_seg; +- unsigned seg_index; +- void *addr; +- int len = 0; +- int i; +- const int ds_len = 
sizeof(struct xsc_wqe_data_seg); +- int left_len = 0; +- int msg_len = ctrl->msg_len; ++ const struct ibv_send_wr *wr = list; ++ return wr->sg_list[idx].length; ++} + +- if (wr->opcode == IBV_WR_SEND || wr->opcode == IBV_WR_SEND_WITH_IMM) +- seg_index = 1; +- else +- seg_index = 2; ++static void *get_addr_from_buf_list(const void *list, int idx) ++{ ++ const struct ibv_data_buf *buf_list = list; ++ return buf_list[idx].addr; ++} + +- if (unlikely(msg_len > qp->max_inline_data)) +- return ENOMEM; ++static int get_len_from_wr_list(const void *list, int idx) ++{ ++ const struct ibv_data_buf *buf_list = list; ++ return buf_list[idx].length; ++} ++ ++static int _set_wqe_inline(void *data_seg, size_t num_buf, const void *list, ++ void *(*get_addr)(const void *, int), ++ int (*get_len)(const void *, int)) ++{ ++ int i; ++ int ds_left_len = 0; ++ int len = 0; ++ void *addr; ++ void *data_seg_base = data_seg; ++ int seg_index = 0; ++ const int ds_len = sizeof(struct xsc_wqe_data_seg); + +- for (i = 0; i < wr->num_sge; ++i) { +- if (likely(wr->sg_list[i].length)) { +- addr = (void*)wr->sg_list[i].addr; +- len = wr->sg_list[i].length; +- if (left_len > 0) { +- int copy_len = min_t(int, len, left_len); ++ for (i = 0; i < num_buf; i++) { ++ addr = get_addr(list, i); ++ len = get_len(list, i); ++ if (likely(len)) { ++ if (ds_left_len > 0) { ++ int copy_len = min_t(int, len, ds_left_len); + memcpy(data_seg, addr, copy_len); + addr += copy_len; + len -= copy_len; + } + + while (len >= ds_len) { +- data_seg = get_seg_wqe(ctrl, seg_index); ++ data_seg = get_seg_wqe(data_seg_base, seg_index); + seg_index++; + memcpy(data_seg, addr, ds_len); + addr += ds_len; +@@ -142,43 +144,84 @@ static int set_data_inl_seg(struct xsc_qp *qp, struct ibv_send_wr *wr, + } + + if (len > 0) { +- data_seg = get_seg_wqe(ctrl, seg_index); ++ data_seg = get_seg_wqe(data_seg_base, seg_index); + seg_index++; + memcpy(data_seg, addr, len); + data_seg += len; +- left_len = ds_len - len; ++ ds_left_len = ds_len - len; + } else { +- left_len = 0; ++ ds_left_len = 0; + } + } + } ++ return seg_index; ++} ++ ++static int set_wqe_inline_from_wr(struct xsc_qp *qp, struct ibv_send_wr *wr, ++ struct xsc_send_wqe_ctrl_seg *ctrl) ++{ ++ void *data_seg; ++ unsigned seg_index; ++ int msg_len = ctrl->msg_len; ++ int filled_ds_num; ++ ++ if (wr->opcode == IBV_WR_SEND || wr->opcode == IBV_WR_SEND_WITH_IMM) ++ seg_index = 1; ++ else ++ seg_index = 2; ++ data_seg = get_seg_wqe(ctrl, seg_index); + +- ctrl->ds_data_num = seg_index - 1; ++ if (unlikely(msg_len > qp->max_inline_data)) ++ return ENOMEM; ++ ++ filled_ds_num = _set_wqe_inline(data_seg, wr->num_sge, wr, ++ get_addr_from_wr, ++ get_len_from_wr); ++ ctrl->ds_data_num = seg_index - 1 + filled_ds_num; + + return 0; + } + +-static void zero_send_ds(int idx, struct xsc_qp *qp) ++static int set_wqe_inline_from_buf_list(void *data_seg, ++ size_t num_buf, ++ const struct ibv_data_buf *buf_list) ++{ ++ return _set_wqe_inline(data_seg, num_buf, buf_list, ++ get_addr_from_buf_list, ++ get_len_from_wr_list); ++} ++ ++static inline void _zero_send_ds(int idx, struct xsc_qp *qp, int keep_ctrl) + { + void *seg; + uint64_t *uninitialized_var(p); + int i; + + seg = (void*)xsc_get_send_wqe(qp, idx); +- for (i = 1; i < qp->sq.seg_cnt; i++) { ++ for (i = keep_ctrl; i < qp->sq.seg_cnt; i++) { + p = get_seg_wqe(seg, i); + p[0] = p[1] = 0; + } + } + +-static void zero_recv_ds(int idx, struct xsc_qp *qp) ++static inline void clear_send_wqe(int idx, struct xsc_qp *qp) ++{ ++ _zero_send_ds(idx, qp, 0); ++} ++ ++static 
inline void clear_send_wqe_except_ctrl(int idx, struct xsc_qp *qp) ++{ ++ _zero_send_ds(idx, qp, 1); ++} ++ ++static void clear_recv_wqe(int idx, struct xsc_qp *qp) + { + void *seg; + uint64_t *uninitialized_var(p); + int i; + + seg = (void*)get_recv_wqe(qp, idx); +- for (i = 1; i < qp->rq.seg_cnt; i++) { ++ for (i = 0; i < qp->rq.seg_cnt; i++) { + p = get_seg_wqe(seg, i); + p[0] = p[1] = 0; + } +@@ -221,23 +264,16 @@ static inline void dump_wqe(int type, int idx, struct xsc_qp *qp) {}; + + static inline void xsc_post_send_db(struct xsc_qp *qp, int nreq) + { +- uint16_t next_pid; +- union xsc_db_data db; ++ struct xsc_context *ctx = to_xctx(qp->ibv_qp->context); ++ uint32_t next_pid; + + if (unlikely(!nreq)) + return; + + qp->sq.head += nreq; + next_pid = qp->sq.head << (qp->sq.wqe_shift - XSC_BASE_WQE_SHIFT); +- db.sq_next_pid = next_pid; +- db.sqn = qp->sqn; +- /* +- * Make sure that descriptors are written before +- * updating doorbell record and ringing the doorbell +- */ + xsc_dbg(to_xctx(qp->ibv_qp->context)->dbg_fp, XSC_DBG_QP_SEND, "nreq:%d\n", nreq); +- udma_to_device_barrier(); +- WR_REG(qp->sq.db, db.raw_data); ++ xsc_hw_ring_tx_doorbell(ctx->device_id, qp->sq.db, qp->sqn, next_pid); + } + + static inline int _xsc_post_send(struct ibv_qp *ibqp, struct ibv_send_wr *wr, +@@ -305,7 +341,7 @@ static inline int _xsc_post_send(struct ibv_qp *ibqp, struct ibv_send_wr *wr, + } + + idx = qp->sq.cur_post & (qp->sq.wqe_cnt - 1); +- zero_send_ds(idx, qp); ++ clear_send_wqe(idx, qp); + ctrl = seg = xsc_get_send_wqe(qp, idx); + ctrl->ds_data_num = 0; + WR_LE_16(ctrl->wqe_id, +@@ -337,11 +373,11 @@ static inline int _xsc_post_send(struct ibv_qp *ibqp, struct ibv_send_wr *wr, + break; + case IBV_WR_SEND_WITH_IMM: + ctrl->with_immdt = 1; +- ctrl->opcode_data = send_ieth(wr); ++ WR_LE_32(ctrl->opcode_data, RD_BE_32(wr->imm_data)); + break; + case IBV_WR_RDMA_WRITE_WITH_IMM: + ctrl->with_immdt = 1; +- ctrl->opcode_data = send_ieth(wr); ++ WR_LE_32(ctrl->opcode_data, RD_BE_32(wr->imm_data)); + SWITCH_FALLTHROUGH; + case IBV_WR_RDMA_READ: + case IBV_WR_RDMA_WRITE: +@@ -349,11 +385,11 @@ static inline int _xsc_post_send(struct ibv_qp *ibqp, struct ibv_send_wr *wr, + break; + ctrl->ds_data_num++; + data_seg = get_seg_wqe(ctrl, seg_index); +- set_remote_addr_seg( +- data_seg, +- msg_len, +- wr->wr.rdma.remote_addr, +- wr->wr.rdma.rkey); ++ set_data_seg_with_value(qp, ++ data_seg, ++ wr->wr.rdma.remote_addr, ++ wr->wr.rdma.rkey, ++ msg_len); + seg_index++; + break; + default: +@@ -372,7 +408,7 @@ static inline int _xsc_post_send(struct ibv_qp *ibqp, struct ibv_send_wr *wr, + } + + if (wr->send_flags & IBV_SEND_INLINE && wr->num_sge) { +- err = set_data_inl_seg(qp, wr, ctrl); ++ err = set_wqe_inline_from_wr(qp, wr, ctrl); + if (unlikely(err)) { + *bad_wr = wr; + xsc_dbg(to_xctx(ibqp->context)->dbg_fp, XSC_DBG_QP_SEND, +@@ -383,7 +419,7 @@ static inline int _xsc_post_send(struct ibv_qp *ibqp, struct ibv_send_wr *wr, + for (i = 0; i < wr->num_sge; ++i, ++seg_index) { + if (likely(wr->sg_list[i].length)) { + data_seg = get_seg_wqe(ctrl, seg_index); +- set_local_data_seg(data_seg, &wr->sg_list[i]); ++ set_local_data_seg_from_sge(qp, data_seg, &wr->sg_list[i]); + ctrl->ds_data_num++; + } + } +@@ -392,7 +428,7 @@ static inline int _xsc_post_send(struct ibv_qp *ibqp, struct ibv_send_wr *wr, + ctrl->msg_opcode = xsc_ib_opcode[wr->opcode]; + if (ctrl->msg_len == 0) { + ctrl->ds_data_num = 0; +- zero_send_ds(idx, qp); ++ clear_send_wqe_except_ctrl(idx, qp); + } + qp->sq.wrid[idx] = wr->wr_id; + 
qp->sq.wqe_head[idx] = qp->sq.head + nreq; +@@ -403,7 +439,7 @@ static inline int _xsc_post_send(struct ibv_qp *ibqp, struct ibv_send_wr *wr, + } + qp->sq.wr_opcode[idx] = wr->opcode; + +- if (xsc_debug_mask & XSC_DBG_QP_SEND) ++ if (unlikely(xsc_debug_mask & XSC_DBG_QP_SEND)) + dump_wqe(0, idx, qp); + } + +@@ -420,6 +456,301 @@ int xsc_post_send(struct ibv_qp *ibqp, struct ibv_send_wr *wr, + return _xsc_post_send(ibqp, wr, bad_wr); + } + ++static inline void xsc_wr_start(struct ibv_qp_ex *ibqp) ++{ ++ struct xsc_qp *qp = to_xqp((struct ibv_qp *)ibqp); ++ ++ xsc_spin_lock(&qp->sq.lock); ++ ++ qp->cur_post_rb = qp->sq.cur_post; ++ qp->err = 0; ++ qp->nreq = 0; ++} ++ ++static inline int xsc_wr_complete(struct ibv_qp_ex *ibqp) ++{ ++ struct xsc_qp *qp = to_xqp((struct ibv_qp *)ibqp); ++ int err = qp->err; ++ ++ if (unlikely(err)) { ++ qp->sq.cur_post = qp->cur_post_rb; ++ goto out; ++ } ++ ++ xsc_post_send_db(qp, qp->nreq); ++out: ++ xsc_spin_unlock(&qp->sq.lock); ++ return err; ++} ++ ++static inline void xsc_wr_abort(struct ibv_qp_ex *ibqp) ++{ ++ struct xsc_qp *qp = to_xqp((struct ibv_qp *)ibqp); ++ ++ qp->sq.cur_post = qp->cur_post_rb; ++ ++ xsc_spin_unlock(&qp->sq.lock); ++} ++ ++#define RDMA_REMOTE_DATA_SEG_IDX 1 ++static const int local_ds_base_idx[] = { ++ [IBV_WR_RDMA_WRITE] = 2, ++ [IBV_WR_RDMA_WRITE_WITH_IMM] = 2, ++ [IBV_WR_SEND] = 1, ++ [IBV_WR_SEND_WITH_IMM] = 1, ++ [IBV_WR_RDMA_READ] = 2 ++}; ++ ++static inline void _common_wqe_init(struct ibv_qp_ex *ibqp, ++ enum ibv_wr_opcode ib_op) ++{ ++ struct xsc_qp *qp = to_xqp((struct ibv_qp *)ibqp); ++ struct xsc_send_wqe_ctrl_seg *ctrl; ++ uint32_t idx; ++ ++ if (unlikely(xsc_wq_overflow(&qp->sq, qp->nreq, ++ to_xcq(qp->ibv_qp->send_cq)))) { ++ xsc_dbg(to_xctx(ibqp->qp_base.context)->dbg_fp, XSC_DBG_QP_SEND, ++ "send work queue overflow\n"); ++ if (!qp->err) ++ qp->err = ENOMEM; ++ ++ return; ++ } ++ ++ idx = qp->sq.cur_post & (qp->sq.wqe_cnt - 1); ++ clear_send_wqe(idx, qp); ++ ctrl = xsc_get_send_wqe(qp, idx); ++ qp->cur_ctrl = ctrl; ++ qp->cur_ds_num = 0; ++ qp->cur_data_len = 0; ++ qp->cur_data = get_seg_wqe(ctrl, local_ds_base_idx[ib_op]); ++ qp->cur_remote_addr = 0; ++ qp->cur_remote_key = 0; ++ ctrl->msg_opcode = xsc_ib_opcode[ib_op]; ++ ctrl->ce = qp->sq_signal_bits ? 1 : (ibqp->wr_flags & IBV_SEND_SIGNALED ? 1 : 0); ++ ctrl->se = ibqp->wr_flags & IBV_SEND_SOLICITED ? 1 : 0; ++ ctrl->in_line = ibqp->wr_flags & IBV_SEND_INLINE ? 
1 : 0; ++ qp->sq.wrid[idx] = ibqp->wr_id; ++ qp->sq.wqe_head[idx] = qp->sq.head + qp->nreq; ++ qp->sq.wr_opcode[idx] = ib_op; ++ WR_LE_16(ctrl->wqe_id, ++ qp->sq.cur_post << (qp->sq.wqe_shift - XSC_BASE_WQE_SHIFT)); ++} ++ ++static inline void _common_wqe_finilize(struct ibv_qp_ex *ibqp) ++{ ++ struct xsc_qp *qp = to_xqp((struct ibv_qp *)ibqp); ++ struct xsc_send_wqe_ctrl_seg *ctrl = qp->cur_ctrl; ++ struct xsc_wqe_data_seg *remote_seg; ++ uint32_t idx = qp->sq.cur_post & (qp->sq.wqe_cnt - 1); ++ ++ ctrl->ds_data_num = qp->cur_ds_num; ++ ctrl->msg_len = qp->cur_data_len; ++ if (ctrl->msg_opcode == XSC_MSG_OPCODE_RDMA_WRITE || ++ ctrl->msg_opcode == XSC_MSG_OPCODE_RDMA_READ) { ++ remote_seg = get_seg_wqe(qp->cur_ctrl, RDMA_REMOTE_DATA_SEG_IDX); ++ set_data_seg_with_value(qp, remote_seg, ++ qp->cur_remote_addr, ++ qp->cur_remote_key, ++ ctrl->msg_len); ++ } ++ ++ dump_wqe(0, idx, qp); ++ qp->sq.cur_post++; ++ qp->nreq++; ++ if (ctrl->ce) { ++ qp->sq.flush_wqe_cnt++; ++ qp->sq.need_flush[idx] = 1; ++ } ++} ++ ++static inline void xsc_wr_send(struct ibv_qp_ex *ibqp) ++{ ++ _common_wqe_init(ibqp, IBV_WR_SEND); ++} ++ ++static inline void xsc_wr_send_imm(struct ibv_qp_ex *ibqp, __be32 imm_data) ++{ ++ struct xsc_qp *qp = to_xqp((struct ibv_qp *)ibqp); ++ struct xsc_send_wqe_ctrl_seg *ctrl; ++ ++ _common_wqe_init(ibqp, IBV_WR_SEND_WITH_IMM); ++ ctrl = qp->cur_ctrl; ++ ctrl->with_immdt = 1; ++ WR_LE_32(ctrl->opcode_data, RD_BE_32(imm_data)); ++} ++ ++static inline void _xsc_wr_rdma(struct ibv_qp_ex *ibqp, ++ uint32_t rkey, ++ uint64_t remote_addr, ++ enum ibv_wr_opcode ib_op) ++{ ++ struct xsc_qp *qp = to_xqp((struct ibv_qp *)ibqp); ++ ++ _common_wqe_init(ibqp, ib_op); ++ qp->cur_remote_addr = remote_addr; ++ qp->cur_remote_key = rkey; ++ qp->cur_ds_num++; ++} ++ ++static inline void xsc_wr_rdma_write(struct ibv_qp_ex *ibqp, uint32_t rkey, ++ uint64_t remote_addr) ++{ ++ _xsc_wr_rdma(ibqp, rkey, remote_addr, IBV_WR_RDMA_WRITE); ++} ++ ++static inline void xsc_wr_rdma_write_imm(struct ibv_qp_ex *ibqp, uint32_t rkey, ++ uint64_t remote_addr, __be32 imm_data) ++{ ++ struct xsc_qp *qp = to_xqp((struct ibv_qp *)ibqp); ++ struct xsc_send_wqe_ctrl_seg *ctrl; ++ ++ _xsc_wr_rdma(ibqp, rkey, remote_addr, IBV_WR_RDMA_WRITE_WITH_IMM); ++ ctrl = qp->cur_ctrl; ++ ctrl->with_immdt = 1; ++ WR_LE_32(ctrl->opcode_data, RD_BE_32(imm_data)); ++} ++ ++static inline void xsc_wr_rdma_read(struct ibv_qp_ex *ibqp, uint32_t rkey, ++ uint64_t remote_addr) ++{ ++ _xsc_wr_rdma(ibqp, rkey, remote_addr, IBV_WR_RDMA_READ); ++} ++ ++static inline void xsc_wr_set_sge(struct ibv_qp_ex *ibqp, uint32_t lkey, uint64_t addr, ++ uint32_t length) ++{ ++ struct xsc_qp *qp = to_xqp((struct ibv_qp *)ibqp); ++ struct xsc_wqe_data_seg *data_seg = qp->cur_data; ++ ++ if (unlikely(!length)) ++ return; ++ ++ set_data_seg_with_value(qp, data_seg, addr, lkey, length); ++ qp->cur_ds_num++; ++ qp->cur_data_len = length; ++ _common_wqe_finilize(ibqp); ++} ++ ++static inline void xsc_wr_set_sge_list(struct ibv_qp_ex *ibqp, size_t num_sge, ++ const struct ibv_sge *sg_list) ++{ ++ struct xsc_qp *qp = to_xqp((struct ibv_qp *)ibqp); ++ struct xsc_wqe_data_seg *data_seg = qp->cur_data; ++ int i; ++ ++ if (unlikely(num_sge > qp->sq.max_gs)) { ++ xsc_dbg(to_xctx(ibqp->qp_base.context)->dbg_fp, XSC_DBG_QP_SEND, ++ "rdma read, max gs exceeded %lu (max = 1)\n", ++ num_sge); ++ if (!qp->err) ++ qp->err = ENOMEM; ++ return ; ++ } ++ ++ for (i = 0; i < num_sge; i++) { ++ if (unlikely(!sg_list[i].length)) ++ continue; ++ set_local_data_seg_from_sge(qp, data_seg, 
&sg_list[i]); ++ data_seg++; ++ qp->cur_ds_num++; ++ qp->cur_data_len += sg_list[i].length; ++ } ++ _common_wqe_finilize(ibqp); ++} ++ ++static inline void xsc_wr_set_inline_data(struct ibv_qp_ex *ibqp, void *addr, ++ size_t length) ++{ ++ struct xsc_qp *qp = to_xqp((struct ibv_qp *)ibqp); ++ struct xsc_wqe_data_seg *data_seg = qp->cur_data; ++ size_t num_buf = 1; ++ struct ibv_data_buf data_buf = {.addr = addr, .length = length}; ++ int num_filled_ds = 0; ++ ++ if (unlikely(length > qp->max_inline_data)) { ++ if (!qp->err) ++ qp->err = ENOMEM; ++ return; ++ } ++ ++ num_filled_ds = set_wqe_inline_from_buf_list(data_seg, num_buf, &data_buf); ++ ++ qp->cur_ds_num += num_filled_ds; ++ qp->cur_data_len = length; ++ _common_wqe_finilize(ibqp); ++} ++ ++static inline void xsc_wr_set_inline_data_list(struct ibv_qp_ex *ibqp, ++ size_t num_buf, ++ const struct ibv_data_buf *buf_list) ++{ ++ struct xsc_qp *qp = to_xqp((struct ibv_qp *)ibqp); ++ struct xsc_wqe_data_seg *data_seg = qp->cur_data; ++ int num_filled_ds = 0; ++ int i; ++ size_t total_len = 0; ++ ++ for (i = 0; i < num_buf; i++) ++ total_len += buf_list[i].length; ++ if (unlikely(total_len > qp->max_inline_data)) { ++ if (!qp->err) ++ qp->err = ENOMEM; ++ return; ++ } ++ ++ num_filled_ds = set_wqe_inline_from_buf_list(data_seg, num_buf, buf_list); ++ ++ qp->cur_ds_num += num_filled_ds; ++ qp->cur_data_len = total_len; ++ _common_wqe_finilize(ibqp); ++} ++ ++enum { ++ XSC_SUPPORTED_SEND_OPS_FLAGS_RC = ++ IBV_QP_EX_WITH_SEND | ++ IBV_QP_EX_WITH_SEND_WITH_IMM | ++ IBV_QP_EX_WITH_RDMA_WRITE | ++ IBV_QP_EX_WITH_RDMA_WRITE_WITH_IMM | ++ IBV_QP_EX_WITH_RDMA_READ, ++}; ++ ++static void fill_wr_pfns_rc(struct ibv_qp_ex *ibqp) ++{ ++ ibqp->wr_send = xsc_wr_send; ++ ibqp->wr_send_imm = xsc_wr_send_imm; ++ ibqp->wr_rdma_write = xsc_wr_rdma_write; ++ ibqp->wr_rdma_write_imm = xsc_wr_rdma_write_imm; ++ ibqp->wr_rdma_read = xsc_wr_rdma_read; ++ ++ ibqp->wr_set_sge = xsc_wr_set_sge; ++ ibqp->wr_set_sge_list = xsc_wr_set_sge_list; ++ ibqp->wr_set_inline_data = xsc_wr_set_inline_data; ++ ibqp->wr_set_inline_data_list = xsc_wr_set_inline_data_list; ++} ++ ++int xsc_qp_fill_wr_pfns(struct xsc_qp *xqp, const struct ibv_qp_init_attr_ex *attr) ++{ ++ struct ibv_qp_ex *ibqp = &xqp->verbs_qp.qp_ex; ++ uint64_t ops = attr->send_ops_flags; ++ ++ ibqp->wr_start = xsc_wr_start; ++ ibqp->wr_complete = xsc_wr_complete; ++ ibqp->wr_abort = xsc_wr_abort; ++ ++ switch (attr->qp_type) { ++ case IBV_QPT_RC: ++ if (ops & ~XSC_SUPPORTED_SEND_OPS_FLAGS_RC) ++ return EOPNOTSUPP; ++ fill_wr_pfns_rc(ibqp); ++ break; ++ default: ++ return EOPNOTSUPP; ++ } ++ return 0; ++} ++ + static void set_wq_sig_seg(struct xsc_rwq *rwq, struct xsc_rwqe_sig *sig, + int size, uint16_t idx) + { +@@ -506,6 +837,7 @@ out: + return err; + } + ++int xsc_post_recv_dump_wqe = 1; + int xsc_post_recv(struct ibv_qp *ibqp, struct ibv_recv_wr *wr, + struct ibv_recv_wr **bad_wr) + { +@@ -513,8 +845,7 @@ int xsc_post_recv(struct ibv_qp *ibqp, struct ibv_recv_wr *wr, + struct xsc_wqe_data_seg *recv_head; + struct xsc_wqe_data_seg *data_seg; + int err = 0; +- uint16_t next_pid = 0; +- union xsc_db_data db; ++ uint32_t next_pid = 0; + int nreq; + uint16_t idx; + int i; +@@ -523,7 +854,7 @@ int xsc_post_recv(struct ibv_qp *ibqp, struct ibv_recv_wr *wr, + + idx = qp->rq.head & (qp->rq.wqe_cnt - 1); + +- zero_recv_ds(idx, qp); ++ clear_recv_wqe(idx, qp); + for (nreq = 0; wr; ++nreq, wr = wr->next) { + if (unlikely(xsc_wq_overflow(&qp->rq, nreq, + to_xcq(qp->ibv_qp->recv_cq)))) { +@@ -547,31 +878,23 @@ int 
xsc_post_recv(struct ibv_qp *ibqp, struct ibv_recv_wr *wr, + if (unlikely(!wr->sg_list[i].length)) + continue; + data_seg = get_seg_wqe(recv_head, i); +- WR_LE_32(data_seg->seg_len, wr->sg_list[i].length); +- WR_LE_32(data_seg->mkey, wr->sg_list[i].lkey); +- WR_LE_64(data_seg->va, wr->sg_list[i].addr); ++ set_local_data_seg_from_sge(qp, data_seg, &wr->sg_list[i]); + } + + qp->rq.wrid[idx] = wr->wr_id; + +- dump_wqe(1, idx, qp); ++ if (xsc_post_recv_dump_wqe || (xsc_debug_mask & XSC_DBG_QP_RECV)) ++ dump_wqe(1, idx, qp); + idx = (idx + 1) & (qp->rq.wqe_cnt - 1); + qp->rq.flush_wqe_cnt++; + } + + out: + if (likely(nreq)) { ++ struct xsc_context *ctx = to_xctx(ibqp->context); + qp->rq.head += nreq; + next_pid = qp->rq.head << (qp->rq.wqe_shift - XSC_BASE_WQE_SHIFT); +- db.rq_next_pid = next_pid; +- db.rqn = qp->rqn; +- +- /* +- * Make sure that descriptors are written before +- * doorbell record. +- */ +- udma_to_device_barrier(); +- WR_REG(qp->rq.db, db.raw_data); ++ xsc_hw_ring_rx_doorbell(ctx->device_id, qp->rq.db, qp->rqn, next_pid); + } + + xsc_spin_unlock(&qp->rq.lock); +@@ -676,3 +999,4 @@ int xsc_err_state_qp(struct ibv_qp *qp, enum ibv_qp_state cur_state, + } + return ret; + } ++ +diff --git a/providers/xscale/verbs.c b/providers/xscale/verbs.c +index 937bed1..602ca9d 100644 +--- a/providers/xscale/verbs.c ++++ b/providers/xscale/verbs.c +@@ -213,7 +213,6 @@ struct ibv_mr *xsc_reg_mr(struct ibv_pd *pd, void *addr, size_t length, + &mr->vmr, &cmd, sizeof(cmd), &resp, + sizeof resp); + if (ret) { +- xsc_free_buf(&(mr->buf)); + free(mr); + return NULL; + } +@@ -225,6 +224,27 @@ struct ibv_mr *xsc_reg_mr(struct ibv_pd *pd, void *addr, size_t length, + return &mr->vmr.ibv_mr; + } + ++struct ibv_mr *xsc_reg_dmabuf_mr(struct ibv_pd *pd, uint64_t offset, size_t length, ++ uint64_t iova, int fd, int acc) ++{ ++ struct xsc_mr *mr; ++ int ret; ++ ++ mr = calloc(1, sizeof(*mr)); ++ if (!mr) ++ return NULL; ++ ++ ret = ibv_cmd_reg_dmabuf_mr(pd, offset, length, iova, fd, acc, ++ &mr->vmr); ++ if (ret) { ++ free(mr); ++ return NULL; ++ } ++ mr->alloc_flags = acc; ++ ++ return &mr->vmr.ibv_mr; ++} ++ + struct ibv_mr *xsc_alloc_null_mr(struct ibv_pd *pd) + { + struct xsc_mr *mr; +@@ -291,17 +311,6 @@ struct ibv_mr *xsc_reg_dm_mr(struct ibv_pd *pd, struct ibv_dm *ibdm, + return &mr->vmr.ibv_mr; + } + +-int xsc_rereg_mr(struct verbs_mr *vmr, int flags, struct ibv_pd *pd, +- void *addr, size_t length, int access) +-{ +- struct ibv_rereg_mr cmd; +- struct ib_uverbs_rereg_mr_resp resp; +- +- return ibv_cmd_rereg_mr(vmr, flags, addr, length, (uintptr_t)addr, +- access, pd, &cmd, sizeof(cmd), &resp, +- sizeof(resp)); +-} +- + int xsc_dereg_mr(struct verbs_mr *vmr) + { + int ret; +@@ -339,12 +348,8 @@ static int align_queue_size(long long req) + } + + enum { +- CREATE_CQ_SUPPORTED_WC_FLAGS = IBV_WC_STANDARD_FLAGS | +- IBV_WC_EX_WITH_COMPLETION_TIMESTAMP | +- IBV_WC_EX_WITH_CVLAN | +- IBV_WC_EX_WITH_FLOW_TAG | +- IBV_WC_EX_WITH_TM_INFO | +- IBV_WC_EX_WITH_COMPLETION_TIMESTAMP_WALLCLOCK ++ CREATE_CQ_SUPPORTED_WC_FLAGS = IBV_WC_STANDARD_FLAGS | ++ IBV_WC_EX_WITH_COMPLETION_TIMESTAMP + }; + + enum { +@@ -417,7 +422,7 @@ static struct ibv_cq_ex *create_cq(struct ibv_context *context, + } + + if (cq_attr->wc_flags & ~CREATE_CQ_SUPPORTED_WC_FLAGS) { +- xsc_err("unsupported flgas:0x%lx\n", cq_attr->wc_flags); ++ xsc_err("unsupported wc flags:0x%lx\n", cq_attr->wc_flags); + errno = ENOTSUP; + return NULL; + } +@@ -453,16 +458,16 @@ static struct ibv_cq_ex *create_cq(struct ibv_context *context, + ncqe = 
XSC_CQE_RING_DEPTH_MIN; + } + +- if (ncqe > XSC_CQE_RING_DEPTH_MAX) { ++ if (ncqe > xctx->max_cqe) { + if (xsc_cqe_depth_check()) { + xsc_err("CQE ring size %u exceeds CQE ring depth %u, abort!\n", +- ncqe, XSC_CQE_RING_DEPTH_MAX); ++ ncqe, xctx->max_cqe); + errno = EINVAL; + goto err_spl; + } else { + xsc_dbg(xctx->dbg_fp, XSC_DBG_CQ, "CQE ring size %u exceeds the MAX ring szie, set it as %u\n", +- ncqe, XSC_CQE_RING_DEPTH_MAX); +- ncqe = XSC_CQE_RING_DEPTH_MAX; ++ ncqe, xctx->max_cqe); ++ ncqe = xctx->max_cqe; + } + } + +@@ -485,6 +490,9 @@ static struct ibv_cq_ex *create_cq(struct ibv_context *context, + + xsc_dbg(xctx->dbg_fp, XSC_DBG_CQ, "buf_addr:%p\n", cq->buf_a.buf); + ++ if (cq_alloc_flags & XSC_CQ_FLAGS_EXTENDED) ++ xsc_cq_fill_pfns(cq, cq_attr); ++ + if (use_ex) { + struct ibv_cq_init_attr_ex cq_attr_ex = *cq_attr; + +@@ -630,6 +638,7 @@ static int xsc_calc_sq_size(struct xsc_context *ctx, + int wqe_size; + int wq_size; + int wq_size_min = 0; ++ int max_inline_cap; + + if (!attr->cap.max_send_wr) + return 0; +@@ -646,23 +655,34 @@ static int xsc_calc_sq_size(struct xsc_context *ctx, + wq_size = wq_size_min; + } + +- if (wq_size > XSC_SEND_WQE_RING_DEPTH_MAX) { +- xsc_dbg(ctx->dbg_fp, XSC_DBG_QP, +- "WQE size %u exceeds WQE ring depth, set it as %u\n", +- wq_size, XSC_SEND_WQE_RING_DEPTH_MAX); +- wq_size = XSC_SEND_WQE_RING_DEPTH_MAX; ++ if (wq_size > ctx->max_send_wqebb) { ++ if (ctx->device_id == XSC_MC_PF_DEV_ID_DIAMOND || ++ ctx->device_id == XSC_MC_PF_DEV_ID_DIAMOND_NEXT) { ++ xsc_err("WQE size %u exceeds WQE ring depth\n", wq_size); ++ return -EINVAL; ++ } else { ++ xsc_dbg(ctx->dbg_fp, XSC_DBG_QP, ++ "WQE size %u exceeds WQE ring depth, set it as %u\n", ++ wq_size, ctx->max_send_wqebb); ++ wq_size = ctx->max_send_wqebb; ++ } + } + +- qp->max_inline_data = attr->cap.max_inline_data; + qp->sq.wqe_cnt = wq_size; + qp->sq.ds_cnt = wq_size << ctx->send_ds_shift; + qp->sq.seg_cnt = 1 << ctx->send_ds_shift; + qp->sq.wqe_shift = XSC_BASE_WQE_SHIFT + ctx->send_ds_shift; + qp->sq.max_gs = attr->cap.max_send_sge; + qp->sq.max_post = qp->sq.wqe_cnt; +- if (attr->cap.max_inline_data > +- (qp->sq.seg_cnt - 2) * sizeof(struct xsc_wqe_data_seg)) ++ ++ if (ctx->device_id == XSC_MC_PF_DEV_ID_DIAMOND || ++ ctx->device_id == XSC_MC_PF_DEV_ID_DIAMOND_NEXT) ++ max_inline_cap = 64; ++ else ++ max_inline_cap = (qp->sq.seg_cnt - 2) * sizeof(struct xsc_wqe_data_seg); ++ if (attr->cap.max_inline_data > max_inline_cap) + return -EINVAL; ++ qp->max_inline_data = attr->cap.max_inline_data; + + xsc_dbg(ctx->dbg_fp, XSC_DBG_QP, "Send WQE count:%u, max post:%u wqe shift:%u\n", + qp->sq.wqe_cnt, qp->sq.max_post, qp->sq.wqe_shift); +@@ -743,11 +763,17 @@ static int xsc_calc_rq_size(struct xsc_context *ctx, + wq_size = wq_size_min; + } + +- if (wq_size > XSC_RECV_WQE_RING_DEPTH_MAX) { +- xsc_dbg(ctx->dbg_fp, XSC_DBG_QP, +- "WQE size %u exceeds WQE ring depth, set it as %u\n", +- wq_size, XSC_RECV_WQE_RING_DEPTH_MAX); +- wq_size = XSC_RECV_WQE_RING_DEPTH_MAX; ++ if (wq_size > ctx->max_recv_wr) { ++ if (ctx->device_id == XSC_MC_PF_DEV_ID_DIAMOND || ++ ctx->device_id == XSC_MC_PF_DEV_ID_DIAMOND_NEXT) { ++ xsc_err("WQE size %u exceeds WQE ring depth\n", wq_size); ++ return -EINVAL; ++ } else { ++ xsc_dbg(ctx->dbg_fp, XSC_DBG_QP, ++ "WQE size %u exceeds WQE ring depth, set it as %u\n", ++ wq_size, ctx->max_recv_wr); ++ wq_size = ctx->max_recv_wr; ++ } + } + + qp->rq.wqe_cnt = wq_size; +@@ -946,8 +972,10 @@ static void xsc_free_qp_buf(struct xsc_context *ctx, struct xsc_qp *qp) + } + + enum { +- 
XSC_CREATE_QP_SUP_COMP_MASK = IBV_QP_INIT_ATTR_PD | +- IBV_QP_INIT_ATTR_CREATE_FLAGS ++ XSC_CREATE_QP_SUP_COMP_MASK = (IBV_QP_INIT_ATTR_PD | ++ IBV_QP_INIT_ATTR_CREATE_FLAGS | ++ IBV_QP_INIT_ATTR_SEND_OPS_FLAGS | ++ IBV_QP_INIT_ATTR_MAX_TSO_HEADER), + }; + + enum { +@@ -971,6 +999,34 @@ enum { + XSCDV_QP_CREATE_ALLOW_SCATTER_TO_CQE), + }; + ++static int xsc_cmd_create_qp_ex(struct ibv_context *context, ++ struct ibv_qp_init_attr_ex *attr, ++ struct xsc_create_qp *cmd, ++ struct xsc_qp *qp, ++ struct xsc_create_qp_resp *resp, ++ struct xsc_create_qp_ex_resp *resp_ex) ++{ ++ struct xsc_create_qp_ex cmd_ex; ++ int ret; ++ ++ if (attr->comp_mask & XSC_CREATE_QP_EX2_COMP_MASK) { ++ memset(&cmd_ex, 0, sizeof(cmd_ex)); ++ *ibv_create_qp_ex_to_reg(&cmd_ex.ibv_cmd) = cmd->ibv_cmd.core_payload; ++ cmd_ex.drv_payload = cmd->drv_payload; ++ ++ ret = ibv_cmd_create_qp_ex2(context, &qp->verbs_qp, ++ attr, &cmd_ex.ibv_cmd, ++ sizeof(cmd_ex), &resp_ex->ibv_resp, ++ sizeof(*resp_ex)); ++ } else { ++ ret = ibv_cmd_create_qp_ex(context, &qp->verbs_qp, attr, ++ &cmd->ibv_cmd, sizeof(*cmd), ++ &resp->ibv_resp, sizeof(*resp)); ++ } ++ ++ return ret; ++} ++ + static struct ibv_qp *create_qp(struct ibv_context *context, + struct ibv_qp_init_attr_ex *attr, + struct xscdv_qp_init_attr *xqp_attr) +@@ -992,19 +1048,35 @@ static struct ibv_qp *create_qp(struct ibv_context *context, + return NULL; + } + ++ /*check qp_type*/ ++ if ((attr->qp_type != IBV_QPT_RC) && ++ (attr->qp_type != IBV_QPT_RAW_PACKET)){ ++ xsc_err("Not supported qp_type:0x%x\n", attr->qp_type); ++ return NULL; ++ } ++ + qp = calloc(1, sizeof(*qp)); + if (!qp) { + xsc_err("QP calloc failed\n"); + return NULL; + } + +- ibqp = (struct ibv_qp *)&qp->verbs_qp; ++ ibqp = &qp->verbs_qp.qp; + qp->ibv_qp = ibqp; + + memset(&cmd, 0, sizeof(cmd)); + memset(&resp, 0, sizeof(resp)); + memset(&resp_ex, 0, sizeof(resp_ex)); + ++ if (attr->comp_mask & IBV_QP_INIT_ATTR_SEND_OPS_FLAGS) { ++ ret = xsc_qp_fill_wr_pfns(qp, attr); ++ if (ret) { ++ errno = ret; ++ xsc_err("Fill wr pfns failed\n"); ++ goto err; ++ } ++ } ++ + ret = xsc_calc_wq_size(ctx, attr, qp); + if (ret < 0) { + xsc_err("Calculate WQ size failed\n"); +@@ -1056,17 +1128,28 @@ static struct ibv_qp *create_qp(struct ibv_context *context, + "revert create_flags(0x%x) to cmd_flags(0x%x)\n", + attr->create_flags, cmd.flags); + } ++ ++ if (attr->create_flags & XSC_QP_CREATE_RAWPACKET_SNIFFER) { ++ cmd.flags |= XSC_QP_FLAG_RAWPACKET_SNIFFER; ++ qp->flags |= XSC_QP_FLAG_RAWPACKET_SNIFFER; ++ xsc_dbg(ctx->dbg_fp, XSC_DBG_QP, ++ "revert create_flags(0x%x) to cmd_flags(0x%x)\n", ++ attr->create_flags, cmd.flags); ++ } ++ + attr->comp_mask &= ~IBV_QP_INIT_ATTR_CREATE_FLAGS; + } ++ ++ if (attr->comp_mask & IBV_QP_INIT_ATTR_MAX_TSO_HEADER) ++ cmd.flags |= XSC_QP_FLAG_RAWPACKET_TSO; ++ + } + + pthread_mutex_lock(&ctx->qp_table_mutex); + + xparent_domain = to_xparent_domain(attr->pd); + +- ret = ibv_cmd_create_qp_ex(context, &qp->verbs_qp, attr, +- &cmd.ibv_cmd, sizeof(cmd), +- &resp.ibv_resp, sizeof(resp)); ++ ret = xsc_cmd_create_qp_ex(context, attr, &cmd, qp, &resp, &resp_ex); + if (ret) { + xsc_err("ibv_cmd_create_qp_ex failed,ret %d\n", ret); + errno = ret; +@@ -1108,6 +1191,9 @@ static struct ibv_qp *create_qp(struct ibv_context *context, + qp->sq.db = ctx->sqm_reg_va + (ctx->qpm_tx_db & (xdev->page_size - 1)); + qp->rq.db = ctx->rqm_reg_va + (ctx->qpm_rx_db & (xdev->page_size - 1)); + ++ if (attr->comp_mask & IBV_QP_INIT_ATTR_SEND_OPS_FLAGS) ++ qp->verbs_qp.comp_mask |= VERBS_QP_EX; ++ + return ibqp; + + 
err_destroy: +@@ -1261,6 +1347,11 @@ int xsc_query_qp(struct ibv_qp *ibqp, struct ibv_qp_attr *attr, + init_attr->cap.max_inline_data = qp->max_inline_data; + + attr->cap = init_attr->cap; ++ if (qp->err_occurred) { ++ qp->err_occurred = 0; ++ qp->ibv_qp->state = IBV_QPS_ERR; ++ attr->qp_state = IBV_QPS_ERR; ++ } + + return 0; + } +diff --git a/providers/xscale/xsc_api.h b/providers/xscale/xsc_api.h +index c533019..3b3eafc 100644 +--- a/providers/xscale/xsc_api.h ++++ b/providers/xscale/xsc_api.h +@@ -20,9 +20,9 @@ + #define XSCDV_FLOW_ACTION_PACKET_REFORMAT_TYPE_L2_TO_L3_TUNNEL XSC_IB_UAPI_FLOW_ACTION_PACKET_REFORMAT_TYPE_L2_TO_L3_TUNNEL + + enum xsc_qp_create_flags { +- XSC_QP_CREATE_RAWPACKE_TSO = 1 << 0, + XSC_QP_CREATE_RAWPACKET_TSO = 1 << 0, +- XSC_QP_CREATE_RAWPACKET_TX = 1 << 1, ++ XSC_QP_CREATE_RAWPACKET_SNIFFER = 1 << 2, ++ XSC_QP_CREATE_RAWPACKET_TX = 1 << 3, + }; + + +diff --git a/providers/xscale/xsc_hsi.h b/providers/xscale/xsc_hsi.h +index 53fe552..30887af 100644 +--- a/providers/xscale/xsc_hsi.h ++++ b/providers/xscale/xsc_hsi.h +@@ -65,28 +65,50 @@ enum { + }; + + enum { +- XSC_ERR_CODE_NAK_RETRY = 0x40, +- XSC_ERR_CODE_NAK_OPCODE = 0x41, +- XSC_ERR_CODE_NAK_MR = 0x42, +- XSC_ERR_CODE_NAK_OPERATION = 0x43, +- XSC_ERR_CODE_NAK_RNR = 0x44, +- XSC_ERR_CODE_LOCAL_MR = 0x45, +- XSC_ERR_CODE_LOCAL_LEN = 0x46, +- XSC_ERR_CODE_LOCAL_OPCODE = 0x47, +- XSC_ERR_CODE_CQ_OVER_FLOW = 0x48, +- XSC_ERR_CODE_STRG_ACC_GEN_CQE = 0x4c, +- XSC_ERR_CODE_CQE_ACC = 0x4d, +- XSC_ERR_CODE_FLUSH = 0x4e, +- XSC_ERR_CODE_MALF_WQE_HOST = 0x50, +- XSC_ERR_CODE_MALF_WQE_INFO = 0x51, +- XSC_ERR_CODE_MR_NON_NAK = 0x52, +- XSC_ERR_CODE_OPCODE_GEN_CQE = 0x61, +- XSC_ERR_CODE_MANY_READ = 0x62, +- XSC_ERR_CODE_LEN_GEN_CQE = 0x63, +- XSC_ERR_CODE_MR = 0x65, +- XSC_ERR_CODE_MR_GEN_CQE = 0x66, +- XSC_ERR_CODE_OPERATION = 0x67, +- XSC_ERR_CODE_MALF_WQE_INFO_GEN_NAK = 0x68, ++ XSC_ANDES_ERR_CODE_NAK_RETRY = 0x40, ++ XSC_ANDES_ERR_CODE_NAK_OPCODE = 0x41, ++ XSC_ANDES_ERR_CODE_NAK_MR = 0x42, ++ XSC_ANDES_ERR_CODE_NAK_OPERATION = 0x43, ++ XSC_ANDES_ERR_CODE_NAK_RNR = 0x44, ++ XSC_ANDES_ERR_CODE_LOCAL_MR = 0x45, ++ XSC_ANDES_ERR_CODE_LOCAL_LEN = 0x46, ++ XSC_ANDES_ERR_CODE_LOCAL_OPCODE = 0x47, ++ XSC_ANDES_ERR_CODE_CQ_OVER_FLOW = 0x48, ++ XSC_ANDES_ERR_CODE_LOCAL_OPERATION_WQE = 0x49, ++ XSC_ANDES_ERR_CODE_STRG_ACC_GEN_CQE = 0x4b, ++ XSC_ANDES_ERR_CODE_STRG_ACC = 0x4c, ++ XSC_ANDES_ERR_CODE_CQE_ACC = 0x4d, ++ XSC_ANDES_ERR_CODE_FLUSH = 0x4e, ++ XSC_ANDES_ERR_CODE_MALF_WQE_HOST = 0x50, ++ XSC_ANDES_ERR_CODE_MALF_WQE_INFO = 0x51, ++ XSC_ANDES_ERR_CODE_MR_NON_NAK = 0x52, ++ XSC_ANDES_ERR_CODE_OPCODE_GEN_CQE = 0x61, ++ XSC_ANDES_ERR_CODE_MANY_READ = 0x62, ++ XSC_ANDES_ERR_CODE_LEN_GEN_CQE = 0x63, ++ XSC_ANDES_ERR_CODE_MR = 0x65, ++ XSC_ANDES_ERR_CODE_MR_GEN_CQE = 0x66, ++ XSC_ANDES_ERR_CODE_OPERATION = 0x67, ++ XSC_ANDES_ERR_CODE_MALF_WQE_INFO_GEN_NAK = 0x68, ++}; ++ ++enum { ++ XSC_DIAMOND_ERR_CODE_NAK_SEQ_ERR = 0xa0, ++ XSC_DIAMOND_ERR_CODE_RTO_REQ = 0xa2, ++ XSC_DIAMOND_ERR_CODE_NAK_INV_REQ = 0xa4, ++ XSC_DIAMOND_ERR_CODE_NAK_MR = 0xa5, ++ XSC_DIAMOND_ERR_CODE_NAK_REMOTE_OPER_ERR = 0xa6, ++ XSC_DIAMOND_ERR_CODE_LOCAL_MR_REQ = 0xa7, ++ XSC_DIAMOND_ERR_CODE_SND_WQE_FORMAT = 0xab, ++ XSC_DIAMOND_ERR_CODE_RCV_WQE_DMA = 0xaf, ++ XSC_DIAMOND_ERR_CODE_DATA_DMA_RD_REQ = 0xb2, ++ XSC_DIAMOND_ERR_CODE_DATA_DMA_WR_RSP_GEN_CQE = 0xb4, ++ XSC_DIAMOND_ERR_CODE_DATA_DMA_WR_RSP = 0xb5, ++ XSC_DIAMOND_ERR_CODE_LEN_GEN_CQE = 0xc4, ++ XSC_DIAMOND_ERR_CODE_LEN = 0xc5, ++ XSC_DIAMOND_ERR_CODE_REMOTE_MR = 0xd4, ++ 
XSC_DIAMOND_ERR_CODE_REMOTE_MR_GEN_CQE = 0xd5, ++ XSC_DIAMOND_ERR_CODE_LOCAL_MR_RSP = 0xd6, ++ XSC_DIAMOND_ERR_CODE_FLUSH = 0xff, + }; + + /* TODO: sw cqe opcode*/ +@@ -102,6 +124,9 @@ enum { + XSC_OPCODE_RDMA_REQ_ERROR = 8, + XSC_OPCODE_RDMA_RSP_ERROR = 9, + XSC_OPCODE_RDMA_CQE_ERROR = 10, ++ XSC_OPCODE_RDMA_MAD_REQ_SEND = 11, ++ XSC_OPCODE_RDMA_MAD_RSP_RECV = 12, ++ XSC_OPCODE_RDMA_CQE_RAW_SNF = 13, + }; + + enum { +@@ -147,13 +172,7 @@ struct xsc_wqe_data_seg { + }; + + struct xsc_cqe { +- union { +- uint8_t msg_opcode; +- struct { +- uint8_t error_code:7; +- uint8_t is_error:1; +- }; +- }; ++ uint8_t placeholder1; + __le32 qp_id:15; + uint8_t :1; + uint8_t se:1; +@@ -166,7 +185,9 @@ struct xsc_cqe { + __le32 vni; + __le64 ts:48; + __le16 wqe_id; +- __le16 rsv[3]; ++ uint8_t placeholder2; ++ uint8_t rsv2; ++ __le16 rsv[2]; + __le16 rsv1:15; + uint8_t owner:1; + }; +@@ -174,32 +195,10 @@ struct xsc_cqe { + /* Size of CQE */ + #define XSC_CQE_SIZE sizeof(struct xsc_cqe) + +-union xsc_db_data { +- struct { +- __le32 sq_next_pid:16; +- __le32 sqn:15; +- __le32 :1; +- }; +- struct { +- __le32 rq_next_pid:13; +- __le32 rqn:15; +- __le32 :4; +- }; +- struct { +- __le32 cq_next_cid:16; +- __le32 cqn:15; +- __le32 solicited:1; +- }; +- __le32 raw_data; +-}; +- + #define CQM_DB_NEXT_CID_OFFSET(n) (4 * (n)) + + #define XSC_SEND_WQE_RING_DEPTH_MIN 16 + #define XSC_CQE_RING_DEPTH_MIN 2 +-#define XSC_SEND_WQE_RING_DEPTH_MAX 1024 +-#define XSC_RECV_WQE_RING_DEPTH_MAX 1024 +-#define XSC_CQE_RING_DEPTH_MAX (1024 * 32) + + /* + * Registers that are allocated by HW and accessed by SW in 4-byte granularity +diff --git a/providers/xscale/xscale.c b/providers/xscale/xscale.c +index e24cfd2..8b04558 100644 +--- a/providers/xscale/xscale.c ++++ b/providers/xscale/xscale.c +@@ -16,12 +16,14 @@ + #include + #include + ++#include + #include + + #include "xscale.h" + #include "xsc-abi.h" + #include "wqe.h" + #include "xsc_hsi.h" ++#include "xsc_hw.h" + + #ifndef CPU_OR + #define CPU_OR(x, y, z) do {} while (0) +@@ -60,7 +62,8 @@ static const struct verbs_context_ops xsc_ctx_common_ops = { + .alloc_pd = xsc_alloc_pd, + .dealloc_pd = xsc_free_pd, + .reg_mr = xsc_reg_mr, +- .rereg_mr = xsc_rereg_mr, ++ .reg_dmabuf_mr = xsc_reg_dmabuf_mr, ++ .rereg_mr = NULL, + .dereg_mr = xsc_dereg_mr, + .alloc_mw = NULL, + .dealloc_mw = NULL, +@@ -417,6 +420,10 @@ static void xsc_read_env(struct ibv_device *ibdev, struct xsc_context *ctx) + ctx->stall_cycles = xsc_stall_cq_poll_min; + } + ++ env_value = getenv("XSC_POST_RECV_DUMP_WQE"); ++ if (env_value) ++ xsc_post_recv_dump_wqe = (strcmp(env_value, "0")) ? 
1 : 0; ++ + } + + static void open_debug_file(struct xsc_context *ctx) +@@ -787,6 +794,7 @@ static void xsc_munmap(struct xsc_context *context) + munmap(context->cqm_armdb_va, context->db_mmap_size); + + } ++ + static struct verbs_context *xsc_alloc_context(struct ibv_device *ibdev, + int cmd_fd, + void *private_data) +@@ -845,6 +853,7 @@ static struct verbs_context *xsc_alloc_context(struct ibv_device *ibdev, + context->send_ds_shift = xsc_ilog2(resp.send_ds_num); + context->recv_ds_num = resp.recv_ds_num; + context->recv_ds_shift = xsc_ilog2(resp.recv_ds_num); ++ context->device_id = resp.device_id; + + xsc_dbg(context->dbg_fp, XSC_DBG_CTX, + "max_num_qps:%u, max_sq_desc_sz:%u max_rq_desc_sz:%u " \ +@@ -894,6 +903,7 @@ static struct verbs_context *xsc_alloc_context(struct ibv_device *ibdev, + context->atomic_cap = device_attr.orig_attr.atomic_cap; + context->cached_tso_caps = device_attr.tso_caps; + context->max_dm_size = device_attr.max_dm_size; ++ context->max_cqe = device_attr.orig_attr.max_cqe; + } + + for (j = 0; j < min(XSC_MAX_PORTS_NUM, context->num_ports); ++j) { +diff --git a/providers/xscale/xscale.h b/providers/xscale/xscale.h +index c6cc9f7..e837e9b 100644 +--- a/providers/xscale/xscale.h ++++ b/providers/xscale/xscale.h +@@ -45,6 +45,7 @@ enum { + enum { + XSC_QP_FLAG_RAWPACKET_TSO = 1 << 9, + XSC_QP_FLAG_RAWPACKET_TX = 1 << 10, ++ XSC_QP_FLAG_RAWPACKET_SNIFFER = 1 << 11, + }; + + +@@ -66,6 +67,7 @@ enum { + XSC_DBG_CTX = 1 << 7, + XSC_DBG_PD = 1 << 8, + XSC_DBG_MR = 1 << 9, ++ XSC_DBG_QP_RECV = 1 << 10, + }; + + extern uint32_t xsc_debug_mask; +@@ -75,7 +77,7 @@ extern int xsc_freeze_on_error_cqe; + #ifdef XSC_DEBUG + #define xsc_dbg(fp, mask, fmt, args...) \ + do { \ +- if (xsc_debug_mask & mask) { \ ++ if (unlikely(xsc_debug_mask & mask)) { \ + char host[256]; \ + char timestr[32]; \ + struct tm now_tm; \ +@@ -246,6 +248,7 @@ struct xsc_context { + struct xsc_packet_pacing_caps packet_pacing_caps; + uint16_t flow_action_flags; + uint64_t max_dm_size; ++ uint32_t max_cqe; + uint32_t eth_min_inline_size; + uint32_t dump_fill_mkey; + __be32 dump_fill_mkey_be; +@@ -264,6 +267,7 @@ struct xsc_context { + uint32_t send_ds_shift; + uint32_t recv_ds_shift; + FILE *dbg_fp; ++ uint16_t device_id; + }; + + struct xsc_bitmap { +@@ -343,7 +347,7 @@ struct xsc_cq { + int stall_adaptive_enable; + int stall_cycles; + struct xsc_resource *cur_rsc; +- struct xsc_cqe64 *cqe64; ++ struct xsc_cqe *cqe; + uint32_t flags; + int umr_opcode; + struct xscdv_clock_info last_clock_info; +@@ -387,7 +391,6 @@ struct xsc_dm { + + struct xsc_mr { + struct verbs_mr vmr; +- struct xsc_buf buf; + uint32_t alloc_flags; + }; + +@@ -408,6 +411,17 @@ struct xsc_qp { + struct xsc_buf sq_buf; + int sq_buf_size; + ++ int err; ++ /* Number of WR entries posted in the current wr session */ ++ int nreq; ++ uint32_t cur_post_rb; ++ void *cur_ctrl; ++ void *cur_data; ++ int cur_ds_num; ++ uint32_t cur_data_len; ++ uint64_t cur_remote_addr; ++ uint32_t cur_remote_key; ++ + uint8_t fm_cache; + uint8_t sq_signal_bits; + struct xsc_wq sq; +@@ -426,6 +440,7 @@ struct xsc_qp { + uint32_t tisn; + uint32_t rqn; + uint32_t sqn; ++ unsigned int err_occurred; + }; + + struct xsc_ah { +@@ -514,6 +529,7 @@ extern int xsc_stall_cq_poll_max; + extern int xsc_stall_cq_inc_step; + extern int xsc_stall_cq_dec_step; + extern int xsc_single_threaded; ++extern int xsc_post_recv_dump_wqe; + + static inline unsigned DIV_ROUND_UP(unsigned n, unsigned d) + { +@@ -658,6 +674,8 @@ int xsc_free_pd(struct ibv_pd *pd); + struct ibv_mr 
*xsc_alloc_null_mr(struct ibv_pd *pd); + struct ibv_mr *xsc_reg_mr(struct ibv_pd *pd, void *addr, + size_t length, uint64_t hca_va, int access); ++struct ibv_mr *xsc_reg_dmabuf_mr(struct ibv_pd *pd, uint64_t offset, size_t length, ++ uint64_t iova, int fd, int acc); + int xsc_rereg_mr(struct verbs_mr *mr, int flags, struct ibv_pd *pd, void *addr, + size_t length, int access); + int xsc_dereg_mr(struct verbs_mr *mr); +@@ -666,9 +684,8 @@ struct ibv_cq *xsc_create_cq(struct ibv_context *context, int cqe, + int comp_vector); + struct ibv_cq_ex *xsc_create_cq_ex(struct ibv_context *context, + struct ibv_cq_init_attr_ex *cq_attr); +-int xsc_cq_fill_pfns(struct xsc_cq *cq, +- const struct ibv_cq_init_attr_ex *cq_attr, +- struct xsc_context *xctx); ++void xsc_cq_fill_pfns(struct xsc_cq *cq, ++ const struct ibv_cq_init_attr_ex *cq_attr); + int xsc_alloc_cq_buf(struct xsc_context *xctx, struct xsc_cq *cq, + struct xsc_buf *buf, int nent, int cqe_sz); + int xsc_free_cq_buf(struct xsc_context *ctx, struct xsc_buf *buf); +@@ -710,7 +727,6 @@ int xsc_destroy_ah(struct ibv_ah *ah); + int xsc_attach_mcast(struct ibv_qp *qp, const union ibv_gid *gid, uint16_t lid); + int xsc_detach_mcast(struct ibv_qp *qp, const union ibv_gid *gid, uint16_t lid); + int xsc_round_up_power_of_two(long long sz); +-void *xsc_get_send_wqe(struct xsc_qp *qp, int n); + struct ibv_xrcd *xsc_open_xrcd(struct ibv_context *context, + struct ibv_xrcd_init_attr *xrcd_init_attr); + int xsc_close_xrcd(struct ibv_xrcd *ib_xrcd); +@@ -750,7 +766,7 @@ int xsc_read_counters(struct ibv_counters *counters, + uint64_t *counters_value, + uint32_t ncounters, + uint32_t flags); +- ++int xsc_qp_fill_wr_pfns(struct xsc_qp *xqp, const struct ibv_qp_init_attr_ex *attr); + static inline void *xsc_find_uidx(struct xsc_context *ctx, uint32_t uidx) + { + int tind = uidx >> XSC_UIDX_TABLE_SHIFT; +@@ -849,4 +865,9 @@ static inline uint8_t calc_sig(void *wqe, int size) + return ~res; + } + ++static inline void *xsc_get_send_wqe(struct xsc_qp *qp, int n) ++{ ++ return qp->sq_start + (n << qp->sq.wqe_shift); ++} ++ + #endif /* XSC_H */ +-- +2.43.0 + diff --git a/0064-libxscale-automatically-load-xsc_ib.ko.patch b/0064-libxscale-automatically-load-xsc_ib.ko.patch new file mode 100644 index 0000000000000000000000000000000000000000..21686742c7de8347be1f2859e9edccdadc643552 --- /dev/null +++ b/0064-libxscale-automatically-load-xsc_ib.ko.patch @@ -0,0 +1,27 @@ +From 15228dcde5da4eaae15219978983f388576e4ec1 Mon Sep 17 00:00:00 2001 +From: Xin Tian +Date: Fri, 16 May 2025 14:16:52 +0800 +Subject: [PATCH] libxscale: automatically load xsc_ib.ko + +Automatically load xsc_ib.ko when xsc_eth.ko is loaded + +Signed-off-by: Xin Tian +--- + kernel-boot/rdma-hw-modules.rules | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/kernel-boot/rdma-hw-modules.rules b/kernel-boot/rdma-hw-modules.rules +index 95eaf72..fb9aab4 100644 +--- a/kernel-boot/rdma-hw-modules.rules ++++ b/kernel-boot/rdma-hw-modules.rules +@@ -15,6 +15,7 @@ ENV{ID_NET_DRIVER}=="i40e", RUN{builtin}+="kmod load i40iw" + ENV{ID_NET_DRIVER}=="mlx4_en", RUN{builtin}+="kmod load mlx4_ib" + ENV{ID_NET_DRIVER}=="mlx5_core", RUN{builtin}+="kmod load mlx5_ib" + ENV{ID_NET_DRIVER}=="qede", RUN{builtin}+="kmod load qedr" ++ENV{ID_NET_DRIVER}=="xsc_eth", RUN{builtin}+="kmod load xsc_ib" + + # The user must explicitly load these modules via /etc/modules-load.d/ or otherwise + # rxe +-- +2.43.0 + diff --git a/0065-libhns-Fix-double-free-of-rinl_buf-wqe_list.patch b/0065-libhns-Fix-double-free-of-rinl_buf-wqe_list.patch 
new file mode 100644 index 0000000000000000000000000000000000000000..e568c7aba5da03c5d523b64d7fd022f99a6a51c8 --- /dev/null +++ b/0065-libhns-Fix-double-free-of-rinl_buf-wqe_list.patch @@ -0,0 +1,53 @@ +From 583d8210da89563fcef0c6e508f58cc7adf72a3b Mon Sep 17 00:00:00 2001 +From: wenglianfa +Date: Mon, 12 May 2025 10:51:32 +0800 +Subject: [PATCH 65/65] libhns: Fix double-free of rinl_buf->wqe_list + +driver inclusion +category: bugfix +bugzilla: https://gitee.com/src-openeuler/rdma-core/issues/ICAQ55 + +------------------------------------------------------------------ + +rinl_buf->wqe_list will be double-freed in error flow, first in +alloc_recv_rinl_buf() and then in free_recv_rinl_buf(). Actually +free_recv_rinl_buf() shouldn't be called when alloc_recv_rinl_buf() +failed. + +Fixes: 83b0baff3ccf ("libhns: Refactor rq inline") +Signed-off-by: wenglianfa +Signed-off-by: Junxian Huang +--- + providers/hns/hns_roce_u_verbs.c | 7 ++++--- + 1 file changed, 4 insertions(+), 3 deletions(-) + +diff --git a/providers/hns/hns_roce_u_verbs.c b/providers/hns/hns_roce_u_verbs.c +index edd8e3d..8bf7bc1 100644 +--- a/providers/hns/hns_roce_u_verbs.c ++++ b/providers/hns/hns_roce_u_verbs.c +@@ -1453,18 +1453,19 @@ static int qp_alloc_wqe(struct ibv_qp_init_attr_ex *attr, + qp->dca_wqe.shift = qp->pageshift; + qp->dca_wqe.bufs = calloc(qp->dca_wqe.max_cnt, sizeof(void *)); + if (!qp->dca_wqe.bufs) +- goto err_alloc; ++ goto err_alloc_recv_rinl_buf; + verbs_debug(&ctx->ibv_ctx, "alloc DCA buf.\n"); + } else { + if (hns_roce_alloc_buf(&qp->buf, qp->buf_size, + 1 << qp->pageshift)) +- goto err_alloc; ++ goto err_alloc_recv_rinl_buf; + } + + return 0; + +-err_alloc: ++err_alloc_recv_rinl_buf: + free_recv_rinl_buf(&qp->rq_rinl_buf); ++err_alloc: + if (qp->rq.wrid) + free(qp->rq.wrid); + +-- +2.33.0 + diff --git a/0066-libhns-Add-check-for-input-param-of-hnsdv_query_devi.patch b/0066-libhns-Add-check-for-input-param-of-hnsdv_query_devi.patch new file mode 100644 index 0000000000000000000000000000000000000000..5f0911c46000f96a8b88656e626518b4ffa5acbd --- /dev/null +++ b/0066-libhns-Add-check-for-input-param-of-hnsdv_query_devi.patch @@ -0,0 +1,54 @@ +From 57985b930eab7e5cf4dc53efa6d303ede9b414c6 Mon Sep 17 00:00:00 2001 +From: Junxian Huang +Date: Mon, 20 May 2024 14:05:33 +0800 +Subject: [PATCH 66/67] libhns: Add check for input param of + hnsdv_query_device() + +mainline inclusion +from mainline-master +commit 19e66a6b75fd1f441e787d1791fe8a416b2d56cb +category: bugfix +bugzilla: https://gitee.com/src-openeuler/rdma-core/issues/#ICEES4 +CVE: NA + +Reference: +https://github.com/linux-rdma/rdma-core/pull/1462/commits/5f9e08f62feb67d084... + +------------------------------------------------------------------ + +Add check for input param of hnsdv_query_device() to avoid null ptr. 
+ +Fixes: cf6d9149f8f5 ("libhns: Introduce hns direct verbs") +Signed-off-by: Junxian Huang +Signed-off-by: Donghua Huang +--- + providers/hns/hns_roce_u_verbs.c | 5 +++-- + 1 file changed, 3 insertions(+), 2 deletions(-) + +diff --git a/providers/hns/hns_roce_u_verbs.c b/providers/hns/hns_roce_u_verbs.c +index 8bf7bc1..8594666 100644 +--- a/providers/hns/hns_roce_u_verbs.c ++++ b/providers/hns/hns_roce_u_verbs.c +@@ -1933,9 +1933,9 @@ struct ibv_qp *hnsdv_create_qp(struct ibv_context *context, + int hnsdv_query_device(struct ibv_context *context, + struct hnsdv_context *attrs_out) + { +- struct hns_roce_device *hr_dev = to_hr_dev(context->device); ++ struct hns_roce_device *hr_dev; + +- if (!hr_dev || !attrs_out) ++ if (!context || !context->device || !attrs_out) + return EINVAL; + + if (!is_hns_dev(context->device)) { +@@ -1944,6 +1944,7 @@ int hnsdv_query_device(struct ibv_context *context, + } + memset(attrs_out, 0, sizeof(*attrs_out)); + ++ hr_dev = to_hr_dev(context->device); + attrs_out->comp_mask |= HNSDV_CONTEXT_MASK_CONGEST_TYPE; + attrs_out->congest_type = hr_dev->congest_cap; + +-- +2.33.0 + diff --git a/0067-libhns-Adapt-UD-inline-data-size-for-UCX.patch b/0067-libhns-Adapt-UD-inline-data-size-for-UCX.patch new file mode 100644 index 0000000000000000000000000000000000000000..1c94bceb9b74e714bace48990d427525240fb809 --- /dev/null +++ b/0067-libhns-Adapt-UD-inline-data-size-for-UCX.patch @@ -0,0 +1,77 @@ +From 22a2b01e953c48648a34aa1d4066357d60cfb5fc Mon Sep 17 00:00:00 2001 +From: wenglianfa +Date: Tue, 25 Feb 2025 20:29:53 +0800 +Subject: [PATCH 67/67] libhns: Adapt UD inline data size for UCX + +driver inclusion +category: bugfix +bugzilla: https://gitee.com/src-openeuler/rdma-core/issues/ICEEPO + +------------------------------------------------------------------ + +Adapt UD inline data size for UCX. The value +must be at least 128 to avoid the ucx bug. 
+ +The issue url: +https://gitee.com/src-openeuler/rdma-core/issues/ICEEPO?from=project-issue + +Signed-off-by: wenglianfa +Signed-off-by: Donghua Huang +--- + providers/hns/hns_roce_u.h | 2 ++ + providers/hns/hns_roce_u_verbs.c | 17 +++++++++++++---- + 2 files changed, 15 insertions(+), 4 deletions(-) + +diff --git a/providers/hns/hns_roce_u.h b/providers/hns/hns_roce_u.h +index e7e3f01..3d34495 100644 +--- a/providers/hns/hns_roce_u.h ++++ b/providers/hns/hns_roce_u.h +@@ -83,6 +83,8 @@ typedef _Atomic(uint64_t) atomic_bitmap_t; + #define HNS_ROCE_ADDRESS_MASK 0xFFFFFFFF + #define HNS_ROCE_ADDRESS_SHIFT 32 + ++#define HNS_ROCE_MIN_UD_INLINE 128 ++ + #define roce_get_field(origin, mask, shift) \ + (((le32toh(origin)) & (mask)) >> (shift)) + +diff --git a/providers/hns/hns_roce_u_verbs.c b/providers/hns/hns_roce_u_verbs.c +index 8594666..5ec2341 100644 +--- a/providers/hns/hns_roce_u_verbs.c ++++ b/providers/hns/hns_roce_u_verbs.c +@@ -1511,11 +1511,20 @@ static unsigned int get_sge_num_from_max_inl_data(bool is_ud, + } + + static uint32_t get_max_inline_data(struct hns_roce_context *ctx, +- struct ibv_qp_cap *cap) ++ struct ibv_qp_cap *cap, ++ bool is_ud) + { +- if (cap->max_inline_data) +- return min_t(uint32_t, roundup_pow_of_two(cap->max_inline_data), ++ uint32_t max_inline_data = cap->max_inline_data; ++ ++ if (max_inline_data) { ++ max_inline_data = roundup_pow_of_two(max_inline_data); ++ ++ if (is_ud && max_inline_data < HNS_ROCE_MIN_UD_INLINE) ++ max_inline_data = HNS_ROCE_MIN_UD_INLINE; ++ ++ return min_t(uint32_t, max_inline_data, + ctx->max_inline_data); ++ } + + return 0; + } +@@ -1536,7 +1545,7 @@ static void set_ext_sge_param(struct hns_roce_context *ctx, + attr->cap.max_send_sge); + + if (ctx->config & HNS_ROCE_RSP_EXSGE_FLAGS) { +- attr->cap.max_inline_data = get_max_inline_data(ctx, &attr->cap); ++ attr->cap.max_inline_data = get_max_inline_data(ctx, &attr->cap, is_ud); + + inline_ext_sge = max(ext_wqe_sge_cnt, + get_sge_num_from_max_inl_data(is_ud, +-- +2.25.1 + diff --git a/0068-libhns-Clean-up-DCA-magic-number-warnings.patch b/0068-libhns-Clean-up-DCA-magic-number-warnings.patch new file mode 100644 index 0000000000000000000000000000000000000000..a24a094de3a82c5a3ae1b26d8294115d88d9338e --- /dev/null +++ b/0068-libhns-Clean-up-DCA-magic-number-warnings.patch @@ -0,0 +1,130 @@ +From 0ffff9a86bf0896b8278bae5cd1d5d75cf0a3aa0 Mon Sep 17 00:00:00 2001 +From: Junxian Huang +Date: Tue, 29 Jul 2025 16:31:38 +0800 +Subject: [PATCH 1/2] libhns: Clean up DCA magic number warnings + +Use macros instead of magic numbers. 
+ +Fixes: 1479aa19be37 ("libhns: Use shared memory to sync DCA status") +Fixes: 3aa4683ef700 ("libhns: Add support for attaching QP's WQE buffer") +Fixes: 9ebe050337c4 ("libhns: Add support for shrinking DCA memory pool") +Signed-off-by: Junxian Huang +Signed-off-by: Donghua Huang +--- + providers/hns/hns_roce_u.c | 3 ++- + providers/hns/hns_roce_u_buf.c | 24 ++++++++++++++++++------ + 2 files changed, 20 insertions(+), 7 deletions(-) + +diff --git a/providers/hns/hns_roce_u.c b/providers/hns/hns_roce_u.c +index ec995e7..326f11a 100644 +--- a/providers/hns/hns_roce_u.c ++++ b/providers/hns/hns_roce_u.c +@@ -150,6 +150,7 @@ static int hns_roce_mmap(struct hns_roce_device *hr_dev, + static int mmap_dca(struct hns_roce_context *ctx, int cmd_fd, + int page_size, size_t size, uint64_t mmap_key) + { ++#define PRIME_QP_BUF_RATIO 2 + struct hns_roce_dca_ctx *dca_ctx = &ctx->dca_ctx; + void *addr; + +@@ -161,7 +162,7 @@ static int mmap_dca(struct hns_roce_context *ctx, int cmd_fd, + } + + dca_ctx->buf_status = addr; +- dca_ctx->sync_status = addr + size / 2; ++ dca_ctx->sync_status = addr + size / PRIME_QP_BUF_RATIO; + + return 0; + } +diff --git a/providers/hns/hns_roce_u_buf.c b/providers/hns/hns_roce_u_buf.c +index 780683e..952c26f 100644 +--- a/providers/hns/hns_roce_u_buf.c ++++ b/providers/hns/hns_roce_u_buf.c +@@ -126,11 +126,13 @@ static inline void *dca_mem_addr(struct hns_roce_dca_mem *dca_mem, int offset) + static int register_dca_mem(struct hns_roce_context *ctx, uint64_t key, + void *addr, uint32_t size, uint32_t *handle) + { ++#define REGISTER_DCA_MEM_ATTR_NUM 4 + struct ib_uverbs_attr *attr; + int ret; + + DECLARE_COMMAND_BUFFER(cmd, HNS_IB_OBJECT_DCA_MEM, +- HNS_IB_METHOD_DCA_MEM_REG, 4); ++ HNS_IB_METHOD_DCA_MEM_REG, ++ REGISTER_DCA_MEM_ATTR_NUM); + fill_attr_in_uint32(cmd, HNS_IB_ATTR_DCA_MEM_REG_LEN, size); + fill_attr_in_uint64(cmd, HNS_IB_ATTR_DCA_MEM_REG_ADDR, + ioctl_ptr_to_u64(addr)); +@@ -151,10 +153,12 @@ static int register_dca_mem(struct hns_roce_context *ctx, uint64_t key, + + static void deregister_dca_mem(struct hns_roce_context *ctx, uint32_t handle) + { ++#define DEREGISTER_DCA_MEM_ATTR_NUM 1 + int ret; + + DECLARE_COMMAND_BUFFER(cmd, HNS_IB_OBJECT_DCA_MEM, +- HNS_IB_METHOD_DCA_MEM_DEREG, 1); ++ HNS_IB_METHOD_DCA_MEM_DEREG, ++ DEREGISTER_DCA_MEM_ATTR_NUM); + fill_attr_in_obj(cmd, HNS_IB_ATTR_DCA_MEM_DEREG_HANDLE, handle); + ret = execute_ioctl(&ctx->ibv_ctx.context, cmd); + if (ret) +@@ -181,10 +185,12 @@ struct hns_dca_mem_shrink_resp { + static int shrink_dca_mem(struct hns_roce_context *ctx, uint32_t handle, + uint64_t size, struct hns_dca_mem_shrink_resp *resp) + { ++#define SHRINK_DCA_MEM_ATTR_NUM 4 + int ret; + + DECLARE_COMMAND_BUFFER(cmd, HNS_IB_OBJECT_DCA_MEM, +- HNS_IB_METHOD_DCA_MEM_SHRINK, 4); ++ HNS_IB_METHOD_DCA_MEM_SHRINK, ++ SHRINK_DCA_MEM_ATTR_NUM); + fill_attr_in_obj(cmd, HNS_IB_ATTR_DCA_MEM_SHRINK_HANDLE, handle); + fill_attr_in_uint64(cmd, HNS_IB_ATTR_DCA_MEM_SHRINK_RESERVED_SIZE, size); + fill_attr_out(cmd, HNS_IB_ATTR_DCA_MEM_SHRINK_OUT_FREE_KEY, +@@ -209,10 +215,12 @@ struct hns_dca_mem_query_resp { + static int query_dca_mem(struct hns_roce_context *ctx, uint32_t handle, + uint32_t index, struct hns_dca_mem_query_resp *resp) + { ++#define QUERY_DCA_MEM_ATTR_NUM 5 + int ret; + + DECLARE_COMMAND_BUFFER(cmd, HNS_IB_OBJECT_DCA_MEM, +- HNS_IB_METHOD_DCA_MEM_QUERY, 5); ++ HNS_IB_METHOD_DCA_MEM_QUERY, ++ QUERY_DCA_MEM_ATTR_NUM); + fill_attr_in_obj(cmd, HNS_IB_ATTR_DCA_MEM_QUERY_HANDLE, handle); + fill_attr_in_uint32(cmd, 
HNS_IB_ATTR_DCA_MEM_QUERY_PAGE_INDEX, index); + fill_attr_out(cmd, HNS_IB_ATTR_DCA_MEM_QUERY_OUT_KEY, +@@ -233,10 +241,12 @@ static int query_dca_mem(struct hns_roce_context *ctx, uint32_t handle, + void hns_roce_detach_dca_mem(struct hns_roce_context *ctx, uint32_t handle, + struct hns_roce_dca_detach_attr *attr) + { ++#define DETACH_DCA_MEM_ATTR_NUM 4 + int ret; + + DECLARE_COMMAND_BUFFER(cmd, HNS_IB_OBJECT_DCA_MEM, +- HNS_IB_METHOD_DCA_MEM_DETACH, 4); ++ HNS_IB_METHOD_DCA_MEM_DETACH, ++ DETACH_DCA_MEM_ATTR_NUM); + fill_attr_in_obj(cmd, HNS_IB_ATTR_DCA_MEM_DETACH_HANDLE, handle); + fill_attr_in_uint32(cmd, HNS_IB_ATTR_DCA_MEM_DETACH_SQ_INDEX, + attr->sq_index); +@@ -257,10 +267,12 @@ static int attach_dca_mem(struct hns_roce_context *ctx, uint32_t handle, + struct hns_roce_dca_attach_attr *attr, + struct hns_dca_mem_attach_resp *resp) + { ++#define ATTACH_DCA_MEM_ATTR_NUM 6 + int ret; + + DECLARE_COMMAND_BUFFER(cmd, HNS_IB_OBJECT_DCA_MEM, +- HNS_IB_METHOD_DCA_MEM_ATTACH, 6); ++ HNS_IB_METHOD_DCA_MEM_ATTACH, ++ ATTACH_DCA_MEM_ATTR_NUM); + fill_attr_in_obj(cmd, HNS_IB_ATTR_DCA_MEM_ATTACH_HANDLE, handle); + fill_attr_in_uint32(cmd, HNS_IB_ATTR_DCA_MEM_ATTACH_SQ_OFFSET, + attr->sq_offset); +-- +2.33.0 + diff --git a/0069-libhns-Clean-up-space-tab-blank-line-warnings.patch b/0069-libhns-Clean-up-space-tab-blank-line-warnings.patch new file mode 100644 index 0000000000000000000000000000000000000000..da55d983e186622e3a0a6fafc10c5f0e99ac369a --- /dev/null +++ b/0069-libhns-Clean-up-space-tab-blank-line-warnings.patch @@ -0,0 +1,50 @@ +From 7971afc8377f46a9b6d0c457b5a65607dba53181 Mon Sep 17 00:00:00 2001 +From: Junxian Huang +Date: Tue, 29 Jul 2025 16:44:40 +0800 +Subject: [PATCH 2/2] libhns: Clean up space/tab/blank line warnings + +Clean up following warnings: +* Please use 1 blank space(' ') instead of TAB('\t') between the right + comment and the previous code. +* Do not add blank lines at the end of a code block defined by braces. +* Return value judgment should follow the function call. 
+* Do not put two or more continuous blank lines inside function + +Fixes: 5c1766078f32 ("libhns: Add direct verbs support to config DCA") +Fixes: 376e3c14d77c ("libhns: Adapt UD inline data size for UCX") +Fixes: 5106d55eaf78 ("libhns: Support cqe inline") +Fixes: a624938fa6ab ("libhns: Refactor rq inline") +Signed-off-by: Junxian Huang +Signed-off-by: Donghua Huang +--- + providers/hns/hns_roce_u_hw_v2.c | 1 - + providers/hns/hns_roce_u_verbs.c | 1 - + 2 files changed, 2 deletions(-) + +diff --git a/providers/hns/hns_roce_u_hw_v2.c b/providers/hns/hns_roce_u_hw_v2.c +index b80c574..20fb850 100644 +--- a/providers/hns/hns_roce_u_hw_v2.c ++++ b/providers/hns/hns_roce_u_hw_v2.c +@@ -2608,7 +2608,6 @@ static void wr_set_sge_list_rc(struct ibv_qp_ex *ibv_qp, size_t num_sge, + return; + } + +- + hr_reg_write(wqe, RCWQE_MSG_START_SGE_IDX, + qp->sge_info.start_idx & (qp->ex_sge.sge_cnt - 1)); + +diff --git a/providers/hns/hns_roce_u_verbs.c b/providers/hns/hns_roce_u_verbs.c +index 5ec2341..44e706d 100644 +--- a/providers/hns/hns_roce_u_verbs.c ++++ b/providers/hns/hns_roce_u_verbs.c +@@ -1518,7 +1518,6 @@ static uint32_t get_max_inline_data(struct hns_roce_context *ctx, + + if (max_inline_data) { + max_inline_data = roundup_pow_of_two(max_inline_data); +- + if (is_ud && max_inline_data < HNS_ROCE_MIN_UD_INLINE) + max_inline_data = HNS_ROCE_MIN_UD_INLINE; + +-- +2.33.0 + diff --git a/rdma-core-35.0.tar.gz b/rdma-core-35.0.tar.gz deleted file mode 100644 index 226137c258a735e7bb0b8812543f08cfc423d3f6..0000000000000000000000000000000000000000 Binary files a/rdma-core-35.0.tar.gz and /dev/null differ diff --git a/rdma-core-50.0.tar.gz b/rdma-core-50.0.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f4116112b3c33ac42f85e21ffc8d1a9c0d401f16 Binary files /dev/null and b/rdma-core-50.0.tar.gz differ diff --git a/rdma-core.spec b/rdma-core.spec index f3eb78310f440769086532569ce96e0d206197af..20c44731211a8dfbabc25ac35adcf3d1c62d15dc 100644 --- a/rdma-core.spec +++ b/rdma-core.spec @@ -1,58 +1,99 @@ Name: rdma-core -Version: 35.0 -Release: 1 +Version: 50.0 +Release: 33 Summary: RDMA core userspace libraries and daemons -License: GPLv2 or BSD +License: GPL-2.0-only OR BSD-2-Clause AND BSD-3-Clause Url: https://github.com/linux-rdma/rdma-core Source: https://github.com/linux-rdma/rdma-core/releases/download/v%{version}/%{name}-%{version}.tar.gz +Patch1: 0001-Update-kernel-headers.patch +Patch2: 0002-libhns-Support-DSCP.patch +patch3: 0003-Update-kernel-headers.patch +patch4: 0004-libhns-Introduce-hns-direct-verbs.patch +patch5: 0005-libhns-Encapsulate-context-attribute-setting-into-a-.patch +patch6: 0006-libhns-Support-congestion-control-algorithm-configur.patch +patch7: 0007-libhns-Add-support-for-thread-domain-and-parent-doma.patch +patch8: 0008-libhns-Add-support-for-lock-free-QP.patch +patch9: 0009-libhns-Add-support-for-lock-free-CQ.patch +patch10: 0010-libhns-Add-support-for-lock-free-SRQ.patch +patch11: 0011-libhns-Support-flexible-WQE-buffer-page-size.patch +patch12: 0012-Update-kernel-headers.patch +patch13: 0013-libhns-Add-reset-stop-flow-mechanism.patch +patch14: 0014-libhns-Support-reporting-wc-as-software-mode.patch +patch15: 0015-libhns-return-error-when-post-send-in-reset-state.patch +patch16: 0016-libhns-Assign-doorbell-to-zero-when-allocate-it.patch +patch17: 0017-libhns-Fix-missing-reset-notification.patch +patch18: 0018-libhns-Fix-owner-bit-when-SQ-wraps-around-in-new-IO.patch +patch19: 0019-Update-kernel-headers.patch +patch20: 
0020-libhns-Introduce-DCA-for-RC-QP.patch +patch21: 0021-libhns-Add-support-for-shrinking-DCA-memory-pool.patch +patch22: 0022-libhns-Add-support-for-attaching-QP-s-WQE-buffer.patch +patch23: 0023-libhns-Use-shared-memory-to-sync-DCA-status.patch +patch24: 0024-libhns-Sync-DCA-status-by-shared-memory.patch +patch25: 0025-libhns-Add-direct-verbs-support-to-config-DCA.patch +patch26: 0026-libhns-Add-RoH-device-IDs.patch +patch27: 0027-libhns-Add-the-parsing-of-mac-type-in-RoH-mode.patch +patch28: 0028-libhns-Fix-missing-flexible-WQE-buffer-page-flag.patch +patch29: 0029-libhns-Fix-ext_sge-page-size.patch +patch30: 0030-libhns-Remove-unused-return-value.patch +patch31: 0031-libhns-Fix-several-context-locks-issue.patch +patch32: 0032-libhns-Clean-up-signed-unsigned-mix-with-relational-.patch +patch33: 0033-libhns-Fix-missing-flag-when-creating-qp-by-hnsdv_cr.patch +patch34: 0034-librdmacm-Fix-an-overflow-bug-in-qsort-comparison-function.patch +patch35: 0035-Fix-the-stride-calculation-for-MSN-PSN-area.patch +patch36: 0036-add-ZTE-Dinghai-rdma-driver.patch +patch37: 0037-libhns-Fix-out-of-order-issue-of-requester-when-sett.patch +patch38: 0038-libhns-Fix-reference-to-uninitialized-cq-pointer.patch +patch39: 0039-libhns-Fix-the-exception-branch-of-wr_start-is-not-l.patch +patch40: 0040-libhns-Fix-memory-leakage-when-DCA-is-enabled.patch +patch41: 0041-libhns-Fix-coredump-during-QP-destruction-when-send_.patch +patch42: 0042-libhns-Add-error-logs-to-help-diagnosis.patch +patch43: 0043-libhns-Fix-missing-fields-for-SRQ-WC.patch +patch44: 0044-libxscale-Add-Yunsilicon-User-Space-RDMA-Driver.patch +patch45: 0045-libhns-fix-incorrectly-using-fixed-pagesize.patch +patch46: 0046-libhns-fix-missing-new-IO-support-for-DCA.patch +patch47: 0047-libzrdma-Fix-wqe-polarity-set-error.patch +patch48: 0048-libzrdma-Add-interface-aligned-with-kernel.patch +patch49: 0049-libzrdma-Add-poll-cqe-error-to-Failed-status.patch +patch50: 0050-libzrdma-Add-sq-rq-flush-cqe-and-log-optimization.patch +patch51: 0051-libzrdma-Fix-capability-related-bugs.patch +patch52: 0052-libxscale-Match-dev-by-vid-and-did.patch +patch53: 0053-libhns-Clean-up-data-type-issues.patch +patch54: 0054-libhns-Fix-wrong-max-inline-data-value.patch +patch55: 0055-libhns-Fix-wrong-order-of-spin-unlock-in-modify-qp.patch +patch56: 0056-libhns-Add-initial-support-for-HNS-LTTng-tracing.patch +patch57: 0057-libhns-Add-tracepoint-for-HNS-RoCE-I-O.patch +patch58: 0058-libhns-Add-debug-log-for-lock-free-mode.patch +patch59: 0059-libhns-Fix-ret-not-assigned-in-create-srq.patch +patch60: 0060-libhns-Fix-pad-refcnt-leaking-in-error-flow-of-creat.patch +patch61: 0061-libhns-Fix-freeing-pad-without-checking-refcnt.patch +patch62: 0062-verbs-Assign-ibv-srq-pd-when-creating-SRQ.patch +patch63: 0063-libxscale-update-to-version-2412GA.patch +patch64: 0064-libxscale-automatically-load-xsc_ib.ko.patch +patch65: 0065-libhns-Fix-double-free-of-rinl_buf-wqe_list.patch +patch66: 0066-libhns-Add-check-for-input-param-of-hnsdv_query_devi.patch +patch67: 0067-libhns-Adapt-UD-inline-data-size-for-UCX.patch +patch68: 0068-libhns-Clean-up-DCA-magic-number-warnings.patch +patch69: 0069-libhns-Clean-up-space-tab-blank-line-warnings.patch + BuildRequires: binutils cmake >= 2.8.11 gcc libudev-devel pkgconfig pkgconfig(libnl-3.0) -BuildRequires: pkgconfig(libnl-route-3.0) valgrind-devel systemd systemd-devel +BuildRequires: pkgconfig(libnl-route-3.0) systemd systemd-devel +%ifarch %{valgrind_arches} +BuildRequires: valgrind-devel +%endif BuildRequires: python3-devel python3-Cython 
python3 python3-docutils perl-generators BuildRequires: ninja-build -Requires: dracut kmod systemd pciutils - -Provides: ibacm infiniband-diags-compat infiniband-diags libibverbs libibverbs-utils iwpmd libibumad librdmacm librdmacm-utils srp_daemon -Obsoletes: ibacm infiniband-diags-compat infiniband-diags libibverbs libibverbs-utils iwpmd libibumad librdmacm librdmacm-utils srp_daemon +Requires: systemd pciutils Provides: rdma = %{version}-%{release} Obsoletes: rdma < %{version}-%{release} -Provides: perl(IBswcountlimits) -Provides: libibmad = %{version}-%{release} -Obsoletes: libibmad < %{version}-%{release} -Obsoletes: openib-diags < 1.3 -Provides: libcxgb4 = %{version}-%{release} -Obsoletes: libcxgb4 < %{version}-%{release} -Provides: libefa = %{version}-%{release} -Obsoletes: libefa < %{version}-%{release} -Provides: libhfi1 = %{version}-%{release} -Obsoletes: libhfi1 < %{version}-%{release} -Provides: libi40iw = %{version}-%{release} -Obsoletes: libi40iw < %{version}-%{release} -Provides: libipathverbs = %{version}-%{release} -Obsoletes: libipathverbs < %{version}-%{release} -Provides: libmlx4 = %{version}-%{release} -Obsoletes: libmlx4 < %{version}-%{release} -Provides: libmlx5 = %{version}-%{release} -Obsoletes: libmlx5 < %{version}-%{release} -Provides: libmthca = %{version}-%{release} -Obsoletes: libmthca < %{version}-%{release} -Provides: libocrdma = %{version}-%{release} -Obsoletes: libocrdma < %{version}-%{release} -Provides: librxe = %{version}-%{release} -Obsoletes: librxe < %{version}-%{release} -Obsoletes: srptools <= 1.0.3 -Provides: srptools = %{version}-%{release} -Obsoletes: openib-srptools <= 0.0.6 - +Obsoletes: rdma-core < %{version}-%{release} Conflicts: infiniband-diags <= 1.6.7 %{?systemd_requires} %define CMAKE_FLAGS -GNinja -%define make_jobs ninja-build -v %{?_smp_mflags} -%define cmake_install DESTDIR=%{buildroot} ninja-build install %description This is the userspace components for the Linux Kernel's drivers/infiniband subsystem. @@ -65,14 +106,18 @@ Specifically this contains the userspace libraries for the following device node %package devel Summary: RDMA core development libraries and headers Requires: %{name} = %{version}-%{release} +Requires: libibverbs%{?_isa} = %{version}-%{release} Provides: libibverbs-devel = %{version}-%{release} Obsoletes: libibverbs-devel < %{version}-%{release} +Requires: libibumad%{?_isa} = %{version}-%{release} Provides: libibumad-devel = %{version}-%{release} Obsoletes: libibumad-devel < %{version}-%{release} +Requires: librdmacm%{?_isa} = %{version}-%{release} Provides: librdmacm-devel = %{version}-%{release} Obsoletes: librdmacm-devel < %{version}-%{release} Provides: ibacm-devel = %{version}-%{release} Obsoletes: ibacm-devel < %{version}-%{release} +Requires: infiniband-diags%{?_isa} = %{version}-%{release} Provides: infiniband-diags-devel = %{version}-%{release} Obsoletes: infiniband-diags-devel < %{version}-%{release} Provides: libibmad-devel = %{version}-%{release} @@ -83,6 +128,148 @@ BuildRequires: pkgconfig(libnl-3.0) pkgconfig(libnl-route-3.0) %description devel RDMA core development libraries and headers. +%package -n infiniband-diags +Summary: InfiniBand Diagnostic Tools +Requires: libibumad%{?_isa} = %{version}-%{release} +Provides: perl(IBswcountlimits) +Provides: libibmad = %{version}-%{release} +Obsoletes: libibmad < %{version}-%{release} +Obsoletes: openib-diags < 1.3 + +%description -n infiniband-diags +This package provides IB diagnostic programs and scripts needed to diagnose an +IB subnet. 
infiniband-diags now also provides libibmad. libibmad provides +low layer IB functions for use by the IB diagnostic and management +programs. These include MAD, SA, SMP, and other basic IB functions. + +%package -n infiniband-diags-compat +Summary: OpenFabrics Alliance InfiniBand Diagnostic Tools + +%description -n infiniband-diags-compat +Deprecated scripts and utilities which provide duplicated functionality, most +often at a reduced performance. These are maintained for the time being for +compatibility reasons. + +%package -n libibverbs +Summary: A library and drivers for direct userspace use of RDMA (InfiniBand/iWARP/RoCE) hardware +Provides: libcxgb4 = %{version}-%{release} +Obsoletes: libcxgb4 < %{version}-%{release} +Provides: libefa = %{version}-%{release} +Obsoletes: libefa < %{version}-%{release} +Provides: libhfi1 = %{version}-%{release} +Obsoletes: libhfi1 < %{version}-%{release} +Provides: libhns = %{version}-%{release} +Obsoletes: libhns < %{version}-%{release} +Provides: libipathverbs = %{version}-%{release} +Obsoletes: libipathverbs < %{version}-%{release} +Provides: libirdma = %{version}-%{release} +Obsoletes: libirdma < %{version}-%{release} +Provides: libmana = %{version}-%{release} +Obsoletes: libmana < %{version}-%{release} +Provides: libmlx4 = %{version}-%{release} +Obsoletes: libmlx4 < %{version}-%{release} +Provides: libmlx5 = %{version}-%{release} +Obsoletes: libmlx5 < %{version}-%{release} +Provides: libmthca = %{version}-%{release} +Obsoletes: libmthca < %{version}-%{release} +Provides: libocrdma = %{version}-%{release} +Obsoletes: libocrdma < %{version}-%{release} +Provides: librxe = %{version}-%{release} +Obsoletes: librxe < %{version}-%{release} +Provides: libxscale = %{version}-%{release} +Obsoletes: libxscale < %{version}-%{release} +Provides: libzrdma = %{version}-%{release} +Obsoletes: libzrdma < %{version}-%{release} + +%description -n libibverbs +libibverbs is a library that allows userspace processes to use RDMA +"verbs" as described in the InfiniBand Architecture Specification and +the RDMA Protocol Verbs Specification. This includes direct hardware +access from userspace to InfiniBand/iWARP adapters (kernel bypass) for +fast path operations. + +Device-specific plug-in ibverbs userspace drivers are included: + +- libcxgb4: Chelsio T4 iWARP HCA +- libefa: Amazon Elastic Fabric Adapter +- libhfi1: Intel Omni-Path HFI +- libhns: HiSilicon Hip08/09/10 SoC +- libipathverbs: QLogic InfiniPath HCA +- libirdma: Intel Ethernet Connection RDMA +- libmana: Microsoft Azure Network Adapter +- libmlx4: Mellanox ConnectX-3 InfiniBand HCA +- libmlx5: Mellanox Connect-IB/X-4+ InfiniBand HCA +- libmthca: Mellanox InfiniBand HCA +- libocrdma: Emulex OneConnect RDMA/RoCE Device +- libqedr: QLogic QL4xxx RoCE HCA +- librxe: A software implementation of the RoCE protocol +- libsiw: A software implementation of the iWarp protocol +- libvmw_pvrdma: VMware paravirtual RDMA device +- libxscale: Yunsilicon RDMA device +- libzrdma: ZTE Connection RDMA + +%package -n libibverbs-utils +Summary: Examples for the libibverbs library +Requires: libibverbs%{?_isa} = %{version}-%{release} + +%description -n libibverbs-utils +Useful libibverbs example programs such as ibv_devinfo, which +displays information about RDMA devices. 
+ +%package -n ibacm +Summary: InfiniBand Communication Manager Assistant +%{?systemd_requires} + +%description -n ibacm +The ibacm daemon helps reduce the load of managing path record lookups on +large InfiniBand fabrics by providing a user space implementation of what +is functionally similar to an ARP cache. The use of ibacm, when properly +configured, can reduce the SA packet load of a large IB cluster from O(n^2) +to O(n). The ibacm daemon is started and normally runs in the background, +user applications need not know about this daemon as long as their app +uses librdmacm to handle connection bring up/tear down. The librdmacm +library knows how to talk directly to the ibacm daemon to retrieve data. + +%package -n iwpmd +Summary: iWarp Port Mapper userspace daemon +%{?systemd_requires} + +%description -n iwpmd +iwpmd provides a userspace service for iWarp drivers to claim +tcp ports through the standard socket interface. + +%package -n libibumad +Summary: OpenFabrics Alliance InfiniBand umad (userspace management datagram) library + +%description -n libibumad +libibumad provides the userspace management datagram (umad) library +functions, which sit on top of the umad modules in the kernel. These +are used by the IB diagnostic and management tools, including OpenSM. + +%package -n librdmacm +Summary: Userspace RDMA Connection Manager + +%description -n librdmacm +librdmacm provides a userspace RDMA Communication Management API. + +%package -n librdmacm-utils +Summary: Examples for the librdmacm library +Requires: librdmacm%{?_isa} = %{version}-%{release} + +%description -n librdmacm-utils +Example test programs for the librdmacm library. + +%package -n srp_daemon +Summary: Tools for using the InfiniBand SRP protocol devices +Obsoletes: srptools <= 1.0.3 +Provides: srptools = %{version}-%{release} +Obsoletes: openib-srptools <= 0.0.6 +%{?systemd_requires} + +%description -n srp_daemon +In conjunction with the kernel ib_srp driver, srp_daemon allows you to +discover and use SCSI devices via the SCSI RDMA Protocol over InfiniBand. + %package -n python3-pyverbs Summary: Python3 API over IB verbs %{?python_provide:%python_provide python3-pyverbs} @@ -102,8 +289,7 @@ Obsoletes: infiniband-diags-help < %{version}-%{release} Man pages and other related documents for %{name}. %prep -%setup -%autosetup -v -p1 +%autosetup -p1 %build %if 0%{?_rundir:1} @@ -137,7 +323,7 @@ Man pages and other related documents for %{name}. -DPYTHON_EXECUTABLE:PATH=%{__python3} \ -DCMAKE_INSTALL_PYTHON_ARCH_LIB:PATH=%{python3_sitearch} \ -DNO_PYVERBS=0 -%make_jobs +%cmake_build %install %cmake_install @@ -158,83 +344,304 @@ install -D -m 0755 redhat/rdma.mlx4-setup.sh %{buildroot}%{_libexecdir}/mlx4-set rm -f %{buildroot}%{_sysconfdir}/rdma/modules/rdma.conf install -D -m0644 redhat/rdma.conf %{buildroot}%{_sysconfdir}/rdma/modules/rdma.conf -bin/ib_acme -D . -O +%{__cmake_builddir}/bin/ib_acme -D . 
-O install -D -m 0644 ibacm_opts.cfg %{buildroot}%{_sysconfdir}/rdma/ rm -rf %{buildroot}/%{_initrddir}/ rm -f %{buildroot}/%{_sbindir}/srp_daemon.sh -%ldconfig_scriptlets - %post +if [ -x /sbin/udevadm ];then /sbin/udevadm trigger --subsystem-match=infiniband --action=change || true /sbin/udevadm trigger --subsystem-match=net --action=change || true /sbin/udevadm trigger --subsystem-match=infiniband_mad --action=change || true -%systemd_post ibacm.service -%systemd_post srp_daemon.service -%systemd_post iwpmd.service +fi -%preun +%post -n ibacm +%systemd_post ibacm.service +%preun -n ibacm %systemd_preun ibacm.service -%systemd_preun srp_daemon.service -%systemd_preun iwpmd.service - -%postun +%postun -n ibacm %systemd_postun_with_restart ibacm.service + +%post -n srp_daemon +%systemd_post srp_daemon.service +%preun -n srp_daemon +%systemd_preun srp_daemon.service +%postun -n srp_daemon %systemd_postun_with_restart srp_daemon.service + +%post -n iwpmd +%systemd_post iwpmd.service +%preun -n iwpmd +%systemd_preun iwpmd.service +%postun -n iwpmd %systemd_postun_with_restart iwpmd.service %files -%defattr(-,root,root) -%license COPYING.* -%config(noreplace) %{_sysconfdir}/rdma/*.conf -%config(noreplace) %{_sysconfdir}/rdma/modules/*.conf -%config(noreplace) %{_sysconfdir}/udev/rules.d/* -%config(noreplace) %{_sysconfdir}/modprobe.d/*.conf -%config(noreplace) %{_sysconfdir}/infiniband-diags/* -%config(noreplace) %{_sysconfdir}/libibverbs.d/*.driver -%config(noreplace) %{_sysconfdir}/rdma/ibacm_opts.cfg -%config(noreplace) %{_sysconfdir}/iwpmd.conf -%config(noreplace) %{_sysconfdir}/srp_daemon.conf +%dir %{_sysconfdir}/rdma +%config(noreplace) %{_sysconfdir}/rdma/mlx4.conf +%config(noreplace) %{_sysconfdir}/rdma/modules/infiniband.conf +%config(noreplace) %{_sysconfdir}/rdma/modules/iwarp.conf +%config(noreplace) %{_sysconfdir}/rdma/modules/opa.conf +%config(noreplace) %{_sysconfdir}/rdma/modules/rdma.conf +%config(noreplace) %{_sysconfdir}/rdma/modules/roce.conf +%dir %{_sysconfdir}/modprobe.d +%config(noreplace) %{_sysconfdir}/modprobe.d/mlx4.conf +%config(noreplace) %{_sysconfdir}/modprobe.d/truescale.conf +%{_unitdir}/rdma-hw.target +%{_unitdir}/rdma-load-modules@.service +%dir %{dracutlibdir} +%dir %{dracutlibdir}/modules.d +%dir %{dracutlibdir}/modules.d/05rdma %{dracutlibdir}/modules.d/05rdma/module-setup.sh +%dir %{_udevrulesdir} %{_udevrulesdir}/../rdma_rename -%{_udevrulesdir}/*.rules +%{_udevrulesdir}/60-rdma-ndd.rules +%{_udevrulesdir}/60-rdma-persistent-naming.rules +%{_udevrulesdir}/75-rdma-description.rules +%{_udevrulesdir}/90-rdma-hw-modules.rules +%{_udevrulesdir}/90-rdma-ulp-modules.rules +%{_udevrulesdir}/90-rdma-umad.rules +%dir %{sysmodprobedir} %{sysmodprobedir}/libmlx4.conf -%{perl_vendorlib}/IBswcountlimits.pm %{_libexecdir}/mlx4-setup.sh %{_libexecdir}/truescale-serdes.cmds -%{_libexecdir}/srp_daemon/start_on_all_ports -%{_sbindir}/* -%{_bindir}/* -%{_unitdir}/* +%{_sbindir}/rdma-ndd +%{_unitdir}/rdma-ndd.service +%{_mandir}/man7/rxe* +%{_mandir}/man8/rdma-ndd.* +%license COPYING.* + +%files devel +%dir %{_includedir}/infiniband +%dir %{_includedir}/rdma +%{_includedir}/infiniband/* +%{_includedir}/rdma/* +%{_libdir}/lib*.a +%{_libdir}/lib*.so +%{_libdir}/pkgconfig/*.pc +%{_mandir}/man3/efadv* +%{_mandir}/man3/ibv_* +%{_mandir}/man3/rdma* +%{_mandir}/man3/umad* +%{_mandir}/man3/*_to_ibv_rate.* +%{_mandir}/man7/rdma_cm.* +%{_mandir}/man3/manadv* +%{_mandir}/man3/mlx5dv* +%{_mandir}/man3/mlx4dv* +%{_mandir}/man7/efadv* +%{_mandir}/man7/manadv* +%{_mandir}/man7/mlx5dv* 
+%{_mandir}/man7/mlx4dv* +%{_mandir}/man3/ibnd_* + +%files -n infiniband-diags-compat +%{_sbindir}/ibcheckerrs +%{_mandir}/man8/ibcheckerrs* +%{_sbindir}/ibchecknet +%{_mandir}/man8/ibchecknet* +%{_sbindir}/ibchecknode +%{_mandir}/man8/ibchecknode* +%{_sbindir}/ibcheckport +%{_mandir}/man8/ibcheckport.* +%{_sbindir}/ibcheckportwidth +%{_mandir}/man8/ibcheckportwidth* +%{_sbindir}/ibcheckportstate +%{_mandir}/man8/ibcheckportstate* +%{_sbindir}/ibcheckwidth +%{_mandir}/man8/ibcheckwidth* +%{_sbindir}/ibcheckstate +%{_mandir}/man8/ibcheckstate* +%{_sbindir}/ibcheckerrors +%{_mandir}/man8/ibcheckerrors* +%{_sbindir}/ibdatacounts +%{_mandir}/man8/ibdatacounts* +%{_sbindir}/ibdatacounters +%{_mandir}/man8/ibdatacounters* +%{_sbindir}/ibdiscover.pl +%{_mandir}/man8/ibdiscover* +%{_sbindir}/ibswportwatch.pl +%{_mandir}/man8/ibswportwatch* +%{_sbindir}/ibqueryerrors.pl +%{_sbindir}/iblinkinfo.pl +%{_sbindir}/ibprintca.pl +%{_mandir}/man8/ibprintca* +%{_sbindir}/ibprintswitch.pl +%{_mandir}/man8/ibprintswitch* +%{_sbindir}/ibprintrt.pl +%{_mandir}/man8/ibprintrt* +%{_sbindir}/set_nodedesc.sh + +%files -n infiniband-diags +%{_sbindir}/ibaddr +%{_mandir}/man8/ibaddr* +%{_sbindir}/ibnetdiscover +%{_mandir}/man8/ibnetdiscover* +%{_sbindir}/ibping +%{_mandir}/man8/ibping* +%{_sbindir}/ibportstate +%{_mandir}/man8/ibportstate* +%{_sbindir}/ibroute +%{_mandir}/man8/ibroute.* +%{_sbindir}/ibstat +%{_mandir}/man8/ibstat.* +%{_sbindir}/ibsysstat +%{_mandir}/man8/ibsysstat* +%{_sbindir}/ibtracert +%{_mandir}/man8/ibtracert* +%{_sbindir}/perfquery +%{_mandir}/man8/perfquery* +%{_sbindir}/sminfo +%{_mandir}/man8/sminfo* +%{_sbindir}/smpdump +%{_mandir}/man8/smpdump* +%{_sbindir}/smpquery +%{_mandir}/man8/smpquery* +%{_sbindir}/saquery +%{_mandir}/man8/saquery* +%{_sbindir}/vendstat +%{_mandir}/man8/vendstat* +%{_sbindir}/iblinkinfo +%{_mandir}/man8/iblinkinfo* +%{_sbindir}/ibqueryerrors +%{_mandir}/man8/ibqueryerrors* +%{_sbindir}/ibcacheedit +%{_mandir}/man8/ibcacheedit* +%{_sbindir}/ibccquery +%{_mandir}/man8/ibccquery* +%{_sbindir}/ibccconfig +%{_mandir}/man8/ibccconfig* +%{_sbindir}/dump_fts +%{_mandir}/man8/dump_fts* +%{_sbindir}/ibhosts +%{_mandir}/man8/ibhosts* +%{_sbindir}/ibswitches +%{_mandir}/man8/ibswitches* +%{_sbindir}/ibnodes +%{_mandir}/man8/ibnodes* +%{_sbindir}/ibrouters +%{_mandir}/man8/ibrouters* +%{_sbindir}/ibfindnodesusing.pl +%{_mandir}/man8/ibfindnodesusing* +%{_sbindir}/ibidsverify.pl +%{_mandir}/man8/ibidsverify* +%{_sbindir}/check_lft_balance.pl +%{_mandir}/man8/check_lft_balance* +%{_sbindir}/dump_lfts.sh +%{_mandir}/man8/dump_lfts* +%{_sbindir}/dump_mfts.sh +%{_mandir}/man8/dump_mfts* +%{_sbindir}/ibclearerrors +%{_mandir}/man8/ibclearerrors* +%{_sbindir}/ibclearcounters +%{_mandir}/man8/ibclearcounters* +%{_sbindir}/ibstatus +%{_mandir}/man8/ibstatus* +%{_mandir}/man8/infiniband-diags* %{_libdir}/libibmad*.so.* %{_libdir}/libibnetdisc*.so.* +%{perl_vendorlib}/IBswcountlimits.pm +%config(noreplace) %{_sysconfdir}/infiniband-diags/error_thresholds +%config(noreplace) %{_sysconfdir}/infiniband-diags/ibdiag.conf + +%files -n libibverbs +%dir %{_sysconfdir}/libibverbs.d +%dir %{_libdir}/libibverbs %{_libdir}/libefa.so.* +%{_libdir}/libhns.so.* %{_libdir}/libibverbs*.so.* %{_libdir}/libibverbs/*.so +%{_libdir}/libmana.so.* %{_libdir}/libmlx5.so.* %{_libdir}/libmlx4.so.* +%{_libdir}/libxscale.so.* +%{_libdir}/libzrdma.so.* +%config(noreplace) %{_sysconfdir}/libibverbs.d/*.driver + +%files -n libibverbs-utils +%{_bindir}/ibv_* +%{_mandir}/man1/ibv_* + +%files -n ibacm +%config(noreplace) 
%{_sysconfdir}/rdma/ibacm_opts.cfg +%{_bindir}/ib_acme +%{_sbindir}/ibacm +%{_mandir}/man1/ib_acme.* +%{_mandir}/man7/ibacm.* +%{_mandir}/man7/ibacm_prov.* +%{_mandir}/man8/ibacm.* +%{_unitdir}/ibacm.service +%{_unitdir}/ibacm.socket +%dir %{_libdir}/ibacm %{_libdir}/ibacm/* + +%files -n iwpmd +%{_sbindir}/iwpmd +%{_unitdir}/iwpmd.service +%config(noreplace) %{_sysconfdir}/rdma/modules/iwpmd.conf +%config(noreplace) %{_sysconfdir}/iwpmd.conf +%{_udevrulesdir}/90-iwpmd.rules +%{_mandir}/man8/iwpmd.* +%{_mandir}/man5/iwpmd.* + +%files -n libibumad %{_libdir}/libibumad*.so.* + +%files -n librdmacm %{_libdir}/librdmacm*.so.* +%dir %{_libdir}/rsocket %{_libdir}/rsocket/*.so* +%{_mandir}/man7/rsocket.* +%files -n librdmacm-utils +%{_bindir}/cmtime +%{_bindir}/mckey +%{_bindir}/rcopy +%{_bindir}/rdma_client +%{_bindir}/rdma_server +%{_bindir}/rdma_xclient +%{_bindir}/rdma_xserver +%{_bindir}/riostream +%{_bindir}/rping +%{_bindir}/rstream +%{_bindir}/ucmatose +%{_bindir}/udaddy +%{_bindir}/udpong +%{_mandir}/man1/cmtime.* +%{_mandir}/man1/mckey.* +%{_mandir}/man1/rcopy.* +%{_mandir}/man1/rdma_client.* +%{_mandir}/man1/rdma_server.* +%{_mandir}/man1/rdma_xclient.* +%{_mandir}/man1/rdma_xserver.* +%{_mandir}/man1/riostream.* +%{_mandir}/man1/rping.* +%{_mandir}/man1/rstream.* +%{_mandir}/man1/ucmatose.* +%{_mandir}/man1/udaddy.* +%{_mandir}/man1/udpong.* -%files devel -%defattr(-,root,root) -%{_includedir}/infiniband/* -%{_includedir}/rdma/* -%{_libdir}/lib*.a -%{_libdir}/lib*.so -%{_libdir}/pkgconfig/*.pc +%files -n srp_daemon +%config(noreplace) %{_sysconfdir}/srp_daemon.conf +%config(noreplace) %{_sysconfdir}/rdma/modules/srp_daemon.conf +%{_libexecdir}/srp_daemon/start_on_all_ports +%{_unitdir}/srp_daemon.service +%{_unitdir}/srp_daemon_port@.service +%{_sbindir}/ibsrpdm +%{_sbindir}/srp_daemon +%{_sbindir}/run_srp_daemon +%{_udevrulesdir}/60-srp_daemon.rules +%{_mandir}/man5/srp_daemon.service.5* +%{_mandir}/man5/srp_daemon_port@.service.5* +%{_mandir}/man8/ibsrpdm.8* +%{_mandir}/man8/srp_daemon.8* %files -n python3-pyverbs -%defattr(-,root,root) %{python3_sitearch}/pyverbs %{_docdir}/%{name}-%{version}/tests/*.py %files help -%defattr(-,root,root) %doc %{_docdir}/%{name}-%{version}/rxe.md %doc %{_docdir}/%{name}-%{version}/udev.md %doc %{_docdir}/%{name}-%{version}/ibacm.md @@ -244,9 +651,400 @@ rm -f %{buildroot}/%{_sbindir}/srp_daemon.sh %doc %{_docdir}/%{name}-%{version}/librdmacm.md %doc %{_docdir}/%{name}-%{version}/libibverbs.md %doc %{_docdir}/%{name}-%{version}/tag_matching.md -%{_mandir}/* +%doc %{_docdir}/%{name}-%{version}/70-persistent-ipoib.rules %changelog +* Wed Aug 27 2025 Donghua Huang - 50.0-33 +- Type: bugfix +- ID: NA +- SUG: NA +- DESC: libhns: Magic number and Whitespace warning cleanup + +* Thu Jun 12 2025 Donghua Huang - 50.0-32 +- Type: bugfix +- ID: NA +- SUG: NA +- DESC: libhns: Increase input parameter checks and adjust inline data size. 
+ +* Tue May 27 2025 Junxian Huang - 50.0-31 +- Type: bugfix +- ID: NA +- SUG: NA +- DESC: libhns: Fix double-free of rinl_buf->wqe_list + +* Fri May 16 2025 Xin Tian - 50.0-30 +- Type: feature +- ID: NA +- SUG: NA +- DESC: [libxscale] automatically load xsc_ib.ko + +* Thu May 8 2025 Xin Tian - 50.0-29 +- Type: feature +- ID: NA +- SUG: NA +- DESC: [libxscale] update to version 2412GA + +* Fri Apr 25 2025 Xinghai Cen - 50.0-28 +- Type: bugfix +- ID: NA +- SUG: NA +- DESC: Bugfixes and one debug improvement + +* Wed Apr 23 2025 Xinghai Cen - 50.0-27 +- Type: feature +- ID: NA +- SUG: NA +- DESC: libhns: Add support for LTTng tracing + +* Thu Apr 17 2025 Xinghai Cen - 50.0-26 +- Type: bugfix +- ID: NA +- SUG: NA +- DESC: libhns: Cleanup and Bugfixes + +* Thu Mar 20 2025 Xin Tian - 50.0-25 +- Type: bugfix +- ID: NA +- SUG: NA +- DESC: [libxscale] Match dev by vid and did + +* Sat Mar 29 2025 Li Fuyan - 50.0-24 +- Type: requirement +- ID: NA +- SUG: NA +- DESC: fix some libzrdma bugs and add some optimization + +* Tue Mar 11 2025 Xinghai Cen - 50.0-23 +- Type: bugfix +- ID: NA +- SUG: NA +- DESC: Fix some bugs for libhns + +* Wed Feb 26 2025 Xin Tian - 50.0-22 +- Type: requirement +- ID: NA +- SUG: NA +- DESC: Add Yunsilicon user space RDMA driver + +* Fri Jan 17 2025 Xinghai Cen - 50.0-21 +- Type: bugfix +- ID: NA +- SUG: NA +- DESC: Fix missing fields for SRQ WC + +* Wed Jan 08 2025 Funda Wang - 50.0-20 +- Type: bugfix +- ID: NA +- SUG: NA +- DESC: finally fix cmake out-of-source build + +* Mon Jan 06 2025 Funda Wang - 50.0-19 +- Type: bugfix +- ID: NA +- SUG: NA +- DESC: drop unused macro definition which will confuse cmake + +* Fri Jan 3 2025 Xinghai Cen - 50.0-18 +- Type: requirement +- ID: NA +- SUG: NA +- DESC: Add error logs to help diagnosis + +* Thu Nov 28 2024 Xinghai Cen - 50.0-17 +- Type: bugfix +- ID: NA +- SUG: NA +- DESC: Fix coredump during QP destruction when send_cq == recv_cq + +* Mon Nov 25 2024 Xinghai Cen - 50.0-16 +- Type: bugfix +- ID: NA +- SUG: NA +- DESC: Fixes several bugs for libhns + +* Fri Nov 15 2024 Xinghai Cen - 50.0-15 +- Type: bugfix +- ID: NA +- SUG: NA +- DESC: Two bugfixes in post_send flow + +* Fri Nov 08 2024 Funda Wang - 50.0-14 +- adopt to new cmake macro +- migrated to SPDX license +- drop useless setup macro, cause it duplicates with autosetup +- drop useless ldconfig_scriptlets, it has been done through glibc's + filetriggers since openeuler 1.0 + +* Sat Aug 31 2024 Li Fuyan - 50.0-13 +- Type: requirement +- ID: NA +- SUG: NA +- DESC: Add support for ZTE Dinghai RDMA driver + +* Wed Jul 17 2024 dfh - 50.0-12 +- Type: bugfix +- ID: NA +- SUG: NA +- DESC: Fix the stride calculation for MSN/PSN area + +* Wed May 29 2024 zhangyaqi - 50.0-11 +- Type: bugfix +- ID: NA +- SUG: NA +- DESC: Fix an overflow bug in qsort comparison function + +* Sun May 12 2024 yinsist - 50.0-10 +- Type: requirement +- ID: NA +- SUG: NA +- DESC: Valgrind does not support certain architectures like RISC-V, Before depending on Valgrind, first check if Valgrind supports the architecture + +* Sat May 11 2024 Juan Zhou - 50.0-9 +- Type: bugfix +- ID: NA +- SUG: NA +- DESC: Some bugfixes and cleanups + +* Mon May 6 2024 Juan Zhou - 50.0-8 +- Type: bugfix +- ID: NA +- SUG: NA +- DESC: Fix flexible WQE buffer page related issues + +* Fri Apr 12 2024 Ke Chen - 50.0-7 +- Type: requirement +- ID: NA +- SUG: NA +- DESC: Add support for ROH + +* Thu Apr 11 2024 Ran Zhou - 50.0-6 +- Type: requirement +- ID: NA +- SUG: NA +- DESC: Add support for DCA + +* Tue Mar 26 2024 Ran Zhou - 50.0-5 
+- Type: requirement
+- ID: NA
+- SUG: NA
+- DESC: Support software wc and fix commit info of previous patches
+
+* Thu Mar 21 2024 Ran Zhou - 50.0-4
+- Type: requirement
+- ID: NA
+- SUG: NA
+- DESC: Support td lock-free
+
+* Tue Mar 12 2024 Ran Zhou - 50.0-3
+- Type: requirement
+- ID: NA
+- SUG: NA
+- DESC: Backport congestion control from mainline
+
+* Thu Feb 22 2024 Ran Zhou - 50.0-2
+- Type: requirement
+- ID: NA
+- SUG: NA
+- DESC: Support DSCP
+
+* Tue Feb 6 2024 Ran Zhou - 50.0-1
+- Type: requirement
+- ID: NA
+- SUG: NA
+- DESC: Update to 50.0
+
+* Thu Jan 25 2024 Ran Zhou - 41.0-27
+- Type: requirement
+- ID: NA
+- SUG: NA
+- DESC: Add necessary dependencies for rdma-core-devel
+
+* Tue Dec 19 2023 Juan Zhou - 41.0-26
+- Type: requirement
+- ID: NA
+- SUG: NA
+- DESC: Separate some packages from rdma-core
+
+* Tue Dec 12 2023 Ran Zhou - 41.0-25
+- Type: bugfix
+- ID: NA
+- SUG: NA
+- DESC: Fix congest type flags error and replace a corrupt patch
+
+* Fri Dec 8 2023 Ran Zhou - 41.0-24
+- Type: bugfix
+- ID: NA
+- SUG: NA
+- DESC: Fix missing DB when compiler does not support SVE
+
+* Thu Dec 7 2023 Ran Zhou - 41.0-23
+- Type: bugfix
+- ID: NA
+- SUG: NA
+- DESC: Bugfix for lock and owner bit
+
+* Fri Dec 1 2023 Ran Zhou - 41.0-22
+- Type: bugfix
+- ID: NA
+- SUG: NA
+- DESC: Bugfix for wrong timing of modifying ibv_qp state to err
+
+* Mon Nov 27 2023 Ran Zhou - 41.0-21
+- Type: bugfix
+- ID: NA
+- SUG: NA
+- DESC: Corrects several minor issues found in review
+
+* Wed Nov 22 2023 Ran Zhou - 41.0-20
+- Type: bugfix
+- ID: NA
+- SUG: NA
+- DESC: Get dmac from kernel driver
+
+* Tue Oct 31 2023 Ran Zhou - 41.0-19
+- Type: requirement
+- ID: NA
+- SUG: NA
+- DESC: Support STARS over RDMA
+
+* Thu Oct 26 2023 Juan Zhou - 41.0-18
+- Type: requirement
+- ID: NA
+- SUG: NA
+- DESC: Skip resolving MAC for RDMA over UBLink
+
+* Wed Oct 25 2023 Ran Zhou - 41.0-17
+- Type: requirement
+- ID: NA
+- SUG: NA
+- DESC: Support SRQ record doorbell
+
+* Tue Oct 24 2023 Ran Zhou - 41.0-16
+- Type: requirement
+- ID: NA
+- SUG: NA
+- DESC: Support flexible WQE buffer page size
+
+* Tue Sep 26 2023 Juan Zhou - 41.0-15
+- Type: requirement
+- ID: NA
+- SUG: NA
+- DESC: Support reporting wc as software mode
+
+* Tue Jul 25 2023 Juan Zhou - 41.0-14
+- Type: bugfix
+- ID: NA
+- SUG: NA
+- DESC: Two patches are uploaded from rdma-core mainline
+
+* Fri Jun 9 2023 Juan Zhou - 41.0-13
+- Type: bugfix
+- ID: NA
+- SUG: NA
+- DESC: Allow users to choose UD sl or pktype to adapt to MPI applications
+
+* Fri Jun 2 2023 Juan Zhou - 41.0-12
+- Type: bugfix
+- ID: NA
+- SUG: NA
+- DESC: Backport bugfix for hns
+
+* Thu May 11 2023 Juan Zhou - 41.0-11
+- Type: bugfix
+- ID: NA
+- SUG: NA
+- DESC: Fix the sge number related errors and remove local invalidate operation
+
+* Mon Apr 17 2023 Juan Zhou - 41.0-10
+- Type: bugfix
+- ID: NA
+- SUG: NA
+- DESC: Add support for SVE Direct WQE for libhns
+
+* Thu Apr 13 2023 Juan Zhou - 41.0-9
+- Type: requirement
+- ID: NA
+- SUG: NA
+- DESC: Support congestion control algorithm configuration
+
+* Wed Dec 14 2022 Yixing Liu - 41.0-6
+- Type: requirement
+- ID: NA
+- SUG: NA
+- DESC: Support libhns reset stop ring db mechanism
+
+* Wed Nov 30 2022 tangchengchang - 41.0-7
+- Type: requirement
+- ID: NA
+- SUG: NA
+- DESC: Add support for hns DCA
+
+* Mon Nov 28 2022 Yixing Liu - 41.0-6
+- Type: requirement
+- ID: NA
+- SUG: NA
+- DESC: Support libhns td unlock
+
+* Mon Nov 07 2022 Guofeng Yue - 41.0-5
+- Type: requirement
+- ID: NA
+- SUG: NA
+- DESC: Support hns RoH mode
+
+* Sun Nov 06 2022 tangchengchang - 41.0-4
+- Type: bugfix
+- ID: NA
+- SUG: NA
+- DESC: Backport bugfix from rdma-core 41.1
+
+* Sat Oct 29 2022 tangchengchang - 41.0-3
+- Type: requirement
+- ID: NA
+- SUG: NA
+- DESC: Support rq inline and cqe inline
+
+* Sat Oct 08 2022 luoyouming - 41.0-2
+- Type: requirement
+- ID: NA
+- SUG: NA
+- DESC: Support rq inline and cqe inline
+
+* Mon Jul 25 2022 tangchengchang - 41.0-1
+- Type: requirement
+- ID: NA
+- SUG: NA
+- DESC: update to 41.0
+
+* Mon Jul 11 2022 luozhengfeng - 35.1-3
+- Type: bugfix
+- ID: NA
+- SUG: NA
+- DESC: bugfix and refactor for hns SRQ and SGE
+
+* Mon Jan 10 2022 tangchengchang - 35.1-2
+- Type: requirement
+- ID: NA
+- SUG: NA
+- DESC: Add support for hns DWQE
+
+* Thu Dec 09 2021 gaihuiying - 35.1-1
+- Type: requirement
+- ID: NA
+- SUG: NA
+- DESC: update to 35.1
+
+* Sun Sep 26 2021 seuzw <930zhaowei@163.com> - 35.0-3
+- Type: bugfix
+- ID: NA
+- SUG: NA
+- DESC: Use the -e parameter to make escape characters take effect
+
+* Thu Sep 23 2021 zhongxuan - 35.0-2
+- Type: bugfix
+- ID: NA
+- SUG: NA
+- DESC: increase maximum number of cpus
+
 * Fri Jul 2 2021 liyangyang - 35.0-1
 - Type: bugfix
 - ID: NA