diff --git a/0093-libxscale-Add-Yunsilicon-User-Space-RDMA-Driver.patch b/0093-libxscale-Add-Yunsilicon-User-Space-RDMA-Driver.patch new file mode 100644 index 0000000000000000000000000000000000000000..21af89ec85792e180c57a9ed44cf1649ceef25bf --- /dev/null +++ b/0093-libxscale-Add-Yunsilicon-User-Space-RDMA-Driver.patch @@ -0,0 +1,10309 @@ +From 8946b73c2874cf781edefec6d8a5eeffe40f281c Mon Sep 17 00:00:00 2001 +From: Xin Tian +Date: Tue, 4 Mar 2025 12:27:54 +0800 +Subject: [PATCH] libxscale: Add Yunsilicon User Space RDMA Driver + +Introduce xscale provider for Yunsilicon devices. + +Signed-off-by: Xin Tian +--- + CMakeLists.txt | 1 + + MAINTAINERS | 6 + + README.md | 1 + + debian/control | 1 + + debian/copyright | 4 + + debian/ibverbs-providers.install | 1 + + kernel-headers/CMakeLists.txt | 4 + + kernel-headers/rdma/ib_user_ioctl_verbs.h | 1 + + kernel-headers/rdma/xsc-abi.h | 333 +++ + kernel-headers/rdma/xsc_user_ioctl_cmds.h | 163 ++ + kernel-headers/rdma/xsc_user_ioctl_verbs.h | 27 + + libibverbs/verbs.h | 1 + + providers/xscale/CMakeLists.txt | 18 + + providers/xscale/bitmap.h | 84 + + providers/xscale/buf.c | 594 +++++ + providers/xscale/cq.c | 1410 ++++++++++ + providers/xscale/cqm_csr_defines.h | 180 ++ + providers/xscale/dbrec.c | 131 + + providers/xscale/libxsc.map | 59 + + providers/xscale/qp.c | 678 +++++ + providers/xscale/rqm_csr_defines.h | 200 ++ + providers/xscale/sqm_csr_defines.h | 204 ++ + providers/xscale/verbs.c | 2816 ++++++++++++++++++++ + providers/xscale/wqe.h | 72 + + providers/xscale/xsc-abi.h | 56 + + providers/xscale/xsc_api.h | 29 + + providers/xscale/xsc_hsi.h | 252 ++ + providers/xscale/xscale.c | 948 +++++++ + providers/xscale/xscale.h | 834 ++++++ + providers/xscale/xscdv.h | 876 ++++++ + redhat/rdma-core.spec | 4 + + 31 files changed, 9988 insertions(+) + create mode 100644 kernel-headers/rdma/xsc-abi.h + create mode 100644 kernel-headers/rdma/xsc_user_ioctl_cmds.h + create mode 100644 kernel-headers/rdma/xsc_user_ioctl_verbs.h + create mode 100644 providers/xscale/CMakeLists.txt + create mode 100644 providers/xscale/bitmap.h + create mode 100644 providers/xscale/buf.c + create mode 100644 providers/xscale/cq.c + create mode 100644 providers/xscale/cqm_csr_defines.h + create mode 100644 providers/xscale/dbrec.c + create mode 100644 providers/xscale/libxsc.map + create mode 100644 providers/xscale/qp.c + create mode 100644 providers/xscale/rqm_csr_defines.h + create mode 100644 providers/xscale/sqm_csr_defines.h + create mode 100644 providers/xscale/verbs.c + create mode 100644 providers/xscale/wqe.h + create mode 100644 providers/xscale/xsc-abi.h + create mode 100644 providers/xscale/xsc_api.h + create mode 100644 providers/xscale/xsc_hsi.h + create mode 100644 providers/xscale/xscale.c + create mode 100644 providers/xscale/xscale.h + create mode 100644 providers/xscale/xscdv.h + +diff --git a/CMakeLists.txt b/CMakeLists.txt +index bc4437b..747842e 100644 +--- a/CMakeLists.txt ++++ b/CMakeLists.txt +@@ -704,6 +704,7 @@ add_subdirectory(providers/mthca) + add_subdirectory(providers/ocrdma) + add_subdirectory(providers/qedr) + add_subdirectory(providers/vmw_pvrdma) ++add_subdirectory(providers/xscale) + endif() + + add_subdirectory(providers/hfi1verbs) +diff --git a/MAINTAINERS b/MAINTAINERS +index 41ad47d..a5ada0d 100644 +--- a/MAINTAINERS ++++ b/MAINTAINERS +@@ -175,6 +175,12 @@ L: pv-drivers@vmware.com + S: Supported + F: providers/vmw_pvrdma/ + +++XSCALE USERSPACE PROVIDER (for xsc_ib.ko) ++M: Honggang Wei ++M: Xin Tian ++S: Supported ++F: 
providers/xscale/ ++ + PYVERBS + M: Edward Srouji + S: Supported +diff --git a/README.md b/README.md +index 18c3b01..4a5a4b9 100644 +--- a/README.md ++++ b/README.md +@@ -28,6 +28,7 @@ is included: + - rdma_rxe.ko + - siw.ko + - vmw_pvrdma.ko ++ - xsc_ib.ko + + Additional service daemons are provided for: + - srp_daemon (ib_srp.ko) +diff --git a/debian/control b/debian/control +index 22eb6cd..05a6af4 100644 +--- a/debian/control ++++ b/debian/control +@@ -105,6 +105,7 @@ Description: User space provider drivers for libibverbs + - rxe: A software implementation of the RoCE protocol + - siw: A software implementation of the iWarp protocol + - vmw_pvrdma: VMware paravirtual RDMA device ++ - xscale: Yunsilicon RDMA device + + Package: ibverbs-utils + Architecture: linux-any +diff --git a/debian/copyright b/debian/copyright +index 2bdc0df..b983e39 100644 +--- a/debian/copyright ++++ b/debian/copyright +@@ -220,6 +220,10 @@ Files: providers/vmw_pvrdma/* + Copyright: 2012-2016 VMware, Inc. + License: BSD-2-clause or GPL-2 + ++Files: providers/xscale/* ++Copyright: 2021-2025, Yunsilicon Technology Co., Ltd. ++License: GPL-2 ++ + Files: rdma-ndd/* + Copyright: 2004-2016, Intel Corporation. + License: BSD-MIT or GPL-2 +diff --git a/debian/ibverbs-providers.install b/debian/ibverbs-providers.install +index c6ecbbc..654135a 100644 +--- a/debian/ibverbs-providers.install ++++ b/debian/ibverbs-providers.install +@@ -4,3 +4,4 @@ usr/lib/*/libibverbs/lib*-rdmav*.so + usr/lib/*/libhns.so.* + usr/lib/*/libmlx4.so.* + usr/lib/*/libmlx5.so.* ++usr/lib/*/libxscale.so.* +diff --git a/kernel-headers/CMakeLists.txt b/kernel-headers/CMakeLists.txt +index d75df96..60e3eef 100644 +--- a/kernel-headers/CMakeLists.txt ++++ b/kernel-headers/CMakeLists.txt +@@ -24,6 +24,9 @@ publish_internal_headers(rdma + rdma/rvt-abi.h + rdma/siw-abi.h + rdma/vmw_pvrdma-abi.h ++ rdma/xsc-abi.h ++ rdma/xsc_user_ioctl_cmds.h ++ rdma/xsc_user_ioctl_verbs.h + ) + + publish_internal_headers(rdma/hfi +@@ -76,6 +79,7 @@ rdma_kernel_provider_abi( + rdma/rdma_user_rxe.h + rdma/siw-abi.h + rdma/vmw_pvrdma-abi.h ++ rdma/xsc-abi.h + ) + + publish_headers(infiniband +diff --git a/kernel-headers/rdma/ib_user_ioctl_verbs.h b/kernel-headers/rdma/ib_user_ioctl_verbs.h +index 3072e5d..248ed52 100644 +--- a/kernel-headers/rdma/ib_user_ioctl_verbs.h ++++ b/kernel-headers/rdma/ib_user_ioctl_verbs.h +@@ -250,6 +250,7 @@ enum rdma_driver_id { + RDMA_DRIVER_QIB, + RDMA_DRIVER_EFA, + RDMA_DRIVER_SIW, ++ RDMA_DRIVER_XSC = 1, + }; + + enum ib_uverbs_gid_type { +diff --git a/kernel-headers/rdma/xsc-abi.h b/kernel-headers/rdma/xsc-abi.h +new file mode 100644 +index 0000000..022edb6 +--- /dev/null ++++ b/kernel-headers/rdma/xsc-abi.h +@@ -0,0 +1,333 @@ ++/* ++ * Copyright (c) 2021 - 2022, Shanghai Yunsilicon Technology Co., Ltd. ++ * All rights reserved. ++ */ ++ ++#ifndef XSC_ABI_USER_H ++#define XSC_ABI_USER_H ++ ++#include ++#include /* For ETH_ALEN. */ ++#include ++ ++enum { ++ XSC_WQ_FLAG_SIGNATURE = 1 << 0, ++}; ++ ++/* Make sure that all structs defined in this file remain laid out so ++ * that they pack the same way on 32-bit and 64-bit architectures (to ++ * avoid incompatibility between 32-bit userspace and 64-bit kernels). ++ * In particular do not use pointer types -- pass pointers in __u64 ++ * instead. 
++ */ ++ ++struct xsc_ib_alloc_ucontext_req { ++ __u32 rsvd0; ++ __u32 rsvd1; ++}; ++ ++enum xsc_user_cmds_supp_uhw { ++ XSC_USER_CMDS_SUPP_UHW_QUERY_DEVICE = 1 << 0, ++ XSC_USER_CMDS_SUPP_UHW_CREATE_AH = 1 << 1, ++}; ++ ++struct xsc_ib_alloc_ucontext_resp { ++ __u32 qp_tab_size; ++ __u32 cache_line_size; ++ __u16 max_sq_desc_sz; ++ __u16 max_rq_desc_sz; ++ __u32 max_send_wqebb; ++ __u32 max_recv_wr; ++ __u16 num_ports; ++ __u16 reserved; ++ __u64 qpm_tx_db; ++ __u64 qpm_rx_db; ++ __u64 cqm_next_cid_reg; ++ __u64 cqm_armdb; ++ __u32 send_ds_num; ++ __u32 recv_ds_num; ++ __u32 cmds_supp_uhw; ++}; ++ ++struct xsc_ib_alloc_pd_resp { ++ __u32 pdn; ++}; ++ ++struct xsc_ib_tso_caps { ++ __u32 max_tso; /* Maximum tso payload size in bytes */ ++ ++ /* Corresponding bit will be set if qp type from ++ * 'enum ib_qp_type' is supported, e.g. ++ * supported_qpts |= 1 << IB_QPT_UD ++ */ ++ __u32 supported_qpts; ++}; ++ ++struct xsc_ib_rss_caps { ++ __aligned_u64 rx_hash_fields_mask; /* enum xsc_rx_hash_fields */ ++ __u8 rx_hash_function; /* enum xsc_rx_hash_function_flags */ ++ __u8 reserved[7]; ++}; ++ ++enum xsc_ib_cqe_comp_res_format { ++ XSC_IB_CQE_RES_FORMAT_HASH = 1 << 0, ++ XSC_IB_CQE_RES_FORMAT_CSUM = 1 << 1, ++ XSC_IB_CQE_RES_FORMAT_CSUM_STRIDX = 1 << 2, ++}; ++ ++struct xsc_ib_cqe_comp_caps { ++ __u32 max_num; ++ __u32 supported_format; /* enum xsc_ib_cqe_comp_res_format */ ++}; ++ ++enum xsc_ib_packet_pacing_cap_flags { ++ XSC_IB_PP_SUPPORT_BURST = 1 << 0, ++}; ++ ++struct xsc_packet_pacing_caps { ++ __u32 qp_rate_limit_min; ++ __u32 qp_rate_limit_max; /* In kpbs */ ++ ++ /* Corresponding bit will be set if qp type from ++ * 'enum ib_qp_type' is supported, e.g. ++ * supported_qpts |= 1 << IB_QPT_RAW_PACKET ++ */ ++ __u32 supported_qpts; ++ __u8 cap_flags; /* enum xsc_ib_packet_pacing_cap_flags */ ++ __u8 reserved[3]; ++}; ++ ++enum xsc_ib_mpw_caps { ++ MPW_RESERVED = 1 << 0, ++ XSC_IB_ALLOW_MPW = 1 << 1, ++ XSC_IB_SUPPORT_EMPW = 1 << 2, ++}; ++ ++enum xsc_ib_sw_parsing_offloads { ++ XSC_IB_SW_PARSING = 1 << 0, ++ XSC_IB_SW_PARSING_CSUM = 1 << 1, ++ XSC_IB_SW_PARSING_LSO = 1 << 2, ++}; ++ ++struct xsc_ib_sw_parsing_caps { ++ __u32 sw_parsing_offloads; /* enum xsc_ib_sw_parsing_offloads */ ++ ++ /* Corresponding bit will be set if qp type from ++ * 'enum ib_qp_type' is supported, e.g. ++ * supported_qpts |= 1 << IB_QPT_RAW_PACKET ++ */ ++ __u32 supported_qpts; ++}; ++ ++struct xsc_ib_striding_rq_caps { ++ __u32 min_single_stride_log_num_of_bytes; ++ __u32 max_single_stride_log_num_of_bytes; ++ __u32 min_single_wqe_log_num_of_strides; ++ __u32 max_single_wqe_log_num_of_strides; ++ ++ /* Corresponding bit will be set if qp type from ++ * 'enum ib_qp_type' is supported, e.g. 
++ * supported_qpts |= 1 << IB_QPT_RAW_PACKET ++ */ ++ __u32 supported_qpts; ++ __u32 reserved; ++}; ++ ++enum xsc_ib_query_dev_resp_flags { ++ /* Support 128B CQE compression */ ++ XSC_IB_QUERY_DEV_RESP_FLAGS_CQE_128B_COMP = 1 << 0, ++ XSC_IB_QUERY_DEV_RESP_FLAGS_CQE_128B_PAD = 1 << 1, ++}; ++ ++enum xsc_ib_tunnel_offloads { ++ XSC_IB_TUNNELED_OFFLOADS_VXLAN = 1 << 0, ++ XSC_IB_TUNNELED_OFFLOADS_GRE = 1 << 1, ++ XSC_IB_TUNNELED_OFFLOADS_GENEVE = 1 << 2, ++ XSC_IB_TUNNELED_OFFLOADS_MPLS_GRE = 1 << 3, ++ XSC_IB_TUNNELED_OFFLOADS_MPLS_UDP = 1 << 4, ++}; ++ ++struct xsc_ib_query_device_resp { ++ __u32 comp_mask; ++ __u32 response_length; ++ struct xsc_ib_tso_caps tso_caps; ++ struct xsc_ib_rss_caps rss_caps; ++ struct xsc_ib_cqe_comp_caps cqe_comp_caps; ++ struct xsc_packet_pacing_caps packet_pacing_caps; ++ __u32 xsc_ib_support_multi_pkt_send_wqes; ++ __u32 flags; /* Use enum xsc_ib_query_dev_resp_flags */ ++ struct xsc_ib_sw_parsing_caps sw_parsing_caps; ++ struct xsc_ib_striding_rq_caps striding_rq_caps; ++ __u32 tunnel_offloads_caps; /* enum xsc_ib_tunnel_offloads */ ++ __u32 reserved; ++}; ++ ++struct xsc_ib_create_cq { ++ __aligned_u64 buf_addr; ++ __aligned_u64 db_addr; ++ __u32 cqe_size; ++}; ++ ++struct xsc_ib_create_cq_resp { ++ __u32 cqn; ++ __u32 reserved; ++}; ++ ++struct xsc_ib_resize_cq { ++ __aligned_u64 buf_addr; ++ __u16 cqe_size; ++ __u16 reserved0; ++ __u32 reserved1; ++}; ++ ++struct xsc_ib_create_qp { ++ __aligned_u64 buf_addr; ++ __aligned_u64 db_addr; ++ __u32 sq_wqe_count; ++ __u32 rq_wqe_count; ++ __u32 rq_wqe_shift; ++ __u32 flags; ++}; ++ ++/* RX Hash function flags */ ++enum xsc_rx_hash_function_flags { ++ XSC_RX_HASH_FUNC_TOEPLITZ = 1 << 0, ++}; ++ ++/* ++ * RX Hash flags, these flags allows to set which incoming packet's field should ++ * participates in RX Hash. Each flag represent certain packet's field, ++ * when the flag is set the field that is represented by the flag will ++ * participate in RX Hash calculation. ++ * Note: *IPV4 and *IPV6 flags can't be enabled together on the same QP ++ * and *TCP and *UDP flags can't be enabled together on the same QP. 
++*/ ++enum xsc_rx_hash_fields { ++ XSC_RX_HASH_SRC_IPV4 = 1 << 0, ++ XSC_RX_HASH_DST_IPV4 = 1 << 1, ++ XSC_RX_HASH_SRC_IPV6 = 1 << 2, ++ XSC_RX_HASH_DST_IPV6 = 1 << 3, ++ XSC_RX_HASH_SRC_PORT_TCP = 1 << 4, ++ XSC_RX_HASH_DST_PORT_TCP = 1 << 5, ++ XSC_RX_HASH_SRC_PORT_UDP = 1 << 6, ++ XSC_RX_HASH_DST_PORT_UDP = 1 << 7, ++ XSC_RX_HASH_IPSEC_SPI = 1 << 8, ++ /* Save bits for future fields */ ++ XSC_RX_HASH_INNER = (1UL << 31), ++}; ++ ++struct xsc_ib_create_qp_rss { ++ __aligned_u64 rx_hash_fields_mask; /* enum xscd_rx_hash_fields */ ++ __u8 rx_hash_function; /* enum xsc_rx_hash_function_flags */ ++ __u8 rx_key_len; /* valid only for Toeplitz */ ++ __u8 reserved[6]; ++ __u8 rx_hash_key[128]; /* valid only for Toeplitz */ ++ __u32 comp_mask; ++ __u32 flags; ++}; ++ ++struct xsc_ib_create_qp_resp { ++ __u32 bfreg_index; ++ __u32 resv; ++}; ++ ++enum xsc_ib_create_wq_mask { ++ XSC_IB_CREATE_WQ_STRIDING_RQ = (1 << 0), ++}; ++ ++struct xsc_ib_create_wq { ++ __aligned_u64 buf_addr; ++ __aligned_u64 db_addr; ++ __u32 rq_wqe_count; ++ __u32 rq_wqe_shift; ++ __u32 user_index; ++ __u32 flags; ++ __u32 comp_mask; ++ __u32 single_stride_log_num_of_bytes; ++ __u32 single_wqe_log_num_of_strides; ++ __u32 two_byte_shift_en; ++}; ++ ++struct xsc_ib_create_ah_resp { ++ __u32 response_length; ++ __u8 dmac[ETH_ALEN]; ++ __u8 reserved[6]; ++}; ++ ++struct xsc_ib_burst_info { ++ __u32 max_burst_sz; ++ __u16 typical_pkt_sz; ++ __u16 reserved; ++}; ++ ++struct xsc_ib_modify_qp { ++ __u32 comp_mask; ++ struct xsc_ib_burst_info burst_info; ++ __u32 reserved; ++}; ++ ++struct xsc_ib_modify_qp_resp { ++ __u32 response_length; ++ __u32 dctn; ++}; ++ ++struct xsc_ib_create_wq_resp { ++ __u32 response_length; ++ __u32 reserved; ++}; ++ ++struct xsc_ib_modify_wq { ++ __u32 comp_mask; ++ __u32 reserved; ++}; ++ ++struct xsc_ib_clock_info { ++ __u32 sign; ++ __u32 resv; ++ __aligned_u64 nsec; ++ __aligned_u64 cycles; ++ __aligned_u64 frac; ++ __u32 mult; ++ __u32 shift; ++ __aligned_u64 mask; ++ __aligned_u64 overflow_period; ++}; ++ ++enum xsc_ib_mmap_cmd { ++ XSC_IB_MMAP_REGULAR_PAGE = 0, ++ XSC_IB_MMAP_GET_CONTIGUOUS_PAGES = 1, ++ XSC_IB_MMAP_WC_PAGE = 2, ++ XSC_IB_MMAP_NC_PAGE = 3, ++ XSC_IB_MMAP_CORE_CLOCK = 5, ++ XSC_IB_MMAP_ALLOC_WC = 6, ++ XSC_IB_MMAP_CLOCK_INFO = 7, ++ XSC_IB_MMAP_DEVICE_MEM = 8, ++}; ++ ++enum { ++ XSC_IB_CLOCK_INFO_KERNEL_UPDATING = 1, ++}; ++ ++struct xsc_ib_flow_counters_desc { ++ __u32 description; ++ __u32 index; ++}; ++ ++struct xsc_ib_flow_counters_data { ++ RDMA_UAPI_PTR(struct xsc_ib_flow_counters_desc *, counters_data); ++ __u32 ncounters; ++ __u32 reserved; ++}; ++ ++struct xsc_ib_create_flow { ++ __u32 ncounters_data; ++ __u32 reserved; ++ /* ++ * Following are counters data based on ncounters_data, each ++ * entry in the data[] should match a corresponding counter object ++ * that was pointed by a counters spec upon the flow creation ++ */ ++ struct xsc_ib_flow_counters_data data[]; ++}; ++ ++#endif /* XSC_ABI_USER_H */ +diff --git a/kernel-headers/rdma/xsc_user_ioctl_cmds.h b/kernel-headers/rdma/xsc_user_ioctl_cmds.h +new file mode 100644 +index 0000000..590a061 +--- /dev/null ++++ b/kernel-headers/rdma/xsc_user_ioctl_cmds.h +@@ -0,0 +1,163 @@ ++/* ++ * Copyright (c) 2021 - 2022, Shanghai Yunsilicon Technology Co., Ltd. ++ * All rights reserved. 
++ */ ++ ++#ifndef XSC_USER_IOCTL_CMDS_H ++#define XSC_USER_IOCTL_CMDS_H ++ ++#include ++#include ++ ++enum xsc_ib_create_flow_action_attrs { ++ /* This attribute belong to the driver namespace */ ++ XSC_IB_ATTR_CREATE_FLOW_ACTION_FLAGS = (1U << UVERBS_ID_NS_SHIFT), ++}; ++ ++enum xsc_ib_alloc_dm_attrs { ++ XSC_IB_ATTR_ALLOC_DM_RESP_START_OFFSET = (1U << UVERBS_ID_NS_SHIFT), ++ XSC_IB_ATTR_ALLOC_DM_RESP_PAGE_INDEX, ++}; ++ ++enum xsc_ib_devx_methods { ++ XSC_IB_METHOD_DEVX_OTHER = (1U << UVERBS_ID_NS_SHIFT), ++ XSC_IB_METHOD_DEVX_QUERY_UAR, ++ XSC_IB_METHOD_DEVX_QUERY_EQN, ++}; ++ ++enum xsc_ib_devx_other_attrs { ++ XSC_IB_ATTR_DEVX_OTHER_CMD_IN = (1U << UVERBS_ID_NS_SHIFT), ++ XSC_IB_ATTR_DEVX_OTHER_CMD_OUT, ++}; ++ ++enum xsc_ib_devx_obj_create_attrs { ++ XSC_IB_ATTR_DEVX_OBJ_CREATE_HANDLE = (1U << UVERBS_ID_NS_SHIFT), ++ XSC_IB_ATTR_DEVX_OBJ_CREATE_CMD_IN, ++ XSC_IB_ATTR_DEVX_OBJ_CREATE_CMD_OUT, ++}; ++ ++enum xsc_ib_devx_query_uar_attrs { ++ XSC_IB_ATTR_DEVX_QUERY_UAR_USER_IDX = (1U << UVERBS_ID_NS_SHIFT), ++ XSC_IB_ATTR_DEVX_QUERY_UAR_DEV_IDX, ++}; ++ ++enum xsc_ib_devx_obj_destroy_attrs { ++ XSC_IB_ATTR_DEVX_OBJ_DESTROY_HANDLE = (1U << UVERBS_ID_NS_SHIFT), ++}; ++ ++enum xsc_ib_devx_obj_modify_attrs { ++ XSC_IB_ATTR_DEVX_OBJ_MODIFY_HANDLE = (1U << UVERBS_ID_NS_SHIFT), ++ XSC_IB_ATTR_DEVX_OBJ_MODIFY_CMD_IN, ++ XSC_IB_ATTR_DEVX_OBJ_MODIFY_CMD_OUT, ++}; ++ ++enum xsc_ib_devx_obj_query_attrs { ++ XSC_IB_ATTR_DEVX_OBJ_QUERY_HANDLE = (1U << UVERBS_ID_NS_SHIFT), ++ XSC_IB_ATTR_DEVX_OBJ_QUERY_CMD_IN, ++ XSC_IB_ATTR_DEVX_OBJ_QUERY_CMD_OUT, ++}; ++ ++enum xsc_ib_devx_query_eqn_attrs { ++ XSC_IB_ATTR_DEVX_QUERY_EQN_USER_VEC = (1U << UVERBS_ID_NS_SHIFT), ++ XSC_IB_ATTR_DEVX_QUERY_EQN_DEV_EQN, ++}; ++ ++enum xsc_ib_devx_obj_methods { ++ XSC_IB_METHOD_DEVX_OBJ_CREATE = (1U << UVERBS_ID_NS_SHIFT), ++ XSC_IB_METHOD_DEVX_OBJ_DESTROY, ++ XSC_IB_METHOD_DEVX_OBJ_MODIFY, ++ XSC_IB_METHOD_DEVX_OBJ_QUERY, ++}; ++ ++enum xsc_ib_devx_umem_reg_attrs { ++ XSC_IB_ATTR_DEVX_UMEM_REG_HANDLE = (1U << UVERBS_ID_NS_SHIFT), ++ XSC_IB_ATTR_DEVX_UMEM_REG_ADDR, ++ XSC_IB_ATTR_DEVX_UMEM_REG_LEN, ++ XSC_IB_ATTR_DEVX_UMEM_REG_ACCESS, ++ XSC_IB_ATTR_DEVX_UMEM_REG_OUT_ID, ++}; ++ ++enum xsc_ib_devx_umem_dereg_attrs { ++ XSC_IB_ATTR_DEVX_UMEM_DEREG_HANDLE = (1U << UVERBS_ID_NS_SHIFT), ++}; ++ ++enum xsc_ib_devx_umem_methods { ++ XSC_IB_METHOD_DEVX_UMEM_REG = (1U << UVERBS_ID_NS_SHIFT), ++ XSC_IB_METHOD_DEVX_UMEM_DEREG, ++}; ++ ++enum xsc_ib_objects { ++ XSC_IB_OBJECT_DEVX = (1U << UVERBS_ID_NS_SHIFT), ++ XSC_IB_OBJECT_DEVX_OBJ, ++ XSC_IB_OBJECT_DEVX_UMEM, ++ XSC_IB_OBJECT_FLOW_MATCHER, ++}; ++ ++enum xsc_ib_flow_matcher_create_attrs { ++ XSC_IB_ATTR_FLOW_MATCHER_CREATE_HANDLE = (1U << UVERBS_ID_NS_SHIFT), ++ XSC_IB_ATTR_FLOW_MATCHER_MATCH_MASK, ++ XSC_IB_ATTR_FLOW_MATCHER_FLOW_TYPE, ++ XSC_IB_ATTR_FLOW_MATCHER_MATCH_CRITERIA, ++ XSC_IB_ATTR_FLOW_MATCHER_FLOW_FLAGS, ++}; ++ ++enum xsc_ib_flow_matcher_destroy_attrs { ++ XSC_IB_ATTR_FLOW_MATCHER_DESTROY_HANDLE = (1U << UVERBS_ID_NS_SHIFT), ++}; ++ ++enum xsc_ib_flow_matcher_methods { ++ XSC_IB_METHOD_FLOW_MATCHER_CREATE = (1U << UVERBS_ID_NS_SHIFT), ++ XSC_IB_METHOD_FLOW_MATCHER_DESTROY, ++}; ++ ++#define XSC_IB_DW_MATCH_PARAM 0x80 ++ ++struct xsc_ib_match_params { ++ __u32 match_params[XSC_IB_DW_MATCH_PARAM]; ++}; ++ ++enum xsc_ib_flow_type { ++ XSC_IB_FLOW_TYPE_NORMAL, ++ XSC_IB_FLOW_TYPE_SNIFFER, ++ XSC_IB_FLOW_TYPE_ALL_DEFAULT, ++ XSC_IB_FLOW_TYPE_MC_DEFAULT, ++}; ++ ++enum xsc_ib_create_flow_attrs { ++ XSC_IB_ATTR_CREATE_FLOW_HANDLE = (1U << UVERBS_ID_NS_SHIFT), ++ 
XSC_IB_ATTR_CREATE_FLOW_MATCH_VALUE, ++ XSC_IB_ATTR_CREATE_FLOW_DEST_QP, ++ XSC_IB_ATTR_CREATE_FLOW_DEST_DEVX, ++ XSC_IB_ATTR_CREATE_FLOW_MATCHER, ++ XSC_IB_ATTR_CREATE_FLOW_ARR_FLOW_ACTIONS, ++ XSC_IB_ATTR_CREATE_FLOW_TAG, ++}; ++ ++enum xsc_ib_destoy_flow_attrs { ++ XSC_IB_ATTR_DESTROY_FLOW_HANDLE = (1U << UVERBS_ID_NS_SHIFT), ++}; ++ ++enum xsc_ib_flow_methods { ++ XSC_IB_METHOD_CREATE_FLOW = (1U << UVERBS_ID_NS_SHIFT), ++ XSC_IB_METHOD_DESTROY_FLOW, ++}; ++ ++enum xsc_ib_flow_action_methods { ++ XSC_IB_METHOD_FLOW_ACTION_CREATE_MODIFY_HEADER = (1U << UVERBS_ID_NS_SHIFT), ++ XSC_IB_METHOD_FLOW_ACTION_CREATE_PACKET_REFORMAT, ++}; ++ ++enum xsc_ib_create_flow_action_create_modify_header_attrs { ++ XSC_IB_ATTR_CREATE_MODIFY_HEADER_HANDLE = (1U << UVERBS_ID_NS_SHIFT), ++ XSC_IB_ATTR_CREATE_MODIFY_HEADER_ACTIONS_PRM, ++ XSC_IB_ATTR_CREATE_MODIFY_HEADER_FT_TYPE, ++}; ++ ++enum xsc_ib_create_flow_action_create_packet_reformat_attrs { ++ XSC_IB_ATTR_CREATE_PACKET_REFORMAT_HANDLE = (1U << UVERBS_ID_NS_SHIFT), ++ XSC_IB_ATTR_CREATE_PACKET_REFORMAT_TYPE, ++ XSC_IB_ATTR_CREATE_PACKET_REFORMAT_FT_TYPE, ++ XSC_IB_ATTR_CREATE_PACKET_REFORMAT_DATA_BUF, ++}; ++ ++#endif +diff --git a/kernel-headers/rdma/xsc_user_ioctl_verbs.h b/kernel-headers/rdma/xsc_user_ioctl_verbs.h +new file mode 100644 +index 0000000..614f2ee +--- /dev/null ++++ b/kernel-headers/rdma/xsc_user_ioctl_verbs.h +@@ -0,0 +1,27 @@ ++/* ++ * Copyright (c) 2021 - 2022, Shanghai Yunsilicon Technology Co., Ltd. ++ * All rights reserved. ++ */ ++ ++#ifndef XSC_USER_IOCTL_VERBS_H ++#define XSC_USER_IOCTL_VERBS_H ++ ++#include ++ ++enum xsc_ib_uapi_flow_action_flags { ++ XSC_IB_UAPI_FLOW_ACTION_FLAGS_REQUIRE_METADATA = 1 << 0, ++}; ++ ++enum xsc_ib_uapi_flow_table_type { ++ XSC_IB_UAPI_FLOW_TABLE_TYPE_NIC_RX = 0x0, ++ XSC_IB_UAPI_FLOW_TABLE_TYPE_NIC_TX = 0x1, ++}; ++ ++enum xsc_ib_uapi_flow_action_packet_reformat_type { ++ XSC_IB_UAPI_FLOW_ACTION_PACKET_REFORMAT_TYPE_L2_TUNNEL_TO_L2 = 0x0, ++ XSC_IB_UAPI_FLOW_ACTION_PACKET_REFORMAT_TYPE_L2_TO_L2_TUNNEL = 0x1, ++ XSC_IB_UAPI_FLOW_ACTION_PACKET_REFORMAT_TYPE_L3_TUNNEL_TO_L2 = 0x2, ++ XSC_IB_UAPI_FLOW_ACTION_PACKET_REFORMAT_TYPE_L2_TO_L3_TUNNEL = 0x3, ++}; ++ ++#endif +diff --git a/libibverbs/verbs.h b/libibverbs/verbs.h +index 713cce6..76f6a83 100644 +--- a/libibverbs/verbs.h ++++ b/libibverbs/verbs.h +@@ -2223,6 +2223,7 @@ extern const struct verbs_device_ops verbs_provider_qedr; + extern const struct verbs_device_ops verbs_provider_rxe; + extern const struct verbs_device_ops verbs_provider_siw; + extern const struct verbs_device_ops verbs_provider_vmw_pvrdma; ++extern const struct verbs_device_ops verbs_provider_xscale; + extern const struct verbs_device_ops verbs_provider_all; + extern const struct verbs_device_ops verbs_provider_none; + void ibv_static_providers(void *unused, ...); +diff --git a/providers/xscale/CMakeLists.txt b/providers/xscale/CMakeLists.txt +new file mode 100644 +index 0000000..1188db1 +--- /dev/null ++++ b/providers/xscale/CMakeLists.txt +@@ -0,0 +1,18 @@ ++rdma_shared_provider(xscale libxsc.map ++ 1 1.24.${PACKAGE_VERSION} ++ buf.c ++ cq.c ++ dbrec.c ++ xscale.c ++ qp.c ++ verbs.c ++) ++ ++publish_headers(infiniband ++ ../../kernel-headers/rdma/xsc_user_ioctl_verbs.h ++ ../../kernel-headers/rdma/xsc_user_ioctl_cmds.h ++ xsc_api.h ++ xscdv.h ++) ++ ++rdma_pkg_config("xscale" "libibverbs" "${CMAKE_THREAD_LIBS_INIT}") +diff --git a/providers/xscale/bitmap.h b/providers/xscale/bitmap.h +new file mode 100644 +index 0000000..ef7f202 +--- /dev/null ++++ 
b/providers/xscale/bitmap.h +@@ -0,0 +1,84 @@ ++/* ++ * Copyright (c) 2021 - 2022, Shanghai Yunsilicon Technology Co., Ltd. ++ * All rights reserved. ++ */ ++ ++#ifndef BITMAP_H ++#define BITMAP_H ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include "xscale.h" ++ ++/* Only ia64 requires this */ ++#ifdef __ia64__ ++#define XSC_SHM_ADDR ((void *)0x8000000000000000UL) ++#define XSC_SHMAT_FLAGS (SHM_RND) ++#else ++#define XSC_SHM_ADDR NULL ++#define XSC_SHMAT_FLAGS 0 ++#endif ++ ++#define BITS_PER_LONG (8 * sizeof(long)) ++#define BITS_TO_LONGS(nr) DIV_ROUND_UP(nr, BITS_PER_LONG) ++ ++#ifndef HPAGE_SIZE ++#define HPAGE_SIZE (2UL * 1024 * 1024) ++#endif ++ ++#define XSC_SHM_LENGTH HPAGE_SIZE ++#define XSC_Q_CHUNK_SIZE 32768 ++#define XSC_SHM_NUM_REGION 64 ++ ++static inline unsigned long xsc_ffz(uint32_t word) ++{ ++ return __builtin_ffs(~word) - 1; ++} ++ ++static inline uint32_t xsc_find_first_zero_bit(const unsigned long *addr, ++ uint32_t size) ++{ ++ const unsigned long *p = addr; ++ uint32_t result = 0; ++ unsigned long tmp; ++ ++ while (size & ~(BITS_PER_LONG - 1)) { ++ tmp = *(p++); ++ if (~tmp) ++ goto found; ++ result += BITS_PER_LONG; ++ size -= BITS_PER_LONG; ++ } ++ if (!size) ++ return result; ++ ++ tmp = (*p) | (~0UL << size); ++ if (tmp == (uint32_t)~0UL) /* Are any bits zero? */ ++ return result + size; /* Nope. */ ++found: ++ return result + xsc_ffz(tmp); ++} ++ ++static inline void xsc_set_bit(unsigned int nr, unsigned long *addr) ++{ ++ addr[(nr / BITS_PER_LONG)] |= (1 << (nr % BITS_PER_LONG)); ++} ++ ++static inline void xsc_clear_bit(unsigned int nr, unsigned long *addr) ++{ ++ addr[(nr / BITS_PER_LONG)] &= ~(1 << (nr % BITS_PER_LONG)); ++} ++ ++static inline int xsc_test_bit(unsigned int nr, const unsigned long *addr) ++{ ++ return !!(addr[(nr / BITS_PER_LONG)] & (1 << (nr % BITS_PER_LONG))); ++} ++ ++#endif +diff --git a/providers/xscale/buf.c b/providers/xscale/buf.c +new file mode 100644 +index 0000000..61daf6d +--- /dev/null ++++ b/providers/xscale/buf.c +@@ -0,0 +1,594 @@ ++/* ++ * Copyright (c) 2021 - 2022, Shanghai Yunsilicon Technology Co., Ltd. ++ * All rights reserved. ++ */ ++ ++#include ++ ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include "xscale.h" ++#include "bitmap.h" ++ ++static int xsc_bitmap_init(struct xsc_bitmap *bitmap, uint32_t num, ++ uint32_t mask) ++{ ++ bitmap->last = 0; ++ bitmap->top = 0; ++ bitmap->max = num; ++ bitmap->avail = num; ++ bitmap->mask = mask; ++ bitmap->avail = bitmap->max; ++ bitmap->table = calloc(BITS_TO_LONGS(bitmap->max), sizeof(*bitmap->table)); ++ if (!bitmap->table) ++ return -ENOMEM; ++ ++ return 0; ++} ++ ++static void bitmap_free_range(struct xsc_bitmap *bitmap, uint32_t obj, ++ int cnt) ++{ ++ int i; ++ ++ obj &= bitmap->max - 1; ++ ++ for (i = 0; i < cnt; i++) ++ xsc_clear_bit(obj + i, bitmap->table); ++ bitmap->last = min(bitmap->last, obj); ++ bitmap->top = (bitmap->top + bitmap->max) & bitmap->mask; ++ bitmap->avail += cnt; ++} ++ ++static int bitmap_empty(struct xsc_bitmap *bitmap) ++{ ++ return (bitmap->avail == bitmap->max) ? 
1 : 0; ++} ++ ++static int bitmap_avail(struct xsc_bitmap *bitmap) ++{ ++ return bitmap->avail; ++} ++ ++static void xsc_bitmap_cleanup(struct xsc_bitmap *bitmap) ++{ ++ if (bitmap->table) ++ free(bitmap->table); ++} ++ ++static void free_huge_mem(struct xsc_hugetlb_mem *hmem) ++{ ++ xsc_bitmap_cleanup(&hmem->bitmap); ++ if (shmdt(hmem->shmaddr) == -1) ++ xsc_dbg(stderr, XSC_DBG_CONTIG, "%s\n", strerror(errno)); ++ shmctl(hmem->shmid, IPC_RMID, NULL); ++ free(hmem); ++} ++ ++static int xsc_bitmap_alloc(struct xsc_bitmap *bitmap) ++{ ++ uint32_t obj; ++ int ret; ++ ++ obj = xsc_find_first_zero_bit(bitmap->table, bitmap->max); ++ if (obj < bitmap->max) { ++ xsc_set_bit(obj, bitmap->table); ++ bitmap->last = (obj + 1); ++ if (bitmap->last == bitmap->max) ++ bitmap->last = 0; ++ obj |= bitmap->top; ++ ret = obj; ++ } else ++ ret = -1; ++ ++ if (ret != -1) ++ --bitmap->avail; ++ ++ return ret; ++} ++ ++static uint32_t find_aligned_range(unsigned long *bitmap, ++ uint32_t start, uint32_t nbits, ++ int len, int alignment) ++{ ++ uint32_t end, i; ++ ++again: ++ start = align(start, alignment); ++ ++ while ((start < nbits) && xsc_test_bit(start, bitmap)) ++ start += alignment; ++ ++ if (start >= nbits) ++ return -1; ++ ++ end = start + len; ++ if (end > nbits) ++ return -1; ++ ++ for (i = start + 1; i < end; i++) { ++ if (xsc_test_bit(i, bitmap)) { ++ start = i + 1; ++ goto again; ++ } ++ } ++ ++ return start; ++} ++ ++static int bitmap_alloc_range(struct xsc_bitmap *bitmap, int cnt, ++ int align) ++{ ++ uint32_t obj; ++ int ret, i; ++ ++ if (cnt == 1 && align == 1) ++ return xsc_bitmap_alloc(bitmap); ++ ++ if (cnt > bitmap->max) ++ return -1; ++ ++ obj = find_aligned_range(bitmap->table, bitmap->last, ++ bitmap->max, cnt, align); ++ if (obj >= bitmap->max) { ++ bitmap->top = (bitmap->top + bitmap->max) & bitmap->mask; ++ obj = find_aligned_range(bitmap->table, 0, bitmap->max, ++ cnt, align); ++ } ++ ++ if (obj < bitmap->max) { ++ for (i = 0; i < cnt; i++) ++ xsc_set_bit(obj + i, bitmap->table); ++ if (obj == bitmap->last) { ++ bitmap->last = (obj + cnt); ++ if (bitmap->last >= bitmap->max) ++ bitmap->last = 0; ++ } ++ obj |= bitmap->top; ++ ret = obj; ++ } else ++ ret = -1; ++ ++ if (ret != -1) ++ bitmap->avail -= cnt; ++ ++ return obj; ++} ++ ++static struct xsc_hugetlb_mem *alloc_huge_mem(size_t size) ++{ ++ struct xsc_hugetlb_mem *hmem; ++ size_t shm_len; ++ ++ hmem = malloc(sizeof(*hmem)); ++ if (!hmem) ++ return NULL; ++ ++ shm_len = align(size, XSC_SHM_LENGTH); ++ hmem->shmid = shmget(IPC_PRIVATE, shm_len, SHM_HUGETLB | SHM_R | SHM_W); ++ if (hmem->shmid == -1) { ++ xsc_dbg(stderr, XSC_DBG_CONTIG, "%s\n", strerror(errno)); ++ goto out_free; ++ } ++ ++ hmem->shmaddr = shmat(hmem->shmid, XSC_SHM_ADDR, XSC_SHMAT_FLAGS); ++ if (hmem->shmaddr == (void *)-1) { ++ xsc_dbg(stderr, XSC_DBG_CONTIG, "%s\n", strerror(errno)); ++ goto out_rmid; ++ } ++ ++ if (xsc_bitmap_init(&hmem->bitmap, shm_len / XSC_Q_CHUNK_SIZE, ++ shm_len / XSC_Q_CHUNK_SIZE - 1)) { ++ xsc_dbg(stderr, XSC_DBG_CONTIG, "%s\n", strerror(errno)); ++ goto out_shmdt; ++ } ++ ++ /* ++ * Marked to be destroyed when process detaches from shmget segment ++ */ ++ shmctl(hmem->shmid, IPC_RMID, NULL); ++ ++ return hmem; ++ ++out_shmdt: ++ if (shmdt(hmem->shmaddr) == -1) ++ xsc_dbg(stderr, XSC_DBG_CONTIG, "%s\n", strerror(errno)); ++ ++out_rmid: ++ shmctl(hmem->shmid, IPC_RMID, NULL); ++ ++out_free: ++ free(hmem); ++ return NULL; ++} ++ ++static int alloc_huge_buf(struct xsc_context *xctx, struct xsc_buf *buf, ++ size_t size, int page_size) ++{ 
++ int found = 0; ++ int nchunk; ++ struct xsc_hugetlb_mem *hmem; ++ int ret; ++ ++ buf->length = align(size, XSC_Q_CHUNK_SIZE); ++ nchunk = buf->length / XSC_Q_CHUNK_SIZE; ++ ++ if (!nchunk) ++ return 0; ++ ++ xsc_spin_lock(&xctx->hugetlb_lock); ++ list_for_each(&xctx->hugetlb_list, hmem, entry) { ++ if (bitmap_avail(&hmem->bitmap)) { ++ buf->base = bitmap_alloc_range(&hmem->bitmap, nchunk, 1); ++ if (buf->base != -1) { ++ buf->hmem = hmem; ++ found = 1; ++ break; ++ } ++ } ++ } ++ xsc_spin_unlock(&xctx->hugetlb_lock); ++ ++ if (!found) { ++ hmem = alloc_huge_mem(buf->length); ++ if (!hmem) ++ return -1; ++ ++ buf->base = bitmap_alloc_range(&hmem->bitmap, nchunk, 1); ++ if (buf->base == -1) { ++ free_huge_mem(hmem); ++ /* TBD: remove after proven stability */ ++ fprintf(stderr, "BUG: huge allocation\n"); ++ return -1; ++ } ++ ++ buf->hmem = hmem; ++ ++ xsc_spin_lock(&xctx->hugetlb_lock); ++ if (bitmap_avail(&hmem->bitmap)) ++ list_add(&xctx->hugetlb_list, &hmem->entry); ++ else ++ list_add_tail(&xctx->hugetlb_list, &hmem->entry); ++ xsc_spin_unlock(&xctx->hugetlb_lock); ++ } ++ ++ buf->buf = hmem->shmaddr + buf->base * XSC_Q_CHUNK_SIZE; ++ ++ ret = ibv_dontfork_range(buf->buf, buf->length); ++ if (ret) { ++ goto out_fork; ++ } ++ buf->type = XSC_ALLOC_TYPE_HUGE; ++ ++ return 0; ++ ++out_fork: ++ xsc_spin_lock(&xctx->hugetlb_lock); ++ bitmap_free_range(&hmem->bitmap, buf->base, nchunk); ++ if (bitmap_empty(&hmem->bitmap)) { ++ list_del(&hmem->entry); ++ xsc_spin_unlock(&xctx->hugetlb_lock); ++ free_huge_mem(hmem); ++ } else ++ xsc_spin_unlock(&xctx->hugetlb_lock); ++ ++ return -1; ++} ++ ++static void free_huge_buf(struct xsc_context *ctx, struct xsc_buf *buf) ++{ ++ int nchunk; ++ ++ nchunk = buf->length / XSC_Q_CHUNK_SIZE; ++ if (!nchunk) ++ return; ++ ++ xsc_spin_lock(&ctx->hugetlb_lock); ++ bitmap_free_range(&buf->hmem->bitmap, buf->base, nchunk); ++ if (bitmap_empty(&buf->hmem->bitmap)) { ++ list_del(&buf->hmem->entry); ++ xsc_spin_unlock(&ctx->hugetlb_lock); ++ free_huge_mem(buf->hmem); ++ } else ++ xsc_spin_unlock(&ctx->hugetlb_lock); ++} ++ ++void xsc_free_buf_extern(struct xsc_context *ctx, struct xsc_buf *buf) ++{ ++ ibv_dofork_range(buf->buf, buf->length); ++ ctx->extern_alloc.free(buf->buf, ctx->extern_alloc.data); ++} ++ ++int xsc_alloc_buf_extern(struct xsc_context *ctx, struct xsc_buf *buf, ++ size_t size) ++{ ++ void *addr; ++ ++ addr = ctx->extern_alloc.alloc(size, ctx->extern_alloc.data); ++ if (addr || size == 0) { ++ if (ibv_dontfork_range(addr, size)) { ++ xsc_err("External mode dontfork_range failed\n"); ++ ctx->extern_alloc.free(addr, ++ ctx->extern_alloc.data); ++ return -1; ++ } ++ buf->buf = addr; ++ buf->length = size; ++ buf->type = XSC_ALLOC_TYPE_EXTERNAL; ++ return 0; ++ } ++ ++ xsc_err("External alloc failed\n"); ++ return -1; ++} ++ ++int xsc_alloc_prefered_buf(struct xsc_context *xctx, ++ struct xsc_buf *buf, ++ size_t size, int page_size, ++ enum xsc_alloc_type type, ++ const char *component) ++{ ++ int ret; ++ ++ /* ++ * Fallback mechanism priority: ++ * huge pages ++ * contig pages ++ * default ++ */ ++ if (type == XSC_ALLOC_TYPE_HUGE || ++ type == XSC_ALLOC_TYPE_PREFER_HUGE || ++ type == XSC_ALLOC_TYPE_ALL) { ++ ret = alloc_huge_buf(xctx, buf, size, page_size); ++ if (!ret) ++ return 0; ++ ++ if (type == XSC_ALLOC_TYPE_HUGE) ++ return -1; ++ ++ xsc_dbg(xctx->dbg_fp, XSC_DBG_CONTIG, "Huge mode allocation failed, fallback to %s mode\n", ++ XSC_ALLOC_TYPE_ALL ? 
"contig" : "default"); ++ } ++ ++ if (type == XSC_ALLOC_TYPE_CONTIG || ++ type == XSC_ALLOC_TYPE_PREFER_CONTIG || ++ type == XSC_ALLOC_TYPE_ALL) { ++ ret = xsc_alloc_buf_contig(xctx, buf, size, page_size, component); ++ if (!ret) ++ return 0; ++ ++ if (type == XSC_ALLOC_TYPE_CONTIG) ++ return -1; ++ xsc_dbg(xctx->dbg_fp, XSC_DBG_CONTIG, "Contig allocation failed, fallback to default mode\n"); ++ } ++ ++ if (type == XSC_ALLOC_TYPE_EXTERNAL) ++ return xsc_alloc_buf_extern(xctx, buf, size); ++ ++ return xsc_alloc_buf(buf, size, page_size); ++ ++} ++ ++int xsc_free_actual_buf(struct xsc_context *ctx, struct xsc_buf *buf) ++{ ++ int err = 0; ++ ++ switch (buf->type) { ++ case XSC_ALLOC_TYPE_ANON: ++ xsc_free_buf(buf); ++ break; ++ ++ case XSC_ALLOC_TYPE_HUGE: ++ free_huge_buf(ctx, buf); ++ break; ++ ++ case XSC_ALLOC_TYPE_CONTIG: ++ xsc_free_buf_contig(ctx, buf); ++ break; ++ ++ case XSC_ALLOC_TYPE_EXTERNAL: ++ xsc_free_buf_extern(ctx, buf); ++ break; ++ ++ default: ++ fprintf(stderr, "Bad allocation type\n"); ++ } ++ ++ return err; ++} ++ ++/* This function computes log2(v) rounded up. ++ We don't want to have a dependency to libm which exposes ceil & log2 APIs. ++ Code was written based on public domain code: ++ URL: http://graphics.stanford.edu/~seander/bithacks.html#IntegerLog. ++*/ ++static uint32_t xsc_get_block_order(uint32_t v) ++{ ++ static const uint32_t bits_arr[] = {0x2, 0xC, 0xF0, 0xFF00, 0xFFFF0000}; ++ static const uint32_t shift_arr[] = {1, 2, 4, 8, 16}; ++ int i; ++ uint32_t input_val = v; ++ ++ register uint32_t r = 0;/* result of log2(v) will go here */ ++ for (i = 4; i >= 0; i--) { ++ if (v & bits_arr[i]) { ++ v >>= shift_arr[i]; ++ r |= shift_arr[i]; ++ } ++ } ++ /* Rounding up if required */ ++ r += !!(input_val & ((1 << r) - 1)); ++ ++ return r; ++} ++ ++bool xsc_is_extern_alloc(struct xsc_context *context) ++{ ++ return context->extern_alloc.alloc && context->extern_alloc.free; ++} ++ ++void xsc_get_alloc_type(struct xsc_context *context, ++ const char *component, ++ enum xsc_alloc_type *alloc_type, ++ enum xsc_alloc_type default_type) ++ ++{ ++ char *env_value; ++ char name[128]; ++ ++ if (xsc_is_extern_alloc(context)) { ++ *alloc_type = XSC_ALLOC_TYPE_EXTERNAL; ++ return; ++ } ++ ++ snprintf(name, sizeof(name), "%s_ALLOC_TYPE", component); ++ ++ *alloc_type = default_type; ++ ++ env_value = getenv(name); ++ if (env_value) { ++ if (!strcasecmp(env_value, "ANON")) ++ *alloc_type = XSC_ALLOC_TYPE_ANON; ++ else if (!strcasecmp(env_value, "HUGE")) ++ *alloc_type = XSC_ALLOC_TYPE_HUGE; ++ else if (!strcasecmp(env_value, "CONTIG")) ++ *alloc_type = XSC_ALLOC_TYPE_CONTIG; ++ else if (!strcasecmp(env_value, "PREFER_CONTIG")) ++ *alloc_type = XSC_ALLOC_TYPE_PREFER_CONTIG; ++ else if (!strcasecmp(env_value, "PREFER_HUGE")) ++ *alloc_type = XSC_ALLOC_TYPE_PREFER_HUGE; ++ else if (!strcasecmp(env_value, "ALL")) ++ *alloc_type = XSC_ALLOC_TYPE_ALL; ++ } ++} ++ ++static void xsc_alloc_get_env_info(int *max_block_log, ++ int *min_block_log, ++ const char *component) ++ ++{ ++ char *env; ++ int value; ++ char name[128]; ++ ++ /* First set defaults */ ++ *max_block_log = XSC_MAX_LOG2_CONTIG_BLOCK_SIZE; ++ *min_block_log = XSC_MIN_LOG2_CONTIG_BLOCK_SIZE; ++ ++ snprintf(name, sizeof(name), "%s_MAX_LOG2_CONTIG_BSIZE", component); ++ env = getenv(name); ++ if (env) { ++ value = atoi(env); ++ if (value <= XSC_MAX_LOG2_CONTIG_BLOCK_SIZE && ++ value >= XSC_MIN_LOG2_CONTIG_BLOCK_SIZE) ++ *max_block_log = value; ++ else ++ fprintf(stderr, "Invalid value %d for %s\n", ++ value, name); ++ } ++ 
sprintf(name, "%s_MIN_LOG2_CONTIG_BSIZE", component); ++ env = getenv(name); ++ if (env) { ++ value = atoi(env); ++ if (value >= XSC_MIN_LOG2_CONTIG_BLOCK_SIZE && ++ value <= *max_block_log) ++ *min_block_log = value; ++ else ++ fprintf(stderr, "Invalid value %d for %s\n", ++ value, name); ++ } ++} ++ ++int xsc_alloc_buf_contig(struct xsc_context *xctx, ++ struct xsc_buf *buf, size_t size, ++ int page_size, ++ const char *component) ++{ ++ void *addr = MAP_FAILED; ++ int block_size_exp; ++ int max_block_log; ++ int min_block_log; ++ struct ibv_context *context = &xctx->ibv_ctx.context; ++ off_t offset; ++ ++ xsc_alloc_get_env_info(&max_block_log, ++ &min_block_log, ++ component); ++ ++ block_size_exp = xsc_get_block_order(size); ++ ++ if (block_size_exp > max_block_log) ++ block_size_exp = max_block_log; ++ ++ do { ++ offset = 0; ++ set_command(XSC_IB_MMAP_GET_CONTIGUOUS_PAGES, &offset); ++ set_order(block_size_exp, &offset); ++ addr = mmap(NULL , size, PROT_WRITE | PROT_READ, MAP_SHARED, ++ context->cmd_fd, page_size * offset); ++ if (addr != MAP_FAILED) ++ break; ++ ++ /* ++ * The kernel returns EINVAL if not supported ++ */ ++ if (errno == EINVAL) ++ return -1; ++ ++ block_size_exp -= 1; ++ } while (block_size_exp >= min_block_log); ++ xsc_dbg(xctx->dbg_fp, XSC_DBG_CONTIG, "block order %d, addr %p\n", block_size_exp, addr); ++ ++ if (addr == MAP_FAILED) ++ return -1; ++ ++ if (ibv_dontfork_range(addr, size)) { ++ munmap(addr, size); ++ return -1; ++ } ++ ++ buf->buf = addr; ++ buf->length = size; ++ buf->type = XSC_ALLOC_TYPE_CONTIG; ++ ++ return 0; ++} ++ ++void xsc_free_buf_contig(struct xsc_context *xctx, struct xsc_buf *buf) ++{ ++ ibv_dofork_range(buf->buf, buf->length); ++ munmap(buf->buf, buf->length); ++} ++ ++int xsc_alloc_buf(struct xsc_buf *buf, size_t size, int page_size) ++{ ++ int ret; ++ int al_size; ++ ++ al_size = align(size, page_size); ++ ret = posix_memalign(&buf->buf, page_size, al_size); ++ if (ret) ++ return ret; ++ ++ ret = ibv_dontfork_range(buf->buf, al_size); ++ if (ret) ++ free(buf->buf); ++ ++ if (!ret) { ++ buf->length = al_size; ++ buf->type = XSC_ALLOC_TYPE_ANON; ++ } ++ ++ return ret; ++} ++ ++void xsc_free_buf(struct xsc_buf *buf) ++{ ++ ibv_dofork_range(buf->buf, buf->length); ++ free(buf->buf); ++} +diff --git a/providers/xscale/cq.c b/providers/xscale/cq.c +new file mode 100644 +index 0000000..e2619f0 +--- /dev/null ++++ b/providers/xscale/cq.c +@@ -0,0 +1,1410 @@ ++/* ++ * Copyright (c) 2021 - 2022, Shanghai Yunsilicon Technology Co., Ltd. ++ * All rights reserved. 
++ */ ++ ++#include ++ ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include ++#include ++#include ++ ++#include "xscale.h" ++#include "wqe.h" ++#include "xsc_hsi.h" ++ ++enum { ++ CQ_OK = 0, ++ CQ_EMPTY = -1, ++ CQ_POLL_ERR = -2 ++}; ++ ++enum { ++ XSC_CQE_APP_TAG_MATCHING = 1, ++}; ++ ++enum { ++ XSC_CQE_APP_OP_TM_CONSUMED = 0x1, ++ XSC_CQE_APP_OP_TM_EXPECTED = 0x2, ++ XSC_CQE_APP_OP_TM_UNEXPECTED = 0x3, ++ XSC_CQE_APP_OP_TM_NO_TAG = 0x4, ++ XSC_CQE_APP_OP_TM_APPEND = 0x5, ++ XSC_CQE_APP_OP_TM_REMOVE = 0x6, ++ XSC_CQE_APP_OP_TM_NOOP = 0x7, ++ XSC_CQE_APP_OP_TM_CONSUMED_SW_RDNV = 0x9, ++ XSC_CQE_APP_OP_TM_CONSUMED_MSG = 0xA, ++ XSC_CQE_APP_OP_TM_CONSUMED_MSG_SW_RDNV = 0xB, ++ XSC_CQE_APP_OP_TM_MSG_COMPLETION_CANCELED = 0xC, ++}; ++ ++static const uint32_t xsc_msg_opcode[][2][2] = { ++ [XSC_MSG_OPCODE_SEND][XSC_REQ][XSC_WITHOUT_IMMDT] = XSC_OPCODE_RDMA_REQ_SEND, ++ [XSC_MSG_OPCODE_SEND][XSC_REQ][XSC_WITH_IMMDT] = XSC_OPCODE_RDMA_REQ_SEND_IMMDT, ++ [XSC_MSG_OPCODE_SEND][XSC_RSP][XSC_WITHOUT_IMMDT] = XSC_OPCODE_RDMA_RSP_RECV, ++ [XSC_MSG_OPCODE_SEND][XSC_RSP][XSC_WITH_IMMDT] = XSC_OPCODE_RDMA_RSP_RECV_IMMDT, ++ [XSC_MSG_OPCODE_RDMA_WRITE][XSC_REQ][XSC_WITHOUT_IMMDT] = XSC_OPCODE_RDMA_REQ_WRITE, ++ [XSC_MSG_OPCODE_RDMA_WRITE][XSC_REQ][XSC_WITH_IMMDT] = XSC_OPCODE_RDMA_REQ_WRITE_IMMDT, ++ [XSC_MSG_OPCODE_RDMA_WRITE][XSC_RSP][XSC_WITHOUT_IMMDT] = XSC_OPCODE_RDMA_CQE_ERROR, ++ [XSC_MSG_OPCODE_RDMA_WRITE][XSC_RSP][XSC_WITH_IMMDT] = XSC_OPCODE_RDMA_RSP_WRITE_IMMDT, ++ [XSC_MSG_OPCODE_RDMA_READ][XSC_REQ][XSC_WITHOUT_IMMDT] = XSC_OPCODE_RDMA_REQ_READ, ++ [XSC_MSG_OPCODE_RDMA_READ][XSC_REQ][XSC_WITH_IMMDT] = XSC_OPCODE_RDMA_CQE_ERROR, ++ [XSC_MSG_OPCODE_RDMA_READ][XSC_RSP][XSC_WITHOUT_IMMDT] = XSC_OPCODE_RDMA_CQE_ERROR, ++ [XSC_MSG_OPCODE_RDMA_READ][XSC_RSP][XSC_WITH_IMMDT] = XSC_OPCODE_RDMA_CQE_ERROR, ++}; ++ ++static const uint32_t xsc_cqe_opcode[] = { ++ [XSC_OPCODE_RDMA_REQ_SEND] = IBV_WC_SEND, ++ [XSC_OPCODE_RDMA_REQ_SEND_IMMDT] = IBV_WC_SEND, ++ [XSC_OPCODE_RDMA_RSP_RECV] = IBV_WC_RECV, ++ [XSC_OPCODE_RDMA_RSP_RECV_IMMDT] = IBV_WC_RECV, ++ [XSC_OPCODE_RDMA_REQ_WRITE] = IBV_WC_RDMA_WRITE, ++ [XSC_OPCODE_RDMA_REQ_WRITE_IMMDT] = IBV_WC_RDMA_WRITE, ++ [XSC_OPCODE_RDMA_RSP_WRITE_IMMDT] = IBV_WC_RECV_RDMA_WITH_IMM, ++ [XSC_OPCODE_RDMA_REQ_READ] = IBV_WC_RDMA_READ, ++}; ++ ++int xsc_stall_num_loop = 60; ++int xsc_stall_cq_poll_min = 60; ++int xsc_stall_cq_poll_max = 100000; ++int xsc_stall_cq_inc_step = 100; ++int xsc_stall_cq_dec_step = 10; ++ ++static inline uint8_t xsc_get_cqe_opcode(struct xsc_cqe *cqe) ALWAYS_INLINE; ++static inline uint8_t xsc_get_cqe_opcode(struct xsc_cqe *cqe) ++{ ++ if (cqe->is_error) ++ return cqe->type ? 
XSC_OPCODE_RDMA_RSP_ERROR : XSC_OPCODE_RDMA_REQ_ERROR; ++ if (cqe->msg_opcode > XSC_MSG_OPCODE_RDMA_READ) { ++ printf("rdma cqe msg code should be send/write/read\n"); ++ return XSC_OPCODE_RDMA_CQE_ERROR; ++ } ++ return xsc_msg_opcode[cqe->msg_opcode][cqe->type][cqe->with_immdt]; ++} ++ ++static inline uint8_t get_cqe_l3_hdr_type(struct xsc_cqe64 *cqe) ++{ ++ return (cqe->l4_hdr_type_etc >> 2) & 0x3; ++} ++ ++static void *get_cqe(struct xsc_cq *cq, int n) ++{ ++ return cq->active_buf->buf + n * cq->cqe_sz; ++} ++ ++static void *get_sw_cqe(struct xsc_cq *cq, int n) ++{ ++ int cid = n & (cq->verbs_cq.cq_ex.cqe - 1); ++ struct xsc_cqe *cqe = get_cqe(cq, cid); ++ if (likely(xsc_get_cqe_sw_own(cqe, n, cq->log2_cq_ring_sz))) ++ return cqe; ++ else ++ return NULL; ++} ++ ++static void *next_cqe_sw(struct xsc_cq *cq) ++{ ++ return get_sw_cqe(cq, cq->cons_index); ++} ++ ++static void update_cons_index(struct xsc_cq *cq) ++{ ++ union xsc_db_data db; ++ ++ db.raw_data = cq->cons_index; ++ db.cqn = cq->cqn; ++ WR_REG(cq->db, db.raw_data); ++} ++ ++static inline void handle_good_req( ++ struct ibv_wc *wc, struct xsc_cqe *cqe, struct xsc_qp *qp, struct xsc_wq *wq, uint8_t opcode) ++{ ++ int idx; ++ struct xsc_send_wqe_ctrl_seg *ctrl; ++ ++ wc->opcode = xsc_cqe_opcode[opcode]; ++ wc->status = IBV_WC_SUCCESS; ++ idx = RD_LE_16(cqe->wqe_id); ++ idx >>= (qp->sq.wqe_shift - XSC_BASE_WQE_SHIFT); ++ idx &= (wq->wqe_cnt -1); ++ wc->wr_id = wq->wrid[idx]; ++ wq->tail = wq->wqe_head[idx] + 1; ++ if (opcode == XSC_OPCODE_RDMA_REQ_READ) { ++ ctrl = xsc_get_send_wqe(qp, idx); ++ wc->byte_len = ctrl->msg_len; ++ } ++ wq->flush_wqe_cnt--; ++ ++ xsc_dbg(to_xctx(qp->ibv_qp->context)->dbg_fp, XSC_DBG_CQ_CQE, ++ "wqeid:%u, wq tail:%u\n", idx, wq->tail); ++} ++ ++/* Returns IBV_WC_IP_CSUM_OK or 0 */ ++static inline int get_csum_ok(struct xsc_cqe64 *cqe) ++{ ++ return (((cqe->hds_ip_ext & (XSC_CQE_L4_OK | XSC_CQE_L3_OK)) == ++ (XSC_CQE_L4_OK | XSC_CQE_L3_OK)) & ++ (get_cqe_l3_hdr_type(cqe) == XSC_CQE_L3_HDR_TYPE_IPV4)) ++ << IBV_WC_IP_CSUM_OK_SHIFT; ++} ++ ++static inline void handle_good_responder( ++ struct ibv_wc *wc, struct xsc_cqe *cqe, struct xsc_wq *wq, uint8_t opcode) ++{ ++ uint16_t idx; ++ struct xsc_qp *qp = container_of(wq, struct xsc_qp, rq); ++ ++ wc->byte_len = RD_LE_32(cqe->msg_len); ++ wc->opcode = xsc_cqe_opcode[opcode]; ++ wc->status = IBV_WC_SUCCESS; ++ ++ idx = wq->tail & (wq->wqe_cnt - 1); ++ wc->wr_id = wq->wrid[idx]; ++ ++wq->tail; ++ wq->flush_wqe_cnt--; ++ ++ xsc_dbg(to_xctx(qp->ibv_qp->context)->dbg_fp, XSC_DBG_CQ_CQE, ++ "recv cqe idx:%u, len:%u\n", idx, wc->byte_len); ++} ++ ++static void dump_cqe(void *buf) ++{ ++ __le32 *p = buf; ++ int i; ++ ++ for (i = 0; i < 8; i += 4) ++ printf("0x%08x 0x%08x 0x%08x 0x%08x\n", p[i], p[i+1], p[i+2], p[i+3]); ++} ++ ++static enum ibv_wc_status xsc_cqe_error_code(struct xsc_cqe *cqe) ++{ ++ switch (cqe->error_code) { ++ case XSC_ERR_CODE_NAK_RETRY: ++ return IBV_WC_RETRY_EXC_ERR; ++ case XSC_ERR_CODE_NAK_OPCODE: ++ return IBV_WC_BAD_RESP_ERR; ++ case XSC_ERR_CODE_NAK_MR: ++ return IBV_WC_REM_ACCESS_ERR; ++ case XSC_ERR_CODE_NAK_OPERATION: ++ return IBV_WC_REM_OP_ERR; ++ case XSC_ERR_CODE_NAK_RNR: ++ return IBV_WC_RNR_RETRY_EXC_ERR; ++ case XSC_ERR_CODE_LOCAL_MR: ++ return IBV_WC_LOC_PROT_ERR; ++ case XSC_ERR_CODE_LOCAL_LEN: ++ return IBV_WC_LOC_LEN_ERR; ++ case XSC_ERR_CODE_LEN_GEN_CQE: ++ return IBV_WC_LOC_LEN_ERR; ++ case XSC_ERR_CODE_OPERATION: ++ return IBV_WC_LOC_ACCESS_ERR; ++ case XSC_ERR_CODE_FLUSH: ++ return IBV_WC_WR_FLUSH_ERR; ++ case 
XSC_ERR_CODE_MALF_WQE_HOST: ++ case XSC_ERR_CODE_STRG_ACC_GEN_CQE: ++ return IBV_WC_FATAL_ERR; ++ case XSC_ERR_CODE_OPCODE_GEN_CQE: ++ case XSC_ERR_CODE_LOCAL_OPCODE: ++ default: ++ return IBV_WC_GENERAL_ERR; ++ } ++} ++ ++ ++static inline bool xsc_qp_need_cqe(struct xsc_qp *qp, int *type, int *wqe_id) ++{ ++ struct xsc_wq *wq; ++ struct xsc_send_wqe_ctrl_seg *ctrl; ++ int idx = 0; ++ ++ /* check recv queue work request */ ++ wq = &qp->rq; ++ if (wq->head - wq->tail > 0) { ++ *type = 1; ++ return true; ++ } ++ /* check send queue work request */ ++ wq = &qp->sq; ++ while (wq->head - wq->tail > 0) { ++ idx = wq->tail & (wq->wqe_cnt - 1); ++ ++wq->tail; ++ ctrl = xsc_get_send_wqe(qp, idx); ++ if (ctrl->ce) { ++ *type = 0; ++ *wqe_id = idx << (qp->sq.wqe_shift - XSC_BASE_WQE_SHIFT); ++ return true; ++ } ++ } ++ return false; ++} ++ ++static inline void handle_bad_req( ++ struct xsc_context *xctx, ++ struct ibv_wc *wc, struct xsc_cqe *cqe, struct xsc_qp *qp, struct xsc_wq *wq) ++{ ++ int idx; ++ wc->status = xsc_cqe_error_code(cqe); ++ wc->vendor_err = cqe->error_code; ++ idx = RD_LE_16(cqe->wqe_id); ++ idx >>= (qp->sq.wqe_shift - XSC_BASE_WQE_SHIFT); ++ idx &= (wq->wqe_cnt -1); ++ wq->tail = wq->wqe_head[idx] + 1; ++ wc->wr_id = wq->wrid[idx]; ++ wq->flush_wqe_cnt--; ++ ++ if (cqe->error_code != XSC_ERR_CODE_FLUSH) { ++ printf("%s: got completion with error:\n", xctx->hostname); ++ dump_cqe(cqe); ++ } ++} ++ ++static inline void handle_bad_responder( ++ struct xsc_context *xctx, ++ struct ibv_wc *wc, struct xsc_cqe *cqe, struct xsc_wq *wq) ++{ ++ wc->status = xsc_cqe_error_code(cqe); ++ wc->vendor_err = cqe->error_code; ++ ++ ++wq->tail; ++ wq->flush_wqe_cnt--; ++ ++ if (cqe->error_code != XSC_ERR_CODE_FLUSH) { ++ printf("%s: got completion with error:\n", xctx->hostname); ++ dump_cqe(cqe); ++ } ++} ++ ++#if defined(__x86_64__) || defined (__i386__) ++static inline unsigned long get_cycles(void) ++{ ++ uint32_t low, high; ++ uint64_t val; ++ asm volatile ("rdtsc" : "=a" (low), "=d" (high)); ++ val = high; ++ val = (val << 32) | low; ++ return val; ++} ++ ++static void xsc_stall_poll_cq(void) ++{ ++ int i; ++ ++ for (i = 0; i < xsc_stall_num_loop; i++) ++ (void)get_cycles(); ++} ++static void xsc_stall_cycles_poll_cq(uint64_t cycles) ++{ ++ while (get_cycles() < cycles) ++ ; /* Nothing */ ++} ++static void xsc_get_cycles(uint64_t *cycles) ++{ ++ *cycles = get_cycles(); ++} ++#else ++static void xsc_stall_poll_cq(void) ++{ ++} ++static void xsc_stall_cycles_poll_cq(uint64_t cycles) ++{ ++} ++static void xsc_get_cycles(uint64_t *cycles) ++{ ++} ++#endif ++ ++static inline int get_qp_ctx(struct xsc_context *xctx, ++ struct xsc_resource **cur_rsc, ++ uint32_t qpn) ++ ALWAYS_INLINE; ++static inline int get_qp_ctx(struct xsc_context *xctx, ++ struct xsc_resource **cur_rsc, ++ uint32_t qpn) ++{ ++ if (!*cur_rsc || (qpn != (*cur_rsc)->rsn)) { ++ /* ++ * We do not have to take the QP table lock here, ++ * because CQs will be locked while QPs are removed ++ * from the table. 
++ */ ++ *cur_rsc = (struct xsc_resource *)xsc_find_qp(xctx, qpn); ++ if (unlikely(!*cur_rsc)) ++ return CQ_POLL_ERR; ++ } ++ ++ return CQ_OK; ++} ++ ++static inline int xsc_get_next_cqe(struct xsc_cq *cq, ++ struct xsc_cqe64 **pcqe64, ++ void **pcqe) ++ ALWAYS_INLINE; ++static inline int xsc_get_next_cqe(struct xsc_cq *cq, ++ struct xsc_cqe64 **pcqe64, ++ void **pcqe) ++{ ++ void *cqe = next_cqe_sw(cq); ++ if (!cqe) ++ return CQ_EMPTY; ++ ++ ++cq->cons_index; ++ ++ /* ++ * Make sure we read CQ entry contents after we've checked the ++ * ownership bit. ++ */ ++ udma_from_device_barrier(); ++ ++ *pcqe = cqe; ++ ++ return CQ_OK; ++} ++ ++static inline int xsc_parse_cqe(struct xsc_cq *cq, ++ struct xsc_cqe *cqe, ++ struct xsc_resource **cur_rsc, ++ struct ibv_wc *wc, ++ int lazy) ++{ ++ struct xsc_wq *wq; ++ uint32_t qp_id; ++ uint8_t opcode; ++ int err = 0; ++ struct xsc_qp *xqp = NULL; ++ struct xsc_context *xctx; ++ ++ xctx = to_xctx(ibv_cq_ex_to_cq(&cq->verbs_cq.cq_ex)->context); ++ qp_id = cqe->qp_id; ++ qp_id = RD_LE_16(qp_id); ++ wc->wc_flags = 0; ++ wc->qp_num = qp_id; ++ opcode = xsc_get_cqe_opcode(cqe); ++ ++ xsc_dbg(xctx->dbg_fp, XSC_DBG_CQ_CQE, "opcode:0x%x qp_num:%u\n", opcode, qp_id); ++ switch (opcode) { ++ case XSC_OPCODE_RDMA_REQ_SEND_IMMDT: ++ case XSC_OPCODE_RDMA_REQ_WRITE_IMMDT: ++ wc->wc_flags |= IBV_WC_WITH_IMM; ++ SWITCH_FALLTHROUGH; ++ case XSC_OPCODE_RDMA_REQ_SEND: ++ case XSC_OPCODE_RDMA_REQ_WRITE: ++ case XSC_OPCODE_RDMA_REQ_READ: ++ err = get_qp_ctx(xctx, cur_rsc, qp_id); ++ if (unlikely(err)) ++ return CQ_EMPTY; ++ xqp = rsc_to_xqp(*cur_rsc); ++ wq = &xqp->sq; ++ handle_good_req(wc, cqe, xqp, wq, opcode); ++ break; ++ case XSC_OPCODE_RDMA_RSP_RECV_IMMDT: ++ case XSC_OPCODE_RDMA_RSP_WRITE_IMMDT: ++ wc->wc_flags |= IBV_WC_WITH_IMM; ++ wc->imm_data = cqe->imm_data; ++ SWITCH_FALLTHROUGH; ++ case XSC_OPCODE_RDMA_RSP_RECV: ++ err = get_qp_ctx(xctx, cur_rsc, qp_id); ++ if (unlikely(err)) ++ return CQ_EMPTY; ++ xqp = rsc_to_xqp(*cur_rsc); ++ wq = &xqp->rq; ++ handle_good_responder(wc, cqe, wq, opcode); ++ break; ++ case XSC_OPCODE_RDMA_REQ_ERROR: ++ err = get_qp_ctx(xctx, cur_rsc, qp_id); ++ if (unlikely(err)) ++ return CQ_POLL_ERR; ++ xqp = rsc_to_xqp(*cur_rsc); ++ wq = &xqp->sq; ++ handle_bad_req(xctx, wc, cqe, xqp, wq); ++ break; ++ case XSC_OPCODE_RDMA_RSP_ERROR: ++ err = get_qp_ctx(xctx, cur_rsc, qp_id); ++ if (unlikely(err)) ++ return CQ_POLL_ERR; ++ xqp = rsc_to_xqp(*cur_rsc); ++ wq = &xqp->rq; ++ handle_bad_responder(xctx, wc, cqe, wq); ++ break; ++ case XSC_OPCODE_RDMA_CQE_ERROR: ++ printf("%s: got completion with cqe format error:\n", xctx->hostname); ++ dump_cqe(cqe); ++ SWITCH_FALLTHROUGH; ++ default: ++ return CQ_POLL_ERR; ++ } ++ return CQ_OK; ++} ++ ++static inline int xsc_parse_lazy_cqe(struct xsc_cq *cq, ++ struct xsc_cqe64 *cqe64, ++ void *cqe, int cqe_ver) ++ ALWAYS_INLINE; ++static inline int xsc_parse_lazy_cqe(struct xsc_cq *cq, ++ struct xsc_cqe64 *cqe64, ++ void *cqe, int cqe_ver) ++{ ++ return xsc_parse_cqe(cq, cqe, &cq->cur_rsc, NULL, 1); ++} ++ ++static inline int xsc_poll_one(struct xsc_cq *cq, ++ struct xsc_resource **cur_rsc, ++ struct ibv_wc *wc) ++ ALWAYS_INLINE; ++static inline int xsc_poll_one(struct xsc_cq *cq, ++ struct xsc_resource **cur_rsc, ++ struct ibv_wc *wc) ++{ ++ struct xsc_cqe *cqe = get_sw_cqe(cq, cq->cons_index); ++ if (cqe == NULL) { ++ return CQ_EMPTY; ++ } ++ memset(wc, 0, sizeof(*wc)); ++ ++ ++cq->cons_index; ++ ++ /* ++ * Make sure we read CQ entry contents after we've checked the ++ * ownership bit. 
++ */ ++ udma_from_device_barrier(); ++ return xsc_parse_cqe(cq, cqe, cur_rsc, wc, 0); ++} ++ ++static inline void gen_flush_err_cqe(struct xsc_err_state_qp_node *err_node, ++ uint32_t qp_id, struct xsc_wq *wq, uint32_t idx, ++ struct ibv_wc *wc) ++{ ++ memset(wc, 0, sizeof(*wc)); ++ if (err_node->is_sq) { ++ switch (wq->wr_opcode[idx]){ ++ case IBV_WR_SEND: ++ case IBV_WR_SEND_WITH_IMM: ++ case IBV_WR_SEND_WITH_INV: ++ wc->opcode = IBV_WC_SEND; ++ break; ++ case IBV_WR_RDMA_WRITE: ++ case IBV_WR_RDMA_WRITE_WITH_IMM: ++ wc->opcode = IBV_WC_RDMA_WRITE; ++ break; ++ case IBV_WR_RDMA_READ: ++ wc->opcode = IBV_WC_RDMA_READ; ++ } ++ } else { ++ wc->opcode = IBV_WC_RECV; ++ } ++ ++ wc->qp_num = qp_id; ++ wc->status = IBV_WC_WR_FLUSH_ERR; ++ wc->vendor_err = XSC_ERR_CODE_FLUSH; ++ wc->wr_id = wq->wrid[idx]; ++ wq->tail++; ++ wq->flush_wqe_cnt--; ++} ++ ++static inline int xsc_generate_flush_err_cqe(struct ibv_cq *ibcq, ++ int ne, int *npolled, struct ibv_wc *wc) ++{ ++ uint32_t qp_id = 0; ++ uint32_t flush_wqe_cnt = 0; ++ int sw_npolled = 0; ++ int ret = 0; ++ uint32_t idx = 0; ++ struct xsc_err_state_qp_node *err_qp_node, *tmp; ++ struct xsc_resource *res = NULL; ++ struct xsc_context *xctx = to_xctx(ibcq->context); ++ struct xsc_cq *cq = to_xcq(ibcq); ++ struct xsc_wq *wq; ++ ++ list_for_each_safe(&cq->err_state_qp_list, err_qp_node, tmp, entry) { ++ if (!err_qp_node) ++ break; ++ ++ sw_npolled = 0; ++ qp_id = err_qp_node->qp_id; ++ ret = get_qp_ctx(xctx, &res, qp_id); ++ if (unlikely(ret)) ++ continue; ++ wq = err_qp_node->is_sq ? &(rsc_to_xqp(res)->sq):&(rsc_to_xqp(res)->rq); ++ flush_wqe_cnt = wq->flush_wqe_cnt; ++ xsc_dbg(xctx->dbg_fp, XSC_DBG_CQ_CQE, "is_sq %d, flush_wq_cnt %d, ne %d, npolled %d, qp_id %d\n", ++ err_qp_node->is_sq, wq->flush_wqe_cnt, ne, *npolled, qp_id); ++ ++ if (flush_wqe_cnt <= (ne - *npolled)) { ++ while (sw_npolled < flush_wqe_cnt) { ++ idx = wq->tail & (wq->wqe_cnt - 1); ++ if (err_qp_node->is_sq && !wq->need_flush[idx]) { ++ wq->tail++; ++ continue; ++ } else { ++ gen_flush_err_cqe(err_qp_node, err_qp_node->qp_id, wq, ++ idx, wc + *npolled + sw_npolled); ++ ++sw_npolled; ++ } ++ } ++ list_del(&err_qp_node->entry); ++ free(err_qp_node); ++ *npolled += sw_npolled; ++ } else { ++ while (sw_npolled < (ne - *npolled)) { ++ idx = wq->tail & (wq->wqe_cnt - 1); ++ if (err_qp_node->is_sq && !wq->need_flush[idx]) { ++ wq->tail++; ++ continue; ++ } else { ++ gen_flush_err_cqe(err_qp_node, err_qp_node->qp_id, wq, ++ idx, wc + *npolled + sw_npolled); ++ ++sw_npolled; ++ } ++ } ++ *npolled = ne; ++ break; ++ } ++ } ++ ++ return 0; ++} ++ ++static inline int poll_cq(struct ibv_cq *ibcq, int ne, struct ibv_wc *wc) ALWAYS_INLINE; ++static inline int poll_cq(struct ibv_cq *ibcq, int ne, struct ibv_wc *wc) ++{ ++ struct xsc_cq *cq = to_xcq(ibcq); ++ struct xsc_resource *rsc = NULL; ++ int npolled = 0; ++ int err = CQ_OK; ++ uint32_t next_cid = cq->cons_index; ++ ++ xsc_spin_lock(&cq->lock); ++ for (npolled = 0; npolled < ne; ++npolled) { ++ err = xsc_poll_one(cq, &rsc, wc + npolled); ++ if (err != CQ_OK) ++ break; ++ } ++ ++ if (err == CQ_EMPTY) { ++ if (npolled < ne && !(list_empty(&cq->err_state_qp_list))) { ++ xsc_generate_flush_err_cqe(ibcq, ne, &npolled, wc); ++ } ++ } ++ ++ udma_to_device_barrier(); ++ if (next_cid != cq->cons_index) ++ update_cons_index(cq); ++ xsc_spin_unlock(&cq->lock); ++ ++ return err == CQ_POLL_ERR ? 
err : npolled; ++} ++ ++enum polling_mode { ++ POLLING_MODE_NO_STALL, ++ POLLING_MODE_STALL, ++ POLLING_MODE_STALL_ADAPTIVE ++}; ++ ++static inline void _xsc_end_poll(struct ibv_cq_ex *ibcq, ++ int lock, enum polling_mode stall) ++ ALWAYS_INLINE; ++static inline void _xsc_end_poll(struct ibv_cq_ex *ibcq, ++ int lock, enum polling_mode stall) ++{ ++ struct xsc_cq *cq = to_xcq(ibv_cq_ex_to_cq(ibcq)); ++ ++ update_cons_index(cq); ++ ++ if (lock) ++ xsc_spin_unlock(&cq->lock); ++ ++ if (stall) { ++ if (stall == POLLING_MODE_STALL_ADAPTIVE) { ++ if (!(cq->flags & XSC_CQ_FLAGS_FOUND_CQES)) { ++ cq->stall_cycles = max(cq->stall_cycles - xsc_stall_cq_dec_step, ++ xsc_stall_cq_poll_min); ++ xsc_get_cycles(&cq->stall_last_count); ++ } else if (cq->flags & XSC_CQ_FLAGS_EMPTY_DURING_POLL) { ++ cq->stall_cycles = min(cq->stall_cycles + xsc_stall_cq_inc_step, ++ xsc_stall_cq_poll_max); ++ xsc_get_cycles(&cq->stall_last_count); ++ } else { ++ cq->stall_cycles = max(cq->stall_cycles - xsc_stall_cq_dec_step, ++ xsc_stall_cq_poll_min); ++ cq->stall_last_count = 0; ++ } ++ } else if (!(cq->flags & XSC_CQ_FLAGS_FOUND_CQES)) { ++ cq->stall_next_poll = 1; ++ } ++ ++ cq->flags &= ~(XSC_CQ_FLAGS_FOUND_CQES | XSC_CQ_FLAGS_EMPTY_DURING_POLL); ++ } ++} ++ ++static inline int xsc_start_poll(struct ibv_cq_ex *ibcq, struct ibv_poll_cq_attr *attr, ++ int lock, enum polling_mode stall, ++ int cqe_version, int clock_update) ++ ALWAYS_INLINE; ++static inline int xsc_start_poll(struct ibv_cq_ex *ibcq, struct ibv_poll_cq_attr *attr, ++ int lock, enum polling_mode stall, ++ int cqe_version, int clock_update) ++{ ++ struct xsc_cq *cq = to_xcq(ibv_cq_ex_to_cq(ibcq)); ++ struct xsc_cqe64 *cqe64; ++ void *cqe; ++ int err; ++ ++ if (unlikely(attr->comp_mask)) ++ return EINVAL; ++ ++ if (stall) { ++ if (stall == POLLING_MODE_STALL_ADAPTIVE) { ++ if (cq->stall_last_count) ++ xsc_stall_cycles_poll_cq(cq->stall_last_count + cq->stall_cycles); ++ } else if (cq->stall_next_poll) { ++ cq->stall_next_poll = 0; ++ xsc_stall_poll_cq(); ++ } ++ } ++ ++ if (lock) ++ xsc_spin_lock(&cq->lock); ++ ++ cq->cur_rsc = NULL; ++ ++ err = xsc_get_next_cqe(cq, &cqe64, &cqe); ++ if (err == CQ_EMPTY) { ++ if (lock) ++ xsc_spin_unlock(&cq->lock); ++ ++ if (stall) { ++ if (stall == POLLING_MODE_STALL_ADAPTIVE) { ++ cq->stall_cycles = max(cq->stall_cycles - xsc_stall_cq_dec_step, ++ xsc_stall_cq_poll_min); ++ xsc_get_cycles(&cq->stall_last_count); ++ } else { ++ cq->stall_next_poll = 1; ++ } ++ } ++ ++ return ENOENT; ++ } ++ ++ if (stall) ++ cq->flags |= XSC_CQ_FLAGS_FOUND_CQES; ++ ++ err = xsc_parse_lazy_cqe(cq, cqe64, cqe, cqe_version); ++ if (lock && err) ++ xsc_spin_unlock(&cq->lock); ++ ++ if (stall && err) { ++ if (stall == POLLING_MODE_STALL_ADAPTIVE) { ++ cq->stall_cycles = max(cq->stall_cycles - xsc_stall_cq_dec_step, ++ xsc_stall_cq_poll_min); ++ cq->stall_last_count = 0; ++ } ++ ++ cq->flags &= ~(XSC_CQ_FLAGS_FOUND_CQES); ++ ++ goto out; ++ } ++ ++ if (clock_update && !err) ++ err = xscdv_get_clock_info(ibcq->context, &cq->last_clock_info); ++ ++out: ++ return err; ++} ++ ++static inline int xsc_next_poll(struct ibv_cq_ex *ibcq, ++ enum polling_mode stall, int cqe_version) ++ ALWAYS_INLINE; ++static inline int xsc_next_poll(struct ibv_cq_ex *ibcq, ++ enum polling_mode stall, ++ int cqe_version) ++{ ++ struct xsc_cq *cq = to_xcq(ibv_cq_ex_to_cq(ibcq)); ++ struct xsc_cqe64 *cqe64; ++ void *cqe; ++ int err; ++ ++ err = xsc_get_next_cqe(cq, &cqe64, &cqe); ++ if (err == CQ_EMPTY) { ++ if (stall == POLLING_MODE_STALL_ADAPTIVE) ++ cq->flags |= 
XSC_CQ_FLAGS_EMPTY_DURING_POLL; ++ ++ return ENOENT; ++ } ++ ++ return xsc_parse_lazy_cqe(cq, cqe64, cqe, cqe_version); ++} ++ ++static inline int xsc_next_poll_adaptive_v0(struct ibv_cq_ex *ibcq) ++{ ++ return xsc_next_poll(ibcq, POLLING_MODE_STALL_ADAPTIVE, 0); ++} ++ ++static inline int xsc_next_poll_adaptive_v1(struct ibv_cq_ex *ibcq) ++{ ++ return xsc_next_poll(ibcq, POLLING_MODE_STALL_ADAPTIVE, 1); ++} ++ ++static inline int xsc_next_poll_v0(struct ibv_cq_ex *ibcq) ++{ ++ return xsc_next_poll(ibcq, 0, 0); ++} ++ ++static inline int xsc_next_poll_v1(struct ibv_cq_ex *ibcq) ++{ ++ return xsc_next_poll(ibcq, 0, 1); ++} ++ ++static inline int xsc_start_poll_v0(struct ibv_cq_ex *ibcq, ++ struct ibv_poll_cq_attr *attr) ++{ ++ return xsc_start_poll(ibcq, attr, 0, 0, 0, 0); ++} ++ ++static inline int xsc_start_poll_v1(struct ibv_cq_ex *ibcq, ++ struct ibv_poll_cq_attr *attr) ++{ ++ return xsc_start_poll(ibcq, attr, 0, 0, 1, 0); ++} ++ ++static inline int xsc_start_poll_v0_lock(struct ibv_cq_ex *ibcq, ++ struct ibv_poll_cq_attr *attr) ++{ ++ return xsc_start_poll(ibcq, attr, 1, 0, 0, 0); ++} ++ ++static inline int xsc_start_poll_v1_lock(struct ibv_cq_ex *ibcq, ++ struct ibv_poll_cq_attr *attr) ++{ ++ return xsc_start_poll(ibcq, attr, 1, 0, 1, 0); ++} ++ ++static inline int xsc_start_poll_adaptive_stall_v0_lock(struct ibv_cq_ex *ibcq, ++ struct ibv_poll_cq_attr *attr) ++{ ++ return xsc_start_poll(ibcq, attr, 1, POLLING_MODE_STALL_ADAPTIVE, 0, 0); ++} ++ ++static inline int xsc_start_poll_stall_v0_lock(struct ibv_cq_ex *ibcq, ++ struct ibv_poll_cq_attr *attr) ++{ ++ return xsc_start_poll(ibcq, attr, 1, POLLING_MODE_STALL, 0, 0); ++} ++ ++static inline int xsc_start_poll_adaptive_stall_v1_lock(struct ibv_cq_ex *ibcq, ++ struct ibv_poll_cq_attr *attr) ++{ ++ return xsc_start_poll(ibcq, attr, 1, POLLING_MODE_STALL_ADAPTIVE, 1, 0); ++} ++ ++static inline int xsc_start_poll_stall_v1_lock(struct ibv_cq_ex *ibcq, ++ struct ibv_poll_cq_attr *attr) ++{ ++ return xsc_start_poll(ibcq, attr, 1, POLLING_MODE_STALL, 1, 0); ++} ++ ++static inline int xsc_start_poll_stall_v0(struct ibv_cq_ex *ibcq, ++ struct ibv_poll_cq_attr *attr) ++{ ++ return xsc_start_poll(ibcq, attr, 0, POLLING_MODE_STALL, 0, 0); ++} ++ ++static inline int xsc_start_poll_adaptive_stall_v0(struct ibv_cq_ex *ibcq, ++ struct ibv_poll_cq_attr *attr) ++{ ++ return xsc_start_poll(ibcq, attr, 0, POLLING_MODE_STALL_ADAPTIVE, 0, 0); ++} ++ ++static inline int xsc_start_poll_adaptive_stall_v1(struct ibv_cq_ex *ibcq, ++ struct ibv_poll_cq_attr *attr) ++{ ++ return xsc_start_poll(ibcq, attr, 0, POLLING_MODE_STALL_ADAPTIVE, 1, 0); ++} ++ ++static inline int xsc_start_poll_stall_v1(struct ibv_cq_ex *ibcq, ++ struct ibv_poll_cq_attr *attr) ++{ ++ return xsc_start_poll(ibcq, attr, 0, POLLING_MODE_STALL, 1, 0); ++} ++ ++static inline int xsc_start_poll_v0_lock_clock_update(struct ibv_cq_ex *ibcq, ++ struct ibv_poll_cq_attr *attr) ++{ ++ return xsc_start_poll(ibcq, attr, 1, 0, 0, 1); ++} ++ ++static inline int xsc_start_poll_v1_lock_clock_update(struct ibv_cq_ex *ibcq, ++ struct ibv_poll_cq_attr *attr) ++{ ++ return xsc_start_poll(ibcq, attr, 1, 0, 1, 1); ++} ++ ++static inline int xsc_start_poll_v1_clock_update(struct ibv_cq_ex *ibcq, ++ struct ibv_poll_cq_attr *attr) ++{ ++ return xsc_start_poll(ibcq, attr, 0, 0, 1, 1); ++} ++ ++static inline int xsc_start_poll_v0_clock_update(struct ibv_cq_ex *ibcq, ++ struct ibv_poll_cq_attr *attr) ++{ ++ return xsc_start_poll(ibcq, attr, 0, 0, 0, 1); ++} ++ ++static inline int 
xsc_start_poll_stall_v1_lock_clock_update(struct ibv_cq_ex *ibcq, ++ struct ibv_poll_cq_attr *attr) ++{ ++ return xsc_start_poll(ibcq, attr, 1, POLLING_MODE_STALL, 1, 1); ++} ++ ++static inline int xsc_start_poll_stall_v0_lock_clock_update(struct ibv_cq_ex *ibcq, ++ struct ibv_poll_cq_attr *attr) ++{ ++ return xsc_start_poll(ibcq, attr, 1, POLLING_MODE_STALL, 0, 1); ++} ++ ++static inline int xsc_start_poll_stall_v1_clock_update(struct ibv_cq_ex *ibcq, ++ struct ibv_poll_cq_attr *attr) ++{ ++ return xsc_start_poll(ibcq, attr, 0, POLLING_MODE_STALL, 1, 1); ++} ++ ++static inline int xsc_start_poll_stall_v0_clock_update(struct ibv_cq_ex *ibcq, ++ struct ibv_poll_cq_attr *attr) ++{ ++ return xsc_start_poll(ibcq, attr, 0, POLLING_MODE_STALL, 0, 1); ++} ++ ++static inline int xsc_start_poll_adaptive_stall_v0_lock_clock_update(struct ibv_cq_ex *ibcq, ++ struct ibv_poll_cq_attr *attr) ++{ ++ return xsc_start_poll(ibcq, attr, 1, POLLING_MODE_STALL_ADAPTIVE, 0, 1); ++} ++ ++static inline int xsc_start_poll_adaptive_stall_v1_lock_clock_update(struct ibv_cq_ex *ibcq, ++ struct ibv_poll_cq_attr *attr) ++{ ++ return xsc_start_poll(ibcq, attr, 1, POLLING_MODE_STALL_ADAPTIVE, 1, 1); ++} ++ ++static inline int xsc_start_poll_adaptive_stall_v0_clock_update(struct ibv_cq_ex *ibcq, ++ struct ibv_poll_cq_attr *attr) ++{ ++ return xsc_start_poll(ibcq, attr, 0, POLLING_MODE_STALL_ADAPTIVE, 0, 1); ++} ++ ++static inline int xsc_start_poll_adaptive_stall_v1_clock_update(struct ibv_cq_ex *ibcq, ++ struct ibv_poll_cq_attr *attr) ++{ ++ return xsc_start_poll(ibcq, attr, 0, POLLING_MODE_STALL_ADAPTIVE, 1, 1); ++} ++ ++static inline void xsc_end_poll_adaptive_stall_lock(struct ibv_cq_ex *ibcq) ++{ ++ _xsc_end_poll(ibcq, 1, POLLING_MODE_STALL_ADAPTIVE); ++} ++ ++static inline void xsc_end_poll_stall_lock(struct ibv_cq_ex *ibcq) ++{ ++ _xsc_end_poll(ibcq, 1, POLLING_MODE_STALL); ++} ++ ++static inline void xsc_end_poll_adaptive_stall(struct ibv_cq_ex *ibcq) ++{ ++ _xsc_end_poll(ibcq, 0, POLLING_MODE_STALL_ADAPTIVE); ++} ++ ++static inline void xsc_end_poll_stall(struct ibv_cq_ex *ibcq) ++{ ++ _xsc_end_poll(ibcq, 0, POLLING_MODE_STALL); ++} ++ ++static inline void xsc_end_poll(struct ibv_cq_ex *ibcq) ++{ ++ _xsc_end_poll(ibcq, 0, 0); ++} ++ ++static inline void xsc_end_poll_lock(struct ibv_cq_ex *ibcq) ++{ ++ _xsc_end_poll(ibcq, 1, 0); ++} ++ ++int xsc_poll_cq(struct ibv_cq *ibcq, int ne, struct ibv_wc *wc) ++{ ++ return poll_cq(ibcq, ne, wc); ++} ++ ++static inline enum ibv_wc_opcode xsc_cq_read_wc_opcode(struct ibv_cq_ex *ibcq) ++{ ++ struct xsc_cq *cq = to_xcq(ibv_cq_ex_to_cq(ibcq)); ++ ++ switch (xscdv_get_cqe_opcode(cq->cqe64)) { ++ case XSC_CQE_RESP_WR_IMM: ++ return IBV_WC_RECV_RDMA_WITH_IMM; ++ case XSC_CQE_RESP_SEND: ++ case XSC_CQE_RESP_SEND_IMM: ++ case XSC_CQE_RESP_SEND_INV: ++ if (unlikely(cq->cqe64->app == XSC_CQE_APP_TAG_MATCHING)) { ++ switch (cq->cqe64->app_op) { ++ case XSC_CQE_APP_OP_TM_CONSUMED_MSG_SW_RDNV: ++ case XSC_CQE_APP_OP_TM_CONSUMED_MSG: ++ case XSC_CQE_APP_OP_TM_CONSUMED_SW_RDNV: ++ case XSC_CQE_APP_OP_TM_EXPECTED: ++ case XSC_CQE_APP_OP_TM_UNEXPECTED: ++ return IBV_WC_TM_RECV; ++ case XSC_CQE_APP_OP_TM_NO_TAG: ++ return IBV_WC_TM_NO_TAG; ++ } ++ } ++ return IBV_WC_RECV; ++ case XSC_CQE_NO_PACKET: ++ switch (cq->cqe64->app_op) { ++ case XSC_CQE_APP_OP_TM_REMOVE: ++ return IBV_WC_TM_DEL; ++ case XSC_CQE_APP_OP_TM_APPEND: ++ return IBV_WC_TM_ADD; ++ case XSC_CQE_APP_OP_TM_NOOP: ++ return IBV_WC_TM_SYNC; ++ case XSC_CQE_APP_OP_TM_CONSUMED: ++ return IBV_WC_TM_RECV; ++ } ++ break; ++ case 
XSC_CQE_REQ: ++ switch (be32toh(cq->cqe64->sop_drop_qpn) >> 24) { ++ case XSC_OPCODE_RDMA_WRITE_IMM: ++ case XSC_OPCODE_RDMA_WRITE: ++ return IBV_WC_RDMA_WRITE; ++ case XSC_OPCODE_SEND_IMM: ++ case XSC_OPCODE_SEND: ++ case XSC_OPCODE_SEND_INVAL: ++ return IBV_WC_SEND; ++ case XSC_OPCODE_RDMA_READ: ++ return IBV_WC_RDMA_READ; ++ case XSC_OPCODE_ATOMIC_CS: ++ return IBV_WC_COMP_SWAP; ++ case XSC_OPCODE_ATOMIC_FA: ++ return IBV_WC_FETCH_ADD; ++ case XSC_OPCODE_UMR: ++ return cq->umr_opcode; ++ case XSC_OPCODE_TSO: ++ return IBV_WC_TSO; ++ } ++ } ++ ++ return 0; ++} ++ ++static inline uint32_t xsc_cq_read_wc_qp_num(struct ibv_cq_ex *ibcq) ++{ ++ struct xsc_cq *cq = to_xcq(ibv_cq_ex_to_cq(ibcq)); ++ ++ return be32toh(cq->cqe64->sop_drop_qpn) & 0xffffff; ++} ++ ++static inline unsigned int xsc_cq_read_wc_flags(struct ibv_cq_ex *ibcq) ++{ ++ struct xsc_cq *cq = to_xcq(ibv_cq_ex_to_cq(ibcq)); ++ int wc_flags = 0; ++ ++ if (cq->flags & XSC_CQ_FLAGS_RX_CSUM_VALID) ++ wc_flags = get_csum_ok(cq->cqe64); ++ ++ switch (xscdv_get_cqe_opcode(cq->cqe64)) { ++ case XSC_CQE_RESP_WR_IMM: ++ case XSC_CQE_RESP_SEND_IMM: ++ wc_flags |= IBV_WC_WITH_IMM; ++ break; ++ case XSC_CQE_RESP_SEND_INV: ++ wc_flags |= IBV_WC_WITH_INV; ++ break; ++ } ++ ++ if (cq->flags & XSC_CQ_FLAGS_TM_SYNC_REQ) ++ wc_flags |= IBV_WC_TM_SYNC_REQ; ++ ++ if (unlikely(cq->cqe64->app == XSC_CQE_APP_TAG_MATCHING)) { ++ switch (cq->cqe64->app_op) { ++ case XSC_CQE_APP_OP_TM_CONSUMED_MSG_SW_RDNV: ++ case XSC_CQE_APP_OP_TM_CONSUMED_MSG: ++ case XSC_CQE_APP_OP_TM_MSG_COMPLETION_CANCELED: ++ /* Full completion */ ++ wc_flags |= (IBV_WC_TM_MATCH | IBV_WC_TM_DATA_VALID); ++ break; ++ case XSC_CQE_APP_OP_TM_CONSUMED_SW_RDNV: ++ case XSC_CQE_APP_OP_TM_CONSUMED: /* First completion */ ++ wc_flags |= IBV_WC_TM_MATCH; ++ break; ++ case XSC_CQE_APP_OP_TM_EXPECTED: /* Second completion */ ++ wc_flags |= IBV_WC_TM_DATA_VALID; ++ break; ++ } ++ } ++ ++ wc_flags |= ((be32toh(cq->cqe64->flags_rqpn) >> 28) & 3) ? 
IBV_WC_GRH : 0; ++ return wc_flags; ++} ++ ++static inline uint32_t xsc_cq_read_wc_byte_len(struct ibv_cq_ex *ibcq) ++{ ++ struct xsc_cq *cq = to_xcq(ibv_cq_ex_to_cq(ibcq)); ++ ++ return be32toh(cq->cqe64->byte_cnt); ++} ++ ++static inline uint32_t xsc_cq_read_wc_vendor_err(struct ibv_cq_ex *ibcq) ++{ ++ struct xsc_cq *cq = to_xcq(ibv_cq_ex_to_cq(ibcq)); ++ struct xsc_err_cqe *ecqe = (struct xsc_err_cqe *)cq->cqe64; ++ ++ return ecqe->vendor_err_synd; ++} ++ ++static inline __be32 xsc_cq_read_wc_imm_data(struct ibv_cq_ex *ibcq) ++{ ++ struct xsc_cq *cq = to_xcq(ibv_cq_ex_to_cq(ibcq)); ++ ++ switch (xscdv_get_cqe_opcode(cq->cqe64)) { ++ case XSC_CQE_RESP_SEND_INV: ++ /* This is returning invalidate_rkey which is in host order, see ++ * ibv_wc_read_invalidated_rkey ++ */ ++ return (__force __be32)be32toh(cq->cqe64->imm_inval_pkey); ++ default: ++ return cq->cqe64->imm_inval_pkey; ++ } ++} ++ ++static inline uint32_t xsc_cq_read_wc_slid(struct ibv_cq_ex *ibcq) ++{ ++ struct xsc_cq *cq = to_xcq(ibv_cq_ex_to_cq(ibcq)); ++ ++ return (uint32_t)be16toh(cq->cqe64->slid); ++} ++ ++static inline uint8_t xsc_cq_read_wc_sl(struct ibv_cq_ex *ibcq) ++{ ++ struct xsc_cq *cq = to_xcq(ibv_cq_ex_to_cq(ibcq)); ++ ++ return (be32toh(cq->cqe64->flags_rqpn) >> 24) & 0xf; ++} ++ ++static inline uint32_t xsc_cq_read_wc_src_qp(struct ibv_cq_ex *ibcq) ++{ ++ struct xsc_cq *cq = to_xcq(ibv_cq_ex_to_cq(ibcq)); ++ ++ return be32toh(cq->cqe64->flags_rqpn) & 0xffffff; ++} ++ ++static inline uint8_t xsc_cq_read_wc_dlid_path_bits(struct ibv_cq_ex *ibcq) ++{ ++ struct xsc_cq *cq = to_xcq(ibv_cq_ex_to_cq(ibcq)); ++ ++ return cq->cqe64->ml_path & 0x7f; ++} ++ ++static inline uint64_t xsc_cq_read_wc_completion_ts(struct ibv_cq_ex *ibcq) ++{ ++ struct xsc_cq *cq = to_xcq(ibv_cq_ex_to_cq(ibcq)); ++ ++ return be64toh(cq->cqe64->timestamp); ++} ++ ++static inline uint64_t ++xsc_cq_read_wc_completion_wallclock_ns(struct ibv_cq_ex *ibcq) ++{ ++ struct xsc_cq *cq = to_xcq(ibv_cq_ex_to_cq(ibcq)); ++ ++ return xscdv_ts_to_ns(&cq->last_clock_info, ++ xsc_cq_read_wc_completion_ts(ibcq)); ++} ++ ++static inline uint16_t xsc_cq_read_wc_cvlan(struct ibv_cq_ex *ibcq) ++{ ++ struct xsc_cq *cq = to_xcq(ibv_cq_ex_to_cq(ibcq)); ++ ++ return be16toh(cq->cqe64->vlan_info); ++} ++ ++static inline uint32_t xsc_cq_read_flow_tag(struct ibv_cq_ex *ibcq) ++{ ++ struct xsc_cq *cq = to_xcq(ibv_cq_ex_to_cq(ibcq)); ++ ++ return be32toh(cq->cqe64->sop_drop_qpn) & XSC_FLOW_TAG_MASK; ++} ++ ++static inline void xsc_cq_read_wc_tm_info(struct ibv_cq_ex *ibcq, ++ struct ibv_wc_tm_info *tm_info) ++{ ++ struct xsc_cq *cq = to_xcq(ibv_cq_ex_to_cq(ibcq)); ++ ++ tm_info->tag = be64toh(cq->cqe64->tmh.tag); ++ tm_info->priv = be32toh(cq->cqe64->tmh.app_ctx); ++} ++ ++#define BIT(i) (1UL << (i)) ++ ++#define SINGLE_THREADED BIT(0) ++#define STALL BIT(1) ++#define V1 BIT(2) ++#define ADAPTIVE BIT(3) ++#define CLOCK_UPDATE BIT(4) ++ ++#define xsc_start_poll_name(cqe_ver, lock, stall, adaptive, clock_update) \ ++ xsc_start_poll##adaptive##stall##cqe_ver##lock##clock_update ++#define xsc_next_poll_name(cqe_ver, adaptive) \ ++ xsc_next_poll##adaptive##cqe_ver ++#define xsc_end_poll_name(lock, stall, adaptive) \ ++ xsc_end_poll##adaptive##stall##lock ++ ++#define POLL_FN_ENTRY(cqe_ver, lock, stall, adaptive, clock_update) { \ ++ .start_poll = &xsc_start_poll_name(cqe_ver, lock, stall, adaptive, clock_update), \ ++ .next_poll = &xsc_next_poll_name(cqe_ver, adaptive), \ ++ .end_poll = &xsc_end_poll_name(lock, stall, adaptive), \ ++ } ++ ++static const struct op ++{ ++ int 
(*start_poll)(struct ibv_cq_ex *ibcq, struct ibv_poll_cq_attr *attr); ++ int (*next_poll)(struct ibv_cq_ex *ibcq); ++ void (*end_poll)(struct ibv_cq_ex *ibcq); ++} ops[ADAPTIVE + V1 + STALL + SINGLE_THREADED + CLOCK_UPDATE + 1] = { ++ [V1] = POLL_FN_ENTRY(_v1, _lock, , ,), ++ [0] = POLL_FN_ENTRY(_v0, _lock, , ,), ++ [V1 | SINGLE_THREADED] = POLL_FN_ENTRY(_v1, , , , ), ++ [SINGLE_THREADED] = POLL_FN_ENTRY(_v0, , , , ), ++ [V1 | STALL] = POLL_FN_ENTRY(_v1, _lock, _stall, , ), ++ [STALL] = POLL_FN_ENTRY(_v0, _lock, _stall, , ), ++ [V1 | SINGLE_THREADED | STALL] = POLL_FN_ENTRY(_v1, , _stall, , ), ++ [SINGLE_THREADED | STALL] = POLL_FN_ENTRY(_v0, , _stall, , ), ++ [V1 | STALL | ADAPTIVE] = POLL_FN_ENTRY(_v1, _lock, _stall, _adaptive, ), ++ [STALL | ADAPTIVE] = POLL_FN_ENTRY(_v0, _lock, _stall, _adaptive, ), ++ [V1 | SINGLE_THREADED | STALL | ADAPTIVE] = POLL_FN_ENTRY(_v1, , _stall, _adaptive, ), ++ [SINGLE_THREADED | STALL | ADAPTIVE] = POLL_FN_ENTRY(_v0, , _stall, _adaptive, ), ++ [V1 | CLOCK_UPDATE] = POLL_FN_ENTRY(_v1, _lock, , , _clock_update), ++ [0 | CLOCK_UPDATE] = POLL_FN_ENTRY(_v0, _lock, , , _clock_update), ++ [V1 | SINGLE_THREADED | CLOCK_UPDATE] = POLL_FN_ENTRY(_v1, , , , _clock_update), ++ [SINGLE_THREADED | CLOCK_UPDATE] = POLL_FN_ENTRY(_v0, , , , _clock_update), ++ [V1 | STALL | CLOCK_UPDATE] = POLL_FN_ENTRY(_v1, _lock, _stall, , _clock_update), ++ [STALL | CLOCK_UPDATE] = POLL_FN_ENTRY(_v0, _lock, _stall, , _clock_update), ++ [V1 | SINGLE_THREADED | STALL | CLOCK_UPDATE] = POLL_FN_ENTRY(_v1, , _stall, , _clock_update), ++ [SINGLE_THREADED | STALL | CLOCK_UPDATE] = POLL_FN_ENTRY(_v0, , _stall, , _clock_update), ++ [V1 | STALL | ADAPTIVE | CLOCK_UPDATE] = POLL_FN_ENTRY(_v1, _lock, _stall, _adaptive, _clock_update), ++ [STALL | ADAPTIVE | CLOCK_UPDATE] = POLL_FN_ENTRY(_v0, _lock, _stall, _adaptive, _clock_update), ++ [V1 | SINGLE_THREADED | STALL | ADAPTIVE | CLOCK_UPDATE] = POLL_FN_ENTRY(_v1, , _stall, _adaptive, _clock_update), ++ [SINGLE_THREADED | STALL | ADAPTIVE | CLOCK_UPDATE] = POLL_FN_ENTRY(_v0, , _stall, _adaptive, _clock_update), ++}; ++ ++int xsc_cq_fill_pfns(struct xsc_cq *cq, ++ const struct ibv_cq_init_attr_ex *cq_attr, ++ struct xsc_context *xctx) ++{ ++ const struct op *poll_ops = &ops[((cq->stall_enable && cq->stall_adaptive_enable) ? ADAPTIVE : 0) | ++ (xctx->cqe_version ? V1 : 0) | ++ (cq->flags & XSC_CQ_FLAGS_SINGLE_THREADED ? ++ SINGLE_THREADED : 0) | ++ (cq->stall_enable ? STALL : 0) | ++ ((cq_attr->wc_flags & IBV_WC_EX_WITH_COMPLETION_TIMESTAMP_WALLCLOCK) ? 
++ CLOCK_UPDATE : 0)]; ++ ++ cq->verbs_cq.cq_ex.start_poll = poll_ops->start_poll; ++ cq->verbs_cq.cq_ex.next_poll = poll_ops->next_poll; ++ cq->verbs_cq.cq_ex.end_poll = poll_ops->end_poll; ++ ++ cq->verbs_cq.cq_ex.read_opcode = xsc_cq_read_wc_opcode; ++ cq->verbs_cq.cq_ex.read_vendor_err = xsc_cq_read_wc_vendor_err; ++ cq->verbs_cq.cq_ex.read_wc_flags = xsc_cq_read_wc_flags; ++ if (cq_attr->wc_flags & IBV_WC_EX_WITH_BYTE_LEN) ++ cq->verbs_cq.cq_ex.read_byte_len = xsc_cq_read_wc_byte_len; ++ if (cq_attr->wc_flags & IBV_WC_EX_WITH_IMM) ++ cq->verbs_cq.cq_ex.read_imm_data = xsc_cq_read_wc_imm_data; ++ if (cq_attr->wc_flags & IBV_WC_EX_WITH_QP_NUM) ++ cq->verbs_cq.cq_ex.read_qp_num = xsc_cq_read_wc_qp_num; ++ if (cq_attr->wc_flags & IBV_WC_EX_WITH_SRC_QP) ++ cq->verbs_cq.cq_ex.read_src_qp = xsc_cq_read_wc_src_qp; ++ if (cq_attr->wc_flags & IBV_WC_EX_WITH_SLID) ++ cq->verbs_cq.cq_ex.read_slid = xsc_cq_read_wc_slid; ++ if (cq_attr->wc_flags & IBV_WC_EX_WITH_SL) ++ cq->verbs_cq.cq_ex.read_sl = xsc_cq_read_wc_sl; ++ if (cq_attr->wc_flags & IBV_WC_EX_WITH_DLID_PATH_BITS) ++ cq->verbs_cq.cq_ex.read_dlid_path_bits = xsc_cq_read_wc_dlid_path_bits; ++ if (cq_attr->wc_flags & IBV_WC_EX_WITH_COMPLETION_TIMESTAMP) ++ cq->verbs_cq.cq_ex.read_completion_ts = xsc_cq_read_wc_completion_ts; ++ if (cq_attr->wc_flags & IBV_WC_EX_WITH_CVLAN) ++ cq->verbs_cq.cq_ex.read_cvlan = xsc_cq_read_wc_cvlan; ++ if (cq_attr->wc_flags & IBV_WC_EX_WITH_FLOW_TAG) ++ cq->verbs_cq.cq_ex.read_flow_tag = xsc_cq_read_flow_tag; ++ if (cq_attr->wc_flags & IBV_WC_EX_WITH_TM_INFO) ++ cq->verbs_cq.cq_ex.read_tm_info = xsc_cq_read_wc_tm_info; ++ if (cq_attr->wc_flags & IBV_WC_EX_WITH_COMPLETION_TIMESTAMP_WALLCLOCK) { ++ if (!xctx->clock_info_page) ++ return EOPNOTSUPP; ++ cq->verbs_cq.cq_ex.read_completion_wallclock_ns = ++ xsc_cq_read_wc_completion_wallclock_ns; ++ } ++ ++ return 0; ++} ++ ++int xsc_arm_cq(struct ibv_cq *ibvcq, int solicited) ++{ ++ struct xsc_cq *cq = to_xcq(ibvcq); ++ union xsc_db_data doorbell; ++ ++ doorbell.cqn = cq->cqn; ++ doorbell.cq_next_cid = cq->cons_index; ++ doorbell.solicited = !!solicited; ++ ++ /* ++ * Make sure that the doorbell record in host memory is ++ * written before ringing the doorbell via PCI WC MMIO. 
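++	 * mmio_wc_start() provides that ordering against the write-combining
++	 * mapping, and mmio_flush_writes() afterwards flushes the WC buffer
++	 * so the arm request is posted to the device right away.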
++ */ ++ mmio_wc_start(); ++ ++ WR_REG(cq->armdb, doorbell.raw_data); ++ ++ mmio_flush_writes(); ++ ++ return 0; ++} ++ ++void xsc_cq_event(struct ibv_cq *cq) ++{ ++ to_xcq(cq)->arm_sn++; ++} ++ ++static int is_equal_rsn(struct xsc_cqe64 *cqe64, uint32_t rsn) ++{ ++ return rsn == (be32toh(cqe64->sop_drop_qpn) & 0xffffff); ++} ++ ++static inline int is_equal_uidx(struct xsc_cqe64 *cqe64, uint32_t uidx) ++{ ++ return uidx == (be32toh(cqe64->srqn_uidx) & 0xffffff); ++} ++ ++static inline int is_responder(uint8_t opcode) ++{ ++ switch (opcode) { ++ case XSC_CQE_RESP_WR_IMM: ++ case XSC_CQE_RESP_SEND: ++ case XSC_CQE_RESP_SEND_IMM: ++ case XSC_CQE_RESP_SEND_INV: ++ case XSC_CQE_RESP_ERR: ++ return 1; ++ } ++ ++ return 0; ++} ++ ++static inline int free_res_cqe(struct xsc_cqe64 *cqe64, uint32_t rsn, int cqe_version) ++{ ++ if (cqe_version) { ++ if (is_equal_uidx(cqe64, rsn)) { ++ return 1; ++ } ++ } else { ++ if (is_equal_rsn(cqe64, rsn)) { ++ return 1; ++ } ++ } ++ ++ return 0; ++} ++ ++void __xsc_cq_clean(struct xsc_cq *cq, uint32_t rsn) ++{ ++ uint32_t prod_index; ++ int nfreed = 0; ++ struct xsc_cqe64 *cqe64, *dest64; ++ void *cqe, *dest; ++ uint8_t owner_bit; ++ int cqe_version; ++ ++ if (!cq || cq->flags & XSC_CQ_FLAGS_DV_OWNED) ++ return; ++ xsc_dbg(to_xctx(cq->verbs_cq.cq_ex.context)->dbg_fp, XSC_DBG_CQ, "\n"); ++ ++ /* ++ * First we need to find the current producer index, so we ++ * know where to start cleaning from. It doesn't matter if HW ++ * adds new entries after this loop -- the QP we're worried ++ * about is already in RESET, so the new entries won't come ++ * from our QP and therefore don't need to be checked. ++ */ ++ for (prod_index = cq->cons_index; get_sw_cqe(cq, prod_index); ++prod_index) ++ if (prod_index == cq->cons_index + cq->verbs_cq.cq_ex.cqe) ++ break; ++ ++ /* ++ * Now sweep backwards through the CQ, removing CQ entries ++ * that match our QP by copying older entries on top of them. ++ */ ++ cqe_version = (to_xctx(cq->verbs_cq.cq_ex.context))->cqe_version; ++ while ((int) --prod_index - (int) cq->cons_index >= 0) { ++ cqe = get_cqe(cq, prod_index & (cq->verbs_cq.cq_ex.cqe - 1)); ++ cqe64 = (cq->cqe_sz == 64) ? cqe : cqe + 64; ++ if (free_res_cqe(cqe64, rsn, cqe_version)) { ++ ++nfreed; ++ } else if (nfreed) { ++ dest = get_cqe(cq, (prod_index + nfreed) & (cq->verbs_cq.cq_ex.cqe - 1)); ++ dest64 = (cq->cqe_sz == 64) ? dest : dest + 64; ++ owner_bit = dest64->op_own & XSC_CQE_OWNER_MASK; ++ memcpy(dest, cqe, cq->cqe_sz); ++ dest64->op_own = owner_bit | ++ (dest64->op_own & ~XSC_CQE_OWNER_MASK); ++ } ++ } ++ ++ if (nfreed) { ++ cq->cons_index += nfreed; ++ /* ++ * Make sure update of buffer contents is done before ++ * updating consumer index. 
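++	 * udma_to_device_barrier() below provides exactly that ordering, so
++	 * the device cannot observe the new consumer index before the
++	 * compacted CQEs are visible.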
++ */ ++ udma_to_device_barrier(); ++ update_cons_index(cq); ++ } ++} ++ ++void xsc_cq_clean(struct xsc_cq *cq, uint32_t qpn) ++{ ++ xsc_spin_lock(&cq->lock); ++ __xsc_cq_clean(cq, qpn); ++ xsc_spin_unlock(&cq->lock); ++} ++ ++int xsc_alloc_cq_buf(struct xsc_context *xctx, struct xsc_cq *cq, ++ struct xsc_buf *buf, int nent, int cqe_sz) ++{ ++ struct xsc_device *xdev = to_xdev(xctx->ibv_ctx.context.device); ++ int ret; ++ enum xsc_alloc_type type; ++ enum xsc_alloc_type default_type = XSC_ALLOC_TYPE_ANON; ++ ++ if (xsc_use_huge("HUGE_CQ")) ++ default_type = XSC_ALLOC_TYPE_HUGE; ++ ++ xsc_get_alloc_type(xctx, XSC_CQ_PREFIX, &type, default_type); ++ ++ ret = xsc_alloc_prefered_buf(xctx, buf, ++ align(nent * cqe_sz, xdev->page_size), ++ xdev->page_size, ++ type, ++ XSC_CQ_PREFIX); ++ ++ if (ret) ++ return -1; ++ ++ memset(buf->buf, 0, nent * cqe_sz); ++ ++ return 0; ++} ++ ++int xsc_free_cq_buf(struct xsc_context *ctx, struct xsc_buf *buf) ++{ ++ return xsc_free_actual_buf(ctx, buf); ++} +diff --git a/providers/xscale/cqm_csr_defines.h b/providers/xscale/cqm_csr_defines.h +new file mode 100644 +index 0000000..9d87438 +--- /dev/null ++++ b/providers/xscale/cqm_csr_defines.h +@@ -0,0 +1,180 @@ ++#ifndef _CQM_CSR_DEFINES_H_ ++#define _CQM_CSR_DEFINES_H_ ++ ++#define CQM_SOFT_RESET_REG_ADDR 0x6000 ++#define CQM_SOFT_RESET_MASK 0x1 ++#define CQM_SOFT_RESET_SHIFT 0 ++ ++#define CQM_COUNTER_CONFIG_REG_ADDR 0x6020 ++#define CQM_CFG_CNT_WRAP_MASK 0x1 ++#define CQM_CFG_CNT_WRAP_SHIFT 0 ++#define CQM_CFG_CNT_RC_MASK 0x2 ++#define CQM_CFG_CNT_RC_SHIFT 1 ++ ++#define CQM_SCRATCH_PAD_REG_ADDR 0x6040 ++#define CQM_SCRATCH_PAD_MASK 0xffffffffffffffff ++#define CQM_SCRATCH_PAD_SHIFT 0 ++ ++#define CQM_CQM_CONFIG_REG_RING_ADDR_ARRAY_ADDR 0x6060 ++#define CQM_CQM_CONFIG_REG_RING_ADDR_ARRAY_SIZE 16 ++#define CQM_CQM_CONFIG_REG_RING_ADDR_ARRAY_STRIDE 0x20 ++#define CQM_CFG_CPU2CQM_RING_ADDR_MASK 0xffffffffffffffff ++#define CQM_CFG_CPU2CQM_RING_ADDR_SHIFT 0 ++ ++#define CQM_CQM_CONFIG_REG_RING_SIZE_ARRAY_ADDR 0x6260 ++#define CQM_CQM_CONFIG_REG_RING_SIZE_ARRAY_SIZE 16 ++#define CQM_CQM_CONFIG_REG_RING_SIZE_ARRAY_STRIDE 0x20 ++#define CQM_CFG_CPU2CQM_RING_SIZE_MASK 0xffff ++#define CQM_CFG_CPU2CQM_RING_SIZE_SHIFT 0 ++ ++#define CQM_CQM_CONFIG_REG_NEXT_CID_ARRAY_ADDR 0x6460 ++#define CQM_CQM_CONFIG_REG_NEXT_CID_ARRAY_SIZE 16 ++#define CQM_CQM_CONFIG_REG_NEXT_CID_ARRAY_STRIDE 0x20 ++#define CQM_CFG_CPU2CQM_NEXT_CID_MASK 0xffff ++#define CQM_CFG_CPU2CQM_NEXT_CID_SHIFT 0 ++ ++#define CQM_CQM_CONFIG_REG_CFG_EN_ARRAY_ADDR 0x6660 ++#define CQM_CQM_CONFIG_REG_CFG_EN_ARRAY_SIZE 16 ++#define CQM_CQM_CONFIG_REG_CFG_EN_ARRAY_STRIDE 0x20 ++#define CQM_CFG_CPU2CQM_CFG_EN_MASK 0x1 ++#define CQM_CFG_CPU2CQM_CFG_EN_SHIFT 0 ++ ++#define CQM_CQM_CONFIG_CQE_FIFO_TH_ADDR 0x6860 ++#define CQM_CFG_CPU2CQM_CQE_FIFO_AFULL_TH_MASK 0xff ++#define CQM_CFG_CPU2CQM_CQE_FIFO_AFULL_TH_SHIFT 0 ++#define CQM_CFG_CPU2CQM_CQE_FIFO_AMTY_TH_MASK 0xff00 ++#define CQM_CFG_CPU2CQM_CQE_FIFO_AMTY_TH_SHIFT 8 ++ ++#define CQM_CQM_CONFIG_CID_FIFO_TH_ADDR 0x6880 ++#define CQM_CFG_CPU2CQM_CID_FIFO_AFULL_TH_MASK 0xff ++#define CQM_CFG_CPU2CQM_CID_FIFO_AFULL_TH_SHIFT 0 ++#define CQM_CFG_CPU2CQM_CID_FIFO_AMTY_TH_MASK 0xff00 ++#define CQM_CFG_CPU2CQM_CID_FIFO_AMTY_TH_SHIFT 8 ++ ++#define CQM_CQM_STATUS_REG_ARRAY_ADDR 0x68a0 ++#define CQM_CQM_STATUS_REG_ARRAY_SIZE 16 ++#define CQM_CQM_STATUS_REG_ARRAY_STRIDE 0x20 ++#define CQM_CFG_CQM2CPU_DONE_PID_MASK 0xffff ++#define CQM_CFG_CQM2CPU_DONE_PID_SHIFT 0 ++ ++#define CQM_CQM_STATUS_LOCAL_NEXT_PID_REG_ARRAY_ADDR 0x6aa0 
++#define CQM_CQM_STATUS_LOCAL_NEXT_PID_REG_ARRAY_SIZE 16 ++#define CQM_CQM_STATUS_LOCAL_NEXT_PID_REG_ARRAY_STRIDE 0x20 ++#define CQM_CQM_LOCAL_NEXT_PID_MASK 0xffff ++#define CQM_CQM_LOCAL_NEXT_PID_SHIFT 0 ++ ++#define CQM_CQM_DMA_REQ_LEN_STATE_REG_ADDR 0x6ca0 ++#define CQM_CQM_DMA_REQ_LEN_MASK 0x3ff ++#define CQM_CQM_DMA_REQ_LEN_SHIFT 0 ++ ++#define CQM_CQM_DMA_REQ_ADDR_STATE_REG_ADDR 0x6cc0 ++#define CQM_CQM_DMA_REQ_ADDR_MASK 0xffffffffffffffff ++#define CQM_CQM_DMA_REQ_ADDR_SHIFT 0 ++ ++#define CQM_CQM_CQE_L_QPID_STATE_REG_ADDR 0x6ce0 ++#define CQM_CQM_CQE_L_QP_ID_MASK 0xffffff ++#define CQM_CQM_CQE_L_QP_ID_SHIFT 0 ++ ++#define CQM_CQM_CQE_MSG_LEN_STATE_REG_ADDR 0x6d00 ++#define CQM_CQM_CQE_MSG_LEN_MASK 0xffffffff ++#define CQM_CQM_CQE_MSG_LEN_SHIFT 0 ++ ++#define CQM_CQM_CQE_ERR_CODE_STATE_REG_ADDR 0x6d20 ++#define CQM_CQM_CQE_ERR_CODE_MASK 0xff ++#define CQM_CQM_CQE_ERR_CODE_SHIFT 0 ++ ++#define CQM_CQM_CQE_MSG_OPCODE_STATE_REG_ADDR 0x6d40 ++#define CQM_CQM_CQE_MSG_OPCODE_MASK 0xff ++#define CQM_CQM_CQE_MSG_OPCODE_SHIFT 0 ++ ++#define CQM_CQM_CQE_WQEID_STATE_REG_ADDR 0x6d60 ++#define CQM_CQM_CQE_WQEID_MASK 0xffff ++#define CQM_CQM_CQE_WQEID_SHIFT 0 ++ ++#define CQM_CQM_CQE_TX0RX1_STATE_REG_ADDR 0x6d80 ++#define CQM_CQM_CQE_TX0RX1_MASK 0x1 ++#define CQM_CQM_CQE_TX0RX1_SHIFT 0 ++ ++#define CQM_CQM_CQE_CQ_ID_STATE_REG_ADDR 0x6da0 ++#define CQM_CQM_CQE_CQ_ID_MASK 0xf ++#define CQM_CQM_CQE_CQ_ID_SHIFT 0 ++ ++#define CQM_CQM_WR_ACK_CNT_STATE_REG_ADDR 0x6dc0 ++#define CQM_CQM_DMA_WR_ACK_MASK 0xff ++#define CQM_CQM_DMA_WR_ACK_SHIFT 0 ++ ++#define CQM_CQM_RD_ACK_CNT_STATE_REG_ADDR 0x6de0 ++#define CQM_CQM_DMA_RD_ACK_MASK 0xff ++#define CQM_CQM_DMA_RD_ACK_SHIFT 0 ++ ++#define CQM_CQM_CQE_ACK_CNT_STATE_REG_ADDR 0x6e00 ++#define CQM_CQM_DMA_CQE_ACK_MASK 0xff ++#define CQM_CQM_DMA_CQE_ACK_SHIFT 0 ++ ++#define CQM_CQM_CMD_FIFO_STATE_REG_ADDR 0x6e20 ++#define CQM_CQM_FIFO_OVFL_INT_MASK 0x3 ++#define CQM_CQM_FIFO_OVFL_INT_SHIFT 0 ++#define CQM_CQM_FIFO_UNFL_INT_MASK 0xc ++#define CQM_CQM_FIFO_UNFL_INT_SHIFT 2 ++#define CQM_CQM_FIFO_MTY_MASK 0x30 ++#define CQM_CQM_FIFO_MTY_SHIFT 4 ++#define CQM_CQM_FIFO_FUL_MASK 0xc0 ++#define CQM_CQM_FIFO_FUL_SHIFT 6 ++#define CQM_CQM_RING_FULL_INT_MASK 0xffff00 ++#define CQM_CQM_RING_FULL_INT_SHIFT 8 ++#define CQM_CQM_DEFINE_ERR_INT_MASK 0x1000000 ++#define CQM_CQM_DEFINE_ERR_INT_SHIFT 24 ++#define CQM_CQM_SOP_EOP_NO_EQUAL_MASK 0x2000000 ++#define CQM_CQM_SOP_EOP_NO_EQUAL_SHIFT 25 ++ ++#define CQM_CQM_FIFO_USED_CNT_REG_ADDR 0x6e40 ++#define CQM_CQM_FIFO_USED_CNT_REG_SIZE 2 ++#define CQM_CQM_FIFO_USED_CNT_REG_STRIDE 0x20 ++#define CQM_CQM_FIFO_USED_CNT_MASK 0x7f ++#define CQM_CQM_FIFO_USED_CNT_SHIFT 0 ++ ++#define CQM_CQM_DEBUG_INFO_STATE_REG_0_ADDR 0x6e80 ++#define CQM_CQM2CSR_DBG_OPCODE_MASK 0xff ++#define CQM_CQM2CSR_DBG_OPCODE_SHIFT 0 ++#define CQM_CQM2CSR_DBG_TX0_RX1_MASK 0x100 ++#define CQM_CQM2CSR_DBG_TX0_RX1_SHIFT 8 ++#define CQM_CQM2CSR_DBG_CAP_MASK 0x200 ++#define CQM_CQM2CSR_DBG_CAP_SHIFT 9 ++#define CQM_CQM2CSR_DBG_L_QPID_MASK 0x1c00 ++#define CQM_CQM2CSR_DBG_L_QPID_SHIFT 10 ++#define CQM_CQM2CSR_DBG_SN_MASK 0x1fffffe000 ++#define CQM_CQM2CSR_DBG_SN_SHIFT 13 ++ ++#define CQM_CQM_DEBUG_INFO_STATE_REG_1_ADDR 0x6ea0 ++#define CQM_CQM2CSR_DBG_MOD_IF_BM_MASK 0xffffffffffffffff ++#define CQM_CQM2CSR_DBG_MOD_IF_BM_SHIFT 0 ++ ++#define CQM_CQM_DMA_IN_SOP_CNT_REG_ADDR 0x6ec0 ++#define CQM_CQM_DMA_IN_SOP_CNT_MASK 0xffffffffffffffff ++#define CQM_CQM_DMA_IN_SOP_CNT_SHIFT 0 ++ ++#define CQM_CQM_DMA_IN_EOP_CNT_REG_ADDR 0x6ee0 ++#define CQM_CQM_DMA_IN_EOP_CNT_MASK 
0xffffffffffffffff ++#define CQM_CQM_DMA_IN_EOP_CNT_SHIFT 0 ++ ++#define CQM_CQM_DMA_IN_VLD_CNT_REG_ADDR 0x6f00 ++#define CQM_CQM_DMA_IN_VLD_CNT_MASK 0xffffffffffffffff ++#define CQM_CQM_DMA_IN_VLD_CNT_SHIFT 0 ++ ++#define CQM_CQM_DMA_REQ_CNT_REG_ADDR 0x6f20 ++#define CQM_CQM_DMA_REQ_CNT_MASK 0xffffffffffffffff ++#define CQM_CQM_DMA_REQ_CNT_SHIFT 0 ++ ++#define CQM_CQM_DMA_GNT_CNT_REG_ADDR 0x6f40 ++#define CQM_CQM_DMA_GNT_CNT_MASK 0xffffffffffffffff ++#define CQM_CQM_DMA_GNT_CNT_SHIFT 0 ++ ++#define CQM_CQM_DMA_ACK_VLD_CNT_REG_ADDR 0x6f60 ++#define CQM_CQM_DMA_ACK_VLD_CNT_MASK 0xffffffffffffffff ++#define CQM_CQM_DMA_ACK_VLD_CNT_SHIFT 0 ++ ++#define CQM_CQM_MER2CQM_VLD_CNT_REG_ADDR 0x6f80 ++#define CQM_CQM_MER2CQM_VLD_CNT_MASK 0xffffffffffffffff ++#define CQM_CQM_MER2CQM_VLD_CNT_SHIFT 0 ++ ++#endif +diff --git a/providers/xscale/dbrec.c b/providers/xscale/dbrec.c +new file mode 100644 +index 0000000..3987b88 +--- /dev/null ++++ b/providers/xscale/dbrec.c +@@ -0,0 +1,131 @@ ++/* ++ * Copyright (c) 2021 - 2022, Shanghai Yunsilicon Technology Co., Ltd. ++ * All rights reserved. ++ */ ++ ++#define _GNU_SOURCE ++#include ++ ++#include ++#include ++#include ++ ++#include "xscale.h" ++ ++struct xsc_db_page { ++ struct xsc_db_page *prev, *next; ++ struct xsc_buf buf; ++ int num_db; ++ int use_cnt; ++ unsigned long free[0]; ++}; ++ ++static struct xsc_db_page *__add_page(struct xsc_context *context) ++{ ++ struct xsc_db_page *page; ++ int ps = to_xdev(context->ibv_ctx.context.device)->page_size; ++ int pp; ++ int i; ++ int nlong; ++ int ret; ++ ++ pp = ps / context->cache_line_size; ++ nlong = (pp + 8 * sizeof(long) - 1) / (8 * sizeof(long)); ++ ++ page = malloc(sizeof *page + nlong * sizeof(long)); ++ if (!page) ++ return NULL; ++ ++ if (xsc_is_extern_alloc(context)) ++ ret = xsc_alloc_buf_extern(context, &page->buf, ps); ++ else ++ ret = xsc_alloc_buf(&page->buf, ps, ps); ++ if (ret) { ++ free(page); ++ return NULL; ++ } ++ ++ page->num_db = pp; ++ page->use_cnt = 0; ++ for (i = 0; i < nlong; ++i) ++ page->free[i] = ~0; ++ ++ page->prev = NULL; ++ page->next = context->db_list; ++ context->db_list = page; ++ if (page->next) ++ page->next->prev = page; ++ ++ return page; ++} ++ ++__be32 *xsc_alloc_dbrec(struct xsc_context *context) ++{ ++ struct xsc_db_page *page; ++ __be32 *db = NULL; ++ int i, j; ++ ++ pthread_mutex_lock(&context->db_list_mutex); ++ ++ for (page = context->db_list; page; page = page->next) ++ if (page->use_cnt < page->num_db) ++ goto found; ++ ++ page = __add_page(context); ++ if (!page) ++ goto out; ++ ++found: ++ ++page->use_cnt; ++ ++ for (i = 0; !page->free[i]; ++i) ++ /* nothing */; ++ ++ j = ffsl(page->free[i]); ++ --j; ++ page->free[i] &= ~(1UL << j); ++ db = page->buf.buf + (i * 8 * sizeof(long) + j) * context->cache_line_size; ++ ++out: ++ pthread_mutex_unlock(&context->db_list_mutex); ++ ++ return db; ++} ++ ++void xsc_free_db(struct xsc_context *context, __be32 *db) ++{ ++ struct xsc_db_page *page; ++ uintptr_t ps = to_xdev(context->ibv_ctx.context.device)->page_size; ++ int i; ++ ++ pthread_mutex_lock(&context->db_list_mutex); ++ ++ for (page = context->db_list; page; page = page->next) ++ if (((uintptr_t) db & ~(ps - 1)) == (uintptr_t) page->buf.buf) ++ break; ++ ++ if (!page) ++ goto out; ++ ++ i = ((void *) db - page->buf.buf) / context->cache_line_size; ++ page->free[i / (8 * sizeof(long))] |= 1UL << (i % (8 * sizeof(long))); ++ ++ if (!--page->use_cnt) { ++ if (page->prev) ++ page->prev->next = page->next; ++ else ++ context->db_list = page->next; ++ if 
(page->next) ++ page->next->prev = page->prev; ++ ++ if (page->buf.type == XSC_ALLOC_TYPE_EXTERNAL) ++ xsc_free_buf_extern(context, &page->buf); ++ else ++ xsc_free_buf(&page->buf); ++ ++ free(page); ++ } ++ ++out: ++ pthread_mutex_unlock(&context->db_list_mutex); ++} +diff --git a/providers/xscale/libxsc.map b/providers/xscale/libxsc.map +new file mode 100644 +index 0000000..005c161 +--- /dev/null ++++ b/providers/xscale/libxsc.map +@@ -0,0 +1,59 @@ ++/* Export symbols should be added below according to ++ Documentation/versioning.md document. */ ++XSC_1.0 { ++ global: ++ xscdv_query_device; ++ xscdv_init_obj; ++ local: *; ++}; ++ ++XSC_1.1 { ++ global: ++ xscdv_create_cq; ++} XSC_1.0; ++ ++XSC_1.2 { ++ global: ++ xscdv_init_obj; ++ xscdv_set_context_attr; ++} XSC_1.1; ++ ++XSC_1.3 { ++ global: ++ xscdv_create_qp; ++ xscdv_create_wq; ++} XSC_1.2; ++ ++XSC_1.4 { ++ global: ++ xscdv_get_clock_info; ++} XSC_1.3; ++ ++XSC_1.5 { ++ global: ++ xscdv_create_flow_action_esp; ++} XSC_1.4; ++ ++XSC_1.6 { ++ global: ++ xscdv_create_flow_matcher; ++ xscdv_destroy_flow_matcher; ++ xscdv_create_flow; ++} XSC_1.5; ++ ++XSC_1.7 { ++ global: ++ xscdv_create_flow_action_modify_header; ++ xscdv_create_flow_action_packet_reformat; ++ xscdv_devx_alloc_uar; ++ xscdv_devx_free_uar; ++ xscdv_devx_general_cmd; ++ xscdv_devx_obj_create; ++ xscdv_devx_obj_destroy; ++ xscdv_devx_obj_modify; ++ xscdv_devx_obj_query; ++ xscdv_devx_query_eqn; ++ xscdv_devx_umem_dereg; ++ xscdv_devx_umem_reg; ++ xscdv_open_device; ++} XSC_1.6; +diff --git a/providers/xscale/qp.c b/providers/xscale/qp.c +new file mode 100644 +index 0000000..04e87e2 +--- /dev/null ++++ b/providers/xscale/qp.c +@@ -0,0 +1,678 @@ ++/* ++ * Copyright (c) 2021 - 2022, Shanghai Yunsilicon Technology Co., Ltd. ++ * All rights reserved. 
++ */ ++ ++#include ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include "xscale.h" ++#include "wqe.h" ++#include "xsc_hsi.h" ++ ++static const uint32_t xsc_ib_opcode[] = { ++ [IBV_WR_SEND] = XSC_MSG_OPCODE_SEND, ++ [IBV_WR_SEND_WITH_IMM] = XSC_MSG_OPCODE_SEND, ++ [IBV_WR_RDMA_WRITE] = XSC_MSG_OPCODE_RDMA_WRITE, ++ [IBV_WR_RDMA_WRITE_WITH_IMM] = XSC_MSG_OPCODE_RDMA_WRITE, ++ [IBV_WR_RDMA_READ] = XSC_MSG_OPCODE_RDMA_READ, ++ [IBV_WR_SEND_WITH_INV] = XSC_MSG_OPCODE_SEND, ++}; ++ ++static void *get_recv_wqe(struct xsc_qp *qp, int n) ++{ ++ return qp->rq_start + (n << qp->rq.wqe_shift); ++} ++ ++static void *get_wq_recv_wqe(struct xsc_rwq *rwq, int n) ++{ ++ return rwq->pbuff + (n << rwq->rq.wqe_shift); ++} ++ ++static void *get_seg_wqe(void *first, int n) ++{ ++ return first + (n << XSC_BASE_WQE_SHIFT); ++} ++ ++void *xsc_get_send_wqe(struct xsc_qp *qp, int n) ++{ ++ return qp->sq_start + (n << qp->sq.wqe_shift); ++} ++ ++void xsc_init_rwq_indices(struct xsc_rwq *rwq) ++{ ++ rwq->rq.head = 0; ++ rwq->rq.tail = 0; ++} ++ ++void xsc_init_qp_indices(struct xsc_qp *qp) ++{ ++ qp->sq.head = 0; ++ qp->sq.tail = 0; ++ qp->rq.head = 0; ++ qp->rq.tail = 0; ++ qp->sq.cur_post = 0; ++} ++ ++static int xsc_wq_overflow(struct xsc_wq *wq, int nreq, struct xsc_cq *cq) ++{ ++ unsigned cur; ++ ++ cur = wq->head - wq->tail; ++ if (cur + nreq < wq->max_post) ++ return 0; ++ ++ xsc_spin_lock(&cq->lock); ++ cur = wq->head - wq->tail; ++ xsc_spin_unlock(&cq->lock); ++ ++ return cur + nreq >= wq->max_post; ++} ++ ++static inline void set_remote_addr_seg(struct xsc_wqe_data_seg *remote_seg, ++ uint32_t msg_len, uint64_t remote_addr, uint32_t rkey) ++{ ++ WR_LE_32(remote_seg->seg_len, msg_len); ++ WR_LE_32(remote_seg->mkey, rkey); ++ WR_LE_64(remote_seg->va, remote_addr); ++} ++ ++static void set_local_data_seg(struct xsc_wqe_data_seg *data_seg, struct ibv_sge *sg) ++{ ++ WR_LE_32(data_seg->seg_len, sg->length); ++ WR_LE_32(data_seg->mkey, sg->lkey); ++ WR_LE_64(data_seg->va, sg->addr); ++} ++ ++static __be32 send_ieth(struct ibv_send_wr *wr) ++{ ++ switch (wr->opcode) { ++ case IBV_WR_SEND_WITH_IMM: ++ case IBV_WR_RDMA_WRITE_WITH_IMM: ++ return wr->imm_data; ++ default: ++ return 0; ++ } ++} ++ ++static int set_data_inl_seg(struct xsc_qp *qp, struct ibv_send_wr *wr, ++ struct xsc_send_wqe_ctrl_seg *ctrl) ++{ ++ void *data_seg; ++ unsigned seg_index; ++ void *addr; ++ int len = 0; ++ int i; ++ const int ds_len = sizeof(struct xsc_wqe_data_seg); ++ int left_len = 0; ++ int msg_len = ctrl->msg_len; ++ ++ if (wr->opcode == IBV_WR_SEND || wr->opcode == IBV_WR_SEND_WITH_IMM) ++ seg_index = 1; ++ else ++ seg_index = 2; ++ ++ if (unlikely(msg_len > qp->max_inline_data)) ++ return ENOMEM; ++ ++ for (i = 0; i < wr->num_sge; ++i) { ++ if (likely(wr->sg_list[i].length)) { ++ addr = (void*)wr->sg_list[i].addr; ++ len = wr->sg_list[i].length; ++ if (left_len > 0) { ++ int copy_len = min_t(int, len, left_len); ++ memcpy(data_seg, addr, copy_len); ++ addr += copy_len; ++ len -= copy_len; ++ } ++ ++ while (len >= ds_len) { ++ data_seg = get_seg_wqe(ctrl, seg_index); ++ seg_index++; ++ memcpy(data_seg, addr, ds_len); ++ addr += ds_len; ++ len -= ds_len; ++ } ++ ++ if (len > 0) { ++ data_seg = get_seg_wqe(ctrl, seg_index); ++ seg_index++; ++ memcpy(data_seg, addr, len); ++ data_seg += len; ++ left_len = ds_len - len; ++ } else { ++ left_len = 0; ++ } ++ } ++ } ++ ++ ctrl->ds_data_num = seg_index - 1; ++ ++ return 0; ++} ++ ++static void zero_send_ds(int idx, struct xsc_qp *qp) ++{ ++ void *seg; 
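++	/* The loop below clears only the trailing data-segment slots (two
++	 * 64-bit words each); slot 0 is the ctrl segment, which the
++	 * post-send path fills in separately.
++	 */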
++ uint64_t *uninitialized_var(p); ++ int i; ++ ++ seg = (void*)xsc_get_send_wqe(qp, idx); ++ for (i = 1; i < qp->sq.seg_cnt; i++) { ++ p = get_seg_wqe(seg, i); ++ p[0] = p[1] = 0; ++ } ++} ++ ++static void zero_recv_ds(int idx, struct xsc_qp *qp) ++{ ++ void *seg; ++ uint64_t *uninitialized_var(p); ++ int i; ++ ++ seg = (void*)get_recv_wqe(qp, idx); ++ for (i = 1; i < qp->rq.seg_cnt; i++) { ++ p = get_seg_wqe(seg, i); ++ p[0] = p[1] = 0; ++ } ++} ++ ++#ifdef XSC_DEBUG ++static void dump_wqe(int type, int idx, struct xsc_qp *qp) ++{ ++ /* type0 send type1 recv */ ++ uint32_t *uninitialized_var(p); ++ int i; ++ void *seg; ++ ++ if (type == 0) { ++ seg = (void*)xsc_get_send_wqe(qp, idx); ++ xsc_dbg(to_xctx(qp->ibv_qp->context)->dbg_fp, XSC_DBG_QP, ++ "dump send wqe at %p\n", seg); ++ for (i = 0; i < qp->sq.seg_cnt; i++) { ++ p = get_seg_wqe(seg, i); ++ xsc_dbg(to_xctx(qp->ibv_qp->context)->dbg_fp, XSC_DBG_QP, ++ "0x%08x 0x%08x 0x%08x 0x%08x\n", p[0], p[1], p[2], p[3]); ++ } ++ } else if (type == 1) { ++ seg = (void*)get_recv_wqe(qp, idx); ++ xsc_dbg(to_xctx(qp->ibv_qp->context)->dbg_fp, XSC_DBG_QP, ++ "dump recv wqe at %p\n", seg); ++ for (i = 0; i < qp->rq.seg_cnt; i++) { ++ p = get_seg_wqe(seg, i); ++ xsc_dbg(to_xctx(qp->ibv_qp->context)->dbg_fp, XSC_DBG_QP, ++ "0x%08x 0x%08x 0x%08x 0x%08x\n", p[0], p[1], p[2], p[3]); ++ } ++ } else { ++ xsc_dbg(to_xctx(qp->ibv_qp->context)->dbg_fp, XSC_DBG_QP, ++ "unknown type %d\n", type); ++ } ++} ++#else ++static inline void dump_wqe(int type, int idx, struct xsc_qp *qp) {}; ++#endif ++ ++static inline void xsc_post_send_db(struct xsc_qp *qp, int nreq) ++{ ++ uint16_t next_pid; ++ union xsc_db_data db; ++ ++ if (unlikely(!nreq)) ++ return; ++ ++ qp->sq.head += nreq; ++ next_pid = qp->sq.head << (qp->sq.wqe_shift - XSC_BASE_WQE_SHIFT); ++ db.sq_next_pid = next_pid; ++ db.sqn = qp->sqn; ++ /* ++ * Make sure that descriptors are written before ++ * updating doorbell record and ringing the doorbell ++ */ ++ xsc_dbg(to_xctx(qp->ibv_qp->context)->dbg_fp, XSC_DBG_QP_SEND, "nreq:%d\n", nreq); ++ udma_to_device_barrier(); ++ WR_REG(qp->sq.db, db.raw_data); ++} ++ ++static inline int _xsc_post_send(struct ibv_qp *ibqp, struct ibv_send_wr *wr, ++ struct ibv_send_wr **bad_wr) ++{ ++ struct xsc_qp *qp = to_xqp(ibqp); ++ void *seg; ++ struct xsc_send_wqe_ctrl_seg *ctrl; ++ struct xsc_wqe_data_seg *data_seg; ++ ++ int nreq; ++ int err = 0; ++ int i; ++ unsigned idx; ++ unsigned seg_index = 1; ++ unsigned msg_len = 0; ++ ++ if (unlikely(ibqp->state < IBV_QPS_RTS)) { ++ xsc_dbg(to_xctx(ibqp->context)->dbg_fp, XSC_DBG_QP_SEND, ++ "qp state is %u, should not post send\n", ibqp->state); ++ err = EINVAL; ++ *bad_wr = wr; ++ return err; ++ } ++ ++ xsc_spin_lock(&qp->sq.lock); ++ ++ for (nreq = 0; wr; ++nreq, wr = wr->next) { ++ seg_index = 1; ++ msg_len = 0; ++ if (unlikely(wr->opcode < 0 || ++ wr->opcode >= sizeof(xsc_ib_opcode) / sizeof(xsc_ib_opcode[0]))) { ++ xsc_dbg(to_xctx(ibqp->context)->dbg_fp, XSC_DBG_QP_SEND, ++ "bad opcode %d\n", wr->opcode); ++ err = EINVAL; ++ *bad_wr = wr; ++ goto out; ++ } ++ ++ if (unlikely(xsc_wq_overflow(&qp->sq, nreq, ++ to_xcq(qp->ibv_qp->send_cq)))) { ++ xsc_dbg(to_xctx(ibqp->context)->dbg_fp, XSC_DBG_QP_SEND, ++ "send work queue overflow\n"); ++ err = ENOMEM; ++ *bad_wr = wr; ++ goto out; ++ } ++ ++ if (unlikely(wr->num_sge > qp->sq.max_gs)) { ++ xsc_dbg(to_xctx(ibqp->context)->dbg_fp, XSC_DBG_QP_SEND, ++ "max gs exceeded %d (max = %d)\n", ++ wr->num_sge, qp->sq.max_gs); ++ err = ENOMEM; ++ *bad_wr = wr; ++ goto out; ++ } ++ ++ if 
(unlikely(wr->opcode == IBV_WR_RDMA_READ && wr->num_sge > 1)) { ++ xsc_dbg(to_xctx(ibqp->context)->dbg_fp, XSC_DBG_QP_SEND, ++ "rdma read, max gs exceeded %d (max = 1)\n", ++ wr->num_sge); ++ err = ENOMEM; ++ *bad_wr = wr; ++ goto out; ++ } ++ ++ idx = qp->sq.cur_post & (qp->sq.wqe_cnt - 1); ++ zero_send_ds(idx, qp); ++ ctrl = seg = xsc_get_send_wqe(qp, idx); ++ ctrl->ds_data_num = 0; ++ WR_LE_16(ctrl->wqe_id, ++ qp->sq.cur_post << (qp->sq.wqe_shift - XSC_BASE_WQE_SHIFT)); ++ ctrl->se = wr->send_flags & IBV_SEND_SOLICITED ? 1 : 0; ++ ctrl->ce = qp->sq_signal_bits ? 1 : (wr->send_flags & IBV_SEND_SIGNALED ? 1 : 0); ++ ctrl->in_line = wr->send_flags & IBV_SEND_INLINE ? 1 : 0; ++ for (i = 0; i < wr->num_sge; ++i) { ++ if (likely(wr->sg_list[i].length)) { ++ msg_len += wr->sg_list[i].length; ++ } ++ } ++ ctrl->msg_len = msg_len; ++ ctrl->with_immdt = 0; ++ ++ if (unlikely(wr->opcode == IBV_WR_RDMA_READ && msg_len == 0)) { ++ xsc_dbg(to_xctx(ibqp->context)->dbg_fp, XSC_DBG_QP_SEND, ++ "rdma read, msg len should not be 0\n"); ++ /* workaround, return success for posting zero-length read */ ++ err = 0; ++ goto out; ++ } ++ ++ switch (ibqp->qp_type) { ++ case IBV_QPT_RC: ++ switch (wr->opcode) { ++ case IBV_WR_SEND_WITH_INV: ++ case IBV_WR_SEND: ++ break; ++ case IBV_WR_SEND_WITH_IMM: ++ ctrl->with_immdt = 1; ++ ctrl->opcode_data = send_ieth(wr); ++ break; ++ case IBV_WR_RDMA_WRITE_WITH_IMM: ++ ctrl->with_immdt = 1; ++ ctrl->opcode_data = send_ieth(wr); ++ SWITCH_FALLTHROUGH; ++ case IBV_WR_RDMA_READ: ++ case IBV_WR_RDMA_WRITE: ++ if (ctrl->msg_len == 0) ++ break; ++ ctrl->ds_data_num++; ++ data_seg = get_seg_wqe(ctrl, seg_index); ++ set_remote_addr_seg( ++ data_seg, ++ msg_len, ++ wr->wr.rdma.remote_addr, ++ wr->wr.rdma.rkey); ++ seg_index++; ++ break; ++ default: ++ printf("debug: opcode:%u NOT supported\n", wr->opcode); ++ err = EPERM; ++ *bad_wr = wr; ++ goto out; ++ } ++ break; ++ default: ++ xsc_dbg(to_xctx(ibqp->context)->dbg_fp, XSC_DBG_QP_SEND, ++ "qp type:%u NOT supported\n", ibqp->qp_type); ++ err = EPERM; ++ *bad_wr = wr; ++ goto out; ++ } ++ ++ if (wr->send_flags & IBV_SEND_INLINE && wr->num_sge) { ++ err = set_data_inl_seg(qp, wr, ctrl); ++ if (unlikely(err)) { ++ *bad_wr = wr; ++ xsc_dbg(to_xctx(ibqp->context)->dbg_fp, XSC_DBG_QP_SEND, ++ "inline layout failed, err %d\n", err); ++ goto out; ++ } ++ } else { ++ for (i = 0; i < wr->num_sge; ++i, ++seg_index) { ++ if (likely(wr->sg_list[i].length)) { ++ data_seg = get_seg_wqe(ctrl, seg_index); ++ set_local_data_seg(data_seg, &wr->sg_list[i]); ++ ctrl->ds_data_num++; ++ } ++ } ++ } ++ ++ ctrl->msg_opcode = xsc_ib_opcode[wr->opcode]; ++ if (ctrl->msg_len == 0) { ++ ctrl->ds_data_num = 0; ++ zero_send_ds(idx, qp); ++ } ++ qp->sq.wrid[idx] = wr->wr_id; ++ qp->sq.wqe_head[idx] = qp->sq.head + nreq; ++ qp->sq.cur_post += 1; ++ if (ctrl->ce) { ++ qp->sq.flush_wqe_cnt++; ++ qp->sq.need_flush[idx] = 1; ++ } ++ qp->sq.wr_opcode[idx] = wr->opcode; ++ ++ if (xsc_debug_mask & XSC_DBG_QP_SEND) ++ dump_wqe(0, idx, qp); ++ } ++ ++out: ++ xsc_post_send_db(qp, nreq); ++ xsc_spin_unlock(&qp->sq.lock); ++ ++ return err; ++} ++ ++int xsc_post_send(struct ibv_qp *ibqp, struct ibv_send_wr *wr, ++ struct ibv_send_wr **bad_wr) ++{ ++ return _xsc_post_send(ibqp, wr, bad_wr); ++} ++ ++static void set_wq_sig_seg(struct xsc_rwq *rwq, struct xsc_rwqe_sig *sig, ++ int size, uint16_t idx) ++{ ++ uint8_t sign; ++ uint32_t qpn = rwq->wq.wq_num; ++ ++ sign = calc_sig(sig, size); ++ sign ^= calc_sig(&qpn, 4); ++ sign ^= calc_sig(&idx, 2); ++ sig->signature = sign; ++} ++ 
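The verbs entry points in this file (xsc_post_send() above, xsc_post_wq_recv() and xsc_post_recv() below) are only reached through the generic libibverbs wrappers, so an application never calls the xsc_* functions directly. As a purely illustrative sketch, not part of this patch, the fragment below posts one signaled RDMA WRITE and busy-polls for its completion; the QP, CQ, MR, remote address, and rkey are assumed to have been created elsewhere, and post_write_and_wait() is a hypothetical helper name.

```c
/*
 * Illustrative only (not part of this patch): a minimal libibverbs
 * caller whose calls end up in xsc_post_send() and xsc_poll_cq().
 * The QP, CQ, and MR are assumed to be set up already.
 */
#include <stdint.h>
#include <infiniband/verbs.h>

static int post_write_and_wait(struct ibv_qp *qp, struct ibv_cq *cq,
			       struct ibv_mr *mr, void *buf, uint32_t len,
			       uint64_t remote_addr, uint32_t rkey)
{
	struct ibv_sge sge = {
		.addr = (uintptr_t)buf,
		.length = len,
		.lkey = mr->lkey,
	};
	struct ibv_send_wr wr = {
		.wr_id = 0x1234,
		.sg_list = &sge,
		.num_sge = 1,
		.opcode = IBV_WR_RDMA_WRITE,
		/* request a CQE for this WR */
		.send_flags = IBV_SEND_SIGNALED,
		.wr.rdma.remote_addr = remote_addr,
		.wr.rdma.rkey = rkey,
	};
	struct ibv_send_wr *bad_wr;
	struct ibv_wc wc;
	int n;

	if (ibv_post_send(qp, &wr, &bad_wr))	/* builds one send WQE */
		return -1;

	do {					/* reaped through poll_cq() */
		n = ibv_poll_cq(cq, 1, &wc);
	} while (n == 0);

	return (n < 0 || wc.status != IBV_WC_SUCCESS) ? -1 : 0;
}
```

Note that IBV_SEND_SIGNALED is what makes _xsc_post_send() set ctrl->ce, mark need_flush[] and bump flush_wqe_cnt, which the flush-error completion path in cq.c relies on when the QP later enters the error state.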
++int xsc_post_wq_recv(struct ibv_wq *ibwq, struct ibv_recv_wr *wr, ++ struct ibv_recv_wr **bad_wr) ++{ ++ struct xsc_rwq *rwq = to_xrwq(ibwq); ++ struct xsc_wqe_data_seg *scat; ++ int err = 0; ++ int nreq; ++ int ind; ++ int i, j; ++ struct xsc_rwqe_sig *sig; ++ ++ xsc_spin_lock(&rwq->rq.lock); ++ ++ ind = rwq->rq.head & (rwq->rq.wqe_cnt - 1); ++ ++ for (nreq = 0; wr; ++nreq, wr = wr->next) { ++ if (unlikely(xsc_wq_overflow(&rwq->rq, nreq, ++ to_xcq(rwq->wq.cq)))) { ++ err = ENOMEM; ++ *bad_wr = wr; ++ goto out; ++ } ++ ++ if (unlikely(wr->num_sge > rwq->rq.max_gs)) { ++ err = EINVAL; ++ *bad_wr = wr; ++ goto out; ++ } ++ ++ scat = get_wq_recv_wqe(rwq, ind); ++ sig = (struct xsc_rwqe_sig *)scat; ++ if (unlikely(rwq->wq_sig)) { ++ memset(sig, 0, 1 << rwq->rq.wqe_shift); ++ ++scat; ++ } ++ ++ for (i = 0, j = 0; i < wr->num_sge; ++i) { ++ if (unlikely(!wr->sg_list[i].length)) ++ continue; ++ //set_data_ptr_seg(scat + j++, wr->sg_list + i); ++ } ++ ++ if (j < rwq->rq.max_gs) { ++ scat[j].seg_len = 0; ++ scat[j].mkey = htole32(XSC_INVALID_LKEY); ++ scat[j].va = 0; ++ } ++ ++ if (unlikely(rwq->wq_sig)) ++ set_wq_sig_seg(rwq, sig, (wr->num_sge + 1) << 4, ++ rwq->rq.head & 0xffff); ++ ++ rwq->rq.wrid[ind] = wr->wr_id; ++ ++ ind = (ind + 1) & (rwq->rq.wqe_cnt - 1); ++ rwq->rq.flush_wqe_cnt++; ++ } ++ ++out: ++ if (likely(nreq)) { ++ rwq->rq.head += nreq; ++ /* ++ * Make sure that descriptors are written before ++ * doorbell record. ++ */ ++ udma_to_device_barrier(); ++ *(rwq->recv_db) = htobe32(rwq->rq.head & 0xffff); ++ } ++ ++ xsc_spin_unlock(&rwq->rq.lock); ++ ++ return err; ++} ++ ++int xsc_post_recv(struct ibv_qp *ibqp, struct ibv_recv_wr *wr, ++ struct ibv_recv_wr **bad_wr) ++{ ++ struct xsc_qp *qp = to_xqp(ibqp); ++ struct xsc_wqe_data_seg *recv_head; ++ struct xsc_wqe_data_seg *data_seg; ++ int err = 0; ++ uint16_t next_pid = 0; ++ union xsc_db_data db; ++ int nreq; ++ uint16_t idx; ++ int i; ++ ++ xsc_spin_lock(&qp->rq.lock); ++ ++ idx = qp->rq.head & (qp->rq.wqe_cnt - 1); ++ ++ zero_recv_ds(idx, qp); ++ for (nreq = 0; wr; ++nreq, wr = wr->next) { ++ if (unlikely(xsc_wq_overflow(&qp->rq, nreq, ++ to_xcq(qp->ibv_qp->recv_cq)))) { ++ printf("recv work queue overflow\n"); ++ err = ENOMEM; ++ *bad_wr = wr; ++ goto out; ++ } ++ ++ if (unlikely(wr->num_sge > qp->rq.max_gs)) { ++ printf("max gs exceeded %d (max = %d)\n", ++ wr->num_sge, qp->rq.max_gs); ++ err = EINVAL; ++ *bad_wr = wr; ++ goto out; ++ } ++ ++ recv_head = get_recv_wqe(qp, idx); ++ ++ for (i = 0; i < wr->num_sge; ++i) { ++ if (unlikely(!wr->sg_list[i].length)) ++ continue; ++ data_seg = get_seg_wqe(recv_head, i); ++ WR_LE_32(data_seg->seg_len, wr->sg_list[i].length); ++ WR_LE_32(data_seg->mkey, wr->sg_list[i].lkey); ++ WR_LE_64(data_seg->va, wr->sg_list[i].addr); ++ } ++ ++ qp->rq.wrid[idx] = wr->wr_id; ++ ++ dump_wqe(1, idx, qp); ++ idx = (idx + 1) & (qp->rq.wqe_cnt - 1); ++ qp->rq.flush_wqe_cnt++; ++ } ++ ++out: ++ if (likely(nreq)) { ++ qp->rq.head += nreq; ++ next_pid = qp->rq.head << (qp->rq.wqe_shift - XSC_BASE_WQE_SHIFT); ++ db.rq_next_pid = next_pid; ++ db.rqn = qp->rqn; ++ ++ /* ++ * Make sure that descriptors are written before ++ * doorbell record. 
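++	 * The doorbell payload packs rq_next_pid and rqn into a single
++	 * word (db.raw_data), which WR_REG() below writes in one access.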
++ */ ++ udma_to_device_barrier(); ++ WR_REG(qp->rq.db, db.raw_data); ++ } ++ ++ xsc_spin_unlock(&qp->rq.lock); ++ ++ return err; ++} ++ ++int xsc_use_huge(const char *key) ++{ ++ char *e; ++ e = getenv(key); ++ if (e && !strcmp(e, "y")) ++ return 1; ++ ++ return 0; ++} ++ ++struct xsc_qp *xsc_find_qp(struct xsc_context *ctx, uint32_t qpn) ++{ ++ int tind = qpn >> XSC_QP_TABLE_SHIFT; ++ ++ if (ctx->qp_table[tind].refcnt) ++ return ctx->qp_table[tind].table[qpn & XSC_QP_TABLE_MASK]; ++ else ++ return NULL; ++} ++ ++int xsc_store_qp(struct xsc_context *ctx, uint32_t qpn, struct xsc_qp *qp) ++{ ++ int tind = qpn >> XSC_QP_TABLE_SHIFT; ++ ++ if (!ctx->qp_table[tind].refcnt) { ++ ctx->qp_table[tind].table = calloc(XSC_QP_TABLE_MASK + 1, ++ sizeof(struct xsc_qp *)); ++ if (!ctx->qp_table[tind].table) ++ return -1; ++ } ++ ++ ++ctx->qp_table[tind].refcnt; ++ ctx->qp_table[tind].table[qpn & XSC_QP_TABLE_MASK] = qp; ++ return 0; ++} ++ ++void xsc_clear_qp(struct xsc_context *ctx, uint32_t qpn) ++{ ++ int tind = qpn >> XSC_QP_TABLE_SHIFT; ++ ++ if (!--ctx->qp_table[tind].refcnt) ++ free(ctx->qp_table[tind].table); ++ else ++ ctx->qp_table[tind].table[qpn & XSC_QP_TABLE_MASK] = NULL; ++} ++ ++int xsc_err_state_qp(struct ibv_qp *qp, enum ibv_qp_state cur_state, ++ enum ibv_qp_state state) ++{ ++ struct xsc_err_state_qp_node *tmp, *err_rq_node, *err_sq_node; ++ struct xsc_qp *xqp = to_xqp(qp); ++ int ret = 0; ++ ++ xsc_dbg(to_xctx(qp->context)->dbg_fp, XSC_DBG_QP, ++ "modify qp: qpid %d, cur_qp_state %d, qp_state %d\n", xqp->rsc.rsn, cur_state, state); ++ if (cur_state == IBV_QPS_ERR && state != IBV_QPS_ERR) { ++ if(qp->recv_cq) { ++ list_for_each_safe(&to_xcq(qp->recv_cq)->err_state_qp_list, err_rq_node, tmp, entry) { ++ if (err_rq_node->qp_id == xqp->rsc.rsn) { ++ list_del(&err_rq_node->entry); ++ free(err_rq_node); ++ } ++ } ++ } ++ ++ if(qp->send_cq) { ++ list_for_each_safe(&to_xcq(qp->send_cq)->err_state_qp_list, err_sq_node, tmp, entry) { ++ if (err_sq_node->qp_id == xqp->rsc.rsn) { ++ list_del(&err_sq_node->entry); ++ free(err_sq_node); ++ } ++ } ++ } ++ return ret; ++ } ++ ++ if (cur_state != IBV_QPS_ERR && state == IBV_QPS_ERR) { ++ if(qp->recv_cq) { ++ err_rq_node = calloc(1, sizeof(*err_rq_node)); ++ if (!err_rq_node) ++ return ENOMEM; ++ err_rq_node->qp_id = xqp->rsc.rsn; ++ err_rq_node->is_sq = false; ++ list_add_tail(&to_xcq(qp->recv_cq)->err_state_qp_list, &err_rq_node->entry); ++ } ++ ++ if(qp->send_cq) { ++ err_sq_node = calloc(1, sizeof(*err_sq_node)); ++ if (!err_sq_node) ++ return ENOMEM; ++ err_sq_node->qp_id = xqp->rsc.rsn; ++ err_sq_node->is_sq = true; ++ list_add_tail(&to_xcq(qp->send_cq)->err_state_qp_list, &err_sq_node->entry); ++ } ++ } ++ return ret; ++} +diff --git a/providers/xscale/rqm_csr_defines.h b/providers/xscale/rqm_csr_defines.h +new file mode 100644 +index 0000000..9552855 +--- /dev/null ++++ b/providers/xscale/rqm_csr_defines.h +@@ -0,0 +1,200 @@ ++#ifndef _RQM_CSR_DEFINES_H_ ++#define _RQM_CSR_DEFINES_H_ ++ ++#define RQM_SOFT_RESET_REG_ADDR 0x5000 ++#define RQM_SOFT_RESET_MASK 0x1 ++#define RQM_SOFT_RESET_SHIFT 0 ++ ++#define RQM_COUNTER_CONFIG_REG_ADDR 0x5020 ++#define RQM_CFG_CNT_WRAP_MASK 0x1 ++#define RQM_CFG_CNT_WRAP_SHIFT 0 ++#define RQM_CFG_CNT_RC_MASK 0x2 ++#define RQM_CFG_CNT_RC_SHIFT 1 ++ ++#define RQM_SCRATCH_PAD_REG_ADDR 0x5040 ++#define RQM_SCRATCH_PAD_MASK 0xffffffffffffffff ++#define RQM_SCRATCH_PAD_SHIFT 0 ++ ++#define RQM_RQM_CONFIG_REG_RING_ADDR_ARRAY_ADDR 0x5060 ++#define RQM_RQM_CONFIG_REG_RING_ADDR_ARRAY_SIZE 8 ++#define 
RQM_RQM_CONFIG_REG_RING_ADDR_ARRAY_STRIDE 0x20 ++#define RQM_CFG_CPU2RQM_RING_ADDR_MASK 0xffffffffffffffff ++#define RQM_CFG_CPU2RQM_RING_ADDR_SHIFT 0 ++ ++#define RQM_RQM_CONFIG_REG_RING_SIZE_ARRAY_ADDR 0x5160 ++#define RQM_RQM_CONFIG_REG_RING_SIZE_ARRAY_SIZE 8 ++#define RQM_RQM_CONFIG_REG_RING_SIZE_ARRAY_STRIDE 0x20 ++#define RQM_CFG_CPU2RQM_RING_SIZE_MASK 0xffff ++#define RQM_CFG_CPU2RQM_RING_SIZE_SHIFT 0 ++ ++#define RQM_RQM_CONFIG_REG_NEXT_PID_ARRAY_ADDR 0x5260 ++#define RQM_RQM_CONFIG_REG_NEXT_PID_ARRAY_SIZE 8 ++#define RQM_RQM_CONFIG_REG_NEXT_PID_ARRAY_STRIDE 0x20 ++#define RQM_CFG_CPU2RQM_NEXT_PID_MASK 0xffff ++#define RQM_CFG_CPU2RQM_NEXT_PID_SHIFT 0 ++ ++#define RQM_RQM_CONFIG_REG_CFG_EN_ARRAY_ADDR 0x5360 ++#define RQM_RQM_CONFIG_REG_CFG_EN_ARRAY_SIZE 8 ++#define RQM_RQM_CONFIG_REG_CFG_EN_ARRAY_STRIDE 0x20 ++#define RQM_CFG_CPU2RQM_CFG_EN_MASK 0x1 ++#define RQM_CFG_CPU2RQM_CFG_EN_SHIFT 0 ++ ++#define RQM_RQM_STATUS_REG_ARRAY_ADDR 0x5460 ++#define RQM_RQM_STATUS_REG_ARRAY_SIZE 8 ++#define RQM_RQM_STATUS_REG_ARRAY_STRIDE 0x20 ++#define RQM_STS_RQM2CPU_DONE_CID_MASK 0xffff ++#define RQM_STS_RQM2CPU_DONE_CID_SHIFT 0 ++ ++#define RQM_RQM_CONFIG_MER_QPID_FIFO_TH_ADDR 0x5560 ++#define RQM_CFG_CPU2RQM_MER_QPID_FIFO_AMTY_TH_MASK 0x1f ++#define RQM_CFG_CPU2RQM_MER_QPID_FIFO_AMTY_TH_SHIFT 0 ++#define RQM_CFG_CPU2RQM_MER_QPID_FIFO_AFUL_TH_MASK 0x3e0 ++#define RQM_CFG_CPU2RQM_MER_QPID_FIFO_AFUL_TH_SHIFT 5 ++ ++#define RQM_RQM_CONFIG_DMA_QPID_FIFO_TH_ADDR 0x5580 ++#define RQM_CFG_CPU2RQM_DMA_QPID_FIFO_AMTY_TH_MASK 0x1f ++#define RQM_CFG_CPU2RQM_DMA_QPID_FIFO_AMTY_TH_SHIFT 0 ++#define RQM_CFG_CPU2RQM_DMA_QPID_FIFO_AFUL_TH_MASK 0x3e0 ++#define RQM_CFG_CPU2RQM_DMA_QPID_FIFO_AFUL_TH_SHIFT 5 ++ ++#define RQM_RQM_CONFIG_PTR_QPID_FIFO_TH_ADDR 0x55a0 ++#define RQM_CFG_CPU2RQM_PTR_QPID_FIFO_AMTY_TH_MASK 0x1f ++#define RQM_CFG_CPU2RQM_PTR_QPID_FIFO_AMTY_TH_SHIFT 0 ++#define RQM_CFG_CPU2RQM_PTR_QPID_FIFO_AFUL_TH_MASK 0x3e0 ++#define RQM_CFG_CPU2RQM_PTR_QPID_FIFO_AFUL_TH_SHIFT 5 ++ ++#define RQM_RQM_CONFIG_WQE_FIFO_AMTY_TH_ADDR 0x55c0 ++#define RQM_RQM_CONFIG_WQE_FIFO_AMTY_TH_SIZE 8 ++#define RQM_RQM_CONFIG_WQE_FIFO_AMTY_TH_STRIDE 0x20 ++#define RQM_CFG_CPU2RQM_WQE_FIFO_AMTY_TH_MASK 0x1f ++#define RQM_CFG_CPU2RQM_WQE_FIFO_AMTY_TH_SHIFT 0 ++ ++#define RQM_RQM_CONFIG_WQE_FIFO_AFUL_TH_ADDR 0x56c0 ++#define RQM_RQM_CONFIG_WQE_FIFO_AFUL_TH_SIZE 8 ++#define RQM_RQM_CONFIG_WQE_FIFO_AFUL_TH_STRIDE 0x20 ++#define RQM_CFG_CPU2RQM_WQE_FIFO_AFUL_TH_MASK 0x1f ++#define RQM_CFG_CPU2RQM_WQE_FIFO_AFUL_TH_SHIFT 0 ++ ++#define RQM_RQM_INT_STATE_REG_ADDR 0x57c0 ++#define RQM_RQM_WQE_FIFO_OVFL_ERR_MASK 0xff ++#define RQM_RQM_WQE_FIFO_OVFL_ERR_SHIFT 0 ++#define RQM_RQM_WQE_FIFO_UNFL_ERR_MASK 0xff00 ++#define RQM_RQM_WQE_FIFO_UNFL_ERR_SHIFT 8 ++#define RQM_RQM_NO_WQE_ERR_MASK 0xff0000 ++#define RQM_RQM_NO_WQE_ERR_SHIFT 16 ++ ++#define RQM_RQM_FIFO_USED_CNT_REG_ADDR 0x57e0 ++#define RQM_RQM_FIFO_USED_CNT_REG_SIZE 8 ++#define RQM_RQM_FIFO_USED_CNT_REG_STRIDE 0x20 ++#define RQM_RQM_WQE_FIFO_USED_CNT_MASK 0xf ++#define RQM_RQM_WQE_FIFO_USED_CNT_SHIFT 0 ++ ++#define RQM_RQM_CMD_FIFO_STATE_REG_ADDR 0x58e0 ++#define RQM_RQM_WQE_FIFO_MTY_MASK 0xff ++#define RQM_RQM_WQE_FIFO_MTY_SHIFT 0 ++#define RQM_RQM_WQE_FIFO_FUL_MASK 0xff00 ++#define RQM_RQM_WQE_FIFO_FUL_SHIFT 8 ++ ++#define RQM_RQM_OTH_FIFO_STATE_REG_ADDR 0x5900 ++#define RQM_RQM_OTH_FIFO_MTY_MASK 0x7 ++#define RQM_RQM_OTH_FIFO_MTY_SHIFT 0 ++#define RQM_RQM_OTH_FIFO_AFUL_MASK 0x38 ++#define RQM_RQM_OTH_FIFO_AFUL_SHIFT 3 ++#define RQM_RQM_OTH_FIFO_OVFL_ERR_MASK 0x1c0 ++#define 
RQM_RQM_OTH_FIFO_OVFL_ERR_SHIFT 6 ++#define RQM_RQM_OTH_FIFO_UNFL_ERR_MASK 0xe00 ++#define RQM_RQM_OTH_FIFO_UNFL_ERR_SHIFT 9 ++ ++#define RQM_RQM_OTHERS_FIFO_USED_CNT_REG_ADDR 0x5920 ++#define RQM_RQM_MER_REQ_FIFO_USED_CNT_MASK 0xf ++#define RQM_RQM_MER_REQ_FIFO_USED_CNT_SHIFT 0 ++#define RQM_RQM_DMA_REQ_FIFO_USED_CNT_MASK 0xf0 ++#define RQM_RQM_DMA_REQ_FIFO_USED_CNT_SHIFT 4 ++#define RQM_RQM_PTR_REQ_FIFO_USED_CNT_MASK 0xf00 ++#define RQM_RQM_PTR_REQ_FIFO_USED_CNT_SHIFT 8 ++ ++#define RQM_RQM_DEBUG_INFO_STATE_REG_0_ADDR 0x5940 ++#define RQM_RQM2MER_DBG_OPCODE_MASK 0xff ++#define RQM_RQM2MER_DBG_OPCODE_SHIFT 0 ++#define RQM_RQM2MER_DBG_TX0_RX1_MASK 0x100 ++#define RQM_RQM2MER_DBG_TX0_RX1_SHIFT 8 ++#define RQM_RQM2MER_DBG_CAP_MASK 0x200 ++#define RQM_RQM2MER_DBG_CAP_SHIFT 9 ++#define RQM_RQM2MER_DBG_L_QPID_MASK 0x1c00 ++#define RQM_RQM2MER_DBG_L_QPID_SHIFT 10 ++#define RQM_RQM2MER_DBG_SN_MASK 0x1fffffe000 ++#define RQM_RQM2MER_DBG_SN_SHIFT 13 ++ ++#define RQM_RQM_DEBUG_INFO_STATE_REG_1_ADDR 0x5960 ++#define RQM_RQM2MER_DBG_MOD_IF_BM_MASK 0xffffffffffffffff ++#define RQM_RQM2MER_DBG_MOD_IF_BM_SHIFT 0 ++ ++#define RQM_RQM_DEBUG_INFO_STATE_REG_2_ADDR 0x5980 ++#define RQM_RQM2MER_DBG_RQM2MER_VLD_CNT_MASK 0xffffffff ++#define RQM_RQM2MER_DBG_RQM2MER_VLD_CNT_SHIFT 0 ++#define RQM_RQM2MER_DBG_DD2RQM_DATA_VLD_CNT_MASK 0xffffffff00000000 ++#define RQM_RQM2MER_DBG_DD2RQM_DATA_VLD_CNT_SHIFT 32 ++ ++#define RQM_RQM_DMA_REQ_LEN_STATE_REG_ADDR 0x59a0 ++#define RQM_RQM_DMA_REQ_LEN_MASK 0x3ff ++#define RQM_RQM_DMA_REQ_LEN_SHIFT 0 ++ ++#define RQM_RQM_DMA_REQ_ADDR_STATE_REG_ADDR 0x59c0 ++#define RQM_RQM_DMA_REQ_ADDR_MASK 0xffffffffffffffff ++#define RQM_RQM_DMA_REQ_ADDR_SHIFT 0 ++ ++#define RQM_RQM_WQE_WQEID_ADDR 0x59e0 ++#define RQM_RQM_WQE_WQEID_MASK 0xffff ++#define RQM_RQM_WQE_WQEID_SHIFT 0 ++ ++#define RQM_RQM_WQE_RECV_LEN_ADDR 0x5a00 ++#define RQM_RQM_WQE_REC_LEN_MASK 0x7fffffff ++#define RQM_RQM_WQE_REC_LEN_SHIFT 0 ++ ++#define RQM_RQM_WQE_LOCAL_VA_ADDR 0x5a20 ++#define RQM_RQM_WQE_L_VA_MASK 0xffffffffffffffff ++#define RQM_RQM_WQE_L_VA_SHIFT 0 ++ ++#define RQM_RQM_WQE_LOCAL_KEY_ADDR 0x5a40 ++#define RQM_RQM_WQE_L_KEY_MASK 0xffffffff ++#define RQM_RQM_WQE_L_KEY_SHIFT 0 ++ ++#define RQM_MER_RQM_WQE_QPID_ADDR 0x5a60 ++#define RQM_RQM_WQE_QPID_MASK 0x7 ++#define RQM_RQM_WQE_QPID_SHIFT 0 ++ ++#define RQM_RQM_STATUS_NEXT_CID_REG_ARRAY_ADDR 0x5a80 ++#define RQM_RQM_STATUS_NEXT_CID_REG_ARRAY_SIZE 8 ++#define RQM_RQM_STATUS_NEXT_CID_REG_ARRAY_STRIDE 0x20 ++#define RQM_RQM_NEXT_CID_MASK 0xffff ++#define RQM_RQM_NEXT_CID_SHIFT 0 ++ ++#define RQM_RQM_DMA_IN_SOP_CNT_REG_ADDR 0x5b80 ++#define RQM_RQM_DMA_IN_SOP_CNT_MASK 0xffffffffffffffff ++#define RQM_RQM_DMA_IN_SOP_CNT_SHIFT 0 ++ ++#define RQM_RQM_DMA_IN_EOP_CNT_REG_ADDR 0x5ba0 ++#define RQM_RQM_DMA_IN_EOP_CNT_MASK 0xffffffffffffffff ++#define RQM_RQM_DMA_IN_EOP_CNT_SHIFT 0 ++ ++#define RQM_RQM_DMA_IN_VLD_CNT_REG_ADDR 0x5bc0 ++#define RQM_RQM_DMA_IN_VLD_CNT_MASK 0xffffffffffffffff ++#define RQM_RQM_DMA_IN_VLD_CNT_SHIFT 0 ++ ++#define RQM_RQM_DMA_REQ_CNT_REG_ADDR 0x5be0 ++#define RQM_RQM_DMA_REQ_CNT_MASK 0xffffffffffffffff ++#define RQM_RQM_DMA_REQ_CNT_SHIFT 0 ++ ++#define RQM_RQM_DMA_GNT_CNT_REG_ADDR 0x5c00 ++#define RQM_RQM_DMA_GNT_CNT_MASK 0xffffffffffffffff ++#define RQM_RQM_DMA_GNT_CNT_SHIFT 0 ++ ++#define RQM_RQM_MER_VLD_CNT_REG_ADDR 0x5c20 ++#define RQM_RQM_MER_VLD_CNT_MASK 0xffffffffffffffff ++#define RQM_RQM_MER_VLD_CNT_SHIFT 0 ++ ++#define RQM_RQM_MER_REQ_CNT_REG_ADDR 0x5c40 ++#define RQM_RQM_MER_REQ_CNT_MASK 0xffffffffffffffff ++#define 
RQM_RQM_MER_REQ_CNT_SHIFT 0 ++ ++#endif +diff --git a/providers/xscale/sqm_csr_defines.h b/providers/xscale/sqm_csr_defines.h +new file mode 100644 +index 0000000..e0dc6e9 +--- /dev/null ++++ b/providers/xscale/sqm_csr_defines.h +@@ -0,0 +1,204 @@ ++#ifndef _SQM_CSR_DEFINES_H_ ++#define _SQM_CSR_DEFINES_H_ ++ ++#define SQM_SOFT_RESET_REG_ADDR 0x4000 ++#define SQM_SOFT_RESET_MASK 0x1 ++#define SQM_SOFT_RESET_SHIFT 0 ++ ++#define SQM_COUNTER_CONFIG_REG_ADDR 0x4020 ++#define SQM_CFG_CNT_WRAP_MASK 0x1 ++#define SQM_CFG_CNT_WRAP_SHIFT 0 ++#define SQM_CFG_CNT_RC_MASK 0x2 ++#define SQM_CFG_CNT_RC_SHIFT 1 ++ ++#define SQM_SCRATCH_PAD_REG_ADDR 0x4040 ++#define SQM_SCRATCH_PAD_MASK 0xffffffffffffffff ++#define SQM_SCRATCH_PAD_SHIFT 0 ++ ++#define SQM_SQM_CONFIG_REG_RING_ADDR_ARRAY_ADDR 0x4060 ++#define SQM_SQM_CONFIG_REG_RING_ADDR_ARRAY_SIZE 8 ++#define SQM_SQM_CONFIG_REG_RING_ADDR_ARRAY_STRIDE 0x20 ++#define SQM_CFG_CPU2SQM_RING_ADDR_MASK 0xffffffffffffffff ++#define SQM_CFG_CPU2SQM_RING_ADDR_SHIFT 0 ++ ++#define SQM_SQM_CONFIG_REG_RING_SIZE_ARRAY_ADDR 0x4160 ++#define SQM_SQM_CONFIG_REG_RING_SIZE_ARRAY_SIZE 8 ++#define SQM_SQM_CONFIG_REG_RING_SIZE_ARRAY_STRIDE 0x20 ++#define SQM_CFG_CPU2SQM_RING_SIZE_MASK 0xffff ++#define SQM_CFG_CPU2SQM_RING_SIZE_SHIFT 0 ++ ++#define SQM_SQM_CONFIG_REG_ARRAY_ADDR 0x4260 ++#define SQM_SQM_CONFIG_REG_ARRAY_SIZE 8 ++#define SQM_SQM_CONFIG_REG_ARRAY_STRIDE 0x20 ++#define SQM_CFG_CPU2SQM_NEXT_PID_MASK 0xffff ++#define SQM_CFG_CPU2SQM_NEXT_PID_SHIFT 0 ++ ++#define SQM_SQM_CONFIG_REG_CFG_EN_ARRAY_ADDR 0x4360 ++#define SQM_SQM_CONFIG_REG_CFG_EN_ARRAY_SIZE 8 ++#define SQM_SQM_CONFIG_REG_CFG_EN_ARRAY_STRIDE 0x20 ++#define SQM_CFG_CPU2SQM_CFG_EN_MASK 0x1 ++#define SQM_CFG_CPU2SQM_CFG_EN_SHIFT 0 ++ ++#define SQM_SQM_STATUS_REG_DONE_CID_ARRAY_ADDR 0x4460 ++#define SQM_SQM_STATUS_REG_DONE_CID_ARRAY_SIZE 8 ++#define SQM_SQM_STATUS_REG_DONE_CID_ARRAY_STRIDE 0x20 ++#define SQM_STS_SQM2CPU_DONE_CID_MASK 0xffff ++#define SQM_STS_SQM2CPU_DONE_CID_SHIFT 0 ++ ++#define SQM_SQM_CFG_WQE_FIFO_TH_ADDR 0x4560 ++#define SQM_CFG_CPU2SQM_WQE_FIFO_AFUL_TH_MASK 0xff ++#define SQM_CFG_CPU2SQM_WQE_FIFO_AFUL_TH_SHIFT 0 ++#define SQM_CFG_CPU2SQM_WQE_FIFO_AMTY_TH_MASK 0xff00 ++#define SQM_CFG_CPU2SQM_WQE_FIFO_AMTY_TH_SHIFT 8 ++ ++#define SQM_SQM_CONFIG_DBG_FIFO_REG_CFG_ADDR 0x4580 ++#define SQM_CFG_CPU2SQM_DBG_FIFO_AFUL_TH_MASK 0xff ++#define SQM_CFG_CPU2SQM_DBG_FIFO_AFUL_TH_SHIFT 0 ++#define SQM_CFG_CPU2SQM_DBG_FIFO_AMTY_TH_MASK 0xff00 ++#define SQM_CFG_CPU2SQM_DBG_FIFO_AMTY_TH_SHIFT 8 ++ ++#define SQM_SQM_CONFIG_QPID_W_FIFO_REG_CFG_ADDR 0x45a0 ++#define SQM_CFG_CPU2SQM_QPID_W_FIFO_AFUL_TH_MASK 0xff ++#define SQM_CFG_CPU2SQM_QPID_W_FIFO_AFUL_TH_SHIFT 0 ++#define SQM_CFG_CPU2SQM_QPID_W_FIFO_AMTY_TH_MASK 0xff00 ++#define SQM_CFG_CPU2SQM_QPID_W_FIFO_AMTY_TH_SHIFT 8 ++ ++#define SQM_SQM_CONFIG_QPID_R_FIFO_REG_CFG_ADDR 0x45c0 ++#define SQM_CFG_CPU2SQM_QPID_R_FIFO_AFUL_TH_MASK 0xff ++#define SQM_CFG_CPU2SQM_QPID_R_FIFO_AFUL_TH_SHIFT 0 ++#define SQM_CFG_CPU2SQM_QPID_R_FIFO_AMTY_TH_MASK 0xff00 ++#define SQM_CFG_CPU2SQM_QPID_R_FIFO_AMTY_TH_SHIFT 8 ++ ++#define SQM_SQM_INT_STATE_REG_ADDR 0x45e0 ++#define SQM_SQM_FIFO_OVFL_ERR_MASK 0xf ++#define SQM_SQM_FIFO_OVFL_ERR_SHIFT 0 ++#define SQM_SQM_FIFO_UNFL_ERR_MASK 0xf0 ++#define SQM_SQM_FIFO_UNFL_ERR_SHIFT 4 ++#define SQM_SQM_FIFO_MTY_MASK 0xf00 ++#define SQM_SQM_FIFO_MTY_SHIFT 8 ++#define SQM_SQM_FIFO_AFUL_MASK 0xf000 ++#define SQM_SQM_FIFO_AFUL_SHIFT 12 ++#define SQM_SQM_SOP_EOP_NO_EQUAL_MASK 0x10000 ++#define SQM_SQM_SOP_EOP_NO_EQUAL_SHIFT 16 ++ ++#define 
SQM_SQM_FIFO_USED_CNT_REG_ADDR 0x4600 ++#define SQM_SQM_WQE_FIFO_USED_CNT_MASK 0x7f ++#define SQM_SQM_WQE_FIFO_USED_CNT_SHIFT 0 ++#define SQM_SQM_HEAD_FIFO_USED_CNT_MASK 0x3f80 ++#define SQM_SQM_HEAD_FIFO_USED_CNT_SHIFT 7 ++#define SQM_SQM_PTR_FIFO_USED_CNT_MASK 0x1fc000 ++#define SQM_SQM_PTR_FIFO_USED_CNT_SHIFT 14 ++#define SQM_SQM_DBG_FIFO_USED_CNT_MASK 0xfe00000 ++#define SQM_SQM_DBG_FIFO_USED_CNT_SHIFT 21 ++ ++#define SQM_SQM_DMA_REQUEST_LEN_REG_ADDR 0x4620 ++#define SQM_SQM_DMA_REQ_LEN_MASK 0x3ff ++#define SQM_SQM_DMA_REQ_LEN_SHIFT 0 ++ ++#define SQM_SQM_DMA_REQUEST_ADDR_REG_ADDR 0x4640 ++#define SQM_SQM_DMA_REQ_ADDR_MASK 0xffffffffffffffff ++#define SQM_SQM_DMA_REQ_ADDR_SHIFT 0 ++ ++#define SQM_SQM_STATUS_REG_NEXT_CID_ARRAY_ADDR 0x4660 ++#define SQM_SQM_STATUS_REG_NEXT_CID_ARRAY_SIZE 8 ++#define SQM_SQM_STATUS_REG_NEXT_CID_ARRAY_STRIDE 0x20 ++#define SQM_SQM_NEXT_CID_MASK 0xffff ++#define SQM_SQM_NEXT_CID_SHIFT 0 ++ ++#define SQM_SQM_WQE_OPCODE_ADDR 0x4760 ++#define SQM_SQM_WQE_OPCODE_MASK 0xff ++#define SQM_SQM_WQE_OPCODE_SHIFT 0 ++ ++#define SQM_SQM_WQE_WQEID_ADDR 0x4780 ++#define SQM_SQM_WQE_WQEID_MASK 0xffff ++#define SQM_SQM_WQE_WQEID_SHIFT 0 ++ ++#define SQM_SQM_WQE_R_VA_ADDR 0x47a0 ++#define SQM_SQM_WQE_R_VA_MASK 0xffffffffffffffff ++#define SQM_SQM_WQE_R_VA_SHIFT 0 ++ ++#define SQM_SQM_WQE_R_KEY_ADDR 0x47c0 ++#define SQM_SQM_WQE_R_KEY_MASK 0xffffffff ++#define SQM_SQM_WQE_R_KEY_SHIFT 0 ++ ++#define SQM_SQM_WQE_L_LEN_ADDR 0x47e0 ++#define SQM_SQM_WQE_L_LEN_MASK 0x7fffffff ++#define SQM_SQM_WQE_L_LEN_SHIFT 0 ++ ++#define SQM_SQM_WQE_L_VA_ADDR 0x4800 ++#define SQM_SQM_WQE_L_VA_MASK 0xffffffffffffffff ++#define SQM_SQM_WQE_L_VA_SHIFT 0 ++ ++#define SQM_SQM_WQE_L_KEY_ADDR 0x4820 ++#define SQM_SQM_WQE_L_KEY_MASK 0xffffffff ++#define SQM_SQM_WQE_L_KEY_SHIFT 0 ++ ++#define SQM_SQM_WQE_QPID_ADDR 0x4840 ++#define SQM_SQM_WQE_QPID_MASK 0x7 ++#define SQM_SQM_WQE_QPID_SHIFT 0 ++ ++#define SQM_SQM_DMA_IN_SOP_CNT_REG_ADDR 0x4860 ++#define SQM_SQM_DMA_IN_SOP_CNT_MASK 0xffffffffffffffff ++#define SQM_SQM_DMA_IN_SOP_CNT_SHIFT 0 ++ ++#define SQM_SQM_DMA_IN_EOP_CNT_REG_ADDR 0x4880 ++#define SQM_SQM_DMA_IN_EOP_CNT_MASK 0xffffffffffffffff ++#define SQM_SQM_DMA_IN_EOP_CNT_SHIFT 0 ++ ++#define SQM_SQM_DMA_IN_VLD_CNT_REG_ADDR 0x48a0 ++#define SQM_SQM_DMA_IN_VLD_CNT_MASK 0xffffffffffffffff ++#define SQM_SQM_DMA_IN_VLD_CNT_SHIFT 0 ++ ++#define SQM_SQM_DMA_REQ_CNT_REG_ADDR 0x48c0 ++#define SQM_SQM_DMA_REQ_CNT_MASK 0xffffffffffffffff ++#define SQM_SQM_DMA_REQ_CNT_SHIFT 0 ++ ++#define SQM_SQM_DMA_GNT_CNT_REG_ADDR 0x48e0 ++#define SQM_SQM_DMA_GNT_CNT_MASK 0xffffffffffffffff ++#define SQM_SQM_DMA_GNT_CNT_SHIFT 0 ++ ++#define SQM_SQM_MET_VLD_CNT_REG_ADDR 0x4900 ++#define SQM_SQM_MET_CNT_MASK 0xffffffffffffffff ++#define SQM_SQM_MET_CNT_SHIFT 0 ++ ++#define SQM_SQM_CONFIG_CAP_CFG_EN_ADDR 0x4920 ++#define SQM_CFG_CPU2SQM_CAP_EN_CLR_MASK 0x1 ++#define SQM_CFG_CPU2SQM_CAP_EN_CLR_SHIFT 0 ++#define SQM_CFG_CPU2SQM_CAP_QPID_EN_MASK 0x2 ++#define SQM_CFG_CPU2SQM_CAP_QPID_EN_SHIFT 1 ++#define SQM_CFG_CPU2SQM_CAP_OPCODE_EN_MASK 0x4 ++#define SQM_CFG_CPU2SQM_CAP_OPCODE_EN_SHIFT 2 ++#define SQM_CFG_CPU2SQM_CAP_QPID_MASK 0x38 ++#define SQM_CFG_CPU2SQM_CAP_QPID_SHIFT 3 ++#define SQM_CFG_CPU2SQM_CAP_OPCODE_MASK 0x3fc0 ++#define SQM_CFG_CPU2SQM_CAP_OPCODE_SHIFT 6 ++ ++#define SQM_SQM_DEBUG_INFO_STATE_REG_0_ADDR 0x4940 ++#define SQM_SQM2MET_DBG_OPCODE_MASK 0xff ++#define SQM_SQM2MET_DBG_OPCODE_SHIFT 0 ++#define SQM_SQM2MET_DBG_TX0_RX1_MASK 0x100 ++#define SQM_SQM2MET_DBG_TX0_RX1_SHIFT 8 ++#define SQM_SQM2MET_DBG_CAP_MASK 
0x200 ++#define SQM_SQM2MET_DBG_CAP_SHIFT 9 ++#define SQM_SQM2MET_DBG_L_QPID_MASK 0x1c00 ++#define SQM_SQM2MET_DBG_L_QPID_SHIFT 10 ++#define SQM_SQM2MET_DBG_SN_MASK 0x1fffffe000 ++#define SQM_SQM2MET_DBG_SN_SHIFT 13 ++ ++#define SQM_SQM_DEBUG_INFO_STATE_REG_1_ADDR 0x4960 ++#define SQM_SQM2MET_DBG_MOD_IF_BM_MASK 0xffffffffffffffff ++#define SQM_SQM2MET_DBG_MOD_IF_BM_SHIFT 0 ++ ++#define SQM_SQM_DMA_REQ_COUNTER_REG_ADDR 0x4980 ++#define SQM_SQM_DMA_REQ_COUNTER_MASK 0xff ++#define SQM_SQM_DMA_REQ_COUNTER_SHIFT 0 ++ ++#define SQM_SQM_DMA_GNT_COUNTER_REG_ADDR 0x49a0 ++#define SQM_SQM_DMA_GNT_COUNTER_MASK 0xff ++#define SQM_SQM_DMA_GNT_COUNTER_SHIFT 0 ++ ++#define SQM_SQM_SQM2MET_COUNTER_REG_ADDR 0x49c0 ++#define SQM_SQM_SQM2MET_CNT_MASK 0xff ++#define SQM_SQM_SQM2MET_CNT_SHIFT 0 ++ ++#endif +diff --git a/providers/xscale/verbs.c b/providers/xscale/verbs.c +new file mode 100644 +index 0000000..937bed1 +--- /dev/null ++++ b/providers/xscale/verbs.c +@@ -0,0 +1,2816 @@ ++/* ++ * Copyright (c) 2021 - 2022, Shanghai Yunsilicon Technology Co., Ltd. ++ * All rights reserved. ++ */ ++ ++#include ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include ++#include ++#include ++#include ++#include ++ ++#include "xscale.h" ++#include "xsc-abi.h" ++#include "wqe.h" ++#include "xsc_hsi.h" ++ ++int xsc_single_threaded = 0; ++ ++static inline int is_xrc_tgt(int type) ++{ ++ return type == IBV_QPT_XRC_RECV; ++} ++ ++static void xsc_set_fw_version(struct ibv_device_attr *attr, union xsc_ib_fw_ver *fw_ver) ++{ ++ uint8_t ver_major = fw_ver->s.ver_major; ++ uint8_t ver_minor = fw_ver->s.ver_minor; ++ uint16_t ver_patch = fw_ver->s.ver_patch; ++ uint32_t ver_tweak = fw_ver->s.ver_tweak; ++ ++ if (ver_tweak == 0) { ++ snprintf(attr->fw_ver, sizeof(attr->fw_ver), "v%u.%u.%u", ++ ver_major, ver_minor, ver_patch); ++ } else { ++ snprintf(attr->fw_ver, sizeof(attr->fw_ver), "v%u.%u.%u+%u", ++ ver_major, ver_minor, ver_patch, ver_tweak); ++ } ++} ++ ++static int xsc_read_clock(struct ibv_context *context, uint64_t *cycles) ++{ ++ unsigned int clockhi, clocklo, clockhi1; ++ int i; ++ struct xsc_context *ctx = to_xctx(context); ++ ++ if (!ctx->hca_core_clock) ++ return EOPNOTSUPP; ++ ++ /* Handle wraparound */ ++ for (i = 0; i < 2; i++) { ++ clockhi = be32toh(mmio_read32_be(ctx->hca_core_clock)); ++ clocklo = be32toh(mmio_read32_be(ctx->hca_core_clock + 4)); ++ clockhi1 = be32toh(mmio_read32_be(ctx->hca_core_clock)); ++ if (clockhi == clockhi1) ++ break; ++ } ++ ++ *cycles = (uint64_t)clockhi << 32 | (uint64_t)clocklo; ++ ++ return 0; ++} ++ ++int xsc_query_rt_values(struct ibv_context *context, ++ struct ibv_values_ex *values) ++{ ++ uint32_t comp_mask = 0; ++ int err = 0; ++ ++ if (!check_comp_mask(values->comp_mask, IBV_VALUES_MASK_RAW_CLOCK)) ++ return EINVAL; ++ ++ if (values->comp_mask & IBV_VALUES_MASK_RAW_CLOCK) { ++ uint64_t cycles; ++ ++ err = xsc_read_clock(context, &cycles); ++ if (!err) { ++ values->raw_clock.tv_sec = 0; ++ values->raw_clock.tv_nsec = cycles; ++ comp_mask |= IBV_VALUES_MASK_RAW_CLOCK; ++ } ++ } ++ ++ values->comp_mask = comp_mask; ++ ++ return err; ++} ++ ++int xsc_query_port(struct ibv_context *context, uint8_t port, ++ struct ibv_port_attr *attr) ++{ ++ struct ibv_query_port cmd; ++ ++ return ibv_cmd_query_port(context, port, attr, &cmd, sizeof cmd); ++} ++ ++struct ibv_pd *xsc_alloc_pd(struct ibv_context *context) ++{ ++ struct ibv_alloc_pd cmd; ++ struct xsc_alloc_pd_resp resp; ++ struct 
xsc_pd *pd; ++ ++ pd = calloc(1, sizeof *pd); ++ if (!pd) ++ return NULL; ++ ++ if (ibv_cmd_alloc_pd(context, &pd->ibv_pd, &cmd, sizeof cmd, ++ &resp.ibv_resp, sizeof resp)) { ++ free(pd); ++ return NULL; ++ } ++ ++ atomic_init(&pd->refcount, 1); ++ pd->pdn = resp.pdn; ++ xsc_dbg(to_xctx(context)->dbg_fp, XSC_DBG_PD, "pd number:%u\n", pd->pdn); ++ ++ return &pd->ibv_pd; ++} ++ ++struct ibv_pd * ++xsc_alloc_parent_domain(struct ibv_context *context, ++ struct ibv_parent_domain_init_attr *attr) ++{ ++ struct xsc_parent_domain *xparent_domain; ++ ++ if (ibv_check_alloc_parent_domain(attr)) ++ return NULL; ++ ++ if (attr->comp_mask) { ++ errno = EINVAL; ++ return NULL; ++ } ++ ++ xparent_domain = calloc(1, sizeof(*xparent_domain)); ++ if (!xparent_domain) { ++ errno = ENOMEM; ++ return NULL; ++ } ++ ++ xparent_domain->xpd.xprotection_domain = to_xpd(attr->pd); ++ atomic_fetch_add(&xparent_domain->xpd.xprotection_domain->refcount, 1); ++ atomic_init(&xparent_domain->xpd.refcount, 1); ++ ++ ibv_initialize_parent_domain( ++ &xparent_domain->xpd.ibv_pd, ++ &xparent_domain->xpd.xprotection_domain->ibv_pd); ++ ++ return &xparent_domain->xpd.ibv_pd; ++} ++ ++static int xsc_dealloc_parent_domain(struct xsc_parent_domain *xparent_domain) ++{ ++ if (atomic_load(&xparent_domain->xpd.refcount) > 1) ++ return EBUSY; ++ ++ atomic_fetch_sub(&xparent_domain->xpd.xprotection_domain->refcount, 1); ++ ++ free(xparent_domain); ++ return 0; ++} ++ ++int xsc_free_pd(struct ibv_pd *pd) ++{ ++ int ret; ++ struct xsc_parent_domain *xparent_domain = to_xparent_domain(pd); ++ struct xsc_pd *xpd = to_xpd(pd); ++ ++ if (xparent_domain) ++ return xsc_dealloc_parent_domain(xparent_domain); ++ ++ if (atomic_load(&xpd->refcount) > 1) ++ return EBUSY; ++ ++ ret = ibv_cmd_dealloc_pd(pd); ++ if (ret) ++ return ret; ++ ++ xsc_dbg(to_xctx(pd->context)->dbg_fp, XSC_DBG_PD, "dealloc pd\n"); ++ free(xpd); ++ ++ return 0; ++} ++ ++struct ibv_mr *xsc_reg_mr(struct ibv_pd *pd, void *addr, size_t length, ++ uint64_t hca_va, int acc) ++{ ++ struct xsc_mr *mr; ++ struct ibv_reg_mr cmd; ++ int ret; ++ enum ibv_access_flags access = (enum ibv_access_flags)acc; ++ struct ib_uverbs_reg_mr_resp resp; ++ ++ mr = calloc(1, sizeof(*mr)); ++ if (!mr) ++ return NULL; ++ ++ ret = ibv_cmd_reg_mr(pd, addr, length, hca_va, access, ++ &mr->vmr, &cmd, sizeof(cmd), &resp, ++ sizeof resp); ++ if (ret) { ++ xsc_free_buf(&(mr->buf)); ++ free(mr); ++ return NULL; ++ } ++ mr->alloc_flags = acc; ++ ++ xsc_dbg(to_xctx(pd->context)->dbg_fp, XSC_DBG_MR, "lkey:%u, rkey:%u\n", ++ mr->vmr.ibv_mr.lkey, mr->vmr.ibv_mr.rkey); ++ ++ return &mr->vmr.ibv_mr; ++} ++ ++struct ibv_mr *xsc_alloc_null_mr(struct ibv_pd *pd) ++{ ++ struct xsc_mr *mr; ++ struct xsc_context *ctx = to_xctx(pd->context); ++ ++ if (ctx->dump_fill_mkey == XSC_INVALID_LKEY) { ++ errno = ENOTSUP; ++ return NULL; ++ } ++ ++ mr = calloc(1, sizeof(*mr)); ++ if (!mr) { ++ errno = ENOMEM; ++ return NULL; ++ } ++ ++ mr->vmr.ibv_mr.lkey = ctx->dump_fill_mkey; ++ ++ mr->vmr.ibv_mr.context = pd->context; ++ mr->vmr.ibv_mr.pd = pd; ++ mr->vmr.ibv_mr.addr = NULL; ++ mr->vmr.ibv_mr.length = SIZE_MAX; ++ mr->vmr.mr_type = IBV_MR_TYPE_NULL_MR; ++ ++ return &mr->vmr.ibv_mr; ++} ++ ++enum { ++ XSC_DM_ALLOWED_ACCESS = IBV_ACCESS_LOCAL_WRITE | ++ IBV_ACCESS_REMOTE_WRITE | ++ IBV_ACCESS_REMOTE_READ | ++ IBV_ACCESS_REMOTE_ATOMIC | ++ IBV_ACCESS_ZERO_BASED ++}; ++ ++struct ibv_mr *xsc_reg_dm_mr(struct ibv_pd *pd, struct ibv_dm *ibdm, ++ uint64_t dm_offset, size_t length, ++ unsigned int acc) ++{ ++ struct xsc_dm *dm = 
to_xdm(ibdm); ++ struct xsc_mr *mr; ++ int ret; ++ ++ if (acc & ~XSC_DM_ALLOWED_ACCESS) { ++ errno = EINVAL; ++ return NULL; ++ } ++ ++ mr = calloc(1, sizeof(*mr)); ++ if (!mr) { ++ errno = ENOMEM; ++ return NULL; ++ } ++ ++ ret = ibv_cmd_reg_dm_mr(pd, &dm->verbs_dm, dm_offset, length, acc, ++ &mr->vmr, NULL); ++ if (ret) { ++ free(mr); ++ return NULL; ++ } ++ ++ mr->alloc_flags = acc; ++ ++ return &mr->vmr.ibv_mr; ++} ++ ++int xsc_rereg_mr(struct verbs_mr *vmr, int flags, struct ibv_pd *pd, ++ void *addr, size_t length, int access) ++{ ++ struct ibv_rereg_mr cmd; ++ struct ib_uverbs_rereg_mr_resp resp; ++ ++ return ibv_cmd_rereg_mr(vmr, flags, addr, length, (uintptr_t)addr, ++ access, pd, &cmd, sizeof(cmd), &resp, ++ sizeof(resp)); ++} ++ ++int xsc_dereg_mr(struct verbs_mr *vmr) ++{ ++ int ret; ++ ++ if (vmr->mr_type == IBV_MR_TYPE_NULL_MR) ++ goto free; ++ ++ ret = ibv_cmd_dereg_mr(vmr); ++ if (ret) ++ return ret; ++ ++free: ++ free(vmr); ++ return 0; ++} ++ ++int xsc_round_up_power_of_two(long long sz) ++{ ++ long long ret; ++ ++ for (ret = 1; ret < sz; ret <<= 1) ++ ; /* nothing */ ++ ++ if (ret > INT_MAX) { ++ fprintf(stderr, "%s: roundup overflow\n", __func__); ++ return -ENOMEM; ++ } ++ ++ return (int)ret; ++} ++ ++static int align_queue_size(long long req) ++{ ++ return xsc_round_up_power_of_two(req); ++} ++ ++enum { ++ CREATE_CQ_SUPPORTED_WC_FLAGS = IBV_WC_STANDARD_FLAGS | ++ IBV_WC_EX_WITH_COMPLETION_TIMESTAMP | ++ IBV_WC_EX_WITH_CVLAN | ++ IBV_WC_EX_WITH_FLOW_TAG | ++ IBV_WC_EX_WITH_TM_INFO | ++ IBV_WC_EX_WITH_COMPLETION_TIMESTAMP_WALLCLOCK ++}; ++ ++enum { ++ CREATE_CQ_SUPPORTED_COMP_MASK = IBV_CQ_INIT_ATTR_MASK_FLAGS ++}; ++ ++enum { ++ CREATE_CQ_SUPPORTED_FLAGS = ++ IBV_CREATE_CQ_ATTR_SINGLE_THREADED | ++ IBV_CREATE_CQ_ATTR_IGNORE_OVERRUN ++}; ++ ++enum { ++ XSC_DV_CREATE_CQ_SUP_COMP_MASK = ++ (XSCDV_CQ_INIT_ATTR_MASK_COMPRESSED_CQE | ++ XSCDV_CQ_INIT_ATTR_MASK_FLAGS | ++ XSCDV_CQ_INIT_ATTR_MASK_CQE_SIZE), ++}; ++ ++static int xsc_cqe_depth_check(void) ++{ ++ char *e; ++ ++ e = getenv("XSC_CQE_DEPTH_CHECK"); ++ if (e && !strcmp(e, "n")) ++ return 0; ++ ++ return 1; ++} ++ ++static struct ibv_cq_ex *create_cq(struct ibv_context *context, ++ const struct ibv_cq_init_attr_ex *cq_attr, ++ int cq_alloc_flags, ++ struct xscdv_cq_init_attr *xcq_attr) ++{ ++ struct xsc_create_cq cmd = {}; ++ struct xsc_create_cq_resp resp = {}; ++ struct xsc_create_cq_ex cmd_ex = {}; ++ struct xsc_create_cq_ex_resp resp_ex = {}; ++ struct xsc_ib_create_cq *cmd_drv; ++ struct xsc_ib_create_cq_resp *resp_drv; ++ struct xsc_cq *cq; ++ int cqe_sz; ++ int ret; ++ int ncqe; ++ struct xsc_context *xctx = to_xctx(context); ++ bool use_ex = false; ++ char *env; ++ int i; ++ ++ if (!cq_attr->cqe) { ++ xsc_err("CQE invalid\n"); ++ errno = EINVAL; ++ return NULL; ++ } ++ ++ xsc_dbg(xctx->dbg_fp, XSC_DBG_CQ, "CQE number:%u\n", cq_attr->cqe); ++ ++ if (cq_attr->comp_mask & ~CREATE_CQ_SUPPORTED_COMP_MASK) { ++ xsc_err("Unsupported comp_mask for create cq\n"); ++ errno = EINVAL; ++ return NULL; ++ } ++ ++ if (cq_attr->comp_mask & IBV_CQ_INIT_ATTR_MASK_FLAGS && ++ cq_attr->flags & ~CREATE_CQ_SUPPORTED_FLAGS) { ++ xsc_err("Unsupported creation flags requested for create cq\n"); ++ errno = EINVAL; ++ return NULL; ++ } ++ ++ if (cq_attr->wc_flags & ~CREATE_CQ_SUPPORTED_WC_FLAGS) { ++ xsc_err("unsupported flgas:0x%lx\n", cq_attr->wc_flags); ++ errno = ENOTSUP; ++ return NULL; ++ } ++ ++ cq = calloc(1, sizeof *cq); ++ if (!cq) { ++ xsc_err("Alloc CQ failed\n"); ++ errno = ENOMEM; ++ return NULL; ++ } ++ ++ if 
(cq_attr->comp_mask & IBV_CQ_INIT_ATTR_MASK_FLAGS) { ++ if (cq_attr->flags & IBV_CREATE_CQ_ATTR_SINGLE_THREADED) ++ cq->flags |= XSC_CQ_FLAGS_SINGLE_THREADED; ++ if (cq_attr->flags & IBV_CREATE_CQ_ATTR_IGNORE_OVERRUN) ++ use_ex = true; ++ } ++ ++ xsc_dbg(xctx->dbg_fp, XSC_DBG_CQ, "use_ex:%u\n", use_ex); ++ ++ cmd_drv = use_ex ? &cmd_ex.drv_payload : &cmd.drv_payload; ++ resp_drv = use_ex ? &resp_ex.drv_payload : &resp.drv_payload; ++ ++ cq->cons_index = 0; ++ ++ if (xsc_spinlock_init(&cq->lock, !xsc_single_threaded)) ++ goto err; ++ ++ ncqe = align_queue_size(cq_attr->cqe); ++ if (ncqe < XSC_CQE_RING_DEPTH_MIN) { ++ xsc_dbg(xctx->dbg_fp, XSC_DBG_CQ, "CQE ring size %u is not enough, set it as %u\n", ++ ncqe, XSC_CQE_RING_DEPTH_MIN); ++ ncqe = XSC_CQE_RING_DEPTH_MIN; ++ } ++ ++ if (ncqe > XSC_CQE_RING_DEPTH_MAX) { ++ if (xsc_cqe_depth_check()) { ++ xsc_err("CQE ring size %u exceeds CQE ring depth %u, abort!\n", ++ ncqe, XSC_CQE_RING_DEPTH_MAX); ++ errno = EINVAL; ++ goto err_spl; ++ } else { ++ xsc_dbg(xctx->dbg_fp, XSC_DBG_CQ, "CQE ring size %u exceeds the MAX ring size, set it as %u\n", ++ ncqe, XSC_CQE_RING_DEPTH_MAX); ++ ncqe = XSC_CQE_RING_DEPTH_MAX; ++ } ++ } ++ ++ cqe_sz = XSC_CQE_SIZE; ++ xsc_dbg(xctx->dbg_fp, XSC_DBG_CQ, "CQE number:%u, size:%u\n", ncqe, cqe_sz); ++ ++ if (xsc_alloc_cq_buf(to_xctx(context), cq, &cq->buf_a, ncqe, cqe_sz)) { ++ xsc_err("Alloc cq buffer failed.\n"); ++ errno = ENOMEM; ++ goto err_spl; ++ } ++ ++ cq->arm_sn = 0; ++ cq->cqe_sz = cqe_sz; ++ cq->flags = cq_alloc_flags; ++ ++ cmd_drv->buf_addr = (uintptr_t) cq->buf_a.buf; ++ cmd_drv->db_addr = (uintptr_t) cq->dbrec; ++ cmd_drv->cqe_size = cqe_sz; ++ ++ xsc_dbg(xctx->dbg_fp, XSC_DBG_CQ, "buf_addr:%p\n", cq->buf_a.buf); ++ ++ if (use_ex) { ++ struct ibv_cq_init_attr_ex cq_attr_ex = *cq_attr; ++ ++ cq_attr_ex.cqe = ncqe; ++ ret = ibv_cmd_create_cq_ex(context, &cq_attr_ex, &cq->verbs_cq, ++ &cmd_ex.ibv_cmd, sizeof(cmd_ex), ++ &resp_ex.ibv_resp, sizeof(resp_ex), ++ 0); ++ } else { ++ ret = ibv_cmd_create_cq(context, ncqe, cq_attr->channel, ++ cq_attr->comp_vector, ++ ibv_cq_ex_to_cq(&cq->verbs_cq.cq_ex), ++ &cmd.ibv_cmd, sizeof(cmd), ++ &resp.ibv_resp, sizeof(resp)); ++ } ++ ++ if (ret) { ++ xsc_err("ibv_cmd_create_cq failed, ret %d\n", ret); ++ goto err_buf; ++ } ++ ++ cq->active_buf = &cq->buf_a; ++ cq->resize_buf = NULL; ++ cq->cqn = resp_drv->cqn; ++ cq->stall_enable = to_xctx(context)->stall_enable; ++ cq->stall_adaptive_enable = to_xctx(context)->stall_adaptive_enable; ++ cq->stall_cycles = to_xctx(context)->stall_cycles; ++ ++ cq->db = xctx->cqm_reg_va + ++ (xctx->cqm_next_cid_reg & (xctx->page_size - 1)); ++ cq->armdb = xctx->cqm_armdb_va + ++ (xctx->cqm_armdb & (xctx->page_size - 1)); ++ cq->cqe_cnt = ncqe; ++ cq->log2_cq_ring_sz = xsc_ilog2(ncqe); ++ ++ for (i = 0; i < ncqe; i++) { ++ struct xsc_cqe *cqe = (struct xsc_cqe *)(cq->active_buf->buf + i * cq->cqe_sz); ++ cqe->owner = 1; ++ } ++ ++ env = getenv("XSC_DISABLE_FLUSH_ERROR"); ++ cq->disable_flush_error_cqe = env ? 
true : false; ++ xsc_dbg(xctx->dbg_fp, XSC_DBG_CQ, "cqe count:%u cqn:%u\n", cq->cqe_cnt, cq->cqn); ++ list_head_init(&cq->err_state_qp_list); ++ return &cq->verbs_cq.cq_ex; ++ ++ ++err_buf: ++ xsc_free_cq_buf(to_xctx(context), &cq->buf_a); ++ ++err_spl: ++ xsc_spinlock_destroy(&cq->lock); ++ ++err: ++ free(cq); ++ ++ return NULL; ++} ++ ++struct ibv_cq *xsc_create_cq(struct ibv_context *context, int cqe, ++ struct ibv_comp_channel *channel, ++ int comp_vector) ++{ ++ struct ibv_cq_ex *cq; ++ struct ibv_cq_init_attr_ex cq_attr = {.cqe = cqe, .channel = channel, ++ .comp_vector = comp_vector, ++ .wc_flags = IBV_WC_STANDARD_FLAGS}; ++ ++ if (cqe <= 0) { ++ errno = EINVAL; ++ return NULL; ++ } ++ ++ cq = create_cq(context, &cq_attr, 0, NULL); ++ return cq ? ibv_cq_ex_to_cq(cq) : NULL; ++} ++ ++struct ibv_cq_ex *xsc_create_cq_ex(struct ibv_context *context, ++ struct ibv_cq_init_attr_ex *cq_attr) ++{ ++ return create_cq(context, cq_attr, XSC_CQ_FLAGS_EXTENDED, NULL); ++} ++ ++struct ibv_cq_ex *xscdv_create_cq(struct ibv_context *context, ++ struct ibv_cq_init_attr_ex *cq_attr, ++ struct xscdv_cq_init_attr *xcq_attr) ++{ ++ struct ibv_cq_ex *cq; ++ ++ cq = create_cq(context, cq_attr, XSC_CQ_FLAGS_EXTENDED, xcq_attr); ++ if (!cq) ++ return NULL; ++ ++ verbs_init_cq(ibv_cq_ex_to_cq(cq), context, ++ cq_attr->channel, cq_attr->cq_context); ++ return cq; ++} ++ ++int xsc_resize_cq(struct ibv_cq *ibcq, int cqe) ++{ ++ struct xsc_cq *cq = to_xcq(ibcq); ++ ++ if (cqe < 0) { ++ errno = EINVAL; ++ return errno; ++ } ++ ++ xsc_spin_lock(&cq->lock); ++ cq->active_cqes = cq->verbs_cq.cq_ex.cqe; ++ /* currently we don't change cqe size */ ++ cq->resize_cqe_sz = cq->cqe_sz; ++ cq->resize_cqes = cq->verbs_cq.cq_ex.cqe; ++ xsc_spin_unlock(&cq->lock); ++ cq->resize_buf = NULL; ++ return 0; ++} ++ ++int xsc_destroy_cq(struct ibv_cq *cq) ++{ ++ int ret; ++ struct xsc_err_state_qp_node *tmp, *err_qp_node; ++ ++ xsc_dbg(to_xctx(cq->context)->dbg_fp, XSC_DBG_CQ, "\n"); ++ ret = ibv_cmd_destroy_cq(cq); ++ if (ret) ++ return ret; ++ ++ list_for_each_safe(&to_xcq(cq)->err_state_qp_list, err_qp_node, tmp, entry) { ++ list_del(&err_qp_node->entry); ++ free(err_qp_node); ++ } ++ ++ xsc_free_cq_buf(to_xctx(cq->context), to_xcq(cq)->active_buf); ++ free(to_xcq(cq)); ++ ++ return 0; ++} ++ ++static int xsc_calc_sq_size(struct xsc_context *ctx, ++ struct ibv_qp_init_attr_ex *attr, ++ struct xsc_qp *qp) ++{ ++ int wqe_size; ++ int wq_size; ++ int wq_size_min = 0; ++ ++ if (!attr->cap.max_send_wr) ++ return 0; ++ ++ wqe_size = 1 << (XSC_BASE_WQE_SHIFT + ctx->send_ds_shift); ++ ++ wq_size = xsc_round_up_power_of_two(attr->cap.max_send_wr); ++ ++ if (attr->qp_type != IBV_QPT_RAW_PACKET) ++ wq_size_min = XSC_SEND_WQE_RING_DEPTH_MIN; ++ if (wq_size < wq_size_min) { ++ xsc_dbg(ctx->dbg_fp, XSC_DBG_QP, "WQE size %u is not enough, set it as %u\n", ++ wq_size, wq_size_min); ++ wq_size = wq_size_min; ++ } ++ ++ if (wq_size > XSC_SEND_WQE_RING_DEPTH_MAX) { ++ xsc_dbg(ctx->dbg_fp, XSC_DBG_QP, ++ "WQE size %u exceeds WQE ring depth, set it as %u\n", ++ wq_size, XSC_SEND_WQE_RING_DEPTH_MAX); ++ wq_size = XSC_SEND_WQE_RING_DEPTH_MAX; ++ } ++ ++ qp->max_inline_data = attr->cap.max_inline_data; ++ qp->sq.wqe_cnt = wq_size; ++ qp->sq.ds_cnt = wq_size << ctx->send_ds_shift; ++ qp->sq.seg_cnt = 1 << ctx->send_ds_shift; ++ qp->sq.wqe_shift = XSC_BASE_WQE_SHIFT + ctx->send_ds_shift; ++ qp->sq.max_gs = attr->cap.max_send_sge; ++ qp->sq.max_post = qp->sq.wqe_cnt; ++ if (attr->cap.max_inline_data > ++ (qp->sq.seg_cnt - 2) * sizeof(struct 
xsc_wqe_data_seg)) ++ return -EINVAL; ++ ++ xsc_dbg(ctx->dbg_fp, XSC_DBG_QP, "Send WQE count:%u, max post:%u wqe shift:%u\n", ++ qp->sq.wqe_cnt, qp->sq.max_post, qp->sq.wqe_shift); ++ ++ return wqe_size * qp->sq.wqe_cnt; ++} ++ ++enum { ++ DV_CREATE_WQ_SUPPORTED_COMP_MASK = XSCDV_WQ_INIT_ATTR_MASK_STRIDING_RQ ++}; ++ ++static int xsc_calc_rwq_size(struct xsc_context *ctx, ++ struct xsc_rwq *rwq, ++ struct ibv_wq_init_attr *attr, ++ struct xscdv_wq_init_attr *xwq_attr) ++{ ++ size_t wqe_size; ++ int wq_size; ++ uint32_t num_scatter; ++ int is_mprq = 0; ++ int scat_spc; ++ ++ if (!attr->max_wr) ++ return -EINVAL; ++ if (xwq_attr) { ++ if (!check_comp_mask(xwq_attr->comp_mask, ++ DV_CREATE_WQ_SUPPORTED_COMP_MASK)) ++ return -EINVAL; ++ ++ is_mprq = !!(xwq_attr->comp_mask & ++ XSCDV_WQ_INIT_ATTR_MASK_STRIDING_RQ); ++ } ++ ++ /* TBD: check caps for RQ */ ++ num_scatter = max_t(uint32_t, attr->max_sge, 1); ++ wqe_size = sizeof(struct xsc_wqe_data_seg) * num_scatter + ++ sizeof(struct xsc_wqe_srq_next_seg) * is_mprq; ++ ++ if (rwq->wq_sig) ++ wqe_size += sizeof(struct xsc_rwqe_sig); ++ ++ if (wqe_size <= 0 || wqe_size > ctx->max_rq_desc_sz) ++ return -EINVAL; ++ ++ wqe_size = xsc_round_up_power_of_two(wqe_size); ++ wq_size = xsc_round_up_power_of_two(attr->max_wr) * wqe_size; ++ wq_size = max(wq_size, XSC_SEND_WQE_BB); ++ rwq->rq.wqe_cnt = wq_size / wqe_size; ++ rwq->rq.wqe_shift = xsc_ilog2(wqe_size); ++ rwq->rq.max_post = 1 << xsc_ilog2(wq_size / wqe_size); ++ scat_spc = wqe_size - ++ ((rwq->wq_sig) ? sizeof(struct xsc_rwqe_sig) : 0) - ++ is_mprq * sizeof(struct xsc_wqe_srq_next_seg); ++ rwq->rq.max_gs = scat_spc / sizeof(struct xsc_wqe_data_seg); ++ return wq_size; ++} ++ ++static int xsc_calc_rq_size(struct xsc_context *ctx, ++ struct ibv_qp_init_attr_ex *attr, ++ struct xsc_qp *qp) ++{ ++ int wqe_size; ++ int wq_size; ++ int wq_size_min = 0; ++ ++ if (!attr->cap.max_recv_wr) ++ return 0; ++ ++ wqe_size = 1 << (XSC_BASE_WQE_SHIFT + ctx->recv_ds_shift); ++ ++ wq_size = xsc_round_up_power_of_two(attr->cap.max_recv_wr); ++ /* due to hardware limit, rdma rq depth should be one send wqe ds num at least*/ ++ if (attr->qp_type != IBV_QPT_RAW_PACKET) ++ wq_size_min = ctx->send_ds_num; ++ if (wq_size < wq_size_min) { ++ xsc_dbg(ctx->dbg_fp, XSC_DBG_QP, "WQE size %u is not enough, set it as %u\n", ++ wq_size, wq_size_min); ++ wq_size = wq_size_min; ++ } ++ ++ if (wq_size > XSC_RECV_WQE_RING_DEPTH_MAX) { ++ xsc_dbg(ctx->dbg_fp, XSC_DBG_QP, ++ "WQE size %u exceeds WQE ring depth, set it as %u\n", ++ wq_size, XSC_RECV_WQE_RING_DEPTH_MAX); ++ wq_size = XSC_RECV_WQE_RING_DEPTH_MAX; ++ } ++ ++ qp->rq.wqe_cnt = wq_size; ++ qp->rq.ds_cnt = qp->rq.wqe_cnt << ctx->recv_ds_shift; ++ qp->rq.seg_cnt = 1 << ctx->recv_ds_shift; ++ qp->rq.wqe_shift = XSC_BASE_WQE_SHIFT + ctx->recv_ds_shift; ++ qp->rq.max_post = qp->rq.wqe_cnt; ++ qp->rq.max_gs = attr->cap.max_recv_sge; ++ ++ xsc_dbg(ctx->dbg_fp, XSC_DBG_QP, "Recv WQE count:%u, max post:%u wqe shift:%u\n", ++ qp->rq.wqe_cnt, qp->rq.max_post, qp->rq.wqe_shift); ++ return wqe_size * qp->rq.wqe_cnt; ++} ++ ++static int xsc_calc_wq_size(struct xsc_context *ctx, ++ struct ibv_qp_init_attr_ex *attr, ++ struct xsc_qp *qp) ++{ ++ int ret; ++ int result; ++ ++ ret = xsc_calc_sq_size(ctx, attr, qp); ++ if (ret < 0) ++ return ret; ++ ++ result = ret; ++ ++ ret = xsc_calc_rq_size(ctx, attr, qp); ++ if (ret < 0) ++ return ret; ++ ++ result += ret; ++ ++ qp->sq.offset = ret; ++ qp->rq.offset = 0; ++ ++ return result; ++} ++ ++static const char *qptype2key(enum ibv_qp_type type) ++{ 
++ switch (type) { ++ case IBV_QPT_RC: return "HUGE_RC"; ++ case IBV_QPT_UC: return "HUGE_UC"; ++ case IBV_QPT_UD: return "HUGE_UD"; ++ case IBV_QPT_RAW_PACKET: return "HUGE_RAW_ETH"; ++ default: return "HUGE_NA"; ++ } ++} ++ ++static int xsc_alloc_qp_buf(struct ibv_context *context, ++ struct ibv_qp_init_attr_ex *attr, ++ struct xsc_qp *qp, ++ int size) ++{ ++ int err; ++ enum xsc_alloc_type alloc_type; ++ enum xsc_alloc_type default_alloc_type = XSC_ALLOC_TYPE_ANON; ++ const char *qp_huge_key; ++ ++ if (qp->sq.wqe_cnt) { ++ qp->sq.wrid = malloc(qp->sq.wqe_cnt * sizeof(*qp->sq.wrid)); ++ if (!qp->sq.wrid) { ++ errno = ENOMEM; ++ err = -1; ++ return err; ++ } ++ ++ qp->sq.wr_data = malloc(qp->sq.wqe_cnt * sizeof(*qp->sq.wr_data)); ++ if (!qp->sq.wr_data) { ++ errno = ENOMEM; ++ err = -1; ++ goto ex_wrid; ++ } ++ ++ qp->sq.wqe_head = malloc(qp->sq.wqe_cnt * sizeof(*qp->sq.wqe_head)); ++ if (!qp->sq.wqe_head) { ++ errno = ENOMEM; ++ err = -1; ++ goto ex_wrid; ++ } ++ ++ qp->sq.need_flush = malloc(qp->sq.wqe_cnt * sizeof(*qp->sq.need_flush)); ++ if (!qp->sq.need_flush) { ++ errno = ENOMEM; ++ err = -1; ++ goto ex_wrid; ++ } ++ memset(qp->sq.need_flush, 0, qp->sq.wqe_cnt); ++ ++ qp->sq.wr_opcode = malloc(qp->sq.wqe_cnt * sizeof(*qp->sq.wr_opcode)); ++ if (!qp->sq.wr_opcode) { ++ errno = ENOMEM; ++ err = -1; ++ goto ex_wrid; ++ } ++ } ++ ++ if (qp->rq.wqe_cnt) { ++ qp->rq.wrid = malloc(qp->rq.wqe_cnt * sizeof(uint64_t)); ++ if (!qp->rq.wrid) { ++ errno = ENOMEM; ++ err = -1; ++ goto ex_wrid; ++ } ++ } ++ ++ /* compatibility support */ ++ qp_huge_key = qptype2key(qp->ibv_qp->qp_type); ++ if (xsc_use_huge(qp_huge_key)) ++ default_alloc_type = XSC_ALLOC_TYPE_HUGE; ++ ++ xsc_get_alloc_type(to_xctx(context), XSC_QP_PREFIX, &alloc_type, ++ default_alloc_type); ++ ++ err = xsc_alloc_prefered_buf(to_xctx(context), &qp->buf, ++ align(qp->buf_size, to_xdev ++ (context->device)->page_size), ++ to_xdev(context->device)->page_size, ++ alloc_type, ++ XSC_QP_PREFIX); ++ ++ if (err) { ++ err = -ENOMEM; ++ goto ex_wrid; ++ } ++ ++ memset(qp->buf.buf, 0, qp->buf_size); ++ ++ if (attr->qp_type == IBV_QPT_RAW_PACKET || ++ qp->flags & XSC_QP_FLAGS_USE_UNDERLAY) { ++ size_t aligned_sq_buf_size = align(qp->sq_buf_size, ++ to_xdev(context->device)->page_size); ++ /* For Raw Packet QP, allocate a separate buffer for the SQ */ ++ err = xsc_alloc_prefered_buf(to_xctx(context), &qp->sq_buf, ++ aligned_sq_buf_size, ++ to_xdev(context->device)->page_size, ++ alloc_type, ++ XSC_QP_PREFIX); ++ if (err) { ++ err = -ENOMEM; ++ goto rq_buf; ++ } ++ ++ memset(qp->sq_buf.buf, 0, aligned_sq_buf_size); ++ } ++ ++ return 0; ++rq_buf: ++ xsc_free_actual_buf(to_xctx(context), &qp->buf); ++ex_wrid: ++ if (qp->rq.wrid) ++ free(qp->rq.wrid); ++ ++ if (qp->sq.wqe_head) ++ free(qp->sq.wqe_head); ++ ++ if (qp->sq.wr_data) ++ free(qp->sq.wr_data); ++ if (qp->sq.wrid) ++ free(qp->sq.wrid); ++ ++ if (qp->sq.need_flush) ++ free(qp->sq.need_flush); ++ ++ if (qp->sq.wr_opcode) ++ free(qp->sq.wr_opcode); ++ ++ return err; ++} ++ ++static void xsc_free_qp_buf(struct xsc_context *ctx, struct xsc_qp *qp) ++{ ++ xsc_free_actual_buf(ctx, &qp->buf); ++ ++ if (qp->sq_buf.buf) ++ xsc_free_actual_buf(ctx, &qp->sq_buf); ++ ++ if (qp->rq.wrid) ++ free(qp->rq.wrid); ++ ++ if (qp->sq.wqe_head) ++ free(qp->sq.wqe_head); ++ ++ if (qp->sq.wrid) ++ free(qp->sq.wrid); ++ ++ if (qp->sq.wr_data) ++ free(qp->sq.wr_data); ++ ++ if (qp->sq.need_flush) ++ free(qp->sq.need_flush); ++ ++ if (qp->sq.wr_opcode) ++ free(qp->sq.wr_opcode); ++} ++ ++enum { ++ 
XSC_CREATE_QP_SUP_COMP_MASK = IBV_QP_INIT_ATTR_PD | ++ IBV_QP_INIT_ATTR_CREATE_FLAGS ++}; ++ ++enum { ++ XSC_DV_CREATE_QP_SUP_COMP_MASK = XSCDV_QP_INIT_ATTR_MASK_QP_CREATE_FLAGS | ++ XSCDV_QP_INIT_ATTR_MASK_DC ++}; ++ ++enum { ++ XSC_CREATE_QP_EX2_COMP_MASK = (IBV_QP_INIT_ATTR_CREATE_FLAGS | ++ IBV_QP_INIT_ATTR_MAX_TSO_HEADER | ++ IBV_QP_INIT_ATTR_IND_TABLE | ++ IBV_QP_INIT_ATTR_RX_HASH), ++}; ++ ++enum { ++ XSCDV_QP_CREATE_SUP_FLAGS = ++ (XSCDV_QP_CREATE_TUNNEL_OFFLOADS | ++ XSCDV_QP_CREATE_TIR_ALLOW_SELF_LOOPBACK_UC | ++ XSCDV_QP_CREATE_TIR_ALLOW_SELF_LOOPBACK_MC | ++ XSCDV_QP_CREATE_DISABLE_SCATTER_TO_CQE | ++ XSCDV_QP_CREATE_ALLOW_SCATTER_TO_CQE), ++}; ++ ++static struct ibv_qp *create_qp(struct ibv_context *context, ++ struct ibv_qp_init_attr_ex *attr, ++ struct xscdv_qp_init_attr *xqp_attr) ++{ ++ struct xsc_create_qp cmd; ++ struct xsc_create_qp_resp resp; ++ struct xsc_create_qp_ex_resp resp_ex; ++ struct xsc_qp *qp; ++ int ret; ++ struct xsc_context *ctx = to_xctx(context); ++ struct ibv_qp *ibqp; ++ struct xsc_parent_domain *xparent_domain; ++ struct xsc_device *xdev = to_xdev(context->device); ++ ++ xsc_dbg(ctx->dbg_fp, XSC_DBG_QP, "comp_mask=0x%x.\n", attr->comp_mask); ++ ++ if (attr->comp_mask & ~XSC_CREATE_QP_SUP_COMP_MASK) { ++ xsc_err("Not supported comp_mask:0x%x\n", attr->comp_mask); ++ return NULL; ++ } ++ ++ qp = calloc(1, sizeof(*qp)); ++ if (!qp) { ++ xsc_err("QP calloc failed\n"); ++ return NULL; ++ } ++ ++ ibqp = (struct ibv_qp *)&qp->verbs_qp; ++ qp->ibv_qp = ibqp; ++ ++ memset(&cmd, 0, sizeof(cmd)); ++ memset(&resp, 0, sizeof(resp)); ++ memset(&resp_ex, 0, sizeof(resp_ex)); ++ ++ ret = xsc_calc_wq_size(ctx, attr, qp); ++ if (ret < 0) { ++ xsc_err("Calculate WQ size failed\n"); ++ errno = EINVAL; ++ goto err; ++ } ++ ++ qp->buf_size = ret; ++ qp->sq_buf_size = 0; ++ ++ if (xsc_alloc_qp_buf(context, attr, qp, ret)) { ++ xsc_err("Alloc QP buffer failed\n"); ++ errno = ENOMEM; ++ goto err; ++ } ++ ++ qp->sq_start = qp->buf.buf + qp->sq.offset; ++ qp->rq_start = qp->buf.buf + qp->rq.offset; ++ qp->sq.qend = qp->buf.buf + qp->sq.offset + ++ (qp->sq.wqe_cnt << qp->sq.wqe_shift); ++ ++ xsc_dbg(ctx->dbg_fp, XSC_DBG_QP, "sq start:%p, sq qend:%p, buffer size:%u\n", ++ qp->sq_start, qp->sq.qend, qp->buf_size); ++ ++ xsc_init_qp_indices(qp); ++ ++ if (xsc_spinlock_init_pd(&qp->sq.lock, attr->pd) || ++ xsc_spinlock_init_pd(&qp->rq.lock, attr->pd)) ++ goto err_free_qp_buf; ++ ++ cmd.buf_addr = (uintptr_t) qp->buf.buf; ++ cmd.db_addr = (uintptr_t) qp->db; ++ cmd.sq_wqe_count = qp->sq.ds_cnt; ++ cmd.rq_wqe_count = qp->rq.ds_cnt; ++ cmd.rq_wqe_shift = qp->rq.wqe_shift; ++ ++ if (attr->qp_type == IBV_QPT_RAW_PACKET) { ++ if (attr->comp_mask & IBV_QP_INIT_ATTR_CREATE_FLAGS) { ++ if (attr->create_flags & XSC_QP_CREATE_RAWPACKET_TSO) { ++ cmd.flags |= XSC_QP_FLAG_RAWPACKET_TSO;/*revert to command flags*/ ++ xsc_dbg(ctx->dbg_fp, XSC_DBG_QP, ++ "revert create_flags(0x%x) to cmd_flags(0x%x)\n", ++ attr->create_flags, cmd.flags); ++ } ++ ++ if (attr->create_flags & XSC_QP_CREATE_RAWPACKET_TX) { ++ cmd.flags |= XSC_QP_FLAG_RAWPACKET_TX;/*revert to command flags*/ ++ xsc_dbg(ctx->dbg_fp, XSC_DBG_QP, ++ "revert create_flags(0x%x) to cmd_flags(0x%x)\n", ++ attr->create_flags, cmd.flags); ++ } ++ attr->comp_mask &= ~IBV_QP_INIT_ATTR_CREATE_FLAGS; ++ } ++ } ++ ++ pthread_mutex_lock(&ctx->qp_table_mutex); ++ ++ xparent_domain = to_xparent_domain(attr->pd); ++ ++ ret = ibv_cmd_create_qp_ex(context, &qp->verbs_qp, attr, ++ &cmd.ibv_cmd, sizeof(cmd), ++ &resp.ibv_resp, sizeof(resp)); ++ if (ret) { 
++ xsc_err("ibv_cmd_create_qp_ex failed,ret %d\n", ret); ++ errno = ret; ++ goto err_free_uidx; ++ } ++ ++ if (qp->sq.wqe_cnt || qp->rq.wqe_cnt) { ++ ret = xsc_store_qp(ctx, ibqp->qp_num, qp); ++ if (ret) { ++ xsc_err("xsc_store_qp failed,ret %d\n", ret); ++ errno = EINVAL; ++ goto err_destroy; ++ } ++ } ++ ++ pthread_mutex_unlock(&ctx->qp_table_mutex); ++ ++ qp->rq.max_post = qp->rq.wqe_cnt; ++ ++ if (attr->sq_sig_all) ++ qp->sq_signal_bits = 1; ++ else ++ qp->sq_signal_bits = 0; ++ ++ attr->cap.max_send_wr = qp->sq.max_post; ++ attr->cap.max_recv_wr = qp->rq.max_post; ++ attr->cap.max_recv_sge = qp->rq.max_gs; ++ ++ qp->rsc.type = XSC_RSC_TYPE_QP; ++ qp->rsc.rsn = ibqp->qp_num; ++ ++ if (xparent_domain) ++ atomic_fetch_add(&xparent_domain->xpd.refcount, 1); ++ ++ qp->rqn = ibqp->qp_num; ++ qp->sqn = ibqp->qp_num; ++ ++ xsc_dbg(ctx->dbg_fp, XSC_DBG_QP, "qp rqn:%u, sqn:%u\n", qp->rqn, qp->sqn); ++ qp->sq.db = ctx->sqm_reg_va + (ctx->qpm_tx_db & (xdev->page_size - 1)); ++ qp->rq.db = ctx->rqm_reg_va + (ctx->qpm_rx_db & (xdev->page_size - 1)); ++ ++ return ibqp; ++ ++err_destroy: ++ ibv_cmd_destroy_qp(ibqp); ++ ++err_free_uidx: ++ pthread_mutex_unlock(&to_xctx(context)->qp_table_mutex); ++ ++err_free_qp_buf: ++ xsc_free_qp_buf(ctx, qp); ++ ++err: ++ free(qp); ++ ++ return NULL; ++} ++ ++struct ibv_qp *xsc_create_qp(struct ibv_pd *pd, ++ struct ibv_qp_init_attr *attr) ++{ ++ struct ibv_qp *qp; ++ struct ibv_qp_init_attr_ex attrx; ++ ++ memset(&attrx, 0, sizeof(attrx)); ++ memcpy(&attrx, attr, sizeof(*attr)); ++ attrx.comp_mask = IBV_QP_INIT_ATTR_PD; ++ attrx.pd = pd; ++ qp = create_qp(pd->context, &attrx, NULL); ++ if (qp) ++ memcpy(attr, &attrx, sizeof(*attr)); ++ ++ return qp; ++} ++ ++static void xsc_lock_cqs(struct ibv_qp *qp) ++{ ++ struct xsc_cq *send_cq = to_xcq(qp->send_cq); ++ struct xsc_cq *recv_cq = to_xcq(qp->recv_cq); ++ ++ if (send_cq && recv_cq) { ++ if (send_cq == recv_cq) { ++ xsc_spin_lock(&send_cq->lock); ++ } else if (send_cq->cqn < recv_cq->cqn) { ++ xsc_spin_lock(&send_cq->lock); ++ xsc_spin_lock(&recv_cq->lock); ++ } else { ++ xsc_spin_lock(&recv_cq->lock); ++ xsc_spin_lock(&send_cq->lock); ++ } ++ } else if (send_cq) { ++ xsc_spin_lock(&send_cq->lock); ++ } else if (recv_cq) { ++ xsc_spin_lock(&recv_cq->lock); ++ } ++} ++ ++static void xsc_unlock_cqs(struct ibv_qp *qp) ++{ ++ struct xsc_cq *send_cq = to_xcq(qp->send_cq); ++ struct xsc_cq *recv_cq = to_xcq(qp->recv_cq); ++ ++ if (send_cq && recv_cq) { ++ if (send_cq == recv_cq) { ++ xsc_spin_unlock(&send_cq->lock); ++ } else if (send_cq->cqn < recv_cq->cqn) { ++ xsc_spin_unlock(&recv_cq->lock); ++ xsc_spin_unlock(&send_cq->lock); ++ } else { ++ xsc_spin_unlock(&send_cq->lock); ++ xsc_spin_unlock(&recv_cq->lock); ++ } ++ } else if (send_cq) { ++ xsc_spin_unlock(&send_cq->lock); ++ } else if (recv_cq) { ++ xsc_spin_unlock(&recv_cq->lock); ++ } ++} ++ ++int xsc_destroy_qp(struct ibv_qp *ibqp) ++{ ++ struct xsc_qp *qp = to_xqp(ibqp); ++ struct xsc_context *ctx = to_xctx(ibqp->context); ++ int ret; ++ struct xsc_parent_domain *xparent_domain = to_xparent_domain(ibqp->pd); ++ struct xsc_err_state_qp_node *tmp, *err_rq_node, *err_sq_node; ++ ++ xsc_dbg(ctx->dbg_fp, XSC_DBG_QP, "\n"); ++ ++ pthread_mutex_lock(&ctx->qp_table_mutex); ++ ++ ret = ibv_cmd_destroy_qp(ibqp); ++ if (ret) { ++ pthread_mutex_unlock(&ctx->qp_table_mutex); ++ return ret; ++ } ++ ++ xsc_lock_cqs(ibqp); ++ ++ list_for_each_safe(&to_xcq(ibqp->recv_cq)->err_state_qp_list, err_rq_node, tmp, entry) { ++ if (err_rq_node->qp_id == qp->rsc.rsn) { ++ 
list_del(&err_rq_node->entry); ++ free(err_rq_node); ++ } ++ } ++ ++ list_for_each_safe(&to_xcq(ibqp->send_cq)->err_state_qp_list, err_sq_node, tmp, entry) { ++ if (err_sq_node->qp_id == qp->rsc.rsn) { ++ list_del(&err_sq_node->entry); ++ free(err_sq_node); ++ } ++ } ++ ++ __xsc_cq_clean(to_xcq(ibqp->recv_cq), qp->rsc.rsn); ++ if (ibqp->send_cq != ibqp->recv_cq) ++ __xsc_cq_clean(to_xcq(ibqp->send_cq), qp->rsc.rsn); ++ ++ if (qp->sq.wqe_cnt || qp->rq.wqe_cnt) ++ xsc_clear_qp(ctx, ibqp->qp_num); ++ ++ xsc_unlock_cqs(ibqp); ++ pthread_mutex_unlock(&ctx->qp_table_mutex); ++ ++ xsc_free_qp_buf(ctx, qp); ++ ++ if (xparent_domain) ++ atomic_fetch_sub(&xparent_domain->xpd.refcount, 1); ++ ++ free(qp); ++ ++ return 0; ++} ++ ++int xsc_query_qp(struct ibv_qp *ibqp, struct ibv_qp_attr *attr, ++ int attr_mask, struct ibv_qp_init_attr *init_attr) ++{ ++ struct ibv_query_qp cmd; ++ struct xsc_qp *qp = to_xqp(ibqp); ++ int ret; ++ ++ xsc_dbg(to_xctx(ibqp->context)->dbg_fp, XSC_DBG_QP, "\n"); ++ ++ if (qp->rss_qp) ++ return ENOSYS; ++ ++ ret = ibv_cmd_query_qp(ibqp, attr, attr_mask, init_attr, &cmd, sizeof(cmd)); ++ if (ret) ++ return ret; ++ ++ init_attr->cap.max_send_wr = qp->sq.max_post; ++ init_attr->cap.max_send_sge = qp->sq.max_gs; ++ init_attr->cap.max_inline_data = qp->max_inline_data; ++ ++ attr->cap = init_attr->cap; ++ ++ return 0; ++} ++ ++enum { ++ XSC_MODIFY_QP_EX_ATTR_MASK = IBV_QP_RATE_LIMIT, ++}; ++ ++int xsc_modify_qp(struct ibv_qp *qp, struct ibv_qp_attr *attr, ++ int attr_mask) ++{ ++ struct ibv_modify_qp cmd = {}; ++ struct xsc_qp *xqp = to_xqp(qp); ++ int ret; ++ ++ xsc_dbg(to_xctx(qp->context)->dbg_fp, XSC_DBG_QP, "\n"); ++ ret = ibv_cmd_modify_qp(qp, attr, attr_mask, ++ &cmd, sizeof(cmd)); ++ ++ if (!ret && (attr_mask & IBV_QP_STATE) && ++ attr->qp_state == IBV_QPS_RESET) { ++ if (qp->recv_cq) { ++ xsc_cq_clean(to_xcq(qp->recv_cq), xqp->rsc.rsn); ++ } ++ if (qp->send_cq != qp->recv_cq && qp->send_cq) ++ xsc_cq_clean(to_xcq(qp->send_cq), ++ to_xqp(qp)->rsc.rsn); ++ ++ xsc_init_qp_indices(xqp); ++ } ++ ++ if (!ret && (attr_mask & IBV_QP_STATE)) ++ qp->state = attr->qp_state; ++ ++ /*workaround: generate flush err cqe if qp status turns to ERR*/ ++ if (!ret && (attr_mask & IBV_QP_STATE)) ++ ret = xsc_err_state_qp(qp, attr->cur_qp_state, attr->qp_state); ++ ++ return ret; ++} ++ ++int xsc_modify_qp_rate_limit(struct ibv_qp *qp, ++ struct ibv_qp_rate_limit_attr *attr) ++{ ++ struct ibv_qp_attr qp_attr = {}; ++ struct ib_uverbs_ex_modify_qp_resp resp = {}; ++ struct xsc_modify_qp cmd = {}; ++ struct xsc_context *xctx = to_xctx(qp->context); ++ int ret; ++ ++ if (attr->comp_mask) ++ return EINVAL; ++ ++ if ((attr->max_burst_sz || ++ attr->typical_pkt_sz) && ++ (!attr->rate_limit || ++ !(xctx->packet_pacing_caps.cap_flags & ++ XSC_IB_PP_SUPPORT_BURST))) ++ return EINVAL; ++ ++ cmd.burst_info.max_burst_sz = attr->max_burst_sz; ++ cmd.burst_info.typical_pkt_sz = attr->typical_pkt_sz; ++ qp_attr.rate_limit = attr->rate_limit; ++ ++ ret = ibv_cmd_modify_qp_ex(qp, &qp_attr, IBV_QP_RATE_LIMIT, ++ &cmd.ibv_cmd, ++ sizeof(cmd), ++ &resp, ++ sizeof(resp)); ++ ++ return ret; ++} ++ ++/* ++ * IB spec version 1.3. Table 224 Rate to xsc rate ++ * conversion table on best effort basis. 
++ */ ++static const uint8_t ib_to_xsc_rate_table[] = { ++ 0, /* Invalid to unlimited */ ++ 0, /* Invalid to unlimited */ ++ 7, /* 2.5 Gbps */ ++ 8, /* 10Gbps */ ++ 9, /* 30Gbps */ ++ 10, /* 5 Gbps */ ++ 11, /* 20 Gbps */ ++ 12, /* 40 Gbps */ ++ 13, /* 60 Gbps */ ++ 14, /* 80 Gbps */ ++ 15, /* 120 Gbps */ ++ 11, /* 14 Gbps to 20 Gbps */ ++ 13, /* 56 Gbps to 60 Gbps */ ++ 15, /* 112 Gbps to 120 Gbps */ ++ 0, /* 168 Gbps to unlimited */ ++ 9, /* 25 Gbps to 30 Gbps */ ++ 15, /* 100 Gbps to 120 Gbps */ ++ 0, /* 200 Gbps to unlimited */ ++ 0, /* 300 Gbps to unlimited */ ++}; ++ ++static uint8_t ah_attr_to_xsc_rate(enum ibv_rate ah_static_rate) ++{ ++ if (ah_static_rate >= ARRAY_SIZE(ib_to_xsc_rate_table)) ++ return 0; ++ return ib_to_xsc_rate_table[ah_static_rate]; ++} ++ ++#define RROCE_UDP_SPORT_MIN 0xC000 ++#define RROCE_UDP_SPORT_MAX 0xFFFF ++struct ibv_ah *xsc_create_ah(struct ibv_pd *pd, struct ibv_ah_attr *attr) ++{ ++ struct xsc_context *ctx = to_xctx(pd->context); ++ struct ibv_port_attr port_attr; ++ struct xsc_ah *ah; ++ uint8_t static_rate; ++ uint32_t gid_type; ++ __be32 tmp; ++ uint8_t grh; ++ bool is_eth; ++ bool grh_req; ++ ++ if (attr->port_num < 1 || attr->port_num > ctx->num_ports) ++ return NULL; ++ ++ if (ctx->cached_link_layer[attr->port_num - 1]) { ++ is_eth = ctx->cached_link_layer[attr->port_num - 1] == ++ IBV_LINK_LAYER_ETHERNET; ++ grh_req = ctx->cached_port_flags[attr->port_num - 1] & ++ IBV_QPF_GRH_REQUIRED; ++ } else { ++ if (ibv_query_port(pd->context, attr->port_num, &port_attr)) ++ return NULL; ++ ++ is_eth = port_attr.link_layer == IBV_LINK_LAYER_ETHERNET; ++ grh_req = port_attr.flags & IBV_QPF_GRH_REQUIRED; ++ } ++ ++ if (unlikely((!attr->is_global) && (is_eth || grh_req))) { ++ errno = EINVAL; ++ return NULL; ++ } ++ ++ ah = calloc(1, sizeof *ah); ++ if (!ah) ++ return NULL; ++ ++ static_rate = ah_attr_to_xsc_rate(attr->static_rate); ++ if (is_eth) { ++ if (ibv_query_gid_type(pd->context, attr->port_num, ++ attr->grh.sgid_index, &gid_type)) ++ goto err; ++ ++ if (gid_type == IBV_GID_TYPE_ROCE_V2) ++ ah->av.rlid = htobe16(rand() % (RROCE_UDP_SPORT_MAX + 1 ++ - RROCE_UDP_SPORT_MIN) ++ + RROCE_UDP_SPORT_MIN); ++ /* Since RoCE packets must contain GRH, this bit is reserved ++ * for RoCE and shouldn't be set. 
++ */ ++ grh = 0; ++ ah->av.stat_rate_sl = (static_rate << 4) | ((attr->sl & 0x7) << 1); ++ } else { ++ ah->av.fl_mlid = attr->src_path_bits & 0x7f; ++ ah->av.rlid = htobe16(attr->dlid); ++ grh = 1; ++ ah->av.stat_rate_sl = (static_rate << 4) | (attr->sl & 0xf); ++ } ++ if (attr->is_global) { ++ ah->av.tclass = attr->grh.traffic_class; ++ ah->av.hop_limit = attr->grh.hop_limit; ++ tmp = htobe32((grh << 30) | ++ ((attr->grh.sgid_index & 0xff) << 20) | ++ (attr->grh.flow_label & 0xfffff)); ++ ah->av.grh_gid_fl = tmp; ++ memcpy(ah->av.rgid, attr->grh.dgid.raw, 16); ++ } ++ ++ if (is_eth) { ++ if (ctx->cmds_supp_uhw & XSC_USER_CMDS_SUPP_UHW_CREATE_AH) { ++ struct xsc_create_ah_resp resp = {}; ++ ++ if (ibv_cmd_create_ah(pd, &ah->ibv_ah, attr, &resp.ibv_resp, sizeof(resp))) ++ goto err; ++ ++ ah->kern_ah = true; ++ memcpy(ah->av.rmac, resp.dmac, ETHERNET_LL_SIZE); ++ } else { ++ uint16_t vid; ++ ++ if (ibv_resolve_eth_l2_from_gid(pd->context, attr, ++ ah->av.rmac, &vid)) ++ goto err; ++ } ++ } ++ ++ return &ah->ibv_ah; ++err: ++ free(ah); ++ return NULL; ++} ++ ++int xsc_destroy_ah(struct ibv_ah *ah) ++{ ++ struct xsc_ah *xah = to_xah(ah); ++ int err; ++ ++ if (xah->kern_ah) { ++ err = ibv_cmd_destroy_ah(ah); ++ if (err) ++ return err; ++ } ++ ++ free(xah); ++ return 0; ++} ++ ++int xsc_attach_mcast(struct ibv_qp *qp, const union ibv_gid *gid, uint16_t lid) ++{ ++ return ibv_cmd_attach_mcast(qp, gid, lid); ++} ++ ++int xsc_detach_mcast(struct ibv_qp *qp, const union ibv_gid *gid, uint16_t lid) ++{ ++ return ibv_cmd_detach_mcast(qp, gid, lid); ++} ++ ++struct ibv_qp *xsc_create_qp_ex(struct ibv_context *context, ++ struct ibv_qp_init_attr_ex *attr) ++{ ++ return create_qp(context, attr, NULL); ++} ++ ++struct ibv_qp *xscdv_create_qp(struct ibv_context *context, ++ struct ibv_qp_init_attr_ex *qp_attr, ++ struct xscdv_qp_init_attr *xqp_attr) ++{ ++ return create_qp(context, qp_attr, xqp_attr); ++} ++ ++struct ibv_xrcd * ++xsc_open_xrcd(struct ibv_context *context, ++ struct ibv_xrcd_init_attr *xrcd_init_attr) ++{ ++ int err; ++ struct verbs_xrcd *xrcd; ++ struct ibv_open_xrcd cmd = {}; ++ struct ib_uverbs_open_xrcd_resp resp = {}; ++ ++ xrcd = calloc(1, sizeof(*xrcd)); ++ if (!xrcd) ++ return NULL; ++ ++ err = ibv_cmd_open_xrcd(context, xrcd, sizeof(*xrcd), xrcd_init_attr, ++ &cmd, sizeof(cmd), &resp, sizeof(resp)); ++ if (err) { ++ free(xrcd); ++ return NULL; ++ } ++ ++ return &xrcd->xrcd; ++} ++ ++int xsc_close_xrcd(struct ibv_xrcd *ib_xrcd) ++{ ++ struct verbs_xrcd *xrcd = container_of(ib_xrcd, struct verbs_xrcd, xrcd); ++ int ret; ++ ++ ret = ibv_cmd_close_xrcd(xrcd); ++ if (!ret) ++ free(xrcd); ++ ++ return ret; ++} ++ ++int xsc_query_device_ex(struct ibv_context *context, ++ const struct ibv_query_device_ex_input *input, ++ struct ibv_device_attr_ex *attr, ++ size_t attr_size) ++{ ++ struct xsc_context *xctx = to_xctx(context); ++ struct xsc_query_device_ex_resp resp = {}; ++ size_t resp_size = ++ (xctx->cmds_supp_uhw & XSC_USER_CMDS_SUPP_UHW_QUERY_DEVICE) ? 
++ sizeof(resp) : ++ sizeof(resp.ibv_resp); ++ struct ibv_device_attr *a; ++ union xsc_ib_fw_ver raw_fw_ver; ++ int err; ++ ++ raw_fw_ver.data = 0; ++ err = ibv_cmd_query_device_any(context, input, attr, attr_size, ++ &resp.ibv_resp, &resp_size); ++ if (err) ++ return err; ++ ++ if (attr_size >= offsetofend(struct ibv_device_attr_ex, tso_caps)) { ++ attr->tso_caps.max_tso = resp.tso_caps.max_tso; ++ attr->tso_caps.supported_qpts = resp.tso_caps.supported_qpts; ++ } ++ if (attr_size >= offsetofend(struct ibv_device_attr_ex, rss_caps)) { ++ attr->rss_caps.rx_hash_fields_mask = ++ resp.rss_caps.rx_hash_fields_mask; ++ attr->rss_caps.rx_hash_function = ++ resp.rss_caps.rx_hash_function; ++ } ++ if (attr_size >= ++ offsetofend(struct ibv_device_attr_ex, packet_pacing_caps)) { ++ attr->packet_pacing_caps.qp_rate_limit_min = ++ resp.packet_pacing_caps.qp_rate_limit_min; ++ attr->packet_pacing_caps.qp_rate_limit_max = ++ resp.packet_pacing_caps.qp_rate_limit_max; ++ attr->packet_pacing_caps.supported_qpts = ++ resp.packet_pacing_caps.supported_qpts; ++ } ++ ++ if (resp.xsc_ib_support_multi_pkt_send_wqes & XSC_IB_ALLOW_MPW) ++ xctx->vendor_cap_flags |= XSC_VENDOR_CAP_FLAGS_MPW_ALLOWED; ++ ++ if (resp.xsc_ib_support_multi_pkt_send_wqes & XSC_IB_SUPPORT_EMPW) ++ xctx->vendor_cap_flags |= XSC_VENDOR_CAP_FLAGS_ENHANCED_MPW; ++ ++ xctx->cqe_comp_caps.max_num = resp.cqe_comp_caps.max_num; ++ xctx->cqe_comp_caps.supported_format = resp.cqe_comp_caps.supported_format; ++ xctx->sw_parsing_caps.sw_parsing_offloads = ++ resp.sw_parsing_caps.sw_parsing_offloads; ++ xctx->sw_parsing_caps.supported_qpts = ++ resp.sw_parsing_caps.supported_qpts; ++ xctx->striding_rq_caps.min_single_stride_log_num_of_bytes = ++ resp.striding_rq_caps.min_single_stride_log_num_of_bytes; ++ xctx->striding_rq_caps.max_single_stride_log_num_of_bytes = ++ resp.striding_rq_caps.max_single_stride_log_num_of_bytes; ++ xctx->striding_rq_caps.min_single_wqe_log_num_of_strides = ++ resp.striding_rq_caps.min_single_wqe_log_num_of_strides; ++ xctx->striding_rq_caps.max_single_wqe_log_num_of_strides = ++ resp.striding_rq_caps.max_single_wqe_log_num_of_strides; ++ xctx->striding_rq_caps.supported_qpts = ++ resp.striding_rq_caps.supported_qpts; ++ xctx->tunnel_offloads_caps = resp.tunnel_offloads_caps; ++ xctx->packet_pacing_caps = resp.packet_pacing_caps; ++ ++ if (resp.flags & XSC_IB_QUERY_DEV_RESP_FLAGS_CQE_128B_COMP) ++ xctx->vendor_cap_flags |= XSC_VENDOR_CAP_FLAGS_CQE_128B_COMP; ++ ++ if (resp.flags & XSC_IB_QUERY_DEV_RESP_FLAGS_CQE_128B_PAD) ++ xctx->vendor_cap_flags |= XSC_VENDOR_CAP_FLAGS_CQE_128B_PAD; ++ ++ raw_fw_ver.data = resp.ibv_resp.base.fw_ver; ++ a = &attr->orig_attr; ++ xsc_set_fw_version(a, &raw_fw_ver); ++ ++ return 0; ++} ++ ++static int rwq_sig_enabled(struct ibv_context *context) ++{ ++ char *env; ++ ++ env = getenv("XSC_RWQ_SIGNATURE"); ++ if (env) ++ return 1; ++ ++ return 0; ++} ++ ++static void xsc_free_rwq_buf(struct xsc_rwq *rwq, struct ibv_context *context) ++{ ++ struct xsc_context *ctx = to_xctx(context); ++ ++ xsc_free_actual_buf(ctx, &rwq->buf); ++ free(rwq->rq.wrid); ++} ++ ++static int xsc_alloc_rwq_buf(struct ibv_context *context, ++ struct xsc_rwq *rwq, ++ int size) ++{ ++ int err; ++ enum xsc_alloc_type alloc_type; ++ ++ xsc_get_alloc_type(to_xctx(context), XSC_RWQ_PREFIX, ++ &alloc_type, XSC_ALLOC_TYPE_ANON); ++ ++ rwq->rq.wrid = malloc(rwq->rq.wqe_cnt * sizeof(uint64_t)); ++ if (!rwq->rq.wrid) { ++ errno = ENOMEM; ++ return -1; ++ } ++ ++ err = xsc_alloc_prefered_buf(to_xctx(context), &rwq->buf, ++ 
align(rwq->buf_size, to_xdev ++ (context->device)->page_size), ++ to_xdev(context->device)->page_size, ++ alloc_type, ++ XSC_RWQ_PREFIX); ++ ++ if (err) { ++ free(rwq->rq.wrid); ++ errno = ENOMEM; ++ return -1; ++ } ++ ++ return 0; ++} ++ ++static struct ibv_wq *create_wq(struct ibv_context *context, ++ struct ibv_wq_init_attr *attr, ++ struct xscdv_wq_init_attr *xwq_attr) ++{ ++ struct xsc_create_wq cmd; ++ struct xsc_create_wq_resp resp; ++ int err; ++ struct xsc_rwq *rwq; ++ struct xsc_context *ctx = to_xctx(context); ++ int ret; ++ int32_t usr_idx = 0; ++ ++ if (attr->wq_type != IBV_WQT_RQ) ++ return NULL; ++ ++ memset(&cmd, 0, sizeof(cmd)); ++ memset(&resp, 0, sizeof(resp)); ++ ++ rwq = calloc(1, sizeof(*rwq)); ++ if (!rwq) ++ return NULL; ++ ++ rwq->wq_sig = rwq_sig_enabled(context); ++ if (rwq->wq_sig) ++ cmd.flags = XSC_WQ_FLAG_SIGNATURE; ++ ++ ret = xsc_calc_rwq_size(ctx, rwq, attr, xwq_attr); ++ if (ret < 0) { ++ errno = -ret; ++ goto err; ++ } ++ ++ rwq->buf_size = ret; ++ if (xsc_alloc_rwq_buf(context, rwq, ret)) ++ goto err; ++ ++ xsc_init_rwq_indices(rwq); ++ ++ if (xsc_spinlock_init_pd(&rwq->rq.lock, attr->pd)) ++ goto err_free_rwq_buf; ++ ++ rwq->db = xsc_alloc_dbrec(ctx); ++ if (!rwq->db) ++ goto err_free_rwq_buf; ++ ++ rwq->db[XSC_RCV_DBR] = 0; ++ rwq->db[XSC_SND_DBR] = 0; ++ rwq->pbuff = rwq->buf.buf + rwq->rq.offset; ++ rwq->recv_db = &rwq->db[XSC_RCV_DBR]; ++ cmd.buf_addr = (uintptr_t)rwq->buf.buf; ++ cmd.db_addr = (uintptr_t)rwq->db; ++ cmd.rq_wqe_count = rwq->rq.wqe_cnt; ++ cmd.rq_wqe_shift = rwq->rq.wqe_shift; ++ usr_idx = xsc_store_uidx(ctx, rwq); ++ if (usr_idx < 0) { ++ xsc_dbg(ctx->dbg_fp, XSC_DBG_QP, "Couldn't find free user index\n"); ++ goto err_free_db_rec; ++ } ++ ++ cmd.user_index = usr_idx; ++ ++ if (xwq_attr) { ++ if (xwq_attr->comp_mask & XSCDV_WQ_INIT_ATTR_MASK_STRIDING_RQ) { ++ if ((xwq_attr->striding_rq_attrs.single_stride_log_num_of_bytes < ++ ctx->striding_rq_caps.min_single_stride_log_num_of_bytes) || ++ (xwq_attr->striding_rq_attrs.single_stride_log_num_of_bytes > ++ ctx->striding_rq_caps.max_single_stride_log_num_of_bytes)) { ++ errno = EINVAL; ++ goto err_create; ++ } ++ ++ if ((xwq_attr->striding_rq_attrs.single_wqe_log_num_of_strides < ++ ctx->striding_rq_caps.min_single_wqe_log_num_of_strides) || ++ (xwq_attr->striding_rq_attrs.single_wqe_log_num_of_strides > ++ ctx->striding_rq_caps.max_single_wqe_log_num_of_strides)) { ++ errno = EINVAL; ++ goto err_create; ++ } ++ ++ cmd.single_stride_log_num_of_bytes = ++ xwq_attr->striding_rq_attrs.single_stride_log_num_of_bytes; ++ cmd.single_wqe_log_num_of_strides = ++ xwq_attr->striding_rq_attrs.single_wqe_log_num_of_strides; ++ cmd.two_byte_shift_en = ++ xwq_attr->striding_rq_attrs.two_byte_shift_en; ++ cmd.comp_mask |= XSC_IB_CREATE_WQ_STRIDING_RQ; ++ } ++ } ++ ++ err = ibv_cmd_create_wq(context, attr, &rwq->wq, &cmd.ibv_cmd, ++ sizeof(cmd), &resp.ibv_resp, sizeof(resp)); ++ if (err) ++ goto err_create; ++ ++ rwq->rsc.type = XSC_RSC_TYPE_RWQ; ++ rwq->rsc.rsn = cmd.user_index; ++ ++ rwq->wq.post_recv = xsc_post_wq_recv; ++ return &rwq->wq; ++ ++err_create: ++ xsc_clear_uidx(ctx, cmd.user_index); ++err_free_db_rec: ++ xsc_free_db(to_xctx(context), rwq->db); ++err_free_rwq_buf: ++ xsc_free_rwq_buf(rwq, context); ++err: ++ free(rwq); ++ return NULL; ++} ++ ++struct ibv_wq *xsc_create_wq(struct ibv_context *context, ++ struct ibv_wq_init_attr *attr) ++{ ++ return create_wq(context, attr, NULL); ++} ++ ++struct ibv_wq *xscdv_create_wq(struct ibv_context *context, ++ struct ibv_wq_init_attr *attr, ++ 
struct xscdv_wq_init_attr *xwq_attr) ++{ ++ return create_wq(context, attr, xwq_attr); ++} ++ ++int xsc_modify_wq(struct ibv_wq *wq, struct ibv_wq_attr *attr) ++{ ++ struct xsc_modify_wq cmd = {}; ++ struct xsc_rwq *rwq = to_xrwq(wq); ++ ++ if ((attr->attr_mask & IBV_WQ_ATTR_STATE) && ++ attr->wq_state == IBV_WQS_RDY) { ++ if ((attr->attr_mask & IBV_WQ_ATTR_CURR_STATE) && ++ attr->curr_wq_state != wq->state) ++ return -EINVAL; ++ ++ if (wq->state == IBV_WQS_RESET) { ++ xsc_spin_lock(&to_xcq(wq->cq)->lock); ++ __xsc_cq_clean(to_xcq(wq->cq), ++ rwq->rsc.rsn); ++ xsc_spin_unlock(&to_xcq(wq->cq)->lock); ++ xsc_init_rwq_indices(rwq); ++ rwq->db[XSC_RCV_DBR] = 0; ++ rwq->db[XSC_SND_DBR] = 0; ++ } ++ } ++ ++ return ibv_cmd_modify_wq(wq, attr, &cmd.ibv_cmd, sizeof(cmd)); ++} ++ ++int xsc_destroy_wq(struct ibv_wq *wq) ++{ ++ struct xsc_rwq *rwq = to_xrwq(wq); ++ int ret; ++ ++ ret = ibv_cmd_destroy_wq(wq); ++ if (ret) ++ return ret; ++ ++ xsc_spin_lock(&to_xcq(wq->cq)->lock); ++ __xsc_cq_clean(to_xcq(wq->cq), rwq->rsc.rsn); ++ xsc_spin_unlock(&to_xcq(wq->cq)->lock); ++ xsc_clear_uidx(to_xctx(wq->context), rwq->rsc.rsn); ++ xsc_free_db(to_xctx(wq->context), rwq->db); ++ xsc_free_rwq_buf(rwq, wq->context); ++ free(rwq); ++ ++ return 0; ++} ++ ++static void free_flow_counters_descriptions(struct xsc_ib_create_flow *cmd) ++{ ++ int i; ++ ++ for (i = 0; i < cmd->ncounters_data; i++) ++ free(cmd->data[i].counters_data); ++} ++ ++static int get_flow_mcounters(struct xsc_flow *mflow, ++ struct ibv_flow_attr *flow_attr, ++ struct xsc_counters **mcounters, ++ uint32_t *data_size) ++{ ++ struct ibv_flow_spec *ib_spec; ++ uint32_t ncounters_used = 0; ++ int i; ++ ++ ib_spec = (struct ibv_flow_spec *)(flow_attr + 1); ++ for (i = 0; i < flow_attr->num_of_specs; i++, ib_spec = (void *)ib_spec + ib_spec->hdr.size) { ++ if (ib_spec->hdr.type != IBV_FLOW_SPEC_ACTION_COUNT) ++ continue; ++ ++ /* currently support only one counters data */ ++ if (ncounters_used > 0) ++ return EINVAL; ++ ++ *mcounters = to_mcounters(ib_spec->flow_count.counters); ++ ncounters_used++; ++ } ++ ++ *data_size = ncounters_used * sizeof(struct xsc_ib_flow_counters_data); ++ return 0; ++} ++ ++static int allocate_flow_counters_descriptions(struct xsc_counters *mcounters, ++ struct xsc_ib_create_flow *cmd) ++{ ++ struct xsc_ib_flow_counters_data *mcntrs_data; ++ struct xsc_ib_flow_counters_desc *cntrs_data; ++ struct xsc_counter_node *cntr_node; ++ uint32_t ncounters; ++ int j = 0; ++ ++ mcntrs_data = cmd->data; ++ ncounters = mcounters->ncounters; ++ ++ /* xsc_attach_counters_point_flow was never called */ ++ if (!ncounters) ++ return EINVAL; ++ ++ /* each counter has both index and description */ ++ cntrs_data = calloc(ncounters, sizeof(*cntrs_data)); ++ if (!cntrs_data) ++ return ENOMEM; ++ ++ list_for_each(&mcounters->counters_list, cntr_node, entry) { ++ cntrs_data[j].description = cntr_node->desc; ++ cntrs_data[j].index = cntr_node->index; ++ j++; ++ } ++ ++ scrub_ptr_attr(cntrs_data); ++ mcntrs_data[cmd->ncounters_data].counters_data = cntrs_data; ++ mcntrs_data[cmd->ncounters_data].ncounters = ncounters; ++ cmd->ncounters_data++; ++ ++ return 0; ++} ++ ++struct ibv_flow *xsc_create_flow(struct ibv_qp *qp, struct ibv_flow_attr *flow_attr) ++{ ++ struct xsc_ib_create_flow *cmd; ++ uint32_t required_cmd_size = 0; ++ struct ibv_flow *flow_id; ++ struct xsc_flow *mflow; ++ int ret; ++ ++ mflow = calloc(1, sizeof(*mflow)); ++ if (!mflow) { ++ errno = ENOMEM; ++ return NULL; ++ } ++ ++ ret = get_flow_mcounters(mflow, flow_attr, 
&mflow->mcounters, &required_cmd_size); ++ if (ret) { ++ errno = ret; ++ goto err_get_mcounters; ++ } ++ ++ required_cmd_size += sizeof(*cmd); ++ cmd = calloc(1, required_cmd_size); ++ if (!cmd) { ++ errno = ENOMEM; ++ goto err_get_mcounters; ++ } ++ ++ if (mflow->mcounters) { ++ pthread_mutex_lock(&mflow->mcounters->lock); ++ /* if the counters already bound no need to pass its description */ ++ if (!mflow->mcounters->refcount) { ++ ret = allocate_flow_counters_descriptions(mflow->mcounters, cmd); ++ if (ret) { ++ errno = ret; ++ goto err_desc_alloc; ++ } ++ } ++ } ++ ++ flow_id = &mflow->flow_id; ++ ret = ibv_cmd_create_flow(qp, flow_id, flow_attr, ++ cmd, required_cmd_size); ++ if (ret) ++ goto err_create_flow; ++ ++ if (mflow->mcounters) { ++ free_flow_counters_descriptions(cmd); ++ mflow->mcounters->refcount++; ++ pthread_mutex_unlock(&mflow->mcounters->lock); ++ } ++ ++ free(cmd); ++ ++ return flow_id; ++ ++err_create_flow: ++ if (mflow->mcounters) { ++ free_flow_counters_descriptions(cmd); ++ pthread_mutex_unlock(&mflow->mcounters->lock); ++ } ++err_desc_alloc: ++ free(cmd); ++err_get_mcounters: ++ free(mflow); ++ return NULL; ++} ++ ++int xsc_destroy_flow(struct ibv_flow *flow_id) ++{ ++ struct xsc_flow *mflow = to_mflow(flow_id); ++ int ret; ++ ++ ret = ibv_cmd_destroy_flow(flow_id); ++ if (ret) ++ return ret; ++ ++ if (mflow->mcounters) { ++ pthread_mutex_lock(&mflow->mcounters->lock); ++ mflow->mcounters->refcount--; ++ pthread_mutex_unlock(&mflow->mcounters->lock); ++ } ++ ++ free(mflow); ++ return 0; ++} ++ ++struct ibv_rwq_ind_table *xsc_create_rwq_ind_table(struct ibv_context *context, ++ struct ibv_rwq_ind_table_init_attr *init_attr) ++{ ++ struct ibv_create_rwq_ind_table *cmd; ++ struct xsc_create_rwq_ind_table_resp resp; ++ struct ibv_rwq_ind_table *ind_table; ++ uint32_t required_tbl_size; ++ int num_tbl_entries; ++ int cmd_size; ++ int err; ++ ++ num_tbl_entries = 1 << init_attr->log_ind_tbl_size; ++ /* Data must be u64 aligned */ ++ required_tbl_size = (num_tbl_entries * sizeof(uint32_t)) < sizeof(uint64_t) ? 
++ sizeof(uint64_t) : (num_tbl_entries * sizeof(uint32_t)); ++ ++ cmd_size = required_tbl_size + sizeof(*cmd); ++ cmd = calloc(1, cmd_size); ++ if (!cmd) ++ return NULL; ++ ++ memset(&resp, 0, sizeof(resp)); ++ ind_table = calloc(1, sizeof(*ind_table)); ++ if (!ind_table) ++ goto free_cmd; ++ ++ err = ibv_cmd_create_rwq_ind_table(context, init_attr, ind_table, ++ &resp.ibv_resp, sizeof(resp)); ++ if (err) ++ goto err; ++ ++ free(cmd); ++ return ind_table; ++ ++err: ++ free(ind_table); ++free_cmd: ++ free(cmd); ++ return NULL; ++} ++ ++int xsc_destroy_rwq_ind_table(struct ibv_rwq_ind_table *rwq_ind_table) ++{ ++ int ret; ++ ++ ret = ibv_cmd_destroy_rwq_ind_table(rwq_ind_table); ++ ++ if (ret) ++ return ret; ++ ++ free(rwq_ind_table); ++ return 0; ++} ++ ++int xsc_modify_cq(struct ibv_cq *cq, struct ibv_modify_cq_attr *attr) ++{ ++ struct ibv_modify_cq cmd = {}; ++ ++ return ibv_cmd_modify_cq(cq, attr, &cmd, sizeof(cmd)); ++} ++ ++static struct ibv_flow_action *_xsc_create_flow_action_esp(struct ibv_context *ctx, ++ struct ibv_flow_action_esp_attr *attr, ++ struct ibv_command_buffer *driver_attr) ++{ ++ struct verbs_flow_action *action; ++ int ret; ++ ++ if (!check_comp_mask(attr->comp_mask, IBV_FLOW_ACTION_ESP_MASK_ESN)) { ++ errno = EOPNOTSUPP; ++ return NULL; ++ } ++ ++ action = calloc(1, sizeof(*action)); ++ if (!action) { ++ errno = ENOMEM; ++ return NULL; ++ } ++ ++ ret = ibv_cmd_create_flow_action_esp(ctx, attr, action, driver_attr); ++ if (ret) { ++ free(action); ++ return NULL; ++ } ++ ++ return &action->action; ++} ++ ++struct ibv_flow_action *xsc_create_flow_action_esp(struct ibv_context *ctx, ++ struct ibv_flow_action_esp_attr *attr) ++{ ++ return _xsc_create_flow_action_esp(ctx, attr, NULL); ++} ++ ++struct ibv_flow_action *xscdv_create_flow_action_esp(struct ibv_context *ctx, ++ struct ibv_flow_action_esp_attr *esp, ++ struct xscdv_flow_action_esp *xattr) ++{ ++ DECLARE_COMMAND_BUFFER_LINK(driver_attr, UVERBS_OBJECT_FLOW_ACTION, ++ UVERBS_METHOD_FLOW_ACTION_ESP_CREATE, 1, ++ NULL); ++ ++ if (!check_comp_mask(xattr->comp_mask, ++ XSCDV_FLOW_ACTION_ESP_MASK_FLAGS)) { ++ errno = EOPNOTSUPP; ++ return NULL; ++ } ++ ++ if (xattr->comp_mask & XSCDV_FLOW_ACTION_ESP_MASK_FLAGS) { ++ if (!check_comp_mask(xattr->action_flags, ++ XSC_IB_UAPI_FLOW_ACTION_FLAGS_REQUIRE_METADATA)) { ++ errno = EOPNOTSUPP; ++ return NULL; ++ } ++ fill_attr_in_uint64(driver_attr, XSC_IB_ATTR_CREATE_FLOW_ACTION_FLAGS, ++ xattr->action_flags); ++ } ++ ++ return _xsc_create_flow_action_esp(ctx, esp, driver_attr); ++} ++ ++int xsc_modify_flow_action_esp(struct ibv_flow_action *action, ++ struct ibv_flow_action_esp_attr *attr) ++{ ++ struct verbs_flow_action *vaction = ++ container_of(action, struct verbs_flow_action, action); ++ ++ if (!check_comp_mask(attr->comp_mask, IBV_FLOW_ACTION_ESP_MASK_ESN)) ++ return EOPNOTSUPP; ++ ++ return ibv_cmd_modify_flow_action_esp(vaction, attr, NULL); ++} ++ ++struct ibv_flow_action *xscdv_create_flow_action_modify_header(struct ibv_context *ctx, ++ size_t actions_sz, ++ uint64_t actions[], ++ enum xscdv_flow_table_type ft_type) ++{ ++ DECLARE_COMMAND_BUFFER(cmd, UVERBS_OBJECT_FLOW_ACTION, ++ XSC_IB_METHOD_FLOW_ACTION_CREATE_MODIFY_HEADER, ++ 3); ++ struct ib_uverbs_attr *handle = fill_attr_out_obj(cmd, ++ XSC_IB_ATTR_CREATE_MODIFY_HEADER_HANDLE); ++ struct verbs_flow_action *action; ++ int ret; ++ ++ fill_attr_in(cmd, XSC_IB_ATTR_CREATE_MODIFY_HEADER_ACTIONS_PRM, ++ actions, actions_sz); ++ fill_attr_const_in(cmd, XSC_IB_ATTR_CREATE_MODIFY_HEADER_FT_TYPE, ++ ft_type); ++ ++ 
action = calloc(1, sizeof(*action)); ++ if (!action) { ++ errno = ENOMEM; ++ return NULL; ++ } ++ ++ ret = execute_ioctl(ctx, cmd); ++ if (ret) { ++ free(action); ++ return NULL; ++ } ++ ++ action->action.context = ctx; ++ action->type = IBV_FLOW_ACTION_UNSPECIFIED; ++ action->handle = read_attr_obj(XSC_IB_ATTR_CREATE_MODIFY_HEADER_HANDLE, ++ handle); ++ ++ return &action->action; ++} ++ ++struct ibv_flow_action * ++xscdv_create_flow_action_packet_reformat(struct ibv_context *ctx, ++ size_t data_sz, ++ void *data, ++ enum xscdv_flow_action_packet_reformat_type reformat_type, ++ enum xscdv_flow_table_type ft_type) ++{ ++ DECLARE_COMMAND_BUFFER(cmd, UVERBS_OBJECT_FLOW_ACTION, ++ XSC_IB_METHOD_FLOW_ACTION_CREATE_PACKET_REFORMAT, 4); ++ struct ib_uverbs_attr *handle = fill_attr_out_obj(cmd, ++ XSC_IB_ATTR_CREATE_PACKET_REFORMAT_HANDLE); ++ struct verbs_flow_action *action; ++ int ret; ++ ++ if ((!data && data_sz) || (data && !data_sz)) { ++ errno = EINVAL; ++ return NULL; ++ } ++ ++ if (data && data_sz) ++ fill_attr_in(cmd, ++ XSC_IB_ATTR_CREATE_PACKET_REFORMAT_DATA_BUF, ++ data, data_sz); ++ ++ fill_attr_const_in(cmd, XSC_IB_ATTR_CREATE_PACKET_REFORMAT_TYPE, ++ reformat_type); ++ ++ fill_attr_const_in(cmd, XSC_IB_ATTR_CREATE_PACKET_REFORMAT_FT_TYPE, ++ ft_type); ++ ++ action = calloc(1, sizeof(*action)); ++ if (!action) { ++ errno = ENOMEM; ++ return NULL; ++ } ++ ++ ret = execute_ioctl(ctx, cmd); ++ if (ret) { ++ free(action); ++ return NULL; ++ } ++ ++ action->action.context = ctx; ++ action->type = IBV_FLOW_ACTION_UNSPECIFIED; ++ action->handle = read_attr_obj(XSC_IB_ATTR_CREATE_PACKET_REFORMAT_HANDLE, ++ handle); ++ ++ return &action->action; ++} ++ ++int xsc_destroy_flow_action(struct ibv_flow_action *action) ++{ ++ struct verbs_flow_action *vaction = ++ container_of(action, struct verbs_flow_action, action); ++ int ret = ibv_cmd_destroy_flow_action(vaction); ++ ++ if (!ret) ++ free(action); ++ ++ return ret; ++} ++ ++static inline int xsc_access_dm(struct ibv_dm *ibdm, uint64_t dm_offset, ++ void *host_addr, size_t length, ++ uint32_t read) ++{ ++ struct xsc_dm *dm = to_xdm(ibdm); ++ atomic_uint32_t *dm_ptr = ++ (atomic_uint32_t *)dm->start_va + dm_offset / 4; ++ uint32_t *host_ptr = host_addr; ++ const uint32_t *host_end = host_ptr + length / 4; ++ ++ if (dm_offset + length > dm->length) ++ return EFAULT; ++ ++ /* Due to HW limitation, DM access address and length must be aligned ++ * to 4 bytes. ++ */ ++ if ((length & 3) || (dm_offset & 3)) ++ return EINVAL; ++ ++ /* Copy granularity should be 4 Bytes since we enforce copy size to be ++ * a multiple of 4 bytes. 
++ */ ++ if (read) { ++ while (host_ptr != host_end) { ++ *host_ptr = atomic_load_explicit(dm_ptr, ++ memory_order_relaxed); ++ host_ptr++; ++ dm_ptr++; ++ } ++ } else { ++ while (host_ptr != host_end) { ++ atomic_store_explicit(dm_ptr, *host_ptr, ++ memory_order_relaxed); ++ host_ptr++; ++ dm_ptr++; ++ } ++ } ++ ++ return 0; ++} ++static inline int xsc_memcpy_to_dm(struct ibv_dm *ibdm, uint64_t dm_offset, ++ const void *host_addr, size_t length) ++{ ++ return xsc_access_dm(ibdm, dm_offset, (void *)host_addr, length, 0); ++} ++ ++static inline int xsc_memcpy_from_dm(void *host_addr, struct ibv_dm *ibdm, ++ uint64_t dm_offset, size_t length) ++{ ++ return xsc_access_dm(ibdm, dm_offset, host_addr, length, 1); ++} ++ ++struct ibv_dm *xsc_alloc_dm(struct ibv_context *context, ++ struct ibv_alloc_dm_attr *dm_attr) ++{ ++ DECLARE_COMMAND_BUFFER(cmdb, UVERBS_OBJECT_DM, UVERBS_METHOD_DM_ALLOC, ++ 2); ++ int page_size = to_xdev(context->device)->page_size; ++ struct xsc_context *xctx = to_xctx(context); ++ uint64_t act_size, start_offset; ++ struct xsc_dm *dm; ++ uint16_t page_idx; ++ off_t offset = 0; ++ void *va; ++ ++ if (!check_comp_mask(dm_attr->comp_mask, 0)) { ++ errno = EINVAL; ++ return NULL; ++ } ++ ++ if (dm_attr->length > xctx->max_dm_size) { ++ errno = EINVAL; ++ return NULL; ++ } ++ ++ dm = calloc(1, sizeof(*dm)); ++ if (!dm) { ++ errno = ENOMEM; ++ return NULL; ++ } ++ ++ ++ fill_attr_out(cmdb, XSC_IB_ATTR_ALLOC_DM_RESP_START_OFFSET, ++ &start_offset, sizeof(start_offset)); ++ fill_attr_out(cmdb, XSC_IB_ATTR_ALLOC_DM_RESP_PAGE_INDEX, ++ &page_idx, sizeof(page_idx)); ++ ++ if (ibv_cmd_alloc_dm(context, dm_attr, &dm->verbs_dm, cmdb)) ++ goto err_free_mem; ++ ++ act_size = align(dm_attr->length, page_size); ++ set_command(XSC_IB_MMAP_DEVICE_MEM, &offset); ++ set_extended_index(page_idx, &offset); ++ va = mmap(NULL, act_size, PROT_READ | PROT_WRITE, ++ MAP_SHARED, context->cmd_fd, ++ page_size * offset); ++ if (va == MAP_FAILED) ++ goto err_free_dm; ++ ++ dm->mmap_va = va; ++ dm->length = dm_attr->length; ++ dm->start_va = va + (start_offset & (page_size - 1)); ++ dm->verbs_dm.dm.memcpy_to_dm = xsc_memcpy_to_dm; ++ dm->verbs_dm.dm.memcpy_from_dm = xsc_memcpy_from_dm; ++ ++ return &dm->verbs_dm.dm; ++ ++err_free_dm: ++ ibv_cmd_free_dm(&dm->verbs_dm); ++ ++err_free_mem: ++ free(dm); ++ ++ return NULL; ++} ++ ++int xsc_free_dm(struct ibv_dm *ibdm) ++{ ++ struct xsc_device *xdev = to_xdev(ibdm->context->device); ++ struct xsc_dm *dm = to_xdm(ibdm); ++ size_t act_size = align(dm->length, xdev->page_size); ++ int ret; ++ ++ ret = ibv_cmd_free_dm(&dm->verbs_dm); ++ ++ if (ret) ++ return ret; ++ ++ munmap(dm->mmap_va, act_size); ++ free(dm); ++ return 0; ++} ++ ++struct ibv_counters *xsc_create_counters(struct ibv_context *context, ++ struct ibv_counters_init_attr *init_attr) ++{ ++ struct xsc_counters *mcntrs; ++ int ret; ++ ++ if (!check_comp_mask(init_attr->comp_mask, 0)) { ++ errno = EOPNOTSUPP; ++ return NULL; ++ } ++ ++ mcntrs = calloc(1, sizeof(*mcntrs)); ++ if (!mcntrs) { ++ errno = ENOMEM; ++ return NULL; ++ } ++ ++ pthread_mutex_init(&mcntrs->lock, NULL); ++ ret = ibv_cmd_create_counters(context, ++ init_attr, ++ &mcntrs->vcounters, ++ NULL); ++ if (ret) ++ goto err_create; ++ ++ list_head_init(&mcntrs->counters_list); ++ ++ return &mcntrs->vcounters.counters; ++ ++err_create: ++ free(mcntrs); ++ return NULL; ++} ++ ++int xsc_destroy_counters(struct ibv_counters *counters) ++{ ++ struct xsc_counters *mcntrs = to_mcounters(counters); ++ struct xsc_counter_node *tmp, *cntrs_node; ++ int 
ret; ++ ++ ret = ibv_cmd_destroy_counters(&mcntrs->vcounters); ++ if (ret) ++ return ret; ++ ++ list_for_each_safe(&mcntrs->counters_list, cntrs_node, tmp, entry) { ++ list_del(&cntrs_node->entry); ++ free(cntrs_node); ++ } ++ ++ free(mcntrs); ++ return 0; ++} ++ ++int xsc_attach_counters_point_flow(struct ibv_counters *counters, ++ struct ibv_counter_attach_attr *attr, ++ struct ibv_flow *flow) ++{ ++ struct xsc_counters *mcntrs = to_mcounters(counters); ++ struct xsc_counter_node *cntrs_node; ++ int ret; ++ ++ /* The driver supports only the static binding mode as part of ibv_create_flow */ ++ if (flow) ++ return ENOTSUP; ++ ++ if (!check_comp_mask(attr->comp_mask, 0)) ++ return EOPNOTSUPP; ++ ++ /* Check whether the attached counter is supported */ ++ if (attr->counter_desc < IBV_COUNTER_PACKETS || ++ attr->counter_desc > IBV_COUNTER_BYTES) ++ return ENOTSUP; ++ ++ cntrs_node = calloc(1, sizeof(*cntrs_node)); ++ if (!cntrs_node) ++ return ENOMEM; ++ ++ pthread_mutex_lock(&mcntrs->lock); ++ /* The counter is bound to a flow, attach is not allowed */ ++ if (mcntrs->refcount) { ++ ret = EBUSY; ++ goto err_already_bound; ++ } ++ ++ cntrs_node->index = attr->index; ++ cntrs_node->desc = attr->counter_desc; ++ list_add(&mcntrs->counters_list, &cntrs_node->entry); ++ mcntrs->ncounters++; ++ pthread_mutex_unlock(&mcntrs->lock); ++ ++ return 0; ++ ++err_already_bound: ++ pthread_mutex_unlock(&mcntrs->lock); ++ free(cntrs_node); ++ return ret; ++} ++ ++int xsc_read_counters(struct ibv_counters *counters, ++ uint64_t *counters_value, ++ uint32_t ncounters, ++ uint32_t flags) ++{ ++ struct xsc_counters *mcntrs = to_mcounters(counters); ++ ++ return ibv_cmd_read_counters(&mcntrs->vcounters, ++ counters_value, ++ ncounters, ++ flags, ++ NULL); ++ ++} ++ ++struct xscdv_flow_matcher * ++xscdv_create_flow_matcher(struct ibv_context *context, ++ struct xscdv_flow_matcher_attr *attr) ++{ ++ DECLARE_COMMAND_BUFFER(cmd, XSC_IB_OBJECT_FLOW_MATCHER, ++ XSC_IB_METHOD_FLOW_MATCHER_CREATE, ++ 5); ++ struct xscdv_flow_matcher *flow_matcher; ++ struct ib_uverbs_attr *handle; ++ int ret; ++ ++ if (!check_comp_mask(attr->comp_mask, 0)) { ++ errno = EOPNOTSUPP; ++ return NULL; ++ } ++ ++ flow_matcher = calloc(1, sizeof(*flow_matcher)); ++ if (!flow_matcher) { ++ errno = ENOMEM; ++ return NULL; ++ } ++ ++ if (attr->type != IBV_FLOW_ATTR_NORMAL) { ++ errno = EOPNOTSUPP; ++ goto err; ++ } ++ ++ handle = fill_attr_out_obj(cmd, XSC_IB_ATTR_FLOW_MATCHER_CREATE_HANDLE); ++ fill_attr_in(cmd, XSC_IB_ATTR_FLOW_MATCHER_MATCH_MASK, ++ attr->match_mask->match_buf, ++ attr->match_mask->match_sz); ++ fill_attr_in(cmd, XSC_IB_ATTR_FLOW_MATCHER_MATCH_CRITERIA, ++ &attr->match_criteria_enable, sizeof(attr->match_criteria_enable)); ++ fill_attr_in_enum(cmd, XSC_IB_ATTR_FLOW_MATCHER_FLOW_TYPE, ++ IBV_FLOW_ATTR_NORMAL, &attr->priority, ++ sizeof(attr->priority)); ++ if (attr->flags) ++ fill_attr_const_in(cmd, XSC_IB_ATTR_FLOW_MATCHER_FLOW_FLAGS, ++ attr->flags); ++ ++ ret = execute_ioctl(context, cmd); ++ if (ret) ++ goto err; ++ ++ flow_matcher->context = context; ++ flow_matcher->handle = read_attr_obj(XSC_IB_ATTR_FLOW_MATCHER_CREATE_HANDLE, handle); ++ ++ return flow_matcher; ++ ++err: ++ free(flow_matcher); ++ return NULL; ++} ++ ++int xscdv_destroy_flow_matcher(struct xscdv_flow_matcher *flow_matcher) ++{ ++ DECLARE_COMMAND_BUFFER(cmd, XSC_IB_OBJECT_FLOW_MATCHER, ++ XSC_IB_METHOD_FLOW_MATCHER_DESTROY, ++ 1); ++ int ret; ++ ++ fill_attr_in_obj(cmd, XSC_IB_ATTR_FLOW_MATCHER_DESTROY_HANDLE, flow_matcher->handle); ++ ret = 
execute_ioctl(flow_matcher->context, cmd); ++ verbs_is_destroy_err(&ret); ++ ++ if (ret) ++ return ret; ++ ++ free(flow_matcher); ++ return 0; ++} ++ ++#define CREATE_FLOW_MAX_FLOW_ACTIONS_SUPPORTED 8 ++struct ibv_flow * ++xscdv_create_flow(struct xscdv_flow_matcher *flow_matcher, ++ struct xscdv_flow_match_parameters *match_value, ++ size_t num_actions, ++ struct xscdv_flow_action_attr actions_attr[]) ++{ ++ uint32_t flow_actions[CREATE_FLOW_MAX_FLOW_ACTIONS_SUPPORTED]; ++ struct verbs_flow_action *vaction; ++ int num_flow_actions = 0; ++ struct xsc_flow *mflow; ++ bool have_qp = false; ++ bool have_dest_devx = false; ++ bool have_flow_tag = false; ++ int ret; ++ int i; ++ DECLARE_COMMAND_BUFFER(cmd, UVERBS_OBJECT_FLOW, ++ XSC_IB_METHOD_CREATE_FLOW, ++ 6); ++ struct ib_uverbs_attr *handle; ++ enum xscdv_flow_action_type type; ++ ++ mflow = calloc(1, sizeof(*mflow)); ++ if (!mflow) { ++ errno = ENOMEM; ++ return NULL; ++ } ++ ++ handle = fill_attr_out_obj(cmd, XSC_IB_ATTR_CREATE_FLOW_HANDLE); ++ fill_attr_in(cmd, XSC_IB_ATTR_CREATE_FLOW_MATCH_VALUE, ++ match_value->match_buf, ++ match_value->match_sz); ++ fill_attr_in_obj(cmd, XSC_IB_ATTR_CREATE_FLOW_MATCHER, flow_matcher->handle); ++ ++ for (i = 0; i < num_actions; i++) { ++ type = actions_attr[i].type; ++ switch (type) { ++ case XSCDV_FLOW_ACTION_DEST_IBV_QP: ++ if (have_qp || have_dest_devx) { ++ errno = EOPNOTSUPP; ++ goto err; ++ } ++ fill_attr_in_obj(cmd, XSC_IB_ATTR_CREATE_FLOW_DEST_QP, ++ actions_attr[i].qp->handle); ++ have_qp = true; ++ break; ++ case XSCDV_FLOW_ACTION_IBV_FLOW_ACTION: ++ if (num_flow_actions == ++ CREATE_FLOW_MAX_FLOW_ACTIONS_SUPPORTED) { ++ errno = EOPNOTSUPP; ++ goto err; ++ } ++ vaction = container_of(actions_attr[i].action, ++ struct verbs_flow_action, ++ action); ++ ++ flow_actions[num_flow_actions] = vaction->handle; ++ num_flow_actions++; ++ break; ++ case XSCDV_FLOW_ACTION_DEST_DEVX: ++ if (have_dest_devx || have_qp) { ++ errno = EOPNOTSUPP; ++ goto err; ++ } ++ fill_attr_in_obj(cmd, XSC_IB_ATTR_CREATE_FLOW_DEST_DEVX, ++ actions_attr[i].obj->handle); ++ have_dest_devx = true; ++ break; ++ case XSCDV_FLOW_ACTION_TAG: ++ if (have_flow_tag) { ++ errno = EINVAL; ++ goto err; ++ } ++ fill_attr_in_uint32(cmd, ++ XSC_IB_ATTR_CREATE_FLOW_TAG, ++ actions_attr[i].tag_value); ++ have_flow_tag = true; ++ break; ++ default: ++ errno = EOPNOTSUPP; ++ goto err; ++ } ++ } ++ ++ if (num_flow_actions) ++ fill_attr_in_objs_arr(cmd, ++ XSC_IB_ATTR_CREATE_FLOW_ARR_FLOW_ACTIONS, ++ flow_actions, ++ num_flow_actions); ++ ret = execute_ioctl(flow_matcher->context, cmd); ++ if (ret) ++ goto err; ++ ++ mflow->flow_id.handle = read_attr_obj(XSC_IB_ATTR_CREATE_FLOW_HANDLE, handle); ++ mflow->flow_id.context = flow_matcher->context; ++ return &mflow->flow_id; ++err: ++ free(mflow); ++ return NULL; ++} ++ ++struct xscdv_devx_umem * ++xscdv_devx_umem_reg(struct ibv_context *context, void *addr, size_t size, uint32_t access) ++{ ++ DECLARE_COMMAND_BUFFER(cmd, ++ XSC_IB_OBJECT_DEVX_UMEM, ++ XSC_IB_METHOD_DEVX_UMEM_REG, ++ 5); ++ struct ib_uverbs_attr *handle; ++ struct xsc_devx_umem *umem; ++ int ret; ++ ++ umem = calloc(1, sizeof(*umem)); ++ if (!umem) { ++ errno = ENOMEM; ++ return NULL; ++ } ++ ++ if (ibv_dontfork_range(addr, size)) ++ goto err; ++ ++ fill_attr_in_uint64(cmd, XSC_IB_ATTR_DEVX_UMEM_REG_ADDR, (intptr_t)addr); ++ fill_attr_in_uint64(cmd, XSC_IB_ATTR_DEVX_UMEM_REG_LEN, size); ++ fill_attr_in_uint32(cmd, XSC_IB_ATTR_DEVX_UMEM_REG_ACCESS, access); ++ fill_attr_out(cmd, XSC_IB_ATTR_DEVX_UMEM_REG_OUT_ID, ++ 
&umem->dv_devx_umem.umem_id, ++ sizeof(umem->dv_devx_umem.umem_id)); ++ handle = fill_attr_out_obj(cmd, XSC_IB_ATTR_DEVX_UMEM_REG_HANDLE); ++ ++ ret = execute_ioctl(context, cmd); ++ if (ret) ++ goto err_umem_reg_cmd; ++ ++ umem->handle = read_attr_obj(XSC_IB_ATTR_DEVX_UMEM_REG_HANDLE, handle); ++ umem->context = context; ++ umem->addr = addr; ++ umem->size = size; ++ ++ return &umem->dv_devx_umem; ++ ++err_umem_reg_cmd: ++ ibv_dofork_range(addr, size); ++err: ++ free(umem); ++ return NULL; ++} ++ ++int xscdv_devx_umem_dereg(struct xscdv_devx_umem *dv_devx_umem) ++{ ++ DECLARE_COMMAND_BUFFER(cmd, ++ XSC_IB_OBJECT_DEVX_UMEM, ++ XSC_IB_METHOD_DEVX_UMEM_DEREG, ++ 1); ++ int ret; ++ struct xsc_devx_umem *umem = container_of(dv_devx_umem, struct xsc_devx_umem, ++ dv_devx_umem); ++ ++ fill_attr_in_obj(cmd, XSC_IB_ATTR_DEVX_UMEM_DEREG_HANDLE, umem->handle); ++ ret = execute_ioctl(umem->context, cmd); ++ if (ret) ++ return ret; ++ ++ ibv_dofork_range(umem->addr, umem->size); ++ free(umem); ++ return 0; ++} ++ ++struct xscdv_devx_obj * ++xscdv_devx_obj_create(struct ibv_context *context, const void *in, size_t inlen, ++ void *out, size_t outlen) ++{ ++ DECLARE_COMMAND_BUFFER(cmd, ++ XSC_IB_OBJECT_DEVX_OBJ, ++ XSC_IB_METHOD_DEVX_OBJ_CREATE, ++ 3); ++ struct ib_uverbs_attr *handle; ++ struct xscdv_devx_obj *obj; ++ int ret; ++ ++ obj = calloc(1, sizeof(*obj)); ++ if (!obj) { ++ errno = ENOMEM; ++ return NULL; ++ } ++ ++ handle = fill_attr_out_obj(cmd, XSC_IB_ATTR_DEVX_OBJ_CREATE_HANDLE); ++ fill_attr_in(cmd, XSC_IB_ATTR_DEVX_OBJ_CREATE_CMD_IN, in, inlen); ++ fill_attr_out(cmd, XSC_IB_ATTR_DEVX_OBJ_CREATE_CMD_OUT, out, outlen); ++ ++ ret = execute_ioctl(context, cmd); ++ if (ret) ++ goto err; ++ ++ obj->handle = read_attr_obj(XSC_IB_ATTR_DEVX_OBJ_CREATE_HANDLE, handle); ++ obj->context = context; ++ return obj; ++err: ++ free(obj); ++ return NULL; ++} ++ ++int xscdv_devx_obj_query(struct xscdv_devx_obj *obj, const void *in, size_t inlen, ++ void *out, size_t outlen) ++{ ++ DECLARE_COMMAND_BUFFER(cmd, ++ XSC_IB_OBJECT_DEVX_OBJ, ++ XSC_IB_METHOD_DEVX_OBJ_QUERY, ++ 3); ++ ++ fill_attr_in_obj(cmd, XSC_IB_ATTR_DEVX_OBJ_QUERY_HANDLE, obj->handle); ++ fill_attr_in(cmd, XSC_IB_ATTR_DEVX_OBJ_QUERY_CMD_IN, in, inlen); ++ fill_attr_out(cmd, XSC_IB_ATTR_DEVX_OBJ_QUERY_CMD_OUT, out, outlen); ++ ++ return execute_ioctl(obj->context, cmd); ++} ++ ++int xscdv_devx_obj_modify(struct xscdv_devx_obj *obj, const void *in, size_t inlen, ++ void *out, size_t outlen) ++{ ++ DECLARE_COMMAND_BUFFER(cmd, ++ XSC_IB_OBJECT_DEVX_OBJ, ++ XSC_IB_METHOD_DEVX_OBJ_MODIFY, ++ 3); ++ ++ fill_attr_in_obj(cmd, XSC_IB_ATTR_DEVX_OBJ_MODIFY_HANDLE, obj->handle); ++ fill_attr_in(cmd, XSC_IB_ATTR_DEVX_OBJ_MODIFY_CMD_IN, in, inlen); ++ fill_attr_out(cmd, XSC_IB_ATTR_DEVX_OBJ_MODIFY_CMD_OUT, out, outlen); ++ ++ return execute_ioctl(obj->context, cmd); ++} ++ ++int xscdv_devx_obj_destroy(struct xscdv_devx_obj *obj) ++{ ++ DECLARE_COMMAND_BUFFER(cmd, ++ XSC_IB_OBJECT_DEVX_OBJ, ++ XSC_IB_METHOD_DEVX_OBJ_DESTROY, ++ 1); ++ int ret; ++ ++ fill_attr_in_obj(cmd, XSC_IB_ATTR_DEVX_OBJ_DESTROY_HANDLE, obj->handle); ++ ret = execute_ioctl(obj->context, cmd); ++ ++ if (ret) ++ return ret; ++ free(obj); ++ return 0; ++} ++ ++int xscdv_devx_general_cmd(struct ibv_context *context, const void *in, size_t inlen, ++ void *out, size_t outlen) ++{ ++ DECLARE_COMMAND_BUFFER(cmd, ++ XSC_IB_OBJECT_DEVX, ++ XSC_IB_METHOD_DEVX_OTHER, ++ 2); ++ ++ fill_attr_in(cmd, XSC_IB_ATTR_DEVX_OTHER_CMD_IN, in, inlen); ++ fill_attr_out(cmd, XSC_IB_ATTR_DEVX_OTHER_CMD_OUT, out, 
outlen); ++ ++ return execute_ioctl(context, cmd); ++} ++ ++int xscdv_devx_query_eqn(struct ibv_context *context, uint32_t vector, ++ uint32_t *eqn) ++{ ++ DECLARE_COMMAND_BUFFER(cmd, ++ XSC_IB_OBJECT_DEVX, ++ XSC_IB_METHOD_DEVX_QUERY_EQN, ++ 2); ++ ++ fill_attr_in_uint32(cmd, XSC_IB_ATTR_DEVX_QUERY_EQN_USER_VEC, vector); ++ fill_attr_out_ptr(cmd, XSC_IB_ATTR_DEVX_QUERY_EQN_DEV_EQN, eqn); ++ ++ return execute_ioctl(context, cmd); ++} +diff --git a/providers/xscale/wqe.h b/providers/xscale/wqe.h +new file mode 100644 +index 0000000..4b7f327 +--- /dev/null ++++ b/providers/xscale/wqe.h +@@ -0,0 +1,72 @@ ++/* ++ * Copyright (c) 2021 - 2022, Shanghai Yunsilicon Technology Co., Ltd. ++ * All rights reserved. ++ */ ++ ++#ifndef WQE_H ++#define WQE_H ++ ++#include ++ ++struct xsc_wqe_eth_pad { ++ uint8_t rsvd0[16]; ++}; ++ ++struct xsc_wqe_xrc_seg { ++ __be32 xrc_srqn; ++ uint8_t rsvd[12]; ++}; ++ ++enum { ++ XSC_IPOIB_INLINE_MIN_HEADER_SIZE = 4, ++ XSC_SOURCE_QPN_INLINE_MAX_HEADER_SIZE = 18, ++ XSC_ETH_L2_INLINE_HEADER_SIZE = 18, ++ XSC_ETH_L2_MIN_HEADER_SIZE = 14, ++}; ++ ++struct xsc_wqe_umr_ctrl_seg { ++ uint8_t flags; ++ uint8_t rsvd0[3]; ++ __be16 klm_octowords; ++ __be16 translation_offset; ++ __be64 mkey_mask; ++ uint8_t rsvd1[32]; ++}; ++ ++struct xsc_wqe_umr_klm_seg { ++ /* up to 2GB */ ++ __be32 byte_count; ++ __be32 mkey; ++ __be64 address; ++}; ++ ++union xsc_wqe_umr_inline_seg { ++ struct xsc_wqe_umr_klm_seg klm; ++}; ++ ++struct xsc_wqe_mkey_context_seg { ++ uint8_t free; ++ uint8_t reserved1; ++ uint8_t access_flags; ++ uint8_t sf; ++ __be32 qpn_mkey; ++ __be32 reserved2; ++ __be32 flags_pd; ++ __be64 start_addr; ++ __be64 len; ++ __be32 bsf_octword_size; ++ __be32 reserved3[4]; ++ __be32 translations_octword_size; ++ uint8_t reserved4[3]; ++ uint8_t log_page_size; ++ __be32 reserved; ++ union xsc_wqe_umr_inline_seg inseg[0]; ++}; ++ ++struct xsc_rwqe_sig { ++ uint8_t rsvd0[4]; ++ uint8_t signature; ++ uint8_t rsvd1[11]; ++}; ++ ++#endif /* WQE_H */ +diff --git a/providers/xscale/xsc-abi.h b/providers/xscale/xsc-abi.h +new file mode 100644 +index 0000000..7eab95c +--- /dev/null ++++ b/providers/xscale/xsc-abi.h +@@ -0,0 +1,56 @@ ++/* ++ * Copyright (c) 2021 - 2022, Shanghai Yunsilicon Technology Co., Ltd. ++ * All rights reserved. 
++ */ ++ ++#ifndef XSC_ABI_H ++#define XSC_ABI_H ++ ++#include ++#include ++#include ++#include ++#include "xscdv.h" ++ ++#define XSC_UVERBS_MIN_ABI_VERSION 1 ++#define XSC_UVERBS_MAX_ABI_VERSION 1 ++ ++DECLARE_DRV_CMD(xsc_alloc_ucontext, IB_USER_VERBS_CMD_GET_CONTEXT, ++ xsc_ib_alloc_ucontext_req, xsc_ib_alloc_ucontext_resp); ++DECLARE_DRV_CMD(xsc_create_ah, IB_USER_VERBS_CMD_CREATE_AH, ++ empty, xsc_ib_create_ah_resp); ++DECLARE_DRV_CMD(xsc_alloc_pd, IB_USER_VERBS_CMD_ALLOC_PD, ++ empty, xsc_ib_alloc_pd_resp); ++DECLARE_DRV_CMD(xsc_create_cq, IB_USER_VERBS_CMD_CREATE_CQ, ++ xsc_ib_create_cq, xsc_ib_create_cq_resp); ++DECLARE_DRV_CMD(xsc_create_cq_ex, IB_USER_VERBS_EX_CMD_CREATE_CQ, ++ xsc_ib_create_cq, xsc_ib_create_cq_resp); ++DECLARE_DRV_CMD(xsc_create_qp_ex, IB_USER_VERBS_EX_CMD_CREATE_QP, ++ xsc_ib_create_qp, xsc_ib_create_qp_resp); ++DECLARE_DRV_CMD(xsc_create_qp_ex_rss, IB_USER_VERBS_EX_CMD_CREATE_QP, ++ xsc_ib_create_qp_rss, xsc_ib_create_qp_resp); ++DECLARE_DRV_CMD(xsc_create_qp, IB_USER_VERBS_CMD_CREATE_QP, ++ xsc_ib_create_qp, xsc_ib_create_qp_resp); ++DECLARE_DRV_CMD(xsc_create_wq, IB_USER_VERBS_EX_CMD_CREATE_WQ, ++ xsc_ib_create_wq, xsc_ib_create_wq_resp); ++DECLARE_DRV_CMD(xsc_modify_wq, IB_USER_VERBS_EX_CMD_MODIFY_WQ, ++ xsc_ib_modify_wq, empty); ++DECLARE_DRV_CMD(xsc_create_rwq_ind_table, IB_USER_VERBS_EX_CMD_CREATE_RWQ_IND_TBL, ++ empty, empty); ++DECLARE_DRV_CMD(xsc_destroy_rwq_ind_table, IB_USER_VERBS_EX_CMD_DESTROY_RWQ_IND_TBL, ++ empty, empty); ++DECLARE_DRV_CMD(xsc_resize_cq, IB_USER_VERBS_CMD_RESIZE_CQ, ++ xsc_ib_resize_cq, empty); ++DECLARE_DRV_CMD(xsc_query_device_ex, IB_USER_VERBS_EX_CMD_QUERY_DEVICE, ++ empty, xsc_ib_query_device_resp); ++DECLARE_DRV_CMD(xsc_modify_qp_ex, IB_USER_VERBS_EX_CMD_MODIFY_QP, ++ empty, xsc_ib_modify_qp_resp); ++ ++struct xsc_modify_qp { ++ struct ibv_modify_qp_ex ibv_cmd; ++ __u32 comp_mask; ++ struct xsc_ib_burst_info burst_info; ++ __u32 reserved; ++}; ++ ++#endif /* XSC_ABI_H */ +diff --git a/providers/xscale/xsc_api.h b/providers/xscale/xsc_api.h +new file mode 100644 +index 0000000..c533019 +--- /dev/null ++++ b/providers/xscale/xsc_api.h +@@ -0,0 +1,29 @@ ++/* ++ * Copyright (c) 2021 - 2022, Shanghai Yunsilicon Technology Co., Ltd. ++ * All rights reserved. 
++ */ ++ ++#ifndef XSC_API_H ++#define XSC_API_H ++ ++#include ++ ++#define xscdv_flow_action_flags xsc_ib_uapi_flow_action_flags ++#define XSCDV_FLOW_ACTION_FLAGS_REQUIRE_METADATA XSC_IB_UAPI_FLOW_ACTION_FLAGS_REQUIRE_METADATA ++#define xscdv_flow_table_type xsc_ib_uapi_flow_table_type ++#define XSCDV_FLOW_TABLE_TYPE_NIC_RX XSC_IB_UAPI_FLOW_TABLE_TYPE_NIC_RX ++#define XSCDV_FLOW_TABLE_TYPE_NIC_TX XSC_IB_UAPI_FLOW_TABLE_TYPE_NIC_TX ++#define xscdv_flow_action_packet_reformat_type xsc_ib_uapi_flow_action_packet_reformat_type ++#define XSCDV_FLOW_ACTION_PACKET_REFORMAT_TYPE_L2_TUNNEL_TO_L2 XSC_IB_UAPI_FLOW_ACTION_PACKET_REFORMAT_TYPE_L2_TUNNEL_TO_L2 ++#define XSCDV_FLOW_ACTION_PACKET_REFORMAT_TYPE_L2_TO_L2_TUNNEL XSC_IB_UAPI_FLOW_ACTION_PACKET_REFORMAT_TYPE_L2_TO_L2_TUNNEL ++#define XSCDV_FLOW_ACTION_PACKET_REFORMAT_TYPE_L3_TUNNEL_TO_L2 XSC_IB_UAPI_FLOW_ACTION_PACKET_REFORMAT_TYPE_L3_TUNNEL_TO_L2 ++#define XSCDV_FLOW_ACTION_PACKET_REFORMAT_TYPE_L2_TO_L3_TUNNEL XSC_IB_UAPI_FLOW_ACTION_PACKET_REFORMAT_TYPE_L2_TO_L3_TUNNEL ++ ++enum xsc_qp_create_flags { ++ XSC_QP_CREATE_RAWPACKE_TSO = 1 << 0, ++ XSC_QP_CREATE_RAWPACKET_TSO = 1 << 0, ++ XSC_QP_CREATE_RAWPACKET_TX = 1 << 1, ++}; ++ ++ ++#endif +diff --git a/providers/xscale/xsc_hsi.h b/providers/xscale/xsc_hsi.h +new file mode 100644 +index 0000000..53fe552 +--- /dev/null ++++ b/providers/xscale/xsc_hsi.h +@@ -0,0 +1,252 @@ ++/* ++ * Copyright (c) 2021 - 2022, Shanghai Yunsilicon Technology Co., Ltd. ++ * All rights reserved. ++ */ ++ ++#ifndef __XSC_HSI_H__ ++#define __XSC_HSI_H__ ++ ++#include ++#include ++#include "sqm_csr_defines.h" ++#include "rqm_csr_defines.h" ++#include "cqm_csr_defines.h" ++ ++#define upper_32_bits(n) ((uint32_t)(((n) >> 16) >> 16)) ++#define lower_32_bits(n) ((uint32_t)(n)) ++ ++#define DMA_LO_LE(x) __cpu_to_le32(lower_32_bits(x)) ++#define DMA_HI_LE(x) __cpu_to_le32(upper_32_bits(x)) ++#define DMA_REGPAIR_LE(x, val) do { \ ++ (x).hi = DMA_HI_LE((val)); \ ++ (x).lo = DMA_LO_LE((val)); \ ++ } while (0) ++ ++#define WR_LE_16(x, val) x = __cpu_to_le16(val) ++#define WR_LE_32(x, val) x = __cpu_to_le32(val) ++#define WR_LE_64(x, val) x = __cpu_to_le64(val) ++#define WR_LE_R64(x, val) DMA_REGPAIR_LE(x, val) ++#define WR_BE_32(x, val) x = __cpu_to_be32(val) ++ ++#define RD_LE_16(x) __le16_to_cpu(x) ++#define RD_LE_32(x) __le32_to_cpu(x) ++#define RD_BE_32(x) __be32_to_cpu(x) ++ ++#define WR_REG(addr, val) mmio_write32_le(addr, val) ++#define RD_REG(addr) mmio_read32_le(addr) ++ ++/* message opcode */ ++enum { ++ XSC_MSG_OPCODE_SEND = 0, ++ XSC_MSG_OPCODE_RDMA_WRITE = 1, ++ XSC_MSG_OPCODE_RDMA_READ = 2, ++ XSC_MSG_OPCODE_MAD = 3, ++ XSC_MSG_OPCODE_RDMA_ACK = 4, ++ XSC_MSG_OPCODE_RDMA_ACK_READ = 5, ++ XSC_MSG_OPCODE_RDMA_CNP = 6, ++ XSC_MSG_OPCODE_RAW = 7, ++ XSC_MSG_OPCODE_VIRTIO_NET = 8, ++ XSC_MSG_OPCODE_VIRTIO_BLK = 9, ++ XSC_MSG_OPCODE_RAW_TPE = 10, ++ XSC_MSG_OPCODE_INIT_QP_REQ = 11, ++ XSC_MSG_OPCODE_INIT_QP_RSP = 12, ++ XSC_MSG_OPCODE_INIT_PATH_REQ = 13, ++ XSC_MSG_OPCODE_INIT_PATH_RSP = 14, ++}; ++ ++enum { ++ XSC_REQ = 0, ++ XSC_RSP = 1, ++}; ++ ++enum { ++ XSC_WITHOUT_IMMDT = 0, ++ XSC_WITH_IMMDT = 1, ++}; ++ ++enum { ++ XSC_ERR_CODE_NAK_RETRY = 0x40, ++ XSC_ERR_CODE_NAK_OPCODE = 0x41, ++ XSC_ERR_CODE_NAK_MR = 0x42, ++ XSC_ERR_CODE_NAK_OPERATION = 0x43, ++ XSC_ERR_CODE_NAK_RNR = 0x44, ++ XSC_ERR_CODE_LOCAL_MR = 0x45, ++ XSC_ERR_CODE_LOCAL_LEN = 0x46, ++ XSC_ERR_CODE_LOCAL_OPCODE = 0x47, ++ XSC_ERR_CODE_CQ_OVER_FLOW = 0x48, ++ XSC_ERR_CODE_STRG_ACC_GEN_CQE = 0x4c, ++ XSC_ERR_CODE_CQE_ACC = 0x4d, ++ XSC_ERR_CODE_FLUSH 
= 0x4e, ++ XSC_ERR_CODE_MALF_WQE_HOST = 0x50, ++ XSC_ERR_CODE_MALF_WQE_INFO = 0x51, ++ XSC_ERR_CODE_MR_NON_NAK = 0x52, ++ XSC_ERR_CODE_OPCODE_GEN_CQE = 0x61, ++ XSC_ERR_CODE_MANY_READ = 0x62, ++ XSC_ERR_CODE_LEN_GEN_CQE = 0x63, ++ XSC_ERR_CODE_MR = 0x65, ++ XSC_ERR_CODE_MR_GEN_CQE = 0x66, ++ XSC_ERR_CODE_OPERATION = 0x67, ++ XSC_ERR_CODE_MALF_WQE_INFO_GEN_NAK = 0x68, ++}; ++ ++/* TODO: sw cqe opcode*/ ++enum { ++ XSC_OPCODE_RDMA_REQ_SEND = 0, ++ XSC_OPCODE_RDMA_REQ_SEND_IMMDT = 1, ++ XSC_OPCODE_RDMA_RSP_RECV = 2, ++ XSC_OPCODE_RDMA_RSP_RECV_IMMDT = 3, ++ XSC_OPCODE_RDMA_REQ_WRITE = 4, ++ XSC_OPCODE_RDMA_REQ_WRITE_IMMDT = 5, ++ XSC_OPCODE_RDMA_RSP_WRITE_IMMDT = 6, ++ XSC_OPCODE_RDMA_REQ_READ = 7, ++ XSC_OPCODE_RDMA_REQ_ERROR = 8, ++ XSC_OPCODE_RDMA_RSP_ERROR = 9, ++ XSC_OPCODE_RDMA_CQE_ERROR = 10, ++}; ++ ++enum { ++ XSC_BASE_WQE_SHIFT = 4, ++}; ++ ++/* ++ * Descriptors that are allocated by SW and accessed by HW, 32-byte aligned ++ */ ++/* this is to keep descriptor structures packed */ ++struct regpair { ++ __le32 lo; ++ __le32 hi; ++}; ++ ++struct xsc_send_wqe_ctrl_seg { ++ uint8_t msg_opcode; ++ uint8_t with_immdt:1; ++ uint8_t :2; ++ uint8_t ds_data_num:5; ++ __le16 wqe_id; ++ __le32 msg_len; ++ __le32 opcode_data; ++ uint8_t se:1; ++ uint8_t ce:1; ++ uint8_t in_line:1; ++ __le32 :29; ++}; ++ ++ ++struct xsc_wqe_data_seg { ++ union { ++ struct { ++ uint8_t :1; ++ __le32 seg_len:31; ++ __le32 mkey; ++ __le64 va; ++ }; ++ struct { ++ uint8_t in_line_data[16]; ++ }; ++ }; ++}; ++ ++struct xsc_cqe { ++ union { ++ uint8_t msg_opcode; ++ struct { ++ uint8_t error_code:7; ++ uint8_t is_error:1; ++ }; ++ }; ++ __le32 qp_id:15; ++ uint8_t :1; ++ uint8_t se:1; ++ uint8_t has_pph:1; ++ uint8_t type:1; ++ uint8_t with_immdt:1; ++ uint8_t csum_err:4; ++ __le32 imm_data; ++ __le32 msg_len; ++ __le32 vni; ++ __le64 ts:48; ++ __le16 wqe_id; ++ __le16 rsv[3]; ++ __le16 rsv1:15; ++ uint8_t owner:1; ++}; ++ ++/* Size of CQE */ ++#define XSC_CQE_SIZE sizeof(struct xsc_cqe) ++ ++union xsc_db_data { ++ struct { ++ __le32 sq_next_pid:16; ++ __le32 sqn:15; ++ __le32 :1; ++ }; ++ struct { ++ __le32 rq_next_pid:13; ++ __le32 rqn:15; ++ __le32 :4; ++ }; ++ struct { ++ __le32 cq_next_cid:16; ++ __le32 cqn:15; ++ __le32 solicited:1; ++ }; ++ __le32 raw_data; ++}; ++ ++#define CQM_DB_NEXT_CID_OFFSET(n) (4 * (n)) ++ ++#define XSC_SEND_WQE_RING_DEPTH_MIN 16 ++#define XSC_CQE_RING_DEPTH_MIN 2 ++#define XSC_SEND_WQE_RING_DEPTH_MAX 1024 ++#define XSC_RECV_WQE_RING_DEPTH_MAX 1024 ++#define XSC_CQE_RING_DEPTH_MAX (1024 * 32) ++ ++/* ++ * Registers that are allocated by HW and accessed by SW in 4-byte granularity ++ */ ++/* MMT table (32 bytes) */ ++struct xsc_mmt_tbl { ++ struct regpair pa; ++ struct regpair va; ++ __le32 size; ++#define XSC_MMT_TBL_PD_MASK 0x00FFFFFF ++#define XSC_MMT_TBL_KEY_MASK 0xFF000000 ++ __le32 key_pd; ++#define XSC_MMT_TBL_ACC_MASK 0x0000000F ++ __le32 acc; ++ uint8_t padding[4]; ++}; ++ ++/* QP Context (16 bytes) */ ++struct xsc_qp_context { ++#define XSC_QP_CONTEXT_STATE_MASK 0x00000007 ++#define XSC_QP_CONTEXT_FUNC_MASK 0x00000018 ++#define XSC_QP_CONTEXT_DSTID_MASK 0x000000E0 ++#define XSC_QP_CONTEXT_PD_MASK 0xFFFFFF00 ++ __le32 pd_dstid_func_state; ++#define XSC_QP_CONTEXT_DSTQP_MASK 0x00FFFFFF ++#define XSC_QP_CONTEXT_RCQIDL_MASK 0xFF000000 ++ __le32 rcqidl_dstqp; ++#define XSC_QP_CONTEXT_RCQIDH_MASK 0x0000FFFF ++#define XSC_QP_CONTEXT_SCQIDL_MASK 0xFFFF0000 ++ __le32 scqidl_rcqidh; ++#define XSC_QP_CONTEXT_SCQIDH_MASK 0x000000FF ++ __le32 scqidh; ++}; ++ ++/* TODO: EPP Table and others */ 
++ ++static inline bool xsc_get_cqe_sw_own(struct xsc_cqe *cqe, int cid, int ring_sz) ALWAYS_INLINE; ++ ++static inline void xsc_set_cqe_sw_own(struct xsc_cqe *cqe, int pid, int ring_sz) ALWAYS_INLINE; ++ ++static inline bool xsc_get_cqe_sw_own(struct xsc_cqe *cqe, int cid, int ring_sz) ++{ ++ return cqe->owner == ((cid >> ring_sz) & 1); ++} ++ ++static inline void xsc_set_cqe_sw_own(struct xsc_cqe *cqe, int pid, int ring_sz) ++{ ++ cqe->owner = ((pid >> ring_sz) & 1); ++} ++#endif /* __XSC_HSI_H__ */ +diff --git a/providers/xscale/xscale.c b/providers/xscale/xscale.c +new file mode 100644 +index 0000000..e6792b9 +--- /dev/null ++++ b/providers/xscale/xscale.c +@@ -0,0 +1,948 @@ ++/* ++ * Copyright (c) 2021 - 2022, Shanghai Yunsilicon Technology Co., Ltd. ++ * All rights reserved. ++ */ ++ ++#define _GNU_SOURCE ++#include ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include ++ ++#include "xscale.h" ++#include "xsc-abi.h" ++#include "wqe.h" ++#include "xsc_hsi.h" ++ ++#ifndef PCI_VENDOR_ID_MELLANOX ++#define PCI_VENDOR_ID_MELLANOX 0x15b3 ++#endif ++ ++#ifndef CPU_OR ++#define CPU_OR(x, y, z) do {} while (0) ++#endif ++ ++#ifndef CPU_EQUAL ++#define CPU_EQUAL(x, y) 1 ++#endif ++ ++#define HCA(v, d) VERBS_PCI_MATCH(PCI_VENDOR_ID_##v, d, NULL) ++static const struct verbs_match_ent hca_table[] = { ++ VERBS_MODALIAS_MATCH("*xscale*", NULL), ++ {} ++}; ++ ++uint32_t xsc_debug_mask = 0; ++int xsc_freeze_on_error_cqe; ++static void xsc_free_context(struct ibv_context *ibctx); ++ ++static const struct verbs_context_ops xsc_ctx_common_ops = { ++ .query_port = xsc_query_port, ++ .alloc_pd = xsc_alloc_pd, ++ .dealloc_pd = xsc_free_pd, ++ .reg_mr = xsc_reg_mr, ++ .rereg_mr = xsc_rereg_mr, ++ .dereg_mr = xsc_dereg_mr, ++ .alloc_mw = NULL, ++ .dealloc_mw = NULL, ++ .bind_mw = NULL, ++ .create_cq = xsc_create_cq, ++ .poll_cq = xsc_poll_cq, ++ .req_notify_cq = xsc_arm_cq, ++ .cq_event = xsc_cq_event, ++ .resize_cq = xsc_resize_cq, ++ .destroy_cq = xsc_destroy_cq, ++ .create_srq = NULL, ++ .modify_srq = NULL, ++ .query_srq = NULL, ++ .destroy_srq = NULL, ++ .post_srq_recv = NULL, ++ .create_qp = xsc_create_qp, ++ .query_qp = xsc_query_qp, ++ .modify_qp = xsc_modify_qp, ++ .destroy_qp = xsc_destroy_qp, ++ .post_send = xsc_post_send, ++ .post_recv = xsc_post_recv, ++ .create_ah = xsc_create_ah, ++ .destroy_ah = xsc_destroy_ah, ++ .attach_mcast = xsc_attach_mcast, ++ .detach_mcast = xsc_detach_mcast, ++ ++ .alloc_dm = xsc_alloc_dm, ++ .alloc_parent_domain = xsc_alloc_parent_domain, ++ .alloc_td = NULL, ++ .attach_counters_point_flow = xsc_attach_counters_point_flow, ++ .close_xrcd = xsc_close_xrcd, ++ .create_counters = xsc_create_counters, ++ .create_cq_ex = xsc_create_cq_ex, ++ .create_flow = xsc_create_flow, ++ .create_flow_action_esp = xsc_create_flow_action_esp, ++ .create_qp_ex = xsc_create_qp_ex, ++ .create_rwq_ind_table = xsc_create_rwq_ind_table, ++ .create_srq_ex = NULL, ++ .create_wq = xsc_create_wq, ++ .dealloc_td = NULL, ++ .destroy_counters = xsc_destroy_counters, ++ .destroy_flow = xsc_destroy_flow, ++ .destroy_flow_action = xsc_destroy_flow_action, ++ .destroy_rwq_ind_table = xsc_destroy_rwq_ind_table, ++ .destroy_wq = xsc_destroy_wq, ++ .free_dm = xsc_free_dm, ++ .get_srq_num = NULL, ++ .modify_cq = xsc_modify_cq, ++ .modify_flow_action_esp = xsc_modify_flow_action_esp, ++ .modify_qp_rate_limit = xsc_modify_qp_rate_limit, ++ .modify_wq = xsc_modify_wq, ++ .open_xrcd = xsc_open_xrcd, ++ .post_srq_ops = NULL, ++ .query_device_ex = 
xsc_query_device_ex, ++ .query_rt_values = xsc_query_rt_values, ++ .read_counters = xsc_read_counters, ++ .reg_dm_mr = xsc_reg_dm_mr, ++ .alloc_null_mr = xsc_alloc_null_mr, ++ .free_context = xsc_free_context, ++}; ++ ++static int read_number_from_line(const char *line, int *value) ++{ ++ const char *ptr; ++ ++ ptr = strchr(line, ':'); ++ if (!ptr) ++ return 1; ++ ++ ++ptr; ++ ++ *value = atoi(ptr); ++ return 0; ++} ++/** ++ * The function looks for the first free user-index in all the ++ * user-index tables. If all are used, returns -1, otherwise ++ * a valid user-index. ++ * In case the reference count of the table is zero, it means the ++ * table is not in use and wasn't allocated yet, therefore the ++ * xsc_store_uidx allocates the table, and increment the reference ++ * count on the table. ++ */ ++static int32_t get_free_uidx(struct xsc_context *ctx) ++{ ++ int32_t tind; ++ int32_t i; ++ ++ for (tind = 0; tind < XSC_UIDX_TABLE_SIZE; tind++) { ++ if (ctx->uidx_table[tind].refcnt < XSC_UIDX_TABLE_MASK) ++ break; ++ } ++ ++ if (tind == XSC_UIDX_TABLE_SIZE) ++ return -1; ++ ++ if (!ctx->uidx_table[tind].refcnt) ++ return tind << XSC_UIDX_TABLE_SHIFT; ++ ++ for (i = 0; i < XSC_UIDX_TABLE_MASK + 1; i++) { ++ if (!ctx->uidx_table[tind].table[i]) ++ break; ++ } ++ ++ return (tind << XSC_UIDX_TABLE_SHIFT) | i; ++} ++ ++int32_t xsc_store_uidx(struct xsc_context *ctx, void *rsc) ++{ ++ int32_t tind; ++ int32_t ret = -1; ++ int32_t uidx; ++ ++ pthread_mutex_lock(&ctx->uidx_table_mutex); ++ uidx = get_free_uidx(ctx); ++ if (uidx < 0) ++ goto out; ++ ++ tind = uidx >> XSC_UIDX_TABLE_SHIFT; ++ ++ if (!ctx->uidx_table[tind].refcnt) { ++ ctx->uidx_table[tind].table = calloc(XSC_UIDX_TABLE_MASK + 1, ++ sizeof(struct xsc_resource *)); ++ if (!ctx->uidx_table[tind].table) ++ goto out; ++ } ++ ++ ++ctx->uidx_table[tind].refcnt; ++ ctx->uidx_table[tind].table[uidx & XSC_UIDX_TABLE_MASK] = rsc; ++ ret = uidx; ++ ++out: ++ pthread_mutex_unlock(&ctx->uidx_table_mutex); ++ return ret; ++} ++ ++void xsc_clear_uidx(struct xsc_context *ctx, uint32_t uidx) ++{ ++ int tind = uidx >> XSC_UIDX_TABLE_SHIFT; ++ ++ pthread_mutex_lock(&ctx->uidx_table_mutex); ++ ++ if (!--ctx->uidx_table[tind].refcnt) ++ free(ctx->uidx_table[tind].table); ++ else ++ ctx->uidx_table[tind].table[uidx & XSC_UIDX_TABLE_MASK] = NULL; ++ ++ pthread_mutex_unlock(&ctx->uidx_table_mutex); ++} ++ ++static int xsc_is_sandy_bridge(int *num_cores) ++{ ++ char line[128]; ++ FILE *fd; ++ int rc = 0; ++ int cur_cpu_family = -1; ++ int cur_cpu_model = -1; ++ ++ fd = fopen("/proc/cpuinfo", "r"); ++ if (!fd) ++ return 0; ++ ++ *num_cores = 0; ++ ++ while (fgets(line, 128, fd)) { ++ int value; ++ ++ /* if this is information on new processor */ ++ if (!strncmp(line, "processor", 9)) { ++ ++*num_cores; ++ ++ cur_cpu_family = -1; ++ cur_cpu_model = -1; ++ } else if (!strncmp(line, "cpu family", 10)) { ++ if ((cur_cpu_family < 0) && (!read_number_from_line(line, &value))) ++ cur_cpu_family = value; ++ } else if (!strncmp(line, "model", 5)) { ++ if ((cur_cpu_model < 0) && (!read_number_from_line(line, &value))) ++ cur_cpu_model = value; ++ } ++ ++ /* if this is a Sandy Bridge CPU */ ++ if ((cur_cpu_family == 6) && ++ (cur_cpu_model == 0x2A || (cur_cpu_model == 0x2D) )) ++ rc = 1; ++ } ++ ++ fclose(fd); ++ return rc; ++} ++ ++/* ++man cpuset ++ ++ This format displays each 32-bit word in hexadecimal (using ASCII characters "0" - "9" and "a" - "f"); words ++ are filled with leading zeros, if required. 
For masks longer than one word, a comma separator is used between ++ words. Words are displayed in big-endian order, which has the most significant bit first. The hex digits ++ within a word are also in big-endian order. ++ ++ The number of 32-bit words displayed is the minimum number needed to display all bits of the bitmask, based on ++ the size of the bitmask. ++ ++ Examples of the Mask Format: ++ ++ 00000001 # just bit 0 set ++ 40000000,00000000,00000000 # just bit 94 set ++ 000000ff,00000000 # bits 32-39 set ++ 00000000,000E3862 # 1,5,6,11-13,17-19 set ++ ++ A mask with bits 0, 1, 2, 4, 8, 16, 32, and 64 set displays as: ++ ++ 00000001,00000001,00010117 ++ ++ The first "1" is for bit 64, the second for bit 32, the third for bit 16, the fourth for bit 8, the fifth for ++ bit 4, and the "7" is for bits 2, 1, and 0. ++*/ ++static void xsc_local_cpu_set(struct ibv_device *ibdev, cpu_set_t *cpu_set) ++{ ++ char *p, buf[1024] = {}; ++ char *env_value; ++ uint32_t word; ++ int i, k; ++ ++ env_value = getenv("XSC_LOCAL_CPUS"); ++ if (env_value) ++ strncpy(buf, env_value, sizeof(buf) - 1); ++ else { ++ char fname[MAXPATHLEN]; ++ FILE *fp; ++ ++ snprintf(fname, MAXPATHLEN, "/sys/class/infiniband/%s/device/local_cpus", ++ ibv_get_device_name(ibdev)); ++ ++ fp = fopen(fname, "r"); ++ if (!fp) { ++ fprintf(stderr, PFX "Warning: can not get local cpu set: failed to open %s\n", fname); ++ return; ++ } ++ if (!fgets(buf, sizeof(buf), fp)) { ++ fprintf(stderr, PFX "Warning: can not get local cpu set: failed to read cpu mask\n"); ++ fclose(fp); ++ return; ++ } ++ fclose(fp); ++ } ++ ++ p = strrchr(buf, ','); ++ if (!p) ++ p = buf; ++ ++ i = 0; ++ do { ++ if (*p == ',') { ++ *p = 0; ++ p ++; ++ } ++ ++ word = strtoul(p, NULL, 16); ++ ++ for (k = 0; word; ++k, word >>= 1) ++ if (word & 1) ++ CPU_SET(k+i, cpu_set); ++ ++ if (p == buf) ++ break; ++ ++ p = strrchr(buf, ','); ++ if (!p) ++ p = buf; ++ ++ i += 32; ++ } while (i < CPU_SETSIZE); ++} ++ ++static int xsc_enable_sandy_bridge_fix(struct ibv_device *ibdev) ++{ ++ cpu_set_t my_cpus, dev_local_cpus, result_set; ++ int stall_enable; ++ int ret; ++ int num_cores; ++ ++ if (!xsc_is_sandy_bridge(&num_cores)) ++ return 0; ++ ++ /* by default enable stall on sandy bridge arch */ ++ stall_enable = 1; ++ ++ /* ++ * check if app is bound to cpu set that is inside ++ * of device local cpu set. Disable stalling if true ++ */ ++ ++ /* use static cpu set - up to CPU_SETSIZE (1024) cpus/node */ ++ CPU_ZERO(&my_cpus); ++ CPU_ZERO(&dev_local_cpus); ++ CPU_ZERO(&result_set); ++ ret = sched_getaffinity(0, sizeof(my_cpus), &my_cpus); ++ if (ret == -1) { ++ if (errno == EINVAL) ++ fprintf(stderr, PFX "Warning: my cpu set is too small\n"); ++ else ++ fprintf(stderr, PFX "Warning: failed to get my cpu set\n"); ++ goto out; ++ } ++ ++ /* get device local cpu set */ ++ xsc_local_cpu_set(ibdev, &dev_local_cpus); ++ ++ /* check if my cpu set is in dev cpu */ ++ CPU_OR(&result_set, &my_cpus, &dev_local_cpus); ++ stall_enable = CPU_EQUAL(&result_set, &dev_local_cpus) ? 0 : 1; ++ ++out: ++ return stall_enable; ++} ++ ++static void xsc_read_env(struct ibv_device *ibdev, struct xsc_context *ctx) ++{ ++ char *env_value; ++ ++ env_value = getenv("XSC_STALL_CQ_POLL"); ++ if (env_value) ++ /* check if cq stall is enforced by user */ ++ ctx->stall_enable = (strcmp(env_value, "0")) ? 
1 : 0; ++ else ++ /* autodetect if we need to do cq polling */ ++ ctx->stall_enable = xsc_enable_sandy_bridge_fix(ibdev); ++ ++ env_value = getenv("XSC_STALL_NUM_LOOP"); ++ if (env_value) ++ xsc_stall_num_loop = atoi(env_value); ++ ++ env_value = getenv("XSC_STALL_CQ_POLL_MIN"); ++ if (env_value) ++ xsc_stall_cq_poll_min = atoi(env_value); ++ ++ env_value = getenv("XSC_STALL_CQ_POLL_MAX"); ++ if (env_value) ++ xsc_stall_cq_poll_max = atoi(env_value); ++ ++ env_value = getenv("XSC_STALL_CQ_INC_STEP"); ++ if (env_value) ++ xsc_stall_cq_inc_step = atoi(env_value); ++ ++ env_value = getenv("XSC_STALL_CQ_DEC_STEP"); ++ if (env_value) ++ xsc_stall_cq_dec_step = atoi(env_value); ++ ++ ctx->stall_adaptive_enable = 0; ++ ctx->stall_cycles = 0; ++ ++ if (xsc_stall_num_loop < 0) { ++ ctx->stall_adaptive_enable = 1; ++ ctx->stall_cycles = xsc_stall_cq_poll_min; ++ } ++ ++} ++ ++static void open_debug_file(struct xsc_context *ctx) ++{ ++ char *env; ++ ++ env = getenv("XSC_DEBUG_FILE"); ++ if (!env) { ++ ctx->dbg_fp = stderr; ++ return; ++ } ++ ++ ctx->dbg_fp = fopen(env, "aw+"); ++ if (!ctx->dbg_fp) { ++ fprintf(stderr, "Failed opening debug file %s, using stderr\n", env); ++ ctx->dbg_fp = stderr; ++ return; ++ } ++} ++ ++static void close_debug_file(struct xsc_context *ctx) ++{ ++ if (ctx->dbg_fp && ctx->dbg_fp != stderr) ++ fclose(ctx->dbg_fp); ++} ++ ++static void set_debug_mask(void) ++{ ++ char *env; ++ ++ env = getenv("XSC_DEBUG_MASK"); ++ if (env) ++ xsc_debug_mask = strtol(env, NULL, 0); ++} ++ ++static void set_freeze_on_error(void) ++{ ++ char *env; ++ ++ env = getenv("XSC_FREEZE_ON_ERROR_CQE"); ++ if (env) ++ xsc_freeze_on_error_cqe = strtol(env, NULL, 0); ++} ++ ++static int single_threaded_app(void) ++{ ++ ++ char *env; ++ ++ env = getenv("XSC_SINGLE_THREADED"); ++ if (env) ++ return strcmp(env, "1") ? 
0 : 1; ++ ++ return 0; ++} ++ ++static int xsc_cmd_get_context(struct xsc_context *context, ++ struct xsc_alloc_ucontext *req, ++ size_t req_len, ++ struct xsc_alloc_ucontext_resp *resp, ++ size_t resp_len) ++{ ++ struct verbs_context *verbs_ctx = &context->ibv_ctx; ++ ++ return ibv_cmd_get_context(verbs_ctx, &req->ibv_cmd, ++ req_len, &resp->ibv_resp, resp_len); ++} ++ ++int xscdv_query_device(struct ibv_context *ctx_in, ++ struct xscdv_context *attrs_out) ++{ ++ struct xsc_context *xctx = to_xctx(ctx_in); ++ uint64_t comp_mask_out = 0; ++ ++ attrs_out->version = 0; ++ attrs_out->flags = 0; ++ ++ if (xctx->cqe_version == XSC_CQE_VERSION_V1) ++ attrs_out->flags |= XSCDV_CONTEXT_FLAGS_CQE_V1; ++ ++ if (xctx->vendor_cap_flags & XSC_VENDOR_CAP_FLAGS_MPW_ALLOWED) ++ attrs_out->flags |= XSCDV_CONTEXT_FLAGS_MPW_ALLOWED; ++ ++ if (xctx->vendor_cap_flags & XSC_VENDOR_CAP_FLAGS_CQE_128B_COMP) ++ attrs_out->flags |= XSCDV_CONTEXT_FLAGS_CQE_128B_COMP; ++ ++ if (xctx->vendor_cap_flags & XSC_VENDOR_CAP_FLAGS_CQE_128B_PAD) ++ attrs_out->flags |= XSCDV_CONTEXT_FLAGS_CQE_128B_PAD; ++ ++ if (attrs_out->comp_mask & XSCDV_CONTEXT_MASK_CQE_COMPRESION) { ++ attrs_out->cqe_comp_caps = xctx->cqe_comp_caps; ++ comp_mask_out |= XSCDV_CONTEXT_MASK_CQE_COMPRESION; ++ } ++ ++ if (xctx->vendor_cap_flags & XSC_VENDOR_CAP_FLAGS_ENHANCED_MPW) ++ attrs_out->flags |= XSCDV_CONTEXT_FLAGS_ENHANCED_MPW; ++ ++ if (attrs_out->comp_mask & XSCDV_CONTEXT_MASK_SWP) { ++ attrs_out->sw_parsing_caps = xctx->sw_parsing_caps; ++ comp_mask_out |= XSCDV_CONTEXT_MASK_SWP; ++ } ++ ++ if (attrs_out->comp_mask & XSCDV_CONTEXT_MASK_STRIDING_RQ) { ++ attrs_out->striding_rq_caps = xctx->striding_rq_caps; ++ comp_mask_out |= XSCDV_CONTEXT_MASK_STRIDING_RQ; ++ } ++ ++ if (attrs_out->comp_mask & XSCDV_CONTEXT_MASK_TUNNEL_OFFLOADS) { ++ attrs_out->tunnel_offloads_caps = xctx->tunnel_offloads_caps; ++ comp_mask_out |= XSCDV_CONTEXT_MASK_TUNNEL_OFFLOADS; ++ } ++ ++ if (attrs_out->comp_mask & XSCDV_CONTEXT_MASK_CLOCK_INFO_UPDATE) { ++ if (xctx->clock_info_page) { ++ attrs_out->max_clock_info_update_nsec = ++ xctx->clock_info_page->overflow_period; ++ comp_mask_out |= XSCDV_CONTEXT_MASK_CLOCK_INFO_UPDATE; ++ } ++ } ++ ++ if (attrs_out->comp_mask & XSCDV_CONTEXT_MASK_FLOW_ACTION_FLAGS) { ++ attrs_out->flow_action_flags = xctx->flow_action_flags; ++ comp_mask_out |= XSCDV_CONTEXT_MASK_FLOW_ACTION_FLAGS; ++ } ++ ++ attrs_out->comp_mask = comp_mask_out; ++ ++ return 0; ++} ++ ++static int xscdv_get_qp(struct ibv_qp *qp_in, ++ struct xscdv_qp *qp_out) ++{ ++ struct xsc_qp *xqp = to_xqp(qp_in); ++ uint64_t mask_out = 0; ++ ++ qp_out->dbrec = xqp->db; ++ qp_out->sq.db = xqp->sq.db; ++ qp_out->rq.db = xqp->rq.db; ++ ++ if (xqp->sq_buf_size) ++ /* IBV_QPT_RAW_PACKET */ ++ qp_out->sq.buf = (void *)((uintptr_t)xqp->sq_buf.buf); ++ else ++ qp_out->sq.buf = (void *)((uintptr_t)xqp->buf.buf + xqp->sq.offset); ++ qp_out->sq.wqe_cnt = xqp->sq.wqe_cnt; ++ qp_out->sq.stride = 1 << xqp->sq.wqe_shift; ++ ++ qp_out->rq.buf = (void *)((uintptr_t)xqp->buf.buf + xqp->rq.offset); ++ qp_out->rq.wqe_cnt = xqp->rq.wqe_cnt; ++ qp_out->rq.stride = 1 << xqp->rq.wqe_shift; ++ ++ if (qp_out->comp_mask & XSCDV_QP_MASK_RAW_QP_HANDLES) { ++ qp_out->tirn = xqp->tirn; ++ qp_out->tisn = xqp->tisn; ++ qp_out->rqn = xqp->rqn; ++ qp_out->sqn = xqp->sqn; ++ mask_out |= XSCDV_QP_MASK_RAW_QP_HANDLES; ++ } ++ ++ qp_out->comp_mask = mask_out; ++ ++ return 0; ++} ++ ++static int xscdv_get_cq(struct ibv_cq *cq_in, ++ struct xscdv_cq *cq_out) ++{ ++ struct xsc_cq *xcq = to_xcq(cq_in); ++ ++ 
cq_out->comp_mask = 0; ++ cq_out->cqn = xcq->cqn; ++ cq_out->cqe_cnt = xcq->verbs_cq.cq_ex.cqe; ++ cq_out->cqe_size = xcq->cqe_sz; ++ cq_out->buf = xcq->active_buf->buf; ++ cq_out->dbrec = xcq->dbrec; ++ cq_out->db = xcq->db; ++ xcq->flags |= XSC_CQ_FLAGS_DV_OWNED; ++ ++ return 0; ++} ++ ++static int xscdv_get_rwq(struct ibv_wq *wq_in, ++ struct xscdv_rwq *rwq_out) ++{ ++ struct xsc_rwq *xrwq = to_xrwq(wq_in); ++ ++ rwq_out->comp_mask = 0; ++ rwq_out->buf = xrwq->pbuff; ++ rwq_out->dbrec = xrwq->recv_db; ++ rwq_out->wqe_cnt = xrwq->rq.wqe_cnt; ++ rwq_out->stride = 1 << xrwq->rq.wqe_shift; ++ rwq_out->db = xrwq->rq.db; ++ ++ return 0; ++} ++ ++static int xscdv_get_dm(struct ibv_dm *dm_in, ++ struct xscdv_dm *dm_out) ++{ ++ struct xsc_dm *xdm = to_xdm(dm_in); ++ ++ dm_out->comp_mask = 0; ++ dm_out->buf = xdm->start_va; ++ dm_out->length = xdm->length; ++ ++ return 0; ++} ++ ++static int xscdv_get_av(struct ibv_ah *ah_in, ++ struct xscdv_ah *ah_out) ++{ ++ struct xsc_ah *xah = to_xah(ah_in); ++ ++ ah_out->comp_mask = 0; ++ ah_out->av = &xah->av; ++ ++ return 0; ++} ++ ++static int xscdv_get_pd(struct ibv_pd *pd_in, ++ struct xscdv_pd *pd_out) ++{ ++ struct xsc_pd *xpd = to_xpd(pd_in); ++ ++ pd_out->comp_mask = 0; ++ pd_out->pdn = xpd->pdn; ++ ++ return 0; ++} ++ ++ int xscdv_init_obj(struct xscdv_obj *obj, uint64_t obj_type) ++{ ++ int ret = 0; ++ ++ if (obj_type & XSCDV_OBJ_QP) ++ ret = xscdv_get_qp(obj->qp.in, obj->qp.out); ++ if (!ret && (obj_type & XSCDV_OBJ_CQ)) ++ ret = xscdv_get_cq(obj->cq.in, obj->cq.out); ++ if (!ret && (obj_type & XSCDV_OBJ_RWQ)) ++ ret = xscdv_get_rwq(obj->rwq.in, obj->rwq.out); ++ if (!ret && (obj_type & XSCDV_OBJ_DM)) ++ ret = xscdv_get_dm(obj->dm.in, obj->dm.out); ++ if (!ret && (obj_type & XSCDV_OBJ_AH)) ++ ret = xscdv_get_av(obj->ah.in, obj->ah.out); ++ if (!ret && (obj_type & XSCDV_OBJ_PD)) ++ ret = xscdv_get_pd(obj->pd.in, obj->pd.out); ++ ++ return ret; ++} ++ ++int xscdv_set_context_attr(struct ibv_context *ibv_ctx, ++ enum xscdv_set_ctx_attr_type type, void *attr) ++{ ++ struct xsc_context *ctx = to_xctx(ibv_ctx); ++ ++ switch (type) { ++ case XSCDV_CTX_ATTR_BUF_ALLOCATORS: ++ ctx->extern_alloc = *((struct xscdv_ctx_allocators *)attr); ++ break; ++ default: ++ return ENOTSUP; ++ } ++ ++ return 0; ++} ++ ++int xscdv_get_clock_info(struct ibv_context *ctx_in, ++ struct xscdv_clock_info *clock_info) ++{ ++ struct xsc_context *ctx = to_xctx(ctx_in); ++ const struct xsc_ib_clock_info *ci = ctx->clock_info_page; ++ uint32_t retry, tmp_sig; ++ atomic_uint32_t *sig; ++ ++ if (!ci) ++ return EINVAL; ++ ++ sig = (atomic_uint32_t *)&ci->sign; ++ ++ do { ++ retry = 10; ++repeat: ++ tmp_sig = atomic_load(sig); ++ if (unlikely(tmp_sig & ++ XSC_IB_CLOCK_INFO_KERNEL_UPDATING)) { ++ if (--retry) ++ goto repeat; ++ return EBUSY; ++ } ++ clock_info->nsec = ci->nsec; ++ clock_info->last_cycles = ci->cycles; ++ clock_info->frac = ci->frac; ++ clock_info->mult = ci->mult; ++ clock_info->shift = ci->shift; ++ clock_info->mask = ci->mask; ++ } while (unlikely(tmp_sig != atomic_load(sig))); ++ ++ return 0; ++} ++ ++struct ibv_context * ++xscdv_open_device(struct ibv_device *device, struct xscdv_context_attr *attr) ++{ ++ return verbs_open_device(device, attr); ++} ++ ++static int xsc_mmap(struct xsc_device *xdev, struct xsc_context *context, ++ int cmd_fd, int size) ++{ ++ uint64_t page_mask; ++ ++ page_mask = (~(xdev->page_size - 1)); ++ xsc_dbg(context->dbg_fp, XSC_DBG_CTX, "page size:%d\n", size); ++ context->sqm_reg_va = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED, ++ 
cmd_fd, context->qpm_tx_db & page_mask); ++ if (context->sqm_reg_va == MAP_FAILED) { ++ return -1; ++ } ++ xsc_dbg(context->dbg_fp, XSC_DBG_CTX, "qpm reg va:%p\n", context->sqm_reg_va); ++ ++ context->rqm_reg_va = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED, ++ cmd_fd, context->qpm_rx_db & page_mask); ++ if (context->rqm_reg_va == MAP_FAILED) { ++ goto free_sqm; ++ } ++ xsc_dbg(context->dbg_fp, XSC_DBG_CTX, "qpm reg va:%p\n", context->rqm_reg_va); ++ ++ context->cqm_reg_va = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED, ++ cmd_fd, context->cqm_next_cid_reg & page_mask); ++ if (context->cqm_reg_va == MAP_FAILED) { ++ goto free_rqm; ++ } ++ xsc_dbg(context->dbg_fp, XSC_DBG_CTX, "cqm ci va:%p\n", context->cqm_reg_va); ++ context->db_mmap_size = size; ++ ++ context->cqm_armdb_va = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED, ++ cmd_fd, context->cqm_armdb & page_mask); ++ if (context->cqm_armdb_va == MAP_FAILED) { ++ goto free_cqm; ++ } ++ xsc_dbg(context->dbg_fp, XSC_DBG_CTX, "cqm armdb va:%p\n", context->cqm_armdb_va); ++ ++ return 0; ++ ++free_cqm: ++ munmap(context->cqm_reg_va, size); ++free_rqm: ++ munmap(context->rqm_reg_va, size); ++free_sqm: ++ munmap(context->sqm_reg_va, size); ++ ++ return -1; ++ ++} ++static void xsc_munmap(struct xsc_context *context) ++{ ++ if (context->sqm_reg_va) ++ munmap(context->sqm_reg_va, context->db_mmap_size); ++ ++ if (context->rqm_reg_va) ++ munmap(context->rqm_reg_va, context->db_mmap_size); ++ ++ if (context->cqm_reg_va) ++ munmap(context->cqm_reg_va, context->db_mmap_size); ++ ++ if (context->cqm_armdb_va) ++ munmap(context->cqm_armdb_va, context->db_mmap_size); ++ ++} ++static struct verbs_context *xsc_alloc_context(struct ibv_device *ibdev, ++ int cmd_fd, ++ void *private_data) ++{ ++ struct xsc_context *context; ++ struct xsc_alloc_ucontext req; ++ struct xsc_alloc_ucontext_resp resp; ++ int i; ++ int page_size; ++ int j; ++ struct xsc_device *xdev = to_xdev(ibdev); ++ struct verbs_context *v_ctx; ++ struct ibv_port_attr port_attr; ++ struct ibv_device_attr_ex device_attr; ++ struct xscdv_context_attr *ctx_attr = private_data; ++ ++ if (ctx_attr && ctx_attr->comp_mask) { ++ errno = EINVAL; ++ return NULL; ++ } ++ ++ context = verbs_init_and_alloc_context(ibdev, cmd_fd, context, ibv_ctx, ++ RDMA_DRIVER_XSC); ++ if (!context) ++ return NULL; ++ ++ v_ctx = &context->ibv_ctx; ++ page_size = xdev->page_size; ++ xsc_single_threaded = single_threaded_app(); ++ ++ open_debug_file(context); ++ set_debug_mask(); ++ set_freeze_on_error(); ++ if (gethostname(context->hostname, sizeof(context->hostname))) ++ strcpy(context->hostname, "host_unknown"); ++ ++ memset(&req, 0, sizeof(req)); ++ memset(&resp, 0, sizeof(resp)); ++ ++ if (xsc_cmd_get_context(context, &req, sizeof(req), &resp, ++ sizeof(resp))) ++ goto err_free; ++ ++ context->max_num_qps = resp.qp_tab_size; ++ context->cache_line_size = resp.cache_line_size; ++ context->max_sq_desc_sz = resp.max_sq_desc_sz; ++ context->max_rq_desc_sz = resp.max_rq_desc_sz; ++ context->max_send_wqebb = resp.max_send_wqebb; ++ context->num_ports = resp.num_ports; ++ context->max_recv_wr = resp.max_recv_wr; ++ context->qpm_tx_db = resp.qpm_tx_db; ++ context->qpm_rx_db = resp.qpm_rx_db; ++ context->cqm_next_cid_reg = resp.cqm_next_cid_reg; ++ context->cqm_armdb = resp.cqm_armdb; ++ context->send_ds_num = resp.send_ds_num; ++ context->send_ds_shift = xsc_ilog2(resp.send_ds_num); ++ context->recv_ds_num = resp.recv_ds_num; ++ context->recv_ds_shift = xsc_ilog2(resp.recv_ds_num); ++ ++ 
xsc_dbg(context->dbg_fp, XSC_DBG_CTX, ++ "max_num_qps:%u, max_sq_desc_sz:%u max_rq_desc_sz:%u " \ ++ "max_send_wqebb:%u, num_ports:%u, max_recv_wr:%u\n", ++ context->max_num_qps, context->max_sq_desc_sz, ++ context->max_rq_desc_sz, context->max_send_wqebb, ++ context->num_ports, context->max_recv_wr); ++ ++ xsc_dbg(context->dbg_fp, XSC_DBG_CTX, "send_ds_num:%u shift:%u recv_ds_num:%u shift:%u\n", ++ context->send_ds_num, context->send_ds_shift, ++ context->recv_ds_num, context->recv_ds_shift); ++ context->dump_fill_mkey = XSC_INVALID_LKEY; ++ context->dump_fill_mkey_be = htobe32(XSC_INVALID_LKEY); ++ context->eth_min_inline_size = XSC_ETH_L2_INLINE_HEADER_SIZE; ++ context->cmds_supp_uhw = resp.cmds_supp_uhw; ++ ++ pthread_mutex_init(&context->qp_table_mutex, NULL); ++ pthread_mutex_init(&context->uidx_table_mutex, NULL); ++ for (i = 0; i < XSC_QP_TABLE_SIZE; ++i) ++ context->qp_table[i].refcnt = 0; ++ ++ for (i = 0; i < XSC_QP_TABLE_SIZE; ++i) ++ context->uidx_table[i].refcnt = 0; ++ ++ context->db_list = NULL; ++ context->page_size = page_size; ++ if (xsc_mmap(xdev, context, cmd_fd, page_size)) ++ goto err_free; ++ ++ pthread_mutex_init(&context->db_list_mutex, NULL); ++ ++ context->hca_core_clock = NULL; ++ context->clock_info_page = NULL; ++ ++ xsc_read_env(ibdev, context); ++ ++ xsc_spinlock_init(&context->hugetlb_lock, !xsc_single_threaded); ++ list_head_init(&context->hugetlb_list); ++ ++ verbs_set_ops(v_ctx, &xsc_ctx_common_ops); ++ ++ memset(&device_attr, 0, sizeof(device_attr)); ++ if (!xsc_query_device_ex(&v_ctx->context, NULL, &device_attr, ++ sizeof(struct ibv_device_attr_ex))) { ++ context->cached_device_cap_flags = ++ device_attr.orig_attr.device_cap_flags; ++ context->atomic_cap = device_attr.orig_attr.atomic_cap; ++ context->cached_tso_caps = device_attr.tso_caps; ++ context->max_dm_size = device_attr.max_dm_size; ++ } ++ ++ for (j = 0; j < min(XSC_MAX_PORTS_NUM, context->num_ports); ++j) { ++ memset(&port_attr, 0, sizeof(port_attr)); ++ if (!xsc_query_port(&v_ctx->context, j + 1, &port_attr)) { ++ context->cached_link_layer[j] = port_attr.link_layer; ++ context->cached_port_flags[j] = port_attr.flags; ++ } ++ } ++ ++ return v_ctx; ++ ++err_free: ++ verbs_uninit_context(&context->ibv_ctx); ++ close_debug_file(context); ++ free(context); ++ return NULL; ++} ++ ++static void xsc_free_context(struct ibv_context *ibctx) ++{ ++ struct xsc_context *context = to_xctx(ibctx); ++ ++ xsc_dbg(context->dbg_fp, XSC_DBG_CTX, "\n"); ++ xsc_munmap(context); ++ ++ verbs_uninit_context(&context->ibv_ctx); ++ close_debug_file(context); ++ free(context); ++} ++ ++static void xsc_uninit_device(struct verbs_device *verbs_device) ++{ ++ struct xsc_device *xdev = to_xdev(&verbs_device->device); ++ ++ free(xdev); ++} ++ ++static struct verbs_device *xsc_device_alloc(struct verbs_sysfs_dev *sysfs_dev) ++{ ++ struct xsc_device *xdev; ++ ++ xdev = calloc(1, sizeof *xdev); ++ if (!xdev) ++ return NULL; ++ ++ xdev->page_size = sysconf(_SC_PAGESIZE); ++ xdev->driver_abi_ver = sysfs_dev->abi_ver; ++ ++ return &xdev->verbs_dev; ++} ++ ++static const struct verbs_device_ops xsc_dev_ops = { ++ .name = "xscale", ++ .match_min_abi_version = XSC_UVERBS_MIN_ABI_VERSION, ++ .match_max_abi_version = XSC_UVERBS_MAX_ABI_VERSION, ++ .match_table = hca_table, ++ .alloc_device = xsc_device_alloc, ++ .uninit_device = xsc_uninit_device, ++ .alloc_context = xsc_alloc_context, ++}; ++PROVIDER_DRIVER(xscale, xsc_dev_ops); +diff --git a/providers/xscale/xscale.h b/providers/xscale/xscale.h +new file mode 100644 +index 
0000000..0aee472 +--- /dev/null ++++ b/providers/xscale/xscale.h +@@ -0,0 +1,834 @@ ++/* ++ * Copyright (c) 2021 - 2022, Shanghai Yunsilicon Technology Co., Ltd. ++ * All rights reserved. ++ */ ++ ++#ifndef XSCALE_H ++#define XSCALE_H ++ ++#include ++#include ++#include ++#include ++#include ++ ++#include ++#include ++#include "xsc-abi.h" ++#include ++#include "bitmap.h" ++#include ++#include "xscdv.h" ++ ++#include ++ ++#define PFX "xsc: " ++#define offsetofend(_type, _member) \ ++ (offsetof(_type, _member) + sizeof(((_type *)0)->_member)) ++ ++typedef _Atomic(uint32_t) atomic_uint32_t; ++ ++enum { ++ XSC_IB_MMAP_CMD_SHIFT = 8, ++ XSC_IB_MMAP_CMD_MASK = 0xff, ++}; ++ ++enum { ++ XSC_CQE_VERSION_V0 = 0, ++ XSC_CQE_VERSION_V1 = 1, ++}; ++ ++enum { ++ XSC_ADAPTER_PAGE_SIZE = 4096, ++}; ++ ++enum { ++ XSC_QP_FLAG_RAWPACKET_TSO = 1 << 9, ++ XSC_QP_FLAG_RAWPACKET_TX = 1 << 10, ++}; ++ ++ ++#define XSC_CQ_PREFIX "XSC_CQ" ++#define XSC_QP_PREFIX "XSC_QP" ++#define XSC_MR_PREFIX "XSC_MR" ++#define XSC_RWQ_PREFIX "XSC_RWQ" ++#define XSC_MAX_LOG2_CONTIG_BLOCK_SIZE 23 ++#define XSC_MIN_LOG2_CONTIG_BLOCK_SIZE 12 ++ ++enum { ++ XSC_DBG_QP = 1 << 0, ++ XSC_DBG_CQ = 1 << 1, ++ XSC_DBG_QP_SEND = 1 << 2, ++ XSC_DBG_QP_SEND_ERR = 1 << 3, ++ XSC_DBG_CQ_CQE = 1 << 4, ++ XSC_DBG_CONTIG = 1 << 5, ++ XSC_DBG_DR = 1 << 6, ++ XSC_DBG_CTX = 1 << 7, ++ XSC_DBG_PD = 1 << 8, ++ XSC_DBG_MR = 1 << 9, ++}; ++ ++extern uint32_t xsc_debug_mask; ++extern int xsc_freeze_on_error_cqe; ++ ++#define XSC_DEBUG ++#ifdef XSC_DEBUG ++#define xsc_dbg(fp, mask, fmt, args...) \ ++do { \ ++ if (xsc_debug_mask & mask) { \ ++ char host[256]; \ ++ char timestr[32]; \ ++ struct tm now_tm; \ ++ time_t now_time; \ ++ time(&now_time); \ ++ localtime_r(&now_time, &now_tm); \ ++ strftime(timestr, sizeof(timestr), "%Y-%m-%d %X", &now_tm); \ ++ gethostname(host, 256); \ ++ fprintf(fp, "[%s %s %s %d] " fmt, timestr, host, __func__, __LINE__, ##args); \ ++ } \ ++} while (0) ++#else ++static inline void xsc_dbg(FILE *fp, uint32_t mask, const char *fmt, ...) ++{ ++} ++#endif ++ ++#define xsc_err(fmt, args...) 
\ ++do { \ ++ char host[256]; \ ++ char timestr[32]; \ ++ struct tm now_tm; \ ++ time_t now_time; \ ++ time(&now_time); \ ++ localtime_r(&now_time, &now_tm); \ ++ strftime(timestr, sizeof(timestr), "%Y-%m-%d %X", &now_tm); \ ++ gethostname(host, 256); \ ++ printf("[%s %s %s %d] " fmt, timestr, host, __func__, __LINE__, ##args); \ ++} while (0) ++ ++enum { ++ XSC_QP_TABLE_SHIFT = 12, ++ XSC_QP_TABLE_MASK = (1 << XSC_QP_TABLE_SHIFT) - 1, ++ XSC_QP_TABLE_SIZE = 1 << (24 - XSC_QP_TABLE_SHIFT), ++}; ++ ++enum { ++ XSC_UIDX_TABLE_SHIFT = 12, ++ XSC_UIDX_TABLE_MASK = (1 << XSC_UIDX_TABLE_SHIFT) - 1, ++ XSC_UIDX_TABLE_SIZE = 1 << (24 - XSC_UIDX_TABLE_SHIFT), ++}; ++ ++enum { ++ XSC_MAX_PORTS_NUM = 2, ++}; ++ ++enum xsc_alloc_type { ++ XSC_ALLOC_TYPE_ANON, ++ XSC_ALLOC_TYPE_HUGE, ++ XSC_ALLOC_TYPE_CONTIG, ++ XSC_ALLOC_TYPE_PREFER_HUGE, ++ XSC_ALLOC_TYPE_PREFER_CONTIG, ++ XSC_ALLOC_TYPE_EXTERNAL, ++ XSC_ALLOC_TYPE_ALL ++}; ++ ++enum xsc_rsc_type { ++ XSC_RSC_TYPE_QP, ++ XSC_RSC_TYPE_XSRQ, ++ XSC_RSC_TYPE_SRQ, ++ XSC_RSC_TYPE_RWQ, ++ XSC_RSC_TYPE_INVAL, ++}; ++ ++enum xsc_vendor_cap_flags { ++ XSC_VENDOR_CAP_FLAGS_MPW = 1 << 0, /* Obsoleted */ ++ XSC_VENDOR_CAP_FLAGS_MPW_ALLOWED = 1 << 1, ++ XSC_VENDOR_CAP_FLAGS_ENHANCED_MPW = 1 << 2, ++ XSC_VENDOR_CAP_FLAGS_CQE_128B_COMP = 1 << 3, ++ XSC_VENDOR_CAP_FLAGS_CQE_128B_PAD = 1 << 4, ++}; ++ ++enum { ++ XSC_FLOW_TAG_MASK = 0x00ffffff, ++}; ++ ++struct xsc_resource { ++ enum xsc_rsc_type type; ++ uint32_t rsn; ++}; ++ ++struct xsc_device { ++ struct verbs_device verbs_dev; ++ int page_size; ++ int driver_abi_ver; ++}; ++ ++struct xsc_db_page; ++ ++struct xsc_spinlock { ++ pthread_spinlock_t lock; ++ int in_use; ++ int need_lock; ++}; ++ ++/* PAGE_SHIFT determines the page size */ ++ ++#define PAGE_SHIFT 12 ++#define PAGE_SIZE (1UL << PAGE_SHIFT) ++#define PAGE_MASK (~(PAGE_SIZE-1)) ++ ++struct xsc_context { ++ struct verbs_context ibv_ctx; ++ int max_num_qps; ++ struct { ++ struct xsc_qp **table; ++ int refcnt; ++ } qp_table[XSC_QP_TABLE_SIZE]; ++ pthread_mutex_t qp_table_mutex; ++ ++ struct { ++ struct xsc_resource **table; ++ int refcnt; ++ } uidx_table[XSC_UIDX_TABLE_SIZE]; ++ pthread_mutex_t uidx_table_mutex; ++ ++ struct xsc_db_page *db_list; ++ pthread_mutex_t db_list_mutex; ++ int cache_line_size; ++ int max_sq_desc_sz; ++ int max_rq_desc_sz; ++ int max_send_wqebb; ++ int max_recv_wr; ++ int num_ports; ++ int stall_enable; ++ int stall_adaptive_enable; ++ int stall_cycles; ++ char hostname[40]; ++ struct xsc_spinlock hugetlb_lock; ++ struct list_head hugetlb_list; ++ int cqe_version; ++ uint8_t cached_link_layer[XSC_MAX_PORTS_NUM]; ++ uint8_t cached_port_flags[XSC_MAX_PORTS_NUM]; ++ unsigned int cached_device_cap_flags; ++ enum ibv_atomic_cap atomic_cap; ++ struct { ++ uint64_t offset; ++ uint64_t mask; ++ } core_clock; ++ void *hca_core_clock; ++ const struct xsc_ib_clock_info *clock_info_page; ++ struct ibv_tso_caps cached_tso_caps; ++ int cmds_supp_uhw; ++ uint64_t vendor_cap_flags; /* Use enum xsc_vendor_cap_flags */ ++ struct xscdv_cqe_comp_caps cqe_comp_caps; ++ struct xscdv_ctx_allocators extern_alloc; ++ struct xscdv_sw_parsing_caps sw_parsing_caps; ++ struct xscdv_striding_rq_caps striding_rq_caps; ++ uint32_t tunnel_offloads_caps; ++ struct xsc_packet_pacing_caps packet_pacing_caps; ++ uint16_t flow_action_flags; ++ uint64_t max_dm_size; ++ uint32_t eth_min_inline_size; ++ uint32_t dump_fill_mkey; ++ __be32 dump_fill_mkey_be; ++ void *sqm_reg_va; ++ void *rqm_reg_va; ++ void *cqm_reg_va; ++ void *cqm_armdb_va; ++ int db_mmap_size; ++ 
uint32_t page_size; ++ uint64_t qpm_tx_db; ++ uint64_t qpm_rx_db; ++ uint64_t cqm_next_cid_reg; ++ uint64_t cqm_armdb; ++ uint32_t send_ds_num; ++ uint32_t recv_ds_num; ++ uint32_t send_ds_shift; ++ uint32_t recv_ds_shift; ++ FILE *dbg_fp; ++}; ++ ++struct xsc_bitmap { ++ uint32_t last; ++ uint32_t top; ++ uint32_t max; ++ uint32_t avail; ++ uint32_t mask; ++ unsigned long *table; ++}; ++ ++struct xsc_hugetlb_mem { ++ int shmid; ++ void *shmaddr; ++ struct xsc_bitmap bitmap; ++ struct list_node entry; ++}; ++ ++struct xsc_buf { ++ void *buf; ++ size_t length; ++ int base; ++ struct xsc_hugetlb_mem *hmem; ++ enum xsc_alloc_type type; ++}; ++ ++struct xsc_pd { ++ struct ibv_pd ibv_pd; ++ uint32_t pdn; ++ atomic_int refcount; ++ struct xsc_pd *xprotection_domain; ++}; ++ ++struct xsc_parent_domain { ++ struct xsc_pd xpd; ++}; ++ ++enum { ++ XSC_CQ_FLAGS_RX_CSUM_VALID = 1 << 0, ++ XSC_CQ_FLAGS_EMPTY_DURING_POLL = 1 << 1, ++ XSC_CQ_FLAGS_FOUND_CQES = 1 << 2, ++ XSC_CQ_FLAGS_EXTENDED = 1 << 3, ++ XSC_CQ_FLAGS_SINGLE_THREADED = 1 << 4, ++ XSC_CQ_FLAGS_DV_OWNED = 1 << 5, ++ XSC_CQ_FLAGS_TM_SYNC_REQ = 1 << 6, ++}; ++ ++struct xsc_err_state_qp_node { ++ struct list_node entry; ++ uint32_t qp_id; ++ bool is_sq; ++}; ++ ++struct xsc_cq { ++ /* ibv_cq should always be subset of ibv_cq_ex */ ++ struct verbs_cq verbs_cq; ++ struct xsc_buf buf_a; ++ struct xsc_buf buf_b; ++ struct xsc_buf *active_buf; ++ struct xsc_buf *resize_buf; ++ int resize_cqes; ++ int active_cqes; ++ struct xsc_spinlock lock; ++ uint32_t cqn; ++ uint32_t cons_index; ++ __le32 *dbrec; ++ __le32 *db; ++ __le32 *armdb; ++ uint32_t cqe_cnt; ++ int log2_cq_ring_sz; ++ int arm_sn; ++ int cqe_sz; ++ int resize_cqe_sz; ++ int stall_next_poll; ++ int stall_enable; ++ uint64_t stall_last_count; ++ int stall_adaptive_enable; ++ int stall_cycles; ++ struct xsc_resource *cur_rsc; ++ struct xsc_cqe64 *cqe64; ++ uint32_t flags; ++ int umr_opcode; ++ struct xscdv_clock_info last_clock_info; ++ bool disable_flush_error_cqe; ++ struct list_head err_state_qp_list; ++}; ++ ++struct wr_list { ++ uint16_t opcode; ++ uint16_t next; ++}; ++ ++struct xsc_wq { ++ uint64_t *wrid; ++ unsigned *wqe_head; ++ struct xsc_spinlock lock; ++ unsigned wqe_cnt; ++ unsigned max_post; ++ unsigned head; ++ unsigned tail; ++ unsigned cur_post; ++ int max_gs; ++ int wqe_shift; ++ int offset; ++ void *qend; ++ uint32_t *wr_data; ++ __le32 *db; ++ unsigned ds_cnt; ++ unsigned seg_cnt; ++ unsigned *wr_opcode; ++ unsigned *need_flush; ++ unsigned flush_wqe_cnt; ++}; ++ ++struct xsc_dm { ++ struct verbs_dm verbs_dm; ++ size_t length; ++ void *mmap_va; ++ void *start_va; ++}; ++ ++struct xsc_mr { ++ struct verbs_mr vmr; ++ struct xsc_buf buf; ++ uint32_t alloc_flags; ++}; ++ ++enum xsc_qp_flags { ++ XSC_QP_FLAGS_USE_UNDERLAY = 0x01, ++}; ++ ++struct xsc_qp { ++ struct xsc_resource rsc; /* This struct must be first */ ++ struct verbs_qp verbs_qp; ++ struct ibv_qp *ibv_qp; ++ struct xsc_buf buf; ++ void *sq_start; ++ void *rq_start; ++ int max_inline_data; ++ int buf_size; ++ /* For Raw Packet QP, use different buffers for the SQ and RQ */ ++ struct xsc_buf sq_buf; ++ int sq_buf_size; ++ ++ uint8_t fm_cache; ++ uint8_t sq_signal_bits; ++ struct xsc_wq sq; ++ ++ __le32 *db; ++ struct xsc_wq rq; ++ int wq_sig; ++ uint32_t qp_cap_cache; ++ int atomics_enabled; ++ uint32_t max_tso; ++ uint16_t max_tso_header; ++ int rss_qp; ++ uint32_t flags; /* Use enum xsc_qp_flags */ ++ enum xscdv_dc_type dc_type; ++ uint32_t tirn; ++ uint32_t tisn; ++ uint32_t rqn; ++ uint32_t sqn; ++}; ++ 
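++/*
++ * Address handle: 'av' is kept in the device WQE format
++ * (struct xsc_wqe_av, the same layout embedded in the datagram
++ * segment) and is what xscdv_init_obj() hands back for
++ * XSCDV_OBJ_AH via xscdv_ah.av.
++ */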
++struct xsc_ah { ++ struct ibv_ah ibv_ah; ++ struct xsc_wqe_av av; ++ bool kern_ah; ++}; ++ ++struct xsc_rwq { ++ struct xsc_resource rsc; ++ struct ibv_wq wq; ++ struct xsc_buf buf; ++ int buf_size; ++ struct xsc_wq rq; ++ __le32 *db; ++ void *pbuff; ++ __le32 *recv_db; ++ int wq_sig; ++}; ++ ++struct xsc_counter_node { ++ uint32_t index; ++ struct list_node entry; ++ enum ibv_counter_description desc; ++}; ++ ++struct xsc_counters { ++ struct verbs_counters vcounters; ++ struct list_head counters_list; ++ pthread_mutex_t lock; ++ uint32_t ncounters; ++ /* number of bounded objects */ ++ int refcount; ++}; ++ ++struct xsc_flow { ++ struct ibv_flow flow_id; ++ struct xsc_counters *mcounters; ++}; ++ ++struct xscdv_flow_matcher { ++ struct ibv_context *context; ++ uint32_t handle; ++}; ++ ++struct xscdv_devx_obj { ++ struct ibv_context *context; ++ uint32_t handle; ++}; ++ ++struct xsc_devx_umem { ++ struct xscdv_devx_umem dv_devx_umem; ++ struct ibv_context *context; ++ uint32_t handle; ++ void *addr; ++ size_t size; ++}; ++ ++union xsc_ib_fw_ver { ++ uint64_t data; ++ struct { ++ uint8_t ver_major; ++ uint8_t ver_minor; ++ uint16_t ver_patch; ++ uint32_t ver_tweak; ++ } s; ++}; ++ ++static inline int xsc_ilog2(int n) ++{ ++ int t; ++ ++ if (n <= 0) ++ return -1; ++ ++ t = 0; ++ while ((1 << t) < n) ++ ++t; ++ ++ return t; ++} ++ ++extern int xsc_stall_num_loop; ++extern int xsc_stall_cq_poll_min; ++extern int xsc_stall_cq_poll_max; ++extern int xsc_stall_cq_inc_step; ++extern int xsc_stall_cq_dec_step; ++extern int xsc_single_threaded; ++ ++static inline unsigned DIV_ROUND_UP(unsigned n, unsigned d) ++{ ++ return (n + d - 1u) / d; ++} ++ ++static inline unsigned long align(unsigned long val, unsigned long align) ++{ ++ return (val + align - 1) & ~(align - 1); ++} ++ ++static inline struct xsc_device *to_xdev(struct ibv_device *ibdev) ++{ ++ return container_of(ibdev, struct xsc_device, verbs_dev.device); ++} ++ ++static inline struct xsc_context *to_xctx(struct ibv_context *ibctx) ++{ ++ return container_of(ibctx, struct xsc_context, ibv_ctx.context); ++} ++ ++/* to_xpd always returns the real xsc_pd object ie the protection domain. */ ++static inline struct xsc_pd *to_xpd(struct ibv_pd *ibpd) ++{ ++ struct xsc_pd *xpd = container_of(ibpd, struct xsc_pd, ibv_pd); ++ ++ if (xpd->xprotection_domain) ++ return xpd->xprotection_domain; ++ ++ return xpd; ++} ++ ++static inline struct xsc_parent_domain *to_xparent_domain(struct ibv_pd *ibpd) ++{ ++ struct xsc_parent_domain *xparent_domain = ++ ibpd ? 
container_of(ibpd, struct xsc_parent_domain, xpd.ibv_pd) : NULL; ++ ++ if (xparent_domain && xparent_domain->xpd.xprotection_domain) ++ return xparent_domain; ++ ++ /* Otherwise ibpd isn't a parent_domain */ ++ return NULL; ++} ++ ++static inline struct xsc_cq *to_xcq(struct ibv_cq *ibcq) ++{ ++ return container_of((struct ibv_cq_ex *)ibcq, struct xsc_cq, verbs_cq.cq_ex); ++} ++ ++static inline struct xsc_qp *to_xqp(struct ibv_qp *ibqp) ++{ ++ struct verbs_qp *vqp = (struct verbs_qp *)ibqp; ++ ++ return container_of(vqp, struct xsc_qp, verbs_qp); ++} ++ ++static inline struct xsc_rwq *to_xrwq(struct ibv_wq *ibwq) ++{ ++ return container_of(ibwq, struct xsc_rwq, wq); ++} ++ ++static inline struct xsc_dm *to_xdm(struct ibv_dm *ibdm) ++{ ++ return container_of(ibdm, struct xsc_dm, verbs_dm.dm); ++} ++ ++static inline struct xsc_mr *to_xmr(struct ibv_mr *ibmr) ++{ ++ return container_of(ibmr, struct xsc_mr, vmr.ibv_mr); ++} ++ ++static inline struct xsc_ah *to_xah(struct ibv_ah *ibah) ++{ ++ return container_of(ibah, struct xsc_ah, ibv_ah); ++} ++ ++static inline int max_int(int a, int b) ++{ ++ return a > b ? a : b; ++} ++ ++static inline struct xsc_qp *rsc_to_xqp(struct xsc_resource *rsc) ++{ ++ return (struct xsc_qp *)rsc; ++} ++ ++static inline struct xsc_rwq *rsc_to_mrwq(struct xsc_resource *rsc) ++{ ++ return (struct xsc_rwq *)rsc; ++} ++ ++static inline struct xsc_counters *to_mcounters(struct ibv_counters *ibcounters) ++{ ++ return container_of(ibcounters, struct xsc_counters, vcounters.counters); ++} ++ ++static inline struct xsc_flow *to_mflow(struct ibv_flow *flow_id) ++{ ++ return container_of(flow_id, struct xsc_flow, flow_id); ++} ++ ++int xsc_alloc_buf(struct xsc_buf *buf, size_t size, int page_size); ++void xsc_free_buf(struct xsc_buf *buf); ++int xsc_alloc_buf_contig(struct xsc_context *xctx, struct xsc_buf *buf, ++ size_t size, int page_size, const char *component); ++void xsc_free_buf_contig(struct xsc_context *xctx, struct xsc_buf *buf); ++int xsc_alloc_prefered_buf(struct xsc_context *xctx, ++ struct xsc_buf *buf, ++ size_t size, int page_size, ++ enum xsc_alloc_type alloc_type, ++ const char *component); ++int xsc_free_actual_buf(struct xsc_context *ctx, struct xsc_buf *buf); ++void xsc_get_alloc_type(struct xsc_context *context, ++ const char *component, ++ enum xsc_alloc_type *alloc_type, ++ enum xsc_alloc_type default_alloc_type); ++int xsc_use_huge(const char *key); ++bool xsc_is_extern_alloc(struct xsc_context *context); ++int xsc_alloc_buf_extern(struct xsc_context *ctx, struct xsc_buf *buf, ++ size_t size); ++void xsc_free_buf_extern(struct xsc_context *ctx, struct xsc_buf *buf); ++ ++__le32 *xsc_alloc_dbrec(struct xsc_context *context); ++void xsc_free_db(struct xsc_context *context, __le32 *db); ++ ++int xsc_query_device(struct ibv_context *context, ++ struct ibv_device_attr *attr); ++int xsc_query_device_ex(struct ibv_context *context, ++ const struct ibv_query_device_ex_input *input, ++ struct ibv_device_attr_ex *attr, ++ size_t attr_size); ++int xsc_query_rt_values(struct ibv_context *context, ++ struct ibv_values_ex *values); ++struct ibv_qp *xsc_create_qp_ex(struct ibv_context *context, ++ struct ibv_qp_init_attr_ex *attr); ++int xsc_query_port(struct ibv_context *context, uint8_t port, ++ struct ibv_port_attr *attr); ++ ++struct ibv_pd *xsc_alloc_pd(struct ibv_context *context); ++int xsc_free_pd(struct ibv_pd *pd); ++ ++struct ibv_mr *xsc_alloc_null_mr(struct ibv_pd *pd); ++struct ibv_mr *xsc_reg_mr(struct ibv_pd *pd, void *addr, ++ size_t length, uint64_t 
hca_va, int access); ++int xsc_rereg_mr(struct verbs_mr *mr, int flags, struct ibv_pd *pd, void *addr, ++ size_t length, int access); ++int xsc_dereg_mr(struct verbs_mr *mr); ++struct ibv_cq *xsc_create_cq(struct ibv_context *context, int cqe, ++ struct ibv_comp_channel *channel, ++ int comp_vector); ++struct ibv_cq_ex *xsc_create_cq_ex(struct ibv_context *context, ++ struct ibv_cq_init_attr_ex *cq_attr); ++int xsc_cq_fill_pfns(struct xsc_cq *cq, ++ const struct ibv_cq_init_attr_ex *cq_attr, ++ struct xsc_context *xctx); ++int xsc_alloc_cq_buf(struct xsc_context *xctx, struct xsc_cq *cq, ++ struct xsc_buf *buf, int nent, int cqe_sz); ++int xsc_free_cq_buf(struct xsc_context *ctx, struct xsc_buf *buf); ++int xsc_resize_cq(struct ibv_cq *cq, int cqe); ++int xsc_modify_cq(struct ibv_cq *cq, struct ibv_modify_cq_attr *attr); ++int xsc_destroy_cq(struct ibv_cq *cq); ++int xsc_poll_cq(struct ibv_cq *cq, int ne, struct ibv_wc *wc); ++int xsc_arm_cq(struct ibv_cq *cq, int solicited); ++void xsc_cq_event(struct ibv_cq *cq); ++void __xsc_cq_clean(struct xsc_cq *cq, uint32_t qpn); ++void xsc_cq_clean(struct xsc_cq *cq, uint32_t qpn); ++ ++struct ibv_qp *xsc_create_qp(struct ibv_pd *pd, struct ibv_qp_init_attr *attr); ++int xsc_query_qp(struct ibv_qp *qp, struct ibv_qp_attr *attr, ++ int attr_mask, ++ struct ibv_qp_init_attr *init_attr); ++int xsc_modify_qp(struct ibv_qp *qp, struct ibv_qp_attr *attr, ++ int attr_mask); ++int xsc_modify_qp_rate_limit(struct ibv_qp *qp, ++ struct ibv_qp_rate_limit_attr *attr); ++int xsc_destroy_qp(struct ibv_qp *qp); ++void xsc_init_qp_indices(struct xsc_qp *qp); ++void xsc_init_rwq_indices(struct xsc_rwq *rwq); ++int xsc_post_send(struct ibv_qp *ibqp, struct ibv_send_wr *wr, ++ struct ibv_send_wr **bad_wr); ++int xsc_post_recv(struct ibv_qp *ibqp, struct ibv_recv_wr *wr, ++ struct ibv_recv_wr **bad_wr); ++int xsc_post_wq_recv(struct ibv_wq *ibwq, struct ibv_recv_wr *wr, ++ struct ibv_recv_wr **bad_wr); ++struct xsc_qp *xsc_find_qp(struct xsc_context *ctx, uint32_t qpn); ++int xsc_store_qp(struct xsc_context *ctx, uint32_t qpn, struct xsc_qp *qp); ++void xsc_clear_qp(struct xsc_context *ctx, uint32_t qpn); ++int xsc_err_state_qp(struct ibv_qp *qp, enum ibv_qp_state cur_state, ++ enum ibv_qp_state state); ++int32_t xsc_store_uidx(struct xsc_context *ctx, void *rsc); ++void xsc_clear_uidx(struct xsc_context *ctx, uint32_t uidx); ++struct ibv_ah *xsc_create_ah(struct ibv_pd *pd, struct ibv_ah_attr *attr); ++int xsc_destroy_ah(struct ibv_ah *ah); ++int xsc_attach_mcast(struct ibv_qp *qp, const union ibv_gid *gid, uint16_t lid); ++int xsc_detach_mcast(struct ibv_qp *qp, const union ibv_gid *gid, uint16_t lid); ++int xsc_round_up_power_of_two(long long sz); ++void *xsc_get_send_wqe(struct xsc_qp *qp, int n); ++struct ibv_xrcd *xsc_open_xrcd(struct ibv_context *context, ++ struct ibv_xrcd_init_attr *xrcd_init_attr); ++int xsc_close_xrcd(struct ibv_xrcd *ib_xrcd); ++struct ibv_wq *xsc_create_wq(struct ibv_context *context, ++ struct ibv_wq_init_attr *attr); ++int xsc_modify_wq(struct ibv_wq *wq, struct ibv_wq_attr *attr); ++int xsc_destroy_wq(struct ibv_wq *wq); ++struct ibv_rwq_ind_table *xsc_create_rwq_ind_table(struct ibv_context *context, ++ struct ibv_rwq_ind_table_init_attr *init_attr); ++int xsc_destroy_rwq_ind_table(struct ibv_rwq_ind_table *rwq_ind_table); ++struct ibv_flow *xsc_create_flow(struct ibv_qp *qp, struct ibv_flow_attr *flow_attr); ++int xsc_destroy_flow(struct ibv_flow *flow_id); ++struct ibv_flow_action *xsc_create_flow_action_esp(struct ibv_context *ctx, 
++ struct ibv_flow_action_esp_attr *attr);
++int xsc_destroy_flow_action(struct ibv_flow_action *action);
++int xsc_modify_flow_action_esp(struct ibv_flow_action *action,
++ struct ibv_flow_action_esp_attr *attr);
++
++struct ibv_dm *xsc_alloc_dm(struct ibv_context *context,
++ struct ibv_alloc_dm_attr *dm_attr);
++int xsc_free_dm(struct ibv_dm *ibdm);
++struct ibv_mr *xsc_reg_dm_mr(struct ibv_pd *pd, struct ibv_dm *ibdm,
++ uint64_t dm_offset, size_t length,
++ unsigned int acc);
++
++struct ibv_pd *xsc_alloc_parent_domain(struct ibv_context *context,
++ struct ibv_parent_domain_init_attr *attr);
++
++
++struct ibv_counters *xsc_create_counters(struct ibv_context *context,
++ struct ibv_counters_init_attr *init_attr);
++int xsc_destroy_counters(struct ibv_counters *counters);
++int xsc_attach_counters_point_flow(struct ibv_counters *counters,
++ struct ibv_counter_attach_attr *attr,
++ struct ibv_flow *flow);
++int xsc_read_counters(struct ibv_counters *counters,
++ uint64_t *counters_value,
++ uint32_t ncounters,
++ uint32_t flags);
++
++static inline void *xsc_find_uidx(struct xsc_context *ctx, uint32_t uidx)
++{
++ int tind = uidx >> XSC_UIDX_TABLE_SHIFT;
++
++ if (likely(ctx->uidx_table[tind].refcnt))
++ return ctx->uidx_table[tind].table[uidx & XSC_UIDX_TABLE_MASK];
++
++ return NULL;
++}
++
++static inline int xsc_spin_lock(struct xsc_spinlock *lock)
++{
++ if (lock->need_lock)
++ return pthread_spin_lock(&lock->lock);
++
++ if (unlikely(lock->in_use)) {
++ fprintf(stderr, "*** ERROR: multithreading violation ***\n"
++ "You are running a multithreaded application but\n"
++ "you set XSC_SINGLE_THREADED=1. Please unset it.\n");
++ abort();
++ } else {
++ lock->in_use = 1;
++ /*
++ * This fence is not at all correct, but it increases the
++ * chance that in_use is detected by another thread without
++ * much runtime cost.
*/ ++ atomic_thread_fence(memory_order_acq_rel); ++ } ++ ++ return 0; ++} ++ ++static inline int xsc_spin_unlock(struct xsc_spinlock *lock) ++{ ++ if (lock->need_lock) ++ return pthread_spin_unlock(&lock->lock); ++ ++ lock->in_use = 0; ++ ++ return 0; ++} ++ ++static inline int xsc_spinlock_init(struct xsc_spinlock *lock, int need_lock) ++{ ++ lock->in_use = 0; ++ lock->need_lock = need_lock; ++ return pthread_spin_init(&lock->lock, PTHREAD_PROCESS_PRIVATE); ++} ++ ++static inline int xsc_spinlock_init_pd(struct xsc_spinlock *lock, struct ibv_pd *pd) ++{ ++ int thread_safe = xsc_single_threaded; ++ ++ return xsc_spinlock_init(lock, !thread_safe); ++} ++ ++static inline int xsc_spinlock_destroy(struct xsc_spinlock *lock) ++{ ++ return pthread_spin_destroy(&lock->lock); ++} ++ ++static inline void set_command(int command, off_t *offset) ++{ ++ *offset |= (command << XSC_IB_MMAP_CMD_SHIFT); ++} ++ ++static inline void set_arg(int arg, off_t *offset) ++{ ++ *offset |= arg; ++} ++ ++static inline void set_order(int order, off_t *offset) ++{ ++ set_arg(order, offset); ++} ++ ++static inline void set_index(int index, off_t *offset) ++{ ++ set_arg(index, offset); ++} ++ ++static inline void set_extended_index(int index, off_t *offset) ++{ ++ *offset |= (index & 0xff) | ((index >> 8) << 16); ++} ++ ++static inline uint8_t calc_sig(void *wqe, int size) ++{ ++ int i; ++ uint8_t *p = wqe; ++ uint8_t res = 0; ++ ++ for (i = 0; i < size; ++i) ++ res ^= p[i]; ++ ++ return ~res; ++} ++ ++#endif /* XSC_H */ +diff --git a/providers/xscale/xscdv.h b/providers/xscale/xscdv.h +new file mode 100644 +index 0000000..98d2daf +--- /dev/null ++++ b/providers/xscale/xscdv.h +@@ -0,0 +1,876 @@ ++/* ++ * Copyright (c) 2021 - 2022, Shanghai Yunsilicon Technology Co., Ltd. ++ * All rights reserved. 
++ */ ++ ++#ifndef _XSCDV_H_ ++#define _XSCDV_H_ ++ ++#include ++#include /* For the __be64 type */ ++#include ++#include ++#if defined(__SSE3__) ++#include ++#include ++#include ++#endif /* defined(__SSE3__) */ ++ ++#include ++#include ++#include ++ ++#ifdef __cplusplus ++extern "C" { ++#endif ++ ++/* Always inline the functions */ ++#ifdef __GNUC__ ++#define XSCDV_ALWAYS_INLINE inline __attribute__((always_inline)) ++#else ++#define XSCDV_ALWAYS_INLINE inline ++#endif ++ ++enum { ++ XSC_RCV_DBR = 0, ++ XSC_SND_DBR = 1, ++}; ++ ++enum xscdv_context_comp_mask { ++ XSCDV_CONTEXT_MASK_CQE_COMPRESION = 1 << 0, ++ XSCDV_CONTEXT_MASK_SWP = 1 << 1, ++ XSCDV_CONTEXT_MASK_STRIDING_RQ = 1 << 2, ++ XSCDV_CONTEXT_MASK_TUNNEL_OFFLOADS = 1 << 3, ++ XSCDV_CONTEXT_MASK_DYN_BFREGS = 1 << 4, ++ XSCDV_CONTEXT_MASK_CLOCK_INFO_UPDATE = 1 << 5, ++ XSCDV_CONTEXT_MASK_FLOW_ACTION_FLAGS = 1 << 6, ++}; ++ ++struct xscdv_cqe_comp_caps { ++ uint32_t max_num; ++ uint32_t supported_format; /* enum xscdv_cqe_comp_res_format */ ++}; ++ ++struct xscdv_sw_parsing_caps { ++ uint32_t sw_parsing_offloads; /* Use enum xscdv_sw_parsing_offloads */ ++ uint32_t supported_qpts; ++}; ++ ++struct xscdv_striding_rq_caps { ++ uint32_t min_single_stride_log_num_of_bytes; ++ uint32_t max_single_stride_log_num_of_bytes; ++ uint32_t min_single_wqe_log_num_of_strides; ++ uint32_t max_single_wqe_log_num_of_strides; ++ uint32_t supported_qpts; ++}; ++ ++/* ++ * Direct verbs device-specific attributes ++ */ ++struct xscdv_context { ++ uint8_t version; ++ uint64_t flags; ++ uint64_t comp_mask; ++ struct xscdv_cqe_comp_caps cqe_comp_caps; ++ struct xscdv_sw_parsing_caps sw_parsing_caps; ++ struct xscdv_striding_rq_caps striding_rq_caps; ++ uint32_t tunnel_offloads_caps; ++ uint64_t max_clock_info_update_nsec; ++ uint32_t flow_action_flags; ++}; ++ ++enum xscsdv_context_flags { ++ /* ++ * This flag indicates if CQE version 0 or 1 is needed. 
++ */ ++ XSCDV_CONTEXT_FLAGS_CQE_V1 = (1 << 0), ++ XSCDV_CONTEXT_FLAGS_OBSOLETE = (1 << 1), /* Obsoleted, don't use */ ++ XSCDV_CONTEXT_FLAGS_MPW_ALLOWED = (1 << 2), ++ XSCDV_CONTEXT_FLAGS_ENHANCED_MPW = (1 << 3), ++ XSCDV_CONTEXT_FLAGS_CQE_128B_COMP = (1 << 4), /* Support CQE 128B compression */ ++ XSCDV_CONTEXT_FLAGS_CQE_128B_PAD = (1 << 5), /* Support CQE 128B padding */ ++}; ++ ++enum xscdv_cq_init_attr_mask { ++ XSCDV_CQ_INIT_ATTR_MASK_COMPRESSED_CQE = 1 << 0, ++ XSCDV_CQ_INIT_ATTR_MASK_FLAGS = 1 << 1, ++ XSCDV_CQ_INIT_ATTR_MASK_CQE_SIZE = 1 << 2, ++}; ++ ++struct xscdv_cq_init_attr { ++ uint64_t comp_mask; /* Use enum xscdv_cq_init_attr_mask */ ++ uint8_t cqe_comp_res_format; /* Use enum xscdv_cqe_comp_res_format */ ++ uint32_t flags; ++ uint16_t cqe_size; /* when XSCDV_CQ_INIT_ATTR_MASK_CQE_SIZE set */ ++}; ++ ++struct ibv_cq_ex *xscdv_create_cq(struct ibv_context *context, ++ struct ibv_cq_init_attr_ex *cq_attr, ++ struct xscdv_cq_init_attr *xcq_attr); ++ ++enum xscdv_qp_create_flags { ++ XSCDV_QP_CREATE_TUNNEL_OFFLOADS = 1 << 0, ++ XSCDV_QP_CREATE_TIR_ALLOW_SELF_LOOPBACK_UC = 1 << 1, ++ XSCDV_QP_CREATE_TIR_ALLOW_SELF_LOOPBACK_MC = 1 << 2, ++ XSCDV_QP_CREATE_DISABLE_SCATTER_TO_CQE = 1 << 3, ++ XSCDV_QP_CREATE_ALLOW_SCATTER_TO_CQE = 1 << 4, ++}; ++ ++enum xscdv_qp_init_attr_mask { ++ XSCDV_QP_INIT_ATTR_MASK_QP_CREATE_FLAGS = 1 << 0, ++ XSCDV_QP_INIT_ATTR_MASK_DC = 1 << 1, ++}; ++ ++enum xscdv_dc_type { ++ XSCDV_DCTYPE_DCT = 1, ++ XSCDV_DCTYPE_DCI, ++}; ++ ++struct xscdv_dc_init_attr { ++ enum xscdv_dc_type dc_type; ++ uint64_t dct_access_key; ++}; ++ ++struct xscdv_qp_init_attr { ++ uint64_t comp_mask; /* Use enum xscdv_qp_init_attr_mask */ ++ uint32_t create_flags; /* Use enum xsc_qp_create_flags */ ++ struct xscdv_dc_init_attr dc_init_attr; ++}; ++ ++struct ibv_qp *xscdv_create_qp(struct ibv_context *context, ++ struct ibv_qp_init_attr_ex *qp_attr, ++ struct xscdv_qp_init_attr *xqp_attr); ++ ++enum xscdv_flow_action_esp_mask { ++ XSCDV_FLOW_ACTION_ESP_MASK_FLAGS = 1 << 0, ++}; ++ ++struct xscdv_flow_action_esp { ++ uint64_t comp_mask; /* Use enum xscdv_flow_action_esp_mask */ ++ uint32_t action_flags; /* Use enum xscdv_flow_action_flags */ ++}; ++ ++struct xscdv_flow_match_parameters { ++ size_t match_sz; ++ uint64_t match_buf[]; /* Device spec format */ ++}; ++ ++struct xscdv_flow_matcher_attr { ++ enum ibv_flow_attr_type type; ++ uint32_t flags; /* From enum ibv_flow_flags */ ++ uint16_t priority; ++ uint8_t match_criteria_enable; /* Device spec format */ ++ struct xscdv_flow_match_parameters *match_mask; ++ uint64_t comp_mask; ++}; ++ ++struct xscdv_flow_matcher; ++ ++struct xscdv_flow_matcher * ++xscdv_create_flow_matcher(struct ibv_context *context, ++ struct xscdv_flow_matcher_attr *matcher_attr); ++ ++int xscdv_destroy_flow_matcher(struct xscdv_flow_matcher *matcher); ++ ++enum xscdv_flow_action_type { ++ XSCDV_FLOW_ACTION_DEST_IBV_QP, ++ XSCDV_FLOW_ACTION_DROP, ++ XSCDV_FLOW_ACTION_IBV_COUNTER, ++ XSCDV_FLOW_ACTION_IBV_FLOW_ACTION, ++ XSCDV_FLOW_ACTION_TAG, ++ XSCDV_FLOW_ACTION_DEST_DEVX, ++}; ++ ++struct xscdv_flow_action_attr { ++ enum xscdv_flow_action_type type; ++ union { ++ struct ibv_qp *qp; ++ struct ibv_counters *counter; ++ struct ibv_flow_action *action; ++ uint32_t tag_value; ++ struct xscdv_devx_obj *obj; ++ }; ++}; ++ ++struct ibv_flow * ++xscdv_create_flow(struct xscdv_flow_matcher *matcher, ++ struct xscdv_flow_match_parameters *match_value, ++ size_t num_actions, ++ struct xscdv_flow_action_attr actions_attr[]); ++ ++struct ibv_flow_action 
*xscdv_create_flow_action_esp(struct ibv_context *ctx, ++ struct ibv_flow_action_esp_attr *esp, ++ struct xscdv_flow_action_esp *xattr); ++ ++/* ++ * xscdv_create_flow_action_modify_header - Create a flow action which mutates ++ * a packet. The flow action can be attached to steering rules via ++ * ibv_create_flow(). ++ * ++ * @ctx: RDMA device context to create the action on. ++ * @actions_sz: The size of *actions* buffer in bytes. ++ * @actions: A buffer which contains modify actions provided in device spec ++ * format. ++ * @ft_type: Defines the flow table type to which the modify ++ * header action will be attached. ++ * ++ * Return a valid ibv_flow_action if successful, NULL otherwise. ++ */ ++struct ibv_flow_action * ++xscdv_create_flow_action_modify_header(struct ibv_context *ctx, ++ size_t actions_sz, ++ uint64_t actions[], ++ enum xscdv_flow_table_type ft_type); ++ ++/* ++ * xscdv_create_flow_action_packet_reformat - Create flow action which can ++ * encap/decap packets. ++ */ ++struct ibv_flow_action * ++xscdv_create_flow_action_packet_reformat(struct ibv_context *ctx, ++ size_t data_sz, ++ void *data, ++ enum xscdv_flow_action_packet_reformat_type reformat_type, ++ enum xscdv_flow_table_type ft_type); ++/* ++ * Most device capabilities are exported by ibv_query_device(...), ++ * but there is HW device-specific information which is important ++ * for data-path, but isn't provided. ++ * ++ * Return 0 on success. ++ */ ++int xscdv_query_device(struct ibv_context *ctx_in, ++ struct xscdv_context *attrs_out); ++ ++enum xscdv_qp_comp_mask { ++ XSCDV_QP_MASK_UAR_MMAP_OFFSET = 1 << 0, ++ XSCDV_QP_MASK_RAW_QP_HANDLES = 1 << 1, ++}; ++ ++struct xscdv_qp { ++ __le32 *dbrec; ++ struct { ++ void *buf; ++ uint32_t wqe_cnt; ++ uint32_t stride; ++ __le32 *db; ++ } sq; ++ struct { ++ void *buf; ++ uint32_t wqe_cnt; ++ uint32_t stride; ++ __le32 *db; ++ } rq; ++ uint64_t comp_mask; ++ uint32_t tirn; ++ uint32_t tisn; ++ uint32_t rqn; ++ uint32_t sqn; ++}; ++ ++struct xscdv_cq { ++ void *buf; ++ __le32 *dbrec; ++ __le32 *db; ++ uint32_t cqe_cnt; ++ uint32_t cqe_size; ++ uint32_t cqn; ++ uint64_t comp_mask; ++}; ++ ++struct xscdv_rwq { ++ void *buf; ++ __le32 *dbrec; ++ uint32_t wqe_cnt; ++ uint32_t stride; ++ uint64_t comp_mask; ++ __le32 *db; ++}; ++ ++struct xscdv_dm { ++ void *buf; ++ uint64_t length; ++ uint64_t comp_mask; ++}; ++ ++struct xsc_wqe_av; ++ ++struct xscdv_ah { ++ struct xsc_wqe_av *av; ++ uint64_t comp_mask; ++}; ++ ++struct xscdv_pd { ++ uint32_t pdn; ++ uint64_t comp_mask; ++}; ++ ++struct xscdv_obj { ++ struct { ++ struct ibv_qp *in; ++ struct xscdv_qp *out; ++ } qp; ++ struct { ++ struct ibv_cq *in; ++ struct xscdv_cq *out; ++ } cq; ++ struct { ++ struct ibv_wq *in; ++ struct xscdv_rwq *out; ++ } rwq; ++ struct { ++ struct ibv_dm *in; ++ struct xscdv_dm *out; ++ } dm; ++ struct { ++ struct ibv_ah *in; ++ struct xscdv_ah *out; ++ } ah; ++ struct { ++ struct ibv_pd *in; ++ struct xscdv_pd *out; ++ } pd; ++}; ++ ++enum xscdv_obj_type { ++ XSCDV_OBJ_QP = 1 << 0, ++ XSCDV_OBJ_CQ = 1 << 1, ++ XSCDV_OBJ_SRQ = 1 << 2, ++ XSCDV_OBJ_RWQ = 1 << 3, ++ XSCDV_OBJ_DM = 1 << 4, ++ XSCDV_OBJ_AH = 1 << 5, ++ XSCDV_OBJ_PD = 1 << 6, ++}; ++ ++enum xscdv_wq_init_attr_mask { ++ XSCDV_WQ_INIT_ATTR_MASK_STRIDING_RQ = 1 << 0, ++}; ++ ++struct xscdv_striding_rq_init_attr { ++ uint32_t single_stride_log_num_of_bytes; ++ uint32_t single_wqe_log_num_of_strides; ++ uint8_t two_byte_shift_en; ++}; ++ ++struct xscdv_wq_init_attr { ++ uint64_t comp_mask; /* Use enum xscdv_wq_init_attr_mask */ ++ struct 
xscdv_striding_rq_init_attr striding_rq_attrs; ++}; ++ ++/* ++ * This function creates a work queue object with extra properties ++ * defined by xscdv_wq_init_attr struct. ++ * ++ * For each bit in the comp_mask, a field in xscdv_wq_init_attr ++ * should follow. ++ * ++ * XSCDV_WQ_INIT_ATTR_MASK_STRIDING_RQ: Create a work queue with ++ * striding RQ capabilities. ++ * - single_stride_log_num_of_bytes represents the size of each stride in the ++ * WQE and its value should be between min_single_stride_log_num_of_bytes ++ * and max_single_stride_log_num_of_bytes that are reported in ++ * xscdv_query_device. ++ * - single_wqe_log_num_of_strides represents the number of strides in each WQE. ++ * Its value should be between min_single_wqe_log_num_of_strides and ++ * max_single_wqe_log_num_of_strides that are reported in xscdv_query_device. ++ * - two_byte_shift_en: When enabled, hardware pads 2 bytes of zeroes ++ * before writing the message to memory (e.g. for IP alignment) ++ */ ++struct ibv_wq *xscdv_create_wq(struct ibv_context *context, ++ struct ibv_wq_init_attr *wq_init_attr, ++ struct xscdv_wq_init_attr *xwq_attr); ++/* ++ * This function will initialize xscdv_xxx structs based on supplied type. ++ * The information for initialization is taken from ibv_xx structs supplied ++ * as part of input. ++ * ++ * Request information of CQ marks its owned by DV for all consumer index ++ * related actions. ++ * ++ * The initialization type can be combination of several types together. ++ * ++ * Return: 0 in case of success. ++ */ ++int xscdv_init_obj(struct xscdv_obj *obj, uint64_t obj_type); ++ ++enum { ++ XSC_OPCODE_NOP = 0x00, ++ XSC_OPCODE_SEND_INVAL = 0x01, ++ XSC_OPCODE_RDMA_WRITE = 0x08, ++ XSC_OPCODE_RDMA_WRITE_IMM = 0x09, ++ XSC_OPCODE_SEND = 0x0a, ++ XSC_OPCODE_SEND_IMM = 0x0b, ++ XSC_OPCODE_TSO = 0x0e, ++ XSC_OPCODE_RDMA_READ = 0x10, ++ XSC_OPCODE_ATOMIC_CS = 0x11, ++ XSC_OPCODE_ATOMIC_FA = 0x12, ++ XSC_OPCODE_ATOMIC_MASKED_CS = 0x14, ++ XSC_OPCODE_ATOMIC_MASKED_FA = 0x15, ++ XSC_OPCODE_FMR = 0x19, ++ XSC_OPCODE_LOCAL_INVAL = 0x1b, ++ XSC_OPCODE_CONFIG_CMD = 0x1f, ++ XSC_OPCODE_UMR = 0x25, ++ XSC_OPCODE_TAG_MATCHING = 0x28 ++}; ++ ++enum { ++ XSC_CQE_L2_OK = 1 << 0, ++ XSC_CQE_L3_OK = 1 << 1, ++ XSC_CQE_L4_OK = 1 << 2, ++}; ++ ++enum { ++ XSC_CQE_L3_HDR_TYPE_NONE = 0x0, ++ XSC_CQE_L3_HDR_TYPE_IPV6 = 0x1, ++ XSC_CQE_L3_HDR_TYPE_IPV4 = 0x2, ++}; ++ ++enum { ++ XSC_CQE_OWNER_MASK = 1, ++ XSC_CQE_REQ = 0, ++ XSC_CQE_RESP_WR_IMM = 1, ++ XSC_CQE_RESP_SEND = 2, ++ XSC_CQE_RESP_SEND_IMM = 3, ++ XSC_CQE_RESP_SEND_INV = 4, ++ XSC_CQE_RESIZE_CQ = 5, ++ XSC_CQE_NO_PACKET = 6, ++ XSC_CQE_REQ_ERR = 13, ++ XSC_CQE_RESP_ERR = 14, ++ XSC_CQE_INVALID = 15, ++}; ++ ++struct xsc_err_cqe { ++ uint8_t rsvd0[32]; ++ uint32_t srqn; ++ uint8_t rsvd1[18]; ++ uint8_t vendor_err_synd; ++ uint8_t syndrome; ++ uint32_t s_wqe_opcode_qpn; ++ uint16_t wqe_counter; ++ uint8_t signature; ++ uint8_t op_own; ++}; ++ ++struct xsc_tm_cqe { ++ __be32 success; ++ __be16 hw_phase_cnt; ++ uint8_t rsvd0[12]; ++}; ++ ++struct xsc_cqe64 { ++ union { ++ struct { ++ uint8_t rsvd0[2]; ++ __be16 wqe_id; ++ uint8_t rsvd4[13]; ++ uint8_t ml_path; ++ uint8_t rsvd20[4]; ++ __be16 slid; ++ __be32 flags_rqpn; ++ uint8_t hds_ip_ext; ++ uint8_t l4_hdr_type_etc; ++ __be16 vlan_info; ++ }; ++ struct xsc_tm_cqe tm_cqe; ++ /* TMH is scattered to CQE upon match */ ++ struct ibv_tmh tmh; ++ }; ++ __be32 srqn_uidx; ++ __be32 imm_inval_pkey; ++ uint8_t app; ++ uint8_t app_op; ++ __be16 app_info; ++ __be32 byte_cnt; ++ __be64 timestamp; ++ __be32 
sop_drop_qpn; ++ __be16 wqe_counter; ++ uint8_t signature; ++ uint8_t op_own; ++}; ++ ++enum xscdv_cqe_comp_res_format { ++ XSCDV_CQE_RES_FORMAT_HASH = 1 << 0, ++ XSCDV_CQE_RES_FORMAT_CSUM = 1 << 1, ++ XSCDV_CQE_RES_FORMAT_CSUM_STRIDX = 1 << 2, ++}; ++ ++enum xscdv_sw_parsing_offloads { ++ XSCDV_SW_PARSING = 1 << 0, ++ XSCDV_SW_PARSING_CSUM = 1 << 1, ++ XSCDV_SW_PARSING_LSO = 1 << 2, ++}; ++ ++static XSCDV_ALWAYS_INLINE ++uint8_t xscdv_get_cqe_owner(struct xsc_cqe64 *cqe) ++{ ++ return cqe->op_own & 0x1; ++} ++ ++static XSCDV_ALWAYS_INLINE ++void xscdv_set_cqe_owner(struct xsc_cqe64 *cqe, uint8_t val) ++{ ++ cqe->op_own = (val & 0x1) | (cqe->op_own & ~0x1); ++} ++ ++/* Solicited event */ ++static XSCDV_ALWAYS_INLINE ++uint8_t xscdv_get_cqe_se(struct xsc_cqe64 *cqe) ++{ ++ return (cqe->op_own >> 1) & 0x1; ++} ++ ++static XSCDV_ALWAYS_INLINE ++uint8_t xscdv_get_cqe_format(struct xsc_cqe64 *cqe) ++{ ++ return (cqe->op_own >> 2) & 0x3; ++} ++ ++static XSCDV_ALWAYS_INLINE ++uint8_t xscdv_get_cqe_opcode(struct xsc_cqe64 *cqe) ++{ ++ return cqe->op_own >> 4; ++} ++ ++/* ++ * WQE related part ++ */ ++enum { ++ XSC_INVALID_LKEY = 0x100, ++}; ++ ++enum { ++ XSC_SEND_WQE_BB = 64, ++ XSC_SEND_WQE_SHIFT = 6, ++}; ++ ++struct xsc_wqe_srq_next_seg { ++ uint8_t rsvd0[2]; ++ __be16 next_wqe_index; ++ uint8_t signature; ++ uint8_t rsvd1[11]; ++}; ++ ++struct xsc_wqe_ctrl_seg { ++ __be32 opmod_idx_opcode; ++ __be32 qpn_ds; ++ uint8_t signature; ++ uint8_t rsvd[2]; ++ uint8_t fm_ce_se; ++ __be32 imm; ++}; ++ ++struct xsc_wqe_av { ++ union { ++ struct { ++ __be32 qkey; ++ __be32 reserved; ++ } qkey; ++ __be64 dc_key; ++ } key; ++ __be32 dqp_dct; ++ uint8_t stat_rate_sl; ++ uint8_t fl_mlid; ++ __be16 rlid; ++ uint8_t reserved0[4]; ++ uint8_t rmac[6]; ++ uint8_t tclass; ++ uint8_t hop_limit; ++ __be32 grh_gid_fl; ++ uint8_t rgid[16]; ++}; ++ ++struct xsc_wqe_datagram_seg { ++ struct xsc_wqe_av av; ++}; ++ ++struct xsc_wqe_raddr_seg { ++ __be64 raddr; ++ __be32 rkey; ++ __be32 reserved; ++}; ++ ++struct xsc_wqe_atomic_seg { ++ __be64 swap_add; ++ __be64 compare; ++}; ++ ++struct xsc_wqe_inl_data_seg { ++ uint32_t byte_count; ++}; ++ ++struct xsc_wqe_eth_seg { ++ __be32 rsvd0; ++ uint8_t cs_flags; ++ uint8_t rsvd1; ++ __be16 mss; ++ __be32 rsvd2; ++ __be16 inline_hdr_sz; ++ uint8_t inline_hdr_start[2]; ++ uint8_t inline_hdr[16]; ++}; ++ ++/* ++ * Control segment - contains some control information for the current WQE. ++ * ++ * Output: ++ * seg - control segment to be filled ++ * Input: ++ * pi - WQEBB number of the first block of this WQE. ++ * This number should wrap at 0xffff, regardless of ++ * size of the WQ. ++ * opcode - Opcode of this WQE. Encodes the type of operation ++ * to be executed on the QP. ++ * opmod - Opcode modifier. ++ * qp_num - QP/SQ number this WQE is posted to. ++ * fm_ce_se - FM (fence mode), CE (completion and event mode) ++ * and SE (solicited event). ++ * ds - WQE size in octowords (16-byte units). DS accounts for all ++ * the segments in the WQE as summarized in WQE construction. ++ * signature - WQE signature. ++ * imm - Immediate data/Invalidation key/UMR mkey. 
++ */ ++static XSCDV_ALWAYS_INLINE ++void xscdv_set_ctrl_seg(struct xsc_wqe_ctrl_seg *seg, uint16_t pi, ++ uint8_t opcode, uint8_t opmod, uint32_t qp_num, ++ uint8_t fm_ce_se, uint8_t ds, ++ uint8_t signature, uint32_t imm) ++{ ++ seg->opmod_idx_opcode = htobe32(((uint32_t)opmod << 24) | ((uint32_t)pi << 8) | opcode); ++ seg->qpn_ds = htobe32((qp_num << 8) | ds); ++ seg->fm_ce_se = fm_ce_se; ++ seg->signature = signature; ++ /* ++ * The caller should prepare "imm" in advance based on WR opcode. ++ * For IBV_WR_SEND_WITH_IMM and IBV_WR_RDMA_WRITE_WITH_IMM, ++ * the "imm" should be assigned as is. ++ * For the IBV_WR_SEND_WITH_INV, it should be htobe32(imm). ++ */ ++ seg->imm = imm; ++} ++ ++/* x86 optimized version of xscdv_set_ctrl_seg() ++ * ++ * This is useful when doing calculations on large data sets ++ * for parallel calculations. ++ * ++ * It doesn't suit for serialized algorithms. ++ */ ++#if defined(__SSE3__) ++static XSCDV_ALWAYS_INLINE ++void xscdv_x86_set_ctrl_seg(struct xsc_wqe_ctrl_seg *seg, uint16_t pi, ++ uint8_t opcode, uint8_t opmod, uint32_t qp_num, ++ uint8_t fm_ce_se, uint8_t ds, ++ uint8_t signature, uint32_t imm) ++{ ++ __m128i val = _mm_set_epi32(imm, qp_num, (ds << 16) | pi, ++ (signature << 24) | (opcode << 16) | (opmod << 8) | fm_ce_se); ++ __m128i mask = _mm_set_epi8(15, 14, 13, 12, /* immediate */ ++ 0, /* signal/fence_mode */ ++#if CHAR_MIN ++ -128, -128, /* reserved */ ++#else ++ 0x80, 0x80, /* reserved */ ++#endif ++ 3, /* signature */ ++ 6, /* data size */ ++ 8, 9, 10, /* QP num */ ++ 2, /* opcode */ ++ 4, 5, /* sw_pi in BE */ ++ 1 /* opmod */ ++ ); ++ *(__m128i *) seg = _mm_shuffle_epi8(val, mask); ++} ++#endif /* defined(__SSE3__) */ ++ ++/* ++ * Datagram Segment - contains address information required in order ++ * to form a datagram message. ++ * ++ * Output: ++ * seg - datagram segment to be filled. ++ * Input: ++ * key - Q_key/access key. ++ * dqp_dct - Destination QP number for UD and DCT for DC. ++ * ext - Address vector extension. ++ * stat_rate_sl - Maximum static rate control, SL/ethernet priority. ++ * fl_mlid - Force loopback and source LID for IB. ++ * rlid - Remote LID ++ * rmac - Remote MAC ++ * tclass - GRH tclass/IPv6 tclass/IPv4 ToS ++ * hop_limit - GRH hop limit/IPv6 hop limit/IPv4 TTL ++ * grh_gid_fi - GRH, source GID address and IPv6 flow label. ++ * rgid - Remote GID/IP address. ++ */ ++static XSCDV_ALWAYS_INLINE ++void xscdv_set_dgram_seg(struct xsc_wqe_datagram_seg *seg, ++ uint64_t key, uint32_t dqp_dct, ++ uint8_t ext, uint8_t stat_rate_sl, ++ uint8_t fl_mlid, uint16_t rlid, ++ uint8_t *rmac, uint8_t tclass, ++ uint8_t hop_limit, uint32_t grh_gid_fi, ++ uint8_t *rgid) ++{ ++ ++ /* Always put 64 bits, in q_key, the reserved part will be 0 */ ++ seg->av.key.dc_key = htobe64(key); ++ seg->av.dqp_dct = htobe32(((uint32_t)ext << 31) | dqp_dct); ++ seg->av.stat_rate_sl = stat_rate_sl; ++ seg->av.fl_mlid = fl_mlid; ++ seg->av.rlid = htobe16(rlid); ++ memcpy(seg->av.rmac, rmac, 6); ++ seg->av.tclass = tclass; ++ seg->av.hop_limit = hop_limit; ++ seg->av.grh_gid_fl = htobe32(grh_gid_fi); ++ memcpy(seg->av.rgid, rgid, 16); ++} ++ ++/* ++ * Eth Segment - contains packet headers and information for stateless L2, L3, L4 offloading. ++ * ++ * Output: ++ * seg - Eth segment to be filled. ++ * Input: ++ * cs_flags - l3cs/l3cs_inner/l4cs/l4cs_inner. ++ * mss - Maximum segment size. For TSO WQEs, the number of bytes ++ * in the TCP payload to be transmitted in each packet. Must ++ * be 0 on non TSO WQEs. 
++ * inline_hdr_sz - Length of the inlined packet headers. ++ * inline_hdr_start - Inlined packet header. ++ */ ++static XSCDV_ALWAYS_INLINE ++void xscdv_set_eth_seg(struct xsc_wqe_eth_seg *seg, uint8_t cs_flags, ++ uint16_t mss, uint16_t inline_hdr_sz, ++ uint8_t *inline_hdr_start) ++{ ++ seg->cs_flags = cs_flags; ++ seg->mss = htobe16(mss); ++ seg->inline_hdr_sz = htobe16(inline_hdr_sz); ++ memcpy(seg->inline_hdr_start, inline_hdr_start, inline_hdr_sz); ++} ++ ++enum xscdv_set_ctx_attr_type { ++ XSCDV_CTX_ATTR_BUF_ALLOCATORS = 1, ++}; ++ ++enum { ++ XSC_MMAP_GET_REGULAR_PAGES_CMD = 0, ++ XSC_MMAP_GET_NC_PAGES_CMD = 3, ++}; ++ ++struct xscdv_ctx_allocators { ++ void *(*alloc)(size_t size, void *priv_data); ++ void (*free)(void *ptr, void *priv_data); ++ void *data; ++}; ++ ++/* ++ * Generic context attributes set API ++ * ++ * Returns 0 on success, or the value of errno on failure ++ * (which indicates the failure reason). ++ */ ++int xscdv_set_context_attr(struct ibv_context *context, ++ enum xscdv_set_ctx_attr_type type, void *attr); ++ ++struct xscdv_clock_info { ++ uint64_t nsec; ++ uint64_t last_cycles; ++ uint64_t frac; ++ uint32_t mult; ++ uint32_t shift; ++ uint64_t mask; ++}; ++ ++/* ++ * Get xsc core clock info ++ * ++ * Output: ++ * clock_info - clock info to be filled ++ * Input: ++ * context - device context ++ * ++ * Return: 0 on success, or the value of errno on failure ++ */ ++int xscdv_get_clock_info(struct ibv_context *context, ++ struct xscdv_clock_info *clock_info); ++ ++/* ++ * Translate device timestamp to nano-sec ++ * ++ * Input: ++ * clock_info - clock info to be filled ++ * device_timestamp - timestamp to translate ++ * ++ * Return: nano-sec ++ */ ++static inline uint64_t xscdv_ts_to_ns(struct xscdv_clock_info *clock_info, ++ uint64_t device_timestamp) ++{ ++ uint64_t delta, nsec; ++ ++ /* ++ * device_timestamp & cycles are the free running 'mask' bit counters ++ * from the hardware hca_core_clock clock. ++ */ ++ delta = (device_timestamp - clock_info->last_cycles) & clock_info->mask; ++ nsec = clock_info->nsec; ++ ++ /* ++ * Guess if the device_timestamp is more recent than ++ * clock_info->last_cycles, if not (too far in the future) treat ++ * it as old time stamp. This will break every max_clock_info_update_nsec. 
++ */ ++ ++ if (delta > clock_info->mask / 2) { ++ delta = (clock_info->last_cycles - device_timestamp) & ++ clock_info->mask; ++ nsec -= ((delta * clock_info->mult) - clock_info->frac) >> ++ clock_info->shift; ++ } else { ++ nsec += ((delta * clock_info->mult) + clock_info->frac) >> ++ clock_info->shift; ++ } ++ ++ return nsec; ++} ++ ++enum xscdv_context_attr_flags { ++ XSCDV_CONTEXT_FLAGS_DEVX = 1 << 0, ++}; ++ ++struct xscdv_context_attr { ++ uint32_t flags; /* Use enum xscdv_context_attr_flags */ ++ uint64_t comp_mask; ++}; ++ ++struct ibv_context * ++xscdv_open_device(struct ibv_device *device, struct xscdv_context_attr *attr); ++ ++struct xscdv_devx_obj; ++ ++struct xscdv_devx_obj * ++xscdv_devx_obj_create(struct ibv_context *context, const void *in, size_t inlen, ++ void *out, size_t outlen); ++int xscdv_devx_obj_query(struct xscdv_devx_obj *obj, const void *in, size_t inlen, ++ void *out, size_t outlen); ++int xscdv_devx_obj_modify(struct xscdv_devx_obj *obj, const void *in, size_t inlen, ++ void *out, size_t outlen); ++int xscdv_devx_obj_destroy(struct xscdv_devx_obj *obj); ++int xscdv_devx_general_cmd(struct ibv_context *context, const void *in, size_t inlen, ++ void *out, size_t outlen); ++ ++struct xscdv_devx_umem { ++ uint32_t umem_id; ++}; ++ ++struct xscdv_devx_umem * ++xscdv_devx_umem_reg(struct ibv_context *ctx, void *addr, size_t size, uint32_t access); ++int xscdv_devx_umem_dereg(struct xscdv_devx_umem *umem); ++int xscdv_devx_query_eqn(struct ibv_context *context, uint32_t vector, ++ uint32_t *eqn); ++ ++#ifdef __cplusplus ++} ++#endif ++ ++#endif /* _XSCDV_H_ */ +diff --git a/redhat/rdma-core.spec b/redhat/rdma-core.spec +index 321578c..a90f5dc 100644 +--- a/redhat/rdma-core.spec ++++ b/redhat/rdma-core.spec +@@ -166,6 +166,8 @@ Provides: libocrdma = %{version}-%{release} + Obsoletes: libocrdma < %{version}-%{release} + Provides: librxe = %{version}-%{release} + Obsoletes: librxe < %{version}-%{release} ++Provides: libxscale = %{version}-%{release} ++Obsoletes: libxscale < %{version}-%{release} + + %description -n libibverbs + libibverbs is a library that allows userspace processes to use RDMA +@@ -190,6 +192,7 @@ Device-specific plug-in ibverbs userspace drivers are included: + - librxe: A software implementation of the RoCE protocol + - libsiw: A software implementation of the iWarp protocol + - libvmw_pvrdma: VMware paravirtual RDMA device ++- libxscale: Yunsilicon RDMA device + + %package -n libibverbs-utils + Summary: Examples for the libibverbs library +@@ -569,6 +572,7 @@ fi + %{_libdir}/libibverbs/*.so + %{_libdir}/libmlx5.so.* + %{_libdir}/libmlx4.so.* ++%{_libdir}/libxscale.so.* + %config(noreplace) %{_sysconfdir}/libibverbs.d/*.driver + %doc %{_docdir}/%{name}/libibverbs.md + +-- +2.43.0 + + diff --git a/rdma-core.spec b/rdma-core.spec index 33781ce1db457651e053f30f25870b5c82652b1f..2756cb6c377062ee9c6aaa28959b16a10dd2beb6 100644 --- a/rdma-core.spec +++ b/rdma-core.spec @@ -1,6 +1,6 @@ Name: rdma-core Version: 41.0 -Release: 31 +Release: 32 Summary: RDMA core userspace libraries and daemons License: GPLv2 or BSD Url: https://github.com/linux-rdma/rdma-core @@ -98,6 +98,7 @@ patch89: 0089-libhns-Fix-bypassed-vendor-check-in-hnsdv_query_devi.patch patch90: 0090-libhns-Fix-coredump-during-QP-destruction-when-send_.patch patch91: 0091-libhns-Fix-the-identification-mark-of-RDMA-UD-packet.patch patch92: 0092-libhns-Fix-missing-fields-for-SRQ-WC.patch +patch93: 0093-libxscale-Add-Yunsilicon-User-Space-RDMA-Driver.patch BuildRequires: binutils cmake >= 2.8.11 gcc 
libudev-devel pkgconfig pkgconfig(libnl-3.0) BuildRequires: pkgconfig(libnl-route-3.0) valgrind-devel systemd systemd-devel @@ -137,6 +138,8 @@ Provides: libocrdma = %{version}-%{release} Obsoletes: libocrdma < %{version}-%{release} Provides: librxe = %{version}-%{release} Obsoletes: librxe < %{version}-%{release} +Provides: libxscale = %{version}-%{release} +Obsoletes: libxscale < %{version}-%{release} Obsoletes: srptools <= 1.0.3 Provides: srptools = %{version}-%{release} Obsoletes: openib-srptools <= 0.0.6 @@ -312,6 +315,7 @@ fi %{_libdir}/libibverbs/*.so %{_libdir}/libmlx5.so.* %{_libdir}/libmlx4.so.* +%{_libdir}/libxscale.so.* %{_libdir}/ibacm/* %{_libdir}/libibumad*.so.* %{_libdir}/librdmacm*.so.* @@ -345,6 +349,12 @@ fi %{_mandir}/* %changelog +* Tue Mar 4 2025 Xin Tian - 41.0-32 +- Type: requirement +- ID: NA +- SUG: NA +- DESC: Add Yunsilicon user space RDMA driver + * Thu Feb 27 2025 Xinghai Cen - 41.0-31 - Type: bugfix - ID: NA