From fb3d7a29c09325a069ab54b77c5268a2bac20e8b Mon Sep 17 00:00:00 2001 From: anolis-bot Date: Wed, 24 May 2023 11:22:22 +0800 Subject: [PATCH 1/3] update to rdma-core-44.0-2.el8.1 Signed-off-by: anolis-bot --- ...mparing-qp_state-for-iWARP-providers.patch | 63 - ...util-fix-overflow-in-remap_node_name.patch | 85 + ...-drop-unnecessary-nodedesc-local-cop.patch | 95 + ...tect-options-not-supported-in-RHEL-8.patch | 52 + ...le-providers-that-were-not-enabled-i.patch | 53 + ...-printing-a-possibly-non-NUL-termina.patch | 38 + ...Add-userspace-verbs-related-header-f.patch | 430 ----- ...a-Add-userspace-verbs-implementation.patch | 1656 ----------------- ...s-erdma-Add-the-main-module-of-erdma.patch | 326 ---- ...s-erdma-Add-to-the-build-environment.patch | 245 --- ...ot-perform-device-rename-on-OPA-devi.patch | 42 + ...ERNEL-as-default-interface-naming-co.patch | 0 dist | 1 + download | 2 +- rdma-core.spec | 113 +- 15 files changed, 414 insertions(+), 2787 deletions(-) delete mode 100644 0001-tests-Fix-comparing-qp_state-for-iWARP-providers.patch create mode 100644 0001-util-fix-overflow-in-remap_node_name.patch create mode 100644 0002-infiniband-diags-drop-unnecessary-nodedesc-local-cop.patch create mode 100644 0002-systemd-drop-Protect-options-not-supported-in-RHEL-8.patch create mode 100644 0003-CMakeLists-disable-providers-that-were-not-enabled-i.patch create mode 100644 0003-libibnetdisc-fix-printing-a-possibly-non-NUL-termina.patch delete mode 100644 1001-providers-erdma-Add-userspace-verbs-related-header-f.patch delete mode 100644 1002-providers-erdma-Add-userspace-verbs-implementation.patch delete mode 100644 1003-providers-erdma-Add-the-main-module-of-erdma.patch delete mode 100644 1004-providers-erdma-Add-to-the-build-environment.patch create mode 100644 9998-kernel-boot-Do-not-perform-device-rename-on-OPA-devi.patch rename udev-keep-NAME_KERNEL-as-default-interface-naming-co.patch => 9999-udev-keep-NAME_KERNEL-as-default-interface-naming-co.patch (100%) create mode 100644 dist diff --git a/0001-tests-Fix-comparing-qp_state-for-iWARP-providers.patch b/0001-tests-Fix-comparing-qp_state-for-iWARP-providers.patch deleted file mode 100644 index 686531e..0000000 --- a/0001-tests-Fix-comparing-qp_state-for-iWARP-providers.patch +++ /dev/null @@ -1,63 +0,0 @@ -From 698f2ae804767635342694d31d9590fe6ad2217e Mon Sep 17 00:00:00 2001 -From: Kamal Heib -Date: Wed, 8 Dec 2021 16:12:11 +0200 -Subject: [PATCH] tests: Fix comparing qp_state for iWARP providers. - -The initial QP state for iWARP providers is IBV_QPS_INIT (not -IBV_QPS_RESET), Change the test to handle this case. - -Signed-off-by: Kamal Heib ---- - tests/test_qp.py | 16 ++++++++++++++-- - 1 file changed, 14 insertions(+), 2 deletions(-) - -diff --git a/tests/test_qp.py b/tests/test_qp.py -index b952c95e..a66f724f 100644 ---- a/tests/test_qp.py -+++ b/tests/test_qp.py -@@ -14,6 +14,7 @@ from pyverbs.pyverbs_error import PyverbsRDMAError - from pyverbs.qp import QPInitAttr, QPAttr, QP - from tests.base import PyverbsAPITestCase - import pyverbs.utils as pu -+import pyverbs.device as d - import pyverbs.enums as e - from pyverbs.pd import PD - from pyverbs.cq import CQ -@@ -177,6 +178,11 @@ class QPTest(PyverbsAPITestCase): - self.assertLessEqual(orig_cap.max_recv_sge, init_attr.cap.max_recv_sge) - self.assertLessEqual(orig_cap.max_inline_data, init_attr.cap.max_inline_data) - -+ def get_node_type(self): -+ for dev in d.get_device_list(): -+ if dev.name.decode() == self.ctx.name: -+ return dev.node_type -+ - def query_qp_common_test(self, qp_type): - with PD(self.ctx) as pd: - with CQ(self.ctx, 100, None, None, 0) as cq: -@@ -190,14 +196,20 @@ class QPTest(PyverbsAPITestCase): - caps = qia.cap - qp = self.create_qp(pd, qia, False, False, self.ib_port) - qp_attr, qp_init_attr = qp.query(e.IBV_QP_STATE | e.IBV_QP_CAP) -- self.verify_qp_attrs(caps, e.IBV_QPS_RESET, qp_init_attr, qp_attr) -+ if self.get_node_type() == e.IBV_NODE_RNIC: -+ self.verify_qp_attrs(caps, e.IBV_QPS_INIT, qp_init_attr, qp_attr) -+ else: -+ self.verify_qp_attrs(caps, e.IBV_QPS_RESET, qp_init_attr, qp_attr) - - # Extended QP - qia = get_qp_init_attr_ex(cq, pd, self.attr, self.attr_ex, qp_type) - caps = qia.cap # Save them to verify values later - qp = self.create_qp(self.ctx, qia, True, False, self.ib_port) - qp_attr, qp_init_attr = qp.query(e.IBV_QP_STATE | e.IBV_QP_CAP) -- self.verify_qp_attrs(caps, e.IBV_QPS_RESET, qp_init_attr, qp_attr) -+ if self.get_node_type() == e.IBV_NODE_RNIC: -+ self.verify_qp_attrs(caps, e.IBV_QPS_INIT, qp_init_attr, qp_attr) -+ else: -+ self.verify_qp_attrs(caps, e.IBV_QPS_RESET, qp_init_attr, qp_attr) - - def test_query_rc_qp(self): - """ --- -2.33.1 - diff --git a/0001-util-fix-overflow-in-remap_node_name.patch b/0001-util-fix-overflow-in-remap_node_name.patch new file mode 100644 index 0000000..860c153 --- /dev/null +++ b/0001-util-fix-overflow-in-remap_node_name.patch @@ -0,0 +1,85 @@ +From 5075b961a29ff9c418e1fefe78432e95dd0a5fcc Mon Sep 17 00:00:00 2001 +From: Michal Schmidt +Date: Wed, 1 Feb 2023 22:41:06 +0100 +Subject: [PATCH 1/3] util: fix overflow in remap_node_name() + +The function remap_node_name() assumes the parameter 'nodedesc' is at +least IB_SMP_DATA_SIZE + 1 (i.e. 65) bytes long, because it passes it to +clean_nodedesc() that writes a nul-terminator to it at offset +IB_SMP_DATA_SIZE. Callers in infiniband-diags/saquery.c pass +a (struct ib_node_desc_t).description as the argument, which is only +IB_NODE_DESCRIPTION_SIZE (i.e. 64) bytes long. This is an overflow. + +An odd thing about remap_node_name() is that it may (but does not +always) rewrite the nodedesc in-place. Callers do not appear to +appreciate this behavior. Most of them are various print_* and dump_* +functions where rewriting the input makes no sense. Some callers make a +local copy of the nodedesc first, possibly to protect the original. +One caller (infiniband-diags/saquery.c:print_node_records()) checks if +either the original description or the remapped one matches a given +requested_name - so it looks like it prefers the original to be +not rewritten. + +Let's make remap_node_name() a bit safer and more convenient to use. +Allocate a fixed-sized copy first. Then use strncpy to copy from +'nodedesc', never reading more than IB_SMP_DATA_SIZE (64) bytes. +Apply clean_nodedesc() on the correctly-sized copy. This solves the +overflow bug. Also, the in-place rewrite of 'nodedesc' is gone and it +can become a (const char*). + +The overflow was found by a static checker (covscan). + +Fixes: d974c4e398d2 ("Fix max length of node description (ibnetdiscover and smpquery)") +Signed-off-by: Michal Schmidt +--- + util/node_name_map.c | 12 +++++++++--- + util/node_name_map.h | 3 +-- + 2 files changed, 10 insertions(+), 5 deletions(-) + +diff --git a/util/node_name_map.c b/util/node_name_map.c +index 30b73eb1448e..511cb92ef19c 100644 +--- a/util/node_name_map.c ++++ b/util/node_name_map.c +@@ -95,7 +95,7 @@ void close_node_name_map(nn_map_t * map) + free(map); + } + +-char *remap_node_name(nn_map_t * map, uint64_t target_guid, char *nodedesc) ++char *remap_node_name(nn_map_t * map, uint64_t target_guid, const char *nodedesc) + { + char *rc = NULL; + name_map_item_t *item = NULL; +@@ -108,8 +108,14 @@ char *remap_node_name(nn_map_t * map, uint64_t target_guid, char *nodedesc) + rc = strdup(item->name); + + done: +- if (rc == NULL) +- rc = strdup(clean_nodedesc(nodedesc)); ++ if (rc == NULL) { ++ rc = malloc(IB_SMP_DATA_SIZE + 1); ++ if (rc) { ++ strncpy(rc, nodedesc, IB_SMP_DATA_SIZE); ++ rc[IB_SMP_DATA_SIZE] = '\0'; ++ clean_nodedesc(rc); ++ } ++ } + return (rc); + } + +diff --git a/util/node_name_map.h b/util/node_name_map.h +index e78d274b116e..d83d672782c4 100644 +--- a/util/node_name_map.h ++++ b/util/node_name_map.h +@@ -12,8 +12,7 @@ typedef struct nn_map nn_map_t; + + nn_map_t *open_node_name_map(const char *node_name_map); + void close_node_name_map(nn_map_t *map); +-/* NOTE: parameter "nodedesc" may be modified here. */ +-char *remap_node_name(nn_map_t *map, uint64_t target_guid, char *nodedesc); ++char *remap_node_name(nn_map_t *map, uint64_t target_guid, const char *nodedesc); + char *clean_nodedesc(char *nodedesc); + + #endif +-- +2.39.1 + diff --git a/0002-infiniband-diags-drop-unnecessary-nodedesc-local-cop.patch b/0002-infiniband-diags-drop-unnecessary-nodedesc-local-cop.patch new file mode 100644 index 0000000..7927ba6 --- /dev/null +++ b/0002-infiniband-diags-drop-unnecessary-nodedesc-local-cop.patch @@ -0,0 +1,95 @@ +From d5723a0f69577fd3022024ca17c27e273a29695b Mon Sep 17 00:00:00 2001 +From: Michal Schmidt +Date: Wed, 1 Feb 2023 22:41:16 +0100 +Subject: [PATCH 2/3] infiniband-diags: drop unnecessary nodedesc local copies + +Now that remap_node_name() never rewrites nodedesc in-place, some +copying can be avoided. + +Signed-off-by: Michal Schmidt +--- + infiniband-diags/dump_fts.c | 14 +++----------- + 1 file changed, 3 insertions(+), 11 deletions(-) + +diff --git a/infiniband-diags/dump_fts.c b/infiniband-diags/dump_fts.c +index ce6bfb9ecc33..acef9efe692d 100644 +--- a/infiniband-diags/dump_fts.c ++++ b/infiniband-diags/dump_fts.c +@@ -109,7 +109,6 @@ static void dump_multicast_tables(ibnd_node_t *node, unsigned startl, + unsigned endl, struct ibmad_port *mad_port) + { + ib_portid_t *portid = &node->path_portid; +- char nd[IB_SMP_DATA_SIZE + 1] = { 0 }; + char str[512]; + char *s; + uint64_t nodeguid; +@@ -119,7 +118,6 @@ static void dump_multicast_tables(ibnd_node_t *node, unsigned startl, + char *mapnd = NULL; + int n = 0; + +- memcpy(nd, node->nodedesc, strlen(node->nodedesc)); + nports = node->numports; + nodeguid = node->guid; + +@@ -149,7 +147,7 @@ static void dump_multicast_tables(ibnd_node_t *node, unsigned startl, + endl = IB_MAX_MCAST_LID; + } + +- mapnd = remap_node_name(node_name_map, nodeguid, nd); ++ mapnd = remap_node_name(node_name_map, nodeguid, node->nodedesc); + + printf("Multicast mlids [0x%x-0x%x] of switch %s guid 0x%016" PRIx64 + " (%s):\n", startl, endl, portid2str(portid), nodeguid, +@@ -224,8 +222,6 @@ static int dump_lid(char *str, int str_len, int lid, int valid, + ibnd_fabric_t *fabric, int *last_port_lid, + int *base_port_lid, uint64_t *portguid) + { +- char nd[IB_SMP_DATA_SIZE + 1] = { 0 }; +- + ibnd_port_t *port = NULL; + + char ntype[50], sguid[30]; +@@ -276,14 +272,12 @@ static int dump_lid(char *str, int str_len, int lid, int valid, + baselid = port->base_lid; + lmc = port->lmc; + +- memcpy(nd, port->node->nodedesc, strlen(port->node->nodedesc)); +- + if (lmc > 0) { + *base_port_lid = baselid; + *last_port_lid = baselid + (1 << lmc) - 1; + } + +- mapnd = remap_node_name(node_name_map, nodeguid, nd); ++ mapnd = remap_node_name(node_name_map, nodeguid, port->node->nodedesc); + + rc = snprintf(str, str_len, ": (%s portguid %s: '%s')", + mad_dump_val(IB_NODE_TYPE_F, ntype, sizeof ntype, +@@ -302,7 +296,6 @@ static void dump_unicast_tables(ibnd_node_t *node, int startl, int endl, + { + ib_portid_t * portid = &node->path_portid; + char lft[IB_SMP_DATA_SIZE] = { 0 }; +- char nd[IB_SMP_DATA_SIZE + 1] = { 0 }; + char str[200]; + uint64_t nodeguid; + int block, i, e, top; +@@ -315,7 +308,6 @@ static void dump_unicast_tables(ibnd_node_t *node, int startl, int endl, + mad_decode_field(node->switchinfo, IB_SW_LINEAR_FDB_TOP_F, &top); + nodeguid = node->guid; + nports = node->numports; +- memcpy(nd, node->nodedesc, strlen(node->nodedesc)); + + if (!endl || endl > top) + endl = top; +@@ -326,7 +318,7 @@ static void dump_unicast_tables(ibnd_node_t *node, int startl, int endl, + endl = IB_MAX_UCAST_LID; + } + +- mapnd = remap_node_name(node_name_map, nodeguid, nd); ++ mapnd = remap_node_name(node_name_map, nodeguid, node->nodedesc); + + printf("Unicast lids [0x%x-0x%x] of switch %s guid 0x%016" PRIx64 + " (%s):\n", startl, endl, portid2str(portid), nodeguid, +-- +2.39.1 + diff --git a/0002-systemd-drop-Protect-options-not-supported-in-RHEL-8.patch b/0002-systemd-drop-Protect-options-not-supported-in-RHEL-8.patch new file mode 100644 index 0000000..ced3d13 --- /dev/null +++ b/0002-systemd-drop-Protect-options-not-supported-in-RHEL-8.patch @@ -0,0 +1,52 @@ +From 30f781482122db14f7bf89cb31db1c6aba30bba8 Mon Sep 17 00:00:00 2001 +From: Michal Schmidt +Date: Wed, 8 Feb 2023 15:19:17 +0100 +Subject: [PATCH 2/2] systemd: drop Protect* options not supported in RHEL 8 + systemd + +Signed-off-by: Michal Schmidt +--- + ibacm/ibacm.service.in | 2 -- + iwpmd/iwpmd.service.in | 2 -- + rdma-ndd/rdma-ndd.service.in | 1 - + 3 files changed, 5 deletions(-) + +diff --git a/ibacm/ibacm.service.in b/ibacm/ibacm.service.in +index 56538beb2b15..535e1aeae326 100644 +--- a/ibacm/ibacm.service.in ++++ b/ibacm/ibacm.service.in +@@ -19,8 +19,6 @@ Type=notify + ExecStart=@CMAKE_INSTALL_FULL_SBINDIR@/ibacm --systemd + ProtectSystem=full + ProtectHome=true +-ProtectHostname=true +-ProtectKernelLogs=true + + [Install] + Also=ibacm.socket +diff --git a/iwpmd/iwpmd.service.in b/iwpmd/iwpmd.service.in +index 47afebd8ad02..dd4dc750c26c 100644 +--- a/iwpmd/iwpmd.service.in ++++ b/iwpmd/iwpmd.service.in +@@ -26,7 +26,5 @@ ExecStart=@CMAKE_INSTALL_FULL_SBINDIR@/iwpmd --systemd + LimitNOFILE=102400 + ProtectSystem=full + ProtectHome=true +-ProtectHostname=true +-ProtectKernelLogs=true + + # iwpmd is automatically wanted by udev when an iWarp RDMA device is present +diff --git a/rdma-ndd/rdma-ndd.service.in b/rdma-ndd/rdma-ndd.service.in +index 368deec00b2c..9cbe93fb6900 100644 +--- a/rdma-ndd/rdma-ndd.service.in ++++ b/rdma-ndd/rdma-ndd.service.in +@@ -22,6 +22,5 @@ Restart=always + ExecStart=@CMAKE_INSTALL_FULL_SBINDIR@/rdma-ndd --systemd + ProtectSystem=full + ProtectHome=true +-ProtectKernelLogs=true + + # rdma-ndd is automatically wanted by udev when an RDMA device with a node description is present +-- +2.39.1 + diff --git a/0003-CMakeLists-disable-providers-that-were-not-enabled-i.patch b/0003-CMakeLists-disable-providers-that-were-not-enabled-i.patch new file mode 100644 index 0000000..84f1b16 --- /dev/null +++ b/0003-CMakeLists-disable-providers-that-were-not-enabled-i.patch @@ -0,0 +1,53 @@ +From eff6b07e92a1674818c5d8c9993651dbbeabccf4 Mon Sep 17 00:00:00 2001 +From: Michal Schmidt +Date: Wed, 1 Feb 2023 15:24:23 +0100 +Subject: [PATCH 3/5] CMakeLists: disable providers that were not enabled in + RHEL 9.1 + +Doing a package rebase, but don't want to enable additional drivers +unless explicitly requested. + +Upstream Status: RHEL only + +Signed-off-by: Michal Schmidt +--- + CMakeLists.txt | 12 ++++++------ + 1 file changed, 6 insertions(+), 6 deletions(-) + +diff --git a/CMakeLists.txt b/CMakeLists.txt +index bac10516bb85..b7eca65f0fe2 100644 +--- a/CMakeLists.txt ++++ b/CMakeLists.txt +@@ -711,23 +711,23 @@ add_subdirectory(providers/bnxt_re) + add_subdirectory(providers/cxgb4) # NO SPARSE + add_subdirectory(providers/efa) + add_subdirectory(providers/efa/man) +-add_subdirectory(providers/erdma) ++#add_subdirectory(providers/erdma) + add_subdirectory(providers/hns) + add_subdirectory(providers/irdma) +-add_subdirectory(providers/mana) +-add_subdirectory(providers/mana/man) ++#add_subdirectory(providers/mana) ++#add_subdirectory(providers/mana/man) + add_subdirectory(providers/mlx4) + add_subdirectory(providers/mlx4/man) + add_subdirectory(providers/mlx5) + add_subdirectory(providers/mlx5/man) +-add_subdirectory(providers/mthca) +-add_subdirectory(providers/ocrdma) ++#add_subdirectory(providers/mthca) ++#add_subdirectory(providers/ocrdma) + add_subdirectory(providers/qedr) + add_subdirectory(providers/vmw_pvrdma) + endif() + + add_subdirectory(providers/hfi1verbs) +-add_subdirectory(providers/ipathverbs) ++#add_subdirectory(providers/ipathverbs) + add_subdirectory(providers/rxe) + add_subdirectory(providers/rxe/man) + add_subdirectory(providers/siw) +-- +2.39.1 + diff --git a/0003-libibnetdisc-fix-printing-a-possibly-non-NUL-termina.patch b/0003-libibnetdisc-fix-printing-a-possibly-non-NUL-termina.patch new file mode 100644 index 0000000..2a5cc84 --- /dev/null +++ b/0003-libibnetdisc-fix-printing-a-possibly-non-NUL-termina.patch @@ -0,0 +1,38 @@ +From 45fcc7ad41216a93bafb452f7d7a4507d52722cd Mon Sep 17 00:00:00 2001 +From: Michal Schmidt +Date: Wed, 1 Feb 2023 23:30:52 +0100 +Subject: [PATCH 3/3] libibnetdisc: fix printing a possibly non-NUL-terminated + string + +Found by a static check (covscan). + +Fixes: d974c4e398d2 ("Fix max length of node description (ibnetdiscover and smpquery)") +Signed-off-by: Michal Schmidt +--- + libibnetdisc/chassis.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +diff --git a/libibnetdisc/chassis.c b/libibnetdisc/chassis.c +index a3ec1d82807c..bc1a8aff8acb 100644 +--- a/libibnetdisc/chassis.c ++++ b/libibnetdisc/chassis.c +@@ -597,7 +597,7 @@ static int fill_mellanox_chassis_record(ibnd_node_t * node) + int p = 0; + ibnd_port_t *port; + +- char node_desc[IB_SMP_DATA_SIZE]; ++ char node_desc[IB_SMP_DATA_SIZE + 1]; + char *system_name; + char *system_type; + char *system_slot_name; +@@ -617,6 +617,7 @@ static int fill_mellanox_chassis_record(ibnd_node_t * node) + */ + + memcpy(node_desc, node->nodedesc, IB_SMP_DATA_SIZE); ++ node_desc[IB_SMP_DATA_SIZE] = '\0'; + + IBND_DEBUG("fill_mellanox_chassis_record: node_desc:%s \n",node_desc); + +-- +2.39.1 + diff --git a/1001-providers-erdma-Add-userspace-verbs-related-header-f.patch b/1001-providers-erdma-Add-userspace-verbs-related-header-f.patch deleted file mode 100644 index aee39d0..0000000 --- a/1001-providers-erdma-Add-userspace-verbs-related-header-f.patch +++ /dev/null @@ -1,430 +0,0 @@ -From b8fdbf0cd29d6630ad53e78ec4724bb85f19611d Mon Sep 17 00:00:00 2001 -Message-Id: -From: Cheng Xu -Date: Thu, 1 Dec 2022 15:38:34 +0800 -Subject: [PATCH 1/4] providers/erdma: Add userspace verbs related header - files. - -Add the userspace verbs implementation related header files: 'erdma_hw.h' -for hardware interface definitions, 'erdma_verbs.h' for verbs related -definitions and 'erdma_db.h' for doorbell records related definitions. - -Signed-off-by: Cheng Xu ---- - providers/erdma/erdma_db.h | 17 +++ - providers/erdma/erdma_hw.h | 218 ++++++++++++++++++++++++++++++++++ - providers/erdma/erdma_verbs.h | 153 ++++++++++++++++++++++++ - 3 files changed, 388 insertions(+) - create mode 100644 providers/erdma/erdma_db.h - create mode 100644 providers/erdma/erdma_hw.h - create mode 100644 providers/erdma/erdma_verbs.h - -diff --git a/providers/erdma/erdma_db.h b/providers/erdma/erdma_db.h -new file mode 100644 -index 000000000000..c302cb7ab154 ---- /dev/null -+++ b/providers/erdma/erdma_db.h -@@ -0,0 +1,17 @@ -+/* SPDX-License-Identifier: GPL-2.0 or OpenIB.org BSD (MIT) See COPYING file */ -+/* -+ * Authors: Cheng Xu -+ * Copyright (c) 2020-2021, Alibaba Group. -+ */ -+ -+#ifndef __ERDMA_DB_H__ -+#define __ERDMA_DB_H__ -+ -+#include -+ -+#include "erdma.h" -+ -+uint64_t *erdma_alloc_dbrecords(struct erdma_context *ctx); -+void erdma_dealloc_dbrecords(struct erdma_context *ctx, uint64_t *dbrecords); -+ -+#endif -diff --git a/providers/erdma/erdma_hw.h b/providers/erdma/erdma_hw.h -new file mode 100644 -index 000000000000..7f567076a549 ---- /dev/null -+++ b/providers/erdma/erdma_hw.h -@@ -0,0 +1,218 @@ -+/* SPDX-License-Identifier: GPL-2.0 or OpenIB.org BSD (MIT) See COPYING file */ -+/* -+ * Authors: Cheng Xu -+ * Copyright (c) 2020-2021, Alibaba Group. -+ */ -+ -+#ifndef __ERDMA_HW_H__ -+#define __ERDMA_HW_H__ -+ -+#include -+ -+#define ERDMA_SDB_PAGE 0 -+#define ERDMA_SDB_ENTRY 1 -+#define ERDMA_SDB_SHARED 2 -+ -+#define ERDMA_NSDB_PER_ENTRY 2 -+#define ERDMA_SDB_ALLOC_QPN_MASK 0x1f -+#define ERDMA_RDB_ALLOC_QPN_MASK 0x7f -+ -+#define ERDMA_SQDB_SIZE 128 -+#define ERDMA_CQDB_SIZE 8 -+#define ERDMA_RQDB_SIZE 8 -+#define ERDMA_RQDB_SPACE_SIZE 32 -+ -+/* WQE related. */ -+#define EQE_SIZE 16 -+#define EQE_SHIFT 4 -+#define RQE_SIZE 32 -+#define RQE_SHIFT 5 -+#define CQE_SIZE 32 -+#define CQE_SHIFT 5 -+#define SQEBB_SIZE 32 -+#define SQEBB_SHIFT 5 -+#define SQEBB_MASK (~(SQEBB_SIZE - 1)) -+#define SQEBB_ALIGN(size) ((size + SQEBB_SIZE - 1) & SQEBB_MASK) -+#define SQEBB_COUNT(size) (SQEBB_ALIGN(size) >> SQEBB_SHIFT) -+ -+#define MAX_WQEBB_PER_SQE 4 -+ -+enum erdma_opcode { -+ ERDMA_OP_WRITE = 0, -+ ERDMA_OP_READ = 1, -+ ERDMA_OP_SEND = 2, -+ ERDMA_OP_SEND_WITH_IMM = 3, -+ -+ ERDMA_OP_RECEIVE = 4, -+ ERDMA_OP_RECV_IMM = 5, -+ ERDMA_OP_RECV_INV = 6, -+ -+ ERDMA_OP_REQ_ERR = 7, -+ ERDNA_OP_READ_RESPONSE = 8, -+ ERDMA_OP_WRITE_WITH_IMM = 9, -+ -+ ERDMA_OP_RECV_ERR = 10, -+ -+ ERDMA_OP_INVALIDATE = 11, -+ ERDMA_OP_RSP_SEND_IMM = 12, -+ ERDMA_OP_SEND_WITH_INV = 13, -+ -+ ERDMA_OP_REG_MR = 14, -+ ERDMA_OP_LOCAL_INV = 15, -+ ERDMA_OP_READ_WITH_INV = 16, -+ ERDMA_OP_ATOMIC_CAS = 17, -+ ERDMA_OP_ATOMIC_FAD = 18, -+ ERDMA_NUM_OPCODES = 19, -+ ERDMA_OP_INVALID = ERDMA_NUM_OPCODES + 1 -+}; -+ -+/* -+ * Inline data are kept within the work request itself occupying -+ * the space of sge[1] .. sge[n]. Therefore, inline data cannot be -+ * supported if ERDMA_MAX_SGE is below 2 elements. -+ */ -+#define ERDMA_MAX_INLINE (sizeof(struct erdma_sge) * (ERDMA_MAX_SEND_SGE)) -+ -+enum erdma_wc_status { -+ ERDMA_WC_SUCCESS = 0, -+ ERDMA_WC_GENERAL_ERR = 1, -+ ERDMA_WC_RECV_WQE_FORMAT_ERR = 2, -+ ERDMA_WC_RECV_STAG_INVALID_ERR = 3, -+ ERDMA_WC_RECV_ADDR_VIOLATION_ERR = 4, -+ ERDMA_WC_RECV_RIGHT_VIOLATION_ERR = 5, -+ ERDMA_WC_RECV_PDID_ERR = 6, -+ ERDMA_WC_RECV_WARRPING_ERR = 7, -+ ERDMA_WC_SEND_WQE_FORMAT_ERR = 8, -+ ERDMA_WC_SEND_WQE_ORD_EXCEED = 9, -+ ERDMA_WC_SEND_STAG_INVALID_ERR = 10, -+ ERDMA_WC_SEND_ADDR_VIOLATION_ERR = 11, -+ ERDMA_WC_SEND_RIGHT_VIOLATION_ERR = 12, -+ ERDMA_WC_SEND_PDID_ERR = 13, -+ ERDMA_WC_SEND_WARRPING_ERR = 14, -+ ERDMA_WC_FLUSH_ERR = 15, -+ ERDMA_WC_RETRY_EXC_ERR = 16, -+ ERDMA_NUM_WC_STATUS -+}; -+ -+enum erdma_vendor_err { -+ ERDMA_WC_VENDOR_NO_ERR = 0, -+ ERDMA_WC_VENDOR_INVALID_RQE = 1, -+ ERDMA_WC_VENDOR_RQE_INVALID_STAG = 2, -+ ERDMA_WC_VENDOR_RQE_ADDR_VIOLATION = 3, -+ ERDMA_WC_VENDOR_RQE_ACCESS_RIGHT_ERR = 4, -+ ERDMA_WC_VENDOR_RQE_INVALID_PD = 5, -+ ERDMA_WC_VENDOR_RQE_WRAP_ERR = 6, -+ ERDMA_WC_VENDOR_INVALID_SQE = 0x20, -+ ERDMA_WC_VENDOR_ZERO_ORD = 0x21, -+ ERDMA_WC_VENDOR_SQE_INVALID_STAG = 0x30, -+ ERDMA_WC_VENDOR_SQE_ADDR_VIOLATION = 0x31, -+ ERDMA_WC_VENDOR_SQE_ACCESS_ERR = 0x32, -+ ERDMA_WC_VENDOR_SQE_INVALID_PD = 0x33, -+ ERDMA_WC_VENDOR_SQE_WARP_ERR = 0x34 -+}; -+ -+/* Doorbell related. */ -+#define ERDMA_CQDB_IDX_MASK GENMASK_ULL(63, 56) -+#define ERDMA_CQDB_CQN_MASK GENMASK_ULL(55, 32) -+#define ERDMA_CQDB_ARM_MASK BIT_ULL(31) -+#define ERDMA_CQDB_SOL_MASK BIT_ULL(30) -+#define ERDMA_CQDB_CMDSN_MASK GENMASK_ULL(29, 28) -+#define ERDMA_CQDB_CI_MASK GENMASK_ULL(23, 0) -+ -+#define ERDMA_CQE_QTYPE_SQ 0 -+#define ERDMA_CQE_QTYPE_RQ 1 -+#define ERDMA_CQE_QTYPE_CMDQ 2 -+ -+/* CQE hdr */ -+#define ERDMA_CQE_HDR_OWNER_MASK BIT(31) -+#define ERDMA_CQE_HDR_OPCODE_MASK GENMASK(23, 16) -+#define ERDMA_CQE_HDR_QTYPE_MASK GENMASK(15, 8) -+#define ERDMA_CQE_HDR_SYNDROME_MASK GENMASK(7, 0) -+ -+struct erdma_cqe { -+ __be32 hdr; -+ __be32 qe_idx; -+ __be32 qpn; -+ __le32 imm_data; -+ __be32 size; -+ __be32 rsvd[3]; -+}; -+ -+struct erdma_sge { -+ __aligned_le64 laddr; -+ __le32 length; -+ __le32 lkey; -+}; -+ -+/* Receive Queue Element */ -+struct erdma_rqe { -+ __le16 qe_idx; -+ __le16 rsvd; -+ __le32 qpn; -+ __le32 rsvd2; -+ __le32 rsvd3; -+ __le64 to; -+ __le32 length; -+ __le32 stag; -+}; -+ -+/* SQE */ -+#define ERDMA_SQE_HDR_SGL_LEN_MASK GENMASK_ULL(63, 56) -+#define ERDMA_SQE_HDR_WQEBB_CNT_MASK GENMASK_ULL(54, 52) -+#define ERDMA_SQE_HDR_QPN_MASK GENMASK_ULL(51, 32) -+#define ERDMA_SQE_HDR_OPCODE_MASK GENMASK_ULL(31, 27) -+#define ERDMA_SQE_HDR_DWQE_MASK BIT_ULL(26) -+#define ERDMA_SQE_HDR_INLINE_MASK BIT_ULL(25) -+#define ERDMA_SQE_HDR_FENCE_MASK BIT_ULL(24) -+#define ERDMA_SQE_HDR_SE_MASK BIT_ULL(23) -+#define ERDMA_SQE_HDR_CE_MASK BIT_ULL(22) -+#define ERDMA_SQE_HDR_WQEBB_INDEX_MASK GENMASK_ULL(15, 0) -+ -+struct erdma_write_sqe { -+ __le64 hdr; -+ __be32 imm_data; -+ __le32 length; -+ -+ __le32 sink_stag; -+ /* avoid sink_to not 8-byte aligned. */ -+ __le32 sink_to_low; -+ __le32 sink_to_high; -+ -+ __le32 rsvd; -+ -+ struct erdma_sge sgl[0]; -+}; -+ -+struct erdma_send_sqe { -+ __le64 hdr; -+ union { -+ __be32 imm_data; -+ __le32 invalid_stag; -+ }; -+ __le32 length; -+ struct erdma_sge sgl[0]; -+}; -+ -+struct erdma_readreq_sqe { -+ __le64 hdr; -+ __le32 invalid_stag; -+ __le32 length; -+ __le32 sink_stag; -+ /* avoid sink_to not 8-byte aligned. */ -+ __le32 sink_to_low; -+ __le32 sink_to_high; -+ __le32 rsvd0; -+ struct erdma_sge sgl; -+}; -+ -+struct erdma_atomic_sqe { -+ __le64 hdr; -+ __le64 rsvd; -+ __le64 fetchadd_swap_data; -+ __le64 cmp_data; -+ -+ struct erdma_sge remote; -+ struct erdma_sge sgl; -+}; -+ -+#endif -diff --git a/providers/erdma/erdma_verbs.h b/providers/erdma/erdma_verbs.h -new file mode 100644 -index 000000000000..d9820c5a20d5 ---- /dev/null -+++ b/providers/erdma/erdma_verbs.h -@@ -0,0 +1,153 @@ -+/* SPDX-License-Identifier: GPL-2.0 or OpenIB.org BSD (MIT) See COPYING file */ -+/* -+ * Authors: Cheng Xu -+ * Copyright (c) 2020-2021, Alibaba Group. -+ */ -+ -+#ifndef __ERDMA_VERBS_H__ -+#define __ERDMA_VERBS_H__ -+ -+#include -+#include -+#include -+ -+#include "erdma.h" -+#include "erdma_hw.h" -+ -+#define ERDMA_MAX_SEND_SGE 6 -+#define ERDMA_MAX_RECV_SGE 1 -+ -+struct erdma_queue { -+ void *qbuf; -+ void *db; -+ -+ uint16_t rsvd0; -+ uint16_t depth; -+ uint32_t size; -+ -+ uint16_t pi; -+ uint16_t ci; -+ -+ uint32_t rsvd1; -+ uint64_t *wr_tbl; -+ -+ void *db_record; -+}; -+ -+struct erdma_qp { -+ struct verbs_qp verbs_qp; -+ struct erdma_device *erdma_dev; -+ -+ uint32_t id; /* qpn */ -+ -+ pthread_spinlock_t sq_lock; -+ pthread_spinlock_t rq_lock; -+ -+ int sq_sig_all; -+ int disable_dwqe; -+ pthread_spinlock_t *sdb_lock; -+ -+ struct erdma_queue sq; -+ struct erdma_queue rq; -+ -+ void *qbuf; -+ size_t qbuf_size; -+ uint64_t *db_records; -+ -+ void *cur_sqe; -+ uint16_t cur_pi; -+ uint16_t sq_pi_rb; -+ uint32_t sgl_off; /* SGL offset in wqebb. */ -+ __le32 *length_field; /* The total length field in SQE header */ -+ int err; -+}; -+ -+struct erdma_cq { -+ struct ibv_cq base_cq; -+ struct erdma_device *erdma_dev; -+ uint32_t id; -+ -+ uint32_t event_stats; -+ -+ uint32_t depth; -+ uint32_t ci; -+ struct erdma_cqe *queue; -+ -+ void *db; -+ uint16_t db_offset; -+ -+ void *db_record; -+ uint32_t cmdsn; -+ uint32_t comp_vector; -+ uint32_t db_index; -+ -+ pthread_spinlock_t lock; -+}; -+ -+static inline struct erdma_cq *to_ecq(struct ibv_cq *base) -+{ -+ return container_of(base, struct erdma_cq, base_cq); -+} -+ -+static inline struct erdma_qp *to_eqp(struct ibv_qp *ibqp) -+{ -+ return container_of(ibqp, struct erdma_qp, verbs_qp.qp); -+} -+ -+static inline void *get_sq_wqebb(struct erdma_qp *qp, uint16_t idx) -+{ -+ idx &= (qp->sq.depth - 1); -+ return qp->sq.qbuf + (idx << SQEBB_SHIFT); -+} -+ -+static inline void __kick_sq_db(struct erdma_qp *qp, uint16_t pi) -+{ -+ uint64_t db_data; -+ -+ db_data = FIELD_PREP(ERDMA_SQE_HDR_QPN_MASK, qp->id) | -+ FIELD_PREP(ERDMA_SQE_HDR_WQEBB_INDEX_MASK, pi); -+ -+ *(__le64 *)qp->sq.db_record = htole64(db_data); -+ udma_to_device_barrier(); -+ mmio_write64_le(qp->sq.db, htole64(db_data)); -+} -+ -+struct ibv_pd *erdma_alloc_pd(struct ibv_context *ctx); -+int erdma_free_pd(struct ibv_pd *pd); -+ -+int erdma_query_device(struct ibv_context *ctx, -+ const struct ibv_query_device_ex_input *input, -+ struct ibv_device_attr_ex *attr, size_t attr_size); -+int erdma_query_port(struct ibv_context *ctx, uint8_t port, -+ struct ibv_port_attr *attr); -+ -+struct ibv_mr *erdma_reg_mr(struct ibv_pd *pd, void *addr, size_t len, -+ uint64_t hca_va, int access); -+int erdma_dereg_mr(struct verbs_mr *vmr); -+ -+struct ibv_qp *erdma_create_qp(struct ibv_pd *pd, -+ struct ibv_qp_init_attr *attr); -+struct ibv_qp *erdma_create_qp_ex(struct ibv_context *context, -+ struct ibv_qp_init_attr_ex *attr); -+int erdma_modify_qp(struct ibv_qp *base_qp, struct ibv_qp_attr *attr, -+ int attr_mask); -+int erdma_query_qp(struct ibv_qp *qp, struct ibv_qp_attr *attr, int attr_mask, -+ struct ibv_qp_init_attr *init_attr); -+int erdma_post_send(struct ibv_qp *base_qp, struct ibv_send_wr *wr, -+ struct ibv_send_wr **bad_wr); -+int erdma_post_recv(struct ibv_qp *base_qp, struct ibv_recv_wr *wr, -+ struct ibv_recv_wr **bad_wr); -+int erdma_destroy_qp(struct ibv_qp *base_qp); -+ -+void erdma_free_context(struct ibv_context *ibv_ctx); -+ -+struct ibv_cq *erdma_create_cq(struct ibv_context *ctx, int num_cqe, -+ struct ibv_comp_channel *channel, -+ int comp_vector); -+int erdma_destroy_cq(struct ibv_cq *base_cq); -+int erdma_notify_cq(struct ibv_cq *ibcq, int solicited); -+void erdma_cq_event(struct ibv_cq *ibcq); -+int erdma_poll_cq(struct ibv_cq *ibcq, int num_entries, struct ibv_wc *wc); -+ -+ -+#endif --- -2.37.0 - diff --git a/1002-providers-erdma-Add-userspace-verbs-implementation.patch b/1002-providers-erdma-Add-userspace-verbs-implementation.patch deleted file mode 100644 index 5278525..0000000 --- a/1002-providers-erdma-Add-userspace-verbs-implementation.patch +++ /dev/null @@ -1,1656 +0,0 @@ -From 08f8e55dde433e81ce241003835cdb29dd2708cf Mon Sep 17 00:00:00 2001 -Message-Id: <08f8e55dde433e81ce241003835cdb29dd2708cf.1669880730.git.chengyou@linux.alibaba.com> -In-Reply-To: -References: -From: Cheng Xu -Date: Thu, 1 Dec 2022 15:39:02 +0800 -Subject: [PATCH 2/4] providers/erdma: Add userspace verbs implementation - -Implementation of the erdma's 'struct verbs_context_ops' interface. -Due to doorbells may be drop by hardware in some situations, such as -hardware hot-upgrade, driver will keep the latest doorbell value of each -QP and CQ. So we introduce the doorbell records to store the latest -doorbell values also. - -Signed-off-by: Cheng Xu ---- - providers/erdma/erdma_db.c | 115 +++ - providers/erdma/erdma_verbs.c | 1504 +++++++++++++++++++++++++++++++++ - 2 files changed, 1619 insertions(+) - create mode 100644 providers/erdma/erdma_db.c - create mode 100644 providers/erdma/erdma_verbs.c - -diff --git a/providers/erdma/erdma_db.c b/providers/erdma/erdma_db.c -new file mode 100644 -index 000000000000..382262da5f48 ---- /dev/null -+++ b/providers/erdma/erdma_db.c -@@ -0,0 +1,115 @@ -+// SPDX-License-Identifier: GPL-2.0 or OpenIB.org BSD (MIT) See COPYING file -+ -+// Authors: Cheng Xu -+// Copyright (c) 2020-2021, Alibaba Group. -+ -+// Copyright (c) 2012 Mellanox Technologies, Inc. All rights reserved. -+ -+#define _GNU_SOURCE -+#include -+#include -+#include -+#include -+ -+#include "erdma.h" -+#include "erdma_db.h" -+ -+#define ERDMA_DBRECORDS_SIZE 16 -+ -+struct erdma_dbrecord_page { -+ struct erdma_dbrecord_page *prev, *next; -+ void *page_buf; -+ int cnt; -+ int used; -+ unsigned long free[0]; -+}; -+ -+uint64_t *erdma_alloc_dbrecords(struct erdma_context *ctx) -+{ -+ int bits_perlong = (8 * sizeof(unsigned long)); -+ struct erdma_dbrecord_page *page = NULL; -+ int dbrecords_per_page, nlongs = 0; -+ uint64_t *db_records = NULL; -+ int i, j, rv; -+ -+ pthread_mutex_lock(&ctx->dbrecord_pages_mutex); -+ -+ for (page = ctx->dbrecord_pages; page; page = page->next) -+ if (page->used < page->cnt) -+ goto found; -+ -+ dbrecords_per_page = ctx->page_size / ERDMA_DBRECORDS_SIZE; -+ nlongs = align(dbrecords_per_page, bits_perlong) / bits_perlong; -+ page = malloc(sizeof(*page) + nlongs * sizeof(unsigned long)); -+ if (!page) -+ goto out; -+ -+ rv = posix_memalign(&page->page_buf, ctx->page_size, ctx->page_size); -+ if (rv) { -+ free(page); -+ goto out; -+ } -+ -+ page->cnt = dbrecords_per_page; -+ page->used = 0; -+ for (i = 0; i < nlongs; i++) -+ page->free[i] = ~0UL; -+ -+ page->prev = NULL; -+ page->next = ctx->dbrecord_pages; -+ ctx->dbrecord_pages = page; -+ if (page->next) -+ page->next->prev = page; -+ -+found: -+ ++page->used; -+ -+ for (i = 0; !page->free[i]; ++i) -+ ; /* nothing */ -+ -+ j = ffsl(page->free[i]) - 1; -+ page->free[i] &= ~(1UL << j); -+ -+ db_records = -+ page->page_buf + (i * bits_perlong + j) * ERDMA_DBRECORDS_SIZE; -+ -+out: -+ pthread_mutex_unlock(&ctx->dbrecord_pages_mutex); -+ -+ return db_records; -+} -+ -+void erdma_dealloc_dbrecords(struct erdma_context *ctx, uint64_t *dbrecords) -+{ -+ struct erdma_dbrecord_page *page; -+ int page_mask = ~(ctx->page_size - 1); -+ int idx; -+ -+ pthread_mutex_lock(&ctx->dbrecord_pages_mutex); -+ for (page = ctx->dbrecord_pages; page; page = page->next) -+ if (((uintptr_t)dbrecords & page_mask) == -+ (uintptr_t)page->page_buf) -+ break; -+ -+ if (!page) -+ goto out; -+ -+ idx = ((void *)dbrecords - page->page_buf) / ERDMA_DBRECORDS_SIZE; -+ page->free[idx / (8 * sizeof(unsigned long))] |= -+ 1UL << (idx % (8 * sizeof(unsigned long))); -+ -+ if (!--page->used) { -+ if (page->prev) -+ page->prev->next = page->next; -+ else -+ ctx->dbrecord_pages = page->next; -+ if (page->next) -+ page->next->prev = page->prev; -+ -+ free(page->page_buf); -+ free(page); -+ } -+ -+out: -+ pthread_mutex_unlock(&ctx->dbrecord_pages_mutex); -+} -diff --git a/providers/erdma/erdma_verbs.c b/providers/erdma/erdma_verbs.c -new file mode 100644 -index 000000000000..d665e0401b60 ---- /dev/null -+++ b/providers/erdma/erdma_verbs.c -@@ -0,0 +1,1504 @@ -+// SPDX-License-Identifier: GPL-2.0 or BSD-3-Clause -+ -+// Authors: Cheng Xu -+// Copyright (c) 2020-2021, Alibaba Group. -+// Authors: Bernard Metzler -+// Copyright (c) 2008-2019, IBM Corporation -+ -+#include -+#include -+#ifdef HAVE_AVX_SUPPORT -+#include -+#else -+#define _mm256_store_si256(a, b) fprintf(stderr, "not supported") -+#endif -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+ -+#include "erdma.h" -+#include "erdma_abi.h" -+#include "erdma_db.h" -+#include "erdma_hw.h" -+#include "erdma_verbs.h" -+ -+int erdma_query_device(struct ibv_context *ctx, -+ const struct ibv_query_device_ex_input *input, -+ struct ibv_device_attr_ex *attr, size_t attr_size) -+{ -+ struct ib_uverbs_ex_query_device_resp resp; -+ unsigned int major, minor, sub_minor; -+ size_t resp_size = sizeof(resp); -+ uint64_t raw_fw_ver; -+ int rv; -+ -+ rv = ibv_cmd_query_device_any(ctx, input, attr, attr_size, &resp, -+ &resp_size); -+ if (rv) -+ return rv; -+ -+ raw_fw_ver = resp.base.fw_ver; -+ major = (raw_fw_ver >> 32) & 0xffff; -+ minor = (raw_fw_ver >> 16) & 0xffff; -+ sub_minor = raw_fw_ver & 0xffff; -+ -+ snprintf(attr->orig_attr.fw_ver, sizeof(attr->orig_attr.fw_ver), -+ "%d.%d.%d", major, minor, sub_minor); -+ -+ return 0; -+} -+ -+int erdma_query_port(struct ibv_context *ctx, uint8_t port, -+ struct ibv_port_attr *attr) -+{ -+ struct ibv_query_port cmd = {}; -+ -+ return ibv_cmd_query_port(ctx, port, attr, &cmd, sizeof(cmd)); -+} -+ -+int erdma_query_qp(struct ibv_qp *qp, struct ibv_qp_attr *attr, int attr_mask, -+ struct ibv_qp_init_attr *init_attr) -+{ -+ struct ibv_query_qp cmd = {}; -+ -+ return ibv_cmd_query_qp(qp, attr, attr_mask, init_attr, &cmd, -+ sizeof(cmd)); -+} -+ -+struct ibv_pd *erdma_alloc_pd(struct ibv_context *ctx) -+{ -+ struct ib_uverbs_alloc_pd_resp resp; -+ struct ibv_alloc_pd cmd = {}; -+ struct ibv_pd *pd; -+ -+ pd = calloc(1, sizeof(*pd)); -+ if (!pd) -+ return NULL; -+ -+ if (ibv_cmd_alloc_pd(ctx, pd, &cmd, sizeof(cmd), &resp, sizeof(resp))) { -+ free(pd); -+ return NULL; -+ } -+ -+ return pd; -+} -+ -+int erdma_free_pd(struct ibv_pd *pd) -+{ -+ int rv; -+ -+ rv = ibv_cmd_dealloc_pd(pd); -+ if (rv) -+ return rv; -+ -+ free(pd); -+ return 0; -+} -+ -+struct ibv_mr *erdma_reg_mr(struct ibv_pd *pd, void *addr, size_t len, -+ uint64_t hca_va, int access) -+{ -+ struct ib_uverbs_reg_mr_resp resp; -+ struct ibv_reg_mr cmd; -+ struct verbs_mr *vmr; -+ int ret; -+ -+ vmr = calloc(1, sizeof(*vmr)); -+ if (!vmr) -+ return NULL; -+ -+ ret = ibv_cmd_reg_mr(pd, addr, len, hca_va, access, vmr, &cmd, -+ sizeof(cmd), &resp, sizeof(resp)); -+ if (ret) { -+ free(vmr); -+ return NULL; -+ } -+ -+ return &vmr->ibv_mr; -+} -+ -+int erdma_dereg_mr(struct verbs_mr *vmr) -+{ -+ int ret; -+ -+ ret = ibv_cmd_dereg_mr(vmr); -+ if (ret) -+ return ret; -+ -+ free(vmr); -+ return 0; -+} -+ -+int erdma_notify_cq(struct ibv_cq *ibcq, int solicited) -+{ -+ struct erdma_cq *cq = to_ecq(ibcq); -+ uint64_t db_data; -+ int ret; -+ -+ ret = pthread_spin_lock(&cq->lock); -+ if (ret) -+ return ret; -+ -+ db_data = FIELD_PREP(ERDMA_CQDB_IDX_MASK, cq->db_index) | -+ FIELD_PREP(ERDMA_CQDB_CQN_MASK, cq->id) | -+ FIELD_PREP(ERDMA_CQDB_ARM_MASK, 1) | -+ FIELD_PREP(ERDMA_CQDB_SOL_MASK, solicited) | -+ FIELD_PREP(ERDMA_CQDB_CMDSN_MASK, cq->cmdsn) | -+ FIELD_PREP(ERDMA_CQDB_CI_MASK, cq->ci); -+ -+ *(__le64 *)cq->db_record = htole64(db_data); -+ cq->db_index++; -+ udma_to_device_barrier(); -+ mmio_write64_le(cq->db, htole64(db_data)); -+ -+ pthread_spin_unlock(&cq->lock); -+ -+ return ret; -+} -+ -+struct ibv_cq *erdma_create_cq(struct ibv_context *ctx, int num_cqe, -+ struct ibv_comp_channel *channel, -+ int comp_vector) -+{ -+ struct erdma_context *ectx = to_ectx(ctx); -+ struct erdma_cmd_create_cq_resp resp = {}; -+ struct erdma_cmd_create_cq cmd = {}; -+ uint64_t *db_records = NULL; -+ struct erdma_cq *cq; -+ size_t cq_size; -+ int rv; -+ -+ cq = calloc(1, sizeof(*cq)); -+ if (!cq) -+ return NULL; -+ -+ if (num_cqe < 64) -+ num_cqe = 64; -+ -+ num_cqe = roundup_pow_of_two(num_cqe); -+ cq_size = align(num_cqe * sizeof(struct erdma_cqe), ERDMA_PAGE_SIZE); -+ -+ rv = posix_memalign((void **)&cq->queue, ERDMA_PAGE_SIZE, cq_size); -+ if (rv) { -+ errno = rv; -+ free(cq); -+ return NULL; -+ } -+ -+ rv = ibv_dontfork_range(cq->queue, cq_size); -+ if (rv) { -+ errno = rv; -+ free(cq->queue); -+ cq->queue = NULL; -+ goto error_alloc; -+ } -+ -+ memset(cq->queue, 0, cq_size); -+ -+ db_records = erdma_alloc_dbrecords(ectx); -+ if (!db_records) { -+ errno = ENOMEM; -+ goto error_alloc; -+ } -+ -+ cmd.db_record_va = (uintptr_t)db_records; -+ cmd.qbuf_va = (uintptr_t)cq->queue; -+ cmd.qbuf_len = cq_size; -+ -+ rv = ibv_cmd_create_cq(ctx, num_cqe, channel, comp_vector, &cq->base_cq, -+ &cmd.ibv_cmd, sizeof(cmd), &resp.ibv_resp, -+ sizeof(resp)); -+ if (rv) { -+ errno = EIO; -+ goto error_alloc; -+ } -+ -+ pthread_spin_init(&cq->lock, PTHREAD_PROCESS_PRIVATE); -+ -+ *db_records = 0; -+ cq->db_record = db_records; -+ -+ cq->id = resp.cq_id; -+ cq->depth = resp.num_cqe; -+ -+ cq->db = ectx->cdb; -+ cq->db_offset = (cq->id & (ERDMA_PAGE_SIZE / ERDMA_CQDB_SIZE - 1)) * -+ ERDMA_CQDB_SIZE; -+ cq->db += cq->db_offset; -+ -+ cq->comp_vector = comp_vector; -+ -+ return &cq->base_cq; -+ -+error_alloc: -+ if (db_records) -+ erdma_dealloc_dbrecords(ectx, db_records); -+ -+ if (cq->queue) { -+ ibv_dofork_range(cq->queue, cq_size); -+ free(cq->queue); -+ } -+ -+ free(cq); -+ -+ return NULL; -+} -+ -+int erdma_destroy_cq(struct ibv_cq *base_cq) -+{ -+ struct erdma_context *ctx = to_ectx(base_cq->context); -+ struct erdma_cq *cq = to_ecq(base_cq); -+ int rv; -+ -+ pthread_spin_lock(&cq->lock); -+ rv = ibv_cmd_destroy_cq(base_cq); -+ if (rv) { -+ pthread_spin_unlock(&cq->lock); -+ errno = EIO; -+ return rv; -+ } -+ pthread_spin_destroy(&cq->lock); -+ -+ if (cq->db_record) -+ erdma_dealloc_dbrecords(ctx, cq->db_record); -+ -+ if (cq->queue) { -+ ibv_dofork_range(cq->queue, cq->depth << CQE_SHIFT); -+ free(cq->queue); -+ } -+ -+ free(cq); -+ -+ return 0; -+} -+ -+static inline void kick_hw_sqe(struct erdma_qp *qp, uint16_t pi, -+ uint32_t wqebb_cnt) -+{ -+ uint16_t idx = pi & (qp->sq.depth - 1); -+ void *sqe = get_sq_wqebb(qp, idx); -+ uint32_t i; -+ -+ pthread_spin_lock(qp->sdb_lock); -+ -+ *(__le64 *)qp->sq.db_record = htole64(*(uint64_t *)sqe); -+ -+ udma_to_device_barrier(); -+ -+ for (i = 0; i < wqebb_cnt; i++) { -+ _mm256_store_si256(qp->sq.db + (i << 5), -+ _mm256_load_si256(sqe)); -+ sqe = get_sq_wqebb(qp, idx + i + 1); -+ } -+ -+ mmio_flush_writes(); -+ -+ pthread_spin_unlock(qp->sdb_lock); -+} -+ -+enum { -+ ERDMA_SUPPORTED_SEND_OPS_FLAGS_RC = -+ IBV_QP_EX_WITH_SEND | IBV_QP_EX_WITH_SEND_WITH_INV | -+ IBV_QP_EX_WITH_SEND_WITH_IMM | IBV_QP_EX_WITH_RDMA_WRITE | -+ IBV_QP_EX_WITH_RDMA_WRITE_WITH_IMM | IBV_QP_EX_WITH_RDMA_READ | -+ IBV_QP_EX_WITH_ATOMIC_CMP_AND_SWP | -+ IBV_QP_EX_WITH_ATOMIC_FETCH_AND_ADD, -+}; -+ -+static void erdma_wr_start(struct ibv_qp_ex *ibqp) -+{ -+ struct erdma_qp *qp = to_eqp(&ibqp->qp_base); -+ -+ if (ibqp->qp_base.state == IBV_QPS_ERR) { -+ qp->err = -EIO; -+ return; -+ } -+ -+ pthread_spin_lock(&qp->sq_lock); -+ -+ qp->err = 0; -+ qp->sq_pi_rb = qp->sq.pi; -+} -+ -+static void erdma_wr_abort(struct ibv_qp_ex *ibqp) -+{ -+ struct erdma_qp *qp = to_eqp(&ibqp->qp_base); -+ -+ /* Rolling back */ -+ qp->sq.pi = qp->sq_pi_rb; -+ -+ pthread_spin_unlock(&qp->sq_lock); -+} -+ -+static int erdma_wr_complete(struct ibv_qp_ex *ibqp) -+{ -+ struct erdma_qp *qp = to_eqp(&ibqp->qp_base); -+ int err = qp->err; -+ -+ if (unlikely(err)) { -+ qp->sq.pi = qp->sq_pi_rb; -+ goto out; -+ } -+ -+ __kick_sq_db(qp, qp->sq.pi); /* normal doorbell. */ -+ -+out: -+ pthread_spin_unlock(&qp->sq_lock); -+ -+ return err; -+} -+ -+static void *erdma_init_sqe(struct erdma_qp *qp, uint64_t wr_id, -+ unsigned int wr_flags, enum erdma_opcode opcode) -+{ -+ uint16_t sq_pi = qp->sq.pi; -+ uint64_t sqe_hdr; -+ void *sqe; -+ -+ if ((uint16_t)(sq_pi - qp->sq.ci) >= qp->sq.depth) { -+ qp->err = ENOMEM; -+ qp->cur_sqe = NULL; -+ return NULL; -+ } -+ -+ sqe = get_sq_wqebb(qp, sq_pi); -+ -+ /* Clear the first 8Byte of the wqe hdr. */ -+ *(uint64_t *)sqe = 0; -+ qp->sq.wr_tbl[sq_pi & (qp->sq.depth - 1)] = wr_id; -+ -+ sqe_hdr = FIELD_PREP(ERDMA_SQE_HDR_QPN_MASK, qp->id); -+ sqe_hdr |= FIELD_PREP(ERDMA_SQE_HDR_CE_MASK, -+ wr_flags & IBV_SEND_SIGNALED ? 1 : 0); -+ sqe_hdr |= FIELD_PREP(ERDMA_SQE_HDR_CE_MASK, qp->sq_sig_all); -+ sqe_hdr |= FIELD_PREP(ERDMA_SQE_HDR_SE_MASK, -+ wr_flags & IBV_SEND_SOLICITED ? 1 : 0); -+ sqe_hdr |= FIELD_PREP(ERDMA_SQE_HDR_FENCE_MASK, -+ wr_flags & IBV_SEND_FENCE ? 1 : 0); -+ sqe_hdr |= FIELD_PREP(ERDMA_SQE_HDR_INLINE_MASK, -+ wr_flags & IBV_SEND_INLINE ? 1 : 0); -+ sqe_hdr |= FIELD_PREP(ERDMA_SQE_HDR_OPCODE_MASK, opcode); -+ -+ *(__le64 *)sqe = htole64(sqe_hdr); -+ qp->cur_sqe = sqe; -+ -+ return sqe; -+} -+ -+static void erdma_wr_send(struct ibv_qp_ex *ibqp) -+{ -+ struct erdma_qp *qp = to_eqp(&ibqp->qp_base); -+ -+ if (!erdma_init_sqe(qp, ibqp->wr_id, ibqp->wr_flags, ERDMA_OP_SEND)) -+ return; -+ -+ qp->cur_pi = qp->sq.pi; -+ qp->sgl_off = 16; -+} -+ -+static void erdma_wr_rdma_write(struct ibv_qp_ex *ibqp, uint32_t rkey, -+ uint64_t raddr) -+{ -+ struct erdma_qp *qp = to_eqp(&ibqp->qp_base); -+ struct erdma_write_sqe *sqe; -+ -+ sqe = erdma_init_sqe(qp, ibqp->wr_id, ibqp->wr_flags, ERDMA_OP_WRITE); -+ if (!sqe) -+ return; -+ -+ qp->cur_pi = qp->sq.pi + 1; -+ qp->sgl_off = 0; -+ sqe->sink_to_low = htole32(raddr & 0xffffffff); -+ sqe->sink_to_high = htole32((raddr >> 32) & 0xffffffff); -+ sqe->sink_stag = htole32(rkey); -+} -+ -+static void erdma_wr_set_sge(struct ibv_qp_ex *ibqp, uint32_t lkey, -+ uint64_t addr, uint32_t length) -+{ -+ struct erdma_qp *qp = to_eqp(&ibqp->qp_base); -+ uint64_t *sqe_hdr = qp->cur_sqe; -+ struct erdma_sge *sge; -+ uint32_t wqebb_cnt; -+ -+ sge = get_sq_wqebb(qp, qp->cur_pi) + qp->sgl_off; -+ sge->laddr = addr; -+ sge->lkey = lkey; -+ sge->length = length; -+ -+ if (qp->length_field) -+ *qp->length_field = htole32(length); -+ -+ *sqe_hdr |= FIELD_PREP(ERDMA_SQE_HDR_SGL_LEN_MASK, 1); -+ -+ /* -+ * This is the last SQE in SQE, -+ * so anyhow we need to move the cursor to next sqebb. -+ */ -+ qp->cur_pi++; -+ wqebb_cnt = qp->cur_pi - qp->sq.pi; -+ -+ assert(wqebb_cnt <= MAX_WQEBB_PER_SQE); -+ *sqe_hdr |= FIELD_PREP(ERDMA_SQE_HDR_WQEBB_CNT_MASK, wqebb_cnt - 1); -+ *sqe_hdr |= FIELD_PREP(ERDMA_SQE_HDR_WQEBB_INDEX_MASK, qp->cur_pi); -+ -+ qp->sq.pi = qp->cur_pi; -+} -+ -+static void erdma_wr_set_sge_list(struct ibv_qp_ex *ibqp, size_t num_sge, -+ const struct ibv_sge *sg_list) -+{ -+ struct erdma_qp *qp = to_eqp(&ibqp->qp_base); -+ uint64_t *sqe_hdr = qp->cur_sqe; -+ uint32_t wqebb_cnt, sqebb_half; -+ struct erdma_sge *sge; -+ uint32_t i, bytes = 0; -+ -+ sge = get_sq_wqebb(qp, qp->cur_pi) + qp->sgl_off; -+ -+ if (num_sge > ERDMA_MAX_SEND_SGE) { -+ qp->err = EINVAL; -+ return; -+ } -+ -+ sqebb_half = !!qp->sgl_off; -+ -+ for (i = 0; i < num_sge; i++) { -+ sge->laddr = sg_list[i].addr; -+ sge->lkey = sg_list[i].lkey; -+ sge->length = sg_list[i].length; -+ bytes += sge->length; -+ -+ if (sqebb_half) { -+ qp->cur_pi++; -+ sge = get_sq_wqebb(qp, qp->cur_pi); -+ } else { -+ *((uint32_t *)sge + 7) = qp->id; -+ sge++; -+ } -+ -+ sqebb_half = !sqebb_half; -+ } -+ -+ qp->cur_pi += sqebb_half; -+ -+ if (qp->length_field) -+ *qp->length_field = htole32(bytes); -+ -+ *sqe_hdr |= FIELD_PREP(ERDMA_SQE_HDR_SGL_LEN_MASK, num_sge); -+ -+ wqebb_cnt = qp->cur_pi - qp->sq.pi; -+ assert(wqebb_cnt <= MAX_WQEBB_PER_SQE); -+ *sqe_hdr |= FIELD_PREP(ERDMA_SQE_HDR_WQEBB_CNT_MASK, wqebb_cnt - 1); -+ *sqe_hdr |= FIELD_PREP(ERDMA_SQE_HDR_WQEBB_INDEX_MASK, qp->cur_pi); -+ -+ qp->sq.pi = qp->cur_pi; -+} -+ -+static size_t free_space_to_tail(struct erdma_qp *qp, uint16_t cur_idx) -+{ -+ cur_idx &= qp->sq.depth - 1; -+ -+ return (qp->sq.depth - cur_idx) << SQEBB_SHIFT; -+} -+ -+static void erdma_wr_set_inline_data(struct ibv_qp_ex *ibqp, void *addr, -+ size_t length) -+{ -+ struct erdma_qp *qp = to_eqp(&ibqp->qp_base); -+ uint64_t *sqe_hdr = qp->cur_sqe; -+ uint32_t wqebb_cnt; -+ size_t space; -+ void *data; -+ -+ if (length > ERDMA_MAX_INLINE) { -+ qp->err = EINVAL; -+ return; -+ } -+ -+ data = get_sq_wqebb(qp, qp->cur_pi) + qp->sgl_off; -+ -+ if (qp->length_field) -+ *qp->length_field = htole32(length); -+ -+ space = free_space_to_tail(qp, qp->cur_pi) - qp->sgl_off; -+ -+ if (space >= length) { -+ memcpy(data, addr, length); -+ } else { -+ memcpy(data, addr, length - space); -+ memcpy(qp->sq.qbuf, addr + length - space, space); -+ } -+ -+ *sqe_hdr |= FIELD_PREP(ERDMA_SQE_HDR_SGL_LEN_MASK, length); -+ -+ /* -+ * This is the last SQE in SQE, -+ * so anyhow we need to move the cursor to next free sqebb. -+ */ -+ qp->cur_pi += SQEBB_COUNT(qp->sgl_off + length); -+ wqebb_cnt = qp->cur_pi - qp->sq.pi; -+ -+ assert(wqebb_cnt <= MAX_WQEBB_PER_SQE); -+ *sqe_hdr |= FIELD_PREP(ERDMA_SQE_HDR_WQEBB_CNT_MASK, wqebb_cnt - 1); -+ *sqe_hdr |= FIELD_PREP(ERDMA_SQE_HDR_WQEBB_INDEX_MASK, qp->cur_pi); -+ -+ qp->sq.pi = qp->cur_pi; -+} -+ -+static void erdma_wr_set_inline_data_list(struct ibv_qp_ex *ibqp, -+ size_t num_buf, -+ const struct ibv_data_buf *buf_list) -+{ -+ struct erdma_qp *qp = to_eqp(&ibqp->qp_base); -+ size_t space, total_size = 0, i; -+ uint64_t *sqe_hdr = qp->cur_sqe; -+ uint32_t wqebb_cnt; -+ void *data; -+ -+ data = get_sq_wqebb(qp, qp->cur_pi) + qp->sgl_off; -+ -+ space = free_space_to_tail(qp, qp->cur_pi) - qp->sgl_off; -+ for (i = 0; i < num_buf; i++) -+ total_size += buf_list[i].length; -+ -+ if (total_size > ERDMA_MAX_INLINE) { -+ qp->err = EINVAL; -+ return; -+ } -+ -+ if (qp->length_field) -+ *qp->length_field = htole32(total_size); -+ -+ if (space >= total_size) { -+ for (i = 0; i < num_buf; i++) { -+ memcpy(data, buf_list[i].addr, buf_list[i].length); -+ data += buf_list[i].length; -+ } -+ } else { -+ for (i = 0; i < num_buf; i++) { -+ if (space > buf_list[i].length) { -+ memcpy(data, buf_list[i].addr, -+ buf_list[i].length); -+ data += buf_list[i].length; -+ space -= buf_list[i].length; -+ } else { -+ if (space != 0) -+ memcpy(data, buf_list[i].addr, space); -+ -+ data = get_sq_wqebb(qp, 0); -+ memcpy(data, buf_list[i].addr + space, -+ buf_list[i].length - space); -+ data += buf_list[i].length - space; -+ space = free_space_to_tail(qp, 0) - -+ buf_list[i].length - space; -+ } -+ } -+ } -+ -+ *sqe_hdr |= FIELD_PREP(ERDMA_SQE_HDR_SGL_LEN_MASK, total_size); -+ -+ /* -+ * This is the last SQE in SQE, -+ * so anyhow we need to move the cursor to next free sqebb. -+ */ -+ qp->cur_pi += SQEBB_COUNT(qp->sgl_off + total_size); -+ wqebb_cnt = qp->cur_pi - qp->sq.pi; -+ -+ assert(wqebb_cnt <= MAX_WQEBB_PER_SQE); -+ *sqe_hdr |= FIELD_PREP(ERDMA_SQE_HDR_WQEBB_CNT_MASK, wqebb_cnt - 1); -+ *sqe_hdr |= FIELD_PREP(ERDMA_SQE_HDR_WQEBB_INDEX_MASK, qp->cur_pi); -+ -+ qp->sq.pi = qp->cur_pi; -+} -+ -+static int erdma_fill_wr_ops(struct erdma_qp *eqp, -+ const struct ibv_qp_init_attr_ex *attr) -+{ -+ struct ibv_qp_ex *ibqp_ex = &eqp->verbs_qp.qp_ex; -+ -+ ibqp_ex->wr_start = erdma_wr_start; -+ ibqp_ex->wr_complete = erdma_wr_complete; -+ ibqp_ex->wr_abort = erdma_wr_abort; -+ -+ if (attr->send_ops_flags & ~ERDMA_SUPPORTED_SEND_OPS_FLAGS_RC) -+ return -EOPNOTSUPP; -+ -+ ibqp_ex->wr_send = erdma_wr_send; -+ // ibqp->wr_send_imm = erdma_send_wr_send_imm; -+ // ibqp->wr_send_inv = erdma_send_wr_send_inv; -+ ibqp_ex->wr_rdma_write = erdma_wr_rdma_write; -+ // ibqp->wr_rdma_write_imm = erdma_send_wr_rdma_write_imm; -+ // ibqp->wr_rdma_read = erdma_send_wr_rdma_read; -+ // ibqp->wr_atomic_cmp_swp = erdma_send_wr_atomic_cmp_swp; -+ // ibqp->wr_atomic_fetch_add = erdma_send_wr_atomic_fetch_add; -+ // ibqp->wr_bind_mw = erdma_send_wr_bind_mw; -+ // ibqp->wr_local_inv = erdma_send_wr_local_inv; -+ ibqp_ex->wr_set_sge = erdma_wr_set_sge; -+ ibqp_ex->wr_set_sge_list = erdma_wr_set_sge_list; -+ ibqp_ex->wr_set_inline_data = erdma_wr_set_inline_data; -+ ibqp_ex->wr_set_inline_data_list = erdma_wr_set_inline_data_list; -+ -+ return 0; -+} -+ -+static void __erdma_alloc_dbs(struct erdma_qp *qp, struct erdma_context *ctx) -+{ -+ uint32_t qpn = qp->id; -+ -+ if (ctx->sdb_type == ERDMA_SDB_PAGE) { -+ qp->disable_dwqe = 0; -+ qp->sq.db = ctx->sdb + (qpn & 31) * 128; -+ qp->sdb_lock = &ctx->sdb_lock[qpn & 31].lock; -+ } else if (ctx->sdb_type == ERDMA_SDB_ENTRY) { -+ qp->disable_dwqe = 0; -+ -+ qp->sq.db = ctx->sdb + ctx->sdb_entid * 256; -+ qp->sdb_lock = &ctx->sdb_lock[0].lock; -+ } else { -+ qp->disable_dwqe = 1; -+ qp->sq.db = ctx->sdb + (qpn & 31) * 128; -+ qp->sdb_lock = &ctx->sdb_lock[0].lock; -+ } -+ -+#ifndef HAVE_AVX_SUPPORT -+ qp->disable_dwqe = 1; -+#endif -+ -+ /* qpn[6:0] as the index in this rq db page. */ -+ qp->rq.db = ctx->rdb + -+ (qpn & ERDMA_RDB_ALLOC_QPN_MASK) * ERDMA_RQDB_SPACE_SIZE; -+} -+ -+static int erdma_check_qp_attr(struct erdma_context *ctx, -+ struct ibv_qp_init_attr_ex *attr) -+ -+{ -+ if (!check_comp_mask(attr->comp_mask, -+ IBV_QP_INIT_ATTR_PD | -+ IBV_QP_INIT_ATTR_SEND_OPS_FLAGS)) -+ return -EOPNOTSUPP; -+ -+ if (attr->qp_type != IBV_QPT_RC || -+ !(attr->comp_mask & IBV_QP_INIT_ATTR_PD)) -+ return -EINVAL; -+ -+ if (!attr->recv_cq || !attr->send_cq) -+ return -EINVAL; -+ -+ if (attr->srq) -+ return -EINVAL; -+ -+ return 0; -+} -+ -+static int erdma_store_qp(struct erdma_context *ctx, struct erdma_qp *qp) -+{ -+ uint32_t tbl_idx, tbl_off; -+ int rv = 0; -+ -+ pthread_mutex_lock(&ctx->qp_table_mutex); -+ tbl_idx = qp->id >> ERDMA_QP_TABLE_SHIFT; -+ tbl_off = qp->id & ERDMA_QP_TABLE_MASK; -+ -+ if (ctx->qp_table[tbl_idx].refcnt == 0) { -+ ctx->qp_table[tbl_idx].table = -+ calloc(ERDMA_QP_TABLE_SIZE, sizeof(struct erdma_qp *)); -+ if (!ctx->qp_table[tbl_idx].table) { -+ rv = -ENOMEM; -+ goto out; -+ } -+ } -+ -+ /* exist qp */ -+ if (ctx->qp_table[tbl_idx].table[tbl_off]) { -+ rv = -EBUSY; -+ goto out; -+ } -+ -+ ctx->qp_table[tbl_idx].table[tbl_off] = qp; -+ ctx->qp_table[tbl_idx].refcnt++; -+ -+out: -+ pthread_mutex_unlock(&ctx->qp_table_mutex); -+ -+ return rv; -+} -+ -+static void erdma_clear_qp(struct erdma_context *ctx, struct erdma_qp *qp) -+{ -+ uint32_t tbl_idx, tbl_off; -+ -+ pthread_mutex_lock(&ctx->qp_table_mutex); -+ tbl_idx = qp->id >> ERDMA_QP_TABLE_SHIFT; -+ tbl_off = qp->id & ERDMA_QP_TABLE_MASK; -+ -+ ctx->qp_table[tbl_idx].table[tbl_off] = NULL; -+ ctx->qp_table[tbl_idx].refcnt--; -+ -+ if (ctx->qp_table[tbl_idx].refcnt == 0) { -+ free(ctx->qp_table[tbl_idx].table); -+ ctx->qp_table[tbl_idx].table = NULL; -+ } -+ -+ pthread_mutex_unlock(&ctx->qp_table_mutex); -+} -+ -+static int erdma_alloc_qp_buf_and_db(struct erdma_context *ctx, -+ struct erdma_qp *qp, -+ struct ibv_qp_init_attr_ex *attr) -+{ -+ size_t queue_size; -+ uint32_t nwqebb; -+ int rv; -+ -+ nwqebb = roundup_pow_of_two(attr->cap.max_send_wr * MAX_WQEBB_PER_SQE); -+ queue_size = align(nwqebb << SQEBB_SHIFT, ctx->page_size); -+ nwqebb = roundup_pow_of_two(attr->cap.max_recv_wr); -+ queue_size += align(nwqebb << RQE_SHIFT, ctx->page_size); -+ -+ qp->qbuf_size = queue_size; -+ rv = posix_memalign(&qp->qbuf, ctx->page_size, queue_size); -+ if (rv) { -+ errno = ENOMEM; -+ return -1; -+ } -+ -+ rv = ibv_dontfork_range(qp->qbuf, queue_size); -+ if (rv) { -+ errno = rv; -+ goto err_dontfork; -+ } -+ -+ /* doorbell record allocation. */ -+ qp->db_records = erdma_alloc_dbrecords(ctx); -+ if (!qp->db_records) { -+ errno = ENOMEM; -+ goto err_dbrec; -+ } -+ -+ *qp->db_records = 0; -+ *(qp->db_records + 1) = 0; -+ qp->sq.db_record = qp->db_records; -+ qp->rq.db_record = qp->db_records + 1; -+ -+ pthread_spin_init(&qp->sq_lock, PTHREAD_PROCESS_PRIVATE); -+ pthread_spin_init(&qp->rq_lock, PTHREAD_PROCESS_PRIVATE); -+ -+ return 0; -+ -+err_dbrec: -+ ibv_dofork_range(qp->qbuf, queue_size); -+ -+err_dontfork: -+ free(qp->qbuf); -+ -+ return -1; -+} -+ -+static void erdma_free_qp_buf_and_db(struct erdma_context *ctx, -+ struct erdma_qp *qp) -+{ -+ pthread_spin_destroy(&qp->sq_lock); -+ pthread_spin_destroy(&qp->rq_lock); -+ -+ if (qp->db_records) -+ erdma_dealloc_dbrecords(ctx, qp->db_records); -+ -+ ibv_dofork_range(qp->qbuf, qp->qbuf_size); -+ -+ if (qp->qbuf) -+ free(qp->qbuf); -+} -+ -+static int erdma_alloc_wrid_tbl(struct erdma_qp *qp) -+{ -+ qp->rq.wr_tbl = calloc(qp->rq.depth, sizeof(uint64_t)); -+ if (!qp->rq.wr_tbl) -+ return -ENOMEM; -+ -+ qp->sq.wr_tbl = calloc(qp->sq.depth, sizeof(uint64_t)); -+ if (!qp->sq.wr_tbl) { -+ free(qp->rq.wr_tbl); -+ return -ENOMEM; -+ } -+ -+ return 0; -+} -+ -+static void erdma_free_wrid_tbl(struct erdma_qp *qp) -+{ -+ if (qp->sq.wr_tbl) -+ free(qp->sq.wr_tbl); -+ -+ if (qp->rq.wr_tbl) -+ free(qp->rq.wr_tbl); -+} -+ -+static struct ibv_qp *create_qp(struct ibv_context *ibv_ctx, -+ struct ibv_qp_init_attr_ex *attr) -+{ -+ struct erdma_context *ctx = to_ectx(ibv_ctx); -+ struct erdma_cmd_create_qp_resp resp = {}; -+ struct erdma_cmd_create_qp cmd = {}; -+ struct erdma_qp *qp; -+ int rv; -+ -+ rv = erdma_check_qp_attr(ctx, attr); -+ if (rv) { -+ errno = -rv; -+ return NULL; -+ } -+ -+ qp = calloc(1, sizeof(*qp)); -+ if (!qp) -+ return NULL; -+ -+ rv = erdma_alloc_qp_buf_and_db(ctx, qp, attr); -+ if (rv) -+ goto err; -+ -+ cmd.db_record_va = (uintptr_t)qp->db_records; -+ cmd.qbuf_va = (uintptr_t)qp->qbuf; -+ cmd.qbuf_len = (__u32)qp->qbuf_size; -+ -+ rv = ibv_cmd_create_qp_ex(ibv_ctx, &qp->verbs_qp, attr, &cmd.ibv_cmd, -+ sizeof(cmd), &resp.ibv_resp, sizeof(resp)); -+ if (rv) -+ goto err_cmd; -+ -+ qp->id = resp.qp_id; -+ qp->sq.qbuf = qp->qbuf; -+ qp->rq.qbuf = qp->qbuf + resp.rq_offset; -+ qp->sq.depth = resp.num_sqe; -+ qp->rq.depth = resp.num_rqe; -+ qp->sq_sig_all = attr->sq_sig_all; -+ qp->sq.size = resp.num_sqe * SQEBB_SIZE; -+ qp->rq.size = resp.num_rqe * sizeof(struct erdma_rqe); -+ -+ /* doorbell allocation. */ -+ __erdma_alloc_dbs(qp, ctx); -+ -+ if (attr->comp_mask & IBV_QP_INIT_ATTR_SEND_OPS_FLAGS) { -+ rv = erdma_fill_wr_ops(qp, attr); -+ if (rv) { -+ errno = -rv; -+ goto err_send_ops; -+ } -+ } -+ -+ rv = erdma_alloc_wrid_tbl(qp); -+ if (rv) -+ goto err_wrid_tbl; -+ -+ rv = erdma_store_qp(ctx, qp); -+ if (rv) { -+ errno = -rv; -+ goto err_store; -+ } -+ -+ return &qp->verbs_qp.qp; -+ -+err_store: -+ erdma_free_wrid_tbl(qp); -+err_wrid_tbl: -+err_send_ops: -+ ibv_cmd_destroy_qp(&qp->verbs_qp.qp); -+err_cmd: -+ erdma_free_qp_buf_and_db(ctx, qp); -+err: -+ free(qp); -+ -+ return NULL; -+} -+ -+struct ibv_qp *erdma_create_qp(struct ibv_pd *pd, struct ibv_qp_init_attr *attr) -+{ -+ struct ibv_qp_init_attr_ex attrx = {}; -+ struct ibv_qp *qp; -+ -+ memcpy(&attrx, attr, sizeof(*attr)); -+ attrx.comp_mask = IBV_QP_INIT_ATTR_PD; -+ attrx.pd = pd; -+ -+ qp = create_qp(pd->context, &attrx); -+ if (qp) -+ memcpy(attr, &attrx, sizeof(*attr)); -+ -+ return qp; -+} -+ -+struct ibv_qp *erdma_create_qp_ex(struct ibv_context *context, -+ struct ibv_qp_init_attr_ex *attr) -+{ -+ return create_qp(context, attr); -+} -+ -+int erdma_destroy_qp(struct ibv_qp *ibqp) -+{ -+ struct ibv_context *base_ctx = ibqp->pd->context; -+ struct erdma_context *ctx = to_ectx(base_ctx); -+ struct erdma_qp *qp = to_eqp(ibqp); -+ int rv; -+ -+ erdma_clear_qp(ctx, qp); -+ -+ rv = ibv_cmd_destroy_qp(ibqp); -+ if (rv) -+ return rv; -+ -+ erdma_free_wrid_tbl(qp); -+ erdma_free_qp_buf_and_db(ctx, qp); -+ -+ free(qp); -+ -+ return 0; -+} -+ -+int erdma_modify_qp(struct ibv_qp *ibqp, struct ibv_qp_attr *attr, -+ int attr_mask) -+{ -+ struct erdma_qp *qp = to_eqp(ibqp); -+ struct ibv_modify_qp cmd = {}; -+ int rv; -+ -+ pthread_spin_lock(&qp->sq_lock); -+ pthread_spin_lock(&qp->rq_lock); -+ -+ rv = ibv_cmd_modify_qp(ibqp, attr, attr_mask, &cmd, sizeof(cmd)); -+ -+ pthread_spin_unlock(&qp->rq_lock); -+ pthread_spin_unlock(&qp->sq_lock); -+ -+ return rv; -+} -+ -+static int erdma_push_one_sqe(struct erdma_qp *qp, struct ibv_send_wr *wr, -+ uint16_t *sq_pi, int use_direct) -+{ -+ uint16_t tmp_pi = *sq_pi; -+ void *sqe; -+ uint64_t sqe_hdr; -+ struct erdma_write_sqe *write_sqe; -+ struct erdma_send_sqe *send_sqe; -+ struct erdma_readreq_sqe *read_sqe; -+ struct erdma_atomic_sqe *atomic_sqe; -+ uint32_t wqe_size = 0; -+ __le32 *length_field = NULL; -+ struct erdma_sge *sgl_base = NULL; -+ uint32_t i, bytes = 0; -+ uint32_t sgl_off, sgl_idx, wqebb_cnt, opcode; -+ -+ sqe = get_sq_wqebb(qp, tmp_pi); -+ /* Clear the first 8Byte of the wqe hdr. */ -+ *(uint64_t *)sqe = 0; -+ -+ qp->sq.wr_tbl[tmp_pi & (qp->sq.depth - 1)] = wr->wr_id; -+ -+ sqe_hdr = FIELD_PREP(ERDMA_SQE_HDR_QPN_MASK, qp->id) | -+ FIELD_PREP(ERDMA_SQE_HDR_CE_MASK, -+ wr->send_flags & IBV_SEND_SIGNALED ? 1 : 0) | -+ FIELD_PREP(ERDMA_SQE_HDR_CE_MASK, qp->sq_sig_all) | -+ FIELD_PREP(ERDMA_SQE_HDR_SE_MASK, -+ wr->send_flags & IBV_SEND_SOLICITED ? 1 : 0) | -+ FIELD_PREP(ERDMA_SQE_HDR_FENCE_MASK, -+ wr->send_flags & IBV_SEND_FENCE ? 1 : 0) | -+ FIELD_PREP(ERDMA_SQE_HDR_INLINE_MASK, -+ wr->send_flags & IBV_SEND_INLINE ? 1 : 0) | -+ FIELD_PREP(ERDMA_SQE_HDR_DWQE_MASK, use_direct); -+ -+ switch (wr->opcode) { -+ case IBV_WR_RDMA_WRITE: -+ case IBV_WR_RDMA_WRITE_WITH_IMM: -+ if (wr->opcode == IBV_WR_RDMA_WRITE) -+ opcode = ERDMA_OP_WRITE; -+ else -+ opcode = ERDMA_OP_WRITE_WITH_IMM; -+ sqe_hdr |= FIELD_PREP(ERDMA_SQE_HDR_OPCODE_MASK, opcode); -+ write_sqe = sqe; -+ write_sqe->imm_data = wr->imm_data; -+ write_sqe->sink_stag = htole32(wr->wr.rdma.rkey); -+ write_sqe->sink_to_low = -+ htole32(wr->wr.rdma.remote_addr & 0xFFFFFFFF); -+ write_sqe->sink_to_high = -+ htole32((wr->wr.rdma.remote_addr >> 32) & 0xFFFFFFFF); -+ -+ length_field = &write_sqe->length; -+ /* sgl is at the start of next wqebb. */ -+ sgl_base = get_sq_wqebb(qp, tmp_pi + 1); -+ sgl_off = 0; -+ sgl_idx = tmp_pi + 1; -+ wqe_size = sizeof(struct erdma_write_sqe); -+ -+ break; -+ case IBV_WR_SEND: -+ case IBV_WR_SEND_WITH_IMM: -+ case IBV_WR_SEND_WITH_INV: -+ if (wr->opcode == IBV_WR_SEND) -+ opcode = ERDMA_OP_SEND; -+ else if (wr->opcode == IBV_WR_SEND_WITH_IMM) -+ opcode = ERDMA_OP_SEND_WITH_IMM; -+ else -+ opcode = ERDMA_OP_SEND_WITH_INV; -+ sqe_hdr |= FIELD_PREP(ERDMA_SQE_HDR_OPCODE_MASK, opcode); -+ send_sqe = sqe; -+ if (wr->opcode == IBV_WR_SEND_WITH_INV) -+ send_sqe->invalid_stag = htole32(wr->invalidate_rkey); -+ else -+ send_sqe->imm_data = wr->imm_data; -+ -+ length_field = &send_sqe->length; -+ /* sgl is in the half of current wqebb (offset 16Byte) */ -+ sgl_base = sqe; -+ sgl_off = 16; -+ sgl_idx = tmp_pi; -+ wqe_size = sizeof(struct erdma_send_sqe); -+ -+ break; -+ case IBV_WR_RDMA_READ: -+ sqe_hdr |= FIELD_PREP(ERDMA_SQE_HDR_OPCODE_MASK, ERDMA_OP_READ); -+ read_sqe = sqe; -+ -+ read_sqe->sink_to_low = htole32(wr->sg_list->addr & 0xFFFFFFFF); -+ read_sqe->sink_to_high = -+ htole32((wr->sg_list->addr >> 32) & 0xFFFFFFFF); -+ read_sqe->sink_stag = htole32(wr->sg_list->lkey); -+ read_sqe->length = htole32(wr->sg_list->length); -+ -+ sgl_base = get_sq_wqebb(qp, tmp_pi + 1); -+ -+ sgl_base->laddr = htole64(wr->wr.rdma.remote_addr); -+ sgl_base->length = htole32(wr->sg_list->length); -+ sgl_base->lkey = htole32(wr->wr.rdma.rkey); -+ -+ wqe_size = sizeof(struct erdma_readreq_sqe); -+ -+ goto out; -+ case IBV_WR_ATOMIC_CMP_AND_SWP: -+ case IBV_WR_ATOMIC_FETCH_AND_ADD: -+ atomic_sqe = (struct erdma_atomic_sqe *)sqe; -+ -+ if (wr->opcode == IBV_WR_ATOMIC_CMP_AND_SWP) { -+ sqe_hdr |= FIELD_PREP(ERDMA_SQE_HDR_OPCODE_MASK, -+ ERDMA_OP_ATOMIC_CAS); -+ atomic_sqe->fetchadd_swap_data = wr->wr.atomic.swap; -+ atomic_sqe->cmp_data = wr->wr.atomic.compare_add; -+ } else { -+ sqe_hdr |= FIELD_PREP(ERDMA_SQE_HDR_OPCODE_MASK, -+ ERDMA_OP_ATOMIC_FAD); -+ atomic_sqe->fetchadd_swap_data = -+ wr->wr.atomic.compare_add; -+ } -+ -+ sgl_base = (struct erdma_sge *)get_sq_wqebb(qp, tmp_pi + 1); -+ /* remote SGL fields */ -+ sgl_base->laddr = wr->wr.atomic.remote_addr; -+ sgl_base->lkey = wr->wr.atomic.rkey; -+ -+ /* local SGL fields */ -+ sgl_base++; -+ sgl_base->laddr = wr->sg_list[0].addr; -+ sgl_base->length = wr->sg_list[0].length; -+ sgl_base->lkey = wr->sg_list[0].lkey; -+ wqe_size = sizeof(struct erdma_atomic_sqe); -+ goto out; -+ default: -+ return -EINVAL; -+ } -+ -+ if (wr->send_flags & IBV_SEND_INLINE) { -+ char *data = (char *)sgl_base; -+ uint32_t remain_size; -+ uint32_t copy_size; -+ uint32_t data_off; -+ -+ i = 0; -+ bytes = 0; -+ -+ /* Allow more than ERDMA_MAX_SGE, since content copied here */ -+ while (i < wr->num_sge) { -+ bytes += wr->sg_list[i].length; -+ if (bytes > (int)ERDMA_MAX_INLINE) -+ return -EINVAL; -+ -+ remain_size = wr->sg_list[i].length; -+ data_off = 0; -+ -+ while (1) { -+ copy_size = -+ min(remain_size, SQEBB_SIZE - sgl_off); -+ memcpy(data + sgl_off, -+ (void *)(uintptr_t)wr->sg_list[i].addr + -+ data_off, -+ copy_size); -+ remain_size -= copy_size; -+ -+ /* Update sgl_offset. */ -+ sgl_idx += -+ ((sgl_off + copy_size) >> SQEBB_SHIFT); -+ sgl_off = (sgl_off + copy_size) & -+ (SQEBB_SIZE - 1); -+ data_off += copy_size; -+ data = get_sq_wqebb(qp, sgl_idx); -+ -+ if (!remain_size) -+ break; -+ }; -+ -+ i++; -+ } -+ -+ *length_field = htole32(bytes); -+ wqe_size += bytes; -+ sqe_hdr |= FIELD_PREP(ERDMA_SQE_HDR_SGL_LEN_MASK, bytes); -+ } else { -+ char *sgl = (char *)sgl_base; -+ -+ if (wr->num_sge > ERDMA_MAX_SEND_SGE) -+ return -EINVAL; -+ -+ i = 0; -+ bytes = 0; -+ -+ while (i < wr->num_sge) { -+ bytes += wr->sg_list[i].length; -+ memcpy(sgl + sgl_off, &wr->sg_list[i], -+ sizeof(struct ibv_sge)); -+ -+ if (sgl_off == 0) -+ *(uint32_t *)(sgl + 28) = qp->id; -+ -+ sgl_idx += (sgl_off == sizeof(struct ibv_sge) ? 1 : 0); -+ sgl = get_sq_wqebb(qp, sgl_idx); -+ sgl_off = sizeof(struct ibv_sge) - sgl_off; -+ -+ i++; -+ } -+ -+ *length_field = htole32(bytes); -+ sqe_hdr |= FIELD_PREP(ERDMA_SQE_HDR_SGL_LEN_MASK, wr->num_sge); -+ wqe_size += wr->num_sge * sizeof(struct ibv_sge); -+ } -+ -+out: -+ wqebb_cnt = SQEBB_COUNT(wqe_size); -+ assert(wqebb_cnt <= MAX_WQEBB_PER_SQE); -+ sqe_hdr |= FIELD_PREP(ERDMA_SQE_HDR_WQEBB_CNT_MASK, wqebb_cnt - 1); -+ sqe_hdr |= -+ FIELD_PREP(ERDMA_SQE_HDR_WQEBB_INDEX_MASK, tmp_pi + wqebb_cnt); -+ -+ *(__le64 *)sqe = htole64(sqe_hdr); -+ *sq_pi = tmp_pi + wqebb_cnt; -+ -+ if (use_direct) -+ kick_hw_sqe(qp, tmp_pi, wqebb_cnt); -+ -+ return 0; -+} -+ -+int erdma_post_send(struct ibv_qp *ibqp, struct ibv_send_wr *wr, -+ struct ibv_send_wr **bad_wr) -+{ -+ struct erdma_qp *qp = to_eqp(ibqp); -+ uint16_t sq_pi; -+ int new_sqe = 0, rv = 0; -+ int use_dwqe = 0, first_wr = 1, dsqe = 0; -+ -+ *bad_wr = NULL; -+ -+ if (ibqp->state == IBV_QPS_ERR) { -+ *bad_wr = wr; -+ return -EIO; -+ } -+ -+ pthread_spin_lock(&qp->sq_lock); -+ sq_pi = qp->sq.pi; -+ -+ while (wr) { -+ if ((uint16_t)(sq_pi - qp->sq.ci) >= qp->sq.depth) { -+ rv = -ENOMEM; -+ *bad_wr = wr; -+ break; -+ } -+ -+ use_dwqe = (!qp->disable_dwqe && first_wr && sq_pi == qp->sq.ci) ? 1 : 0; -+ if (use_dwqe) -+ dsqe++; -+ -+ rv = erdma_push_one_sqe(qp, wr, &sq_pi, use_dwqe); -+ if (rv) { -+ *bad_wr = wr; -+ break; -+ } -+ -+ new_sqe++; -+ first_wr = 0; -+ wr = wr->next; -+ } -+ -+ if (new_sqe) -+ qp->sq.pi = sq_pi; -+ -+ if (new_sqe - dsqe) -+ __kick_sq_db(qp, sq_pi); /* normal doorbell. */ -+ -+ pthread_spin_unlock(&qp->sq_lock); -+ -+ return rv; -+} -+ -+static int push_recv_wqe(struct erdma_qp *qp, struct ibv_recv_wr *wr) -+{ -+ uint16_t rq_pi = qp->rq.pi; -+ uint16_t idx = rq_pi & (qp->rq.depth - 1); -+ struct erdma_rqe *rqe = (struct erdma_rqe *)qp->rq.qbuf + idx; -+ -+ if ((uint16_t)(rq_pi - qp->rq.ci) == qp->rq.depth) -+ return -ENOMEM; -+ -+ rqe->qe_idx = htole16(rq_pi + 1); -+ rqe->qpn = htole32(qp->id); -+ qp->rq.wr_tbl[idx] = wr->wr_id; -+ -+ if (wr->num_sge == 0) { -+ rqe->length = 0; -+ } else if (wr->num_sge == 1) { -+ rqe->stag = htole32(wr->sg_list[0].lkey); -+ rqe->to = htole64(wr->sg_list[0].addr); -+ rqe->length = htole32(wr->sg_list[0].length); -+ } else { -+ return -EINVAL; -+ } -+ -+ *(__le64 *)qp->rq.db_record = *(__le64 *)rqe; -+ udma_to_device_barrier(); -+ mmio_write64_le(qp->rq.db, *(__le64 *)rqe); -+ -+ qp->rq.pi = rq_pi + 1; -+ -+ return 0; -+} -+ -+int erdma_post_recv(struct ibv_qp *ibqp, struct ibv_recv_wr *wr, -+ struct ibv_recv_wr **bad_wr) -+{ -+ struct erdma_qp *qp = to_eqp(ibqp); -+ int ret = 0; -+ -+ if (ibqp->state == IBV_QPS_ERR) { -+ *bad_wr = wr; -+ return -EIO; -+ } -+ -+ pthread_spin_lock(&qp->rq_lock); -+ -+ while (wr) { -+ ret = push_recv_wqe(qp, wr); -+ if (ret) { -+ *bad_wr = wr; -+ break; -+ } -+ -+ wr = wr->next; -+ } -+ -+ pthread_spin_unlock(&qp->rq_lock); -+ -+ return ret; -+} -+ -+void erdma_cq_event(struct ibv_cq *ibcq) -+{ -+ struct erdma_cq *cq = to_ecq(ibcq); -+ -+ cq->cmdsn++; -+} -+ -+static void *get_next_valid_cqe(struct erdma_cq *cq) -+{ -+ struct erdma_cqe *cqe = cq->queue + (cq->ci & (cq->depth - 1)); -+ uint32_t owner = FIELD_GET(ERDMA_CQE_HDR_OWNER_MASK, be32toh(cqe->hdr)); -+ -+ return owner ^ !!(cq->ci & cq->depth) ? cqe : NULL; -+} -+ -+static const enum ibv_wc_opcode wc_mapping_table[ERDMA_NUM_OPCODES] = { -+ [ERDMA_OP_WRITE] = IBV_WC_RDMA_WRITE, -+ [ERDMA_OP_READ] = IBV_WC_RDMA_READ, -+ [ERDMA_OP_SEND] = IBV_WC_SEND, -+ [ERDMA_OP_SEND_WITH_IMM] = IBV_WC_SEND, -+ [ERDMA_OP_RECEIVE] = IBV_WC_RECV, -+ [ERDMA_OP_RECV_IMM] = IBV_WC_RECV_RDMA_WITH_IMM, -+ [ERDMA_OP_RECV_INV] = IBV_WC_RECV, -+ [ERDMA_OP_WRITE_WITH_IMM] = IBV_WC_RDMA_WRITE, -+ [ERDMA_OP_INVALIDATE] = IBV_WC_LOCAL_INV, -+ [ERDMA_OP_RSP_SEND_IMM] = IBV_WC_RECV, -+ [ERDMA_OP_SEND_WITH_INV] = IBV_WC_SEND, -+ [ERDMA_OP_READ_WITH_INV] = IBV_WC_RDMA_READ, -+ [ERDMA_OP_ATOMIC_CAS] = IBV_WC_COMP_SWAP, -+ [ERDMA_OP_ATOMIC_FAD] = IBV_WC_FETCH_ADD, -+}; -+ -+static const struct { -+ enum erdma_wc_status erdma; -+ enum ibv_wc_status base; -+ enum erdma_vendor_err vendor; -+} map_cqe_status[ERDMA_NUM_WC_STATUS] = { -+ { ERDMA_WC_SUCCESS, IBV_WC_SUCCESS, ERDMA_WC_VENDOR_NO_ERR }, -+ { ERDMA_WC_GENERAL_ERR, IBV_WC_GENERAL_ERR, ERDMA_WC_VENDOR_NO_ERR }, -+ { ERDMA_WC_RECV_WQE_FORMAT_ERR, IBV_WC_GENERAL_ERR, -+ ERDMA_WC_VENDOR_INVALID_RQE }, -+ { ERDMA_WC_RECV_STAG_INVALID_ERR, IBV_WC_REM_ACCESS_ERR, -+ ERDMA_WC_VENDOR_RQE_INVALID_STAG }, -+ { ERDMA_WC_RECV_ADDR_VIOLATION_ERR, IBV_WC_REM_ACCESS_ERR, -+ ERDMA_WC_VENDOR_RQE_ADDR_VIOLATION }, -+ { ERDMA_WC_RECV_RIGHT_VIOLATION_ERR, IBV_WC_REM_ACCESS_ERR, -+ ERDMA_WC_VENDOR_RQE_ACCESS_RIGHT_ERR }, -+ { ERDMA_WC_RECV_PDID_ERR, IBV_WC_REM_ACCESS_ERR, -+ ERDMA_WC_VENDOR_RQE_INVALID_PD }, -+ { ERDMA_WC_RECV_WARRPING_ERR, IBV_WC_REM_ACCESS_ERR, -+ ERDMA_WC_VENDOR_RQE_WRAP_ERR }, -+ { ERDMA_WC_SEND_WQE_FORMAT_ERR, IBV_WC_LOC_QP_OP_ERR, -+ ERDMA_WC_VENDOR_INVALID_SQE }, -+ { ERDMA_WC_SEND_WQE_ORD_EXCEED, IBV_WC_GENERAL_ERR, -+ ERDMA_WC_VENDOR_ZERO_ORD }, -+ { ERDMA_WC_SEND_STAG_INVALID_ERR, IBV_WC_LOC_ACCESS_ERR, -+ ERDMA_WC_VENDOR_SQE_INVALID_STAG }, -+ { ERDMA_WC_SEND_ADDR_VIOLATION_ERR, IBV_WC_LOC_ACCESS_ERR, -+ ERDMA_WC_VENDOR_SQE_ADDR_VIOLATION }, -+ { ERDMA_WC_SEND_RIGHT_VIOLATION_ERR, IBV_WC_LOC_ACCESS_ERR, -+ ERDMA_WC_VENDOR_SQE_ACCESS_ERR }, -+ { ERDMA_WC_SEND_PDID_ERR, IBV_WC_LOC_ACCESS_ERR, -+ ERDMA_WC_VENDOR_SQE_INVALID_PD }, -+ { ERDMA_WC_SEND_WARRPING_ERR, IBV_WC_LOC_ACCESS_ERR, -+ ERDMA_WC_VENDOR_SQE_WARP_ERR }, -+ { ERDMA_WC_FLUSH_ERR, IBV_WC_WR_FLUSH_ERR, ERDMA_WC_VENDOR_NO_ERR }, -+ { ERDMA_WC_RETRY_EXC_ERR, IBV_WC_RETRY_EXC_ERR, -+ ERDMA_WC_VENDOR_NO_ERR }, -+}; -+ -+#define ERDMA_POLLCQ_NO_QP (-1) -+#define ERDMA_POLLCQ_DUP_COMP (-2) -+#define ERDMA_POLLCQ_WRONG_IDX (-3) -+ -+static int __erdma_poll_one_cqe(struct erdma_context *ctx, struct erdma_cq *cq, -+ struct ibv_wc *wc) -+{ -+ uint32_t cqe_hdr, opcode, syndrome, qpn; -+ uint16_t depth, wqe_idx, old_ci, new_ci; -+ uint64_t *sqe_hdr, *qeidx2wrid; -+ uint32_t tbl_idx, tbl_off; -+ struct erdma_cqe *cqe; -+ struct erdma_qp *qp; -+ -+ cqe = get_next_valid_cqe(cq); -+ if (!cqe) -+ return -EAGAIN; -+ -+ cq->ci++; -+ udma_from_device_barrier(); -+ -+ cqe_hdr = be32toh(cqe->hdr); -+ syndrome = FIELD_GET(ERDMA_CQE_HDR_SYNDROME_MASK, cqe_hdr); -+ opcode = FIELD_GET(ERDMA_CQE_HDR_OPCODE_MASK, cqe_hdr); -+ qpn = be32toh(cqe->qpn); -+ wqe_idx = be32toh(cqe->qe_idx); -+ -+ tbl_idx = qpn >> ERDMA_QP_TABLE_SHIFT; -+ tbl_off = qpn & ERDMA_QP_TABLE_MASK; -+ -+ if (!ctx->qp_table[tbl_idx].table || -+ !ctx->qp_table[tbl_idx].table[tbl_off]) -+ return ERDMA_POLLCQ_NO_QP; -+ -+ qp = ctx->qp_table[tbl_idx].table[tbl_off]; -+ -+ if (FIELD_GET(ERDMA_CQE_HDR_QTYPE_MASK, cqe_hdr) == -+ ERDMA_CQE_QTYPE_SQ) { -+ qeidx2wrid = qp->sq.wr_tbl; -+ depth = qp->sq.depth; -+ sqe_hdr = get_sq_wqebb(qp, wqe_idx); -+ old_ci = qp->sq.ci; -+ new_ci = wqe_idx + -+ FIELD_GET(ERDMA_SQE_HDR_WQEBB_CNT_MASK, *sqe_hdr) + 1; -+ -+ if ((uint16_t)(new_ci - old_ci) > depth) -+ return ERDMA_POLLCQ_WRONG_IDX; -+ else if (new_ci == old_ci) -+ return ERDMA_POLLCQ_DUP_COMP; -+ -+ qp->sq.ci = new_ci; -+ } else { -+ qeidx2wrid = qp->rq.wr_tbl; -+ depth = qp->rq.depth; -+ qp->rq.ci++; -+ } -+ -+ wc->wr_id = qeidx2wrid[wqe_idx & (depth - 1)]; -+ wc->byte_len = be32toh(cqe->size); -+ wc->wc_flags = 0; -+ -+ wc->opcode = wc_mapping_table[opcode]; -+ if (opcode == ERDMA_OP_RECV_IMM || opcode == ERDMA_OP_RSP_SEND_IMM) { -+ wc->imm_data = htobe32(le32toh(cqe->imm_data)); -+ wc->wc_flags |= IBV_WC_WITH_IMM; -+ } -+ -+ if (syndrome >= ERDMA_NUM_WC_STATUS) -+ syndrome = ERDMA_WC_GENERAL_ERR; -+ -+ wc->status = map_cqe_status[syndrome].base; -+ wc->vendor_err = map_cqe_status[syndrome].vendor; -+ wc->qp_num = qpn; -+ -+ return 0; -+} -+ -+int erdma_poll_cq(struct ibv_cq *ibcq, int num_entries, struct ibv_wc *wc) -+{ -+ struct erdma_context *ctx = to_ectx(ibcq->context); -+ struct erdma_cq *cq = to_ecq(ibcq); -+ int ret, npolled = 0; -+ -+ pthread_spin_lock(&cq->lock); -+ -+ while (npolled < num_entries) { -+ ret = __erdma_poll_one_cqe(ctx, cq, wc + npolled); -+ if (ret == -EAGAIN) /* CQ is empty, break the loop. */ -+ break; -+ else if (ret) /* We handle the polling error silently. */ -+ continue; -+ npolled++; -+ } -+ -+ pthread_spin_unlock(&cq->lock); -+ -+ return npolled; -+} -+ -+void erdma_free_context(struct ibv_context *ibv_ctx) -+{ -+ struct erdma_context *ctx = to_ectx(ibv_ctx); -+ int i; -+ -+ munmap(aligned_address(ctx->sdb, ctx->page_size), ctx->page_size); -+ munmap(aligned_address(ctx->rdb, ctx->page_size), ctx->page_size); -+ munmap(aligned_address(ctx->cdb, ctx->page_size), ctx->page_size); -+ -+ pthread_mutex_lock(&ctx->qp_table_mutex); -+ for (i = 0; i < ERDMA_QP_TABLE_SIZE; ++i) { -+ if (ctx->qp_table[i].refcnt) -+ free(ctx->qp_table[i].table); -+ } -+ -+ pthread_mutex_unlock(&ctx->qp_table_mutex); -+ pthread_mutex_destroy(&ctx->qp_table_mutex); -+ -+ verbs_uninit_context(&ctx->ibv_ctx); -+ free(ctx); -+} --- -2.37.0 - diff --git a/1003-providers-erdma-Add-the-main-module-of-erdma.patch b/1003-providers-erdma-Add-the-main-module-of-erdma.patch deleted file mode 100644 index 41f02e3..0000000 --- a/1003-providers-erdma-Add-the-main-module-of-erdma.patch +++ /dev/null @@ -1,326 +0,0 @@ -From ccacbfd79f17b3955acff68135e029433b2072d6 Mon Sep 17 00:00:00 2001 -Message-Id: -In-Reply-To: -References: -From: Cheng Xu -Date: Thu, 1 Dec 2022 15:40:48 +0800 -Subject: [PATCH 3/4] providers/erdma: Add the main module of erdma - -Add the definitions of erdma provider driver, and add the application -interface to core, so that core can recognize erdma provider. - -Signed-off-by: Cheng Xu ---- - libibverbs/verbs.h | 8 +- - providers/erdma/erdma.c | 149 ++++++++++++++++++++++++++++++++++++ - providers/erdma/erdma.h | 79 +++++++++++++++++++ - providers/erdma/erdma_abi.h | 21 +++++ - 4 files changed, 254 insertions(+), 3 deletions(-) - create mode 100644 providers/erdma/erdma.c - create mode 100644 providers/erdma/erdma.h - create mode 100644 providers/erdma/erdma_abi.h - -diff --git a/libibverbs/verbs.h b/libibverbs/verbs.h -index 36b4142517c0..b2f2b4b07be6 100644 ---- a/libibverbs/verbs.h -+++ b/libibverbs/verbs.h -@@ -2190,7 +2190,7 @@ struct ibv_device **ibv_get_device_list(int *num_devices); - */ - #ifdef RDMA_STATIC_PROVIDERS - #define _RDMA_STATIC_PREFIX_(_1, _2, _3, _4, _5, _6, _7, _8, _9, _10, _11, \ -- _12, _13, _14, _15, _16, _17, ...) \ -+ _12, _13, _14, _15, _16, _17, _18, ...) \ - &verbs_provider_##_1, &verbs_provider_##_2, &verbs_provider_##_3, \ - &verbs_provider_##_4, &verbs_provider_##_5, \ - &verbs_provider_##_6, &verbs_provider_##_7, \ -@@ -2198,16 +2198,18 @@ struct ibv_device **ibv_get_device_list(int *num_devices); - &verbs_provider_##_10, &verbs_provider_##_11, \ - &verbs_provider_##_12, &verbs_provider_##_13, \ - &verbs_provider_##_14, &verbs_provider_##_15, \ -- &verbs_provider_##_16, &verbs_provider_##_17 -+ &verbs_provider_##_16, &verbs_provider_##_17, \ -+ &verbs_provider_##_18 - #define _RDMA_STATIC_PREFIX(arg) \ - _RDMA_STATIC_PREFIX_(arg, none, none, none, none, none, none, none, \ - none, none, none, none, none, none, none, none, \ -- none) -+ none, none) - - struct verbs_devices_ops; - extern const struct verbs_device_ops verbs_provider_bnxt_re; - extern const struct verbs_device_ops verbs_provider_cxgb4; - extern const struct verbs_device_ops verbs_provider_efa; -+extern const struct verbs_device_ops verbs_provider_erdma; - extern const struct verbs_device_ops verbs_provider_hfi1verbs; - extern const struct verbs_device_ops verbs_provider_hns; - extern const struct verbs_device_ops verbs_provider_ipathverbs; -diff --git a/providers/erdma/erdma.c b/providers/erdma/erdma.c -new file mode 100644 -index 000000000000..440314599cf1 ---- /dev/null -+++ b/providers/erdma/erdma.c -@@ -0,0 +1,149 @@ -+// SPDX-License-Identifier: GPL-2.0 or OpenIB.org BSD (MIT) See COPYING file -+ -+// Authors: Cheng Xu -+// Copyright (c) 2020-2021, Alibaba Group. -+ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+ -+#include "erdma.h" -+#include "erdma_abi.h" -+#include "erdma_hw.h" -+#include "erdma_verbs.h" -+ -+static const struct verbs_context_ops erdma_context_ops = { -+ .alloc_pd = erdma_alloc_pd, -+ .cq_event = erdma_cq_event, -+ .create_cq = erdma_create_cq, -+ .create_qp = erdma_create_qp, -+ .create_qp_ex = erdma_create_qp_ex, -+ .dealloc_pd = erdma_free_pd, -+ .dereg_mr = erdma_dereg_mr, -+ .destroy_cq = erdma_destroy_cq, -+ .destroy_qp = erdma_destroy_qp, -+ .free_context = erdma_free_context, -+ .modify_qp = erdma_modify_qp, -+ .poll_cq = erdma_poll_cq, -+ .post_recv = erdma_post_recv, -+ .post_send = erdma_post_send, -+ .query_device_ex = erdma_query_device, -+ .query_port = erdma_query_port, -+ .query_qp = erdma_query_qp, -+ .reg_mr = erdma_reg_mr, -+ .req_notify_cq = erdma_notify_cq, -+}; -+ -+static struct verbs_context *erdma_alloc_context(struct ibv_device *device, -+ int cmd_fd, void *private_data) -+{ -+ struct erdma_cmd_alloc_context_resp resp = {}; -+ struct ibv_get_context cmd = {}; -+ struct erdma_context *ctx; -+ int i; -+ -+ ctx = verbs_init_and_alloc_context(device, cmd_fd, ctx, ibv_ctx, -+ RDMA_DRIVER_ERDMA); -+ if (!ctx) -+ return NULL; -+ -+ pthread_mutex_init(&ctx->qp_table_mutex, NULL); -+ for (i = 0; i < ERDMA_QP_TABLE_SIZE; ++i) -+ ctx->qp_table[i].refcnt = 0; -+ -+ if (ibv_cmd_get_context(&ctx->ibv_ctx, &cmd, sizeof(cmd), -+ &resp.ibv_resp, sizeof(resp))) -+ goto err_out; -+ -+ verbs_set_ops(&ctx->ibv_ctx, &erdma_context_ops); -+ ctx->page_size = to_edev(device)->page_size; -+ ctx->dev_id = resp.dev_id; -+ -+ ctx->sdb_type = resp.sdb_type; -+ ctx->sdb_entid = resp.sdb_entid; -+ -+ ctx->sdb = mmap(NULL, ctx->page_size, PROT_WRITE, MAP_SHARED, cmd_fd, -+ resp.sdb); -+ if (ctx->sdb == MAP_FAILED) -+ goto err_out; -+ -+ ctx->sdb += resp.sdb_off; -+ -+ ctx->rdb = mmap(NULL, ctx->page_size, PROT_WRITE, MAP_SHARED, cmd_fd, -+ resp.rdb); -+ if (ctx->rdb == MAP_FAILED) -+ goto err_rdb_map; -+ -+ ctx->rdb += resp.rdb_off; -+ -+ ctx->cdb = mmap(NULL, ctx->page_size, PROT_WRITE, MAP_SHARED, cmd_fd, -+ resp.cdb); -+ if (ctx->cdb == MAP_FAILED) -+ goto err_cdb_map; -+ -+ ctx->cdb += resp.cdb_off; -+ -+ ctx->dbrecord_pages = NULL; -+ pthread_mutex_init(&ctx->dbrecord_pages_mutex, NULL); -+ -+ for (i = 0; i < 32; i++) -+ pthread_spin_init(&ctx->sdb_lock[i].lock, -+ PTHREAD_PROCESS_PRIVATE); -+ -+ return &ctx->ibv_ctx; -+ -+err_cdb_map: -+ munmap(aligned_address(ctx->rdb, ctx->page_size), ctx->page_size); -+err_rdb_map: -+ munmap(aligned_address(ctx->sdb, ctx->page_size), ctx->page_size); -+err_out: -+ verbs_uninit_context(&ctx->ibv_ctx); -+ free(ctx); -+ -+ return NULL; -+} -+ -+static struct verbs_device * -+erdma_device_alloc(struct verbs_sysfs_dev *sysfs_dev) -+{ -+ struct erdma_device *dev; -+ -+ dev = calloc(1, sizeof(*dev)); -+ if (!dev) -+ return NULL; -+ -+ dev->page_size = sysconf(_SC_PAGESIZE); -+ -+ return &dev->ibv_dev; -+} -+ -+static void erdma_device_free(struct verbs_device *vdev) -+{ -+ struct erdma_device *dev = -+ container_of(vdev, struct erdma_device, ibv_dev); -+ -+ free(dev); -+} -+ -+static const struct verbs_match_ent match_table[] = { -+ VERBS_DRIVER_ID(RDMA_DRIVER_ERDMA), -+ VERBS_PCI_MATCH(PCI_VENDOR_ID_ALIBABA, 0x107f, NULL), -+ {}, -+}; -+ -+static const struct verbs_device_ops erdma_dev_ops = { -+ .name = "erdma", -+ .match_min_abi_version = 0, -+ .match_max_abi_version = ERDMA_ABI_VERSION, -+ .match_table = match_table, -+ .alloc_device = erdma_device_alloc, -+ .uninit_device = erdma_device_free, -+ .alloc_context = erdma_alloc_context, -+}; -+ -+PROVIDER_DRIVER(erdma, erdma_dev_ops); -diff --git a/providers/erdma/erdma.h b/providers/erdma/erdma.h -new file mode 100644 -index 000000000000..41bca6e7915f ---- /dev/null -+++ b/providers/erdma/erdma.h -@@ -0,0 +1,79 @@ -+/* SPDX-License-Identifier: GPL-2.0 or OpenIB.org BSD (MIT) See COPYING file */ -+/* -+ * Authors: Cheng Xu -+ * Copyright (c) 2020-2021, Alibaba Group. -+ */ -+ -+#ifndef __ERDMA_H__ -+#define __ERDMA_H__ -+ -+#include -+#include -+#include -+ -+#include -+#include -+#include -+ -+#ifndef PCI_VENDOR_ID_ALIBABA -+#define PCI_VENDOR_ID_ALIBABA 0x1ded -+#endif -+ -+#define ERDMA_PAGE_SIZE 4096 -+#define ERDMA_PAGE_SHIFT 12 -+#define ERDMA_SIZE_TO_NPAGE(size) \ -+ (((size) + ERDMA_PAGE_SIZE - 1) >> ERDMA_PAGE_SHIFT) -+ -+struct erdma_device { -+ struct verbs_device ibv_dev; -+ uint32_t page_size; -+}; -+ -+struct erdma_aligned_lock_t { -+ pthread_spinlock_t lock; -+} __attribute__((__packed__)) __attribute__((__aligned__(64))); -+ -+#define ERDMA_QP_TABLE_SIZE 4096 -+#define ERDMA_QP_TABLE_SHIFT 12 -+#define ERDMA_QP_TABLE_MASK 0xFFF -+ -+struct erdma_context { -+ struct verbs_context ibv_ctx; -+ uint32_t dev_id; -+ -+ struct { -+ struct erdma_qp **table; -+ int refcnt; -+ } qp_table[ERDMA_QP_TABLE_SIZE]; -+ pthread_mutex_t qp_table_mutex; -+ -+ uint8_t sdb_type; -+ uint32_t sdb_entid; -+ -+ void *sdb; -+ void *rdb; -+ void *cdb; -+ -+ uint32_t page_size; -+ pthread_mutex_t dbrecord_pages_mutex; -+ struct erdma_dbrecord_page *dbrecord_pages; -+ -+ struct erdma_aligned_lock_t sdb_lock[32]; -+}; -+ -+static inline struct erdma_context *to_ectx(struct ibv_context *base) -+{ -+ return container_of(base, struct erdma_context, ibv_ctx.context); -+} -+ -+static inline struct erdma_device *to_edev(struct ibv_device *ibv_dev) -+{ -+ return container_of(ibv_dev, struct erdma_device, ibv_dev.device); -+} -+ -+static inline void *aligned_address(void *addr, uint32_t align) -+{ -+ return addr - ((uintptr_t)addr & (align - 1)); -+} -+ -+#endif -diff --git a/providers/erdma/erdma_abi.h b/providers/erdma/erdma_abi.h -new file mode 100644 -index 000000000000..184804711d9b ---- /dev/null -+++ b/providers/erdma/erdma_abi.h -@@ -0,0 +1,21 @@ -+/* SPDX-License-Identifier: GPL-2.0 or OpenIB.org BSD (MIT) See COPYING file */ -+/* -+ * Authors: Cheng Xu -+ * Copyright (c) 2020-2021, Alibaba Group. -+ */ -+ -+#ifndef __ERDMA_ABI_H__ -+#define __ERDMA_ABI_H__ -+ -+#include -+#include -+#include -+ -+DECLARE_DRV_CMD(erdma_cmd_alloc_context, IB_USER_VERBS_CMD_GET_CONTEXT, empty, -+ erdma_uresp_alloc_ctx); -+DECLARE_DRV_CMD(erdma_cmd_create_cq, IB_USER_VERBS_CMD_CREATE_CQ, -+ erdma_ureq_create_cq, erdma_uresp_create_cq); -+DECLARE_DRV_CMD(erdma_cmd_create_qp, IB_USER_VERBS_CMD_CREATE_QP, -+ erdma_ureq_create_qp, erdma_uresp_create_qp); -+ -+#endif --- -2.37.0 - diff --git a/1004-providers-erdma-Add-to-the-build-environment.patch b/1004-providers-erdma-Add-to-the-build-environment.patch deleted file mode 100644 index faab71e..0000000 --- a/1004-providers-erdma-Add-to-the-build-environment.patch +++ /dev/null @@ -1,245 +0,0 @@ -From 500d8631e9598875063ba5daf3c668928984979c Mon Sep 17 00:00:00 2001 -Message-Id: <500d8631e9598875063ba5daf3c668928984979c.1669880730.git.chengyou@linux.alibaba.com> -In-Reply-To: -References: -From: Cheng Xu -Date: Thu, 1 Dec 2022 15:42:21 +0800 -Subject: [PATCH 4/4] providers/erdma: Add to the build environment - -Make the build system can build the provider, and add erdma to redhat package -environment and debian pkg build environment. - -Signed-off-by: Cheng Xu ---- - CMakeLists.txt | 1 + - MAINTAINERS | 5 +++ - README.md | 1 + - debian/control | 1 + - debian/copyright | 4 ++ - kernel-headers/CMakeLists.txt | 2 + - kernel-headers/rdma/erdma-abi.h | 52 +++++++++++++++++++++++ - kernel-headers/rdma/ib_user_ioctl_verbs.h | 1 + - libibverbs/examples/devinfo.c | 7 ++- - providers/erdma/CMakeLists.txt | 12 ++++++ - redhat/rdma-core.spec | 3 ++ - 11 files changed, 87 insertions(+), 2 deletions(-) - create mode 100644 kernel-headers/rdma/erdma-abi.h - create mode 100644 providers/erdma/CMakeLists.txt - -diff --git a/CMakeLists.txt b/CMakeLists.txt -index 1aecae49a507..bdf24c7fc4cd 100644 ---- a/CMakeLists.txt -+++ b/CMakeLists.txt -@@ -668,6 +668,7 @@ add_subdirectory(providers/bnxt_re) - add_subdirectory(providers/cxgb4) # NO SPARSE - add_subdirectory(providers/efa) - add_subdirectory(providers/efa/man) -+add_subdirectory(providers/erdma) - add_subdirectory(providers/hns) - add_subdirectory(providers/irdma) - add_subdirectory(providers/mlx4) -diff --git a/MAINTAINERS b/MAINTAINERS -index 9fec12406f81..bbeddabbd1ba 100644 ---- a/MAINTAINERS -+++ b/MAINTAINERS -@@ -61,6 +61,11 @@ M: Gal Pressman - S: Supported - F: providers/efa/ - -+ERDMA USERSPACE PROVIDER (for erdma.ko) -+M: Cheng Xu -+S: Supported -+F: providers/erdma/ -+ - HF1 USERSPACE PROVIDER (for hf1.ko) - M: Mike Marciniszyn - M: Dennis Dalessandro -diff --git a/README.md b/README.md -index 18c3b014bdbd..ebb941e2132a 100644 ---- a/README.md -+++ b/README.md -@@ -15,6 +15,7 @@ under the providers/ directory. Support for the following Kernel RDMA drivers - is included: - - - efa.ko -+ - erdma.ko - - iw_cxgb4.ko - - hfi1.ko - - hns-roce.ko -diff --git a/debian/control b/debian/control -index d9fec3a5fbdc..38dc6f710dbc 100644 ---- a/debian/control -+++ b/debian/control -@@ -93,6 +93,7 @@ Description: User space provider drivers for libibverbs - - bnxt_re: Broadcom NetXtreme-E RoCE HCAs - - cxgb4: Chelsio T4 iWARP HCAs - - efa: Amazon Elastic Fabric Adapter -+ - erdma: Alibaba Elastic RDMA (iWarp) Adapter - - hfi1verbs: Intel Omni-Path HFI - - hns: HiSilicon Hip06 SoC - - ipathverbs: QLogic InfiniPath HCAs -diff --git a/debian/copyright b/debian/copyright -index d58aa779a569..ead62adb437a 100644 ---- a/debian/copyright -+++ b/debian/copyright -@@ -156,6 +156,10 @@ Files: providers/efa/* - Copyright: 2019 Amazon.com, Inc. or its affiliates. - License: BSD-2-clause or GPL-2 - -+Files: providers/erdma/* -+Copyright: 2020-2021, Alibaba Group -+License: BSD-MIT or GPL-2 -+ - Files: providers/hfi1verbs/* - Copyright: 2005 PathScale, Inc. - 2006-2009 QLogic Corporation -diff --git a/kernel-headers/CMakeLists.txt b/kernel-headers/CMakeLists.txt -index d9621ee2b940..f30ff2292ace 100644 ---- a/kernel-headers/CMakeLists.txt -+++ b/kernel-headers/CMakeLists.txt -@@ -2,6 +2,7 @@ publish_internal_headers(rdma - rdma/bnxt_re-abi.h - rdma/cxgb4-abi.h - rdma/efa-abi.h -+ rdma/erdma-abi.h - rdma/hns-abi.h - rdma/ib_user_ioctl_cmds.h - rdma/ib_user_ioctl_verbs.h -@@ -64,6 +65,7 @@ rdma_kernel_provider_abi( - rdma/bnxt_re-abi.h - rdma/cxgb4-abi.h - rdma/efa-abi.h -+ rdma/erdma-abi.h - rdma/hns-abi.h - rdma/ib_user_verbs.h - rdma/irdma-abi.h -diff --git a/kernel-headers/rdma/erdma-abi.h b/kernel-headers/rdma/erdma-abi.h -new file mode 100644 -index 000000000000..455046415983 ---- /dev/null -+++ b/kernel-headers/rdma/erdma-abi.h -@@ -0,0 +1,52 @@ -+/* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR Linux-OpenIB) */ -+/* -+ * Copyright (c) 2020-2022, Alibaba Group. -+ */ -+ -+#ifndef __ERDMA_USER_H__ -+#define __ERDMA_USER_H__ -+ -+#include -+ -+#define ERDMA_ABI_VERSION 1 -+ -+struct erdma_ureq_create_cq { -+ __aligned_u64 db_record_va; -+ __aligned_u64 qbuf_va; -+ __u32 qbuf_len; -+ __u32 rsvd0; -+}; -+ -+struct erdma_uresp_create_cq { -+ __u32 cq_id; -+ __u32 num_cqe; -+}; -+ -+struct erdma_ureq_create_qp { -+ __aligned_u64 db_record_va; -+ __aligned_u64 qbuf_va; -+ __u32 qbuf_len; -+ __u32 rsvd0; -+}; -+ -+struct erdma_uresp_create_qp { -+ __u32 qp_id; -+ __u32 num_sqe; -+ __u32 num_rqe; -+ __u32 rq_offset; -+}; -+ -+struct erdma_uresp_alloc_ctx { -+ __u32 dev_id; -+ __u32 pad; -+ __u32 sdb_type; -+ __u32 sdb_entid; -+ __aligned_u64 sdb; -+ __aligned_u64 rdb; -+ __aligned_u64 cdb; -+ __u32 sdb_off; -+ __u32 rdb_off; -+ __u32 cdb_off; -+}; -+ -+#endif -diff --git a/kernel-headers/rdma/ib_user_ioctl_verbs.h b/kernel-headers/rdma/ib_user_ioctl_verbs.h -index 3072e5d6b692..7dd56210226f 100644 ---- a/kernel-headers/rdma/ib_user_ioctl_verbs.h -+++ b/kernel-headers/rdma/ib_user_ioctl_verbs.h -@@ -250,6 +250,7 @@ enum rdma_driver_id { - RDMA_DRIVER_QIB, - RDMA_DRIVER_EFA, - RDMA_DRIVER_SIW, -+ RDMA_DRIVER_ERDMA, - }; - - enum ib_uverbs_gid_type { -diff --git a/libibverbs/examples/devinfo.c b/libibverbs/examples/devinfo.c -index 5db568b08040..f7680a96a2a1 100644 ---- a/libibverbs/examples/devinfo.c -+++ b/libibverbs/examples/devinfo.c -@@ -521,8 +521,11 @@ static int print_hca_cap(struct ibv_device *ib_dev, uint8_t ib_port) - } - - printf("hca_id:\t%s\n", ibv_get_device_name(ib_dev)); -- printf("\ttransport:\t\t\t%s (%d)\n", -- transport_str(ib_dev->transport_type), ib_dev->transport_type); -+ if (device_attr.orig_attr.vendor_id != 0x1ded) -+ printf("\ttransport:\t\t\t%s (%d)\n", -+ transport_str(ib_dev->transport_type), ib_dev->transport_type); -+ else -+ printf("\ttransport:\t\t\t%s\n", "eRDMA"); - if (strlen(device_attr.orig_attr.fw_ver)) - printf("\tfw_ver:\t\t\t\t%s\n", device_attr.orig_attr.fw_ver); - printf("\tnode_guid:\t\t\t%s\n", guid_str(device_attr.orig_attr.node_guid, buf)); -diff --git a/providers/erdma/CMakeLists.txt b/providers/erdma/CMakeLists.txt -new file mode 100644 -index 000000000000..eeb40f79a08e ---- /dev/null -+++ b/providers/erdma/CMakeLists.txt -@@ -0,0 +1,12 @@ -+rdma_provider(erdma -+ erdma.c -+ erdma_db.c -+ erdma_verbs.c -+) -+ -+if (${CMAKE_SYSTEM_PROCESSOR} STREQUAL "x86_64") -+ set(ERDMA_PRIV_FLAGS "-msse4.1 -msse4.2 -mavx2 -Werror") -+ add_definitions(-DHAVE_AVX_SUPPORT) -+endif() -+ -+set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${ERDMA_PRIV_FLAGS}") -diff --git a/redhat/rdma-core.spec b/redhat/rdma-core.spec -index 55b2d608b29b..32e9bf77e8ca 100644 ---- a/redhat/rdma-core.spec -+++ b/redhat/rdma-core.spec -@@ -148,6 +148,8 @@ Provides: libcxgb4 = %{version}-%{release} - Obsoletes: libcxgb4 < %{version}-%{release} - Provides: libefa = %{version}-%{release} - Obsoletes: libefa < %{version}-%{release} -+Provides: liberdma = %{version}-%{release} -+Obsoletes: liberdma < %{version}-%{release} - Provides: libhfi1 = %{version}-%{release} - Obsoletes: libhfi1 < %{version}-%{release} - Provides: libipathverbs = %{version}-%{release} -@@ -176,6 +178,7 @@ Device-specific plug-in ibverbs userspace drivers are included: - - - libcxgb4: Chelsio T4 iWARP HCA - - libefa: Amazon Elastic Fabric Adapter -+- liberdma: Alibaba Elastic RDMA (iWarp) Adapter - - libhfi1: Intel Omni-Path HFI - - libhns: HiSilicon Hip06 SoC - - libipathverbs: QLogic InfiniPath HCA --- -2.37.0 - diff --git a/9998-kernel-boot-Do-not-perform-device-rename-on-OPA-devi.patch b/9998-kernel-boot-Do-not-perform-device-rename-on-OPA-devi.patch new file mode 100644 index 0000000..7ee61e6 --- /dev/null +++ b/9998-kernel-boot-Do-not-perform-device-rename-on-OPA-devi.patch @@ -0,0 +1,42 @@ +From 1c63f25b55ca4f5317e1c85b548469bbc747e147 Mon Sep 17 00:00:00 2001 +From: "Goldman, Adam" +Date: Tue, 4 Feb 2020 08:55:20 -0500 +Subject: [PATCH] kernel-boot: Do not perform device rename on OPA devices + +PSM2 will not run with recent rdma-core releases. Several tools and +libraries like PSM2, require the hfi1 name to be present. + +Recent rdma-core releases added a new feature to rename kernel devices, +but the default configuration will not work with hfi1 fabrics. + +Related opa-psm2 github issue: + https://github.com/intel/opa-psm2/issues/43 + +Fixes: 5b4099d47be3 ("kernel-boot: Perform device rename to make stable names") +Reviewed-by: Mike Marciniszyn +Signed-off-by: Goldman, Adam +--- + kernel-boot/rdma-persistent-naming.rules | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/kernel-boot/rdma-persistent-naming.rules b/kernel-boot/rdma-persistent-naming.rules +index 6f9c53a5..3ce34ea9 100644 +--- a/kernel-boot/rdma-persistent-naming.rules ++++ b/kernel-boot/rdma-persistent-naming.rules +@@ -26,10 +26,10 @@ + # Device type = RoCE + # mlx5_0 -> rocex525400c0fe123455 + # +-ACTION=="add", SUBSYSTEM=="infiniband", PROGRAM="rdma_rename %k NAME_FALLBACK" ++ACTION=="add", SUBSYSTEM=="infiniband", KERNEL!="hfi1*", PROGRAM="rdma_rename %k NAME_FALLBACK" + + # Example: + # * NAME_FIXED + # fixed name for specific board_id + # +-#ACTION=="add", ATTR{board_id}=="MSF0010110035", SUBSYSTEM=="infiniband", PROGRAM="rdma_rename %k NAME_FIXED myib" +\ No newline at end of file ++#ACTION=="add", ATTR{board_id}=="MSF0010110035", SUBSYSTEM=="infiniband", PROGRAM="rdma_rename %k NAME_FIXED myib" +-- +2.30.1 + diff --git a/udev-keep-NAME_KERNEL-as-default-interface-naming-co.patch b/9999-udev-keep-NAME_KERNEL-as-default-interface-naming-co.patch similarity index 100% rename from udev-keep-NAME_KERNEL-as-default-interface-naming-co.patch rename to 9999-udev-keep-NAME_KERNEL-as-default-interface-naming-co.patch diff --git a/dist b/dist new file mode 100644 index 0000000..9c0e36e --- /dev/null +++ b/dist @@ -0,0 +1 @@ +an8 diff --git a/download b/download index 91ee9dc..510e450 100644 --- a/download +++ b/download @@ -1 +1 @@ -efc96cfed2ee6898c7a6f234c185f87a rdma-core-37.2.tar.gz +ce0e0c383d682b41e31ac8fe2139e62f rdma-core-44.0.tar.gz diff --git a/rdma-core.spec b/rdma-core.spec index ab2c930..1c7fae1 100644 --- a/rdma-core.spec +++ b/rdma-core.spec @@ -1,7 +1,6 @@ -%define anolis_release .0.3 Name: rdma-core -Version: 37.2 -Release: 1%{anolis_release}%{?dist} +Version: 44.0 +Release: 2%{?dist}.1 Summary: RDMA core userspace libraries and daemons # Almost everything is licensed under the OFA dual GPLv2, 2 Clause BSD license @@ -17,17 +16,16 @@ Source1: ibdev2netdev # are extracted from libibverbs-26.0-8.el8 . Source2: rxe_cfg Source3: rxe_cfg.8.gz -Patch3: udev-keep-NAME_KERNEL-as-default-interface-naming-co.patch +# 0001-0003: https://github.com/linux-rdma/rdma-core/pull/1308 +Patch1: 0001-util-fix-overflow-in-remap_node_name.patch +Patch2: 0002-infiniband-diags-drop-unnecessary-nodedesc-local-cop.patch +Patch3: 0003-libibnetdisc-fix-printing-a-possibly-non-NUL-termina.patch # RHEL specific patch for OPA ibacm plugin Patch300: 0001-ibacm-acm.c-load-plugin-while-it-is-soft-link.patch - -Patch301: 0001-tests-Fix-comparing-qp_state-for-iWARP-providers.patch -# ERDMA provider support -Patch1000: 1001-providers-erdma-Add-userspace-verbs-related-header-f.patch -Patch1001: 1002-providers-erdma-Add-userspace-verbs-implementation.patch -Patch1002: 1003-providers-erdma-Add-the-main-module-of-erdma.patch -Patch1003: 1004-providers-erdma-Add-to-the-build-environment.patch - +Patch301: 0002-systemd-drop-Protect-options-not-supported-in-RHEL-8.patch +Patch9000: 0003-CMakeLists-disable-providers-that-were-not-enabled-i.patch +Patch9998: 9998-kernel-boot-Do-not-perform-device-rename-on-OPA-devi.patch +Patch9999: 9999-udev-keep-NAME_KERNEL-as-default-interface-naming-co.patch # Do not build static libs by default. %define with_static %{?_with_static: 1} %{?!_with_static: 0} @@ -91,14 +89,12 @@ BuildRequires: make %define cmake_install DESTDIR=%{buildroot} make install %endif -%ifnarch loongarch64 BuildRequires: pandoc -%endif %description -RDMA core userspace infrastructure and documentation, including kernel -driver-specific modprobe override configs, IPoIB network scripts, -dracut rules, and the rdma-ndd utility. +RDMA core userspace infrastructure and documentation, including initialization +scripts, kernel driver-specific modprobe override configs, IPoIB network +scripts, dracut rules, and the rdma-ndd utility. %package devel Summary: RDMA core development libraries and headers @@ -118,6 +114,13 @@ Provides: infiniband-diags-devel = %{version}-%{release} Obsoletes: infiniband-diags-devel < %{version}-%{release} Provides: libibmad-devel = %{version}-%{release} Obsoletes: libibmad-devel < %{version}-%{release} +%if %{with_static} +# Since our pkg-config files include private references to these packages they +# need to have their .pc files installed too, even for dynamic linking, or +# pkg-config breaks. +BuildRequires: pkgconfig(libnl-3.0) +BuildRequires: pkgconfig(libnl-route-3.0) +%endif %description devel RDMA core development libraries and headers. @@ -262,13 +265,18 @@ easy, object-oriented access to IB verbs. %prep %setup -q +%patch1 -p1 +%patch2 -p1 %patch3 -p1 %patch300 -p1 %patch301 -p1 -%patch1000 -p1 -%patch1001 -p1 -%patch1002 -p1 -%patch1003 -p1 +%if 0%{?fedora} +%patch9998 -p1 +%endif +%if 0%{?rhel} +%patch9000 -p1 +%patch9999 -p1 +%endif %build @@ -323,7 +331,6 @@ mkdir -p %{buildroot}/%{_sysconfdir}/rdma # Red Hat specific glue %global dracutlibdir %{_prefix}/lib/dracut %global sysmodprobedir %{_prefix}/lib/modprobe.d -mkdir -p %{buildroot}%{_sysconfdir}/udev/rules.d mkdir -p %{buildroot}%{_libexecdir} mkdir -p %{buildroot}%{_udevrulesdir} mkdir -p %{buildroot}%{dracutlibdir}/modules.d/05rdma @@ -351,17 +358,11 @@ install -D -m0644 ibacm_opts.cfg %{buildroot}%{_sysconfdir}/rdma/ # Delete the package's init.d scripts rm -rf %{buildroot}/%{_initrddir}/ -# Remove ibverbs provider libs we don't support -rm -f %{buildroot}/%{_libdir}/libibverbs/libcxgb3-rdmav*.so -rm -f %{buildroot}/%{_sysconfdir}/libibverbs.d/cxgb3.driver -rm -f %{buildroot}/%{_libdir}/libibverbs/libocrdma-rdmav*.so -rm -f %{buildroot}/%{_sysconfdir}/libibverbs.d/ocrdma.driver -rm -f %{buildroot}/%{_libdir}/libibverbs/libnes-rdmav*.so -rm -f %{buildroot}/%{_sysconfdir}/libibverbs.d/nes.driver -rm -f %{buildroot}/%{_libdir}/libibverbs/libmthca-rdmav*.so -rm -f %{buildroot}/%{_sysconfdir}/libibverbs.d/mthca.driver -rm -f %{buildroot}/%{_libdir}/libibverbs/libipathverbs-rdmav*.so -rm -f %{buildroot}/%{_sysconfdir}/libibverbs.d/ipathverbs.driver +%ldconfig_scriptlets -n libibverbs + +%ldconfig_scriptlets -n libibumad + +%ldconfig_scriptlets -n librdmacm %post -n rdma-core if [ -x /sbin/udevadm ]; then @@ -370,22 +371,6 @@ if [ -x /sbin/udevadm ]; then /sbin/udevadm trigger --subsystem-match=infiniband_mad --action=change || true fi -%post -n infiniband-diags -p /sbin/ldconfig -%postun -n infiniband-diags -%ldconfig_postun - -%post -n libibverbs -p /sbin/ldconfig -%postun -n libibverbs -%ldconfig_postun - -%post -n libibumad -p /sbin/ldconfig -%postun -n libibumad -%ldconfig_postun - -%post -n librdmacm -p /sbin/ldconfig -%postun -n librdmacm -%ldconfig_postun - %post -n ibacm %systemd_post ibacm.service %preun -n ibacm @@ -410,20 +395,21 @@ fi %files %dir %{_sysconfdir}/rdma %dir %{_docdir}/%{name} +%doc %{_docdir}/%{name}/70-persistent-ipoib.rules %doc %{_docdir}/%{name}/README.md +%doc %{_docdir}/%{name}/rxe.md %doc %{_docdir}/%{name}/udev.md +%doc %{_docdir}/%{name}/tag_matching.md %config(noreplace) %{_sysconfdir}/rdma/mlx4.conf %config(noreplace) %{_sysconfdir}/rdma/modules/infiniband.conf %config(noreplace) %{_sysconfdir}/rdma/modules/iwarp.conf %config(noreplace) %{_sysconfdir}/rdma/modules/opa.conf %config(noreplace) %{_sysconfdir}/rdma/modules/rdma.conf %config(noreplace) %{_sysconfdir}/rdma/modules/roce.conf -%config(noreplace) %{_sysconfdir}/udev/rules.d/* %dir %{_sysconfdir}/modprobe.d %ifnarch s390 %config(noreplace) %{_sysconfdir}/modprobe.d/mlx4.conf %endif -%config(noreplace) %{_sysconfdir}/modprobe.d/truescale.conf %{_unitdir}/rdma-hw.target %{_unitdir}/rdma-load-modules@.service %dir %{dracutlibdir} @@ -441,7 +427,6 @@ fi %dir %{sysmodprobedir} %{sysmodprobedir}/libmlx4.conf %{_libexecdir}/mlx4-setup.sh -%{_libexecdir}/truescale-serdes.cmds %{_sbindir}/rdma-ndd %{_bindir}/ibdev2netdev %{_unitdir}/rdma-ndd.service @@ -555,13 +540,6 @@ fi %endif %config(noreplace) %{_sysconfdir}/libibverbs.d/*.driver %doc %{_docdir}/%{name}/libibverbs.md -%doc %{_docdir}/%{name}/rxe.md -%doc %{_docdir}/%{name}/tag_matching.md -%{_mandir}/man7/rxe* -%ifnarch s390 -%{_mandir}/man7/mlx4dv* -%{_mandir}/man7/mlx5dv* -%endif %files -n libibverbs-utils %{_bindir}/ibv_* @@ -598,7 +576,7 @@ fi %files -n librdmacm %{_libdir}/librdmacm*.so.* %dir %{_libdir}/rsocket -%{_libdir}/rsocket/librspreload.so* +%{_libdir}/rsocket/*.so* %doc %{_docdir}/%{name}/librdmacm.md %{_mandir}/man7/rsocket.* @@ -654,14 +632,17 @@ fi %endif %changelog -* Thu Dec 16 2022 Cheng Xu - 37.2.1.0.3 -- Backport ERDMA support +* Wed Feb 08 2023 Michal Schmidt - 44.0-2.1 +- Do not use unsupported Protect* options in systemd unit files. +- Resolves: rhbz#2141462 -* Wed Nov 16 2022 Liwei Ge - 37.2-1.0.2 -- Fix build error +* Wed Feb 08 2023 Michal Schmidt - 44.0-2 +- Update to upstream release v44.0 +- Resolves: rhbz#2110934, rhbz#2112931, rhbz#2142691 -* Fri Apr 22 2022 Jacob Wang - 37.2-1.0.1 -- Support loongarch64 (Hang Zhao) +* Fri Aug 05 2022 Michal Schmidt - 41.0-1 +- Update to upstream release v41.0 +- Resolves: rhbz#2049518 * Thu Jan 06 2022 Honggang Li - 37.2-1 - Update to upstream v37.2 release for fixes -- Gitee From 04d687bd2161700c5170ac21f7f1b7724f6e8d9b Mon Sep 17 00:00:00 2001 From: songmingliang Date: Fri, 22 Apr 2022 18:48:45 +0800 Subject: [PATCH 2/3] build: support loongarch64 --- rdma-core.spec | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/rdma-core.spec b/rdma-core.spec index 1c7fae1..fe48fe8 100644 --- a/rdma-core.spec +++ b/rdma-core.spec @@ -1,6 +1,7 @@ +%define anolis_release .0.1 Name: rdma-core Version: 44.0 -Release: 2%{?dist}.1 +Release: 2%{anolis_release}%{?dist}.1 Summary: RDMA core userspace libraries and daemons # Almost everything is licensed under the OFA dual GPLv2, 2 Clause BSD license @@ -89,7 +90,9 @@ BuildRequires: make %define cmake_install DESTDIR=%{buildroot} make install %endif +%ifnarch loongarch64 BuildRequires: pandoc +%endif %description RDMA core userspace infrastructure and documentation, including initialization @@ -407,7 +410,7 @@ fi %config(noreplace) %{_sysconfdir}/rdma/modules/rdma.conf %config(noreplace) %{_sysconfdir}/rdma/modules/roce.conf %dir %{_sysconfdir}/modprobe.d -%ifnarch s390 +%ifnarch s390 loongarch64 %config(noreplace) %{_sysconfdir}/modprobe.d/mlx4.conf %endif %{_unitdir}/rdma-hw.target @@ -445,13 +448,15 @@ fi %endif %{_libdir}/lib*.so %{_libdir}/pkgconfig/*.pc +%ifnarch loongarch64 %{_mandir}/man3/efadv* +%endif %{_mandir}/man3/ibv_* %{_mandir}/man3/rdma* %{_mandir}/man3/umad* %{_mandir}/man3/*_to_ibv_rate.* %{_mandir}/man7/rdma_cm.* -%ifnarch s390 +%ifnarch s390 loongarch64 %{_mandir}/man3/mlx5dv* %{_mandir}/man3/mlx4dv* %{_mandir}/man7/efadv* @@ -531,10 +536,12 @@ fi %files -n libibverbs %dir %{_sysconfdir}/libibverbs.d %dir %{_libdir}/libibverbs +%ifnarch loongarch64 %{_libdir}/libefa.so.* +%endif %{_libdir}/libibverbs*.so.* %{_libdir}/libibverbs/*.so -%ifnarch s390 +%ifnarch s390 loongarch64 %{_libdir}/libmlx5.so.* %{_libdir}/libmlx4.so.* %endif @@ -632,6 +639,9 @@ fi %endif %changelog +* Wed May 24 2023 Jacob Wang - 44.0-2.0.1.1 +- Support loongarch64 (Hang Zhao) + * Wed Feb 08 2023 Michal Schmidt - 44.0-2.1 - Do not use unsupported Protect* options in systemd unit files. - Resolves: rhbz#2141462 -- Gitee From fab2064292dad81d186ed5f164e0bff9be823355 Mon Sep 17 00:00:00 2001 From: Liwei Ge Date: Wed, 16 Nov 2022 18:27:26 +0800 Subject: [PATCH 3/3] spec: fix build error --- rdma-core.spec | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/rdma-core.spec b/rdma-core.spec index fe48fe8..7966dc6 100644 --- a/rdma-core.spec +++ b/rdma-core.spec @@ -410,7 +410,7 @@ fi %config(noreplace) %{_sysconfdir}/rdma/modules/rdma.conf %config(noreplace) %{_sysconfdir}/rdma/modules/roce.conf %dir %{_sysconfdir}/modprobe.d -%ifnarch s390 loongarch64 +%ifnarch s390 %config(noreplace) %{_sysconfdir}/modprobe.d/mlx4.conf %endif %{_unitdir}/rdma-hw.target @@ -448,15 +448,13 @@ fi %endif %{_libdir}/lib*.so %{_libdir}/pkgconfig/*.pc -%ifnarch loongarch64 %{_mandir}/man3/efadv* -%endif %{_mandir}/man3/ibv_* %{_mandir}/man3/rdma* %{_mandir}/man3/umad* %{_mandir}/man3/*_to_ibv_rate.* %{_mandir}/man7/rdma_cm.* -%ifnarch s390 loongarch64 +%ifnarch s390 %{_mandir}/man3/mlx5dv* %{_mandir}/man3/mlx4dv* %{_mandir}/man7/efadv* @@ -536,12 +534,10 @@ fi %files -n libibverbs %dir %{_sysconfdir}/libibverbs.d %dir %{_libdir}/libibverbs -%ifnarch loongarch64 %{_libdir}/libefa.so.* -%endif %{_libdir}/libibverbs*.so.* %{_libdir}/libibverbs/*.so -%ifnarch s390 loongarch64 +%ifnarch s390 %{_libdir}/libmlx5.so.* %{_libdir}/libmlx4.so.* %endif @@ -641,6 +637,7 @@ fi %changelog * Wed May 24 2023 Jacob Wang - 44.0-2.0.1.1 - Support loongarch64 (Hang Zhao) +- Fix build error (geliwei@openanolis.org) * Wed Feb 08 2023 Michal Schmidt - 44.0-2.1 - Do not use unsupported Protect* options in systemd unit files. -- Gitee