diff --git a/1001-providers-erdma-Add-userspace-verbs-related-header-f.patch b/1001-providers-erdma-Add-userspace-verbs-related-header-f.patch new file mode 100644 index 0000000000000000000000000000000000000000..aee39d0d7a2556c4eb2ffa3afe60437d827e82ad --- /dev/null +++ b/1001-providers-erdma-Add-userspace-verbs-related-header-f.patch @@ -0,0 +1,430 @@ +From b8fdbf0cd29d6630ad53e78ec4724bb85f19611d Mon Sep 17 00:00:00 2001 +Message-Id: +From: Cheng Xu +Date: Thu, 1 Dec 2022 15:38:34 +0800 +Subject: [PATCH 1/4] providers/erdma: Add userspace verbs related header + files. + +Add the userspace verbs implementation related header files: 'erdma_hw.h' +for hardware interface definitions, 'erdma_verbs.h' for verbs related +definitions and 'erdma_db.h' for doorbell records related definitions. + +Signed-off-by: Cheng Xu +--- + providers/erdma/erdma_db.h | 17 +++ + providers/erdma/erdma_hw.h | 218 ++++++++++++++++++++++++++++++++++ + providers/erdma/erdma_verbs.h | 153 ++++++++++++++++++++++++ + 3 files changed, 388 insertions(+) + create mode 100644 providers/erdma/erdma_db.h + create mode 100644 providers/erdma/erdma_hw.h + create mode 100644 providers/erdma/erdma_verbs.h + +diff --git a/providers/erdma/erdma_db.h b/providers/erdma/erdma_db.h +new file mode 100644 +index 000000000000..c302cb7ab154 +--- /dev/null ++++ b/providers/erdma/erdma_db.h +@@ -0,0 +1,17 @@ ++/* SPDX-License-Identifier: GPL-2.0 or OpenIB.org BSD (MIT) See COPYING file */ ++/* ++ * Authors: Cheng Xu ++ * Copyright (c) 2020-2021, Alibaba Group. ++ */ ++ ++#ifndef __ERDMA_DB_H__ ++#define __ERDMA_DB_H__ ++ ++#include ++ ++#include "erdma.h" ++ ++uint64_t *erdma_alloc_dbrecords(struct erdma_context *ctx); ++void erdma_dealloc_dbrecords(struct erdma_context *ctx, uint64_t *dbrecords); ++ ++#endif +diff --git a/providers/erdma/erdma_hw.h b/providers/erdma/erdma_hw.h +new file mode 100644 +index 000000000000..7f567076a549 +--- /dev/null ++++ b/providers/erdma/erdma_hw.h +@@ -0,0 +1,218 @@ ++/* SPDX-License-Identifier: GPL-2.0 or OpenIB.org BSD (MIT) See COPYING file */ ++/* ++ * Authors: Cheng Xu ++ * Copyright (c) 2020-2021, Alibaba Group. ++ */ ++ ++#ifndef __ERDMA_HW_H__ ++#define __ERDMA_HW_H__ ++ ++#include ++ ++#define ERDMA_SDB_PAGE 0 ++#define ERDMA_SDB_ENTRY 1 ++#define ERDMA_SDB_SHARED 2 ++ ++#define ERDMA_NSDB_PER_ENTRY 2 ++#define ERDMA_SDB_ALLOC_QPN_MASK 0x1f ++#define ERDMA_RDB_ALLOC_QPN_MASK 0x7f ++ ++#define ERDMA_SQDB_SIZE 128 ++#define ERDMA_CQDB_SIZE 8 ++#define ERDMA_RQDB_SIZE 8 ++#define ERDMA_RQDB_SPACE_SIZE 32 ++ ++/* WQE related. 
*/ ++#define EQE_SIZE 16 ++#define EQE_SHIFT 4 ++#define RQE_SIZE 32 ++#define RQE_SHIFT 5 ++#define CQE_SIZE 32 ++#define CQE_SHIFT 5 ++#define SQEBB_SIZE 32 ++#define SQEBB_SHIFT 5 ++#define SQEBB_MASK (~(SQEBB_SIZE - 1)) ++#define SQEBB_ALIGN(size) ((size + SQEBB_SIZE - 1) & SQEBB_MASK) ++#define SQEBB_COUNT(size) (SQEBB_ALIGN(size) >> SQEBB_SHIFT) ++ ++#define MAX_WQEBB_PER_SQE 4 ++ ++enum erdma_opcode { ++ ERDMA_OP_WRITE = 0, ++ ERDMA_OP_READ = 1, ++ ERDMA_OP_SEND = 2, ++ ERDMA_OP_SEND_WITH_IMM = 3, ++ ++ ERDMA_OP_RECEIVE = 4, ++ ERDMA_OP_RECV_IMM = 5, ++ ERDMA_OP_RECV_INV = 6, ++ ++ ERDMA_OP_REQ_ERR = 7, ++ ERDNA_OP_READ_RESPONSE = 8, ++ ERDMA_OP_WRITE_WITH_IMM = 9, ++ ++ ERDMA_OP_RECV_ERR = 10, ++ ++ ERDMA_OP_INVALIDATE = 11, ++ ERDMA_OP_RSP_SEND_IMM = 12, ++ ERDMA_OP_SEND_WITH_INV = 13, ++ ++ ERDMA_OP_REG_MR = 14, ++ ERDMA_OP_LOCAL_INV = 15, ++ ERDMA_OP_READ_WITH_INV = 16, ++ ERDMA_OP_ATOMIC_CAS = 17, ++ ERDMA_OP_ATOMIC_FAD = 18, ++ ERDMA_NUM_OPCODES = 19, ++ ERDMA_OP_INVALID = ERDMA_NUM_OPCODES + 1 ++}; ++ ++/* ++ * Inline data are kept within the work request itself occupying ++ * the space of sge[1] .. sge[n]. Therefore, inline data cannot be ++ * supported if ERDMA_MAX_SGE is below 2 elements. ++ */ ++#define ERDMA_MAX_INLINE (sizeof(struct erdma_sge) * (ERDMA_MAX_SEND_SGE)) ++ ++enum erdma_wc_status { ++ ERDMA_WC_SUCCESS = 0, ++ ERDMA_WC_GENERAL_ERR = 1, ++ ERDMA_WC_RECV_WQE_FORMAT_ERR = 2, ++ ERDMA_WC_RECV_STAG_INVALID_ERR = 3, ++ ERDMA_WC_RECV_ADDR_VIOLATION_ERR = 4, ++ ERDMA_WC_RECV_RIGHT_VIOLATION_ERR = 5, ++ ERDMA_WC_RECV_PDID_ERR = 6, ++ ERDMA_WC_RECV_WARRPING_ERR = 7, ++ ERDMA_WC_SEND_WQE_FORMAT_ERR = 8, ++ ERDMA_WC_SEND_WQE_ORD_EXCEED = 9, ++ ERDMA_WC_SEND_STAG_INVALID_ERR = 10, ++ ERDMA_WC_SEND_ADDR_VIOLATION_ERR = 11, ++ ERDMA_WC_SEND_RIGHT_VIOLATION_ERR = 12, ++ ERDMA_WC_SEND_PDID_ERR = 13, ++ ERDMA_WC_SEND_WARRPING_ERR = 14, ++ ERDMA_WC_FLUSH_ERR = 15, ++ ERDMA_WC_RETRY_EXC_ERR = 16, ++ ERDMA_NUM_WC_STATUS ++}; ++ ++enum erdma_vendor_err { ++ ERDMA_WC_VENDOR_NO_ERR = 0, ++ ERDMA_WC_VENDOR_INVALID_RQE = 1, ++ ERDMA_WC_VENDOR_RQE_INVALID_STAG = 2, ++ ERDMA_WC_VENDOR_RQE_ADDR_VIOLATION = 3, ++ ERDMA_WC_VENDOR_RQE_ACCESS_RIGHT_ERR = 4, ++ ERDMA_WC_VENDOR_RQE_INVALID_PD = 5, ++ ERDMA_WC_VENDOR_RQE_WRAP_ERR = 6, ++ ERDMA_WC_VENDOR_INVALID_SQE = 0x20, ++ ERDMA_WC_VENDOR_ZERO_ORD = 0x21, ++ ERDMA_WC_VENDOR_SQE_INVALID_STAG = 0x30, ++ ERDMA_WC_VENDOR_SQE_ADDR_VIOLATION = 0x31, ++ ERDMA_WC_VENDOR_SQE_ACCESS_ERR = 0x32, ++ ERDMA_WC_VENDOR_SQE_INVALID_PD = 0x33, ++ ERDMA_WC_VENDOR_SQE_WARP_ERR = 0x34 ++}; ++ ++/* Doorbell related. 
*/ ++#define ERDMA_CQDB_IDX_MASK GENMASK_ULL(63, 56) ++#define ERDMA_CQDB_CQN_MASK GENMASK_ULL(55, 32) ++#define ERDMA_CQDB_ARM_MASK BIT_ULL(31) ++#define ERDMA_CQDB_SOL_MASK BIT_ULL(30) ++#define ERDMA_CQDB_CMDSN_MASK GENMASK_ULL(29, 28) ++#define ERDMA_CQDB_CI_MASK GENMASK_ULL(23, 0) ++ ++#define ERDMA_CQE_QTYPE_SQ 0 ++#define ERDMA_CQE_QTYPE_RQ 1 ++#define ERDMA_CQE_QTYPE_CMDQ 2 ++ ++/* CQE hdr */ ++#define ERDMA_CQE_HDR_OWNER_MASK BIT(31) ++#define ERDMA_CQE_HDR_OPCODE_MASK GENMASK(23, 16) ++#define ERDMA_CQE_HDR_QTYPE_MASK GENMASK(15, 8) ++#define ERDMA_CQE_HDR_SYNDROME_MASK GENMASK(7, 0) ++ ++struct erdma_cqe { ++ __be32 hdr; ++ __be32 qe_idx; ++ __be32 qpn; ++ __le32 imm_data; ++ __be32 size; ++ __be32 rsvd[3]; ++}; ++ ++struct erdma_sge { ++ __aligned_le64 laddr; ++ __le32 length; ++ __le32 lkey; ++}; ++ ++/* Receive Queue Element */ ++struct erdma_rqe { ++ __le16 qe_idx; ++ __le16 rsvd; ++ __le32 qpn; ++ __le32 rsvd2; ++ __le32 rsvd3; ++ __le64 to; ++ __le32 length; ++ __le32 stag; ++}; ++ ++/* SQE */ ++#define ERDMA_SQE_HDR_SGL_LEN_MASK GENMASK_ULL(63, 56) ++#define ERDMA_SQE_HDR_WQEBB_CNT_MASK GENMASK_ULL(54, 52) ++#define ERDMA_SQE_HDR_QPN_MASK GENMASK_ULL(51, 32) ++#define ERDMA_SQE_HDR_OPCODE_MASK GENMASK_ULL(31, 27) ++#define ERDMA_SQE_HDR_DWQE_MASK BIT_ULL(26) ++#define ERDMA_SQE_HDR_INLINE_MASK BIT_ULL(25) ++#define ERDMA_SQE_HDR_FENCE_MASK BIT_ULL(24) ++#define ERDMA_SQE_HDR_SE_MASK BIT_ULL(23) ++#define ERDMA_SQE_HDR_CE_MASK BIT_ULL(22) ++#define ERDMA_SQE_HDR_WQEBB_INDEX_MASK GENMASK_ULL(15, 0) ++ ++struct erdma_write_sqe { ++ __le64 hdr; ++ __be32 imm_data; ++ __le32 length; ++ ++ __le32 sink_stag; ++ /* avoid sink_to not 8-byte aligned. */ ++ __le32 sink_to_low; ++ __le32 sink_to_high; ++ ++ __le32 rsvd; ++ ++ struct erdma_sge sgl[0]; ++}; ++ ++struct erdma_send_sqe { ++ __le64 hdr; ++ union { ++ __be32 imm_data; ++ __le32 invalid_stag; ++ }; ++ __le32 length; ++ struct erdma_sge sgl[0]; ++}; ++ ++struct erdma_readreq_sqe { ++ __le64 hdr; ++ __le32 invalid_stag; ++ __le32 length; ++ __le32 sink_stag; ++ /* avoid sink_to not 8-byte aligned. */ ++ __le32 sink_to_low; ++ __le32 sink_to_high; ++ __le32 rsvd0; ++ struct erdma_sge sgl; ++}; ++ ++struct erdma_atomic_sqe { ++ __le64 hdr; ++ __le64 rsvd; ++ __le64 fetchadd_swap_data; ++ __le64 cmp_data; ++ ++ struct erdma_sge remote; ++ struct erdma_sge sgl; ++}; ++ ++#endif +diff --git a/providers/erdma/erdma_verbs.h b/providers/erdma/erdma_verbs.h +new file mode 100644 +index 000000000000..d9820c5a20d5 +--- /dev/null ++++ b/providers/erdma/erdma_verbs.h +@@ -0,0 +1,153 @@ ++/* SPDX-License-Identifier: GPL-2.0 or OpenIB.org BSD (MIT) See COPYING file */ ++/* ++ * Authors: Cheng Xu ++ * Copyright (c) 2020-2021, Alibaba Group. 
++ */ ++ ++#ifndef __ERDMA_VERBS_H__ ++#define __ERDMA_VERBS_H__ ++ ++#include ++#include ++#include ++ ++#include "erdma.h" ++#include "erdma_hw.h" ++ ++#define ERDMA_MAX_SEND_SGE 6 ++#define ERDMA_MAX_RECV_SGE 1 ++ ++struct erdma_queue { ++ void *qbuf; ++ void *db; ++ ++ uint16_t rsvd0; ++ uint16_t depth; ++ uint32_t size; ++ ++ uint16_t pi; ++ uint16_t ci; ++ ++ uint32_t rsvd1; ++ uint64_t *wr_tbl; ++ ++ void *db_record; ++}; ++ ++struct erdma_qp { ++ struct verbs_qp verbs_qp; ++ struct erdma_device *erdma_dev; ++ ++ uint32_t id; /* qpn */ ++ ++ pthread_spinlock_t sq_lock; ++ pthread_spinlock_t rq_lock; ++ ++ int sq_sig_all; ++ int disable_dwqe; ++ pthread_spinlock_t *sdb_lock; ++ ++ struct erdma_queue sq; ++ struct erdma_queue rq; ++ ++ void *qbuf; ++ size_t qbuf_size; ++ uint64_t *db_records; ++ ++ void *cur_sqe; ++ uint16_t cur_pi; ++ uint16_t sq_pi_rb; ++ uint32_t sgl_off; /* SGL offset in wqebb. */ ++ __le32 *length_field; /* The total length field in SQE header */ ++ int err; ++}; ++ ++struct erdma_cq { ++ struct ibv_cq base_cq; ++ struct erdma_device *erdma_dev; ++ uint32_t id; ++ ++ uint32_t event_stats; ++ ++ uint32_t depth; ++ uint32_t ci; ++ struct erdma_cqe *queue; ++ ++ void *db; ++ uint16_t db_offset; ++ ++ void *db_record; ++ uint32_t cmdsn; ++ uint32_t comp_vector; ++ uint32_t db_index; ++ ++ pthread_spinlock_t lock; ++}; ++ ++static inline struct erdma_cq *to_ecq(struct ibv_cq *base) ++{ ++ return container_of(base, struct erdma_cq, base_cq); ++} ++ ++static inline struct erdma_qp *to_eqp(struct ibv_qp *ibqp) ++{ ++ return container_of(ibqp, struct erdma_qp, verbs_qp.qp); ++} ++ ++static inline void *get_sq_wqebb(struct erdma_qp *qp, uint16_t idx) ++{ ++ idx &= (qp->sq.depth - 1); ++ return qp->sq.qbuf + (idx << SQEBB_SHIFT); ++} ++ ++static inline void __kick_sq_db(struct erdma_qp *qp, uint16_t pi) ++{ ++ uint64_t db_data; ++ ++ db_data = FIELD_PREP(ERDMA_SQE_HDR_QPN_MASK, qp->id) | ++ FIELD_PREP(ERDMA_SQE_HDR_WQEBB_INDEX_MASK, pi); ++ ++ *(__le64 *)qp->sq.db_record = htole64(db_data); ++ udma_to_device_barrier(); ++ mmio_write64_le(qp->sq.db, htole64(db_data)); ++} ++ ++struct ibv_pd *erdma_alloc_pd(struct ibv_context *ctx); ++int erdma_free_pd(struct ibv_pd *pd); ++ ++int erdma_query_device(struct ibv_context *ctx, ++ const struct ibv_query_device_ex_input *input, ++ struct ibv_device_attr_ex *attr, size_t attr_size); ++int erdma_query_port(struct ibv_context *ctx, uint8_t port, ++ struct ibv_port_attr *attr); ++ ++struct ibv_mr *erdma_reg_mr(struct ibv_pd *pd, void *addr, size_t len, ++ uint64_t hca_va, int access); ++int erdma_dereg_mr(struct verbs_mr *vmr); ++ ++struct ibv_qp *erdma_create_qp(struct ibv_pd *pd, ++ struct ibv_qp_init_attr *attr); ++struct ibv_qp *erdma_create_qp_ex(struct ibv_context *context, ++ struct ibv_qp_init_attr_ex *attr); ++int erdma_modify_qp(struct ibv_qp *base_qp, struct ibv_qp_attr *attr, ++ int attr_mask); ++int erdma_query_qp(struct ibv_qp *qp, struct ibv_qp_attr *attr, int attr_mask, ++ struct ibv_qp_init_attr *init_attr); ++int erdma_post_send(struct ibv_qp *base_qp, struct ibv_send_wr *wr, ++ struct ibv_send_wr **bad_wr); ++int erdma_post_recv(struct ibv_qp *base_qp, struct ibv_recv_wr *wr, ++ struct ibv_recv_wr **bad_wr); ++int erdma_destroy_qp(struct ibv_qp *base_qp); ++ ++void erdma_free_context(struct ibv_context *ibv_ctx); ++ ++struct ibv_cq *erdma_create_cq(struct ibv_context *ctx, int num_cqe, ++ struct ibv_comp_channel *channel, ++ int comp_vector); ++int erdma_destroy_cq(struct ibv_cq *base_cq); ++int 
erdma_notify_cq(struct ibv_cq *ibcq, int solicited); ++void erdma_cq_event(struct ibv_cq *ibcq); ++int erdma_poll_cq(struct ibv_cq *ibcq, int num_entries, struct ibv_wc *wc); ++ ++ ++#endif +-- +2.37.0 + diff --git a/1002-providers-erdma-Add-userspace-verbs-implementation.patch b/1002-providers-erdma-Add-userspace-verbs-implementation.patch new file mode 100644 index 0000000000000000000000000000000000000000..527852507ad335fc378fbfc165fedd76d15b4aff --- /dev/null +++ b/1002-providers-erdma-Add-userspace-verbs-implementation.patch @@ -0,0 +1,1656 @@ +From 08f8e55dde433e81ce241003835cdb29dd2708cf Mon Sep 17 00:00:00 2001 +Message-Id: <08f8e55dde433e81ce241003835cdb29dd2708cf.1669880730.git.chengyou@linux.alibaba.com> +In-Reply-To: +References: +From: Cheng Xu +Date: Thu, 1 Dec 2022 15:39:02 +0800 +Subject: [PATCH 2/4] providers/erdma: Add userspace verbs implementation + +Implementation of the erdma's 'struct verbs_context_ops' interface. +Due to doorbells may be drop by hardware in some situations, such as +hardware hot-upgrade, driver will keep the latest doorbell value of each +QP and CQ. So we introduce the doorbell records to store the latest +doorbell values also. + +Signed-off-by: Cheng Xu +--- + providers/erdma/erdma_db.c | 115 +++ + providers/erdma/erdma_verbs.c | 1504 +++++++++++++++++++++++++++++++++ + 2 files changed, 1619 insertions(+) + create mode 100644 providers/erdma/erdma_db.c + create mode 100644 providers/erdma/erdma_verbs.c + +diff --git a/providers/erdma/erdma_db.c b/providers/erdma/erdma_db.c +new file mode 100644 +index 000000000000..382262da5f48 +--- /dev/null ++++ b/providers/erdma/erdma_db.c +@@ -0,0 +1,115 @@ ++// SPDX-License-Identifier: GPL-2.0 or OpenIB.org BSD (MIT) See COPYING file ++ ++// Authors: Cheng Xu ++// Copyright (c) 2020-2021, Alibaba Group. ++ ++// Copyright (c) 2012 Mellanox Technologies, Inc. All rights reserved. 
++ ++#define _GNU_SOURCE ++#include ++#include ++#include ++#include ++ ++#include "erdma.h" ++#include "erdma_db.h" ++ ++#define ERDMA_DBRECORDS_SIZE 16 ++ ++struct erdma_dbrecord_page { ++ struct erdma_dbrecord_page *prev, *next; ++ void *page_buf; ++ int cnt; ++ int used; ++ unsigned long free[0]; ++}; ++ ++uint64_t *erdma_alloc_dbrecords(struct erdma_context *ctx) ++{ ++ int bits_perlong = (8 * sizeof(unsigned long)); ++ struct erdma_dbrecord_page *page = NULL; ++ int dbrecords_per_page, nlongs = 0; ++ uint64_t *db_records = NULL; ++ int i, j, rv; ++ ++ pthread_mutex_lock(&ctx->dbrecord_pages_mutex); ++ ++ for (page = ctx->dbrecord_pages; page; page = page->next) ++ if (page->used < page->cnt) ++ goto found; ++ ++ dbrecords_per_page = ctx->page_size / ERDMA_DBRECORDS_SIZE; ++ nlongs = align(dbrecords_per_page, bits_perlong) / bits_perlong; ++ page = malloc(sizeof(*page) + nlongs * sizeof(unsigned long)); ++ if (!page) ++ goto out; ++ ++ rv = posix_memalign(&page->page_buf, ctx->page_size, ctx->page_size); ++ if (rv) { ++ free(page); ++ goto out; ++ } ++ ++ page->cnt = dbrecords_per_page; ++ page->used = 0; ++ for (i = 0; i < nlongs; i++) ++ page->free[i] = ~0UL; ++ ++ page->prev = NULL; ++ page->next = ctx->dbrecord_pages; ++ ctx->dbrecord_pages = page; ++ if (page->next) ++ page->next->prev = page; ++ ++found: ++ ++page->used; ++ ++ for (i = 0; !page->free[i]; ++i) ++ ; /* nothing */ ++ ++ j = ffsl(page->free[i]) - 1; ++ page->free[i] &= ~(1UL << j); ++ ++ db_records = ++ page->page_buf + (i * bits_perlong + j) * ERDMA_DBRECORDS_SIZE; ++ ++out: ++ pthread_mutex_unlock(&ctx->dbrecord_pages_mutex); ++ ++ return db_records; ++} ++ ++void erdma_dealloc_dbrecords(struct erdma_context *ctx, uint64_t *dbrecords) ++{ ++ struct erdma_dbrecord_page *page; ++ int page_mask = ~(ctx->page_size - 1); ++ int idx; ++ ++ pthread_mutex_lock(&ctx->dbrecord_pages_mutex); ++ for (page = ctx->dbrecord_pages; page; page = page->next) ++ if (((uintptr_t)dbrecords & page_mask) == ++ (uintptr_t)page->page_buf) ++ break; ++ ++ if (!page) ++ goto out; ++ ++ idx = ((void *)dbrecords - page->page_buf) / ERDMA_DBRECORDS_SIZE; ++ page->free[idx / (8 * sizeof(unsigned long))] |= ++ 1UL << (idx % (8 * sizeof(unsigned long))); ++ ++ if (!--page->used) { ++ if (page->prev) ++ page->prev->next = page->next; ++ else ++ ctx->dbrecord_pages = page->next; ++ if (page->next) ++ page->next->prev = page->prev; ++ ++ free(page->page_buf); ++ free(page); ++ } ++ ++out: ++ pthread_mutex_unlock(&ctx->dbrecord_pages_mutex); ++} +diff --git a/providers/erdma/erdma_verbs.c b/providers/erdma/erdma_verbs.c +new file mode 100644 +index 000000000000..d665e0401b60 +--- /dev/null ++++ b/providers/erdma/erdma_verbs.c +@@ -0,0 +1,1504 @@ ++// SPDX-License-Identifier: GPL-2.0 or BSD-3-Clause ++ ++// Authors: Cheng Xu ++// Copyright (c) 2020-2021, Alibaba Group. 
++// Authors: Bernard Metzler ++// Copyright (c) 2008-2019, IBM Corporation ++ ++#include ++#include ++#ifdef HAVE_AVX_SUPPORT ++#include ++#else ++#define _mm256_store_si256(a, b) fprintf(stderr, "not supported") ++#endif ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include "erdma.h" ++#include "erdma_abi.h" ++#include "erdma_db.h" ++#include "erdma_hw.h" ++#include "erdma_verbs.h" ++ ++int erdma_query_device(struct ibv_context *ctx, ++ const struct ibv_query_device_ex_input *input, ++ struct ibv_device_attr_ex *attr, size_t attr_size) ++{ ++ struct ib_uverbs_ex_query_device_resp resp; ++ unsigned int major, minor, sub_minor; ++ size_t resp_size = sizeof(resp); ++ uint64_t raw_fw_ver; ++ int rv; ++ ++ rv = ibv_cmd_query_device_any(ctx, input, attr, attr_size, &resp, ++ &resp_size); ++ if (rv) ++ return rv; ++ ++ raw_fw_ver = resp.base.fw_ver; ++ major = (raw_fw_ver >> 32) & 0xffff; ++ minor = (raw_fw_ver >> 16) & 0xffff; ++ sub_minor = raw_fw_ver & 0xffff; ++ ++ snprintf(attr->orig_attr.fw_ver, sizeof(attr->orig_attr.fw_ver), ++ "%d.%d.%d", major, minor, sub_minor); ++ ++ return 0; ++} ++ ++int erdma_query_port(struct ibv_context *ctx, uint8_t port, ++ struct ibv_port_attr *attr) ++{ ++ struct ibv_query_port cmd = {}; ++ ++ return ibv_cmd_query_port(ctx, port, attr, &cmd, sizeof(cmd)); ++} ++ ++int erdma_query_qp(struct ibv_qp *qp, struct ibv_qp_attr *attr, int attr_mask, ++ struct ibv_qp_init_attr *init_attr) ++{ ++ struct ibv_query_qp cmd = {}; ++ ++ return ibv_cmd_query_qp(qp, attr, attr_mask, init_attr, &cmd, ++ sizeof(cmd)); ++} ++ ++struct ibv_pd *erdma_alloc_pd(struct ibv_context *ctx) ++{ ++ struct ib_uverbs_alloc_pd_resp resp; ++ struct ibv_alloc_pd cmd = {}; ++ struct ibv_pd *pd; ++ ++ pd = calloc(1, sizeof(*pd)); ++ if (!pd) ++ return NULL; ++ ++ if (ibv_cmd_alloc_pd(ctx, pd, &cmd, sizeof(cmd), &resp, sizeof(resp))) { ++ free(pd); ++ return NULL; ++ } ++ ++ return pd; ++} ++ ++int erdma_free_pd(struct ibv_pd *pd) ++{ ++ int rv; ++ ++ rv = ibv_cmd_dealloc_pd(pd); ++ if (rv) ++ return rv; ++ ++ free(pd); ++ return 0; ++} ++ ++struct ibv_mr *erdma_reg_mr(struct ibv_pd *pd, void *addr, size_t len, ++ uint64_t hca_va, int access) ++{ ++ struct ib_uverbs_reg_mr_resp resp; ++ struct ibv_reg_mr cmd; ++ struct verbs_mr *vmr; ++ int ret; ++ ++ vmr = calloc(1, sizeof(*vmr)); ++ if (!vmr) ++ return NULL; ++ ++ ret = ibv_cmd_reg_mr(pd, addr, len, hca_va, access, vmr, &cmd, ++ sizeof(cmd), &resp, sizeof(resp)); ++ if (ret) { ++ free(vmr); ++ return NULL; ++ } ++ ++ return &vmr->ibv_mr; ++} ++ ++int erdma_dereg_mr(struct verbs_mr *vmr) ++{ ++ int ret; ++ ++ ret = ibv_cmd_dereg_mr(vmr); ++ if (ret) ++ return ret; ++ ++ free(vmr); ++ return 0; ++} ++ ++int erdma_notify_cq(struct ibv_cq *ibcq, int solicited) ++{ ++ struct erdma_cq *cq = to_ecq(ibcq); ++ uint64_t db_data; ++ int ret; ++ ++ ret = pthread_spin_lock(&cq->lock); ++ if (ret) ++ return ret; ++ ++ db_data = FIELD_PREP(ERDMA_CQDB_IDX_MASK, cq->db_index) | ++ FIELD_PREP(ERDMA_CQDB_CQN_MASK, cq->id) | ++ FIELD_PREP(ERDMA_CQDB_ARM_MASK, 1) | ++ FIELD_PREP(ERDMA_CQDB_SOL_MASK, solicited) | ++ FIELD_PREP(ERDMA_CQDB_CMDSN_MASK, cq->cmdsn) | ++ FIELD_PREP(ERDMA_CQDB_CI_MASK, cq->ci); ++ ++ *(__le64 *)cq->db_record = htole64(db_data); ++ cq->db_index++; ++ udma_to_device_barrier(); ++ mmio_write64_le(cq->db, htole64(db_data)); ++ ++ pthread_spin_unlock(&cq->lock); ++ ++ return ret; ++} ++ ++struct ibv_cq *erdma_create_cq(struct ibv_context *ctx, int num_cqe, ++ struct ibv_comp_channel *channel, ++ 
int comp_vector) ++{ ++ struct erdma_context *ectx = to_ectx(ctx); ++ struct erdma_cmd_create_cq_resp resp = {}; ++ struct erdma_cmd_create_cq cmd = {}; ++ uint64_t *db_records = NULL; ++ struct erdma_cq *cq; ++ size_t cq_size; ++ int rv; ++ ++ cq = calloc(1, sizeof(*cq)); ++ if (!cq) ++ return NULL; ++ ++ if (num_cqe < 64) ++ num_cqe = 64; ++ ++ num_cqe = roundup_pow_of_two(num_cqe); ++ cq_size = align(num_cqe * sizeof(struct erdma_cqe), ERDMA_PAGE_SIZE); ++ ++ rv = posix_memalign((void **)&cq->queue, ERDMA_PAGE_SIZE, cq_size); ++ if (rv) { ++ errno = rv; ++ free(cq); ++ return NULL; ++ } ++ ++ rv = ibv_dontfork_range(cq->queue, cq_size); ++ if (rv) { ++ errno = rv; ++ free(cq->queue); ++ cq->queue = NULL; ++ goto error_alloc; ++ } ++ ++ memset(cq->queue, 0, cq_size); ++ ++ db_records = erdma_alloc_dbrecords(ectx); ++ if (!db_records) { ++ errno = ENOMEM; ++ goto error_alloc; ++ } ++ ++ cmd.db_record_va = (uintptr_t)db_records; ++ cmd.qbuf_va = (uintptr_t)cq->queue; ++ cmd.qbuf_len = cq_size; ++ ++ rv = ibv_cmd_create_cq(ctx, num_cqe, channel, comp_vector, &cq->base_cq, ++ &cmd.ibv_cmd, sizeof(cmd), &resp.ibv_resp, ++ sizeof(resp)); ++ if (rv) { ++ errno = EIO; ++ goto error_alloc; ++ } ++ ++ pthread_spin_init(&cq->lock, PTHREAD_PROCESS_PRIVATE); ++ ++ *db_records = 0; ++ cq->db_record = db_records; ++ ++ cq->id = resp.cq_id; ++ cq->depth = resp.num_cqe; ++ ++ cq->db = ectx->cdb; ++ cq->db_offset = (cq->id & (ERDMA_PAGE_SIZE / ERDMA_CQDB_SIZE - 1)) * ++ ERDMA_CQDB_SIZE; ++ cq->db += cq->db_offset; ++ ++ cq->comp_vector = comp_vector; ++ ++ return &cq->base_cq; ++ ++error_alloc: ++ if (db_records) ++ erdma_dealloc_dbrecords(ectx, db_records); ++ ++ if (cq->queue) { ++ ibv_dofork_range(cq->queue, cq_size); ++ free(cq->queue); ++ } ++ ++ free(cq); ++ ++ return NULL; ++} ++ ++int erdma_destroy_cq(struct ibv_cq *base_cq) ++{ ++ struct erdma_context *ctx = to_ectx(base_cq->context); ++ struct erdma_cq *cq = to_ecq(base_cq); ++ int rv; ++ ++ pthread_spin_lock(&cq->lock); ++ rv = ibv_cmd_destroy_cq(base_cq); ++ if (rv) { ++ pthread_spin_unlock(&cq->lock); ++ errno = EIO; ++ return rv; ++ } ++ pthread_spin_destroy(&cq->lock); ++ ++ if (cq->db_record) ++ erdma_dealloc_dbrecords(ctx, cq->db_record); ++ ++ if (cq->queue) { ++ ibv_dofork_range(cq->queue, cq->depth << CQE_SHIFT); ++ free(cq->queue); ++ } ++ ++ free(cq); ++ ++ return 0; ++} ++ ++static inline void kick_hw_sqe(struct erdma_qp *qp, uint16_t pi, ++ uint32_t wqebb_cnt) ++{ ++ uint16_t idx = pi & (qp->sq.depth - 1); ++ void *sqe = get_sq_wqebb(qp, idx); ++ uint32_t i; ++ ++ pthread_spin_lock(qp->sdb_lock); ++ ++ *(__le64 *)qp->sq.db_record = htole64(*(uint64_t *)sqe); ++ ++ udma_to_device_barrier(); ++ ++ for (i = 0; i < wqebb_cnt; i++) { ++ _mm256_store_si256(qp->sq.db + (i << 5), ++ _mm256_load_si256(sqe)); ++ sqe = get_sq_wqebb(qp, idx + i + 1); ++ } ++ ++ mmio_flush_writes(); ++ ++ pthread_spin_unlock(qp->sdb_lock); ++} ++ ++enum { ++ ERDMA_SUPPORTED_SEND_OPS_FLAGS_RC = ++ IBV_QP_EX_WITH_SEND | IBV_QP_EX_WITH_SEND_WITH_INV | ++ IBV_QP_EX_WITH_SEND_WITH_IMM | IBV_QP_EX_WITH_RDMA_WRITE | ++ IBV_QP_EX_WITH_RDMA_WRITE_WITH_IMM | IBV_QP_EX_WITH_RDMA_READ | ++ IBV_QP_EX_WITH_ATOMIC_CMP_AND_SWP | ++ IBV_QP_EX_WITH_ATOMIC_FETCH_AND_ADD, ++}; ++ ++static void erdma_wr_start(struct ibv_qp_ex *ibqp) ++{ ++ struct erdma_qp *qp = to_eqp(&ibqp->qp_base); ++ ++ if (ibqp->qp_base.state == IBV_QPS_ERR) { ++ qp->err = -EIO; ++ return; ++ } ++ ++ pthread_spin_lock(&qp->sq_lock); ++ ++ qp->err = 0; ++ qp->sq_pi_rb = qp->sq.pi; ++} ++ ++static void 
erdma_wr_abort(struct ibv_qp_ex *ibqp) ++{ ++ struct erdma_qp *qp = to_eqp(&ibqp->qp_base); ++ ++ /* Rolling back */ ++ qp->sq.pi = qp->sq_pi_rb; ++ ++ pthread_spin_unlock(&qp->sq_lock); ++} ++ ++static int erdma_wr_complete(struct ibv_qp_ex *ibqp) ++{ ++ struct erdma_qp *qp = to_eqp(&ibqp->qp_base); ++ int err = qp->err; ++ ++ if (unlikely(err)) { ++ qp->sq.pi = qp->sq_pi_rb; ++ goto out; ++ } ++ ++ __kick_sq_db(qp, qp->sq.pi); /* normal doorbell. */ ++ ++out: ++ pthread_spin_unlock(&qp->sq_lock); ++ ++ return err; ++} ++ ++static void *erdma_init_sqe(struct erdma_qp *qp, uint64_t wr_id, ++ unsigned int wr_flags, enum erdma_opcode opcode) ++{ ++ uint16_t sq_pi = qp->sq.pi; ++ uint64_t sqe_hdr; ++ void *sqe; ++ ++ if ((uint16_t)(sq_pi - qp->sq.ci) >= qp->sq.depth) { ++ qp->err = ENOMEM; ++ qp->cur_sqe = NULL; ++ return NULL; ++ } ++ ++ sqe = get_sq_wqebb(qp, sq_pi); ++ ++ /* Clear the first 8Byte of the wqe hdr. */ ++ *(uint64_t *)sqe = 0; ++ qp->sq.wr_tbl[sq_pi & (qp->sq.depth - 1)] = wr_id; ++ ++ sqe_hdr = FIELD_PREP(ERDMA_SQE_HDR_QPN_MASK, qp->id); ++ sqe_hdr |= FIELD_PREP(ERDMA_SQE_HDR_CE_MASK, ++ wr_flags & IBV_SEND_SIGNALED ? 1 : 0); ++ sqe_hdr |= FIELD_PREP(ERDMA_SQE_HDR_CE_MASK, qp->sq_sig_all); ++ sqe_hdr |= FIELD_PREP(ERDMA_SQE_HDR_SE_MASK, ++ wr_flags & IBV_SEND_SOLICITED ? 1 : 0); ++ sqe_hdr |= FIELD_PREP(ERDMA_SQE_HDR_FENCE_MASK, ++ wr_flags & IBV_SEND_FENCE ? 1 : 0); ++ sqe_hdr |= FIELD_PREP(ERDMA_SQE_HDR_INLINE_MASK, ++ wr_flags & IBV_SEND_INLINE ? 1 : 0); ++ sqe_hdr |= FIELD_PREP(ERDMA_SQE_HDR_OPCODE_MASK, opcode); ++ ++ *(__le64 *)sqe = htole64(sqe_hdr); ++ qp->cur_sqe = sqe; ++ ++ return sqe; ++} ++ ++static void erdma_wr_send(struct ibv_qp_ex *ibqp) ++{ ++ struct erdma_qp *qp = to_eqp(&ibqp->qp_base); ++ ++ if (!erdma_init_sqe(qp, ibqp->wr_id, ibqp->wr_flags, ERDMA_OP_SEND)) ++ return; ++ ++ qp->cur_pi = qp->sq.pi; ++ qp->sgl_off = 16; ++} ++ ++static void erdma_wr_rdma_write(struct ibv_qp_ex *ibqp, uint32_t rkey, ++ uint64_t raddr) ++{ ++ struct erdma_qp *qp = to_eqp(&ibqp->qp_base); ++ struct erdma_write_sqe *sqe; ++ ++ sqe = erdma_init_sqe(qp, ibqp->wr_id, ibqp->wr_flags, ERDMA_OP_WRITE); ++ if (!sqe) ++ return; ++ ++ qp->cur_pi = qp->sq.pi + 1; ++ qp->sgl_off = 0; ++ sqe->sink_to_low = htole32(raddr & 0xffffffff); ++ sqe->sink_to_high = htole32((raddr >> 32) & 0xffffffff); ++ sqe->sink_stag = htole32(rkey); ++} ++ ++static void erdma_wr_set_sge(struct ibv_qp_ex *ibqp, uint32_t lkey, ++ uint64_t addr, uint32_t length) ++{ ++ struct erdma_qp *qp = to_eqp(&ibqp->qp_base); ++ uint64_t *sqe_hdr = qp->cur_sqe; ++ struct erdma_sge *sge; ++ uint32_t wqebb_cnt; ++ ++ sge = get_sq_wqebb(qp, qp->cur_pi) + qp->sgl_off; ++ sge->laddr = addr; ++ sge->lkey = lkey; ++ sge->length = length; ++ ++ if (qp->length_field) ++ *qp->length_field = htole32(length); ++ ++ *sqe_hdr |= FIELD_PREP(ERDMA_SQE_HDR_SGL_LEN_MASK, 1); ++ ++ /* ++ * This is the last SQE in SQE, ++ * so anyhow we need to move the cursor to next sqebb. 
++ */ ++ qp->cur_pi++; ++ wqebb_cnt = qp->cur_pi - qp->sq.pi; ++ ++ assert(wqebb_cnt <= MAX_WQEBB_PER_SQE); ++ *sqe_hdr |= FIELD_PREP(ERDMA_SQE_HDR_WQEBB_CNT_MASK, wqebb_cnt - 1); ++ *sqe_hdr |= FIELD_PREP(ERDMA_SQE_HDR_WQEBB_INDEX_MASK, qp->cur_pi); ++ ++ qp->sq.pi = qp->cur_pi; ++} ++ ++static void erdma_wr_set_sge_list(struct ibv_qp_ex *ibqp, size_t num_sge, ++ const struct ibv_sge *sg_list) ++{ ++ struct erdma_qp *qp = to_eqp(&ibqp->qp_base); ++ uint64_t *sqe_hdr = qp->cur_sqe; ++ uint32_t wqebb_cnt, sqebb_half; ++ struct erdma_sge *sge; ++ uint32_t i, bytes = 0; ++ ++ sge = get_sq_wqebb(qp, qp->cur_pi) + qp->sgl_off; ++ ++ if (num_sge > ERDMA_MAX_SEND_SGE) { ++ qp->err = EINVAL; ++ return; ++ } ++ ++ sqebb_half = !!qp->sgl_off; ++ ++ for (i = 0; i < num_sge; i++) { ++ sge->laddr = sg_list[i].addr; ++ sge->lkey = sg_list[i].lkey; ++ sge->length = sg_list[i].length; ++ bytes += sge->length; ++ ++ if (sqebb_half) { ++ qp->cur_pi++; ++ sge = get_sq_wqebb(qp, qp->cur_pi); ++ } else { ++ *((uint32_t *)sge + 7) = qp->id; ++ sge++; ++ } ++ ++ sqebb_half = !sqebb_half; ++ } ++ ++ qp->cur_pi += sqebb_half; ++ ++ if (qp->length_field) ++ *qp->length_field = htole32(bytes); ++ ++ *sqe_hdr |= FIELD_PREP(ERDMA_SQE_HDR_SGL_LEN_MASK, num_sge); ++ ++ wqebb_cnt = qp->cur_pi - qp->sq.pi; ++ assert(wqebb_cnt <= MAX_WQEBB_PER_SQE); ++ *sqe_hdr |= FIELD_PREP(ERDMA_SQE_HDR_WQEBB_CNT_MASK, wqebb_cnt - 1); ++ *sqe_hdr |= FIELD_PREP(ERDMA_SQE_HDR_WQEBB_INDEX_MASK, qp->cur_pi); ++ ++ qp->sq.pi = qp->cur_pi; ++} ++ ++static size_t free_space_to_tail(struct erdma_qp *qp, uint16_t cur_idx) ++{ ++ cur_idx &= qp->sq.depth - 1; ++ ++ return (qp->sq.depth - cur_idx) << SQEBB_SHIFT; ++} ++ ++static void erdma_wr_set_inline_data(struct ibv_qp_ex *ibqp, void *addr, ++ size_t length) ++{ ++ struct erdma_qp *qp = to_eqp(&ibqp->qp_base); ++ uint64_t *sqe_hdr = qp->cur_sqe; ++ uint32_t wqebb_cnt; ++ size_t space; ++ void *data; ++ ++ if (length > ERDMA_MAX_INLINE) { ++ qp->err = EINVAL; ++ return; ++ } ++ ++ data = get_sq_wqebb(qp, qp->cur_pi) + qp->sgl_off; ++ ++ if (qp->length_field) ++ *qp->length_field = htole32(length); ++ ++ space = free_space_to_tail(qp, qp->cur_pi) - qp->sgl_off; ++ ++ if (space >= length) { ++ memcpy(data, addr, length); ++ } else { ++ memcpy(data, addr, length - space); ++ memcpy(qp->sq.qbuf, addr + length - space, space); ++ } ++ ++ *sqe_hdr |= FIELD_PREP(ERDMA_SQE_HDR_SGL_LEN_MASK, length); ++ ++ /* ++ * This is the last SQE in SQE, ++ * so anyhow we need to move the cursor to next free sqebb. 
++ */ ++ qp->cur_pi += SQEBB_COUNT(qp->sgl_off + length); ++ wqebb_cnt = qp->cur_pi - qp->sq.pi; ++ ++ assert(wqebb_cnt <= MAX_WQEBB_PER_SQE); ++ *sqe_hdr |= FIELD_PREP(ERDMA_SQE_HDR_WQEBB_CNT_MASK, wqebb_cnt - 1); ++ *sqe_hdr |= FIELD_PREP(ERDMA_SQE_HDR_WQEBB_INDEX_MASK, qp->cur_pi); ++ ++ qp->sq.pi = qp->cur_pi; ++} ++ ++static void erdma_wr_set_inline_data_list(struct ibv_qp_ex *ibqp, ++ size_t num_buf, ++ const struct ibv_data_buf *buf_list) ++{ ++ struct erdma_qp *qp = to_eqp(&ibqp->qp_base); ++ size_t space, total_size = 0, i; ++ uint64_t *sqe_hdr = qp->cur_sqe; ++ uint32_t wqebb_cnt; ++ void *data; ++ ++ data = get_sq_wqebb(qp, qp->cur_pi) + qp->sgl_off; ++ ++ space = free_space_to_tail(qp, qp->cur_pi) - qp->sgl_off; ++ for (i = 0; i < num_buf; i++) ++ total_size += buf_list[i].length; ++ ++ if (total_size > ERDMA_MAX_INLINE) { ++ qp->err = EINVAL; ++ return; ++ } ++ ++ if (qp->length_field) ++ *qp->length_field = htole32(total_size); ++ ++ if (space >= total_size) { ++ for (i = 0; i < num_buf; i++) { ++ memcpy(data, buf_list[i].addr, buf_list[i].length); ++ data += buf_list[i].length; ++ } ++ } else { ++ for (i = 0; i < num_buf; i++) { ++ if (space > buf_list[i].length) { ++ memcpy(data, buf_list[i].addr, ++ buf_list[i].length); ++ data += buf_list[i].length; ++ space -= buf_list[i].length; ++ } else { ++ if (space != 0) ++ memcpy(data, buf_list[i].addr, space); ++ ++ data = get_sq_wqebb(qp, 0); ++ memcpy(data, buf_list[i].addr + space, ++ buf_list[i].length - space); ++ data += buf_list[i].length - space; ++ space = free_space_to_tail(qp, 0) - ++ buf_list[i].length - space; ++ } ++ } ++ } ++ ++ *sqe_hdr |= FIELD_PREP(ERDMA_SQE_HDR_SGL_LEN_MASK, total_size); ++ ++ /* ++ * This is the last SQE in SQE, ++ * so anyhow we need to move the cursor to next free sqebb. 
++ */ ++ qp->cur_pi += SQEBB_COUNT(qp->sgl_off + total_size); ++ wqebb_cnt = qp->cur_pi - qp->sq.pi; ++ ++ assert(wqebb_cnt <= MAX_WQEBB_PER_SQE); ++ *sqe_hdr |= FIELD_PREP(ERDMA_SQE_HDR_WQEBB_CNT_MASK, wqebb_cnt - 1); ++ *sqe_hdr |= FIELD_PREP(ERDMA_SQE_HDR_WQEBB_INDEX_MASK, qp->cur_pi); ++ ++ qp->sq.pi = qp->cur_pi; ++} ++ ++static int erdma_fill_wr_ops(struct erdma_qp *eqp, ++ const struct ibv_qp_init_attr_ex *attr) ++{ ++ struct ibv_qp_ex *ibqp_ex = &eqp->verbs_qp.qp_ex; ++ ++ ibqp_ex->wr_start = erdma_wr_start; ++ ibqp_ex->wr_complete = erdma_wr_complete; ++ ibqp_ex->wr_abort = erdma_wr_abort; ++ ++ if (attr->send_ops_flags & ~ERDMA_SUPPORTED_SEND_OPS_FLAGS_RC) ++ return -EOPNOTSUPP; ++ ++ ibqp_ex->wr_send = erdma_wr_send; ++ // ibqp->wr_send_imm = erdma_send_wr_send_imm; ++ // ibqp->wr_send_inv = erdma_send_wr_send_inv; ++ ibqp_ex->wr_rdma_write = erdma_wr_rdma_write; ++ // ibqp->wr_rdma_write_imm = erdma_send_wr_rdma_write_imm; ++ // ibqp->wr_rdma_read = erdma_send_wr_rdma_read; ++ // ibqp->wr_atomic_cmp_swp = erdma_send_wr_atomic_cmp_swp; ++ // ibqp->wr_atomic_fetch_add = erdma_send_wr_atomic_fetch_add; ++ // ibqp->wr_bind_mw = erdma_send_wr_bind_mw; ++ // ibqp->wr_local_inv = erdma_send_wr_local_inv; ++ ibqp_ex->wr_set_sge = erdma_wr_set_sge; ++ ibqp_ex->wr_set_sge_list = erdma_wr_set_sge_list; ++ ibqp_ex->wr_set_inline_data = erdma_wr_set_inline_data; ++ ibqp_ex->wr_set_inline_data_list = erdma_wr_set_inline_data_list; ++ ++ return 0; ++} ++ ++static void __erdma_alloc_dbs(struct erdma_qp *qp, struct erdma_context *ctx) ++{ ++ uint32_t qpn = qp->id; ++ ++ if (ctx->sdb_type == ERDMA_SDB_PAGE) { ++ qp->disable_dwqe = 0; ++ qp->sq.db = ctx->sdb + (qpn & 31) * 128; ++ qp->sdb_lock = &ctx->sdb_lock[qpn & 31].lock; ++ } else if (ctx->sdb_type == ERDMA_SDB_ENTRY) { ++ qp->disable_dwqe = 0; ++ ++ qp->sq.db = ctx->sdb + ctx->sdb_entid * 256; ++ qp->sdb_lock = &ctx->sdb_lock[0].lock; ++ } else { ++ qp->disable_dwqe = 1; ++ qp->sq.db = ctx->sdb + (qpn & 31) * 128; ++ qp->sdb_lock = &ctx->sdb_lock[0].lock; ++ } ++ ++#ifndef HAVE_AVX_SUPPORT ++ qp->disable_dwqe = 1; ++#endif ++ ++ /* qpn[6:0] as the index in this rq db page. 
*/ ++ qp->rq.db = ctx->rdb + ++ (qpn & ERDMA_RDB_ALLOC_QPN_MASK) * ERDMA_RQDB_SPACE_SIZE; ++} ++ ++static int erdma_check_qp_attr(struct erdma_context *ctx, ++ struct ibv_qp_init_attr_ex *attr) ++ ++{ ++ if (!check_comp_mask(attr->comp_mask, ++ IBV_QP_INIT_ATTR_PD | ++ IBV_QP_INIT_ATTR_SEND_OPS_FLAGS)) ++ return -EOPNOTSUPP; ++ ++ if (attr->qp_type != IBV_QPT_RC || ++ !(attr->comp_mask & IBV_QP_INIT_ATTR_PD)) ++ return -EINVAL; ++ ++ if (!attr->recv_cq || !attr->send_cq) ++ return -EINVAL; ++ ++ if (attr->srq) ++ return -EINVAL; ++ ++ return 0; ++} ++ ++static int erdma_store_qp(struct erdma_context *ctx, struct erdma_qp *qp) ++{ ++ uint32_t tbl_idx, tbl_off; ++ int rv = 0; ++ ++ pthread_mutex_lock(&ctx->qp_table_mutex); ++ tbl_idx = qp->id >> ERDMA_QP_TABLE_SHIFT; ++ tbl_off = qp->id & ERDMA_QP_TABLE_MASK; ++ ++ if (ctx->qp_table[tbl_idx].refcnt == 0) { ++ ctx->qp_table[tbl_idx].table = ++ calloc(ERDMA_QP_TABLE_SIZE, sizeof(struct erdma_qp *)); ++ if (!ctx->qp_table[tbl_idx].table) { ++ rv = -ENOMEM; ++ goto out; ++ } ++ } ++ ++ /* exist qp */ ++ if (ctx->qp_table[tbl_idx].table[tbl_off]) { ++ rv = -EBUSY; ++ goto out; ++ } ++ ++ ctx->qp_table[tbl_idx].table[tbl_off] = qp; ++ ctx->qp_table[tbl_idx].refcnt++; ++ ++out: ++ pthread_mutex_unlock(&ctx->qp_table_mutex); ++ ++ return rv; ++} ++ ++static void erdma_clear_qp(struct erdma_context *ctx, struct erdma_qp *qp) ++{ ++ uint32_t tbl_idx, tbl_off; ++ ++ pthread_mutex_lock(&ctx->qp_table_mutex); ++ tbl_idx = qp->id >> ERDMA_QP_TABLE_SHIFT; ++ tbl_off = qp->id & ERDMA_QP_TABLE_MASK; ++ ++ ctx->qp_table[tbl_idx].table[tbl_off] = NULL; ++ ctx->qp_table[tbl_idx].refcnt--; ++ ++ if (ctx->qp_table[tbl_idx].refcnt == 0) { ++ free(ctx->qp_table[tbl_idx].table); ++ ctx->qp_table[tbl_idx].table = NULL; ++ } ++ ++ pthread_mutex_unlock(&ctx->qp_table_mutex); ++} ++ ++static int erdma_alloc_qp_buf_and_db(struct erdma_context *ctx, ++ struct erdma_qp *qp, ++ struct ibv_qp_init_attr_ex *attr) ++{ ++ size_t queue_size; ++ uint32_t nwqebb; ++ int rv; ++ ++ nwqebb = roundup_pow_of_two(attr->cap.max_send_wr * MAX_WQEBB_PER_SQE); ++ queue_size = align(nwqebb << SQEBB_SHIFT, ctx->page_size); ++ nwqebb = roundup_pow_of_two(attr->cap.max_recv_wr); ++ queue_size += align(nwqebb << RQE_SHIFT, ctx->page_size); ++ ++ qp->qbuf_size = queue_size; ++ rv = posix_memalign(&qp->qbuf, ctx->page_size, queue_size); ++ if (rv) { ++ errno = ENOMEM; ++ return -1; ++ } ++ ++ rv = ibv_dontfork_range(qp->qbuf, queue_size); ++ if (rv) { ++ errno = rv; ++ goto err_dontfork; ++ } ++ ++ /* doorbell record allocation. 
*/ ++ qp->db_records = erdma_alloc_dbrecords(ctx); ++ if (!qp->db_records) { ++ errno = ENOMEM; ++ goto err_dbrec; ++ } ++ ++ *qp->db_records = 0; ++ *(qp->db_records + 1) = 0; ++ qp->sq.db_record = qp->db_records; ++ qp->rq.db_record = qp->db_records + 1; ++ ++ pthread_spin_init(&qp->sq_lock, PTHREAD_PROCESS_PRIVATE); ++ pthread_spin_init(&qp->rq_lock, PTHREAD_PROCESS_PRIVATE); ++ ++ return 0; ++ ++err_dbrec: ++ ibv_dofork_range(qp->qbuf, queue_size); ++ ++err_dontfork: ++ free(qp->qbuf); ++ ++ return -1; ++} ++ ++static void erdma_free_qp_buf_and_db(struct erdma_context *ctx, ++ struct erdma_qp *qp) ++{ ++ pthread_spin_destroy(&qp->sq_lock); ++ pthread_spin_destroy(&qp->rq_lock); ++ ++ if (qp->db_records) ++ erdma_dealloc_dbrecords(ctx, qp->db_records); ++ ++ ibv_dofork_range(qp->qbuf, qp->qbuf_size); ++ ++ if (qp->qbuf) ++ free(qp->qbuf); ++} ++ ++static int erdma_alloc_wrid_tbl(struct erdma_qp *qp) ++{ ++ qp->rq.wr_tbl = calloc(qp->rq.depth, sizeof(uint64_t)); ++ if (!qp->rq.wr_tbl) ++ return -ENOMEM; ++ ++ qp->sq.wr_tbl = calloc(qp->sq.depth, sizeof(uint64_t)); ++ if (!qp->sq.wr_tbl) { ++ free(qp->rq.wr_tbl); ++ return -ENOMEM; ++ } ++ ++ return 0; ++} ++ ++static void erdma_free_wrid_tbl(struct erdma_qp *qp) ++{ ++ if (qp->sq.wr_tbl) ++ free(qp->sq.wr_tbl); ++ ++ if (qp->rq.wr_tbl) ++ free(qp->rq.wr_tbl); ++} ++ ++static struct ibv_qp *create_qp(struct ibv_context *ibv_ctx, ++ struct ibv_qp_init_attr_ex *attr) ++{ ++ struct erdma_context *ctx = to_ectx(ibv_ctx); ++ struct erdma_cmd_create_qp_resp resp = {}; ++ struct erdma_cmd_create_qp cmd = {}; ++ struct erdma_qp *qp; ++ int rv; ++ ++ rv = erdma_check_qp_attr(ctx, attr); ++ if (rv) { ++ errno = -rv; ++ return NULL; ++ } ++ ++ qp = calloc(1, sizeof(*qp)); ++ if (!qp) ++ return NULL; ++ ++ rv = erdma_alloc_qp_buf_and_db(ctx, qp, attr); ++ if (rv) ++ goto err; ++ ++ cmd.db_record_va = (uintptr_t)qp->db_records; ++ cmd.qbuf_va = (uintptr_t)qp->qbuf; ++ cmd.qbuf_len = (__u32)qp->qbuf_size; ++ ++ rv = ibv_cmd_create_qp_ex(ibv_ctx, &qp->verbs_qp, attr, &cmd.ibv_cmd, ++ sizeof(cmd), &resp.ibv_resp, sizeof(resp)); ++ if (rv) ++ goto err_cmd; ++ ++ qp->id = resp.qp_id; ++ qp->sq.qbuf = qp->qbuf; ++ qp->rq.qbuf = qp->qbuf + resp.rq_offset; ++ qp->sq.depth = resp.num_sqe; ++ qp->rq.depth = resp.num_rqe; ++ qp->sq_sig_all = attr->sq_sig_all; ++ qp->sq.size = resp.num_sqe * SQEBB_SIZE; ++ qp->rq.size = resp.num_rqe * sizeof(struct erdma_rqe); ++ ++ /* doorbell allocation. 
*/ ++ __erdma_alloc_dbs(qp, ctx); ++ ++ if (attr->comp_mask & IBV_QP_INIT_ATTR_SEND_OPS_FLAGS) { ++ rv = erdma_fill_wr_ops(qp, attr); ++ if (rv) { ++ errno = -rv; ++ goto err_send_ops; ++ } ++ } ++ ++ rv = erdma_alloc_wrid_tbl(qp); ++ if (rv) ++ goto err_wrid_tbl; ++ ++ rv = erdma_store_qp(ctx, qp); ++ if (rv) { ++ errno = -rv; ++ goto err_store; ++ } ++ ++ return &qp->verbs_qp.qp; ++ ++err_store: ++ erdma_free_wrid_tbl(qp); ++err_wrid_tbl: ++err_send_ops: ++ ibv_cmd_destroy_qp(&qp->verbs_qp.qp); ++err_cmd: ++ erdma_free_qp_buf_and_db(ctx, qp); ++err: ++ free(qp); ++ ++ return NULL; ++} ++ ++struct ibv_qp *erdma_create_qp(struct ibv_pd *pd, struct ibv_qp_init_attr *attr) ++{ ++ struct ibv_qp_init_attr_ex attrx = {}; ++ struct ibv_qp *qp; ++ ++ memcpy(&attrx, attr, sizeof(*attr)); ++ attrx.comp_mask = IBV_QP_INIT_ATTR_PD; ++ attrx.pd = pd; ++ ++ qp = create_qp(pd->context, &attrx); ++ if (qp) ++ memcpy(attr, &attrx, sizeof(*attr)); ++ ++ return qp; ++} ++ ++struct ibv_qp *erdma_create_qp_ex(struct ibv_context *context, ++ struct ibv_qp_init_attr_ex *attr) ++{ ++ return create_qp(context, attr); ++} ++ ++int erdma_destroy_qp(struct ibv_qp *ibqp) ++{ ++ struct ibv_context *base_ctx = ibqp->pd->context; ++ struct erdma_context *ctx = to_ectx(base_ctx); ++ struct erdma_qp *qp = to_eqp(ibqp); ++ int rv; ++ ++ erdma_clear_qp(ctx, qp); ++ ++ rv = ibv_cmd_destroy_qp(ibqp); ++ if (rv) ++ return rv; ++ ++ erdma_free_wrid_tbl(qp); ++ erdma_free_qp_buf_and_db(ctx, qp); ++ ++ free(qp); ++ ++ return 0; ++} ++ ++int erdma_modify_qp(struct ibv_qp *ibqp, struct ibv_qp_attr *attr, ++ int attr_mask) ++{ ++ struct erdma_qp *qp = to_eqp(ibqp); ++ struct ibv_modify_qp cmd = {}; ++ int rv; ++ ++ pthread_spin_lock(&qp->sq_lock); ++ pthread_spin_lock(&qp->rq_lock); ++ ++ rv = ibv_cmd_modify_qp(ibqp, attr, attr_mask, &cmd, sizeof(cmd)); ++ ++ pthread_spin_unlock(&qp->rq_lock); ++ pthread_spin_unlock(&qp->sq_lock); ++ ++ return rv; ++} ++ ++static int erdma_push_one_sqe(struct erdma_qp *qp, struct ibv_send_wr *wr, ++ uint16_t *sq_pi, int use_direct) ++{ ++ uint16_t tmp_pi = *sq_pi; ++ void *sqe; ++ uint64_t sqe_hdr; ++ struct erdma_write_sqe *write_sqe; ++ struct erdma_send_sqe *send_sqe; ++ struct erdma_readreq_sqe *read_sqe; ++ struct erdma_atomic_sqe *atomic_sqe; ++ uint32_t wqe_size = 0; ++ __le32 *length_field = NULL; ++ struct erdma_sge *sgl_base = NULL; ++ uint32_t i, bytes = 0; ++ uint32_t sgl_off, sgl_idx, wqebb_cnt, opcode; ++ ++ sqe = get_sq_wqebb(qp, tmp_pi); ++ /* Clear the first 8Byte of the wqe hdr. */ ++ *(uint64_t *)sqe = 0; ++ ++ qp->sq.wr_tbl[tmp_pi & (qp->sq.depth - 1)] = wr->wr_id; ++ ++ sqe_hdr = FIELD_PREP(ERDMA_SQE_HDR_QPN_MASK, qp->id) | ++ FIELD_PREP(ERDMA_SQE_HDR_CE_MASK, ++ wr->send_flags & IBV_SEND_SIGNALED ? 1 : 0) | ++ FIELD_PREP(ERDMA_SQE_HDR_CE_MASK, qp->sq_sig_all) | ++ FIELD_PREP(ERDMA_SQE_HDR_SE_MASK, ++ wr->send_flags & IBV_SEND_SOLICITED ? 1 : 0) | ++ FIELD_PREP(ERDMA_SQE_HDR_FENCE_MASK, ++ wr->send_flags & IBV_SEND_FENCE ? 1 : 0) | ++ FIELD_PREP(ERDMA_SQE_HDR_INLINE_MASK, ++ wr->send_flags & IBV_SEND_INLINE ? 
1 : 0) | ++ FIELD_PREP(ERDMA_SQE_HDR_DWQE_MASK, use_direct); ++ ++ switch (wr->opcode) { ++ case IBV_WR_RDMA_WRITE: ++ case IBV_WR_RDMA_WRITE_WITH_IMM: ++ if (wr->opcode == IBV_WR_RDMA_WRITE) ++ opcode = ERDMA_OP_WRITE; ++ else ++ opcode = ERDMA_OP_WRITE_WITH_IMM; ++ sqe_hdr |= FIELD_PREP(ERDMA_SQE_HDR_OPCODE_MASK, opcode); ++ write_sqe = sqe; ++ write_sqe->imm_data = wr->imm_data; ++ write_sqe->sink_stag = htole32(wr->wr.rdma.rkey); ++ write_sqe->sink_to_low = ++ htole32(wr->wr.rdma.remote_addr & 0xFFFFFFFF); ++ write_sqe->sink_to_high = ++ htole32((wr->wr.rdma.remote_addr >> 32) & 0xFFFFFFFF); ++ ++ length_field = &write_sqe->length; ++ /* sgl is at the start of next wqebb. */ ++ sgl_base = get_sq_wqebb(qp, tmp_pi + 1); ++ sgl_off = 0; ++ sgl_idx = tmp_pi + 1; ++ wqe_size = sizeof(struct erdma_write_sqe); ++ ++ break; ++ case IBV_WR_SEND: ++ case IBV_WR_SEND_WITH_IMM: ++ case IBV_WR_SEND_WITH_INV: ++ if (wr->opcode == IBV_WR_SEND) ++ opcode = ERDMA_OP_SEND; ++ else if (wr->opcode == IBV_WR_SEND_WITH_IMM) ++ opcode = ERDMA_OP_SEND_WITH_IMM; ++ else ++ opcode = ERDMA_OP_SEND_WITH_INV; ++ sqe_hdr |= FIELD_PREP(ERDMA_SQE_HDR_OPCODE_MASK, opcode); ++ send_sqe = sqe; ++ if (wr->opcode == IBV_WR_SEND_WITH_INV) ++ send_sqe->invalid_stag = htole32(wr->invalidate_rkey); ++ else ++ send_sqe->imm_data = wr->imm_data; ++ ++ length_field = &send_sqe->length; ++ /* sgl is in the half of current wqebb (offset 16Byte) */ ++ sgl_base = sqe; ++ sgl_off = 16; ++ sgl_idx = tmp_pi; ++ wqe_size = sizeof(struct erdma_send_sqe); ++ ++ break; ++ case IBV_WR_RDMA_READ: ++ sqe_hdr |= FIELD_PREP(ERDMA_SQE_HDR_OPCODE_MASK, ERDMA_OP_READ); ++ read_sqe = sqe; ++ ++ read_sqe->sink_to_low = htole32(wr->sg_list->addr & 0xFFFFFFFF); ++ read_sqe->sink_to_high = ++ htole32((wr->sg_list->addr >> 32) & 0xFFFFFFFF); ++ read_sqe->sink_stag = htole32(wr->sg_list->lkey); ++ read_sqe->length = htole32(wr->sg_list->length); ++ ++ sgl_base = get_sq_wqebb(qp, tmp_pi + 1); ++ ++ sgl_base->laddr = htole64(wr->wr.rdma.remote_addr); ++ sgl_base->length = htole32(wr->sg_list->length); ++ sgl_base->lkey = htole32(wr->wr.rdma.rkey); ++ ++ wqe_size = sizeof(struct erdma_readreq_sqe); ++ ++ goto out; ++ case IBV_WR_ATOMIC_CMP_AND_SWP: ++ case IBV_WR_ATOMIC_FETCH_AND_ADD: ++ atomic_sqe = (struct erdma_atomic_sqe *)sqe; ++ ++ if (wr->opcode == IBV_WR_ATOMIC_CMP_AND_SWP) { ++ sqe_hdr |= FIELD_PREP(ERDMA_SQE_HDR_OPCODE_MASK, ++ ERDMA_OP_ATOMIC_CAS); ++ atomic_sqe->fetchadd_swap_data = wr->wr.atomic.swap; ++ atomic_sqe->cmp_data = wr->wr.atomic.compare_add; ++ } else { ++ sqe_hdr |= FIELD_PREP(ERDMA_SQE_HDR_OPCODE_MASK, ++ ERDMA_OP_ATOMIC_FAD); ++ atomic_sqe->fetchadd_swap_data = ++ wr->wr.atomic.compare_add; ++ } ++ ++ sgl_base = (struct erdma_sge *)get_sq_wqebb(qp, tmp_pi + 1); ++ /* remote SGL fields */ ++ sgl_base->laddr = wr->wr.atomic.remote_addr; ++ sgl_base->lkey = wr->wr.atomic.rkey; ++ ++ /* local SGL fields */ ++ sgl_base++; ++ sgl_base->laddr = wr->sg_list[0].addr; ++ sgl_base->length = wr->sg_list[0].length; ++ sgl_base->lkey = wr->sg_list[0].lkey; ++ wqe_size = sizeof(struct erdma_atomic_sqe); ++ goto out; ++ default: ++ return -EINVAL; ++ } ++ ++ if (wr->send_flags & IBV_SEND_INLINE) { ++ char *data = (char *)sgl_base; ++ uint32_t remain_size; ++ uint32_t copy_size; ++ uint32_t data_off; ++ ++ i = 0; ++ bytes = 0; ++ ++ /* Allow more than ERDMA_MAX_SGE, since content copied here */ ++ while (i < wr->num_sge) { ++ bytes += wr->sg_list[i].length; ++ if (bytes > (int)ERDMA_MAX_INLINE) ++ return -EINVAL; ++ ++ remain_size = 
wr->sg_list[i].length; ++ data_off = 0; ++ ++ while (1) { ++ copy_size = ++ min(remain_size, SQEBB_SIZE - sgl_off); ++ memcpy(data + sgl_off, ++ (void *)(uintptr_t)wr->sg_list[i].addr + ++ data_off, ++ copy_size); ++ remain_size -= copy_size; ++ ++ /* Update sgl_offset. */ ++ sgl_idx += ++ ((sgl_off + copy_size) >> SQEBB_SHIFT); ++ sgl_off = (sgl_off + copy_size) & ++ (SQEBB_SIZE - 1); ++ data_off += copy_size; ++ data = get_sq_wqebb(qp, sgl_idx); ++ ++ if (!remain_size) ++ break; ++ }; ++ ++ i++; ++ } ++ ++ *length_field = htole32(bytes); ++ wqe_size += bytes; ++ sqe_hdr |= FIELD_PREP(ERDMA_SQE_HDR_SGL_LEN_MASK, bytes); ++ } else { ++ char *sgl = (char *)sgl_base; ++ ++ if (wr->num_sge > ERDMA_MAX_SEND_SGE) ++ return -EINVAL; ++ ++ i = 0; ++ bytes = 0; ++ ++ while (i < wr->num_sge) { ++ bytes += wr->sg_list[i].length; ++ memcpy(sgl + sgl_off, &wr->sg_list[i], ++ sizeof(struct ibv_sge)); ++ ++ if (sgl_off == 0) ++ *(uint32_t *)(sgl + 28) = qp->id; ++ ++ sgl_idx += (sgl_off == sizeof(struct ibv_sge) ? 1 : 0); ++ sgl = get_sq_wqebb(qp, sgl_idx); ++ sgl_off = sizeof(struct ibv_sge) - sgl_off; ++ ++ i++; ++ } ++ ++ *length_field = htole32(bytes); ++ sqe_hdr |= FIELD_PREP(ERDMA_SQE_HDR_SGL_LEN_MASK, wr->num_sge); ++ wqe_size += wr->num_sge * sizeof(struct ibv_sge); ++ } ++ ++out: ++ wqebb_cnt = SQEBB_COUNT(wqe_size); ++ assert(wqebb_cnt <= MAX_WQEBB_PER_SQE); ++ sqe_hdr |= FIELD_PREP(ERDMA_SQE_HDR_WQEBB_CNT_MASK, wqebb_cnt - 1); ++ sqe_hdr |= ++ FIELD_PREP(ERDMA_SQE_HDR_WQEBB_INDEX_MASK, tmp_pi + wqebb_cnt); ++ ++ *(__le64 *)sqe = htole64(sqe_hdr); ++ *sq_pi = tmp_pi + wqebb_cnt; ++ ++ if (use_direct) ++ kick_hw_sqe(qp, tmp_pi, wqebb_cnt); ++ ++ return 0; ++} ++ ++int erdma_post_send(struct ibv_qp *ibqp, struct ibv_send_wr *wr, ++ struct ibv_send_wr **bad_wr) ++{ ++ struct erdma_qp *qp = to_eqp(ibqp); ++ uint16_t sq_pi; ++ int new_sqe = 0, rv = 0; ++ int use_dwqe = 0, first_wr = 1, dsqe = 0; ++ ++ *bad_wr = NULL; ++ ++ if (ibqp->state == IBV_QPS_ERR) { ++ *bad_wr = wr; ++ return -EIO; ++ } ++ ++ pthread_spin_lock(&qp->sq_lock); ++ sq_pi = qp->sq.pi; ++ ++ while (wr) { ++ if ((uint16_t)(sq_pi - qp->sq.ci) >= qp->sq.depth) { ++ rv = -ENOMEM; ++ *bad_wr = wr; ++ break; ++ } ++ ++ use_dwqe = (!qp->disable_dwqe && first_wr && sq_pi == qp->sq.ci) ? 1 : 0; ++ if (use_dwqe) ++ dsqe++; ++ ++ rv = erdma_push_one_sqe(qp, wr, &sq_pi, use_dwqe); ++ if (rv) { ++ *bad_wr = wr; ++ break; ++ } ++ ++ new_sqe++; ++ first_wr = 0; ++ wr = wr->next; ++ } ++ ++ if (new_sqe) ++ qp->sq.pi = sq_pi; ++ ++ if (new_sqe - dsqe) ++ __kick_sq_db(qp, sq_pi); /* normal doorbell. 
*/ ++ ++ pthread_spin_unlock(&qp->sq_lock); ++ ++ return rv; ++} ++ ++static int push_recv_wqe(struct erdma_qp *qp, struct ibv_recv_wr *wr) ++{ ++ uint16_t rq_pi = qp->rq.pi; ++ uint16_t idx = rq_pi & (qp->rq.depth - 1); ++ struct erdma_rqe *rqe = (struct erdma_rqe *)qp->rq.qbuf + idx; ++ ++ if ((uint16_t)(rq_pi - qp->rq.ci) == qp->rq.depth) ++ return -ENOMEM; ++ ++ rqe->qe_idx = htole16(rq_pi + 1); ++ rqe->qpn = htole32(qp->id); ++ qp->rq.wr_tbl[idx] = wr->wr_id; ++ ++ if (wr->num_sge == 0) { ++ rqe->length = 0; ++ } else if (wr->num_sge == 1) { ++ rqe->stag = htole32(wr->sg_list[0].lkey); ++ rqe->to = htole64(wr->sg_list[0].addr); ++ rqe->length = htole32(wr->sg_list[0].length); ++ } else { ++ return -EINVAL; ++ } ++ ++ *(__le64 *)qp->rq.db_record = *(__le64 *)rqe; ++ udma_to_device_barrier(); ++ mmio_write64_le(qp->rq.db, *(__le64 *)rqe); ++ ++ qp->rq.pi = rq_pi + 1; ++ ++ return 0; ++} ++ ++int erdma_post_recv(struct ibv_qp *ibqp, struct ibv_recv_wr *wr, ++ struct ibv_recv_wr **bad_wr) ++{ ++ struct erdma_qp *qp = to_eqp(ibqp); ++ int ret = 0; ++ ++ if (ibqp->state == IBV_QPS_ERR) { ++ *bad_wr = wr; ++ return -EIO; ++ } ++ ++ pthread_spin_lock(&qp->rq_lock); ++ ++ while (wr) { ++ ret = push_recv_wqe(qp, wr); ++ if (ret) { ++ *bad_wr = wr; ++ break; ++ } ++ ++ wr = wr->next; ++ } ++ ++ pthread_spin_unlock(&qp->rq_lock); ++ ++ return ret; ++} ++ ++void erdma_cq_event(struct ibv_cq *ibcq) ++{ ++ struct erdma_cq *cq = to_ecq(ibcq); ++ ++ cq->cmdsn++; ++} ++ ++static void *get_next_valid_cqe(struct erdma_cq *cq) ++{ ++ struct erdma_cqe *cqe = cq->queue + (cq->ci & (cq->depth - 1)); ++ uint32_t owner = FIELD_GET(ERDMA_CQE_HDR_OWNER_MASK, be32toh(cqe->hdr)); ++ ++ return owner ^ !!(cq->ci & cq->depth) ? cqe : NULL; ++} ++ ++static const enum ibv_wc_opcode wc_mapping_table[ERDMA_NUM_OPCODES] = { ++ [ERDMA_OP_WRITE] = IBV_WC_RDMA_WRITE, ++ [ERDMA_OP_READ] = IBV_WC_RDMA_READ, ++ [ERDMA_OP_SEND] = IBV_WC_SEND, ++ [ERDMA_OP_SEND_WITH_IMM] = IBV_WC_SEND, ++ [ERDMA_OP_RECEIVE] = IBV_WC_RECV, ++ [ERDMA_OP_RECV_IMM] = IBV_WC_RECV_RDMA_WITH_IMM, ++ [ERDMA_OP_RECV_INV] = IBV_WC_RECV, ++ [ERDMA_OP_WRITE_WITH_IMM] = IBV_WC_RDMA_WRITE, ++ [ERDMA_OP_INVALIDATE] = IBV_WC_LOCAL_INV, ++ [ERDMA_OP_RSP_SEND_IMM] = IBV_WC_RECV, ++ [ERDMA_OP_SEND_WITH_INV] = IBV_WC_SEND, ++ [ERDMA_OP_READ_WITH_INV] = IBV_WC_RDMA_READ, ++ [ERDMA_OP_ATOMIC_CAS] = IBV_WC_COMP_SWAP, ++ [ERDMA_OP_ATOMIC_FAD] = IBV_WC_FETCH_ADD, ++}; ++ ++static const struct { ++ enum erdma_wc_status erdma; ++ enum ibv_wc_status base; ++ enum erdma_vendor_err vendor; ++} map_cqe_status[ERDMA_NUM_WC_STATUS] = { ++ { ERDMA_WC_SUCCESS, IBV_WC_SUCCESS, ERDMA_WC_VENDOR_NO_ERR }, ++ { ERDMA_WC_GENERAL_ERR, IBV_WC_GENERAL_ERR, ERDMA_WC_VENDOR_NO_ERR }, ++ { ERDMA_WC_RECV_WQE_FORMAT_ERR, IBV_WC_GENERAL_ERR, ++ ERDMA_WC_VENDOR_INVALID_RQE }, ++ { ERDMA_WC_RECV_STAG_INVALID_ERR, IBV_WC_REM_ACCESS_ERR, ++ ERDMA_WC_VENDOR_RQE_INVALID_STAG }, ++ { ERDMA_WC_RECV_ADDR_VIOLATION_ERR, IBV_WC_REM_ACCESS_ERR, ++ ERDMA_WC_VENDOR_RQE_ADDR_VIOLATION }, ++ { ERDMA_WC_RECV_RIGHT_VIOLATION_ERR, IBV_WC_REM_ACCESS_ERR, ++ ERDMA_WC_VENDOR_RQE_ACCESS_RIGHT_ERR }, ++ { ERDMA_WC_RECV_PDID_ERR, IBV_WC_REM_ACCESS_ERR, ++ ERDMA_WC_VENDOR_RQE_INVALID_PD }, ++ { ERDMA_WC_RECV_WARRPING_ERR, IBV_WC_REM_ACCESS_ERR, ++ ERDMA_WC_VENDOR_RQE_WRAP_ERR }, ++ { ERDMA_WC_SEND_WQE_FORMAT_ERR, IBV_WC_LOC_QP_OP_ERR, ++ ERDMA_WC_VENDOR_INVALID_SQE }, ++ { ERDMA_WC_SEND_WQE_ORD_EXCEED, IBV_WC_GENERAL_ERR, ++ ERDMA_WC_VENDOR_ZERO_ORD }, ++ { ERDMA_WC_SEND_STAG_INVALID_ERR, IBV_WC_LOC_ACCESS_ERR, ++ 
ERDMA_WC_VENDOR_SQE_INVALID_STAG }, ++ { ERDMA_WC_SEND_ADDR_VIOLATION_ERR, IBV_WC_LOC_ACCESS_ERR, ++ ERDMA_WC_VENDOR_SQE_ADDR_VIOLATION }, ++ { ERDMA_WC_SEND_RIGHT_VIOLATION_ERR, IBV_WC_LOC_ACCESS_ERR, ++ ERDMA_WC_VENDOR_SQE_ACCESS_ERR }, ++ { ERDMA_WC_SEND_PDID_ERR, IBV_WC_LOC_ACCESS_ERR, ++ ERDMA_WC_VENDOR_SQE_INVALID_PD }, ++ { ERDMA_WC_SEND_WARRPING_ERR, IBV_WC_LOC_ACCESS_ERR, ++ ERDMA_WC_VENDOR_SQE_WARP_ERR }, ++ { ERDMA_WC_FLUSH_ERR, IBV_WC_WR_FLUSH_ERR, ERDMA_WC_VENDOR_NO_ERR }, ++ { ERDMA_WC_RETRY_EXC_ERR, IBV_WC_RETRY_EXC_ERR, ++ ERDMA_WC_VENDOR_NO_ERR }, ++}; ++ ++#define ERDMA_POLLCQ_NO_QP (-1) ++#define ERDMA_POLLCQ_DUP_COMP (-2) ++#define ERDMA_POLLCQ_WRONG_IDX (-3) ++ ++static int __erdma_poll_one_cqe(struct erdma_context *ctx, struct erdma_cq *cq, ++ struct ibv_wc *wc) ++{ ++ uint32_t cqe_hdr, opcode, syndrome, qpn; ++ uint16_t depth, wqe_idx, old_ci, new_ci; ++ uint64_t *sqe_hdr, *qeidx2wrid; ++ uint32_t tbl_idx, tbl_off; ++ struct erdma_cqe *cqe; ++ struct erdma_qp *qp; ++ ++ cqe = get_next_valid_cqe(cq); ++ if (!cqe) ++ return -EAGAIN; ++ ++ cq->ci++; ++ udma_from_device_barrier(); ++ ++ cqe_hdr = be32toh(cqe->hdr); ++ syndrome = FIELD_GET(ERDMA_CQE_HDR_SYNDROME_MASK, cqe_hdr); ++ opcode = FIELD_GET(ERDMA_CQE_HDR_OPCODE_MASK, cqe_hdr); ++ qpn = be32toh(cqe->qpn); ++ wqe_idx = be32toh(cqe->qe_idx); ++ ++ tbl_idx = qpn >> ERDMA_QP_TABLE_SHIFT; ++ tbl_off = qpn & ERDMA_QP_TABLE_MASK; ++ ++ if (!ctx->qp_table[tbl_idx].table || ++ !ctx->qp_table[tbl_idx].table[tbl_off]) ++ return ERDMA_POLLCQ_NO_QP; ++ ++ qp = ctx->qp_table[tbl_idx].table[tbl_off]; ++ ++ if (FIELD_GET(ERDMA_CQE_HDR_QTYPE_MASK, cqe_hdr) == ++ ERDMA_CQE_QTYPE_SQ) { ++ qeidx2wrid = qp->sq.wr_tbl; ++ depth = qp->sq.depth; ++ sqe_hdr = get_sq_wqebb(qp, wqe_idx); ++ old_ci = qp->sq.ci; ++ new_ci = wqe_idx + ++ FIELD_GET(ERDMA_SQE_HDR_WQEBB_CNT_MASK, *sqe_hdr) + 1; ++ ++ if ((uint16_t)(new_ci - old_ci) > depth) ++ return ERDMA_POLLCQ_WRONG_IDX; ++ else if (new_ci == old_ci) ++ return ERDMA_POLLCQ_DUP_COMP; ++ ++ qp->sq.ci = new_ci; ++ } else { ++ qeidx2wrid = qp->rq.wr_tbl; ++ depth = qp->rq.depth; ++ qp->rq.ci++; ++ } ++ ++ wc->wr_id = qeidx2wrid[wqe_idx & (depth - 1)]; ++ wc->byte_len = be32toh(cqe->size); ++ wc->wc_flags = 0; ++ ++ wc->opcode = wc_mapping_table[opcode]; ++ if (opcode == ERDMA_OP_RECV_IMM || opcode == ERDMA_OP_RSP_SEND_IMM) { ++ wc->imm_data = htobe32(le32toh(cqe->imm_data)); ++ wc->wc_flags |= IBV_WC_WITH_IMM; ++ } ++ ++ if (syndrome >= ERDMA_NUM_WC_STATUS) ++ syndrome = ERDMA_WC_GENERAL_ERR; ++ ++ wc->status = map_cqe_status[syndrome].base; ++ wc->vendor_err = map_cqe_status[syndrome].vendor; ++ wc->qp_num = qpn; ++ ++ return 0; ++} ++ ++int erdma_poll_cq(struct ibv_cq *ibcq, int num_entries, struct ibv_wc *wc) ++{ ++ struct erdma_context *ctx = to_ectx(ibcq->context); ++ struct erdma_cq *cq = to_ecq(ibcq); ++ int ret, npolled = 0; ++ ++ pthread_spin_lock(&cq->lock); ++ ++ while (npolled < num_entries) { ++ ret = __erdma_poll_one_cqe(ctx, cq, wc + npolled); ++ if (ret == -EAGAIN) /* CQ is empty, break the loop. */ ++ break; ++ else if (ret) /* We handle the polling error silently. 
*/ ++ continue; ++ npolled++; ++ } ++ ++ pthread_spin_unlock(&cq->lock); ++ ++ return npolled; ++} ++ ++void erdma_free_context(struct ibv_context *ibv_ctx) ++{ ++ struct erdma_context *ctx = to_ectx(ibv_ctx); ++ int i; ++ ++ munmap(aligned_address(ctx->sdb, ctx->page_size), ctx->page_size); ++ munmap(aligned_address(ctx->rdb, ctx->page_size), ctx->page_size); ++ munmap(aligned_address(ctx->cdb, ctx->page_size), ctx->page_size); ++ ++ pthread_mutex_lock(&ctx->qp_table_mutex); ++ for (i = 0; i < ERDMA_QP_TABLE_SIZE; ++i) { ++ if (ctx->qp_table[i].refcnt) ++ free(ctx->qp_table[i].table); ++ } ++ ++ pthread_mutex_unlock(&ctx->qp_table_mutex); ++ pthread_mutex_destroy(&ctx->qp_table_mutex); ++ ++ verbs_uninit_context(&ctx->ibv_ctx); ++ free(ctx); ++} +-- +2.37.0 + diff --git a/1003-providers-erdma-Add-the-main-module-of-erdma.patch b/1003-providers-erdma-Add-the-main-module-of-erdma.patch new file mode 100644 index 0000000000000000000000000000000000000000..41f02e3b882875b5c045180468ab5938c703c938 --- /dev/null +++ b/1003-providers-erdma-Add-the-main-module-of-erdma.patch @@ -0,0 +1,326 @@ +From ccacbfd79f17b3955acff68135e029433b2072d6 Mon Sep 17 00:00:00 2001 +Message-Id: +In-Reply-To: +References: +From: Cheng Xu +Date: Thu, 1 Dec 2022 15:40:48 +0800 +Subject: [PATCH 3/4] providers/erdma: Add the main module of erdma + +Add the definitions of erdma provider driver, and add the application +interface to core, so that core can recognize erdma provider. + +Signed-off-by: Cheng Xu +--- + libibverbs/verbs.h | 8 +- + providers/erdma/erdma.c | 149 ++++++++++++++++++++++++++++++++++++ + providers/erdma/erdma.h | 79 +++++++++++++++++++ + providers/erdma/erdma_abi.h | 21 +++++ + 4 files changed, 254 insertions(+), 3 deletions(-) + create mode 100644 providers/erdma/erdma.c + create mode 100644 providers/erdma/erdma.h + create mode 100644 providers/erdma/erdma_abi.h + +diff --git a/libibverbs/verbs.h b/libibverbs/verbs.h +index 36b4142517c0..b2f2b4b07be6 100644 +--- a/libibverbs/verbs.h ++++ b/libibverbs/verbs.h +@@ -2190,7 +2190,7 @@ struct ibv_device **ibv_get_device_list(int *num_devices); + */ + #ifdef RDMA_STATIC_PROVIDERS + #define _RDMA_STATIC_PREFIX_(_1, _2, _3, _4, _5, _6, _7, _8, _9, _10, _11, \ +- _12, _13, _14, _15, _16, _17, ...) \ ++ _12, _13, _14, _15, _16, _17, _18, ...) 
\ + &verbs_provider_##_1, &verbs_provider_##_2, &verbs_provider_##_3, \ + &verbs_provider_##_4, &verbs_provider_##_5, \ + &verbs_provider_##_6, &verbs_provider_##_7, \ +@@ -2198,16 +2198,18 @@ struct ibv_device **ibv_get_device_list(int *num_devices); + &verbs_provider_##_10, &verbs_provider_##_11, \ + &verbs_provider_##_12, &verbs_provider_##_13, \ + &verbs_provider_##_14, &verbs_provider_##_15, \ +- &verbs_provider_##_16, &verbs_provider_##_17 ++ &verbs_provider_##_16, &verbs_provider_##_17, \ ++ &verbs_provider_##_18 + #define _RDMA_STATIC_PREFIX(arg) \ + _RDMA_STATIC_PREFIX_(arg, none, none, none, none, none, none, none, \ + none, none, none, none, none, none, none, none, \ +- none) ++ none, none) + + struct verbs_devices_ops; + extern const struct verbs_device_ops verbs_provider_bnxt_re; + extern const struct verbs_device_ops verbs_provider_cxgb4; + extern const struct verbs_device_ops verbs_provider_efa; ++extern const struct verbs_device_ops verbs_provider_erdma; + extern const struct verbs_device_ops verbs_provider_hfi1verbs; + extern const struct verbs_device_ops verbs_provider_hns; + extern const struct verbs_device_ops verbs_provider_ipathverbs; +diff --git a/providers/erdma/erdma.c b/providers/erdma/erdma.c +new file mode 100644 +index 000000000000..440314599cf1 +--- /dev/null ++++ b/providers/erdma/erdma.c +@@ -0,0 +1,149 @@ ++// SPDX-License-Identifier: GPL-2.0 or OpenIB.org BSD (MIT) See COPYING file ++ ++// Authors: Cheng Xu ++// Copyright (c) 2020-2021, Alibaba Group. ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include "erdma.h" ++#include "erdma_abi.h" ++#include "erdma_hw.h" ++#include "erdma_verbs.h" ++ ++static const struct verbs_context_ops erdma_context_ops = { ++ .alloc_pd = erdma_alloc_pd, ++ .cq_event = erdma_cq_event, ++ .create_cq = erdma_create_cq, ++ .create_qp = erdma_create_qp, ++ .create_qp_ex = erdma_create_qp_ex, ++ .dealloc_pd = erdma_free_pd, ++ .dereg_mr = erdma_dereg_mr, ++ .destroy_cq = erdma_destroy_cq, ++ .destroy_qp = erdma_destroy_qp, ++ .free_context = erdma_free_context, ++ .modify_qp = erdma_modify_qp, ++ .poll_cq = erdma_poll_cq, ++ .post_recv = erdma_post_recv, ++ .post_send = erdma_post_send, ++ .query_device_ex = erdma_query_device, ++ .query_port = erdma_query_port, ++ .query_qp = erdma_query_qp, ++ .reg_mr = erdma_reg_mr, ++ .req_notify_cq = erdma_notify_cq, ++}; ++ ++static struct verbs_context *erdma_alloc_context(struct ibv_device *device, ++ int cmd_fd, void *private_data) ++{ ++ struct erdma_cmd_alloc_context_resp resp = {}; ++ struct ibv_get_context cmd = {}; ++ struct erdma_context *ctx; ++ int i; ++ ++ ctx = verbs_init_and_alloc_context(device, cmd_fd, ctx, ibv_ctx, ++ RDMA_DRIVER_ERDMA); ++ if (!ctx) ++ return NULL; ++ ++ pthread_mutex_init(&ctx->qp_table_mutex, NULL); ++ for (i = 0; i < ERDMA_QP_TABLE_SIZE; ++i) ++ ctx->qp_table[i].refcnt = 0; ++ ++ if (ibv_cmd_get_context(&ctx->ibv_ctx, &cmd, sizeof(cmd), ++ &resp.ibv_resp, sizeof(resp))) ++ goto err_out; ++ ++ verbs_set_ops(&ctx->ibv_ctx, &erdma_context_ops); ++ ctx->page_size = to_edev(device)->page_size; ++ ctx->dev_id = resp.dev_id; ++ ++ ctx->sdb_type = resp.sdb_type; ++ ctx->sdb_entid = resp.sdb_entid; ++ ++ ctx->sdb = mmap(NULL, ctx->page_size, PROT_WRITE, MAP_SHARED, cmd_fd, ++ resp.sdb); ++ if (ctx->sdb == MAP_FAILED) ++ goto err_out; ++ ++ ctx->sdb += resp.sdb_off; ++ ++ ctx->rdb = mmap(NULL, ctx->page_size, PROT_WRITE, MAP_SHARED, cmd_fd, ++ resp.rdb); ++ if (ctx->rdb == MAP_FAILED) ++ goto err_rdb_map; ++ ++ 
ctx->rdb += resp.rdb_off; ++ ++ ctx->cdb = mmap(NULL, ctx->page_size, PROT_WRITE, MAP_SHARED, cmd_fd, ++ resp.cdb); ++ if (ctx->cdb == MAP_FAILED) ++ goto err_cdb_map; ++ ++ ctx->cdb += resp.cdb_off; ++ ++ ctx->dbrecord_pages = NULL; ++ pthread_mutex_init(&ctx->dbrecord_pages_mutex, NULL); ++ ++ for (i = 0; i < 32; i++) ++ pthread_spin_init(&ctx->sdb_lock[i].lock, ++ PTHREAD_PROCESS_PRIVATE); ++ ++ return &ctx->ibv_ctx; ++ ++err_cdb_map: ++ munmap(aligned_address(ctx->rdb, ctx->page_size), ctx->page_size); ++err_rdb_map: ++ munmap(aligned_address(ctx->sdb, ctx->page_size), ctx->page_size); ++err_out: ++ verbs_uninit_context(&ctx->ibv_ctx); ++ free(ctx); ++ ++ return NULL; ++} ++ ++static struct verbs_device * ++erdma_device_alloc(struct verbs_sysfs_dev *sysfs_dev) ++{ ++ struct erdma_device *dev; ++ ++ dev = calloc(1, sizeof(*dev)); ++ if (!dev) ++ return NULL; ++ ++ dev->page_size = sysconf(_SC_PAGESIZE); ++ ++ return &dev->ibv_dev; ++} ++ ++static void erdma_device_free(struct verbs_device *vdev) ++{ ++ struct erdma_device *dev = ++ container_of(vdev, struct erdma_device, ibv_dev); ++ ++ free(dev); ++} ++ ++static const struct verbs_match_ent match_table[] = { ++ VERBS_DRIVER_ID(RDMA_DRIVER_ERDMA), ++ VERBS_PCI_MATCH(PCI_VENDOR_ID_ALIBABA, 0x107f, NULL), ++ {}, ++}; ++ ++static const struct verbs_device_ops erdma_dev_ops = { ++ .name = "erdma", ++ .match_min_abi_version = 0, ++ .match_max_abi_version = ERDMA_ABI_VERSION, ++ .match_table = match_table, ++ .alloc_device = erdma_device_alloc, ++ .uninit_device = erdma_device_free, ++ .alloc_context = erdma_alloc_context, ++}; ++ ++PROVIDER_DRIVER(erdma, erdma_dev_ops); +diff --git a/providers/erdma/erdma.h b/providers/erdma/erdma.h +new file mode 100644 +index 000000000000..41bca6e7915f +--- /dev/null ++++ b/providers/erdma/erdma.h +@@ -0,0 +1,79 @@ ++/* SPDX-License-Identifier: GPL-2.0 or OpenIB.org BSD (MIT) See COPYING file */ ++/* ++ * Authors: Cheng Xu ++ * Copyright (c) 2020-2021, Alibaba Group. 
++ */
++
++#ifndef __ERDMA_H__
++#define __ERDMA_H__
++
++#include 
++#include 
++#include 
++
++#include 
++#include 
++#include 
++
++#ifndef PCI_VENDOR_ID_ALIBABA
++#define PCI_VENDOR_ID_ALIBABA 0x1ded
++#endif
++
++#define ERDMA_PAGE_SIZE 4096
++#define ERDMA_PAGE_SHIFT 12
++#define ERDMA_SIZE_TO_NPAGE(size) \
++	(((size) + ERDMA_PAGE_SIZE - 1) >> ERDMA_PAGE_SHIFT)
++
++struct erdma_device {
++	struct verbs_device ibv_dev;
++	uint32_t page_size;
++};
++
++struct erdma_aligned_lock_t {
++	pthread_spinlock_t lock;
++} __attribute__((__packed__)) __attribute__((__aligned__(64)));
++
++#define ERDMA_QP_TABLE_SIZE 4096
++#define ERDMA_QP_TABLE_SHIFT 12
++#define ERDMA_QP_TABLE_MASK 0xFFF
++
++struct erdma_context {
++	struct verbs_context ibv_ctx;
++	uint32_t dev_id;
++
++	struct {
++		struct erdma_qp **table;
++		int refcnt;
++	} qp_table[ERDMA_QP_TABLE_SIZE];
++	pthread_mutex_t qp_table_mutex;
++
++	uint8_t sdb_type;
++	uint32_t sdb_entid;
++
++	void *sdb;
++	void *rdb;
++	void *cdb;
++
++	uint32_t page_size;
++	pthread_mutex_t dbrecord_pages_mutex;
++	struct erdma_dbrecord_page *dbrecord_pages;
++
++	struct erdma_aligned_lock_t sdb_lock[32];
++};
++
++static inline struct erdma_context *to_ectx(struct ibv_context *base)
++{
++	return container_of(base, struct erdma_context, ibv_ctx.context);
++}
++
++static inline struct erdma_device *to_edev(struct ibv_device *ibv_dev)
++{
++	return container_of(ibv_dev, struct erdma_device, ibv_dev.device);
++}
++
++static inline void *aligned_address(void *addr, uint32_t align)
++{
++	return addr - ((uintptr_t)addr & (align - 1));
++}
++
++#endif
+diff --git a/providers/erdma/erdma_abi.h b/providers/erdma/erdma_abi.h
+new file mode 100644
+index 000000000000..184804711d9b
+--- /dev/null
++++ b/providers/erdma/erdma_abi.h
+@@ -0,0 +1,21 @@
++/* SPDX-License-Identifier: GPL-2.0 or OpenIB.org BSD (MIT) See COPYING file */
++/*
++ * Authors: Cheng Xu 
++ * Copyright (c) 2020-2021, Alibaba Group.
++ */
++
++#ifndef __ERDMA_ABI_H__
++#define __ERDMA_ABI_H__
++
++#include 
++#include 
++#include 
++
++DECLARE_DRV_CMD(erdma_cmd_alloc_context, IB_USER_VERBS_CMD_GET_CONTEXT, empty,
++		erdma_uresp_alloc_ctx);
++DECLARE_DRV_CMD(erdma_cmd_create_cq, IB_USER_VERBS_CMD_CREATE_CQ,
++		erdma_ureq_create_cq, erdma_uresp_create_cq);
++DECLARE_DRV_CMD(erdma_cmd_create_qp, IB_USER_VERBS_CMD_CREATE_QP,
++		erdma_ureq_create_qp, erdma_uresp_create_qp);
++
++#endif
+--
+2.37.0
+
diff --git a/1004-providers-erdma-Add-to-the-build-environment.patch b/1004-providers-erdma-Add-to-the-build-environment.patch
new file mode 100644
index 0000000000000000000000000000000000000000..faab71ed342ffe05c7c53ec94073918d3f991b04
--- /dev/null
+++ b/1004-providers-erdma-Add-to-the-build-environment.patch
@@ -0,0 +1,245 @@
+From 500d8631e9598875063ba5daf3c668928984979c Mon Sep 17 00:00:00 2001
+Message-Id: <500d8631e9598875063ba5daf3c668928984979c.1669880730.git.chengyou@linux.alibaba.com>
+In-Reply-To: 
+References: 
+From: Cheng Xu 
+Date: Thu, 1 Dec 2022 15:42:21 +0800
+Subject: [PATCH 4/4] providers/erdma: Add to the build environment
+
+Make the build system able to build the provider, and add erdma to the redhat
+package and debian package build environments.
+ +Signed-off-by: Cheng Xu +--- + CMakeLists.txt | 1 + + MAINTAINERS | 5 +++ + README.md | 1 + + debian/control | 1 + + debian/copyright | 4 ++ + kernel-headers/CMakeLists.txt | 2 + + kernel-headers/rdma/erdma-abi.h | 52 +++++++++++++++++++++++ + kernel-headers/rdma/ib_user_ioctl_verbs.h | 1 + + libibverbs/examples/devinfo.c | 7 ++- + providers/erdma/CMakeLists.txt | 12 ++++++ + redhat/rdma-core.spec | 3 ++ + 11 files changed, 87 insertions(+), 2 deletions(-) + create mode 100644 kernel-headers/rdma/erdma-abi.h + create mode 100644 providers/erdma/CMakeLists.txt + +diff --git a/CMakeLists.txt b/CMakeLists.txt +index 1aecae49a507..bdf24c7fc4cd 100644 +--- a/CMakeLists.txt ++++ b/CMakeLists.txt +@@ -668,6 +668,7 @@ add_subdirectory(providers/bnxt_re) + add_subdirectory(providers/cxgb4) # NO SPARSE + add_subdirectory(providers/efa) + add_subdirectory(providers/efa/man) ++add_subdirectory(providers/erdma) + add_subdirectory(providers/hns) + add_subdirectory(providers/irdma) + add_subdirectory(providers/mlx4) +diff --git a/MAINTAINERS b/MAINTAINERS +index 9fec12406f81..bbeddabbd1ba 100644 +--- a/MAINTAINERS ++++ b/MAINTAINERS +@@ -61,6 +61,11 @@ M: Gal Pressman + S: Supported + F: providers/efa/ + ++ERDMA USERSPACE PROVIDER (for erdma.ko) ++M: Cheng Xu ++S: Supported ++F: providers/erdma/ ++ + HF1 USERSPACE PROVIDER (for hf1.ko) + M: Mike Marciniszyn + M: Dennis Dalessandro +diff --git a/README.md b/README.md +index 18c3b014bdbd..ebb941e2132a 100644 +--- a/README.md ++++ b/README.md +@@ -15,6 +15,7 @@ under the providers/ directory. Support for the following Kernel RDMA drivers + is included: + + - efa.ko ++ - erdma.ko + - iw_cxgb4.ko + - hfi1.ko + - hns-roce.ko +diff --git a/debian/control b/debian/control +index d9fec3a5fbdc..38dc6f710dbc 100644 +--- a/debian/control ++++ b/debian/control +@@ -93,6 +93,7 @@ Description: User space provider drivers for libibverbs + - bnxt_re: Broadcom NetXtreme-E RoCE HCAs + - cxgb4: Chelsio T4 iWARP HCAs + - efa: Amazon Elastic Fabric Adapter ++ - erdma: Alibaba Elastic RDMA (iWarp) Adapter + - hfi1verbs: Intel Omni-Path HFI + - hns: HiSilicon Hip06 SoC + - ipathverbs: QLogic InfiniPath HCAs +diff --git a/debian/copyright b/debian/copyright +index d58aa779a569..ead62adb437a 100644 +--- a/debian/copyright ++++ b/debian/copyright +@@ -156,6 +156,10 @@ Files: providers/efa/* + Copyright: 2019 Amazon.com, Inc. or its affiliates. + License: BSD-2-clause or GPL-2 + ++Files: providers/erdma/* ++Copyright: 2020-2021, Alibaba Group ++License: BSD-MIT or GPL-2 ++ + Files: providers/hfi1verbs/* + Copyright: 2005 PathScale, Inc. + 2006-2009 QLogic Corporation +diff --git a/kernel-headers/CMakeLists.txt b/kernel-headers/CMakeLists.txt +index d9621ee2b940..f30ff2292ace 100644 +--- a/kernel-headers/CMakeLists.txt ++++ b/kernel-headers/CMakeLists.txt +@@ -2,6 +2,7 @@ publish_internal_headers(rdma + rdma/bnxt_re-abi.h + rdma/cxgb4-abi.h + rdma/efa-abi.h ++ rdma/erdma-abi.h + rdma/hns-abi.h + rdma/ib_user_ioctl_cmds.h + rdma/ib_user_ioctl_verbs.h +@@ -64,6 +65,7 @@ rdma_kernel_provider_abi( + rdma/bnxt_re-abi.h + rdma/cxgb4-abi.h + rdma/efa-abi.h ++ rdma/erdma-abi.h + rdma/hns-abi.h + rdma/ib_user_verbs.h + rdma/irdma-abi.h +diff --git a/kernel-headers/rdma/erdma-abi.h b/kernel-headers/rdma/erdma-abi.h +new file mode 100644 +index 000000000000..455046415983 +--- /dev/null ++++ b/kernel-headers/rdma/erdma-abi.h +@@ -0,0 +1,52 @@ ++/* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR Linux-OpenIB) */ ++/* ++ * Copyright (c) 2020-2022, Alibaba Group. 
++ */ ++ ++#ifndef __ERDMA_USER_H__ ++#define __ERDMA_USER_H__ ++ ++#include ++ ++#define ERDMA_ABI_VERSION 1 ++ ++struct erdma_ureq_create_cq { ++ __aligned_u64 db_record_va; ++ __aligned_u64 qbuf_va; ++ __u32 qbuf_len; ++ __u32 rsvd0; ++}; ++ ++struct erdma_uresp_create_cq { ++ __u32 cq_id; ++ __u32 num_cqe; ++}; ++ ++struct erdma_ureq_create_qp { ++ __aligned_u64 db_record_va; ++ __aligned_u64 qbuf_va; ++ __u32 qbuf_len; ++ __u32 rsvd0; ++}; ++ ++struct erdma_uresp_create_qp { ++ __u32 qp_id; ++ __u32 num_sqe; ++ __u32 num_rqe; ++ __u32 rq_offset; ++}; ++ ++struct erdma_uresp_alloc_ctx { ++ __u32 dev_id; ++ __u32 pad; ++ __u32 sdb_type; ++ __u32 sdb_entid; ++ __aligned_u64 sdb; ++ __aligned_u64 rdb; ++ __aligned_u64 cdb; ++ __u32 sdb_off; ++ __u32 rdb_off; ++ __u32 cdb_off; ++}; ++ ++#endif +diff --git a/kernel-headers/rdma/ib_user_ioctl_verbs.h b/kernel-headers/rdma/ib_user_ioctl_verbs.h +index 3072e5d6b692..7dd56210226f 100644 +--- a/kernel-headers/rdma/ib_user_ioctl_verbs.h ++++ b/kernel-headers/rdma/ib_user_ioctl_verbs.h +@@ -250,6 +250,7 @@ enum rdma_driver_id { + RDMA_DRIVER_QIB, + RDMA_DRIVER_EFA, + RDMA_DRIVER_SIW, ++ RDMA_DRIVER_ERDMA, + }; + + enum ib_uverbs_gid_type { +diff --git a/libibverbs/examples/devinfo.c b/libibverbs/examples/devinfo.c +index 5db568b08040..f7680a96a2a1 100644 +--- a/libibverbs/examples/devinfo.c ++++ b/libibverbs/examples/devinfo.c +@@ -521,8 +521,11 @@ static int print_hca_cap(struct ibv_device *ib_dev, uint8_t ib_port) + } + + printf("hca_id:\t%s\n", ibv_get_device_name(ib_dev)); +- printf("\ttransport:\t\t\t%s (%d)\n", +- transport_str(ib_dev->transport_type), ib_dev->transport_type); ++ if (device_attr.orig_attr.vendor_id != 0x1ded) ++ printf("\ttransport:\t\t\t%s (%d)\n", ++ transport_str(ib_dev->transport_type), ib_dev->transport_type); ++ else ++ printf("\ttransport:\t\t\t%s\n", "eRDMA"); + if (strlen(device_attr.orig_attr.fw_ver)) + printf("\tfw_ver:\t\t\t\t%s\n", device_attr.orig_attr.fw_ver); + printf("\tnode_guid:\t\t\t%s\n", guid_str(device_attr.orig_attr.node_guid, buf)); +diff --git a/providers/erdma/CMakeLists.txt b/providers/erdma/CMakeLists.txt +new file mode 100644 +index 000000000000..eeb40f79a08e +--- /dev/null ++++ b/providers/erdma/CMakeLists.txt +@@ -0,0 +1,12 @@ ++rdma_provider(erdma ++ erdma.c ++ erdma_db.c ++ erdma_verbs.c ++) ++ ++if (${CMAKE_SYSTEM_PROCESSOR} STREQUAL "x86_64") ++ set(ERDMA_PRIV_FLAGS "-msse4.1 -msse4.2 -mavx2 -Werror") ++ add_definitions(-DHAVE_AVX_SUPPORT) ++endif() ++ ++set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${ERDMA_PRIV_FLAGS}") +diff --git a/redhat/rdma-core.spec b/redhat/rdma-core.spec +index 55b2d608b29b..32e9bf77e8ca 100644 +--- a/redhat/rdma-core.spec ++++ b/redhat/rdma-core.spec +@@ -148,6 +148,8 @@ Provides: libcxgb4 = %{version}-%{release} + Obsoletes: libcxgb4 < %{version}-%{release} + Provides: libefa = %{version}-%{release} + Obsoletes: libefa < %{version}-%{release} ++Provides: liberdma = %{version}-%{release} ++Obsoletes: liberdma < %{version}-%{release} + Provides: libhfi1 = %{version}-%{release} + Obsoletes: libhfi1 < %{version}-%{release} + Provides: libipathverbs = %{version}-%{release} +@@ -176,6 +178,7 @@ Device-specific plug-in ibverbs userspace drivers are included: + + - libcxgb4: Chelsio T4 iWARP HCA + - libefa: Amazon Elastic Fabric Adapter ++- liberdma: Alibaba Elastic RDMA (iWarp) Adapter + - libhfi1: Intel Omni-Path HFI + - libhns: HiSilicon Hip06 SoC + - libipathverbs: QLogic InfiniPath HCA +-- +2.37.0 + diff --git a/rdma-core.spec b/rdma-core.spec index 
ecc8ed71e99220c118431f65fabad5f8cb5c00b2..ab2c93085ef1949a6cae7a930ca421c5b0d5a7c7 100644
--- a/rdma-core.spec
+++ b/rdma-core.spec
@@ -1,4 +1,4 @@
-%define anolis_release .0.2
+%define anolis_release .0.3
 Name: rdma-core
 Version: 37.2
 Release: 1%{anolis_release}%{?dist}
@@ -22,6 +22,12 @@ Patch3: udev-keep-NAME_KERNEL-as-default-interface-naming-co.patch
 Patch300: 0001-ibacm-acm.c-load-plugin-while-it-is-soft-link.patch
 Patch301: 0001-tests-Fix-comparing-qp_state-for-iWARP-providers.patch
+# ERDMA provider support
+Patch1000: 1001-providers-erdma-Add-userspace-verbs-related-header-f.patch
+Patch1001: 1002-providers-erdma-Add-userspace-verbs-implementation.patch
+Patch1002: 1003-providers-erdma-Add-the-main-module-of-erdma.patch
+Patch1003: 1004-providers-erdma-Add-to-the-build-environment.patch
+
 # Do not build static libs by default.
 %define with_static %{?_with_static: 1} %{?!_with_static: 0}
@@ -259,6 +265,10 @@ easy, object-oriented access to IB verbs.
 %patch3 -p1
 %patch300 -p1
 %patch301 -p1
+%patch1000 -p1
+%patch1001 -p1
+%patch1002 -p1
+%patch1003 -p1
 %build
@@ -644,6 +654,9 @@ fi
 %endif
 %changelog
+* Thu Dec 16 2022 Cheng Xu - 37.2-1.0.3
+- Backport ERDMA support
+
 * Wed Nov 16 2022 Liwei Ge - 37.2-1.0.2
 - Fix build error
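
Packaging note (not part of the patches or the spec above): after installing the
rebuilt rdma-core packages on a host that exposes an ERDMA device, the backported
provider can be checked with ibv_devinfo, or with a minimal libibverbs program such
as the sketch below. The file name check_erdma.c and the printed fields are
illustrative; the program only uses the standard verbs enumeration API, and opening
a device is what exercises the provider's alloc_context path added in patch 1003.

/* check_erdma.c - enumerate RDMA devices and query their attributes. */
#include <stdio.h>
#include <infiniband/verbs.h>

int main(void)
{
	struct ibv_device **dev_list;
	int num_devices, i;

	/* List all RDMA devices known to libibverbs. */
	dev_list = ibv_get_device_list(&num_devices);
	if (!dev_list) {
		perror("ibv_get_device_list");
		return 1;
	}

	for (i = 0; i < num_devices; i++) {
		const char *name = ibv_get_device_name(dev_list[i]);
		struct ibv_context *ctx;
		struct ibv_device_attr attr;

		/* Opening the device goes through the provider's
		 * alloc_context hook (erdma_alloc_context for erdma). */
		ctx = ibv_open_device(dev_list[i]);
		if (!ctx) {
			fprintf(stderr, "failed to open %s\n", name);
			continue;
		}

		if (!ibv_query_device(ctx, &attr))
			printf("%s: vendor 0x%x, max_qp %d\n",
			       name, attr.vendor_id, attr.max_qp);

		ibv_close_device(ctx);
	}

	ibv_free_device_list(dev_list);
	return 0;
}

Build with "cc check_erdma.c -o check_erdma -libverbs"; on a machine without an
ERDMA device the program still runs and simply lists whatever devices are present.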