From 258fb3627beb2a0f65817f09c7b62dbf0a4d5bec Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E6=9D=8E=E5=AF=8C=E8=89=B3?=
Date: Tue, 30 Jul 2024 09:10:16 +0000
Subject: [PATCH 1/3] Add LICENSE; merge ZTE Dinghai rdma-core driver
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Signed-off-by: 李富艳
---
LICENSE | 339 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 339 insertions(+)
create mode 100644 LICENSE
diff --git a/LICENSE b/LICENSE
new file mode 100644
index 0000000..89e08fb
--- /dev/null
+++ b/LICENSE
@@ -0,0 +1,339 @@
+ GNU GENERAL PUBLIC LICENSE
+ Version 2, June 1991
+
+ Copyright (C) 1989, 1991 Free Software Foundation, Inc.,
+ 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ Everyone is permitted to copy and distribute verbatim copies
+ of this license document, but changing it is not allowed.
+
+ Preamble
+
+ The licenses for most software are designed to take away your
+freedom to share and change it. By contrast, the GNU General Public
+License is intended to guarantee your freedom to share and change free
+software--to make sure the software is free for all its users. This
+General Public License applies to most of the Free Software
+Foundation's software and to any other program whose authors commit to
+using it. (Some other Free Software Foundation software is covered by
+the GNU Lesser General Public License instead.) You can apply it to
+your programs, too.
+
+ When we speak of free software, we are referring to freedom, not
+price. Our General Public Licenses are designed to make sure that you
+have the freedom to distribute copies of free software (and charge for
+this service if you wish), that you receive source code or can get it
+if you want it, that you can change the software or use pieces of it
+in new free programs; and that you know you can do these things.
+
+ To protect your rights, we need to make restrictions that forbid
+anyone to deny you these rights or to ask you to surrender the rights.
+These restrictions translate to certain responsibilities for you if you
+distribute copies of the software, or if you modify it.
+
+ For example, if you distribute copies of such a program, whether
+gratis or for a fee, you must give the recipients all the rights that
+you have. You must make sure that they, too, receive or can get the
+source code. And you must show them these terms so they know their
+rights.
+
+ We protect your rights with two steps: (1) copyright the software, and
+(2) offer you this license which gives you legal permission to copy,
+distribute and/or modify the software.
+
+ Also, for each author's protection and ours, we want to make certain
+that everyone understands that there is no warranty for this free
+software. If the software is modified by someone else and passed on, we
+want its recipients to know that what they have is not the original, so
+that any problems introduced by others will not reflect on the original
+authors' reputations.
+
+ Finally, any free program is threatened constantly by software
+patents. We wish to avoid the danger that redistributors of a free
+program will individually obtain patent licenses, in effect making the
+program proprietary. To prevent this, we have made it clear that any
+patent must be licensed for everyone's free use or not licensed at all.
+
+ The precise terms and conditions for copying, distribution and
+modification follow.
+
+ GNU GENERAL PUBLIC LICENSE
+ TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
+
+ 0. This License applies to any program or other work which contains
+a notice placed by the copyright holder saying it may be distributed
+under the terms of this General Public License. The "Program", below,
+refers to any such program or work, and a "work based on the Program"
+means either the Program or any derivative work under copyright law:
+that is to say, a work containing the Program or a portion of it,
+either verbatim or with modifications and/or translated into another
+language. (Hereinafter, translation is included without limitation in
+the term "modification".) Each licensee is addressed as "you".
+
+Activities other than copying, distribution and modification are not
+covered by this License; they are outside its scope. The act of
+running the Program is not restricted, and the output from the Program
+is covered only if its contents constitute a work based on the
+Program (independent of having been made by running the Program).
+Whether that is true depends on what the Program does.
+
+ 1. You may copy and distribute verbatim copies of the Program's
+source code as you receive it, in any medium, provided that you
+conspicuously and appropriately publish on each copy an appropriate
+copyright notice and disclaimer of warranty; keep intact all the
+notices that refer to this License and to the absence of any warranty;
+and give any other recipients of the Program a copy of this License
+along with the Program.
+
+You may charge a fee for the physical act of transferring a copy, and
+you may at your option offer warranty protection in exchange for a fee.
+
+ 2. You may modify your copy or copies of the Program or any portion
+of it, thus forming a work based on the Program, and copy and
+distribute such modifications or work under the terms of Section 1
+above, provided that you also meet all of these conditions:
+
+ a) You must cause the modified files to carry prominent notices
+ stating that you changed the files and the date of any change.
+
+ b) You must cause any work that you distribute or publish, that in
+ whole or in part contains or is derived from the Program or any
+ part thereof, to be licensed as a whole at no charge to all third
+ parties under the terms of this License.
+
+ c) If the modified program normally reads commands interactively
+ when run, you must cause it, when started running for such
+ interactive use in the most ordinary way, to print or display an
+ announcement including an appropriate copyright notice and a
+ notice that there is no warranty (or else, saying that you provide
+ a warranty) and that users may redistribute the program under
+ these conditions, and telling the user how to view a copy of this
+ License. (Exception: if the Program itself is interactive but
+ does not normally print such an announcement, your work based on
+ the Program is not required to print an announcement.)
+
+These requirements apply to the modified work as a whole. If
+identifiable sections of that work are not derived from the Program,
+and can be reasonably considered independent and separate works in
+themselves, then this License, and its terms, do not apply to those
+sections when you distribute them as separate works. But when you
+distribute the same sections as part of a whole which is a work based
+on the Program, the distribution of the whole must be on the terms of
+this License, whose permissions for other licensees extend to the
+entire whole, and thus to each and every part regardless of who wrote it.
+
+Thus, it is not the intent of this section to claim rights or contest
+your rights to work written entirely by you; rather, the intent is to
+exercise the right to control the distribution of derivative or
+collective works based on the Program.
+
+In addition, mere aggregation of another work not based on the Program
+with the Program (or with a work based on the Program) on a volume of
+a storage or distribution medium does not bring the other work under
+the scope of this License.
+
+ 3. You may copy and distribute the Program (or a work based on it,
+under Section 2) in object code or executable form under the terms of
+Sections 1 and 2 above provided that you also do one of the following:
+
+ a) Accompany it with the complete corresponding machine-readable
+ source code, which must be distributed under the terms of Sections
+ 1 and 2 above on a medium customarily used for software interchange; or,
+
+ b) Accompany it with a written offer, valid for at least three
+ years, to give any third party, for a charge no more than your
+ cost of physically performing source distribution, a complete
+ machine-readable copy of the corresponding source code, to be
+ distributed under the terms of Sections 1 and 2 above on a medium
+ customarily used for software interchange; or,
+
+ c) Accompany it with the information you received as to the offer
+ to distribute corresponding source code. (This alternative is
+ allowed only for noncommercial distribution and only if you
+ received the program in object code or executable form with such
+ an offer, in accord with Subsection b above.)
+
+The source code for a work means the preferred form of the work for
+making modifications to it. For an executable work, complete source
+code means all the source code for all modules it contains, plus any
+associated interface definition files, plus the scripts used to
+control compilation and installation of the executable. However, as a
+special exception, the source code distributed need not include
+anything that is normally distributed (in either source or binary
+form) with the major components (compiler, kernel, and so on) of the
+operating system on which the executable runs, unless that component
+itself accompanies the executable.
+
+If distribution of executable or object code is made by offering
+access to copy from a designated place, then offering equivalent
+access to copy the source code from the same place counts as
+distribution of the source code, even though third parties are not
+compelled to copy the source along with the object code.
+
+ 4. You may not copy, modify, sublicense, or distribute the Program
+except as expressly provided under this License. Any attempt
+otherwise to copy, modify, sublicense or distribute the Program is
+void, and will automatically terminate your rights under this License.
+However, parties who have received copies, or rights, from you under
+this License will not have their licenses terminated so long as such
+parties remain in full compliance.
+
+ 5. You are not required to accept this License, since you have not
+signed it. However, nothing else grants you permission to modify or
+distribute the Program or its derivative works. These actions are
+prohibited by law if you do not accept this License. Therefore, by
+modifying or distributing the Program (or any work based on the
+Program), you indicate your acceptance of this License to do so, and
+all its terms and conditions for copying, distributing or modifying
+the Program or works based on it.
+
+ 6. Each time you redistribute the Program (or any work based on the
+Program), the recipient automatically receives a license from the
+original licensor to copy, distribute or modify the Program subject to
+these terms and conditions. You may not impose any further
+restrictions on the recipients' exercise of the rights granted herein.
+You are not responsible for enforcing compliance by third parties to
+this License.
+
+ 7. If, as a consequence of a court judgment or allegation of patent
+infringement or for any other reason (not limited to patent issues),
+conditions are imposed on you (whether by court order, agreement or
+otherwise) that contradict the conditions of this License, they do not
+excuse you from the conditions of this License. If you cannot
+distribute so as to satisfy simultaneously your obligations under this
+License and any other pertinent obligations, then as a consequence you
+may not distribute the Program at all. For example, if a patent
+license would not permit royalty-free redistribution of the Program by
+all those who receive copies directly or indirectly through you, then
+the only way you could satisfy both it and this License would be to
+refrain entirely from distribution of the Program.
+
+If any portion of this section is held invalid or unenforceable under
+any particular circumstance, the balance of the section is intended to
+apply and the section as a whole is intended to apply in other
+circumstances.
+
+It is not the purpose of this section to induce you to infringe any
+patents or other property right claims or to contest validity of any
+such claims; this section has the sole purpose of protecting the
+integrity of the free software distribution system, which is
+implemented by public license practices. Many people have made
+generous contributions to the wide range of software distributed
+through that system in reliance on consistent application of that
+system; it is up to the author/donor to decide if he or she is willing
+to distribute software through any other system and a licensee cannot
+impose that choice.
+
+This section is intended to make thoroughly clear what is believed to
+be a consequence of the rest of this License.
+
+ 8. If the distribution and/or use of the Program is restricted in
+certain countries either by patents or by copyrighted interfaces, the
+original copyright holder who places the Program under this License
+may add an explicit geographical distribution limitation excluding
+those countries, so that distribution is permitted only in or among
+countries not thus excluded. In such case, this License incorporates
+the limitation as if written in the body of this License.
+
+ 9. The Free Software Foundation may publish revised and/or new versions
+of the General Public License from time to time. Such new versions will
+be similar in spirit to the present version, but may differ in detail to
+address new problems or concerns.
+
+Each version is given a distinguishing version number. If the Program
+specifies a version number of this License which applies to it and "any
+later version", you have the option of following the terms and conditions
+either of that version or of any later version published by the Free
+Software Foundation. If the Program does not specify a version number of
+this License, you may choose any version ever published by the Free Software
+Foundation.
+
+ 10. If you wish to incorporate parts of the Program into other free
+programs whose distribution conditions are different, write to the author
+to ask for permission. For software which is copyrighted by the Free
+Software Foundation, write to the Free Software Foundation; we sometimes
+make exceptions for this. Our decision will be guided by the two goals
+of preserving the free status of all derivatives of our free software and
+of promoting the sharing and reuse of software generally.
+
+ NO WARRANTY
+
+ 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY
+FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN
+OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES
+PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED
+OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS
+TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE
+PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING,
+REPAIR OR CORRECTION.
+
+ 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
+WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR
+REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES,
+INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING
+OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED
+TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY
+YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER
+PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGES.
+
+ END OF TERMS AND CONDITIONS
+
+ How to Apply These Terms to Your New Programs
+
+ If you develop a new program, and you want it to be of the greatest
+possible use to the public, the best way to achieve this is to make it
+free software which everyone can redistribute and change under these terms.
+
+ To do so, attach the following notices to the program. It is safest
+to attach them to the start of each source file to most effectively
+convey the exclusion of warranty; and each file should have at least
+the "copyright" line and a pointer to where the full notice is found.
+
+ <one line to give the program's name and a brief idea of what it does.>
+ Copyright (C) <year>  <name of author>
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License along
+ with this program; if not, write to the Free Software Foundation, Inc.,
+ 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+
+Also add information on how to contact you by electronic and paper mail.
+
+If the program is interactive, make it output a short notice like this
+when it starts in an interactive mode:
+
+ Gnomovision version 69, Copyright (C) year name of author
+ Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
+ This is free software, and you are welcome to redistribute it
+ under certain conditions; type `show c' for details.
+
+The hypothetical commands `show w' and `show c' should show the appropriate
+parts of the General Public License. Of course, the commands you use may
+be called something other than `show w' and `show c'; they could even be
+mouse-clicks or menu items--whatever suits your program.
+
+You should also get your employer (if you work as a programmer) or your
+school, if any, to sign a "copyright disclaimer" for the program, if
+necessary. Here is a sample; alter the names:
+
+ Yoyodyne, Inc., hereby disclaims all copyright interest in the program
+ `Gnomovision' (which makes passes at compilers) written by James Hacker.
+
+ <signature of Ty Coon>, 1 April 1989
+ Ty Coon, President of Vice
+
+This General Public License does not permit incorporating your program into
+proprietary programs. If your program is a subroutine library, you may
+consider it more useful to permit linking proprietary applications with the
+library. If this is what you want to do, use the GNU Lesser General
+Public License instead of this License.
--
Gitee
From 2fa692dcc4b1b92632330c70ccd0b974b9856e96 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E6=9D=8E=E5=AF=8C=E8=89=B3?=
Date: Tue, 30 Jul 2024 17:39:40 +0800
Subject: [PATCH 2/3] Add ZTE Dinghai rdma-core driver
---
0036-Add-ZTE-Dinghai-rdma-core-driver.patch | 13254 ++++++++++++++++++
1 file changed, 13254 insertions(+)
create mode 100644 0036-Add-ZTE-Dinghai-rdma-core-driver.patch
diff --git a/0036-Add-ZTE-Dinghai-rdma-core-driver.patch b/0036-Add-ZTE-Dinghai-rdma-core-driver.patch
new file mode 100644
index 0000000..f963b05
--- /dev/null
+++ b/0036-Add-ZTE-Dinghai-rdma-core-driver.patch
@@ -0,0 +1,13254 @@
+From 0293fc40d5d1e40dcdc05b2adf21d853ecaa14c8 Mon Sep 17 00:00:00 2001
+From: =?UTF-8?q?=E6=9D=8E=E5=AF=8C=E8=89=B3?=
+Date: Tue, 30 Jul 2024 16:20:03 +0800
+Subject: [PATCH] Add ZTE Dinghai rdma-core driver
+
+---
+ CMakeLists.txt | 4 +
+ MAINTAINERS | 4 +
+ README.md | 1 +
+ debian/control | 3 +-
+ debian/copyright | 4 +
+ debian/ibverbs-providers.install | 2 +
+ debian/ibverbs-providers.lintian-overrides | 4 +-
+ debian/ibverbs-providers.symbols | 6 +
+ debian/libibverbs-dev.install | 7 +
+ kernel-headers/CMakeLists.txt | 4 +
+ kernel-headers/rdma/hns-abi.h | 103 +-
+ kernel-headers/rdma/ib_user_ioctl_verbs.h | 1 +
+ kernel-headers/rdma/zxdh-abi.h | 143 +
+ kernel-headers/rdma/zxdh_user_ioctl_cmds.h | 56 +
+ kernel-headers/rdma/zxdh_user_ioctl_verbs.h | 34 +
+ librdmacm/cma.c | 2 +-
+ providers/bnxt_re/verbs.c | 2 +-
+ providers/hns/CMakeLists.txt | 9 +-
+ providers/hns/hns_roce_u.c | 360 ++-
+ providers/hns/hns_roce_u.h | 198 +-
+ providers/hns/hns_roce_u_abi.h | 4 +
+ providers/hns/hns_roce_u_buf.c | 466 +++
+ providers/hns/hns_roce_u_db.c | 2 +
+ providers/hns/hns_roce_u_db.h | 8 +-
+ providers/hns/hns_roce_u_hw_v2.c | 583 +++-
+ providers/hns/hns_roce_u_hw_v2.h | 4 +
+ providers/hns/hns_roce_u_verbs.c | 617 +++-
+ providers/hns/hnsdv.h | 85 +
+ providers/hns/libhns.map | 10 +
+ providers/zrdma/CMakeLists.txt | 18 +
+ providers/zrdma/abi.h | 36 +
+ providers/zrdma/defs.h | 388 +++
+ providers/zrdma/libzrdma.map | 16 +
+ providers/zrdma/osdep.h | 21 +
+ providers/zrdma/private_verbs_cmd.c | 203 ++
+ providers/zrdma/private_verbs_cmd.h | 24 +
+ providers/zrdma/status.h | 75 +
+ providers/zrdma/uk.c | 2616 +++++++++++++++
+ providers/zrdma/umain.c | 236 ++
+ providers/zrdma/umain.h | 228 ++
+ providers/zrdma/user.h | 572 ++++
+ providers/zrdma/uverbs.c | 3209 +++++++++++++++++++
+ providers/zrdma/zxdh.h | 53 +
+ providers/zrdma/zxdh_devids.h | 17 +
+ providers/zrdma/zxdh_dv.h | 75 +
+ rdma-tools/man/CMakeLists.txt | 4 +
+ rdma-tools/man/zxdh_modify_sport.1 | 42 +
+ rdma-tools/man/zxdh_set_log.1 | 37 +
+ rdma-tools/scripts/CMakeLists.txt | 55 +
+ rdma-tools/scripts/ibdev2netdev | 268 ++
+ rdma-tools/scripts/show_gids | 110 +
+ rdma-tools/tools/CMakeLists.txt | 13 +
+ rdma-tools/tools/zxdh_modify_sport.c | 169 +
+ rdma-tools/tools/zxdh_set_log.c | 173 +
+ redhat/rdma-core.spec | 19 +-
+ suse/rdma-core.spec | 22 +-
+ toolchain.cmake | 12 +
+ 57 files changed, 11243 insertions(+), 194 deletions(-)
+ create mode 100644 kernel-headers/rdma/zxdh-abi.h
+ create mode 100644 kernel-headers/rdma/zxdh_user_ioctl_cmds.h
+ create mode 100644 kernel-headers/rdma/zxdh_user_ioctl_verbs.h
+ create mode 100644 providers/hns/hnsdv.h
+ create mode 100644 providers/hns/libhns.map
+ create mode 100644 providers/zrdma/CMakeLists.txt
+ create mode 100644 providers/zrdma/abi.h
+ create mode 100644 providers/zrdma/defs.h
+ create mode 100644 providers/zrdma/libzrdma.map
+ create mode 100644 providers/zrdma/osdep.h
+ create mode 100644 providers/zrdma/private_verbs_cmd.c
+ create mode 100644 providers/zrdma/private_verbs_cmd.h
+ create mode 100644 providers/zrdma/status.h
+ create mode 100644 providers/zrdma/uk.c
+ create mode 100644 providers/zrdma/umain.c
+ create mode 100644 providers/zrdma/umain.h
+ create mode 100644 providers/zrdma/user.h
+ create mode 100644 providers/zrdma/uverbs.c
+ create mode 100644 providers/zrdma/zxdh.h
+ create mode 100644 providers/zrdma/zxdh_devids.h
+ create mode 100644 providers/zrdma/zxdh_dv.h
+ create mode 100644 rdma-tools/man/CMakeLists.txt
+ create mode 100644 rdma-tools/man/zxdh_modify_sport.1
+ create mode 100644 rdma-tools/man/zxdh_set_log.1
+ create mode 100644 rdma-tools/scripts/CMakeLists.txt
+ create mode 100644 rdma-tools/scripts/ibdev2netdev
+ create mode 100644 rdma-tools/scripts/show_gids
+ create mode 100644 rdma-tools/tools/CMakeLists.txt
+ create mode 100644 rdma-tools/tools/zxdh_modify_sport.c
+ create mode 100644 rdma-tools/tools/zxdh_set_log.c
+ create mode 100644 toolchain.cmake
+
+diff --git a/CMakeLists.txt b/CMakeLists.txt
+index 98985e7..bf3097d 100644
+--- a/CMakeLists.txt
++++ b/CMakeLists.txt
+@@ -748,6 +748,7 @@ add_subdirectory(providers/mthca)
+ add_subdirectory(providers/ocrdma)
+ add_subdirectory(providers/qedr)
+ add_subdirectory(providers/vmw_pvrdma)
++add_subdirectory(providers/zrdma)
+ endif()
+
+ add_subdirectory(providers/hfi1verbs)
+@@ -762,6 +763,9 @@ add_subdirectory(libibnetdisc/man)
+ add_subdirectory(infiniband-diags)
+ add_subdirectory(infiniband-diags/scripts)
+ add_subdirectory(infiniband-diags/man)
++add_subdirectory(rdma-tools/scripts)
++add_subdirectory(rdma-tools/tools)
++add_subdirectory(rdma-tools/man)
+
+ if (CYTHON_EXECUTABLE)
+ add_subdirectory(pyverbs)
+diff --git a/MAINTAINERS b/MAINTAINERS
+index 4b24117..394c4da 100644
+--- a/MAINTAINERS
++++ b/MAINTAINERS
+@@ -185,6 +185,10 @@ L: pv-drivers@vmware.com
+ S: Supported
+ F: providers/vmw_pvrdma/
+
++ZRDMA USERSPACE PROVIDER (for zrdma.ko)
++S: Supported
++F: providers/zrdma/
++
+ PYVERBS
+ M: Edward Srouji
+ S: Supported
+diff --git a/README.md b/README.md
+index 928bdc4..8f47d3c 100644
+--- a/README.md
++++ b/README.md
+@@ -31,6 +31,7 @@ is included:
+ - rdma_rxe.ko
+ - siw.ko
+ - vmw_pvrdma.ko
++ - zrdma.ko
+
+ Additional service daemons are provided for:
+ - srp_daemon (ib_srp.ko)
+diff --git a/debian/control b/debian/control
+index 160824f..4dda44e 100644
+--- a/debian/control
++++ b/debian/control
+@@ -87,7 +87,7 @@ Description: User space provider drivers for libibverbs
+ - efa: Amazon Elastic Fabric Adapter
+ - erdma: Alibaba Elastic RDMA (iWarp) Adapter
+ - hfi1verbs: Intel Omni-Path HFI
+- - hns: HiSilicon Hip06 SoC
++ - hns: HiSilicon Hip08+ SoC
+ - ipathverbs: QLogic InfiniPath HCAs
+ - irdma: Intel Ethernet Connection RDMA
+ - mana: Microsoft Azure Network Adapter
+@@ -99,6 +99,7 @@ Description: User space provider drivers for libibverbs
+ - rxe: A software implementation of the RoCE protocol
+ - siw: A software implementation of the iWarp protocol
+ - vmw_pvrdma: VMware paravirtual RDMA device
++ - zrdma: ZTE Dinghai RDMA
+
+ Package: ibverbs-utils
+ Architecture: linux-any
+diff --git a/debian/copyright b/debian/copyright
+index 36ac71e..5c9e5a0 100644
+--- a/debian/copyright
++++ b/debian/copyright
+@@ -228,6 +228,10 @@ Files: providers/vmw_pvrdma/*
+ Copyright: 2012-2016 VMware, Inc.
+ License: BSD-2-clause or GPL-2
+
++Files: providers/zrdma/*
++Copyright: 2024 ZTE Corporation. All rights reserved.
++License: BSD-MIT or GPL-2
++
+ Files: rdma-ndd/*
+ Copyright: 2004-2016, Intel Corporation.
+ License: BSD-MIT or GPL-2
+diff --git a/debian/ibverbs-providers.install b/debian/ibverbs-providers.install
+index a003a30..360516f 100644
+--- a/debian/ibverbs-providers.install
++++ b/debian/ibverbs-providers.install
+@@ -1,6 +1,8 @@
+ etc/libibverbs.d/
+ usr/lib/*/libefa.so.*
+ usr/lib/*/libibverbs/lib*-rdmav*.so
++usr/lib/*/libhns.so.*
+ usr/lib/*/libmana.so.*
+ usr/lib/*/libmlx4.so.*
+ usr/lib/*/libmlx5.so.*
++usr/lib/*/libzrdma.so.*
+diff --git a/debian/ibverbs-providers.lintian-overrides b/debian/ibverbs-providers.lintian-overrides
+index 5815058..fd73a76 100644
+--- a/debian/ibverbs-providers.lintian-overrides
++++ b/debian/ibverbs-providers.lintian-overrides
+@@ -1,2 +1,2 @@
+-# libefa, libmana, libmlx4 and libmlx5 are ibverbs provider that provides more functions.
+-ibverbs-providers: package-name-doesnt-match-sonames libefa1 libmana1 libmlx4-1 libmlx5-1
++# libefa, libhns, libmana, libmlx4 and libmlx5 are ibverbs provider that provides more functions.
++ibverbs-providers: package-name-doesnt-match-sonames libefa1 libhns-1 libmana1 libmlx4-1 libmlx5-1
+diff --git a/debian/ibverbs-providers.symbols b/debian/ibverbs-providers.symbols
+index 72361bd..d2c0989 100644
+--- a/debian/ibverbs-providers.symbols
++++ b/debian/ibverbs-providers.symbols
+@@ -174,6 +174,12 @@ libefa.so.1 ibverbs-providers #MINVER#
+ efadv_cq_from_ibv_cq_ex@EFA_1.2 43
+ efadv_create_cq@EFA_1.2 43
+ efadv_query_mr@EFA_1.3 50
++libhns.so.1 ibverbs-providers #MINVER#
++* Build-Depends-Package: libibverbs-dev
++ HNS_1.0@HNS_1.0 51
++ hnsdv_is_supported@HNS_1.0 51
++ hnsdv_create_qp@HNS_1.0 51
++ hnsdv_query_device@HNS_1.0 51
+ libmana.so.1 ibverbs-providers #MINVER#
+ * Build-Depends-Package: libibverbs-dev
+ MANA_1.0@MANA_1.0 41
+diff --git a/debian/libibverbs-dev.install b/debian/libibverbs-dev.install
+index 5f2ffd5..ebc3df9 100644
+--- a/debian/libibverbs-dev.install
++++ b/debian/libibverbs-dev.install
+@@ -1,5 +1,6 @@
+ usr/include/infiniband/arch.h
+ usr/include/infiniband/efadv.h
++usr/include/infiniband/hnsdv.h
+ usr/include/infiniband/ib_user_ioctl_verbs.h
+ usr/include/infiniband/manadv.h
+ usr/include/infiniband/mlx4dv.h
+@@ -12,9 +13,12 @@ usr/include/infiniband/sa.h
+ usr/include/infiniband/tm_types.h
+ usr/include/infiniband/verbs.h
+ usr/include/infiniband/verbs_api.h
++usr/include/infiniband/zxdh_dv.h
+ usr/lib/*/lib*-rdmav*.a
+ usr/lib/*/libefa.a
+ usr/lib/*/libefa.so
++usr/lib/*/libhns.a
++usr/lib/*/libhns.so
+ usr/lib/*/libibverbs*.so
+ usr/lib/*/libibverbs.a
+ usr/lib/*/libmana.a
+@@ -23,11 +27,14 @@ usr/lib/*/libmlx4.a
+ usr/lib/*/libmlx4.so
+ usr/lib/*/libmlx5.a
+ usr/lib/*/libmlx5.so
++usr/lib/*/libzrdma.so
+ usr/lib/*/pkgconfig/libefa.pc
++usr/lib/*/pkgconfig/libhns.pc
+ usr/lib/*/pkgconfig/libibverbs.pc
+ usr/lib/*/pkgconfig/libmana.pc
+ usr/lib/*/pkgconfig/libmlx4.pc
+ usr/lib/*/pkgconfig/libmlx5.pc
++usr/lib/*/pkgconfig/libzrdma.pc
+ usr/share/man/man3/efadv_*.3
+ usr/share/man/man3/ibv_*
+ usr/share/man/man3/mbps_to_ibv_rate.3
+diff --git a/kernel-headers/CMakeLists.txt b/kernel-headers/CMakeLists.txt
+index 82c191c..9ceac31 100644
+--- a/kernel-headers/CMakeLists.txt
++++ b/kernel-headers/CMakeLists.txt
+@@ -26,6 +26,9 @@ publish_internal_headers(rdma
+ rdma/rvt-abi.h
+ rdma/siw-abi.h
+ rdma/vmw_pvrdma-abi.h
++ rdma/zxdh-abi.h
++ rdma/zxdh_user_ioctl_cmds.h
++ rdma/zxdh_user_ioctl_verbs.h
+ )
+
+ publish_internal_headers(rdma/hfi
+@@ -80,6 +83,7 @@ rdma_kernel_provider_abi(
+ rdma/rdma_user_rxe.h
+ rdma/siw-abi.h
+ rdma/vmw_pvrdma-abi.h
++ rdma/zxdh-abi.h
+ )
+
+ publish_headers(infiniband
+diff --git a/kernel-headers/rdma/hns-abi.h b/kernel-headers/rdma/hns-abi.h
+index c996e15..8a8f2e4 100644
+--- a/kernel-headers/rdma/hns-abi.h
++++ b/kernel-headers/rdma/hns-abi.h
+@@ -73,21 +73,38 @@ struct hns_roce_ib_create_srq_resp {
+ __u32 cap_flags; /* Use enum hns_roce_srq_cap_flags */
+ };
+
++enum hns_roce_congest_type_flags {
++ HNS_ROCE_CREATE_QP_FLAGS_DCQCN,
++ HNS_ROCE_CREATE_QP_FLAGS_LDCP,
++ HNS_ROCE_CREATE_QP_FLAGS_HC3,
++ HNS_ROCE_CREATE_QP_FLAGS_DIP,
++};
++
++enum hns_roce_create_qp_comp_mask {
++ HNS_ROCE_CREATE_QP_MASK_CONGEST_TYPE = 1 << 0,
++};
++
+ struct hns_roce_ib_create_qp {
+ __aligned_u64 buf_addr;
+ __aligned_u64 db_addr;
+ __u8 log_sq_bb_count;
+ __u8 log_sq_stride;
+ __u8 sq_no_prefetch;
+- __u8 reserved[5];
++ __u8 pageshift;
++ __u8 reserved[4];
+ __aligned_u64 sdb_addr;
++ __aligned_u64 comp_mask; /* Use enum hns_roce_create_qp_comp_mask */
++ __aligned_u64 create_flags;
++ __aligned_u64 cong_type_flags;
+ };
+
+ enum hns_roce_qp_cap_flags {
+ HNS_ROCE_QP_CAP_RQ_RECORD_DB = 1 << 0,
+ HNS_ROCE_QP_CAP_SQ_RECORD_DB = 1 << 1,
+ HNS_ROCE_QP_CAP_OWNER_DB = 1 << 2,
++ HNS_ROCE_QP_CAP_DYNAMIC_CTX_ATTACH = 1 << 4,
+ HNS_ROCE_QP_CAP_DIRECT_WQE = 1 << 5,
++ HNS_ROCE_QP_CAP_DYNAMIC_CTX_DETACH = 1 << 6,
+ };
+
+ struct hns_roce_ib_create_qp_resp {
+@@ -95,16 +112,28 @@ struct hns_roce_ib_create_qp_resp {
+ __aligned_u64 dwqe_mmap_key;
+ };
+
++struct hns_roce_ib_modify_qp_resp {
++ __u8 tc_mode;
++ __u8 priority;
++ __u8 reserved[6];
++ __u32 dcan;
++ __u32 rsv2;
++};
++
+ enum {
+ HNS_ROCE_EXSGE_FLAGS = 1 << 0,
+ HNS_ROCE_RQ_INLINE_FLAGS = 1 << 1,
+ HNS_ROCE_CQE_INLINE_FLAGS = 1 << 2,
++ HNS_ROCE_UCTX_CONFIG_DCA = 1 << 3,
++ HNS_ROCE_UCTX_DYN_QP_PGSZ = 1 << 4,
+ };
+
+ enum {
+ HNS_ROCE_RSP_EXSGE_FLAGS = 1 << 0,
+ HNS_ROCE_RSP_RQ_INLINE_FLAGS = 1 << 1,
+ HNS_ROCE_RSP_CQE_INLINE_FLAGS = 1 << 2,
++ HNS_ROCE_UCTX_RSP_DCA_FLAGS = HNS_ROCE_UCTX_CONFIG_DCA,
++ HNS_ROCE_UCTX_RSP_DYN_QP_PGSZ = HNS_ROCE_UCTX_DYN_QP_PGSZ,
+ };
+
+ struct hns_roce_ib_alloc_ucontext_resp {
+@@ -114,10 +143,22 @@ struct hns_roce_ib_alloc_ucontext_resp {
+ __u32 reserved;
+ __u32 config;
+ __u32 max_inline_data;
++ __u8 congest_type;
++ __u8 reserved0[7];
++ __u32 dca_qps;
++ __u32 dca_mmap_size;
++ __aligned_u64 dca_mmap_key;
++ __aligned_u64 reset_mmap_key;
++};
++
++enum hns_roce_uctx_comp_mask {
++ HNS_ROCE_ALLOC_UCTX_COMP_DCA_MAX_QPS = 1 << 0,
+ };
+
+ struct hns_roce_ib_alloc_ucontext {
+ __u32 config;
++ __u32 comp; /* use hns_roce_uctx_comp_mask */
++ __u32 dca_max_qps;
+ __u32 reserved;
+ };
+
+@@ -127,7 +168,65 @@ struct hns_roce_ib_alloc_pd_resp {
+
+ struct hns_roce_ib_create_ah_resp {
+ __u8 dmac[6];
+- __u8 reserved[2];
++ __u8 priority;
++ __u8 tc_mode;
++};
++
++#define UVERBS_ID_NS_MASK 0xF000
++#define UVERBS_ID_NS_SHIFT 12
++
++enum hns_ib_objects {
++ HNS_IB_OBJECT_DCA_MEM = (1U << UVERBS_ID_NS_SHIFT),
++};
++
++enum hns_ib_dca_mem_methods {
++ HNS_IB_METHOD_DCA_MEM_REG = (1U << UVERBS_ID_NS_SHIFT),
++ HNS_IB_METHOD_DCA_MEM_DEREG,
++ HNS_IB_METHOD_DCA_MEM_SHRINK,
++ HNS_IB_METHOD_DCA_MEM_ATTACH,
++ HNS_IB_METHOD_DCA_MEM_DETACH,
++ HNS_IB_METHOD_DCA_MEM_QUERY,
++};
++
++enum hns_ib_dca_mem_reg_attrs {
++ HNS_IB_ATTR_DCA_MEM_REG_HANDLE = (1U << UVERBS_ID_NS_SHIFT),
++ HNS_IB_ATTR_DCA_MEM_REG_FLAGS,
++ HNS_IB_ATTR_DCA_MEM_REG_LEN,
++ HNS_IB_ATTR_DCA_MEM_REG_ADDR,
++ HNS_IB_ATTR_DCA_MEM_REG_KEY,
++};
++
++enum hns_ib_dca_mem_dereg_attrs {
++ HNS_IB_ATTR_DCA_MEM_DEREG_HANDLE = (1U << UVERBS_ID_NS_SHIFT),
++};
++
++enum hns_ib_dca_mem_shrink_attrs {
++ HNS_IB_ATTR_DCA_MEM_SHRINK_HANDLE = (1U << UVERBS_ID_NS_SHIFT),
++ HNS_IB_ATTR_DCA_MEM_SHRINK_RESERVED_SIZE,
++ HNS_IB_ATTR_DCA_MEM_SHRINK_OUT_FREE_KEY,
++ HNS_IB_ATTR_DCA_MEM_SHRINK_OUT_FREE_MEMS,
++};
++
++enum hns_ib_dca_mem_attach_attrs {
++ HNS_IB_ATTR_DCA_MEM_ATTACH_HANDLE = (1U << UVERBS_ID_NS_SHIFT),
++ HNS_IB_ATTR_DCA_MEM_ATTACH_SQ_OFFSET,
++ HNS_IB_ATTR_DCA_MEM_ATTACH_SGE_OFFSET,
++ HNS_IB_ATTR_DCA_MEM_ATTACH_RQ_OFFSET,
++ HNS_IB_ATTR_DCA_MEM_ATTACH_OUT_ALLOC_FLAGS,
++ HNS_IB_ATTR_DCA_MEM_ATTACH_OUT_ALLOC_PAGES,
++};
++
++enum hns_ib_dca_mem_detach_attrs {
++ HNS_IB_ATTR_DCA_MEM_DETACH_HANDLE = (1U << UVERBS_ID_NS_SHIFT),
++ HNS_IB_ATTR_DCA_MEM_DETACH_SQ_INDEX,
++};
++
++enum hns_ib_dca_mem_query_attrs {
++ HNS_IB_ATTR_DCA_MEM_QUERY_HANDLE = (1U << UVERBS_ID_NS_SHIFT),
++ HNS_IB_ATTR_DCA_MEM_QUERY_PAGE_INDEX,
++ HNS_IB_ATTR_DCA_MEM_QUERY_OUT_KEY,
++ HNS_IB_ATTR_DCA_MEM_QUERY_OUT_OFFSET,
++ HNS_IB_ATTR_DCA_MEM_QUERY_OUT_PAGE_COUNT,
+ };
+
+ #endif /* HNS_ABI_USER_H */
+diff --git a/kernel-headers/rdma/ib_user_ioctl_verbs.h b/kernel-headers/rdma/ib_user_ioctl_verbs.h
+index fe15bc7..df1b2b6 100644
+--- a/kernel-headers/rdma/ib_user_ioctl_verbs.h
++++ b/kernel-headers/rdma/ib_user_ioctl_verbs.h
+@@ -255,6 +255,7 @@ enum rdma_driver_id {
+ RDMA_DRIVER_SIW,
+ RDMA_DRIVER_ERDMA,
+ RDMA_DRIVER_MANA,
++ RDMA_DRIVER_ZXDH
+ };
+
+ enum ib_uverbs_gid_type {
+diff --git a/kernel-headers/rdma/zxdh-abi.h b/kernel-headers/rdma/zxdh-abi.h
+new file mode 100644
+index 0000000..8e7fa3d
+--- /dev/null
++++ b/kernel-headers/rdma/zxdh-abi.h
+@@ -0,0 +1,143 @@
++/* SPDX-License-Identifier: GPL-2.0 or Linux-OpenIB */
++/* Copyright (C) 2019 - 2020 Intel Corporation */
++
++#ifndef ZXDH_ABI_H
++#define ZXDH_ABI_H
++
++#include <linux/types.h>
++
++/* zxdh must support legacy GEN_1 i40iw kernel
++ * and user-space whose last ABI ver is 5
++ */
++#define ZXDH_ABI_VER 5
++
++enum zxdh_memreg_type {
++ ZXDH_MEMREG_TYPE_MEM = 0,
++ ZXDH_MEMREG_TYPE_QP = 1,
++ ZXDH_MEMREG_TYPE_CQ = 2,
++ ZXDH_MEMREG_TYPE_SRQ = 3,
++};
++
++enum zxdh_db_addr_type {
++ ZXDH_DB_ADDR_PHY = 0,
++ ZXDH_DB_ADDR_BAR = 1,
++};
++
++struct zxdh_alloc_ucontext_req {
++ __u32 rsvd32;
++ __u8 userspace_ver;
++ __u8 rsvd8[3];
++};
++
++struct zxdh_alloc_ucontext_resp {
++ __u32 max_pds;
++ __u32 max_qps;
++ __u32 wq_size; /* size of the WQs (SQ+RQ) in the mmaped area */
++ __u8 kernel_ver;
++ __u8 db_addr_type;
++ __u8 rsvd[2];
++ __aligned_u64 feature_flags;
++ __aligned_u64 sq_db_mmap_key;
++ __aligned_u64 cq_db_mmap_key;
++ __aligned_u64 sq_db_pa;
++ __aligned_u64 cq_db_pa;
++ __u32 max_hw_wq_frags;
++ __u32 max_hw_read_sges;
++ __u32 max_hw_inline;
++ __u32 max_hw_rq_quanta;
++ __u32 max_hw_srq_quanta;
++ __u32 max_hw_wq_quanta;
++ __u32 max_hw_srq_wr;
++ __u32 min_hw_cq_size;
++ __u32 max_hw_cq_size;
++ __u16 max_hw_sq_chunk;
++ __u8 hw_rev;
++ __u8 rsvd2;
++};
++
++struct zxdh_alloc_pd_resp {
++ __u32 pd_id;
++ __u8 rsvd[4];
++};
++
++struct zxdh_resize_cq_req {
++ __aligned_u64 user_cq_buffer;
++};
++
++struct zxdh_create_cq_req {
++ __aligned_u64 user_cq_buf;
++ __aligned_u64 user_shadow_area;
++};
++
++struct zxdh_create_qp_req {
++ __aligned_u64 user_wqe_bufs;
++ __aligned_u64 user_compl_ctx;
++};
++
++struct zxdh_create_srq_req {
++ __aligned_u64 user_wqe_bufs;
++ __aligned_u64 user_compl_ctx;
++ __aligned_u64 user_wqe_list;
++ __aligned_u64 user_wqe_db;
++};
++
++struct zxdh_mem_reg_req {
++ __u16 reg_type; /* enum zxdh_memreg_type */
++ __u16 cq_pages;
++ __u16 rq_pages;
++ __u16 sq_pages;
++ __u16 srq_pages;
++ __u16 srq_list_pages;
++ __u8 rsvd[4];
++};
++
++struct zxdh_reg_mr_resp {
++ __u32 mr_pa_low;
++ __u32 mr_pa_hig;
++ __u16 host_page_size;
++ __u16 leaf_pbl_size;
++ __u8 rsvd[4];
++};
++
++struct zxdh_modify_qp_req {
++ __u8 sq_flush;
++ __u8 rq_flush;
++ __u8 rsvd[6];
++};
++
++struct zxdh_create_cq_resp {
++ __u32 cq_id;
++ __u32 cq_size;
++};
++
++struct zxdh_create_qp_resp {
++ __u32 qp_id;
++ __u32 actual_sq_size;
++ __u32 actual_rq_size;
++ __u32 zxdh_drv_opt;
++ __u16 push_idx;
++ __u8 lsmm;
++ __u8 rsvd;
++ __u32 qp_caps;
++};
++
++struct zxdh_create_srq_resp {
++ __u32 srq_id;
++ __u32 actual_srq_size;
++ __u32 actual_srq_list_size;
++ __u8 rsvd[4];
++};
++
++struct zxdh_modify_qp_resp {
++ __aligned_u64 push_wqe_mmap_key;
++ __aligned_u64 push_db_mmap_key;
++ __u16 push_offset;
++ __u8 push_valid;
++ __u8 rsvd[5];
++};
++
++struct zxdh_create_ah_resp {
++ __u32 ah_id;
++ __u8 rsvd[4];
++};
++#endif /* ZXDH_ABI_H */
+diff --git a/kernel-headers/rdma/zxdh_user_ioctl_cmds.h b/kernel-headers/rdma/zxdh_user_ioctl_cmds.h
+new file mode 100644
+index 0000000..96d2eb4
+--- /dev/null
++++ b/kernel-headers/rdma/zxdh_user_ioctl_cmds.h
+@@ -0,0 +1,56 @@
++/* SPDX-License-Identifier: GPL-2.0 or Linux-OpenIB */
++/* Copyright (c) 2024 ZTE Corporation. All rights reserved. */
++
++#ifndef ZXDH_USER_IOCTL_CMDS_H
++#define ZXDH_USER_IOCTL_CMDS_H
++
++#include <linux/types.h>
++#include <rdma/ib_user_ioctl_cmds.h>
++
++enum zxdh_ib_dev_get_log_trace_attrs {
++ ZXDH_IB_ATTR_DEV_GET_LOG_TARCE_SWITCH = (1U << UVERBS_ID_NS_SHIFT),
++};
++
++enum zxdh_ib_dev_set_log_trace_attrs {
++ ZXDH_IB_ATTR_DEV_SET_LOG_TARCE_SWITCH = (1U << UVERBS_ID_NS_SHIFT),
++};
++
++enum zxdh_ib_dev_methods {
++ ZXDH_IB_METHOD_DEV_GET_LOG_TRACE = (1U << UVERBS_ID_NS_SHIFT),
++ ZXDH_IB_METHOD_DEV_SET_LOG_TRACE,
++};
++
++enum zxdh_ib_qp_modify_udp_sport_attrs {
++ ZXDH_IB_ATTR_QP_UDP_PORT = (1U << UVERBS_ID_NS_SHIFT),
++ ZXDH_IB_ATTR_QP_QPN,
++};
++
++enum zxdh_ib_qp_query_qpc_attrs {
++ ZXDH_IB_ATTR_QP_QUERY_HANDLE = (1U << UVERBS_ID_NS_SHIFT),
++ ZXDH_IB_ATTR_QP_QUERY_RESP,
++};
++
++enum zxdh_ib_qp_modify_qpc_attrs {
++ ZXDH_IB_ATTR_QP_MODIFY_QPC_HANDLE = (1U << UVERBS_ID_NS_SHIFT),
++ ZXDH_IB_ATTR_QP_MODIFY_QPC_REQ,
++ ZXDH_IB_ATTR_QP_MODIFY_QPC_MASK,
++};
++
++enum zxdh_ib_qp_reset_qp_attrs {
++ ZXDH_IB_ATTR_QP_RESET_QP_HANDLE = (1U << UVERBS_ID_NS_SHIFT),
++ ZXDH_IB_ATTR_QP_RESET_OP_CODE,
++};
++
++enum zxdh_ib_qp_methods {
++ ZXDH_IB_METHOD_QP_MODIFY_UDP_SPORT = (1U << UVERBS_ID_NS_SHIFT),
++ ZXDH_IB_METHOD_QP_QUERY_QPC,
++ ZXDH_IB_METHOD_QP_MODIFY_QPC,
++ ZXDH_IB_METHOD_QP_RESET_QP,
++};
++
++enum zxdh_ib_objects {
++ ZXDH_IB_OBJECT_DEV = (1U << UVERBS_ID_NS_SHIFT),
++ ZXDH_IB_OBJECT_QP_OBJ,
++};
++
++#endif
+diff --git a/kernel-headers/rdma/zxdh_user_ioctl_verbs.h b/kernel-headers/rdma/zxdh_user_ioctl_verbs.h
+new file mode 100644
+index 0000000..bc0e812
+--- /dev/null
++++ b/kernel-headers/rdma/zxdh_user_ioctl_verbs.h
+@@ -0,0 +1,34 @@
++/* SPDX-License-Identifier: GPL-2.0 or Linux-OpenIB */
++/* Copyright (c) 2024 ZTE Corporation. All rights reserved. */
++#ifndef ZXDH_USER_IOCTL_VERBS_H
++#define ZXDH_USER_IOCTL_VERBS_H
++
++#include <linux/types.h>
++
++// TODO: align
++struct zxdh_query_qpc_resp {
++ __u8 retry_flag;
++ __u8 rnr_retry_flag;
++ __u8 read_retry_flag;
++ __u8 cur_retry_count;
++ __u8 retry_cqe_sq_opcode;
++ __u8 err_flag;
++ __u8 ack_err_flag;
++ __u8 package_err_flag;
++ __u8 recv_err_flag;
++ __u8 retry_count;
++ __u32 tx_last_ack_psn;
++};
++
++struct zxdh_modify_qpc_req {
++ __u8 retry_flag;
++ __u8 rnr_retry_flag;
++ __u8 read_retry_flag;
++ __u8 cur_retry_count;
++ __u8 retry_cqe_sq_opcode;
++ __u8 err_flag;
++ __u8 ack_err_flag;
++ __u8 package_err_flag;
++};
++
++#endif
+diff --git a/librdmacm/cma.c b/librdmacm/cma.c
+index 7b924bd..0a631bd 100644
+--- a/librdmacm/cma.c
++++ b/librdmacm/cma.c
+@@ -311,7 +311,7 @@ static void remove_cma_dev(struct cma_device *cma_dev)
+
+ static int dev_cmp(const void *a, const void *b)
+ {
+- return (int)(*(char *const *)a - *(char *const *)b);
++ return (*(uintptr_t *)a > *(uintptr_t *)b) - (*(uintptr_t *)a < *(uintptr_t *)b);
+ }
+
+ static int sync_devices_list(void)
+diff --git a/providers/bnxt_re/verbs.c b/providers/bnxt_re/verbs.c
+index 55d5284..a74d32c 100644
+--- a/providers/bnxt_re/verbs.c
++++ b/providers/bnxt_re/verbs.c
+@@ -1233,7 +1233,7 @@ static int bnxt_re_alloc_queues(struct bnxt_re_context *cntx,
+ /* psn_depth extra entries of size que->stride */
+ psn_size = bnxt_re_get_psne_size(qp->cntx);
+ psn_depth = (nswr * psn_size) / que->stride;
+- que->pad_stride_log2 = (uint32_t)ilog32(psn_size);
++ que->pad_stride_log2 = ilog32(psn_size - 1);
+ if ((nswr * psn_size) % que->stride)
+ psn_depth++;
+ que->depth += psn_depth;
+diff --git a/providers/hns/CMakeLists.txt b/providers/hns/CMakeLists.txt
+index 7aaca75..58139ae 100644
+--- a/providers/hns/CMakeLists.txt
++++ b/providers/hns/CMakeLists.txt
+@@ -1,7 +1,14 @@
+-rdma_provider(hns
++rdma_shared_provider(hns libhns.map
++ 1 1.0.${PACKAGE_VERSION}
+ hns_roce_u.c
+ hns_roce_u_buf.c
+ hns_roce_u_db.c
+ hns_roce_u_hw_v2.c
+ hns_roce_u_verbs.c
+ )
++
++publish_headers(infiniband
++ hnsdv.h
++)
++
++rdma_pkg_config("hns" "libibverbs" "${CMAKE_THREAD_LIBS_INIT}")
+diff --git a/providers/hns/hns_roce_u.c b/providers/hns/hns_roce_u.c
+index 266e73e..e219b9e 100644
+--- a/providers/hns/hns_roce_u.c
++++ b/providers/hns/hns_roce_u.c
+@@ -53,6 +53,8 @@ static const struct verbs_match_ent hca_table[] = {
+ VERBS_PCI_MATCH(PCI_VENDOR_ID_HUAWEI, 0xA226, &hns_roce_u_hw_v2),
+ VERBS_PCI_MATCH(PCI_VENDOR_ID_HUAWEI, 0xA227, &hns_roce_u_hw_v2),
+ VERBS_PCI_MATCH(PCI_VENDOR_ID_HUAWEI, 0xA228, &hns_roce_u_hw_v2),
++ VERBS_PCI_MATCH(PCI_VENDOR_ID_HUAWEI, 0xA22C, &hns_roce_u_hw_v2),
++ VERBS_PCI_MATCH(PCI_VENDOR_ID_HUAWEI, 0xA22D, &hns_roce_u_hw_v2),
+ VERBS_PCI_MATCH(PCI_VENDOR_ID_HUAWEI, 0xA22F, &hns_roce_u_hw_v2),
+ {}
+ };
+@@ -67,7 +69,7 @@ static const struct verbs_context_ops hns_common_ops = {
+ .create_qp = hns_roce_u_create_qp,
+ .create_qp_ex = hns_roce_u_create_qp_ex,
+ .dealloc_mw = hns_roce_u_dealloc_mw,
+- .dealloc_pd = hns_roce_u_free_pd,
++ .dealloc_pd = hns_roce_u_dealloc_pd,
+ .dereg_mr = hns_roce_u_dereg_mr,
+ .destroy_cq = hns_roce_u_destroy_cq,
+ .modify_cq = hns_roce_u_modify_cq,
+@@ -88,8 +90,43 @@ static const struct verbs_context_ops hns_common_ops = {
+ .close_xrcd = hns_roce_u_close_xrcd,
+ .open_qp = hns_roce_u_open_qp,
+ .get_srq_num = hns_roce_u_get_srq_num,
++ .alloc_td = hns_roce_u_alloc_td,
++ .dealloc_td = hns_roce_u_dealloc_td,
++ .alloc_parent_domain = hns_roce_u_alloc_pad,
+ };
+
++static struct {
++ uint32_t device_id;
++ enum hns_device_link_type link_type;
++} device_link_types[] = {
++ {0xA222, HNS_DEV_LINK_TYPE_ETH},
++ {0xA223, HNS_DEV_LINK_TYPE_ETH},
++ {0xA224, HNS_DEV_LINK_TYPE_ETH},
++ {0xA225, HNS_DEV_LINK_TYPE_ETH},
++ {0xA226, HNS_DEV_LINK_TYPE_ETH},
++ {0xA228, HNS_DEV_LINK_TYPE_ETH},
++ {0xA22F, HNS_DEV_LINK_TYPE_ETH},
++ {0xA227, HNS_DEV_LINK_TYPE_HCCS},
++ {0xA22C, HNS_DEV_LINK_TYPE_HCCS},
++ {0xA22D, HNS_DEV_LINK_TYPE_HCCS}
++};
++
++static int get_link_type(uint32_t device_id,
++ enum hns_device_link_type *link_type)
++{
++ int i;
++
++ for (i = 0; i < ARRAY_SIZE(device_link_types); i++) {
++ if (device_id == device_link_types[i].device_id) {
++ *link_type = device_link_types[i].link_type;
++ return 0;
++ }
++ }
++
++ return ENOENT;
++}
++
++
+ static uint32_t calc_table_shift(uint32_t entry_count, uint32_t size_shift)
+ {
+ uint32_t count_shift = hr_ilog32(entry_count);
+@@ -97,50 +134,189 @@ static uint32_t calc_table_shift(uint32_t entry_count, uint32_t size_shift)
+ return count_shift > size_shift ? count_shift - size_shift : 0;
+ }
+
+-static struct verbs_context *hns_roce_alloc_context(struct ibv_device *ibdev,
+- int cmd_fd,
+- void *private_data)
++static int hns_roce_mmap(struct hns_roce_device *hr_dev,
++ struct hns_roce_context *context, int cmd_fd)
+ {
+- struct hns_roce_device *hr_dev = to_hr_dev(ibdev);
+- struct hns_roce_alloc_ucontext_resp resp = {};
+- struct hns_roce_alloc_ucontext cmd = {};
+- struct ibv_device_attr dev_attrs;
+- struct hns_roce_context *context;
+- int i;
++ int page_size = hr_dev->page_size;
+
+- context = verbs_init_and_alloc_context(ibdev, cmd_fd, context, ibv_ctx,
+- RDMA_DRIVER_HNS);
+- if (!context)
++ context->uar = mmap(NULL, page_size, PROT_READ | PROT_WRITE,
++ MAP_SHARED, cmd_fd, 0);
++ if (context->uar == MAP_FAILED)
++ return -ENOMEM;
++
++ return 0;
++}
++
++static int mmap_dca(struct hns_roce_context *ctx, int cmd_fd,
++ int page_size, size_t size, uint64_t mmap_key)
++{
++ struct hns_roce_dca_ctx *dca_ctx = &ctx->dca_ctx;
++ void *addr;
++
++ addr = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED, cmd_fd,
++ mmap_key);
++ if (addr == MAP_FAILED) {
++ verbs_err(&ctx->ibv_ctx, "failed to mmap() dca prime qp.\n");
++ return -EINVAL;
++ }
++
++ dca_ctx->buf_status = addr;
++ dca_ctx->sync_status = addr + size / 2;
++
++ return 0;
++}
++
++struct ibv_context *hnsdv_open_device(struct ibv_device *device,
++ struct hnsdv_context_attr *attr)
++{
++ if (!is_hns_dev(device)) {
++ errno = EOPNOTSUPP;
+ return NULL;
++ }
+
+- cmd.config |= HNS_ROCE_EXSGE_FLAGS | HNS_ROCE_RQ_INLINE_FLAGS |
+- HNS_ROCE_CQE_INLINE_FLAGS;
+- if (ibv_cmd_get_context(&context->ibv_ctx, &cmd.ibv_cmd, sizeof(cmd),
+- &resp.ibv_resp, sizeof(resp)))
+- goto err_free;
++ return verbs_open_device(device, attr);
++}
++
++static void set_dca_pool_param(struct hns_roce_context *ctx,
++ struct hnsdv_context_attr *attr, int page_size)
++{
++ struct hns_roce_dca_ctx *dca_ctx = &ctx->dca_ctx;
++
++ if (attr->comp_mask & HNSDV_CONTEXT_MASK_DCA_UNIT_SIZE)
++ dca_ctx->unit_size = align(attr->dca_unit_size, page_size);
++ else
++ dca_ctx->unit_size = page_size * HNS_DCA_DEFAULT_UNIT_PAGES;
++
++ /* The memory pool cannot be expanded, only init the DCA context. */
++ if (dca_ctx->unit_size == 0)
++ return;
++
++ /* If not set, the memory pool can be expanded unlimitedly. */
++ if (attr->comp_mask & HNSDV_CONTEXT_MASK_DCA_MAX_SIZE)
++ dca_ctx->max_size = DIV_ROUND_UP(attr->dca_max_size,
++ dca_ctx->unit_size) *
++ dca_ctx->unit_size;
++ else
++ dca_ctx->max_size = HNS_DCA_MAX_MEM_SIZE;
++
++ /* If not set, the memory pool cannot be shrunk. */
++ if (attr->comp_mask & HNSDV_CONTEXT_MASK_DCA_MIN_SIZE)
++ dca_ctx->min_size = DIV_ROUND_UP(attr->dca_min_size,
++ dca_ctx->unit_size) *
++ dca_ctx->unit_size;
++ else
++ dca_ctx->min_size = HNS_DCA_MAX_MEM_SIZE;
++
++ verbs_debug(&ctx->ibv_ctx,
++ "Support DCA, unit %u, max %lu, min %lu Bytes.\n",
++ dca_ctx->unit_size, dca_ctx->max_size, dca_ctx->min_size);
++}
++
++static int init_dca_context(struct hns_roce_context *ctx, int cmd_fd,
++ struct hns_roce_alloc_ucontext_resp *resp,
++ struct hnsdv_context_attr *attr,
++ int page_size)
++{
++ struct hns_roce_dca_ctx *dca_ctx = &ctx->dca_ctx;
++ uint64_t mmap_key = resp->dca_mmap_key;
++ int mmap_size = resp->dca_mmap_size;
++ int max_qps = resp->dca_qps;
++ int ret;
++
++ if (!(ctx->config & HNS_ROCE_UCTX_RSP_DCA_FLAGS))
++ return 0;
+
+- if (!resp.cqe_size)
++ dca_ctx->unit_size = 0;
++ dca_ctx->mem_cnt = 0;
++
++ list_head_init(&dca_ctx->mem_list);
++ ret = pthread_spin_init(&dca_ctx->lock, PTHREAD_PROCESS_PRIVATE);
++ if (ret)
++ return ret;
++
++ if (!attr || !(attr->flags & HNSDV_CONTEXT_FLAGS_DCA))
++ return 0;
++
++ set_dca_pool_param(ctx, attr, page_size);
++
++ if (mmap_key) {
++ const unsigned int bits_per_qp = 2 * HNS_DCA_BITS_PER_STATUS;
++
++ if (!mmap_dca(ctx, cmd_fd, page_size, mmap_size, mmap_key)) {
++ dca_ctx->status_size = mmap_size;
++ dca_ctx->max_qps = min_t(int, max_qps,
++ mmap_size * 8 / bits_per_qp);
++ }
++ }
++
++ return 0;
++}
++
++static void uninit_dca_context(struct hns_roce_context *ctx)
++{
++ struct hns_roce_dca_ctx *dca_ctx = &ctx->dca_ctx;
++
++ if (!(ctx->config & HNS_ROCE_UCTX_RSP_DCA_FLAGS))
++ return;
++
++ pthread_spin_lock(&dca_ctx->lock);
++ hns_roce_cleanup_dca_mem(ctx);
++ pthread_spin_unlock(&dca_ctx->lock);
++ if (dca_ctx->buf_status)
++ munmap(dca_ctx->buf_status, dca_ctx->status_size);
++
++ pthread_spin_destroy(&dca_ctx->lock);
++}
++
++static int init_reset_context(struct hns_roce_context *ctx, int cmd_fd,
++ struct hns_roce_alloc_ucontext_resp *resp,
++ int page_size)
++{
++ uint64_t reset_mmap_key = resp->reset_mmap_key;
++ struct hns_roce_v2_reset_state *state;
++
++ /* The reset mmap key is 0, which means it is not supported. */
++ if (reset_mmap_key == 0)
++ return 0;
++
++ ctx->reset_state = mmap(NULL, page_size, PROT_READ, MAP_SHARED,
++ cmd_fd, reset_mmap_key);
++ if (ctx->reset_state == MAP_FAILED)
++ return -ENOMEM;
++
++ state = ctx->reset_state;
++ ctx->use_new_reset_flag = state->hw_ready;
++
++ return 0;
++}
++
++static int set_context_attr(struct hns_roce_device *hr_dev,
++ struct hns_roce_context *context,
++ struct hns_roce_alloc_ucontext_resp *resp)
++{
++ struct ibv_device_attr dev_attrs;
++ int i;
++
++ if (!resp->cqe_size)
+ context->cqe_size = HNS_ROCE_CQE_SIZE;
+- else if (resp.cqe_size <= HNS_ROCE_V3_CQE_SIZE)
+- context->cqe_size = resp.cqe_size;
++ else if (resp->cqe_size <= HNS_ROCE_V3_CQE_SIZE)
++ context->cqe_size = resp->cqe_size;
+ else
+ context->cqe_size = HNS_ROCE_V3_CQE_SIZE;
+
+- context->config = resp.config;
+- if (resp.config & HNS_ROCE_RSP_EXSGE_FLAGS)
+- context->max_inline_data = resp.max_inline_data;
++ context->config = resp->config;
++ if (resp->config & HNS_ROCE_RSP_EXSGE_FLAGS)
++ context->max_inline_data = resp->max_inline_data;
+
+- context->qp_table_shift = calc_table_shift(resp.qp_tab_size,
++ context->qp_table_shift = calc_table_shift(resp->qp_tab_size,
+ HNS_ROCE_QP_TABLE_BITS);
+ context->qp_table_mask = (1 << context->qp_table_shift) - 1;
+- pthread_mutex_init(&context->qp_table_mutex, NULL);
+ for (i = 0; i < HNS_ROCE_QP_TABLE_SIZE; ++i)
+ context->qp_table[i].refcnt = 0;
+
+- context->srq_table_shift = calc_table_shift(resp.srq_tab_size,
++ context->srq_table_shift = calc_table_shift(resp->srq_tab_size,
+ HNS_ROCE_SRQ_TABLE_BITS);
+ context->srq_table_mask = (1 << context->srq_table_shift) - 1;
+- pthread_mutex_init(&context->srq_table_mutex, NULL);
+ for (i = 0; i < HNS_ROCE_SRQ_TABLE_SIZE; ++i)
+ context->srq_table[i].refcnt = 0;
+
+@@ -149,28 +325,131 @@ static struct verbs_context *hns_roce_alloc_context(struct ibv_device *ibdev,
+ struct ibv_device_attr_ex,
+ orig_attr),
+ sizeof(dev_attrs)))
+- goto err_free;
++ return EIO;
+
+ hr_dev->hw_version = dev_attrs.hw_ver;
++ hr_dev->congest_cap = resp->congest_type;
+ context->max_qp_wr = dev_attrs.max_qp_wr;
+ context->max_sge = dev_attrs.max_sge;
+ context->max_cqe = dev_attrs.max_cqe;
+ context->max_srq_wr = dev_attrs.max_srq_wr;
+ context->max_srq_sge = dev_attrs.max_srq_sge;
+
++ return get_link_type(dev_attrs.vendor_part_id, &hr_dev->link_type);
++}
++
++static void ucontext_set_cmd(struct hns_roce_alloc_ucontext *cmd,
++ struct hnsdv_context_attr *attr)
++{
++ cmd->config |= HNS_ROCE_EXSGE_FLAGS | HNS_ROCE_RQ_INLINE_FLAGS |
++ HNS_ROCE_CQE_INLINE_FLAGS | HNS_ROCE_UCTX_DYN_QP_PGSZ;
++
++ if (!attr || !(attr->flags & HNSDV_CONTEXT_FLAGS_DCA))
++ return;
++
++ cmd->config |= HNS_ROCE_UCTX_CONFIG_DCA;
++
++ if (attr->comp_mask & HNSDV_CONTEXT_MASK_DCA_PRIME_QPS) {
++ cmd->comp |= HNS_ROCE_ALLOC_UCTX_COMP_DCA_MAX_QPS;
++ cmd->dca_max_qps = attr->dca_prime_qps;
++ }
++}
++
++static int hns_roce_init_context_lock(struct hns_roce_context *context)
++{
++ int ret;
++
++ ret = pthread_spin_init(&context->uar_lock, PTHREAD_PROCESS_PRIVATE);
++ if (ret)
++ return ret;
++
++ ret = pthread_mutex_init(&context->qp_table_mutex, NULL);
++ if (ret)
++ goto destroy_uar_lock;
++
++ ret = pthread_mutex_init(&context->srq_table_mutex, NULL);
++ if (ret)
++ goto destroy_qp_mutex;
++
++ ret = pthread_mutex_init(&context->db_list_mutex, NULL);
++ if (ret)
++ goto destroy_srq_mutex;
++
++ return 0;
++
++destroy_srq_mutex:
++ pthread_mutex_destroy(&context->srq_table_mutex);
++
++destroy_qp_mutex:
++ pthread_mutex_destroy(&context->qp_table_mutex);
++
++destroy_uar_lock:
++ pthread_spin_destroy(&context->uar_lock);
++ return ret;
++}
++
++static void hns_roce_destroy_context_lock(struct hns_roce_context *context)
++{
++ pthread_spin_destroy(&context->uar_lock);
++ pthread_mutex_destroy(&context->qp_table_mutex);
++ pthread_mutex_destroy(&context->srq_table_mutex);
++ pthread_mutex_destroy(&context->db_list_mutex);
++}
++
++static struct verbs_context *hns_roce_alloc_context(struct ibv_device *ibdev,
++ int cmd_fd,
++ void *private_data)
++{
++ struct hnsdv_context_attr *ctx_attr = private_data;
++ struct hns_roce_device *hr_dev = to_hr_dev(ibdev);
++ struct hns_roce_alloc_ucontext_resp resp = {};
++ struct hns_roce_alloc_ucontext cmd = {};
++ struct hns_roce_context *context;
++
++ context = verbs_init_and_alloc_context(ibdev, cmd_fd, context, ibv_ctx,
++ RDMA_DRIVER_HNS);
++ if (!context)
++ return NULL;
++
++ ucontext_set_cmd(&cmd, ctx_attr);
++ if (ibv_cmd_get_context(&context->ibv_ctx, &cmd.ibv_cmd, sizeof(cmd),
++ &resp.ibv_resp, sizeof(resp)))
++ goto err_ibv_cmd;
++
++ if (hns_roce_init_context_lock(context))
++ goto err_ibv_cmd;
++
++ if (set_context_attr(hr_dev, context, &resp))
++ goto err_set_attr;
++
+ context->uar = mmap(NULL, hr_dev->page_size, PROT_READ | PROT_WRITE,
+ MAP_SHARED, cmd_fd, 0);
+ if (context->uar == MAP_FAILED)
+- goto err_free;
++ goto err_set_attr;
++
++ if (init_dca_context(context, cmd_fd,
++ &resp, ctx_attr, hr_dev->page_size))
++ goto err_set_attr;
+
+- pthread_spin_init(&context->uar_lock, PTHREAD_PROCESS_PRIVATE);
++ if (init_reset_context(context, cmd_fd, &resp, hr_dev->page_size))
++ goto reset_free;
++
++ if (hns_roce_mmap(hr_dev, context, cmd_fd))
++ goto uar_free;
+
+ verbs_set_ops(&context->ibv_ctx, &hns_common_ops);
+ verbs_set_ops(&context->ibv_ctx, &hr_dev->u_hw->hw_ops);
+
+ return &context->ibv_ctx;
+
+-err_free:
++uar_free:
++ if (context->reset_state)
++ munmap(context->reset_state, hr_dev->page_size);
++reset_free:
++ uninit_dca_context(context);
++err_set_attr:
++ hns_roce_destroy_context_lock(context);
++err_ibv_cmd:
+ verbs_uninit_context(&context->ibv_ctx);
+ free(context);
+ return NULL;
+@@ -182,6 +461,10 @@ static void hns_roce_free_context(struct ibv_context *ibctx)
+ struct hns_roce_context *context = to_hr_ctx(ibctx);
+
+ munmap(context->uar, hr_dev->page_size);
++ if (context->reset_state)
++ munmap(context->reset_state, hr_dev->page_size);
++ uninit_dca_context(context);
++ hns_roce_destroy_context_lock(context);
+ verbs_uninit_context(&context->ibv_ctx);
+ free(context);
+ }
+@@ -216,4 +499,17 @@ static const struct verbs_device_ops hns_roce_dev_ops = {
+ .uninit_device = hns_uninit_device,
+ .alloc_context = hns_roce_alloc_context,
+ };
++
++bool is_hns_dev(struct ibv_device *device)
++{
++ struct verbs_device *verbs_device = verbs_get_device(device);
++
++ return verbs_device->ops == &hns_roce_dev_ops;
++}
++
++bool hnsdv_is_supported(struct ibv_device *device)
++{
++ return is_hns_dev(device);
++}
++
+ PROVIDER_DRIVER(hns, hns_roce_dev_ops);
+diff --git a/providers/hns/hns_roce_u.h b/providers/hns/hns_roce_u.h
+index afb68fe..5eedb81 100644
+--- a/providers/hns/hns_roce_u.h
++++ b/providers/hns/hns_roce_u.h
+@@ -35,6 +35,7 @@
+
+ #include
+ #include
++#include
+ #include
+
+ #include
+@@ -44,6 +45,7 @@
+ #include
+ #include
+ #include
++#include
+ #include
+ #include "hns_roce_u_abi.h"
+
+@@ -52,6 +54,8 @@
+
+ #define PFX "hns: "
+
++typedef _Atomic(uint64_t) atomic_bitmap_t;
++
+ /* The minimum page size is 4K for hardware */
+ #define HNS_HW_PAGE_SHIFT 12
+ #define HNS_HW_PAGE_SIZE (1 << HNS_HW_PAGE_SHIFT)
+@@ -147,17 +151,28 @@
+
+ #define hr_reg_read(ptr, field) _hr_reg_read(ptr, field)
+
++enum {
++ HNS_ROCE_CAP_FLAG_DCA_MODE = BIT(15),
++};
++
+ #define HNS_ROCE_QP_TABLE_BITS 8
+ #define HNS_ROCE_QP_TABLE_SIZE BIT(HNS_ROCE_QP_TABLE_BITS)
+
+ #define HNS_ROCE_SRQ_TABLE_BITS 8
+ #define HNS_ROCE_SRQ_TABLE_SIZE BIT(HNS_ROCE_SRQ_TABLE_BITS)
+
++enum hns_device_link_type {
++ HNS_DEV_LINK_TYPE_ETH,
++ HNS_DEV_LINK_TYPE_HCCS,
++};
++
+ struct hns_roce_device {
+ struct verbs_device ibv_dev;
+ int page_size;
+ const struct hns_roce_u_hw *u_hw;
+ int hw_version;
++ uint8_t congest_cap;
++ enum hns_device_link_type link_type;
+ };
+
+ struct hns_roce_buf {
+@@ -182,6 +197,11 @@ enum hns_roce_pktype {
+ HNS_ROCE_PKTYPE_ROCE_V2_IPV4,
+ };
+
++enum hns_roce_tc_map_mode {
++ HNS_ROCE_TC_MAP_MODE_PRIO,
++ HNS_ROCE_TC_MAP_MODE_DSCP,
++};
++
+ struct hns_roce_db_page {
+ struct hns_roce_db_page *prev, *next;
+ struct hns_roce_buf buf;
+@@ -190,9 +210,39 @@ struct hns_roce_db_page {
+ unsigned long *bitmap;
+ };
+
++struct hns_roce_spinlock {
++ pthread_spinlock_t lock;
++ int need_lock;
++};
++
++#define HNS_DCA_MAX_MEM_SIZE ~0UL
++#define HNS_DCA_DEFAULT_UNIT_PAGES 16
++
++struct hns_roce_dca_ctx {
++ struct list_head mem_list;
++ pthread_spinlock_t lock;
++ int mem_cnt;
++ unsigned int unit_size;
++ uint64_t max_size;
++ uint64_t min_size;
++ uint64_t curr_size;
++
++#define HNS_DCA_BITS_PER_STATUS 1
++ unsigned int max_qps;
++ unsigned int status_size;
++ atomic_bitmap_t *buf_status;
++ atomic_bitmap_t *sync_status;
++};
++
++struct hns_roce_v2_reset_state {
++ uint32_t is_reset;
++ uint32_t hw_ready;
++};
++
+ struct hns_roce_context {
+ struct verbs_context ibv_ctx;
+ void *uar;
++ void *reset_state;
+ pthread_spinlock_t uar_lock;
+
+ struct {
+@@ -222,17 +272,32 @@ struct hns_roce_context {
+ unsigned int cqe_size;
+ uint32_t config;
+ unsigned int max_inline_data;
++ struct hns_roce_dca_ctx dca_ctx;
++ bool use_new_reset_flag;
++ bool reseted;
++};
++
++struct hns_roce_td {
++ struct ibv_td ibv_td;
++ atomic_int refcount;
+ };
+
+ struct hns_roce_pd {
+ struct ibv_pd ibv_pd;
+ unsigned int pdn;
++ atomic_int refcount;
++ struct hns_roce_pd *protection_domain;
++};
++
++struct hns_roce_pad {
++ struct hns_roce_pd pd;
++ struct hns_roce_td *td;
+ };
+
+ struct hns_roce_cq {
+ struct verbs_cq verbs_cq;
+ struct hns_roce_buf buf;
+- pthread_spinlock_t lock;
++ struct hns_roce_spinlock hr_lock;
+ unsigned int cqn;
+ unsigned int cq_depth;
+ unsigned int cons_index;
+@@ -242,6 +307,12 @@ struct hns_roce_cq {
+ unsigned long flags;
+ unsigned int cqe_size;
+ struct hns_roce_v2_cqe *cqe;
++ struct ibv_pd *parent_domain;
++ struct list_head list_sq;
++ struct list_head list_rq;
++ struct list_head list_srq;
++ struct list_head list_xrc_srq;
++ struct hns_roce_v2_cqe *sw_cqe;
+ };
+
+ struct hns_roce_idx_que {
+@@ -268,7 +339,7 @@ struct hns_roce_srq {
+ struct hns_roce_idx_que idx_que;
+ struct hns_roce_buf wqe_buf;
+ struct hns_roce_rinl_buf srq_rinl_buf;
+- pthread_spinlock_t lock;
++ struct hns_roce_spinlock hr_lock;
+ unsigned long *wrid;
+ unsigned int srqn;
+ unsigned int wqe_cnt;
+@@ -278,11 +349,12 @@ struct hns_roce_srq {
+ unsigned int *rdb;
+ unsigned int cap_flags;
+ unsigned short counter;
++ struct list_node xrc_srcq_node;
+ };
+
+ struct hns_roce_wq {
+ unsigned long *wrid;
+- pthread_spinlock_t lock;
++ struct hns_roce_spinlock hr_lock;
+ unsigned int wqe_cnt;
+ int max_post;
+ unsigned int head;
+@@ -309,11 +381,19 @@ struct hns_roce_sge_ex {
+ unsigned int sge_shift;
+ };
+
++struct hns_roce_dca_buf {
++ void **bufs;
++ unsigned int max_cnt;
++ unsigned int shift;
++ unsigned int dcan;
++};
++
+ struct hns_roce_qp {
+ struct verbs_qp verbs_qp;
+ struct hns_roce_buf buf;
++ struct hns_roce_dca_buf dca_wqe;
+ int max_inline_data;
+- int buf_size;
++ unsigned int buf_size;
+ unsigned int sq_signal_bits;
+ struct hns_roce_wq sq;
+ struct hns_roce_wq rq;
+@@ -323,6 +403,9 @@ struct hns_roce_qp {
+ unsigned int next_sge;
+ int port_num;
+ uint8_t sl;
++ uint8_t tc_mode;
++ uint8_t priority;
++ uint8_t pageshift;
+ unsigned int qkey;
+ enum ibv_mtu path_mtu;
+
+@@ -336,6 +419,10 @@ struct hns_roce_qp {
+ void *cur_wqe;
+ unsigned int rb_sq_head; /* roll back sq head */
+ struct hns_roce_sge_info sge_info;
++
++ struct list_node rcq_node;
++ struct list_node scq_node;
++ struct list_node srcq_node;
+ };
+
+ struct hns_roce_av {
+@@ -360,11 +447,23 @@ struct hns_roce_u_hw {
+ struct verbs_context_ops hw_ops;
+ };
+
++struct hns_roce_dca_attach_attr {
++ uint32_t sq_offset;
++ uint32_t sge_offset;
++ uint32_t rq_offset;
++ bool force;
++};
++
++struct hns_roce_dca_detach_attr {
++ uint32_t sq_index;
++};
++
+ /*
+ * The entries's buffer should be aligned to a multiple of the hardware's
+ * minimum page size.
+ */
+ #define hr_hw_page_align(x) align(x, HNS_HW_PAGE_SIZE)
++#define hr_hw_page_count(x) (hr_hw_page_align(x) / HNS_HW_PAGE_SIZE)
+
+ static inline unsigned int to_hr_hem_entries_size(int count, int buf_shift)
+ {
+@@ -398,9 +497,35 @@ static inline struct hns_roce_context *to_hr_ctx(struct ibv_context *ibv_ctx)
+ return container_of(ibv_ctx, struct hns_roce_context, ibv_ctx.context);
+ }
+
++static inline struct hns_roce_td *to_hr_td(struct ibv_td *ibv_td)
++{
++ return container_of(ibv_td, struct hns_roce_td, ibv_td);
++}
++
++/* to_hr_pd always returns the real hns_roce_pd obj. */
+ static inline struct hns_roce_pd *to_hr_pd(struct ibv_pd *ibv_pd)
+ {
+- return container_of(ibv_pd, struct hns_roce_pd, ibv_pd);
++ struct hns_roce_pd *pd =
++ container_of(ibv_pd, struct hns_roce_pd, ibv_pd);
++
++ if (pd->protection_domain)
++ return pd->protection_domain;
++
++ return pd;
++}
++
++static inline struct hns_roce_pad *to_hr_pad(struct ibv_pd *ibv_pd)
++{
++ struct hns_roce_pad *pad =
++ ibv_pd ?
++ container_of(ibv_pd, struct hns_roce_pad, pd.ibv_pd) :
++ NULL;
++
++ if (pad && pad->pd.protection_domain)
++ return pad;
++
++ /* Otherwise ibv_pd isn't a parent_domain */
++ return NULL;
+ }
+
+ static inline struct hns_roce_cq *to_hr_cq(struct ibv_cq *ibv_cq)
+@@ -423,14 +548,63 @@ static inline struct hns_roce_ah *to_hr_ah(struct ibv_ah *ibv_ah)
+ return container_of(ibv_ah, struct hns_roce_ah, ibv_ah);
+ }
+
++static inline int hns_roce_spin_lock(struct hns_roce_spinlock *hr_lock)
++{
++ if (hr_lock->need_lock)
++ return pthread_spin_lock(&hr_lock->lock);
++
++ return 0;
++}
++
++static inline int hns_roce_spin_unlock(struct hns_roce_spinlock *hr_lock)
++{
++ if (hr_lock->need_lock)
++ return pthread_spin_unlock(&hr_lock->lock);
++
++ return 0;
++}
++
++#define HNS_ROCE_BIT_MASK(nr) (1UL << ((nr) % 64))
++#define HNS_ROCE_BIT_WORD(nr) ((nr) / 64)
++
++static inline bool atomic_test_bit(atomic_bitmap_t *p, uint32_t nr)
++{
++ p += HNS_ROCE_BIT_WORD(nr);
++ return !!(atomic_load(p) & HNS_ROCE_BIT_MASK(nr));
++}
++
++static inline bool test_and_set_bit_lock(atomic_bitmap_t *p, uint32_t nr)
++{
++ uint64_t mask = HNS_ROCE_BIT_MASK(nr);
++
++ p += HNS_ROCE_BIT_WORD(nr);
++ if (atomic_load(p) & mask)
++ return true;
++
++ return (atomic_fetch_or(p, mask) & mask) != 0;
++}
++
++static inline void clear_bit_unlock(atomic_bitmap_t *p, uint32_t nr)
++{
++ p += HNS_ROCE_BIT_WORD(nr);
++ atomic_fetch_and(p, ~HNS_ROCE_BIT_MASK(nr));
++}
++
++bool is_hns_dev(struct ibv_device *device);
++
+ int hns_roce_u_query_device(struct ibv_context *context,
+ const struct ibv_query_device_ex_input *input,
+ struct ibv_device_attr_ex *attr, size_t attr_size);
+ int hns_roce_u_query_port(struct ibv_context *context, uint8_t port,
+ struct ibv_port_attr *attr);
+
++struct ibv_td *hns_roce_u_alloc_td(struct ibv_context *context,
++ struct ibv_td_init_attr *attr);
++int hns_roce_u_dealloc_td(struct ibv_td *ibv_td);
++struct ibv_pd *hns_roce_u_alloc_pad(struct ibv_context *context,
++ struct ibv_parent_domain_init_attr *attr);
+ struct ibv_pd *hns_roce_u_alloc_pd(struct ibv_context *context);
+-int hns_roce_u_free_pd(struct ibv_pd *pd);
++int hns_roce_u_dealloc_pd(struct ibv_pd *pd);
+
+ struct ibv_mr *hns_roce_u_reg_mr(struct ibv_pd *pd, void *addr, size_t length,
+ uint64_t hca_va, int access);
+@@ -489,9 +663,21 @@ int hns_roce_u_close_xrcd(struct ibv_xrcd *ibv_xrcd);
+ int hns_roce_alloc_buf(struct hns_roce_buf *buf, unsigned int size,
+ int page_size);
+ void hns_roce_free_buf(struct hns_roce_buf *buf);
++void hns_roce_qp_spinlock_destroy(struct hns_roce_qp *qp);
+
+ void hns_roce_free_qp_buf(struct hns_roce_qp *qp, struct hns_roce_context *ctx);
+
++int hns_roce_attach_dca_mem(struct hns_roce_context *ctx, uint32_t handle,
++ struct hns_roce_dca_attach_attr *attr,
++ uint32_t size, struct hns_roce_dca_buf *buf);
++void hns_roce_detach_dca_mem(struct hns_roce_context *ctx, uint32_t handle,
++ struct hns_roce_dca_detach_attr *attr);
++bool hns_roce_dca_start_post(struct hns_roce_dca_ctx *ctx, uint32_t dcan);
++void hns_roce_dca_stop_post(struct hns_roce_dca_ctx *ctx, uint32_t dcan);
++
++void hns_roce_shrink_dca_mem(struct hns_roce_context *ctx);
++void hns_roce_cleanup_dca_mem(struct hns_roce_context *ctx);
++
+ void hns_roce_init_qp_indices(struct hns_roce_qp *qp);
+
+ extern const struct hns_roce_u_hw hns_roce_u_hw_v2;
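
A standalone sketch of the 1-bit-per-QP lock protocol behind the atomic bitmap helpers declared above (C11 atomics only; names are illustrative, and the atomic_load() fast path of test_and_set_bit_lock() is dropped for brevity):

#include <stdatomic.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

typedef _Atomic(uint64_t) bitmap_word_t;

#define DEMO_BIT_MASK(nr) (1UL << ((nr) % 64))
#define DEMO_BIT_WORD(nr) ((nr) / 64)

/* Returns true if the bit was already set, i.e. the lock is held elsewhere. */
static bool demo_test_and_set_bit_lock(bitmap_word_t *p, uint32_t nr)
{
	uint64_t mask = DEMO_BIT_MASK(nr);

	p += DEMO_BIT_WORD(nr);
	return (atomic_fetch_or(p, mask) & mask) != 0;
}

static void demo_clear_bit_unlock(bitmap_word_t *p, uint32_t nr)
{
	p += DEMO_BIT_WORD(nr);
	atomic_fetch_and(p, ~DEMO_BIT_MASK(nr));
}

int main(void)
{
	bitmap_word_t status[2] = {0};	/* room for 128 QPs, 1 bit each */
	uint32_t dcan = 70;		/* lands in the second word */

	if (!demo_test_and_set_bit_lock(status, dcan)) {
		printf("QP %u: this thread owns the post window\n", dcan);
		demo_clear_bit_unlock(status, dcan);
	}
	return 0;
}
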
+diff --git a/providers/hns/hns_roce_u_abi.h b/providers/hns/hns_roce_u_abi.h
+index 3f98eb3..7e9bbc1 100644
+--- a/providers/hns/hns_roce_u_abi.h
++++ b/providers/hns/hns_roce_u_abi.h
+@@ -36,6 +36,7 @@
+ #include
+ #include
+ #include
++#include "hnsdv.h"
+
+ DECLARE_DRV_CMD(hns_roce_alloc_pd, IB_USER_VERBS_CMD_ALLOC_PD,
+ empty, hns_roce_ib_alloc_pd_resp);
+@@ -64,4 +65,7 @@ DECLARE_DRV_CMD(hns_roce_create_srq_ex, IB_USER_VERBS_CMD_CREATE_XSRQ,
+ DECLARE_DRV_CMD(hns_roce_create_ah, IB_USER_VERBS_CMD_CREATE_AH, empty,
+ hns_roce_ib_create_ah_resp);
+
++DECLARE_DRV_CMD(hns_roce_modify_qp_ex, IB_USER_VERBS_EX_CMD_MODIFY_QP,
++ empty, hns_roce_ib_modify_qp_resp);
++
+ #endif /* _HNS_ROCE_U_ABI_H */
+diff --git a/providers/hns/hns_roce_u_buf.c b/providers/hns/hns_roce_u_buf.c
+index 471dd9c..780683e 100644
+--- a/providers/hns/hns_roce_u_buf.c
++++ b/providers/hns/hns_roce_u_buf.c
+@@ -56,7 +56,473 @@ int hns_roce_alloc_buf(struct hns_roce_buf *buf, unsigned int size,
+
+ void hns_roce_free_buf(struct hns_roce_buf *buf)
+ {
++ if (!buf->buf)
++ return;
++
+ ibv_dofork_range(buf->buf, buf->length);
+
+ munmap(buf->buf, buf->length);
+ }
++
++struct hns_roce_dca_mem {
++ uint32_t handle;
++ struct list_node entry;
++ struct hns_roce_buf buf;
++ struct hns_roce_context *ctx;
++};
++
++static void free_dca_mem(struct hns_roce_context *ctx,
++ struct hns_roce_dca_mem *mem)
++{
++ hns_roce_free_buf(&mem->buf);
++ free(mem);
++}
++
++static struct hns_roce_dca_mem *alloc_dca_mem(uint32_t size)
++{
++ struct hns_roce_dca_mem *mem = NULL;
++ int ret;
++
++ mem = malloc(sizeof(struct hns_roce_dca_mem));
++ if (!mem) {
++ errno = ENOMEM;
++ return NULL;
++ }
++
++ ret = hns_roce_alloc_buf(&mem->buf, size, HNS_HW_PAGE_SIZE);
++ if (ret) {
++ errno = ENOMEM;
++ free(mem);
++ return NULL;
++ }
++
++ return mem;
++}
++
++static inline uint64_t dca_mem_to_key(struct hns_roce_dca_mem *dca_mem)
++{
++ return (uintptr_t)dca_mem;
++}
++
++static struct hns_roce_dca_mem *key_to_dca_mem(struct hns_roce_dca_ctx *ctx,
++ uint64_t key)
++{
++ struct hns_roce_dca_mem *mem;
++ struct hns_roce_dca_mem *tmp;
++
++ list_for_each_safe(&ctx->mem_list, mem, tmp, entry) {
++ if (dca_mem_to_key(mem) == key)
++ return mem;
++ }
++
++ return NULL;
++}
++
++static inline void *dca_mem_addr(struct hns_roce_dca_mem *dca_mem, int offset)
++{
++ return dca_mem->buf.buf + offset;
++}
++
++static int register_dca_mem(struct hns_roce_context *ctx, uint64_t key,
++ void *addr, uint32_t size, uint32_t *handle)
++{
++ struct ib_uverbs_attr *attr;
++ int ret;
++
++ DECLARE_COMMAND_BUFFER(cmd, HNS_IB_OBJECT_DCA_MEM,
++ HNS_IB_METHOD_DCA_MEM_REG, 4);
++ fill_attr_in_uint32(cmd, HNS_IB_ATTR_DCA_MEM_REG_LEN, size);
++ fill_attr_in_uint64(cmd, HNS_IB_ATTR_DCA_MEM_REG_ADDR,
++ ioctl_ptr_to_u64(addr));
++ fill_attr_in_uint64(cmd, HNS_IB_ATTR_DCA_MEM_REG_KEY, key);
++ attr = fill_attr_out_obj(cmd, HNS_IB_ATTR_DCA_MEM_REG_HANDLE);
++
++ ret = execute_ioctl(&ctx->ibv_ctx.context, cmd);
++ if (ret) {
++ verbs_err(&ctx->ibv_ctx, "failed to reg DCA mem, ret = %d.\n",
++ ret);
++ return ret;
++ }
++
++ *handle = read_attr_obj(HNS_IB_ATTR_DCA_MEM_REG_HANDLE, attr);
++
++ return 0;
++}
++
++static void deregister_dca_mem(struct hns_roce_context *ctx, uint32_t handle)
++{
++ int ret;
++
++ DECLARE_COMMAND_BUFFER(cmd, HNS_IB_OBJECT_DCA_MEM,
++ HNS_IB_METHOD_DCA_MEM_DEREG, 1);
++ fill_attr_in_obj(cmd, HNS_IB_ATTR_DCA_MEM_DEREG_HANDLE, handle);
++ ret = execute_ioctl(&ctx->ibv_ctx.context, cmd);
++ if (ret)
++ verbs_warn(&ctx->ibv_ctx,
++ "failed to dereg DCA mem-%u, ret = %d.\n",
++ handle, ret);
++}
++
++void hns_roce_cleanup_dca_mem(struct hns_roce_context *ctx)
++{
++ struct hns_roce_dca_ctx *dca_ctx = &ctx->dca_ctx;
++ struct hns_roce_dca_mem *mem;
++ struct hns_roce_dca_mem *tmp;
++
++ list_for_each_safe(&dca_ctx->mem_list, mem, tmp, entry)
++ deregister_dca_mem(ctx, mem->handle);
++}
++
++struct hns_dca_mem_shrink_resp {
++ uint32_t free_mems;
++ uint64_t free_key;
++};
++
++static int shrink_dca_mem(struct hns_roce_context *ctx, uint32_t handle,
++ uint64_t size, struct hns_dca_mem_shrink_resp *resp)
++{
++ int ret;
++
++ DECLARE_COMMAND_BUFFER(cmd, HNS_IB_OBJECT_DCA_MEM,
++ HNS_IB_METHOD_DCA_MEM_SHRINK, 4);
++ fill_attr_in_obj(cmd, HNS_IB_ATTR_DCA_MEM_SHRINK_HANDLE, handle);
++ fill_attr_in_uint64(cmd, HNS_IB_ATTR_DCA_MEM_SHRINK_RESERVED_SIZE, size);
++ fill_attr_out(cmd, HNS_IB_ATTR_DCA_MEM_SHRINK_OUT_FREE_KEY,
++ &resp->free_key, sizeof(resp->free_key));
++ fill_attr_out(cmd, HNS_IB_ATTR_DCA_MEM_SHRINK_OUT_FREE_MEMS,
++ &resp->free_mems, sizeof(resp->free_mems));
++
++ ret = execute_ioctl(&ctx->ibv_ctx.context, cmd);
++ if (ret)
++ verbs_err(&ctx->ibv_ctx, "failed to shrink DCA mem, ret = %d.\n",
++ ret);
++
++ return ret;
++}
++
++struct hns_dca_mem_query_resp {
++ uint64_t key;
++ uint32_t offset;
++ uint32_t page_count;
++};
++
++static int query_dca_mem(struct hns_roce_context *ctx, uint32_t handle,
++ uint32_t index, struct hns_dca_mem_query_resp *resp)
++{
++ int ret;
++
++ DECLARE_COMMAND_BUFFER(cmd, HNS_IB_OBJECT_DCA_MEM,
++ HNS_IB_METHOD_DCA_MEM_QUERY, 5);
++ fill_attr_in_obj(cmd, HNS_IB_ATTR_DCA_MEM_QUERY_HANDLE, handle);
++ fill_attr_in_uint32(cmd, HNS_IB_ATTR_DCA_MEM_QUERY_PAGE_INDEX, index);
++ fill_attr_out(cmd, HNS_IB_ATTR_DCA_MEM_QUERY_OUT_KEY,
++ &resp->key, sizeof(resp->key));
++ fill_attr_out(cmd, HNS_IB_ATTR_DCA_MEM_QUERY_OUT_OFFSET,
++ &resp->offset, sizeof(resp->offset));
++ fill_attr_out(cmd, HNS_IB_ATTR_DCA_MEM_QUERY_OUT_PAGE_COUNT,
++ &resp->page_count, sizeof(resp->page_count));
++ ret = execute_ioctl(&ctx->ibv_ctx.context, cmd);
++ if (ret)
++ verbs_err(&ctx->ibv_ctx,
++ "failed to query DCA mem-%u, ret = %d.\n",
++ handle, ret);
++
++ return ret;
++}
++
++void hns_roce_detach_dca_mem(struct hns_roce_context *ctx, uint32_t handle,
++ struct hns_roce_dca_detach_attr *attr)
++{
++ int ret;
++
++ DECLARE_COMMAND_BUFFER(cmd, HNS_IB_OBJECT_DCA_MEM,
++ HNS_IB_METHOD_DCA_MEM_DETACH, 4);
++ fill_attr_in_obj(cmd, HNS_IB_ATTR_DCA_MEM_DETACH_HANDLE, handle);
++ fill_attr_in_uint32(cmd, HNS_IB_ATTR_DCA_MEM_DETACH_SQ_INDEX,
++ attr->sq_index);
++ ret = execute_ioctl(&ctx->ibv_ctx.context, cmd);
++ if (ret)
++ verbs_warn(&ctx->ibv_ctx,
++ "failed to detach DCA mem-%u, ret = %d.\n",
++ handle, ret);
++}
++
++struct hns_dca_mem_attach_resp {
++#define HNS_DCA_ATTACH_OUT_FLAGS_NEW_BUFFER BIT(0)
++ uint32_t alloc_flags;
++ uint32_t alloc_pages;
++};
++
++static int attach_dca_mem(struct hns_roce_context *ctx, uint32_t handle,
++ struct hns_roce_dca_attach_attr *attr,
++ struct hns_dca_mem_attach_resp *resp)
++{
++ int ret;
++
++ DECLARE_COMMAND_BUFFER(cmd, HNS_IB_OBJECT_DCA_MEM,
++ HNS_IB_METHOD_DCA_MEM_ATTACH, 6);
++ fill_attr_in_obj(cmd, HNS_IB_ATTR_DCA_MEM_ATTACH_HANDLE, handle);
++ fill_attr_in_uint32(cmd, HNS_IB_ATTR_DCA_MEM_ATTACH_SQ_OFFSET,
++ attr->sq_offset);
++ fill_attr_in_uint32(cmd, HNS_IB_ATTR_DCA_MEM_ATTACH_SGE_OFFSET,
++ attr->sge_offset);
++ fill_attr_in_uint32(cmd, HNS_IB_ATTR_DCA_MEM_ATTACH_RQ_OFFSET,
++ attr->rq_offset);
++ fill_attr_out(cmd, HNS_IB_ATTR_DCA_MEM_ATTACH_OUT_ALLOC_FLAGS,
++ &resp->alloc_flags, sizeof(resp->alloc_flags));
++ fill_attr_out(cmd, HNS_IB_ATTR_DCA_MEM_ATTACH_OUT_ALLOC_PAGES,
++ &resp->alloc_pages, sizeof(resp->alloc_pages));
++ ret = execute_ioctl(&ctx->ibv_ctx.context, cmd);
++ if (ret)
++ verbs_err(&ctx->ibv_ctx,
++ "failed to attach DCA mem-%u, ret = %d.\n",
++ handle, ret);
++
++ return ret;
++}
++
++static bool add_dca_mem_enabled(struct hns_roce_dca_ctx *ctx,
++ uint32_t alloc_size)
++{
++ bool enable;
++
++ pthread_spin_lock(&ctx->lock);
++
++ if (ctx->unit_size == 0) /* Pool size can't be increased */
++ enable = false;
++ else if (ctx->max_size == HNS_DCA_MAX_MEM_SIZE) /* Pool size is unlimited */
++ enable = true;
++ else /* Pool size doesn't exceed max size */
++ enable = (ctx->curr_size + alloc_size) < ctx->max_size;
++
++ pthread_spin_unlock(&ctx->lock);
++
++ return enable;
++}
++
++static bool shrink_dca_mem_enabled(struct hns_roce_dca_ctx *ctx)
++{
++ bool enable;
++
++ pthread_spin_lock(&ctx->lock);
++ enable = ctx->mem_cnt > 0 && ctx->min_size < ctx->max_size;
++ pthread_spin_unlock(&ctx->lock);
++
++ return enable;
++}
++
++static int add_dca_mem(struct hns_roce_context *ctx, uint32_t size)
++{
++ struct hns_roce_dca_ctx *dca_ctx = &ctx->dca_ctx;
++ struct hns_roce_dca_mem *mem;
++ int ret;
++
++ if (!add_dca_mem_enabled(&ctx->dca_ctx, size))
++ return -ENOMEM;
++
++ /* Step 1: Alloc DCA mem address */
++ mem = alloc_dca_mem(
++ DIV_ROUND_UP(size, dca_ctx->unit_size) * dca_ctx->unit_size);
++ if (!mem)
++ return -ENOMEM;
++
++ /* Step 2: Register DCA mem uobject to pin user address */
++ ret = register_dca_mem(ctx, dca_mem_to_key(mem), dca_mem_addr(mem, 0),
++ mem->buf.length, &mem->handle);
++ if (ret) {
++ free_dca_mem(ctx, mem);
++ return ret;
++ }
++
++ /* Step 3: Add DCA mem node to pool */
++ pthread_spin_lock(&dca_ctx->lock);
++ list_add_tail(&dca_ctx->mem_list, &mem->entry);
++ dca_ctx->mem_cnt++;
++ dca_ctx->curr_size += mem->buf.length;
++ pthread_spin_unlock(&dca_ctx->lock);
++
++ return 0;
++}
++
++void hns_roce_shrink_dca_mem(struct hns_roce_context *ctx)
++{
++ struct hns_roce_dca_ctx *dca_ctx = &ctx->dca_ctx;
++ struct hns_dca_mem_shrink_resp resp = {};
++ struct hns_roce_dca_mem *mem;
++ int dca_mem_cnt;
++ uint32_t handle;
++ int ret;
++
++ pthread_spin_lock(&dca_ctx->lock);
++ dca_mem_cnt = ctx->dca_ctx.mem_cnt;
++ pthread_spin_unlock(&dca_ctx->lock);
++ while (dca_mem_cnt > 0 && shrink_dca_mem_enabled(dca_ctx)) {
++ resp.free_mems = 0;
++ /* Step 1: Use any DCA mem uobject to shrink pool */
++ pthread_spin_lock(&dca_ctx->lock);
++ mem = list_tail(&dca_ctx->mem_list,
++ struct hns_roce_dca_mem, entry);
++ handle = mem ? mem->handle : 0;
++ pthread_spin_unlock(&dca_ctx->lock);
++ if (!mem)
++ break;
++
++ ret = shrink_dca_mem(ctx, handle, dca_ctx->min_size, &resp);
++ if (ret || likely(resp.free_mems < 1))
++ break;
++
++ /* Step 2: Remove shrunk DCA mem node from pool */
++ pthread_spin_lock(&dca_ctx->lock);
++ mem = key_to_dca_mem(dca_ctx, resp.free_key);
++ if (mem) {
++ list_del(&mem->entry);
++ dca_ctx->mem_cnt--;
++ dca_ctx->curr_size -= mem->buf.length;
++ }
++
++ handle = mem ? mem->handle : 0;
++ pthread_spin_unlock(&dca_ctx->lock);
++ if (!mem)
++ break;
++
++ /* Step 3: Destroy DCA mem uobject */
++ deregister_dca_mem(ctx, handle);
++ free_dca_mem(ctx, mem);
++ /* No free memory remains after deregistering one DCA mem */
++ if (resp.free_mems <= 1)
++ break;
++
++ dca_mem_cnt--;
++ }
++}
++
++static void config_dca_pages(void *addr, struct hns_roce_dca_buf *buf,
++ uint32_t page_index, int page_count)
++{
++ void **pages = &buf->bufs[page_index];
++ int page_size = 1 << buf->shift;
++ int i;
++
++ for (i = 0; i < page_count; i++) {
++ pages[i] = addr;
++ addr += page_size;
++ }
++}
++
++static int setup_dca_buf(struct hns_roce_context *ctx, uint32_t handle,
++ struct hns_roce_dca_buf *buf, uint32_t page_count)
++{
++ struct hns_roce_dca_ctx *dca_ctx = &ctx->dca_ctx;
++ struct hns_dca_mem_query_resp resp = {};
++ struct hns_roce_dca_mem *mem;
++ uint32_t idx = 0;
++ int ret;
++
++ while (idx < page_count && idx < buf->max_cnt) {
++ resp.page_count = 0;
++ ret = query_dca_mem(ctx, handle, idx, &resp);
++ if (ret)
++ return -ENOMEM;
++ if (resp.page_count < 1)
++ break;
++
++ pthread_spin_lock(&dca_ctx->lock);
++ mem = key_to_dca_mem(dca_ctx, resp.key);
++ if (mem && resp.offset < mem->buf.length) {
++ config_dca_pages(dca_mem_addr(mem, resp.offset),
++ buf, idx, resp.page_count);
++ } else {
++ pthread_spin_unlock(&dca_ctx->lock);
++ break;
++ }
++ pthread_spin_unlock(&dca_ctx->lock);
++
++ idx += resp.page_count;
++ }
++
++ return (idx >= page_count) ? 0 : -ENOMEM;
++}
++
++#define DCAN_TO_SYNC_BIT(n) ((n) * HNS_DCA_BITS_PER_STATUS)
++#define DCAN_TO_STAT_BIT(n) DCAN_TO_SYNC_BIT(n)
++
++#define MAX_DCA_TRY_LOCK_TIMES 10
++bool hns_roce_dca_start_post(struct hns_roce_dca_ctx *ctx, uint32_t dcan)
++{
++ atomic_bitmap_t *st = ctx->sync_status;
++ int try_times = 0;
++
++ if (!st || dcan >= ctx->max_qps)
++ return true;
++
++ while (test_and_set_bit_lock(st, DCAN_TO_SYNC_BIT(dcan)))
++ if (try_times++ > MAX_DCA_TRY_LOCK_TIMES)
++ return false;
++
++ return true;
++}
++
++void hns_roce_dca_stop_post(struct hns_roce_dca_ctx *ctx, uint32_t dcan)
++{
++ atomic_bitmap_t *st = ctx->sync_status;
++
++ if (!st || dcan >= ctx->max_qps)
++ return;
++
++ clear_bit_unlock(st, DCAN_TO_SYNC_BIT(dcan));
++}
++
++static bool check_dca_is_attached(struct hns_roce_dca_ctx *ctx, uint32_t dcan)
++{
++ atomic_bitmap_t *st = ctx->buf_status;
++
++ if (!st || dcan >= ctx->max_qps)
++ return false;
++
++ return atomic_test_bit(st, DCAN_TO_STAT_BIT(dcan));
++}
++
++#define DCA_EXPAND_MEM_TRY_TIMES 3
++int hns_roce_attach_dca_mem(struct hns_roce_context *ctx, uint32_t handle,
++ struct hns_roce_dca_attach_attr *attr,
++ uint32_t size, struct hns_roce_dca_buf *buf)
++{
++ uint32_t buf_pages = size >> buf->shift;
++ struct hns_dca_mem_attach_resp resp = {};
++ bool is_new_buf = true;
++ int try_times = 0;
++ int ret = 0;
++
++ if (!attr->force && check_dca_is_attached(&ctx->dca_ctx, buf->dcan))
++ return 0;
++
++ do {
++ resp.alloc_pages = 0;
++ ret = attach_dca_mem(ctx, handle, attr, &resp);
++ if (ret)
++ break;
++
++ if (resp.alloc_pages >= buf_pages) {
++ is_new_buf = !!(resp.alloc_flags &
++ HNS_DCA_ATTACH_OUT_FLAGS_NEW_BUFFER);
++ break;
++ }
++
++ ret = add_dca_mem(ctx, size);
++ if (ret)
++ break;
++ } while (try_times++ < DCA_EXPAND_MEM_TRY_TIMES);
++
++ if (ret || resp.alloc_pages < buf_pages) {
++ verbs_err(&ctx->ibv_ctx,
++ "failed to attach, size %u count %u != %u, ret = %d.\n",
++ size, buf_pages, resp.alloc_pages, ret);
++ return -ENOMEM;
++ }
++
++ /* No need to configure the user address if the DCA config is unchanged */
++ if (!is_new_buf && buf->bufs[0])
++ return 0;
++
++ return setup_dca_buf(ctx, handle, buf, buf_pages);
++}
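
A self-contained sketch of the grow-and-retry pattern used by hns_roce_attach_dca_mem() above: try to attach, and when the pool comes up short, add one allocation unit and retry a bounded number of times. attach_pages() and grow_pool() are stand-ins for the ATTACH ioctl and add_dca_mem(), not real driver calls.

#include <errno.h>
#include <stdio.h>

#define UNIT_PAGES		16	/* mirrors HNS_DCA_DEFAULT_UNIT_PAGES */
#define EXPAND_TRY_TIMES	3

static unsigned int pool_pages;		/* pages currently in the fake pool */

static unsigned int attach_pages(unsigned int need)
{
	return pool_pages < need ? pool_pages : need;
}

static int grow_pool(void)
{
	pool_pages += UNIT_PAGES;	/* add one unit-sized chunk */
	return 0;
}

static int attach_with_retry(unsigned int need_pages)
{
	int try = 0;

	do {
		if (attach_pages(need_pages) >= need_pages)
			return 0;

		if (grow_pool())
			break;
	} while (try++ < EXPAND_TRY_TIMES);

	return -ENOMEM;
}

int main(void)
{
	printf("attach 40 pages: %d\n", attach_with_retry(40));
	return 0;
}
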
+diff --git a/providers/hns/hns_roce_u_db.c b/providers/hns/hns_roce_u_db.c
+index 0314254..bbef988 100644
+--- a/providers/hns/hns_roce_u_db.c
++++ b/providers/hns/hns_roce_u_db.c
+@@ -116,6 +116,8 @@ found:
+
+ out:
+ pthread_mutex_unlock((pthread_mutex_t *)&ctx->db_list_mutex);
++ if (db)
++ *((unsigned int *)db) = 0;
+
+ return db;
+ }
+diff --git a/providers/hns/hns_roce_u_db.h b/providers/hns/hns_roce_u_db.h
+index 8c47a53..de288de 100644
+--- a/providers/hns/hns_roce_u_db.h
++++ b/providers/hns/hns_roce_u_db.h
+@@ -40,8 +40,14 @@
+
+ #define HNS_ROCE_WORD_NUM 2
+
+-static inline void hns_roce_write64(void *dest, __le32 val[HNS_ROCE_WORD_NUM])
++static inline void hns_roce_write64(struct hns_roce_context *ctx, void *dest,
++ __le32 val[HNS_ROCE_WORD_NUM])
+ {
++ struct hns_roce_v2_reset_state *state = ctx->reset_state;
++
++ if (state && state->is_reset)
++ return;
++
+ mmio_write64_le(dest, *(__le64 *)val);
+ }
+
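
The reset guard added to hns_roce_write64() above boils down to the pattern in this minimal sketch: once the mmap()ed reset page reports is_reset, doorbell writes silently become no-ops so a resetting device sees no further MMIO. The volatile store stands in for mmio_write64_le().

#include <stdint.h>

struct demo_reset_state {
	uint32_t is_reset;
};

void guarded_db_write(const struct demo_reset_state *state,
		      volatile uint64_t *db_reg, uint64_t val)
{
	if (state && state->is_reset)
		return;		/* drop the doorbell during reset */

	*db_reg = val;		/* stands in for mmio_write64_le() */
}
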
+diff --git a/providers/hns/hns_roce_u_hw_v2.c b/providers/hns/hns_roce_u_hw_v2.c
+index daef17a..9371150 100644
+--- a/providers/hns/hns_roce_u_hw_v2.c
++++ b/providers/hns/hns_roce_u_hw_v2.c
+@@ -199,19 +199,35 @@ static struct hns_roce_v2_cqe *next_cqe_sw_v2(struct hns_roce_cq *cq)
+ return get_sw_cqe_v2(cq, cq->cons_index);
+ }
+
++static inline bool check_qp_dca_enable(struct hns_roce_qp *qp)
++{
++ return !!qp->dca_wqe.bufs;
++}
++
++static inline void *get_wqe(struct hns_roce_qp *qp, unsigned int offset)
++{
++ if (likely(qp->buf.buf))
++ return qp->buf.buf + offset;
++ else if (unlikely(check_qp_dca_enable(qp)))
++ return qp->dca_wqe.bufs[offset >> qp->dca_wqe.shift] +
++ (offset & ((1 << qp->dca_wqe.shift) - 1));
++ else
++ return NULL;
++}
++
+ static void *get_recv_wqe_v2(struct hns_roce_qp *qp, unsigned int n)
+ {
+- return qp->buf.buf + qp->rq.offset + (n << qp->rq.wqe_shift);
++ return get_wqe(qp, qp->rq.offset + (n << qp->rq.wqe_shift));
+ }
+
+ static void *get_send_wqe(struct hns_roce_qp *qp, unsigned int n)
+ {
+- return qp->buf.buf + qp->sq.offset + (n << qp->sq.wqe_shift);
++ return get_wqe(qp, qp->sq.offset + (n << qp->sq.wqe_shift));
+ }
+
+ static void *get_send_sge_ex(struct hns_roce_qp *qp, unsigned int n)
+ {
+- return qp->buf.buf + qp->ex_sge.offset + (n << qp->ex_sge.sge_shift);
++ return get_wqe(qp, qp->ex_sge.offset + (n << qp->ex_sge.sge_shift));
+ }
+
+ static void *get_srq_wqe(struct hns_roce_srq *srq, unsigned int n)
+@@ -229,14 +245,14 @@ static void hns_roce_free_srq_wqe(struct hns_roce_srq *srq, uint16_t ind)
+ uint32_t bitmap_num;
+ int bit_num;
+
+- pthread_spin_lock(&srq->lock);
++ hns_roce_spin_lock(&srq->hr_lock);
+
+ bitmap_num = ind / BIT_CNT_PER_LONG;
+ bit_num = ind % BIT_CNT_PER_LONG;
+ srq->idx_que.bitmap[bitmap_num] |= (1ULL << bit_num);
+ srq->idx_que.tail++;
+
+- pthread_spin_unlock(&srq->lock);
++ hns_roce_spin_unlock(&srq->hr_lock);
+ }
+
+ static int get_srq_from_cqe(struct hns_roce_v2_cqe *cqe,
+@@ -268,9 +284,9 @@ static int hns_roce_v2_wq_overflow(struct hns_roce_wq *wq, unsigned int nreq,
+ if (cur + nreq < wq->max_post)
+ return 0;
+
+- pthread_spin_lock(&cq->lock);
++ hns_roce_spin_lock(&cq->hr_lock);
+ cur = wq->head - wq->tail;
+- pthread_spin_unlock(&cq->lock);
++ hns_roce_spin_unlock(&cq->hr_lock);
+
+ return cur + nreq >= wq->max_post;
+ }
+@@ -284,7 +300,8 @@ static void hns_roce_update_rq_db(struct hns_roce_context *ctx,
+ hr_reg_write(&rq_db, DB_CMD, HNS_ROCE_V2_RQ_DB);
+ hr_reg_write(&rq_db, DB_PI, rq_head);
+
+- hns_roce_write64(ctx->uar + ROCEE_VF_DB_CFG0_OFFSET, (__le32 *)&rq_db);
++ hns_roce_write64(ctx, ctx->uar + ROCEE_VF_DB_CFG0_OFFSET,
++ (__le32 *)&rq_db);
+ }
+
+ static void hns_roce_update_sq_db(struct hns_roce_context *ctx,
+@@ -298,7 +315,7 @@ static void hns_roce_update_sq_db(struct hns_roce_context *ctx,
+ hr_reg_write(&sq_db, DB_PI, qp->sq.head);
+ hr_reg_write(&sq_db, DB_SL, qp->sl);
+
+- hns_roce_write64(qp->sq.db_reg, (__le32 *)&sq_db);
++ hns_roce_write64(ctx, qp->sq.db_reg, (__le32 *)&sq_db);
+ }
+
+ static void hns_roce_write512(uint64_t *dest, uint64_t *val)
+@@ -309,6 +326,12 @@ static void hns_roce_write512(uint64_t *dest, uint64_t *val)
+ static void hns_roce_write_dwqe(struct hns_roce_qp *qp, void *wqe)
+ {
+ struct hns_roce_rc_sq_wqe *rc_sq_wqe = wqe;
++ struct ibv_qp *ibvqp = &qp->verbs_qp.qp;
++ struct hns_roce_context *ctx = to_hr_ctx(ibvqp->context);
++ struct hns_roce_v2_reset_state *state = ctx->reset_state;
++
++ if (state && state->is_reset)
++ return;
+
+ /* All kinds of DirectWQE have the same header field layout */
+ hr_reg_enable(rc_sq_wqe, RCWQE_FLAG);
+@@ -328,7 +351,8 @@ static void update_cq_db(struct hns_roce_context *ctx, struct hns_roce_cq *cq)
+ hr_reg_write(&cq_db, DB_CQ_CI, cq->cons_index);
+ hr_reg_write(&cq_db, DB_CQ_CMD_SN, 1);
+
+- hns_roce_write64(ctx->uar + ROCEE_VF_DB_CFG0_OFFSET, (__le32 *)&cq_db);
++ hns_roce_write64(ctx, ctx->uar + ROCEE_VF_DB_CFG0_OFFSET,
++ (__le32 *)&cq_db);
+ }
+
+ static struct hns_roce_qp *hns_roce_v2_find_qp(struct hns_roce_context *ctx,
+@@ -507,7 +531,7 @@ static void parse_cqe_for_srq(struct hns_roce_v2_cqe *cqe, struct ibv_wc *wc,
+ handle_recv_cqe_inl_from_srq(cqe, srq);
+ }
+
+-static int parse_cqe_for_resp(struct hns_roce_v2_cqe *cqe, struct ibv_wc *wc,
++static void parse_cqe_for_resp(struct hns_roce_v2_cqe *cqe, struct ibv_wc *wc,
+ struct hns_roce_qp *hr_qp)
+ {
+ struct hns_roce_wq *wq;
+@@ -523,8 +547,6 @@ static int parse_cqe_for_resp(struct hns_roce_v2_cqe *cqe, struct ibv_wc *wc,
+ handle_recv_cqe_inl_from_rq(cqe, hr_qp);
+ else if (hr_reg_read(cqe, CQE_RQ_INLINE))
+ handle_recv_rq_inl(cqe, hr_qp);
+-
+- return 0;
+ }
+
+ static void parse_cqe_for_req(struct hns_roce_v2_cqe *cqe, struct ibv_wc *wc,
+@@ -572,6 +594,81 @@ static void parse_cqe_for_req(struct hns_roce_v2_cqe *cqe, struct ibv_wc *wc,
+ wc->opcode = wc_send_op_map[opcode];
+ }
+
++static bool check_dca_attach_enable(struct hns_roce_qp *qp)
++{
++ return check_qp_dca_enable(qp) &&
++ (qp->flags & HNS_ROCE_QP_CAP_DYNAMIC_CTX_ATTACH);
++}
++
++static bool check_dca_detach_enable(struct hns_roce_qp *qp)
++{
++ return check_qp_dca_enable(qp) &&
++ (qp->flags & HNS_ROCE_QP_CAP_DYNAMIC_CTX_DETACH);
++}
++
++static int dca_attach_qp_buf(struct hns_roce_context *ctx,
++ struct hns_roce_qp *qp)
++{
++ struct hns_roce_dca_attach_attr attr = {};
++ bool enable_detach;
++ uint32_t idx;
++ int ret;
++
++ hns_roce_spin_lock(&qp->sq.hr_lock);
++ hns_roce_spin_lock(&qp->rq.hr_lock);
++
++ if (qp->sq.wqe_cnt > 0) {
++ idx = qp->sq.head & (qp->sq.wqe_cnt - 1);
++ attr.sq_offset = idx << qp->sq.wqe_shift;
++ }
++
++ if (qp->ex_sge.sge_cnt > 0) {
++ idx = qp->next_sge & (qp->ex_sge.sge_cnt - 1);
++ attr.sge_offset = idx << qp->ex_sge.sge_shift;
++ }
++
++ if (qp->rq.wqe_cnt > 0) {
++ idx = qp->rq.head & (qp->rq.wqe_cnt - 1);
++ attr.rq_offset = idx << qp->rq.wqe_shift;
++ }
++
++ enable_detach = check_dca_detach_enable(qp);
++ if (enable_detach &&
++ !hns_roce_dca_start_post(&ctx->dca_ctx, qp->dca_wqe.dcan))
++ /* Force attach if syncing the DCA status failed */
++ attr.force = true;
++
++ ret = hns_roce_attach_dca_mem(ctx, qp->verbs_qp.qp.handle, &attr,
++ qp->buf_size, &qp->dca_wqe);
++ if (ret && enable_detach)
++ hns_roce_dca_stop_post(&ctx->dca_ctx, qp->dca_wqe.dcan);
++
++ hns_roce_spin_unlock(&qp->rq.hr_lock);
++ hns_roce_spin_unlock(&qp->sq.hr_lock);
++
++ return ret;
++}
++
++static void dca_detach_qp_buf(struct hns_roce_context *ctx,
++ struct hns_roce_qp *qp)
++{
++ struct hns_roce_dca_detach_attr attr;
++ bool is_empty;
++
++ hns_roce_spin_lock(&qp->sq.hr_lock);
++ hns_roce_spin_lock(&qp->rq.hr_lock);
++
++ is_empty = qp->sq.head == qp->sq.tail && qp->rq.head == qp->rq.tail;
++ if (is_empty && qp->sq.wqe_cnt > 0)
++ attr.sq_index = qp->sq.head & (qp->sq.wqe_cnt - 1);
++
++ hns_roce_spin_unlock(&qp->rq.hr_lock);
++ hns_roce_spin_unlock(&qp->sq.hr_lock);
++
++ if (is_empty && qp->sq.wqe_cnt > 0)
++ hns_roce_detach_dca_mem(ctx, qp->verbs_qp.qp.handle, &attr);
++}
++
+ static void cqe_proc_sq(struct hns_roce_qp *hr_qp, uint32_t wqe_idx,
+ struct hns_roce_cq *cq)
+ {
+@@ -715,6 +812,183 @@ static int hns_roce_poll_one(struct hns_roce_context *ctx,
+ return hns_roce_flush_cqe(*cur_qp, status);
+ }
+
++static void hns_roce_fill_swc(struct hns_roce_cq *cq, struct ibv_wc *wc,
++ uint64_t wr_id, uint32_t qp_num)
++{
++ if (!wc) {
++ cq->verbs_cq.cq_ex.status = IBV_WC_WR_FLUSH_ERR;
++ cq->verbs_cq.cq_ex.wr_id = wr_id;
++ hr_reg_write(cq->sw_cqe, CQE_LCL_QPN, qp_num);
++ return;
++ }
++
++ wc->wr_id = wr_id;
++ wc->status = IBV_WC_WR_FLUSH_ERR;
++ wc->vendor_err = 0;
++ wc->qp_num = qp_num;
++}
++
++static int hns_roce_get_wq_swc(struct hns_roce_cq *cq, struct hns_roce_qp *qp,
++ struct ibv_wc *wc, bool is_sq)
++{
++ struct hns_roce_wq *wq = is_sq ? &qp->sq : &qp->rq;
++ unsigned int left_wr;
++ uint64_t wr_id;
++
++ left_wr = wq->head - wq->tail;
++ if (left_wr == 0) {
++ if (is_sq)
++ list_del_init(&qp->scq_node);
++ else
++ list_del_init(&qp->rcq_node);
++
++ return ENOENT;
++ }
++
++ wr_id = wq->wrid[wq->tail & (wq->wqe_cnt - 1)];
++ hns_roce_fill_swc(cq, wc, wr_id, qp->verbs_qp.qp.qp_num);
++ wq->tail++;
++ return V2_CQ_OK;
++}
++
++static int hns_roce_gen_sq_swc(struct hns_roce_cq *cq, struct ibv_wc *wc)
++{
++ struct hns_roce_qp *next, *qp = NULL;
++
++ list_for_each_safe(&cq->list_sq, qp, next, scq_node) {
++ if (hns_roce_get_wq_swc(cq, qp, wc, true) == ENOENT)
++ continue;
++
++ return V2_CQ_OK;
++ }
++
++ return wc ? V2_CQ_EMPTY : ENOENT;
++}
++
++static int hns_roce_gen_rq_swc(struct hns_roce_cq *cq, struct ibv_wc *wc)
++{
++ struct hns_roce_qp *next, *qp = NULL;
++
++ list_for_each_safe(&cq->list_rq, qp, next, rcq_node) {
++ if (hns_roce_get_wq_swc(cq, qp, wc, false) == ENOENT)
++ continue;
++
++ return V2_CQ_OK;
++ }
++
++ return wc ? V2_CQ_EMPTY : ENOENT;
++}
++
++static int hns_roce_get_srq_swc(struct hns_roce_cq *cq, struct hns_roce_qp *qp,
++ struct hns_roce_srq *srq, struct ibv_wc *wc)
++{
++ unsigned int left_wr;
++ uint64_t wr_id;
++
++ hns_roce_spin_lock(&srq->hr_lock);
++ left_wr = srq->idx_que.head - srq->idx_que.tail;
++ if (left_wr == 0) {
++ if (qp)
++ list_del_init(&qp->srcq_node);
++ else
++ list_del_init(&srq->xrc_srcq_node);
++
++ hns_roce_spin_unlock(&srq->hr_lock);
++ return ENOENT;
++ }
++
++ wr_id = srq->wrid[srq->idx_que.tail & (srq->wqe_cnt - 1)];
++ hns_roce_fill_swc(cq, wc, wr_id, srq->srqn);
++ srq->idx_que.tail++;
++ hns_roce_spin_unlock(&srq->hr_lock);
++
++ return V2_CQ_OK;
++}
++
++static int hns_roce_gen_common_srq_swc(struct hns_roce_cq *cq,
++ struct ibv_wc *wc)
++{
++ struct hns_roce_qp *next, *qp = NULL;
++ struct hns_roce_srq *srq;
++
++ list_for_each_safe(&cq->list_srq, qp, next, srcq_node) {
++ srq = to_hr_srq(qp->verbs_qp.qp.srq);
++ if (hns_roce_get_srq_swc(cq, qp, srq, wc) == ENOENT)
++ continue;
++
++ return V2_CQ_OK;
++ }
++
++ return wc ? V2_CQ_EMPTY : ENOENT;
++}
++
++static int hns_roce_gen_xrc_srq_swc(struct hns_roce_cq *cq, struct ibv_wc *wc)
++{
++ struct hns_roce_srq *next, *srq = NULL;
++
++ list_for_each_safe(&cq->list_xrc_srq, srq, next, xrc_srcq_node) {
++ if (hns_roce_get_srq_swc(cq, NULL, srq, wc) == ENOENT)
++ continue;
++
++ return V2_CQ_OK;
++ }
++
++ return wc ? V2_CQ_EMPTY : ENOENT;
++}
++
++static int hns_roce_gen_srq_swc(struct hns_roce_cq *cq, struct ibv_wc *wc)
++{
++ int err;
++
++ err = hns_roce_gen_common_srq_swc(cq, wc);
++ if (err == V2_CQ_OK)
++ return err;
++
++ return hns_roce_gen_xrc_srq_swc(cq, wc);
++}
++
++static int hns_roce_poll_one_swc(struct hns_roce_cq *cq, struct ibv_wc *wc)
++{
++ int err;
++
++ err = hns_roce_gen_sq_swc(cq, wc);
++ if (err == V2_CQ_OK)
++ return err;
++
++ err = hns_roce_gen_rq_swc(cq, wc);
++ if (err == V2_CQ_OK)
++ return err;
++
++ return hns_roce_gen_srq_swc(cq, wc);
++}
++
++static int hns_roce_poll_swc(struct hns_roce_cq *cq, int ne, struct ibv_wc *wc)
++{
++ int npolled;
++ int err;
++
++ for (npolled = 0; npolled < ne; npolled++) {
++ err = hns_roce_poll_one_swc(cq, wc + npolled);
++ if (err == V2_CQ_EMPTY)
++ break;
++ }
++
++ return npolled;
++}
++
++static bool hns_roce_reseted(struct hns_roce_context *ctx)
++{
++ struct hns_roce_v2_reset_state *state = ctx->reset_state;
++
++ if (ctx->use_new_reset_flag)
++ return !state->hw_ready;
++
++ if (state && state->is_reset)
++ ctx->reseted = true;
++
++ return ctx->reseted;
++}
++
+ static int hns_roce_u_v2_poll_cq(struct ibv_cq *ibvcq, int ne,
+ struct ibv_wc *wc)
+ {
+@@ -724,10 +998,19 @@ static int hns_roce_u_v2_poll_cq(struct ibv_cq *ibvcq, int ne,
+ int err = V2_CQ_OK;
+ int npolled;
+
+- pthread_spin_lock(&cq->lock);
++ hns_roce_spin_lock(&cq->hr_lock);
++
++ if (unlikely(hns_roce_reseted(ctx))) {
++ npolled = hns_roce_poll_swc(cq, ne, wc);
++ hns_roce_spin_unlock(&cq->hr_lock);
++ return npolled;
++ }
+
+ for (npolled = 0; npolled < ne; ++npolled) {
+ err = hns_roce_poll_one(ctx, &qp, cq, wc + npolled);
++ if (qp && check_dca_detach_enable(qp))
++ dca_detach_qp_buf(ctx, qp);
++
+ if (err != V2_CQ_OK)
+ break;
+ }
+@@ -739,7 +1022,11 @@ static int hns_roce_u_v2_poll_cq(struct ibv_cq *ibvcq, int ne,
+ update_cq_db(ctx, cq);
+ }
+
+- pthread_spin_unlock(&cq->lock);
++ hns_roce_spin_unlock(&cq->hr_lock);
++
++ /* Try to shrink the DCA mem */
++ if (ctx->dca_ctx.mem_cnt > 0)
++ hns_roce_shrink_dca_mem(ctx);
+
+ return err == V2_CQ_POLL_ERR ? err : npolled;
+ }
+@@ -762,19 +1049,38 @@ static int hns_roce_u_v2_arm_cq(struct ibv_cq *ibvcq, int solicited)
+ hr_reg_write(&cq_db, DB_CQ_CMD_SN, cq->arm_sn);
+ hr_reg_write(&cq_db, DB_CQ_NOTIFY, solicited_flag);
+
+- hns_roce_write64(ctx->uar + ROCEE_VF_DB_CFG0_OFFSET, (__le32 *)&cq_db);
++ hns_roce_write64(ctx, ctx->uar + ROCEE_VF_DB_CFG0_OFFSET,
++ (__le32 *)&cq_db);
+
+ return 0;
+ }
+
+-static inline int check_qp_send(struct ibv_qp *qp)
++static int check_qp_send(struct hns_roce_qp *qp, struct hns_roce_context *ctx)
+ {
+- if (unlikely(qp->state == IBV_QPS_RESET ||
+- qp->state == IBV_QPS_INIT ||
+- qp->state == IBV_QPS_RTR))
++ struct ibv_qp *ibvqp = &qp->verbs_qp.qp;
++ int ret = 0;
++
++ if (unlikely(ibvqp->state == IBV_QPS_RESET ||
++ ibvqp->state == IBV_QPS_INIT ||
++ ibvqp->state == IBV_QPS_RTR)) {
++ verbs_err(verbs_get_ctx(qp->verbs_qp.qp.context),
++ "unsupported qp state, state = %d.\n", ibvqp->state);
+ return EINVAL;
++ } else if (unlikely(hns_roce_reseted(ctx))) {
++ verbs_err_datapath(&ctx->ibv_ctx,
++ "failed to send, device has been reseted!\n");
++ return EIO;
++ }
+
+- return 0;
++ if (check_dca_attach_enable(qp)) {
++ ret = dca_attach_qp_buf(ctx, qp);
++ if (ret)
++ verbs_err_datapath(&ctx->ibv_ctx,
++ "failed to attach QP-%u send, ret = %d.\n",
++ qp->verbs_qp.qp.qp_num, ret);
++ }
++
++ return ret;
+ }
+
+ static void set_rc_sge(struct hns_roce_v2_wqe_data_seg *dseg,
+@@ -1069,6 +1375,7 @@ static inline void enable_wqe(struct hns_roce_qp *qp, void *sq_wqe,
+ static int set_ud_wqe(void *wqe, struct hns_roce_qp *qp, struct ibv_send_wr *wr,
+ unsigned int nreq, struct hns_roce_sge_info *sge_info)
+ {
++ struct hns_roce_device *hr_dev = to_hr_dev(qp->verbs_qp.qp.context->device);
+ struct hns_roce_ah *ah = to_hr_ah(wr->wr.ud.ah);
+ struct hns_roce_ud_sq_wqe *ud_sq_wqe = wqe;
+ int ret = 0;
+@@ -1093,6 +1400,9 @@ static int set_ud_wqe(void *wqe, struct hns_roce_qp *qp, struct ibv_send_wr *wr,
+ if (ret)
+ return ret;
+
++ if (hr_dev->link_type == HNS_DEV_LINK_TYPE_HCCS)
++ ud_sq_wqe->dmac[0] = 0xF0;
++
+ ret = fill_ud_data_seg(ud_sq_wqe, qp, wr, sge_info);
+ if (ret)
+ return ret;
+@@ -1141,6 +1451,13 @@ static int set_rc_inl(struct hns_roce_qp *qp, const struct ibv_send_wr *wr,
+ return 0;
+ }
+
++static inline void fill_rc_dca_fields(uint32_t qp_num,
++ struct hns_roce_rc_sq_wqe *wqe)
++{
++ hr_reg_write(wqe, RCWQE_SQPN_L, qp_num);
++ hr_reg_write(wqe, RCWQE_SQPN_H, qp_num >> RCWQE_SQPN_L_WIDTH);
++}
++
+ static void set_bind_mw_seg(struct hns_roce_rc_sq_wqe *wqe,
+ const struct ibv_send_wr *wr)
+ {
+@@ -1248,6 +1565,9 @@ static int set_rc_wqe(void *wqe, struct hns_roce_qp *qp, struct ibv_send_wr *wr,
+ return ret;
+
+ wqe_valid:
++ if (check_qp_dca_enable(qp))
++ fill_rc_dca_fields(qp->verbs_qp.qp.qp_num, rc_sq_wqe);
++
+ enable_wqe(qp, rc_sq_wqe, qp->sq.head + nreq);
+
+ return 0;
+@@ -1264,13 +1584,13 @@ int hns_roce_u_v2_post_send(struct ibv_qp *ibvqp, struct ibv_send_wr *wr,
+ unsigned int wqe_idx, nreq;
+ int ret;
+
+- ret = check_qp_send(ibvqp);
++ ret = check_qp_send(qp, ctx);
+ if (unlikely(ret)) {
+ *bad_wr = wr;
+ return ret;
+ }
+
+- pthread_spin_lock(&qp->sq.lock);
++ hns_roce_spin_lock(&qp->sq.hr_lock);
+
+ sge_info.start_idx = qp->next_sge; /* start index of extend sge */
+
+@@ -1331,7 +1651,10 @@ out:
+ *(qp->sdb) = qp->sq.head & 0xffff;
+ }
+
+- pthread_spin_unlock(&qp->sq.lock);
++ hns_roce_spin_unlock(&qp->sq.hr_lock);
++
++ if (check_dca_detach_enable(qp))
++ hns_roce_dca_stop_post(&ctx->dca_ctx, qp->dca_wqe.dcan);
+
+ if (ibvqp->state == IBV_QPS_ERR) {
+ attr.qp_state = IBV_QPS_ERR;
+@@ -1342,12 +1665,30 @@ out:
+ return ret;
+ }
+
+-static inline int check_qp_recv(struct ibv_qp *qp)
++static int check_qp_recv(struct hns_roce_qp *qp, struct hns_roce_context *ctx)
+ {
+- if (qp->state == IBV_QPS_RESET)
++ struct ibv_qp *ibvqp = &qp->verbs_qp.qp;
++ int ret = 0;
++
++ if (ibvqp->state == IBV_QPS_RESET) {
++ verbs_err(verbs_get_ctx(qp->verbs_qp.qp.context),
++ "unsupported qp state, state = %d.\n", ibvqp->state);
+ return EINVAL;
++ } else if (unlikely(hns_roce_reseted(ctx))) {
++ verbs_err_datapath(&ctx->ibv_ctx,
++ "fail to recv, device has been reseted!\n");
++ return EIO;
++ }
+
+- return 0;
++ if (check_dca_attach_enable(qp)) {
++ ret = dca_attach_qp_buf(ctx, qp);
++ if (ret)
++ verbs_err_datapath(&ctx->ibv_ctx,
++ "failed to attach QP-%u recv, ret = %d.\n",
++ qp->verbs_qp.qp.qp_num, ret);
++ }
++
++ return ret;
+ }
+
+ static void fill_recv_sge_to_wqe(struct ibv_recv_wr *wr, void *wqe,
+@@ -1414,13 +1755,13 @@ static int hns_roce_u_v2_post_recv(struct ibv_qp *ibvqp, struct ibv_recv_wr *wr,
+ struct ibv_qp_attr attr = {};
+ int ret;
+
+- ret = check_qp_recv(ibvqp);
++ ret = check_qp_recv(qp, ctx);
+ if (unlikely(ret)) {
+ *bad_wr = wr;
+ return ret;
+ }
+
+- pthread_spin_lock(&qp->rq.lock);
++ hns_roce_spin_lock(&qp->rq.hr_lock);
+
+ max_sge = qp->rq.max_gs - qp->rq.rsv_sge;
+ for (nreq = 0; wr; ++nreq, wr = wr->next) {
+@@ -1454,7 +1795,10 @@ out:
+ hns_roce_update_rq_db(ctx, ibvqp->qp_num, qp->rq.head);
+ }
+
+- pthread_spin_unlock(&qp->rq.lock);
++ hns_roce_spin_unlock(&qp->rq.hr_lock);
++
++ if (check_dca_detach_enable(qp))
++ hns_roce_dca_stop_post(&ctx->dca_ctx, qp->dca_wqe.dcan);
+
+ if (ibvqp->state == IBV_QPS_ERR) {
+ attr.qp_state = IBV_QPS_ERR;
+@@ -1510,9 +1854,9 @@ static void __hns_roce_v2_cq_clean(struct hns_roce_cq *cq, uint32_t qpn,
+ static void hns_roce_v2_cq_clean(struct hns_roce_cq *cq, unsigned int qpn,
+ struct hns_roce_srq *srq)
+ {
+- pthread_spin_lock(&cq->lock);
++ hns_roce_spin_lock(&cq->hr_lock);
+ __hns_roce_v2_cq_clean(cq, qpn, srq);
+- pthread_spin_unlock(&cq->lock);
++ hns_roce_spin_unlock(&cq->hr_lock);
+ }
+
+ static void record_qp_attr(struct ibv_qp *qp, struct ibv_qp_attr *attr,
+@@ -1523,8 +1867,12 @@ static void record_qp_attr(struct ibv_qp *qp, struct ibv_qp_attr *attr,
+ if (attr_mask & IBV_QP_PORT)
+ hr_qp->port_num = attr->port_num;
+
+- if (attr_mask & IBV_QP_AV)
+- hr_qp->sl = attr->ah_attr.sl;
++ if (hr_qp->tc_mode == HNS_ROCE_TC_MAP_MODE_DSCP)
++ hr_qp->sl = hr_qp->priority;
++ else {
++ if (attr_mask & IBV_QP_AV)
++ hr_qp->sl = attr->ah_attr.sl;
++ }
+
+ if (attr_mask & IBV_QP_QKEY)
+ hr_qp->qkey = attr->qkey;
+@@ -1538,31 +1886,41 @@ static void record_qp_attr(struct ibv_qp *qp, struct ibv_qp_attr *attr,
+ static int hns_roce_u_v2_modify_qp(struct ibv_qp *qp, struct ibv_qp_attr *attr,
+ int attr_mask)
+ {
+- int ret;
+- struct ibv_modify_qp cmd;
++ struct hns_roce_context *ctx = to_hr_ctx(qp->context);
++ struct hns_roce_modify_qp_ex_resp resp_ex = {};
++ struct hns_roce_modify_qp_ex cmd_ex = {};
+ struct hns_roce_qp *hr_qp = to_hr_qp(qp);
+ bool flag = false; /* modify qp to error */
++ int ret;
+
+ if ((attr_mask & IBV_QP_STATE) && (attr->qp_state == IBV_QPS_ERR)) {
+- pthread_spin_lock(&hr_qp->sq.lock);
+- pthread_spin_lock(&hr_qp->rq.lock);
++ hns_roce_spin_lock(&hr_qp->sq.hr_lock);
++ hns_roce_spin_lock(&hr_qp->rq.hr_lock);
+ flag = true;
+ }
+
+- ret = ibv_cmd_modify_qp(qp, attr, attr_mask, &cmd, sizeof(cmd));
++ ret = ibv_cmd_modify_qp_ex(qp, attr, attr_mask, &cmd_ex.ibv_cmd,
++ sizeof(cmd_ex), &resp_ex.ibv_resp,
++ sizeof(resp_ex));
+
+ if (flag) {
+ if (!ret)
+ qp->state = IBV_QPS_ERR;
+- pthread_spin_unlock(&hr_qp->rq.lock);
+- pthread_spin_unlock(&hr_qp->sq.lock);
++ hns_roce_spin_unlock(&hr_qp->sq.hr_lock);
++ hns_roce_spin_unlock(&hr_qp->rq.hr_lock);
+ }
+
+ if (ret)
+ return ret;
+
+- if (attr_mask & IBV_QP_STATE)
++ if (attr_mask & IBV_QP_STATE) {
+ qp->state = attr->qp_state;
++ if (attr->qp_state == IBV_QPS_RTR) {
++ hr_qp->tc_mode = resp_ex.drv_payload.tc_mode;
++ hr_qp->priority = resp_ex.drv_payload.priority;
++ hr_qp->dca_wqe.dcan = resp_ex.drv_payload.dcan;
++ }
++ }
+
+ if ((attr_mask & IBV_QP_STATE) && attr->qp_state == IBV_QPS_RESET) {
+ if (qp->recv_cq)
+@@ -1576,58 +1934,57 @@ static int hns_roce_u_v2_modify_qp(struct ibv_qp *qp, struct ibv_qp_attr *attr,
+ hns_roce_init_qp_indices(to_hr_qp(qp));
+ }
+
++ /* Try to shrink the DCA mem */
++ if (ctx->dca_ctx.mem_cnt > 0)
++ hns_roce_shrink_dca_mem(ctx);
++
+ record_qp_attr(qp, attr, attr_mask);
+
+ return ret;
+ }
+
+-static void hns_roce_lock_cqs(struct ibv_qp *qp)
++void hns_roce_lock_cqs(struct hns_roce_cq *send_cq, struct hns_roce_cq *recv_cq)
+ {
+- struct hns_roce_cq *send_cq = to_hr_cq(qp->send_cq);
+- struct hns_roce_cq *recv_cq = to_hr_cq(qp->recv_cq);
+-
+ if (send_cq && recv_cq) {
+ if (send_cq == recv_cq) {
+- pthread_spin_lock(&send_cq->lock);
++ hns_roce_spin_lock(&send_cq->hr_lock);
+ } else if (send_cq->cqn < recv_cq->cqn) {
+- pthread_spin_lock(&send_cq->lock);
+- pthread_spin_lock(&recv_cq->lock);
++ hns_roce_spin_lock(&send_cq->hr_lock);
++ hns_roce_spin_lock(&recv_cq->hr_lock);
+ } else {
+- pthread_spin_lock(&recv_cq->lock);
+- pthread_spin_lock(&send_cq->lock);
++ hns_roce_spin_lock(&recv_cq->hr_lock);
++ hns_roce_spin_lock(&send_cq->hr_lock);
+ }
+ } else if (send_cq) {
+- pthread_spin_lock(&send_cq->lock);
++ hns_roce_spin_lock(&send_cq->hr_lock);
+ } else if (recv_cq) {
+- pthread_spin_lock(&recv_cq->lock);
++ hns_roce_spin_lock(&recv_cq->hr_lock);
+ }
+ }
+
+-static void hns_roce_unlock_cqs(struct ibv_qp *qp)
++void hns_roce_unlock_cqs(struct hns_roce_cq *send_cq, struct hns_roce_cq *recv_cq)
+ {
+- struct hns_roce_cq *send_cq = to_hr_cq(qp->send_cq);
+- struct hns_roce_cq *recv_cq = to_hr_cq(qp->recv_cq);
+-
+ if (send_cq && recv_cq) {
+ if (send_cq == recv_cq) {
+- pthread_spin_unlock(&send_cq->lock);
++ hns_roce_spin_unlock(&send_cq->hr_lock);
+ } else if (send_cq->cqn < recv_cq->cqn) {
+- pthread_spin_unlock(&recv_cq->lock);
+- pthread_spin_unlock(&send_cq->lock);
++ hns_roce_spin_unlock(&recv_cq->hr_lock);
++ hns_roce_spin_unlock(&send_cq->hr_lock);
+ } else {
+- pthread_spin_unlock(&send_cq->lock);
+- pthread_spin_unlock(&recv_cq->lock);
++ hns_roce_spin_unlock(&send_cq->hr_lock);
++ hns_roce_spin_unlock(&recv_cq->hr_lock);
+ }
+ } else if (send_cq) {
+- pthread_spin_unlock(&send_cq->lock);
++ hns_roce_spin_unlock(&send_cq->hr_lock);
+ } else if (recv_cq) {
+- pthread_spin_unlock(&recv_cq->lock);
++ hns_roce_spin_unlock(&recv_cq->hr_lock);
+ }
+ }
+
+ static int hns_roce_u_v2_destroy_qp(struct ibv_qp *ibqp)
+ {
+ struct hns_roce_context *ctx = to_hr_ctx(ibqp->context);
++ struct hns_roce_pad *pad = to_hr_pad(ibqp->pd);
+ struct hns_roce_qp *qp = to_hr_qp(ibqp);
+ int ret;
+
+@@ -1640,22 +1997,33 @@ static int hns_roce_u_v2_destroy_qp(struct ibv_qp *ibqp)
+
+ hns_roce_v2_clear_qp(ctx, qp);
+
+- hns_roce_lock_cqs(ibqp);
++ hns_roce_lock_cqs(to_hr_cq(ibqp->send_cq), to_hr_cq(ibqp->recv_cq));
+
+- if (ibqp->recv_cq)
++ if (ibqp->recv_cq) {
+ __hns_roce_v2_cq_clean(to_hr_cq(ibqp->recv_cq), ibqp->qp_num,
+ ibqp->srq ? to_hr_srq(ibqp->srq) : NULL);
++ list_del(&qp->srcq_node);
++ list_del(&qp->rcq_node);
++ }
+
+- if (ibqp->send_cq && ibqp->send_cq != ibqp->recv_cq)
++ if (ibqp->send_cq && ibqp->send_cq != ibqp->recv_cq) {
+ __hns_roce_v2_cq_clean(to_hr_cq(ibqp->send_cq), ibqp->qp_num,
+ NULL);
++ list_del(&qp->scq_node);
++ }
+
+- hns_roce_unlock_cqs(ibqp);
++ hns_roce_unlock_cqs(to_hr_cq(ibqp->send_cq), to_hr_cq(ibqp->recv_cq));
+
+ hns_roce_free_qp_buf(qp, ctx);
+
++ if (pad)
++ atomic_fetch_sub(&pad->pd.refcount, 1);
++
+ free(qp);
+
++ if (ctx->dca_ctx.mem_cnt > 0)
++ hns_roce_shrink_dca_mem(ctx);
++
+ return ret;
+ }
+
+@@ -1725,10 +2093,20 @@ static void update_srq_db(struct hns_roce_context *ctx, struct hns_roce_db *db,
+ hr_reg_write(db, DB_CMD, HNS_ROCE_V2_SRQ_DB);
+ hr_reg_write(db, DB_PI, srq->idx_que.head);
+
+- hns_roce_write64(ctx->uar + ROCEE_VF_DB_CFG0_OFFSET,
++ hns_roce_write64(ctx, ctx->uar + ROCEE_VF_DB_CFG0_OFFSET,
+ (__le32 *)db);
+ }
+
++static int check_srq_recv(struct hns_roce_context *ctx)
++{
++ if (hns_roce_reseted(ctx)) {
++ verbs_err_datapath(&ctx->ibv_ctx,
++ "srq failed to recv, device has been reseted!\n");
++ return EIO;
++ }
++ return 0;
++}
++
+ static int hns_roce_u_v2_post_srq_recv(struct ibv_srq *ib_srq,
+ struct ibv_recv_wr *wr,
+ struct ibv_recv_wr **bad_wr)
+@@ -1740,7 +2118,13 @@ static int hns_roce_u_v2_post_srq_recv(struct ibv_srq *ib_srq,
+ int ret = 0;
+ void *wqe;
+
+- pthread_spin_lock(&srq->lock);
++ ret = check_srq_recv(ctx);
++ if (ret) {
++ *bad_wr = wr;
++ return ret;
++ }
++
++ hns_roce_spin_lock(&srq->hr_lock);
+
+ max_sge = srq->max_gs - srq->rsv_sge;
+ for (nreq = 0; wr; ++nreq, wr = wr->next) {
+@@ -1779,7 +2163,7 @@ static int hns_roce_u_v2_post_srq_recv(struct ibv_srq *ib_srq,
+ update_srq_db(ctx, &srq_db, srq);
+ }
+
+- pthread_spin_unlock(&srq->lock);
++ hns_roce_spin_unlock(&srq->hr_lock);
+
+ return ret;
+ }
+@@ -1795,11 +2179,18 @@ static int wc_start_poll_cq(struct ibv_cq_ex *current,
+ if (attr->comp_mask)
+ return EINVAL;
+
+- pthread_spin_lock(&cq->lock);
++ hns_roce_spin_lock(&cq->hr_lock);
++
++ if (unlikely(hns_roce_reseted(ctx))) {
++ err = hns_roce_poll_one_swc(cq, NULL);
++ goto start_poll_done;
++ }
+
+ err = hns_roce_poll_one(ctx, &qp, cq, NULL);
++
++start_poll_done:
+ if (err != V2_CQ_OK)
+- pthread_spin_unlock(&cq->lock);
++ hns_roce_spin_unlock(&cq->hr_lock);
+
+ return err;
+ }
+@@ -1811,6 +2202,9 @@ static int wc_next_poll_cq(struct ibv_cq_ex *current)
+ struct hns_roce_qp *qp = NULL;
+ int err;
+
++ if (unlikely(hns_roce_reseted(ctx)))
++ return hns_roce_poll_one_swc(cq, NULL);
++
+ err = hns_roce_poll_one(ctx, &qp, cq, NULL);
+ if (err != V2_CQ_OK)
+ return err;
+@@ -1828,12 +2222,16 @@ static void wc_end_poll_cq(struct ibv_cq_ex *current)
+ struct hns_roce_cq *cq = to_hr_cq(ibv_cq_ex_to_cq(current));
+ struct hns_roce_context *ctx = to_hr_ctx(current->context);
+
++ if (unlikely(hns_roce_reseted(ctx)))
++ goto end_poll_done;
++
+ if (cq->flags & HNS_ROCE_CQ_FLAG_RECORD_DB)
+ *cq->db = cq->cons_index & RECORD_DB_CI_MASK;
+ else
+ update_cq_db(ctx, cq);
+
+- pthread_spin_unlock(&cq->lock);
++end_poll_done:
++ hns_roce_spin_unlock(&cq->hr_lock);
+ }
+
+ static enum ibv_wc_opcode wc_read_opcode(struct ibv_cq_ex *current)
+@@ -2096,8 +2494,6 @@ static void wr_set_sge_list_rc(struct ibv_qp_ex *ibv_qp, size_t num_sge,
+
+ wqe->msg_len = htole32(qp->sge_info.total_len);
+ hr_reg_write(wqe, RCWQE_SGE_NUM, qp->sge_info.valid_num);
+-
+- enable_wqe(qp, wqe, qp->sq.head);
+ }
+
+ static void wr_send_rc(struct ibv_qp_ex *ibv_qp)
+@@ -2236,8 +2632,8 @@ static void set_inline_data_list_rc(struct hns_roce_qp *qp,
+ {
+ unsigned int msg_len = qp->sge_info.total_len;
+ void *dseg;
++ size_t i;
+ int ret;
+- int i;
+
+ hr_reg_enable(wqe, RCWQE_INLINE);
+
+@@ -2289,7 +2685,6 @@ static void wr_set_inline_data_rc(struct ibv_qp_ex *ibv_qp, void *addr,
+
+ qp->sge_info.total_len = length;
+ set_inline_data_list_rc(qp, wqe, 1, &buff);
+- enable_wqe(qp, wqe, qp->sq.head);
+ }
+
+ static void wr_set_inline_data_list_rc(struct ibv_qp_ex *ibv_qp, size_t num_buf,
+@@ -2297,7 +2692,7 @@ static void wr_set_inline_data_list_rc(struct ibv_qp_ex *ibv_qp, size_t num_buf,
+ {
+ struct hns_roce_qp *qp = to_hr_qp(&ibv_qp->qp_base);
+ struct hns_roce_rc_sq_wqe *wqe = qp->cur_wqe;
+- int i;
++ size_t i;
+
+ if (!wqe)
+ return;
+@@ -2307,7 +2702,6 @@ static void wr_set_inline_data_list_rc(struct ibv_qp_ex *ibv_qp, size_t num_buf,
+ qp->sge_info.total_len += buf_list[i].length;
+
+ set_inline_data_list_rc(qp, wqe, num_buf, buf_list);
+- enable_wqe(qp, wqe, qp->sq.head);
+ }
+
+ static struct hns_roce_ud_sq_wqe *
+@@ -2428,7 +2822,7 @@ static void wr_set_sge_list_ud(struct ibv_qp_ex *ibv_qp, size_t num_sge,
+ }
+
+ hr_reg_write(wqe, UDWQE_MSG_START_SGE_IDX, sge_idx & mask);
+- for (int i = 0; i < num_sge; i++) {
++ for (size_t i = 0; i < num_sge; i++) {
+ if (!sg_list[i].length)
+ continue;
+
+@@ -2444,7 +2838,6 @@ static void wr_set_sge_list_ud(struct ibv_qp_ex *ibv_qp, size_t num_sge,
+ hr_reg_write(wqe, UDWQE_SGE_NUM, cnt);
+
+ qp->sge_info.start_idx += cnt;
+- enable_wqe(qp, wqe, qp->sq.head);
+ }
+
+ static void set_inline_data_list_ud(struct hns_roce_qp *qp,
+@@ -2455,8 +2848,8 @@ static void set_inline_data_list_ud(struct hns_roce_qp *qp,
+ uint8_t data[HNS_ROCE_MAX_UD_INL_INN_SZ] = {};
+ unsigned int msg_len = qp->sge_info.total_len;
+ void *tmp;
++ size_t i;
+ int ret;
+- int i;
+
+ if (!check_inl_data_len(qp, msg_len)) {
+ qp->err = EINVAL;
+@@ -2510,7 +2903,6 @@ static void wr_set_inline_data_ud(struct ibv_qp_ex *ibv_qp, void *addr,
+
+ qp->sge_info.total_len = length;
+ set_inline_data_list_ud(qp, wqe, 1, &buff);
+- enable_wqe(qp, wqe, qp->sq.head);
+ }
+
+ static void wr_set_inline_data_list_ud(struct ibv_qp_ex *ibv_qp, size_t num_buf,
+@@ -2518,7 +2910,7 @@ static void wr_set_inline_data_list_ud(struct ibv_qp_ex *ibv_qp, size_t num_buf,
+ {
+ struct hns_roce_qp *qp = to_hr_qp(&ibv_qp->qp_base);
+ struct hns_roce_ud_sq_wqe *wqe = qp->cur_wqe;
+- int i;
++ size_t i;
+
+ if (!wqe)
+ return;
+@@ -2528,22 +2920,21 @@ static void wr_set_inline_data_list_ud(struct ibv_qp_ex *ibv_qp, size_t num_buf,
+ qp->sge_info.total_len += buf_list[i].length;
+
+ set_inline_data_list_ud(qp, wqe, num_buf, buf_list);
+- enable_wqe(qp, wqe, qp->sq.head);
+ }
+
+ static void wr_start(struct ibv_qp_ex *ibv_qp)
+ {
++ struct hns_roce_context *ctx = to_hr_ctx(ibv_qp->qp_base.context);
+ struct hns_roce_qp *qp = to_hr_qp(&ibv_qp->qp_base);
+- enum ibv_qp_state state = ibv_qp->qp_base.state;
++ int ret;
+
+- if (state == IBV_QPS_RESET ||
+- state == IBV_QPS_INIT ||
+- state == IBV_QPS_RTR) {
+- qp->err = EINVAL;
++ ret = check_qp_send(qp, ctx);
++ if (ret) {
++ qp->err = ret;
+ return;
+ }
+
+- pthread_spin_lock(&qp->sq.lock);
++ hns_roce_spin_lock(&qp->sq.hr_lock);
+ qp->sge_info.start_idx = qp->next_sge;
+ qp->rb_sq_head = qp->sq.head;
+ qp->err = 0;
+@@ -2576,7 +2967,11 @@ static int wr_complete(struct ibv_qp_ex *ibv_qp)
+ }
+
+ out:
+- pthread_spin_unlock(&qp->sq.lock);
++ hns_roce_spin_unlock(&qp->sq.hr_lock);
++
++ if (check_dca_detach_enable(qp))
++ hns_roce_dca_stop_post(&ctx->dca_ctx, qp->dca_wqe.dcan);
++
+ if (ibv_qp->qp_base.state == IBV_QPS_ERR) {
+ attr.qp_state = IBV_QPS_ERR;
+ hns_roce_u_v2_modify_qp(&ibv_qp->qp_base, &attr, IBV_QP_STATE);
+@@ -2591,7 +2986,7 @@ static void wr_abort(struct ibv_qp_ex *ibv_qp)
+
+ qp->sq.head = qp->rb_sq_head;
+
+- pthread_spin_unlock(&qp->sq.lock);
++ hns_roce_spin_unlock(&qp->sq.hr_lock);
+ }
+
+ enum {
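
The hns_roce_lock_cqs()/hns_roce_unlock_cqs() pair above relies on the classic ordered-locking rule; a standalone sketch of the same idea (illustrative names only):

#include <pthread.h>

struct demo_cq {
	unsigned int cqn;
	pthread_spinlock_t lock;
};

/* When two CQ locks are needed, always take the lower cqn first so that
 * concurrent callers agree on the order and cannot deadlock. */
void demo_lock_two_cqs(struct demo_cq *a, struct demo_cq *b)
{
	if (a == b) {
		pthread_spin_lock(&a->lock);
	} else if (a->cqn < b->cqn) {
		pthread_spin_lock(&a->lock);
		pthread_spin_lock(&b->lock);
	} else {
		pthread_spin_lock(&b->lock);
		pthread_spin_lock(&a->lock);
	}
}
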
+diff --git a/providers/hns/hns_roce_u_hw_v2.h b/providers/hns/hns_roce_u_hw_v2.h
+index abf9467..50a920f 100644
+--- a/providers/hns/hns_roce_u_hw_v2.h
++++ b/providers/hns/hns_roce_u_hw_v2.h
+@@ -237,6 +237,8 @@ struct hns_roce_rc_sq_wqe {
+ #define RCWQE_MW_RR_EN RCWQE_FIELD_LOC(259, 259)
+ #define RCWQE_MW_RW_EN RCWQE_FIELD_LOC(260, 260)
+
++#define RCWQE_SQPN_L_WIDTH 2
++
+ struct hns_roce_v2_wqe_data_seg {
+ __le32 len;
+ __le32 lkey;
+@@ -344,5 +346,7 @@ void hns_roce_v2_clear_qp(struct hns_roce_context *ctx, struct hns_roce_qp *qp);
+ void hns_roce_attach_cq_ex_ops(struct ibv_cq_ex *cq_ex, uint64_t wc_flags);
+ int hns_roce_attach_qp_ex_ops(struct ibv_qp_init_attr_ex *attr,
+ struct hns_roce_qp *qp);
++void hns_roce_lock_cqs(struct hns_roce_cq *send_cq, struct hns_roce_cq *recv_cq);
++void hns_roce_unlock_cqs(struct hns_roce_cq *send_cq, struct hns_roce_cq *recv_cq);
+
+ #endif /* _HNS_ROCE_U_HW_V2_H */
+diff --git a/providers/hns/hns_roce_u_verbs.c b/providers/hns/hns_roce_u_verbs.c
+index 34f7ee4..e30880c 100644
+--- a/providers/hns/hns_roce_u_verbs.c
++++ b/providers/hns/hns_roce_u_verbs.c
+@@ -33,6 +33,7 @@
+ #include
+ #include
+ #include
++#include
+ #include
+ #include
+ #include
+@@ -42,6 +43,37 @@
+ #include "hns_roce_u_db.h"
+ #include "hns_roce_u_hw_v2.h"
+
++static bool hns_roce_whether_need_lock(struct ibv_pd *pd)
++{
++ struct hns_roce_pad *pad;
++
++ pad = to_hr_pad(pd);
++ if (pad && pad->td)
++ return false;
++
++ return true;
++}
++
++static int hns_roce_spinlock_init(struct hns_roce_spinlock *hr_lock,
++ bool need_lock)
++{
++ hr_lock->need_lock = need_lock;
++
++ if (need_lock)
++ return pthread_spin_init(&hr_lock->lock,
++ PTHREAD_PROCESS_PRIVATE);
++
++ return 0;
++}
++
++static int hns_roce_spinlock_destroy(struct hns_roce_spinlock *hr_lock)
++{
++ if (hr_lock->need_lock)
++ return pthread_spin_destroy(&hr_lock->lock);
++
++ return 0;
++}
++
+ void hns_roce_init_qp_indices(struct hns_roce_qp *qp)
+ {
+ qp->sq.head = 0;
+@@ -85,38 +117,138 @@ int hns_roce_u_query_port(struct ibv_context *context, uint8_t port,
+ return ibv_cmd_query_port(context, port, attr, &cmd, sizeof(cmd));
+ }
+
++struct ibv_td *hns_roce_u_alloc_td(struct ibv_context *context,
++ struct ibv_td_init_attr *attr)
++{
++ struct hns_roce_td *td;
++
++ if (attr->comp_mask) {
++ errno = EOPNOTSUPP;
++ return NULL;
++ }
++
++ td = calloc(1, sizeof(*td));
++ if (!td) {
++ errno = ENOMEM;
++ return NULL;
++ }
++
++ td->ibv_td.context = context;
++ atomic_init(&td->refcount, 1);
++
++ return &td->ibv_td;
++}
++
++int hns_roce_u_dealloc_td(struct ibv_td *ibv_td)
++{
++ struct hns_roce_td *td;
++
++ td = to_hr_td(ibv_td);
++ if (atomic_load(&td->refcount) > 1)
++ return EBUSY;
++
++ free(td);
++
++ return 0;
++}
++
+ struct ibv_pd *hns_roce_u_alloc_pd(struct ibv_context *context)
+ {
++ struct hns_roce_alloc_pd_resp resp = {};
+ struct ibv_alloc_pd cmd;
+ struct hns_roce_pd *pd;
+- struct hns_roce_alloc_pd_resp resp = {};
+-
+- pd = malloc(sizeof(*pd));
+- if (!pd)
+- return NULL;
+
+- if (ibv_cmd_alloc_pd(context, &pd->ibv_pd, &cmd, sizeof(cmd),
+- &resp.ibv_resp, sizeof(resp))) {
+- free(pd);
++ pd = calloc(1, sizeof(*pd));
++ if (!pd) {
++ errno = ENOMEM;
+ return NULL;
+ }
++ errno = ibv_cmd_alloc_pd(context, &pd->ibv_pd, &cmd, sizeof(cmd),
++ &resp.ibv_resp, sizeof(resp));
++
++ if (errno)
++ goto err;
+
++ atomic_init(&pd->refcount, 1);
+ pd->pdn = resp.pdn;
+
+ return &pd->ibv_pd;
++
++err:
++ free(pd);
++ return NULL;
+ }
+
+-int hns_roce_u_free_pd(struct ibv_pd *pd)
++struct ibv_pd *hns_roce_u_alloc_pad(struct ibv_context *context,
++ struct ibv_parent_domain_init_attr *attr)
++{
++ struct hns_roce_pad *pad;
++
++ if (ibv_check_alloc_parent_domain(attr))
++ return NULL;
++
++ if (attr->comp_mask) {
++ errno = EOPNOTSUPP;
++ return NULL;
++ }
++
++ pad = calloc(1, sizeof(*pad));
++ if (!pad) {
++ errno = ENOMEM;
++ return NULL;
++ }
++
++ if (attr->td) {
++ pad->td = to_hr_td(attr->td);
++ atomic_fetch_add(&pad->td->refcount, 1);
++ }
++
++ pad->pd.protection_domain = to_hr_pd(attr->pd);
++ atomic_fetch_add(&pad->pd.protection_domain->refcount, 1);
++
++ atomic_init(&pad->pd.refcount, 1);
++ ibv_initialize_parent_domain(&pad->pd.ibv_pd,
++ &pad->pd.protection_domain->ibv_pd);
++
++ return &pad->pd.ibv_pd;
++}
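
From the application side, these objects are reached through the standard libibverbs thread-domain and parent-domain calls. A minimal caller-side sketch (assumes <infiniband/verbs.h>; ctx and pd are an existing context and an ordinary PD):

	static struct ibv_pd *alloc_lockfree_pd(struct ibv_context *ctx,
						struct ibv_pd *pd)
	{
		struct ibv_td_init_attr td_attr = {};
		struct ibv_parent_domain_init_attr pd_attr = {};
		struct ibv_td *td;

		td = ibv_alloc_td(ctx, &td_attr);	/* -> hns_roce_u_alloc_td() */
		if (!td)
			return NULL;

		pd_attr.pd = pd;	/* ordinary PD from ibv_alloc_pd() */
		pd_attr.td = td;	/* promise of single-threaded use */

		/* -> hns_roce_u_alloc_pad(); CQs/QPs created on the returned
		 * parent domain get need_lock == false and skip the spinlocks. */
		return ibv_alloc_parent_domain(ctx, &pd_attr);
	}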
++
++static void hns_roce_free_pad(struct hns_roce_pad *pad)
++{
++ atomic_fetch_sub(&pad->pd.protection_domain->refcount, 1);
++
++ if (pad->td)
++ atomic_fetch_sub(&pad->td->refcount, 1);
++
++ free(pad);
++}
++
++static int hns_roce_free_pd(struct hns_roce_pd *pd)
+ {
+ int ret;
+
+- ret = ibv_cmd_dealloc_pd(pd);
++ if (atomic_load(&pd->refcount) > 1)
++ return EBUSY;
++
++ ret = ibv_cmd_dealloc_pd(&pd->ibv_pd);
+ if (ret)
+ return ret;
+
+- free(to_hr_pd(pd));
++ free(pd);
++ return 0;
++}
+
+- return ret;
++int hns_roce_u_dealloc_pd(struct ibv_pd *ibv_pd)
++{
++ struct hns_roce_pad *pad = to_hr_pad(ibv_pd);
++ struct hns_roce_pd *pd = to_hr_pd(ibv_pd);
++
++ if (pad) {
++ hns_roce_free_pad(pad);
++ return 0;
++ }
++
++ return hns_roce_free_pd(pd);
+ }
+
+ struct ibv_xrcd *hns_roce_u_open_xrcd(struct ibv_context *context,
+@@ -275,6 +407,11 @@ int hns_roce_u_dealloc_mw(struct ibv_mw *mw)
+ return 0;
+ }
+
++enum {
++ CREATE_CQ_SUPPORTED_COMP_MASK = IBV_CQ_INIT_ATTR_MASK_FLAGS |
++ IBV_CQ_INIT_ATTR_MASK_PD,
++};
++
+ enum {
+ CREATE_CQ_SUPPORTED_WC_FLAGS = IBV_WC_STANDARD_FLAGS |
+ IBV_WC_EX_WITH_CVLAN,
+@@ -283,21 +420,60 @@ enum {
+ static int verify_cq_create_attr(struct ibv_cq_init_attr_ex *attr,
+ struct hns_roce_context *context)
+ {
++ struct hns_roce_pad *pad = to_hr_pad(attr->parent_domain);
++
+ if (!attr->cqe || attr->cqe > context->max_cqe)
+ return EINVAL;
+
+- if (attr->comp_mask)
++ if (!check_comp_mask(attr->comp_mask, CREATE_CQ_SUPPORTED_COMP_MASK)) {
++ verbs_err(&context->ibv_ctx, "unsupported cq comps 0x%x\n",
++ attr->comp_mask);
+ return EOPNOTSUPP;
++ }
+
+ if (!check_comp_mask(attr->wc_flags, CREATE_CQ_SUPPORTED_WC_FLAGS))
+ return EOPNOTSUPP;
+
++ if (attr->comp_mask & IBV_CQ_INIT_ATTR_MASK_PD) {
++ if (!pad) {
++ verbs_err(&context->ibv_ctx, "failed to check the pad of cq.\n");
++ return EINVAL;
++ }
++ atomic_fetch_add(&pad->pd.refcount, 1);
++ }
++
+ attr->cqe = max_t(uint32_t, HNS_ROCE_MIN_CQE_NUM,
+ roundup_pow_of_two(attr->cqe));
+
+ return 0;
+ }
+
++static int hns_roce_cq_spinlock_init(struct ibv_context *context,
++ struct hns_roce_cq *cq,
++ struct ibv_cq_init_attr_ex *attr)
++{
++ bool need_lock;
++
++ need_lock = hns_roce_whether_need_lock(attr->parent_domain);
++ if (!need_lock)
++ verbs_info(verbs_get_ctx(context), "configure cq as no lock.\n");
++
++ return hns_roce_spinlock_init(&cq->hr_lock, need_lock);
++}
++
++static int hns_roce_srq_spinlock_init(struct ibv_context *context,
++ struct hns_roce_srq *srq,
++ struct ibv_srq_init_attr_ex *attr)
++{
++ bool need_lock;
++
++ need_lock = hns_roce_whether_need_lock(attr->pd);
++ if (!need_lock)
++ verbs_info(verbs_get_ctx(context), "configure srq as no lock.\n");
++
++ return hns_roce_spinlock_init(&srq->hr_lock, need_lock);
++}
++
+ static int hns_roce_alloc_cq_buf(struct hns_roce_cq *cq)
+ {
+ int buf_size = hr_hw_page_align(cq->cq_depth * cq->cqe_size);
+@@ -337,6 +513,32 @@ static int exec_cq_create_cmd(struct ibv_context *context,
+ return 0;
+ }
+
++static int hns_roce_init_cq_swc(struct hns_roce_cq *cq,
++ struct ibv_cq_init_attr_ex *attr)
++{
++ list_head_init(&cq->list_sq);
++ list_head_init(&cq->list_rq);
++ list_head_init(&cq->list_srq);
++ list_head_init(&cq->list_xrc_srq);
++
++ if (!(attr->wc_flags & CREATE_CQ_SUPPORTED_WC_FLAGS))
++ return 0;
++
++ cq->sw_cqe = calloc(1, sizeof(struct hns_roce_v2_cqe));
++ if (!cq->sw_cqe)
++ return -ENOMEM;
++
++ return 0;
++}
++
++static void hns_roce_uninit_cq_swc(struct hns_roce_cq *cq)
++{
++ if (cq->sw_cqe) {
++ free(cq->sw_cqe);
++ cq->sw_cqe = NULL;
++ }
++}
++
+ static struct ibv_cq_ex *create_cq(struct ibv_context *context,
+ struct ibv_cq_init_attr_ex *attr)
+ {
+@@ -354,7 +556,10 @@ static struct ibv_cq_ex *create_cq(struct ibv_context *context,
+ goto err;
+ }
+
+- ret = pthread_spin_init(&cq->lock, PTHREAD_PROCESS_PRIVATE);
++ if (attr->comp_mask & IBV_CQ_INIT_ATTR_MASK_PD)
++ cq->parent_domain = attr->parent_domain;
++
++ ret = hns_roce_cq_spinlock_init(context, cq, attr);
+ if (ret)
+ goto err_lock;
+
+@@ -371,7 +576,9 @@ static struct ibv_cq_ex *create_cq(struct ibv_context *context,
+ goto err_db;
+ }
+
+- *cq->db = 0;
++ ret = hns_roce_init_cq_swc(cq, attr);
++ if (ret)
++ goto err_swc;
+
+ ret = exec_cq_create_cmd(context, cq, attr);
+ if (ret)
+@@ -382,11 +589,14 @@ static struct ibv_cq_ex *create_cq(struct ibv_context *context,
+ return &cq->verbs_cq.cq_ex;
+
+ err_cmd:
++ hns_roce_uninit_cq_swc(cq);
++err_swc:
+ hns_roce_free_db(hr_ctx, cq->db, HNS_ROCE_CQ_TYPE_DB);
+ err_db:
+ hns_roce_free_buf(&cq->buf);
+-err_lock:
+ err_buf:
++ hns_roce_spinlock_destroy(&cq->hr_lock);
++err_lock:
+ free(cq);
+ err:
+ if (ret < 0)
+@@ -437,16 +647,25 @@ int hns_roce_u_modify_cq(struct ibv_cq *cq, struct ibv_modify_cq_attr *attr)
+
+ int hns_roce_u_destroy_cq(struct ibv_cq *cq)
+ {
++ struct hns_roce_cq *hr_cq = to_hr_cq(cq);
++ struct hns_roce_pad *pad = to_hr_pad(hr_cq->parent_domain);
+ int ret;
+
+ ret = ibv_cmd_destroy_cq(cq);
+ if (ret)
+ return ret;
+
+- hns_roce_free_db(to_hr_ctx(cq->context), to_hr_cq(cq)->db,
+- HNS_ROCE_CQ_TYPE_DB);
+- hns_roce_free_buf(&to_hr_cq(cq)->buf);
+- free(to_hr_cq(cq));
++ hns_roce_uninit_cq_swc(to_hr_cq(cq));
++
++ hns_roce_free_db(to_hr_ctx(cq->context), hr_cq->db, HNS_ROCE_CQ_TYPE_DB);
++ hns_roce_free_buf(&hr_cq->buf);
++
++ hns_roce_spinlock_destroy(&hr_cq->hr_lock);
++
++ if (pad)
++ atomic_fetch_sub(&pad->pd.refcount, 1);
++
++ free(hr_cq);
+
+ return ret;
+ }
+@@ -652,10 +871,27 @@ static int exec_srq_create_cmd(struct ibv_context *context,
+ return 0;
+ }
+
++static void init_srq_cq_list(struct hns_roce_srq *srq,
++ struct ibv_srq_init_attr_ex *init_attr)
++{
++ struct hns_roce_cq *srq_cq;
++
++ list_node_init(&srq->xrc_srcq_node);
++
++ if (!init_attr->cq)
++ return;
++
++ srq_cq = to_hr_cq(init_attr->cq);
++ hns_roce_spin_lock(&srq_cq->hr_lock);
++ list_add_tail(&srq_cq->list_xrc_srq, &srq->xrc_srcq_node);
++ hns_roce_spin_unlock(&srq_cq->hr_lock);
++}
++
+ static struct ibv_srq *create_srq(struct ibv_context *context,
+ struct ibv_srq_init_attr_ex *init_attr)
+ {
+ struct hns_roce_context *hr_ctx = to_hr_ctx(context);
++ struct hns_roce_pad *pad = to_hr_pad(init_attr->pd);
+ struct hns_roce_srq *srq;
+ int ret;
+
+@@ -669,19 +905,20 @@ static struct ibv_srq *create_srq(struct ibv_context *context,
+ goto err;
+ }
+
+- if (pthread_spin_init(&srq->lock, PTHREAD_PROCESS_PRIVATE))
++ if (pad)
++ atomic_fetch_add(&pad->pd.refcount, 1);
++
++ if (hns_roce_srq_spinlock_init(context, srq, init_attr))
+ goto err_free_srq;
+
+ set_srq_param(context, srq, init_attr);
+ if (alloc_srq_buf(srq))
+- goto err_free_srq;
++ goto err_destroy_lock;
+
+ srq->rdb = hns_roce_alloc_db(hr_ctx, HNS_ROCE_SRQ_TYPE_DB);
+ if (!srq->rdb)
+ goto err_srq_buf;
+
+- *srq->rdb = 0;
+-
+ ret = exec_srq_create_cmd(context, srq, init_attr);
+ if (ret)
+ goto err_srq_db;
+@@ -694,6 +931,8 @@ static struct ibv_srq *create_srq(struct ibv_context *context,
+ init_attr->attr.max_sge =
+ min(init_attr->attr.max_sge - srq->rsv_sge, hr_ctx->max_srq_sge);
+
++ init_srq_cq_list(srq, init_attr);
++
+ return &srq->verbs_srq.srq;
+
+ err_destroy_srq:
+@@ -705,6 +944,9 @@ err_srq_db:
+ err_srq_buf:
+ free_srq_buf(srq);
+
++err_destroy_lock:
++ hns_roce_spinlock_destroy(&srq->hr_lock);
++
+ err_free_srq:
+ free(srq);
+
+@@ -766,12 +1008,27 @@ int hns_roce_u_query_srq(struct ibv_srq *srq, struct ibv_srq_attr *srq_attr)
+ return ret;
+ }
+
++static void del_srq_from_cq_list(struct hns_roce_srq *srq)
++{
++ struct hns_roce_cq *srq_cq = to_hr_cq(srq->verbs_srq.cq);
++
++ if (!srq_cq)
++ return;
++
++ hns_roce_spin_lock(&srq_cq->hr_lock);
++ list_del(&srq->xrc_srcq_node);
++ hns_roce_spin_unlock(&srq_cq->hr_lock);
++}
++
+ int hns_roce_u_destroy_srq(struct ibv_srq *ibv_srq)
+ {
+ struct hns_roce_context *ctx = to_hr_ctx(ibv_srq->context);
++ struct hns_roce_pad *pad = to_hr_pad(ibv_srq->pd);
+ struct hns_roce_srq *srq = to_hr_srq(ibv_srq);
+ int ret;
+
++ del_srq_from_cq_list(srq);
++
+ ret = ibv_cmd_destroy_srq(ibv_srq);
+ if (ret)
+ return ret;
+@@ -780,16 +1037,51 @@ int hns_roce_u_destroy_srq(struct ibv_srq *ibv_srq)
+
+ hns_roce_free_db(ctx, srq->rdb, HNS_ROCE_SRQ_TYPE_DB);
+ free_srq_buf(srq);
++
++ hns_roce_spinlock_destroy(&srq->hr_lock);
++
++ if (pad)
++ atomic_fetch_sub(&pad->pd.refcount, 1);
++
+ free(srq);
+
+ return 0;
+ }
+
++enum {
++ HNSDV_QP_SUP_COMP_MASK = HNSDV_QP_INIT_ATTR_MASK_QP_CREATE_FLAGS |
++ HNSDV_QP_INIT_ATTR_MASK_QP_CONGEST_TYPE,
++};
++
++static int check_hnsdv_qp_attr(struct hns_roce_context *ctx,
++ struct hnsdv_qp_init_attr *hns_attr)
++{
++ if (!hns_attr)
++ return 0;
++
++ if (!check_comp_mask(hns_attr->comp_mask, HNSDV_QP_SUP_COMP_MASK)) {
++ verbs_err(&ctx->ibv_ctx, "invalid hnsdv comp_mask 0x%x.\n",
++ hns_attr->comp_mask);
++ return EINVAL;
++ }
++
++ return 0;
++}
++
+ enum {
+ CREATE_QP_SUP_COMP_MASK = IBV_QP_INIT_ATTR_PD | IBV_QP_INIT_ATTR_XRCD |
+ IBV_QP_INIT_ATTR_SEND_OPS_FLAGS,
+ };
+
++enum {
++ SEND_OPS_FLAG_MASK =
++ IBV_QP_EX_WITH_RDMA_WRITE | IBV_QP_EX_WITH_RDMA_WRITE_WITH_IMM |
++ IBV_QP_EX_WITH_SEND | IBV_QP_EX_WITH_SEND_WITH_IMM |
++ IBV_QP_EX_WITH_RDMA_READ | IBV_QP_EX_WITH_ATOMIC_CMP_AND_SWP |
++ IBV_QP_EX_WITH_ATOMIC_FETCH_AND_ADD | IBV_QP_EX_WITH_LOCAL_INV |
++ IBV_QP_EX_WITH_SEND_WITH_INV,
++};
++
+ static int check_qp_create_mask(struct hns_roce_context *ctx,
+ struct ibv_qp_init_attr_ex *attr)
+ {
+@@ -798,6 +1090,10 @@ static int check_qp_create_mask(struct hns_roce_context *ctx,
+ if (!check_comp_mask(attr->comp_mask, CREATE_QP_SUP_COMP_MASK))
+ return EOPNOTSUPP;
+
++ if (attr->comp_mask & IBV_QP_INIT_ATTR_SEND_OPS_FLAGS &&
++ !check_comp_mask(attr->send_ops_flags, SEND_OPS_FLAG_MASK))
++ return -EOPNOTSUPP;
++
+ switch (attr->qp_type) {
+ case IBV_QPT_UD:
+ if (hr_dev->hw_version == HNS_ROCE_HW_VER2)
+@@ -866,7 +1162,8 @@ static int verify_qp_create_cap(struct hns_roce_context *ctx,
+ }
+
+ static int verify_qp_create_attr(struct hns_roce_context *ctx,
+- struct ibv_qp_init_attr_ex *attr)
++ struct ibv_qp_init_attr_ex *attr,
++ struct hnsdv_qp_init_attr *hns_attr)
+ {
+ int ret;
+
+@@ -874,9 +1171,48 @@ static int verify_qp_create_attr(struct hns_roce_context *ctx,
+ if (ret)
+ return ret;
+
++ ret = check_hnsdv_qp_attr(ctx, hns_attr);
++ if (ret)
++ return ret;
++
+ return verify_qp_create_cap(ctx, attr);
+ }
+
++static int hns_roce_qp_spinlock_init(struct hns_roce_context *ctx,
++ struct ibv_qp_init_attr_ex *attr,
++ struct hns_roce_qp *qp)
++{
++ bool sq_need_lock;
++ bool rq_need_lock;
++ int ret;
++
++ sq_need_lock = hns_roce_whether_need_lock(attr->pd);
++ if (!sq_need_lock)
++ verbs_info(&ctx->ibv_ctx, "configure sq as no lock.\n");
++
++ rq_need_lock = hns_roce_whether_need_lock(attr->pd);
++ if (!rq_need_lock)
++ verbs_info(&ctx->ibv_ctx, "configure rq as no lock.\n");
++
++ ret = hns_roce_spinlock_init(&qp->sq.hr_lock, sq_need_lock);
++ if (ret)
++ return ret;
++
++ ret = hns_roce_spinlock_init(&qp->rq.hr_lock, rq_need_lock);
++ if (ret) {
++ hns_roce_spinlock_destroy(&qp->sq.hr_lock);
++ return ret;
++ }
++
++ return 0;
++}
++
++void hns_roce_qp_spinlock_destroy(struct hns_roce_qp *qp)
++{
++ hns_roce_spinlock_destroy(&qp->rq.hr_lock);
++ hns_roce_spinlock_destroy(&qp->sq.hr_lock);
++}
++
+ static int alloc_recv_rinl_buf(uint32_t max_sge,
+ struct hns_roce_rinl_buf *rinl_buf)
+ {
+@@ -918,31 +1254,73 @@ static void free_recv_rinl_buf(struct hns_roce_rinl_buf *rinl_buf)
+ }
+ }
+
++static void get_best_multi_region_pg_shift(struct hns_roce_device *hr_dev,
++ struct hns_roce_context *ctx,
++ struct hns_roce_qp *qp)
++{
++ uint32_t ext_sge_size;
++ uint32_t sq_size;
++ uint32_t rq_size;
++ uint8_t pg_shift;
++
++ if (!(ctx->config & HNS_ROCE_UCTX_RSP_DYN_QP_PGSZ)) {
++ qp->pageshift = HNS_HW_PAGE_SHIFT;
++ return;
++ }
++
++ /*
++ * The larger the pagesize used, the better the performance, but it
++ * may waste more memory. Therefore, we use the least common multiple
++ * (aligned to a power of 2) of the sq wqe buffer size, the rq wqe
++ * buffer size and the ext_sge buffer size as the pagesize. The wqe
++ * buffer page also cannot be larger than the buffer size used by the
++ * extended sge. Additionally, since the kernel cannot guarantee the
++ * allocation of contiguous memory larger than the system page, the
++ * pagesize must be smaller than the system page.
++ */
++ sq_size = qp->sq.wqe_cnt << qp->sq.wqe_shift;
++ ext_sge_size = qp->ex_sge.sge_cnt << qp->ex_sge.sge_shift;
++ rq_size = qp->rq.wqe_cnt << qp->rq.wqe_shift;
++
++ pg_shift = max_t(uint8_t, sq_size ? hr_ilog32(sq_size) : 0,
++ rq_size ? hr_ilog32(rq_size) : 0);
++ pg_shift = ext_sge_size ?
++ min_t(uint8_t, pg_shift, hr_ilog32(ext_sge_size)) :
++ pg_shift;
++ pg_shift = max_t(uint8_t, pg_shift, HNS_HW_PAGE_SHIFT);
++ qp->pageshift = min_t(uint8_t, pg_shift, hr_ilog32(hr_dev->page_size));
++}
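
As a worked example (illustrative numbers, assuming hr_ilog32() rounds up to the nearest power-of-two exponent and HNS_HW_PAGE_SHIFT is 12): with a 16 KiB SQ buffer, an 8 KiB RQ buffer and a 32 KiB extended-SGE buffer on a 64 KiB system page, pg_shift starts as max(14, 13) = 14, stays 14 after the min() against the extended-SGE exponent 15, is not raised by the max() against HNS_HW_PAGE_SHIFT, and is finally capped by min(14, 16) = 14, so the WQE buffer is laid out in 16 KiB pages.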
++
+ static int calc_qp_buff_size(struct hns_roce_device *hr_dev,
++ struct hns_roce_context *ctx,
+ struct hns_roce_qp *qp)
+ {
+ struct hns_roce_wq *sq = &qp->sq;
+ struct hns_roce_wq *rq = &qp->rq;
++ unsigned int page_size;
+ unsigned int size;
+
+ qp->buf_size = 0;
++ get_best_multi_region_pg_shift(hr_dev, ctx, qp);
++ page_size = 1 << qp->pageshift;
+
+ /* SQ WQE */
+ sq->offset = 0;
+- size = to_hr_hem_entries_size(sq->wqe_cnt, sq->wqe_shift);
++ size = align(sq->wqe_cnt << sq->wqe_shift, page_size);
+ qp->buf_size += size;
+
+ /* extend SGE WQE in SQ */
+ qp->ex_sge.offset = qp->buf_size;
+ if (qp->ex_sge.sge_cnt > 0) {
+- size = to_hr_hem_entries_size(qp->ex_sge.sge_cnt,
+- qp->ex_sge.sge_shift);
++ size = align(qp->ex_sge.sge_cnt << qp->ex_sge.sge_shift,
++ page_size);
+ qp->buf_size += size;
+ }
+
+ /* RQ WQE */
+ rq->offset = qp->buf_size;
+- size = to_hr_hem_entries_size(rq->wqe_cnt, rq->wqe_shift);
++ size = align(rq->wqe_cnt << rq->wqe_shift, page_size);
+ qp->buf_size += size;
+
+ if (qp->buf_size < 1)
+@@ -951,6 +1329,26 @@ static int calc_qp_buff_size(struct hns_roce_device *hr_dev,
+ return 0;
+ }
+
++static inline bool check_qp_support_dca(struct hns_roce_dca_ctx *dca_ctx,
++ struct ibv_qp_init_attr_ex *attr,
++ struct hnsdv_qp_init_attr *hns_attr)
++{
++ /* DCA pool is disabled */
++ if (!dca_ctx->unit_size)
++ return false;
++
++ /* Unsupported QP type */
++ if (attr->qp_type != IBV_QPT_RC && attr->qp_type != IBV_QPT_XRC_SEND)
++ return false;
++
++ if (hns_attr &&
++ (hns_attr->comp_mask & HNSDV_QP_INIT_ATTR_MASK_QP_CREATE_FLAGS) &&
++ (hns_attr->create_flags & HNSDV_QP_CREATE_ENABLE_DCA_MODE))
++ return true;
++
++ return false;
++}
++
+ static void qp_free_wqe(struct hns_roce_qp *qp)
+ {
+ free_recv_rinl_buf(&qp->rq_rinl_buf);
+@@ -962,12 +1360,13 @@ static void qp_free_wqe(struct hns_roce_qp *qp)
+ hns_roce_free_buf(&qp->buf);
+ }
+
+-static int qp_alloc_wqe(struct ibv_qp_cap *cap, struct hns_roce_qp *qp,
+- struct hns_roce_context *ctx)
++static int qp_alloc_wqe(struct ibv_qp_init_attr_ex *attr,
++ struct hnsdv_qp_init_attr *hns_attr,
++ struct hns_roce_qp *qp, struct hns_roce_context *ctx)
+ {
+ struct hns_roce_device *hr_dev = to_hr_dev(ctx->ibv_ctx.context.device);
+
+- if (calc_qp_buff_size(hr_dev, qp))
++ if (calc_qp_buff_size(hr_dev, ctx, qp))
+ return -EINVAL;
+
+ qp->sq.wrid = malloc(qp->sq.wqe_cnt * sizeof(uint64_t));
+@@ -981,12 +1380,26 @@ static int qp_alloc_wqe(struct ibv_qp_cap *cap, struct hns_roce_qp *qp,
+ }
+
+ if (qp->rq_rinl_buf.wqe_cnt) {
+- if (alloc_recv_rinl_buf(cap->max_recv_sge, &qp->rq_rinl_buf))
++ if (alloc_recv_rinl_buf(attr->cap.max_recv_sge,
++ &qp->rq_rinl_buf))
+ goto err_alloc;
+ }
+
+- if (hns_roce_alloc_buf(&qp->buf, qp->buf_size, HNS_HW_PAGE_SIZE))
+- goto err_alloc;
++ if (check_qp_support_dca(&ctx->dca_ctx, attr, hns_attr) &&
++ ctx->dca_ctx.max_size > 0) {
++ /* when DCA is enabled, use a buffer list to store page addr */
++ qp->buf.buf = NULL;
++ qp->dca_wqe.max_cnt = hr_hw_page_count(qp->buf_size);
++ qp->dca_wqe.shift = HNS_HW_PAGE_SHIFT;
++ qp->dca_wqe.bufs = calloc(qp->dca_wqe.max_cnt, sizeof(void *));
++ if (!qp->dca_wqe.bufs)
++ goto err_alloc;
++ verbs_debug(&ctx->ibv_ctx, "alloc DCA buf.\n");
++ } else {
++ if (hns_roce_alloc_buf(&qp->buf, qp->buf_size,
++ HNS_HW_PAGE_SIZE))
++ goto err_alloc;
++ }
+
+ return 0;
+
+@@ -1141,8 +1554,6 @@ static int qp_alloc_db(struct ibv_qp_init_attr_ex *attr, struct hns_roce_qp *qp,
+ qp->sdb = hns_roce_alloc_db(ctx, HNS_ROCE_QP_TYPE_DB);
+ if (!qp->sdb)
+ return -ENOMEM;
+-
+- *qp->sdb = 0;
+ }
+
+ if (attr->cap.max_recv_sge) {
+@@ -1154,8 +1565,6 @@ static int qp_alloc_db(struct ibv_qp_init_attr_ex *attr, struct hns_roce_qp *qp,
+
+ return -ENOMEM;
+ }
+-
+- *qp->rdb = 0;
+ }
+
+ return 0;
+@@ -1185,10 +1594,33 @@ static int hns_roce_store_qp(struct hns_roce_context *ctx,
+ return 0;
+ }
+
++static int to_cmd_cong_type(uint8_t cong_type, __u64 *cmd_cong_type)
++{
++ switch (cong_type) {
++ case HNSDV_QP_CREATE_ENABLE_DCQCN:
++ *cmd_cong_type = HNS_ROCE_CREATE_QP_FLAGS_DCQCN;
++ break;
++ case HNSDV_QP_CREATE_ENABLE_LDCP:
++ *cmd_cong_type = HNS_ROCE_CREATE_QP_FLAGS_LDCP;
++ break;
++ case HNSDV_QP_CREATE_ENABLE_HC3:
++ *cmd_cong_type = HNS_ROCE_CREATE_QP_FLAGS_HC3;
++ break;
++ case HNSDV_QP_CREATE_ENABLE_DIP:
++ *cmd_cong_type = HNS_ROCE_CREATE_QP_FLAGS_DIP;
++ break;
++ default:
++ return EINVAL;
++ }
++
++ return 0;
++}
++
+ static int qp_exec_create_cmd(struct ibv_qp_init_attr_ex *attr,
+ struct hns_roce_qp *qp,
+ struct hns_roce_context *ctx,
+- uint64_t *dwqe_mmap_key)
++ uint64_t *dwqe_mmap_key,
++ struct hnsdv_qp_init_attr *hns_attr)
+ {
+ struct hns_roce_create_qp_ex_resp resp_ex = {};
+ struct hns_roce_create_qp_ex cmd_ex = {};
+@@ -1199,6 +1631,16 @@ static int qp_exec_create_cmd(struct ibv_qp_init_attr_ex *attr,
+ cmd_ex.buf_addr = (uintptr_t)qp->buf.buf;
+ cmd_ex.log_sq_stride = qp->sq.wqe_shift;
+ cmd_ex.log_sq_bb_count = hr_ilog32(qp->sq.wqe_cnt);
++ cmd_ex.pageshift = qp->pageshift;
++
++ if (hns_attr &&
++ hns_attr->comp_mask & HNSDV_QP_INIT_ATTR_MASK_QP_CONGEST_TYPE) {
++ ret = to_cmd_cong_type(hns_attr->congest_type,
++ &cmd_ex.cong_type_flags);
++ if (ret)
++ return ret;
++ cmd_ex.comp_mask |= HNS_ROCE_CREATE_QP_MASK_CONGEST_TYPE;
++ }
+
+ ret = ibv_cmd_create_qp_ex2(&ctx->ibv_ctx.context, &qp->verbs_qp, attr,
+ &cmd_ex.ibv_cmd, sizeof(cmd_ex),
+@@ -1242,16 +1684,13 @@ void hns_roce_free_qp_buf(struct hns_roce_qp *qp, struct hns_roce_context *ctx)
+ }
+
+ static int hns_roce_alloc_qp_buf(struct ibv_qp_init_attr_ex *attr,
++ struct hnsdv_qp_init_attr *hns_attr,
+ struct hns_roce_qp *qp,
+ struct hns_roce_context *ctx)
+ {
+ int ret;
+
+- if (pthread_spin_init(&qp->sq.lock, PTHREAD_PROCESS_PRIVATE) ||
+- pthread_spin_init(&qp->rq.lock, PTHREAD_PROCESS_PRIVATE))
+- return -ENOMEM;
+-
+- ret = qp_alloc_wqe(&attr->cap, qp, ctx);
++ ret = qp_alloc_wqe(attr, hns_attr, qp, ctx);
+ if (ret)
+ return ret;
+
+@@ -1273,15 +1712,41 @@ static int mmap_dwqe(struct ibv_context *ibv_ctx, struct hns_roce_qp *qp,
+ return 0;
+ }
+
++static void add_qp_to_cq_list(struct ibv_qp_init_attr_ex *attr,
++ struct hns_roce_qp *qp)
++{
++ struct hns_roce_cq *send_cq, *recv_cq;
++
++ send_cq = attr->send_cq ? to_hr_cq(attr->send_cq) : NULL;
++ recv_cq = attr->recv_cq ? to_hr_cq(attr->recv_cq) : NULL;
++
++ list_node_init(&qp->scq_node);
++ list_node_init(&qp->rcq_node);
++ list_node_init(&qp->srcq_node);
++
++ hns_roce_lock_cqs(send_cq, recv_cq);
++ if (send_cq)
++ list_add_tail(&send_cq->list_sq, &qp->scq_node);
++ if (recv_cq) {
++ if (attr->srq)
++ list_add_tail(&recv_cq->list_srq, &qp->srcq_node);
++ else
++ list_add_tail(&recv_cq->list_rq, &qp->rcq_node);
++ }
++ hns_roce_unlock_cqs(send_cq, recv_cq);
++}
++
+ static struct ibv_qp *create_qp(struct ibv_context *ibv_ctx,
+- struct ibv_qp_init_attr_ex *attr)
++ struct ibv_qp_init_attr_ex *attr,
++ struct hnsdv_qp_init_attr *hns_attr)
+ {
+ struct hns_roce_context *context = to_hr_ctx(ibv_ctx);
++ struct hns_roce_pad *pad = to_hr_pad(attr->pd);
+ struct hns_roce_qp *qp;
+ uint64_t dwqe_mmap_key;
+ int ret;
+
+- ret = verify_qp_create_attr(context, attr);
++ ret = verify_qp_create_attr(context, attr, hns_attr);
+ if (ret)
+ goto err;
+
+@@ -1293,11 +1758,18 @@ static struct ibv_qp *create_qp(struct ibv_context *ibv_ctx,
+
+ hns_roce_set_qp_params(attr, qp, context);
+
+- ret = hns_roce_alloc_qp_buf(attr, qp, context);
++ if (pad)
++ atomic_fetch_add(&pad->pd.refcount, 1);
++
++ ret = hns_roce_qp_spinlock_init(context, attr, qp);
++ if (ret)
++ goto err_spinlock;
++
++ ret = hns_roce_alloc_qp_buf(attr, hns_attr, qp, context);
+ if (ret)
+ goto err_buf;
+
+- ret = qp_exec_create_cmd(attr, qp, context, &dwqe_mmap_key);
++ ret = qp_exec_create_cmd(attr, qp, context, &dwqe_mmap_key, hns_attr);
+ if (ret)
+ goto err_cmd;
+
+@@ -1316,6 +1788,7 @@ static struct ibv_qp *create_qp(struct ibv_context *ibv_ctx,
+ }
+
+ qp_setup_config(attr, qp, context);
++ add_qp_to_cq_list(attr, qp);
+
+ return &qp->verbs_qp.qp;
+
+@@ -1326,6 +1799,8 @@ err_ops:
+ err_cmd:
+ hns_roce_free_qp_buf(qp, context);
+ err_buf:
++ hns_roce_qp_spinlock_destroy(qp);
++err_spinlock:
+ free(qp);
+ err:
+ if (ret < 0)
+@@ -1345,7 +1820,7 @@ struct ibv_qp *hns_roce_u_create_qp(struct ibv_pd *pd,
+ attrx.comp_mask = IBV_QP_INIT_ATTR_PD;
+ attrx.pd = pd;
+
+- qp = create_qp(pd->context, &attrx);
++ qp = create_qp(pd->context, &attrx, NULL);
+ if (qp)
+ memcpy(attr, &attrx, sizeof(*attr));
+
+@@ -1355,7 +1830,44 @@ struct ibv_qp *hns_roce_u_create_qp(struct ibv_pd *pd,
+ struct ibv_qp *hns_roce_u_create_qp_ex(struct ibv_context *context,
+ struct ibv_qp_init_attr_ex *attr)
+ {
+- return create_qp(context, attr);
++ return create_qp(context, attr, NULL);
++}
++
++struct ibv_qp *hnsdv_create_qp(struct ibv_context *context,
++ struct ibv_qp_init_attr_ex *qp_attr,
++ struct hnsdv_qp_init_attr *hns_attr)
++{
++ if (!context || !qp_attr) {
++ errno = EINVAL;
++ return NULL;
++ }
++
++ if (!is_hns_dev(context->device)) {
++ errno = EOPNOTSUPP;
++ return NULL;
++ }
++
++ return create_qp(context, qp_attr, hns_attr);
++}
++
++int hnsdv_query_device(struct ibv_context *context,
++ struct hnsdv_context *attrs_out)
++{
++ struct hns_roce_device *hr_dev = to_hr_dev(context->device);
++
++ if (!hr_dev || !attrs_out)
++ return EINVAL;
++
++ if (!is_hns_dev(context->device)) {
++ verbs_err(verbs_get_ctx(context), "not a HNS RoCE device!\n");
++ return EOPNOTSUPP;
++ }
++ memset(attrs_out, 0, sizeof(*attrs_out));
++
++ attrs_out->comp_mask |= HNSDV_CONTEXT_MASK_CONGEST_TYPE;
++ attrs_out->congest_type = hr_dev->congest_cap;
++
++ return 0;
+ }
+
+ struct ibv_qp *hns_roce_u_open_qp(struct ibv_context *context,
+@@ -1486,6 +1998,9 @@ struct ibv_ah *hns_roce_u_create_ah(struct ibv_pd *pd, struct ibv_ah_attr *attr)
+ ah->av.mac, NULL))
+ goto err;
+
++ if (resp.tc_mode == HNS_ROCE_TC_MAP_MODE_DSCP)
++ ah->av.sl = resp.priority;
++
+ ah->av.udp_sport = get_ah_udp_sport(attr);
+
+ return &ah->ibv_ah;
+diff --git a/providers/hns/hnsdv.h b/providers/hns/hnsdv.h
+new file mode 100644
+index 0000000..68bf001
+--- /dev/null
++++ b/providers/hns/hnsdv.h
+@@ -0,0 +1,85 @@
++/* SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause */
++/*
++ * Copyright (c) 2024 Hisilicon Limited.
++ */
++
++#ifndef __HNSDV_H__
++#define __HNSDV_H__
++
++#include
++#include
++#include
++#include
++
++#ifdef __cplusplus
++extern "C" {
++#endif
++
++enum hnsdv_qp_congest_ctrl_type {
++ HNSDV_QP_CREATE_ENABLE_DCQCN = 1 << 0,
++ HNSDV_QP_CREATE_ENABLE_LDCP = 1 << 1,
++ HNSDV_QP_CREATE_ENABLE_HC3 = 1 << 2,
++ HNSDV_QP_CREATE_ENABLE_DIP = 1 << 3,
++};
++
++enum hnsdv_qp_create_flags {
++ HNSDV_QP_CREATE_ENABLE_DCA_MODE = 1 << 0,
++};
++
++enum hnsdv_context_comp_mask {
++ HNSDV_CONTEXT_MASK_DCA_PRIME_QPS = 1 << 0,
++ HNSDV_CONTEXT_MASK_DCA_UNIT_SIZE = 1 << 1,
++ HNSDV_CONTEXT_MASK_DCA_MAX_SIZE = 1 << 2,
++ HNSDV_CONTEXT_MASK_DCA_MIN_SIZE = 1 << 3,
++};
++
++enum hnsdv_qp_init_attr_mask {
++ HNSDV_QP_INIT_ATTR_MASK_QP_CREATE_FLAGS = 1 << 0,
++ HNSDV_QP_INIT_ATTR_MASK_QP_CONGEST_TYPE = 1 << 1,
++};
++
++struct hnsdv_context_attr {
++ uint64_t flags; /* Use enum hnsdv_context_attr_flags */
++ uint64_t comp_mask; /* Use enum hnsdv_context_comp_mask */
++ uint32_t dca_prime_qps;
++ uint32_t dca_unit_size;
++ uint64_t dca_max_size;
++ uint64_t dca_min_size;
++};
++
++struct hnsdv_qp_init_attr {
++ uint64_t comp_mask; /* Use enum hnsdv_qp_init_attr_mask */
++ uint32_t create_flags; /* Use enum hnsdv_qp_create_flags */
++ uint8_t congest_type; /* Use enum hnsdv_qp_congest_ctrl_type */
++ uint8_t reserved[3];
++};
++
++enum hnsdv_context_attr_flags {
++ HNSDV_CONTEXT_FLAGS_DCA = 1 << 0,
++};
++
++enum hnsdv_query_context_comp_mask {
++ HNSDV_CONTEXT_MASK_CONGEST_TYPE = 1 << 0,
++};
++
++struct hnsdv_context {
++ uint64_t comp_mask; /* Use enum hnsdv_query_context_comp_mask */
++ uint64_t flags;
++ uint8_t congest_type; /* Use enum hnsdv_qp_congest_ctrl_type */
++ uint8_t reserved[7];
++};
++
++bool hnsdv_is_supported(struct ibv_device *device);
++int hnsdv_query_device(struct ibv_context *ctx_in,
++ struct hnsdv_context *attrs_out);
++struct ibv_qp *hnsdv_create_qp(struct ibv_context *context,
++ struct ibv_qp_init_attr_ex *qp_attr,
++ struct hnsdv_qp_init_attr *hns_qp_attr);
++struct ibv_context *hnsdv_open_device(struct ibv_device *device,
++ struct hnsdv_context_attr *attr);
++
++#ifdef __cplusplus
++}
++#endif
++
++#endif /* __HNSDV_H__ */
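
A short caller-side sketch of the API exported above (qp_attr is assumed to be a fully initialised struct ibv_qp_init_attr_ex; device and capability setup are elided):

	struct ibv_qp *create_ldcp_qp(struct ibv_context *ctx,
				      struct ibv_qp_init_attr_ex *qp_attr)
	{
		struct hnsdv_qp_init_attr hns_attr = {
			.comp_mask = HNSDV_QP_INIT_ATTR_MASK_QP_CONGEST_TYPE,
			.congest_type = HNSDV_QP_CREATE_ENABLE_LDCP,
		};

		/* Fall back to the generic verbs path on non-hns devices. */
		if (!hnsdv_is_supported(ctx->device))
			return ibv_create_qp_ex(ctx, qp_attr);

		return hnsdv_create_qp(ctx, qp_attr, &hns_attr);
	}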
+diff --git a/providers/hns/libhns.map b/providers/hns/libhns.map
+new file mode 100644
+index 0000000..a955346
+--- /dev/null
++++ b/providers/hns/libhns.map
+@@ -0,0 +1,10 @@
++/* Export symbols should be added below according to
++ Documentation/versioning.md document. */
++HNS_1.0 {
++ global:
++ hnsdv_is_supported;
++ hnsdv_create_qp;
++ hnsdv_query_device;
++ hnsdv_open_device;
++ local: *;
++};
+diff --git a/providers/zrdma/CMakeLists.txt b/providers/zrdma/CMakeLists.txt
+new file mode 100644
+index 0000000..7706b9a
+--- /dev/null
++++ b/providers/zrdma/CMakeLists.txt
+@@ -0,0 +1,18 @@
++# SPDX-License-Identifier: (GPL-2.0 OR Linux-OpenIB)
++# Copyright (c) 2019, Intel Corporation.
++set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -Werror")
++
++rdma_shared_provider(zrdma libzrdma.map
++ 1 1.1.${PACKAGE_VERSION}
++ uk.c
++ umain.c
++ uverbs.c
++ private_verbs_cmd.c
++)
++
++publish_headers(infiniband
++ zxdh_dv.h
++)
++
++
++rdma_pkg_config("zrdma" "libibverbs" "${CMAKE_THREAD_LIBS_INIT}")
+diff --git a/providers/zrdma/abi.h b/providers/zrdma/abi.h
+new file mode 100644
+index 0000000..0fe8547
+--- /dev/null
++++ b/providers/zrdma/abi.h
+@@ -0,0 +1,36 @@
++/* SPDX-License-Identifier: GPL-2.0 or Linux-OpenIB */
++/* Copyright (c) 2024 ZTE Corporation. All rights reserved. */
++#ifndef PROVIDER_ZXDH_ABI_H
++#define PROVIDER_ZXDH_ABI_H
++
++#include "zxdh.h"
++#include
++#include
++#include
++
++#define ZXDH_MIN_ABI_VERSION 0
++#define ZXDH_MAX_ABI_VERSION 5
++
++DECLARE_DRV_CMD(zxdh_ualloc_pd, IB_USER_VERBS_CMD_ALLOC_PD, empty,
++ zxdh_alloc_pd_resp);
++DECLARE_DRV_CMD(zxdh_ucreate_cq, IB_USER_VERBS_CMD_CREATE_CQ,
++ zxdh_create_cq_req, zxdh_create_cq_resp);
++DECLARE_DRV_CMD(zxdh_ucreate_cq_ex, IB_USER_VERBS_EX_CMD_CREATE_CQ,
++ zxdh_create_cq_req, zxdh_create_cq_resp);
++DECLARE_DRV_CMD(zxdh_uresize_cq, IB_USER_VERBS_CMD_RESIZE_CQ,
++ zxdh_resize_cq_req, empty);
++DECLARE_DRV_CMD(zxdh_ucreate_qp, IB_USER_VERBS_CMD_CREATE_QP,
++ zxdh_create_qp_req, zxdh_create_qp_resp);
++DECLARE_DRV_CMD(zxdh_umodify_qp, IB_USER_VERBS_EX_CMD_MODIFY_QP,
++ zxdh_modify_qp_req, zxdh_modify_qp_resp);
++DECLARE_DRV_CMD(zxdh_get_context, IB_USER_VERBS_CMD_GET_CONTEXT,
++ zxdh_alloc_ucontext_req, zxdh_alloc_ucontext_resp);
++DECLARE_DRV_CMD(zxdh_ureg_mr, IB_USER_VERBS_CMD_REG_MR, zxdh_mem_reg_req,
++ zxdh_reg_mr_resp);
++DECLARE_DRV_CMD(zxdh_urereg_mr, IB_USER_VERBS_CMD_REREG_MR, zxdh_mem_reg_req,
++ empty);
++DECLARE_DRV_CMD(zxdh_ucreate_ah, IB_USER_VERBS_CMD_CREATE_AH, empty,
++ zxdh_create_ah_resp);
++DECLARE_DRV_CMD(zxdh_ucreate_srq, IB_USER_VERBS_CMD_CREATE_SRQ,
++ zxdh_create_srq_req, zxdh_create_srq_resp);
++#endif /* PROVIDER_ZXDH_ABI_H */
+diff --git a/providers/zrdma/defs.h b/providers/zrdma/defs.h
+new file mode 100644
+index 0000000..24b6c56
+--- /dev/null
++++ b/providers/zrdma/defs.h
+@@ -0,0 +1,388 @@
++/* SPDX-License-Identifier: GPL-2.0 or Linux-OpenIB */
++/* Copyright (c) 2024 ZTE Corporation. All rights reserved. */
++#ifndef ZXDH_DEFS_H
++#define ZXDH_DEFS_H
++
++#include "osdep.h"
++
++#define ZXDH_RECV_ERR_FLAG_NAK_RNR_NAK 1
++#define ZXDH_RECV_ERR_FLAG_READ_RESP 2
++#define ZXDH_RETRY_CQE_SQ_OPCODE_ERR 32
++#define ZXDH_QP_RETRY_COUNT 2
++#define ZXDH_RESET_RETRY_CQE_SQ_OPCODE_ERR 0x1f
++
++#define ZXDH_QP_TYPE_ROCE_RC 1
++#define ZXDH_QP_TYPE_ROCE_UD 2
++
++#define ZXDH_HW_PAGE_SIZE 4096
++#define ZXDH_HW_PAGE_SHIFT 12
++#define ZXDH_CQE_QTYPE_RQ 0
++#define ZXDH_CQE_QTYPE_SQ 1
++
++#define ZXDH_MAX_SQ_WQES_PER_PAGE 128
++#define ZXDH_MAX_SQ_DEPTH 32768
++
++#define ZXDH_QP_SW_MIN_WQSIZE 64u /* in WRs*/
++#define ZXDH_QP_WQE_MIN_SIZE 32
++#define ZXDH_QP_SQE_MIN_SIZE 32
++#define ZXDH_QP_RQE_MIN_SIZE 16
++#define ZXDH_QP_WQE_MAX_SIZE 256
++#define ZXDH_QP_WQE_MIN_QUANTA 1
++#define ZXDH_MAX_RQ_WQE_SHIFT_GEN1 2
++#define ZXDH_MAX_RQ_WQE_SHIFT_GEN2 3
++#define ZXDH_SRQ_FRAG_BYTESIZE 16
++#define ZXDH_QP_FRAG_BYTESIZE 16
++#define ZXDH_SQ_WQE_BYTESIZE 32
++#define ZXDH_SRQ_WQE_MIN_SIZE 16
++
++#define ZXDH_SQ_RSVD 258
++#define ZXDH_RQ_RSVD 1
++#define ZXDH_SRQ_RSVD 1
++
++#define ZXDH_FEATURE_RTS_AE 1ULL
++#define ZXDH_FEATURE_CQ_RESIZE 2ULL
++#define ZXDHQP_OP_RDMA_WRITE 0x00
++#define ZXDHQP_OP_RDMA_READ 0x01
++#define ZXDHQP_OP_RDMA_SEND 0x03
++#define ZXDHQP_OP_RDMA_SEND_INV 0x04
++#define ZXDHQP_OP_RDMA_SEND_SOL_EVENT 0x05
++#define ZXDHQP_OP_RDMA_SEND_SOL_EVENT_INV 0x06
++#define ZXDHQP_OP_BIND_MW 0x08
++#define ZXDHQP_OP_FAST_REGISTER 0x09
++#define ZXDHQP_OP_LOCAL_INVALIDATE 0x0a
++#define ZXDHQP_OP_RDMA_READ_LOC_INV 0x0b
++#define ZXDHQP_OP_NOP 0x0c
++
++#define ZXDH_CQPHC_QPCTX GENMASK_ULL(63, 0)
++#define ZXDH_QP_DBSA_HW_SQ_TAIL GENMASK_ULL(14, 0)
++#define ZXDH_CQ_DBSA_CQEIDX GENMASK_ULL(22, 0)
++#define ZXDH_CQ_DBSA_SW_CQ_SELECT GENMASK_ULL(28, 23)
++#define ZXDH_CQ_DBSA_ARM_NEXT BIT_ULL(31)
++// #define ZXDH_CQ_DBSA_ARM_NEXT_SE BIT_ULL(15)
++#define ZXDH_CQ_DBSA_ARM_SEQ_NUM GENMASK_ULL(30, 29)
++#define ZXDH_CQ_ARM_CQ_ID_S 10
++#define ZXDH_CQ_ARM_CQ_ID GENMASK_ULL(29, 10)
++#define ZXDH_CQ_ARM_DBSA_VLD_S 30
++#define ZXDH_CQ_ARM_DBSA_VLD BIT_ULL(30)
++
++/* CQP and iWARP Completion Queue */
++#define ZXDH_CQ_QPCTX ZXDH_CQPHC_QPCTX
++
++#define ZXDH_CQ_MINERR GENMASK_ULL(22, 7)
++#define ZXDH_CQ_MAJERR GENMASK_ULL(38, 23)
++#define ZXDH_CQ_WQEIDX GENMASK_ULL(54, 40)
++#define ZXDH_CQ_EXTCQE BIT_ULL(50)
++#define ZXDH_OOO_CMPL BIT_ULL(54)
++#define ZXDH_CQ_ERROR BIT_ULL(39)
++#define ZXDH_CQ_SQ BIT_ULL(4)
++
++#define ZXDH_CQ_VALID BIT_ULL(5)
++#define ZXDH_CQ_IMMVALID BIT_ULL(0)
++#define ZXDH_CQ_UDSMACVALID BIT_ULL(26)
++#define ZXDH_CQ_UDVLANVALID BIT_ULL(27)
++#define ZXDH_CQ_IMMDATA GENMASK_ULL(31, 0)
++#define ZXDH_CQ_UDSMAC GENMASK_ULL(47, 0)
++#define ZXDH_CQ_UDVLAN GENMASK_ULL(63, 48)
++
++#define ZXDH_CQ_IMMDATA_S 0
++#define ZXDH_CQ_IMMDATA_M (0xffffffffffffffffULL << ZXDH_CQ_IMMVALID_S)
++#define ZXDH_CQ_IMMDATALOW32 GENMASK_ULL(31, 0)
++#define ZXDH_CQ_IMMDATAUP32 GENMASK_ULL(63, 32)
++#define ZXDHCQ_PAYLDLEN GENMASK_ULL(63, 32)
++#define ZXDHCQ_TCPSEQNUMRTT GENMASK_ULL(63, 32)
++#define ZXDHCQ_INVSTAG_S 11
++#define ZXDHCQ_INVSTAG GENMASK_ULL(42, 11)
++#define ZXDHCQ_QPID GENMASK_ULL(63, 44)
++
++#define ZXDHCQ_UDSRCQPN GENMASK_ULL(24, 1)
++#define ZXDHCQ_PSHDROP BIT_ULL(51)
++#define ZXDHCQ_STAG_S 43
++#define ZXDHCQ_STAG BIT_ULL(43)
++#define ZXDHCQ_IPV4 BIT_ULL(25)
++#define ZXDHCQ_SOEVENT BIT_ULL(6)
++#define ZXDHCQ_OP GENMASK_ULL(63, 58)
++
++/* Manage Push Page - MPP */
++#define ZXDH_INVALID_PUSH_PAGE_INDEX_GEN_1 0xffff
++#define ZXDH_INVALID_PUSH_PAGE_INDEX 0xffffffff
++
++#define ZXDHQPSQ_OPCODE GENMASK_ULL(62, 57)
++#define ZXDHQPSQ_COPY_HOST_PBL BIT_ULL(43)
++#define ZXDHQPSQ_ADDFRAGCNT GENMASK_ULL(39, 32)
++#define ZXDHQPSQ_PUSHWQE BIT_ULL(56)
++#define ZXDHQPSQ_STREAMMODE BIT_ULL(58)
++#define ZXDHQPSQ_WAITFORRCVPDU BIT_ULL(59)
++#define ZXDHQPSQ_READFENCE BIT_ULL(54)
++#define ZXDHQPSQ_LOCALFENCE BIT_ULL(55)
++#define ZXDHQPSQ_UDPHEADER BIT_ULL(61)
++#define ZXDHQPSQ_L4LEN GENMASK_ULL(45, 42)
++#define ZXDHQPSQ_SIGCOMPL BIT_ULL(56)
++#define ZXDHQPSQ_SOLICITED BIT_ULL(53)
++#define ZXDHQPSQ_VALID BIT_ULL(63)
++
++#define ZXDHQPSQ_FIRST_FRAG_VALID BIT_ULL(0)
++#define ZXDHQPSQ_FIRST_FRAG_LEN GENMASK_ULL(31, 1)
++#define ZXDHQPSQ_FIRST_FRAG_STAG GENMASK_ULL(63, 32)
++#define ZXDHQPSQ_FRAG_TO ZXDH_CQPHC_QPCTX
++#define ZXDHQPSQ_FRAG_VALID BIT_ULL(63)
++#define ZXDHQPSQ_FRAG_LEN GENMASK_ULL(62, 32)
++#define ZXDHQPSQ_FRAG_STAG GENMASK_ULL(31, 0)
++#define ZXDHQPSQ_GEN1_FRAG_LEN GENMASK_ULL(31, 0)
++#define ZXDHQPSQ_GEN1_FRAG_STAG GENMASK_ULL(63, 32)
++#define ZXDHQPSQ_REMSTAGINV GENMASK_ULL(31, 0)
++#define ZXDHQPSQ_DESTQKEY GENMASK_ULL(31, 0)
++#define ZXDHQPSQ_DESTQPN GENMASK_ULL(55, 32)
++#define ZXDHQPSQ_AHID GENMASK_ULL(18, 0)
++#define ZXDHQPSQ_INLINEDATAFLAG BIT_ULL(63)
++#define ZXDHQPSQ_UD_INLINEDATAFLAG BIT_ULL(50)
++#define ZXDHQPSQ_UD_INLINEDATALEN GENMASK_ULL(49, 42)
++#define ZXDHQPSQ_UD_ADDFRAGCNT GENMASK_ULL(36, 29)
++#define ZXDHQPSQ_WRITE_INLINEDATAFLAG BIT_ULL(48)
++#define ZXDHQPSQ_WRITE_INLINEDATALEN GENMASK_ULL(47, 40)
++
++#define ZXDH_INLINE_VALID_S 7
++#define ZXDHQPSQ_INLINE_VALID BIT_ULL(63)
++#define ZXDHQPSQ_INLINEDATALEN GENMASK_ULL(62, 55)
++#define ZXDHQPSQ_IMMDATAFLAG BIT_ULL(52)
++#define ZXDHQPSQ_REPORTRTT BIT_ULL(46)
++
++#define ZXDHQPSQ_IMMDATA GENMASK_ULL(31, 0)
++#define ZXDHQPSQ_REMSTAG_S 0
++#define ZXDHQPSQ_REMSTAG GENMASK_ULL(31, 0)
++
++#define ZXDHQPSQ_REMTO ZXDH_CQPHC_QPCTX
++
++#define ZXDHQPSQ_IMMDATA_VALID BIT_ULL(63)
++#define ZXDHQPSQ_STAGRIGHTS GENMASK_ULL(50, 46)
++#define ZXDHQPSQ_VABASEDTO BIT_ULL(51)
++#define ZXDHQPSQ_MEMWINDOWTYPE BIT_ULL(52)
++
++#define ZXDHQPSQ_MWLEN ZXDH_CQPHC_QPCTX
++#define ZXDHQPSQ_PARENTMRSTAG GENMASK_ULL(31, 0)
++#define ZXDHQPSQ_MWSTAG GENMASK_ULL(31, 0)
++#define ZXDHQPSQ_MW_PA_PBLE_ONE GENMASK_ULL(63, 46)
++#define ZXDHQPSQ_MW_PA_PBLE_TWO GENMASK_ULL(63, 32)
++#define ZXDHQPSQ_MW_PA_PBLE_THREE GENMASK_ULL(33, 32)
++#define ZXDHQPSQ_MW_HOST_PAGE_SIZE GENMASK_ULL(40, 36)
++#define ZXDHQPSQ_MW_LEAF_PBL_SIZE GENMASK_ULL(35, 34)
++#define ZXDHQPSQ_MW_LEVLE2_FIRST_PBLE_INDEX GENMASK_ULL(41, 32)
++#define ZXDHQPSQ_MW_LEVLE2_ROOT_PBLE_INDEX GENMASK_ULL(50, 42)
++
++#define ZXDHQPSQ_BASEVA_TO_FBO ZXDH_CQPHC_QPCTX
++
++#define ZXDHQPSQ_LOCSTAG GENMASK_ULL(31, 0)
++
++#define ZXDHQPSRQ_RSV GENMASK_ULL(63, 40)
++#define ZXDHQPSRQ_VALID_SGE_NUM GENMASK_ULL(39, 32)
++#define ZXDHQPSRQ_SIGNATURE GENMASK_ULL(31, 24)
++#define ZXDHQPSRQ_NEXT_WQE_INDEX GENMASK_ULL(15, 0)
++#define ZXDHQPSRQ_START_PADDING BIT_ULL(63)
++#define ZXDHQPSRQ_FRAG_LEN GENMASK_ULL(62, 32)
++#define ZXDHQPSRQ_FRAG_STAG GENMASK_ULL(31, 0)
++
++/* QP RQ WQE common fields */
++#define ZXDHQPRQ_SIGNATURE GENMASK_ULL(31, 16)
++#define ZXDHQPRQ_ADDFRAGCNT ZXDHQPSQ_ADDFRAGCNT
++#define ZXDHQPRQ_VALID ZXDHQPSQ_VALID
++#define ZXDHQPRQ_COMPLCTX ZXDH_CQPHC_QPCTX
++#define ZXDHQPRQ_FRAG_LEN ZXDHQPSQ_FRAG_LEN
++#define ZXDHQPRQ_STAG ZXDHQPSQ_FRAG_STAG
++#define ZXDHQPRQ_TO ZXDHQPSQ_FRAG_TO
++
++// QP RQ DBSA fields
++#define ZXDHQPDBSA_RQ_POLARITY_S 15
++#define ZXDHQPDBSA_RQ_POLARITY BIT_ULL(15)
++#define ZXDHQPDBSA_RQ_SW_HEAD_S 0
++#define ZXDHQPDBSA_RQ_SW_HEAD GENMASK_ULL(14, 0)
++
++#define ZXDHPFINT_OICR_HMC_ERR_M BIT(26)
++#define ZXDHPFINT_OICR_PE_PUSH_M BIT(27)
++#define ZXDHPFINT_OICR_PE_CRITERR_M BIT(28)
++
++#define ZXDH_SRQ_PARITY_SIGN_S 15
++#define ZXDH_SRQ_PARITY_SIGN BIT_ULL(15)
++#define ZXDH_SRQ_SW_SRQ_HEAD_S 0
++#define ZXDH_SRQ_SW_SRQ_HEAD GENMASK_ULL(14, 0)
++#define ZXDH_CQE_SQ_OPCODE_RESET BIT(5)
++
++#define ZXDH_CQP_INIT_WQE(wqe) memset(wqe, 0, 64)
++
++#define ZXDH_GET_CURRENT_CQ_ELEM(_cq) \
++ ((_cq)->cq_base[ZXDH_RING_CURRENT_HEAD((_cq)->cq_ring)].buf)
++#define ZXDH_GET_CURRENT_EXTENDED_CQ_ELEM(_cq) \
++ (((struct zxdh_extended_cqe \
++ *)((_cq)->cq_base))[ZXDH_RING_CURRENT_HEAD((_cq)->cq_ring)] \
++ .buf)
++
++#define ZXDH_RING_INIT(_ring, _size) \
++ { \
++ (_ring).head = 0; \
++ (_ring).tail = 0; \
++ (_ring).size = (_size); \
++ }
++#define ZXDH_RING_SIZE(_ring) ((_ring).size)
++#define ZXDH_RING_CURRENT_HEAD(_ring) ((_ring).head)
++#define ZXDH_RING_CURRENT_TAIL(_ring) ((_ring).tail)
++
++#define ZXDH_RING_MOVE_HEAD(_ring, _retcode) \
++ { \
++ register __u32 size; \
++ size = (_ring).size; \
++ if (!ZXDH_RING_FULL_ERR(_ring)) { \
++ (_ring).head = ((_ring).head + 1) % size; \
++ (_retcode) = 0; \
++ } else { \
++ (_retcode) = ZXDH_ERR_RING_FULL; \
++ } \
++ }
++#define ZXDH_RING_MOVE_HEAD_BY_COUNT(_ring, _count, _retcode) \
++ { \
++ register __u32 size; \
++ size = (_ring).size; \
++ if ((ZXDH_RING_USED_QUANTA(_ring) + (_count)) < size) { \
++ (_ring).head = ((_ring).head + (_count)) % size; \
++ (_retcode) = 0; \
++ } else { \
++ (_retcode) = ZXDH_ERR_RING_FULL; \
++ } \
++ }
++#define ZXDH_SQ_RING_MOVE_HEAD(_ring, _retcode) \
++ { \
++ register __u32 size; \
++ size = (_ring).size; \
++ if (!ZXDH_SQ_RING_FULL_ERR(_ring)) { \
++ (_ring).head = ((_ring).head + 1) % size; \
++ (_retcode) = 0; \
++ } else { \
++ (_retcode) = ZXDH_ERR_RING_FULL; \
++ } \
++ }
++#define ZXDH_SQ_RING_MOVE_HEAD_BY_COUNT(_ring, _count, _retcode) \
++ { \
++ register __u32 size; \
++ size = (_ring).size; \
++ if ((ZXDH_RING_USED_QUANTA(_ring) + (_count)) < \
++ (size - 256)) { \
++ (_ring).head = ((_ring).head + (_count)) % size; \
++ (_retcode) = 0; \
++ } else { \
++ (_retcode) = ZXDH_ERR_RING_FULL; \
++ } \
++ }
++#define ZXDH_RING_MOVE_HEAD_BY_COUNT_NOCHECK(_ring, _count) \
++ (_ring).head = ((_ring).head + (_count)) % (_ring).size
++
++#define ZXDH_RING_MOVE_TAIL(_ring) \
++ (_ring).tail = ((_ring).tail + 1) % (_ring).size
++
++#define ZXDH_RING_MOVE_HEAD_NOCHECK(_ring) \
++ (_ring).head = ((_ring).head + 1) % (_ring).size
++
++#define ZXDH_RING_MOVE_TAIL_BY_COUNT(_ring, _count) \
++ (_ring).tail = ((_ring).tail + (_count)) % (_ring).size
++
++#define ZXDH_RING_SET_TAIL(_ring, _pos) (_ring).tail = (_pos) % (_ring).size
++
++#define ZXDH_RING_FULL_ERR(_ring) \
++ ((ZXDH_RING_USED_QUANTA(_ring) == ((_ring).size - 1)))
++
++#define ZXDH_ERR_RING_FULL2(_ring) \
++ ((ZXDH_RING_USED_QUANTA(_ring) == ((_ring).size - 2)))
++
++#define ZXDH_ERR_RING_FULL3(_ring) \
++ ((ZXDH_RING_USED_QUANTA(_ring) == ((_ring).size - 3)))
++
++#define ZXDH_SQ_RING_FULL_ERR(_ring) \
++ ((ZXDH_RING_USED_QUANTA(_ring) == ((_ring).size - 257)))
++
++#define ZXDH_ERR_SQ_RING_FULL2(_ring) \
++ ((ZXDH_RING_USED_QUANTA(_ring) == ((_ring).size - 258)))
++#define ZXDH_ERR_SQ_RING_FULL3(_ring) \
++ ((ZXDH_RING_USED_QUANTA(_ring) == ((_ring).size - 259)))
++#define ZXDH_RING_MORE_WORK(_ring) ((ZXDH_RING_USED_QUANTA(_ring) != 0))
++
++#define ZXDH_RING_USED_QUANTA(_ring) \
++ ((((_ring).head + (_ring).size - (_ring).tail) % (_ring).size))
++
++#define ZXDH_RING_FREE_QUANTA(_ring) \
++ (((_ring).size - ZXDH_RING_USED_QUANTA(_ring) - 1))
++
++#define ZXDH_SQ_RING_FREE_QUANTA(_ring) \
++ (((_ring).size - ZXDH_RING_USED_QUANTA(_ring) - 257))
++
++#define ZXDH_ATOMIC_RING_MOVE_HEAD(_ring, index, _retcode) \
++ { \
++ index = ZXDH_RING_CURRENT_HEAD(_ring); \
++ ZXDH_RING_MOVE_HEAD(_ring, _retcode); \
++ }
++
++enum zxdh_qp_wqe_size {
++ ZXDH_WQE_SIZE_32 = 32,
++ ZXDH_WQE_SIZE_64 = 64,
++ ZXDH_WQE_SIZE_96 = 96,
++ ZXDH_WQE_SIZE_128 = 128,
++ ZXDH_WQE_SIZE_256 = 256,
++};
++
++/**
++ * set_64bit_val - set 64 bit value to hw wqe
++ * @wqe_words: wqe addr to write
++ * @byte_index: index in wqe
++ * @val: value to write
++ **/
++static inline void set_64bit_val(__le64 *wqe_words, __u32 byte_index, __u64 val)
++{
++ wqe_words[byte_index >> 3] = htole64(val);
++}
++
++/**
++ * set_32bit_val - set 32 bit value to hw wqe
++ * @wqe_words: wqe addr to write
++ * @byte_index: index in wqe
++ * @val: value to write
++ **/
++static inline void set_32bit_val(__le32 *wqe_words, __u32 byte_index, __u32 val)
++{
++ wqe_words[byte_index >> 2] = htole32(val);
++}
++
++/**
++ * set_16bit_val - set 16 bit value to hw wqe
++ * @wqe_words: wqe addr to write
++ * @byte_index: index in wqe
++ * @val: value to write
++ **/
++static inline void set_16bit_val(__le16 *wqe_words, __u32 byte_index, __u16 val)
++{
++ wqe_words[byte_index >> 1] = htole16(val);
++}
++
++/**
++ * get_64bit_val - read 64 bit value from wqe
++ * @wqe_words: wqe addr
++ * @byte_index: index to read from
++ * @val: read value
++ **/
++static inline void get_64bit_val(__le64 *wqe_words, __u32 byte_index,
++ __u64 *val)
++{
++ *val = le64toh(wqe_words[byte_index >> 3]);
++}
++
++/**
++ * get_32bit_val - read 32 bit value from wqe
++ * @wqe_words: wqe addr
++ * @byte_index: index to read from
++ * @val: return 32 bit value
++ **/
++static inline void get_32bit_val(__le32 *wqe_words, __u32 byte_index,
++ __u32 *val)
++{
++ *val = le32toh(wqe_words[byte_index >> 2]);
++}
++
++#define read_wqe_need_split(pre_cal_psn, next_psn) \
++ (((pre_cal_psn < next_psn) && (pre_cal_psn != 0)) || \
++ ((next_psn <= 0x7FFFFF) && (pre_cal_psn > 0x800000)))
++
++#endif /* ZXDH_DEFS_H */
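
To make the head/tail conventions above concrete, here is an illustrative fragment; struct zxdh_ring is defined elsewhere and is assumed only to expose the head, tail and size fields that the macros dereference.

	struct zxdh_ring ring;
	__u32 wqe_idx;
	int ret;

	ZXDH_RING_INIT(ring, 1024);	/* head = tail = 0, size = 1024 */

	/* Producer: record the current head slot, then advance the head
	 * (sets ret to ZXDH_ERR_RING_FULL once size - 1 quanta are in use). */
	ZXDH_ATOMIC_RING_MOVE_HEAD(ring, wqe_idx, ret);

	/* Consumer: retire one quantum after its completion is polled. */
	if (!ret)
		ZXDH_RING_MOVE_TAIL(ring);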
+diff --git a/providers/zrdma/libzrdma.map b/providers/zrdma/libzrdma.map
+new file mode 100644
+index 0000000..f95de4b
+--- /dev/null
++++ b/providers/zrdma/libzrdma.map
+@@ -0,0 +1,16 @@
++/* Export symbols should be added below according to
++ Documentation/versioning.md document. */
++ZRDMA_1.0 {
++ global:
++ zxdh_get_log_trace_switch;
++ local: *;
++};
++
++ZRDMA_1.1 {
++ global:
++ zxdh_set_log_trace_switch;
++ zxdh_modify_qp_udp_sport;
++ zxdh_query_qpc;
++ zxdh_modify_qpc;
++ zxdh_reset_qp;
++} ZRDMA_1.0;
+diff --git a/providers/zrdma/osdep.h b/providers/zrdma/osdep.h
+new file mode 100644
+index 0000000..f324b83
+--- /dev/null
++++ b/providers/zrdma/osdep.h
+@@ -0,0 +1,21 @@
++/* SPDX-License-Identifier: GPL-2.0 or Linux-OpenIB */
++/* Copyright (c) 2024 ZTE Corporation. All rights reserved. */
++#ifndef ZXDH_OSDEP_H
++#define ZXDH_OSDEP_H
++
++#include
++#include
++#include
++#include
++#include
++#include
++#include
++#include
++#include
++
++static inline void db_wr32(__u32 val, __u32 *wqe_word)
++{
++ *wqe_word = val;
++}
++
++#endif /* ZXDH_OSDEP_H */
+diff --git a/providers/zrdma/private_verbs_cmd.c b/providers/zrdma/private_verbs_cmd.c
+new file mode 100644
+index 0000000..63202ec
+--- /dev/null
++++ b/providers/zrdma/private_verbs_cmd.c
+@@ -0,0 +1,203 @@
++// SPDX-License-Identifier: GPL-2.0 or Linux-OpenIB
++/* Copyright (c) 2024 ZTE Corporation. All rights reserved. */
++#include
++#include
++#include "private_verbs_cmd.h"
++#include "zxdh_dv.h"
++
++static void copy_query_qpc(struct zxdh_query_qpc_resp *resp,
++ struct zxdh_rdma_qpc *qpc)
++{
++ qpc->ack_err_flag = resp->ack_err_flag;
++ qpc->retry_flag = resp->retry_flag;
++ qpc->rnr_retry_flag = resp->rnr_retry_flag;
++ qpc->cur_retry_count = resp->cur_retry_count;
++ qpc->retry_cqe_sq_opcode = resp->retry_cqe_sq_opcode;
++ qpc->err_flag = resp->err_flag;
++ qpc->package_err_flag = resp->package_err_flag;
++ qpc->recv_err_flag = resp->recv_err_flag;
++ qpc->tx_last_ack_psn = resp->tx_last_ack_psn;
++ qpc->retry_count = resp->retry_count;
++ qpc->read_retry_flag = resp->read_retry_flag;
++}
++
++int _zxdh_query_qpc(struct ibv_qp *qp, struct zxdh_rdma_qpc *qpc)
++{
++ DECLARE_COMMAND_BUFFER(cmd, ZXDH_IB_OBJECT_QP_OBJ,
++ ZXDH_IB_METHOD_QP_QUERY_QPC, 2);
++ int ret;
++ struct zxdh_query_qpc_resp resp_ex = { 0 };
++
++ fill_attr_in_obj(cmd, ZXDH_IB_ATTR_QP_QUERY_HANDLE, qp->handle);
++ fill_attr_out_ptr(cmd, ZXDH_IB_ATTR_QP_QUERY_RESP, &resp_ex);
++
++ ret = execute_ioctl(qp->context, cmd);
++ if (ret)
++ return ret;
++
++ copy_query_qpc(&resp_ex, qpc);
++ return 0;
++}
++
++static void copy_modify_qpc_fields(struct zxdh_modify_qpc_req *req_cmd,
++ uint64_t attr_mask,
++ struct zxdh_rdma_qpc *qpc)
++{
++ if (attr_mask & ZXDH_TX_READ_RETRY_FLAG_SET) {
++ req_cmd->retry_flag = qpc->retry_flag;
++ req_cmd->rnr_retry_flag = qpc->rnr_retry_flag;
++ req_cmd->read_retry_flag = qpc->read_retry_flag;
++ req_cmd->cur_retry_count = qpc->cur_retry_count;
++ }
++ if (attr_mask & ZXDH_RETRY_CQE_SQ_OPCODE)
++ req_cmd->retry_cqe_sq_opcode = qpc->retry_cqe_sq_opcode;
++
++ if (attr_mask & ZXDH_ERR_FLAG_SET) {
++ req_cmd->err_flag = qpc->err_flag;
++ req_cmd->ack_err_flag = qpc->ack_err_flag;
++ }
++ if (attr_mask & ZXDH_PACKAGE_ERR_FLAG)
++ req_cmd->package_err_flag = qpc->package_err_flag;
++}
++
++int _zxdh_reset_qp(struct ibv_qp *qp, uint64_t opcode)
++{
++ DECLARE_COMMAND_BUFFER(cmd, ZXDH_IB_OBJECT_QP_OBJ,
++ ZXDH_IB_METHOD_QP_RESET_QP, 2);
++
++ fill_attr_in_obj(cmd, ZXDH_IB_ATTR_QP_RESET_QP_HANDLE, qp->handle);
++ fill_attr_in_uint64(cmd, ZXDH_IB_ATTR_QP_RESET_OP_CODE, opcode);
++ return execute_ioctl(qp->context, cmd);
++}
++
++int _zxdh_modify_qpc(struct ibv_qp *qp, struct zxdh_rdma_qpc *qpc,
++ uint64_t qpc_mask)
++{
++ DECLARE_COMMAND_BUFFER(cmd, ZXDH_IB_OBJECT_QP_OBJ,
++ ZXDH_IB_METHOD_QP_MODIFY_QPC, 3);
++ struct zxdh_modify_qpc_req req = { 0 };
++
++ copy_modify_qpc_fields(&req, qpc_mask, qpc);
++ fill_attr_in_obj(cmd, ZXDH_IB_ATTR_QP_QUERY_HANDLE, qp->handle);
++ fill_attr_in_uint64(cmd, ZXDH_IB_ATTR_QP_MODIFY_QPC_MASK, qpc_mask);
++ fill_attr_in_ptr(cmd, ZXDH_IB_ATTR_QP_MODIFY_QPC_REQ, &req);
++ return execute_ioctl(qp->context, cmd);
++}
++
++static int _zxdh_modify_qp_udp_sport(struct ibv_context *ibctx,
++ uint16_t udp_sport, uint32_t qpn)
++{
++ if (udp_sport <= MIN_UDP_SPORT || qpn <= MIN_QP_QPN)
++ return -EINVAL;
++
++ DECLARE_COMMAND_BUFFER(cmd, ZXDH_IB_OBJECT_QP_OBJ,
++ ZXDH_IB_METHOD_QP_MODIFY_UDP_SPORT, 2);
++ fill_attr_in(cmd, ZXDH_IB_ATTR_QP_UDP_PORT, &udp_sport,
++ sizeof(udp_sport));
++ fill_attr_in_uint32(cmd, ZXDH_IB_ATTR_QP_QPN, qpn);
++ return execute_ioctl(ibctx, cmd);
++}
++
++static int _zxdh_get_log_trace_switch(struct ibv_context *ibctx,
++ uint8_t *switch_status)
++{
++ DECLARE_COMMAND_BUFFER(cmd, ZXDH_IB_OBJECT_DEV,
++ ZXDH_IB_METHOD_DEV_GET_LOG_TRACE, 1);
++
++ fill_attr_out_ptr(cmd, ZXDH_IB_ATTR_DEV_GET_LOG_TARCE_SWITCH,
++ switch_status);
++ return execute_ioctl(ibctx, cmd);
++}
++
++static int _zxdh_set_log_trace_switch(struct ibv_context *ibctx,
++ uint8_t switch_status)
++{
++ DECLARE_COMMAND_BUFFER(cmd, ZXDH_IB_OBJECT_DEV,
++ ZXDH_IB_METHOD_DEV_SET_LOG_TRACE, 1);
++ fill_attr_in(cmd, ZXDH_IB_ATTR_DEV_SET_LOG_TARCE_SWITCH, &switch_status,
++ sizeof(switch_status));
++ return execute_ioctl(ibctx, cmd);
++}
++
++static struct zxdh_uvcontext_ops zxdh_ctx_ops = {
++ .modify_qp_udp_sport = _zxdh_modify_qp_udp_sport,
++ .get_log_trace_switch = _zxdh_get_log_trace_switch,
++ .set_log_trace_switch = _zxdh_set_log_trace_switch,
++ .query_qpc = _zxdh_query_qpc,
++ .modify_qpc = _zxdh_modify_qpc,
++ .reset_qp = _zxdh_reset_qp,
++};
++
++static inline struct zxdh_uvcontext *to_zxdhtx(struct ibv_context *ibctx)
++{
++ return container_of(ibctx, struct zxdh_uvcontext, ibv_ctx.context);
++}
++
++int zxdh_reset_qp(struct ibv_qp *qp, uint64_t opcode)
++{
++ struct zxdh_uvcontext_ops *dvops = to_zxdhtx(qp->context)->cxt_ops;
++
++ if (!dvops || !dvops->reset_qp)
++ return -EOPNOTSUPP;
++ return dvops->reset_qp(qp, opcode);
++}
++
++int zxdh_modify_qpc(struct ibv_qp *qp, struct zxdh_rdma_qpc *qpc,
++ uint64_t qpc_mask)
++{
++ struct zxdh_uvcontext_ops *dvops = to_zxdhtx(qp->context)->cxt_ops;
++
++ if (!dvops || !dvops->modify_qpc)
++ return -EOPNOTSUPP;
++ return dvops->modify_qpc(qp, qpc, qpc_mask);
++}
++
++int zxdh_query_qpc(struct ibv_qp *qp, struct zxdh_rdma_qpc *qpc)
++{
++ struct zxdh_uvcontext_ops *dvops = to_zxdhtx(qp->context)->cxt_ops;
++
++ if (!dvops || !dvops->query_qpc)
++ return -EOPNOTSUPP;
++
++ return dvops->query_qpc(qp, qpc);
++}
++
++int zxdh_modify_qp_udp_sport(struct ibv_context *context, uint16_t udp_sport,
++ uint32_t qpn)
++{
++ struct zxdh_uvcontext_ops *dvops = to_zxdhtx(context)->cxt_ops;
++
++ if (!dvops || !dvops->modify_qp_udp_sport)
++ return -EOPNOTSUPP;
++
++ return dvops->modify_qp_udp_sport(context, udp_sport, qpn);
++}
++
++int zxdh_get_log_trace_switch(struct ibv_context *context,
++ enum switch_status *switch_status)
++{
++ struct zxdh_uvcontext_ops *dvops = to_zxdhtx(context)->cxt_ops;
++
++ if (!dvops || !dvops->get_log_trace_switch)
++ return -EOPNOTSUPP;
++
++ return dvops->get_log_trace_switch(context, (uint8_t *)switch_status);
++}
++
++int zxdh_set_log_trace_switch(struct ibv_context *context,
++ enum switch_status switch_status)
++{
++ struct zxdh_uvcontext_ops *dvops = to_zxdhtx(context)->cxt_ops;
++
++ if (!dvops || !dvops->set_log_trace_switch)
++ return -EOPNOTSUPP;
++
++ return dvops->set_log_trace_switch(context, switch_status);
++}
++
++void add_private_ops(struct zxdh_uvcontext *iwvctx)
++{
++ iwvctx->cxt_ops = &zxdh_ctx_ops;
++}
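
For reference, an application reaches these private commands through the wrappers exported in libzrdma.map. An illustrative flow (the exact semantics of the retry fields are an assumption; ZXDH_TX_READ_RETRY_FLAG_SET is the mask bit handled by copy_modify_qpc_fields() above):

	static int clear_retry_state(struct ibv_qp *qp)
	{
		struct zxdh_rdma_qpc qpc = {};
		int ret;

		ret = zxdh_query_qpc(qp, &qpc);
		if (ret)
			return ret;

		if (!qpc.retry_flag)
			return 0;

		qpc.retry_flag = 0;
		qpc.cur_retry_count = qpc.retry_count;	/* restart the retry budget */
		return zxdh_modify_qpc(qp, &qpc, ZXDH_TX_READ_RETRY_FLAG_SET);
	}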
+diff --git a/providers/zrdma/private_verbs_cmd.h b/providers/zrdma/private_verbs_cmd.h
+new file mode 100644
+index 0000000..f8d9534
+--- /dev/null
++++ b/providers/zrdma/private_verbs_cmd.h
+@@ -0,0 +1,24 @@
++/* SPDX-License-Identifier: GPL-2.0 or Linux-OpenIB */
++/* Copyright (c) 2024 ZTE Corporation. All rights reserved. */
++#ifndef ZXDH_RDMA_PRIVATE_VERBS_CMD_H
++#define ZXDH_RDMA_PRIVATE_VERBS_CMD_H
++
++#include "umain.h"
++#include "zxdh_dv.h"
++
++struct zxdh_uvcontext_ops {
++ int (*modify_qp_udp_sport)(struct ibv_context *ibctx,
++ uint16_t udp_sport, uint32_t qpn);
++ int (*set_log_trace_switch)(struct ibv_context *ibctx,
++ uint8_t switch_status);
++ int (*get_log_trace_switch)(struct ibv_context *ibctx,
++ uint8_t *switch_status);
++ int (*query_qpc)(struct ibv_qp *qp, struct zxdh_rdma_qpc *qpc);
++ int (*modify_qpc)(struct ibv_qp *qp, struct zxdh_rdma_qpc *qpc,
++ uint64_t qpc_mask);
++ int (*reset_qp)(struct ibv_qp *qp, uint64_t opcode);
++};
++
++void add_private_ops(struct zxdh_uvcontext *iwvctx);
++
++#endif
+diff --git a/providers/zrdma/status.h b/providers/zrdma/status.h
+new file mode 100644
+index 0000000..d9e9f04
+--- /dev/null
++++ b/providers/zrdma/status.h
+@@ -0,0 +1,75 @@
++/* SPDX-License-Identifier: GPL-2.0 or Linux-OpenIB */
++/* Copyright (c) 2024 ZTE Corporation. All rights reserved. */
++#ifndef ZXDH_STATUS_H
++#define ZXDH_STATUS_H
++
++/* Error Codes */
++enum zxdh_status_code {
++ ZXDH_SUCCESS = 0,
++ ZXDH_ERR_NVM = -1,
++ ZXDH_ERR_NVM_CHECKSUM = -2,
++ ZXDH_ERR_CFG = -4,
++ ZXDH_ERR_PARAM = -5,
++ ZXDH_ERR_DEVICE_NOT_SUPPORTED = -6,
++ ZXDH_ERR_RESET_FAILED = -7,
++ ZXDH_ERR_SWFW_SYNC = -8,
++ ZXDH_ERR_NO_MEMORY = -9,
++ ZXDH_ERR_BAD_PTR = -10,
++ ZXDH_ERR_INVALID_PD_ID = -11,
++ ZXDH_ERR_INVALID_QP_ID = -12,
++ ZXDH_ERR_INVALID_CQ_ID = -13,
++ ZXDH_ERR_INVALID_CEQ_ID = -14,
++ ZXDH_ERR_INVALID_AEQ_ID = -15,
++ ZXDH_ERR_INVALID_SIZE = -16,
++ ZXDH_ERR_INVALID_ARP_INDEX = -17,
++ ZXDH_ERR_INVALID_FPM_FUNC_ID = -18,
++ ZXDH_ERR_QP_INVALID_MSG_SIZE = -19,
++ ZXDH_ERR_QP_TOOMANY_WRS_POSTED = -20,
++ ZXDH_ERR_INVALID_FRAG_COUNT = -21,
++ ZXDH_ERR_Q_EMPTY = -22,
++ ZXDH_ERR_INVALID_ALIGNMENT = -23,
++ ZXDH_ERR_FLUSHED_Q = -24,
++ ZXDH_ERR_INVALID_PUSH_PAGE_INDEX = -25,
++ ZXDH_ERR_INVALID_INLINE_DATA_SIZE = -26,
++ ZXDH_ERR_TIMEOUT = -27,
++ ZXDH_ERR_OPCODE_MISMATCH = -28,
++ ZXDH_ERR_CQP_COMPL_ERROR = -29,
++ ZXDH_ERR_INVALID_VF_ID = -30,
++ ZXDH_ERR_INVALID_HMCFN_ID = -31,
++ ZXDH_ERR_BACKING_PAGE_ERROR = -32,
++ ZXDH_ERR_NO_PBLCHUNKS_AVAILABLE = -33,
++ ZXDH_ERR_INVALID_PBLE_INDEX = -34,
++ ZXDH_ERR_INVALID_SD_INDEX = -35,
++ ZXDH_ERR_INVALID_PAGE_DESC_INDEX = -36,
++ ZXDH_ERR_INVALID_SD_TYPE = -37,
++ ZXDH_ERR_MEMCPY_FAILED = -38,
++ ZXDH_ERR_INVALID_HMC_OBJ_INDEX = -39,
++ ZXDH_ERR_INVALID_HMC_OBJ_COUNT = -40,
++ ZXDH_ERR_BUF_TOO_SHORT = -43,
++ ZXDH_ERR_BAD_IWARP_CQE = -44,
++ ZXDH_ERR_NVM_BLANK_MODE = -45,
++ ZXDH_ERR_NOT_IMPL = -46,
++ ZXDH_ERR_PE_DOORBELL_NOT_ENA = -47,
++ ZXDH_ERR_NOT_READY = -48,
++ ZXDH_NOT_SUPPORTED = -49,
++ ZXDH_ERR_FIRMWARE_API_VER = -50,
++ ZXDH_ERR_RING_FULL = -51,
++ ZXDH_ERR_MPA_CRC = -61,
++ ZXDH_ERR_NO_TXBUFS = -62,
++ ZXDH_ERR_SEQ_NUM = -63,
++ ZXDH_ERR_LIST_EMPTY = -64,
++ ZXDH_ERR_INVALID_MAC_ADDR = -65,
++ ZXDH_ERR_BAD_STAG = -66,
++ ZXDH_ERR_CQ_COMPL_ERROR = -67,
++ ZXDH_ERR_Q_DESTROYED = -68,
++ ZXDH_ERR_INVALID_FEAT_CNT = -69,
++ ZXDH_ERR_REG_CQ_FULL = -70,
++ ZXDH_ERR_VF_MSG_ERROR = -71,
++ ZXDH_ERR_NO_INTR = -72,
++ ZXDH_ERR_REG_QSET = -73,
++ ZXDH_ERR_FEATURES_OP = -74,
++ ZXDH_ERR_INVALID_FRAG_LEN = -75,
++ ZXDH_ERR_RETRY_ACK_ERR = -76,
++ ZXDH_ERR_RETRY_ACK_NOT_EXCEED_ERR = -77,
++};
++#endif /* ZXDH_STATUS_H */
+diff --git a/providers/zrdma/uk.c b/providers/zrdma/uk.c
+new file mode 100644
+index 0000000..fbf8348
+--- /dev/null
++++ b/providers/zrdma/uk.c
+@@ -0,0 +1,2616 @@
++// SPDX-License-Identifier: GPL-2.0 or Linux-OpenIB
++/* Copyright (c) 2024 ZTE Corporation. All rights reserved. */
++#include "osdep.h"
++#include "status.h"
++#include "defs.h"
++#include "user.h"
++#include "zxdh.h"
++#include "umain.h"
++#include
++#include
++#include "private_verbs_cmd.h"
++#include
++#include
++#include
++#define ERROR_CODE_VALUE 65
++
++static void qp_tx_psn_add(__u32 *x, __u32 y, __u16 mtu)
++{
++ if (y == 0) {
++ *x = (*x + 1) & 0xffffff;
++ return;
++ }
++ *x = (*x + ((y % mtu) ? (y / mtu + 1) : y / mtu)) & 0xffffff;
++}
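
Worked example: for a 9000-byte payload over a 4096-byte MTU, y / mtu = 2 with a remainder, so the PSN advances by 3; starting from 0xfffffe it wraps to 0x000001 because the PSN space is masked to 24 bits.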
++
++static int zxdh_get_write_imm_split_switch(void)
++{
++ char *env;
++ int zxdh_write_imm_split_switch;
++
++ env = getenv("ZXDH_WRITE_IMM_SPILT_ENABLE");
++ zxdh_write_imm_split_switch = (env != NULL) ? atoi(env) : 0;
++ return zxdh_write_imm_split_switch;
++}
++
++/**
++ * zxdh_set_fragment - set fragment in wqe
++ * @wqe: wqe for setting fragment
++ * @offset: offset value
++ * @sge: sge length and stag
++ * @valid: The wqe valid
++ */
++static void zxdh_set_fragment(__le64 *wqe, __u32 offset, struct zxdh_sge *sge,
++ __u8 valid)
++{
++ if (sge) {
++ set_64bit_val(wqe, offset + 8,
++ FIELD_PREP(ZXDHQPSQ_FRAG_TO, sge->tag_off));
++ set_64bit_val(wqe, offset,
++ FIELD_PREP(ZXDHQPSQ_VALID, valid) |
++ FIELD_PREP(ZXDHQPSQ_FRAG_LEN, sge->len) |
++ FIELD_PREP(ZXDHQPSQ_FRAG_STAG,
++ sge->stag));
++ } else {
++ set_64bit_val(wqe, offset + 8, 0);
++ set_64bit_val(wqe, offset, FIELD_PREP(ZXDHQPSQ_VALID, valid));
++ }
++}
++
++/**
++ * zxdh_nop_1 - insert a NOP wqe
++ * @qp: hw qp ptr
++ */
++static enum zxdh_status_code zxdh_nop_1(struct zxdh_qp_uk *qp)
++{
++ __u64 hdr;
++ __le64 *wqe;
++ __u32 wqe_idx;
++ bool signaled = false;
++
++ if (!qp->sq_ring.head)
++ return ZXDH_ERR_PARAM;
++
++ wqe_idx = ZXDH_RING_CURRENT_HEAD(qp->sq_ring);
++ wqe = qp->sq_base[wqe_idx].elem;
++
++ qp->sq_wrtrk_array[wqe_idx].quanta = ZXDH_QP_WQE_MIN_QUANTA;
++
++ set_64bit_val(wqe, 8, 0);
++ set_64bit_val(wqe, 16, 0);
++ set_64bit_val(wqe, 24, 0);
++
++ hdr = FIELD_PREP(ZXDHQPSQ_OPCODE, ZXDH_OP_TYPE_NOP) |
++ FIELD_PREP(ZXDHQPSQ_SIGCOMPL, signaled) |
++ FIELD_PREP(ZXDHQPSQ_VALID, qp->swqe_polarity);
++
++ /* make sure WQE is written before valid bit is set */
++ udma_to_device_barrier();
++
++ set_64bit_val(wqe, 0, hdr);
++
++ return 0;
++}
++
++/**
++ * zxdh_clr_wqes - clear next 128 sq entries
++ * @qp: hw qp ptr
++ * @qp_wqe_idx: wqe_idx
++ */
++void zxdh_clr_wqes(struct zxdh_qp_uk *qp, __u32 qp_wqe_idx)
++{
++ __le64 *wqe;
++ __u32 wqe_idx;
++
++ if (!(qp_wqe_idx & 0x7F)) {
++ wqe_idx = (qp_wqe_idx + 128) % qp->sq_ring.size;
++ wqe = qp->sq_base[wqe_idx].elem;
++ if (wqe_idx)
++ memset(wqe, qp->swqe_polarity ? 0 : 0xFF, 0x1000);
++ else
++ memset(wqe, qp->swqe_polarity ? 0xFF : 0, 0x1000);
++ }
++}
++
++/**
++ * zxdh_uk_qp_post_wr - ring doorbell
++ * @qp: hw qp ptr
++ */
++void zxdh_uk_qp_post_wr(struct zxdh_qp_uk *qp)
++{
++ /* valid bit is written before ringing doorbell */
++ udma_to_device_barrier();
++
++ db_wr32(qp->qp_id, qp->wqe_alloc_db);
++ qp->initial_ring.head = qp->sq_ring.head;
++}
++
++/**
++ * zxdh_uk_qp_set_shadow_area - fill SW_RQ_Head
++ * @qp: hw qp ptr
++ */
++void zxdh_uk_qp_set_shadow_area(struct zxdh_qp_uk *qp)
++{
++ __u8 polarity = 0;
++
++ polarity = ((ZXDH_RING_CURRENT_HEAD(qp->rq_ring) == 0) ?
++ !qp->rwqe_polarity :
++ qp->rwqe_polarity);
++ set_64bit_val(qp->shadow_area, 0,
++ FIELD_PREP(ZXDHQPDBSA_RQ_POLARITY, polarity) |
++ FIELD_PREP(ZXDHQPDBSA_RQ_SW_HEAD,
++ ZXDH_RING_CURRENT_HEAD(qp->rq_ring)));
++}
++
++/**
++ * zxdh_qp_ring_push_db - ring qp doorbell
++ * @qp: hw qp ptr
++ * @wqe_idx: wqe index
++ */
++static void zxdh_qp_ring_push_db(struct zxdh_qp_uk *qp, __u32 wqe_idx)
++{
++ set_32bit_val(qp->push_db, 0,
++ FIELD_PREP(ZXDH_WQEALLOC_WQE_DESC_INDEX, wqe_idx >> 3) |
++ qp->qp_id);
++ qp->initial_ring.head = qp->sq_ring.head;
++ qp->push_mode = true;
++ qp->push_dropped = false;
++}
++
++void zxdh_qp_push_wqe(struct zxdh_qp_uk *qp, __le64 *wqe, __u16 quanta,
++ __u32 wqe_idx, bool post_sq)
++{
++ __le64 *push;
++
++ if (ZXDH_RING_CURRENT_HEAD(qp->initial_ring) !=
++ ZXDH_RING_CURRENT_TAIL(qp->sq_ring) &&
++ !qp->push_mode) {
++ if (post_sq)
++ zxdh_uk_qp_post_wr(qp);
++ } else {
++ push = (__le64 *)((uintptr_t)qp->push_wqe +
++ (wqe_idx & 0x7) * 0x20);
++ memcpy(push, wqe, quanta * ZXDH_QP_WQE_MIN_SIZE);
++ zxdh_qp_ring_push_db(qp, wqe_idx);
++ }
++}
++
++/**
++ * zxdh_qp_get_next_send_wqe - pad with NOP if needed, return where next WR should go
++ * @qp: hw qp ptr
++ * @wqe_idx: return wqe index
++ * @quanta: size of WR in quanta
++ * @total_size: size of WR in bytes
++ * @info: info on WR
++ */
++__le64 *zxdh_qp_get_next_send_wqe(struct zxdh_qp_uk *qp, __u32 *wqe_idx,
++ __u16 quanta, __u32 total_size,
++ struct zxdh_post_sq_info *info)
++{
++ __le64 *wqe;
++ __u16 avail_quanta;
++ __u16 i;
++
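++	/*
++	 * A WR may not straddle a page of SQ WQEs: if too few quanta remain in
++	 * the current page, they are consumed with NOP WQEs and the WR starts
++	 * at the beginning of the next page.
++	 */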
++ avail_quanta = ZXDH_MAX_SQ_WQES_PER_PAGE -
++ (ZXDH_RING_CURRENT_HEAD(qp->sq_ring) %
++ ZXDH_MAX_SQ_WQES_PER_PAGE);
++ if (quanta <= avail_quanta) {
++ /* WR fits in current chunk */
++ if (quanta > ZXDH_SQ_RING_FREE_QUANTA(qp->sq_ring))
++ return NULL;
++ } else {
++ /* Need to pad with NOP */
++ if (quanta + avail_quanta >
++ ZXDH_SQ_RING_FREE_QUANTA(qp->sq_ring))
++ return NULL;
++
++ for (i = 0; i < avail_quanta; i++) {
++ zxdh_nop_1(qp);
++ ZXDH_RING_MOVE_HEAD_NOCHECK(qp->sq_ring);
++ }
++ }
++
++ *wqe_idx = ZXDH_RING_CURRENT_HEAD(qp->sq_ring);
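++	/* wrapping to slot 0 toggles the valid-bit polarity for the new pass over the ring */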
++ if (!*wqe_idx)
++ qp->swqe_polarity = !qp->swqe_polarity;
++
++ ZXDH_RING_MOVE_HEAD_BY_COUNT_NOCHECK(qp->sq_ring, quanta);
++
++ wqe = qp->sq_base[*wqe_idx].elem;
++ qp->sq_wrtrk_array[*wqe_idx].wrid = info->wr_id;
++ qp->sq_wrtrk_array[*wqe_idx].wr_len = total_size;
++ qp->sq_wrtrk_array[*wqe_idx].quanta = quanta;
++
++ return wqe;
++}
++
++/**
++ * zxdh_qp_get_next_recv_wqe - get next qp's rcv wqe
++ * @qp: hw qp ptr
++ * @wqe_idx: return wqe index
++ */
++__le64 *zxdh_qp_get_next_recv_wqe(struct zxdh_qp_uk *qp, __u32 *wqe_idx)
++{
++ __le64 *wqe;
++ enum zxdh_status_code ret_code;
++
++ if (ZXDH_RING_FULL_ERR(qp->rq_ring))
++ return NULL;
++
++ ZXDH_ATOMIC_RING_MOVE_HEAD(qp->rq_ring, *wqe_idx, ret_code);
++ if (ret_code)
++ return NULL;
++
++ if (!*wqe_idx)
++ qp->rwqe_polarity = !qp->rwqe_polarity;
++	/* rq_wqe_size_multiplier is the number of 16-byte quanta in one RQ WQE */
++ wqe = qp->rq_base[*wqe_idx * qp->rq_wqe_size_multiplier].elem;
++
++ return wqe;
++}
++
++static enum zxdh_status_code
++zxdh_post_rdma_write(struct zxdh_qp_uk *qp, struct zxdh_post_sq_info *info,
++ bool post_sq, __u32 total_size)
++{
++ enum zxdh_status_code ret_code;
++ struct zxdh_rdma_write *op_info;
++ __u32 i, byte_off = 0;
++ __u32 frag_cnt, addl_frag_cnt;
++ __le64 *wqe;
++ __u32 wqe_idx;
++ __u16 quanta;
++ __u64 hdr;
++ bool read_fence = false;
++ bool imm_data_flag;
++
++ op_info = &info->op.rdma_write;
++ imm_data_flag = info->imm_data_valid ? 1 : 0;
++ read_fence |= info->read_fence;
++
++ if (imm_data_flag)
++ frag_cnt =
++ op_info->num_lo_sges ? (op_info->num_lo_sges + 1) : 2;
++ else
++ frag_cnt = op_info->num_lo_sges;
++ addl_frag_cnt =
++ op_info->num_lo_sges > 1 ? (op_info->num_lo_sges - 1) : 0;
++
++ ret_code = zxdh_fragcnt_to_quanta_sq(frag_cnt, &quanta);
++ if (ret_code)
++ return ret_code;
++
++ wqe = zxdh_qp_get_next_send_wqe(qp, &wqe_idx, quanta, total_size, info);
++ if (!wqe)
++ return ZXDH_ERR_QP_TOOMANY_WRS_POSTED;
++
++ zxdh_clr_wqes(qp, wqe_idx);
++
++ if (op_info->num_lo_sges) {
++ set_64bit_val(
++ wqe, 16,
++ FIELD_PREP(ZXDHQPSQ_FIRST_FRAG_VALID,
++ op_info->lo_sg_list->len ==
++ ZXDH_MAX_SQ_PAYLOAD_SIZE ?
++ 1 :
++ 0) |
++ FIELD_PREP(ZXDHQPSQ_FIRST_FRAG_LEN,
++ op_info->lo_sg_list->len) |
++ FIELD_PREP(ZXDHQPSQ_FIRST_FRAG_STAG,
++ op_info->lo_sg_list->stag));
++ set_64bit_val(wqe, 8,
++ FIELD_PREP(ZXDHQPSQ_FRAG_TO,
++ op_info->lo_sg_list->tag_off));
++ } else {
++		/* if zero SGEs, post a special SGE with zero length */
++ set_64bit_val(wqe, 16,
++ FIELD_PREP(ZXDHQPSQ_FIRST_FRAG_VALID, 0) |
++ FIELD_PREP(ZXDHQPSQ_FIRST_FRAG_LEN, 0) |
++ FIELD_PREP(ZXDHQPSQ_FIRST_FRAG_STAG,
++ 0x100));
++ set_64bit_val(wqe, 8, FIELD_PREP(ZXDHQPSQ_FRAG_TO, 0));
++ }
++
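++	/*
++	 * SGEs after the first are packed two per 32-byte quantum; within each
++	 * pair the higher-offset entry is written first, then the lower-offset
++	 * one.
++	 */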
++ if (imm_data_flag) {
++ byte_off = ZXDH_SQ_WQE_BYTESIZE + ZXDH_QP_FRAG_BYTESIZE;
++ if (op_info->num_lo_sges > 1) {
++ qp->wqe_ops.iw_set_fragment(wqe, byte_off,
++ &op_info->lo_sg_list[1],
++ qp->swqe_polarity);
++ byte_off += ZXDH_QP_FRAG_BYTESIZE;
++ }
++ set_64bit_val(
++ wqe, ZXDH_SQ_WQE_BYTESIZE,
++ FIELD_PREP(ZXDHQPSQ_IMMDATA_VALID, qp->swqe_polarity) |
++ FIELD_PREP(ZXDHQPSQ_IMMDATA, info->imm_data));
++ i = 2;
++ if (i < op_info->num_lo_sges) {
++ for (byte_off = ZXDH_SQ_WQE_BYTESIZE +
++ 2 * ZXDH_QP_FRAG_BYTESIZE;
++ i < op_info->num_lo_sges; i += 2) {
++ if (i == addl_frag_cnt) {
++ qp->wqe_ops.iw_set_fragment(
++ wqe, byte_off,
++ &op_info->lo_sg_list[i],
++ qp->swqe_polarity);
++ byte_off += ZXDH_QP_FRAG_BYTESIZE;
++ break;
++ }
++ byte_off += ZXDH_QP_FRAG_BYTESIZE;
++ qp->wqe_ops.iw_set_fragment(
++ wqe, byte_off,
++ &op_info->lo_sg_list[i + 1],
++ qp->swqe_polarity);
++ byte_off -= ZXDH_QP_FRAG_BYTESIZE;
++ qp->wqe_ops.iw_set_fragment(
++ wqe, byte_off, &op_info->lo_sg_list[i],
++ qp->swqe_polarity);
++ byte_off += 2 * ZXDH_QP_FRAG_BYTESIZE;
++ }
++ }
++ } else {
++ i = 1;
++ for (byte_off = ZXDH_SQ_WQE_BYTESIZE; i < op_info->num_lo_sges;
++ i += 2) {
++ if (i == addl_frag_cnt) {
++ qp->wqe_ops.iw_set_fragment(
++ wqe, byte_off, &op_info->lo_sg_list[i],
++ qp->swqe_polarity);
++ byte_off += ZXDH_QP_FRAG_BYTESIZE;
++ break;
++ }
++ byte_off += ZXDH_QP_FRAG_BYTESIZE;
++ qp->wqe_ops.iw_set_fragment(wqe, byte_off,
++ &op_info->lo_sg_list[i + 1],
++ qp->swqe_polarity);
++ byte_off -= ZXDH_QP_FRAG_BYTESIZE;
++ qp->wqe_ops.iw_set_fragment(wqe, byte_off,
++ &op_info->lo_sg_list[i],
++ qp->swqe_polarity);
++ byte_off += 2 * ZXDH_QP_FRAG_BYTESIZE;
++ }
++ }
++	/* if frag_cnt is even and non-zero, set the valid bit in the unused trailing fragment */
++ if (!(frag_cnt & 0x01) && frag_cnt) {
++ qp->wqe_ops.iw_set_fragment(wqe, byte_off, NULL,
++ qp->swqe_polarity);
++ }
++
++ hdr = FIELD_PREP(ZXDHQPSQ_VALID, qp->swqe_polarity) |
++ FIELD_PREP(ZXDHQPSQ_OPCODE, info->op_type) |
++ FIELD_PREP(ZXDHQPSQ_SIGCOMPL, info->signaled) |
++ FIELD_PREP(ZXDHQPSQ_LOCALFENCE, info->local_fence) |
++ FIELD_PREP(ZXDHQPSQ_READFENCE, read_fence) |
++ FIELD_PREP(ZXDHQPSQ_SOLICITED, info->solicited) |
++ FIELD_PREP(ZXDHQPSQ_IMMDATAFLAG, imm_data_flag) |
++ FIELD_PREP(ZXDHQPSQ_ADDFRAGCNT, addl_frag_cnt) |
++ FIELD_PREP(ZXDHQPSQ_REMSTAG, op_info->rem_addr.stag);
++ set_64bit_val(wqe, 24,
++ FIELD_PREP(ZXDHQPSQ_FRAG_TO, op_info->rem_addr.tag_off));
++
++ udma_to_device_barrier(); /* make sure WQE is populated before valid bit is set */
++
++ set_64bit_val(wqe, 0, hdr);
++ if (post_sq)
++ zxdh_uk_qp_post_wr(qp);
++ qp_tx_psn_add(&qp->next_psn, total_size, qp->mtu);
++ return 0;
++}
++
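++/*
++ * Split an RDMA WRITE with immediate data into two parts: part 1 is an
++ * unsignaled plain WRITE carrying the whole payload, part 2 reuses the
++ * caller's opcode and flags as a zero-SGE WQE whose remote offset points
++ * just past the written range and carries the immediate data.
++ */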
++static void split_write_imm_wqe(struct zxdh_qp_uk *qp,
++ struct zxdh_post_sq_info *info,
++ struct zxdh_post_sq_info *split_part1_info,
++ struct zxdh_post_sq_info *split_part2_info)
++{
++ __u32 total_size = 0;
++ struct zxdh_rdma_write *op_info;
++
++ op_info = &info->op.rdma_write;
++ total_size = op_info->rem_addr.len;
++ split_part1_info->op.rdma_write.lo_sg_list =
++ info->op.rdma_write.lo_sg_list;
++ split_part2_info->op.rdma_write.lo_sg_list = NULL;
++
++ split_part1_info->op_type = ZXDH_OP_TYPE_WRITE;
++ split_part1_info->signaled = false;
++ split_part1_info->local_fence = info->local_fence;
++ split_part1_info->read_fence = info->read_fence;
++ split_part1_info->solicited = info->solicited;
++ split_part1_info->imm_data_valid = false;
++ split_part1_info->wr_id = info->wr_id;
++ split_part1_info->op.rdma_write.num_lo_sges =
++ info->op.rdma_write.num_lo_sges;
++ split_part1_info->op.rdma_write.rem_addr.stag = op_info->rem_addr.stag;
++ split_part1_info->op.rdma_write.rem_addr.tag_off =
++ op_info->rem_addr.tag_off;
++
++ split_part2_info->op_type = info->op_type;
++ split_part2_info->signaled = info->signaled;
++ split_part2_info->local_fence = info->local_fence;
++ split_part2_info->read_fence = info->read_fence;
++ split_part2_info->solicited = info->solicited;
++ split_part2_info->imm_data_valid = info->imm_data_valid;
++ split_part2_info->wr_id = info->wr_id;
++ split_part2_info->imm_data = info->imm_data;
++ split_part2_info->op.rdma_write.num_lo_sges = 0;
++ split_part2_info->op.rdma_write.rem_addr.stag = op_info->rem_addr.stag;
++ split_part2_info->op.rdma_write.rem_addr.tag_off =
++ op_info->rem_addr.tag_off + total_size;
++}
++
++/**
++ * zxdh_uk_rdma_write - rdma write operation
++ * @qp: hw qp ptr
++ * @info: post sq information
++ * @post_sq: flag to post sq
++ */
++enum zxdh_status_code zxdh_uk_rdma_write(struct zxdh_qp_uk *qp,
++ struct zxdh_post_sq_info *info,
++ bool post_sq)
++{
++ struct zxdh_post_sq_info split_part1_info = { 0 };
++ struct zxdh_post_sq_info split_part2_info = { 0 };
++ struct zxdh_rdma_write *op_info;
++ __u32 i;
++ __u32 total_size = 0;
++ enum zxdh_status_code ret_code;
++ bool imm_data_flag = info->imm_data_valid ? 1 : 0;
++ int zxdh_write_imm_split_switch;
++
++ op_info = &info->op.rdma_write;
++ if (op_info->num_lo_sges > qp->max_sq_frag_cnt)
++ return ZXDH_ERR_INVALID_FRAG_COUNT;
++
++ for (i = 0; i < op_info->num_lo_sges; i++) {
++ total_size += op_info->lo_sg_list[i].len;
++ if (0 != i && 0 == op_info->lo_sg_list[i].len)
++ return ZXDH_ERR_INVALID_FRAG_LEN;
++ }
++
++ if (total_size > ZXDH_MAX_SQ_PAYLOAD_SIZE)
++ return ZXDH_ERR_QP_INVALID_MSG_SIZE;
++
++ zxdh_write_imm_split_switch = zxdh_get_write_imm_split_switch();
++
++ op_info->rem_addr.len = total_size;
++ if (zxdh_write_imm_split_switch == 0) {
++ ret_code = zxdh_post_rdma_write(qp, info, post_sq, total_size);
++ if (ret_code)
++ return ret_code;
++ } else {
++ if (imm_data_flag && total_size > qp->mtu) {
++ split_write_imm_wqe(qp, info, &split_part1_info,
++ &split_part2_info);
++
++ ret_code = zxdh_post_rdma_write(qp, &split_part1_info,
++ post_sq, total_size);
++ if (ret_code)
++ return ret_code;
++ ret_code = zxdh_post_rdma_write(qp, &split_part2_info,
++ post_sq, 0);
++ if (ret_code)
++ return ret_code;
++ } else {
++ ret_code = zxdh_post_rdma_write(qp, info, post_sq,
++ total_size);
++ if (ret_code)
++ return ret_code;
++ }
++ }
++
++ return 0;
++}
++
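++/*
++ * Split one RDMA READ whose PSN span would cross the PSN boundary into two
++ * reads: part 1 covers remain_size bytes (the payload that still fits before
++ * the boundary), part 2 covers the remainder; the original SGE list is
++ * re-sliced into qp->split_sg_list for both parts.
++ */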
++static void split_two_part_info(struct zxdh_qp_uk *qp,
++ struct zxdh_post_sq_info *info, __u32 ori_psn,
++ __u32 pre_cal_psn,
++ struct zxdh_post_sq_info *split_part1_info,
++ struct zxdh_post_sq_info *split_part2_info)
++{
++ __u32 total_size = 0;
++ __u32 remain_size = 0;
++ __u32 split_size = 0;
++ struct zxdh_rdma_read *op_info;
++
++ op_info = &info->op.rdma_read;
++ total_size = op_info->rem_addr.len;
++ split_part1_info->op.rdma_read.lo_sg_list = qp->split_sg_list;
++ split_part2_info->op.rdma_read.lo_sg_list =
++ qp->split_sg_list + op_info->num_lo_sges;
++
++ memset(split_part1_info->op.rdma_read.lo_sg_list, 0,
++ 2 * op_info->num_lo_sges * sizeof(struct zxdh_sge));
++ if (pre_cal_psn < ori_psn && pre_cal_psn != 0)
++ remain_size = (0xffffff - ori_psn + 1) * qp->mtu;
++ else
++ remain_size = (0x800000 - ori_psn) * qp->mtu;
++
++ split_size = total_size - remain_size;
++
++ split_part1_info->signaled = false;
++ split_part1_info->local_fence = info->local_fence;
++ split_part1_info->read_fence = info->read_fence;
++ split_part1_info->solicited = false;
++ split_part1_info->wr_id = info->wr_id;
++ split_part1_info->op.rdma_read.rem_addr.stag = op_info->rem_addr.stag;
++ split_part1_info->op.rdma_read.rem_addr.tag_off =
++ op_info->rem_addr.tag_off;
++
++ split_part2_info->signaled = info->signaled;
++ split_part2_info->local_fence = info->local_fence;
++ split_part2_info->read_fence = info->read_fence;
++ split_part2_info->solicited = info->solicited;
++ split_part2_info->wr_id = info->wr_id;
++ split_part2_info->op.rdma_read.rem_addr.stag = op_info->rem_addr.stag;
++ split_part2_info->op.rdma_read.rem_addr.tag_off =
++ op_info->rem_addr.tag_off + remain_size;
++
++ for (int i = 0; i < op_info->num_lo_sges; i++) {
++ if (op_info->lo_sg_list[i].len +
++ split_part1_info->op.rdma_read.rem_addr.len <
++ remain_size) {
++ split_part1_info->op.rdma_read.rem_addr.len +=
++ op_info->lo_sg_list[i].len;
++ split_part1_info->op.rdma_read.num_lo_sges += 1;
++ memcpy(split_part1_info->op.rdma_read.lo_sg_list + i,
++ op_info->lo_sg_list + i,
++ sizeof(struct zxdh_sge));
++ continue;
++ } else if (op_info->lo_sg_list[i].len +
++ split_part1_info->op.rdma_read.rem_addr.len ==
++ remain_size) {
++ split_part1_info->op.rdma_read.rem_addr.len +=
++ op_info->lo_sg_list[i].len;
++ split_part1_info->op.rdma_read.num_lo_sges += 1;
++ memcpy(split_part1_info->op.rdma_read.lo_sg_list + i,
++ op_info->lo_sg_list + i,
++ sizeof(struct zxdh_sge));
++ split_part2_info->op.rdma_read.rem_addr.len =
++ split_size;
++ split_part2_info->op.rdma_read.num_lo_sges =
++ op_info->num_lo_sges -
++ split_part1_info->op.rdma_read.num_lo_sges;
++ memcpy(split_part2_info->op.rdma_read.lo_sg_list,
++ op_info->lo_sg_list + i + 1,
++ split_part2_info->op.rdma_read.num_lo_sges *
++ sizeof(struct zxdh_sge));
++ break;
++ }
++
++ split_part1_info->op.rdma_read.lo_sg_list[i].len =
++ remain_size -
++ split_part1_info->op.rdma_read.rem_addr.len;
++ split_part1_info->op.rdma_read.lo_sg_list[i].tag_off =
++ op_info->lo_sg_list[i].tag_off;
++ split_part1_info->op.rdma_read.lo_sg_list[i].stag =
++ op_info->lo_sg_list[i].stag;
++ split_part1_info->op.rdma_read.rem_addr.len = remain_size;
++ split_part1_info->op.rdma_read.num_lo_sges += 1;
++ split_part2_info->op.rdma_read.lo_sg_list[0].len =
++ op_info->lo_sg_list[i].len -
++ split_part1_info->op.rdma_read.lo_sg_list[i].len;
++ split_part2_info->op.rdma_read.lo_sg_list[0].tag_off =
++ op_info->lo_sg_list[i].tag_off +
++ split_part1_info->op.rdma_read.lo_sg_list[i].len;
++ split_part2_info->op.rdma_read.lo_sg_list[0].stag =
++ op_info->lo_sg_list[i].stag;
++ split_part2_info->op.rdma_read.rem_addr.len = split_size;
++ split_part2_info->op.rdma_read.num_lo_sges =
++ op_info->num_lo_sges -
++ split_part1_info->op.rdma_read.num_lo_sges + 1;
++ if (split_part2_info->op.rdma_read.num_lo_sges - 1 > 0) {
++ memcpy(split_part2_info->op.rdma_read.lo_sg_list + 1,
++ op_info->lo_sg_list + i + 1,
++ (split_part2_info->op.rdma_read.num_lo_sges -
++ 1) * sizeof(struct zxdh_sge));
++ }
++ break;
++ }
++}
++
++static enum zxdh_status_code zxdh_post_rdma_read(struct zxdh_qp_uk *qp,
++ struct zxdh_post_sq_info *info,
++ bool post_sq, __u32 total_size)
++{
++ enum zxdh_status_code ret_code;
++ struct zxdh_rdma_read *op_info;
++ __u32 i, byte_off = 0;
++ bool local_fence = false;
++ __u32 addl_frag_cnt;
++ __le64 *wqe;
++ __u32 wqe_idx;
++ __u16 quanta;
++ __u64 hdr;
++
++ op_info = &info->op.rdma_read;
++ ret_code = zxdh_fragcnt_to_quanta_sq(op_info->num_lo_sges, &quanta);
++ if (ret_code)
++ return ret_code;
++
++ wqe = zxdh_qp_get_next_send_wqe(qp, &wqe_idx, quanta, total_size, info);
++ if (!wqe)
++ return ZXDH_ERR_QP_TOOMANY_WRS_POSTED;
++
++ zxdh_clr_wqes(qp, wqe_idx);
++
++ addl_frag_cnt =
++ op_info->num_lo_sges > 1 ? (op_info->num_lo_sges - 1) : 0;
++ local_fence |= info->local_fence;
++
++ if (op_info->num_lo_sges) {
++ set_64bit_val(
++ wqe, 16,
++ FIELD_PREP(ZXDHQPSQ_FIRST_FRAG_VALID,
++ op_info->lo_sg_list->len ==
++ ZXDH_MAX_SQ_PAYLOAD_SIZE ?
++ 1 :
++ 0) |
++ FIELD_PREP(ZXDHQPSQ_FIRST_FRAG_LEN,
++ op_info->lo_sg_list->len) |
++ FIELD_PREP(ZXDHQPSQ_FIRST_FRAG_STAG,
++ op_info->lo_sg_list->stag));
++ set_64bit_val(wqe, 8,
++ FIELD_PREP(ZXDHQPSQ_FRAG_TO,
++ op_info->lo_sg_list->tag_off));
++ } else {
++		/* if zero SGEs, post a special SGE with zero length */
++ set_64bit_val(wqe, 16,
++ FIELD_PREP(ZXDHQPSQ_FIRST_FRAG_VALID, 0) |
++ FIELD_PREP(ZXDHQPSQ_FIRST_FRAG_LEN, 0) |
++ FIELD_PREP(ZXDHQPSQ_FIRST_FRAG_STAG,
++ 0x100));
++ set_64bit_val(wqe, 8, FIELD_PREP(ZXDHQPSQ_FRAG_TO, 0));
++ }
++
++ i = 1;
++ for (byte_off = ZXDH_SQ_WQE_BYTESIZE; i < op_info->num_lo_sges;
++ i += 2) {
++ if (i == addl_frag_cnt) {
++ qp->wqe_ops.iw_set_fragment(wqe, byte_off,
++ &op_info->lo_sg_list[i],
++ qp->swqe_polarity);
++ byte_off += ZXDH_QP_FRAG_BYTESIZE;
++ break;
++ }
++ byte_off += ZXDH_QP_FRAG_BYTESIZE;
++ qp->wqe_ops.iw_set_fragment(wqe, byte_off,
++ &op_info->lo_sg_list[i + 1],
++ qp->swqe_polarity);
++ byte_off -= ZXDH_QP_FRAG_BYTESIZE;
++ qp->wqe_ops.iw_set_fragment(wqe, byte_off,
++ &op_info->lo_sg_list[i],
++ qp->swqe_polarity);
++ byte_off += 2 * ZXDH_QP_FRAG_BYTESIZE;
++ }
++
++	/* if num_lo_sges is even and non-zero, set the valid bit in the unused trailing fragment */
++ if (!(op_info->num_lo_sges & 0x01) && op_info->num_lo_sges) {
++ qp->wqe_ops.iw_set_fragment(wqe, byte_off, NULL,
++ qp->swqe_polarity);
++ }
++
++ hdr = FIELD_PREP(ZXDHQPSQ_VALID, qp->swqe_polarity) |
++ FIELD_PREP(ZXDHQPSQ_OPCODE, ZXDH_OP_TYPE_READ) |
++ FIELD_PREP(ZXDHQPSQ_SIGCOMPL, info->signaled) |
++ FIELD_PREP(ZXDHQPSQ_LOCALFENCE, info->local_fence) |
++ FIELD_PREP(ZXDHQPSQ_READFENCE, info->read_fence) |
++ FIELD_PREP(ZXDHQPSQ_SOLICITED, info->solicited) |
++ FIELD_PREP(ZXDHQPSQ_ADDFRAGCNT, addl_frag_cnt) |
++ FIELD_PREP(ZXDHQPSQ_REMSTAG, op_info->rem_addr.stag);
++ set_64bit_val(wqe, 24,
++ FIELD_PREP(ZXDHQPSQ_FRAG_TO, op_info->rem_addr.tag_off));
++
++ udma_to_device_barrier(); /* make sure WQE is populated before valid bit is set */
++
++ set_64bit_val(wqe, 0, hdr);
++ if (post_sq)
++ zxdh_uk_qp_post_wr(qp);
++ return 0;
++}
++
++/**
++ * zxdh_uk_rdma_read - rdma read command
++ * @qp: hw qp ptr
++ * @info: post sq information
++ * @inv_stag: flag for inv_stag
++ * @post_sq: flag to post sq
++ */
++enum zxdh_status_code zxdh_uk_rdma_read(struct zxdh_qp_uk *qp,
++ struct zxdh_post_sq_info *info,
++ bool inv_stag, bool post_sq)
++{
++ struct zxdh_post_sq_info split_part1_info = { 0 };
++ struct zxdh_post_sq_info split_part2_info = { 0 };
++ struct zxdh_rdma_read *op_info;
++ enum zxdh_status_code ret_code;
++ __u32 i, total_size = 0, pre_cal_psn = 0;
++
++ op_info = &info->op.rdma_read;
++ if (qp->max_sq_frag_cnt < op_info->num_lo_sges)
++ return ZXDH_ERR_INVALID_FRAG_COUNT;
++
++ for (i = 0; i < op_info->num_lo_sges; i++) {
++ total_size += op_info->lo_sg_list[i].len;
++ if (0 != i && 0 == op_info->lo_sg_list[i].len)
++ return ZXDH_ERR_INVALID_FRAG_LEN;
++ }
++
++ if (total_size > ZXDH_MAX_SQ_PAYLOAD_SIZE)
++ return ZXDH_ERR_QP_INVALID_MSG_SIZE;
++ op_info->rem_addr.len = total_size;
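++	/*
++	 * Pre-compute the PSN after this read; if the span would cross the
++	 * PSN boundary, post it as two split READ WQEs.
++	 */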
++ pre_cal_psn = qp->next_psn;
++ qp_tx_psn_add(&pre_cal_psn, total_size, qp->mtu);
++ if (read_wqe_need_split(pre_cal_psn, qp->next_psn)) {
++ split_two_part_info(qp, info, qp->next_psn, pre_cal_psn,
++ &split_part1_info, &split_part2_info);
++ ret_code = zxdh_post_rdma_read(qp, &split_part1_info, post_sq,
++ total_size);
++ if (ret_code)
++ return ret_code;
++
++ qp_tx_psn_add(&qp->next_psn,
++ split_part1_info.op.rdma_read.rem_addr.len,
++ qp->mtu);
++ ret_code = zxdh_post_rdma_read(qp, &split_part2_info, post_sq,
++ total_size);
++ if (ret_code)
++ return ret_code;
++
++ qp_tx_psn_add(&qp->next_psn,
++ split_part2_info.op.rdma_read.rem_addr.len,
++ qp->mtu);
++ } else {
++ ret_code = zxdh_post_rdma_read(qp, info, post_sq, total_size);
++ if (ret_code)
++ return ret_code;
++
++ qp_tx_psn_add(&qp->next_psn, total_size, qp->mtu);
++ }
++ return 0;
++}
++
++/**
++ * zxdh_uk_rc_send - rdma send command
++ * @qp: hw qp ptr
++ * @info: post sq information
++ * @post_sq: flag to post sq
++ */
++enum zxdh_status_code zxdh_uk_rc_send(struct zxdh_qp_uk *qp,
++ struct zxdh_post_sq_info *info,
++ bool post_sq)
++{
++ __le64 *wqe;
++ struct zxdh_post_send *op_info;
++ __u64 hdr;
++ __u32 i, wqe_idx, total_size = 0, byte_off;
++ enum zxdh_status_code ret_code;
++ __u32 frag_cnt, addl_frag_cnt;
++ bool read_fence = false;
++ __u16 quanta;
++ bool imm_data_flag = info->imm_data_valid ? 1 : 0;
++
++ op_info = &info->op.send;
++ if (qp->max_sq_frag_cnt < op_info->num_sges)
++ return ZXDH_ERR_INVALID_FRAG_COUNT;
++
++ for (i = 0; i < op_info->num_sges; i++) {
++ total_size += op_info->sg_list[i].len;
++ if (0 != i && 0 == op_info->sg_list[i].len)
++ return ZXDH_ERR_INVALID_FRAG_LEN;
++ }
++
++ if (total_size > ZXDH_MAX_SQ_PAYLOAD_SIZE)
++ return ZXDH_ERR_QP_INVALID_MSG_SIZE;
++
++ if (imm_data_flag)
++ frag_cnt = op_info->num_sges ? (op_info->num_sges + 1) : 2;
++ else
++ frag_cnt = op_info->num_sges;
++ ret_code = zxdh_fragcnt_to_quanta_sq(frag_cnt, &quanta);
++ if (ret_code)
++ return ret_code;
++
++ wqe = zxdh_qp_get_next_send_wqe(qp, &wqe_idx, quanta, total_size, info);
++ if (!wqe)
++ return ZXDH_ERR_QP_TOOMANY_WRS_POSTED;
++
++ zxdh_clr_wqes(qp, wqe_idx);
++
++ read_fence |= info->read_fence;
++ addl_frag_cnt = op_info->num_sges > 1 ? (op_info->num_sges - 1) : 0;
++ if (op_info->num_sges) {
++ set_64bit_val(
++ wqe, 16,
++ FIELD_PREP(ZXDHQPSQ_FIRST_FRAG_VALID,
++ op_info->sg_list->len ==
++ ZXDH_MAX_SQ_PAYLOAD_SIZE ?
++ 1 :
++ 0) |
++ FIELD_PREP(ZXDHQPSQ_FIRST_FRAG_LEN,
++ op_info->sg_list->len) |
++ FIELD_PREP(ZXDHQPSQ_FIRST_FRAG_STAG,
++ op_info->sg_list->stag));
++ set_64bit_val(wqe, 8,
++ FIELD_PREP(ZXDHQPSQ_FRAG_TO,
++ op_info->sg_list->tag_off));
++ } else {
++		/* if zero SGEs, post a special SGE with zero length */
++ set_64bit_val(wqe, 16,
++ FIELD_PREP(ZXDHQPSQ_FIRST_FRAG_VALID, 0) |
++ FIELD_PREP(ZXDHQPSQ_FIRST_FRAG_LEN, 0) |
++ FIELD_PREP(ZXDHQPSQ_FIRST_FRAG_STAG,
++ 0x100));
++ set_64bit_val(wqe, 8, FIELD_PREP(ZXDHQPSQ_FRAG_TO, 0));
++ }
++
++ if (imm_data_flag) {
++ byte_off = ZXDH_SQ_WQE_BYTESIZE + ZXDH_QP_FRAG_BYTESIZE;
++ if (op_info->num_sges > 1) {
++ qp->wqe_ops.iw_set_fragment(wqe, byte_off,
++ &op_info->sg_list[1],
++ qp->swqe_polarity);
++ byte_off += ZXDH_QP_FRAG_BYTESIZE;
++ }
++ set_64bit_val(
++ wqe, ZXDH_SQ_WQE_BYTESIZE,
++ FIELD_PREP(ZXDHQPSQ_IMMDATA_VALID, qp->swqe_polarity) |
++ FIELD_PREP(ZXDHQPSQ_IMMDATA, info->imm_data));
++ i = 2;
++ if (i < op_info->num_sges) {
++ for (byte_off = ZXDH_SQ_WQE_BYTESIZE +
++ 2 * ZXDH_QP_FRAG_BYTESIZE;
++ i < op_info->num_sges; i += 2) {
++ if (i == addl_frag_cnt) {
++ qp->wqe_ops.iw_set_fragment(
++ wqe, byte_off,
++ &op_info->sg_list[i],
++ qp->swqe_polarity);
++ byte_off += ZXDH_QP_FRAG_BYTESIZE;
++ break;
++ }
++ byte_off += ZXDH_QP_FRAG_BYTESIZE;
++ qp->wqe_ops.iw_set_fragment(
++ wqe, byte_off, &op_info->sg_list[i + 1],
++ qp->swqe_polarity);
++ byte_off -= ZXDH_QP_FRAG_BYTESIZE;
++ qp->wqe_ops.iw_set_fragment(
++ wqe, byte_off, &op_info->sg_list[i],
++ qp->swqe_polarity);
++ byte_off += 2 * ZXDH_QP_FRAG_BYTESIZE;
++ }
++ }
++ } else {
++ i = 1;
++ for (byte_off = ZXDH_SQ_WQE_BYTESIZE; i < op_info->num_sges;
++ i += 2) {
++ if (i == addl_frag_cnt) {
++ qp->wqe_ops.iw_set_fragment(
++ wqe, byte_off, &op_info->sg_list[i],
++ qp->swqe_polarity);
++ byte_off += ZXDH_QP_FRAG_BYTESIZE;
++ break;
++ }
++ byte_off += ZXDH_QP_FRAG_BYTESIZE;
++ qp->wqe_ops.iw_set_fragment(wqe, byte_off,
++ &op_info->sg_list[i + 1],
++ qp->swqe_polarity);
++ byte_off -= ZXDH_QP_FRAG_BYTESIZE;
++ qp->wqe_ops.iw_set_fragment(wqe, byte_off,
++ &op_info->sg_list[i],
++ qp->swqe_polarity);
++ byte_off += 2 * ZXDH_QP_FRAG_BYTESIZE;
++ }
++ }
++
++	/* if frag_cnt is even and non-zero, set the valid bit in the unused trailing fragment */
++ if (!(frag_cnt & 0x01) && frag_cnt) {
++ qp->wqe_ops.iw_set_fragment(wqe, byte_off, NULL,
++ qp->swqe_polarity);
++ }
++
++ hdr = FIELD_PREP(ZXDHQPSQ_VALID, qp->swqe_polarity) |
++ FIELD_PREP(ZXDHQPSQ_OPCODE, info->op_type) |
++ FIELD_PREP(ZXDHQPSQ_SIGCOMPL, info->signaled) |
++ FIELD_PREP(ZXDHQPSQ_LOCALFENCE, info->local_fence) |
++ FIELD_PREP(ZXDHQPSQ_READFENCE, read_fence) |
++ FIELD_PREP(ZXDHQPSQ_SOLICITED, info->solicited) |
++ FIELD_PREP(ZXDHQPSQ_IMMDATAFLAG, imm_data_flag) |
++ FIELD_PREP(ZXDHQPSQ_ADDFRAGCNT, addl_frag_cnt) |
++ FIELD_PREP(ZXDHQPSQ_REMSTAG, info->stag_to_inv);
++ set_64bit_val(wqe, 24,
++ FIELD_PREP(ZXDHQPSQ_INLINEDATAFLAG, 0) |
++ FIELD_PREP(ZXDHQPSQ_INLINEDATALEN, 0));
++
++ udma_to_device_barrier(); /* make sure WQE is populated before valid bit is set */
++
++ set_64bit_val(wqe, 0, hdr);
++ if (post_sq)
++ zxdh_uk_qp_post_wr(qp);
++ qp_tx_psn_add(&qp->next_psn, total_size, qp->mtu);
++
++ return 0;
++}
++
++/**
++ * zxdh_uk_ud_send - rdma send command
++ * @qp: hw qp ptr
++ * @info: post sq information
++ * @post_sq: flag to post sq
++ */
++enum zxdh_status_code zxdh_uk_ud_send(struct zxdh_qp_uk *qp,
++ struct zxdh_post_sq_info *info,
++ bool post_sq)
++{
++ __le64 *wqe_base;
++ __le64 *wqe_ex = NULL;
++ struct zxdh_post_send *op_info;
++ __u64 hdr;
++ __u32 i, wqe_idx, total_size = 0, byte_off;
++ enum zxdh_status_code ret_code;
++ __u32 frag_cnt, addl_frag_cnt;
++ bool read_fence = false;
++ __u16 quanta;
++ bool imm_data_flag = info->imm_data_valid ? 1 : 0;
++
++ op_info = &info->op.send;
++ if (qp->max_sq_frag_cnt < op_info->num_sges)
++ return ZXDH_ERR_INVALID_FRAG_COUNT;
++
++ for (i = 0; i < op_info->num_sges; i++) {
++ total_size += op_info->sg_list[i].len;
++ if (0 != i && 0 == op_info->sg_list[i].len)
++ return ZXDH_ERR_INVALID_FRAG_LEN;
++ }
++
++ if (total_size > ZXDH_MAX_SQ_PAYLOAD_SIZE)
++ return ZXDH_ERR_QP_INVALID_MSG_SIZE;
++
++ if (imm_data_flag)
++ frag_cnt = op_info->num_sges ? (op_info->num_sges + 1) : 2;
++ else
++ frag_cnt = op_info->num_sges;
++ ret_code = zxdh_fragcnt_to_quanta_sq(frag_cnt, &quanta);
++ if (ret_code)
++ return ret_code;
++
++ if (quanta > ZXDH_SQ_RING_FREE_QUANTA(qp->sq_ring))
++ return ZXDH_ERR_QP_TOOMANY_WRS_POSTED;
++
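++	/*
++	 * UD WQEs are built directly on the ring: the base quantum holds the
++	 * header and first SGE, and additional SGEs / immediate data spill
++	 * into the following quanta, with the valid-bit polarity re-evaluated
++	 * whenever the index wraps to slot 0.
++	 */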
++ wqe_idx = ZXDH_RING_CURRENT_HEAD(qp->sq_ring);
++ if (!wqe_idx)
++ qp->swqe_polarity = !qp->swqe_polarity;
++
++ ZXDH_RING_MOVE_HEAD_BY_COUNT_NOCHECK(qp->sq_ring, quanta);
++
++ wqe_base = qp->sq_base[wqe_idx].elem;
++ qp->sq_wrtrk_array[wqe_idx].wrid = info->wr_id;
++ qp->sq_wrtrk_array[wqe_idx].wr_len = total_size;
++ qp->sq_wrtrk_array[wqe_idx].quanta = quanta;
++
++ zxdh_clr_wqes(qp, wqe_idx);
++
++ read_fence |= info->read_fence;
++ addl_frag_cnt = op_info->num_sges > 1 ? (op_info->num_sges - 1) : 0;
++ hdr = FIELD_PREP(ZXDHQPSQ_VALID, qp->swqe_polarity) |
++ FIELD_PREP(ZXDHQPSQ_OPCODE, info->op_type) |
++ FIELD_PREP(ZXDHQPSQ_SIGCOMPL, info->signaled) |
++ FIELD_PREP(ZXDHQPSQ_SOLICITED, info->solicited) |
++ FIELD_PREP(ZXDHQPSQ_IMMDATAFLAG, imm_data_flag) |
++ FIELD_PREP(ZXDHQPSQ_UD_INLINEDATAFLAG, 0) |
++ FIELD_PREP(ZXDHQPSQ_UD_INLINEDATALEN, 0) |
++ FIELD_PREP(ZXDHQPSQ_UD_ADDFRAGCNT, addl_frag_cnt) |
++ FIELD_PREP(ZXDHQPSQ_AHID, op_info->ah_id);
++
++ if (op_info->num_sges) {
++ set_64bit_val(
++ wqe_base, 16,
++ FIELD_PREP(ZXDHQPSQ_FIRST_FRAG_VALID,
++ op_info->sg_list->len ==
++ ZXDH_MAX_SQ_PAYLOAD_SIZE ?
++ 1 :
++ 0) |
++ FIELD_PREP(ZXDHQPSQ_FIRST_FRAG_LEN,
++ op_info->sg_list->len) |
++ FIELD_PREP(ZXDHQPSQ_FIRST_FRAG_STAG,
++ op_info->sg_list->stag));
++ set_64bit_val(wqe_base, 8,
++ FIELD_PREP(ZXDHQPSQ_FRAG_TO,
++ op_info->sg_list->tag_off));
++ } else {
++		/* if zero SGEs, post a special SGE with zero length */
++ set_64bit_val(wqe_base, 16,
++ FIELD_PREP(ZXDHQPSQ_FIRST_FRAG_VALID, 0) |
++ FIELD_PREP(ZXDHQPSQ_FIRST_FRAG_LEN, 0) |
++ FIELD_PREP(ZXDHQPSQ_FIRST_FRAG_STAG,
++ 0x100));
++ set_64bit_val(wqe_base, 8, FIELD_PREP(ZXDHQPSQ_FRAG_TO, 0));
++ }
++
++ if (imm_data_flag) {
++ wqe_idx = (wqe_idx + 1) % qp->sq_ring.size;
++ if (!wqe_idx)
++ qp->swqe_polarity = !qp->swqe_polarity;
++ wqe_ex = qp->sq_base[wqe_idx].elem;
++ if (op_info->num_sges > 1) {
++ qp->wqe_ops.iw_set_fragment(wqe_ex,
++ ZXDH_QP_FRAG_BYTESIZE,
++ &op_info->sg_list[1],
++ qp->swqe_polarity);
++ }
++ set_64bit_val(
++ wqe_ex, 0,
++ FIELD_PREP(ZXDHQPSQ_IMMDATA_VALID, qp->swqe_polarity) |
++ FIELD_PREP(ZXDHQPSQ_IMMDATA, info->imm_data));
++ i = 2;
++ for (byte_off = ZXDH_QP_FRAG_BYTESIZE; i < op_info->num_sges;
++ i += 2) {
++ if (!(i & 0x1)) {
++ wqe_idx = (wqe_idx + 1) % qp->sq_ring.size;
++ if (!wqe_idx)
++ qp->swqe_polarity = !qp->swqe_polarity;
++ wqe_ex = qp->sq_base[wqe_idx].elem;
++ }
++ if (i == addl_frag_cnt) {
++ qp->wqe_ops.iw_set_fragment(
++ wqe_ex, 0, &op_info->sg_list[i],
++ qp->swqe_polarity);
++ break;
++ }
++ qp->wqe_ops.iw_set_fragment(
++ wqe_ex, byte_off % ZXDH_SQ_WQE_BYTESIZE,
++ &op_info->sg_list[i + 1], qp->swqe_polarity);
++ byte_off -= ZXDH_QP_FRAG_BYTESIZE;
++ qp->wqe_ops.iw_set_fragment(
++ wqe_ex, byte_off % ZXDH_SQ_WQE_BYTESIZE,
++ &op_info->sg_list[i], qp->swqe_polarity);
++ byte_off += ZXDH_QP_FRAG_BYTESIZE;
++ }
++ } else {
++ i = 1;
++ for (byte_off = 0; i < op_info->num_sges; i += 2) {
++ if (i & 0x1) {
++ wqe_idx = (wqe_idx + 1) % qp->sq_ring.size;
++ if (!wqe_idx)
++ qp->swqe_polarity = !qp->swqe_polarity;
++ wqe_ex = qp->sq_base[wqe_idx].elem;
++ }
++ if (i == addl_frag_cnt) {
++ qp->wqe_ops.iw_set_fragment(
++ wqe_ex, 0, &op_info->sg_list[i],
++ qp->swqe_polarity);
++ break;
++ }
++ byte_off += ZXDH_QP_FRAG_BYTESIZE;
++ qp->wqe_ops.iw_set_fragment(
++ wqe_ex, byte_off % ZXDH_SQ_WQE_BYTESIZE,
++ &op_info->sg_list[i + 1], qp->swqe_polarity);
++ byte_off -= ZXDH_QP_FRAG_BYTESIZE;
++ qp->wqe_ops.iw_set_fragment(
++ wqe_ex, byte_off % ZXDH_SQ_WQE_BYTESIZE,
++ &op_info->sg_list[i], qp->swqe_polarity);
++ }
++ }
++
++	/* if frag_cnt is even and non-zero, set the valid bit in the unused trailing fragment */
++ if (!(frag_cnt & 0x01) && frag_cnt && wqe_ex) {
++ qp->wqe_ops.iw_set_fragment(wqe_ex, ZXDH_QP_FRAG_BYTESIZE, NULL,
++ qp->swqe_polarity);
++ }
++
++ set_64bit_val(wqe_base, 24,
++ FIELD_PREP(ZXDHQPSQ_DESTQPN, op_info->dest_qp) |
++ FIELD_PREP(ZXDHQPSQ_DESTQKEY, op_info->qkey));
++
++ udma_to_device_barrier(); /* make sure WQE is populated before valid bit is set */
++
++ set_64bit_val(wqe_base, 0, hdr);
++ if (post_sq)
++ zxdh_uk_qp_post_wr(qp);
++
++ return 0;
++}
++
++/**
++ * zxdh_set_mw_bind_wqe - set mw bind in wqe
++ * @wqe: wqe for setting mw bind
++ * @op_info: info for setting wqe values
++ */
++static void zxdh_set_mw_bind_wqe(__le64 *wqe, struct zxdh_bind_window *op_info)
++{
++ __u32 value = 0;
++ __u8 leaf_pbl_size = op_info->leaf_pbl_size;
++
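++	/*
++	 * The parent-MR PBLE index is scattered across the PA_PBLE_ONE/TWO
++	 * (and, in zxdh_uk_mw_bind, PA_PBLE_THREE) or the LEVLE2_* fields;
++	 * which bits go where depends on leaf_pbl_size.
++	 */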
++ set_64bit_val(wqe, 8, (uintptr_t)op_info->va);
++
++ if (leaf_pbl_size == 0) {
++ value = (__u32)(op_info->mw_pa_pble_index >> 12);
++ value = (value & 0x03FFFFFFFC0000) >> 18;
++ set_64bit_val(
++ wqe, 16,
++ FIELD_PREP(ZXDHQPSQ_PARENTMRSTAG, op_info->mr_stag) |
++ FIELD_PREP(ZXDHQPSQ_MW_PA_PBLE_TWO, value));
++ } else if (leaf_pbl_size == 1) {
++ value = (__u32)((op_info->mw_pa_pble_index & 0x0FFC0000) >> 18);
++ set_64bit_val(
++ wqe, 16,
++ FIELD_PREP(ZXDHQPSQ_PARENTMRSTAG, op_info->mr_stag) |
++ FIELD_PREP(ZXDHQPSQ_MW_PA_PBLE_TWO, value));
++ } else {
++ value = (__u32)((op_info->mw_pa_pble_index & 0x0FFC0000) >> 18);
++ set_64bit_val(
++ wqe, 16,
++ FIELD_PREP(ZXDHQPSQ_PARENTMRSTAG, op_info->mr_stag) |
++ FIELD_PREP(ZXDHQPSQ_MW_LEVLE2_FIRST_PBLE_INDEX,
++ value) |
++ FIELD_PREP(ZXDHQPSQ_MW_LEVLE2_ROOT_PBLE_INDEX,
++ op_info->root_leaf_offset));
++ }
++
++ if (leaf_pbl_size == 0) {
++ value = (__u32)(op_info->mw_pa_pble_index >> 12);
++ value = value & 0x3FFFF;
++ } else {
++ value = (__u32)(op_info->mw_pa_pble_index & 0x3FFFF);
++ }
++
++ set_64bit_val(wqe, 24,
++ op_info->bind_len |
++ FIELD_PREP(ZXDHQPSQ_MW_PA_PBLE_ONE, value));
++}
++
++/**
++ * zxdh_copy_inline_data - Copy inline data to wqe
++ * @dest: pointer to wqe
++ * @src: pointer to inline data
++ * @len: length of inline data to copy
++ * @polarity: polarity of wqe valid bit
++ * @imm_data_flag: true if immediate data occupies the first 8 bytes of the first data quantum
++ */
++static void zxdh_copy_inline_data(__u8 *dest, __u8 *src, __u32 len,
++ __u8 polarity, bool imm_data_flag)
++{
++ __u8 inline_valid = polarity << ZXDH_INLINE_VALID_S;
++ __u32 copy_size;
++ __u8 *inline_valid_addr;
++
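++	/*
++	 * Each additional 32-byte quantum carries 31 bytes of inline payload:
++	 * 7 bytes, a valid byte at offset 7, then 24 bytes. When immediate
++	 * data is present it occupies the first 8 bytes of the first data
++	 * quantum, leaving 24 payload bytes there.
++	 */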
++ dest += ZXDH_WQE_SIZE_32; /* point to additional 32 byte quanta */
++ if (len) {
++ inline_valid_addr = dest + WQE_OFFSET_7BYTES;
++ if (imm_data_flag) {
++ copy_size = len < INLINE_DATASIZE_24BYTES ?
++ len :
++ INLINE_DATASIZE_24BYTES;
++ dest += WQE_OFFSET_8BYTES;
++ memcpy(dest, src, copy_size);
++ len -= copy_size;
++ dest += WQE_OFFSET_24BYTES;
++ src += copy_size;
++ } else {
++ if (len <= INLINE_DATASIZE_7BYTES) {
++ copy_size = len;
++ memcpy(dest, src, copy_size);
++ *inline_valid_addr = inline_valid;
++ return;
++ }
++ memcpy(dest, src, INLINE_DATASIZE_7BYTES);
++ len -= INLINE_DATASIZE_7BYTES;
++ dest += WQE_OFFSET_8BYTES;
++ src += INLINE_DATA_OFFSET_7BYTES;
++ copy_size = len < INLINE_DATASIZE_24BYTES ?
++ len :
++ INLINE_DATASIZE_24BYTES;
++ memcpy(dest, src, copy_size);
++ len -= copy_size;
++ dest += WQE_OFFSET_24BYTES;
++ src += copy_size;
++ }
++ *inline_valid_addr = inline_valid;
++ }
++
++ while (len) {
++ inline_valid_addr = dest + WQE_OFFSET_7BYTES;
++ if (len <= INLINE_DATASIZE_7BYTES) {
++ copy_size = len;
++ memcpy(dest, src, copy_size);
++ *inline_valid_addr = inline_valid;
++ return;
++ }
++ memcpy(dest, src, INLINE_DATASIZE_7BYTES);
++ len -= INLINE_DATASIZE_7BYTES;
++ dest += WQE_OFFSET_8BYTES;
++ src += INLINE_DATA_OFFSET_7BYTES;
++ copy_size = len < INLINE_DATASIZE_24BYTES ?
++ len :
++ INLINE_DATASIZE_24BYTES;
++ memcpy(dest, src, copy_size);
++ len -= copy_size;
++ dest += WQE_OFFSET_24BYTES;
++ src += copy_size;
++
++ *inline_valid_addr = inline_valid;
++ }
++}
++
++/**
++ * zxdh_inline_data_size_to_quanta - calculate quanta needed for inline data
++ * @data_size: data size for inline
++ * @imm_data_flag: flag for immediate data
++ *
++ * Gets the quanta based on inline and immediate data.
++ */
++static __u16 zxdh_inline_data_size_to_quanta(__u32 data_size,
++ bool imm_data_flag)
++{
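++	/*
++	 * One 32-byte header quantum plus ceil(len / 31) data quanta, since
++	 * each data quantum holds 31 bytes of inline payload; immediate data
++	 * is modeled as 7 extra bytes.
++	 */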
++ if (imm_data_flag)
++ data_size += INLINE_DATASIZE_7BYTES;
++
++ return data_size % 31 ? data_size / 31 + 2 : data_size / 31 + 1;
++}
++
++/**
++ * zxdh_uk_inline_rdma_write - inline rdma write operation
++ * @qp: hw qp ptr
++ * @info: post sq information
++ * @post_sq: flag to post sq
++ */
++enum zxdh_status_code zxdh_uk_inline_rdma_write(struct zxdh_qp_uk *qp,
++ struct zxdh_post_sq_info *info,
++ bool post_sq)
++{
++ __le64 *wqe;
++ __u8 imm_valid;
++ struct zxdh_inline_rdma_write *op_info;
++ __u64 hdr = 0;
++ __u32 wqe_idx;
++ bool read_fence = false;
++ __u16 quanta;
++ bool imm_data_flag = info->imm_data_valid ? 1 : 0;
++
++ op_info = &info->op.inline_rdma_write;
++
++ if (op_info->len > qp->max_inline_data)
++ return ZXDH_ERR_INVALID_INLINE_DATA_SIZE;
++ if (imm_data_flag && op_info->len > ZXDH_MAX_SQ_INLINE_DATELEN_WITH_IMM)
++ return ZXDH_ERR_INVALID_INLINE_DATA_SIZE;
++
++ quanta = qp->wqe_ops.iw_inline_data_size_to_quanta(op_info->len,
++ imm_data_flag);
++ wqe = zxdh_qp_get_next_send_wqe(qp, &wqe_idx, quanta, op_info->len,
++ info);
++ if (!wqe)
++ return ZXDH_ERR_QP_TOOMANY_WRS_POSTED;
++
++ zxdh_clr_wqes(qp, wqe_idx);
++
++ read_fence |= info->read_fence;
++ hdr = FIELD_PREP(ZXDHQPSQ_VALID, qp->swqe_polarity) |
++ FIELD_PREP(ZXDHQPSQ_OPCODE, info->op_type) |
++ FIELD_PREP(ZXDHQPSQ_SIGCOMPL, info->signaled) |
++ FIELD_PREP(ZXDHQPSQ_LOCALFENCE, info->local_fence) |
++ FIELD_PREP(ZXDHQPSQ_READFENCE, read_fence) |
++ FIELD_PREP(ZXDHQPSQ_SOLICITED, info->solicited) |
++ FIELD_PREP(ZXDHQPSQ_IMMDATAFLAG, imm_data_flag) |
++ FIELD_PREP(ZXDHQPSQ_WRITE_INLINEDATAFLAG, 1) |
++ FIELD_PREP(ZXDHQPSQ_WRITE_INLINEDATALEN, op_info->len) |
++ FIELD_PREP(ZXDHQPSQ_ADDFRAGCNT, quanta - 1) |
++ FIELD_PREP(ZXDHQPSQ_REMSTAG, op_info->rem_addr.stag);
++ set_64bit_val(wqe, 24,
++ FIELD_PREP(ZXDHQPSQ_FRAG_TO, op_info->rem_addr.tag_off));
++
++ if (imm_data_flag) {
++		/* when inline data is present, the immediate-data valid bit uses the inverted polarity */
++ imm_valid = (op_info->len == 0) ? qp->swqe_polarity :
++ (!qp->swqe_polarity);
++
++ set_64bit_val(wqe, 32,
++ FIELD_PREP(ZXDHQPSQ_IMMDATA_VALID, imm_valid) |
++ FIELD_PREP(ZXDHQPSQ_IMMDATA,
++ info->imm_data));
++ }
++ qp->wqe_ops.iw_copy_inline_data((__u8 *)wqe, op_info->data,
++ op_info->len, qp->swqe_polarity,
++ imm_data_flag);
++
++ udma_to_device_barrier(); /* make sure WQE is populated before valid bit is set */
++
++ set_64bit_val(wqe, 0, hdr);
++
++ if (post_sq)
++ zxdh_uk_qp_post_wr(qp);
++ qp_tx_psn_add(&qp->next_psn, op_info->len, qp->mtu);
++ return 0;
++}
++
++/**
++ * zxdh_uk_rc_inline_send - inline send operation
++ * @qp: hw qp ptr
++ * @info: post sq information
++ * @post_sq: flag to post sq
++ */
++enum zxdh_status_code zxdh_uk_rc_inline_send(struct zxdh_qp_uk *qp,
++ struct zxdh_post_sq_info *info,
++ bool post_sq)
++{
++ __le64 *wqe;
++ __u8 imm_valid;
++ struct zxdh_inline_rdma_send *op_info;
++ __u64 hdr;
++ __u32 wqe_idx;
++ bool read_fence = false;
++ __u16 quanta;
++ bool imm_data_flag = info->imm_data_valid ? 1 : 0;
++
++ op_info = &info->op.inline_rdma_send;
++
++ if (op_info->len > qp->max_inline_data)
++ return ZXDH_ERR_INVALID_INLINE_DATA_SIZE;
++ if (imm_data_flag && op_info->len > ZXDH_MAX_SQ_INLINE_DATELEN_WITH_IMM)
++ return ZXDH_ERR_INVALID_INLINE_DATA_SIZE;
++
++ quanta = qp->wqe_ops.iw_inline_data_size_to_quanta(op_info->len,
++ imm_data_flag);
++ wqe = zxdh_qp_get_next_send_wqe(qp, &wqe_idx, quanta, op_info->len,
++ info);
++ if (!wqe)
++ return ZXDH_ERR_QP_TOOMANY_WRS_POSTED;
++
++ zxdh_clr_wqes(qp, wqe_idx);
++
++ read_fence |= info->read_fence;
++ hdr = FIELD_PREP(ZXDHQPSQ_VALID, qp->swqe_polarity) |
++ FIELD_PREP(ZXDHQPSQ_OPCODE, info->op_type) |
++ FIELD_PREP(ZXDHQPSQ_SIGCOMPL, info->signaled) |
++ FIELD_PREP(ZXDHQPSQ_LOCALFENCE, info->local_fence) |
++ FIELD_PREP(ZXDHQPSQ_READFENCE, read_fence) |
++ FIELD_PREP(ZXDHQPSQ_SOLICITED, info->solicited) |
++ FIELD_PREP(ZXDHQPSQ_ADDFRAGCNT, quanta - 1) |
++ FIELD_PREP(ZXDHQPSQ_IMMDATAFLAG, imm_data_flag) |
++ FIELD_PREP(ZXDHQPSQ_REMSTAG, info->stag_to_inv);
++ set_64bit_val(wqe, 24,
++ FIELD_PREP(ZXDHQPSQ_INLINEDATAFLAG, 1) |
++ FIELD_PREP(ZXDHQPSQ_INLINEDATALEN, op_info->len));
++
++ if (imm_data_flag) {
++		/* when inline data is present, the immediate-data valid bit uses the inverted polarity */
++ imm_valid = (op_info->len == 0) ? qp->swqe_polarity :
++ (!qp->swqe_polarity);
++ set_64bit_val(wqe, 32,
++ FIELD_PREP(ZXDHQPSQ_IMMDATA_VALID, imm_valid) |
++ FIELD_PREP(ZXDHQPSQ_IMMDATA,
++ info->imm_data));
++ }
++
++ qp->wqe_ops.iw_copy_inline_data((__u8 *)wqe, op_info->data,
++ op_info->len, qp->swqe_polarity,
++ imm_data_flag);
++
++ udma_to_device_barrier(); /* make sure WQE is populated before valid bit is set */
++
++ set_64bit_val(wqe, 0, hdr);
++
++ if (post_sq)
++ zxdh_uk_qp_post_wr(qp);
++
++ qp_tx_psn_add(&qp->next_psn, op_info->len, qp->mtu);
++ return 0;
++}
++
++/**
++ * zxdh_uk_ud_inline_send - inline send operation
++ * @qp: hw qp ptr
++ * @info: post sq information
++ * @post_sq: flag to post sq
++ */
++enum zxdh_status_code zxdh_uk_ud_inline_send(struct zxdh_qp_uk *qp,
++ struct zxdh_post_sq_info *info,
++ bool post_sq)
++{
++ __le64 *wqe_base;
++ __le64 *wqe_ex;
++ struct zxdh_inline_rdma_send *op_info;
++ __u64 hdr;
++ __u32 wqe_idx;
++ bool read_fence = false;
++ __u16 quanta;
++ bool imm_data_flag = info->imm_data_valid ? 1 : 0;
++ __u8 *inline_dest;
++ __u8 *inline_src;
++ __u32 inline_len;
++ __u32 copy_size;
++ __u8 *inline_valid_addr;
++
++ op_info = &info->op.inline_rdma_send;
++ inline_len = op_info->len;
++
++ if (op_info->len > qp->max_inline_data)
++ return ZXDH_ERR_INVALID_INLINE_DATA_SIZE;
++ if (imm_data_flag && op_info->len > ZXDH_MAX_SQ_INLINE_DATELEN_WITH_IMM)
++ return ZXDH_ERR_INVALID_INLINE_DATA_SIZE;
++
++ quanta = qp->wqe_ops.iw_inline_data_size_to_quanta(op_info->len,
++ imm_data_flag);
++ if (quanta > ZXDH_SQ_RING_FREE_QUANTA(qp->sq_ring))
++ return ZXDH_ERR_QP_TOOMANY_WRS_POSTED;
++
++ wqe_idx = ZXDH_RING_CURRENT_HEAD(qp->sq_ring);
++ if (!wqe_idx)
++ qp->swqe_polarity = !qp->swqe_polarity;
++
++ ZXDH_RING_MOVE_HEAD_BY_COUNT_NOCHECK(qp->sq_ring, quanta);
++
++ wqe_base = qp->sq_base[wqe_idx].elem;
++ qp->sq_wrtrk_array[wqe_idx].wrid = info->wr_id;
++ qp->sq_wrtrk_array[wqe_idx].wr_len = op_info->len;
++ qp->sq_wrtrk_array[wqe_idx].quanta = quanta;
++
++ zxdh_clr_wqes(qp, wqe_idx);
++
++ read_fence |= info->read_fence;
++ hdr = FIELD_PREP(ZXDHQPSQ_VALID, qp->swqe_polarity) |
++ FIELD_PREP(ZXDHQPSQ_OPCODE, info->op_type) |
++ FIELD_PREP(ZXDHQPSQ_SIGCOMPL, info->signaled) |
++ FIELD_PREP(ZXDHQPSQ_SOLICITED, info->solicited) |
++ FIELD_PREP(ZXDHQPSQ_IMMDATAFLAG, imm_data_flag) |
++ FIELD_PREP(ZXDHQPSQ_UD_INLINEDATAFLAG, 1) |
++ FIELD_PREP(ZXDHQPSQ_UD_INLINEDATALEN, op_info->len) |
++ FIELD_PREP(ZXDHQPSQ_UD_ADDFRAGCNT, quanta - 1) |
++ FIELD_PREP(ZXDHQPSQ_AHID, op_info->ah_id);
++ set_64bit_val(wqe_base, 24,
++ FIELD_PREP(ZXDHQPSQ_DESTQPN, op_info->dest_qp) |
++ FIELD_PREP(ZXDHQPSQ_DESTQKEY, op_info->qkey));
++
++ if (imm_data_flag) {
++ wqe_idx = (wqe_idx + 1) % qp->sq_ring.size;
++ if (!wqe_idx)
++ qp->swqe_polarity = !qp->swqe_polarity;
++ wqe_ex = qp->sq_base[wqe_idx].elem;
++
++ if (inline_len) {
++			/* immediate and inline data share one valid bit; it is set after the inline data is written */
++ copy_size = inline_len < INLINE_DATASIZE_24BYTES ?
++ inline_len :
++ INLINE_DATASIZE_24BYTES;
++ inline_dest = (__u8 *)wqe_ex + WQE_OFFSET_8BYTES;
++ inline_src = (__u8 *)op_info->data;
++ memcpy(inline_dest, inline_src, copy_size);
++ inline_len -= copy_size;
++ inline_src += copy_size;
++ }
++ set_64bit_val(
++ wqe_ex, 0,
++ FIELD_PREP(ZXDHQPSQ_IMMDATA_VALID, qp->swqe_polarity) |
++ FIELD_PREP(ZXDHQPSQ_IMMDATA, info->imm_data));
++
++ } else if (inline_len) {
++ wqe_idx = (wqe_idx + 1) % qp->sq_ring.size;
++ if (!wqe_idx)
++ qp->swqe_polarity = !qp->swqe_polarity;
++ wqe_ex = qp->sq_base[wqe_idx].elem;
++ inline_dest = (__u8 *)wqe_ex;
++ inline_src = (__u8 *)op_info->data;
++
++ if (inline_len <= INLINE_DATASIZE_7BYTES) {
++ copy_size = inline_len;
++ memcpy(inline_dest, inline_src, copy_size);
++ inline_len = 0;
++ } else {
++ copy_size = INLINE_DATASIZE_7BYTES;
++ memcpy(inline_dest, inline_src, copy_size);
++ inline_len -= copy_size;
++ inline_src += copy_size;
++ inline_dest += WQE_OFFSET_8BYTES;
++ copy_size = inline_len < INLINE_DATASIZE_24BYTES ?
++ inline_len :
++ INLINE_DATASIZE_24BYTES;
++ memcpy(inline_dest, inline_src, copy_size);
++ inline_len -= copy_size;
++ inline_src += copy_size;
++ }
++ inline_valid_addr = (__u8 *)wqe_ex + WQE_OFFSET_7BYTES;
++ *inline_valid_addr = qp->swqe_polarity << ZXDH_INLINE_VALID_S;
++ }
++
++ while (inline_len) {
++ wqe_idx = (wqe_idx + 1) % qp->sq_ring.size;
++ if (!wqe_idx)
++ qp->swqe_polarity = !qp->swqe_polarity;
++ wqe_ex = qp->sq_base[wqe_idx].elem;
++ inline_dest = (__u8 *)wqe_ex;
++
++ if (inline_len <= INLINE_DATASIZE_7BYTES) {
++ copy_size = inline_len;
++ memcpy(inline_dest, inline_src, copy_size);
++ inline_len = 0;
++ } else {
++ copy_size = INLINE_DATASIZE_7BYTES;
++ memcpy(inline_dest, inline_src, copy_size);
++ inline_len -= copy_size;
++ inline_src += copy_size;
++ inline_dest += WQE_OFFSET_8BYTES;
++ copy_size = inline_len < INLINE_DATASIZE_24BYTES ?
++ inline_len :
++ INLINE_DATASIZE_24BYTES;
++ memcpy(inline_dest, inline_src, copy_size);
++ inline_len -= copy_size;
++ inline_src += copy_size;
++ }
++ inline_valid_addr = (__u8 *)wqe_ex + WQE_OFFSET_7BYTES;
++ *inline_valid_addr = qp->swqe_polarity << ZXDH_INLINE_VALID_S;
++ }
++
++ udma_to_device_barrier(); /* make sure WQE is populated before valid bit is set */
++
++ set_64bit_val(wqe_base, 0, hdr);
++
++ if (post_sq)
++ zxdh_uk_qp_post_wr(qp);
++
++ return 0;
++}
++
++/**
++ * zxdh_uk_stag_local_invalidate - stag invalidate operation
++ * @qp: hw qp ptr
++ * @info: post sq information
++ * @post_sq: flag to post sq
++ */
++enum zxdh_status_code
++zxdh_uk_stag_local_invalidate(struct zxdh_qp_uk *qp,
++ struct zxdh_post_sq_info *info, bool post_sq)
++{
++ __le64 *wqe;
++ struct zxdh_inv_local_stag *op_info;
++ __u64 hdr;
++ __u32 wqe_idx;
++ bool local_fence = true;
++
++ op_info = &info->op.inv_local_stag;
++
++ wqe = zxdh_qp_get_next_send_wqe(qp, &wqe_idx, ZXDH_QP_WQE_MIN_QUANTA, 0,
++ info);
++ if (!wqe)
++ return ZXDH_ERR_QP_TOOMANY_WRS_POSTED;
++
++ zxdh_clr_wqes(qp, wqe_idx);
++
++ set_64bit_val(wqe, 16, 0);
++
++ hdr = FIELD_PREP(ZXDHQPSQ_VALID, qp->swqe_polarity) |
++ FIELD_PREP(ZXDHQPSQ_OPCODE, ZXDH_OP_TYPE_LOCAL_INV) |
++ FIELD_PREP(ZXDHQPSQ_SIGCOMPL, info->signaled) |
++ FIELD_PREP(ZXDHQPSQ_LOCALFENCE, local_fence) |
++ FIELD_PREP(ZXDHQPSQ_READFENCE, info->read_fence) |
++ FIELD_PREP(ZXDHQPSQ_REMSTAG, op_info->target_stag);
++
++ udma_to_device_barrier(); /* make sure WQE is populated before valid bit is set */
++
++ set_64bit_val(wqe, 0, hdr);
++
++ if (post_sq)
++ zxdh_uk_qp_post_wr(qp);
++
++ return 0;
++}
++
++/**
++ * zxdh_uk_mw_bind - bind Memory Window
++ * @qp: hw qp ptr
++ * @info: post sq information
++ * @post_sq: flag to post sq
++ */
++enum zxdh_status_code zxdh_uk_mw_bind(struct zxdh_qp_uk *qp,
++ struct zxdh_post_sq_info *info,
++ bool post_sq)
++{
++ __le64 *wqe;
++ struct zxdh_bind_window *op_info;
++ __u64 hdr;
++ __u32 wqe_idx;
++ bool local_fence = true;
++ __u8 access = 1;
++ __u16 value = 0;
++
++ op_info = &info->op.bind_window;
++ local_fence |= info->local_fence;
++
++ wqe = zxdh_qp_get_next_send_wqe(qp, &wqe_idx, ZXDH_QP_WQE_MIN_QUANTA, 0,
++ info);
++ if (!wqe)
++ return ZXDH_ERR_QP_TOOMANY_WRS_POSTED;
++
++ zxdh_clr_wqes(qp, wqe_idx);
++
++ if (op_info->ena_writes) {
++ access = (op_info->ena_reads << 2) |
++ (op_info->ena_writes << 3) | (1 << 1) | access;
++ } else {
++ access = (op_info->ena_reads << 2) |
++ (op_info->ena_writes << 3) | access;
++ }
++
++ qp->wqe_ops.iw_set_mw_bind_wqe(wqe, op_info);
++
++ value = (__u16)((op_info->mw_pa_pble_index >> 12) & 0xC000000000000);
++
++ hdr = FIELD_PREP(ZXDHQPSQ_OPCODE, ZXDH_OP_TYPE_BIND_MW) |
++ FIELD_PREP(ZXDHQPSQ_MWSTAG, op_info->mw_stag) |
++ FIELD_PREP(ZXDHQPSQ_STAGRIGHTS, access) |
++ FIELD_PREP(ZXDHQPSQ_VABASEDTO,
++ (op_info->addressing_type == ZXDH_ADDR_TYPE_VA_BASED ?
++ 1 :
++ 0)) |
++ FIELD_PREP(ZXDHQPSQ_MEMWINDOWTYPE,
++ (op_info->mem_window_type_1 ? 1 : 0)) |
++ FIELD_PREP(ZXDHQPSQ_READFENCE, info->read_fence) |
++ FIELD_PREP(ZXDHQPSQ_LOCALFENCE, local_fence) |
++ FIELD_PREP(ZXDHQPSQ_SIGCOMPL, info->signaled) |
++ FIELD_PREP(ZXDHQPSQ_MW_HOST_PAGE_SIZE, op_info->host_page_size) |
++ FIELD_PREP(ZXDHQPSQ_MW_LEAF_PBL_SIZE, op_info->leaf_pbl_size) |
++ FIELD_PREP(ZXDHQPSQ_MW_PA_PBLE_THREE, value) |
++ FIELD_PREP(ZXDHQPSQ_VALID, qp->swqe_polarity);
++
++ udma_to_device_barrier(); /* make sure WQE is populated before valid bit is set */
++
++ set_64bit_val(wqe, 0, hdr);
++
++ if (post_sq)
++ zxdh_uk_qp_post_wr(qp);
++
++ return 0;
++}
++
++static void zxdh_sleep_ns(unsigned int nanoseconds)
++{
++ struct timespec req;
++
++ req.tv_sec = 0;
++ req.tv_nsec = nanoseconds;
++ nanosleep(&req, NULL);
++}
++
++/**
++ * zxdh_uk_post_receive - post receive wqe
++ * @qp: hw qp ptr
++ * @info: post rq information
++ */
++enum zxdh_status_code zxdh_uk_post_receive(struct zxdh_qp_uk *qp,
++ struct zxdh_post_rq_info *info)
++{
++ __u32 wqe_idx, i, byte_off;
++ __le64 *wqe;
++ struct zxdh_sge *sge;
++
++ if (qp->max_rq_frag_cnt < info->num_sges)
++ return ZXDH_ERR_INVALID_FRAG_COUNT;
++
++ wqe = zxdh_qp_get_next_recv_wqe(qp, &wqe_idx);
++ if (unlikely(!wqe))
++ return ZXDH_ERR_QP_TOOMANY_WRS_POSTED;
++
++ qp->rq_wrid_array[wqe_idx] = info->wr_id;
++
++ for (i = 0, byte_off = ZXDH_QP_FRAG_BYTESIZE; i < info->num_sges; i++) {
++ sge = &info->sg_list[i];
++ set_64bit_val(wqe, byte_off, sge->tag_off);
++ set_64bit_val(wqe, byte_off + 8,
++ FIELD_PREP(ZXDHQPRQ_FRAG_LEN, sge->len) |
++ FIELD_PREP(ZXDHQPRQ_STAG, sge->stag));
++ byte_off += ZXDH_QP_FRAG_BYTESIZE;
++ }
++
++	/*
++	 * When info->num_sges < qp->max_rq_frag_cnt, or info->num_sges == 0,
++	 * fill the next fragment with FRAG_LEN=0 and FRAG_STAG=0x00000100,
++	 * which marks it as an invalid fragment.
++	 */
++ if (info->num_sges < qp->max_rq_frag_cnt || 0 == info->num_sges) {
++ set_64bit_val(wqe, byte_off, 0);
++ set_64bit_val(wqe, byte_off + 8,
++ FIELD_PREP(ZXDHQPRQ_FRAG_LEN, 0) |
++ FIELD_PREP(ZXDHQPRQ_STAG, 0x00000100));
++ }
++
++ set_64bit_val(wqe, 0,
++ FIELD_PREP(ZXDHQPRQ_ADDFRAGCNT, info->num_sges) |
++ FIELD_PREP(ZXDHQPRQ_SIGNATURE,
++ qp->rwqe_signature));
++
++ udma_to_device_barrier(); /* make sure WQE is populated before valid bit is set */
++ if (info->num_sges > 3)
++ zxdh_sleep_ns(1000);
++
++ set_64bit_val(wqe, 8, FIELD_PREP(ZXDHQPRQ_VALID, qp->rwqe_polarity));
++
++ return 0;
++}
++
++/**
++ * zxdh_uk_cq_resize - reset the cq buffer info
++ * @cq: cq to resize
++ * @cq_base: new cq buffer addr
++ * @cq_size: number of cqes
++ */
++void zxdh_uk_cq_resize(struct zxdh_cq_uk *cq, void *cq_base, int cq_size)
++{
++ cq->cq_base = cq_base;
++ cq->cq_size = cq_size;
++ ZXDH_RING_INIT(cq->cq_ring, cq->cq_size);
++ cq->polarity = 1;
++}
++
++/**
++ * zxdh_uk_cq_set_resized_cnt - record the count of the resized buffers
++ * @cq: cq to resize
++ * @cq_cnt: the count of the resized cq buffers
++ */
++void zxdh_uk_cq_set_resized_cnt(struct zxdh_cq_uk *cq, __u16 cq_cnt)
++{
++ __u64 temp_val;
++ __u16 sw_cq_sel;
++ __u8 arm_next;
++ __u8 arm_seq_num;
++
++ get_64bit_val(cq->shadow_area, 0, &temp_val);
++
++ sw_cq_sel = (__u16)FIELD_GET(ZXDH_CQ_DBSA_SW_CQ_SELECT, temp_val);
++ sw_cq_sel += cq_cnt;
++
++ arm_seq_num = (__u8)FIELD_GET(ZXDH_CQ_DBSA_ARM_SEQ_NUM, temp_val);
++ arm_next = (__u8)FIELD_GET(ZXDH_CQ_DBSA_ARM_NEXT, temp_val);
++ cq->cqe_rd_cnt = 0;
++
++ temp_val = FIELD_PREP(ZXDH_CQ_DBSA_ARM_SEQ_NUM, arm_seq_num) |
++ FIELD_PREP(ZXDH_CQ_DBSA_SW_CQ_SELECT, sw_cq_sel) |
++ FIELD_PREP(ZXDH_CQ_DBSA_ARM_NEXT, arm_next) |
++ FIELD_PREP(ZXDH_CQ_DBSA_CQEIDX, cq->cqe_rd_cnt);
++
++ set_64bit_val(cq->shadow_area, 0, temp_val);
++}
++
++/**
++ * zxdh_uk_cq_request_notification - cq notification request (doorbell)
++ * @cq: hw cq
++ * @cq_notify: notification type
++ */
++void zxdh_uk_cq_request_notification(struct zxdh_cq_uk *cq,
++ enum zxdh_cmpl_notify cq_notify)
++{
++ __u64 temp_val;
++ __u16 sw_cq_sel;
++ __u8 arm_next = 0;
++ __u8 arm_seq_num;
++ __u32 cqe_index;
++ __u32 hdr;
++
++ get_64bit_val(cq->shadow_area, 0, &temp_val);
++ arm_seq_num = (__u8)FIELD_GET(ZXDH_CQ_DBSA_ARM_SEQ_NUM, temp_val);
++ arm_seq_num++;
++ sw_cq_sel = (__u16)FIELD_GET(ZXDH_CQ_DBSA_SW_CQ_SELECT, temp_val);
++ cqe_index = (__u32)FIELD_GET(ZXDH_CQ_DBSA_CQEIDX, temp_val);
++
++ if (cq_notify == ZXDH_CQ_COMPL_SOLICITED)
++ arm_next = 1;
++ temp_val = FIELD_PREP(ZXDH_CQ_DBSA_ARM_SEQ_NUM, arm_seq_num) |
++ FIELD_PREP(ZXDH_CQ_DBSA_SW_CQ_SELECT, sw_cq_sel) |
++ FIELD_PREP(ZXDH_CQ_DBSA_ARM_NEXT, arm_next) |
++ FIELD_PREP(ZXDH_CQ_DBSA_CQEIDX, cqe_index);
++
++ set_64bit_val(cq->shadow_area, 0, temp_val);
++
++ hdr = FIELD_PREP(ZXDH_CQ_ARM_DBSA_VLD, 0) |
++ FIELD_PREP(ZXDH_CQ_ARM_CQ_ID, cq->cq_id);
++
++ udma_to_device_barrier(); /* make sure WQE is populated before valid bit is set */
++
++ db_wr32(hdr, cq->cqe_alloc_db);
++}
++
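++/*
++ * Map the CQE error fields to a completion status: retry-ACK and TX-window
++ * query-item errors get dedicated codes (handled later in poll_cmpl),
++ * flush-major errors map to FLUSHED, anything else to UNKNOWN. Non-error RQ
++ * completions whose major and minor codes both differ from ERROR_CODE_VALUE
++ * are also reported as UNKNOWN.
++ */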
++static inline void build_comp_status(__u32 cq_type,
++ struct zxdh_cq_poll_info *info)
++{
++ if (!info->error) {
++ info->comp_status = ZXDH_COMPL_STATUS_SUCCESS;
++ if (cq_type == ZXDH_CQE_QTYPE_RQ) {
++ if (info->major_err != ERROR_CODE_VALUE &&
++ info->minor_err != ERROR_CODE_VALUE) {
++ info->comp_status = ZXDH_COMPL_STATUS_UNKNOWN;
++ }
++ }
++ return;
++ }
++ if (info->major_err == ZXDH_RETRY_ACK_MAJOR_ERR &&
++ info->minor_err == ZXDH_RETRY_ACK_MINOR_ERR) {
++ info->comp_status = ZXDH_COMPL_STATUS_RETRY_ACK_ERR;
++ return;
++ }
++ if (info->major_err == ZXDH_RETRY_ACK_MAJOR_ERR &&
++ info->minor_err == ZXDH_TX_WINDOW_QUERY_ITEM_MINOR_ERR) {
++ info->comp_status = ZXDH_COMPL_STATUS_TX_WINDOW_QUERY_ITEM_ERR;
++ return;
++ }
++ info->comp_status = (info->major_err == ZXDH_FLUSH_MAJOR_ERR) ?
++ ZXDH_COMPL_STATUS_FLUSHED :
++ ZXDH_COMPL_STATUS_UNKNOWN;
++}
++
++__le64 *get_current_cqe(struct zxdh_cq_uk *cq)
++{
++ return ZXDH_GET_CURRENT_EXTENDED_CQ_ELEM(cq);
++}
++
++static inline void zxdh_get_cq_poll_info(struct zxdh_qp_uk *qp,
++ struct zxdh_cq_poll_info *info,
++ __u64 qword2, __u64 qword3)
++{
++ __u8 qp_type;
++
++ qp_type = qp->qp_type;
++
++ info->imm_valid = (bool)FIELD_GET(ZXDH_CQ_IMMVALID, qword2);
++ if (info->imm_valid) {
++ info->imm_data = (__u32)FIELD_GET(ZXDH_CQ_IMMDATA, qword3);
++ info->op_type = ZXDH_OP_TYPE_REC_IMM;
++ } else {
++ info->op_type = ZXDH_OP_TYPE_REC;
++ }
++
++ info->bytes_xfered = (__u32)FIELD_GET(ZXDHCQ_PAYLDLEN, qword3);
++
++ if (likely(qp_type == ZXDH_QP_TYPE_ROCE_RC)) {
++ if (qword2 & ZXDHCQ_STAG) {
++ info->stag_invalid_set = true;
++ info->inv_stag =
++ (__u32)FIELD_GET(ZXDHCQ_INVSTAG, qword2);
++ } else {
++ info->stag_invalid_set = false;
++ }
++ } else if (qp_type == ZXDH_QP_TYPE_ROCE_UD) {
++ info->ipv4 = (bool)FIELD_GET(ZXDHCQ_IPV4, qword2);
++ info->ud_src_qpn = (__u32)FIELD_GET(ZXDHCQ_UDSRCQPN, qword2);
++ }
++}
++
++static void update_cq_poll_info(struct zxdh_qp_uk *qp,
++ struct zxdh_cq_poll_info *info, __u32 wqe_idx,
++ __u64 qword0)
++{
++ info->wr_id = qp->sq_wrtrk_array[wqe_idx].wrid;
++ if (!info->comp_status)
++ info->bytes_xfered = qp->sq_wrtrk_array[wqe_idx].wr_len;
++ info->op_type = (__u8)FIELD_GET(ZXDHCQ_OP, qword0);
++ ZXDH_RING_SET_TAIL(qp->sq_ring,
++ wqe_idx + qp->sq_wrtrk_array[wqe_idx].quanta);
++}
++
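++/*
++ * TX window query item errors are handled by resetting the QP (zxdh_reset_qp
++ * with ZXDH_RESET_RETRY_TX_ITEM_FLAG) up to ZXDH_QP_RETRY_COUNT times while
++ * the last acked PSN is not advancing; once the limit is reached the error
++ * completion is surfaced to the caller.
++ */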
++static enum zxdh_status_code
++process_tx_window_query_item_err(struct zxdh_qp_uk *qp,
++ struct zxdh_cq_poll_info *info)
++{
++ int ret;
++ struct ibv_qp *ib_qp;
++ struct zxdh_uqp *iwuqp;
++ struct zxdh_rdma_qpc qpc = { 0 };
++ __u64 qpc_mask = 0;
++
++ iwuqp = container_of(qp, struct zxdh_uqp, qp);
++ ib_qp = &iwuqp->vqp.qp;
++ ret = zxdh_query_qpc(ib_qp, &qpc);
++ if (ret) {
++ zxdh_dbg(ZXDH_DBG_QP,
++ "process tx window query item query qpc failed:%d\n",
++ ret);
++ return ZXDH_ERR_RETRY_ACK_ERR;
++ }
++ if (qpc.tx_last_ack_psn != qp->qp_last_ack_qsn)
++ qp->qp_reset_cnt = 0;
++
++ qp->qp_last_ack_qsn = qpc.tx_last_ack_psn;
++ if (qp->qp_reset_cnt >= ZXDH_QP_RETRY_COUNT)
++ return ZXDH_ERR_RETRY_ACK_ERR;
++
++ ret = zxdh_reset_qp(ib_qp, ZXDH_RESET_RETRY_TX_ITEM_FLAG);
++ if (ret) {
++ zxdh_dbg(ZXDH_DBG_QP,
++ "process tx window query item reset qp failed:%d\n",
++ ret);
++ return ZXDH_ERR_RETRY_ACK_ERR;
++ }
++ qp->qp_reset_cnt++;
++ return ZXDH_ERR_RETRY_ACK_NOT_EXCEED_ERR;
++}
++
++static enum zxdh_status_code
++process_retry_ack_err(struct zxdh_qp_uk *qp, struct zxdh_cq_poll_info *info)
++{
++ int ret;
++ struct ibv_qp *ib_qp;
++ struct zxdh_uqp *iwuqp;
++ struct zxdh_rdma_qpc qpc = { 0 };
++ struct zxdh_rdma_qpc qpc_req_cmd = { 0 };
++ __u64 qpc_mask = 0;
++
++ iwuqp = container_of(qp, struct zxdh_uqp, qp);
++
++ ib_qp = &iwuqp->vqp.qp;
++ ret = zxdh_query_qpc(ib_qp, &qpc);
++ if (ret) {
++ zxdh_dbg(ZXDH_DBG_QP, "process retry ack query qpc failed:%d\n",
++ ret);
++ return ZXDH_ERR_RETRY_ACK_ERR;
++ }
++ if (!(qpc.retry_cqe_sq_opcode >= ZXDH_RETRY_CQE_SQ_OPCODE_ERR &&
++ (qpc.recv_err_flag == ZXDH_RECV_ERR_FLAG_NAK_RNR_NAK ||
++ qpc.recv_err_flag == ZXDH_RECV_ERR_FLAG_READ_RESP))) {
++ return ZXDH_ERR_RETRY_ACK_ERR;
++ }
++ if (qpc.tx_last_ack_psn != qp->cqe_last_ack_qsn)
++ qp->cqe_retry_cnt = 0;
++
++ qp->cqe_last_ack_qsn = qpc.tx_last_ack_psn;
++ if (qp->cqe_retry_cnt >= ZXDH_QP_RETRY_COUNT)
++ return ZXDH_ERR_RETRY_ACK_ERR;
++
++ memcpy(&qpc_req_cmd, &qpc, sizeof(qpc));
++ qpc_req_cmd.package_err_flag = 0;
++ qpc_req_cmd.ack_err_flag = 0;
++ qpc_req_cmd.err_flag = 0;
++ qpc_req_cmd.retry_cqe_sq_opcode &= ZXDH_RESET_RETRY_CQE_SQ_OPCODE_ERR;
++ qpc_req_cmd.cur_retry_count = qpc.retry_count;
++ ret = zxdh_modify_qpc(ib_qp, &qpc_req_cmd,
++ ZXDH_PACKAGE_ERR_FLAG | ZXDH_ERR_FLAG_SET |
++ ZXDH_RETRY_CQE_SQ_OPCODE |
++ ZXDH_TX_READ_RETRY_FLAG_SET);
++ if (ret) {
++ zxdh_dbg(ZXDH_DBG_QP,
++ "process retry ack modify qpc failed:%d\n", ret);
++ return ZXDH_ERR_RETRY_ACK_ERR;
++ }
++ qp->cqe_retry_cnt++;
++ return ZXDH_ERR_RETRY_ACK_NOT_EXCEED_ERR;
++}
++
++/**
++ * zxdh_uk_cq_poll_cmpl - get cq completion info
++ * @cq: hw cq
++ * @info: cq poll information returned
++ */
++enum zxdh_status_code zxdh_uk_cq_poll_cmpl(struct zxdh_cq_uk *cq,
++ struct zxdh_cq_poll_info *info)
++{
++ enum zxdh_status_code status_code;
++ struct zxdh_uvcontext *iwvctx;
++ __u64 comp_ctx, qword0, qword2, qword3;
++ __le64 *cqe;
++ int ret;
++ struct zxdh_qp_uk *qp;
++ struct zxdh_ring *pring = NULL;
++ __u32 wqe_idx, q_type;
++ int ret_code;
++ bool move_cq_head = true;
++ __u8 polarity;
++ struct zxdh_usrq *iwusrq = NULL;
++ struct zxdh_srq_uk *srq_uk = NULL;
++ struct zxdh_uqp *iwuqp;
++ struct ibv_qp *ib_qp;
++
++ cqe = get_current_cqe(cq);
++
++ get_64bit_val(cqe, 0, &qword0);
++ polarity = (__u8)FIELD_GET(ZXDH_CQ_VALID, qword0);
++ if (polarity != cq->polarity)
++ return ZXDH_ERR_Q_EMPTY;
++
++ /* Ensure CQE contents are read after valid bit is checked */
++ udma_from_device_barrier();
++ get_64bit_val(cqe, 8, &comp_ctx);
++ get_64bit_val(cqe, 16, &qword2);
++ get_64bit_val(cqe, 24, &qword3);
++
++ qp = (struct zxdh_qp_uk *)(unsigned long)comp_ctx;
++ if (unlikely(!qp || qp->destroy_pending)) {
++ ret_code = ZXDH_ERR_Q_DESTROYED;
++ goto exit;
++ }
++
++ info->qp_handle = (zxdh_qp_handle)(unsigned long)qp;
++ q_type = (__u8)FIELD_GET(ZXDH_CQ_SQ, qword0);
++ info->solicited_event = (bool)FIELD_GET(ZXDHCQ_SOEVENT, qword0);
++ wqe_idx = (__u32)FIELD_GET(ZXDH_CQ_WQEIDX, qword0);
++ info->error = (bool)FIELD_GET(ZXDH_CQ_ERROR, qword0);
++ info->major_err = FIELD_GET(ZXDH_CQ_MAJERR, qword0);
++ info->minor_err = FIELD_GET(ZXDH_CQ_MINERR, qword0);
++
++ /* Set the min error to standard flush error code for remaining cqes */
++ if (unlikely(info->error && info->major_err == ZXDH_FLUSH_MAJOR_ERR &&
++ info->minor_err != FLUSH_GENERAL_ERR)) {
++ qword0 &= ~ZXDH_CQ_MINERR;
++ qword0 |= FIELD_PREP(ZXDH_CQ_MINERR, FLUSH_GENERAL_ERR);
++ set_64bit_val(cqe, 0, qword0);
++ }
++ build_comp_status(q_type, info);
++
++ info->qp_id = (__u32)FIELD_GET(ZXDHCQ_QPID, qword2);
++ info->imm_valid = false;
++
++ switch (q_type) {
++ case ZXDH_CQE_QTYPE_RQ:
++ if (qp->is_srq) {
++ iwuqp = container_of(qp, struct zxdh_uqp, qp);
++ iwusrq = iwuqp->srq;
++ srq_uk = &iwusrq->srq;
++ zxdh_free_srq_wqe(srq_uk, wqe_idx);
++ info->wr_id = srq_uk->srq_wrid_array[wqe_idx];
++ zxdh_get_cq_poll_info(qp, info, qword2, qword3);
++ } else {
++ if (unlikely(info->comp_status ==
++ ZXDH_COMPL_STATUS_FLUSHED ||
++ info->comp_status ==
++ ZXDH_COMPL_STATUS_UNKNOWN)) {
++ if (!ZXDH_RING_MORE_WORK(qp->rq_ring)) {
++ ret_code = ZXDH_ERR_Q_EMPTY;
++ goto exit;
++ }
++ wqe_idx = qp->rq_ring.tail;
++ }
++ info->wr_id = qp->rq_wrid_array[wqe_idx];
++ zxdh_get_cq_poll_info(qp, info, qword2, qword3);
++ ZXDH_RING_SET_TAIL(qp->rq_ring, wqe_idx + 1);
++ if (info->comp_status == ZXDH_COMPL_STATUS_FLUSHED) {
++ qp->rq_flush_seen = true;
++ if (!ZXDH_RING_MORE_WORK(qp->rq_ring))
++ qp->rq_flush_complete = true;
++ else
++ move_cq_head = false;
++ }
++ pring = &qp->rq_ring;
++ }
++ ret_code = ZXDH_SUCCESS;
++ break;
++ case ZXDH_CQE_QTYPE_SQ:
++ if (info->comp_status == ZXDH_COMPL_STATUS_RETRY_ACK_ERR &&
++ qp->qp_type == ZXDH_QP_TYPE_ROCE_RC) {
++ status_code = process_retry_ack_err(qp, info);
++ if (status_code == ZXDH_ERR_RETRY_ACK_ERR) {
++ update_cq_poll_info(qp, info, wqe_idx, qword0);
++ ret_code = ZXDH_SUCCESS;
++ } else {
++ ret_code = status_code;
++ }
++ } else if (info->comp_status ==
++ ZXDH_COMPL_STATUS_TX_WINDOW_QUERY_ITEM_ERR &&
++ qp->qp_type == ZXDH_QP_TYPE_ROCE_RC) {
++ status_code =
++ process_tx_window_query_item_err(qp, info);
++ if (status_code == ZXDH_ERR_RETRY_ACK_ERR) {
++ update_cq_poll_info(qp, info, wqe_idx, qword0);
++ ret_code = ZXDH_SUCCESS;
++ } else {
++ ret_code = status_code;
++ }
++ } else if (info->comp_status == ZXDH_COMPL_STATUS_FLUSHED) {
++ info->wr_id = qp->sq_wrtrk_array[wqe_idx].wrid;
++ ZXDH_RING_INIT(qp->sq_ring, qp->sq_ring.size);
++ ret_code = ZXDH_SUCCESS;
++ } else {
++ update_cq_poll_info(qp, info, wqe_idx, qword0);
++ ret_code = ZXDH_SUCCESS;
++ }
++ break;
++ default:
++ zxdh_dbg(ZXDH_DBG_CQ, "zxdh get cqe type unknown!\n");
++ ret_code = ZXDH_ERR_Q_DESTROYED;
++ break;
++ }
++exit:
++ if (move_cq_head) {
++ __u64 cq_shadow_temp;
++
++ ZXDH_RING_MOVE_HEAD_NOCHECK(cq->cq_ring);
++ if (!ZXDH_RING_CURRENT_HEAD(cq->cq_ring))
++ cq->polarity ^= 1;
++
++ ZXDH_RING_MOVE_TAIL(cq->cq_ring);
++ cq->cqe_rd_cnt++;
++ get_64bit_val(cq->shadow_area, 0, &cq_shadow_temp);
++ cq_shadow_temp &= ~ZXDH_CQ_DBSA_CQEIDX;
++ cq_shadow_temp |=
++ FIELD_PREP(ZXDH_CQ_DBSA_CQEIDX, cq->cqe_rd_cnt);
++ set_64bit_val(cq->shadow_area, 0, cq_shadow_temp);
++ } else {
++ qword0 &= ~ZXDH_CQ_WQEIDX;
++ qword0 |= FIELD_PREP(ZXDH_CQ_WQEIDX, pring->tail);
++ set_64bit_val(cqe, 0, qword0);
++ }
++
++ return ret_code;
++}
++
++/**
++ * zxdh_qp_round_up - round up qp wq depth to the next power of two
++ * @wqdepth: wq depth in quanta to round up
++ */
++int zxdh_qp_round_up(__u32 wqdepth)
++{
++ int scount = 1;
++
++ for (wqdepth--; scount <= 16; scount *= 2)
++ wqdepth |= wqdepth >> scount;
++
++ return ++wqdepth;
++}
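
To make the bit trick above concrete: smearing the leading bit downward fills every bit below it with ones, so the final increment lands on the next power of two, and the initial decrement keeps exact powers of two unchanged. Below is a minimal standalone sketch (hypothetical round_up_pow2 helper and main, not part of the driver) that mirrors the same sequence:

#include <stdio.h>

/* Same smear-and-increment round-up as zxdh_qp_round_up(), shown standalone. */
static unsigned int round_up_pow2(unsigned int wqdepth)
{
    int scount = 1;

    for (wqdepth--; scount <= 16; scount *= 2)
        wqdepth |= wqdepth >> scount;

    return ++wqdepth;
}

int main(void)
{
    /* prints: 256 256 512 */
    printf("%u %u %u\n", round_up_pow2(200), round_up_pow2(256),
           round_up_pow2(257));
    return 0;
}
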
++
++/**
++ * zxdh_cq_round_up - round up cq depth to the next power of two
++ * @wqdepth: wq depth in quanta to round up
++ */
++int zxdh_cq_round_up(__u32 wqdepth)
++{
++ int scount = 1;
++
++ for (wqdepth--; scount <= 16; scount *= 2)
++ wqdepth |= wqdepth >> scount;
++
++ return ++wqdepth;
++}
++
++/**
++ * zxdh_get_rq_wqe_shift - get shift count for maximum rq wqe size
++ * @uk_attrs: qp HW attributes
++ * @sge: Maximum Scatter Gather Elements wqe
++ * @shift: Returns the shift needed based on sge
++ *
++ * Shift can be used to left shift the rq wqe size based on number of SGEs.
++ * For 1 SGE, shift = 1 (wqe size of 2*16 bytes).
++ * For 2 or 3 SGEs, shift = 2 (wqe size of 4*16 bytes).
++ * For 4-7 SGEs, shift = 3 (wqe size of 8*16 bytes).
++ * For 8-15 SGEs, shift = 4; otherwise shift = 5 (wqe size of 512 bytes).
++ */
++void zxdh_get_rq_wqe_shift(struct zxdh_uk_attrs *uk_attrs, __u32 sge,
++ __u8 *shift)
++{
++ *shift = 0; /* 16-byte RQE; configuration still to be confirmed */
++ if (sge < 2)
++ *shift = 1;
++ else if (sge < 4)
++ *shift = 2;
++ else if (sge < 8)
++ *shift = 3;
++ else if (sge < 16)
++ *shift = 4;
++ else
++ *shift = 5;
++}
++
++/**
++ * zxdh_get_sq_wqe_shift - get shift count for maximum wqe size
++ * @uk_attrs: qp HW attributes
++ * @sge: Maximum Scatter Gather Elements wqe
++ * @inline_data: Maximum inline data size
++ * @shift: Returns the shift needed based on sge
++ *
++ * Shift can be used to left shift the wqe size based on number of SGEs and inline data size.
++ * To support a WR with imm_data, shift = 1 (wqe size of 2*32 bytes).
++ * For 2-7 SGEs or 24 < inline data <= 86, shift = 2 (wqe size of 4*32 bytes).
++ * Otherwise, shift = 3 (wqe size of 8*32 bytes).
++ */
++void zxdh_get_sq_wqe_shift(struct zxdh_uk_attrs *uk_attrs, __u32 sge,
++ __u32 inline_data, __u8 *shift)
++{
++ *shift = 1;
++
++ if (sge > 1 || inline_data > 24) {
++ if (sge < 8 && inline_data <= 86)
++ *shift = 2;
++ else
++ *shift = 3;
++ }
++}
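
For reference, a small standalone sketch (local sq_shift copy with a hypothetical main) of the same SGE/inline-size to shift mapping; the resulting SQ WQE size is (1 << shift) * 32 bytes:

#include <stdio.h>

/* Standalone copy of the shift selection in zxdh_get_sq_wqe_shift(). */
static unsigned char sq_shift(unsigned int sge, unsigned int inline_data)
{
    unsigned char shift = 1;

    if (sge > 1 || inline_data > 24) {
        if (sge < 8 && inline_data <= 86)
            shift = 2;
        else
            shift = 3;
    }
    return shift;
}

int main(void)
{
    /* (sge, inline) -> shift: prints 1 2 3, i.e. 64B, 128B, 256B WQEs */
    printf("%u %u %u\n", sq_shift(1, 0), sq_shift(4, 0), sq_shift(8, 100));
    return 0;
}
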
++
++/*
++ * zxdh_get_sqdepth - get SQ depth (quanta)
++ * @uk_attrs: qp HW attributes
++ * @sq_size: SQ size
++ * @shift: shift which determines size of WQE
++ * @sqdepth: depth of SQ
++ *
++ */
++enum zxdh_status_code zxdh_get_sqdepth(struct zxdh_uk_attrs *uk_attrs,
++ __u32 sq_size, __u8 shift,
++ __u32 *sqdepth)
++{
++ if (sq_size > ZXDH_MAX_SQ_DEPTH)
++ return ZXDH_ERR_INVALID_SIZE;
++
++ *sqdepth = zxdh_qp_round_up((sq_size << shift) + ZXDH_SQ_RSVD);
++
++ if (*sqdepth < (ZXDH_QP_SW_MIN_WQSIZE << shift))
++ *sqdepth = ZXDH_QP_SW_MIN_WQSIZE << shift;
++ else if (*sqdepth > uk_attrs->max_hw_wq_quanta)
++ return ZXDH_ERR_INVALID_SIZE;
++
++ return 0;
++}
++
++/*
++ * zxdh_get_rqdepth - get RQ depth (quanta)
++ * @uk_attrs: qp HW attributes
++ * @rq_size: RQ size
++ * @shift: shift which determines size of WQE
++ * @rqdepth: depth of RQ
++ */
++enum zxdh_status_code zxdh_get_rqdepth(struct zxdh_uk_attrs *uk_attrs,
++ __u32 rq_size, __u8 shift,
++ __u32 *rqdepth)
++{
++ *rqdepth = zxdh_qp_round_up((rq_size << shift) + ZXDH_RQ_RSVD);
++
++ if (*rqdepth < (ZXDH_QP_SW_MIN_WQSIZE << shift))
++ *rqdepth = ZXDH_QP_SW_MIN_WQSIZE << shift;
++ else if (*rqdepth > uk_attrs->max_hw_rq_quanta)
++ return ZXDH_ERR_INVALID_SIZE;
++
++ return 0;
++}
++
++static const struct zxdh_wqe_uk_ops iw_wqe_uk_ops = {
++ .iw_copy_inline_data = zxdh_copy_inline_data,
++ .iw_inline_data_size_to_quanta = zxdh_inline_data_size_to_quanta,
++ .iw_set_fragment = zxdh_set_fragment,
++ .iw_set_mw_bind_wqe = zxdh_set_mw_bind_wqe,
++};
++
++/**
++ * zxdh_uk_qp_init - initialize shared qp
++ * @qp: hw qp (user and kernel)
++ * @info: qp initialization info
++ *
++ * initializes the vars used in both user and kernel mode.
++ * The size of the wqe depends on the maximum number of fragments
++ * allowed. The size of the wqe times the number of wqes should equal
++ * the amount of memory allocated for the sq and rq.
++ */
++enum zxdh_status_code zxdh_uk_qp_init(struct zxdh_qp_uk *qp,
++ struct zxdh_qp_uk_init_info *info)
++{
++ enum zxdh_status_code ret_code = 0;
++ __u32 sq_ring_size;
++ __u8 sqshift, rqshift;
++
++ qp->uk_attrs = info->uk_attrs;
++ if (info->max_sq_frag_cnt > qp->uk_attrs->max_hw_wq_frags ||
++ info->max_rq_frag_cnt > qp->uk_attrs->max_hw_wq_frags)
++ return ZXDH_ERR_INVALID_FRAG_COUNT;
++
++ zxdh_get_rq_wqe_shift(qp->uk_attrs, info->max_rq_frag_cnt, &rqshift);
++ zxdh_get_sq_wqe_shift(qp->uk_attrs, info->max_sq_frag_cnt,
++ info->max_inline_data, &sqshift);
++
++ qp->qp_caps = info->qp_caps;
++ qp->sq_base = info->sq;
++ qp->rq_base = info->rq;
++ qp->qp_type = info->type;
++ qp->shadow_area = info->shadow_area;
++ set_64bit_val(qp->shadow_area, 0, 0x8000);
++ qp->sq_wrtrk_array = info->sq_wrtrk_array;
++
++ qp->rq_wrid_array = info->rq_wrid_array;
++ qp->wqe_alloc_db = info->wqe_alloc_db;
++ qp->qp_id = info->qp_id;
++ qp->sq_size = info->sq_size;
++ qp->push_mode = false;
++ qp->max_sq_frag_cnt = info->max_sq_frag_cnt;
++ sq_ring_size = qp->sq_size << sqshift;
++ ZXDH_RING_INIT(qp->sq_ring, sq_ring_size);
++ ZXDH_RING_INIT(qp->initial_ring, sq_ring_size);
++ qp->swqe_polarity = 0;
++ qp->swqe_polarity_deferred = 1;
++ qp->rwqe_polarity = 0;
++ qp->rwqe_signature = 0;
++ qp->rq_size = info->rq_size;
++ qp->max_rq_frag_cnt = info->max_rq_frag_cnt;
++ qp->max_inline_data = (info->max_inline_data == 0) ?
++ ZXDH_MAX_INLINE_DATA_SIZE :
++ info->max_inline_data;
++ qp->rq_wqe_size = rqshift;
++ ZXDH_RING_INIT(qp->rq_ring, qp->rq_size);
++ qp->rq_wqe_size_multiplier = 1 << rqshift;
++ qp->wqe_ops = iw_wqe_uk_ops;
++ return ret_code;
++}
++
++/**
++ * zxdh_uk_cq_init - initialize shared cq (user and kernel)
++ * @cq: hw cq
++ * @info: hw cq initialization info
++ */
++enum zxdh_status_code zxdh_uk_cq_init(struct zxdh_cq_uk *cq,
++ struct zxdh_cq_uk_init_info *info)
++{
++ cq->cq_base = info->cq_base;
++ cq->cq_id = info->cq_id;
++ cq->cq_size = info->cq_size;
++ cq->cqe_alloc_db = info->cqe_alloc_db;
++ cq->cq_ack_db = info->cq_ack_db;
++ cq->shadow_area = info->shadow_area;
++ cq->cqe_size = info->cqe_size;
++ ZXDH_RING_INIT(cq->cq_ring, cq->cq_size);
++ cq->polarity = 1;
++ cq->cqe_rd_cnt = 0;
++
++ return 0;
++}
++
++/**
++ * zxdh_uk_clean_cq - clean cq entries
++ * @q: completion context
++ * @cq: cq to clean
++ */
++void zxdh_uk_clean_cq(void *q, struct zxdh_cq_uk *cq)
++{
++ __le64 *cqe;
++ __u64 qword3, comp_ctx;
++ __u32 cq_head;
++ __u8 polarity, temp;
++
++ cq_head = cq->cq_ring.head;
++ temp = cq->polarity;
++ do {
++ if (cq->cqe_size)
++ cqe = ((struct zxdh_extended_cqe
++ *)(cq->cq_base))[cq_head]
++ .buf;
++ else
++ cqe = cq->cq_base[cq_head].buf;
++ get_64bit_val(cqe, 24, &qword3);
++ polarity = (__u8)FIELD_GET(ZXDH_CQ_VALID, qword3);
++
++ if (polarity != temp)
++ break;
++
++ get_64bit_val(cqe, 8, &comp_ctx);
++ if ((void *)(uintptr_t)comp_ctx == q)
++ set_64bit_val(cqe, 8, 0);
++
++ cq_head = (cq_head + 1) % cq->cq_ring.size;
++ if (!cq_head)
++ temp ^= 1;
++ } while (true);
++}
++
++/**
++ * zxdh_nop - post a nop
++ * @qp: hw qp ptr
++ * @wr_id: work request id
++ * @signaled: signaled for completion
++ * @post_sq: ring doorbell
++ */
++enum zxdh_status_code zxdh_nop(struct zxdh_qp_uk *qp, __u64 wr_id,
++ bool signaled, bool post_sq)
++{
++ __le64 *wqe;
++ __u64 hdr;
++ __u32 wqe_idx;
++ struct zxdh_post_sq_info info = {};
++
++ info.push_wqe = false;
++ info.wr_id = wr_id;
++ wqe = zxdh_qp_get_next_send_wqe(qp, &wqe_idx, ZXDH_QP_WQE_MIN_QUANTA, 0,
++ &info);
++ if (!wqe)
++ return ZXDH_ERR_QP_TOOMANY_WRS_POSTED;
++
++ zxdh_clr_wqes(qp, wqe_idx);
++
++ set_64bit_val(wqe, 0, 0);
++ set_64bit_val(wqe, 8, 0);
++ set_64bit_val(wqe, 16, 0);
++
++ hdr = FIELD_PREP(ZXDHQPSQ_OPCODE, ZXDHQP_OP_NOP) |
++ FIELD_PREP(ZXDHQPSQ_SIGCOMPL, signaled) |
++ FIELD_PREP(ZXDHQPSQ_VALID, qp->swqe_polarity);
++
++ udma_to_device_barrier(); /* make sure WQE is populated before valid bit is set */
++
++ set_64bit_val(wqe, 24, hdr);
++ if (post_sq)
++ zxdh_uk_qp_post_wr(qp);
++
++ return 0;
++}
++
++/**
++ * zxdh_fragcnt_to_quanta_sq - calculate quanta based on fragment count for SQ
++ * @frag_cnt: number of fragments
++ * @quanta: quanta for frag_cnt
++ */
++enum zxdh_status_code zxdh_fragcnt_to_quanta_sq(__u32 frag_cnt, __u16 *quanta)
++{
++ if (frag_cnt > ZXDH_MAX_SQ_FRAG)
++ return ZXDH_ERR_INVALID_FRAG_COUNT;
++ *quanta = frag_cnt / 2 + 1;
++ return 0;
++}
++
++/**
++ * zxdh_fragcnt_to_wqesize_rq - calculate wqe size based on fragment count for RQ
++ * @frag_cnt: number of fragments
++ * @wqe_size: size in bytes given frag_cnt
++ */
++enum zxdh_status_code zxdh_fragcnt_to_wqesize_rq(__u32 frag_cnt,
++ __u16 *wqe_size)
++{
++ switch (frag_cnt) {
++ case 0:
++ case 1:
++ *wqe_size = 32;
++ break;
++ case 2:
++ case 3:
++ *wqe_size = 64;
++ break;
++ case 4:
++ case 5:
++ case 6:
++ case 7:
++ *wqe_size = 128;
++ break;
++ case 8:
++ case 9:
++ case 10:
++ case 11:
++ case 12:
++ case 13:
++ case 14:
++ *wqe_size = 256;
++ break;
++ default:
++ return ZXDH_ERR_INVALID_FRAG_COUNT;
++ }
++
++ return 0;
++}
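
The same fragment-count to RQ WQE-size table, as a standalone sketch (hypothetical helper name, not the driver's API) that can be used to sanity-check the mapping above:

#include <stdio.h>

/* Same mapping as zxdh_fragcnt_to_wqesize_rq(): each step doubles the WQE. */
static int fragcnt_to_rq_wqesize(unsigned int frag_cnt, unsigned int *wqe_size)
{
    if (frag_cnt <= 1)
        *wqe_size = 32;
    else if (frag_cnt <= 3)
        *wqe_size = 64;
    else if (frag_cnt <= 7)
        *wqe_size = 128;
    else if (frag_cnt <= 14)
        *wqe_size = 256;
    else
        return -1; /* too many fragments */
    return 0;
}

int main(void)
{
    unsigned int size;

    if (!fragcnt_to_rq_wqesize(5, &size))
        printf("5 frags -> %u-byte RQ WQE\n", size); /* 128 */
    return 0;
}
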
++
++/**
++ * zxdh_get_srq_wqe_shift - get shift count for maximum srq wqe size
++ * @uk_attrs: srq HW attributes
++ * @sge: Maximum Scatter Gather Elements wqe
++ * @shift: Returns the shift needed based on sge
++ *
++ * Shift can be used to left shift the srq wqe size based on number of SGEs.
++ * For 1 SGE, shift = 1 (wqe size of 2*16 bytes).
++ * For 2 or 3 SGEs, shift = 2 (wqe size of 4*16 bytes).
++ * For 4-7 SGEs, shift = 3 (wqe size of 8*16 bytes).
++ * For 8-15 SGEs, shift = 4; otherwise shift = 5 (wqe size of 512 bytes).
++ */
++void zxdh_get_srq_wqe_shift(struct zxdh_uk_attrs *uk_attrs, __u32 sge,
++ __u8 *shift)
++{
++ *shift = 0; /* 16-byte RQE; configuration still to be confirmed */
++ if (sge < 2)
++ *shift = 1;
++ else if (sge < 4)
++ *shift = 2;
++ else if (sge < 8)
++ *shift = 3;
++ else if (sge < 16)
++ *shift = 4;
++ else
++ *shift = 5;
++}
++
++/*
++ * zxdh_get_srqdepth - get SRQ depth (quanta)
++ * @max_hw_rq_quanta: HW SRQ size limit
++ * @srq_size: SRQ size
++ * @shift: shift which determines size of WQE
++ * @srqdepth: depth of SRQ
++ */
++int zxdh_get_srqdepth(__u32 max_hw_srq_quanta, __u32 srq_size, __u8 shift,
++ __u32 *srqdepth)
++{
++ *srqdepth = zxdh_qp_round_up((srq_size << shift) + ZXDH_SRQ_RSVD);
++
++ if (*srqdepth < (ZXDH_QP_SW_MIN_WQSIZE << shift))
++ *srqdepth = ZXDH_QP_SW_MIN_WQSIZE << shift;
++ else if ((*srqdepth >> shift) > max_hw_srq_quanta)
++ return ZXDH_ERR_INVALID_SIZE;
++
++ return 0;
++}
++
++__le64 *zxdh_get_srq_wqe(struct zxdh_srq_uk *srq, int wqe_index)
++{
++ __le64 *wqe;
++
++ wqe = srq->srq_base[wqe_index * srq->srq_wqe_size_multiplier].elem;
++ return wqe;
++}
++
++__le16 *zxdh_get_srq_list_wqe(struct zxdh_srq_uk *srq, __u16 *idx)
++{
++ __le16 *wqe;
++ __u16 wqe_idx;
++
++ wqe_idx = srq->srq_list_ring.tail;
++ srq->srq_list_ring.tail++;
++ srq->srq_list_ring.tail %= srq->srq_list_ring.size;
++ *idx = srq->srq_list_ring.tail;
++
++ if (!(*idx))
++ srq->srq_list_polarity = !srq->srq_list_polarity;
++
++ wqe = &srq->srq_list_base[wqe_idx];
++
++ return wqe;
++}
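
A minimal sketch (local stand-in types, not the driver's structures) of the tail-advance-with-wrap and polarity flip used above; flipping the polarity bit on every wrap is what lets the consumer tell fresh entries from stale ones:

#include <stdio.h>
#include <stdint.h>

struct list_ring {
    uint16_t tail;
    uint16_t size;
    uint8_t polarity;
};

/* Advance the tail with wrap-around; flip polarity each time we wrap to 0. */
static uint16_t ring_advance(struct list_ring *r)
{
    uint16_t idx = r->tail;

    r->tail = (r->tail + 1) % r->size;
    if (!r->tail)
        r->polarity = !r->polarity;
    return idx;
}

int main(void)
{
    struct list_ring r = { .tail = 0, .size = 4, .polarity = 1 };

    for (int i = 0; i < 6; i++)
        printf("idx=%u polarity=%u\n", ring_advance(&r), r.polarity);
    return 0;
}
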
++
++/**
++ * zxdh_uk_srq_init - initialize srq
++ * @srq: hw srq (user and kernel)
++ * @info: srq initialization info
++ *
++ * initializes the vars used in both user and kernel mode.
++ * The size of the wqe depends on the maximum number of fragments
++ * allowed. The size of the wqe times the number of wqes should equal
++ * the amount of memory allocated for the srq.
++ */
++enum zxdh_status_code zxdh_uk_srq_init(struct zxdh_srq_uk *srq,
++ struct zxdh_srq_uk_init_info *info)
++{
++ __u32 srq_ring_size;
++ __u8 srqshift;
++
++ srq->uk_attrs = info->uk_attrs;
++ if (info->max_srq_frag_cnt > srq->uk_attrs->max_hw_wq_frags)
++ return ZXDH_ERR_INVALID_FRAG_COUNT;
++ zxdh_get_srq_wqe_shift(srq->uk_attrs, info->max_srq_frag_cnt,
++ &srqshift);
++ srq->srq_base = info->srq_base;
++ srq->srq_list_base = info->srq_list_base;
++ srq->srq_db_base = info->srq_db_base;
++ srq->srq_wrid_array = info->srq_wrid_array;
++ srq->srq_id = info->srq_id;
++ srq->srq_size = info->srq_size;
++ srq->log2_srq_size = info->log2_srq_size;
++ srq->srq_list_size = info->srq_list_size;
++ srq->max_srq_frag_cnt = info->max_srq_frag_cnt;
++ srq_ring_size = srq->srq_size;
++ srq->srq_wqe_size = srqshift;
++ srq->srq_wqe_size_multiplier = 1 << srqshift;
++ ZXDH_RING_INIT(srq->srq_ring, srq_ring_size);
++ ZXDH_RING_INIT(srq->srq_list_ring, srq->srq_list_size);
++ srq->srq_ring.tail = srq->srq_size - 1;
++ srq->srq_list_polarity = 1;
++ zxdh_dbg(ZXDH_DBG_SRQ, "%s srq_wqe_size_multiplier:%d srqshift:%d\n",
++ __func__, srq->srq_wqe_size_multiplier, srqshift);
++ zxdh_dbg(
++ ZXDH_DBG_SRQ,
++ "%s srq->srq_id:%d srq_base:0x%p srq_list_base:0x%p srq_db_base:0x%p\n",
++ __func__, srq->srq_id, srq->srq_base, srq->srq_list_base,
++ srq->srq_db_base);
++ zxdh_dbg(ZXDH_DBG_SRQ,
++ "%s srq->srq_id:%d srq_ring_size:%d srq->srq_list_size:%d\n",
++ __func__, srq->srq_id, srq_ring_size, srq->srq_list_size);
++ return 0;
++}
++
++void zxdh_free_srq_wqe(struct zxdh_srq_uk *srq, int wqe_index)
++{
++ struct zxdh_usrq *iwusrq;
++ __le64 *wqe;
++ __u64 hdr;
++
++ iwusrq = container_of(srq, struct zxdh_usrq, srq);
++ /* always called with interrupts disabled. */
++ pthread_spin_lock(&iwusrq->lock);
++ wqe = zxdh_get_srq_wqe(srq, srq->srq_ring.tail);
++ srq->srq_ring.tail = wqe_index;
++ hdr = FIELD_PREP(ZXDHQPSRQ_NEXT_WQE_INDEX, wqe_index);
++
++ udma_to_device_barrier(); /* make sure WQE is populated before valid bit is set */
++ set_64bit_val(wqe, 0, hdr);
++
++ pthread_spin_unlock(&iwusrq->lock);
++ zxdh_dbg(ZXDH_DBG_SRQ, "%s srq->srq_id:%d wqe_index:%d\n", __func__,
++ srq->srq_id, wqe_index);
++}
+diff --git a/providers/zrdma/umain.c b/providers/zrdma/umain.c
+new file mode 100644
+index 0000000..92cdd37
+--- /dev/null
++++ b/providers/zrdma/umain.c
+@@ -0,0 +1,236 @@
++// SPDX-License-Identifier: GPL-2.0 or Linux-OpenIB
++/* Copyright (c) 2024 ZTE Corporation. All rights reserved. */
++#include
++#include
++#include
++#include
++#include
++#include
++#include
++#include
++#include
++#include
++#include "zxdh_devids.h"
++#include "umain.h"
++#include "abi.h"
++#include "private_verbs_cmd.h"
++
++#define ZXDH_HCA(v, d) VERBS_PCI_MATCH(v, d, NULL)
++static const struct verbs_match_ent hca_table[] = {
++ VERBS_DRIVER_ID(RDMA_DRIVER_ZXDH),
++ ZXDH_HCA(PCI_VENDOR_ID_ZXDH_EVB, ZXDH_DEV_ID_ADAPTIVE_EVB_PF),
++ ZXDH_HCA(PCI_VENDOR_ID_ZXDH_EVB, ZXDH_DEV_ID_ADAPTIVE_EVB_VF),
++ ZXDH_HCA(PCI_VENDOR_ID_ZXDH_E312, ZXDH_DEV_ID_ADAPTIVE_E312_PF),
++ ZXDH_HCA(PCI_VENDOR_ID_ZXDH_E312, ZXDH_DEV_ID_ADAPTIVE_E312_VF),
++ ZXDH_HCA(PCI_VENDOR_ID_ZXDH_X512, ZXDH_DEV_ID_ADAPTIVE_X512_PF),
++ ZXDH_HCA(PCI_VENDOR_ID_ZXDH_X512, ZXDH_DEV_ID_ADAPTIVE_X512_VF),
++ {}
++};
++
++/**
++ * zxdh_ufree_context - free context that was allocated
++ * @ibctx: context allocated ptr
++ */
++static void zxdh_ufree_context(struct ibv_context *ibctx)
++{
++ struct zxdh_uvcontext *iwvctx;
++
++ iwvctx = container_of(ibctx, struct zxdh_uvcontext, ibv_ctx.context);
++
++ zxdh_ufree_pd(&iwvctx->iwupd->ibv_pd);
++ zxdh_munmap(iwvctx->sq_db);
++ zxdh_munmap(iwvctx->cq_db);
++ verbs_uninit_context(&iwvctx->ibv_ctx);
++ free(iwvctx);
++}
++
++static const struct verbs_context_ops zxdh_uctx_ops = {
++ .alloc_mw = zxdh_ualloc_mw,
++ .alloc_pd = zxdh_ualloc_pd,
++ .attach_mcast = zxdh_uattach_mcast,
++ .bind_mw = zxdh_ubind_mw,
++ .cq_event = zxdh_cq_event,
++ .create_ah = zxdh_ucreate_ah,
++ .create_cq = zxdh_ucreate_cq,
++ .create_cq_ex = zxdh_ucreate_cq_ex,
++ .create_qp = zxdh_ucreate_qp,
++ .create_qp_ex = zxdh_ucreate_qp_ex,
++ .create_srq = zxdh_ucreate_srq,
++ .dealloc_mw = zxdh_udealloc_mw,
++ .dealloc_pd = zxdh_ufree_pd,
++ .dereg_mr = zxdh_udereg_mr,
++ .destroy_ah = zxdh_udestroy_ah,
++ .destroy_cq = zxdh_udestroy_cq,
++ .modify_cq = zxdh_umodify_cq,
++ .destroy_qp = zxdh_udestroy_qp,
++ .destroy_srq = zxdh_udestroy_srq,
++ .detach_mcast = zxdh_udetach_mcast,
++ .modify_qp = zxdh_umodify_qp,
++ .modify_srq = zxdh_umodify_srq,
++ .poll_cq = zxdh_upoll_cq,
++ .post_recv = zxdh_upost_recv,
++ .post_send = zxdh_upost_send,
++ .post_srq_recv = zxdh_upost_srq_recv,
++ .query_device_ex = zxdh_uquery_device_ex,
++ .query_port = zxdh_uquery_port,
++ .query_qp = zxdh_uquery_qp,
++ .query_srq = zxdh_uquery_srq,
++ .reg_mr = zxdh_ureg_mr,
++ .rereg_mr = zxdh_urereg_mr,
++ .req_notify_cq = zxdh_uarm_cq,
++ .resize_cq = zxdh_uresize_cq,
++ .free_context = zxdh_ufree_context,
++ .get_srq_num = zxdh_uget_srq_num,
++};
++
++/**
++ * zxdh_ualloc_context - allocate context for user app
++ * @ibdev: ib device created during zxdh_driver_init
++ * @cmd_fd: save fd for the device
++ * @private_data: device private data
++ *
++ * Returns callback routine table and calls driver for allocating
++ * context and getting back resource information to return as ibv_context.
++ */
++static struct verbs_context *zxdh_ualloc_context(struct ibv_device *ibdev,
++ int cmd_fd, void *private_data)
++{
++ struct ibv_pd *ibv_pd;
++ struct zxdh_uvcontext *iwvctx;
++ struct zxdh_get_context cmd;
++ struct zxdh_get_context_resp resp = {};
++ __u64 sq_db_mmap_key, cq_db_mmap_key;
++ __u8 user_ver = ZXDH_ABI_VER;
++
++ iwvctx = verbs_init_and_alloc_context(ibdev, cmd_fd, iwvctx, ibv_ctx,
++ RDMA_DRIVER_ZXDH);
++ if (!iwvctx)
++ return NULL;
++
++ zxdh_set_debug_mask();
++
++ cmd.userspace_ver = user_ver;
++ if (ibv_cmd_get_context(&iwvctx->ibv_ctx,
++ (struct ibv_get_context *)&cmd, sizeof(cmd),
++ &resp.ibv_resp, sizeof(resp))) {
++ cmd.userspace_ver = 4;
++ if (ibv_cmd_get_context(
++ &iwvctx->ibv_ctx, (struct ibv_get_context *)&cmd,
++ sizeof(cmd), &resp.ibv_resp, sizeof(resp)))
++ goto err_free;
++ user_ver = cmd.userspace_ver;
++ }
++
++ verbs_set_ops(&iwvctx->ibv_ctx, &zxdh_uctx_ops);
++
++ iwvctx->uk_attrs.feature_flags = resp.feature_flags;
++ iwvctx->uk_attrs.hw_rev = resp.hw_rev;
++ iwvctx->uk_attrs.max_hw_wq_frags = resp.max_hw_wq_frags;
++ iwvctx->uk_attrs.max_hw_read_sges = resp.max_hw_read_sges;
++ iwvctx->uk_attrs.max_hw_inline = resp.max_hw_inline;
++ iwvctx->uk_attrs.max_hw_rq_quanta = resp.max_hw_rq_quanta;
++ iwvctx->uk_attrs.max_hw_srq_quanta = resp.max_hw_srq_quanta;
++ iwvctx->uk_attrs.max_hw_wq_quanta = resp.max_hw_wq_quanta;
++ iwvctx->uk_attrs.max_hw_srq_wr = resp.max_hw_srq_wr;
++ iwvctx->uk_attrs.max_hw_sq_chunk = resp.max_hw_sq_chunk;
++ iwvctx->uk_attrs.max_hw_cq_size = resp.max_hw_cq_size;
++ iwvctx->uk_attrs.min_hw_cq_size = resp.min_hw_cq_size;
++ iwvctx->abi_ver = user_ver;
++
++ sq_db_mmap_key = resp.sq_db_mmap_key;
++ cq_db_mmap_key = resp.cq_db_mmap_key;
++
++ iwvctx->uk_attrs.db_addr_type = resp.db_addr_type;
++ iwvctx->uk_attrs.sq_db_pa = resp.sq_db_pa;
++ iwvctx->uk_attrs.cq_db_pa = resp.cq_db_pa;
++
++ if (iwvctx->uk_attrs.db_addr_type == ZXDH_DB_ADDR_PHY) {
++ iwvctx->sq_db =
++ mmap(NULL, ZXDH_HW_PAGE_SIZE, PROT_READ | PROT_WRITE,
++ MAP_SHARED, cmd_fd, iwvctx->uk_attrs.sq_db_pa);
++ if (iwvctx->sq_db == MAP_FAILED) {
++ iwvctx->sq_db = NULL;
++ fprintf(stderr, "%s:%d mmap failed\n", __func__,
++ __LINE__);
++ goto err_free;
++ }
++
++ iwvctx->cq_db =
++ mmap(NULL, ZXDH_HW_PAGE_SIZE, PROT_READ | PROT_WRITE,
++ MAP_SHARED, cmd_fd, iwvctx->uk_attrs.cq_db_pa);
++ if (iwvctx->cq_db == MAP_FAILED) {
++ iwvctx->cq_db = NULL;
++ fprintf(stderr, "%s:%d mmap failed\n", __func__,
++ __LINE__);
++ goto err_free;
++ }
++
++ ibv_pd = zxdh_ualloc_pd(&iwvctx->ibv_ctx.context);
++ if (!ibv_pd) {
++ if (iwvctx->sq_db)
++ munmap(iwvctx->sq_db, ZXDH_HW_PAGE_SIZE);
++ if (iwvctx->cq_db)
++ munmap(iwvctx->cq_db, ZXDH_HW_PAGE_SIZE);
++ goto err_free;
++ }
++ } else if (iwvctx->uk_attrs.db_addr_type == ZXDH_DB_ADDR_BAR) {
++ iwvctx->sq_db = zxdh_mmap(cmd_fd, sq_db_mmap_key);
++ if (iwvctx->sq_db == MAP_FAILED)
++ goto err_free;
++
++ iwvctx->cq_db = zxdh_mmap(cmd_fd, cq_db_mmap_key);
++ if (iwvctx->cq_db == MAP_FAILED) {
++ zxdh_munmap(iwvctx->sq_db);
++ goto err_free;
++ }
++ ibv_pd = zxdh_ualloc_pd(&iwvctx->ibv_ctx.context);
++ if (!ibv_pd) {
++ zxdh_munmap(iwvctx->sq_db);
++ zxdh_munmap(iwvctx->cq_db);
++ goto err_free;
++ }
++ } else
++ goto err_free;
++
++ ibv_pd->context = &iwvctx->ibv_ctx.context;
++ iwvctx->iwupd = container_of(ibv_pd, struct zxdh_upd, ibv_pd);
++ add_private_ops(iwvctx);
++ return &iwvctx->ibv_ctx;
++
++err_free:
++ free(iwvctx);
++
++ return NULL;
++}
++
++static void zxdh_uninit_device(struct verbs_device *verbs_device)
++{
++ struct zxdh_udevice *dev;
++
++ dev = container_of(&verbs_device->device, struct zxdh_udevice,
++ ibv_dev.device);
++ free(dev);
++}
++
++static struct verbs_device *zxdh_device_alloc(struct verbs_sysfs_dev *sysfs_dev)
++{
++ struct zxdh_udevice *dev;
++
++ dev = calloc(1, sizeof(*dev));
++ if (!dev)
++ return NULL;
++
++ return &dev->ibv_dev;
++}
++
++static const struct verbs_device_ops zxdh_udev_ops = {
++ .alloc_context = zxdh_ualloc_context,
++ .alloc_device = zxdh_device_alloc,
++ .match_max_abi_version = ZXDH_MAX_ABI_VERSION,
++ .match_min_abi_version = ZXDH_MIN_ABI_VERSION,
++ .match_table = hca_table,
++ .name = "zxdh",
++ .uninit_device = zxdh_uninit_device,
++};
++
++PROVIDER_DRIVER(zxdh, zxdh_udev_ops);
+diff --git a/providers/zrdma/umain.h b/providers/zrdma/umain.h
+new file mode 100644
+index 0000000..d0b400d
+--- /dev/null
++++ b/providers/zrdma/umain.h
+@@ -0,0 +1,228 @@
++/* SPDX-License-Identifier: GPL-2.0 or Linux-OpenIB */
++/* Copyright (c) 2024 ZTE Corporation. All rights reserved. */
++#ifndef ZXDH_UMAIN_H
++#define ZXDH_UMAIN_H
++
++#include
++#include
++#include
++#include
++#include
++
++#include "osdep.h"
++#include "zxdh.h"
++#include "defs.h"
++#include "status.h"
++#include "user.h"
++
++#define ZXDH_BASE_PUSH_PAGE 1
++#define ZXDH_U_MINCQ_SIZE 4
++#define ZXDH_DB_SHADOW_AREA_SIZE 8
++#define ZXDH_DB_SQ_OFFSET 0x404
++#define ZXDH_DB_CQ_OFFSET 0x588
++
++#define MIN_UDP_SPORT 1024
++#define MIN_QP_QPN 1
++
++enum zxdh_supported_wc_flags {
++ ZXDH_CQ_SUPPORTED_WC_FLAGS =
++ IBV_WC_EX_WITH_BYTE_LEN | IBV_WC_EX_WITH_IMM |
++ IBV_WC_EX_WITH_QP_NUM | IBV_WC_EX_WITH_SRC_QP |
++ IBV_WC_EX_WITH_SLID | IBV_WC_EX_WITH_SL |
++ IBV_WC_EX_WITH_DLID_PATH_BITS |
++ IBV_WC_EX_WITH_COMPLETION_TIMESTAMP_WALLCLOCK |
++ IBV_WC_EX_WITH_COMPLETION_TIMESTAMP,
++};
++
++enum {
++ ZXDH_DBG_QP = 1 << 0,
++ ZXDH_DBG_CQ = 1 << 1,
++ ZXDH_DBG_SRQ = 1 << 2,
++};
++extern uint32_t zxdh_debug_mask;
++#define zxdh_dbg(mask, format, arg...) \
++ do { \
++ if (mask & zxdh_debug_mask) { \
++ int tmp = errno; \
++ fprintf(stdout, "%s:%d: " format, __func__, __LINE__, \
++ ##arg); \
++ errno = tmp; \
++ } \
++ } while (0)
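
A standalone mimic of this masked-debug pattern (local names and a hypothetical mask value, not the driver's macro), showing how the mask gates output and errno is preserved across the fprintf call:

#include <stdio.h>
#include <errno.h>
#include <stdint.h>

static uint32_t debug_mask = 0x2; /* pretend the CQ debug bit is enabled */

/* Mirrors the zxdh_dbg() structure: gate on mask, save/restore errno. */
#define dbg(mask, fmt, ...)                                                \
    do {                                                                   \
        if ((mask) & debug_mask) {                                         \
            int saved = errno;                                             \
            fprintf(stdout, "%s:%d: " fmt, __func__, __LINE__,             \
                    ##__VA_ARGS__);                                        \
            errno = saved;                                                 \
        }                                                                  \
    } while (0)

int main(void)
{
    dbg(0x2, "polled %d cqes\n", 4); /* printed */
    dbg(0x1, "never shown\n");       /* filtered out by the mask */
    return 0;
}
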
++
++
++struct zxdh_udevice {
++ struct verbs_device ibv_dev;
++};
++
++struct zxdh_uah {
++ struct ibv_ah ibv_ah;
++ uint32_t ah_id;
++ struct ibv_global_route grh;
++};
++
++struct zxdh_upd {
++ struct ibv_pd ibv_pd;
++ void *arm_cq_page;
++ void *arm_cq;
++ uint32_t pd_id;
++};
++
++struct zxdh_uvcontext {
++ struct verbs_context ibv_ctx;
++ struct zxdh_upd *iwupd;
++ struct zxdh_uk_attrs uk_attrs;
++ void *db;
++ void *sq_db;
++ void *cq_db;
++ int abi_ver;
++ bool legacy_mode;
++ struct zxdh_uvcontext_ops *cxt_ops;
++};
++
++struct zxdh_uqp;
++
++struct zxdh_cq_buf {
++ struct list_node list;
++ struct zxdh_cq_uk cq;
++ struct verbs_mr vmr;
++};
++
++struct zxdh_ucq {
++ struct verbs_cq verbs_cq;
++ struct verbs_mr vmr;
++ struct verbs_mr vmr_shadow_area;
++ pthread_spinlock_t lock;
++ size_t buf_size;
++ bool is_armed;
++ enum zxdh_cmpl_notify last_notify;
++ // bool skip_arm;
++ // bool arm_sol;
++ // bool skip_sol;
++ int comp_vector;
++ uint32_t report_rtt;
++ struct zxdh_uqp *uqp;
++ struct zxdh_cq_uk cq;
++ struct list_head resize_list;
++ /* for extended CQ completion fields */
++ struct zxdh_cq_poll_info cur_cqe;
++ bool resize_enable;
++};
++
++struct zxdh_usrq {
++ struct ibv_srq ibv_srq;
++ struct verbs_mr vmr;
++ struct verbs_mr list_vmr;
++ struct verbs_mr db_vmr;
++ size_t total_buf_size;
++ size_t buf_size;
++ size_t list_buf_size;
++ size_t db_buf_size;
++ size_t srq_size;
++ size_t srq_list_size;
++ uint32_t srq_id;
++ uint32_t max_wr;
++ uint32_t max_sge;
++ uint32_t srq_limit;
++ pthread_spinlock_t lock;
++ uint32_t wq_size;
++ struct ibv_recv_wr *pend_rx_wr;
++ struct zxdh_srq_uk srq;
++};
++
++struct zxdh_uqp {
++ struct verbs_qp vqp;
++ struct zxdh_ucq *send_cq;
++ struct zxdh_ucq *recv_cq;
++ struct zxdh_usrq *srq;
++ struct verbs_mr vmr;
++ size_t buf_size;
++ uint32_t zxdh_drv_opt;
++ pthread_spinlock_t lock;
++ uint16_t sq_sig_all;
++ uint16_t qperr;
++ uint16_t rsvd;
++ uint32_t pending_rcvs;
++ uint32_t wq_size;
++ struct ibv_recv_wr *pend_rx_wr;
++ struct zxdh_qp_uk qp;
++ enum ibv_qp_type qp_type;
++ struct zxdh_sge *recv_sges;
++ uint8_t is_srq;
++ uint8_t inline_data[ZXDH_MAX_INLINE_DATA_SIZE];
++};
++
++struct zxdh_umr {
++ struct verbs_mr vmr;
++ uint32_t acc_flags;
++ uint8_t leaf_pbl_size;
++ uint8_t host_page_size;
++ uint64_t mr_pa_pble_index;
++};
++
++/* zxdh_uverbs.c */
++int zxdh_uquery_device_ex(struct ibv_context *context,
++ const struct ibv_query_device_ex_input *input,
++ struct ibv_device_attr_ex *attr, size_t attr_size);
++int zxdh_uquery_port(struct ibv_context *context, uint8_t port,
++ struct ibv_port_attr *attr);
++struct ibv_pd *zxdh_ualloc_pd(struct ibv_context *context);
++int zxdh_ufree_pd(struct ibv_pd *pd);
++struct ibv_mr *zxdh_ureg_mr(struct ibv_pd *pd, void *addr, size_t length,
++ uint64_t hca_va, int access);
++int zxdh_udereg_mr(struct verbs_mr *vmr);
++
++int zxdh_urereg_mr(struct verbs_mr *mr, int flags, struct ibv_pd *pd,
++ void *addr, size_t length, int access);
++
++struct ibv_mw *zxdh_ualloc_mw(struct ibv_pd *pd, enum ibv_mw_type type);
++int zxdh_ubind_mw(struct ibv_qp *qp, struct ibv_mw *mw,
++ struct ibv_mw_bind *mw_bind);
++int zxdh_udealloc_mw(struct ibv_mw *mw);
++struct ibv_cq *zxdh_ucreate_cq(struct ibv_context *context, int cqe,
++ struct ibv_comp_channel *channel,
++ int comp_vector);
++struct ibv_cq_ex *zxdh_ucreate_cq_ex(struct ibv_context *context,
++ struct ibv_cq_init_attr_ex *attr_ex);
++void zxdh_ibvcq_ex_fill_priv_funcs(struct zxdh_ucq *iwucq,
++ struct ibv_cq_init_attr_ex *attr_ex);
++int zxdh_uresize_cq(struct ibv_cq *cq, int cqe);
++int zxdh_udestroy_cq(struct ibv_cq *cq);
++int zxdh_umodify_cq(struct ibv_cq *cq, struct ibv_modify_cq_attr *attr);
++int zxdh_upoll_cq(struct ibv_cq *cq, int entries, struct ibv_wc *entry);
++int zxdh_uarm_cq(struct ibv_cq *cq, int solicited);
++void zxdh_cq_event(struct ibv_cq *cq);
++struct ibv_qp *zxdh_ucreate_qp(struct ibv_pd *pd,
++ struct ibv_qp_init_attr *attr);
++struct ibv_qp *zxdh_ucreate_qp_ex(struct ibv_context *context,
++ struct ibv_qp_init_attr_ex *attr);
++int zxdh_uquery_qp(struct ibv_qp *qp, struct ibv_qp_attr *attr, int attr_mask,
++ struct ibv_qp_init_attr *init_attr);
++int zxdh_umodify_qp(struct ibv_qp *qp, struct ibv_qp_attr *attr, int attr_mask);
++int zxdh_udestroy_qp(struct ibv_qp *qp);
++int zxdh_upost_send(struct ibv_qp *ib_qp, struct ibv_send_wr *ib_wr,
++ struct ibv_send_wr **bad_wr);
++int zxdh_upost_recv(struct ibv_qp *ib_qp, struct ibv_recv_wr *ib_wr,
++ struct ibv_recv_wr **bad_wr);
++struct ibv_srq *zxdh_ucreate_srq(struct ibv_pd *pd,
++ struct ibv_srq_init_attr *srq_init_attr);
++int zxdh_udestroy_srq(struct ibv_srq *srq);
++int zxdh_umodify_srq(struct ibv_srq *srq, struct ibv_srq_attr *srq_attr,
++ int srq_attr_mask);
++int zxdh_uquery_srq(struct ibv_srq *srq, struct ibv_srq_attr *srq_attr);
++int zxdh_upost_srq_recv(struct ibv_srq *srq, struct ibv_recv_wr *recv_wr,
++ struct ibv_recv_wr **bad_recv_wr);
++int zxdh_uget_srq_num(struct ibv_srq *srq, uint32_t *srq_num);
++struct ibv_ah *zxdh_ucreate_ah(struct ibv_pd *ibpd, struct ibv_ah_attr *attr);
++int zxdh_udestroy_ah(struct ibv_ah *ibah);
++int zxdh_uattach_mcast(struct ibv_qp *qp, const union ibv_gid *gid,
++ uint16_t lid);
++int zxdh_udetach_mcast(struct ibv_qp *qp, const union ibv_gid *gid,
++ uint16_t lid);
++void zxdh_async_event(struct ibv_context *context,
++ struct ibv_async_event *event);
++void zxdh_set_hw_attrs(struct zxdh_hw_attrs *attrs);
++void *zxdh_mmap(int fd, off_t offset);
++void zxdh_munmap(void *map);
++void zxdh_set_debug_mask(void);
++#endif /* ZXDH_UMAIN_H */
+diff --git a/providers/zrdma/user.h b/providers/zrdma/user.h
+new file mode 100644
+index 0000000..fec4f5e
+--- /dev/null
++++ b/providers/zrdma/user.h
+@@ -0,0 +1,572 @@
++/* SPDX-License-Identifier: GPL-2.0 or Linux-OpenIB */
++/* Copyright (c) 2015 - 2020 Intel Corporation */
++#ifndef ZXDH_USER_H
++#define ZXDH_USER_H
++
++#include "osdep.h"
++
++#define zxdh_handle void *
++#define zxdh_adapter_handle zxdh_handle
++#define zxdh_qp_handle zxdh_handle
++#define zxdh_cq_handle zxdh_handle
++#define zxdh_pd_id zxdh_handle
++#define zxdh_stag_handle zxdh_handle
++#define zxdh_stag_index __u32
++#define zxdh_stag __u32
++#define zxdh_stag_key __u8
++#define zxdh_tagged_offset __u64
++#define zxdh_access_privileges __u32
++#define zxdh_physical_fragment __u64
++#define zxdh_address_list __u64 *
++#define zxdh_sgl struct zxdh_sge *
++
++#define ZXDH_MAX_MR_SIZE 0x200000000000ULL
++
++#define ZXDH_ACCESS_FLAGS_LOCALREAD 0x01
++#define ZXDH_ACCESS_FLAGS_LOCALWRITE 0x02
++#define ZXDH_ACCESS_FLAGS_REMOTEREAD_ONLY 0x04
++#define ZXDH_ACCESS_FLAGS_REMOTEREAD 0x05
++#define ZXDH_ACCESS_FLAGS_REMOTEWRITE_ONLY 0x08
++#define ZXDH_ACCESS_FLAGS_REMOTEWRITE 0x0a
++#define ZXDH_ACCESS_FLAGS_BIND_WINDOW 0x10
++#define ZXDH_ACCESS_FLAGS_ZERO_BASED 0x20
++#define ZXDH_ACCESS_FLAGS_ALL 0x3f
++
++#define ZXDH_OP_TYPE_NOP 0x00
++#define ZXDH_OP_TYPE_SEND 0x01
++#define ZXDH_OP_TYPE_SEND_WITH_IMM 0x02
++#define ZXDH_OP_TYPE_SEND_INV 0x03
++#define ZXDH_OP_TYPE_WRITE 0x04
++#define ZXDH_OP_TYPE_WRITE_WITH_IMM 0x05
++#define ZXDH_OP_TYPE_READ 0x06
++#define ZXDH_OP_TYPE_BIND_MW 0x07
++#define ZXDH_OP_TYPE_FAST_REG_MR 0x08
++#define ZXDH_OP_TYPE_LOCAL_INV 0x09
++#define ZXDH_OP_TYPE_UD_SEND 0x0a
++#define ZXDH_OP_TYPE_UD_SEND_WITH_IMM 0x0b
++#define ZXDH_OP_TYPE_REC 0x3e
++#define ZXDH_OP_TYPE_REC_IMM 0x3f
++
++#define ZXDH_FLUSH_MAJOR_ERR 1
++#define ZXDH_RETRY_ACK_MAJOR_ERR 0x8
++#define ZXDH_RETRY_ACK_MINOR_ERR 0xf3
++#define ZXDH_TX_WINDOW_QUERY_ITEM_MINOR_ERR 0xf5
++
++#define ZXDH_MAX_SQ_FRAG 31
++#define ZXDH_MAX_SQ_INLINE_DATELEN_WITH_IMM 210
++
++#define INLINE_DATASIZE_7BYTES 7
++#define INLINE_DATASIZE_24BYTES 24
++#define INLINE_FRAG_DATASIZE_31BYTES 31
++
++#define INLINE_DATA_OFFSET_7BYTES 7
++#define WQE_OFFSET_7BYTES 7
++#define WQE_OFFSET_8BYTES 8
++#define WQE_OFFSET_24BYTES 24
++
++#define ZXDH_SQE_SIZE 4
++#define ZXDH_RQE_SIZE 2
++
++#define ZXDH_SRQ_INVALID_LKEY 0x100
++#define ZXDH_SRQ_DB_INIT_VALUE 0x8000
++
++enum zxdh_device_caps_const {
++ ZXDH_WQE_SIZE = 4,
++ ZXDH_SRQE_SIZE = 2,
++ ZXDH_CQP_WQE_SIZE = 8,
++ ZXDH_CQE_SIZE = 8,
++ ZXDH_EXTENDED_CQE_SIZE = 8,
++ ZXDH_AEQE_SIZE = 2,
++ ZXDH_CEQE_SIZE = 1,
++ ZXDH_CQP_CTX_SIZE = 8,
++ ZXDH_SHADOW_AREA_SIZE = 8,
++ ZXDH_GATHER_STATS_BUF_SIZE = 1024,
++ ZXDH_MIN_IW_QP_ID = 0,
++ ZXDH_QUERY_FPM_BUF_SIZE = 176,
++ ZXDH_COMMIT_FPM_BUF_SIZE = 176,
++ ZXDH_MAX_IW_QP_ID = 262143,
++ ZXDH_MIN_CEQID = 0,
++ ZXDH_MAX_CEQID = 1023,
++ ZXDH_CEQ_MAX_COUNT = ZXDH_MAX_CEQID + 1,
++ ZXDH_MIN_CQID = 0,
++ ZXDH_MAX_CQID = 524287,
++ ZXDH_MIN_AEQ_ENTRIES = 1,
++ ZXDH_MAX_AEQ_ENTRIES = 524287,
++ ZXDH_MIN_CEQ_ENTRIES = 1,
++ ZXDH_MAX_CEQ_ENTRIES = 262143,
++ ZXDH_MIN_CQ_SIZE = 1,
++ ZXDH_MAX_CQ_SIZE = 1048575,
++ ZXDH_DB_ID_ZERO = 0,
++ ZXDH_MAX_WQ_FRAGMENT_COUNT = 13,
++ ZXDH_MAX_SGE_RD = 13,
++ ZXDH_MAX_OUTBOUND_MSG_SIZE = 2147483647,
++ ZXDH_MAX_INBOUND_MSG_SIZE = 2147483647,
++ ZXDH_MAX_PUSH_PAGE_COUNT = 1024,
++ ZXDH_MAX_PE_ENA_VF_COUNT = 32,
++ ZXDH_MAX_VF_FPM_ID = 47,
++ ZXDH_MAX_SQ_PAYLOAD_SIZE = 2147483648,
++ ZXDH_MAX_INLINE_DATA_SIZE = 217,
++ ZXDH_MAX_WQ_ENTRIES = 32768,
++ ZXDH_Q2_BUF_SIZE = 256,
++ ZXDH_QP_CTX_SIZE = 256,
++ ZXDH_MAX_PDS = 262144,
++};
++
++enum zxdh_addressing_type {
++ ZXDH_ADDR_TYPE_ZERO_BASED = 0,
++ ZXDH_ADDR_TYPE_VA_BASED = 1,
++};
++
++enum zxdh_flush_opcode {
++ FLUSH_INVALID = 0,
++ FLUSH_GENERAL_ERR,
++ FLUSH_PROT_ERR,
++ FLUSH_REM_ACCESS_ERR,
++ FLUSH_LOC_QP_OP_ERR,
++ FLUSH_REM_OP_ERR,
++ FLUSH_LOC_LEN_ERR,
++ FLUSH_FATAL_ERR,
++ FLUSH_RETRY_EXC_ERR,
++ FLUSH_MW_BIND_ERR,
++ FLUSH_REM_INV_REQ_ERR,
++};
++
++enum zxdh_cmpl_status {
++ ZXDH_COMPL_STATUS_SUCCESS = 0,
++ ZXDH_COMPL_STATUS_FLUSHED,
++ ZXDH_COMPL_STATUS_INVALID_WQE,
++ ZXDH_COMPL_STATUS_QP_CATASTROPHIC,
++ ZXDH_COMPL_STATUS_REMOTE_TERMINATION,
++ ZXDH_COMPL_STATUS_INVALID_STAG,
++ ZXDH_COMPL_STATUS_BASE_BOUND_VIOLATION,
++ ZXDH_COMPL_STATUS_ACCESS_VIOLATION,
++ ZXDH_COMPL_STATUS_INVALID_PD_ID,
++ ZXDH_COMPL_STATUS_WRAP_ERROR,
++ ZXDH_COMPL_STATUS_STAG_INVALID_PDID,
++ ZXDH_COMPL_STATUS_RDMA_READ_ZERO_ORD,
++ ZXDH_COMPL_STATUS_QP_NOT_PRIVLEDGED,
++ ZXDH_COMPL_STATUS_STAG_NOT_INVALID,
++ ZXDH_COMPL_STATUS_INVALID_PHYS_BUF_SIZE,
++ ZXDH_COMPL_STATUS_INVALID_PHYS_BUF_ENTRY,
++ ZXDH_COMPL_STATUS_INVALID_FBO,
++ ZXDH_COMPL_STATUS_INVALID_LEN,
++ ZXDH_COMPL_STATUS_INVALID_ACCESS,
++ ZXDH_COMPL_STATUS_PHYS_BUF_LIST_TOO_LONG,
++ ZXDH_COMPL_STATUS_INVALID_VIRT_ADDRESS,
++ ZXDH_COMPL_STATUS_INVALID_REGION,
++ ZXDH_COMPL_STATUS_INVALID_WINDOW,
++ ZXDH_COMPL_STATUS_INVALID_TOTAL_LEN,
++ ZXDH_COMPL_STATUS_RETRY_ACK_ERR,
++ ZXDH_COMPL_STATUS_TX_WINDOW_QUERY_ITEM_ERR,
++ ZXDH_COMPL_STATUS_UNKNOWN,
++};
++
++enum zxdh_cmpl_notify {
++ ZXDH_CQ_COMPL_EVENT = 0,
++ ZXDH_CQ_COMPL_SOLICITED = 1,
++};
++
++enum zxdh_qp_caps {
++ ZXDH_WRITE_WITH_IMM = 1,
++ ZXDH_SEND_WITH_IMM = 2,
++ ZXDH_ROCE = 4,
++ ZXDH_PUSH_MODE = 8,
++};
++
++enum zxdh_page_size {
++ ZXDH_PAGE_SIZE_4K = 0,
++ ZXDH_PAGE_SIZE_2M = 9,
++ ZXDH_PAGE_SIZE_1G = 18,
++};
++
++struct zxdh_qp_uk;
++struct zxdh_cq_uk;
++struct zxdh_qp_uk_init_info;
++struct zxdh_cq_uk_init_info;
++
++struct zxdh_sge {
++ zxdh_tagged_offset tag_off;
++ __u32 len;
++ zxdh_stag stag;
++};
++
++struct zxdh_ring {
++ __u32 head;
++ __u32 tail;
++ __u32 size;
++};
++
++struct zxdh_cqe {
++ __le64 buf[ZXDH_CQE_SIZE];
++};
++
++struct zxdh_extended_cqe {
++ __le64 buf[ZXDH_EXTENDED_CQE_SIZE];
++};
++
++struct zxdh_post_send {
++ zxdh_sgl sg_list;
++ __u32 num_sges;
++ __u32 qkey;
++ __u32 dest_qp;
++ __u32 ah_id;
++};
++
++struct zxdh_inline_rdma_send {
++ void *data;
++ __u32 len;
++ __u32 qkey;
++ __u32 dest_qp;
++ __u32 ah_id;
++};
++
++struct zxdh_post_rq_info {
++ __u64 wr_id;
++ zxdh_sgl sg_list;
++ __u32 num_sges;
++};
++
++struct zxdh_rdma_write {
++ zxdh_sgl lo_sg_list;
++ __u32 num_lo_sges;
++ struct zxdh_sge rem_addr;
++};
++
++struct zxdh_inline_rdma_write {
++ void *data;
++ __u32 len;
++ struct zxdh_sge rem_addr;
++};
++
++struct zxdh_rdma_read {
++ zxdh_sgl lo_sg_list;
++ __u32 num_lo_sges;
++ struct zxdh_sge rem_addr;
++};
++
++struct zxdh_bind_window {
++ zxdh_stag mr_stag;
++ __u64 bind_len;
++ void *va;
++ enum zxdh_addressing_type addressing_type;
++ __u8 ena_reads : 1;
++ __u8 ena_writes : 1;
++ zxdh_stag mw_stag;
++ __u8 mem_window_type_1 : 1;
++ __u8 host_page_size;
++ __u8 leaf_pbl_size;
++ __u16 root_leaf_offset;
++ __u64 mw_pa_pble_index;
++};
++
++struct zxdh_inv_local_stag {
++ zxdh_stag target_stag;
++};
++
++struct zxdh_post_sq_info {
++ __u64 wr_id;
++ __u8 op_type;
++ __u8 l4len;
++ __u8 signaled : 1;
++ __u8 read_fence : 1;
++ __u8 local_fence : 1;
++ __u8 inline_data : 1;
++ __u8 imm_data_valid : 1;
++ __u8 push_wqe : 1;
++ __u8 report_rtt : 1;
++ __u8 udp_hdr : 1;
++ __u8 defer_flag : 1;
++ __u8 solicited : 1;
++ __u32 imm_data;
++ __u32 stag_to_inv;
++ union {
++ struct zxdh_post_send send;
++ struct zxdh_rdma_write rdma_write;
++ struct zxdh_rdma_read rdma_read;
++ struct zxdh_bind_window bind_window;
++ struct zxdh_inv_local_stag inv_local_stag;
++ struct zxdh_inline_rdma_write inline_rdma_write;
++ struct zxdh_inline_rdma_send inline_rdma_send;
++ } op;
++};
++
++struct zxdh_cq_poll_info {
++ __u64 wr_id;
++ zxdh_qp_handle qp_handle;
++ __u32 bytes_xfered;
++ __u32 tcp_seq_num_rtt;
++ __u32 qp_id;
++ __u32 ud_src_qpn;
++ __u32 imm_data;
++ zxdh_stag inv_stag; /* or L_R_Key */
++ enum zxdh_cmpl_status comp_status;
++ __u16 major_err;
++ __u16 minor_err;
++ __u8 op_type;
++ __u8 stag_invalid_set : 1; /* or L_R_Key set */
++ __u8 push_dropped : 1;
++ __u8 error : 1;
++ __u8 solicited_event : 1;
++ __u8 ipv4 : 1;
++ __u8 imm_valid : 1;
++};
++
++enum zxdh_status_code zxdh_uk_inline_rdma_write(struct zxdh_qp_uk *qp,
++ struct zxdh_post_sq_info *info,
++ bool post_sq);
++enum zxdh_status_code zxdh_uk_rc_inline_send(struct zxdh_qp_uk *qp,
++ struct zxdh_post_sq_info *info,
++ bool post_sq);
++enum zxdh_status_code zxdh_uk_ud_inline_send(struct zxdh_qp_uk *qp,
++ struct zxdh_post_sq_info *info,
++ bool post_sq);
++enum zxdh_status_code zxdh_uk_mw_bind(struct zxdh_qp_uk *qp,
++ struct zxdh_post_sq_info *info,
++ bool post_sq);
++enum zxdh_status_code zxdh_uk_post_nop(struct zxdh_qp_uk *qp, __u64 wr_id,
++ bool signaled, bool post_sq);
++enum zxdh_status_code zxdh_uk_post_receive(struct zxdh_qp_uk *qp,
++ struct zxdh_post_rq_info *info);
++void zxdh_uk_qp_post_wr(struct zxdh_qp_uk *qp);
++void zxdh_uk_qp_set_shadow_area(struct zxdh_qp_uk *qp);
++enum zxdh_status_code zxdh_uk_rdma_read(struct zxdh_qp_uk *qp,
++ struct zxdh_post_sq_info *info,
++ bool inv_stag, bool post_sq);
++enum zxdh_status_code zxdh_uk_rdma_write(struct zxdh_qp_uk *qp,
++ struct zxdh_post_sq_info *info,
++ bool post_sq);
++enum zxdh_status_code zxdh_uk_rc_send(struct zxdh_qp_uk *qp,
++ struct zxdh_post_sq_info *info,
++ bool post_sq);
++enum zxdh_status_code zxdh_uk_ud_send(struct zxdh_qp_uk *qp,
++ struct zxdh_post_sq_info *info,
++ bool post_sq);
++enum zxdh_status_code
++zxdh_uk_stag_local_invalidate(struct zxdh_qp_uk *qp,
++ struct zxdh_post_sq_info *info, bool post_sq);
++
++struct zxdh_wqe_uk_ops {
++ void (*iw_copy_inline_data)(__u8 *dest, __u8 *src, __u32 len,
++ __u8 polarity, bool imm_data_flag);
++ __u16 (*iw_inline_data_size_to_quanta)(__u32 data_size,
++ bool imm_data_flag);
++ void (*iw_set_fragment)(__le64 *wqe, __u32 offset, struct zxdh_sge *sge,
++ __u8 valid);
++ void (*iw_set_mw_bind_wqe)(__le64 *wqe,
++ struct zxdh_bind_window *op_info);
++};
++
++__le64 *get_current_cqe(struct zxdh_cq_uk *cq);
++enum zxdh_status_code zxdh_uk_cq_poll_cmpl(struct zxdh_cq_uk *cq,
++ struct zxdh_cq_poll_info *info);
++void zxdh_uk_cq_request_notification(struct zxdh_cq_uk *cq,
++ enum zxdh_cmpl_notify cq_notify);
++void zxdh_uk_cq_resize(struct zxdh_cq_uk *cq, void *cq_base, int size);
++void zxdh_uk_cq_set_resized_cnt(struct zxdh_cq_uk *qp, __u16 cnt);
++enum zxdh_status_code zxdh_uk_cq_init(struct zxdh_cq_uk *cq,
++ struct zxdh_cq_uk_init_info *info);
++enum zxdh_status_code zxdh_uk_qp_init(struct zxdh_qp_uk *qp,
++ struct zxdh_qp_uk_init_info *info);
++struct zxdh_sq_uk_wr_trk_info {
++ __u64 wrid;
++ __u32 wr_len;
++ __u16 quanta;
++ __u8 reserved[2];
++};
++
++struct zxdh_qp_sq_quanta {
++ __le64 elem[ZXDH_SQE_SIZE];
++};
++
++struct zxdh_qp_rq_quanta {
++ __le64 elem[ZXDH_RQE_SIZE];
++};
++
++struct zxdh_qp_uk {
++ struct zxdh_qp_sq_quanta *sq_base;
++ struct zxdh_qp_rq_quanta *rq_base;
++ struct zxdh_uk_attrs *uk_attrs;
++ __u32 *wqe_alloc_db;
++ struct zxdh_sq_uk_wr_trk_info *sq_wrtrk_array;
++ __u64 *rq_wrid_array;
++ __le64 *shadow_area;
++ __le32 *push_db;
++ __le64 *push_wqe;
++ struct zxdh_ring sq_ring;
++ struct zxdh_ring rq_ring;
++ struct zxdh_ring initial_ring;
++ __u32 qp_id;
++ __u32 qp_caps;
++ __u32 sq_size;
++ __u32 rq_size;
++ __u32 max_sq_frag_cnt;
++ __u32 max_rq_frag_cnt;
++ __u32 max_inline_data;
++ struct zxdh_wqe_uk_ops wqe_ops;
++ __u16 conn_wqes;
++ __u8 qp_type;
++ __u8 swqe_polarity;
++ __u8 swqe_polarity_deferred;
++ __u8 rwqe_polarity;
++ __u8 rq_wqe_size;
++ __u8 rq_wqe_size_multiplier;
++ __u8 deferred_flag : 1;
++ __u8 push_mode : 1; /* whether the last post wqe was pushed */
++ __u8 push_dropped : 1;
++ __u8 sq_flush_complete : 1; /* Indicates flush was seen and SQ was empty after the flush */
++ __u8 rq_flush_complete : 1; /* Indicates flush was seen and RQ was empty after the flush */
++ __u8 destroy_pending : 1; /* Indicates the QP is being destroyed */
++ void *back_qp;
++ zxdh_sgl split_sg_list;
++ pthread_spinlock_t *lock;
++ __u16 rwqe_signature;
++ __u8 dbg_rq_flushed;
++ __u8 sq_flush_seen;
++ __u8 rq_flush_seen;
++ __u8 is_srq;
++ __u16 mtu;
++ __u32 next_psn;
++ __u32 cqe_last_ack_qsn;
++ __u32 qp_last_ack_qsn;
++ __u8 cqe_retry_cnt;
++ __u8 qp_reset_cnt;
++};
++
++struct zxdh_cq_uk {
++ struct zxdh_cqe *cq_base;
++ __u32 *cqe_alloc_db;
++ __u32 *cq_ack_db;
++ __le64 *shadow_area;
++ __u32 cq_id;
++ __u32 cq_size;
++ __u32 cqe_rd_cnt;
++ struct zxdh_ring cq_ring;
++ __u8 polarity;
++ __u8 cqe_size;
++};
++
++struct zxdh_srq_uk {
++ struct zxdh_srq_wqe *srq_base;
++ struct zxdh_uk_attrs *uk_attrs;
++ __le16 *srq_list_base;
++ __le64 *srq_db_base;
++ __u32 srq_id;
++ __u32 srq_size;
++ __u32 log2_srq_size;
++ __u32 srq_list_size;
++ struct zxdh_ring srq_ring;
++ struct zxdh_ring srq_list_ring;
++ // u8 srq_polarity;
++ __u8 srq_list_polarity;
++ __u64 *srq_wrid_array;
++ __u8 srq_wqe_size;
++ __u8 srq_wqe_size_multiplier;
++ __u32 srq_caps;
++ __u32 max_srq_frag_cnt;
++ __u32 srq_type;
++ pthread_spinlock_t *lock;
++ __u8 srq_flush_complete : 1; /* Indicates flush was seen and SRQ was empty after the flush */
++ __u8 destroy_pending : 1; /* Indicates the SRQ is being destroyed */
++ __u8 srq_flush_seen;
++};
++
++struct zxdh_qp_uk_init_info {
++ struct zxdh_qp_sq_quanta *sq;
++ struct zxdh_qp_rq_quanta *rq;
++ struct zxdh_uk_attrs *uk_attrs;
++ __u32 *wqe_alloc_db;
++ __le64 *shadow_area;
++ struct zxdh_sq_uk_wr_trk_info *sq_wrtrk_array;
++ __u64 *rq_wrid_array;
++ __u32 qp_id;
++ __u32 qp_caps;
++ __u32 sq_size;
++ __u32 rq_size;
++ __u32 max_sq_frag_cnt;
++ __u32 max_rq_frag_cnt;
++ __u32 max_inline_data;
++ __u8 type;
++ int abi_ver;
++ bool legacy_mode;
++};
++
++struct zxdh_cq_uk_init_info {
++ __u32 *cqe_alloc_db;
++ __u32 *cq_ack_db;
++ struct zxdh_cqe *cq_base;
++ __le64 *shadow_area;
++ __u32 cq_size;
++ __u32 cq_id;
++ __u8 cqe_size;
++};
++
++struct zxdh_srq_uk_init_info {
++ struct zxdh_srq_wqe *srq_base;
++ struct zxdh_uk_attrs *uk_attrs;
++ __le16 *srq_list_base;
++ __le64 *srq_db_base;
++ __u64 *srq_wrid_array;
++ __u32 srq_id;
++ __u32 srq_caps;
++ __u32 srq_size;
++ __u32 log2_srq_size;
++ __u32 srq_list_size;
++ __u32 srq_db_size;
++ __u32 max_srq_frag_cnt;
++ __u32 srq_limit;
++};
++
++struct zxdh_wqe_srq_next_sge {
++ __le16 next_wqe_index;
++ __le16 signature;
++ __u8 valid_sge_num;
++ __u8 rsvd[11];
++};
++
++struct zxdh_srq_sge {
++ __le64 addr;
++ __le32 length;
++ __le32 lkey;
++};
++
++struct zxdh_srq_wqe {
++ __le64 elem[ZXDH_SRQE_SIZE];
++};
++
++__le64 *zxdh_qp_get_next_send_wqe(struct zxdh_qp_uk *qp, __u32 *wqe_idx,
++ __u16 quanta, __u32 total_size,
++ struct zxdh_post_sq_info *info);
++__le64 *zxdh_qp_get_next_recv_wqe(struct zxdh_qp_uk *qp, __u32 *wqe_idx);
++void zxdh_uk_clean_cq(void *q, struct zxdh_cq_uk *cq);
++enum zxdh_status_code zxdh_nop(struct zxdh_qp_uk *qp, __u64 wr_id,
++ bool signaled, bool post_sq);
++enum zxdh_status_code zxdh_fragcnt_to_quanta_sq(__u32 frag_cnt, __u16 *quanta);
++enum zxdh_status_code zxdh_fragcnt_to_wqesize_rq(__u32 frag_cnt,
++ __u16 *wqe_size);
++void zxdh_get_sq_wqe_shift(struct zxdh_uk_attrs *uk_attrs, __u32 sge,
++ __u32 inline_data, __u8 *shift);
++void zxdh_get_rq_wqe_shift(struct zxdh_uk_attrs *uk_attrs, __u32 sge,
++ __u8 *shift);
++enum zxdh_status_code zxdh_get_sqdepth(struct zxdh_uk_attrs *uk_attrs,
++ __u32 sq_size, __u8 shift,
++ __u32 *wqdepth);
++enum zxdh_status_code zxdh_get_rqdepth(struct zxdh_uk_attrs *uk_attrs,
++ __u32 rq_size, __u8 shift,
++ __u32 *wqdepth);
++int zxdh_qp_round_up(__u32 wqdepth);
++int zxdh_cq_round_up(__u32 wqdepth);
++void zxdh_qp_push_wqe(struct zxdh_qp_uk *qp, __le64 *wqe, __u16 quanta,
++ __u32 wqe_idx, bool post_sq);
++void zxdh_clr_wqes(struct zxdh_qp_uk *qp, __u32 qp_wqe_idx);
++
++void zxdh_get_srq_wqe_shift(struct zxdh_uk_attrs *uk_attrs, __u32 sge,
++ __u8 *shift);
++int zxdh_get_srqdepth(__u32 max_hw_srq_quanta, __u32 srq_size, __u8 shift,
++ __u32 *srqdepth);
++__le64 *zxdh_get_srq_wqe(struct zxdh_srq_uk *srq, int wqe_index);
++__le16 *zxdh_get_srq_list_wqe(struct zxdh_srq_uk *srq, __u16 *idx);
++
++enum zxdh_status_code zxdh_uk_srq_init(struct zxdh_srq_uk *srq,
++ struct zxdh_srq_uk_init_info *info);
++void zxdh_free_srq_wqe(struct zxdh_srq_uk *srq, int wqe_index);
++#endif /* ZXDH_USER_H */
+diff --git a/providers/zrdma/uverbs.c b/providers/zrdma/uverbs.c
+new file mode 100644
+index 0000000..edd05bf
+--- /dev/null
++++ b/providers/zrdma/uverbs.c
+@@ -0,0 +1,3209 @@
++// SPDX-License-Identifier: GPL-2.0 or Linux-OpenIB
++/* Copyright (c) 2024 ZTE Corporation. All rights reserved. */
++#include
++#include
++#include
++#include
++#include
++#include
++#include
++#include
++#include
++#include
++#include
++#include
++#include
++#include
++#include
++#include
++
++#include "umain.h"
++#include "abi.h"
++
++uint32_t zxdh_debug_mask;
++
++static const unsigned int zxdh_roce_mtu[] = {
++ [IBV_MTU_256] = 256, [IBV_MTU_512] = 512, [IBV_MTU_1024] = 1024,
++ [IBV_MTU_2048] = 2048, [IBV_MTU_4096] = 4096,
++};
++
++static inline unsigned int mtu_enum_to_int(enum ibv_mtu mtu)
++{
++ return zxdh_roce_mtu[mtu];
++}
++
++static inline void print_fw_ver(uint64_t fw_ver, char *str, size_t len)
++{
++ uint16_t major, minor, sub_minor, sub_major;
++
++ major = (fw_ver >> 48) & 0xffff;
++ sub_major = (fw_ver >> 32) & 0xffff;
++ minor = (fw_ver >> 16) & 0xffff;
++ sub_minor = fw_ver & 0xffff;
++ snprintf(str, len, "%d.%02d.%02d.%02d", major, sub_major, minor,
++ sub_minor);
++}
++
++/**
++ * zxdh_get_inline_data - copy multi-sge inline data into one buffer
++ * @inline_data: destination buffer for the gathered inline data
++ * @ib_wr: work request ptr
++ * @len: returns the accumulated length of all sges
++ */
++static int zxdh_get_inline_data(uint8_t *inline_data, struct ibv_send_wr *ib_wr,
++ __u32 *len)
++{
++ int num = 0;
++ int offset = 0;
++
++ while (num < ib_wr->num_sge) {
++ *len += ib_wr->sg_list[num].length;
++ if (*len > ZXDH_MAX_INLINE_DATA_SIZE) {
++ printf("err:inline bytes over max inline length\n");
++ return -EINVAL;
++ }
++ memcpy(inline_data + offset,
++ (void *)(uintptr_t)ib_wr->sg_list[num].addr,
++ ib_wr->sg_list[num].length);
++ offset += ib_wr->sg_list[num].length;
++ num++;
++ }
++ return 0;
++}
++
++/**
++ * zxdh_uquery_device_ex - query device attributes including extended properties
++ * @context: user context for the device
++ * @input: extensible input struct for ibv_query_device_ex verb
++ * @attr: extended device attribute struct
++ * @attr_size: size of extended device attribute struct
++ **/
++int zxdh_uquery_device_ex(struct ibv_context *context,
++ const struct ibv_query_device_ex_input *input,
++ struct ibv_device_attr_ex *attr, size_t attr_size)
++{
++ struct ib_uverbs_ex_query_device_resp resp = {};
++ size_t resp_size = sizeof(resp);
++ int ret;
++
++ ret = ibv_cmd_query_device_any(context, input, attr, attr_size, &resp,
++ &resp_size);
++ if (ret)
++ return ret;
++
++ print_fw_ver(resp.base.fw_ver, attr->orig_attr.fw_ver,
++ sizeof(attr->orig_attr.fw_ver));
++
++ return 0;
++}
++
++/**
++ * zxdh_uquery_port - get port attributes (msg size, lnk, mtu...)
++ * @context: user context of the device
++ * @port: port for the attributes
++ * @attr: to return port attributes
++ **/
++int zxdh_uquery_port(struct ibv_context *context, uint8_t port,
++ struct ibv_port_attr *attr)
++{
++ struct ibv_query_port cmd;
++
++ return ibv_cmd_query_port(context, port, attr, &cmd, sizeof(cmd));
++}
++
++/**
++ * zxdh_ualloc_pd - allocates protection domain and return pd ptr
++ * @context: user context of the device
++ **/
++struct ibv_pd *zxdh_ualloc_pd(struct ibv_context *context)
++{
++ struct ibv_alloc_pd cmd;
++ struct zxdh_ualloc_pd_resp resp = {};
++ struct zxdh_upd *iwupd;
++ int err;
++
++ iwupd = malloc(sizeof(*iwupd));
++ if (!iwupd)
++ return NULL;
++
++ err = ibv_cmd_alloc_pd(context, &iwupd->ibv_pd, &cmd, sizeof(cmd),
++ &resp.ibv_resp, sizeof(resp));
++ if (err)
++ goto err_free;
++
++ iwupd->pd_id = resp.pd_id;
++
++ return &iwupd->ibv_pd;
++
++err_free:
++ free(iwupd);
++ errno = err;
++ return NULL;
++}
++
++/**
++ * zxdh_ufree_pd - free pd resources
++ * @pd: pd to free resources
++ */
++int zxdh_ufree_pd(struct ibv_pd *pd)
++{
++ struct zxdh_upd *iwupd;
++ int ret;
++
++ iwupd = container_of(pd, struct zxdh_upd, ibv_pd);
++ ret = ibv_cmd_dealloc_pd(pd);
++ if (ret)
++ return ret;
++
++ free(iwupd);
++
++ return 0;
++}
++
++/**
++ * zxdh_ureg_mr - register user memory region
++ * @pd: pd for the mr
++ * @addr: user address of the memory region
++ * @length: length of the memory
++ * @hca_va: hca_va
++ * @access: access allowed on this mr
++ */
++struct ibv_mr *zxdh_ureg_mr(struct ibv_pd *pd, void *addr, size_t length,
++ uint64_t hca_va, int access)
++{
++ struct zxdh_umr *umr;
++ struct zxdh_ureg_mr cmd;
++ struct zxdh_ureg_mr_resp resp = {};
++ int err;
++
++ umr = malloc(sizeof(*umr));
++ if (!umr)
++ return NULL;
++
++ cmd.reg_type = ZXDH_MEMREG_TYPE_MEM;
++ err = ibv_cmd_reg_mr(pd, addr, length, hca_va, access, &umr->vmr,
++ &cmd.ibv_cmd, sizeof(cmd), &resp.ibv_resp,
++ sizeof(resp));
++ if (err) {
++ free(umr);
++ errno = err;
++ return NULL;
++ }
++ umr->acc_flags = access;
++ umr->host_page_size = resp.host_page_size;
++ umr->leaf_pbl_size = resp.leaf_pbl_size;
++ umr->mr_pa_pble_index = resp.mr_pa_hig;
++ umr->mr_pa_pble_index = (umr->mr_pa_pble_index << 32) | resp.mr_pa_low;
++
++ return &umr->vmr.ibv_mr;
++}
++
++/*
++ * zxdh_urereg_mr - re-register memory region
++ * @vmr: mr that was allocated
++ * @flags: bit mask to indicate which of the attr's of MR modified
++ * @pd: pd of the mr
++ * @addr: user address of the memory region
++ * @length: length of the memory
++ * @access: access allowed on this mr
++ */
++int zxdh_urereg_mr(struct verbs_mr *vmr, int flags, struct ibv_pd *pd,
++ void *addr, size_t length, int access)
++{
++ struct zxdh_urereg_mr cmd = {};
++ struct ib_uverbs_rereg_mr_resp resp;
++
++ cmd.reg_type = ZXDH_MEMREG_TYPE_MEM;
++ return ibv_cmd_rereg_mr(vmr, flags, addr, length, (uintptr_t)addr,
++ access, pd, &cmd.ibv_cmd, sizeof(cmd), &resp,
++ sizeof(resp));
++}
++
++/**
++ * zxdh_udereg_mr - deregister memory region
++ * @vmr: mr that was allocated
++ */
++int zxdh_udereg_mr(struct verbs_mr *vmr)
++{
++ int ret;
++
++ ret = ibv_cmd_dereg_mr(vmr);
++ if (ret)
++ return ret;
++
++ free(vmr);
++
++ return 0;
++}
++
++/**
++ * zxdh_ualloc_mw - allocate memory window
++ * @pd: protection domain
++ * @type: memory window type
++ */
++struct ibv_mw *zxdh_ualloc_mw(struct ibv_pd *pd, enum ibv_mw_type type)
++{
++ struct ibv_mw *mw;
++ struct ibv_alloc_mw cmd;
++ struct ib_uverbs_alloc_mw_resp resp;
++
++ mw = calloc(1, sizeof(*mw));
++ if (!mw)
++ return NULL;
++
++ if (ibv_cmd_alloc_mw(pd, type, mw, &cmd, sizeof(cmd), &resp,
++ sizeof(resp))) {
++ free(mw);
++ return NULL;
++ }
++
++ return mw;
++}
++
++/**
++ * zxdh_ubind_mw - bind a memory window
++ * @qp: qp to post WR
++ * @mw: memory window to bind
++ * @mw_bind: bind info
++ */
++int zxdh_ubind_mw(struct ibv_qp *qp, struct ibv_mw *mw,
++ struct ibv_mw_bind *mw_bind)
++{
++ struct ibv_mw_bind_info *bind_info = &mw_bind->bind_info;
++ struct verbs_mr *vmr = verbs_get_mr(bind_info->mr);
++ struct zxdh_umr *umr = container_of(vmr, struct zxdh_umr, vmr);
++ struct ibv_send_wr wr = {};
++ struct ibv_send_wr *bad_wr;
++ int err;
++
++ if (vmr->mr_type != IBV_MR_TYPE_MR)
++ return -ENOTSUP;
++
++ if (umr->acc_flags & IBV_ACCESS_ZERO_BASED)
++ return -EINVAL;
++
++ if (mw->type != IBV_MW_TYPE_1)
++ return -EINVAL;
++
++ wr.opcode = IBV_WR_BIND_MW;
++ wr.bind_mw.bind_info = mw_bind->bind_info;
++ wr.bind_mw.mw = mw;
++ wr.bind_mw.rkey = ibv_inc_rkey(mw->rkey);
++
++ wr.wr_id = mw_bind->wr_id;
++ wr.send_flags = mw_bind->send_flags;
++
++ err = zxdh_upost_send(qp, &wr, &bad_wr);
++ if (!err)
++ mw->rkey = wr.bind_mw.rkey;
++
++ return err;
++}
++
++/**
++ * zxdh_udealloc_mw - deallocate memory window
++ * @mw: memory window to dealloc
++ */
++int zxdh_udealloc_mw(struct ibv_mw *mw)
++{
++ int ret;
++
++ ret = ibv_cmd_dealloc_mw(mw);
++ if (ret)
++ return ret;
++ free(mw);
++
++ return 0;
++}
++
++static void *zxdh_alloc_hw_buf(size_t size)
++{
++ void *buf;
++
++ buf = memalign(ZXDH_HW_PAGE_SIZE, size);
++
++ if (!buf)
++ return NULL;
++ if (ibv_dontfork_range(buf, size)) {
++ free(buf);
++ return NULL;
++ }
++
++ return buf;
++}
++
++static void zxdh_free_hw_buf(void *buf, size_t size)
++{
++ ibv_dofork_range(buf, size);
++ free(buf);
++}
++
++/**
++ * get_cq_size - returns actual cqe needed by HW
++ * @ncqe: minimum cqes requested by application
++ */
++static inline int get_cq_size(int ncqe)
++{
++ /* Completions with immediate require 1 extra entry */
++ ncqe++;
++
++ if (ncqe < ZXDH_U_MINCQ_SIZE)
++ ncqe = ZXDH_U_MINCQ_SIZE;
++
++ return ncqe;
++}
++
++static inline size_t get_cq_total_bytes(__u32 cq_size)
++{
++ return roundup(cq_size * sizeof(struct zxdh_cqe), ZXDH_HW_PAGE_SIZE);
++}
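
A standalone sketch (hypothetical constants standing in for ZXDH_CQE_SIZE, ZXDH_HW_PAGE_SIZE and ZXDH_HW_PAGE_SHIFT) of how the CQ buffer size is rounded up to whole hardware pages, the same arithmetic get_cq_total_bytes() and the cq_pages computation below rely on:

#include <stdio.h>
#include <stddef.h>

#define HW_PAGE_SIZE  4096u
#define HW_PAGE_SHIFT 12u
#define CQE_SIZE      64u /* 8 x 64-bit words per CQE */

static size_t cq_total_bytes(unsigned int cq_size)
{
    size_t bytes = (size_t)cq_size * CQE_SIZE;

    /* round up to a whole number of hardware pages */
    return (bytes + HW_PAGE_SIZE - 1) / HW_PAGE_SIZE * HW_PAGE_SIZE;
}

int main(void)
{
    size_t total = cq_total_bytes(1024); /* 1024 * 64 = 64 KiB */

    printf("bytes=%zu pages=%zu\n", total, total >> HW_PAGE_SHIFT);
    return 0;
}
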
++
++/**
++ * ucreate_cq - zxdh util function to create a CQ
++ * @context: ibv context
++ * @attr_ex: CQ init attributes
++ * @ext_cq: flag to create an extendable or normal CQ
++ */
++static struct ibv_cq_ex *ucreate_cq(struct ibv_context *context,
++ struct ibv_cq_init_attr_ex *attr_ex,
++ bool ext_cq)
++{
++ struct zxdh_cq_uk_init_info info = {};
++ struct zxdh_ureg_mr reg_mr_cmd = {};
++ struct zxdh_ucreate_cq_ex cmd = {};
++ struct zxdh_ucreate_cq_ex_resp resp = {};
++ struct ib_uverbs_reg_mr_resp reg_mr_resp = {};
++ struct zxdh_ureg_mr reg_mr_shadow_cmd = {};
++ struct ib_uverbs_reg_mr_resp reg_mr_shadow_resp = {};
++ struct zxdh_uk_attrs *uk_attrs;
++ struct zxdh_uvcontext *iwvctx;
++ struct zxdh_ucq *iwucq;
++ size_t total_size;
++ __u32 cq_pages;
++ int ret, ncqe;
++ __u8 hw_rev;
++
++ iwvctx = container_of(context, struct zxdh_uvcontext, ibv_ctx.context);
++ uk_attrs = &iwvctx->uk_attrs;
++ hw_rev = uk_attrs->hw_rev;
++
++ if (attr_ex->cqe < ZXDH_MIN_CQ_SIZE ||
++ attr_ex->cqe > uk_attrs->max_hw_cq_size) {
++ errno = EINVAL;
++ return NULL;
++ }
++
++ /* save the cqe requested by application */
++ ncqe = attr_ex->cqe;
++ iwucq = calloc(1, sizeof(*iwucq));
++ if (!iwucq)
++ return NULL;
++
++ ret = pthread_spin_init(&iwucq->lock, PTHREAD_PROCESS_PRIVATE);
++ if (ret) {
++ errno = ret;
++ free(iwucq);
++ return NULL;
++ }
++
++ iwucq->resize_enable = false;
++ info.cq_size = get_cq_size(attr_ex->cqe);
++ info.cq_size = zxdh_cq_round_up(info.cq_size);
++ iwucq->comp_vector = attr_ex->comp_vector;
++ list_head_init(&iwucq->resize_list);
++ total_size = get_cq_total_bytes(info.cq_size);
++ cq_pages = total_size >> ZXDH_HW_PAGE_SHIFT;
++
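++	/*
++	 * Shadow (doorbell) area placement: when the device supports CQ
++	 * resize, the shadow area is allocated and registered separately
++	 * below so the CQ ring buffer can be replaced without touching it;
++	 * otherwise it is carved out of the tail of this same allocation.
++	 */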
++ if (!(uk_attrs->feature_flags & ZXDH_FEATURE_CQ_RESIZE))
++ total_size = (cq_pages << ZXDH_HW_PAGE_SHIFT) +
++ ZXDH_DB_SHADOW_AREA_SIZE;
++
++ iwucq->buf_size = total_size;
++ info.cq_base = zxdh_alloc_hw_buf(total_size);
++ if (!info.cq_base)
++ goto err_cq_base;
++
++ memset(info.cq_base, 0, total_size);
++ reg_mr_cmd.reg_type = ZXDH_MEMREG_TYPE_CQ;
++ reg_mr_cmd.cq_pages = cq_pages;
++
++ ret = ibv_cmd_reg_mr(&iwvctx->iwupd->ibv_pd, info.cq_base, total_size,
++ (uintptr_t)info.cq_base, IBV_ACCESS_LOCAL_WRITE,
++			     &iwucq->vmr, &reg_mr_cmd.ibv_cmd,
++			     sizeof(reg_mr_cmd), &reg_mr_resp,
++ sizeof(reg_mr_resp));
++ if (ret) {
++ errno = ret;
++ goto err_dereg_mr;
++ }
++
++ iwucq->vmr.ibv_mr.pd = &iwvctx->iwupd->ibv_pd;
++
++ if (uk_attrs->feature_flags & ZXDH_FEATURE_CQ_RESIZE) {
++ info.shadow_area = zxdh_alloc_hw_buf(ZXDH_DB_SHADOW_AREA_SIZE);
++ if (!info.shadow_area)
++			goto err_dereg_shadow;
++
++ memset(info.shadow_area, 0, ZXDH_DB_SHADOW_AREA_SIZE);
++ reg_mr_shadow_cmd.reg_type = ZXDH_MEMREG_TYPE_CQ;
++ reg_mr_shadow_cmd.cq_pages = 1;
++
++ ret = ibv_cmd_reg_mr(
++ &iwvctx->iwupd->ibv_pd, info.shadow_area,
++ ZXDH_DB_SHADOW_AREA_SIZE, (uintptr_t)info.shadow_area,
++ IBV_ACCESS_LOCAL_WRITE, &iwucq->vmr_shadow_area,
++			&reg_mr_shadow_cmd.ibv_cmd, sizeof(reg_mr_shadow_cmd),
++			&reg_mr_shadow_resp, sizeof(reg_mr_shadow_resp));
++ if (ret) {
++ errno = ret;
++ goto err_dereg_shadow;
++ }
++
++ iwucq->vmr_shadow_area.ibv_mr.pd = &iwvctx->iwupd->ibv_pd;
++
++ } else {
++ info.shadow_area = (__le64 *)((__u8 *)info.cq_base +
++ (cq_pages << ZXDH_HW_PAGE_SHIFT));
++ }
++
++ attr_ex->cqe = info.cq_size;
++ cmd.user_cq_buf = (__u64)((uintptr_t)info.cq_base);
++ cmd.user_shadow_area = (__u64)((uintptr_t)info.shadow_area);
++
++ ret = ibv_cmd_create_cq_ex(context, attr_ex, &iwucq->verbs_cq,
++ &cmd.ibv_cmd, sizeof(cmd), &resp.ibv_resp,
++ sizeof(resp), 0);
++ if (ret) {
++ errno = ret;
++ goto err_dereg_shadow;
++ }
++
++ if (ext_cq)
++ zxdh_ibvcq_ex_fill_priv_funcs(iwucq, attr_ex);
++ info.cq_id = resp.cq_id;
++ /* Do not report the cqe's burned by HW */
++ iwucq->verbs_cq.cq.cqe = ncqe;
++
++ info.cqe_alloc_db =
++ (__u32 *)((__u8 *)iwvctx->cq_db + ZXDH_DB_CQ_OFFSET);
++ zxdh_uk_cq_init(&iwucq->cq, &info);
++
++ return &iwucq->verbs_cq.cq_ex;
++
++err_dereg_shadow:
++ ibv_cmd_dereg_mr(&iwucq->vmr);
++ if (iwucq->vmr_shadow_area.ibv_mr.handle) {
++ ibv_cmd_dereg_mr(&iwucq->vmr_shadow_area);
++ zxdh_free_hw_buf(info.shadow_area, ZXDH_DB_SHADOW_AREA_SIZE);
++ }
++err_dereg_mr:
++ zxdh_free_hw_buf(info.cq_base, total_size);
++err_cq_base:
++ pthread_spin_destroy(&iwucq->lock);
++
++ free(iwucq);
++
++ return NULL;
++}
++
++struct ibv_cq *zxdh_ucreate_cq(struct ibv_context *context, int cqe,
++ struct ibv_comp_channel *channel,
++ int comp_vector)
++{
++ struct ibv_cq_init_attr_ex attr_ex = {
++ .cqe = cqe,
++ .channel = channel,
++ .comp_vector = comp_vector,
++ };
++ struct ibv_cq_ex *ibvcq_ex;
++
++ ibvcq_ex = ucreate_cq(context, &attr_ex, false);
++
++ return ibvcq_ex ? ibv_cq_ex_to_cq(ibvcq_ex) : NULL;
++}
++
++struct ibv_cq_ex *zxdh_ucreate_cq_ex(struct ibv_context *context,
++ struct ibv_cq_init_attr_ex *attr_ex)
++{
++ if (attr_ex->wc_flags & ~ZXDH_CQ_SUPPORTED_WC_FLAGS) {
++ errno = EOPNOTSUPP;
++ return NULL;
++ }
++
++ return ucreate_cq(context, attr_ex, true);
++}
++
++/**
++ * zxdh_free_cq_buf - free memory for cq buffer
++ * @cq_buf: cq buf to free
++ */
++static void zxdh_free_cq_buf(struct zxdh_cq_buf *cq_buf)
++{
++ ibv_cmd_dereg_mr(&cq_buf->vmr);
++ zxdh_free_hw_buf(cq_buf->cq.cq_base,
++ get_cq_total_bytes(cq_buf->cq.cq_size));
++ free(cq_buf);
++}
++
++/**
++ * zxdh_process_resize_list - process the cq list to remove buffers
++ * @iwucq: cq which owns the list
++ * @lcqe_buf: cq buf where the last cqe is found
++ */
++static int zxdh_process_resize_list(struct zxdh_ucq *iwucq,
++ struct zxdh_cq_buf *lcqe_buf)
++{
++ struct zxdh_cq_buf *cq_buf, *next;
++ int cq_cnt = 0;
++
++ list_for_each_safe(&iwucq->resize_list, cq_buf, next, list) {
++ if (cq_buf == lcqe_buf)
++ return cq_cnt;
++
++ list_del(&cq_buf->list);
++ zxdh_free_cq_buf(cq_buf);
++ cq_cnt++;
++ }
++
++ return cq_cnt;
++}
++
++/**
++ * zxdh_udestroy_cq - destroys cq
++ * @cq: ptr to cq to be destroyed
++ */
++int zxdh_udestroy_cq(struct ibv_cq *cq)
++{
++ struct zxdh_uk_attrs *uk_attrs;
++ struct zxdh_uvcontext *iwvctx;
++ struct zxdh_ucq *iwucq;
++ __u64 cq_shadow_temp;
++ int ret;
++
++ iwucq = container_of(cq, struct zxdh_ucq, verbs_cq.cq);
++ iwvctx = container_of(cq->context, struct zxdh_uvcontext,
++ ibv_ctx.context);
++ uk_attrs = &iwvctx->uk_attrs;
++
++ ret = pthread_spin_destroy(&iwucq->lock);
++ if (ret)
++ goto err;
++
++ get_64bit_val(iwucq->cq.shadow_area, 0, &cq_shadow_temp);
++
++ zxdh_process_resize_list(iwucq, NULL);
++ ret = ibv_cmd_destroy_cq(cq);
++ if (ret)
++ goto err;
++
++ ibv_cmd_dereg_mr(&iwucq->vmr);
++ zxdh_free_hw_buf(iwucq->cq.cq_base, iwucq->buf_size);
++
++ if (uk_attrs->feature_flags & ZXDH_FEATURE_CQ_RESIZE) {
++ ibv_cmd_dereg_mr(&iwucq->vmr_shadow_area);
++ zxdh_free_hw_buf(iwucq->cq.shadow_area,
++ ZXDH_DB_SHADOW_AREA_SIZE);
++ }
++ free(iwucq);
++ return 0;
++
++err:
++ return ret;
++}
++
++int zxdh_umodify_cq(struct ibv_cq *cq, struct ibv_modify_cq_attr *attr)
++{
++ struct ibv_modify_cq cmd = {};
++
++ return ibv_cmd_modify_cq(cq, attr, &cmd, sizeof(cmd));
++}
++
++static enum ibv_wc_status
++zxdh_flush_err_to_ib_wc_status(enum zxdh_flush_opcode opcode)
++{
++ switch (opcode) {
++ case FLUSH_PROT_ERR:
++ return IBV_WC_LOC_PROT_ERR;
++ case FLUSH_REM_ACCESS_ERR:
++ return IBV_WC_REM_ACCESS_ERR;
++ case FLUSH_LOC_QP_OP_ERR:
++ return IBV_WC_LOC_QP_OP_ERR;
++ case FLUSH_REM_OP_ERR:
++ return IBV_WC_REM_OP_ERR;
++ case FLUSH_LOC_LEN_ERR:
++ return IBV_WC_LOC_LEN_ERR;
++ case FLUSH_GENERAL_ERR:
++ return IBV_WC_WR_FLUSH_ERR;
++ case FLUSH_RETRY_EXC_ERR:
++ return IBV_WC_RETRY_EXC_ERR;
++ case FLUSH_MW_BIND_ERR:
++ return IBV_WC_MW_BIND_ERR;
++ case FLUSH_REM_INV_REQ_ERR:
++ return IBV_WC_REM_INV_REQ_ERR;
++ case FLUSH_FATAL_ERR:
++ default:
++ return IBV_WC_FATAL_ERR;
++ }
++}
++
++/**
++ * zxdh_process_cqe_ext - process current cqe for extended CQ
++ * @cur_cqe - current cqe info
++ */
++static inline void zxdh_process_cqe_ext(struct zxdh_cq_poll_info *cur_cqe)
++{
++ struct zxdh_ucq *iwucq =
++ container_of(cur_cqe, struct zxdh_ucq, cur_cqe);
++ struct ibv_cq_ex *ibvcq_ex = &iwucq->verbs_cq.cq_ex;
++
++ ibvcq_ex->wr_id = cur_cqe->wr_id;
++ if (cur_cqe->error)
++ ibvcq_ex->status =
++ (cur_cqe->comp_status == ZXDH_COMPL_STATUS_FLUSHED) ?
++ zxdh_flush_err_to_ib_wc_status(
++ cur_cqe->minor_err) :
++ IBV_WC_GENERAL_ERR;
++ else
++ ibvcq_ex->status = IBV_WC_SUCCESS;
++}
++
++/**
++ * zxdh_process_cqe - process current cqe info
++ * @entry - ibv_wc object to fill in for non-extended CQ
++ * @cur_cqe - current cqe info
++ */
++static inline void zxdh_process_cqe(struct ibv_wc *entry,
++ struct zxdh_cq_poll_info *cur_cqe)
++{
++ struct zxdh_qp_uk *qp;
++ struct ibv_qp *ib_qp;
++
++ entry->wc_flags = 0;
++ entry->wr_id = cur_cqe->wr_id;
++ entry->qp_num = cur_cqe->qp_id;
++ qp = cur_cqe->qp_handle;
++ ib_qp = qp->back_qp;
++
++ if (cur_cqe->error) {
++ entry->status =
++ (cur_cqe->comp_status == ZXDH_COMPL_STATUS_FLUSHED) ?
++ zxdh_flush_err_to_ib_wc_status(
++ cur_cqe->minor_err) :
++ IBV_WC_GENERAL_ERR;
++ entry->vendor_err =
++ cur_cqe->major_err << 16 | cur_cqe->minor_err;
++ } else {
++ entry->status = IBV_WC_SUCCESS;
++ }
++
++ if (cur_cqe->imm_valid) {
++ entry->imm_data = htonl(cur_cqe->imm_data);
++ entry->wc_flags |= IBV_WC_WITH_IMM;
++ }
++
++ switch (cur_cqe->op_type) {
++ case ZXDH_OP_TYPE_SEND:
++ case ZXDH_OP_TYPE_SEND_WITH_IMM:
++ case ZXDH_OP_TYPE_SEND_INV:
++ case ZXDH_OP_TYPE_UD_SEND:
++ case ZXDH_OP_TYPE_UD_SEND_WITH_IMM:
++ entry->opcode = IBV_WC_SEND;
++ break;
++ case ZXDH_OP_TYPE_WRITE:
++ case ZXDH_OP_TYPE_WRITE_WITH_IMM:
++ entry->opcode = IBV_WC_RDMA_WRITE;
++ break;
++ case ZXDH_OP_TYPE_READ:
++ entry->opcode = IBV_WC_RDMA_READ;
++ break;
++ case ZXDH_OP_TYPE_BIND_MW:
++ entry->opcode = IBV_WC_BIND_MW;
++ break;
++ case ZXDH_OP_TYPE_LOCAL_INV:
++ entry->opcode = IBV_WC_LOCAL_INV;
++ break;
++ case ZXDH_OP_TYPE_REC:
++ entry->opcode = IBV_WC_RECV;
++ if (ib_qp->qp_type != IBV_QPT_UD && cur_cqe->stag_invalid_set) {
++ entry->invalidated_rkey = cur_cqe->inv_stag;
++ entry->wc_flags |= IBV_WC_WITH_INV;
++ }
++ break;
++ case ZXDH_OP_TYPE_REC_IMM:
++ entry->opcode = IBV_WC_RECV_RDMA_WITH_IMM;
++ if (ib_qp->qp_type != IBV_QPT_UD && cur_cqe->stag_invalid_set) {
++ entry->invalidated_rkey = cur_cqe->inv_stag;
++ entry->wc_flags |= IBV_WC_WITH_INV;
++ }
++ break;
++ default:
++ entry->status = IBV_WC_GENERAL_ERR;
++ return;
++ }
++
++ if (ib_qp->qp_type == IBV_QPT_UD) {
++ entry->src_qp = cur_cqe->ud_src_qpn;
++ entry->wc_flags |= IBV_WC_GRH;
++ entry->sl = cur_cqe->ipv4 ? 2 : 1;
++ } else {
++ entry->src_qp = cur_cqe->qp_id;
++ }
++ entry->byte_len = cur_cqe->bytes_xfered;
++}
++
++/**
++ * zxdh_poll_one - poll one entry of the CQ
++ * @ukcq: ukcq to poll
++ * @cur_cqe: current CQE info to be filled in
++ * @entry: ibv_wc object to be filled for non-extended CQ or NULL for extended CQ
++ *
++ * Returns the internal zxdh device error code or 0 on success
++ */
++static int zxdh_poll_one(struct zxdh_cq_uk *ukcq,
++ struct zxdh_cq_poll_info *cur_cqe,
++ struct ibv_wc *entry)
++{
++ int ret = zxdh_uk_cq_poll_cmpl(ukcq, cur_cqe);
++
++ if (ret)
++ return ret;
++
++ if (entry)
++ zxdh_process_cqe(entry, cur_cqe);
++ else
++ zxdh_process_cqe_ext(cur_cqe);
++
++ return 0;
++}
++
++/**
++ * __zxdh_upoll_resize_cq - zxdh util function to poll device CQ
++ * @iwucq: zxdh cq to poll
++ * @num_entries: max cq entries to poll
++ * @entry: pointer to array of ibv_wc objects to be filled in for each completion or NULL if ext CQ
++ *
++ * Returns non-negative value equal to the number of completions
++ * found. On failure, -EINVAL
++ */
++static int __zxdh_upoll_resize_cq(struct zxdh_ucq *iwucq, int num_entries,
++ struct ibv_wc *entry)
++{
++ struct zxdh_cq_buf *cq_buf, *next;
++ struct zxdh_cq_buf *last_buf = NULL;
++ struct zxdh_cq_poll_info *cur_cqe = &iwucq->cur_cqe;
++ bool cq_new_cqe = false;
++ int resized_bufs = 0;
++ int npolled = 0;
++ int ret;
++
++ /* go through the list of previously resized CQ buffers */
++ list_for_each_safe(&iwucq->resize_list, cq_buf, next, list) {
++ while (npolled < num_entries) {
++ ret = zxdh_poll_one(&cq_buf->cq, cur_cqe,
++ entry ? entry + npolled : NULL);
++ if (ret == ZXDH_SUCCESS) {
++ ++npolled;
++ cq_new_cqe = true;
++ continue;
++ }
++ if (ret == ZXDH_ERR_Q_EMPTY)
++ break;
++ if (ret == ZXDH_ERR_RETRY_ACK_NOT_EXCEED_ERR)
++ break;
++ /* QP using the CQ is destroyed. Skip reporting this CQE */
++ if (ret == ZXDH_ERR_Q_DESTROYED) {
++ cq_new_cqe = true;
++ continue;
++ }
++			printf("__zxdh_upoll_resize_cq: poll of resized cq buffer failed\n");
++ goto error;
++ }
++
++ /* save the resized CQ buffer which received the last cqe */
++ if (cq_new_cqe)
++ last_buf = cq_buf;
++ cq_new_cqe = false;
++ }
++
++ /* check the current CQ for new cqes */
++ while (npolled < num_entries) {
++ ret = zxdh_poll_one(&iwucq->cq, cur_cqe,
++ entry ? entry + npolled : NULL);
++ if (ret == ZXDH_SUCCESS) {
++ ++npolled;
++ cq_new_cqe = true;
++ continue;
++ }
++ if (ret == ZXDH_ERR_Q_EMPTY)
++ break;
++ if (ret == ZXDH_ERR_RETRY_ACK_NOT_EXCEED_ERR)
++ break;
++ /* QP using the CQ is destroyed. Skip reporting this CQE */
++ if (ret == ZXDH_ERR_Q_DESTROYED) {
++ cq_new_cqe = true;
++ continue;
++ }
++		printf("__zxdh_upoll_resize_cq: poll cq failed\n");
++ goto error;
++ }
++ if (cq_new_cqe)
++ /* all previous CQ resizes are complete */
++ resized_bufs = zxdh_process_resize_list(iwucq, NULL);
++ else if (last_buf)
++ /* only CQ resizes up to the last_buf are complete */
++ resized_bufs = zxdh_process_resize_list(iwucq, last_buf);
++ if (resized_bufs)
++ /* report to the HW the number of complete CQ resizes */
++ zxdh_uk_cq_set_resized_cnt(&iwucq->cq, resized_bufs);
++
++ return npolled;
++
++error:
++
++ return -EINVAL;
++}
++
++/**
++ * __zxdh_upoll_current_cq - zxdh util function to poll device CQ
++ * @iwucq: zxdh cq to poll
++ * @num_entries: max cq entries to poll
++ * @entry: pointer to array of ibv_wc objects to be filled in for each completion or NULL if ext CQ
++ *
++ * Returns non-negative value equal to the number of completions
++ * found. On failure, -EINVAL
++ */
++static int __zxdh_upoll_current_cq(struct zxdh_ucq *iwucq, int num_entries,
++ struct ibv_wc *entry)
++{
++ struct zxdh_cq_poll_info *cur_cqe = &iwucq->cur_cqe;
++ int npolled = 0;
++ int ret;
++
++ /* check the current CQ for new cqes */
++ while (npolled < num_entries) {
++ ret = zxdh_poll_one(&iwucq->cq, cur_cqe,
++ entry ? entry + npolled : NULL);
++ if (unlikely(ret != ZXDH_SUCCESS))
++ break;
++ ++npolled;
++ }
++ return npolled;
++}
++
++/**
++ * zxdh_upoll_cq - verb API callback to poll device CQ
++ * @cq: ibv_cq to poll
++ * @num_entries: max cq entries to poll
++ * @entry: pointer to array of ibv_wc objects to be filled in for each completion
++ *
++ * Returns non-negative value equal to the number of completions
++ * found and a negative error code on failure
++ */
++int zxdh_upoll_cq(struct ibv_cq *cq, int num_entries, struct ibv_wc *entry)
++{
++ struct zxdh_ucq *iwucq;
++ int ret;
++
++ iwucq = container_of(cq, struct zxdh_ucq, verbs_cq.cq);
++ ret = pthread_spin_lock(&iwucq->lock);
++ if (ret)
++ return -ret;
++
++ if (likely(!iwucq->resize_enable))
++		ret = __zxdh_upoll_current_cq(iwucq, num_entries, entry);
++ else
++ ret = __zxdh_upoll_resize_cq(iwucq, num_entries, entry);
++
++ pthread_spin_unlock(&iwucq->lock);
++
++ return ret;
++}
++
++/**
++ * zxdh_start_poll - verb_ex API callback to poll a batch of WCs
++ * @ibvcq_ex: ibv extended CQ
++ * @attr: attributes (not used)
++ *
++ * Start polling a batch of work completions. Returns 0 on success, ENOENT
++ * when no completions are available on the CQ, and an error code on failure.
++ */
++static int zxdh_start_poll(struct ibv_cq_ex *ibvcq_ex,
++ struct ibv_poll_cq_attr *attr)
++{
++ struct zxdh_ucq *iwucq;
++ int ret;
++
++ iwucq = container_of(ibvcq_ex, struct zxdh_ucq, verbs_cq.cq_ex);
++ ret = pthread_spin_lock(&iwucq->lock);
++ if (ret)
++ return ret;
++
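++	/*
++	 * On success the CQ lock stays held; it is released by
++	 * zxdh_end_poll() once the caller has consumed the batch.
++	 */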
++ if (!iwucq->resize_enable) {
++		ret = __zxdh_upoll_current_cq(iwucq, 1, NULL);
++ if (ret == 1)
++ return 0;
++ } else {
++ ret = __zxdh_upoll_resize_cq(iwucq, 1, NULL);
++ if (ret == 1)
++ return 0;
++ }
++
++ /* No Completions on CQ */
++ if (!ret)
++ ret = ENOENT;
++
++ pthread_spin_unlock(&iwucq->lock);
++
++ return ret;
++}
++
++/**
++ * zxdh_next_poll - verb_ex API callback to get next WC
++ * @ibvcq_ex: ibv extended CQ
++ *
++ * Returns 0 on success, ENOENT when no completions are available on the CQ,
++ * and an error code on failure.
++ */
++static int zxdh_next_poll(struct ibv_cq_ex *ibvcq_ex)
++{
++ struct zxdh_ucq *iwucq;
++ int ret;
++
++ iwucq = container_of(ibvcq_ex, struct zxdh_ucq, verbs_cq.cq_ex);
++ if (!iwucq->resize_enable) {
++		ret = __zxdh_upoll_current_cq(iwucq, 1, NULL);
++ if (ret == 1)
++ return 0;
++ } else {
++ ret = __zxdh_upoll_resize_cq(iwucq, 1, NULL);
++ if (ret == 1)
++ return 0;
++ }
++
++ /* No Completions on CQ */
++ if (!ret)
++ ret = ENOENT;
++
++ return ret;
++}
++
++/**
++ * zxdh_end_poll - verb_ex API callback to end polling of WCs
++ * @ibvcq_ex: ibv extended CQ
++ */
++static void zxdh_end_poll(struct ibv_cq_ex *ibvcq_ex)
++{
++ struct zxdh_ucq *iwucq =
++ container_of(ibvcq_ex, struct zxdh_ucq, verbs_cq.cq_ex);
++
++ pthread_spin_unlock(&iwucq->lock);
++}
++
++/**
++ * zxdh_wc_read_completion_ts - Get completion timestamp
++ * @ibvcq_ex: ibv extended CQ
++ *
++ * Get completion timestamp in HCA clock units
++ */
++static uint64_t zxdh_wc_read_completion_ts(struct ibv_cq_ex *ibvcq_ex)
++{
++ struct zxdh_ucq *iwucq =
++ container_of(ibvcq_ex, struct zxdh_ucq, verbs_cq.cq_ex);
++#define HCA_CORE_CLOCK_800_MHZ 800
++
++ return iwucq->cur_cqe.tcp_seq_num_rtt / HCA_CORE_CLOCK_800_MHZ;
++}
++
++/**
++ * zxdh_wc_read_completion_wallclock_ns - Get completion timestamp in ns
++ * @ibvcq_ex: ibv extended CQ
++ *
++ * Get completion timestamp from current completion in wall clock nanoseconds
++ */
++static uint64_t zxdh_wc_read_completion_wallclock_ns(struct ibv_cq_ex *ibvcq_ex)
++{
++ struct zxdh_ucq *iwucq =
++ container_of(ibvcq_ex, struct zxdh_ucq, verbs_cq.cq_ex);
++
++ /* RTT is in usec */
++ return iwucq->cur_cqe.tcp_seq_num_rtt * 1000;
++}
++
++static enum ibv_wc_opcode zxdh_wc_read_opcode(struct ibv_cq_ex *ibvcq_ex)
++{
++ struct zxdh_ucq *iwucq =
++ container_of(ibvcq_ex, struct zxdh_ucq, verbs_cq.cq_ex);
++
++ switch (iwucq->cur_cqe.op_type) {
++ case ZXDH_OP_TYPE_WRITE:
++ case ZXDH_OP_TYPE_WRITE_WITH_IMM:
++ return IBV_WC_RDMA_WRITE;
++ case ZXDH_OP_TYPE_READ:
++ return IBV_WC_RDMA_READ;
++ case ZXDH_OP_TYPE_SEND:
++ case ZXDH_OP_TYPE_SEND_WITH_IMM:
++ case ZXDH_OP_TYPE_SEND_INV:
++ case ZXDH_OP_TYPE_UD_SEND:
++ case ZXDH_OP_TYPE_UD_SEND_WITH_IMM:
++ return IBV_WC_SEND;
++ case ZXDH_OP_TYPE_BIND_MW:
++ return IBV_WC_BIND_MW;
++ case ZXDH_OP_TYPE_REC:
++ return IBV_WC_RECV;
++ case ZXDH_OP_TYPE_REC_IMM:
++ return IBV_WC_RECV_RDMA_WITH_IMM;
++ case ZXDH_OP_TYPE_LOCAL_INV:
++ return IBV_WC_LOCAL_INV;
++ }
++
++ return 0;
++}
++
++static uint32_t zxdh_wc_read_vendor_err(struct ibv_cq_ex *ibvcq_ex)
++{
++ struct zxdh_cq_poll_info *cur_cqe;
++ struct zxdh_ucq *iwucq;
++
++ iwucq = container_of(ibvcq_ex, struct zxdh_ucq, verbs_cq.cq_ex);
++ cur_cqe = &iwucq->cur_cqe;
++
++ return cur_cqe->error ? cur_cqe->major_err << 16 | cur_cqe->minor_err :
++ 0;
++}
++
++static unsigned int zxdh_wc_read_wc_flags(struct ibv_cq_ex *ibvcq_ex)
++{
++ struct zxdh_cq_poll_info *cur_cqe;
++ struct zxdh_ucq *iwucq;
++ struct zxdh_qp_uk *qp;
++ struct ibv_qp *ib_qp;
++ unsigned int wc_flags = 0;
++
++ iwucq = container_of(ibvcq_ex, struct zxdh_ucq, verbs_cq.cq_ex);
++ cur_cqe = &iwucq->cur_cqe;
++ qp = cur_cqe->qp_handle;
++ ib_qp = qp->back_qp;
++
++ if (cur_cqe->imm_valid)
++ wc_flags |= IBV_WC_WITH_IMM;
++
++ if (ib_qp->qp_type == IBV_QPT_UD) {
++ wc_flags |= IBV_WC_GRH;
++ } else {
++ if (cur_cqe->stag_invalid_set) {
++ switch (cur_cqe->op_type) {
++ case ZXDH_OP_TYPE_REC:
++ wc_flags |= IBV_WC_WITH_INV;
++ break;
++ case ZXDH_OP_TYPE_REC_IMM:
++ wc_flags |= IBV_WC_WITH_INV;
++ break;
++ }
++ }
++ }
++
++ return wc_flags;
++}
++
++static uint32_t zxdh_wc_read_byte_len(struct ibv_cq_ex *ibvcq_ex)
++{
++ struct zxdh_ucq *iwucq =
++ container_of(ibvcq_ex, struct zxdh_ucq, verbs_cq.cq_ex);
++
++ return iwucq->cur_cqe.bytes_xfered;
++}
++
++static __be32 zxdh_wc_read_imm_data(struct ibv_cq_ex *ibvcq_ex)
++{
++ struct zxdh_cq_poll_info *cur_cqe;
++ struct zxdh_ucq *iwucq;
++
++ iwucq = container_of(ibvcq_ex, struct zxdh_ucq, verbs_cq.cq_ex);
++ cur_cqe = &iwucq->cur_cqe;
++
++ return cur_cqe->imm_valid ? htonl(cur_cqe->imm_data) : 0;
++}
++
++static uint32_t zxdh_wc_read_qp_num(struct ibv_cq_ex *ibvcq_ex)
++{
++ struct zxdh_ucq *iwucq =
++ container_of(ibvcq_ex, struct zxdh_ucq, verbs_cq.cq_ex);
++
++ return iwucq->cur_cqe.qp_id;
++}
++
++static uint32_t zxdh_wc_read_src_qp(struct ibv_cq_ex *ibvcq_ex)
++{
++ struct zxdh_cq_poll_info *cur_cqe;
++ struct zxdh_ucq *iwucq;
++ struct zxdh_qp_uk *qp;
++ struct ibv_qp *ib_qp;
++
++ iwucq = container_of(ibvcq_ex, struct zxdh_ucq, verbs_cq.cq_ex);
++ cur_cqe = &iwucq->cur_cqe;
++ qp = cur_cqe->qp_handle;
++ ib_qp = qp->back_qp;
++
++ return ib_qp->qp_type == IBV_QPT_UD ? cur_cqe->ud_src_qpn :
++ cur_cqe->qp_id;
++}
++
++static uint32_t zxdh_wc_read_slid(struct ibv_cq_ex *ibvcq_ex)
++{
++ return 0;
++}
++
++static uint8_t zxdh_wc_read_sl(struct ibv_cq_ex *ibvcq_ex)
++{
++ return 0;
++}
++
++static uint8_t zxdh_wc_read_dlid_path_bits(struct ibv_cq_ex *ibvcq_ex)
++{
++ return 0;
++}
++
++void zxdh_ibvcq_ex_fill_priv_funcs(struct zxdh_ucq *iwucq,
++ struct ibv_cq_init_attr_ex *attr_ex)
++{
++ struct ibv_cq_ex *ibvcq_ex = &iwucq->verbs_cq.cq_ex;
++
++ ibvcq_ex->start_poll = zxdh_start_poll;
++ ibvcq_ex->end_poll = zxdh_end_poll;
++ ibvcq_ex->next_poll = zxdh_next_poll;
++
++ if (attr_ex->wc_flags & IBV_WC_EX_WITH_COMPLETION_TIMESTAMP) {
++ ibvcq_ex->read_completion_ts = zxdh_wc_read_completion_ts;
++ iwucq->report_rtt = true;
++ }
++ if (attr_ex->wc_flags & IBV_WC_EX_WITH_COMPLETION_TIMESTAMP_WALLCLOCK) {
++ ibvcq_ex->read_completion_wallclock_ns =
++ zxdh_wc_read_completion_wallclock_ns;
++ iwucq->report_rtt = true;
++ }
++
++ ibvcq_ex->read_opcode = zxdh_wc_read_opcode;
++ ibvcq_ex->read_vendor_err = zxdh_wc_read_vendor_err;
++ ibvcq_ex->read_wc_flags = zxdh_wc_read_wc_flags;
++
++ if (attr_ex->wc_flags & IBV_WC_EX_WITH_BYTE_LEN)
++ ibvcq_ex->read_byte_len = zxdh_wc_read_byte_len;
++ if (attr_ex->wc_flags & IBV_WC_EX_WITH_IMM)
++ ibvcq_ex->read_imm_data = zxdh_wc_read_imm_data;
++ if (attr_ex->wc_flags & IBV_WC_EX_WITH_QP_NUM)
++ ibvcq_ex->read_qp_num = zxdh_wc_read_qp_num;
++ if (attr_ex->wc_flags & IBV_WC_EX_WITH_SRC_QP)
++ ibvcq_ex->read_src_qp = zxdh_wc_read_src_qp;
++ if (attr_ex->wc_flags & IBV_WC_EX_WITH_SLID)
++ ibvcq_ex->read_slid = zxdh_wc_read_slid;
++ if (attr_ex->wc_flags & IBV_WC_EX_WITH_SL)
++ ibvcq_ex->read_sl = zxdh_wc_read_sl;
++ if (attr_ex->wc_flags & IBV_WC_EX_WITH_DLID_PATH_BITS)
++ ibvcq_ex->read_dlid_path_bits = zxdh_wc_read_dlid_path_bits;
++}
++
++/**
++ * zxdh_arm_cq - arm a cq
++ * @iwucq: cq to arm
++ * @cq_notify: notification parameters
++ */
++static void zxdh_arm_cq(struct zxdh_ucq *iwucq, enum zxdh_cmpl_notify cq_notify)
++{
++ iwucq->is_armed = true;
++ iwucq->last_notify = cq_notify;
++
++ zxdh_uk_cq_request_notification(&iwucq->cq, cq_notify);
++}
++
++/**
++ * zxdh_uarm_cq - callback to arm a cq
++ * @cq: cq to arm
++ * @solicited: arm only for solicited completions
++ */
++int zxdh_uarm_cq(struct ibv_cq *cq, int solicited)
++{
++ struct zxdh_ucq *iwucq;
++ enum zxdh_cmpl_notify cq_notify = ZXDH_CQ_COMPL_EVENT;
++ bool promo_event = false;
++ int ret;
++
++ iwucq = container_of(cq, struct zxdh_ucq, verbs_cq.cq);
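++	/*
++	 * An unsolicited arm request supersedes a previous solicited-only
++	 * arm, so treat it as a promotion and re-arm even though the CQ is
++	 * already armed.
++	 */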
++ if (solicited) {
++ cq_notify = ZXDH_CQ_COMPL_SOLICITED;
++ } else {
++ if (iwucq->last_notify == ZXDH_CQ_COMPL_SOLICITED)
++ promo_event = true;
++ }
++
++ ret = pthread_spin_lock(&iwucq->lock);
++ if (ret)
++ return ret;
++
++ if (!iwucq->is_armed || promo_event)
++ zxdh_arm_cq(iwucq, cq_notify);
++
++ pthread_spin_unlock(&iwucq->lock);
++
++ return 0;
++}
++
++/**
++ * zxdh_cq_event - handle a completion event on a cq
++ * @cq: cq that received the event
++ */
++void zxdh_cq_event(struct ibv_cq *cq)
++{
++ struct zxdh_ucq *iwucq;
++
++ iwucq = container_of(cq, struct zxdh_ucq, verbs_cq.cq);
++ if (pthread_spin_lock(&iwucq->lock))
++ return;
++
++ iwucq->is_armed = false;
++
++ pthread_spin_unlock(&iwucq->lock);
++}
++
++void *zxdh_mmap(int fd, off_t offset)
++{
++ void *map;
++
++ map = mmap(NULL, ZXDH_HW_PAGE_SIZE, PROT_WRITE | PROT_READ, MAP_SHARED,
++ fd, offset);
++ if (map == MAP_FAILED)
++ return map;
++
++ if (ibv_dontfork_range(map, ZXDH_HW_PAGE_SIZE)) {
++ munmap(map, ZXDH_HW_PAGE_SIZE);
++ return MAP_FAILED;
++ }
++
++ return map;
++}
++
++void zxdh_munmap(void *map)
++{
++ ibv_dofork_range(map, ZXDH_HW_PAGE_SIZE);
++ munmap(map, ZXDH_HW_PAGE_SIZE);
++}
++
++/**
++ * zxdh_destroy_vmapped_qp - destroy resources for qp
++ * @iwuqp: qp struct for resources
++ */
++static int zxdh_destroy_vmapped_qp(struct zxdh_uqp *iwuqp)
++{
++ int ret;
++
++ ret = ibv_cmd_destroy_qp(&iwuqp->vqp.qp);
++ if (ret)
++ return ret;
++
++ ibv_cmd_dereg_mr(&iwuqp->vmr);
++
++ return 0;
++}
++
++/**
++ * zxdh_vmapped_qp - create resources for qp
++ * @iwuqp: qp struct for resources
++ * @pd: pd for the qp
++ * @attr: attributes of qp passed
++ * @sqdepth: depth of sq
++ * @rqdepth: depth of rq
++ * @info: info for initializing user level qp
++ * @legacy_mode: true if the context is running in legacy mode
++ */
++static int zxdh_vmapped_qp(struct zxdh_uqp *iwuqp, struct ibv_pd *pd,
++ struct ibv_qp_init_attr *attr, int sqdepth,
++ int rqdepth, struct zxdh_qp_uk_init_info *info,
++ bool legacy_mode)
++{
++ struct zxdh_ucreate_qp cmd = {};
++ size_t sqsize, rqsize, totalqpsize;
++ struct zxdh_ucreate_qp_resp resp = {};
++ struct zxdh_ureg_mr reg_mr_cmd = {};
++ struct ib_uverbs_reg_mr_resp reg_mr_resp = {};
++ int ret;
++
++ rqsize = 0;
++ sqsize = roundup(sqdepth * ZXDH_QP_SQE_MIN_SIZE, ZXDH_HW_PAGE_SIZE);
++ if (iwuqp->is_srq == false) {
++ rqsize = roundup(rqdepth * ZXDH_QP_RQE_MIN_SIZE,
++ ZXDH_HW_PAGE_SIZE);
++ totalqpsize = rqsize + sqsize + ZXDH_DB_SHADOW_AREA_SIZE;
++ } else {
++ totalqpsize = sqsize + ZXDH_DB_SHADOW_AREA_SIZE;
++ }
++ info->sq = zxdh_alloc_hw_buf(totalqpsize);
++ iwuqp->buf_size = totalqpsize;
++
++ if (!info->sq)
++ return -ENOMEM;
++
++ memset(info->sq, 0, totalqpsize);
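++	/*
++	 * Single contiguous buffer layout: SQ quanta first, then RQ quanta
++	 * (only when the QP is not attached to an SRQ), and the doorbell
++	 * shadow area at the tail.
++	 */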
++ if (iwuqp->is_srq == false) {
++ info->rq = (struct zxdh_qp_rq_quanta *)&info
++ ->sq[sqsize / ZXDH_QP_SQE_MIN_SIZE];
++ info->shadow_area =
++ info->rq[rqsize / ZXDH_QP_RQE_MIN_SIZE].elem;
++ reg_mr_cmd.rq_pages = rqsize >> ZXDH_HW_PAGE_SHIFT;
++ } else {
++ info->shadow_area =
++ (__le64 *)&info->sq[sqsize / ZXDH_QP_SQE_MIN_SIZE];
++ }
++ reg_mr_cmd.reg_type = ZXDH_MEMREG_TYPE_QP;
++ reg_mr_cmd.sq_pages = sqsize >> ZXDH_HW_PAGE_SHIFT;
++
++ ret = ibv_cmd_reg_mr(pd, info->sq, totalqpsize, (uintptr_t)info->sq,
++ IBV_ACCESS_LOCAL_WRITE, &iwuqp->vmr,
++			     &reg_mr_cmd.ibv_cmd, sizeof(reg_mr_cmd),
++			     &reg_mr_resp, sizeof(reg_mr_resp));
++ if (ret)
++ goto err_dereg_mr;
++
++ cmd.user_wqe_bufs = (__u64)((uintptr_t)info->sq);
++ cmd.user_compl_ctx = (__u64)(uintptr_t)&iwuqp->qp;
++ ret = ibv_cmd_create_qp(pd, &iwuqp->vqp.qp, attr, &cmd.ibv_cmd,
++ sizeof(cmd), &resp.ibv_resp,
++ sizeof(struct zxdh_ucreate_qp_resp));
++ if (ret)
++ goto err_qp;
++
++ info->sq_size = resp.actual_sq_size;
++ info->rq_size = resp.actual_rq_size;
++ info->qp_caps = resp.qp_caps;
++ info->qp_id = resp.qp_id;
++ iwuqp->zxdh_drv_opt = resp.zxdh_drv_opt;
++ iwuqp->vqp.qp.qp_num = resp.qp_id;
++
++ iwuqp->send_cq =
++ container_of(attr->send_cq, struct zxdh_ucq, verbs_cq.cq);
++ iwuqp->recv_cq =
++ container_of(attr->recv_cq, struct zxdh_ucq, verbs_cq.cq);
++ iwuqp->send_cq->uqp = iwuqp;
++ iwuqp->recv_cq->uqp = iwuqp;
++
++ return 0;
++err_qp:
++ ibv_cmd_dereg_mr(&iwuqp->vmr);
++err_dereg_mr:
++ zxdh_free_hw_buf(info->sq, iwuqp->buf_size);
++ return ret;
++}
++
++static void zxdh_wr_local_inv(struct ibv_qp_ex *ibqp, uint32_t invalidate_rkey)
++{
++ struct zxdh_uqp *qp = container_of(ibqp, struct zxdh_uqp, vqp.qp_ex);
++ struct ibv_send_wr wr = {};
++ struct ibv_send_wr *bad_wr = NULL;
++
++ wr.opcode = IBV_WR_LOCAL_INV;
++ wr.invalidate_rkey = invalidate_rkey;
++
++ zxdh_upost_send(&qp->vqp.qp, &wr, &bad_wr);
++}
++
++static void zxdh_send_wr_send_inv(struct ibv_qp_ex *ibqp,
++ uint32_t invalidate_rkey)
++{
++ struct zxdh_uqp *qp = container_of(ibqp, struct zxdh_uqp, vqp.qp_ex);
++ struct ibv_send_wr wr = {};
++ struct ibv_send_wr *bad_wr = NULL;
++
++ wr.opcode = IBV_WR_SEND_WITH_INV;
++ wr.invalidate_rkey = invalidate_rkey;
++
++ zxdh_upost_send(&qp->vqp.qp, &wr, &bad_wr);
++}
++
++static void zxdh_wr_bind_mw(struct ibv_qp_ex *ibqp, struct ibv_mw *ibmw,
++ uint32_t rkey, const struct ibv_mw_bind_info *info)
++{
++ struct zxdh_uqp *qp = container_of(ibqp, struct zxdh_uqp, vqp.qp_ex);
++ struct ibv_send_wr wr = {};
++ struct ibv_send_wr *bad_wr = NULL;
++
++ if (ibmw->type != IBV_MW_TYPE_2)
++ return;
++
++ wr.opcode = IBV_WR_BIND_MW;
++ wr.bind_mw.bind_info = *info;
++ wr.bind_mw.mw = ibmw;
++ wr.bind_mw.rkey = rkey;
++
++ zxdh_upost_send(&qp->vqp.qp, &wr, &bad_wr);
++}
++
++static struct ibv_qp *create_qp(struct ibv_context *ibv_ctx,
++ struct ibv_qp_init_attr_ex *attr_ex)
++{
++ struct zxdh_qp_uk_init_info info = {};
++ struct zxdh_uk_attrs *uk_attrs;
++ struct zxdh_uvcontext *iwvctx;
++ struct zxdh_uqp *iwuqp;
++ struct zxdh_usrq *iwusrq;
++ struct ibv_pd *pd = attr_ex->pd;
++ struct ibv_qp_init_attr *attr;
++ __u32 sqdepth, rqdepth;
++ __u8 sqshift, rqshift;
++ int status;
++
++ attr = calloc(1, sizeof(*attr));
++ if (!attr)
++ return NULL;
++
++ memcpy(attr, attr_ex, sizeof(*attr));
++
++ if (attr->qp_type != IBV_QPT_RC && attr->qp_type != IBV_QPT_UD) {
++ errno = EOPNOTSUPP;
++ free(attr);
++ return NULL;
++ }
++
++ iwvctx = container_of(ibv_ctx, struct zxdh_uvcontext, ibv_ctx.context);
++ uk_attrs = &iwvctx->uk_attrs;
++
++ if (attr->cap.max_send_sge > uk_attrs->max_hw_wq_frags ||
++ attr->cap.max_recv_sge > uk_attrs->max_hw_wq_frags) {
++ errno = EINVAL;
++ free(attr);
++ return NULL;
++ }
++
++ if (attr->cap.max_inline_data > uk_attrs->max_hw_inline) {
++ zxdh_dbg(ZXDH_DBG_QP, "max_inline_data over max_hw_inline\n");
++ attr->cap.max_inline_data = uk_attrs->max_hw_inline;
++ }
++
++ zxdh_get_sq_wqe_shift(uk_attrs, attr->cap.max_send_sge,
++ attr->cap.max_inline_data, &sqshift);
++ status = zxdh_get_sqdepth(uk_attrs, attr->cap.max_send_wr, sqshift,
++ &sqdepth);
++ if (status) {
++ errno = EINVAL;
++ free(attr);
++ return NULL;
++ }
++
++ zxdh_get_rq_wqe_shift(uk_attrs, attr->cap.max_recv_sge, &rqshift);
++ status = zxdh_get_rqdepth(uk_attrs, attr->cap.max_recv_wr, rqshift,
++ &rqdepth);
++ if (status) {
++ errno = EINVAL;
++ free(attr);
++ return NULL;
++ }
++
++ iwuqp = memalign(1024, sizeof(*iwuqp));
++ if (!iwuqp) {
++ free(attr);
++ return NULL;
++ }
++
++ memset(iwuqp, 0, sizeof(*iwuqp));
++
++ if (attr_ex->comp_mask & IBV_QP_INIT_ATTR_SEND_OPS_FLAGS) {
++ if (attr_ex->send_ops_flags & ~IBV_QP_EX_WITH_BIND_MW) {
++ errno = EOPNOTSUPP;
++ free(iwuqp);
++ free(attr);
++ return NULL;
++ }
++
++ iwuqp->vqp.comp_mask |= VERBS_QP_EX;
++ if (attr_ex->send_ops_flags & IBV_QP_EX_WITH_BIND_MW)
++ iwuqp->vqp.qp_ex.wr_bind_mw = zxdh_wr_bind_mw;
++
++ if (attr_ex->send_ops_flags & IBV_QP_EX_WITH_SEND_WITH_INV)
++ iwuqp->vqp.qp_ex.wr_send_inv = zxdh_send_wr_send_inv;
++
++ if (attr_ex->send_ops_flags & IBV_QP_EX_WITH_LOCAL_INV)
++ iwuqp->vqp.qp_ex.wr_local_inv = zxdh_wr_local_inv;
++ }
++
++ if (pthread_spin_init(&iwuqp->lock, PTHREAD_PROCESS_PRIVATE))
++ goto err_free_qp;
++
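++	/*
++	 * sqdepth/rqdepth are expressed in hardware quanta; shifting by
++	 * sqshift/rqshift (derived above from the SGE and inline limits)
++	 * converts them to the WQE counts used to size the queues.
++	 */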
++ info.sq_size = sqdepth >> sqshift;
++ info.rq_size = rqdepth >> rqshift;
++ attr->cap.max_send_wr = info.sq_size;
++ attr->cap.max_recv_wr = info.rq_size;
++
++ info.uk_attrs = uk_attrs;
++ info.max_sq_frag_cnt = attr->cap.max_send_sge;
++ info.max_rq_frag_cnt = attr->cap.max_recv_sge;
++
++ if (attr->srq != NULL) {
++ iwuqp->is_srq = true;
++ iwusrq = container_of(attr->srq, struct zxdh_usrq, ibv_srq);
++ iwuqp->srq = iwusrq;
++ iwuqp->qp.is_srq = true;
++ }
++
++ if (iwuqp->is_srq == false) {
++ iwuqp->recv_sges = calloc(attr->cap.max_recv_sge,
++ sizeof(*iwuqp->recv_sges));
++ if (!iwuqp->recv_sges)
++ goto err_destroy_lock;
++ }
++
++ info.wqe_alloc_db =
++ (__u32 *)((__u8 *)iwvctx->sq_db + ZXDH_DB_SQ_OFFSET);
++ info.abi_ver = iwvctx->abi_ver;
++ info.legacy_mode = iwvctx->legacy_mode;
++ info.sq_wrtrk_array = calloc(sqdepth, sizeof(*info.sq_wrtrk_array));
++ if (!info.sq_wrtrk_array)
++ goto err_free_rsges;
++
++ if (iwuqp->is_srq == false) {
++ info.rq_wrid_array =
++ calloc(info.rq_size, sizeof(*info.rq_wrid_array));
++ if (!info.rq_wrid_array)
++ goto err_free_sq_wrtrk;
++ }
++
++ iwuqp->sq_sig_all = attr->sq_sig_all;
++ iwuqp->qp_type = attr->qp_type;
++ if (attr->qp_type == IBV_QPT_UD)
++ info.type = ZXDH_QP_TYPE_ROCE_UD;
++ else
++ info.type = ZXDH_QP_TYPE_ROCE_RC;
++ status = zxdh_vmapped_qp(iwuqp, pd, attr, sqdepth, rqdepth, &info,
++ iwvctx->legacy_mode);
++ if (status) {
++ errno = status;
++ goto err_free_rq_wrid;
++ }
++
++ iwuqp->qp.back_qp = iwuqp;
++ iwuqp->qp.lock = &iwuqp->lock;
++ info.max_sq_frag_cnt = attr->cap.max_send_sge;
++ info.max_rq_frag_cnt = attr->cap.max_recv_sge;
++ info.max_inline_data = attr->cap.max_inline_data;
++ if (info.type == ZXDH_QP_TYPE_ROCE_RC) {
++ iwuqp->qp.split_sg_list =
++ calloc(2 * uk_attrs->max_hw_read_sges,
++ sizeof(*iwuqp->qp.split_sg_list));
++ if (!iwuqp->qp.split_sg_list)
++ goto err_free_vmap_qp;
++ }
++ status = zxdh_uk_qp_init(&iwuqp->qp, &info);
++ if (status) {
++ errno = EINVAL;
++ goto err_free_sg_list;
++ }
++ iwuqp->qp.mtu = mtu_enum_to_int(IBV_MTU_1024);
++ attr->cap.max_send_wr = (sqdepth - ZXDH_SQ_RSVD) >> sqshift;
++ attr->cap.max_recv_wr = (rqdepth - ZXDH_RQ_RSVD) >> rqshift;
++ memcpy(attr_ex, attr, sizeof(*attr));
++ free(attr);
++ return &iwuqp->vqp.qp;
++
++err_free_sg_list:
++ if (iwuqp->qp.split_sg_list)
++ free(iwuqp->qp.split_sg_list);
++err_free_vmap_qp:
++ zxdh_destroy_vmapped_qp(iwuqp);
++ zxdh_free_hw_buf(info.sq, iwuqp->buf_size);
++err_free_rq_wrid:
++ free(info.rq_wrid_array);
++err_free_sq_wrtrk:
++ free(info.sq_wrtrk_array);
++err_free_rsges:
++ free(iwuqp->recv_sges);
++err_destroy_lock:
++ pthread_spin_destroy(&iwuqp->lock);
++err_free_qp:
++ free(iwuqp);
++ free(attr);
++
++ return NULL;
++}
++
++/**
++ * zxdh_ucreate_qp - create a qp in user space
++ * @pd: pd for the qp
++ * @attr: attributes of the qp to be created (sizes, sge, cq)
++ */
++struct ibv_qp *zxdh_ucreate_qp(struct ibv_pd *pd, struct ibv_qp_init_attr *attr)
++{
++ struct ibv_qp_init_attr_ex attrx = {};
++ struct ibv_qp *qp;
++
++ memcpy(&attrx, attr, sizeof(*attr));
++ attrx.comp_mask = IBV_QP_INIT_ATTR_PD;
++ attrx.pd = pd;
++
++ qp = create_qp(pd->context, &attrx);
++ if (qp)
++ memcpy(attr, &attrx, sizeof(*attr));
++
++ return qp;
++}
++
++/**
++ * zxdh_ucreate_qp_ex - create a qp_ex in user space
++ * @context: user context of the device
++ * @attr: attributes of the qp_ex to be created
++ */
++struct ibv_qp *zxdh_ucreate_qp_ex(struct ibv_context *context,
++ struct ibv_qp_init_attr_ex *attr)
++{
++ return create_qp(context, attr);
++}
++
++/**
++ * zxdh_uquery_qp - query qp attributes
++ * @qp: qp to query
++ * @attr: buffer to return the attributes
++ * @attr_mask: mask of the attributes to query
++ * @init_attr: initial attributes during create_qp
++ */
++int zxdh_uquery_qp(struct ibv_qp *qp, struct ibv_qp_attr *attr, int attr_mask,
++ struct ibv_qp_init_attr *init_attr)
++{
++ struct ibv_query_qp cmd;
++
++ return ibv_cmd_query_qp(qp, attr, attr_mask, init_attr, &cmd,
++ sizeof(cmd));
++}
++
++/**
++ * zxdh_umodify_qp - send qp modify to driver
++ * @qp: qp to modify
++ * @attr: attribute to modify
++ * @attr_mask: mask of the attribute
++ */
++int zxdh_umodify_qp(struct ibv_qp *qp, struct ibv_qp_attr *attr, int attr_mask)
++{
++ struct zxdh_uqp *iwuqp;
++ struct zxdh_umodify_qp_resp resp = {};
++ struct ibv_modify_qp cmd = {};
++ struct zxdh_umodify_qp cmd_ex = {};
++ int ret;
++ __u16 mtu = 0;
++
++ iwuqp = container_of(qp, struct zxdh_uqp, vqp.qp);
++ if (attr_mask & IBV_QP_STATE || attr_mask & IBV_QP_RATE_LIMIT) {
++ ret = ibv_cmd_modify_qp_ex(qp, attr, attr_mask, &cmd_ex.ibv_cmd,
++ sizeof(cmd_ex), &resp.ibv_resp,
++ sizeof(resp));
++ } else {
++ ret = ibv_cmd_modify_qp(qp, attr, attr_mask, &cmd, sizeof(cmd));
++ }
++ if (!ret && (attr_mask & IBV_QP_PATH_MTU) &&
++ qp->qp_type == IBV_QPT_RC) {
++ mtu = mtu_enum_to_int(attr->path_mtu);
++ if (mtu == 0)
++ return -EINVAL;
++ iwuqp->qp.mtu = mtu;
++ }
++ if (!ret && (attr_mask & IBV_QP_SQ_PSN) && qp->qp_type == IBV_QPT_RC) {
++ iwuqp->qp.next_psn = attr->sq_psn;
++ iwuqp->qp.cqe_last_ack_qsn = attr->sq_psn - 1;
++ iwuqp->qp.qp_last_ack_qsn = attr->sq_psn - 1;
++ iwuqp->qp.cqe_retry_cnt = 0;
++ iwuqp->qp.qp_reset_cnt = 0;
++ }
++ return ret;
++}
++
++static void zxdh_issue_flush(struct ibv_qp *qp, bool sq_flush, bool rq_flush)
++{
++ struct ib_uverbs_ex_modify_qp_resp resp = {};
++ struct zxdh_umodify_qp cmd_ex = {};
++ struct ibv_qp_attr attr = {};
++
++ attr.qp_state = IBV_QPS_ERR;
++ cmd_ex.sq_flush = sq_flush;
++ cmd_ex.rq_flush = rq_flush;
++
++ ibv_cmd_modify_qp_ex(qp, &attr, IBV_QP_STATE, &cmd_ex.ibv_cmd,
++ sizeof(cmd_ex), &resp, sizeof(resp));
++}
++
++/**
++ * zxdh_clean_cqes - clean cq entries for qp
++ * @qp: qp for which completions are cleaned
++ * @iwucq: cq to be cleaned
++ */
++static void zxdh_clean_cqes(struct zxdh_qp_uk *qp, struct zxdh_ucq *iwucq)
++{
++ struct zxdh_cq_uk *ukcq = &iwucq->cq;
++ int ret;
++
++ ret = pthread_spin_lock(&iwucq->lock);
++ if (ret)
++ return;
++
++ zxdh_uk_clean_cq(qp, ukcq);
++ pthread_spin_unlock(&iwucq->lock);
++}
++
++/**
++ * zxdh_udestroy_qp - destroy qp
++ * @qp: qp to destroy
++ */
++int zxdh_udestroy_qp(struct ibv_qp *qp)
++{
++ struct zxdh_uqp *iwuqp;
++ int ret;
++
++ iwuqp = container_of(qp, struct zxdh_uqp, vqp.qp);
++ ret = pthread_spin_destroy(&iwuqp->lock);
++ if (ret)
++ goto err;
++
++ iwuqp->qp.destroy_pending = true;
++
++ ret = zxdh_destroy_vmapped_qp(iwuqp);
++ if (ret)
++ goto err;
++
++ /* Clean any pending completions from the cq(s) */
++ if (iwuqp->send_cq)
++ zxdh_clean_cqes(&iwuqp->qp, iwuqp->send_cq);
++
++ if (iwuqp->recv_cq && iwuqp->recv_cq != iwuqp->send_cq)
++ zxdh_clean_cqes(&iwuqp->qp, iwuqp->recv_cq);
++
++ if (iwuqp->qp.sq_wrtrk_array)
++ free(iwuqp->qp.sq_wrtrk_array);
++ if (iwuqp->qp.rq_wrid_array)
++ free(iwuqp->qp.rq_wrid_array);
++ if (iwuqp->qp.split_sg_list)
++ free(iwuqp->qp.split_sg_list);
++
++ zxdh_free_hw_buf(iwuqp->qp.sq_base, iwuqp->buf_size);
++ free(iwuqp->recv_sges);
++ free(iwuqp);
++ return 0;
++
++err:
++ return ret;
++}
++
++/**
++ * zxdh_copy_sg_list - copy sg list for qp
++ * @sg_list: destination sg list
++ * @sgl: source sg list
++ * @num_sges: count of sg entries
++ */
++static void zxdh_copy_sg_list(struct zxdh_sge *sg_list, struct ibv_sge *sgl,
++ int num_sges)
++{
++ int i;
++
++ for (i = 0; i < num_sges; i++) {
++ sg_list[i].tag_off = sgl[i].addr;
++ sg_list[i].len = sgl[i].length;
++ sg_list[i].stag = sgl[i].lkey;
++ }
++}
++
++/**
++ * calc_type2_mw_stag - calculate type 2 MW stag
++ * @rkey: desired rkey of the MW
++ * @mw_rkey: type2 memory window rkey
++ *
++ * compute the type2 memory window stag by taking the lower 8 bits
++ * of the desired rkey and leaving the upper 24 bits of mw_rkey unchanged
++ */
++static inline __u32 calc_type2_mw_stag(__u32 rkey, __u32 mw_rkey)
++{
++ const __u32 mask = 0xff;
++
++ return (rkey & mask) | (mw_rkey & ~mask);
++}
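++
++/*
++ * For illustration (hypothetical values): with rkey = 0x12345678 and
++ * mw_rkey = 0xAABBCC00,
++ *   (0x12345678 & 0xff) | (0xAABBCC00 & ~0xff) = 0xAABBCC78,
++ * i.e. the low 8 bits come from the requested rkey and the upper 24 bits
++ * of the window's existing rkey are preserved.
++ */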
++
++/**
++ * zxdh_upost_send - post send wr for user application
++ * @ib_qp: qp to post wr
++ * @ib_wr: work request ptr
++ * @bad_wr: return of bad wr if error
++ */
++int zxdh_upost_send(struct ibv_qp *ib_qp, struct ibv_send_wr *ib_wr,
++ struct ibv_send_wr **bad_wr)
++{
++ struct zxdh_post_sq_info info;
++ struct zxdh_uvcontext *iwvctx;
++ struct zxdh_uk_attrs *uk_attrs;
++ enum zxdh_status_code ret = 0;
++ struct zxdh_uqp *iwuqp;
++ bool reflush = false;
++ int err = 0;
++ struct verbs_mr *vmr = NULL;
++ struct zxdh_umr *umr = NULL;
++ __u64 mr_va = 0, mw_va = 0, value_dffer = 0, mw_pa_pble_index = 0;
++ __u16 mr_offset = 0;
++
++ if (ib_qp->state != IBV_QPS_RTS) {
++ *bad_wr = ib_wr;
++ printf("err:post send at state:%d\n", ib_qp->state);
++ return -EINVAL;
++ }
++
++ iwuqp = container_of(ib_qp, struct zxdh_uqp, vqp.qp);
++ iwvctx = container_of(ib_qp->context, struct zxdh_uvcontext,
++ ibv_ctx.context);
++ uk_attrs = &iwvctx->uk_attrs;
++
++ err = pthread_spin_lock(&iwuqp->lock);
++ if (err)
++ return err;
++
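++	/*
++	 * If the QP is already in error and its SQ has no outstanding work,
++	 * remember to issue a flush after posting so the newly queued WRs
++	 * are completed with flush status.
++	 */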
++ if (!ZXDH_RING_MORE_WORK(iwuqp->qp.sq_ring) &&
++ ib_qp->state == IBV_QPS_ERR)
++ reflush = true;
++
++ while (ib_wr) {
++ memset(&info, 0, sizeof(info));
++ info.wr_id = (__u64)(ib_wr->wr_id);
++ if ((ib_wr->send_flags & IBV_SEND_SIGNALED) ||
++ iwuqp->sq_sig_all)
++ info.signaled = true;
++ if (ib_wr->send_flags & IBV_SEND_FENCE)
++ info.read_fence = true;
++
++ switch (ib_wr->opcode) {
++ case IBV_WR_SEND_WITH_IMM:
++ if (iwuqp->qp.qp_caps & ZXDH_SEND_WITH_IMM) {
++ info.imm_data_valid = true;
++ info.imm_data = ntohl(ib_wr->imm_data);
++ } else {
++ err = EINVAL;
++ break;
++ }
++ SWITCH_FALLTHROUGH;
++ case IBV_WR_SEND:
++ case IBV_WR_SEND_WITH_INV:
++ if (ib_wr->send_flags & IBV_SEND_SOLICITED)
++ info.solicited = 1;
++
++ if (ib_wr->opcode == IBV_WR_SEND) {
++ if (ib_qp->qp_type == IBV_QPT_UD)
++ info.op_type = ZXDH_OP_TYPE_UD_SEND;
++ else
++ info.op_type = ZXDH_OP_TYPE_SEND;
++ } else if (ib_wr->opcode == IBV_WR_SEND_WITH_IMM) {
++ if (ib_qp->qp_type == IBV_QPT_UD)
++ info.op_type =
++ ZXDH_OP_TYPE_UD_SEND_WITH_IMM;
++ else
++ info.op_type =
++ ZXDH_OP_TYPE_SEND_WITH_IMM;
++ } else {
++ info.op_type = ZXDH_OP_TYPE_SEND_INV;
++ info.stag_to_inv = ib_wr->invalidate_rkey;
++ }
++
++ if ((ib_wr->send_flags & IBV_SEND_INLINE) &&
++ (ib_wr->num_sge != 0)) {
++ ret = zxdh_get_inline_data(
++ iwuqp->inline_data, ib_wr,
++ &info.op.inline_rdma_send.len);
++ if (ret) {
++ printf("err:zxdh_get_inline_data fail\n");
++ pthread_spin_unlock(&iwuqp->lock);
++ return -EINVAL;
++ }
++ info.op.inline_rdma_send.data =
++ iwuqp->inline_data;
++ if (ib_qp->qp_type == IBV_QPT_UD) {
++ struct zxdh_uah *ah =
++ container_of(ib_wr->wr.ud.ah,
++ struct zxdh_uah,
++ ibv_ah);
++ info.op.inline_rdma_send.ah_id =
++ ah->ah_id;
++ info.op.inline_rdma_send.qkey =
++ ib_wr->wr.ud.remote_qkey;
++ info.op.inline_rdma_send.dest_qp =
++ ib_wr->wr.ud.remote_qpn;
++ ret = zxdh_uk_ud_inline_send(
++ &iwuqp->qp, &info, false);
++ } else {
++ ret = zxdh_uk_rc_inline_send(
++ &iwuqp->qp, &info, false);
++ }
++ } else {
++ info.op.send.num_sges = ib_wr->num_sge;
++ info.op.send.sg_list =
++ (struct zxdh_sge *)ib_wr->sg_list;
++ if (ib_qp->qp_type == IBV_QPT_UD) {
++ struct zxdh_uah *ah =
++ container_of(ib_wr->wr.ud.ah,
++ struct zxdh_uah,
++ ibv_ah);
++
++ info.op.inline_rdma_send.ah_id =
++ ah->ah_id;
++ info.op.inline_rdma_send.qkey =
++ ib_wr->wr.ud.remote_qkey;
++ info.op.inline_rdma_send.dest_qp =
++ ib_wr->wr.ud.remote_qpn;
++ ret = zxdh_uk_ud_send(&iwuqp->qp, &info,
++ false);
++ } else {
++ ret = zxdh_uk_rc_send(&iwuqp->qp, &info,
++ false);
++ }
++ }
++ if (ret)
++ err = (ret == ZXDH_ERR_QP_TOOMANY_WRS_POSTED) ?
++ ENOMEM :
++ EINVAL;
++ break;
++ case IBV_WR_RDMA_WRITE_WITH_IMM:
++ if (iwuqp->qp.qp_caps & ZXDH_WRITE_WITH_IMM) {
++ info.imm_data_valid = true;
++ info.imm_data = ntohl(ib_wr->imm_data);
++ } else {
++				err = EINVAL;
++ break;
++ }
++ SWITCH_FALLTHROUGH;
++ case IBV_WR_RDMA_WRITE:
++ if (ib_wr->send_flags & IBV_SEND_SOLICITED)
++ info.solicited = 1;
++
++ if (ib_wr->opcode == IBV_WR_RDMA_WRITE)
++ info.op_type = ZXDH_OP_TYPE_WRITE;
++ else
++ info.op_type = ZXDH_OP_TYPE_WRITE_WITH_IMM;
++
++ if ((ib_wr->send_flags & IBV_SEND_INLINE) &&
++ (ib_wr->num_sge != 0)) {
++ ret = zxdh_get_inline_data(
++ iwuqp->inline_data, ib_wr,
++ &info.op.inline_rdma_write.len);
++ if (ret) {
++ printf("err:zxdh_get_inline_data fail\n");
++ pthread_spin_unlock(&iwuqp->lock);
++ return -EINVAL;
++ }
++ info.op.inline_rdma_write.data =
++ iwuqp->inline_data;
++ info.op.inline_rdma_write.rem_addr.tag_off =
++ ib_wr->wr.rdma.remote_addr;
++ info.op.inline_rdma_write.rem_addr.stag =
++ ib_wr->wr.rdma.rkey;
++ ret = zxdh_uk_inline_rdma_write(&iwuqp->qp,
++ &info, false);
++ } else {
++ info.op.rdma_write.lo_sg_list =
++ (void *)ib_wr->sg_list;
++ info.op.rdma_write.num_lo_sges = ib_wr->num_sge;
++ info.op.rdma_write.rem_addr.tag_off =
++ ib_wr->wr.rdma.remote_addr;
++ info.op.rdma_write.rem_addr.stag =
++ ib_wr->wr.rdma.rkey;
++ ret = zxdh_uk_rdma_write(&iwuqp->qp, &info,
++ false);
++ }
++ if (ret)
++ err = (ret == ZXDH_ERR_QP_TOOMANY_WRS_POSTED) ?
++ ENOMEM :
++ EINVAL;
++ break;
++ case IBV_WR_RDMA_READ:
++ if (ib_wr->num_sge > uk_attrs->max_hw_read_sges) {
++ err = EINVAL;
++ break;
++ }
++ info.op_type = ZXDH_OP_TYPE_READ;
++ info.op.rdma_read.rem_addr.tag_off =
++ ib_wr->wr.rdma.remote_addr;
++ info.op.rdma_read.rem_addr.stag = ib_wr->wr.rdma.rkey;
++
++ info.op.rdma_read.lo_sg_list = (void *)ib_wr->sg_list;
++ info.op.rdma_read.num_lo_sges = ib_wr->num_sge;
++ ret = zxdh_uk_rdma_read(&iwuqp->qp, &info, false,
++ false);
++ if (ret)
++ err = (ret == ZXDH_ERR_QP_TOOMANY_WRS_POSTED) ?
++ ENOMEM :
++ EINVAL;
++ break;
++ case IBV_WR_BIND_MW:
++ vmr = verbs_get_mr(ib_wr->bind_mw.bind_info.mr);
++ umr = container_of(vmr, struct zxdh_umr, vmr);
++ mr_va = (uintptr_t)ib_wr->bind_mw.bind_info.mr->addr;
++ mw_va = ib_wr->bind_mw.bind_info.addr;
++ mr_offset = 0;
++ value_dffer = 0;
++ mw_pa_pble_index = 0;
++
++ if (ib_qp->qp_type != IBV_QPT_RC) {
++ err = EINVAL;
++ break;
++ }
++ info.op_type = ZXDH_OP_TYPE_BIND_MW;
++ info.op.bind_window.mr_stag =
++ ib_wr->bind_mw.bind_info.mr->rkey;
++
++ if (ib_wr->bind_mw.mw->type == IBV_MW_TYPE_1) {
++ info.op.bind_window.mem_window_type_1 = true;
++ info.op.bind_window.mw_stag =
++ ib_wr->bind_mw.rkey;
++ } else {
++ info.op.bind_window.mem_window_type_1 = false;
++ info.op.bind_window.mw_stag =
++ calc_type2_mw_stag(
++ ib_wr->bind_mw.rkey,
++ ib_wr->bind_mw.mw->rkey);
++ ib_wr->bind_mw.mw->rkey =
++ info.op.bind_window.mw_stag;
++ }
++
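++			/*
++			 * Zero-based windows: the window's starting PBLE is
++			 * derived below from the byte distance between the
++			 * window VA and the MR VA (plus the MR's offset into
++			 * its first page).  With a 3-level PBL that distance
++			 * is divided by the bytes mapped by one leaf PBL page
++			 * (host page size * 512) to get a root index added to
++			 * mr_pa_pble_index, and the remainder (in pages, mod
++			 * 512) becomes root_leaf_offset; with a 1-level PBL
++			 * the distance is divided by the page size directly,
++			 * and with a 0-level PBL the byte distance is added
++			 * to mr_pa_pble_index as-is.
++			 */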
++ if (ib_wr->bind_mw.bind_info.mw_access_flags &
++ IBV_ACCESS_ZERO_BASED) {
++ info.op.bind_window.addressing_type =
++ ZXDH_ADDR_TYPE_ZERO_BASED;
++ if (ib_wr->bind_mw.mw->type == IBV_MW_TYPE_1) {
++ err = EINVAL;
++ break;
++ }
++
++ info.op.bind_window.host_page_size =
++ umr->host_page_size;
++ if (umr->host_page_size == ZXDH_PAGE_SIZE_4K) {
++ mr_offset = mr_va & 0x0fff;
++ value_dffer = mw_va - mr_va;
++ if (umr->leaf_pbl_size == 3) {
++ mw_pa_pble_index =
++ (mr_offset +
++ value_dffer) /
++ (4096 * 512);
++ info.op.bind_window
++ .mw_pa_pble_index =
++ umr->mr_pa_pble_index +
++ mw_pa_pble_index;
++ mw_pa_pble_index =
++ ((mr_offset +
++ value_dffer) /
++ 4096) %
++ 512;
++
++ info.op.bind_window
++ .root_leaf_offset =
++ (__u16)mw_pa_pble_index;
++ info.op.bind_window.va =
++ (void *)(uintptr_t)(mw_va &
++ 0x0fff);
++ info.op.bind_window
++ .leaf_pbl_size = 3;
++
++ } else if (umr->leaf_pbl_size == 1) {
++ mw_pa_pble_index =
++ (mr_offset +
++ value_dffer) /
++ 4096;
++ info.op.bind_window
++ .mw_pa_pble_index =
++ umr->mr_pa_pble_index +
++ mw_pa_pble_index;
++ info.op.bind_window
++ .leaf_pbl_size = 1;
++ info.op.bind_window.va =
++ (void *)(uintptr_t)(mw_va &
++ 0x0fff);
++ info.op.bind_window
++ .root_leaf_offset = 0;
++ } else {
++ mw_pa_pble_index =
++ umr->mr_pa_pble_index +
++ mr_offset + value_dffer;
++ info.op.bind_window.va =
++ (void *)(uintptr_t)(mw_va &
++ 0x0fff);
++ info.op.bind_window
++ .mw_pa_pble_index =
++ mw_pa_pble_index;
++ info.op.bind_window
++ .leaf_pbl_size = 0;
++ info.op.bind_window
++ .root_leaf_offset = 0;
++ }
++
++ } else if (umr->host_page_size ==
++ ZXDH_PAGE_SIZE_2M) {
++ mr_offset = mr_va & 0x1FFFFF;
++ value_dffer = mw_va - mr_va;
++ if (umr->leaf_pbl_size == 3) {
++ mw_pa_pble_index =
++ (mr_offset +
++ value_dffer) /
++ ((4096 * 512) * 512);
++ info.op.bind_window
++ .mw_pa_pble_index =
++ umr->mr_pa_pble_index +
++ mw_pa_pble_index;
++ mw_pa_pble_index =
++ ((mr_offset +
++ value_dffer) /
++ (4096 * 512)) %
++ 512;
++
++ info.op.bind_window
++ .root_leaf_offset =
++ (__u16)mw_pa_pble_index;
++ info.op.bind_window.va =
++ (void *)(uintptr_t)(mw_va &
++ 0x1FFFFF);
++ info.op.bind_window
++ .leaf_pbl_size = 3;
++
++ } else if (umr->leaf_pbl_size == 1) {
++ mw_pa_pble_index =
++ (mr_offset +
++ value_dffer) /
++ (4096 * 512);
++ info.op.bind_window
++ .mw_pa_pble_index =
++ umr->mr_pa_pble_index +
++ mw_pa_pble_index;
++ info.op.bind_window
++ .leaf_pbl_size = 1;
++ info.op.bind_window.va =
++ (void *)(uintptr_t)(mw_va &
++ 0x1FFFFF);
++ info.op.bind_window
++ .root_leaf_offset = 0;
++ } else {
++ mw_pa_pble_index =
++ umr->mr_pa_pble_index +
++ mr_offset + value_dffer;
++ info.op.bind_window.va =
++ (void *)(uintptr_t)(mw_va &
++ 0x1FFFFF);
++ info.op.bind_window
++ .mw_pa_pble_index =
++ mw_pa_pble_index;
++ info.op.bind_window
++ .leaf_pbl_size = 0;
++ info.op.bind_window
++ .root_leaf_offset = 0;
++ }
++ } else if (umr->host_page_size ==
++ ZXDH_PAGE_SIZE_1G) {
++ mr_offset = mr_va & 0x3FFFFFFF;
++ value_dffer = mw_va - mr_va;
++ if (umr->leaf_pbl_size == 1) {
++ mw_pa_pble_index =
++ (mr_offset +
++ value_dffer) /
++ (1024 * 1024 * 1024);
++ info.op.bind_window
++ .mw_pa_pble_index =
++ umr->mr_pa_pble_index +
++ mw_pa_pble_index;
++ info.op.bind_window
++ .leaf_pbl_size = 1;
++ info.op.bind_window.va =
++ (void *)(uintptr_t)(mw_va &
++ 0x3FFFFFFF);
++ info.op.bind_window
++ .root_leaf_offset = 0;
++ } else if (umr->leaf_pbl_size == 0) {
++ mw_pa_pble_index =
++ umr->mr_pa_pble_index +
++ mr_offset + value_dffer;
++ info.op.bind_window.va =
++ (void *)(uintptr_t)(mw_va &
++ 0x3FFFFFFF);
++ info.op.bind_window
++ .mw_pa_pble_index =
++ mw_pa_pble_index;
++ info.op.bind_window
++ .leaf_pbl_size = 0;
++ info.op.bind_window
++ .root_leaf_offset = 0;
++ }
++ }
++
++ } else {
++ info.op.bind_window.addressing_type =
++ ZXDH_ADDR_TYPE_VA_BASED;
++ info.op.bind_window.va =
++ (void *)(uintptr_t)
++ ib_wr->bind_mw.bind_info.addr;
++ info.op.bind_window.host_page_size =
++ umr->host_page_size;
++
++ if (umr->host_page_size == ZXDH_PAGE_SIZE_4K) {
++ mr_offset = mr_va & 0x0fff;
++ value_dffer = mw_va - mr_va;
++ if (umr->leaf_pbl_size == 3) {
++ mw_pa_pble_index =
++ (mr_offset +
++ value_dffer) /
++ (4096 * 512);
++ info.op.bind_window
++ .mw_pa_pble_index =
++ umr->mr_pa_pble_index +
++ mw_pa_pble_index;
++ mw_pa_pble_index =
++ ((mr_offset +
++ value_dffer) /
++ 4096) %
++ 512;
++ info.op.bind_window
++ .root_leaf_offset =
++ (__u16)mw_pa_pble_index;
++ info.op.bind_window
++ .leaf_pbl_size = 3;
++ } else if (umr->leaf_pbl_size == 1) {
++ info.op.bind_window
++ .mw_pa_pble_index =
++ umr->mr_pa_pble_index +
++ ((mr_offset +
++ value_dffer) /
++ 4096);
++ info.op.bind_window
++ .leaf_pbl_size = 1;
++ info.op.bind_window
++ .root_leaf_offset = 0;
++ } else {
++ info.op.bind_window
++ .leaf_pbl_size = 0;
++ info.op.bind_window
++ .mw_pa_pble_index =
++ umr->mr_pa_pble_index +
++ (mr_va & 0x0fff) +
++ (mw_va - mr_va);
++ info.op.bind_window
++ .root_leaf_offset = 0;
++ }
++ } else if (umr->host_page_size ==
++ ZXDH_PAGE_SIZE_2M) {
++ mr_offset = mr_va & 0x1FFFFF;
++ value_dffer = mw_va - mr_va;
++ if (umr->leaf_pbl_size == 3) {
++ mw_pa_pble_index =
++ (mr_offset +
++ value_dffer) /
++ ((4096 * 512) * 512);
++ info.op.bind_window
++ .mw_pa_pble_index =
++ umr->mr_pa_pble_index +
++ mw_pa_pble_index;
++ mw_pa_pble_index =
++ ((mr_offset +
++ value_dffer) /
++ (4096 * 512)) %
++ 512;
++ info.op.bind_window
++ .root_leaf_offset =
++ (__u16)mw_pa_pble_index;
++ info.op.bind_window
++ .leaf_pbl_size = 3;
++ } else if (umr->leaf_pbl_size == 1) {
++ info.op.bind_window
++ .mw_pa_pble_index =
++ umr->mr_pa_pble_index +
++ ((mr_offset +
++ value_dffer) /
++ (4096 * 512));
++ info.op.bind_window
++ .leaf_pbl_size = 1;
++ info.op.bind_window
++ .root_leaf_offset = 0;
++ } else {
++ info.op.bind_window
++ .leaf_pbl_size = 0;
++ info.op.bind_window
++ .mw_pa_pble_index =
++ umr->mr_pa_pble_index +
++ (mr_va & 0x1FFFFF) +
++ (mw_va - mr_va);
++ info.op.bind_window
++ .root_leaf_offset = 0;
++ }
++ } else if (umr->host_page_size ==
++ ZXDH_PAGE_SIZE_1G) {
++ mr_offset = mr_va & 0x3FFFFFFF;
++ value_dffer = mw_va - mr_va;
++ if (umr->leaf_pbl_size == 1) {
++ info.op.bind_window
++ .mw_pa_pble_index =
++ umr->mr_pa_pble_index +
++ ((mr_offset +
++ value_dffer) /
++ (1024 * 1024 * 1024));
++ info.op.bind_window
++ .leaf_pbl_size = 1;
++ info.op.bind_window
++ .root_leaf_offset = 0;
++ } else if (umr->leaf_pbl_size == 0) {
++ info.op.bind_window
++ .leaf_pbl_size = 0;
++ info.op.bind_window
++ .mw_pa_pble_index =
++ umr->mr_pa_pble_index +
++ (mr_va & 0x3FFFFFFF) +
++ (mw_va - mr_va);
++ info.op.bind_window
++ .root_leaf_offset = 0;
++ }
++ }
++ }
++
++ info.op.bind_window.bind_len =
++ ib_wr->bind_mw.bind_info.length;
++ info.op.bind_window.ena_reads =
++ (ib_wr->bind_mw.bind_info.mw_access_flags &
++ IBV_ACCESS_REMOTE_READ) ?
++ 1 :
++ 0;
++ info.op.bind_window.ena_writes =
++ (ib_wr->bind_mw.bind_info.mw_access_flags &
++ IBV_ACCESS_REMOTE_WRITE) ?
++ 1 :
++ 0;
++
++ ret = zxdh_uk_mw_bind(&iwuqp->qp, &info, false);
++ if (ret)
++ err = (ret == ZXDH_ERR_QP_TOOMANY_WRS_POSTED) ?
++ ENOMEM :
++ EINVAL;
++ break;
++ case IBV_WR_LOCAL_INV:
++ info.op_type = ZXDH_OP_TYPE_LOCAL_INV;
++ info.op.inv_local_stag.target_stag =
++ ib_wr->invalidate_rkey;
++ ret = zxdh_uk_stag_local_invalidate(&iwuqp->qp, &info,
++ true);
++ if (ret)
++ err = (ret == ZXDH_ERR_QP_TOOMANY_WRS_POSTED) ?
++ ENOMEM :
++ EINVAL;
++ break;
++ default:
++ /* error */
++ err = EINVAL;
++ break;
++ }
++ if (err)
++ break;
++
++ ib_wr = ib_wr->next;
++ }
++
++ if (err)
++ *bad_wr = ib_wr;
++
++ zxdh_uk_qp_post_wr(&iwuqp->qp);
++ if (reflush)
++ zxdh_issue_flush(ib_qp, 1, 0);
++
++ pthread_spin_unlock(&iwuqp->lock);
++
++ return err;
++}
++
++/**
++ * zxdh_post_recv - post receive wr for user application
++ * zxdh_upost_recv - post receive wr for user application
++ * @ib_qp: qp to post the receive wr
++ * @ib_wr: work request for receive
++ * @bad_wr: return of bad wr if error
++int zxdh_upost_recv(struct ibv_qp *ib_qp, struct ibv_recv_wr *ib_wr,
++ struct ibv_recv_wr **bad_wr)
++{
++ struct zxdh_post_rq_info post_recv = {};
++ enum zxdh_status_code ret = 0;
++ struct zxdh_sge *sg_list;
++ struct zxdh_uqp *iwuqp;
++ bool reflush = false;
++ int err = 0;
++
++ iwuqp = container_of(ib_qp, struct zxdh_uqp, vqp.qp);
++ sg_list = iwuqp->recv_sges;
++
++ if (unlikely(ib_qp->state == IBV_QPS_RESET || ib_qp->srq)) {
++ *bad_wr = ib_wr;
++ printf("err:post recv at reset or using srq\n");
++ return -EINVAL;
++ }
++
++ err = pthread_spin_lock(&iwuqp->lock);
++ if (err)
++ return err;
++
++ if (unlikely(!ZXDH_RING_MORE_WORK(iwuqp->qp.rq_ring)) &&
++ ib_qp->state == IBV_QPS_ERR)
++ reflush = true;
++
++ while (ib_wr) {
++ if (unlikely(ib_wr->num_sge > iwuqp->qp.max_rq_frag_cnt)) {
++ *bad_wr = ib_wr;
++ err = EINVAL;
++ goto error;
++ }
++ post_recv.num_sges = ib_wr->num_sge;
++ post_recv.wr_id = ib_wr->wr_id;
++ zxdh_copy_sg_list(sg_list, ib_wr->sg_list, ib_wr->num_sge);
++ post_recv.sg_list = sg_list;
++ ret = zxdh_uk_post_receive(&iwuqp->qp, &post_recv);
++ if (unlikely(ret)) {
++ err = (ret == ZXDH_ERR_QP_TOOMANY_WRS_POSTED) ? ENOMEM :
++ EINVAL;
++ *bad_wr = ib_wr;
++ goto error;
++ }
++
++ if (reflush)
++ zxdh_issue_flush(ib_qp, 0, 1);
++
++ ib_wr = ib_wr->next;
++ }
++error:
++ zxdh_uk_qp_set_shadow_area(&iwuqp->qp);
++ pthread_spin_unlock(&iwuqp->lock);
++
++ return err;
++}
++
++/**
++ * zxdh_ucreate_ah - create address handle associated with a pd
++ * @ibpd: pd for the address handle
++ * @attr: attributes of address handle
++ */
++struct ibv_ah *zxdh_ucreate_ah(struct ibv_pd *ibpd, struct ibv_ah_attr *attr)
++{
++ struct zxdh_uah *ah;
++ union ibv_gid sgid;
++ struct zxdh_ucreate_ah_resp resp;
++ int err;
++
++ err = ibv_query_gid(ibpd->context, attr->port_num, attr->grh.sgid_index,
++ &sgid);
++ if (err) {
++ errno = err;
++ return NULL;
++ }
++
++ ah = calloc(1, sizeof(*ah));
++ if (!ah)
++ return NULL;
++
++ err = ibv_cmd_create_ah(ibpd, &ah->ibv_ah, attr, &resp.ibv_resp,
++ sizeof(resp));
++ if (err) {
++ free(ah);
++ errno = err;
++ return NULL;
++ }
++
++ ah->ah_id = resp.ah_id;
++
++ return &ah->ibv_ah;
++}
++
++/**
++ * zxdh_udestroy_ah - destroy the address handle
++ * @ibah: address handle
++ */
++int zxdh_udestroy_ah(struct ibv_ah *ibah)
++{
++ struct zxdh_uah *ah;
++ int ret;
++
++ ah = container_of(ibah, struct zxdh_uah, ibv_ah);
++
++ ret = ibv_cmd_destroy_ah(ibah);
++ if (ret)
++ return ret;
++
++ free(ah);
++
++ return 0;
++}
++
++/**
++ * zxdh_uattach_mcast - attach qp to multicast group
++ * @qp: the queue pair
++ * @gid: the global ID of the multicast group
++ * @lid: the local ID
++ */
++int zxdh_uattach_mcast(struct ibv_qp *qp, const union ibv_gid *gid,
++ uint16_t lid)
++{
++ return ibv_cmd_attach_mcast(qp, gid, lid);
++}
++
++/**
++ * zxdh_udetach_mcast - detach qp from multicast group
++ * @qp: the queue pair
++ * @gid: the global ID of the multicast group
++ * @lid: the local ID
++ */
++int zxdh_udetach_mcast(struct ibv_qp *qp, const union ibv_gid *gid,
++ uint16_t lid)
++{
++ return ibv_cmd_detach_mcast(qp, gid, lid);
++}
++
++/**
++ * zxdh_uresize_cq - resizes a cq
++ * @cq: cq to resize
++ * @cqe: the number of cqes of the new cq
++ */
++int zxdh_uresize_cq(struct ibv_cq *cq, int cqe)
++{
++ struct zxdh_uvcontext *iwvctx;
++ struct zxdh_uk_attrs *uk_attrs;
++ struct zxdh_uresize_cq cmd = {};
++ struct ib_uverbs_resize_cq_resp resp = {};
++ struct zxdh_ureg_mr reg_mr_cmd = {};
++ struct ib_uverbs_reg_mr_resp reg_mr_resp = {};
++ struct zxdh_cq_buf *cq_buf = NULL;
++ struct zxdh_cqe *cq_base = NULL;
++ struct verbs_mr new_mr = {};
++ struct zxdh_ucq *iwucq;
++ size_t cq_size;
++ __u32 cq_pages;
++ int cqe_needed;
++ int ret = 0;
++
++ iwucq = container_of(cq, struct zxdh_ucq, verbs_cq.cq);
++ iwvctx = container_of(cq->context, struct zxdh_uvcontext,
++ ibv_ctx.context);
++ uk_attrs = &iwvctx->uk_attrs;
++
++ if (!(uk_attrs->feature_flags & ZXDH_FEATURE_CQ_RESIZE))
++ return -EOPNOTSUPP;
++
++ if (cqe > ZXDH_MAX_CQ_SIZE)
++ return -EINVAL;
++
++ cqe_needed = zxdh_cq_round_up(cqe + 1);
++
++ if (cqe_needed < ZXDH_U_MINCQ_SIZE)
++ cqe_needed = ZXDH_U_MINCQ_SIZE;
++
++ if (cqe_needed == iwucq->cq.cq_size)
++ return 0;
++
++ cq_size = get_cq_total_bytes(cqe_needed);
++ cq_pages = cq_size >> ZXDH_HW_PAGE_SHIFT;
++ cq_base = zxdh_alloc_hw_buf(cq_size);
++ if (!cq_base)
++ return -ENOMEM;
++
++ memset(cq_base, 0, cq_size);
++
++ cq_buf = malloc(sizeof(*cq_buf));
++ if (!cq_buf) {
++ ret = -ENOMEM;
++ goto err_buf;
++ }
++
++ new_mr.ibv_mr.pd = iwucq->vmr.ibv_mr.pd;
++ reg_mr_cmd.reg_type = ZXDH_MEMREG_TYPE_CQ;
++ reg_mr_cmd.cq_pages = cq_pages;
++
++ ret = ibv_cmd_reg_mr(new_mr.ibv_mr.pd, cq_base, cq_size,
++ (uintptr_t)cq_base, IBV_ACCESS_LOCAL_WRITE,
++			     &new_mr, &reg_mr_cmd.ibv_cmd, sizeof(reg_mr_cmd),
++			     &reg_mr_resp, sizeof(reg_mr_resp));
++ if (ret)
++ goto err_dereg_mr;
++
++ ret = pthread_spin_lock(&iwucq->lock);
++ if (ret)
++ goto err_lock;
++
++ cmd.user_cq_buffer = (__u64)((uintptr_t)cq_base);
++ ret = ibv_cmd_resize_cq(&iwucq->verbs_cq.cq, cqe_needed, &cmd.ibv_cmd,
++ sizeof(cmd), &resp, sizeof(resp));
++ if (ret)
++ goto err_resize;
++
++ memcpy(&cq_buf->cq, &iwucq->cq, sizeof(cq_buf->cq));
++ cq_buf->vmr = iwucq->vmr;
++ iwucq->vmr = new_mr;
++ zxdh_uk_cq_resize(&iwucq->cq, cq_base, cqe_needed);
++ iwucq->verbs_cq.cq.cqe = cqe;
++ list_add_tail(&iwucq->resize_list, &cq_buf->list);
++ iwucq->resize_enable = true;
++ pthread_spin_unlock(&iwucq->lock);
++
++ return ret;
++
++err_resize:
++ pthread_spin_unlock(&iwucq->lock);
++err_lock:
++ ibv_cmd_dereg_mr(&new_mr);
++err_dereg_mr:
++ free(cq_buf);
++err_buf:
++ zxdh_free_hw_buf(cq_base, cq_size);
++ return ret;
++}
++
++static void zxdh_srq_wqe_init(struct zxdh_usrq *iwusrq)
++{
++ uint32_t i;
++ struct zxdh_srq_uk *srq_uk;
++ __le64 *wqe;
++ __u64 hdr;
++
++ srq_uk = &iwusrq->srq;
++ zxdh_dbg(ZXDH_DBG_SRQ, "%s head:%d tail:%d\n", __func__,
++ srq_uk->srq_ring.head, srq_uk->srq_ring.tail);
++ for (i = srq_uk->srq_ring.head; i < srq_uk->srq_ring.tail; i++) {
++ wqe = zxdh_get_srq_wqe(srq_uk, i);
++
++ hdr = FIELD_PREP(ZXDHQPSRQ_NEXT_WQE_INDEX, (uint32_t)(i + 1));
++
++ udma_to_device_barrier(); /* make sure WQE is populated before valid bit is set */
++ set_64bit_val(wqe, 0, hdr);
++ }
++}
++
++static size_t zxdh_get_srq_queue_size(int srqdepth)
++{
++ return roundup(srqdepth * ZXDH_SRQ_WQE_MIN_SIZE, ZXDH_HW_PAGE_SIZE);
++}
++
++static size_t zxdh_get_srq_list_size(size_t srq_size)
++{
++ return roundup(srq_size * sizeof(__u16), ZXDH_HW_PAGE_SIZE);
++}
++
++static size_t zxdh_get_srq_db_size(void)
++{
++ return 8 * sizeof(char);
++}
++
++static size_t zxdh_get_total_srq_size(struct zxdh_usrq *iwusrq, int srqdepth,
++ size_t srq_size)
++{
++ size_t total_srq_queue_size;
++ size_t total_srq_list_size;
++ size_t total_srq_db_size;
++ size_t total_srq_size;
++
++ total_srq_queue_size = zxdh_get_srq_queue_size(srqdepth);
++ iwusrq->buf_size = total_srq_queue_size;
++ total_srq_list_size = zxdh_get_srq_list_size(srq_size);
++ iwusrq->list_buf_size = total_srq_list_size;
++ total_srq_db_size = zxdh_get_srq_db_size();
++ iwusrq->db_buf_size = total_srq_db_size;
++ total_srq_size =
++ total_srq_queue_size + total_srq_list_size + total_srq_db_size;
++ iwusrq->total_buf_size = total_srq_size;
++ zxdh_dbg(
++ ZXDH_DBG_SRQ,
++ "%s total_srq_queue_size:%ld total_srq_list_size:%ld total_srq_db_size:%ld srqdepth:%d\n",
++ __func__, total_srq_queue_size, total_srq_list_size,
++ total_srq_db_size, srqdepth);
++
++ return total_srq_size;
++}
++
++static int zxdh_alloc_srq_buf(struct zxdh_usrq *iwusrq,
++ struct zxdh_srq_uk_init_info *info,
++ size_t total_srq_size)
++{
++ info->srq_base = zxdh_alloc_hw_buf(total_srq_size);
++ if (!info->srq_base)
++ return -ENOMEM;
++ memset(info->srq_base, 0, total_srq_size);
++ info->srq_list_base =
++ (__le16 *)&info
++ ->srq_base[iwusrq->buf_size / ZXDH_SRQ_WQE_MIN_SIZE];
++ info->srq_db_base =
++ (__le64 *)&info->srq_list_base[iwusrq->list_buf_size /
++ (sizeof(__u16))];
++ *(__le64 *)info->srq_db_base = ZXDH_SRQ_DB_INIT_VALUE;
++ zxdh_dbg(ZXDH_DBG_SRQ,
++ "%s srq_base:0x%p srq_list_base:0x%p srq_db_base:0x%p\n",
++ __func__, info->srq_base, info->srq_list_base,
++ info->srq_db_base);
++ return 0;
++}
++
++static int zxdh_reg_srq_mr(struct ibv_pd *pd,
++ struct zxdh_srq_uk_init_info *info,
++ size_t total_srq_size, uint16_t srq_pages,
++ uint16_t srq_list_pages, struct zxdh_usrq *iwusrq)
++{
++ struct zxdh_ureg_mr reg_mr_cmd = {};
++ struct ib_uverbs_reg_mr_resp reg_mr_resp = {};
++ int ret;
++
++ reg_mr_cmd.reg_type = ZXDH_MEMREG_TYPE_SRQ;
++ reg_mr_cmd.srq_pages = srq_pages;
++ reg_mr_cmd.srq_list_pages = srq_list_pages;
++ ret = ibv_cmd_reg_mr(pd, info->srq_base, total_srq_size,
++ (uintptr_t)info->srq_base, IBV_ACCESS_LOCAL_WRITE,
++			     &iwusrq->vmr, &reg_mr_cmd.ibv_cmd,
++			     sizeof(reg_mr_cmd), &reg_mr_resp,
++ sizeof(reg_mr_resp));
++ if (ret)
++ return ret;
++
++ return 0;
++}
++
++static int create_srq(struct ibv_pd *pd, struct zxdh_usrq *iwusrq,
++ struct ibv_srq_init_attr *attr,
++ struct zxdh_srq_uk_init_info *info)
++{
++ struct zxdh_ucreate_srq cmd = {};
++ struct zxdh_ucreate_srq_resp resp = {};
++ int ret;
++
++ cmd.user_wqe_bufs = (__u64)((uintptr_t)info->srq_base);
++ cmd.user_compl_ctx = (__u64)(uintptr_t)&iwusrq->srq;
++ cmd.user_wqe_list = (__u64)((uintptr_t)info->srq_list_base);
++ cmd.user_wqe_db = (__u64)((uintptr_t)info->srq_db_base);
++ ret = ibv_cmd_create_srq(pd, &iwusrq->ibv_srq, attr, &cmd.ibv_cmd,
++ sizeof(cmd), &resp.ibv_resp,
++ sizeof(struct zxdh_ucreate_srq_resp));
++ if (ret)
++ return ret;
++
++ iwusrq->srq_id = resp.srq_id;
++ info->srq_id = resp.srq_id;
++ info->srq_size = resp.actual_srq_size;
++ info->srq_list_size = resp.actual_srq_list_size;
++ zxdh_dbg(
++ ZXDH_DBG_SRQ,
++ "%s info->srq_id:%d info->srq_size:%d info->srq_list_size:%d\n",
++ __func__, info->srq_id, info->srq_size, info->srq_list_size);
++
++ return 0;
++}
++
++/**
++ * zxdh_vmapped_srq - create resources for srq
++ * @iwusrq: srq struct for resources
++ * @pd: pd for the srq
++ * @attr: attributes of srq passed
++ * @srqdepth: depth of the srq
++ * @info: info for initializing user level srq
++ */
++static int zxdh_vmapped_srq(struct zxdh_usrq *iwusrq, struct ibv_pd *pd,
++ struct ibv_srq_init_attr *attr, int srqdepth,
++ struct zxdh_srq_uk_init_info *info)
++{
++ size_t total_srq_size;
++ size_t srq_pages = 0;
++ size_t srq_list_pages = 0;
++ int ret;
++
++ total_srq_size =
++ zxdh_get_total_srq_size(iwusrq, srqdepth, info->srq_size);
++ srq_pages = iwusrq->buf_size >> ZXDH_HW_PAGE_SHIFT;
++ srq_list_pages = iwusrq->list_buf_size >> ZXDH_HW_PAGE_SHIFT;
++ ret = zxdh_alloc_srq_buf(iwusrq, info, total_srq_size);
++ if (ret)
++ return -ENOMEM;
++ zxdh_dbg(ZXDH_DBG_SRQ, "%s srq_pages:%ld srq_list_pages:%ld\n",
++ __func__, srq_pages, srq_list_pages);
++
++ ret = zxdh_reg_srq_mr(pd, info, total_srq_size, srq_pages,
++ srq_list_pages, iwusrq);
++ zxdh_dbg(ZXDH_DBG_SRQ, "%s %d ret:%d\n", __func__, __LINE__, ret);
++ if (ret) {
++ errno = ret;
++ goto err_dereg_srq_mr;
++ }
++ ret = create_srq(pd, iwusrq, attr, info);
++ zxdh_dbg(ZXDH_DBG_SRQ, "%s %d ret:%d\n", __func__, __LINE__, ret);
++ if (ret)
++ goto err_srq;
++
++ return 0;
++err_srq:
++ ibv_cmd_dereg_mr(&iwusrq->vmr);
++err_dereg_srq_mr:
++ zxdh_free_hw_buf(info->srq_base, total_srq_size);
++
++ return ret;
++}
++
++/**
++ * zxdh_destroy_vmapped_srq - destroy resources for srq
++ * @iwusrq: srq struct for resources
++ */
++static int zxdh_destroy_vmapped_srq(struct zxdh_usrq *iwusrq)
++{
++ int ret;
++
++ ret = ibv_cmd_destroy_srq(&iwusrq->ibv_srq);
++ if (ret)
++ return ret;
++
++ ibv_cmd_dereg_mr(&iwusrq->vmr);
++ return 0;
++}
++
++static int zxdh_check_srq_init_attr(struct ibv_srq_init_attr *srq_init_attr,
++ struct zxdh_uk_attrs *uk_attrs)
++{
++ if ((srq_init_attr->attr.srq_limit > srq_init_attr->attr.max_wr) ||
++ (srq_init_attr->attr.max_sge > uk_attrs->max_hw_wq_frags) ||
++ (srq_init_attr->attr.max_wr > uk_attrs->max_hw_srq_wr)) {
++ return 1;
++ }
++ return 0;
++}
++
++static int zxdh_init_iwusrq(struct zxdh_usrq *iwusrq,
++ struct ibv_srq_init_attr *srq_init_attr,
++ __u32 srqdepth, __u8 srqshift,
++ struct zxdh_srq_uk_init_info *info,
++ struct zxdh_uk_attrs *uk_attrs)
++{
++ info->srq_size = srqdepth >> srqshift;
++ iwusrq->max_wr = info->srq_size;
++ iwusrq->max_sge = srq_init_attr->attr.max_sge;
++ iwusrq->srq_limit = srq_init_attr->attr.srq_limit;
++
++ srq_init_attr->attr.max_wr = info->srq_size;
++ info->uk_attrs = uk_attrs;
++ info->max_srq_frag_cnt = srq_init_attr->attr.max_sge;
++ info->srq_wrid_array =
++ calloc(info->srq_size, sizeof(*info->srq_wrid_array));
++ if (info->srq_wrid_array == NULL)
++ return 1;
++
++ return 0;
++}
++
++/**
++ * zxdh_ucreate_srq - create srq on user app
++ * @pd: pd for the srq
++ * @srq_init_attr: attributes of the srq to be created (sizes, sge)
++ */
++struct ibv_srq *zxdh_ucreate_srq(struct ibv_pd *pd,
++ struct ibv_srq_init_attr *srq_init_attr)
++{
++ struct zxdh_srq_uk_init_info info = {};
++ struct zxdh_uk_attrs *uk_attrs;
++ struct zxdh_uvcontext *iwvctx;
++ __u32 srqdepth;
++ __u8 srqshift;
++ int status;
++ int ret;
++ struct zxdh_usrq *iwusrq;
++
++ iwvctx = container_of(pd->context, struct zxdh_uvcontext,
++ ibv_ctx.context);
++ uk_attrs = &iwvctx->uk_attrs;
++
++ if ((zxdh_check_srq_init_attr(srq_init_attr, uk_attrs)) != 0) {
++ zxdh_dbg(ZXDH_DBG_SRQ, "zxdh_check_srq_init_attr failed\n");
++ errno = EINVAL;
++ return NULL;
++ }
++
++ /* get shift count for maximum wqe size */
++ zxdh_get_srq_wqe_shift(uk_attrs, srq_init_attr->attr.max_sge,
++ &srqshift);
++
++	/* get RQ/SRQ depth (quanta), the minimum number of units in the srq */
++ status = zxdh_get_srqdepth(uk_attrs->max_hw_srq_quanta,
++ srq_init_attr->attr.max_wr, srqshift,
++ &srqdepth);
++ zxdh_dbg(
++ ZXDH_DBG_SRQ,
++ "%s %d status:%d srqshift:%d srqdepth:%d uk_attrs->max_hw_srq_quanta:%d srq_init_attr->attr.max_wr:%d\n",
++ __func__, __LINE__, status, srqshift, srqdepth,
++ uk_attrs->max_hw_srq_quanta, srq_init_attr->attr.max_wr);
++ if (status != 0) {
++ zxdh_dbg(ZXDH_DBG_SRQ, "zxdh_get_srqdepth failed\n");
++ errno = EINVAL;
++ return NULL;
++ }
++ iwusrq = memalign(1024, sizeof(*iwusrq));
++ if (!iwusrq)
++ return NULL;
++ memset(iwusrq, 0, sizeof(*iwusrq));
++ if (pthread_spin_init(&iwusrq->lock, PTHREAD_PROCESS_PRIVATE) != 0)
++ goto err_free_srq;
++
++ if (zxdh_init_iwusrq(iwusrq, srq_init_attr, srqdepth, srqshift, &info,
++ uk_attrs)) {
++ zxdh_dbg(ZXDH_DBG_SRQ, "calloc srq_wrid_array failed\n");
++ goto err_srq_wrid_array;
++ }
++ status = zxdh_vmapped_srq(iwusrq, pd, srq_init_attr, srqdepth, &info);
++ zxdh_dbg(ZXDH_DBG_SRQ, "%s %d status:%d\n", __func__, __LINE__, status);
++ if (status) {
++ zxdh_dbg(ZXDH_DBG_SRQ, "zxdh_vmapped_srq failed\n");
++ errno = status;
++ goto err_vmapped_srq;
++ }
++
++ status = zxdh_uk_srq_init(&iwusrq->srq, &info);
++ zxdh_dbg(ZXDH_DBG_SRQ, "%s %d status:%d\n", __func__, __LINE__, status);
++ if (status) {
++ zxdh_dbg(ZXDH_DBG_SRQ, "zxdh_uk_srq_init failed\n");
++ errno = EINVAL;
++ goto err_free_srq_init;
++ }
++ zxdh_srq_wqe_init(iwusrq);
++
++ srq_init_attr->attr.max_wr = (srqdepth - ZXDH_SRQ_RSVD) >> srqshift;
++
++ zxdh_dbg(ZXDH_DBG_SRQ, "%s iwusrq->srq_id:%d info.srq_size:%d\n",
++ __func__, iwusrq->srq_id, info.srq_size);
++ return &iwusrq->ibv_srq;
++
++err_free_srq_init:
++ zxdh_destroy_vmapped_srq(iwusrq);
++ zxdh_free_hw_buf(info.srq_base, iwusrq->total_buf_size);
++err_vmapped_srq:
++ free(info.srq_wrid_array);
++err_srq_wrid_array:
++ ret = pthread_spin_destroy(&iwusrq->lock);
++ if (ret)
++ errno = EINVAL;
++err_free_srq:
++ free(iwusrq);
++ zxdh_dbg(ZXDH_DBG_SRQ, "%s %d\n", __func__, __LINE__);
++ return NULL;
++}
++
++/**
++ * zxdh_udestroy_srq - destroy srq on user app
++ * @srq: srq to destroy
++ */
++int zxdh_udestroy_srq(struct ibv_srq *srq)
++{
++ struct zxdh_usrq *iwusrq;
++ int ret;
++
++ iwusrq = container_of(srq, struct zxdh_usrq, ibv_srq);
++ ret = pthread_spin_destroy(&iwusrq->lock);
++ if (ret)
++ goto err;
++
++ ret = zxdh_destroy_vmapped_srq(iwusrq);
++ if (ret)
++ goto err;
++ zxdh_dbg(ZXDH_DBG_SRQ, "%s iwusrq->srq_id:%d\n", __func__,
++ iwusrq->srq_id);
++ zxdh_free_hw_buf(iwusrq->srq.srq_base, iwusrq->total_buf_size);
++ free(iwusrq->srq.srq_wrid_array);
++ free(iwusrq);
++
++ return 0;
++
++err:
++ return ret;
++}
++
++/**
++ * zxdh_umodify_srq - modify srq on user app
++ * @srq: srq to modify
++ * @srq_attr: new attributes of the srq
++ * @srq_attr_mask: mask of attributes to modify
++ */
++int zxdh_umodify_srq(struct ibv_srq *srq, struct ibv_srq_attr *srq_attr,
++ int srq_attr_mask)
++{
++ struct ibv_modify_srq cmd;
++ struct zxdh_usrq *iwusrq;
++ int ret;
++
++ iwusrq = container_of(srq, struct zxdh_usrq, ibv_srq);
++ ret = ibv_cmd_modify_srq(srq, srq_attr, srq_attr_mask, &cmd,
++ sizeof(cmd));
++ if (ret == 0)
++ iwusrq->srq_limit = srq_attr->srq_limit;
++ zxdh_dbg(ZXDH_DBG_SRQ, "%s iwusrq->srq_id:%d srq_attr->srq_limit:%d\n",
++ __func__, iwusrq->srq_id, srq_attr->srq_limit);
++ return ret;
++}
++
++/**
++ * zxdh_uquery_srq - query srq on user app
++ * @srq: srq to query
++ * @srq_attr: attributes of the srq to be queried
++ */
++int zxdh_uquery_srq(struct ibv_srq *srq, struct ibv_srq_attr *srq_attr)
++{
++ struct ibv_query_srq cmd;
++
++ return ibv_cmd_query_srq(srq, srq_attr, &cmd, sizeof(cmd));
++}
++
++static int zxdh_check_srq_valid(struct ibv_recv_wr *recv_wr,
++ struct zxdh_usrq *iwusrq,
++ struct zxdh_srq_uk *srq_uk)
++{
++ if (unlikely(recv_wr->num_sge > iwusrq->max_sge))
++ return -EINVAL;
++
++ if (unlikely(srq_uk->srq_ring.head == srq_uk->srq_ring.tail))
++ return -ENOMEM;
++
++ return 0;
++}
++
++static void zxdh_fill_srq_wqe(struct zxdh_usrq *iwusrq,
++ struct zxdh_srq_uk *srq_uk, __le64 *wqe_64,
++ struct ibv_recv_wr *recv_wr)
++{
++ __u32 byte_off;
++ int i;
++
++ for (i = 0, byte_off = ZXDH_SRQ_FRAG_BYTESIZE;
++ i < recv_wr->num_sge &&
++ byte_off + ZXDH_SRQ_FRAG_BYTESIZE < UINT32_MAX;
++ i++) {
++ set_64bit_val(wqe_64, byte_off, recv_wr->sg_list[i].addr);
++ set_64bit_val(wqe_64, byte_off + 8,
++ FIELD_PREP(ZXDHQPSRQ_FRAG_LEN,
++ recv_wr->sg_list[i].length) |
++ FIELD_PREP(ZXDHQPSRQ_FRAG_STAG,
++ recv_wr->sg_list[i].lkey));
++ byte_off += ZXDH_SRQ_FRAG_BYTESIZE;
++ }
++
++ if ((recv_wr->num_sge < iwusrq->max_sge) || (recv_wr->num_sge == 0)) {
++ set_64bit_val(wqe_64, byte_off, 0);
++ set_64bit_val(wqe_64, byte_off + 8,
++ FIELD_PREP(ZXDHQPSRQ_FRAG_LEN, 0) |
++ FIELD_PREP(ZXDHQPSRQ_FRAG_STAG,
++ ZXDH_SRQ_INVALID_LKEY));
++ }
++
++ set_64bit_val(wqe_64, 8, ((uint64_t)iwusrq->srq_id) << 32);
++
++ __u64 hdr = FIELD_PREP(ZXDHQPSRQ_RSV, 0) |
++ FIELD_PREP(ZXDHQPSRQ_VALID_SGE_NUM, recv_wr->num_sge) |
++ FIELD_PREP(ZXDHQPSRQ_SIGNATURE, 0) |
++ FIELD_PREP(ZXDHQPSRQ_NEXT_WQE_INDEX, srq_uk->srq_ring.head);
++
++ udma_to_device_barrier(); /* make sure WQE is populated before valid bit is set */
++ set_64bit_val(wqe_64, 0, hdr);
++
++ zxdh_dbg(ZXDH_DBG_SRQ, "%s wqe_64[0]:0x%llx\n", __func__, wqe_64[0]);
++ zxdh_dbg(ZXDH_DBG_SRQ, "%s wqe_64[1]:0x%llx\n", __func__, wqe_64[1]);
++ zxdh_dbg(ZXDH_DBG_SRQ, "%s wqe_64[2]:0x%llx\n", __func__, wqe_64[2]);
++ zxdh_dbg(ZXDH_DBG_SRQ, "%s wqe_64[3]:0x%llx\n", __func__, wqe_64[3]);
++ zxdh_dbg(ZXDH_DBG_SRQ, "%s wqe_64[4]:0x%llx\n", __func__, wqe_64[4]);
++ zxdh_dbg(ZXDH_DBG_SRQ, "%s wqe_64[5]:0x%llx\n", __func__, wqe_64[5]);
++}
++
++static void zxdh_get_wqe_index(struct zxdh_srq_uk *srq_uk, __le16 *wqe_16,
++ __u16 *buf, __u16 nreq, __u16 *idx)
++{
++ int i;
++
++ for (i = 0; i < nreq; i++) {
++ wqe_16 = zxdh_get_srq_list_wqe(srq_uk, idx);
++ udma_to_device_barrier(); /* make sure WQE is populated before valid bit is set */
++ set_16bit_val(wqe_16, 0, buf[i]);
++		zxdh_dbg(ZXDH_DBG_SRQ, "%s idx:%hu wqe_16:0x%p buf[%d]:%d\n",
++			 __func__, *idx, wqe_16, i, buf[i]);
++ }
++}
++
++static void zxdh_update_srq_db_base(struct zxdh_usrq *iwusrq, __u16 idx)
++{
++ __u64 hdr = FIELD_PREP(ZXDH_SRQ_PARITY_SIGN,
++ iwusrq->srq.srq_list_polarity) |
++ FIELD_PREP(ZXDH_SRQ_SW_SRQ_HEAD, idx);
++
++ udma_to_device_barrier(); /* make sure WQE is populated before valid bit is set */
++ set_64bit_val(iwusrq->srq.srq_db_base, 0, hdr);
++ zxdh_dbg(ZXDH_DBG_SRQ, "%s srq_db_base(hdr):0x%llx\n", __func__, hdr);
++}
++
++/**
++ * zxdh_upost_srq_recv - post srq recv on user app
++ * @srq: srq to post recv
++ * @recv_wr: a list of work requests to post on the receive queue
++ * @bad_recv_wr: pointer to first rejected wr
++ */
++int zxdh_upost_srq_recv(struct ibv_srq *srq, struct ibv_recv_wr *recv_wr,
++ struct ibv_recv_wr **bad_recv_wr)
++{
++ struct zxdh_usrq *iwusrq;
++ struct zxdh_srq_uk *srq_uk;
++ __le16 *wqe_16;
++ __le64 *wqe_64;
++ __u64 temp_val;
++ int err = 0;
++ int nreq;
++ __u16 *buf;
++ size_t buf_size;
++ __u16 idx = 0;
++
++ iwusrq = container_of(srq, struct zxdh_usrq, ibv_srq);
++ srq_uk = &iwusrq->srq;
++ pthread_spin_lock(&iwusrq->lock);
++ buf_size = iwusrq->max_wr * sizeof(__u16);
++ buf = malloc(buf_size);
++ if (buf == NULL) {
++ zxdh_dbg(ZXDH_DBG_SRQ, "malloc buf_size failed\n");
++ err = -ENOMEM;
++ goto out;
++ }
++
++ for (nreq = 0; recv_wr; nreq++, recv_wr = recv_wr->next) {
++ err = zxdh_check_srq_valid(recv_wr, iwusrq, srq_uk);
++ if (err)
++ break;
++
++ iwusrq->srq.srq_wrid_array[srq_uk->srq_ring.head] =
++ recv_wr->wr_id;
++ buf[nreq] = srq_uk->srq_ring.head;
++ wqe_64 = zxdh_get_srq_wqe(srq_uk, srq_uk->srq_ring.head);
++ get_64bit_val(wqe_64, 0, &temp_val);
++ srq_uk->srq_ring.head =
++ (__u16)FIELD_GET(ZXDHQPSRQ_NEXT_WQE_INDEX, temp_val);
++ zxdh_fill_srq_wqe(iwusrq, srq_uk, wqe_64, recv_wr);
++ }
++
++ zxdh_dbg(ZXDH_DBG_SRQ, "%s nreq:%d err:%d iwusrq->srq_id:%d\n",
++ __func__, nreq, err, iwusrq->srq_id);
++
++ if (err == 0) {
++ zxdh_get_wqe_index(srq_uk, wqe_16, buf, nreq, &idx);
++ zxdh_update_srq_db_base(iwusrq, idx);
++ }
++out:
++ pthread_spin_unlock(&iwusrq->lock);
++ if (err)
++ *bad_recv_wr = recv_wr;
++ if (buf)
++ free(buf);
++ return err;
++}
++
++/**
++ * zxdh_uget_srq_num - get srq num on user app
++ * @srq: srq to get num
++ * @srq_num: to get srq num
++ */
++int zxdh_uget_srq_num(struct ibv_srq *srq, uint32_t *srq_num)
++{
++ struct zxdh_usrq *iwusrq;
++
++ iwusrq = container_of(srq, struct zxdh_usrq, ibv_srq);
++
++ *srq_num = iwusrq->srq_id;
++ return 0;
++}
++
++void zxdh_set_debug_mask(void)
++{
++ char *env;
++
++ env = getenv("ZXDH_DEBUG_MASK");
++ if (env)
++ zxdh_debug_mask = strtol(env, NULL, 0);
++}
+diff --git a/providers/zrdma/zxdh.h b/providers/zrdma/zxdh.h
+new file mode 100644
+index 0000000..293be95
+--- /dev/null
++++ b/providers/zrdma/zxdh.h
+@@ -0,0 +1,53 @@
++/* SPDX-License-Identifier: GPL-2.0 or Linux-OpenIB */
++/* Copyright (c) 2017 - 2021 Intel Corporation */
++#ifndef ZXDH_H
++#define ZXDH_H
++
++#define ZXDH_WQEALLOC_WQE_DESC_INDEX GENMASK(31, 20)
++
++
++struct zxdh_uk_attrs {
++ __u64 feature_flags;
++ __aligned_u64 sq_db_pa;
++ __aligned_u64 cq_db_pa;
++ __u32 max_hw_wq_frags;
++ __u32 max_hw_read_sges;
++ __u32 max_hw_inline;
++ __u32 max_hw_rq_quanta;
++ __u32 max_hw_srq_quanta;
++ __u32 max_hw_wq_quanta;
++ __u32 min_hw_cq_size;
++ __u32 max_hw_cq_size;
++ __u16 max_hw_sq_chunk;
++ __u32 max_hw_srq_wr;
++ __u8 hw_rev;
++ __u8 db_addr_type;
++};
++
++struct zxdh_hw_attrs {
++ struct zxdh_uk_attrs uk_attrs;
++ __u64 max_hw_outbound_msg_size;
++ __u64 max_hw_inbound_msg_size;
++ __u64 max_mr_size;
++ __u32 min_hw_qp_id;
++ __u32 min_hw_aeq_size;
++ __u32 max_hw_aeq_size;
++ __u32 min_hw_ceq_size;
++ __u32 max_hw_ceq_size;
++ __u32 max_hw_device_pages;
++ __u32 max_hw_vf_fpm_id;
++ __u32 first_hw_vf_fpm_id;
++ __u32 max_hw_ird;
++ __u32 max_hw_ord;
++ __u32 max_hw_wqes;
++ __u32 max_hw_pds;
++ __u32 max_hw_ena_vf_count;
++ __u32 max_qp_wr;
++ __u32 max_pe_ready_count;
++ __u32 max_done_count;
++ __u32 max_sleep_count;
++ __u32 max_cqp_compl_wait_time_ms;
++ __u16 max_stat_inst;
++};
++
++#endif /* ZXDH_H*/
+diff --git a/providers/zrdma/zxdh_devids.h b/providers/zrdma/zxdh_devids.h
+new file mode 100644
+index 0000000..ac23124
+--- /dev/null
++++ b/providers/zrdma/zxdh_devids.h
+@@ -0,0 +1,17 @@
++/* SPDX-License-Identifier: GPL-2.0 or Linux-OpenIB */
++/* Copyright (c) 2024 ZTE Corporation. All rights reserved. */
++#ifndef ZXDH_DEVIDS_H
++#define ZXDH_DEVIDS_H
++
++/* ZXDH VENDOR ID */
++#define PCI_VENDOR_ID_ZXDH_EVB 0x16c3
++#define PCI_VENDOR_ID_ZXDH_E312 0x1cf2
++#define PCI_VENDOR_ID_ZXDH_X512 0x1cf2
++/* ZXDH Devices ID */
++#define ZXDH_DEV_ID_ADAPTIVE_EVB_PF 0x8040 /* ZXDH EVB PF DEVICE ID*/
++#define ZXDH_DEV_ID_ADAPTIVE_EVB_VF 0x8041 /* ZXDH EVB VF DEVICE ID*/
++#define ZXDH_DEV_ID_ADAPTIVE_E312_PF 0x8049 /* ZXDH E312 PF DEVICE ID*/
++#define ZXDH_DEV_ID_ADAPTIVE_E312_VF 0x8060 /* ZXDH E312 VF DEVICE ID*/
++#define ZXDH_DEV_ID_ADAPTIVE_X512_PF 0x806B /* ZXDH X512 PF DEVICE ID*/
++#define ZXDH_DEV_ID_ADAPTIVE_X512_VF 0x806C /* ZXDH X512 VF DEVICE ID*/
++#endif /* ZXDH_DEVIDS_H */
+diff --git a/providers/zrdma/zxdh_dv.h b/providers/zrdma/zxdh_dv.h
+new file mode 100644
+index 0000000..5708699
+--- /dev/null
++++ b/providers/zrdma/zxdh_dv.h
+@@ -0,0 +1,75 @@
++/* SPDX-License-Identifier: GPL-2.0 or Linux-OpenIB */
++/* Copyright (c) 2024 ZTE Corporation. All rights reserved. */
++#ifndef _ZXDH_API_H_
++#define _ZXDH_API_H_
++
++#include
++#include
++#include /* For the __be64 type */
++#include
++#include
++#if defined(__SSE3__)
++#include
++#include
++#include
++#endif /* defined(__SSE3__) */
++
++#include
++#include
++
++#ifdef __cplusplus
++extern "C" {
++#endif
++
++enum switch_status {
++ SWITCH_CLOSE = 0,
++ SWITCH_OPEN = 1,
++ SWITCH_ERROR,
++};
++
++enum zxdh_qp_reset_qp_code {
++ ZXDH_RESET_RETRY_TX_ITEM_FLAG = 1,
++};
++
++enum zxdh_qp_modify_qpc_mask {
++ ZXDH_RETRY_CQE_SQ_OPCODE = 1 << 0,
++ ZXDH_ERR_FLAG_SET = 1 << 1,
++ ZXDH_PACKAGE_ERR_FLAG = 1 << 2,
++ ZXDH_TX_LAST_ACK_PSN = 1 << 3,
++ ZXDH_TX_LAST_ACK_WQE_OFFSET_SET = 1 << 4,
++ ZXDH_TX_READ_RETRY_FLAG_SET = 1 << 5,
++ ZXDH_TX_RDWQE_PYLD_LENGTH = 1 << 6,
++ ZXDH_TX_RECV_READ_FLAG_SET = 1 << 7,
++ ZXDH_TX_RD_MSG_LOSS_ERR_FLAG_SET = 1 << 8,
++};
++
++struct zxdh_rdma_qpc {
++ uint8_t retry_flag;
++ uint8_t rnr_retry_flag;
++ uint8_t read_retry_flag;
++ uint8_t cur_retry_count;
++ uint8_t retry_cqe_sq_opcode;
++ uint8_t err_flag;
++ uint8_t ack_err_flag;
++ uint8_t package_err_flag;
++ uint8_t recv_err_flag;
++ uint32_t tx_last_ack_psn;
++ uint8_t retry_count;
++};
++
++int zxdh_get_log_trace_switch(struct ibv_context *context,
++ enum switch_status *status);
++int zxdh_set_log_trace_switch(struct ibv_context *context,
++ enum switch_status status);
++int zxdh_modify_qp_udp_sport(struct ibv_context *context, uint16_t udp_sport,
++ uint32_t qpn);
++int zxdh_query_qpc(struct ibv_qp *qp, struct zxdh_rdma_qpc *qpc);
++int zxdh_modify_qpc(struct ibv_qp *qp, struct zxdh_rdma_qpc *qpc,
++ uint64_t qpc_mask);
++int zxdh_reset_qp(struct ibv_qp *qp, uint64_t opcode);
++
++#ifdef __cplusplus
++}
++#endif
++
++#endif
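
For orientation only, here is a minimal sketch of how an application might consume the QPC query interface declared in zxdh_dv.h above; the include path and the QP handle are assumptions, not part of the patch:

#include <stdio.h>
#include <string.h>
#include <infiniband/verbs.h>
#include "zxdh_dv.h" /* assumed local include path */

/* Print a few retry/error fields from the QP context of a zrdma RC QP. */
static int dump_zxdh_qpc(struct ibv_qp *qp)
{
	struct zxdh_rdma_qpc qpc;
	int ret;

	memset(&qpc, 0, sizeof(qpc));
	ret = zxdh_query_qpc(qp, &qpc);
	if (ret) {
		fprintf(stderr, "zxdh_query_qpc failed: %d\n", ret);
		return ret;
	}
	printf("retry_flag=%u err_flag=%u tx_last_ack_psn=%u\n",
	       qpc.retry_flag, qpc.err_flag, qpc.tx_last_ack_psn);
	return 0;
}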
+diff --git a/rdma-tools/man/CMakeLists.txt b/rdma-tools/man/CMakeLists.txt
+new file mode 100644
+index 0000000..987527b
+--- /dev/null
++++ b/rdma-tools/man/CMakeLists.txt
+@@ -0,0 +1,4 @@
++rdma_man_pages(
++ zxdh_modify_sport.1
++ zxdh_set_log.1
++ )
+\ No newline at end of file
+diff --git a/rdma-tools/man/zxdh_modify_sport.1 b/rdma-tools/man/zxdh_modify_sport.1
+new file mode 100644
+index 0000000..6907fe9
+--- /dev/null
++++ b/rdma-tools/man/zxdh_modify_sport.1
+@@ -0,0 +1,42 @@
++.\" Licensed under the OpenIB.org BSD license (FreeBSD Variant) - See COPYING.md
++.TH ZXDH_MODIFY_SPORT 1 "Apr 30, 2024" "RDMA TOOLS"
++
++.SH NAME
++zxdh_modify_sport \- modify udp source port of zxdh rdma devices
++
++.SH SYNOPSIS
++.B zxdh_modify_sport
++[\-d device] [\-u sport] [\-q num] [\-h]
++
++.SH DESCRIPTION
++.PP
++The UDP source port is used to create entropy for network routers (ECMP),
++load balancers and 802.3ad link aggregation switching that are not aware of
++RoCE IB headers.
++
++This API enables modifying the configured UDP source port of a given RC QP
++when QP is in RTS state.
++
++.SH OPTIONS
++
++.PP
++.TP
++\fB\-d\fR, \fB\-\-ib\-dev\fR=\fIDEVICE\fR
++use IB device \fIDEVICE\fR
++.TP
++\fB\-u\fR, \fB\-\-udp\-sport\fR=\fISPORT\fR
++the UDP source port to set for the QP
++.TP
++\fB\-q\fR, \fB\-\-qpn\fR=\fINUM\fR
++the QP number (QPN) of the QP to modify
++.TP
++\fB\-h\fR, \fB\-\-help\fR
++print help information
++
++.SH AUTHORS
++.TP
++Dotan Barak
++.RI < dotanba@gmail.com >
++.TP
++Roland Dreier
++.RI < rolandd@cisco.com >
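
As a rough illustration of the library call behind this tool, the sketch below changes the UDP source port of an RC QP that is already in the RTS state; the context handle, QPN and port value are placeholder assumptions:

#include <stdio.h>
#include <stdint.h>
#include <infiniband/verbs.h>
#include "zxdh_dv.h" /* assumed include path */

/* ctx is an open zrdma device context; qpn/sport are placeholders. */
static int example_modify_sport(struct ibv_context *ctx)
{
	uint32_t qpn = 0x11;     /* placeholder QP number */
	uint16_t sport = 0xc000; /* placeholder UDP source port */
	int ret;

	ret = zxdh_modify_qp_udp_sport(ctx, sport, qpn);
	if (ret)
		fprintf(stderr, "modify udp sport failed: %d\n", ret);
	return ret;
}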
+diff --git a/rdma-tools/man/zxdh_set_log.1 b/rdma-tools/man/zxdh_set_log.1
+new file mode 100644
+index 0000000..b1f7dad
+--- /dev/null
++++ b/rdma-tools/man/zxdh_set_log.1
+@@ -0,0 +1,37 @@
++.\" Licensed under the OpenIB.org BSD license (FreeBSD Variant) - See COPYING.md
++.TH ZXDH_SET_LOG 1 "Apr 30, 2024" "RDMA TOOLS"
++
++.SH NAME
++zxdh_set_log \- set zxdh rdma devices log trace switch
++
++.SH SYNOPSIS
++.B zxdh_set_log
++[\-d device] [\-s switch] [\-g] [\-h]
++
++.SH DESCRIPTION
++.PP
++Get and set log trace switch of RDMA devices for use from userspace.
++
++.SH OPTIONS
++
++.PP
++.TP
++\fB\-d\fR, \fB\-\-ib\-dev\fR=\fIDEVICE\fR
++use IB device \fIDEVICE\fR
++.TP
++\fB\-s\fR, \fB\-\-log\-set\fR=\fISWITCH\fR
++set log trace switch (0:close, 1:open)
++.TP
++\fB\-g\fR, \fB\-\-log\-get\fR
++get log trace switch (0:close, 1:open)
++.TP
++\fB\-h\fR, \fB\-\-help\fR
++print help information
++
++.SH AUTHORS
++.TP
++Dotan Barak
++.RI < dotanba@gmail.com >
++.TP
++Roland Dreier
++.RI < rolandd@cisco.com >
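
A hedged sketch of the direct-verbs calls this tool relies on, assuming ctx is an open zrdma device context and the header is available on the include path:

#include <stdio.h>
#include <infiniband/verbs.h>
#include "zxdh_dv.h" /* assumed include path */

/* Read the current log trace switch and enable it if it is closed. */
static int enable_zxdh_log_trace(struct ibv_context *ctx)
{
	enum switch_status cur = SWITCH_ERROR;
	int ret;

	ret = zxdh_get_log_trace_switch(ctx, &cur);
	if (ret)
		return ret;
	printf("log trace switch is currently %d\n", cur);

	if (cur != SWITCH_OPEN)
		ret = zxdh_set_log_trace_switch(ctx, SWITCH_OPEN);
	return ret;
}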
+diff --git a/rdma-tools/scripts/CMakeLists.txt b/rdma-tools/scripts/CMakeLists.txt
+new file mode 100644
+index 0000000..e5e3782
+--- /dev/null
++++ b/rdma-tools/scripts/CMakeLists.txt
+@@ -0,0 +1,55 @@
++function(_rdma_sbin_interp INTERP IFN OFN)
++ configure_file("${IFN}" "${CMAKE_CURRENT_BINARY_DIR}/${OFN}" @ONLY)
++ file(WRITE "${BUILD_BIN}/${OFN}" "#!${INTERP}\nexec ${INTERP} ${CMAKE_CURRENT_BINARY_DIR}/${OFN} \"$@\"\n")
++ execute_process(COMMAND "chmod" "a+x" "${BUILD_BIN}/${OFN}")
++
++ install(FILES "${CMAKE_CURRENT_BINARY_DIR}/${OFN}"
++ DESTINATION "${CMAKE_INSTALL_SBINDIR}"
++ PERMISSIONS OWNER_WRITE OWNER_READ GROUP_READ WORLD_READ OWNER_EXECUTE GROUP_EXECUTE WORLD_EXECUTE)
++endfunction()
++
++function(_rdma_sbin_interp_link INTERP IFN OFN)
++ file(WRITE "${BUILD_BIN}/${OFN}" "#!${INTERP}\nexec ${INTERP} ${CMAKE_CURRENT_SOURCE_DIR}/${IFN} \"$@\"\n")
++ execute_process(COMMAND "chmod" "a+x" "${BUILD_BIN}/${OFN}")
++
++ install(FILES "${IFN}"
++ DESTINATION "${CMAKE_INSTALL_SBINDIR}"
++ RENAME "${OFN}"
++ PERMISSIONS OWNER_WRITE OWNER_READ GROUP_READ WORLD_READ OWNER_EXECUTE GROUP_EXECUTE WORLD_EXECUTE)
++endfunction()
++
++function(rdma_sbin_shell_program)
++ foreach(IFN ${ARGN})
++ if (IFN MATCHES "\\.sh\\.in")
++ if (DISTRO_FLAVOUR STREQUAL Debian)
++ string(REGEX REPLACE "^(.+)\\.sh\\.in$" "\\1" OFN "${IFN}")
++ else()
++ string(REGEX REPLACE "^(.+)\\.in$" "\\1" OFN "${IFN}")
++ endif()
++ _rdma_sbin_interp("/bin/bash" "${IFN}" "${OFN}")
++ elseif (IFN MATCHES "\\.in")
++ string(REGEX REPLACE "^(.+)\\.in$" "\\1" OFN "${IFN}")
++ _rdma_sbin_interp("/bin/bash" "${IFN}" "${OFN}")
++ elseif (IFN MATCHES "\\.sh")
++ if (DISTRO_FLAVOUR STREQUAL Debian)
++ string(REGEX REPLACE "^(.+)\\.sh$" "\\1" OFN "${IFN}")
++ else()
++ set(OFN "${IFN}")
++ endif()
++ _rdma_sbin_interp_link("/bin/bash" "${IFN}" "${OFN}")
++ else()
++ _rdma_sbin_interp_link("/bin/bash" "${IFN}" "${IFN}")
++ endif()
++ endforeach()
++endfunction()
++
++
++set(IBSCRIPTPATH "${CMAKE_INSTALL_FULL_SBINDIR}")
++
++rdma_sbin_shell_program(
++ show_gids
++ ibdev2netdev
++ )
++
++
++
+diff --git a/rdma-tools/scripts/ibdev2netdev b/rdma-tools/scripts/ibdev2netdev
+new file mode 100644
+index 0000000..a4982bb
+--- /dev/null
++++ b/rdma-tools/scripts/ibdev2netdev
+@@ -0,0 +1,268 @@
++#! /bin/bash
++#
++# Copyright (c) 2017 Mellanox Technologies. All rights reserved.
++#
++# This Software is licensed under one of the following licenses:
++#
++# 1) under the terms of the "Common Public License 1.0" a copy of which is
++# available from the Open Source Initiative, see
++# http://www.opensource.org/licenses/cpl.php.
++#
++# 2) under the terms of the "The BSD License" a copy of which is
++# available from the Open Source Initiative, see
++# http://www.opensource.org/licenses/bsd-license.php.
++#
++# 3) under the terms of the "GNU General Public License (GPL) Version 2" a
++# copy of which is available from the Open Source Initiative, see
++# http://www.opensource.org/licenses/gpl-license.php.
++#
++# Licensee has the right to choose one of the above licenses.
++#
++# Redistributions of source code must retain the above copyright
++# notice and one of the license notices.
++#
++# Redistributions in binary form must reproduce both the above copyright
++# notice, one of the license notices in the documentation
++# and/or other materials provided with the distribution.
++#
++
++usage()
++{
++ echo "$(basename $0) "
++ echo "-h, --help print help message"
++ echo "-v, --verbose print more info"
++}
++
++function find_pdev()
++{
++ pdevlist=$(ls /sys/bus/pci/devices)
++
++ for pdev in $pdevlist; do
++ if [ -d /sys/bus/pci/devices/$pdev/infiniband ]; then
++ ibd=$(ls /sys/bus/pci/devices/$pdev/infiniband/)
++ if [ "x$ibd" == "x$1" ]; then
++ echo -n $pdev
++ fi
++ fi
++ done
++}
++
++case $1 in
++ "-h" | "--help")
++ usage
++ exit 0
++ ;;
++esac
++
++if (( $# > 1 )); then
++ usage
++ exit -1
++fi
++
++if (( $# == 1 )) && [ "$1" != "-v" ]; then
++ usage
++ exit -1
++fi
++
++if [ ! -d /sys/class/infiniband ]; then
++ # driver is stopped
++ exit -1
++fi
++ibdevs=$(ls /sys/class/infiniband/)
++
++devs=
++for netpath in /sys/class/net/*
++do
++ if (grep 0x* ${netpath}/device/vendor > /dev/null 2>&1); then
++ devs="$devs ${netpath##*/}"
++ fi
++done
++
++if [ "x$devs" == "x" ]; then
++ # no relevant devices - quit immediately
++ exit
++fi
++
++for d in $devs; do
++ if [ -f /sys/class/net/$d/dev_id ]; then
++ oldstyle=n
++ break
++ fi
++done
++
++if [ "$1" == "-v" ];then
++ echo -e "PCIDEV\t\tDEV\t HCA VPD"
++ echo -e "------\t\t---\t --- ---"
++else
++ echo -e "DEV\t\tINDEX\tDEV State"
++ echo -e "---\t\t-----\t--- -----"
++fi
++
++function print_verbose_info()
++{
++ d=$1
++ port=$2
++ eth=$3
++ link_state=$4
++ filepath_portstate=/sys/class/infiniband/$d/ports/$port/state
++ filepath_deviceid=/sys/class/infiniband/$d/device/device
++ filepath_fwver=/sys/class/infiniband/$d/fw_ver
++ # filepath_vpd=/sys/class/infiniband/$d/device/vpd
++
++ # read port state
++ if [ -f $filepath_portstate ]; then
++ ibstate=$(printf "%-6s" $(cat $filepath_portstate | awk '{print $2}'))
++ else
++ ibstate="NA"
++ fi
++
++ # read device
++ if [ -f $filepath_deviceid ]; then
++ devid=$(printf "%d" $(cat $filepath_deviceid))
++ else
++ devid="NA"
++ fi
++
++ # read FW version
++ if [ -f $filepath_fwver ]; then
++ fwver=$(cat $filepath_fwver)
++ else
++ fwver="NA"
++ fi
++
++ # # read device description and part ID from the VPD
++ # if [ -f $filepath_vpd ]; then
++ # tmp=$IFS
++ # IFS=":"
++ # vpd_content=`cat $filepath_vpd`
++ # devdesc=$(printf "%-15s" $(echo $vpd_content | strings | head -1))
++ # partid=$(printf "%-11s" $(echo $vpd_content | strings | head -4 | tail -1 | awk '{print $1}'))
++ # IFS=$tmp
++ # else
++ # devdesc=""
++ # partid="NA"
++ # fi
++ echo "$x $d ($devid - ) fw $fwver port $port ($ibstate) ==> $eth ($link_state)"
++}
++
++function get_link_state()
++{
++ eth=$1
++ filepath_devid=/sys/class/net/$eth/dev_id
++ if [ -f $filepath_devid ]; then
++ filepath_carrier=/sys/class/net/$eth/carrier
++ if [ -f $filepath_carrier ]; then
++ link_state=$(cat $filepath_carrier 2> /dev/null)
++ if (( link_state == 1 )); then
++ link_state="Up"
++ else
++ link_state="Down"
++ fi
++ else
++ link_state="NA"
++ fi
++ fi
++ echo -n $link_state
++}
++
++if [ "x$oldstyle" == "xn" ]; then
++ for d in $ibdevs; do
++ ports=$(ls /sys/class/infiniband/$d/ports/)
++ for port in $ports; do
++ #Honor ndev given by the kernel in the gid table for RoCE, soft RoCE
++ #if kernel doesn't expose it (for IB), try the different method of
++ #resource match.
++ ethdev=$(cat /sys/class/infiniband/$d/ports/$port/gid_attrs/ndevs/0 2> /dev/null)
++ if [ "$ethdev" == "" ]; then
++ ibrsc=$(cat /sys/class/infiniband/$d/device/resource)
++ eths=$(ls /sys/class/net/)
++ for eth in $eths; do
++ filepath_resource=/sys/class/net/$eth/device/resource
++
++ if [ -f $filepath_resource ]; then
++ ethrsc=$(cat $filepath_resource)
++ if [ "x$ethrsc" == "x$ibrsc" ]; then
++ link_state=$(get_link_state $eth)
++ x=$(find_pdev $d)
++ if [ "$1" == "-v" ]; then
++ print_verbose_info $d $port $eth $link_state
++ else
++ echo "$d port $port ==> $eth ($link_state)"
++ fi
++ break
++ fi
++ fi
++ done
++ else
++ link_state=$(get_link_state $ethdev)
++ x=$(find_pdev $d)
++ if [ "$1" == "-v" ]; then
++ print_verbose_info $d $port $ethdev $link_state
++ else
++ echo "$d port $port ==> $ethdev ($link_state)"
++ fi
++ fi
++ done
++ done
++else
++##########################
++### old style
++##########################
++
++function print_line()
++{
++ echo "$1 port $2 <===> $3"
++}
++
++function find_guid()
++{
++ ibdevs=$(ls /sys/class/infiniband/)
++ for ibdev in $ibdevs; do
++ ports=$(ls /sys/class/infiniband/$ibdev/ports/)
++ for port in $ports; do
++ gids=$(ls /sys/class/infiniband/$ibdev/ports/$port/gids)
++ for gid in $gids; do
++ pguid=$(cat /sys/class/infiniband/$ibdev/ports/$port/gids/$gid | cut -b 21- | sed -e 's/://g')
++ if [ x$pguid == x$1 ]; then
++ print_line $ibdev $port $2
++ fi
++ done
++ done
++ done
++}
++
++function find_mac()
++{
++ ibdevs=$(ls /sys/class/infiniband/)
++ for ibdev in $ibdevs; do
++ ports=$(ls /sys/class/infiniband/$ibdev/ports/)
++ for port in $ports; do
++ gids=$(ls /sys/class/infiniband/$ibdev/ports/$port/gids)
++ for gid in $gids; do
++ first=$(cat /sys/class/infiniband/$ibdev/ports/$port/gids/$gid | cut -b 21-22)
++ first=$(( first ^ 2 ))
++ first=$(printf "%02x" $first)
++ second=$(cat /sys/class/infiniband/$ibdev/ports/$port/gids/$gid | cut -b 21- | sed -e 's/://g' | cut -b 3-6)
++ third=$(cat /sys/class/infiniband/$ibdev/ports/$port/gids/$gid | cut -b 21- | sed -e 's/://g' | cut -b 11-)
++ pmac=$first$second$third
++ if [ x$pmac == x$1 ]; then
++ print_line $ibdev $port $2
++ fi
++ done
++ done
++ done
++}
++
++ifcs=$(ifconfig -a | egrep '^eth|^ib' | awk '{print $1}')
++
++for ifc in $ifcs; do
++ len=$(cat /sys/class/net/$ifc/addr_len)
++ if (( len == 20 )); then
++ guid=$(cat /sys/class/net/$ifc/address | cut -b 37- | sed -e 's/://g')
++ find_guid $guid $ifc
++ elif (( len == 6)); then
++ mac=$(cat /sys/class/net/$ifc/address | sed -e 's/://g')
++ find_mac $mac $ifc
++ fi
++done
++fi
+\ No newline at end of file
+diff --git a/rdma-tools/scripts/show_gids b/rdma-tools/scripts/show_gids
+new file mode 100644
+index 0000000..0751664
+--- /dev/null
++++ b/rdma-tools/scripts/show_gids
+@@ -0,0 +1,110 @@
++#!/bin/bash
++#
++# Copyright (c) 2016 Mellanox Technologies. All rights reserved.
++#
++# This Software is licensed under one of the following licenses:
++#
++# 1) under the terms of the "Common Public License 1.0" a copy of which is
++# available from the Open Source Initiative, see
++# http://www.opensource.org/licenses/cpl.php.
++#
++# 2) under the terms of the "The BSD License" a copy of which is
++# available from the Open Source Initiative, see
++# http://www.opensource.org/licenses/bsd-license.php.
++#
++# 3) under the terms of the "GNU General Public License (GPL) Version 2" a
++# copy of which is available from the Open Source Initiative, see
++# http://www.opensource.org/licenses/gpl-license.php.
++#
++# Licensee has the right to choose one of the above licenses.
++#
++# Redistributions of source code must retain the above copyright
++# notice and one of the license notices.
++#
++# Redistributions in binary form must reproduce both the above copyright
++# notice, one of the license notices in the documentation
++# and/or other materials provided with the distribution.
++#
++# Author: Moni Shoua
++#
++
++black='\E[30;50m'
++red='\E[31;50m'
++green='\E[32;50m'
++yellow='\E[33;50m'
++blue='\E[34;50m'
++magenta='\E[35;50m'
++cyan='\E[36;50m'
++white='\E[37;50m'
++
++bold='\033[1m'
++
++gid_count=0
++
++# cecho (color echo) prints text in color.
++# first parameter should be the desired color followed by text
++function cecho ()
++{
++ echo -en $1
++ shift
++ echo -n $*
++ tput sgr0
++}
++
++# becho (color echo) prints text in bold.
++becho ()
++{
++ echo -en $bold
++ echo -n $*
++ tput sgr0
++}
++
++function print_gids()
++{
++ dev=$1
++ port=$2
++ for gf in /sys/class/infiniband/$dev/ports/$port/gids/* ; do
++ gid=$(cat $gf);
++ if [ $gid = 0000:0000:0000:0000:0000:0000:0000:0000 ] ; then
++ continue
++ fi
++ echo -e $(basename $gf) "\t" $gid
++ done
++}
++
++echo -e "DEV\t\tPORT\tINDEX\tGID\t\t\t\t\tIPv4 \t\tVER\tDEV"
++echo -e "---\t\t----\t-----\t---\t\t\t\t\t------------ \t---\t---"
++DEVS=$1
++if [ -z "$DEVS" ] ; then
++ if [ -d "/sys/class/infiniband/" ];then
++ DEVS=$(ls /sys/class/infiniband/)
++ else
++ DEVS=""
++ fi
++fi
++for d in $DEVS ; do
++ for p in $(ls /sys/class/infiniband/$d/ports/) ; do
++ for g in $(ls /sys/class/infiniband/$d/ports/$p/gids/) ; do
++ gid=$(cat /sys/class/infiniband/$d/ports/$p/gids/$g);
++ if [ $gid = 0000:0000:0000:0000:0000:0000:0000:0000 ] ; then
++ continue
++ fi
++ if [ $gid = fe80:0000:0000:0000:0000:0000:0000:0000 ] ; then
++ continue
++ fi
++ _ndev=$(cat /sys/class/infiniband/$d/ports/$p/gid_attrs/ndevs/$g 2>/dev/null)
++ __type=$(cat /sys/class/infiniband/$d/ports/$p/gid_attrs/types/$g 2>/dev/null)
++ _type=$(echo $__type| grep -o "[Vv].*")
++ if [ $(echo $gid | cut -d ":" -f -1) = "0000" ] ; then
++ ipv4=$(printf "%d.%d.%d.%d" 0x${gid:30:2} 0x${gid:32:2} 0x${gid:35:2} 0x${gid:37:2})
++ echo -e "$d\t$p\t$g\t$gid\t$ipv4 \t$_type\t$_ndev"
++ else
++ echo -e "$d\t$p\t$g\t$gid\t\t\t$_type\t$_ndev"
++ fi
++ gid_count=$(expr 1 + $gid_count)
++ done #g (gid)
++ done #p (port)
++done #d (dev)
++
++echo n_gids_found=$gid_count
++
+diff --git a/rdma-tools/tools/CMakeLists.txt b/rdma-tools/tools/CMakeLists.txt
+new file mode 100644
+index 0000000..578150b
+--- /dev/null
++++ b/rdma-tools/tools/CMakeLists.txt
+@@ -0,0 +1,13 @@
++function(zxdh_tools_programs)
++ foreach(I ${ARGN})
++ rdma_sbin_executable(${I} "${I}.c")
++ target_link_libraries(${I} LINK_PRIVATE ${COMMON_LIBS} ${NL_LIBRARIES} ${RT_LIBRARIES} ibverbs zrdma)
++ endforeach()
++endfunction()
++set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS}")
++zxdh_tools_programs(
++ zxdh_set_log
++ zxdh_modify_sport
++ )
++
++
+diff --git a/rdma-tools/tools/zxdh_modify_sport.c b/rdma-tools/tools/zxdh_modify_sport.c
+new file mode 100644
+index 0000000..a5d9bc2
+--- /dev/null
++++ b/rdma-tools/tools/zxdh_modify_sport.c
+@@ -0,0 +1,169 @@
++// SPDX-License-Identifier: GPL-2.0 or Linux-OpenIB
++/*
++ * Copyright (c) 2024 ZTE Corporation. All rights reserved.
++ */
++
++#include
++#include
++#include
++#include
++#include
++#include
++#include
++#include
++
++#include
++#include
++#include
++
++static uint16_t g_udp_sport;
++static uint32_t g_qpn;
++
++static bool modify_udp_sport;
++
++static int zxdh_ops(struct ibv_device *ib_dev, uint8_t ib_port)
++{
++ struct ibv_context *ctx;
++ struct ibv_device_attr_ex device_attr = {};
++ int rc = 0;
++
++ ctx = ibv_open_device(ib_dev);
++ if (!ctx) {
++ fprintf(stderr, "Failed to open device\n");
++ rc = 1;
++ goto cleanup;
++ }
++ if (ibv_query_device_ex(ctx, NULL, &device_attr)) {
++ fprintf(stderr, "Failed to query device props\n");
++ rc = 2;
++ goto cleanup;
++ }
++ if (ib_port && ib_port > device_attr.orig_attr.phys_port_cnt) {
++ fprintf(stderr, "Invalid port requested for device\n");
++ /* rc = 3 is taken by failure to clean up */
++ rc = 4;
++ goto cleanup;
++ }
++ if (modify_udp_sport) {
++ rc = zxdh_modify_qp_udp_sport(ctx, g_udp_sport, g_qpn);
++ if (rc) {
++ fprintf(stderr, "Modify udp sport failed\n");
++ rc = 7;
++ goto cleanup;
++ }
++		printf("Modify udp sport succeeded\n");
++ }
++
++cleanup:
++ if (ctx)
++ if (ibv_close_device(ctx)) {
++ fprintf(stderr, "Failed to close device");
++ rc = 3;
++ }
++ return rc;
++}
++
++static void usage(const char *argv0)
++{
++	printf("Usage: %s modify the udp source port of a zxdh RC QP\n", argv0);
++ printf("\n");
++ printf("Options:\n");
++ printf(" -d, --ib-dev= use IB device \n");
++ printf(" -u, --udp-sport= modify udp source port (RC support)\n");
++ printf(" -q, --qpn= modify qpn num (RC support)\n");
++ printf(" -h, --help print a help text and exit\n");
++}
++
++int main(int argc, char *argv[])
++{
++ char *ib_devname = NULL;
++ int ret = 0;
++ struct ibv_device **dev_list, **orig_dev_list;
++ int ib_port = 0;
++
++ /* parse command line options */
++ while (1) {
++ int c;
++ static struct option long_options[] = {
++ { .name = "ib-dev", .has_arg = 1, .val = 'd' },
++ { .name = "udp-sport", .has_arg = 1, .val = 'u' },
++ { .name = "qpn", .has_arg = 1, .val = 'q' },
++ { .name = "help", .has_arg = 0, .val = 'h' },
++ {}
++ };
++
++ c = getopt_long(argc, argv, "d:u:q:h", long_options, NULL);
++ if (c == -1)
++ break;
++
++ switch (c) {
++ case 'd':
++ ib_devname = strdup(optarg);
++ break;
++ case 'u':
++ g_udp_sport = strtol(optarg, NULL, 0);
++ if (g_udp_sport <= 0) {
++ usage(argv[0]);
++ return 1;
++ }
++ modify_udp_sport = true;
++ break;
++ case 'q':
++ g_qpn = strtol(optarg, NULL, 0);
++ if (g_qpn <= 0) {
++ usage(argv[0]);
++ return 1;
++ }
++ modify_udp_sport = true;
++ break;
++ case 'h':
++ SWITCH_FALLTHROUGH;
++ default:
++ usage(argv[0]);
++ return -1;
++ }
++ }
++ if (modify_udp_sport) {
++ if (g_udp_sport == 0 || g_qpn == 0) {
++			fprintf(stderr, "invalid argument\n");
++ return -1;
++ }
++ } else {
++ usage(argv[0]);
++ return -1;
++ }
++ dev_list = orig_dev_list = ibv_get_device_list(NULL);
++ if (!dev_list) {
++ perror("Failed to get IB devices list");
++ return -1;
++ }
++
++ if (ib_devname) {
++ while (*dev_list) {
++ if (!strcmp(ibv_get_device_name(*dev_list), ib_devname))
++ break;
++ ++dev_list;
++ }
++
++ if (!*dev_list) {
++ fprintf(stderr, "IB device '%s' wasn't found\n",
++ ib_devname);
++ ret = -1;
++ goto out;
++ }
++
++ ret |= zxdh_ops(*dev_list, ib_port);
++ } else {
++ fprintf(stderr, "No IB devices found\n");
++ ret = -1;
++ goto out;
++ }
++
++out:
++ if (ib_devname)
++ free(ib_devname);
++
++ ibv_free_device_list(orig_dev_list);
++
++ return ret;
++}
+diff --git a/rdma-tools/tools/zxdh_set_log.c b/rdma-tools/tools/zxdh_set_log.c
+new file mode 100644
+index 0000000..1b97485
+--- /dev/null
++++ b/rdma-tools/tools/zxdh_set_log.c
+@@ -0,0 +1,173 @@
++// SPDX-License-Identifier: GPL-2.0 or Linux-OpenIB
++/* Copyright (c) 2024 ZTE Corporation. All rights reserved. */
++
++#include
++#include
++#include
++#include
++#include
++#include
++#include
++#include
++
++#include
++#include
++#include
++
++static uint8_t g_log_set;
++
++static bool set_log_switch;
++static bool get_log_switch;
++
++static int zxdh_ops(struct ibv_device *ib_dev, uint8_t ib_port)
++{
++ struct ibv_context *ctx;
++ struct ibv_device_attr_ex device_attr = {};
++ enum switch_status status = SWITCH_ERROR;
++ int rc = 0;
++
++ ctx = ibv_open_device(ib_dev);
++ if (!ctx) {
++ fprintf(stderr, "Failed to open device\n");
++ rc = 1;
++ goto cleanup;
++ }
++ if (ibv_query_device_ex(ctx, NULL, &device_attr)) {
++ fprintf(stderr, "Failed to query device props\n");
++ rc = 2;
++ goto cleanup;
++ }
++ if (ib_port && ib_port > device_attr.orig_attr.phys_port_cnt) {
++ fprintf(stderr, "Invalid port requested for device\n");
++ /* rc = 3 is taken by failure to clean up */
++ rc = 4;
++ goto cleanup;
++ }
++ if (get_log_switch) {
++ rc = zxdh_get_log_trace_switch(ctx, &status);
++ if (rc) {
++ fprintf(stderr, "Get log trace switch failed\n");
++ rc = 5;
++ goto cleanup;
++ }
++ printf("log trace switch:%d\n", status);
++ }
++
++ if (set_log_switch) {
++ rc = zxdh_set_log_trace_switch(ctx,
++ (enum switch_status)g_log_set);
++ if (rc) {
++ fprintf(stderr, "Set log trace switch failed\n");
++ rc = 6;
++ goto cleanup;
++ }
++		printf("log trace switch set successfully\n");
++ }
++
++cleanup:
++ if (ctx)
++ if (ibv_close_device(ctx)) {
++ fprintf(stderr, "Failed to close device");
++ rc = 3;
++ }
++ return rc;
++}
++
++static void usage(const char *argv0)
++{
++	printf("Usage: %s get or set the zxdh log trace switch\n", argv0);
++ printf("\n");
++ printf("Options:\n");
++ printf(" -d, --ib-dev= use IB device \n");
++ printf(" -s, --log-set= set log trace switch (0:close, 1:open)\n");
++ printf(" -g, --log-get get log trace switch (0:close, 1:open)\n");
++ printf(" -h, --help print a help text and exit\n");
++}
++
++int main(int argc, char *argv[])
++{
++ char *ib_devname = NULL;
++ int ret = 0;
++ struct ibv_device **dev_list, **orig_dev_list;
++ int ib_port = 0;
++
++ /* parse command line options */
++ while (1) {
++ int c;
++ static struct option long_options[] = {
++ { .name = "ib-dev", .has_arg = 1, .val = 'd' },
++ { .name = "log-set", .has_arg = 1, .val = 's' },
++ { .name = "log-get", .has_arg = 0, .val = 'g' },
++ { .name = "help", .has_arg = 0, .val = 'h' },
++ {}
++ };
++
++ c = getopt_long(argc, argv, "d:s:gh", long_options, NULL);
++ if (c == -1)
++ break;
++
++ switch (c) {
++ case 'd':
++ ib_devname = strdup(optarg);
++ break;
++ case 's':
++ g_log_set = strtol(optarg, NULL, 0);
++ if (g_log_set != 0 && g_log_set != 1) {
++ usage(argv[0]);
++ return 1;
++ }
++ set_log_switch = true;
++ break;
++ case 'g':
++ get_log_switch = true;
++ break;
++ case 'h':
++ SWITCH_FALLTHROUGH;
++ default:
++ usage(argv[0]);
++ return -1;
++ }
++ }
++ if (get_log_switch && set_log_switch) {
++		fprintf(stderr,
++			"Cannot get and set the log trace switch at the same time\n");
++ return -1;
++ }
++ if (!get_log_switch && !set_log_switch)
++ get_log_switch = true;
++
++ dev_list = orig_dev_list = ibv_get_device_list(NULL);
++ if (!dev_list) {
++ perror("Failed to get IB devices list");
++ return -1;
++ }
++
++ if (ib_devname) {
++ while (*dev_list) {
++ if (!strcmp(ibv_get_device_name(*dev_list), ib_devname))
++ break;
++ ++dev_list;
++ }
++
++ if (!*dev_list) {
++ fprintf(stderr, "IB device '%s' wasn't found\n",
++ ib_devname);
++ ret = -1;
++ goto out;
++ }
++
++ ret |= zxdh_ops(*dev_list, ib_port);
++ } else {
++ fprintf(stderr, "No IB devices found\n");
++ ret = -1;
++ goto out;
++ }
++
++out:
++ if (ib_devname)
++ free(ib_devname);
++
++ ibv_free_device_list(orig_dev_list);
++
++ return ret;
++}
+diff --git a/redhat/rdma-core.spec b/redhat/rdma-core.spec
+index c6ddcfd..a5b4d71 100644
+--- a/redhat/rdma-core.spec
++++ b/redhat/rdma-core.spec
+@@ -108,6 +108,9 @@ Obsoletes: ibacm-devel < %{version}-%{release}
+ Requires: infiniband-diags%{?_isa} = %{version}-%{release}
+ Provides: infiniband-diags-devel = %{version}-%{release}
+ Obsoletes: infiniband-diags-devel < %{version}-%{release}
++Requires: rdma-tools%{?_isa} = %{version}-%{release}
++Provides: rdma-tools-devel = %{version}-%{release}
++Obsoletes: rdma-tools-devel < %{version}-%{release}
+ Provides: libibmad-devel = %{version}-%{release}
+ Obsoletes: libibmad-devel < %{version}-%{release}
+ %if %{with_static}
+@@ -146,6 +149,14 @@ Deprecated scripts and utilities which provide duplicated functionality, most
+ often at a reduced performance. These are maintained for the time being for
+ compatibility reasons.
+
++%package -n rdma-tools
++Summary: ZTE RDMA Tools
++Provides: rdma-tools
++Requires: rdma-core
++
++%description -n rdma-tools
++This package provides the ZTE RDMA tools, including show_gids, ibdev2netdev, zxdh_modify_sport and zxdh_set_log.
++
+ %package -n libibverbs
+ Summary: A library and drivers for direct userspace use of RDMA (InfiniBand/iWARP/RoCE) hardware
+ Requires(post): /sbin/ldconfig
+@@ -158,6 +169,8 @@ Provides: liberdma = %{version}-%{release}
+ Obsoletes: liberdma < %{version}-%{release}
+ Provides: libhfi1 = %{version}-%{release}
+ Obsoletes: libhfi1 < %{version}-%{release}
++Provides: libhns = %{version}-%{release}
++Obsoletes: libhns < %{version}-%{release}
+ Provides: libipathverbs = %{version}-%{release}
+ Obsoletes: libipathverbs < %{version}-%{release}
+ Provides: libirdma = %{version}-%{release}
+@@ -174,6 +187,8 @@ Provides: libocrdma = %{version}-%{release}
+ Obsoletes: libocrdma < %{version}-%{release}
+ Provides: librxe = %{version}-%{release}
+ Obsoletes: librxe < %{version}-%{release}
++Provides: libzrdma = %{version}-%{release}
++Obsoletes: libzrdma < %{version}-%{release}
+
+ %description -n libibverbs
+ libibverbs is a library that allows userspace processes to use RDMA
+@@ -188,7 +203,7 @@ Device-specific plug-in ibverbs userspace drivers are included:
+ - libefa: Amazon Elastic Fabric Adapter
+ - liberdma: Alibaba Elastic RDMA (iWarp) Adapter
+ - libhfi1: Intel Omni-Path HFI
+-- libhns: HiSilicon Hip06 SoC
++- libhns: HiSilicon Hip08+ SoC
+ - libipathverbs: QLogic InfiniPath HCA
+ - libirdma: Intel Ethernet Connection RDMA
+ - libmana: Microsoft Azure Network Adapter
+@@ -200,6 +215,7 @@ Device-specific plug-in ibverbs userspace drivers are included:
+ - librxe: A software implementation of the RoCE protocol
+ - libsiw: A software implementation of the iWarp protocol
+ - libvmw_pvrdma: VMware paravirtual RDMA device
++- libzrdma: ZTE Dinghai RDMA
+
+ %package -n libibverbs-utils
+ Summary: Examples for the libibverbs library
+@@ -575,6 +591,7 @@ fi
+ %dir %{_sysconfdir}/libibverbs.d
+ %dir %{_libdir}/libibverbs
+ %{_libdir}/libefa.so.*
++%{_libdir}/libhns.so.*
+ %{_libdir}/libibverbs*.so.*
+ %{_libdir}/libibverbs/*.so
+ %{_libdir}/libmana.so.*
+diff --git a/suse/rdma-core.spec b/suse/rdma-core.spec
+index d534dbc..e0539c5 100644
+--- a/suse/rdma-core.spec
++++ b/suse/rdma-core.spec
+@@ -35,6 +35,7 @@ License: BSD-2-Clause OR GPL-2.0-only
+ Group: Productivity/Networking/Other
+
+ %define efa_so_major 1
++%define hns_so_major 1
+ %define verbs_so_major 1
+ %define rdmacm_so_major 1
+ %define umad_so_major 3
+@@ -45,6 +46,7 @@ Group: Productivity/Networking/Other
+ %define mad_major 5
+
+ %define efa_lname libefa%{efa_so_major}
++%define hns_lname libhns%{hns_so_major}
+ %define verbs_lname libibverbs%{verbs_so_major}
+ %define rdmacm_lname librdmacm%{rdmacm_so_major}
+ %define umad_lname libibumad%{umad_so_major}
+@@ -159,6 +161,7 @@ Requires: %{umad_lname} = %{version}-%{release}
+ Requires: %{verbs_lname} = %{version}-%{release}
+ %if 0%{?dma_coherent}
+ Requires: %{efa_lname} = %{version}-%{release}
++Requires: %{hns_lname} = %{version}-%{release}
+ Requires: %{mana_lname} = %{version}-%{release}
+ Requires: %{mlx4_lname} = %{version}-%{release}
+ Requires: %{mlx5_lname} = %{version}-%{release}
+@@ -200,6 +203,7 @@ Requires: %{name}%{?_isa} = %{version}-%{release}
+ Obsoletes: libcxgb4-rdmav2 < %{version}-%{release}
+ Obsoletes: libefa-rdmav2 < %{version}-%{release}
+ Obsoletes: libhfi1verbs-rdmav2 < %{version}-%{release}
++Obsoletes: libhns-rdmav2 < %{version}-%{release}
+ Obsoletes: libipathverbs-rdmav2 < %{version}-%{release}
+ Obsoletes: libmana-rdmav2 < %{version}-%{release}
+ Obsoletes: libmlx4-rdmav2 < %{version}-%{release}
+@@ -209,6 +213,7 @@ Obsoletes: libocrdma-rdmav2 < %{version}-%{release}
+ Obsoletes: librxe-rdmav2 < %{version}-%{release}
+ %if 0%{?dma_coherent}
+ Requires: %{efa_lname} = %{version}-%{release}
++Requires: %{hns_lname} = %{version}-%{release}
+ Requires: %{mana_lname} = %{version}-%{release}
+ Requires: %{mlx4_lname} = %{version}-%{release}
+ Requires: %{mlx5_lname} = %{version}-%{release}
+@@ -228,7 +233,7 @@ Device-specific plug-in ibverbs userspace drivers are included:
+ - libcxgb4: Chelsio T4 iWARP HCA
+ - libefa: Amazon Elastic Fabric Adapter
+ - libhfi1: Intel Omni-Path HFI
+-- libhns: HiSilicon Hip06 SoC
++- libhns: HiSilicon Hip08+ SoC
+ - libipathverbs: QLogic InfiniPath HCA
+ - libirdma: Intel Ethernet Connection RDMA
+ - libmana: Microsoft Azure Network Adapter
+@@ -240,6 +245,7 @@ Device-specific plug-in ibverbs userspace drivers are included:
+ - librxe: A software implementation of the RoCE protocol
+ - libsiw: A software implementation of the iWarp protocol
+ - libvmw_pvrdma: VMware paravirtual RDMA device
++- libzrdma: ZTE Dinghai RDMA
+
+ %package -n %verbs_lname
+ Summary: Ibverbs runtime library
+@@ -256,6 +262,13 @@ Group: System/Libraries
+ %description -n %efa_lname
+ This package contains the efa runtime library.
+
++%package -n %hns_lname
++Summary: HNS runtime library
++Group: System/Libraries
++
++%description -n %hns_lname
++This package contains the hns runtime library.
++
+ %package -n %mana_lname
+ Summary: MANA runtime library
+ Group: System/Libraries
+@@ -508,6 +521,9 @@ rm -rf %{buildroot}/%{_sbindir}/srp_daemon.sh
+ %post -n %efa_lname -p /sbin/ldconfig
+ %postun -n %efa_lname -p /sbin/ldconfig
+
++%post -n %hns_lname -p /sbin/ldconfig
++%postun -n %hns_lname -p /sbin/ldconfig
++
+ %post -n %mana_lname -p /sbin/ldconfig
+ %postun -n %mana_lname -p /sbin/ldconfig
+
+@@ -700,6 +716,10 @@ done
+ %files -n %efa_lname
+ %{_libdir}/libefa*.so.*
+
++%files -n %hns_lname
++%defattr(-,root,root)
++%{_libdir}/libhns*.so.*
++
+ %files -n %mana_lname
+ %{_libdir}/libmana*.so.*
+
+diff --git a/toolchain.cmake b/toolchain.cmake
+new file mode 100644
+index 0000000..061d6f8
+--- /dev/null
++++ b/toolchain.cmake
+@@ -0,0 +1,12 @@
++set(CMAKE_SYSTEM_PROCESSOR aarch64)
++set(CMAKE_SYSTEM_NAME Linux)
++# Set the library search path for cross-compilation
++set(CMAKE_CROSSCOMPILING TRUE)
++set(CMAKE_FIND_ROOT_PATH /opt/aarch64_cgslv6.01_gcc8.3.1_glibc2.28/aarch64-pc-linux-gnu)
++set(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM NEVER)
++set(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY ONLY)
++set(CMAKE_FIND_ROOT_PATH_MODE_INCLUDE ONLY)
++set(CMAKE_FIND_ROOT_PATH_MODE_PACKAGE ONLY)
++
++# Set the path of the cross compiler
++set(CMAKE_C_COMPILER /opt/aarch64_cgslv6.01_gcc8.3.1_glibc2.28/bin/aarch64-pc-linux-gnu-gcc-8.3.1)
+--
+2.27.0
+
--
Gitee
From 6a730e4e91818616ba45bb18236b9617a4c29838 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E6=9D=8E=E5=AF=8C=E8=89=B3?=
Date: Tue, 30 Jul 2024 18:04:22 +0800
Subject: [PATCH 3/3] Add ZTE Dinghai rdma-core driver
---
...r.patch => 0036-Add-ZTE-Dinghai-RDMA.patch | 13242 +++++-----------
LICENSE | 339 -
rdma-core.spec | 17 +-
3 files changed, 4143 insertions(+), 9455 deletions(-)
rename 0036-Add-ZTE-Dinghai-rdma-core-driver.patch => 0036-Add-ZTE-Dinghai-RDMA.patch (56%)
delete mode 100644 LICENSE
diff --git a/0036-Add-ZTE-Dinghai-rdma-core-driver.patch b/0036-Add-ZTE-Dinghai-RDMA.patch
similarity index 56%
rename from 0036-Add-ZTE-Dinghai-rdma-core-driver.patch
rename to 0036-Add-ZTE-Dinghai-RDMA.patch
index f963b05..293552d 100644
--- a/0036-Add-ZTE-Dinghai-rdma-core-driver.patch
+++ b/0036-Add-ZTE-Dinghai-RDMA.patch
@@ -1,101 +1,63 @@
-From 0293fc40d5d1e40dcdc05b2adf21d853ecaa14c8 Mon Sep 17 00:00:00 2001
+From d9d86e494d8091b99209b79482b089ae091e9170 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E6=9D=8E=E5=AF=8C=E8=89=B3?=
-Date: Tue, 30 Jul 2024 16:20:03 +0800
-Subject: [PATCH] Add ZTE Dinghai rdma-core driver
+Date: Tue, 27 Aug 2024 18:34:26 +0800
+Subject: [PATCH] add ZTE Dinghai rdma driver
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+Signed-off-by: 李富艳
---
- CMakeLists.txt | 4 +
+ CMakeLists.txt | 1 +
MAINTAINERS | 4 +
README.md | 1 +
- debian/control | 3 +-
+ debian/control | 1 +
debian/copyright | 4 +
- debian/ibverbs-providers.install | 2 +
- debian/ibverbs-providers.lintian-overrides | 4 +-
- debian/ibverbs-providers.symbols | 6 +
- debian/libibverbs-dev.install | 7 +
+ debian/ibverbs-providers.install | 1 +
+ debian/libibverbs-dev.install | 4 +
kernel-headers/CMakeLists.txt | 4 +
- kernel-headers/rdma/hns-abi.h | 103 +-
kernel-headers/rdma/ib_user_ioctl_verbs.h | 1 +
kernel-headers/rdma/zxdh-abi.h | 143 +
kernel-headers/rdma/zxdh_user_ioctl_cmds.h | 56 +
kernel-headers/rdma/zxdh_user_ioctl_verbs.h | 34 +
- librdmacm/cma.c | 2 +-
- providers/bnxt_re/verbs.c | 2 +-
- providers/hns/CMakeLists.txt | 9 +-
- providers/hns/hns_roce_u.c | 360 ++-
- providers/hns/hns_roce_u.h | 198 +-
- providers/hns/hns_roce_u_abi.h | 4 +
- providers/hns/hns_roce_u_buf.c | 466 +++
- providers/hns/hns_roce_u_db.c | 2 +
- providers/hns/hns_roce_u_db.h | 8 +-
- providers/hns/hns_roce_u_hw_v2.c | 583 +++-
- providers/hns/hns_roce_u_hw_v2.h | 4 +
- providers/hns/hns_roce_u_verbs.c | 617 +++-
- providers/hns/hnsdv.h | 85 +
- providers/hns/libhns.map | 10 +
- providers/zrdma/CMakeLists.txt | 18 +
- providers/zrdma/abi.h | 36 +
- providers/zrdma/defs.h | 388 +++
+ libibverbs/verbs.h | 1 +
+ providers/zrdma/CMakeLists.txt | 17 +
providers/zrdma/libzrdma.map | 16 +
- providers/zrdma/osdep.h | 21 +
- providers/zrdma/private_verbs_cmd.c | 203 ++
+ providers/zrdma/main.c | 202 ++
+ providers/zrdma/main.h | 223 ++
+ providers/zrdma/private_verbs_cmd.c | 201 ++
providers/zrdma/private_verbs_cmd.h | 24 +
- providers/zrdma/status.h | 75 +
- providers/zrdma/uk.c | 2616 +++++++++++++++
- providers/zrdma/umain.c | 236 ++
- providers/zrdma/umain.h | 228 ++
- providers/zrdma/user.h | 572 ++++
- providers/zrdma/uverbs.c | 3209 +++++++++++++++++++
- providers/zrdma/zxdh.h | 53 +
+ providers/zrdma/zxdh_abi.h | 36 +
+ providers/zrdma/zxdh_defs.h | 399 +++
providers/zrdma/zxdh_devids.h | 17 +
providers/zrdma/zxdh_dv.h | 75 +
- rdma-tools/man/CMakeLists.txt | 4 +
- rdma-tools/man/zxdh_modify_sport.1 | 42 +
- rdma-tools/man/zxdh_set_log.1 | 37 +
- rdma-tools/scripts/CMakeLists.txt | 55 +
- rdma-tools/scripts/ibdev2netdev | 268 ++
- rdma-tools/scripts/show_gids | 110 +
- rdma-tools/tools/CMakeLists.txt | 13 +
- rdma-tools/tools/zxdh_modify_sport.c | 169 +
- rdma-tools/tools/zxdh_set_log.c | 173 +
- redhat/rdma-core.spec | 19 +-
- suse/rdma-core.spec | 22 +-
- toolchain.cmake | 12 +
- 57 files changed, 11243 insertions(+), 194 deletions(-)
+ providers/zrdma/zxdh_hw.c | 2596 +++++++++++++++
+ providers/zrdma/zxdh_status.h | 75 +
+ providers/zrdma/zxdh_verbs.c | 3193 +++++++++++++++++++
+ providers/zrdma/zxdh_verbs.h | 611 ++++
+ redhat/rdma-core.spec | 4 +
+ suse/rdma-core.spec | 1 +
+ 29 files changed, 7945 insertions(+)
create mode 100644 kernel-headers/rdma/zxdh-abi.h
create mode 100644 kernel-headers/rdma/zxdh_user_ioctl_cmds.h
create mode 100644 kernel-headers/rdma/zxdh_user_ioctl_verbs.h
- create mode 100644 providers/hns/hnsdv.h
- create mode 100644 providers/hns/libhns.map
create mode 100644 providers/zrdma/CMakeLists.txt
- create mode 100644 providers/zrdma/abi.h
- create mode 100644 providers/zrdma/defs.h
create mode 100644 providers/zrdma/libzrdma.map
- create mode 100644 providers/zrdma/osdep.h
+ create mode 100644 providers/zrdma/main.c
+ create mode 100644 providers/zrdma/main.h
create mode 100644 providers/zrdma/private_verbs_cmd.c
create mode 100644 providers/zrdma/private_verbs_cmd.h
- create mode 100644 providers/zrdma/status.h
- create mode 100644 providers/zrdma/uk.c
- create mode 100644 providers/zrdma/umain.c
- create mode 100644 providers/zrdma/umain.h
- create mode 100644 providers/zrdma/user.h
- create mode 100644 providers/zrdma/uverbs.c
- create mode 100644 providers/zrdma/zxdh.h
+ create mode 100644 providers/zrdma/zxdh_abi.h
+ create mode 100644 providers/zrdma/zxdh_defs.h
create mode 100644 providers/zrdma/zxdh_devids.h
create mode 100644 providers/zrdma/zxdh_dv.h
- create mode 100644 rdma-tools/man/CMakeLists.txt
- create mode 100644 rdma-tools/man/zxdh_modify_sport.1
- create mode 100644 rdma-tools/man/zxdh_set_log.1
- create mode 100644 rdma-tools/scripts/CMakeLists.txt
- create mode 100644 rdma-tools/scripts/ibdev2netdev
- create mode 100644 rdma-tools/scripts/show_gids
- create mode 100644 rdma-tools/tools/CMakeLists.txt
- create mode 100644 rdma-tools/tools/zxdh_modify_sport.c
- create mode 100644 rdma-tools/tools/zxdh_set_log.c
- create mode 100644 toolchain.cmake
+ create mode 100644 providers/zrdma/zxdh_hw.c
+ create mode 100644 providers/zrdma/zxdh_status.h
+ create mode 100644 providers/zrdma/zxdh_verbs.c
+ create mode 100644 providers/zrdma/zxdh_verbs.h
diff --git a/CMakeLists.txt b/CMakeLists.txt
-index 98985e7..bf3097d 100644
+index 98985e7..432a650 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -748,6 +748,7 @@ add_subdirectory(providers/mthca)
@@ -106,16 +68,6 @@ index 98985e7..bf3097d 100644
endif()
add_subdirectory(providers/hfi1verbs)
-@@ -762,6 +763,9 @@ add_subdirectory(libibnetdisc/man)
- add_subdirectory(infiniband-diags)
- add_subdirectory(infiniband-diags/scripts)
- add_subdirectory(infiniband-diags/man)
-+add_subdirectory(rdma-tools/scripts)
-+add_subdirectory(rdma-tools/tools)
-+add_subdirectory(rdma-tools/man)
-
- if (CYTHON_EXECUTABLE)
- add_subdirectory(pyverbs)
diff --git a/MAINTAINERS b/MAINTAINERS
index 4b24117..394c4da 100644
--- a/MAINTAINERS
@@ -144,23 +96,14 @@ index 928bdc4..8f47d3c 100644
Additional service daemons are provided for:
- srp_daemon (ib_srp.ko)
diff --git a/debian/control b/debian/control
-index 160824f..4dda44e 100644
+index 160824f..f15ba96 100644
--- a/debian/control
+++ b/debian/control
-@@ -87,7 +87,7 @@ Description: User space provider drivers for libibverbs
- - efa: Amazon Elastic Fabric Adapter
- - erdma: Alibaba Elastic RDMA (iWarp) Adapter
- - hfi1verbs: Intel Omni-Path HFI
-- - hns: HiSilicon Hip06 SoC
-+ - hns: HiSilicon Hip08+ SoC
- - ipathverbs: QLogic InfiniPath HCAs
- - irdma: Intel Ethernet Connection RDMA
- - mana: Microsoft Azure Network Adapter
@@ -99,6 +99,7 @@ Description: User space provider drivers for libibverbs
- rxe: A software implementation of the RoCE protocol
- siw: A software implementation of the iWarp protocol
- vmw_pvrdma: VMware paravirtual RDMA device
-+ - zrdma: ZTE Dinghai RDMA
++ - zrdma: ZTE Connection RDMA
Package: ibverbs-utils
Architecture: linux-any
@@ -180,75 +123,33 @@ index 36ac71e..5c9e5a0 100644
Copyright: 2004-2016, Intel Corporation.
License: BSD-MIT or GPL-2
diff --git a/debian/ibverbs-providers.install b/debian/ibverbs-providers.install
-index a003a30..360516f 100644
+index a003a30..9a53768 100644
--- a/debian/ibverbs-providers.install
+++ b/debian/ibverbs-providers.install
-@@ -1,6 +1,8 @@
- etc/libibverbs.d/
- usr/lib/*/libefa.so.*
- usr/lib/*/libibverbs/lib*-rdmav*.so
-+usr/lib/*/libhns.so.*
+@@ -4,3 +4,4 @@ usr/lib/*/libibverbs/lib*-rdmav*.so
usr/lib/*/libmana.so.*
usr/lib/*/libmlx4.so.*
usr/lib/*/libmlx5.so.*
+usr/lib/*/libzrdma.so.*
-diff --git a/debian/ibverbs-providers.lintian-overrides b/debian/ibverbs-providers.lintian-overrides
-index 5815058..fd73a76 100644
---- a/debian/ibverbs-providers.lintian-overrides
-+++ b/debian/ibverbs-providers.lintian-overrides
-@@ -1,2 +1,2 @@
--# libefa, libmana, libmlx4 and libmlx5 are ibverbs provider that provides more functions.
--ibverbs-providers: package-name-doesnt-match-sonames libefa1 libmana1 libmlx4-1 libmlx5-1
-+# libefa, libhns, libmana, libmlx4 and libmlx5 are ibverbs provider that provides more functions.
-+ibverbs-providers: package-name-doesnt-match-sonames libefa1 libhns-1 libmana1 libmlx4-1 libmlx5-1
-diff --git a/debian/ibverbs-providers.symbols b/debian/ibverbs-providers.symbols
-index 72361bd..d2c0989 100644
---- a/debian/ibverbs-providers.symbols
-+++ b/debian/ibverbs-providers.symbols
-@@ -174,6 +174,12 @@ libefa.so.1 ibverbs-providers #MINVER#
- efadv_cq_from_ibv_cq_ex@EFA_1.2 43
- efadv_create_cq@EFA_1.2 43
- efadv_query_mr@EFA_1.3 50
-+libhns.so.1 ibverbs-providers #MINVER#
-+* Build-Depends-Package: libibverbs-dev
-+ HNS_1.0@HNS_1.0 51
-+ hnsdv_is_supported@HNS_1.0 51
-+ hnsdv_create_qp@HNS_1.0 51
-+ hnsdv_query_device@HNS_1.0 51
- libmana.so.1 ibverbs-providers #MINVER#
- * Build-Depends-Package: libibverbs-dev
- MANA_1.0@MANA_1.0 41
diff --git a/debian/libibverbs-dev.install b/debian/libibverbs-dev.install
-index 5f2ffd5..ebc3df9 100644
+index 5f2ffd5..71e5514 100644
--- a/debian/libibverbs-dev.install
+++ b/debian/libibverbs-dev.install
-@@ -1,5 +1,6 @@
- usr/include/infiniband/arch.h
- usr/include/infiniband/efadv.h
-+usr/include/infiniband/hnsdv.h
- usr/include/infiniband/ib_user_ioctl_verbs.h
- usr/include/infiniband/manadv.h
- usr/include/infiniband/mlx4dv.h
-@@ -12,9 +13,12 @@ usr/include/infiniband/sa.h
+@@ -12,6 +12,8 @@ usr/include/infiniband/sa.h
usr/include/infiniband/tm_types.h
usr/include/infiniband/verbs.h
usr/include/infiniband/verbs_api.h
+usr/include/infiniband/zxdh_dv.h
++usr/include/infiniband/zxdh_devids.h
usr/lib/*/lib*-rdmav*.a
usr/lib/*/libefa.a
usr/lib/*/libefa.so
-+usr/lib/*/libhns.a
-+usr/lib/*/libhns.so
- usr/lib/*/libibverbs*.so
- usr/lib/*/libibverbs.a
- usr/lib/*/libmana.a
-@@ -23,11 +27,14 @@ usr/lib/*/libmlx4.a
+@@ -23,11 +25,13 @@ usr/lib/*/libmlx4.a
usr/lib/*/libmlx4.so
usr/lib/*/libmlx5.a
usr/lib/*/libmlx5.so
+usr/lib/*/libzrdma.so
usr/lib/*/pkgconfig/libefa.pc
-+usr/lib/*/pkgconfig/libhns.pc
usr/lib/*/pkgconfig/libibverbs.pc
usr/lib/*/pkgconfig/libmana.pc
usr/lib/*/pkgconfig/libmlx4.pc
@@ -279,189 +180,26 @@ index 82c191c..9ceac31 100644
)
publish_headers(infiniband
-diff --git a/kernel-headers/rdma/hns-abi.h b/kernel-headers/rdma/hns-abi.h
-index c996e15..8a8f2e4 100644
---- a/kernel-headers/rdma/hns-abi.h
-+++ b/kernel-headers/rdma/hns-abi.h
-@@ -73,21 +73,38 @@ struct hns_roce_ib_create_srq_resp {
- __u32 cap_flags; /* Use enum hns_roce_srq_cap_flags */
- };
-
-+enum hns_roce_congest_type_flags {
-+ HNS_ROCE_CREATE_QP_FLAGS_DCQCN,
-+ HNS_ROCE_CREATE_QP_FLAGS_LDCP,
-+ HNS_ROCE_CREATE_QP_FLAGS_HC3,
-+ HNS_ROCE_CREATE_QP_FLAGS_DIP,
-+};
-+
-+enum hns_roce_create_qp_comp_mask {
-+ HNS_ROCE_CREATE_QP_MASK_CONGEST_TYPE = 1 << 0,
-+};
-+
- struct hns_roce_ib_create_qp {
- __aligned_u64 buf_addr;
- __aligned_u64 db_addr;
- __u8 log_sq_bb_count;
- __u8 log_sq_stride;
- __u8 sq_no_prefetch;
-- __u8 reserved[5];
-+ __u8 pageshift;
-+ __u8 reserved[4];
- __aligned_u64 sdb_addr;
-+ __aligned_u64 comp_mask; /* Use enum hns_roce_create_qp_comp_mask */
-+ __aligned_u64 create_flags;
-+ __aligned_u64 cong_type_flags;
- };
-
- enum hns_roce_qp_cap_flags {
- HNS_ROCE_QP_CAP_RQ_RECORD_DB = 1 << 0,
- HNS_ROCE_QP_CAP_SQ_RECORD_DB = 1 << 1,
- HNS_ROCE_QP_CAP_OWNER_DB = 1 << 2,
-+ HNS_ROCE_QP_CAP_DYNAMIC_CTX_ATTACH = 1 << 4,
- HNS_ROCE_QP_CAP_DIRECT_WQE = 1 << 5,
-+ HNS_ROCE_QP_CAP_DYNAMIC_CTX_DETACH = 1 << 6,
- };
-
- struct hns_roce_ib_create_qp_resp {
-@@ -95,16 +112,28 @@ struct hns_roce_ib_create_qp_resp {
- __aligned_u64 dwqe_mmap_key;
- };
-
-+struct hns_roce_ib_modify_qp_resp {
-+ __u8 tc_mode;
-+ __u8 priority;
-+ __u8 reserved[6];
-+ __u32 dcan;
-+ __u32 rsv2;
-+};
-+
- enum {
- HNS_ROCE_EXSGE_FLAGS = 1 << 0,
- HNS_ROCE_RQ_INLINE_FLAGS = 1 << 1,
- HNS_ROCE_CQE_INLINE_FLAGS = 1 << 2,
-+ HNS_ROCE_UCTX_CONFIG_DCA = 1 << 3,
-+ HNS_ROCE_UCTX_DYN_QP_PGSZ = 1 << 4,
- };
-
- enum {
- HNS_ROCE_RSP_EXSGE_FLAGS = 1 << 0,
- HNS_ROCE_RSP_RQ_INLINE_FLAGS = 1 << 1,
- HNS_ROCE_RSP_CQE_INLINE_FLAGS = 1 << 2,
-+ HNS_ROCE_UCTX_RSP_DCA_FLAGS = HNS_ROCE_UCTX_CONFIG_DCA,
-+ HNS_ROCE_UCTX_RSP_DYN_QP_PGSZ = HNS_ROCE_UCTX_DYN_QP_PGSZ,
- };
-
- struct hns_roce_ib_alloc_ucontext_resp {
-@@ -114,10 +143,22 @@ struct hns_roce_ib_alloc_ucontext_resp {
- __u32 reserved;
- __u32 config;
- __u32 max_inline_data;
-+ __u8 congest_type;
-+ __u8 reserved0[7];
-+ __u32 dca_qps;
-+ __u32 dca_mmap_size;
-+ __aligned_u64 dca_mmap_key;
-+ __aligned_u64 reset_mmap_key;
-+};
-+
-+enum hns_roce_uctx_comp_mask {
-+ HNS_ROCE_ALLOC_UCTX_COMP_DCA_MAX_QPS = 1 << 0,
- };
-
- struct hns_roce_ib_alloc_ucontext {
- __u32 config;
-+ __u32 comp; /* use hns_roce_uctx_comp_mask */
-+ __u32 dca_max_qps;
- __u32 reserved;
- };
-
-@@ -127,7 +168,65 @@ struct hns_roce_ib_alloc_pd_resp {
-
- struct hns_roce_ib_create_ah_resp {
- __u8 dmac[6];
-- __u8 reserved[2];
-+ __u8 priority;
-+ __u8 tc_mode;
-+};
-+
-+#define UVERBS_ID_NS_MASK 0xF000
-+#define UVERBS_ID_NS_SHIFT 12
-+
-+enum hns_ib_objects {
-+ HNS_IB_OBJECT_DCA_MEM = (1U << UVERBS_ID_NS_SHIFT),
-+};
-+
-+enum hns_ib_dca_mem_methods {
-+ HNS_IB_METHOD_DCA_MEM_REG = (1U << UVERBS_ID_NS_SHIFT),
-+ HNS_IB_METHOD_DCA_MEM_DEREG,
-+ HNS_IB_METHOD_DCA_MEM_SHRINK,
-+ HNS_IB_METHOD_DCA_MEM_ATTACH,
-+ HNS_IB_METHOD_DCA_MEM_DETACH,
-+ HNS_IB_METHOD_DCA_MEM_QUERY,
-+};
-+
-+enum hns_ib_dca_mem_reg_attrs {
-+ HNS_IB_ATTR_DCA_MEM_REG_HANDLE = (1U << UVERBS_ID_NS_SHIFT),
-+ HNS_IB_ATTR_DCA_MEM_REG_FLAGS,
-+ HNS_IB_ATTR_DCA_MEM_REG_LEN,
-+ HNS_IB_ATTR_DCA_MEM_REG_ADDR,
-+ HNS_IB_ATTR_DCA_MEM_REG_KEY,
-+};
-+
-+enum hns_ib_dca_mem_dereg_attrs {
-+ HNS_IB_ATTR_DCA_MEM_DEREG_HANDLE = (1U << UVERBS_ID_NS_SHIFT),
-+};
-+
-+enum hns_ib_dca_mem_shrink_attrs {
-+ HNS_IB_ATTR_DCA_MEM_SHRINK_HANDLE = (1U << UVERBS_ID_NS_SHIFT),
-+ HNS_IB_ATTR_DCA_MEM_SHRINK_RESERVED_SIZE,
-+ HNS_IB_ATTR_DCA_MEM_SHRINK_OUT_FREE_KEY,
-+ HNS_IB_ATTR_DCA_MEM_SHRINK_OUT_FREE_MEMS,
-+};
-+
-+enum hns_ib_dca_mem_attach_attrs {
-+ HNS_IB_ATTR_DCA_MEM_ATTACH_HANDLE = (1U << UVERBS_ID_NS_SHIFT),
-+ HNS_IB_ATTR_DCA_MEM_ATTACH_SQ_OFFSET,
-+ HNS_IB_ATTR_DCA_MEM_ATTACH_SGE_OFFSET,
-+ HNS_IB_ATTR_DCA_MEM_ATTACH_RQ_OFFSET,
-+ HNS_IB_ATTR_DCA_MEM_ATTACH_OUT_ALLOC_FLAGS,
-+ HNS_IB_ATTR_DCA_MEM_ATTACH_OUT_ALLOC_PAGES,
-+};
-+
-+enum hns_ib_dca_mem_detach_attrs {
-+ HNS_IB_ATTR_DCA_MEM_DETACH_HANDLE = (1U << UVERBS_ID_NS_SHIFT),
-+ HNS_IB_ATTR_DCA_MEM_DETACH_SQ_INDEX,
-+};
-+
-+enum hns_ib_dca_mem_query_attrs {
-+ HNS_IB_ATTR_DCA_MEM_QUERY_HANDLE = (1U << UVERBS_ID_NS_SHIFT),
-+ HNS_IB_ATTR_DCA_MEM_QUERY_PAGE_INDEX,
-+ HNS_IB_ATTR_DCA_MEM_QUERY_OUT_KEY,
-+ HNS_IB_ATTR_DCA_MEM_QUERY_OUT_OFFSET,
-+ HNS_IB_ATTR_DCA_MEM_QUERY_OUT_PAGE_COUNT,
- };
-
- #endif /* HNS_ABI_USER_H */
diff --git a/kernel-headers/rdma/ib_user_ioctl_verbs.h b/kernel-headers/rdma/ib_user_ioctl_verbs.h
-index fe15bc7..df1b2b6 100644
+index fe15bc7..17e6326 100644
--- a/kernel-headers/rdma/ib_user_ioctl_verbs.h
+++ b/kernel-headers/rdma/ib_user_ioctl_verbs.h
@@ -255,6 +255,7 @@ enum rdma_driver_id {
RDMA_DRIVER_SIW,
RDMA_DRIVER_ERDMA,
RDMA_DRIVER_MANA,
-+ RDMA_DRIVER_ZXDH
++ RDMA_DRIVER_ZXDH,
};
enum ib_uverbs_gid_type {
diff --git a/kernel-headers/rdma/zxdh-abi.h b/kernel-headers/rdma/zxdh-abi.h
new file mode 100644
-index 0000000..8e7fa3d
+index 0000000..665f874
--- /dev/null
+++ b/kernel-headers/rdma/zxdh-abi.h
@@ -0,0 +1,143 @@
+/* SPDX-License-Identifier: GPL-2.0 or Linux-OpenIB */
-+/* Copyright (C) 2019 - 2020 Intel Corporation */
++/* Copyright (c) 2024 ZTE Corporation. All rights reserved. */
+
+#ifndef ZXDH_ABI_H
+#define ZXDH_ABI_H
@@ -705,8065 +443,3977 @@ index 0000000..bc0e812
+};
+
+#endif
-diff --git a/librdmacm/cma.c b/librdmacm/cma.c
-index 7b924bd..0a631bd 100644
---- a/librdmacm/cma.c
-+++ b/librdmacm/cma.c
-@@ -311,7 +311,7 @@ static void remove_cma_dev(struct cma_device *cma_dev)
-
- static int dev_cmp(const void *a, const void *b)
- {
-- return (int)(*(char *const *)a - *(char *const *)b);
-+ return (*(uintptr_t *)a > *(uintptr_t *)b) - (*(uintptr_t *)a < *(uintptr_t *)b);
- }
-
- static int sync_devices_list(void)
-diff --git a/providers/bnxt_re/verbs.c b/providers/bnxt_re/verbs.c
-index 55d5284..a74d32c 100644
---- a/providers/bnxt_re/verbs.c
-+++ b/providers/bnxt_re/verbs.c
-@@ -1233,7 +1233,7 @@ static int bnxt_re_alloc_queues(struct bnxt_re_context *cntx,
- /* psn_depth extra entries of size que->stride */
- psn_size = bnxt_re_get_psne_size(qp->cntx);
- psn_depth = (nswr * psn_size) / que->stride;
-- que->pad_stride_log2 = (uint32_t)ilog32(psn_size);
-+ que->pad_stride_log2 = ilog32(psn_size - 1);
- if ((nswr * psn_size) % que->stride)
- psn_depth++;
- que->depth += psn_depth;
-diff --git a/providers/hns/CMakeLists.txt b/providers/hns/CMakeLists.txt
-index 7aaca75..58139ae 100644
---- a/providers/hns/CMakeLists.txt
-+++ b/providers/hns/CMakeLists.txt
-@@ -1,7 +1,14 @@
--rdma_provider(hns
-+rdma_shared_provider(hns libhns.map
-+ 1 1.0.${PACKAGE_VERSION}
- hns_roce_u.c
- hns_roce_u_buf.c
- hns_roce_u_db.c
- hns_roce_u_hw_v2.c
- hns_roce_u_verbs.c
- )
+diff --git a/libibverbs/verbs.h b/libibverbs/verbs.h
+index 78129fd..be0e76b 100644
+--- a/libibverbs/verbs.h
++++ b/libibverbs/verbs.h
+@@ -2275,6 +2275,7 @@ extern const struct verbs_device_ops verbs_provider_qedr;
+ extern const struct verbs_device_ops verbs_provider_rxe;
+ extern const struct verbs_device_ops verbs_provider_siw;
+ extern const struct verbs_device_ops verbs_provider_vmw_pvrdma;
++extern const struct verbs_device_ops verbs_provider_zrdma;
+ extern const struct verbs_device_ops verbs_provider_all;
+ extern const struct verbs_device_ops verbs_provider_none;
+ void ibv_static_providers(void *unused, ...);
+diff --git a/providers/zrdma/CMakeLists.txt b/providers/zrdma/CMakeLists.txt
+new file mode 100644
+index 0000000..1af572a
+--- /dev/null
++++ b/providers/zrdma/CMakeLists.txt
+@@ -0,0 +1,17 @@
++# SPDX-License-Identifier: GPL-2.0 or Linux-OpenIB
++# Copyright (c) 2024 ZTE Corporation. All rights reserved.
++set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -Werror")
++rdma_shared_provider(zrdma libzrdma.map
++ 1 1.1.${PACKAGE_VERSION}
++ zxdh_hw.c
++ main.c
++ zxdh_verbs.c
++ private_verbs_cmd.c
++)
+
+publish_headers(infiniband
-+ hnsdv.h
++ zxdh_dv.h
+)
+
-+rdma_pkg_config("hns" "libibverbs" "${CMAKE_THREAD_LIBS_INIT}")
-diff --git a/providers/hns/hns_roce_u.c b/providers/hns/hns_roce_u.c
-index 266e73e..e219b9e 100644
---- a/providers/hns/hns_roce_u.c
-+++ b/providers/hns/hns_roce_u.c
-@@ -53,6 +53,8 @@ static const struct verbs_match_ent hca_table[] = {
- VERBS_PCI_MATCH(PCI_VENDOR_ID_HUAWEI, 0xA226, &hns_roce_u_hw_v2),
- VERBS_PCI_MATCH(PCI_VENDOR_ID_HUAWEI, 0xA227, &hns_roce_u_hw_v2),
- VERBS_PCI_MATCH(PCI_VENDOR_ID_HUAWEI, 0xA228, &hns_roce_u_hw_v2),
-+ VERBS_PCI_MATCH(PCI_VENDOR_ID_HUAWEI, 0xA22C, &hns_roce_u_hw_v2),
-+ VERBS_PCI_MATCH(PCI_VENDOR_ID_HUAWEI, 0xA22D, &hns_roce_u_hw_v2),
- VERBS_PCI_MATCH(PCI_VENDOR_ID_HUAWEI, 0xA22F, &hns_roce_u_hw_v2),
- {}
- };
-@@ -67,7 +69,7 @@ static const struct verbs_context_ops hns_common_ops = {
- .create_qp = hns_roce_u_create_qp,
- .create_qp_ex = hns_roce_u_create_qp_ex,
- .dealloc_mw = hns_roce_u_dealloc_mw,
-- .dealloc_pd = hns_roce_u_free_pd,
-+ .dealloc_pd = hns_roce_u_dealloc_pd,
- .dereg_mr = hns_roce_u_dereg_mr,
- .destroy_cq = hns_roce_u_destroy_cq,
- .modify_cq = hns_roce_u_modify_cq,
-@@ -88,8 +90,43 @@ static const struct verbs_context_ops hns_common_ops = {
- .close_xrcd = hns_roce_u_close_xrcd,
- .open_qp = hns_roce_u_open_qp,
- .get_srq_num = hns_roce_u_get_srq_num,
-+ .alloc_td = hns_roce_u_alloc_td,
-+ .dealloc_td = hns_roce_u_dealloc_td,
-+ .alloc_parent_domain = hns_roce_u_alloc_pad,
- };
-
-+static struct {
-+ uint32_t device_id;
-+ enum hns_device_link_type link_type;
-+} device_link_types[] = {
-+ {0xA222, HNS_DEV_LINK_TYPE_ETH},
-+ {0xA223, HNS_DEV_LINK_TYPE_ETH},
-+ {0xA224, HNS_DEV_LINK_TYPE_ETH},
-+ {0xA225, HNS_DEV_LINK_TYPE_ETH},
-+ {0xA226, HNS_DEV_LINK_TYPE_ETH},
-+ {0xA228, HNS_DEV_LINK_TYPE_ETH},
-+ {0xA22F, HNS_DEV_LINK_TYPE_ETH},
-+ {0xA227, HNS_DEV_LINK_TYPE_HCCS},
-+ {0xA22C, HNS_DEV_LINK_TYPE_HCCS},
-+ {0xA22D, HNS_DEV_LINK_TYPE_HCCS}
-+};
-+
-+static int get_link_type(uint32_t device_id,
-+ enum hns_device_link_type *link_type)
-+{
-+ int i;
-+
-+ for (i = 0; i < ARRAY_SIZE(device_link_types); i++) {
-+ if (device_id == device_link_types[i].device_id) {
-+ *link_type = device_link_types[i].link_type;
-+ return 0;
-+ }
-+ }
-+
-+ return ENOENT;
-+}
+
++rdma_pkg_config("zrdma" "libibverbs" "${CMAKE_THREAD_LIBS_INIT}")
+diff --git a/providers/zrdma/libzrdma.map b/providers/zrdma/libzrdma.map
+new file mode 100644
+index 0000000..f95de4b
+--- /dev/null
++++ b/providers/zrdma/libzrdma.map
+@@ -0,0 +1,16 @@
++/* Export symbols should be added below according to
++ Documentation/versioning.md. */
++ZRDMA_1.0 {
++ global:
++ zxdh_get_log_trace_switch;
++ local: *;
++};
+
- static uint32_t calc_table_shift(uint32_t entry_count, uint32_t size_shift)
- {
- uint32_t count_shift = hr_ilog32(entry_count);
-@@ -97,50 +134,189 @@ static uint32_t calc_table_shift(uint32_t entry_count, uint32_t size_shift)
- return count_shift > size_shift ? count_shift - size_shift : 0;
- }
-
--static struct verbs_context *hns_roce_alloc_context(struct ibv_device *ibdev,
-- int cmd_fd,
-- void *private_data)
-+static int hns_roce_mmap(struct hns_roce_device *hr_dev,
-+ struct hns_roce_context *context, int cmd_fd)
- {
-- struct hns_roce_device *hr_dev = to_hr_dev(ibdev);
-- struct hns_roce_alloc_ucontext_resp resp = {};
-- struct hns_roce_alloc_ucontext cmd = {};
-- struct ibv_device_attr dev_attrs;
-- struct hns_roce_context *context;
-- int i;
-+ int page_size = hr_dev->page_size;
-
-- context = verbs_init_and_alloc_context(ibdev, cmd_fd, context, ibv_ctx,
-- RDMA_DRIVER_HNS);
-- if (!context)
-+ context->uar = mmap(NULL, page_size, PROT_READ | PROT_WRITE,
-+ MAP_SHARED, cmd_fd, 0);
-+ if (context->uar == MAP_FAILED)
-+ return -ENOMEM;
++ZRDMA_1.1 {
++ global:
++ zxdh_set_log_trace_switch;
++ zxdh_modify_qp_udp_sport;
++ zxdh_query_qpc;
++ zxdh_modify_qpc;
++ zxdh_reset_qp;
++} ZRDMA_1.0;
+diff --git a/providers/zrdma/main.c b/providers/zrdma/main.c
+new file mode 100644
+index 0000000..a61d134
+--- /dev/null
++++ b/providers/zrdma/main.c
+@@ -0,0 +1,202 @@
++// SPDX-License-Identifier: GPL-2.0 or Linux-OpenIB
++/* Copyright (c) 2024 ZTE Corporation. All rights reserved. */
++#include
++#include
++#include
++#include
++#include
++#include
++#include
++#include
++#include
++#include
++#include "zxdh_devids.h"
++#include "main.h"
++#include "zxdh_abi.h"
++#include "private_verbs_cmd.h"
+
-+ return 0;
-+}
++#define ZXDH_HCA(v, d) VERBS_PCI_MATCH(v, d, NULL)
++static const struct verbs_match_ent hca_table[] = {
++ VERBS_DRIVER_ID(RDMA_DRIVER_ZXDH),
++ ZXDH_HCA(PCI_VENDOR_ID_ZXDH_EVB, ZXDH_DEV_ID_ADAPTIVE_EVB_PF),
++ ZXDH_HCA(PCI_VENDOR_ID_ZXDH_EVB, ZXDH_DEV_ID_ADAPTIVE_EVB_VF),
++ ZXDH_HCA(PCI_VENDOR_ID_ZXDH_E312, ZXDH_DEV_ID_ADAPTIVE_E312_PF),
++ ZXDH_HCA(PCI_VENDOR_ID_ZXDH_E312, ZXDH_DEV_ID_ADAPTIVE_E312_VF),
++ ZXDH_HCA(PCI_VENDOR_ID_ZXDH_X512, ZXDH_DEV_ID_ADAPTIVE_X512_PF),
++ ZXDH_HCA(PCI_VENDOR_ID_ZXDH_X512, ZXDH_DEV_ID_ADAPTIVE_X512_VF),
++ {}
++};
+
-+static int mmap_dca(struct hns_roce_context *ctx, int cmd_fd,
-+ int page_size, size_t size, uint64_t mmap_key)
++/**
++ * zxdh_ufree_context - free context that was allocated
++ * @ibctx: context allocated ptr
++ */
++static void zxdh_ufree_context(struct ibv_context *ibctx)
+{
-+ struct hns_roce_dca_ctx *dca_ctx = &ctx->dca_ctx;
-+ void *addr;
-+
-+ addr = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED, cmd_fd,
-+ mmap_key);
-+ if (addr == MAP_FAILED) {
-+ verbs_err(&ctx->ibv_ctx, "failed to mmap() dca prime qp.\n");
-+ return -EINVAL;
-+ }
++ struct zxdh_uvcontext *iwvctx;
+
-+ dca_ctx->buf_status = addr;
-+ dca_ctx->sync_status = addr + size / 2;
++ iwvctx = container_of(ibctx, struct zxdh_uvcontext, ibv_ctx.context);
+
-+ return 0;
++ zxdh_ufree_pd(&iwvctx->iwupd->ibv_pd);
++ zxdh_munmap(iwvctx->sq_db);
++ zxdh_munmap(iwvctx->cq_db);
++ verbs_uninit_context(&iwvctx->ibv_ctx);
++ free(iwvctx);
+}
+
-+struct ibv_context *hnsdv_open_device(struct ibv_device *device,
-+ struct hnsdv_context_attr *attr)
-+{
-+ if (!is_hns_dev(device)) {
-+ errno = EOPNOTSUPP;
- return NULL;
-+ }
-
-- cmd.config |= HNS_ROCE_EXSGE_FLAGS | HNS_ROCE_RQ_INLINE_FLAGS |
-- HNS_ROCE_CQE_INLINE_FLAGS;
-- if (ibv_cmd_get_context(&context->ibv_ctx, &cmd.ibv_cmd, sizeof(cmd),
-- &resp.ibv_resp, sizeof(resp)))
-- goto err_free;
-+ return verbs_open_device(device, attr);
-+}
++static const struct verbs_context_ops zxdh_uctx_ops = {
++ .alloc_mw = zxdh_ualloc_mw,
++ .alloc_pd = zxdh_ualloc_pd,
++ .attach_mcast = zxdh_uattach_mcast,
++ .bind_mw = zxdh_ubind_mw,
++ .cq_event = zxdh_cq_event,
++ .create_ah = zxdh_ucreate_ah,
++ .create_cq = zxdh_ucreate_cq,
++ .create_cq_ex = zxdh_ucreate_cq_ex,
++ .create_qp = zxdh_ucreate_qp,
++ .create_qp_ex = zxdh_ucreate_qp_ex,
++ .create_srq = zxdh_ucreate_srq,
++ .dealloc_mw = zxdh_udealloc_mw,
++ .dealloc_pd = zxdh_ufree_pd,
++ .dereg_mr = zxdh_udereg_mr,
++ .destroy_ah = zxdh_udestroy_ah,
++ .destroy_cq = zxdh_udestroy_cq,
++ .modify_cq = zxdh_umodify_cq,
++ .destroy_qp = zxdh_udestroy_qp,
++ .destroy_srq = zxdh_udestroy_srq,
++ .detach_mcast = zxdh_udetach_mcast,
++ .modify_qp = zxdh_umodify_qp,
++ .modify_srq = zxdh_umodify_srq,
++ .poll_cq = zxdh_upoll_cq,
++ .post_recv = zxdh_upost_recv,
++ .post_send = zxdh_upost_send,
++ .post_srq_recv = zxdh_upost_srq_recv,
++ .query_device_ex = zxdh_uquery_device_ex,
++ .query_port = zxdh_uquery_port,
++ .query_qp = zxdh_uquery_qp,
++ .query_srq = zxdh_uquery_srq,
++ .reg_mr = zxdh_ureg_mr,
++ .rereg_mr = zxdh_urereg_mr,
++ .req_notify_cq = zxdh_uarm_cq,
++ .resize_cq = zxdh_uresize_cq,
++ .free_context = zxdh_ufree_context,
++ .get_srq_num = zxdh_uget_srq_num,
++};
+
-+static void set_dca_pool_param(struct hns_roce_context *ctx,
-+ struct hnsdv_context_attr *attr, int page_size)
++/**
++ * zxdh_ualloc_context - allocate context for user app
++ * @ibdev: ib device created during zxdh_driver_init
++ * @cmd_fd: save fd for the device
++ * @private_data: device private data
++ *
++ * Returns callback routine table and calls driver for allocating
++ * context and getting back resource information to return as ibv_context.
++ */
++static struct verbs_context *zxdh_ualloc_context(struct ibv_device *ibdev,
++ int cmd_fd, void *private_data)
+{
-+ struct hns_roce_dca_ctx *dca_ctx = &ctx->dca_ctx;
-+
-+ if (attr->comp_mask & HNSDV_CONTEXT_MASK_DCA_UNIT_SIZE)
-+ dca_ctx->unit_size = align(attr->dca_unit_size, page_size);
-+ else
-+ dca_ctx->unit_size = page_size * HNS_DCA_DEFAULT_UNIT_PAGES;
-+
-+ /* The memory pool cannot be expanded, only init the DCA context. */
-+ if (dca_ctx->unit_size == 0)
-+ return;
++ struct ibv_pd *ibv_pd;
++ struct zxdh_uvcontext *iwvctx;
++ struct zxdh_get_context cmd;
++ struct zxdh_get_context_resp resp = {};
++ __u64 sq_db_mmap_key, cq_db_mmap_key;
++ __u8 user_ver = ZXDH_ABI_VER;
+
-+ /* If not set, the memory pool can be expanded unlimitedly. */
-+ if (attr->comp_mask & HNSDV_CONTEXT_MASK_DCA_MAX_SIZE)
-+ dca_ctx->max_size = DIV_ROUND_UP(attr->dca_max_size,
-+ dca_ctx->unit_size) *
-+ dca_ctx->unit_size;
-+ else
-+ dca_ctx->max_size = HNS_DCA_MAX_MEM_SIZE;
++ iwvctx = verbs_init_and_alloc_context(ibdev, cmd_fd, iwvctx, ibv_ctx,
++ RDMA_DRIVER_ZXDH);
++ if (!iwvctx)
++ return NULL;
+
-+ /* If not set, the memory pool cannot be shrunk. */
-+ if (attr->comp_mask & HNSDV_CONTEXT_MASK_DCA_MIN_SIZE)
-+ dca_ctx->min_size = DIV_ROUND_UP(attr->dca_min_size,
-+ dca_ctx->unit_size) *
-+ dca_ctx->unit_size;
-+ else
-+ dca_ctx->min_size = HNS_DCA_MAX_MEM_SIZE;
++ zxdh_set_debug_mask();
++ iwvctx->zxdh_write_imm_split_switch = zxdh_get_write_imm_split_switch();
++ cmd.userspace_ver = user_ver;
++ if (ibv_cmd_get_context(&iwvctx->ibv_ctx,
++ (struct ibv_get_context *)&cmd, sizeof(cmd),
++ &resp.ibv_resp, sizeof(resp))) {
++ cmd.userspace_ver = 4;
++ if (ibv_cmd_get_context(
++ &iwvctx->ibv_ctx, (struct ibv_get_context *)&cmd,
++ sizeof(cmd), &resp.ibv_resp, sizeof(resp)))
++ goto err_free;
++ user_ver = cmd.userspace_ver;
++ }
+
-+ verbs_debug(&ctx->ibv_ctx,
-+ "Support DCA, unit %u, max %lu, min %lu Bytes.\n",
-+ dca_ctx->unit_size, dca_ctx->max_size, dca_ctx->min_size);
-+}
++ verbs_set_ops(&iwvctx->ibv_ctx, &zxdh_uctx_ops);
+
-+static int init_dca_context(struct hns_roce_context *ctx, int cmd_fd,
-+ struct hns_roce_alloc_ucontext_resp *resp,
-+ struct hnsdv_context_attr *attr,
-+ int page_size)
-+{
-+ struct hns_roce_dca_ctx *dca_ctx = &ctx->dca_ctx;
-+ uint64_t mmap_key = resp->dca_mmap_key;
-+ int mmap_size = resp->dca_mmap_size;
-+ int max_qps = resp->dca_qps;
-+ int ret;
++ iwvctx->dev_attrs.feature_flags = resp.feature_flags;
++ iwvctx->dev_attrs.hw_rev = resp.hw_rev;
++ iwvctx->dev_attrs.max_hw_wq_frags = resp.max_hw_wq_frags;
++ iwvctx->dev_attrs.max_hw_read_sges = resp.max_hw_read_sges;
++ iwvctx->dev_attrs.max_hw_inline = resp.max_hw_inline;
++ iwvctx->dev_attrs.max_hw_rq_quanta = resp.max_hw_rq_quanta;
++ iwvctx->dev_attrs.max_hw_srq_quanta = resp.max_hw_srq_quanta;
++ iwvctx->dev_attrs.max_hw_wq_quanta = resp.max_hw_wq_quanta;
++ iwvctx->dev_attrs.max_hw_srq_wr = resp.max_hw_srq_wr;
++ iwvctx->dev_attrs.max_hw_sq_chunk = resp.max_hw_sq_chunk;
++ iwvctx->dev_attrs.max_hw_cq_size = resp.max_hw_cq_size;
++ iwvctx->dev_attrs.min_hw_cq_size = resp.min_hw_cq_size;
++ iwvctx->abi_ver = user_ver;
+
-+ if (!(ctx->config & HNS_ROCE_UCTX_RSP_DCA_FLAGS))
-+ return 0;
-
-- if (!resp.cqe_size)
-+ dca_ctx->unit_size = 0;
-+ dca_ctx->mem_cnt = 0;
++ sq_db_mmap_key = resp.sq_db_mmap_key;
++ cq_db_mmap_key = resp.cq_db_mmap_key;
+
-+ list_head_init(&dca_ctx->mem_list);
-+ ret = pthread_spin_init(&dca_ctx->lock, PTHREAD_PROCESS_PRIVATE);
-+ if (ret)
-+ return ret;
++ iwvctx->dev_attrs.db_addr_type = resp.db_addr_type;
+
-+ if (!attr || !(attr->flags & HNSDV_CONTEXT_FLAGS_DCA))
-+ return 0;
++ iwvctx->sq_db = zxdh_mmap(cmd_fd, sq_db_mmap_key);
++ if (iwvctx->sq_db == MAP_FAILED)
++ goto err_free;
+
-+ set_dca_pool_param(ctx, attr, page_size);
++ iwvctx->cq_db = zxdh_mmap(cmd_fd, cq_db_mmap_key);
++ if (iwvctx->cq_db == MAP_FAILED) {
++ zxdh_munmap(iwvctx->sq_db);
++ goto err_free;
++ }
++ ibv_pd = zxdh_ualloc_pd(&iwvctx->ibv_ctx.context);
++ if (!ibv_pd) {
++ zxdh_munmap(iwvctx->sq_db);
++ zxdh_munmap(iwvctx->cq_db);
++ goto err_free;
++ }
+
-+ if (mmap_key) {
-+ const unsigned int bits_per_qp = 2 * HNS_DCA_BITS_PER_STATUS;
++ ibv_pd->context = &iwvctx->ibv_ctx.context;
++ iwvctx->iwupd = container_of(ibv_pd, struct zxdh_upd, ibv_pd);
++ add_private_ops(iwvctx);
++ return &iwvctx->ibv_ctx;
+
-+ if (!mmap_dca(ctx, cmd_fd, page_size, mmap_size, mmap_key)) {
-+ dca_ctx->status_size = mmap_size;
-+ dca_ctx->max_qps = min_t(int, max_qps,
-+ mmap_size * 8 / bits_per_qp);
-+ }
-+ }
++err_free:
++ free(iwvctx);
+
-+ return 0;
++ return NULL;
+}
+
-+static void uninit_dca_context(struct hns_roce_context *ctx)
++static void zxdh_uninit_device(struct verbs_device *verbs_device)
+{
-+ struct hns_roce_dca_ctx *dca_ctx = &ctx->dca_ctx;
-+
-+ if (!(ctx->config & HNS_ROCE_UCTX_RSP_DCA_FLAGS))
-+ return;
-+
-+ pthread_spin_lock(&dca_ctx->lock);
-+ hns_roce_cleanup_dca_mem(ctx);
-+ pthread_spin_unlock(&dca_ctx->lock);
-+ if (dca_ctx->buf_status)
-+ munmap(dca_ctx->buf_status, dca_ctx->status_size);
++ struct zxdh_udevice *dev;
+
-+ pthread_spin_destroy(&dca_ctx->lock);
++ dev = container_of(&verbs_device->device, struct zxdh_udevice,
++ ibv_dev.device);
++ free(dev);
+}
+
-+static int init_reset_context(struct hns_roce_context *ctx, int cmd_fd,
-+ struct hns_roce_alloc_ucontext_resp *resp,
-+ int page_size)
++static struct verbs_device *zxdh_device_alloc(struct verbs_sysfs_dev *sysfs_dev)
+{
-+ uint64_t reset_mmap_key = resp->reset_mmap_key;
-+ struct hns_roce_v2_reset_state *state;
-+
-+ /* The reset mmap key is 0, which means it is not supported. */
-+ if (reset_mmap_key == 0)
-+ return 0;
++ struct zxdh_udevice *dev;
+
-+ ctx->reset_state = mmap(NULL, page_size, PROT_READ, MAP_SHARED,
-+ cmd_fd, reset_mmap_key);
-+ if (ctx->reset_state == MAP_FAILED)
-+ return -ENOMEM;
-+
-+ state = ctx->reset_state;
-+ ctx->use_new_reset_flag = state->hw_ready;
-+
-+ return 0;
-+}
-+
-+static int set_context_attr(struct hns_roce_device *hr_dev,
-+ struct hns_roce_context *context,
-+ struct hns_roce_alloc_ucontext_resp *resp)
-+{
-+ struct ibv_device_attr dev_attrs;
-+ int i;
-+
-+ if (!resp->cqe_size)
- context->cqe_size = HNS_ROCE_CQE_SIZE;
-- else if (resp.cqe_size <= HNS_ROCE_V3_CQE_SIZE)
-- context->cqe_size = resp.cqe_size;
-+ else if (resp->cqe_size <= HNS_ROCE_V3_CQE_SIZE)
-+ context->cqe_size = resp->cqe_size;
- else
- context->cqe_size = HNS_ROCE_V3_CQE_SIZE;
-
-- context->config = resp.config;
-- if (resp.config & HNS_ROCE_RSP_EXSGE_FLAGS)
-- context->max_inline_data = resp.max_inline_data;
-+ context->config = resp->config;
-+ if (resp->config & HNS_ROCE_RSP_EXSGE_FLAGS)
-+ context->max_inline_data = resp->max_inline_data;
-
-- context->qp_table_shift = calc_table_shift(resp.qp_tab_size,
-+ context->qp_table_shift = calc_table_shift(resp->qp_tab_size,
- HNS_ROCE_QP_TABLE_BITS);
- context->qp_table_mask = (1 << context->qp_table_shift) - 1;
-- pthread_mutex_init(&context->qp_table_mutex, NULL);
- for (i = 0; i < HNS_ROCE_QP_TABLE_SIZE; ++i)
- context->qp_table[i].refcnt = 0;
-
-- context->srq_table_shift = calc_table_shift(resp.srq_tab_size,
-+ context->srq_table_shift = calc_table_shift(resp->srq_tab_size,
- HNS_ROCE_SRQ_TABLE_BITS);
- context->srq_table_mask = (1 << context->srq_table_shift) - 1;
-- pthread_mutex_init(&context->srq_table_mutex, NULL);
- for (i = 0; i < HNS_ROCE_SRQ_TABLE_SIZE; ++i)
- context->srq_table[i].refcnt = 0;
-
-@@ -149,28 +325,131 @@ static struct verbs_context *hns_roce_alloc_context(struct ibv_device *ibdev,
- struct ibv_device_attr_ex,
- orig_attr),
- sizeof(dev_attrs)))
-- goto err_free;
-+ return EIO;
-
- hr_dev->hw_version = dev_attrs.hw_ver;
-+ hr_dev->congest_cap = resp->congest_type;
- context->max_qp_wr = dev_attrs.max_qp_wr;
- context->max_sge = dev_attrs.max_sge;
- context->max_cqe = dev_attrs.max_cqe;
- context->max_srq_wr = dev_attrs.max_srq_wr;
- context->max_srq_sge = dev_attrs.max_srq_sge;
-
-+ return get_link_type(dev_attrs.vendor_part_id, &hr_dev->link_type);
-+}
-+
-+static void ucontext_set_cmd(struct hns_roce_alloc_ucontext *cmd,
-+ struct hnsdv_context_attr *attr)
-+{
-+ cmd->config |= HNS_ROCE_EXSGE_FLAGS | HNS_ROCE_RQ_INLINE_FLAGS |
-+ HNS_ROCE_CQE_INLINE_FLAGS | HNS_ROCE_UCTX_DYN_QP_PGSZ;
-+
-+ if (!attr || !(attr->flags & HNSDV_CONTEXT_FLAGS_DCA))
-+ return;
-+
-+ cmd->config |= HNS_ROCE_UCTX_CONFIG_DCA;
-+
-+ if (attr->comp_mask & HNSDV_CONTEXT_MASK_DCA_PRIME_QPS) {
-+ cmd->comp |= HNS_ROCE_ALLOC_UCTX_COMP_DCA_MAX_QPS;
-+ cmd->dca_max_qps = attr->dca_prime_qps;
-+ }
-+}
-+
-+static int hns_roce_init_context_lock(struct hns_roce_context *context)
-+{
-+ int ret;
-+
-+ ret = pthread_spin_init(&context->uar_lock, PTHREAD_PROCESS_PRIVATE);
-+ if (ret)
-+ return ret;
-+
-+ ret = pthread_mutex_init(&context->qp_table_mutex, NULL);
-+ if (ret)
-+ goto destroy_uar_lock;
-+
-+ ret = pthread_mutex_init(&context->srq_table_mutex, NULL);
-+ if (ret)
-+ goto destroy_qp_mutex;
-+
-+ ret = pthread_mutex_init(&context->db_list_mutex, NULL);
-+ if (ret)
-+ goto destroy_srq_mutex;
-+
-+ return 0;
-+
-+destroy_srq_mutex:
-+ pthread_mutex_destroy(&context->srq_table_mutex);
-+
-+destroy_qp_mutex:
-+ pthread_mutex_destroy(&context->qp_table_mutex);
-+
-+destroy_uar_lock:
-+ pthread_spin_destroy(&context->uar_lock);
-+ return ret;
-+}
-+
-+static void hns_roce_destroy_context_lock(struct hns_roce_context *context)
-+{
-+ pthread_spin_destroy(&context->uar_lock);
-+ pthread_mutex_destroy(&context->qp_table_mutex);
-+ pthread_mutex_destroy(&context->srq_table_mutex);
-+ pthread_mutex_destroy(&context->db_list_mutex);
-+}
-+
-+static struct verbs_context *hns_roce_alloc_context(struct ibv_device *ibdev,
-+ int cmd_fd,
-+ void *private_data)
-+{
-+ struct hnsdv_context_attr *ctx_attr = private_data;
-+ struct hns_roce_device *hr_dev = to_hr_dev(ibdev);
-+ struct hns_roce_alloc_ucontext_resp resp = {};
-+ struct hns_roce_alloc_ucontext cmd = {};
-+ struct hns_roce_context *context;
-+
-+ context = verbs_init_and_alloc_context(ibdev, cmd_fd, context, ibv_ctx,
-+ RDMA_DRIVER_HNS);
-+ if (!context)
++ dev = calloc(1, sizeof(*dev));
++ if (!dev)
+ return NULL;
+
-+ ucontext_set_cmd(&cmd, ctx_attr);
-+ if (ibv_cmd_get_context(&context->ibv_ctx, &cmd.ibv_cmd, sizeof(cmd),
-+ &resp.ibv_resp, sizeof(resp)))
-+ goto err_ibv_cmd;
-+
-+ if (hns_roce_init_context_lock(context))
-+ goto err_ibv_cmd;
-+
-+ if (set_context_attr(hr_dev, context, &resp))
-+ goto err_set_attr;
++ return &dev->ibv_dev;
++}
+
- context->uar = mmap(NULL, hr_dev->page_size, PROT_READ | PROT_WRITE,
- MAP_SHARED, cmd_fd, 0);
- if (context->uar == MAP_FAILED)
-- goto err_free;
-+ goto err_set_attr;
++static const struct verbs_device_ops zxdh_udev_ops = {
++ .alloc_context = zxdh_ualloc_context,
++ .alloc_device = zxdh_device_alloc,
++ .match_max_abi_version = ZXDH_MAX_ABI_VERSION,
++ .match_min_abi_version = ZXDH_MIN_ABI_VERSION,
++ .match_table = hca_table,
++ .name = "zxdh",
++ .uninit_device = zxdh_uninit_device,
++};
+
-+ if (init_dca_context(context, cmd_fd,
-+ &resp, ctx_attr, hr_dev->page_size))
-+ goto err_set_attr;
-
-- pthread_spin_init(&context->uar_lock, PTHREAD_PROCESS_PRIVATE);
-+ if (init_reset_context(context, cmd_fd, &resp, hr_dev->page_size))
-+ goto reset_free;
++PROVIDER_DRIVER(zxdh, zxdh_udev_ops);
+diff --git a/providers/zrdma/main.h b/providers/zrdma/main.h
+new file mode 100644
+index 0000000..e28c77b
+--- /dev/null
++++ b/providers/zrdma/main.h
+@@ -0,0 +1,223 @@
++/* SPDX-License-Identifier: GPL-2.0 or Linux-OpenIB */
++/* Copyright (c) 2024 ZTE Corporation. All rights reserved. */
++#ifndef ZXDH_UMAIN_H
++#define ZXDH_UMAIN_H
+
-+ if (hns_roce_mmap(hr_dev, context, cmd_fd))
-+ goto uar_free;
-
- verbs_set_ops(&context->ibv_ctx, &hns_common_ops);
- verbs_set_ops(&context->ibv_ctx, &hr_dev->u_hw->hw_ops);
-
- return &context->ibv_ctx;
-
--err_free:
-+uar_free:
-+ if (context->reset_state)
-+ munmap(context->reset_state, hr_dev->page_size);
-+reset_free:
-+ uninit_dca_context(context);
-+err_set_attr:
-+ hns_roce_destroy_context_lock(context);
-+err_ibv_cmd:
- verbs_uninit_context(&context->ibv_ctx);
- free(context);
- return NULL;
-@@ -182,6 +461,10 @@ static void hns_roce_free_context(struct ibv_context *ibctx)
- struct hns_roce_context *context = to_hr_ctx(ibctx);
-
- munmap(context->uar, hr_dev->page_size);
-+ if (context->reset_state)
-+ munmap(context->reset_state, hr_dev->page_size);
-+ uninit_dca_context(context);
-+ hns_roce_destroy_context_lock(context);
- verbs_uninit_context(&context->ibv_ctx);
- free(context);
- }
-@@ -216,4 +499,17 @@ static const struct verbs_device_ops hns_roce_dev_ops = {
- .uninit_device = hns_uninit_device,
- .alloc_context = hns_roce_alloc_context,
- };
++#include
++#include
++#include
++#include
++#include
+
-+bool is_hns_dev(struct ibv_device *device)
-+{
-+ struct verbs_device *verbs_device = verbs_get_device(device);
++#include "zxdh_defs.h"
++#include "zxdh_status.h"
++#include "zxdh_verbs.h"
+
-+ return verbs_device->ops == &hns_roce_dev_ops;
-+}
++#define ZXDH_BASE_PUSH_PAGE 1
++#define ZXDH_U_MINCQ_SIZE 4
++#define ZXDH_DB_SHADOW_AREA_SIZE 8
++#define ZXDH_DB_SQ_OFFSET 0x404
++#define ZXDH_DB_CQ_OFFSET 0x588
+
-+bool hnsdv_is_supported(struct ibv_device *device)
-+{
-+ return is_hns_dev(device);
-+}
++#define MIN_UDP_SPORT 1024
++#define MIN_QP_QPN 1
+
- PROVIDER_DRIVER(hns, hns_roce_dev_ops);
-diff --git a/providers/hns/hns_roce_u.h b/providers/hns/hns_roce_u.h
-index afb68fe..5eedb81 100644
---- a/providers/hns/hns_roce_u.h
-+++ b/providers/hns/hns_roce_u.h
-@@ -35,6 +35,7 @@
-
- #include
- #include
-+#include
- #include
-
- #include
-@@ -44,6 +45,7 @@
- #include
- #include
- #include
-+#include
- #include
- #include "hns_roce_u_abi.h"
-
-@@ -52,6 +54,8 @@
-
- #define PFX "hns: "
-
-+typedef _Atomic(uint64_t) atomic_bitmap_t;
++enum zxdh_supported_wc_flags {
++ ZXDH_CQ_SUPPORTED_WC_FLAGS =
++ IBV_WC_EX_WITH_BYTE_LEN | IBV_WC_EX_WITH_IMM |
++ IBV_WC_EX_WITH_QP_NUM | IBV_WC_EX_WITH_SRC_QP |
++ IBV_WC_EX_WITH_SLID | IBV_WC_EX_WITH_SL |
++ IBV_WC_EX_WITH_DLID_PATH_BITS |
++ IBV_WC_EX_WITH_COMPLETION_TIMESTAMP_WALLCLOCK |
++ IBV_WC_EX_WITH_COMPLETION_TIMESTAMP,
++};
+
- /* The minimum page size is 4K for hardware */
- #define HNS_HW_PAGE_SHIFT 12
- #define HNS_HW_PAGE_SIZE (1 << HNS_HW_PAGE_SHIFT)
-@@ -147,17 +151,28 @@
-
- #define hr_reg_read(ptr, field) _hr_reg_read(ptr, field)
-
+enum {
-+ HNS_ROCE_CAP_FLAG_DCA_MODE = BIT(15),
++ ZXDH_DBG_QP = 1 << 0,
++ ZXDH_DBG_CQ = 1 << 1,
++ ZXDH_DBG_SRQ = 1 << 2,
+};
++extern uint32_t zxdh_debug_mask;
++#define zxdh_dbg(ctx, mask, format, arg...) \
++ do { \
++ if (mask & zxdh_debug_mask) { \
++ int zxdh_dbg_tmp = errno; \
++ verbs_debug(ctx, format, ##arg); \
++ errno = zxdh_dbg_tmp; \
++ } \
++ } while (0)
+
- #define HNS_ROCE_QP_TABLE_BITS 8
- #define HNS_ROCE_QP_TABLE_SIZE BIT(HNS_ROCE_QP_TABLE_BITS)
-
- #define HNS_ROCE_SRQ_TABLE_BITS 8
- #define HNS_ROCE_SRQ_TABLE_SIZE BIT(HNS_ROCE_SRQ_TABLE_BITS)
-
-+enum hns_device_link_type {
-+ HNS_DEV_LINK_TYPE_ETH,
-+ HNS_DEV_LINK_TYPE_HCCS,
++struct zxdh_udevice {
++ struct verbs_device ibv_dev;
+};
+
- struct hns_roce_device {
- struct verbs_device ibv_dev;
- int page_size;
- const struct hns_roce_u_hw *u_hw;
- int hw_version;
-+ uint8_t congest_cap;
-+ enum hns_device_link_type link_type;
- };
-
- struct hns_roce_buf {
-@@ -182,6 +197,11 @@ enum hns_roce_pktype {
- HNS_ROCE_PKTYPE_ROCE_V2_IPV4,
- };
-
-+enum hns_roce_tc_map_mode {
-+ HNS_ROCE_TC_MAP_MODE_PRIO,
-+ HNS_ROCE_TC_MAP_MODE_DSCP,
++struct zxdh_uah {
++ struct ibv_ah ibv_ah;
++ uint32_t ah_id;
++ struct ibv_global_route grh;
+};
+
- struct hns_roce_db_page {
- struct hns_roce_db_page *prev, *next;
- struct hns_roce_buf buf;
-@@ -190,9 +210,39 @@ struct hns_roce_db_page {
- unsigned long *bitmap;
- };
-
-+struct hns_roce_spinlock {
-+ pthread_spinlock_t lock;
-+ int need_lock;
++struct zxdh_upd {
++ struct ibv_pd ibv_pd;
++ void *arm_cq_page;
++ void *arm_cq;
++ uint32_t pd_id;
+};
+
-+#define HNS_DCA_MAX_MEM_SIZE ~0UL
-+#define HNS_DCA_DEFAULT_UNIT_PAGES 16
-+
-+struct hns_roce_dca_ctx {
-+ struct list_head mem_list;
-+ pthread_spinlock_t lock;
-+ int mem_cnt;
-+ unsigned int unit_size;
-+ uint64_t max_size;
-+ uint64_t min_size;
-+ uint64_t curr_size;
-+
-+#define HNS_DCA_BITS_PER_STATUS 1
-+ unsigned int max_qps;
-+ unsigned int status_size;
-+ atomic_bitmap_t *buf_status;
-+ atomic_bitmap_t *sync_status;
++struct zxdh_uvcontext {
++ struct verbs_context ibv_ctx;
++ struct zxdh_upd *iwupd;
++ struct zxdh_dev_attrs dev_attrs;
++ void *db;
++ void *sq_db;
++ void *cq_db;
++ int abi_ver;
++ bool legacy_mode;
++ uint8_t zxdh_write_imm_split_switch;
++ struct zxdh_uvcontext_ops *cxt_ops;
+};
+
-+struct hns_roce_v2_reset_state {
-+ uint32_t is_reset;
-+ uint32_t hw_ready;
-+};
++struct zxdh_uqp;
+
- struct hns_roce_context {
- struct verbs_context ibv_ctx;
- void *uar;
-+ void *reset_state;
- pthread_spinlock_t uar_lock;
-
- struct {
-@@ -222,17 +272,32 @@ struct hns_roce_context {
- unsigned int cqe_size;
- uint32_t config;
- unsigned int max_inline_data;
-+ struct hns_roce_dca_ctx dca_ctx;
-+ bool use_new_reset_flag;
-+ bool reseted;
++struct zxdh_cq_buf {
++ struct list_node list;
++ struct zxdh_cq cq;
++ struct verbs_mr vmr;
+};
+
-+struct hns_roce_td {
-+ struct ibv_td ibv_td;
-+ atomic_int refcount;
- };
-
- struct hns_roce_pd {
- struct ibv_pd ibv_pd;
- unsigned int pdn;
-+ atomic_int refcount;
-+ struct hns_roce_pd *protection_domain;
++struct zxdh_ucq {
++ struct verbs_cq verbs_cq;
++ struct verbs_mr vmr;
++ struct verbs_mr vmr_shadow_area;
++ pthread_spinlock_t lock;
++ size_t buf_size;
++ bool is_armed;
++ enum zxdh_cmpl_notify last_notify;
++ int comp_vector;
++ uint32_t report_rtt;
++ struct zxdh_uqp *uqp;
++ struct zxdh_cq cq;
++ struct list_head resize_list;
++ /* for extended CQ completion fields */
++ struct zxdh_cq_poll_info cur_cqe;
++ bool resize_enable;
+};
+
-+struct hns_roce_pad {
-+ struct hns_roce_pd pd;
-+ struct hns_roce_td *td;
- };
-
- struct hns_roce_cq {
- struct verbs_cq verbs_cq;
- struct hns_roce_buf buf;
-- pthread_spinlock_t lock;
-+ struct hns_roce_spinlock hr_lock;
- unsigned int cqn;
- unsigned int cq_depth;
- unsigned int cons_index;
-@@ -242,6 +307,12 @@ struct hns_roce_cq {
- unsigned long flags;
- unsigned int cqe_size;
- struct hns_roce_v2_cqe *cqe;
-+ struct ibv_pd *parent_domain;
-+ struct list_head list_sq;
-+ struct list_head list_rq;
-+ struct list_head list_srq;
-+ struct list_head list_xrc_srq;
-+ struct hns_roce_v2_cqe *sw_cqe;
- };
-
- struct hns_roce_idx_que {
-@@ -268,7 +339,7 @@ struct hns_roce_srq {
- struct hns_roce_idx_que idx_que;
- struct hns_roce_buf wqe_buf;
- struct hns_roce_rinl_buf srq_rinl_buf;
-- pthread_spinlock_t lock;
-+ struct hns_roce_spinlock hr_lock;
- unsigned long *wrid;
- unsigned int srqn;
- unsigned int wqe_cnt;
-@@ -278,11 +349,12 @@ struct hns_roce_srq {
- unsigned int *rdb;
- unsigned int cap_flags;
- unsigned short counter;
-+ struct list_node xrc_srcq_node;
- };
-
- struct hns_roce_wq {
- unsigned long *wrid;
-- pthread_spinlock_t lock;
-+ struct hns_roce_spinlock hr_lock;
- unsigned int wqe_cnt;
- int max_post;
- unsigned int head;
-@@ -309,11 +381,19 @@ struct hns_roce_sge_ex {
- unsigned int sge_shift;
- };
-
-+struct hns_roce_dca_buf {
-+ void **bufs;
-+ unsigned int max_cnt;
-+ unsigned int shift;
-+ unsigned int dcan;
++struct zxdh_usrq {
++ struct ibv_srq ibv_srq;
++ struct verbs_mr vmr;
++ struct verbs_mr list_vmr;
++ struct verbs_mr db_vmr;
++ size_t total_buf_size;
++ size_t buf_size;
++ size_t list_buf_size;
++ size_t db_buf_size;
++ size_t srq_size;
++ size_t srq_list_size;
++ uint32_t srq_id;
++ uint32_t max_wr;
++ uint32_t max_sge;
++ uint32_t srq_limit;
++ pthread_spinlock_t lock;
++ uint32_t wq_size;
++ struct ibv_recv_wr *pend_rx_wr;
++ struct zxdh_srq srq;
+};
+
- struct hns_roce_qp {
- struct verbs_qp verbs_qp;
- struct hns_roce_buf buf;
-+ struct hns_roce_dca_buf dca_wqe;
- int max_inline_data;
-- int buf_size;
-+ unsigned int buf_size;
- unsigned int sq_signal_bits;
- struct hns_roce_wq sq;
- struct hns_roce_wq rq;
-@@ -323,6 +403,9 @@ struct hns_roce_qp {
- unsigned int next_sge;
- int port_num;
- uint8_t sl;
-+ uint8_t tc_mode;
-+ uint8_t priority;
-+ uint8_t pageshift;
- unsigned int qkey;
- enum ibv_mtu path_mtu;
-
-@@ -336,6 +419,10 @@ struct hns_roce_qp {
- void *cur_wqe;
- unsigned int rb_sq_head; /* roll back sq head */
- struct hns_roce_sge_info sge_info;
-+
-+ struct list_node rcq_node;
-+ struct list_node scq_node;
-+ struct list_node srcq_node;
- };
-
- struct hns_roce_av {
-@@ -360,11 +447,23 @@ struct hns_roce_u_hw {
- struct verbs_context_ops hw_ops;
- };
-
-+struct hns_roce_dca_attach_attr {
-+ uint32_t sq_offset;
-+ uint32_t sge_offset;
-+ uint32_t rq_offset;
-+ bool force;
++struct zxdh_uqp {
++ struct verbs_qp vqp;
++ struct zxdh_ucq *send_cq;
++ struct zxdh_ucq *recv_cq;
++ struct zxdh_usrq *srq;
++ struct verbs_mr vmr;
++ size_t buf_size;
++ uint32_t zxdh_drv_opt;
++ pthread_spinlock_t lock;
++ uint16_t sq_sig_all;
++ uint16_t qperr;
++ uint16_t rsvd;
++ uint32_t pending_rcvs;
++ uint32_t wq_size;
++ struct ibv_recv_wr *pend_rx_wr;
++ struct zxdh_qp qp;
++ enum ibv_qp_type qp_type;
++ struct zxdh_sge *recv_sges;
++ uint8_t is_srq;
++ uint8_t inline_data[ZXDH_MAX_INLINE_DATA_SIZE];
+};
+
-+struct hns_roce_dca_detach_attr {
-+ uint32_t sq_index;
++struct zxdh_umr {
++ struct verbs_mr vmr;
++ uint32_t acc_flags;
++ uint8_t leaf_pbl_size;
++ uint8_t host_page_size;
++ uint64_t mr_pa_pble_index;
+};
+
- /*
- * The entries's buffer should be aligned to a multiple of the hardware's
- * minimum page size.
- */
- #define hr_hw_page_align(x) align(x, HNS_HW_PAGE_SIZE)
-+#define hr_hw_page_count(x) (hr_hw_page_align(x) / HNS_HW_PAGE_SIZE)
-
- static inline unsigned int to_hr_hem_entries_size(int count, int buf_shift)
- {
-@@ -398,9 +497,35 @@ static inline struct hns_roce_context *to_hr_ctx(struct ibv_context *ibv_ctx)
- return container_of(ibv_ctx, struct hns_roce_context, ibv_ctx.context);
- }
-
-+static inline struct hns_roce_td *to_hr_td(struct ibv_td *ibv_td)
-+{
-+ return container_of(ibv_td, struct hns_roce_td, ibv_td);
-+}
-+
-+/* to_hr_pd always returns the real hns_roce_pd obj. */
- static inline struct hns_roce_pd *to_hr_pd(struct ibv_pd *ibv_pd)
- {
-- return container_of(ibv_pd, struct hns_roce_pd, ibv_pd);
-+ struct hns_roce_pd *pd =
-+ container_of(ibv_pd, struct hns_roce_pd, ibv_pd);
-+
-+ if (pd->protection_domain)
-+ return pd->protection_domain;
-+
-+ return pd;
-+}
-+
-+static inline struct hns_roce_pad *to_hr_pad(struct ibv_pd *ibv_pd)
-+{
-+ struct hns_roce_pad *pad =
-+ ibv_pd ?
-+ container_of(ibv_pd, struct hns_roce_pad, pd.ibv_pd) :
-+ NULL;
-+
-+ if (pad && pad->pd.protection_domain)
-+ return pad;
++/* zxdh_verbs.c */
++int zxdh_uquery_device_ex(struct ibv_context *context,
++ const struct ibv_query_device_ex_input *input,
++ struct ibv_device_attr_ex *attr, size_t attr_size);
++int zxdh_uquery_port(struct ibv_context *context, uint8_t port,
++ struct ibv_port_attr *attr);
++struct ibv_pd *zxdh_ualloc_pd(struct ibv_context *context);
++int zxdh_ufree_pd(struct ibv_pd *pd);
++struct ibv_mr *zxdh_ureg_mr(struct ibv_pd *pd, void *addr, size_t length,
++ uint64_t hca_va, int access);
++int zxdh_udereg_mr(struct verbs_mr *vmr);
+
-+ /* Otherwise ibv_pd isn't a parent_domain */
-+ return NULL;
- }
-
- static inline struct hns_roce_cq *to_hr_cq(struct ibv_cq *ibv_cq)
-@@ -423,14 +548,63 @@ static inline struct hns_roce_ah *to_hr_ah(struct ibv_ah *ibv_ah)
- return container_of(ibv_ah, struct hns_roce_ah, ibv_ah);
- }
-
-+static inline int hns_roce_spin_lock(struct hns_roce_spinlock *hr_lock)
-+{
-+ if (hr_lock->need_lock)
-+ return pthread_spin_lock(&hr_lock->lock);
++int zxdh_urereg_mr(struct verbs_mr *mr, int flags, struct ibv_pd *pd,
++ void *addr, size_t length, int access);
+
-+ return 0;
-+}
-+
-+static inline int hns_roce_spin_unlock(struct hns_roce_spinlock *hr_lock)
-+{
-+ if (hr_lock->need_lock)
-+ return pthread_spin_unlock(&hr_lock->lock);
-+
-+ return 0;
-+}
-+
-+#define HNS_ROCE_BIT_MASK(nr) (1UL << ((nr) % 64))
-+#define HNS_ROCE_BIT_WORD(nr) ((nr) / 64)
-+
-+static inline bool atomic_test_bit(atomic_bitmap_t *p, uint32_t nr)
-+{
-+ p += HNS_ROCE_BIT_WORD(nr);
-+ return !!(atomic_load(p) & HNS_ROCE_BIT_MASK(nr));
-+}
++struct ibv_mw *zxdh_ualloc_mw(struct ibv_pd *pd, enum ibv_mw_type type);
++int zxdh_ubind_mw(struct ibv_qp *qp, struct ibv_mw *mw,
++ struct ibv_mw_bind *mw_bind);
++int zxdh_udealloc_mw(struct ibv_mw *mw);
++struct ibv_cq *zxdh_ucreate_cq(struct ibv_context *context, int cqe,
++ struct ibv_comp_channel *channel,
++ int comp_vector);
++struct ibv_cq_ex *zxdh_ucreate_cq_ex(struct ibv_context *context,
++ struct ibv_cq_init_attr_ex *attr_ex);
++void zxdh_ibvcq_ex_fill_priv_funcs(struct zxdh_ucq *iwucq,
++ struct ibv_cq_init_attr_ex *attr_ex);
++int zxdh_uresize_cq(struct ibv_cq *cq, int cqe);
++int zxdh_udestroy_cq(struct ibv_cq *cq);
++int zxdh_umodify_cq(struct ibv_cq *cq, struct ibv_modify_cq_attr *attr);
++int zxdh_upoll_cq(struct ibv_cq *cq, int entries, struct ibv_wc *entry);
++int zxdh_uarm_cq(struct ibv_cq *cq, int solicited);
++void zxdh_cq_event(struct ibv_cq *cq);
++struct ibv_qp *zxdh_ucreate_qp(struct ibv_pd *pd,
++ struct ibv_qp_init_attr *attr);
++struct ibv_qp *zxdh_ucreate_qp_ex(struct ibv_context *context,
++ struct ibv_qp_init_attr_ex *attr);
++int zxdh_uquery_qp(struct ibv_qp *qp, struct ibv_qp_attr *attr, int attr_mask,
++ struct ibv_qp_init_attr *init_attr);
++int zxdh_umodify_qp(struct ibv_qp *qp, struct ibv_qp_attr *attr, int attr_mask);
++int zxdh_udestroy_qp(struct ibv_qp *qp);
++int zxdh_upost_send(struct ibv_qp *ib_qp, struct ibv_send_wr *ib_wr,
++ struct ibv_send_wr **bad_wr);
++int zxdh_upost_recv(struct ibv_qp *ib_qp, struct ibv_recv_wr *ib_wr,
++ struct ibv_recv_wr **bad_wr);
++struct ibv_srq *zxdh_ucreate_srq(struct ibv_pd *pd,
++ struct ibv_srq_init_attr *srq_init_attr);
++int zxdh_udestroy_srq(struct ibv_srq *srq);
++int zxdh_umodify_srq(struct ibv_srq *srq, struct ibv_srq_attr *srq_attr,
++ int srq_attr_mask);
++int zxdh_uquery_srq(struct ibv_srq *srq, struct ibv_srq_attr *srq_attr);
++int zxdh_upost_srq_recv(struct ibv_srq *srq, struct ibv_recv_wr *recv_wr,
++ struct ibv_recv_wr **bad_recv_wr);
++int zxdh_uget_srq_num(struct ibv_srq *srq, uint32_t *srq_num);
++struct ibv_ah *zxdh_ucreate_ah(struct ibv_pd *ibpd, struct ibv_ah_attr *attr);
++int zxdh_udestroy_ah(struct ibv_ah *ibah);
++int zxdh_uattach_mcast(struct ibv_qp *qp, const union ibv_gid *gid,
++ uint16_t lid);
++int zxdh_udetach_mcast(struct ibv_qp *qp, const union ibv_gid *gid,
++ uint16_t lid);
++void zxdh_async_event(struct ibv_context *context,
++ struct ibv_async_event *event);
++void zxdh_set_hw_attrs(struct zxdh_hw_attrs *attrs);
++void *zxdh_mmap(int fd, off_t offset);
++void zxdh_munmap(void *map);
++void zxdh_set_debug_mask(void);
++int zxdh_get_write_imm_split_switch(void);
++#endif /* ZXDH_UMAIN_H */
+diff --git a/providers/zrdma/private_verbs_cmd.c b/providers/zrdma/private_verbs_cmd.c
+new file mode 100644
+index 0000000..68bba23
+--- /dev/null
++++ b/providers/zrdma/private_verbs_cmd.c
+@@ -0,0 +1,201 @@
++// SPDX-License-Identifier: GPL-2.0 or Linux-OpenIB
++/* Copyright (c) 2024 ZTE Corporation. All rights reserved. */
++#include
++#include
++#include "private_verbs_cmd.h"
++#include "zxdh_dv.h"
+
-+static inline bool test_and_set_bit_lock(atomic_bitmap_t *p, uint32_t nr)
++static void copy_query_qpc(struct zxdh_query_qpc_resp *resp,
++ struct zxdh_rdma_qpc *qpc)
+{
-+ uint64_t mask = HNS_ROCE_BIT_MASK(nr);
-+
-+ p += HNS_ROCE_BIT_WORD(nr);
-+ if (atomic_load(p) & mask)
-+ return true;
-+
-+ return (atomic_fetch_or(p, mask) & mask) != 0;
++ qpc->ack_err_flag = resp->ack_err_flag;
++ qpc->retry_flag = resp->retry_flag;
++ qpc->rnr_retry_flag = resp->rnr_retry_flag;
++ qpc->cur_retry_count = resp->cur_retry_count;
++ qpc->retry_cqe_sq_opcode = resp->retry_cqe_sq_opcode;
++ qpc->err_flag = resp->err_flag;
++ qpc->package_err_flag = resp->package_err_flag;
++ qpc->recv_err_flag = resp->recv_err_flag;
++ qpc->tx_last_ack_psn = resp->tx_last_ack_psn;
++ qpc->retry_count = resp->retry_count;
++ qpc->read_retry_flag = resp->read_retry_flag;
+}
+
-+static inline void clear_bit_unlock(atomic_bitmap_t *p, uint32_t nr)
++static int _zxdh_query_qpc(struct ibv_qp *qp, struct zxdh_rdma_qpc *qpc)
+{
-+ p += HNS_ROCE_BIT_WORD(nr);
-+ atomic_fetch_and(p, ~HNS_ROCE_BIT_MASK(nr));
-+}
-+
-+bool is_hns_dev(struct ibv_device *device);
++ DECLARE_COMMAND_BUFFER(cmd, ZXDH_IB_OBJECT_QP_OBJ,
++ ZXDH_IB_METHOD_QP_QUERY_QPC, 2);
++ int ret;
++ struct zxdh_query_qpc_resp resp_ex = { 0 };
+
- int hns_roce_u_query_device(struct ibv_context *context,
- const struct ibv_query_device_ex_input *input,
- struct ibv_device_attr_ex *attr, size_t attr_size);
- int hns_roce_u_query_port(struct ibv_context *context, uint8_t port,
- struct ibv_port_attr *attr);
-
-+struct ibv_td *hns_roce_u_alloc_td(struct ibv_context *context,
-+ struct ibv_td_init_attr *attr);
-+int hns_roce_u_dealloc_td(struct ibv_td *ibv_td);
-+struct ibv_pd *hns_roce_u_alloc_pad(struct ibv_context *context,
-+ struct ibv_parent_domain_init_attr *attr);
- struct ibv_pd *hns_roce_u_alloc_pd(struct ibv_context *context);
--int hns_roce_u_free_pd(struct ibv_pd *pd);
-+int hns_roce_u_dealloc_pd(struct ibv_pd *pd);
-
- struct ibv_mr *hns_roce_u_reg_mr(struct ibv_pd *pd, void *addr, size_t length,
- uint64_t hca_va, int access);
-@@ -489,9 +663,21 @@ int hns_roce_u_close_xrcd(struct ibv_xrcd *ibv_xrcd);
- int hns_roce_alloc_buf(struct hns_roce_buf *buf, unsigned int size,
- int page_size);
- void hns_roce_free_buf(struct hns_roce_buf *buf);
-+void hns_roce_qp_spinlock_destroy(struct hns_roce_qp *qp);
-
- void hns_roce_free_qp_buf(struct hns_roce_qp *qp, struct hns_roce_context *ctx);
-
-+int hns_roce_attach_dca_mem(struct hns_roce_context *ctx, uint32_t handle,
-+ struct hns_roce_dca_attach_attr *attr,
-+ uint32_t size, struct hns_roce_dca_buf *buf);
-+void hns_roce_detach_dca_mem(struct hns_roce_context *ctx, uint32_t handle,
-+ struct hns_roce_dca_detach_attr *attr);
-+bool hns_roce_dca_start_post(struct hns_roce_dca_ctx *ctx, uint32_t dcan);
-+void hns_roce_dca_stop_post(struct hns_roce_dca_ctx *ctx, uint32_t dcan);
-+
-+void hns_roce_shrink_dca_mem(struct hns_roce_context *ctx);
-+void hns_roce_cleanup_dca_mem(struct hns_roce_context *ctx);
-+
- void hns_roce_init_qp_indices(struct hns_roce_qp *qp);
-
- extern const struct hns_roce_u_hw hns_roce_u_hw_v2;
-diff --git a/providers/hns/hns_roce_u_abi.h b/providers/hns/hns_roce_u_abi.h
-index 3f98eb3..7e9bbc1 100644
---- a/providers/hns/hns_roce_u_abi.h
-+++ b/providers/hns/hns_roce_u_abi.h
-@@ -36,6 +36,7 @@
- #include
- #include
- #include
-+#include "hnsdv.h"
-
- DECLARE_DRV_CMD(hns_roce_alloc_pd, IB_USER_VERBS_CMD_ALLOC_PD,
- empty, hns_roce_ib_alloc_pd_resp);
-@@ -64,4 +65,7 @@ DECLARE_DRV_CMD(hns_roce_create_srq_ex, IB_USER_VERBS_CMD_CREATE_XSRQ,
- DECLARE_DRV_CMD(hns_roce_create_ah, IB_USER_VERBS_CMD_CREATE_AH, empty,
- hns_roce_ib_create_ah_resp);
-
-+DECLARE_DRV_CMD(hns_roce_modify_qp_ex, IB_USER_VERBS_EX_CMD_MODIFY_QP,
-+ empty, hns_roce_ib_modify_qp_resp);
-+
- #endif /* _HNS_ROCE_U_ABI_H */
-diff --git a/providers/hns/hns_roce_u_buf.c b/providers/hns/hns_roce_u_buf.c
-index 471dd9c..780683e 100644
---- a/providers/hns/hns_roce_u_buf.c
-+++ b/providers/hns/hns_roce_u_buf.c
-@@ -56,7 +56,473 @@ int hns_roce_alloc_buf(struct hns_roce_buf *buf, unsigned int size,
-
- void hns_roce_free_buf(struct hns_roce_buf *buf)
- {
-+ if (!buf->buf)
-+ return;
++ fill_attr_in_obj(cmd, ZXDH_IB_ATTR_QP_QUERY_HANDLE, qp->handle);
++ fill_attr_out_ptr(cmd, ZXDH_IB_ATTR_QP_QUERY_RESP, &resp_ex);
+
- ibv_dofork_range(buf->buf, buf->length);
-
- munmap(buf->buf, buf->length);
- }
-+
-+struct hns_roce_dca_mem {
-+ uint32_t handle;
-+ struct list_node entry;
-+ struct hns_roce_buf buf;
-+ struct hns_roce_context *ctx;
-+};
++ ret = execute_ioctl(qp->context, cmd);
++ if (ret)
++ return ret;
+
-+static void free_dca_mem(struct hns_roce_context *ctx,
-+ struct hns_roce_dca_mem *mem)
-+{
-+ hns_roce_free_buf(&mem->buf);
-+ free(mem);
++ copy_query_qpc(&resp_ex, qpc);
++ return 0;
+}
+
-+static struct hns_roce_dca_mem *alloc_dca_mem(uint32_t size)
++static void copy_modify_qpc_fields(struct zxdh_modify_qpc_req *req_cmd,
++ uint64_t attr_mask,
++ struct zxdh_rdma_qpc *qpc)
+{
-+ struct hns_roce_dca_mem *mem = NULL;
-+ int ret;
-+
-+ mem = malloc(sizeof(struct hns_roce_dca_mem));
-+ if (!mem) {
-+ errno = ENOMEM;
-+ return NULL;
++ if (attr_mask & ZXDH_TX_READ_RETRY_FLAG_SET) {
++ req_cmd->retry_flag = qpc->retry_flag;
++ req_cmd->rnr_retry_flag = qpc->rnr_retry_flag;
++ req_cmd->read_retry_flag = qpc->read_retry_flag;
++ req_cmd->cur_retry_count = qpc->cur_retry_count;
+ }
++ if (attr_mask & ZXDH_RETRY_CQE_SQ_OPCODE)
++ req_cmd->retry_cqe_sq_opcode = qpc->retry_cqe_sq_opcode;
+
-+ ret = hns_roce_alloc_buf(&mem->buf, size, HNS_HW_PAGE_SIZE);
-+ if (ret) {
-+ errno = ENOMEM;
-+ free(mem);
-+ return NULL;
++ if (attr_mask & ZXDH_ERR_FLAG_SET) {
++ req_cmd->err_flag = qpc->err_flag;
++ req_cmd->ack_err_flag = qpc->ack_err_flag;
+ }
-+
-+ return mem;
-+}
-+
-+static inline uint64_t dca_mem_to_key(struct hns_roce_dca_mem *dca_mem)
-+{
-+ return (uintptr_t)dca_mem;
++ if (attr_mask & ZXDH_PACKAGE_ERR_FLAG)
++ req_cmd->package_err_flag = qpc->package_err_flag;
+}
+
-+static struct hns_roce_dca_mem *key_to_dca_mem(struct hns_roce_dca_ctx *ctx,
-+ uint64_t key)
++static int _zxdh_reset_qp(struct ibv_qp *qp, uint64_t opcode)
+{
-+ struct hns_roce_dca_mem *mem;
-+ struct hns_roce_dca_mem *tmp;
-+
-+ list_for_each_safe(&ctx->mem_list, mem, tmp, entry) {
-+ if (dca_mem_to_key(mem) == key)
-+ return mem;
-+ }
++ DECLARE_COMMAND_BUFFER(cmd, ZXDH_IB_OBJECT_QP_OBJ,
++ ZXDH_IB_METHOD_QP_RESET_QP, 2);
+
-+ return NULL;
++ fill_attr_in_obj(cmd, ZXDH_IB_ATTR_QP_RESET_QP_HANDLE, qp->handle);
++ fill_attr_in_uint64(cmd, ZXDH_IB_ATTR_QP_RESET_OP_CODE, opcode);
++ return execute_ioctl(qp->context, cmd);
+}
+
-+static inline void *dca_mem_addr(struct hns_roce_dca_mem *dca_mem, int offset)
++static int _zxdh_modify_qpc(struct ibv_qp *qp, struct zxdh_rdma_qpc *qpc,
++ uint64_t qpc_mask)
+{
-+ return dca_mem->buf.buf + offset;
++ DECLARE_COMMAND_BUFFER(cmd, ZXDH_IB_OBJECT_QP_OBJ,
++ ZXDH_IB_METHOD_QP_MODIFY_QPC, 3);
++ struct zxdh_modify_qpc_req req = { 0 };
++
++ copy_modify_qpc_fields(&req, qpc_mask, qpc);
++ fill_attr_in_obj(cmd, ZXDH_IB_ATTR_QP_QUERY_HANDLE, qp->handle);
++ fill_attr_in_uint64(cmd, ZXDH_IB_ATTR_QP_MODIFY_QPC_MASK, qpc_mask);
++ fill_attr_in_ptr(cmd, ZXDH_IB_ATTR_QP_MODIFY_QPC_REQ, &req);
++ return execute_ioctl(qp->context, cmd);
+}
+
-+static int register_dca_mem(struct hns_roce_context *ctx, uint64_t key,
-+ void *addr, uint32_t size, uint32_t *handle)
++static int _zxdh_modify_qp_udp_sport(struct ibv_context *ibctx,
++ uint16_t udp_sport, uint32_t qpn)
+{
-+ struct ib_uverbs_attr *attr;
-+ int ret;
-+
-+ DECLARE_COMMAND_BUFFER(cmd, HNS_IB_OBJECT_DCA_MEM,
-+ HNS_IB_METHOD_DCA_MEM_REG, 4);
-+ fill_attr_in_uint32(cmd, HNS_IB_ATTR_DCA_MEM_REG_LEN, size);
-+ fill_attr_in_uint64(cmd, HNS_IB_ATTR_DCA_MEM_REG_ADDR,
-+ ioctl_ptr_to_u64(addr));
-+ fill_attr_in_uint64(cmd, HNS_IB_ATTR_DCA_MEM_REG_KEY, key);
-+ attr = fill_attr_out_obj(cmd, HNS_IB_ATTR_DCA_MEM_REG_HANDLE);
-+
-+ ret = execute_ioctl(&ctx->ibv_ctx.context, cmd);
-+ if (ret) {
-+ verbs_err(&ctx->ibv_ctx, "failed to reg DCA mem, ret = %d.\n",
-+ ret);
-+ return ret;
-+ }
-+
-+ *handle = read_attr_obj(HNS_IB_ATTR_DCA_MEM_REG_HANDLE, attr);
++ if (udp_sport <= MIN_UDP_SPORT || qpn <= MIN_QP_QPN)
++ return -EINVAL;
+
-+ return 0;
++ DECLARE_COMMAND_BUFFER(cmd, ZXDH_IB_OBJECT_QP_OBJ,
++ ZXDH_IB_METHOD_QP_MODIFY_UDP_SPORT, 2);
++ fill_attr_in(cmd, ZXDH_IB_ATTR_QP_UDP_PORT, &udp_sport,
++ sizeof(udp_sport));
++ fill_attr_in_uint32(cmd, ZXDH_IB_ATTR_QP_QPN, qpn);
++ return execute_ioctl(ibctx, cmd);
+}
+
-+static void deregister_dca_mem(struct hns_roce_context *ctx, uint32_t handle)
++static int _zxdh_get_log_trace_switch(struct ibv_context *ibctx,
++ uint8_t *switch_status)
+{
-+ int ret;
++ DECLARE_COMMAND_BUFFER(cmd, ZXDH_IB_OBJECT_DEV,
++ ZXDH_IB_METHOD_DEV_GET_LOG_TRACE, 1);
+
-+ DECLARE_COMMAND_BUFFER(cmd, HNS_IB_OBJECT_DCA_MEM,
-+ HNS_IB_METHOD_DCA_MEM_DEREG, 1);
-+ fill_attr_in_obj(cmd, HNS_IB_ATTR_DCA_MEM_DEREG_HANDLE, handle);
-+ ret = execute_ioctl(&ctx->ibv_ctx.context, cmd);
-+ if (ret)
-+ verbs_warn(&ctx->ibv_ctx,
-+ "failed to dereg DCA mem-%u, ret = %d.\n",
-+ handle, ret);
++ fill_attr_out_ptr(cmd, ZXDH_IB_ATTR_DEV_GET_LOG_TARCE_SWITCH,
++ switch_status);
++ return execute_ioctl(ibctx, cmd);
+}
+
-+void hns_roce_cleanup_dca_mem(struct hns_roce_context *ctx)
++static int _zxdh_set_log_trace_switch(struct ibv_context *ibctx,
++ uint8_t switch_status)
+{
-+ struct hns_roce_dca_ctx *dca_ctx = &ctx->dca_ctx;
-+ struct hns_roce_dca_mem *mem;
-+ struct hns_roce_dca_mem *tmp;
-+
-+ list_for_each_safe(&dca_ctx->mem_list, mem, tmp, entry)
-+ deregister_dca_mem(ctx, mem->handle);
++ DECLARE_COMMAND_BUFFER(cmd, ZXDH_IB_OBJECT_DEV,
++ ZXDH_IB_METHOD_DEV_SET_LOG_TRACE, 1);
++ fill_attr_in(cmd, ZXDH_IB_ATTR_DEV_SET_LOG_TARCE_SWITCH, &switch_status,
++ sizeof(switch_status));
++ return execute_ioctl(ibctx, cmd);
+}
+
-+struct hns_dca_mem_shrink_resp {
-+ uint32_t free_mems;
-+ uint64_t free_key;
++static struct zxdh_uvcontext_ops zxdh_ctx_ops = {
++ .modify_qp_udp_sport = _zxdh_modify_qp_udp_sport,
++ .get_log_trace_switch = _zxdh_get_log_trace_switch,
++ .set_log_trace_switch = _zxdh_set_log_trace_switch,
++ .query_qpc = _zxdh_query_qpc,
++ .modify_qpc = _zxdh_modify_qpc,
++ .reset_qp = _zxdh_reset_qp,
+};
+
-+static int shrink_dca_mem(struct hns_roce_context *ctx, uint32_t handle,
-+ uint64_t size, struct hns_dca_mem_shrink_resp *resp)
++static inline struct zxdh_uvcontext *to_zxdhtx(struct ibv_context *ibctx)
+{
-+ int ret;
-+
-+ DECLARE_COMMAND_BUFFER(cmd, HNS_IB_OBJECT_DCA_MEM,
-+ HNS_IB_METHOD_DCA_MEM_SHRINK, 4);
-+ fill_attr_in_obj(cmd, HNS_IB_ATTR_DCA_MEM_SHRINK_HANDLE, handle);
-+ fill_attr_in_uint64(cmd, HNS_IB_ATTR_DCA_MEM_SHRINK_RESERVED_SIZE, size);
-+ fill_attr_out(cmd, HNS_IB_ATTR_DCA_MEM_SHRINK_OUT_FREE_KEY,
-+ &resp->free_key, sizeof(resp->free_key));
-+ fill_attr_out(cmd, HNS_IB_ATTR_DCA_MEM_SHRINK_OUT_FREE_MEMS,
-+ &resp->free_mems, sizeof(resp->free_mems));
-+
-+ ret = execute_ioctl(&ctx->ibv_ctx.context, cmd);
-+ if (ret)
-+ verbs_err(&ctx->ibv_ctx, "failed to shrink DCA mem, ret = %d.\n",
-+ ret);
-+
-+ return ret;
++ return container_of(ibctx, struct zxdh_uvcontext, ibv_ctx.context);
+}
+
-+struct hns_dca_mem_query_resp {
-+ uint64_t key;
-+ uint32_t offset;
-+ uint32_t page_count;
-+};
-+
-+static int query_dca_mem(struct hns_roce_context *ctx, uint32_t handle,
-+ uint32_t index, struct hns_dca_mem_query_resp *resp)
++int zxdh_reset_qp(struct ibv_qp *qp, uint64_t opcode)
+{
-+ int ret;
-+
-+ DECLARE_COMMAND_BUFFER(cmd, HNS_IB_OBJECT_DCA_MEM,
-+ HNS_IB_METHOD_DCA_MEM_QUERY, 5);
-+ fill_attr_in_obj(cmd, HNS_IB_ATTR_DCA_MEM_QUERY_HANDLE, handle);
-+ fill_attr_in_uint32(cmd, HNS_IB_ATTR_DCA_MEM_QUERY_PAGE_INDEX, index);
-+ fill_attr_out(cmd, HNS_IB_ATTR_DCA_MEM_QUERY_OUT_KEY,
-+ &resp->key, sizeof(resp->key));
-+ fill_attr_out(cmd, HNS_IB_ATTR_DCA_MEM_QUERY_OUT_OFFSET,
-+ &resp->offset, sizeof(resp->offset));
-+ fill_attr_out(cmd, HNS_IB_ATTR_DCA_MEM_QUERY_OUT_PAGE_COUNT,
-+ &resp->page_count, sizeof(resp->page_count));
-+ ret = execute_ioctl(&ctx->ibv_ctx.context, cmd);
-+ if (ret)
-+ verbs_err(&ctx->ibv_ctx,
-+ "failed to query DCA mem-%u, ret = %d.\n",
-+ handle, ret);
++ struct zxdh_uvcontext_ops *dvops = to_zxdhtx(qp->context)->cxt_ops;
+
-+ return ret;
++ if (!dvops || !dvops->reset_qp)
++ return -EOPNOTSUPP;
++ return dvops->reset_qp(qp, opcode);
+}
+
-+void hns_roce_detach_dca_mem(struct hns_roce_context *ctx, uint32_t handle,
-+ struct hns_roce_dca_detach_attr *attr)
++int zxdh_modify_qpc(struct ibv_qp *qp, struct zxdh_rdma_qpc *qpc,
++ uint64_t qpc_mask)
+{
-+ int ret;
++ struct zxdh_uvcontext_ops *dvops = to_zxdhtx(qp->context)->cxt_ops;
+
-+ DECLARE_COMMAND_BUFFER(cmd, HNS_IB_OBJECT_DCA_MEM,
-+ HNS_IB_METHOD_DCA_MEM_DETACH, 4);
-+ fill_attr_in_obj(cmd, HNS_IB_ATTR_DCA_MEM_DETACH_HANDLE, handle);
-+ fill_attr_in_uint32(cmd, HNS_IB_ATTR_DCA_MEM_DETACH_SQ_INDEX,
-+ attr->sq_index);
-+ ret = execute_ioctl(&ctx->ibv_ctx.context, cmd);
-+ if (ret)
-+ verbs_warn(&ctx->ibv_ctx,
-+ "failed to detach DCA mem-%u, ret = %d.\n",
-+ handle, ret);
++ if (!dvops || !dvops->modify_qpc)
++ return -EOPNOTSUPP;
++ return dvops->modify_qpc(qp, qpc, qpc_mask);
+}
+
-+struct hns_dca_mem_attach_resp {
-+#define HNS_DCA_ATTACH_OUT_FLAGS_NEW_BUFFER BIT(0)
-+ uint32_t alloc_flags;
-+ uint32_t alloc_pages;
-+};
-+
-+static int attach_dca_mem(struct hns_roce_context *ctx, uint32_t handle,
-+ struct hns_roce_dca_attach_attr *attr,
-+ struct hns_dca_mem_attach_resp *resp)
++int zxdh_query_qpc(struct ibv_qp *qp, struct zxdh_rdma_qpc *qpc)
+{
-+ int ret;
++ struct zxdh_uvcontext_ops *dvops = to_zxdhtx(qp->context)->cxt_ops;
+
-+ DECLARE_COMMAND_BUFFER(cmd, HNS_IB_OBJECT_DCA_MEM,
-+ HNS_IB_METHOD_DCA_MEM_ATTACH, 6);
-+ fill_attr_in_obj(cmd, HNS_IB_ATTR_DCA_MEM_ATTACH_HANDLE, handle);
-+ fill_attr_in_uint32(cmd, HNS_IB_ATTR_DCA_MEM_ATTACH_SQ_OFFSET,
-+ attr->sq_offset);
-+ fill_attr_in_uint32(cmd, HNS_IB_ATTR_DCA_MEM_ATTACH_SGE_OFFSET,
-+ attr->sge_offset);
-+ fill_attr_in_uint32(cmd, HNS_IB_ATTR_DCA_MEM_ATTACH_RQ_OFFSET,
-+ attr->rq_offset);
-+ fill_attr_out(cmd, HNS_IB_ATTR_DCA_MEM_ATTACH_OUT_ALLOC_FLAGS,
-+ &resp->alloc_flags, sizeof(resp->alloc_flags));
-+ fill_attr_out(cmd, HNS_IB_ATTR_DCA_MEM_ATTACH_OUT_ALLOC_PAGES,
-+ &resp->alloc_pages, sizeof(resp->alloc_pages));
-+ ret = execute_ioctl(&ctx->ibv_ctx.context, cmd);
-+ if (ret)
-+ verbs_err(&ctx->ibv_ctx,
-+ "failed to attach DCA mem-%u, ret = %d.\n",
-+ handle, ret);
++ if (!dvops || !dvops->query_qpc)
++ return -EOPNOTSUPP;
+
-+ return ret;
++ return dvops->query_qpc(qp, qpc);
+}
+
-+static bool add_dca_mem_enabled(struct hns_roce_dca_ctx *ctx,
-+ uint32_t alloc_size)
++int zxdh_modify_qp_udp_sport(struct ibv_context *context, uint16_t udp_sport,
++ uint32_t qpn)
+{
-+ bool enable;
-+
-+ pthread_spin_lock(&ctx->lock);
-+
-+ if (ctx->unit_size == 0) /* Pool size can't be increased */
-+ enable = false;
-+ else if (ctx->max_size == HNS_DCA_MAX_MEM_SIZE) /* Pool size no limit */
-+ enable = true;
-+ else /* Pool size doesn't exceed max size */
-+ enable = (ctx->curr_size + alloc_size) < ctx->max_size;
++ struct zxdh_uvcontext_ops *dvops = to_zxdhtx(context)->cxt_ops;
+
-+ pthread_spin_unlock(&ctx->lock);
++ if (!dvops || !dvops->modify_qp_udp_sport)
++ return -EOPNOTSUPP;
+
-+ return enable;
++ return dvops->modify_qp_udp_sport(context, udp_sport, qpn);
+}
+
-+static bool shrink_dca_mem_enabled(struct hns_roce_dca_ctx *ctx)
++int zxdh_get_log_trace_switch(struct ibv_context *context,
++ enum switch_status *switch_status)
+{
-+ bool enable;
++ struct zxdh_uvcontext_ops *dvops = to_zxdhtx(context)->cxt_ops;
+
-+ pthread_spin_lock(&ctx->lock);
-+ enable = ctx->mem_cnt > 0 && ctx->min_size < ctx->max_size;
-+ pthread_spin_unlock(&ctx->lock);
++ if (!dvops || !dvops->get_log_trace_switch)
++ return -EOPNOTSUPP;
+
-+ return enable;
++ return dvops->get_log_trace_switch(context, (uint8_t *)switch_status);
+}
+
-+static int add_dca_mem(struct hns_roce_context *ctx, uint32_t size)
++int zxdh_set_log_trace_switch(struct ibv_context *context,
++ enum switch_status switch_status)
+{
-+ struct hns_roce_dca_ctx *dca_ctx = &ctx->dca_ctx;
-+ struct hns_roce_dca_mem *mem;
-+ int ret;
++ struct zxdh_uvcontext_ops *dvops = to_zxdhtx(context)->cxt_ops;
+
-+ if (!add_dca_mem_enabled(&ctx->dca_ctx, size))
-+ return -ENOMEM;
++ if (!dvops || !dvops->set_log_trace_switch)
++ return -EOPNOTSUPP;
+
-+ /* Step 1: Alloc DCA mem address */
-+ mem = alloc_dca_mem(
-+ DIV_ROUND_UP(size, dca_ctx->unit_size) * dca_ctx->unit_size);
-+ if (!mem)
-+ return -ENOMEM;
++ return dvops->set_log_trace_switch(context, switch_status);
++}
+
-+ /* Step 2: Register DCA mem uobject to pin user address */
-+ ret = register_dca_mem(ctx, dca_mem_to_key(mem), dca_mem_addr(mem, 0),
-+ mem->buf.length, &mem->handle);
-+ if (ret) {
-+ free_dca_mem(ctx, mem);
-+ return ret;
-+ }
-+
-+ /* Step 3: Add DCA mem node to pool */
-+ pthread_spin_lock(&dca_ctx->lock);
-+ list_add_tail(&dca_ctx->mem_list, &mem->entry);
-+ dca_ctx->mem_cnt++;
-+ dca_ctx->curr_size += mem->buf.length;
-+ pthread_spin_unlock(&dca_ctx->lock);
-+
-+ return 0;
++void add_private_ops(struct zxdh_uvcontext *iwvctx)
++{
++ iwvctx->cxt_ops = &zxdh_ctx_ops;
+}
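
[Illustrative sketch, not part of the patch: the context ops installed by add_private_ops() above are reached through the public wrappers declared later in zxdh_dv.h. A minimal caller, assuming `ctx` is an ibv_context opened on a zrdma device and with error handling trimmed, might look like this:]

/* Illustrative only; toggles the driver log trace through the private
 * verbs path wired up by add_private_ops(). */
static void example_enable_log_trace(struct ibv_context *ctx)
{
	enum switch_status status;

	if (zxdh_get_log_trace_switch(ctx, &status))
		return;			/* -EOPNOTSUPP if ops are not wired */
	if (status == SWITCH_CLOSE)
		(void)zxdh_set_log_trace_switch(ctx, SWITCH_OPEN);
}
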
+diff --git a/providers/zrdma/private_verbs_cmd.h b/providers/zrdma/private_verbs_cmd.h
+new file mode 100644
+index 0000000..32d0d68
+--- /dev/null
++++ b/providers/zrdma/private_verbs_cmd.h
+@@ -0,0 +1,24 @@
++/* SPDX-License-Identifier: GPL-2.0 or Linux-OpenIB */
++/* Copyright (c) 2024 ZTE Corporation. All rights reserved. */
++#ifndef ZXDH_RDMA_PRIVATE_VERBS_CMD_H
++#define ZXDH_RDMA_PRIVATE_VERBS_CMD_H
+
-+void hns_roce_shrink_dca_mem(struct hns_roce_context *ctx)
-+{
-+ struct hns_roce_dca_ctx *dca_ctx = &ctx->dca_ctx;
-+ struct hns_dca_mem_shrink_resp resp = {};
-+ struct hns_roce_dca_mem *mem;
-+ int dca_mem_cnt;
-+ uint32_t handle;
-+ int ret;
++#include "main.h"
++#include "zxdh_dv.h"
+
-+ pthread_spin_lock(&dca_ctx->lock);
-+ dca_mem_cnt = ctx->dca_ctx.mem_cnt;
-+ pthread_spin_unlock(&dca_ctx->lock);
-+ while (dca_mem_cnt > 0 && shrink_dca_mem_enabled(dca_ctx)) {
-+ resp.free_mems = 0;
-+ /* Step 1: Use any DCA mem uobject to shrink pool */
-+ pthread_spin_lock(&dca_ctx->lock);
-+ mem = list_tail(&dca_ctx->mem_list,
-+ struct hns_roce_dca_mem, entry);
-+ handle = mem ? mem->handle : 0;
-+ pthread_spin_unlock(&dca_ctx->lock);
-+ if (!mem)
-+ break;
++struct zxdh_uvcontext_ops {
++ int (*modify_qp_udp_sport)(struct ibv_context *ibctx,
++ uint16_t udp_sport, uint32_t qpn);
++ int (*set_log_trace_switch)(struct ibv_context *ibctx,
++ uint8_t switch_status);
++ int (*get_log_trace_switch)(struct ibv_context *ibctx,
++ uint8_t *switch_status);
++ int (*query_qpc)(struct ibv_qp *qp, struct zxdh_rdma_qpc *qpc);
++ int (*modify_qpc)(struct ibv_qp *qp, struct zxdh_rdma_qpc *qpc,
++ uint64_t qpc_mask);
++ int (*reset_qp)(struct ibv_qp *qp, uint64_t opcode);
++};
+
-+ ret = shrink_dca_mem(ctx, handle, dca_ctx->min_size, &resp);
-+ if (ret || likely(resp.free_mems < 1))
-+ break;
++void add_private_ops(struct zxdh_uvcontext *iwvctx);
+
-+ /* Step 2: Remove shrunk DCA mem node from pool */
-+ pthread_spin_lock(&dca_ctx->lock);
-+ mem = key_to_dca_mem(dca_ctx, resp.free_key);
-+ if (mem) {
-+ list_del(&mem->entry);
-+ dca_ctx->mem_cnt--;
-+ dca_ctx->curr_size -= mem->buf.length;
-+ }
++#endif
+diff --git a/providers/zrdma/zxdh_abi.h b/providers/zrdma/zxdh_abi.h
+new file mode 100644
+index 0000000..f3cff03
+--- /dev/null
++++ b/providers/zrdma/zxdh_abi.h
+@@ -0,0 +1,36 @@
++/* SPDX-License-Identifier: GPL-2.0 or Linux-OpenIB */
++/* Copyright (c) 2024 ZTE Corporation. All rights reserved. */
++#ifndef PROVIDER_ZXDH_ABI_H
++#define PROVIDER_ZXDH_ABI_H
+
-+ handle = mem ? mem->handle : 0;
-+ pthread_spin_unlock(&dca_ctx->lock);
-+ if (!mem)
-+ break;
++#include
++#include
++#include
++#include "zxdh_verbs.h"
+
-+ /* Step 3: Destroy DCA mem uobject */
-+ deregister_dca_mem(ctx, handle);
-+ free_dca_mem(ctx, mem);
-+ /* No any free memory after deregister 1 DCA mem */
-+ if (resp.free_mems <= 1)
-+ break;
++#define ZXDH_MIN_ABI_VERSION 0
++#define ZXDH_MAX_ABI_VERSION 5
+
-+ dca_mem_cnt--;
-+ }
-+}
++DECLARE_DRV_CMD(zxdh_ualloc_pd, IB_USER_VERBS_CMD_ALLOC_PD, empty,
++ zxdh_alloc_pd_resp);
++DECLARE_DRV_CMD(zxdh_ucreate_cq, IB_USER_VERBS_CMD_CREATE_CQ,
++ zxdh_create_cq_req, zxdh_create_cq_resp);
++DECLARE_DRV_CMD(zxdh_ucreate_cq_ex, IB_USER_VERBS_EX_CMD_CREATE_CQ,
++ zxdh_create_cq_req, zxdh_create_cq_resp);
++DECLARE_DRV_CMD(zxdh_uresize_cq, IB_USER_VERBS_CMD_RESIZE_CQ,
++ zxdh_resize_cq_req, empty);
++DECLARE_DRV_CMD(zxdh_ucreate_qp, IB_USER_VERBS_CMD_CREATE_QP,
++ zxdh_create_qp_req, zxdh_create_qp_resp);
++DECLARE_DRV_CMD(zxdh_umodify_qp, IB_USER_VERBS_EX_CMD_MODIFY_QP,
++ zxdh_modify_qp_req, zxdh_modify_qp_resp);
++DECLARE_DRV_CMD(zxdh_get_context, IB_USER_VERBS_CMD_GET_CONTEXT,
++ zxdh_alloc_ucontext_req, zxdh_alloc_ucontext_resp);
++DECLARE_DRV_CMD(zxdh_ureg_mr, IB_USER_VERBS_CMD_REG_MR, zxdh_mem_reg_req,
++ zxdh_reg_mr_resp);
++DECLARE_DRV_CMD(zxdh_urereg_mr, IB_USER_VERBS_CMD_REREG_MR, zxdh_mem_reg_req,
++ empty);
++DECLARE_DRV_CMD(zxdh_ucreate_ah, IB_USER_VERBS_CMD_CREATE_AH, empty,
++ zxdh_create_ah_resp);
++DECLARE_DRV_CMD(zxdh_ucreate_srq, IB_USER_VERBS_CMD_CREATE_SRQ,
++ zxdh_create_srq_req, zxdh_create_srq_resp);
++#endif /* PROVIDER_ZXDH_ABI_H */
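
[Sketch only: each DECLARE_DRV_CMD() above follows the usual rdma-core pattern of generating a wrapper struct that pairs the kernel verb command/response with the driver-private fields, which the provider then hands to the matching ibv_cmd_*() helper. The function below is a hypothetical alloc_pd path; the bare ibv_pd allocation (calloc from <stdlib.h>) stands in for the provider's real PD wrapper, which this patch defines elsewhere.]

/* Hypothetical provider hook, not part of this patch; shows how the
 * generated zxdh_ualloc_pd_resp wrapper pairs with ibv_cmd_alloc_pd(). */
static struct ibv_pd *example_alloc_pd(struct ibv_context *context)
{
	struct zxdh_ualloc_pd_resp resp = {};
	struct ibv_alloc_pd cmd;
	struct ibv_pd *pd;

	pd = calloc(1, sizeof(*pd));
	if (!pd)
		return NULL;
	if (ibv_cmd_alloc_pd(context, pd, &cmd, sizeof(cmd),
			     &resp.ibv_resp, sizeof(resp))) {
		free(pd);
		return NULL;
	}
	/* resp now also holds the driver-private zxdh_alloc_pd_resp fields. */
	return pd;
}
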
+diff --git a/providers/zrdma/zxdh_defs.h b/providers/zrdma/zxdh_defs.h
+new file mode 100644
+index 0000000..eaf73ca
+--- /dev/null
++++ b/providers/zrdma/zxdh_defs.h
+@@ -0,0 +1,399 @@
++/* SPDX-License-Identifier: GPL-2.0 or Linux-OpenIB */
++/* Copyright (c) 2024 ZTE Corporation. All rights reserved. */
++#ifndef ZXDH_DEFS_H
++#define ZXDH_DEFS_H
++#include
++#include
++#include
++#include
++#include
++#include
++#include
++#include
++#include
++#define ZXDH_RECV_ERR_FLAG_NAK_RNR_NAK 1
++#define ZXDH_RECV_ERR_FLAG_READ_RESP 2
++#define ZXDH_RETRY_CQE_SQ_OPCODE_ERR 32
++#define ZXDH_QP_RETRY_COUNT 2
++#define ZXDH_RESET_RETRY_CQE_SQ_OPCODE_ERR 0x1f
+
-+static void config_dca_pages(void *addr, struct hns_roce_dca_buf *buf,
-+ uint32_t page_index, int page_count)
-+{
-+ void **pages = &buf->bufs[page_index];
-+ int page_size = 1 << buf->shift;
-+ int i;
++#define ZXDH_QP_TYPE_ROCE_RC 1
++#define ZXDH_QP_TYPE_ROCE_UD 2
+
-+ for (i = 0; i < page_count; i++) {
-+ pages[i] = addr;
-+ addr += page_size;
-+ }
-+}
++#define ZXDH_HW_PAGE_SIZE 4096
++#define ZXDH_HW_PAGE_SHIFT 12
++#define ZXDH_CQE_QTYPE_RQ 0
++#define ZXDH_CQE_QTYPE_SQ 1
+
-+static int setup_dca_buf(struct hns_roce_context *ctx, uint32_t handle,
-+ struct hns_roce_dca_buf *buf, uint32_t page_count)
-+{
-+ struct hns_roce_dca_ctx *dca_ctx = &ctx->dca_ctx;
-+ struct hns_dca_mem_query_resp resp = {};
-+ struct hns_roce_dca_mem *mem;
-+ uint32_t idx = 0;
-+ int ret;
++#define ZXDH_MAX_SQ_WQES_PER_PAGE 128
++#define ZXDH_MAX_SQ_DEPTH 32768
+
-+ while (idx < page_count && idx < buf->max_cnt) {
-+ resp.page_count = 0;
-+ ret = query_dca_mem(ctx, handle, idx, &resp);
-+ if (ret)
-+ return -ENOMEM;
-+ if (resp.page_count < 1)
-+ break;
++#define ZXDH_QP_SW_MIN_WQSIZE 64u /* in WRs*/
++#define ZXDH_QP_WQE_MIN_SIZE 32
++#define ZXDH_QP_SQE_MIN_SIZE 32
++#define ZXDH_QP_RQE_MIN_SIZE 16
++#define ZXDH_QP_WQE_MAX_SIZE 256
++#define ZXDH_QP_WQE_MIN_QUANTA 1
++#define ZXDH_MAX_RQ_WQE_SHIFT_GEN1 2
++#define ZXDH_MAX_RQ_WQE_SHIFT_GEN2 3
++#define ZXDH_SRQ_FRAG_BYTESIZE 16
++#define ZXDH_QP_FRAG_BYTESIZE 16
++#define ZXDH_SQ_WQE_BYTESIZE 32
++#define ZXDH_SRQ_WQE_MIN_SIZE 16
+
-+ pthread_spin_lock(&dca_ctx->lock);
-+ mem = key_to_dca_mem(dca_ctx, resp.key);
-+ if (mem && resp.offset < mem->buf.length) {
-+ config_dca_pages(dca_mem_addr(mem, resp.offset),
-+ buf, idx, resp.page_count);
-+ } else {
-+ pthread_spin_unlock(&dca_ctx->lock);
-+ break;
-+ }
-+ pthread_spin_unlock(&dca_ctx->lock);
++#define ZXDH_SQ_RSVD 258
++#define ZXDH_RQ_RSVD 1
++#define ZXDH_SRQ_RSVD 1
+
-+ idx += resp.page_count;
-+ }
++#define ZXDH_FEATURE_RTS_AE 1ULL
++#define ZXDH_FEATURE_CQ_RESIZE 2ULL
++#define ZXDHQP_OP_RDMA_WRITE 0x00
++#define ZXDHQP_OP_RDMA_READ 0x01
++#define ZXDHQP_OP_RDMA_SEND 0x03
++#define ZXDHQP_OP_RDMA_SEND_INV 0x04
++#define ZXDHQP_OP_RDMA_SEND_SOL_EVENT 0x05
++#define ZXDHQP_OP_RDMA_SEND_SOL_EVENT_INV 0x06
++#define ZXDHQP_OP_BIND_MW 0x08
++#define ZXDHQP_OP_FAST_REGISTER 0x09
++#define ZXDHQP_OP_LOCAL_INVALIDATE 0x0a
++#define ZXDHQP_OP_RDMA_READ_LOC_INV 0x0b
++#define ZXDHQP_OP_NOP 0x0c
+
-+ return (idx >= page_count) ? 0 : -ENOMEM;
-+}
++#define ZXDH_CQPHC_QPCTX GENMASK_ULL(63, 0)
++#define ZXDH_QP_DBSA_HW_SQ_TAIL GENMASK_ULL(14, 0)
++#define ZXDH_CQ_DBSA_CQEIDX GENMASK_ULL(22, 0)
++#define ZXDH_CQ_DBSA_SW_CQ_SELECT GENMASK_ULL(28, 23)
++#define ZXDH_CQ_DBSA_ARM_NEXT BIT_ULL(31)
++// #define ZXDH_CQ_DBSA_ARM_NEXT_SE BIT_ULL(15)
++#define ZXDH_CQ_DBSA_ARM_SEQ_NUM GENMASK_ULL(30, 29)
++#define ZXDH_CQ_ARM_CQ_ID_S 10
++#define ZXDH_CQ_ARM_CQ_ID GENMASK_ULL(29, 10)
++#define ZXDH_CQ_ARM_DBSA_VLD_S 30
++#define ZXDH_CQ_ARM_DBSA_VLD BIT_ULL(30)
+
-+#define DCAN_TO_SYNC_BIT(n) ((n) * HNS_DCA_BITS_PER_STATUS)
-+#define DCAN_TO_STAT_BIT(n) DCAN_TO_SYNC_BIT(n)
++/* CQP and iWARP Completion Queue */
++#define ZXDH_CQ_QPCTX ZXDH_CQPHC_QPCTX
+
-+#define MAX_DCA_TRY_LOCK_TIMES 10
-+bool hns_roce_dca_start_post(struct hns_roce_dca_ctx *ctx, uint32_t dcan)
-+{
-+ atomic_bitmap_t *st = ctx->sync_status;
-+ int try_times = 0;
++#define ZXDH_CQ_MINERR GENMASK_ULL(22, 7)
++#define ZXDH_CQ_MAJERR GENMASK_ULL(38, 23)
++#define ZXDH_CQ_WQEIDX GENMASK_ULL(54, 40)
++#define ZXDH_CQ_EXTCQE BIT_ULL(50)
++#define ZXDH_OOO_CMPL BIT_ULL(54)
++#define ZXDH_CQ_ERROR BIT_ULL(39)
++#define ZXDH_CQ_SQ BIT_ULL(4)
+
-+ if (!st || dcan >= ctx->max_qps)
-+ return true;
++#define ZXDH_CQ_VALID BIT_ULL(5)
++#define ZXDH_CQ_IMMVALID BIT_ULL(0)
++#define ZXDH_CQ_UDSMACVALID BIT_ULL(26)
++#define ZXDH_CQ_UDVLANVALID BIT_ULL(27)
++#define ZXDH_CQ_IMMDATA GENMASK_ULL(31, 0)
++#define ZXDH_CQ_UDSMAC GENMASK_ULL(47, 0)
++#define ZXDH_CQ_UDVLAN GENMASK_ULL(63, 48)
+
-+ while (test_and_set_bit_lock(st, DCAN_TO_SYNC_BIT(dcan)))
-+ if (try_times++ > MAX_DCA_TRY_LOCK_TIMES)
-+ return false;
++#define ZXDH_CQ_IMMDATA_S 0
++#define ZXDH_CQ_IMMDATA_M (0xffffffffffffffffULL << ZXDH_CQ_IMMVALID_S)
++#define ZXDH_CQ_IMMDATALOW32 GENMASK_ULL(31, 0)
++#define ZXDH_CQ_IMMDATAUP32 GENMASK_ULL(63, 32)
++#define ZXDHCQ_PAYLDLEN GENMASK_ULL(63, 32)
++#define ZXDHCQ_TCPSEQNUMRTT GENMASK_ULL(63, 32)
++#define ZXDHCQ_INVSTAG_S 11
++#define ZXDHCQ_INVSTAG GENMASK_ULL(42, 11)
++#define ZXDHCQ_QPID GENMASK_ULL(63, 44)
+
-+ return true;
-+}
++#define ZXDHCQ_UDSRCQPN GENMASK_ULL(24, 1)
++#define ZXDHCQ_PSHDROP BIT_ULL(51)
++#define ZXDHCQ_STAG_S 43
++#define ZXDHCQ_STAG BIT_ULL(43)
++#define ZXDHCQ_IPV4 BIT_ULL(25)
++#define ZXDHCQ_SOEVENT BIT_ULL(6)
++#define ZXDHCQ_OP GENMASK_ULL(63, 58)
+
-+void hns_roce_dca_stop_post(struct hns_roce_dca_ctx *ctx, uint32_t dcan)
-+{
-+ atomic_bitmap_t *st = ctx->sync_status;
++/* Manage Push Page - MPP */
++#define ZXDH_INVALID_PUSH_PAGE_INDEX_GEN_1 0xffff
++#define ZXDH_INVALID_PUSH_PAGE_INDEX 0xffffffff
+
-+ if (!st || dcan >= ctx->max_qps)
-+ return;
++#define ZXDHQPSQ_OPCODE GENMASK_ULL(62, 57)
++#define ZXDHQPSQ_COPY_HOST_PBL BIT_ULL(43)
++#define ZXDHQPSQ_ADDFRAGCNT GENMASK_ULL(39, 32)
++#define ZXDHQPSQ_PUSHWQE BIT_ULL(56)
++#define ZXDHQPSQ_STREAMMODE BIT_ULL(58)
++#define ZXDHQPSQ_WAITFORRCVPDU BIT_ULL(59)
++#define ZXDHQPSQ_READFENCE BIT_ULL(54)
++#define ZXDHQPSQ_LOCALFENCE BIT_ULL(55)
++#define ZXDHQPSQ_UDPHEADER BIT_ULL(61)
++#define ZXDHQPSQ_L4LEN GENMASK_ULL(45, 42)
++#define ZXDHQPSQ_SIGCOMPL BIT_ULL(56)
++#define ZXDHQPSQ_SOLICITED BIT_ULL(53)
++#define ZXDHQPSQ_VALID BIT_ULL(63)
+
-+ clear_bit_unlock(st, DCAN_TO_SYNC_BIT(dcan));
-+}
++#define ZXDHQPSQ_FIRST_FRAG_VALID BIT_ULL(0)
++#define ZXDHQPSQ_FIRST_FRAG_LEN GENMASK_ULL(31, 1)
++#define ZXDHQPSQ_FIRST_FRAG_STAG GENMASK_ULL(63, 32)
++#define ZXDHQPSQ_FRAG_TO ZXDH_CQPHC_QPCTX
++#define ZXDHQPSQ_FRAG_VALID BIT_ULL(63)
++#define ZXDHQPSQ_FRAG_LEN GENMASK_ULL(62, 32)
++#define ZXDHQPSQ_FRAG_STAG GENMASK_ULL(31, 0)
++#define ZXDHQPSQ_GEN1_FRAG_LEN GENMASK_ULL(31, 0)
++#define ZXDHQPSQ_GEN1_FRAG_STAG GENMASK_ULL(63, 32)
++#define ZXDHQPSQ_REMSTAGINV GENMASK_ULL(31, 0)
++#define ZXDHQPSQ_DESTQKEY GENMASK_ULL(31, 0)
++#define ZXDHQPSQ_DESTQPN GENMASK_ULL(55, 32)
++#define ZXDHQPSQ_AHID GENMASK_ULL(18, 0)
++#define ZXDHQPSQ_INLINEDATAFLAG BIT_ULL(63)
++#define ZXDHQPSQ_UD_INLINEDATAFLAG BIT_ULL(50)
++#define ZXDHQPSQ_UD_INLINEDATALEN GENMASK_ULL(49, 42)
++#define ZXDHQPSQ_UD_ADDFRAGCNT GENMASK_ULL(36, 29)
++#define ZXDHQPSQ_WRITE_INLINEDATAFLAG BIT_ULL(48)
++#define ZXDHQPSQ_WRITE_INLINEDATALEN GENMASK_ULL(47, 40)
+
-+static bool check_dca_is_attached(struct hns_roce_dca_ctx *ctx, uint32_t dcan)
-+{
-+ atomic_bitmap_t *st = ctx->buf_status;
++#define ZXDH_INLINE_VALID_S 7
++#define ZXDHQPSQ_INLINE_VALID BIT_ULL(63)
++#define ZXDHQPSQ_INLINEDATALEN GENMASK_ULL(62, 55)
++#define ZXDHQPSQ_IMMDATAFLAG BIT_ULL(52)
++#define ZXDHQPSQ_REPORTRTT BIT_ULL(46)
+
-+ if (!st || dcan >= ctx->max_qps)
-+ return false;
++#define ZXDHQPSQ_IMMDATA GENMASK_ULL(31, 0)
++#define ZXDHQPSQ_REMSTAG_S 0
++#define ZXDHQPSQ_REMSTAG GENMASK_ULL(31, 0)
+
-+ return atomic_test_bit(st, DCAN_TO_STAT_BIT(dcan));
-+}
++#define ZXDHQPSQ_REMTO ZXDH_CQPHC_QPCTX
+
-+#define DCA_EXPAND_MEM_TRY_TIMES 3
-+int hns_roce_attach_dca_mem(struct hns_roce_context *ctx, uint32_t handle,
-+ struct hns_roce_dca_attach_attr *attr,
-+ uint32_t size, struct hns_roce_dca_buf *buf)
-+{
-+ uint32_t buf_pages = size >> buf->shift;
-+ struct hns_dca_mem_attach_resp resp = {};
-+ bool is_new_buf = true;
-+ int try_times = 0;
-+ int ret = 0;
++#define ZXDHQPSQ_IMMDATA_VALID BIT_ULL(63)
++#define ZXDHQPSQ_STAGRIGHTS GENMASK_ULL(50, 46)
++#define ZXDHQPSQ_VABASEDTO BIT_ULL(51)
++#define ZXDHQPSQ_MEMWINDOWTYPE BIT_ULL(52)
+
-+ if (!attr->force && check_dca_is_attached(&ctx->dca_ctx, buf->dcan))
-+ return 0;
++#define ZXDHQPSQ_MWLEN ZXDH_CQPHC_QPCTX
++#define ZXDHQPSQ_PARENTMRSTAG GENMASK_ULL(31, 0)
++#define ZXDHQPSQ_MWSTAG GENMASK_ULL(31, 0)
++#define ZXDHQPSQ_MW_PA_PBLE_ONE GENMASK_ULL(63, 46)
++#define ZXDHQPSQ_MW_PA_PBLE_TWO GENMASK_ULL(63, 32)
++#define ZXDHQPSQ_MW_PA_PBLE_THREE GENMASK_ULL(33, 32)
++#define ZXDHQPSQ_MW_HOST_PAGE_SIZE GENMASK_ULL(40, 36)
++#define ZXDHQPSQ_MW_LEAF_PBL_SIZE GENMASK_ULL(35, 34)
++#define ZXDHQPSQ_MW_LEVLE2_FIRST_PBLE_INDEX GENMASK_ULL(41, 32)
++#define ZXDHQPSQ_MW_LEVLE2_ROOT_PBLE_INDEX GENMASK_ULL(50, 42)
+
-+ do {
-+ resp.alloc_pages = 0;
-+ ret = attach_dca_mem(ctx, handle, attr, &resp);
-+ if (ret)
-+ break;
-+
-+ if (resp.alloc_pages >= buf_pages) {
-+ is_new_buf = !!(resp.alloc_flags &
-+ HNS_DCA_ATTACH_OUT_FLAGS_NEW_BUFFER);
-+ break;
-+ }
-+
-+ ret = add_dca_mem(ctx, size);
-+ if (ret)
-+ break;
-+ } while (try_times++ < DCA_EXPAND_MEM_TRY_TIMES);
-+
-+ if (ret || resp.alloc_pages < buf_pages) {
-+ verbs_err(&ctx->ibv_ctx,
-+ "failed to attach, size %u count %u != %u, ret = %d.\n",
-+ size, buf_pages, resp.alloc_pages, ret);
-+ return -ENOMEM;
-+ }
-+
-+ /* No need config user address if DCA config not changed */
-+ if (!is_new_buf && buf->bufs[0])
-+ return 0;
++#define ZXDHQPSQ_BASEVA_TO_FBO ZXDH_CQPHC_QPCTX
+
-+ return setup_dca_buf(ctx, handle, buf, buf_pages);
-+}
-diff --git a/providers/hns/hns_roce_u_db.c b/providers/hns/hns_roce_u_db.c
-index 0314254..bbef988 100644
---- a/providers/hns/hns_roce_u_db.c
-+++ b/providers/hns/hns_roce_u_db.c
-@@ -116,6 +116,8 @@ found:
-
- out:
- pthread_mutex_unlock((pthread_mutex_t *)&ctx->db_list_mutex);
-+ if (db)
-+ *((unsigned int *)db) = 0;
-
- return db;
- }
-diff --git a/providers/hns/hns_roce_u_db.h b/providers/hns/hns_roce_u_db.h
-index 8c47a53..de288de 100644
---- a/providers/hns/hns_roce_u_db.h
-+++ b/providers/hns/hns_roce_u_db.h
-@@ -40,8 +40,14 @@
-
- #define HNS_ROCE_WORD_NUM 2
-
--static inline void hns_roce_write64(void *dest, __le32 val[HNS_ROCE_WORD_NUM])
-+static inline void hns_roce_write64(struct hns_roce_context *ctx, void *dest,
-+ __le32 val[HNS_ROCE_WORD_NUM])
- {
-+ struct hns_roce_v2_reset_state *state = ctx->reset_state;
++#define ZXDHQPSQ_LOCSTAG GENMASK_ULL(31, 0)
+
-+ if (state && state->is_reset)
-+ return;
++#define ZXDHQPSRQ_RSV GENMASK_ULL(63, 40)
++#define ZXDHQPSRQ_VALID_SGE_NUM GENMASK_ULL(39, 32)
++#define ZXDHQPSRQ_SIGNATURE GENMASK_ULL(31, 24)
++#define ZXDHQPSRQ_NEXT_WQE_INDEX GENMASK_ULL(15, 0)
++#define ZXDHQPSRQ_START_PADDING BIT_ULL(63)
++#define ZXDHQPSRQ_FRAG_LEN GENMASK_ULL(62, 32)
++#define ZXDHQPSRQ_FRAG_STAG GENMASK_ULL(31, 0)
+
- mmio_write64_le(dest, *(__le64 *)val);
- }
-
-diff --git a/providers/hns/hns_roce_u_hw_v2.c b/providers/hns/hns_roce_u_hw_v2.c
-index daef17a..9371150 100644
---- a/providers/hns/hns_roce_u_hw_v2.c
-+++ b/providers/hns/hns_roce_u_hw_v2.c
-@@ -199,19 +199,35 @@ static struct hns_roce_v2_cqe *next_cqe_sw_v2(struct hns_roce_cq *cq)
- return get_sw_cqe_v2(cq, cq->cons_index);
- }
-
-+static inline bool check_qp_dca_enable(struct hns_roce_qp *qp)
-+{
-+ return !!qp->dca_wqe.bufs;
-+}
++/* QP RQ WQE common fields */
++#define ZXDHQPRQ_SIGNATURE GENMASK_ULL(31, 16)
++#define ZXDHQPRQ_ADDFRAGCNT ZXDHQPSQ_ADDFRAGCNT
++#define ZXDHQPRQ_VALID ZXDHQPSQ_VALID
++#define ZXDHQPRQ_COMPLCTX ZXDH_CQPHC_QPCTX
++#define ZXDHQPRQ_FRAG_LEN ZXDHQPSQ_FRAG_LEN
++#define ZXDHQPRQ_STAG ZXDHQPSQ_FRAG_STAG
++#define ZXDHQPRQ_TO ZXDHQPSQ_FRAG_TO
+
-+static inline void *get_wqe(struct hns_roce_qp *qp, unsigned int offset)
-+{
-+ if (likely(qp->buf.buf))
-+ return qp->buf.buf + offset;
-+ else if (unlikely(check_qp_dca_enable(qp)))
-+ return qp->dca_wqe.bufs[offset >> qp->dca_wqe.shift] +
-+ (offset & ((1 << qp->dca_wqe.shift) - 1));
-+ else
-+ return NULL;
-+}
++//QP RQ DBSA fields
++#define ZXDHQPDBSA_RQ_POLARITY_S 15
++#define ZXDHQPDBSA_RQ_POLARITY BIT_ULL(15)
++#define ZXDHQPDBSA_RQ_SW_HEAD_S 0
++#define ZXDHQPDBSA_RQ_SW_HEAD GENMASK_ULL(14, 0)
+
- static void *get_recv_wqe_v2(struct hns_roce_qp *qp, unsigned int n)
- {
-- return qp->buf.buf + qp->rq.offset + (n << qp->rq.wqe_shift);
-+ return get_wqe(qp, qp->rq.offset + (n << qp->rq.wqe_shift));
- }
-
- static void *get_send_wqe(struct hns_roce_qp *qp, unsigned int n)
- {
-- return qp->buf.buf + qp->sq.offset + (n << qp->sq.wqe_shift);
-+ return get_wqe(qp, qp->sq.offset + (n << qp->sq.wqe_shift));
- }
-
- static void *get_send_sge_ex(struct hns_roce_qp *qp, unsigned int n)
- {
-- return qp->buf.buf + qp->ex_sge.offset + (n << qp->ex_sge.sge_shift);
-+ return get_wqe(qp, qp->ex_sge.offset + (n << qp->ex_sge.sge_shift));
- }
-
- static void *get_srq_wqe(struct hns_roce_srq *srq, unsigned int n)
-@@ -229,14 +245,14 @@ static void hns_roce_free_srq_wqe(struct hns_roce_srq *srq, uint16_t ind)
- uint32_t bitmap_num;
- int bit_num;
-
-- pthread_spin_lock(&srq->lock);
-+ hns_roce_spin_lock(&srq->hr_lock);
-
- bitmap_num = ind / BIT_CNT_PER_LONG;
- bit_num = ind % BIT_CNT_PER_LONG;
- srq->idx_que.bitmap[bitmap_num] |= (1ULL << bit_num);
- srq->idx_que.tail++;
-
-- pthread_spin_unlock(&srq->lock);
-+ hns_roce_spin_unlock(&srq->hr_lock);
- }
-
- static int get_srq_from_cqe(struct hns_roce_v2_cqe *cqe,
-@@ -268,9 +284,9 @@ static int hns_roce_v2_wq_overflow(struct hns_roce_wq *wq, unsigned int nreq,
- if (cur + nreq < wq->max_post)
- return 0;
-
-- pthread_spin_lock(&cq->lock);
-+ hns_roce_spin_lock(&cq->hr_lock);
- cur = wq->head - wq->tail;
-- pthread_spin_unlock(&cq->lock);
-+ hns_roce_spin_unlock(&cq->hr_lock);
-
- return cur + nreq >= wq->max_post;
- }
-@@ -284,7 +300,8 @@ static void hns_roce_update_rq_db(struct hns_roce_context *ctx,
- hr_reg_write(&rq_db, DB_CMD, HNS_ROCE_V2_RQ_DB);
- hr_reg_write(&rq_db, DB_PI, rq_head);
-
-- hns_roce_write64(ctx->uar + ROCEE_VF_DB_CFG0_OFFSET, (__le32 *)&rq_db);
-+ hns_roce_write64(ctx, ctx->uar + ROCEE_VF_DB_CFG0_OFFSET,
-+ (__le32 *)&rq_db);
- }
-
- static void hns_roce_update_sq_db(struct hns_roce_context *ctx,
-@@ -298,7 +315,7 @@ static void hns_roce_update_sq_db(struct hns_roce_context *ctx,
- hr_reg_write(&sq_db, DB_PI, qp->sq.head);
- hr_reg_write(&sq_db, DB_SL, qp->sl);
-
-- hns_roce_write64(qp->sq.db_reg, (__le32 *)&sq_db);
-+ hns_roce_write64(ctx, qp->sq.db_reg, (__le32 *)&sq_db);
- }
-
- static void hns_roce_write512(uint64_t *dest, uint64_t *val)
-@@ -309,6 +326,12 @@ static void hns_roce_write512(uint64_t *dest, uint64_t *val)
- static void hns_roce_write_dwqe(struct hns_roce_qp *qp, void *wqe)
- {
- struct hns_roce_rc_sq_wqe *rc_sq_wqe = wqe;
-+ struct ibv_qp *ibvqp = &qp->verbs_qp.qp;
-+ struct hns_roce_context *ctx = to_hr_ctx(ibvqp->context);
-+ struct hns_roce_v2_reset_state *state = ctx->reset_state;
-+
-+ if (state && state->is_reset)
-+ return;
-
- /* All kinds of DirectWQE have the same header field layout */
- hr_reg_enable(rc_sq_wqe, RCWQE_FLAG);
-@@ -328,7 +351,8 @@ static void update_cq_db(struct hns_roce_context *ctx, struct hns_roce_cq *cq)
- hr_reg_write(&cq_db, DB_CQ_CI, cq->cons_index);
- hr_reg_write(&cq_db, DB_CQ_CMD_SN, 1);
-
-- hns_roce_write64(ctx->uar + ROCEE_VF_DB_CFG0_OFFSET, (__le32 *)&cq_db);
-+ hns_roce_write64(ctx, ctx->uar + ROCEE_VF_DB_CFG0_OFFSET,
-+ (__le32 *)&cq_db);
- }
-
- static struct hns_roce_qp *hns_roce_v2_find_qp(struct hns_roce_context *ctx,
-@@ -507,7 +531,7 @@ static void parse_cqe_for_srq(struct hns_roce_v2_cqe *cqe, struct ibv_wc *wc,
- handle_recv_cqe_inl_from_srq(cqe, srq);
- }
-
--static int parse_cqe_for_resp(struct hns_roce_v2_cqe *cqe, struct ibv_wc *wc,
-+static void parse_cqe_for_resp(struct hns_roce_v2_cqe *cqe, struct ibv_wc *wc,
- struct hns_roce_qp *hr_qp)
- {
- struct hns_roce_wq *wq;
-@@ -523,8 +547,6 @@ static int parse_cqe_for_resp(struct hns_roce_v2_cqe *cqe, struct ibv_wc *wc,
- handle_recv_cqe_inl_from_rq(cqe, hr_qp);
- else if (hr_reg_read(cqe, CQE_RQ_INLINE))
- handle_recv_rq_inl(cqe, hr_qp);
--
-- return 0;
- }
-
- static void parse_cqe_for_req(struct hns_roce_v2_cqe *cqe, struct ibv_wc *wc,
-@@ -572,6 +594,81 @@ static void parse_cqe_for_req(struct hns_roce_v2_cqe *cqe, struct ibv_wc *wc,
- wc->opcode = wc_send_op_map[opcode];
- }
-
-+static bool check_dca_attach_enable(struct hns_roce_qp *qp)
-+{
-+ return check_qp_dca_enable(qp) &&
-+ (qp->flags & HNS_ROCE_QP_CAP_DYNAMIC_CTX_ATTACH);
-+}
++#define ZXDHPFINT_OICR_HMC_ERR_M BIT(26)
++#define ZXDHPFINT_OICR_PE_PUSH_M BIT(27)
++#define ZXDHPFINT_OICR_PE_CRITERR_M BIT(28)
+
-+static bool check_dca_detach_enable(struct hns_roce_qp *qp)
-+{
-+ return check_qp_dca_enable(qp) &&
-+ (qp->flags & HNS_ROCE_QP_CAP_DYNAMIC_CTX_DETACH);
-+}
++#define ZXDH_SRQ_PARITY_SIGN_S 15
++#define ZXDH_SRQ_PARITY_SIGN BIT_ULL(15)
++#define ZXDH_SRQ_SW_SRQ_HEAD_S 0
++#define ZXDH_SRQ_SW_SRQ_HEAD GENMASK_ULL(14, 0)
++#define ZXDH_CQE_SQ_OPCODE_RESET BIT(5)
+
-+static int dca_attach_qp_buf(struct hns_roce_context *ctx,
-+ struct hns_roce_qp *qp)
-+{
-+ struct hns_roce_dca_attach_attr attr = {};
-+ bool enable_detach;
-+ uint32_t idx;
-+ int ret;
++#define ZXDH_CQP_INIT_WQE(wqe) memset(wqe, 0, 64)
+
-+ hns_roce_spin_lock(&qp->sq.hr_lock);
-+ hns_roce_spin_lock(&qp->rq.hr_lock);
++#define ZXDH_GET_CURRENT_CQ_ELEM(_cq) \
++ ((_cq)->cq_base[ZXDH_RING_CURRENT_HEAD((_cq)->cq_ring)].buf)
++#define ZXDH_GET_CURRENT_EXTENDED_CQ_ELEM(_cq) \
++ (((struct zxdh_extended_cqe \
++ *)((_cq)->cq_base))[ZXDH_RING_CURRENT_HEAD((_cq)->cq_ring)] \
++ .buf)
+
-+ if (qp->sq.wqe_cnt > 0) {
-+ idx = qp->sq.head & (qp->sq.wqe_cnt - 1);
-+ attr.sq_offset = idx << qp->sq.wqe_shift;
++#define ZXDH_RING_INIT(_ring, _size) \
++ { \
++ (_ring).head = 0; \
++ (_ring).tail = 0; \
++ (_ring).size = (_size); \
+ }
++#define ZXDH_RING_SIZE(_ring) ((_ring).size)
++#define ZXDH_RING_CURRENT_HEAD(_ring) ((_ring).head)
++#define ZXDH_RING_CURRENT_TAIL(_ring) ((_ring).tail)
+
-+ if (qp->ex_sge.sge_cnt > 0) {
-+ idx = qp->next_sge & (qp->ex_sge.sge_cnt - 1);
-+ attr.sge_offset = idx << qp->ex_sge.sge_shift;
++#define ZXDH_RING_MOVE_HEAD(_ring, _retcode) \
++ { \
++ register __u32 size; \
++ size = (_ring).size; \
++ if (!ZXDH_RING_FULL_ERR(_ring)) { \
++ (_ring).head = ((_ring).head + 1) % size; \
++ (_retcode) = 0; \
++ } else { \
++ (_retcode) = ZXDH_ERR_RING_FULL; \
++ } \
+ }
-+
-+ if (qp->rq.wqe_cnt > 0) {
-+ idx = qp->rq.head & (qp->rq.wqe_cnt - 1);
-+ attr.rq_offset = idx << qp->rq.wqe_shift;
++#define ZXDH_RING_MOVE_HEAD_BY_COUNT(_ring, _count, _retcode) \
++ { \
++ register __u32 size; \
++ size = (_ring).size; \
++ if ((ZXDH_RING_USED_QUANTA(_ring) + (_count)) < size) { \
++ (_ring).head = ((_ring).head + (_count)) % size; \
++ (_retcode) = 0; \
++ } else { \
++ (_retcode) = ZXDH_ERR_RING_FULL; \
++ } \
+ }
++#define ZXDH_SQ_RING_MOVE_HEAD(_ring, _retcode) \
++ { \
++ register __u32 size; \
++ size = (_ring).size; \
++ if (!ZXDH_SQ_RING_FULL_ERR(_ring)) { \
++ (_ring).head = ((_ring).head + 1) % size; \
++ (_retcode) = 0; \
++ } else { \
++ (_retcode) = ZXDH_ERR_RING_FULL; \
++ } \
++ }
++#define ZXDH_SQ_RING_MOVE_HEAD_BY_COUNT(_ring, _count, _retcode) \
++ { \
++ register __u32 size; \
++ size = (_ring).size; \
++ if ((ZXDH_RING_USED_QUANTA(_ring) + (_count)) < \
++ (size - 256)) { \
++ (_ring).head = ((_ring).head + (_count)) % size; \
++ (_retcode) = 0; \
++ } else { \
++ (_retcode) = ZXDH_ERR_RING_FULL; \
++ } \
++ }
++#define ZXDH_RING_MOVE_HEAD_BY_COUNT_NOCHECK(_ring, _count) \
++ (_ring).head = ((_ring).head + (_count)) % (_ring).size
+
-+ enable_detach = check_dca_detach_enable(qp);
-+ if (enable_detach &&
-+ !hns_roce_dca_start_post(&ctx->dca_ctx, qp->dca_wqe.dcan))
-+ /* Force attach if failed to sync dca status */
-+ attr.force = true;
-+
-+ ret = hns_roce_attach_dca_mem(ctx, qp->verbs_qp.qp.handle, &attr,
-+ qp->buf_size, &qp->dca_wqe);
-+ if (ret && enable_detach)
-+ hns_roce_dca_stop_post(&ctx->dca_ctx, qp->dca_wqe.dcan);
++#define ZXDH_RING_MOVE_TAIL(_ring) \
++ (_ring).tail = ((_ring).tail + 1) % (_ring).size
+
-+ hns_roce_spin_unlock(&qp->rq.hr_lock);
-+ hns_roce_spin_unlock(&qp->sq.hr_lock);
++#define ZXDH_RING_MOVE_HEAD_NOCHECK(_ring) \
++ (_ring).head = ((_ring).head + 1) % (_ring).size
+
-+ return ret;
-+}
++#define ZXDH_RING_MOVE_TAIL_BY_COUNT(_ring, _count) \
++ (_ring).tail = ((_ring).tail + (_count)) % (_ring).size
+
-+static void dca_detach_qp_buf(struct hns_roce_context *ctx,
-+ struct hns_roce_qp *qp)
-+{
-+ struct hns_roce_dca_detach_attr attr;
-+ bool is_empty;
++#define ZXDH_RING_SET_TAIL(_ring, _pos) (_ring).tail = (_pos) % (_ring).size
+
-+ hns_roce_spin_lock(&qp->sq.hr_lock);
-+ hns_roce_spin_lock(&qp->rq.hr_lock);
++#define ZXDH_RING_FULL_ERR(_ring) \
++ ((ZXDH_RING_USED_QUANTA(_ring) == ((_ring).size - 1)))
+
-+ is_empty = qp->sq.head == qp->sq.tail && qp->rq.head == qp->rq.tail;
-+ if (is_empty && qp->sq.wqe_cnt > 0)
-+ attr.sq_index = qp->sq.head & (qp->sq.wqe_cnt - 1);
++#define ZXDH_ERR_RING_FULL2(_ring) \
++ ((ZXDH_RING_USED_QUANTA(_ring) == ((_ring).size - 2)))
+
-+ hns_roce_spin_unlock(&qp->rq.hr_lock);
-+ hns_roce_spin_unlock(&qp->sq.hr_lock);
++#define ZXDH_ERR_RING_FULL3(_ring) \
++ ((ZXDH_RING_USED_QUANTA(_ring) == ((_ring).size - 3)))
+
-+ if (is_empty && qp->sq.wqe_cnt > 0)
-+ hns_roce_detach_dca_mem(ctx, qp->verbs_qp.qp.handle, &attr);
-+}
++#define ZXDH_SQ_RING_FULL_ERR(_ring) \
++ ((ZXDH_RING_USED_QUANTA(_ring) == ((_ring).size - 257)))
+
- static void cqe_proc_sq(struct hns_roce_qp *hr_qp, uint32_t wqe_idx,
- struct hns_roce_cq *cq)
- {
-@@ -715,6 +812,183 @@ static int hns_roce_poll_one(struct hns_roce_context *ctx,
- return hns_roce_flush_cqe(*cur_qp, status);
- }
-
-+static void hns_roce_fill_swc(struct hns_roce_cq *cq, struct ibv_wc *wc,
-+ uint64_t wr_id, uint32_t qp_num)
-+{
-+ if (!wc) {
-+ cq->verbs_cq.cq_ex.status = IBV_WC_WR_FLUSH_ERR;
-+ cq->verbs_cq.cq_ex.wr_id = wr_id;
-+ hr_reg_write(cq->sw_cqe, CQE_LCL_QPN, qp_num);
-+ return;
-+ }
++#define ZXDH_ERR_SQ_RING_FULL2(_ring) \
++ ((ZXDH_RING_USED_QUANTA(_ring) == ((_ring).size - 258)))
++#define ZXDH_ERR_SQ_RING_FULL3(_ring) \
++ ((ZXDH_RING_USED_QUANTA(_ring) == ((_ring).size - 259)))
++#define ZXDH_RING_MORE_WORK(_ring) ((ZXDH_RING_USED_QUANTA(_ring) != 0))
+
-+ wc->wr_id = wr_id;
-+ wc->status = IBV_WC_WR_FLUSH_ERR;
-+ wc->vendor_err = 0;
-+ wc->qp_num = qp_num;
-+}
++#define ZXDH_RING_USED_QUANTA(_ring) \
++ ((((_ring).head + (_ring).size - (_ring).tail) % (_ring).size))
+
-+static int hns_roce_get_wq_swc(struct hns_roce_cq *cq, struct hns_roce_qp *qp,
-+ struct ibv_wc *wc, bool is_sq)
-+{
-+ struct hns_roce_wq *wq = is_sq ? &qp->sq : &qp->rq;
-+ unsigned int left_wr;
-+ uint64_t wr_id;
++#define ZXDH_RING_FREE_QUANTA(_ring) \
++ (((_ring).size - ZXDH_RING_USED_QUANTA(_ring) - 1))
+
-+ left_wr = wq->head - wq->tail;
-+ if (left_wr == 0) {
-+ if (is_sq)
-+ list_del_init(&qp->scq_node);
-+ else
-+ list_del_init(&qp->rcq_node);
++#define ZXDH_SQ_RING_FREE_QUANTA(_ring) \
++ (((_ring).size - ZXDH_RING_USED_QUANTA(_ring) - 257))
+
-+ return ENOENT;
++#define ZXDH_ATOMIC_RING_MOVE_HEAD(_ring, index, _retcode) \
++ { \
++ index = ZXDH_RING_CURRENT_HEAD(_ring); \
++ ZXDH_RING_MOVE_HEAD(_ring, _retcode); \
+ }
+
-+ wr_id = wq->wrid[wq->tail & (wq->wqe_cnt - 1)];
-+ hns_roce_fill_swc(cq, wc, wr_id, qp->verbs_qp.qp.qp_num);
-+ wq->tail++;
-+ return V2_CQ_OK;
-+}
++enum zxdh_qp_wqe_size {
++ ZXDH_WQE_SIZE_32 = 32,
++ ZXDH_WQE_SIZE_64 = 64,
++ ZXDH_WQE_SIZE_96 = 96,
++ ZXDH_WQE_SIZE_128 = 128,
++ ZXDH_WQE_SIZE_256 = 256,
++};
+
-+static int hns_roce_gen_sq_swc(struct hns_roce_cq *cq, struct ibv_wc *wc)
++/**
++ * set_64bit_val - set 64 bit value to hw wqe
++ * @wqe_words: wqe addr to write
++ * @byte_index: index in wqe
++ * @val: value to write
++ **/
++static inline void set_64bit_val(__le64 *wqe_words, __u32 byte_index, __u64 val)
+{
-+ struct hns_roce_qp *next, *qp = NULL;
-+
-+ list_for_each_safe(&cq->list_sq, qp, next, scq_node) {
-+ if (hns_roce_get_wq_swc(cq, qp, wc, true) == ENOENT)
-+ continue;
-+
-+ return V2_CQ_OK;
-+ }
-+
-+ return wc ? V2_CQ_EMPTY : ENOENT;
++ wqe_words[byte_index >> 3] = htole64(val);
+}
+
-+static int hns_roce_gen_rq_swc(struct hns_roce_cq *cq, struct ibv_wc *wc)
++/**
++ * set_32bit_val - set 32 bit value to hw wqe
++ * @wqe_words: wqe addr to write
++ * @byte_index: index in wqe
++ * @val: value to write
++ **/
++static inline void set_32bit_val(__le32 *wqe_words, __u32 byte_index, __u32 val)
+{
-+ struct hns_roce_qp *next, *qp = NULL;
-+
-+ list_for_each_safe(&cq->list_rq, qp, next, rcq_node) {
-+ if (hns_roce_get_wq_swc(cq, qp, wc, false) == ENOENT)
-+ continue;
-+
-+ return V2_CQ_OK;
-+ }
-+
-+ return wc ? V2_CQ_EMPTY : ENOENT;
++ wqe_words[byte_index >> 2] = htole32(val);
+}
+
-+static int hns_roce_get_srq_swc(struct hns_roce_cq *cq, struct hns_roce_qp *qp,
-+ struct hns_roce_srq *srq, struct ibv_wc *wc)
++/**
++ * set_16bit_val - set 16 bit value to hw wqe
++ * @wqe_words: wqe addr to write
++ * @byte_index: index in wqe
++ * @val: value to write
++ **/
++static inline void set_16bit_val(__le16 *wqe_words, __u32 byte_index, __u16 val)
+{
-+ unsigned int left_wr;
-+ uint64_t wr_id;
-+
-+ hns_roce_spin_lock(&srq->hr_lock);
-+ left_wr = srq->idx_que.head - srq->idx_que.tail;
-+ if (left_wr == 0) {
-+ if (qp)
-+ list_del_init(&qp->srcq_node);
-+ else
-+ list_del_init(&srq->xrc_srcq_node);
-+
-+ hns_roce_spin_unlock(&srq->hr_lock);
-+ return ENOENT;
-+ }
-+
-+ wr_id = srq->wrid[srq->idx_que.tail & (srq->wqe_cnt - 1)];
-+ hns_roce_fill_swc(cq, wc, wr_id, srq->srqn);
-+ srq->idx_que.tail++;
-+ hns_roce_spin_unlock(&srq->hr_lock);
-+
-+ return V2_CQ_OK;
++ wqe_words[byte_index >> 1] = htole16(val);
+}
+
-+static int hns_roce_gen_common_srq_swc(struct hns_roce_cq *cq,
-+ struct ibv_wc *wc)
++/**
++ * get_64bit_val - read 64 bit value from wqe
++ * @wqe_words: wqe addr
++ * @byte_index: index to read from
++ * @val: read value
++ **/
++static inline void get_64bit_val(__le64 *wqe_words, __u32 byte_index,
++ __u64 *val)
+{
-+ struct hns_roce_qp *next, *qp = NULL;
-+ struct hns_roce_srq *srq;
-+
-+ list_for_each_safe(&cq->list_srq, qp, next, srcq_node) {
-+ srq = to_hr_srq(qp->verbs_qp.qp.srq);
-+ if (hns_roce_get_srq_swc(cq, qp, srq, wc) == ENOENT)
-+ continue;
-+
-+ return V2_CQ_OK;
-+ }
-+
-+ return wc ? V2_CQ_EMPTY : ENOENT;
++ *val = le64toh(wqe_words[byte_index >> 3]);
+}
+
-+static int hns_roce_gen_xrc_srq_swc(struct hns_roce_cq *cq, struct ibv_wc *wc)
++/**
++ * get_32bit_val - read 32 bit value from wqe
++ * @wqe_words: wqe addr
++ * @byte_index: index to read from
++ * @val: return 32 bit value
++ **/
++static inline void get_32bit_val(__le32 *wqe_words, __u32 byte_index,
++ __u32 *val)
+{
-+ struct hns_roce_srq *next, *srq = NULL;
-+
-+ list_for_each_safe(&cq->list_xrc_srq, srq, next, xrc_srcq_node) {
-+ if (hns_roce_get_srq_swc(cq, NULL, srq, wc) == ENOENT)
-+ continue;
-+
-+ return V2_CQ_OK;
-+ }
-+
-+ return wc ? V2_CQ_EMPTY : ENOENT;
++ *val = le32toh(wqe_words[byte_index >> 2]);
+}
+
-+static int hns_roce_gen_srq_swc(struct hns_roce_cq *cq, struct ibv_wc *wc)
++static inline void db_wr32(__u32 val, __u32 *wqe_word)
+{
-+ int err;
-+
-+ err = hns_roce_gen_common_srq_swc(cq, wc);
-+ if (err == V2_CQ_OK)
-+ return err;
-+
-+ return hns_roce_gen_xrc_srq_swc(cq, wc);
++ *wqe_word = val;
+}
+
-+static int hns_roce_poll_one_swc(struct hns_roce_cq *cq, struct ibv_wc *wc)
-+{
-+ int err;
++#define read_wqe_need_split(pre_cal_psn, next_psn) \
++ (((pre_cal_psn < next_psn) && (pre_cal_psn != 0)) || \
++ ((next_psn <= 0x7FFFFF) && (pre_cal_psn > 0x800000)))
+
-+ err = hns_roce_gen_sq_swc(cq, wc);
-+ if (err == V2_CQ_OK)
-+ return err;
++#endif /* ZXDH_DEFS_H */
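
[Sketch only: the ring macros above dereference just the head, tail and size members of the ring object, with head advanced by the poster and tail by completion processing. The struct below is a local stand-in for the real ring type (declared in zxdh_verbs.h, with __u32 from <linux/types.h>), used to show the intended produce/consume flow:]

/* Stand-in ring with only the members the macros touch. */
struct example_ring {
	__u32 head;
	__u32 tail;
	__u32 size;
};

static int example_ring_usage(void)
{
	struct example_ring ring;
	int ret;

	ZXDH_RING_INIT(ring, 64);		/* empty ring of 64 quanta */
	ZXDH_RING_MOVE_HEAD(ring, ret);		/* producer claims one slot */
	if (ret)
		return ret;			/* ZXDH_ERR_RING_FULL */
	/* ... build and post a WQE at the claimed index ... */
	ZXDH_RING_MOVE_TAIL(ring);		/* completion retires the slot */
	return ZXDH_RING_USED_QUANTA(ring);	/* back to 0 */
}
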
+diff --git a/providers/zrdma/zxdh_devids.h b/providers/zrdma/zxdh_devids.h
+new file mode 100644
+index 0000000..ac23124
+--- /dev/null
++++ b/providers/zrdma/zxdh_devids.h
+@@ -0,0 +1,17 @@
++/* SPDX-License-Identifier: GPL-2.0 or Linux-OpenIB */
++/* Copyright (c) 2024 ZTE Corporation. All rights reserved. */
++#ifndef ZXDH_DEVIDS_H
++#define ZXDH_DEVIDS_H
+
-+ err = hns_roce_gen_rq_swc(cq, wc);
-+ if (err == V2_CQ_OK)
-+ return err;
++/* ZXDH VENDOR ID */
++#define PCI_VENDOR_ID_ZXDH_EVB 0x16c3
++#define PCI_VENDOR_ID_ZXDH_E312 0x1cf2
++#define PCI_VENDOR_ID_ZXDH_X512 0x1cf2
++/* ZXDH Devices ID */
++#define ZXDH_DEV_ID_ADAPTIVE_EVB_PF 0x8040 /* ZXDH EVB PF DEVICE ID*/
++#define ZXDH_DEV_ID_ADAPTIVE_EVB_VF 0x8041 /* ZXDH EVB VF DEVICE ID*/
++#define ZXDH_DEV_ID_ADAPTIVE_E312_PF 0x8049 /* ZXDH E312 PF DEVICE ID*/
++#define ZXDH_DEV_ID_ADAPTIVE_E312_VF 0x8060 /* ZXDH E312 VF DEVICE ID*/
++#define ZXDH_DEV_ID_ADAPTIVE_X512_PF 0x806B /* ZXDH X512 PF DEVICE ID*/
++#define ZXDH_DEV_ID_ADAPTIVE_X512_VF 0x806C /* ZXDH X512 VF DEVICE ID*/
++#endif /* ZXDH_DEVIDS_H */
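
[For reference, a hedged helper showing how the vendor/device pairs above are meant to be matched; the function name is illustrative and not part of the patch:]

/* Illustrative only: E312 boards share one vendor ID across PF and VF. */
static inline int zxdh_example_is_e312(uint16_t vendor, uint16_t device)
{
	return vendor == PCI_VENDOR_ID_ZXDH_E312 &&
	       (device == ZXDH_DEV_ID_ADAPTIVE_E312_PF ||
		device == ZXDH_DEV_ID_ADAPTIVE_E312_VF);
}
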
+diff --git a/providers/zrdma/zxdh_dv.h b/providers/zrdma/zxdh_dv.h
+new file mode 100644
+index 0000000..bb7a845
+--- /dev/null
++++ b/providers/zrdma/zxdh_dv.h
+@@ -0,0 +1,75 @@
++/* SPDX-License-Identifier: GPL-2.0 or Linux-OpenIB */
++/* Copyright (c) 2024 ZTE Corporation. All rights reserved. */
++#ifndef _ZXDH_DV_H_
++#define _ZXDH_DV_H_
+
-+ return hns_roce_gen_srq_swc(cq, wc);
-+}
++#include
++#include
++#include /* For the __be64 type */
++#include
++#include
++#if defined(__SSE3__)
++#include
++#include
++#include
++#endif /* defined(__SSE3__) */
+
-+static int hns_roce_poll_swc(struct hns_roce_cq *cq, int ne, struct ibv_wc *wc)
-+{
-+ int npolled;
-+ int err;
++#include
++#include
+
-+ for (npolled = 0; npolled < ne; npolled++) {
-+ err = hns_roce_poll_one_swc(cq, wc + npolled);
-+ if (err == V2_CQ_EMPTY)
-+ break;
-+ }
++#ifdef __cplusplus
++extern "C" {
++#endif
+
-+ return npolled;
-+}
++enum switch_status {
++ SWITCH_CLOSE = 0,
++ SWITCH_OPEN = 1,
++ SWITCH_ERROR,
++};
+
-+static bool hns_roce_reseted(struct hns_roce_context *ctx)
-+{
-+ struct hns_roce_v2_reset_state *state = ctx->reset_state;
++enum zxdh_qp_reset_qp_code {
++ ZXDH_RESET_RETRY_TX_ITEM_FLAG = 1,
++};
++
++enum zxdh_qp_modify_qpc_mask {
++ ZXDH_RETRY_CQE_SQ_OPCODE = 1 << 0,
++ ZXDH_ERR_FLAG_SET = 1 << 1,
++ ZXDH_PACKAGE_ERR_FLAG = 1 << 2,
++ ZXDH_TX_LAST_ACK_PSN = 1 << 3,
++ ZXDH_TX_LAST_ACK_WQE_OFFSET_SET = 1 << 4,
++ ZXDH_TX_READ_RETRY_FLAG_SET = 1 << 5,
++ ZXDH_TX_RDWQE_PYLD_LENGTH = 1 << 6,
++ ZXDH_TX_RECV_READ_FLAG_SET = 1 << 7,
++ ZXDH_TX_RD_MSG_LOSS_ERR_FLAG_SET = 1 << 8,
++};
+
-+ if (ctx->use_new_reset_flag)
-+ return !state->hw_ready;
++struct zxdh_rdma_qpc {
++ uint8_t retry_flag;
++ uint8_t rnr_retry_flag;
++ uint8_t read_retry_flag;
++ uint8_t cur_retry_count;
++ uint8_t retry_cqe_sq_opcode;
++ uint8_t err_flag;
++ uint8_t ack_err_flag;
++ uint8_t package_err_flag;
++ uint8_t recv_err_flag;
++ uint32_t tx_last_ack_psn;
++ uint8_t retry_count;
++};
+
-+ if (state && state->is_reset)
-+ ctx->reseted = true;
++int zxdh_get_log_trace_switch(struct ibv_context *context,
++ enum switch_status *status);
++int zxdh_set_log_trace_switch(struct ibv_context *context,
++ enum switch_status status);
++int zxdh_modify_qp_udp_sport(struct ibv_context *context, uint16_t udp_sport,
++ uint32_t qpn);
++int zxdh_query_qpc(struct ibv_qp *qp, struct zxdh_rdma_qpc *qpc);
++int zxdh_modify_qpc(struct ibv_qp *qp, struct zxdh_rdma_qpc *qpc,
++ uint64_t qpc_mask);
++int zxdh_reset_qp(struct ibv_qp *qp, uint64_t opcode);
+
-+ return ctx->reseted;
++#ifdef __cplusplus
+}
++#endif
+
- static int hns_roce_u_v2_poll_cq(struct ibv_cq *ibvcq, int ne,
- struct ibv_wc *wc)
- {
-@@ -724,10 +998,19 @@ static int hns_roce_u_v2_poll_cq(struct ibv_cq *ibvcq, int ne,
- int err = V2_CQ_OK;
- int npolled;
-
-- pthread_spin_lock(&cq->lock);
-+ hns_roce_spin_lock(&cq->hr_lock);
++#endif
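
[Sketch only: together with private_verbs_cmd.c earlier in this patch, the zxdh_qp_modify_qpc_mask bits select which zxdh_rdma_qpc fields are copied into the modify request. The helper below is hypothetical; it assumes `qp` is an established RC QP and trims error handling:]

/* Hypothetical recovery helper, not part of this patch. */
static void example_clear_qp_error(struct ibv_qp *qp)
{
	struct zxdh_rdma_qpc qpc = {};

	if (zxdh_query_qpc(qp, &qpc))
		return;
	qpc.err_flag = 0;		/* fields selected by ZXDH_ERR_FLAG_SET */
	qpc.ack_err_flag = 0;
	if (zxdh_modify_qpc(qp, &qpc, ZXDH_ERR_FLAG_SET))
		return;
	(void)zxdh_reset_qp(qp, ZXDH_RESET_RETRY_TX_ITEM_FLAG);
}
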
+diff --git a/providers/zrdma/zxdh_hw.c b/providers/zrdma/zxdh_hw.c
+new file mode 100644
+index 0000000..ed577a9
+--- /dev/null
++++ b/providers/zrdma/zxdh_hw.c
+@@ -0,0 +1,2596 @@
++// SPDX-License-Identifier: GPL-2.0 or Linux-OpenIB
++/* Copyright (c) 2024 ZTE Corporation. All rights reserved. */
++#include "zxdh_status.h"
++#include "zxdh_defs.h"
++#include "zxdh_verbs.h"
++#include "main.h"
++#include
++#include
++#include "private_verbs_cmd.h"
++#include
++#include
++#include
++#include
++#include
++#include
++#include
++#include
++#include
++#include
++#include
++#define ERROR_CODE_VALUE 65
+
-+ if (unlikely(hns_roce_reseted(ctx))) {
-+ npolled = hns_roce_poll_swc(cq, ne, wc);
-+ hns_roce_spin_unlock(&cq->hr_lock);
-+ return npolled;
++static void qp_tx_psn_add(__u32 *x, __u32 y, __u16 mtu)
++{
++ if (y == 0) {
++ *x = (*x + 1) & 0xffffff;
++ return;
+ }
-
- for (npolled = 0; npolled < ne; ++npolled) {
- err = hns_roce_poll_one(ctx, &qp, cq, wc + npolled);
-+ if (qp && check_dca_detach_enable(qp))
-+ dca_detach_qp_buf(ctx, qp);
-+
- if (err != V2_CQ_OK)
- break;
- }
-@@ -739,7 +1022,11 @@ static int hns_roce_u_v2_poll_cq(struct ibv_cq *ibvcq, int ne,
- update_cq_db(ctx, cq);
- }
-
-- pthread_spin_unlock(&cq->lock);
-+ hns_roce_spin_unlock(&cq->hr_lock);
-+
-+ /* Try to shrink the DCA mem */
-+ if (ctx->dca_ctx.mem_cnt > 0)
-+ hns_roce_shrink_dca_mem(ctx);
-
- return err == V2_CQ_POLL_ERR ? err : npolled;
- }
-@@ -762,19 +1049,38 @@ static int hns_roce_u_v2_arm_cq(struct ibv_cq *ibvcq, int solicited)
- hr_reg_write(&cq_db, DB_CQ_CMD_SN, cq->arm_sn);
- hr_reg_write(&cq_db, DB_CQ_NOTIFY, solicited_flag);
-
-- hns_roce_write64(ctx->uar + ROCEE_VF_DB_CFG0_OFFSET, (__le32 *)&cq_db);
-+ hns_roce_write64(ctx, ctx->uar + ROCEE_VF_DB_CFG0_OFFSET,
-+ (__le32 *)&cq_db);
-
- return 0;
- }
-
--static inline int check_qp_send(struct ibv_qp *qp)
-+static int check_qp_send(struct hns_roce_qp *qp, struct hns_roce_context *ctx)
- {
-- if (unlikely(qp->state == IBV_QPS_RESET ||
-- qp->state == IBV_QPS_INIT ||
-- qp->state == IBV_QPS_RTR))
-+ struct ibv_qp *ibvqp = &qp->verbs_qp.qp;
-+ int ret = 0;
++ *x = (*x + ((y % mtu) ? (y / mtu + 1) : y / mtu)) & 0xffffff;
++}
+
-+ if (unlikely(ibvqp->state == IBV_QPS_RESET ||
-+ ibvqp->state == IBV_QPS_INIT ||
-+ ibvqp->state == IBV_QPS_RTR)) {
-+ verbs_err(verbs_get_ctx(qp->verbs_qp.qp.context),
-+ "unsupported qp state, state = %d.\n", ibvqp->state);
- return EINVAL;
-+ } else if (unlikely(hns_roce_reseted(ctx))) {
-+ verbs_err_datapath(&ctx->ibv_ctx,
-+ "failed to send, device has been reseted!\n");
-+ return EIO;
-+ }
-
-- return 0;
-+ if (check_dca_attach_enable(qp)) {
-+ ret = dca_attach_qp_buf(ctx, qp);
-+ if (ret)
-+ verbs_err_datapath(&ctx->ibv_ctx,
-+ "failed to attach QP-%u send, ret = %d.\n",
-+ qp->verbs_qp.qp.qp_num, ret);
-+ }
++int zxdh_get_write_imm_split_switch(void)
++{
++ char *env;
++ env = getenv("ZXDH_WRITE_IMM_SPILT_ENABLE");
++ return (env != NULL) ? atoi(env) : 0;
++}
+
-+ return ret;
- }
-
- static void set_rc_sge(struct hns_roce_v2_wqe_data_seg *dseg,
-@@ -1069,6 +1375,7 @@ static inline void enable_wqe(struct hns_roce_qp *qp, void *sq_wqe,
- static int set_ud_wqe(void *wqe, struct hns_roce_qp *qp, struct ibv_send_wr *wr,
- unsigned int nreq, struct hns_roce_sge_info *sge_info)
- {
-+ struct hns_roce_device *hr_dev = to_hr_dev(qp->verbs_qp.qp.context->device);
- struct hns_roce_ah *ah = to_hr_ah(wr->wr.ud.ah);
- struct hns_roce_ud_sq_wqe *ud_sq_wqe = wqe;
- int ret = 0;
-@@ -1093,6 +1400,9 @@ static int set_ud_wqe(void *wqe, struct hns_roce_qp *qp, struct ibv_send_wr *wr,
- if (ret)
- return ret;
-
-+ if (hr_dev->link_type == HNS_DEV_LINK_TYPE_HCCS)
-+ ud_sq_wqe->dmac[0] = 0xF0;
-+
- ret = fill_ud_data_seg(ud_sq_wqe, qp, wr, sge_info);
- if (ret)
- return ret;
-@@ -1141,6 +1451,13 @@ static int set_rc_inl(struct hns_roce_qp *qp, const struct ibv_send_wr *wr,
- return 0;
- }
-
-+static inline void fill_rc_dca_fields(uint32_t qp_num,
-+ struct hns_roce_rc_sq_wqe *wqe)
++/**
++ * zxdh_set_fragment - set fragment in wqe
++ * @wqe: wqe for setting fragment
++ * @offset: offset value
++ * @sge: sge length and stag
++ * @valid: The wqe valid
++ */
++static void zxdh_set_fragment(__le64 *wqe, __u32 offset, struct zxdh_sge *sge,
++ __u8 valid)
+{
-+ hr_reg_write(wqe, RCWQE_SQPN_L, qp_num);
-+ hr_reg_write(wqe, RCWQE_SQPN_H, qp_num >> RCWQE_SQPN_L_WIDTH);
++ if (sge) {
++ set_64bit_val(wqe, offset + 8,
++ FIELD_PREP(ZXDHQPSQ_FRAG_TO, sge->tag_off));
++ set_64bit_val(wqe, offset,
++ FIELD_PREP(ZXDHQPSQ_VALID, valid) |
++ FIELD_PREP(ZXDHQPSQ_FRAG_LEN, sge->len) |
++ FIELD_PREP(ZXDHQPSQ_FRAG_STAG,
++ sge->stag));
++ } else {
++ set_64bit_val(wqe, offset + 8, 0);
++ set_64bit_val(wqe, offset, FIELD_PREP(ZXDHQPSQ_VALID, valid));
++ }
+}
+
- static void set_bind_mw_seg(struct hns_roce_rc_sq_wqe *wqe,
- const struct ibv_send_wr *wr)
- {
-@@ -1248,6 +1565,9 @@ static int set_rc_wqe(void *wqe, struct hns_roce_qp *qp, struct ibv_send_wr *wr,
- return ret;
-
- wqe_valid:
-+ if (check_qp_dca_enable(qp))
-+ fill_rc_dca_fields(qp->verbs_qp.qp.qp_num, rc_sq_wqe);
++/**
++ * zxdh_nop_1 - insert a NOP wqe
++ * @qp: hw qp ptr
++ */
++static enum zxdh_status_code zxdh_nop_1(struct zxdh_qp *qp)
++{
++ __u64 hdr;
++ __le64 *wqe;
++ __u32 wqe_idx;
++ bool signaled = false;
+
- enable_wqe(qp, rc_sq_wqe, qp->sq.head + nreq);
-
- return 0;
-@@ -1264,13 +1584,13 @@ int hns_roce_u_v2_post_send(struct ibv_qp *ibvqp, struct ibv_send_wr *wr,
- unsigned int wqe_idx, nreq;
- int ret;
-
-- ret = check_qp_send(ibvqp);
-+ ret = check_qp_send(qp, ctx);
- if (unlikely(ret)) {
- *bad_wr = wr;
- return ret;
- }
-
-- pthread_spin_lock(&qp->sq.lock);
-+ hns_roce_spin_lock(&qp->sq.hr_lock);
-
- sge_info.start_idx = qp->next_sge; /* start index of extend sge */
-
-@@ -1331,7 +1651,10 @@ out:
- *(qp->sdb) = qp->sq.head & 0xffff;
- }
-
-- pthread_spin_unlock(&qp->sq.lock);
-+ hns_roce_spin_unlock(&qp->sq.hr_lock);
++ if (!qp->sq_ring.head)
++ return ZXDH_ERR_PARAM;
+
-+ if (check_dca_detach_enable(qp))
-+ hns_roce_dca_stop_post(&ctx->dca_ctx, qp->dca_wqe.dcan);
-
- if (ibvqp->state == IBV_QPS_ERR) {
- attr.qp_state = IBV_QPS_ERR;
-@@ -1342,12 +1665,30 @@ out:
- return ret;
- }
-
--static inline int check_qp_recv(struct ibv_qp *qp)
-+static int check_qp_recv(struct hns_roce_qp *qp, struct hns_roce_context *ctx)
- {
-- if (qp->state == IBV_QPS_RESET)
-+ struct ibv_qp *ibvqp = &qp->verbs_qp.qp;
-+ int ret = 0;
++ wqe_idx = ZXDH_RING_CURRENT_HEAD(qp->sq_ring);
++ wqe = qp->sq_base[wqe_idx].elem;
+
-+ if (ibvqp->state == IBV_QPS_RESET) {
-+ verbs_err(verbs_get_ctx(qp->verbs_qp.qp.context),
-+ "unsupported qp state, state = %d.\n", ibvqp->state);
- return EINVAL;
-+ } else if (unlikely(hns_roce_reseted(ctx))) {
-+ verbs_err_datapath(&ctx->ibv_ctx,
-+ "fail to recv, device has been reseted!\n");
-+ return EIO;
-+ }
-
-- return 0;
-+ if (check_dca_attach_enable(qp)) {
-+ ret = dca_attach_qp_buf(ctx, qp);
-+ if (ret)
-+ verbs_err_datapath(&ctx->ibv_ctx,
-+ "failed to attach QP-%u recv, ret = %d.\n",
-+ qp->verbs_qp.qp.qp_num, ret);
-+ }
++ qp->sq_wrtrk_array[wqe_idx].quanta = ZXDH_QP_WQE_MIN_QUANTA;
+
-+ return ret;
- }
-
- static void fill_recv_sge_to_wqe(struct ibv_recv_wr *wr, void *wqe,
-@@ -1414,13 +1755,13 @@ static int hns_roce_u_v2_post_recv(struct ibv_qp *ibvqp, struct ibv_recv_wr *wr,
- struct ibv_qp_attr attr = {};
- int ret;
-
-- ret = check_qp_recv(ibvqp);
-+ ret = check_qp_recv(qp, ctx);
- if (unlikely(ret)) {
- *bad_wr = wr;
- return ret;
- }
-
-- pthread_spin_lock(&qp->rq.lock);
-+ hns_roce_spin_lock(&qp->rq.hr_lock);
-
- max_sge = qp->rq.max_gs - qp->rq.rsv_sge;
- for (nreq = 0; wr; ++nreq, wr = wr->next) {
-@@ -1454,7 +1795,10 @@ out:
- hns_roce_update_rq_db(ctx, ibvqp->qp_num, qp->rq.head);
- }
-
-- pthread_spin_unlock(&qp->rq.lock);
-+ hns_roce_spin_unlock(&qp->rq.hr_lock);
++ set_64bit_val(wqe, 8, 0);
++ set_64bit_val(wqe, 16, 0);
++ set_64bit_val(wqe, 24, 0);
+
-+ if (check_dca_detach_enable(qp))
-+ hns_roce_dca_stop_post(&ctx->dca_ctx, qp->dca_wqe.dcan);
-
- if (ibvqp->state == IBV_QPS_ERR) {
- attr.qp_state = IBV_QPS_ERR;
-@@ -1510,9 +1854,9 @@ static void __hns_roce_v2_cq_clean(struct hns_roce_cq *cq, uint32_t qpn,
- static void hns_roce_v2_cq_clean(struct hns_roce_cq *cq, unsigned int qpn,
- struct hns_roce_srq *srq)
- {
-- pthread_spin_lock(&cq->lock);
-+ hns_roce_spin_lock(&cq->hr_lock);
- __hns_roce_v2_cq_clean(cq, qpn, srq);
-- pthread_spin_unlock(&cq->lock);
-+ hns_roce_spin_unlock(&cq->hr_lock);
- }
-
- static void record_qp_attr(struct ibv_qp *qp, struct ibv_qp_attr *attr,
-@@ -1523,8 +1867,12 @@ static void record_qp_attr(struct ibv_qp *qp, struct ibv_qp_attr *attr,
- if (attr_mask & IBV_QP_PORT)
- hr_qp->port_num = attr->port_num;
-
-- if (attr_mask & IBV_QP_AV)
-- hr_qp->sl = attr->ah_attr.sl;
-+ if (hr_qp->tc_mode == HNS_ROCE_TC_MAP_MODE_DSCP)
-+ hr_qp->sl = hr_qp->priority;
-+ else {
-+ if (attr_mask & IBV_QP_AV)
-+ hr_qp->sl = attr->ah_attr.sl;
-+ }
-
- if (attr_mask & IBV_QP_QKEY)
- hr_qp->qkey = attr->qkey;
-@@ -1538,31 +1886,41 @@ static void record_qp_attr(struct ibv_qp *qp, struct ibv_qp_attr *attr,
- static int hns_roce_u_v2_modify_qp(struct ibv_qp *qp, struct ibv_qp_attr *attr,
- int attr_mask)
- {
-- int ret;
-- struct ibv_modify_qp cmd;
-+ struct hns_roce_context *ctx = to_hr_ctx(qp->context);
-+ struct hns_roce_modify_qp_ex_resp resp_ex = {};
-+ struct hns_roce_modify_qp_ex cmd_ex = {};
- struct hns_roce_qp *hr_qp = to_hr_qp(qp);
- bool flag = false; /* modify qp to error */
-+ int ret;
-
- if ((attr_mask & IBV_QP_STATE) && (attr->qp_state == IBV_QPS_ERR)) {
-- pthread_spin_lock(&hr_qp->sq.lock);
-- pthread_spin_lock(&hr_qp->rq.lock);
-+ hns_roce_spin_lock(&hr_qp->sq.hr_lock);
-+ hns_roce_spin_lock(&hr_qp->rq.hr_lock);
- flag = true;
- }
-
-- ret = ibv_cmd_modify_qp(qp, attr, attr_mask, &cmd, sizeof(cmd));
-+ ret = ibv_cmd_modify_qp_ex(qp, attr, attr_mask, &cmd_ex.ibv_cmd,
-+ sizeof(cmd_ex), &resp_ex.ibv_resp,
-+ sizeof(resp_ex));
-
- if (flag) {
- if (!ret)
- qp->state = IBV_QPS_ERR;
-- pthread_spin_unlock(&hr_qp->rq.lock);
-- pthread_spin_unlock(&hr_qp->sq.lock);
-+ hns_roce_spin_unlock(&hr_qp->sq.hr_lock);
-+ hns_roce_spin_unlock(&hr_qp->rq.hr_lock);
- }
-
- if (ret)
- return ret;
-
-- if (attr_mask & IBV_QP_STATE)
-+ if (attr_mask & IBV_QP_STATE) {
- qp->state = attr->qp_state;
-+ if (attr->qp_state == IBV_QPS_RTR) {
-+ hr_qp->tc_mode = resp_ex.drv_payload.tc_mode;
-+ hr_qp->priority = resp_ex.drv_payload.priority;
-+ hr_qp->dca_wqe.dcan = resp_ex.drv_payload.dcan;
-+ }
-+ }
-
- if ((attr_mask & IBV_QP_STATE) && attr->qp_state == IBV_QPS_RESET) {
- if (qp->recv_cq)
-@@ -1576,58 +1934,57 @@ static int hns_roce_u_v2_modify_qp(struct ibv_qp *qp, struct ibv_qp_attr *attr,
- hns_roce_init_qp_indices(to_hr_qp(qp));
- }
-
-+ /* Try to shrink the DCA mem */
-+ if (ctx->dca_ctx.mem_cnt > 0)
-+ hns_roce_shrink_dca_mem(ctx);
++ hdr = FIELD_PREP(ZXDHQPSQ_OPCODE, ZXDH_OP_TYPE_NOP) |
++ FIELD_PREP(ZXDHQPSQ_SIGCOMPL, signaled) |
++ FIELD_PREP(ZXDHQPSQ_VALID, qp->swqe_polarity);
+
- record_qp_attr(qp, attr, attr_mask);
-
- return ret;
- }
-
--static void hns_roce_lock_cqs(struct ibv_qp *qp)
-+void hns_roce_lock_cqs(struct hns_roce_cq *send_cq, struct hns_roce_cq *recv_cq)
- {
-- struct hns_roce_cq *send_cq = to_hr_cq(qp->send_cq);
-- struct hns_roce_cq *recv_cq = to_hr_cq(qp->recv_cq);
--
- if (send_cq && recv_cq) {
- if (send_cq == recv_cq) {
-- pthread_spin_lock(&send_cq->lock);
-+ hns_roce_spin_lock(&send_cq->hr_lock);
- } else if (send_cq->cqn < recv_cq->cqn) {
-- pthread_spin_lock(&send_cq->lock);
-- pthread_spin_lock(&recv_cq->lock);
-+ hns_roce_spin_lock(&send_cq->hr_lock);
-+ hns_roce_spin_lock(&recv_cq->hr_lock);
- } else {
-- pthread_spin_lock(&recv_cq->lock);
-- pthread_spin_lock(&send_cq->lock);
-+ hns_roce_spin_lock(&recv_cq->hr_lock);
-+ hns_roce_spin_lock(&send_cq->hr_lock);
- }
- } else if (send_cq) {
-- pthread_spin_lock(&send_cq->lock);
-+ hns_roce_spin_lock(&send_cq->hr_lock);
- } else if (recv_cq) {
-- pthread_spin_lock(&recv_cq->lock);
-+ hns_roce_spin_lock(&recv_cq->hr_lock);
- }
- }
-
--static void hns_roce_unlock_cqs(struct ibv_qp *qp)
-+void hns_roce_unlock_cqs(struct hns_roce_cq *send_cq, struct hns_roce_cq *recv_cq)
- {
-- struct hns_roce_cq *send_cq = to_hr_cq(qp->send_cq);
-- struct hns_roce_cq *recv_cq = to_hr_cq(qp->recv_cq);
--
- if (send_cq && recv_cq) {
- if (send_cq == recv_cq) {
-- pthread_spin_unlock(&send_cq->lock);
-+ hns_roce_spin_unlock(&send_cq->hr_lock);
- } else if (send_cq->cqn < recv_cq->cqn) {
-- pthread_spin_unlock(&recv_cq->lock);
-- pthread_spin_unlock(&send_cq->lock);
-+ hns_roce_spin_unlock(&recv_cq->hr_lock);
-+ hns_roce_spin_unlock(&send_cq->hr_lock);
- } else {
-- pthread_spin_unlock(&send_cq->lock);
-- pthread_spin_unlock(&recv_cq->lock);
-+ hns_roce_spin_unlock(&send_cq->hr_lock);
-+ hns_roce_spin_unlock(&recv_cq->hr_lock);
- }
- } else if (send_cq) {
-- pthread_spin_unlock(&send_cq->lock);
-+ hns_roce_spin_unlock(&send_cq->hr_lock);
- } else if (recv_cq) {
-- pthread_spin_unlock(&recv_cq->lock);
-+ hns_roce_spin_unlock(&recv_cq->hr_lock);
- }
- }
-
- static int hns_roce_u_v2_destroy_qp(struct ibv_qp *ibqp)
- {
- struct hns_roce_context *ctx = to_hr_ctx(ibqp->context);
-+ struct hns_roce_pad *pad = to_hr_pad(ibqp->pd);
- struct hns_roce_qp *qp = to_hr_qp(ibqp);
- int ret;
-
-@@ -1640,22 +1997,33 @@ static int hns_roce_u_v2_destroy_qp(struct ibv_qp *ibqp)
-
- hns_roce_v2_clear_qp(ctx, qp);
-
-- hns_roce_lock_cqs(ibqp);
-+ hns_roce_lock_cqs(to_hr_cq(ibqp->send_cq), to_hr_cq(ibqp->recv_cq));
-
-- if (ibqp->recv_cq)
-+ if (ibqp->recv_cq) {
- __hns_roce_v2_cq_clean(to_hr_cq(ibqp->recv_cq), ibqp->qp_num,
- ibqp->srq ? to_hr_srq(ibqp->srq) : NULL);
-+ list_del(&qp->srcq_node);
-+ list_del(&qp->rcq_node);
-+ }
-
-- if (ibqp->send_cq && ibqp->send_cq != ibqp->recv_cq)
-+ if (ibqp->send_cq && ibqp->send_cq != ibqp->recv_cq) {
- __hns_roce_v2_cq_clean(to_hr_cq(ibqp->send_cq), ibqp->qp_num,
- NULL);
-+ list_del(&qp->scq_node);
-+ }
-
-- hns_roce_unlock_cqs(ibqp);
-+ hns_roce_unlock_cqs(to_hr_cq(ibqp->send_cq), to_hr_cq(ibqp->recv_cq));
-
- hns_roce_free_qp_buf(qp, ctx);
-
-+ if (pad)
-+ atomic_fetch_sub(&pad->pd.refcount, 1);
++ /* make sure WQE is written before valid bit is set */
++ udma_to_device_barrier();
+
- free(qp);
-
-+ if (ctx->dca_ctx.mem_cnt > 0)
-+ hns_roce_shrink_dca_mem(ctx);
++ set_64bit_val(wqe, 0, hdr);
+
- return ret;
- }
-
-@@ -1725,10 +2093,20 @@ static void update_srq_db(struct hns_roce_context *ctx, struct hns_roce_db *db,
- hr_reg_write(db, DB_CMD, HNS_ROCE_V2_SRQ_DB);
- hr_reg_write(db, DB_PI, srq->idx_que.head);
-
-- hns_roce_write64(ctx->uar + ROCEE_VF_DB_CFG0_OFFSET,
-+ hns_roce_write64(ctx, ctx->uar + ROCEE_VF_DB_CFG0_OFFSET,
- (__le32 *)db);
- }
-
-+static int check_srq_recv(struct hns_roce_context *ctx)
-+{
-+ if (hns_roce_reseted(ctx)) {
-+ verbs_err_datapath(&ctx->ibv_ctx,
-+ "srq failed to recv, device has been reseted!\n");
-+ return EIO;
-+ }
+ return 0;
+}
+
- static int hns_roce_u_v2_post_srq_recv(struct ibv_srq *ib_srq,
- struct ibv_recv_wr *wr,
- struct ibv_recv_wr **bad_wr)
-@@ -1740,7 +2118,13 @@ static int hns_roce_u_v2_post_srq_recv(struct ibv_srq *ib_srq,
- int ret = 0;
- void *wqe;
-
-- pthread_spin_lock(&srq->lock);
-+ ret = check_srq_recv(ctx);
-+ if (ret) {
-+ *bad_wr = wr;
-+ return ret;
-+ }
-+
-+ hns_roce_spin_lock(&srq->hr_lock);
-
- max_sge = srq->max_gs - srq->rsv_sge;
- for (nreq = 0; wr; ++nreq, wr = wr->next) {
-@@ -1779,7 +2163,7 @@ static int hns_roce_u_v2_post_srq_recv(struct ibv_srq *ib_srq,
- update_srq_db(ctx, &srq_db, srq);
- }
-
-- pthread_spin_unlock(&srq->lock);
-+ hns_roce_spin_unlock(&srq->hr_lock);
-
- return ret;
- }
-@@ -1795,11 +2179,18 @@ static int wc_start_poll_cq(struct ibv_cq_ex *current,
- if (attr->comp_mask)
- return EINVAL;
-
-- pthread_spin_lock(&cq->lock);
-+ hns_roce_spin_lock(&cq->hr_lock);
-+
-+ if (unlikely(hns_roce_reseted(ctx))) {
-+ err = hns_roce_poll_one_swc(cq, NULL);
-+ goto start_poll_done;
-+ }
-
- err = hns_roce_poll_one(ctx, &qp, cq, NULL);
-+
-+start_poll_done:
- if (err != V2_CQ_OK)
-- pthread_spin_unlock(&cq->lock);
-+ hns_roce_spin_unlock(&cq->hr_lock);
-
- return err;
- }
-@@ -1811,6 +2202,9 @@ static int wc_next_poll_cq(struct ibv_cq_ex *current)
- struct hns_roce_qp *qp = NULL;
- int err;
-
-+ if (unlikely(hns_roce_reseted(ctx)))
-+ return hns_roce_poll_one_swc(cq, NULL);
-+
- err = hns_roce_poll_one(ctx, &qp, cq, NULL);
- if (err != V2_CQ_OK)
- return err;
-@@ -1828,12 +2222,16 @@ static void wc_end_poll_cq(struct ibv_cq_ex *current)
- struct hns_roce_cq *cq = to_hr_cq(ibv_cq_ex_to_cq(current));
- struct hns_roce_context *ctx = to_hr_ctx(current->context);
-
-+ if (unlikely(hns_roce_reseted(ctx)))
-+ goto end_poll_done;
-+
- if (cq->flags & HNS_ROCE_CQ_FLAG_RECORD_DB)
- *cq->db = cq->cons_index & RECORD_DB_CI_MASK;
- else
- update_cq_db(ctx, cq);
-
-- pthread_spin_unlock(&cq->lock);
-+end_poll_done:
-+ hns_roce_spin_unlock(&cq->hr_lock);
- }
-
- static enum ibv_wc_opcode wc_read_opcode(struct ibv_cq_ex *current)
-@@ -2096,8 +2494,6 @@ static void wr_set_sge_list_rc(struct ibv_qp_ex *ibv_qp, size_t num_sge,
-
- wqe->msg_len = htole32(qp->sge_info.total_len);
- hr_reg_write(wqe, RCWQE_SGE_NUM, qp->sge_info.valid_num);
--
-- enable_wqe(qp, wqe, qp->sq.head);
- }
-
- static void wr_send_rc(struct ibv_qp_ex *ibv_qp)
-@@ -2236,8 +2632,8 @@ static void set_inline_data_list_rc(struct hns_roce_qp *qp,
- {
- unsigned int msg_len = qp->sge_info.total_len;
- void *dseg;
-+ size_t i;
- int ret;
-- int i;
-
- hr_reg_enable(wqe, RCWQE_INLINE);
-
-@@ -2289,7 +2685,6 @@ static void wr_set_inline_data_rc(struct ibv_qp_ex *ibv_qp, void *addr,
-
- qp->sge_info.total_len = length;
- set_inline_data_list_rc(qp, wqe, 1, &buff);
-- enable_wqe(qp, wqe, qp->sq.head);
- }
-
- static void wr_set_inline_data_list_rc(struct ibv_qp_ex *ibv_qp, size_t num_buf,
-@@ -2297,7 +2692,7 @@ static void wr_set_inline_data_list_rc(struct ibv_qp_ex *ibv_qp, size_t num_buf,
- {
- struct hns_roce_qp *qp = to_hr_qp(&ibv_qp->qp_base);
- struct hns_roce_rc_sq_wqe *wqe = qp->cur_wqe;
-- int i;
-+ size_t i;
-
- if (!wqe)
- return;
-@@ -2307,7 +2702,6 @@ static void wr_set_inline_data_list_rc(struct ibv_qp_ex *ibv_qp, size_t num_buf,
- qp->sge_info.total_len += buf_list[i].length;
-
- set_inline_data_list_rc(qp, wqe, num_buf, buf_list);
-- enable_wqe(qp, wqe, qp->sq.head);
- }
-
- static struct hns_roce_ud_sq_wqe *
-@@ -2428,7 +2822,7 @@ static void wr_set_sge_list_ud(struct ibv_qp_ex *ibv_qp, size_t num_sge,
- }
-
- hr_reg_write(wqe, UDWQE_MSG_START_SGE_IDX, sge_idx & mask);
-- for (int i = 0; i < num_sge; i++) {
-+ for (size_t i = 0; i < num_sge; i++) {
- if (!sg_list[i].length)
- continue;
-
-@@ -2444,7 +2838,6 @@ static void wr_set_sge_list_ud(struct ibv_qp_ex *ibv_qp, size_t num_sge,
- hr_reg_write(wqe, UDWQE_SGE_NUM, cnt);
-
- qp->sge_info.start_idx += cnt;
-- enable_wqe(qp, wqe, qp->sq.head);
- }
-
- static void set_inline_data_list_ud(struct hns_roce_qp *qp,
-@@ -2455,8 +2848,8 @@ static void set_inline_data_list_ud(struct hns_roce_qp *qp,
- uint8_t data[HNS_ROCE_MAX_UD_INL_INN_SZ] = {};
- unsigned int msg_len = qp->sge_info.total_len;
- void *tmp;
-+ size_t i;
- int ret;
-- int i;
-
- if (!check_inl_data_len(qp, msg_len)) {
- qp->err = EINVAL;
-@@ -2510,7 +2903,6 @@ static void wr_set_inline_data_ud(struct ibv_qp_ex *ibv_qp, void *addr,
-
- qp->sge_info.total_len = length;
- set_inline_data_list_ud(qp, wqe, 1, &buff);
-- enable_wqe(qp, wqe, qp->sq.head);
- }
-
- static void wr_set_inline_data_list_ud(struct ibv_qp_ex *ibv_qp, size_t num_buf,
-@@ -2518,7 +2910,7 @@ static void wr_set_inline_data_list_ud(struct ibv_qp_ex *ibv_qp, size_t num_buf,
- {
- struct hns_roce_qp *qp = to_hr_qp(&ibv_qp->qp_base);
- struct hns_roce_ud_sq_wqe *wqe = qp->cur_wqe;
-- int i;
-+ size_t i;
-
- if (!wqe)
- return;
-@@ -2528,22 +2920,21 @@ static void wr_set_inline_data_list_ud(struct ibv_qp_ex *ibv_qp, size_t num_buf,
- qp->sge_info.total_len += buf_list[i].length;
-
- set_inline_data_list_ud(qp, wqe, num_buf, buf_list);
-- enable_wqe(qp, wqe, qp->sq.head);
- }
-
- static void wr_start(struct ibv_qp_ex *ibv_qp)
- {
-+ struct hns_roce_context *ctx = to_hr_ctx(ibv_qp->qp_base.context);
- struct hns_roce_qp *qp = to_hr_qp(&ibv_qp->qp_base);
-- enum ibv_qp_state state = ibv_qp->qp_base.state;
-+ int ret;
-
-- if (state == IBV_QPS_RESET ||
-- state == IBV_QPS_INIT ||
-- state == IBV_QPS_RTR) {
-- qp->err = EINVAL;
-+ ret = check_qp_send(qp, ctx);
-+ if (ret) {
-+ qp->err = ret;
- return;
- }
-
-- pthread_spin_lock(&qp->sq.lock);
-+ hns_roce_spin_lock(&qp->sq.hr_lock);
- qp->sge_info.start_idx = qp->next_sge;
- qp->rb_sq_head = qp->sq.head;
- qp->err = 0;
-@@ -2576,7 +2967,11 @@ static int wr_complete(struct ibv_qp_ex *ibv_qp)
- }
-
- out:
-- pthread_spin_unlock(&qp->sq.lock);
-+ hns_roce_spin_unlock(&qp->sq.hr_lock);
-+
-+ if (check_dca_detach_enable(qp))
-+ hns_roce_dca_stop_post(&ctx->dca_ctx, qp->dca_wqe.dcan);
-+
- if (ibv_qp->qp_base.state == IBV_QPS_ERR) {
- attr.qp_state = IBV_QPS_ERR;
- hns_roce_u_v2_modify_qp(&ibv_qp->qp_base, &attr, IBV_QP_STATE);
-@@ -2591,7 +2986,7 @@ static void wr_abort(struct ibv_qp_ex *ibv_qp)
-
- qp->sq.head = qp->rb_sq_head;
-
-- pthread_spin_unlock(&qp->sq.lock);
-+ hns_roce_spin_unlock(&qp->sq.hr_lock);
- }
-
- enum {
-diff --git a/providers/hns/hns_roce_u_hw_v2.h b/providers/hns/hns_roce_u_hw_v2.h
-index abf9467..50a920f 100644
---- a/providers/hns/hns_roce_u_hw_v2.h
-+++ b/providers/hns/hns_roce_u_hw_v2.h
-@@ -237,6 +237,8 @@ struct hns_roce_rc_sq_wqe {
- #define RCWQE_MW_RR_EN RCWQE_FIELD_LOC(259, 259)
- #define RCWQE_MW_RW_EN RCWQE_FIELD_LOC(260, 260)
-
-+#define RCWQE_SQPN_L_WIDTH 2
-+
- struct hns_roce_v2_wqe_data_seg {
- __le32 len;
- __le32 lkey;
-@@ -344,5 +346,7 @@ void hns_roce_v2_clear_qp(struct hns_roce_context *ctx, struct hns_roce_qp *qp);
- void hns_roce_attach_cq_ex_ops(struct ibv_cq_ex *cq_ex, uint64_t wc_flags);
- int hns_roce_attach_qp_ex_ops(struct ibv_qp_init_attr_ex *attr,
- struct hns_roce_qp *qp);
-+void hns_roce_lock_cqs(struct hns_roce_cq *send_cq, struct hns_roce_cq *recv_cq);
-+void hns_roce_unlock_cqs(struct hns_roce_cq *send_cq, struct hns_roce_cq *recv_cq);
-
- #endif /* _HNS_ROCE_U_HW_V2_H */
-diff --git a/providers/hns/hns_roce_u_verbs.c b/providers/hns/hns_roce_u_verbs.c
-index 34f7ee4..e30880c 100644
---- a/providers/hns/hns_roce_u_verbs.c
-+++ b/providers/hns/hns_roce_u_verbs.c
-@@ -33,6 +33,7 @@
- #include
- #include
- #include
-+#include
- #include
- #include
- #include
-@@ -42,6 +43,37 @@
- #include "hns_roce_u_db.h"
- #include "hns_roce_u_hw_v2.h"
-
-+static bool hns_roce_whether_need_lock(struct ibv_pd *pd)
++/**
++ * zxdh_clr_wqes - clear next 128 sq entries
++ * @qp: hw qp ptr
++ * @qp_wqe_idx: wqe_idx
++ */
++void zxdh_clr_wqes(struct zxdh_qp *qp, __u32 qp_wqe_idx)
+{
-+ struct hns_roce_pad *pad;
-+
-+ pad = to_hr_pad(pd);
-+ if (pad && pad->td)
-+ return false;
++ __le64 *wqe;
++ __u32 wqe_idx;
+
-+ return true;
++ if (!(qp_wqe_idx & 0x7F)) {
++ wqe_idx = (qp_wqe_idx + 128) % qp->sq_ring.size;
++ wqe = qp->sq_base[wqe_idx].elem;
++ if (wqe_idx)
++ memset(wqe, qp->swqe_polarity ? 0 : 0xFF, 0x1000);
++ else
++ memset(wqe, qp->swqe_polarity ? 0xFF : 0, 0x1000);
++ }
+}
+
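zxdh_clr_wqes() pre-clears the 4 KiB chunk (128 minimum-size WQEs) that the producer is about to enter whenever the index crosses a 128-WQE boundary, filling it with the opposite of the polarity the hardware will expect there so stale valid bits from the previous lap cannot be misread. A minimal, self-contained sketch of the boundary math, assuming a 1024-entry SQ purely for illustration:

#include <stdio.h>

int main(void)
{
	unsigned int sq_size = 1024;	/* assumed SQ depth in WQEs */
	unsigned int idx[] = { 0, 1, 128, 896 };

	for (unsigned int i = 0; i < 4; i++) {
		if (!(idx[i] & 0x7F))	/* only on a 128-WQE boundary */
			printf("wqe_idx %4u -> clear 128 WQEs starting at %u\n",
			       idx[i], (idx[i] + 128) % sq_size);
		else
			printf("wqe_idx %4u -> nothing to clear\n", idx[i]);
	}
	return 0;
}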
-+static int hns_roce_spinlock_init(struct hns_roce_spinlock *hr_lock,
-+ bool need_lock)
++/**
++ * zxdh_qp_post_wr - ring doorbell
++ * @qp: hw qp ptr
++ */
++void zxdh_qp_post_wr(struct zxdh_qp *qp)
+{
-+ hr_lock->need_lock = need_lock;
-+
-+ if (need_lock)
-+ return pthread_spin_init(&hr_lock->lock,
-+ PTHREAD_PROCESS_PRIVATE);
++ /* valid bit is written before ringing doorbell */
++ udma_to_device_barrier();
+
-+ return 0;
++ db_wr32(qp->qp_id, qp->wqe_alloc_db);
++ qp->initial_ring.head = qp->sq_ring.head;
+}
+
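Taken together with the WQE builders above, posting follows a strict ordering: the payload quadwords are written first, udma_to_device_barrier() orders them before the header word that carries the valid bit, and zxdh_qp_post_wr() issues a second barrier before the doorbell that tells hardware to fetch the new work. A sketch of that sequence using only calls visible in this file (the header value and payload are elided placeholders):

static void post_one_wqe_ordered(struct zxdh_qp *qp, __le64 *wqe, __u64 hdr)
{
	/* 1. payload quadwords of the WQE */
	set_64bit_val(wqe, 8, 0);
	set_64bit_val(wqe, 16, 0);
	set_64bit_val(wqe, 24, 0);

	/* 2. fence the payload before publishing the valid/header word */
	udma_to_device_barrier();
	set_64bit_val(wqe, 0, hdr);

	/* 3. zxdh_qp_post_wr() fences again and rings the doorbell */
	zxdh_qp_post_wr(qp);
}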
-+static int hns_roce_spinlock_destroy(struct hns_roce_spinlock *hr_lock)
++/**
++ * zxdh_qp_set_shadow_area - fill SW_RQ_Head
++ * @qp: hw qp ptr
++ */
++void zxdh_qp_set_shadow_area(struct zxdh_qp *qp)
+{
-+ if (hr_lock->need_lock)
-+ return pthread_spin_destroy(&hr_lock->lock);
++ __u8 polarity = 0;
+
-+ return 0;
++ polarity = ((ZXDH_RING_CURRENT_HEAD(qp->rq_ring) == 0) ?
++ !qp->rwqe_polarity :
++ qp->rwqe_polarity);
++ set_64bit_val(qp->shadow_area, 0,
++ FIELD_PREP(ZXDHQPDBSA_RQ_POLARITY, polarity) |
++ FIELD_PREP(ZXDHQPDBSA_RQ_SW_HEAD,
++ ZXDH_RING_CURRENT_HEAD(qp->rq_ring)));
+}
+
- void hns_roce_init_qp_indices(struct hns_roce_qp *qp)
- {
- qp->sq.head = 0;
-@@ -85,38 +117,138 @@ int hns_roce_u_query_port(struct ibv_context *context, uint8_t port,
- return ibv_cmd_query_port(context, port, attr, &cmd, sizeof(cmd));
- }
-
-+struct ibv_td *hns_roce_u_alloc_td(struct ibv_context *context,
-+ struct ibv_td_init_attr *attr)
++/**
++ * zxdh_qp_ring_push_db - ring qp doorbell
++ * @qp: hw qp ptr
++ * @wqe_idx: wqe index
++ */
++static void zxdh_qp_ring_push_db(struct zxdh_qp *qp, __u32 wqe_idx)
+{
-+ struct hns_roce_td *td;
-+
-+ if (attr->comp_mask) {
-+ errno = EOPNOTSUPP;
-+ return NULL;
-+ }
-+
-+ td = calloc(1, sizeof(*td));
-+ if (!td) {
-+ errno = ENOMEM;
-+ return NULL;
-+ }
-+
-+ td->ibv_td.context = context;
-+ atomic_init(&td->refcount, 1);
-+
-+ return &td->ibv_td;
++ set_32bit_val(qp->push_db, 0,
++ FIELD_PREP(ZXDH_WQEALLOC_WQE_DESC_INDEX, wqe_idx >> 3) |
++ qp->qp_id);
++ qp->initial_ring.head = qp->sq_ring.head;
++ qp->push_mode = true;
++ qp->push_dropped = false;
+}
+
-+int hns_roce_u_dealloc_td(struct ibv_td *ibv_td)
++void zxdh_qp_push_wqe(struct zxdh_qp *qp, __le64 *wqe, __u16 quanta,
++ __u32 wqe_idx, bool post_sq)
+{
-+ struct hns_roce_td *td;
-+
-+ td = to_hr_td(ibv_td);
-+ if (atomic_load(&td->refcount) > 1)
-+ return EBUSY;
-+
-+ free(td);
++ __le64 *push;
+
-+ return 0;
++ if (ZXDH_RING_CURRENT_HEAD(qp->initial_ring) !=
++ ZXDH_RING_CURRENT_TAIL(qp->sq_ring) &&
++ !qp->push_mode) {
++ if (post_sq)
++ zxdh_qp_post_wr(qp);
++ } else {
++ push = (__le64 *)((uintptr_t)qp->push_wqe +
++ (wqe_idx & 0x7) * 0x20);
++ memcpy(push, wqe, quanta * ZXDH_QP_WQE_MIN_SIZE);
++ zxdh_qp_ring_push_db(qp, wqe_idx);
++ }
+}
+
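A WR is copied through the push page only when the hardware has already consumed everything previously posted (initial_ring head equals sq_ring tail) or when push mode is still active from an earlier WR; otherwise the normal doorbell path in zxdh_qp_post_wr() is taken. A standalone restatement of that branch, with illustrative parameter names:

static int use_push_path(unsigned int initial_head, unsigned int sq_tail,
			 int push_mode_active)
{
	/* the doorbell path is used only while older WQEs are still
	 * outstanding and push mode has not been entered */
	return (initial_head == sq_tail) || push_mode_active;
}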
- struct ibv_pd *hns_roce_u_alloc_pd(struct ibv_context *context)
- {
-+ struct hns_roce_alloc_pd_resp resp = {};
- struct ibv_alloc_pd cmd;
- struct hns_roce_pd *pd;
-- struct hns_roce_alloc_pd_resp resp = {};
--
-- pd = malloc(sizeof(*pd));
-- if (!pd)
-- return NULL;
-
-- if (ibv_cmd_alloc_pd(context, &pd->ibv_pd, &cmd, sizeof(cmd),
-- &resp.ibv_resp, sizeof(resp))) {
-- free(pd);
-+ pd = calloc(1, sizeof(*pd));
-+ if (!pd) {
-+ errno = ENOMEM;
- return NULL;
- }
-+ errno = ibv_cmd_alloc_pd(context, &pd->ibv_pd, &cmd, sizeof(cmd),
-+ &resp.ibv_resp, sizeof(resp));
-+
-+ if (errno)
-+ goto err;
-
-+ atomic_init(&pd->refcount, 1);
- pd->pdn = resp.pdn;
-
- return &pd->ibv_pd;
-+
-+err:
-+ free(pd);
-+ return NULL;
- }
-
--int hns_roce_u_free_pd(struct ibv_pd *pd)
-+struct ibv_pd *hns_roce_u_alloc_pad(struct ibv_context *context,
-+ struct ibv_parent_domain_init_attr *attr)
++/**
++ * zxdh_qp_get_next_send_wqe - pad with NOP if needed, return where next WR should go
++ * @qp: hw qp ptr
++ * @wqe_idx: return wqe index
++ * @quanta: size of WR in quanta
++ * @total_size: size of WR in bytes
++ * @info: info on WR
++ */
++__le64 *zxdh_qp_get_next_send_wqe(struct zxdh_qp *qp, __u32 *wqe_idx,
++ __u16 quanta, __u32 total_size,
++ struct zxdh_post_sq_info *info)
+{
-+ struct hns_roce_pad *pad;
-+
-+ if (ibv_check_alloc_parent_domain(attr))
-+ return NULL;
++ __le64 *wqe;
++ __u16 avail_quanta;
++ __u16 i;
+
-+ if (attr->comp_mask) {
-+ errno = EOPNOTSUPP;
-+ return NULL;
-+ }
++ avail_quanta = ZXDH_MAX_SQ_WQES_PER_PAGE -
++ (ZXDH_RING_CURRENT_HEAD(qp->sq_ring) %
++ ZXDH_MAX_SQ_WQES_PER_PAGE);
++ if (quanta <= avail_quanta) {
++ /* WR fits in current chunk */
++ if (quanta > ZXDH_SQ_RING_FREE_QUANTA(qp->sq_ring))
++ return NULL;
++ } else {
++ /* Need to pad with NOP */
++ if (quanta + avail_quanta >
++ ZXDH_SQ_RING_FREE_QUANTA(qp->sq_ring))
++ return NULL;
+
-+ pad = calloc(1, sizeof(*pad));
-+ if (!pad) {
-+ errno = ENOMEM;
-+ return NULL;
++ for (i = 0; i < avail_quanta; i++) {
++ zxdh_nop_1(qp);
++ ZXDH_RING_MOVE_HEAD_NOCHECK(qp->sq_ring);
++ }
+ }
+
-+ if (attr->td) {
-+ pad->td = to_hr_td(attr->td);
-+ atomic_fetch_add(&pad->td->refcount, 1);
-+ }
++ *wqe_idx = ZXDH_RING_CURRENT_HEAD(qp->sq_ring);
++ if (!*wqe_idx)
++ qp->swqe_polarity = !qp->swqe_polarity;
+
-+ pad->pd.protection_domain = to_hr_pd(attr->pd);
-+ atomic_fetch_add(&pad->pd.protection_domain->refcount, 1);
++ ZXDH_RING_MOVE_HEAD_BY_COUNT_NOCHECK(qp->sq_ring, quanta);
+
-+ atomic_init(&pad->pd.refcount, 1);
-+ ibv_initialize_parent_domain(&pad->pd.ibv_pd,
-+ &pad->pd.protection_domain->ibv_pd);
++ wqe = qp->sq_base[*wqe_idx].elem;
++ qp->sq_wrtrk_array[*wqe_idx].wrid = info->wr_id;
++ qp->sq_wrtrk_array[*wqe_idx].wr_len = total_size;
++ qp->sq_wrtrk_array[*wqe_idx].quanta = quanta;
+
-+ return &pad->pd.ibv_pd;
++ return wqe;
+}
+
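A WR never straddles an SQ page: if it does not fit in the quanta left before the next ZXDH_MAX_SQ_WQES_PER_PAGE boundary, that remainder is first consumed with NOPs via zxdh_nop_1(), and the SQ polarity flips whenever the reserved index wraps back to zero. A sketch of the padding arithmetic, assuming a 32-quanta page purely for illustration:

#include <stdio.h>

int main(void)
{
	unsigned int wqes_per_page = 32;	/* assumed page size in quanta */
	unsigned int sq_head = 30;		/* current producer index */
	unsigned int quanta = 4;		/* quanta needed by the WR */

	unsigned int avail = wqes_per_page - (sq_head % wqes_per_page);
	unsigned int nops = (quanta <= avail) ? 0 : avail;

	printf("pad %u NOP quanta, WR occupies [%u, %u)\n",
	       nops, sq_head + nops, sq_head + nops + quanta);
	return 0;
}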
-+static void hns_roce_free_pad(struct hns_roce_pad *pad)
++/**
++ * zxdh_qp_get_next_recv_wqe - get next qp's rcv wqe
++ * @qp: hw qp ptr
++ * @wqe_idx: return wqe index
++ */
++__le64 *zxdh_qp_get_next_recv_wqe(struct zxdh_qp *qp, __u32 *wqe_idx)
+{
-+ atomic_fetch_sub(&pad->pd.protection_domain->refcount, 1);
++ __le64 *wqe;
++ enum zxdh_status_code ret_code;
+
-+ if (pad->td)
-+ atomic_fetch_sub(&pad->td->refcount, 1);
++ if (ZXDH_RING_FULL_ERR(qp->rq_ring))
++ return NULL;
+
-+ free(pad);
-+}
++ ZXDH_ATOMIC_RING_MOVE_HEAD(qp->rq_ring, *wqe_idx, ret_code);
++ if (ret_code)
++ return NULL;
+
-+static int hns_roce_free_pd(struct hns_roce_pd *pd)
- {
- int ret;
-
-- ret = ibv_cmd_dealloc_pd(pd);
-+ if (atomic_load(&pd->refcount) > 1)
-+ return EBUSY;
++ if (!*wqe_idx)
++ qp->rwqe_polarity = !qp->rwqe_polarity;
++ /* rq_wqe_size_multiplier is the number of 16-byte quanta in one RQ WQE */
++ wqe = qp->rq_base[*wqe_idx * qp->rq_wqe_size_multiplier].elem;
+
-+ ret = ibv_cmd_dealloc_pd(&pd->ibv_pd);
- if (ret)
- return ret;
-
-- free(to_hr_pd(pd));
-+ free(pd);
-+ return 0;
++ return wqe;
+}
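Each logical RQ entry spans rq_wqe_size_multiplier quanta of 16 bytes, so the element returned above sits at index *wqe_idx * rq_wqe_size_multiplier of rq_base. A small helper restating that byte offset (the multiplier value comes from QP creation; the helper name is illustrative):

static unsigned long rq_wqe_byte_offset(unsigned int wqe_idx,
					unsigned int size_multiplier)
{
	/* one RQ WQE = size_multiplier quanta of 16 bytes each */
	return (unsigned long)wqe_idx * size_multiplier * 16;
}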
-
-- return ret;
-+int hns_roce_u_dealloc_pd(struct ibv_pd *ibv_pd)
-+{
-+ struct hns_roce_pad *pad = to_hr_pad(ibv_pd);
-+ struct hns_roce_pd *pd = to_hr_pd(ibv_pd);
-+
-+ if (pad) {
-+ hns_roce_free_pad(pad);
-+ return 0;
-+ }
-+
-+ return hns_roce_free_pd(pd);
- }
-
- struct ibv_xrcd *hns_roce_u_open_xrcd(struct ibv_context *context,
-@@ -275,6 +407,11 @@ int hns_roce_u_dealloc_mw(struct ibv_mw *mw)
- return 0;
- }
-
-+enum {
-+ CREATE_CQ_SUPPORTED_COMP_MASK = IBV_CQ_INIT_ATTR_MASK_FLAGS |
-+ IBV_CQ_INIT_ATTR_MASK_PD,
-+};
-+
- enum {
- CREATE_CQ_SUPPORTED_WC_FLAGS = IBV_WC_STANDARD_FLAGS |
- IBV_WC_EX_WITH_CVLAN,
-@@ -283,21 +420,60 @@ enum {
- static int verify_cq_create_attr(struct ibv_cq_init_attr_ex *attr,
- struct hns_roce_context *context)
- {
-+ struct hns_roce_pad *pad = to_hr_pad(attr->parent_domain);
-+
- if (!attr->cqe || attr->cqe > context->max_cqe)
- return EINVAL;
-
-- if (attr->comp_mask)
-+ if (!check_comp_mask(attr->comp_mask, CREATE_CQ_SUPPORTED_COMP_MASK)) {
-+ verbs_err(&context->ibv_ctx, "unsupported cq comps 0x%x\n",
-+ attr->comp_mask);
- return EOPNOTSUPP;
-+ }
-
- if (!check_comp_mask(attr->wc_flags, CREATE_CQ_SUPPORTED_WC_FLAGS))
- return EOPNOTSUPP;
-
-+ if (attr->comp_mask & IBV_CQ_INIT_ATTR_MASK_PD) {
-+ if (!pad) {
-+ verbs_err(&context->ibv_ctx, "failed to check the pad of cq.\n");
-+ return EINVAL;
-+ }
-+ atomic_fetch_add(&pad->pd.refcount, 1);
-+ }
+
- attr->cqe = max_t(uint32_t, HNS_ROCE_MIN_CQE_NUM,
- roundup_pow_of_two(attr->cqe));
-
- return 0;
- }
-
-+static int hns_roce_cq_spinlock_init(struct ibv_context *context,
-+ struct hns_roce_cq *cq,
-+ struct ibv_cq_init_attr_ex *attr)
++static enum zxdh_status_code
++zxdh_post_rdma_write(struct zxdh_qp *qp, struct zxdh_post_sq_info *info,
++ bool post_sq, __u32 total_size)
+{
-+ bool need_lock;
++ enum zxdh_status_code ret_code;
++ struct zxdh_rdma_write *op_info;
++ __u32 i, byte_off = 0;
++ __u32 frag_cnt, addl_frag_cnt;
++ __le64 *wqe;
++ __u32 wqe_idx;
++ __u16 quanta;
++ __u64 hdr;
++ bool read_fence = false;
++ bool imm_data_flag;
+
-+ need_lock = hns_roce_whether_need_lock(attr->parent_domain);
-+ if (!need_lock)
-+ verbs_info(verbs_get_ctx(context), "configure cq as no lock.\n");
++ op_info = &info->op.rdma_write;
++ imm_data_flag = info->imm_data_valid ? 1 : 0;
++ read_fence |= info->read_fence;
+
-+ return hns_roce_spinlock_init(&cq->hr_lock, need_lock);
-+}
++ if (imm_data_flag)
++ frag_cnt =
++ op_info->num_lo_sges ? (op_info->num_lo_sges + 1) : 2;
++ else
++ frag_cnt = op_info->num_lo_sges;
++ addl_frag_cnt =
++ op_info->num_lo_sges > 1 ? (op_info->num_lo_sges - 1) : 0;
+
-+static int hns_roce_srq_spinlock_init(struct ibv_context *context,
-+ struct hns_roce_srq *srq,
-+ struct ibv_srq_init_attr_ex *attr)
-+{
-+ bool need_lock;
++ ret_code = zxdh_fragcnt_to_quanta_sq(frag_cnt, &quanta);
++ if (ret_code)
++ return ret_code;
+
-+ need_lock = hns_roce_whether_need_lock(attr->pd);
-+ if (!need_lock)
-+ verbs_info(verbs_get_ctx(context), "configure srq as no lock.\n");
++ wqe = zxdh_qp_get_next_send_wqe(qp, &wqe_idx, quanta, total_size, info);
++ if (!wqe)
++ return ZXDH_ERR_QP_TOOMANY_WRS_POSTED;
+
-+ return hns_roce_spinlock_init(&srq->hr_lock, need_lock);
-+}
++ zxdh_clr_wqes(qp, wqe_idx);
+
- static int hns_roce_alloc_cq_buf(struct hns_roce_cq *cq)
- {
- int buf_size = hr_hw_page_align(cq->cq_depth * cq->cqe_size);
-@@ -337,6 +513,32 @@ static int exec_cq_create_cmd(struct ibv_context *context,
- return 0;
- }
-
-+static int hns_roce_init_cq_swc(struct hns_roce_cq *cq,
-+ struct ibv_cq_init_attr_ex *attr)
-+{
-+ list_head_init(&cq->list_sq);
-+ list_head_init(&cq->list_rq);
-+ list_head_init(&cq->list_srq);
-+ list_head_init(&cq->list_xrc_srq);
-+
-+ if (!(attr->wc_flags & CREATE_CQ_SUPPORTED_WC_FLAGS))
-+ return 0;
-+
-+ cq->sw_cqe = calloc(1, sizeof(struct hns_roce_v2_cqe));
-+ if (!cq->sw_cqe)
-+ return -ENOMEM;
-+
-+ return 0;
-+}
-+
-+static void hns_roce_uninit_cq_swc(struct hns_roce_cq *cq)
-+{
-+ if (cq->sw_cqe) {
-+ free(cq->sw_cqe);
-+ cq->sw_cqe = NULL;
++ if (op_info->num_lo_sges) {
++ set_64bit_val(
++ wqe, 16,
++ FIELD_PREP(ZXDHQPSQ_FIRST_FRAG_VALID,
++ op_info->lo_sg_list->len ==
++ ZXDH_MAX_SQ_PAYLOAD_SIZE ?
++ 1 :
++ 0) |
++ FIELD_PREP(ZXDHQPSQ_FIRST_FRAG_LEN,
++ op_info->lo_sg_list->len) |
++ FIELD_PREP(ZXDHQPSQ_FIRST_FRAG_STAG,
++ op_info->lo_sg_list->stag));
++ set_64bit_val(wqe, 8,
++ FIELD_PREP(ZXDHQPSQ_FRAG_TO,
++ op_info->lo_sg_list->tag_off));
++ } else {
++ /* if there are zero SGEs, post a special SGE with zero length */
++ set_64bit_val(wqe, 16,
++ FIELD_PREP(ZXDHQPSQ_FIRST_FRAG_VALID, 0) |
++ FIELD_PREP(ZXDHQPSQ_FIRST_FRAG_LEN, 0) |
++ FIELD_PREP(ZXDHQPSQ_FIRST_FRAG_STAG,
++ 0x100));
++ set_64bit_val(wqe, 8, FIELD_PREP(ZXDHQPSQ_FRAG_TO, 0));
+ }
-+}
-+
- static struct ibv_cq_ex *create_cq(struct ibv_context *context,
- struct ibv_cq_init_attr_ex *attr)
- {
-@@ -354,7 +556,10 @@ static struct ibv_cq_ex *create_cq(struct ibv_context *context,
- goto err;
- }
-
-- ret = pthread_spin_init(&cq->lock, PTHREAD_PROCESS_PRIVATE);
-+ if (attr->comp_mask & IBV_CQ_INIT_ATTR_MASK_PD)
-+ cq->parent_domain = attr->parent_domain;
+
-+ ret = hns_roce_cq_spinlock_init(context, cq, attr);
- if (ret)
- goto err_lock;
-
-@@ -371,7 +576,9 @@ static struct ibv_cq_ex *create_cq(struct ibv_context *context,
- goto err_db;
- }
-
-- *cq->db = 0;
-+ ret = hns_roce_init_cq_swc(cq, attr);
-+ if (ret)
-+ goto err_swc;
-
- ret = exec_cq_create_cmd(context, cq, attr);
- if (ret)
-@@ -382,11 +589,14 @@ static struct ibv_cq_ex *create_cq(struct ibv_context *context,
- return &cq->verbs_cq.cq_ex;
-
- err_cmd:
-+ hns_roce_uninit_cq_swc(cq);
-+err_swc:
- hns_roce_free_db(hr_ctx, cq->db, HNS_ROCE_CQ_TYPE_DB);
- err_db:
- hns_roce_free_buf(&cq->buf);
--err_lock:
- err_buf:
-+ hns_roce_spinlock_destroy(&cq->hr_lock);
-+err_lock:
- free(cq);
- err:
- if (ret < 0)
-@@ -437,16 +647,25 @@ int hns_roce_u_modify_cq(struct ibv_cq *cq, struct ibv_modify_cq_attr *attr)
-
- int hns_roce_u_destroy_cq(struct ibv_cq *cq)
- {
-+ struct hns_roce_cq *hr_cq = to_hr_cq(cq);
-+ struct hns_roce_pad *pad = to_hr_pad(hr_cq->parent_domain);
- int ret;
-
- ret = ibv_cmd_destroy_cq(cq);
- if (ret)
- return ret;
-
-- hns_roce_free_db(to_hr_ctx(cq->context), to_hr_cq(cq)->db,
-- HNS_ROCE_CQ_TYPE_DB);
-- hns_roce_free_buf(&to_hr_cq(cq)->buf);
-- free(to_hr_cq(cq));
-+ hns_roce_uninit_cq_swc(to_hr_cq(cq));
++ if (imm_data_flag) {
++ byte_off = ZXDH_SQ_WQE_BYTESIZE + ZXDH_QP_FRAG_BYTESIZE;
++ if (op_info->num_lo_sges > 1) {
++ qp->wqe_ops.iw_set_fragment(wqe, byte_off,
++ &op_info->lo_sg_list[1],
++ qp->swqe_polarity);
++ byte_off += ZXDH_QP_FRAG_BYTESIZE;
++ }
++ set_64bit_val(
++ wqe, ZXDH_SQ_WQE_BYTESIZE,
++ FIELD_PREP(ZXDHQPSQ_IMMDATA_VALID, qp->swqe_polarity) |
++ FIELD_PREP(ZXDHQPSQ_IMMDATA, info->imm_data));
++ i = 2;
++ if (i < op_info->num_lo_sges) {
++ for (byte_off = ZXDH_SQ_WQE_BYTESIZE +
++ 2 * ZXDH_QP_FRAG_BYTESIZE;
++ i < op_info->num_lo_sges; i += 2) {
++ if (i == addl_frag_cnt) {
++ qp->wqe_ops.iw_set_fragment(
++ wqe, byte_off,
++ &op_info->lo_sg_list[i],
++ qp->swqe_polarity);
++ byte_off += ZXDH_QP_FRAG_BYTESIZE;
++ break;
++ }
++ byte_off += ZXDH_QP_FRAG_BYTESIZE;
++ qp->wqe_ops.iw_set_fragment(
++ wqe, byte_off,
++ &op_info->lo_sg_list[i + 1],
++ qp->swqe_polarity);
++ byte_off -= ZXDH_QP_FRAG_BYTESIZE;
++ qp->wqe_ops.iw_set_fragment(
++ wqe, byte_off, &op_info->lo_sg_list[i],
++ qp->swqe_polarity);
++ byte_off += 2 * ZXDH_QP_FRAG_BYTESIZE;
++ }
++ }
++ } else {
++ i = 1;
++ for (byte_off = ZXDH_SQ_WQE_BYTESIZE; i < op_info->num_lo_sges;
++ i += 2) {
++ if (i == addl_frag_cnt) {
++ qp->wqe_ops.iw_set_fragment(
++ wqe, byte_off, &op_info->lo_sg_list[i],
++ qp->swqe_polarity);
++ byte_off += ZXDH_QP_FRAG_BYTESIZE;
++ break;
++ }
++ byte_off += ZXDH_QP_FRAG_BYTESIZE;
++ qp->wqe_ops.iw_set_fragment(wqe, byte_off,
++ &op_info->lo_sg_list[i + 1],
++ qp->swqe_polarity);
++ byte_off -= ZXDH_QP_FRAG_BYTESIZE;
++ qp->wqe_ops.iw_set_fragment(wqe, byte_off,
++ &op_info->lo_sg_list[i],
++ qp->swqe_polarity);
++ byte_off += 2 * ZXDH_QP_FRAG_BYTESIZE;
++ }
++ }
++ /* if the fragment count is even, set the valid bit in the next fragment */
++ if (!(frag_cnt & 0x01) && frag_cnt) {
++ qp->wqe_ops.iw_set_fragment(wqe, byte_off, NULL,
++ qp->swqe_polarity);
++ }
+
-+ hns_roce_free_db(to_hr_ctx(cq->context), hr_cq->db, HNS_ROCE_CQ_TYPE_DB);
-+ hns_roce_free_buf(&hr_cq->buf);
++ hdr = FIELD_PREP(ZXDHQPSQ_VALID, qp->swqe_polarity) |
++ FIELD_PREP(ZXDHQPSQ_OPCODE, info->op_type) |
++ FIELD_PREP(ZXDHQPSQ_SIGCOMPL, info->signaled) |
++ FIELD_PREP(ZXDHQPSQ_LOCALFENCE, info->local_fence) |
++ FIELD_PREP(ZXDHQPSQ_READFENCE, read_fence) |
++ FIELD_PREP(ZXDHQPSQ_SOLICITED, info->solicited) |
++ FIELD_PREP(ZXDHQPSQ_IMMDATAFLAG, imm_data_flag) |
++ FIELD_PREP(ZXDHQPSQ_ADDFRAGCNT, addl_frag_cnt) |
++ FIELD_PREP(ZXDHQPSQ_REMSTAG, op_info->rem_addr.stag);
++ set_64bit_val(wqe, 24,
++ FIELD_PREP(ZXDHQPSQ_FRAG_TO, op_info->rem_addr.tag_off));
+
-+ hns_roce_spinlock_destroy(&hr_cq->hr_lock);
++ udma_to_device_barrier(); /* make sure WQE is populated before valid bit is set */
+
-+ if (pad)
-+ atomic_fetch_sub(&pad->pd.refcount, 1);
++ set_64bit_val(wqe, 0, hdr);
++ if (post_sq)
++ zxdh_qp_post_wr(qp);
++ qp_tx_psn_add(&qp->next_psn, total_size, qp->mtu);
++ return 0;
++}
+
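The fragment accounting used above: immediate data consumes one extra fragment slot (two when there are no SGEs at all), addl_frag_cnt counts the SGEs beyond the first one carried in the base WQE, and zxdh_fragcnt_to_quanta_sq() then converts the total into quanta. A standalone restatement of those formulas (helper name is illustrative):

static void count_write_frags(unsigned int num_lo_sges, int imm_data_flag,
			      unsigned int *frag_cnt,
			      unsigned int *addl_frag_cnt)
{
	if (imm_data_flag)
		*frag_cnt = num_lo_sges ? num_lo_sges + 1 : 2;
	else
		*frag_cnt = num_lo_sges;

	/* fragments beyond the first, which lives in the base WQE */
	*addl_frag_cnt = num_lo_sges > 1 ? num_lo_sges - 1 : 0;
}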
-+ free(hr_cq);
-
- return ret;
- }
-@@ -652,10 +871,27 @@ static int exec_srq_create_cmd(struct ibv_context *context,
- return 0;
- }
-
-+static void init_srq_cq_list(struct hns_roce_srq *srq,
-+ struct ibv_srq_init_attr_ex *init_attr)
++static void split_write_imm_wqe(struct zxdh_qp *qp,
++ struct zxdh_post_sq_info *info,
++ struct zxdh_post_sq_info *split_part1_info,
++ struct zxdh_post_sq_info *split_part2_info)
+{
-+ struct hns_roce_cq *srq_cq;
++ __u32 total_size = 0;
++ struct zxdh_rdma_write *op_info;
+
-+ list_node_init(&srq->xrc_srcq_node);
++ op_info = &info->op.rdma_write;
++ total_size = op_info->rem_addr.len;
++ split_part1_info->op.rdma_write.lo_sg_list =
++ info->op.rdma_write.lo_sg_list;
++ split_part2_info->op.rdma_write.lo_sg_list = NULL;
+
-+ if (!init_attr->cq)
-+ return;
++ split_part1_info->op_type = ZXDH_OP_TYPE_WRITE;
++ split_part1_info->signaled = false;
++ split_part1_info->local_fence = info->local_fence;
++ split_part1_info->read_fence = info->read_fence;
++ split_part1_info->solicited = info->solicited;
++ split_part1_info->imm_data_valid = false;
++ split_part1_info->wr_id = info->wr_id;
++ split_part1_info->op.rdma_write.num_lo_sges =
++ info->op.rdma_write.num_lo_sges;
++ split_part1_info->op.rdma_write.rem_addr.stag = op_info->rem_addr.stag;
++ split_part1_info->op.rdma_write.rem_addr.tag_off =
++ op_info->rem_addr.tag_off;
+
-+ srq_cq = to_hr_cq(init_attr->cq);
-+ hns_roce_spin_lock(&srq_cq->hr_lock);
-+ list_add_tail(&srq_cq->list_xrc_srq, &srq->xrc_srcq_node);
-+ hns_roce_spin_unlock(&srq_cq->hr_lock);
++ split_part2_info->op_type = info->op_type;
++ split_part2_info->signaled = info->signaled;
++ split_part2_info->local_fence = info->local_fence;
++ split_part2_info->read_fence = info->read_fence;
++ split_part2_info->solicited = info->solicited;
++ split_part2_info->imm_data_valid = info->imm_data_valid;
++ split_part2_info->wr_id = info->wr_id;
++ split_part2_info->imm_data = info->imm_data;
++ split_part2_info->op.rdma_write.num_lo_sges = 0;
++ split_part2_info->op.rdma_write.rem_addr.stag = op_info->rem_addr.stag;
++ split_part2_info->op.rdma_write.rem_addr.tag_off =
++ op_info->rem_addr.tag_off + total_size;
+}
+
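The split turns one RDMA_WRITE_WITH_IMM into an unsignaled plain write that carries all of the local SGEs, followed by a zero-SGE write-with-immediate whose remote offset points just past the bytes written by the first part, so the immediate is delivered only after the payload. A trivial helper restating the offset arithmetic (name and example values are illustrative):

static __u64 imm_only_tag_off(__u64 rem_tag_off, __u32 total_size)
{
	/* e.g. a 3 MiB write at remote offset 0x1000 puts the
	 * immediate-only WQE at 0x1000 + 0x300000 = 0x301000 */
	return rem_tag_off + total_size;
}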
- static struct ibv_srq *create_srq(struct ibv_context *context,
- struct ibv_srq_init_attr_ex *init_attr)
- {
- struct hns_roce_context *hr_ctx = to_hr_ctx(context);
-+ struct hns_roce_pad *pad = to_hr_pad(init_attr->pd);
- struct hns_roce_srq *srq;
- int ret;
-
-@@ -669,19 +905,20 @@ static struct ibv_srq *create_srq(struct ibv_context *context,
- goto err;
- }
-
-- if (pthread_spin_init(&srq->lock, PTHREAD_PROCESS_PRIVATE))
-+ if (pad)
-+ atomic_fetch_add(&pad->pd.refcount, 1);
-+
-+ if (hns_roce_srq_spinlock_init(context, srq, init_attr))
- goto err_free_srq;
-
- set_srq_param(context, srq, init_attr);
- if (alloc_srq_buf(srq))
-- goto err_free_srq;
-+ goto err_destroy_lock;
-
- srq->rdb = hns_roce_alloc_db(hr_ctx, HNS_ROCE_SRQ_TYPE_DB);
- if (!srq->rdb)
- goto err_srq_buf;
-
-- *srq->rdb = 0;
--
- ret = exec_srq_create_cmd(context, srq, init_attr);
- if (ret)
- goto err_srq_db;
-@@ -694,6 +931,8 @@ static struct ibv_srq *create_srq(struct ibv_context *context,
- init_attr->attr.max_sge =
- min(init_attr->attr.max_sge - srq->rsv_sge, hr_ctx->max_srq_sge);
-
-+ init_srq_cq_list(srq, init_attr);
-+
- return &srq->verbs_srq.srq;
-
- err_destroy_srq:
-@@ -705,6 +944,9 @@ err_srq_db:
- err_srq_buf:
- free_srq_buf(srq);
-
-+err_destroy_lock:
-+ hns_roce_spinlock_destroy(&srq->hr_lock);
-+
- err_free_srq:
- free(srq);
-
-@@ -766,12 +1008,27 @@ int hns_roce_u_query_srq(struct ibv_srq *srq, struct ibv_srq_attr *srq_attr)
- return ret;
- }
-
-+static void del_srq_from_cq_list(struct hns_roce_srq *srq)
++/**
++ * zxdh_rdma_write - rdma write operation
++ * @qp: hw qp ptr
++ * @info: post sq information
++ * @post_sq: flag to post sq
++ */
++enum zxdh_status_code zxdh_rdma_write(struct zxdh_qp *qp,
++ struct zxdh_post_sq_info *info,
++ bool post_sq)
+{
-+ struct hns_roce_cq *srq_cq = to_hr_cq(srq->verbs_srq.cq);
-+
-+ if (!srq_cq)
-+ return;
-+
-+ hns_roce_spin_lock(&srq_cq->hr_lock);
-+ list_del(&srq->xrc_srcq_node);
-+ hns_roce_spin_unlock(&srq_cq->hr_lock);
-+}
-+
- int hns_roce_u_destroy_srq(struct ibv_srq *ibv_srq)
- {
- struct hns_roce_context *ctx = to_hr_ctx(ibv_srq->context);
-+ struct hns_roce_pad *pad = to_hr_pad(ibv_srq->pd);
- struct hns_roce_srq *srq = to_hr_srq(ibv_srq);
- int ret;
-
-+ del_srq_from_cq_list(srq);
-+
- ret = ibv_cmd_destroy_srq(ibv_srq);
- if (ret)
- return ret;
-@@ -780,16 +1037,51 @@ int hns_roce_u_destroy_srq(struct ibv_srq *ibv_srq)
-
- hns_roce_free_db(ctx, srq->rdb, HNS_ROCE_SRQ_TYPE_DB);
- free_srq_buf(srq);
-+
-+ hns_roce_spinlock_destroy(&srq->hr_lock);
++ struct zxdh_post_sq_info split_part1_info = { 0 };
++ struct zxdh_post_sq_info split_part2_info = { 0 };
++ struct zxdh_rdma_write *op_info;
++ struct zxdh_uqp *iwuqp;
++ struct zxdh_uvcontext *iwvctx;
++ __u32 i;
++ __u32 total_size = 0;
++ enum zxdh_status_code ret_code;
++ bool imm_data_flag = info->imm_data_valid ? 1 : 0;
++ iwuqp = container_of(qp, struct zxdh_uqp, qp);
++ iwvctx = container_of(iwuqp->vqp.qp.context, struct zxdh_uvcontext,
++ ibv_ctx.context);
++ op_info = &info->op.rdma_write;
++ if (op_info->num_lo_sges > qp->max_sq_frag_cnt)
++ return ZXDH_ERR_INVALID_FRAG_COUNT;
+
-+ if (pad)
-+ atomic_fetch_sub(&pad->pd.refcount, 1);
++ for (i = 0; i < op_info->num_lo_sges; i++) {
++ total_size += op_info->lo_sg_list[i].len;
++ if (0 != i && 0 == op_info->lo_sg_list[i].len)
++ return ZXDH_ERR_INVALID_FRAG_LEN;
++ }
+
- free(srq);
-
- return 0;
- }
-
-+enum {
-+ HNSDV_QP_SUP_COMP_MASK = HNSDV_QP_INIT_ATTR_MASK_QP_CREATE_FLAGS |
-+ HNSDV_QP_INIT_ATTR_MASK_QP_CONGEST_TYPE,
-+};
++ if (total_size > ZXDH_MAX_SQ_PAYLOAD_SIZE)
++ return ZXDH_ERR_QP_INVALID_MSG_SIZE;
+
-+static int check_hnsdv_qp_attr(struct hns_roce_context *ctx,
-+ struct hnsdv_qp_init_attr *hns_attr)
-+{
-+ if (!hns_attr)
-+ return 0;
++ op_info->rem_addr.len = total_size;
++ if (iwvctx->zxdh_write_imm_split_switch == 0) {
++ ret_code = zxdh_post_rdma_write(qp, info, post_sq, total_size);
++ if (ret_code)
++ return ret_code;
++ } else {
++ if (imm_data_flag && total_size > qp->mtu) {
++ split_write_imm_wqe(qp, info, &split_part1_info,
++ &split_part2_info);
+
-+ if (!check_comp_mask(hns_attr->comp_mask, HNSDV_QP_SUP_COMP_MASK)) {
-+ verbs_err(&ctx->ibv_ctx, "invalid hnsdv comp_mask 0x%x.\n",
-+ hns_attr->comp_mask);
-+ return EINVAL;
++ ret_code = zxdh_post_rdma_write(qp, &split_part1_info,
++ post_sq, total_size);
++ if (ret_code)
++ return ret_code;
++ ret_code = zxdh_post_rdma_write(qp, &split_part2_info,
++ post_sq, 0);
++ if (ret_code)
++ return ret_code;
++ } else {
++ ret_code = zxdh_post_rdma_write(qp, info, post_sq,
++ total_size);
++ if (ret_code)
++ return ret_code;
++ }
+ }
+
+ return 0;
+}
+
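A hypothetical caller sketch: every field assigned below appears in this file, while the wrapper name and its parameters are illustrative only. It posts a single-SGE RDMA write and rings the doorbell in the same call:

static enum zxdh_status_code post_one_rdma_write(struct zxdh_qp *qp,
						 __u64 laddr, __u32 lkey,
						 __u32 len, __u64 raddr,
						 __u32 rkey, __u64 wr_id)
{
	struct zxdh_sge sge = {
		.tag_off = laddr,	/* local buffer address */
		.len = len,
		.stag = lkey,
	};
	struct zxdh_post_sq_info info = { 0 };

	info.wr_id = wr_id;
	info.op_type = ZXDH_OP_TYPE_WRITE;
	info.signaled = true;
	info.op.rdma_write.lo_sg_list = &sge;
	info.op.rdma_write.num_lo_sges = 1;
	info.op.rdma_write.rem_addr.stag = rkey;
	info.op.rdma_write.rem_addr.tag_off = raddr;

	return zxdh_rdma_write(qp, &info, true);	/* post_sq: ring now */
}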
- enum {
- CREATE_QP_SUP_COMP_MASK = IBV_QP_INIT_ATTR_PD | IBV_QP_INIT_ATTR_XRCD |
- IBV_QP_INIT_ATTR_SEND_OPS_FLAGS,
- };
-
-+enum {
-+ SEND_OPS_FLAG_MASK =
-+ IBV_QP_EX_WITH_RDMA_WRITE | IBV_QP_EX_WITH_RDMA_WRITE_WITH_IMM |
-+ IBV_QP_EX_WITH_SEND | IBV_QP_EX_WITH_SEND_WITH_IMM |
-+ IBV_QP_EX_WITH_RDMA_READ | IBV_QP_EX_WITH_ATOMIC_CMP_AND_SWP |
-+ IBV_QP_EX_WITH_ATOMIC_FETCH_AND_ADD | IBV_QP_EX_WITH_LOCAL_INV |
-+ IBV_QP_EX_WITH_SEND_WITH_INV,
-+};
++static void split_two_part_info(struct zxdh_qp *qp,
++ struct zxdh_post_sq_info *info, __u32 ori_psn,
++ __u32 pre_cal_psn,
++ struct zxdh_post_sq_info *split_part1_info,
++ struct zxdh_post_sq_info *split_part2_info)
++{
++ __u32 total_size = 0;
++ __u32 remain_size = 0;
++ __u32 split_size = 0;
++ struct zxdh_rdma_read *op_info;
+
- static int check_qp_create_mask(struct hns_roce_context *ctx,
- struct ibv_qp_init_attr_ex *attr)
- {
-@@ -798,6 +1090,10 @@ static int check_qp_create_mask(struct hns_roce_context *ctx,
- if (!check_comp_mask(attr->comp_mask, CREATE_QP_SUP_COMP_MASK))
- return EOPNOTSUPP;
-
-+ if (attr->comp_mask & IBV_QP_INIT_ATTR_SEND_OPS_FLAGS &&
-+ !check_comp_mask(attr->send_ops_flags, SEND_OPS_FLAG_MASK))
-+ return -EOPNOTSUPP;
++ op_info = &info->op.rdma_read;
++ total_size = op_info->rem_addr.len;
++ split_part1_info->op.rdma_read.lo_sg_list = qp->split_sg_list;
++ split_part2_info->op.rdma_read.lo_sg_list =
++ qp->split_sg_list + op_info->num_lo_sges;
+
- switch (attr->qp_type) {
- case IBV_QPT_UD:
- if (hr_dev->hw_version == HNS_ROCE_HW_VER2)
-@@ -866,7 +1162,8 @@ static int verify_qp_create_cap(struct hns_roce_context *ctx,
- }
-
- static int verify_qp_create_attr(struct hns_roce_context *ctx,
-- struct ibv_qp_init_attr_ex *attr)
-+ struct ibv_qp_init_attr_ex *attr,
-+ struct hnsdv_qp_init_attr *hns_attr)
- {
- int ret;
-
-@@ -874,9 +1171,48 @@ static int verify_qp_create_attr(struct hns_roce_context *ctx,
- if (ret)
- return ret;
-
-+ ret = check_hnsdv_qp_attr(ctx, hns_attr);
-+ if (ret)
-+ return ret;
++ memset(split_part1_info->op.rdma_read.lo_sg_list, 0,
++ 2 * op_info->num_lo_sges * sizeof(struct zxdh_sge));
++ if (pre_cal_psn < ori_psn && pre_cal_psn != 0)
++ remain_size = (0xffffff - ori_psn + 1) * qp->mtu;
++ else
++ remain_size = (0x800000 - ori_psn) * qp->mtu;
+
- return verify_qp_create_cap(ctx, attr);
- }
-
-+static int hns_roce_qp_spinlock_init(struct hns_roce_context *ctx,
-+ struct ibv_qp_init_attr_ex *attr,
-+ struct hns_roce_qp *qp)
-+{
-+ bool sq_need_lock;
-+ bool rq_need_lock;
-+ int ret;
++ split_size = total_size - remain_size;
+
-+ sq_need_lock = hns_roce_whether_need_lock(attr->pd);
-+ if (!sq_need_lock)
-+ verbs_info(&ctx->ibv_ctx, "configure sq as no lock.\n");
++ split_part1_info->signaled = false;
++ split_part1_info->local_fence = info->local_fence;
++ split_part1_info->read_fence = info->read_fence;
++ split_part1_info->solicited = false;
++ split_part1_info->wr_id = info->wr_id;
++ split_part1_info->op.rdma_read.rem_addr.stag = op_info->rem_addr.stag;
++ split_part1_info->op.rdma_read.rem_addr.tag_off =
++ op_info->rem_addr.tag_off;
+
-+ rq_need_lock = hns_roce_whether_need_lock(attr->pd);
-+ if (!rq_need_lock)
-+ verbs_info(&ctx->ibv_ctx, "configure rq as no lock.\n");
++ split_part2_info->signaled = info->signaled;
++ split_part2_info->local_fence = info->local_fence;
++ split_part2_info->read_fence = info->read_fence;
++ split_part2_info->solicited = info->solicited;
++ split_part2_info->wr_id = info->wr_id;
++ split_part2_info->op.rdma_read.rem_addr.stag = op_info->rem_addr.stag;
++ split_part2_info->op.rdma_read.rem_addr.tag_off =
++ op_info->rem_addr.tag_off + remain_size;
+
-+ ret = hns_roce_spinlock_init(&qp->sq.hr_lock, sq_need_lock);
-+ if (ret)
-+ return ret;
++ for (int i = 0; i < op_info->num_lo_sges; i++) {
++ if (op_info->lo_sg_list[i].len +
++ split_part1_info->op.rdma_read.rem_addr.len <
++ remain_size) {
++ split_part1_info->op.rdma_read.rem_addr.len +=
++ op_info->lo_sg_list[i].len;
++ split_part1_info->op.rdma_read.num_lo_sges += 1;
++ memcpy(split_part1_info->op.rdma_read.lo_sg_list + i,
++ op_info->lo_sg_list + i,
++ sizeof(struct zxdh_sge));
++ continue;
++ } else if (op_info->lo_sg_list[i].len +
++ split_part1_info->op.rdma_read.rem_addr.len ==
++ remain_size) {
++ split_part1_info->op.rdma_read.rem_addr.len +=
++ op_info->lo_sg_list[i].len;
++ split_part1_info->op.rdma_read.num_lo_sges += 1;
++ memcpy(split_part1_info->op.rdma_read.lo_sg_list + i,
++ op_info->lo_sg_list + i,
++ sizeof(struct zxdh_sge));
++ split_part2_info->op.rdma_read.rem_addr.len =
++ split_size;
++ split_part2_info->op.rdma_read.num_lo_sges =
++ op_info->num_lo_sges -
++ split_part1_info->op.rdma_read.num_lo_sges;
++ memcpy(split_part2_info->op.rdma_read.lo_sg_list,
++ op_info->lo_sg_list + i + 1,
++ split_part2_info->op.rdma_read.num_lo_sges *
++ sizeof(struct zxdh_sge));
++ break;
++ }
+
-+ ret = hns_roce_spinlock_init(&qp->rq.hr_lock, rq_need_lock);
-+ if (ret) {
-+ hns_roce_spinlock_destroy(&qp->sq.hr_lock);
-+ return ret;
++ split_part1_info->op.rdma_read.lo_sg_list[i].len =
++ remain_size -
++ split_part1_info->op.rdma_read.rem_addr.len;
++ split_part1_info->op.rdma_read.lo_sg_list[i].tag_off =
++ op_info->lo_sg_list[i].tag_off;
++ split_part1_info->op.rdma_read.lo_sg_list[i].stag =
++ op_info->lo_sg_list[i].stag;
++ split_part1_info->op.rdma_read.rem_addr.len = remain_size;
++ split_part1_info->op.rdma_read.num_lo_sges += 1;
++ split_part2_info->op.rdma_read.lo_sg_list[0].len =
++ op_info->lo_sg_list[i].len -
++ split_part1_info->op.rdma_read.lo_sg_list[i].len;
++ split_part2_info->op.rdma_read.lo_sg_list[0].tag_off =
++ op_info->lo_sg_list[i].tag_off +
++ split_part1_info->op.rdma_read.lo_sg_list[i].len;
++ split_part2_info->op.rdma_read.lo_sg_list[0].stag =
++ op_info->lo_sg_list[i].stag;
++ split_part2_info->op.rdma_read.rem_addr.len = split_size;
++ split_part2_info->op.rdma_read.num_lo_sges =
++ op_info->num_lo_sges -
++ split_part1_info->op.rdma_read.num_lo_sges + 1;
++ if (split_part2_info->op.rdma_read.num_lo_sges - 1 > 0) {
++ memcpy(split_part2_info->op.rdma_read.lo_sg_list + 1,
++ op_info->lo_sg_list + i + 1,
++ (split_part2_info->op.rdma_read.num_lo_sges -
++ 1) * sizeof(struct zxdh_sge));
++ }
++ break;
+ }
-+
-+ return 0;
-+}
-+
-+void hns_roce_qp_spinlock_destroy(struct hns_roce_qp *qp)
-+{
-+ hns_roce_spinlock_destroy(&qp->rq.hr_lock);
-+ hns_roce_spinlock_destroy(&qp->sq.hr_lock);
+}
+
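The first half of a split read is limited to the bytes that can still be transmitted before the 24-bit PSN space wraps (or before the 0x800000 boundary in the non-wrapping case), and the SGE list is cut at exactly that byte count. A worked example of the budget, with the MTU and PSN values assumed purely for illustration:

#include <stdio.h>

int main(void)
{
	unsigned int mtu = 1024;		/* assumed path MTU */
	unsigned int ori_psn = 0xfffff0;	/* PSN of the first packet */
	unsigned int pre_cal_psn = 0x000010;	/* PSN after the whole read, wrapped */
	unsigned int remain_size;

	if (pre_cal_psn < ori_psn && pre_cal_psn != 0)
		remain_size = (0xffffff - ori_psn + 1) * mtu;
	else
		remain_size = (0x800000 - ori_psn) * mtu;

	/* (0xffffff - 0xfffff0 + 1) * 1024 = 16384 bytes for part 1 */
	printf("part 1 carries %u bytes before the PSN wraps\n", remain_size);
	return 0;
}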
- static int alloc_recv_rinl_buf(uint32_t max_sge,
- struct hns_roce_rinl_buf *rinl_buf)
- {
-@@ -918,31 +1254,73 @@ static void free_recv_rinl_buf(struct hns_roce_rinl_buf *rinl_buf)
- }
- }
-
-+static void get_best_multi_region_pg_shift(struct hns_roce_device *hr_dev,
-+ struct hns_roce_context *ctx,
-+ struct hns_roce_qp *qp)
++static enum zxdh_status_code zxdh_post_rdma_read(struct zxdh_qp *qp,
++ struct zxdh_post_sq_info *info,
++ bool post_sq, __u32 total_size)
+{
-+ uint32_t ext_sge_size;
-+ uint32_t sq_size;
-+ uint32_t rq_size;
-+ uint8_t pg_shift;
-+
-+ if (!(ctx->config & HNS_ROCE_UCTX_RSP_DYN_QP_PGSZ)) {
-+ qp->pageshift = HNS_HW_PAGE_SHIFT;
-+ return;
-+ }
++ enum zxdh_status_code ret_code;
++ struct zxdh_rdma_read *op_info;
++ __u32 i, byte_off = 0;
++ bool local_fence = false;
++ __u32 addl_frag_cnt;
++ __le64 *wqe;
++ __u32 wqe_idx;
++ __u16 quanta;
++ __u64 hdr;
+
-+ /*
-+ * The larger the pagesize used, the better the performance, but it
-+ * may waste more memory. Therefore, we use the least common multiple
-+ * and ext_sge buffer size as the pagesize. Additionally, since the
-+ * (aligned to power of 2) of sq wqe buffer size and rq wqe buffer
-+ * size as the pagesize. And the wqe buffer page cannot be larger
-+ * than the buffer size used by extend sge. Additionally, since the
-+ * kernel cannot guarantee the allocation of contiguous memory larger
-+ * than the system page, the pagesize must be smaller than the system
-+ * page.
-+ */
-+ sq_size = qp->sq.wqe_cnt << qp->sq.wqe_shift;
-+ ext_sge_size = qp->ex_sge.sge_cnt << qp->ex_sge.sge_shift;
-+ rq_size = qp->rq.wqe_cnt << qp->rq.wqe_shift;
-+
-+ pg_shift = max_t(uint8_t, sq_size ? hr_ilog32(sq_size) : 0,
-+ rq_size ? hr_ilog32(rq_size) : 0);
-+ pg_shift = ext_sge_size ?
-+ min_t(uint8_t, pg_shift, hr_ilog32(ext_sge_size)) :
-+ pg_shift;
-+ pg_shift = max_t(uint8_t, pg_shift, HNS_HW_PAGE_SHIFT);
-+ qp->pageshift = min_t(uint8_t, pg_shift, hr_ilog32(hr_dev->page_size));
-+}
-+
- static int calc_qp_buff_size(struct hns_roce_device *hr_dev,
-+ struct hns_roce_context *ctx,
- struct hns_roce_qp *qp)
- {
- struct hns_roce_wq *sq = &qp->sq;
- struct hns_roce_wq *rq = &qp->rq;
-+ unsigned int page_size;
- unsigned int size;
-
- qp->buf_size = 0;
-+ get_best_multi_region_pg_shift(hr_dev, ctx, qp);
-+ page_size = 1 << qp->pageshift;
-
- /* SQ WQE */
- sq->offset = 0;
-- size = to_hr_hem_entries_size(sq->wqe_cnt, sq->wqe_shift);
-+ size = align(sq->wqe_cnt << sq->wqe_shift, page_size);
- qp->buf_size += size;
-
- /* extend SGE WQE in SQ */
- qp->ex_sge.offset = qp->buf_size;
- if (qp->ex_sge.sge_cnt > 0) {
-- size = to_hr_hem_entries_size(qp->ex_sge.sge_cnt,
-- qp->ex_sge.sge_shift);
-+ size = align(qp->ex_sge.sge_cnt << qp->ex_sge.sge_shift,
-+ page_size);
- qp->buf_size += size;
- }
-
- /* RQ WQE */
- rq->offset = qp->buf_size;
-- size = to_hr_hem_entries_size(rq->wqe_cnt, rq->wqe_shift);
-+ size = align(rq->wqe_cnt << rq->wqe_shift, page_size);
- qp->buf_size += size;
-
- if (qp->buf_size < 1)
-@@ -951,6 +1329,26 @@ static int calc_qp_buff_size(struct hns_roce_device *hr_dev,
- return 0;
- }
-
-+static inline bool check_qp_support_dca(struct hns_roce_dca_ctx *dca_ctx,
-+ struct ibv_qp_init_attr_ex *attr,
-+ struct hnsdv_qp_init_attr *hns_attr)
-+{
-+ /* DCA pool disable */
-+ if (!dca_ctx->unit_size)
-+ return false;
++ op_info = &info->op.rdma_read;
++ ret_code = zxdh_fragcnt_to_quanta_sq(op_info->num_lo_sges, &quanta);
++ if (ret_code)
++ return ret_code;
+
-+ /* Unsupport type */
-+ if (attr->qp_type != IBV_QPT_RC && attr->qp_type != IBV_QPT_XRC_SEND)
-+ return false;
++ wqe = zxdh_qp_get_next_send_wqe(qp, &wqe_idx, quanta, total_size, info);
++ if (!wqe)
++ return ZXDH_ERR_QP_TOOMANY_WRS_POSTED;
+
-+ if (hns_attr &&
-+ (hns_attr->comp_mask & HNSDV_QP_INIT_ATTR_MASK_QP_CREATE_FLAGS) &&
-+ (hns_attr->create_flags & HNSDV_QP_CREATE_ENABLE_DCA_MODE))
-+ return true;
++ zxdh_clr_wqes(qp, wqe_idx);
+
-+ return false;
-+}
++ addl_frag_cnt =
++ op_info->num_lo_sges > 1 ? (op_info->num_lo_sges - 1) : 0;
++ local_fence |= info->local_fence;
+
- static void qp_free_wqe(struct hns_roce_qp *qp)
- {
- free_recv_rinl_buf(&qp->rq_rinl_buf);
-@@ -962,12 +1360,13 @@ static void qp_free_wqe(struct hns_roce_qp *qp)
- hns_roce_free_buf(&qp->buf);
- }
-
--static int qp_alloc_wqe(struct ibv_qp_cap *cap, struct hns_roce_qp *qp,
-- struct hns_roce_context *ctx)
-+static int qp_alloc_wqe(struct ibv_qp_init_attr_ex *attr,
-+ struct hnsdv_qp_init_attr *hns_attr,
-+ struct hns_roce_qp *qp, struct hns_roce_context *ctx)
- {
- struct hns_roce_device *hr_dev = to_hr_dev(ctx->ibv_ctx.context.device);
-
-- if (calc_qp_buff_size(hr_dev, qp))
-+ if (calc_qp_buff_size(hr_dev, ctx, qp))
- return -EINVAL;
-
- qp->sq.wrid = malloc(qp->sq.wqe_cnt * sizeof(uint64_t));
-@@ -981,12 +1380,26 @@ static int qp_alloc_wqe(struct ibv_qp_cap *cap, struct hns_roce_qp *qp,
- }
-
- if (qp->rq_rinl_buf.wqe_cnt) {
-- if (alloc_recv_rinl_buf(cap->max_recv_sge, &qp->rq_rinl_buf))
-+ if (alloc_recv_rinl_buf(attr->cap.max_recv_sge,
-+ &qp->rq_rinl_buf))
- goto err_alloc;
- }
-
-- if (hns_roce_alloc_buf(&qp->buf, qp->buf_size, HNS_HW_PAGE_SIZE))
-- goto err_alloc;
-+ if (check_qp_support_dca(&ctx->dca_ctx, attr, hns_attr) &&
-+ ctx->dca_ctx.max_size > 0) {
-+ /* when DCA is enabled, use a buffer list to store page addr */
-+ qp->buf.buf = NULL;
-+ qp->dca_wqe.max_cnt = hr_hw_page_count(qp->buf_size);
-+ qp->dca_wqe.shift = HNS_HW_PAGE_SHIFT;
-+ qp->dca_wqe.bufs = calloc(qp->dca_wqe.max_cnt, sizeof(void *));
-+ if (!qp->dca_wqe.bufs)
-+ goto err_alloc;
-+ verbs_debug(&ctx->ibv_ctx, "alloc DCA buf.\n");
++ if (op_info->num_lo_sges) {
++ set_64bit_val(
++ wqe, 16,
++ FIELD_PREP(ZXDHQPSQ_FIRST_FRAG_VALID,
++ op_info->lo_sg_list->len ==
++ ZXDH_MAX_SQ_PAYLOAD_SIZE ?
++ 1 :
++ 0) |
++ FIELD_PREP(ZXDHQPSQ_FIRST_FRAG_LEN,
++ op_info->lo_sg_list->len) |
++ FIELD_PREP(ZXDHQPSQ_FIRST_FRAG_STAG,
++ op_info->lo_sg_list->stag));
++ set_64bit_val(wqe, 8,
++ FIELD_PREP(ZXDHQPSQ_FRAG_TO,
++ op_info->lo_sg_list->tag_off));
+ } else {
-+ if (hns_roce_alloc_buf(&qp->buf, qp->buf_size,
-+ HNS_HW_PAGE_SIZE))
-+ goto err_alloc;
-+ }
-
- return 0;
-
-@@ -1141,8 +1554,6 @@ static int qp_alloc_db(struct ibv_qp_init_attr_ex *attr, struct hns_roce_qp *qp,
- qp->sdb = hns_roce_alloc_db(ctx, HNS_ROCE_QP_TYPE_DB);
- if (!qp->sdb)
- return -ENOMEM;
--
-- *qp->sdb = 0;
- }
-
- if (attr->cap.max_recv_sge) {
-@@ -1154,8 +1565,6 @@ static int qp_alloc_db(struct ibv_qp_init_attr_ex *attr, struct hns_roce_qp *qp,
-
- return -ENOMEM;
- }
--
-- *qp->rdb = 0;
- }
-
- return 0;
-@@ -1185,10 +1594,33 @@ static int hns_roce_store_qp(struct hns_roce_context *ctx,
- return 0;
- }
-
-+static int to_cmd_cong_type(uint8_t cong_type, __u64 *cmd_cong_type)
-+{
-+ switch (cong_type) {
-+ case HNSDV_QP_CREATE_ENABLE_DCQCN:
-+ *cmd_cong_type = HNS_ROCE_CREATE_QP_FLAGS_DCQCN;
-+ break;
-+ case HNSDV_QP_CREATE_ENABLE_LDCP:
-+ *cmd_cong_type = HNS_ROCE_CREATE_QP_FLAGS_LDCP;
-+ break;
-+ case HNSDV_QP_CREATE_ENABLE_HC3:
-+ *cmd_cong_type = HNS_ROCE_CREATE_QP_FLAGS_HC3;
-+ break;
-+ case HNSDV_QP_CREATE_ENABLE_DIP:
-+ *cmd_cong_type = HNS_ROCE_CREATE_QP_FLAGS_DIP;
-+ break;
-+ default:
-+ return EINVAL;
++ /* if there are zero SGEs, post a special SGE with zero length */
++ set_64bit_val(wqe, 16,
++ FIELD_PREP(ZXDHQPSQ_FIRST_FRAG_VALID, 0) |
++ FIELD_PREP(ZXDHQPSQ_FIRST_FRAG_LEN, 0) |
++ FIELD_PREP(ZXDHQPSQ_FIRST_FRAG_STAG,
++ 0x100));
++ set_64bit_val(wqe, 8, FIELD_PREP(ZXDHQPSQ_FRAG_TO, 0));
+ }
+
-+ return 0;
-+}
-+
- static int qp_exec_create_cmd(struct ibv_qp_init_attr_ex *attr,
- struct hns_roce_qp *qp,
- struct hns_roce_context *ctx,
-- uint64_t *dwqe_mmap_key)
-+ uint64_t *dwqe_mmap_key,
-+ struct hnsdv_qp_init_attr *hns_attr)
- {
- struct hns_roce_create_qp_ex_resp resp_ex = {};
- struct hns_roce_create_qp_ex cmd_ex = {};
-@@ -1199,6 +1631,16 @@ static int qp_exec_create_cmd(struct ibv_qp_init_attr_ex *attr,
- cmd_ex.buf_addr = (uintptr_t)qp->buf.buf;
- cmd_ex.log_sq_stride = qp->sq.wqe_shift;
- cmd_ex.log_sq_bb_count = hr_ilog32(qp->sq.wqe_cnt);
-+ cmd_ex.pageshift = qp->pageshift;
-+
-+ if (hns_attr &&
-+ hns_attr->comp_mask & HNSDV_QP_INIT_ATTR_MASK_QP_CONGEST_TYPE) {
-+ ret = to_cmd_cong_type(hns_attr->congest_type,
-+ &cmd_ex.cong_type_flags);
-+ if (ret)
-+ return ret;
-+ cmd_ex.comp_mask |= HNS_ROCE_CREATE_QP_MASK_CONGEST_TYPE;
++ i = 1;
++ for (byte_off = ZXDH_SQ_WQE_BYTESIZE; i < op_info->num_lo_sges;
++ i += 2) {
++ if (i == addl_frag_cnt) {
++ qp->wqe_ops.iw_set_fragment(wqe, byte_off,
++ &op_info->lo_sg_list[i],
++ qp->swqe_polarity);
++ byte_off += ZXDH_QP_FRAG_BYTESIZE;
++ break;
++ }
++ byte_off += ZXDH_QP_FRAG_BYTESIZE;
++ qp->wqe_ops.iw_set_fragment(wqe, byte_off,
++ &op_info->lo_sg_list[i + 1],
++ qp->swqe_polarity);
++ byte_off -= ZXDH_QP_FRAG_BYTESIZE;
++ qp->wqe_ops.iw_set_fragment(wqe, byte_off,
++ &op_info->lo_sg_list[i],
++ qp->swqe_polarity);
++ byte_off += 2 * ZXDH_QP_FRAG_BYTESIZE;
+ }
-
- ret = ibv_cmd_create_qp_ex2(&ctx->ibv_ctx.context, &qp->verbs_qp, attr,
- &cmd_ex.ibv_cmd, sizeof(cmd_ex),
-@@ -1242,16 +1684,13 @@ void hns_roce_free_qp_buf(struct hns_roce_qp *qp, struct hns_roce_context *ctx)
- }
-
- static int hns_roce_alloc_qp_buf(struct ibv_qp_init_attr_ex *attr,
-+ struct hnsdv_qp_init_attr *hns_attr,
- struct hns_roce_qp *qp,
- struct hns_roce_context *ctx)
- {
- int ret;
-
-- if (pthread_spin_init(&qp->sq.lock, PTHREAD_PROCESS_PRIVATE) ||
-- pthread_spin_init(&qp->rq.lock, PTHREAD_PROCESS_PRIVATE))
-- return -ENOMEM;
--
-- ret = qp_alloc_wqe(&attr->cap, qp, ctx);
-+ ret = qp_alloc_wqe(attr, hns_attr, qp, ctx);
- if (ret)
- return ret;
-
-@@ -1273,15 +1712,41 @@ static int mmap_dwqe(struct ibv_context *ibv_ctx, struct hns_roce_qp *qp,
- return 0;
- }
-
-+static void add_qp_to_cq_list(struct ibv_qp_init_attr_ex *attr,
-+ struct hns_roce_qp *qp)
-+{
-+ struct hns_roce_cq *send_cq, *recv_cq;
+
-+ send_cq = attr->send_cq ? to_hr_cq(attr->send_cq) : NULL;
-+ recv_cq = attr->recv_cq ? to_hr_cq(attr->recv_cq) : NULL;
-+
-+ list_node_init(&qp->scq_node);
-+ list_node_init(&qp->rcq_node);
-+ list_node_init(&qp->srcq_node);
-+
-+ hns_roce_lock_cqs(send_cq, recv_cq);
-+ if (send_cq)
-+ list_add_tail(&send_cq->list_sq, &qp->scq_node);
-+ if (recv_cq) {
-+ if (attr->srq)
-+ list_add_tail(&recv_cq->list_srq, &qp->srcq_node);
-+ else
-+ list_add_tail(&recv_cq->list_rq, &qp->rcq_node);
++ /* if the SGE count is even, set the valid bit in the next fragment */
++ if (!(op_info->num_lo_sges & 0x01) && op_info->num_lo_sges) {
++ qp->wqe_ops.iw_set_fragment(wqe, byte_off, NULL,
++ qp->swqe_polarity);
+ }
-+ hns_roce_unlock_cqs(send_cq, recv_cq);
-+}
+
- static struct ibv_qp *create_qp(struct ibv_context *ibv_ctx,
-- struct ibv_qp_init_attr_ex *attr)
-+ struct ibv_qp_init_attr_ex *attr,
-+ struct hnsdv_qp_init_attr *hns_attr)
- {
- struct hns_roce_context *context = to_hr_ctx(ibv_ctx);
-+ struct hns_roce_pad *pad = to_hr_pad(attr->pd);
- struct hns_roce_qp *qp;
- uint64_t dwqe_mmap_key;
- int ret;
-
-- ret = verify_qp_create_attr(context, attr);
-+ ret = verify_qp_create_attr(context, attr, hns_attr);
- if (ret)
- goto err;
-
-@@ -1293,11 +1758,18 @@ static struct ibv_qp *create_qp(struct ibv_context *ibv_ctx,
-
- hns_roce_set_qp_params(attr, qp, context);
-
-- ret = hns_roce_alloc_qp_buf(attr, qp, context);
-+ if (pad)
-+ atomic_fetch_add(&pad->pd.refcount, 1);
++ hdr = FIELD_PREP(ZXDHQPSQ_VALID, qp->swqe_polarity) |
++ FIELD_PREP(ZXDHQPSQ_OPCODE, ZXDH_OP_TYPE_READ) |
++ FIELD_PREP(ZXDHQPSQ_SIGCOMPL, info->signaled) |
++ FIELD_PREP(ZXDHQPSQ_LOCALFENCE, info->local_fence) |
++ FIELD_PREP(ZXDHQPSQ_READFENCE, info->read_fence) |
++ FIELD_PREP(ZXDHQPSQ_SOLICITED, info->solicited) |
++ FIELD_PREP(ZXDHQPSQ_ADDFRAGCNT, addl_frag_cnt) |
++ FIELD_PREP(ZXDHQPSQ_REMSTAG, op_info->rem_addr.stag);
++ set_64bit_val(wqe, 24,
++ FIELD_PREP(ZXDHQPSQ_FRAG_TO, op_info->rem_addr.tag_off));
+
-+ ret = hns_roce_qp_spinlock_init(context, attr, qp);
-+ if (ret)
-+ goto err_spinlock;
++ udma_to_device_barrier(); /* make sure WQE is populated before valid bit is set */
+
-+ ret = hns_roce_alloc_qp_buf(attr, hns_attr, qp, context);
- if (ret)
- goto err_buf;
-
-- ret = qp_exec_create_cmd(attr, qp, context, &dwqe_mmap_key);
-+ ret = qp_exec_create_cmd(attr, qp, context, &dwqe_mmap_key, hns_attr);
- if (ret)
- goto err_cmd;
-
-@@ -1316,6 +1788,7 @@ static struct ibv_qp *create_qp(struct ibv_context *ibv_ctx,
- }
-
- qp_setup_config(attr, qp, context);
-+ add_qp_to_cq_list(attr, qp);
-
- return &qp->verbs_qp.qp;
-
-@@ -1326,6 +1799,8 @@ err_ops:
- err_cmd:
- hns_roce_free_qp_buf(qp, context);
- err_buf:
-+ hns_roce_qp_spinlock_destroy(qp);
-+err_spinlock:
- free(qp);
- err:
- if (ret < 0)
-@@ -1345,7 +1820,7 @@ struct ibv_qp *hns_roce_u_create_qp(struct ibv_pd *pd,
- attrx.comp_mask = IBV_QP_INIT_ATTR_PD;
- attrx.pd = pd;
-
-- qp = create_qp(pd->context, &attrx);
-+ qp = create_qp(pd->context, &attrx, NULL);
- if (qp)
- memcpy(attr, &attrx, sizeof(*attr));
-
-@@ -1355,7 +1830,44 @@ struct ibv_qp *hns_roce_u_create_qp(struct ibv_pd *pd,
- struct ibv_qp *hns_roce_u_create_qp_ex(struct ibv_context *context,
- struct ibv_qp_init_attr_ex *attr)
- {
-- return create_qp(context, attr);
-+ return create_qp(context, attr, NULL);
++ set_64bit_val(wqe, 0, hdr);
++ if (post_sq)
++ zxdh_qp_post_wr(qp);
++ return 0;
+}
+
-+struct ibv_qp *hnsdv_create_qp(struct ibv_context *context,
-+ struct ibv_qp_init_attr_ex *qp_attr,
-+ struct hnsdv_qp_init_attr *hns_attr)
++/**
++ * zxdh_rdma_read - rdma read command
++ * @qp: hw qp ptr
++ * @info: post sq information
++ * @inv_stag: flag for inv_stag
++ * @post_sq: flag to post sq
++ */
++enum zxdh_status_code zxdh_rdma_read(struct zxdh_qp *qp,
++ struct zxdh_post_sq_info *info,
++ bool inv_stag, bool post_sq)
+{
-+ if (!context || !qp_attr) {
-+ errno = EINVAL;
-+ return NULL;
-+ }
++ struct zxdh_post_sq_info split_part1_info = { 0 };
++ struct zxdh_post_sq_info split_part2_info = { 0 };
++ struct zxdh_rdma_read *op_info;
++ enum zxdh_status_code ret_code;
++ __u32 i, total_size = 0, pre_cal_psn = 0;
+
-+ if (!is_hns_dev(context->device)) {
-+ errno = EOPNOTSUPP;
-+ return NULL;
++ op_info = &info->op.rdma_read;
++ if (qp->max_sq_frag_cnt < op_info->num_lo_sges)
++ return ZXDH_ERR_INVALID_FRAG_COUNT;
++
++ for (i = 0; i < op_info->num_lo_sges; i++) {
++ total_size += op_info->lo_sg_list[i].len;
++ if (0 != i && 0 == op_info->lo_sg_list[i].len)
++ return ZXDH_ERR_INVALID_FRAG_LEN;
+ }
+
-+ return create_qp(context, qp_attr, hns_attr);
-+}
++ if (total_size > ZXDH_MAX_SQ_PAYLOAD_SIZE)
++ return ZXDH_ERR_QP_INVALID_MSG_SIZE;
++ op_info->rem_addr.len = total_size;
++ pre_cal_psn = qp->next_psn;
++ qp_tx_psn_add(&pre_cal_psn, total_size, qp->mtu);
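++ /*
++ * pre_cal_psn is the PSN expected after this read completes. If
++ * read_wqe_need_split() sees the PSN range wrap (or cross the
++ * 0x800000 midpoint of the 24-bit PSN space), the read is split
++ * and posted as two WQEs via split_two_part_info().
++ */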
++ if (read_wqe_need_split(pre_cal_psn, qp->next_psn)) {
++ split_two_part_info(qp, info, qp->next_psn, pre_cal_psn,
++ &split_part1_info, &split_part2_info);
++ ret_code = zxdh_post_rdma_read(qp, &split_part1_info, post_sq,
++ total_size);
++ if (ret_code)
++ return ret_code;
+
-+int hnsdv_query_device(struct ibv_context *context,
-+ struct hnsdv_context *attrs_out)
-+{
-+ struct hns_roce_device *hr_dev = to_hr_dev(context->device);
++ qp_tx_psn_add(&qp->next_psn,
++ split_part1_info.op.rdma_read.rem_addr.len,
++ qp->mtu);
++ ret_code = zxdh_post_rdma_read(qp, &split_part2_info, post_sq,
++ total_size);
++ if (ret_code)
++ return ret_code;
+
-+ if (!hr_dev || !attrs_out)
-+ return EINVAL;
++ qp_tx_psn_add(&qp->next_psn,
++ split_part2_info.op.rdma_read.rem_addr.len,
++ qp->mtu);
++ } else {
++ ret_code = zxdh_post_rdma_read(qp, info, post_sq, total_size);
++ if (ret_code)
++ return ret_code;
+
-+ if (!is_hns_dev(context->device)) {
-+ verbs_err(verbs_get_ctx(context), "not a HNS RoCE device!\n");
-+ return EOPNOTSUPP;
++ qp_tx_psn_add(&qp->next_psn, total_size, qp->mtu);
+ }
-+ memset(attrs_out, 0, sizeof(*attrs_out));
-+
-+ attrs_out->comp_mask |= HNSDV_CONTEXT_MASK_CONGEST_TYPE;
-+ attrs_out->congest_type = hr_dev->congest_cap;
-+
+ return 0;
- }
-
- struct ibv_qp *hns_roce_u_open_qp(struct ibv_context *context,
-@@ -1486,6 +1998,9 @@ struct ibv_ah *hns_roce_u_create_ah(struct ibv_pd *pd, struct ibv_ah_attr *attr)
- ah->av.mac, NULL))
- goto err;
-
-+ if (resp.tc_mode == HNS_ROCE_TC_MAP_MODE_DSCP)
-+ ah->av.sl = resp.priority;
++}
+
- ah->av.udp_sport = get_ah_udp_sport(attr);
-
- return &ah->ibv_ah;
-diff --git a/providers/hns/hnsdv.h b/providers/hns/hnsdv.h
-new file mode 100644
-index 0000000..68bf001
---- /dev/null
-+++ b/providers/hns/hnsdv.h
-@@ -0,0 +1,85 @@
-+/* SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause */
-+/*
-+ * Copyright (c) 2024 Hisilicon Limited.
++/**
++ * zxdh_rc_send - rdma send command
++ * @qp: hw qp ptr
++ * @info: post sq information
++ * @post_sq: flag to post sq
+ */
++enum zxdh_status_code zxdh_rc_send(struct zxdh_qp *qp,
++ struct zxdh_post_sq_info *info, bool post_sq)
++{
++ __le64 *wqe;
++ struct zxdh_post_send *op_info;
++ __u64 hdr;
++ __u32 i, wqe_idx, total_size = 0, byte_off;
++ enum zxdh_status_code ret_code;
++ __u32 frag_cnt, addl_frag_cnt;
++ bool read_fence = false;
++ __u16 quanta;
++ bool imm_data_flag = info->imm_data_valid ? 1 : 0;
+
-+#ifndef __HNSDV_H__
-+#define __HNSDV_H__
-+
-+#include
-+#include
-+#include
-+#include
++ op_info = &info->op.send;
++ if (qp->max_sq_frag_cnt < op_info->num_sges)
++ return ZXDH_ERR_INVALID_FRAG_COUNT;
+
-+#ifdef __cplusplus
-+extern "C" {
-+#endif
++ for (i = 0; i < op_info->num_sges; i++) {
++ total_size += op_info->sg_list[i].len;
++ if (0 != i && 0 == op_info->sg_list[i].len)
++ return ZXDH_ERR_INVALID_FRAG_LEN;
++ }
+
-+enum hnsdv_qp_congest_ctrl_type {
-+ HNSDV_QP_CREATE_ENABLE_DCQCN = 1 << 0,
-+ HNSDV_QP_CREATE_ENABLE_LDCP = 1 << 1,
-+ HNSDV_QP_CREATE_ENABLE_HC3 = 1 << 2,
-+ HNSDV_QP_CREATE_ENABLE_DIP = 1 << 3,
-+};
++ if (total_size > ZXDH_MAX_SQ_PAYLOAD_SIZE)
++ return ZXDH_ERR_QP_INVALID_MSG_SIZE;
+
-+enum hnsdv_qp_create_flags {
-+ HNSDV_QP_CREATE_ENABLE_DCA_MODE = 1 << 0,
-+};
++ if (imm_data_flag)
++ frag_cnt = op_info->num_sges ? (op_info->num_sges + 1) : 2;
++ else
++ frag_cnt = op_info->num_sges;
++ ret_code = zxdh_fragcnt_to_quanta_sq(frag_cnt, &quanta);
++ if (ret_code)
++ return ret_code;
+
-+enum hnsdv_context_comp_mask {
-+ HNSDV_CONTEXT_MASK_DCA_PRIME_QPS = 1 << 0,
-+ HNSDV_CONTEXT_MASK_DCA_UNIT_SIZE = 1 << 1,
-+ HNSDV_CONTEXT_MASK_DCA_MAX_SIZE = 1 << 2,
-+ HNSDV_CONTEXT_MASK_DCA_MIN_SIZE = 1 << 3,
-+};
++ wqe = zxdh_qp_get_next_send_wqe(qp, &wqe_idx, quanta, total_size, info);
++ if (!wqe)
++ return ZXDH_ERR_QP_TOOMANY_WRS_POSTED;
+
-+enum hnsdv_qp_init_attr_mask {
-+ HNSDV_QP_INIT_ATTR_MASK_QP_CREATE_FLAGS = 1 << 0,
-+ HNSDV_QP_INIT_ATTR_MASK_QP_CONGEST_TYPE = 1 << 1,
-+};
++ zxdh_clr_wqes(qp, wqe_idx);
+
-+struct hnsdv_context_attr {
-+ uint64_t flags; /* Use enum hnsdv_context_attr_flags */
-+ uint64_t comp_mask; /* Use enum hnsdv_context_comp_mask */
-+ uint32_t dca_prime_qps;
-+ uint32_t dca_unit_size;
-+ uint64_t dca_max_size;
-+ uint64_t dca_min_size;
-+};
++ read_fence |= info->read_fence;
++ addl_frag_cnt = op_info->num_sges > 1 ? (op_info->num_sges - 1) : 0;
++ if (op_info->num_sges) {
++ set_64bit_val(
++ wqe, 16,
++ FIELD_PREP(ZXDHQPSQ_FIRST_FRAG_VALID,
++ op_info->sg_list->len ==
++ ZXDH_MAX_SQ_PAYLOAD_SIZE ?
++ 1 :
++ 0) |
++ FIELD_PREP(ZXDHQPSQ_FIRST_FRAG_LEN,
++ op_info->sg_list->len) |
++ FIELD_PREP(ZXDHQPSQ_FIRST_FRAG_STAG,
++ op_info->sg_list->stag));
++ set_64bit_val(wqe, 8,
++ FIELD_PREP(ZXDHQPSQ_FRAG_TO,
++ op_info->sg_list->tag_off));
++ } else {
++ /* if there is no SGE, post a special SGE with zero length */
++ set_64bit_val(wqe, 16,
++ FIELD_PREP(ZXDHQPSQ_FIRST_FRAG_VALID, 0) |
++ FIELD_PREP(ZXDHQPSQ_FIRST_FRAG_LEN, 0) |
++ FIELD_PREP(ZXDHQPSQ_FIRST_FRAG_STAG,
++ 0x100));
++ set_64bit_val(wqe, 8, FIELD_PREP(ZXDHQPSQ_FRAG_TO, 0));
++ }
+
-+struct hnsdv_qp_init_attr {
-+ uint64_t comp_mask; /* Use enum hnsdv_qp_init_attr_mask */
-+ uint32_t create_flags; /* Use enum hnsdv_qp_create_flags */
-+ uint8_t congest_type; /* Use enum hnsdv_qp_congest_ctrl_type */
-+ uint8_t reserved[3];
-+};
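++ /*
++ * Remaining SGEs are packed two per additional 32-byte quantum
++ * (the first SGE shares the first quantum with the WQE header).
++ * With immediate data, the imm word occupies the start of the
++ * second quantum, SGE[1] goes into the second half of that
++ * quantum, and pairing starts from SGE[2]. Within each pair the
++ * higher-indexed SGE is written to its slot first.
++ */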
++ if (imm_data_flag) {
++ byte_off = ZXDH_SQ_WQE_BYTESIZE + ZXDH_QP_FRAG_BYTESIZE;
++ if (op_info->num_sges > 1) {
++ qp->wqe_ops.iw_set_fragment(wqe, byte_off,
++ &op_info->sg_list[1],
++ qp->swqe_polarity);
++ byte_off += ZXDH_QP_FRAG_BYTESIZE;
++ }
++ set_64bit_val(
++ wqe, ZXDH_SQ_WQE_BYTESIZE,
++ FIELD_PREP(ZXDHQPSQ_IMMDATA_VALID, qp->swqe_polarity) |
++ FIELD_PREP(ZXDHQPSQ_IMMDATA, info->imm_data));
++ i = 2;
++ if (i < op_info->num_sges) {
++ for (byte_off = ZXDH_SQ_WQE_BYTESIZE +
++ 2 * ZXDH_QP_FRAG_BYTESIZE;
++ i < op_info->num_sges; i += 2) {
++ if (i == addl_frag_cnt) {
++ qp->wqe_ops.iw_set_fragment(
++ wqe, byte_off,
++ &op_info->sg_list[i],
++ qp->swqe_polarity);
++ byte_off += ZXDH_QP_FRAG_BYTESIZE;
++ break;
++ }
++ byte_off += ZXDH_QP_FRAG_BYTESIZE;
++ qp->wqe_ops.iw_set_fragment(
++ wqe, byte_off, &op_info->sg_list[i + 1],
++ qp->swqe_polarity);
++ byte_off -= ZXDH_QP_FRAG_BYTESIZE;
++ qp->wqe_ops.iw_set_fragment(
++ wqe, byte_off, &op_info->sg_list[i],
++ qp->swqe_polarity);
++ byte_off += 2 * ZXDH_QP_FRAG_BYTESIZE;
++ }
++ }
++ } else {
++ i = 1;
++ for (byte_off = ZXDH_SQ_WQE_BYTESIZE; i < op_info->num_sges;
++ i += 2) {
++ if (i == addl_frag_cnt) {
++ qp->wqe_ops.iw_set_fragment(
++ wqe, byte_off, &op_info->sg_list[i],
++ qp->swqe_polarity);
++ byte_off += ZXDH_QP_FRAG_BYTESIZE;
++ break;
++ }
++ byte_off += ZXDH_QP_FRAG_BYTESIZE;
++ qp->wqe_ops.iw_set_fragment(wqe, byte_off,
++ &op_info->sg_list[i + 1],
++ qp->swqe_polarity);
++ byte_off -= ZXDH_QP_FRAG_BYTESIZE;
++ qp->wqe_ops.iw_set_fragment(wqe, byte_off,
++ &op_info->sg_list[i],
++ qp->swqe_polarity);
++ byte_off += 2 * ZXDH_QP_FRAG_BYTESIZE;
++ }
++ }
+
-+enum hnsdv_context_attr_flags {
-+ HNSDV_CONTEXT_FLAGS_DCA = 1 << 0,
-+};
++ /* if the fragment count is even (and non-zero), set the valid bit in the next fragment */
++ if (!(frag_cnt & 0x01) && frag_cnt) {
++ qp->wqe_ops.iw_set_fragment(wqe, byte_off, NULL,
++ qp->swqe_polarity);
++ }
+
-+enum hnsdv_query_context_comp_mask {
-+ HNSDV_CONTEXT_MASK_CONGEST_TYPE = 1 << 0,
-+};
++ hdr = FIELD_PREP(ZXDHQPSQ_VALID, qp->swqe_polarity) |
++ FIELD_PREP(ZXDHQPSQ_OPCODE, info->op_type) |
++ FIELD_PREP(ZXDHQPSQ_SIGCOMPL, info->signaled) |
++ FIELD_PREP(ZXDHQPSQ_LOCALFENCE, info->local_fence) |
++ FIELD_PREP(ZXDHQPSQ_READFENCE, read_fence) |
++ FIELD_PREP(ZXDHQPSQ_SOLICITED, info->solicited) |
++ FIELD_PREP(ZXDHQPSQ_IMMDATAFLAG, imm_data_flag) |
++ FIELD_PREP(ZXDHQPSQ_ADDFRAGCNT, addl_frag_cnt) |
++ FIELD_PREP(ZXDHQPSQ_REMSTAG, info->stag_to_inv);
++ set_64bit_val(wqe, 24,
++ FIELD_PREP(ZXDHQPSQ_INLINEDATAFLAG, 0) |
++ FIELD_PREP(ZXDHQPSQ_INLINEDATALEN, 0));
+
-+struct hnsdv_context {
-+ uint64_t comp_mask; /* Use enum hnsdv_query_context_comp_mask */
-+ uint64_t flags;
-+ uint8_t congest_type; /* Use enum hnsdv_qp_congest_ctrl_type */
-+ uint8_t reserved[7];
-+};
++ udma_to_device_barrier(); /* make sure WQE is populated before valid bit is set */
+
-+bool hnsdv_is_supported(struct ibv_device *device);
-+int hnsdv_query_device(struct ibv_context *ctx_in,
-+ struct hnsdv_context *attrs_out);
-+struct ibv_qp *hnsdv_create_qp(struct ibv_context *context,
-+ struct ibv_qp_init_attr_ex *qp_attr,
-+ struct hnsdv_qp_init_attr *hns_qp_attr);
-+struct ibv_context *hnsdv_open_device(struct ibv_device *device,
-+ struct hnsdv_context_attr *attr);
++ set_64bit_val(wqe, 0, hdr);
++ if (post_sq)
++ zxdh_qp_post_wr(qp);
++ qp_tx_psn_add(&qp->next_psn, total_size, qp->mtu);
+
-+#ifdef __cplusplus
++ return 0;
+}
-+#endif
+
-+#endif /* __HNSDV_H__ */
-diff --git a/providers/hns/libhns.map b/providers/hns/libhns.map
-new file mode 100644
-index 0000000..a955346
---- /dev/null
-+++ b/providers/hns/libhns.map
-@@ -0,0 +1,10 @@
-+/* Export symbols should be added below according to
-+ Documentation/versioning.md document. */
-+HNS_1.0 {
-+ global:
-+ hnsdv_is_supported;
-+ hnsdv_create_qp;
-+ hnsdv_query_device;
-+ hnsdv_open_device;
-+ local: *;
-+};
-diff --git a/providers/zrdma/CMakeLists.txt b/providers/zrdma/CMakeLists.txt
-new file mode 100644
-index 0000000..7706b9a
---- /dev/null
-+++ b/providers/zrdma/CMakeLists.txt
-@@ -0,0 +1,18 @@
-+# SPDX-License-Identifier: (GPL-2.0 OR Linux-OpenIB)
-+# Copyright (c) 2019, Intel Corporation.
-+set(CMAKE_C_FLAGS "${CMAKE_CXX_FLAGS} -Werror")
++/**
++ * zxdh_ud_send - rdma send command
++ * @qp: hw qp ptr
++ * @info: post sq information
++ * @post_sq: flag to post sq
++ */
++enum zxdh_status_code zxdh_ud_send(struct zxdh_qp *qp,
++ struct zxdh_post_sq_info *info, bool post_sq)
++{
++ __le64 *wqe_base;
++ __le64 *wqe_ex = NULL;
++ struct zxdh_post_send *op_info;
++ __u64 hdr;
++ __u32 i, wqe_idx, total_size = 0, byte_off;
++ enum zxdh_status_code ret_code;
++ __u32 frag_cnt, addl_frag_cnt;
++ bool read_fence = false;
++ __u16 quanta;
++ bool imm_data_flag = info->imm_data_valid ? 1 : 0;
+
-+rdma_shared_provider(zrdma libzrdma.map
-+ 1 1.1.${PACKAGE_VERSION}
-+ uk.c
-+ umain.c
-+ uverbs.c
-+ private_verbs_cmd.c
-+)
++ op_info = &info->op.send;
++ if (qp->max_sq_frag_cnt < op_info->num_sges)
++ return ZXDH_ERR_INVALID_FRAG_COUNT;
+
-+publish_headers(infiniband
-+ zxdh_dv.h
-+)
++ for (i = 0; i < op_info->num_sges; i++) {
++ total_size += op_info->sg_list[i].len;
++ if (0 != i && 0 == op_info->sg_list[i].len)
++ return ZXDH_ERR_INVALID_FRAG_LEN;
++ }
+
++ if (total_size > ZXDH_MAX_SQ_PAYLOAD_SIZE)
++ return ZXDH_ERR_QP_INVALID_MSG_SIZE;
+
-+rdma_pkg_config("zrdma" "libibverbs" "${CMAKE_THREAD_LIBS_INIT}")
-diff --git a/providers/zrdma/abi.h b/providers/zrdma/abi.h
-new file mode 100644
-index 0000000..0fe8547
---- /dev/null
-+++ b/providers/zrdma/abi.h
-@@ -0,0 +1,36 @@
-+/* SPDX-License-Identifier: GPL-2.0 or Linux-OpenIB */
-+/* Copyright (c) 2024 ZTE Corporation. All rights reserved. */
-+#ifndef PROVIDER_ZXDH_ABI_H
-+#define PROVIDER_ZXDH_ABI_H
-+
-+#include "zxdh.h"
-+#include
-+#include
-+#include
++ if (imm_data_flag)
++ frag_cnt = op_info->num_sges ? (op_info->num_sges + 1) : 2;
++ else
++ frag_cnt = op_info->num_sges;
++ ret_code = zxdh_fragcnt_to_quanta_sq(frag_cnt, &quanta);
++ if (ret_code)
++ return ret_code;
+
-+#define ZXDH_MIN_ABI_VERSION 0
-+#define ZXDH_MAX_ABI_VERSION 5
++ if (quanta > ZXDH_SQ_RING_FREE_QUANTA(qp->sq_ring))
++ return ZXDH_ERR_QP_TOOMANY_WRS_POSTED;
+
-+DECLARE_DRV_CMD(zxdh_ualloc_pd, IB_USER_VERBS_CMD_ALLOC_PD, empty,
-+ zxdh_alloc_pd_resp);
-+DECLARE_DRV_CMD(zxdh_ucreate_cq, IB_USER_VERBS_CMD_CREATE_CQ,
-+ zxdh_create_cq_req, zxdh_create_cq_resp);
-+DECLARE_DRV_CMD(zxdh_ucreate_cq_ex, IB_USER_VERBS_EX_CMD_CREATE_CQ,
-+ zxdh_create_cq_req, zxdh_create_cq_resp);
-+DECLARE_DRV_CMD(zxdh_uresize_cq, IB_USER_VERBS_CMD_RESIZE_CQ,
-+ zxdh_resize_cq_req, empty);
-+DECLARE_DRV_CMD(zxdh_ucreate_qp, IB_USER_VERBS_CMD_CREATE_QP,
-+ zxdh_create_qp_req, zxdh_create_qp_resp);
-+DECLARE_DRV_CMD(zxdh_umodify_qp, IB_USER_VERBS_EX_CMD_MODIFY_QP,
-+ zxdh_modify_qp_req, zxdh_modify_qp_resp);
-+DECLARE_DRV_CMD(zxdh_get_context, IB_USER_VERBS_CMD_GET_CONTEXT,
-+ zxdh_alloc_ucontext_req, zxdh_alloc_ucontext_resp);
-+DECLARE_DRV_CMD(zxdh_ureg_mr, IB_USER_VERBS_CMD_REG_MR, zxdh_mem_reg_req,
-+ zxdh_reg_mr_resp);
-+DECLARE_DRV_CMD(zxdh_urereg_mr, IB_USER_VERBS_CMD_REREG_MR, zxdh_mem_reg_req,
-+ empty);
-+DECLARE_DRV_CMD(zxdh_ucreate_ah, IB_USER_VERBS_CMD_CREATE_AH, empty,
-+ zxdh_create_ah_resp);
-+DECLARE_DRV_CMD(zxdh_ucreate_srq, IB_USER_VERBS_CMD_CREATE_SRQ,
-+ zxdh_create_srq_req, zxdh_create_srq_resp);
-+#endif /* PROVIDER_ZXDH_ABI_H */
-diff --git a/providers/zrdma/defs.h b/providers/zrdma/defs.h
-new file mode 100644
-index 0000000..24b6c56
---- /dev/null
-+++ b/providers/zrdma/defs.h
-@@ -0,0 +1,388 @@
-+/* SPDX-License-Identifier: GPL-2.0 or Linux-OpenIB */
-+/* Copyright (c) 2024 ZTE Corporation. All rights reserved. */
-+#ifndef ZXDH_DEFS_H
-+#define ZXDH_DEFS_H
++ wqe_idx = ZXDH_RING_CURRENT_HEAD(qp->sq_ring);
++ if (!wqe_idx)
++ qp->swqe_polarity = !qp->swqe_polarity;
+
-+#include "osdep.h"
++ ZXDH_RING_MOVE_HEAD_BY_COUNT_NOCHECK(qp->sq_ring, quanta);
+
-+#define ZXDH_RECV_ERR_FLAG_NAK_RNR_NAK 1
-+#define ZXDH_RECV_ERR_FLAG_READ_RESP 2
-+#define ZXDH_RETRY_CQE_SQ_OPCODE_ERR 32
-+#define ZXDH_QP_RETRY_COUNT 2
-+#define ZXDH_RESET_RETRY_CQE_SQ_OPCODE_ERR 0x1f
++ wqe_base = qp->sq_base[wqe_idx].elem;
++ qp->sq_wrtrk_array[wqe_idx].wrid = info->wr_id;
++ qp->sq_wrtrk_array[wqe_idx].wr_len = total_size;
++ qp->sq_wrtrk_array[wqe_idx].quanta = quanta;
+
-+#define ZXDH_QP_TYPE_ROCE_RC 1
-+#define ZXDH_QP_TYPE_ROCE_UD 2
++ zxdh_clr_wqes(qp, wqe_idx);
+
-+#define ZXDH_HW_PAGE_SIZE 4096
-+#define ZXDH_HW_PAGE_SHIFT 12
-+#define ZXDH_CQE_QTYPE_RQ 0
-+#define ZXDH_CQE_QTYPE_SQ 1
++ read_fence |= info->read_fence;
++ addl_frag_cnt = op_info->num_sges > 1 ? (op_info->num_sges - 1) : 0;
++ hdr = FIELD_PREP(ZXDHQPSQ_VALID, qp->swqe_polarity) |
++ FIELD_PREP(ZXDHQPSQ_OPCODE, info->op_type) |
++ FIELD_PREP(ZXDHQPSQ_SIGCOMPL, info->signaled) |
++ FIELD_PREP(ZXDHQPSQ_SOLICITED, info->solicited) |
++ FIELD_PREP(ZXDHQPSQ_IMMDATAFLAG, imm_data_flag) |
++ FIELD_PREP(ZXDHQPSQ_UD_INLINEDATAFLAG, 0) |
++ FIELD_PREP(ZXDHQPSQ_UD_INLINEDATALEN, 0) |
++ FIELD_PREP(ZXDHQPSQ_UD_ADDFRAGCNT, addl_frag_cnt) |
++ FIELD_PREP(ZXDHQPSQ_AHID, op_info->ah_id);
+
-+#define ZXDH_MAX_SQ_WQES_PER_PAGE 128
-+#define ZXDH_MAX_SQ_DEPTH 32768
++ if (op_info->num_sges) {
++ set_64bit_val(
++ wqe_base, 16,
++ FIELD_PREP(ZXDHQPSQ_FIRST_FRAG_VALID,
++ op_info->sg_list->len ==
++ ZXDH_MAX_SQ_PAYLOAD_SIZE ?
++ 1 :
++ 0) |
++ FIELD_PREP(ZXDHQPSQ_FIRST_FRAG_LEN,
++ op_info->sg_list->len) |
++ FIELD_PREP(ZXDHQPSQ_FIRST_FRAG_STAG,
++ op_info->sg_list->stag));
++ set_64bit_val(wqe_base, 8,
++ FIELD_PREP(ZXDHQPSQ_FRAG_TO,
++ op_info->sg_list->tag_off));
++ } else {
++ /* if there is no SGE, post a special SGE with zero length */
++ set_64bit_val(wqe_base, 16,
++ FIELD_PREP(ZXDHQPSQ_FIRST_FRAG_VALID, 0) |
++ FIELD_PREP(ZXDHQPSQ_FIRST_FRAG_LEN, 0) |
++ FIELD_PREP(ZXDHQPSQ_FIRST_FRAG_STAG,
++ 0x100));
++ set_64bit_val(wqe_base, 8, FIELD_PREP(ZXDHQPSQ_FRAG_TO, 0));
++ }
+
-+#define ZXDH_QP_SW_MIN_WQSIZE 64u /* in WRs*/
-+#define ZXDH_QP_WQE_MIN_SIZE 32
-+#define ZXDH_QP_SQE_MIN_SIZE 32
-+#define ZXDH_QP_RQE_MIN_SIZE 16
-+#define ZXDH_QP_WQE_MAX_SIZE 256
-+#define ZXDH_QP_WQE_MIN_QUANTA 1
-+#define ZXDH_MAX_RQ_WQE_SHIFT_GEN1 2
-+#define ZXDH_MAX_RQ_WQE_SHIFT_GEN2 3
-+#define ZXDH_SRQ_FRAG_BYTESIZE 16
-+#define ZXDH_QP_FRAG_BYTESIZE 16
-+#define ZXDH_SQ_WQE_BYTESIZE 32
-+#define ZXDH_SRQ_WQE_MIN_SIZE 16
++ if (imm_data_flag) {
++ wqe_idx = (wqe_idx + 1) % qp->sq_ring.size;
++ if (!wqe_idx)
++ qp->swqe_polarity = !qp->swqe_polarity;
++ wqe_ex = qp->sq_base[wqe_idx].elem;
++ if (op_info->num_sges > 1) {
++ qp->wqe_ops.iw_set_fragment(wqe_ex,
++ ZXDH_QP_FRAG_BYTESIZE,
++ &op_info->sg_list[1],
++ qp->swqe_polarity);
++ }
++ set_64bit_val(
++ wqe_ex, 0,
++ FIELD_PREP(ZXDHQPSQ_IMMDATA_VALID, qp->swqe_polarity) |
++ FIELD_PREP(ZXDHQPSQ_IMMDATA, info->imm_data));
++ i = 2;
++ for (byte_off = ZXDH_QP_FRAG_BYTESIZE; i < op_info->num_sges;
++ i += 2) {
++ if (!(i & 0x1)) {
++ wqe_idx = (wqe_idx + 1) % qp->sq_ring.size;
++ if (!wqe_idx)
++ qp->swqe_polarity = !qp->swqe_polarity;
++ wqe_ex = qp->sq_base[wqe_idx].elem;
++ }
++ if (i == addl_frag_cnt) {
++ qp->wqe_ops.iw_set_fragment(
++ wqe_ex, 0, &op_info->sg_list[i],
++ qp->swqe_polarity);
++ break;
++ }
++ qp->wqe_ops.iw_set_fragment(
++ wqe_ex, byte_off % ZXDH_SQ_WQE_BYTESIZE,
++ &op_info->sg_list[i + 1], qp->swqe_polarity);
++ byte_off -= ZXDH_QP_FRAG_BYTESIZE;
++ qp->wqe_ops.iw_set_fragment(
++ wqe_ex, byte_off % ZXDH_SQ_WQE_BYTESIZE,
++ &op_info->sg_list[i], qp->swqe_polarity);
++ byte_off += ZXDH_QP_FRAG_BYTESIZE;
++ }
++ } else {
++ i = 1;
++ for (byte_off = 0; i < op_info->num_sges; i += 2) {
++ if (i & 0x1) {
++ wqe_idx = (wqe_idx + 1) % qp->sq_ring.size;
++ if (!wqe_idx)
++ qp->swqe_polarity = !qp->swqe_polarity;
++ wqe_ex = qp->sq_base[wqe_idx].elem;
++ }
++ if (i == addl_frag_cnt) {
++ qp->wqe_ops.iw_set_fragment(
++ wqe_ex, 0, &op_info->sg_list[i],
++ qp->swqe_polarity);
++ break;
++ }
++ byte_off += ZXDH_QP_FRAG_BYTESIZE;
++ qp->wqe_ops.iw_set_fragment(
++ wqe_ex, byte_off % ZXDH_SQ_WQE_BYTESIZE,
++ &op_info->sg_list[i + 1], qp->swqe_polarity);
++ byte_off -= ZXDH_QP_FRAG_BYTESIZE;
++ qp->wqe_ops.iw_set_fragment(
++ wqe_ex, byte_off % ZXDH_SQ_WQE_BYTESIZE,
++ &op_info->sg_list[i], qp->swqe_polarity);
++ }
++ }
+
-+#define ZXDH_SQ_RSVD 258
-+#define ZXDH_RQ_RSVD 1
-+#define ZXDH_SRQ_RSVD 1
++ /* if the fragment count is even (and non-zero), set the valid bit in the next fragment */
++ if (!(frag_cnt & 0x01) && frag_cnt && wqe_ex) {
++ qp->wqe_ops.iw_set_fragment(wqe_ex, ZXDH_QP_FRAG_BYTESIZE, NULL,
++ qp->swqe_polarity);
++ }
+
-+#define ZXDH_FEATURE_RTS_AE 1ULL
-+#define ZXDH_FEATURE_CQ_RESIZE 2ULL
-+#define ZXDHQP_OP_RDMA_WRITE 0x00
-+#define ZXDHQP_OP_RDMA_READ 0x01
-+#define ZXDHQP_OP_RDMA_SEND 0x03
-+#define ZXDHQP_OP_RDMA_SEND_INV 0x04
-+#define ZXDHQP_OP_RDMA_SEND_SOL_EVENT 0x05
-+#define ZXDHQP_OP_RDMA_SEND_SOL_EVENT_INV 0x06
-+#define ZXDHQP_OP_BIND_MW 0x08
-+#define ZXDHQP_OP_FAST_REGISTER 0x09
-+#define ZXDHQP_OP_LOCAL_INVALIDATE 0x0a
-+#define ZXDHQP_OP_RDMA_READ_LOC_INV 0x0b
-+#define ZXDHQP_OP_NOP 0x0c
++ set_64bit_val(wqe_base, 24,
++ FIELD_PREP(ZXDHQPSQ_DESTQPN, op_info->dest_qp) |
++ FIELD_PREP(ZXDHQPSQ_DESTQKEY, op_info->qkey));
+
-+#define ZXDH_CQPHC_QPCTX GENMASK_ULL(63, 0)
-+#define ZXDH_QP_DBSA_HW_SQ_TAIL GENMASK_ULL(14, 0)
-+#define ZXDH_CQ_DBSA_CQEIDX GENMASK_ULL(22, 0)
-+#define ZXDH_CQ_DBSA_SW_CQ_SELECT GENMASK_ULL(28, 23)
-+#define ZXDH_CQ_DBSA_ARM_NEXT BIT_ULL(31)
-+// #define ZXDH_CQ_DBSA_ARM_NEXT_SE BIT_ULL(15)
-+#define ZXDH_CQ_DBSA_ARM_SEQ_NUM GENMASK_ULL(30, 29)
-+#define ZXDH_CQ_ARM_CQ_ID_S 10
-+#define ZXDH_CQ_ARM_CQ_ID GENMASK_ULL(29, 10)
-+#define ZXDH_CQ_ARM_DBSA_VLD_S 30
-+#define ZXDH_CQ_ARM_DBSA_VLD BIT_ULL(30)
++ udma_to_device_barrier(); /* make sure WQE is populated before valid bit is set */
+
-+/* CQP and iWARP Completion Queue */
-+#define ZXDH_CQ_QPCTX ZXDH_CQPHC_QPCTX
++ set_64bit_val(wqe_base, 0, hdr);
++ if (post_sq)
++ zxdh_qp_post_wr(qp);
+
-+#define ZXDH_CQ_MINERR GENMASK_ULL(22, 7)
-+#define ZXDH_CQ_MAJERR GENMASK_ULL(38, 23)
-+#define ZXDH_CQ_WQEIDX GENMASK_ULL(54, 40)
-+#define ZXDH_CQ_EXTCQE BIT_ULL(50)
-+#define ZXDH_OOO_CMPL BIT_ULL(54)
-+#define ZXDH_CQ_ERROR BIT_ULL(39)
-+#define ZXDH_CQ_SQ BIT_ULL(4)
++ return 0;
++}
+
-+#define ZXDH_CQ_VALID BIT_ULL(5)
-+#define ZXDH_CQ_IMMVALID BIT_ULL(0)
-+#define ZXDH_CQ_UDSMACVALID BIT_ULL(26)
-+#define ZXDH_CQ_UDVLANVALID BIT_ULL(27)
-+#define ZXDH_CQ_IMMDATA GENMASK_ULL(31, 0)
-+#define ZXDH_CQ_UDSMAC GENMASK_ULL(47, 0)
-+#define ZXDH_CQ_UDVLAN GENMASK_ULL(63, 48)
++/**
++ * zxdh_set_mw_bind_wqe - set mw bind in wqe
++ * @wqe: wqe for setting mw bind
++ * @op_info: info for setting wqe values
++ */
++static void zxdh_set_mw_bind_wqe(__le64 *wqe, struct zxdh_bind_window *op_info)
++{
++ __u32 value = 0;
++ __u8 leaf_pbl_size = op_info->leaf_pbl_size;
+
-+#define ZXDH_CQ_IMMDATA_S 0
-+#define ZXDH_CQ_IMMDATA_M (0xffffffffffffffffULL << ZXDH_CQ_IMMVALID_S)
-+#define ZXDH_CQ_IMMDATALOW32 GENMASK_ULL(31, 0)
-+#define ZXDH_CQ_IMMDATAUP32 GENMASK_ULL(63, 32)
-+#define ZXDHCQ_PAYLDLEN GENMASK_ULL(63, 32)
-+#define ZXDHCQ_TCPSEQNUMRTT GENMASK_ULL(63, 32)
-+#define ZXDHCQ_INVSTAG_S 11
-+#define ZXDHCQ_INVSTAG GENMASK_ULL(42, 11)
-+#define ZXDHCQ_QPID GENMASK_ULL(63, 44)
++ set_64bit_val(wqe, 8, (uintptr_t)op_info->va);
+
-+#define ZXDHCQ_UDSRCQPN GENMASK_ULL(24, 1)
-+#define ZXDHCQ_PSHDROP BIT_ULL(51)
-+#define ZXDHCQ_STAG_S 43
-+#define ZXDHCQ_STAG BIT_ULL(43)
-+#define ZXDHCQ_IPV4 BIT_ULL(25)
-+#define ZXDHCQ_SOEVENT BIT_ULL(6)
-+#define ZXDHCQ_OP GENMASK_ULL(63, 58)
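++ /*
++ * leaf_pbl_size selects how mw_pa_pble_index is encoded into the
++ * PA_PBLE fields of the WQE; a two-level PBL additionally carries
++ * the root leaf offset.
++ */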
++ if (leaf_pbl_size == 0) {
++ value = (__u32)(op_info->mw_pa_pble_index >> 12);
++ value = (value & 0x03FFFFFFFC0000) >> 18;
++ set_64bit_val(
++ wqe, 16,
++ FIELD_PREP(ZXDHQPSQ_PARENTMRSTAG, op_info->mr_stag) |
++ FIELD_PREP(ZXDHQPSQ_MW_PA_PBLE_TWO, value));
++ } else if (leaf_pbl_size == 1) {
++ value = (__u32)((op_info->mw_pa_pble_index & 0x0FFC0000) >> 18);
++ set_64bit_val(
++ wqe, 16,
++ FIELD_PREP(ZXDHQPSQ_PARENTMRSTAG, op_info->mr_stag) |
++ FIELD_PREP(ZXDHQPSQ_MW_PA_PBLE_TWO, value));
++ } else {
++ value = (__u32)((op_info->mw_pa_pble_index & 0x0FFC0000) >> 18);
++ set_64bit_val(
++ wqe, 16,
++ FIELD_PREP(ZXDHQPSQ_PARENTMRSTAG, op_info->mr_stag) |
++ FIELD_PREP(ZXDHQPSQ_MW_LEVLE2_FIRST_PBLE_INDEX,
++ value) |
++ FIELD_PREP(ZXDHQPSQ_MW_LEVLE2_ROOT_PBLE_INDEX,
++ op_info->root_leaf_offset));
++ }
+
-+/* Manage Push Page - MPP */
-+#define ZXDH_INVALID_PUSH_PAGE_INDEX_GEN_1 0xffff
-+#define ZXDH_INVALID_PUSH_PAGE_INDEX 0xffffffff
++ if (leaf_pbl_size == 0) {
++ value = (__u32)(op_info->mw_pa_pble_index >> 12);
++ value = value & 0x3FFFF;
++ } else {
++ value = (__u32)(op_info->mw_pa_pble_index & 0x3FFFF);
++ }
+
-+#define ZXDHQPSQ_OPCODE GENMASK_ULL(62, 57)
-+#define ZXDHQPSQ_COPY_HOST_PBL BIT_ULL(43)
-+#define ZXDHQPSQ_ADDFRAGCNT GENMASK_ULL(39, 32)
-+#define ZXDHQPSQ_PUSHWQE BIT_ULL(56)
-+#define ZXDHQPSQ_STREAMMODE BIT_ULL(58)
-+#define ZXDHQPSQ_WAITFORRCVPDU BIT_ULL(59)
-+#define ZXDHQPSQ_READFENCE BIT_ULL(54)
-+#define ZXDHQPSQ_LOCALFENCE BIT_ULL(55)
-+#define ZXDHQPSQ_UDPHEADER BIT_ULL(61)
-+#define ZXDHQPSQ_L4LEN GENMASK_ULL(45, 42)
-+#define ZXDHQPSQ_SIGCOMPL BIT_ULL(56)
-+#define ZXDHQPSQ_SOLICITED BIT_ULL(53)
-+#define ZXDHQPSQ_VALID BIT_ULL(63)
++ set_64bit_val(wqe, 24,
++ op_info->bind_len |
++ FIELD_PREP(ZXDHQPSQ_MW_PA_PBLE_ONE, value));
++}
+
-+#define ZXDHQPSQ_FIRST_FRAG_VALID BIT_ULL(0)
-+#define ZXDHQPSQ_FIRST_FRAG_LEN GENMASK_ULL(31, 1)
-+#define ZXDHQPSQ_FIRST_FRAG_STAG GENMASK_ULL(63, 32)
-+#define ZXDHQPSQ_FRAG_TO ZXDH_CQPHC_QPCTX
-+#define ZXDHQPSQ_FRAG_VALID BIT_ULL(63)
-+#define ZXDHQPSQ_FRAG_LEN GENMASK_ULL(62, 32)
-+#define ZXDHQPSQ_FRAG_STAG GENMASK_ULL(31, 0)
-+#define ZXDHQPSQ_GEN1_FRAG_LEN GENMASK_ULL(31, 0)
-+#define ZXDHQPSQ_GEN1_FRAG_STAG GENMASK_ULL(63, 32)
-+#define ZXDHQPSQ_REMSTAGINV GENMASK_ULL(31, 0)
-+#define ZXDHQPSQ_DESTQKEY GENMASK_ULL(31, 0)
-+#define ZXDHQPSQ_DESTQPN GENMASK_ULL(55, 32)
-+#define ZXDHQPSQ_AHID GENMASK_ULL(18, 0)
-+#define ZXDHQPSQ_INLINEDATAFLAG BIT_ULL(63)
-+#define ZXDHQPSQ_UD_INLINEDATAFLAG BIT_ULL(50)
-+#define ZXDHQPSQ_UD_INLINEDATALEN GENMASK_ULL(49, 42)
-+#define ZXDHQPSQ_UD_ADDFRAGCNT GENMASK_ULL(36, 29)
-+#define ZXDHQPSQ_WRITE_INLINEDATAFLAG BIT_ULL(48)
-+#define ZXDHQPSQ_WRITE_INLINEDATALEN GENMASK_ULL(47, 40)
++/**
++ * zxdh_copy_inline_data - Copy inline data to wqe
++ * @dest: pointer to wqe
++ * @src: pointer to inline data
++ * @len: length of inline data to copy
++ * @polarity: polarity of wqe valid bit
++ * @imm_data_flag: indicates whether immediate data is present
++ */
++static void zxdh_copy_inline_data(__u8 *dest, __u8 *src, __u32 len,
++ __u8 polarity, bool imm_data_flag)
++{
++ __u8 inline_valid = polarity << ZXDH_INLINE_VALID_S;
++ __u32 copy_size;
++ __u8 *inline_valid_addr;
+
-+#define ZXDH_INLINE_VALID_S 7
-+#define ZXDHQPSQ_INLINE_VALID BIT_ULL(63)
-+#define ZXDHQPSQ_INLINEDATALEN GENMASK_ULL(62, 55)
-+#define ZXDHQPSQ_IMMDATAFLAG BIT_ULL(52)
-+#define ZXDHQPSQ_REPORTRTT BIT_ULL(46)
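++ /*
++ * Layout used below: inline data is scattered into the 32-byte
++ * quanta that follow the WQE header quantum; each quantum holds
++ * 7 bytes, the inline valid byte at offset 7, then up to 24 more
++ * bytes. With immediate data the copy into the first quantum
++ * starts at offset 8 so bytes 0..7 stay free for the imm field.
++ */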
++ dest += ZXDH_WQE_SIZE_32; /* point to additional 32 byte quanta */
++ if (len) {
++ inline_valid_addr = dest + WQE_OFFSET_7BYTES;
++ if (imm_data_flag) {
++ copy_size = len < INLINE_DATASIZE_24BYTES ?
++ len :
++ INLINE_DATASIZE_24BYTES;
++ dest += WQE_OFFSET_8BYTES;
++ memcpy(dest, src, copy_size);
++ len -= copy_size;
++ dest += WQE_OFFSET_24BYTES;
++ src += copy_size;
++ } else {
++ if (len <= INLINE_DATASIZE_7BYTES) {
++ copy_size = len;
++ memcpy(dest, src, copy_size);
++ *inline_valid_addr = inline_valid;
++ return;
++ }
++ memcpy(dest, src, INLINE_DATASIZE_7BYTES);
++ len -= INLINE_DATASIZE_7BYTES;
++ dest += WQE_OFFSET_8BYTES;
++ src += INLINE_DATA_OFFSET_7BYTES;
++ copy_size = len < INLINE_DATASIZE_24BYTES ?
++ len :
++ INLINE_DATASIZE_24BYTES;
++ memcpy(dest, src, copy_size);
++ len -= copy_size;
++ dest += WQE_OFFSET_24BYTES;
++ src += copy_size;
++ }
++ *inline_valid_addr = inline_valid;
++ }
+
-+#define ZXDHQPSQ_IMMDATA GENMASK_ULL(31, 0)
-+#define ZXDHQPSQ_REMSTAG_S 0
-+#define ZXDHQPSQ_REMSTAG GENMASK_ULL(31, 0)
++ while (len) {
++ inline_valid_addr = dest + WQE_OFFSET_7BYTES;
++ if (len <= INLINE_DATASIZE_7BYTES) {
++ copy_size = len;
++ memcpy(dest, src, copy_size);
++ *inline_valid_addr = inline_valid;
++ return;
++ }
++ memcpy(dest, src, INLINE_DATASIZE_7BYTES);
++ len -= INLINE_DATASIZE_7BYTES;
++ dest += WQE_OFFSET_8BYTES;
++ src += INLINE_DATA_OFFSET_7BYTES;
++ copy_size = len < INLINE_DATASIZE_24BYTES ?
++ len :
++ INLINE_DATASIZE_24BYTES;
++ memcpy(dest, src, copy_size);
++ len -= copy_size;
++ dest += WQE_OFFSET_24BYTES;
++ src += copy_size;
+
-+#define ZXDHQPSQ_REMTO ZXDH_CQPHC_QPCTX
++ *inline_valid_addr = inline_valid;
++ }
++}
+
-+#define ZXDHQPSQ_IMMDATA_VALID BIT_ULL(63)
-+#define ZXDHQPSQ_STAGRIGHTS GENMASK_ULL(50, 46)
-+#define ZXDHQPSQ_VABASEDTO BIT_ULL(51)
-+#define ZXDHQPSQ_MEMWINDOWTYPE BIT_ULL(52)
++/**
++ * zxdh_inline_data_size_to_quanta - calculate quanta needed for inline data size
++ * @data_size: data size for inline
++ * @imm_data_flag: flag for immediate data
++ *
++ * Gets the quanta based on inline and immediate data.
++ */
++static __u16 zxdh_inline_data_size_to_quanta(__u32 data_size,
++ bool imm_data_flag)
++{
++ if (imm_data_flag)
++ data_size += INLINE_DATASIZE_7BYTES;
+
-+#define ZXDHQPSQ_MWLEN ZXDH_CQPHC_QPCTX
-+#define ZXDHQPSQ_PARENTMRSTAG GENMASK_ULL(31, 0)
-+#define ZXDHQPSQ_MWSTAG GENMASK_ULL(31, 0)
-+#define ZXDHQPSQ_MW_PA_PBLE_ONE GENMASK_ULL(63, 46)
-+#define ZXDHQPSQ_MW_PA_PBLE_TWO GENMASK_ULL(63, 32)
-+#define ZXDHQPSQ_MW_PA_PBLE_THREE GENMASK_ULL(33, 32)
-+#define ZXDHQPSQ_MW_HOST_PAGE_SIZE GENMASK_ULL(40, 36)
-+#define ZXDHQPSQ_MW_LEAF_PBL_SIZE GENMASK_ULL(35, 34)
-+#define ZXDHQPSQ_MW_LEVLE2_FIRST_PBLE_INDEX GENMASK_ULL(41, 32)
-+#define ZXDHQPSQ_MW_LEVLE2_ROOT_PBLE_INDEX GENMASK_ULL(50, 42)
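++ /*
++ * Each additional 32-byte quantum carries 31 bytes of inline data
++ * (one byte holds the inline valid flag); the +1 accounts for the
++ * first quantum, which holds the WQE header, and a partial last
++ * chunk costs one more quantum.
++ */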
++ return data_size % 31 ? data_size / 31 + 2 : data_size / 31 + 1;
++}
+
-+#define ZXDHQPSQ_BASEVA_TO_FBO ZXDH_CQPHC_QPCTX
++/**
++ * zxdh_inline_rdma_write - inline rdma write operation
++ * @qp: hw qp ptr
++ * @info: post sq information
++ * @post_sq: flag to post sq
++ */
++enum zxdh_status_code zxdh_inline_rdma_write(struct zxdh_qp *qp,
++ struct zxdh_post_sq_info *info,
++ bool post_sq)
++{
++ __le64 *wqe;
++ __u8 imm_valid;
++ struct zxdh_inline_rdma_write *op_info;
++ __u64 hdr = 0;
++ __u32 wqe_idx;
++ bool read_fence = false;
++ __u16 quanta;
++ bool imm_data_flag = info->imm_data_valid ? 1 : 0;
+
-+#define ZXDHQPSQ_LOCSTAG GENMASK_ULL(31, 0)
++ op_info = &info->op.inline_rdma_write;
+
-+#define ZXDHQPSRQ_RSV GENMASK_ULL(63, 40)
-+#define ZXDHQPSRQ_VALID_SGE_NUM GENMASK_ULL(39, 32)
-+#define ZXDHQPSRQ_SIGNATURE GENMASK_ULL(31, 24)
-+#define ZXDHQPSRQ_NEXT_WQE_INDEX GENMASK_ULL(15, 0)
-+#define ZXDHQPSRQ_START_PADDING BIT_ULL(63)
-+#define ZXDHQPSRQ_FRAG_LEN GENMASK_ULL(62, 32)
-+#define ZXDHQPSRQ_FRAG_STAG GENMASK_ULL(31, 0)
++ if (op_info->len > qp->max_inline_data)
++ return ZXDH_ERR_INVALID_INLINE_DATA_SIZE;
++ if (imm_data_flag && op_info->len > ZXDH_MAX_SQ_INLINE_DATELEN_WITH_IMM)
++ return ZXDH_ERR_INVALID_INLINE_DATA_SIZE;
+
-+/* QP RQ WQE common fields */
-+#define ZXDHQPRQ_SIGNATURE GENMASK_ULL(31, 16)
-+#define ZXDHQPRQ_ADDFRAGCNT ZXDHQPSQ_ADDFRAGCNT
-+#define ZXDHQPRQ_VALID ZXDHQPSQ_VALID
-+#define ZXDHQPRQ_COMPLCTX ZXDH_CQPHC_QPCTX
-+#define ZXDHQPRQ_FRAG_LEN ZXDHQPSQ_FRAG_LEN
-+#define ZXDHQPRQ_STAG ZXDHQPSQ_FRAG_STAG
-+#define ZXDHQPRQ_TO ZXDHQPSQ_FRAG_TO
++ quanta = qp->wqe_ops.iw_inline_data_size_to_quanta(op_info->len,
++ imm_data_flag);
++ wqe = zxdh_qp_get_next_send_wqe(qp, &wqe_idx, quanta, op_info->len,
++ info);
++ if (!wqe)
++ return ZXDH_ERR_QP_TOOMANY_WRS_POSTED;
+
-+//QP RQ DBSA fields
-+#define ZXDHQPDBSA_RQ_POLARITY_S 15
-+#define ZXDHQPDBSA_RQ_POLARITY BIT_ULL(15)
-+#define ZXDHQPDBSA_RQ_SW_HEAD_S 0
-+#define ZXDHQPDBSA_RQ_SW_HEAD GENMASK_ULL(14, 0)
++ zxdh_clr_wqes(qp, wqe_idx);
+
-+#define ZXDHPFINT_OICR_HMC_ERR_M BIT(26)
-+#define ZXDHPFINT_OICR_PE_PUSH_M BIT(27)
-+#define ZXDHPFINT_OICR_PE_CRITERR_M BIT(28)
++ read_fence |= info->read_fence;
++ hdr = FIELD_PREP(ZXDHQPSQ_VALID, qp->swqe_polarity) |
++ FIELD_PREP(ZXDHQPSQ_OPCODE, info->op_type) |
++ FIELD_PREP(ZXDHQPSQ_SIGCOMPL, info->signaled) |
++ FIELD_PREP(ZXDHQPSQ_LOCALFENCE, info->local_fence) |
++ FIELD_PREP(ZXDHQPSQ_READFENCE, read_fence) |
++ FIELD_PREP(ZXDHQPSQ_SOLICITED, info->solicited) |
++ FIELD_PREP(ZXDHQPSQ_IMMDATAFLAG, imm_data_flag) |
++ FIELD_PREP(ZXDHQPSQ_WRITE_INLINEDATAFLAG, 1) |
++ FIELD_PREP(ZXDHQPSQ_WRITE_INLINEDATALEN, op_info->len) |
++ FIELD_PREP(ZXDHQPSQ_ADDFRAGCNT, quanta - 1) |
++ FIELD_PREP(ZXDHQPSQ_REMSTAG, op_info->rem_addr.stag);
++ set_64bit_val(wqe, 24,
++ FIELD_PREP(ZXDHQPSQ_FRAG_TO, op_info->rem_addr.tag_off));
+
-+#define ZXDH_SRQ_PARITY_SIGN_S 15
-+#define ZXDH_SRQ_PARITY_SIGN BIT_ULL(15)
-+#define ZXDH_SRQ_SW_SRQ_HEAD_S 0
-+#define ZXDH_SRQ_SW_SRQ_HEAD GENMASK_ULL(14, 0)
-+#define ZXDH_CQE_SQ_OPCODE_RESET BIT(5)
++ if (imm_data_flag) {
++ /* if inline data exists, the imm valid bit is not updated here */
++ imm_valid = (op_info->len == 0) ? qp->swqe_polarity :
++ (!qp->swqe_polarity);
+
-+#define ZXDH_CQP_INIT_WQE(wqe) memset(wqe, 0, 64)
-+
-+#define ZXDH_GET_CURRENT_CQ_ELEM(_cq) \
-+ ((_cq)->cq_base[ZXDH_RING_CURRENT_HEAD((_cq)->cq_ring)].buf)
-+#define ZXDH_GET_CURRENT_EXTENDED_CQ_ELEM(_cq) \
-+ (((struct zxdh_extended_cqe \
-+ *)((_cq)->cq_base))[ZXDH_RING_CURRENT_HEAD((_cq)->cq_ring)] \
-+ .buf)
-+
-+#define ZXDH_RING_INIT(_ring, _size) \
-+ { \
-+ (_ring).head = 0; \
-+ (_ring).tail = 0; \
-+ (_ring).size = (_size); \
-+ }
-+#define ZXDH_RING_SIZE(_ring) ((_ring).size)
-+#define ZXDH_RING_CURRENT_HEAD(_ring) ((_ring).head)
-+#define ZXDH_RING_CURRENT_TAIL(_ring) ((_ring).tail)
-+
-+#define ZXDH_RING_MOVE_HEAD(_ring, _retcode) \
-+ { \
-+ register __u32 size; \
-+ size = (_ring).size; \
-+ if (!ZXDH_RING_FULL_ERR(_ring)) { \
-+ (_ring).head = ((_ring).head + 1) % size; \
-+ (_retcode) = 0; \
-+ } else { \
-+ (_retcode) = ZXDH_ERR_RING_FULL; \
-+ } \
-+ }
-+#define ZXDH_RING_MOVE_HEAD_BY_COUNT(_ring, _count, _retcode) \
-+ { \
-+ register __u32 size; \
-+ size = (_ring).size; \
-+ if ((ZXDH_RING_USED_QUANTA(_ring) + (_count)) < size) { \
-+ (_ring).head = ((_ring).head + (_count)) % size; \
-+ (_retcode) = 0; \
-+ } else { \
-+ (_retcode) = ZXDH_ERR_RING_FULL; \
-+ } \
-+ }
-+#define ZXDH_SQ_RING_MOVE_HEAD(_ring, _retcode) \
-+ { \
-+ register __u32 size; \
-+ size = (_ring).size; \
-+ if (!ZXDH_SQ_RING_FULL_ERR(_ring)) { \
-+ (_ring).head = ((_ring).head + 1) % size; \
-+ (_retcode) = 0; \
-+ } else { \
-+ (_retcode) = ZXDH_ERR_RING_FULL; \
-+ } \
-+ }
-+#define ZXDH_SQ_RING_MOVE_HEAD_BY_COUNT(_ring, _count, _retcode) \
-+ { \
-+ register __u32 size; \
-+ size = (_ring).size; \
-+ if ((ZXDH_RING_USED_QUANTA(_ring) + (_count)) < \
-+ (size - 256)) { \
-+ (_ring).head = ((_ring).head + (_count)) % size; \
-+ (_retcode) = 0; \
-+ } else { \
-+ (_retcode) = ZXDH_ERR_RING_FULL; \
-+ } \
++ set_64bit_val(wqe, 32,
++ FIELD_PREP(ZXDHQPSQ_IMMDATA_VALID, imm_valid) |
++ FIELD_PREP(ZXDHQPSQ_IMMDATA,
++ info->imm_data));
+ }
-+#define ZXDH_RING_MOVE_HEAD_BY_COUNT_NOCHECK(_ring, _count) \
-+ (_ring).head = ((_ring).head + (_count)) % (_ring).size
++ qp->wqe_ops.iw_copy_inline_data((__u8 *)wqe, op_info->data,
++ op_info->len, qp->swqe_polarity,
++ imm_data_flag);
+
-+#define ZXDH_RING_MOVE_TAIL(_ring) \
-+ (_ring).tail = ((_ring).tail + 1) % (_ring).size
++ udma_to_device_barrier(); /* make sure WQE is populated before valid bit is set */
+
-+#define ZXDH_RING_MOVE_HEAD_NOCHECK(_ring) \
-+ (_ring).head = ((_ring).head + 1) % (_ring).size
++ set_64bit_val(wqe, 0, hdr);
+
-+#define ZXDH_RING_MOVE_TAIL_BY_COUNT(_ring, _count) \
-+ (_ring).tail = ((_ring).tail + (_count)) % (_ring).size
++ if (post_sq)
++ zxdh_qp_post_wr(qp);
++ qp_tx_psn_add(&qp->next_psn, op_info->len, qp->mtu);
++ return 0;
++}
+
-+#define ZXDH_RING_SET_TAIL(_ring, _pos) (_ring).tail = (_pos) % (_ring).size
++/**
++ * zxdh_rc_inline_send - inline send operation
++ * @qp: hw qp ptr
++ * @info: post sq information
++ * @post_sq: flag to post sq
++ */
++enum zxdh_status_code zxdh_rc_inline_send(struct zxdh_qp *qp,
++ struct zxdh_post_sq_info *info,
++ bool post_sq)
++{
++ __le64 *wqe;
++ __u8 imm_valid;
++ struct zxdh_inline_rdma_send *op_info;
++ __u64 hdr;
++ __u32 wqe_idx;
++ bool read_fence = false;
++ __u16 quanta;
++ bool imm_data_flag = info->imm_data_valid ? 1 : 0;
+
-+#define ZXDH_RING_FULL_ERR(_ring) \
-+ ((ZXDH_RING_USED_QUANTA(_ring) == ((_ring).size - 1)))
++ op_info = &info->op.inline_rdma_send;
+
-+#define ZXDH_ERR_RING_FULL2(_ring) \
-+ ((ZXDH_RING_USED_QUANTA(_ring) == ((_ring).size - 2)))
++ if (op_info->len > qp->max_inline_data)
++ return ZXDH_ERR_INVALID_INLINE_DATA_SIZE;
++ if (imm_data_flag && op_info->len > ZXDH_MAX_SQ_INLINE_DATELEN_WITH_IMM)
++ return ZXDH_ERR_INVALID_INLINE_DATA_SIZE;
+
-+#define ZXDH_ERR_RING_FULL3(_ring) \
-+ ((ZXDH_RING_USED_QUANTA(_ring) == ((_ring).size - 3)))
++ quanta = qp->wqe_ops.iw_inline_data_size_to_quanta(op_info->len,
++ imm_data_flag);
++ wqe = zxdh_qp_get_next_send_wqe(qp, &wqe_idx, quanta, op_info->len,
++ info);
++ if (!wqe)
++ return ZXDH_ERR_QP_TOOMANY_WRS_POSTED;
+
-+#define ZXDH_SQ_RING_FULL_ERR(_ring) \
-+ ((ZXDH_RING_USED_QUANTA(_ring) == ((_ring).size - 257)))
++ zxdh_clr_wqes(qp, wqe_idx);
+
-+#define ZXDH_ERR_SQ_RING_FULL2(_ring) \
-+ ((ZXDH_RING_USED_QUANTA(_ring) == ((_ring).size - 258)))
-+#define ZXDH_ERR_SQ_RING_FULL3(_ring) \
-+ ((ZXDH_RING_USED_QUANTA(_ring) == ((_ring).size - 259)))
-+#define ZXDH_RING_MORE_WORK(_ring) ((ZXDH_RING_USED_QUANTA(_ring) != 0))
++ read_fence |= info->read_fence;
++ hdr = FIELD_PREP(ZXDHQPSQ_VALID, qp->swqe_polarity) |
++ FIELD_PREP(ZXDHQPSQ_OPCODE, info->op_type) |
++ FIELD_PREP(ZXDHQPSQ_SIGCOMPL, info->signaled) |
++ FIELD_PREP(ZXDHQPSQ_LOCALFENCE, info->local_fence) |
++ FIELD_PREP(ZXDHQPSQ_READFENCE, read_fence) |
++ FIELD_PREP(ZXDHQPSQ_SOLICITED, info->solicited) |
++ FIELD_PREP(ZXDHQPSQ_ADDFRAGCNT, quanta - 1) |
++ FIELD_PREP(ZXDHQPSQ_IMMDATAFLAG, imm_data_flag) |
++ FIELD_PREP(ZXDHQPSQ_REMSTAG, info->stag_to_inv);
++ set_64bit_val(wqe, 24,
++ FIELD_PREP(ZXDHQPSQ_INLINEDATAFLAG, 1) |
++ FIELD_PREP(ZXDHQPSQ_INLINEDATALEN, op_info->len));
+
-+#define ZXDH_RING_USED_QUANTA(_ring) \
-+ ((((_ring).head + (_ring).size - (_ring).tail) % (_ring).size))
++ if (imm_data_flag) {
++ /* if inline data exists, the imm valid bit is not updated here */
++ imm_valid = (op_info->len == 0) ? qp->swqe_polarity :
++ (!qp->swqe_polarity);
++ set_64bit_val(wqe, 32,
++ FIELD_PREP(ZXDHQPSQ_IMMDATA_VALID, imm_valid) |
++ FIELD_PREP(ZXDHQPSQ_IMMDATA,
++ info->imm_data));
++ }
+
-+#define ZXDH_RING_FREE_QUANTA(_ring) \
-+ (((_ring).size - ZXDH_RING_USED_QUANTA(_ring) - 1))
++ qp->wqe_ops.iw_copy_inline_data((__u8 *)wqe, op_info->data,
++ op_info->len, qp->swqe_polarity,
++ imm_data_flag);
+
-+#define ZXDH_SQ_RING_FREE_QUANTA(_ring) \
-+ (((_ring).size - ZXDH_RING_USED_QUANTA(_ring) - 257))
++ udma_to_device_barrier(); /* make sure WQE is populated before valid bit is set */
+
-+#define ZXDH_ATOMIC_RING_MOVE_HEAD(_ring, index, _retcode) \
-+ { \
-+ index = ZXDH_RING_CURRENT_HEAD(_ring); \
-+ ZXDH_RING_MOVE_HEAD(_ring, _retcode); \
-+ }
++ set_64bit_val(wqe, 0, hdr);
+
-+enum zxdh_qp_wqe_size {
-+ ZXDH_WQE_SIZE_32 = 32,
-+ ZXDH_WQE_SIZE_64 = 64,
-+ ZXDH_WQE_SIZE_96 = 96,
-+ ZXDH_WQE_SIZE_128 = 128,
-+ ZXDH_WQE_SIZE_256 = 256,
-+};
++ if (post_sq)
++ zxdh_qp_post_wr(qp);
+
-+/**
-+ * set_64bit_val - set 64 bit value to hw wqe
-+ * @wqe_words: wqe addr to write
-+ * @byte_index: index in wqe
-+ * @val: value to write
-+ **/
-+static inline void set_64bit_val(__le64 *wqe_words, __u32 byte_index, __u64 val)
-+{
-+ wqe_words[byte_index >> 3] = htole64(val);
++ qp_tx_psn_add(&qp->next_psn, op_info->len, qp->mtu);
++ return 0;
+}
+
+/**
-+ * set_32bit_val - set 32 bit value to hw wqe
-+ * @wqe_words: wqe addr to write
-+ * @byte_index: index in wqe
-+ * @val: value to write
-+ **/
-+static inline void set_32bit_val(__le32 *wqe_words, __u32 byte_index, __u32 val)
++ * zxdh_ud_inline_send - inline send operation
++ * @qp: hw qp ptr
++ * @info: post sq information
++ * @post_sq: flag to post sq
++ */
++enum zxdh_status_code zxdh_ud_inline_send(struct zxdh_qp *qp,
++ struct zxdh_post_sq_info *info,
++ bool post_sq)
+{
-+ wqe_words[byte_index >> 2] = htole32(val);
-+}
++ __le64 *wqe_base;
++ __le64 *wqe_ex;
++ struct zxdh_inline_rdma_send *op_info;
++ __u64 hdr;
++ __u32 wqe_idx;
++ bool read_fence = false;
++ __u16 quanta;
++ bool imm_data_flag = info->imm_data_valid ? 1 : 0;
++ __u8 *inline_dest;
++ __u8 *inline_src;
++ __u32 inline_len;
++ __u32 copy_size;
++ __u8 *inline_valid_addr;
+
-+/**
-+ * set_16bit_val - set 16 bit value to hw wqe
-+ * @wqe_words: wqe addr to write
-+ * @byte_index: index in wqe
-+ * @val: value to write
-+ **/
-+static inline void set_16bit_val(__le16 *wqe_words, __u32 byte_index, __u16 val)
-+{
-+ wqe_words[byte_index >> 1] = htole16(val);
-+}
++ op_info = &info->op.inline_rdma_send;
++ inline_len = op_info->len;
+
-+/**
-+ * get_64bit_val - read 64 bit value from wqe
-+ * @wqe_words: wqe addr
-+ * @byte_index: index to read from
-+ * @val: read value
-+ **/
-+static inline void get_64bit_val(__le64 *wqe_words, __u32 byte_index,
-+ __u64 *val)
-+{
-+ *val = le64toh(wqe_words[byte_index >> 3]);
-+}
++ if (op_info->len > qp->max_inline_data)
++ return ZXDH_ERR_INVALID_INLINE_DATA_SIZE;
++ if (imm_data_flag && op_info->len > ZXDH_MAX_SQ_INLINE_DATELEN_WITH_IMM)
++ return ZXDH_ERR_INVALID_INLINE_DATA_SIZE;
+
-+/**
-+ * get_32bit_val - read 32 bit value from wqe
-+ * @wqe_words: wqe addr
-+ * @byte_index: index to read from
-+ * @val: return 32 bit value
-+ **/
-+static inline void get_32bit_val(__le32 *wqe_words, __u32 byte_index,
-+ __u32 *val)
-+{
-+ *val = le32toh(wqe_words[byte_index >> 2]);
-+}
++ quanta = qp->wqe_ops.iw_inline_data_size_to_quanta(op_info->len,
++ imm_data_flag);
++ if (quanta > ZXDH_SQ_RING_FREE_QUANTA(qp->sq_ring))
++ return ZXDH_ERR_QP_TOOMANY_WRS_POSTED;
+
-+#define read_wqe_need_split(pre_cal_psn, next_psn) \
-+ (((pre_cal_psn < next_psn) && (pre_cal_psn != 0)) || \
-+ ((next_psn <= 0x7FFFFF) && (pre_cal_psn > 0x800000)))
++ wqe_idx = ZXDH_RING_CURRENT_HEAD(qp->sq_ring);
++ if (!wqe_idx)
++ qp->swqe_polarity = !qp->swqe_polarity;
+
-+#endif /* ZXDH_DEFS_H */
-diff --git a/providers/zrdma/libzrdma.map b/providers/zrdma/libzrdma.map
-new file mode 100644
-index 0000000..f95de4b
---- /dev/null
-+++ b/providers/zrdma/libzrdma.map
-@@ -0,0 +1,16 @@
-+/* Export symbols should be added below according to
-+ Documentation/versioning.md document. */
-+ZRDMA_1.0 {
-+ global:
-+ zxdh_get_log_trace_switch;
-+ local: *;
-+};
-+
-+ZRDMA_1.1 {
-+ global:
-+ zxdh_set_log_trace_switch;
-+ zxdh_modify_qp_udp_sport;
-+ zxdh_query_qpc;
-+ zxdh_modify_qpc;
-+ zxdh_reset_qp;
-+} ZRDMA_1.0;
-diff --git a/providers/zrdma/osdep.h b/providers/zrdma/osdep.h
-new file mode 100644
-index 0000000..f324b83
---- /dev/null
-+++ b/providers/zrdma/osdep.h
-@@ -0,0 +1,21 @@
-+/* SPDX-License-Identifier: GPL-2.0 or Linux-OpenIB */
-+/* Copyright (c) 2024 ZTE Corporation. All rights reserved. */
-+#ifndef ZXDH_OSDEP_H
-+#define ZXDH_OSDEP_H
-+
-+#include
-+#include
-+#include
-+#include
-+#include
-+#include
-+#include
-+#include
-+#include
-+
-+static inline void db_wr32(__u32 val, __u32 *wqe_word)
-+{
-+ *wqe_word = val;
-+}
++ ZXDH_RING_MOVE_HEAD_BY_COUNT_NOCHECK(qp->sq_ring, quanta);
+
-+#endif /* ZXDH_OSDEP_H */
-diff --git a/providers/zrdma/private_verbs_cmd.c b/providers/zrdma/private_verbs_cmd.c
-new file mode 100644
-index 0000000..63202ec
---- /dev/null
-+++ b/providers/zrdma/private_verbs_cmd.c
-@@ -0,0 +1,203 @@
-+// SPDX-License-Identifier: GPL-2.0 or Linux-OpenIB
-+/* Copyright (c) 2024 ZTE Corporation. All rights reserved. */
-+#include
-+#include
-+#include "private_verbs_cmd.h"
-+#include "zxdh_dv.h"
++ wqe_base = qp->sq_base[wqe_idx].elem;
++ qp->sq_wrtrk_array[wqe_idx].wrid = info->wr_id;
++ qp->sq_wrtrk_array[wqe_idx].wr_len = op_info->len;
++ qp->sq_wrtrk_array[wqe_idx].quanta = quanta;
+
-+static void copy_query_qpc(struct zxdh_query_qpc_resp *resp,
-+ struct zxdh_rdma_qpc *qpc)
-+{
-+ qpc->ack_err_flag = resp->ack_err_flag;
-+ qpc->retry_flag = resp->retry_flag;
-+ qpc->rnr_retry_flag = resp->rnr_retry_flag;
-+ qpc->cur_retry_count = resp->cur_retry_count;
-+ qpc->retry_cqe_sq_opcode = resp->retry_cqe_sq_opcode;
-+ qpc->err_flag = resp->err_flag;
-+ qpc->package_err_flag = resp->package_err_flag;
-+ qpc->recv_err_flag = resp->recv_err_flag;
-+ qpc->tx_last_ack_psn = resp->tx_last_ack_psn;
-+ qpc->retry_count = resp->retry_count;
-+ qpc->read_retry_flag = resp->read_retry_flag;
-+}
++ zxdh_clr_wqes(qp, wqe_idx);
+
-+int _zxdh_query_qpc(struct ibv_qp *qp, struct zxdh_rdma_qpc *qpc)
-+{
-+ DECLARE_COMMAND_BUFFER(cmd, ZXDH_IB_OBJECT_QP_OBJ,
-+ ZXDH_IB_METHOD_QP_QUERY_QPC, 2);
-+ int ret;
-+ struct zxdh_query_qpc_resp resp_ex = { 0 };
++ read_fence |= info->read_fence;
++ hdr = FIELD_PREP(ZXDHQPSQ_VALID, qp->swqe_polarity) |
++ FIELD_PREP(ZXDHQPSQ_OPCODE, info->op_type) |
++ FIELD_PREP(ZXDHQPSQ_SIGCOMPL, info->signaled) |
++ FIELD_PREP(ZXDHQPSQ_SOLICITED, info->solicited) |
++ FIELD_PREP(ZXDHQPSQ_IMMDATAFLAG, imm_data_flag) |
++ FIELD_PREP(ZXDHQPSQ_UD_INLINEDATAFLAG, 1) |
++ FIELD_PREP(ZXDHQPSQ_UD_INLINEDATALEN, op_info->len) |
++ FIELD_PREP(ZXDHQPSQ_UD_ADDFRAGCNT, quanta - 1) |
++ FIELD_PREP(ZXDHQPSQ_AHID, op_info->ah_id);
++ set_64bit_val(wqe_base, 24,
++ FIELD_PREP(ZXDHQPSQ_DESTQPN, op_info->dest_qp) |
++ FIELD_PREP(ZXDHQPSQ_DESTQKEY, op_info->qkey));
+
-+ fill_attr_in_obj(cmd, ZXDH_IB_ATTR_QP_QUERY_HANDLE, qp->handle);
-+ fill_attr_out_ptr(cmd, ZXDH_IB_ATTR_QP_QUERY_RESP, &resp_ex);
++ if (imm_data_flag) {
++ wqe_idx = (wqe_idx + 1) % qp->sq_ring.size;
++ if (!wqe_idx)
++ qp->swqe_polarity = !qp->swqe_polarity;
++ wqe_ex = qp->sq_base[wqe_idx].elem;
+
-+ ret = execute_ioctl(qp->context, cmd);
-+ if (ret)
-+ return ret;
++ if (inline_len) {
++ /* imm and inline data share the same valid bit; it is set after the inline data is updated */
++ copy_size = inline_len < INLINE_DATASIZE_24BYTES ?
++ inline_len :
++ INLINE_DATASIZE_24BYTES;
++ inline_dest = (__u8 *)wqe_ex + WQE_OFFSET_8BYTES;
++ inline_src = (__u8 *)op_info->data;
++ memcpy(inline_dest, inline_src, copy_size);
++ inline_len -= copy_size;
++ inline_src += copy_size;
++ }
++ set_64bit_val(
++ wqe_ex, 0,
++ FIELD_PREP(ZXDHQPSQ_IMMDATA_VALID, qp->swqe_polarity) |
++ FIELD_PREP(ZXDHQPSQ_IMMDATA, info->imm_data));
+
-+ copy_query_qpc(&resp_ex, qpc);
-+ return 0;
-+}
++ } else if (inline_len) {
++ wqe_idx = (wqe_idx + 1) % qp->sq_ring.size;
++ if (!wqe_idx)
++ qp->swqe_polarity = !qp->swqe_polarity;
++ wqe_ex = qp->sq_base[wqe_idx].elem;
++ inline_dest = (__u8 *)wqe_ex;
++ inline_src = (__u8 *)op_info->data;
+
-+static void copy_modify_qpc_fields(struct zxdh_modify_qpc_req *req_cmd,
-+ uint64_t attr_mask,
-+ struct zxdh_rdma_qpc *qpc)
-+{
-+ if (attr_mask & ZXDH_TX_READ_RETRY_FLAG_SET) {
-+ req_cmd->retry_flag = qpc->retry_flag;
-+ req_cmd->rnr_retry_flag = qpc->rnr_retry_flag;
-+ req_cmd->read_retry_flag = qpc->read_retry_flag;
-+ req_cmd->cur_retry_count = qpc->cur_retry_count;
++ if (inline_len <= INLINE_DATASIZE_7BYTES) {
++ copy_size = inline_len;
++ memcpy(inline_dest, inline_src, copy_size);
++ inline_len = 0;
++ } else {
++ copy_size = INLINE_DATASIZE_7BYTES;
++ memcpy(inline_dest, inline_src, copy_size);
++ inline_len -= copy_size;
++ inline_src += copy_size;
++ inline_dest += WQE_OFFSET_8BYTES;
++ copy_size = inline_len < INLINE_DATASIZE_24BYTES ?
++ inline_len :
++ INLINE_DATASIZE_24BYTES;
++ memcpy(inline_dest, inline_src, copy_size);
++ inline_len -= copy_size;
++ inline_src += copy_size;
++ }
++ inline_valid_addr = (__u8 *)wqe_ex + WQE_OFFSET_7BYTES;
++ *inline_valid_addr = qp->swqe_polarity << ZXDH_INLINE_VALID_S;
+ }
-+ if (attr_mask & ZXDH_RETRY_CQE_SQ_OPCODE)
-+ req_cmd->retry_cqe_sq_opcode = qpc->retry_cqe_sq_opcode;
+
-+ if (attr_mask & ZXDH_ERR_FLAG_SET) {
-+ req_cmd->err_flag = qpc->err_flag;
-+ req_cmd->ack_err_flag = qpc->ack_err_flag;
++ while (inline_len) {
++ wqe_idx = (wqe_idx + 1) % qp->sq_ring.size;
++ if (!wqe_idx)
++ qp->swqe_polarity = !qp->swqe_polarity;
++ wqe_ex = qp->sq_base[wqe_idx].elem;
++ inline_dest = (__u8 *)wqe_ex;
++
++ if (inline_len <= INLINE_DATASIZE_7BYTES) {
++ copy_size = inline_len;
++ memcpy(inline_dest, inline_src, copy_size);
++ inline_len = 0;
++ } else {
++ copy_size = INLINE_DATASIZE_7BYTES;
++ memcpy(inline_dest, inline_src, copy_size);
++ inline_len -= copy_size;
++ inline_src += copy_size;
++ inline_dest += WQE_OFFSET_8BYTES;
++ copy_size = inline_len < INLINE_DATASIZE_24BYTES ?
++ inline_len :
++ INLINE_DATASIZE_24BYTES;
++ memcpy(inline_dest, inline_src, copy_size);
++ inline_len -= copy_size;
++ inline_src += copy_size;
++ }
++ inline_valid_addr = (__u8 *)wqe_ex + WQE_OFFSET_7BYTES;
++ *inline_valid_addr = qp->swqe_polarity << ZXDH_INLINE_VALID_S;
+ }
-+ if (attr_mask & ZXDH_PACKAGE_ERR_FLAG)
-+ req_cmd->package_err_flag = qpc->package_err_flag;
-+}
+
-+int _zxdh_reset_qp(struct ibv_qp *qp, uint64_t opcode)
-+{
-+ DECLARE_COMMAND_BUFFER(cmd, ZXDH_IB_OBJECT_QP_OBJ,
-+ ZXDH_IB_METHOD_QP_RESET_QP, 2);
-+ int ret;
++ udma_to_device_barrier(); /* make sure WQE is populated before valid bit is set */
+
-+ fill_attr_in_obj(cmd, ZXDH_IB_ATTR_QP_RESET_QP_HANDLE, qp->handle);
-+ fill_attr_in_uint64(cmd, ZXDH_IB_ATTR_QP_RESET_OP_CODE, opcode);
-+ return execute_ioctl(qp->context, cmd);
-+}
++ set_64bit_val(wqe_base, 0, hdr);
+
-+int _zxdh_modify_qpc(struct ibv_qp *qp, struct zxdh_rdma_qpc *qpc,
-+ uint64_t qpc_mask)
-+{
-+ DECLARE_COMMAND_BUFFER(cmd, ZXDH_IB_OBJECT_QP_OBJ,
-+ ZXDH_IB_METHOD_QP_MODIFY_QPC, 3);
-+ int ret;
-+ struct zxdh_modify_qpc_req req = { 0 };
++ if (post_sq)
++ zxdh_qp_post_wr(qp);
+
-+ copy_modify_qpc_fields(&req, qpc_mask, qpc);
-+ fill_attr_in_obj(cmd, ZXDH_IB_ATTR_QP_QUERY_HANDLE, qp->handle);
-+ fill_attr_in_uint64(cmd, ZXDH_IB_ATTR_QP_MODIFY_QPC_MASK, qpc_mask);
-+ fill_attr_in_ptr(cmd, ZXDH_IB_ATTR_QP_MODIFY_QPC_REQ, &req);
-+ return execute_ioctl(qp->context, cmd);
++ return 0;
+}
+
-+static int _zxdh_modify_qp_udp_sport(struct ibv_context *ibctx,
-+ uint16_t udp_sport, uint32_t qpn)
++/**
++ * zxdh_stag_local_invalidate - stag invalidate operation
++ * @qp: hw qp ptr
++ * @info: post sq information
++ * @post_sq: flag to post sq
++ */
++enum zxdh_status_code zxdh_stag_local_invalidate(struct zxdh_qp *qp,
++ struct zxdh_post_sq_info *info,
++ bool post_sq)
+{
-+ if (udp_sport <= MIN_UDP_SPORT || qpn <= MIN_QP_QPN)
-+ return -EINVAL;
++ __le64 *wqe;
++ struct zxdh_inv_local_stag *op_info;
++ __u64 hdr;
++ __u32 wqe_idx;
++ bool local_fence = true;
+
-+ DECLARE_COMMAND_BUFFER(cmd, ZXDH_IB_OBJECT_QP_OBJ,
-+ ZXDH_IB_METHOD_QP_MODIFY_UDP_SPORT, 2);
-+ fill_attr_in(cmd, ZXDH_IB_ATTR_QP_UDP_PORT, &udp_sport,
-+ sizeof(udp_sport));
-+ fill_attr_in_uint32(cmd, ZXDH_IB_ATTR_QP_QPN, qpn);
-+ return execute_ioctl(ibctx, cmd);
-+}
++ op_info = &info->op.inv_local_stag;
+
-+static int _zxdh_get_log_trace_switch(struct ibv_context *ibctx,
-+ uint8_t *switch_status)
-+{
-+ DECLARE_COMMAND_BUFFER(cmd, ZXDH_IB_OBJECT_DEV,
-+ ZXDH_IB_METHOD_DEV_GET_LOG_TRACE, 1);
++ wqe = zxdh_qp_get_next_send_wqe(qp, &wqe_idx, ZXDH_QP_WQE_MIN_QUANTA, 0,
++ info);
++ if (!wqe)
++ return ZXDH_ERR_QP_TOOMANY_WRS_POSTED;
+
-+ fill_attr_out_ptr(cmd, ZXDH_IB_ATTR_DEV_GET_LOG_TARCE_SWITCH,
-+ switch_status);
-+ return execute_ioctl(ibctx, cmd);
-+}
++ zxdh_clr_wqes(qp, wqe_idx);
+
-+static int _zxdh_set_log_trace_switch(struct ibv_context *ibctx,
-+ uint8_t switch_status)
-+{
-+ DECLARE_COMMAND_BUFFER(cmd, ZXDH_IB_OBJECT_DEV,
-+ ZXDH_IB_METHOD_DEV_SET_LOG_TRACE, 1);
-+ fill_attr_in(cmd, ZXDH_IB_ATTR_DEV_SET_LOG_TARCE_SWITCH, &switch_status,
-+ sizeof(switch_status));
-+ return execute_ioctl(ibctx, cmd);
-+}
++ set_64bit_val(wqe, 16, 0);
+
-+static struct zxdh_uvcontext_ops zxdh_ctx_ops = {
-+ .modify_qp_udp_sport = _zxdh_modify_qp_udp_sport,
-+ .get_log_trace_switch = _zxdh_get_log_trace_switch,
-+ .set_log_trace_switch = _zxdh_set_log_trace_switch,
-+ .query_qpc = _zxdh_query_qpc,
-+ .modify_qpc = _zxdh_modify_qpc,
-+ .reset_qp = _zxdh_reset_qp,
-+};
++ hdr = FIELD_PREP(ZXDHQPSQ_VALID, qp->swqe_polarity) |
++ FIELD_PREP(ZXDHQPSQ_OPCODE, ZXDH_OP_TYPE_LOCAL_INV) |
++ FIELD_PREP(ZXDHQPSQ_SIGCOMPL, info->signaled) |
++ FIELD_PREP(ZXDHQPSQ_LOCALFENCE, local_fence) |
++ FIELD_PREP(ZXDHQPSQ_READFENCE, info->read_fence) |
++ FIELD_PREP(ZXDHQPSQ_REMSTAG, op_info->target_stag);
+
-+static inline struct zxdh_uvcontext *to_zxdhtx(struct ibv_context *ibctx)
-+{
-+ return container_of(ibctx, struct zxdh_uvcontext, ibv_ctx.context);
++ udma_to_device_barrier(); /* make sure WQE is populated before valid bit is set */
++
++ set_64bit_val(wqe, 0, hdr);
++
++ if (post_sq)
++ zxdh_qp_post_wr(qp);
++
++ return 0;
+}
+
-+int zxdh_reset_qp(struct ibv_qp *qp, uint64_t opcode)
++/**
++ * zxdh_mw_bind - bind Memory Window
++ * @qp: hw qp ptr
++ * @info: post sq information
++ * @post_sq: flag to post sq
++ */
++enum zxdh_status_code zxdh_mw_bind(struct zxdh_qp *qp,
++ struct zxdh_post_sq_info *info, bool post_sq)
+{
-+ struct zxdh_uvcontext_ops *dvops = to_zxdhtx(qp->context)->cxt_ops;
++ __le64 *wqe;
++ struct zxdh_bind_window *op_info;
++ __u64 hdr;
++ __u32 wqe_idx;
++ bool local_fence = true;
++ __u8 access = 1;
++ __u16 value = 0;
+
-+ if (!dvops || !dvops->reset_qp)
-+ return -EOPNOTSUPP;
-+ return dvops->reset_qp(qp, opcode);
++ op_info = &info->op.bind_window;
++ local_fence |= info->local_fence;
++
++ wqe = zxdh_qp_get_next_send_wqe(qp, &wqe_idx, ZXDH_QP_WQE_MIN_QUANTA, 0,
++ info);
++ if (!wqe)
++ return ZXDH_ERR_QP_TOOMANY_WRS_POSTED;
++
++ zxdh_clr_wqes(qp, wqe_idx);
++
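++ /*
++ * Build the STAG access bits: bit 0 is always set, bit 2 follows
++ * ena_reads, bit 3 follows ena_writes, and bit 1 is additionally
++ * set when writes are enabled.
++ */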
++ if (op_info->ena_writes) {
++ access = (op_info->ena_reads << 2) |
++ (op_info->ena_writes << 3) | (1 << 1) | access;
++ } else {
++ access = (op_info->ena_reads << 2) |
++ (op_info->ena_writes << 3) | access;
++ }
++
++ qp->wqe_ops.iw_set_mw_bind_wqe(wqe, op_info);
++
++ value = (__u16)((op_info->mw_pa_pble_index >> 12) & 0xC000000000000);
++
++ hdr = FIELD_PREP(ZXDHQPSQ_OPCODE, ZXDH_OP_TYPE_BIND_MW) |
++ FIELD_PREP(ZXDHQPSQ_MWSTAG, op_info->mw_stag) |
++ FIELD_PREP(ZXDHQPSQ_STAGRIGHTS, access) |
++ FIELD_PREP(ZXDHQPSQ_VABASEDTO,
++ (op_info->addressing_type == ZXDH_ADDR_TYPE_VA_BASED ?
++ 1 :
++ 0)) |
++ FIELD_PREP(ZXDHQPSQ_MEMWINDOWTYPE,
++ (op_info->mem_window_type_1 ? 1 : 0)) |
++ FIELD_PREP(ZXDHQPSQ_READFENCE, info->read_fence) |
++ FIELD_PREP(ZXDHQPSQ_LOCALFENCE, local_fence) |
++ FIELD_PREP(ZXDHQPSQ_SIGCOMPL, info->signaled) |
++ FIELD_PREP(ZXDHQPSQ_MW_HOST_PAGE_SIZE, op_info->host_page_size) |
++ FIELD_PREP(ZXDHQPSQ_MW_LEAF_PBL_SIZE, op_info->leaf_pbl_size) |
++ FIELD_PREP(ZXDHQPSQ_MW_PA_PBLE_THREE, value) |
++ FIELD_PREP(ZXDHQPSQ_VALID, qp->swqe_polarity);
++
++ udma_to_device_barrier(); /* make sure WQE is populated before valid bit is set */
++
++ set_64bit_val(wqe, 0, hdr);
++
++ if (post_sq)
++ zxdh_qp_post_wr(qp);
++
++ return 0;
+}
+
-+int zxdh_modify_qpc(struct ibv_qp *qp, struct zxdh_rdma_qpc *qpc,
-+ uint64_t qpc_mask)
++static void zxdh_sleep_ns(unsigned int nanoseconds)
+{
-+ struct zxdh_uvcontext_ops *dvops = to_zxdhtx(qp->context)->cxt_ops;
++ struct timespec req;
+
-+ if (!dvops || !dvops->modify_qpc)
-+ return -EOPNOTSUPP;
-+ return dvops->modify_qpc(qp, qpc, qpc_mask);
++ req.tv_sec = 0;
++ req.tv_nsec = nanoseconds;
++ nanosleep(&req, NULL);
+}
+
-+int zxdh_query_qpc(struct ibv_qp *qp, struct zxdh_rdma_qpc *qpc)
++/**
++ * zxdh_post_receive - post receive wqe
++ * @qp: hw qp ptr
++ * @info: post rq information
++ */
++enum zxdh_status_code zxdh_post_receive(struct zxdh_qp *qp,
++ struct zxdh_post_rq_info *info)
+{
-+ struct zxdh_uvcontext_ops *dvops = to_zxdhtx(qp->context)->cxt_ops;
++ __u32 wqe_idx, i, byte_off;
++ __le64 *wqe;
++ struct zxdh_sge *sge;
+
-+ if (!dvops || !dvops->query_qpc)
-+ return -EOPNOTSUPP;
++ if (qp->max_rq_frag_cnt < info->num_sges)
++ return ZXDH_ERR_INVALID_FRAG_COUNT;
+
-+ return dvops->query_qpc(qp, qpc);
-+}
++ wqe = zxdh_qp_get_next_recv_wqe(qp, &wqe_idx);
++ if (unlikely(!wqe))
++ return ZXDH_ERR_QP_TOOMANY_WRS_POSTED;
+
-+int zxdh_modify_qp_udp_sport(struct ibv_context *context, uint16_t udp_sport,
-+ uint32_t qpn)
-+{
-+ struct zxdh_uvcontext_ops *dvops = to_zxdhtx(context)->cxt_ops;
++ qp->rq_wrid_array[wqe_idx] = info->wr_id;
+
-+ if (!dvops || !dvops->modify_qp_udp_sport)
-+ return -EOPNOTSUPP;
++ for (i = 0, byte_off = ZXDH_QP_FRAG_BYTESIZE; i < info->num_sges; i++) {
++ sge = &info->sg_list[i];
++ set_64bit_val(wqe, byte_off, sge->tag_off);
++ set_64bit_val(wqe, byte_off + 8,
++ FIELD_PREP(ZXDHQPRQ_FRAG_LEN, sge->len) |
++ FIELD_PREP(ZXDHQPRQ_STAG, sge->stag));
++ byte_off += ZXDH_QP_FRAG_BYTESIZE;
++ }
+
-+ return dvops->modify_qp_udp_sport(context, udp_sport, qpn);
-+}
++ /*
++ * When info->num_sges < qp->max_rq_frag_cnt, or info->num_sges == 0,
++ * fill the next fragment with FRAG_LEN=0 and FRAG_STAG=0x00000100,
++ * which indicates an invalid fragment.
++ */
++ if (info->num_sges < qp->max_rq_frag_cnt || 0 == info->num_sges) {
++ set_64bit_val(wqe, byte_off, 0);
++ set_64bit_val(wqe, byte_off + 8,
++ FIELD_PREP(ZXDHQPRQ_FRAG_LEN, 0) |
++ FIELD_PREP(ZXDHQPRQ_STAG, 0x00000100));
++ }
+
-+int zxdh_get_log_trace_switch(struct ibv_context *context,
-+ enum switch_status *switch_status)
-+{
-+ struct zxdh_uvcontext_ops *dvops = to_zxdhtx(context)->cxt_ops;
++ set_64bit_val(wqe, 0,
++ FIELD_PREP(ZXDHQPRQ_ADDFRAGCNT, info->num_sges) |
++ FIELD_PREP(ZXDHQPRQ_SIGNATURE,
++ qp->rwqe_signature));
+
-+ if (!dvops || !dvops->get_log_trace_switch)
-+ return -EOPNOTSUPP;
++ udma_to_device_barrier(); /* make sure WQE is populated before valid bit is set */
++ if (info->num_sges > 3)
++ zxdh_sleep_ns(1000);
+
-+ return dvops->get_log_trace_switch(context, (uint8_t *)switch_status);
++ set_64bit_val(wqe, 8, FIELD_PREP(ZXDHQPRQ_VALID, qp->rwqe_polarity));
++
++ return 0;
+}
+
-+int zxdh_set_log_trace_switch(struct ibv_context *context,
-+ enum switch_status switch_status)
++/**
++ * zxdh_cq_resize - reset the cq buffer info
++ * @cq: cq to resize
++ * @cq_base: new cq buffer addr
++ * @cq_size: number of cqes
++ */
++void zxdh_cq_resize(struct zxdh_cq *cq, void *cq_base, int cq_size)
+{
-+ struct zxdh_uvcontext_ops *dvops = to_zxdhtx(context)->cxt_ops;
++ cq->cq_base = cq_base;
++ cq->cq_size = cq_size;
++ ZXDH_RING_INIT(cq->cq_ring, cq->cq_size);
++ cq->polarity = 1;
++}
+
-+ if (!dvops || !dvops->set_log_trace_switch)
-+ return -EOPNOTSUPP;
++/**
++ * zxdh_cq_set_resized_cnt - record the count of the resized buffers
++ * @cq: cq to resize
++ * @cq_cnt: the count of the resized cq buffers
++ */
++void zxdh_cq_set_resized_cnt(struct zxdh_cq *cq, __u16 cq_cnt)
++{
++ __u64 temp_val;
++ __u16 sw_cq_sel;
++ __u8 arm_next;
++ __u8 arm_seq_num;
+
-+ return dvops->set_log_trace_switch(context, switch_status);
++ get_64bit_val(cq->shadow_area, 0, &temp_val);
++
++ sw_cq_sel = (__u16)FIELD_GET(ZXDH_CQ_DBSA_SW_CQ_SELECT, temp_val);
++ sw_cq_sel += cq_cnt;
++
++ arm_seq_num = (__u8)FIELD_GET(ZXDH_CQ_DBSA_ARM_SEQ_NUM, temp_val);
++ arm_next = (__u8)FIELD_GET(ZXDH_CQ_DBSA_ARM_NEXT, temp_val);
++ cq->cqe_rd_cnt = 0;
++
++ temp_val = FIELD_PREP(ZXDH_CQ_DBSA_ARM_SEQ_NUM, arm_seq_num) |
++ FIELD_PREP(ZXDH_CQ_DBSA_SW_CQ_SELECT, sw_cq_sel) |
++ FIELD_PREP(ZXDH_CQ_DBSA_ARM_NEXT, arm_next) |
++ FIELD_PREP(ZXDH_CQ_DBSA_CQEIDX, cq->cqe_rd_cnt);
++
++ set_64bit_val(cq->shadow_area, 0, temp_val);
+}
+
-+void add_private_ops(struct zxdh_uvcontext *iwvctx)
++/**
++ * zxdh_cq_request_notification - cq notification request (door bell)
++ * @cq: hw cq
++ * @cq_notify: notification type
++ */
++void zxdh_cq_request_notification(struct zxdh_cq *cq,
++ enum zxdh_cmpl_notify cq_notify)
+{
-+ iwvctx->cxt_ops = &zxdh_ctx_ops;
-+}
-diff --git a/providers/zrdma/private_verbs_cmd.h b/providers/zrdma/private_verbs_cmd.h
-new file mode 100644
-index 0000000..f8d9534
---- /dev/null
-+++ b/providers/zrdma/private_verbs_cmd.h
-@@ -0,0 +1,24 @@
-+/* SPDX-License-Identifier: GPL-2.0 or Linux-OpenIB */
-+/* Copyright (c) 2024 ZTE Corporation. All rights reserved. */
-+#ifndef ZXDH_RDMA_PRIVATE_VERBS_CMD_H
-+#define ZXDH_RDMA_PRIVATE_VERBS_CMD_H
++ __u64 temp_val;
++ __u16 sw_cq_sel;
++ __u8 arm_next = 0;
++ __u8 arm_seq_num;
++ __u32 cqe_index;
++ __u32 hdr;
+
-+#include "umain.h"
-+#include "zxdh_dv.h"
++ get_64bit_val(cq->shadow_area, 0, &temp_val);
++ arm_seq_num = (__u8)FIELD_GET(ZXDH_CQ_DBSA_ARM_SEQ_NUM, temp_val);
++ arm_seq_num++;
++ sw_cq_sel = (__u16)FIELD_GET(ZXDH_CQ_DBSA_SW_CQ_SELECT, temp_val);
++ cqe_index = (__u32)FIELD_GET(ZXDH_CQ_DBSA_CQEIDX, temp_val);
+
-+struct zxdh_uvcontext_ops {
-+ int (*modify_qp_udp_sport)(struct ibv_context *ibctx,
-+ uint16_t udp_sport, uint32_t qpn);
-+ int (*set_log_trace_switch)(struct ibv_context *ibctx,
-+ uint8_t switch_status);
-+ int (*get_log_trace_switch)(struct ibv_context *ibctx,
-+ uint8_t *switch_status);
-+ int (*query_qpc)(struct ibv_qp *qp, struct zxdh_rdma_qpc *qpc);
-+ int (*modify_qpc)(struct ibv_qp *qp, struct zxdh_rdma_qpc *qpc,
-+ uint64_t qpc_mask);
-+ int (*reset_qp)(struct ibv_qp *qp, uint64_t opcode);
-+};
++ if (cq_notify == ZXDH_CQ_COMPL_SOLICITED)
++ arm_next = 1;
++ temp_val = FIELD_PREP(ZXDH_CQ_DBSA_ARM_SEQ_NUM, arm_seq_num) |
++ FIELD_PREP(ZXDH_CQ_DBSA_SW_CQ_SELECT, sw_cq_sel) |
++ FIELD_PREP(ZXDH_CQ_DBSA_ARM_NEXT, arm_next) |
++ FIELD_PREP(ZXDH_CQ_DBSA_CQEIDX, cqe_index);
+
-+void add_private_ops(struct zxdh_uvcontext *iwvctx);
++ set_64bit_val(cq->shadow_area, 0, temp_val);
+
-+#endif
-diff --git a/providers/zrdma/status.h b/providers/zrdma/status.h
-new file mode 100644
-index 0000000..d9e9f04
---- /dev/null
-+++ b/providers/zrdma/status.h
-@@ -0,0 +1,75 @@
-+/* SPDX-License-Identifier: GPL-2.0 or Linux-OpenIB */
-+/* Copyright (c) 2024 ZTE Corporation. All rights reserved. */
-+#ifndef ZXDH_STATUS_H
-+#define ZXDH_STATUS_H
++ hdr = FIELD_PREP(ZXDH_CQ_ARM_DBSA_VLD, 0) |
++ FIELD_PREP(ZXDH_CQ_ARM_CQ_ID, cq->cq_id);
+
-+/* Error Codes */
-+enum zxdh_status_code {
-+ ZXDH_SUCCESS = 0,
-+ ZXDH_ERR_NVM = -1,
-+ ZXDH_ERR_NVM_CHECKSUM = -2,
-+ ZXDH_ERR_CFG = -4,
-+ ZXDH_ERR_PARAM = -5,
-+ ZXDH_ERR_DEVICE_NOT_SUPPORTED = -6,
-+ ZXDH_ERR_RESET_FAILED = -7,
-+ ZXDH_ERR_SWFW_SYNC = -8,
-+ ZXDH_ERR_NO_MEMORY = -9,
-+ ZXDH_ERR_BAD_PTR = -10,
-+ ZXDH_ERR_INVALID_PD_ID = -11,
-+ ZXDH_ERR_INVALID_QP_ID = -12,
-+ ZXDH_ERR_INVALID_CQ_ID = -13,
-+ ZXDH_ERR_INVALID_CEQ_ID = -14,
-+ ZXDH_ERR_INVALID_AEQ_ID = -15,
-+ ZXDH_ERR_INVALID_SIZE = -16,
-+ ZXDH_ERR_INVALID_ARP_INDEX = -17,
-+ ZXDH_ERR_INVALID_FPM_FUNC_ID = -18,
-+ ZXDH_ERR_QP_INVALID_MSG_SIZE = -19,
-+ ZXDH_ERR_QP_TOOMANY_WRS_POSTED = -20,
-+ ZXDH_ERR_INVALID_FRAG_COUNT = -21,
-+ ZXDH_ERR_Q_EMPTY = -22,
-+ ZXDH_ERR_INVALID_ALIGNMENT = -23,
-+ ZXDH_ERR_FLUSHED_Q = -24,
-+ ZXDH_ERR_INVALID_PUSH_PAGE_INDEX = -25,
-+ ZXDH_ERR_INVALID_INLINE_DATA_SIZE = -26,
-+ ZXDH_ERR_TIMEOUT = -27,
-+ ZXDH_ERR_OPCODE_MISMATCH = -28,
-+ ZXDH_ERR_CQP_COMPL_ERROR = -29,
-+ ZXDH_ERR_INVALID_VF_ID = -30,
-+ ZXDH_ERR_INVALID_HMCFN_ID = -31,
-+ ZXDH_ERR_BACKING_PAGE_ERROR = -32,
-+ ZXDH_ERR_NO_PBLCHUNKS_AVAILABLE = -33,
-+ ZXDH_ERR_INVALID_PBLE_INDEX = -34,
-+ ZXDH_ERR_INVALID_SD_INDEX = -35,
-+ ZXDH_ERR_INVALID_PAGE_DESC_INDEX = -36,
-+ ZXDH_ERR_INVALID_SD_TYPE = -37,
-+ ZXDH_ERR_MEMCPY_FAILED = -38,
-+ ZXDH_ERR_INVALID_HMC_OBJ_INDEX = -39,
-+ ZXDH_ERR_INVALID_HMC_OBJ_COUNT = -40,
-+ ZXDH_ERR_BUF_TOO_SHORT = -43,
-+ ZXDH_ERR_BAD_IWARP_CQE = -44,
-+ ZXDH_ERR_NVM_BLANK_MODE = -45,
-+ ZXDH_ERR_NOT_IMPL = -46,
-+ ZXDH_ERR_PE_DOORBELL_NOT_ENA = -47,
-+ ZXDH_ERR_NOT_READY = -48,
-+ ZXDH_NOT_SUPPORTED = -49,
-+ ZXDH_ERR_FIRMWARE_API_VER = -50,
-+ ZXDH_ERR_RING_FULL = -51,
-+ ZXDH_ERR_MPA_CRC = -61,
-+ ZXDH_ERR_NO_TXBUFS = -62,
-+ ZXDH_ERR_SEQ_NUM = -63,
-+ ZXDH_ERR_LIST_EMPTY = -64,
-+ ZXDH_ERR_INVALID_MAC_ADDR = -65,
-+ ZXDH_ERR_BAD_STAG = -66,
-+ ZXDH_ERR_CQ_COMPL_ERROR = -67,
-+ ZXDH_ERR_Q_DESTROYED = -68,
-+ ZXDH_ERR_INVALID_FEAT_CNT = -69,
-+ ZXDH_ERR_REG_CQ_FULL = -70,
-+ ZXDH_ERR_VF_MSG_ERROR = -71,
-+ ZXDH_ERR_NO_INTR = -72,
-+ ZXDH_ERR_REG_QSET = -73,
-+ ZXDH_ERR_FEATURES_OP = -74,
-+ ZXDH_ERR_INVALID_FRAG_LEN = -75,
-+ ZXDH_ERR_RETRY_ACK_ERR = -76,
-+ ZXDH_ERR_RETRY_ACK_NOT_EXCEED_ERR = -77,
-+};
-+#endif /* ZXDH_STATUS_H */
-diff --git a/providers/zrdma/uk.c b/providers/zrdma/uk.c
-new file mode 100644
-index 0000000..fbf8348
---- /dev/null
-+++ b/providers/zrdma/uk.c
-@@ -0,0 +1,2616 @@
-+// SPDX-License-Identifier: GPL-2.0 or Linux-OpenIB
-+/* Copyright (c) 2024 ZTE Corporation. All rights reserved. */
-+#include "osdep.h"
-+#include "status.h"
-+#include "defs.h"
-+#include "user.h"
-+#include "zxdh.h"
-+#include "umain.h"
-+#include
-+#include
-+#include "private_verbs_cmd.h"
-+#include
-+#include
-+#include
-+#define ERROR_CODE_VALUE 65
++ udma_to_device_barrier(); /* make sure WQE is populated before valid bit is set */
+
-+static void qp_tx_psn_add(__u32 *x, __u32 y, __u16 mtu)
-+{
-+ if (y == 0) {
-+ *x = (*x + 1) & 0xffffff;
-+ return;
-+ }
-+ *x = (*x + ((y % mtu) ? (y / mtu + 1) : y / mtu)) & 0xffffff;
++ db_wr32(hdr, cq->cqe_alloc_db);
+}
+
-+static int zxdh_get_write_imm_split_switch(void)
++static inline void build_comp_status(__u32 cq_type,
++ struct zxdh_cq_poll_info *info)
+{
-+ char *env;
-+ int zxdh_write_imm_split_switch;
-+
-+ env = getenv("ZXDH_WRITE_IMM_SPILT_ENABLE");
-+ zxdh_write_imm_split_switch = (env != NULL) ? atoi(env) : 0;
-+ return zxdh_write_imm_split_switch;
++ if (!info->error) {
++ info->comp_status = ZXDH_COMPL_STATUS_SUCCESS;
++ if (cq_type == ZXDH_CQE_QTYPE_RQ) {
++ if (info->major_err != ERROR_CODE_VALUE &&
++ info->minor_err != ERROR_CODE_VALUE) {
++ info->comp_status = ZXDH_COMPL_STATUS_UNKNOWN;
++ }
++ }
++ return;
++ }
++ if (info->major_err == ZXDH_RETRY_ACK_MAJOR_ERR &&
++ info->minor_err == ZXDH_RETRY_ACK_MINOR_ERR) {
++ info->comp_status = ZXDH_COMPL_STATUS_RETRY_ACK_ERR;
++ return;
++ }
++ if (info->major_err == ZXDH_RETRY_ACK_MAJOR_ERR &&
++ info->minor_err == ZXDH_TX_WINDOW_QUERY_ITEM_MINOR_ERR) {
++ info->comp_status = ZXDH_COMPL_STATUS_TX_WINDOW_QUERY_ITEM_ERR;
++ return;
++ }
++ info->comp_status = (info->major_err == ZXDH_FLUSH_MAJOR_ERR) ?
++ ZXDH_COMPL_STATUS_FLUSHED :
++ ZXDH_COMPL_STATUS_UNKNOWN;
+}
+
-+/**
-+ * zxdh_set_fragment - set fragment in wqe
-+ * @wqe: wqe for setting fragment
-+ * @offset: offset value
-+ * @sge: sge length and stag
-+ * @valid: The wqe valid
-+ */
-+static void zxdh_set_fragment(__le64 *wqe, __u32 offset, struct zxdh_sge *sge,
-+ __u8 valid)
++__le64 *get_current_cqe(struct zxdh_cq *cq)
+{
-+ if (sge) {
-+ set_64bit_val(wqe, offset + 8,
-+ FIELD_PREP(ZXDHQPSQ_FRAG_TO, sge->tag_off));
-+ set_64bit_val(wqe, offset,
-+ FIELD_PREP(ZXDHQPSQ_VALID, valid) |
-+ FIELD_PREP(ZXDHQPSQ_FRAG_LEN, sge->len) |
-+ FIELD_PREP(ZXDHQPSQ_FRAG_STAG,
-+ sge->stag));
-+ } else {
-+ set_64bit_val(wqe, offset + 8, 0);
-+ set_64bit_val(wqe, offset, FIELD_PREP(ZXDHQPSQ_VALID, valid));
-+ }
++ return ZXDH_GET_CURRENT_EXTENDED_CQ_ELEM(cq);
+}
+
-+/**
-+ * zxdh_nop_1 - insert a NOP wqe
-+ * @qp: hw qp ptr
-+ */
-+static enum zxdh_status_code zxdh_nop_1(struct zxdh_qp_uk *qp)
++static inline void zxdh_get_cq_poll_info(struct zxdh_qp *qp,
++ struct zxdh_cq_poll_info *info,
++ __u64 qword2, __u64 qword3)
+{
-+ __u64 hdr;
-+ __le64 *wqe;
-+ __u32 wqe_idx;
-+ bool signaled = false;
-+
-+ if (!qp->sq_ring.head)
-+ return ZXDH_ERR_PARAM;
-+
-+ wqe_idx = ZXDH_RING_CURRENT_HEAD(qp->sq_ring);
-+ wqe = qp->sq_base[wqe_idx].elem;
-+
-+ qp->sq_wrtrk_array[wqe_idx].quanta = ZXDH_QP_WQE_MIN_QUANTA;
++ __u8 qp_type;
+
-+ set_64bit_val(wqe, 8, 0);
-+ set_64bit_val(wqe, 16, 0);
-+ set_64bit_val(wqe, 24, 0);
++ qp_type = qp->qp_type;
+
-+ hdr = FIELD_PREP(ZXDHQPSQ_OPCODE, ZXDH_OP_TYPE_NOP) |
-+ FIELD_PREP(ZXDHQPSQ_SIGCOMPL, signaled) |
-+ FIELD_PREP(ZXDHQPSQ_VALID, qp->swqe_polarity);
++ info->imm_valid = (bool)FIELD_GET(ZXDH_CQ_IMMVALID, qword2);
++ if (info->imm_valid) {
++ info->imm_data = (__u32)FIELD_GET(ZXDH_CQ_IMMDATA, qword3);
++ info->op_type = ZXDH_OP_TYPE_REC_IMM;
++ } else {
++ info->op_type = ZXDH_OP_TYPE_REC;
++ }
+
-+ /* make sure WQE is written before valid bit is set */
-+ udma_to_device_barrier();
++ info->bytes_xfered = (__u32)FIELD_GET(ZXDHCQ_PAYLDLEN, qword3);
+
-+ set_64bit_val(wqe, 0, hdr);
++ if (likely(qp_type == ZXDH_QP_TYPE_ROCE_RC)) {
++ if (qword2 & ZXDHCQ_STAG) {
++ info->stag_invalid_set = true;
++ info->inv_stag =
++ (__u32)FIELD_GET(ZXDHCQ_INVSTAG, qword2);
++ } else {
++ info->stag_invalid_set = false;
++ }
++ } else if (qp_type == ZXDH_QP_TYPE_ROCE_UD) {
++ info->ipv4 = (bool)FIELD_GET(ZXDHCQ_IPV4, qword2);
++ info->ud_src_qpn = (__u32)FIELD_GET(ZXDHCQ_UDSRCQPN, qword2);
++ }
++}
+
-+ return 0;
++static void update_cq_poll_info(struct zxdh_qp *qp,
++ struct zxdh_cq_poll_info *info, __u32 wqe_idx,
++ __u64 qword0)
++{
++ info->wr_id = qp->sq_wrtrk_array[wqe_idx].wrid;
++ if (!info->comp_status)
++ info->bytes_xfered = qp->sq_wrtrk_array[wqe_idx].wr_len;
++ info->op_type = (__u8)FIELD_GET(ZXDHCQ_OP, qword0);
++ ZXDH_RING_SET_TAIL(qp->sq_ring,
++ wqe_idx + qp->sq_wrtrk_array[wqe_idx].quanta);
+}
+
-+/**
-+ * zxdh_clr_wqes - clear next 128 sq entries
-+ * @qp: hw qp ptr
-+ * @qp_wqe_idx: wqe_idx
-+ */
-+void zxdh_clr_wqes(struct zxdh_qp_uk *qp, __u32 qp_wqe_idx)
++static enum zxdh_status_code
++process_tx_window_query_item_err(struct zxdh_qp *qp,
++ struct zxdh_cq_poll_info *info)
+{
-+ __le64 *wqe;
-+ __u32 wqe_idx;
++ int ret;
++ struct ibv_qp *ib_qp;
++ struct zxdh_uqp *iwuqp;
++ struct zxdh_rdma_qpc qpc = { 0 };
+
-+ if (!(qp_wqe_idx & 0x7F)) {
-+ wqe_idx = (qp_wqe_idx + 128) % qp->sq_ring.size;
-+ wqe = qp->sq_base[wqe_idx].elem;
-+ if (wqe_idx)
-+ memset(wqe, qp->swqe_polarity ? 0 : 0xFF, 0x1000);
-+ else
-+ memset(wqe, qp->swqe_polarity ? 0xFF : 0, 0x1000);
++ iwuqp = container_of(qp, struct zxdh_uqp, qp);
++ ib_qp = &iwuqp->vqp.qp;
++ ret = zxdh_query_qpc(ib_qp, &qpc);
++ if (ret) {
++ verbs_err(verbs_get_ctx(ib_qp->context),
++ "process tx window query item query qpc failed:%d\n",
++ ret);
++ return ZXDH_ERR_RETRY_ACK_ERR;
+ }
-+}
++ if (qpc.tx_last_ack_psn != qp->qp_last_ack_qsn)
++ qp->qp_reset_cnt = 0;
+
-+/**
-+ * zxdh_uk_qp_post_wr - ring doorbell
-+ * @qp: hw qp ptr
-+ */
-+void zxdh_uk_qp_post_wr(struct zxdh_qp_uk *qp)
-+{
-+ /* valid bit is written before ringing doorbell */
-+ udma_to_device_barrier();
++ qp->qp_last_ack_qsn = qpc.tx_last_ack_psn;
++ if (qp->qp_reset_cnt >= ZXDH_QP_RETRY_COUNT)
++ return ZXDH_ERR_RETRY_ACK_ERR;
+
-+ db_wr32(qp->qp_id, qp->wqe_alloc_db);
-+ qp->initial_ring.head = qp->sq_ring.head;
++ ret = zxdh_reset_qp(ib_qp, ZXDH_RESET_RETRY_TX_ITEM_FLAG);
++ if (ret) {
++ verbs_err(verbs_get_ctx(ib_qp->context),
++ "process tx window query item reset qp failed:%d\n",
++ ret);
++ return ZXDH_ERR_RETRY_ACK_ERR;
++ }
++ qp->qp_reset_cnt++;
++ return ZXDH_ERR_RETRY_ACK_NOT_EXCEED_ERR;
+}
+
-+/**
-+ * zxdh_uk_qp_set_shadow_area - fill SW_RQ_Head
-+ * @qp: hw qp ptr
-+ */
-+void zxdh_uk_qp_set_shadow_area(struct zxdh_qp_uk *qp)
++static enum zxdh_status_code
++process_retry_ack_err(struct zxdh_qp *qp, struct zxdh_cq_poll_info *info)
+{
-+ __u8 polarity = 0;
-+
-+ polarity = ((ZXDH_RING_CURRENT_HEAD(qp->rq_ring) == 0) ?
-+ !qp->rwqe_polarity :
-+ qp->rwqe_polarity);
-+ set_64bit_val(qp->shadow_area, 0,
-+ FIELD_PREP(ZXDHQPDBSA_RQ_POLARITY, polarity) |
-+ FIELD_PREP(ZXDHQPDBSA_RQ_SW_HEAD,
-+ ZXDH_RING_CURRENT_HEAD(qp->rq_ring)));
-+}
++ int ret;
++ struct ibv_qp *ib_qp;
++ struct zxdh_uqp *iwuqp;
++ struct zxdh_rdma_qpc qpc = { 0 };
++ struct zxdh_rdma_qpc qpc_req_cmd = { 0 };
+
-+/**
-+ * zxdh_qp_ring_push_db - ring qp doorbell
-+ * @qp: hw qp ptr
-+ * @wqe_idx: wqe index
-+ */
-+static void zxdh_qp_ring_push_db(struct zxdh_qp_uk *qp, __u32 wqe_idx)
-+{
-+ set_32bit_val(qp->push_db, 0,
-+ FIELD_PREP(ZXDH_WQEALLOC_WQE_DESC_INDEX, wqe_idx >> 3) |
-+ qp->qp_id);
-+ qp->initial_ring.head = qp->sq_ring.head;
-+ qp->push_mode = true;
-+ qp->push_dropped = false;
-+}
++ iwuqp = container_of(qp, struct zxdh_uqp, qp);
+
-+void zxdh_qp_push_wqe(struct zxdh_qp_uk *qp, __le64 *wqe, __u16 quanta,
-+ __u32 wqe_idx, bool post_sq)
-+{
-+ __le64 *push;
++ ib_qp = &iwuqp->vqp.qp;
++ ret = zxdh_query_qpc(ib_qp, &qpc);
++ if (ret) {
++ verbs_err(verbs_get_ctx(ib_qp->context),
++ "process retry ack query qpc failed:%d\n", ret);
++ return ZXDH_ERR_RETRY_ACK_ERR;
++ }
++ if (!(qpc.retry_cqe_sq_opcode >= ZXDH_RETRY_CQE_SQ_OPCODE_ERR &&
++ (qpc.recv_err_flag == ZXDH_RECV_ERR_FLAG_NAK_RNR_NAK ||
++ qpc.recv_err_flag == ZXDH_RECV_ERR_FLAG_READ_RESP))) {
++ return ZXDH_ERR_RETRY_ACK_ERR;
++ }
++ if (qpc.tx_last_ack_psn != qp->cqe_last_ack_qsn)
++ qp->cqe_retry_cnt = 0;
+
-+ if (ZXDH_RING_CURRENT_HEAD(qp->initial_ring) !=
-+ ZXDH_RING_CURRENT_TAIL(qp->sq_ring) &&
-+ !qp->push_mode) {
-+ if (post_sq)
-+ zxdh_uk_qp_post_wr(qp);
-+ } else {
-+ push = (__le64 *)((uintptr_t)qp->push_wqe +
-+ (wqe_idx & 0x7) * 0x20);
-+ memcpy(push, wqe, quanta * ZXDH_QP_WQE_MIN_SIZE);
-+ zxdh_qp_ring_push_db(qp, wqe_idx);
++ qp->cqe_last_ack_qsn = qpc.tx_last_ack_psn;
++ if (qp->cqe_retry_cnt >= ZXDH_QP_RETRY_COUNT)
++ return ZXDH_ERR_RETRY_ACK_ERR;
++
++ memcpy(&qpc_req_cmd, &qpc, sizeof(qpc));
++ qpc_req_cmd.package_err_flag = 0;
++ qpc_req_cmd.ack_err_flag = 0;
++ qpc_req_cmd.err_flag = 0;
++ qpc_req_cmd.retry_cqe_sq_opcode &= ZXDH_RESET_RETRY_CQE_SQ_OPCODE_ERR;
++ qpc_req_cmd.cur_retry_count = qpc.retry_count;
++ ret = zxdh_modify_qpc(ib_qp, &qpc_req_cmd,
++ ZXDH_PACKAGE_ERR_FLAG | ZXDH_ERR_FLAG_SET |
++ ZXDH_RETRY_CQE_SQ_OPCODE |
++ ZXDH_TX_READ_RETRY_FLAG_SET);
++ if (ret) {
++ verbs_err(verbs_get_ctx(ib_qp->context),
++ "process retry ack modify qpc failed:%d\n", ret);
++ return ZXDH_ERR_RETRY_ACK_ERR;
+ }
++ qp->cqe_retry_cnt++;
++ return ZXDH_ERR_RETRY_ACK_NOT_EXCEED_ERR;
+}
+
+/**
-+ * zxdh_qp_get_next_send_wqe - pad with NOP if needed, return where next WR should go
-+ * @qp: hw qp ptr
-+ * @wqe_idx: return wqe index
-+ * @quanta: size of WR in quanta
-+ * @total_size: size of WR in bytes
-+ * @info: info on WR
++ * zxdh_cq_poll_cmpl - get cq completion info
++ * @cq: hw cq
++ * @info: cq poll information returned
+ */
-+__le64 *zxdh_qp_get_next_send_wqe(struct zxdh_qp_uk *qp, __u32 *wqe_idx,
-+ __u16 quanta, __u32 total_size,
-+ struct zxdh_post_sq_info *info)
++enum zxdh_status_code zxdh_cq_poll_cmpl(struct zxdh_cq *cq,
++ struct zxdh_cq_poll_info *info)
+{
-+ __le64 *wqe;
-+ __u16 avail_quanta;
-+ __u16 i;
++ enum zxdh_status_code status_code;
++ __u64 comp_ctx, qword0, qword2, qword3;
++ __le64 *cqe;
++ struct zxdh_qp *qp;
++ struct zxdh_ring *pring = NULL;
++ __u32 wqe_idx, q_type;
++ int ret_code;
++ bool move_cq_head = true;
++ __u8 polarity;
++ struct zxdh_usrq *iwusrq = NULL;
++ struct zxdh_srq *srq = NULL;
++ struct zxdh_uqp *iwuqp;
+
-+ avail_quanta = ZXDH_MAX_SQ_WQES_PER_PAGE -
-+ (ZXDH_RING_CURRENT_HEAD(qp->sq_ring) %
-+ ZXDH_MAX_SQ_WQES_PER_PAGE);
-+ if (quanta <= avail_quanta) {
-+ /* WR fits in current chunk */
-+ if (quanta > ZXDH_SQ_RING_FREE_QUANTA(qp->sq_ring))
-+ return NULL;
-+ } else {
-+ /* Need to pad with NOP */
-+ if (quanta + avail_quanta >
-+ ZXDH_SQ_RING_FREE_QUANTA(qp->sq_ring))
-+ return NULL;
++ cqe = get_current_cqe(cq);
+
-+ for (i = 0; i < avail_quanta; i++) {
-+ zxdh_nop_1(qp);
-+ ZXDH_RING_MOVE_HEAD_NOCHECK(qp->sq_ring);
-+ }
++ get_64bit_val(cqe, 0, &qword0);
++ polarity = (__u8)FIELD_GET(ZXDH_CQ_VALID, qword0);
++ if (polarity != cq->polarity)
++ return ZXDH_ERR_Q_EMPTY;
++
++ /* Ensure CQE contents are read after valid bit is checked */
++ udma_from_device_barrier();
++ get_64bit_val(cqe, 8, &comp_ctx);
++ get_64bit_val(cqe, 16, &qword2);
++ get_64bit_val(cqe, 24, &qword3);
++
++ qp = (struct zxdh_qp *)(unsigned long)comp_ctx;
++ if (unlikely(!qp || qp->destroy_pending)) {
++ ret_code = ZXDH_ERR_Q_DESTROYED;
++ goto exit;
++ }
++ iwuqp = container_of(qp, struct zxdh_uqp, qp);
++ info->qp_handle = (zxdh_qp_handle)(unsigned long)qp;
++ q_type = (__u8)FIELD_GET(ZXDH_CQ_SQ, qword0);
++ info->solicited_event = (bool)FIELD_GET(ZXDHCQ_SOEVENT, qword0);
++ wqe_idx = (__u32)FIELD_GET(ZXDH_CQ_WQEIDX, qword0);
++ info->error = (bool)FIELD_GET(ZXDH_CQ_ERROR, qword0);
++ info->major_err = FIELD_GET(ZXDH_CQ_MAJERR, qword0);
++ info->minor_err = FIELD_GET(ZXDH_CQ_MINERR, qword0);
++
++ /* Set the min error to standard flush error code for remaining cqes */
++ if (unlikely(info->error && info->major_err == ZXDH_FLUSH_MAJOR_ERR &&
++ info->minor_err != FLUSH_GENERAL_ERR)) {
++ qword0 &= ~ZXDH_CQ_MINERR;
++ qword0 |= FIELD_PREP(ZXDH_CQ_MINERR, FLUSH_GENERAL_ERR);
++ set_64bit_val(cqe, 0, qword0);
+ }
++ build_comp_status(q_type, info);
+
-+ *wqe_idx = ZXDH_RING_CURRENT_HEAD(qp->sq_ring);
-+ if (!*wqe_idx)
-+ qp->swqe_polarity = !qp->swqe_polarity;
++ info->qp_id = (__u32)FIELD_GET(ZXDHCQ_QPID, qword2);
++ info->imm_valid = false;
+
-+ ZXDH_RING_MOVE_HEAD_BY_COUNT_NOCHECK(qp->sq_ring, quanta);
++ info->qp_handle = (zxdh_qp_handle)(unsigned long)qp;
++ switch (q_type) {
++ case ZXDH_CQE_QTYPE_RQ:
++ if (qp->is_srq) {
++ iwusrq = iwuqp->srq;
++ srq = &iwusrq->srq;
++ zxdh_free_srq_wqe(srq, wqe_idx);
++ info->wr_id = srq->srq_wrid_array[wqe_idx];
++ zxdh_get_cq_poll_info(qp, info, qword2, qword3);
++ } else {
++ if (unlikely(info->comp_status ==
++ ZXDH_COMPL_STATUS_FLUSHED ||
++ info->comp_status ==
++ ZXDH_COMPL_STATUS_UNKNOWN)) {
++ if (!ZXDH_RING_MORE_WORK(qp->rq_ring)) {
++ ret_code = ZXDH_ERR_Q_EMPTY;
++ goto exit;
++ }
++ wqe_idx = qp->rq_ring.tail;
++ }
++ info->wr_id = qp->rq_wrid_array[wqe_idx];
++ zxdh_get_cq_poll_info(qp, info, qword2, qword3);
++ ZXDH_RING_SET_TAIL(qp->rq_ring, wqe_idx + 1);
++ if (info->comp_status == ZXDH_COMPL_STATUS_FLUSHED) {
++ qp->rq_flush_seen = true;
++ if (!ZXDH_RING_MORE_WORK(qp->rq_ring))
++ qp->rq_flush_complete = true;
++ else
++ move_cq_head = false;
++ }
++ pring = &qp->rq_ring;
++ }
++ ret_code = ZXDH_SUCCESS;
++ break;
++ case ZXDH_CQE_QTYPE_SQ:
++ if (info->comp_status == ZXDH_COMPL_STATUS_RETRY_ACK_ERR &&
++ qp->qp_type == ZXDH_QP_TYPE_ROCE_RC) {
++ status_code = process_retry_ack_err(qp, info);
++ if (status_code == ZXDH_ERR_RETRY_ACK_ERR) {
++ update_cq_poll_info(qp, info, wqe_idx, qword0);
++ ret_code = ZXDH_SUCCESS;
++ } else {
++ ret_code = status_code;
++ }
++ } else if (info->comp_status ==
++ ZXDH_COMPL_STATUS_TX_WINDOW_QUERY_ITEM_ERR &&
++ qp->qp_type == ZXDH_QP_TYPE_ROCE_RC) {
++ status_code =
++ process_tx_window_query_item_err(qp, info);
++ if (status_code == ZXDH_ERR_RETRY_ACK_ERR) {
++ update_cq_poll_info(qp, info, wqe_idx, qword0);
++ ret_code = ZXDH_SUCCESS;
++ } else {
++ ret_code = status_code;
++ }
++ } else if (info->comp_status == ZXDH_COMPL_STATUS_FLUSHED) {
++ info->wr_id = qp->sq_wrtrk_array[wqe_idx].wrid;
++ ZXDH_RING_INIT(qp->sq_ring, qp->sq_ring.size);
++ ret_code = ZXDH_SUCCESS;
++ } else {
++ update_cq_poll_info(qp, info, wqe_idx, qword0);
++ ret_code = ZXDH_SUCCESS;
++ }
++ break;
++ default:
++ zxdh_dbg(verbs_get_ctx(iwuqp->vqp.qp.context), ZXDH_DBG_CQ,
++			 "zxdh get cqe type unknown!\n");
++ ret_code = ZXDH_ERR_Q_DESTROYED;
++ break;
++ }
++exit:
++ if (move_cq_head) {
++ __u64 cq_shadow_temp;
+
-+ wqe = qp->sq_base[*wqe_idx].elem;
-+ qp->sq_wrtrk_array[*wqe_idx].wrid = info->wr_id;
-+ qp->sq_wrtrk_array[*wqe_idx].wr_len = total_size;
-+ qp->sq_wrtrk_array[*wqe_idx].quanta = quanta;
++ ZXDH_RING_MOVE_HEAD_NOCHECK(cq->cq_ring);
++ if (!ZXDH_RING_CURRENT_HEAD(cq->cq_ring))
++ cq->polarity ^= 1;
+
-+ return wqe;
++ ZXDH_RING_MOVE_TAIL(cq->cq_ring);
++ cq->cqe_rd_cnt++;
++ get_64bit_val(cq->shadow_area, 0, &cq_shadow_temp);
++ cq_shadow_temp &= ~ZXDH_CQ_DBSA_CQEIDX;
++ cq_shadow_temp |=
++ FIELD_PREP(ZXDH_CQ_DBSA_CQEIDX, cq->cqe_rd_cnt);
++ set_64bit_val(cq->shadow_area, 0, cq_shadow_temp);
++ } else {
++ qword0 &= ~ZXDH_CQ_WQEIDX;
++ qword0 |= FIELD_PREP(ZXDH_CQ_WQEIDX, pring->tail);
++ set_64bit_val(cqe, 0, qword0);
++ }
++
++ return ret_code;
+}
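zxdh_cq_poll_cmpl() decides whether a CQE is new by comparing the CQE's valid bit against cq->polarity, and the expected polarity flips each time the ring head wraps back to index 0, so no consumer index has to be shared with the producer just to detect emptiness. A self-contained toy model of that polarity scheme (hypothetical names and structures, not the driver's own):

#include <assert.h>
#include <stdbool.h>
#include <stdint.h>

#define RING_SIZE 4

struct toy_cqe { uint32_t data; uint8_t valid; };

static struct toy_cqe ring[RING_SIZE];          /* zero-initialized: valid = 0 */
static uint32_t prod_head, cons_head;
static uint8_t prod_pol = 1, cons_pol = 1;      /* both sides start at polarity 1 */

/* Producer: write the payload first, then the valid (polarity) bit. */
static void produce(uint32_t data)
{
	ring[prod_head].data = data;
	/* a real driver issues a write barrier here before setting valid */
	ring[prod_head].valid = prod_pol;
	prod_head = (prod_head + 1) % RING_SIZE;
	if (!prod_head)
		prod_pol ^= 1;                  /* polarity flips on wrap */
}

/* Consumer: an entry is new only if its polarity matches ours. */
static bool consume(uint32_t *data)
{
	if (ring[cons_head].valid != cons_pol)
		return false;                   /* ring empty */
	*data = ring[cons_head].data;
	cons_head = (cons_head + 1) % RING_SIZE;
	if (!cons_head)
		cons_pol ^= 1;
	return true;
}

int main(void)
{
	uint32_t v;

	assert(!consume(&v));                   /* empty at start */
	for (uint32_t i = 0; i < 6; i++) {      /* wraps the ring once */
		produce(i);
		assert(consume(&v) && v == i);
	}
	assert(!consume(&v));                   /* stale polarity reads as empty */
	return 0;
}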
+
+/**
-+ * zxdh_qp_get_next_recv_wqe - get next qp's rcv wqe
-+ * @qp: hw qp ptr
-+ * @wqe_idx: return wqe index
++ * zxdh_qp_round_up - return round up qp wq depth
++ * @wqdepth: wq depth in quanta to round up
+ */
-+__le64 *zxdh_qp_get_next_recv_wqe(struct zxdh_qp_uk *qp, __u32 *wqe_idx)
++int zxdh_qp_round_up(__u32 wqdepth)
+{
-+ __le64 *wqe;
-+ enum zxdh_status_code ret_code;
++ int scount = 1;
+
-+ if (ZXDH_RING_FULL_ERR(qp->rq_ring))
-+ return NULL;
++ for (wqdepth--; scount <= 16; scount *= 2)
++ wqdepth |= wqdepth >> scount;
+
-+ ZXDH_ATOMIC_RING_MOVE_HEAD(qp->rq_ring, *wqe_idx, ret_code);
-+ if (ret_code)
-+ return NULL;
++ return ++wqdepth;
++}
+
-+ if (!*wqe_idx)
-+ qp->rwqe_polarity = !qp->rwqe_polarity;
-+ /* rq_wqe_size_multiplier is no of 16 byte quanta in one rq wqe */
-+ wqe = qp->rq_base[*wqe_idx * qp->rq_wqe_size_multiplier].elem;
++/**
++ * zxdh_cq_round_up - return the rounded-up cq wq depth
++ * @wqdepth: wq depth in quanta to round up
++ */
++int zxdh_cq_round_up(__u32 wqdepth)
++{
++ int scount = 1;
+
-+ return wqe;
++ for (wqdepth--; scount <= 16; scount *= 2)
++ wqdepth |= wqdepth >> scount;
++
++ return ++wqdepth;
+}
+
-+static enum zxdh_status_code
-+zxdh_post_rdma_write(struct zxdh_qp_uk *qp, struct zxdh_post_sq_info *info,
-+ bool post_sq, __u32 total_size)
++/**
++ * zxdh_get_rq_wqe_shift - get shift count for maximum rq wqe size
++ * @sge: maximum number of scatter/gather elements per wqe
++ * @shift: returns the shift needed based on sge
++ *
++ * Shift can be used to left shift the rq wqe size based on the number of SGEs.
++ * For 0 or 1 SGE, shift = 1 (wqe size of 2*16 bytes).
++ * For 2 or 3 SGEs, shift = 2 (wqe size of 4*16 bytes).
++ * For 4-7 SGEs, shift = 3 (wqe size of 8*16 bytes).
++ * For 8-15 SGEs, shift = 4 (wqe size of 16*16 bytes); otherwise shift = 5 (wqe size of 512 bytes).
++ */
++void zxdh_get_rq_wqe_shift(__u32 sge, __u8 *shift)
+{
-+ enum zxdh_status_code ret_code;
-+ struct zxdh_rdma_write *op_info;
-+ __u32 i, byte_off = 0;
-+ __u32 frag_cnt, addl_frag_cnt;
-+ __le64 *wqe;
-+ __u32 wqe_idx;
-+ __u16 quanta;
-+ __u64 hdr;
-+ bool read_fence = false;
-+ bool imm_data_flag;
-+
-+ op_info = &info->op.rdma_write;
-+ imm_data_flag = info->imm_data_valid ? 1 : 0;
-+ read_fence |= info->read_fence;
-+
-+ if (imm_data_flag)
-+ frag_cnt =
-+ op_info->num_lo_sges ? (op_info->num_lo_sges + 1) : 2;
++	*shift = 0; /* 16-byte RQE, need to confirm configuration */
++ if (sge < 2)
++ *shift = 1;
++ else if (sge < 4)
++ *shift = 2;
++ else if (sge < 8)
++ *shift = 3;
++ else if (sge < 16)
++ *shift = 4;
+ else
-+ frag_cnt = op_info->num_lo_sges;
-+ addl_frag_cnt =
-+ op_info->num_lo_sges > 1 ? (op_info->num_lo_sges - 1) : 0;
-+
-+ ret_code = zxdh_fragcnt_to_quanta_sq(frag_cnt, &quanta);
-+ if (ret_code)
-+ return ret_code;
-+
-+ wqe = zxdh_qp_get_next_send_wqe(qp, &wqe_idx, quanta, total_size, info);
-+ if (!wqe)
-+ return ZXDH_ERR_QP_TOOMANY_WRS_POSTED;
++ *shift = 5;
++}
+
-+ zxdh_clr_wqes(qp, wqe_idx);
++/**
++ * zxdh_get_sq_wqe_shift - get shift count for maximum wqe size
++ * @sge: maximum number of scatter/gather elements per wqe
++ * @inline_data: maximum inline data size
++ * @shift: returns the shift needed based on sge and inline data size
++ *
++ * Shift can be used to left shift the wqe size based on the number of SGEs and the inline data size.
++ * To support WRs with imm_data, shift = 1 (wqe size of 2*32 bytes).
++ * For 2-7 SGEs or 24 < inline data <= 86, shift = 2 (wqe size of 4*32 bytes).
++ * Otherwise shift = 3 (wqe size of 256 bytes).
++ */
++void zxdh_get_sq_wqe_shift(__u32 sge, __u32 inline_data, __u8 *shift)
++{
++ *shift = 1;
+
-+ if (op_info->num_lo_sges) {
-+ set_64bit_val(
-+ wqe, 16,
-+ FIELD_PREP(ZXDHQPSQ_FIRST_FRAG_VALID,
-+ op_info->lo_sg_list->len ==
-+ ZXDH_MAX_SQ_PAYLOAD_SIZE ?
-+ 1 :
-+ 0) |
-+ FIELD_PREP(ZXDHQPSQ_FIRST_FRAG_LEN,
-+ op_info->lo_sg_list->len) |
-+ FIELD_PREP(ZXDHQPSQ_FIRST_FRAG_STAG,
-+ op_info->lo_sg_list->stag));
-+ set_64bit_val(wqe, 8,
-+ FIELD_PREP(ZXDHQPSQ_FRAG_TO,
-+ op_info->lo_sg_list->tag_off));
-+ } else {
-+ /*if zero sge,post a special sge with zero lenth*/
-+ set_64bit_val(wqe, 16,
-+ FIELD_PREP(ZXDHQPSQ_FIRST_FRAG_VALID, 0) |
-+ FIELD_PREP(ZXDHQPSQ_FIRST_FRAG_LEN, 0) |
-+ FIELD_PREP(ZXDHQPSQ_FIRST_FRAG_STAG,
-+ 0x100));
-+ set_64bit_val(wqe, 8, FIELD_PREP(ZXDHQPSQ_FRAG_TO, 0));
++ if (sge > 1 || inline_data > 24) {
++ if (sge < 8 && inline_data <= 86)
++ *shift = 2;
++ else
++ *shift = 3;
+ }
++}
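Per the comment above, the resulting SQ WQE footprint is 32 << shift bytes. A standalone sketch that mirrors the same selection thresholds (illustrative only; sq_wqe_shift is a local stand-in for zxdh_get_sq_wqe_shift):

#include <assert.h>
#include <stdint.h>

/* Mirrors zxdh_get_sq_wqe_shift(): pick the smallest WQE that fits. */
static uint8_t sq_wqe_shift(uint32_t sge, uint32_t inline_data)
{
	uint8_t shift = 1;              /* 2 * 32B: 1 SGE plus room for imm_data */

	if (sge > 1 || inline_data > 24) {
		if (sge < 8 && inline_data <= 86)
			shift = 2;      /* 4 * 32B */
		else
			shift = 3;      /* 8 * 32B */
	}
	return shift;
}

int main(void)
{
	assert(sq_wqe_shift(1, 0) == 1);        /* 64-byte WQE */
	assert(sq_wqe_shift(4, 0) == 2);        /* 128-byte WQE */
	assert(sq_wqe_shift(2, 100) == 3);      /* inline > 86 -> 256-byte WQE */
	assert(sq_wqe_shift(9, 0) == 3);        /* >= 8 SGEs -> 256-byte WQE */
	return 0;
}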
+
-+ if (imm_data_flag) {
-+ byte_off = ZXDH_SQ_WQE_BYTESIZE + ZXDH_QP_FRAG_BYTESIZE;
-+ if (op_info->num_lo_sges > 1) {
-+ qp->wqe_ops.iw_set_fragment(wqe, byte_off,
-+ &op_info->lo_sg_list[1],
-+ qp->swqe_polarity);
-+ byte_off += ZXDH_QP_FRAG_BYTESIZE;
-+ }
-+ set_64bit_val(
-+ wqe, ZXDH_SQ_WQE_BYTESIZE,
-+ FIELD_PREP(ZXDHQPSQ_IMMDATA_VALID, qp->swqe_polarity) |
-+ FIELD_PREP(ZXDHQPSQ_IMMDATA, info->imm_data));
-+ i = 2;
-+ if (i < op_info->num_lo_sges) {
-+ for (byte_off = ZXDH_SQ_WQE_BYTESIZE +
-+ 2 * ZXDH_QP_FRAG_BYTESIZE;
-+ i < op_info->num_lo_sges; i += 2) {
-+ if (i == addl_frag_cnt) {
-+ qp->wqe_ops.iw_set_fragment(
-+ wqe, byte_off,
-+ &op_info->lo_sg_list[i],
-+ qp->swqe_polarity);
-+ byte_off += ZXDH_QP_FRAG_BYTESIZE;
-+ break;
-+ }
-+ byte_off += ZXDH_QP_FRAG_BYTESIZE;
-+ qp->wqe_ops.iw_set_fragment(
-+ wqe, byte_off,
-+ &op_info->lo_sg_list[i + 1],
-+ qp->swqe_polarity);
-+ byte_off -= ZXDH_QP_FRAG_BYTESIZE;
-+ qp->wqe_ops.iw_set_fragment(
-+ wqe, byte_off, &op_info->lo_sg_list[i],
-+ qp->swqe_polarity);
-+ byte_off += 2 * ZXDH_QP_FRAG_BYTESIZE;
-+ }
-+ }
-+ } else {
-+ i = 1;
-+ for (byte_off = ZXDH_SQ_WQE_BYTESIZE; i < op_info->num_lo_sges;
-+ i += 2) {
-+ if (i == addl_frag_cnt) {
-+ qp->wqe_ops.iw_set_fragment(
-+ wqe, byte_off, &op_info->lo_sg_list[i],
-+ qp->swqe_polarity);
-+ byte_off += ZXDH_QP_FRAG_BYTESIZE;
-+ break;
-+ }
-+ byte_off += ZXDH_QP_FRAG_BYTESIZE;
-+ qp->wqe_ops.iw_set_fragment(wqe, byte_off,
-+ &op_info->lo_sg_list[i + 1],
-+ qp->swqe_polarity);
-+ byte_off -= ZXDH_QP_FRAG_BYTESIZE;
-+ qp->wqe_ops.iw_set_fragment(wqe, byte_off,
-+ &op_info->lo_sg_list[i],
-+ qp->swqe_polarity);
-+ byte_off += 2 * ZXDH_QP_FRAG_BYTESIZE;
-+ }
-+ }
-+ /* if not an odd number set valid bit in next fragment */
-+ if (!(frag_cnt & 0x01) && frag_cnt) {
-+ qp->wqe_ops.iw_set_fragment(wqe, byte_off, NULL,
-+ qp->swqe_polarity);
-+ }
++/*
++ * zxdh_get_sqdepth - get SQ depth (quanta)
++ * @dev_attrs: qp HW attributes
++ * @sq_size: SQ size
++ * @shift: shift which determines size of WQE
++ * @sqdepth: depth of SQ
++ *
++ */
++enum zxdh_status_code zxdh_get_sqdepth(struct zxdh_dev_attrs *dev_attrs,
++ __u32 sq_size, __u8 shift,
++ __u32 *sqdepth)
++{
++ if (sq_size > ZXDH_MAX_SQ_DEPTH)
++ return ZXDH_ERR_INVALID_SIZE;
+
-+ hdr = FIELD_PREP(ZXDHQPSQ_VALID, qp->swqe_polarity) |
-+ FIELD_PREP(ZXDHQPSQ_OPCODE, info->op_type) |
-+ FIELD_PREP(ZXDHQPSQ_SIGCOMPL, info->signaled) |
-+ FIELD_PREP(ZXDHQPSQ_LOCALFENCE, info->local_fence) |
-+ FIELD_PREP(ZXDHQPSQ_READFENCE, read_fence) |
-+ FIELD_PREP(ZXDHQPSQ_SOLICITED, info->solicited) |
-+ FIELD_PREP(ZXDHQPSQ_IMMDATAFLAG, imm_data_flag) |
-+ FIELD_PREP(ZXDHQPSQ_ADDFRAGCNT, addl_frag_cnt) |
-+ FIELD_PREP(ZXDHQPSQ_REMSTAG, op_info->rem_addr.stag);
-+ set_64bit_val(wqe, 24,
-+ FIELD_PREP(ZXDHQPSQ_FRAG_TO, op_info->rem_addr.tag_off));
++ *sqdepth = zxdh_qp_round_up((sq_size << shift) + ZXDH_SQ_RSVD);
+
-+ udma_to_device_barrier(); /* make sure WQE is populated before valid bit is set */
++ if (*sqdepth < (ZXDH_QP_SW_MIN_WQSIZE << shift))
++ *sqdepth = ZXDH_QP_SW_MIN_WQSIZE << shift;
++ else if (*sqdepth > dev_attrs->max_hw_wq_quanta)
++ return ZXDH_ERR_INVALID_SIZE;
+
-+ set_64bit_val(wqe, 0, hdr);
-+ if (post_sq)
-+ zxdh_uk_qp_post_wr(qp);
-+ qp_tx_psn_add(&qp->next_psn, total_size, qp->mtu);
+ return 0;
+}
+
-+static void split_write_imm_wqe(struct zxdh_qp_uk *qp,
-+ struct zxdh_post_sq_info *info,
-+ struct zxdh_post_sq_info *split_part1_info,
-+ struct zxdh_post_sq_info *split_part2_info)
++/*
++ * zxdh_get_rqdepth - get RQ depth (quanta)
++ * @dev_attrs: qp HW attributes
++ * @rq_size: RQ size
++ * @shift: shift which determines size of WQE
++ * @rqdepth: depth of RQ
++ */
++enum zxdh_status_code zxdh_get_rqdepth(struct zxdh_dev_attrs *dev_attrs,
++ __u32 rq_size, __u8 shift,
++ __u32 *rqdepth)
+{
-+ __u32 total_size = 0;
-+ struct zxdh_rdma_write *op_info;
-+
-+ op_info = &info->op.rdma_write;
-+ total_size = op_info->rem_addr.len;
-+ split_part1_info->op.rdma_write.lo_sg_list =
-+ info->op.rdma_write.lo_sg_list;
-+ split_part2_info->op.rdma_write.lo_sg_list = NULL;
++ *rqdepth = zxdh_qp_round_up((rq_size << shift) + ZXDH_RQ_RSVD);
+
-+ split_part1_info->op_type = ZXDH_OP_TYPE_WRITE;
-+ split_part1_info->signaled = false;
-+ split_part1_info->local_fence = info->local_fence;
-+ split_part1_info->read_fence = info->read_fence;
-+ split_part1_info->solicited = info->solicited;
-+ split_part1_info->imm_data_valid = false;
-+ split_part1_info->wr_id = info->wr_id;
-+ split_part1_info->op.rdma_write.num_lo_sges =
-+ info->op.rdma_write.num_lo_sges;
-+ split_part1_info->op.rdma_write.rem_addr.stag = op_info->rem_addr.stag;
-+ split_part1_info->op.rdma_write.rem_addr.tag_off =
-+ op_info->rem_addr.tag_off;
++ if (*rqdepth < (ZXDH_QP_SW_MIN_WQSIZE << shift))
++ *rqdepth = ZXDH_QP_SW_MIN_WQSIZE << shift;
++ else if (*rqdepth > dev_attrs->max_hw_rq_quanta)
++ return ZXDH_ERR_INVALID_SIZE;
+
-+ split_part2_info->op_type = info->op_type;
-+ split_part2_info->signaled = info->signaled;
-+ split_part2_info->local_fence = info->local_fence;
-+ split_part2_info->read_fence = info->read_fence;
-+ split_part2_info->solicited = info->solicited;
-+ split_part2_info->imm_data_valid = info->imm_data_valid;
-+ split_part2_info->wr_id = info->wr_id;
-+ split_part2_info->imm_data = info->imm_data;
-+ split_part2_info->op.rdma_write.num_lo_sges = 0;
-+ split_part2_info->op.rdma_write.rem_addr.stag = op_info->rem_addr.stag;
-+ split_part2_info->op.rdma_write.rem_addr.tag_off =
-+ op_info->rem_addr.tag_off + total_size;
++ return 0;
+}
+
-+/**
-+ * zxdh_uk_rdma_write - rdma write operation
-+ * @qp: hw qp ptr
-+ * @info: post sq information
-+ * @post_sq: flag to post sq
++static const struct zxdh_wqe_ops iw_wqe_ops = {
++ .iw_copy_inline_data = zxdh_copy_inline_data,
++ .iw_inline_data_size_to_quanta = zxdh_inline_data_size_to_quanta,
++ .iw_set_fragment = zxdh_set_fragment,
++ .iw_set_mw_bind_wqe = zxdh_set_mw_bind_wqe,
++};
++
++/**
++ * zxdh_qp_init - initialize shared qp
++ * @qp: hw qp (user and kernel)
++ * @info: qp initialization info
++ *
++ * Initializes the variables used in both user and kernel mode.
++ * The size of the wqe depends on the maximum number of fragments
++ * allowed; the wqe size times the number of wqes should equal the
++ * amount of memory allocated for the sq and rq.
+ */
-+enum zxdh_status_code zxdh_uk_rdma_write(struct zxdh_qp_uk *qp,
-+ struct zxdh_post_sq_info *info,
-+ bool post_sq)
++enum zxdh_status_code zxdh_qp_init(struct zxdh_qp *qp,
++ struct zxdh_qp_init_info *info)
+{
-+ struct zxdh_post_sq_info split_part1_info = { 0 };
-+ struct zxdh_post_sq_info split_part2_info = { 0 };
-+ struct zxdh_rdma_write *op_info;
-+ __u32 i;
-+ __u32 total_size = 0;
-+ enum zxdh_status_code ret_code;
-+ bool imm_data_flag = info->imm_data_valid ? 1 : 0;
-+ int zxdh_write_imm_split_switch;
++ enum zxdh_status_code ret_code = 0;
++ __u32 sq_ring_size;
++ __u8 sqshift, rqshift;
+
-+ op_info = &info->op.rdma_write;
-+ if (op_info->num_lo_sges > qp->max_sq_frag_cnt)
++ qp->dev_attrs = info->dev_attrs;
++ if (info->max_sq_frag_cnt > qp->dev_attrs->max_hw_wq_frags ||
++ info->max_rq_frag_cnt > qp->dev_attrs->max_hw_wq_frags)
+ return ZXDH_ERR_INVALID_FRAG_COUNT;
+
-+ for (i = 0; i < op_info->num_lo_sges; i++) {
-+ total_size += op_info->lo_sg_list[i].len;
-+ if (0 != i && 0 == op_info->lo_sg_list[i].len)
-+ return ZXDH_ERR_INVALID_FRAG_LEN;
-+ }
-+
-+ if (total_size > ZXDH_MAX_SQ_PAYLOAD_SIZE)
-+ return ZXDH_ERR_QP_INVALID_MSG_SIZE;
++ zxdh_get_rq_wqe_shift(info->max_rq_frag_cnt, &rqshift);
++ zxdh_get_sq_wqe_shift(info->max_sq_frag_cnt, info->max_inline_data,
++ &sqshift);
+
-+ zxdh_write_imm_split_switch = zxdh_get_write_imm_split_switch();
++ qp->qp_caps = info->qp_caps;
++ qp->sq_base = info->sq;
++ qp->rq_base = info->rq;
++ qp->qp_type = info->type;
++ qp->shadow_area = info->shadow_area;
++ set_64bit_val(qp->shadow_area, 0, 0x8000);
++ qp->sq_wrtrk_array = info->sq_wrtrk_array;
+
-+ op_info->rem_addr.len = total_size;
-+ if (zxdh_write_imm_split_switch == 0) {
-+ ret_code = zxdh_post_rdma_write(qp, info, post_sq, total_size);
-+ if (ret_code)
-+ return ret_code;
-+ } else {
-+ if (imm_data_flag && total_size > qp->mtu) {
-+ split_write_imm_wqe(qp, info, &split_part1_info,
-+ &split_part2_info);
++ qp->rq_wrid_array = info->rq_wrid_array;
++ qp->wqe_alloc_db = info->wqe_alloc_db;
++ qp->qp_id = info->qp_id;
++ qp->sq_size = info->sq_size;
++ qp->push_mode = false;
++ qp->max_sq_frag_cnt = info->max_sq_frag_cnt;
++ sq_ring_size = qp->sq_size << sqshift;
++ ZXDH_RING_INIT(qp->sq_ring, sq_ring_size);
++ ZXDH_RING_INIT(qp->initial_ring, sq_ring_size);
++ qp->swqe_polarity = 0;
++ qp->swqe_polarity_deferred = 1;
++ qp->rwqe_polarity = 0;
++ qp->rwqe_signature = 0;
++ qp->rq_size = info->rq_size;
++ qp->max_rq_frag_cnt = info->max_rq_frag_cnt;
++ qp->max_inline_data = (info->max_inline_data == 0) ?
++ ZXDH_MAX_INLINE_DATA_SIZE :
++ info->max_inline_data;
++ qp->rq_wqe_size = rqshift;
++ ZXDH_RING_INIT(qp->rq_ring, qp->rq_size);
++ qp->rq_wqe_size_multiplier = 1 << rqshift;
++ qp->wqe_ops = iw_wqe_ops;
++ return ret_code;
++}
+
-+ ret_code = zxdh_post_rdma_write(qp, &split_part1_info,
-+ post_sq, total_size);
-+ if (ret_code)
-+ return ret_code;
-+ ret_code = zxdh_post_rdma_write(qp, &split_part2_info,
-+ post_sq, 0);
-+ if (ret_code)
-+ return ret_code;
-+ } else {
-+ ret_code = zxdh_post_rdma_write(qp, info, post_sq,
-+ total_size);
-+ if (ret_code)
-+ return ret_code;
-+ }
-+ }
++/**
++ * zxdh_cq_init - initialize shared cq (user and kernel)
++ * @cq: hw cq
++ * @info: hw cq initialization info
++ */
++enum zxdh_status_code zxdh_cq_init(struct zxdh_cq *cq,
++ struct zxdh_cq_init_info *info)
++{
++ cq->cq_base = info->cq_base;
++ cq->cq_id = info->cq_id;
++ cq->cq_size = info->cq_size;
++ cq->cqe_alloc_db = info->cqe_alloc_db;
++ cq->cq_ack_db = info->cq_ack_db;
++ cq->shadow_area = info->shadow_area;
++ cq->cqe_size = info->cqe_size;
++ ZXDH_RING_INIT(cq->cq_ring, cq->cq_size);
++ cq->polarity = 1;
++ cq->cqe_rd_cnt = 0;
+
+ return 0;
+}
+
-+static void split_two_part_info(struct zxdh_qp_uk *qp,
-+ struct zxdh_post_sq_info *info, __u32 ori_psn,
-+ __u32 pre_cal_psn,
-+ struct zxdh_post_sq_info *split_part1_info,
-+ struct zxdh_post_sq_info *split_part2_info)
++/**
++ * zxdh_clean_cq - clean cq entries
++ * @q: completion context
++ * @cq: cq to clean
++ */
++void zxdh_clean_cq(void *q, struct zxdh_cq *cq)
+{
-+ __u32 total_size = 0;
-+ __u32 remain_size = 0;
-+ __u32 split_size = 0;
-+ struct zxdh_rdma_read *op_info;
-+
-+ op_info = &info->op.rdma_read;
-+ total_size = op_info->rem_addr.len;
-+ split_part1_info->op.rdma_read.lo_sg_list = qp->split_sg_list;
-+ split_part2_info->op.rdma_read.lo_sg_list =
-+ qp->split_sg_list + op_info->num_lo_sges;
-+
-+ memset(split_part1_info->op.rdma_read.lo_sg_list, 0,
-+ 2 * op_info->num_lo_sges * sizeof(struct zxdh_sge));
-+ if (pre_cal_psn < ori_psn && pre_cal_psn != 0)
-+ remain_size = (0xffffff - ori_psn + 1) * qp->mtu;
-+ else
-+ remain_size = (0x800000 - ori_psn) * qp->mtu;
-+
-+ split_size = total_size - remain_size;
-+
-+ split_part1_info->signaled = false;
-+ split_part1_info->local_fence = info->local_fence;
-+ split_part1_info->read_fence = info->read_fence;
-+ split_part1_info->solicited = false;
-+ split_part1_info->wr_id = info->wr_id;
-+ split_part1_info->op.rdma_read.rem_addr.stag = op_info->rem_addr.stag;
-+ split_part1_info->op.rdma_read.rem_addr.tag_off =
-+ op_info->rem_addr.tag_off;
++ __le64 *cqe;
++ __u64 qword3, comp_ctx;
++ __u32 cq_head;
++ __u8 polarity, temp;
+
-+ split_part2_info->signaled = info->signaled;
-+ split_part2_info->local_fence = info->local_fence;
-+ split_part2_info->read_fence = info->read_fence;
-+ split_part2_info->solicited = info->solicited;
-+ split_part2_info->wr_id = info->wr_id;
-+ split_part2_info->op.rdma_read.rem_addr.stag = op_info->rem_addr.stag;
-+ split_part2_info->op.rdma_read.rem_addr.tag_off =
-+ op_info->rem_addr.tag_off + remain_size;
++ cq_head = cq->cq_ring.head;
++ temp = cq->polarity;
++ do {
++ if (cq->cqe_size)
++ cqe = ((struct zxdh_extended_cqe
++ *)(cq->cq_base))[cq_head]
++ .buf;
++ else
++ cqe = cq->cq_base[cq_head].buf;
++ get_64bit_val(cqe, 24, &qword3);
++ polarity = (__u8)FIELD_GET(ZXDH_CQ_VALID, qword3);
+
-+ for (int i = 0; i < op_info->num_lo_sges; i++) {
-+ if (op_info->lo_sg_list[i].len +
-+ split_part1_info->op.rdma_read.rem_addr.len <
-+ remain_size) {
-+ split_part1_info->op.rdma_read.rem_addr.len +=
-+ op_info->lo_sg_list[i].len;
-+ split_part1_info->op.rdma_read.num_lo_sges += 1;
-+ memcpy(split_part1_info->op.rdma_read.lo_sg_list + i,
-+ op_info->lo_sg_list + i,
-+ sizeof(struct zxdh_sge));
-+ continue;
-+ } else if (op_info->lo_sg_list[i].len +
-+ split_part1_info->op.rdma_read.rem_addr.len ==
-+ remain_size) {
-+ split_part1_info->op.rdma_read.rem_addr.len +=
-+ op_info->lo_sg_list[i].len;
-+ split_part1_info->op.rdma_read.num_lo_sges += 1;
-+ memcpy(split_part1_info->op.rdma_read.lo_sg_list + i,
-+ op_info->lo_sg_list + i,
-+ sizeof(struct zxdh_sge));
-+ split_part2_info->op.rdma_read.rem_addr.len =
-+ split_size;
-+ split_part2_info->op.rdma_read.num_lo_sges =
-+ op_info->num_lo_sges -
-+ split_part1_info->op.rdma_read.num_lo_sges;
-+ memcpy(split_part2_info->op.rdma_read.lo_sg_list,
-+ op_info->lo_sg_list + i + 1,
-+ split_part2_info->op.rdma_read.num_lo_sges *
-+ sizeof(struct zxdh_sge));
++ if (polarity != temp)
+ break;
-+ }
+
-+ split_part1_info->op.rdma_read.lo_sg_list[i].len =
-+ remain_size -
-+ split_part1_info->op.rdma_read.rem_addr.len;
-+ split_part1_info->op.rdma_read.lo_sg_list[i].tag_off =
-+ op_info->lo_sg_list[i].tag_off;
-+ split_part1_info->op.rdma_read.lo_sg_list[i].stag =
-+ op_info->lo_sg_list[i].stag;
-+ split_part1_info->op.rdma_read.rem_addr.len = remain_size;
-+ split_part1_info->op.rdma_read.num_lo_sges += 1;
-+ split_part2_info->op.rdma_read.lo_sg_list[0].len =
-+ op_info->lo_sg_list[i].len -
-+ split_part1_info->op.rdma_read.lo_sg_list[i].len;
-+ split_part2_info->op.rdma_read.lo_sg_list[0].tag_off =
-+ op_info->lo_sg_list[i].tag_off +
-+ split_part1_info->op.rdma_read.lo_sg_list[i].len;
-+ split_part2_info->op.rdma_read.lo_sg_list[0].stag =
-+ op_info->lo_sg_list[i].stag;
-+ split_part2_info->op.rdma_read.rem_addr.len = split_size;
-+ split_part2_info->op.rdma_read.num_lo_sges =
-+ op_info->num_lo_sges -
-+ split_part1_info->op.rdma_read.num_lo_sges + 1;
-+ if (split_part2_info->op.rdma_read.num_lo_sges - 1 > 0) {
-+ memcpy(split_part2_info->op.rdma_read.lo_sg_list + 1,
-+ op_info->lo_sg_list + i + 1,
-+ (split_part2_info->op.rdma_read.num_lo_sges -
-+ 1) * sizeof(struct zxdh_sge));
-+ }
-+ break;
-+ }
++ get_64bit_val(cqe, 8, &comp_ctx);
++ if ((void *)(uintptr_t)comp_ctx == q)
++ set_64bit_val(cqe, 8, 0);
++
++ cq_head = (cq_head + 1) % cq->cq_ring.size;
++ if (!cq_head)
++ temp ^= 1;
++ } while (true);
+}
+
-+static enum zxdh_status_code zxdh_post_rdma_read(struct zxdh_qp_uk *qp,
-+ struct zxdh_post_sq_info *info,
-+ bool post_sq, __u32 total_size)
++/**
++ * zxdh_nop - post a nop
++ * @qp: hw qp ptr
++ * @wr_id: work request id
++ * @signaled: signaled for completion
++ * @post_sq: ring doorbell
++ */
++enum zxdh_status_code zxdh_nop(struct zxdh_qp *qp, __u64 wr_id, bool signaled,
++ bool post_sq)
+{
-+ enum zxdh_status_code ret_code;
-+ struct zxdh_rdma_read *op_info;
-+ __u32 i, byte_off = 0;
-+ bool local_fence = false;
-+ __u32 addl_frag_cnt;
+ __le64 *wqe;
-+ __u32 wqe_idx;
-+ __u16 quanta;
+ __u64 hdr;
++ __u32 wqe_idx;
++ struct zxdh_post_sq_info info = {};
+
-+ op_info = &info->op.rdma_read;
-+ ret_code = zxdh_fragcnt_to_quanta_sq(op_info->num_lo_sges, &quanta);
-+ if (ret_code)
-+ return ret_code;
-+
-+ wqe = zxdh_qp_get_next_send_wqe(qp, &wqe_idx, quanta, total_size, info);
-+ if (!wqe)
-+ return ZXDH_ERR_QP_TOOMANY_WRS_POSTED;
++ info.push_wqe = false;
++ info.wr_id = wr_id;
++ wqe = zxdh_qp_get_next_send_wqe(qp, &wqe_idx, ZXDH_QP_WQE_MIN_QUANTA, 0,
++ &info);
++ if (!wqe)
++ return ZXDH_ERR_QP_TOOMANY_WRS_POSTED;
+
+ zxdh_clr_wqes(qp, wqe_idx);
+
-+ addl_frag_cnt =
-+ op_info->num_lo_sges > 1 ? (op_info->num_lo_sges - 1) : 0;
-+ local_fence |= info->local_fence;
-+
-+ if (op_info->num_lo_sges) {
-+ set_64bit_val(
-+ wqe, 16,
-+ FIELD_PREP(ZXDHQPSQ_FIRST_FRAG_VALID,
-+ op_info->lo_sg_list->len ==
-+ ZXDH_MAX_SQ_PAYLOAD_SIZE ?
-+ 1 :
-+ 0) |
-+ FIELD_PREP(ZXDHQPSQ_FIRST_FRAG_LEN,
-+ op_info->lo_sg_list->len) |
-+ FIELD_PREP(ZXDHQPSQ_FIRST_FRAG_STAG,
-+ op_info->lo_sg_list->stag));
-+ set_64bit_val(wqe, 8,
-+ FIELD_PREP(ZXDHQPSQ_FRAG_TO,
-+ op_info->lo_sg_list->tag_off));
-+ } else {
-+ /*if zero sge,post a special sge with zero lenth*/
-+ set_64bit_val(wqe, 16,
-+ FIELD_PREP(ZXDHQPSQ_FIRST_FRAG_VALID, 0) |
-+ FIELD_PREP(ZXDHQPSQ_FIRST_FRAG_LEN, 0) |
-+ FIELD_PREP(ZXDHQPSQ_FIRST_FRAG_STAG,
-+ 0x100));
-+ set_64bit_val(wqe, 8, FIELD_PREP(ZXDHQPSQ_FRAG_TO, 0));
-+ }
-+
-+ i = 1;
-+ for (byte_off = ZXDH_SQ_WQE_BYTESIZE; i < op_info->num_lo_sges;
-+ i += 2) {
-+ if (i == addl_frag_cnt) {
-+ qp->wqe_ops.iw_set_fragment(wqe, byte_off,
-+ &op_info->lo_sg_list[i],
-+ qp->swqe_polarity);
-+ byte_off += ZXDH_QP_FRAG_BYTESIZE;
-+ break;
-+ }
-+ byte_off += ZXDH_QP_FRAG_BYTESIZE;
-+ qp->wqe_ops.iw_set_fragment(wqe, byte_off,
-+ &op_info->lo_sg_list[i + 1],
-+ qp->swqe_polarity);
-+ byte_off -= ZXDH_QP_FRAG_BYTESIZE;
-+ qp->wqe_ops.iw_set_fragment(wqe, byte_off,
-+ &op_info->lo_sg_list[i],
-+ qp->swqe_polarity);
-+ byte_off += 2 * ZXDH_QP_FRAG_BYTESIZE;
-+ }
-+
-+ /* if not an odd number set valid bit in next fragment */
-+ if (!(op_info->num_lo_sges & 0x01) && op_info->num_lo_sges) {
-+ qp->wqe_ops.iw_set_fragment(wqe, byte_off, NULL,
-+ qp->swqe_polarity);
-+ }
++ set_64bit_val(wqe, 0, 0);
++ set_64bit_val(wqe, 8, 0);
++ set_64bit_val(wqe, 16, 0);
+
-+ hdr = FIELD_PREP(ZXDHQPSQ_VALID, qp->swqe_polarity) |
-+ FIELD_PREP(ZXDHQPSQ_OPCODE, ZXDH_OP_TYPE_READ) |
-+ FIELD_PREP(ZXDHQPSQ_SIGCOMPL, info->signaled) |
-+ FIELD_PREP(ZXDHQPSQ_LOCALFENCE, info->local_fence) |
-+ FIELD_PREP(ZXDHQPSQ_READFENCE, info->read_fence) |
-+ FIELD_PREP(ZXDHQPSQ_SOLICITED, info->solicited) |
-+ FIELD_PREP(ZXDHQPSQ_ADDFRAGCNT, addl_frag_cnt) |
-+ FIELD_PREP(ZXDHQPSQ_REMSTAG, op_info->rem_addr.stag);
-+ set_64bit_val(wqe, 24,
-+ FIELD_PREP(ZXDHQPSQ_FRAG_TO, op_info->rem_addr.tag_off));
++ hdr = FIELD_PREP(ZXDHQPSQ_OPCODE, ZXDHQP_OP_NOP) |
++ FIELD_PREP(ZXDHQPSQ_SIGCOMPL, signaled) |
++ FIELD_PREP(ZXDHQPSQ_VALID, qp->swqe_polarity);
+
+ udma_to_device_barrier(); /* make sure WQE is populated before valid bit is set */
+
-+ set_64bit_val(wqe, 0, hdr);
++ set_64bit_val(wqe, 24, hdr);
+ if (post_sq)
-+ zxdh_uk_qp_post_wr(qp);
++ zxdh_qp_post_wr(qp);
++
+ return 0;
+}
+
+/**
-+ * zxdh_uk_rdma_read - rdma read command
-+ * @qp: hw qp ptr
-+ * @info: post sq information
-+ * @inv_stag: flag for inv_stag
-+ * @post_sq: flag to post sq
++ * zxdh_fragcnt_to_quanta_sq - calculate quanta based on fragment count for SQ
++ * @frag_cnt: number of fragments
++ * @quanta: quanta for frag_cnt
+ */
-+enum zxdh_status_code zxdh_uk_rdma_read(struct zxdh_qp_uk *qp,
-+ struct zxdh_post_sq_info *info,
-+ bool inv_stag, bool post_sq)
++enum zxdh_status_code zxdh_fragcnt_to_quanta_sq(__u32 frag_cnt, __u16 *quanta)
+{
-+ struct zxdh_post_sq_info split_part1_info = { 0 };
-+ struct zxdh_post_sq_info split_part2_info = { 0 };
-+ struct zxdh_rdma_read *op_info;
-+ enum zxdh_status_code ret_code;
-+ __u32 i, total_size = 0, pre_cal_psn = 0;
-+
-+ op_info = &info->op.rdma_read;
-+ if (qp->max_sq_frag_cnt < op_info->num_lo_sges)
++ if (frag_cnt > ZXDH_MAX_SQ_FRAG)
+ return ZXDH_ERR_INVALID_FRAG_COUNT;
++ *quanta = frag_cnt / 2 + 1;
++ return 0;
++}
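The frag_cnt/2 + 1 mapping is consistent with the WQE layout used by the post-send paths above: the first 32-byte quantum carries the header plus the first fragment, and each additional quantum carries two 16-byte fragments. A quick standalone check of that arithmetic (demo only; fragcnt_to_quanta is an illustrative local name):

#include <assert.h>
#include <stdint.h>

/* Same mapping as zxdh_fragcnt_to_quanta_sq() for in-range fragment counts. */
static uint16_t fragcnt_to_quanta(uint32_t frag_cnt)
{
	return frag_cnt / 2 + 1;
}

int main(void)
{
	assert(fragcnt_to_quanta(0) == 1);      /* header quantum only */
	assert(fragcnt_to_quanta(1) == 1);      /* first frag shares the header quantum */
	assert(fragcnt_to_quanta(2) == 2);
	assert(fragcnt_to_quanta(3) == 2);      /* two extra frags per quantum */
	assert(fragcnt_to_quanta(7) == 4);
	return 0;
}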
+
-+ for (i = 0; i < op_info->num_lo_sges; i++) {
-+ total_size += op_info->lo_sg_list[i].len;
-+ if (0 != i && 0 == op_info->lo_sg_list[i].len)
-+ return ZXDH_ERR_INVALID_FRAG_LEN;
++/**
++ * zxdh_fragcnt_to_wqesize_rq - calculate wqe size based on fragment count for RQ
++ * @frag_cnt: number of fragments
++ * @wqe_size: size in bytes given frag_cnt
++ */
++enum zxdh_status_code zxdh_fragcnt_to_wqesize_rq(__u32 frag_cnt,
++ __u16 *wqe_size)
++{
++ switch (frag_cnt) {
++ case 0:
++ case 1:
++ *wqe_size = 32;
++ break;
++ case 2:
++ case 3:
++ *wqe_size = 64;
++ break;
++ case 4:
++ case 5:
++ case 6:
++ case 7:
++ *wqe_size = 128;
++ break;
++ case 8:
++ case 9:
++ case 10:
++ case 11:
++ case 12:
++ case 13:
++ case 14:
++ *wqe_size = 256;
++ break;
++ default:
++ return ZXDH_ERR_INVALID_FRAG_COUNT;
+ }
+
-+ if (total_size > ZXDH_MAX_SQ_PAYLOAD_SIZE)
-+ return ZXDH_ERR_QP_INVALID_MSG_SIZE;
-+ op_info->rem_addr.len = total_size;
-+ pre_cal_psn = qp->next_psn;
-+ qp_tx_psn_add(&pre_cal_psn, total_size, qp->mtu);
-+ if (read_wqe_need_split(pre_cal_psn, qp->next_psn)) {
-+ split_two_part_info(qp, info, qp->next_psn, pre_cal_psn,
-+ &split_part1_info, &split_part2_info);
-+ ret_code = zxdh_post_rdma_read(qp, &split_part1_info, post_sq,
-+ total_size);
-+ if (ret_code)
-+ return ret_code;
-+
-+ qp_tx_psn_add(&qp->next_psn,
-+ split_part1_info.op.rdma_read.rem_addr.len,
-+ qp->mtu);
-+ ret_code = zxdh_post_rdma_read(qp, &split_part2_info, post_sq,
-+ total_size);
-+ if (ret_code)
-+ return ret_code;
-+
-+ qp_tx_psn_add(&qp->next_psn,
-+ split_part2_info.op.rdma_read.rem_addr.len,
-+ qp->mtu);
-+ } else {
-+ ret_code = zxdh_post_rdma_read(qp, info, post_sq, total_size);
-+ if (ret_code)
-+ return ret_code;
-+
-+ qp_tx_psn_add(&qp->next_psn, total_size, qp->mtu);
-+ }
+ return 0;
+}
+
+/**
-+ * zxdh_uk_rc_send - rdma send command
-+ * @qp: hw qp ptr
-+ * @info: post sq information
-+ * @post_sq: flag to post sq
++ * zxdh_get_srq_wqe_shift - get shift count for maximum srq wqe size
++ * @dev_attrs: srq HW attributes
++ * @sge: maximum number of scatter/gather elements per wqe
++ * @shift: returns the shift needed based on sge
++ *
++ * Shift can be used to left shift the srq wqe size based on the number of SGEs.
++ * For 0 or 1 SGE, shift = 1 (wqe size of 2*16 bytes).
++ * For 2 or 3 SGEs, shift = 2 (wqe size of 4*16 bytes).
++ * For 4-7 SGEs, shift = 3 (wqe size of 8*16 bytes).
++ * For 8-15 SGEs, shift = 4 (wqe size of 16*16 bytes); otherwise shift = 5 (wqe size of 512 bytes).
+ */
-+enum zxdh_status_code zxdh_uk_rc_send(struct zxdh_qp_uk *qp,
-+ struct zxdh_post_sq_info *info,
-+ bool post_sq)
++void zxdh_get_srq_wqe_shift(struct zxdh_dev_attrs *dev_attrs, __u32 sge,
++ __u8 *shift)
+{
-+ __le64 *wqe;
-+ struct zxdh_post_send *op_info;
-+ __u64 hdr;
-+ __u32 i, wqe_idx, total_size = 0, byte_off;
-+ enum zxdh_status_code ret_code;
-+ __u32 frag_cnt, addl_frag_cnt;
-+ bool read_fence = false;
-+ __u16 quanta;
-+ bool imm_data_flag = info->imm_data_valid ? 1 : 0;
++	*shift = 0; /* 16-byte RQE, need to confirm configuration */
++ if (sge < 2)
++ *shift = 1;
++ else if (sge < 4)
++ *shift = 2;
++ else if (sge < 8)
++ *shift = 3;
++ else if (sge < 16)
++ *shift = 4;
++ else
++ *shift = 5;
++}
+
-+ op_info = &info->op.send;
-+ if (qp->max_sq_frag_cnt < op_info->num_sges)
-+ return ZXDH_ERR_INVALID_FRAG_COUNT;
++/*
++ * zxdh_get_srqdepth - get SRQ depth (quanta)
++ * @max_hw_srq_quanta: HW SRQ size limit
++ * @srq_size: SRQ size
++ * @shift: shift which determines size of WQE
++ * @srqdepth: depth of SRQ
++ */
++int zxdh_get_srqdepth(__u32 max_hw_srq_quanta, __u32 srq_size, __u8 shift,
++ __u32 *srqdepth)
++{
++ *srqdepth = zxdh_qp_round_up((srq_size << shift) + ZXDH_SRQ_RSVD);
+
-+ for (i = 0; i < op_info->num_sges; i++) {
-+ total_size += op_info->sg_list[i].len;
-+ if (0 != i && 0 == op_info->sg_list[i].len)
-+ return ZXDH_ERR_INVALID_FRAG_LEN;
-+ }
++ if (*srqdepth < (ZXDH_QP_SW_MIN_WQSIZE << shift))
++ *srqdepth = ZXDH_QP_SW_MIN_WQSIZE << shift;
++ else if ((*srqdepth >> shift) > max_hw_srq_quanta)
++ return ZXDH_ERR_INVALID_SIZE;
+
-+ if (total_size > ZXDH_MAX_SQ_PAYLOAD_SIZE)
-+ return ZXDH_ERR_QP_INVALID_MSG_SIZE;
++ return 0;
++}
+
-+ if (imm_data_flag)
-+ frag_cnt = op_info->num_sges ? (op_info->num_sges + 1) : 2;
-+ else
-+ frag_cnt = op_info->num_sges;
-+ ret_code = zxdh_fragcnt_to_quanta_sq(frag_cnt, &quanta);
-+ if (ret_code)
-+ return ret_code;
++__le64 *zxdh_get_srq_wqe(struct zxdh_srq *srq, int wqe_index)
++{
++ __le64 *wqe;
+
-+ wqe = zxdh_qp_get_next_send_wqe(qp, &wqe_idx, quanta, total_size, info);
-+ if (!wqe)
-+ return ZXDH_ERR_QP_TOOMANY_WRS_POSTED;
++ wqe = srq->srq_base[wqe_index * srq->srq_wqe_size_multiplier].elem;
++ return wqe;
++}
+
-+ zxdh_clr_wqes(qp, wqe_idx);
++__le16 *zxdh_get_srq_list_wqe(struct zxdh_srq *srq, __u16 *idx)
++{
++ __le16 *wqe;
++ __u16 wqe_idx;
+
-+ read_fence |= info->read_fence;
-+ addl_frag_cnt = op_info->num_sges > 1 ? (op_info->num_sges - 1) : 0;
-+ if (op_info->num_sges) {
-+ set_64bit_val(
-+ wqe, 16,
-+ FIELD_PREP(ZXDHQPSQ_FIRST_FRAG_VALID,
-+ op_info->sg_list->len ==
-+ ZXDH_MAX_SQ_PAYLOAD_SIZE ?
-+ 1 :
-+ 0) |
-+ FIELD_PREP(ZXDHQPSQ_FIRST_FRAG_LEN,
-+ op_info->sg_list->len) |
-+ FIELD_PREP(ZXDHQPSQ_FIRST_FRAG_STAG,
-+ op_info->sg_list->stag));
-+ set_64bit_val(wqe, 8,
-+ FIELD_PREP(ZXDHQPSQ_FRAG_TO,
-+ op_info->sg_list->tag_off));
-+ } else {
-+ /*if zero sge,post a special sge with zero lenth*/
-+ set_64bit_val(wqe, 16,
-+ FIELD_PREP(ZXDHQPSQ_FIRST_FRAG_VALID, 0) |
-+ FIELD_PREP(ZXDHQPSQ_FIRST_FRAG_LEN, 0) |
-+ FIELD_PREP(ZXDHQPSQ_FIRST_FRAG_STAG,
-+ 0x100));
-+ set_64bit_val(wqe, 8, FIELD_PREP(ZXDHQPSQ_FRAG_TO, 0));
-+ }
-+
-+ if (imm_data_flag) {
-+ byte_off = ZXDH_SQ_WQE_BYTESIZE + ZXDH_QP_FRAG_BYTESIZE;
-+ if (op_info->num_sges > 1) {
-+ qp->wqe_ops.iw_set_fragment(wqe, byte_off,
-+ &op_info->sg_list[1],
-+ qp->swqe_polarity);
-+ byte_off += ZXDH_QP_FRAG_BYTESIZE;
-+ }
-+ set_64bit_val(
-+ wqe, ZXDH_SQ_WQE_BYTESIZE,
-+ FIELD_PREP(ZXDHQPSQ_IMMDATA_VALID, qp->swqe_polarity) |
-+ FIELD_PREP(ZXDHQPSQ_IMMDATA, info->imm_data));
-+ i = 2;
-+ if (i < op_info->num_sges) {
-+ for (byte_off = ZXDH_SQ_WQE_BYTESIZE +
-+ 2 * ZXDH_QP_FRAG_BYTESIZE;
-+ i < op_info->num_sges; i += 2) {
-+ if (i == addl_frag_cnt) {
-+ qp->wqe_ops.iw_set_fragment(
-+ wqe, byte_off,
-+ &op_info->sg_list[i],
-+ qp->swqe_polarity);
-+ byte_off += ZXDH_QP_FRAG_BYTESIZE;
-+ break;
-+ }
-+ byte_off += ZXDH_QP_FRAG_BYTESIZE;
-+ qp->wqe_ops.iw_set_fragment(
-+ wqe, byte_off, &op_info->sg_list[i + 1],
-+ qp->swqe_polarity);
-+ byte_off -= ZXDH_QP_FRAG_BYTESIZE;
-+ qp->wqe_ops.iw_set_fragment(
-+ wqe, byte_off, &op_info->sg_list[i],
-+ qp->swqe_polarity);
-+ byte_off += 2 * ZXDH_QP_FRAG_BYTESIZE;
-+ }
-+ }
-+ } else {
-+ i = 1;
-+ for (byte_off = ZXDH_SQ_WQE_BYTESIZE; i < op_info->num_sges;
-+ i += 2) {
-+ if (i == addl_frag_cnt) {
-+ qp->wqe_ops.iw_set_fragment(
-+ wqe, byte_off, &op_info->sg_list[i],
-+ qp->swqe_polarity);
-+ byte_off += ZXDH_QP_FRAG_BYTESIZE;
-+ break;
-+ }
-+ byte_off += ZXDH_QP_FRAG_BYTESIZE;
-+ qp->wqe_ops.iw_set_fragment(wqe, byte_off,
-+ &op_info->sg_list[i + 1],
-+ qp->swqe_polarity);
-+ byte_off -= ZXDH_QP_FRAG_BYTESIZE;
-+ qp->wqe_ops.iw_set_fragment(wqe, byte_off,
-+ &op_info->sg_list[i],
-+ qp->swqe_polarity);
-+ byte_off += 2 * ZXDH_QP_FRAG_BYTESIZE;
-+ }
-+ }
-+
-+ /* if not an odd number set valid bit in next fragment */
-+ if (!(frag_cnt & 0x01) && frag_cnt) {
-+ qp->wqe_ops.iw_set_fragment(wqe, byte_off, NULL,
-+ qp->swqe_polarity);
-+ }
-+
-+ hdr = FIELD_PREP(ZXDHQPSQ_VALID, qp->swqe_polarity) |
-+ FIELD_PREP(ZXDHQPSQ_OPCODE, info->op_type) |
-+ FIELD_PREP(ZXDHQPSQ_SIGCOMPL, info->signaled) |
-+ FIELD_PREP(ZXDHQPSQ_LOCALFENCE, info->local_fence) |
-+ FIELD_PREP(ZXDHQPSQ_READFENCE, read_fence) |
-+ FIELD_PREP(ZXDHQPSQ_SOLICITED, info->solicited) |
-+ FIELD_PREP(ZXDHQPSQ_IMMDATAFLAG, imm_data_flag) |
-+ FIELD_PREP(ZXDHQPSQ_ADDFRAGCNT, addl_frag_cnt) |
-+ FIELD_PREP(ZXDHQPSQ_REMSTAG, info->stag_to_inv);
-+ set_64bit_val(wqe, 24,
-+ FIELD_PREP(ZXDHQPSQ_INLINEDATAFLAG, 0) |
-+ FIELD_PREP(ZXDHQPSQ_INLINEDATALEN, 0));
++ wqe_idx = srq->srq_list_ring.tail;
++ srq->srq_list_ring.tail++;
++ srq->srq_list_ring.tail %= srq->srq_list_ring.size;
++ *idx = srq->srq_list_ring.tail;
+
-+ udma_to_device_barrier(); /* make sure WQE is populated before valid bit is set */
++ if (!(*idx))
++ srq->srq_list_polarity = !srq->srq_list_polarity;
+
-+ set_64bit_val(wqe, 0, hdr);
-+ if (post_sq)
-+ zxdh_uk_qp_post_wr(qp);
-+ qp_tx_psn_add(&qp->next_psn, total_size, qp->mtu);
++ wqe = &srq->srq_list_base[wqe_idx];
+
-+ return 0;
++ return wqe;
+}
+
+/**
-+ * zxdh_uk_ud_send - rdma send command
-+ * @qp: hw qp ptr
-+ * @info: post sq information
-+ * @post_sq: flag to post sq
++ * zxdh_srq_init - initialize srq
++ * @srq: hw srq (user and kernel)
++ * @info: srq initialization info
++ *
++ * Initializes the variables used in both user and kernel mode.
++ * The size of the wqe depends on the maximum number of fragments
++ * allowed; the wqe size times the number of wqes should equal the
++ * amount of memory allocated for the srq.
+ */
-+enum zxdh_status_code zxdh_uk_ud_send(struct zxdh_qp_uk *qp,
-+ struct zxdh_post_sq_info *info,
-+ bool post_sq)
++enum zxdh_status_code zxdh_srq_init(struct zxdh_srq *srq,
++ struct zxdh_srq_init_info *info)
+{
-+ __le64 *wqe_base;
-+ __le64 *wqe_ex = NULL;
-+ struct zxdh_post_send *op_info;
-+ __u64 hdr;
-+ __u32 i, wqe_idx, total_size = 0, byte_off;
-+ enum zxdh_status_code ret_code;
-+ __u32 frag_cnt, addl_frag_cnt;
-+ bool read_fence = false;
-+ __u16 quanta;
-+ bool imm_data_flag = info->imm_data_valid ? 1 : 0;
-+
-+ op_info = &info->op.send;
-+ if (qp->max_sq_frag_cnt < op_info->num_sges)
-+ return ZXDH_ERR_INVALID_FRAG_COUNT;
-+
-+ for (i = 0; i < op_info->num_sges; i++) {
-+ total_size += op_info->sg_list[i].len;
-+ if (0 != i && 0 == op_info->sg_list[i].len)
-+ return ZXDH_ERR_INVALID_FRAG_LEN;
-+ }
-+
-+ if (total_size > ZXDH_MAX_SQ_PAYLOAD_SIZE)
-+ return ZXDH_ERR_QP_INVALID_MSG_SIZE;
-+
-+ if (imm_data_flag)
-+ frag_cnt = op_info->num_sges ? (op_info->num_sges + 1) : 2;
-+ else
-+ frag_cnt = op_info->num_sges;
-+ ret_code = zxdh_fragcnt_to_quanta_sq(frag_cnt, &quanta);
-+ if (ret_code)
-+ return ret_code;
-+
-+ if (quanta > ZXDH_SQ_RING_FREE_QUANTA(qp->sq_ring))
-+ return ZXDH_ERR_QP_TOOMANY_WRS_POSTED;
-+
-+ wqe_idx = ZXDH_RING_CURRENT_HEAD(qp->sq_ring);
-+ if (!wqe_idx)
-+ qp->swqe_polarity = !qp->swqe_polarity;
++ __u32 srq_ring_size;
++ __u8 srqshift;
+
-+ ZXDH_RING_MOVE_HEAD_BY_COUNT_NOCHECK(qp->sq_ring, quanta);
++ srq->dev_attrs = info->dev_attrs;
++ if (info->max_srq_frag_cnt > srq->dev_attrs->max_hw_wq_frags)
++ return -ZXDH_ERR_INVALID_FRAG_COUNT;
++ zxdh_get_srq_wqe_shift(srq->dev_attrs, info->max_srq_frag_cnt,
++ &srqshift);
++ srq->srq_base = info->srq_base;
++ srq->srq_list_base = info->srq_list_base;
++ srq->srq_db_base = info->srq_db_base;
++ srq->srq_wrid_array = info->srq_wrid_array;
++ srq->srq_id = info->srq_id;
++ srq->srq_size = info->srq_size;
++ srq->log2_srq_size = info->log2_srq_size;
++ srq->srq_list_size = info->srq_list_size;
++ srq->max_srq_frag_cnt = info->max_srq_frag_cnt;
++ srq_ring_size = srq->srq_size;
++ srq->srq_wqe_size = srqshift;
++ srq->srq_wqe_size_multiplier = 1 << srqshift;
++ ZXDH_RING_INIT(srq->srq_ring, srq_ring_size);
++ ZXDH_RING_INIT(srq->srq_list_ring, srq->srq_list_size);
++ srq->srq_ring.tail = srq->srq_size - 1;
++ srq->srq_list_polarity = 1;
++ return 0;
++}
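
Per the comment on zxdh_srq_init(), the WQE size follows from the fragment count via zxdh_get_srq_wqe_shift(), and the SRQ buffer must hold WQE size * number of WQEs. A back-of-the-envelope sketch of that sizing follows; the 16-byte base quantum is an assumption taken from the "2*16 bytes" wording in the shift comments elsewhere in this patch.

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	/* Example only: 3 SGEs gives srqshift = 2 per zxdh_get_srq_wqe_shift(). */
	uint8_t srqshift = 2;
	uint32_t srq_size = 256;                  /* number of SRQ WQEs        */
	uint32_t quanta_per_wqe = 1u << srqshift; /* srq_wqe_size_multiplier   */
	uint32_t quantum_bytes = 16;              /* assumed 16-byte quantum   */

	printf("WQE = %u bytes, SRQ buffer = %u bytes\n",
	       quanta_per_wqe * quantum_bytes,
	       srq_size * quanta_per_wqe * quantum_bytes);
	return 0;
}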
+
-+ wqe_base = qp->sq_base[wqe_idx].elem;
-+ qp->sq_wrtrk_array[wqe_idx].wrid = info->wr_id;
-+ qp->sq_wrtrk_array[wqe_idx].wr_len = total_size;
-+ qp->sq_wrtrk_array[wqe_idx].quanta = quanta;
++void zxdh_free_srq_wqe(struct zxdh_srq *srq, int wqe_index)
++{
++ struct zxdh_usrq *iwusrq;
++ __le64 *wqe;
++ __u64 hdr;
+
-+ zxdh_clr_wqes(qp, wqe_idx);
++ iwusrq = container_of(srq, struct zxdh_usrq, srq);
++ /* always called with interrupts disabled. */
++ pthread_spin_lock(&iwusrq->lock);
++ wqe = zxdh_get_srq_wqe(srq, srq->srq_ring.tail);
++ srq->srq_ring.tail = wqe_index;
++ hdr = FIELD_PREP(ZXDHQPSRQ_NEXT_WQE_INDEX, wqe_index);
+
-+ read_fence |= info->read_fence;
-+ addl_frag_cnt = op_info->num_sges > 1 ? (op_info->num_sges - 1) : 0;
-+ hdr = FIELD_PREP(ZXDHQPSQ_VALID, qp->swqe_polarity) |
-+ FIELD_PREP(ZXDHQPSQ_OPCODE, info->op_type) |
-+ FIELD_PREP(ZXDHQPSQ_SIGCOMPL, info->signaled) |
-+ FIELD_PREP(ZXDHQPSQ_SOLICITED, info->solicited) |
-+ FIELD_PREP(ZXDHQPSQ_IMMDATAFLAG, imm_data_flag) |
-+ FIELD_PREP(ZXDHQPSQ_UD_INLINEDATAFLAG, 0) |
-+ FIELD_PREP(ZXDHQPSQ_UD_INLINEDATALEN, 0) |
-+ FIELD_PREP(ZXDHQPSQ_UD_ADDFRAGCNT, addl_frag_cnt) |
-+ FIELD_PREP(ZXDHQPSQ_AHID, op_info->ah_id);
++ udma_to_device_barrier(); /* make sure WQE is populated before valid bit is set */
++ set_64bit_val(wqe, 0, hdr);
+
-+ if (op_info->num_sges) {
-+ set_64bit_val(
-+ wqe_base, 16,
-+ FIELD_PREP(ZXDHQPSQ_FIRST_FRAG_VALID,
-+ op_info->sg_list->len ==
-+ ZXDH_MAX_SQ_PAYLOAD_SIZE ?
-+ 1 :
-+ 0) |
-+ FIELD_PREP(ZXDHQPSQ_FIRST_FRAG_LEN,
-+ op_info->sg_list->len) |
-+ FIELD_PREP(ZXDHQPSQ_FIRST_FRAG_STAG,
-+ op_info->sg_list->stag));
-+ set_64bit_val(wqe_base, 8,
-+ FIELD_PREP(ZXDHQPSQ_FRAG_TO,
-+ op_info->sg_list->tag_off));
-+ } else {
-+		/* if zero sge, post a special sge with zero length */
-+ set_64bit_val(wqe_base, 16,
-+ FIELD_PREP(ZXDHQPSQ_FIRST_FRAG_VALID, 0) |
-+ FIELD_PREP(ZXDHQPSQ_FIRST_FRAG_LEN, 0) |
-+ FIELD_PREP(ZXDHQPSQ_FIRST_FRAG_STAG,
-+ 0x100));
-+ set_64bit_val(wqe_base, 8, FIELD_PREP(ZXDHQPSQ_FRAG_TO, 0));
-+ }
++ pthread_spin_unlock(&iwusrq->lock);
++}
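
zxdh_free_srq_wqe() returns a slot by writing its index into the previous tail's header (ZXDHQPSRQ_NEXT_WQE_INDEX), so free SRQ WQEs form a chain threaded through the WQE array itself. The following simplified illustration shows that chaining; the array and field below are invented for the sketch and do not match the real descriptor layout.

#include <stdint.h>
#include <stdio.h>

#define DEMO_NWQE 4

/* Free chain threaded through the WQE "headers", kept as a plain index array. */
static uint16_t next_idx[DEMO_NWQE];

static void demo_free_wqe(uint16_t *tail, uint16_t wqe_index)
{
	/* Old tail's header now points at the newly freed slot. */
	next_idx[*tail] = wqe_index;
	*tail = wqe_index;
}

int main(void)
{
	uint16_t tail = DEMO_NWQE - 1;	/* like srq_ring.tail = srq_size - 1 */

	demo_free_wqe(&tail, 1);
	demo_free_wqe(&tail, 2);
	printf("chain: 3 -> %u -> %u\n", next_idx[3], next_idx[1]);
	return 0;
}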
+diff --git a/providers/zrdma/zxdh_status.h b/providers/zrdma/zxdh_status.h
+new file mode 100644
+index 0000000..d9e9f04
+--- /dev/null
++++ b/providers/zrdma/zxdh_status.h
+@@ -0,0 +1,75 @@
++/* SPDX-License-Identifier: GPL-2.0 or Linux-OpenIB */
++/* Copyright (c) 2024 ZTE Corporation. All rights reserved. */
++#ifndef ZXDH_STATUS_H
++#define ZXDH_STATUS_H
+
-+ if (imm_data_flag) {
-+ wqe_idx = (wqe_idx + 1) % qp->sq_ring.size;
-+ if (!wqe_idx)
-+ qp->swqe_polarity = !qp->swqe_polarity;
-+ wqe_ex = qp->sq_base[wqe_idx].elem;
-+ if (op_info->num_sges > 1) {
-+ qp->wqe_ops.iw_set_fragment(wqe_ex,
-+ ZXDH_QP_FRAG_BYTESIZE,
-+ &op_info->sg_list[1],
-+ qp->swqe_polarity);
-+ }
-+ set_64bit_val(
-+ wqe_ex, 0,
-+ FIELD_PREP(ZXDHQPSQ_IMMDATA_VALID, qp->swqe_polarity) |
-+ FIELD_PREP(ZXDHQPSQ_IMMDATA, info->imm_data));
-+ i = 2;
-+ for (byte_off = ZXDH_QP_FRAG_BYTESIZE; i < op_info->num_sges;
-+ i += 2) {
-+ if (!(i & 0x1)) {
-+ wqe_idx = (wqe_idx + 1) % qp->sq_ring.size;
-+ if (!wqe_idx)
-+ qp->swqe_polarity = !qp->swqe_polarity;
-+ wqe_ex = qp->sq_base[wqe_idx].elem;
-+ }
-+ if (i == addl_frag_cnt) {
-+ qp->wqe_ops.iw_set_fragment(
-+ wqe_ex, 0, &op_info->sg_list[i],
-+ qp->swqe_polarity);
-+ break;
-+ }
-+ qp->wqe_ops.iw_set_fragment(
-+ wqe_ex, byte_off % ZXDH_SQ_WQE_BYTESIZE,
-+ &op_info->sg_list[i + 1], qp->swqe_polarity);
-+ byte_off -= ZXDH_QP_FRAG_BYTESIZE;
-+ qp->wqe_ops.iw_set_fragment(
-+ wqe_ex, byte_off % ZXDH_SQ_WQE_BYTESIZE,
-+ &op_info->sg_list[i], qp->swqe_polarity);
-+ byte_off += ZXDH_QP_FRAG_BYTESIZE;
-+ }
-+ } else {
-+ i = 1;
-+ for (byte_off = 0; i < op_info->num_sges; i += 2) {
-+ if (i & 0x1) {
-+ wqe_idx = (wqe_idx + 1) % qp->sq_ring.size;
-+ if (!wqe_idx)
-+ qp->swqe_polarity = !qp->swqe_polarity;
-+ wqe_ex = qp->sq_base[wqe_idx].elem;
-+ }
-+ if (i == addl_frag_cnt) {
-+ qp->wqe_ops.iw_set_fragment(
-+ wqe_ex, 0, &op_info->sg_list[i],
-+ qp->swqe_polarity);
-+ break;
-+ }
-+ byte_off += ZXDH_QP_FRAG_BYTESIZE;
-+ qp->wqe_ops.iw_set_fragment(
-+ wqe_ex, byte_off % ZXDH_SQ_WQE_BYTESIZE,
-+ &op_info->sg_list[i + 1], qp->swqe_polarity);
-+ byte_off -= ZXDH_QP_FRAG_BYTESIZE;
-+ qp->wqe_ops.iw_set_fragment(
-+ wqe_ex, byte_off % ZXDH_SQ_WQE_BYTESIZE,
-+ &op_info->sg_list[i], qp->swqe_polarity);
-+ }
-+ }
-+
-+	/* if not an odd number, set valid bit in next fragment */
-+ if (!(frag_cnt & 0x01) && frag_cnt && wqe_ex) {
-+ qp->wqe_ops.iw_set_fragment(wqe_ex, ZXDH_QP_FRAG_BYTESIZE, NULL,
-+ qp->swqe_polarity);
-+ }
-+
-+ set_64bit_val(wqe_base, 24,
-+ FIELD_PREP(ZXDHQPSQ_DESTQPN, op_info->dest_qp) |
-+ FIELD_PREP(ZXDHQPSQ_DESTQKEY, op_info->qkey));
-+
-+ udma_to_device_barrier(); /* make sure WQE is populated before valid bit is set */
-+
-+ set_64bit_val(wqe_base, 0, hdr);
-+ if (post_sq)
-+ zxdh_uk_qp_post_wr(qp);
-+
-+ return 0;
-+}
-+
-+/**
-+ * zxdh_set_mw_bind_wqe - set mw bind in wqe
-+ * @wqe: wqe for setting mw bind
-+ * @op_info: info for setting wqe values
-+ */
-+static void zxdh_set_mw_bind_wqe(__le64 *wqe, struct zxdh_bind_window *op_info)
-+{
-+ __u32 value = 0;
-+ __u8 leaf_pbl_size = op_info->leaf_pbl_size;
-+
-+ set_64bit_val(wqe, 8, (uintptr_t)op_info->va);
-+
-+ if (leaf_pbl_size == 0) {
-+ value = (__u32)(op_info->mw_pa_pble_index >> 12);
-+ value = (value & 0x03FFFFFFFC0000) >> 18;
-+ set_64bit_val(
-+ wqe, 16,
-+ FIELD_PREP(ZXDHQPSQ_PARENTMRSTAG, op_info->mr_stag) |
-+ FIELD_PREP(ZXDHQPSQ_MW_PA_PBLE_TWO, value));
-+ } else if (leaf_pbl_size == 1) {
-+ value = (__u32)((op_info->mw_pa_pble_index & 0x0FFC0000) >> 18);
-+ set_64bit_val(
-+ wqe, 16,
-+ FIELD_PREP(ZXDHQPSQ_PARENTMRSTAG, op_info->mr_stag) |
-+ FIELD_PREP(ZXDHQPSQ_MW_PA_PBLE_TWO, value));
-+ } else {
-+ value = (__u32)((op_info->mw_pa_pble_index & 0x0FFC0000) >> 18);
-+ set_64bit_val(
-+ wqe, 16,
-+ FIELD_PREP(ZXDHQPSQ_PARENTMRSTAG, op_info->mr_stag) |
-+ FIELD_PREP(ZXDHQPSQ_MW_LEVLE2_FIRST_PBLE_INDEX,
-+ value) |
-+ FIELD_PREP(ZXDHQPSQ_MW_LEVLE2_ROOT_PBLE_INDEX,
-+ op_info->root_leaf_offset));
-+ }
-+
-+ if (leaf_pbl_size == 0) {
-+ value = (__u32)(op_info->mw_pa_pble_index >> 12);
-+ value = value & 0x3FFFF;
-+ } else {
-+ value = (__u32)(op_info->mw_pa_pble_index & 0x3FFFF);
-+ }
-+
-+ set_64bit_val(wqe, 24,
-+ op_info->bind_len |
-+ FIELD_PREP(ZXDHQPSQ_MW_PA_PBLE_ONE, value));
-+}
-+
-+/**
-+ * zxdh_copy_inline_data - Copy inline data to wqe
-+ * @dest: pointer to wqe
-+ * @src: pointer to inline data
-+ * @len: length of inline data to copy
-+ * @polarity: polarity of wqe valid bit
-+ */
-+static void zxdh_copy_inline_data(__u8 *dest, __u8 *src, __u32 len,
-+ __u8 polarity, bool imm_data_flag)
-+{
-+ __u8 inline_valid = polarity << ZXDH_INLINE_VALID_S;
-+ __u32 copy_size;
-+ __u8 *inline_valid_addr;
-+
-+ dest += ZXDH_WQE_SIZE_32; /* point to additional 32 byte quanta */
-+ if (len) {
-+ inline_valid_addr = dest + WQE_OFFSET_7BYTES;
-+ if (imm_data_flag) {
-+ copy_size = len < INLINE_DATASIZE_24BYTES ?
-+ len :
-+ INLINE_DATASIZE_24BYTES;
-+ dest += WQE_OFFSET_8BYTES;
-+ memcpy(dest, src, copy_size);
-+ len -= copy_size;
-+ dest += WQE_OFFSET_24BYTES;
-+ src += copy_size;
-+ } else {
-+ if (len <= INLINE_DATASIZE_7BYTES) {
-+ copy_size = len;
-+ memcpy(dest, src, copy_size);
-+ *inline_valid_addr = inline_valid;
-+ return;
-+ }
-+ memcpy(dest, src, INLINE_DATASIZE_7BYTES);
-+ len -= INLINE_DATASIZE_7BYTES;
-+ dest += WQE_OFFSET_8BYTES;
-+ src += INLINE_DATA_OFFSET_7BYTES;
-+ copy_size = len < INLINE_DATASIZE_24BYTES ?
-+ len :
-+ INLINE_DATASIZE_24BYTES;
-+ memcpy(dest, src, copy_size);
-+ len -= copy_size;
-+ dest += WQE_OFFSET_24BYTES;
-+ src += copy_size;
-+ }
-+ *inline_valid_addr = inline_valid;
-+ }
-+
-+ while (len) {
-+ inline_valid_addr = dest + WQE_OFFSET_7BYTES;
-+ if (len <= INLINE_DATASIZE_7BYTES) {
-+ copy_size = len;
-+ memcpy(dest, src, copy_size);
-+ *inline_valid_addr = inline_valid;
-+ return;
-+ }
-+ memcpy(dest, src, INLINE_DATASIZE_7BYTES);
-+ len -= INLINE_DATASIZE_7BYTES;
-+ dest += WQE_OFFSET_8BYTES;
-+ src += INLINE_DATA_OFFSET_7BYTES;
-+ copy_size = len < INLINE_DATASIZE_24BYTES ?
-+ len :
-+ INLINE_DATASIZE_24BYTES;
-+ memcpy(dest, src, copy_size);
-+ len -= copy_size;
-+ dest += WQE_OFFSET_24BYTES;
-+ src += copy_size;
-+
-+ *inline_valid_addr = inline_valid;
-+ }
-+}
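
zxdh_copy_inline_data() above packs the payload into extra 32-byte quanta: 7 bytes, a valid byte at offset 7, then 24 more bytes per quantum (with the 7-byte prefix skipped in the first quantum when immediate data is present). The worked example below shows how a payload length maps onto that layout; it is a reading of the copy loop above, not a hardware specification.

#include <stdint.h>
#include <stdio.h>

/*
 * Count how many extra 32-byte quanta of inline space a payload needs when
 * each quantum carries 7 bytes, one valid byte, then 24 bytes, mirroring
 * the copy loop in zxdh_copy_inline_data() (no immediate data case).
 */
static unsigned int demo_inline_quanta_used(uint32_t len)
{
	unsigned int quanta = 0;

	while (len) {
		quanta++;
		len -= (len < 31) ? len : 31;	/* up to 7 + 24 bytes per quantum */
	}
	return quanta;
}

int main(void)
{
	const uint32_t lens[] = { 7, 8, 31, 32, 86 };

	for (unsigned int i = 0; i < 5; i++)
		printf("len=%2u -> %u quanta\n", lens[i],
		       demo_inline_quanta_used(lens[i]));
	return 0;
}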
-+
-+/**
-+ * zxdh_inline_data_size_to_quanta - based on inline data, quanta
-+ * @data_size: data size for inline
-+ * @imm_data_flag: flag for immediate data
-+ *
-+ * Gets the quanta based on inline and immediate data.
-+ */
-+static __u16 zxdh_inline_data_size_to_quanta(__u32 data_size,
-+ bool imm_data_flag)
-+{
-+ if (imm_data_flag)
-+ data_size += INLINE_DATASIZE_7BYTES;
-+
-+ return data_size % 31 ? data_size / 31 + 2 : data_size / 31 + 1;
-+}
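
The helper above converts an inline payload size into SQ quanta: one quantum per 31 bytes of payload (rounded up) plus one for the header, with 7 extra bytes accounted for when immediate data is present. A standalone numeric check of the same formula:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Mirror of the quanta formula above, kept standalone for experimentation. */
static uint16_t demo_inline_quanta(uint32_t data_size, bool imm)
{
	if (imm)
		data_size += 7;	/* INLINE_DATASIZE_7BYTES in the driver */

	return data_size % 31 ? data_size / 31 + 2 : data_size / 31 + 1;
}

int main(void)
{
	printf("%u %u %u %u\n",
	       demo_inline_quanta(0, false),   /* 1 quantum: header only    */
	       demo_inline_quanta(31, false),  /* 2: header + one data      */
	       demo_inline_quanta(32, false),  /* 3: 32 bytes spill over    */
	       demo_inline_quanta(24, true));  /* 2: 24 + 7 fits exactly    */
	return 0;
}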
-+
-+/**
-+ * zxdh_uk_inline_rdma_write - inline rdma write operation
-+ * @qp: hw qp ptr
-+ * @info: post sq information
-+ * @post_sq: flag to post sq
-+ */
-+enum zxdh_status_code zxdh_uk_inline_rdma_write(struct zxdh_qp_uk *qp,
-+ struct zxdh_post_sq_info *info,
-+ bool post_sq)
-+{
-+ __le64 *wqe;
-+ __u8 imm_valid;
-+ struct zxdh_inline_rdma_write *op_info;
-+ __u64 hdr = 0;
-+ __u32 wqe_idx;
-+ bool read_fence = false;
-+ __u16 quanta;
-+ bool imm_data_flag = info->imm_data_valid ? 1 : 0;
-+
-+ op_info = &info->op.inline_rdma_write;
-+
-+ if (op_info->len > qp->max_inline_data)
-+ return ZXDH_ERR_INVALID_INLINE_DATA_SIZE;
-+ if (imm_data_flag && op_info->len > ZXDH_MAX_SQ_INLINE_DATELEN_WITH_IMM)
-+ return ZXDH_ERR_INVALID_INLINE_DATA_SIZE;
-+
-+ quanta = qp->wqe_ops.iw_inline_data_size_to_quanta(op_info->len,
-+ imm_data_flag);
-+ wqe = zxdh_qp_get_next_send_wqe(qp, &wqe_idx, quanta, op_info->len,
-+ info);
-+ if (!wqe)
-+ return ZXDH_ERR_QP_TOOMANY_WRS_POSTED;
-+
-+ zxdh_clr_wqes(qp, wqe_idx);
-+
-+ read_fence |= info->read_fence;
-+ hdr = FIELD_PREP(ZXDHQPSQ_VALID, qp->swqe_polarity) |
-+ FIELD_PREP(ZXDHQPSQ_OPCODE, info->op_type) |
-+ FIELD_PREP(ZXDHQPSQ_SIGCOMPL, info->signaled) |
-+ FIELD_PREP(ZXDHQPSQ_LOCALFENCE, info->local_fence) |
-+ FIELD_PREP(ZXDHQPSQ_READFENCE, read_fence) |
-+ FIELD_PREP(ZXDHQPSQ_SOLICITED, info->solicited) |
-+ FIELD_PREP(ZXDHQPSQ_IMMDATAFLAG, imm_data_flag) |
-+ FIELD_PREP(ZXDHQPSQ_WRITE_INLINEDATAFLAG, 1) |
-+ FIELD_PREP(ZXDHQPSQ_WRITE_INLINEDATALEN, op_info->len) |
-+ FIELD_PREP(ZXDHQPSQ_ADDFRAGCNT, quanta - 1) |
-+ FIELD_PREP(ZXDHQPSQ_REMSTAG, op_info->rem_addr.stag);
-+ set_64bit_val(wqe, 24,
-+ FIELD_PREP(ZXDHQPSQ_FRAG_TO, op_info->rem_addr.tag_off));
-+
-+ if (imm_data_flag) {
-+		/* if inline data exists, do not update imm valid */
-+ imm_valid = (op_info->len == 0) ? qp->swqe_polarity :
-+ (!qp->swqe_polarity);
-+
-+ set_64bit_val(wqe, 32,
-+ FIELD_PREP(ZXDHQPSQ_IMMDATA_VALID, imm_valid) |
-+ FIELD_PREP(ZXDHQPSQ_IMMDATA,
-+ info->imm_data));
-+ }
-+ qp->wqe_ops.iw_copy_inline_data((__u8 *)wqe, op_info->data,
-+ op_info->len, qp->swqe_polarity,
-+ imm_data_flag);
-+
-+ udma_to_device_barrier(); /* make sure WQE is populated before valid bit is set */
-+
-+ set_64bit_val(wqe, 0, hdr);
-+
-+ if (post_sq)
-+ zxdh_uk_qp_post_wr(qp);
-+ qp_tx_psn_add(&qp->next_psn, op_info->len, qp->mtu);
-+ return 0;
-+}
-+
-+/**
-+ * zxdh_uk_rc_inline_send - inline send operation
-+ * @qp: hw qp ptr
-+ * @info: post sq information
-+ * @post_sq: flag to post sq
-+ */
-+enum zxdh_status_code zxdh_uk_rc_inline_send(struct zxdh_qp_uk *qp,
-+ struct zxdh_post_sq_info *info,
-+ bool post_sq)
-+{
-+ __le64 *wqe;
-+ __u8 imm_valid;
-+ struct zxdh_inline_rdma_send *op_info;
-+ __u64 hdr;
-+ __u32 wqe_idx;
-+ bool read_fence = false;
-+ __u16 quanta;
-+ bool imm_data_flag = info->imm_data_valid ? 1 : 0;
-+
-+ op_info = &info->op.inline_rdma_send;
-+
-+ if (op_info->len > qp->max_inline_data)
-+ return ZXDH_ERR_INVALID_INLINE_DATA_SIZE;
-+ if (imm_data_flag && op_info->len > ZXDH_MAX_SQ_INLINE_DATELEN_WITH_IMM)
-+ return ZXDH_ERR_INVALID_INLINE_DATA_SIZE;
-+
-+ quanta = qp->wqe_ops.iw_inline_data_size_to_quanta(op_info->len,
-+ imm_data_flag);
-+ wqe = zxdh_qp_get_next_send_wqe(qp, &wqe_idx, quanta, op_info->len,
-+ info);
-+ if (!wqe)
-+ return ZXDH_ERR_QP_TOOMANY_WRS_POSTED;
-+
-+ zxdh_clr_wqes(qp, wqe_idx);
-+
-+ read_fence |= info->read_fence;
-+ hdr = FIELD_PREP(ZXDHQPSQ_VALID, qp->swqe_polarity) |
-+ FIELD_PREP(ZXDHQPSQ_OPCODE, info->op_type) |
-+ FIELD_PREP(ZXDHQPSQ_SIGCOMPL, info->signaled) |
-+ FIELD_PREP(ZXDHQPSQ_LOCALFENCE, info->local_fence) |
-+ FIELD_PREP(ZXDHQPSQ_READFENCE, read_fence) |
-+ FIELD_PREP(ZXDHQPSQ_SOLICITED, info->solicited) |
-+ FIELD_PREP(ZXDHQPSQ_ADDFRAGCNT, quanta - 1) |
-+ FIELD_PREP(ZXDHQPSQ_IMMDATAFLAG, imm_data_flag) |
-+ FIELD_PREP(ZXDHQPSQ_REMSTAG, info->stag_to_inv);
-+ set_64bit_val(wqe, 24,
-+ FIELD_PREP(ZXDHQPSQ_INLINEDATAFLAG, 1) |
-+ FIELD_PREP(ZXDHQPSQ_INLINEDATALEN, op_info->len));
-+
-+ if (imm_data_flag) {
-+		/* if inline data exists, do not update imm valid */
-+ imm_valid = (op_info->len == 0) ? qp->swqe_polarity :
-+ (!qp->swqe_polarity);
-+ set_64bit_val(wqe, 32,
-+ FIELD_PREP(ZXDHQPSQ_IMMDATA_VALID, imm_valid) |
-+ FIELD_PREP(ZXDHQPSQ_IMMDATA,
-+ info->imm_data));
-+ }
-+
-+ qp->wqe_ops.iw_copy_inline_data((__u8 *)wqe, op_info->data,
-+ op_info->len, qp->swqe_polarity,
-+ imm_data_flag);
-+
-+ udma_to_device_barrier(); /* make sure WQE is populated before valid bit is set */
-+
-+ set_64bit_val(wqe, 0, hdr);
-+
-+ if (post_sq)
-+ zxdh_uk_qp_post_wr(qp);
-+
-+ qp_tx_psn_add(&qp->next_psn, op_info->len, qp->mtu);
-+ return 0;
-+}
-+
-+/**
-+ * zxdh_uk_ud_inline_send - inline send operation
-+ * @qp: hw qp ptr
-+ * @info: post sq information
-+ * @post_sq: flag to post sq
-+ */
-+enum zxdh_status_code zxdh_uk_ud_inline_send(struct zxdh_qp_uk *qp,
-+ struct zxdh_post_sq_info *info,
-+ bool post_sq)
-+{
-+ __le64 *wqe_base;
-+ __le64 *wqe_ex;
-+ struct zxdh_inline_rdma_send *op_info;
-+ __u64 hdr;
-+ __u32 wqe_idx;
-+ bool read_fence = false;
-+ __u16 quanta;
-+ bool imm_data_flag = info->imm_data_valid ? 1 : 0;
-+ __u8 *inline_dest;
-+ __u8 *inline_src;
-+ __u32 inline_len;
-+ __u32 copy_size;
-+ __u8 *inline_valid_addr;
-+
-+ op_info = &info->op.inline_rdma_send;
-+ inline_len = op_info->len;
-+
-+ if (op_info->len > qp->max_inline_data)
-+ return ZXDH_ERR_INVALID_INLINE_DATA_SIZE;
-+ if (imm_data_flag && op_info->len > ZXDH_MAX_SQ_INLINE_DATELEN_WITH_IMM)
-+ return ZXDH_ERR_INVALID_INLINE_DATA_SIZE;
-+
-+ quanta = qp->wqe_ops.iw_inline_data_size_to_quanta(op_info->len,
-+ imm_data_flag);
-+ if (quanta > ZXDH_SQ_RING_FREE_QUANTA(qp->sq_ring))
-+ return ZXDH_ERR_QP_TOOMANY_WRS_POSTED;
-+
-+ wqe_idx = ZXDH_RING_CURRENT_HEAD(qp->sq_ring);
-+ if (!wqe_idx)
-+ qp->swqe_polarity = !qp->swqe_polarity;
-+
-+ ZXDH_RING_MOVE_HEAD_BY_COUNT_NOCHECK(qp->sq_ring, quanta);
-+
-+ wqe_base = qp->sq_base[wqe_idx].elem;
-+ qp->sq_wrtrk_array[wqe_idx].wrid = info->wr_id;
-+ qp->sq_wrtrk_array[wqe_idx].wr_len = op_info->len;
-+ qp->sq_wrtrk_array[wqe_idx].quanta = quanta;
-+
-+ zxdh_clr_wqes(qp, wqe_idx);
-+
-+ read_fence |= info->read_fence;
-+ hdr = FIELD_PREP(ZXDHQPSQ_VALID, qp->swqe_polarity) |
-+ FIELD_PREP(ZXDHQPSQ_OPCODE, info->op_type) |
-+ FIELD_PREP(ZXDHQPSQ_SIGCOMPL, info->signaled) |
-+ FIELD_PREP(ZXDHQPSQ_SOLICITED, info->solicited) |
-+ FIELD_PREP(ZXDHQPSQ_IMMDATAFLAG, imm_data_flag) |
-+ FIELD_PREP(ZXDHQPSQ_UD_INLINEDATAFLAG, 1) |
-+ FIELD_PREP(ZXDHQPSQ_UD_INLINEDATALEN, op_info->len) |
-+ FIELD_PREP(ZXDHQPSQ_UD_ADDFRAGCNT, quanta - 1) |
-+ FIELD_PREP(ZXDHQPSQ_AHID, op_info->ah_id);
-+ set_64bit_val(wqe_base, 24,
-+ FIELD_PREP(ZXDHQPSQ_DESTQPN, op_info->dest_qp) |
-+ FIELD_PREP(ZXDHQPSQ_DESTQKEY, op_info->qkey));
-+
-+ if (imm_data_flag) {
-+ wqe_idx = (wqe_idx + 1) % qp->sq_ring.size;
-+ if (!wqe_idx)
-+ qp->swqe_polarity = !qp->swqe_polarity;
-+ wqe_ex = qp->sq_base[wqe_idx].elem;
-+
-+ if (inline_len) {
-+			/* imm and inline use the same valid bit, set after inline data is updated */
-+ copy_size = inline_len < INLINE_DATASIZE_24BYTES ?
-+ inline_len :
-+ INLINE_DATASIZE_24BYTES;
-+ inline_dest = (__u8 *)wqe_ex + WQE_OFFSET_8BYTES;
-+ inline_src = (__u8 *)op_info->data;
-+ memcpy(inline_dest, inline_src, copy_size);
-+ inline_len -= copy_size;
-+ inline_src += copy_size;
-+ }
-+ set_64bit_val(
-+ wqe_ex, 0,
-+ FIELD_PREP(ZXDHQPSQ_IMMDATA_VALID, qp->swqe_polarity) |
-+ FIELD_PREP(ZXDHQPSQ_IMMDATA, info->imm_data));
-+
-+ } else if (inline_len) {
-+ wqe_idx = (wqe_idx + 1) % qp->sq_ring.size;
-+ if (!wqe_idx)
-+ qp->swqe_polarity = !qp->swqe_polarity;
-+ wqe_ex = qp->sq_base[wqe_idx].elem;
-+ inline_dest = (__u8 *)wqe_ex;
-+ inline_src = (__u8 *)op_info->data;
-+
-+ if (inline_len <= INLINE_DATASIZE_7BYTES) {
-+ copy_size = inline_len;
-+ memcpy(inline_dest, inline_src, copy_size);
-+ inline_len = 0;
-+ } else {
-+ copy_size = INLINE_DATASIZE_7BYTES;
-+ memcpy(inline_dest, inline_src, copy_size);
-+ inline_len -= copy_size;
-+ inline_src += copy_size;
-+ inline_dest += WQE_OFFSET_8BYTES;
-+ copy_size = inline_len < INLINE_DATASIZE_24BYTES ?
-+ inline_len :
-+ INLINE_DATASIZE_24BYTES;
-+ memcpy(inline_dest, inline_src, copy_size);
-+ inline_len -= copy_size;
-+ inline_src += copy_size;
-+ }
-+ inline_valid_addr = (__u8 *)wqe_ex + WQE_OFFSET_7BYTES;
-+ *inline_valid_addr = qp->swqe_polarity << ZXDH_INLINE_VALID_S;
-+ }
-+
-+ while (inline_len) {
-+ wqe_idx = (wqe_idx + 1) % qp->sq_ring.size;
-+ if (!wqe_idx)
-+ qp->swqe_polarity = !qp->swqe_polarity;
-+ wqe_ex = qp->sq_base[wqe_idx].elem;
-+ inline_dest = (__u8 *)wqe_ex;
-+
-+ if (inline_len <= INLINE_DATASIZE_7BYTES) {
-+ copy_size = inline_len;
-+ memcpy(inline_dest, inline_src, copy_size);
-+ inline_len = 0;
-+ } else {
-+ copy_size = INLINE_DATASIZE_7BYTES;
-+ memcpy(inline_dest, inline_src, copy_size);
-+ inline_len -= copy_size;
-+ inline_src += copy_size;
-+ inline_dest += WQE_OFFSET_8BYTES;
-+ copy_size = inline_len < INLINE_DATASIZE_24BYTES ?
-+ inline_len :
-+ INLINE_DATASIZE_24BYTES;
-+ memcpy(inline_dest, inline_src, copy_size);
-+ inline_len -= copy_size;
-+ inline_src += copy_size;
-+ }
-+ inline_valid_addr = (__u8 *)wqe_ex + WQE_OFFSET_7BYTES;
-+ *inline_valid_addr = qp->swqe_polarity << ZXDH_INLINE_VALID_S;
-+ }
-+
-+ udma_to_device_barrier(); /* make sure WQE is populated before valid bit is set */
-+
-+ set_64bit_val(wqe_base, 0, hdr);
-+
-+ if (post_sq)
-+ zxdh_uk_qp_post_wr(qp);
-+
-+ return 0;
-+}
-+
-+/**
-+ * zxdh_uk_stag_local_invalidate - stag invalidate operation
-+ * @qp: hw qp ptr
-+ * @info: post sq information
-+ * @post_sq: flag to post sq
-+ */
-+enum zxdh_status_code
-+zxdh_uk_stag_local_invalidate(struct zxdh_qp_uk *qp,
-+ struct zxdh_post_sq_info *info, bool post_sq)
-+{
-+ __le64 *wqe;
-+ struct zxdh_inv_local_stag *op_info;
-+ __u64 hdr;
-+ __u32 wqe_idx;
-+ bool local_fence = true;
-+
-+ op_info = &info->op.inv_local_stag;
-+
-+ wqe = zxdh_qp_get_next_send_wqe(qp, &wqe_idx, ZXDH_QP_WQE_MIN_QUANTA, 0,
-+ info);
-+ if (!wqe)
-+ return ZXDH_ERR_QP_TOOMANY_WRS_POSTED;
-+
-+ zxdh_clr_wqes(qp, wqe_idx);
-+
-+ set_64bit_val(wqe, 16, 0);
-+
-+ hdr = FIELD_PREP(ZXDHQPSQ_VALID, qp->swqe_polarity) |
-+ FIELD_PREP(ZXDHQPSQ_OPCODE, ZXDH_OP_TYPE_LOCAL_INV) |
-+ FIELD_PREP(ZXDHQPSQ_SIGCOMPL, info->signaled) |
-+ FIELD_PREP(ZXDHQPSQ_LOCALFENCE, local_fence) |
-+ FIELD_PREP(ZXDHQPSQ_READFENCE, info->read_fence) |
-+ FIELD_PREP(ZXDHQPSQ_REMSTAG, op_info->target_stag);
-+
-+ udma_to_device_barrier(); /* make sure WQE is populated before valid bit is set */
-+
-+ set_64bit_val(wqe, 0, hdr);
-+
-+ if (post_sq)
-+ zxdh_uk_qp_post_wr(qp);
-+
-+ return 0;
-+}
-+
-+/**
-+ * zxdh_uk_mw_bind - bind Memory Window
-+ * @qp: hw qp ptr
-+ * @info: post sq information
-+ * @post_sq: flag to post sq
-+ */
-+enum zxdh_status_code zxdh_uk_mw_bind(struct zxdh_qp_uk *qp,
-+ struct zxdh_post_sq_info *info,
-+ bool post_sq)
-+{
-+ __le64 *wqe;
-+ struct zxdh_bind_window *op_info;
-+ __u64 hdr;
-+ __u32 wqe_idx;
-+ bool local_fence = true;
-+ __u8 access = 1;
-+ __u16 value = 0;
-+
-+ op_info = &info->op.bind_window;
-+ local_fence |= info->local_fence;
-+
-+ wqe = zxdh_qp_get_next_send_wqe(qp, &wqe_idx, ZXDH_QP_WQE_MIN_QUANTA, 0,
-+ info);
-+ if (!wqe)
-+ return ZXDH_ERR_QP_TOOMANY_WRS_POSTED;
-+
-+ zxdh_clr_wqes(qp, wqe_idx);
-+
-+ if (op_info->ena_writes) {
-+ access = (op_info->ena_reads << 2) |
-+ (op_info->ena_writes << 3) | (1 << 1) | access;
-+ } else {
-+ access = (op_info->ena_reads << 2) |
-+ (op_info->ena_writes << 3) | access;
-+ }
-+
-+ qp->wqe_ops.iw_set_mw_bind_wqe(wqe, op_info);
-+
-+ value = (__u16)((op_info->mw_pa_pble_index >> 12) & 0xC000000000000);
-+
-+ hdr = FIELD_PREP(ZXDHQPSQ_OPCODE, ZXDH_OP_TYPE_BIND_MW) |
-+ FIELD_PREP(ZXDHQPSQ_MWSTAG, op_info->mw_stag) |
-+ FIELD_PREP(ZXDHQPSQ_STAGRIGHTS, access) |
-+ FIELD_PREP(ZXDHQPSQ_VABASEDTO,
-+ (op_info->addressing_type == ZXDH_ADDR_TYPE_VA_BASED ?
-+ 1 :
-+ 0)) |
-+ FIELD_PREP(ZXDHQPSQ_MEMWINDOWTYPE,
-+ (op_info->mem_window_type_1 ? 1 : 0)) |
-+ FIELD_PREP(ZXDHQPSQ_READFENCE, info->read_fence) |
-+ FIELD_PREP(ZXDHQPSQ_LOCALFENCE, local_fence) |
-+ FIELD_PREP(ZXDHQPSQ_SIGCOMPL, info->signaled) |
-+ FIELD_PREP(ZXDHQPSQ_MW_HOST_PAGE_SIZE, op_info->host_page_size) |
-+ FIELD_PREP(ZXDHQPSQ_MW_LEAF_PBL_SIZE, op_info->leaf_pbl_size) |
-+ FIELD_PREP(ZXDHQPSQ_MW_PA_PBLE_THREE, value) |
-+ FIELD_PREP(ZXDHQPSQ_VALID, qp->swqe_polarity);
-+
-+ udma_to_device_barrier(); /* make sure WQE is populated before valid bit is set */
-+
-+ set_64bit_val(wqe, 0, hdr);
-+
-+ if (post_sq)
-+ zxdh_uk_qp_post_wr(qp);
-+
-+ return 0;
-+}
-+
-+static void zxdh_sleep_ns(unsigned int nanoseconds)
-+{
-+ struct timespec req;
-+
-+ req.tv_sec = 0;
-+ req.tv_nsec = nanoseconds;
-+ nanosleep(&req, NULL);
-+}
-+
-+/**
-+ * zxdh_uk_post_receive - post receive wqe
-+ * @qp: hw qp ptr
-+ * @info: post rq information
-+ */
-+enum zxdh_status_code zxdh_uk_post_receive(struct zxdh_qp_uk *qp,
-+ struct zxdh_post_rq_info *info)
-+{
-+ __u32 wqe_idx, i, byte_off;
-+ __le64 *wqe;
-+ struct zxdh_sge *sge;
-+
-+ if (qp->max_rq_frag_cnt < info->num_sges)
-+ return ZXDH_ERR_INVALID_FRAG_COUNT;
-+
-+ wqe = zxdh_qp_get_next_recv_wqe(qp, &wqe_idx);
-+ if (unlikely(!wqe))
-+ return ZXDH_ERR_QP_TOOMANY_WRS_POSTED;
-+
-+ qp->rq_wrid_array[wqe_idx] = info->wr_id;
-+
-+ for (i = 0, byte_off = ZXDH_QP_FRAG_BYTESIZE; i < info->num_sges; i++) {
-+ sge = &info->sg_list[i];
-+ set_64bit_val(wqe, byte_off, sge->tag_off);
-+ set_64bit_val(wqe, byte_off + 8,
-+ FIELD_PREP(ZXDHQPRQ_FRAG_LEN, sge->len) |
-+ FIELD_PREP(ZXDHQPRQ_STAG, sge->stag));
-+ byte_off += ZXDH_QP_FRAG_BYTESIZE;
-+ }
-+
-+ /**
-+ * while info->num_sges < qp->max_rq_frag_cnt, or 0 == info->num_sges,
-+ * fill next fragment with FRAG_LEN=0, FRAG_STAG=0x00000100,
-+	 * which indicates an invalid fragment
-+ */
-+ if (info->num_sges < qp->max_rq_frag_cnt || 0 == info->num_sges) {
-+ set_64bit_val(wqe, byte_off, 0);
-+ set_64bit_val(wqe, byte_off + 8,
-+ FIELD_PREP(ZXDHQPRQ_FRAG_LEN, 0) |
-+ FIELD_PREP(ZXDHQPRQ_STAG, 0x00000100));
-+ }
-+
-+ set_64bit_val(wqe, 0,
-+ FIELD_PREP(ZXDHQPRQ_ADDFRAGCNT, info->num_sges) |
-+ FIELD_PREP(ZXDHQPRQ_SIGNATURE,
-+ qp->rwqe_signature));
-+
-+ udma_to_device_barrier(); /* make sure WQE is populated before valid bit is set */
-+ if (info->num_sges > 3)
-+ zxdh_sleep_ns(1000);
-+
-+ set_64bit_val(wqe, 8, FIELD_PREP(ZXDHQPRQ_VALID, qp->rwqe_polarity));
-+
-+ return 0;
-+}
-+
-+/**
-+ * zxdh_uk_cq_resize - reset the cq buffer info
-+ * @cq: cq to resize
-+ * @cq_base: new cq buffer addr
-+ * @cq_size: number of cqes
-+ */
-+void zxdh_uk_cq_resize(struct zxdh_cq_uk *cq, void *cq_base, int cq_size)
-+{
-+ cq->cq_base = cq_base;
-+ cq->cq_size = cq_size;
-+ ZXDH_RING_INIT(cq->cq_ring, cq->cq_size);
-+ cq->polarity = 1;
-+}
-+
-+/**
-+ * zxdh_uk_cq_set_resized_cnt - record the count of the resized buffers
-+ * @cq: cq to resize
-+ * @cq_cnt: the count of the resized cq buffers
-+ */
-+void zxdh_uk_cq_set_resized_cnt(struct zxdh_cq_uk *cq, __u16 cq_cnt)
-+{
-+ __u64 temp_val;
-+ __u16 sw_cq_sel;
-+ __u8 arm_next;
-+ __u8 arm_seq_num;
-+
-+ get_64bit_val(cq->shadow_area, 0, &temp_val);
-+
-+ sw_cq_sel = (__u16)FIELD_GET(ZXDH_CQ_DBSA_SW_CQ_SELECT, temp_val);
-+ sw_cq_sel += cq_cnt;
-+
-+ arm_seq_num = (__u8)FIELD_GET(ZXDH_CQ_DBSA_ARM_SEQ_NUM, temp_val);
-+ arm_next = (__u8)FIELD_GET(ZXDH_CQ_DBSA_ARM_NEXT, temp_val);
-+ cq->cqe_rd_cnt = 0;
-+
-+ temp_val = FIELD_PREP(ZXDH_CQ_DBSA_ARM_SEQ_NUM, arm_seq_num) |
-+ FIELD_PREP(ZXDH_CQ_DBSA_SW_CQ_SELECT, sw_cq_sel) |
-+ FIELD_PREP(ZXDH_CQ_DBSA_ARM_NEXT, arm_next) |
-+ FIELD_PREP(ZXDH_CQ_DBSA_CQEIDX, cq->cqe_rd_cnt);
-+
-+ set_64bit_val(cq->shadow_area, 0, temp_val);
-+}
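
The doorbell shadow-area updates above pack several subfields (arm sequence number, CQ select, arm-next, CQE index) into one 64-bit word with FIELD_GET()/FIELD_PREP(). For readers unfamiliar with those helpers, here is a minimal stand-in built from a mask and GCC-style builtins; the masks are examples only, not the real ZXDH_CQ_DBSA layout.

#include <stdint.h>
#include <stdio.h>

/* Simplified stand-ins for the kernel-style FIELD_PREP()/FIELD_GET() helpers. */
#define DEMO_FIELD_PREP(mask, val) \
	(((uint64_t)(val) << __builtin_ctzll(mask)) & (mask))
#define DEMO_FIELD_GET(mask, reg) \
	(((reg) & (mask)) >> __builtin_ctzll(mask))

#define DEMO_SW_CQ_SELECT 0x0000000000ffff00ULL	/* example mask only */
#define DEMO_ARM_NEXT     0x0000000001000000ULL	/* example mask only */

int main(void)
{
	uint64_t dbsa = DEMO_FIELD_PREP(DEMO_SW_CQ_SELECT, 0x1234) |
			DEMO_FIELD_PREP(DEMO_ARM_NEXT, 1);

	printf("sw_cq_sel=0x%llx arm_next=%llu\n",
	       (unsigned long long)DEMO_FIELD_GET(DEMO_SW_CQ_SELECT, dbsa),
	       (unsigned long long)DEMO_FIELD_GET(DEMO_ARM_NEXT, dbsa));
	return 0;
}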
-+
-+/**
-+ * zxdh_uk_cq_request_notification - cq notification request (door bell)
-+ * @cq: hw cq
-+ * @cq_notify: notification type
-+ */
-+void zxdh_uk_cq_request_notification(struct zxdh_cq_uk *cq,
-+ enum zxdh_cmpl_notify cq_notify)
-+{
-+ __u64 temp_val;
-+ __u16 sw_cq_sel;
-+ __u8 arm_next = 0;
-+ __u8 arm_seq_num;
-+ __u32 cqe_index;
-+ __u32 hdr;
-+
-+ get_64bit_val(cq->shadow_area, 0, &temp_val);
-+ arm_seq_num = (__u8)FIELD_GET(ZXDH_CQ_DBSA_ARM_SEQ_NUM, temp_val);
-+ arm_seq_num++;
-+ sw_cq_sel = (__u16)FIELD_GET(ZXDH_CQ_DBSA_SW_CQ_SELECT, temp_val);
-+ cqe_index = (__u32)FIELD_GET(ZXDH_CQ_DBSA_CQEIDX, temp_val);
-+
-+ if (cq_notify == ZXDH_CQ_COMPL_SOLICITED)
-+ arm_next = 1;
-+ temp_val = FIELD_PREP(ZXDH_CQ_DBSA_ARM_SEQ_NUM, arm_seq_num) |
-+ FIELD_PREP(ZXDH_CQ_DBSA_SW_CQ_SELECT, sw_cq_sel) |
-+ FIELD_PREP(ZXDH_CQ_DBSA_ARM_NEXT, arm_next) |
-+ FIELD_PREP(ZXDH_CQ_DBSA_CQEIDX, cqe_index);
-+
-+ set_64bit_val(cq->shadow_area, 0, temp_val);
-+
-+ hdr = FIELD_PREP(ZXDH_CQ_ARM_DBSA_VLD, 0) |
-+ FIELD_PREP(ZXDH_CQ_ARM_CQ_ID, cq->cq_id);
-+
-+ udma_to_device_barrier(); /* make sure WQE is populated before valid bit is set */
-+
-+ db_wr32(hdr, cq->cqe_alloc_db);
-+}
-+
-+static inline void build_comp_status(__u32 cq_type,
-+ struct zxdh_cq_poll_info *info)
-+{
-+ if (!info->error) {
-+ info->comp_status = ZXDH_COMPL_STATUS_SUCCESS;
-+ if (cq_type == ZXDH_CQE_QTYPE_RQ) {
-+ if (info->major_err != ERROR_CODE_VALUE &&
-+ info->minor_err != ERROR_CODE_VALUE) {
-+ info->comp_status = ZXDH_COMPL_STATUS_UNKNOWN;
-+ }
-+ }
-+ return;
-+ }
-+ if (info->major_err == ZXDH_RETRY_ACK_MAJOR_ERR &&
-+ info->minor_err == ZXDH_RETRY_ACK_MINOR_ERR) {
-+ info->comp_status = ZXDH_COMPL_STATUS_RETRY_ACK_ERR;
-+ return;
-+ }
-+ if (info->major_err == ZXDH_RETRY_ACK_MAJOR_ERR &&
-+ info->minor_err == ZXDH_TX_WINDOW_QUERY_ITEM_MINOR_ERR) {
-+ info->comp_status = ZXDH_COMPL_STATUS_TX_WINDOW_QUERY_ITEM_ERR;
-+ return;
-+ }
-+ info->comp_status = (info->major_err == ZXDH_FLUSH_MAJOR_ERR) ?
-+ ZXDH_COMPL_STATUS_FLUSHED :
-+ ZXDH_COMPL_STATUS_UNKNOWN;
-+}
-+
-+__le64 *get_current_cqe(struct zxdh_cq_uk *cq)
-+{
-+ return ZXDH_GET_CURRENT_EXTENDED_CQ_ELEM(cq);
-+}
-+
-+static inline void zxdh_get_cq_poll_info(struct zxdh_qp_uk *qp,
-+ struct zxdh_cq_poll_info *info,
-+ __u64 qword2, __u64 qword3)
-+{
-+ __u8 qp_type;
-+
-+ qp_type = qp->qp_type;
-+
-+ info->imm_valid = (bool)FIELD_GET(ZXDH_CQ_IMMVALID, qword2);
-+ if (info->imm_valid) {
-+ info->imm_data = (__u32)FIELD_GET(ZXDH_CQ_IMMDATA, qword3);
-+ info->op_type = ZXDH_OP_TYPE_REC_IMM;
-+ } else {
-+ info->op_type = ZXDH_OP_TYPE_REC;
-+ }
-+
-+ info->bytes_xfered = (__u32)FIELD_GET(ZXDHCQ_PAYLDLEN, qword3);
-+
-+ if (likely(qp_type == ZXDH_QP_TYPE_ROCE_RC)) {
-+ if (qword2 & ZXDHCQ_STAG) {
-+ info->stag_invalid_set = true;
-+ info->inv_stag =
-+ (__u32)FIELD_GET(ZXDHCQ_INVSTAG, qword2);
-+ } else {
-+ info->stag_invalid_set = false;
-+ }
-+ } else if (qp_type == ZXDH_QP_TYPE_ROCE_UD) {
-+ info->ipv4 = (bool)FIELD_GET(ZXDHCQ_IPV4, qword2);
-+ info->ud_src_qpn = (__u32)FIELD_GET(ZXDHCQ_UDSRCQPN, qword2);
-+ }
-+}
-+
-+static void update_cq_poll_info(struct zxdh_qp_uk *qp,
-+ struct zxdh_cq_poll_info *info, __u32 wqe_idx,
-+ __u64 qword0)
-+{
-+ info->wr_id = qp->sq_wrtrk_array[wqe_idx].wrid;
-+ if (!info->comp_status)
-+ info->bytes_xfered = qp->sq_wrtrk_array[wqe_idx].wr_len;
-+ info->op_type = (__u8)FIELD_GET(ZXDHCQ_OP, qword0);
-+ ZXDH_RING_SET_TAIL(qp->sq_ring,
-+ wqe_idx + qp->sq_wrtrk_array[wqe_idx].quanta);
-+}
-+
-+static enum zxdh_status_code
-+process_tx_window_query_item_err(struct zxdh_qp_uk *qp,
-+ struct zxdh_cq_poll_info *info)
-+{
-+ int ret;
-+ struct ibv_qp *ib_qp;
-+ struct zxdh_uqp *iwuqp;
-+ struct zxdh_rdma_qpc qpc = { 0 };
-+ __u64 qpc_mask = 0;
-+
-+ iwuqp = container_of(qp, struct zxdh_uqp, qp);
-+ ib_qp = &iwuqp->vqp.qp;
-+ ret = zxdh_query_qpc(ib_qp, &qpc);
-+ if (ret) {
-+ zxdh_dbg(ZXDH_DBG_QP,
-+ "process tx window query item query qpc failed:%d\n",
-+ ret);
-+ return ZXDH_ERR_RETRY_ACK_ERR;
-+ }
-+ if (qpc.tx_last_ack_psn != qp->qp_last_ack_qsn)
-+ qp->qp_reset_cnt = 0;
-+
-+ qp->qp_last_ack_qsn = qpc.tx_last_ack_psn;
-+ if (qp->qp_reset_cnt >= ZXDH_QP_RETRY_COUNT)
-+ return ZXDH_ERR_RETRY_ACK_ERR;
-+
-+ ret = zxdh_reset_qp(ib_qp, ZXDH_RESET_RETRY_TX_ITEM_FLAG);
-+ if (ret) {
-+ zxdh_dbg(ZXDH_DBG_QP,
-+ "process tx window query item reset qp failed:%d\n",
-+ ret);
-+ return ZXDH_ERR_RETRY_ACK_ERR;
-+ }
-+ qp->qp_reset_cnt++;
-+ return ZXDH_ERR_RETRY_ACK_NOT_EXCEED_ERR;
-+}
-+
-+static enum zxdh_status_code
-+process_retry_ack_err(struct zxdh_qp_uk *qp, struct zxdh_cq_poll_info *info)
-+{
-+ int ret;
-+ struct ibv_qp *ib_qp;
-+ struct zxdh_uqp *iwuqp;
-+ struct zxdh_rdma_qpc qpc = { 0 };
-+ struct zxdh_rdma_qpc qpc_req_cmd = { 0 };
-+ __u64 qpc_mask = 0;
-+
-+ iwuqp = container_of(qp, struct zxdh_uqp, qp);
-+
-+ ib_qp = &iwuqp->vqp.qp;
-+ ret = zxdh_query_qpc(ib_qp, &qpc);
-+ if (ret) {
-+ zxdh_dbg(ZXDH_DBG_QP, "process retry ack query qpc failed:%d\n",
-+ ret);
-+ return ZXDH_ERR_RETRY_ACK_ERR;
-+ }
-+ if (!(qpc.retry_cqe_sq_opcode >= ZXDH_RETRY_CQE_SQ_OPCODE_ERR &&
-+ (qpc.recv_err_flag == ZXDH_RECV_ERR_FLAG_NAK_RNR_NAK ||
-+ qpc.recv_err_flag == ZXDH_RECV_ERR_FLAG_READ_RESP))) {
-+ return ZXDH_ERR_RETRY_ACK_ERR;
-+ }
-+ if (qpc.tx_last_ack_psn != qp->cqe_last_ack_qsn)
-+ qp->cqe_retry_cnt = 0;
-+
-+ qp->cqe_last_ack_qsn = qpc.tx_last_ack_psn;
-+ if (qp->cqe_retry_cnt >= ZXDH_QP_RETRY_COUNT)
-+ return ZXDH_ERR_RETRY_ACK_ERR;
-+
-+ memcpy(&qpc_req_cmd, &qpc, sizeof(qpc));
-+ qpc_req_cmd.package_err_flag = 0;
-+ qpc_req_cmd.ack_err_flag = 0;
-+ qpc_req_cmd.err_flag = 0;
-+ qpc_req_cmd.retry_cqe_sq_opcode &= ZXDH_RESET_RETRY_CQE_SQ_OPCODE_ERR;
-+ qpc_req_cmd.cur_retry_count = qpc.retry_count;
-+ ret = zxdh_modify_qpc(ib_qp, &qpc_req_cmd,
-+ ZXDH_PACKAGE_ERR_FLAG | ZXDH_ERR_FLAG_SET |
-+ ZXDH_RETRY_CQE_SQ_OPCODE |
-+ ZXDH_TX_READ_RETRY_FLAG_SET);
-+ if (ret) {
-+ zxdh_dbg(ZXDH_DBG_QP,
-+ "process retry ack modify qpc failed:%d\n", ret);
-+ return ZXDH_ERR_RETRY_ACK_ERR;
-+ }
-+ qp->cqe_retry_cnt++;
-+ return ZXDH_ERR_RETRY_ACK_NOT_EXCEED_ERR;
-+}
-+
-+/**
-+ * zxdh_uk_cq_poll_cmpl - get cq completion info
-+ * @cq: hw cq
-+ * @info: cq poll information returned
-+ */
-+enum zxdh_status_code zxdh_uk_cq_poll_cmpl(struct zxdh_cq_uk *cq,
-+ struct zxdh_cq_poll_info *info)
-+{
-+ enum zxdh_status_code status_code;
-+ struct zxdh_uvcontext *iwvctx;
-+ __u64 comp_ctx, qword0, qword2, qword3;
-+ __le64 *cqe;
-+ int ret;
-+ struct zxdh_qp_uk *qp;
-+ struct zxdh_ring *pring = NULL;
-+ __u32 wqe_idx, q_type;
-+ int ret_code;
-+ bool move_cq_head = true;
-+ __u8 polarity;
-+ struct zxdh_usrq *iwusrq = NULL;
-+ struct zxdh_srq_uk *srq_uk = NULL;
-+ struct zxdh_uqp *iwuqp;
-+ struct ibv_qp *ib_qp;
-+
-+ cqe = get_current_cqe(cq);
-+
-+ get_64bit_val(cqe, 0, &qword0);
-+ polarity = (__u8)FIELD_GET(ZXDH_CQ_VALID, qword0);
-+ if (polarity != cq->polarity)
-+ return ZXDH_ERR_Q_EMPTY;
-+
-+ /* Ensure CQE contents are read after valid bit is checked */
-+ udma_from_device_barrier();
-+ get_64bit_val(cqe, 8, &comp_ctx);
-+ get_64bit_val(cqe, 16, &qword2);
-+ get_64bit_val(cqe, 24, &qword3);
-+
-+ qp = (struct zxdh_qp_uk *)(unsigned long)comp_ctx;
-+ if (unlikely(!qp || qp->destroy_pending)) {
-+ ret_code = ZXDH_ERR_Q_DESTROYED;
-+ goto exit;
-+ }
-+
-+ info->qp_handle = (zxdh_qp_handle)(unsigned long)qp;
-+ q_type = (__u8)FIELD_GET(ZXDH_CQ_SQ, qword0);
-+ info->solicited_event = (bool)FIELD_GET(ZXDHCQ_SOEVENT, qword0);
-+ wqe_idx = (__u32)FIELD_GET(ZXDH_CQ_WQEIDX, qword0);
-+ info->error = (bool)FIELD_GET(ZXDH_CQ_ERROR, qword0);
-+ info->major_err = FIELD_GET(ZXDH_CQ_MAJERR, qword0);
-+ info->minor_err = FIELD_GET(ZXDH_CQ_MINERR, qword0);
-+
-+ /* Set the min error to standard flush error code for remaining cqes */
-+ if (unlikely(info->error && info->major_err == ZXDH_FLUSH_MAJOR_ERR &&
-+ info->minor_err != FLUSH_GENERAL_ERR)) {
-+ qword0 &= ~ZXDH_CQ_MINERR;
-+ qword0 |= FIELD_PREP(ZXDH_CQ_MINERR, FLUSH_GENERAL_ERR);
-+ set_64bit_val(cqe, 0, qword0);
-+ }
-+ build_comp_status(q_type, info);
-+
-+ info->qp_id = (__u32)FIELD_GET(ZXDHCQ_QPID, qword2);
-+ info->imm_valid = false;
-+
-+ info->qp_handle = (zxdh_qp_handle)(unsigned long)qp;
-+ switch (q_type) {
-+ case ZXDH_CQE_QTYPE_RQ:
-+ if (qp->is_srq) {
-+ iwuqp = container_of(qp, struct zxdh_uqp, qp);
-+ iwusrq = iwuqp->srq;
-+ srq_uk = &iwusrq->srq;
-+ zxdh_free_srq_wqe(srq_uk, wqe_idx);
-+ info->wr_id = srq_uk->srq_wrid_array[wqe_idx];
-+ zxdh_get_cq_poll_info(qp, info, qword2, qword3);
-+ } else {
-+ if (unlikely(info->comp_status ==
-+ ZXDH_COMPL_STATUS_FLUSHED ||
-+ info->comp_status ==
-+ ZXDH_COMPL_STATUS_UNKNOWN)) {
-+ if (!ZXDH_RING_MORE_WORK(qp->rq_ring)) {
-+ ret_code = ZXDH_ERR_Q_EMPTY;
-+ goto exit;
-+ }
-+ wqe_idx = qp->rq_ring.tail;
-+ }
-+ info->wr_id = qp->rq_wrid_array[wqe_idx];
-+ zxdh_get_cq_poll_info(qp, info, qword2, qword3);
-+ ZXDH_RING_SET_TAIL(qp->rq_ring, wqe_idx + 1);
-+ if (info->comp_status == ZXDH_COMPL_STATUS_FLUSHED) {
-+ qp->rq_flush_seen = true;
-+ if (!ZXDH_RING_MORE_WORK(qp->rq_ring))
-+ qp->rq_flush_complete = true;
-+ else
-+ move_cq_head = false;
-+ }
-+ pring = &qp->rq_ring;
-+ }
-+ ret_code = ZXDH_SUCCESS;
-+ break;
-+ case ZXDH_CQE_QTYPE_SQ:
-+ if (info->comp_status == ZXDH_COMPL_STATUS_RETRY_ACK_ERR &&
-+ qp->qp_type == ZXDH_QP_TYPE_ROCE_RC) {
-+ status_code = process_retry_ack_err(qp, info);
-+ if (status_code == ZXDH_ERR_RETRY_ACK_ERR) {
-+ update_cq_poll_info(qp, info, wqe_idx, qword0);
-+ ret_code = ZXDH_SUCCESS;
-+ } else {
-+ ret_code = status_code;
-+ }
-+ } else if (info->comp_status ==
-+ ZXDH_COMPL_STATUS_TX_WINDOW_QUERY_ITEM_ERR &&
-+ qp->qp_type == ZXDH_QP_TYPE_ROCE_RC) {
-+ status_code =
-+ process_tx_window_query_item_err(qp, info);
-+ if (status_code == ZXDH_ERR_RETRY_ACK_ERR) {
-+ update_cq_poll_info(qp, info, wqe_idx, qword0);
-+ ret_code = ZXDH_SUCCESS;
-+ } else {
-+ ret_code = status_code;
-+ }
-+ } else if (info->comp_status == ZXDH_COMPL_STATUS_FLUSHED) {
-+ info->wr_id = qp->sq_wrtrk_array[wqe_idx].wrid;
-+ ZXDH_RING_INIT(qp->sq_ring, qp->sq_ring.size);
-+ ret_code = ZXDH_SUCCESS;
-+ } else {
-+ update_cq_poll_info(qp, info, wqe_idx, qword0);
-+ ret_code = ZXDH_SUCCESS;
-+ }
-+ break;
-+ default:
-+		zxdh_dbg(ZXDH_DBG_CQ, "zxdh get cqe type unknown!\n");
-+ ret_code = ZXDH_ERR_Q_DESTROYED;
-+ break;
-+ }
-+exit:
-+ if (move_cq_head) {
-+ __u64 cq_shadow_temp;
-+
-+ ZXDH_RING_MOVE_HEAD_NOCHECK(cq->cq_ring);
-+ if (!ZXDH_RING_CURRENT_HEAD(cq->cq_ring))
-+ cq->polarity ^= 1;
-+
-+ ZXDH_RING_MOVE_TAIL(cq->cq_ring);
-+ cq->cqe_rd_cnt++;
-+ get_64bit_val(cq->shadow_area, 0, &cq_shadow_temp);
-+ cq_shadow_temp &= ~ZXDH_CQ_DBSA_CQEIDX;
-+ cq_shadow_temp |=
-+ FIELD_PREP(ZXDH_CQ_DBSA_CQEIDX, cq->cqe_rd_cnt);
-+ set_64bit_val(cq->shadow_area, 0, cq_shadow_temp);
-+ } else {
-+ qword0 &= ~ZXDH_CQ_WQEIDX;
-+ qword0 |= FIELD_PREP(ZXDH_CQ_WQEIDX, pring->tail);
-+ set_64bit_val(cqe, 0, qword0);
-+ }
-+
-+ return ret_code;
-+}
-+
-+/**
-+ * zxdh_qp_round_up - return round up qp wq depth
-+ * @wqdepth: wq depth in quanta to round up
-+ */
-+int zxdh_qp_round_up(__u32 wqdepth)
-+{
-+ int scount = 1;
-+
-+ for (wqdepth--; scount <= 16; scount *= 2)
-+ wqdepth |= wqdepth >> scount;
-+
-+ return ++wqdepth;
-+}
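
zxdh_qp_round_up() (and the identical zxdh_cq_round_up() below) rounds a depth up to the next power of two by smearing the highest set bit across all lower bits and then adding one. A quick standalone check of that idiom:

#include <stdint.h>
#include <stdio.h>

/* Same bit-smearing round-up as zxdh_qp_round_up(), for 32-bit depths. */
static uint32_t demo_round_up_pow2(uint32_t wqdepth)
{
	wqdepth--;
	for (int scount = 1; scount <= 16; scount *= 2)
		wqdepth |= wqdepth >> scount;
	return wqdepth + 1;
}

int main(void)
{
	printf("%u %u %u\n",
	       demo_round_up_pow2(1),     /* 1   */
	       demo_round_up_pow2(33),    /* 64  */
	       demo_round_up_pow2(512));  /* 512 */
	return 0;
}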
-+
-+/**
-+ * zxdh_cq_round_up - return round up cq wq depth
-+ * @wqdepth: wq depth in quanta to round up
-+ */
-+int zxdh_cq_round_up(__u32 wqdepth)
-+{
-+ int scount = 1;
-+
-+ for (wqdepth--; scount <= 16; scount *= 2)
-+ wqdepth |= wqdepth >> scount;
-+
-+ return ++wqdepth;
-+}
-+
-+/**
-+ * zxdh_get_rq_wqe_shift - get shift count for maximum rq wqe size
-+ * @uk_attrs: qp HW attributes
-+ * @sge: Maximum Scatter Gather Elements wqe
-+ * @shift: Returns the shift needed based on sge
-+ *
-+ * Shift can be used to left shift the rq wqe size based on number of SGEs.
-+ * For 1 SGE, shift = 1 (wqe size of 2*16 bytes).
-+ * For 2 or 3 SGEs, shift = 2 (wqe size of 4*16 bytes).
-+ * For 4-7 SGEs, shift = 3 (wqe size of 8*16 bytes).
-+ * For 8-15 SGEs, shift = 4; otherwise shift = 5 (wqe size of 512 bytes).
-+ */
-+void zxdh_get_rq_wqe_shift(struct zxdh_uk_attrs *uk_attrs, __u32 sge,
-+ __u8 *shift)
-+{
-+ *shift = 0; //16bytes RQE, need to confirm configuration
-+ if (sge < 2)
-+ *shift = 1;
-+ else if (sge < 4)
-+ *shift = 2;
-+ else if (sge < 8)
-+ *shift = 3;
-+ else if (sge < 16)
-+ *shift = 4;
-+ else
-+ *shift = 5;
-+}
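
The mapping above doubles the RQ WQE footprint for each doubling class of the SGE count. A small check of the resulting sizes; the 16-byte quantum is an assumption taken from the "2*16 bytes" wording in the comment.

#include <stdint.h>
#include <stdio.h>

/* Same SGE-to-shift mapping as zxdh_get_rq_wqe_shift() above. */
static uint8_t demo_rq_shift(uint32_t sge)
{
	if (sge < 2)
		return 1;
	if (sge < 4)
		return 2;
	if (sge < 8)
		return 3;
	if (sge < 16)
		return 4;
	return 5;
}

int main(void)
{
	/* RQ WQE size = 16 bytes << shift (16-byte quantum assumed). */
	for (uint32_t sge = 1; sge <= 16; sge *= 2)
		printf("sge=%2u shift=%u wqe=%3u bytes\n",
		       sge, demo_rq_shift(sge), 16u << demo_rq_shift(sge));
	return 0;
}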
-+
-+/**
-+ * zxdh_get_sq_wqe_shift - get shift count for maximum wqe size
-+ * @uk_attrs: qp HW attributes
-+ * @sge: Maximum Scatter Gather Elements wqe
-+ * @inline_data: Maximum inline data size
-+ * @shift: Returns the shift needed based on sge
-+ *
-+ * Shift can be used to left shift the wqe size based on the number of SGEs and the inline data size.
-+ * To support WRs with imm_data, shift = 1 (wqe size of 2*32 bytes).
-+ * For 2-7 SGEs or 24 < inline data <= 86, shift = 2 (wqe size of 4*32 bytes).
-+ * Otherwise, shift = 3 (wqe size of 8*32 = 256 bytes).
-+ */
-+void zxdh_get_sq_wqe_shift(struct zxdh_uk_attrs *uk_attrs, __u32 sge,
-+ __u32 inline_data, __u8 *shift)
-+{
-+ *shift = 1;
-+
-+ if (sge > 1 || inline_data > 24) {
-+ if (sge < 8 && inline_data <= 86)
-+ *shift = 2;
-+ else
-+ *shift = 3;
-+ }
-+}
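
For the SQ the quantum is 32 bytes and the shift depends on both the SGE count and the inline size, with the thresholds described in the comment above. A compact check of those thresholds:

#include <stdint.h>
#include <stdio.h>

/* Same logic as zxdh_get_sq_wqe_shift() above. */
static uint8_t demo_sq_shift(uint32_t sge, uint32_t inline_data)
{
	uint8_t shift = 1;

	if (sge > 1 || inline_data > 24)
		shift = (sge < 8 && inline_data <= 86) ? 2 : 3;
	return shift;
}

int main(void)
{
	/* SQ WQE size = 32 bytes << shift (32-byte quantum per the comment). */
	printf("%u %u %u %u\n",
	       demo_sq_shift(1, 24),   /* 1 -> 64-byte WQE   */
	       demo_sq_shift(7, 86),   /* 2 -> 128-byte WQE  */
	       demo_sq_shift(8, 0),    /* 3 -> 256-byte WQE  */
	       demo_sq_shift(1, 87));  /* 3 -> 256-byte WQE  */
	return 0;
}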
-+
-+/*
-+ * zxdh_get_sqdepth - get SQ depth (quanta)
-+ * @uk_attrs: qp HW attributes
-+ * @sq_size: SQ size
-+ * @shift: shift which determines size of WQE
-+ * @sqdepth: depth of SQ
-+ *
-+ */
-+enum zxdh_status_code zxdh_get_sqdepth(struct zxdh_uk_attrs *uk_attrs,
-+ __u32 sq_size, __u8 shift,
-+ __u32 *sqdepth)
-+{
-+ if (sq_size > ZXDH_MAX_SQ_DEPTH)
-+ return ZXDH_ERR_INVALID_SIZE;
-+
-+ *sqdepth = zxdh_qp_round_up((sq_size << shift) + ZXDH_SQ_RSVD);
-+
-+ if (*sqdepth < (ZXDH_QP_SW_MIN_WQSIZE << shift))
-+ *sqdepth = ZXDH_QP_SW_MIN_WQSIZE << shift;
-+ else if (*sqdepth > uk_attrs->max_hw_wq_quanta)
-+ return ZXDH_ERR_INVALID_SIZE;
-+
-+ return 0;
-+}
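
zxdh_get_sqdepth() scales the requested SQ size by the WQE shift, adds the reserved slots, rounds up to a power of two, then clamps between the software minimum and the hardware maximum. A hedged walk-through of that arithmetic follows; DEMO_SQ_RSVD and DEMO_MIN_WQSIZE are placeholders, since the real ZXDH_SQ_RSVD and ZXDH_QP_SW_MIN_WQSIZE values are defined elsewhere in the patch.

#include <stdint.h>
#include <stdio.h>

/* Placeholder constants; the real values come from the driver headers. */
#define DEMO_SQ_RSVD    1
#define DEMO_MIN_WQSIZE 8

static uint32_t demo_round_up_pow2(uint32_t d)
{
	d--;
	for (int s = 1; s <= 16; s *= 2)
		d |= d >> s;
	return d + 1;
}

int main(void)
{
	uint32_t sq_size = 100, shift = 2;
	uint32_t depth = demo_round_up_pow2((sq_size << shift) + DEMO_SQ_RSVD);

	if (depth < (DEMO_MIN_WQSIZE << shift))
		depth = DEMO_MIN_WQSIZE << shift;
	printf("sq_size=%u shift=%u -> depth=%u quanta\n",
	       sq_size, shift, depth);
	return 0;
}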
-+
-+/*
-+ * zxdh_get_rqdepth - get RQ depth (quanta)
-+ * @uk_attrs: qp HW attributes
-+ * @rq_size: RQ size
-+ * @shift: shift which determines size of WQE
-+ * @rqdepth: depth of RQ
-+ */
-+enum zxdh_status_code zxdh_get_rqdepth(struct zxdh_uk_attrs *uk_attrs,
-+ __u32 rq_size, __u8 shift,
-+ __u32 *rqdepth)
-+{
-+ *rqdepth = zxdh_qp_round_up((rq_size << shift) + ZXDH_RQ_RSVD);
-+
-+ if (*rqdepth < (ZXDH_QP_SW_MIN_WQSIZE << shift))
-+ *rqdepth = ZXDH_QP_SW_MIN_WQSIZE << shift;
-+ else if (*rqdepth > uk_attrs->max_hw_rq_quanta)
-+ return ZXDH_ERR_INVALID_SIZE;
-+
-+ return 0;
-+}
-+
-+static const struct zxdh_wqe_uk_ops iw_wqe_uk_ops = {
-+ .iw_copy_inline_data = zxdh_copy_inline_data,
-+ .iw_inline_data_size_to_quanta = zxdh_inline_data_size_to_quanta,
-+ .iw_set_fragment = zxdh_set_fragment,
-+ .iw_set_mw_bind_wqe = zxdh_set_mw_bind_wqe,
-+};
-+
-+/**
-+ * zxdh_uk_qp_init - initialize shared qp
-+ * @qp: hw qp (user and kernel)
-+ * @info: qp initialization info
-+ *
-+ * initializes the vars used in both user and kernel mode.
-+ * size of the wqe depends on the number of max. fragments
-+ * allowed. Then size of wqe * the number of wqes should be the
-+ * amount of memory allocated for sq and rq.
-+ */
-+enum zxdh_status_code zxdh_uk_qp_init(struct zxdh_qp_uk *qp,
-+ struct zxdh_qp_uk_init_info *info)
-+{
-+ enum zxdh_status_code ret_code = 0;
-+ __u32 sq_ring_size;
-+ __u8 sqshift, rqshift;
-+
-+ qp->uk_attrs = info->uk_attrs;
-+ if (info->max_sq_frag_cnt > qp->uk_attrs->max_hw_wq_frags ||
-+ info->max_rq_frag_cnt > qp->uk_attrs->max_hw_wq_frags)
-+ return ZXDH_ERR_INVALID_FRAG_COUNT;
-+
-+ zxdh_get_rq_wqe_shift(qp->uk_attrs, info->max_rq_frag_cnt, &rqshift);
-+ zxdh_get_sq_wqe_shift(qp->uk_attrs, info->max_sq_frag_cnt,
-+ info->max_inline_data, &sqshift);
-+
-+ qp->qp_caps = info->qp_caps;
-+ qp->sq_base = info->sq;
-+ qp->rq_base = info->rq;
-+ qp->qp_type = info->type;
-+ qp->shadow_area = info->shadow_area;
-+ set_64bit_val(qp->shadow_area, 0, 0x8000);
-+ qp->sq_wrtrk_array = info->sq_wrtrk_array;
-+
-+ qp->rq_wrid_array = info->rq_wrid_array;
-+ qp->wqe_alloc_db = info->wqe_alloc_db;
-+ qp->qp_id = info->qp_id;
-+ qp->sq_size = info->sq_size;
-+ qp->push_mode = false;
-+ qp->max_sq_frag_cnt = info->max_sq_frag_cnt;
-+ sq_ring_size = qp->sq_size << sqshift;
-+ ZXDH_RING_INIT(qp->sq_ring, sq_ring_size);
-+ ZXDH_RING_INIT(qp->initial_ring, sq_ring_size);
-+ qp->swqe_polarity = 0;
-+ qp->swqe_polarity_deferred = 1;
-+ qp->rwqe_polarity = 0;
-+ qp->rwqe_signature = 0;
-+ qp->rq_size = info->rq_size;
-+ qp->max_rq_frag_cnt = info->max_rq_frag_cnt;
-+ qp->max_inline_data = (info->max_inline_data == 0) ?
-+ ZXDH_MAX_INLINE_DATA_SIZE :
-+ info->max_inline_data;
-+ qp->rq_wqe_size = rqshift;
-+ ZXDH_RING_INIT(qp->rq_ring, qp->rq_size);
-+ qp->rq_wqe_size_multiplier = 1 << rqshift;
-+ qp->wqe_ops = iw_wqe_uk_ops;
-+ return ret_code;
-+}
-+
-+/**
-+ * zxdh_uk_cq_init - initialize shared cq (user and kernel)
-+ * @cq: hw cq
-+ * @info: hw cq initialization info
-+ */
-+enum zxdh_status_code zxdh_uk_cq_init(struct zxdh_cq_uk *cq,
-+ struct zxdh_cq_uk_init_info *info)
-+{
-+ cq->cq_base = info->cq_base;
-+ cq->cq_id = info->cq_id;
-+ cq->cq_size = info->cq_size;
-+ cq->cqe_alloc_db = info->cqe_alloc_db;
-+ cq->cq_ack_db = info->cq_ack_db;
-+ cq->shadow_area = info->shadow_area;
-+ cq->cqe_size = info->cqe_size;
-+ ZXDH_RING_INIT(cq->cq_ring, cq->cq_size);
-+ cq->polarity = 1;
-+ cq->cqe_rd_cnt = 0;
-+
-+ return 0;
-+}
-+
-+/**
-+ * zxdh_uk_clean_cq - clean cq entries
-+ * @q: completion context
-+ * @cq: cq to clean
-+ */
-+void zxdh_uk_clean_cq(void *q, struct zxdh_cq_uk *cq)
-+{
-+ __le64 *cqe;
-+ __u64 qword3, comp_ctx;
-+ __u32 cq_head;
-+ __u8 polarity, temp;
-+
-+ cq_head = cq->cq_ring.head;
-+ temp = cq->polarity;
-+ do {
-+ if (cq->cqe_size)
-+ cqe = ((struct zxdh_extended_cqe
-+ *)(cq->cq_base))[cq_head]
-+ .buf;
-+ else
-+ cqe = cq->cq_base[cq_head].buf;
-+ get_64bit_val(cqe, 24, &qword3);
-+ polarity = (__u8)FIELD_GET(ZXDH_CQ_VALID, qword3);
-+
-+ if (polarity != temp)
-+ break;
-+
-+ get_64bit_val(cqe, 8, &comp_ctx);
-+ if ((void *)(uintptr_t)comp_ctx == q)
-+ set_64bit_val(cqe, 8, 0);
-+
-+ cq_head = (cq_head + 1) % cq->cq_ring.size;
-+ if (!cq_head)
-+ temp ^= 1;
-+ } while (true);
-+}
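
zxdh_uk_clean_cq(), like the poll path above, decides whether a CQE is new by comparing its valid bit with the expected polarity, which the consumer flips every time its index wraps; no separate producer index is needed. A small consumer-side sketch of that scheme, with a one-byte valid flag standing in for the real CQE layout:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define DEMO_CQ_SIZE 4

/* One valid byte per CQE stands in for the real descriptor layout. */
static uint8_t cq_valid[DEMO_CQ_SIZE];

/* Returns true if the entry at head was written during the current lap. */
static bool demo_cqe_ready(uint32_t head, bool expected_polarity)
{
	return cq_valid[head] == (uint8_t)expected_polarity;
}

int main(void)
{
	uint32_t head = 0;
	bool polarity = true;

	cq_valid[0] = 1;	/* producer wrote one entry in lap 1 */
	while (demo_cqe_ready(head, polarity)) {
		printf("consumed CQE %u\n", head);
		head = (head + 1) % DEMO_CQ_SIZE;
		if (head == 0)
			polarity = !polarity;	/* new lap, expect flipped bit */
	}
	return 0;
}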
-+
-+/**
-+ * zxdh_nop - post a nop
-+ * @qp: hw qp ptr
-+ * @wr_id: work request id
-+ * @signaled: signaled for completion
-+ * @post_sq: ring doorbell
-+ */
-+enum zxdh_status_code zxdh_nop(struct zxdh_qp_uk *qp, __u64 wr_id,
-+ bool signaled, bool post_sq)
-+{
-+ __le64 *wqe;
-+ __u64 hdr;
-+ __u32 wqe_idx;
-+ struct zxdh_post_sq_info info = {};
-+
-+ info.push_wqe = false;
-+ info.wr_id = wr_id;
-+ wqe = zxdh_qp_get_next_send_wqe(qp, &wqe_idx, ZXDH_QP_WQE_MIN_QUANTA, 0,
-+ &info);
-+ if (!wqe)
-+ return ZXDH_ERR_QP_TOOMANY_WRS_POSTED;
-+
-+ zxdh_clr_wqes(qp, wqe_idx);
-+
-+ set_64bit_val(wqe, 0, 0);
-+ set_64bit_val(wqe, 8, 0);
-+ set_64bit_val(wqe, 16, 0);
-+
-+ hdr = FIELD_PREP(ZXDHQPSQ_OPCODE, ZXDHQP_OP_NOP) |
-+ FIELD_PREP(ZXDHQPSQ_SIGCOMPL, signaled) |
-+ FIELD_PREP(ZXDHQPSQ_VALID, qp->swqe_polarity);
-+
-+ udma_to_device_barrier(); /* make sure WQE is populated before valid bit is set */
-+
-+ set_64bit_val(wqe, 24, hdr);
-+ if (post_sq)
-+ zxdh_uk_qp_post_wr(qp);
-+
-+ return 0;
-+}
-+
-+/**
-+ * zxdh_fragcnt_to_quanta_sq - calculate quanta based on fragment count for SQ
-+ * @frag_cnt: number of fragments
-+ * @quanta: quanta for frag_cnt
-+ */
-+enum zxdh_status_code zxdh_fragcnt_to_quanta_sq(__u32 frag_cnt, __u16 *quanta)
-+{
-+ if (frag_cnt > ZXDH_MAX_SQ_FRAG)
-+ return ZXDH_ERR_INVALID_FRAG_COUNT;
-+ *quanta = frag_cnt / 2 + 1;
-+ return 0;
-+}
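
zxdh_fragcnt_to_quanta_sq() charges frag_cnt/2 + 1 quanta; reading the send-WQE construction earlier in this file, the first quantum carries the header plus the first SGE and each further quantum carries two more. A quick check of the arithmetic:

#include <stdint.h>
#include <stdio.h>

/* Mirror of zxdh_fragcnt_to_quanta_sq(): header quantum + two SGEs per extra quantum. */
static uint16_t demo_fragcnt_to_quanta(uint32_t frag_cnt)
{
	return frag_cnt / 2 + 1;
}

int main(void)
{
	for (uint32_t frags = 0; frags <= 5; frags++)
		printf("frags=%u -> quanta=%u\n",
		       frags, demo_fragcnt_to_quanta(frags));
	return 0;
}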
-+
-+/**
-+ * zxdh_fragcnt_to_wqesize_rq - calculate wqe size based on fragment count for RQ
-+ * @frag_cnt: number of fragments
-+ * @wqe_size: size in bytes given frag_cnt
-+ */
-+enum zxdh_status_code zxdh_fragcnt_to_wqesize_rq(__u32 frag_cnt,
-+ __u16 *wqe_size)
-+{
-+ switch (frag_cnt) {
-+ case 0:
-+ case 1:
-+ *wqe_size = 32;
-+ break;
-+ case 2:
-+ case 3:
-+ *wqe_size = 64;
-+ break;
-+ case 4:
-+ case 5:
-+ case 6:
-+ case 7:
-+ *wqe_size = 128;
-+ break;
-+ case 8:
-+ case 9:
-+ case 10:
-+ case 11:
-+ case 12:
-+ case 13:
-+ case 14:
-+ *wqe_size = 256;
-+ break;
-+ default:
-+ return ZXDH_ERR_INVALID_FRAG_COUNT;
-+ }
-+
-+ return 0;
-+}
-+
-+/**
-+ * zxdh_get_srq_wqe_shift - get shift count for maximum srq wqe size
-+ * @uk_attrs: srq HW attributes
-+ * @sge: Maximum Scatter Gather Elements wqe
-+ * @shift: Returns the shift needed based on sge
-+ *
-+ * Shift can be used to left shift the srq wqe size based on number of SGEs.
-+ * For 1 SGE, shift = 1 (wqe size of 2*16 bytes).
-+ * For 2 or 3 SGEs, shift = 2 (wqe size of 4*16 bytes).
-+ * For 4-7 SGEs, shift = 3 (wqe size of 8*16 bytes).
-+ * For 8-15 SGEs, shift = 4; otherwise shift = 5 (wqe size of 512 bytes).
-+ */
-+void zxdh_get_srq_wqe_shift(struct zxdh_uk_attrs *uk_attrs, __u32 sge,
-+ __u8 *shift)
-+{
-+ *shift = 0; //16bytes RQE, need to confirm configuration
-+ if (sge < 2)
-+ *shift = 1;
-+ else if (sge < 4)
-+ *shift = 2;
-+ else if (sge < 8)
-+ *shift = 3;
-+ else if (sge < 16)
-+ *shift = 4;
-+ else
-+ *shift = 5;
-+}
-+
-+/*
-+ * zxdh_get_srqdepth - get SRQ depth (quanta)
-+ * @max_hw_rq_quanta: HW SRQ size limit
-+ * @srq_size: SRQ size
-+ * @shift: shift which determines size of WQE
-+ * @srqdepth: depth of SRQ
-+ */
-+int zxdh_get_srqdepth(__u32 max_hw_srq_quanta, __u32 srq_size, __u8 shift,
-+ __u32 *srqdepth)
-+{
-+ *srqdepth = zxdh_qp_round_up((srq_size << shift) + ZXDH_SRQ_RSVD);
-+
-+ if (*srqdepth < (ZXDH_QP_SW_MIN_WQSIZE << shift))
-+ *srqdepth = ZXDH_QP_SW_MIN_WQSIZE << shift;
-+ else if ((*srqdepth >> shift) > max_hw_srq_quanta)
-+ return ZXDH_ERR_INVALID_SIZE;
-+
-+ return 0;
-+}
-+
-+__le64 *zxdh_get_srq_wqe(struct zxdh_srq_uk *srq, int wqe_index)
-+{
-+ __le64 *wqe;
-+
-+ wqe = srq->srq_base[wqe_index * srq->srq_wqe_size_multiplier].elem;
-+ return wqe;
-+}
-+
-+__le16 *zxdh_get_srq_list_wqe(struct zxdh_srq_uk *srq, __u16 *idx)
-+{
-+ __le16 *wqe;
-+ __u16 wqe_idx;
-+
-+ wqe_idx = srq->srq_list_ring.tail;
-+ srq->srq_list_ring.tail++;
-+ srq->srq_list_ring.tail %= srq->srq_list_ring.size;
-+ *idx = srq->srq_list_ring.tail;
-+
-+ if (!(*idx))
-+ srq->srq_list_polarity = !srq->srq_list_polarity;
-+
-+ wqe = &srq->srq_list_base[wqe_idx];
-+
-+ return wqe;
-+}
-+
-+/**
-+ * zxdh_uk_srq_init - initialize srq
-+ * @srq: hw srq (user and kernel)
-+ * @info: srq initialization info
-+ *
-+ * initializes the vars used in both user and kernel mode.
-+ * size of the wqe depends on the number of max. fragments
-+ * allowed. Then size of wqe * the number of wqes should be the
-+ * amount of memory allocated for srq.
-+ */
-+enum zxdh_status_code zxdh_uk_srq_init(struct zxdh_srq_uk *srq,
-+ struct zxdh_srq_uk_init_info *info)
-+{
-+ __u32 srq_ring_size;
-+ __u8 srqshift;
-+
-+ srq->uk_attrs = info->uk_attrs;
-+ if (info->max_srq_frag_cnt > srq->uk_attrs->max_hw_wq_frags)
-+ return -ZXDH_ERR_INVALID_FRAG_COUNT;
-+ zxdh_get_srq_wqe_shift(srq->uk_attrs, info->max_srq_frag_cnt,
-+ &srqshift);
-+ srq->srq_base = info->srq_base;
-+ srq->srq_list_base = info->srq_list_base;
-+ srq->srq_db_base = info->srq_db_base;
-+ srq->srq_wrid_array = info->srq_wrid_array;
-+ srq->srq_id = info->srq_id;
-+ srq->srq_size = info->srq_size;
-+ srq->log2_srq_size = info->log2_srq_size;
-+ srq->srq_list_size = info->srq_list_size;
-+ srq->max_srq_frag_cnt = info->max_srq_frag_cnt;
-+ srq_ring_size = srq->srq_size;
-+ srq->srq_wqe_size = srqshift;
-+ srq->srq_wqe_size_multiplier = 1 << srqshift;
-+ ZXDH_RING_INIT(srq->srq_ring, srq_ring_size);
-+ ZXDH_RING_INIT(srq->srq_list_ring, srq->srq_list_size);
-+ srq->srq_ring.tail = srq->srq_size - 1;
-+ srq->srq_list_polarity = 1;
-+ zxdh_dbg(ZXDH_DBG_SRQ, "%s srq_wqe_size_multiplier:%d srqshift:%d\n",
-+ __func__, srq->srq_wqe_size_multiplier, srqshift);
-+ zxdh_dbg(
-+ ZXDH_DBG_SRQ,
-+ "%s srq->srq_id:%d srq_base:0x%p srq_list_base:0x%p srq_db_base:0x%p\n",
-+ __func__, srq->srq_id, srq->srq_base, srq->srq_list_base,
-+ srq->srq_db_base);
-+ zxdh_dbg(ZXDH_DBG_SRQ,
-+ "%s srq->srq_id:%d srq_ring_size:%d srq->srq_list_size:%d\n",
-+ __func__, srq->srq_id, srq_ring_size, srq->srq_list_size);
-+ return 0;
-+}
-+
-+void zxdh_free_srq_wqe(struct zxdh_srq_uk *srq, int wqe_index)
-+{
-+ struct zxdh_usrq *iwusrq;
-+ __le64 *wqe;
-+ __u64 hdr;
-+
-+ iwusrq = container_of(srq, struct zxdh_usrq, srq);
-+ /* always called with interrupts disabled. */
-+ pthread_spin_lock(&iwusrq->lock);
-+ wqe = zxdh_get_srq_wqe(srq, srq->srq_ring.tail);
-+ srq->srq_ring.tail = wqe_index;
-+ hdr = FIELD_PREP(ZXDHQPSRQ_NEXT_WQE_INDEX, wqe_index);
-+
-+ udma_to_device_barrier(); /* make sure WQE is populated before valid bit is set */
-+ set_64bit_val(wqe, 0, hdr);
-+
-+ pthread_spin_unlock(&iwusrq->lock);
-+ zxdh_dbg(ZXDH_DBG_SRQ, "%s srq->srq_id:%d wqe_index:%d\n", __func__,
-+ srq->srq_id, wqe_index);
-+}
-diff --git a/providers/zrdma/umain.c b/providers/zrdma/umain.c
-new file mode 100644
-index 0000000..92cdd37
---- /dev/null
-+++ b/providers/zrdma/umain.c
-@@ -0,0 +1,236 @@
-+// SPDX-License-Identifier: GPL-2.0 or Linux-OpenIB
-+/* Copyright (c) 2024 ZTE Corporation. All rights reserved. */
-+#include
-+#include
-+#include
-+#include
-+#include
-+#include
-+#include
-+#include
-+#include
-+#include
-+#include "zxdh_devids.h"
-+#include "umain.h"
-+#include "abi.h"
-+#include "private_verbs_cmd.h"
-+
-+#define ZXDH_HCA(v, d) VERBS_PCI_MATCH(v, d, NULL)
-+static const struct verbs_match_ent hca_table[] = {
-+ VERBS_DRIVER_ID(RDMA_DRIVER_ZXDH),
-+ ZXDH_HCA(PCI_VENDOR_ID_ZXDH_EVB, ZXDH_DEV_ID_ADAPTIVE_EVB_PF),
-+ ZXDH_HCA(PCI_VENDOR_ID_ZXDH_EVB, ZXDH_DEV_ID_ADAPTIVE_EVB_VF),
-+ ZXDH_HCA(PCI_VENDOR_ID_ZXDH_E312, ZXDH_DEV_ID_ADAPTIVE_E312_PF),
-+ ZXDH_HCA(PCI_VENDOR_ID_ZXDH_E312, ZXDH_DEV_ID_ADAPTIVE_E312_VF),
-+ ZXDH_HCA(PCI_VENDOR_ID_ZXDH_X512, ZXDH_DEV_ID_ADAPTIVE_X512_PF),
-+ ZXDH_HCA(PCI_VENDOR_ID_ZXDH_X512, ZXDH_DEV_ID_ADAPTIVE_X512_VF),
-+ {}
-+};
-+
-+/**
-+ * zxdh_ufree_context - free context that was allocated
-+ * @ibctx: context allocated ptr
-+ */
-+static void zxdh_ufree_context(struct ibv_context *ibctx)
-+{
-+ struct zxdh_uvcontext *iwvctx;
-+
-+ iwvctx = container_of(ibctx, struct zxdh_uvcontext, ibv_ctx.context);
-+
-+ zxdh_ufree_pd(&iwvctx->iwupd->ibv_pd);
-+ zxdh_munmap(iwvctx->sq_db);
-+ zxdh_munmap(iwvctx->cq_db);
-+ verbs_uninit_context(&iwvctx->ibv_ctx);
-+ free(iwvctx);
-+}
-+
-+static const struct verbs_context_ops zxdh_uctx_ops = {
-+ .alloc_mw = zxdh_ualloc_mw,
-+ .alloc_pd = zxdh_ualloc_pd,
-+ .attach_mcast = zxdh_uattach_mcast,
-+ .bind_mw = zxdh_ubind_mw,
-+ .cq_event = zxdh_cq_event,
-+ .create_ah = zxdh_ucreate_ah,
-+ .create_cq = zxdh_ucreate_cq,
-+ .create_cq_ex = zxdh_ucreate_cq_ex,
-+ .create_qp = zxdh_ucreate_qp,
-+ .create_qp_ex = zxdh_ucreate_qp_ex,
-+ .create_srq = zxdh_ucreate_srq,
-+ .dealloc_mw = zxdh_udealloc_mw,
-+ .dealloc_pd = zxdh_ufree_pd,
-+ .dereg_mr = zxdh_udereg_mr,
-+ .destroy_ah = zxdh_udestroy_ah,
-+ .destroy_cq = zxdh_udestroy_cq,
-+ .modify_cq = zxdh_umodify_cq,
-+ .destroy_qp = zxdh_udestroy_qp,
-+ .destroy_srq = zxdh_udestroy_srq,
-+ .detach_mcast = zxdh_udetach_mcast,
-+ .modify_qp = zxdh_umodify_qp,
-+ .modify_srq = zxdh_umodify_srq,
-+ .poll_cq = zxdh_upoll_cq,
-+ .post_recv = zxdh_upost_recv,
-+ .post_send = zxdh_upost_send,
-+ .post_srq_recv = zxdh_upost_srq_recv,
-+ .query_device_ex = zxdh_uquery_device_ex,
-+ .query_port = zxdh_uquery_port,
-+ .query_qp = zxdh_uquery_qp,
-+ .query_srq = zxdh_uquery_srq,
-+ .reg_mr = zxdh_ureg_mr,
-+ .rereg_mr = zxdh_urereg_mr,
-+ .req_notify_cq = zxdh_uarm_cq,
-+ .resize_cq = zxdh_uresize_cq,
-+ .free_context = zxdh_ufree_context,
-+ .get_srq_num = zxdh_uget_srq_num,
-+};
-+
-+/**
-+ * zxdh_ualloc_context - allocate context for user app
-+ * @ibdev: ib device created during zxdh_driver_init
-+ * @cmd_fd: save fd for the device
-+ * @private_data: device private data
-+ *
-+ * Returns callback routine table and calls driver for allocating
-+ * context and getting back resource information to return as ibv_context.
-+ */
-+static struct verbs_context *zxdh_ualloc_context(struct ibv_device *ibdev,
-+ int cmd_fd, void *private_data)
-+{
-+ struct ibv_pd *ibv_pd;
-+ struct zxdh_uvcontext *iwvctx;
-+ struct zxdh_get_context cmd;
-+ struct zxdh_get_context_resp resp = {};
-+ __u64 sq_db_mmap_key, cq_db_mmap_key;
-+ __u8 user_ver = ZXDH_ABI_VER;
-+
-+ iwvctx = verbs_init_and_alloc_context(ibdev, cmd_fd, iwvctx, ibv_ctx,
-+ RDMA_DRIVER_ZXDH);
-+ if (!iwvctx)
-+ return NULL;
-+
-+ zxdh_set_debug_mask();
-+
-+ cmd.userspace_ver = user_ver;
-+ if (ibv_cmd_get_context(&iwvctx->ibv_ctx,
-+ (struct ibv_get_context *)&cmd, sizeof(cmd),
-+ &resp.ibv_resp, sizeof(resp))) {
-+ cmd.userspace_ver = 4;
-+ if (ibv_cmd_get_context(
-+ &iwvctx->ibv_ctx, (struct ibv_get_context *)&cmd,
-+ sizeof(cmd), &resp.ibv_resp, sizeof(resp)))
-+ goto err_free;
-+ user_ver = cmd.userspace_ver;
-+ }
-+
-+ verbs_set_ops(&iwvctx->ibv_ctx, &zxdh_uctx_ops);
-+
-+ iwvctx->uk_attrs.feature_flags = resp.feature_flags;
-+ iwvctx->uk_attrs.hw_rev = resp.hw_rev;
-+ iwvctx->uk_attrs.max_hw_wq_frags = resp.max_hw_wq_frags;
-+ iwvctx->uk_attrs.max_hw_read_sges = resp.max_hw_read_sges;
-+ iwvctx->uk_attrs.max_hw_inline = resp.max_hw_inline;
-+ iwvctx->uk_attrs.max_hw_rq_quanta = resp.max_hw_rq_quanta;
-+ iwvctx->uk_attrs.max_hw_srq_quanta = resp.max_hw_srq_quanta;
-+ iwvctx->uk_attrs.max_hw_wq_quanta = resp.max_hw_wq_quanta;
-+ iwvctx->uk_attrs.max_hw_srq_wr = resp.max_hw_srq_wr;
-+ iwvctx->uk_attrs.max_hw_sq_chunk = resp.max_hw_sq_chunk;
-+ iwvctx->uk_attrs.max_hw_cq_size = resp.max_hw_cq_size;
-+ iwvctx->uk_attrs.min_hw_cq_size = resp.min_hw_cq_size;
-+ iwvctx->abi_ver = user_ver;
-+
-+ sq_db_mmap_key = resp.sq_db_mmap_key;
-+ cq_db_mmap_key = resp.cq_db_mmap_key;
-+
-+ iwvctx->uk_attrs.db_addr_type = resp.db_addr_type;
-+ iwvctx->uk_attrs.sq_db_pa = resp.sq_db_pa;
-+ iwvctx->uk_attrs.cq_db_pa = resp.cq_db_pa;
-+
-+ if (iwvctx->uk_attrs.db_addr_type == ZXDH_DB_ADDR_PHY) {
-+ iwvctx->sq_db =
-+ mmap(NULL, ZXDH_HW_PAGE_SIZE, PROT_READ | PROT_WRITE,
-+ MAP_SHARED, cmd_fd, iwvctx->uk_attrs.sq_db_pa);
-+ if (iwvctx->sq_db == MAP_FAILED) {
-+ iwvctx->sq_db = NULL;
-+ fprintf(stderr, "%s:%d mmap failed\n", __func__,
-+ __LINE__);
-+ goto err_free;
-+ }
-+
-+ iwvctx->cq_db =
-+ mmap(NULL, ZXDH_HW_PAGE_SIZE, PROT_READ | PROT_WRITE,
-+ MAP_SHARED, cmd_fd, iwvctx->uk_attrs.cq_db_pa);
-+ if (iwvctx->cq_db == MAP_FAILED) {
-+ iwvctx->cq_db = NULL;
-+ fprintf(stderr, "%s:%d mmap failed\n", __func__,
-+ __LINE__);
-+ goto err_free;
-+ }
-+
-+ ibv_pd = zxdh_ualloc_pd(&iwvctx->ibv_ctx.context);
-+ if (!ibv_pd) {
-+ if (iwvctx->sq_db)
-+ munmap(iwvctx->sq_db, ZXDH_HW_PAGE_SIZE);
-+ if (iwvctx->cq_db)
-+ munmap(iwvctx->cq_db, ZXDH_HW_PAGE_SIZE);
-+ goto err_free;
-+ }
-+ } else if (iwvctx->uk_attrs.db_addr_type == ZXDH_DB_ADDR_BAR) {
-+ iwvctx->sq_db = zxdh_mmap(cmd_fd, sq_db_mmap_key);
-+ if (iwvctx->sq_db == MAP_FAILED)
-+ goto err_free;
-+
-+ iwvctx->cq_db = zxdh_mmap(cmd_fd, cq_db_mmap_key);
-+ if (iwvctx->cq_db == MAP_FAILED) {
-+ zxdh_munmap(iwvctx->sq_db);
-+ goto err_free;
-+ }
-+ ibv_pd = zxdh_ualloc_pd(&iwvctx->ibv_ctx.context);
-+ if (!ibv_pd) {
-+ zxdh_munmap(iwvctx->sq_db);
-+ zxdh_munmap(iwvctx->cq_db);
-+ goto err_free;
-+ }
-+ } else
-+ goto err_free;
-+
-+ ibv_pd->context = &iwvctx->ibv_ctx.context;
-+ iwvctx->iwupd = container_of(ibv_pd, struct zxdh_upd, ibv_pd);
-+ add_private_ops(iwvctx);
-+ return &iwvctx->ibv_ctx;
-+
-+err_free:
-+ free(iwvctx);
-+
-+ return NULL;
-+}
-+
-+static void zxdh_uninit_device(struct verbs_device *verbs_device)
-+{
-+ struct zxdh_udevice *dev;
-+
-+ dev = container_of(&verbs_device->device, struct zxdh_udevice,
-+ ibv_dev.device);
-+ free(dev);
-+}
-+
-+static struct verbs_device *zxdh_device_alloc(struct verbs_sysfs_dev *sysfs_dev)
-+{
-+ struct zxdh_udevice *dev;
-+
-+ dev = calloc(1, sizeof(*dev));
-+ if (!dev)
-+ return NULL;
-+
-+ return &dev->ibv_dev;
-+}
-+
-+static const struct verbs_device_ops zxdh_udev_ops = {
-+ .alloc_context = zxdh_ualloc_context,
-+ .alloc_device = zxdh_device_alloc,
-+ .match_max_abi_version = ZXDH_MAX_ABI_VERSION,
-+ .match_min_abi_version = ZXDH_MIN_ABI_VERSION,
-+ .match_table = hca_table,
-+ .name = "zxdh",
-+ .uninit_device = zxdh_uninit_device,
-+};
-+
-+PROVIDER_DRIVER(zxdh, zxdh_udev_ops);
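For orientation, a minimal illustrative sketch (hypothetical helper, standard libibverbs calls only) of how this registration is exercised: ibv_open_device() reaches zxdh_ualloc_context() through the alloc_context callback registered above, and ibv_alloc_pd() is served by zxdh_ualloc_pd().

#include <infiniband/verbs.h>

/* Illustrative only: open the first RDMA device and allocate a PD through
 * the provider registered by PROVIDER_DRIVER(zxdh, zxdh_udev_ops). */
static int zxdh_demo_open(void)
{
	int num = 0, rc = -1;
	struct ibv_device **list = ibv_get_device_list(&num);
	struct ibv_context *ctx;
	struct ibv_pd *pd;

	if (!list || !num)
		goto out;

	ctx = ibv_open_device(list[0]);	/* -> zxdh_ualloc_context() */
	if (!ctx)
		goto out;

	pd = ibv_alloc_pd(ctx);		/* -> zxdh_ualloc_pd() */
	if (pd) {
		ibv_dealloc_pd(pd);
		rc = 0;
	}
	ibv_close_device(ctx);
out:
	if (list)
		ibv_free_device_list(list);
	return rc;
}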
-diff --git a/providers/zrdma/umain.h b/providers/zrdma/umain.h
-new file mode 100644
-index 0000000..d0b400d
---- /dev/null
-+++ b/providers/zrdma/umain.h
-@@ -0,0 +1,228 @@
-+/* SPDX-License-Identifier: GPL-2.0 or Linux-OpenIB */
-+/* Copyright (c) 2024 ZTE Corporation. All rights reserved. */
-+#ifndef ZXDH_UMAIN_H
-+#define ZXDH_UMAIN_H
-+
-+#include
-+#include
-+#include
-+#include
-+#include
-+
-+#include "osdep.h"
-+#include "zxdh.h"
-+#include "defs.h"
-+#include "status.h"
-+#include "user.h"
-+
-+#define ZXDH_BASE_PUSH_PAGE 1
-+#define ZXDH_U_MINCQ_SIZE 4
-+#define ZXDH_DB_SHADOW_AREA_SIZE 8
-+#define ZXDH_DB_SQ_OFFSET 0x404
-+#define ZXDH_DB_CQ_OFFSET 0x588
-+
-+#define MIN_UDP_SPORT 1024
-+#define MIN_QP_QPN 1
-+
-+enum zxdh_supported_wc_flags {
-+ ZXDH_CQ_SUPPORTED_WC_FLAGS =
-+ IBV_WC_EX_WITH_BYTE_LEN | IBV_WC_EX_WITH_IMM |
-+ IBV_WC_EX_WITH_QP_NUM | IBV_WC_EX_WITH_SRC_QP |
-+ IBV_WC_EX_WITH_SLID | IBV_WC_EX_WITH_SL |
-+ IBV_WC_EX_WITH_DLID_PATH_BITS |
-+ IBV_WC_EX_WITH_COMPLETION_TIMESTAMP_WALLCLOCK |
-+ IBV_WC_EX_WITH_COMPLETION_TIMESTAMP,
-+};
-+
-+enum {
-+ ZXDH_DBG_QP = 1 << 0,
-+ ZXDH_DBG_CQ = 1 << 1,
-+ ZXDH_DBG_SRQ = 1 << 2,
-+};
-+extern uint32_t zxdh_debug_mask;
-+#define zxdh_dbg(mask, format, arg...) \
-+ do { \
-+ if (mask & zxdh_debug_mask) { \
-+ int tmp = errno; \
-+ fprintf(stdout, "%s:%d: " format, __func__, __LINE__, \
-+ ##arg); \
-+ errno = tmp; \
-+ } \
-+ } while (0)
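A hypothetical call site, for illustration: with the mask set by zxdh_set_debug_mask() (which parses an environment variable with strtol()), a provider function can emit trace output without disturbing errno, e.g.

	zxdh_dbg(ZXDH_DBG_QP, "create qp sq_size:%d rq_size:%d\n", sq_size, rq_size);

where sq_size/rq_size are placeholders; the revised macro in zxdh_verbs.c below additionally takes the verbs context as its first argument.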
-+
-+
-+struct zxdh_udevice {
-+ struct verbs_device ibv_dev;
-+};
-+
-+struct zxdh_uah {
-+ struct ibv_ah ibv_ah;
-+ uint32_t ah_id;
-+ struct ibv_global_route grh;
-+};
-+
-+struct zxdh_upd {
-+ struct ibv_pd ibv_pd;
-+ void *arm_cq_page;
-+ void *arm_cq;
-+ uint32_t pd_id;
-+};
-+
-+struct zxdh_uvcontext {
-+ struct verbs_context ibv_ctx;
-+ struct zxdh_upd *iwupd;
-+ struct zxdh_uk_attrs uk_attrs;
-+ void *db;
-+ void *sq_db;
-+ void *cq_db;
-+ int abi_ver;
-+ bool legacy_mode;
-+ struct zxdh_uvcontext_ops *cxt_ops;
-+};
-+
-+struct zxdh_uqp;
-+
-+struct zxdh_cq_buf {
-+ struct list_node list;
-+ struct zxdh_cq_uk cq;
-+ struct verbs_mr vmr;
-+};
-+
-+struct zxdh_ucq {
-+ struct verbs_cq verbs_cq;
-+ struct verbs_mr vmr;
-+ struct verbs_mr vmr_shadow_area;
-+ pthread_spinlock_t lock;
-+ size_t buf_size;
-+ bool is_armed;
-+ enum zxdh_cmpl_notify last_notify;
-+ // bool skip_arm;
-+ // bool arm_sol;
-+ // bool skip_sol;
-+ int comp_vector;
-+ uint32_t report_rtt;
-+ struct zxdh_uqp *uqp;
-+ struct zxdh_cq_uk cq;
-+ struct list_head resize_list;
-+ /* for extended CQ completion fields */
-+ struct zxdh_cq_poll_info cur_cqe;
-+ bool resize_enable;
-+};
-+
-+struct zxdh_usrq {
-+ struct ibv_srq ibv_srq;
-+ struct verbs_mr vmr;
-+ struct verbs_mr list_vmr;
-+ struct verbs_mr db_vmr;
-+ size_t total_buf_size;
-+ size_t buf_size;
-+ size_t list_buf_size;
-+ size_t db_buf_size;
-+ size_t srq_size;
-+ size_t srq_list_size;
-+ uint32_t srq_id;
-+ uint32_t max_wr;
-+ uint32_t max_sge;
-+ uint32_t srq_limit;
-+ pthread_spinlock_t lock;
-+ uint32_t wq_size;
-+ struct ibv_recv_wr *pend_rx_wr;
-+ struct zxdh_srq_uk srq;
-+};
-+
-+struct zxdh_uqp {
-+ struct verbs_qp vqp;
-+ struct zxdh_ucq *send_cq;
-+ struct zxdh_ucq *recv_cq;
-+ struct zxdh_usrq *srq;
-+ struct verbs_mr vmr;
-+ size_t buf_size;
-+ uint32_t zxdh_drv_opt;
-+ pthread_spinlock_t lock;
-+ uint16_t sq_sig_all;
-+ uint16_t qperr;
-+ uint16_t rsvd;
-+ uint32_t pending_rcvs;
-+ uint32_t wq_size;
-+ struct ibv_recv_wr *pend_rx_wr;
-+ struct zxdh_qp_uk qp;
-+ enum ibv_qp_type qp_type;
-+ struct zxdh_sge *recv_sges;
-+ uint8_t is_srq;
-+ uint8_t inline_data[ZXDH_MAX_INLINE_DATA_SIZE];
-+};
-+
-+struct zxdh_umr {
-+ struct verbs_mr vmr;
-+ uint32_t acc_flags;
-+ uint8_t leaf_pbl_size;
-+ uint8_t host_page_size;
-+ uint64_t mr_pa_pble_index;
-+};
-+
-+/* zxdh_uverbs.c */
-+int zxdh_uquery_device_ex(struct ibv_context *context,
-+ const struct ibv_query_device_ex_input *input,
-+ struct ibv_device_attr_ex *attr, size_t attr_size);
-+int zxdh_uquery_port(struct ibv_context *context, uint8_t port,
-+ struct ibv_port_attr *attr);
-+struct ibv_pd *zxdh_ualloc_pd(struct ibv_context *context);
-+int zxdh_ufree_pd(struct ibv_pd *pd);
-+struct ibv_mr *zxdh_ureg_mr(struct ibv_pd *pd, void *addr, size_t length,
-+ uint64_t hca_va, int access);
-+int zxdh_udereg_mr(struct verbs_mr *vmr);
-+
-+int zxdh_urereg_mr(struct verbs_mr *mr, int flags, struct ibv_pd *pd,
-+ void *addr, size_t length, int access);
-+
-+struct ibv_mw *zxdh_ualloc_mw(struct ibv_pd *pd, enum ibv_mw_type type);
-+int zxdh_ubind_mw(struct ibv_qp *qp, struct ibv_mw *mw,
-+ struct ibv_mw_bind *mw_bind);
-+int zxdh_udealloc_mw(struct ibv_mw *mw);
-+struct ibv_cq *zxdh_ucreate_cq(struct ibv_context *context, int cqe,
-+ struct ibv_comp_channel *channel,
-+ int comp_vector);
-+struct ibv_cq_ex *zxdh_ucreate_cq_ex(struct ibv_context *context,
-+ struct ibv_cq_init_attr_ex *attr_ex);
-+void zxdh_ibvcq_ex_fill_priv_funcs(struct zxdh_ucq *iwucq,
-+ struct ibv_cq_init_attr_ex *attr_ex);
-+int zxdh_uresize_cq(struct ibv_cq *cq, int cqe);
-+int zxdh_udestroy_cq(struct ibv_cq *cq);
-+int zxdh_umodify_cq(struct ibv_cq *cq, struct ibv_modify_cq_attr *attr);
-+int zxdh_upoll_cq(struct ibv_cq *cq, int entries, struct ibv_wc *entry);
-+int zxdh_uarm_cq(struct ibv_cq *cq, int solicited);
-+void zxdh_cq_event(struct ibv_cq *cq);
-+struct ibv_qp *zxdh_ucreate_qp(struct ibv_pd *pd,
-+ struct ibv_qp_init_attr *attr);
-+struct ibv_qp *zxdh_ucreate_qp_ex(struct ibv_context *context,
-+ struct ibv_qp_init_attr_ex *attr);
-+int zxdh_uquery_qp(struct ibv_qp *qp, struct ibv_qp_attr *attr, int attr_mask,
-+ struct ibv_qp_init_attr *init_attr);
-+int zxdh_umodify_qp(struct ibv_qp *qp, struct ibv_qp_attr *attr, int attr_mask);
-+int zxdh_udestroy_qp(struct ibv_qp *qp);
-+int zxdh_upost_send(struct ibv_qp *ib_qp, struct ibv_send_wr *ib_wr,
-+ struct ibv_send_wr **bad_wr);
-+int zxdh_upost_recv(struct ibv_qp *ib_qp, struct ibv_recv_wr *ib_wr,
-+ struct ibv_recv_wr **bad_wr);
-+struct ibv_srq *zxdh_ucreate_srq(struct ibv_pd *pd,
-+ struct ibv_srq_init_attr *srq_init_attr);
-+int zxdh_udestroy_srq(struct ibv_srq *srq);
-+int zxdh_umodify_srq(struct ibv_srq *srq, struct ibv_srq_attr *srq_attr,
-+ int srq_attr_mask);
-+int zxdh_uquery_srq(struct ibv_srq *srq, struct ibv_srq_attr *srq_attr);
-+int zxdh_upost_srq_recv(struct ibv_srq *srq, struct ibv_recv_wr *recv_wr,
-+ struct ibv_recv_wr **bad_recv_wr);
-+int zxdh_uget_srq_num(struct ibv_srq *srq, uint32_t *srq_num);
-+struct ibv_ah *zxdh_ucreate_ah(struct ibv_pd *ibpd, struct ibv_ah_attr *attr);
-+int zxdh_udestroy_ah(struct ibv_ah *ibah);
-+int zxdh_uattach_mcast(struct ibv_qp *qp, const union ibv_gid *gid,
-+ uint16_t lid);
-+int zxdh_udetach_mcast(struct ibv_qp *qp, const union ibv_gid *gid,
-+ uint16_t lid);
-+void zxdh_async_event(struct ibv_context *context,
-+ struct ibv_async_event *event);
-+void zxdh_set_hw_attrs(struct zxdh_hw_attrs *attrs);
-+void *zxdh_mmap(int fd, off_t offset);
-+void zxdh_munmap(void *map);
-+void zxdh_set_debug_mask(void);
-+#endif /* ZXDH_UMAIN_H */
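These prototypes are the provider entry points that verbs_set_ops() wires into the standard verbs dispatch table, so plain libibverbs calls reach them. A rough, hypothetical data-path sketch (attribute values are placeholders; the QP state transitions to RTS via ibv_modify_qp() are omitted):

#include <stdint.h>
#include <infiniband/verbs.h>

/* Illustrative only: ibv_create_cq() lands in zxdh_ucreate_cq(),
 * ibv_create_qp() in zxdh_ucreate_qp(), ibv_post_send() in
 * zxdh_upost_send() and ibv_poll_cq() in zxdh_upoll_cq(). */
static int zxdh_demo_send(struct ibv_context *ctx, struct ibv_pd *pd,
			  struct ibv_mr *mr, void *buf, uint32_t len)
{
	struct ibv_cq *cq;
	struct ibv_qp *qp;
	struct ibv_qp_init_attr qpia = {
		.qp_type = IBV_QPT_RC,
		.cap = { .max_send_wr = 32, .max_recv_wr = 32,
			 .max_send_sge = 1, .max_recv_sge = 1 },
	};
	struct ibv_sge sge = { .addr = (uintptr_t)buf, .length = len,
			       .lkey = mr->lkey };
	struct ibv_send_wr wr = { .opcode = IBV_WR_SEND, .sg_list = &sge,
				  .num_sge = 1,
				  .send_flags = IBV_SEND_SIGNALED };
	struct ibv_send_wr *bad_wr;
	struct ibv_wc wc;

	cq = ibv_create_cq(ctx, 256, NULL, NULL, 0);
	if (!cq)
		return -1;
	qpia.send_cq = cq;
	qpia.recv_cq = cq;
	qp = ibv_create_qp(pd, &qpia);
	if (!qp)
		return -1;
	/* ...connect the QP and move it to RTS before posting... */
	if (ibv_post_send(qp, &wr, &bad_wr))
		return -1;
	while (ibv_poll_cq(cq, 1, &wc) == 0)
		;
	return wc.status == IBV_WC_SUCCESS ? 0 : -1;
}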
-diff --git a/providers/zrdma/user.h b/providers/zrdma/user.h
-new file mode 100644
-index 0000000..fec4f5e
---- /dev/null
-+++ b/providers/zrdma/user.h
-@@ -0,0 +1,572 @@
-+/* SPDX-License-Identifier: GPL-2.0 or Linux-OpenIB */
-+/* Copyright (c) 2015 - 2020 Intel Corporation */
-+#ifndef ZXDH_USER_H
-+#define ZXDH_USER_H
-+
-+#include "osdep.h"
-+
-+#define zxdh_handle void *
-+#define zxdh_adapter_handle zxdh_handle
-+#define zxdh_qp_handle zxdh_handle
-+#define zxdh_cq_handle zxdh_handle
-+#define zxdh_pd_id zxdh_handle
-+#define zxdh_stag_handle zxdh_handle
-+#define zxdh_stag_index __u32
-+#define zxdh_stag __u32
-+#define zxdh_stag_key __u8
-+#define zxdh_tagged_offset __u64
-+#define zxdh_access_privileges __u32
-+#define zxdh_physical_fragment __u64
-+#define zxdh_address_list __u64 *
-+#define zxdh_sgl struct zxdh_sge *
-+
-+#define ZXDH_MAX_MR_SIZE 0x200000000000ULL
-+
-+#define ZXDH_ACCESS_FLAGS_LOCALREAD 0x01
-+#define ZXDH_ACCESS_FLAGS_LOCALWRITE 0x02
-+#define ZXDH_ACCESS_FLAGS_REMOTEREAD_ONLY 0x04
-+#define ZXDH_ACCESS_FLAGS_REMOTEREAD 0x05
-+#define ZXDH_ACCESS_FLAGS_REMOTEWRITE_ONLY 0x08
-+#define ZXDH_ACCESS_FLAGS_REMOTEWRITE 0x0a
-+#define ZXDH_ACCESS_FLAGS_BIND_WINDOW 0x10
-+#define ZXDH_ACCESS_FLAGS_ZERO_BASED 0x20
-+#define ZXDH_ACCESS_FLAGS_ALL 0x3f
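Note on the combined values: ZXDH_ACCESS_FLAGS_REMOTEREAD (0x05) is LOCALREAD | REMOTEREAD_ONLY, ZXDH_ACCESS_FLAGS_REMOTEWRITE (0x0a) is LOCALWRITE | REMOTEWRITE_ONLY, and ZXDH_ACCESS_FLAGS_ALL (0x3f) ORs together every bit up to ZERO_BASED, so remote access appears to imply the matching local permission.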
-+
-+#define ZXDH_OP_TYPE_NOP 0x00
-+#define ZXDH_OP_TYPE_SEND 0x01
-+#define ZXDH_OP_TYPE_SEND_WITH_IMM 0x02
-+#define ZXDH_OP_TYPE_SEND_INV 0x03
-+#define ZXDH_OP_TYPE_WRITE 0x04
-+#define ZXDH_OP_TYPE_WRITE_WITH_IMM 0x05
-+#define ZXDH_OP_TYPE_READ 0x06
-+#define ZXDH_OP_TYPE_BIND_MW 0x07
-+#define ZXDH_OP_TYPE_FAST_REG_MR 0x08
-+#define ZXDH_OP_TYPE_LOCAL_INV 0x09
-+#define ZXDH_OP_TYPE_UD_SEND 0x0a
-+#define ZXDH_OP_TYPE_UD_SEND_WITH_IMM 0x0b
-+#define ZXDH_OP_TYPE_REC 0x3e
-+#define ZXDH_OP_TYPE_REC_IMM 0x3f
-+
-+#define ZXDH_FLUSH_MAJOR_ERR 1
-+#define ZXDH_RETRY_ACK_MAJOR_ERR 0x8
-+#define ZXDH_RETRY_ACK_MINOR_ERR 0xf3
-+#define ZXDH_TX_WINDOW_QUERY_ITEM_MINOR_ERR 0xf5
-+
-+#define ZXDH_MAX_SQ_FRAG 31
-+#define ZXDH_MAX_SQ_INLINE_DATELEN_WITH_IMM 210
-+
-+#define INLINE_DATASIZE_7BYTES 7
-+#define INLINE_DATASIZE_24BYTES 24
-+#define INLINE_FRAG_DATASIZE_31BYTES 31
-+
-+#define INLINE_DATA_OFFSET_7BYTES 7
-+#define WQE_OFFSET_7BYTES 7
-+#define WQE_OFFSET_8BYTES 8
-+#define WQE_OFFSET_24BYTES 24
-+
-+#define ZXDH_SQE_SIZE 4
-+#define ZXDH_RQE_SIZE 2
-+
-+#define ZXDH_SRQ_INVALID_LKEY 0x100
-+#define ZXDH_SRQ_DB_INIT_VALUE 0x8000
-+
-+enum zxdh_device_caps_const {
-+ ZXDH_WQE_SIZE = 4,
-+ ZXDH_SRQE_SIZE = 2,
-+ ZXDH_CQP_WQE_SIZE = 8,
-+ ZXDH_CQE_SIZE = 8,
-+ ZXDH_EXTENDED_CQE_SIZE = 8,
-+ ZXDH_AEQE_SIZE = 2,
-+ ZXDH_CEQE_SIZE = 1,
-+ ZXDH_CQP_CTX_SIZE = 8,
-+ ZXDH_SHADOW_AREA_SIZE = 8,
-+ ZXDH_GATHER_STATS_BUF_SIZE = 1024,
-+ ZXDH_MIN_IW_QP_ID = 0,
-+ ZXDH_QUERY_FPM_BUF_SIZE = 176,
-+ ZXDH_COMMIT_FPM_BUF_SIZE = 176,
-+ ZXDH_MAX_IW_QP_ID = 262143,
-+ ZXDH_MIN_CEQID = 0,
-+ ZXDH_MAX_CEQID = 1023,
-+ ZXDH_CEQ_MAX_COUNT = ZXDH_MAX_CEQID + 1,
-+ ZXDH_MIN_CQID = 0,
-+ ZXDH_MAX_CQID = 524287,
-+ ZXDH_MIN_AEQ_ENTRIES = 1,
-+ ZXDH_MAX_AEQ_ENTRIES = 524287,
-+ ZXDH_MIN_CEQ_ENTRIES = 1,
-+ ZXDH_MAX_CEQ_ENTRIES = 262143,
-+ ZXDH_MIN_CQ_SIZE = 1,
-+ ZXDH_MAX_CQ_SIZE = 1048575,
-+ ZXDH_DB_ID_ZERO = 0,
-+ ZXDH_MAX_WQ_FRAGMENT_COUNT = 13,
-+ ZXDH_MAX_SGE_RD = 13,
-+ ZXDH_MAX_OUTBOUND_MSG_SIZE = 2147483647,
-+ ZXDH_MAX_INBOUND_MSG_SIZE = 2147483647,
-+ ZXDH_MAX_PUSH_PAGE_COUNT = 1024,
-+ ZXDH_MAX_PE_ENA_VF_COUNT = 32,
-+ ZXDH_MAX_VF_FPM_ID = 47,
-+ ZXDH_MAX_SQ_PAYLOAD_SIZE = 2147483648,
-+ ZXDH_MAX_INLINE_DATA_SIZE = 217,
-+ ZXDH_MAX_WQ_ENTRIES = 32768,
-+ ZXDH_Q2_BUF_SIZE = 256,
-+ ZXDH_QP_CTX_SIZE = 256,
-+ ZXDH_MAX_PDS = 262144,
-+};
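The inline-data limits above appear to be related by simple arithmetic: ZXDH_MAX_INLINE_DATA_SIZE (217) equals one 7-byte chunk plus one 24-byte chunk plus six 31-byte fragments (7 + 24 + 6 * 31 = 217), matching INLINE_DATASIZE_7BYTES, INLINE_DATASIZE_24BYTES and INLINE_FRAG_DATASIZE_31BYTES defined earlier, while ZXDH_MAX_SQ_INLINE_DATELEN_WITH_IMM (210) is that budget minus the 7 bytes consumed when immediate data is carried. This is an inference from the constants, not a documented relationship.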
-+
-+enum zxdh_addressing_type {
-+ ZXDH_ADDR_TYPE_ZERO_BASED = 0,
-+ ZXDH_ADDR_TYPE_VA_BASED = 1,
-+};
-+
-+enum zxdh_flush_opcode {
-+ FLUSH_INVALID = 0,
-+ FLUSH_GENERAL_ERR,
-+ FLUSH_PROT_ERR,
-+ FLUSH_REM_ACCESS_ERR,
-+ FLUSH_LOC_QP_OP_ERR,
-+ FLUSH_REM_OP_ERR,
-+ FLUSH_LOC_LEN_ERR,
-+ FLUSH_FATAL_ERR,
-+ FLUSH_RETRY_EXC_ERR,
-+ FLUSH_MW_BIND_ERR,
-+ FLUSH_REM_INV_REQ_ERR,
-+};
-+
-+enum zxdh_cmpl_status {
-+ ZXDH_COMPL_STATUS_SUCCESS = 0,
-+ ZXDH_COMPL_STATUS_FLUSHED,
-+ ZXDH_COMPL_STATUS_INVALID_WQE,
-+ ZXDH_COMPL_STATUS_QP_CATASTROPHIC,
-+ ZXDH_COMPL_STATUS_REMOTE_TERMINATION,
-+ ZXDH_COMPL_STATUS_INVALID_STAG,
-+ ZXDH_COMPL_STATUS_BASE_BOUND_VIOLATION,
-+ ZXDH_COMPL_STATUS_ACCESS_VIOLATION,
-+ ZXDH_COMPL_STATUS_INVALID_PD_ID,
-+ ZXDH_COMPL_STATUS_WRAP_ERROR,
-+ ZXDH_COMPL_STATUS_STAG_INVALID_PDID,
-+ ZXDH_COMPL_STATUS_RDMA_READ_ZERO_ORD,
-+ ZXDH_COMPL_STATUS_QP_NOT_PRIVLEDGED,
-+ ZXDH_COMPL_STATUS_STAG_NOT_INVALID,
-+ ZXDH_COMPL_STATUS_INVALID_PHYS_BUF_SIZE,
-+ ZXDH_COMPL_STATUS_INVALID_PHYS_BUF_ENTRY,
-+ ZXDH_COMPL_STATUS_INVALID_FBO,
-+ ZXDH_COMPL_STATUS_INVALID_LEN,
-+ ZXDH_COMPL_STATUS_INVALID_ACCESS,
-+ ZXDH_COMPL_STATUS_PHYS_BUF_LIST_TOO_LONG,
-+ ZXDH_COMPL_STATUS_INVALID_VIRT_ADDRESS,
-+ ZXDH_COMPL_STATUS_INVALID_REGION,
-+ ZXDH_COMPL_STATUS_INVALID_WINDOW,
-+ ZXDH_COMPL_STATUS_INVALID_TOTAL_LEN,
-+ ZXDH_COMPL_STATUS_RETRY_ACK_ERR,
-+ ZXDH_COMPL_STATUS_TX_WINDOW_QUERY_ITEM_ERR,
-+ ZXDH_COMPL_STATUS_UNKNOWN,
-+};
-+
-+enum zxdh_cmpl_notify {
-+ ZXDH_CQ_COMPL_EVENT = 0,
-+ ZXDH_CQ_COMPL_SOLICITED = 1,
-+};
-+
-+enum zxdh_qp_caps {
-+ ZXDH_WRITE_WITH_IMM = 1,
-+ ZXDH_SEND_WITH_IMM = 2,
-+ ZXDH_ROCE = 4,
-+ ZXDH_PUSH_MODE = 8,
-+};
-+
-+enum zxdh_page_size {
-+ ZXDH_PAGE_SIZE_4K = 0,
-+ ZXDH_PAGE_SIZE_2M = 9,
-+ ZXDH_PAGE_SIZE_1G = 18,
-+};
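The zxdh_page_size values read as shift amounts relative to the 4 KiB base page: 4 KiB << 9 = 2 MiB and 4 KiB << 18 = 1 GiB, which presumably is why ZXDH_PAGE_SIZE_2M is 9 and ZXDH_PAGE_SIZE_1G is 18.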
-+
-+struct zxdh_qp_uk;
-+struct zxdh_cq_uk;
-+struct zxdh_qp_uk_init_info;
-+struct zxdh_cq_uk_init_info;
-+
-+struct zxdh_sge {
-+ zxdh_tagged_offset tag_off;
-+ __u32 len;
-+ zxdh_stag stag;
-+};
-+
-+struct zxdh_ring {
-+ __u32 head;
-+ __u32 tail;
-+ __u32 size;
-+};
-+
-+struct zxdh_cqe {
-+ __le64 buf[ZXDH_CQE_SIZE];
-+};
-+
-+struct zxdh_extended_cqe {
-+ __le64 buf[ZXDH_EXTENDED_CQE_SIZE];
-+};
-+
-+struct zxdh_post_send {
-+ zxdh_sgl sg_list;
-+ __u32 num_sges;
-+ __u32 qkey;
-+ __u32 dest_qp;
-+ __u32 ah_id;
-+};
-+
-+struct zxdh_inline_rdma_send {
-+ void *data;
-+ __u32 len;
-+ __u32 qkey;
-+ __u32 dest_qp;
-+ __u32 ah_id;
-+};
-+
-+struct zxdh_post_rq_info {
-+ __u64 wr_id;
-+ zxdh_sgl sg_list;
-+ __u32 num_sges;
-+};
-+
-+struct zxdh_rdma_write {
-+ zxdh_sgl lo_sg_list;
-+ __u32 num_lo_sges;
-+ struct zxdh_sge rem_addr;
-+};
-+
-+struct zxdh_inline_rdma_write {
-+ void *data;
-+ __u32 len;
-+ struct zxdh_sge rem_addr;
-+};
-+
-+struct zxdh_rdma_read {
-+ zxdh_sgl lo_sg_list;
-+ __u32 num_lo_sges;
-+ struct zxdh_sge rem_addr;
-+};
-+
-+struct zxdh_bind_window {
-+ zxdh_stag mr_stag;
-+ __u64 bind_len;
-+ void *va;
-+ enum zxdh_addressing_type addressing_type;
-+ __u8 ena_reads : 1;
-+ __u8 ena_writes : 1;
-+ zxdh_stag mw_stag;
-+ __u8 mem_window_type_1 : 1;
-+ __u8 host_page_size;
-+ __u8 leaf_pbl_size;
-+ __u16 root_leaf_offset;
-+ __u64 mw_pa_pble_index;
-+};
-+
-+struct zxdh_inv_local_stag {
-+ zxdh_stag target_stag;
-+};
-+
-+struct zxdh_post_sq_info {
-+ __u64 wr_id;
-+ __u8 op_type;
-+ __u8 l4len;
-+ __u8 signaled : 1;
-+ __u8 read_fence : 1;
-+ __u8 local_fence : 1;
-+ __u8 inline_data : 1;
-+ __u8 imm_data_valid : 1;
-+ __u8 push_wqe : 1;
-+ __u8 report_rtt : 1;
-+ __u8 udp_hdr : 1;
-+ __u8 defer_flag : 1;
-+ __u8 solicited : 1;
-+ __u32 imm_data;
-+ __u32 stag_to_inv;
-+ union {
-+ struct zxdh_post_send send;
-+ struct zxdh_rdma_write rdma_write;
-+ struct zxdh_rdma_read rdma_read;
-+ struct zxdh_bind_window bind_window;
-+ struct zxdh_inv_local_stag inv_local_stag;
-+ struct zxdh_inline_rdma_write inline_rdma_write;
-+ struct zxdh_inline_rdma_send inline_rdma_send;
-+ } op;
-+};
-+
-+struct zxdh_cq_poll_info {
-+ __u64 wr_id;
-+ zxdh_qp_handle qp_handle;
-+ __u32 bytes_xfered;
-+ __u32 tcp_seq_num_rtt;
-+ __u32 qp_id;
-+ __u32 ud_src_qpn;
-+ __u32 imm_data;
-+ zxdh_stag inv_stag; /* or L_R_Key */
-+ enum zxdh_cmpl_status comp_status;
-+ __u16 major_err;
-+ __u16 minor_err;
-+ __u8 op_type;
-+ __u8 stag_invalid_set : 1; /* or L_R_Key set */
-+ __u8 push_dropped : 1;
-+ __u8 error : 1;
-+ __u8 solicited_event : 1;
-+ __u8 ipv4 : 1;
-+ __u8 imm_valid : 1;
-+};
-+
-+enum zxdh_status_code zxdh_uk_inline_rdma_write(struct zxdh_qp_uk *qp,
-+ struct zxdh_post_sq_info *info,
-+ bool post_sq);
-+enum zxdh_status_code zxdh_uk_rc_inline_send(struct zxdh_qp_uk *qp,
-+ struct zxdh_post_sq_info *info,
-+ bool post_sq);
-+enum zxdh_status_code zxdh_uk_ud_inline_send(struct zxdh_qp_uk *qp,
-+ struct zxdh_post_sq_info *info,
-+ bool post_sq);
-+enum zxdh_status_code zxdh_uk_mw_bind(struct zxdh_qp_uk *qp,
-+ struct zxdh_post_sq_info *info,
-+ bool post_sq);
-+enum zxdh_status_code zxdh_uk_post_nop(struct zxdh_qp_uk *qp, __u64 wr_id,
-+ bool signaled, bool post_sq);
-+enum zxdh_status_code zxdh_uk_post_receive(struct zxdh_qp_uk *qp,
-+ struct zxdh_post_rq_info *info);
-+void zxdh_uk_qp_post_wr(struct zxdh_qp_uk *qp);
-+void zxdh_uk_qp_set_shadow_area(struct zxdh_qp_uk *qp);
-+enum zxdh_status_code zxdh_uk_rdma_read(struct zxdh_qp_uk *qp,
-+ struct zxdh_post_sq_info *info,
-+ bool inv_stag, bool post_sq);
-+enum zxdh_status_code zxdh_uk_rdma_write(struct zxdh_qp_uk *qp,
-+ struct zxdh_post_sq_info *info,
-+ bool post_sq);
-+enum zxdh_status_code zxdh_uk_rc_send(struct zxdh_qp_uk *qp,
-+ struct zxdh_post_sq_info *info,
-+ bool post_sq);
-+enum zxdh_status_code zxdh_uk_ud_send(struct zxdh_qp_uk *qp,
-+ struct zxdh_post_sq_info *info,
-+ bool post_sq);
-+enum zxdh_status_code
-+zxdh_uk_stag_local_invalidate(struct zxdh_qp_uk *qp,
-+ struct zxdh_post_sq_info *info, bool post_sq);
-+
-+struct zxdh_wqe_uk_ops {
-+ void (*iw_copy_inline_data)(__u8 *dest, __u8 *src, __u32 len,
-+ __u8 polarity, bool imm_data_flag);
-+ __u16 (*iw_inline_data_size_to_quanta)(__u32 data_size,
-+ bool imm_data_flag);
-+ void (*iw_set_fragment)(__le64 *wqe, __u32 offset, struct zxdh_sge *sge,
-+ __u8 valid);
-+ void (*iw_set_mw_bind_wqe)(__le64 *wqe,
-+ struct zxdh_bind_window *op_info);
-+};
-+
-+__le64 *get_current_cqe(struct zxdh_cq_uk *cq);
-+enum zxdh_status_code zxdh_uk_cq_poll_cmpl(struct zxdh_cq_uk *cq,
-+ struct zxdh_cq_poll_info *info);
-+void zxdh_uk_cq_request_notification(struct zxdh_cq_uk *cq,
-+ enum zxdh_cmpl_notify cq_notify);
-+void zxdh_uk_cq_resize(struct zxdh_cq_uk *cq, void *cq_base, int size);
-+void zxdh_uk_cq_set_resized_cnt(struct zxdh_cq_uk *cq, __u16 cnt);
-+enum zxdh_status_code zxdh_uk_cq_init(struct zxdh_cq_uk *cq,
-+ struct zxdh_cq_uk_init_info *info);
-+enum zxdh_status_code zxdh_uk_qp_init(struct zxdh_qp_uk *qp,
-+ struct zxdh_qp_uk_init_info *info);
-+struct zxdh_sq_uk_wr_trk_info {
-+ __u64 wrid;
-+ __u32 wr_len;
-+ __u16 quanta;
-+ __u8 reserved[2];
-+};
-+
-+struct zxdh_qp_sq_quanta {
-+ __le64 elem[ZXDH_SQE_SIZE];
-+};
-+
-+struct zxdh_qp_rq_quanta {
-+ __le64 elem[ZXDH_RQE_SIZE];
-+};
-+
-+struct zxdh_qp_uk {
-+ struct zxdh_qp_sq_quanta *sq_base;
-+ struct zxdh_qp_rq_quanta *rq_base;
-+ struct zxdh_uk_attrs *uk_attrs;
-+ __u32 *wqe_alloc_db;
-+ struct zxdh_sq_uk_wr_trk_info *sq_wrtrk_array;
-+ __u64 *rq_wrid_array;
-+ __le64 *shadow_area;
-+ __le32 *push_db;
-+ __le64 *push_wqe;
-+ struct zxdh_ring sq_ring;
-+ struct zxdh_ring rq_ring;
-+ struct zxdh_ring initial_ring;
-+ __u32 qp_id;
-+ __u32 qp_caps;
-+ __u32 sq_size;
-+ __u32 rq_size;
-+ __u32 max_sq_frag_cnt;
-+ __u32 max_rq_frag_cnt;
-+ __u32 max_inline_data;
-+ struct zxdh_wqe_uk_ops wqe_ops;
-+ __u16 conn_wqes;
-+ __u8 qp_type;
-+ __u8 swqe_polarity;
-+ __u8 swqe_polarity_deferred;
-+ __u8 rwqe_polarity;
-+ __u8 rq_wqe_size;
-+ __u8 rq_wqe_size_multiplier;
-+ __u8 deferred_flag : 1;
-+ __u8 push_mode : 1; /* whether the last post wqe was pushed */
-+ __u8 push_dropped : 1;
-+ __u8 sq_flush_complete : 1; /* Indicates flush was seen and SQ was empty after the flush */
-+ __u8 rq_flush_complete : 1; /* Indicates flush was seen and RQ was empty after the flush */
-+ __u8 destroy_pending : 1; /* Indicates the QP is being destroyed */
-+ void *back_qp;
-+ zxdh_sgl split_sg_list;
-+ pthread_spinlock_t *lock;
-+ __u16 rwqe_signature;
-+ __u8 dbg_rq_flushed;
-+ __u8 sq_flush_seen;
-+ __u8 rq_flush_seen;
-+ __u8 is_srq;
-+ __u16 mtu;
-+ __u32 next_psn;
-+ __u32 cqe_last_ack_qsn;
-+ __u32 qp_last_ack_qsn;
-+ __u8 cqe_retry_cnt;
-+ __u8 qp_reset_cnt;
-+};
-+
-+struct zxdh_cq_uk {
-+ struct zxdh_cqe *cq_base;
-+ __u32 *cqe_alloc_db;
-+ __u32 *cq_ack_db;
-+ __le64 *shadow_area;
-+ __u32 cq_id;
-+ __u32 cq_size;
-+ __u32 cqe_rd_cnt;
-+ struct zxdh_ring cq_ring;
-+ __u8 polarity;
-+ __u8 cqe_size;
-+};
-+
-+struct zxdh_srq_uk {
-+ struct zxdh_srq_wqe *srq_base;
-+ struct zxdh_uk_attrs *uk_attrs;
-+ __le16 *srq_list_base;
-+ __le64 *srq_db_base;
-+ __u32 srq_id;
-+ __u32 srq_size;
-+ __u32 log2_srq_size;
-+ __u32 srq_list_size;
-+ struct zxdh_ring srq_ring;
-+ struct zxdh_ring srq_list_ring;
-+ // u8 srq_polarity;
-+ __u8 srq_list_polarity;
-+ __u64 *srq_wrid_array;
-+ __u8 srq_wqe_size;
-+ __u8 srq_wqe_size_multiplier;
-+ __u32 srq_caps;
-+ __u32 max_srq_frag_cnt;
-+ __u32 srq_type;
-+ pthread_spinlock_t *lock;
-+	__u8 srq_flush_complete : 1; /* Indicates flush was seen and SRQ was empty after the flush */
-+	__u8 destroy_pending : 1; /* Indicates the SRQ is being destroyed */
-+ __u8 srq_flush_seen;
-+};
-+
-+struct zxdh_qp_uk_init_info {
-+ struct zxdh_qp_sq_quanta *sq;
-+ struct zxdh_qp_rq_quanta *rq;
-+ struct zxdh_uk_attrs *uk_attrs;
-+ __u32 *wqe_alloc_db;
-+ __le64 *shadow_area;
-+ struct zxdh_sq_uk_wr_trk_info *sq_wrtrk_array;
-+ __u64 *rq_wrid_array;
-+ __u32 qp_id;
-+ __u32 qp_caps;
-+ __u32 sq_size;
-+ __u32 rq_size;
-+ __u32 max_sq_frag_cnt;
-+ __u32 max_rq_frag_cnt;
-+ __u32 max_inline_data;
-+ __u8 type;
-+ int abi_ver;
-+ bool legacy_mode;
-+};
-+
-+struct zxdh_cq_uk_init_info {
-+ __u32 *cqe_alloc_db;
-+ __u32 *cq_ack_db;
-+ struct zxdh_cqe *cq_base;
-+ __le64 *shadow_area;
-+ __u32 cq_size;
-+ __u32 cq_id;
-+ __u8 cqe_size;
-+};
-+
-+struct zxdh_srq_uk_init_info {
-+ struct zxdh_srq_wqe *srq_base;
-+ struct zxdh_uk_attrs *uk_attrs;
-+ __le16 *srq_list_base;
-+ __le64 *srq_db_base;
-+ __u64 *srq_wrid_array;
-+ __u32 srq_id;
-+ __u32 srq_caps;
-+ __u32 srq_size;
-+ __u32 log2_srq_size;
-+ __u32 srq_list_size;
-+ __u32 srq_db_size;
-+ __u32 max_srq_frag_cnt;
-+ __u32 srq_limit;
-+};
-+
-+struct zxdh_wqe_srq_next_sge {
-+ __le16 next_wqe_index;
-+ __le16 signature;
-+ __u8 valid_sge_num;
-+ __u8 rsvd[11];
-+};
-+
-+struct zxdh_srq_sge {
-+ __le64 addr;
-+ __le32 length;
-+ __le32 lkey;
-+};
-+
-+struct zxdh_srq_wqe {
-+ __le64 elem[ZXDH_SRQE_SIZE];
++/* Error Codes */
++enum zxdh_status_code {
++ ZXDH_SUCCESS = 0,
++ ZXDH_ERR_NVM = -1,
++ ZXDH_ERR_NVM_CHECKSUM = -2,
++ ZXDH_ERR_CFG = -4,
++ ZXDH_ERR_PARAM = -5,
++ ZXDH_ERR_DEVICE_NOT_SUPPORTED = -6,
++ ZXDH_ERR_RESET_FAILED = -7,
++ ZXDH_ERR_SWFW_SYNC = -8,
++ ZXDH_ERR_NO_MEMORY = -9,
++ ZXDH_ERR_BAD_PTR = -10,
++ ZXDH_ERR_INVALID_PD_ID = -11,
++ ZXDH_ERR_INVALID_QP_ID = -12,
++ ZXDH_ERR_INVALID_CQ_ID = -13,
++ ZXDH_ERR_INVALID_CEQ_ID = -14,
++ ZXDH_ERR_INVALID_AEQ_ID = -15,
++ ZXDH_ERR_INVALID_SIZE = -16,
++ ZXDH_ERR_INVALID_ARP_INDEX = -17,
++ ZXDH_ERR_INVALID_FPM_FUNC_ID = -18,
++ ZXDH_ERR_QP_INVALID_MSG_SIZE = -19,
++ ZXDH_ERR_QP_TOOMANY_WRS_POSTED = -20,
++ ZXDH_ERR_INVALID_FRAG_COUNT = -21,
++ ZXDH_ERR_Q_EMPTY = -22,
++ ZXDH_ERR_INVALID_ALIGNMENT = -23,
++ ZXDH_ERR_FLUSHED_Q = -24,
++ ZXDH_ERR_INVALID_PUSH_PAGE_INDEX = -25,
++ ZXDH_ERR_INVALID_INLINE_DATA_SIZE = -26,
++ ZXDH_ERR_TIMEOUT = -27,
++ ZXDH_ERR_OPCODE_MISMATCH = -28,
++ ZXDH_ERR_CQP_COMPL_ERROR = -29,
++ ZXDH_ERR_INVALID_VF_ID = -30,
++ ZXDH_ERR_INVALID_HMCFN_ID = -31,
++ ZXDH_ERR_BACKING_PAGE_ERROR = -32,
++ ZXDH_ERR_NO_PBLCHUNKS_AVAILABLE = -33,
++ ZXDH_ERR_INVALID_PBLE_INDEX = -34,
++ ZXDH_ERR_INVALID_SD_INDEX = -35,
++ ZXDH_ERR_INVALID_PAGE_DESC_INDEX = -36,
++ ZXDH_ERR_INVALID_SD_TYPE = -37,
++ ZXDH_ERR_MEMCPY_FAILED = -38,
++ ZXDH_ERR_INVALID_HMC_OBJ_INDEX = -39,
++ ZXDH_ERR_INVALID_HMC_OBJ_COUNT = -40,
++ ZXDH_ERR_BUF_TOO_SHORT = -43,
++ ZXDH_ERR_BAD_IWARP_CQE = -44,
++ ZXDH_ERR_NVM_BLANK_MODE = -45,
++ ZXDH_ERR_NOT_IMPL = -46,
++ ZXDH_ERR_PE_DOORBELL_NOT_ENA = -47,
++ ZXDH_ERR_NOT_READY = -48,
++ ZXDH_NOT_SUPPORTED = -49,
++ ZXDH_ERR_FIRMWARE_API_VER = -50,
++ ZXDH_ERR_RING_FULL = -51,
++ ZXDH_ERR_MPA_CRC = -61,
++ ZXDH_ERR_NO_TXBUFS = -62,
++ ZXDH_ERR_SEQ_NUM = -63,
++ ZXDH_ERR_LIST_EMPTY = -64,
++ ZXDH_ERR_INVALID_MAC_ADDR = -65,
++ ZXDH_ERR_BAD_STAG = -66,
++ ZXDH_ERR_CQ_COMPL_ERROR = -67,
++ ZXDH_ERR_Q_DESTROYED = -68,
++ ZXDH_ERR_INVALID_FEAT_CNT = -69,
++ ZXDH_ERR_REG_CQ_FULL = -70,
++ ZXDH_ERR_VF_MSG_ERROR = -71,
++ ZXDH_ERR_NO_INTR = -72,
++ ZXDH_ERR_REG_QSET = -73,
++ ZXDH_ERR_FEATURES_OP = -74,
++ ZXDH_ERR_INVALID_FRAG_LEN = -75,
++ ZXDH_ERR_RETRY_ACK_ERR = -76,
++ ZXDH_ERR_RETRY_ACK_NOT_EXCEED_ERR = -77,
+};
-+
-+__le64 *zxdh_qp_get_next_send_wqe(struct zxdh_qp_uk *qp, __u32 *wqe_idx,
-+ __u16 quanta, __u32 total_size,
-+ struct zxdh_post_sq_info *info);
-+__le64 *zxdh_qp_get_next_recv_wqe(struct zxdh_qp_uk *qp, __u32 *wqe_idx);
-+void zxdh_uk_clean_cq(void *q, struct zxdh_cq_uk *cq);
-+enum zxdh_status_code zxdh_nop(struct zxdh_qp_uk *qp, __u64 wr_id,
-+ bool signaled, bool post_sq);
-+enum zxdh_status_code zxdh_fragcnt_to_quanta_sq(__u32 frag_cnt, __u16 *quanta);
-+enum zxdh_status_code zxdh_fragcnt_to_wqesize_rq(__u32 frag_cnt,
-+ __u16 *wqe_size);
-+void zxdh_get_sq_wqe_shift(struct zxdh_uk_attrs *uk_attrs, __u32 sge,
-+ __u32 inline_data, __u8 *shift);
-+void zxdh_get_rq_wqe_shift(struct zxdh_uk_attrs *uk_attrs, __u32 sge,
-+ __u8 *shift);
-+enum zxdh_status_code zxdh_get_sqdepth(struct zxdh_uk_attrs *uk_attrs,
-+ __u32 sq_size, __u8 shift,
-+ __u32 *wqdepth);
-+enum zxdh_status_code zxdh_get_rqdepth(struct zxdh_uk_attrs *uk_attrs,
-+ __u32 rq_size, __u8 shift,
-+ __u32 *wqdepth);
-+int zxdh_qp_round_up(__u32 wqdepth);
-+int zxdh_cq_round_up(__u32 wqdepth);
-+void zxdh_qp_push_wqe(struct zxdh_qp_uk *qp, __le64 *wqe, __u16 quanta,
-+ __u32 wqe_idx, bool post_sq);
-+void zxdh_clr_wqes(struct zxdh_qp_uk *qp, __u32 qp_wqe_idx);
-+
-+void zxdh_get_srq_wqe_shift(struct zxdh_uk_attrs *uk_attrs, __u32 sge,
-+ __u8 *shift);
-+int zxdh_get_srqdepth(__u32 max_hw_srq_quanta, __u32 srq_size, __u8 shift,
-+ __u32 *srqdepth);
-+__le64 *zxdh_get_srq_wqe(struct zxdh_srq_uk *srq, int wqe_index);
-+__le16 *zxdh_get_srq_list_wqe(struct zxdh_srq_uk *srq, __u16 *idx);
-+
-+enum zxdh_status_code zxdh_uk_srq_init(struct zxdh_srq_uk *srq,
-+ struct zxdh_srq_uk_init_info *info);
-+void zxdh_free_srq_wqe(struct zxdh_srq_uk *srq, int wqe_index);
-+#endif /* ZXDH_USER_H */
-diff --git a/providers/zrdma/uverbs.c b/providers/zrdma/uverbs.c
++#endif /* ZXDH_STATUS_H */
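These negative status codes stay internal to the provider; the verbs entry points in zxdh_verbs.c translate them to positive errno values before returning, e.g. ZXDH_ERR_QP_TOOMANY_WRS_POSTED becomes ENOMEM and anything else EINVAL in zxdh_upost_send()/zxdh_upost_recv(). A hypothetical helper capturing that pattern:

#include <errno.h>

/* Hypothetical helper sketch mirroring the mapping used in the post paths. */
static inline int zxdh_status_to_errno(enum zxdh_status_code ret)
{
	if (ret == ZXDH_SUCCESS)
		return 0;
	return (ret == ZXDH_ERR_QP_TOOMANY_WRS_POSTED) ? ENOMEM : EINVAL;
}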
+diff --git a/providers/zrdma/zxdh_verbs.c b/providers/zrdma/zxdh_verbs.c
new file mode 100644
-index 0000000..edd05bf
+index 0000000..6973504
--- /dev/null
-+++ b/providers/zrdma/uverbs.c
-@@ -0,0 +1,3209 @@
++++ b/providers/zrdma/zxdh_verbs.c
+@@ -0,0 +1,3193 @@
+// SPDX-License-Identifier: GPL-2.0 or Linux-OpenIB
+/* Copyright (c) 2024 ZTE Corporation. All rights reserved. */
+#include
@@ -8783,8 +4433,8 @@ index 0000000..edd05bf
+#include
+#include
+
-+#include "umain.h"
-+#include "abi.h"
++#include "main.h"
++#include "zxdh_abi.h"
+
+uint32_t zxdh_debug_mask;
+
@@ -8825,7 +4475,6 @@ index 0000000..edd05bf
+ while (num < ib_wr->num_sge) {
+ *len += ib_wr->sg_list[num].length;
+ if (*len > ZXDH_MAX_INLINE_DATA_SIZE) {
-+ printf("err:inline bytes over max inline length\n");
+ return -EINVAL;
+ }
+ memcpy(inline_data + offset,
@@ -9134,27 +4783,25 @@ index 0000000..edd05bf
+ struct ibv_cq_init_attr_ex *attr_ex,
+ bool ext_cq)
+{
-+ struct zxdh_cq_uk_init_info info = {};
++ struct zxdh_cq_init_info info = {};
+ struct zxdh_ureg_mr reg_mr_cmd = {};
+ struct zxdh_ucreate_cq_ex cmd = {};
+ struct zxdh_ucreate_cq_ex_resp resp = {};
+ struct ib_uverbs_reg_mr_resp reg_mr_resp = {};
+ struct zxdh_ureg_mr reg_mr_shadow_cmd = {};
+ struct ib_uverbs_reg_mr_resp reg_mr_shadow_resp = {};
-+ struct zxdh_uk_attrs *uk_attrs;
++ struct zxdh_dev_attrs *dev_attrs;
+ struct zxdh_uvcontext *iwvctx;
+ struct zxdh_ucq *iwucq;
+ size_t total_size;
+ __u32 cq_pages;
+ int ret, ncqe;
-+ __u8 hw_rev;
+
+ iwvctx = container_of(context, struct zxdh_uvcontext, ibv_ctx.context);
-+ uk_attrs = &iwvctx->uk_attrs;
-+ hw_rev = uk_attrs->hw_rev;
++ dev_attrs = &iwvctx->dev_attrs;
+
+ if (attr_ex->cqe < ZXDH_MIN_CQ_SIZE ||
-+ attr_ex->cqe > uk_attrs->max_hw_cq_size) {
++ attr_ex->cqe > dev_attrs->max_hw_cq_size) {
+ errno = EINVAL;
+ return NULL;
+ }
@@ -9180,7 +4827,7 @@ index 0000000..edd05bf
+ total_size = get_cq_total_bytes(info.cq_size);
+ cq_pages = total_size >> ZXDH_HW_PAGE_SHIFT;
+
-+ if (!(uk_attrs->feature_flags & ZXDH_FEATURE_CQ_RESIZE))
++ if (!(dev_attrs->feature_flags & ZXDH_FEATURE_CQ_RESIZE))
+ total_size = (cq_pages << ZXDH_HW_PAGE_SHIFT) +
+ ZXDH_DB_SHADOW_AREA_SIZE;
+
@@ -9205,7 +4852,7 @@ index 0000000..edd05bf
+
+ iwucq->vmr.ibv_mr.pd = &iwvctx->iwupd->ibv_pd;
+
-+ if (uk_attrs->feature_flags & ZXDH_FEATURE_CQ_RESIZE) {
++ if (dev_attrs->feature_flags & ZXDH_FEATURE_CQ_RESIZE) {
+ info.shadow_area = zxdh_alloc_hw_buf(ZXDH_DB_SHADOW_AREA_SIZE);
+ if (!info.shadow_area)
+ goto err_dereg_mr;
@@ -9252,7 +4899,7 @@ index 0000000..edd05bf
+
+ info.cqe_alloc_db =
+ (__u32 *)((__u8 *)iwvctx->cq_db + ZXDH_DB_CQ_OFFSET);
-+ zxdh_uk_cq_init(&iwucq->cq, &info);
++ zxdh_cq_init(&iwucq->cq, &info);
+
+ return &iwucq->verbs_cq.cq_ex;
+
@@ -9322,7 +4969,7 @@ index 0000000..edd05bf
+ struct zxdh_cq_buf *cq_buf, *next;
+ int cq_cnt = 0;
+
-+ list_for_each_safe(&iwucq->resize_list, cq_buf, next, list) {
++ list_for_each_safe (&iwucq->resize_list, cq_buf, next, list) {
+ if (cq_buf == lcqe_buf)
+ return cq_cnt;
+
@@ -9340,7 +4987,7 @@ index 0000000..edd05bf
+ */
+int zxdh_udestroy_cq(struct ibv_cq *cq)
+{
-+ struct zxdh_uk_attrs *uk_attrs;
++ struct zxdh_dev_attrs *dev_attrs;
+ struct zxdh_uvcontext *iwvctx;
+ struct zxdh_ucq *iwucq;
+ __u64 cq_shadow_temp;
@@ -9349,7 +4996,7 @@ index 0000000..edd05bf
+ iwucq = container_of(cq, struct zxdh_ucq, verbs_cq.cq);
+ iwvctx = container_of(cq->context, struct zxdh_uvcontext,
+ ibv_ctx.context);
-+ uk_attrs = &iwvctx->uk_attrs;
++ dev_attrs = &iwvctx->dev_attrs;
+
+ ret = pthread_spin_destroy(&iwucq->lock);
+ if (ret)
@@ -9365,7 +5012,7 @@ index 0000000..edd05bf
+ ibv_cmd_dereg_mr(&iwucq->vmr);
+ zxdh_free_hw_buf(iwucq->cq.cq_base, iwucq->buf_size);
+
-+ if (uk_attrs->feature_flags & ZXDH_FEATURE_CQ_RESIZE) {
++ if (dev_attrs->feature_flags & ZXDH_FEATURE_CQ_RESIZE) {
+ ibv_cmd_dereg_mr(&iwucq->vmr_shadow_area);
+ zxdh_free_hw_buf(iwucq->cq.shadow_area,
+ ZXDH_DB_SHADOW_AREA_SIZE);
@@ -9441,7 +5088,7 @@ index 0000000..edd05bf
+static inline void zxdh_process_cqe(struct ibv_wc *entry,
+ struct zxdh_cq_poll_info *cur_cqe)
+{
-+ struct zxdh_qp_uk *qp;
++ struct zxdh_qp *qp;
+ struct ibv_qp *ib_qp;
+
+ entry->wc_flags = 0;
@@ -9519,17 +5166,16 @@ index 0000000..edd05bf
+
+/**
+ * zxdh_poll_one - poll one entry of the CQ
-+ * @ukcq: ukcq to poll
++ * @cq: cq to poll
+ * @cur_cqe: current CQE info to be filled in
+ * @entry: ibv_wc object to be filled for non-extended CQ or NULL for extended CQ
+ *
+ * Returns the internal zxdh device error code or 0 on success
+ */
-+static int zxdh_poll_one(struct zxdh_cq_uk *ukcq,
-+ struct zxdh_cq_poll_info *cur_cqe,
++static int zxdh_poll_one(struct zxdh_cq *cq, struct zxdh_cq_poll_info *cur_cqe,
+ struct ibv_wc *entry)
+{
-+ int ret = zxdh_uk_cq_poll_cmpl(ukcq, cur_cqe);
++ int ret = zxdh_cq_poll_cmpl(cq, cur_cqe);
+
+ if (ret)
+ return ret;
@@ -9563,7 +5209,7 @@ index 0000000..edd05bf
+ int ret;
+
+ /* go through the list of previously resized CQ buffers */
-+ list_for_each_safe(&iwucq->resize_list, cq_buf, next, list) {
++ list_for_each_safe (&iwucq->resize_list, cq_buf, next, list) {
+ while (npolled < num_entries) {
+ ret = zxdh_poll_one(&cq_buf->cq, cur_cqe,
+ entry ? entry + npolled : NULL);
@@ -9581,7 +5227,6 @@ index 0000000..edd05bf
+ cq_new_cqe = true;
+ continue;
+ }
-+ printf("__zrdma_upoll_cq resize goto error failed\n");
+ goto error;
+ }
+
@@ -9609,7 +5254,6 @@ index 0000000..edd05bf
+ cq_new_cqe = true;
+ continue;
+ }
-+ printf("__zrdma_upoll_cq goto error failed\n");
+ goto error;
+ }
+ if (cq_new_cqe)
@@ -9620,7 +5264,7 @@ index 0000000..edd05bf
+ resized_bufs = zxdh_process_resize_list(iwucq, last_buf);
+ if (resized_bufs)
+ /* report to the HW the number of complete CQ resizes */
-+ zxdh_uk_cq_set_resized_cnt(&iwucq->cq, resized_bufs);
++ zxdh_cq_set_resized_cnt(&iwucq->cq, resized_bufs);
+
+ return npolled;
+
@@ -9841,7 +5485,7 @@ index 0000000..edd05bf
+{
+ struct zxdh_cq_poll_info *cur_cqe;
+ struct zxdh_ucq *iwucq;
-+ struct zxdh_qp_uk *qp;
++ struct zxdh_qp *qp;
+ struct ibv_qp *ib_qp;
+ unsigned int wc_flags = 0;
+
@@ -9902,7 +5546,7 @@ index 0000000..edd05bf
+{
+ struct zxdh_cq_poll_info *cur_cqe;
+ struct zxdh_ucq *iwucq;
-+ struct zxdh_qp_uk *qp;
++ struct zxdh_qp *qp;
+ struct ibv_qp *ib_qp;
+
+ iwucq = container_of(ibvcq_ex, struct zxdh_ucq, verbs_cq.cq_ex);
@@ -9978,7 +5622,7 @@ index 0000000..edd05bf
+ iwucq->is_armed = true;
+ iwucq->last_notify = cq_notify;
+
-+ zxdh_uk_cq_request_notification(&iwucq->cq, cq_notify);
++ zxdh_cq_request_notification(&iwucq->cq, cq_notify);
+}
+
+/**
@@ -10083,7 +5727,7 @@ index 0000000..edd05bf
+ */
+static int zxdh_vmapped_qp(struct zxdh_uqp *iwuqp, struct ibv_pd *pd,
+ struct ibv_qp_init_attr *attr, int sqdepth,
-+ int rqdepth, struct zxdh_qp_uk_init_info *info,
++ int rqdepth, struct zxdh_qp_init_info *info,
+ bool legacy_mode)
+{
+ struct zxdh_ucreate_qp cmd = {};
@@ -10205,8 +5849,8 @@ index 0000000..edd05bf
+static struct ibv_qp *create_qp(struct ibv_context *ibv_ctx,
+ struct ibv_qp_init_attr_ex *attr_ex)
+{
-+ struct zxdh_qp_uk_init_info info = {};
-+ struct zxdh_uk_attrs *uk_attrs;
++ struct zxdh_qp_init_info info = {};
++ struct zxdh_dev_attrs *dev_attrs;
+ struct zxdh_uvcontext *iwvctx;
+ struct zxdh_uqp *iwuqp;
+ struct zxdh_usrq *iwusrq;
@@ -10229,23 +5873,24 @@ index 0000000..edd05bf
+ }
+
+ iwvctx = container_of(ibv_ctx, struct zxdh_uvcontext, ibv_ctx.context);
-+ uk_attrs = &iwvctx->uk_attrs;
++ dev_attrs = &iwvctx->dev_attrs;
+
-+ if (attr->cap.max_send_sge > uk_attrs->max_hw_wq_frags ||
-+ attr->cap.max_recv_sge > uk_attrs->max_hw_wq_frags) {
++ if (attr->cap.max_send_sge > dev_attrs->max_hw_wq_frags ||
++ attr->cap.max_recv_sge > dev_attrs->max_hw_wq_frags) {
+ errno = EINVAL;
+ free(attr);
+ return NULL;
+ }
+
-+ if (attr->cap.max_inline_data > uk_attrs->max_hw_inline) {
-+ zxdh_dbg(ZXDH_DBG_QP, "max_inline_data over max_hw_inline\n");
-+ attr->cap.max_inline_data = uk_attrs->max_hw_inline;
++ if (attr->cap.max_inline_data > dev_attrs->max_hw_inline) {
++ zxdh_dbg(&iwvctx->ibv_ctx, ZXDH_DBG_QP,
++ "max_inline_data over max_hw_inline\n");
++ attr->cap.max_inline_data = dev_attrs->max_hw_inline;
+ }
+
-+ zxdh_get_sq_wqe_shift(uk_attrs, attr->cap.max_send_sge,
-+ attr->cap.max_inline_data, &sqshift);
-+ status = zxdh_get_sqdepth(uk_attrs, attr->cap.max_send_wr, sqshift,
++ zxdh_get_sq_wqe_shift(attr->cap.max_send_sge, attr->cap.max_inline_data,
++ &sqshift);
++ status = zxdh_get_sqdepth(dev_attrs, attr->cap.max_send_wr, sqshift,
+ &sqdepth);
+ if (status) {
+ errno = EINVAL;
@@ -10253,8 +5898,8 @@ index 0000000..edd05bf
+ return NULL;
+ }
+
-+ zxdh_get_rq_wqe_shift(uk_attrs, attr->cap.max_recv_sge, &rqshift);
-+ status = zxdh_get_rqdepth(uk_attrs, attr->cap.max_recv_wr, rqshift,
++ zxdh_get_rq_wqe_shift(attr->cap.max_recv_sge, &rqshift);
++ status = zxdh_get_rqdepth(dev_attrs, attr->cap.max_recv_wr, rqshift,
+ &rqdepth);
+ if (status) {
+ errno = EINVAL;
@@ -10297,7 +5942,7 @@ index 0000000..edd05bf
+ attr->cap.max_send_wr = info.sq_size;
+ attr->cap.max_recv_wr = info.rq_size;
+
-+ info.uk_attrs = uk_attrs;
++ info.dev_attrs = dev_attrs;
+ info.max_sq_frag_cnt = attr->cap.max_send_sge;
+ info.max_rq_frag_cnt = attr->cap.max_recv_sge;
+
@@ -10350,12 +5995,12 @@ index 0000000..edd05bf
+ info.max_inline_data = attr->cap.max_inline_data;
+ if (info.type == ZXDH_QP_TYPE_ROCE_RC) {
+ iwuqp->qp.split_sg_list =
-+ calloc(2 * uk_attrs->max_hw_read_sges,
++ calloc(2 * dev_attrs->max_hw_read_sges,
+ sizeof(*iwuqp->qp.split_sg_list));
+ if (!iwuqp->qp.split_sg_list)
+ goto err_free_vmap_qp;
+ }
-+ status = zxdh_uk_qp_init(&iwuqp->qp, &info);
++ status = zxdh_qp_init(&iwuqp->qp, &info);
+ if (status) {
+ errno = EINVAL;
+ goto err_free_sg_list;
@@ -10495,16 +6140,16 @@ index 0000000..edd05bf
+ * @qp: qp for which completions are cleaned
+ * @iwcq: cq to be cleaned
+ */
-+static void zxdh_clean_cqes(struct zxdh_qp_uk *qp, struct zxdh_ucq *iwucq)
++static void zxdh_clean_cqes(struct zxdh_qp *qp, struct zxdh_ucq *iwucq)
+{
-+ struct zxdh_cq_uk *ukcq = &iwucq->cq;
++ struct zxdh_cq *cq = &iwucq->cq;
+ int ret;
+
+ ret = pthread_spin_lock(&iwucq->lock);
+ if (ret)
+ return;
+
-+ zxdh_uk_clean_cq(qp, ukcq);
++ zxdh_clean_cq(qp, cq);
+ pthread_spin_unlock(&iwucq->lock);
+}
+
@@ -10596,7 +6241,7 @@ index 0000000..edd05bf
+{
+ struct zxdh_post_sq_info info;
+ struct zxdh_uvcontext *iwvctx;
-+ struct zxdh_uk_attrs *uk_attrs;
++ struct zxdh_dev_attrs *dev_attrs;
+ enum zxdh_status_code ret = 0;
+ struct zxdh_uqp *iwuqp;
+ bool reflush = false;
@@ -10605,17 +6250,17 @@ index 0000000..edd05bf
+ struct zxdh_umr *umr = NULL;
+ __u64 mr_va = 0, mw_va = 0, value_dffer = 0, mw_pa_pble_index = 0;
+ __u16 mr_offset = 0;
-+
++ iwvctx = container_of(ib_qp->context, struct zxdh_uvcontext,
++ ibv_ctx.context);
+ if (ib_qp->state != IBV_QPS_RTS) {
+ *bad_wr = ib_wr;
-+ printf("err:post send at state:%d\n", ib_qp->state);
++ verbs_err(&iwvctx->ibv_ctx, "zrdma: post send at state:%d\n",
++ ib_qp->state);
+ return -EINVAL;
+ }
+
+ iwuqp = container_of(ib_qp, struct zxdh_uqp, vqp.qp);
-+ iwvctx = container_of(ib_qp->context, struct zxdh_uvcontext,
-+ ibv_ctx.context);
-+ uk_attrs = &iwvctx->uk_attrs;
++ dev_attrs = &iwvctx->dev_attrs;
+
+ err = pthread_spin_lock(&iwuqp->lock);
+ if (err)
@@ -10672,7 +6317,9 @@ index 0000000..edd05bf
+ iwuqp->inline_data, ib_wr,
+ &info.op.inline_rdma_send.len);
+ if (ret) {
-+ printf("err:zxdh_get_inline_data fail\n");
++ verbs_err(
++ &iwvctx->ibv_ctx,
++ "zrdma: get inline data fail\n");
+ pthread_spin_unlock(&iwuqp->lock);
+ return -EINVAL;
+ }
@@ -10689,11 +6336,11 @@ index 0000000..edd05bf
+ ib_wr->wr.ud.remote_qkey;
+ info.op.inline_rdma_send.dest_qp =
+ ib_wr->wr.ud.remote_qpn;
-+ ret = zxdh_uk_ud_inline_send(
-+ &iwuqp->qp, &info, false);
++ ret = zxdh_ud_inline_send(&iwuqp->qp,
++ &info, false);
+ } else {
-+ ret = zxdh_uk_rc_inline_send(
-+ &iwuqp->qp, &info, false);
++ ret = zxdh_rc_inline_send(&iwuqp->qp,
++ &info, false);
+ }
+ } else {
+ info.op.send.num_sges = ib_wr->num_sge;
@@ -10711,11 +6358,11 @@ index 0000000..edd05bf
+ ib_wr->wr.ud.remote_qkey;
+ info.op.inline_rdma_send.dest_qp =
+ ib_wr->wr.ud.remote_qpn;
-+ ret = zxdh_uk_ud_send(&iwuqp->qp, &info,
-+ false);
++ ret = zxdh_ud_send(&iwuqp->qp, &info,
++ false);
+ } else {
-+ ret = zxdh_uk_rc_send(&iwuqp->qp, &info,
-+ false);
++ ret = zxdh_rc_send(&iwuqp->qp, &info,
++ false);
+ }
+ }
+ if (ret)
@@ -10747,7 +6394,9 @@ index 0000000..edd05bf
+ iwuqp->inline_data, ib_wr,
+ &info.op.inline_rdma_write.len);
+ if (ret) {
-+ printf("err:zxdh_get_inline_data fail\n");
++ verbs_err(
++ &iwvctx->ibv_ctx,
++ "zrdma: get inline data fail\n");
+ pthread_spin_unlock(&iwuqp->lock);
+ return -EINVAL;
+ }
@@ -10757,8 +6406,8 @@ index 0000000..edd05bf
+ ib_wr->wr.rdma.remote_addr;
+ info.op.inline_rdma_write.rem_addr.stag =
+ ib_wr->wr.rdma.rkey;
-+ ret = zxdh_uk_inline_rdma_write(&iwuqp->qp,
-+ &info, false);
++ ret = zxdh_inline_rdma_write(&iwuqp->qp, &info,
++ false);
+ } else {
+ info.op.rdma_write.lo_sg_list =
+ (void *)ib_wr->sg_list;
@@ -10767,8 +6416,7 @@ index 0000000..edd05bf
+ ib_wr->wr.rdma.remote_addr;
+ info.op.rdma_write.rem_addr.stag =
+ ib_wr->wr.rdma.rkey;
-+ ret = zxdh_uk_rdma_write(&iwuqp->qp, &info,
-+ false);
++ ret = zxdh_rdma_write(&iwuqp->qp, &info, false);
+ }
+ if (ret)
+ err = (ret == ZXDH_ERR_QP_TOOMANY_WRS_POSTED) ?
@@ -10776,7 +6424,7 @@ index 0000000..edd05bf
+ EINVAL;
+ break;
+ case IBV_WR_RDMA_READ:
-+ if (ib_wr->num_sge > uk_attrs->max_hw_read_sges) {
++ if (ib_wr->num_sge > dev_attrs->max_hw_read_sges) {
+ err = EINVAL;
+ break;
+ }
@@ -10787,8 +6435,7 @@ index 0000000..edd05bf
+
+ info.op.rdma_read.lo_sg_list = (void *)ib_wr->sg_list;
+ info.op.rdma_read.num_lo_sges = ib_wr->num_sge;
-+ ret = zxdh_uk_rdma_read(&iwuqp->qp, &info, false,
-+ false);
++ ret = zxdh_rdma_read(&iwuqp->qp, &info, false, false);
+ if (ret)
+ err = (ret == ZXDH_ERR_QP_TOOMANY_WRS_POSTED) ?
+ ENOMEM :
@@ -11133,7 +6780,7 @@ index 0000000..edd05bf
+ 1 :
+ 0;
+
-+ ret = zxdh_uk_mw_bind(&iwuqp->qp, &info, false);
++ ret = zxdh_mw_bind(&iwuqp->qp, &info, false);
+ if (ret)
+ err = (ret == ZXDH_ERR_QP_TOOMANY_WRS_POSTED) ?
+ ENOMEM :
@@ -11143,8 +6790,8 @@ index 0000000..edd05bf
+ info.op_type = ZXDH_OP_TYPE_LOCAL_INV;
+ info.op.inv_local_stag.target_stag =
+ ib_wr->invalidate_rkey;
-+ ret = zxdh_uk_stag_local_invalidate(&iwuqp->qp, &info,
-+ true);
++ ret = zxdh_stag_local_invalidate(&iwuqp->qp, &info,
++ true);
+ if (ret)
+ err = (ret == ZXDH_ERR_QP_TOOMANY_WRS_POSTED) ?
+ ENOMEM :
@@ -11164,7 +6811,7 @@ index 0000000..edd05bf
+ if (err)
+ *bad_wr = ib_wr;
+
-+ zxdh_uk_qp_post_wr(&iwuqp->qp);
++ zxdh_qp_post_wr(&iwuqp->qp);
+ if (reflush)
+ zxdh_issue_flush(ib_qp, 1, 0);
+
@@ -11193,7 +6840,6 @@ index 0000000..edd05bf
+
+ if (unlikely(ib_qp->state == IBV_QPS_RESET || ib_qp->srq)) {
+ *bad_wr = ib_wr;
-+ printf("err:post recv at reset or using srq\n");
+ return -EINVAL;
+ }
+
@@ -11215,7 +6861,7 @@ index 0000000..edd05bf
+ post_recv.wr_id = ib_wr->wr_id;
+ zxdh_copy_sg_list(sg_list, ib_wr->sg_list, ib_wr->num_sge);
+ post_recv.sg_list = sg_list;
-+ ret = zxdh_uk_post_receive(&iwuqp->qp, &post_recv);
++ ret = zxdh_post_receive(&iwuqp->qp, &post_recv);
+ if (unlikely(ret)) {
+ err = (ret == ZXDH_ERR_QP_TOOMANY_WRS_POSTED) ? ENOMEM :
+ EINVAL;
@@ -11229,7 +6875,7 @@ index 0000000..edd05bf
+ ib_wr = ib_wr->next;
+ }
+error:
-+ zxdh_uk_qp_set_shadow_area(&iwuqp->qp);
++ zxdh_qp_set_shadow_area(&iwuqp->qp);
+ pthread_spin_unlock(&iwuqp->lock);
+
+ return err;
@@ -11323,7 +6969,7 @@ index 0000000..edd05bf
+int zxdh_uresize_cq(struct ibv_cq *cq, int cqe)
+{
+ struct zxdh_uvcontext *iwvctx;
-+ struct zxdh_uk_attrs *uk_attrs;
++ struct zxdh_dev_attrs *dev_attrs;
+ struct zxdh_uresize_cq cmd = {};
+ struct ib_uverbs_resize_cq_resp resp = {};
+ struct zxdh_ureg_mr reg_mr_cmd = {};
@@ -11340,9 +6986,9 @@ index 0000000..edd05bf
+ iwucq = container_of(cq, struct zxdh_ucq, verbs_cq.cq);
+ iwvctx = container_of(cq->context, struct zxdh_uvcontext,
+ ibv_ctx.context);
-+ uk_attrs = &iwvctx->uk_attrs;
++ dev_attrs = &iwvctx->dev_attrs;
+
-+ if (!(uk_attrs->feature_flags & ZXDH_FEATURE_CQ_RESIZE))
++ if (!(dev_attrs->feature_flags & ZXDH_FEATURE_CQ_RESIZE))
+ return -EOPNOTSUPP;
+
+ if (cqe > ZXDH_MAX_CQ_SIZE)
@@ -11394,7 +7040,7 @@ index 0000000..edd05bf
+ memcpy(&cq_buf->cq, &iwucq->cq, sizeof(cq_buf->cq));
+ cq_buf->vmr = iwucq->vmr;
+ iwucq->vmr = new_mr;
-+ zxdh_uk_cq_resize(&iwucq->cq, cq_base, cqe_needed);
++ zxdh_cq_resize(&iwucq->cq, cq_base, cqe_needed);
+ iwucq->verbs_cq.cq.cqe = cqe;
+ list_add_tail(&iwucq->resize_list, &cq_buf->list);
+ iwucq->resize_enable = true;
@@ -11416,15 +7062,16 @@ index 0000000..edd05bf
+static void zxdh_srq_wqe_init(struct zxdh_usrq *iwusrq)
+{
+ uint32_t i;
-+ struct zxdh_srq_uk *srq_uk;
++ struct zxdh_srq *srq;
+ __le64 *wqe;
+ __u64 hdr;
+
-+ srq_uk = &iwusrq->srq;
-+ zxdh_dbg(ZXDH_DBG_SRQ, "%s head:%d tail:%d\n", __func__,
-+ srq_uk->srq_ring.head, srq_uk->srq_ring.tail);
-+ for (i = srq_uk->srq_ring.head; i < srq_uk->srq_ring.tail; i++) {
-+ wqe = zxdh_get_srq_wqe(srq_uk, i);
++ srq = &iwusrq->srq;
++ zxdh_dbg(verbs_get_ctx(iwusrq->ibv_srq.context), ZXDH_DBG_SRQ,
++ "%s head:%d tail:%d\n", __func__, srq->srq_ring.head,
++ srq->srq_ring.tail);
++ for (i = srq->srq_ring.head; i < srq->srq_ring.tail; i++) {
++ wqe = zxdh_get_srq_wqe(srq, i);
+
+ hdr = FIELD_PREP(ZXDHQPSRQ_NEXT_WQE_INDEX, (uint32_t)(i + 1));
+
@@ -11466,7 +7113,7 @@ index 0000000..edd05bf
+ total_srq_queue_size + total_srq_list_size + total_srq_db_size;
+ iwusrq->total_buf_size = total_srq_size;
+ zxdh_dbg(
-+ ZXDH_DBG_SRQ,
++ verbs_get_ctx(iwusrq->ibv_srq.context), ZXDH_DBG_SRQ,
+ "%s total_srq_queue_size:%ld total_srq_list_size:%ld total_srq_db_size:%ld srqdepth:%d\n",
+ __func__, total_srq_queue_size, total_srq_list_size,
+ total_srq_db_size, srqdepth);
@@ -11475,7 +7122,7 @@ index 0000000..edd05bf
+}
+
+static int zxdh_alloc_srq_buf(struct zxdh_usrq *iwusrq,
-+ struct zxdh_srq_uk_init_info *info,
++ struct zxdh_srq_init_info *info,
+ size_t total_srq_size)
+{
+ info->srq_base = zxdh_alloc_hw_buf(total_srq_size);
@@ -11489,15 +7136,14 @@ index 0000000..edd05bf
+ (__le64 *)&info->srq_list_base[iwusrq->list_buf_size /
+ (sizeof(__u16))];
+ *(__le64 *)info->srq_db_base = ZXDH_SRQ_DB_INIT_VALUE;
-+ zxdh_dbg(ZXDH_DBG_SRQ,
++ zxdh_dbg(verbs_get_ctx(iwusrq->ibv_srq.context), ZXDH_DBG_SRQ,
+ "%s srq_base:0x%p srq_list_base:0x%p srq_db_base:0x%p\n",
+ __func__, info->srq_base, info->srq_list_base,
+ info->srq_db_base);
+ return 0;
+}
+
-+static int zxdh_reg_srq_mr(struct ibv_pd *pd,
-+ struct zxdh_srq_uk_init_info *info,
++static int zxdh_reg_srq_mr(struct ibv_pd *pd, struct zxdh_srq_init_info *info,
+ size_t total_srq_size, uint16_t srq_pages,
+ uint16_t srq_list_pages, struct zxdh_usrq *iwusrq)
+{
@@ -11521,7 +7167,7 @@ index 0000000..edd05bf
+
+static int create_srq(struct ibv_pd *pd, struct zxdh_usrq *iwusrq,
+ struct ibv_srq_init_attr *attr,
-+ struct zxdh_srq_uk_init_info *info)
++ struct zxdh_srq_init_info *info)
+{
+ struct zxdh_ucreate_srq cmd = {};
+ struct zxdh_ucreate_srq_resp resp = {};
@@ -11542,7 +7188,7 @@ index 0000000..edd05bf
+ info->srq_size = resp.actual_srq_size;
+ info->srq_list_size = resp.actual_srq_list_size;
+ zxdh_dbg(
-+ ZXDH_DBG_SRQ,
++ verbs_get_ctx(iwusrq->ibv_srq.context), ZXDH_DBG_SRQ,
+ "%s info->srq_id:%d info->srq_size:%d info->srq_list_size:%d\n",
+ __func__, info->srq_id, info->srq_size, info->srq_list_size);
+
@@ -11560,7 +7206,7 @@ index 0000000..edd05bf
+ */
+static int zxdh_vmapped_srq(struct zxdh_usrq *iwusrq, struct ibv_pd *pd,
+ struct ibv_srq_init_attr *attr, int srqdepth,
-+ struct zxdh_srq_uk_init_info *info)
++ struct zxdh_srq_init_info *info)
+{
+ size_t total_srq_size;
+ size_t srq_pages = 0;
@@ -11574,21 +7220,19 @@ index 0000000..edd05bf
+ ret = zxdh_alloc_srq_buf(iwusrq, info, total_srq_size);
+ if (ret)
+ return -ENOMEM;
-+ zxdh_dbg(ZXDH_DBG_SRQ, "%s srq_pages:%ld srq_list_pages:%ld\n",
-+ __func__, srq_pages, srq_list_pages);
++ zxdh_dbg(verbs_get_ctx(iwusrq->ibv_srq.context), ZXDH_DBG_SRQ,
++ "%s srq_pages:%ld srq_list_pages:%ld\n", __func__, srq_pages,
++ srq_list_pages);
+
+ ret = zxdh_reg_srq_mr(pd, info, total_srq_size, srq_pages,
+ srq_list_pages, iwusrq);
-+ zxdh_dbg(ZXDH_DBG_SRQ, "%s %d ret:%d\n", __func__, __LINE__, ret);
+ if (ret) {
+ errno = ret;
+ goto err_dereg_srq_mr;
+ }
+ ret = create_srq(pd, iwusrq, attr, info);
-+ zxdh_dbg(ZXDH_DBG_SRQ, "%s %d ret:%d\n", __func__, __LINE__, ret);
+ if (ret)
+ goto err_srq;
-+
+ return 0;
+err_srq:
+ ibv_cmd_dereg_mr(&iwusrq->vmr);
@@ -11615,11 +7259,11 @@ index 0000000..edd05bf
+}
+
+static int zxdh_check_srq_init_attr(struct ibv_srq_init_attr *srq_init_attr,
-+ struct zxdh_uk_attrs *uk_attrs)
++ struct zxdh_dev_attrs *dev_attrs)
+{
+ if ((srq_init_attr->attr.srq_limit > srq_init_attr->attr.max_wr) ||
-+ (srq_init_attr->attr.max_sge > uk_attrs->max_hw_wq_frags) ||
-+ (srq_init_attr->attr.max_wr > uk_attrs->max_hw_srq_wr)) {
++ (srq_init_attr->attr.max_sge > dev_attrs->max_hw_wq_frags) ||
++ (srq_init_attr->attr.max_wr > dev_attrs->max_hw_srq_wr)) {
+ return 1;
+ }
+ return 0;
@@ -11628,8 +7272,8 @@ index 0000000..edd05bf
+static int zxdh_init_iwusrq(struct zxdh_usrq *iwusrq,
+ struct ibv_srq_init_attr *srq_init_attr,
+ __u32 srqdepth, __u8 srqshift,
-+ struct zxdh_srq_uk_init_info *info,
-+ struct zxdh_uk_attrs *uk_attrs)
++ struct zxdh_srq_init_info *info,
++ struct zxdh_dev_attrs *dev_attrs)
+{
+ info->srq_size = srqdepth >> srqshift;
+ iwusrq->max_wr = info->srq_size;
@@ -11637,7 +7281,7 @@ index 0000000..edd05bf
+ iwusrq->srq_limit = srq_init_attr->attr.srq_limit;
+
+ srq_init_attr->attr.max_wr = info->srq_size;
-+ info->uk_attrs = uk_attrs;
++ info->dev_attrs = dev_attrs;
+ info->max_srq_frag_cnt = srq_init_attr->attr.max_sge;
+ info->srq_wrid_array =
+ calloc(info->srq_size, sizeof(*info->srq_wrid_array));
@@ -11655,8 +7299,8 @@ index 0000000..edd05bf
+struct ibv_srq *zxdh_ucreate_srq(struct ibv_pd *pd,
+ struct ibv_srq_init_attr *srq_init_attr)
+{
-+ struct zxdh_srq_uk_init_info info = {};
-+ struct zxdh_uk_attrs *uk_attrs;
++ struct zxdh_srq_init_info info = {};
++ struct zxdh_dev_attrs *dev_attrs;
+ struct zxdh_uvcontext *iwvctx;
+ __u32 srqdepth;
+ __u8 srqshift;
@@ -11666,29 +7310,30 @@ index 0000000..edd05bf
+
+ iwvctx = container_of(pd->context, struct zxdh_uvcontext,
+ ibv_ctx.context);
-+ uk_attrs = &iwvctx->uk_attrs;
++ dev_attrs = &iwvctx->dev_attrs;
+
-+ if ((zxdh_check_srq_init_attr(srq_init_attr, uk_attrs)) != 0) {
-+ zxdh_dbg(ZXDH_DBG_SRQ, "zxdh_check_srq_init_attr failed\n");
++ if ((zxdh_check_srq_init_attr(srq_init_attr, dev_attrs)) != 0) {
++ verbs_err(&iwvctx->ibv_ctx,
++ "zxdh_check_srq_init_attr failed\n");
+ errno = EINVAL;
+ return NULL;
+ }
+
+ /* get shift count for maximum wqe size */
-+ zxdh_get_srq_wqe_shift(uk_attrs, srq_init_attr->attr.max_sge,
++ zxdh_get_srq_wqe_shift(dev_attrs, srq_init_attr->attr.max_sge,
+ &srqshift);
+
+	/* get RQ/SRQ depth (quanta), minimum number of units in SRQ */
-+ status = zxdh_get_srqdepth(uk_attrs->max_hw_srq_quanta,
++ status = zxdh_get_srqdepth(dev_attrs->max_hw_srq_quanta,
+ srq_init_attr->attr.max_wr, srqshift,
+ &srqdepth);
+ zxdh_dbg(
-+ ZXDH_DBG_SRQ,
-+ "%s %d status:%d srqshift:%d srqdepth:%d uk_attrs->max_hw_srq_quanta:%d srq_init_attr->attr.max_wr:%d\n",
++ &iwvctx->ibv_ctx, ZXDH_DBG_SRQ,
++ "%s %d status:%d srqshift:%d srqdepth:%d dev_attrs->max_hw_srq_quanta:%d srq_init_attr->attr.max_wr:%d\n",
+ __func__, __LINE__, status, srqshift, srqdepth,
-+ uk_attrs->max_hw_srq_quanta, srq_init_attr->attr.max_wr);
++ dev_attrs->max_hw_srq_quanta, srq_init_attr->attr.max_wr);
+ if (status != 0) {
-+ zxdh_dbg(ZXDH_DBG_SRQ, "zxdh_get_srqdepth failed\n");
++ verbs_err(&iwvctx->ibv_ctx, "zxdh_get_srqdepth failed\n");
+ errno = EINVAL;
+ return NULL;
+ }
@@ -11700,22 +7345,20 @@ index 0000000..edd05bf
+ goto err_free_srq;
+
+ if (zxdh_init_iwusrq(iwusrq, srq_init_attr, srqdepth, srqshift, &info,
-+ uk_attrs)) {
-+ zxdh_dbg(ZXDH_DBG_SRQ, "calloc srq_wrid_array failed\n");
++ dev_attrs)) {
++ verbs_err(&iwvctx->ibv_ctx, "calloc srq_wrid_array failed\n");
+ goto err_srq_wrid_array;
+ }
+ status = zxdh_vmapped_srq(iwusrq, pd, srq_init_attr, srqdepth, &info);
-+ zxdh_dbg(ZXDH_DBG_SRQ, "%s %d status:%d\n", __func__, __LINE__, status);
+ if (status) {
-+ zxdh_dbg(ZXDH_DBG_SRQ, "zxdh_vmapped_srq failed\n");
++ verbs_err(&iwvctx->ibv_ctx, "zxdh_vmapped_srq failed\n");
+ errno = status;
+ goto err_vmapped_srq;
+ }
+
-+ status = zxdh_uk_srq_init(&iwusrq->srq, &info);
-+ zxdh_dbg(ZXDH_DBG_SRQ, "%s %d status:%d\n", __func__, __LINE__, status);
++ status = zxdh_srq_init(&iwusrq->srq, &info);
+ if (status) {
-+ zxdh_dbg(ZXDH_DBG_SRQ, "zxdh_uk_srq_init failed\n");
++ verbs_err(&iwvctx->ibv_ctx, "zxdh_srq_init failed\n");
+ errno = EINVAL;
+ goto err_free_srq_init;
+ }
@@ -11723,8 +7366,9 @@ index 0000000..edd05bf
+
+ srq_init_attr->attr.max_wr = (srqdepth - ZXDH_SRQ_RSVD) >> srqshift;
+
-+ zxdh_dbg(ZXDH_DBG_SRQ, "%s iwusrq->srq_id:%d info.srq_size:%d\n",
-+ __func__, iwusrq->srq_id, info.srq_size);
++ zxdh_dbg(&iwvctx->ibv_ctx, ZXDH_DBG_SRQ,
++ "iwusrq->srq_id:%d info.srq_size:%d\n", iwusrq->srq_id,
++ info.srq_size);
+ return &iwusrq->ibv_srq;
+
+err_free_srq_init:
@@ -11738,7 +7382,6 @@ index 0000000..edd05bf
+ errno = EINVAL;
+err_free_srq:
+ free(iwusrq);
-+ zxdh_dbg(ZXDH_DBG_SRQ, "%s %d\n", __func__, __LINE__);
+ return NULL;
+}
+
@@ -11759,8 +7402,8 @@ index 0000000..edd05bf
+ ret = zxdh_destroy_vmapped_srq(iwusrq);
+ if (ret)
+ goto err;
-+ zxdh_dbg(ZXDH_DBG_SRQ, "%s iwusrq->srq_id:%d\n", __func__,
-+ iwusrq->srq_id);
++ zxdh_dbg(verbs_get_ctx(iwusrq->ibv_srq.context), ZXDH_DBG_SRQ,
++ "iwusrq->srq_id:%d\n", iwusrq->srq_id);
+ zxdh_free_hw_buf(iwusrq->srq.srq_base, iwusrq->total_buf_size);
+ free(iwusrq->srq.srq_wrid_array);
+ free(iwusrq);
@@ -11787,8 +7430,9 @@ index 0000000..edd05bf
+ sizeof(cmd));
+ if (ret == 0)
+ iwusrq->srq_limit = srq_attr->srq_limit;
-+ zxdh_dbg(ZXDH_DBG_SRQ, "%s iwusrq->srq_id:%d srq_attr->srq_limit:%d\n",
-+ __func__, iwusrq->srq_id, srq_attr->srq_limit);
++ zxdh_dbg(verbs_get_ctx(iwusrq->ibv_srq.context), ZXDH_DBG_SRQ,
++ "iwusrq->srq_id:%d srq_attr->srq_limit:%d\n", iwusrq->srq_id,
++ srq_attr->srq_limit);
+ return ret;
+}
+
@@ -11805,21 +7449,19 @@ index 0000000..edd05bf
+}
+
+static int zxdh_check_srq_valid(struct ibv_recv_wr *recv_wr,
-+ struct zxdh_usrq *iwusrq,
-+ struct zxdh_srq_uk *srq_uk)
++ struct zxdh_usrq *iwusrq, struct zxdh_srq *srq)
+{
+ if (unlikely(recv_wr->num_sge > iwusrq->max_sge))
+ return -EINVAL;
+
-+ if (unlikely(srq_uk->srq_ring.head == srq_uk->srq_ring.tail))
++ if (unlikely(srq->srq_ring.head == srq->srq_ring.tail))
+ return -ENOMEM;
+
+ return 0;
+}
+
-+static void zxdh_fill_srq_wqe(struct zxdh_usrq *iwusrq,
-+ struct zxdh_srq_uk *srq_uk, __le64 *wqe_64,
-+ struct ibv_recv_wr *recv_wr)
++static void zxdh_fill_srq_wqe(struct zxdh_usrq *iwusrq, struct zxdh_srq *srq,
++ __le64 *wqe_64, struct ibv_recv_wr *recv_wr)
+{
+ __u32 byte_off;
+ int i;
@@ -11850,30 +7492,21 @@ index 0000000..edd05bf
+ __u64 hdr = FIELD_PREP(ZXDHQPSRQ_RSV, 0) |
+ FIELD_PREP(ZXDHQPSRQ_VALID_SGE_NUM, recv_wr->num_sge) |
+ FIELD_PREP(ZXDHQPSRQ_SIGNATURE, 0) |
-+ FIELD_PREP(ZXDHQPSRQ_NEXT_WQE_INDEX, srq_uk->srq_ring.head);
++ FIELD_PREP(ZXDHQPSRQ_NEXT_WQE_INDEX, srq->srq_ring.head);
+
+ udma_to_device_barrier(); /* make sure WQE is populated before valid bit is set */
+ set_64bit_val(wqe_64, 0, hdr);
-+
-+ zxdh_dbg(ZXDH_DBG_SRQ, "%s wqe_64[0]:0x%llx\n", __func__, wqe_64[0]);
-+ zxdh_dbg(ZXDH_DBG_SRQ, "%s wqe_64[1]:0x%llx\n", __func__, wqe_64[1]);
-+ zxdh_dbg(ZXDH_DBG_SRQ, "%s wqe_64[2]:0x%llx\n", __func__, wqe_64[2]);
-+ zxdh_dbg(ZXDH_DBG_SRQ, "%s wqe_64[3]:0x%llx\n", __func__, wqe_64[3]);
-+ zxdh_dbg(ZXDH_DBG_SRQ, "%s wqe_64[4]:0x%llx\n", __func__, wqe_64[4]);
-+ zxdh_dbg(ZXDH_DBG_SRQ, "%s wqe_64[5]:0x%llx\n", __func__, wqe_64[5]);
+}
+
-+static void zxdh_get_wqe_index(struct zxdh_srq_uk *srq_uk, __le16 *wqe_16,
-+ __u16 *buf, __u16 nreq, __u16 *idx)
++static void zxdh_get_wqe_index(struct zxdh_srq *srq, __le16 *wqe_16, __u16 *buf,
++ __u16 nreq, __u16 *idx)
+{
+ int i;
+
+ for (i = 0; i < nreq; i++) {
-+ wqe_16 = zxdh_get_srq_list_wqe(srq_uk, idx);
++ wqe_16 = zxdh_get_srq_list_wqe(srq, idx);
+ udma_to_device_barrier(); /* make sure WQE is populated before valid bit is set */
+ set_16bit_val(wqe_16, 0, buf[i]);
-+ zxdh_dbg(ZXDH_DBG_SRQ, "%s idx:%hn wqe_16:0x%p buf[%d]:%d\n",
-+ __func__, idx, wqe_16, i, buf[i]);
+ }
+}
+
@@ -11885,7 +7518,6 @@ index 0000000..edd05bf
+
+ udma_to_device_barrier(); /* make sure WQE is populated before valid bit is set */
+ set_64bit_val(iwusrq->srq.srq_db_base, 0, hdr);
-+ zxdh_dbg(ZXDH_DBG_SRQ, "%s srq_db_base(hdr):0x%llx\n", __func__, hdr);
+}
+
+/**
@@ -11898,7 +7530,7 @@ index 0000000..edd05bf
+ struct ibv_recv_wr **bad_recv_wr)
+{
+ struct zxdh_usrq *iwusrq;
-+ struct zxdh_srq_uk *srq_uk;
++ struct zxdh_srq *hw_srq;
+ __le16 *wqe_16;
+ __le64 *wqe_64;
+ __u64 temp_val;
@@ -11909,36 +7541,38 @@ index 0000000..edd05bf
+ __u16 idx = 0;
+
+ iwusrq = container_of(srq, struct zxdh_usrq, ibv_srq);
-+ srq_uk = &iwusrq->srq;
++ hw_srq = &iwusrq->srq;
+ pthread_spin_lock(&iwusrq->lock);
+ buf_size = iwusrq->max_wr * sizeof(__u16);
+ buf = malloc(buf_size);
+ if (buf == NULL) {
-+ zxdh_dbg(ZXDH_DBG_SRQ, "malloc buf_size failed\n");
++ verbs_err(verbs_get_ctx(iwusrq->ibv_srq.context),
++ "malloc buf_size failed\n");
+ err = -ENOMEM;
+ goto out;
+ }
+
+ for (nreq = 0; recv_wr; nreq++, recv_wr = recv_wr->next) {
-+ err = zxdh_check_srq_valid(recv_wr, iwusrq, srq_uk);
++ err = zxdh_check_srq_valid(recv_wr, iwusrq, hw_srq);
+ if (err)
+ break;
+
-+ iwusrq->srq.srq_wrid_array[srq_uk->srq_ring.head] =
++ iwusrq->srq.srq_wrid_array[hw_srq->srq_ring.head] =
+ recv_wr->wr_id;
-+ buf[nreq] = srq_uk->srq_ring.head;
-+ wqe_64 = zxdh_get_srq_wqe(srq_uk, srq_uk->srq_ring.head);
++ buf[nreq] = hw_srq->srq_ring.head;
++ wqe_64 = zxdh_get_srq_wqe(hw_srq, hw_srq->srq_ring.head);
+ get_64bit_val(wqe_64, 0, &temp_val);
-+ srq_uk->srq_ring.head =
++ hw_srq->srq_ring.head =
+ (__u16)FIELD_GET(ZXDHQPSRQ_NEXT_WQE_INDEX, temp_val);
-+ zxdh_fill_srq_wqe(iwusrq, srq_uk, wqe_64, recv_wr);
++ zxdh_fill_srq_wqe(iwusrq, hw_srq, wqe_64, recv_wr);
+ }
+
-+ zxdh_dbg(ZXDH_DBG_SRQ, "%s nreq:%d err:%d iwusrq->srq_id:%d\n",
-+ __func__, nreq, err, iwusrq->srq_id);
++ zxdh_dbg(verbs_get_ctx(iwusrq->ibv_srq.context), ZXDH_DBG_SRQ,
++ "nreq:%d err:%d iwusrq->srq_id:%d\n", nreq, err,
++ iwusrq->srq_id);
+
+ if (err == 0) {
-+ zxdh_get_wqe_index(srq_uk, wqe_16, buf, nreq, &idx);
++ zxdh_get_wqe_index(hw_srq, wqe_16, buf, nreq, &idx);
+ zxdh_update_srq_db_base(iwusrq, idx);
+ }
+out:
@@ -11973,1135 +7607,628 @@ index 0000000..edd05bf
+ if (env)
+ zxdh_debug_mask = strtol(env, NULL, 0);
+}
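For completeness, a hypothetical caller of the SRQ receive path above: ibv_post_srq_recv() is dispatched to zxdh_upost_srq_recv(), which fills the SRQ WQE, records the index in the SRQ list and rings the SRQ doorbell. SRQ creation and memory registration are assumed to have already succeeded.

#include <stdint.h>
#include <infiniband/verbs.h>

/* Illustrative only: post one receive buffer to an existing SRQ. */
static int zxdh_demo_post_srq(struct ibv_srq *srq, struct ibv_mr *mr,
			      void *buf, uint32_t len, uint64_t wr_id)
{
	struct ibv_sge sge = {
		.addr = (uintptr_t)buf,
		.length = len,
		.lkey = mr->lkey,
	};
	struct ibv_recv_wr wr = {
		.wr_id = wr_id,
		.sg_list = &sge,
		.num_sge = 1,
	};
	struct ibv_recv_wr *bad_wr;

	return ibv_post_srq_recv(srq, &wr, &bad_wr);
}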
-diff --git a/providers/zrdma/zxdh.h b/providers/zrdma/zxdh.h
+diff --git a/providers/zrdma/zxdh_verbs.h b/providers/zrdma/zxdh_verbs.h
new file mode 100644
-index 0000000..293be95
+index 0000000..69a98cc
--- /dev/null
-+++ b/providers/zrdma/zxdh.h
-@@ -0,0 +1,53 @@
-+/* SPDX-License-Identifier: GPL-2.0 or Linux-OpenIB */
-+/* Copyright (c) 2017 - 2021 Intel Corporation */
-+#ifndef ZXDH_H
-+#define ZXDH_H
++++ b/providers/zrdma/zxdh_verbs.h
+@@ -0,0 +1,611 @@
++// SPDX-License-Identifier: GPL-2.0 or Linux-OpenIB
++/* Copyright (c) 2024 ZTE Corporation. All rights reserved. */
++#ifndef ZXDH_VERBS_H
++#define ZXDH_VERBS_H
++#include "zxdh_defs.h"
+
-+#define ZXDH_WQEALLOC_WQE_DESC_INDEX GENMASK(31, 20)
++#define zxdh_handle void *
++#define zxdh_adapter_handle zxdh_handle
++#define zxdh_qp_handle zxdh_handle
++#define zxdh_cq_handle zxdh_handle
++#define zxdh_pd_id zxdh_handle
++#define zxdh_stag_handle zxdh_handle
++#define zxdh_stag_index __u32
++#define zxdh_stag __u32
++#define zxdh_stag_key __u8
++#define zxdh_tagged_offset __u64
++#define zxdh_access_privileges __u32
++#define zxdh_physical_fragment __u64
++#define zxdh_address_list __u64 *
++#define zxdh_sgl struct zxdh_sge *
+
++#define ZXDH_MAX_MR_SIZE 0x200000000000ULL
+
-+struct zxdh_uk_attrs {
-+ __u64 feature_flags;
-+ __aligned_u64 sq_db_pa;
-+ __aligned_u64 cq_db_pa;
-+ __u32 max_hw_wq_frags;
-+ __u32 max_hw_read_sges;
-+ __u32 max_hw_inline;
-+ __u32 max_hw_rq_quanta;
-+ __u32 max_hw_srq_quanta;
-+ __u32 max_hw_wq_quanta;
-+ __u32 min_hw_cq_size;
-+ __u32 max_hw_cq_size;
-+ __u16 max_hw_sq_chunk;
-+ __u32 max_hw_srq_wr;
-+ __u8 hw_rev;
-+ __u8 db_addr_type;
-+};
++#define ZXDH_ACCESS_FLAGS_LOCALREAD 0x01
++#define ZXDH_ACCESS_FLAGS_LOCALWRITE 0x02
++#define ZXDH_ACCESS_FLAGS_REMOTEREAD_ONLY 0x04
++#define ZXDH_ACCESS_FLAGS_REMOTEREAD 0x05
++#define ZXDH_ACCESS_FLAGS_REMOTEWRITE_ONLY 0x08
++#define ZXDH_ACCESS_FLAGS_REMOTEWRITE 0x0a
++#define ZXDH_ACCESS_FLAGS_BIND_WINDOW 0x10
++#define ZXDH_ACCESS_FLAGS_ZERO_BASED 0x20
++#define ZXDH_ACCESS_FLAGS_ALL 0x3f
+
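
The composite masks decompose into the single-permission bits: ZXDH_ACCESS_FLAGS_REMOTEREAD (0x05) is REMOTEREAD_ONLY | LOCALREAD, and ZXDH_ACCESS_FLAGS_REMOTEWRITE (0x0a) is REMOTEWRITE_ONLY | LOCALWRITE. As an illustration only (the helper name and the ibv_access_flags mapping are hypothetical, not taken from the provider; only the bit values come from this header):

/* Illustrative translation from ibv_access_flags to the masks above. */
#include <stdint.h>
#include <infiniband/verbs.h>
#include "zxdh_verbs.h"		/* the header added above */

static uint32_t to_zxdh_access(int ibv_flags)
{
	uint32_t acc = ZXDH_ACCESS_FLAGS_LOCALREAD;		/* 0x01 */

	if (ibv_flags & IBV_ACCESS_LOCAL_WRITE)
		acc |= ZXDH_ACCESS_FLAGS_LOCALWRITE;		/* 0x02 */
	if (ibv_flags & IBV_ACCESS_REMOTE_READ)
		acc |= ZXDH_ACCESS_FLAGS_REMOTEREAD_ONLY;	/* 0x04 */
	if (ibv_flags & IBV_ACCESS_REMOTE_WRITE)
		acc |= ZXDH_ACCESS_FLAGS_REMOTEWRITE_ONLY;	/* 0x08 */
	if (ibv_flags & IBV_ACCESS_MW_BIND)
		acc |= ZXDH_ACCESS_FLAGS_BIND_WINDOW;		/* 0x10 */
	if (ibv_flags & IBV_ACCESS_ZERO_BASED)
		acc |= ZXDH_ACCESS_FLAGS_ZERO_BASED;		/* 0x20 */

	return acc;		/* never exceeds ZXDH_ACCESS_FLAGS_ALL (0x3f) */
}
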
-+struct zxdh_hw_attrs {
-+ struct zxdh_uk_attrs uk_attrs;
-+ __u64 max_hw_outbound_msg_size;
-+ __u64 max_hw_inbound_msg_size;
-+ __u64 max_mr_size;
-+ __u32 min_hw_qp_id;
-+ __u32 min_hw_aeq_size;
-+ __u32 max_hw_aeq_size;
-+ __u32 min_hw_ceq_size;
-+ __u32 max_hw_ceq_size;
-+ __u32 max_hw_device_pages;
-+ __u32 max_hw_vf_fpm_id;
-+ __u32 first_hw_vf_fpm_id;
-+ __u32 max_hw_ird;
-+ __u32 max_hw_ord;
-+ __u32 max_hw_wqes;
-+ __u32 max_hw_pds;
-+ __u32 max_hw_ena_vf_count;
-+ __u32 max_qp_wr;
-+ __u32 max_pe_ready_count;
-+ __u32 max_done_count;
-+ __u32 max_sleep_count;
-+ __u32 max_cqp_compl_wait_time_ms;
-+ __u16 max_stat_inst;
-+};
++#define ZXDH_OP_TYPE_NOP 0x00
++#define ZXDH_OP_TYPE_SEND 0x01
++#define ZXDH_OP_TYPE_SEND_WITH_IMM 0x02
++#define ZXDH_OP_TYPE_SEND_INV 0x03
++#define ZXDH_OP_TYPE_WRITE 0x04
++#define ZXDH_OP_TYPE_WRITE_WITH_IMM 0x05
++#define ZXDH_OP_TYPE_READ 0x06
++#define ZXDH_OP_TYPE_BIND_MW 0x07
++#define ZXDH_OP_TYPE_FAST_REG_MR 0x08
++#define ZXDH_OP_TYPE_LOCAL_INV 0x09
++#define ZXDH_OP_TYPE_UD_SEND 0x0a
++#define ZXDH_OP_TYPE_UD_SEND_WITH_IMM 0x0b
++#define ZXDH_OP_TYPE_REC 0x3e
++#define ZXDH_OP_TYPE_REC_IMM 0x3f
+
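
Illustration only: a hypothetical RC-style mapping from ibv_wr_opcode to the opcode values above; the provider's actual post-send dispatch (including the UD variants) is not shown in this excerpt:

/* Hypothetical helper; only the ZXDH_OP_TYPE_* values come from this header. */
#include <infiniband/verbs.h>
#include "zxdh_verbs.h"

static int to_zxdh_op(enum ibv_wr_opcode op)
{
	switch (op) {
	case IBV_WR_SEND:			return ZXDH_OP_TYPE_SEND;
	case IBV_WR_SEND_WITH_IMM:		return ZXDH_OP_TYPE_SEND_WITH_IMM;
	case IBV_WR_SEND_WITH_INV:		return ZXDH_OP_TYPE_SEND_INV;
	case IBV_WR_RDMA_WRITE:			return ZXDH_OP_TYPE_WRITE;
	case IBV_WR_RDMA_WRITE_WITH_IMM:	return ZXDH_OP_TYPE_WRITE_WITH_IMM;
	case IBV_WR_RDMA_READ:			return ZXDH_OP_TYPE_READ;
	case IBV_WR_BIND_MW:			return ZXDH_OP_TYPE_BIND_MW;
	case IBV_WR_LOCAL_INV:			return ZXDH_OP_TYPE_LOCAL_INV;
	default:				return -1;	/* not covered here */
	}
}
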
-+#endif /* ZXDH_H*/
-diff --git a/providers/zrdma/zxdh_devids.h b/providers/zrdma/zxdh_devids.h
-new file mode 100644
-index 0000000..ac23124
---- /dev/null
-+++ b/providers/zrdma/zxdh_devids.h
-@@ -0,0 +1,17 @@
-+/* SPDX-License-Identifier: GPL-2.0 or Linux-OpenIB */
-+/* Copyright (c) 2024 ZTE Corporation. All rights reserved. */
-+#ifndef ZXDH_DEVIDS_H
-+#define ZXDH_DEVIDS_H
++#define ZXDH_FLUSH_MAJOR_ERR 1
++#define ZXDH_RETRY_ACK_MAJOR_ERR 0x8
++#define ZXDH_RETRY_ACK_MINOR_ERR 0xf3
++#define ZXDH_TX_WINDOW_QUERY_ITEM_MINOR_ERR 0xf5
+
-+/* ZXDH VENDOR ID */
-+#define PCI_VENDOR_ID_ZXDH_EVB 0x16c3
-+#define PCI_VENDOR_ID_ZXDH_E312 0x1cf2
-+#define PCI_VENDOR_ID_ZXDH_X512 0x1cf2
-+/* ZXDH Devices ID */
-+#define ZXDH_DEV_ID_ADAPTIVE_EVB_PF 0x8040 /* ZXDH EVB PF DEVICE ID*/
-+#define ZXDH_DEV_ID_ADAPTIVE_EVB_VF 0x8041 /* ZXDH EVB VF DEVICE ID*/
-+#define ZXDH_DEV_ID_ADAPTIVE_E312_PF 0x8049 /* ZXDH E312 PF DEVICE ID*/
-+#define ZXDH_DEV_ID_ADAPTIVE_E312_VF 0x8060 /* ZXDH E312 VF DEVICE ID*/
-+#define ZXDH_DEV_ID_ADAPTIVE_X512_PF 0x806B /* ZXDH X512 PF DEVICE ID*/
-+#define ZXDH_DEV_ID_ADAPTIVE_X512_VF 0x806C /* ZXDH X512 VF DEVICE ID*/
-+#endif /* ZXDH_DEVIDS_H */
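
For context, rdma-core providers normally consume vendor/device IDs like these through a verbs_match_ent table. A sketch (the table name is illustrative, any driver-id entry is omitted, and the include path depends on whichever header ends up carrying these IDs):

/* Illustrative match table; only the PCI IDs come from the header above. */
#include <infiniband/driver.h>
#include "zxdh_devids.h"

static const struct verbs_match_ent match_table[] = {
	VERBS_PCI_MATCH(PCI_VENDOR_ID_ZXDH_EVB, ZXDH_DEV_ID_ADAPTIVE_EVB_PF, NULL),
	VERBS_PCI_MATCH(PCI_VENDOR_ID_ZXDH_EVB, ZXDH_DEV_ID_ADAPTIVE_EVB_VF, NULL),
	VERBS_PCI_MATCH(PCI_VENDOR_ID_ZXDH_E312, ZXDH_DEV_ID_ADAPTIVE_E312_PF, NULL),
	VERBS_PCI_MATCH(PCI_VENDOR_ID_ZXDH_E312, ZXDH_DEV_ID_ADAPTIVE_E312_VF, NULL),
	VERBS_PCI_MATCH(PCI_VENDOR_ID_ZXDH_X512, ZXDH_DEV_ID_ADAPTIVE_X512_PF, NULL),
	VERBS_PCI_MATCH(PCI_VENDOR_ID_ZXDH_X512, ZXDH_DEV_ID_ADAPTIVE_X512_VF, NULL),
	{}
};
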
-diff --git a/providers/zrdma/zxdh_dv.h b/providers/zrdma/zxdh_dv.h
-new file mode 100644
-index 0000000..5708699
---- /dev/null
-+++ b/providers/zrdma/zxdh_dv.h
-@@ -0,0 +1,75 @@
-+/* SPDX-License-Identifier: GPL-2.0 or Linux-OpenIB */
-+/* Copyright (c) 2024 ZTE Corporation. All rights reserved. */
-+#ifndef _ZXDH_API_H_
-+#define _ZXDH_API_H_
++#define ZXDH_MAX_SQ_FRAG 31
++#define ZXDH_MAX_SQ_INLINE_DATELEN_WITH_IMM 210
+
-+#include
-+#include
-+#include <linux/types.h> /* For the __be64 type */
-+#include
-+#include
-+#if defined(__SSE3__)
-+#include
-+#include
-+#include
-+#endif /* defined(__SSE3__) */
++#define INLINE_DATASIZE_7BYTES 7
++#define INLINE_DATASIZE_24BYTES 24
++#define INLINE_FRAG_DATASIZE_31BYTES 31
+
-+#include
-+#include