rdma-core/0036-add-ZTE-Dinghai-rdma-driver.patch
Li Fuyan (李富艳) cf00311534 add ZTE Dinghai rdma driver
Signed-off-by: 李富艳 <li.fuyan@zte.com.cn>
(cherry picked from commit bc9537acadff7ef3930afceec74adc3968d5a6da)
2024-11-14 14:17:23 +08:00

From e4eff3b4ead0430772c249d3a1a3fd734c8d3832 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E6=9D=8E=E5=AF=8C=E8=89=B3?= <li.fuyan@zte.com.cn>
Date: Wed, 4 Sep 2024 15:49:06 +0800
Subject: [PATCH] add ZTE Dinghai RDMA driver
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Signed-off-by: 李富艳 <li.fuyan@zte.com.cn>
---
CMakeLists.txt | 1 +
MAINTAINERS | 5 +
README.md | 1 +
debian/control | 1 +
debian/copyright | 4 +
debian/ibverbs-providers.install | 1 +
debian/libibverbs-dev.install | 5 +
kernel-headers/CMakeLists.txt | 4 +
kernel-headers/rdma/ib_user_ioctl_verbs.h | 1 +
kernel-headers/rdma/zxdh-abi.h | 143 +
kernel-headers/rdma/zxdh_user_ioctl_cmds.h | 56 +
kernel-headers/rdma/zxdh_user_ioctl_verbs.h | 34 +
libibverbs/verbs.h | 1 +
providers/zrdma/CMakeLists.txt | 17 +
providers/zrdma/libzrdma.map | 16 +
providers/zrdma/main.c | 200 ++
providers/zrdma/main.h | 223 ++
providers/zrdma/private_verbs_cmd.c | 201 ++
providers/zrdma/private_verbs_cmd.h | 24 +
providers/zrdma/zxdh_abi.h | 36 +
providers/zrdma/zxdh_defs.h | 399 +++
providers/zrdma/zxdh_devids.h | 17 +
providers/zrdma/zxdh_dv.h | 75 +
providers/zrdma/zxdh_hw.c | 2596 +++++++++++++++
providers/zrdma/zxdh_status.h | 75 +
providers/zrdma/zxdh_verbs.c | 3185 +++++++++++++++++++
providers/zrdma/zxdh_verbs.h | 611 ++++
redhat/rdma-core.spec | 4 +
28 files changed, 7936 insertions(+)
create mode 100644 kernel-headers/rdma/zxdh-abi.h
create mode 100644 kernel-headers/rdma/zxdh_user_ioctl_cmds.h
create mode 100644 kernel-headers/rdma/zxdh_user_ioctl_verbs.h
create mode 100644 providers/zrdma/CMakeLists.txt
create mode 100644 providers/zrdma/libzrdma.map
create mode 100644 providers/zrdma/main.c
create mode 100644 providers/zrdma/main.h
create mode 100644 providers/zrdma/private_verbs_cmd.c
create mode 100644 providers/zrdma/private_verbs_cmd.h
create mode 100644 providers/zrdma/zxdh_abi.h
create mode 100644 providers/zrdma/zxdh_defs.h
create mode 100644 providers/zrdma/zxdh_devids.h
create mode 100644 providers/zrdma/zxdh_dv.h
create mode 100644 providers/zrdma/zxdh_hw.c
create mode 100644 providers/zrdma/zxdh_status.h
create mode 100644 providers/zrdma/zxdh_verbs.c
create mode 100644 providers/zrdma/zxdh_verbs.h
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 98985e7..432a650 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -748,6 +748,7 @@ add_subdirectory(providers/mthca)
add_subdirectory(providers/ocrdma)
add_subdirectory(providers/qedr)
add_subdirectory(providers/vmw_pvrdma)
+add_subdirectory(providers/zrdma)
endif()
add_subdirectory(providers/hfi1verbs)
diff --git a/MAINTAINERS b/MAINTAINERS
index 4b24117..aa41217 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -185,6 +185,11 @@ L: pv-drivers@vmware.com
S: Supported
F: providers/vmw_pvrdma/
+ZRDMA USERSPACE PROVIDER (for zrdma.ko)
+M: Li Fuyan <li.fuyan@zte.com.cn>
+S: Supported
+F: providers/zrdma/
+
PYVERBS
M: Edward Srouji <edwards@mellanox.com>
S: Supported
diff --git a/README.md b/README.md
index 928bdc4..8f47d3c 100644
--- a/README.md
+++ b/README.md
@@ -31,6 +31,7 @@ is included:
- rdma_rxe.ko
- siw.ko
- vmw_pvrdma.ko
+ - zrdma.ko
Additional service daemons are provided for:
- srp_daemon (ib_srp.ko)
diff --git a/debian/control b/debian/control
index 2a55372..f86cc77 100644
--- a/debian/control
+++ b/debian/control
@@ -99,6 +99,7 @@ Description: User space provider drivers for libibverbs
- rxe: A software implementation of the RoCE protocol
- siw: A software implementation of the iWarp protocol
- vmw_pvrdma: VMware paravirtual RDMA device
+ - zrdma: ZTE Connection RDMA
Package: ibverbs-utils
Architecture: linux-any
diff --git a/debian/copyright b/debian/copyright
index 36ac71e..7e435b5 100644
--- a/debian/copyright
+++ b/debian/copyright
@@ -228,6 +228,10 @@ Files: providers/vmw_pvrdma/*
Copyright: 2012-2016 VMware, Inc.
License: BSD-2-clause or GPL-2
+Files: providers/zrdma/*
+Copyright: 2024 ZTE Corporation.
+License: BSD-MIT or GPL-2
+
Files: rdma-ndd/*
Copyright: 2004-2016, Intel Corporation.
License: BSD-MIT or GPL-2
diff --git a/debian/ibverbs-providers.install b/debian/ibverbs-providers.install
index fea15e0..360516f 100644
--- a/debian/ibverbs-providers.install
+++ b/debian/ibverbs-providers.install
@@ -5,3 +5,4 @@ usr/lib/*/libhns.so.*
usr/lib/*/libmana.so.*
usr/lib/*/libmlx4.so.*
usr/lib/*/libmlx5.so.*
+usr/lib/*/libzrdma.so.*
diff --git a/debian/libibverbs-dev.install b/debian/libibverbs-dev.install
index ef5b9a4..73dd8c7 100644
--- a/debian/libibverbs-dev.install
+++ b/debian/libibverbs-dev.install
@@ -13,6 +13,8 @@ usr/include/infiniband/sa.h
usr/include/infiniband/tm_types.h
usr/include/infiniband/verbs.h
usr/include/infiniband/verbs_api.h
+usr/include/infiniband/zxdh_dv.h
+usr/include/infiniband/zxdh_devids.h
usr/lib/*/lib*-rdmav*.a
usr/lib/*/libefa.a
usr/lib/*/libefa.so
@@ -26,12 +28,15 @@ usr/lib/*/libmlx4.a
usr/lib/*/libmlx4.so
usr/lib/*/libmlx5.a
usr/lib/*/libmlx5.so
+usr/lib/*/libzrdma.a
+usr/lib/*/libzrdma.so
usr/lib/*/pkgconfig/libefa.pc
usr/lib/*/pkgconfig/libhns.pc
usr/lib/*/pkgconfig/libibverbs.pc
usr/lib/*/pkgconfig/libmana.pc
usr/lib/*/pkgconfig/libmlx4.pc
usr/lib/*/pkgconfig/libmlx5.pc
+usr/lib/*/pkgconfig/libzrdma.pc
usr/share/man/man3/efadv_*.3
usr/share/man/man3/ibv_*
usr/share/man/man3/mbps_to_ibv_rate.3
diff --git a/kernel-headers/CMakeLists.txt b/kernel-headers/CMakeLists.txt
index 82c191c..9ceac31 100644
--- a/kernel-headers/CMakeLists.txt
+++ b/kernel-headers/CMakeLists.txt
@@ -26,6 +26,9 @@ publish_internal_headers(rdma
rdma/rvt-abi.h
rdma/siw-abi.h
rdma/vmw_pvrdma-abi.h
+ rdma/zxdh-abi.h
+ rdma/zxdh_user_ioctl_cmds.h
+ rdma/zxdh_user_ioctl_verbs.h
)
publish_internal_headers(rdma/hfi
@@ -80,6 +83,7 @@ rdma_kernel_provider_abi(
rdma/rdma_user_rxe.h
rdma/siw-abi.h
rdma/vmw_pvrdma-abi.h
+ rdma/zxdh-abi.h
)
publish_headers(infiniband
diff --git a/kernel-headers/rdma/ib_user_ioctl_verbs.h b/kernel-headers/rdma/ib_user_ioctl_verbs.h
index fe15bc7..a31f330 100644
--- a/kernel-headers/rdma/ib_user_ioctl_verbs.h
+++ b/kernel-headers/rdma/ib_user_ioctl_verbs.h
@@ -255,6 +255,7 @@ enum rdma_driver_id {
RDMA_DRIVER_SIW,
RDMA_DRIVER_ERDMA,
RDMA_DRIVER_MANA,
+ RDMA_DRIVER_ZXDH = 50,
};
enum ib_uverbs_gid_type {
diff --git a/kernel-headers/rdma/zxdh-abi.h b/kernel-headers/rdma/zxdh-abi.h
new file mode 100644
index 0000000..665f874
--- /dev/null
+++ b/kernel-headers/rdma/zxdh-abi.h
@@ -0,0 +1,143 @@
+/* SPDX-License-Identifier: GPL-2.0 or Linux-OpenIB */
+/* Copyright (c) 2024 ZTE Corporation. All rights reserved. */
+
+#ifndef ZXDH_ABI_H
+#define ZXDH_ABI_H
+
+#include <linux/types.h>
+
+/* zxdh must support the legacy GEN_1 i40iw kernel
+ * and user space, whose last ABI version is 5
+ */
+#define ZXDH_ABI_VER 5
+
+enum zxdh_memreg_type {
+ ZXDH_MEMREG_TYPE_MEM = 0,
+ ZXDH_MEMREG_TYPE_QP = 1,
+ ZXDH_MEMREG_TYPE_CQ = 2,
+ ZXDH_MEMREG_TYPE_SRQ = 3,
+};
+
+enum zxdh_db_addr_type {
+ ZXDH_DB_ADDR_PHY = 0,
+ ZXDH_DB_ADDR_BAR = 1,
+};
+
+struct zxdh_alloc_ucontext_req {
+ __u32 rsvd32;
+ __u8 userspace_ver;
+ __u8 rsvd8[3];
+};
+
+struct zxdh_alloc_ucontext_resp {
+ __u32 max_pds;
+ __u32 max_qps;
+ __u32 wq_size; /* size of the WQs (SQ+RQ) in the mmaped area */
+ __u8 kernel_ver;
+ __u8 db_addr_type;
+ __u8 rsvd[2];
+ __aligned_u64 feature_flags;
+ __aligned_u64 sq_db_mmap_key;
+ __aligned_u64 cq_db_mmap_key;
+ __aligned_u64 sq_db_pa;
+ __aligned_u64 cq_db_pa;
+ __u32 max_hw_wq_frags;
+ __u32 max_hw_read_sges;
+ __u32 max_hw_inline;
+ __u32 max_hw_rq_quanta;
+ __u32 max_hw_srq_quanta;
+ __u32 max_hw_wq_quanta;
+ __u32 max_hw_srq_wr;
+ __u32 min_hw_cq_size;
+ __u32 max_hw_cq_size;
+ __u16 max_hw_sq_chunk;
+ __u8 hw_rev;
+ __u8 rsvd2;
+};
+
+struct zxdh_alloc_pd_resp {
+ __u32 pd_id;
+ __u8 rsvd[4];
+};
+
+struct zxdh_resize_cq_req {
+ __aligned_u64 user_cq_buffer;
+};
+
+struct zxdh_create_cq_req {
+ __aligned_u64 user_cq_buf;
+ __aligned_u64 user_shadow_area;
+};
+
+struct zxdh_create_qp_req {
+ __aligned_u64 user_wqe_bufs;
+ __aligned_u64 user_compl_ctx;
+};
+
+struct zxdh_create_srq_req {
+ __aligned_u64 user_wqe_bufs;
+ __aligned_u64 user_compl_ctx;
+ __aligned_u64 user_wqe_list;
+ __aligned_u64 user_wqe_db;
+};
+
+struct zxdh_mem_reg_req {
+ __u16 reg_type; /* enum zxdh_memreg_type */
+ __u16 cq_pages;
+ __u16 rq_pages;
+ __u16 sq_pages;
+ __u16 srq_pages;
+ __u16 srq_list_pages;
+ __u8 rsvd[4];
+};
+
+struct zxdh_reg_mr_resp {
+ __u32 mr_pa_low;
+ __u32 mr_pa_hig;
+ __u16 host_page_size;
+ __u16 leaf_pbl_size;
+ __u8 rsvd[4];
+};
+
+struct zxdh_modify_qp_req {
+ __u8 sq_flush;
+ __u8 rq_flush;
+ __u8 rsvd[6];
+};
+
+struct zxdh_create_cq_resp {
+ __u32 cq_id;
+ __u32 cq_size;
+};
+
+struct zxdh_create_qp_resp {
+ __u32 qp_id;
+ __u32 actual_sq_size;
+ __u32 actual_rq_size;
+ __u32 zxdh_drv_opt;
+ __u16 push_idx;
+ __u8 lsmm;
+ __u8 rsvd;
+ __u32 qp_caps;
+};
+
+struct zxdh_create_srq_resp {
+ __u32 srq_id;
+ __u32 actual_srq_size;
+ __u32 actual_srq_list_size;
+ __u8 rsvd[4];
+};
+
+struct zxdh_modify_qp_resp {
+ __aligned_u64 push_wqe_mmap_key;
+ __aligned_u64 push_db_mmap_key;
+ __u16 push_offset;
+ __u8 push_valid;
+ __u8 rsvd[5];
+};
+
+struct zxdh_create_ah_resp {
+ __u32 ah_id;
+ __u8 rsvd[4];
+};
+#endif /* ZXDH_ABI_H */
diff --git a/kernel-headers/rdma/zxdh_user_ioctl_cmds.h b/kernel-headers/rdma/zxdh_user_ioctl_cmds.h
new file mode 100644
index 0000000..96d2eb4
--- /dev/null
+++ b/kernel-headers/rdma/zxdh_user_ioctl_cmds.h
@@ -0,0 +1,56 @@
+/* SPDX-License-Identifier: GPL-2.0 or Linux-OpenIB */
+/* Copyright (c) 2024 ZTE Corporation. All rights reserved. */
+
+#ifndef ZXDH_USER_IOCTL_CMDS_H
+#define ZXDH_USER_IOCTL_CMDS_H
+
+#include <linux/types.h>
+#include <rdma/ib_user_ioctl_cmds.h>
+
+enum zxdh_ib_dev_get_log_trace_attrs {
+ ZXDH_IB_ATTR_DEV_GET_LOG_TARCE_SWITCH = (1U << UVERBS_ID_NS_SHIFT),
+};
+
+enum zxdh_ib_dev_set_log_trace_attrs {
+ ZXDH_IB_ATTR_DEV_SET_LOG_TARCE_SWITCH = (1U << UVERBS_ID_NS_SHIFT),
+};
+
+enum zxdh_ib_dev_methods {
+ ZXDH_IB_METHOD_DEV_GET_LOG_TRACE = (1U << UVERBS_ID_NS_SHIFT),
+ ZXDH_IB_METHOD_DEV_SET_LOG_TRACE,
+};
+
+enum zxdh_ib_qp_modify_udp_sport_attrs {
+ ZXDH_IB_ATTR_QP_UDP_PORT = (1U << UVERBS_ID_NS_SHIFT),
+ ZXDH_IB_ATTR_QP_QPN,
+};
+
+enum zxdh_ib_qp_query_qpc_attrs {
+ ZXDH_IB_ATTR_QP_QUERY_HANDLE = (1U << UVERBS_ID_NS_SHIFT),
+ ZXDH_IB_ATTR_QP_QUERY_RESP,
+};
+
+enum zxdh_ib_qp_modify_qpc_attrs {
+ ZXDH_IB_ATTR_QP_MODIFY_QPC_HANDLE = (1U << UVERBS_ID_NS_SHIFT),
+ ZXDH_IB_ATTR_QP_MODIFY_QPC_REQ,
+ ZXDH_IB_ATTR_QP_MODIFY_QPC_MASK,
+};
+
+enum zxdh_ib_qp_reset_qp_attrs {
+ ZXDH_IB_ATTR_QP_RESET_QP_HANDLE = (1U << UVERBS_ID_NS_SHIFT),
+ ZXDH_IB_ATTR_QP_RESET_OP_CODE,
+};
+
+enum zxdh_ib_qp_methods {
+ ZXDH_IB_METHOD_QP_MODIFY_UDP_SPORT = (1U << UVERBS_ID_NS_SHIFT),
+ ZXDH_IB_METHOD_QP_QUERY_QPC,
+ ZXDH_IB_METHOD_QP_MODIFY_QPC,
+ ZXDH_IB_METHOD_QP_RESET_QP,
+};
+
+enum zxdh_ib_objects {
+ ZXDH_IB_OBJECT_DEV = (1U << UVERBS_ID_NS_SHIFT),
+ ZXDH_IB_OBJECT_QP_OBJ,
+};
+
+#endif
diff --git a/kernel-headers/rdma/zxdh_user_ioctl_verbs.h b/kernel-headers/rdma/zxdh_user_ioctl_verbs.h
new file mode 100644
index 0000000..bc0e812
--- /dev/null
+++ b/kernel-headers/rdma/zxdh_user_ioctl_verbs.h
@@ -0,0 +1,34 @@
+/* SPDX-License-Identifier: GPL-2.0 or Linux-OpenIB */
+/* Copyright (c) 2024 ZTE Corporation. All rights reserved. */
+#ifndef ZXDH_USER_IOCTL_VERBS_H
+#define ZXDH_USER_IOCTL_VERBS_H
+
+#include <linux/types.h>
+
+/* TODO: align this struct */
+struct zxdh_query_qpc_resp {
+ __u8 retry_flag;
+ __u8 rnr_retry_flag;
+ __u8 read_retry_flag;
+ __u8 cur_retry_count;
+ __u8 retry_cqe_sq_opcode;
+ __u8 err_flag;
+ __u8 ack_err_flag;
+ __u8 package_err_flag;
+ __u8 recv_err_flag;
+ __u8 retry_count;
+ __u32 tx_last_ack_psn;
+};
+
+struct zxdh_modify_qpc_req {
+ __u8 retry_flag;
+ __u8 rnr_retry_flag;
+ __u8 read_retry_flag;
+ __u8 cur_retry_count;
+ __u8 retry_cqe_sq_opcode;
+ __u8 err_flag;
+ __u8 ack_err_flag;
+ __u8 package_err_flag;
+};
+
+#endif
diff --git a/libibverbs/verbs.h b/libibverbs/verbs.h
index 78129fd..be0e76b 100644
--- a/libibverbs/verbs.h
+++ b/libibverbs/verbs.h
@@ -2275,6 +2275,7 @@ extern const struct verbs_device_ops verbs_provider_qedr;
extern const struct verbs_device_ops verbs_provider_rxe;
extern const struct verbs_device_ops verbs_provider_siw;
extern const struct verbs_device_ops verbs_provider_vmw_pvrdma;
+extern const struct verbs_device_ops verbs_provider_zrdma;
extern const struct verbs_device_ops verbs_provider_all;
extern const struct verbs_device_ops verbs_provider_none;
void ibv_static_providers(void *unused, ...);
diff --git a/providers/zrdma/CMakeLists.txt b/providers/zrdma/CMakeLists.txt
new file mode 100644
index 0000000..1af572a
--- /dev/null
+++ b/providers/zrdma/CMakeLists.txt
@@ -0,0 +1,17 @@
+# SPDX-License-Identifier: GPL-2.0 or Linux-OpenIB
+# Copyright (c) 2024 ZTE Corporation. All rights reserved.
+set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -Werror")
+rdma_shared_provider(zrdma libzrdma.map
+ 1 1.1.${PACKAGE_VERSION}
+ zxdh_hw.c
+ main.c
+ zxdh_verbs.c
+ private_verbs_cmd.c
+)
+
+publish_headers(infiniband
+ zxdh_dv.h
+)
+
+
+rdma_pkg_config("zrdma" "libibverbs" "${CMAKE_THREAD_LIBS_INIT}")
diff --git a/providers/zrdma/libzrdma.map b/providers/zrdma/libzrdma.map
new file mode 100644
index 0000000..f95de4b
--- /dev/null
+++ b/providers/zrdma/libzrdma.map
@@ -0,0 +1,16 @@
+/* Exported symbols should be added below according to the
+   Documentation/versioning.md document. */
+ZRDMA_1.0 {
+ global:
+ zxdh_get_log_trace_switch;
+ local: *;
+};
+
+ZRDMA_1.1 {
+ global:
+ zxdh_set_log_trace_switch;
+ zxdh_modify_qp_udp_sport;
+ zxdh_query_qpc;
+ zxdh_modify_qpc;
+ zxdh_reset_qp;
+} ZRDMA_1.0;
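
The two version nodes follow the scheme in Documentation/versioning.md: ZRDMA_1.0 carries the original export and ZRDMA_1.1 layers the newer direct-verbs calls on top of it. On a built tree, running objdump -T libzrdma.so | grep ZRDMA lists each export together with its version node, a quick check that the map was applied.
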
diff --git a/providers/zrdma/main.c b/providers/zrdma/main.c
new file mode 100644
index 0000000..e25a1a2
--- /dev/null
+++ b/providers/zrdma/main.c
@@ -0,0 +1,200 @@
+// SPDX-License-Identifier: GPL-2.0 or Linux-OpenIB
+/* Copyright (c) 2024 ZTE Corporation. All rights reserved. */
+#include <config.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <errno.h>
+#include <sys/mman.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include "zxdh_devids.h"
+#include "main.h"
+#include "zxdh_abi.h"
+#include "private_verbs_cmd.h"
+
+#define ZXDH_HCA(v, d) VERBS_PCI_MATCH(v, d, NULL)
+static const struct verbs_match_ent hca_table[] = {
+ VERBS_DRIVER_ID(RDMA_DRIVER_ZXDH),
+ ZXDH_HCA(PCI_VENDOR_ID_ZXDH_EVB, ZXDH_DEV_ID_ADAPTIVE_EVB_PF),
+ ZXDH_HCA(PCI_VENDOR_ID_ZXDH_EVB, ZXDH_DEV_ID_ADAPTIVE_EVB_VF),
+ ZXDH_HCA(PCI_VENDOR_ID_ZXDH_E312, ZXDH_DEV_ID_ADAPTIVE_E312_PF),
+ ZXDH_HCA(PCI_VENDOR_ID_ZXDH_E312, ZXDH_DEV_ID_ADAPTIVE_E312_VF),
+ ZXDH_HCA(PCI_VENDOR_ID_ZXDH_X512, ZXDH_DEV_ID_ADAPTIVE_X512_PF),
+ ZXDH_HCA(PCI_VENDOR_ID_ZXDH_X512, ZXDH_DEV_ID_ADAPTIVE_X512_VF),
+ {}
+};
+
+/**
+ * zxdh_ufree_context - free context that was allocated
+ * @ibctx: context allocated ptr
+ */
+static void zxdh_ufree_context(struct ibv_context *ibctx)
+{
+ struct zxdh_uvcontext *iwvctx;
+
+ iwvctx = container_of(ibctx, struct zxdh_uvcontext, ibv_ctx.context);
+
+ zxdh_ufree_pd(&iwvctx->iwupd->ibv_pd);
+ zxdh_munmap(iwvctx->sq_db);
+ zxdh_munmap(iwvctx->cq_db);
+ verbs_uninit_context(&iwvctx->ibv_ctx);
+ free(iwvctx);
+}
+
+static const struct verbs_context_ops zxdh_uctx_ops = {
+ .alloc_mw = zxdh_ualloc_mw,
+ .alloc_pd = zxdh_ualloc_pd,
+ .attach_mcast = zxdh_uattach_mcast,
+ .bind_mw = zxdh_ubind_mw,
+ .cq_event = zxdh_cq_event,
+ .create_ah = zxdh_ucreate_ah,
+ .create_cq = zxdh_ucreate_cq,
+ .create_cq_ex = zxdh_ucreate_cq_ex,
+ .create_qp = zxdh_ucreate_qp,
+ .create_qp_ex = zxdh_ucreate_qp_ex,
+ .create_srq = zxdh_ucreate_srq,
+ .dealloc_mw = zxdh_udealloc_mw,
+ .dealloc_pd = zxdh_ufree_pd,
+ .dereg_mr = zxdh_udereg_mr,
+ .destroy_ah = zxdh_udestroy_ah,
+ .destroy_cq = zxdh_udestroy_cq,
+ .modify_cq = zxdh_umodify_cq,
+ .destroy_qp = zxdh_udestroy_qp,
+ .destroy_srq = zxdh_udestroy_srq,
+ .detach_mcast = zxdh_udetach_mcast,
+ .modify_qp = zxdh_umodify_qp,
+ .modify_srq = zxdh_umodify_srq,
+ .poll_cq = zxdh_upoll_cq,
+ .post_recv = zxdh_upost_recv,
+ .post_send = zxdh_upost_send,
+ .post_srq_recv = zxdh_upost_srq_recv,
+ .query_device_ex = zxdh_uquery_device_ex,
+ .query_port = zxdh_uquery_port,
+ .query_qp = zxdh_uquery_qp,
+ .query_srq = zxdh_uquery_srq,
+ .reg_mr = zxdh_ureg_mr,
+ .rereg_mr = zxdh_urereg_mr,
+ .req_notify_cq = zxdh_uarm_cq,
+ .resize_cq = zxdh_uresize_cq,
+ .free_context = zxdh_ufree_context,
+ .get_srq_num = zxdh_uget_srq_num,
+};
+
+/**
+ * zxdh_ualloc_context - allocate context for user app
+ * @ibdev: ib device created during zxdh_driver_init
+ * @cmd_fd: save fd for the device
+ * @private_data: device private data
+ *
+ * Calls into the kernel to allocate the context, sets up the callback
+ * routine table, and returns the resource information as an ibv_context.
+ */
+static struct verbs_context *zxdh_ualloc_context(struct ibv_device *ibdev,
+ int cmd_fd, void *private_data)
+{
+ struct ibv_pd *ibv_pd;
+ struct zxdh_uvcontext *iwvctx;
+ struct zxdh_get_context cmd;
+ struct zxdh_get_context_resp resp = {};
+ __u64 sq_db_mmap_key, cq_db_mmap_key;
+ __u8 user_ver = ZXDH_ABI_VER;
+
+ iwvctx = verbs_init_and_alloc_context(ibdev, cmd_fd, iwvctx, ibv_ctx,
+ RDMA_DRIVER_ZXDH);
+ if (!iwvctx)
+ return NULL;
+
+ zxdh_set_debug_mask();
+ iwvctx->zxdh_write_imm_split_switch = zxdh_get_write_imm_split_switch();
+ cmd.userspace_ver = user_ver;
+ if (ibv_cmd_get_context(&iwvctx->ibv_ctx,
+ (struct ibv_get_context *)&cmd, sizeof(cmd),
+ &resp.ibv_resp, sizeof(resp))) {
+ cmd.userspace_ver = 4;
+ if (ibv_cmd_get_context(
+ &iwvctx->ibv_ctx, (struct ibv_get_context *)&cmd,
+ sizeof(cmd), &resp.ibv_resp, sizeof(resp)))
+ goto err_free;
+ user_ver = cmd.userspace_ver;
+ }
+
+ verbs_set_ops(&iwvctx->ibv_ctx, &zxdh_uctx_ops);
+
+ iwvctx->dev_attrs.feature_flags = resp.feature_flags;
+ iwvctx->dev_attrs.hw_rev = resp.hw_rev;
+ iwvctx->dev_attrs.max_hw_wq_frags = resp.max_hw_wq_frags;
+ iwvctx->dev_attrs.max_hw_read_sges = resp.max_hw_read_sges;
+ iwvctx->dev_attrs.max_hw_inline = resp.max_hw_inline;
+ iwvctx->dev_attrs.max_hw_rq_quanta = resp.max_hw_rq_quanta;
+ iwvctx->dev_attrs.max_hw_srq_quanta = resp.max_hw_srq_quanta;
+ iwvctx->dev_attrs.max_hw_wq_quanta = resp.max_hw_wq_quanta;
+ iwvctx->dev_attrs.max_hw_srq_wr = resp.max_hw_srq_wr;
+ iwvctx->dev_attrs.max_hw_sq_chunk = resp.max_hw_sq_chunk;
+ iwvctx->dev_attrs.max_hw_cq_size = resp.max_hw_cq_size;
+ iwvctx->dev_attrs.min_hw_cq_size = resp.min_hw_cq_size;
+ iwvctx->abi_ver = user_ver;
+
+ sq_db_mmap_key = resp.sq_db_mmap_key;
+ cq_db_mmap_key = resp.cq_db_mmap_key;
+
+ iwvctx->sq_db = zxdh_mmap(cmd_fd, sq_db_mmap_key);
+ if (iwvctx->sq_db == MAP_FAILED)
+ goto err_free;
+
+ iwvctx->cq_db = zxdh_mmap(cmd_fd, cq_db_mmap_key);
+ if (iwvctx->cq_db == MAP_FAILED) {
+ zxdh_munmap(iwvctx->sq_db);
+ goto err_free;
+ }
+ ibv_pd = zxdh_ualloc_pd(&iwvctx->ibv_ctx.context);
+ if (!ibv_pd) {
+ zxdh_munmap(iwvctx->sq_db);
+ zxdh_munmap(iwvctx->cq_db);
+ goto err_free;
+ }
+
+ ibv_pd->context = &iwvctx->ibv_ctx.context;
+ iwvctx->iwupd = container_of(ibv_pd, struct zxdh_upd, ibv_pd);
+ add_private_ops(iwvctx);
+ return &iwvctx->ibv_ctx;
+
+err_free:
+ free(iwvctx);
+
+ return NULL;
+}
+
+static void zxdh_uninit_device(struct verbs_device *verbs_device)
+{
+ struct zxdh_udevice *dev;
+
+ dev = container_of(&verbs_device->device, struct zxdh_udevice,
+ ibv_dev.device);
+ free(dev);
+}
+
+static struct verbs_device *zxdh_device_alloc(struct verbs_sysfs_dev *sysfs_dev)
+{
+ struct zxdh_udevice *dev;
+
+ dev = calloc(1, sizeof(*dev));
+ if (!dev)
+ return NULL;
+
+ return &dev->ibv_dev;
+}
+
+static const struct verbs_device_ops zxdh_udev_ops = {
+ .alloc_context = zxdh_ualloc_context,
+ .alloc_device = zxdh_device_alloc,
+ .match_max_abi_version = ZXDH_MAX_ABI_VERSION,
+ .match_min_abi_version = ZXDH_MIN_ABI_VERSION,
+ .match_table = hca_table,
+ .name = "zxdh",
+ .uninit_device = zxdh_uninit_device,
+};
+
+PROVIDER_DRIVER(zxdh, zxdh_udev_ops);
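
Because the provider registers through PROVIDER_DRIVER() and matches devices via hca_table, applications reach it through ordinary libibverbs enumeration; nothing zxdh-specific is needed to open a device. A minimal sketch using only the standard verbs API:

    #include <stdio.h>
    #include <infiniband/verbs.h>

    int main(void)
    {
        int num;
        struct ibv_device **list = ibv_get_device_list(&num);
        struct ibv_context *ctx = NULL;

        if (!list)
            return 1;
        for (int i = 0; i < num; i++) {
            /* a zxdh device enumerates like any other RDMA device */
            printf("found %s\n", ibv_get_device_name(list[i]));
            if (!ctx)
                ctx = ibv_open_device(list[i]);
        }
        if (ctx)
            ibv_close_device(ctx);
        ibv_free_device_list(list);
        return 0;
    }
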
diff --git a/providers/zrdma/main.h b/providers/zrdma/main.h
new file mode 100644
index 0000000..e28c77b
--- /dev/null
+++ b/providers/zrdma/main.h
@@ -0,0 +1,223 @@
+/* SPDX-License-Identifier: GPL-2.0 or Linux-OpenIB */
+/* Copyright (c) 2024 ZTE Corporation. All rights reserved. */
+#ifndef ZXDH_UMAIN_H
+#define ZXDH_UMAIN_H
+
+#include <inttypes.h>
+#include <stddef.h>
+#include <sys/socket.h>
+#include <netinet/in.h>
+#include <infiniband/driver.h>
+
+#include "zxdh_defs.h"
+#include "zxdh_status.h"
+#include "zxdh_verbs.h"
+
+#define ZXDH_BASE_PUSH_PAGE 1
+#define ZXDH_U_MINCQ_SIZE 4
+#define ZXDH_DB_SHADOW_AREA_SIZE 8
+#define ZXDH_DB_SQ_OFFSET 0x404
+#define ZXDH_DB_CQ_OFFSET 0x588
+
+#define MIN_UDP_SPORT 1024
+#define MIN_QP_QPN 1
+
+enum zxdh_supported_wc_flags {
+ ZXDH_CQ_SUPPORTED_WC_FLAGS =
+ IBV_WC_EX_WITH_BYTE_LEN | IBV_WC_EX_WITH_IMM |
+ IBV_WC_EX_WITH_QP_NUM | IBV_WC_EX_WITH_SRC_QP |
+ IBV_WC_EX_WITH_SLID | IBV_WC_EX_WITH_SL |
+ IBV_WC_EX_WITH_DLID_PATH_BITS |
+ IBV_WC_EX_WITH_COMPLETION_TIMESTAMP_WALLCLOCK |
+ IBV_WC_EX_WITH_COMPLETION_TIMESTAMP,
+};
+
+enum {
+ ZXDH_DBG_QP = 1 << 0,
+ ZXDH_DBG_CQ = 1 << 1,
+ ZXDH_DBG_SRQ = 1 << 2,
+};
+extern uint32_t zxdh_debug_mask;
+#define zxdh_dbg(ctx, mask, format, arg...) \
+ do { \
+ if (mask & zxdh_debug_mask) { \
+ int zxdh_dbg_tmp = errno; \
+ verbs_debug(ctx, format, ##arg); \
+ errno = zxdh_dbg_tmp; \
+ } \
+ } while (0)
+
+struct zxdh_udevice {
+ struct verbs_device ibv_dev;
+};
+
+struct zxdh_uah {
+ struct ibv_ah ibv_ah;
+ uint32_t ah_id;
+ struct ibv_global_route grh;
+};
+
+struct zxdh_upd {
+ struct ibv_pd ibv_pd;
+ void *arm_cq_page;
+ void *arm_cq;
+ uint32_t pd_id;
+};
+
+struct zxdh_uvcontext {
+ struct verbs_context ibv_ctx;
+ struct zxdh_upd *iwupd;
+ struct zxdh_dev_attrs dev_attrs;
+ void *db;
+ void *sq_db;
+ void *cq_db;
+ int abi_ver;
+ bool legacy_mode;
+ uint8_t zxdh_write_imm_split_switch;
+ struct zxdh_uvcontext_ops *cxt_ops;
+};
+
+struct zxdh_uqp;
+
+struct zxdh_cq_buf {
+ struct list_node list;
+ struct zxdh_cq cq;
+ struct verbs_mr vmr;
+};
+
+struct zxdh_ucq {
+ struct verbs_cq verbs_cq;
+ struct verbs_mr vmr;
+ struct verbs_mr vmr_shadow_area;
+ pthread_spinlock_t lock;
+ size_t buf_size;
+ bool is_armed;
+ enum zxdh_cmpl_notify last_notify;
+ int comp_vector;
+ uint32_t report_rtt;
+ struct zxdh_uqp *uqp;
+ struct zxdh_cq cq;
+ struct list_head resize_list;
+ /* for extended CQ completion fields */
+ struct zxdh_cq_poll_info cur_cqe;
+ bool resize_enable;
+};
+
+struct zxdh_usrq {
+ struct ibv_srq ibv_srq;
+ struct verbs_mr vmr;
+ struct verbs_mr list_vmr;
+ struct verbs_mr db_vmr;
+ size_t total_buf_size;
+ size_t buf_size;
+ size_t list_buf_size;
+ size_t db_buf_size;
+ size_t srq_size;
+ size_t srq_list_size;
+ uint32_t srq_id;
+ uint32_t max_wr;
+ uint32_t max_sge;
+ uint32_t srq_limit;
+ pthread_spinlock_t lock;
+ uint32_t wq_size;
+ struct ibv_recv_wr *pend_rx_wr;
+ struct zxdh_srq srq;
+};
+
+struct zxdh_uqp {
+ struct verbs_qp vqp;
+ struct zxdh_ucq *send_cq;
+ struct zxdh_ucq *recv_cq;
+ struct zxdh_usrq *srq;
+ struct verbs_mr vmr;
+ size_t buf_size;
+ uint32_t zxdh_drv_opt;
+ pthread_spinlock_t lock;
+ uint16_t sq_sig_all;
+ uint16_t qperr;
+ uint16_t rsvd;
+ uint32_t pending_rcvs;
+ uint32_t wq_size;
+ struct ibv_recv_wr *pend_rx_wr;
+ struct zxdh_qp qp;
+ enum ibv_qp_type qp_type;
+ struct zxdh_sge *recv_sges;
+ uint8_t is_srq;
+ uint8_t inline_data[ZXDH_MAX_INLINE_DATA_SIZE];
+};
+
+struct zxdh_umr {
+ struct verbs_mr vmr;
+ uint32_t acc_flags;
+ uint8_t leaf_pbl_size;
+ uint8_t host_page_size;
+ uint64_t mr_pa_pble_index;
+};
+
+/* zxdh_verbs.c */
+int zxdh_uquery_device_ex(struct ibv_context *context,
+ const struct ibv_query_device_ex_input *input,
+ struct ibv_device_attr_ex *attr, size_t attr_size);
+int zxdh_uquery_port(struct ibv_context *context, uint8_t port,
+ struct ibv_port_attr *attr);
+struct ibv_pd *zxdh_ualloc_pd(struct ibv_context *context);
+int zxdh_ufree_pd(struct ibv_pd *pd);
+struct ibv_mr *zxdh_ureg_mr(struct ibv_pd *pd, void *addr, size_t length,
+ uint64_t hca_va, int access);
+int zxdh_udereg_mr(struct verbs_mr *vmr);
+
+int zxdh_urereg_mr(struct verbs_mr *mr, int flags, struct ibv_pd *pd,
+ void *addr, size_t length, int access);
+
+struct ibv_mw *zxdh_ualloc_mw(struct ibv_pd *pd, enum ibv_mw_type type);
+int zxdh_ubind_mw(struct ibv_qp *qp, struct ibv_mw *mw,
+ struct ibv_mw_bind *mw_bind);
+int zxdh_udealloc_mw(struct ibv_mw *mw);
+struct ibv_cq *zxdh_ucreate_cq(struct ibv_context *context, int cqe,
+ struct ibv_comp_channel *channel,
+ int comp_vector);
+struct ibv_cq_ex *zxdh_ucreate_cq_ex(struct ibv_context *context,
+ struct ibv_cq_init_attr_ex *attr_ex);
+void zxdh_ibvcq_ex_fill_priv_funcs(struct zxdh_ucq *iwucq,
+ struct ibv_cq_init_attr_ex *attr_ex);
+int zxdh_uresize_cq(struct ibv_cq *cq, int cqe);
+int zxdh_udestroy_cq(struct ibv_cq *cq);
+int zxdh_umodify_cq(struct ibv_cq *cq, struct ibv_modify_cq_attr *attr);
+int zxdh_upoll_cq(struct ibv_cq *cq, int entries, struct ibv_wc *entry);
+int zxdh_uarm_cq(struct ibv_cq *cq, int solicited);
+void zxdh_cq_event(struct ibv_cq *cq);
+struct ibv_qp *zxdh_ucreate_qp(struct ibv_pd *pd,
+ struct ibv_qp_init_attr *attr);
+struct ibv_qp *zxdh_ucreate_qp_ex(struct ibv_context *context,
+ struct ibv_qp_init_attr_ex *attr);
+int zxdh_uquery_qp(struct ibv_qp *qp, struct ibv_qp_attr *attr, int attr_mask,
+ struct ibv_qp_init_attr *init_attr);
+int zxdh_umodify_qp(struct ibv_qp *qp, struct ibv_qp_attr *attr, int attr_mask);
+int zxdh_udestroy_qp(struct ibv_qp *qp);
+int zxdh_upost_send(struct ibv_qp *ib_qp, struct ibv_send_wr *ib_wr,
+ struct ibv_send_wr **bad_wr);
+int zxdh_upost_recv(struct ibv_qp *ib_qp, struct ibv_recv_wr *ib_wr,
+ struct ibv_recv_wr **bad_wr);
+struct ibv_srq *zxdh_ucreate_srq(struct ibv_pd *pd,
+ struct ibv_srq_init_attr *srq_init_attr);
+int zxdh_udestroy_srq(struct ibv_srq *srq);
+int zxdh_umodify_srq(struct ibv_srq *srq, struct ibv_srq_attr *srq_attr,
+ int srq_attr_mask);
+int zxdh_uquery_srq(struct ibv_srq *srq, struct ibv_srq_attr *srq_attr);
+int zxdh_upost_srq_recv(struct ibv_srq *srq, struct ibv_recv_wr *recv_wr,
+ struct ibv_recv_wr **bad_recv_wr);
+int zxdh_uget_srq_num(struct ibv_srq *srq, uint32_t *srq_num);
+struct ibv_ah *zxdh_ucreate_ah(struct ibv_pd *ibpd, struct ibv_ah_attr *attr);
+int zxdh_udestroy_ah(struct ibv_ah *ibah);
+int zxdh_uattach_mcast(struct ibv_qp *qp, const union ibv_gid *gid,
+ uint16_t lid);
+int zxdh_udetach_mcast(struct ibv_qp *qp, const union ibv_gid *gid,
+ uint16_t lid);
+void zxdh_async_event(struct ibv_context *context,
+ struct ibv_async_event *event);
+void zxdh_set_hw_attrs(struct zxdh_hw_attrs *attrs);
+void *zxdh_mmap(int fd, off_t offset);
+void zxdh_munmap(void *map);
+void zxdh_set_debug_mask(void);
+int zxdh_get_write_imm_split_switch(void);
+#endif /* ZXDH_UMAIN_H */
diff --git a/providers/zrdma/private_verbs_cmd.c b/providers/zrdma/private_verbs_cmd.c
new file mode 100644
index 0000000..68bba23
--- /dev/null
+++ b/providers/zrdma/private_verbs_cmd.c
@@ -0,0 +1,201 @@
+// SPDX-License-Identifier: GPL-2.0 or Linux-OpenIB
+/* Copyright (c) 2024 ZTE Corporation. All rights reserved. */
+#include <rdma/zxdh_user_ioctl_cmds.h>
+#include <rdma/zxdh_user_ioctl_verbs.h>
+#include "private_verbs_cmd.h"
+#include "zxdh_dv.h"
+
+static void copy_query_qpc(struct zxdh_query_qpc_resp *resp,
+ struct zxdh_rdma_qpc *qpc)
+{
+ qpc->ack_err_flag = resp->ack_err_flag;
+ qpc->retry_flag = resp->retry_flag;
+ qpc->rnr_retry_flag = resp->rnr_retry_flag;
+ qpc->cur_retry_count = resp->cur_retry_count;
+ qpc->retry_cqe_sq_opcode = resp->retry_cqe_sq_opcode;
+ qpc->err_flag = resp->err_flag;
+ qpc->package_err_flag = resp->package_err_flag;
+ qpc->recv_err_flag = resp->recv_err_flag;
+ qpc->tx_last_ack_psn = resp->tx_last_ack_psn;
+ qpc->retry_count = resp->retry_count;
+ qpc->read_retry_flag = resp->read_retry_flag;
+}
+
+static int _zxdh_query_qpc(struct ibv_qp *qp, struct zxdh_rdma_qpc *qpc)
+{
+ DECLARE_COMMAND_BUFFER(cmd, ZXDH_IB_OBJECT_QP_OBJ,
+ ZXDH_IB_METHOD_QP_QUERY_QPC, 2);
+ int ret;
+ struct zxdh_query_qpc_resp resp_ex = { 0 };
+
+ fill_attr_in_obj(cmd, ZXDH_IB_ATTR_QP_QUERY_HANDLE, qp->handle);
+ fill_attr_out_ptr(cmd, ZXDH_IB_ATTR_QP_QUERY_RESP, &resp_ex);
+
+ ret = execute_ioctl(qp->context, cmd);
+ if (ret)
+ return ret;
+
+ copy_query_qpc(&resp_ex, qpc);
+ return 0;
+}
+
+static void copy_modify_qpc_fields(struct zxdh_modify_qpc_req *req_cmd,
+ uint64_t attr_mask,
+ struct zxdh_rdma_qpc *qpc)
+{
+ if (attr_mask & ZXDH_TX_READ_RETRY_FLAG_SET) {
+ req_cmd->retry_flag = qpc->retry_flag;
+ req_cmd->rnr_retry_flag = qpc->rnr_retry_flag;
+ req_cmd->read_retry_flag = qpc->read_retry_flag;
+ req_cmd->cur_retry_count = qpc->cur_retry_count;
+ }
+ if (attr_mask & ZXDH_RETRY_CQE_SQ_OPCODE)
+ req_cmd->retry_cqe_sq_opcode = qpc->retry_cqe_sq_opcode;
+
+ if (attr_mask & ZXDH_ERR_FLAG_SET) {
+ req_cmd->err_flag = qpc->err_flag;
+ req_cmd->ack_err_flag = qpc->ack_err_flag;
+ }
+ if (attr_mask & ZXDH_PACKAGE_ERR_FLAG)
+ req_cmd->package_err_flag = qpc->package_err_flag;
+}
+
+static int _zxdh_reset_qp(struct ibv_qp *qp, uint64_t opcode)
+{
+ DECLARE_COMMAND_BUFFER(cmd, ZXDH_IB_OBJECT_QP_OBJ,
+ ZXDH_IB_METHOD_QP_RESET_QP, 2);
+
+ fill_attr_in_obj(cmd, ZXDH_IB_ATTR_QP_RESET_QP_HANDLE, qp->handle);
+ fill_attr_in_uint64(cmd, ZXDH_IB_ATTR_QP_RESET_OP_CODE, opcode);
+ return execute_ioctl(qp->context, cmd);
+}
+
+static int _zxdh_modify_qpc(struct ibv_qp *qp, struct zxdh_rdma_qpc *qpc,
+ uint64_t qpc_mask)
+{
+ DECLARE_COMMAND_BUFFER(cmd, ZXDH_IB_OBJECT_QP_OBJ,
+ ZXDH_IB_METHOD_QP_MODIFY_QPC, 3);
+ struct zxdh_modify_qpc_req req = { 0 };
+
+ copy_modify_qpc_fields(&req, qpc_mask, qpc);
+ fill_attr_in_obj(cmd, ZXDH_IB_ATTR_QP_QUERY_HANDLE, qp->handle);
+ fill_attr_in_uint64(cmd, ZXDH_IB_ATTR_QP_MODIFY_QPC_MASK, qpc_mask);
+ fill_attr_in_ptr(cmd, ZXDH_IB_ATTR_QP_MODIFY_QPC_REQ, &req);
+ return execute_ioctl(qp->context, cmd);
+}
+
+static int _zxdh_modify_qp_udp_sport(struct ibv_context *ibctx,
+ uint16_t udp_sport, uint32_t qpn)
+{
+ if (udp_sport <= MIN_UDP_SPORT || qpn <= MIN_QP_QPN)
+ return -EINVAL;
+
+ DECLARE_COMMAND_BUFFER(cmd, ZXDH_IB_OBJECT_QP_OBJ,
+ ZXDH_IB_METHOD_QP_MODIFY_UDP_SPORT, 2);
+ fill_attr_in(cmd, ZXDH_IB_ATTR_QP_UDP_PORT, &udp_sport,
+ sizeof(udp_sport));
+ fill_attr_in_uint32(cmd, ZXDH_IB_ATTR_QP_QPN, qpn);
+ return execute_ioctl(ibctx, cmd);
+}
+
+static int _zxdh_get_log_trace_switch(struct ibv_context *ibctx,
+ uint8_t *switch_status)
+{
+ DECLARE_COMMAND_BUFFER(cmd, ZXDH_IB_OBJECT_DEV,
+ ZXDH_IB_METHOD_DEV_GET_LOG_TRACE, 1);
+
+ fill_attr_out_ptr(cmd, ZXDH_IB_ATTR_DEV_GET_LOG_TARCE_SWITCH,
+ switch_status);
+ return execute_ioctl(ibctx, cmd);
+}
+
+static int _zxdh_set_log_trace_switch(struct ibv_context *ibctx,
+ uint8_t switch_status)
+{
+ DECLARE_COMMAND_BUFFER(cmd, ZXDH_IB_OBJECT_DEV,
+ ZXDH_IB_METHOD_DEV_SET_LOG_TRACE, 1);
+ fill_attr_in(cmd, ZXDH_IB_ATTR_DEV_SET_LOG_TARCE_SWITCH, &switch_status,
+ sizeof(switch_status));
+ return execute_ioctl(ibctx, cmd);
+}
+
+static struct zxdh_uvcontext_ops zxdh_ctx_ops = {
+ .modify_qp_udp_sport = _zxdh_modify_qp_udp_sport,
+ .get_log_trace_switch = _zxdh_get_log_trace_switch,
+ .set_log_trace_switch = _zxdh_set_log_trace_switch,
+ .query_qpc = _zxdh_query_qpc,
+ .modify_qpc = _zxdh_modify_qpc,
+ .reset_qp = _zxdh_reset_qp,
+};
+
+static inline struct zxdh_uvcontext *to_zxdhtx(struct ibv_context *ibctx)
+{
+ return container_of(ibctx, struct zxdh_uvcontext, ibv_ctx.context);
+}
+
+int zxdh_reset_qp(struct ibv_qp *qp, uint64_t opcode)
+{
+ struct zxdh_uvcontext_ops *dvops = to_zxdhtx(qp->context)->cxt_ops;
+
+ if (!dvops || !dvops->reset_qp)
+ return -EOPNOTSUPP;
+ return dvops->reset_qp(qp, opcode);
+}
+
+int zxdh_modify_qpc(struct ibv_qp *qp, struct zxdh_rdma_qpc *qpc,
+ uint64_t qpc_mask)
+{
+ struct zxdh_uvcontext_ops *dvops = to_zxdhtx(qp->context)->cxt_ops;
+
+ if (!dvops || !dvops->modify_qpc)
+ return -EOPNOTSUPP;
+ return dvops->modify_qpc(qp, qpc, qpc_mask);
+}
+
+int zxdh_query_qpc(struct ibv_qp *qp, struct zxdh_rdma_qpc *qpc)
+{
+ struct zxdh_uvcontext_ops *dvops = to_zxdhtx(qp->context)->cxt_ops;
+
+ if (!dvops || !dvops->query_qpc)
+ return -EOPNOTSUPP;
+
+ return dvops->query_qpc(qp, qpc);
+}
+
+int zxdh_modify_qp_udp_sport(struct ibv_context *context, uint16_t udp_sport,
+ uint32_t qpn)
+{
+ struct zxdh_uvcontext_ops *dvops = to_zxdhtx(context)->cxt_ops;
+
+ if (!dvops || !dvops->modify_qp_udp_sport)
+ return -EOPNOTSUPP;
+
+ return dvops->modify_qp_udp_sport(context, udp_sport, qpn);
+}
+
+int zxdh_get_log_trace_switch(struct ibv_context *context,
+ enum switch_status *switch_status)
+{
+ struct zxdh_uvcontext_ops *dvops = to_zxdhtx(context)->cxt_ops;
+
+ if (!dvops || !dvops->get_log_trace_switch)
+ return -EOPNOTSUPP;
+
+ return dvops->get_log_trace_switch(context, (uint8_t *)switch_status);
+}
+
+int zxdh_set_log_trace_switch(struct ibv_context *context,
+ enum switch_status switch_status)
+{
+ struct zxdh_uvcontext_ops *dvops = to_zxdhtx(context)->cxt_ops;
+
+ if (!dvops || !dvops->set_log_trace_switch)
+ return -EOPNOTSUPP;
+
+ return dvops->set_log_trace_switch(context, switch_status);
+}
+
+void add_private_ops(struct zxdh_uvcontext *iwvctx)
+{
+ iwvctx->cxt_ops = &zxdh_ctx_ops;
+}
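
Each public entry point above checks the context's private-ops table and returns -EOPNOTSUPP when the table is absent, so the calls degrade cleanly. A short sketch of the masked modify path, assuming a hypothetical helper that clears a QP's error flags:

    #include <infiniband/zxdh_dv.h>

    /* Hypothetical helper: only the fields selected by the mask are
     * copied into the ioctl request (see copy_modify_qpc_fields()). */
    static int clear_qp_err_flags(struct ibv_qp *qp)
    {
        struct zxdh_rdma_qpc qpc = {};

        qpc.err_flag = 0;
        qpc.ack_err_flag = 0;
        return zxdh_modify_qpc(qp, &qpc, ZXDH_ERR_FLAG_SET);
    }
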
diff --git a/providers/zrdma/private_verbs_cmd.h b/providers/zrdma/private_verbs_cmd.h
new file mode 100644
index 0000000..32d0d68
--- /dev/null
+++ b/providers/zrdma/private_verbs_cmd.h
@@ -0,0 +1,24 @@
+/* SPDX-License-Identifier: GPL-2.0 or Linux-OpenIB */
+/* Copyright (c) 2024 ZTE Corporation. All rights reserved. */
+#ifndef ZXDH_RDMA_PRIVATE_VERBS_CMD_H
+#define ZXDH_RDMA_PRIVATE_VERBS_CMD_H
+
+#include "main.h"
+#include "zxdh_dv.h"
+
+struct zxdh_uvcontext_ops {
+ int (*modify_qp_udp_sport)(struct ibv_context *ibctx,
+ uint16_t udp_sport, uint32_t qpn);
+ int (*set_log_trace_switch)(struct ibv_context *ibctx,
+ uint8_t switch_status);
+ int (*get_log_trace_switch)(struct ibv_context *ibctx,
+ uint8_t *switch_status);
+ int (*query_qpc)(struct ibv_qp *qp, struct zxdh_rdma_qpc *qpc);
+ int (*modify_qpc)(struct ibv_qp *qp, struct zxdh_rdma_qpc *qpc,
+ uint64_t qpc_mask);
+ int (*reset_qp)(struct ibv_qp *qp, uint64_t opcode);
+};
+
+void add_private_ops(struct zxdh_uvcontext *iwvctx);
+
+#endif
diff --git a/providers/zrdma/zxdh_abi.h b/providers/zrdma/zxdh_abi.h
new file mode 100644
index 0000000..f3cff03
--- /dev/null
+++ b/providers/zrdma/zxdh_abi.h
@@ -0,0 +1,36 @@
+/* SPDX-License-Identifier: GPL-2.0 or Linux-OpenIB */
+/* Copyright (c) 2024 ZTE Corporation. All rights reserved. */
+#ifndef PROVIDER_ZXDH_ABI_H
+#define PROVIDER_ZXDH_ABI_H
+
+#include <infiniband/kern-abi.h>
+#include <rdma/zxdh-abi.h>
+#include <kernel-abi/zxdh-abi.h>
+#include "zxdh_verbs.h"
+
+#define ZXDH_MIN_ABI_VERSION 0
+#define ZXDH_MAX_ABI_VERSION 5
+
+DECLARE_DRV_CMD(zxdh_ualloc_pd, IB_USER_VERBS_CMD_ALLOC_PD, empty,
+ zxdh_alloc_pd_resp);
+DECLARE_DRV_CMD(zxdh_ucreate_cq, IB_USER_VERBS_CMD_CREATE_CQ,
+ zxdh_create_cq_req, zxdh_create_cq_resp);
+DECLARE_DRV_CMD(zxdh_ucreate_cq_ex, IB_USER_VERBS_EX_CMD_CREATE_CQ,
+ zxdh_create_cq_req, zxdh_create_cq_resp);
+DECLARE_DRV_CMD(zxdh_uresize_cq, IB_USER_VERBS_CMD_RESIZE_CQ,
+ zxdh_resize_cq_req, empty);
+DECLARE_DRV_CMD(zxdh_ucreate_qp, IB_USER_VERBS_CMD_CREATE_QP,
+ zxdh_create_qp_req, zxdh_create_qp_resp);
+DECLARE_DRV_CMD(zxdh_umodify_qp, IB_USER_VERBS_EX_CMD_MODIFY_QP,
+ zxdh_modify_qp_req, zxdh_modify_qp_resp);
+DECLARE_DRV_CMD(zxdh_get_context, IB_USER_VERBS_CMD_GET_CONTEXT,
+ zxdh_alloc_ucontext_req, zxdh_alloc_ucontext_resp);
+DECLARE_DRV_CMD(zxdh_ureg_mr, IB_USER_VERBS_CMD_REG_MR, zxdh_mem_reg_req,
+ zxdh_reg_mr_resp);
+DECLARE_DRV_CMD(zxdh_urereg_mr, IB_USER_VERBS_CMD_REREG_MR, zxdh_mem_reg_req,
+ empty);
+DECLARE_DRV_CMD(zxdh_ucreate_ah, IB_USER_VERBS_CMD_CREATE_AH, empty,
+ zxdh_create_ah_resp);
+DECLARE_DRV_CMD(zxdh_ucreate_srq, IB_USER_VERBS_CMD_CREATE_SRQ,
+ zxdh_create_srq_req, zxdh_create_srq_resp);
+#endif /* PROVIDER_ZXDH_ABI_H */
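
Each DECLARE_DRV_CMD() splices a driver-private req/resp pair onto the matching core uverbs command, which is why main.c can hand &cmd to ibv_cmd_get_context() cast as a struct ibv_get_context. Roughly, as a sketch of the resulting layout rather than the literal macro expansion:

    struct zxdh_get_context {
        struct ibv_get_context ibv_cmd;   /* core uverbs command header */
        /* ...fields of struct zxdh_alloc_ucontext_req follow... */
    };

    struct zxdh_get_context_resp {
        struct ib_uverbs_get_context_resp ibv_resp; /* core response header */
        /* ...fields of struct zxdh_alloc_ucontext_resp follow... */
    };
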
diff --git a/providers/zrdma/zxdh_defs.h b/providers/zrdma/zxdh_defs.h
new file mode 100644
index 0000000..eaf73ca
--- /dev/null
+++ b/providers/zrdma/zxdh_defs.h
@@ -0,0 +1,399 @@
+/* SPDX-License-Identifier: GPL-2.0 or Linux-OpenIB */
+/* Copyright (c) 2024 ZTE Corporation. All rights reserved. */
+#ifndef ZXDH_DEFS_H
+#define ZXDH_DEFS_H
+#include <stdbool.h>
+#include <stdio.h>
+#include <string.h>
+#include <util/udma_barrier.h>
+#include <util/util.h>
+#include <linux/types.h>
+#include <inttypes.h>
+#include <pthread.h>
+#include <endian.h>
+#define ZXDH_RECV_ERR_FLAG_NAK_RNR_NAK 1
+#define ZXDH_RECV_ERR_FLAG_READ_RESP 2
+#define ZXDH_RETRY_CQE_SQ_OPCODE_ERR 32
+#define ZXDH_QP_RETRY_COUNT 2
+#define ZXDH_RESET_RETRY_CQE_SQ_OPCODE_ERR 0x1f
+
+#define ZXDH_QP_TYPE_ROCE_RC 1
+#define ZXDH_QP_TYPE_ROCE_UD 2
+
+#define ZXDH_HW_PAGE_SIZE 4096
+#define ZXDH_HW_PAGE_SHIFT 12
+#define ZXDH_CQE_QTYPE_RQ 0
+#define ZXDH_CQE_QTYPE_SQ 1
+
+#define ZXDH_MAX_SQ_WQES_PER_PAGE 128
+#define ZXDH_MAX_SQ_DEPTH 32768
+
+#define ZXDH_QP_SW_MIN_WQSIZE 64u /* in WRs*/
+#define ZXDH_QP_WQE_MIN_SIZE 32
+#define ZXDH_QP_SQE_MIN_SIZE 32
+#define ZXDH_QP_RQE_MIN_SIZE 16
+#define ZXDH_QP_WQE_MAX_SIZE 256
+#define ZXDH_QP_WQE_MIN_QUANTA 1
+#define ZXDH_MAX_RQ_WQE_SHIFT_GEN1 2
+#define ZXDH_MAX_RQ_WQE_SHIFT_GEN2 3
+#define ZXDH_SRQ_FRAG_BYTESIZE 16
+#define ZXDH_QP_FRAG_BYTESIZE 16
+#define ZXDH_SQ_WQE_BYTESIZE 32
+#define ZXDH_SRQ_WQE_MIN_SIZE 16
+
+#define ZXDH_SQ_RSVD 258
+#define ZXDH_RQ_RSVD 1
+#define ZXDH_SRQ_RSVD 1
+
+#define ZXDH_FEATURE_RTS_AE 1ULL
+#define ZXDH_FEATURE_CQ_RESIZE 2ULL
+#define ZXDHQP_OP_RDMA_WRITE 0x00
+#define ZXDHQP_OP_RDMA_READ 0x01
+#define ZXDHQP_OP_RDMA_SEND 0x03
+#define ZXDHQP_OP_RDMA_SEND_INV 0x04
+#define ZXDHQP_OP_RDMA_SEND_SOL_EVENT 0x05
+#define ZXDHQP_OP_RDMA_SEND_SOL_EVENT_INV 0x06
+#define ZXDHQP_OP_BIND_MW 0x08
+#define ZXDHQP_OP_FAST_REGISTER 0x09
+#define ZXDHQP_OP_LOCAL_INVALIDATE 0x0a
+#define ZXDHQP_OP_RDMA_READ_LOC_INV 0x0b
+#define ZXDHQP_OP_NOP 0x0c
+
+#define ZXDH_CQPHC_QPCTX GENMASK_ULL(63, 0)
+#define ZXDH_QP_DBSA_HW_SQ_TAIL GENMASK_ULL(14, 0)
+#define ZXDH_CQ_DBSA_CQEIDX GENMASK_ULL(22, 0)
+#define ZXDH_CQ_DBSA_SW_CQ_SELECT GENMASK_ULL(28, 23)
+#define ZXDH_CQ_DBSA_ARM_NEXT BIT_ULL(31)
+// #define ZXDH_CQ_DBSA_ARM_NEXT_SE BIT_ULL(15)
+#define ZXDH_CQ_DBSA_ARM_SEQ_NUM GENMASK_ULL(30, 29)
+#define ZXDH_CQ_ARM_CQ_ID_S 10
+#define ZXDH_CQ_ARM_CQ_ID GENMASK_ULL(29, 10)
+#define ZXDH_CQ_ARM_DBSA_VLD_S 30
+#define ZXDH_CQ_ARM_DBSA_VLD BIT_ULL(30)
+
+/* CQP and iWARP Completion Queue */
+#define ZXDH_CQ_QPCTX ZXDH_CQPHC_QPCTX
+
+#define ZXDH_CQ_MINERR GENMASK_ULL(22, 7)
+#define ZXDH_CQ_MAJERR GENMASK_ULL(38, 23)
+#define ZXDH_CQ_WQEIDX GENMASK_ULL(54, 40)
+#define ZXDH_CQ_EXTCQE BIT_ULL(50)
+#define ZXDH_OOO_CMPL BIT_ULL(54)
+#define ZXDH_CQ_ERROR BIT_ULL(39)
+#define ZXDH_CQ_SQ BIT_ULL(4)
+
+#define ZXDH_CQ_VALID BIT_ULL(5)
+#define ZXDH_CQ_IMMVALID BIT_ULL(0)
+#define ZXDH_CQ_UDSMACVALID BIT_ULL(26)
+#define ZXDH_CQ_UDVLANVALID BIT_ULL(27)
+#define ZXDH_CQ_IMMDATA GENMASK_ULL(31, 0)
+#define ZXDH_CQ_UDSMAC GENMASK_ULL(47, 0)
+#define ZXDH_CQ_UDVLAN GENMASK_ULL(63, 48)
+
+#define ZXDH_CQ_IMMDATA_S 0
+#define ZXDH_CQ_IMMDATA_M (0xffffffffffffffffULL << ZXDH_CQ_IMMVALID_S)
+#define ZXDH_CQ_IMMDATALOW32 GENMASK_ULL(31, 0)
+#define ZXDH_CQ_IMMDATAUP32 GENMASK_ULL(63, 32)
+#define ZXDHCQ_PAYLDLEN GENMASK_ULL(63, 32)
+#define ZXDHCQ_TCPSEQNUMRTT GENMASK_ULL(63, 32)
+#define ZXDHCQ_INVSTAG_S 11
+#define ZXDHCQ_INVSTAG GENMASK_ULL(42, 11)
+#define ZXDHCQ_QPID GENMASK_ULL(63, 44)
+
+#define ZXDHCQ_UDSRCQPN GENMASK_ULL(24, 1)
+#define ZXDHCQ_PSHDROP BIT_ULL(51)
+#define ZXDHCQ_STAG_S 43
+#define ZXDHCQ_STAG BIT_ULL(43)
+#define ZXDHCQ_IPV4 BIT_ULL(25)
+#define ZXDHCQ_SOEVENT BIT_ULL(6)
+#define ZXDHCQ_OP GENMASK_ULL(63, 58)
+
+/* Manage Push Page - MPP */
+#define ZXDH_INVALID_PUSH_PAGE_INDEX_GEN_1 0xffff
+#define ZXDH_INVALID_PUSH_PAGE_INDEX 0xffffffff
+
+#define ZXDHQPSQ_OPCODE GENMASK_ULL(62, 57)
+#define ZXDHQPSQ_COPY_HOST_PBL BIT_ULL(43)
+#define ZXDHQPSQ_ADDFRAGCNT GENMASK_ULL(39, 32)
+#define ZXDHQPSQ_PUSHWQE BIT_ULL(56)
+#define ZXDHQPSQ_STREAMMODE BIT_ULL(58)
+#define ZXDHQPSQ_WAITFORRCVPDU BIT_ULL(59)
+#define ZXDHQPSQ_READFENCE BIT_ULL(54)
+#define ZXDHQPSQ_LOCALFENCE BIT_ULL(55)
+#define ZXDHQPSQ_UDPHEADER BIT_ULL(61)
+#define ZXDHQPSQ_L4LEN GENMASK_ULL(45, 42)
+#define ZXDHQPSQ_SIGCOMPL BIT_ULL(56)
+#define ZXDHQPSQ_SOLICITED BIT_ULL(53)
+#define ZXDHQPSQ_VALID BIT_ULL(63)
+
+#define ZXDHQPSQ_FIRST_FRAG_VALID BIT_ULL(0)
+#define ZXDHQPSQ_FIRST_FRAG_LEN GENMASK_ULL(31, 1)
+#define ZXDHQPSQ_FIRST_FRAG_STAG GENMASK_ULL(63, 32)
+#define ZXDHQPSQ_FRAG_TO ZXDH_CQPHC_QPCTX
+#define ZXDHQPSQ_FRAG_VALID BIT_ULL(63)
+#define ZXDHQPSQ_FRAG_LEN GENMASK_ULL(62, 32)
+#define ZXDHQPSQ_FRAG_STAG GENMASK_ULL(31, 0)
+#define ZXDHQPSQ_GEN1_FRAG_LEN GENMASK_ULL(31, 0)
+#define ZXDHQPSQ_GEN1_FRAG_STAG GENMASK_ULL(63, 32)
+#define ZXDHQPSQ_REMSTAGINV GENMASK_ULL(31, 0)
+#define ZXDHQPSQ_DESTQKEY GENMASK_ULL(31, 0)
+#define ZXDHQPSQ_DESTQPN GENMASK_ULL(55, 32)
+#define ZXDHQPSQ_AHID GENMASK_ULL(18, 0)
+#define ZXDHQPSQ_INLINEDATAFLAG BIT_ULL(63)
+#define ZXDHQPSQ_UD_INLINEDATAFLAG BIT_ULL(50)
+#define ZXDHQPSQ_UD_INLINEDATALEN GENMASK_ULL(49, 42)
+#define ZXDHQPSQ_UD_ADDFRAGCNT GENMASK_ULL(36, 29)
+#define ZXDHQPSQ_WRITE_INLINEDATAFLAG BIT_ULL(48)
+#define ZXDHQPSQ_WRITE_INLINEDATALEN GENMASK_ULL(47, 40)
+
+#define ZXDH_INLINE_VALID_S 7
+#define ZXDHQPSQ_INLINE_VALID BIT_ULL(63)
+#define ZXDHQPSQ_INLINEDATALEN GENMASK_ULL(62, 55)
+#define ZXDHQPSQ_IMMDATAFLAG BIT_ULL(52)
+#define ZXDHQPSQ_REPORTRTT BIT_ULL(46)
+
+#define ZXDHQPSQ_IMMDATA GENMASK_ULL(31, 0)
+#define ZXDHQPSQ_REMSTAG_S 0
+#define ZXDHQPSQ_REMSTAG GENMASK_ULL(31, 0)
+
+#define ZXDHQPSQ_REMTO ZXDH_CQPHC_QPCTX
+
+#define ZXDHQPSQ_IMMDATA_VALID BIT_ULL(63)
+#define ZXDHQPSQ_STAGRIGHTS GENMASK_ULL(50, 46)
+#define ZXDHQPSQ_VABASEDTO BIT_ULL(51)
+#define ZXDHQPSQ_MEMWINDOWTYPE BIT_ULL(52)
+
+#define ZXDHQPSQ_MWLEN ZXDH_CQPHC_QPCTX
+#define ZXDHQPSQ_PARENTMRSTAG GENMASK_ULL(31, 0)
+#define ZXDHQPSQ_MWSTAG GENMASK_ULL(31, 0)
+#define ZXDHQPSQ_MW_PA_PBLE_ONE GENMASK_ULL(63, 46)
+#define ZXDHQPSQ_MW_PA_PBLE_TWO GENMASK_ULL(63, 32)
+#define ZXDHQPSQ_MW_PA_PBLE_THREE GENMASK_ULL(33, 32)
+#define ZXDHQPSQ_MW_HOST_PAGE_SIZE GENMASK_ULL(40, 36)
+#define ZXDHQPSQ_MW_LEAF_PBL_SIZE GENMASK_ULL(35, 34)
+#define ZXDHQPSQ_MW_LEVLE2_FIRST_PBLE_INDEX GENMASK_ULL(41, 32)
+#define ZXDHQPSQ_MW_LEVLE2_ROOT_PBLE_INDEX GENMASK_ULL(50, 42)
+
+#define ZXDHQPSQ_BASEVA_TO_FBO ZXDH_CQPHC_QPCTX
+
+#define ZXDHQPSQ_LOCSTAG GENMASK_ULL(31, 0)
+
+#define ZXDHQPSRQ_RSV GENMASK_ULL(63, 40)
+#define ZXDHQPSRQ_VALID_SGE_NUM GENMASK_ULL(39, 32)
+#define ZXDHQPSRQ_SIGNATURE GENMASK_ULL(31, 24)
+#define ZXDHQPSRQ_NEXT_WQE_INDEX GENMASK_ULL(15, 0)
+#define ZXDHQPSRQ_START_PADDING BIT_ULL(63)
+#define ZXDHQPSRQ_FRAG_LEN GENMASK_ULL(62, 32)
+#define ZXDHQPSRQ_FRAG_STAG GENMASK_ULL(31, 0)
+
+/* QP RQ WQE common fields */
+#define ZXDHQPRQ_SIGNATURE GENMASK_ULL(31, 16)
+#define ZXDHQPRQ_ADDFRAGCNT ZXDHQPSQ_ADDFRAGCNT
+#define ZXDHQPRQ_VALID ZXDHQPSQ_VALID
+#define ZXDHQPRQ_COMPLCTX ZXDH_CQPHC_QPCTX
+#define ZXDHQPRQ_FRAG_LEN ZXDHQPSQ_FRAG_LEN
+#define ZXDHQPRQ_STAG ZXDHQPSQ_FRAG_STAG
+#define ZXDHQPRQ_TO ZXDHQPSQ_FRAG_TO
+
+//QP RQ DBSA fields
+#define ZXDHQPDBSA_RQ_POLARITY_S 15
+#define ZXDHQPDBSA_RQ_POLARITY BIT_ULL(15)
+#define ZXDHQPDBSA_RQ_SW_HEAD_S 0
+#define ZXDHQPDBSA_RQ_SW_HEAD GENMASK_ULL(14, 0)
+
+#define ZXDHPFINT_OICR_HMC_ERR_M BIT(26)
+#define ZXDHPFINT_OICR_PE_PUSH_M BIT(27)
+#define ZXDHPFINT_OICR_PE_CRITERR_M BIT(28)
+
+#define ZXDH_SRQ_PARITY_SIGN_S 15
+#define ZXDH_SRQ_PARITY_SIGN BIT_ULL(15)
+#define ZXDH_SRQ_SW_SRQ_HEAD_S 0
+#define ZXDH_SRQ_SW_SRQ_HEAD GENMASK_ULL(14, 0)
+#define ZXDH_CQE_SQ_OPCODE_RESET BIT(5)
+
+#define ZXDH_CQP_INIT_WQE(wqe) memset(wqe, 0, 64)
+
+#define ZXDH_GET_CURRENT_CQ_ELEM(_cq) \
+ ((_cq)->cq_base[ZXDH_RING_CURRENT_HEAD((_cq)->cq_ring)].buf)
+#define ZXDH_GET_CURRENT_EXTENDED_CQ_ELEM(_cq) \
+ (((struct zxdh_extended_cqe \
+ *)((_cq)->cq_base))[ZXDH_RING_CURRENT_HEAD((_cq)->cq_ring)] \
+ .buf)
+
+#define ZXDH_RING_INIT(_ring, _size) \
+ { \
+ (_ring).head = 0; \
+ (_ring).tail = 0; \
+ (_ring).size = (_size); \
+ }
+#define ZXDH_RING_SIZE(_ring) ((_ring).size)
+#define ZXDH_RING_CURRENT_HEAD(_ring) ((_ring).head)
+#define ZXDH_RING_CURRENT_TAIL(_ring) ((_ring).tail)
+
+#define ZXDH_RING_MOVE_HEAD(_ring, _retcode) \
+ { \
+ register __u32 size; \
+ size = (_ring).size; \
+ if (!ZXDH_RING_FULL_ERR(_ring)) { \
+ (_ring).head = ((_ring).head + 1) % size; \
+ (_retcode) = 0; \
+ } else { \
+ (_retcode) = ZXDH_ERR_RING_FULL; \
+ } \
+ }
+#define ZXDH_RING_MOVE_HEAD_BY_COUNT(_ring, _count, _retcode) \
+ { \
+ register __u32 size; \
+ size = (_ring).size; \
+ if ((ZXDH_RING_USED_QUANTA(_ring) + (_count)) < size) { \
+ (_ring).head = ((_ring).head + (_count)) % size; \
+ (_retcode) = 0; \
+ } else { \
+ (_retcode) = ZXDH_ERR_RING_FULL; \
+ } \
+ }
+#define ZXDH_SQ_RING_MOVE_HEAD(_ring, _retcode) \
+ { \
+ register __u32 size; \
+ size = (_ring).size; \
+ if (!ZXDH_SQ_RING_FULL_ERR(_ring)) { \
+ (_ring).head = ((_ring).head + 1) % size; \
+ (_retcode) = 0; \
+ } else { \
+ (_retcode) = ZXDH_ERR_RING_FULL; \
+ } \
+ }
+#define ZXDH_SQ_RING_MOVE_HEAD_BY_COUNT(_ring, _count, _retcode) \
+ { \
+ register __u32 size; \
+ size = (_ring).size; \
+ if ((ZXDH_RING_USED_QUANTA(_ring) + (_count)) < \
+ (size - 256)) { \
+ (_ring).head = ((_ring).head + (_count)) % size; \
+ (_retcode) = 0; \
+ } else { \
+ (_retcode) = ZXDH_ERR_RING_FULL; \
+ } \
+ }
+#define ZXDH_RING_MOVE_HEAD_BY_COUNT_NOCHECK(_ring, _count) \
+ (_ring).head = ((_ring).head + (_count)) % (_ring).size
+
+#define ZXDH_RING_MOVE_TAIL(_ring) \
+ (_ring).tail = ((_ring).tail + 1) % (_ring).size
+
+#define ZXDH_RING_MOVE_HEAD_NOCHECK(_ring) \
+ (_ring).head = ((_ring).head + 1) % (_ring).size
+
+#define ZXDH_RING_MOVE_TAIL_BY_COUNT(_ring, _count) \
+ (_ring).tail = ((_ring).tail + (_count)) % (_ring).size
+
+#define ZXDH_RING_SET_TAIL(_ring, _pos) (_ring).tail = (_pos) % (_ring).size
+
+#define ZXDH_RING_FULL_ERR(_ring) \
+ ((ZXDH_RING_USED_QUANTA(_ring) == ((_ring).size - 1)))
+
+#define ZXDH_ERR_RING_FULL2(_ring) \
+ ((ZXDH_RING_USED_QUANTA(_ring) == ((_ring).size - 2)))
+
+#define ZXDH_ERR_RING_FULL3(_ring) \
+ ((ZXDH_RING_USED_QUANTA(_ring) == ((_ring).size - 3)))
+
+#define ZXDH_SQ_RING_FULL_ERR(_ring) \
+ ((ZXDH_RING_USED_QUANTA(_ring) == ((_ring).size - 257)))
+
+#define ZXDH_ERR_SQ_RING_FULL2(_ring) \
+ ((ZXDH_RING_USED_QUANTA(_ring) == ((_ring).size - 258)))
+#define ZXDH_ERR_SQ_RING_FULL3(_ring) \
+ ((ZXDH_RING_USED_QUANTA(_ring) == ((_ring).size - 259)))
+#define ZXDH_RING_MORE_WORK(_ring) ((ZXDH_RING_USED_QUANTA(_ring) != 0))
+
+#define ZXDH_RING_USED_QUANTA(_ring) \
+ ((((_ring).head + (_ring).size - (_ring).tail) % (_ring).size))
+
+#define ZXDH_RING_FREE_QUANTA(_ring) \
+ (((_ring).size - ZXDH_RING_USED_QUANTA(_ring) - 1))
+
+#define ZXDH_SQ_RING_FREE_QUANTA(_ring) \
+ (((_ring).size - ZXDH_RING_USED_QUANTA(_ring) - 257))
+
+#define ZXDH_ATOMIC_RING_MOVE_HEAD(_ring, index, _retcode) \
+ { \
+ index = ZXDH_RING_CURRENT_HEAD(_ring); \
+ ZXDH_RING_MOVE_HEAD(_ring, _retcode); \
+ }
+
+enum zxdh_qp_wqe_size {
+ ZXDH_WQE_SIZE_32 = 32,
+ ZXDH_WQE_SIZE_64 = 64,
+ ZXDH_WQE_SIZE_96 = 96,
+ ZXDH_WQE_SIZE_128 = 128,
+ ZXDH_WQE_SIZE_256 = 256,
+};
+
+/**
+ * set_64bit_val - set 64 bit value to hw wqe
+ * @wqe_words: wqe addr to write
+ * @byte_index: index in wqe
+ * @val: value to write
+ **/
+static inline void set_64bit_val(__le64 *wqe_words, __u32 byte_index, __u64 val)
+{
+ wqe_words[byte_index >> 3] = htole64(val);
+}
+
+/**
+ * set_32bit_val - set 32 bit value to hw wqe
+ * @wqe_words: wqe addr to write
+ * @byte_index: index in wqe
+ * @val: value to write
+ **/
+static inline void set_32bit_val(__le32 *wqe_words, __u32 byte_index, __u32 val)
+{
+ wqe_words[byte_index >> 2] = htole32(val);
+}
+
+/**
+ * set_16bit_val - set 16 bit value to hw wqe
+ * @wqe_words: wqe addr to write
+ * @byte_index: index in wqe
+ * @val: value to write
+ **/
+static inline void set_16bit_val(__le16 *wqe_words, __u32 byte_index, __u16 val)
+{
+ wqe_words[byte_index >> 1] = htole16(val);
+}
+
+/**
+ * get_64bit_val - read 64 bit value from wqe
+ * @wqe_words: wqe addr
+ * @byte_index: index to read from
+ * @val: read value
+ **/
+static inline void get_64bit_val(__le64 *wqe_words, __u32 byte_index,
+ __u64 *val)
+{
+ *val = le64toh(wqe_words[byte_index >> 3]);
+}
+
+/**
+ * get_32bit_val - read 32 bit value from wqe
+ * @wqe_words: wqe addr
+ * @byte_index: index to read from
+ * @val: return 32 bit value
+ **/
+static inline void get_32bit_val(__le32 *wqe_words, __u32 byte_index,
+ __u32 *val)
+{
+ *val = le32toh(wqe_words[byte_index >> 2]);
+}
+
+static inline void db_wr32(__u32 val, __u32 *wqe_word)
+{
+ *wqe_word = val;
+}
+
+#define read_wqe_need_split(pre_cal_psn, next_psn) \
+ (((pre_cal_psn < next_psn) && (pre_cal_psn != 0)) || \
+ ((next_psn <= 0x7FFFFF) && (pre_cal_psn > 0x800000)))
+
+#endif /* ZXDH_DEFS_H */
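
The ring macros treat head and tail as free-running indices modulo size: occupancy is (head + size - tail) % size, an ordinary ring holds one slot back, and the SQ variants hold 257 back (see ZXDH_SQ_RSVD and the FULL checks above). A small worked sketch of the arithmetic, independent of the driver:

    #include <assert.h>

    struct demo_ring { unsigned head, tail, size; };

    static unsigned used(const struct demo_ring *r)
    {
        return (r->head + r->size - r->tail) % r->size;
    }

    int main(void)
    {
        struct demo_ring r = { .head = 5, .tail = 1020, .size = 1024 };

        assert(used(&r) == 9);                  /* wrapped: 5 + 1024 - 1020 */
        assert(r.size - used(&r) - 1 == 1014);  /* ZXDH_RING_FREE_QUANTA */
        assert(r.size - used(&r) - 257 == 758); /* ZXDH_SQ_RING_FREE_QUANTA */
        return 0;
    }
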
diff --git a/providers/zrdma/zxdh_devids.h b/providers/zrdma/zxdh_devids.h
new file mode 100644
index 0000000..ac23124
--- /dev/null
+++ b/providers/zrdma/zxdh_devids.h
@@ -0,0 +1,17 @@
+/* SPDX-License-Identifier: GPL-2.0 or Linux-OpenIB */
+/* Copyright (c) 2024 ZTE Corporation. All rights reserved. */
+#ifndef ZXDH_DEVIDS_H
+#define ZXDH_DEVIDS_H
+
+/* ZXDH VENDOR ID */
+#define PCI_VENDOR_ID_ZXDH_EVB 0x16c3
+#define PCI_VENDOR_ID_ZXDH_E312 0x1cf2
+#define PCI_VENDOR_ID_ZXDH_X512 0x1cf2
+/* ZXDH Devices ID */
+#define ZXDH_DEV_ID_ADAPTIVE_EVB_PF 0x8040 /* ZXDH EVB PF DEVICE ID*/
+#define ZXDH_DEV_ID_ADAPTIVE_EVB_VF 0x8041 /* ZXDH EVB VF DEVICE ID*/
+#define ZXDH_DEV_ID_ADAPTIVE_E312_PF 0x8049 /* ZXDH E312 PF DEVICE ID*/
+#define ZXDH_DEV_ID_ADAPTIVE_E312_VF 0x8060 /* ZXDH E312 VF DEVICE ID*/
+#define ZXDH_DEV_ID_ADAPTIVE_X512_PF 0x806B /* ZXDH X512 PF DEVICE ID*/
+#define ZXDH_DEV_ID_ADAPTIVE_X512_VF 0x806C /* ZXDH X512 VF DEVICE ID*/
+#endif /* ZXDH_DEVIDS_H */
diff --git a/providers/zrdma/zxdh_dv.h b/providers/zrdma/zxdh_dv.h
new file mode 100644
index 0000000..bb7a845
--- /dev/null
+++ b/providers/zrdma/zxdh_dv.h
@@ -0,0 +1,75 @@
+/* SPDX-License-Identifier: GPL-2.0 or Linux-OpenIB */
+/* Copyright (c) 2024 ZTE Corporation. All rights reserved. */
+#ifndef _ZXDH_DV_H_
+#define _ZXDH_DV_H_
+
+#include <stdio.h>
+#include <stdbool.h>
+#include <linux/types.h> /* For the __be64 type */
+#include <sys/types.h>
+#include <endian.h>
+#if defined(__SSE3__)
+#include <limits.h>
+#include <emmintrin.h>
+#include <tmmintrin.h>
+#endif /* defined(__SSE3__) */
+
+#include <infiniband/verbs.h>
+#include <infiniband/tm_types.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+enum switch_status {
+ SWITCH_CLOSE = 0,
+ SWITCH_OPEN = 1,
+ SWITCH_ERROR,
+};
+
+enum zxdh_qp_reset_qp_code {
+ ZXDH_RESET_RETRY_TX_ITEM_FLAG = 1,
+};
+
+enum zxdh_qp_modify_qpc_mask {
+ ZXDH_RETRY_CQE_SQ_OPCODE = 1 << 0,
+ ZXDH_ERR_FLAG_SET = 1 << 1,
+ ZXDH_PACKAGE_ERR_FLAG = 1 << 2,
+ ZXDH_TX_LAST_ACK_PSN = 1 << 3,
+ ZXDH_TX_LAST_ACK_WQE_OFFSET_SET = 1 << 4,
+ ZXDH_TX_READ_RETRY_FLAG_SET = 1 << 5,
+ ZXDH_TX_RDWQE_PYLD_LENGTH = 1 << 6,
+ ZXDH_TX_RECV_READ_FLAG_SET = 1 << 7,
+ ZXDH_TX_RD_MSG_LOSS_ERR_FLAG_SET = 1 << 8,
+};
+
+struct zxdh_rdma_qpc {
+ uint8_t retry_flag;
+ uint8_t rnr_retry_flag;
+ uint8_t read_retry_flag;
+ uint8_t cur_retry_count;
+ uint8_t retry_cqe_sq_opcode;
+ uint8_t err_flag;
+ uint8_t ack_err_flag;
+ uint8_t package_err_flag;
+ uint8_t recv_err_flag;
+ uint32_t tx_last_ack_psn;
+ uint8_t retry_count;
+};
+
+int zxdh_get_log_trace_switch(struct ibv_context *context,
+ enum switch_status *status);
+int zxdh_set_log_trace_switch(struct ibv_context *context,
+ enum switch_status status);
+int zxdh_modify_qp_udp_sport(struct ibv_context *context, uint16_t udp_sport,
+ uint32_t qpn);
+int zxdh_query_qpc(struct ibv_qp *qp, struct zxdh_rdma_qpc *qpc);
+int zxdh_modify_qpc(struct ibv_qp *qp, struct zxdh_rdma_qpc *qpc,
+ uint64_t qpc_mask);
+int zxdh_reset_qp(struct ibv_qp *qp, uint64_t opcode);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
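
These are the stable entry points exported through libzrdma.map; each one probes the context's private ops and fails with -EOPNOTSUPP when none are present, so they can be called speculatively. A minimal usage sketch (a hypothetical diagnostic helper):

    #include <stdio.h>
    #include <infiniband/zxdh_dv.h>

    static void dump_qp_retry_state(struct ibv_qp *qp)
    {
        struct zxdh_rdma_qpc qpc;
        enum switch_status status;

        if (!zxdh_query_qpc(qp, &qpc))
            printf("qp %u: retry_count=%u tx_last_ack_psn=%u\n",
                   qp->qp_num, qpc.retry_count, qpc.tx_last_ack_psn);

        if (!zxdh_get_log_trace_switch(qp->context, &status))
            printf("log trace: %s\n",
                   status == SWITCH_OPEN ? "open" : "closed");
    }
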
diff --git a/providers/zrdma/zxdh_hw.c b/providers/zrdma/zxdh_hw.c
new file mode 100644
index 0000000..ed577a9
--- /dev/null
+++ b/providers/zrdma/zxdh_hw.c
@@ -0,0 +1,2596 @@
+// SPDX-License-Identifier: GPL-2.0 or Linux-OpenIB
+/* Copyright (c) 2024 ZTE Corporation. All rights reserved. */
+#include "zxdh_status.h"
+#include "zxdh_defs.h"
+#include "zxdh_verbs.h"
+#include "main.h"
+#include <errno.h>
+#include <ccan/container_of.h>
+#include "private_verbs_cmd.h"
+#include <stdlib.h>
+#include <stdio.h>
+#include <unistd.h>
+#include <stdbool.h>
+#include <string.h>
+#include <util/udma_barrier.h>
+#include <util/util.h>
+#include <linux/types.h>
+#include <inttypes.h>
+#include <pthread.h>
+#include <endian.h>
+#define ERROR_CODE_VALUE 65
+
+static void qp_tx_psn_add(__u32 *x, __u32 y, __u16 mtu)
+{
+ if (y == 0) {
+ *x = (*x + 1) & 0xffffff;
+ return;
+ }
+ *x = (*x + ((y % mtu) ? (y / mtu + 1) : y / mtu)) & 0xffffff;
+}
+
+int zxdh_get_write_imm_split_switch(void)
+{
+ char *env;
+ env = getenv("ZXDH_WRITE_IMM_SPILT_ENABLE");
+ return (env != NULL) ? atoi(env) : 0;
+}
+
+/**
+ * zxdh_set_fragment - set fragment in wqe
+ * @wqe: wqe for setting fragment
+ * @offset: offset value
+ * @sge: sge length and stag
+ * @valid: The wqe valid
+ */
+static void zxdh_set_fragment(__le64 *wqe, __u32 offset, struct zxdh_sge *sge,
+ __u8 valid)
+{
+ if (sge) {
+ set_64bit_val(wqe, offset + 8,
+ FIELD_PREP(ZXDHQPSQ_FRAG_TO, sge->tag_off));
+ set_64bit_val(wqe, offset,
+ FIELD_PREP(ZXDHQPSQ_VALID, valid) |
+ FIELD_PREP(ZXDHQPSQ_FRAG_LEN, sge->len) |
+ FIELD_PREP(ZXDHQPSQ_FRAG_STAG,
+ sge->stag));
+ } else {
+ set_64bit_val(wqe, offset + 8, 0);
+ set_64bit_val(wqe, offset, FIELD_PREP(ZXDHQPSQ_VALID, valid));
+ }
+}
+
+/**
+ * zxdh_nop_1 - insert a NOP wqe
+ * @qp: hw qp ptr
+ */
+static enum zxdh_status_code zxdh_nop_1(struct zxdh_qp *qp)
+{
+ __u64 hdr;
+ __le64 *wqe;
+ __u32 wqe_idx;
+ bool signaled = false;
+
+ if (!qp->sq_ring.head)
+ return ZXDH_ERR_PARAM;
+
+ wqe_idx = ZXDH_RING_CURRENT_HEAD(qp->sq_ring);
+ wqe = qp->sq_base[wqe_idx].elem;
+
+ qp->sq_wrtrk_array[wqe_idx].quanta = ZXDH_QP_WQE_MIN_QUANTA;
+
+ set_64bit_val(wqe, 8, 0);
+ set_64bit_val(wqe, 16, 0);
+ set_64bit_val(wqe, 24, 0);
+
+ hdr = FIELD_PREP(ZXDHQPSQ_OPCODE, ZXDH_OP_TYPE_NOP) |
+ FIELD_PREP(ZXDHQPSQ_SIGCOMPL, signaled) |
+ FIELD_PREP(ZXDHQPSQ_VALID, qp->swqe_polarity);
+
+ /* make sure WQE is written before valid bit is set */
+ udma_to_device_barrier();
+
+ set_64bit_val(wqe, 0, hdr);
+
+ return 0;
+}
+
+/**
+ * zxdh_clr_wqes - clear next 128 sq entries
+ * @qp: hw qp ptr
+ * @qp_wqe_idx: wqe_idx
+ */
+void zxdh_clr_wqes(struct zxdh_qp *qp, __u32 qp_wqe_idx)
+{
+ __le64 *wqe;
+ __u32 wqe_idx;
+
+ if (!(qp_wqe_idx & 0x7F)) {
+ wqe_idx = (qp_wqe_idx + 128) % qp->sq_ring.size;
+ wqe = qp->sq_base[wqe_idx].elem;
+ if (wqe_idx)
+ memset(wqe, qp->swqe_polarity ? 0 : 0xFF, 0x1000);
+ else
+ memset(wqe, qp->swqe_polarity ? 0xFF : 0, 0x1000);
+ }
+}
+
+/**
+ * zxdh_qp_post_wr - ring doorbell
+ * @qp: hw qp ptr
+ */
+void zxdh_qp_post_wr(struct zxdh_qp *qp)
+{
+ /* valid bit is written before ringing doorbell */
+ udma_to_device_barrier();
+
+ db_wr32(qp->qp_id, qp->wqe_alloc_db);
+ qp->initial_ring.head = qp->sq_ring.head;
+}
+
+/**
+ * zxdh_qp_set_shadow_area - fill SW_RQ_Head
+ * @qp: hw qp ptr
+ */
+void zxdh_qp_set_shadow_area(struct zxdh_qp *qp)
+{
+ __u8 polarity = 0;
+
+ polarity = ((ZXDH_RING_CURRENT_HEAD(qp->rq_ring) == 0) ?
+ !qp->rwqe_polarity :
+ qp->rwqe_polarity);
+ set_64bit_val(qp->shadow_area, 0,
+ FIELD_PREP(ZXDHQPDBSA_RQ_POLARITY, polarity) |
+ FIELD_PREP(ZXDHQPDBSA_RQ_SW_HEAD,
+ ZXDH_RING_CURRENT_HEAD(qp->rq_ring)));
+}
+
+/**
+ * zxdh_qp_ring_push_db - ring qp doorbell
+ * @qp: hw qp ptr
+ * @wqe_idx: wqe index
+ */
+static void zxdh_qp_ring_push_db(struct zxdh_qp *qp, __u32 wqe_idx)
+{
+ set_32bit_val(qp->push_db, 0,
+ FIELD_PREP(ZXDH_WQEALLOC_WQE_DESC_INDEX, wqe_idx >> 3) |
+ qp->qp_id);
+ qp->initial_ring.head = qp->sq_ring.head;
+ qp->push_mode = true;
+ qp->push_dropped = false;
+}
+
+void zxdh_qp_push_wqe(struct zxdh_qp *qp, __le64 *wqe, __u16 quanta,
+ __u32 wqe_idx, bool post_sq)
+{
+ __le64 *push;
+
+ if (ZXDH_RING_CURRENT_HEAD(qp->initial_ring) !=
+ ZXDH_RING_CURRENT_TAIL(qp->sq_ring) &&
+ !qp->push_mode) {
+ if (post_sq)
+ zxdh_qp_post_wr(qp);
+ } else {
+ push = (__le64 *)((uintptr_t)qp->push_wqe +
+ (wqe_idx & 0x7) * 0x20);
+ memcpy(push, wqe, quanta * ZXDH_QP_WQE_MIN_SIZE);
+ zxdh_qp_ring_push_db(qp, wqe_idx);
+ }
+}
+
+/**
+ * zxdh_qp_get_next_send_wqe - pad with NOP if needed, return where next WR should go
+ * @qp: hw qp ptr
+ * @wqe_idx: return wqe index
+ * @quanta: size of WR in quanta
+ * @total_size: size of WR in bytes
+ * @info: info on WR
+ */
+__le64 *zxdh_qp_get_next_send_wqe(struct zxdh_qp *qp, __u32 *wqe_idx,
+ __u16 quanta, __u32 total_size,
+ struct zxdh_post_sq_info *info)
+{
+ __le64 *wqe;
+ __u16 avail_quanta;
+ __u16 i;
+
+ avail_quanta = ZXDH_MAX_SQ_WQES_PER_PAGE -
+ (ZXDH_RING_CURRENT_HEAD(qp->sq_ring) %
+ ZXDH_MAX_SQ_WQES_PER_PAGE);
+ if (quanta <= avail_quanta) {
+ /* WR fits in current chunk */
+ if (quanta > ZXDH_SQ_RING_FREE_QUANTA(qp->sq_ring))
+ return NULL;
+ } else {
+ /* Need to pad with NOP */
+ if (quanta + avail_quanta >
+ ZXDH_SQ_RING_FREE_QUANTA(qp->sq_ring))
+ return NULL;
+
+ for (i = 0; i < avail_quanta; i++) {
+ zxdh_nop_1(qp);
+ ZXDH_RING_MOVE_HEAD_NOCHECK(qp->sq_ring);
+ }
+ }
+
+ *wqe_idx = ZXDH_RING_CURRENT_HEAD(qp->sq_ring);
+ if (!*wqe_idx)
+ qp->swqe_polarity = !qp->swqe_polarity;
+
+ ZXDH_RING_MOVE_HEAD_BY_COUNT_NOCHECK(qp->sq_ring, quanta);
+
+ wqe = qp->sq_base[*wqe_idx].elem;
+ qp->sq_wrtrk_array[*wqe_idx].wrid = info->wr_id;
+ qp->sq_wrtrk_array[*wqe_idx].wr_len = total_size;
+ qp->sq_wrtrk_array[*wqe_idx].quanta = quanta;
+
+ return wqe;
+}
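+
+/*
+ * Padding example: if the SQ head is 3 quanta short of a page boundary
+ * and a WR needs 4 quanta, three NOPs are posted first so the WR never
+ * straddles a page; callers therefore need quanta + avail_quanta free
+ * slots in the worst case.
+ */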
+
+/**
+ * zxdh_qp_get_next_recv_wqe - get next qp's rcv wqe
+ * @qp: hw qp ptr
+ * @wqe_idx: return wqe index
+ */
+__le64 *zxdh_qp_get_next_recv_wqe(struct zxdh_qp *qp, __u32 *wqe_idx)
+{
+ __le64 *wqe;
+ enum zxdh_status_code ret_code;
+
+ if (ZXDH_RING_FULL_ERR(qp->rq_ring))
+ return NULL;
+
+ ZXDH_ATOMIC_RING_MOVE_HEAD(qp->rq_ring, *wqe_idx, ret_code);
+ if (ret_code)
+ return NULL;
+
+ if (!*wqe_idx)
+ qp->rwqe_polarity = !qp->rwqe_polarity;
+	/* rq_wqe_size_multiplier is the number of 16-byte quanta in one rq wqe */
+ wqe = qp->rq_base[*wqe_idx * qp->rq_wqe_size_multiplier].elem;
+
+ return wqe;
+}
+
+static enum zxdh_status_code
+zxdh_post_rdma_write(struct zxdh_qp *qp, struct zxdh_post_sq_info *info,
+ bool post_sq, __u32 total_size)
+{
+ enum zxdh_status_code ret_code;
+ struct zxdh_rdma_write *op_info;
+ __u32 i, byte_off = 0;
+ __u32 frag_cnt, addl_frag_cnt;
+ __le64 *wqe;
+ __u32 wqe_idx;
+ __u16 quanta;
+ __u64 hdr;
+ bool read_fence = false;
+ bool imm_data_flag;
+
+ op_info = &info->op.rdma_write;
+ imm_data_flag = info->imm_data_valid ? 1 : 0;
+ read_fence |= info->read_fence;
+
+ if (imm_data_flag)
+ frag_cnt =
+ op_info->num_lo_sges ? (op_info->num_lo_sges + 1) : 2;
+ else
+ frag_cnt = op_info->num_lo_sges;
+ addl_frag_cnt =
+ op_info->num_lo_sges > 1 ? (op_info->num_lo_sges - 1) : 0;
+
+ ret_code = zxdh_fragcnt_to_quanta_sq(frag_cnt, &quanta);
+ if (ret_code)
+ return ret_code;
+
+ wqe = zxdh_qp_get_next_send_wqe(qp, &wqe_idx, quanta, total_size, info);
+ if (!wqe)
+ return ZXDH_ERR_QP_TOOMANY_WRS_POSTED;
+
+ zxdh_clr_wqes(qp, wqe_idx);
+
+ if (op_info->num_lo_sges) {
+ set_64bit_val(
+ wqe, 16,
+ FIELD_PREP(ZXDHQPSQ_FIRST_FRAG_VALID,
+ op_info->lo_sg_list->len ==
+ ZXDH_MAX_SQ_PAYLOAD_SIZE ?
+ 1 :
+ 0) |
+ FIELD_PREP(ZXDHQPSQ_FIRST_FRAG_LEN,
+ op_info->lo_sg_list->len) |
+ FIELD_PREP(ZXDHQPSQ_FIRST_FRAG_STAG,
+ op_info->lo_sg_list->stag));
+ set_64bit_val(wqe, 8,
+ FIELD_PREP(ZXDHQPSQ_FRAG_TO,
+ op_info->lo_sg_list->tag_off));
+ } else {
+		/* if zero SGEs, post a special SGE with zero length */
+ set_64bit_val(wqe, 16,
+ FIELD_PREP(ZXDHQPSQ_FIRST_FRAG_VALID, 0) |
+ FIELD_PREP(ZXDHQPSQ_FIRST_FRAG_LEN, 0) |
+ FIELD_PREP(ZXDHQPSQ_FIRST_FRAG_STAG,
+ 0x100));
+ set_64bit_val(wqe, 8, FIELD_PREP(ZXDHQPSQ_FRAG_TO, 0));
+ }
+
+ if (imm_data_flag) {
+ byte_off = ZXDH_SQ_WQE_BYTESIZE + ZXDH_QP_FRAG_BYTESIZE;
+ if (op_info->num_lo_sges > 1) {
+ qp->wqe_ops.iw_set_fragment(wqe, byte_off,
+ &op_info->lo_sg_list[1],
+ qp->swqe_polarity);
+ byte_off += ZXDH_QP_FRAG_BYTESIZE;
+ }
+ set_64bit_val(
+ wqe, ZXDH_SQ_WQE_BYTESIZE,
+ FIELD_PREP(ZXDHQPSQ_IMMDATA_VALID, qp->swqe_polarity) |
+ FIELD_PREP(ZXDHQPSQ_IMMDATA, info->imm_data));
+ i = 2;
+ if (i < op_info->num_lo_sges) {
+ for (byte_off = ZXDH_SQ_WQE_BYTESIZE +
+ 2 * ZXDH_QP_FRAG_BYTESIZE;
+ i < op_info->num_lo_sges; i += 2) {
+ if (i == addl_frag_cnt) {
+ qp->wqe_ops.iw_set_fragment(
+ wqe, byte_off,
+ &op_info->lo_sg_list[i],
+ qp->swqe_polarity);
+ byte_off += ZXDH_QP_FRAG_BYTESIZE;
+ break;
+ }
+ byte_off += ZXDH_QP_FRAG_BYTESIZE;
+ qp->wqe_ops.iw_set_fragment(
+ wqe, byte_off,
+ &op_info->lo_sg_list[i + 1],
+ qp->swqe_polarity);
+ byte_off -= ZXDH_QP_FRAG_BYTESIZE;
+ qp->wqe_ops.iw_set_fragment(
+ wqe, byte_off, &op_info->lo_sg_list[i],
+ qp->swqe_polarity);
+ byte_off += 2 * ZXDH_QP_FRAG_BYTESIZE;
+ }
+ }
+ } else {
+ i = 1;
+ for (byte_off = ZXDH_SQ_WQE_BYTESIZE; i < op_info->num_lo_sges;
+ i += 2) {
+ if (i == addl_frag_cnt) {
+ qp->wqe_ops.iw_set_fragment(
+ wqe, byte_off, &op_info->lo_sg_list[i],
+ qp->swqe_polarity);
+ byte_off += ZXDH_QP_FRAG_BYTESIZE;
+ break;
+ }
+ byte_off += ZXDH_QP_FRAG_BYTESIZE;
+ qp->wqe_ops.iw_set_fragment(wqe, byte_off,
+ &op_info->lo_sg_list[i + 1],
+ qp->swqe_polarity);
+ byte_off -= ZXDH_QP_FRAG_BYTESIZE;
+ qp->wqe_ops.iw_set_fragment(wqe, byte_off,
+ &op_info->lo_sg_list[i],
+ qp->swqe_polarity);
+ byte_off += 2 * ZXDH_QP_FRAG_BYTESIZE;
+ }
+ }
+	/* if frag count is even, set valid bit in the next unused fragment */
+ if (!(frag_cnt & 0x01) && frag_cnt) {
+ qp->wqe_ops.iw_set_fragment(wqe, byte_off, NULL,
+ qp->swqe_polarity);
+ }
+
+ hdr = FIELD_PREP(ZXDHQPSQ_VALID, qp->swqe_polarity) |
+ FIELD_PREP(ZXDHQPSQ_OPCODE, info->op_type) |
+ FIELD_PREP(ZXDHQPSQ_SIGCOMPL, info->signaled) |
+ FIELD_PREP(ZXDHQPSQ_LOCALFENCE, info->local_fence) |
+ FIELD_PREP(ZXDHQPSQ_READFENCE, read_fence) |
+ FIELD_PREP(ZXDHQPSQ_SOLICITED, info->solicited) |
+ FIELD_PREP(ZXDHQPSQ_IMMDATAFLAG, imm_data_flag) |
+ FIELD_PREP(ZXDHQPSQ_ADDFRAGCNT, addl_frag_cnt) |
+ FIELD_PREP(ZXDHQPSQ_REMSTAG, op_info->rem_addr.stag);
+ set_64bit_val(wqe, 24,
+ FIELD_PREP(ZXDHQPSQ_FRAG_TO, op_info->rem_addr.tag_off));
+
+ udma_to_device_barrier(); /* make sure WQE is populated before valid bit is set */
+
+ set_64bit_val(wqe, 0, hdr);
+ if (post_sq)
+ zxdh_qp_post_wr(qp);
+ qp_tx_psn_add(&qp->next_psn, total_size, qp->mtu);
+ return 0;
+}
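+
+/*
+ * SGE placement used by the SQ builders: the first SGE lives in the
+ * 32-byte WQE header, the rest go in pairs per additional 32-byte
+ * quantum.  Within each pair the higher slot is written before the
+ * lower one (the byte_off +=/-= sequence above), presumably so the
+ * quantum is complete before its first fragment's valid bit lands.
+ */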
+
+static void split_write_imm_wqe(struct zxdh_qp *qp,
+ struct zxdh_post_sq_info *info,
+ struct zxdh_post_sq_info *split_part1_info,
+ struct zxdh_post_sq_info *split_part2_info)
+{
+ __u32 total_size = 0;
+ struct zxdh_rdma_write *op_info;
+
+ op_info = &info->op.rdma_write;
+ total_size = op_info->rem_addr.len;
+ split_part1_info->op.rdma_write.lo_sg_list =
+ info->op.rdma_write.lo_sg_list;
+ split_part2_info->op.rdma_write.lo_sg_list = NULL;
+
+ split_part1_info->op_type = ZXDH_OP_TYPE_WRITE;
+ split_part1_info->signaled = false;
+ split_part1_info->local_fence = info->local_fence;
+ split_part1_info->read_fence = info->read_fence;
+ split_part1_info->solicited = info->solicited;
+ split_part1_info->imm_data_valid = false;
+ split_part1_info->wr_id = info->wr_id;
+ split_part1_info->op.rdma_write.num_lo_sges =
+ info->op.rdma_write.num_lo_sges;
+ split_part1_info->op.rdma_write.rem_addr.stag = op_info->rem_addr.stag;
+ split_part1_info->op.rdma_write.rem_addr.tag_off =
+ op_info->rem_addr.tag_off;
+
+ split_part2_info->op_type = info->op_type;
+ split_part2_info->signaled = info->signaled;
+ split_part2_info->local_fence = info->local_fence;
+ split_part2_info->read_fence = info->read_fence;
+ split_part2_info->solicited = info->solicited;
+ split_part2_info->imm_data_valid = info->imm_data_valid;
+ split_part2_info->wr_id = info->wr_id;
+ split_part2_info->imm_data = info->imm_data;
+ split_part2_info->op.rdma_write.num_lo_sges = 0;
+ split_part2_info->op.rdma_write.rem_addr.stag = op_info->rem_addr.stag;
+ split_part2_info->op.rdma_write.rem_addr.tag_off =
+ op_info->rem_addr.tag_off + total_size;
+}
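+
+/*
+ * Resulting split (sketch): part1 is a plain WRITE carrying every SGE
+ * and no immediate; part2 is a zero-SGE WRITE_WITH_IMM targeting
+ * rem_addr.tag_off + total_size, so the immediate is delivered only
+ * after the full payload, in a message of its own.
+ */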
+
+/**
+ * zxdh_rdma_write - rdma write operation
+ * @qp: hw qp ptr
+ * @info: post sq information
+ * @post_sq: flag to post sq
+ */
+enum zxdh_status_code zxdh_rdma_write(struct zxdh_qp *qp,
+ struct zxdh_post_sq_info *info,
+ bool post_sq)
+{
+ struct zxdh_post_sq_info split_part1_info = { 0 };
+ struct zxdh_post_sq_info split_part2_info = { 0 };
+ struct zxdh_rdma_write *op_info;
+ struct zxdh_uqp *iwuqp;
+ struct zxdh_uvcontext *iwvctx;
+ __u32 i;
+ __u32 total_size = 0;
+ enum zxdh_status_code ret_code;
+ bool imm_data_flag = info->imm_data_valid ? 1 : 0;
+ iwuqp = container_of(qp, struct zxdh_uqp, qp);
+ iwvctx = container_of(iwuqp->vqp.qp.context, struct zxdh_uvcontext,
+ ibv_ctx.context);
+ op_info = &info->op.rdma_write;
+ if (op_info->num_lo_sges > qp->max_sq_frag_cnt)
+ return ZXDH_ERR_INVALID_FRAG_COUNT;
+
+ for (i = 0; i < op_info->num_lo_sges; i++) {
+ total_size += op_info->lo_sg_list[i].len;
+ if (0 != i && 0 == op_info->lo_sg_list[i].len)
+ return ZXDH_ERR_INVALID_FRAG_LEN;
+ }
+
+ if (total_size > ZXDH_MAX_SQ_PAYLOAD_SIZE)
+ return ZXDH_ERR_QP_INVALID_MSG_SIZE;
+
+ op_info->rem_addr.len = total_size;
+ if (iwvctx->zxdh_write_imm_split_switch == 0) {
+ ret_code = zxdh_post_rdma_write(qp, info, post_sq, total_size);
+ if (ret_code)
+ return ret_code;
+ } else {
+ if (imm_data_flag && total_size > qp->mtu) {
+ split_write_imm_wqe(qp, info, &split_part1_info,
+ &split_part2_info);
+
+ ret_code = zxdh_post_rdma_write(qp, &split_part1_info,
+ post_sq, total_size);
+ if (ret_code)
+ return ret_code;
+ ret_code = zxdh_post_rdma_write(qp, &split_part2_info,
+ post_sq, 0);
+ if (ret_code)
+ return ret_code;
+ } else {
+ ret_code = zxdh_post_rdma_write(qp, info, post_sq,
+ total_size);
+ if (ret_code)
+ return ret_code;
+ }
+ }
+
+ return 0;
+}
+
+static void split_two_part_info(struct zxdh_qp *qp,
+ struct zxdh_post_sq_info *info, __u32 ori_psn,
+ __u32 pre_cal_psn,
+ struct zxdh_post_sq_info *split_part1_info,
+ struct zxdh_post_sq_info *split_part2_info)
+{
+ __u32 total_size = 0;
+ __u32 remain_size = 0;
+ __u32 split_size = 0;
+ struct zxdh_rdma_read *op_info;
+
+ op_info = &info->op.rdma_read;
+ total_size = op_info->rem_addr.len;
+ split_part1_info->op.rdma_read.lo_sg_list = qp->split_sg_list;
+ split_part2_info->op.rdma_read.lo_sg_list =
+ qp->split_sg_list + op_info->num_lo_sges;
+
+ memset(split_part1_info->op.rdma_read.lo_sg_list, 0,
+ 2 * op_info->num_lo_sges * sizeof(struct zxdh_sge));
+ if (pre_cal_psn < ori_psn && pre_cal_psn != 0)
+ remain_size = (0xffffff - ori_psn + 1) * qp->mtu;
+ else
+ remain_size = (0x800000 - ori_psn) * qp->mtu;
+
+ split_size = total_size - remain_size;
+
+ split_part1_info->signaled = false;
+ split_part1_info->local_fence = info->local_fence;
+ split_part1_info->read_fence = info->read_fence;
+ split_part1_info->solicited = false;
+ split_part1_info->wr_id = info->wr_id;
+ split_part1_info->op.rdma_read.rem_addr.stag = op_info->rem_addr.stag;
+ split_part1_info->op.rdma_read.rem_addr.tag_off =
+ op_info->rem_addr.tag_off;
+
+ split_part2_info->signaled = info->signaled;
+ split_part2_info->local_fence = info->local_fence;
+ split_part2_info->read_fence = info->read_fence;
+ split_part2_info->solicited = info->solicited;
+ split_part2_info->wr_id = info->wr_id;
+ split_part2_info->op.rdma_read.rem_addr.stag = op_info->rem_addr.stag;
+ split_part2_info->op.rdma_read.rem_addr.tag_off =
+ op_info->rem_addr.tag_off + remain_size;
+
+ for (int i = 0; i < op_info->num_lo_sges; i++) {
+ if (op_info->lo_sg_list[i].len +
+ split_part1_info->op.rdma_read.rem_addr.len <
+ remain_size) {
+ split_part1_info->op.rdma_read.rem_addr.len +=
+ op_info->lo_sg_list[i].len;
+ split_part1_info->op.rdma_read.num_lo_sges += 1;
+ memcpy(split_part1_info->op.rdma_read.lo_sg_list + i,
+ op_info->lo_sg_list + i,
+ sizeof(struct zxdh_sge));
+ continue;
+ } else if (op_info->lo_sg_list[i].len +
+ split_part1_info->op.rdma_read.rem_addr.len ==
+ remain_size) {
+ split_part1_info->op.rdma_read.rem_addr.len +=
+ op_info->lo_sg_list[i].len;
+ split_part1_info->op.rdma_read.num_lo_sges += 1;
+ memcpy(split_part1_info->op.rdma_read.lo_sg_list + i,
+ op_info->lo_sg_list + i,
+ sizeof(struct zxdh_sge));
+ split_part2_info->op.rdma_read.rem_addr.len =
+ split_size;
+ split_part2_info->op.rdma_read.num_lo_sges =
+ op_info->num_lo_sges -
+ split_part1_info->op.rdma_read.num_lo_sges;
+ memcpy(split_part2_info->op.rdma_read.lo_sg_list,
+ op_info->lo_sg_list + i + 1,
+ split_part2_info->op.rdma_read.num_lo_sges *
+ sizeof(struct zxdh_sge));
+ break;
+ }
+
+ split_part1_info->op.rdma_read.lo_sg_list[i].len =
+ remain_size -
+ split_part1_info->op.rdma_read.rem_addr.len;
+ split_part1_info->op.rdma_read.lo_sg_list[i].tag_off =
+ op_info->lo_sg_list[i].tag_off;
+ split_part1_info->op.rdma_read.lo_sg_list[i].stag =
+ op_info->lo_sg_list[i].stag;
+ split_part1_info->op.rdma_read.rem_addr.len = remain_size;
+ split_part1_info->op.rdma_read.num_lo_sges += 1;
+ split_part2_info->op.rdma_read.lo_sg_list[0].len =
+ op_info->lo_sg_list[i].len -
+ split_part1_info->op.rdma_read.lo_sg_list[i].len;
+ split_part2_info->op.rdma_read.lo_sg_list[0].tag_off =
+ op_info->lo_sg_list[i].tag_off +
+ split_part1_info->op.rdma_read.lo_sg_list[i].len;
+ split_part2_info->op.rdma_read.lo_sg_list[0].stag =
+ op_info->lo_sg_list[i].stag;
+ split_part2_info->op.rdma_read.rem_addr.len = split_size;
+ split_part2_info->op.rdma_read.num_lo_sges =
+ op_info->num_lo_sges -
+ split_part1_info->op.rdma_read.num_lo_sges + 1;
+ if (split_part2_info->op.rdma_read.num_lo_sges - 1 > 0) {
+ memcpy(split_part2_info->op.rdma_read.lo_sg_list + 1,
+ op_info->lo_sg_list + i + 1,
+ (split_part2_info->op.rdma_read.num_lo_sges -
+ 1) * sizeof(struct zxdh_sge));
+ }
+ break;
+ }
+}
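+
+/*
+ * Worked example (illustrative, mtu = 1024): a 3 MB READ posted at
+ * ori_psn = 0x7ffc00 may only use PSNs up to the 0x800000 window
+ * boundary, so remain_size = (0x800000 - 0x7ffc00) * 1024 = 1 MB goes
+ * into part1 and split_size = 2 MB into part2; the SGE that straddles
+ * the boundary is cut in two across the lists.
+ */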
+
+static enum zxdh_status_code zxdh_post_rdma_read(struct zxdh_qp *qp,
+ struct zxdh_post_sq_info *info,
+ bool post_sq, __u32 total_size)
+{
+ enum zxdh_status_code ret_code;
+ struct zxdh_rdma_read *op_info;
+ __u32 i, byte_off = 0;
+ bool local_fence = false;
+ __u32 addl_frag_cnt;
+ __le64 *wqe;
+ __u32 wqe_idx;
+ __u16 quanta;
+ __u64 hdr;
+
+ op_info = &info->op.rdma_read;
+ ret_code = zxdh_fragcnt_to_quanta_sq(op_info->num_lo_sges, &quanta);
+ if (ret_code)
+ return ret_code;
+
+ wqe = zxdh_qp_get_next_send_wqe(qp, &wqe_idx, quanta, total_size, info);
+ if (!wqe)
+ return ZXDH_ERR_QP_TOOMANY_WRS_POSTED;
+
+ zxdh_clr_wqes(qp, wqe_idx);
+
+ addl_frag_cnt =
+ op_info->num_lo_sges > 1 ? (op_info->num_lo_sges - 1) : 0;
+ local_fence |= info->local_fence;
+
+ if (op_info->num_lo_sges) {
+ set_64bit_val(
+ wqe, 16,
+ FIELD_PREP(ZXDHQPSQ_FIRST_FRAG_VALID,
+ op_info->lo_sg_list->len ==
+ ZXDH_MAX_SQ_PAYLOAD_SIZE ?
+ 1 :
+ 0) |
+ FIELD_PREP(ZXDHQPSQ_FIRST_FRAG_LEN,
+ op_info->lo_sg_list->len) |
+ FIELD_PREP(ZXDHQPSQ_FIRST_FRAG_STAG,
+ op_info->lo_sg_list->stag));
+ set_64bit_val(wqe, 8,
+ FIELD_PREP(ZXDHQPSQ_FRAG_TO,
+ op_info->lo_sg_list->tag_off));
+ } else {
+		/* if zero SGEs, post a special SGE with zero length */
+ set_64bit_val(wqe, 16,
+ FIELD_PREP(ZXDHQPSQ_FIRST_FRAG_VALID, 0) |
+ FIELD_PREP(ZXDHQPSQ_FIRST_FRAG_LEN, 0) |
+ FIELD_PREP(ZXDHQPSQ_FIRST_FRAG_STAG,
+ 0x100));
+ set_64bit_val(wqe, 8, FIELD_PREP(ZXDHQPSQ_FRAG_TO, 0));
+ }
+
+ i = 1;
+ for (byte_off = ZXDH_SQ_WQE_BYTESIZE; i < op_info->num_lo_sges;
+ i += 2) {
+ if (i == addl_frag_cnt) {
+ qp->wqe_ops.iw_set_fragment(wqe, byte_off,
+ &op_info->lo_sg_list[i],
+ qp->swqe_polarity);
+ byte_off += ZXDH_QP_FRAG_BYTESIZE;
+ break;
+ }
+ byte_off += ZXDH_QP_FRAG_BYTESIZE;
+ qp->wqe_ops.iw_set_fragment(wqe, byte_off,
+ &op_info->lo_sg_list[i + 1],
+ qp->swqe_polarity);
+ byte_off -= ZXDH_QP_FRAG_BYTESIZE;
+ qp->wqe_ops.iw_set_fragment(wqe, byte_off,
+ &op_info->lo_sg_list[i],
+ qp->swqe_polarity);
+ byte_off += 2 * ZXDH_QP_FRAG_BYTESIZE;
+ }
+
+	/* if frag count is even, set valid bit in the next unused fragment */
+ if (!(op_info->num_lo_sges & 0x01) && op_info->num_lo_sges) {
+ qp->wqe_ops.iw_set_fragment(wqe, byte_off, NULL,
+ qp->swqe_polarity);
+ }
+
+ hdr = FIELD_PREP(ZXDHQPSQ_VALID, qp->swqe_polarity) |
+ FIELD_PREP(ZXDHQPSQ_OPCODE, ZXDH_OP_TYPE_READ) |
+ FIELD_PREP(ZXDHQPSQ_SIGCOMPL, info->signaled) |
+ FIELD_PREP(ZXDHQPSQ_LOCALFENCE, info->local_fence) |
+ FIELD_PREP(ZXDHQPSQ_READFENCE, info->read_fence) |
+ FIELD_PREP(ZXDHQPSQ_SOLICITED, info->solicited) |
+ FIELD_PREP(ZXDHQPSQ_ADDFRAGCNT, addl_frag_cnt) |
+ FIELD_PREP(ZXDHQPSQ_REMSTAG, op_info->rem_addr.stag);
+ set_64bit_val(wqe, 24,
+ FIELD_PREP(ZXDHQPSQ_FRAG_TO, op_info->rem_addr.tag_off));
+
+ udma_to_device_barrier(); /* make sure WQE is populated before valid bit is set */
+
+ set_64bit_val(wqe, 0, hdr);
+ if (post_sq)
+ zxdh_qp_post_wr(qp);
+ return 0;
+}
+
+/**
+ * zxdh_rdma_read - rdma read command
+ * @qp: hw qp ptr
+ * @info: post sq information
+ * @inv_stag: flag for inv_stag
+ * @post_sq: flag to post sq
+ */
+enum zxdh_status_code zxdh_rdma_read(struct zxdh_qp *qp,
+ struct zxdh_post_sq_info *info,
+ bool inv_stag, bool post_sq)
+{
+ struct zxdh_post_sq_info split_part1_info = { 0 };
+ struct zxdh_post_sq_info split_part2_info = { 0 };
+ struct zxdh_rdma_read *op_info;
+ enum zxdh_status_code ret_code;
+ __u32 i, total_size = 0, pre_cal_psn = 0;
+
+ op_info = &info->op.rdma_read;
+ if (qp->max_sq_frag_cnt < op_info->num_lo_sges)
+ return ZXDH_ERR_INVALID_FRAG_COUNT;
+
+ for (i = 0; i < op_info->num_lo_sges; i++) {
+ total_size += op_info->lo_sg_list[i].len;
+ if (0 != i && 0 == op_info->lo_sg_list[i].len)
+ return ZXDH_ERR_INVALID_FRAG_LEN;
+ }
+
+ if (total_size > ZXDH_MAX_SQ_PAYLOAD_SIZE)
+ return ZXDH_ERR_QP_INVALID_MSG_SIZE;
+ op_info->rem_addr.len = total_size;
+ pre_cal_psn = qp->next_psn;
+ qp_tx_psn_add(&pre_cal_psn, total_size, qp->mtu);
+ if (read_wqe_need_split(pre_cal_psn, qp->next_psn)) {
+ split_two_part_info(qp, info, qp->next_psn, pre_cal_psn,
+ &split_part1_info, &split_part2_info);
+ ret_code = zxdh_post_rdma_read(qp, &split_part1_info, post_sq,
+ total_size);
+ if (ret_code)
+ return ret_code;
+
+ qp_tx_psn_add(&qp->next_psn,
+ split_part1_info.op.rdma_read.rem_addr.len,
+ qp->mtu);
+ ret_code = zxdh_post_rdma_read(qp, &split_part2_info, post_sq,
+ total_size);
+ if (ret_code)
+ return ret_code;
+
+ qp_tx_psn_add(&qp->next_psn,
+ split_part2_info.op.rdma_read.rem_addr.len,
+ qp->mtu);
+ } else {
+ ret_code = zxdh_post_rdma_read(qp, info, post_sq, total_size);
+ if (ret_code)
+ return ret_code;
+
+ qp_tx_psn_add(&qp->next_psn, total_size, qp->mtu);
+ }
+ return 0;
+}
+
+/**
+ * zxdh_rc_send - rdma send command
+ * @qp: hw qp ptr
+ * @info: post sq information
+ * @post_sq: flag to post sq
+ */
+enum zxdh_status_code zxdh_rc_send(struct zxdh_qp *qp,
+ struct zxdh_post_sq_info *info, bool post_sq)
+{
+ __le64 *wqe;
+ struct zxdh_post_send *op_info;
+ __u64 hdr;
+ __u32 i, wqe_idx, total_size = 0, byte_off;
+ enum zxdh_status_code ret_code;
+ __u32 frag_cnt, addl_frag_cnt;
+ bool read_fence = false;
+ __u16 quanta;
+ bool imm_data_flag = info->imm_data_valid ? 1 : 0;
+
+ op_info = &info->op.send;
+ if (qp->max_sq_frag_cnt < op_info->num_sges)
+ return ZXDH_ERR_INVALID_FRAG_COUNT;
+
+ for (i = 0; i < op_info->num_sges; i++) {
+ total_size += op_info->sg_list[i].len;
+ if (0 != i && 0 == op_info->sg_list[i].len)
+ return ZXDH_ERR_INVALID_FRAG_LEN;
+ }
+
+ if (total_size > ZXDH_MAX_SQ_PAYLOAD_SIZE)
+ return ZXDH_ERR_QP_INVALID_MSG_SIZE;
+
+ if (imm_data_flag)
+ frag_cnt = op_info->num_sges ? (op_info->num_sges + 1) : 2;
+ else
+ frag_cnt = op_info->num_sges;
+ ret_code = zxdh_fragcnt_to_quanta_sq(frag_cnt, &quanta);
+ if (ret_code)
+ return ret_code;
+
+ wqe = zxdh_qp_get_next_send_wqe(qp, &wqe_idx, quanta, total_size, info);
+ if (!wqe)
+ return ZXDH_ERR_QP_TOOMANY_WRS_POSTED;
+
+ zxdh_clr_wqes(qp, wqe_idx);
+
+ read_fence |= info->read_fence;
+ addl_frag_cnt = op_info->num_sges > 1 ? (op_info->num_sges - 1) : 0;
+ if (op_info->num_sges) {
+ set_64bit_val(
+ wqe, 16,
+ FIELD_PREP(ZXDHQPSQ_FIRST_FRAG_VALID,
+ op_info->sg_list->len ==
+ ZXDH_MAX_SQ_PAYLOAD_SIZE ?
+ 1 :
+ 0) |
+ FIELD_PREP(ZXDHQPSQ_FIRST_FRAG_LEN,
+ op_info->sg_list->len) |
+ FIELD_PREP(ZXDHQPSQ_FIRST_FRAG_STAG,
+ op_info->sg_list->stag));
+ set_64bit_val(wqe, 8,
+ FIELD_PREP(ZXDHQPSQ_FRAG_TO,
+ op_info->sg_list->tag_off));
+ } else {
+		/* if zero SGEs, post a special SGE with zero length */
+ set_64bit_val(wqe, 16,
+ FIELD_PREP(ZXDHQPSQ_FIRST_FRAG_VALID, 0) |
+ FIELD_PREP(ZXDHQPSQ_FIRST_FRAG_LEN, 0) |
+ FIELD_PREP(ZXDHQPSQ_FIRST_FRAG_STAG,
+ 0x100));
+ set_64bit_val(wqe, 8, FIELD_PREP(ZXDHQPSQ_FRAG_TO, 0));
+ }
+
+ if (imm_data_flag) {
+ byte_off = ZXDH_SQ_WQE_BYTESIZE + ZXDH_QP_FRAG_BYTESIZE;
+ if (op_info->num_sges > 1) {
+ qp->wqe_ops.iw_set_fragment(wqe, byte_off,
+ &op_info->sg_list[1],
+ qp->swqe_polarity);
+ byte_off += ZXDH_QP_FRAG_BYTESIZE;
+ }
+ set_64bit_val(
+ wqe, ZXDH_SQ_WQE_BYTESIZE,
+ FIELD_PREP(ZXDHQPSQ_IMMDATA_VALID, qp->swqe_polarity) |
+ FIELD_PREP(ZXDHQPSQ_IMMDATA, info->imm_data));
+ i = 2;
+ if (i < op_info->num_sges) {
+ for (byte_off = ZXDH_SQ_WQE_BYTESIZE +
+ 2 * ZXDH_QP_FRAG_BYTESIZE;
+ i < op_info->num_sges; i += 2) {
+ if (i == addl_frag_cnt) {
+ qp->wqe_ops.iw_set_fragment(
+ wqe, byte_off,
+ &op_info->sg_list[i],
+ qp->swqe_polarity);
+ byte_off += ZXDH_QP_FRAG_BYTESIZE;
+ break;
+ }
+ byte_off += ZXDH_QP_FRAG_BYTESIZE;
+ qp->wqe_ops.iw_set_fragment(
+ wqe, byte_off, &op_info->sg_list[i + 1],
+ qp->swqe_polarity);
+ byte_off -= ZXDH_QP_FRAG_BYTESIZE;
+ qp->wqe_ops.iw_set_fragment(
+ wqe, byte_off, &op_info->sg_list[i],
+ qp->swqe_polarity);
+ byte_off += 2 * ZXDH_QP_FRAG_BYTESIZE;
+ }
+ }
+ } else {
+ i = 1;
+ for (byte_off = ZXDH_SQ_WQE_BYTESIZE; i < op_info->num_sges;
+ i += 2) {
+ if (i == addl_frag_cnt) {
+ qp->wqe_ops.iw_set_fragment(
+ wqe, byte_off, &op_info->sg_list[i],
+ qp->swqe_polarity);
+ byte_off += ZXDH_QP_FRAG_BYTESIZE;
+ break;
+ }
+ byte_off += ZXDH_QP_FRAG_BYTESIZE;
+ qp->wqe_ops.iw_set_fragment(wqe, byte_off,
+ &op_info->sg_list[i + 1],
+ qp->swqe_polarity);
+ byte_off -= ZXDH_QP_FRAG_BYTESIZE;
+ qp->wqe_ops.iw_set_fragment(wqe, byte_off,
+ &op_info->sg_list[i],
+ qp->swqe_polarity);
+ byte_off += 2 * ZXDH_QP_FRAG_BYTESIZE;
+ }
+ }
+
+	/* if frag count is even, set valid bit in the next unused fragment */
+ if (!(frag_cnt & 0x01) && frag_cnt) {
+ qp->wqe_ops.iw_set_fragment(wqe, byte_off, NULL,
+ qp->swqe_polarity);
+ }
+
+ hdr = FIELD_PREP(ZXDHQPSQ_VALID, qp->swqe_polarity) |
+ FIELD_PREP(ZXDHQPSQ_OPCODE, info->op_type) |
+ FIELD_PREP(ZXDHQPSQ_SIGCOMPL, info->signaled) |
+ FIELD_PREP(ZXDHQPSQ_LOCALFENCE, info->local_fence) |
+ FIELD_PREP(ZXDHQPSQ_READFENCE, read_fence) |
+ FIELD_PREP(ZXDHQPSQ_SOLICITED, info->solicited) |
+ FIELD_PREP(ZXDHQPSQ_IMMDATAFLAG, imm_data_flag) |
+ FIELD_PREP(ZXDHQPSQ_ADDFRAGCNT, addl_frag_cnt) |
+ FIELD_PREP(ZXDHQPSQ_REMSTAG, info->stag_to_inv);
+ set_64bit_val(wqe, 24,
+ FIELD_PREP(ZXDHQPSQ_INLINEDATAFLAG, 0) |
+ FIELD_PREP(ZXDHQPSQ_INLINEDATALEN, 0));
+
+ udma_to_device_barrier(); /* make sure WQE is populated before valid bit is set */
+
+ set_64bit_val(wqe, 0, hdr);
+ if (post_sq)
+ zxdh_qp_post_wr(qp);
+ qp_tx_psn_add(&qp->next_psn, total_size, qp->mtu);
+
+ return 0;
+}
+
+/**
+ * zxdh_ud_send - rdma send command
+ * @qp: hw qp ptr
+ * @info: post sq information
+ * @post_sq: flag to post sq
+ */
+enum zxdh_status_code zxdh_ud_send(struct zxdh_qp *qp,
+ struct zxdh_post_sq_info *info, bool post_sq)
+{
+ __le64 *wqe_base;
+ __le64 *wqe_ex = NULL;
+ struct zxdh_post_send *op_info;
+ __u64 hdr;
+ __u32 i, wqe_idx, total_size = 0, byte_off;
+ enum zxdh_status_code ret_code;
+ __u32 frag_cnt, addl_frag_cnt;
+ bool read_fence = false;
+ __u16 quanta;
+ bool imm_data_flag = info->imm_data_valid ? 1 : 0;
+
+ op_info = &info->op.send;
+ if (qp->max_sq_frag_cnt < op_info->num_sges)
+ return ZXDH_ERR_INVALID_FRAG_COUNT;
+
+ for (i = 0; i < op_info->num_sges; i++) {
+ total_size += op_info->sg_list[i].len;
+ if (0 != i && 0 == op_info->sg_list[i].len)
+ return ZXDH_ERR_INVALID_FRAG_LEN;
+ }
+
+ if (total_size > ZXDH_MAX_SQ_PAYLOAD_SIZE)
+ return ZXDH_ERR_QP_INVALID_MSG_SIZE;
+
+ if (imm_data_flag)
+ frag_cnt = op_info->num_sges ? (op_info->num_sges + 1) : 2;
+ else
+ frag_cnt = op_info->num_sges;
+ ret_code = zxdh_fragcnt_to_quanta_sq(frag_cnt, &quanta);
+ if (ret_code)
+ return ret_code;
+
+ if (quanta > ZXDH_SQ_RING_FREE_QUANTA(qp->sq_ring))
+ return ZXDH_ERR_QP_TOOMANY_WRS_POSTED;
+
+ wqe_idx = ZXDH_RING_CURRENT_HEAD(qp->sq_ring);
+ if (!wqe_idx)
+ qp->swqe_polarity = !qp->swqe_polarity;
+
+ ZXDH_RING_MOVE_HEAD_BY_COUNT_NOCHECK(qp->sq_ring, quanta);
+
+ wqe_base = qp->sq_base[wqe_idx].elem;
+ qp->sq_wrtrk_array[wqe_idx].wrid = info->wr_id;
+ qp->sq_wrtrk_array[wqe_idx].wr_len = total_size;
+ qp->sq_wrtrk_array[wqe_idx].quanta = quanta;
+
+ zxdh_clr_wqes(qp, wqe_idx);
+
+ read_fence |= info->read_fence;
+ addl_frag_cnt = op_info->num_sges > 1 ? (op_info->num_sges - 1) : 0;
+ hdr = FIELD_PREP(ZXDHQPSQ_VALID, qp->swqe_polarity) |
+ FIELD_PREP(ZXDHQPSQ_OPCODE, info->op_type) |
+ FIELD_PREP(ZXDHQPSQ_SIGCOMPL, info->signaled) |
+ FIELD_PREP(ZXDHQPSQ_SOLICITED, info->solicited) |
+ FIELD_PREP(ZXDHQPSQ_IMMDATAFLAG, imm_data_flag) |
+ FIELD_PREP(ZXDHQPSQ_UD_INLINEDATAFLAG, 0) |
+ FIELD_PREP(ZXDHQPSQ_UD_INLINEDATALEN, 0) |
+ FIELD_PREP(ZXDHQPSQ_UD_ADDFRAGCNT, addl_frag_cnt) |
+ FIELD_PREP(ZXDHQPSQ_AHID, op_info->ah_id);
+
+ if (op_info->num_sges) {
+ set_64bit_val(
+ wqe_base, 16,
+ FIELD_PREP(ZXDHQPSQ_FIRST_FRAG_VALID,
+ op_info->sg_list->len ==
+ ZXDH_MAX_SQ_PAYLOAD_SIZE ?
+ 1 :
+ 0) |
+ FIELD_PREP(ZXDHQPSQ_FIRST_FRAG_LEN,
+ op_info->sg_list->len) |
+ FIELD_PREP(ZXDHQPSQ_FIRST_FRAG_STAG,
+ op_info->sg_list->stag));
+ set_64bit_val(wqe_base, 8,
+ FIELD_PREP(ZXDHQPSQ_FRAG_TO,
+ op_info->sg_list->tag_off));
+ } else {
+		/* if zero SGEs, post a special SGE with zero length */
+ set_64bit_val(wqe_base, 16,
+ FIELD_PREP(ZXDHQPSQ_FIRST_FRAG_VALID, 0) |
+ FIELD_PREP(ZXDHQPSQ_FIRST_FRAG_LEN, 0) |
+ FIELD_PREP(ZXDHQPSQ_FIRST_FRAG_STAG,
+ 0x100));
+ set_64bit_val(wqe_base, 8, FIELD_PREP(ZXDHQPSQ_FRAG_TO, 0));
+ }
+
+ if (imm_data_flag) {
+ wqe_idx = (wqe_idx + 1) % qp->sq_ring.size;
+ if (!wqe_idx)
+ qp->swqe_polarity = !qp->swqe_polarity;
+ wqe_ex = qp->sq_base[wqe_idx].elem;
+ if (op_info->num_sges > 1) {
+ qp->wqe_ops.iw_set_fragment(wqe_ex,
+ ZXDH_QP_FRAG_BYTESIZE,
+ &op_info->sg_list[1],
+ qp->swqe_polarity);
+ }
+ set_64bit_val(
+ wqe_ex, 0,
+ FIELD_PREP(ZXDHQPSQ_IMMDATA_VALID, qp->swqe_polarity) |
+ FIELD_PREP(ZXDHQPSQ_IMMDATA, info->imm_data));
+ i = 2;
+ for (byte_off = ZXDH_QP_FRAG_BYTESIZE; i < op_info->num_sges;
+ i += 2) {
+ if (!(i & 0x1)) {
+ wqe_idx = (wqe_idx + 1) % qp->sq_ring.size;
+ if (!wqe_idx)
+ qp->swqe_polarity = !qp->swqe_polarity;
+ wqe_ex = qp->sq_base[wqe_idx].elem;
+ }
+ if (i == addl_frag_cnt) {
+ qp->wqe_ops.iw_set_fragment(
+ wqe_ex, 0, &op_info->sg_list[i],
+ qp->swqe_polarity);
+ break;
+ }
+ qp->wqe_ops.iw_set_fragment(
+ wqe_ex, byte_off % ZXDH_SQ_WQE_BYTESIZE,
+ &op_info->sg_list[i + 1], qp->swqe_polarity);
+ byte_off -= ZXDH_QP_FRAG_BYTESIZE;
+ qp->wqe_ops.iw_set_fragment(
+ wqe_ex, byte_off % ZXDH_SQ_WQE_BYTESIZE,
+ &op_info->sg_list[i], qp->swqe_polarity);
+ byte_off += ZXDH_QP_FRAG_BYTESIZE;
+ }
+ } else {
+ i = 1;
+ for (byte_off = 0; i < op_info->num_sges; i += 2) {
+ if (i & 0x1) {
+ wqe_idx = (wqe_idx + 1) % qp->sq_ring.size;
+ if (!wqe_idx)
+ qp->swqe_polarity = !qp->swqe_polarity;
+ wqe_ex = qp->sq_base[wqe_idx].elem;
+ }
+ if (i == addl_frag_cnt) {
+ qp->wqe_ops.iw_set_fragment(
+ wqe_ex, 0, &op_info->sg_list[i],
+ qp->swqe_polarity);
+ break;
+ }
+ byte_off += ZXDH_QP_FRAG_BYTESIZE;
+ qp->wqe_ops.iw_set_fragment(
+ wqe_ex, byte_off % ZXDH_SQ_WQE_BYTESIZE,
+ &op_info->sg_list[i + 1], qp->swqe_polarity);
+ byte_off -= ZXDH_QP_FRAG_BYTESIZE;
+ qp->wqe_ops.iw_set_fragment(
+ wqe_ex, byte_off % ZXDH_SQ_WQE_BYTESIZE,
+ &op_info->sg_list[i], qp->swqe_polarity);
+ }
+ }
+
+	/* if frag count is even, set valid bit in the next unused fragment */
+ if (!(frag_cnt & 0x01) && frag_cnt && wqe_ex) {
+ qp->wqe_ops.iw_set_fragment(wqe_ex, ZXDH_QP_FRAG_BYTESIZE, NULL,
+ qp->swqe_polarity);
+ }
+
+ set_64bit_val(wqe_base, 24,
+ FIELD_PREP(ZXDHQPSQ_DESTQPN, op_info->dest_qp) |
+ FIELD_PREP(ZXDHQPSQ_DESTQKEY, op_info->qkey));
+
+ udma_to_device_barrier(); /* make sure WQE is populated before valid bit is set */
+
+ set_64bit_val(wqe_base, 0, hdr);
+ if (post_sq)
+ zxdh_qp_post_wr(qp);
+
+ return 0;
+}
+
+/**
+ * zxdh_set_mw_bind_wqe - set mw bind in wqe
+ * @wqe: wqe for setting mw bind
+ * @op_info: info for setting wqe values
+ */
+static void zxdh_set_mw_bind_wqe(__le64 *wqe, struct zxdh_bind_window *op_info)
+{
+ __u32 value = 0;
+ __u8 leaf_pbl_size = op_info->leaf_pbl_size;
+
+ set_64bit_val(wqe, 8, (uintptr_t)op_info->va);
+
+ if (leaf_pbl_size == 0) {
+ value = (__u32)(op_info->mw_pa_pble_index >> 12);
+ value = (value & 0x03FFFFFFFC0000) >> 18;
+ set_64bit_val(
+ wqe, 16,
+ FIELD_PREP(ZXDHQPSQ_PARENTMRSTAG, op_info->mr_stag) |
+ FIELD_PREP(ZXDHQPSQ_MW_PA_PBLE_TWO, value));
+ } else if (leaf_pbl_size == 1) {
+ value = (__u32)((op_info->mw_pa_pble_index & 0x0FFC0000) >> 18);
+ set_64bit_val(
+ wqe, 16,
+ FIELD_PREP(ZXDHQPSQ_PARENTMRSTAG, op_info->mr_stag) |
+ FIELD_PREP(ZXDHQPSQ_MW_PA_PBLE_TWO, value));
+ } else {
+ value = (__u32)((op_info->mw_pa_pble_index & 0x0FFC0000) >> 18);
+ set_64bit_val(
+ wqe, 16,
+ FIELD_PREP(ZXDHQPSQ_PARENTMRSTAG, op_info->mr_stag) |
+ FIELD_PREP(ZXDHQPSQ_MW_LEVLE2_FIRST_PBLE_INDEX,
+ value) |
+ FIELD_PREP(ZXDHQPSQ_MW_LEVLE2_ROOT_PBLE_INDEX,
+ op_info->root_leaf_offset));
+ }
+
+ if (leaf_pbl_size == 0) {
+ value = (__u32)(op_info->mw_pa_pble_index >> 12);
+ value = value & 0x3FFFF;
+ } else {
+ value = (__u32)(op_info->mw_pa_pble_index & 0x3FFFF);
+ }
+
+ set_64bit_val(wqe, 24,
+ op_info->bind_len |
+ FIELD_PREP(ZXDHQPSQ_MW_PA_PBLE_ONE, value));
+}
+
+/**
+ * zxdh_copy_inline_data - Copy inline data to wqe
+ * @dest: pointer to wqe
+ * @src: pointer to inline data
+ * @len: length of inline data to copy
+ * @polarity: polarity of wqe valid bit
+ * @imm_data_flag: flag for immediate data
+ */
+static void zxdh_copy_inline_data(__u8 *dest, __u8 *src, __u32 len,
+ __u8 polarity, bool imm_data_flag)
+{
+ __u8 inline_valid = polarity << ZXDH_INLINE_VALID_S;
+ __u32 copy_size;
+ __u8 *inline_valid_addr;
+
+ dest += ZXDH_WQE_SIZE_32; /* point to additional 32 byte quanta */
+ if (len) {
+ inline_valid_addr = dest + WQE_OFFSET_7BYTES;
+ if (imm_data_flag) {
+ copy_size = len < INLINE_DATASIZE_24BYTES ?
+ len :
+ INLINE_DATASIZE_24BYTES;
+ dest += WQE_OFFSET_8BYTES;
+ memcpy(dest, src, copy_size);
+ len -= copy_size;
+ dest += WQE_OFFSET_24BYTES;
+ src += copy_size;
+ } else {
+ if (len <= INLINE_DATASIZE_7BYTES) {
+ copy_size = len;
+ memcpy(dest, src, copy_size);
+ *inline_valid_addr = inline_valid;
+ return;
+ }
+ memcpy(dest, src, INLINE_DATASIZE_7BYTES);
+ len -= INLINE_DATASIZE_7BYTES;
+ dest += WQE_OFFSET_8BYTES;
+ src += INLINE_DATA_OFFSET_7BYTES;
+ copy_size = len < INLINE_DATASIZE_24BYTES ?
+ len :
+ INLINE_DATASIZE_24BYTES;
+ memcpy(dest, src, copy_size);
+ len -= copy_size;
+ dest += WQE_OFFSET_24BYTES;
+ src += copy_size;
+ }
+ *inline_valid_addr = inline_valid;
+ }
+
+ while (len) {
+ inline_valid_addr = dest + WQE_OFFSET_7BYTES;
+ if (len <= INLINE_DATASIZE_7BYTES) {
+ copy_size = len;
+ memcpy(dest, src, copy_size);
+ *inline_valid_addr = inline_valid;
+ return;
+ }
+ memcpy(dest, src, INLINE_DATASIZE_7BYTES);
+ len -= INLINE_DATASIZE_7BYTES;
+ dest += WQE_OFFSET_8BYTES;
+ src += INLINE_DATA_OFFSET_7BYTES;
+ copy_size = len < INLINE_DATASIZE_24BYTES ?
+ len :
+ INLINE_DATASIZE_24BYTES;
+ memcpy(dest, src, copy_size);
+ len -= copy_size;
+ dest += WQE_OFFSET_24BYTES;
+ src += copy_size;
+
+ *inline_valid_addr = inline_valid;
+ }
+}
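+
+/*
+ * Inline layout implemented above: each additional 32-byte quantum
+ * holds 7 data bytes, the inline-valid byte at offset 7, then 24 more
+ * data bytes at offset 8 -- 31 payload bytes per quantum.  With
+ * immediate data, the first quantum's low 8 bytes hold the immediate
+ * instead, so it carries only 24 payload bytes.
+ */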
+
+/**
+ * zxdh_inline_data_size_to_quanta - get quanta needed for inline data
+ * @data_size: data size for inline
+ * @imm_data_flag: flag for immediate data
+ *
+ * Gets the quanta based on inline and immediate data.
+ */
+static __u16 zxdh_inline_data_size_to_quanta(__u32 data_size,
+ bool imm_data_flag)
+{
+ if (imm_data_flag)
+ data_size += INLINE_DATASIZE_7BYTES;
+
+ return data_size % 31 ? data_size / 31 + 2 : data_size / 31 + 1;
+}
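+
+/*
+ * Example: 31 inline bytes fit exactly, so quanta = 31/31 + 1 = 2
+ * (header + one data quantum); 32 bytes need 32/31 + 2 = 3.  With
+ * immediate data the 7 bytes displaced from the first data quantum are
+ * added to data_size before the calculation.
+ */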
+
+/**
+ * zxdh_inline_rdma_write - inline rdma write operation
+ * @qp: hw qp ptr
+ * @info: post sq information
+ * @post_sq: flag to post sq
+ */
+enum zxdh_status_code zxdh_inline_rdma_write(struct zxdh_qp *qp,
+ struct zxdh_post_sq_info *info,
+ bool post_sq)
+{
+ __le64 *wqe;
+ __u8 imm_valid;
+ struct zxdh_inline_rdma_write *op_info;
+ __u64 hdr = 0;
+ __u32 wqe_idx;
+ bool read_fence = false;
+ __u16 quanta;
+ bool imm_data_flag = info->imm_data_valid ? 1 : 0;
+
+ op_info = &info->op.inline_rdma_write;
+
+ if (op_info->len > qp->max_inline_data)
+ return ZXDH_ERR_INVALID_INLINE_DATA_SIZE;
+ if (imm_data_flag && op_info->len > ZXDH_MAX_SQ_INLINE_DATELEN_WITH_IMM)
+ return ZXDH_ERR_INVALID_INLINE_DATA_SIZE;
+
+ quanta = qp->wqe_ops.iw_inline_data_size_to_quanta(op_info->len,
+ imm_data_flag);
+ wqe = zxdh_qp_get_next_send_wqe(qp, &wqe_idx, quanta, op_info->len,
+ info);
+ if (!wqe)
+ return ZXDH_ERR_QP_TOOMANY_WRS_POSTED;
+
+ zxdh_clr_wqes(qp, wqe_idx);
+
+ read_fence |= info->read_fence;
+ hdr = FIELD_PREP(ZXDHQPSQ_VALID, qp->swqe_polarity) |
+ FIELD_PREP(ZXDHQPSQ_OPCODE, info->op_type) |
+ FIELD_PREP(ZXDHQPSQ_SIGCOMPL, info->signaled) |
+ FIELD_PREP(ZXDHQPSQ_LOCALFENCE, info->local_fence) |
+ FIELD_PREP(ZXDHQPSQ_READFENCE, read_fence) |
+ FIELD_PREP(ZXDHQPSQ_SOLICITED, info->solicited) |
+ FIELD_PREP(ZXDHQPSQ_IMMDATAFLAG, imm_data_flag) |
+ FIELD_PREP(ZXDHQPSQ_WRITE_INLINEDATAFLAG, 1) |
+ FIELD_PREP(ZXDHQPSQ_WRITE_INLINEDATALEN, op_info->len) |
+ FIELD_PREP(ZXDHQPSQ_ADDFRAGCNT, quanta - 1) |
+ FIELD_PREP(ZXDHQPSQ_REMSTAG, op_info->rem_addr.stag);
+ set_64bit_val(wqe, 24,
+ FIELD_PREP(ZXDHQPSQ_FRAG_TO, op_info->rem_addr.tag_off));
+
+ if (imm_data_flag) {
+		/* if inline data exists, do not update imm valid here */
+ imm_valid = (op_info->len == 0) ? qp->swqe_polarity :
+ (!qp->swqe_polarity);
+
+ set_64bit_val(wqe, 32,
+ FIELD_PREP(ZXDHQPSQ_IMMDATA_VALID, imm_valid) |
+ FIELD_PREP(ZXDHQPSQ_IMMDATA,
+ info->imm_data));
+ }
+ qp->wqe_ops.iw_copy_inline_data((__u8 *)wqe, op_info->data,
+ op_info->len, qp->swqe_polarity,
+ imm_data_flag);
+
+ udma_to_device_barrier(); /* make sure WQE is populated before valid bit is set */
+
+ set_64bit_val(wqe, 0, hdr);
+
+ if (post_sq)
+ zxdh_qp_post_wr(qp);
+ qp_tx_psn_add(&qp->next_psn, op_info->len, qp->mtu);
+ return 0;
+}
+
+/**
+ * zxdh_rc_inline_send - inline send operation
+ * @qp: hw qp ptr
+ * @info: post sq information
+ * @post_sq: flag to post sq
+ */
+enum zxdh_status_code zxdh_rc_inline_send(struct zxdh_qp *qp,
+ struct zxdh_post_sq_info *info,
+ bool post_sq)
+{
+ __le64 *wqe;
+ __u8 imm_valid;
+ struct zxdh_inline_rdma_send *op_info;
+ __u64 hdr;
+ __u32 wqe_idx;
+ bool read_fence = false;
+ __u16 quanta;
+ bool imm_data_flag = info->imm_data_valid ? 1 : 0;
+
+ op_info = &info->op.inline_rdma_send;
+
+ if (op_info->len > qp->max_inline_data)
+ return ZXDH_ERR_INVALID_INLINE_DATA_SIZE;
+ if (imm_data_flag && op_info->len > ZXDH_MAX_SQ_INLINE_DATELEN_WITH_IMM)
+ return ZXDH_ERR_INVALID_INLINE_DATA_SIZE;
+
+ quanta = qp->wqe_ops.iw_inline_data_size_to_quanta(op_info->len,
+ imm_data_flag);
+ wqe = zxdh_qp_get_next_send_wqe(qp, &wqe_idx, quanta, op_info->len,
+ info);
+ if (!wqe)
+ return ZXDH_ERR_QP_TOOMANY_WRS_POSTED;
+
+ zxdh_clr_wqes(qp, wqe_idx);
+
+ read_fence |= info->read_fence;
+ hdr = FIELD_PREP(ZXDHQPSQ_VALID, qp->swqe_polarity) |
+ FIELD_PREP(ZXDHQPSQ_OPCODE, info->op_type) |
+ FIELD_PREP(ZXDHQPSQ_SIGCOMPL, info->signaled) |
+ FIELD_PREP(ZXDHQPSQ_LOCALFENCE, info->local_fence) |
+ FIELD_PREP(ZXDHQPSQ_READFENCE, read_fence) |
+ FIELD_PREP(ZXDHQPSQ_SOLICITED, info->solicited) |
+ FIELD_PREP(ZXDHQPSQ_ADDFRAGCNT, quanta - 1) |
+ FIELD_PREP(ZXDHQPSQ_IMMDATAFLAG, imm_data_flag) |
+ FIELD_PREP(ZXDHQPSQ_REMSTAG, info->stag_to_inv);
+ set_64bit_val(wqe, 24,
+ FIELD_PREP(ZXDHQPSQ_INLINEDATAFLAG, 1) |
+ FIELD_PREP(ZXDHQPSQ_INLINEDATALEN, op_info->len));
+
+ if (imm_data_flag) {
+		/* if inline data exists, do not update imm valid here */
+ imm_valid = (op_info->len == 0) ? qp->swqe_polarity :
+ (!qp->swqe_polarity);
+ set_64bit_val(wqe, 32,
+ FIELD_PREP(ZXDHQPSQ_IMMDATA_VALID, imm_valid) |
+ FIELD_PREP(ZXDHQPSQ_IMMDATA,
+ info->imm_data));
+ }
+
+ qp->wqe_ops.iw_copy_inline_data((__u8 *)wqe, op_info->data,
+ op_info->len, qp->swqe_polarity,
+ imm_data_flag);
+
+ udma_to_device_barrier(); /* make sure WQE is populated before valid bit is set */
+
+ set_64bit_val(wqe, 0, hdr);
+
+ if (post_sq)
+ zxdh_qp_post_wr(qp);
+
+ qp_tx_psn_add(&qp->next_psn, op_info->len, qp->mtu);
+ return 0;
+}
+
+/**
+ * zxdh_ud_inline_send - inline send operation
+ * @qp: hw qp ptr
+ * @info: post sq information
+ * @post_sq: flag to post sq
+ */
+enum zxdh_status_code zxdh_ud_inline_send(struct zxdh_qp *qp,
+ struct zxdh_post_sq_info *info,
+ bool post_sq)
+{
+ __le64 *wqe_base;
+ __le64 *wqe_ex;
+ struct zxdh_inline_rdma_send *op_info;
+ __u64 hdr;
+ __u32 wqe_idx;
+ bool read_fence = false;
+ __u16 quanta;
+ bool imm_data_flag = info->imm_data_valid ? 1 : 0;
+ __u8 *inline_dest;
+ __u8 *inline_src;
+ __u32 inline_len;
+ __u32 copy_size;
+ __u8 *inline_valid_addr;
+
+ op_info = &info->op.inline_rdma_send;
+ inline_len = op_info->len;
+
+ if (op_info->len > qp->max_inline_data)
+ return ZXDH_ERR_INVALID_INLINE_DATA_SIZE;
+ if (imm_data_flag && op_info->len > ZXDH_MAX_SQ_INLINE_DATELEN_WITH_IMM)
+ return ZXDH_ERR_INVALID_INLINE_DATA_SIZE;
+
+ quanta = qp->wqe_ops.iw_inline_data_size_to_quanta(op_info->len,
+ imm_data_flag);
+ if (quanta > ZXDH_SQ_RING_FREE_QUANTA(qp->sq_ring))
+ return ZXDH_ERR_QP_TOOMANY_WRS_POSTED;
+
+ wqe_idx = ZXDH_RING_CURRENT_HEAD(qp->sq_ring);
+ if (!wqe_idx)
+ qp->swqe_polarity = !qp->swqe_polarity;
+
+ ZXDH_RING_MOVE_HEAD_BY_COUNT_NOCHECK(qp->sq_ring, quanta);
+
+ wqe_base = qp->sq_base[wqe_idx].elem;
+ qp->sq_wrtrk_array[wqe_idx].wrid = info->wr_id;
+ qp->sq_wrtrk_array[wqe_idx].wr_len = op_info->len;
+ qp->sq_wrtrk_array[wqe_idx].quanta = quanta;
+
+ zxdh_clr_wqes(qp, wqe_idx);
+
+ read_fence |= info->read_fence;
+ hdr = FIELD_PREP(ZXDHQPSQ_VALID, qp->swqe_polarity) |
+ FIELD_PREP(ZXDHQPSQ_OPCODE, info->op_type) |
+ FIELD_PREP(ZXDHQPSQ_SIGCOMPL, info->signaled) |
+ FIELD_PREP(ZXDHQPSQ_SOLICITED, info->solicited) |
+ FIELD_PREP(ZXDHQPSQ_IMMDATAFLAG, imm_data_flag) |
+ FIELD_PREP(ZXDHQPSQ_UD_INLINEDATAFLAG, 1) |
+ FIELD_PREP(ZXDHQPSQ_UD_INLINEDATALEN, op_info->len) |
+ FIELD_PREP(ZXDHQPSQ_UD_ADDFRAGCNT, quanta - 1) |
+ FIELD_PREP(ZXDHQPSQ_AHID, op_info->ah_id);
+ set_64bit_val(wqe_base, 24,
+ FIELD_PREP(ZXDHQPSQ_DESTQPN, op_info->dest_qp) |
+ FIELD_PREP(ZXDHQPSQ_DESTQKEY, op_info->qkey));
+
+ if (imm_data_flag) {
+ wqe_idx = (wqe_idx + 1) % qp->sq_ring.size;
+ if (!wqe_idx)
+ qp->swqe_polarity = !qp->swqe_polarity;
+ wqe_ex = qp->sq_base[wqe_idx].elem;
+
+ if (inline_len) {
+			/* imm and inline share the same valid bit; it is set after the inline data is written */
+ copy_size = inline_len < INLINE_DATASIZE_24BYTES ?
+ inline_len :
+ INLINE_DATASIZE_24BYTES;
+ inline_dest = (__u8 *)wqe_ex + WQE_OFFSET_8BYTES;
+ inline_src = (__u8 *)op_info->data;
+ memcpy(inline_dest, inline_src, copy_size);
+ inline_len -= copy_size;
+ inline_src += copy_size;
+ }
+ set_64bit_val(
+ wqe_ex, 0,
+ FIELD_PREP(ZXDHQPSQ_IMMDATA_VALID, qp->swqe_polarity) |
+ FIELD_PREP(ZXDHQPSQ_IMMDATA, info->imm_data));
+
+ } else if (inline_len) {
+ wqe_idx = (wqe_idx + 1) % qp->sq_ring.size;
+ if (!wqe_idx)
+ qp->swqe_polarity = !qp->swqe_polarity;
+ wqe_ex = qp->sq_base[wqe_idx].elem;
+ inline_dest = (__u8 *)wqe_ex;
+ inline_src = (__u8 *)op_info->data;
+
+ if (inline_len <= INLINE_DATASIZE_7BYTES) {
+ copy_size = inline_len;
+ memcpy(inline_dest, inline_src, copy_size);
+ inline_len = 0;
+ } else {
+ copy_size = INLINE_DATASIZE_7BYTES;
+ memcpy(inline_dest, inline_src, copy_size);
+ inline_len -= copy_size;
+ inline_src += copy_size;
+ inline_dest += WQE_OFFSET_8BYTES;
+ copy_size = inline_len < INLINE_DATASIZE_24BYTES ?
+ inline_len :
+ INLINE_DATASIZE_24BYTES;
+ memcpy(inline_dest, inline_src, copy_size);
+ inline_len -= copy_size;
+ inline_src += copy_size;
+ }
+ inline_valid_addr = (__u8 *)wqe_ex + WQE_OFFSET_7BYTES;
+ *inline_valid_addr = qp->swqe_polarity << ZXDH_INLINE_VALID_S;
+ }
+
+ while (inline_len) {
+ wqe_idx = (wqe_idx + 1) % qp->sq_ring.size;
+ if (!wqe_idx)
+ qp->swqe_polarity = !qp->swqe_polarity;
+ wqe_ex = qp->sq_base[wqe_idx].elem;
+ inline_dest = (__u8 *)wqe_ex;
+
+ if (inline_len <= INLINE_DATASIZE_7BYTES) {
+ copy_size = inline_len;
+ memcpy(inline_dest, inline_src, copy_size);
+ inline_len = 0;
+ } else {
+ copy_size = INLINE_DATASIZE_7BYTES;
+ memcpy(inline_dest, inline_src, copy_size);
+ inline_len -= copy_size;
+ inline_src += copy_size;
+ inline_dest += WQE_OFFSET_8BYTES;
+ copy_size = inline_len < INLINE_DATASIZE_24BYTES ?
+ inline_len :
+ INLINE_DATASIZE_24BYTES;
+ memcpy(inline_dest, inline_src, copy_size);
+ inline_len -= copy_size;
+ inline_src += copy_size;
+ }
+ inline_valid_addr = (__u8 *)wqe_ex + WQE_OFFSET_7BYTES;
+ *inline_valid_addr = qp->swqe_polarity << ZXDH_INLINE_VALID_S;
+ }
+
+ udma_to_device_barrier(); /* make sure WQE is populated before valid bit is set */
+
+ set_64bit_val(wqe_base, 0, hdr);
+
+ if (post_sq)
+ zxdh_qp_post_wr(qp);
+
+ return 0;
+}
+
+/**
+ * zxdh_stag_local_invalidate - stag invalidate operation
+ * @qp: hw qp ptr
+ * @info: post sq information
+ * @post_sq: flag to post sq
+ */
+enum zxdh_status_code zxdh_stag_local_invalidate(struct zxdh_qp *qp,
+ struct zxdh_post_sq_info *info,
+ bool post_sq)
+{
+ __le64 *wqe;
+ struct zxdh_inv_local_stag *op_info;
+ __u64 hdr;
+ __u32 wqe_idx;
+ bool local_fence = true;
+
+ op_info = &info->op.inv_local_stag;
+
+ wqe = zxdh_qp_get_next_send_wqe(qp, &wqe_idx, ZXDH_QP_WQE_MIN_QUANTA, 0,
+ info);
+ if (!wqe)
+ return ZXDH_ERR_QP_TOOMANY_WRS_POSTED;
+
+ zxdh_clr_wqes(qp, wqe_idx);
+
+ set_64bit_val(wqe, 16, 0);
+
+ hdr = FIELD_PREP(ZXDHQPSQ_VALID, qp->swqe_polarity) |
+ FIELD_PREP(ZXDHQPSQ_OPCODE, ZXDH_OP_TYPE_LOCAL_INV) |
+ FIELD_PREP(ZXDHQPSQ_SIGCOMPL, info->signaled) |
+ FIELD_PREP(ZXDHQPSQ_LOCALFENCE, local_fence) |
+ FIELD_PREP(ZXDHQPSQ_READFENCE, info->read_fence) |
+ FIELD_PREP(ZXDHQPSQ_REMSTAG, op_info->target_stag);
+
+ udma_to_device_barrier(); /* make sure WQE is populated before valid bit is set */
+
+ set_64bit_val(wqe, 0, hdr);
+
+ if (post_sq)
+ zxdh_qp_post_wr(qp);
+
+ return 0;
+}
+
+/**
+ * zxdh_mw_bind - bind Memory Window
+ * @qp: hw qp ptr
+ * @info: post sq information
+ * @post_sq: flag to post sq
+ */
+enum zxdh_status_code zxdh_mw_bind(struct zxdh_qp *qp,
+ struct zxdh_post_sq_info *info, bool post_sq)
+{
+ __le64 *wqe;
+ struct zxdh_bind_window *op_info;
+ __u64 hdr;
+ __u32 wqe_idx;
+ bool local_fence = true;
+ __u8 access = 1;
+ __u16 value = 0;
+
+ op_info = &info->op.bind_window;
+ local_fence |= info->local_fence;
+
+ wqe = zxdh_qp_get_next_send_wqe(qp, &wqe_idx, ZXDH_QP_WQE_MIN_QUANTA, 0,
+ info);
+ if (!wqe)
+ return ZXDH_ERR_QP_TOOMANY_WRS_POSTED;
+
+ zxdh_clr_wqes(qp, wqe_idx);
+
+ if (op_info->ena_writes) {
+ access = (op_info->ena_reads << 2) |
+ (op_info->ena_writes << 3) | (1 << 1) | access;
+ } else {
+ access = (op_info->ena_reads << 2) |
+ (op_info->ena_writes << 3) | access;
+ }
+
+ qp->wqe_ops.iw_set_mw_bind_wqe(wqe, op_info);
+
+ value = (__u16)((op_info->mw_pa_pble_index >> 12) & 0xC000000000000);
+
+ hdr = FIELD_PREP(ZXDHQPSQ_OPCODE, ZXDH_OP_TYPE_BIND_MW) |
+ FIELD_PREP(ZXDHQPSQ_MWSTAG, op_info->mw_stag) |
+ FIELD_PREP(ZXDHQPSQ_STAGRIGHTS, access) |
+ FIELD_PREP(ZXDHQPSQ_VABASEDTO,
+ (op_info->addressing_type == ZXDH_ADDR_TYPE_VA_BASED ?
+ 1 :
+ 0)) |
+ FIELD_PREP(ZXDHQPSQ_MEMWINDOWTYPE,
+ (op_info->mem_window_type_1 ? 1 : 0)) |
+ FIELD_PREP(ZXDHQPSQ_READFENCE, info->read_fence) |
+ FIELD_PREP(ZXDHQPSQ_LOCALFENCE, local_fence) |
+ FIELD_PREP(ZXDHQPSQ_SIGCOMPL, info->signaled) |
+ FIELD_PREP(ZXDHQPSQ_MW_HOST_PAGE_SIZE, op_info->host_page_size) |
+ FIELD_PREP(ZXDHQPSQ_MW_LEAF_PBL_SIZE, op_info->leaf_pbl_size) |
+ FIELD_PREP(ZXDHQPSQ_MW_PA_PBLE_THREE, value) |
+ FIELD_PREP(ZXDHQPSQ_VALID, qp->swqe_polarity);
+
+ udma_to_device_barrier(); /* make sure WQE is populated before valid bit is set */
+
+ set_64bit_val(wqe, 0, hdr);
+
+ if (post_sq)
+ zxdh_qp_post_wr(qp);
+
+ return 0;
+}
+
+static void zxdh_sleep_ns(unsigned int nanoseconds)
+{
+ struct timespec req;
+
+ req.tv_sec = 0;
+ req.tv_nsec = nanoseconds;
+ nanosleep(&req, NULL);
+}
+
+/**
+ * zxdh_post_receive - post receive wqe
+ * @qp: hw qp ptr
+ * @info: post rq information
+ */
+enum zxdh_status_code zxdh_post_receive(struct zxdh_qp *qp,
+ struct zxdh_post_rq_info *info)
+{
+ __u32 wqe_idx, i, byte_off;
+ __le64 *wqe;
+ struct zxdh_sge *sge;
+
+ if (qp->max_rq_frag_cnt < info->num_sges)
+ return ZXDH_ERR_INVALID_FRAG_COUNT;
+
+ wqe = zxdh_qp_get_next_recv_wqe(qp, &wqe_idx);
+ if (unlikely(!wqe))
+ return ZXDH_ERR_QP_TOOMANY_WRS_POSTED;
+
+ qp->rq_wrid_array[wqe_idx] = info->wr_id;
+
+ for (i = 0, byte_off = ZXDH_QP_FRAG_BYTESIZE; i < info->num_sges; i++) {
+ sge = &info->sg_list[i];
+ set_64bit_val(wqe, byte_off, sge->tag_off);
+ set_64bit_val(wqe, byte_off + 8,
+ FIELD_PREP(ZXDHQPRQ_FRAG_LEN, sge->len) |
+ FIELD_PREP(ZXDHQPRQ_STAG, sge->stag));
+ byte_off += ZXDH_QP_FRAG_BYTESIZE;
+ }
+
+	/*
+	 * While info->num_sges < qp->max_rq_frag_cnt, or info->num_sges == 0,
+	 * fill the next fragment with FRAG_LEN=0 and FRAG_STAG=0x00000100,
+	 * which marks it as an invalid fragment.
+	 */
+ if (info->num_sges < qp->max_rq_frag_cnt || 0 == info->num_sges) {
+ set_64bit_val(wqe, byte_off, 0);
+ set_64bit_val(wqe, byte_off + 8,
+ FIELD_PREP(ZXDHQPRQ_FRAG_LEN, 0) |
+ FIELD_PREP(ZXDHQPRQ_STAG, 0x00000100));
+ }
+
+ set_64bit_val(wqe, 0,
+ FIELD_PREP(ZXDHQPRQ_ADDFRAGCNT, info->num_sges) |
+ FIELD_PREP(ZXDHQPRQ_SIGNATURE,
+ qp->rwqe_signature));
+
+ udma_to_device_barrier(); /* make sure WQE is populated before valid bit is set */
+ if (info->num_sges > 3)
+ zxdh_sleep_ns(1000);
+
+ set_64bit_val(wqe, 8, FIELD_PREP(ZXDHQPRQ_VALID, qp->rwqe_polarity));
+
+ return 0;
+}
+
+/**
+ * zxdh_cq_resize - reset the cq buffer info
+ * @cq: cq to resize
+ * @cq_base: new cq buffer addr
+ * @cq_size: number of cqes
+ */
+void zxdh_cq_resize(struct zxdh_cq *cq, void *cq_base, int cq_size)
+{
+ cq->cq_base = cq_base;
+ cq->cq_size = cq_size;
+ ZXDH_RING_INIT(cq->cq_ring, cq->cq_size);
+ cq->polarity = 1;
+}
+
+/**
+ * zxdh_cq_set_resized_cnt - record the count of the resized buffers
+ * @cq: cq to resize
+ * @cq_cnt: the count of the resized cq buffers
+ */
+void zxdh_cq_set_resized_cnt(struct zxdh_cq *cq, __u16 cq_cnt)
+{
+ __u64 temp_val;
+ __u16 sw_cq_sel;
+ __u8 arm_next;
+ __u8 arm_seq_num;
+
+ get_64bit_val(cq->shadow_area, 0, &temp_val);
+
+ sw_cq_sel = (__u16)FIELD_GET(ZXDH_CQ_DBSA_SW_CQ_SELECT, temp_val);
+ sw_cq_sel += cq_cnt;
+
+ arm_seq_num = (__u8)FIELD_GET(ZXDH_CQ_DBSA_ARM_SEQ_NUM, temp_val);
+ arm_next = (__u8)FIELD_GET(ZXDH_CQ_DBSA_ARM_NEXT, temp_val);
+ cq->cqe_rd_cnt = 0;
+
+ temp_val = FIELD_PREP(ZXDH_CQ_DBSA_ARM_SEQ_NUM, arm_seq_num) |
+ FIELD_PREP(ZXDH_CQ_DBSA_SW_CQ_SELECT, sw_cq_sel) |
+ FIELD_PREP(ZXDH_CQ_DBSA_ARM_NEXT, arm_next) |
+ FIELD_PREP(ZXDH_CQ_DBSA_CQEIDX, cq->cqe_rd_cnt);
+
+ set_64bit_val(cq->shadow_area, 0, temp_val);
+}
+
+/**
+ * zxdh_cq_request_notification - cq notification request (doorbell)
+ * @cq: hw cq
+ * @cq_notify: notification type
+ */
+void zxdh_cq_request_notification(struct zxdh_cq *cq,
+ enum zxdh_cmpl_notify cq_notify)
+{
+ __u64 temp_val;
+ __u16 sw_cq_sel;
+ __u8 arm_next = 0;
+ __u8 arm_seq_num;
+ __u32 cqe_index;
+ __u32 hdr;
+
+ get_64bit_val(cq->shadow_area, 0, &temp_val);
+ arm_seq_num = (__u8)FIELD_GET(ZXDH_CQ_DBSA_ARM_SEQ_NUM, temp_val);
+ arm_seq_num++;
+ sw_cq_sel = (__u16)FIELD_GET(ZXDH_CQ_DBSA_SW_CQ_SELECT, temp_val);
+ cqe_index = (__u32)FIELD_GET(ZXDH_CQ_DBSA_CQEIDX, temp_val);
+
+ if (cq_notify == ZXDH_CQ_COMPL_SOLICITED)
+ arm_next = 1;
+ temp_val = FIELD_PREP(ZXDH_CQ_DBSA_ARM_SEQ_NUM, arm_seq_num) |
+ FIELD_PREP(ZXDH_CQ_DBSA_SW_CQ_SELECT, sw_cq_sel) |
+ FIELD_PREP(ZXDH_CQ_DBSA_ARM_NEXT, arm_next) |
+ FIELD_PREP(ZXDH_CQ_DBSA_CQEIDX, cqe_index);
+
+ set_64bit_val(cq->shadow_area, 0, temp_val);
+
+ hdr = FIELD_PREP(ZXDH_CQ_ARM_DBSA_VLD, 0) |
+ FIELD_PREP(ZXDH_CQ_ARM_CQ_ID, cq->cq_id);
+
+ udma_to_device_barrier(); /* make sure WQE is populated before valid bit is set */
+
+ db_wr32(hdr, cq->cqe_alloc_db);
+}
+
+static inline void build_comp_status(__u32 cq_type,
+ struct zxdh_cq_poll_info *info)
+{
+ if (!info->error) {
+ info->comp_status = ZXDH_COMPL_STATUS_SUCCESS;
+ if (cq_type == ZXDH_CQE_QTYPE_RQ) {
+ if (info->major_err != ERROR_CODE_VALUE &&
+ info->minor_err != ERROR_CODE_VALUE) {
+ info->comp_status = ZXDH_COMPL_STATUS_UNKNOWN;
+ }
+ }
+ return;
+ }
+ if (info->major_err == ZXDH_RETRY_ACK_MAJOR_ERR &&
+ info->minor_err == ZXDH_RETRY_ACK_MINOR_ERR) {
+ info->comp_status = ZXDH_COMPL_STATUS_RETRY_ACK_ERR;
+ return;
+ }
+ if (info->major_err == ZXDH_RETRY_ACK_MAJOR_ERR &&
+ info->minor_err == ZXDH_TX_WINDOW_QUERY_ITEM_MINOR_ERR) {
+ info->comp_status = ZXDH_COMPL_STATUS_TX_WINDOW_QUERY_ITEM_ERR;
+ return;
+ }
+ info->comp_status = (info->major_err == ZXDH_FLUSH_MAJOR_ERR) ?
+ ZXDH_COMPL_STATUS_FLUSHED :
+ ZXDH_COMPL_STATUS_UNKNOWN;
+}
+
+__le64 *get_current_cqe(struct zxdh_cq *cq)
+{
+ return ZXDH_GET_CURRENT_EXTENDED_CQ_ELEM(cq);
+}
+
+static inline void zxdh_get_cq_poll_info(struct zxdh_qp *qp,
+ struct zxdh_cq_poll_info *info,
+ __u64 qword2, __u64 qword3)
+{
+ __u8 qp_type;
+
+ qp_type = qp->qp_type;
+
+ info->imm_valid = (bool)FIELD_GET(ZXDH_CQ_IMMVALID, qword2);
+ if (info->imm_valid) {
+ info->imm_data = (__u32)FIELD_GET(ZXDH_CQ_IMMDATA, qword3);
+ info->op_type = ZXDH_OP_TYPE_REC_IMM;
+ } else {
+ info->op_type = ZXDH_OP_TYPE_REC;
+ }
+
+ info->bytes_xfered = (__u32)FIELD_GET(ZXDHCQ_PAYLDLEN, qword3);
+
+ if (likely(qp_type == ZXDH_QP_TYPE_ROCE_RC)) {
+ if (qword2 & ZXDHCQ_STAG) {
+ info->stag_invalid_set = true;
+ info->inv_stag =
+ (__u32)FIELD_GET(ZXDHCQ_INVSTAG, qword2);
+ } else {
+ info->stag_invalid_set = false;
+ }
+ } else if (qp_type == ZXDH_QP_TYPE_ROCE_UD) {
+ info->ipv4 = (bool)FIELD_GET(ZXDHCQ_IPV4, qword2);
+ info->ud_src_qpn = (__u32)FIELD_GET(ZXDHCQ_UDSRCQPN, qword2);
+ }
+}
+
+static void update_cq_poll_info(struct zxdh_qp *qp,
+ struct zxdh_cq_poll_info *info, __u32 wqe_idx,
+ __u64 qword0)
+{
+ info->wr_id = qp->sq_wrtrk_array[wqe_idx].wrid;
+ if (!info->comp_status)
+ info->bytes_xfered = qp->sq_wrtrk_array[wqe_idx].wr_len;
+ info->op_type = (__u8)FIELD_GET(ZXDHCQ_OP, qword0);
+ ZXDH_RING_SET_TAIL(qp->sq_ring,
+ wqe_idx + qp->sq_wrtrk_array[wqe_idx].quanta);
+}
+
+static enum zxdh_status_code
+process_tx_window_query_item_err(struct zxdh_qp *qp,
+ struct zxdh_cq_poll_info *info)
+{
+ int ret;
+ struct ibv_qp *ib_qp;
+ struct zxdh_uqp *iwuqp;
+ struct zxdh_rdma_qpc qpc = { 0 };
+
+ iwuqp = container_of(qp, struct zxdh_uqp, qp);
+ ib_qp = &iwuqp->vqp.qp;
+ ret = zxdh_query_qpc(ib_qp, &qpc);
+ if (ret) {
+ verbs_err(verbs_get_ctx(ib_qp->context),
+ "process tx window query item query qpc failed:%d\n",
+ ret);
+ return ZXDH_ERR_RETRY_ACK_ERR;
+ }
+ if (qpc.tx_last_ack_psn != qp->qp_last_ack_qsn)
+ qp->qp_reset_cnt = 0;
+
+ qp->qp_last_ack_qsn = qpc.tx_last_ack_psn;
+ if (qp->qp_reset_cnt >= ZXDH_QP_RETRY_COUNT)
+ return ZXDH_ERR_RETRY_ACK_ERR;
+
+ ret = zxdh_reset_qp(ib_qp, ZXDH_RESET_RETRY_TX_ITEM_FLAG);
+ if (ret) {
+ verbs_err(verbs_get_ctx(ib_qp->context),
+ "process tx window query item reset qp failed:%d\n",
+ ret);
+ return ZXDH_ERR_RETRY_ACK_ERR;
+ }
+ qp->qp_reset_cnt++;
+ return ZXDH_ERR_RETRY_ACK_NOT_EXCEED_ERR;
+}
+
+static enum zxdh_status_code
+process_retry_ack_err(struct zxdh_qp *qp, struct zxdh_cq_poll_info *info)
+{
+ int ret;
+ struct ibv_qp *ib_qp;
+ struct zxdh_uqp *iwuqp;
+ struct zxdh_rdma_qpc qpc = { 0 };
+ struct zxdh_rdma_qpc qpc_req_cmd = { 0 };
+
+ iwuqp = container_of(qp, struct zxdh_uqp, qp);
+
+ ib_qp = &iwuqp->vqp.qp;
+ ret = zxdh_query_qpc(ib_qp, &qpc);
+ if (ret) {
+ verbs_err(verbs_get_ctx(ib_qp->context),
+ "process retry ack query qpc failed:%d\n", ret);
+ return ZXDH_ERR_RETRY_ACK_ERR;
+ }
+ if (!(qpc.retry_cqe_sq_opcode >= ZXDH_RETRY_CQE_SQ_OPCODE_ERR &&
+ (qpc.recv_err_flag == ZXDH_RECV_ERR_FLAG_NAK_RNR_NAK ||
+ qpc.recv_err_flag == ZXDH_RECV_ERR_FLAG_READ_RESP))) {
+ return ZXDH_ERR_RETRY_ACK_ERR;
+ }
+ if (qpc.tx_last_ack_psn != qp->cqe_last_ack_qsn)
+ qp->cqe_retry_cnt = 0;
+
+ qp->cqe_last_ack_qsn = qpc.tx_last_ack_psn;
+ if (qp->cqe_retry_cnt >= ZXDH_QP_RETRY_COUNT)
+ return ZXDH_ERR_RETRY_ACK_ERR;
+
+ memcpy(&qpc_req_cmd, &qpc, sizeof(qpc));
+ qpc_req_cmd.package_err_flag = 0;
+ qpc_req_cmd.ack_err_flag = 0;
+ qpc_req_cmd.err_flag = 0;
+ qpc_req_cmd.retry_cqe_sq_opcode &= ZXDH_RESET_RETRY_CQE_SQ_OPCODE_ERR;
+ qpc_req_cmd.cur_retry_count = qpc.retry_count;
+ ret = zxdh_modify_qpc(ib_qp, &qpc_req_cmd,
+ ZXDH_PACKAGE_ERR_FLAG | ZXDH_ERR_FLAG_SET |
+ ZXDH_RETRY_CQE_SQ_OPCODE |
+ ZXDH_TX_READ_RETRY_FLAG_SET);
+ if (ret) {
+ verbs_err(verbs_get_ctx(ib_qp->context),
+ "process retry ack modify qpc failed:%d\n", ret);
+ return ZXDH_ERR_RETRY_ACK_ERR;
+ }
+ qp->cqe_retry_cnt++;
+ return ZXDH_ERR_RETRY_ACK_NOT_EXCEED_ERR;
+}
+
+/**
+ * zxdh_cq_poll_cmpl - get cq completion info
+ * @cq: hw cq
+ * @info: cq poll information returned
+ */
+enum zxdh_status_code zxdh_cq_poll_cmpl(struct zxdh_cq *cq,
+ struct zxdh_cq_poll_info *info)
+{
+ enum zxdh_status_code status_code;
+ __u64 comp_ctx, qword0, qword2, qword3;
+ __le64 *cqe;
+ struct zxdh_qp *qp;
+ struct zxdh_ring *pring = NULL;
+ __u32 wqe_idx, q_type;
+ int ret_code;
+ bool move_cq_head = true;
+ __u8 polarity;
+ struct zxdh_usrq *iwusrq = NULL;
+ struct zxdh_srq *srq = NULL;
+ struct zxdh_uqp *iwuqp;
+
+ cqe = get_current_cqe(cq);
+
+ get_64bit_val(cqe, 0, &qword0);
+ polarity = (__u8)FIELD_GET(ZXDH_CQ_VALID, qword0);
+ if (polarity != cq->polarity)
+ return ZXDH_ERR_Q_EMPTY;
+
+ /* Ensure CQE contents are read after valid bit is checked */
+ udma_from_device_barrier();
+ get_64bit_val(cqe, 8, &comp_ctx);
+ get_64bit_val(cqe, 16, &qword2);
+ get_64bit_val(cqe, 24, &qword3);
+
+ qp = (struct zxdh_qp *)(unsigned long)comp_ctx;
+ if (unlikely(!qp || qp->destroy_pending)) {
+ ret_code = ZXDH_ERR_Q_DESTROYED;
+ goto exit;
+ }
+ iwuqp = container_of(qp, struct zxdh_uqp, qp);
+ info->qp_handle = (zxdh_qp_handle)(unsigned long)qp;
+ q_type = (__u8)FIELD_GET(ZXDH_CQ_SQ, qword0);
+ info->solicited_event = (bool)FIELD_GET(ZXDHCQ_SOEVENT, qword0);
+ wqe_idx = (__u32)FIELD_GET(ZXDH_CQ_WQEIDX, qword0);
+ info->error = (bool)FIELD_GET(ZXDH_CQ_ERROR, qword0);
+ info->major_err = FIELD_GET(ZXDH_CQ_MAJERR, qword0);
+ info->minor_err = FIELD_GET(ZXDH_CQ_MINERR, qword0);
+
+ /* Set the min error to standard flush error code for remaining cqes */
+ if (unlikely(info->error && info->major_err == ZXDH_FLUSH_MAJOR_ERR &&
+ info->minor_err != FLUSH_GENERAL_ERR)) {
+ qword0 &= ~ZXDH_CQ_MINERR;
+ qword0 |= FIELD_PREP(ZXDH_CQ_MINERR, FLUSH_GENERAL_ERR);
+ set_64bit_val(cqe, 0, qword0);
+ }
+ build_comp_status(q_type, info);
+
+ info->qp_id = (__u32)FIELD_GET(ZXDHCQ_QPID, qword2);
+ info->imm_valid = false;
+
+ switch (q_type) {
+ case ZXDH_CQE_QTYPE_RQ:
+ if (qp->is_srq) {
+ iwusrq = iwuqp->srq;
+ srq = &iwusrq->srq;
+ zxdh_free_srq_wqe(srq, wqe_idx);
+ info->wr_id = srq->srq_wrid_array[wqe_idx];
+ zxdh_get_cq_poll_info(qp, info, qword2, qword3);
+ } else {
+ if (unlikely(info->comp_status ==
+ ZXDH_COMPL_STATUS_FLUSHED ||
+ info->comp_status ==
+ ZXDH_COMPL_STATUS_UNKNOWN)) {
+ if (!ZXDH_RING_MORE_WORK(qp->rq_ring)) {
+ ret_code = ZXDH_ERR_Q_EMPTY;
+ goto exit;
+ }
+ wqe_idx = qp->rq_ring.tail;
+ }
+ info->wr_id = qp->rq_wrid_array[wqe_idx];
+ zxdh_get_cq_poll_info(qp, info, qword2, qword3);
+ ZXDH_RING_SET_TAIL(qp->rq_ring, wqe_idx + 1);
+ if (info->comp_status == ZXDH_COMPL_STATUS_FLUSHED) {
+ qp->rq_flush_seen = true;
+ if (!ZXDH_RING_MORE_WORK(qp->rq_ring))
+ qp->rq_flush_complete = true;
+ else
+ move_cq_head = false;
+ }
+ pring = &qp->rq_ring;
+ }
+ ret_code = ZXDH_SUCCESS;
+ break;
+ case ZXDH_CQE_QTYPE_SQ:
+ if (info->comp_status == ZXDH_COMPL_STATUS_RETRY_ACK_ERR &&
+ qp->qp_type == ZXDH_QP_TYPE_ROCE_RC) {
+ status_code = process_retry_ack_err(qp, info);
+ if (status_code == ZXDH_ERR_RETRY_ACK_ERR) {
+ update_cq_poll_info(qp, info, wqe_idx, qword0);
+ ret_code = ZXDH_SUCCESS;
+ } else {
+ ret_code = status_code;
+ }
+ } else if (info->comp_status ==
+ ZXDH_COMPL_STATUS_TX_WINDOW_QUERY_ITEM_ERR &&
+ qp->qp_type == ZXDH_QP_TYPE_ROCE_RC) {
+ status_code =
+ process_tx_window_query_item_err(qp, info);
+ if (status_code == ZXDH_ERR_RETRY_ACK_ERR) {
+ update_cq_poll_info(qp, info, wqe_idx, qword0);
+ ret_code = ZXDH_SUCCESS;
+ } else {
+ ret_code = status_code;
+ }
+ } else if (info->comp_status == ZXDH_COMPL_STATUS_FLUSHED) {
+ info->wr_id = qp->sq_wrtrk_array[wqe_idx].wrid;
+ ZXDH_RING_INIT(qp->sq_ring, qp->sq_ring.size);
+ ret_code = ZXDH_SUCCESS;
+ } else {
+ update_cq_poll_info(qp, info, wqe_idx, qword0);
+ ret_code = ZXDH_SUCCESS;
+ }
+ break;
+ default:
+ zxdh_dbg(verbs_get_ctx(iwuqp->vqp.qp.context), ZXDH_DBG_CQ,
+			 "zxdh get cqe type unknown!\n");
+ ret_code = ZXDH_ERR_Q_DESTROYED;
+ break;
+ }
+exit:
+ if (move_cq_head) {
+ __u64 cq_shadow_temp;
+
+ ZXDH_RING_MOVE_HEAD_NOCHECK(cq->cq_ring);
+ if (!ZXDH_RING_CURRENT_HEAD(cq->cq_ring))
+ cq->polarity ^= 1;
+
+ ZXDH_RING_MOVE_TAIL(cq->cq_ring);
+ cq->cqe_rd_cnt++;
+ get_64bit_val(cq->shadow_area, 0, &cq_shadow_temp);
+ cq_shadow_temp &= ~ZXDH_CQ_DBSA_CQEIDX;
+ cq_shadow_temp |=
+ FIELD_PREP(ZXDH_CQ_DBSA_CQEIDX, cq->cqe_rd_cnt);
+ set_64bit_val(cq->shadow_area, 0, cq_shadow_temp);
+ } else {
+ qword0 &= ~ZXDH_CQ_WQEIDX;
+ qword0 |= FIELD_PREP(ZXDH_CQ_WQEIDX, pring->tail);
+ set_64bit_val(cqe, 0, qword0);
+ }
+
+ return ret_code;
+}
+
+/**
+ * zxdh_qp_round_up - round up qp wq depth to the next power of two
+ * @wqdepth: wq depth in quanta to round up
+ */
+int zxdh_qp_round_up(__u32 wqdepth)
+{
+ int scount = 1;
+
+ for (wqdepth--; scount <= 16; scount *= 2)
+ wqdepth |= wqdepth >> scount;
+
+ return ++wqdepth;
+}
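+
+/*
+ * Illustration only, not part of the driver: the loop above smears the
+ * highest set bit downward so that adding one yields the next power of two.
+ * A worked example for a requested depth of 200 quanta:
+ *
+ *   wqdepth = 200;              // 0b11001000
+ *   wqdepth--;                  // 199 = 0b11000111
+ *   wqdepth |= wqdepth >> 1;    // 0b11100111
+ *   wqdepth |= wqdepth >> 2;    // 0b11111111 (255)
+ *   // the >> 4, >> 8 and >> 16 steps no longer change the value
+ *   return ++wqdepth;           // 256
+ */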
+
+/**
+ * zxdh_cq_round_up - round up cq wq depth to the next power of two
+ * @wqdepth: wq depth in quanta to round up
+ */
+int zxdh_cq_round_up(__u32 wqdepth)
+{
+ int scount = 1;
+
+ for (wqdepth--; scount <= 16; scount *= 2)
+ wqdepth |= wqdepth >> scount;
+
+ return ++wqdepth;
+}
+
+/**
+ * zxdh_get_rq_wqe_shift - get shift count for maximum rq wqe size
+ * @sge: Maximum Scatter Gather Elements wqe
+ * @shift: Returns the shift needed based on sge
+ *
+ * Shift can be used to left shift the rq wqe size based on number of SGEs.
+ * For 1 SGE, shift = 1 (wqe size of 2*16 bytes).
+ * For 2 or 3 SGEs, shift = 2 (wqe size of 4*16 bytes).
+ * For 4-7 SGEs, shift = 3 (wqe size of 8*16 bytes).
+ * For 8-15 SGEs, shift = 4 (wqe size of 16*16 bytes); otherwise shift = 5
+ * (wqe size of 512 bytes).
+ */
+void zxdh_get_rq_wqe_shift(__u32 sge, __u8 *shift)
+{
+	*shift = 0; /* 16-byte RQE; configuration to be confirmed */
+ if (sge < 2)
+ *shift = 1;
+ else if (sge < 4)
+ *shift = 2;
+ else if (sge < 8)
+ *shift = 3;
+ else if (sge < 16)
+ *shift = 4;
+ else
+ *shift = 5;
+}
+
+/**
+ * zxdh_get_sq_wqe_shift - get shift count for maximum wqe size
+ * @sge: Maximum Scatter Gather Elements wqe
+ * @inline_data: Maximum inline data size
+ * @shift: Returns the shift needed based on sge
+ *
+ * Shift can be used to left shift the wqe size based on the number of SGEs
+ * and the inline data size.
+ * To support WRs with imm_data, shift = 1 (wqe size of 2*32 bytes).
+ * For 2-7 SGEs or 24 < inline data <= 86, shift = 2 (wqe size of 4*32 bytes).
+ * Otherwise, shift = 3 (wqe size of 8*32 = 256 bytes).
+ */
+void zxdh_get_sq_wqe_shift(__u32 sge, __u32 inline_data, __u8 *shift)
+{
+ *shift = 1;
+
+ if (sge > 1 || inline_data > 24) {
+ if (sge < 8 && inline_data <= 86)
+ *shift = 2;
+ else
+ *shift = 3;
+ }
+}
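+
+/*
+ * Illustration only, not part of the driver: a minimal sketch of how the
+ * shift above feeds SQ sizing, assuming a caller with 4 SGEs and 100 bytes
+ * of inline data:
+ *
+ *   __u8 shift;
+ *   zxdh_get_sq_wqe_shift(4, 100, &shift);  // inline > 86, so shift = 3
+ *   size_t wqe_bytes = 32u << shift;        // 8 * 32 = 256-byte WQE
+ */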
+
+/*
+ * zxdh_get_sqdepth - get SQ depth (quanta)
+ * @dev_attrs: qp HW attributes
+ * @sq_size: SQ size
+ * @shift: shift which determines size of WQE
+ * @sqdepth: depth of SQ
+ *
+ */
+enum zxdh_status_code zxdh_get_sqdepth(struct zxdh_dev_attrs *dev_attrs,
+ __u32 sq_size, __u8 shift,
+ __u32 *sqdepth)
+{
+ if (sq_size > ZXDH_MAX_SQ_DEPTH)
+ return ZXDH_ERR_INVALID_SIZE;
+
+ *sqdepth = zxdh_qp_round_up((sq_size << shift) + ZXDH_SQ_RSVD);
+
+ if (*sqdepth < (ZXDH_QP_SW_MIN_WQSIZE << shift))
+ *sqdepth = ZXDH_QP_SW_MIN_WQSIZE << shift;
+ else if (*sqdepth > dev_attrs->max_hw_wq_quanta)
+ return ZXDH_ERR_INVALID_SIZE;
+
+ return 0;
+}
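+
+/*
+ * Illustration only, not part of the driver: a worked example of the depth
+ * calculation, assuming sq_size = 100 WRs, shift = 2 and leaving
+ * ZXDH_SQ_RSVD symbolic:
+ *
+ *   __u32 sqdepth;
+ *   int err = zxdh_get_sqdepth(dev_attrs, 100, 2, &sqdepth);
+ *   // on success, sqdepth = zxdh_qp_round_up((100 << 2) + ZXDH_SQ_RSVD),
+ *   // i.e. the next power of two >= 400 + ZXDH_SQ_RSVD, clamped between
+ *   // ZXDH_QP_SW_MIN_WQSIZE << 2 and dev_attrs->max_hw_wq_quanta
+ */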
+
+/*
+ * zxdh_get_rqdepth - get RQ depth (quanta)
+ * @dev_attrs: qp HW attributes
+ * @rq_size: RQ size
+ * @shift: shift which determines size of WQE
+ * @rqdepth: depth of RQ
+ */
+enum zxdh_status_code zxdh_get_rqdepth(struct zxdh_dev_attrs *dev_attrs,
+ __u32 rq_size, __u8 shift,
+ __u32 *rqdepth)
+{
+ *rqdepth = zxdh_qp_round_up((rq_size << shift) + ZXDH_RQ_RSVD);
+
+ if (*rqdepth < (ZXDH_QP_SW_MIN_WQSIZE << shift))
+ *rqdepth = ZXDH_QP_SW_MIN_WQSIZE << shift;
+ else if (*rqdepth > dev_attrs->max_hw_rq_quanta)
+ return ZXDH_ERR_INVALID_SIZE;
+
+ return 0;
+}
+
+static const struct zxdh_wqe_ops iw_wqe_ops = {
+ .iw_copy_inline_data = zxdh_copy_inline_data,
+ .iw_inline_data_size_to_quanta = zxdh_inline_data_size_to_quanta,
+ .iw_set_fragment = zxdh_set_fragment,
+ .iw_set_mw_bind_wqe = zxdh_set_mw_bind_wqe,
+};
+
+/**
+ * zxdh_qp_init - initialize shared qp
+ * @qp: hw qp (user and kernel)
+ * @info: qp initialization info
+ *
+ * initializes the vars used in both user and kernel mode.
+ * size of the wqe depends on the maximum number of fragments
+ * allowed. Then size of wqe * the number of wqes should be the
+ * amount of memory allocated for sq and rq.
+ */
+enum zxdh_status_code zxdh_qp_init(struct zxdh_qp *qp,
+ struct zxdh_qp_init_info *info)
+{
+ enum zxdh_status_code ret_code = 0;
+ __u32 sq_ring_size;
+ __u8 sqshift, rqshift;
+
+ qp->dev_attrs = info->dev_attrs;
+ if (info->max_sq_frag_cnt > qp->dev_attrs->max_hw_wq_frags ||
+ info->max_rq_frag_cnt > qp->dev_attrs->max_hw_wq_frags)
+ return ZXDH_ERR_INVALID_FRAG_COUNT;
+
+ zxdh_get_rq_wqe_shift(info->max_rq_frag_cnt, &rqshift);
+ zxdh_get_sq_wqe_shift(info->max_sq_frag_cnt, info->max_inline_data,
+ &sqshift);
+
+ qp->qp_caps = info->qp_caps;
+ qp->sq_base = info->sq;
+ qp->rq_base = info->rq;
+ qp->qp_type = info->type;
+ qp->shadow_area = info->shadow_area;
+ set_64bit_val(qp->shadow_area, 0, 0x8000);
+ qp->sq_wrtrk_array = info->sq_wrtrk_array;
+
+ qp->rq_wrid_array = info->rq_wrid_array;
+ qp->wqe_alloc_db = info->wqe_alloc_db;
+ qp->qp_id = info->qp_id;
+ qp->sq_size = info->sq_size;
+ qp->push_mode = false;
+ qp->max_sq_frag_cnt = info->max_sq_frag_cnt;
+ sq_ring_size = qp->sq_size << sqshift;
+ ZXDH_RING_INIT(qp->sq_ring, sq_ring_size);
+ ZXDH_RING_INIT(qp->initial_ring, sq_ring_size);
+ qp->swqe_polarity = 0;
+ qp->swqe_polarity_deferred = 1;
+ qp->rwqe_polarity = 0;
+ qp->rwqe_signature = 0;
+ qp->rq_size = info->rq_size;
+ qp->max_rq_frag_cnt = info->max_rq_frag_cnt;
+ qp->max_inline_data = (info->max_inline_data == 0) ?
+ ZXDH_MAX_INLINE_DATA_SIZE :
+ info->max_inline_data;
+ qp->rq_wqe_size = rqshift;
+ ZXDH_RING_INIT(qp->rq_ring, qp->rq_size);
+ qp->rq_wqe_size_multiplier = 1 << rqshift;
+ qp->wqe_ops = iw_wqe_ops;
+ return ret_code;
+}
+
+/**
+ * zxdh_cq_init - initialize shared cq (user and kernel)
+ * @cq: hw cq
+ * @info: hw cq initialization info
+ */
+enum zxdh_status_code zxdh_cq_init(struct zxdh_cq *cq,
+ struct zxdh_cq_init_info *info)
+{
+ cq->cq_base = info->cq_base;
+ cq->cq_id = info->cq_id;
+ cq->cq_size = info->cq_size;
+ cq->cqe_alloc_db = info->cqe_alloc_db;
+ cq->cq_ack_db = info->cq_ack_db;
+ cq->shadow_area = info->shadow_area;
+ cq->cqe_size = info->cqe_size;
+ ZXDH_RING_INIT(cq->cq_ring, cq->cq_size);
+ cq->polarity = 1;
+ cq->cqe_rd_cnt = 0;
+
+ return 0;
+}
+
+/**
+ * zxdh_clean_cq - clean cq entries
+ * @q: completion context
+ * @cq: cq to clean
+ */
+void zxdh_clean_cq(void *q, struct zxdh_cq *cq)
+{
+ __le64 *cqe;
+ __u64 qword3, comp_ctx;
+ __u32 cq_head;
+ __u8 polarity, temp;
+
+ cq_head = cq->cq_ring.head;
+ temp = cq->polarity;
+ do {
+ if (cq->cqe_size)
+ cqe = ((struct zxdh_extended_cqe
+ *)(cq->cq_base))[cq_head]
+ .buf;
+ else
+ cqe = cq->cq_base[cq_head].buf;
+ get_64bit_val(cqe, 24, &qword3);
+ polarity = (__u8)FIELD_GET(ZXDH_CQ_VALID, qword3);
+
+ if (polarity != temp)
+ break;
+
+ get_64bit_val(cqe, 8, &comp_ctx);
+ if ((void *)(uintptr_t)comp_ctx == q)
+ set_64bit_val(cqe, 8, 0);
+
+ cq_head = (cq_head + 1) % cq->cq_ring.size;
+ if (!cq_head)
+ temp ^= 1;
+ } while (true);
+}
+
+/**
+ * zxdh_nop - post a nop
+ * @qp: hw qp ptr
+ * @wr_id: work request id
+ * @signaled: signaled for completion
+ * @post_sq: ring doorbell
+ */
+enum zxdh_status_code zxdh_nop(struct zxdh_qp *qp, __u64 wr_id, bool signaled,
+ bool post_sq)
+{
+ __le64 *wqe;
+ __u64 hdr;
+ __u32 wqe_idx;
+ struct zxdh_post_sq_info info = {};
+
+ info.push_wqe = false;
+ info.wr_id = wr_id;
+ wqe = zxdh_qp_get_next_send_wqe(qp, &wqe_idx, ZXDH_QP_WQE_MIN_QUANTA, 0,
+ &info);
+ if (!wqe)
+ return ZXDH_ERR_QP_TOOMANY_WRS_POSTED;
+
+ zxdh_clr_wqes(qp, wqe_idx);
+
+ set_64bit_val(wqe, 0, 0);
+ set_64bit_val(wqe, 8, 0);
+ set_64bit_val(wqe, 16, 0);
+
+ hdr = FIELD_PREP(ZXDHQPSQ_OPCODE, ZXDHQP_OP_NOP) |
+ FIELD_PREP(ZXDHQPSQ_SIGCOMPL, signaled) |
+ FIELD_PREP(ZXDHQPSQ_VALID, qp->swqe_polarity);
+
+ udma_to_device_barrier(); /* make sure WQE is populated before valid bit is set */
+
+ set_64bit_val(wqe, 24, hdr);
+ if (post_sq)
+ zxdh_qp_post_wr(qp);
+
+ return 0;
+}
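+
+/*
+ * Illustration only, not part of the driver: a signaled NOP generates a
+ * completion without moving data, e.g. to exercise the SQ doorbell path.
+ * A minimal sketch, assuming an initialized qp:
+ *
+ *   if (zxdh_nop(qp, (__u64)(uintptr_t)cookie, true, true))
+ *           recover_from_full_sq();  // hypothetical error path
+ */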
+
+/**
+ * zxdh_fragcnt_to_quanta_sq - calculate quanta based on fragment count for SQ
+ * @frag_cnt: number of fragments
+ * @quanta: quanta for frag_cnt
+ */
+enum zxdh_status_code zxdh_fragcnt_to_quanta_sq(__u32 frag_cnt, __u16 *quanta)
+{
+ if (frag_cnt > ZXDH_MAX_SQ_FRAG)
+ return ZXDH_ERR_INVALID_FRAG_COUNT;
+ *quanta = frag_cnt / 2 + 1;
+ return 0;
+}
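+
+/*
+ * Illustration only, not part of the driver: with two fragments packed per
+ * additional quantum, frag_cnt = 3 yields *quanta = 3/2 + 1 = 2 and
+ * frag_cnt = 4 yields 4/2 + 1 = 3; anything above ZXDH_MAX_SQ_FRAG fails
+ * with ZXDH_ERR_INVALID_FRAG_COUNT.
+ */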
+
+/**
+ * zxdh_fragcnt_to_wqesize_rq - calculate wqe size based on fragment count for RQ
+ * @frag_cnt: number of fragments
+ * @wqe_size: size in bytes given frag_cnt
+ */
+enum zxdh_status_code zxdh_fragcnt_to_wqesize_rq(__u32 frag_cnt,
+ __u16 *wqe_size)
+{
+ switch (frag_cnt) {
+ case 0:
+ case 1:
+ *wqe_size = 32;
+ break;
+ case 2:
+ case 3:
+ *wqe_size = 64;
+ break;
+ case 4:
+ case 5:
+ case 6:
+ case 7:
+ *wqe_size = 128;
+ break;
+ case 8:
+ case 9:
+ case 10:
+ case 11:
+ case 12:
+ case 13:
+ case 14:
+ *wqe_size = 256;
+ break;
+ default:
+ return ZXDH_ERR_INVALID_FRAG_COUNT;
+ }
+
+ return 0;
+}
+
+/**
+ * zxdh_get_srq_wqe_shift - get shift count for maximum srq wqe size
+ * @dev_attrs: srq HW attributes
+ * @sge: Maximum Scatter Gather Elements wqe
+ * @shift: Returns the shift needed based on sge
+ *
+ * Shift can be used to left shift the srq wqe size based on number of SGEs.
+ * For 1 SGE, shift = 1 (wqe size of 2*16 bytes).
+ * For 2 or 3 SGEs, shift = 2 (wqe size of 4*16 bytes).
+ * For 4-7 SGEs, shift = 3 (wqe size of 8*16 bytes).
+ * For 8-15 SGEs, shift = 4 (wqe size of 16*16 bytes); otherwise shift = 5
+ * (wqe size of 512 bytes).
+ */
+void zxdh_get_srq_wqe_shift(struct zxdh_dev_attrs *dev_attrs, __u32 sge,
+ __u8 *shift)
+{
+	*shift = 0; /* 16-byte RQE; configuration to be confirmed */
+ if (sge < 2)
+ *shift = 1;
+ else if (sge < 4)
+ *shift = 2;
+ else if (sge < 8)
+ *shift = 3;
+ else if (sge < 16)
+ *shift = 4;
+ else
+ *shift = 5;
+}
+
+/*
+ * zxdh_get_srqdepth - get SRQ depth (quanta)
+ * @max_hw_srq_quanta: HW SRQ size limit
+ * @srq_size: SRQ size
+ * @shift: shift which determines size of WQE
+ * @srqdepth: depth of SRQ
+ */
+int zxdh_get_srqdepth(__u32 max_hw_srq_quanta, __u32 srq_size, __u8 shift,
+ __u32 *srqdepth)
+{
+ *srqdepth = zxdh_qp_round_up((srq_size << shift) + ZXDH_SRQ_RSVD);
+
+ if (*srqdepth < (ZXDH_QP_SW_MIN_WQSIZE << shift))
+ *srqdepth = ZXDH_QP_SW_MIN_WQSIZE << shift;
+ else if ((*srqdepth >> shift) > max_hw_srq_quanta)
+ return ZXDH_ERR_INVALID_SIZE;
+
+ return 0;
+}
+
+__le64 *zxdh_get_srq_wqe(struct zxdh_srq *srq, int wqe_index)
+{
+ __le64 *wqe;
+
+ wqe = srq->srq_base[wqe_index * srq->srq_wqe_size_multiplier].elem;
+ return wqe;
+}
+
+__le16 *zxdh_get_srq_list_wqe(struct zxdh_srq *srq, __u16 *idx)
+{
+ __le16 *wqe;
+ __u16 wqe_idx;
+
+ wqe_idx = srq->srq_list_ring.tail;
+ srq->srq_list_ring.tail++;
+ srq->srq_list_ring.tail %= srq->srq_list_ring.size;
+ *idx = srq->srq_list_ring.tail;
+
+ if (!(*idx))
+ srq->srq_list_polarity = !srq->srq_list_polarity;
+
+ wqe = &srq->srq_list_base[wqe_idx];
+
+ return wqe;
+}
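+
+/*
+ * Illustration only, not part of the driver: a worked wrap-around example
+ * for the list ring above, assuming srq_list_ring.size = 256 and
+ * tail = 255. The entry at index 255 is returned, the tail advances to
+ * (255 + 1) % 256 = 0, *idx becomes 0, and srq_list_polarity flips so the
+ * hardware can tell the new lap of entries from the previous one.
+ */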
+
+/**
+ * zxdh_srq_init - initialize srq
+ * @srq: hw srq (user and kernel)
+ * @info: srq initialization info
+ *
+ * initializes the vars used in both user and kernel mode.
+ * size of the wqe depends on the maximum number of fragments
+ * allowed. Then size of wqe * the number of wqes should be the
+ * amount of memory allocated for srq.
+ */
+enum zxdh_status_code zxdh_srq_init(struct zxdh_srq *srq,
+ struct zxdh_srq_init_info *info)
+{
+ __u32 srq_ring_size;
+ __u8 srqshift;
+
+ srq->dev_attrs = info->dev_attrs;
+ if (info->max_srq_frag_cnt > srq->dev_attrs->max_hw_wq_frags)
+ return -ZXDH_ERR_INVALID_FRAG_COUNT;
+ zxdh_get_srq_wqe_shift(srq->dev_attrs, info->max_srq_frag_cnt,
+ &srqshift);
+ srq->srq_base = info->srq_base;
+ srq->srq_list_base = info->srq_list_base;
+ srq->srq_db_base = info->srq_db_base;
+ srq->srq_wrid_array = info->srq_wrid_array;
+ srq->srq_id = info->srq_id;
+ srq->srq_size = info->srq_size;
+ srq->log2_srq_size = info->log2_srq_size;
+ srq->srq_list_size = info->srq_list_size;
+ srq->max_srq_frag_cnt = info->max_srq_frag_cnt;
+ srq_ring_size = srq->srq_size;
+ srq->srq_wqe_size = srqshift;
+ srq->srq_wqe_size_multiplier = 1 << srqshift;
+ ZXDH_RING_INIT(srq->srq_ring, srq_ring_size);
+ ZXDH_RING_INIT(srq->srq_list_ring, srq->srq_list_size);
+ srq->srq_ring.tail = srq->srq_size - 1;
+ srq->srq_list_polarity = 1;
+ return 0;
+}
+
+void zxdh_free_srq_wqe(struct zxdh_srq *srq, int wqe_index)
+{
+ struct zxdh_usrq *iwusrq;
+ __le64 *wqe;
+ __u64 hdr;
+
+ iwusrq = container_of(srq, struct zxdh_usrq, srq);
+ /* always called with interrupts disabled. */
+ pthread_spin_lock(&iwusrq->lock);
+ wqe = zxdh_get_srq_wqe(srq, srq->srq_ring.tail);
+ srq->srq_ring.tail = wqe_index;
+ hdr = FIELD_PREP(ZXDHQPSRQ_NEXT_WQE_INDEX, wqe_index);
+
+ udma_to_device_barrier(); /* make sure WQE is populated before valid bit is set */
+ set_64bit_val(wqe, 0, hdr);
+
+ pthread_spin_unlock(&iwusrq->lock);
+}
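+
+/*
+ * Illustration only, not part of the driver: freed SRQ WQEs are threaded
+ * into a hardware-visible chain. If the ring tail is currently WQE 7 and
+ * WQE 3 completes, the code above writes NEXT_WQE_INDEX = 3 into WQE 7's
+ * header and makes 3 the new tail, so the device reclaims entries by
+ * following these links.
+ */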
diff --git a/providers/zrdma/zxdh_status.h b/providers/zrdma/zxdh_status.h
new file mode 100644
index 0000000..d9e9f04
--- /dev/null
+++ b/providers/zrdma/zxdh_status.h
@@ -0,0 +1,75 @@
+/* SPDX-License-Identifier: GPL-2.0 or Linux-OpenIB */
+/* Copyright (c) 2024 ZTE Corporation. All rights reserved. */
+#ifndef ZXDH_STATUS_H
+#define ZXDH_STATUS_H
+
+/* Error Codes */
+enum zxdh_status_code {
+ ZXDH_SUCCESS = 0,
+ ZXDH_ERR_NVM = -1,
+ ZXDH_ERR_NVM_CHECKSUM = -2,
+ ZXDH_ERR_CFG = -4,
+ ZXDH_ERR_PARAM = -5,
+ ZXDH_ERR_DEVICE_NOT_SUPPORTED = -6,
+ ZXDH_ERR_RESET_FAILED = -7,
+ ZXDH_ERR_SWFW_SYNC = -8,
+ ZXDH_ERR_NO_MEMORY = -9,
+ ZXDH_ERR_BAD_PTR = -10,
+ ZXDH_ERR_INVALID_PD_ID = -11,
+ ZXDH_ERR_INVALID_QP_ID = -12,
+ ZXDH_ERR_INVALID_CQ_ID = -13,
+ ZXDH_ERR_INVALID_CEQ_ID = -14,
+ ZXDH_ERR_INVALID_AEQ_ID = -15,
+ ZXDH_ERR_INVALID_SIZE = -16,
+ ZXDH_ERR_INVALID_ARP_INDEX = -17,
+ ZXDH_ERR_INVALID_FPM_FUNC_ID = -18,
+ ZXDH_ERR_QP_INVALID_MSG_SIZE = -19,
+ ZXDH_ERR_QP_TOOMANY_WRS_POSTED = -20,
+ ZXDH_ERR_INVALID_FRAG_COUNT = -21,
+ ZXDH_ERR_Q_EMPTY = -22,
+ ZXDH_ERR_INVALID_ALIGNMENT = -23,
+ ZXDH_ERR_FLUSHED_Q = -24,
+ ZXDH_ERR_INVALID_PUSH_PAGE_INDEX = -25,
+ ZXDH_ERR_INVALID_INLINE_DATA_SIZE = -26,
+ ZXDH_ERR_TIMEOUT = -27,
+ ZXDH_ERR_OPCODE_MISMATCH = -28,
+ ZXDH_ERR_CQP_COMPL_ERROR = -29,
+ ZXDH_ERR_INVALID_VF_ID = -30,
+ ZXDH_ERR_INVALID_HMCFN_ID = -31,
+ ZXDH_ERR_BACKING_PAGE_ERROR = -32,
+ ZXDH_ERR_NO_PBLCHUNKS_AVAILABLE = -33,
+ ZXDH_ERR_INVALID_PBLE_INDEX = -34,
+ ZXDH_ERR_INVALID_SD_INDEX = -35,
+ ZXDH_ERR_INVALID_PAGE_DESC_INDEX = -36,
+ ZXDH_ERR_INVALID_SD_TYPE = -37,
+ ZXDH_ERR_MEMCPY_FAILED = -38,
+ ZXDH_ERR_INVALID_HMC_OBJ_INDEX = -39,
+ ZXDH_ERR_INVALID_HMC_OBJ_COUNT = -40,
+ ZXDH_ERR_BUF_TOO_SHORT = -43,
+ ZXDH_ERR_BAD_IWARP_CQE = -44,
+ ZXDH_ERR_NVM_BLANK_MODE = -45,
+ ZXDH_ERR_NOT_IMPL = -46,
+ ZXDH_ERR_PE_DOORBELL_NOT_ENA = -47,
+ ZXDH_ERR_NOT_READY = -48,
+ ZXDH_NOT_SUPPORTED = -49,
+ ZXDH_ERR_FIRMWARE_API_VER = -50,
+ ZXDH_ERR_RING_FULL = -51,
+ ZXDH_ERR_MPA_CRC = -61,
+ ZXDH_ERR_NO_TXBUFS = -62,
+ ZXDH_ERR_SEQ_NUM = -63,
+ ZXDH_ERR_LIST_EMPTY = -64,
+ ZXDH_ERR_INVALID_MAC_ADDR = -65,
+ ZXDH_ERR_BAD_STAG = -66,
+ ZXDH_ERR_CQ_COMPL_ERROR = -67,
+ ZXDH_ERR_Q_DESTROYED = -68,
+ ZXDH_ERR_INVALID_FEAT_CNT = -69,
+ ZXDH_ERR_REG_CQ_FULL = -70,
+ ZXDH_ERR_VF_MSG_ERROR = -71,
+ ZXDH_ERR_NO_INTR = -72,
+ ZXDH_ERR_REG_QSET = -73,
+ ZXDH_ERR_FEATURES_OP = -74,
+ ZXDH_ERR_INVALID_FRAG_LEN = -75,
+ ZXDH_ERR_RETRY_ACK_ERR = -76,
+ ZXDH_ERR_RETRY_ACK_NOT_EXCEED_ERR = -77,
+};
+#endif /* ZXDH_STATUS_H */
diff --git a/providers/zrdma/zxdh_verbs.c b/providers/zrdma/zxdh_verbs.c
new file mode 100644
index 0000000..93cf705
--- /dev/null
+++ b/providers/zrdma/zxdh_verbs.c
@@ -0,0 +1,3185 @@
+// SPDX-License-Identifier: GPL-2.0 or Linux-OpenIB
+/* Copyright (c) 2024 ZTE Corporation. All rights reserved. */
+#include <config.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <unistd.h>
+#include <signal.h>
+#include <errno.h>
+#include <sys/param.h>
+#include <sys/mman.h>
+#include <netinet/in.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <malloc.h>
+#include <linux/if_ether.h>
+#include <infiniband/driver.h>
+#include <ccan/container_of.h>
+
+#include "main.h"
+#include "zxdh_abi.h"
+
+uint32_t zxdh_debug_mask;
+
+static const unsigned int zxdh_roce_mtu[] = {
+ [IBV_MTU_256] = 256, [IBV_MTU_512] = 512, [IBV_MTU_1024] = 1024,
+ [IBV_MTU_2048] = 2048, [IBV_MTU_4096] = 4096,
+};
+
+static inline unsigned int mtu_enum_to_int(enum ibv_mtu mtu)
+{
+ return zxdh_roce_mtu[mtu];
+}
+
+static inline void print_fw_ver(uint64_t fw_ver, char *str, size_t len)
+{
+ uint16_t major, minor, sub_minor, sub_major;
+
+ major = (fw_ver >> 48) & 0xffff;
+ sub_major = (fw_ver >> 32) & 0xffff;
+ minor = (fw_ver >> 16) & 0xffff;
+ sub_minor = fw_ver & 0xffff;
+ snprintf(str, len, "%d.%02d.%02d.%02d", major, sub_major, minor,
+ sub_minor);
+}
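+
+/*
+ * Illustration only, not part of the driver: the 64-bit firmware version is
+ * unpacked as four 16-bit fields, so fw_ver = 0x0001000200030004 prints as
+ * "1.02.03.04" (major.sub_major.minor.sub_minor).
+ */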
+
+/**
+ * zxdh_get_inline_data - gather multi-sge data into an inline buffer
+ * @inline_data: destination buffer for the gathered data
+ * @ib_wr: work request ptr
+ * @len: returns the total length of all sges
+ */
+static int zxdh_get_inline_data(uint8_t *inline_data, struct ibv_send_wr *ib_wr,
+ __u32 *len)
+{
+ int num = 0;
+ int offset = 0;
+
+ while (num < ib_wr->num_sge) {
+ *len += ib_wr->sg_list[num].length;
+ if (*len > ZXDH_MAX_INLINE_DATA_SIZE) {
+ return -EINVAL;
+ }
+ memcpy(inline_data + offset,
+ (void *)(uintptr_t)ib_wr->sg_list[num].addr,
+ ib_wr->sg_list[num].length);
+ offset += ib_wr->sg_list[num].length;
+ num++;
+ }
+ return 0;
+}
+
+/**
+ * zxdh_uquery_device_ex - query device attributes including extended properties
+ * @context: user context for the device
+ * @input: extensible input struct for ibv_query_device_ex verb
+ * @attr: extended device attribute struct
+ * @attr_size: size of extended device attribute struct
+ **/
+int zxdh_uquery_device_ex(struct ibv_context *context,
+ const struct ibv_query_device_ex_input *input,
+ struct ibv_device_attr_ex *attr, size_t attr_size)
+{
+ struct ib_uverbs_ex_query_device_resp resp = {};
+ size_t resp_size = sizeof(resp);
+ int ret;
+
+ ret = ibv_cmd_query_device_any(context, input, attr, attr_size, &resp,
+ &resp_size);
+ if (ret)
+ return ret;
+
+ print_fw_ver(resp.base.fw_ver, attr->orig_attr.fw_ver,
+ sizeof(attr->orig_attr.fw_ver));
+
+ return 0;
+}
+
+/**
+ * zxdh_uquery_port - get port attributes (msg size, link, mtu...)
+ * @context: user context of the device
+ * @port: port for the attributes
+ * @attr: to return port attributes
+ **/
+int zxdh_uquery_port(struct ibv_context *context, uint8_t port,
+ struct ibv_port_attr *attr)
+{
+ struct ibv_query_port cmd;
+
+ return ibv_cmd_query_port(context, port, attr, &cmd, sizeof(cmd));
+}
+
+/**
+ * zxdh_ualloc_pd - allocate protection domain and return pd ptr
+ * @context: user context of the device
+ **/
+struct ibv_pd *zxdh_ualloc_pd(struct ibv_context *context)
+{
+ struct ibv_alloc_pd cmd;
+ struct zxdh_ualloc_pd_resp resp = {};
+ struct zxdh_upd *iwupd;
+ int err;
+
+ iwupd = malloc(sizeof(*iwupd));
+ if (!iwupd)
+ return NULL;
+
+ err = ibv_cmd_alloc_pd(context, &iwupd->ibv_pd, &cmd, sizeof(cmd),
+ &resp.ibv_resp, sizeof(resp));
+ if (err)
+ goto err_free;
+
+ iwupd->pd_id = resp.pd_id;
+
+ return &iwupd->ibv_pd;
+
+err_free:
+ free(iwupd);
+ errno = err;
+ return NULL;
+}
+
+/**
+ * zxdh_ufree_pd - free pd resources
+ * @pd: pd to free resources
+ */
+int zxdh_ufree_pd(struct ibv_pd *pd)
+{
+ struct zxdh_upd *iwupd;
+ int ret;
+
+ iwupd = container_of(pd, struct zxdh_upd, ibv_pd);
+ ret = ibv_cmd_dealloc_pd(pd);
+ if (ret)
+ return ret;
+
+ free(iwupd);
+
+ return 0;
+}
+
+/**
+ * zxdh_ureg_mr - register user memory region
+ * @pd: pd for the mr
+ * @addr: user address of the memory region
+ * @length: length of the memory
+ * @hca_va: hca_va
+ * @access: access allowed on this mr
+ */
+struct ibv_mr *zxdh_ureg_mr(struct ibv_pd *pd, void *addr, size_t length,
+ uint64_t hca_va, int access)
+{
+ struct zxdh_umr *umr;
+ struct zxdh_ureg_mr cmd;
+ struct zxdh_ureg_mr_resp resp = {};
+ int err;
+
+ umr = malloc(sizeof(*umr));
+ if (!umr)
+ return NULL;
+
+ cmd.reg_type = ZXDH_MEMREG_TYPE_MEM;
+ err = ibv_cmd_reg_mr(pd, addr, length, hca_va, access, &umr->vmr,
+ &cmd.ibv_cmd, sizeof(cmd), &resp.ibv_resp,
+ sizeof(resp));
+ if (err) {
+ free(umr);
+ errno = err;
+ return NULL;
+ }
+ umr->acc_flags = access;
+ umr->host_page_size = resp.host_page_size;
+ umr->leaf_pbl_size = resp.leaf_pbl_size;
+ umr->mr_pa_pble_index = resp.mr_pa_hig;
+ umr->mr_pa_pble_index = (umr->mr_pa_pble_index << 32) | resp.mr_pa_low;
+
+ return &umr->vmr.ibv_mr;
+}
+
+/*
+ * zxdh_urereg_mr - re-register memory region
+ * @vmr: mr that was allocated
+ * @flags: bit mask to indicate which of the attr's of MR modified
+ * @pd: pd of the mr
+ * @addr: user address of the memory region
+ * @length: length of the memory
+ * @access: access allowed on this mr
+ */
+int zxdh_urereg_mr(struct verbs_mr *vmr, int flags, struct ibv_pd *pd,
+ void *addr, size_t length, int access)
+{
+ struct zxdh_urereg_mr cmd = {};
+ struct ib_uverbs_rereg_mr_resp resp;
+
+ cmd.reg_type = ZXDH_MEMREG_TYPE_MEM;
+ return ibv_cmd_rereg_mr(vmr, flags, addr, length, (uintptr_t)addr,
+ access, pd, &cmd.ibv_cmd, sizeof(cmd), &resp,
+ sizeof(resp));
+}
+
+/**
+ * zxdh_udereg_mr - deregister memory region
+ * @vmr: mr that was allocated
+ */
+int zxdh_udereg_mr(struct verbs_mr *vmr)
+{
+ int ret;
+
+ ret = ibv_cmd_dereg_mr(vmr);
+ if (ret)
+ return ret;
+
+ free(vmr);
+
+ return 0;
+}
+
+/**
+ * zxdh_ualloc_mw - allocate memory window
+ * @pd: protection domain
+ * @type: memory window type
+ */
+struct ibv_mw *zxdh_ualloc_mw(struct ibv_pd *pd, enum ibv_mw_type type)
+{
+ struct ibv_mw *mw;
+ struct ibv_alloc_mw cmd;
+ struct ib_uverbs_alloc_mw_resp resp;
+
+ mw = calloc(1, sizeof(*mw));
+ if (!mw)
+ return NULL;
+
+ if (ibv_cmd_alloc_mw(pd, type, mw, &cmd, sizeof(cmd), &resp,
+ sizeof(resp))) {
+ free(mw);
+ return NULL;
+ }
+
+ return mw;
+}
+
+/**
+ * zxdh_ubind_mw - bind a memory window
+ * @qp: qp to post WR
+ * @mw: memory window to bind
+ * @mw_bind: bind info
+ */
+int zxdh_ubind_mw(struct ibv_qp *qp, struct ibv_mw *mw,
+ struct ibv_mw_bind *mw_bind)
+{
+ struct ibv_mw_bind_info *bind_info = &mw_bind->bind_info;
+ struct verbs_mr *vmr = verbs_get_mr(bind_info->mr);
+ struct zxdh_umr *umr = container_of(vmr, struct zxdh_umr, vmr);
+ struct ibv_send_wr wr = {};
+ struct ibv_send_wr *bad_wr;
+ int err;
+
+ if (vmr->mr_type != IBV_MR_TYPE_MR)
+ return -ENOTSUP;
+
+ if (umr->acc_flags & IBV_ACCESS_ZERO_BASED)
+ return -EINVAL;
+
+ if (mw->type != IBV_MW_TYPE_1)
+ return -EINVAL;
+
+ wr.opcode = IBV_WR_BIND_MW;
+ wr.bind_mw.bind_info = mw_bind->bind_info;
+ wr.bind_mw.mw = mw;
+ wr.bind_mw.rkey = ibv_inc_rkey(mw->rkey);
+
+ wr.wr_id = mw_bind->wr_id;
+ wr.send_flags = mw_bind->send_flags;
+
+ err = zxdh_upost_send(qp, &wr, &bad_wr);
+ if (!err)
+ mw->rkey = wr.bind_mw.rkey;
+
+ return err;
+}
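+
+/*
+ * Illustration only, not part of the driver: type-1 binds reach the
+ * function above through ibv_bind_mw(). A minimal sketch, assuming mr was
+ * registered with IBV_ACCESS_MW_BIND:
+ *
+ *   struct ibv_mw_bind bind = {
+ *           .wr_id = 1,
+ *           .send_flags = IBV_SEND_SIGNALED,
+ *           .bind_info = { .mr = mr, .addr = (uintptr_t)buf,
+ *                          .length = len,
+ *                          .mw_access_flags = IBV_ACCESS_REMOTE_WRITE },
+ *   };
+ *   if (!ibv_bind_mw(qp, mw, &bind))
+ *           use_rkey(mw->rkey);  // hypothetical consumer of the new rkey
+ */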
+
+/**
+ * zxdh_udealloc_mw - deallocate memory window
+ * @mw: memory window to dealloc
+ */
+int zxdh_udealloc_mw(struct ibv_mw *mw)
+{
+ int ret;
+
+ ret = ibv_cmd_dealloc_mw(mw);
+ if (ret)
+ return ret;
+ free(mw);
+
+ return 0;
+}
+
+static void *zxdh_alloc_hw_buf(size_t size)
+{
+ void *buf;
+
+ buf = memalign(ZXDH_HW_PAGE_SIZE, size);
+
+ if (!buf)
+ return NULL;
+ if (ibv_dontfork_range(buf, size)) {
+ free(buf);
+ return NULL;
+ }
+
+ return buf;
+}
+
+static void zxdh_free_hw_buf(void *buf, size_t size)
+{
+ ibv_dofork_range(buf, size);
+ free(buf);
+}
+
+/**
+ * get_cq_size - returns the actual number of cqes needed by HW
+ * @ncqe: minimum cqes requested by application
+ */
+static inline int get_cq_size(int ncqe)
+{
+ ncqe++;
+
+ /* Completions with immediate require 1 extra entry */
+ if (ncqe < ZXDH_U_MINCQ_SIZE)
+ ncqe = ZXDH_U_MINCQ_SIZE;
+
+ return ncqe;
+}
+
+static inline size_t get_cq_total_bytes(__u32 cq_size)
+{
+ return roundup(cq_size * sizeof(struct zxdh_cqe), ZXDH_HW_PAGE_SIZE);
+}
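+
+/*
+ * Illustration only, not part of the driver: a worked sizing example,
+ * assuming a 64-byte struct zxdh_cqe and a 4K ZXDH_HW_PAGE_SIZE. For an
+ * application request of ncqe = 100, get_cq_size() returns 101,
+ * zxdh_cq_round_up() (applied in ucreate_cq below) raises that to 128, and
+ * get_cq_total_bytes(128) = roundup(128 * 64, 4096) = 8192 bytes.
+ */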
+
+/**
+ * ucreate_cq - zxdh util function to create a CQ
+ * @context: ibv context
+ * @attr_ex: CQ init attributes
+ * @ext_cq: flag to create an extended or normal CQ
+ */
+static struct ibv_cq_ex *ucreate_cq(struct ibv_context *context,
+ struct ibv_cq_init_attr_ex *attr_ex,
+ bool ext_cq)
+{
+ struct zxdh_cq_init_info info = {};
+ struct zxdh_ureg_mr reg_mr_cmd = {};
+ struct zxdh_ucreate_cq_ex cmd = {};
+ struct zxdh_ucreate_cq_ex_resp resp = {};
+ struct ib_uverbs_reg_mr_resp reg_mr_resp = {};
+ struct zxdh_ureg_mr reg_mr_shadow_cmd = {};
+ struct ib_uverbs_reg_mr_resp reg_mr_shadow_resp = {};
+ struct zxdh_dev_attrs *dev_attrs;
+ struct zxdh_uvcontext *iwvctx;
+ struct zxdh_ucq *iwucq;
+ size_t total_size;
+ __u32 cq_pages;
+ int ret, ncqe;
+
+ iwvctx = container_of(context, struct zxdh_uvcontext, ibv_ctx.context);
+ dev_attrs = &iwvctx->dev_attrs;
+
+ if (attr_ex->cqe < ZXDH_MIN_CQ_SIZE ||
+ attr_ex->cqe > dev_attrs->max_hw_cq_size) {
+ errno = EINVAL;
+ return NULL;
+ }
+
+ /* save the cqe requested by application */
+ ncqe = attr_ex->cqe;
+ iwucq = calloc(1, sizeof(*iwucq));
+ if (!iwucq)
+ return NULL;
+
+ ret = pthread_spin_init(&iwucq->lock, PTHREAD_PROCESS_PRIVATE);
+ if (ret) {
+ errno = ret;
+ free(iwucq);
+ return NULL;
+ }
+
+ iwucq->resize_enable = false;
+ info.cq_size = get_cq_size(attr_ex->cqe);
+ info.cq_size = zxdh_cq_round_up(info.cq_size);
+ iwucq->comp_vector = attr_ex->comp_vector;
+ list_head_init(&iwucq->resize_list);
+ total_size = get_cq_total_bytes(info.cq_size);
+ cq_pages = total_size >> ZXDH_HW_PAGE_SHIFT;
+
+ if (!(dev_attrs->feature_flags & ZXDH_FEATURE_CQ_RESIZE))
+ total_size = (cq_pages << ZXDH_HW_PAGE_SHIFT) +
+ ZXDH_DB_SHADOW_AREA_SIZE;
+
+ iwucq->buf_size = total_size;
+ info.cq_base = zxdh_alloc_hw_buf(total_size);
+ if (!info.cq_base)
+ goto err_cq_base;
+
+ memset(info.cq_base, 0, total_size);
+ reg_mr_cmd.reg_type = ZXDH_MEMREG_TYPE_CQ;
+ reg_mr_cmd.cq_pages = cq_pages;
+
+ ret = ibv_cmd_reg_mr(&iwvctx->iwupd->ibv_pd, info.cq_base, total_size,
+ (uintptr_t)info.cq_base, IBV_ACCESS_LOCAL_WRITE,
+ &iwucq->vmr, &reg_mr_cmd.ibv_cmd,
+ sizeof(reg_mr_cmd), &reg_mr_resp,
+ sizeof(reg_mr_resp));
+ if (ret) {
+ errno = ret;
+ goto err_dereg_mr;
+ }
+
+ iwucq->vmr.ibv_mr.pd = &iwvctx->iwupd->ibv_pd;
+
+ if (dev_attrs->feature_flags & ZXDH_FEATURE_CQ_RESIZE) {
+ info.shadow_area = zxdh_alloc_hw_buf(ZXDH_DB_SHADOW_AREA_SIZE);
+ if (!info.shadow_area)
+ goto err_dereg_mr;
+
+ memset(info.shadow_area, 0, ZXDH_DB_SHADOW_AREA_SIZE);
+ reg_mr_shadow_cmd.reg_type = ZXDH_MEMREG_TYPE_CQ;
+ reg_mr_shadow_cmd.cq_pages = 1;
+
+ ret = ibv_cmd_reg_mr(
+ &iwvctx->iwupd->ibv_pd, info.shadow_area,
+ ZXDH_DB_SHADOW_AREA_SIZE, (uintptr_t)info.shadow_area,
+ IBV_ACCESS_LOCAL_WRITE, &iwucq->vmr_shadow_area,
+ &reg_mr_shadow_cmd.ibv_cmd, sizeof(reg_mr_shadow_cmd),
+ &reg_mr_shadow_resp, sizeof(reg_mr_shadow_resp));
+ if (ret) {
+ errno = ret;
+ goto err_dereg_shadow;
+ }
+
+ iwucq->vmr_shadow_area.ibv_mr.pd = &iwvctx->iwupd->ibv_pd;
+
+ } else {
+ info.shadow_area = (__le64 *)((__u8 *)info.cq_base +
+ (cq_pages << ZXDH_HW_PAGE_SHIFT));
+ }
+
+ attr_ex->cqe = info.cq_size;
+ cmd.user_cq_buf = (__u64)((uintptr_t)info.cq_base);
+ cmd.user_shadow_area = (__u64)((uintptr_t)info.shadow_area);
+
+ ret = ibv_cmd_create_cq_ex(context, attr_ex, &iwucq->verbs_cq,
+ &cmd.ibv_cmd, sizeof(cmd), &resp.ibv_resp,
+ sizeof(resp), 0);
+ if (ret) {
+ errno = ret;
+ goto err_dereg_shadow;
+ }
+
+ if (ext_cq)
+ zxdh_ibvcq_ex_fill_priv_funcs(iwucq, attr_ex);
+ info.cq_id = resp.cq_id;
+	/* Do not report the cqes burned by HW */
+ iwucq->verbs_cq.cq.cqe = ncqe;
+
+ info.cqe_alloc_db =
+ (__u32 *)((__u8 *)iwvctx->cq_db + ZXDH_DB_CQ_OFFSET);
+ zxdh_cq_init(&iwucq->cq, &info);
+
+ return &iwucq->verbs_cq.cq_ex;
+
+err_dereg_shadow:
+ ibv_cmd_dereg_mr(&iwucq->vmr);
+ if (iwucq->vmr_shadow_area.ibv_mr.handle) {
+ ibv_cmd_dereg_mr(&iwucq->vmr_shadow_area);
+ zxdh_free_hw_buf(info.shadow_area, ZXDH_DB_SHADOW_AREA_SIZE);
+ }
+err_dereg_mr:
+ zxdh_free_hw_buf(info.cq_base, total_size);
+err_cq_base:
+ pthread_spin_destroy(&iwucq->lock);
+
+ free(iwucq);
+
+ return NULL;
+}
+
+struct ibv_cq *zxdh_ucreate_cq(struct ibv_context *context, int cqe,
+ struct ibv_comp_channel *channel,
+ int comp_vector)
+{
+ struct ibv_cq_init_attr_ex attr_ex = {
+ .cqe = cqe,
+ .channel = channel,
+ .comp_vector = comp_vector,
+ };
+ struct ibv_cq_ex *ibvcq_ex;
+
+ ibvcq_ex = ucreate_cq(context, &attr_ex, false);
+
+ return ibvcq_ex ? ibv_cq_ex_to_cq(ibvcq_ex) : NULL;
+}
+
+struct ibv_cq_ex *zxdh_ucreate_cq_ex(struct ibv_context *context,
+ struct ibv_cq_init_attr_ex *attr_ex)
+{
+ if (attr_ex->wc_flags & ~ZXDH_CQ_SUPPORTED_WC_FLAGS) {
+ errno = EOPNOTSUPP;
+ return NULL;
+ }
+
+ return ucreate_cq(context, attr_ex, true);
+}
+
+/**
+ * zxdh_free_cq_buf - free memory for cq buffer
+ * @cq_buf: cq buf to free
+ */
+static void zxdh_free_cq_buf(struct zxdh_cq_buf *cq_buf)
+{
+ ibv_cmd_dereg_mr(&cq_buf->vmr);
+ zxdh_free_hw_buf(cq_buf->cq.cq_base,
+ get_cq_total_bytes(cq_buf->cq.cq_size));
+ free(cq_buf);
+}
+
+/**
+ * zxdh_process_resize_list - process the cq list to remove buffers
+ * @iwucq: cq which owns the list
+ * @lcqe_buf: cq buf where the last cqe is found
+ */
+static int zxdh_process_resize_list(struct zxdh_ucq *iwucq,
+ struct zxdh_cq_buf *lcqe_buf)
+{
+ struct zxdh_cq_buf *cq_buf, *next;
+ int cq_cnt = 0;
+
+ list_for_each_safe (&iwucq->resize_list, cq_buf, next, list) {
+ if (cq_buf == lcqe_buf)
+ return cq_cnt;
+
+ list_del(&cq_buf->list);
+ zxdh_free_cq_buf(cq_buf);
+ cq_cnt++;
+ }
+
+ return cq_cnt;
+}
+
+/**
+ * zxdh_udestroy_cq - destroys cq
+ * @cq: ptr to cq to be destroyed
+ */
+int zxdh_udestroy_cq(struct ibv_cq *cq)
+{
+ struct zxdh_dev_attrs *dev_attrs;
+ struct zxdh_uvcontext *iwvctx;
+ struct zxdh_ucq *iwucq;
+ __u64 cq_shadow_temp;
+ int ret;
+
+ iwucq = container_of(cq, struct zxdh_ucq, verbs_cq.cq);
+ iwvctx = container_of(cq->context, struct zxdh_uvcontext,
+ ibv_ctx.context);
+ dev_attrs = &iwvctx->dev_attrs;
+
+ ret = pthread_spin_destroy(&iwucq->lock);
+ if (ret)
+ goto err;
+
+ get_64bit_val(iwucq->cq.shadow_area, 0, &cq_shadow_temp);
+
+ zxdh_process_resize_list(iwucq, NULL);
+ ret = ibv_cmd_destroy_cq(cq);
+ if (ret)
+ goto err;
+
+ ibv_cmd_dereg_mr(&iwucq->vmr);
+ zxdh_free_hw_buf(iwucq->cq.cq_base, iwucq->buf_size);
+
+ if (dev_attrs->feature_flags & ZXDH_FEATURE_CQ_RESIZE) {
+ ibv_cmd_dereg_mr(&iwucq->vmr_shadow_area);
+ zxdh_free_hw_buf(iwucq->cq.shadow_area,
+ ZXDH_DB_SHADOW_AREA_SIZE);
+ }
+ free(iwucq);
+ return 0;
+
+err:
+ return ret;
+}
+
+int zxdh_umodify_cq(struct ibv_cq *cq, struct ibv_modify_cq_attr *attr)
+{
+ struct ibv_modify_cq cmd = {};
+
+ return ibv_cmd_modify_cq(cq, attr, &cmd, sizeof(cmd));
+}
+
+static enum ibv_wc_status
+zxdh_flush_err_to_ib_wc_status(enum zxdh_flush_opcode opcode)
+{
+ switch (opcode) {
+ case FLUSH_PROT_ERR:
+ return IBV_WC_LOC_PROT_ERR;
+ case FLUSH_REM_ACCESS_ERR:
+ return IBV_WC_REM_ACCESS_ERR;
+ case FLUSH_LOC_QP_OP_ERR:
+ return IBV_WC_LOC_QP_OP_ERR;
+ case FLUSH_REM_OP_ERR:
+ return IBV_WC_REM_OP_ERR;
+ case FLUSH_LOC_LEN_ERR:
+ return IBV_WC_LOC_LEN_ERR;
+ case FLUSH_GENERAL_ERR:
+ return IBV_WC_WR_FLUSH_ERR;
+ case FLUSH_RETRY_EXC_ERR:
+ return IBV_WC_RETRY_EXC_ERR;
+ case FLUSH_MW_BIND_ERR:
+ return IBV_WC_MW_BIND_ERR;
+ case FLUSH_REM_INV_REQ_ERR:
+ return IBV_WC_REM_INV_REQ_ERR;
+ case FLUSH_FATAL_ERR:
+ default:
+ return IBV_WC_FATAL_ERR;
+ }
+}
+
+/**
+ * zxdh_process_cqe_ext - process current cqe for extended CQ
+ * @cur_cqe - current cqe info
+ */
+static inline void zxdh_process_cqe_ext(struct zxdh_cq_poll_info *cur_cqe)
+{
+ struct zxdh_ucq *iwucq =
+ container_of(cur_cqe, struct zxdh_ucq, cur_cqe);
+ struct ibv_cq_ex *ibvcq_ex = &iwucq->verbs_cq.cq_ex;
+
+ ibvcq_ex->wr_id = cur_cqe->wr_id;
+ if (cur_cqe->error)
+ ibvcq_ex->status =
+ (cur_cqe->comp_status == ZXDH_COMPL_STATUS_FLUSHED) ?
+ zxdh_flush_err_to_ib_wc_status(
+ cur_cqe->minor_err) :
+ IBV_WC_GENERAL_ERR;
+ else
+ ibvcq_ex->status = IBV_WC_SUCCESS;
+}
+
+/**
+ * zxdh_process_cqe - process current cqe info
+ * @entry - ibv_wc object to fill in for non-extended CQ
+ * @cur_cqe - current cqe info
+ */
+static inline void zxdh_process_cqe(struct ibv_wc *entry,
+ struct zxdh_cq_poll_info *cur_cqe)
+{
+ struct zxdh_qp *qp;
+ struct ibv_qp *ib_qp;
+
+ entry->wc_flags = 0;
+ entry->wr_id = cur_cqe->wr_id;
+ entry->qp_num = cur_cqe->qp_id;
+ qp = cur_cqe->qp_handle;
+ ib_qp = qp->back_qp;
+
+ if (cur_cqe->error) {
+ entry->status =
+ (cur_cqe->comp_status == ZXDH_COMPL_STATUS_FLUSHED) ?
+ zxdh_flush_err_to_ib_wc_status(
+ cur_cqe->minor_err) :
+ IBV_WC_GENERAL_ERR;
+ entry->vendor_err =
+ cur_cqe->major_err << 16 | cur_cqe->minor_err;
+ } else {
+ entry->status = IBV_WC_SUCCESS;
+ }
+
+ if (cur_cqe->imm_valid) {
+ entry->imm_data = htonl(cur_cqe->imm_data);
+ entry->wc_flags |= IBV_WC_WITH_IMM;
+ }
+
+ switch (cur_cqe->op_type) {
+ case ZXDH_OP_TYPE_SEND:
+ case ZXDH_OP_TYPE_SEND_WITH_IMM:
+ case ZXDH_OP_TYPE_SEND_INV:
+ case ZXDH_OP_TYPE_UD_SEND:
+ case ZXDH_OP_TYPE_UD_SEND_WITH_IMM:
+ entry->opcode = IBV_WC_SEND;
+ break;
+ case ZXDH_OP_TYPE_WRITE:
+ case ZXDH_OP_TYPE_WRITE_WITH_IMM:
+ entry->opcode = IBV_WC_RDMA_WRITE;
+ break;
+ case ZXDH_OP_TYPE_READ:
+ entry->opcode = IBV_WC_RDMA_READ;
+ break;
+ case ZXDH_OP_TYPE_BIND_MW:
+ entry->opcode = IBV_WC_BIND_MW;
+ break;
+ case ZXDH_OP_TYPE_LOCAL_INV:
+ entry->opcode = IBV_WC_LOCAL_INV;
+ break;
+ case ZXDH_OP_TYPE_REC:
+ entry->opcode = IBV_WC_RECV;
+ if (ib_qp->qp_type != IBV_QPT_UD && cur_cqe->stag_invalid_set) {
+ entry->invalidated_rkey = cur_cqe->inv_stag;
+ entry->wc_flags |= IBV_WC_WITH_INV;
+ }
+ break;
+ case ZXDH_OP_TYPE_REC_IMM:
+ entry->opcode = IBV_WC_RECV_RDMA_WITH_IMM;
+ if (ib_qp->qp_type != IBV_QPT_UD && cur_cqe->stag_invalid_set) {
+ entry->invalidated_rkey = cur_cqe->inv_stag;
+ entry->wc_flags |= IBV_WC_WITH_INV;
+ }
+ break;
+ default:
+ entry->status = IBV_WC_GENERAL_ERR;
+ return;
+ }
+
+ if (ib_qp->qp_type == IBV_QPT_UD) {
+ entry->src_qp = cur_cqe->ud_src_qpn;
+ entry->wc_flags |= IBV_WC_GRH;
+ entry->sl = cur_cqe->ipv4 ? 2 : 1;
+ } else {
+ entry->src_qp = cur_cqe->qp_id;
+ }
+ entry->byte_len = cur_cqe->bytes_xfered;
+}
+
+/**
+ * zxdh_poll_one - poll one entry of the CQ
+ * @cq: cq to poll
+ * @cur_cqe: current CQE info to be filled in
+ * @entry: ibv_wc object to be filled for non-extended CQ or NULL for extended CQ
+ *
+ * Returns the internal zxdh device error code or 0 on success
+ */
+static int zxdh_poll_one(struct zxdh_cq *cq, struct zxdh_cq_poll_info *cur_cqe,
+ struct ibv_wc *entry)
+{
+ int ret = zxdh_cq_poll_cmpl(cq, cur_cqe);
+
+ if (ret)
+ return ret;
+
+ if (entry)
+ zxdh_process_cqe(entry, cur_cqe);
+ else
+ zxdh_process_cqe_ext(cur_cqe);
+
+ return 0;
+}
+
+/**
+ * __zxdh_upoll_resize_cq - zxdh util function to poll device CQ
+ * @iwucq: zxdh cq to poll
+ * @num_entries: max cq entries to poll
+ * @entry: pointer to array of ibv_wc objects to be filled in for each completion or NULL if ext CQ
+ *
+ * Returns non-negative value equal to the number of completions
+ * found. On failure, -EINVAL
+ */
+static int __zxdh_upoll_resize_cq(struct zxdh_ucq *iwucq, int num_entries,
+ struct ibv_wc *entry)
+{
+ struct zxdh_cq_buf *cq_buf, *next;
+ struct zxdh_cq_buf *last_buf = NULL;
+ struct zxdh_cq_poll_info *cur_cqe = &iwucq->cur_cqe;
+ bool cq_new_cqe = false;
+ int resized_bufs = 0;
+ int npolled = 0;
+ int ret;
+
+ /* go through the list of previously resized CQ buffers */
+ list_for_each_safe (&iwucq->resize_list, cq_buf, next, list) {
+ while (npolled < num_entries) {
+ ret = zxdh_poll_one(&cq_buf->cq, cur_cqe,
+ entry ? entry + npolled : NULL);
+ if (ret == ZXDH_SUCCESS) {
+ ++npolled;
+ cq_new_cqe = true;
+ continue;
+ }
+ if (ret == ZXDH_ERR_Q_EMPTY)
+ break;
+ if (ret == ZXDH_ERR_RETRY_ACK_NOT_EXCEED_ERR)
+ break;
+ /* QP using the CQ is destroyed. Skip reporting this CQE */
+ if (ret == ZXDH_ERR_Q_DESTROYED) {
+ cq_new_cqe = true;
+ continue;
+ }
+ goto error;
+ }
+
+ /* save the resized CQ buffer which received the last cqe */
+ if (cq_new_cqe)
+ last_buf = cq_buf;
+ cq_new_cqe = false;
+ }
+
+ /* check the current CQ for new cqes */
+ while (npolled < num_entries) {
+ ret = zxdh_poll_one(&iwucq->cq, cur_cqe,
+ entry ? entry + npolled : NULL);
+ if (ret == ZXDH_SUCCESS) {
+ ++npolled;
+ cq_new_cqe = true;
+ continue;
+ }
+ if (ret == ZXDH_ERR_Q_EMPTY)
+ break;
+ if (ret == ZXDH_ERR_RETRY_ACK_NOT_EXCEED_ERR)
+ break;
+ /* QP using the CQ is destroyed. Skip reporting this CQE */
+ if (ret == ZXDH_ERR_Q_DESTROYED) {
+ cq_new_cqe = true;
+ continue;
+ }
+ goto error;
+ }
+ if (cq_new_cqe)
+ /* all previous CQ resizes are complete */
+ resized_bufs = zxdh_process_resize_list(iwucq, NULL);
+ else if (last_buf)
+ /* only CQ resizes up to the last_buf are complete */
+ resized_bufs = zxdh_process_resize_list(iwucq, last_buf);
+ if (resized_bufs)
+ /* report to the HW the number of complete CQ resizes */
+ zxdh_cq_set_resized_cnt(&iwucq->cq, resized_bufs);
+
+ return npolled;
+
+error:
+
+ return -EINVAL;
+}
+
+/**
+ * __zxdh_upoll_curent_cq - zxdh util function to poll device CQ
+ * @iwucq: zxdh cq to poll
+ * @num_entries: max cq entries to poll
+ * @entry: pointer to array of ibv_wc objects to be filled in for each completion or NULL if ext CQ
+ *
+ * Returns non-negative value equal to the number of completions
+ * found. On failure, -EINVAL
+ */
+static int __zxdh_upoll_curent_cq(struct zxdh_ucq *iwucq, int num_entries,
+ struct ibv_wc *entry)
+{
+ struct zxdh_cq_poll_info *cur_cqe = &iwucq->cur_cqe;
+ int npolled = 0;
+ int ret;
+
+ /* check the current CQ for new cqes */
+ while (npolled < num_entries) {
+ ret = zxdh_poll_one(&iwucq->cq, cur_cqe,
+ entry ? entry + npolled : NULL);
+ if (unlikely(ret != ZXDH_SUCCESS))
+ break;
+ ++npolled;
+ }
+ return npolled;
+}
+
+/**
+ * zxdh_upoll_cq - verb API callback to poll device CQ
+ * @cq: ibv_cq to poll
+ * @num_entries: max cq entries to poll
+ * @entry: pointer to array of ibv_wc objects to be filled in for each completion
+ *
+ * Returns non-negative value equal to the number of completions
+ * found and a negative error code on failure
+ */
+int zxdh_upoll_cq(struct ibv_cq *cq, int num_entries, struct ibv_wc *entry)
+{
+ struct zxdh_ucq *iwucq;
+ int ret;
+
+ iwucq = container_of(cq, struct zxdh_ucq, verbs_cq.cq);
+ ret = pthread_spin_lock(&iwucq->lock);
+ if (ret)
+ return -ret;
+
+ if (likely(!iwucq->resize_enable))
+ ret = __zxdh_upoll_curent_cq(iwucq, num_entries, entry);
+ else
+ ret = __zxdh_upoll_resize_cq(iwucq, num_entries, entry);
+
+ pthread_spin_unlock(&iwucq->lock);
+
+ return ret;
+}
+
+/**
+ * zxdh_start_poll - verb_ex API callback to poll batch of WC's
+ * @ibvcq_ex: ibv extended CQ
+ * @attr: attributes (not used)
+ *
+ * Start polling batch of work completions. Returns 0 on success, ENOENT
+ * when no completions are available on the CQ, or an error code on failure.
+ */
+static int zxdh_start_poll(struct ibv_cq_ex *ibvcq_ex,
+ struct ibv_poll_cq_attr *attr)
+{
+ struct zxdh_ucq *iwucq;
+ int ret;
+
+ iwucq = container_of(ibvcq_ex, struct zxdh_ucq, verbs_cq.cq_ex);
+ ret = pthread_spin_lock(&iwucq->lock);
+ if (ret)
+ return ret;
+
+ if (!iwucq->resize_enable) {
+ ret = __zxdh_upoll_curent_cq(iwucq, 1, NULL);
+ if (ret == 1)
+ return 0;
+ } else {
+ ret = __zxdh_upoll_resize_cq(iwucq, 1, NULL);
+ if (ret == 1)
+ return 0;
+ }
+
+ /* No Completions on CQ */
+ if (!ret)
+ ret = ENOENT;
+
+ pthread_spin_unlock(&iwucq->lock);
+
+ return ret;
+}
+
+/**
+ * zxdh_next_poll - verb_ex API callback to get next WC
+ * @ibvcq_ex: ibv extended CQ
+ *
+ * Returns 0 on success, ENOENT when no completions are available on the CQ,
+ * or an error code on failure.
+ */
+static int zxdh_next_poll(struct ibv_cq_ex *ibvcq_ex)
+{
+ struct zxdh_ucq *iwucq;
+ int ret;
+
+ iwucq = container_of(ibvcq_ex, struct zxdh_ucq, verbs_cq.cq_ex);
+ if (!iwucq->resize_enable) {
+ ret = __zxdh_upoll_curent_cq(iwucq, 1, NULL);
+ if (ret == 1)
+ return 0;
+ } else {
+ ret = __zxdh_upoll_resize_cq(iwucq, 1, NULL);
+ if (ret == 1)
+ return 0;
+ }
+
+ /* No Completions on CQ */
+ if (!ret)
+ ret = ENOENT;
+
+ return ret;
+}
+
+/**
+ * zxdh_end_poll - verb_ex API callback to end polling of WC's
+ * @ibvcq_ex: ibv extended CQ
+ */
+static void zxdh_end_poll(struct ibv_cq_ex *ibvcq_ex)
+{
+ struct zxdh_ucq *iwucq =
+ container_of(ibvcq_ex, struct zxdh_ucq, verbs_cq.cq_ex);
+
+ pthread_spin_unlock(&iwucq->lock);
+}
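+
+/*
+ * Illustration only, not part of the driver: the callbacks above are
+ * reached through the generic extended-CQ API. Note that a successful
+ * zxdh_start_poll() returns with the CQ spinlock held; zxdh_end_poll()
+ * releases it. A minimal consumer sketch:
+ *
+ *   struct ibv_poll_cq_attr attr = {};
+ *   if (!ibv_start_poll(cq_ex, &attr)) {
+ *           do {
+ *                   consume(cq_ex->wr_id, cq_ex->status);  // hypothetical
+ *           } while (!ibv_next_poll(cq_ex));
+ *           ibv_end_poll(cq_ex);
+ *   }
+ */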
+
+/**
+ * zxdh_wc_read_completion_ts - Get completion timestamp
+ * @ibvcq_ex: ibv extended CQ
+ *
+ * Get completion timestamp in HCA clock units
+ */
+static uint64_t zxdh_wc_read_completion_ts(struct ibv_cq_ex *ibvcq_ex)
+{
+ struct zxdh_ucq *iwucq =
+ container_of(ibvcq_ex, struct zxdh_ucq, verbs_cq.cq_ex);
+#define HCA_CORE_CLOCK_800_MHZ 800
+
+ return iwucq->cur_cqe.tcp_seq_num_rtt / HCA_CORE_CLOCK_800_MHZ;
+}
+
+/**
+ * zxdh_wc_read_completion_wallclock_ns - Get completion timestamp in ns
+ * @ibvcq_ex: ibv extended CQ
+ *
+ * Get completion timestamp from current completion in wall clock nanoseconds
+ */
+static uint64_t zxdh_wc_read_completion_wallclock_ns(struct ibv_cq_ex *ibvcq_ex)
+{
+ struct zxdh_ucq *iwucq =
+ container_of(ibvcq_ex, struct zxdh_ucq, verbs_cq.cq_ex);
+
+ /* RTT is in usec */
+ return iwucq->cur_cqe.tcp_seq_num_rtt * 1000;
+}
+
+static enum ibv_wc_opcode zxdh_wc_read_opcode(struct ibv_cq_ex *ibvcq_ex)
+{
+ struct zxdh_ucq *iwucq =
+ container_of(ibvcq_ex, struct zxdh_ucq, verbs_cq.cq_ex);
+
+ switch (iwucq->cur_cqe.op_type) {
+ case ZXDH_OP_TYPE_WRITE:
+ case ZXDH_OP_TYPE_WRITE_WITH_IMM:
+ return IBV_WC_RDMA_WRITE;
+ case ZXDH_OP_TYPE_READ:
+ return IBV_WC_RDMA_READ;
+ case ZXDH_OP_TYPE_SEND:
+ case ZXDH_OP_TYPE_SEND_WITH_IMM:
+ case ZXDH_OP_TYPE_SEND_INV:
+ case ZXDH_OP_TYPE_UD_SEND:
+ case ZXDH_OP_TYPE_UD_SEND_WITH_IMM:
+ return IBV_WC_SEND;
+ case ZXDH_OP_TYPE_BIND_MW:
+ return IBV_WC_BIND_MW;
+ case ZXDH_OP_TYPE_REC:
+ return IBV_WC_RECV;
+ case ZXDH_OP_TYPE_REC_IMM:
+ return IBV_WC_RECV_RDMA_WITH_IMM;
+ case ZXDH_OP_TYPE_LOCAL_INV:
+ return IBV_WC_LOCAL_INV;
+ }
+
+ return 0;
+}
+
+static uint32_t zxdh_wc_read_vendor_err(struct ibv_cq_ex *ibvcq_ex)
+{
+ struct zxdh_cq_poll_info *cur_cqe;
+ struct zxdh_ucq *iwucq;
+
+ iwucq = container_of(ibvcq_ex, struct zxdh_ucq, verbs_cq.cq_ex);
+ cur_cqe = &iwucq->cur_cqe;
+
+ return cur_cqe->error ? cur_cqe->major_err << 16 | cur_cqe->minor_err :
+ 0;
+}
+
+static unsigned int zxdh_wc_read_wc_flags(struct ibv_cq_ex *ibvcq_ex)
+{
+ struct zxdh_cq_poll_info *cur_cqe;
+ struct zxdh_ucq *iwucq;
+ struct zxdh_qp *qp;
+ struct ibv_qp *ib_qp;
+ unsigned int wc_flags = 0;
+
+ iwucq = container_of(ibvcq_ex, struct zxdh_ucq, verbs_cq.cq_ex);
+ cur_cqe = &iwucq->cur_cqe;
+ qp = cur_cqe->qp_handle;
+ ib_qp = qp->back_qp;
+
+ if (cur_cqe->imm_valid)
+ wc_flags |= IBV_WC_WITH_IMM;
+
+ if (ib_qp->qp_type == IBV_QPT_UD) {
+ wc_flags |= IBV_WC_GRH;
+ } else {
+ if (cur_cqe->stag_invalid_set) {
+ switch (cur_cqe->op_type) {
+ case ZXDH_OP_TYPE_REC:
+ wc_flags |= IBV_WC_WITH_INV;
+ break;
+ case ZXDH_OP_TYPE_REC_IMM:
+ wc_flags |= IBV_WC_WITH_INV;
+ break;
+ }
+ }
+ }
+
+ return wc_flags;
+}
+
+static uint32_t zxdh_wc_read_byte_len(struct ibv_cq_ex *ibvcq_ex)
+{
+ struct zxdh_ucq *iwucq =
+ container_of(ibvcq_ex, struct zxdh_ucq, verbs_cq.cq_ex);
+
+ return iwucq->cur_cqe.bytes_xfered;
+}
+
+static __be32 zxdh_wc_read_imm_data(struct ibv_cq_ex *ibvcq_ex)
+{
+ struct zxdh_cq_poll_info *cur_cqe;
+ struct zxdh_ucq *iwucq;
+
+ iwucq = container_of(ibvcq_ex, struct zxdh_ucq, verbs_cq.cq_ex);
+ cur_cqe = &iwucq->cur_cqe;
+
+ return cur_cqe->imm_valid ? htonl(cur_cqe->imm_data) : 0;
+}
+
+static uint32_t zxdh_wc_read_qp_num(struct ibv_cq_ex *ibvcq_ex)
+{
+ struct zxdh_ucq *iwucq =
+ container_of(ibvcq_ex, struct zxdh_ucq, verbs_cq.cq_ex);
+
+ return iwucq->cur_cqe.qp_id;
+}
+
+static uint32_t zxdh_wc_read_src_qp(struct ibv_cq_ex *ibvcq_ex)
+{
+ struct zxdh_cq_poll_info *cur_cqe;
+ struct zxdh_ucq *iwucq;
+ struct zxdh_qp *qp;
+ struct ibv_qp *ib_qp;
+
+ iwucq = container_of(ibvcq_ex, struct zxdh_ucq, verbs_cq.cq_ex);
+ cur_cqe = &iwucq->cur_cqe;
+ qp = cur_cqe->qp_handle;
+ ib_qp = qp->back_qp;
+
+ return ib_qp->qp_type == IBV_QPT_UD ? cur_cqe->ud_src_qpn :
+ cur_cqe->qp_id;
+}
+
+static uint32_t zxdh_wc_read_slid(struct ibv_cq_ex *ibvcq_ex)
+{
+ return 0;
+}
+
+static uint8_t zxdh_wc_read_sl(struct ibv_cq_ex *ibvcq_ex)
+{
+ return 0;
+}
+
+static uint8_t zxdh_wc_read_dlid_path_bits(struct ibv_cq_ex *ibvcq_ex)
+{
+ return 0;
+}
+
+void zxdh_ibvcq_ex_fill_priv_funcs(struct zxdh_ucq *iwucq,
+ struct ibv_cq_init_attr_ex *attr_ex)
+{
+ struct ibv_cq_ex *ibvcq_ex = &iwucq->verbs_cq.cq_ex;
+
+ ibvcq_ex->start_poll = zxdh_start_poll;
+ ibvcq_ex->end_poll = zxdh_end_poll;
+ ibvcq_ex->next_poll = zxdh_next_poll;
+
+ if (attr_ex->wc_flags & IBV_WC_EX_WITH_COMPLETION_TIMESTAMP) {
+ ibvcq_ex->read_completion_ts = zxdh_wc_read_completion_ts;
+ iwucq->report_rtt = true;
+ }
+ if (attr_ex->wc_flags & IBV_WC_EX_WITH_COMPLETION_TIMESTAMP_WALLCLOCK) {
+ ibvcq_ex->read_completion_wallclock_ns =
+ zxdh_wc_read_completion_wallclock_ns;
+ iwucq->report_rtt = true;
+ }
+
+ ibvcq_ex->read_opcode = zxdh_wc_read_opcode;
+ ibvcq_ex->read_vendor_err = zxdh_wc_read_vendor_err;
+ ibvcq_ex->read_wc_flags = zxdh_wc_read_wc_flags;
+
+ if (attr_ex->wc_flags & IBV_WC_EX_WITH_BYTE_LEN)
+ ibvcq_ex->read_byte_len = zxdh_wc_read_byte_len;
+ if (attr_ex->wc_flags & IBV_WC_EX_WITH_IMM)
+ ibvcq_ex->read_imm_data = zxdh_wc_read_imm_data;
+ if (attr_ex->wc_flags & IBV_WC_EX_WITH_QP_NUM)
+ ibvcq_ex->read_qp_num = zxdh_wc_read_qp_num;
+ if (attr_ex->wc_flags & IBV_WC_EX_WITH_SRC_QP)
+ ibvcq_ex->read_src_qp = zxdh_wc_read_src_qp;
+ if (attr_ex->wc_flags & IBV_WC_EX_WITH_SLID)
+ ibvcq_ex->read_slid = zxdh_wc_read_slid;
+ if (attr_ex->wc_flags & IBV_WC_EX_WITH_SL)
+ ibvcq_ex->read_sl = zxdh_wc_read_sl;
+ if (attr_ex->wc_flags & IBV_WC_EX_WITH_DLID_PATH_BITS)
+ ibvcq_ex->read_dlid_path_bits = zxdh_wc_read_dlid_path_bits;
+}
+
+/**
+ * zxdh_arm_cq - arm the cq
+ * @iwucq: cq to arm
+ * @cq_notify: notification params
+ */
+static void zxdh_arm_cq(struct zxdh_ucq *iwucq, enum zxdh_cmpl_notify cq_notify)
+{
+ iwucq->is_armed = true;
+ iwucq->last_notify = cq_notify;
+
+ zxdh_cq_request_notification(&iwucq->cq, cq_notify);
+}
+
+/**
+ * zxdh_uarm_cq - callback to arm the cq
+ * @cq: cq to arm
+ * @solicited: arm for solicited completions only if non-zero
+ */
+int zxdh_uarm_cq(struct ibv_cq *cq, int solicited)
+{
+ struct zxdh_ucq *iwucq;
+ enum zxdh_cmpl_notify cq_notify = ZXDH_CQ_COMPL_EVENT;
+ bool promo_event = false;
+ int ret;
+
+ iwucq = container_of(cq, struct zxdh_ucq, verbs_cq.cq);
+ if (solicited) {
+ cq_notify = ZXDH_CQ_COMPL_SOLICITED;
+ } else {
+ if (iwucq->last_notify == ZXDH_CQ_COMPL_SOLICITED)
+ promo_event = true;
+ }
+
+ ret = pthread_spin_lock(&iwucq->lock);
+ if (ret)
+ return ret;
+
+ if (!iwucq->is_armed || promo_event)
+ zxdh_arm_cq(iwucq, cq_notify);
+
+ pthread_spin_unlock(&iwucq->lock);
+
+ return 0;
+}
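+
+/*
+ * Illustration only, not part of the driver: applications arm the CQ via
+ * ibv_req_notify_cq(), which lands in zxdh_uarm_cq(). A minimal event-loop
+ * sketch, assuming the CQ was created with a completion channel:
+ *
+ *   ibv_req_notify_cq(cq, 0);                // arm for any completion
+ *   ibv_get_cq_event(channel, &ev_cq, &ev_ctx);
+ *   ibv_ack_cq_events(ev_cq, 1);
+ *   ibv_req_notify_cq(ev_cq, 0);             // re-arm before draining
+ *   while (ibv_poll_cq(ev_cq, 1, &wc) > 0)
+ *           consume(&wc);                    // hypothetical handler
+ */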
+
+/**
+ * zxdh_cq_event - handle a completion event on the cq
+ * @cq: cq that received the event
+ */
+void zxdh_cq_event(struct ibv_cq *cq)
+{
+ struct zxdh_ucq *iwucq;
+
+ iwucq = container_of(cq, struct zxdh_ucq, verbs_cq.cq);
+ if (pthread_spin_lock(&iwucq->lock))
+ return;
+
+ iwucq->is_armed = false;
+
+ pthread_spin_unlock(&iwucq->lock);
+}
+
+void *zxdh_mmap(int fd, off_t offset)
+{
+ void *map;
+
+ map = mmap(NULL, ZXDH_HW_PAGE_SIZE, PROT_WRITE | PROT_READ, MAP_SHARED,
+ fd, offset);
+ if (map == MAP_FAILED)
+ return map;
+
+ if (ibv_dontfork_range(map, ZXDH_HW_PAGE_SIZE)) {
+ munmap(map, ZXDH_HW_PAGE_SIZE);
+ return MAP_FAILED;
+ }
+
+ return map;
+}
+
+void zxdh_munmap(void *map)
+{
+ ibv_dofork_range(map, ZXDH_HW_PAGE_SIZE);
+ munmap(map, ZXDH_HW_PAGE_SIZE);
+}
+
+/**
+ * zxdh_destroy_vmapped_qp - destroy resources for qp
+ * @iwuqp: qp struct for resources
+ */
+static int zxdh_destroy_vmapped_qp(struct zxdh_uqp *iwuqp)
+{
+ int ret;
+
+ ret = ibv_cmd_destroy_qp(&iwuqp->vqp.qp);
+ if (ret)
+ return ret;
+
+ ibv_cmd_dereg_mr(&iwuqp->vmr);
+
+ return 0;
+}
+
+/**
+ * zxdh_vmapped_qp - create resources for qp
+ * @iwuqp: qp struct for resources
+ * @pd: pd for the qp
+ * @attr: attributes of qp passed
+ * @resp: response back from create qp
+ * @sqdepth: depth of sq
+ * @rqdepth: depth of rq
+ * @info: info for initializing user level qp
+ * @abi_ver: abi version of the create qp command
+ */
+static int zxdh_vmapped_qp(struct zxdh_uqp *iwuqp, struct ibv_pd *pd,
+ struct ibv_qp_init_attr *attr, int sqdepth,
+ int rqdepth, struct zxdh_qp_init_info *info,
+ bool legacy_mode)
+{
+ struct zxdh_ucreate_qp cmd = {};
+ size_t sqsize, rqsize, totalqpsize;
+ struct zxdh_ucreate_qp_resp resp = {};
+ struct zxdh_ureg_mr reg_mr_cmd = {};
+ struct ib_uverbs_reg_mr_resp reg_mr_resp = {};
+ int ret;
+
+ rqsize = 0;
+ sqsize = roundup(sqdepth * ZXDH_QP_SQE_MIN_SIZE, ZXDH_HW_PAGE_SIZE);
+ if (iwuqp->is_srq == false) {
+ rqsize = roundup(rqdepth * ZXDH_QP_RQE_MIN_SIZE,
+ ZXDH_HW_PAGE_SIZE);
+ totalqpsize = rqsize + sqsize + ZXDH_DB_SHADOW_AREA_SIZE;
+ } else {
+ totalqpsize = sqsize + ZXDH_DB_SHADOW_AREA_SIZE;
+ }
+ info->sq = zxdh_alloc_hw_buf(totalqpsize);
+ iwuqp->buf_size = totalqpsize;
+
+ if (!info->sq)
+ return -ENOMEM;
+
+ memset(info->sq, 0, totalqpsize);
+ if (iwuqp->is_srq == false) {
+ info->rq = (struct zxdh_qp_rq_quanta *)&info
+ ->sq[sqsize / ZXDH_QP_SQE_MIN_SIZE];
+ info->shadow_area =
+ info->rq[rqsize / ZXDH_QP_RQE_MIN_SIZE].elem;
+ reg_mr_cmd.rq_pages = rqsize >> ZXDH_HW_PAGE_SHIFT;
+ } else {
+ info->shadow_area =
+ (__le64 *)&info->sq[sqsize / ZXDH_QP_SQE_MIN_SIZE];
+ }
+ reg_mr_cmd.reg_type = ZXDH_MEMREG_TYPE_QP;
+ reg_mr_cmd.sq_pages = sqsize >> ZXDH_HW_PAGE_SHIFT;
+
+ ret = ibv_cmd_reg_mr(pd, info->sq, totalqpsize, (uintptr_t)info->sq,
+ IBV_ACCESS_LOCAL_WRITE, &iwuqp->vmr,
+ &reg_mr_cmd.ibv_cmd, sizeof(reg_mr_cmd),
+ &reg_mr_resp, sizeof(reg_mr_resp));
+ if (ret)
+ goto err_dereg_mr;
+
+ cmd.user_wqe_bufs = (__u64)((uintptr_t)info->sq);
+ cmd.user_compl_ctx = (__u64)(uintptr_t)&iwuqp->qp;
+ ret = ibv_cmd_create_qp(pd, &iwuqp->vqp.qp, attr, &cmd.ibv_cmd,
+ sizeof(cmd), &resp.ibv_resp,
+ sizeof(struct zxdh_ucreate_qp_resp));
+ if (ret)
+ goto err_qp;
+
+ info->sq_size = resp.actual_sq_size;
+ info->rq_size = resp.actual_rq_size;
+ info->qp_caps = resp.qp_caps;
+ info->qp_id = resp.qp_id;
+ iwuqp->zxdh_drv_opt = resp.zxdh_drv_opt;
+ iwuqp->vqp.qp.qp_num = resp.qp_id;
+
+ iwuqp->send_cq =
+ container_of(attr->send_cq, struct zxdh_ucq, verbs_cq.cq);
+ iwuqp->recv_cq =
+ container_of(attr->recv_cq, struct zxdh_ucq, verbs_cq.cq);
+ iwuqp->send_cq->uqp = iwuqp;
+ iwuqp->recv_cq->uqp = iwuqp;
+
+ return 0;
+err_qp:
+ ibv_cmd_dereg_mr(&iwuqp->vmr);
+err_dereg_mr:
+ zxdh_free_hw_buf(info->sq, iwuqp->buf_size);
+ return ret;
+}
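+
+/*
+ * Illustration only, not part of the driver: the single registration above
+ * covers one contiguous, page-aligned allocation laid out as
+ *
+ *   [ SQ: sqdepth * ZXDH_QP_SQE_MIN_SIZE ]
+ *   [ RQ: rqdepth * ZXDH_QP_RQE_MIN_SIZE ]  (absent when an SRQ is used)
+ *   [ DB shadow area: ZXDH_DB_SHADOW_AREA_SIZE ]
+ *
+ * which is why info->rq and info->shadow_area are derived by indexing past
+ * the SQ (and RQ) quanta instead of being allocated separately.
+ */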
+
+static void zxdh_wr_local_inv(struct ibv_qp_ex *ibqp, uint32_t invalidate_rkey)
+{
+ struct zxdh_uqp *qp = container_of(ibqp, struct zxdh_uqp, vqp.qp_ex);
+ struct ibv_send_wr wr = {};
+ struct ibv_send_wr *bad_wr = NULL;
+
+ wr.opcode = IBV_WR_LOCAL_INV;
+ wr.invalidate_rkey = invalidate_rkey;
+
+ zxdh_upost_send(&qp->vqp.qp, &wr, &bad_wr);
+}
+
+static void zxdh_send_wr_send_inv(struct ibv_qp_ex *ibqp,
+ uint32_t invalidate_rkey)
+{
+ struct zxdh_uqp *qp = container_of(ibqp, struct zxdh_uqp, vqp.qp_ex);
+ struct ibv_send_wr wr = {};
+ struct ibv_send_wr *bad_wr = NULL;
+
+ wr.opcode = IBV_WR_SEND_WITH_INV;
+ wr.invalidate_rkey = invalidate_rkey;
+
+ zxdh_upost_send(&qp->vqp.qp, &wr, &bad_wr);
+}
+
+static void zxdh_wr_bind_mw(struct ibv_qp_ex *ibqp, struct ibv_mw *ibmw,
+ uint32_t rkey, const struct ibv_mw_bind_info *info)
+{
+ struct zxdh_uqp *qp = container_of(ibqp, struct zxdh_uqp, vqp.qp_ex);
+ struct ibv_send_wr wr = {};
+ struct ibv_send_wr *bad_wr = NULL;
+
+ if (ibmw->type != IBV_MW_TYPE_2)
+ return;
+
+ wr.opcode = IBV_WR_BIND_MW;
+ wr.bind_mw.bind_info = *info;
+ wr.bind_mw.mw = ibmw;
+ wr.bind_mw.rkey = rkey;
+
+ zxdh_upost_send(&qp->vqp.qp, &wr, &bad_wr);
+}
+
+static struct ibv_qp *create_qp(struct ibv_context *ibv_ctx,
+ struct ibv_qp_init_attr_ex *attr_ex)
+{
+ struct zxdh_qp_init_info info = {};
+ struct zxdh_dev_attrs *dev_attrs;
+ struct zxdh_uvcontext *iwvctx;
+ struct zxdh_uqp *iwuqp;
+ struct zxdh_usrq *iwusrq;
+ struct ibv_pd *pd = attr_ex->pd;
+ struct ibv_qp_init_attr *attr;
+ __u32 sqdepth, rqdepth;
+ __u8 sqshift, rqshift;
+ int status;
+
+ attr = calloc(1, sizeof(*attr));
+ if (!attr)
+ return NULL;
+
+ memcpy(attr, attr_ex, sizeof(*attr));
+
+ if (attr->qp_type != IBV_QPT_RC && attr->qp_type != IBV_QPT_UD) {
+ errno = EOPNOTSUPP;
+ free(attr);
+ return NULL;
+ }
+
+ iwvctx = container_of(ibv_ctx, struct zxdh_uvcontext, ibv_ctx.context);
+ dev_attrs = &iwvctx->dev_attrs;
+
+ if (attr->cap.max_send_sge > dev_attrs->max_hw_wq_frags ||
+ attr->cap.max_recv_sge > dev_attrs->max_hw_wq_frags) {
+ errno = EINVAL;
+ free(attr);
+ return NULL;
+ }
+
+ if (attr->cap.max_inline_data > dev_attrs->max_hw_inline) {
+ zxdh_dbg(&iwvctx->ibv_ctx, ZXDH_DBG_QP,
+ "max_inline_data over max_hw_inline\n");
+ attr->cap.max_inline_data = dev_attrs->max_hw_inline;
+ }
+
+ zxdh_get_sq_wqe_shift(attr->cap.max_send_sge, attr->cap.max_inline_data,
+ &sqshift);
+ status = zxdh_get_sqdepth(dev_attrs, attr->cap.max_send_wr, sqshift,
+ &sqdepth);
+ if (status) {
+ errno = EINVAL;
+ free(attr);
+ return NULL;
+ }
+
+ zxdh_get_rq_wqe_shift(attr->cap.max_recv_sge, &rqshift);
+ status = zxdh_get_rqdepth(dev_attrs, attr->cap.max_recv_wr, rqshift,
+ &rqdepth);
+ if (status) {
+ errno = EINVAL;
+ free(attr);
+ return NULL;
+ }
+
+ iwuqp = memalign(1024, sizeof(*iwuqp));
+ if (!iwuqp) {
+ free(attr);
+ return NULL;
+ }
+
+ memset(iwuqp, 0, sizeof(*iwuqp));
+
+ if (attr_ex->comp_mask & IBV_QP_INIT_ATTR_SEND_OPS_FLAGS) {
+ if (attr_ex->send_ops_flags & ~IBV_QP_EX_WITH_BIND_MW) {
+ errno = EOPNOTSUPP;
+ free(iwuqp);
+ free(attr);
+ return NULL;
+ }
+
+ iwuqp->vqp.comp_mask |= VERBS_QP_EX;
+ if (attr_ex->send_ops_flags & IBV_QP_EX_WITH_BIND_MW)
+ iwuqp->vqp.qp_ex.wr_bind_mw = zxdh_wr_bind_mw;
+
+ if (attr_ex->send_ops_flags & IBV_QP_EX_WITH_SEND_WITH_INV)
+ iwuqp->vqp.qp_ex.wr_send_inv = zxdh_send_wr_send_inv;
+
+ if (attr_ex->send_ops_flags & IBV_QP_EX_WITH_LOCAL_INV)
+ iwuqp->vqp.qp_ex.wr_local_inv = zxdh_wr_local_inv;
+ }
+
+ if (pthread_spin_init(&iwuqp->lock, PTHREAD_PROCESS_PRIVATE))
+ goto err_free_qp;
+
+ info.sq_size = sqdepth >> sqshift;
+ info.rq_size = rqdepth >> rqshift;
+ attr->cap.max_send_wr = info.sq_size;
+ attr->cap.max_recv_wr = info.rq_size;
+
+ info.dev_attrs = dev_attrs;
+ info.max_sq_frag_cnt = attr->cap.max_send_sge;
+ info.max_rq_frag_cnt = attr->cap.max_recv_sge;
+
+ if (attr->srq != NULL) {
+ iwuqp->is_srq = true;
+ iwusrq = container_of(attr->srq, struct zxdh_usrq, ibv_srq);
+ iwuqp->srq = iwusrq;
+ iwuqp->qp.is_srq = true;
+ }
+
+ if (iwuqp->is_srq == false) {
+ iwuqp->recv_sges = calloc(attr->cap.max_recv_sge,
+ sizeof(*iwuqp->recv_sges));
+ if (!iwuqp->recv_sges)
+ goto err_destroy_lock;
+ }
+
+ info.wqe_alloc_db =
+ (__u32 *)((__u8 *)iwvctx->sq_db + ZXDH_DB_SQ_OFFSET);
+ info.abi_ver = iwvctx->abi_ver;
+ info.legacy_mode = iwvctx->legacy_mode;
+ info.sq_wrtrk_array = calloc(sqdepth, sizeof(*info.sq_wrtrk_array));
+ if (!info.sq_wrtrk_array)
+ goto err_free_rsges;
+
+ if (iwuqp->is_srq == false) {
+ info.rq_wrid_array =
+ calloc(info.rq_size, sizeof(*info.rq_wrid_array));
+ if (!info.rq_wrid_array)
+ goto err_free_sq_wrtrk;
+ }
+
+ iwuqp->sq_sig_all = attr->sq_sig_all;
+ iwuqp->qp_type = attr->qp_type;
+ if (attr->qp_type == IBV_QPT_UD)
+ info.type = ZXDH_QP_TYPE_ROCE_UD;
+ else
+ info.type = ZXDH_QP_TYPE_ROCE_RC;
+ status = zxdh_vmapped_qp(iwuqp, pd, attr, sqdepth, rqdepth, &info,
+ iwvctx->legacy_mode);
+ if (status) {
+ errno = status;
+ goto err_free_rq_wrid;
+ }
+
+ iwuqp->qp.back_qp = iwuqp;
+ iwuqp->qp.lock = &iwuqp->lock;
+ info.max_sq_frag_cnt = attr->cap.max_send_sge;
+ info.max_rq_frag_cnt = attr->cap.max_recv_sge;
+ info.max_inline_data = attr->cap.max_inline_data;
+ if (info.type == ZXDH_QP_TYPE_ROCE_RC) {
+ iwuqp->qp.split_sg_list =
+ calloc(2 * dev_attrs->max_hw_read_sges,
+ sizeof(*iwuqp->qp.split_sg_list));
+ if (!iwuqp->qp.split_sg_list)
+ goto err_free_vmap_qp;
+ }
+ status = zxdh_qp_init(&iwuqp->qp, &info);
+ if (status) {
+ errno = EINVAL;
+ goto err_free_sg_list;
+ }
+ iwuqp->qp.mtu = mtu_enum_to_int(IBV_MTU_1024);
+ attr->cap.max_send_wr = (sqdepth - ZXDH_SQ_RSVD) >> sqshift;
+ attr->cap.max_recv_wr = (rqdepth - ZXDH_RQ_RSVD) >> rqshift;
+ memcpy(attr_ex, attr, sizeof(*attr));
+ free(attr);
+ return &iwuqp->vqp.qp;
+
+err_free_sg_list:
+ if (iwuqp->qp.split_sg_list)
+ free(iwuqp->qp.split_sg_list);
+err_free_vmap_qp:
+ zxdh_destroy_vmapped_qp(iwuqp);
+ zxdh_free_hw_buf(info.sq, iwuqp->buf_size);
+err_free_rq_wrid:
+ free(info.rq_wrid_array);
+err_free_sq_wrtrk:
+ free(info.sq_wrtrk_array);
+err_free_rsges:
+ free(iwuqp->recv_sges);
+err_destroy_lock:
+ pthread_spin_destroy(&iwuqp->lock);
+err_free_qp:
+ free(iwuqp);
+ free(attr);
+
+ return NULL;
+}
+
+/**
+ * zxdh_ucreate_qp - create qp on user app
+ * @pd: pd for the qp
+ * @attr: attributes of the qp to be created (sizes, sge, cq)
+ */
+struct ibv_qp *zxdh_ucreate_qp(struct ibv_pd *pd, struct ibv_qp_init_attr *attr)
+{
+ struct ibv_qp_init_attr_ex attrx = {};
+ struct ibv_qp *qp;
+
+ memcpy(&attrx, attr, sizeof(*attr));
+ attrx.comp_mask = IBV_QP_INIT_ATTR_PD;
+ attrx.pd = pd;
+
+ qp = create_qp(pd->context, &attrx);
+ if (qp)
+ memcpy(attr, &attrx, sizeof(*attr));
+
+ return qp;
+}
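+
+/*
+ * Caller's view, for orientation only: this entry point backs the
+ * standard ibv_create_qp() verb, e.g.
+ *
+ *	struct ibv_qp_init_attr attr = {
+ *		.send_cq = cq, .recv_cq = cq,
+ *		.cap = { .max_send_wr = 64, .max_recv_wr = 64,
+ *			 .max_send_sge = 1, .max_recv_sge = 1 },
+ *		.qp_type = IBV_QPT_RC,
+ *	};
+ *	struct ibv_qp *qp = ibv_create_qp(pd, &attr);
+ *
+ * The sizes above are illustrative; the provider rounds them to its
+ * own depths and reports the adjusted values back in attr.cap.
+ */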
+
+/**
+ * zxdh_ucreate_qp_ex - create qp_ex on user app
+ * @context: user context of the device
+ * @attr: attributes of the qp_ex to be created
+ */
+struct ibv_qp *zxdh_ucreate_qp_ex(struct ibv_context *context,
+ struct ibv_qp_init_attr_ex *attr)
+{
+ return create_qp(context, attr);
+}
+
+/**
+ * zxdh_uquery_qp - query qp for some attribute
+ * @qp: qp for the attributes query
+ * @attr: to return the attributes
+ * @attr_mask: mask of the attributes to query
+ * @init_attr: initial attributes during create_qp
+ */
+int zxdh_uquery_qp(struct ibv_qp *qp, struct ibv_qp_attr *attr, int attr_mask,
+ struct ibv_qp_init_attr *init_attr)
+{
+ struct ibv_query_qp cmd;
+
+ return ibv_cmd_query_qp(qp, attr, attr_mask, init_attr, &cmd,
+ sizeof(cmd));
+}
+
+/**
+ * zxdh_umodify_qp - send qp modify to driver
+ * @qp: qp to modify
+ * @attr: attribute to modify
+ * @attr_mask: mask of the attribute
+ */
+int zxdh_umodify_qp(struct ibv_qp *qp, struct ibv_qp_attr *attr, int attr_mask)
+{
+ struct zxdh_uqp *iwuqp;
+ struct zxdh_umodify_qp_resp resp = {};
+ struct ibv_modify_qp cmd = {};
+ struct zxdh_umodify_qp cmd_ex = {};
+ int ret;
+ __u16 mtu = 0;
+
+ iwuqp = container_of(qp, struct zxdh_uqp, vqp.qp);
+ if (attr_mask & IBV_QP_STATE || attr_mask & IBV_QP_RATE_LIMIT) {
+ ret = ibv_cmd_modify_qp_ex(qp, attr, attr_mask, &cmd_ex.ibv_cmd,
+ sizeof(cmd_ex), &resp.ibv_resp,
+ sizeof(resp));
+ } else {
+ ret = ibv_cmd_modify_qp(qp, attr, attr_mask, &cmd, sizeof(cmd));
+ }
+ if (!ret && (attr_mask & IBV_QP_PATH_MTU) &&
+ qp->qp_type == IBV_QPT_RC) {
+ mtu = mtu_enum_to_int(attr->path_mtu);
+ if (mtu == 0)
+ return -EINVAL;
+ iwuqp->qp.mtu = mtu;
+ }
+ if (!ret && (attr_mask & IBV_QP_SQ_PSN) && qp->qp_type == IBV_QPT_RC) {
+ iwuqp->qp.next_psn = attr->sq_psn;
+ iwuqp->qp.cqe_last_ack_qsn = attr->sq_psn - 1;
+ iwuqp->qp.qp_last_ack_qsn = attr->sq_psn - 1;
+ iwuqp->qp.cqe_retry_cnt = 0;
+ iwuqp->qp.qp_reset_cnt = 0;
+ }
+ return ret;
+}
+
+static void zxdh_issue_flush(struct ibv_qp *qp, bool sq_flush, bool rq_flush)
+{
+ struct ib_uverbs_ex_modify_qp_resp resp = {};
+ struct zxdh_umodify_qp cmd_ex = {};
+ struct ibv_qp_attr attr = {};
+
+ attr.qp_state = IBV_QPS_ERR;
+ cmd_ex.sq_flush = sq_flush;
+ cmd_ex.rq_flush = rq_flush;
+
+ ibv_cmd_modify_qp_ex(qp, &attr, IBV_QP_STATE, &cmd_ex.ibv_cmd,
+ sizeof(cmd_ex), &resp, sizeof(resp));
+}
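+
+/*
+ * Note: the modify above moves the QP to IBV_QPS_ERR with the
+ * driver-specific sq_flush/rq_flush flags set, asking the kernel
+ * driver to generate flush completions for any WRs still queued on
+ * the flushed side(s).
+ */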
+
+/**
+ * zxdh_clean_cqes - clean cq entries for qp
+ * @qp: qp for which completions are cleaned
+ * @iwucq: cq to be cleaned
+ */
+static void zxdh_clean_cqes(struct zxdh_qp *qp, struct zxdh_ucq *iwucq)
+{
+ struct zxdh_cq *cq = &iwucq->cq;
+ int ret;
+
+ ret = pthread_spin_lock(&iwucq->lock);
+ if (ret)
+ return;
+
+ zxdh_clean_cq(qp, cq);
+ pthread_spin_unlock(&iwucq->lock);
+}
+
+/**
+ * zxdh_udestroy_qp - destroy qp
+ * @qp: qp to destroy
+ */
+int zxdh_udestroy_qp(struct ibv_qp *qp)
+{
+ struct zxdh_uqp *iwuqp;
+ int ret;
+
+ iwuqp = container_of(qp, struct zxdh_uqp, vqp.qp);
+ ret = pthread_spin_destroy(&iwuqp->lock);
+ if (ret)
+ goto err;
+
+ iwuqp->qp.destroy_pending = true;
+
+ ret = zxdh_destroy_vmapped_qp(iwuqp);
+ if (ret)
+ goto err;
+
+ /* Clean any pending completions from the cq(s) */
+ if (iwuqp->send_cq)
+ zxdh_clean_cqes(&iwuqp->qp, iwuqp->send_cq);
+
+ if (iwuqp->recv_cq && iwuqp->recv_cq != iwuqp->send_cq)
+ zxdh_clean_cqes(&iwuqp->qp, iwuqp->recv_cq);
+
+ if (iwuqp->qp.sq_wrtrk_array)
+ free(iwuqp->qp.sq_wrtrk_array);
+ if (iwuqp->qp.rq_wrid_array)
+ free(iwuqp->qp.rq_wrid_array);
+ if (iwuqp->qp.split_sg_list)
+ free(iwuqp->qp.split_sg_list);
+
+ zxdh_free_hw_buf(iwuqp->qp.sq_base, iwuqp->buf_size);
+ free(iwuqp->recv_sges);
+ free(iwuqp);
+ return 0;
+
+err:
+ return ret;
+}
+
+/**
+ * zxdh_copy_sg_list - copy sg list for qp
+ * @sg_list: destination sg list
+ * @sgl: source sg list
+ * @num_sges: count of sg entries to copy
+ */
+static void zxdh_copy_sg_list(struct zxdh_sge *sg_list, struct ibv_sge *sgl,
+ int num_sges)
+{
+ int i;
+
+ for (i = 0; i < num_sges; i++) {
+ sg_list[i].tag_off = sgl[i].addr;
+ sg_list[i].len = sgl[i].length;
+ sg_list[i].stag = sgl[i].lkey;
+ }
+}
+
+/**
+ * calc_type2_mw_stag - calculate type 2 MW stag
+ * @rkey: desired rkey of the MW
+ * @mw_rkey: type2 memory window rkey
+ *
+ * compute type2 memory window stag by taking lower 8 bits
+ * of the desired rkey and leaving the upper 24 bits of mw->rkey unchanged
+ */
+static inline __u32 calc_type2_mw_stag(__u32 rkey, __u32 mw_rkey)
+{
+ const __u32 mask = 0xff;
+
+ return (rkey & mask) | (mw_rkey & ~mask);
+}
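+
+/*
+ * Worked example with illustrative values: rkey = 0x12345678 and
+ * mw_rkey = 0xAABBCCDD give (rkey & 0xff) = 0x78 and
+ * (mw_rkey & ~0xff) = 0xAABBCC00, so the resulting stag is
+ * 0xAABBCC78.
+ */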
+
+/**
+ * zxdh_upost_send - post send wr for user application
+ * @ib_qp: qp to post wr
+ * @ib_wr: work request ptr
+ * @bad_wr: return of bad wr if err
+ */
+int zxdh_upost_send(struct ibv_qp *ib_qp, struct ibv_send_wr *ib_wr,
+ struct ibv_send_wr **bad_wr)
+{
+ struct zxdh_post_sq_info info;
+ struct zxdh_uvcontext *iwvctx;
+ struct zxdh_dev_attrs *dev_attrs;
+ enum zxdh_status_code ret = 0;
+ struct zxdh_uqp *iwuqp;
+ bool reflush = false;
+ int err = 0;
+ struct verbs_mr *vmr = NULL;
+ struct zxdh_umr *umr = NULL;
+ __u64 mr_va = 0, mw_va = 0, value_dffer = 0, mw_pa_pble_index = 0;
+ __u16 mr_offset = 0;
+ iwvctx = container_of(ib_qp->context, struct zxdh_uvcontext,
+ ibv_ctx.context);
+ if (ib_qp->state != IBV_QPS_RTS) {
+ *bad_wr = ib_wr;
+ verbs_err(&iwvctx->ibv_ctx, "zrdma: post send at state:%d\n",
+ ib_qp->state);
+ return -EINVAL;
+ }
+
+ iwuqp = container_of(ib_qp, struct zxdh_uqp, vqp.qp);
+ dev_attrs = &iwvctx->dev_attrs;
+
+ err = pthread_spin_lock(&iwuqp->lock);
+ if (err)
+ return err;
+
+ if (!ZXDH_RING_MORE_WORK(iwuqp->qp.sq_ring) &&
+ ib_qp->state == IBV_QPS_ERR)
+ reflush = true;
+
+ while (ib_wr) {
+ memset(&info, 0, sizeof(info));
+ info.wr_id = (__u64)(ib_wr->wr_id);
+ if ((ib_wr->send_flags & IBV_SEND_SIGNALED) ||
+ iwuqp->sq_sig_all)
+ info.signaled = true;
+ if (ib_wr->send_flags & IBV_SEND_FENCE)
+ info.read_fence = true;
+
+ switch (ib_wr->opcode) {
+ case IBV_WR_SEND_WITH_IMM:
+ if (iwuqp->qp.qp_caps & ZXDH_SEND_WITH_IMM) {
+ info.imm_data_valid = true;
+ info.imm_data = ntohl(ib_wr->imm_data);
+ } else {
+ err = EINVAL;
+ break;
+ }
+ SWITCH_FALLTHROUGH;
+ case IBV_WR_SEND:
+ case IBV_WR_SEND_WITH_INV:
+ if (ib_wr->send_flags & IBV_SEND_SOLICITED)
+ info.solicited = 1;
+
+ if (ib_wr->opcode == IBV_WR_SEND) {
+ if (ib_qp->qp_type == IBV_QPT_UD)
+ info.op_type = ZXDH_OP_TYPE_UD_SEND;
+ else
+ info.op_type = ZXDH_OP_TYPE_SEND;
+ } else if (ib_wr->opcode == IBV_WR_SEND_WITH_IMM) {
+ if (ib_qp->qp_type == IBV_QPT_UD)
+ info.op_type =
+ ZXDH_OP_TYPE_UD_SEND_WITH_IMM;
+ else
+ info.op_type =
+ ZXDH_OP_TYPE_SEND_WITH_IMM;
+ } else {
+ info.op_type = ZXDH_OP_TYPE_SEND_INV;
+ info.stag_to_inv = ib_wr->invalidate_rkey;
+ }
+
+ if ((ib_wr->send_flags & IBV_SEND_INLINE) &&
+ (ib_wr->num_sge != 0)) {
+ ret = zxdh_get_inline_data(
+ iwuqp->inline_data, ib_wr,
+ &info.op.inline_rdma_send.len);
+ if (ret) {
+ verbs_err(
+ &iwvctx->ibv_ctx,
+ "zrdma: get inline data fail\n");
+ pthread_spin_unlock(&iwuqp->lock);
+ return -EINVAL;
+ }
+ info.op.inline_rdma_send.data =
+ iwuqp->inline_data;
+ if (ib_qp->qp_type == IBV_QPT_UD) {
+ struct zxdh_uah *ah =
+ container_of(ib_wr->wr.ud.ah,
+ struct zxdh_uah,
+ ibv_ah);
+ info.op.inline_rdma_send.ah_id =
+ ah->ah_id;
+ info.op.inline_rdma_send.qkey =
+ ib_wr->wr.ud.remote_qkey;
+ info.op.inline_rdma_send.dest_qp =
+ ib_wr->wr.ud.remote_qpn;
+ ret = zxdh_ud_inline_send(&iwuqp->qp,
+ &info, false);
+ } else {
+ ret = zxdh_rc_inline_send(&iwuqp->qp,
+ &info, false);
+ }
+ } else {
+ info.op.send.num_sges = ib_wr->num_sge;
+ info.op.send.sg_list =
+ (struct zxdh_sge *)ib_wr->sg_list;
+ if (ib_qp->qp_type == IBV_QPT_UD) {
+ struct zxdh_uah *ah =
+ container_of(ib_wr->wr.ud.ah,
+ struct zxdh_uah,
+ ibv_ah);
+
+ info.op.inline_rdma_send.ah_id =
+ ah->ah_id;
+ info.op.inline_rdma_send.qkey =
+ ib_wr->wr.ud.remote_qkey;
+ info.op.inline_rdma_send.dest_qp =
+ ib_wr->wr.ud.remote_qpn;
+ ret = zxdh_ud_send(&iwuqp->qp, &info,
+ false);
+ } else {
+ ret = zxdh_rc_send(&iwuqp->qp, &info,
+ false);
+ }
+ }
+ if (ret)
+ err = (ret == ZXDH_ERR_QP_TOOMANY_WRS_POSTED) ?
+ ENOMEM :
+ EINVAL;
+ break;
+ case IBV_WR_RDMA_WRITE_WITH_IMM:
+ if (iwuqp->qp.qp_caps & ZXDH_WRITE_WITH_IMM) {
+ info.imm_data_valid = true;
+ info.imm_data = ntohl(ib_wr->imm_data);
+ } else {
+				err = EINVAL;
+ break;
+ }
+ SWITCH_FALLTHROUGH;
+ case IBV_WR_RDMA_WRITE:
+ if (ib_wr->send_flags & IBV_SEND_SOLICITED)
+ info.solicited = 1;
+
+ if (ib_wr->opcode == IBV_WR_RDMA_WRITE)
+ info.op_type = ZXDH_OP_TYPE_WRITE;
+ else
+ info.op_type = ZXDH_OP_TYPE_WRITE_WITH_IMM;
+
+ if ((ib_wr->send_flags & IBV_SEND_INLINE) &&
+ (ib_wr->num_sge != 0)) {
+ ret = zxdh_get_inline_data(
+ iwuqp->inline_data, ib_wr,
+ &info.op.inline_rdma_write.len);
+ if (ret) {
+ verbs_err(
+ &iwvctx->ibv_ctx,
+ "zrdma: get inline data fail\n");
+ pthread_spin_unlock(&iwuqp->lock);
+ return -EINVAL;
+ }
+ info.op.inline_rdma_write.data =
+ iwuqp->inline_data;
+ info.op.inline_rdma_write.rem_addr.tag_off =
+ ib_wr->wr.rdma.remote_addr;
+ info.op.inline_rdma_write.rem_addr.stag =
+ ib_wr->wr.rdma.rkey;
+ ret = zxdh_inline_rdma_write(&iwuqp->qp, &info,
+ false);
+ } else {
+ info.op.rdma_write.lo_sg_list =
+ (void *)ib_wr->sg_list;
+ info.op.rdma_write.num_lo_sges = ib_wr->num_sge;
+ info.op.rdma_write.rem_addr.tag_off =
+ ib_wr->wr.rdma.remote_addr;
+ info.op.rdma_write.rem_addr.stag =
+ ib_wr->wr.rdma.rkey;
+ ret = zxdh_rdma_write(&iwuqp->qp, &info, false);
+ }
+ if (ret)
+ err = (ret == ZXDH_ERR_QP_TOOMANY_WRS_POSTED) ?
+ ENOMEM :
+ EINVAL;
+ break;
+ case IBV_WR_RDMA_READ:
+ if (ib_wr->num_sge > dev_attrs->max_hw_read_sges) {
+ err = EINVAL;
+ break;
+ }
+ info.op_type = ZXDH_OP_TYPE_READ;
+ info.op.rdma_read.rem_addr.tag_off =
+ ib_wr->wr.rdma.remote_addr;
+ info.op.rdma_read.rem_addr.stag = ib_wr->wr.rdma.rkey;
+
+ info.op.rdma_read.lo_sg_list = (void *)ib_wr->sg_list;
+ info.op.rdma_read.num_lo_sges = ib_wr->num_sge;
+ ret = zxdh_rdma_read(&iwuqp->qp, &info, false, false);
+ if (ret)
+ err = (ret == ZXDH_ERR_QP_TOOMANY_WRS_POSTED) ?
+ ENOMEM :
+ EINVAL;
+ break;
+ case IBV_WR_BIND_MW:
+ vmr = verbs_get_mr(ib_wr->bind_mw.bind_info.mr);
+ umr = container_of(vmr, struct zxdh_umr, vmr);
+ mr_va = (uintptr_t)ib_wr->bind_mw.bind_info.mr->addr;
+ mw_va = ib_wr->bind_mw.bind_info.addr;
+ mr_offset = 0;
+ value_dffer = 0;
+ mw_pa_pble_index = 0;
+
+ if (ib_qp->qp_type != IBV_QPT_RC) {
+ err = EINVAL;
+ break;
+ }
+ info.op_type = ZXDH_OP_TYPE_BIND_MW;
+ info.op.bind_window.mr_stag =
+ ib_wr->bind_mw.bind_info.mr->rkey;
+
+ if (ib_wr->bind_mw.mw->type == IBV_MW_TYPE_1) {
+ info.op.bind_window.mem_window_type_1 = true;
+ info.op.bind_window.mw_stag =
+ ib_wr->bind_mw.rkey;
+ } else {
+ info.op.bind_window.mem_window_type_1 = false;
+ info.op.bind_window.mw_stag =
+ calc_type2_mw_stag(
+ ib_wr->bind_mw.rkey,
+ ib_wr->bind_mw.mw->rkey);
+ ib_wr->bind_mw.mw->rkey =
+ info.op.bind_window.mw_stag;
+ }
+
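+			/*
+			 * Sketch of the PBLE index math below: the MW's
+			 * byte distance from the MR start is
+			 * mr_offset + (mw_va - mr_va).  With 4K pages and
+			 * a three-level PBL (leaf_pbl_size == 3), each
+			 * root entry covers 4096 * 512 bytes, so e.g. a
+			 * distance of 0x40A000 yields mw_pa_pble_index =
+			 * mr_pa_pble_index + 2 and root_leaf_offset =
+			 * (0x40A000 / 4096) % 512 = 10, while the page
+			 * offset of mw_va is carried in va.
+			 */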
+ if (ib_wr->bind_mw.bind_info.mw_access_flags &
+ IBV_ACCESS_ZERO_BASED) {
+ info.op.bind_window.addressing_type =
+ ZXDH_ADDR_TYPE_ZERO_BASED;
+ if (ib_wr->bind_mw.mw->type == IBV_MW_TYPE_1) {
+ err = EINVAL;
+ break;
+ }
+
+ info.op.bind_window.host_page_size =
+ umr->host_page_size;
+ if (umr->host_page_size == ZXDH_PAGE_SIZE_4K) {
+ mr_offset = mr_va & 0x0fff;
+ value_dffer = mw_va - mr_va;
+ if (umr->leaf_pbl_size == 3) {
+ mw_pa_pble_index =
+ (mr_offset +
+ value_dffer) /
+ (4096 * 512);
+ info.op.bind_window
+ .mw_pa_pble_index =
+ umr->mr_pa_pble_index +
+ mw_pa_pble_index;
+ mw_pa_pble_index =
+ ((mr_offset +
+ value_dffer) /
+ 4096) %
+ 512;
+
+ info.op.bind_window
+ .root_leaf_offset =
+ (__u16)mw_pa_pble_index;
+ info.op.bind_window.va =
+ (void *)(uintptr_t)(mw_va &
+ 0x0fff);
+ info.op.bind_window
+ .leaf_pbl_size = 3;
+
+ } else if (umr->leaf_pbl_size == 1) {
+ mw_pa_pble_index =
+ (mr_offset +
+ value_dffer) /
+ 4096;
+ info.op.bind_window
+ .mw_pa_pble_index =
+ umr->mr_pa_pble_index +
+ mw_pa_pble_index;
+ info.op.bind_window
+ .leaf_pbl_size = 1;
+ info.op.bind_window.va =
+ (void *)(uintptr_t)(mw_va &
+ 0x0fff);
+ info.op.bind_window
+ .root_leaf_offset = 0;
+ } else {
+ mw_pa_pble_index =
+ umr->mr_pa_pble_index +
+ mr_offset + value_dffer;
+ info.op.bind_window.va =
+ (void *)(uintptr_t)(mw_va &
+ 0x0fff);
+ info.op.bind_window
+ .mw_pa_pble_index =
+ mw_pa_pble_index;
+ info.op.bind_window
+ .leaf_pbl_size = 0;
+ info.op.bind_window
+ .root_leaf_offset = 0;
+ }
+
+ } else if (umr->host_page_size ==
+ ZXDH_PAGE_SIZE_2M) {
+ mr_offset = mr_va & 0x1FFFFF;
+ value_dffer = mw_va - mr_va;
+ if (umr->leaf_pbl_size == 3) {
+ mw_pa_pble_index =
+ (mr_offset +
+ value_dffer) /
+ ((4096 * 512) * 512);
+ info.op.bind_window
+ .mw_pa_pble_index =
+ umr->mr_pa_pble_index +
+ mw_pa_pble_index;
+ mw_pa_pble_index =
+ ((mr_offset +
+ value_dffer) /
+ (4096 * 512)) %
+ 512;
+
+ info.op.bind_window
+ .root_leaf_offset =
+ (__u16)mw_pa_pble_index;
+ info.op.bind_window.va =
+ (void *)(uintptr_t)(mw_va &
+ 0x1FFFFF);
+ info.op.bind_window
+ .leaf_pbl_size = 3;
+
+ } else if (umr->leaf_pbl_size == 1) {
+ mw_pa_pble_index =
+ (mr_offset +
+ value_dffer) /
+ (4096 * 512);
+ info.op.bind_window
+ .mw_pa_pble_index =
+ umr->mr_pa_pble_index +
+ mw_pa_pble_index;
+ info.op.bind_window
+ .leaf_pbl_size = 1;
+ info.op.bind_window.va =
+ (void *)(uintptr_t)(mw_va &
+ 0x1FFFFF);
+ info.op.bind_window
+ .root_leaf_offset = 0;
+ } else {
+ mw_pa_pble_index =
+ umr->mr_pa_pble_index +
+ mr_offset + value_dffer;
+ info.op.bind_window.va =
+ (void *)(uintptr_t)(mw_va &
+ 0x1FFFFF);
+ info.op.bind_window
+ .mw_pa_pble_index =
+ mw_pa_pble_index;
+ info.op.bind_window
+ .leaf_pbl_size = 0;
+ info.op.bind_window
+ .root_leaf_offset = 0;
+ }
+ } else if (umr->host_page_size ==
+ ZXDH_PAGE_SIZE_1G) {
+ mr_offset = mr_va & 0x3FFFFFFF;
+ value_dffer = mw_va - mr_va;
+ if (umr->leaf_pbl_size == 1) {
+ mw_pa_pble_index =
+ (mr_offset +
+ value_dffer) /
+ (1024 * 1024 * 1024);
+ info.op.bind_window
+ .mw_pa_pble_index =
+ umr->mr_pa_pble_index +
+ mw_pa_pble_index;
+ info.op.bind_window
+ .leaf_pbl_size = 1;
+ info.op.bind_window.va =
+ (void *)(uintptr_t)(mw_va &
+ 0x3FFFFFFF);
+ info.op.bind_window
+ .root_leaf_offset = 0;
+ } else if (umr->leaf_pbl_size == 0) {
+ mw_pa_pble_index =
+ umr->mr_pa_pble_index +
+ mr_offset + value_dffer;
+ info.op.bind_window.va =
+ (void *)(uintptr_t)(mw_va &
+ 0x3FFFFFFF);
+ info.op.bind_window
+ .mw_pa_pble_index =
+ mw_pa_pble_index;
+ info.op.bind_window
+ .leaf_pbl_size = 0;
+ info.op.bind_window
+ .root_leaf_offset = 0;
+ }
+ }
+
+ } else {
+ info.op.bind_window.addressing_type =
+ ZXDH_ADDR_TYPE_VA_BASED;
+ info.op.bind_window.va =
+ (void *)(uintptr_t)
+ ib_wr->bind_mw.bind_info.addr;
+ info.op.bind_window.host_page_size =
+ umr->host_page_size;
+
+ if (umr->host_page_size == ZXDH_PAGE_SIZE_4K) {
+ mr_offset = mr_va & 0x0fff;
+ value_dffer = mw_va - mr_va;
+ if (umr->leaf_pbl_size == 3) {
+ mw_pa_pble_index =
+ (mr_offset +
+ value_dffer) /
+ (4096 * 512);
+ info.op.bind_window
+ .mw_pa_pble_index =
+ umr->mr_pa_pble_index +
+ mw_pa_pble_index;
+ mw_pa_pble_index =
+ ((mr_offset +
+ value_dffer) /
+ 4096) %
+ 512;
+ info.op.bind_window
+ .root_leaf_offset =
+ (__u16)mw_pa_pble_index;
+ info.op.bind_window
+ .leaf_pbl_size = 3;
+ } else if (umr->leaf_pbl_size == 1) {
+ info.op.bind_window
+ .mw_pa_pble_index =
+ umr->mr_pa_pble_index +
+ ((mr_offset +
+ value_dffer) /
+ 4096);
+ info.op.bind_window
+ .leaf_pbl_size = 1;
+ info.op.bind_window
+ .root_leaf_offset = 0;
+ } else {
+ info.op.bind_window
+ .leaf_pbl_size = 0;
+ info.op.bind_window
+ .mw_pa_pble_index =
+ umr->mr_pa_pble_index +
+ (mr_va & 0x0fff) +
+ (mw_va - mr_va);
+ info.op.bind_window
+ .root_leaf_offset = 0;
+ }
+ } else if (umr->host_page_size ==
+ ZXDH_PAGE_SIZE_2M) {
+ mr_offset = mr_va & 0x1FFFFF;
+ value_dffer = mw_va - mr_va;
+ if (umr->leaf_pbl_size == 3) {
+ mw_pa_pble_index =
+ (mr_offset +
+ value_dffer) /
+ ((4096 * 512) * 512);
+ info.op.bind_window
+ .mw_pa_pble_index =
+ umr->mr_pa_pble_index +
+ mw_pa_pble_index;
+ mw_pa_pble_index =
+ ((mr_offset +
+ value_dffer) /
+ (4096 * 512)) %
+ 512;
+ info.op.bind_window
+ .root_leaf_offset =
+ (__u16)mw_pa_pble_index;
+ info.op.bind_window
+ .leaf_pbl_size = 3;
+ } else if (umr->leaf_pbl_size == 1) {
+ info.op.bind_window
+ .mw_pa_pble_index =
+ umr->mr_pa_pble_index +
+ ((mr_offset +
+ value_dffer) /
+ (4096 * 512));
+ info.op.bind_window
+ .leaf_pbl_size = 1;
+ info.op.bind_window
+ .root_leaf_offset = 0;
+ } else {
+ info.op.bind_window
+ .leaf_pbl_size = 0;
+ info.op.bind_window
+ .mw_pa_pble_index =
+ umr->mr_pa_pble_index +
+ (mr_va & 0x1FFFFF) +
+ (mw_va - mr_va);
+ info.op.bind_window
+ .root_leaf_offset = 0;
+ }
+ } else if (umr->host_page_size ==
+ ZXDH_PAGE_SIZE_1G) {
+ mr_offset = mr_va & 0x3FFFFFFF;
+ value_dffer = mw_va - mr_va;
+ if (umr->leaf_pbl_size == 1) {
+ info.op.bind_window
+ .mw_pa_pble_index =
+ umr->mr_pa_pble_index +
+ ((mr_offset +
+ value_dffer) /
+ (1024 * 1024 * 1024));
+ info.op.bind_window
+ .leaf_pbl_size = 1;
+ info.op.bind_window
+ .root_leaf_offset = 0;
+ } else if (umr->leaf_pbl_size == 0) {
+ info.op.bind_window
+ .leaf_pbl_size = 0;
+ info.op.bind_window
+ .mw_pa_pble_index =
+ umr->mr_pa_pble_index +
+ (mr_va & 0x3FFFFFFF) +
+ (mw_va - mr_va);
+ info.op.bind_window
+ .root_leaf_offset = 0;
+ }
+ }
+ }
+
+ info.op.bind_window.bind_len =
+ ib_wr->bind_mw.bind_info.length;
+ info.op.bind_window.ena_reads =
+ (ib_wr->bind_mw.bind_info.mw_access_flags &
+ IBV_ACCESS_REMOTE_READ) ?
+ 1 :
+ 0;
+ info.op.bind_window.ena_writes =
+ (ib_wr->bind_mw.bind_info.mw_access_flags &
+ IBV_ACCESS_REMOTE_WRITE) ?
+ 1 :
+ 0;
+
+ ret = zxdh_mw_bind(&iwuqp->qp, &info, false);
+ if (ret)
+ err = (ret == ZXDH_ERR_QP_TOOMANY_WRS_POSTED) ?
+ ENOMEM :
+ EINVAL;
+ break;
+ case IBV_WR_LOCAL_INV:
+ info.op_type = ZXDH_OP_TYPE_LOCAL_INV;
+ info.op.inv_local_stag.target_stag =
+ ib_wr->invalidate_rkey;
+ ret = zxdh_stag_local_invalidate(&iwuqp->qp, &info,
+ true);
+ if (ret)
+ err = (ret == ZXDH_ERR_QP_TOOMANY_WRS_POSTED) ?
+ ENOMEM :
+ EINVAL;
+ break;
+ default:
+ /* error */
+ err = EINVAL;
+ break;
+ }
+ if (err)
+ break;
+
+ ib_wr = ib_wr->next;
+ }
+
+ if (err)
+ *bad_wr = ib_wr;
+
+ zxdh_qp_post_wr(&iwuqp->qp);
+ if (reflush)
+ zxdh_issue_flush(ib_qp, 1, 0);
+
+ pthread_spin_unlock(&iwuqp->lock);
+
+ return err;
+}
+
+/**
+ * zxdh_upost_recv - post receive wr for user application
+ * @ib_qp: qp to post the receive wr on
+ * @ib_wr: work request for receive
+ * @bad_wr: bad wr caused an error
+ */
+int zxdh_upost_recv(struct ibv_qp *ib_qp, struct ibv_recv_wr *ib_wr,
+ struct ibv_recv_wr **bad_wr)
+{
+ struct zxdh_post_rq_info post_recv = {};
+ enum zxdh_status_code ret = 0;
+ struct zxdh_sge *sg_list;
+ struct zxdh_uqp *iwuqp;
+ bool reflush = false;
+ int err = 0;
+
+ iwuqp = container_of(ib_qp, struct zxdh_uqp, vqp.qp);
+ sg_list = iwuqp->recv_sges;
+
+ if (unlikely(ib_qp->state == IBV_QPS_RESET || ib_qp->srq)) {
+ *bad_wr = ib_wr;
+ return -EINVAL;
+ }
+
+ err = pthread_spin_lock(&iwuqp->lock);
+ if (err)
+ return err;
+
+ if (unlikely(!ZXDH_RING_MORE_WORK(iwuqp->qp.rq_ring)) &&
+ ib_qp->state == IBV_QPS_ERR)
+ reflush = true;
+
+ while (ib_wr) {
+ if (unlikely(ib_wr->num_sge > iwuqp->qp.max_rq_frag_cnt)) {
+ *bad_wr = ib_wr;
+ err = EINVAL;
+ goto error;
+ }
+ post_recv.num_sges = ib_wr->num_sge;
+ post_recv.wr_id = ib_wr->wr_id;
+ zxdh_copy_sg_list(sg_list, ib_wr->sg_list, ib_wr->num_sge);
+ post_recv.sg_list = sg_list;
+ ret = zxdh_post_receive(&iwuqp->qp, &post_recv);
+ if (unlikely(ret)) {
+ err = (ret == ZXDH_ERR_QP_TOOMANY_WRS_POSTED) ? ENOMEM :
+ EINVAL;
+ *bad_wr = ib_wr;
+ goto error;
+ }
+
+ if (reflush)
+ zxdh_issue_flush(ib_qp, 0, 1);
+
+ ib_wr = ib_wr->next;
+ }
+error:
+ zxdh_qp_set_shadow_area(&iwuqp->qp);
+ pthread_spin_unlock(&iwuqp->lock);
+
+ return err;
+}
+
+/**
+ * zxdh_ucreate_ah - create address handle associated with a pd
+ * @ibpd: pd for the address handle
+ * @attr: attributes of address handle
+ */
+struct ibv_ah *zxdh_ucreate_ah(struct ibv_pd *ibpd, struct ibv_ah_attr *attr)
+{
+ struct zxdh_uah *ah;
+ struct zxdh_ucreate_ah_resp resp;
+ int err;
+
+ ah = calloc(1, sizeof(*ah));
+ if (!ah)
+ return NULL;
+
+ err = ibv_cmd_create_ah(ibpd, &ah->ibv_ah, attr, &resp.ibv_resp,
+ sizeof(resp));
+ if (err) {
+ free(ah);
+ errno = err;
+ return NULL;
+ }
+
+ ah->ah_id = resp.ah_id;
+
+ return &ah->ibv_ah;
+}
+
+/**
+ * zxdh_udestroy_ah - destroy the address handle
+ * @ibah: address handle
+ */
+int zxdh_udestroy_ah(struct ibv_ah *ibah)
+{
+ struct zxdh_uah *ah;
+ int ret;
+
+ ah = container_of(ibah, struct zxdh_uah, ibv_ah);
+
+ ret = ibv_cmd_destroy_ah(ibah);
+ if (ret)
+ return ret;
+
+ free(ah);
+
+ return 0;
+}
+
+/**
+ * zxdh_uattach_mcast - attach qp to multicast group
+ * @qp: The queue pair
+ * @gid: The Global ID for multicast group
+ * @lid: The Local ID
+ */
+int zxdh_uattach_mcast(struct ibv_qp *qp, const union ibv_gid *gid,
+ uint16_t lid)
+{
+ return ibv_cmd_attach_mcast(qp, gid, lid);
+}
+
+/**
+ * zxdh_udetach_mcast - Detach qp from multicast group
+ * @qp: The queue pair
+ * @gid: The Global ID for multicast group
+ * @lid: The Local ID
+ */
+int zxdh_udetach_mcast(struct ibv_qp *qp, const union ibv_gid *gid,
+ uint16_t lid)
+{
+ return ibv_cmd_detach_mcast(qp, gid, lid);
+}
+
+/**
+ * zxdh_uresize_cq - resizes a cq
+ * @cq: cq to resize
+ * @cqe: the number of cqes of the new cq
+ */
+int zxdh_uresize_cq(struct ibv_cq *cq, int cqe)
+{
+ struct zxdh_uvcontext *iwvctx;
+ struct zxdh_dev_attrs *dev_attrs;
+ struct zxdh_uresize_cq cmd = {};
+ struct ib_uverbs_resize_cq_resp resp = {};
+ struct zxdh_ureg_mr reg_mr_cmd = {};
+ struct ib_uverbs_reg_mr_resp reg_mr_resp = {};
+ struct zxdh_cq_buf *cq_buf = NULL;
+ struct zxdh_cqe *cq_base = NULL;
+ struct verbs_mr new_mr = {};
+ struct zxdh_ucq *iwucq;
+ size_t cq_size;
+ __u32 cq_pages;
+ int cqe_needed;
+ int ret = 0;
+
+ iwucq = container_of(cq, struct zxdh_ucq, verbs_cq.cq);
+ iwvctx = container_of(cq->context, struct zxdh_uvcontext,
+ ibv_ctx.context);
+ dev_attrs = &iwvctx->dev_attrs;
+
+ if (!(dev_attrs->feature_flags & ZXDH_FEATURE_CQ_RESIZE))
+ return -EOPNOTSUPP;
+
+ if (cqe > ZXDH_MAX_CQ_SIZE)
+ return -EINVAL;
+
+ cqe_needed = zxdh_cq_round_up(cqe + 1);
+
+ if (cqe_needed < ZXDH_U_MINCQ_SIZE)
+ cqe_needed = ZXDH_U_MINCQ_SIZE;
+
+ if (cqe_needed == iwucq->cq.cq_size)
+ return 0;
+
+ cq_size = get_cq_total_bytes(cqe_needed);
+ cq_pages = cq_size >> ZXDH_HW_PAGE_SHIFT;
+ cq_base = zxdh_alloc_hw_buf(cq_size);
+ if (!cq_base)
+ return -ENOMEM;
+
+ memset(cq_base, 0, cq_size);
+
+ cq_buf = malloc(sizeof(*cq_buf));
+ if (!cq_buf) {
+ ret = -ENOMEM;
+ goto err_buf;
+ }
+
+ new_mr.ibv_mr.pd = iwucq->vmr.ibv_mr.pd;
+ reg_mr_cmd.reg_type = ZXDH_MEMREG_TYPE_CQ;
+ reg_mr_cmd.cq_pages = cq_pages;
+
+ ret = ibv_cmd_reg_mr(new_mr.ibv_mr.pd, cq_base, cq_size,
+ (uintptr_t)cq_base, IBV_ACCESS_LOCAL_WRITE,
+ &new_mr, &reg_mr_cmd.ibv_cmd, sizeof(reg_mr_cmd),
+ &reg_mr_resp, sizeof(reg_mr_resp));
+ if (ret)
+ goto err_dereg_mr;
+
+ ret = pthread_spin_lock(&iwucq->lock);
+ if (ret)
+ goto err_lock;
+
+ cmd.user_cq_buffer = (__u64)((uintptr_t)cq_base);
+ ret = ibv_cmd_resize_cq(&iwucq->verbs_cq.cq, cqe_needed, &cmd.ibv_cmd,
+ sizeof(cmd), &resp, sizeof(resp));
+ if (ret)
+ goto err_resize;
+
+ memcpy(&cq_buf->cq, &iwucq->cq, sizeof(cq_buf->cq));
+ cq_buf->vmr = iwucq->vmr;
+ iwucq->vmr = new_mr;
+ zxdh_cq_resize(&iwucq->cq, cq_base, cqe_needed);
+ iwucq->verbs_cq.cq.cqe = cqe;
+ list_add_tail(&iwucq->resize_list, &cq_buf->list);
+ iwucq->resize_enable = true;
+ pthread_spin_unlock(&iwucq->lock);
+
+ return ret;
+
+err_resize:
+ pthread_spin_unlock(&iwucq->lock);
+err_lock:
+ ibv_cmd_dereg_mr(&new_mr);
+err_dereg_mr:
+ free(cq_buf);
+err_buf:
+ zxdh_free_hw_buf(cq_base, cq_size);
+ return ret;
+}
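+
+/*
+ * Usage sketch: resizing flows through the standard ibv_resize_cq()
+ * verb, e.g. ibv_resize_cq(cq, 2 * cq->cqe).  The previous CQ buffer
+ * is kept on iwucq->resize_list so that completions still sitting in
+ * it can be polled before the buffer is reclaimed.
+ */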
+
+static void zxdh_srq_wqe_init(struct zxdh_usrq *iwusrq)
+{
+ uint32_t i;
+ struct zxdh_srq *srq;
+ __le64 *wqe;
+ __u64 hdr;
+
+ srq = &iwusrq->srq;
+ zxdh_dbg(verbs_get_ctx(iwusrq->ibv_srq.context), ZXDH_DBG_SRQ,
+ "%s head:%d tail:%d\n", __func__, srq->srq_ring.head,
+ srq->srq_ring.tail);
+ for (i = srq->srq_ring.head; i < srq->srq_ring.tail; i++) {
+ wqe = zxdh_get_srq_wqe(srq, i);
+
+ hdr = FIELD_PREP(ZXDHQPSRQ_NEXT_WQE_INDEX, (uint32_t)(i + 1));
+
+ udma_to_device_barrier(); /* make sure WQE is populated before valid bit is set */
+ set_64bit_val(wqe, 0, hdr);
+ }
+}
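+
+/*
+ * After this init, every SRQ WQE header carries the index of the next
+ * free WQE, so the ring doubles as a singly linked free list:
+ * zxdh_upost_srq_recv() pops entries at srq_ring.head, and
+ * zxdh_free_srq_wqe() is the path that links consumed entries back in.
+ */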
+
+static size_t zxdh_get_srq_queue_size(int srqdepth)
+{
+ return roundup(srqdepth * ZXDH_SRQ_WQE_MIN_SIZE, ZXDH_HW_PAGE_SIZE);
+}
+
+static size_t zxdh_get_srq_list_size(size_t srq_size)
+{
+ return roundup(srq_size * sizeof(__u16), ZXDH_HW_PAGE_SIZE);
+}
+
+static size_t zxdh_get_srq_db_size(void)
+{
+ return 8 * sizeof(char);
+}
+
+static size_t zxdh_get_total_srq_size(struct zxdh_usrq *iwusrq, int srqdepth,
+ size_t srq_size)
+{
+ size_t total_srq_queue_size;
+ size_t total_srq_list_size;
+ size_t total_srq_db_size;
+ size_t total_srq_size;
+
+ total_srq_queue_size = zxdh_get_srq_queue_size(srqdepth);
+ iwusrq->buf_size = total_srq_queue_size;
+ total_srq_list_size = zxdh_get_srq_list_size(srq_size);
+ iwusrq->list_buf_size = total_srq_list_size;
+ total_srq_db_size = zxdh_get_srq_db_size();
+ iwusrq->db_buf_size = total_srq_db_size;
+ total_srq_size =
+ total_srq_queue_size + total_srq_list_size + total_srq_db_size;
+ iwusrq->total_buf_size = total_srq_size;
+ zxdh_dbg(
+ verbs_get_ctx(iwusrq->ibv_srq.context), ZXDH_DBG_SRQ,
+		"%s total_srq_queue_size:%zu total_srq_list_size:%zu total_srq_db_size:%zu srqdepth:%d\n",
+ __func__, total_srq_queue_size, total_srq_list_size,
+ total_srq_db_size, srqdepth);
+
+ return total_srq_size;
+}
+
+static int zxdh_alloc_srq_buf(struct zxdh_usrq *iwusrq,
+ struct zxdh_srq_init_info *info,
+ size_t total_srq_size)
+{
+ info->srq_base = zxdh_alloc_hw_buf(total_srq_size);
+ if (!info->srq_base)
+ return -ENOMEM;
+ memset(info->srq_base, 0, total_srq_size);
+ info->srq_list_base =
+ (__le16 *)&info
+ ->srq_base[iwusrq->buf_size / ZXDH_SRQ_WQE_MIN_SIZE];
+ info->srq_db_base =
+ (__le64 *)&info->srq_list_base[iwusrq->list_buf_size /
+ (sizeof(__u16))];
+ *(__le64 *)info->srq_db_base = ZXDH_SRQ_DB_INIT_VALUE;
+ zxdh_dbg(verbs_get_ctx(iwusrq->ibv_srq.context), ZXDH_DBG_SRQ,
+ "%s srq_base:0x%p srq_list_base:0x%p srq_db_base:0x%p\n",
+ __func__, info->srq_base, info->srq_list_base,
+ info->srq_db_base);
+ return 0;
+}
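+
+/*
+ * Layout of the single allocation above (a sketch of the pointer
+ * math, assuming ZXDH_SRQ_WQE_MIN_SIZE == sizeof(struct zxdh_srq_wqe)):
+ *
+ *	srq_base                       WQE ring, buf_size bytes
+ *	+ buf_size                     __le16 index list, list_buf_size bytes
+ *	+ buf_size + list_buf_size     8-byte doorbell, preset to
+ *	                               ZXDH_SRQ_DB_INIT_VALUE
+ */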
+
+static int zxdh_reg_srq_mr(struct ibv_pd *pd, struct zxdh_srq_init_info *info,
+ size_t total_srq_size, uint16_t srq_pages,
+ uint16_t srq_list_pages, struct zxdh_usrq *iwusrq)
+{
+ struct zxdh_ureg_mr reg_mr_cmd = {};
+ struct ib_uverbs_reg_mr_resp reg_mr_resp = {};
+ int ret;
+
+ reg_mr_cmd.reg_type = ZXDH_MEMREG_TYPE_SRQ;
+ reg_mr_cmd.srq_pages = srq_pages;
+ reg_mr_cmd.srq_list_pages = srq_list_pages;
+ ret = ibv_cmd_reg_mr(pd, info->srq_base, total_srq_size,
+ (uintptr_t)info->srq_base, IBV_ACCESS_LOCAL_WRITE,
+ &iwusrq->vmr, &reg_mr_cmd.ibv_cmd,
+ sizeof(reg_mr_cmd), &reg_mr_resp,
+ sizeof(reg_mr_resp));
+ if (ret)
+ return ret;
+
+ return 0;
+}
+
+static int create_srq(struct ibv_pd *pd, struct zxdh_usrq *iwusrq,
+ struct ibv_srq_init_attr *attr,
+ struct zxdh_srq_init_info *info)
+{
+ struct zxdh_ucreate_srq cmd = {};
+ struct zxdh_ucreate_srq_resp resp = {};
+ int ret;
+
+ cmd.user_wqe_bufs = (__u64)((uintptr_t)info->srq_base);
+ cmd.user_compl_ctx = (__u64)(uintptr_t)&iwusrq->srq;
+ cmd.user_wqe_list = (__u64)((uintptr_t)info->srq_list_base);
+ cmd.user_wqe_db = (__u64)((uintptr_t)info->srq_db_base);
+ ret = ibv_cmd_create_srq(pd, &iwusrq->ibv_srq, attr, &cmd.ibv_cmd,
+ sizeof(cmd), &resp.ibv_resp,
+ sizeof(struct zxdh_ucreate_srq_resp));
+ if (ret)
+ return ret;
+
+ iwusrq->srq_id = resp.srq_id;
+ info->srq_id = resp.srq_id;
+ info->srq_size = resp.actual_srq_size;
+ info->srq_list_size = resp.actual_srq_list_size;
+ zxdh_dbg(
+ verbs_get_ctx(iwusrq->ibv_srq.context), ZXDH_DBG_SRQ,
+ "%s info->srq_id:%d info->srq_size:%d info->srq_list_size:%d\n",
+ __func__, info->srq_id, info->srq_size, info->srq_list_size);
+
+ return 0;
+}
+
+/**
+ * zxdh_vmapped_srq - create resources for srq
+ * @iwusrq: srq struct for resources
+ * @pd: pd for the srq
+ * @attr: attributes of srq passed
+ * @srqdepth: depth of srq
+ * @info: info for initializing user level srq
+ */
+static int zxdh_vmapped_srq(struct zxdh_usrq *iwusrq, struct ibv_pd *pd,
+ struct ibv_srq_init_attr *attr, int srqdepth,
+ struct zxdh_srq_init_info *info)
+{
+ size_t total_srq_size;
+ size_t srq_pages = 0;
+ size_t srq_list_pages = 0;
+ int ret;
+
+ total_srq_size =
+ zxdh_get_total_srq_size(iwusrq, srqdepth, info->srq_size);
+ srq_pages = iwusrq->buf_size >> ZXDH_HW_PAGE_SHIFT;
+ srq_list_pages = iwusrq->list_buf_size >> ZXDH_HW_PAGE_SHIFT;
+ ret = zxdh_alloc_srq_buf(iwusrq, info, total_srq_size);
+ if (ret)
+ return -ENOMEM;
+ zxdh_dbg(verbs_get_ctx(iwusrq->ibv_srq.context), ZXDH_DBG_SRQ,
+		"%s srq_pages:%zu srq_list_pages:%zu\n", __func__, srq_pages,
+ srq_list_pages);
+
+ ret = zxdh_reg_srq_mr(pd, info, total_srq_size, srq_pages,
+ srq_list_pages, iwusrq);
+ if (ret) {
+ errno = ret;
+ goto err_dereg_srq_mr;
+ }
+ ret = create_srq(pd, iwusrq, attr, info);
+ if (ret)
+ goto err_srq;
+ return 0;
+err_srq:
+ ibv_cmd_dereg_mr(&iwusrq->vmr);
+err_dereg_srq_mr:
+ zxdh_free_hw_buf(info->srq_base, total_srq_size);
+
+ return ret;
+}
+
+/**
+ * zxdh_destroy_vmapped_srq - destroy resources for srq
+ * @iwusrq: srq struct for resources
+ */
+static int zxdh_destroy_vmapped_srq(struct zxdh_usrq *iwusrq)
+{
+ int ret;
+
+ ret = ibv_cmd_destroy_srq(&iwusrq->ibv_srq);
+ if (ret)
+ return ret;
+
+ ibv_cmd_dereg_mr(&iwusrq->vmr);
+ return 0;
+}
+
+static int zxdh_check_srq_init_attr(struct ibv_srq_init_attr *srq_init_attr,
+ struct zxdh_dev_attrs *dev_attrs)
+{
+ if ((srq_init_attr->attr.srq_limit > srq_init_attr->attr.max_wr) ||
+ (srq_init_attr->attr.max_sge > dev_attrs->max_hw_wq_frags) ||
+ (srq_init_attr->attr.max_wr > dev_attrs->max_hw_srq_wr)) {
+ return 1;
+ }
+ return 0;
+}
+
+static int zxdh_init_iwusrq(struct zxdh_usrq *iwusrq,
+ struct ibv_srq_init_attr *srq_init_attr,
+ __u32 srqdepth, __u8 srqshift,
+ struct zxdh_srq_init_info *info,
+ struct zxdh_dev_attrs *dev_attrs)
+{
+ info->srq_size = srqdepth >> srqshift;
+ iwusrq->max_wr = info->srq_size;
+ iwusrq->max_sge = srq_init_attr->attr.max_sge;
+ iwusrq->srq_limit = srq_init_attr->attr.srq_limit;
+
+ srq_init_attr->attr.max_wr = info->srq_size;
+ info->dev_attrs = dev_attrs;
+ info->max_srq_frag_cnt = srq_init_attr->attr.max_sge;
+ info->srq_wrid_array =
+ calloc(info->srq_size, sizeof(*info->srq_wrid_array));
+ if (info->srq_wrid_array == NULL)
+ return 1;
+
+ return 0;
+}
+
+/**
+ * zxdh_ucreate_srq - create srq on user app
+ * @pd: pd for the srq
+ * @srq_init_attr: attributes of the srq to be created (sizes, sge)
+ */
+struct ibv_srq *zxdh_ucreate_srq(struct ibv_pd *pd,
+ struct ibv_srq_init_attr *srq_init_attr)
+{
+ struct zxdh_srq_init_info info = {};
+ struct zxdh_dev_attrs *dev_attrs;
+ struct zxdh_uvcontext *iwvctx;
+ __u32 srqdepth;
+ __u8 srqshift;
+ int status;
+ int ret;
+ struct zxdh_usrq *iwusrq;
+
+ iwvctx = container_of(pd->context, struct zxdh_uvcontext,
+ ibv_ctx.context);
+ dev_attrs = &iwvctx->dev_attrs;
+
+ if ((zxdh_check_srq_init_attr(srq_init_attr, dev_attrs)) != 0) {
+ verbs_err(&iwvctx->ibv_ctx,
+ "zxdh_check_srq_init_attr failed\n");
+ errno = EINVAL;
+ return NULL;
+ }
+
+ /* get shift count for maximum wqe size */
+ zxdh_get_srq_wqe_shift(dev_attrs, srq_init_attr->attr.max_sge,
+ &srqshift);
+
+	/* get RQ/SRQ depth (quanta), the minimum number of units in the srq */
+ status = zxdh_get_srqdepth(dev_attrs->max_hw_srq_quanta,
+ srq_init_attr->attr.max_wr, srqshift,
+ &srqdepth);
+ zxdh_dbg(
+ &iwvctx->ibv_ctx, ZXDH_DBG_SRQ,
+ "%s %d status:%d srqshift:%d srqdepth:%d dev_attrs->max_hw_srq_quanta:%d srq_init_attr->attr.max_wr:%d\n",
+ __func__, __LINE__, status, srqshift, srqdepth,
+ dev_attrs->max_hw_srq_quanta, srq_init_attr->attr.max_wr);
+ if (status != 0) {
+ verbs_err(&iwvctx->ibv_ctx, "zxdh_get_srqdepth failed\n");
+ errno = EINVAL;
+ return NULL;
+ }
+ iwusrq = memalign(1024, sizeof(*iwusrq));
+ if (!iwusrq)
+ return NULL;
+ memset(iwusrq, 0, sizeof(*iwusrq));
+ if (pthread_spin_init(&iwusrq->lock, PTHREAD_PROCESS_PRIVATE) != 0)
+ goto err_free_srq;
+
+ if (zxdh_init_iwusrq(iwusrq, srq_init_attr, srqdepth, srqshift, &info,
+ dev_attrs)) {
+ verbs_err(&iwvctx->ibv_ctx, "calloc srq_wrid_array failed\n");
+ goto err_srq_wrid_array;
+ }
+ status = zxdh_vmapped_srq(iwusrq, pd, srq_init_attr, srqdepth, &info);
+ if (status) {
+ verbs_err(&iwvctx->ibv_ctx, "zxdh_vmapped_srq failed\n");
+ errno = status;
+ goto err_vmapped_srq;
+ }
+
+ status = zxdh_srq_init(&iwusrq->srq, &info);
+ if (status) {
+ verbs_err(&iwvctx->ibv_ctx, "zxdh_srq_init failed\n");
+ errno = EINVAL;
+ goto err_free_srq_init;
+ }
+ zxdh_srq_wqe_init(iwusrq);
+
+ srq_init_attr->attr.max_wr = (srqdepth - ZXDH_SRQ_RSVD) >> srqshift;
+
+ zxdh_dbg(&iwvctx->ibv_ctx, ZXDH_DBG_SRQ,
+ "iwusrq->srq_id:%d info.srq_size:%d\n", iwusrq->srq_id,
+ info.srq_size);
+ return &iwusrq->ibv_srq;
+
+err_free_srq_init:
+ zxdh_destroy_vmapped_srq(iwusrq);
+ zxdh_free_hw_buf(info.srq_base, iwusrq->total_buf_size);
+err_vmapped_srq:
+ free(info.srq_wrid_array);
+err_srq_wrid_array:
+ ret = pthread_spin_destroy(&iwusrq->lock);
+ if (ret)
+ errno = EINVAL;
+err_free_srq:
+ free(iwusrq);
+ return NULL;
+}
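+
+/*
+ * Caller's view, for orientation only: this backs the standard
+ * ibv_create_srq() verb, e.g.
+ *
+ *	struct ibv_srq_init_attr attr = {
+ *		.attr = { .max_wr = 256, .max_sge = 1 },
+ *	};
+ *	struct ibv_srq *srq = ibv_create_srq(pd, &attr);
+ *
+ * attr.attr.max_wr is adjusted to the actual (reserved-slot aware)
+ * depth on return.
+ */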
+
+/**
+ * zxdh_udestroy_srq - destroy srq on user app
+ * @srq: srq to destroy
+ */
+int zxdh_udestroy_srq(struct ibv_srq *srq)
+{
+ struct zxdh_usrq *iwusrq;
+ int ret;
+
+ iwusrq = container_of(srq, struct zxdh_usrq, ibv_srq);
+ ret = pthread_spin_destroy(&iwusrq->lock);
+ if (ret)
+ goto err;
+
+ ret = zxdh_destroy_vmapped_srq(iwusrq);
+ if (ret)
+ goto err;
+ zxdh_dbg(verbs_get_ctx(iwusrq->ibv_srq.context), ZXDH_DBG_SRQ,
+ "iwusrq->srq_id:%d\n", iwusrq->srq_id);
+ zxdh_free_hw_buf(iwusrq->srq.srq_base, iwusrq->total_buf_size);
+ free(iwusrq->srq.srq_wrid_array);
+ free(iwusrq);
+
+ return 0;
+
+err:
+ return ret;
+}
+
+/**
+ * zxdh_umodify_srq - modify srq on user app
+ * @srq: srq to modify
+ * @srq_attr: attributes of the srq to modify
+ * @srq_attr_mask: mask of the attributes
+ */
+int zxdh_umodify_srq(struct ibv_srq *srq, struct ibv_srq_attr *srq_attr,
+ int srq_attr_mask)
+{
+ struct ibv_modify_srq cmd;
+ struct zxdh_usrq *iwusrq;
+ int ret;
+
+ iwusrq = container_of(srq, struct zxdh_usrq, ibv_srq);
+ ret = ibv_cmd_modify_srq(srq, srq_attr, srq_attr_mask, &cmd,
+ sizeof(cmd));
+ if (ret == 0)
+ iwusrq->srq_limit = srq_attr->srq_limit;
+ zxdh_dbg(verbs_get_ctx(iwusrq->ibv_srq.context), ZXDH_DBG_SRQ,
+ "iwusrq->srq_id:%d srq_attr->srq_limit:%d\n", iwusrq->srq_id,
+ srq_attr->srq_limit);
+ return ret;
+}
+
+/**
+ * zxdh_uquery_srq - query srq on user app
+ * @srq: srq to query
+ * @srq_attr: attributes of the srq to be queried
+ */
+int zxdh_uquery_srq(struct ibv_srq *srq, struct ibv_srq_attr *srq_attr)
+{
+ struct ibv_query_srq cmd;
+
+ return ibv_cmd_query_srq(srq, srq_attr, &cmd, sizeof(cmd));
+}
+
+static int zxdh_check_srq_valid(struct ibv_recv_wr *recv_wr,
+ struct zxdh_usrq *iwusrq, struct zxdh_srq *srq)
+{
+ if (unlikely(recv_wr->num_sge > iwusrq->max_sge))
+ return -EINVAL;
+
+ if (unlikely(srq->srq_ring.head == srq->srq_ring.tail))
+ return -ENOMEM;
+
+ return 0;
+}
+
+static void zxdh_fill_srq_wqe(struct zxdh_usrq *iwusrq, struct zxdh_srq *srq,
+ __le64 *wqe_64, struct ibv_recv_wr *recv_wr)
+{
+ __u32 byte_off;
+ int i;
+
+ for (i = 0, byte_off = ZXDH_SRQ_FRAG_BYTESIZE;
+ i < recv_wr->num_sge &&
+ byte_off + ZXDH_SRQ_FRAG_BYTESIZE < UINT32_MAX;
+ i++) {
+ set_64bit_val(wqe_64, byte_off, recv_wr->sg_list[i].addr);
+ set_64bit_val(wqe_64, byte_off + 8,
+ FIELD_PREP(ZXDHQPSRQ_FRAG_LEN,
+ recv_wr->sg_list[i].length) |
+ FIELD_PREP(ZXDHQPSRQ_FRAG_STAG,
+ recv_wr->sg_list[i].lkey));
+ byte_off += ZXDH_SRQ_FRAG_BYTESIZE;
+ }
+
+ if ((recv_wr->num_sge < iwusrq->max_sge) || (recv_wr->num_sge == 0)) {
+ set_64bit_val(wqe_64, byte_off, 0);
+ set_64bit_val(wqe_64, byte_off + 8,
+ FIELD_PREP(ZXDHQPSRQ_FRAG_LEN, 0) |
+ FIELD_PREP(ZXDHQPSRQ_FRAG_STAG,
+ ZXDH_SRQ_INVALID_LKEY));
+ }
+
+ set_64bit_val(wqe_64, 8, ((uint64_t)iwusrq->srq_id) << 32);
+
+ __u64 hdr = FIELD_PREP(ZXDHQPSRQ_RSV, 0) |
+ FIELD_PREP(ZXDHQPSRQ_VALID_SGE_NUM, recv_wr->num_sge) |
+ FIELD_PREP(ZXDHQPSRQ_SIGNATURE, 0) |
+ FIELD_PREP(ZXDHQPSRQ_NEXT_WQE_INDEX, srq->srq_ring.head);
+
+ udma_to_device_barrier(); /* make sure WQE is populated before valid bit is set */
+ set_64bit_val(wqe_64, 0, hdr);
+}
+
+static void zxdh_get_wqe_index(struct zxdh_srq *srq, __le16 *wqe_16, __u16 *buf,
+ __u16 nreq, __u16 *idx)
+{
+ int i;
+
+ for (i = 0; i < nreq; i++) {
+ wqe_16 = zxdh_get_srq_list_wqe(srq, idx);
+ udma_to_device_barrier(); /* make sure WQE is populated before valid bit is set */
+ set_16bit_val(wqe_16, 0, buf[i]);
+ }
+}
+
+static void zxdh_update_srq_db_base(struct zxdh_usrq *iwusrq, __u16 idx)
+{
+ __u64 hdr = FIELD_PREP(ZXDH_SRQ_PARITY_SIGN,
+ iwusrq->srq.srq_list_polarity) |
+ FIELD_PREP(ZXDH_SRQ_SW_SRQ_HEAD, idx);
+
+ udma_to_device_barrier(); /* make sure WQE is populated before valid bit is set */
+ set_64bit_val(iwusrq->srq.srq_db_base, 0, hdr);
+}
+
+/**
+ * zxdh_upost_srq_recv - post srq recv on user app
+ * @srq: srq to post recv
+ * @recv_wr: a list of work requests to post on the receive queue
+ * @bad_recv_wr: pointer to first rejected wr
+ */
+int zxdh_upost_srq_recv(struct ibv_srq *srq, struct ibv_recv_wr *recv_wr,
+ struct ibv_recv_wr **bad_recv_wr)
+{
+ struct zxdh_usrq *iwusrq;
+ struct zxdh_srq *hw_srq;
+ __le16 *wqe_16;
+ __le64 *wqe_64;
+ __u64 temp_val;
+ int err = 0;
+ int nreq;
+ __u16 *buf;
+ size_t buf_size;
+ __u16 idx = 0;
+
+ iwusrq = container_of(srq, struct zxdh_usrq, ibv_srq);
+ hw_srq = &iwusrq->srq;
+ pthread_spin_lock(&iwusrq->lock);
+ buf_size = iwusrq->max_wr * sizeof(__u16);
+ buf = malloc(buf_size);
+ if (buf == NULL) {
+ verbs_err(verbs_get_ctx(iwusrq->ibv_srq.context),
+ "malloc buf_size failed\n");
+ err = -ENOMEM;
+ goto out;
+ }
+
+ for (nreq = 0; recv_wr; nreq++, recv_wr = recv_wr->next) {
+ err = zxdh_check_srq_valid(recv_wr, iwusrq, hw_srq);
+ if (err)
+ break;
+
+ iwusrq->srq.srq_wrid_array[hw_srq->srq_ring.head] =
+ recv_wr->wr_id;
+ buf[nreq] = hw_srq->srq_ring.head;
+ wqe_64 = zxdh_get_srq_wqe(hw_srq, hw_srq->srq_ring.head);
+ get_64bit_val(wqe_64, 0, &temp_val);
+ hw_srq->srq_ring.head =
+ (__u16)FIELD_GET(ZXDHQPSRQ_NEXT_WQE_INDEX, temp_val);
+ zxdh_fill_srq_wqe(iwusrq, hw_srq, wqe_64, recv_wr);
+ }
+
+ zxdh_dbg(verbs_get_ctx(iwusrq->ibv_srq.context), ZXDH_DBG_SRQ,
+ "nreq:%d err:%d iwusrq->srq_id:%d\n", nreq, err,
+ iwusrq->srq_id);
+
+ if (err == 0) {
+ zxdh_get_wqe_index(hw_srq, wqe_16, buf, nreq, &idx);
+ zxdh_update_srq_db_base(iwusrq, idx);
+ }
+out:
+ pthread_spin_unlock(&iwusrq->lock);
+ if (err)
+ *bad_recv_wr = recv_wr;
+ if (buf)
+ free(buf);
+ return err;
+}
+
+/**
+ * zxdh_uget_srq_num - get srq num on user app
+ * @srq: srq to query
+ * @srq_num: where the srq number is returned
+ */
+int zxdh_uget_srq_num(struct ibv_srq *srq, uint32_t *srq_num)
+{
+ struct zxdh_usrq *iwusrq;
+
+ iwusrq = container_of(srq, struct zxdh_usrq, ibv_srq);
+
+ *srq_num = iwusrq->srq_id;
+ return 0;
+}
+
+void zxdh_set_debug_mask(void)
+{
+ char *env;
+
+ env = getenv("ZXDH_DEBUG_MASK");
+ if (env)
+ zxdh_debug_mask = strtol(env, NULL, 0);
+}
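+
+/*
+ * Usage sketch: the mask comes from the environment and is parsed
+ * with strtol(..., 0), so decimal, octal and hex all work, e.g.
+ *
+ *	ZXDH_DEBUG_MASK=0x4 ./rdma_app
+ *
+ * with bit values matching the provider's ZXDH_DBG_* flags
+ * (ZXDH_DBG_SRQ etc.; the exact bit assignments live in the provider
+ * headers).
+ */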
diff --git a/providers/zrdma/zxdh_verbs.h b/providers/zrdma/zxdh_verbs.h
new file mode 100644
index 0000000..69a98cc
--- /dev/null
+++ b/providers/zrdma/zxdh_verbs.h
@@ -0,0 +1,611 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2024 ZTE Corporation. All rights reserved. */
+#ifndef ZXDH_VERBS_H
+#define ZXDH_VERBS_H
+#include "zxdh_defs.h"
+
+#define zxdh_handle void *
+#define zxdh_adapter_handle zxdh_handle
+#define zxdh_qp_handle zxdh_handle
+#define zxdh_cq_handle zxdh_handle
+#define zxdh_pd_id zxdh_handle
+#define zxdh_stag_handle zxdh_handle
+#define zxdh_stag_index __u32
+#define zxdh_stag __u32
+#define zxdh_stag_key __u8
+#define zxdh_tagged_offset __u64
+#define zxdh_access_privileges __u32
+#define zxdh_physical_fragment __u64
+#define zxdh_address_list __u64 *
+#define zxdh_sgl struct zxdh_sge *
+
+#define ZXDH_MAX_MR_SIZE 0x200000000000ULL
+
+#define ZXDH_ACCESS_FLAGS_LOCALREAD 0x01
+#define ZXDH_ACCESS_FLAGS_LOCALWRITE 0x02
+#define ZXDH_ACCESS_FLAGS_REMOTEREAD_ONLY 0x04
+#define ZXDH_ACCESS_FLAGS_REMOTEREAD 0x05
+#define ZXDH_ACCESS_FLAGS_REMOTEWRITE_ONLY 0x08
+#define ZXDH_ACCESS_FLAGS_REMOTEWRITE 0x0a
+#define ZXDH_ACCESS_FLAGS_BIND_WINDOW 0x10
+#define ZXDH_ACCESS_FLAGS_ZERO_BASED 0x20
+#define ZXDH_ACCESS_FLAGS_ALL 0x3f
+
+#define ZXDH_OP_TYPE_NOP 0x00
+#define ZXDH_OP_TYPE_SEND 0x01
+#define ZXDH_OP_TYPE_SEND_WITH_IMM 0x02
+#define ZXDH_OP_TYPE_SEND_INV 0x03
+#define ZXDH_OP_TYPE_WRITE 0x04
+#define ZXDH_OP_TYPE_WRITE_WITH_IMM 0x05
+#define ZXDH_OP_TYPE_READ 0x06
+#define ZXDH_OP_TYPE_BIND_MW 0x07
+#define ZXDH_OP_TYPE_FAST_REG_MR 0x08
+#define ZXDH_OP_TYPE_LOCAL_INV 0x09
+#define ZXDH_OP_TYPE_UD_SEND 0x0a
+#define ZXDH_OP_TYPE_UD_SEND_WITH_IMM 0x0b
+#define ZXDH_OP_TYPE_REC 0x3e
+#define ZXDH_OP_TYPE_REC_IMM 0x3f
+
+#define ZXDH_FLUSH_MAJOR_ERR 1
+#define ZXDH_RETRY_ACK_MAJOR_ERR 0x8
+#define ZXDH_RETRY_ACK_MINOR_ERR 0xf3
+#define ZXDH_TX_WINDOW_QUERY_ITEM_MINOR_ERR 0xf5
+
+#define ZXDH_MAX_SQ_FRAG 31
+#define ZXDH_MAX_SQ_INLINE_DATELEN_WITH_IMM 210
+
+#define INLINE_DATASIZE_7BYTES 7
+#define INLINE_DATASIZE_24BYTES 24
+#define INLINE_FRAG_DATASIZE_31BYTES 31
+
+#define INLINE_DATA_OFFSET_7BYTES 7
+#define WQE_OFFSET_7BYTES 7
+#define WQE_OFFSET_8BYTES 8
+#define WQE_OFFSET_24BYTES 24
+
+#define ZXDH_SQE_SIZE 4
+#define ZXDH_RQE_SIZE 2
+
+#define ZXDH_SRQ_INVALID_LKEY 0x100
+#define ZXDH_SRQ_DB_INIT_VALUE 0x8000
+
+#define ZXDH_WQEALLOC_WQE_DESC_INDEX GENMASK(31, 20)
+
+enum zxdh_device_caps_const {
+ ZXDH_WQE_SIZE = 4,
+ ZXDH_SRQE_SIZE = 2,
+ ZXDH_CQP_WQE_SIZE = 8,
+ ZXDH_CQE_SIZE = 8,
+ ZXDH_EXTENDED_CQE_SIZE = 8,
+ ZXDH_AEQE_SIZE = 2,
+ ZXDH_CEQE_SIZE = 1,
+ ZXDH_CQP_CTX_SIZE = 8,
+ ZXDH_SHADOW_AREA_SIZE = 8,
+ ZXDH_GATHER_STATS_BUF_SIZE = 1024,
+ ZXDH_MIN_IW_QP_ID = 0,
+ ZXDH_QUERY_FPM_BUF_SIZE = 176,
+ ZXDH_COMMIT_FPM_BUF_SIZE = 176,
+ ZXDH_MAX_IW_QP_ID = 262143,
+ ZXDH_MIN_CEQID = 0,
+ ZXDH_MAX_CEQID = 1023,
+ ZXDH_CEQ_MAX_COUNT = ZXDH_MAX_CEQID + 1,
+ ZXDH_MIN_CQID = 0,
+ ZXDH_MAX_CQID = 524287,
+ ZXDH_MIN_AEQ_ENTRIES = 1,
+ ZXDH_MAX_AEQ_ENTRIES = 524287,
+ ZXDH_MIN_CEQ_ENTRIES = 1,
+ ZXDH_MAX_CEQ_ENTRIES = 262143,
+ ZXDH_MIN_CQ_SIZE = 1,
+ ZXDH_MAX_CQ_SIZE = 1048575,
+ ZXDH_DB_ID_ZERO = 0,
+ ZXDH_MAX_WQ_FRAGMENT_COUNT = 13,
+ ZXDH_MAX_SGE_RD = 13,
+ ZXDH_MAX_OUTBOUND_MSG_SIZE = 2147483647,
+ ZXDH_MAX_INBOUND_MSG_SIZE = 2147483647,
+ ZXDH_MAX_PUSH_PAGE_COUNT = 1024,
+ ZXDH_MAX_PE_ENA_VF_COUNT = 32,
+ ZXDH_MAX_VF_FPM_ID = 47,
+ ZXDH_MAX_SQ_PAYLOAD_SIZE = 2147483648,
+ ZXDH_MAX_INLINE_DATA_SIZE = 217,
+ ZXDH_MAX_WQ_ENTRIES = 32768,
+ ZXDH_Q2_BUF_SIZE = 256,
+ ZXDH_QP_CTX_SIZE = 256,
+ ZXDH_MAX_PDS = 262144,
+};
+
+enum zxdh_addressing_type {
+ ZXDH_ADDR_TYPE_ZERO_BASED = 0,
+ ZXDH_ADDR_TYPE_VA_BASED = 1,
+};
+
+enum zxdh_flush_opcode {
+ FLUSH_INVALID = 0,
+ FLUSH_GENERAL_ERR,
+ FLUSH_PROT_ERR,
+ FLUSH_REM_ACCESS_ERR,
+ FLUSH_LOC_QP_OP_ERR,
+ FLUSH_REM_OP_ERR,
+ FLUSH_LOC_LEN_ERR,
+ FLUSH_FATAL_ERR,
+ FLUSH_RETRY_EXC_ERR,
+ FLUSH_MW_BIND_ERR,
+ FLUSH_REM_INV_REQ_ERR,
+};
+
+enum zxdh_cmpl_status {
+ ZXDH_COMPL_STATUS_SUCCESS = 0,
+ ZXDH_COMPL_STATUS_FLUSHED,
+ ZXDH_COMPL_STATUS_INVALID_WQE,
+ ZXDH_COMPL_STATUS_QP_CATASTROPHIC,
+ ZXDH_COMPL_STATUS_REMOTE_TERMINATION,
+ ZXDH_COMPL_STATUS_INVALID_STAG,
+ ZXDH_COMPL_STATUS_BASE_BOUND_VIOLATION,
+ ZXDH_COMPL_STATUS_ACCESS_VIOLATION,
+ ZXDH_COMPL_STATUS_INVALID_PD_ID,
+ ZXDH_COMPL_STATUS_WRAP_ERROR,
+ ZXDH_COMPL_STATUS_STAG_INVALID_PDID,
+ ZXDH_COMPL_STATUS_RDMA_READ_ZERO_ORD,
+ ZXDH_COMPL_STATUS_QP_NOT_PRIVLEDGED,
+ ZXDH_COMPL_STATUS_STAG_NOT_INVALID,
+ ZXDH_COMPL_STATUS_INVALID_PHYS_BUF_SIZE,
+ ZXDH_COMPL_STATUS_INVALID_PHYS_BUF_ENTRY,
+ ZXDH_COMPL_STATUS_INVALID_FBO,
+ ZXDH_COMPL_STATUS_INVALID_LEN,
+ ZXDH_COMPL_STATUS_INVALID_ACCESS,
+ ZXDH_COMPL_STATUS_PHYS_BUF_LIST_TOO_LONG,
+ ZXDH_COMPL_STATUS_INVALID_VIRT_ADDRESS,
+ ZXDH_COMPL_STATUS_INVALID_REGION,
+ ZXDH_COMPL_STATUS_INVALID_WINDOW,
+ ZXDH_COMPL_STATUS_INVALID_TOTAL_LEN,
+ ZXDH_COMPL_STATUS_RETRY_ACK_ERR,
+ ZXDH_COMPL_STATUS_TX_WINDOW_QUERY_ITEM_ERR,
+ ZXDH_COMPL_STATUS_UNKNOWN,
+};
+
+enum zxdh_cmpl_notify {
+ ZXDH_CQ_COMPL_EVENT = 0,
+ ZXDH_CQ_COMPL_SOLICITED = 1,
+};
+
+enum zxdh_qp_caps {
+ ZXDH_WRITE_WITH_IMM = 1,
+ ZXDH_SEND_WITH_IMM = 2,
+ ZXDH_ROCE = 4,
+ ZXDH_PUSH_MODE = 8,
+};
+
+enum zxdh_page_size {
+ ZXDH_PAGE_SIZE_4K = 0,
+ ZXDH_PAGE_SIZE_2M = 9,
+ ZXDH_PAGE_SIZE_1G = 18,
+};
+
+struct zxdh_qp;
+struct zxdh_cq;
+struct zxdh_qp_init_info;
+struct zxdh_cq_init_info;
+
+struct zxdh_sge {
+ zxdh_tagged_offset tag_off;
+ __u32 len;
+ zxdh_stag stag;
+};
+
+struct zxdh_ring {
+ __u32 head;
+ __u32 tail;
+ __u32 size;
+};
+
+struct zxdh_cqe {
+ __le64 buf[ZXDH_CQE_SIZE];
+};
+
+struct zxdh_extended_cqe {
+ __le64 buf[ZXDH_EXTENDED_CQE_SIZE];
+};
+
+struct zxdh_post_send {
+ zxdh_sgl sg_list;
+ __u32 num_sges;
+ __u32 qkey;
+ __u32 dest_qp;
+ __u32 ah_id;
+};
+
+struct zxdh_inline_rdma_send {
+ void *data;
+ __u32 len;
+ __u32 qkey;
+ __u32 dest_qp;
+ __u32 ah_id;
+};
+
+struct zxdh_post_rq_info {
+ __u64 wr_id;
+ zxdh_sgl sg_list;
+ __u32 num_sges;
+};
+
+struct zxdh_rdma_write {
+ zxdh_sgl lo_sg_list;
+ __u32 num_lo_sges;
+ struct zxdh_sge rem_addr;
+};
+
+struct zxdh_inline_rdma_write {
+ void *data;
+ __u32 len;
+ struct zxdh_sge rem_addr;
+};
+
+struct zxdh_rdma_read {
+ zxdh_sgl lo_sg_list;
+ __u32 num_lo_sges;
+ struct zxdh_sge rem_addr;
+};
+
+struct zxdh_bind_window {
+ zxdh_stag mr_stag;
+ __u64 bind_len;
+ void *va;
+ enum zxdh_addressing_type addressing_type;
+ __u8 ena_reads : 1;
+ __u8 ena_writes : 1;
+ zxdh_stag mw_stag;
+ __u8 mem_window_type_1 : 1;
+ __u8 host_page_size;
+ __u8 leaf_pbl_size;
+ __u16 root_leaf_offset;
+ __u64 mw_pa_pble_index;
+};
+
+struct zxdh_inv_local_stag {
+ zxdh_stag target_stag;
+};
+
+struct zxdh_post_sq_info {
+ __u64 wr_id;
+ __u8 op_type;
+ __u8 l4len;
+ __u8 signaled : 1;
+ __u8 read_fence : 1;
+ __u8 local_fence : 1;
+ __u8 inline_data : 1;
+ __u8 imm_data_valid : 1;
+ __u8 push_wqe : 1;
+ __u8 report_rtt : 1;
+ __u8 udp_hdr : 1;
+ __u8 defer_flag : 1;
+ __u8 solicited : 1;
+ __u32 imm_data;
+ __u32 stag_to_inv;
+ union {
+ struct zxdh_post_send send;
+ struct zxdh_rdma_write rdma_write;
+ struct zxdh_rdma_read rdma_read;
+ struct zxdh_bind_window bind_window;
+ struct zxdh_inv_local_stag inv_local_stag;
+ struct zxdh_inline_rdma_write inline_rdma_write;
+ struct zxdh_inline_rdma_send inline_rdma_send;
+ } op;
+};
+
+struct zxdh_cq_poll_info {
+ __u64 wr_id;
+ zxdh_qp_handle qp_handle;
+ __u32 bytes_xfered;
+ __u32 tcp_seq_num_rtt;
+ __u32 qp_id;
+ __u32 ud_src_qpn;
+ __u32 imm_data;
+ zxdh_stag inv_stag; /* or L_R_Key */
+ enum zxdh_cmpl_status comp_status;
+ __u16 major_err;
+ __u16 minor_err;
+ __u8 op_type;
+ __u8 stag_invalid_set : 1; /* or L_R_Key set */
+ __u8 push_dropped : 1;
+ __u8 error : 1;
+ __u8 solicited_event : 1;
+ __u8 ipv4 : 1;
+ __u8 imm_valid : 1;
+};
+
+enum zxdh_status_code zxdh_inline_rdma_write(struct zxdh_qp *qp,
+ struct zxdh_post_sq_info *info,
+ bool post_sq);
+enum zxdh_status_code zxdh_rc_inline_send(struct zxdh_qp *qp,
+ struct zxdh_post_sq_info *info,
+ bool post_sq);
+enum zxdh_status_code zxdh_ud_inline_send(struct zxdh_qp *qp,
+ struct zxdh_post_sq_info *info,
+ bool post_sq);
+enum zxdh_status_code
+zxdh_mw_bind(struct zxdh_qp *qp, struct zxdh_post_sq_info *info, bool post_sq);
+enum zxdh_status_code zxdh_post_nop(struct zxdh_qp *qp, __u64 wr_id,
+ bool signaled, bool post_sq);
+enum zxdh_status_code zxdh_post_receive(struct zxdh_qp *qp,
+ struct zxdh_post_rq_info *info);
+void zxdh_qp_post_wr(struct zxdh_qp *qp);
+void zxdh_qp_set_shadow_area(struct zxdh_qp *qp);
+enum zxdh_status_code zxdh_rdma_read(struct zxdh_qp *qp,
+ struct zxdh_post_sq_info *info,
+ bool inv_stag, bool post_sq);
+enum zxdh_status_code zxdh_rdma_write(struct zxdh_qp *qp,
+ struct zxdh_post_sq_info *info,
+ bool post_sq);
+enum zxdh_status_code
+zxdh_rc_send(struct zxdh_qp *qp, struct zxdh_post_sq_info *info, bool post_sq);
+enum zxdh_status_code
+zxdh_ud_send(struct zxdh_qp *qp, struct zxdh_post_sq_info *info, bool post_sq);
+enum zxdh_status_code zxdh_stag_local_invalidate(struct zxdh_qp *qp,
+ struct zxdh_post_sq_info *info,
+ bool post_sq);
+
+struct zxdh_wqe_ops {
+ void (*iw_copy_inline_data)(__u8 *dest, __u8 *src, __u32 len,
+ __u8 polarity, bool imm_data_flag);
+ __u16 (*iw_inline_data_size_to_quanta)(__u32 data_size,
+ bool imm_data_flag);
+ void (*iw_set_fragment)(__le64 *wqe, __u32 offset, struct zxdh_sge *sge,
+ __u8 valid);
+ void (*iw_set_mw_bind_wqe)(__le64 *wqe,
+ struct zxdh_bind_window *op_info);
+};
+
+__le64 *get_current_cqe(struct zxdh_cq *cq);
+enum zxdh_status_code zxdh_cq_poll_cmpl(struct zxdh_cq *cq,
+ struct zxdh_cq_poll_info *info);
+void zxdh_cq_request_notification(struct zxdh_cq *cq,
+ enum zxdh_cmpl_notify cq_notify);
+void zxdh_cq_resize(struct zxdh_cq *cq, void *cq_base, int size);
+void zxdh_cq_set_resized_cnt(struct zxdh_cq *qp, __u16 cnt);
+enum zxdh_status_code zxdh_cq_init(struct zxdh_cq *cq,
+ struct zxdh_cq_init_info *info);
+enum zxdh_status_code zxdh_qp_init(struct zxdh_qp *qp,
+ struct zxdh_qp_init_info *info);
+struct zxdh_sq_wr_trk_info {
+ __u64 wrid;
+ __u32 wr_len;
+ __u16 quanta;
+ __u8 reserved[2];
+};
+
+struct zxdh_qp_sq_quanta {
+ __le64 elem[ZXDH_SQE_SIZE];
+};
+
+struct zxdh_qp_rq_quanta {
+ __le64 elem[ZXDH_RQE_SIZE];
+};
+
+struct zxdh_dev_attrs {
+ __u64 feature_flags;
+ __aligned_u64 sq_db_pa;
+ __aligned_u64 cq_db_pa;
+ __u32 max_hw_wq_frags;
+ __u32 max_hw_read_sges;
+ __u32 max_hw_inline;
+ __u32 max_hw_rq_quanta;
+ __u32 max_hw_srq_quanta;
+ __u32 max_hw_wq_quanta;
+ __u32 min_hw_cq_size;
+ __u32 max_hw_cq_size;
+ __u16 max_hw_sq_chunk;
+ __u32 max_hw_srq_wr;
+ __u8 hw_rev;
+ __u8 db_addr_type;
+};
+
+struct zxdh_hw_attrs {
+ struct zxdh_dev_attrs dev_attrs;
+ __u64 max_hw_outbound_msg_size;
+ __u64 max_hw_inbound_msg_size;
+ __u64 max_mr_size;
+ __u32 min_hw_qp_id;
+ __u32 min_hw_aeq_size;
+ __u32 max_hw_aeq_size;
+ __u32 min_hw_ceq_size;
+ __u32 max_hw_ceq_size;
+ __u32 max_hw_device_pages;
+ __u32 max_hw_vf_fpm_id;
+ __u32 first_hw_vf_fpm_id;
+ __u32 max_hw_ird;
+ __u32 max_hw_ord;
+ __u32 max_hw_wqes;
+ __u32 max_hw_pds;
+ __u32 max_hw_ena_vf_count;
+ __u32 max_qp_wr;
+ __u32 max_pe_ready_count;
+ __u32 max_done_count;
+ __u32 max_sleep_count;
+ __u32 max_cqp_compl_wait_time_ms;
+ __u16 max_stat_inst;
+};
+
+struct zxdh_qp {
+ struct zxdh_qp_sq_quanta *sq_base;
+ struct zxdh_qp_rq_quanta *rq_base;
+ struct zxdh_dev_attrs *dev_attrs;
+ __u32 *wqe_alloc_db;
+ struct zxdh_sq_wr_trk_info *sq_wrtrk_array;
+ __u64 *rq_wrid_array;
+ __le64 *shadow_area;
+ __le32 *push_db;
+ __le64 *push_wqe;
+ struct zxdh_ring sq_ring;
+ struct zxdh_ring rq_ring;
+ struct zxdh_ring initial_ring;
+ __u32 qp_id;
+ __u32 qp_caps;
+ __u32 sq_size;
+ __u32 rq_size;
+ __u32 max_sq_frag_cnt;
+ __u32 max_rq_frag_cnt;
+ __u32 max_inline_data;
+ struct zxdh_wqe_ops wqe_ops;
+ __u16 conn_wqes;
+ __u8 qp_type;
+ __u8 swqe_polarity;
+ __u8 swqe_polarity_deferred;
+ __u8 rwqe_polarity;
+ __u8 rq_wqe_size;
+ __u8 rq_wqe_size_multiplier;
+ __u8 deferred_flag : 1;
+ __u8 push_mode : 1; /* whether the last post wqe was pushed */
+ __u8 push_dropped : 1;
+ __u8 sq_flush_complete : 1; /* Indicates flush was seen and SQ was empty after the flush */
+ __u8 rq_flush_complete : 1; /* Indicates flush was seen and RQ was empty after the flush */
+ __u8 destroy_pending : 1; /* Indicates the QP is being destroyed */
+ void *back_qp;
+ zxdh_sgl split_sg_list;
+ pthread_spinlock_t *lock;
+ __u16 rwqe_signature;
+ __u8 dbg_rq_flushed;
+ __u8 sq_flush_seen;
+ __u8 rq_flush_seen;
+ __u8 is_srq;
+ __u16 mtu;
+ __u32 next_psn;
+ __u32 cqe_last_ack_qsn;
+ __u32 qp_last_ack_qsn;
+ __u8 cqe_retry_cnt;
+ __u8 qp_reset_cnt;
+};
+
+struct zxdh_cq {
+ struct zxdh_cqe *cq_base;
+ __u32 *cqe_alloc_db;
+ __u32 *cq_ack_db;
+ __le64 *shadow_area;
+ __u32 cq_id;
+ __u32 cq_size;
+ __u32 cqe_rd_cnt;
+ struct zxdh_ring cq_ring;
+ __u8 polarity;
+ __u8 cqe_size;
+};
+
+struct zxdh_srq {
+ struct zxdh_srq_wqe *srq_base;
+ struct zxdh_dev_attrs *dev_attrs;
+ __le16 *srq_list_base;
+ __le64 *srq_db_base;
+ __u32 srq_id;
+ __u32 srq_size;
+ __u32 log2_srq_size;
+ __u32 srq_list_size;
+ struct zxdh_ring srq_ring;
+ struct zxdh_ring srq_list_ring;
+ __u8 srq_list_polarity;
+ __u64 *srq_wrid_array;
+ __u8 srq_wqe_size;
+ __u8 srq_wqe_size_multiplier;
+ __u32 srq_caps;
+ __u32 max_srq_frag_cnt;
+ __u32 srq_type;
+ pthread_spinlock_t *lock;
+	__u8 srq_flush_complete : 1; /* Indicates flush was seen and SRQ was empty after the flush */
+	__u8 destroy_pending : 1; /* Indicates the SRQ is being destroyed */
+ __u8 srq_flush_seen;
+};
+
+struct zxdh_qp_init_info {
+ struct zxdh_qp_sq_quanta *sq;
+ struct zxdh_qp_rq_quanta *rq;
+ struct zxdh_dev_attrs *dev_attrs;
+ __u32 *wqe_alloc_db;
+ __le64 *shadow_area;
+ struct zxdh_sq_wr_trk_info *sq_wrtrk_array;
+ __u64 *rq_wrid_array;
+ __u32 qp_id;
+ __u32 qp_caps;
+ __u32 sq_size;
+ __u32 rq_size;
+ __u32 max_sq_frag_cnt;
+ __u32 max_rq_frag_cnt;
+ __u32 max_inline_data;
+ __u8 type;
+ int abi_ver;
+ bool legacy_mode;
+};
+
+struct zxdh_cq_init_info {
+ __u32 *cqe_alloc_db;
+ __u32 *cq_ack_db;
+ struct zxdh_cqe *cq_base;
+ __le64 *shadow_area;
+ __u32 cq_size;
+ __u32 cq_id;
+ __u8 cqe_size;
+};
+
+struct zxdh_srq_init_info {
+ struct zxdh_srq_wqe *srq_base;
+ struct zxdh_dev_attrs *dev_attrs;
+ __le16 *srq_list_base;
+ __le64 *srq_db_base;
+ __u64 *srq_wrid_array;
+ __u32 srq_id;
+ __u32 srq_caps;
+ __u32 srq_size;
+ __u32 log2_srq_size;
+ __u32 srq_list_size;
+ __u32 srq_db_size;
+ __u32 max_srq_frag_cnt;
+ __u32 srq_limit;
+};
+
+struct zxdh_wqe_srq_next_sge {
+ __le16 next_wqe_index;
+ __le16 signature;
+ __u8 valid_sge_num;
+ __u8 rsvd[11];
+};
+
+struct zxdh_srq_sge {
+ __le64 addr;
+ __le32 length;
+ __le32 lkey;
+};
+
+struct zxdh_srq_wqe {
+ __le64 elem[ZXDH_SRQE_SIZE];
+};
+
+__le64 *zxdh_qp_get_next_send_wqe(struct zxdh_qp *qp, __u32 *wqe_idx,
+ __u16 quanta, __u32 total_size,
+ struct zxdh_post_sq_info *info);
+__le64 *zxdh_qp_get_next_recv_wqe(struct zxdh_qp *qp, __u32 *wqe_idx);
+void zxdh_clean_cq(void *q, struct zxdh_cq *cq);
+enum zxdh_status_code zxdh_nop(struct zxdh_qp *qp, __u64 wr_id, bool signaled,
+ bool post_sq);
+enum zxdh_status_code zxdh_fragcnt_to_quanta_sq(__u32 frag_cnt, __u16 *quanta);
+enum zxdh_status_code zxdh_fragcnt_to_wqesize_rq(__u32 frag_cnt,
+ __u16 *wqe_size);
+void zxdh_get_sq_wqe_shift(__u32 sge, __u32 inline_data, __u8 *shift);
+void zxdh_get_rq_wqe_shift(__u32 sge, __u8 *shift);
+enum zxdh_status_code zxdh_get_sqdepth(struct zxdh_dev_attrs *dev_attrs,
+ __u32 sq_size, __u8 shift,
+ __u32 *wqdepth);
+enum zxdh_status_code zxdh_get_rqdepth(struct zxdh_dev_attrs *dev_attrs,
+ __u32 rq_size, __u8 shift,
+ __u32 *wqdepth);
+int zxdh_qp_round_up(__u32 wqdepth);
+int zxdh_cq_round_up(__u32 wqdepth);
+void zxdh_qp_push_wqe(struct zxdh_qp *qp, __le64 *wqe, __u16 quanta,
+ __u32 wqe_idx, bool post_sq);
+void zxdh_clr_wqes(struct zxdh_qp *qp, __u32 qp_wqe_idx);
+
+void zxdh_get_srq_wqe_shift(struct zxdh_dev_attrs *dev_attrs, __u32 sge,
+ __u8 *shift);
+int zxdh_get_srqdepth(__u32 max_hw_srq_quanta, __u32 srq_size, __u8 shift,
+ __u32 *srqdepth);
+__le64 *zxdh_get_srq_wqe(struct zxdh_srq *srq, int wqe_index);
+__le16 *zxdh_get_srq_list_wqe(struct zxdh_srq *srq, __u16 *idx);
+
+enum zxdh_status_code zxdh_srq_init(struct zxdh_srq *srq,
+ struct zxdh_srq_init_info *info);
+void zxdh_free_srq_wqe(struct zxdh_srq *srq, int wqe_index);
+#endif /* ZXDH_VERBS_H */
diff --git a/redhat/rdma-core.spec b/redhat/rdma-core.spec
index c347195..6d82a64 100644
--- a/redhat/rdma-core.spec
+++ b/redhat/rdma-core.spec
@@ -176,6 +176,8 @@ Provides: libocrdma = %{version}-%{release}
Obsoletes: libocrdma < %{version}-%{release}
Provides: librxe = %{version}-%{release}
Obsoletes: librxe < %{version}-%{release}
+Provides: libzrdma = %{version}-%{release}
+Obsoletes: libzrdma < %{version}-%{release}
%description -n libibverbs
libibverbs is a library that allows userspace processes to use RDMA
@@ -202,6 +204,7 @@ Device-specific plug-in ibverbs userspace drivers are included:
- librxe: A software implementation of the RoCE protocol
- libsiw: A software implementation of the iWarp protocol
- libvmw_pvrdma: VMware paravirtual RDMA device
+- libzrdma: ZTE Dinghai RDMA device
%package -n libibverbs-utils
Summary: Examples for the libibverbs library
@@ -583,6 +586,7 @@ fi
%{_libdir}/libmana.so.*
%{_libdir}/libmlx5.so.*
%{_libdir}/libmlx4.so.*
+%{_libdir}/libzrdma.so.*
%config(noreplace) %{_sysconfdir}/libibverbs.d/*.driver
%doc %{_docdir}/%{name}/libibverbs.md
--
2.27.0