diff --git a/drivers/infiniband/hw/erdma/Makefile b/drivers/infiniband/hw/erdma/Makefile index 51d2ef91905a8c4c097e234abf99650e5ce68088..5ee13a025b08c41403b8dfebb98921ea56baf6a8 100644 --- a/drivers/infiniband/hw/erdma/Makefile +++ b/drivers/infiniband/hw/erdma/Makefile @@ -1,4 +1,10 @@ # SPDX-License-Identifier: GPL-2.0 obj-$(CONFIG_INFINIBAND_ERDMA) := erdma.o -erdma-y := erdma_cm.o erdma_main.o erdma_cmdq.o erdma_cq.o erdma_verbs.o erdma_qp.o erdma_eq.o +erdma-y :=\ + erdma_cm.o erdma_cmd.o erdma_cmdq.o erdma_compat.o erdma_cq.o\ + erdma_debugfs.o erdma_eq.o erdma_ioctl.o erdma_main.o erdma_qp.o\ + erdma_stats.o erdma_verbs.o compat/sw_av.o compat/sw_comp.o compat/sw_cq.o\ + compat/sw_dev.o compat/sw_icrc.o compat/sw_mcast.o compat/sw_mr.o compat/sw_net.o\ + compat/sw_opcode.o compat/sw_pool.o compat/sw_qp.o compat/sw_queue.o compat/sw_recv.o\ + compat/sw_req.o compat/sw_resp.o compat/sw_task.o compat/sw_verbs.o diff --git a/drivers/infiniband/hw/erdma/compat/rdma_user_sw.h b/drivers/infiniband/hw/erdma/compat/rdma_user_sw.h new file mode 100644 index 0000000000000000000000000000000000000000..ec768eee8aa19219740c8c8c40af69ca75a1a39f --- /dev/null +++ b/drivers/infiniband/hw/erdma/compat/rdma_user_sw.h @@ -0,0 +1,184 @@ +/* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR Linux-OpenIB) */ +/* + * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#ifndef RDMA_USER_SW_H +#define RDMA_USER_SW_H + +#include +#include +#include +#include + +enum { + SW_NETWORK_TYPE_IPV4 = 1, + SW_NETWORK_TYPE_IPV6 = 2, +}; + +union sw_gid { + __u8 raw[16]; + struct { + __be64 subnet_prefix; + __be64 interface_id; + } global; +}; + +struct sw_global_route { + union sw_gid dgid; + __u32 flow_label; + __u8 sgid_index; + __u8 hop_limit; + __u8 traffic_class; +}; + +struct sw_av { + __u8 port_num; + /* From SW_NETWORK_TYPE_* */ + __u8 network_type; + __u8 dmac[6]; + struct sw_global_route grh; + union { + struct sockaddr_in _sockaddr_in; + struct sockaddr_in6 _sockaddr_in6; + } sgid_addr, dgid_addr; +}; + +struct sw_send_wr { + __aligned_u64 wr_id; + __u32 num_sge; + __u32 opcode; + __u32 send_flags; + union { + __be32 imm_data; + __u32 invalidate_rkey; + } ex; + union { + struct { + __aligned_u64 remote_addr; + __u32 rkey; + __u32 reserved; + } rdma; + struct { + __aligned_u64 remote_addr; + __aligned_u64 compare_add; + __aligned_u64 swap; + __u32 rkey; + __u32 reserved; + } atomic; + struct { + __u32 remote_qpn; + __u32 remote_qkey; + __u16 pkey_index; + } ud; + /* reg is only used by the kernel and is not part of the uapi */ + struct { + union { + struct ib_mr *mr; + __aligned_u64 reserved; + }; + __u32 key; + __u32 access; + } reg; + } wr; +}; + +struct sw_sge { + __aligned_u64 addr; + __u32 length; + __u32 lkey; +}; + +struct mminfo { + __aligned_u64 offset; + __u32 size; + __u32 pad; +}; + +struct sw_dma_info { + __u32 length; + __u32 resid; + __u32 cur_sge; + __u32 num_sge; + __u32 sge_offset; + __u32 reserved; + union { + __DECLARE_FLEX_ARRAY(__u8, inline_data); + __DECLARE_FLEX_ARRAY(struct sw_sge, sge); + }; +}; + +struct sw_send_wqe { + struct sw_send_wr wr; + struct sw_av av; + __u32 status; + __u32 state; + __aligned_u64 iova; + __u32 mask; + __u32 first_psn; + __u32 last_psn; + __u32 ack_length; + __u32 ssn; + __u32 has_rd_atomic; + struct sw_dma_info dma; +}; + +struct sw_recv_wqe { + __aligned_u64 wr_id; + __u32 num_sge; + __u32 padding; + struct sw_dma_info dma; +}; + +struct sw_create_cq_resp { + struct mminfo mi; +}; + +struct sw_resize_cq_resp { + struct mminfo mi; +}; + +struct sw_create_qp_resp { + struct mminfo rq_mi; + struct mminfo sq_mi; +}; + +struct sw_create_srq_resp { + struct mminfo mi; + __u32 srq_num; + __u32 reserved; +}; + +struct sw_modify_srq_cmd { + __aligned_u64 mmap_info_addr; +}; + +#endif /* RDMA_USER_SW_H */ diff --git a/drivers/infiniband/hw/erdma/compat/sw.h b/drivers/infiniband/hw/erdma/compat/sw.h new file mode 100644 index 0000000000000000000000000000000000000000..8cca5cc3a87395adf395373c5ea785b43f6a58da --- /dev/null +++ b/drivers/infiniband/hw/erdma/compat/sw.h @@ -0,0 +1,93 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* + * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved. + * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved. + */ + +#ifndef SW_H +#define SW_H + +#ifdef pr_fmt +#undef pr_fmt +#endif +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "sw_net.h" +#include "sw_opcode.h" +#include "sw_hdr.h" +#include "sw_param.h" +#include "sw_verbs.h" +#include "sw_loc.h" + +#include "../erdma.h" +/* + * Version 1 and Version 2 are identical on 64 bit machines, but on 32 bit + * machines Version 2 has a different struct layout. 
+ */ +#define SW_UVERBS_ABI_VERSION 2 + +#define SW_ROCE_V2_SPORT (0xc000) + +extern bool sw_initialized; + +static inline u32 sw_crc32(struct sw_dev *sw, + u32 crc, void *next, size_t len) +{ + u32 retval; + int err; + + SHASH_DESC_ON_STACK(shash, sw->tfm); + + shash->tfm = sw->tfm; + *(u32 *)shash_desc_ctx(shash) = crc; + err = crypto_shash_update(shash, next, len); + if (unlikely(err)) { + pr_warn_ratelimited("failed crc calculation, err: %d\n", err); + return crc32_le(crc, next, len); + } + + retval = *(u32 *)shash_desc_ctx(shash); + barrier_data(shash_desc_ctx(shash)); + return retval; +} + +void sw_set_mtu(struct sw_dev *sw, unsigned int dev_mtu); + +int sw_add(struct sw_dev *sw, unsigned int mtu, const char *ibdev_name); + +void sw_rcv(struct sk_buff *skb); + +/* The caller must do a matching ib_device_put(&dev->ib_dev) */ +static inline struct sw_dev *sw_get_dev_from_net(struct net_device *ndev) +{ + struct ib_device *ibdev; + struct erdma_dev *dev; + + ibdev = ib_device_get_by_netdev(ndev, RDMA_DRIVER_ERDMA); + if (!ibdev) { + pr_err_ratelimited("ib_device_get_by_netdev non"); + return NULL; + } + + dev = container_of(ibdev, struct erdma_dev, ibdev); + return &dev->sw_dev; +} + +void sw_port_up(struct sw_dev *sw); +void sw_port_down(struct sw_dev *sw); +void sw_set_port_state(struct sw_dev *sw); + +#endif /* SW_H */ diff --git a/drivers/infiniband/hw/erdma/compat/sw_av.c b/drivers/infiniband/hw/erdma/compat/sw_av.c new file mode 100644 index 0000000000000000000000000000000000000000..67e76c987d03e2876bce9ec2b9817488733a97b8 --- /dev/null +++ b/drivers/infiniband/hw/erdma/compat/sw_av.c @@ -0,0 +1,111 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* + * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved. + * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved. 
+ */ + +#include "sw.h" +#include "sw_loc.h" + +void sw_init_av(struct ib_device *ibdev, struct rdma_ah_attr *attr, struct sw_av *av) +{ + sw_av_from_attr(rdma_ah_get_port_num(attr), av, attr); + sw_av_fill_ip_info(ibdev, av, attr); + memcpy(av->dmac, attr->roce.dmac, ETH_ALEN); +} + +int sw_av_chk_attr(struct sw_dev *sw, struct rdma_ah_attr *attr) +{ + const struct ib_global_route *grh = rdma_ah_read_grh(attr); + struct sw_port *port; + int type; + + port = &sw->port; + + if (rdma_ah_get_ah_flags(attr) & IB_AH_GRH) { + if (grh->sgid_index > port->attr.gid_tbl_len) { + pr_warn("invalid sgid index = %d\n", + grh->sgid_index); + return -EINVAL; + } + + type = rdma_gid_attr_network_type(grh->sgid_attr); + + if (type < RDMA_NETWORK_IPV4 || + type > RDMA_NETWORK_IPV6) { + pr_warn("invalid network type for rdma_sw = %d\n", + type); + return -EINVAL; + } + } + + return 0; +} + +void sw_av_from_attr(u8 port_num, struct sw_av *av, + struct rdma_ah_attr *attr) +{ + const struct ib_global_route *grh = rdma_ah_read_grh(attr); + + memset(av, 0, sizeof(*av)); + memcpy(av->grh.dgid.raw, grh->dgid.raw, sizeof(grh->dgid.raw)); + av->grh.flow_label = grh->flow_label; + av->grh.sgid_index = grh->sgid_index; + av->grh.hop_limit = grh->hop_limit; + av->grh.traffic_class = grh->traffic_class; + av->port_num = port_num; +} + +void sw_av_to_attr(struct sw_av *av, struct rdma_ah_attr *attr) +{ + struct ib_global_route *grh = rdma_ah_retrieve_grh(attr); + + attr->type = RDMA_AH_ATTR_TYPE_ROCE; + + memcpy(grh->dgid.raw, av->grh.dgid.raw, sizeof(av->grh.dgid.raw)); + grh->flow_label = av->grh.flow_label; + grh->sgid_index = av->grh.sgid_index; + grh->hop_limit = av->grh.hop_limit; + grh->traffic_class = av->grh.traffic_class; + + rdma_ah_set_ah_flags(attr, IB_AH_GRH); + rdma_ah_set_port_num(attr, av->port_num); +} + +void sw_av_fill_ip_info(struct ib_device *ibdev, struct sw_av *av, struct rdma_ah_attr *attr) +{ + const struct ib_gid_attr *sgid_attr = attr->grh.sgid_attr; + int ibtype; + int type; + + ibtype = rdma_gid_attr_network_type(sgid_attr); + rdma_gid2ip((struct sockaddr *)&av->sgid_addr, &sgid_attr->gid); + rdma_gid2ip((struct sockaddr *)&av->dgid_addr, + &rdma_ah_read_grh(attr)->dgid); + + switch (ibtype) { + case RDMA_NETWORK_IPV4: + type = SW_NETWORK_TYPE_IPV4; + break; + case RDMA_NETWORK_IPV6: + type = SW_NETWORK_TYPE_IPV6; + break; + default: + /* not reached - checked in sw_av_chk_attr */ + type = 0; + break; + } + + av->network_type = type; +} + +struct sw_av *sw_get_av(struct sw_pkt_info *pkt) +{ + if (!pkt || !pkt->qp) + return NULL; + + if (qp_type(pkt->qp) == IB_QPT_RC || qp_type(pkt->qp) == IB_QPT_UC) + return &pkt->qp->pri_av; + + return (pkt->wqe) ? &pkt->wqe->av : NULL; +} diff --git a/drivers/infiniband/hw/erdma/compat/sw_comp.c b/drivers/infiniband/hw/erdma/compat/sw_comp.c new file mode 100644 index 0000000000000000000000000000000000000000..bd63a90c52c15537691fdb43d502387abc428dc7 --- /dev/null +++ b/drivers/infiniband/hw/erdma/compat/sw_comp.c @@ -0,0 +1,761 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* + * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved. + * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved. 
+ */ + +#include + +#include "sw.h" +#include "sw_loc.h" +#include "sw_queue.h" +#include "sw_task.h" + +enum comp_state { + COMPST_GET_ACK, + COMPST_GET_WQE, + COMPST_COMP_WQE, + COMPST_COMP_ACK, + COMPST_CHECK_PSN, + COMPST_CHECK_ACK, + COMPST_READ, + COMPST_ATOMIC, + COMPST_WRITE_SEND, + COMPST_UPDATE_COMP, + COMPST_ERROR_RETRY, + COMPST_RNR_RETRY, + COMPST_ERROR, + COMPST_EXIT, /* We have an issue, and we want to rerun the completer */ + COMPST_DONE, /* The completer finished successflly */ +}; + +static char *comp_state_name[] = { + [COMPST_GET_ACK] = "GET ACK", + [COMPST_GET_WQE] = "GET WQE", + [COMPST_COMP_WQE] = "COMP WQE", + [COMPST_COMP_ACK] = "COMP ACK", + [COMPST_CHECK_PSN] = "CHECK PSN", + [COMPST_CHECK_ACK] = "CHECK ACK", + [COMPST_READ] = "READ", + [COMPST_ATOMIC] = "ATOMIC", + [COMPST_WRITE_SEND] = "WRITE/SEND", + [COMPST_UPDATE_COMP] = "UPDATE COMP", + [COMPST_ERROR_RETRY] = "ERROR RETRY", + [COMPST_RNR_RETRY] = "RNR RETRY", + [COMPST_ERROR] = "ERROR", + [COMPST_EXIT] = "EXIT", + [COMPST_DONE] = "DONE", +}; + +static unsigned long rnrnak_usec[32] = { + [IB_RNR_TIMER_655_36] = 655360, + [IB_RNR_TIMER_000_01] = 10, + [IB_RNR_TIMER_000_02] = 20, + [IB_RNR_TIMER_000_03] = 30, + [IB_RNR_TIMER_000_04] = 40, + [IB_RNR_TIMER_000_06] = 60, + [IB_RNR_TIMER_000_08] = 80, + [IB_RNR_TIMER_000_12] = 120, + [IB_RNR_TIMER_000_16] = 160, + [IB_RNR_TIMER_000_24] = 240, + [IB_RNR_TIMER_000_32] = 320, + [IB_RNR_TIMER_000_48] = 480, + [IB_RNR_TIMER_000_64] = 640, + [IB_RNR_TIMER_000_96] = 960, + [IB_RNR_TIMER_001_28] = 1280, + [IB_RNR_TIMER_001_92] = 1920, + [IB_RNR_TIMER_002_56] = 2560, + [IB_RNR_TIMER_003_84] = 3840, + [IB_RNR_TIMER_005_12] = 5120, + [IB_RNR_TIMER_007_68] = 7680, + [IB_RNR_TIMER_010_24] = 10240, + [IB_RNR_TIMER_015_36] = 15360, + [IB_RNR_TIMER_020_48] = 20480, + [IB_RNR_TIMER_030_72] = 30720, + [IB_RNR_TIMER_040_96] = 40960, + [IB_RNR_TIMER_061_44] = 61410, + [IB_RNR_TIMER_081_92] = 81920, + [IB_RNR_TIMER_122_88] = 122880, + [IB_RNR_TIMER_163_84] = 163840, + [IB_RNR_TIMER_245_76] = 245760, + [IB_RNR_TIMER_327_68] = 327680, + [IB_RNR_TIMER_491_52] = 491520, +}; + +static inline unsigned long rnrnak_jiffies(u8 timeout) +{ + return max_t(unsigned long, + usecs_to_jiffies(rnrnak_usec[timeout]), 1); +} + +static enum ib_wc_opcode wr_to_wc_opcode(enum ib_wr_opcode opcode) +{ + switch (opcode) { + case IB_WR_RDMA_WRITE: return IB_WC_RDMA_WRITE; + case IB_WR_RDMA_WRITE_WITH_IMM: return IB_WC_RDMA_WRITE; + case IB_WR_SEND: return IB_WC_SEND; + case IB_WR_SEND_WITH_IMM: return IB_WC_SEND; + case IB_WR_RDMA_READ: return IB_WC_RDMA_READ; + case IB_WR_ATOMIC_CMP_AND_SWP: return IB_WC_COMP_SWAP; + case IB_WR_ATOMIC_FETCH_AND_ADD: return IB_WC_FETCH_ADD; + case IB_WR_LSO: return IB_WC_LSO; + case IB_WR_SEND_WITH_INV: return IB_WC_SEND; + case IB_WR_RDMA_READ_WITH_INV: return IB_WC_RDMA_READ; + case IB_WR_LOCAL_INV: return IB_WC_LOCAL_INV; + case IB_WR_REG_MR: return IB_WC_REG_MR; + + default: + return 0xff; + } +} + +void sw_retransmit_timer(struct timer_list *t) +{ + struct sw_qp *qp = from_timer(qp, t, retrans_timer); + + if (qp->valid) { + qp->comp.timeout = 1; + sw_run_task(&qp->comp.task, 1); + } +} + +void sw_comp_queue_pkt(struct sw_qp *qp, struct sk_buff *skb) +{ + int must_sched; + + skb_queue_tail(&qp->resp_pkts, skb); + + must_sched = skb_queue_len(&qp->resp_pkts) > 1; + if (must_sched != 0) + sw_counter_inc(SKB_TO_PKT(skb)->sw, SW_CNT_COMPLETER_SCHED); + + sw_run_task(&qp->comp.task, must_sched); +} + +static inline enum comp_state get_wqe(struct sw_qp *qp, + struct sw_pkt_info 
*pkt, + struct sw_send_wqe **wqe_p) +{ + struct sw_send_wqe *wqe; + + /* we come here whether or not we found a response packet to see if + * there are any posted WQEs + */ + wqe = queue_head(qp->sq.queue); + *wqe_p = wqe; + + /* no WQE or requester has not started it yet */ + if (!wqe || wqe->state == wqe_state_posted) + return pkt ? COMPST_DONE : COMPST_EXIT; + + /* WQE does not require an ack */ + if (wqe->state == wqe_state_done) + return COMPST_COMP_WQE; + + /* WQE caused an error */ + if (wqe->state == wqe_state_error) + return COMPST_ERROR; + + /* we have a WQE, if we also have an ack check its PSN */ + return pkt ? COMPST_CHECK_PSN : COMPST_EXIT; +} + +static inline void reset_retry_counters(struct sw_qp *qp) +{ + qp->comp.retry_cnt = qp->attr.retry_cnt; + qp->comp.rnr_retry = qp->attr.rnr_retry; + qp->comp.started_retry = 0; +} + +static inline enum comp_state check_psn(struct sw_qp *qp, + struct sw_pkt_info *pkt, + struct sw_send_wqe *wqe) +{ + s32 diff; + + /* check to see if response is past the oldest WQE. if it is, complete + * send/write or error read/atomic + */ + diff = psn_compare(pkt->psn, wqe->last_psn); + if (diff > 0) { + if (wqe->state == wqe_state_pending) { + if (wqe->mask & WR_ATOMIC_OR_READ_MASK) + return COMPST_ERROR_RETRY; + + reset_retry_counters(qp); + return COMPST_COMP_WQE; + } else { + return COMPST_DONE; + } + } + + /* compare response packet to expected response */ + diff = psn_compare(pkt->psn, qp->comp.psn); + if (diff < 0) { + /* response is most likely a retried packet if it matches an + * uncompleted WQE go complete it else ignore it + */ + if (pkt->psn == wqe->last_psn) + return COMPST_COMP_ACK; + else + return COMPST_DONE; + } else if ((diff > 0) && (wqe->mask & WR_ATOMIC_OR_READ_MASK)) { + return COMPST_DONE; + } else { + return COMPST_CHECK_ACK; + } +} + +static inline enum comp_state check_ack(struct sw_qp *qp, + struct sw_pkt_info *pkt, + struct sw_send_wqe *wqe) +{ + unsigned int mask = pkt->mask; + u8 syn; + struct sw_dev *sw = to_rdev(qp->ibqp.device); + + /* Check the sequence only */ + switch (qp->comp.opcode) { + case -1: + /* Will catch all *_ONLY cases. */ + if (!(mask & SW_START_MASK)) + return COMPST_ERROR; + + break; + + case IB_OPCODE_RC_RDMA_READ_RESPONSE_FIRST: + case IB_OPCODE_RC_RDMA_READ_RESPONSE_MIDDLE: + if (pkt->opcode != IB_OPCODE_RC_RDMA_READ_RESPONSE_MIDDLE && + pkt->opcode != IB_OPCODE_RC_RDMA_READ_RESPONSE_LAST) { + /* read retries of partial data may restart from + * read response first or response only. + */ + if ((pkt->psn == wqe->first_psn && + pkt->opcode == + IB_OPCODE_RC_RDMA_READ_RESPONSE_FIRST) || + (wqe->first_psn == wqe->last_psn && + pkt->opcode == + IB_OPCODE_RC_RDMA_READ_RESPONSE_ONLY)) + break; + + return COMPST_ERROR; + } + break; + default: + WARN_ON_ONCE(1); + } + + /* Check operation validity. 
*/ + switch (pkt->opcode) { + case IB_OPCODE_RC_RDMA_READ_RESPONSE_FIRST: + case IB_OPCODE_RC_RDMA_READ_RESPONSE_LAST: + case IB_OPCODE_RC_RDMA_READ_RESPONSE_ONLY: + syn = aeth_syn(pkt); + + if ((syn & AETH_TYPE_MASK) != AETH_ACK) + return COMPST_ERROR; +#ifdef fallthrough + fallthrough; +#endif + /* (IB_OPCODE_RC_RDMA_READ_RESPONSE_MIDDLE doesn't have an AETH) + */ + case IB_OPCODE_RC_RDMA_READ_RESPONSE_MIDDLE: + if (wqe->wr.opcode != IB_WR_RDMA_READ && + wqe->wr.opcode != IB_WR_RDMA_READ_WITH_INV) { + wqe->status = IB_WC_FATAL_ERR; + return COMPST_ERROR; + } + reset_retry_counters(qp); + return COMPST_READ; + + case IB_OPCODE_RC_ATOMIC_ACKNOWLEDGE: + syn = aeth_syn(pkt); + + if ((syn & AETH_TYPE_MASK) != AETH_ACK) + return COMPST_ERROR; + + if (wqe->wr.opcode != IB_WR_ATOMIC_CMP_AND_SWP && + wqe->wr.opcode != IB_WR_ATOMIC_FETCH_AND_ADD) + return COMPST_ERROR; + reset_retry_counters(qp); + return COMPST_ATOMIC; + + case IB_OPCODE_RC_ACKNOWLEDGE: + syn = aeth_syn(pkt); + switch (syn & AETH_TYPE_MASK) { + case AETH_ACK: + reset_retry_counters(qp); + return COMPST_WRITE_SEND; + + case AETH_RNR_NAK: + sw_counter_inc(sw, SW_CNT_RCV_RNR); + return COMPST_RNR_RETRY; + + case AETH_NAK: + switch (syn) { + case AETH_NAK_PSN_SEQ_ERROR: + /* a nak implicitly acks all packets with psns + * before + */ + if (psn_compare(pkt->psn, qp->comp.psn) > 0) { + sw_counter_inc(sw, + SW_CNT_RCV_SEQ_ERR); + qp->comp.psn = pkt->psn; + if (qp->req.wait_psn) { + qp->req.wait_psn = 0; + sw_run_task(&qp->req.task, 0); + } + } + return COMPST_ERROR_RETRY; + + case AETH_NAK_INVALID_REQ: + wqe->status = IB_WC_REM_INV_REQ_ERR; + return COMPST_ERROR; + + case AETH_NAK_REM_ACC_ERR: + wqe->status = IB_WC_REM_ACCESS_ERR; + return COMPST_ERROR; + + case AETH_NAK_REM_OP_ERR: + wqe->status = IB_WC_REM_OP_ERR; + return COMPST_ERROR; + + default: + pr_warn("unexpected nak %x\n", syn); + wqe->status = IB_WC_REM_OP_ERR; + return COMPST_ERROR; + } + + default: + return COMPST_ERROR; + } + break; + + default: + pr_warn("unexpected opcode\n"); + } + + return COMPST_ERROR; +} + +static inline enum comp_state do_read(struct sw_qp *qp, + struct sw_pkt_info *pkt, + struct sw_send_wqe *wqe) +{ + int ret; + + ret = sw_copy_data(qp->pd, IB_ACCESS_LOCAL_WRITE, + &wqe->dma, payload_addr(pkt), + payload_size(pkt), to_mem_obj, NULL); + if (ret) + return COMPST_ERROR; + + if (wqe->dma.resid == 0 && (pkt->mask & SW_END_MASK)) + return COMPST_COMP_ACK; + else + return COMPST_UPDATE_COMP; +} + +static inline enum comp_state do_atomic(struct sw_qp *qp, + struct sw_pkt_info *pkt, + struct sw_send_wqe *wqe) +{ + int ret; + + u64 atomic_orig = atmack_orig(pkt); + + ret = sw_copy_data(qp->pd, IB_ACCESS_LOCAL_WRITE, + &wqe->dma, &atomic_orig, + sizeof(u64), to_mem_obj, NULL); + if (ret) + return COMPST_ERROR; + else + return COMPST_COMP_ACK; +} + +static void make_send_cqe(struct sw_qp *qp, struct sw_send_wqe *wqe, + struct sw_cqe *cqe) +{ + struct ib_wc *wc = &cqe->ibwc; + + memset(cqe, 0, sizeof(*cqe)); + + wc->wr_id = wqe->wr.wr_id; + wc->status = wqe->status; + wc->opcode = wr_to_wc_opcode(wqe->wr.opcode); + if (wqe->wr.opcode == IB_WR_RDMA_WRITE_WITH_IMM || + wqe->wr.opcode == IB_WR_SEND_WITH_IMM) + wc->wc_flags = IB_WC_WITH_IMM; + wc->byte_len = wqe->dma.length; + wc->qp = &qp->ibqp; +} + +/* + * IBA Spec. Section 10.7.3.1 SIGNALED COMPLETIONS + * ---------8<---------8<------------- + * ...Note that if a completion error occurs, a Work Completion + * will always be generated, even if the signaling + * indicator requests an Unsignaled Completion. 
+ * ---------8<---------8<------------- + */ +static void do_complete(struct sw_qp *qp, struct sw_send_wqe *wqe) +{ + struct sw_dev *sw = to_rdev(qp->ibqp.device); + struct sw_cqe cqe; + + if ((qp->sq_sig_type == IB_SIGNAL_ALL_WR) || + (wqe->wr.send_flags & IB_SEND_SIGNALED) || + wqe->status != IB_WC_SUCCESS) { + make_send_cqe(qp, wqe, &cqe); + advance_consumer(qp->sq.queue); + sw_cq_post(qp->scq, &cqe, 0); + } else { + advance_consumer(qp->sq.queue); + } + + if (wqe->wr.opcode == IB_WR_SEND || + wqe->wr.opcode == IB_WR_SEND_WITH_IMM || + wqe->wr.opcode == IB_WR_SEND_WITH_INV) + sw_counter_inc(sw, SW_CNT_RDMA_SEND); + + /* + * we completed something so let req run again + * if it is trying to fence + */ + if (qp->req.wait_fence) { + qp->req.wait_fence = 0; + sw_run_task(&qp->req.task, 0); + } +} + +static inline enum comp_state complete_ack(struct sw_qp *qp, + struct sw_pkt_info *pkt, + struct sw_send_wqe *wqe) +{ + unsigned long flags; + + if (wqe->has_rd_atomic) { + wqe->has_rd_atomic = 0; + atomic_inc(&qp->req.rd_atomic); + if (qp->req.need_rd_atomic) { + qp->comp.timeout_retry = 0; + qp->req.need_rd_atomic = 0; + sw_run_task(&qp->req.task, 0); + } + } + + if (unlikely(qp->req.state == QP_STATE_DRAIN)) { + /* state_lock used by requester & completer */ + spin_lock_irqsave(&qp->state_lock, flags); + if ((qp->req.state == QP_STATE_DRAIN) && + (qp->comp.psn == qp->req.psn)) { + qp->req.state = QP_STATE_DRAINED; + spin_unlock_irqrestore(&qp->state_lock, flags); + + if (qp->ibqp.event_handler) { + struct ib_event ev; + + ev.device = qp->ibqp.device; + ev.element.qp = &qp->ibqp; + ev.event = IB_EVENT_SQ_DRAINED; + qp->ibqp.event_handler(&ev, + qp->ibqp.qp_context); + } + } else { + spin_unlock_irqrestore(&qp->state_lock, flags); + } + } + + do_complete(qp, wqe); + + if (psn_compare(pkt->psn, qp->comp.psn) >= 0) + return COMPST_UPDATE_COMP; + else + return COMPST_DONE; +} + +static inline enum comp_state complete_wqe(struct sw_qp *qp, + struct sw_pkt_info *pkt, + struct sw_send_wqe *wqe) +{ + if (pkt && wqe->state == wqe_state_pending) { + if (psn_compare(wqe->last_psn, qp->comp.psn) >= 0) { + qp->comp.psn = (wqe->last_psn + 1) & BTH_PSN_MASK; + qp->comp.opcode = -1; + } + + if (qp->req.wait_psn) { + qp->req.wait_psn = 0; + sw_run_task(&qp->req.task, 1); + } + } + + do_complete(qp, wqe); + + return COMPST_GET_WQE; +} + +static void sw_drain_resp_pkts(struct sw_qp *qp, bool notify) +{ + struct sk_buff *skb; + struct sw_send_wqe *wqe; + + while ((skb = skb_dequeue(&qp->resp_pkts))) { + sw_drop_ref(qp); + kfree_skb(skb); + } + + while ((wqe = queue_head(qp->sq.queue))) { + if (notify) { + wqe->status = IB_WC_WR_FLUSH_ERR; + do_complete(qp, wqe); + } else { + advance_consumer(qp->sq.queue); + } + } +} + +int sw_completer(void *arg) +{ + struct sw_qp *qp = (struct sw_qp *)arg; + struct sw_dev *sw = to_rdev(qp->ibqp.device); + struct sw_send_wqe *wqe = NULL; + struct sk_buff *skb = NULL; + struct sw_pkt_info *pkt = NULL; + enum comp_state state; + + sw_add_ref(qp); + + if (!qp->valid || qp->req.state == QP_STATE_ERROR || + qp->req.state == QP_STATE_RESET) { + sw_drain_resp_pkts(qp, qp->valid && + qp->req.state == QP_STATE_ERROR); + goto exit; + } + + if (qp->comp.timeout) { + qp->comp.timeout_retry = 1; + qp->comp.timeout = 0; + } else { + qp->comp.timeout_retry = 0; + } + + if (qp->req.need_retry) + goto exit; + + state = COMPST_GET_ACK; + + while (1) { + pr_debug("qp#%d state = %s\n", qp_num(qp), + comp_state_name[state]); + if (state == COMPST_ERROR) + dump_stack(); + switch (state) { + case 
COMPST_GET_ACK: + skb = skb_dequeue(&qp->resp_pkts); + if (skb) { + pkt = SKB_TO_PKT(skb); + qp->comp.timeout_retry = 0; + } + state = COMPST_GET_WQE; + break; + + case COMPST_GET_WQE: + state = get_wqe(qp, pkt, &wqe); + break; + + case COMPST_CHECK_PSN: + state = check_psn(qp, pkt, wqe); + break; + + case COMPST_CHECK_ACK: + state = check_ack(qp, pkt, wqe); + break; + + case COMPST_READ: + state = do_read(qp, pkt, wqe); + break; + + case COMPST_ATOMIC: + state = do_atomic(qp, pkt, wqe); + break; + + case COMPST_WRITE_SEND: + if (wqe->state == wqe_state_pending && + wqe->last_psn == pkt->psn) + state = COMPST_COMP_ACK; + else + state = COMPST_UPDATE_COMP; + break; + + case COMPST_COMP_ACK: + state = complete_ack(qp, pkt, wqe); + break; + + case COMPST_COMP_WQE: + state = complete_wqe(qp, pkt, wqe); + break; + + case COMPST_UPDATE_COMP: + if (pkt->mask & SW_END_MASK) + qp->comp.opcode = -1; + else + qp->comp.opcode = pkt->opcode; + + if (psn_compare(pkt->psn, qp->comp.psn) >= 0) + qp->comp.psn = (pkt->psn + 1) & BTH_PSN_MASK; + + if (qp->req.wait_psn) { + qp->req.wait_psn = 0; + sw_run_task(&qp->req.task, 1); + } + + state = COMPST_DONE; + break; + + case COMPST_DONE: + if (pkt) { + sw_drop_ref(pkt->qp); + kfree_skb(skb); + skb = NULL; + } + goto done; + + case COMPST_EXIT: + if (qp->comp.timeout_retry && wqe) { + state = COMPST_ERROR_RETRY; + break; + } + + /* re reset the timeout counter if + * (1) QP is type RC + * (2) the QP is alive + * (3) there is a packet sent by the requester that + * might be acked (we still might get spurious + * timeouts but try to keep them as few as possible) + * (4) the timeout parameter is set + */ + if ((qp_type(qp) == IB_QPT_RC) && + (qp->req.state == QP_STATE_READY) && + (psn_compare(qp->req.psn, qp->comp.psn) > 0) && + qp->qp_timeout_jiffies) + mod_timer(&qp->retrans_timer, + jiffies + qp->qp_timeout_jiffies); + goto exit; + + case COMPST_ERROR_RETRY: + /* we come here if the retry timer fired and we did + * not receive a response packet. try to retry the send + * queue if that makes sense and the limits have not + * been exceeded. remember that some timeouts are + * spurious since we do not reset the timer but kick + * it down the road or let it expire + */ + + /* there is nothing to retry in this case */ + if (!wqe || (wqe->state == wqe_state_posted)) + goto exit; + + /* if we've started a retry, don't start another + * retry sequence, unless this is a timeout. 
+ */ + if (qp->comp.started_retry && + !qp->comp.timeout_retry) { + if (pkt) { + sw_drop_ref(pkt->qp); + kfree_skb(skb); + skb = NULL; + } + + goto done; + } + + if (qp->comp.retry_cnt > 0) { + if (qp->comp.retry_cnt != 7) + qp->comp.retry_cnt--; + + /* no point in retrying if we have already + * seen the last ack that the requester could + * have caused + */ + if (psn_compare(qp->req.psn, + qp->comp.psn) > 0) { + /* tell the requester to retry the + * send queue next time around + */ + sw_counter_inc(sw, + SW_CNT_COMP_RETRY); + qp->req.need_retry = 1; + qp->comp.started_retry = 1; + sw_run_task(&qp->req.task, 0); + } + + if (pkt) { + sw_drop_ref(pkt->qp); + kfree_skb(skb); + skb = NULL; + } + + goto done; + + } else { + sw_counter_inc(sw, SW_CNT_RETRY_EXCEEDED); + wqe->status = IB_WC_RETRY_EXC_ERR; + state = COMPST_ERROR; + } + break; + + case COMPST_RNR_RETRY: + if (qp->comp.rnr_retry > 0) { + if (qp->comp.rnr_retry != 7) + qp->comp.rnr_retry--; + + qp->req.need_retry = 1; + pr_debug("qp#%d set rnr nak timer\n", + qp_num(qp)); + mod_timer(&qp->rnr_nak_timer, + jiffies + rnrnak_jiffies(aeth_syn(pkt) + & ~AETH_TYPE_MASK)); + sw_drop_ref(pkt->qp); + kfree_skb(skb); + skb = NULL; + goto exit; + } else { + sw_counter_inc(sw, + SW_CNT_RNR_RETRY_EXCEEDED); + wqe->status = IB_WC_RNR_RETRY_EXC_ERR; + state = COMPST_ERROR; + } + break; + + case COMPST_ERROR: + WARN_ON_ONCE(wqe->status == IB_WC_SUCCESS); + do_complete(qp, wqe); + sw_qp_error(qp); + + if (pkt) { + sw_drop_ref(pkt->qp); + kfree_skb(skb); + skb = NULL; + } + + goto exit; + } + } + +exit: + /* we come here if we are done with processing and want the task to + * exit from the loop calling us + */ + WARN_ON_ONCE(skb); + sw_drop_ref(qp); + return -EAGAIN; + +done: + /* we come here if we have processed a packet we want the task to call + * us again to see if there is anything else to do + */ + WARN_ON_ONCE(skb); + sw_drop_ref(qp); + return 0; +} diff --git a/drivers/infiniband/hw/erdma/compat/sw_cq.c b/drivers/infiniband/hw/erdma/compat/sw_cq.c new file mode 100644 index 0000000000000000000000000000000000000000..5921c161084d0e90a971730b308fcf83326cdc43 --- /dev/null +++ b/drivers/infiniband/hw/erdma/compat/sw_cq.c @@ -0,0 +1,131 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* + * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved. + * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved. 
+ */ +#include +#include "sw.h" +#include "sw_loc.h" +#include "sw_queue.h" +#include "../erdma_verbs.h" + +int sw_cq_chk_attr(struct sw_dev *sw, struct sw_cq *cq, + int cqe, int comp_vector) +{ + int count; + + if (cqe <= 0) { + pr_warn("cqe(%d) <= 0\n", cqe); + goto err1; + } + + if (cqe > sw->attr.max_cqe) { + pr_warn("cqe(%d) > max_cqe(%d)\n", + cqe, sw->attr.max_cqe); + goto err1; + } + + if (cq) { + count = queue_count(cq->queue); + if (cqe < count) { + pr_warn("cqe(%d) < current # elements in queue (%d)", + cqe, count); + goto err1; + } + } + + return 0; + +err1: + return -EINVAL; +} + +static void sw_send_complete(unsigned long data) +{ + struct sw_cq *cq = (struct sw_cq *)data; + unsigned long flags; + + spin_lock_irqsave(&cq->cq_lock, flags); + if (cq->is_dying) { + spin_unlock_irqrestore(&cq->cq_lock, flags); + return; + } + spin_unlock_irqrestore(&cq->cq_lock, flags); + + cq->master->ibcq.comp_handler(&cq->master->ibcq, cq->master->ibcq.cq_context); +} + +int sw_cq_from_init(struct sw_dev *sw, struct sw_cq *cq, int cqe, + int comp_vector, struct ib_udata *udata, + struct sw_create_cq_resp __user *uresp) +{ + cq->queue = sw_queue_init(sw, &cqe, + sizeof(struct sw_cqe)); + if (!cq->queue) { + pr_warn("unable to create cq\n"); + return -ENOMEM; + } + + cq->is_dying = false; + + tasklet_init(&cq->comp_task, sw_send_complete, (unsigned long)cq); + + spin_lock_init(&cq->cq_lock); + cq->ibcq.cqe = cqe; + return 0; +} + +int sw_cq_post(struct sw_cq *cq, struct sw_cqe *cqe, int solicited) +{ + struct ib_event ev; + unsigned long flags; + + spin_lock_irqsave(&cq->cq_lock, flags); + + if (unlikely(queue_full(cq->queue))) { + spin_unlock_irqrestore(&cq->cq_lock, flags); + if (cq->ibcq.event_handler) { + ev.device = cq->ibcq.device; + ev.element.cq = &cq->ibcq; + ev.event = IB_EVENT_CQ_ERR; + cq->ibcq.event_handler(&ev, cq->ibcq.cq_context); + } + + return -EBUSY; + } + + memcpy(producer_addr(cq->queue), cqe, sizeof(*cqe)); + + /* make sure all changes to the CQ are written before we update the + * producer pointer + */ + smp_wmb(); + + advance_producer(cq->queue); + spin_unlock_irqrestore(&cq->cq_lock, flags); + + if ((cq->notify == IB_CQ_NEXT_COMP) || + (cq->notify == IB_CQ_SOLICITED && solicited)) { + cq->notify = 0; + tasklet_schedule(&cq->comp_task); + } + + return 0; +} + +void sw_cq_disable(struct sw_cq *cq) +{ + unsigned long flags; + + spin_lock_irqsave(&cq->cq_lock, flags); + cq->is_dying = true; + spin_unlock_irqrestore(&cq->cq_lock, flags); +} + +void sw_cq_cleanup(struct sw_pool_entry *arg) +{ + struct sw_cq *cq = container_of(arg, typeof(*cq), pelem); + + if (cq->queue) + sw_queue_cleanup(cq->queue); +} diff --git a/drivers/infiniband/hw/erdma/compat/sw_dev.c b/drivers/infiniband/hw/erdma/compat/sw_dev.c new file mode 100644 index 0000000000000000000000000000000000000000..1bc4420fa511fb02fd89f49f54384fc2ef757048 --- /dev/null +++ b/drivers/infiniband/hw/erdma/compat/sw_dev.c @@ -0,0 +1,230 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* + * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved. + * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved. 
+ */ + +#include +#include +#include "sw.h" +#include "sw_loc.h" + +MODULE_AUTHOR("Bob Pearson, Frank Zago, John Groves, Kamal Heib"); +MODULE_DESCRIPTION("Soft RDMA transport"); +MODULE_LICENSE("Dual BSD/GPL"); + +bool sw_initialized; + +/* free resources for a sw device all objects created for this device must + * have been destroyed + */ +void sw_dealloc(struct sw_dev *sw) +{ + sw_pool_cleanup(&sw->uc_pool); + sw_pool_cleanup(&sw->pd_pool); + sw_pool_cleanup(&sw->ah_pool); + sw_pool_cleanup(&sw->srq_pool); + sw_pool_cleanup(&sw->qp_pool); + sw_pool_cleanup(&sw->cq_pool); + sw_pool_cleanup(&sw->mr_pool); + sw_pool_cleanup(&sw->mw_pool); + sw_pool_cleanup(&sw->mc_grp_pool); + sw_pool_cleanup(&sw->mc_elem_pool); + + if (sw->tfm) + crypto_free_shash(sw->tfm); +} + +/* initialize sw device parameters */ +static void sw_init_device_param(struct sw_dev *sw) +{ + sw->max_inline_data = SW_MAX_INLINE_DATA; + + sw->attr.vendor_id = SW_VENDOR_ID; + sw->attr.max_mr_size = SW_MAX_MR_SIZE; + sw->attr.page_size_cap = SW_PAGE_SIZE_CAP; + sw->attr.max_qp = SW_MAX_QP; + sw->attr.max_qp_wr = SW_MAX_QP_WR; + sw->attr.device_cap_flags = SW_DEVICE_CAP_FLAGS; + sw->attr.max_send_sge = SW_MAX_SGE; + sw->attr.max_recv_sge = SW_MAX_SGE; + + sw->attr.max_sge_rd = SW_MAX_SGE_RD; + sw->attr.max_cq = SW_MAX_CQ; + sw->attr.max_cqe = (1 << SW_MAX_LOG_CQE) - 1; + sw->attr.max_mr = SW_MAX_MR; + sw->attr.max_pd = SW_MAX_PD; + sw->attr.max_qp_rd_atom = SW_MAX_QP_RD_ATOM; + sw->attr.max_res_rd_atom = SW_MAX_RES_RD_ATOM; + sw->attr.max_qp_init_rd_atom = SW_MAX_QP_INIT_RD_ATOM; + sw->attr.atomic_cap = IB_ATOMIC_HCA; + sw->attr.max_mcast_grp = SW_MAX_MCAST_GRP; + sw->attr.max_mcast_qp_attach = SW_MAX_MCAST_QP_ATTACH; + sw->attr.max_total_mcast_qp_attach = SW_MAX_TOT_MCAST_QP_ATTACH; + sw->attr.max_ah = SW_MAX_AH; + sw->attr.max_srq = SW_MAX_SRQ; + sw->attr.max_srq_wr = SW_MAX_SRQ_WR; + sw->attr.max_srq_sge = SW_MAX_SRQ_SGE; + sw->attr.max_fast_reg_page_list_len = SW_MAX_FMR_PAGE_LIST_LEN; + sw->attr.max_pkeys = SW_MAX_PKEYS; + sw->attr.local_ca_ack_delay = SW_LOCAL_CA_ACK_DELAY; + addrconf_addr_eui48((unsigned char *)&sw->attr.sys_image_guid, + sw->ndev->dev_addr); + + sw->max_ucontext = SW_MAX_UCONTEXT; +} + +/* initialize port attributes */ +static void sw_init_port_param(struct sw_port *port) +{ + port->attr.state = IB_PORT_DOWN; + port->attr.max_mtu = IB_MTU_4096; + port->attr.active_mtu = IB_MTU_256; + port->attr.gid_tbl_len = SW_PORT_GID_TBL_LEN; + port->attr.port_cap_flags = SW_PORT_PORT_CAP_FLAGS; + port->attr.max_msg_sz = SW_PORT_MAX_MSG_SZ; + port->attr.bad_pkey_cntr = SW_PORT_BAD_PKEY_CNTR; + port->attr.qkey_viol_cntr = SW_PORT_QKEY_VIOL_CNTR; + port->attr.pkey_tbl_len = SW_PORT_PKEY_TBL_LEN; + port->attr.lid = SW_PORT_LID; + port->attr.sm_lid = SW_PORT_SM_LID; + port->attr.lmc = SW_PORT_LMC; + port->attr.max_vl_num = SW_PORT_MAX_VL_NUM; + port->attr.sm_sl = SW_PORT_SM_SL; + port->attr.subnet_timeout = SW_PORT_SUBNET_TIMEOUT; + port->attr.init_type_reply = SW_PORT_INIT_TYPE_REPLY; + port->attr.active_width = SW_PORT_ACTIVE_WIDTH; + port->attr.active_speed = SW_PORT_ACTIVE_SPEED; + port->mtu_cap = ib_mtu_enum_to_int(IB_MTU_256); + port->subnet_prefix = cpu_to_be64(SW_PORT_SUBNET_PREFIX); +} + +/* initialize port state, note IB convention that HCA ports are always + * numbered from 1 + */ +void sw_init_ports(struct sw_dev *sw) +{ + struct sw_port *port = &sw->port; + + sw_init_port_param(port); + addrconf_addr_eui48((unsigned char *)&port->port_guid, + sw->ndev->dev_addr); + spin_lock_init(&port->port_lock); +} + +/* 
init pools of managed objects */ +static int sw_init_pools(struct sw_dev *sw) +{ + int err; + + err = sw_pool_init(sw, &sw->uc_pool, SW_TYPE_UC, + sw->max_ucontext); + if (err) + goto err1; + + err = sw_pool_init(sw, &sw->pd_pool, SW_TYPE_PD, + sw->attr.max_pd); + if (err) + goto err2; + + err = sw_pool_init(sw, &sw->ah_pool, SW_TYPE_AH, + sw->attr.max_ah); + if (err) + goto err3; + + err = sw_pool_init(sw, &sw->srq_pool, SW_TYPE_SRQ, + sw->attr.max_srq); + if (err) + goto err4; + + err = sw_pool_init(sw, &sw->qp_pool, SW_TYPE_QP, + sw->attr.max_qp); + if (err) + goto err5; + + err = sw_pool_init(sw, &sw->cq_pool, SW_TYPE_CQ, + sw->attr.max_cq); + if (err) + goto err6; + + err = sw_pool_init(sw, &sw->mr_pool, SW_TYPE_MR, + sw->attr.max_mr); + if (err) + goto err7; + + err = sw_pool_init(sw, &sw->mw_pool, SW_TYPE_MW, + sw->attr.max_mw); + if (err) + goto err8; + + err = sw_pool_init(sw, &sw->mc_grp_pool, SW_TYPE_MC_GRP, + sw->attr.max_mcast_grp); + if (err) + goto err9; + + err = sw_pool_init(sw, &sw->mc_elem_pool, SW_TYPE_MC_ELEM, + sw->attr.max_total_mcast_qp_attach); + if (err) + goto err10; + + return 0; + +err10: + sw_pool_cleanup(&sw->mc_grp_pool); +err9: + sw_pool_cleanup(&sw->mw_pool); +err8: + sw_pool_cleanup(&sw->mr_pool); +err7: + sw_pool_cleanup(&sw->cq_pool); +err6: + sw_pool_cleanup(&sw->qp_pool); +err5: + sw_pool_cleanup(&sw->srq_pool); +err4: + sw_pool_cleanup(&sw->ah_pool); +err3: + sw_pool_cleanup(&sw->pd_pool); +err2: + sw_pool_cleanup(&sw->uc_pool); +err1: + return err; +} + +/* initialize sw device state */ +int sw_init(struct sw_dev *sw) +{ + int err; + + /* init default device parameters */ + sw_init_device_param(sw); + + sw_init_ports(sw); + + err = sw_init_pools(sw); + if (err) + return err; + + /* init pending mmap list */ + spin_lock_init(&sw->mmap_offset_lock); + spin_lock_init(&sw->pending_lock); + INIT_LIST_HEAD(&sw->pending_mmaps); + + mutex_init(&sw->usdev_lock); + + return 0; +} + +void sw_set_mtu(struct sw_dev *sw, unsigned int ndev_mtu) +{ + struct sw_port *port = &sw->port; + enum ib_mtu mtu; + + mtu = eth_mtu_int_to_enum(ndev_mtu); + + /* Make sure that new MTU in range */ + mtu = mtu ? min_t(enum ib_mtu, mtu, IB_MTU_4096) : IB_MTU_256; + + port->attr.active_mtu = mtu; + port->mtu_cap = ib_mtu_enum_to_int(mtu); +} diff --git a/drivers/infiniband/hw/erdma/compat/sw_hdr.h b/drivers/infiniband/hw/erdma/compat/sw_hdr.h new file mode 100644 index 0000000000000000000000000000000000000000..fbd8d5450738e409ccc8b05c93f5b886615962e9 --- /dev/null +++ b/drivers/infiniband/hw/erdma/compat/sw_hdr.h @@ -0,0 +1,933 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* + * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved. + * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved. + */ + +#ifndef SW_HDR_H +#define SW_HDR_H + +/* extracted information about a packet carried in an sk_buff struct fits in + * the skbuff cb array. Must be at most 48 bytes. stored in control block of + * sk_buff for received packets. 
+ */ +struct sw_pkt_info { + struct sw_dev *sw; /* device that owns packet */ + struct sw_qp *qp; /* qp that owns packet */ + struct sw_send_wqe *wqe; /* send wqe */ + u8 *hdr; /* points to bth */ + u32 mask; /* useful info about pkt */ + u32 psn; /* bth psn of packet */ + u16 pkey_index; /* partition of pkt */ + u16 paylen; /* length of bth - icrc */ + u8 port_num; /* port pkt received on */ + u8 opcode; /* bth opcode of packet */ + u8 offset; /* bth offset from pkt->hdr */ +}; + +/* Macros should be used only for received skb */ +static inline struct sw_pkt_info *SKB_TO_PKT(struct sk_buff *skb) +{ + BUILD_BUG_ON(sizeof(struct sw_pkt_info) > sizeof(skb->cb)); + return (void *)skb->cb; +} + +static inline struct sk_buff *PKT_TO_SKB(struct sw_pkt_info *pkt) +{ + return container_of((void *)pkt, struct sk_buff, cb); +} + +/* + * IBA header types and methods + * + * Some of these are for reference and completeness only since + * sw does not currently support RD transport + * most of this could be moved into IB core. ib_pack.h has + * part of this but is incomplete + * + * Header specific routines to insert/extract values to/from headers + * the routines that are named __hhh_(set_)fff() take a pointer to a + * hhh header and get(set) the fff field. The routines named + * hhh_(set_)fff take a packet info struct and find the + * header and field based on the opcode in the packet. + * Conversion to/from network byte order from cpu order is also done. + */ + +#define SW_ICRC_SIZE (4) +#define SW_MAX_HDR_LENGTH (80) + +/****************************************************************************** + * Base Transport Header + ******************************************************************************/ +struct sw_bth { + u8 opcode; + u8 flags; + __be16 pkey; + __be32 qpn; + __be32 apsn; +}; + +#define BTH_TVER (0) +#define BTH_DEF_PKEY (0xffff) + +#define BTH_SE_MASK (0x80) +#define BTH_MIG_MASK (0x40) +#define BTH_PAD_MASK (0x30) +#define BTH_TVER_MASK (0x0f) +#define BTH_FECN_MASK (0x80000000) +#define BTH_BECN_MASK (0x40000000) +#define BTH_RESV6A_MASK (0x3f000000) +#define BTH_QPN_MASK (0x00ffffff) +#define BTH_ACK_MASK (0x80000000) +#define BTH_RESV7_MASK (0x7f000000) +#define BTH_PSN_MASK (0x00ffffff) + +static inline u8 __bth_opcode(void *arg) +{ + struct sw_bth *bth = arg; + + return bth->opcode; +} + +static inline void __bth_set_opcode(void *arg, u8 opcode) +{ + struct sw_bth *bth = arg; + + bth->opcode = opcode; +} + +static inline u8 __bth_se(void *arg) +{ + struct sw_bth *bth = arg; + + return 0 != (BTH_SE_MASK & bth->flags); +} + +static inline void __bth_set_se(void *arg, int se) +{ + struct sw_bth *bth = arg; + + if (se) + bth->flags |= BTH_SE_MASK; + else + bth->flags &= ~BTH_SE_MASK; +} + +static inline u8 __bth_mig(void *arg) +{ + struct sw_bth *bth = arg; + + return 0 != (BTH_MIG_MASK & bth->flags); +} + +static inline void __bth_set_mig(void *arg, u8 mig) +{ + struct sw_bth *bth = arg; + + if (mig) + bth->flags |= BTH_MIG_MASK; + else + bth->flags &= ~BTH_MIG_MASK; +} + +static inline u8 __bth_pad(void *arg) +{ + struct sw_bth *bth = arg; + + return (BTH_PAD_MASK & bth->flags) >> 4; +} + +static inline void __bth_set_pad(void *arg, u8 pad) +{ + struct sw_bth *bth = arg; + + bth->flags = (BTH_PAD_MASK & (pad << 4)) | + (~BTH_PAD_MASK & bth->flags); +} + +static inline u8 __bth_tver(void *arg) +{ + struct sw_bth *bth = arg; + + return BTH_TVER_MASK & bth->flags; +} + +static inline void __bth_set_tver(void *arg, u8 tver) +{ + struct sw_bth *bth = arg; + + bth->flags = 
(BTH_TVER_MASK & tver) | + (~BTH_TVER_MASK & bth->flags); +} + +static inline u16 __bth_pkey(void *arg) +{ + struct sw_bth *bth = arg; + + return be16_to_cpu(bth->pkey); +} + +static inline void __bth_set_pkey(void *arg, u16 pkey) +{ + struct sw_bth *bth = arg; + + bth->pkey = cpu_to_be16(pkey); +} + +static inline u32 __bth_qpn(void *arg) +{ + struct sw_bth *bth = arg; + + return BTH_QPN_MASK & be32_to_cpu(bth->qpn); +} + +static inline void __bth_set_qpn(void *arg, u32 qpn) +{ + struct sw_bth *bth = arg; + u32 resvqpn = be32_to_cpu(bth->qpn); + + bth->qpn = cpu_to_be32((BTH_QPN_MASK & qpn) | + (~BTH_QPN_MASK & resvqpn)); +} + +static inline int __bth_fecn(void *arg) +{ + struct sw_bth *bth = arg; + + return 0 != (cpu_to_be32(BTH_FECN_MASK) & bth->qpn); +} + +static inline void __bth_set_fecn(void *arg, int fecn) +{ + struct sw_bth *bth = arg; + + if (fecn) + bth->qpn |= cpu_to_be32(BTH_FECN_MASK); + else + bth->qpn &= ~cpu_to_be32(BTH_FECN_MASK); +} + +static inline int __bth_becn(void *arg) +{ + struct sw_bth *bth = arg; + + return 0 != (cpu_to_be32(BTH_BECN_MASK) & bth->qpn); +} + +static inline void __bth_set_becn(void *arg, int becn) +{ + struct sw_bth *bth = arg; + + if (becn) + bth->qpn |= cpu_to_be32(BTH_BECN_MASK); + else + bth->qpn &= ~cpu_to_be32(BTH_BECN_MASK); +} + +static inline u8 __bth_resv6a(void *arg) +{ + struct sw_bth *bth = arg; + + return (BTH_RESV6A_MASK & be32_to_cpu(bth->qpn)) >> 24; +} + +static inline void __bth_set_resv6a(void *arg) +{ + struct sw_bth *bth = arg; + + bth->qpn = cpu_to_be32(~BTH_RESV6A_MASK); +} + +static inline int __bth_ack(void *arg) +{ + struct sw_bth *bth = arg; + + return 0 != (cpu_to_be32(BTH_ACK_MASK) & bth->apsn); +} + +static inline void __bth_set_ack(void *arg, int ack) +{ + struct sw_bth *bth = arg; + + if (ack) + bth->apsn |= cpu_to_be32(BTH_ACK_MASK); + else + bth->apsn &= ~cpu_to_be32(BTH_ACK_MASK); +} + +static inline void __bth_set_resv7(void *arg) +{ + struct sw_bth *bth = arg; + + bth->apsn &= ~cpu_to_be32(BTH_RESV7_MASK); +} + +static inline u32 __bth_psn(void *arg) +{ + struct sw_bth *bth = arg; + + return BTH_PSN_MASK & be32_to_cpu(bth->apsn); +} + +static inline void __bth_set_psn(void *arg, u32 psn) +{ + struct sw_bth *bth = arg; + u32 apsn = be32_to_cpu(bth->apsn); + + bth->apsn = cpu_to_be32((BTH_PSN_MASK & psn) | + (~BTH_PSN_MASK & apsn)); +} + +static inline u8 bth_opcode(struct sw_pkt_info *pkt) +{ + return __bth_opcode(pkt->hdr + pkt->offset); +} + +static inline void bth_set_opcode(struct sw_pkt_info *pkt, u8 opcode) +{ + __bth_set_opcode(pkt->hdr + pkt->offset, opcode); +} + +static inline u8 bth_se(struct sw_pkt_info *pkt) +{ + return __bth_se(pkt->hdr + pkt->offset); +} + +static inline void bth_set_se(struct sw_pkt_info *pkt, int se) +{ + __bth_set_se(pkt->hdr + pkt->offset, se); +} + +static inline u8 bth_mig(struct sw_pkt_info *pkt) +{ + return __bth_mig(pkt->hdr + pkt->offset); +} + +static inline void bth_set_mig(struct sw_pkt_info *pkt, u8 mig) +{ + __bth_set_mig(pkt->hdr + pkt->offset, mig); +} + +static inline u8 bth_pad(struct sw_pkt_info *pkt) +{ + return __bth_pad(pkt->hdr + pkt->offset); +} + +static inline void bth_set_pad(struct sw_pkt_info *pkt, u8 pad) +{ + __bth_set_pad(pkt->hdr + pkt->offset, pad); +} + +static inline u8 bth_tver(struct sw_pkt_info *pkt) +{ + return __bth_tver(pkt->hdr + pkt->offset); +} + +static inline void bth_set_tver(struct sw_pkt_info *pkt, u8 tver) +{ + __bth_set_tver(pkt->hdr + pkt->offset, tver); +} + +static inline u16 bth_pkey(struct sw_pkt_info *pkt) +{ + return 
__bth_pkey(pkt->hdr + pkt->offset); +} + +static inline void bth_set_pkey(struct sw_pkt_info *pkt, u16 pkey) +{ + __bth_set_pkey(pkt->hdr + pkt->offset, pkey); +} + +static inline u32 bth_qpn(struct sw_pkt_info *pkt) +{ + return __bth_qpn(pkt->hdr + pkt->offset); +} + +static inline void bth_set_qpn(struct sw_pkt_info *pkt, u32 qpn) +{ + __bth_set_qpn(pkt->hdr + pkt->offset, qpn); +} + +static inline int bth_fecn(struct sw_pkt_info *pkt) +{ + return __bth_fecn(pkt->hdr + pkt->offset); +} + +static inline void bth_set_fecn(struct sw_pkt_info *pkt, int fecn) +{ + __bth_set_fecn(pkt->hdr + pkt->offset, fecn); +} + +static inline int bth_becn(struct sw_pkt_info *pkt) +{ + return __bth_becn(pkt->hdr + pkt->offset); +} + +static inline void bth_set_becn(struct sw_pkt_info *pkt, int becn) +{ + __bth_set_becn(pkt->hdr + pkt->offset, becn); +} + +static inline u8 bth_resv6a(struct sw_pkt_info *pkt) +{ + return __bth_resv6a(pkt->hdr + pkt->offset); +} + +static inline void bth_set_resv6a(struct sw_pkt_info *pkt) +{ + __bth_set_resv6a(pkt->hdr + pkt->offset); +} + +static inline int bth_ack(struct sw_pkt_info *pkt) +{ + return __bth_ack(pkt->hdr + pkt->offset); +} + +static inline void bth_set_ack(struct sw_pkt_info *pkt, int ack) +{ + __bth_set_ack(pkt->hdr + pkt->offset, ack); +} + +static inline void bth_set_resv7(struct sw_pkt_info *pkt) +{ + __bth_set_resv7(pkt->hdr + pkt->offset); +} + +static inline u32 bth_psn(struct sw_pkt_info *pkt) +{ + return __bth_psn(pkt->hdr + pkt->offset); +} + +static inline void bth_set_psn(struct sw_pkt_info *pkt, u32 psn) +{ + __bth_set_psn(pkt->hdr + pkt->offset, psn); +} + +static inline void bth_init(struct sw_pkt_info *pkt, u8 opcode, int se, + int mig, int pad, u16 pkey, u32 qpn, int ack_req, + u32 psn) +{ + struct sw_bth *bth = (struct sw_bth *)(pkt->hdr + pkt->offset); + + bth->opcode = opcode; + bth->flags = (pad << 4) & BTH_PAD_MASK; + if (se) + bth->flags |= BTH_SE_MASK; + if (mig) + bth->flags |= BTH_MIG_MASK; + bth->pkey = cpu_to_be16(pkey); + bth->qpn = cpu_to_be32(qpn & BTH_QPN_MASK); + psn &= BTH_PSN_MASK; + if (ack_req) + psn |= BTH_ACK_MASK; + bth->apsn = cpu_to_be32(psn); +} + +/****************************************************************************** + * Reliable Datagram Extended Transport Header + ******************************************************************************/ +struct sw_rdeth { + __be32 een; +}; + +#define RDETH_EEN_MASK (0x00ffffff) + +static inline u8 __rdeth_een(void *arg) +{ + struct sw_rdeth *rdeth = arg; + + return RDETH_EEN_MASK & be32_to_cpu(rdeth->een); +} + +static inline void __rdeth_set_een(void *arg, u32 een) +{ + struct sw_rdeth *rdeth = arg; + + rdeth->een = cpu_to_be32(RDETH_EEN_MASK & een); +} + +static inline u8 rdeth_een(struct sw_pkt_info *pkt) +{ + return __rdeth_een(pkt->hdr + pkt->offset + + sw_opcode[pkt->opcode].offset[SW_RDETH]); +} + +static inline void rdeth_set_een(struct sw_pkt_info *pkt, u32 een) +{ + __rdeth_set_een(pkt->hdr + pkt->offset + + sw_opcode[pkt->opcode].offset[SW_RDETH], een); +} + +/****************************************************************************** + * Datagram Extended Transport Header + ******************************************************************************/ +struct sw_deth { + __be32 qkey; + __be32 sqp; +}; + +#define GSI_QKEY (0x80010000) +#define DETH_SQP_MASK (0x00ffffff) + +static inline u32 __deth_qkey(void *arg) +{ + struct sw_deth *deth = arg; + + return be32_to_cpu(deth->qkey); +} + +static inline void __deth_set_qkey(void *arg, u32 qkey) +{ + 
struct sw_deth *deth = arg; + + deth->qkey = cpu_to_be32(qkey); +} + +static inline u32 __deth_sqp(void *arg) +{ + struct sw_deth *deth = arg; + + return DETH_SQP_MASK & be32_to_cpu(deth->sqp); +} + +static inline void __deth_set_sqp(void *arg, u32 sqp) +{ + struct sw_deth *deth = arg; + + deth->sqp = cpu_to_be32(DETH_SQP_MASK & sqp); +} + +static inline u32 deth_qkey(struct sw_pkt_info *pkt) +{ + return __deth_qkey(pkt->hdr + pkt->offset + + sw_opcode[pkt->opcode].offset[SW_DETH]); +} + +static inline void deth_set_qkey(struct sw_pkt_info *pkt, u32 qkey) +{ + __deth_set_qkey(pkt->hdr + pkt->offset + + sw_opcode[pkt->opcode].offset[SW_DETH], qkey); +} + +static inline u32 deth_sqp(struct sw_pkt_info *pkt) +{ + return __deth_sqp(pkt->hdr + pkt->offset + + sw_opcode[pkt->opcode].offset[SW_DETH]); +} + +static inline void deth_set_sqp(struct sw_pkt_info *pkt, u32 sqp) +{ + __deth_set_sqp(pkt->hdr + pkt->offset + + sw_opcode[pkt->opcode].offset[SW_DETH], sqp); +} + +/****************************************************************************** + * RDMA Extended Transport Header + ******************************************************************************/ +struct sw_reth { + __be64 va; + __be32 rkey; + __be32 len; +}; + +static inline u64 __reth_va(void *arg) +{ + struct sw_reth *reth = arg; + + return be64_to_cpu(reth->va); +} + +static inline void __reth_set_va(void *arg, u64 va) +{ + struct sw_reth *reth = arg; + + reth->va = cpu_to_be64(va); +} + +static inline u32 __reth_rkey(void *arg) +{ + struct sw_reth *reth = arg; + + return be32_to_cpu(reth->rkey); +} + +static inline void __reth_set_rkey(void *arg, u32 rkey) +{ + struct sw_reth *reth = arg; + + reth->rkey = cpu_to_be32(rkey); +} + +static inline u32 __reth_len(void *arg) +{ + struct sw_reth *reth = arg; + + return be32_to_cpu(reth->len); +} + +static inline void __reth_set_len(void *arg, u32 len) +{ + struct sw_reth *reth = arg; + + reth->len = cpu_to_be32(len); +} + +static inline u64 reth_va(struct sw_pkt_info *pkt) +{ + return __reth_va(pkt->hdr + pkt->offset + + sw_opcode[pkt->opcode].offset[SW_RETH]); +} + +static inline void reth_set_va(struct sw_pkt_info *pkt, u64 va) +{ + __reth_set_va(pkt->hdr + pkt->offset + + sw_opcode[pkt->opcode].offset[SW_RETH], va); +} + +static inline u32 reth_rkey(struct sw_pkt_info *pkt) +{ + return __reth_rkey(pkt->hdr + pkt->offset + + sw_opcode[pkt->opcode].offset[SW_RETH]); +} + +static inline void reth_set_rkey(struct sw_pkt_info *pkt, u32 rkey) +{ + __reth_set_rkey(pkt->hdr + pkt->offset + + sw_opcode[pkt->opcode].offset[SW_RETH], rkey); +} + +static inline u32 reth_len(struct sw_pkt_info *pkt) +{ + return __reth_len(pkt->hdr + pkt->offset + + sw_opcode[pkt->opcode].offset[SW_RETH]); +} + +static inline void reth_set_len(struct sw_pkt_info *pkt, u32 len) +{ + __reth_set_len(pkt->hdr + pkt->offset + + sw_opcode[pkt->opcode].offset[SW_RETH], len); +} + +/****************************************************************************** + * Atomic Extended Transport Header + ******************************************************************************/ +struct sw_atmeth { + __be64 va; + __be32 rkey; + __be64 swap_add; + __be64 comp; +} __packed; + +static inline u64 __atmeth_va(void *arg) +{ + struct sw_atmeth *atmeth = arg; + + return be64_to_cpu(atmeth->va); +} + +static inline void __atmeth_set_va(void *arg, u64 va) +{ + struct sw_atmeth *atmeth = arg; + + atmeth->va = cpu_to_be64(va); +} + +static inline u32 __atmeth_rkey(void *arg) +{ + struct sw_atmeth *atmeth = arg; + + return 
be32_to_cpu(atmeth->rkey); +} + +static inline void __atmeth_set_rkey(void *arg, u32 rkey) +{ + struct sw_atmeth *atmeth = arg; + + atmeth->rkey = cpu_to_be32(rkey); +} + +static inline u64 __atmeth_swap_add(void *arg) +{ + struct sw_atmeth *atmeth = arg; + + return be64_to_cpu(atmeth->swap_add); +} + +static inline void __atmeth_set_swap_add(void *arg, u64 swap_add) +{ + struct sw_atmeth *atmeth = arg; + + atmeth->swap_add = cpu_to_be64(swap_add); +} + +static inline u64 __atmeth_comp(void *arg) +{ + struct sw_atmeth *atmeth = arg; + + return be64_to_cpu(atmeth->comp); +} + +static inline void __atmeth_set_comp(void *arg, u64 comp) +{ + struct sw_atmeth *atmeth = arg; + + atmeth->comp = cpu_to_be64(comp); +} + +static inline u64 atmeth_va(struct sw_pkt_info *pkt) +{ + return __atmeth_va(pkt->hdr + pkt->offset + + sw_opcode[pkt->opcode].offset[SW_ATMETH]); +} + +static inline void atmeth_set_va(struct sw_pkt_info *pkt, u64 va) +{ + __atmeth_set_va(pkt->hdr + pkt->offset + + sw_opcode[pkt->opcode].offset[SW_ATMETH], va); +} + +static inline u32 atmeth_rkey(struct sw_pkt_info *pkt) +{ + return __atmeth_rkey(pkt->hdr + pkt->offset + + sw_opcode[pkt->opcode].offset[SW_ATMETH]); +} + +static inline void atmeth_set_rkey(struct sw_pkt_info *pkt, u32 rkey) +{ + __atmeth_set_rkey(pkt->hdr + pkt->offset + + sw_opcode[pkt->opcode].offset[SW_ATMETH], rkey); +} + +static inline u64 atmeth_swap_add(struct sw_pkt_info *pkt) +{ + return __atmeth_swap_add(pkt->hdr + pkt->offset + + sw_opcode[pkt->opcode].offset[SW_ATMETH]); +} + +static inline void atmeth_set_swap_add(struct sw_pkt_info *pkt, u64 swap_add) +{ + __atmeth_set_swap_add(pkt->hdr + pkt->offset + + sw_opcode[pkt->opcode].offset[SW_ATMETH], swap_add); +} + +static inline u64 atmeth_comp(struct sw_pkt_info *pkt) +{ + return __atmeth_comp(pkt->hdr + pkt->offset + + sw_opcode[pkt->opcode].offset[SW_ATMETH]); +} + +static inline void atmeth_set_comp(struct sw_pkt_info *pkt, u64 comp) +{ + __atmeth_set_comp(pkt->hdr + pkt->offset + + sw_opcode[pkt->opcode].offset[SW_ATMETH], comp); +} + +/****************************************************************************** + * Ack Extended Transport Header + ******************************************************************************/ +struct sw_aeth { + __be32 smsn; +}; + +#define AETH_SYN_MASK (0xff000000) +#define AETH_MSN_MASK (0x00ffffff) + +enum aeth_syndrome { + AETH_TYPE_MASK = 0xe0, + AETH_ACK = 0x00, + AETH_RNR_NAK = 0x20, + AETH_RSVD = 0x40, + AETH_NAK = 0x60, + AETH_ACK_UNLIMITED = 0x1f, + AETH_NAK_PSN_SEQ_ERROR = 0x60, + AETH_NAK_INVALID_REQ = 0x61, + AETH_NAK_REM_ACC_ERR = 0x62, + AETH_NAK_REM_OP_ERR = 0x63, + AETH_NAK_INV_RD_REQ = 0x64, +}; + +static inline u8 __aeth_syn(void *arg) +{ + struct sw_aeth *aeth = arg; + + return (AETH_SYN_MASK & be32_to_cpu(aeth->smsn)) >> 24; +} + +static inline void __aeth_set_syn(void *arg, u8 syn) +{ + struct sw_aeth *aeth = arg; + u32 smsn = be32_to_cpu(aeth->smsn); + + aeth->smsn = cpu_to_be32((AETH_SYN_MASK & (syn << 24)) | + (~AETH_SYN_MASK & smsn)); +} + +static inline u32 __aeth_msn(void *arg) +{ + struct sw_aeth *aeth = arg; + + return AETH_MSN_MASK & be32_to_cpu(aeth->smsn); +} + +static inline void __aeth_set_msn(void *arg, u32 msn) +{ + struct sw_aeth *aeth = arg; + u32 smsn = be32_to_cpu(aeth->smsn); + + aeth->smsn = cpu_to_be32((AETH_MSN_MASK & msn) | + (~AETH_MSN_MASK & smsn)); +} + +static inline u8 aeth_syn(struct sw_pkt_info *pkt) +{ + return __aeth_syn(pkt->hdr + pkt->offset + + sw_opcode[pkt->opcode].offset[SW_AETH]); +} + +static inline 
void aeth_set_syn(struct sw_pkt_info *pkt, u8 syn) +{ + __aeth_set_syn(pkt->hdr + pkt->offset + + sw_opcode[pkt->opcode].offset[SW_AETH], syn); +} + +static inline u32 aeth_msn(struct sw_pkt_info *pkt) +{ + return __aeth_msn(pkt->hdr + pkt->offset + + sw_opcode[pkt->opcode].offset[SW_AETH]); +} + +static inline void aeth_set_msn(struct sw_pkt_info *pkt, u32 msn) +{ + __aeth_set_msn(pkt->hdr + pkt->offset + + sw_opcode[pkt->opcode].offset[SW_AETH], msn); +} + +/****************************************************************************** + * Atomic Ack Extended Transport Header + ******************************************************************************/ +struct sw_atmack { + __be64 orig; +}; + +static inline u64 __atmack_orig(void *arg) +{ + struct sw_atmack *atmack = arg; + + return be64_to_cpu(atmack->orig); +} + +static inline void __atmack_set_orig(void *arg, u64 orig) +{ + struct sw_atmack *atmack = arg; + + atmack->orig = cpu_to_be64(orig); +} + +static inline u64 atmack_orig(struct sw_pkt_info *pkt) +{ + return __atmack_orig(pkt->hdr + pkt->offset + + sw_opcode[pkt->opcode].offset[SW_ATMACK]); +} + +static inline void atmack_set_orig(struct sw_pkt_info *pkt, u64 orig) +{ + __atmack_set_orig(pkt->hdr + pkt->offset + + sw_opcode[pkt->opcode].offset[SW_ATMACK], orig); +} + +/****************************************************************************** + * Immediate Extended Transport Header + ******************************************************************************/ +struct sw_immdt { + __be32 imm; +}; + +static inline __be32 __immdt_imm(void *arg) +{ + struct sw_immdt *immdt = arg; + + return immdt->imm; +} + +static inline void __immdt_set_imm(void *arg, __be32 imm) +{ + struct sw_immdt *immdt = arg; + + immdt->imm = imm; +} + +static inline __be32 immdt_imm(struct sw_pkt_info *pkt) +{ + return __immdt_imm(pkt->hdr + pkt->offset + + sw_opcode[pkt->opcode].offset[SW_IMMDT]); +} + +static inline void immdt_set_imm(struct sw_pkt_info *pkt, __be32 imm) +{ + __immdt_set_imm(pkt->hdr + pkt->offset + + sw_opcode[pkt->opcode].offset[SW_IMMDT], imm); +} + +/****************************************************************************** + * Invalidate Extended Transport Header + ******************************************************************************/ +struct sw_ieth { + __be32 rkey; +}; + +static inline u32 __ieth_rkey(void *arg) +{ + struct sw_ieth *ieth = arg; + + return be32_to_cpu(ieth->rkey); +} + +static inline void __ieth_set_rkey(void *arg, u32 rkey) +{ + struct sw_ieth *ieth = arg; + + ieth->rkey = cpu_to_be32(rkey); +} + +static inline u32 ieth_rkey(struct sw_pkt_info *pkt) +{ + return __ieth_rkey(pkt->hdr + pkt->offset + + sw_opcode[pkt->opcode].offset[SW_IETH]); +} + +static inline void ieth_set_rkey(struct sw_pkt_info *pkt, u32 rkey) +{ + __ieth_set_rkey(pkt->hdr + pkt->offset + + sw_opcode[pkt->opcode].offset[SW_IETH], rkey); +} + +enum sw_hdr_length { + SW_BTH_BYTES = sizeof(struct sw_bth), + SW_DETH_BYTES = sizeof(struct sw_deth), + SW_IMMDT_BYTES = sizeof(struct sw_immdt), + SW_RETH_BYTES = sizeof(struct sw_reth), + SW_AETH_BYTES = sizeof(struct sw_aeth), + SW_ATMACK_BYTES = sizeof(struct sw_atmack), + SW_ATMETH_BYTES = sizeof(struct sw_atmeth), + SW_IETH_BYTES = sizeof(struct sw_ieth), + SW_RDETH_BYTES = sizeof(struct sw_rdeth), +}; + +static inline size_t header_size(struct sw_pkt_info *pkt) +{ + return pkt->offset + sw_opcode[pkt->opcode].length; +} + +static inline void *payload_addr(struct sw_pkt_info *pkt) +{ + return pkt->hdr + pkt->offset + + 
sw_opcode[pkt->opcode].offset[SW_PAYLOAD]; +} + +static inline size_t payload_size(struct sw_pkt_info *pkt) +{ + return pkt->paylen - sw_opcode[pkt->opcode].offset[SW_PAYLOAD] + - bth_pad(pkt) - SW_ICRC_SIZE; +} + +#endif /* SW_HDR_H */ diff --git a/drivers/infiniband/hw/erdma/compat/sw_hw_counters.h b/drivers/infiniband/hw/erdma/compat/sw_hw_counters.h new file mode 100644 index 0000000000000000000000000000000000000000..28820864ae82dc80baca0b3173a31b3fd12c0934 --- /dev/null +++ b/drivers/infiniband/hw/erdma/compat/sw_hw_counters.h @@ -0,0 +1,37 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* + * Copyright (c) 2017 Mellanox Technologies Ltd. All rights reserved. + */ + +#ifndef SW_HW_COUNTERS_H +#define SW_HW_COUNTERS_H + +/* + * when adding counters to enum also add + * them to sw_counter_name[] vector. + */ +enum sw_counters { + SW_CNT_SENT_PKTS, + SW_CNT_RCVD_PKTS, + SW_CNT_DUP_REQ, + SW_CNT_OUT_OF_SEQ_REQ, + SW_CNT_RCV_RNR, + SW_CNT_SND_RNR, + SW_CNT_RCV_SEQ_ERR, + SW_CNT_COMPLETER_SCHED, + SW_CNT_RETRY_EXCEEDED, + SW_CNT_RNR_RETRY_EXCEEDED, + SW_CNT_COMP_RETRY, + SW_CNT_SEND_ERR, + SW_CNT_LINK_DOWNED, + SW_CNT_RDMA_SEND, + SW_CNT_RDMA_RECV, + SW_NUM_OF_COUNTERS +}; + +struct rdma_hw_stats *sw_ib_alloc_hw_stats(struct ib_device *ibdev, + u8 port_num); +int sw_ib_get_hw_stats(struct ib_device *ibdev, + struct rdma_hw_stats *stats, + u8 port, int index); +#endif /* SW_HW_COUNTERS_H */ diff --git a/drivers/infiniband/hw/erdma/compat/sw_icrc.c b/drivers/infiniband/hw/erdma/compat/sw_icrc.c new file mode 100644 index 0000000000000000000000000000000000000000..b4b8a447a0cd068e5ee729d0e3e28f62c4d2e6a2 --- /dev/null +++ b/drivers/infiniband/hw/erdma/compat/sw_icrc.c @@ -0,0 +1,69 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* + * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved. + * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved. + */ + +#include "sw.h" +#include "sw_loc.h" + +/* Compute a partial ICRC for all the IB transport headers. */ +u32 sw_icrc_hdr(struct sw_pkt_info *pkt, struct sk_buff *skb) +{ + unsigned int bth_offset = 0; + struct iphdr *ip4h = NULL; + struct ipv6hdr *ip6h = NULL; + struct udphdr *udph; + struct sw_bth *bth; + int crc; + int length; + int hdr_size = sizeof(struct udphdr) + + (skb->protocol == htons(ETH_P_IP) ? + sizeof(struct iphdr) : sizeof(struct ipv6hdr)); + /* pseudo header buffer size is calculate using ipv6 header size since + * it is bigger than ipv4 + */ + u8 pshdr[sizeof(struct udphdr) + + sizeof(struct ipv6hdr) + + SW_BTH_BYTES]; + + /* This seed is the result of computing a CRC with a seed of + * 0xfffffff and 8 bytes of 0xff representing a masked LRH. 
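+	 *
+	 * The IP and UDP fields overwritten with all-ones below (ttl, tos or
+	 * priority/flow label, hop limit, and the UDP checksum) are the
+	 * mutable fields that the ICRC must not cover, so they are masked in
+	 * the pseudo header copy before the CRC is accumulated over it and
+	 * the transport headers.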
+ */ + crc = 0xdebb20e3; + + if (skb->protocol == htons(ETH_P_IP)) { /* IPv4 */ + memcpy(pshdr, ip_hdr(skb), hdr_size); + ip4h = (struct iphdr *)pshdr; + udph = (struct udphdr *)(ip4h + 1); + + ip4h->ttl = 0xff; + ip4h->check = CSUM_MANGLED_0; + ip4h->tos = 0xff; + } else { /* IPv6 */ + memcpy(pshdr, ipv6_hdr(skb), hdr_size); + ip6h = (struct ipv6hdr *)pshdr; + udph = (struct udphdr *)(ip6h + 1); + + memset(ip6h->flow_lbl, 0xff, sizeof(ip6h->flow_lbl)); + ip6h->priority = 0xf; + ip6h->hop_limit = 0xff; + } + udph->check = CSUM_MANGLED_0; + + bth_offset += hdr_size; + + memcpy(&pshdr[bth_offset], pkt->hdr, SW_BTH_BYTES); + bth = (struct sw_bth *)&pshdr[bth_offset]; + + /* exclude bth.resv8a */ + bth->qpn |= cpu_to_be32(~BTH_QPN_MASK); + + length = hdr_size + SW_BTH_BYTES; + crc = sw_crc32(pkt->sw, crc, pshdr, length); + + /* And finish to compute the CRC on the remainder of the headers. */ + crc = sw_crc32(pkt->sw, crc, pkt->hdr + SW_BTH_BYTES, + sw_opcode[pkt->opcode].length - SW_BTH_BYTES); + return crc; +} diff --git a/drivers/infiniband/hw/erdma/compat/sw_loc.h b/drivers/infiniband/hw/erdma/compat/sw_loc.h new file mode 100644 index 0000000000000000000000000000000000000000..6caf4879699761367449014db9534d877f8a00cd --- /dev/null +++ b/drivers/infiniband/hw/erdma/compat/sw_loc.h @@ -0,0 +1,256 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* + * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved. + * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved. + */ + +#ifndef SW_LOC_H +#define SW_LOC_H + +/* sw_av.c */ +void sw_init_av(struct ib_device *ibdev, struct rdma_ah_attr *attr, struct sw_av *av); + +int sw_av_chk_attr(struct sw_dev *sw, struct rdma_ah_attr *attr); + +void sw_av_from_attr(u8 port_num, struct sw_av *av, + struct rdma_ah_attr *attr); + +void sw_av_to_attr(struct sw_av *av, struct rdma_ah_attr *attr); + +void sw_av_fill_ip_info(struct ib_device *ibdev, struct sw_av *av, struct rdma_ah_attr *attr); + +struct sw_av *sw_get_av(struct sw_pkt_info *pkt); + +/* sw_cq.c */ +int sw_cq_chk_attr(struct sw_dev *sw, struct sw_cq *cq, + int cqe, int comp_vector); + +int sw_cq_from_init(struct sw_dev *sw, struct sw_cq *cq, int cqe, + int comp_vector, struct ib_udata *udata, + struct sw_create_cq_resp __user *uresp); + +int sw_cq_post(struct sw_cq *cq, struct sw_cqe *cqe, int solicited); + +void sw_cq_disable(struct sw_cq *cq); + +void sw_cq_cleanup(struct sw_pool_entry *arg); + +/* sw_mcast.c */ +int sw_mcast_get_grp(struct sw_dev *sw, union ib_gid *mgid, + struct sw_mc_grp **grp_p); + +int sw_mcast_add_grp_elem(struct sw_dev *sw, struct sw_qp *qp, + struct sw_mc_grp *grp); + +int sw_mcast_drop_grp_elem(struct sw_dev *sw, struct sw_qp *qp, + union ib_gid *mgid); + +void sw_drop_all_mcast_groups(struct sw_qp *qp); + +void sw_mc_cleanup(struct sw_pool_entry *arg); + +/* sw_mmap.c */ +struct sw_mmap_info { + struct list_head pending_mmaps; + struct ib_ucontext *context; + struct kref ref; + void *obj; + + struct mminfo info; +}; + +struct sw_mmap_info *sw_create_mmap_info(struct sw_dev *dev, u32 size, + struct ib_udata *udata, void *obj); + +int sw_mmap(struct ib_ucontext *context, struct vm_area_struct *vma); + +/* sw_mr.c */ +enum copy_direction { + to_mem_obj, + from_mem_obj, +}; + +void sw_mem_init_dma(struct sw_pd *pd, + int access, struct sw_mem *mem); + +int sw_mem_init_user(struct sw_pd *pd, u64 start, + u64 length, u64 iova, int access, struct ib_udata *udata, + struct sw_mem *mr); + +int sw_mem_init_fast(struct sw_pd *pd, + int max_pages, 
struct sw_mem *mem); + +int sw_mem_copy(struct sw_mem *mem, u64 iova, void *addr, + int length, enum copy_direction dir, u32 *crcp); + +int sw_copy_data(struct sw_pd *pd, int access, + struct sw_dma_info *dma, void *addr, int length, + enum copy_direction dir, u32 *crcp); + +void *iova_to_vaddr(struct sw_mem *mem, u64 iova, int length); + +enum lookup_type { + lookup_local, + lookup_remote, +}; + +struct sw_mem *lookup_mem(struct sw_pd *pd, int access, u32 key, + enum lookup_type type); + +int mem_check_range(struct sw_mem *mem, u64 iova, size_t length); + +void sw_mem_cleanup(struct sw_pool_entry *arg); + +int sw_advance_dma_data(struct sw_dma_info *dma, unsigned int length); + +/* sw_net.c */ +void sw_loopback(struct sk_buff *skb); +int sw_send(struct sw_pkt_info *pkt, struct sk_buff *skb); +struct sk_buff *sw_init_packet(struct sw_dev *sw, struct sw_av *av, + int paylen, struct sw_pkt_info *pkt); +int sw_prepare(struct sw_pkt_info *pkt, struct sk_buff *skb, u32 *crc); +const char *sw_parent_name(struct sw_dev *sw, unsigned int port_num); +struct device *sw_dma_device(struct sw_dev *sw); +int sw_mcast_add(struct sw_dev *sw, union ib_gid *mgid); +int sw_mcast_delete(struct sw_dev *sw, union ib_gid *mgid); + +/* sw_qp.c */ +int sw_qp_chk_init(struct sw_dev *sw, struct ib_qp_init_attr *init); + +int sw_qp_from_init(struct sw_dev *sw, struct sw_qp *qp, + struct ib_qp_init_attr *init, + struct sw_create_qp_resp __user *uresp, + struct ib_pd *ibpd, struct ib_udata *udata); + +int sw_qp_to_init(struct sw_qp *qp, struct ib_qp_init_attr *init); + +int sw_qp_chk_attr(struct sw_dev *sw, struct sw_qp *qp, + struct ib_qp_attr *attr, int mask); + +int sw_qp_from_attr(struct sw_qp *qp, struct ib_qp_attr *attr, + int mask, struct ib_udata *udata); + +int sw_qp_to_attr(struct sw_qp *qp, struct ib_qp_attr *attr, int mask); + +void sw_qp_error(struct sw_qp *qp); + +void sw_qp_destroy(struct sw_qp *qp); + +void sw_qp_cleanup(struct sw_pool_entry *arg); +/* for erdma_sw */ +void cleanup_sw_qp(struct sw_qp *qp); + +static inline int qp_num(struct sw_qp *qp) +{ + return qp->ibqp.qp_num; +} + +static inline enum ib_qp_type qp_type(struct sw_qp *qp) +{ + return qp->ibqp.qp_type; +} + +static inline enum ib_qp_state qp_state(struct sw_qp *qp) +{ + return qp->attr.qp_state; +} + +static inline int qp_mtu(struct sw_qp *qp) +{ + if (qp->ibqp.qp_type == IB_QPT_RC || qp->ibqp.qp_type == IB_QPT_UC) + return qp->attr.path_mtu; + else + return IB_MTU_4096; +} + +static inline int rcv_wqe_size(int max_sge) +{ + return sizeof(struct sw_recv_wqe) + + max_sge * sizeof(struct ib_sge); +} + +void sw_free_rd_atomic_resource(struct sw_qp *qp, struct resp_res *res); + +static inline void sw_advance_resp_resource(struct sw_qp *qp) +{ + qp->resp.res_head++; + if (unlikely(qp->resp.res_head == qp->attr.max_dest_rd_atomic)) + qp->resp.res_head = 0; +} + +void sw_retransmit_timer(struct timer_list *t); +void sw_rnr_nak_timer(struct timer_list *t); + +/* sw_srq.c */ +#define IB_SRQ_INIT_MASK (~IB_SRQ_LIMIT) + +int sw_srq_chk_attr(struct sw_dev *sw, struct sw_srq *srq, + struct ib_srq_attr *attr, enum ib_srq_attr_mask mask); + +int sw_srq_from_init(struct sw_dev *sw, struct sw_srq *srq, + struct ib_srq_init_attr *init, struct ib_udata *udata, + struct sw_create_srq_resp __user *uresp); + +int sw_srq_from_attr(struct sw_dev *sw, struct sw_srq *srq, + struct ib_srq_attr *attr, enum ib_srq_attr_mask mask, + struct sw_modify_srq_cmd *ucmd, struct ib_udata *udata); + +int sw_completer(void *arg); +int sw_requester(void *arg); +int 
sw_responder(void *arg); + +u32 sw_icrc_hdr(struct sw_pkt_info *pkt, struct sk_buff *skb); + +void sw_resp_queue_pkt(struct sw_qp *qp, struct sk_buff *skb); + +void sw_comp_queue_pkt(struct sw_qp *qp, struct sk_buff *skb); + +static inline unsigned int wr_opcode_mask(int opcode, struct sw_qp *qp) +{ + return sw_wr_opcode_info[opcode].mask[qp->ibqp.qp_type]; +} + +static inline int sw_xmit_packet(struct sw_qp *qp, struct sw_pkt_info *pkt, + struct sk_buff *skb) +{ + int err; + int is_request = pkt->mask & SW_REQ_MASK; + struct sw_dev *sw = to_rdev(qp->ibqp.device); + + if ((is_request && (qp->req.state != QP_STATE_READY)) || + (!is_request && (qp->resp.state != QP_STATE_READY))) { + pr_info("Packet dropped. QP is not in ready state\n"); + goto drop; + } + + if (pkt->mask & SW_LOOPBACK_MASK) { + memcpy(SKB_TO_PKT(skb), pkt, sizeof(*pkt)); + sw_loopback(skb); + err = 0; + } else { + err = sw_send(pkt, skb); + } + + if (err) { + sw->xmit_errors++; + sw_counter_inc(sw, SW_CNT_SEND_ERR); + return err; + } + + if ((qp_type(qp) != IB_QPT_RC) && + (pkt->mask & SW_END_MASK)) { + pkt->wqe->state = wqe_state_done; + sw_run_task(&qp->comp.task, 1); + } + + sw_counter_inc(sw, SW_CNT_SENT_PKTS); + goto done; + +drop: + kfree_skb(skb); + err = 0; +done: + return err; +} + +#endif /* SW_LOC_H */ diff --git a/drivers/infiniband/hw/erdma/compat/sw_mcast.c b/drivers/infiniband/hw/erdma/compat/sw_mcast.c new file mode 100644 index 0000000000000000000000000000000000000000..771209e3d20e7e768758e103822d38fad94e6e82 --- /dev/null +++ b/drivers/infiniband/hw/erdma/compat/sw_mcast.c @@ -0,0 +1,43 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* + * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved. + * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved. + */ + +#include "sw.h" +#include "sw_loc.h" + +void sw_drop_all_mcast_groups(struct sw_qp *qp) +{ + struct sw_mc_grp *grp; + struct sw_mc_elem *elem; + + while (1) { + spin_lock_bh(&qp->grp_lock); + if (list_empty(&qp->grp_list)) { + spin_unlock_bh(&qp->grp_lock); + break; + } + elem = list_first_entry(&qp->grp_list, struct sw_mc_elem, + grp_list); + list_del(&elem->grp_list); + spin_unlock_bh(&qp->grp_lock); + + grp = elem->grp; + spin_lock_bh(&grp->mcg_lock); + list_del(&elem->qp_list); + grp->num_qp--; + spin_unlock_bh(&grp->mcg_lock); + sw_drop_ref(grp); + sw_drop_ref(elem); + } +} + +void sw_mc_cleanup(struct sw_pool_entry *arg) +{ + struct sw_mc_grp *grp = container_of(arg, typeof(*grp), pelem); + struct sw_dev *sw = grp->sw; + + sw_drop_key(grp); + sw_mcast_delete(sw, &grp->mgid); +} diff --git a/drivers/infiniband/hw/erdma/compat/sw_mr.c b/drivers/infiniband/hw/erdma/compat/sw_mr.c new file mode 100644 index 0000000000000000000000000000000000000000..53be8d9e64b0c842b88911266246655c460adf40 --- /dev/null +++ b/drivers/infiniband/hw/erdma/compat/sw_mr.c @@ -0,0 +1,483 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* + * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved. + * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved. 
+ */ + +#include "sw.h" +#include "sw_loc.h" + +/* + * lfsr (linear feedback shift register) with period 255 + */ +static u8 sw_get_key(void) +{ + static u32 key = 1; + + key = key << 1; + + key |= (0 != (key & 0x100)) ^ (0 != (key & 0x10)) + ^ (0 != (key & 0x80)) ^ (0 != (key & 0x40)); + + key &= 0xff; + + return key; +} + +int mem_check_range(struct sw_mem *mem, u64 iova, size_t length) +{ + switch (mem->type) { + case SW_MEM_TYPE_DMA: + return 0; + + case SW_MEM_TYPE_MR: + case SW_MEM_TYPE_FMR: + if (iova < mem->iova || + length > mem->length || + iova > mem->iova + mem->length - length) + return -EFAULT; + return 0; + + default: + return -EFAULT; + } +} + +#define IB_ACCESS_REMOTE (IB_ACCESS_REMOTE_READ \ + | IB_ACCESS_REMOTE_WRITE \ + | IB_ACCESS_REMOTE_ATOMIC) + +static void sw_mem_init(int access, struct sw_mem *mem) +{ + u32 lkey = mem->pelem.index << 8 | sw_get_key(); + u32 rkey = (access & IB_ACCESS_REMOTE) ? lkey : 0; + + mem->ibmr.lkey = lkey; + mem->ibmr.rkey = rkey; + mem->state = SW_MEM_STATE_INVALID; + mem->type = SW_MEM_TYPE_NONE; + mem->map_shift = ilog2(SW_BUF_PER_MAP); +} + +void sw_mem_cleanup(struct sw_pool_entry *arg) +{ + struct sw_mem *mem = container_of(arg, typeof(*mem), pelem); + int i; + + if (mem->umem) + ib_umem_release(mem->umem); + + if (mem->map) { + for (i = 0; i < mem->num_map; i++) + kfree(mem->map[i]); + + kfree(mem->map); + } +} + +static int sw_mem_alloc(struct sw_mem *mem, int num_buf) +{ + int i; + int num_map; + struct sw_map **map = mem->map; + + num_map = (num_buf + SW_BUF_PER_MAP - 1) / SW_BUF_PER_MAP; + + mem->map = kmalloc_array(num_map, sizeof(*map), GFP_KERNEL); + if (!mem->map) + goto err1; + + for (i = 0; i < num_map; i++) { + mem->map[i] = kmalloc(sizeof(**map), GFP_KERNEL); + if (!mem->map[i]) + goto err2; + } + + BUILD_BUG_ON(!is_power_of_2(SW_BUF_PER_MAP)); + + mem->map_shift = ilog2(SW_BUF_PER_MAP); + mem->map_mask = SW_BUF_PER_MAP - 1; + + mem->num_buf = num_buf; + mem->num_map = num_map; + mem->max_buf = num_map * SW_BUF_PER_MAP; + + return 0; + +err2: + for (i--; i >= 0; i--) + kfree(mem->map[i]); + + kfree(mem->map); +err1: + return -ENOMEM; +} + +void sw_mem_init_dma(struct sw_pd *pd, + int access, struct sw_mem *mem) +{ + sw_mem_init(access, mem); + + mem->ibmr.pd = &pd->ibpd; + mem->access = access; + mem->state = SW_MEM_STATE_VALID; + mem->type = SW_MEM_TYPE_DMA; +} + +int sw_mem_init_fast(struct sw_pd *pd, + int max_pages, struct sw_mem *mem) +{ + int err; + + sw_mem_init(0, mem); + + /* In fastreg, we also set the rkey */ + mem->ibmr.rkey = mem->ibmr.lkey; + + err = sw_mem_alloc(mem, max_pages); + if (err) + goto err1; + + mem->ibmr.pd = &pd->ibpd; + mem->max_buf = max_pages; + mem->state = SW_MEM_STATE_FREE; + mem->type = SW_MEM_TYPE_MR; + + return 0; + +err1: + return err; +} + +static void lookup_iova( + struct sw_mem *mem, + u64 iova, + int *m_out, + int *n_out, + size_t *offset_out) +{ + size_t offset = iova - mem->iova + mem->offset; + int map_index; + int buf_index; + u64 length; + + if (likely(mem->page_shift)) { + *offset_out = offset & mem->page_mask; + offset >>= mem->page_shift; + *n_out = offset & mem->map_mask; + *m_out = offset >> mem->map_shift; + } else { + map_index = 0; + buf_index = 0; + + length = mem->map[map_index]->buf[buf_index].size; + + while (offset >= length) { + offset -= length; + buf_index++; + + if (buf_index == SW_BUF_PER_MAP) { + map_index++; + buf_index = 0; + } + length = mem->map[map_index]->buf[buf_index].size; + } + + *m_out = map_index; + *n_out = buf_index; + *offset_out = offset; + 
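+		/* At this point map_index/buf_index identify the physical
+		 * buffer containing iova.  Buffers may have arbitrary sizes
+		 * here, which is why this branch walks the list linearly
+		 * instead of using the shift/mask arithmetic of the
+		 * page-aligned case above.
+		 */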
} +} + +void *iova_to_vaddr(struct sw_mem *mem, u64 iova, int length) +{ + size_t offset; + int m, n; + void *addr; + + if (mem->state != SW_MEM_STATE_VALID) { + pr_warn("mem not in valid state\n"); + addr = NULL; + goto out; + } + + if (!mem->map) { + addr = (void *)(uintptr_t)iova; + goto out; + } + + if (mem_check_range(mem, iova, length)) { + pr_warn("range violation\n"); + addr = NULL; + goto out; + } + + lookup_iova(mem, iova, &m, &n, &offset); + + if (offset + length > mem->map[m]->buf[n].size) { + pr_warn("crosses page boundary\n"); + addr = NULL; + goto out; + } + + addr = (void *)(uintptr_t)mem->map[m]->buf[n].addr + offset; + +out: + return addr; +} + +/* copy data from a range (vaddr, vaddr+length-1) to or from + * a mem object starting at iova. Compute incremental value of + * crc32 if crcp is not zero. caller must hold a reference to mem + */ +int sw_mem_copy(struct sw_mem *mem, u64 iova, void *addr, int length, + enum copy_direction dir, u32 *crcp) +{ + int err; + int bytes; + u8 *va; + struct sw_map **map; + struct sw_phys_buf *buf; + int m; + int i; + size_t offset; + u32 crc = crcp ? (*crcp) : 0; + + if (length == 0) + return 0; + + iova = (u64)phys_to_virt(iova); + if (mem->type == SW_MEM_TYPE_DMA) { + u8 *src, *dest; + + src = (dir == to_mem_obj) ? + addr : ((void *)(uintptr_t)iova); + + dest = (dir == to_mem_obj) ? + ((void *)(uintptr_t)iova) : addr; + + memcpy(dest, src, length); + + if (crcp) + *crcp = sw_crc32(to_rdev(mem->ibmr.device), + *crcp, dest, length); + + return 0; + } + + WARN_ON_ONCE(!mem->map); + + err = mem_check_range(mem, iova, length); + if (err) { + err = -EFAULT; + goto err1; + } + + lookup_iova(mem, iova, &m, &i, &offset); + + map = mem->map + m; + buf = map[0]->buf + i; + + while (length > 0) { + u8 *src, *dest; + + va = (u8 *)(uintptr_t)buf->addr + offset; + src = (dir == to_mem_obj) ? addr : va; + dest = (dir == to_mem_obj) ? va : addr; + + bytes = buf->size - offset; + + if (bytes > length) + bytes = length; + + memcpy(dest, src, bytes); + + if (crcp) + crc = sw_crc32(to_rdev(mem->ibmr.device), + crc, dest, bytes); + + length -= bytes; + addr += bytes; + + offset = 0; + buf++; + i++; + + if (i == SW_BUF_PER_MAP) { + i = 0; + map++; + buf = map[0]->buf; + } + } + + if (crcp) + *crcp = crc; + + return 0; + +err1: + return err; +} + +/* copy data in or out of a wqe, i.e. 
sg list + * under the control of a dma descriptor + */ +int sw_copy_data( + struct sw_pd *pd, + int access, + struct sw_dma_info *dma, + void *addr, + int length, + enum copy_direction dir, + u32 *crcp) +{ + int bytes; + struct sw_sge *sge = &dma->sge[dma->cur_sge]; + int offset = dma->sge_offset; + int resid = dma->resid; + struct sw_mem *mem = NULL; + u64 iova; + int err; + + if (length == 0) + return 0; + + if (length > resid) { + err = -EINVAL; + goto err2; + } + + if (sge->length && (offset < sge->length)) { + mem = lookup_mem(pd, access, sge->lkey, lookup_local); + if (!mem) { + err = -EINVAL; + goto err1; + } + } + + while (length > 0) { + bytes = length; + + if (offset >= sge->length) { + if (mem) { + sw_drop_ref(mem); + mem = NULL; + } + sge++; + dma->cur_sge++; + offset = 0; + + if (dma->cur_sge >= dma->num_sge) { + err = -ENOSPC; + goto err2; + } + + if (sge->length) { + mem = lookup_mem(pd, access, sge->lkey, + lookup_local); + if (!mem) { + err = -EINVAL; + goto err1; + } + } else { + continue; + } + } + + if (bytes > sge->length - offset) + bytes = sge->length - offset; + + if (bytes > 0) { + iova = sge->addr + offset; + + err = sw_mem_copy(mem, iova, addr, bytes, dir, crcp); + if (err) + goto err2; + + offset += bytes; + resid -= bytes; + length -= bytes; + addr += bytes; + } + } + + dma->sge_offset = offset; + dma->resid = resid; + + if (mem) + sw_drop_ref(mem); + + return 0; + +err2: + if (mem) + sw_drop_ref(mem); +err1: + return err; +} + +int sw_advance_dma_data(struct sw_dma_info *dma, unsigned int length) +{ + struct sw_sge *sge = &dma->sge[dma->cur_sge]; + int offset = dma->sge_offset; + int resid = dma->resid; + + while (length) { + unsigned int bytes; + + if (offset >= sge->length) { + sge++; + dma->cur_sge++; + offset = 0; + if (dma->cur_sge >= dma->num_sge) + return -ENOSPC; + } + + bytes = length; + + if (bytes > sge->length - offset) + bytes = sge->length - offset; + + offset += bytes; + resid -= bytes; + length -= bytes; + } + + dma->sge_offset = offset; + dma->resid = resid; + + return 0; +} + +/* (1) find the mem (mr or mw) corresponding to lkey/rkey + * depending on lookup_type + * (2) verify that the (qp) pd matches the mem pd + * (3) verify that the mem can support the requested access + * (4) verify that mem state is valid + */ +struct sw_mem *lookup_mem(struct sw_pd *pd, int access, u32 key, + enum lookup_type type) +{ + struct sw_mem *mem; + struct sw_dev *sw = to_rdev(pd->ibpd.device); + int index = key >> 8; + + if ((key & 0xffff0000) == 0x4ac00000) { + key = pd->ibpd.local_dma_lkey; + index = key >> 8; + } + mem = sw_pool_get_index(&sw->mr_pool, index); + if (!mem) + return NULL; + + if (unlikely((type == lookup_local && mr_lkey(mem) != key) || + (type == lookup_remote && mr_rkey(mem) != key) || + mr_pd(mem) != pd || + (access && !(access & mem->access)) || + mem->state != SW_MEM_STATE_VALID)) { + sw_drop_ref(mem); + mem = NULL; + } + + return mem; +} diff --git a/drivers/infiniband/hw/erdma/compat/sw_net.c b/drivers/infiniband/hw/erdma/compat/sw_net.c new file mode 100644 index 0000000000000000000000000000000000000000..ba6f361e6536af93742b02332f34600e95626e06 --- /dev/null +++ b/drivers/infiniband/hw/erdma/compat/sw_net.c @@ -0,0 +1,520 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* + * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved. + * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved. 
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "sw.h" +#include "sw_net.h" +#include "sw_loc.h" +#include "../erdma_verbs.h" + +static struct sw_recv_sockets recv_sockets; + +int sw_mcast_add(struct sw_dev *sw, union ib_gid *mgid) +{ + int err; + unsigned char ll_addr[ETH_ALEN]; + + ipv6_eth_mc_map((struct in6_addr *)mgid->raw, ll_addr); + err = dev_mc_add(sw->ndev, ll_addr); + + return err; +} + +int sw_mcast_delete(struct sw_dev *sw, union ib_gid *mgid) +{ + int err; + unsigned char ll_addr[ETH_ALEN]; + + ipv6_eth_mc_map((struct in6_addr *)mgid->raw, ll_addr); + err = dev_mc_del(sw->ndev, ll_addr); + + return err; +} + +static struct dst_entry *sw_find_route4(struct net_device *ndev, + struct in_addr *saddr, + struct in_addr *daddr) +{ + struct rtable *rt; + struct flowi4 fl = { { 0 } }; + + memset(&fl, 0, sizeof(fl)); + fl.flowi4_oif = ndev->ifindex; + memcpy(&fl.saddr, saddr, sizeof(*saddr)); + memcpy(&fl.daddr, daddr, sizeof(*daddr)); + fl.flowi4_proto = IPPROTO_UDP; + + rt = ip_route_output_key(&init_net, &fl); + if (IS_ERR(rt)) { + pr_err_ratelimited("no route to %pI4\n", &daddr->s_addr); + return NULL; + } + + return &rt->dst; +} + +static struct dst_entry *sw_find_route6(struct net_device *ndev, + struct in6_addr *saddr, + struct in6_addr *daddr) +{ + return NULL; +} + +static struct dst_entry *sw_find_route(struct net_device *ndev, + struct sw_qp *qp, + struct sw_av *av) +{ + struct dst_entry *dst = NULL; + + if (qp_type(qp) == IB_QPT_RC) + dst = sk_dst_get(qp->sk->sk); + + if (!dst || !dst_check(dst, qp->dst_cookie)) { + if (dst) + dst_release(dst); + + if (av->network_type == SW_NETWORK_TYPE_IPV4) { + struct in_addr *saddr; + struct in_addr *daddr; + + saddr = &av->sgid_addr._sockaddr_in.sin_addr; + daddr = &av->dgid_addr._sockaddr_in.sin_addr; + dst = sw_find_route4(ndev, saddr, daddr); + } else if (av->network_type == SW_NETWORK_TYPE_IPV6) { + struct in6_addr *saddr6; + struct in6_addr *daddr6; + + saddr6 = &av->sgid_addr._sockaddr_in6.sin6_addr; + daddr6 = &av->dgid_addr._sockaddr_in6.sin6_addr; + dst = sw_find_route6(ndev, saddr6, daddr6); +#if IS_ENABLED(CONFIG_IPV6) + if (dst) + qp->dst_cookie = + rt6_get_cookie((struct rt6_info *)dst); +#endif + } + + if (dst && (qp_type(qp) == IB_QPT_RC)) { + dst_hold(dst); + sk_dst_set(qp->sk->sk, dst); + } + } + return dst; +} + +static int sw_udp_encap_recv(struct sock *sk, struct sk_buff *skb) +{ + struct udphdr *udph; + struct net_device *ndev = skb->dev; + struct net_device *rdev = ndev; + struct sw_dev *sw = sw_get_dev_from_net(ndev); + struct sw_pkt_info *pkt = SKB_TO_PKT(skb); + + if (!sw && is_vlan_dev(rdev)) { + rdev = vlan_dev_real_dev(ndev); + sw = sw_get_dev_from_net(rdev); + } + if (!sw) + goto drop; + + if (skb_linearize(skb)) { + pr_err("skb_linearize failed\n"); + ib_device_put(&sw->master->ibdev); + goto drop; + } + + udph = udp_hdr(skb); + pkt->sw = sw; + pkt->port_num = 1; + pkt->hdr = (u8 *)(udph + 1); + pkt->mask = SW_GRH_MASK; + pkt->paylen = be16_to_cpu(udph->len) - sizeof(*udph); + + sw_rcv(skb); + + /* + * FIXME: this is in the wrong place, it needs to be done when pkt is + * destroyed + */ + ib_device_put(&sw->master->ibdev); + + return 0; +drop: + kfree_skb(skb); + + return 0; +} + +static struct socket *sw_setup_udp_tunnel(struct net *net, __be16 port, + bool ipv6) +{ + int err; + struct socket *sock; + struct udp_port_cfg udp_cfg = { }; + struct udp_tunnel_sock_cfg tnl_cfg = { }; + + if (ipv6) { + udp_cfg.family = AF_INET6; + udp_cfg.ipv6_v6only 
= 1; + } else { + udp_cfg.family = AF_INET; + } + + udp_cfg.local_udp_port = port; + + /* Create UDP socket */ + err = udp_sock_create(net, &udp_cfg, &sock); + if (err < 0) + return ERR_PTR(err); + + tnl_cfg.encap_type = 1; + tnl_cfg.encap_rcv = sw_udp_encap_recv; + + /* Setup UDP tunnel */ + setup_udp_tunnel_sock(net, sock, &tnl_cfg); + + return sock; +} + +static void sw_release_udp_tunnel(struct socket *sk) +{ + if (sk) + udp_tunnel_sock_release(sk); +} + +static void prepare_udp_hdr(struct sk_buff *skb, __be16 src_port, + __be16 dst_port) +{ + struct udphdr *udph; + + __skb_push(skb, sizeof(*udph)); + skb_reset_transport_header(skb); + udph = udp_hdr(skb); + + udph->dest = dst_port; + udph->source = src_port; + udph->len = htons(skb->len); + udph->check = 0; +} + +static void prepare_ipv4_hdr(struct dst_entry *dst, struct sk_buff *skb, + __be32 saddr, __be32 daddr, __u8 proto, + __u8 tos, __u8 ttl, __be16 df, bool xnet) +{ + struct iphdr *iph; + + skb_scrub_packet(skb, xnet); + + skb_clear_hash(skb); + skb_dst_set(skb, dst_clone(dst)); + memset(IPCB(skb), 0, sizeof(*IPCB(skb))); + + skb_push(skb, sizeof(struct iphdr)); + skb_reset_network_header(skb); + + iph = ip_hdr(skb); + + iph->version = IPVERSION; + iph->ihl = sizeof(struct iphdr) >> 2; + iph->frag_off = df; + iph->protocol = proto; + iph->tos = tos; + iph->daddr = daddr; + iph->saddr = saddr; + iph->ttl = ttl; + __ip_select_ident(dev_net(dst->dev), iph, + skb_shinfo(skb)->gso_segs ?: 1); + iph->tot_len = htons(skb->len); + ip_send_check(iph); +} + +static void prepare_ipv6_hdr(struct dst_entry *dst, struct sk_buff *skb, + struct in6_addr *saddr, struct in6_addr *daddr, + __u8 proto, __u8 prio, __u8 ttl) +{ + struct ipv6hdr *ip6h; + + memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt)); + IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED + | IPSKB_REROUTED); + skb_dst_set(skb, dst_clone(dst)); + + __skb_push(skb, sizeof(*ip6h)); + skb_reset_network_header(skb); + ip6h = ipv6_hdr(skb); + ip6_flow_hdr(ip6h, prio, htonl(0)); + ip6h->payload_len = htons(skb->len); + ip6h->nexthdr = proto; + ip6h->hop_limit = ttl; + ip6h->daddr = *daddr; + ip6h->saddr = *saddr; + ip6h->payload_len = htons(skb->len - sizeof(*ip6h)); +} + +static int prepare4(struct sw_pkt_info *pkt, struct sk_buff *skb) +{ + struct sw_qp *qp = pkt->qp; + struct dst_entry *dst; + bool xnet = false; + __be16 df = htons(IP_DF); + struct sw_av *av = sw_get_av(pkt); + struct in_addr *saddr = &av->sgid_addr._sockaddr_in.sin_addr; + struct in_addr *daddr = &av->dgid_addr._sockaddr_in.sin_addr; + + dst = sw_find_route(skb->dev, qp, av); + if (!dst) { + pr_err("Host not reachable\n"); + return -EHOSTUNREACH; + } + + prepare_udp_hdr(skb, cpu_to_be16(qp->src_port), + cpu_to_be16(ROCE_V2_UDP_DPORT)); + + prepare_ipv4_hdr(dst, skb, saddr->s_addr, daddr->s_addr, IPPROTO_UDP, + av->grh.traffic_class, av->grh.hop_limit, df, xnet); + + dst_release(dst); + return 0; +} + +static int prepare6(struct sw_pkt_info *pkt, struct sk_buff *skb) +{ + struct sw_qp *qp = pkt->qp; + struct dst_entry *dst; + struct sw_av *av = sw_get_av(pkt); + struct in6_addr *saddr = &av->sgid_addr._sockaddr_in6.sin6_addr; + struct in6_addr *daddr = &av->dgid_addr._sockaddr_in6.sin6_addr; + + dst = sw_find_route(skb->dev, qp, av); + if (!dst) { + pr_err("Host not reachable\n"); + return -EHOSTUNREACH; + } + + prepare_udp_hdr(skb, cpu_to_be16(qp->src_port), + cpu_to_be16(ROCE_V2_UDP_DPORT)); + + prepare_ipv6_hdr(dst, skb, saddr, daddr, IPPROTO_UDP, + av->grh.traffic_class, + av->grh.hop_limit); 
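+	/* The skb now carries IPv6 | UDP | the IB transport headers and
+	 * payload built earlier by the caller, with the UDP destination set
+	 * to the RoCEv2 port; the link-layer header is added by the stack
+	 * when the packet is actually transmitted.
+	 */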
+ + dst_release(dst); + return 0; +} + +int sw_prepare(struct sw_pkt_info *pkt, struct sk_buff *skb, u32 *crc) +{ + int err = 0; + + if (skb->protocol == htons(ETH_P_IP)) + err = prepare4(pkt, skb); + else if (skb->protocol == htons(ETH_P_IPV6)) + err = prepare6(pkt, skb); + + *crc = sw_icrc_hdr(pkt, skb); + + if (ether_addr_equal(skb->dev->dev_addr, sw_get_av(pkt)->dmac)) + pkt->mask |= SW_LOOPBACK_MASK; + + return err; +} + +static void sw_skb_tx_dtor(struct sk_buff *skb) +{ + struct sock *sk = skb->sk; + struct sw_qp *qp = sk->sk_user_data; + int skb_out = atomic_dec_return(&qp->skb_out); + + if (unlikely(qp->need_req_skb && + skb_out < SW_INFLIGHT_SKBS_PER_QP_LOW)) + sw_run_task(&qp->req.task, 1); + + sw_drop_ref(qp); +} + +int sw_send(struct sw_pkt_info *pkt, struct sk_buff *skb) +{ + int err; + + skb->destructor = sw_skb_tx_dtor; + skb->sk = pkt->qp->sk->sk; + + sw_add_ref(pkt->qp); + atomic_inc(&pkt->qp->skb_out); + + if (skb->protocol == htons(ETH_P_IP)) { + err = ip_local_out(dev_net(skb_dst(skb)->dev), skb->sk, skb); + } else { + pr_err("Unknown layer 3 protocol: %d\n", skb->protocol); + atomic_dec(&pkt->qp->skb_out); + sw_drop_ref(pkt->qp); + kfree_skb(skb); + return -EINVAL; + } + + if (unlikely(net_xmit_eval(err))) { + pr_debug("error sending packet: %d\n", err); + return -EAGAIN; + } + + return 0; +} + +void sw_loopback(struct sk_buff *skb) +{ + if (skb->protocol == htons(ETH_P_IP)) + skb_pull(skb, sizeof(struct iphdr)); + else + skb_pull(skb, sizeof(struct ipv6hdr)); + + sw_rcv(skb); +} + +struct sk_buff *sw_init_packet(struct sw_dev *sw, struct sw_av *av, + int paylen, struct sw_pkt_info *pkt) +{ + unsigned int hdr_len; + struct sk_buff *skb = NULL; + struct net_device *ndev; + const struct ib_gid_attr *attr; + const int port_num = 1; + + attr = rdma_get_gid_attr(&sw->master->ibdev, port_num, av->grh.sgid_index); + if (IS_ERR(attr)) + return NULL; + + if (av->network_type == SW_NETWORK_TYPE_IPV4) + hdr_len = ETH_HLEN + sizeof(struct udphdr) + + sizeof(struct iphdr); + else + hdr_len = ETH_HLEN + sizeof(struct udphdr) + + sizeof(struct ipv6hdr); + + rcu_read_lock(); + + ndev = rdma_read_gid_attr_ndev_rcu(attr); + + if (IS_ERR(ndev)) { + rcu_read_unlock(); + goto out; + } + skb = alloc_skb(paylen + hdr_len + LL_RESERVED_SPACE(ndev), + GFP_ATOMIC); + + if (unlikely(!skb)) { + rcu_read_unlock(); + goto out; + } + + skb_reserve(skb, hdr_len + LL_RESERVED_SPACE(ndev)); + + /* FIXME: hold reference to this netdev until life of this skb. 
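+	 * ndev was looked up under rcu_read_lock() above; it is stored in
+	 * skb->dev here and used after rcu_read_unlock() without a reference
+	 * being taken, which is what this FIXME is about.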
*/ + skb->dev = ndev; + rcu_read_unlock(); + + if (av->network_type == SW_NETWORK_TYPE_IPV4) + skb->protocol = htons(ETH_P_IP); + else + skb->protocol = htons(ETH_P_IPV6); + + pkt->sw = sw; + pkt->port_num = port_num; + pkt->hdr = skb_put_zero(skb, paylen); + pkt->mask |= SW_GRH_MASK; + +out: + rdma_put_gid_attr(attr); + return skb; +} + +/* + * this is required by sw_cfg to match sw devices in + * /sys/class/infiniband up with their underlying ethernet devices + */ +const char *sw_parent_name(struct sw_dev *sw, unsigned int port_num) +{ + return sw->ndev->name; +} + +static void sw_port_event(struct sw_dev *sw, + enum ib_event_type event) +{ + struct ib_event ev; + + ev.device = &sw->ib_dev; + ev.element.port_num = 1; + ev.event = event; + + ib_dispatch_event(&ev); +} + +/* Caller must hold net_info_lock */ +void sw_port_up(struct sw_dev *sw) +{ + struct sw_port *port; + + port = &sw->port; + port->attr.state = IB_PORT_ACTIVE; + + sw_port_event(sw, IB_EVENT_PORT_ACTIVE); + dev_info(&sw->ib_dev.dev, "set active\n"); +} + +/* Caller must hold net_info_lock */ +void sw_port_down(struct sw_dev *sw) +{ + struct sw_port *port; + + port = &sw->port; + port->attr.state = IB_PORT_DOWN; + + sw_port_event(sw, IB_EVENT_PORT_ERR); + sw_counter_inc(sw, SW_CNT_LINK_DOWNED); + dev_info(&sw->ib_dev.dev, "set down\n"); +} + +void sw_set_port_state(struct sw_dev *sw) +{ + if (netif_running(sw->ndev) && netif_carrier_ok(sw->ndev)) + sw_port_up(sw); + else + sw_port_down(sw); +} + +static int sw_net_ipv4_init(void) +{ + recv_sockets.sk4 = sw_setup_udp_tunnel(&init_net, + htons(ROCE_V2_UDP_DPORT), false); + if (IS_ERR(recv_sockets.sk4)) { + recv_sockets.sk4 = NULL; + pr_err("Failed to create IPv4 UDP tunnel\n"); + return -1; + } + + return 0; +} + +void sw_net_exit(void) +{ + sw_release_udp_tunnel(recv_sockets.sk6); + sw_release_udp_tunnel(recv_sockets.sk4); +} + +int sw_net_init(void) +{ + recv_sockets.sk6 = NULL; + + return sw_net_ipv4_init(); +} diff --git a/drivers/infiniband/hw/erdma/compat/sw_net.h b/drivers/infiniband/hw/erdma/compat/sw_net.h new file mode 100644 index 0000000000000000000000000000000000000000..3373b903c116b24e6e7d3d627132149e479ea95f --- /dev/null +++ b/drivers/infiniband/hw/erdma/compat/sw_net.h @@ -0,0 +1,23 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* + * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved. + * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved. + */ + +#ifndef SW_NET_H +#define SW_NET_H + +#include +#include +#include +#include "../kcompat.h" + +struct sw_recv_sockets { + struct socket *sk4; + struct socket *sk6; +}; + +int sw_net_init(void); +void sw_net_exit(void); + +#endif /* SW_NET_H */ diff --git a/drivers/infiniband/hw/erdma/compat/sw_opcode.c b/drivers/infiniband/hw/erdma/compat/sw_opcode.c new file mode 100644 index 0000000000000000000000000000000000000000..2c0df92218810557ed14ddf4d4f58bfc2db20a91 --- /dev/null +++ b/drivers/infiniband/hw/erdma/compat/sw_opcode.c @@ -0,0 +1,934 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* + * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved. + * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved. 
+ */ + +#include +#include "sw_opcode.h" +#include "sw_hdr.h" + +/* useful information about work request opcodes and pkt opcodes in + * table form + */ +struct sw_wr_opcode_info sw_wr_opcode_info[] = { + [IB_WR_RDMA_WRITE] = { + .name = "IB_WR_RDMA_WRITE", + .mask = { + [IB_QPT_RC] = WR_INLINE_MASK | WR_WRITE_MASK, + [IB_QPT_UC] = WR_INLINE_MASK | WR_WRITE_MASK, + }, + }, + [IB_WR_RDMA_WRITE_WITH_IMM] = { + .name = "IB_WR_RDMA_WRITE_WITH_IMM", + .mask = { + [IB_QPT_RC] = WR_INLINE_MASK | WR_WRITE_MASK, + [IB_QPT_UC] = WR_INLINE_MASK | WR_WRITE_MASK, + }, + }, + [IB_WR_SEND] = { + .name = "IB_WR_SEND", + .mask = { + [IB_QPT_SMI] = WR_INLINE_MASK | WR_SEND_MASK, + [IB_QPT_GSI] = WR_INLINE_MASK | WR_SEND_MASK, + [IB_QPT_RC] = WR_INLINE_MASK | WR_SEND_MASK, + [IB_QPT_UC] = WR_INLINE_MASK | WR_SEND_MASK, + [IB_QPT_UD] = WR_INLINE_MASK | WR_SEND_MASK, + }, + }, + [IB_WR_SEND_WITH_IMM] = { + .name = "IB_WR_SEND_WITH_IMM", + .mask = { + [IB_QPT_SMI] = WR_INLINE_MASK | WR_SEND_MASK, + [IB_QPT_GSI] = WR_INLINE_MASK | WR_SEND_MASK, + [IB_QPT_RC] = WR_INLINE_MASK | WR_SEND_MASK, + [IB_QPT_UC] = WR_INLINE_MASK | WR_SEND_MASK, + [IB_QPT_UD] = WR_INLINE_MASK | WR_SEND_MASK, + }, + }, + [IB_WR_RDMA_READ] = { + .name = "IB_WR_RDMA_READ", + .mask = { + [IB_QPT_RC] = WR_READ_MASK, + }, + }, + [IB_WR_ATOMIC_CMP_AND_SWP] = { + .name = "IB_WR_ATOMIC_CMP_AND_SWP", + .mask = { + [IB_QPT_RC] = WR_ATOMIC_MASK, + }, + }, + [IB_WR_ATOMIC_FETCH_AND_ADD] = { + .name = "IB_WR_ATOMIC_FETCH_AND_ADD", + .mask = { + [IB_QPT_RC] = WR_ATOMIC_MASK, + }, + }, + [IB_WR_LSO] = { + .name = "IB_WR_LSO", + .mask = { + /* not supported */ + }, + }, + [IB_WR_SEND_WITH_INV] = { + .name = "IB_WR_SEND_WITH_INV", + .mask = { + [IB_QPT_RC] = WR_INLINE_MASK | WR_SEND_MASK, + [IB_QPT_UC] = WR_INLINE_MASK | WR_SEND_MASK, + [IB_QPT_UD] = WR_INLINE_MASK | WR_SEND_MASK, + }, + }, + [IB_WR_RDMA_READ_WITH_INV] = { + .name = "IB_WR_RDMA_READ_WITH_INV", + .mask = { + [IB_QPT_RC] = WR_READ_MASK, + }, + }, + [IB_WR_LOCAL_INV] = { + .name = "IB_WR_LOCAL_INV", + .mask = { + [IB_QPT_RC] = WR_REG_MASK, + }, + }, + [IB_WR_REG_MR] = { + .name = "IB_WR_REG_MR", + .mask = { + [IB_QPT_RC] = WR_REG_MASK, + }, + }, +}; + +struct sw_opcode_info sw_opcode[SW_NUM_OPCODE] = { + [IB_OPCODE_RC_SEND_FIRST] = { + .name = "IB_OPCODE_RC_SEND_FIRST", + .mask = SW_PAYLOAD_MASK | SW_REQ_MASK | SW_RWR_MASK + | SW_SEND_MASK | SW_START_MASK, + .length = SW_BTH_BYTES, + .offset = { + [SW_BTH] = 0, + [SW_PAYLOAD] = SW_BTH_BYTES, + } + }, + [IB_OPCODE_RC_SEND_MIDDLE] = { + .name = "IB_OPCODE_RC_SEND_MIDDLE]", + .mask = SW_PAYLOAD_MASK | SW_REQ_MASK | SW_SEND_MASK + | SW_MIDDLE_MASK, + .length = SW_BTH_BYTES, + .offset = { + [SW_BTH] = 0, + [SW_PAYLOAD] = SW_BTH_BYTES, + } + }, + [IB_OPCODE_RC_SEND_LAST] = { + .name = "IB_OPCODE_RC_SEND_LAST", + .mask = SW_PAYLOAD_MASK | SW_REQ_MASK | SW_COMP_MASK + | SW_SEND_MASK | SW_END_MASK, + .length = SW_BTH_BYTES, + .offset = { + [SW_BTH] = 0, + [SW_PAYLOAD] = SW_BTH_BYTES, + } + }, + [IB_OPCODE_RC_SEND_LAST_WITH_IMMEDIATE] = { + .name = "IB_OPCODE_RC_SEND_LAST_WITH_IMMEDIATE", + .mask = SW_IMMDT_MASK | SW_PAYLOAD_MASK | SW_REQ_MASK + | SW_COMP_MASK | SW_SEND_MASK | SW_END_MASK, + .length = SW_BTH_BYTES + SW_IMMDT_BYTES, + .offset = { + [SW_BTH] = 0, + [SW_IMMDT] = SW_BTH_BYTES, + [SW_PAYLOAD] = SW_BTH_BYTES + + SW_IMMDT_BYTES, + } + }, + [IB_OPCODE_RC_SEND_ONLY] = { + .name = "IB_OPCODE_RC_SEND_ONLY", + .mask = SW_PAYLOAD_MASK | SW_REQ_MASK | SW_COMP_MASK + | SW_RWR_MASK | SW_SEND_MASK + | SW_START_MASK | SW_END_MASK, + .length = 
SW_BTH_BYTES, + .offset = { + [SW_BTH] = 0, + [SW_PAYLOAD] = SW_BTH_BYTES, + } + }, + [IB_OPCODE_RC_SEND_ONLY_WITH_IMMEDIATE] = { + .name = "IB_OPCODE_RC_SEND_ONLY_WITH_IMMEDIATE", + .mask = SW_IMMDT_MASK | SW_PAYLOAD_MASK | SW_REQ_MASK + | SW_COMP_MASK | SW_RWR_MASK | SW_SEND_MASK + | SW_START_MASK | SW_END_MASK, + .length = SW_BTH_BYTES + SW_IMMDT_BYTES, + .offset = { + [SW_BTH] = 0, + [SW_IMMDT] = SW_BTH_BYTES, + [SW_PAYLOAD] = SW_BTH_BYTES + + SW_IMMDT_BYTES, + } + }, + [IB_OPCODE_RC_RDMA_WRITE_FIRST] = { + .name = "IB_OPCODE_RC_RDMA_WRITE_FIRST", + .mask = SW_RETH_MASK | SW_PAYLOAD_MASK | SW_REQ_MASK + | SW_WRITE_MASK | SW_START_MASK, + .length = SW_BTH_BYTES + SW_RETH_BYTES, + .offset = { + [SW_BTH] = 0, + [SW_RETH] = SW_BTH_BYTES, + [SW_PAYLOAD] = SW_BTH_BYTES + + SW_RETH_BYTES, + } + }, + [IB_OPCODE_RC_RDMA_WRITE_MIDDLE] = { + .name = "IB_OPCODE_RC_RDMA_WRITE_MIDDLE", + .mask = SW_PAYLOAD_MASK | SW_REQ_MASK | SW_WRITE_MASK + | SW_MIDDLE_MASK, + .length = SW_BTH_BYTES, + .offset = { + [SW_BTH] = 0, + [SW_PAYLOAD] = SW_BTH_BYTES, + } + }, + [IB_OPCODE_RC_RDMA_WRITE_LAST] = { + .name = "IB_OPCODE_RC_RDMA_WRITE_LAST", + .mask = SW_PAYLOAD_MASK | SW_REQ_MASK | SW_WRITE_MASK + | SW_END_MASK, + .length = SW_BTH_BYTES, + .offset = { + [SW_BTH] = 0, + [SW_PAYLOAD] = SW_BTH_BYTES, + } + }, + [IB_OPCODE_RC_RDMA_WRITE_LAST_WITH_IMMEDIATE] = { + .name = "IB_OPCODE_RC_RDMA_WRITE_LAST_WITH_IMMEDIATE", + .mask = SW_IMMDT_MASK | SW_PAYLOAD_MASK | SW_REQ_MASK + | SW_WRITE_MASK | SW_COMP_MASK | SW_RWR_MASK + | SW_END_MASK, + .length = SW_BTH_BYTES + SW_IMMDT_BYTES, + .offset = { + [SW_BTH] = 0, + [SW_IMMDT] = SW_BTH_BYTES, + [SW_PAYLOAD] = SW_BTH_BYTES + + SW_IMMDT_BYTES, + } + }, + [IB_OPCODE_RC_RDMA_WRITE_ONLY] = { + .name = "IB_OPCODE_RC_RDMA_WRITE_ONLY", + .mask = SW_RETH_MASK | SW_PAYLOAD_MASK | SW_REQ_MASK + | SW_WRITE_MASK | SW_START_MASK + | SW_END_MASK, + .length = SW_BTH_BYTES + SW_RETH_BYTES, + .offset = { + [SW_BTH] = 0, + [SW_RETH] = SW_BTH_BYTES, + [SW_PAYLOAD] = SW_BTH_BYTES + + SW_RETH_BYTES, + } + }, + [IB_OPCODE_RC_RDMA_WRITE_ONLY_WITH_IMMEDIATE] = { + .name = "IB_OPCODE_RC_RDMA_WRITE_ONLY_WITH_IMMEDIATE", + .mask = SW_RETH_MASK | SW_IMMDT_MASK | SW_PAYLOAD_MASK + | SW_REQ_MASK | SW_WRITE_MASK + | SW_COMP_MASK | SW_RWR_MASK + | SW_START_MASK | SW_END_MASK, + .length = SW_BTH_BYTES + SW_IMMDT_BYTES + SW_RETH_BYTES, + .offset = { + [SW_BTH] = 0, + [SW_RETH] = SW_BTH_BYTES, + [SW_IMMDT] = SW_BTH_BYTES + + SW_RETH_BYTES, + [SW_PAYLOAD] = SW_BTH_BYTES + + SW_RETH_BYTES + + SW_IMMDT_BYTES, + } + }, + [IB_OPCODE_RC_RDMA_READ_REQUEST] = { + .name = "IB_OPCODE_RC_RDMA_READ_REQUEST", + .mask = SW_RETH_MASK | SW_REQ_MASK | SW_READ_MASK + | SW_START_MASK | SW_END_MASK, + .length = SW_BTH_BYTES + SW_RETH_BYTES, + .offset = { + [SW_BTH] = 0, + [SW_RETH] = SW_BTH_BYTES, + [SW_PAYLOAD] = SW_BTH_BYTES + + SW_RETH_BYTES, + } + }, + [IB_OPCODE_RC_RDMA_READ_RESPONSE_FIRST] = { + .name = "IB_OPCODE_RC_RDMA_READ_RESPONSE_FIRST", + .mask = SW_AETH_MASK | SW_PAYLOAD_MASK | SW_ACK_MASK + | SW_START_MASK, + .length = SW_BTH_BYTES + SW_AETH_BYTES, + .offset = { + [SW_BTH] = 0, + [SW_AETH] = SW_BTH_BYTES, + [SW_PAYLOAD] = SW_BTH_BYTES + + SW_AETH_BYTES, + } + }, + [IB_OPCODE_RC_RDMA_READ_RESPONSE_MIDDLE] = { + .name = "IB_OPCODE_RC_RDMA_READ_RESPONSE_MIDDLE", + .mask = SW_PAYLOAD_MASK | SW_ACK_MASK | SW_MIDDLE_MASK, + .length = SW_BTH_BYTES, + .offset = { + [SW_BTH] = 0, + [SW_PAYLOAD] = SW_BTH_BYTES, + } + }, + [IB_OPCODE_RC_RDMA_READ_RESPONSE_LAST] = { + .name = "IB_OPCODE_RC_RDMA_READ_RESPONSE_LAST", + 
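+		/* As with every entry in this table, .mask flags the headers
+		 * present (plus request/ack/start/end properties), .length is
+		 * the total transport header size, and .offset[] records each
+		 * header's byte position; accessors such as aeth_syn() and
+		 * payload_addr() index .offset[] to locate their fields.
+		 */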
.mask = SW_AETH_MASK | SW_PAYLOAD_MASK | SW_ACK_MASK + | SW_END_MASK, + .length = SW_BTH_BYTES + SW_AETH_BYTES, + .offset = { + [SW_BTH] = 0, + [SW_AETH] = SW_BTH_BYTES, + [SW_PAYLOAD] = SW_BTH_BYTES + + SW_AETH_BYTES, + } + }, + [IB_OPCODE_RC_RDMA_READ_RESPONSE_ONLY] = { + .name = "IB_OPCODE_RC_RDMA_READ_RESPONSE_ONLY", + .mask = SW_AETH_MASK | SW_PAYLOAD_MASK | SW_ACK_MASK + | SW_START_MASK | SW_END_MASK, + .length = SW_BTH_BYTES + SW_AETH_BYTES, + .offset = { + [SW_BTH] = 0, + [SW_AETH] = SW_BTH_BYTES, + [SW_PAYLOAD] = SW_BTH_BYTES + + SW_AETH_BYTES, + } + }, + [IB_OPCODE_RC_ACKNOWLEDGE] = { + .name = "IB_OPCODE_RC_ACKNOWLEDGE", + .mask = SW_AETH_MASK | SW_ACK_MASK | SW_START_MASK + | SW_END_MASK, + .length = SW_BTH_BYTES + SW_AETH_BYTES, + .offset = { + [SW_BTH] = 0, + [SW_AETH] = SW_BTH_BYTES, + [SW_PAYLOAD] = SW_BTH_BYTES + + SW_AETH_BYTES, + } + }, + [IB_OPCODE_RC_ATOMIC_ACKNOWLEDGE] = { + .name = "IB_OPCODE_RC_ATOMIC_ACKNOWLEDGE", + .mask = SW_AETH_MASK | SW_ATMACK_MASK | SW_ACK_MASK + | SW_START_MASK | SW_END_MASK, + .length = SW_BTH_BYTES + SW_ATMACK_BYTES + SW_AETH_BYTES, + .offset = { + [SW_BTH] = 0, + [SW_AETH] = SW_BTH_BYTES, + [SW_ATMACK] = SW_BTH_BYTES + + SW_AETH_BYTES, + [SW_PAYLOAD] = SW_BTH_BYTES + + SW_ATMACK_BYTES + SW_AETH_BYTES, + } + }, + [IB_OPCODE_RC_COMPARE_SWAP] = { + .name = "IB_OPCODE_RC_COMPARE_SWAP", + .mask = SW_ATMETH_MASK | SW_REQ_MASK | SW_ATOMIC_MASK + | SW_START_MASK | SW_END_MASK, + .length = SW_BTH_BYTES + SW_ATMETH_BYTES, + .offset = { + [SW_BTH] = 0, + [SW_ATMETH] = SW_BTH_BYTES, + [SW_PAYLOAD] = SW_BTH_BYTES + + SW_ATMETH_BYTES, + } + }, + [IB_OPCODE_RC_FETCH_ADD] = { + .name = "IB_OPCODE_RC_FETCH_ADD", + .mask = SW_ATMETH_MASK | SW_REQ_MASK | SW_ATOMIC_MASK + | SW_START_MASK | SW_END_MASK, + .length = SW_BTH_BYTES + SW_ATMETH_BYTES, + .offset = { + [SW_BTH] = 0, + [SW_ATMETH] = SW_BTH_BYTES, + [SW_PAYLOAD] = SW_BTH_BYTES + + SW_ATMETH_BYTES, + } + }, + [IB_OPCODE_RC_SEND_LAST_WITH_INVALIDATE] = { + .name = "IB_OPCODE_RC_SEND_LAST_WITH_INVALIDATE", + .mask = SW_IETH_MASK | SW_PAYLOAD_MASK | SW_REQ_MASK + | SW_COMP_MASK | SW_SEND_MASK | SW_END_MASK, + .length = SW_BTH_BYTES + SW_IETH_BYTES, + .offset = { + [SW_BTH] = 0, + [SW_IETH] = SW_BTH_BYTES, + [SW_PAYLOAD] = SW_BTH_BYTES + + SW_IETH_BYTES, + } + }, + [IB_OPCODE_RC_SEND_ONLY_WITH_INVALIDATE] = { + .name = "IB_OPCODE_RC_SEND_ONLY_INV", + .mask = SW_IETH_MASK | SW_PAYLOAD_MASK | SW_REQ_MASK + | SW_COMP_MASK | SW_RWR_MASK | SW_SEND_MASK + | SW_END_MASK | SW_START_MASK, + .length = SW_BTH_BYTES + SW_IETH_BYTES, + .offset = { + [SW_BTH] = 0, + [SW_IETH] = SW_BTH_BYTES, + [SW_PAYLOAD] = SW_BTH_BYTES + + SW_IETH_BYTES, + } + }, + + /* UC */ + [IB_OPCODE_UC_SEND_FIRST] = { + .name = "IB_OPCODE_UC_SEND_FIRST", + .mask = SW_PAYLOAD_MASK | SW_REQ_MASK | SW_RWR_MASK + | SW_SEND_MASK | SW_START_MASK, + .length = SW_BTH_BYTES, + .offset = { + [SW_BTH] = 0, + [SW_PAYLOAD] = SW_BTH_BYTES, + } + }, + [IB_OPCODE_UC_SEND_MIDDLE] = { + .name = "IB_OPCODE_UC_SEND_MIDDLE", + .mask = SW_PAYLOAD_MASK | SW_REQ_MASK | SW_SEND_MASK + | SW_MIDDLE_MASK, + .length = SW_BTH_BYTES, + .offset = { + [SW_BTH] = 0, + [SW_PAYLOAD] = SW_BTH_BYTES, + } + }, + [IB_OPCODE_UC_SEND_LAST] = { + .name = "IB_OPCODE_UC_SEND_LAST", + .mask = SW_PAYLOAD_MASK | SW_REQ_MASK | SW_COMP_MASK + | SW_SEND_MASK | SW_END_MASK, + .length = SW_BTH_BYTES, + .offset = { + [SW_BTH] = 0, + [SW_PAYLOAD] = SW_BTH_BYTES, + } + }, + [IB_OPCODE_UC_SEND_LAST_WITH_IMMEDIATE] = { + .name = "IB_OPCODE_UC_SEND_LAST_WITH_IMMEDIATE", + .mask = SW_IMMDT_MASK | 
SW_PAYLOAD_MASK | SW_REQ_MASK + | SW_COMP_MASK | SW_SEND_MASK | SW_END_MASK, + .length = SW_BTH_BYTES + SW_IMMDT_BYTES, + .offset = { + [SW_BTH] = 0, + [SW_IMMDT] = SW_BTH_BYTES, + [SW_PAYLOAD] = SW_BTH_BYTES + + SW_IMMDT_BYTES, + } + }, + [IB_OPCODE_UC_SEND_ONLY] = { + .name = "IB_OPCODE_UC_SEND_ONLY", + .mask = SW_PAYLOAD_MASK | SW_REQ_MASK | SW_COMP_MASK + | SW_RWR_MASK | SW_SEND_MASK + | SW_START_MASK | SW_END_MASK, + .length = SW_BTH_BYTES, + .offset = { + [SW_BTH] = 0, + [SW_PAYLOAD] = SW_BTH_BYTES, + } + }, + [IB_OPCODE_UC_SEND_ONLY_WITH_IMMEDIATE] = { + .name = "IB_OPCODE_UC_SEND_ONLY_WITH_IMMEDIATE", + .mask = SW_IMMDT_MASK | SW_PAYLOAD_MASK | SW_REQ_MASK + | SW_COMP_MASK | SW_RWR_MASK | SW_SEND_MASK + | SW_START_MASK | SW_END_MASK, + .length = SW_BTH_BYTES + SW_IMMDT_BYTES, + .offset = { + [SW_BTH] = 0, + [SW_IMMDT] = SW_BTH_BYTES, + [SW_PAYLOAD] = SW_BTH_BYTES + + SW_IMMDT_BYTES, + } + }, + [IB_OPCODE_UC_RDMA_WRITE_FIRST] = { + .name = "IB_OPCODE_UC_RDMA_WRITE_FIRST", + .mask = SW_RETH_MASK | SW_PAYLOAD_MASK | SW_REQ_MASK + | SW_WRITE_MASK | SW_START_MASK, + .length = SW_BTH_BYTES + SW_RETH_BYTES, + .offset = { + [SW_BTH] = 0, + [SW_RETH] = SW_BTH_BYTES, + [SW_PAYLOAD] = SW_BTH_BYTES + + SW_RETH_BYTES, + } + }, + [IB_OPCODE_UC_RDMA_WRITE_MIDDLE] = { + .name = "IB_OPCODE_UC_RDMA_WRITE_MIDDLE", + .mask = SW_PAYLOAD_MASK | SW_REQ_MASK | SW_WRITE_MASK + | SW_MIDDLE_MASK, + .length = SW_BTH_BYTES, + .offset = { + [SW_BTH] = 0, + [SW_PAYLOAD] = SW_BTH_BYTES, + } + }, + [IB_OPCODE_UC_RDMA_WRITE_LAST] = { + .name = "IB_OPCODE_UC_RDMA_WRITE_LAST", + .mask = SW_PAYLOAD_MASK | SW_REQ_MASK | SW_WRITE_MASK + | SW_END_MASK, + .length = SW_BTH_BYTES, + .offset = { + [SW_BTH] = 0, + [SW_PAYLOAD] = SW_BTH_BYTES, + } + }, + [IB_OPCODE_UC_RDMA_WRITE_LAST_WITH_IMMEDIATE] = { + .name = "IB_OPCODE_UC_RDMA_WRITE_LAST_WITH_IMMEDIATE", + .mask = SW_IMMDT_MASK | SW_PAYLOAD_MASK | SW_REQ_MASK + | SW_WRITE_MASK | SW_COMP_MASK | SW_RWR_MASK + | SW_END_MASK, + .length = SW_BTH_BYTES + SW_IMMDT_BYTES, + .offset = { + [SW_BTH] = 0, + [SW_IMMDT] = SW_BTH_BYTES, + [SW_PAYLOAD] = SW_BTH_BYTES + + SW_IMMDT_BYTES, + } + }, + [IB_OPCODE_UC_RDMA_WRITE_ONLY] = { + .name = "IB_OPCODE_UC_RDMA_WRITE_ONLY", + .mask = SW_RETH_MASK | SW_PAYLOAD_MASK | SW_REQ_MASK + | SW_WRITE_MASK | SW_START_MASK + | SW_END_MASK, + .length = SW_BTH_BYTES + SW_RETH_BYTES, + .offset = { + [SW_BTH] = 0, + [SW_RETH] = SW_BTH_BYTES, + [SW_PAYLOAD] = SW_BTH_BYTES + + SW_RETH_BYTES, + } + }, + [IB_OPCODE_UC_RDMA_WRITE_ONLY_WITH_IMMEDIATE] = { + .name = "IB_OPCODE_UC_RDMA_WRITE_ONLY_WITH_IMMEDIATE", + .mask = SW_RETH_MASK | SW_IMMDT_MASK | SW_PAYLOAD_MASK + | SW_REQ_MASK | SW_WRITE_MASK + | SW_COMP_MASK | SW_RWR_MASK + | SW_START_MASK | SW_END_MASK, + .length = SW_BTH_BYTES + SW_IMMDT_BYTES + SW_RETH_BYTES, + .offset = { + [SW_BTH] = 0, + [SW_RETH] = SW_BTH_BYTES, + [SW_IMMDT] = SW_BTH_BYTES + + SW_RETH_BYTES, + [SW_PAYLOAD] = SW_BTH_BYTES + + SW_RETH_BYTES + + SW_IMMDT_BYTES, + } + }, + + /* RD */ + [IB_OPCODE_RD_SEND_FIRST] = { + .name = "IB_OPCODE_RD_SEND_FIRST", + .mask = SW_RDETH_MASK | SW_DETH_MASK | SW_PAYLOAD_MASK + | SW_REQ_MASK | SW_RWR_MASK | SW_SEND_MASK + | SW_START_MASK, + .length = SW_BTH_BYTES + SW_DETH_BYTES + SW_RDETH_BYTES, + .offset = { + [SW_BTH] = 0, + [SW_RDETH] = SW_BTH_BYTES, + [SW_DETH] = SW_BTH_BYTES + + SW_RDETH_BYTES, + [SW_PAYLOAD] = SW_BTH_BYTES + + SW_RDETH_BYTES + + SW_DETH_BYTES, + } + }, + [IB_OPCODE_RD_SEND_MIDDLE] = { + .name = "IB_OPCODE_RD_SEND_MIDDLE", + .mask = SW_RDETH_MASK | SW_DETH_MASK | SW_PAYLOAD_MASK 
+ | SW_REQ_MASK | SW_SEND_MASK + | SW_MIDDLE_MASK, + .length = SW_BTH_BYTES + SW_DETH_BYTES + SW_RDETH_BYTES, + .offset = { + [SW_BTH] = 0, + [SW_RDETH] = SW_BTH_BYTES, + [SW_DETH] = SW_BTH_BYTES + + SW_RDETH_BYTES, + [SW_PAYLOAD] = SW_BTH_BYTES + + SW_RDETH_BYTES + + SW_DETH_BYTES, + } + }, + [IB_OPCODE_RD_SEND_LAST] = { + .name = "IB_OPCODE_RD_SEND_LAST", + .mask = SW_RDETH_MASK | SW_DETH_MASK | SW_PAYLOAD_MASK + | SW_REQ_MASK | SW_COMP_MASK | SW_SEND_MASK + | SW_END_MASK, + .length = SW_BTH_BYTES + SW_DETH_BYTES + SW_RDETH_BYTES, + .offset = { + [SW_BTH] = 0, + [SW_RDETH] = SW_BTH_BYTES, + [SW_DETH] = SW_BTH_BYTES + + SW_RDETH_BYTES, + [SW_PAYLOAD] = SW_BTH_BYTES + + SW_RDETH_BYTES + + SW_DETH_BYTES, + } + }, + [IB_OPCODE_RD_SEND_LAST_WITH_IMMEDIATE] = { + .name = "IB_OPCODE_RD_SEND_LAST_WITH_IMMEDIATE", + .mask = SW_RDETH_MASK | SW_DETH_MASK | SW_IMMDT_MASK + | SW_PAYLOAD_MASK | SW_REQ_MASK + | SW_COMP_MASK | SW_SEND_MASK + | SW_END_MASK, + .length = SW_BTH_BYTES + SW_IMMDT_BYTES + SW_DETH_BYTES + + SW_RDETH_BYTES, + .offset = { + [SW_BTH] = 0, + [SW_RDETH] = SW_BTH_BYTES, + [SW_DETH] = SW_BTH_BYTES + + SW_RDETH_BYTES, + [SW_IMMDT] = SW_BTH_BYTES + + SW_RDETH_BYTES + + SW_DETH_BYTES, + [SW_PAYLOAD] = SW_BTH_BYTES + + SW_RDETH_BYTES + + SW_DETH_BYTES + + SW_IMMDT_BYTES, + } + }, + [IB_OPCODE_RD_SEND_ONLY] = { + .name = "IB_OPCODE_RD_SEND_ONLY", + .mask = SW_RDETH_MASK | SW_DETH_MASK | SW_PAYLOAD_MASK + | SW_REQ_MASK | SW_COMP_MASK | SW_RWR_MASK + | SW_SEND_MASK | SW_START_MASK | SW_END_MASK, + .length = SW_BTH_BYTES + SW_DETH_BYTES + SW_RDETH_BYTES, + .offset = { + [SW_BTH] = 0, + [SW_RDETH] = SW_BTH_BYTES, + [SW_DETH] = SW_BTH_BYTES + + SW_RDETH_BYTES, + [SW_PAYLOAD] = SW_BTH_BYTES + + SW_RDETH_BYTES + + SW_DETH_BYTES, + } + }, + [IB_OPCODE_RD_SEND_ONLY_WITH_IMMEDIATE] = { + .name = "IB_OPCODE_RD_SEND_ONLY_WITH_IMMEDIATE", + .mask = SW_RDETH_MASK | SW_DETH_MASK | SW_IMMDT_MASK + | SW_PAYLOAD_MASK | SW_REQ_MASK + | SW_COMP_MASK | SW_RWR_MASK | SW_SEND_MASK + | SW_START_MASK | SW_END_MASK, + .length = SW_BTH_BYTES + SW_IMMDT_BYTES + SW_DETH_BYTES + + SW_RDETH_BYTES, + .offset = { + [SW_BTH] = 0, + [SW_RDETH] = SW_BTH_BYTES, + [SW_DETH] = SW_BTH_BYTES + + SW_RDETH_BYTES, + [SW_IMMDT] = SW_BTH_BYTES + + SW_RDETH_BYTES + + SW_DETH_BYTES, + [SW_PAYLOAD] = SW_BTH_BYTES + + SW_RDETH_BYTES + + SW_DETH_BYTES + + SW_IMMDT_BYTES, + } + }, + [IB_OPCODE_RD_RDMA_WRITE_FIRST] = { + .name = "IB_OPCODE_RD_RDMA_WRITE_FIRST", + .mask = SW_RDETH_MASK | SW_DETH_MASK | SW_RETH_MASK + | SW_PAYLOAD_MASK | SW_REQ_MASK + | SW_WRITE_MASK | SW_START_MASK, + .length = SW_BTH_BYTES + SW_RETH_BYTES + SW_DETH_BYTES + + SW_RDETH_BYTES, + .offset = { + [SW_BTH] = 0, + [SW_RDETH] = SW_BTH_BYTES, + [SW_DETH] = SW_BTH_BYTES + + SW_RDETH_BYTES, + [SW_RETH] = SW_BTH_BYTES + + SW_RDETH_BYTES + + SW_DETH_BYTES, + [SW_PAYLOAD] = SW_BTH_BYTES + + SW_RDETH_BYTES + + SW_DETH_BYTES + + SW_RETH_BYTES, + } + }, + [IB_OPCODE_RD_RDMA_WRITE_MIDDLE] = { + .name = "IB_OPCODE_RD_RDMA_WRITE_MIDDLE", + .mask = SW_RDETH_MASK | SW_DETH_MASK | SW_PAYLOAD_MASK + | SW_REQ_MASK | SW_WRITE_MASK + | SW_MIDDLE_MASK, + .length = SW_BTH_BYTES + SW_DETH_BYTES + SW_RDETH_BYTES, + .offset = { + [SW_BTH] = 0, + [SW_RDETH] = SW_BTH_BYTES, + [SW_DETH] = SW_BTH_BYTES + + SW_RDETH_BYTES, + [SW_PAYLOAD] = SW_BTH_BYTES + + SW_RDETH_BYTES + + SW_DETH_BYTES, + } + }, + [IB_OPCODE_RD_RDMA_WRITE_LAST] = { + .name = "IB_OPCODE_RD_RDMA_WRITE_LAST", + .mask = SW_RDETH_MASK | SW_DETH_MASK | SW_PAYLOAD_MASK + | SW_REQ_MASK | SW_WRITE_MASK + | SW_END_MASK, + .length = 
SW_BTH_BYTES + SW_DETH_BYTES + SW_RDETH_BYTES, + .offset = { + [SW_BTH] = 0, + [SW_RDETH] = SW_BTH_BYTES, + [SW_DETH] = SW_BTH_BYTES + + SW_RDETH_BYTES, + [SW_PAYLOAD] = SW_BTH_BYTES + + SW_RDETH_BYTES + + SW_DETH_BYTES, + } + }, + [IB_OPCODE_RD_RDMA_WRITE_LAST_WITH_IMMEDIATE] = { + .name = "IB_OPCODE_RD_RDMA_WRITE_LAST_WITH_IMMEDIATE", + .mask = SW_RDETH_MASK | SW_DETH_MASK | SW_IMMDT_MASK + | SW_PAYLOAD_MASK | SW_REQ_MASK + | SW_WRITE_MASK | SW_COMP_MASK | SW_RWR_MASK + | SW_END_MASK, + .length = SW_BTH_BYTES + SW_IMMDT_BYTES + SW_DETH_BYTES + + SW_RDETH_BYTES, + .offset = { + [SW_BTH] = 0, + [SW_RDETH] = SW_BTH_BYTES, + [SW_DETH] = SW_BTH_BYTES + + SW_RDETH_BYTES, + [SW_IMMDT] = SW_BTH_BYTES + + SW_RDETH_BYTES + + SW_DETH_BYTES, + [SW_PAYLOAD] = SW_BTH_BYTES + + SW_RDETH_BYTES + + SW_DETH_BYTES + + SW_IMMDT_BYTES, + } + }, + [IB_OPCODE_RD_RDMA_WRITE_ONLY] = { + .name = "IB_OPCODE_RD_RDMA_WRITE_ONLY", + .mask = SW_RDETH_MASK | SW_DETH_MASK | SW_RETH_MASK + | SW_PAYLOAD_MASK | SW_REQ_MASK + | SW_WRITE_MASK | SW_START_MASK + | SW_END_MASK, + .length = SW_BTH_BYTES + SW_RETH_BYTES + SW_DETH_BYTES + + SW_RDETH_BYTES, + .offset = { + [SW_BTH] = 0, + [SW_RDETH] = SW_BTH_BYTES, + [SW_DETH] = SW_BTH_BYTES + + SW_RDETH_BYTES, + [SW_RETH] = SW_BTH_BYTES + + SW_RDETH_BYTES + + SW_DETH_BYTES, + [SW_PAYLOAD] = SW_BTH_BYTES + + SW_RDETH_BYTES + + SW_DETH_BYTES + + SW_RETH_BYTES, + } + }, + [IB_OPCODE_RD_RDMA_WRITE_ONLY_WITH_IMMEDIATE] = { + .name = "IB_OPCODE_RD_RDMA_WRITE_ONLY_WITH_IMMEDIATE", + .mask = SW_RDETH_MASK | SW_DETH_MASK | SW_RETH_MASK + | SW_IMMDT_MASK | SW_PAYLOAD_MASK + | SW_REQ_MASK | SW_WRITE_MASK + | SW_COMP_MASK | SW_RWR_MASK + | SW_START_MASK | SW_END_MASK, + .length = SW_BTH_BYTES + SW_IMMDT_BYTES + SW_RETH_BYTES + + SW_DETH_BYTES + SW_RDETH_BYTES, + .offset = { + [SW_BTH] = 0, + [SW_RDETH] = SW_BTH_BYTES, + [SW_DETH] = SW_BTH_BYTES + + SW_RDETH_BYTES, + [SW_RETH] = SW_BTH_BYTES + + SW_RDETH_BYTES + + SW_DETH_BYTES, + [SW_IMMDT] = SW_BTH_BYTES + + SW_RDETH_BYTES + + SW_DETH_BYTES + + SW_RETH_BYTES, + [SW_PAYLOAD] = SW_BTH_BYTES + + SW_RDETH_BYTES + + SW_DETH_BYTES + + SW_RETH_BYTES + + SW_IMMDT_BYTES, + } + }, + [IB_OPCODE_RD_RDMA_READ_REQUEST] = { + .name = "IB_OPCODE_RD_RDMA_READ_REQUEST", + .mask = SW_RDETH_MASK | SW_DETH_MASK | SW_RETH_MASK + | SW_REQ_MASK | SW_READ_MASK + | SW_START_MASK | SW_END_MASK, + .length = SW_BTH_BYTES + SW_RETH_BYTES + SW_DETH_BYTES + + SW_RDETH_BYTES, + .offset = { + [SW_BTH] = 0, + [SW_RDETH] = SW_BTH_BYTES, + [SW_DETH] = SW_BTH_BYTES + + SW_RDETH_BYTES, + [SW_RETH] = SW_BTH_BYTES + + SW_RDETH_BYTES + + SW_DETH_BYTES, + [SW_PAYLOAD] = SW_BTH_BYTES + + SW_RETH_BYTES + + SW_DETH_BYTES + + SW_RDETH_BYTES, + } + }, + [IB_OPCODE_RD_RDMA_READ_RESPONSE_FIRST] = { + .name = "IB_OPCODE_RD_RDMA_READ_RESPONSE_FIRST", + .mask = SW_RDETH_MASK | SW_AETH_MASK + | SW_PAYLOAD_MASK | SW_ACK_MASK + | SW_START_MASK, + .length = SW_BTH_BYTES + SW_AETH_BYTES + SW_RDETH_BYTES, + .offset = { + [SW_BTH] = 0, + [SW_RDETH] = SW_BTH_BYTES, + [SW_AETH] = SW_BTH_BYTES + + SW_RDETH_BYTES, + [SW_PAYLOAD] = SW_BTH_BYTES + + SW_RDETH_BYTES + + SW_AETH_BYTES, + } + }, + [IB_OPCODE_RD_RDMA_READ_RESPONSE_MIDDLE] = { + .name = "IB_OPCODE_RD_RDMA_READ_RESPONSE_MIDDLE", + .mask = SW_RDETH_MASK | SW_PAYLOAD_MASK | SW_ACK_MASK + | SW_MIDDLE_MASK, + .length = SW_BTH_BYTES + SW_RDETH_BYTES, + .offset = { + [SW_BTH] = 0, + [SW_RDETH] = SW_BTH_BYTES, + [SW_PAYLOAD] = SW_BTH_BYTES + + SW_RDETH_BYTES, + } + }, + [IB_OPCODE_RD_RDMA_READ_RESPONSE_LAST] = { + .name = 
"IB_OPCODE_RD_RDMA_READ_RESPONSE_LAST", + .mask = SW_RDETH_MASK | SW_AETH_MASK | SW_PAYLOAD_MASK + | SW_ACK_MASK | SW_END_MASK, + .length = SW_BTH_BYTES + SW_AETH_BYTES + SW_RDETH_BYTES, + .offset = { + [SW_BTH] = 0, + [SW_RDETH] = SW_BTH_BYTES, + [SW_AETH] = SW_BTH_BYTES + + SW_RDETH_BYTES, + [SW_PAYLOAD] = SW_BTH_BYTES + + SW_RDETH_BYTES + + SW_AETH_BYTES, + } + }, + [IB_OPCODE_RD_RDMA_READ_RESPONSE_ONLY] = { + .name = "IB_OPCODE_RD_RDMA_READ_RESPONSE_ONLY", + .mask = SW_RDETH_MASK | SW_AETH_MASK | SW_PAYLOAD_MASK + | SW_ACK_MASK | SW_START_MASK | SW_END_MASK, + .length = SW_BTH_BYTES + SW_AETH_BYTES + SW_RDETH_BYTES, + .offset = { + [SW_BTH] = 0, + [SW_RDETH] = SW_BTH_BYTES, + [SW_AETH] = SW_BTH_BYTES + + SW_RDETH_BYTES, + [SW_PAYLOAD] = SW_BTH_BYTES + + SW_RDETH_BYTES + + SW_AETH_BYTES, + } + }, + [IB_OPCODE_RD_ACKNOWLEDGE] = { + .name = "IB_OPCODE_RD_ACKNOWLEDGE", + .mask = SW_RDETH_MASK | SW_AETH_MASK | SW_ACK_MASK + | SW_START_MASK | SW_END_MASK, + .length = SW_BTH_BYTES + SW_AETH_BYTES + SW_RDETH_BYTES, + .offset = { + [SW_BTH] = 0, + [SW_RDETH] = SW_BTH_BYTES, + [SW_AETH] = SW_BTH_BYTES + + SW_RDETH_BYTES, + } + }, + [IB_OPCODE_RD_ATOMIC_ACKNOWLEDGE] = { + .name = "IB_OPCODE_RD_ATOMIC_ACKNOWLEDGE", + .mask = SW_RDETH_MASK | SW_AETH_MASK | SW_ATMACK_MASK + | SW_ACK_MASK | SW_START_MASK | SW_END_MASK, + .length = SW_BTH_BYTES + SW_ATMACK_BYTES + SW_AETH_BYTES + + SW_RDETH_BYTES, + .offset = { + [SW_BTH] = 0, + [SW_RDETH] = SW_BTH_BYTES, + [SW_AETH] = SW_BTH_BYTES + + SW_RDETH_BYTES, + [SW_ATMACK] = SW_BTH_BYTES + + SW_RDETH_BYTES + + SW_AETH_BYTES, + } + }, + [IB_OPCODE_RD_COMPARE_SWAP] = { + .name = "RD_COMPARE_SWAP", + .mask = SW_RDETH_MASK | SW_DETH_MASK | SW_ATMETH_MASK + | SW_REQ_MASK | SW_ATOMIC_MASK + | SW_START_MASK | SW_END_MASK, + .length = SW_BTH_BYTES + SW_ATMETH_BYTES + SW_DETH_BYTES + + SW_RDETH_BYTES, + .offset = { + [SW_BTH] = 0, + [SW_RDETH] = SW_BTH_BYTES, + [SW_DETH] = SW_BTH_BYTES + + SW_RDETH_BYTES, + [SW_ATMETH] = SW_BTH_BYTES + + SW_RDETH_BYTES + + SW_DETH_BYTES, + [SW_PAYLOAD] = SW_BTH_BYTES + + + SW_ATMETH_BYTES + + SW_DETH_BYTES + + + SW_RDETH_BYTES, + } + }, + [IB_OPCODE_RD_FETCH_ADD] = { + .name = "IB_OPCODE_RD_FETCH_ADD", + .mask = SW_RDETH_MASK | SW_DETH_MASK | SW_ATMETH_MASK + | SW_REQ_MASK | SW_ATOMIC_MASK + | SW_START_MASK | SW_END_MASK, + .length = SW_BTH_BYTES + SW_ATMETH_BYTES + SW_DETH_BYTES + + SW_RDETH_BYTES, + .offset = { + [SW_BTH] = 0, + [SW_RDETH] = SW_BTH_BYTES, + [SW_DETH] = SW_BTH_BYTES + + SW_RDETH_BYTES, + [SW_ATMETH] = SW_BTH_BYTES + + SW_RDETH_BYTES + + SW_DETH_BYTES, + [SW_PAYLOAD] = SW_BTH_BYTES + + + SW_ATMETH_BYTES + + SW_DETH_BYTES + + + SW_RDETH_BYTES, + } + }, + + /* UD */ + [IB_OPCODE_UD_SEND_ONLY] = { + .name = "IB_OPCODE_UD_SEND_ONLY", + .mask = SW_DETH_MASK | SW_PAYLOAD_MASK | SW_REQ_MASK + | SW_COMP_MASK | SW_RWR_MASK | SW_SEND_MASK + | SW_START_MASK | SW_END_MASK, + .length = SW_BTH_BYTES + SW_DETH_BYTES, + .offset = { + [SW_BTH] = 0, + [SW_DETH] = SW_BTH_BYTES, + [SW_PAYLOAD] = SW_BTH_BYTES + + SW_DETH_BYTES, + } + }, + [IB_OPCODE_UD_SEND_ONLY_WITH_IMMEDIATE] = { + .name = "IB_OPCODE_UD_SEND_ONLY_WITH_IMMEDIATE", + .mask = SW_DETH_MASK | SW_IMMDT_MASK | SW_PAYLOAD_MASK + | SW_REQ_MASK | SW_COMP_MASK | SW_RWR_MASK + | SW_SEND_MASK | SW_START_MASK | SW_END_MASK, + .length = SW_BTH_BYTES + SW_IMMDT_BYTES + SW_DETH_BYTES, + .offset = { + [SW_BTH] = 0, + [SW_DETH] = SW_BTH_BYTES, + [SW_IMMDT] = SW_BTH_BYTES + + SW_DETH_BYTES, + [SW_PAYLOAD] = SW_BTH_BYTES + + SW_DETH_BYTES + + SW_IMMDT_BYTES, + } + }, + +}; diff --git 
a/drivers/infiniband/hw/erdma/compat/sw_opcode.h b/drivers/infiniband/hw/erdma/compat/sw_opcode.h new file mode 100644 index 0000000000000000000000000000000000000000..14b3cf582ec910308a7852b76f9a98a42c392d9b --- /dev/null +++ b/drivers/infiniband/hw/erdma/compat/sw_opcode.h @@ -0,0 +1,102 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* + * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved. + * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved. + */ + +#ifndef SW_OPCODE_H +#define SW_OPCODE_H + +/* + * contains header bit mask definitions and header lengths + * declaration of the sw_opcode_info struct and + * sw_wr_opcode_info struct + */ + +enum sw_wr_mask { + WR_INLINE_MASK = BIT(0), + WR_ATOMIC_MASK = BIT(1), + WR_SEND_MASK = BIT(2), + WR_READ_MASK = BIT(3), + WR_WRITE_MASK = BIT(4), + WR_LOCAL_MASK = BIT(5), + WR_REG_MASK = BIT(6), + + WR_READ_OR_WRITE_MASK = WR_READ_MASK | WR_WRITE_MASK, + WR_READ_WRITE_OR_SEND_MASK = WR_READ_OR_WRITE_MASK | WR_SEND_MASK, + WR_WRITE_OR_SEND_MASK = WR_WRITE_MASK | WR_SEND_MASK, + WR_ATOMIC_OR_READ_MASK = WR_ATOMIC_MASK | WR_READ_MASK, +}; + +#define WR_MAX_QPT (8) + +struct sw_wr_opcode_info { + char *name; + enum sw_wr_mask mask[WR_MAX_QPT]; +}; + +extern struct sw_wr_opcode_info sw_wr_opcode_info[]; + +enum sw_hdr_type { + SW_LRH, + SW_GRH, + SW_BTH, + SW_RETH, + SW_AETH, + SW_ATMETH, + SW_ATMACK, + SW_IETH, + SW_RDETH, + SW_DETH, + SW_IMMDT, + SW_PAYLOAD, + NUM_HDR_TYPES +}; + +enum sw_hdr_mask { + SW_LRH_MASK = BIT(SW_LRH), + SW_GRH_MASK = BIT(SW_GRH), + SW_BTH_MASK = BIT(SW_BTH), + SW_IMMDT_MASK = BIT(SW_IMMDT), + SW_RETH_MASK = BIT(SW_RETH), + SW_AETH_MASK = BIT(SW_AETH), + SW_ATMETH_MASK = BIT(SW_ATMETH), + SW_ATMACK_MASK = BIT(SW_ATMACK), + SW_IETH_MASK = BIT(SW_IETH), + SW_RDETH_MASK = BIT(SW_RDETH), + SW_DETH_MASK = BIT(SW_DETH), + SW_PAYLOAD_MASK = BIT(SW_PAYLOAD), + + SW_REQ_MASK = BIT(NUM_HDR_TYPES + 0), + SW_ACK_MASK = BIT(NUM_HDR_TYPES + 1), + SW_SEND_MASK = BIT(NUM_HDR_TYPES + 2), + SW_WRITE_MASK = BIT(NUM_HDR_TYPES + 3), + SW_READ_MASK = BIT(NUM_HDR_TYPES + 4), + SW_ATOMIC_MASK = BIT(NUM_HDR_TYPES + 5), + + SW_RWR_MASK = BIT(NUM_HDR_TYPES + 6), + SW_COMP_MASK = BIT(NUM_HDR_TYPES + 7), + + SW_START_MASK = BIT(NUM_HDR_TYPES + 8), + SW_MIDDLE_MASK = BIT(NUM_HDR_TYPES + 9), + SW_END_MASK = BIT(NUM_HDR_TYPES + 10), + + SW_LOOPBACK_MASK = BIT(NUM_HDR_TYPES + 12), + + SW_READ_OR_ATOMIC = (SW_READ_MASK | SW_ATOMIC_MASK), + SW_WRITE_OR_SEND = (SW_WRITE_MASK | SW_SEND_MASK), +}; + +#define OPCODE_NONE (-1) +#define SW_NUM_OPCODE 256 + +struct sw_opcode_info { + char *name; + enum sw_hdr_mask mask; + int length; + int offset[NUM_HDR_TYPES]; +}; + +extern struct sw_opcode_info sw_opcode[SW_NUM_OPCODE]; + +#endif /* SW_OPCODE_H */ diff --git a/drivers/infiniband/hw/erdma/compat/sw_param.h b/drivers/infiniband/hw/erdma/compat/sw_param.h new file mode 100644 index 0000000000000000000000000000000000000000..1bdb7d74ef27d6908c844e63fbe8e63b80a83231 --- /dev/null +++ b/drivers/infiniband/hw/erdma/compat/sw_param.h @@ -0,0 +1,139 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* + * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved. + * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved. 
+ */ + +#ifndef SW_PARAM_H +#define SW_PARAM_H + +#include "rdma_user_sw.h" + +static inline enum ib_mtu sw_mtu_int_to_enum(int mtu) +{ + if (mtu < 256) + return 0; + else if (mtu < 512) + return IB_MTU_256; + else if (mtu < 1024) + return IB_MTU_512; + else if (mtu < 2048) + return IB_MTU_1024; + else if (mtu < 4096) + return IB_MTU_2048; + else + return IB_MTU_4096; +} + +/* lack */ +#ifndef SW_MAX_HDR_LENGTH +#define SW_MAX_HDR_LENGTH (80) +#endif + +/* Find the IB mtu for a given network MTU. */ +static inline enum ib_mtu eth_mtu_int_to_enum(int mtu) +{ + mtu -= SW_MAX_HDR_LENGTH; + + return sw_mtu_int_to_enum(mtu); +} + +/* default/initial sw device parameter settings */ +enum sw_device_param { + SW_MAX_MR_SIZE = -1ull, + SW_PAGE_SIZE_CAP = 0xfffff000, + SW_MAX_QP = 0x10000, + SW_MAX_QP_WR = 0x4000, + SW_DEVICE_CAP_FLAGS = IB_DEVICE_BAD_PKEY_CNTR + | IB_DEVICE_BAD_QKEY_CNTR + | IB_DEVICE_AUTO_PATH_MIG + | IB_DEVICE_CHANGE_PHY_PORT + | IB_DEVICE_UD_AV_PORT_ENFORCE + | IB_DEVICE_PORT_ACTIVE_EVENT + | IB_DEVICE_SYS_IMAGE_GUID + | IB_DEVICE_RC_RNR_NAK_GEN + | IB_DEVICE_SRQ_RESIZE + | IB_DEVICE_MEM_MGT_EXTENSIONS, + SW_MAX_SGE = 32, + SW_MAX_WQE_SIZE = sizeof(struct sw_send_wqe) + + sizeof(struct ib_sge) * SW_MAX_SGE, + SW_MAX_INLINE_DATA = SW_MAX_WQE_SIZE - + sizeof(struct sw_send_wqe), + SW_MAX_SGE_RD = 32, + SW_MAX_CQ = 16384, + SW_MAX_LOG_CQE = 15, + SW_MAX_MR = 256 * 1024, + SW_MAX_PD = 0x7ffc, + SW_MAX_QP_RD_ATOM = 128, + SW_MAX_RES_RD_ATOM = 0x3f000, + SW_MAX_QP_INIT_RD_ATOM = 128, + SW_MAX_MCAST_GRP = 8192, + SW_MAX_MCAST_QP_ATTACH = 56, + SW_MAX_TOT_MCAST_QP_ATTACH = 0x70000, + SW_MAX_AH = 10000, + SW_MAX_SRQ = 960, + SW_MAX_SRQ_WR = 0x4000, + SW_MIN_SRQ_WR = 1, + SW_MAX_SRQ_SGE = 27, + SW_MIN_SRQ_SGE = 1, + SW_MAX_FMR_PAGE_LIST_LEN = 512, + SW_MAX_PKEYS = 1, + SW_LOCAL_CA_ACK_DELAY = 15, + + SW_MAX_UCONTEXT = 512, + + SW_NUM_PORT = 1, + + SW_MIN_QP_INDEX = 16, + SW_MAX_QP_INDEX = 0x00020000, + + SW_MIN_SRQ_INDEX = 0x00020001, + SW_MAX_SRQ_INDEX = 0x00040000, + + SW_MIN_MR_INDEX = 0x00000001, + SW_MAX_MR_INDEX = 0x00040000, + SW_MIN_MW_INDEX = 0x00040001, + SW_MAX_MW_INDEX = 0x00060000, + SW_MAX_PKT_PER_ACK = 64, + + SW_MAX_UNACKED_PSNS = 128, + + /* Max inflight SKBs per queue pair */ + SW_INFLIGHT_SKBS_PER_QP_HIGH = 64, + SW_INFLIGHT_SKBS_PER_QP_LOW = 16, + + /* Delay before calling arbiter timer */ + SW_NSEC_ARB_TIMER_DELAY = 200, + + /* IBTA v1.4 A3.3.1 VENDOR INFORMATION section */ + SW_VENDOR_ID = 0XFFFFFF, +}; + +/* default/initial sw port parameters */ +enum sw_port_param { + SW_PORT_GID_TBL_LEN = 1024, + SW_PORT_PORT_CAP_FLAGS = RDMA_CORE_CAP_PROT_ROCE_UDP_ENCAP, + SW_PORT_MAX_MSG_SZ = 0x800000, + SW_PORT_BAD_PKEY_CNTR = 0, + SW_PORT_QKEY_VIOL_CNTR = 0, + SW_PORT_LID = 0, + SW_PORT_SM_LID = 0, + SW_PORT_SM_SL = 0, + SW_PORT_LMC = 0, + SW_PORT_MAX_VL_NUM = 1, + SW_PORT_SUBNET_TIMEOUT = 0, + SW_PORT_INIT_TYPE_REPLY = 0, + SW_PORT_ACTIVE_WIDTH = IB_WIDTH_1X, + SW_PORT_ACTIVE_SPEED = 1, + SW_PORT_PKEY_TBL_LEN = 1, + SW_PORT_SUBNET_PREFIX = 0xfe80000000000000ULL, +}; + +/* default/initial port info parameters */ +enum sw_port_info_param { + SW_PORT_INFO_VL_CAP = 4, /* 1-8 */ + SW_PORT_INFO_MTU_CAP = 5, /* 4096 */ + SW_PORT_INFO_OPER_VL = 1, /* 1 */ +}; + +#endif /* SW_PARAM_H */ diff --git a/drivers/infiniband/hw/erdma/compat/sw_pool.c b/drivers/infiniband/hw/erdma/compat/sw_pool.c new file mode 100644 index 0000000000000000000000000000000000000000..5235b1eff05828d8bbc5344c037e8904701e4473 --- /dev/null +++ b/drivers/infiniband/hw/erdma/compat/sw_pool.c @@ -0,0 +1,448 @@ +// 
SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* + * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved. + * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved. + */ + +#include "sw.h" +#include "sw_loc.h" + +/* info about object pools + * note that mr and mw share a single index space + * so that one can map an lkey to the correct type of object + */ +struct sw_type_info sw_type_info[SW_NUM_TYPES] = { + [SW_TYPE_UC] = { + .name = "sw-uc", + .size = sizeof(struct sw_ucontext), + .flags = SW_POOL_NO_ALLOC, + }, + [SW_TYPE_PD] = { + .name = "sw-pd", + .size = sizeof(struct sw_pd), + .flags = SW_POOL_NO_ALLOC, + }, + [SW_TYPE_AH] = { + .name = "sw-ah", + .size = sizeof(struct sw_ah), + .flags = SW_POOL_ATOMIC | SW_POOL_NO_ALLOC, + }, + [SW_TYPE_SRQ] = { + .name = "sw-srq", + .size = sizeof(struct sw_srq), + .flags = SW_POOL_INDEX | SW_POOL_NO_ALLOC, + .min_index = SW_MIN_SRQ_INDEX, + .max_index = SW_MAX_SRQ_INDEX, + }, + [SW_TYPE_QP] = { + .name = "sw-qp", + .size = sizeof(struct sw_qp), + .cleanup = sw_qp_cleanup, + .flags = SW_POOL_INDEX, + .min_index = SW_MIN_QP_INDEX, + .max_index = SW_MAX_QP_INDEX, + }, + [SW_TYPE_CQ] = { + .name = "sw-cq", + .size = sizeof(struct sw_cq), + .flags = SW_POOL_NO_ALLOC, + .cleanup = sw_cq_cleanup, + }, + [SW_TYPE_MR] = { + .name = "sw-mr", + .size = sizeof(struct sw_mem), + .cleanup = sw_mem_cleanup, + .flags = SW_POOL_INDEX, + .max_index = SW_MAX_MR_INDEX, + .min_index = SW_MIN_MR_INDEX, + }, + [SW_TYPE_MW] = { + .name = "sw-mw", + .size = sizeof(struct sw_mem), + .flags = SW_POOL_INDEX, + .max_index = SW_MAX_MW_INDEX, + .min_index = SW_MIN_MW_INDEX, + }, + [SW_TYPE_MC_GRP] = { + .name = "sw-mc_grp", + .size = sizeof(struct sw_mc_grp), + .cleanup = sw_mc_cleanup, + .flags = SW_POOL_KEY, + .key_offset = offsetof(struct sw_mc_grp, mgid), + .key_size = sizeof(union ib_gid), + }, + [SW_TYPE_MC_ELEM] = { + .name = "sw-mc_elem", + .size = sizeof(struct sw_mc_elem), + .flags = SW_POOL_ATOMIC, + }, +}; + +static inline const char *pool_name(struct sw_pool *pool) +{ + return sw_type_info[pool->type].name; +} + +static int sw_pool_init_index(struct sw_pool *pool, u32 max, u32 min) +{ + int err = 0; + size_t size; + + if ((max - min + 1) < pool->max_elem) { + pr_warn("not enough indices for max_elem\n"); + err = -EINVAL; + goto out; + } + + pool->max_index = max; + pool->min_index = min; + + size = BITS_TO_LONGS(max - min + 1) * sizeof(long); + pool->table = kmalloc(size, GFP_KERNEL); + if (!pool->table) { + err = -ENOMEM; + goto out; + } + + pool->table_size = size; + bitmap_zero(pool->table, max - min + 1); + +out: + return err; +} + +int sw_pool_init( + struct sw_dev *sw, + struct sw_pool *pool, + enum sw_elem_type type, + unsigned int max_elem) +{ + int err = 0; + size_t size = sw_type_info[type].size; + + memset(pool, 0, sizeof(*pool)); + + pool->sw = sw; + pool->type = type; + pool->max_elem = max_elem; + pool->elem_size = ALIGN(size, SW_POOL_ALIGN); + pool->flags = sw_type_info[type].flags; + pool->tree = RB_ROOT; + pool->cleanup = sw_type_info[type].cleanup; + + atomic_set(&pool->num_elem, 0); + + kref_init(&pool->ref_cnt); + + rwlock_init(&pool->pool_lock); + + if (sw_type_info[type].flags & SW_POOL_INDEX) { + err = sw_pool_init_index(pool, + sw_type_info[type].max_index, + sw_type_info[type].min_index); + if (err) + goto out; + } + + if (sw_type_info[type].flags & SW_POOL_KEY) { + pool->key_offset = sw_type_info[type].key_offset; + pool->key_size = sw_type_info[type].key_size; + } + + pool->state = SW_POOL_STATE_VALID; + 
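+	/* only mark the pool valid once the optional index bitmap and
+	 * key parameters are fully set up; sw_alloc() and the lookup
+	 * helpers check this state under pool_lock before use.
+	 */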
+out: + return err; +} + +static void sw_pool_release(struct kref *kref) +{ + struct sw_pool *pool = container_of(kref, struct sw_pool, ref_cnt); + + pool->state = SW_POOL_STATE_INVALID; + kfree(pool->table); +} + +static void sw_pool_put(struct sw_pool *pool) +{ + kref_put(&pool->ref_cnt, sw_pool_release); +} + +void sw_pool_cleanup(struct sw_pool *pool) +{ + unsigned long flags; + + write_lock_irqsave(&pool->pool_lock, flags); + pool->state = SW_POOL_STATE_INVALID; + if (atomic_read(&pool->num_elem) > 0) + pr_warn("%s pool destroyed with unfree'd elem\n", + pool_name(pool)); + write_unlock_irqrestore(&pool->pool_lock, flags); + + sw_pool_put(pool); +} + +static u32 alloc_index(struct sw_pool *pool) +{ + u32 index; + u32 range = pool->max_index - pool->min_index + 1; + + index = find_next_zero_bit(pool->table, range, pool->last); + if (index >= range) + index = find_first_zero_bit(pool->table, range); + + WARN_ON_ONCE(index >= range); + set_bit(index, pool->table); + pool->last = index; + return index + pool->min_index; +} + +static void insert_index(struct sw_pool *pool, struct sw_pool_entry *new) +{ + struct rb_node **link = &pool->tree.rb_node; + struct rb_node *parent = NULL; + struct sw_pool_entry *elem; + + while (*link) { + parent = *link; + elem = rb_entry(parent, struct sw_pool_entry, node); + + if (elem->index == new->index) { + pr_warn("element already exists!\n"); + goto out; + } + + if (elem->index > new->index) + link = &(*link)->rb_left; + else + link = &(*link)->rb_right; + } + + rb_link_node(&new->node, parent, link); + rb_insert_color(&new->node, &pool->tree); +out: + return; +} + +static void insert_key(struct sw_pool *pool, struct sw_pool_entry *new) +{ + struct rb_node **link = &pool->tree.rb_node; + struct rb_node *parent = NULL; + struct sw_pool_entry *elem; + int cmp; + + while (*link) { + parent = *link; + elem = rb_entry(parent, struct sw_pool_entry, node); + + cmp = memcmp((u8 *)elem + pool->key_offset, + (u8 *)new + pool->key_offset, pool->key_size); + + if (cmp == 0) { + pr_warn("key already exists!\n"); + goto out; + } + + if (cmp > 0) + link = &(*link)->rb_left; + else + link = &(*link)->rb_right; + } + + rb_link_node(&new->node, parent, link); + rb_insert_color(&new->node, &pool->tree); +out: + return; +} + +void sw_add_key(void *arg, void *key) +{ + struct sw_pool_entry *elem = arg; + struct sw_pool *pool = elem->pool; + unsigned long flags; + + write_lock_irqsave(&pool->pool_lock, flags); + memcpy((u8 *)elem + pool->key_offset, key, pool->key_size); + insert_key(pool, elem); + write_unlock_irqrestore(&pool->pool_lock, flags); +} + +void sw_drop_key(void *arg) +{ + struct sw_pool_entry *elem = arg; + struct sw_pool *pool = elem->pool; + unsigned long flags; + + write_lock_irqsave(&pool->pool_lock, flags); + rb_erase(&elem->node, &pool->tree); + write_unlock_irqrestore(&pool->pool_lock, flags); +} + +void sw_add_index(void *arg) +{ + struct sw_pool_entry *elem = arg; + struct sw_pool *pool = elem->pool; + unsigned long flags; + + write_lock_irqsave(&pool->pool_lock, flags); + elem->index = alloc_index(pool); + insert_index(pool, elem); + write_unlock_irqrestore(&pool->pool_lock, flags); +} + +void sw_drop_index(void *arg) +{ + struct sw_pool_entry *elem = arg; + struct sw_pool *pool = elem->pool; + unsigned long flags; + + write_lock_irqsave(&pool->pool_lock, flags); + clear_bit(elem->index - pool->min_index, pool->table); + rb_erase(&elem->node, &pool->tree); + write_unlock_irqrestore(&pool->pool_lock, flags); +} + +void *sw_alloc(struct sw_pool *pool) +{ 
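+	/* allocate a pool element: take a reference on the pool while it
+	 * is still valid, enforce the max_elem limit, and use GFP_ATOMIC
+	 * only for pools flagged SW_POOL_ATOMIC.
+	 */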
+ struct sw_pool_entry *elem; + unsigned long flags; + + might_sleep_if(!(pool->flags & SW_POOL_ATOMIC)); + + read_lock_irqsave(&pool->pool_lock, flags); + if (pool->state != SW_POOL_STATE_VALID) { + read_unlock_irqrestore(&pool->pool_lock, flags); + return NULL; + } + kref_get(&pool->ref_cnt); + read_unlock_irqrestore(&pool->pool_lock, flags); + + if (atomic_inc_return(&pool->num_elem) > pool->max_elem) + goto out_cnt; + + elem = kzalloc(sw_type_info[pool->type].size, + (pool->flags & SW_POOL_ATOMIC) ? + GFP_ATOMIC : GFP_KERNEL); + if (!elem) + goto out_cnt; + + elem->pool = pool; + kref_init(&elem->ref_cnt); + + return elem; + +out_cnt: + atomic_dec(&pool->num_elem); + sw_pool_put(pool); + return NULL; +} + +int sw_add_to_pool(struct sw_pool *pool, struct sw_pool_entry *elem) +{ + unsigned long flags; + + might_sleep_if(!(pool->flags & SW_POOL_ATOMIC)); + + read_lock_irqsave(&pool->pool_lock, flags); + if (pool->state != SW_POOL_STATE_VALID) { + read_unlock_irqrestore(&pool->pool_lock, flags); + return -EINVAL; + } + kref_get(&pool->ref_cnt); + read_unlock_irqrestore(&pool->pool_lock, flags); + + if (atomic_inc_return(&pool->num_elem) > pool->max_elem) + goto out_cnt; + + elem->pool = pool; + kref_init(&elem->ref_cnt); + + return 0; + +out_cnt: + atomic_dec(&pool->num_elem); + sw_pool_put(pool); + return -EINVAL; +} + +void sw_elem_release(struct kref *kref) +{ + struct sw_pool_entry *elem = + container_of(kref, struct sw_pool_entry, ref_cnt); + struct sw_pool *pool = elem->pool; + + if (pool->cleanup) + pool->cleanup(elem); + + if (!(pool->flags & SW_POOL_NO_ALLOC)) + kfree(elem); + atomic_dec(&pool->num_elem); + sw_pool_put(pool); +} + +void *sw_pool_get_index(struct sw_pool *pool, u32 index) +{ + struct rb_node *node = NULL; + struct sw_pool_entry *elem = NULL; + unsigned long flags; + + read_lock_irqsave(&pool->pool_lock, flags); + + if (pool->state != SW_POOL_STATE_VALID) + goto out; + + node = pool->tree.rb_node; + + while (node) { + elem = rb_entry(node, struct sw_pool_entry, node); + + if (elem->index > index) + node = node->rb_left; + else if (elem->index < index) + node = node->rb_right; + else { + kref_get(&elem->ref_cnt); + break; + } + } + +out: + read_unlock_irqrestore(&pool->pool_lock, flags); + return node ? elem : NULL; +} + +void *sw_pool_get_key(struct sw_pool *pool, void *key) +{ + struct rb_node *node = NULL; + struct sw_pool_entry *elem = NULL; + int cmp; + unsigned long flags; + + read_lock_irqsave(&pool->pool_lock, flags); + + if (pool->state != SW_POOL_STATE_VALID) + goto out; + + node = pool->tree.rb_node; + + while (node) { + elem = rb_entry(node, struct sw_pool_entry, node); + + cmp = memcmp((u8 *)elem + pool->key_offset, + key, pool->key_size); + + if (cmp > 0) + node = node->rb_left; + else if (cmp < 0) + node = node->rb_right; + else + break; + } + + if (node) + kref_get(&elem->ref_cnt); + +out: + read_unlock_irqrestore(&pool->pool_lock, flags); + return node ? elem : NULL; +} diff --git a/drivers/infiniband/hw/erdma/compat/sw_pool.h b/drivers/infiniband/hw/erdma/compat/sw_pool.h new file mode 100644 index 0000000000000000000000000000000000000000..0b6c28d46ab17cd41a32b4915566e2128ef75c95 --- /dev/null +++ b/drivers/infiniband/hw/erdma/compat/sw_pool.h @@ -0,0 +1,135 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* + * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved. + * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved. 
+ */ + +#ifndef SW_POOL_H +#define SW_POOL_H + +#define SW_POOL_ALIGN (16) +#define SW_POOL_CACHE_FLAGS (0) + +enum sw_pool_flags { + SW_POOL_ATOMIC = BIT(0), + SW_POOL_INDEX = BIT(1), + SW_POOL_KEY = BIT(2), + SW_POOL_NO_ALLOC = BIT(4), +}; + +enum sw_elem_type { + SW_TYPE_UC, + SW_TYPE_PD, + SW_TYPE_AH, + SW_TYPE_SRQ, + SW_TYPE_QP, + SW_TYPE_CQ, + SW_TYPE_MR, + SW_TYPE_MW, + SW_TYPE_MC_GRP, + SW_TYPE_MC_ELEM, + SW_NUM_TYPES, /* keep me last */ +}; + +struct sw_pool_entry; + +struct sw_type_info { + const char *name; + size_t size; + void (*cleanup)(struct sw_pool_entry *obj); + enum sw_pool_flags flags; + u32 max_index; + u32 min_index; + size_t key_offset; + size_t key_size; +}; + +extern struct sw_type_info sw_type_info[]; + +enum sw_pool_state { + SW_POOL_STATE_INVALID, + SW_POOL_STATE_VALID, +}; + +struct sw_pool_entry { + struct sw_pool *pool; + struct kref ref_cnt; + struct list_head list; + + /* only used if indexed or keyed */ + struct rb_node node; + u32 index; +}; + +struct sw_pool { + struct sw_dev *sw; + rwlock_t pool_lock; /* protects pool add/del/search */ + size_t elem_size; + struct kref ref_cnt; + void (*cleanup)(struct sw_pool_entry *obj); + enum sw_pool_state state; + enum sw_pool_flags flags; + enum sw_elem_type type; + + unsigned int max_elem; + atomic_t num_elem; + + /* only used if indexed or keyed */ + struct rb_root tree; + unsigned long *table; + size_t table_size; + u32 max_index; + u32 min_index; + u32 last; + size_t key_offset; + size_t key_size; +}; + +/* initialize a pool of objects with given limit on + * number of elements. gets parameters from sw_type_info + * pool elements will be allocated out of a slab cache + */ +int sw_pool_init(struct sw_dev *sw, struct sw_pool *pool, + enum sw_elem_type type, u32 max_elem); + +/* free resources from object pool */ +void sw_pool_cleanup(struct sw_pool *pool); + +/* allocate an object from pool */ +void *sw_alloc(struct sw_pool *pool); + +/* connect already allocated object to pool */ +int sw_add_to_pool(struct sw_pool *pool, struct sw_pool_entry *elem); + +/* assign an index to an indexed object and insert object into + * pool's rb tree + */ +void sw_add_index(void *elem); + +/* drop an index and remove object from rb tree */ +void sw_drop_index(void *elem); + +/* assign a key to a keyed object and insert object into + * pool's rb tree + */ +void sw_add_key(void *elem, void *key); + +/* remove elem from rb tree */ +void sw_drop_key(void *elem); + +/* lookup an indexed object from index. takes a reference on object */ +void *sw_pool_get_index(struct sw_pool *pool, u32 index); + +/* lookup keyed object from key. takes a reference on the object */ +void *sw_pool_get_key(struct sw_pool *pool, void *key); + +/* cleanup an object when all references are dropped */ +void sw_elem_release(struct kref *kref); + +/* take a reference on an object */ +#define sw_add_ref(elem) kref_get(&(elem)->pelem.ref_cnt) + +/* drop a reference on an object */ +#define sw_drop_ref(elem) kref_put(&(elem)->pelem.ref_cnt, sw_elem_release) + +#endif /* SW_POOL_H */ diff --git a/drivers/infiniband/hw/erdma/compat/sw_qp.c b/drivers/infiniband/hw/erdma/compat/sw_qp.c new file mode 100644 index 0000000000000000000000000000000000000000..fb35d15b204f7b4fe591a9b01a988e6deb9af959 --- /dev/null +++ b/drivers/infiniband/hw/erdma/compat/sw_qp.c @@ -0,0 +1,985 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* + * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved. + * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved. 
+ */ + +#include +#include +#include +#include +#include + +#include "sw.h" +#include "sw_loc.h" +#include "sw_queue.h" +#include "sw_task.h" +#include "../erdma_verbs.h" + +static int sw_qp_chk_cap(struct sw_dev *sw, struct ib_qp_cap *cap, + int has_srq) +{ + if (cap->max_send_wr > sw->attr.max_qp_wr) { + pr_warn("invalid send wr = %d > %d\n", + cap->max_send_wr, sw->attr.max_qp_wr); + goto err1; + } + if (cap->max_send_sge > sw->attr.max_send_sge) { + pr_warn("invalid send sge = %d > %d\n", + cap->max_send_sge, sw->attr.max_send_sge); + + goto err1; + } + + if (!has_srq) { + if (cap->max_recv_wr > sw->attr.max_qp_wr) { + pr_warn("invalid recv wr = %d > %d\n", + cap->max_recv_wr, sw->attr.max_qp_wr); + goto err1; + } + + if (cap->max_recv_sge > sw->attr.max_recv_sge) { + pr_warn("invalid recv sge = %d > %d\n", + cap->max_recv_sge, sw->attr.max_recv_sge); + goto err1; + } + } + + if (cap->max_inline_data > sw->max_inline_data) { + pr_warn("invalid max inline data = %d > %d\n", + cap->max_inline_data, sw->max_inline_data); + goto err1; + } + + return 0; + +err1: + return -EINVAL; +} + +int sw_qp_chk_init(struct sw_dev *sw, struct ib_qp_init_attr *init) +{ + struct ib_qp_cap *cap = &init->cap; + struct sw_port *port; + int port_num = init->port_num; + + if (!init->recv_cq || !init->send_cq) { + pr_warn("missing cq\n"); + goto err1; + } + + if (sw_qp_chk_cap(sw, cap, !!init->srq)) + goto err1; + + if (init->qp_type == IB_QPT_SMI || init->qp_type == IB_QPT_GSI) { + //if (!rdma_is_port_valid(&sw->ib_dev, port_num)) { + // pr_warn("invalid port = %d\n", port_num); + // goto err1; + //} + + port = &sw->port; + + if (init->qp_type == IB_QPT_SMI && port->qp_smi_index) { + pr_warn("SMI QP exists for port %d\n", port_num); + goto err1; + } + + if (init->qp_type == IB_QPT_GSI && port->qp_gsi_index) { + pr_warn("GSI QP exists for port %d\n", port_num); + goto err1; + } + } + + return 0; + +err1: + return -EINVAL; +} + +static int alloc_rd_atomic_resources(struct sw_qp *qp, unsigned int n) +{ + qp->resp.res_head = 0; + qp->resp.res_tail = 0; + qp->resp.resources = kcalloc(n, sizeof(struct resp_res), GFP_KERNEL); + + if (!qp->resp.resources) + return -ENOMEM; + + return 0; +} + +void sw_free_rd_atomic_resource(struct sw_qp *qp, struct resp_res *res) +{ + if (res->type == SW_ATOMIC_MASK) { + kfree_skb(res->atomic.skb); + } else if (res->type == SW_READ_MASK) { + if (res->read.mr) + sw_drop_ref(res->read.mr); + } + res->type = 0; +} + +static void sw_free_rd_atomic_resources(struct sw_qp *qp) +{ + if (qp->resp.resources) { + int i; + + for (i = 0; i < qp->attr.max_dest_rd_atomic; i++) { + struct resp_res *res = &qp->resp.resources[i]; + + sw_free_rd_atomic_resource(qp, res); + } + kfree(qp->resp.resources); + qp->resp.resources = NULL; + } +} + +static void cleanup_rd_atomic_resources(struct sw_qp *qp) +{ + int i; + struct resp_res *res; + + if (qp->resp.resources) { + for (i = 0; i < qp->attr.max_dest_rd_atomic; i++) { + res = &qp->resp.resources[i]; + sw_free_rd_atomic_resource(qp, res); + } + } +} + +static void sw_qp_init_misc(struct sw_dev *sw, struct sw_qp *qp, + struct ib_qp_init_attr *init) +{ + struct sw_port *port; + u32 qpn; + + qp->sq_sig_type = init->sq_sig_type; + qp->attr.path_mtu = 1; + qp->mtu = ib_mtu_enum_to_int(qp->attr.path_mtu); + + qpn = qp->pelem.index; + port = &sw->port; + + switch (init->qp_type) { + case IB_QPT_SMI: + qp->ibqp.qp_num = 0; + port->qp_smi_index = qpn; + qp->attr.port_num = init->port_num; + break; + + case IB_QPT_GSI: + qp->ibqp.qp_num = 1; + port->qp_gsi_index = 
qpn; + qp->attr.port_num = init->port_num; + break; + + default: + qp->ibqp.qp_num = qpn; + break; + } + + INIT_LIST_HEAD(&qp->grp_list); + + skb_queue_head_init(&qp->send_pkts); + + spin_lock_init(&qp->grp_lock); + spin_lock_init(&qp->state_lock); + + atomic_set(&qp->ssn, 0); + atomic_set(&qp->skb_out, 0); +} + +static int sw_qp_init_req(struct sw_dev *sw, struct sw_qp *qp, + struct ib_qp_init_attr *init, struct ib_udata *udata, + struct sw_create_qp_resp __user *uresp) +{ + int err; + int wqe_size; + + err = sock_create_kern(&init_net, AF_INET, SOCK_DGRAM, 0, &qp->sk); + if (err < 0) + return err; + qp->sk->sk->sk_user_data = qp; + + /* pick a source UDP port number for this QP based on + * the source QPN. this spreads traffic for different QPs + * across different NIC RX queues (while using a single + * flow for a given QP to maintain packet order). + * the port number must be in the Dynamic Ports range + * (0xc000 - 0xffff). + */ + qp->src_port = SW_ROCE_V2_SPORT + + (hash_32(qp_num(qp), 14) & 0x3fff); + qp->sq.max_wr = init->cap.max_send_wr; + + /* These caps are limited by sw_qp_chk_cap() done by the caller */ + wqe_size = max_t(int, init->cap.max_send_sge * sizeof(struct ib_sge), + init->cap.max_inline_data); + qp->sq.max_sge = init->cap.max_send_sge = + wqe_size / sizeof(struct ib_sge); + qp->sq.max_inline = init->cap.max_inline_data = wqe_size; + wqe_size += sizeof(struct sw_send_wqe); + + qp->sq.queue = sw_queue_init(sw, &qp->sq.max_wr, wqe_size); + if (!qp->sq.queue) + return -ENOMEM; + + qp->req.wqe_index = producer_index(qp->sq.queue); + qp->req.state = QP_STATE_RESET; + qp->req.opcode = -1; + qp->comp.opcode = -1; + + spin_lock_init(&qp->sq.sq_lock); + skb_queue_head_init(&qp->req_pkts); + + sw_init_task(sw, &qp->req.task, qp, + sw_requester, "req"); + sw_init_task(sw, &qp->comp.task, qp, + sw_completer, "comp"); + + qp->qp_timeout_jiffies = 0; /* Can't be set for UD/UC in modify_qp */ + if (init->qp_type == IB_QPT_RC) { + timer_setup(&qp->rnr_nak_timer, sw_rnr_nak_timer, 0); + timer_setup(&qp->retrans_timer, sw_retransmit_timer, 0); + } + return 0; +} + +static int sw_qp_init_resp(struct sw_dev *sw, struct sw_qp *qp, + struct ib_qp_init_attr *init, + struct ib_udata *udata, + struct sw_create_qp_resp __user *uresp) +{ + int wqe_size; + + if (!qp->srq) { + qp->rq.max_wr = init->cap.max_recv_wr; + qp->rq.max_sge = init->cap.max_recv_sge; + + wqe_size = rcv_wqe_size(qp->rq.max_sge); + + pr_debug("qp#%d max_wr = %d, max_sge = %d, wqe_size = %d\n", + qp_num(qp), qp->rq.max_wr, qp->rq.max_sge, wqe_size); + + qp->rq.queue = sw_queue_init(sw, + &qp->rq.max_wr, + wqe_size); + if (!qp->rq.queue) + return -ENOMEM; + } + + spin_lock_init(&qp->rq.producer_lock); + spin_lock_init(&qp->rq.consumer_lock); + + skb_queue_head_init(&qp->resp_pkts); + + sw_init_task(sw, &qp->resp.task, qp, + sw_responder, "resp"); + + qp->resp.opcode = OPCODE_NONE; + qp->resp.msn = 0; + qp->resp.state = QP_STATE_RESET; + + return 0; +} + +static int attach_sw_pd(struct erdma_pd *pd, struct sw_dev *sw) +{ + struct ib_mr *mr; + int ret; + + pd->sw_pd = kzalloc(sizeof(*pd->sw_pd), GFP_KERNEL); + if (!pd->sw_pd) + return -ENOMEM; + + pd->sw_pd->ibpd.device = &sw->ib_dev; + ret = sw_alloc_pd(&pd->sw_pd->ibpd, NULL); + if (ret) + goto out; + + mr = sw_get_dma_mr(&pd->sw_pd->ibpd, IB_ACCESS_LOCAL_WRITE); + if (IS_ERR(mr)) + goto out; + + pd->sw_pd->ibpd.local_dma_lkey = mr->lkey; + pd->sw_pd->internal_mr = mr; + mr->device = &sw->ib_dev; + mr->pd = &pd->sw_pd->ibpd; + mr->uobject = NULL; + mr->need_inval = false; + + 
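+	/* the DMA MR created above backs local_dma_lkey for the software
+	 * PD; it is released again via dealloc_sw_mr() in detach_sw_pd().
+	 */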
return 0; +out: + kfree(pd->sw_pd); + pd->sw_pd = NULL; + return ret; +} + +static int dealloc_sw_mr(struct ib_mr *ibmr) +{ + struct sw_mem *mr = to_rmr(ibmr); + + mr->state = SW_MEM_STATE_ZOMBIE; + sw_drop_ref(mr_pd(mr)); + sw_drop_index(mr); + sw_drop_ref(mr); + return 0; +} + +void detach_sw_pd(struct erdma_pd *pd) +{ + dealloc_sw_mr(pd->sw_pd->internal_mr); + sw_dealloc_pd(&pd->sw_pd->ibpd, NULL); + kfree(pd->sw_pd); + pd->sw_pd = NULL; +} + +static int attach_sw_cq(struct erdma_cq *cq, struct sw_dev *sw) +{ + struct ib_cq_init_attr attr; + int ret; + + if (!rdma_is_kernel_res(&cq->ibcq.res)) + return -EINVAL; + + cq->sw_cq = kzalloc(sizeof(*cq->sw_cq), GFP_KERNEL); + if (!cq->sw_cq) + return -ENOMEM; + + cq->sw_cq->ibcq.device = &sw->ib_dev; + attr.cqe = cq->ibcq.cqe; + attr.comp_vector = cq->assoc_eqn - 1; + attr.flags = 0; + + ret = sw_create_cq(&cq->sw_cq->ibcq, &attr, NULL); + if (ret) + goto free_scq; + + memcpy(&cq->sw_cq->ibcq, &cq->ibcq, sizeof(cq->ibcq)); + + return 0; + +free_scq: + kfree(cq->sw_cq); + cq->sw_cq = NULL; + return ret; +} + +void detach_sw_cq(struct erdma_cq *cq) +{ + sw_destroy_cq(&cq->sw_cq->ibcq, NULL); + kfree(cq->sw_cq); + cq->sw_cq = NULL; +} + +static int create_sw_qp_components(struct sw_qp *sw_qp, struct ib_pd *ibpd, + struct sw_dev *sw) +{ + struct erdma_qp *qp = sw_qp->master; + int ret; + + ret = attach_sw_pd(to_epd(ibpd), sw); + if (ret) + return ret; + + ret = attach_sw_cq(qp->scq, sw); + if (ret) + goto free_pd; + + if (qp->scq != qp->rcq) { + ret = attach_sw_cq(qp->rcq, sw); + if (ret) + goto free_scq; + } + return 0; + +free_scq: + detach_sw_cq(qp->scq); +free_pd: + detach_sw_pd(to_epd(ibpd)); + return ret; +} + +static void destroy_sw_qp_components(struct sw_qp *sw_qp, struct ib_pd *ibpd) +{ + struct erdma_qp *qp = sw_qp->master; + + detach_sw_cq(qp->scq); + detach_sw_pd(to_epd(ibpd)); +} + +/* called by the create qp verb */ +int sw_qp_from_init(struct sw_dev *sw, struct sw_qp *qp, + struct ib_qp_init_attr *init, + struct sw_create_qp_resp __user *uresp, + struct ib_pd *ibpd, + struct ib_udata *udata) +{ + struct erdma_cq *rcq = to_ecq(init->recv_cq); + struct erdma_cq *scq = to_ecq(init->send_cq); + struct erdma_pd *pd = to_epd(ibpd); + int err; + + if (init->srq) + return -EINVAL; + + qp->master->scq = to_ecq(init->send_cq); + qp->master->rcq = to_ecq(init->recv_cq); + qp->master->dev = container_of(sw, struct erdma_dev, sw_dev); + + err = create_sw_qp_components(qp, ibpd, sw); + if (err) + return err; + + sw_add_ref(pd->sw_pd); + sw_add_ref(rcq->sw_cq); + sw_add_ref(scq->sw_cq); + + qp->pd = pd->sw_pd; + qp->rcq = rcq->sw_cq; + qp->scq = scq->sw_cq; + qp->srq = NULL; + + rcq->sw_cq->ibcq.device = &sw->ib_dev; + scq->sw_cq->ibcq.device = &sw->ib_dev; + scq->sw_cq->ibcq.comp_handler = scq->ibcq.comp_handler; + rcq->sw_cq->ibcq.comp_handler = rcq->ibcq.comp_handler; + scq->sw_cq->ibcq.event_handler = scq->ibcq.event_handler; + rcq->sw_cq->ibcq.event_handler = rcq->ibcq.event_handler; + scq->sw_cq->ibcq.cq_context = scq->ibcq.cq_context; + rcq->sw_cq->ibcq.cq_context = rcq->ibcq.cq_context; + scq->sw_cq->master = scq; + rcq->sw_cq->master = rcq; + + sw_qp_init_misc(sw, qp, init); + + err = sw_qp_init_req(sw, qp, init, udata, uresp); + if (err) + goto err1; + + err = sw_qp_init_resp(sw, qp, init, udata, uresp); + if (err) + goto err2; + + qp->attr.qp_state = IB_QPS_RESET; + qp->valid = 1; + + return 0; + +err2: + kernel_sock_shutdown(qp->sk, SHUT_RDWR); + sock_release(qp->sk); + sw_queue_cleanup(qp->sq.queue); +err1: + qp->pd = NULL; + qp->rcq 
= NULL; + qp->scq = NULL; + qp->srq = NULL; + + destroy_sw_qp_components(qp, ibpd); + + sw_drop_ref(pd->sw_pd); + sw_drop_ref(rcq->sw_cq); + sw_drop_ref(scq->sw_cq); + + return err; +} + +/* called by the query qp verb */ +int sw_qp_to_init(struct sw_qp *qp, struct ib_qp_init_attr *init) +{ + init->event_handler = qp->ibqp.event_handler; + init->qp_context = qp->ibqp.qp_context; + init->send_cq = qp->ibqp.send_cq; + init->recv_cq = qp->ibqp.recv_cq; + init->srq = qp->ibqp.srq; + + init->cap.max_send_wr = qp->sq.max_wr; + init->cap.max_send_sge = qp->sq.max_sge; + init->cap.max_inline_data = qp->sq.max_inline; + + if (!qp->srq) { + init->cap.max_recv_wr = qp->rq.max_wr; + init->cap.max_recv_sge = qp->rq.max_sge; + } + + init->sq_sig_type = qp->sq_sig_type; + + init->qp_type = qp->ibqp.qp_type; + init->port_num = 1; + + return 0; +} + +/* called by the modify qp verb, this routine checks all the parameters before + * making any changes + */ +int sw_qp_chk_attr(struct sw_dev *sw, struct sw_qp *qp, + struct ib_qp_attr *attr, int mask) +{ + enum ib_qp_state cur_state = (mask & IB_QP_CUR_STATE) ? + attr->cur_qp_state : qp->attr.qp_state; + enum ib_qp_state new_state = (mask & IB_QP_STATE) ? + attr->qp_state : cur_state; + + //if (!ib_modify_qp_is_ok(cur_state, new_state, qp_type(qp), mask)) { + // pr_warn("invalid mask or state for qp\n"); + // goto err1; + //} + + if (mask & IB_QP_STATE) { + if (cur_state == IB_QPS_SQD) { + if (qp->req.state == QP_STATE_DRAIN && + new_state != IB_QPS_ERR) + goto err1; + } + } + + if (mask & IB_QP_PORT) { + if (!rdma_is_port_valid(&sw->ib_dev, attr->port_num)) { + pr_warn("invalid port %d\n", attr->port_num); + goto err1; + } + } + + if (mask & IB_QP_CAP && sw_qp_chk_cap(sw, &attr->cap, !!qp->srq)) + goto err1; + + if (mask & IB_QP_AV && sw_av_chk_attr(sw, &attr->ah_attr)) + goto err1; + + if (mask & IB_QP_ALT_PATH) { + if (sw_av_chk_attr(sw, &attr->alt_ah_attr)) + goto err1; + if (!rdma_is_port_valid(&sw->ib_dev, attr->alt_port_num)) { + pr_warn("invalid alt port %d\n", attr->alt_port_num); + goto err1; + } + if (attr->alt_timeout > 31) { + pr_warn("invalid QP alt timeout %d > 31\n", + attr->alt_timeout); + goto err1; + } + } + + if (mask & IB_QP_PATH_MTU) { + struct sw_port *port = &sw->port; + + enum ib_mtu max_mtu = port->attr.max_mtu; + enum ib_mtu mtu = attr->path_mtu; + + if (mtu > max_mtu) { + pr_debug("invalid mtu (%d) > (%d)\n", + ib_mtu_enum_to_int(mtu), + ib_mtu_enum_to_int(max_mtu)); + goto err1; + } + } + + if (mask & IB_QP_MAX_QP_RD_ATOMIC) { + if (attr->max_rd_atomic > sw->attr.max_qp_rd_atom) { + pr_warn("invalid max_rd_atomic %d > %d\n", + attr->max_rd_atomic, + sw->attr.max_qp_rd_atom); + goto err1; + } + } + + if (mask & IB_QP_TIMEOUT) { + if (attr->timeout > 31) { + pr_warn("invalid QP timeout %d > 31\n", + attr->timeout); + goto err1; + } + } + + return 0; + +err1: + return -EINVAL; +} + +/* move the qp to the reset state */ +static void sw_qp_reset(struct sw_qp *qp) +{ + /* stop tasks from running */ + sw_disable_task(&qp->resp.task); + + /* stop request/comp */ + if (qp->sq.queue) { + if (qp_type(qp) == IB_QPT_RC) + sw_disable_task(&qp->comp.task); + sw_disable_task(&qp->req.task); + } + + /* move qp to the reset state */ + qp->req.state = QP_STATE_RESET; + qp->resp.state = QP_STATE_RESET; + + /* let state machines reset themselves drain work and packet queues + * etc. 
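+	 * by running each task synchronously one final time below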
+ */ + __sw_do_task(&qp->resp.task); + + if (qp->sq.queue) { + __sw_do_task(&qp->comp.task); + __sw_do_task(&qp->req.task); + sw_queue_reset(qp->sq.queue); + } + + /* cleanup attributes */ + atomic_set(&qp->ssn, 0); + qp->req.opcode = -1; + qp->req.need_retry = 0; + qp->req.noack_pkts = 0; + qp->resp.msn = 0; + qp->resp.opcode = -1; + qp->resp.drop_msg = 0; + qp->resp.goto_error = 0; + qp->resp.sent_psn_nak = 0; + + if (qp->resp.mr) { + sw_drop_ref(qp->resp.mr); + qp->resp.mr = NULL; + } + + cleanup_rd_atomic_resources(qp); + + /* reenable tasks */ + sw_enable_task(&qp->resp.task); + + if (qp->sq.queue) { + if (qp_type(qp) == IB_QPT_RC) + sw_enable_task(&qp->comp.task); + + sw_enable_task(&qp->req.task); + } +} + +/* drain the send queue */ +static void sw_qp_drain(struct sw_qp *qp) +{ + if (qp->sq.queue) { + if (qp->req.state != QP_STATE_DRAINED) { + qp->req.state = QP_STATE_DRAIN; + if (qp_type(qp) == IB_QPT_RC) + sw_run_task(&qp->comp.task, 1); + else + __sw_do_task(&qp->comp.task); + sw_run_task(&qp->req.task, 1); + } + } +} + +/* move the qp to the error state */ +void sw_qp_error(struct sw_qp *qp) +{ + qp->req.state = QP_STATE_ERROR; + qp->resp.state = QP_STATE_ERROR; + qp->attr.qp_state = IB_QPS_ERR; + + /* drain work and packet queues */ + dump_stack(); + sw_run_task(&qp->resp.task, 1); + + if (qp_type(qp) == IB_QPT_RC) + sw_run_task(&qp->comp.task, 1); + else + __sw_do_task(&qp->comp.task); + sw_run_task(&qp->req.task, 1); +} + +/* called by the modify qp verb */ +int sw_qp_from_attr(struct sw_qp *qp, struct ib_qp_attr *attr, int mask, + struct ib_udata *udata) +{ + int err; + + if (mask & IB_QP_MAX_QP_RD_ATOMIC) { + int max_rd_atomic = attr->max_rd_atomic ? + roundup_pow_of_two(attr->max_rd_atomic) : 0; + + qp->attr.max_rd_atomic = max_rd_atomic; + atomic_set(&qp->req.rd_atomic, max_rd_atomic); + } + + if (mask & IB_QP_MAX_DEST_RD_ATOMIC) { + int max_dest_rd_atomic = attr->max_dest_rd_atomic ? + roundup_pow_of_two(attr->max_dest_rd_atomic) : 0; + + qp->attr.max_dest_rd_atomic = max_dest_rd_atomic; + + sw_free_rd_atomic_resources(qp); + + err = alloc_rd_atomic_resources(qp, max_dest_rd_atomic); + if (err) + return err; + } + + if (mask & IB_QP_CUR_STATE) + qp->attr.cur_qp_state = attr->qp_state; + + if (mask & IB_QP_EN_SQD_ASYNC_NOTIFY) + qp->attr.en_sqd_async_notify = attr->en_sqd_async_notify; + + if (mask & IB_QP_ACCESS_FLAGS) + qp->attr.qp_access_flags = attr->qp_access_flags; + + if (mask & IB_QP_PKEY_INDEX) + qp->attr.pkey_index = attr->pkey_index; + + if (mask & IB_QP_PORT) + qp->attr.port_num = attr->port_num; + + if (mask & IB_QP_QKEY) + qp->attr.qkey = attr->qkey; + + if (mask & IB_QP_AV) + sw_init_av(&qp->master->dev->ibdev, &attr->ah_attr, &qp->pri_av); + + if (mask & IB_QP_ALT_PATH) { + sw_init_av(&qp->master->dev->ibdev, &attr->alt_ah_attr, &qp->alt_av); + qp->attr.alt_port_num = attr->alt_port_num; + qp->attr.alt_pkey_index = attr->alt_pkey_index; + qp->attr.alt_timeout = attr->alt_timeout; + } + + if (mask & IB_QP_PATH_MTU) { + qp->attr.path_mtu = attr->path_mtu; + qp->mtu = ib_mtu_enum_to_int(attr->path_mtu); + } + + if (mask & IB_QP_TIMEOUT) { + qp->attr.timeout = attr->timeout; + if (attr->timeout == 0) { + qp->qp_timeout_jiffies = 0; + } else { + /* According to the spec, timeout = 4.096 * 2 ^ attr->timeout [us] */ + int j = nsecs_to_jiffies(4096ULL << attr->timeout); + + qp->qp_timeout_jiffies = j ? 
j : 1; + } + } + + if (mask & IB_QP_RETRY_CNT) { + qp->attr.retry_cnt = attr->retry_cnt; + qp->comp.retry_cnt = attr->retry_cnt; + pr_debug("qp#%d set retry count = %d\n", qp_num(qp), + attr->retry_cnt); + } + + if (mask & IB_QP_RNR_RETRY) { + qp->attr.rnr_retry = attr->rnr_retry; + qp->comp.rnr_retry = attr->rnr_retry; + pr_debug("qp#%d set rnr retry count = %d\n", qp_num(qp), + attr->rnr_retry); + } + + if (mask & IB_QP_RQ_PSN) { + qp->attr.rq_psn = (attr->rq_psn & BTH_PSN_MASK); + qp->resp.psn = qp->attr.rq_psn; + pr_debug("qp#%d set resp psn = 0x%x\n", qp_num(qp), + qp->resp.psn); + } + + if (mask & IB_QP_MIN_RNR_TIMER) { + qp->attr.min_rnr_timer = attr->min_rnr_timer; + pr_debug("qp#%d set min rnr timer = 0x%x\n", qp_num(qp), + attr->min_rnr_timer); + } + + if (mask & IB_QP_SQ_PSN) { + qp->attr.sq_psn = (attr->sq_psn & BTH_PSN_MASK); + qp->req.psn = qp->attr.sq_psn; + qp->comp.psn = qp->attr.sq_psn; + pr_debug("qp#%d set req psn = 0x%x\n", qp_num(qp), qp->req.psn); + } + + if (mask & IB_QP_PATH_MIG_STATE) + qp->attr.path_mig_state = attr->path_mig_state; + + if (mask & IB_QP_DEST_QPN) + qp->attr.dest_qp_num = attr->dest_qp_num; + + if (mask & IB_QP_STATE) { + qp->attr.qp_state = attr->qp_state; + + switch (attr->qp_state) { + case IB_QPS_RESET: + pr_debug("qp#%d state -> RESET\n", qp_num(qp)); + sw_qp_reset(qp); + break; + + case IB_QPS_INIT: + pr_debug("qp#%d state -> INIT\n", qp_num(qp)); + qp->req.state = QP_STATE_INIT; + qp->resp.state = QP_STATE_INIT; + break; + + case IB_QPS_RTR: + pr_debug("qp#%d state -> RTR\n", qp_num(qp)); + qp->resp.state = QP_STATE_READY; + break; + + case IB_QPS_RTS: + pr_debug("qp#%d state -> RTS\n", qp_num(qp)); + qp->req.state = QP_STATE_READY; + break; + + case IB_QPS_SQD: + pr_debug("qp#%d state -> SQD\n", qp_num(qp)); + sw_qp_drain(qp); + break; + + case IB_QPS_SQE: + pr_warn("qp#%d state -> SQE !!?\n", qp_num(qp)); + /* Not possible from modify_qp. */ + break; + + case IB_QPS_ERR: + pr_emerg("qp#%d state -> ERR\n", qp_num(qp)); + sw_qp_error(qp); + break; + } + } + + return 0; +} + +/* called by the query qp verb */ +int sw_qp_to_attr(struct sw_qp *qp, struct ib_qp_attr *attr, int mask) +{ + *attr = qp->attr; + + attr->rq_psn = qp->resp.psn; + attr->sq_psn = qp->req.psn; + + attr->cap.max_send_wr = qp->sq.max_wr; + attr->cap.max_send_sge = qp->sq.max_sge; + attr->cap.max_inline_data = qp->sq.max_inline; + + if (!qp->srq) { + attr->cap.max_recv_wr = qp->rq.max_wr; + attr->cap.max_recv_sge = qp->rq.max_sge; + } + + sw_av_to_attr(&qp->pri_av, &attr->ah_attr); + sw_av_to_attr(&qp->alt_av, &attr->alt_ah_attr); + + if (qp->req.state == QP_STATE_DRAIN) { + attr->sq_draining = 1; + /* applications that get this state + * typically spin on it. 
yield the + * processor + */ + cond_resched(); + } else { + attr->sq_draining = 0; + } + + pr_debug("attr->sq_draining = %d\n", attr->sq_draining); + + return 0; +} + +/* called by the destroy qp verb */ +void sw_qp_destroy(struct sw_qp *qp) +{ + qp->valid = 0; + qp->qp_timeout_jiffies = 0; + sw_cleanup_task(&qp->resp.task); + + if (qp_type(qp) == IB_QPT_RC) { + del_timer_sync(&qp->retrans_timer); + del_timer_sync(&qp->rnr_nak_timer); + } + + sw_cleanup_task(&qp->req.task); + sw_cleanup_task(&qp->comp.task); + + /* flush out any receive wr's or pending requests */ + __sw_do_task(&qp->req.task); + if (qp->sq.queue) { + __sw_do_task(&qp->comp.task); + __sw_do_task(&qp->req.task); + } +} + +void cleanup_sw_qp(struct sw_qp *qp) +{ + sw_drop_all_mcast_groups(qp); + + if (qp->sq.queue) + sw_queue_cleanup(qp->sq.queue); + + if (qp->srq) + sw_drop_ref(qp->srq); + + if (qp->rq.queue) + sw_queue_cleanup(qp->rq.queue); + + if (qp->scq) + sw_drop_ref(qp->scq); + if (qp->rcq) + sw_drop_ref(qp->rcq); + if (qp->pd) + sw_drop_ref(qp->pd); + + if (qp->resp.mr) { + sw_drop_ref(qp->resp.mr); + qp->resp.mr = NULL; + } + + if (qp_type(qp) == IB_QPT_RC) + sk_dst_reset(qp->sk->sk); + + sw_free_rd_atomic_resources(qp); + + kernel_sock_shutdown(qp->sk, SHUT_RDWR); + sock_release(qp->sk); +} + +/* called when the last reference to the qp is dropped */ +static void sw_qp_do_cleanup(struct work_struct *work) +{ + struct sw_qp *qp = container_of(work, typeof(*qp), cleanup_work.work); + + sw_drop_all_mcast_groups(qp); + + if (qp->sq.queue) + sw_queue_cleanup(qp->sq.queue); + + if (qp->srq) + sw_drop_ref(qp->srq); + + if (qp->rq.queue) + sw_queue_cleanup(qp->rq.queue); + + if (qp->scq) + sw_drop_ref(qp->scq); + if (qp->rcq) + sw_drop_ref(qp->rcq); + if (qp->pd) + sw_drop_ref(qp->pd); + + if (qp->resp.mr) { + sw_drop_ref(qp->resp.mr); + qp->resp.mr = NULL; + } + + if (qp_type(qp) == IB_QPT_RC) + sk_dst_reset(qp->sk->sk); + + sw_free_rd_atomic_resources(qp); + + kernel_sock_shutdown(qp->sk, SHUT_RDWR); + sock_release(qp->sk); +} + +/* called when the last reference to the qp is dropped */ +void sw_qp_cleanup(struct sw_pool_entry *arg) +{ + struct sw_qp *qp = container_of(arg, typeof(*qp), pelem); + + execute_in_process_context(sw_qp_do_cleanup, &qp->cleanup_work); +} diff --git a/drivers/infiniband/hw/erdma/compat/sw_queue.c b/drivers/infiniband/hw/erdma/compat/sw_queue.c new file mode 100644 index 0000000000000000000000000000000000000000..abf2221fc1d0490630a2766aa60093da0a512be1 --- /dev/null +++ b/drivers/infiniband/hw/erdma/compat/sw_queue.c @@ -0,0 +1,77 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* + * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved. + * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved. + */ + +#include +#include "sw.h" +#include "sw_loc.h" +#include "sw_queue.h" + +inline void sw_queue_reset(struct sw_queue *q) +{ + /* queue is comprised from header and the memory + * of the actual queue. 
See "struct sw_queue_buf" in sw_queue.h + * reset only the queue itself and not the management header + */ + memset(q->buf->data, 0, q->buf_size - sizeof(struct sw_queue_buf)); +} + +struct sw_queue *sw_queue_init(struct sw_dev *sw, + int *num_elem, + unsigned int elem_size) +{ + struct sw_queue *q; + size_t buf_size; + unsigned int num_slots; + + /* num_elem == 0 is allowed, but uninteresting */ + if (*num_elem < 0) + goto err1; + + q = kmalloc(sizeof(*q), GFP_KERNEL); + if (!q) + goto err1; + + q->sw = sw; + + /* used in resize, only need to copy used part of queue */ + q->elem_size = elem_size; + + /* pad element up to at least a cacheline and always a power of 2 */ + if (elem_size < cache_line_size()) + elem_size = cache_line_size(); + elem_size = roundup_pow_of_two(elem_size); + + q->log2_elem_size = order_base_2(elem_size); + + num_slots = *num_elem + 1; + num_slots = roundup_pow_of_two(num_slots); + q->index_mask = num_slots - 1; + + buf_size = sizeof(struct sw_queue_buf) + num_slots * elem_size; + + q->buf = vmalloc_user(buf_size); + if (!q->buf) + goto err2; + + q->buf->log2_elem_size = q->log2_elem_size; + q->buf->index_mask = q->index_mask; + + q->buf_size = buf_size; + + *num_elem = num_slots - 1; + return q; + +err2: + kfree(q); +err1: + return NULL; +} + +void sw_queue_cleanup(struct sw_queue *q) +{ + vfree(q->buf); + kfree(q); +} diff --git a/drivers/infiniband/hw/erdma/compat/sw_queue.h b/drivers/infiniband/hw/erdma/compat/sw_queue.h new file mode 100644 index 0000000000000000000000000000000000000000..3458e24a758777d56937ed46b25e808654be106f --- /dev/null +++ b/drivers/infiniband/hw/erdma/compat/sw_queue.h @@ -0,0 +1,135 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* + * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved. + * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved. + */ + +#ifndef SW_QUEUE_H +#define SW_QUEUE_H + +/* implements a simple circular buffer that can optionally be + * shared between user space and the kernel and can be resized + + * the requested element size is rounded up to a power of 2 + * and the number of elements in the buffer is also rounded + * up to a power of 2. Since the queue is empty when the + * producer and consumer indices match the maximum capacity + * of the queue is one less than the number of element slots + */ + +/* this data structure is shared between user space and kernel + * space for those cases where the queue is shared. It contains + * the producer and consumer indices. 
Is also contains a copy + * of the queue size parameters for user space to use but the + * kernel must use the parameters in the sw_queue struct + * this MUST MATCH the corresponding libsw struct + * for performance reasons arrange to have producer and consumer + * pointers in separate cache lines + * the kernel should always mask the indices to avoid accessing + * memory outside of the data area + */ +struct sw_queue_buf { + __u32 log2_elem_size; + __u32 index_mask; + __u32 pad_1[30]; + __u32 producer_index; + __u32 pad_2[31]; + __u32 consumer_index; + __u32 pad_3[31]; + __u8 data[]; +}; + +struct sw_queue { + struct sw_dev *sw; + struct sw_queue_buf *buf; + struct sw_mmap_info *ip; + size_t buf_size; + size_t elem_size; + unsigned int log2_elem_size; + unsigned int index_mask; +}; + +void sw_queue_reset(struct sw_queue *q); + +struct sw_queue *sw_queue_init(struct sw_dev *sw, + int *num_elem, + unsigned int elem_size); + +void sw_queue_cleanup(struct sw_queue *queue); + +static inline int next_index(struct sw_queue *q, int index) +{ + return (index + 1) & q->buf->index_mask; +} + +static inline int queue_empty(struct sw_queue *q) +{ + return ((q->buf->producer_index - q->buf->consumer_index) + & q->index_mask) == 0; +} + +static inline int queue_full(struct sw_queue *q) +{ + return ((q->buf->producer_index + 1 - q->buf->consumer_index) + & q->index_mask) == 0; +} + +static inline void advance_producer(struct sw_queue *q) +{ + q->buf->producer_index = (q->buf->producer_index + 1) + & q->index_mask; +} + +static inline void advance_consumer(struct sw_queue *q) +{ + q->buf->consumer_index = (q->buf->consumer_index + 1) + & q->index_mask; +} + +static inline void *producer_addr(struct sw_queue *q) +{ + return q->buf->data + ((q->buf->producer_index & q->index_mask) + << q->log2_elem_size); +} + +static inline void *consumer_addr(struct sw_queue *q) +{ + return q->buf->data + ((q->buf->consumer_index & q->index_mask) + << q->log2_elem_size); +} + +static inline unsigned int producer_index(struct sw_queue *q) +{ + return q->buf->producer_index; +} + +static inline unsigned int consumer_index(struct sw_queue *q) +{ + return q->buf->consumer_index; +} + +static inline void *addr_from_index(struct sw_queue *q, unsigned int index) +{ + return q->buf->data + ((index & q->index_mask) + << q->buf->log2_elem_size); +} + +static inline unsigned int index_from_addr(const struct sw_queue *q, + const void *addr) +{ + return (((u8 *)addr - q->buf->data) >> q->log2_elem_size) + & q->index_mask; +} + +static inline unsigned int queue_count(const struct sw_queue *q) +{ + return (q->buf->producer_index - q->buf->consumer_index) + & q->index_mask; +} + +static inline void *queue_head(struct sw_queue *q) +{ + return queue_empty(q) ? NULL : consumer_addr(q); +} + +#endif /* SW_QUEUE_H */ diff --git a/drivers/infiniband/hw/erdma/compat/sw_recv.c b/drivers/infiniband/hw/erdma/compat/sw_recv.c new file mode 100644 index 0000000000000000000000000000000000000000..8d7f86d1ea740b50a465f9c2dcdc8a549efe55fc --- /dev/null +++ b/drivers/infiniband/hw/erdma/compat/sw_recv.c @@ -0,0 +1,419 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* + * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved. + * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved. 
+ */ + +#include + +#include "sw.h" +#include "sw_loc.h" +#include "../erdma_verbs.h" + +/* check that QP matches packet opcode type and is in a valid state */ +static int check_type_state(struct sw_dev *sw, struct sw_pkt_info *pkt, + struct sw_qp *qp) +{ + unsigned int pkt_type; + + if (unlikely(!qp->valid)) + goto err1; + + pkt_type = pkt->opcode & 0xe0; + + switch (qp_type(qp)) { + case IB_QPT_RC: + if (unlikely(pkt_type != IB_OPCODE_RC)) { + pr_warn_ratelimited("bad qp type\n"); + goto err1; + } + break; + case IB_QPT_UC: + if (unlikely(pkt_type != IB_OPCODE_UC)) { + pr_warn_ratelimited("bad qp type\n"); + goto err1; + } + break; + case IB_QPT_UD: + case IB_QPT_SMI: + case IB_QPT_GSI: + if (unlikely(pkt_type != IB_OPCODE_UD)) { + pr_warn_ratelimited("bad qp type\n"); + goto err1; + } + break; + default: + pr_warn_ratelimited("unsupported qp type\n"); + goto err1; + } + + if (pkt->mask & SW_REQ_MASK) { + if (unlikely(qp->resp.state != QP_STATE_READY)) + goto err1; + } else if (unlikely(qp->req.state < QP_STATE_READY || + qp->req.state > QP_STATE_DRAINED)) { + goto err1; + } + + return 0; + +err1: + return -EINVAL; +} + +static void set_bad_pkey_cntr(struct sw_port *port) +{ + spin_lock_bh(&port->port_lock); + port->attr.bad_pkey_cntr = min((u32)0xffff, + port->attr.bad_pkey_cntr + 1); + spin_unlock_bh(&port->port_lock); +} + +static void set_qkey_viol_cntr(struct sw_port *port) +{ + spin_lock_bh(&port->port_lock); + port->attr.qkey_viol_cntr = min((u32)0xffff, + port->attr.qkey_viol_cntr + 1); + spin_unlock_bh(&port->port_lock); +} + +static int check_keys(struct sw_dev *sw, struct sw_pkt_info *pkt, + u32 qpn, struct sw_qp *qp) +{ + struct sw_port *port = &sw->port; + u16 pkey = bth_pkey(pkt); + + pkt->pkey_index = 0; + + if (!pkey_match(pkey, IB_DEFAULT_PKEY_FULL)) { + pr_warn_ratelimited("bad pkey = 0x%x\n", pkey); + set_bad_pkey_cntr(port); + goto err1; + } + + if ((qp_type(qp) == IB_QPT_UD || qp_type(qp) == IB_QPT_GSI) && + pkt->mask) { + u32 qkey = (qpn == 1) ? 
GSI_QKEY : qp->attr.qkey; + + if (unlikely(deth_qkey(pkt) != qkey)) { + pr_warn_ratelimited("bad qkey, got 0x%x expected 0x%x for qpn 0x%x\n", + deth_qkey(pkt), qkey, qpn); + set_qkey_viol_cntr(port); + goto err1; + } + } + + return 0; + +err1: + return -EINVAL; +} + +static int check_addr(struct sw_dev *sw, struct sw_pkt_info *pkt, + struct sw_qp *qp) +{ + struct sk_buff *skb = PKT_TO_SKB(pkt); + + if (qp_type(qp) != IB_QPT_RC && qp_type(qp) != IB_QPT_UC) + goto done; + + if (unlikely(pkt->port_num != qp->attr.port_num)) { + pr_warn_ratelimited("port %d != qp port %d\n", + pkt->port_num, qp->attr.port_num); + goto err1; + } + + if (skb->protocol == htons(ETH_P_IP)) { + struct in_addr *saddr = + &qp->pri_av.sgid_addr._sockaddr_in.sin_addr; + struct in_addr *daddr = + &qp->pri_av.dgid_addr._sockaddr_in.sin_addr; + + if (ip_hdr(skb)->daddr != saddr->s_addr) { + pr_warn_ratelimited("dst addr %pI4 != qp source addr %pI4\n", + &ip_hdr(skb)->daddr, + &saddr->s_addr); + goto err1; + } + + if (ip_hdr(skb)->saddr != daddr->s_addr) { + pr_warn_ratelimited("source addr %pI4 != qp dst addr %pI4\n", + &ip_hdr(skb)->saddr, + &daddr->s_addr); + goto err1; + } + + } else if (skb->protocol == htons(ETH_P_IPV6)) { + struct in6_addr *saddr = + &qp->pri_av.sgid_addr._sockaddr_in6.sin6_addr; + struct in6_addr *daddr = + &qp->pri_av.dgid_addr._sockaddr_in6.sin6_addr; + + if (memcmp(&ipv6_hdr(skb)->daddr, saddr, sizeof(*saddr))) { + pr_warn_ratelimited("dst addr %pI6 != qp source addr %pI6\n", + &ipv6_hdr(skb)->daddr, saddr); + goto err1; + } + + if (memcmp(&ipv6_hdr(skb)->saddr, daddr, sizeof(*daddr))) { + pr_warn_ratelimited("source addr %pI6 != qp dst addr %pI6\n", + &ipv6_hdr(skb)->saddr, daddr); + goto err1; + } + } + +done: + return 0; + +err1: + return -EINVAL; +} + +static int hdr_check(struct sw_pkt_info *pkt) +{ + struct sw_dev *sw = pkt->sw; + struct sw_port *port = &sw->port; + struct erdma_qp *master_qp; + struct sw_qp *qp = NULL; + u32 qpn = bth_qpn(pkt); + int index; + int err; + + if (unlikely(bth_tver(pkt) != BTH_TVER)) { + pr_warn_ratelimited("bad tver\n"); + goto err1; + } + + if (unlikely(qpn == 0)) { + pr_warn_once("QP 0 not supported"); + goto err1; + } + + if (qpn != IB_MULTICAST_QPN) { + index = (qpn == 1) ? 
port->qp_gsi_index : qpn; + + master_qp = find_qp_by_qpn(sw->master, qpn);/* gsi qpn pr_info -----*/ + if (unlikely(!master_qp || master_qp->attrs.flags & ERDMA_QP_IN_DESTROY)) { + pr_warn_ratelimited("no qp matches qpn 0x%x\n", qpn); + goto err1; + } + + //qp = sw_pool_get_index(&sw->qp_pool, index); + qp = master_qp->sw_qp; + if (unlikely(!qp)) { + pr_warn_ratelimited("no qp matches qpn 0x%x\n", qpn); + goto err1; + } + sw_add_ref(qp); + + err = check_type_state(sw, pkt, qp); + if (unlikely(err)) + goto err2; + + err = check_addr(sw, pkt, qp); + if (unlikely(err)) + goto err2; + + err = check_keys(sw, pkt, qpn, qp); + if (unlikely(err)) + goto err2; + } else { + if (unlikely((pkt->mask & SW_GRH_MASK) == 0)) { + pr_warn_ratelimited("no grh for mcast qpn\n"); + goto err1; + } + } + + pkt->qp = qp; + return 0; + +err2: + sw_drop_ref(qp); +err1: + return -EINVAL; +} + +static inline void sw_rcv_pkt(struct sw_pkt_info *pkt, struct sk_buff *skb) +{ + if (pkt->mask & SW_REQ_MASK) + sw_resp_queue_pkt(pkt->qp, skb); + else + sw_comp_queue_pkt(pkt->qp, skb); +} + +static void sw_rcv_mcast_pkt(struct sw_dev *sw, struct sk_buff *skb) +{ + struct sw_pkt_info *pkt = SKB_TO_PKT(skb); + struct sw_mc_grp *mcg; + struct sw_mc_elem *mce; + struct sw_qp *qp; + union ib_gid dgid; + struct sk_buff *per_qp_skb; + struct sw_pkt_info *per_qp_pkt; + int err; + + if (skb->protocol == htons(ETH_P_IP)) + ipv6_addr_set_v4mapped(ip_hdr(skb)->daddr, + (struct in6_addr *)&dgid); + else if (skb->protocol == htons(ETH_P_IPV6)) + memcpy(&dgid, &ipv6_hdr(skb)->daddr, sizeof(dgid)); + + /* lookup mcast group corresponding to mgid, takes a ref */ + mcg = sw_pool_get_key(&sw->mc_grp_pool, &dgid); + if (!mcg) + goto err1; /* mcast group not registered */ + + spin_lock_bh(&mcg->mcg_lock); + + list_for_each_entry(mce, &mcg->qp_list, qp_list) { + qp = mce->qp; + + /* validate qp for incoming packet */ + err = check_type_state(sw, pkt, qp); + if (err) + continue; + + err = check_keys(sw, pkt, bth_qpn(pkt), qp); + if (err) + continue; + + /* for all but the last qp create a new clone of the + * skb and pass to the qp. If an error occurs in the + * checks for the last qp in the list we need to + * free the skb since it hasn't been passed on to + * sw_rcv_pkt() which would free it later. + */ + if (mce->qp_list.next != &mcg->qp_list) { + per_qp_skb = skb_clone(skb, GFP_ATOMIC); + } else { + per_qp_skb = skb; + /* show we have consumed the skb */ + skb = NULL; + } + + if (unlikely(!per_qp_skb)) + continue; + + per_qp_pkt = SKB_TO_PKT(per_qp_skb); + per_qp_pkt->qp = qp; + sw_add_ref(qp); + sw_rcv_pkt(per_qp_pkt, per_qp_skb); + } + + spin_unlock_bh(&mcg->mcg_lock); + + sw_drop_ref(mcg); /* drop ref from sw_pool_get_key. 
*/ + +err1: + /* free skb if not consumed */ + kfree_skb(skb); +} + +/** + * sw_chk_dgid - validate destination IP address + * @sw: sw device that received packet + * @skb: the received packet buffer + * + * Accept any loopback packets + * Extract IP address from packet and + * Accept if multicast packet + * Accept if matches an SGID table entry + */ +static int sw_chk_dgid(struct sw_dev *sw, struct sk_buff *skb) +{ + struct sw_pkt_info *pkt = SKB_TO_PKT(skb); + const struct ib_gid_attr *gid_attr; + union ib_gid dgid; + union ib_gid *pdgid; + + if (pkt->mask & SW_LOOPBACK_MASK) + return 0; + + if (skb->protocol == htons(ETH_P_IP)) { + ipv6_addr_set_v4mapped(ip_hdr(skb)->daddr, + (struct in6_addr *)&dgid); + pdgid = &dgid; + } else { + pdgid = (union ib_gid *)&ipv6_hdr(skb)->daddr; + } + + if (rdma_is_multicast_addr((struct in6_addr *)pdgid)) + return 0; + + gid_attr = rdma_find_gid_by_port(&sw->master->ibdev, pdgid, + IB_GID_TYPE_ROCE_UDP_ENCAP, + 1, skb->dev); + if (IS_ERR(gid_attr)) + return PTR_ERR(gid_attr); + + rdma_put_gid_attr(gid_attr); + return 0; +} + +/* sw_rcv is called from the interface driver */ +void sw_rcv(struct sk_buff *skb) +{ + int err; + struct sw_pkt_info *pkt = SKB_TO_PKT(skb); + struct sw_dev *sw = pkt->sw; + __be32 *icrcp; + u32 calc_icrc, pack_icrc; + + pkt->offset = 0; + + if (unlikely(skb->len < pkt->offset + SW_BTH_BYTES)) + goto drop; + + if (sw_chk_dgid(sw, skb) < 0) { + pr_warn_ratelimited("failed checking dgid\n"); + goto drop; + } + + pkt->opcode = bth_opcode(pkt); + pkt->psn = bth_psn(pkt); + pkt->qp = NULL; + pkt->mask |= sw_opcode[pkt->opcode].mask; + + if (unlikely(skb->len < header_size(pkt))) + goto drop; + + err = hdr_check(pkt); + if (unlikely(err)) + goto drop; + + /* Verify ICRC */ + icrcp = (__be32 *)(pkt->hdr + pkt->paylen - SW_ICRC_SIZE); + pack_icrc = be32_to_cpu(*icrcp); + + calc_icrc = sw_icrc_hdr(pkt, skb); + calc_icrc = sw_crc32(sw, calc_icrc, (u8 *)payload_addr(pkt), + payload_size(pkt) + bth_pad(pkt)); + calc_icrc = (__force u32)cpu_to_be32(~calc_icrc); + if (unlikely(calc_icrc != pack_icrc)) { + if (skb->protocol == htons(ETH_P_IPV6)) + pr_warn_ratelimited("bad ICRC from %pI6c\n", + &ipv6_hdr(skb)->saddr); + else if (skb->protocol == htons(ETH_P_IP)) + pr_warn_ratelimited("bad ICRC from %pI4\n", + &ip_hdr(skb)->saddr); + else + pr_warn_ratelimited("bad ICRC from unknown\n"); + + goto drop; + } + + sw_counter_inc(sw, SW_CNT_RCVD_PKTS); + + if (unlikely(bth_qpn(pkt) == IB_MULTICAST_QPN)) + sw_rcv_mcast_pkt(sw, skb); + else + sw_rcv_pkt(pkt, skb); + + return; + +drop: + if (pkt->qp) + sw_drop_ref(pkt->qp); + + kfree_skb(skb); +} diff --git a/drivers/infiniband/hw/erdma/compat/sw_req.c b/drivers/infiniband/hw/erdma/compat/sw_req.c new file mode 100644 index 0000000000000000000000000000000000000000..83753e127f6262b820065888e12c2828fdd8f4ad --- /dev/null +++ b/drivers/infiniband/hw/erdma/compat/sw_req.c @@ -0,0 +1,603 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* + * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved. + * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved. + */ + +#include +#include + +#include "sw.h" +#include "sw_loc.h" +#include "sw_queue.h" + +static int next_opcode(struct sw_qp *qp, struct sw_send_wqe *wqe, + u32 opcode); + +static inline void retry_first_write_send(struct sw_qp *qp, + struct sw_send_wqe *wqe, + unsigned int mask, int npsn) +{ + int i; + + for (i = 0; i < npsn; i++) { + int to_send = (wqe->dma.resid > qp->mtu) ? 
+ qp->mtu : wqe->dma.resid; + + qp->req.opcode = next_opcode(qp, wqe, + wqe->wr.opcode); + + if (wqe->wr.send_flags & IB_SEND_INLINE) { + wqe->dma.resid -= to_send; + wqe->dma.sge_offset += to_send; + } else { + sw_advance_dma_data(&wqe->dma, to_send); + } + if (mask & WR_WRITE_MASK) + wqe->iova += qp->mtu; + } +} + +static void req_retry(struct sw_qp *qp) +{ + struct sw_send_wqe *wqe; + unsigned int wqe_index; + unsigned int mask; + int npsn; + int first = 1; + + qp->req.wqe_index = consumer_index(qp->sq.queue); + qp->req.psn = qp->comp.psn; + qp->req.opcode = -1; + + for (wqe_index = consumer_index(qp->sq.queue); + wqe_index != producer_index(qp->sq.queue); + wqe_index = next_index(qp->sq.queue, wqe_index)) { + wqe = addr_from_index(qp->sq.queue, wqe_index); + mask = wr_opcode_mask(wqe->wr.opcode, qp); + + if (wqe->state == wqe_state_posted) + break; + + if (wqe->state == wqe_state_done) + continue; + + wqe->iova = (mask & WR_ATOMIC_MASK) ? + wqe->wr.wr.atomic.remote_addr : + (mask & WR_READ_OR_WRITE_MASK) ? + wqe->wr.wr.rdma.remote_addr : + 0; + + if (!first || (mask & WR_READ_MASK) == 0) { + wqe->dma.resid = wqe->dma.length; + wqe->dma.cur_sge = 0; + wqe->dma.sge_offset = 0; + } + + if (first) { + first = 0; + + if (mask & WR_WRITE_OR_SEND_MASK) { + npsn = (qp->comp.psn - wqe->first_psn) & + BTH_PSN_MASK; + retry_first_write_send(qp, wqe, mask, npsn); + } + + if (mask & WR_READ_MASK) { + npsn = (wqe->dma.length - wqe->dma.resid) / + qp->mtu; + wqe->iova += npsn * qp->mtu; + } + } + + wqe->state = wqe_state_posted; + } +} + +void sw_rnr_nak_timer(struct timer_list *t) +{ + struct sw_qp *qp = from_timer(qp, t, rnr_nak_timer); + + pr_debug("qp#%d rnr nak timer fired\n", qp_num(qp)); + sw_run_task(&qp->req.task, 1); +} + +static struct sw_send_wqe *req_next_wqe(struct sw_qp *qp) +{ + struct sw_send_wqe *wqe = queue_head(qp->sq.queue); + unsigned long flags; + + if (unlikely(qp->req.state == QP_STATE_DRAIN)) { + /* check to see if we are drained; + * state_lock used by requester and completer + */ + spin_lock_irqsave(&qp->state_lock, flags); + do { + if (qp->req.state != QP_STATE_DRAIN) { + /* comp just finished */ + spin_unlock_irqrestore(&qp->state_lock, + flags); + break; + } + + if (wqe && ((qp->req.wqe_index != + consumer_index(qp->sq.queue)) || + (wqe->state != wqe_state_posted))) { + /* comp not done yet */ + spin_unlock_irqrestore(&qp->state_lock, + flags); + break; + } + + qp->req.state = QP_STATE_DRAINED; + spin_unlock_irqrestore(&qp->state_lock, flags); + + if (qp->ibqp.event_handler) { + struct ib_event ev; + + ev.device = qp->ibqp.device; + ev.element.qp = &qp->ibqp; + ev.event = IB_EVENT_SQ_DRAINED; + qp->ibqp.event_handler(&ev, + qp->ibqp.qp_context); + } + } while (0); + } + + if (qp->req.wqe_index == producer_index(qp->sq.queue)) + return NULL; + + wqe = addr_from_index(qp->sq.queue, qp->req.wqe_index); + + if (unlikely((qp->req.state == QP_STATE_DRAIN || + qp->req.state == QP_STATE_DRAINED) && + (wqe->state != wqe_state_processing))) + return NULL; + + if (unlikely((wqe->wr.send_flags & IB_SEND_FENCE) && + (qp->req.wqe_index != consumer_index(qp->sq.queue)))) { + qp->req.wait_fence = 1; + return NULL; + } + + wqe->mask = wr_opcode_mask(wqe->wr.opcode, qp); + return wqe; +} + +static int next_opcode(struct sw_qp *qp, struct sw_send_wqe *wqe, + u32 opcode) +{ + switch (qp_type(qp)) { + case IB_QPT_GSI: + switch (opcode) { + case IB_WR_SEND: + return IB_OPCODE_UD_SEND_ONLY; + + case IB_WR_SEND_WITH_IMM: + return IB_OPCODE_UD_SEND_ONLY_WITH_IMMEDIATE; + } + break; + + default: 
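+ /* only GSI SEND opcodes are emulated by this software path; everything else returns -EINVAL */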
+ break; + } + + return -EINVAL; +} + +static inline int check_init_depth(struct sw_qp *qp, struct sw_send_wqe *wqe) +{ + int depth; + + if (wqe->has_rd_atomic) + return 0; + + qp->req.need_rd_atomic = 1; + depth = atomic_dec_return(&qp->req.rd_atomic); + + if (depth >= 0) { + qp->req.need_rd_atomic = 0; + wqe->has_rd_atomic = 1; + return 0; + } + + atomic_inc(&qp->req.rd_atomic); + return -EAGAIN; +} + +static inline int get_mtu(struct sw_qp *qp) +{ + struct sw_dev *sw = to_rdev(qp->ibqp.device); + + if ((qp_type(qp) == IB_QPT_RC) || (qp_type(qp) == IB_QPT_UC)) + return qp->mtu; + + return sw->port.mtu_cap; +} + +static struct sk_buff *init_req_packet(struct sw_qp *qp, + struct sw_send_wqe *wqe, + int opcode, int payload, + struct sw_pkt_info *pkt) +{ + struct sw_dev *sw = to_rdev(qp->ibqp.device); + struct sk_buff *skb; + struct sw_send_wr *ibwr = &wqe->wr; + struct sw_av *av; + int pad = (-payload) & 0x3; + int paylen; + int solicited; + u16 pkey; + u32 qp_num; + int ack_req; + + /* length from start of bth to end of icrc */ + paylen = sw_opcode[opcode].length + payload + pad + SW_ICRC_SIZE; + + /* pkt->hdr, sw, port_num and mask are initialized in ifc + * layer + */ + pkt->opcode = opcode; + pkt->qp = qp; + pkt->psn = qp->req.psn; + pkt->mask = sw_opcode[opcode].mask; + pkt->paylen = paylen; + pkt->offset = 0; + pkt->wqe = wqe; + + /* init skb */ + av = sw_get_av(pkt); + skb = sw_init_packet(sw, av, paylen, pkt); + if (unlikely(!skb)) + return NULL; + + /* init bth */ + solicited = (ibwr->send_flags & IB_SEND_SOLICITED) && + (pkt->mask & SW_END_MASK) && + ((pkt->mask & (SW_SEND_MASK)) || + (pkt->mask & (SW_WRITE_MASK | SW_IMMDT_MASK)) == + (SW_WRITE_MASK | SW_IMMDT_MASK)); + + pkey = IB_DEFAULT_PKEY_FULL; + + qp_num = (pkt->mask & SW_DETH_MASK) ? 
ibwr->wr.ud.remote_qpn : + qp->attr.dest_qp_num; + + ack_req = ((pkt->mask & SW_END_MASK) || + (qp->req.noack_pkts++ > SW_MAX_PKT_PER_ACK)); + if (ack_req) + qp->req.noack_pkts = 0; + + bth_init(pkt, pkt->opcode, solicited, 0, pad, pkey, qp_num, + ack_req, pkt->psn); + + /* init optional headers */ + if (pkt->mask & SW_RETH_MASK) { + reth_set_rkey(pkt, ibwr->wr.rdma.rkey); + reth_set_va(pkt, wqe->iova); + reth_set_len(pkt, wqe->dma.resid); + } + + if (pkt->mask & SW_IMMDT_MASK) + immdt_set_imm(pkt, ibwr->ex.imm_data); + + if (pkt->mask & SW_IETH_MASK) + ieth_set_rkey(pkt, ibwr->ex.invalidate_rkey); + + if (pkt->mask & SW_ATMETH_MASK) { + atmeth_set_va(pkt, wqe->iova); + if (opcode == IB_OPCODE_RC_COMPARE_SWAP || + opcode == IB_OPCODE_RD_COMPARE_SWAP) { + atmeth_set_swap_add(pkt, ibwr->wr.atomic.swap); + atmeth_set_comp(pkt, ibwr->wr.atomic.compare_add); + } else { + atmeth_set_swap_add(pkt, ibwr->wr.atomic.compare_add); + } + atmeth_set_rkey(pkt, ibwr->wr.atomic.rkey); + } + + if (pkt->mask & SW_DETH_MASK) { + if (qp->ibqp.qp_num == 1) + deth_set_qkey(pkt, GSI_QKEY); + else + deth_set_qkey(pkt, ibwr->wr.ud.remote_qkey); + deth_set_sqp(pkt, qp->ibqp.qp_num); + } + + return skb; +} + +static int fill_packet(struct sw_qp *qp, struct sw_send_wqe *wqe, + struct sw_pkt_info *pkt, struct sk_buff *skb, + int paylen) +{ + struct sw_dev *sw = to_rdev(qp->ibqp.device); + u32 crc = 0; + u32 *p; + int err; + + err = sw_prepare(pkt, skb, &crc); + if (err) + return err; + + if (pkt->mask & SW_WRITE_OR_SEND) { + if (wqe->wr.send_flags & IB_SEND_INLINE) { + u8 *tmp = &wqe->dma.inline_data[wqe->dma.sge_offset]; + + crc = sw_crc32(sw, crc, tmp, paylen); + memcpy(payload_addr(pkt), tmp, paylen); + + wqe->dma.resid -= paylen; + wqe->dma.sge_offset += paylen; + } else { + err = sw_copy_data(qp->pd, 0, &wqe->dma, + payload_addr(pkt), paylen, + from_mem_obj, + &crc); + if (err) + return err; + } + if (bth_pad(pkt)) { + u8 *pad = payload_addr(pkt) + paylen; + + memset(pad, 0, bth_pad(pkt)); + crc = sw_crc32(sw, crc, pad, bth_pad(pkt)); + } + } + p = payload_addr(pkt) + paylen + bth_pad(pkt); + + *p = ~crc; + + return 0; +} + +static void update_wqe_state(struct sw_qp *qp, + struct sw_send_wqe *wqe, + struct sw_pkt_info *pkt) +{ + if (pkt->mask & SW_END_MASK) { + if (qp_type(qp) == IB_QPT_RC) + wqe->state = wqe_state_pending; + } else { + wqe->state = wqe_state_processing; + } +} + +static void update_wqe_psn(struct sw_qp *qp, + struct sw_send_wqe *wqe, + struct sw_pkt_info *pkt, + int payload) +{ + /* number of packets left to send including current one */ + int num_pkt = (wqe->dma.resid + payload + qp->mtu - 1) / qp->mtu; + + /* handle zero length packet case */ + if (num_pkt == 0) + num_pkt = 1; + + if (pkt->mask & SW_START_MASK) { + wqe->first_psn = qp->req.psn; + wqe->last_psn = (qp->req.psn + num_pkt - 1) & BTH_PSN_MASK; + } + + if (pkt->mask & SW_READ_MASK) + qp->req.psn = (wqe->first_psn + num_pkt) & BTH_PSN_MASK; + else + qp->req.psn = (qp->req.psn + 1) & BTH_PSN_MASK; +} + +static void save_state(struct sw_send_wqe *wqe, + struct sw_qp *qp, + struct sw_send_wqe *rollback_wqe, + u32 *rollback_psn) +{ + rollback_wqe->state = wqe->state; + rollback_wqe->first_psn = wqe->first_psn; + rollback_wqe->last_psn = wqe->last_psn; + *rollback_psn = qp->req.psn; +} + +static void rollback_state(struct sw_send_wqe *wqe, + struct sw_qp *qp, + struct sw_send_wqe *rollback_wqe, + u32 rollback_psn) +{ + wqe->state = rollback_wqe->state; + wqe->first_psn = rollback_wqe->first_psn; + wqe->last_psn = rollback_wqe->last_psn; + 
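+ /* restore the request PSN captured in save_state() before the transmit attempt */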
qp->req.psn = rollback_psn; +} + +static void update_state(struct sw_qp *qp, struct sw_send_wqe *wqe, + struct sw_pkt_info *pkt, int payload) +{ + qp->req.opcode = pkt->opcode; + + if (pkt->mask & SW_END_MASK) + qp->req.wqe_index = next_index(qp->sq.queue, qp->req.wqe_index); + + qp->need_req_skb = 0; + + if (qp->qp_timeout_jiffies && !timer_pending(&qp->retrans_timer)) + mod_timer(&qp->retrans_timer, + jiffies + qp->qp_timeout_jiffies); +} + +int sw_requester(void *arg) +{ + struct sw_qp *qp = (struct sw_qp *)arg; + struct sw_pkt_info pkt; + struct sk_buff *skb; + struct sw_send_wqe *wqe; + enum sw_hdr_mask mask; + int payload; + int mtu; + int opcode; + int ret; + struct sw_send_wqe rollback_wqe; + u32 rollback_psn; + + sw_add_ref(qp); + +next_wqe: + if (unlikely(!qp->valid || qp->req.state == QP_STATE_ERROR)) + goto exit; + + if (unlikely(qp->req.state == QP_STATE_RESET)) { + qp->req.wqe_index = consumer_index(qp->sq.queue); + qp->req.opcode = -1; + qp->req.need_rd_atomic = 0; + qp->req.wait_psn = 0; + qp->req.need_retry = 0; + goto exit; + } + + if (unlikely(qp->req.need_retry)) { + req_retry(qp); + qp->req.need_retry = 0; + } + + wqe = req_next_wqe(qp); + if (unlikely(!wqe)) + goto exit; + + if (wqe->mask & WR_REG_MASK) { + if (wqe->wr.opcode == IB_WR_LOCAL_INV) { + struct sw_dev *sw = to_rdev(qp->ibqp.device); + struct sw_mem *rmr; + + rmr = sw_pool_get_index(&sw->mr_pool, + wqe->wr.ex.invalidate_rkey >> 8); + if (!rmr) { + pr_err("No mr for key %#x\n", + wqe->wr.ex.invalidate_rkey); + wqe->state = wqe_state_error; + wqe->status = IB_WC_MW_BIND_ERR; + goto exit; + } + rmr->state = SW_MEM_STATE_FREE; + sw_drop_ref(rmr); + wqe->state = wqe_state_done; + wqe->status = IB_WC_SUCCESS; + } else if (wqe->wr.opcode == IB_WR_REG_MR) { + struct sw_mem *rmr = to_rmr(wqe->wr.wr.reg.mr); + + rmr->state = SW_MEM_STATE_VALID; + rmr->access = wqe->wr.wr.reg.access; + rmr->ibmr.lkey = wqe->wr.wr.reg.key; + rmr->ibmr.rkey = wqe->wr.wr.reg.key; + rmr->iova = wqe->wr.wr.reg.mr->iova; + wqe->state = wqe_state_done; + wqe->status = IB_WC_SUCCESS; + } else { + goto exit; + } + if ((wqe->wr.send_flags & IB_SEND_SIGNALED) || + qp->sq_sig_type == IB_SIGNAL_ALL_WR) + sw_run_task(&qp->comp.task, 1); + qp->req.wqe_index = next_index(qp->sq.queue, + qp->req.wqe_index); + goto next_wqe; + } + + if (unlikely(qp_type(qp) == IB_QPT_RC && + psn_compare(qp->req.psn, (qp->comp.psn + + SW_MAX_UNACKED_PSNS)) > 0)) { + qp->req.wait_psn = 1; + goto exit; + } + + /* Limit the number of inflight SKBs per QP */ + if (unlikely(atomic_read(&qp->skb_out) > + SW_INFLIGHT_SKBS_PER_QP_HIGH)) { + qp->need_req_skb = 1; + goto exit; + } + + opcode = next_opcode(qp, wqe, wqe->wr.opcode); + if (unlikely(opcode < 0)) { + wqe->status = IB_WC_LOC_QP_OP_ERR; + goto exit; + } + + mask = sw_opcode[opcode].mask; + if (unlikely(mask & SW_READ_OR_ATOMIC)) { + if (check_init_depth(qp, wqe)) + goto exit; + } + + mtu = get_mtu(qp); + payload = (mask & SW_WRITE_OR_SEND) ? wqe->dma.resid : 0; + if (payload > mtu) { + if (qp_type(qp) == IB_QPT_UD) { + /* C10-93.1.1: If the total sum of all the buffer lengths specified for a + * UD message exceeds the MTU of the port as returned by QueryHCA, the CI + * shall not emit any packets for this message. Further, the CI shall not + * generate an error due to this condition. 
+ */ + + /* fake a successful UD send */ + wqe->first_psn = qp->req.psn; + wqe->last_psn = qp->req.psn; + qp->req.psn = (qp->req.psn + 1) & BTH_PSN_MASK; + qp->req.opcode = IB_OPCODE_UD_SEND_ONLY; + qp->req.wqe_index = next_index(qp->sq.queue, + qp->req.wqe_index); + wqe->state = wqe_state_done; + wqe->status = IB_WC_SUCCESS; + __sw_do_task(&qp->comp.task); + sw_drop_ref(qp); + return 0; + } + payload = mtu; + } + + skb = init_req_packet(qp, wqe, opcode, payload, &pkt); + if (unlikely(!skb)) { + pr_err("qp#%d Failed allocating skb\n", qp_num(qp)); + goto err; + } + + if (fill_packet(qp, wqe, &pkt, skb, payload)) { + pr_debug("qp#%d Error during fill packet\n", qp_num(qp)); + kfree_skb(skb); + goto err; + } + + /* + * To prevent a race on wqe access between requester and completer, + * wqe members state and psn need to be set before calling + * sw_xmit_packet(). + * Otherwise, completer might initiate an unjustified retry flow. + */ + save_state(wqe, qp, &rollback_wqe, &rollback_psn); + update_wqe_state(qp, wqe, &pkt); + update_wqe_psn(qp, wqe, &pkt, payload); + ret = sw_xmit_packet(qp, &pkt, skb); + if (ret) { + qp->need_req_skb = 1; + pr_err("sw_xmit_packet %d", ret); + + rollback_state(wqe, qp, &rollback_wqe, rollback_psn); + + if (ret == -EAGAIN) { + sw_run_task(&qp->req.task, 1); + goto exit; + } + + goto err; + } + + update_state(qp, wqe, &pkt, payload); + + goto next_wqe; + +err: + wqe->status = IB_WC_LOC_PROT_ERR; + wqe->state = wqe_state_error; + __sw_do_task(&qp->comp.task); + +exit: + sw_drop_ref(qp); + return -EAGAIN; +} diff --git a/drivers/infiniband/hw/erdma/compat/sw_resp.c b/drivers/infiniband/hw/erdma/compat/sw_resp.c new file mode 100644 index 0000000000000000000000000000000000000000..429fc3472691c97c25d1ab547081cae9218cf07d --- /dev/null +++ b/drivers/infiniband/hw/erdma/compat/sw_resp.c @@ -0,0 +1,1381 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* + * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved. + * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved. 
+ */ + +#include + +#include "sw.h" +#include "sw_loc.h" +#include "sw_queue.h" + +enum resp_states { + RESPST_NONE, + RESPST_GET_REQ, + RESPST_CHK_PSN, + RESPST_CHK_OP_SEQ, + RESPST_CHK_OP_VALID, + RESPST_CHK_RESOURCE, + RESPST_CHK_LENGTH, + RESPST_CHK_RKEY, + RESPST_EXECUTE, + RESPST_READ_REPLY, + RESPST_COMPLETE, + RESPST_ACKNOWLEDGE, + RESPST_CLEANUP, + RESPST_DUPLICATE_REQUEST, + RESPST_ERR_MALFORMED_WQE, + RESPST_ERR_UNSUPPORTED_OPCODE, + RESPST_ERR_MISALIGNED_ATOMIC, + RESPST_ERR_PSN_OUT_OF_SEQ, + RESPST_ERR_MISSING_OPCODE_FIRST, + RESPST_ERR_MISSING_OPCODE_LAST_C, + RESPST_ERR_MISSING_OPCODE_LAST_D1E, + RESPST_ERR_TOO_MANY_RDMA_ATM_REQ, + RESPST_ERR_RNR, + RESPST_ERR_RKEY_VIOLATION, + RESPST_ERR_LENGTH, + RESPST_ERR_CQ_OVERFLOW, + RESPST_ERROR, + RESPST_RESET, + RESPST_DONE, + RESPST_EXIT, +}; + +static char *resp_state_name[] = { + [RESPST_NONE] = "NONE", + [RESPST_GET_REQ] = "GET_REQ", + [RESPST_CHK_PSN] = "CHK_PSN", + [RESPST_CHK_OP_SEQ] = "CHK_OP_SEQ", + [RESPST_CHK_OP_VALID] = "CHK_OP_VALID", + [RESPST_CHK_RESOURCE] = "CHK_RESOURCE", + [RESPST_CHK_LENGTH] = "CHK_LENGTH", + [RESPST_CHK_RKEY] = "CHK_RKEY", + [RESPST_EXECUTE] = "EXECUTE", + [RESPST_READ_REPLY] = "READ_REPLY", + [RESPST_COMPLETE] = "COMPLETE", + [RESPST_ACKNOWLEDGE] = "ACKNOWLEDGE", + [RESPST_CLEANUP] = "CLEANUP", + [RESPST_DUPLICATE_REQUEST] = "DUPLICATE_REQUEST", + [RESPST_ERR_MALFORMED_WQE] = "ERR_MALFORMED_WQE", + [RESPST_ERR_UNSUPPORTED_OPCODE] = "ERR_UNSUPPORTED_OPCODE", + [RESPST_ERR_MISALIGNED_ATOMIC] = "ERR_MISALIGNED_ATOMIC", + [RESPST_ERR_PSN_OUT_OF_SEQ] = "ERR_PSN_OUT_OF_SEQ", + [RESPST_ERR_MISSING_OPCODE_FIRST] = "ERR_MISSING_OPCODE_FIRST", + [RESPST_ERR_MISSING_OPCODE_LAST_C] = "ERR_MISSING_OPCODE_LAST_C", + [RESPST_ERR_MISSING_OPCODE_LAST_D1E] = "ERR_MISSING_OPCODE_LAST_D1E", + [RESPST_ERR_TOO_MANY_RDMA_ATM_REQ] = "ERR_TOO_MANY_RDMA_ATM_REQ", + [RESPST_ERR_RNR] = "ERR_RNR", + [RESPST_ERR_RKEY_VIOLATION] = "ERR_RKEY_VIOLATION", + [RESPST_ERR_LENGTH] = "ERR_LENGTH", + [RESPST_ERR_CQ_OVERFLOW] = "ERR_CQ_OVERFLOW", + [RESPST_ERROR] = "ERROR", + [RESPST_RESET] = "RESET", + [RESPST_DONE] = "DONE", + [RESPST_EXIT] = "EXIT", +}; + +/* sw_recv calls here to add a request packet to the input queue */ +void sw_resp_queue_pkt(struct sw_qp *qp, struct sk_buff *skb) +{ + int must_sched; + struct sw_pkt_info *pkt = SKB_TO_PKT(skb); + + skb_queue_tail(&qp->req_pkts, skb); + + must_sched = (pkt->opcode == IB_OPCODE_RC_RDMA_READ_REQUEST) || + (skb_queue_len(&qp->req_pkts) > 1); + + sw_run_task(&qp->resp.task, must_sched); +} + +static inline enum resp_states get_req(struct sw_qp *qp, + struct sw_pkt_info **pkt_p) +{ + struct sk_buff *skb; + + if (qp->resp.state == QP_STATE_ERROR) { + while ((skb = skb_dequeue(&qp->req_pkts))) { + sw_drop_ref(qp); + kfree_skb(skb); + } + + /* go drain recv wr queue */ + return RESPST_CHK_RESOURCE; + } + + skb = skb_peek(&qp->req_pkts); + if (!skb) + return RESPST_EXIT; + + *pkt_p = SKB_TO_PKT(skb); + + return (qp->resp.res) ? 
RESPST_READ_REPLY : RESPST_CHK_PSN; +} + +static enum resp_states check_psn(struct sw_qp *qp, + struct sw_pkt_info *pkt) +{ + int diff = psn_compare(pkt->psn, qp->resp.psn); + struct sw_dev *sw = to_rdev(qp->ibqp.device); + + switch (qp_type(qp)) { + case IB_QPT_RC: + if (diff > 0) { + if (qp->resp.sent_psn_nak) + return RESPST_CLEANUP; + + qp->resp.sent_psn_nak = 1; + sw_counter_inc(sw, SW_CNT_OUT_OF_SEQ_REQ); + return RESPST_ERR_PSN_OUT_OF_SEQ; + + } else if (diff < 0) { + sw_counter_inc(sw, SW_CNT_DUP_REQ); + return RESPST_DUPLICATE_REQUEST; + } + + if (qp->resp.sent_psn_nak) + qp->resp.sent_psn_nak = 0; + + break; + + case IB_QPT_UC: + if (qp->resp.drop_msg || diff != 0) { + if (pkt->mask & SW_START_MASK) { + qp->resp.drop_msg = 0; + return RESPST_CHK_OP_SEQ; + } + + qp->resp.drop_msg = 1; + return RESPST_CLEANUP; + } + break; + default: + break; + } + + return RESPST_CHK_OP_SEQ; +} + +static enum resp_states check_op_seq(struct sw_qp *qp, + struct sw_pkt_info *pkt) +{ + switch (qp_type(qp)) { + case IB_QPT_RC: + switch (qp->resp.opcode) { + case IB_OPCODE_RC_SEND_FIRST: + case IB_OPCODE_RC_SEND_MIDDLE: + switch (pkt->opcode) { + case IB_OPCODE_RC_SEND_MIDDLE: + case IB_OPCODE_RC_SEND_LAST: + case IB_OPCODE_RC_SEND_LAST_WITH_IMMEDIATE: + case IB_OPCODE_RC_SEND_LAST_WITH_INVALIDATE: + return RESPST_CHK_OP_VALID; + default: + return RESPST_ERR_MISSING_OPCODE_LAST_C; + } + + case IB_OPCODE_RC_RDMA_WRITE_FIRST: + case IB_OPCODE_RC_RDMA_WRITE_MIDDLE: + switch (pkt->opcode) { + case IB_OPCODE_RC_RDMA_WRITE_MIDDLE: + case IB_OPCODE_RC_RDMA_WRITE_LAST: + case IB_OPCODE_RC_RDMA_WRITE_LAST_WITH_IMMEDIATE: + return RESPST_CHK_OP_VALID; + default: + return RESPST_ERR_MISSING_OPCODE_LAST_C; + } + + default: + switch (pkt->opcode) { + case IB_OPCODE_RC_SEND_MIDDLE: + case IB_OPCODE_RC_SEND_LAST: + case IB_OPCODE_RC_SEND_LAST_WITH_IMMEDIATE: + case IB_OPCODE_RC_SEND_LAST_WITH_INVALIDATE: + case IB_OPCODE_RC_RDMA_WRITE_MIDDLE: + case IB_OPCODE_RC_RDMA_WRITE_LAST: + case IB_OPCODE_RC_RDMA_WRITE_LAST_WITH_IMMEDIATE: + return RESPST_ERR_MISSING_OPCODE_FIRST; + default: + return RESPST_CHK_OP_VALID; + } + } + break; + + case IB_QPT_UC: + switch (qp->resp.opcode) { + case IB_OPCODE_UC_SEND_FIRST: + case IB_OPCODE_UC_SEND_MIDDLE: + switch (pkt->opcode) { + case IB_OPCODE_UC_SEND_MIDDLE: + case IB_OPCODE_UC_SEND_LAST: + case IB_OPCODE_UC_SEND_LAST_WITH_IMMEDIATE: + return RESPST_CHK_OP_VALID; + default: + return RESPST_ERR_MISSING_OPCODE_LAST_D1E; + } + + case IB_OPCODE_UC_RDMA_WRITE_FIRST: + case IB_OPCODE_UC_RDMA_WRITE_MIDDLE: + switch (pkt->opcode) { + case IB_OPCODE_UC_RDMA_WRITE_MIDDLE: + case IB_OPCODE_UC_RDMA_WRITE_LAST: + case IB_OPCODE_UC_RDMA_WRITE_LAST_WITH_IMMEDIATE: + return RESPST_CHK_OP_VALID; + default: + return RESPST_ERR_MISSING_OPCODE_LAST_D1E; + } + + default: + switch (pkt->opcode) { + case IB_OPCODE_UC_SEND_MIDDLE: + case IB_OPCODE_UC_SEND_LAST: + case IB_OPCODE_UC_SEND_LAST_WITH_IMMEDIATE: + case IB_OPCODE_UC_RDMA_WRITE_MIDDLE: + case IB_OPCODE_UC_RDMA_WRITE_LAST: + case IB_OPCODE_UC_RDMA_WRITE_LAST_WITH_IMMEDIATE: + qp->resp.drop_msg = 1; + return RESPST_CLEANUP; + default: + return RESPST_CHK_OP_VALID; + } + } + break; + + default: + return RESPST_CHK_OP_VALID; + } +} + +static enum resp_states check_op_valid(struct sw_qp *qp, + struct sw_pkt_info *pkt) +{ + switch (qp_type(qp)) { + case IB_QPT_RC: + if (((pkt->mask & SW_READ_MASK) && + !(qp->attr.qp_access_flags & IB_ACCESS_REMOTE_READ)) || + ((pkt->mask & SW_WRITE_MASK) && + !(qp->attr.qp_access_flags & IB_ACCESS_REMOTE_WRITE)) || 
+ ((pkt->mask & SW_ATOMIC_MASK) && + !(qp->attr.qp_access_flags & IB_ACCESS_REMOTE_ATOMIC))) { + return RESPST_ERR_UNSUPPORTED_OPCODE; + } + + break; + + case IB_QPT_UC: + if ((pkt->mask & SW_WRITE_MASK) && + !(qp->attr.qp_access_flags & IB_ACCESS_REMOTE_WRITE)) { + qp->resp.drop_msg = 1; + return RESPST_CLEANUP; + } + + break; + + case IB_QPT_UD: + case IB_QPT_SMI: + case IB_QPT_GSI: + break; + + default: + WARN_ON_ONCE(1); + break; + } + + return RESPST_CHK_RESOURCE; +} + +static enum resp_states get_srq_wqe(struct sw_qp *qp) +{ + struct sw_srq *srq = qp->srq; + struct sw_queue *q = srq->rq.queue; + struct sw_recv_wqe *wqe; + struct ib_event ev; + + if (srq->error) + return RESPST_ERR_RNR; + + spin_lock_bh(&srq->rq.consumer_lock); + + wqe = queue_head(q); + if (!wqe) { + spin_unlock_bh(&srq->rq.consumer_lock); + return RESPST_ERR_RNR; + } + + /* note kernel and user space recv wqes have same size */ + memcpy(&qp->resp.srq_wqe, wqe, sizeof(qp->resp.srq_wqe)); + + qp->resp.wqe = &qp->resp.srq_wqe.wqe; + advance_consumer(q); + + if (srq->limit && srq->ibsrq.event_handler && + (queue_count(q) < srq->limit)) { + srq->limit = 0; + goto event; + } + + spin_unlock_bh(&srq->rq.consumer_lock); + return RESPST_CHK_LENGTH; + +event: + spin_unlock_bh(&srq->rq.consumer_lock); + ev.device = qp->ibqp.device; + ev.element.srq = qp->ibqp.srq; + ev.event = IB_EVENT_SRQ_LIMIT_REACHED; + srq->ibsrq.event_handler(&ev, srq->ibsrq.srq_context); + return RESPST_CHK_LENGTH; +} + +static enum resp_states check_resource(struct sw_qp *qp, + struct sw_pkt_info *pkt) +{ + struct sw_srq *srq = qp->srq; + + if (qp->resp.state == QP_STATE_ERROR) { + if (qp->resp.wqe) { + qp->resp.status = IB_WC_WR_FLUSH_ERR; + return RESPST_COMPLETE; + } else if (!srq) { + qp->resp.wqe = queue_head(qp->rq.queue); + if (qp->resp.wqe) { + qp->resp.status = IB_WC_WR_FLUSH_ERR; + return RESPST_COMPLETE; + } else { + return RESPST_EXIT; + } + } else { + return RESPST_EXIT; + } + } + + if (pkt->mask & SW_READ_OR_ATOMIC) { + /* it is the requesters job to not send + * too many read/atomic ops, we just + * recycle the responder resource queue + */ + if (likely(qp->attr.max_dest_rd_atomic > 0)) + return RESPST_CHK_LENGTH; + else + return RESPST_ERR_TOO_MANY_RDMA_ATM_REQ; + } + + if (pkt->mask & SW_RWR_MASK) { + if (srq) + return get_srq_wqe(qp); + + qp->resp.wqe = queue_head(qp->rq.queue); + return (qp->resp.wqe) ? RESPST_CHK_LENGTH : RESPST_ERR_RNR; + } + + return RESPST_CHK_LENGTH; +} + +static enum resp_states check_length(struct sw_qp *qp, + struct sw_pkt_info *pkt) +{ + switch (qp_type(qp)) { + case IB_QPT_RC: + return RESPST_CHK_RKEY; + + case IB_QPT_UC: + return RESPST_CHK_RKEY; + + default: + return RESPST_CHK_RKEY; + } +} + +static enum resp_states check_rkey(struct sw_qp *qp, + struct sw_pkt_info *pkt) +{ + struct sw_mem *mem = NULL; + u64 va; + u32 rkey; + u32 resid; + u32 pktlen; + int mtu = qp->mtu; + enum resp_states state; + int access; + + if (pkt->mask & (SW_READ_MASK | SW_WRITE_MASK)) { + if (pkt->mask & SW_RETH_MASK) { + qp->resp.va = reth_va(pkt); + qp->resp.rkey = reth_rkey(pkt); + qp->resp.resid = reth_len(pkt); + qp->resp.length = reth_len(pkt); + } + access = (pkt->mask & SW_READ_MASK) ? IB_ACCESS_REMOTE_READ + : IB_ACCESS_REMOTE_WRITE; + } else if (pkt->mask & SW_ATOMIC_MASK) { + qp->resp.va = atmeth_va(pkt); + qp->resp.rkey = atmeth_rkey(pkt); + qp->resp.resid = sizeof(u64); + access = IB_ACCESS_REMOTE_ATOMIC; + } else { + return RESPST_EXECUTE; + } + + /* A zero-byte op is not required to set an addr or rkey. 
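+ * Skip the MR lookup and range checks for zero-length operations.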
*/ + if ((pkt->mask & (SW_READ_MASK | SW_WRITE_OR_SEND)) && + (pkt->mask & SW_RETH_MASK) && + reth_len(pkt) == 0) { + return RESPST_EXECUTE; + } + + va = qp->resp.va; + rkey = qp->resp.rkey; + resid = qp->resp.resid; + pktlen = payload_size(pkt); + + mem = lookup_mem(qp->pd, access, rkey, lookup_remote); + if (!mem) { + state = RESPST_ERR_RKEY_VIOLATION; + goto err; + } + + if (unlikely(mem->state == SW_MEM_STATE_FREE)) { + state = RESPST_ERR_RKEY_VIOLATION; + goto err; + } + + if (mem_check_range(mem, va, resid)) { + state = RESPST_ERR_RKEY_VIOLATION; + goto err; + } + + if (pkt->mask & SW_WRITE_MASK) { + if (resid > mtu) { + if (pktlen != mtu || bth_pad(pkt)) { + state = RESPST_ERR_LENGTH; + goto err; + } + } else { + if (pktlen != resid) { + state = RESPST_ERR_LENGTH; + goto err; + } + if ((bth_pad(pkt) != (0x3 & (-resid)))) { + /* This case may not be exactly that + * but nothing else fits. + */ + state = RESPST_ERR_LENGTH; + goto err; + } + } + } + + WARN_ON_ONCE(qp->resp.mr); + + qp->resp.mr = mem; + return RESPST_EXECUTE; + +err: + if (mem) + sw_drop_ref(mem); + return state; +} + +static enum resp_states send_data_in(struct sw_qp *qp, void *data_addr, + int data_len) +{ + int err; + + err = sw_copy_data(qp->pd, IB_ACCESS_LOCAL_WRITE, &qp->resp.wqe->dma, + data_addr, data_len, to_mem_obj, NULL); + if (unlikely(err)) + return (err == -ENOSPC) ? RESPST_ERR_LENGTH + : RESPST_ERR_MALFORMED_WQE; + + return RESPST_NONE; +} + +static enum resp_states write_data_in(struct sw_qp *qp, + struct sw_pkt_info *pkt) +{ + enum resp_states rc = RESPST_NONE; + int err; + int data_len = payload_size(pkt); + + err = sw_mem_copy(qp->resp.mr, qp->resp.va, payload_addr(pkt), + data_len, to_mem_obj, NULL); + if (err) { + rc = RESPST_ERR_RKEY_VIOLATION; + goto out; + } + + qp->resp.va += data_len; + qp->resp.resid -= data_len; + +out: + return rc; +} + +/* Guarantee atomicity of atomic operations at the machine level. */ +static DEFINE_SPINLOCK(atomic_ops_lock); + +static enum resp_states process_atomic(struct sw_qp *qp, + struct sw_pkt_info *pkt) +{ + u64 iova = atmeth_va(pkt); + u64 *vaddr; + enum resp_states ret; + struct sw_mem *mr = qp->resp.mr; + + if (mr->state != SW_MEM_STATE_VALID) { + ret = RESPST_ERR_RKEY_VIOLATION; + goto out; + } + + vaddr = iova_to_vaddr(mr, iova, sizeof(u64)); + + /* check vaddr is 8 bytes aligned. 
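+ * Atomic operands must be naturally aligned 64-bit values.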
*/ + if (!vaddr || (uintptr_t)vaddr & 7) { + ret = RESPST_ERR_MISALIGNED_ATOMIC; + goto out; + } + + spin_lock_bh(&atomic_ops_lock); + + qp->resp.atomic_orig = *vaddr; + + if (pkt->opcode == IB_OPCODE_RC_COMPARE_SWAP || + pkt->opcode == IB_OPCODE_RD_COMPARE_SWAP) { + if (*vaddr == atmeth_comp(pkt)) + *vaddr = atmeth_swap_add(pkt); + } else { + *vaddr += atmeth_swap_add(pkt); + } + + spin_unlock_bh(&atomic_ops_lock); + + ret = RESPST_NONE; +out: + return ret; +} + +static struct sk_buff *prepare_ack_packet(struct sw_qp *qp, + struct sw_pkt_info *pkt, + struct sw_pkt_info *ack, + int opcode, + int payload, + u32 psn, + u8 syndrome, + u32 *crcp) +{ + struct sw_dev *sw = to_rdev(qp->ibqp.device); + struct sk_buff *skb; + u32 crc = 0; + u32 *p; + int paylen; + int pad; + int err; + + /* + * allocate packet + */ + pad = (-payload) & 0x3; + paylen = sw_opcode[opcode].length + payload + pad + SW_ICRC_SIZE; + + skb = sw_init_packet(sw, &qp->pri_av, paylen, ack); + if (!skb) + return NULL; + + ack->qp = qp; + ack->opcode = opcode; + ack->mask = sw_opcode[opcode].mask; + ack->offset = pkt->offset; + ack->paylen = paylen; + + /* fill in bth using the request packet headers */ + memcpy(ack->hdr, pkt->hdr, pkt->offset + SW_BTH_BYTES); + + bth_set_opcode(ack, opcode); + bth_set_qpn(ack, qp->attr.dest_qp_num); + bth_set_pad(ack, pad); + bth_set_se(ack, 0); + bth_set_psn(ack, psn); + bth_set_ack(ack, 0); + ack->psn = psn; + + if (ack->mask & SW_AETH_MASK) { + aeth_set_syn(ack, syndrome); + aeth_set_msn(ack, qp->resp.msn); + } + + if (ack->mask & SW_ATMACK_MASK) + atmack_set_orig(ack, qp->resp.atomic_orig); + + err = sw_prepare(ack, skb, &crc); + if (err) { + kfree_skb(skb); + return NULL; + } + + if (crcp) { + /* CRC computation will be continued by the caller */ + *crcp = crc; + } else { + p = payload_addr(ack) + payload + bth_pad(ack); + *p = ~crc; + } + + return skb; +} + +/* RDMA read response. If res is not NULL, then we have a current RDMA request + * being processed or replayed. + */ +static enum resp_states read_reply(struct sw_qp *qp, + struct sw_pkt_info *req_pkt) +{ + struct sw_pkt_info ack_pkt; + struct sk_buff *skb; + int mtu = qp->mtu; + enum resp_states state; + int payload; + int opcode; + int err; + struct resp_res *res = qp->resp.res; + u32 icrc; + u32 *p; + + if (!res) { + /* This is the first time we process that request. 
Get a + * resource + */ + res = &qp->resp.resources[qp->resp.res_head]; + + sw_free_rd_atomic_resource(qp, res); + sw_advance_resp_resource(qp); + + res->type = SW_READ_MASK; + res->replay = 0; + + res->read.va = qp->resp.va; + res->read.va_org = qp->resp.va; + + res->first_psn = req_pkt->psn; + + if (reth_len(req_pkt)) { + res->last_psn = (req_pkt->psn + + (reth_len(req_pkt) + mtu - 1) / + mtu - 1) & BTH_PSN_MASK; + } else { + res->last_psn = res->first_psn; + } + res->cur_psn = req_pkt->psn; + + res->read.resid = qp->resp.resid; + res->read.length = qp->resp.resid; + res->read.rkey = qp->resp.rkey; + + /* note res inherits the reference to mr from qp */ + res->read.mr = qp->resp.mr; + qp->resp.mr = NULL; + + qp->resp.res = res; + res->state = rdatm_res_state_new; + } + + if (res->state == rdatm_res_state_new) { + if (res->read.resid <= mtu) + opcode = IB_OPCODE_RC_RDMA_READ_RESPONSE_ONLY; + else + opcode = IB_OPCODE_RC_RDMA_READ_RESPONSE_FIRST; + } else { + if (res->read.resid > mtu) + opcode = IB_OPCODE_RC_RDMA_READ_RESPONSE_MIDDLE; + else + opcode = IB_OPCODE_RC_RDMA_READ_RESPONSE_LAST; + } + + res->state = rdatm_res_state_next; + + payload = min_t(int, res->read.resid, mtu); + + skb = prepare_ack_packet(qp, req_pkt, &ack_pkt, opcode, payload, + res->cur_psn, AETH_ACK_UNLIMITED, &icrc); + if (!skb) + return RESPST_ERR_RNR; + + err = sw_mem_copy(res->read.mr, res->read.va, payload_addr(&ack_pkt), + payload, from_mem_obj, &icrc); + if (err) + pr_err("Failed copying memory\n"); + + if (bth_pad(&ack_pkt)) { + struct sw_dev *sw = to_rdev(qp->ibqp.device); + u8 *pad = payload_addr(&ack_pkt) + payload; + + memset(pad, 0, bth_pad(&ack_pkt)); + icrc = sw_crc32(sw, icrc, pad, bth_pad(&ack_pkt)); + } + p = payload_addr(&ack_pkt) + payload + bth_pad(&ack_pkt); + *p = ~icrc; + + err = sw_xmit_packet(qp, &ack_pkt, skb); + if (err) { + pr_err("Failed sending RDMA reply.\n"); + return RESPST_ERR_RNR; + } + + res->read.va += payload; + res->read.resid -= payload; + res->cur_psn = (res->cur_psn + 1) & BTH_PSN_MASK; + + if (res->read.resid > 0) { + state = RESPST_DONE; + } else { + qp->resp.res = NULL; + if (!res->replay) + qp->resp.opcode = -1; + if (psn_compare(res->cur_psn, qp->resp.psn) >= 0) + qp->resp.psn = res->cur_psn; + state = RESPST_CLEANUP; + } + + return state; +} + +static void build_rdma_network_hdr(union rdma_network_hdr *hdr, + struct sw_pkt_info *pkt) +{ + struct sk_buff *skb = PKT_TO_SKB(pkt); + + memset(hdr, 0, sizeof(*hdr)); + if (skb->protocol == htons(ETH_P_IP)) + memcpy(&hdr->roce4grh, ip_hdr(skb), sizeof(hdr->roce4grh)); + else if (skb->protocol == htons(ETH_P_IPV6)) + memcpy(&hdr->ibgrh, ipv6_hdr(skb), sizeof(hdr->ibgrh)); +} + +/* Executes a new request. A retried request never reach that function (send + * and writes are discarded, and reads and atomics are retried elsewhere. + */ +static enum resp_states execute(struct sw_qp *qp, struct sw_pkt_info *pkt) +{ + enum resp_states err; + + if (pkt->mask & SW_SEND_MASK) { + if (qp_type(qp) == IB_QPT_UD || + qp_type(qp) == IB_QPT_SMI || + qp_type(qp) == IB_QPT_GSI) { + union rdma_network_hdr hdr; + + build_rdma_network_hdr(&hdr, pkt); + + err = send_data_in(qp, &hdr, sizeof(hdr)); + if (err) + return err; + } + err = send_data_in(qp, payload_addr(pkt), payload_size(pkt)); + if (err) + return err; + } else if (pkt->mask & SW_WRITE_MASK) { + err = write_data_in(qp, pkt); + if (err) + return err; + } else if (pkt->mask & SW_READ_MASK) { + /* For RDMA Read we can increment the msn now. See C9-148. 
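+ * The read response itself is generated in the READ_REPLY state.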
*/ + qp->resp.msn++; + return RESPST_READ_REPLY; + } else if (pkt->mask & SW_ATOMIC_MASK) { + err = process_atomic(qp, pkt); + if (err) + return err; + } else { + /* Unreachable */ + WARN_ON_ONCE(1); + } + + /* next expected psn, read handles this separately */ + qp->resp.psn = (pkt->psn + 1) & BTH_PSN_MASK; + qp->resp.ack_psn = qp->resp.psn; + + qp->resp.opcode = pkt->opcode; + qp->resp.status = IB_WC_SUCCESS; + + if (pkt->mask & SW_COMP_MASK) { + /* We successfully processed this new request. */ + qp->resp.msn++; + return RESPST_COMPLETE; + } else if (qp_type(qp) == IB_QPT_RC) + return RESPST_ACKNOWLEDGE; + else + return RESPST_CLEANUP; +} + +static enum resp_states do_complete(struct sw_qp *qp, + struct sw_pkt_info *pkt) +{ + struct sw_cqe cqe; + struct ib_wc *wc = &cqe.ibwc; + struct ib_uverbs_wc *uwc = &cqe.uibwc; + struct sw_recv_wqe *wqe = qp->resp.wqe; + struct sw_dev *sw = to_rdev(qp->ibqp.device); + + if (unlikely(!wqe)) + return RESPST_CLEANUP; + + memset(&cqe, 0, sizeof(cqe)); + + if (qp->rcq->is_user) { + uwc->status = qp->resp.status; + uwc->qp_num = qp->ibqp.qp_num; + uwc->wr_id = wqe->wr_id; + } else { + wc->status = qp->resp.status; + wc->qp = &qp->ibqp; + wc->wr_id = wqe->wr_id; + } + + if (wc->status == IB_WC_SUCCESS) { + sw_counter_inc(sw, SW_CNT_RDMA_RECV); + wc->opcode = (pkt->mask & SW_IMMDT_MASK && + pkt->mask & SW_WRITE_MASK) ? + IB_WC_RECV_RDMA_WITH_IMM : IB_WC_RECV; + wc->vendor_err = 0; + wc->byte_len = (pkt->mask & SW_IMMDT_MASK && + pkt->mask & SW_WRITE_MASK) ? + qp->resp.length : wqe->dma.length - wqe->dma.resid; + + /* fields after byte_len are different between kernel and user + * space + */ + if (qp->rcq->is_user) { + uwc->wc_flags = IB_WC_GRH; + + if (pkt->mask & SW_IMMDT_MASK) { + uwc->wc_flags |= IB_WC_WITH_IMM; + uwc->ex.imm_data = immdt_imm(pkt); + } + + if (pkt->mask & SW_IETH_MASK) { + uwc->wc_flags |= IB_WC_WITH_INVALIDATE; + uwc->ex.invalidate_rkey = ieth_rkey(pkt); + } + + uwc->qp_num = qp->ibqp.qp_num; + + if (pkt->mask & SW_DETH_MASK) + uwc->src_qp = deth_sqp(pkt); + + uwc->port_num = qp->attr.port_num; + } else { + struct sk_buff *skb = PKT_TO_SKB(pkt); + + wc->wc_flags = IB_WC_GRH | IB_WC_WITH_NETWORK_HDR_TYPE; + if (skb->protocol == htons(ETH_P_IP)) + wc->network_hdr_type = RDMA_NETWORK_IPV4; + else + wc->network_hdr_type = RDMA_NETWORK_IPV6; + + if (is_vlan_dev(skb->dev)) { + wc->wc_flags |= IB_WC_WITH_VLAN; + wc->vlan_id = vlan_dev_vlan_id(skb->dev); + } + + if (pkt->mask & SW_IMMDT_MASK) { + wc->wc_flags |= IB_WC_WITH_IMM; + wc->ex.imm_data = immdt_imm(pkt); + } + + if (pkt->mask & SW_IETH_MASK) { + struct sw_mem *rmr; + + wc->wc_flags |= IB_WC_WITH_INVALIDATE; + wc->ex.invalidate_rkey = ieth_rkey(pkt); + + rmr = sw_pool_get_index(&sw->mr_pool, + wc->ex.invalidate_rkey >> 8); + if (unlikely(!rmr)) { + pr_err("Bad rkey %#x invalidation\n", + wc->ex.invalidate_rkey); + return RESPST_ERROR; + } + rmr->state = SW_MEM_STATE_FREE; + sw_drop_ref(rmr); + } + + wc->qp = &qp->ibqp; + + if (pkt->mask & SW_DETH_MASK) + wc->src_qp = deth_sqp(pkt); + + wc->port_num = qp->attr.port_num; + } + } + + /* have copy for srq and reference for !srq */ + if (!qp->srq) + advance_consumer(qp->rq.queue); + + qp->resp.wqe = NULL; + + if (sw_cq_post(qp->rcq, &cqe, pkt ? 
bth_se(pkt) : 1)) + return RESPST_ERR_CQ_OVERFLOW; + + if (qp->resp.state == QP_STATE_ERROR) + return RESPST_CHK_RESOURCE; + + if (!pkt) + return RESPST_DONE; + else if (qp_type(qp) == IB_QPT_RC) + return RESPST_ACKNOWLEDGE; + else + return RESPST_CLEANUP; +} + +static int send_ack(struct sw_qp *qp, struct sw_pkt_info *pkt, + u8 syndrome, u32 psn) +{ + int err = 0; + struct sw_pkt_info ack_pkt; + struct sk_buff *skb; + + skb = prepare_ack_packet(qp, pkt, &ack_pkt, IB_OPCODE_RC_ACKNOWLEDGE, + 0, psn, syndrome, NULL); + if (!skb) { + err = -ENOMEM; + goto err1; + } + + err = sw_xmit_packet(qp, &ack_pkt, skb); + if (err) + pr_err_ratelimited("Failed sending ack\n"); + +err1: + return err; +} + +static int send_atomic_ack(struct sw_qp *qp, struct sw_pkt_info *pkt, + u8 syndrome) +{ + int rc = 0; + struct sw_pkt_info ack_pkt; + struct sk_buff *skb; + struct resp_res *res; + + skb = prepare_ack_packet(qp, pkt, &ack_pkt, + IB_OPCODE_RC_ATOMIC_ACKNOWLEDGE, 0, pkt->psn, + syndrome, NULL); + if (!skb) { + rc = -ENOMEM; + goto out; + } + + res = &qp->resp.resources[qp->resp.res_head]; + sw_free_rd_atomic_resource(qp, res); + sw_advance_resp_resource(qp); + + memcpy(SKB_TO_PKT(skb), &ack_pkt, sizeof(ack_pkt)); + memset((unsigned char *)SKB_TO_PKT(skb) + sizeof(ack_pkt), 0, + sizeof(skb->cb) - sizeof(ack_pkt)); + + skb_get(skb); + res->type = SW_ATOMIC_MASK; + res->atomic.skb = skb; + res->first_psn = ack_pkt.psn; + res->last_psn = ack_pkt.psn; + res->cur_psn = ack_pkt.psn; + + rc = sw_xmit_packet(qp, &ack_pkt, skb); + if (rc) { + pr_err_ratelimited("Failed sending ack\n"); + sw_drop_ref(qp); + } +out: + return rc; +} + +static enum resp_states acknowledge(struct sw_qp *qp, + struct sw_pkt_info *pkt) +{ + if (qp_type(qp) != IB_QPT_RC) + return RESPST_CLEANUP; + + if (qp->resp.aeth_syndrome != AETH_ACK_UNLIMITED) + send_ack(qp, pkt, qp->resp.aeth_syndrome, pkt->psn); + else if (pkt->mask & SW_ATOMIC_MASK) + send_atomic_ack(qp, pkt, AETH_ACK_UNLIMITED); + else if (bth_ack(pkt)) + send_ack(qp, pkt, AETH_ACK_UNLIMITED, pkt->psn); + + return RESPST_CLEANUP; +} + +static enum resp_states cleanup(struct sw_qp *qp, + struct sw_pkt_info *pkt) +{ + struct sk_buff *skb; + + if (pkt) { + skb = skb_dequeue(&qp->req_pkts); + sw_drop_ref(qp); + kfree_skb(skb); + } + + if (qp->resp.mr) { + sw_drop_ref(qp->resp.mr); + qp->resp.mr = NULL; + } + + return RESPST_DONE; +} + +static struct resp_res *find_resource(struct sw_qp *qp, u32 psn) +{ + int i; + + for (i = 0; i < qp->attr.max_dest_rd_atomic; i++) { + struct resp_res *res = &qp->resp.resources[i]; + + if (res->type == 0) + continue; + + if (psn_compare(psn, res->first_psn) >= 0 && + psn_compare(psn, res->last_psn) <= 0) { + return res; + } + } + + return NULL; +} + +static enum resp_states duplicate_request(struct sw_qp *qp, + struct sw_pkt_info *pkt) +{ + enum resp_states rc; + u32 prev_psn = (qp->resp.ack_psn - 1) & BTH_PSN_MASK; + + if (pkt->mask & SW_SEND_MASK || + pkt->mask & SW_WRITE_MASK) { + /* SEND. Ack again and cleanup. C9-105. */ + if (bth_ack(pkt)) + send_ack(qp, pkt, AETH_ACK_UNLIMITED, prev_psn); + rc = RESPST_CLEANUP; + goto out; + } else if (pkt->mask & SW_READ_MASK) { + struct resp_res *res; + + res = find_resource(qp, pkt->psn); + if (!res) { + /* Resource not found. Class D error. Drop the + * request. + */ + rc = RESPST_CLEANUP; + goto out; + } else { + /* Ensure this new request is the same as the previous + * one or a subset of it. 
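+ * Otherwise the duplicate request is simply dropped.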
+ */ + u64 iova = reth_va(pkt); + u32 resid = reth_len(pkt); + + if (iova < res->read.va_org || + resid > res->read.length || + (iova + resid) > (res->read.va_org + + res->read.length)) { + rc = RESPST_CLEANUP; + goto out; + } + + if (reth_rkey(pkt) != res->read.rkey) { + rc = RESPST_CLEANUP; + goto out; + } + + res->cur_psn = pkt->psn; + res->state = (pkt->psn == res->first_psn) ? + rdatm_res_state_new : + rdatm_res_state_replay; + res->replay = 1; + + /* Reset the resource, except length. */ + res->read.va_org = iova; + res->read.va = iova; + res->read.resid = resid; + + /* Replay the RDMA read reply. */ + qp->resp.res = res; + rc = RESPST_READ_REPLY; + goto out; + } + } else { + struct resp_res *res; + + /* Find the operation in our list of responder resources. */ + res = find_resource(qp, pkt->psn); + if (res) { + skb_get(res->atomic.skb); + /* Resend the result. */ + rc = sw_xmit_packet(qp, pkt, res->atomic.skb); + if (rc) { + pr_err("Failed resending result. This flow is not handled - skb ignored\n"); + rc = RESPST_CLEANUP; + goto out; + } + } + + /* Resource not found. Class D error. Drop the request. */ + rc = RESPST_CLEANUP; + goto out; + } +out: + return rc; +} + +/* Process a class A or C. Both are treated the same in this implementation. */ +static void do_class_ac_error(struct sw_qp *qp, u8 syndrome, + enum ib_wc_status status) +{ + qp->resp.aeth_syndrome = syndrome; + qp->resp.status = status; + + /* indicate that we should go through the ERROR state */ + qp->resp.goto_error = 1; +} + +static enum resp_states do_class_d1e_error(struct sw_qp *qp) +{ + /* UC */ + if (qp->srq) { + /* Class E */ + qp->resp.drop_msg = 1; + if (qp->resp.wqe) { + qp->resp.status = IB_WC_REM_INV_REQ_ERR; + return RESPST_COMPLETE; + } else { + return RESPST_CLEANUP; + } + } else { + /* Class D1. This packet may be the start of a + * new message and could be valid. The previous + * message is invalid and ignored. 
reset the + * recv wr to its original state + */ + if (qp->resp.wqe) { + qp->resp.wqe->dma.resid = qp->resp.wqe->dma.length; + qp->resp.wqe->dma.cur_sge = 0; + qp->resp.wqe->dma.sge_offset = 0; + qp->resp.opcode = -1; + } + + if (qp->resp.mr) { + sw_drop_ref(qp->resp.mr); + qp->resp.mr = NULL; + } + + return RESPST_CLEANUP; + } +} + +static void sw_drain_req_pkts(struct sw_qp *qp, bool notify) +{ + struct sk_buff *skb; + + while ((skb = skb_dequeue(&qp->req_pkts))) { + sw_drop_ref(qp); + kfree_skb(skb); + } + + if (notify) + return; + + while (!qp->srq && qp->rq.queue && queue_head(qp->rq.queue)) + advance_consumer(qp->rq.queue); +} + +int sw_responder(void *arg) +{ + struct sw_qp *qp = (struct sw_qp *)arg; + struct sw_dev *sw = to_rdev(qp->ibqp.device); + enum resp_states state; + struct sw_pkt_info *pkt = NULL; + int ret = 0; + + sw_add_ref(qp); + + qp->resp.aeth_syndrome = AETH_ACK_UNLIMITED; + + if (!qp->valid) { + ret = -EINVAL; + goto done; + } + + switch (qp->resp.state) { + case QP_STATE_RESET: + state = RESPST_RESET; + break; + + default: + state = RESPST_GET_REQ; + break; + } + + while (1) { + pr_debug("qp#%d state = %s\n", qp_num(qp), + resp_state_name[state]); + switch (state) { + case RESPST_GET_REQ: + state = get_req(qp, &pkt); + break; + case RESPST_CHK_PSN: + state = check_psn(qp, pkt); + break; + case RESPST_CHK_OP_SEQ: + state = check_op_seq(qp, pkt); + break; + case RESPST_CHK_OP_VALID: + state = check_op_valid(qp, pkt); + break; + case RESPST_CHK_RESOURCE: + state = check_resource(qp, pkt); + break; + case RESPST_CHK_LENGTH: + state = check_length(qp, pkt); + break; + case RESPST_CHK_RKEY: + state = check_rkey(qp, pkt); + break; + case RESPST_EXECUTE: + state = execute(qp, pkt); + break; + case RESPST_COMPLETE: + state = do_complete(qp, pkt); + break; + case RESPST_READ_REPLY: + state = read_reply(qp, pkt); + break; + case RESPST_ACKNOWLEDGE: + state = acknowledge(qp, pkt); + break; + case RESPST_CLEANUP: + state = cleanup(qp, pkt); + break; + case RESPST_DUPLICATE_REQUEST: + state = duplicate_request(qp, pkt); + break; + case RESPST_ERR_PSN_OUT_OF_SEQ: + /* RC only - Class B. Drop packet. */ + send_ack(qp, pkt, AETH_NAK_PSN_SEQ_ERROR, qp->resp.psn); + state = RESPST_CLEANUP; + break; + + case RESPST_ERR_TOO_MANY_RDMA_ATM_REQ: + case RESPST_ERR_MISSING_OPCODE_FIRST: + case RESPST_ERR_MISSING_OPCODE_LAST_C: + case RESPST_ERR_UNSUPPORTED_OPCODE: + case RESPST_ERR_MISALIGNED_ATOMIC: + /* RC Only - Class C. */ + do_class_ac_error(qp, AETH_NAK_INVALID_REQ, + IB_WC_REM_INV_REQ_ERR); + state = RESPST_COMPLETE; + break; + + case RESPST_ERR_MISSING_OPCODE_LAST_D1E: + state = do_class_d1e_error(qp); + break; + case RESPST_ERR_RNR: + if (qp_type(qp) == IB_QPT_RC) { + sw_counter_inc(sw, SW_CNT_SND_RNR); + /* RC - class B */ + send_ack(qp, pkt, AETH_RNR_NAK | + (~AETH_TYPE_MASK & + qp->attr.min_rnr_timer), + pkt->psn); + } else { + /* UD/UC - class D */ + qp->resp.drop_msg = 1; + } + state = RESPST_CLEANUP; + break; + + case RESPST_ERR_RKEY_VIOLATION: + if (qp_type(qp) == IB_QPT_RC) { + /* Class C */ + do_class_ac_error(qp, AETH_NAK_REM_ACC_ERR, + IB_WC_REM_ACCESS_ERR); + state = RESPST_COMPLETE; + } else { + qp->resp.drop_msg = 1; + if (qp->srq) { + /* UC/SRQ Class D */ + qp->resp.status = IB_WC_REM_ACCESS_ERR; + state = RESPST_COMPLETE; + } else { + /* UC/non-SRQ Class E. 
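+ * Drop the packet without generating a completion.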
*/ + state = RESPST_CLEANUP; + } + } + break; + + case RESPST_ERR_LENGTH: + if (qp_type(qp) == IB_QPT_RC) { + /* Class C */ + do_class_ac_error(qp, AETH_NAK_INVALID_REQ, + IB_WC_REM_INV_REQ_ERR); + state = RESPST_COMPLETE; + } else if (qp->srq) { + /* UC/UD - class E */ + qp->resp.status = IB_WC_REM_INV_REQ_ERR; + state = RESPST_COMPLETE; + } else { + /* UC/UD - class D */ + qp->resp.drop_msg = 1; + state = RESPST_CLEANUP; + } + break; + + case RESPST_ERR_MALFORMED_WQE: + /* All, Class A. */ + do_class_ac_error(qp, AETH_NAK_REM_OP_ERR, + IB_WC_LOC_QP_OP_ERR); + state = RESPST_COMPLETE; + break; + + case RESPST_ERR_CQ_OVERFLOW: + /* All - Class G */ + state = RESPST_ERROR; + break; + + case RESPST_DONE: + if (qp->resp.goto_error) { + state = RESPST_ERROR; + break; + } + + goto done; + + case RESPST_EXIT: + if (qp->resp.goto_error) { + state = RESPST_ERROR; + break; + } + + goto exit; + + case RESPST_RESET: + sw_drain_req_pkts(qp, false); + qp->resp.wqe = NULL; + goto exit; + + case RESPST_ERROR: + qp->resp.goto_error = 0; + pr_warn("qp#%d moved to error state\n", qp_num(qp)); + sw_qp_error(qp); + goto exit; + + default: + WARN_ON_ONCE(1); + } + } + +exit: + ret = -EAGAIN; +done: + sw_drop_ref(qp); + return ret; +} diff --git a/drivers/infiniband/hw/erdma/compat/sw_task.c b/drivers/infiniband/hw/erdma/compat/sw_task.c new file mode 100644 index 0000000000000000000000000000000000000000..19eaf4465b93b5d12ef50056fb6facddfa1acdce --- /dev/null +++ b/drivers/infiniband/hw/erdma/compat/sw_task.c @@ -0,0 +1,148 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* + * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved. + * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved. + */ + +#include +#include +#include + +#include "sw_task.h" + +int __sw_do_task(struct sw_task *task) + +{ + int ret; + + while ((ret = task->func(task->arg)) == 0) + ; + + task->ret = ret; + + return ret; +} + +/* + * this locking is due to a potential race where + * a second caller finds the task already running + * but looks just after the last call to func + */ +void sw_do_task(unsigned long data) +{ + int cont; + int ret; + unsigned long flags; + struct sw_task *task = (struct sw_task *)data; + + spin_lock_irqsave(&task->state_lock, flags); + switch (task->state) { + case TASK_STATE_START: + task->state = TASK_STATE_BUSY; + spin_unlock_irqrestore(&task->state_lock, flags); + break; + + case TASK_STATE_BUSY: + task->state = TASK_STATE_ARMED; +#ifdef fallthrough + fallthrough; +#endif + case TASK_STATE_ARMED: + spin_unlock_irqrestore(&task->state_lock, flags); + return; + + default: + spin_unlock_irqrestore(&task->state_lock, flags); + pr_warn("%s failed with bad state %d\n", __func__, task->state); + return; + } + + do { + cont = 0; + ret = task->func(task->arg); + + spin_lock_irqsave(&task->state_lock, flags); + switch (task->state) { + case TASK_STATE_BUSY: + if (ret) + task->state = TASK_STATE_START; + else + cont = 1; + break; + + /* soneone tried to run the task since the last time we called + * func, so we will call one more time regardless of the + * return value + */ + case TASK_STATE_ARMED: + task->state = TASK_STATE_BUSY; + cont = 1; + break; + + default: + pr_warn("%s failed with bad state %d\n", __func__, + task->state); + } + spin_unlock_irqrestore(&task->state_lock, flags); + } while (cont); + + task->ret = ret; +} + +int sw_init_task(void *obj, struct sw_task *task, + void *arg, int (*func)(void *), char *name) +{ + task->obj = obj; + task->arg = arg; + task->func = func; + 
snprintf(task->name, sizeof(task->name), "%s", name); + task->destroyed = false; + + tasklet_init(&task->tasklet, sw_do_task, (unsigned long)task); + + task->state = TASK_STATE_START; + spin_lock_init(&task->state_lock); + + return 0; +} + +void sw_cleanup_task(struct sw_task *task) +{ + unsigned long flags; + bool idle; + + /* + * Mark the task, then wait for it to finish. It might be + * running in a non-tasklet (direct call) context. + */ + task->destroyed = true; + + do { + spin_lock_irqsave(&task->state_lock, flags); + idle = (task->state == TASK_STATE_START); + spin_unlock_irqrestore(&task->state_lock, flags); + } while (!idle); + + tasklet_kill(&task->tasklet); +} + +void sw_run_task(struct sw_task *task, int sched) +{ + if (task->destroyed) + return; + + if (sched) + tasklet_schedule(&task->tasklet); + else + sw_do_task((unsigned long)task); +} + +void sw_disable_task(struct sw_task *task) +{ + tasklet_disable(&task->tasklet); +} + +void sw_enable_task(struct sw_task *task) +{ + tasklet_enable(&task->tasklet); +} diff --git a/drivers/infiniband/hw/erdma/compat/sw_task.h b/drivers/infiniband/hw/erdma/compat/sw_task.h new file mode 100644 index 0000000000000000000000000000000000000000..468df0453470a9cbca35ce44bc72a623d4795f73 --- /dev/null +++ b/drivers/infiniband/hw/erdma/compat/sw_task.h @@ -0,0 +1,69 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* + * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved. + * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved. + */ + +#ifndef SW_TASK_H +#define SW_TASK_H + +enum { + TASK_STATE_START = 0, + TASK_STATE_BUSY = 1, + TASK_STATE_ARMED = 2, +}; + +/* + * data structure to describe a 'task' which is a short + * function that returns 0 as long as it needs to be + * called again. + */ +struct sw_task { + void *obj; + struct tasklet_struct tasklet; + int state; + spinlock_t state_lock; /* spinlock for task state */ + void *arg; + int (*func)(void *arg); + int ret; + char name[16]; + bool destroyed; +}; + +/* + * init sw_task structure + * arg => parameter to pass to fcn + * func => function to call until it returns != 0 + */ +int sw_init_task(void *obj, struct sw_task *task, + void *arg, int (*func)(void *), char *name); + +/* cleanup task */ +void sw_cleanup_task(struct sw_task *task); + +/* + * raw call to func in loop without any checking + * can call when tasklets are disabled + */ +int __sw_do_task(struct sw_task *task); + +/* + * common function called by any of the main tasklets + * If there is any chance that there is additional + * work to do someone must reschedule the task before + * leaving + */ +void sw_do_task(unsigned long data); + +/* run a task, else schedule it to run as a tasklet, The decision + * to run or schedule tasklet is based on the parameter sched. + */ +void sw_run_task(struct sw_task *task, int sched); + +/* keep a task from scheduling */ +void sw_disable_task(struct sw_task *task); + +/* allow task to run */ +void sw_enable_task(struct sw_task *task); + +#endif /* SW_TASK_H */ diff --git a/drivers/infiniband/hw/erdma/compat/sw_verbs.c b/drivers/infiniband/hw/erdma/compat/sw_verbs.c new file mode 100644 index 0000000000000000000000000000000000000000..1a56603e808f9a2ad5c557888594f6f0dfa095ed --- /dev/null +++ b/drivers/infiniband/hw/erdma/compat/sw_verbs.c @@ -0,0 +1,448 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* + * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved. + * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved. 
+ */ + +#include +#include +#include +#include "sw.h" +#include "sw_loc.h" +#include "sw_queue.h" +#include "sw_hw_counters.h" +#include "../erdma_verbs.h" + +int sw_alloc_pd(struct ib_pd *ibpd, struct ib_udata *udata) +{ + struct sw_dev *sw = to_rdev(ibpd->device); + struct sw_pd *pd = to_rpd(ibpd); + + return sw_add_to_pool(&sw->pd_pool, &pd->pelem); +} + +int sw_dealloc_pd(struct ib_pd *ibpd, struct ib_udata *udata) +{ + struct sw_pd *pd = to_rpd(ibpd); + + sw_drop_ref(pd); + return 0; +} + +int sw_create_ah(struct ib_ah *ibah, + struct rdma_ah_attr *attr, + struct ib_udata *udata) + +{ + int err; + struct erdma_dev *dev = to_edev(ibah->device); + struct sw_dev *sw = &dev->sw_dev; + struct sw_ah *ah = to_rah(ibah); + + err = sw_av_chk_attr(sw, attr); + if (err) + return err; + + err = sw_add_to_pool(&sw->ah_pool, &ah->pelem); + if (err) + return err; + + sw_init_av(&dev->ibdev, attr, &ah->av); + return 0; +} + +static int post_one_recv(struct sw_rq *rq, const struct ib_recv_wr *ibwr) +{ + int err; + int i; + u32 length; + struct sw_recv_wqe *recv_wqe; + int num_sge = ibwr->num_sge; + + if (unlikely(queue_full(rq->queue))) { + err = -ENOMEM; + goto err1; + } + + if (unlikely(num_sge > rq->max_sge)) { + err = -EINVAL; + goto err1; + } + + length = 0; + for (i = 0; i < num_sge; i++) + length += ibwr->sg_list[i].length; + + recv_wqe = producer_addr(rq->queue); + recv_wqe->wr_id = ibwr->wr_id; + recv_wqe->num_sge = num_sge; + + memcpy(recv_wqe->dma.sge, ibwr->sg_list, + num_sge * sizeof(struct ib_sge)); + + recv_wqe->dma.length = length; + recv_wqe->dma.resid = length; + recv_wqe->dma.num_sge = num_sge; + recv_wqe->dma.cur_sge = 0; + recv_wqe->dma.sge_offset = 0; + + /* make sure all changes to the work queue are written before we + * update the producer pointer + */ + smp_wmb(); + + advance_producer(rq->queue); + return 0; + +err1: + return err; +} + +int sw_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, + int mask, struct ib_udata *udata) +{ + int err; + struct sw_dev *sw = to_rdev(ibqp->device); + struct sw_qp *qp = to_rqp(ibqp); + + err = sw_qp_chk_attr(sw, qp, attr, mask); + if (err) + goto err1; + + err = sw_qp_from_attr(qp, attr, mask, udata); + if (err) + goto err1; + + return 0; + +err1: + return err; +} +static int validate_send_wr(struct sw_qp *qp, const struct ib_send_wr *ibwr, + unsigned int mask, unsigned int length) +{ + int num_sge = ibwr->num_sge; + struct sw_sq *sq = &qp->sq; + + if (unlikely(num_sge > sq->max_sge)) + goto err1; + + if (unlikely(mask & WR_ATOMIC_MASK)) { + if (length < 8) + goto err1; + + if (atomic_wr(ibwr)->remote_addr & 0x7) + goto err1; + } + + if (unlikely((ibwr->send_flags & IB_SEND_INLINE) && + (length > sq->max_inline))) + goto err1; + + return 0; + +err1: + return -EINVAL; +} + +static void init_send_wr(struct sw_qp *qp, struct sw_send_wr *wr, + const struct ib_send_wr *ibwr) +{ + wr->wr_id = ibwr->wr_id; + wr->num_sge = ibwr->num_sge; + wr->opcode = ibwr->opcode; + wr->send_flags = ibwr->send_flags; + + if (qp_type(qp) == IB_QPT_GSI) { + wr->wr.ud.remote_qpn = ud_wr(ibwr)->remote_qpn; + wr->wr.ud.remote_qkey = ud_wr(ibwr)->remote_qkey; + wr->wr.ud.pkey_index = ud_wr(ibwr)->pkey_index; + if (wr->opcode == IB_WR_SEND_WITH_IMM) + wr->ex.imm_data = ibwr->ex.imm_data; + } +} + +static int init_send_wqe(struct sw_qp *qp, const struct ib_send_wr *ibwr, + unsigned int mask, unsigned int length, + struct sw_send_wqe *wqe) +{ + int num_sge = ibwr->num_sge; + struct ib_sge *sge; + int i; + u8 *p; + + init_send_wr(qp, &wqe->wr, ibwr); + + if 
(qp_type(qp) == IB_QPT_UD || + qp_type(qp) == IB_QPT_SMI || + qp_type(qp) == IB_QPT_GSI) + memcpy(&wqe->av, &to_rah(ud_wr(ibwr)->ah)->av, sizeof(wqe->av)); + + if (unlikely(ibwr->send_flags & IB_SEND_INLINE)) { + p = wqe->dma.inline_data; + + sge = ibwr->sg_list; + for (i = 0; i < num_sge; i++, sge++) { + memcpy(p, (void *)(uintptr_t)sge->addr, + sge->length); + + p += sge->length; + } + } else { + if (mask & WR_REG_MASK) { + wqe->mask = mask; + wqe->state = wqe_state_posted; + return 0; + } + memcpy(wqe->dma.sge, ibwr->sg_list, + num_sge * sizeof(struct ib_sge)); + } + + wqe->iova = mask & WR_ATOMIC_MASK ? atomic_wr(ibwr)->remote_addr : + mask & WR_READ_OR_WRITE_MASK ? rdma_wr(ibwr)->remote_addr : 0; + wqe->mask = mask; + wqe->dma.length = length; + wqe->dma.resid = length; + wqe->dma.num_sge = num_sge; + wqe->dma.cur_sge = 0; + wqe->dma.sge_offset = 0; + wqe->state = wqe_state_posted; + wqe->ssn = atomic_add_return(1, &qp->ssn); + + return 0; +} + +static int post_one_send(struct sw_qp *qp, const struct ib_send_wr *ibwr, + unsigned int mask, u32 length) +{ + int err; + struct sw_sq *sq = &qp->sq; + struct sw_send_wqe *send_wqe; + unsigned long flags; + + err = validate_send_wr(qp, ibwr, mask, length); + if (err) + return err; + + spin_lock_irqsave(&qp->sq.sq_lock, flags); + + if (unlikely(queue_full(sq->queue))) { + err = -ENOMEM; + goto err1; + } + + send_wqe = producer_addr(sq->queue); + + err = init_send_wqe(qp, ibwr, mask, length, send_wqe); + if (unlikely(err)) + goto err1; + + /* + * make sure all changes to the work queue are + * written before we update the producer pointer + */ + smp_wmb(); + + advance_producer(sq->queue); + spin_unlock_irqrestore(&qp->sq.sq_lock, flags); + + return 0; + +err1: + spin_unlock_irqrestore(&qp->sq.sq_lock, flags); + return err; +} + +static int sw_post_send_kernel(struct sw_qp *qp, const struct ib_send_wr *wr, + const struct ib_send_wr **bad_wr) +{ + unsigned int length = 0, mask; + int err = 0, i; + struct ib_send_wr *next; + + while (wr) { + mask = wr_opcode_mask(wr->opcode, qp); + if (unlikely(!mask)) { + err = -EINVAL; + *bad_wr = wr; + break; + } + + if (unlikely((wr->send_flags & IB_SEND_INLINE) && + !(mask & WR_INLINE_MASK))) { + err = -EINVAL; + *bad_wr = wr; + break; + } + + next = wr->next; + + length = 0; + for (i = 0; i < wr->num_sge; i++) + length += wr->sg_list[i].length; + + err = post_one_send(qp, wr, mask, length); + if (err) { + *bad_wr = wr; + break; + } + wr = next; + } + + sw_run_task(&qp->req.task, 1); + if (unlikely(qp->req.state == QP_STATE_ERROR)) + sw_run_task(&qp->comp.task, 1); + + return err; +} + +int sw_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr, + const struct ib_send_wr **bad_wr) +{ + struct sw_qp *qp = to_rqp(ibqp); + + if (unlikely(!qp->valid)) { + *bad_wr = wr; + return -EINVAL; + } + + if (unlikely(qp->req.state < QP_STATE_READY)) { + *bad_wr = wr; + return -EINVAL; + } + + return sw_post_send_kernel(qp, wr, bad_wr); +} + +int sw_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *wr, + const struct ib_recv_wr **bad_wr) +{ + int err = 0; + struct sw_qp *qp = to_rqp(ibqp); + struct sw_rq *rq = &qp->rq; + unsigned long flags; + + if (unlikely((qp_state(qp) < IB_QPS_INIT) || !qp->valid)) { + *bad_wr = wr; + err = -EINVAL; + goto err1; + } + + if (unlikely(qp->srq)) { + *bad_wr = wr; + err = -EINVAL; + goto err1; + } + + spin_lock_irqsave(&rq->producer_lock, flags); + + while (wr) { + err = post_one_recv(rq, wr); + if (unlikely(err)) { + *bad_wr = wr; + break; + } + wr = wr->next; + } + + 
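+ /*
+ * Posting stops at the first receive WR that fails: *bad_wr then points
+ * at the offending request and WQEs already queued stay on the RQ.  The
+ * responder task is only kicked below when the QP is in the error state,
+ * so that those WQEs can be flushed.
+ */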
spin_unlock_irqrestore(&rq->producer_lock, flags); + + if (qp->resp.state == QP_STATE_ERROR) + sw_run_task(&qp->resp.task, 1); + +err1: + return err; +} + +int sw_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr, + struct ib_udata *udata) +{ + int err; + struct ib_device *dev = ibcq->device; + struct sw_dev *sw = to_rdev(dev); + struct sw_cq *cq = to_rcq(ibcq); + struct sw_create_cq_resp __user *uresp = NULL; + + if (udata) + return -EINVAL; + + if (attr->flags) + return -EINVAL; + + err = sw_cq_chk_attr(sw, NULL, attr->cqe, attr->comp_vector); + if (err) + return err; + + err = sw_cq_from_init(sw, cq, attr->cqe, attr->comp_vector, udata, + uresp); + if (err) + return err; + + return sw_add_to_pool(&sw->cq_pool, &cq->pelem); +} + +int sw_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata) +{ + struct sw_cq *cq = to_rcq(ibcq); + + sw_cq_disable(cq); + + sw_drop_ref(cq); + return 0; +} + +int sw_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc) +{ + int i; + struct sw_cq *cq = to_rcq(ibcq); + struct sw_cqe *cqe; + unsigned long flags; + + spin_lock_irqsave(&cq->cq_lock, flags); + for (i = 0; i < num_entries; i++) { + cqe = queue_head(cq->queue); + if (!cqe) + break; + + memcpy(wc++, &cqe->ibwc, sizeof(*wc)); + advance_consumer(cq->queue); + } + spin_unlock_irqrestore(&cq->cq_lock, flags); + + return i; +} + +int sw_req_notify_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags) +{ + struct sw_cq *cq = to_rcq(ibcq); + unsigned long irq_flags; + int ret = 0; + + spin_lock_irqsave(&cq->cq_lock, irq_flags); + if (cq->notify != IB_CQ_NEXT_COMP) + cq->notify = flags & IB_CQ_SOLICITED_MASK; + + if ((flags & IB_CQ_REPORT_MISSED_EVENTS) && !queue_empty(cq->queue)) + ret = 1; + + spin_unlock_irqrestore(&cq->cq_lock, irq_flags); + + return ret; +} + +struct ib_mr *sw_get_dma_mr(struct ib_pd *ibpd, int access) +{ + struct sw_dev *sw = to_rdev(ibpd->device); + struct sw_pd *pd = to_rpd(ibpd); + struct sw_mem *mr; + + mr = sw_alloc(&sw->mr_pool); + if (!mr) + return ERR_PTR(-ENOMEM); + + sw_add_index(mr); + sw_add_ref(pd); + sw_mem_init_dma(pd, access, mr); + + return &mr->ibmr; +} diff --git a/drivers/infiniband/hw/erdma/compat/sw_verbs.h b/drivers/infiniband/hw/erdma/compat/sw_verbs.h new file mode 100644 index 0000000000000000000000000000000000000000..4411bc264267cb6cf7c8640a559835b9687d9e0a --- /dev/null +++ b/drivers/infiniband/hw/erdma/compat/sw_verbs.h @@ -0,0 +1,483 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* + * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved. + * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved. + */ + +#ifndef SW_VERBS_H +#define SW_VERBS_H + +#include +#include +#include "rdma_user_sw.h" +#include "sw_pool.h" +#include "sw_task.h" +#include "sw_hw_counters.h" +/* lack */ +#include "sw_param.h" +#include "../kcompat.h" + +static inline int pkey_match(u16 key1, u16 key2) +{ + return (((key1 & 0x7fff) != 0) && + ((key1 & 0x7fff) == (key2 & 0x7fff)) && + ((key1 & 0x8000) || (key2 & 0x8000))) ? 
1 : 0; +} + +/* Return >0 if psn_a > psn_b + * 0 if psn_a == psn_b + * <0 if psn_a < psn_b + */ +static inline int psn_compare(u32 psn_a, u32 psn_b) +{ + s32 diff; + + diff = (psn_a - psn_b) << 8; + return diff; +} + +struct sw_ucontext { + struct ib_ucontext ibuc; + struct sw_pool_entry pelem; +}; + +struct sw_pd { + struct ib_pd ibpd; + struct sw_pool_entry pelem; + struct ib_mr *internal_mr; +}; + +struct sw_ah { + struct ib_ah ibah; + struct sw_pool_entry pelem; + struct sw_pd *pd; + struct sw_av av; +}; + +struct sw_cqe { + union { + struct ib_wc ibwc; + struct ib_uverbs_wc uibwc; + }; +}; + +struct sw_cq { + struct ib_cq ibcq; + struct sw_pool_entry pelem; + struct sw_queue *queue; + spinlock_t cq_lock; + u8 notify; + bool is_dying; + int is_user; + struct tasklet_struct comp_task; + struct erdma_cq *master; +}; + +enum wqe_state { + wqe_state_posted, + wqe_state_processing, + wqe_state_pending, + wqe_state_done, + wqe_state_error, +}; + +struct sw_sq { + int max_wr; + int max_sge; + int max_inline; + spinlock_t sq_lock; /* guard queue */ + struct sw_queue *queue; +}; + +struct sw_rq { + int max_wr; + int max_sge; + spinlock_t producer_lock; /* guard queue producer */ + spinlock_t consumer_lock; /* guard queue consumer */ + struct sw_queue *queue; +}; + +struct sw_srq { + struct ib_srq ibsrq; + struct sw_pool_entry pelem; + struct sw_pd *pd; + struct sw_rq rq; + u32 srq_num; + + int limit; + int error; +}; + +enum sw_qp_state { + QP_STATE_RESET, + QP_STATE_INIT, + QP_STATE_READY, + QP_STATE_DRAIN, /* req only */ + QP_STATE_DRAINED, /* req only */ + QP_STATE_ERROR +}; + +struct sw_req_info { + enum sw_qp_state state; + int wqe_index; + u32 psn; + int opcode; + atomic_t rd_atomic; + int wait_fence; + int need_rd_atomic; + int wait_psn; + int need_retry; + int noack_pkts; + struct sw_task task; +}; + +struct sw_comp_info { + u32 psn; + int opcode; + int timeout; + int timeout_retry; + int started_retry; + u32 retry_cnt; + u32 rnr_retry; + struct sw_task task; +}; + +enum rdatm_res_state { + rdatm_res_state_next, + rdatm_res_state_new, + rdatm_res_state_replay, +}; + +struct resp_res { + int type; + int replay; + u32 first_psn; + u32 last_psn; + u32 cur_psn; + enum rdatm_res_state state; + + union { + struct { + struct sk_buff *skb; + } atomic; + struct { + struct sw_mem *mr; + u64 va_org; + u32 rkey; + u32 length; + u64 va; + u32 resid; + } read; + }; +}; + +struct sw_resp_info { + enum sw_qp_state state; + u32 msn; + u32 psn; + u32 ack_psn; + int opcode; + int drop_msg; + int goto_error; + int sent_psn_nak; + enum ib_wc_status status; + u8 aeth_syndrome; + + /* Receive only */ + struct sw_recv_wqe *wqe; + + /* RDMA read / atomic only */ + u64 va; + struct sw_mem *mr; + u32 resid; + u32 rkey; + u32 length; + u64 atomic_orig; + + /* SRQ only */ + struct { + struct sw_recv_wqe wqe; + struct ib_sge sge[SW_MAX_SGE]; + } srq_wqe; + + /* Responder resources. It's a circular list where the oldest + * resource is dropped first. 
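+ * res_head selects the slot used for the next incoming RDMA READ or
+ * ATOMIC request; when it wraps around the array the oldest entry is
+ * recycled.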
+ */ + struct resp_res *resources; + unsigned int res_head; + unsigned int res_tail; + struct resp_res *res; + struct sw_task task; +}; + +struct sw_qp { + struct sw_pool_entry pelem; + struct ib_qp ibqp; + struct ib_qp_attr attr; + unsigned int valid; + unsigned int mtu; + int is_user; + + struct erdma_qp *master; + struct sw_pd *pd; + struct sw_srq *srq; + struct sw_cq *scq; + struct sw_cq *rcq; + + enum ib_sig_type sq_sig_type; + + struct sw_sq sq; + struct sw_rq rq; + + struct socket *sk; + u32 dst_cookie; + u16 src_port; + + struct sw_av pri_av; + struct sw_av alt_av; + + /* list of mcast groups qp has joined (for cleanup) */ + struct list_head grp_list; + spinlock_t grp_lock; /* guard grp_list */ + + struct sk_buff_head req_pkts; + struct sk_buff_head resp_pkts; + struct sk_buff_head send_pkts; + + struct sw_req_info req; + struct sw_comp_info comp; + struct sw_resp_info resp; + + atomic_t ssn; + atomic_t skb_out; + int need_req_skb; + + /* Timer for retranmitting packet when ACKs have been lost. RC + * only. The requester sets it when it is not already + * started. The responder resets it whenever an ack is + * received. + */ + struct timer_list retrans_timer; + u64 qp_timeout_jiffies; + + /* Timer for handling RNR NAKS. */ + struct timer_list rnr_nak_timer; + + spinlock_t state_lock; /* guard requester and completer */ + + struct execute_work cleanup_work; +}; + +enum sw_mem_state { + SW_MEM_STATE_ZOMBIE, + SW_MEM_STATE_INVALID, + SW_MEM_STATE_FREE, + SW_MEM_STATE_VALID, +}; + +enum sw_mem_type { + SW_MEM_TYPE_NONE, + SW_MEM_TYPE_DMA, + SW_MEM_TYPE_MR, + SW_MEM_TYPE_FMR, + SW_MEM_TYPE_MW, +}; + +#define SW_BUF_PER_MAP (PAGE_SIZE / sizeof(struct sw_phys_buf)) + +struct sw_phys_buf { + u64 addr; + u64 size; +}; + +struct sw_map { + struct sw_phys_buf buf[SW_BUF_PER_MAP]; +}; + +struct sw_mem { + struct sw_pool_entry pelem; + union { + struct ib_mr ibmr; + struct ib_mw ibmw; + }; + + struct ib_umem *umem; + + enum sw_mem_state state; + enum sw_mem_type type; + u64 va; + u64 iova; + size_t length; + u32 offset; + int access; + + int page_shift; + int page_mask; + int map_shift; + int map_mask; + + u32 num_buf; + u32 nbuf; + + u32 max_buf; + u32 num_map; + + struct sw_map **map; +}; + +struct sw_mc_grp { + struct sw_pool_entry pelem; + spinlock_t mcg_lock; /* guard group */ + struct sw_dev *sw; + struct list_head qp_list; + union ib_gid mgid; + int num_qp; + u32 qkey; + u16 pkey; +}; + +struct sw_mc_elem { + struct sw_pool_entry pelem; + struct list_head qp_list; + struct list_head grp_list; + struct sw_qp *qp; + struct sw_mc_grp *grp; +}; + +struct sw_port { + struct ib_port_attr attr; + __be64 port_guid; + __be64 subnet_prefix; + spinlock_t port_lock; /* guard port */ + unsigned int mtu_cap; + /* special QPs */ + u32 qp_smi_index; + u32 qp_gsi_index; +}; + +struct sw_dev { + struct ib_device ib_dev; + struct ib_device_attr attr; + int max_ucontext; + int max_inline_data; + struct mutex usdev_lock; + + struct net_device *ndev; + struct erdma_dev *master; + + int xmit_errors; + + struct sw_pool uc_pool; + struct sw_pool pd_pool; + struct sw_pool ah_pool; + struct sw_pool srq_pool; + struct sw_pool qp_pool; + struct sw_pool cq_pool; + struct sw_pool mr_pool; + struct sw_pool mw_pool; + struct sw_pool mc_grp_pool; + struct sw_pool mc_elem_pool; + + spinlock_t pending_lock; /* guard pending_mmaps */ + struct list_head pending_mmaps; + + spinlock_t mmap_offset_lock; /* guard mmap_offset */ + u64 mmap_offset; + + atomic64_t stats_counters[SW_NUM_OF_COUNTERS]; + + struct sw_port port; + 
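+ /* crc32 shash, allocated in attach_sw_dev(), used by the software
+ * datapath for ICRC computation.
+ */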
struct crypto_shash *tfm; +}; + +static inline void sw_counter_inc(struct sw_dev *sw, enum sw_counters index) +{ + atomic64_inc(&sw->stats_counters[index]); +} + +static inline struct sw_dev *to_rdev(struct ib_device *dev) +{ + return dev ? container_of(dev, struct sw_dev, ib_dev) : NULL; +} + +static inline struct sw_ucontext *to_ruc(struct ib_ucontext *uc) +{ + return uc ? container_of(uc, struct sw_ucontext, ibuc) : NULL; +} + +static inline struct sw_pd *to_rpd(struct ib_pd *pd) +{ + return pd ? container_of(pd, struct sw_pd, ibpd) : NULL; +} + +static inline struct sw_ah *to_rah(struct ib_ah *ah) +{ + return ah ? container_of(ah, struct sw_ah, ibah) : NULL; +} + +static inline struct sw_srq *to_rsrq(struct ib_srq *srq) +{ + return srq ? container_of(srq, struct sw_srq, ibsrq) : NULL; +} + +static inline struct sw_qp *to_rqp(struct ib_qp *qp) +{ + return qp ? container_of(qp, struct sw_qp, ibqp) : NULL; +} + +static inline struct sw_cq *to_rcq(struct ib_cq *cq) +{ + return cq ? container_of(cq, struct sw_cq, ibcq) : NULL; +} + +static inline struct sw_mem *to_rmr(struct ib_mr *mr) +{ + return mr ? container_of(mr, struct sw_mem, ibmr) : NULL; +} + +static inline struct sw_mem *to_rmw(struct ib_mw *mw) +{ + return mw ? container_of(mw, struct sw_mem, ibmw) : NULL; +} + +static inline struct sw_pd *mr_pd(struct sw_mem *mr) +{ + return to_rpd(mr->ibmr.pd); +} + +static inline u32 mr_lkey(struct sw_mem *mr) +{ + return mr->ibmr.lkey; +} + +static inline u32 mr_rkey(struct sw_mem *mr) +{ + return mr->ibmr.rkey; +} + +int sw_register_device(struct sw_dev *sw, const char *ibdev_name); + +void sw_mc_cleanup(struct sw_pool_entry *arg); +/* verbs interface */ +int sw_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr, + struct ib_udata *udata); +int sw_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata); +int sw_alloc_pd(struct ib_pd *ibpd, struct ib_udata *udata); +int sw_dealloc_pd(struct ib_pd *ibpd, struct ib_udata *udata); +void sw_init_ports(struct sw_dev *sw); +int sw_init(struct sw_dev *sw); +void sw_dealloc(struct sw_dev *sw); +void sw_set_mtu(struct sw_dev *sw, unsigned int ndev_mtu); +int sw_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr, + const struct ib_send_wr **bad_wr); +int sw_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *wr, + const struct ib_recv_wr **bad_wr); +int sw_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, + int mask, struct ib_udata *udata); +int sw_create_ah(struct ib_ah *ibah, + struct rdma_ah_attr *attr, + struct ib_udata *udata); +int sw_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc); +int sw_req_notify_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags); +struct ib_mr *sw_get_dma_mr(struct ib_pd *ibpd, int access); +#endif /* SW_VERBS_H */ diff --git a/drivers/infiniband/hw/erdma/erdma.h b/drivers/infiniband/hw/erdma/erdma.h index f190111840e97f83cdba469e4c7fe48c5a2b6555..469b344a192bcb1ec37478ef7e4b98a8c7a53c6f 100644 --- a/drivers/infiniband/hw/erdma/erdma.h +++ b/drivers/infiniband/hw/erdma/erdma.h @@ -7,6 +7,7 @@ #ifndef __ERDMA_H__ #define __ERDMA_H__ +#include "kcompat.h" #include #include #include @@ -14,10 +15,21 @@ #include #include "erdma_hw.h" +#include "erdma_ioctl.h" +#include "erdma_stats.h" +#include "compat/sw_verbs.h" #define DRV_MODULE_NAME "erdma" #define ERDMA_NODE_DESC "Elastic RDMA(iWARP) stack" +extern bool legacy_mode; +extern bool use_zeronet; +extern bool compat_mode; + +struct erdma_stats { + atomic64_t value[ERDMA_STATS_MAX]; +}; + struct erdma_eq { void *qbuf; dma_addr_t 
qbuf_dma_addr; @@ -33,7 +45,7 @@ struct erdma_eq { atomic64_t notify_num; void __iomem *db; - u64 *db_record; + u64 *dbrec; }; struct erdma_cmdq_sq { @@ -48,7 +60,9 @@ struct erdma_cmdq_sq { u16 wqebb_cnt; - u64 *db_record; + u64 total_cmds; + u64 total_comp_cmds; + u64 *dbrec; }; struct erdma_cmdq_cq { @@ -61,7 +75,7 @@ struct erdma_cmdq_cq { u32 ci; u32 cmdsn; - u64 *db_record; + u64 *dbrec; atomic64_t armed_num; }; @@ -116,7 +130,7 @@ enum erdma_cc_alg { ERDMA_CC_CUBIC, ERDMA_CC_HPCC_RTT, ERDMA_CC_HPCC_ECN, - ERDMA_CC_HPCC_INT, + ERDMA_CC_MPCC, ERDMA_CC_METHODS_NUM }; @@ -128,7 +142,15 @@ struct erdma_devattr { int numa_node; enum erdma_cc_alg cc; - u32 irq_num; + u8 retrans_num; + u8 rsvd; + u32 grp_num; + u32 max_ceqs; + int irq_num; + + bool disable_dwqe; + u16 dwqe_pages; + u16 dwqe_entries; u32 max_qp; u32 max_send_wr; @@ -180,11 +202,17 @@ enum { #define ERDMA_EXTRA_BUFFER_SIZE ERDMA_DB_SIZE #define WARPPED_BUFSIZE(size) ((size) + ERDMA_EXTRA_BUFFER_SIZE) +enum { + ERDMA_STATE_AEQ_INIT_DONE = 0, +}; + struct erdma_dev { struct ib_device ibdev; struct net_device *netdev; + rwlock_t netdev_lock; struct pci_dev *pdev; struct notifier_block netdev_nb; + struct sw_dev sw_dev; struct workqueue_struct *reflush_wq; resource_size_t func_bar_addr; @@ -193,7 +221,8 @@ struct erdma_dev { struct erdma_devattr attrs; /* physical port state (only one port per device) */ - enum ib_port_state state; + enum ib_port_state port_state; + unsigned long state; u32 mtu; /* cmdq and aeq use the same msix vector */ @@ -202,6 +231,7 @@ struct erdma_dev { struct erdma_eq aeq; struct erdma_eq_cb ceqs[ERDMA_NUM_MSIX_VEC - 1]; + struct erdma_stats stats; spinlock_t lock; struct erdma_resource_cb res_cb[ERDMA_RES_CNT]; struct xarray qp_xa; @@ -210,8 +240,28 @@ struct erdma_dev { u32 next_alloc_qpn; u32 next_alloc_cqn; + spinlock_t db_bitmap_lock; + /* We provide max 64 uContexts that each has one SQ doorbell Page. */ + DECLARE_BITMAP(sdb_page, ERDMA_DWQE_TYPE0_CNT); + /* + * We provide max 496 uContexts that each has one SQ normal Db, + * and one directWQE db. 
+ */ + DECLARE_BITMAP(sdb_entry, ERDMA_DWQE_TYPE1_CNT); + atomic_t num_ctx; + atomic_t num_cep; struct list_head cep_list; + + /* Fields for compat */ + struct list_head dev_list; + refcount_t refcount; + struct completion unreg_completion; + + struct dma_pool *db_pool; + struct dma_pool *resp_pool; + + struct dentry *dbg_root; }; static inline void *get_queue_entry(void *qbuf, u32 idx, u32 depth, u32 shift) @@ -254,8 +304,6 @@ static inline u32 erdma_reg_read32_filed(struct erdma_dev *dev, u32 reg, return FIELD_GET(filed_mask, val); } -#define ERDMA_GET(val, name) FIELD_GET(ERDMA_CMD_##name##_MASK, val) - int erdma_cmdq_init(struct erdma_dev *dev); void erdma_finish_cmdq_init(struct erdma_dev *dev); void erdma_cmdq_destroy(struct erdma_dev *dev); @@ -276,4 +324,22 @@ void erdma_aeq_destroy(struct erdma_dev *dev); void erdma_aeq_event_handler(struct erdma_dev *dev); void erdma_ceq_completion_handler(struct erdma_eq_cb *ceq_cb); +void erdma_chrdev_destroy(void); +int erdma_chrdev_init(void); + +int erdma_query_resource(struct erdma_dev *dev, u32 mod, u32 op, u32 index, + void *out, u32 len); +int erdma_query_ext_attr(struct erdma_dev *dev, void *out); +int erdma_set_ext_attr(struct erdma_dev *dev, struct erdma_ext_attr *attr); +int erdma_set_dack_count(struct erdma_dev *dev, u32 value); +int erdma_enable_legacy_mode(struct erdma_dev *dev, u32 value); +void erdma_sync_info(struct erdma_dev *dev); + +void erdma_debugfs_register(void); +void erdma_debugfs_unregister(void); + +int erdma_debugfs_files_create(struct erdma_dev *dev); +void erdma_debugfs_files_destroy(struct erdma_dev *dev); +extern struct dentry *erdma_debugfs_root; + #endif diff --git a/drivers/infiniband/hw/erdma/erdma_cm.c b/drivers/infiniband/hw/erdma/erdma_cm.c index 771059a8eb7d7f76eb38652edbce9bfdb5ee89b3..bfe96acf89e545c7771089d2a38020a46382487c 100644 --- a/drivers/infiniband/hw/erdma/erdma_cm.c +++ b/drivers/infiniband/hw/erdma/erdma_cm.c @@ -11,7 +11,6 @@ /* Copyright (c) 2017, Open Grid Computing, Inc. 
*/ #include -#include #include "erdma.h" #include "erdma_cm.h" @@ -110,7 +109,9 @@ static struct erdma_cep *erdma_cep_alloc(struct erdma_dev *dev) spin_lock_irqsave(&dev->lock, flags); list_add_tail(&cep->devq, &dev->cep_list); spin_unlock_irqrestore(&dev->lock, flags); + atomic_inc(&dev->num_cep); + ibdev_dbg(&dev->ibdev, "(CEP 0x%p): New Object\n", cep); return cep; } @@ -151,6 +152,8 @@ static void erdma_cep_set_inuse(struct erdma_cep *cep) { unsigned long flags; + ibdev_dbg(&cep->dev->ibdev, " (CEP 0x%p): use %d\n", cep, cep->in_use); + spin_lock_irqsave(&cep->lock, flags); while (cep->in_use) { spin_unlock_irqrestore(&cep->lock, flags); @@ -158,6 +161,8 @@ static void erdma_cep_set_inuse(struct erdma_cep *cep) if (signal_pending(current)) flush_signals(current); + ibdev_dbg(&cep->dev->ibdev, " (CEP 0x%p): use %d\n", cep, + cep->in_use); spin_lock_irqsave(&cep->lock, flags); } @@ -169,6 +174,8 @@ static void erdma_cep_set_free(struct erdma_cep *cep) { unsigned long flags; + ibdev_dbg(&cep->dev->ibdev, " (CEP 0x%p): use %d\n", cep, cep->in_use); + spin_lock_irqsave(&cep->lock, flags); cep->in_use = 0; spin_unlock_irqrestore(&cep->lock, flags); @@ -194,6 +201,7 @@ static void __erdma_cep_dealloc(struct kref *ref) spin_lock_irqsave(&dev->lock, flags); list_del(&cep->devq); spin_unlock_irqrestore(&dev->lock, flags); + atomic_dec(&dev->num_cep); kfree(cep); } @@ -221,6 +229,7 @@ static int erdma_cm_alloc_work(struct erdma_cep *cep, int num) if (!work) { if (!(list_empty(&cep->work_freelist))) erdma_cm_free_work(cep); + ibdev_dbg(&cep->dev->ibdev, " CEP alloc work failed\n"); return -ENOMEM; } work->cep = cep; @@ -264,6 +273,12 @@ static int erdma_cm_upcall(struct erdma_cep *cep, enum iw_cm_event_type reason, getname_peer(cep->sock, &event.remote_addr); } + ibdev_dbg( + &cep->dev->ibdev, + " (QP%d): cep=0x%p, id=0x%p, dev(id)=%s, reason=%d, status=%d\n", + cep->qp ? QP_ID(cep->qp) : -1, cep, cm_id, cm_id->device->name, + reason, status); + return cm_id->event_handler(cm_id, &event); } @@ -274,6 +289,15 @@ void erdma_qp_cm_drop(struct erdma_qp *qp) if (!qp->cep) return; + /* + * Immediately close socket + */ + ibdev_dbg( + &qp->dev->ibdev, + "(): immediate close, cep=0x%p, state=%d, id=0x%p, sock=0x%p, QP%d\n", + cep, cep->state, cep->cm_id, cep->sock, + cep->qp ? QP_ID(cep->qp) : -1); + erdma_cep_set_inuse(cep); /* already closed. 
*/ @@ -320,6 +344,9 @@ void erdma_qp_cm_drop(struct erdma_qp *qp) void erdma_cep_put(struct erdma_cep *cep) { + ibdev_dbg(&cep->dev->ibdev, "(CEP 0x%p): New refcount: %d\n", cep, + kref_read(&cep->ref) - 1); + WARN_ON(kref_read(&cep->ref) < 1); kref_put(&cep->ref, __erdma_cep_dealloc); } @@ -327,6 +354,9 @@ void erdma_cep_put(struct erdma_cep *cep) void erdma_cep_get(struct erdma_cep *cep) { kref_get(&cep->ref); + + ibdev_dbg(&cep->dev->ibdev, "(CEP 0x%p): New refcount: %d\n", cep, + kref_read(&cep->ref)); } static int erdma_send_mpareqrep(struct erdma_cep *cep, const void *pdata, @@ -484,8 +514,10 @@ static int erdma_recv_mpa_rr(struct erdma_cep *cep) if (ret == -EAGAIN && rcvd == 0) return 0; - if (ret) + if (ret) { + ibdev_dbg(&cep->dev->ibdev, " ERROR: %d:\n", ret); return ret; + } return -EPROTO; } @@ -510,8 +542,11 @@ static int erdma_recv_mpa_rr(struct erdma_cep *cep) cep->mpa.bytes_rcvd += rcvd; - if (to_rcv == rcvd) + if (to_rcv == rcvd) { + ibdev_dbg(&cep->dev->ibdev, " %d bytes private_data received\n", + pd_len); return 0; + } return -EAGAIN; } @@ -666,13 +701,23 @@ static void erdma_accept_newconn(struct erdma_cep *cep) new_cep->sk_error_report = cep->sk_error_report; ret = kernel_accept(s, &new_s, O_NONBLOCK); - if (ret != 0) + if (ret != 0) { + ibdev_dbg(&cep->dev->ibdev, + "(cep=0x%p): ERROR: kernel_accept(): rv=%d\n", cep, + ret); + goto error; + } new_cep->sock = new_s; erdma_cep_get(new_cep); new_s->sk->sk_user_data = new_cep; + ibdev_dbg( + &cep->dev->ibdev, + "(cep=0x%p, s=0x%p, new_s=0x%p): New LLP connection accepted\n", + cep, s, new_s); + tcp_sock_set_nodelay(new_s->sk); new_cep->state = ERDMA_EPSTATE_AWAIT_MPAREQ; @@ -684,6 +729,8 @@ static void erdma_accept_newconn(struct erdma_cep *cep) erdma_cep_get(cep); if (atomic_read(&new_s->sk->sk_rmem_alloc)) { + ibdev_dbg(&cep->dev->ibdev, "(cep=0x%p): Immediate MPA req.\n", + cep); /* MPA REQ already queued */ erdma_cep_set_inuse(new_cep); ret = erdma_proc_mpareq(new_cep); @@ -712,6 +759,7 @@ static void erdma_accept_newconn(struct erdma_cep *cep) erdma_socket_disassoc(new_s); sock_release(new_s); } + ibdev_dbg(&cep->dev->ibdev, "(cep=0x%p): ERROR: rv=%d\n", cep, ret); } static int erdma_newconn_connected(struct erdma_cep *cep) @@ -740,9 +788,14 @@ static void erdma_cm_work_handler(struct work_struct *w) struct erdma_cm_work *work; struct erdma_cep *cep; int release_cep = 0, ret = 0; + struct erdma_dev *dev; work = container_of(w, struct erdma_cm_work, work.work); cep = work->cep; + dev = cep->dev; + + ibdev_dbg(&dev->ibdev, " (QP%d): WORK type: %d, CEP: 0x%p, state: %d\n", + cep->qp ? 
QP_ID(cep->qp) : -1, work->type, cep, cep->state); erdma_cep_set_inuse(cep); @@ -764,6 +817,8 @@ static void erdma_cm_work_handler(struct work_struct *w) erdma_cm_upcall(cep, IW_CM_EVENT_CONNECT_REPLY, -ETIMEDOUT); release_cep = 1; + + ERDMA_INC_CNT(dev, IW_CONNECT_TIMEOUT); } break; case ERDMA_CM_WORK_ACCEPT: @@ -791,6 +846,8 @@ static void erdma_cm_work_handler(struct work_struct *w) } } else if (cep->state == ERDMA_EPSTATE_AWAIT_MPAREP) { ret = erdma_proc_mpareply(cep); + if (!ret) + ERDMA_INC_CNT(dev, IW_CONNECT_SUCCESS); } if (ret && ret != -EAGAIN) @@ -810,6 +867,7 @@ static void erdma_cm_work_handler(struct work_struct *w) */ erdma_cm_upcall(cep, IW_CM_EVENT_CONNECT_REPLY, -ECONNRESET); + ERDMA_INC_CNT(dev, IW_CONNECT_RST); } else if (cep->state == ERDMA_EPSTATE_RDMA_MODE) { /* * NOTE: IW_CM_EVENT_DISCONNECT is given just @@ -852,6 +910,12 @@ static void erdma_cm_work_handler(struct work_struct *w) } if (release_cep) { + ibdev_dbg( + &cep->dev->ibdev, + " (CEP 0x%p): Release: mpa_timer=%s, sock=0x%p, QP%d, id=0x%p\n", + cep, cep->mpa_timer ? "y" : "n", cep->sock, + cep->qp ? QP_ID(cep->qp) : -1, cep->cm_id); + erdma_cancel_mpatimer(cep); cep->state = ERDMA_EPSTATE_CLOSED; if (cep->qp) { @@ -885,6 +949,9 @@ static void erdma_cm_work_handler(struct work_struct *w) } } erdma_cep_set_free(cep); + + ibdev_dbg(&cep->dev->ibdev, " (Exit): WORK type: %d, CEP: 0x%p\n", + work->type, cep); erdma_put_work(work); erdma_cep_put(cep); } @@ -917,6 +984,10 @@ int erdma_cm_queue_work(struct erdma_cep *cep, enum erdma_work_type type) delay = CONNECT_TIMEOUT; } + ibdev_dbg(&cep->dev->ibdev, + " (QP%d): WORK type: %d, CEP: 0x%p, work 0x%p, timeout %lu\n", + cep->qp ? QP_ID(cep->qp) : -1, type, cep, work, delay); + queue_delayed_work(erdma_cm_wq, &work->work, delay); return 0; @@ -926,14 +997,15 @@ static void erdma_cm_llp_data_ready(struct sock *sk) { struct erdma_cep *cep; - trace_sk_data_ready(sk); - read_lock(&sk->sk_callback_lock); cep = sk_to_cep(sk); if (!cep) goto out; + ibdev_dbg(&cep->dev->ibdev, "(): cep 0x%p, state: %d\n", cep, + cep->state); + if (cep->state == ERDMA_EPSTATE_AWAIT_MPAREQ || cep->state == ERDMA_EPSTATE_AWAIT_MPAREP) erdma_cm_queue_work(cep, ERDMA_CM_WORK_READ_MPAHDR); @@ -946,6 +1018,9 @@ static void erdma_cm_llp_error_report(struct sock *sk) { struct erdma_cep *cep = sk_to_cep(sk); + ibdev_dbg(&cep->dev->ibdev, "(): error: %d, state: %d\n", sk->sk_err, + sk->sk_state); + if (cep) cep->sk_error_report(sk); } @@ -964,6 +1039,9 @@ static void erdma_cm_llp_state_change(struct sock *sk) } orig_state_change = cep->sk_state_change; + ibdev_dbg(&cep->dev->ibdev, "(): cep: 0x%p, state: %d, tcp_state: %d\n", + cep, cep->state, sk->sk_state); + switch (sk->sk_state) { case TCP_ESTABLISHED: if (cep->state == ERDMA_EPSTATE_CONNECTING) @@ -984,16 +1062,25 @@ static void erdma_cm_llp_state_change(struct sock *sk) } static int kernel_bindconnect(struct socket *s, struct sockaddr *laddr, - int laddrlen, struct sockaddr *raddr, - int raddrlen, int flags) + struct sockaddr *raddr, int flags) { + size_t size = laddr->sa_family == AF_INET ? 
+ sizeof(struct sockaddr_in) : sizeof(struct sockaddr_in6); int ret; sock_set_reuseaddr(s->sk); - ret = s->ops->bind(s, laddr, laddrlen); - if (ret) - return ret; - ret = s->ops->connect(s, raddr, raddrlen, flags); + ret = s->ops->bind(s, laddr, size); + if (ret) { + if (laddr->sa_family == AF_INET) + ((struct sockaddr_in *)laddr)->sin_port = 0; + else + ((struct sockaddr_in6 *)laddr)->sin6_port = 0; + ret = s->ops->bind(s, laddr, size); + if (ret) + return ret; + } + + ret = s->ops->connect(s, raddr, size, flags); return ret < 0 ? ret : 0; } @@ -1006,24 +1093,54 @@ int erdma_connect(struct iw_cm_id *id, struct iw_cm_conn_param *params) struct sockaddr *laddr = (struct sockaddr *)&id->m_local_addr; struct sockaddr *raddr = (struct sockaddr *)&id->m_remote_addr; u16 pd_len = params->private_data_len; + bool v4 = false; int ret; - if (pd_len > MPA_MAX_PRIVDATA) + ERDMA_INC_CNT(dev, IW_CONNECT); + + if (pd_len > MPA_MAX_PRIVDATA) { + ERDMA_INC_CNT(dev, IW_CONNECT_FAILED); return -EINVAL; + } if (params->ird > dev->attrs.max_ird || - params->ord > dev->attrs.max_ord) + params->ord > dev->attrs.max_ord) { + ERDMA_INC_CNT(dev, IW_CONNECT_FAILED); return -EINVAL; + } - if (laddr->sa_family != AF_INET || raddr->sa_family != AF_INET) + if (laddr->sa_family == AF_INET && raddr->sa_family == AF_INET) { + v4 = true; + } else if (laddr->sa_family != AF_INET6 || raddr->sa_family != AF_INET6) { + ERDMA_INC_CNT(dev, IW_CONNECT_FAILED); return -EAFNOSUPPORT; + } else if (!(dev->attrs.cap_flags & ERDMA_DEV_CAP_FLAGS_IPV6)) { + ERDMA_INC_CNT(dev, IW_CONNECT_FAILED); + return -EAFNOSUPPORT; + } qp = find_qp_by_qpn(dev, params->qpn); - if (!qp) + if (!qp) { + ERDMA_INC_CNT(dev, IW_CONNECT_FAILED); return -ENOENT; + } erdma_qp_get(qp); - ret = sock_create(AF_INET, SOCK_STREAM, IPPROTO_TCP, &s); + ibdev_dbg(&dev->ibdev, "(id=0x%p, QP%d): dev(id)=%s, netdev=%s\n", id, + QP_ID(qp), dev->ibdev.name, dev->netdev->name); + ibdev_dbg( + &dev->ibdev, + "(id=0x%p, QP%d): laddr=(0x%x,%d,mport %d), raddr=(0x%x,%d,mport %d)\n", + id, QP_ID(qp), + ntohl(to_sockaddr_in(id->local_addr).sin_addr.s_addr), + ntohs(to_sockaddr_in(id->local_addr).sin_port), + ntohs(to_sockaddr_in(id->m_local_addr).sin_port), + ntohl(to_sockaddr_in(id->remote_addr).sin_addr.s_addr), + ntohs(to_sockaddr_in(id->remote_addr).sin_port), + ntohs(to_sockaddr_in(id->m_remote_addr).sin_port)); + + ret = __sock_create(current->nsproxy->net_ns, v4 ? 
AF_INET : AF_INET6, + SOCK_STREAM, IPPROTO_TCP, &s, 1); if (ret < 0) goto error_put_qp; @@ -1060,6 +1177,9 @@ int erdma_connect(struct iw_cm_id *id, struct iw_cm_conn_param *params) cep->ord = params->ord; cep->state = ERDMA_EPSTATE_CONNECTING; + ibdev_dbg(&dev->ibdev, " (id=0x%p, QP%d): pd_len = %u\n", id, QP_ID(qp), + pd_len); + erdma_cep_socket_assoc(cep, s); if (pd_len) { @@ -1074,8 +1194,7 @@ int erdma_connect(struct iw_cm_id *id, struct iw_cm_conn_param *params) params->private_data_len); } - ret = kernel_bindconnect(s, laddr, sizeof(*laddr), raddr, - sizeof(*raddr), O_NONBLOCK); + ret = kernel_bindconnect(s, laddr, raddr, O_NONBLOCK); if (ret != -EINPROGRESS && ret != 0) { goto error_disassoc; } else if (ret == 0) { @@ -1092,6 +1211,7 @@ int erdma_connect(struct iw_cm_id *id, struct iw_cm_conn_param *params) return 0; error_disassoc: + ibdev_dbg(&dev->ibdev, " Failed: %d\n", ret); kfree(cep->private_data); cep->private_data = NULL; cep->pd_len = 0; @@ -1121,6 +1241,8 @@ int erdma_connect(struct iw_cm_id *id, struct iw_cm_conn_param *params) error_put_qp: erdma_qp_put(qp); + ERDMA_INC_CNT(dev, IW_CONNECT_FAILED); + return ret; } @@ -1132,6 +1254,8 @@ int erdma_accept(struct iw_cm_id *id, struct iw_cm_conn_param *params) struct erdma_qp_attrs qp_attrs; int ret; + ERDMA_INC_CNT(dev, IW_ACCEPT); + erdma_cep_set_inuse(cep); erdma_cep_put(cep); @@ -1146,13 +1270,16 @@ int erdma_accept(struct iw_cm_id *id, struct iw_cm_conn_param *params) if (cep->state != ERDMA_EPSTATE_RECVD_MPAREQ) { erdma_cep_set_free(cep); erdma_cep_put(cep); - + ERDMA_INC_CNT(dev, IW_ACCEPT_FAILED); return -ECONNRESET; } qp = find_qp_by_qpn(dev, params->qpn); - if (!qp) + if (!qp) { + ERDMA_INC_CNT(dev, IW_ACCEPT_FAILED); return -ENOENT; + } + erdma_qp_get(qp); down_write(&qp->state_lock); @@ -1225,6 +1352,9 @@ int erdma_accept(struct iw_cm_id *id, struct iw_cm_conn_param *params) erdma_cep_set_free(cep); + ibdev_dbg(&dev->ibdev, "(id=0x%p, QP%d): Exit\n", id, + QP_ID(qp)); + ERDMA_INC_CNT(dev, IW_ACCEPT_SUCCESS); return 0; } @@ -1251,12 +1381,17 @@ int erdma_accept(struct iw_cm_id *id, struct iw_cm_conn_param *params) erdma_cep_set_free(cep); erdma_cep_put(cep); + ERDMA_INC_CNT(dev, IW_ACCEPT_FAILED); + return ret; } int erdma_reject(struct iw_cm_id *id, const void *pdata, u8 plen) { struct erdma_cep *cep = (struct erdma_cep *)id->provider_data; + struct erdma_dev *dev = cep->dev; + + ERDMA_INC_CNT(dev, IW_REJECT); erdma_cep_set_inuse(cep); erdma_cep_put(cep); @@ -1266,10 +1401,14 @@ int erdma_reject(struct iw_cm_id *id, const void *pdata, u8 plen) if (cep->state != ERDMA_EPSTATE_RECVD_MPAREQ) { erdma_cep_set_free(cep); erdma_cep_put(cep); - + ERDMA_INC_CNT(dev, IW_REJECT_FAILED); return -ECONNRESET; } + ibdev_dbg(&dev->ibdev, "(id=0x%p): cep->state=%d\n", id, cep->state); + ibdev_dbg(&dev->ibdev, " Reject: %d: %x\n", plen, + plen ? 
*(char *)pdata : 0); + if (__mpa_rr_revision(cep->mpa.hdr.params.bits) == MPA_REVISION_EXT_1) { cep->mpa.hdr.params.bits |= MPA_RR_FLAG_REJECT; /* reject */ erdma_send_mpareqrep(cep, pdata, plen); @@ -1289,28 +1428,44 @@ int erdma_reject(struct iw_cm_id *id, const void *pdata, u8 plen) int erdma_create_listen(struct iw_cm_id *id, int backlog) { - struct socket *s; - struct erdma_cep *cep = NULL; - int ret = 0; struct erdma_dev *dev = to_edev(id->device); int addr_family = id->local_addr.ss_family; - struct sockaddr_in *laddr = &to_sockaddr_in(id->local_addr); + struct erdma_cep *cep = NULL; + struct socket *s; + int ret = 0; - if (addr_family != AF_INET) - return -EAFNOSUPPORT; + ERDMA_INC_CNT(dev, IW_LISTEN_CREATE); - ret = sock_create(addr_family, SOCK_STREAM, IPPROTO_TCP, &s); - if (ret < 0) + if (addr_family != AF_INET) { + ERDMA_INC_CNT(dev, IW_LISTEN_IPV6); + if (!(dev->attrs.cap_flags & ERDMA_DEV_CAP_FLAGS_IPV6)) + return -EAFNOSUPPORT; + } + + ret = __sock_create(current->nsproxy->net_ns, addr_family, SOCK_STREAM, + IPPROTO_TCP, &s, 1); + if (ret < 0) { + ERDMA_INC_CNT(dev, IW_LISTEN_FAILED); return ret; + } sock_set_reuseaddr(s->sk); /* For wildcard addr, limit binding to current device only */ - if (ipv4_is_zeronet(laddr->sin_addr.s_addr)) - s->sk->sk_bound_dev_if = dev->netdev->ifindex; - ret = s->ops->bind(s, (struct sockaddr *)laddr, + if (addr_family == AF_INET) { + struct sockaddr_in *laddr = &to_sockaddr_in(id->local_addr); + + if (ipv4_is_zeronet(laddr->sin_addr.s_addr) && use_zeronet) + s->sk->sk_bound_dev_if = dev->netdev->ifindex; + ret = s->ops->bind(s, (struct sockaddr *)laddr, sizeof(struct sockaddr_in)); + } else { + struct sockaddr_in6 *laddr = &to_sockaddr_in6(id->local_addr); + + ret = s->ops->bind(s, (struct sockaddr *)laddr, + sizeof(struct sockaddr_in6)); + } if (ret) goto error; @@ -1342,12 +1497,22 @@ int erdma_create_listen(struct iw_cm_id *id, int backlog) INIT_LIST_HEAD((struct list_head *)id->provider_data); } + ibdev_dbg( + &dev->ibdev, + "(id=0x%p): dev(id)=%s, netdev=%s, id->provider_data=0x%p, cep=0x%p\n", + id, id->device->name, to_edev(id->device)->netdev->name, + id->provider_data, cep); + list_add_tail(&cep->listenq, (struct list_head *)id->provider_data); cep->state = ERDMA_EPSTATE_LISTENING; + ERDMA_INC_CNT(dev, IW_LISTEN_SUCCESS); + return 0; error: + ibdev_dbg(&dev->ibdev, " Failed: %d\n", ret); + if (cep) { erdma_cep_set_inuse(cep); @@ -1364,6 +1529,8 @@ int erdma_create_listen(struct iw_cm_id *id, int backlog) } sock_release(s); + ERDMA_INC_CNT(dev, IW_LISTEN_FAILED); + return ret; } @@ -1379,7 +1546,9 @@ static void erdma_drop_listeners(struct iw_cm_id *id) list_entry(p, struct erdma_cep, listenq); list_del(p); - + ibdev_dbg(&cep->dev->ibdev, + "(id=0x%p): drop CEP 0x%p, state %d\n", id, cep, + cep->state); erdma_cep_set_inuse(cep); if (cep->cm_id) { @@ -1399,6 +1568,11 @@ static void erdma_drop_listeners(struct iw_cm_id *id) int erdma_destroy_listen(struct iw_cm_id *id) { + struct erdma_dev *dev = to_edev(id->device); + + ibdev_dbg(&dev->ibdev, "(id=0x%p): dev(id)=%s, netdev=%s\n", id, + id->device->name, dev->netdev->name); + if (!id->provider_data) return 0; @@ -1406,6 +1580,7 @@ int erdma_destroy_listen(struct iw_cm_id *id) kfree(id->provider_data); id->provider_data = NULL; + ERDMA_INC_CNT(dev, IW_LISTEN_DESTROY); return 0; } diff --git a/drivers/infiniband/hw/erdma/erdma_cm.h b/drivers/infiniband/hw/erdma/erdma_cm.h index a26d807701884b8d3559a9a3a10dec4a8b383ac4..2a6939b9a01112d7443d66d9885cc499bae51233 100644 --- 
a/drivers/infiniband/hw/erdma/erdma_cm.h +++ b/drivers/infiniband/hw/erdma/erdma_cm.h @@ -138,6 +138,7 @@ struct erdma_cm_work { }; #define to_sockaddr_in(a) (*(struct sockaddr_in *)(&(a))) +#define to_sockaddr_in6(a) (*(struct sockaddr_in6 *)(&(a))) static inline int getname_peer(struct socket *s, struct sockaddr_storage *a) { diff --git a/drivers/infiniband/hw/erdma/erdma_cmd.c b/drivers/infiniband/hw/erdma/erdma_cmd.c new file mode 100644 index 0000000000000000000000000000000000000000..10c58b081b2d35ce20298c8a35922ba687eb26cd --- /dev/null +++ b/drivers/infiniband/hw/erdma/erdma_cmd.c @@ -0,0 +1,103 @@ +// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause + +/* Authors: Cheng Xu */ +/* Kai Shen */ +/* Copyright (c) 2020-2022, Alibaba Group. */ + +#include + +#include "erdma.h" + +extern bool compat_mode; + +int erdma_query_resource(struct erdma_dev *dev, u32 mod, u32 op, u32 index, + void *out, u32 len) +{ + struct erdma_cmdq_query_req req; + dma_addr_t dma_addr; + void *resp; + int err; + + erdma_cmdq_build_reqhdr(&req.hdr, mod, op); + + resp = dma_pool_zalloc(dev->resp_pool, GFP_KERNEL, &dma_addr); + if (!resp) + return -ENOMEM; + + req.index = index; + req.target_addr = dma_addr; + req.target_length = ERDMA_HW_RESP_SIZE; + + err = erdma_post_cmd_wait(&dev->cmdq, &req, sizeof(req), NULL, NULL); + if (err) + goto out; + + if (out) + memcpy(out, resp, len); + +out: + dma_pool_free(dev->resp_pool, resp, dma_addr); + + return err; +} + +int erdma_query_ext_attr(struct erdma_dev *dev, void *out) +{ + BUILD_BUG_ON(sizeof(struct erdma_cmdq_query_ext_attr_resp) > + ERDMA_HW_RESP_SIZE); + + return erdma_query_resource( + dev, CMDQ_SUBMOD_COMMON, CMDQ_OPCODE_GET_EXT_ATTR, 0, out, + sizeof(struct erdma_cmdq_query_ext_attr_resp)); +} + +int erdma_set_ext_attr(struct erdma_dev *dev, struct erdma_ext_attr *attr) +{ + struct erdma_cmdq_set_ext_attr_req req; + + erdma_cmdq_build_reqhdr(&req.hdr, CMDQ_SUBMOD_COMMON, + CMDQ_OPCODE_SET_EXT_ATTR); + + memcpy(&req.attr, attr, sizeof(*attr)); + + return erdma_post_cmd_wait(&dev->cmdq, &req, sizeof(req), NULL, NULL); +} + +int erdma_set_dack_count(struct erdma_dev *dev, u32 value) +{ + struct erdma_ext_attr attr; + + if (value > 0xff) + return -EINVAL; + + attr.attr_mask = ERDMA_EXT_ATTR_DACK_COUNT_MASK; + attr.dack_count = (u8)value; + + return erdma_set_ext_attr(dev, &attr); +} + +int erdma_enable_legacy_mode(struct erdma_dev *dev, u32 value) +{ + struct erdma_ext_attr attr; + + attr.attr_mask = ERDMA_EXT_ATTR_LEGACY_MODE_MASK; + attr.enable = value != 0 ? 1 : 0; + + return erdma_set_ext_attr(dev, &attr); +} + +void erdma_sync_info(struct erdma_dev *dev) +{ + struct erdma_cmdq_sync_info_req req; + + memset(&req, 0, sizeof(req)); + erdma_cmdq_build_reqhdr(&req.hdr, CMDQ_SUBMOD_COMMON, + CMDQ_OPCODE_SYNC_INFO); + req.version = + FIELD_PREP(ERDMA_SYNC_INFO_MINOR_VER, ERDMA_MINOR_VER) | + FIELD_PREP(ERDMA_SYNC_INFO_MEDIUM_VER, ERDMA_MEDIUM_VER) | + FIELD_PREP(ERDMA_SYNC_INFO_MAJOR_VER, ERDMA_MAJOR_VER) | + FIELD_PREP(ERDMA_SYNC_INFO_COMPAT_MODE, compat_mode ? 
1 : 0); + + erdma_post_cmd_wait(&dev->cmdq, &req, sizeof(req), NULL, NULL); +} diff --git a/drivers/infiniband/hw/erdma/erdma_cmdq.c b/drivers/infiniband/hw/erdma/erdma_cmdq.c index a151a7bdd5049dd14da57dfece144bd66b8168fc..f9005de44a1126ada4c5c927693624ea2f79b4c1 100644 --- a/drivers/infiniband/hw/erdma/erdma_cmdq.c +++ b/drivers/infiniband/hw/erdma/erdma_cmdq.c @@ -6,6 +6,14 @@ #include "erdma.h" +static void init_cmdq_cq_dbrec(struct erdma_cmdq *cmdq) +{ + u64 db_data = FIELD_PREP(ERDMA_CQDB_CMDSN_MASK, 0x3) | + FIELD_PREP(ERDMA_CQDB_IDX_MASK, 0xFF); + + *cmdq->cq.dbrec = db_data; +} + static void arm_cmdq_cq(struct erdma_cmdq *cmdq) { struct erdma_dev *dev = container_of(cmdq, struct erdma_dev, cmdq); @@ -14,7 +22,7 @@ static void arm_cmdq_cq(struct erdma_cmdq *cmdq) FIELD_PREP(ERDMA_CQDB_CMDSN_MASK, cmdq->cq.cmdsn) | FIELD_PREP(ERDMA_CQDB_IDX_MASK, cmdq->cq.cmdsn); - *cmdq->cq.db_record = db_data; + *cmdq->cq.dbrec = db_data; writeq(db_data, dev->func_bar + ERDMA_CMDQ_CQDB_REG); atomic64_inc(&cmdq->cq.armed_num); @@ -25,7 +33,7 @@ static void kick_cmdq_db(struct erdma_cmdq *cmdq) struct erdma_dev *dev = container_of(cmdq, struct erdma_dev, cmdq); u64 db_data = FIELD_PREP(ERDMA_CMD_HDR_WQEBB_INDEX_MASK, cmdq->sq.pi); - *cmdq->sq.db_record = db_data; + *cmdq->sq.dbrec = db_data; writeq(db_data, dev->func_bar + ERDMA_CMDQ_SQDB_REG); } @@ -65,17 +73,20 @@ static int erdma_cmdq_wait_res_init(struct erdma_dev *dev, { int i; - cmdq->wait_pool = - devm_kcalloc(&dev->pdev->dev, cmdq->max_outstandings, - sizeof(struct erdma_comp_wait), GFP_KERNEL); + cmdq->wait_pool = devm_kcalloc(&dev->pdev->dev, cmdq->max_outstandings, + sizeof(struct erdma_comp_wait), + GFP_KERNEL); if (!cmdq->wait_pool) return -ENOMEM; spin_lock_init(&cmdq->lock); - cmdq->comp_wait_bitmap = devm_bitmap_zalloc( - &dev->pdev->dev, cmdq->max_outstandings, GFP_KERNEL); - if (!cmdq->comp_wait_bitmap) + cmdq->comp_wait_bitmap = devm_kcalloc( + &dev->pdev->dev, BITS_TO_LONGS(cmdq->max_outstandings), + sizeof(unsigned long), GFP_KERNEL); + if (!cmdq->comp_wait_bitmap) { + devm_kfree(&dev->pdev->dev, cmdq->wait_pool); return -ENOMEM; + } for (i = 0; i < cmdq->max_outstandings; i++) { init_completion(&cmdq->wait_pool[i].wait_event); @@ -96,13 +107,13 @@ static int erdma_cmdq_sq_init(struct erdma_dev *dev) buf_size = sq->depth << SQEBB_SHIFT; - sq->qbuf = - dma_alloc_coherent(&dev->pdev->dev, WARPPED_BUFSIZE(buf_size), - &sq->qbuf_dma_addr, GFP_KERNEL); + sq->qbuf = dma_alloc_coherent(&dev->pdev->dev, + WARPPED_BUFSIZE(buf_size), + &sq->qbuf_dma_addr, GFP_KERNEL); if (!sq->qbuf) return -ENOMEM; - sq->db_record = (u64 *)(sq->qbuf + buf_size); + sq->dbrec = (u64 *)(sq->qbuf + buf_size); spin_lock_init(&sq->lock); @@ -117,6 +128,16 @@ static int erdma_cmdq_sq_init(struct erdma_dev *dev) return 0; } +static void erdma_cmdq_sq_destroy(struct erdma_dev *dev) +{ + struct erdma_cmdq *cmdq = &dev->cmdq; + + dma_free_coherent(&dev->pdev->dev, + (cmdq->sq.depth << SQEBB_SHIFT) + + ERDMA_EXTRA_BUFFER_SIZE, + cmdq->sq.qbuf, cmdq->sq.qbuf_dma_addr); +} + static int erdma_cmdq_cq_init(struct erdma_dev *dev) { struct erdma_cmdq *cmdq = &dev->cmdq; @@ -134,10 +155,12 @@ static int erdma_cmdq_cq_init(struct erdma_dev *dev) spin_lock_init(&cq->lock); - cq->db_record = (u64 *)(cq->qbuf + buf_size); + cq->dbrec = (u64 *)(cq->qbuf + buf_size); atomic64_set(&cq->armed_num, 0); + init_cmdq_cq_dbrec(cmdq); + erdma_reg_write32(dev, ERDMA_REGS_CMDQ_CQ_ADDR_H_REG, upper_32_bits(cq->qbuf_dma_addr)); erdma_reg_write32(dev, ERDMA_REGS_CMDQ_CQ_ADDR_L_REG, @@ -148,6 
+171,16 @@ static int erdma_cmdq_cq_init(struct erdma_dev *dev) return 0; } +static void erdma_cmdq_cq_destroy(struct erdma_dev *dev) +{ + struct erdma_cmdq *cmdq = &dev->cmdq; + + dma_free_coherent(&dev->pdev->dev, + (cmdq->cq.depth << CQE_SHIFT) + + ERDMA_EXTRA_BUFFER_SIZE, + cmdq->cq.qbuf, cmdq->cq.qbuf_dma_addr); +} + static int erdma_cmdq_eq_init(struct erdma_dev *dev) { struct erdma_cmdq *cmdq = &dev->cmdq; @@ -167,7 +200,7 @@ static int erdma_cmdq_eq_init(struct erdma_dev *dev) atomic64_set(&eq->event_num, 0); eq->db = dev->func_bar + ERDMA_REGS_CEQ_DB_BASE_REG; - eq->db_record = (u64 *)(eq->qbuf + buf_size); + eq->dbrec = (u64 *)(eq->qbuf + buf_size); erdma_reg_write32(dev, ERDMA_REGS_CMDQ_EQ_ADDR_H_REG, upper_32_bits(eq->qbuf_dma_addr)); @@ -180,6 +213,16 @@ static int erdma_cmdq_eq_init(struct erdma_dev *dev) return 0; } +static void erdma_cmdq_eq_destroy(struct erdma_dev *dev) +{ + struct erdma_cmdq *cmdq = &dev->cmdq; + + dma_free_coherent(&dev->pdev->dev, + (cmdq->eq.depth << EQE_SHIFT) + + ERDMA_EXTRA_BUFFER_SIZE, + cmdq->eq.qbuf, cmdq->eq.qbuf_dma_addr); +} + int erdma_cmdq_init(struct erdma_dev *dev) { struct erdma_cmdq *cmdq = &dev->cmdq; @@ -211,16 +254,9 @@ int erdma_cmdq_init(struct erdma_dev *dev) return 0; err_destroy_cq: - dma_free_coherent(&dev->pdev->dev, - (cmdq->cq.depth << CQE_SHIFT) + - ERDMA_EXTRA_BUFFER_SIZE, - cmdq->cq.qbuf, cmdq->cq.qbuf_dma_addr); - + erdma_cmdq_cq_destroy(dev); err_destroy_sq: - dma_free_coherent(&dev->pdev->dev, - (cmdq->sq.depth << SQEBB_SHIFT) + - ERDMA_EXTRA_BUFFER_SIZE, - cmdq->sq.qbuf, cmdq->sq.qbuf_dma_addr); + erdma_cmdq_sq_destroy(dev); return err; } @@ -238,18 +274,9 @@ void erdma_cmdq_destroy(struct erdma_dev *dev) clear_bit(ERDMA_CMDQ_STATE_OK_BIT, &cmdq->state); - dma_free_coherent(&dev->pdev->dev, - (cmdq->eq.depth << EQE_SHIFT) + - ERDMA_EXTRA_BUFFER_SIZE, - cmdq->eq.qbuf, cmdq->eq.qbuf_dma_addr); - dma_free_coherent(&dev->pdev->dev, - (cmdq->sq.depth << SQEBB_SHIFT) + - ERDMA_EXTRA_BUFFER_SIZE, - cmdq->sq.qbuf, cmdq->sq.qbuf_dma_addr); - dma_free_coherent(&dev->pdev->dev, - (cmdq->cq.depth << CQE_SHIFT) + - ERDMA_EXTRA_BUFFER_SIZE, - cmdq->cq.qbuf, cmdq->cq.qbuf_dma_addr); + erdma_cmdq_eq_destroy(dev); + erdma_cmdq_cq_destroy(dev); + erdma_cmdq_sq_destroy(dev); } static void *get_next_valid_cmdq_cqe(struct erdma_cmdq *cmdq) @@ -283,6 +310,8 @@ static void push_cmdq_sqe(struct erdma_cmdq *cmdq, u64 *req, size_t req_len, FIELD_PREP(ERDMA_CMD_HDR_WQEBB_CNT_MASK, cmdq->sq.wqebb_cnt - 1); *wqe = cpu_to_le64(hdr); + cmdq->sq.total_cmds++; + kick_cmdq_db(cmdq); } @@ -313,9 +342,8 @@ static int erdma_poll_single_cmd_completion(struct erdma_cmdq *cmdq) comp_wait->cmd_status = ERDMA_CMD_STATUS_FINISHED; comp_wait->comp_status = FIELD_GET(ERDMA_CQE_HDR_SYNDROME_MASK, hdr0); - cmdq->sq.ci += cmdq->sq.wqebb_cnt; - /* Copy 16B comp data after cqe hdr to outer */ be32_to_cpu_array(comp_wait->comp_data, cqe + 2, 4); + cmdq->sq.ci += cmdq->sq.wqebb_cnt; if (cmdq->use_event) complete(&comp_wait->wait_event); @@ -337,6 +365,7 @@ static void erdma_polling_cmd_completions(struct erdma_cmdq *cmdq) if (erdma_poll_single_cmd_completion(cmdq)) break; + cmdq->sq.total_comp_cmds += comp_num; if (comp_num && cmdq->use_event) arm_cmdq_cq(cmdq); @@ -410,6 +439,7 @@ void erdma_cmdq_build_reqhdr(u64 *hdr, u32 mod, u32 op) int erdma_post_cmd_wait(struct erdma_cmdq *cmdq, void *req, u32 req_size, u64 *resp0, u64 *resp1) { + struct erdma_dev *dev = container_of(cmdq, struct erdma_dev, cmdq); struct erdma_comp_wait *comp_wait; int ret; @@ -443,8 +473,7 @@ 
int erdma_post_cmd_wait(struct erdma_cmdq *cmdq, void *req, u32 req_size, goto out; } - if (comp_wait->comp_status) - ret = -EIO; + ret = -(int)comp_wait->comp_status; if (resp0 && resp1) { *resp0 = *((u64 *)&comp_wait->comp_data[0]); @@ -455,5 +484,10 @@ int erdma_post_cmd_wait(struct erdma_cmdq *cmdq, void *req, u32 req_size, out: up(&cmdq->credits); + if (ret) + ibdev_err_ratelimited(&dev->ibdev, + "CMD(hdr 0x%llx) return with error %d\n", + *(u64 *)req, ret); + return ret; } diff --git a/drivers/infiniband/hw/erdma/erdma_compat.c b/drivers/infiniband/hw/erdma/erdma_compat.c new file mode 100644 index 0000000000000000000000000000000000000000..c2239c19c0d16d749fcbb37250f6bc34ef65705e --- /dev/null +++ b/drivers/infiniband/hw/erdma/erdma_compat.c @@ -0,0 +1,409 @@ +// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause +/* Authors: Cheng Xu */ +/* Kai Shen */ +/* Copyright (c) 2020-2022, Alibaba Group. */ +#include +#include +#include +#include +#include +#include +#include + +#include "erdma_verbs.h" + +#include +#include + +struct erdma_net { + struct list_head erdma_list; + struct socket *rsvd_sock[16]; +}; + +static unsigned int erdma_net_id; + +bool compat_mode; +module_param(compat_mode, bool, 0444); +MODULE_PARM_DESC(compat_mode, "compat mode support"); + +bool legacy_mode; +module_param(legacy_mode, bool, 0444); +MODULE_PARM_DESC(legacy_mode, "legacy mode support"); + +u16 reserve_ports_base = 0x7790; +module_param(reserve_ports_base, ushort, 0444); +MODULE_PARM_DESC(reserve_ports_base, "ports reserved in compat mode"); + +bool use_zeronet; +module_param(use_zeronet, bool, 0444); +MODULE_PARM_DESC(use_zeronet, "can use zeronet"); + +#include "compat/sw.h" +#include "compat/sw_loc.h" +#include "compat/sw_queue.h" +#include "compat/sw_hw_counters.h" + +int erdma_create_mad_qp(struct ib_qp *ibqp, struct ib_qp_init_attr *init, + struct ib_udata *udata) +{ + struct erdma_dev *dev = to_edev(ibqp->device); + struct erdma_cq *scq = to_ecq(init->send_cq); + struct erdma_cq *rcq = to_ecq(init->recv_cq); + struct erdma_qp *qp = to_eqp(ibqp); + struct sw_dev *sw = &dev->sw_dev; + struct sw_qp *sw_qp; + int err; + + if (udata) + return -EINVAL; + + err = sw_qp_chk_init(sw, init); + if (err) + goto err1; + + sw_qp = kzalloc(sizeof(*sw_qp), GFP_KERNEL); + if (!qp) { + err = -ENOMEM; + goto err1; + } + kref_init(&sw_qp->pelem.ref_cnt); + memcpy(&sw_qp->ibqp, &qp->ibqp, sizeof(qp->ibqp)); + + scq->is_soft = true; + rcq->is_soft = true; + qp->sw_qp = sw_qp; + sw_qp->master = qp; + sw_qp->ibqp.device = &sw->ib_dev; + + err = sw_qp_from_init(sw, sw_qp, init, NULL, qp->ibqp.pd, NULL); + if (err) + goto err2; + + return 0; + +err2: + kfree(sw_qp); +err1: + return err; +} + +void erdma_destroy_mad_qp(struct ib_qp *ibqp) +{ + struct erdma_qp *qp = to_eqp(ibqp); + + sw_qp_destroy(qp->sw_qp); + cleanup_sw_qp(qp->sw_qp); + kfree(qp->sw_qp); +} + +int erdma_modify_mad_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, + int attr_mask, struct ib_udata *udata) +{ + struct erdma_qp *qp = to_eqp(ibqp); + int ret; + + ret = sw_modify_qp(&qp->sw_qp->ibqp, attr, attr_mask, udata); + return ret; +} + +int erdma_post_send_mad(struct ib_qp *ibqp, const struct ib_send_wr *send_wr, + const struct ib_send_wr **bad_send_wr) +{ + struct erdma_qp *qp = to_eqp(ibqp); + + return sw_post_send(&qp->sw_qp->ibqp, send_wr, bad_send_wr); +} + +int erdma_post_recv_mad(struct ib_qp *ibqp, const struct ib_recv_wr *recv_wr, + const struct ib_recv_wr **bad_recv_wr) +{ + struct erdma_qp *qp = to_eqp(ibqp); + + return 
sw_post_recv(&qp->sw_qp->ibqp, recv_wr, bad_recv_wr); +} + +int erdma_mad_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc) +{ + struct erdma_cq *cq = to_ecq(ibcq); + unsigned long flags; + int ret; + + spin_lock_irqsave(&cq->kern_cq.lock, flags); + ret = sw_poll_cq(&cq->sw_cq->ibcq, num_entries, wc); + spin_unlock_irqrestore(&cq->kern_cq.lock, flags); + + return ret; +} + +int erdma_mad_req_notify_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags) +{ + struct erdma_cq *cq = to_ecq(ibcq); + + return sw_req_notify_cq(&cq->sw_cq->ibcq, flags); +} + +int attach_sw_dev(struct erdma_dev *dev) +{ + struct sw_dev *sw = &dev->sw_dev; + struct crypto_shash *tfm; + int err; + + if (!compat_mode) + return 0; + + dev->sw_dev.master = dev; + dev->sw_dev.ndev = dev->netdev; + + err = sw_init(sw); + if (err) + return err; + + sw_set_mtu(sw, dev->netdev->mtu); + + tfm = crypto_alloc_shash("crc32", 0, 0); + if (IS_ERR(tfm)) { + sw_dealloc(sw); + pr_err("failed to allocate crc algorithm err:%ld\n", + PTR_ERR(tfm)); + return PTR_ERR(tfm); + } + sw->tfm = tfm; + + return 0; +} + +void detach_sw_dev(struct erdma_dev *dev) +{ + if (!compat_mode) + return; + + sw_dealloc(&dev->sw_dev); +} + +int erdma_create_ah(struct ib_ah *ibah, + struct rdma_ah_init_attr *init_attr, + struct ib_udata *udata) +{ + if (!compat_mode) + return -EOPNOTSUPP; + return sw_create_ah(ibah, init_attr->ah_attr, udata); +} + +int erdma_destroy_ah(struct ib_ah *ibah, u32 flags) +{ + struct sw_ah *ah = to_rah(ibah); + + if (!compat_mode) + return -EOPNOTSUPP; + + sw_drop_ref(ah); + + return 0; +} + +int erdma_query_pkey(struct ib_device *ibdev, port_t port, u16 index, u16 *pkey) +{ + if (index > 0) + return -EINVAL; + + *pkey = 0xffff; + return 0; +} + +enum rdma_link_layer erdma_get_link_layer(struct ib_device *dev, + port_t port_num) +{ + return IB_LINK_LAYER_ETHERNET; +} + +int erdma_add_gid(const struct ib_gid_attr *attr, void **context) +{ + return 0; +} + +int erdma_del_gid(const struct ib_gid_attr *attr, void **context) +{ + return 0; +} + +void erdma_gen_port_from_qpn(u32 sip, u32 dip, u32 lqpn, u32 rqpn, u16 *sport, + u16 *dport) +{ + /* select lqpn 0, select rqpn 1 */ + u32 select_type = 1; + + lqpn &= 0xFFFFF; + rqpn &= 0xFFFFF; + + if (dip < sip || (dip == sip && lqpn < rqpn)) + select_type = 0; + + if (select_type) { + *sport = reserve_ports_base + upper_16_bits(rqpn); + *dport = lower_16_bits(rqpn); + } else { + *dport = reserve_ports_base + upper_16_bits(lqpn); + *sport = lower_16_bits(lqpn); + } +} + +static int erdma_av_from_attr(struct erdma_qp *qp, struct ib_qp_attr *attr) +{ + struct rdma_ah_attr *ah_attr = &attr->ah_attr; + const struct ib_gid_attr *sgid_attr = ah_attr->grh.sgid_attr; + int ntype; + union ib_gid sgid; + + if (ah_attr->type != RDMA_AH_ATTR_TYPE_ROCE) { + ibdev_dbg(&qp->dev->ibdev, "unsupport ah_attr type %u.\n", + ah_attr->type); + return -EOPNOTSUPP; + } + + ntype = rdma_gid_attr_network_type(sgid_attr); + sgid = sgid_attr->gid; + + ibdev_dbg(&qp->dev->ibdev, "gid type:%d, sgid: %pI6\n", ntype, + sgid.raw); + + rdma_gid2ip((struct sockaddr *)&qp->attrs.laddr, &sgid); + rdma_gid2ip((struct sockaddr *)&qp->attrs.raddr, + &rdma_ah_read_grh(ah_attr)->dgid); + + ibdev_dbg(&qp->dev->ibdev, "dgid: %pI6\n", + rdma_ah_read_grh(ah_attr)->dgid.raw); + + ibdev_dbg(&qp->dev->ibdev, "laddr:0x%x\n", + ntohl(qp->attrs.laddr.in.sin_addr.s_addr)); + ibdev_dbg(&qp->dev->ibdev, "raddr:0x%x\n", + ntohl(qp->attrs.raddr.in.sin_addr.s_addr)); + return 0; +} + +int erdma_handle_compat_attr(struct erdma_qp *qp, 
struct ib_qp_attr *attr, + int attr_mask) +{ + ibdev_dbg(&qp->dev->ibdev, "attr mask: %x, av: %d, state:%d\n", + attr_mask, attr_mask & IB_QP_AV, attr_mask & IB_QP_STATE); + + if (attr_mask & IB_QP_AV) + erdma_av_from_attr(qp, attr); + + if (attr_mask & IB_QP_DEST_QPN) { + ibdev_dbg(&qp->dev->ibdev, "get remote qpn %u\n", + attr->dest_qp_num); + qp->attrs.remote_qp_num = attr->dest_qp_num; + } + + if (attr_mask & IB_QP_SQ_PSN) { + ibdev_dbg(&qp->dev->ibdev, "get sqsn:%u\n", attr->sq_psn); + qp->attrs.sq_psn = attr->sq_psn; + } + + if (attr_mask & IB_QP_RQ_PSN) { + ibdev_dbg(&qp->dev->ibdev, "get rqsn:%u\n", attr->rq_psn); + qp->attrs.rq_psn = attr->rq_psn; + } + + return 0; +} + +static int erdma_port_init(struct net *net, struct socket **rsvd_sock) +{ + struct sockaddr_in laddr; + int ret = 0, i, j; + + for (i = 0; i < 16; i++) { + ret = __sock_create(net, AF_INET, + SOCK_STREAM, IPPROTO_TCP, &rsvd_sock[i], 1); + if (ret < 0) + goto err_out; + memset(&laddr, 0, sizeof(struct sockaddr_in)); + laddr.sin_port = htons(reserve_ports_base + i); + ret = rsvd_sock[i]->ops->bind(rsvd_sock[i], + (struct sockaddr *)&laddr, + sizeof(struct sockaddr_in)); + if (ret) { + sock_release(rsvd_sock[i]); + goto err_out; + } + } + + return 0; + +err_out: + for (j = 0; j < i; j++) { + sock_release(rsvd_sock[j]); + rsvd_sock[j] = NULL; + } + + return ret; +} + +static void erdma_port_release(struct socket **rsvd_sock) +{ + int i; + + if (!compat_mode) + return; + + for (i = 0; i < 16; i++) + if (rsvd_sock[i]) + sock_release(rsvd_sock[i]); +} + +static __net_init int erdma_init_net(struct net *net) +{ + struct erdma_net *node = net_generic(net, erdma_net_id); + + return erdma_port_init(net, node->rsvd_sock); +} + +static void __net_exit erdma_exit_batch_net(struct list_head *net_list) +{ + struct net *net; + LIST_HEAD(list); + + rtnl_lock(); + list_for_each_entry(net, net_list, exit_list) { + struct erdma_net *node = net_generic(net, erdma_net_id); + + erdma_port_release(node->rsvd_sock); + } + rtnl_unlock(); +} + +static struct pernet_operations erdma_net_ops = { + .init = erdma_init_net, + .exit_batch = erdma_exit_batch_net, + .id = &erdma_net_id, + .size = sizeof(struct erdma_net), +}; + +int erdma_compat_init(void) +{ + int ret; + + if (!compat_mode) + return 0; + + ret = sw_net_init(); + if (ret) + return ret; + + ret = register_pernet_subsys(&erdma_net_ops); + if (ret) + sw_net_exit(); + + return ret; +} + +void erdma_compat_exit(void) +{ + if (!compat_mode) + return; + + unregister_pernet_subsys(&erdma_net_ops); + + sw_net_exit(); +} diff --git a/drivers/infiniband/hw/erdma/erdma_compat.h b/drivers/infiniband/hw/erdma/erdma_compat.h new file mode 100644 index 0000000000000000000000000000000000000000..db5f04b630c45d900543fc958670bff10a81ca35 --- /dev/null +++ b/drivers/infiniband/hw/erdma/erdma_compat.h @@ -0,0 +1,54 @@ +/* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */ + +/* Authors: Cheng Xu */ +/* Kai Shen */ +/* Copyright (c) 2020-2022, Alibaba Group. 
*/ + +#ifndef __ERDMA_SW_H__ +#define __ERDMA_SW_H__ + +#include "kcompat.h" +#include "erdma_verbs.h" + +int erdma_compat_init(void); +void erdma_compat_exit(void); + +void erdma_gen_port_from_qpn(u32 sip, u32 dip, u32 lqpn, u32 rqpn, u16 *sport, + u16 *dport); + +int erdma_handle_compat_attr(struct erdma_qp *qp, struct ib_qp_attr *attr, + int attr_mask); + +int erdma_add_gid(const struct ib_gid_attr *attr, void **context); + +int erdma_del_gid(const struct ib_gid_attr *attr, void **context); + +int erdma_create_ah(struct ib_ah *ibah, + struct rdma_ah_init_attr *init_attr, + struct ib_udata *udata); + +int erdma_destroy_ah(struct ib_ah *ibah, u32 flags); + +#include "compat/sw_verbs.h" +#include "compat/sw_net.h" + +int erdma_modify_mad_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, + int attr_mask, struct ib_udata *udata); + +int erdma_create_mad_qp(struct ib_qp *ibqp, struct ib_qp_init_attr *attrs, + struct ib_udata *udata); +int erdma_post_send_mad(struct ib_qp *ibqp, const struct ib_send_wr *send_wr, + const struct ib_send_wr **bad_send_wr); +int erdma_post_recv_mad(struct ib_qp *ibqp, const struct ib_recv_wr *recv_wr, + const struct ib_recv_wr **bad_recv_wr); + +int erdma_create_qp_mad(struct ib_qp *ibqp, struct ib_qp_init_attr *attrs, + struct ib_udata *udata); +int attach_sw_dev(struct erdma_dev *dev); +void detach_sw_dev(struct erdma_dev *dev); +int erdma_mad_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc); +int erdma_mad_req_notify_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags); +void erdma_destroy_mad_qp(struct ib_qp *ibqp); +void detach_sw_pd(struct erdma_pd *pd); +void detach_sw_cq(struct erdma_cq *cq); +#endif diff --git a/drivers/infiniband/hw/erdma/erdma_cq.c b/drivers/infiniband/hw/erdma/erdma_cq.c index c1cb5568eab2d590f33db67d81cee70fe9fdbbc5..07ec41239c68c76988cdb7e9a88bea128e8d8bcf 100644 --- a/drivers/infiniband/hw/erdma/erdma_cq.c +++ b/drivers/infiniband/hw/erdma/erdma_cq.c @@ -6,6 +6,8 @@ #include "erdma_verbs.h" +extern bool compat_mode; + static void *get_next_valid_cqe(struct erdma_cq *cq) { __be32 *cqe = get_queue_entry(cq->kern_cq.qbuf, cq->kern_cq.ci, @@ -26,25 +28,38 @@ static void notify_cq(struct erdma_cq *cq, u8 solcitied) FIELD_PREP(ERDMA_CQDB_CMDSN_MASK, cq->kern_cq.cmdsn) | FIELD_PREP(ERDMA_CQDB_CI_MASK, cq->kern_cq.ci); - *cq->kern_cq.db_record = db_data; + *cq->kern_cq.dbrec = db_data; writeq(db_data, cq->kern_cq.db); } int erdma_req_notify_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags) { struct erdma_cq *cq = to_ecq(ibcq); + u16 dim_timeout = cq->dim.timeout; unsigned long irq_flags; int ret = 0; - spin_lock_irqsave(&cq->kern_cq.lock, irq_flags); + if (compat_mode && unlikely(cq->is_soft)) + return erdma_mad_req_notify_cq(ibcq, flags); - notify_cq(cq, (flags & IB_CQ_SOLICITED_MASK) == IB_CQ_SOLICITED); + spin_lock_irqsave(&cq->kern_cq.lock, irq_flags); - if ((flags & IB_CQ_REPORT_MISSED_EVENTS) && get_next_valid_cqe(cq)) + if ((flags & IB_CQ_REPORT_MISSED_EVENTS) && get_next_valid_cqe(cq)) { ret = 1; + goto unlock; + } - cq->kern_cq.notify_cnt++; - + if (!dim_timeout) { + notify_cq(cq, + (flags & IB_CQ_SOLICITED_MASK) == IB_CQ_SOLICITED); + cq->kern_cq.notify_cnt++; + } else { + cq->dim.flags |= flags; + hrtimer_start(&cq->dim.timer, + ns_to_ktime(dim_timeout * NSEC_PER_USEC), + HRTIMER_MODE_REL_PINNED); + } +unlock: spin_unlock_irqrestore(&cq->kern_cq.lock, irq_flags); return ret; @@ -64,8 +79,6 @@ static const enum ib_wc_opcode wc_mapping_table[ERDMA_NUM_OPCODES] = { [ERDMA_OP_REG_MR] = IB_WC_REG_MR, 
[ERDMA_OP_LOCAL_INV] = IB_WC_LOCAL_INV, [ERDMA_OP_READ_WITH_INV] = IB_WC_RDMA_READ, - [ERDMA_OP_ATOMIC_CAS] = IB_WC_COMP_SWAP, - [ERDMA_OP_ATOMIC_FAA] = IB_WC_FETCH_ADD, }; static const struct { @@ -133,7 +146,7 @@ static int erdma_poll_one_cqe(struct erdma_cq *cq, struct ib_wc *wc) cqe_hdr = be32_to_cpu(cqe->hdr); qp = find_qp_by_qpn(dev, qpn); - if (!qp) + if (!qp || (qp->attrs.flags & ERDMA_QP_IN_DESTROY)) return ERDMA_POLLCQ_NO_QP; kern_qp = &qp->kern_qp; @@ -153,6 +166,11 @@ static int erdma_poll_one_cqe(struct erdma_cq *cq, struct ib_wc *wc) } else { id_table = kern_qp->rwr_tbl; depth = qp->attrs.rq_size; + /* Prevent rqe out of range from HW */ + if (kern_qp->rq_pi == wqe_idx || + (u16)(kern_qp->rq_pi - wqe_idx) > (u16)depth) + syndrome = ERDMA_WC_GENERAL_ERR; + kern_qp->rq_ci++; } wc->wr_id = id_table[wqe_idx & (depth - 1)]; wc->byte_len = be32_to_cpu(cqe->size); @@ -184,6 +202,9 @@ int erdma_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc) unsigned long flags; int npolled, ret; + if (compat_mode && unlikely(cq->is_soft)) + return erdma_mad_poll_cq(ibcq, num_entries, wc); + spin_lock_irqsave(&cq->kern_cq.lock, flags); for (npolled = 0; npolled < num_entries;) { @@ -201,3 +222,25 @@ int erdma_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc) return npolled; } + +enum hrtimer_restart cq_timer_fn(struct hrtimer *t) +{ + struct erdma_cq *cq = container_of(t, struct erdma_cq, dim.timer); + + notify_cq(cq, + (cq->dim.flags & IB_CQ_SOLICITED_MASK) == IB_CQ_SOLICITED); + cq->kern_cq.notify_cnt++; + cq->dim.flags = 0; + + return HRTIMER_NORESTART; +} + +#define DIM_OFF_THRESHOLD 3 +int erdma_modify_cq(struct ib_cq *ibcq, u16 cq_count, u16 cq_period) +{ + struct erdma_cq *cq = to_ecq(ibcq); + + cq->dim.timeout = cq_period >= DIM_OFF_THRESHOLD ? cq_period : 0; + + return 0; +} diff --git a/drivers/infiniband/hw/erdma/erdma_debugfs.c b/drivers/infiniband/hw/erdma/erdma_debugfs.c new file mode 100644 index 0000000000000000000000000000000000000000..37d57e1f88c962a23bd56a478b8cc04540ed6502 --- /dev/null +++ b/drivers/infiniband/hw/erdma/erdma_debugfs.c @@ -0,0 +1,319 @@ +// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause + +/* Authors: Cheng Xu */ +/* Kai Shen */ +/* Copyright (c) 2020-2022, Alibaba Group. */ + +#include +#include +#include + +#include "erdma.h" + +struct dentry *erdma_debugfs_root; +EXPORT_SYMBOL(erdma_debugfs_root); + +static int erdma_query_cc_profiler_list(struct erdma_dev *dev, void *out) +{ + BUILD_BUG_ON(sizeof(struct erdma_cmdq_query_cc_profiler_list_resp) > + ERDMA_HW_RESP_SIZE); + + return erdma_query_resource(dev, CMDQ_SUBMOD_COMMON, + CMDQ_OPCODE_QUERY_CC_PROFILER_LIST, 0, out, + sizeof(struct erdma_cmdq_query_cc_profiler_list_resp)); +} + +static int erdma_query_cc_profiler_name(struct erdma_dev *dev, u32 index, void *out) +{ + BUILD_BUG_ON(sizeof(struct erdma_cmdq_query_cc_profiler_name_resp) > + ERDMA_HW_RESP_SIZE); + + return erdma_query_resource(dev, CMDQ_SUBMOD_COMMON, + CMDQ_OPCODE_QUERY_CC_PROFILER_NAME, index, out, + sizeof(struct erdma_cmdq_query_cc_profiler_name_resp)); +} + +static int erdma_set_cc_profiler(struct erdma_dev *dev, u32 value) +{ + struct erdma_ext_attr attr; + + attr.attr_mask = ERDMA_EXT_ATTR_CC_PROFILER_MASK; + attr.enable = value < (u32)ERDMA_HW_CC_PROFILER_NUM ? 
1 : 0; + attr.cc_profiler = (u16)value; + + return erdma_set_ext_attr(dev, &attr); +} + +static ssize_t tlp_read(struct file *filp, char __user *buf, size_t count, + loff_t *pos) +{ + struct erdma_cmdq_query_ext_attr_resp resp; + struct erdma_dev *dev; + char cbuf[40]; + int ret; + + dev = filp->private_data; + ret = erdma_query_ext_attr(dev, &resp); + if (ret) + return ret; + + ret = snprintf(cbuf, sizeof(cbuf), "%d\n", (resp.attr_mask & ERDMA_EXT_ATTR_TLP_MASK) != 0); + + return simple_read_from_buffer(buf, count, pos, cbuf, ret); +} + +static ssize_t tlp_write(struct file *filp, const char __user *buf, + size_t count, loff_t *pos) +{ + struct erdma_ext_attr attr; + struct erdma_dev *dev; + u32 var; + int ret; + + dev = filp->private_data; + + if (kstrtouint_from_user(buf, count, 0, &var)) + return -EFAULT; + + attr.attr_mask = ERDMA_EXT_ATTR_TLP_MASK; + attr.enable = var != 0 ? 1 : 0; + + ret = erdma_set_ext_attr(dev, &attr); + if (ret) + return ret; + + return count; +} + +static const struct file_operations tlp_fops = { + .owner = THIS_MODULE, + .open = simple_open, + .read = tlp_read, + .write = tlp_write, +}; + +static ssize_t dack_read(struct file *filp, char __user *buf, size_t count, + loff_t *pos) +{ + struct erdma_cmdq_query_ext_attr_resp resp; + struct erdma_dev *dev; + char cbuf[20]; + int ret; + + dev = filp->private_data; + ret = erdma_query_ext_attr(dev, &resp); + if (ret) + return ret; + + ret = snprintf(cbuf, sizeof(cbuf), "0x%x\n", resp.dack_count); + + return simple_read_from_buffer(buf, count, pos, cbuf, ret); +} + +static ssize_t dack_write(struct file *filp, const char __user *buf, + size_t count, loff_t *pos) +{ + struct erdma_dev *dev; + u32 var; + int ret; + + dev = filp->private_data; + + if (kstrtouint_from_user(buf, count, 0, &var)) + return -EFAULT; + + ret = erdma_set_dack_count(dev, var); + if (ret) + return ret; + + return count; +} + +static const struct file_operations dack_fops = { + .owner = THIS_MODULE, + .open = simple_open, + .read = dack_read, + .write = dack_write, +}; + +static ssize_t +cc_profiler_list_read(struct file *filp, char __user *buf, size_t count, loff_t *pos) +{ + struct erdma_cmdq_query_cc_profiler_list_resp list_resp; + struct erdma_cmdq_query_cc_profiler_name_resp name_resp; + struct erdma_dev *dev = filp->private_data; + unsigned long size; + int len = 0; + int ret, i; + char *cbuf; + + /* It takes 6 bytes to hold up to 4 numerals 1 colon and 1 line break. 
*/ + size = ERDMA_HW_CC_PROFILER_NUM * (ERDMA_HW_CC_PROFILER_NAME_LEN + 6); + cbuf = vmalloc(size); + if (!cbuf) + return -ENOMEM; + + ret = erdma_query_cc_profiler_list(dev, &list_resp); + if (ret) + goto free_vm; + + for (i = 0; i < ERDMA_HW_CC_PROFILER_NUM; i++) { + if (list_resp.idx_mask[i / 32] & ((u32)1 << (i % 32))) { + ret = erdma_query_cc_profiler_name(dev, i, &name_resp); + if (ret) + goto free_vm; + if (name_resp.valid) { + name_resp.name[ERDMA_HW_CC_PROFILER_NAME_LEN - 1] = '\0'; + len += snprintf(cbuf + len, size - len, + "%d:%s\n", i, name_resp.name); + } + } + } + + ret = simple_read_from_buffer(buf, count, pos, cbuf, len); + +free_vm: + vfree(cbuf); + return ret; +} + +static const struct file_operations cc_profiler_list_fops = { + .owner = THIS_MODULE, + .open = simple_open, + .read = cc_profiler_list_read, +}; + +static ssize_t +cc_profiler_read(struct file *filp, char __user *buf, size_t count, loff_t *pos) +{ + struct erdma_cmdq_query_ext_attr_resp resp; + struct erdma_dev *dev = filp->private_data; + char cbuf[20]; + int ret; + + ret = erdma_query_ext_attr(dev, &resp); + if (ret) + return ret; + + if (resp.cap_mask & ERDMA_EXT_ATTR_CC_PROFILER_MASK && + resp.attr_mask & ERDMA_EXT_ATTR_CC_PROFILER_MASK) + ret = snprintf(cbuf, sizeof(cbuf), "%d\n", resp.cc_profiler); + else + ret = snprintf(cbuf, sizeof(cbuf), "Invalid\n"); + + return simple_read_from_buffer(buf, count, pos, cbuf, ret); +} + +static ssize_t cc_profiler_write(struct file *filp, const char __user *buf, + size_t count, loff_t *pos) +{ + struct erdma_dev *dev = filp->private_data; + u32 var; + int ret; + + if (kstrtouint_from_user(buf, count, 0, &var)) + return -EFAULT; + + ret = erdma_set_cc_profiler(dev, var); + if (ret) + return ret; + + return count; +} + +static const struct file_operations cc_profiler_fops = { + .owner = THIS_MODULE, + .open = simple_open, + .write = cc_profiler_write, + .read = cc_profiler_read, +}; + +static ssize_t cap_read(struct file *filp, char __user *buf, size_t count, + loff_t *pos) +{ + struct erdma_cmdq_query_ext_attr_resp resp; + struct erdma_dev *dev; + char cbuf[40]; + int ret; + + dev = filp->private_data; + ret = erdma_query_ext_attr(dev, &resp); + if (ret) + return ret; + + ret = snprintf(cbuf, sizeof(cbuf), "cap 0x%lx\next_cap 0x%x\n", + dev->attrs.cap_flags, resp.cap_mask); + + return simple_read_from_buffer(buf, count, pos, cbuf, ret); +} + +static const struct file_operations cap_fops = { + .owner = THIS_MODULE, + .open = simple_open, + .read = cap_read, +}; + +int erdma_debugfs_files_create(struct erdma_dev *dev) +{ + struct dentry *ent; + + if (!erdma_debugfs_root) + return 0; + + dev->dbg_root = debugfs_create_dir(dev_name(&dev->pdev->dev), erdma_debugfs_root); + if (!dev->dbg_root) { + dev_err(&dev->pdev->dev, "erdma: Cannot create debugfs dir, aborting\n"); + return -ENOMEM; + } + + ent = debugfs_create_file("cc_profiler_list", 0400, dev->dbg_root, dev, + &cc_profiler_list_fops); + if (!ent) + goto err_out; + + ent = debugfs_create_file("cc_profiler", 0600, dev->dbg_root, dev, + &cc_profiler_fops); + if (!ent) + goto err_out; + + ent = debugfs_create_file("tlp", 0600, dev->dbg_root, dev, + &tlp_fops); + if (!ent) + goto err_out; + + ent = debugfs_create_file("delay_ack", 0600, dev->dbg_root, dev, + &dack_fops); + if (!ent) + goto err_out; + + ent = debugfs_create_file("cap", 0400, dev->dbg_root, dev, + &cap_fops); + if (!ent) + goto err_out; + + return 0; + +err_out: + debugfs_remove_recursive(dev->dbg_root); + + return -ENOMEM; +} + +void 
erdma_debugfs_files_destroy(struct erdma_dev *dev) +{ + if (erdma_debugfs_root) + debugfs_remove_recursive(dev->dbg_root); +} + +void erdma_debugfs_register(void) +{ + erdma_debugfs_root = debugfs_create_dir("erdma", NULL); + + if (IS_ERR_OR_NULL(erdma_debugfs_root)) + erdma_debugfs_root = NULL; +} + +void erdma_debugfs_unregister(void) +{ + debugfs_remove(erdma_debugfs_root); +} diff --git a/drivers/infiniband/hw/erdma/erdma_eq.c b/drivers/infiniband/hw/erdma/erdma_eq.c index ea47cb21fdb8caf0161c47f6b0ec7a4dd4891164..5b291d72640cfbac029667d5dd5f63546527c039 100644 --- a/drivers/infiniband/hw/erdma/erdma_eq.c +++ b/drivers/infiniband/hw/erdma/erdma_eq.c @@ -13,7 +13,7 @@ void notify_eq(struct erdma_eq *eq) u64 db_data = FIELD_PREP(ERDMA_EQDB_CI_MASK, eq->ci) | FIELD_PREP(ERDMA_EQDB_ARM_MASK, 1); - *eq->db_record = db_data; + *eq->dbrec = db_data; writeq(db_data, eq->db); atomic64_inc(&eq->notify_num); @@ -36,6 +36,9 @@ void erdma_aeq_event_handler(struct erdma_dev *dev) struct ib_event event; u32 poll_cnt = 0; + if (!test_bit(ERDMA_STATE_AEQ_INIT_DONE, &dev->state)) + return; + memset(&event, 0, sizeof(event)); while (poll_cnt < MAX_POLL_CHUNK_SIZE) { @@ -99,7 +102,7 @@ int erdma_aeq_init(struct erdma_dev *dev) atomic64_set(&eq->notify_num, 0); eq->db = dev->func_bar + ERDMA_REGS_AEQ_DB_REG; - eq->db_record = (u64 *)(eq->qbuf + buf_size); + eq->dbrec = (u64 *)(eq->qbuf + buf_size); erdma_reg_write32(dev, ERDMA_REGS_AEQ_ADDR_H_REG, upper_32_bits(eq->qbuf_dma_addr)); @@ -109,6 +112,9 @@ int erdma_aeq_init(struct erdma_dev *dev) erdma_reg_write64(dev, ERDMA_AEQ_DB_HOST_ADDR_REG, eq->qbuf_dma_addr + buf_size); + /* erdma_reg_writeXX has memory barrier implicitly */ + set_bit(ERDMA_STATE_AEQ_INIT_DONE, &dev->state); + return 0; } @@ -209,7 +215,7 @@ static void erdma_free_ceq_irq(struct erdma_dev *dev, u16 ceqn) static int create_eq_cmd(struct erdma_dev *dev, u32 eqn, struct erdma_eq *eq) { struct erdma_cmdq_create_eq_req req; - dma_addr_t db_info_dma_addr; + dma_addr_t dbrec_dma; erdma_cmdq_build_reqhdr(&req.hdr, CMDQ_SUBMOD_COMMON, CMDQ_OPCODE_CREATE_EQ); @@ -219,11 +225,13 @@ static int create_eq_cmd(struct erdma_dev *dev, u32 eqn, struct erdma_eq *eq) req.qtype = ERDMA_EQ_TYPE_CEQ; /* Vector index is the same as EQN. 
*/ req.vector_idx = eqn; - db_info_dma_addr = eq->qbuf_dma_addr + (eq->depth << EQE_SHIFT); - req.db_dma_addr_l = lower_32_bits(db_info_dma_addr); - req.db_dma_addr_h = upper_32_bits(db_info_dma_addr); + dbrec_dma = eq->qbuf_dma_addr + (eq->depth << EQE_SHIFT); + req.db_dma_addr_l = lower_32_bits(dbrec_dma); + req.db_dma_addr_h = upper_32_bits(dbrec_dma); - return erdma_post_cmd_wait(&dev->cmdq, &req, sizeof(req), NULL, NULL); + return erdma_post_cmd_wait(&dev->cmdq, &req, + sizeof(struct erdma_cmdq_create_eq_req), + NULL, NULL); } static int erdma_ceq_init_one(struct erdma_dev *dev, u16 ceqn) @@ -245,7 +253,7 @@ static int erdma_ceq_init_one(struct erdma_dev *dev, u16 ceqn) eq->depth = ERDMA_DEFAULT_EQ_DEPTH; eq->db = dev->func_bar + ERDMA_REGS_CEQ_DB_BASE_REG + (ceqn + 1) * ERDMA_DB_SIZE; - eq->db_record = (u64 *)(eq->qbuf + buf_size); + eq->dbrec = (u64 *)(eq->qbuf + buf_size); eq->ci = 0; dev->ceqs[ceqn].dev = dev; @@ -282,10 +290,25 @@ static void erdma_ceq_uninit_one(struct erdma_dev *dev, u16 ceqn) int erdma_ceqs_init(struct erdma_dev *dev) { + u64 req_hdr, cap0, cap1; u32 i, j; int err; - for (i = 0; i < dev->attrs.irq_num - 1; i++) { + erdma_cmdq_build_reqhdr(&req_hdr, CMDQ_SUBMOD_RDMA, + CMDQ_OPCODE_QUERY_DEVICE); + + err = erdma_post_cmd_wait(&dev->cmdq, &req_hdr, sizeof(req_hdr), &cap0, + &cap1); + if (err) + return err; + +#define ERDMA_GET_CAP(name, cap) FIELD_GET(ERDMA_CMD_DEV_CAP_##name##_MASK, cap) + + dev->attrs.max_ceqs = + min((size_t)8 * (size_t)ERDMA_GET_CAP(QBLOCK, cap1), + (size_t)dev->attrs.irq_num); + + for (i = 0; i < dev->attrs.max_ceqs - 1; i++) { err = erdma_ceq_init_one(dev, i); if (err) goto out_err; @@ -312,7 +335,7 @@ void erdma_ceqs_uninit(struct erdma_dev *dev) { u32 i; - for (i = 0; i < dev->attrs.irq_num - 1; i++) { + for (i = 0; i < dev->attrs.max_ceqs - 1; i++) { erdma_free_ceq_irq(dev, i); erdma_ceq_uninit_one(dev, i); } diff --git a/drivers/infiniband/hw/erdma/erdma_hw.h b/drivers/infiniband/hw/erdma/erdma_hw.h index a155519a862f83e6e4bec2dea2dec9fbc27eb141..92f433777c39f52f133dc941e3cebc89a84b0f40 100644 --- a/drivers/infiniband/hw/erdma/erdma_hw.h +++ b/drivers/infiniband/hw/erdma/erdma_hw.h @@ -11,6 +11,8 @@ #include /* PCIe device related definition. */ +#define PCI_VENDOR_ID_ALIBABA 0x1ded + #define ERDMA_PCI_WIDTH 64 #define ERDMA_FUNC_BAR 0 #define ERDMA_MISX_BAR 2 @@ -44,17 +46,6 @@ #define ERDMA_CMDQ_CQ_DB_HOST_ADDR_REG 0x68 #define ERDMA_CMDQ_EQ_DB_HOST_ADDR_REG 0x70 #define ERDMA_AEQ_DB_HOST_ADDR_REG 0x78 -#define ERDMA_REGS_STATS_TSO_IN_PKTS_REG 0x80 -#define ERDMA_REGS_STATS_TSO_OUT_PKTS_REG 0x88 -#define ERDMA_REGS_STATS_TSO_OUT_BYTES_REG 0x90 -#define ERDMA_REGS_STATS_TX_DROP_PKTS_REG 0x98 -#define ERDMA_REGS_STATS_TX_BPS_METER_DROP_PKTS_REG 0xa0 -#define ERDMA_REGS_STATS_TX_PPS_METER_DROP_PKTS_REG 0xa8 -#define ERDMA_REGS_STATS_RX_PKTS_REG 0xc0 -#define ERDMA_REGS_STATS_RX_BYTES_REG 0xc8 -#define ERDMA_REGS_STATS_RX_DROP_PKTS_REG 0xd0 -#define ERDMA_REGS_STATS_RX_BPS_METER_DROP_PKTS_REG 0xd8 -#define ERDMA_REGS_STATS_RX_PPS_METER_DROP_PKTS_REG 0xe0 #define ERDMA_REGS_CEQ_DB_BASE_REG 0x100 #define ERDMA_CMDQ_SQDB_REG 0x200 #define ERDMA_CMDQ_CQDB_REG 0x300 @@ -80,6 +71,19 @@ #define ERDMA_BAR_CQDB_SPACE_OFFSET \ (ERDMA_BAR_RQDB_SPACE_OFFSET + ERDMA_BAR_RQDB_SPACE_SIZE) +/* Doorbell page resources related. */ +/* + * Max # of parallelly issued directSQE is 3072 per device, + * hardware organizes this into 24 group, per group has 128 credits. 
+ */ +#define ERDMA_DWQE_MAX_GRP_CNT 24 +#define ERDMA_DWQE_NUM_PER_GRP 128 + +#define ERDMA_DWQE_TYPE0_CNT 64 +#define ERDMA_DWQE_TYPE1_CNT 496 +/* type1 DB contains 2 DBs, takes 256Byte. */ +#define ERDMA_DWQE_TYPE1_CNT_PER_PAGE 16 + #define ERDMA_SDB_SHARED_PAGE_INDEX 95 /* Doorbell related. */ @@ -136,7 +140,9 @@ enum CMDQ_RDMA_OPCODE { CMDQ_OPCODE_DESTROY_CQ = 5, CMDQ_OPCODE_REFLUSH = 6, CMDQ_OPCODE_REG_MR = 8, - CMDQ_OPCODE_DEREG_MR = 9 + CMDQ_OPCODE_DEREG_MR = 9, + CMDQ_OPCODE_QUERY_QPC = 11, + CMDQ_OPCODE_QUERY_CQC = 12, }; enum CMDQ_COMMON_OPCODE { @@ -144,9 +150,16 @@ enum CMDQ_COMMON_OPCODE { CMDQ_OPCODE_DESTROY_EQ = 1, CMDQ_OPCODE_QUERY_FW_INFO = 2, CMDQ_OPCODE_CONF_MTU = 3, - CMDQ_OPCODE_CONF_DEVICE = 5, - CMDQ_OPCODE_ALLOC_DB = 8, - CMDQ_OPCODE_FREE_DB = 9, + CMDQ_OPCODE_GET_STATS = 4, + CMDQ_OPCODE_QUERY_EQC = 6, + CMDQ_OPCODE_SET_RETRANS_NUM = 7, + + CMDQ_OPCODE_SET_EXT_ATTR = 10, + CMDQ_OPCODE_GET_EXT_ATTR = 11, + CMDQ_OPCODE_SYNC_INFO = 12, + CMDQ_OPCODE_RESERVED_0 = 13, + CMDQ_OPCODE_QUERY_CC_PROFILER_LIST = 14, + CMDQ_OPCODE_QUERY_CC_PROFILER_NAME = 15, }; /* cmdq-SQE HDR */ @@ -184,40 +197,41 @@ struct erdma_cmdq_destroy_eq_req { u8 qtype; }; -/* config device cfg */ -#define ERDMA_CMD_CONFIG_DEVICE_PS_EN_MASK BIT(31) -#define ERDMA_CMD_CONFIG_DEVICE_PGSHIFT_MASK GENMASK(4, 0) - -struct erdma_cmdq_config_device_req { +struct erdma_cmdq_config_mtu_req { u64 hdr; - u32 cfg; - u32 rsvd[5]; + u32 mtu; }; -struct erdma_cmdq_config_mtu_req { +struct erdma_cmdq_set_retrans_num_req { u64 hdr; - u32 mtu; + u32 retrans_num; }; -/* ext db requests(alloc and free) cfg */ -#define ERDMA_CMD_EXT_DB_CQ_EN_MASK BIT(2) -#define ERDMA_CMD_EXT_DB_RQ_EN_MASK BIT(1) -#define ERDMA_CMD_EXT_DB_SQ_EN_MASK BIT(0) +#define ERDMA_EXT_ATTR_DACK_COUNT_MASK BIT(0) +#define ERDMA_EXT_ATTR_LEGACY_MODE_MASK BIT(2) +#define ERDMA_EXT_ATTR_TLP_MASK BIT(4) +#define ERDMA_EXT_ATTR_CC_PROFILER_MASK BIT(5) +struct erdma_ext_attr { + u32 attr_mask; + u8 dack_count; + u8 enable; + u16 cc_profiler; +}; -struct erdma_cmdq_ext_db_req { +struct erdma_cmdq_set_ext_attr_req { u64 hdr; - u32 cfg; - u16 rdb_off; - u16 sdb_off; - u16 rsvd0; - u16 cdb_off; - u32 rsvd1[3]; + struct erdma_ext_attr attr; }; -/* alloc db response qword 0 definition */ -#define ERDMA_CMD_ALLOC_DB_RESP_RDB_MASK GENMASK_ULL(63, 48) -#define ERDMA_CMD_ALLOC_DB_RESP_CDB_MASK GENMASK_ULL(47, 32) -#define ERDMA_CMD_ALLOC_DB_RESP_SDB_MASK GENMASK_ULL(15, 0) +#define ERDMA_SYNC_INFO_COMPAT_MODE BIT(24) +#define ERDMA_SYNC_INFO_MAJOR_VER GENMASK(23, 16) +#define ERDMA_SYNC_INFO_MEDIUM_VER GENMASK(15, 8) +#define ERDMA_SYNC_INFO_MINOR_VER GENMASK(7, 0) +struct erdma_cmdq_sync_info_req { + u64 hdr; + u32 version; + u32 rsvd[5]; +}; /* create_cq cfg0 */ #define ERDMA_CMD_CREATE_CQ_DEPTH_MASK GENMASK(31, 24) @@ -226,13 +240,9 @@ struct erdma_cmdq_ext_db_req { /* create_cq cfg1 */ #define ERDMA_CMD_CREATE_CQ_MTT_CNT_MASK GENMASK(31, 16) -#define ERDMA_CMD_CREATE_CQ_MTT_LEVEL_MASK BIT(15) -#define ERDMA_CMD_CREATE_CQ_MTT_DB_CFG_MASK BIT(11) +#define ERDMA_CMD_CREATE_CQ_MTT_TYPE_MASK BIT(15) #define ERDMA_CMD_CREATE_CQ_EQN_MASK GENMASK(9, 0) -/* create_cq cfg2 */ -#define ERDMA_CMD_CREATE_CQ_DB_CFG_MASK GENMASK(15, 0) - struct erdma_cmdq_create_cq_req { u64 hdr; u32 cfg0; @@ -241,7 +251,6 @@ struct erdma_cmdq_create_cq_req { u32 cfg1; u64 cq_db_info_addr; u32 first_page_offset; - u32 cfg2; }; /* regmr/deregmr cfg0 */ @@ -257,8 +266,8 @@ struct erdma_cmdq_create_cq_req { /* regmr cfg2 */ #define ERDMA_CMD_REGMR_PAGESIZE_MASK GENMASK(31, 27) -#define 
ERDMA_CMD_REGMR_MTT_PAGESIZE_MASK GENMASK(26, 24) -#define ERDMA_CMD_REGMR_MTT_LEVEL_MASK GENMASK(21, 20) +#define ERDMA_CMD_REGMR_PBL_PAGESIZE_MASK GENMASK(26, 24) +#define ERDMA_CMD_REGMR_MTT_TYPE_MASK GENMASK(21, 20) #define ERDMA_CMD_REGMR_MTT_CNT_MASK GENMASK(19, 0) struct erdma_cmdq_reg_mr_req { @@ -288,6 +297,11 @@ struct erdma_cmdq_dereg_mr_req { #define ERDMA_CMD_MODIFY_QP_CC_MASK GENMASK(23, 20) #define ERDMA_CMD_MODIFY_QP_QPN_MASK GENMASK(19, 0) +#define ERDMA_CMD_MODIFY_QP_IPV6_MASK BIT(31) +#define ERDMA_CMD_MODIFY_QP_WWI_PERF_MASK BIT(30) +#define ERDMA_CMD_MODIFY_QP_TLP_MASK BIT(29) +#define ERDMA_CMD_MODIFY_QP_RQPN_MASK GENMASK(19, 0) + struct erdma_cmdq_modify_qp_req { u64 hdr; u32 cfg; @@ -298,6 +312,11 @@ struct erdma_cmdq_modify_qp_req { __be16 dport; u32 send_nxt; u32 recv_nxt; + u32 rsvd0; + u32 rsvd1; + __be32 flow_label; + u8 ipv6_daddr[16]; + u8 ipv6_saddr[16]; }; /* create qp cfg0 */ @@ -310,17 +329,12 @@ struct erdma_cmdq_modify_qp_req { /* create qp cqn_mtt_cfg */ #define ERDMA_CMD_CREATE_QP_PAGE_SIZE_MASK GENMASK(31, 28) -#define ERDMA_CMD_CREATE_QP_DB_CFG_MASK BIT(25) #define ERDMA_CMD_CREATE_QP_CQN_MASK GENMASK(23, 0) /* create qp mtt_cfg */ #define ERDMA_CMD_CREATE_QP_PAGE_OFFSET_MASK GENMASK(31, 12) #define ERDMA_CMD_CREATE_QP_MTT_CNT_MASK GENMASK(11, 1) -#define ERDMA_CMD_CREATE_QP_MTT_LEVEL_MASK BIT(0) - -/* create qp db cfg */ -#define ERDMA_CMD_CREATE_QP_SQDB_CFG_MASK GENMASK(31, 16) -#define ERDMA_CMD_CREATE_QP_RQDB_CFG_MASK GENMASK(15, 0) +#define ERDMA_CMD_CREATE_QP_MTT_TYPE_MASK BIT(0) #define ERDMA_CMDQ_CREATE_QP_RESP_COOKIE_MASK GENMASK_ULL(31, 0) @@ -334,13 +348,11 @@ struct erdma_cmdq_create_qp_req { u64 rq_buf_addr; u32 sq_mtt_cfg; u32 rq_mtt_cfg; - u64 sq_db_info_dma_addr; - u64 rq_db_info_dma_addr; + u64 sq_dbrec_dma; + u64 rq_dbrec_dma; u64 sq_mtt_entry[3]; u64 rq_mtt_entry[3]; - - u32 db_cfg; }; struct erdma_cmdq_destroy_qp_req { @@ -371,8 +383,9 @@ struct erdma_cmdq_reflush_req { enum { ERDMA_DEV_CAP_FLAGS_ATOMIC = 1 << 7, + ERDMA_DEV_CAP_FLAGS_QUERY_QC = 1 << 6, ERDMA_DEV_CAP_FLAGS_MTT_VA = 1 << 5, - ERDMA_DEV_CAP_FLAGS_EXTEND_DB = 1 << 3, + ERDMA_DEV_CAP_FLAGS_IPV6 = 1 << 4, }; #define ERDMA_CMD_INFO0_FW_VER_MASK GENMASK_ULL(31, 0) @@ -430,9 +443,11 @@ struct erdma_rqe { #define ERDMA_SQE_HDR_WQEBB_INDEX_MASK GENMASK_ULL(15, 0) /* REG MR attrs */ +#define ERDMA_SQE_MR_PGSZ_AVAIL_MASK BIT_ULL(0) #define ERDMA_SQE_MR_ACCESS_MASK GENMASK(5, 1) #define ERDMA_SQE_MR_MTT_TYPE_MASK GENMASK(7, 6) #define ERDMA_SQE_MR_MTT_CNT_MASK GENMASK(31, 12) +#define ERDMA_SQE_MR_PGSZ_MASK GENMASK(4, 0) struct erdma_write_sqe { __le64 hdr; @@ -484,8 +499,8 @@ struct erdma_reg_mr_sqe { __le64 addr; __le32 length; __le32 stag; - __le32 attrs; - __le32 rsvd; + __le32 attr0; + __le32 attr1; }; /* EQ related. 
*/ @@ -582,4 +597,163 @@ enum erdma_vendor_err { ERDMA_WC_VENDOR_SQE_WARP_ERR = 0x34 }; +/* Response Definitions for Query Command Category */ +#define ERDMA_HW_RESP_SIZE 256 + +struct erdma_cmdq_query_req { + u64 hdr; + u32 rsvd; + u32 index; + + u64 target_addr; + u32 target_length; +}; + +struct erdma_cmdq_query_resp_hdr { + u16 magic; + u8 ver; + u8 length; + + u32 index; + u32 rsvd[2]; +}; + +#define ERDMA_HW_CC_PROFILER_NUM 1024 +#define ERDMA_HW_CC_PROFILER_NAME_LEN 124 +struct erdma_cmdq_query_cc_profiler_list_resp { + struct erdma_cmdq_query_resp_hdr hdr; + u32 idx_mask[32]; +}; + +struct erdma_cmdq_query_cc_profiler_name_resp { + struct erdma_cmdq_query_resp_hdr hdr; + u32 valid; + char name[ERDMA_HW_CC_PROFILER_NAME_LEN]; +}; + +struct erdma_cmdq_query_stats_resp { + struct erdma_cmdq_query_resp_hdr hdr; + + u64 tx_req_cnt; + u64 tx_packets_cnt; + u64 tx_bytes_cnt; + u64 tx_drop_packets_cnt; + u64 tx_bps_meter_drop_packets_cnt; + u64 tx_pps_meter_drop_packets_cnt; + u64 rx_packets_cnt; + u64 rx_bytes_cnt; + u64 rx_drop_packets_cnt; + u64 rx_bps_meter_drop_packets_cnt; + u64 rx_pps_meter_drop_packets_cnt; +}; + +struct erdma_cmdq_query_qpc_resp { + struct erdma_cmdq_query_resp_hdr hdr; + + struct { + u8 status; /* 0 - disabled, 1 - enabled. */ + u8 qbuf_page_offset; + u8 qbuf_page_size; + u8 qbuf_depth; + + u16 hw_pi; + u16 hw_ci; + } qpc[2]; + + /* hardware io stat */ + u16 last_comp_sqe_idx; + u16 last_comp_rqe_idx; + u16 scqe_counter; + u16 rcqe_counter; + + u16 tx_pkts_cnt; + u16 rx_pkts_cnt; + u16 rx_error_drop_cnt; + u16 rx_invalid_drop_cnt; + + u32 rto_retrans_cnt; + //qp sw info + u32 rqpn; + + u32 pd; + u16 fw_sq_pi; + u16 fw_sq_ci; + + u16 fw_rq_ci; + u8 sq_in_flush; + u8 rq_in_flush; + u16 sq_flushed_pi; + u16 rq_flushed_pi; + + u32 scqn; + u32 rcqn; + + u64 sqbuf_addr; + u64 rqbuf_addr; + u64 sdbrec_addr; + u64 rdbrec_addr; + + u64 sdbrec_cur; + u64 rdbrec_cur; + + u32 ip_src; + u32 ip_dst; + u16 srcport; + u16 dstport; +}; + +struct erdma_cmdq_query_cqc_resp { + struct erdma_cmdq_query_resp_hdr hdr; + + u32 pi; + u8 q_en; + u8 log_depth; + u8 cq_cur_ownership; + u8 last_errdb_type; /* 0,dup db;1,out-order db */ + + u32 last_errdb_ci; + u8 out_order_db_cnt; + u8 dup_db_cnt; + u16 rsvd; + + u64 cn_cq_db_addr; + u64 cq_db_record; +}; + +struct erdma_cmdq_query_eqc_resp { + struct erdma_cmdq_query_resp_hdr hdr; + u16 depth; + u16 vector; + + u8 int_suppression; + u8 tail_owner; + u8 head_owner; + u8 overflow; + + u32 head; + u32 tail; + + u64 cn_addr; + u64 cn_db_addr; + u64 eq_db_record; +}; + +struct erdma_cmdq_query_ext_attr_resp { + struct erdma_cmdq_query_resp_hdr hdr; + + u32 cap_mask; + u32 attr_mask; + + u8 dack_count; + u8 rsvd; + u16 cc_profiler; +}; + +struct erdma_cmdq_dump_addr_req { + u64 hdr; + u64 dump_addr; + u64 target_addr; + u32 target_length; +}; + #endif diff --git a/drivers/infiniband/hw/erdma/erdma_ioctl.c b/drivers/infiniband/hw/erdma/erdma_ioctl.c new file mode 100644 index 0000000000000000000000000000000000000000..54cf35d2983ec57c47a67ea7ac0271faebae8423 --- /dev/null +++ b/drivers/infiniband/hw/erdma/erdma_ioctl.c @@ -0,0 +1,794 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB + +/* Authors: Cheng Xu */ +/* Copyright (c) 2020-2022, Alibaba Group. 
*/ + +#include +#include +#include +#include +#include + +#include "erdma.h" +#include "erdma_cm.h" +#include "erdma_ioctl.h" +#include "erdma_verbs.h" + +static struct class *erdma_chrdev_class; +static struct cdev erdma_cdev; +static struct device *erdma_chrdev; +static dev_t erdma_char_dev; + +#define ERDMA_CHRDEV_NAME "erdma" + +static int erdma_query_qpc(struct erdma_dev *dev, u32 qpn, void *out) +{ + BUILD_BUG_ON(sizeof(struct erdma_cmdq_query_qpc_resp) > + ERDMA_HW_RESP_SIZE); + + return erdma_query_resource(dev, CMDQ_SUBMOD_RDMA, + CMDQ_OPCODE_QUERY_QPC, qpn, out, + sizeof(struct erdma_cmdq_query_qpc_resp)); +} + +static int erdma_query_cqc(struct erdma_dev *dev, u32 cqn, void *out) +{ + BUILD_BUG_ON(sizeof(struct erdma_cmdq_query_cqc_resp) > + ERDMA_HW_RESP_SIZE); + + return erdma_query_resource(dev, CMDQ_SUBMOD_RDMA, + CMDQ_OPCODE_QUERY_CQC, cqn, out, + sizeof(struct erdma_cmdq_query_cqc_resp)); +} + +static int erdma_query_eqc(struct erdma_dev *dev, u32 eqn, void *out) +{ + BUILD_BUG_ON(sizeof(struct erdma_cmdq_query_eqc_resp) > + ERDMA_HW_RESP_SIZE); + + return erdma_query_resource(dev, CMDQ_SUBMOD_COMMON, + CMDQ_OPCODE_QUERY_EQC, eqn, out, + sizeof(struct erdma_cmdq_query_eqc_resp)); +} + +static int erdma_ioctl_conf_cmd(struct erdma_dev *edev, + struct erdma_ioctl_msg *msg) +{ + int ret = 0; + + if (msg->in.opcode == ERDMA_CONFIG_TYPE_CC) { + if (msg->in.config_req.is_set) + edev->attrs.cc = msg->in.config_req.value; + else + msg->out.config_resp.value = edev->attrs.cc; + } else if (msg->in.opcode == ERDMA_CONFIG_TYPE_RETRANS_NUM) { + if (msg->in.config_req.is_set) + ret = erdma_set_retrans_num(edev, + msg->in.config_req.value); + else + msg->out.config_resp.value = edev->attrs.retrans_num; + } else if (msg->in.opcode == ERDMA_CONFIG_TYPE_DACK_COUNT) { + if (msg->in.config_req.is_set) + ret = erdma_set_dack_count(edev, + msg->in.config_req.value); + else + ret = -EINVAL; + } else if (msg->in.opcode == ERDMA_CONFIG_TYPE_LEGACY_MODE) { + if (msg->in.config_req.is_set) + ret = erdma_enable_legacy_mode( + edev, msg->in.config_req.value); + else + ret = -EINVAL; + } + + msg->out.length = 4; + return ret; +} + +static void fill_eq_info(struct erdma_dev *dev, struct erdma_eq_info *info, + struct erdma_eq *eq) +{ + struct erdma_cmdq_query_eqc_resp resp; + int ret; + + info->event_cnt = atomic64_read(&eq->event_num); + info->notify_cnt = atomic64_read(&eq->notify_num); + info->depth = eq->depth; + info->ci = eq->ci; + info->qbuf_dma = eq->qbuf_dma_addr; + info->qbuf_va = (u64)eq->qbuf; + info->hw_info_valid = 0; + + ret = erdma_query_eqc(dev, info->eqn, &resp); + if (ret) + return; + + info->hw_info_valid = 1; + info->hw_depth = resp.depth; + info->vector = resp.vector; + info->int_suppression = resp.int_suppression; + info->tail_owner = resp.tail_owner; + info->head_owner = resp.head_owner; + info->overflow = resp.overflow; + info->head = resp.head; + info->tail = resp.tail; + info->cn_addr = resp.cn_addr; + info->cn_db_addr = resp.cn_db_addr; + info->eq_db_record = resp.eq_db_record; +} + +static void show_cep_info(struct erdma_dev *edev) +{ + u64 num_cep = atomic_read(&edev->num_cep); + struct list_head *pos, *tmp; + + pr_info("%s: %llu CEPs\n", edev->ibdev.name, num_cep); + + if (!num_cep) + return; + + pr_info("%-20s%-6s%-6s%-7s%-3s%-3s%-4s%-21s%-9s\n", "CEP", "State", + "Ref's", "QP-ID", "LQ", "LC", "U", "Sock", "CM-ID"); + + list_for_each_safe(pos, tmp, &edev->cep_list) { + struct erdma_cep *cep = list_entry(pos, struct erdma_cep, devq); + + 
pr_info("0x%-18p%-6d%-6d%-7d%-3s%-3s%-4d0x%-18p 0x%-16p\n", cep, + cep->state, kref_read(&cep->ref), + cep->qp ? QP_ID(cep->qp) : -1, + list_empty(&cep->listenq) ? "n" : "y", + cep->listen_cep ? "y" : "n", cep->in_use, cep->sock, + cep->cm_id); + } +} + +static int fill_cq_info(struct erdma_dev *dev, u32 cqn, + struct erdma_ioctl_msg *msg) +{ + struct erdma_cq_info *info = &msg->out.cq_info; + struct erdma_cmdq_query_cqc_resp resp; + struct rdma_restrack_entry *res; + struct erdma_cq *cq; + struct erdma_mem *mtt; + int ret; + + if (cqn == 0) { + info->cqn = 0; + info->depth = dev->cmdq.cq.depth; + info->assoc_eqn = 0; + info->qbuf_dma_addr = dev->cmdq.cq.qbuf_dma_addr; + info->ci = dev->cmdq.cq.ci; + info->cmdsn = dev->cmdq.cq.cmdsn; + info->notify_cnt = atomic64_read(&dev->cmdq.cq.armed_num); + + goto query_hw_cqc; + } + + cq = find_cq_by_cqn(dev, cqn); + if (!cq) + return -EINVAL; + + info->cqn = cq->cqn; + info->depth = cq->depth; + info->assoc_eqn = cq->assoc_eqn; + + res = &cq->ibcq.res; + info->is_user = !rdma_is_kernel_res(res); + mtt = info->is_user ? &cq->user_cq.qbuf_mtt : &cq->kern_cq.qbuf_mtt; + + info->mtt.page_size = mtt->page_size; + info->mtt.page_offset = mtt->page_offset; + info->mtt.page_cnt = mtt->page_cnt; + info->mtt.mtt_nents = mtt->mtt_nents; + info->mtt.va = mtt->va; + info->mtt.len = mtt->len; + + if (!info->is_user) { + info->ci = cq->kern_cq.ci; + info->cmdsn = cq->kern_cq.cmdsn; + info->notify_cnt = cq->kern_cq.notify_cnt; + } + + info->hw_info_valid = 0; + +query_hw_cqc: + ret = erdma_query_cqc(dev, cqn, &resp); + if (ret) + return 0; + + info->hw_info_valid = 1; + info->hw_pi = resp.pi; + info->enable = resp.q_en; + info->log_depth = resp.log_depth; + info->cq_cur_ownership = resp.cq_cur_ownership; + info->last_errdb_type = resp.last_errdb_type; + info->last_errdb_ci = resp.last_errdb_ci; + info->out_order_db_cnt = resp.out_order_db_cnt; + info->dup_db_cnt = resp.dup_db_cnt; + info->cn_cq_db_addr = resp.cn_cq_db_addr; + info->cq_db_record = resp.cq_db_record; + + return 0; +} + +static int fill_ext_attr_info(struct erdma_dev *dev, + struct erdma_ioctl_msg *msg) +{ + struct erdma_ext_attr_info *info = &msg->out.ext_attr_info; + struct erdma_cmdq_query_ext_attr_resp resp; + int ret = 0; + + ret = erdma_query_ext_attr(dev, &resp); + + info->cap = dev->attrs.cap_flags; + info->ext_cap = resp.cap_mask; + info->attr_mask = resp.attr_mask; + info->dack_count = resp.dack_count; + + return ret; +} + +static int erdma_ioctl_ver_cmd(struct erdma_dev *edev, + struct erdma_ioctl_msg *msg) +{ + msg->out.version = ERDMA_MAJOR_VER << 16 | ERDMA_MEDIUM_VER << 8 | + ERDMA_MINOR_VER; + + return 0; +} + +static int erdma_fill_qp_info(struct erdma_dev *dev, u32 qpn, + struct erdma_qp_info *qp_info) +{ + struct erdma_cmdq_query_qpc_resp resp; + struct rdma_restrack_entry *res; + struct erdma_mem *sq_mem, *rq_mem; + struct erdma_qp *qp; + int ret; + + if (qpn == 0) + goto query_hw_qpc; + + qp = find_qp_by_qpn(dev, qpn); + if (!qp) + return -EINVAL; + + if (qp->ibqp.qp_type != IB_QPT_RC) + return -EINVAL; + + erdma_qp_get(qp); + + qp_info->hw_info_valid = 0; + qp_info->qpn = qp->ibqp.qp_num; + qp_info->qp_state = qp->attrs.state; + qp_info->ref_cnt = kref_read(&qp->ref); + qp_info->qtype = qp->attrs.qp_type; + qp_info->sq_depth = qp->attrs.sq_size; + qp_info->rq_depth = qp->attrs.rq_size; + qp_info->cookie = qp->attrs.remote_cookie; + qp_info->cc = qp->attrs.cc; + qp_info->assoc_scqn = qp->scq->cqn; + qp_info->assoc_rcqn = qp->rcq->cqn; + + if (qp->cep && qp->cep->cm_id) { + struct 
erdma_cep *cep = qp->cep; + struct iw_cm_id *id = cep->cm_id; + struct sockaddr_storage remote_addr; + struct sockaddr_storage local_addr; + + qp_info->sip = + ntohl(to_sockaddr_in(id->local_addr).sin_addr.s_addr); + qp_info->dip = + ntohl(to_sockaddr_in(id->remote_addr).sin_addr.s_addr); + qp_info->sport = ntohs(to_sockaddr_in(id->local_addr).sin_port); + qp_info->dport = + ntohs(to_sockaddr_in(id->remote_addr).sin_port); + + if (cep->sock) { + getname_local(cep->sock, &local_addr); + getname_peer(cep->sock, &remote_addr); + qp_info->origin_sport = + ntohs(to_sockaddr_in(local_addr).sin_port); + qp_info->sip = ntohl( + to_sockaddr_in(local_addr).sin_addr.s_addr); + } + } + + res = &qp->ibqp.res; + qp_info->is_user = !rdma_is_kernel_res(res); + if (qp_info->is_user) { + qp_info->pid = res->task->pid; + get_task_comm(qp_info->buf, res->task); + } + sq_mem = qp_info->is_user ? &qp->user_qp.sq_mem : &qp->kern_qp.sq_mem; + + qp_info->sq_mem.page_size = sq_mem->page_size; + qp_info->sq_mem.page_offset = sq_mem->page_offset; + qp_info->sq_mem.page_cnt = sq_mem->page_cnt; + qp_info->sq_mem.mtt_nents = sq_mem->mtt_nents; + qp_info->sq_mem.va = sq_mem->va; + qp_info->sq_mem.len = sq_mem->len; + + rq_mem = qp_info->is_user ? &qp->user_qp.rq_mem : &qp->kern_qp.rq_mem; + + qp_info->rq_mem.page_size = rq_mem->page_size; + qp_info->rq_mem.page_offset = rq_mem->page_offset; + qp_info->rq_mem.page_cnt = rq_mem->page_cnt; + qp_info->rq_mem.mtt_nents = rq_mem->mtt_nents; + qp_info->rq_mem.va = rq_mem->va; + qp_info->rq_mem.len = rq_mem->len; + + if (!qp_info->is_user) { + qp_info->sqci = qp->kern_qp.sq_ci; + qp_info->sqpi = qp->kern_qp.sq_pi; + qp_info->rqci = qp->kern_qp.rq_ci; + qp_info->rqpi = qp->kern_qp.rq_pi; + qp_info->sqdbrec_dma = qp->kern_qp.sq_dbrec_dma; + qp_info->rqdbrec_dma = qp->kern_qp.rq_dbrec_dma; + } + + erdma_qp_put(qp); + +query_hw_qpc: + ret = erdma_query_qpc(dev, qpn, &resp); + if (ret) + return 0; + + qp_info->hw_info_valid = 1; + qp_info->sq_enable = resp.qpc[0].status; + qp_info->sqbuf_page_offset = resp.qpc[0].qbuf_page_offset; + qp_info->sqbuf_page_size = resp.qpc[0].qbuf_page_size; + qp_info->sqbuf_depth = resp.qpc[0].qbuf_depth; + qp_info->hw_sq_ci = resp.qpc[0].hw_ci; + qp_info->hw_sq_pi = resp.qpc[0].hw_pi; + + qp_info->rq_enable = resp.qpc[1].status; + qp_info->rqbuf_page_offset = resp.qpc[1].qbuf_page_offset; + qp_info->rqbuf_page_size = resp.qpc[1].qbuf_page_size; + qp_info->rqbuf_depth = resp.qpc[1].qbuf_depth; + qp_info->hw_rq_ci = resp.qpc[1].hw_ci; + qp_info->hw_rq_pi = resp.qpc[1].hw_pi; + qp_info->last_comp_sqe_idx = resp.last_comp_sqe_idx; + qp_info->last_comp_rqe_idx = resp.last_comp_rqe_idx; + qp_info->scqe_counter = resp.scqe_counter; + qp_info->rcqe_counter = resp.rcqe_counter; + qp_info->tx_pkts_cnt = resp.tx_pkts_cnt; + qp_info->rx_pkts_cnt = resp.rx_pkts_cnt; + qp_info->rx_error_drop_cnt = resp.rx_error_drop_cnt; + qp_info->rx_invalid_drop_cnt = resp.rx_invalid_drop_cnt; + qp_info->rto_retrans_cnt = resp.rto_retrans_cnt; + qp_info->pd = resp.pd; + qp_info->fw_sq_pi = resp.fw_sq_pi; + qp_info->fw_sq_ci = resp.fw_sq_ci; + qp_info->fw_rq_ci = resp.fw_rq_ci; + qp_info->sq_in_flush = resp.sq_in_flush; + qp_info->rq_in_flush = resp.rq_in_flush; + qp_info->sq_flushed_pi = resp.sq_flushed_pi; + qp_info->rq_flushed_pi = resp.rq_flushed_pi; + qp_info->sqbuf_addr = resp.sqbuf_addr; + qp_info->rqbuf_addr = resp.rqbuf_addr; + qp_info->sdbrec_addr = resp.sdbrec_addr; + qp_info->rdbrec_addr = resp.rdbrec_addr; + qp_info->ip_src = resp.ip_src; + qp_info->ip_dst = 
resp.ip_dst; + qp_info->srcport = resp.srcport; + qp_info->dstport = resp.dstport; + qp_info->sdbrec_val = resp.sdbrec_cur; + qp_info->rdbrec_val = resp.rdbrec_cur; + + if (qpn != 0 && resp.scqn != qp_info->assoc_scqn) + ibdev_info(&dev->ibdev, "hw scqn(%u) != drv scqn(%u)\n", + resp.scqn, qp_info->assoc_scqn); + + if (qpn != 0 && resp.rcqn != qp_info->assoc_rcqn) + ibdev_info(&dev->ibdev, "hw rcqn(%u) != drv rcqn(%u)\n", + resp.rcqn, qp_info->assoc_rcqn); + + return 0; +} + +static int erdma_ioctl_info_cmd(struct erdma_dev *edev, + struct erdma_ioctl_msg *msg) +{ + struct erdma_qp_info *qp_info; + int ret = 0, count = 0, i; + struct erdma_qp *qp; + struct erdma_cq *cq; + unsigned long index; + + switch (msg->in.opcode) { + case ERDMA_INFO_TYPE_QP: + qp_info = &msg->out.qp_info; + ret = erdma_fill_qp_info(edev, msg->in.info_req.qn, qp_info); + + break; + case ERDMA_INFO_TYPE_ALLOCED_QP: + xa_for_each_start(&edev->qp_xa, index, qp, + msg->in.info_req.qn) { + msg->out.allocted_qpn[count++] = index; + if (count == msg->in.info_req.max_result_cnt) + break; + } + msg->out.length = count * 4; + break; + case ERDMA_INFO_TYPE_ALLOCED_CQ: + xa_for_each_start(&edev->cq_xa, index, cq, + msg->in.info_req.qn) { + msg->out.allocted_cqn[count++] = index; + if (count == msg->in.info_req.max_result_cnt) + break; + } + msg->out.length = count * 4; + + break; + case ERDMA_INFO_TYPE_EQ: + msg->out.eq_info[0].ready = 1; + msg->out.eq_info[0].eqn = 0; + fill_eq_info(edev, &msg->out.eq_info[0], &edev->aeq); + + msg->out.eq_info[1].ready = 1; + msg->out.eq_info[1].eqn = 1; + fill_eq_info(edev, &msg->out.eq_info[1], &edev->cmdq.eq); + + for (i = 0; i < 31; i++) { + msg->out.eq_info[i + 2].ready = edev->ceqs[i].ready; + msg->out.eq_info[i + 2].eqn = i + 2; + fill_eq_info(edev, &msg->out.eq_info[i + 2], + &edev->ceqs[i].eq); + } + break; + case ERDMA_INFO_TYPE_CEP: + show_cep_info(edev); + break; + case ERDMA_INFO_TYPE_CQ: + ret = fill_cq_info(edev, msg->in.info_req.qn, msg); + break; + case ERDMA_INFO_TYPE_EXT_ATTR: + ret = fill_ext_attr_info(edev, msg); + break; + default: + pr_info("unknown opcode:%u\n", msg->in.opcode); + return -EINVAL; + } + + return ret; +} + +static int erdma_ioctl_stat_cmd(struct erdma_dev *edev, + struct erdma_ioctl_msg *msg) +{ + int ret; + + switch (msg->in.opcode) { + case ERDMA_STAT_TYPE_QP: + case ERDMA_STAT_TYPE_CQ: + break; + case ERDMA_STAT_TYPE_DEV: + ret = erdma_query_hw_stats(edev); + if (ret) + return ret; + + /* Make sure that no overflow happens. 
*/ + BUILD_BUG_ON(ERDMA_STATS_MAX > 512); + + memcpy(msg->out.stats, &edev->stats, + sizeof(__u64) * ERDMA_STATS_MAX); + + msg->out.length = ERDMA_STATS_MAX * sizeof(__u64); + break; + default: + pr_err("unknown stat opcode %d.\n", msg->in.opcode); + return -1; + } + + return 0; +} + +static int erdma_ioctl_dump_cmd(struct erdma_dev *edev, + struct erdma_ioctl_msg *msg) +{ + u32 qe_idx = msg->in.dump_req.qe_idx; + u32 qn = msg->in.dump_req.qn; + struct erdma_qp *qp; + struct erdma_cq *cq; + struct erdma_eq *eq; + int ret = 0; + u64 address; + u32 wqe_idx; + + switch (msg->in.opcode) { + case ERDMA_DUMP_TYPE_SQE: + /* CMDQ-SQ */ + if (qn == 0) { + wqe_idx = qe_idx & (edev->cmdq.sq.depth - 1); + memcpy(msg->out.data, + edev->cmdq.sq.qbuf + (wqe_idx << SQEBB_SHIFT), + SQEBB_SIZE); + } else { + qp = find_qp_by_qpn(edev, qn); + if (!qp) + return -EINVAL; + erdma_qp_get(qp); + + if (!rdma_is_kernel_res(&qp->ibqp.res)) { + address = qp->user_qp.sq_mem.umem->address; + wqe_idx = qe_idx & (qp->attrs.sq_size - 1); + address += wqe_idx << SQEBB_SHIFT; + ret = access_process_vm(qp->ibqp.res.task, + address, msg->out.data, + SQEBB_SIZE, FOLL_FORCE); + if (ret != SQEBB_SIZE) { + pr_info("access address with error (%d)\n", + ret); + erdma_qp_put(qp); + return -EIO; + } + ret = 0; + } else { + wqe_idx = qe_idx & (qp->attrs.sq_size - 1); + memcpy(msg->out.data, + qp->kern_qp.sq_buf + + (wqe_idx << SQEBB_SHIFT), + SQEBB_SIZE); + } + erdma_qp_put(qp); + } + msg->out.length = SQEBB_SIZE; + break; + case ERDMA_DUMP_TYPE_RQE: + qp = find_qp_by_qpn(edev, qn); + if (!qp) + return -EINVAL; + erdma_qp_get(qp); + + if (!rdma_is_kernel_res(&qp->ibqp.res)) { + address = qp->user_qp.rq_mem.umem->address; + wqe_idx = qe_idx & (qp->attrs.rq_size - 1); + address += wqe_idx << RQE_SHIFT; + ret = access_process_vm(qp->ibqp.res.task, address, + msg->out.data, RQE_SIZE, + FOLL_FORCE); + if (ret != RQE_SIZE) { + pr_info("access address with error (%d)\n", + ret); + erdma_qp_put(qp); + return -EIO; + } + ret = 0; + } else { + wqe_idx = qe_idx & (qp->attrs.rq_size - 1); + memcpy(msg->out.data, + qp->kern_qp.rq_buf + (wqe_idx << RQE_SHIFT), + RQE_SIZE); + } + erdma_qp_put(qp); + msg->out.length = RQE_SIZE; + break; + case ERDMA_DUMP_TYPE_CQE: + if (qn == 0) { + /* CMDQ-CQ */ + wqe_idx = qe_idx & (edev->cmdq.cq.depth - 1); + memcpy(msg->out.data, + edev->cmdq.cq.qbuf + (wqe_idx << CQE_SHIFT), + CQE_SIZE); + } else { + cq = find_cq_by_cqn(edev, qn); + if (!cq) + return -EINVAL; + + if (!rdma_is_kernel_res(&cq->ibcq.res)) { + address = cq->user_cq.qbuf_mtt.umem->address; + wqe_idx = qe_idx & (cq->depth - 1); + address += wqe_idx << CQE_SHIFT; + ret = access_process_vm(cq->ibcq.res.task, + address, msg->out.data, + CQE_SIZE, FOLL_FORCE); + if (ret != CQE_SIZE) { + pr_info("access address with error (%d)\n", + ret); + return -EIO; + } + ret = 0; + } else { + wqe_idx = qe_idx & (cq->depth - 1); + memcpy(msg->out.data, + cq->kern_cq.qbuf + + (wqe_idx << CQE_SHIFT), + CQE_SIZE); + } + } + msg->out.length = CQE_SIZE; + break; + + case ERDMA_DUMP_TYPE_EQE: + /* 0: AEQ, 1: CMD-EQ, 2 - 33: CEQ */ + if (qn == 0) { /* AEQ */ + eq = &edev->aeq; + } else if (qn == 1) { + eq = &edev->cmdq.eq; + } else if (qn > 1 && qn <= 33) { + if (edev->ceqs[qn - 2].ready == 0) + return -EINVAL; + eq = &edev->ceqs[qn - 2].eq; + } else { + return -EINVAL; + } + + wqe_idx = qe_idx & (eq->depth - 1); + memcpy(msg->out.data, eq->qbuf + (wqe_idx << EQE_SHIFT), + EQE_SIZE); + msg->out.length = EQE_SIZE; + break; + default: + break; + } + + return ret; +} + +typedef 
int (*ioctl_proc)(struct erdma_dev *, struct erdma_ioctl_msg *); + +static const ioctl_proc erdma_ioctl_proc_table[EADM_CMD_MAX] = { + [EADM_DUMP_CMD] = erdma_ioctl_dump_cmd, + [EADM_INFO_CMD] = erdma_ioctl_info_cmd, + [EADM_CONF_CMD] = erdma_ioctl_conf_cmd, + [EADM_STAT_CMD] = erdma_ioctl_stat_cmd, + [EADM_VER_CMD] = erdma_ioctl_ver_cmd, +}; + +long do_ioctl(unsigned int cmd, unsigned long arg) +{ + struct erdma_dev *edev = NULL; + struct ib_device *ibdev = NULL; + struct erdma_ioctl_msg *msg; + int ret = 0, bypass_dev = 0; + int command; + + msg = kzalloc(sizeof(*msg), GFP_KERNEL); + if (!msg) + return -ENOMEM; + + ret = copy_from_user(msg, (const void *)arg, + sizeof(struct erdma_ioctl_msg)); + if (ret) { + kfree(msg); + return -EINVAL; + } + + if (_IOC_TYPE(cmd) != ERDMA_IOC_MAGIC || + _IOC_NR(cmd) > ERDMA_IOC_MAXNR) { + kfree(msg); + return -EINVAL; + } + + command = _IOC_NR(cmd); + if (command >= EADM_CMD_MAX || !erdma_ioctl_proc_table[command]) { + ret = -EINVAL; + goto out; + } + + /* Some commands are allowed to run without an ibdev. */ + if (command == EADM_VER_CMD) + bypass_dev = 1; + + if (bypass_dev) + goto exec_cmd; + + ibdev = ib_device_get_by_name(msg->in.ibdev_name, RDMA_DRIVER_ERDMA); + if (ibdev) { + edev = to_edev(ibdev); + } else { + kfree(msg); + return -ENODEV; + } + +exec_cmd: + msg->out.status = erdma_ioctl_proc_table[command](edev, msg); + + ret = copy_to_user((void *)arg, (const void *)msg, + sizeof(struct erdma_ioctl_msg)); + +out: + if (!bypass_dev && ibdev) + ib_device_put(ibdev); + + kfree(msg); + return ret; +} + +long chardev_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) +{ + return do_ioctl(cmd, arg); +} + +static char *erdma_chrdev_devnode( + const struct device *dev, + umode_t *mode) +{ + if (mode) + *mode = 0666; + return kasprintf(GFP_KERNEL, "%s", dev_name(dev)); +} + +static int chardev_open(struct inode *inode, struct file *file) +{ + return 0; +} + +static ssize_t chardev_read(struct file *file, char __user *buf, size_t size, + loff_t *ppos) +{ + return 0; +} + +static int chardev_close(struct inode *inode, struct file *filp) +{ + return 0; +} + +/* clang-format off */ +static const struct file_operations chardev_fops = { + .owner = THIS_MODULE, + .open = chardev_open, + .release = chardev_close, + .read = chardev_read, + .unlocked_ioctl = chardev_ioctl +}; +/* clang-format on */ + +void erdma_chrdev_destroy(void) +{ + device_destroy(erdma_chrdev_class, erdma_char_dev); + cdev_del(&erdma_cdev); + class_destroy(erdma_chrdev_class); + + unregister_chrdev_region(erdma_char_dev, 1); +} + +int erdma_chrdev_init(void) +{ + int ret; + + ret = alloc_chrdev_region(&erdma_char_dev, 0, 1, ERDMA_CHRDEV_NAME); + if (ret) { + pr_err("alloc chrdev failed.\n"); + return ret; + } + + erdma_chrdev_class = class_create(ERDMA_CHRDEV_NAME); + if (IS_ERR(erdma_chrdev_class)) { + ret = PTR_ERR(erdma_chrdev_class); + pr_err("create class failed.\n"); + goto free_chrdev_region; + } + + erdma_chrdev_class->devnode = erdma_chrdev_devnode; + + cdev_init(&erdma_cdev, &chardev_fops); + erdma_cdev.owner = THIS_MODULE; + ret = cdev_add(&erdma_cdev, erdma_char_dev, 1); + if (ret) { + pr_err("cdev add failed. 
ret = %d\n", ret); + goto destroy_class; + } + + erdma_chrdev = device_create(erdma_chrdev_class, NULL, erdma_char_dev, + NULL, ERDMA_CHRDEV_NAME); + if (IS_ERR(erdma_chrdev)) { + pr_err("create_device failed.\n"); + goto delete_cdev; + } + + return 0; + +delete_cdev: + cdev_del(&erdma_cdev); + +destroy_class: + class_destroy(erdma_chrdev_class); + +free_chrdev_region: + unregister_chrdev_region(erdma_char_dev, 1); + + return ret; +} diff --git a/drivers/infiniband/hw/erdma/erdma_ioctl.h b/drivers/infiniband/hw/erdma/erdma_ioctl.h new file mode 100644 index 0000000000000000000000000000000000000000..8a23091666c011b2d117fc21cf922bc7c73d4b4b --- /dev/null +++ b/drivers/infiniband/hw/erdma/erdma_ioctl.h @@ -0,0 +1,327 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ + +/* Authors: Cheng Xu */ +/* Copyright (c) 2020-2022, Alibaba Group. */ + +#ifndef __EADM_IOCTL_H__ +#define __EADM_IOCTL_H__ + +#include +#include +#ifdef __KERNEL__ +#include +#else +#define TASK_COMM_LEN 16 +#endif + +#define ERDMA_DEVICE_NAME_MAX_LEN 20 + +enum erdma_cmd { + EADM_DUMP_CMD = 0x0, + EADM_TEST_CMD, + EADM_CTRL_CMD, + EADM_STAT_CMD, + EADM_INFO_CMD, + EADM_CONF_CMD, + EADM_VER_CMD, + EADM_CMD_MAX, +}; + +#define ERDMA_DUMP_OPCODE_CQE 0 +#define ERDMA_DUMP_OPCODE_SQE 1 +#define ERDMA_DUMP_OPCODE_RQE 2 +#define ERDMA_DUMP_OPCODE_EQE 3 + +#define ERDMA_CM_TEST_SERVER 0 +#define ERDMA_CM_TEST_CLIENT 1 + +#define ERDMA_TEST_DATA 3 +#define ERDMA_TEST_ECHO 4 +#define ERDMA_TEST_CONN 5 +#define ERDMA_TEST_ORDER 6 + +enum erdma_stat_type { + ERDMA_STAT_TYPE_QP = 0, + ERDMA_STAT_TYPE_CQ, + ERDMA_STAT_TYPE_DEV, + ERDMA_STAT_TYPE_MAX, +}; + +enum erdma_info_type { + ERDMA_INFO_TYPE_DEV = 0, + ERDMA_INFO_TYPE_ALLOCED_QP, + ERDMA_INFO_TYPE_QP, + ERDMA_INFO_TYPE_ALLOCED_CQ, + ERDMA_INFO_TYPE_CQ, + ERDMA_INFO_TYPE_EQ, + ERDMA_INFO_TYPE_CEP, + ERDMA_INFO_TYPE_EXT_ATTR, + ERDMA_INFO_TYPE_MAX, +}; + +enum erdma_config_type { + ERDMA_CONFIG_TYPE_CC = 0, + ERDMA_CONFIG_TYPE_DISCARD0, + ERDMA_CONFIG_TYPE_RETRANS_NUM, + ERDMA_CONFIG_TYPE_DACK_COUNT, + ERDMA_CONFIG_TYPE_LEGACY_MODE, + ERDMA_CONFIG_MAX +}; + +enum erdma_dump_type { + ERDMA_DUMP_TYPE_SQE = 0, + ERDMA_DUMP_TYPE_RQE, + ERDMA_DUMP_TYPE_CQE, + ERDMA_DUMP_TYPE_EQE, + ERDMA_DUMP_MAX = ERDMA_DUMP_TYPE_EQE + 1, +}; + +struct erdma_qp_info { + __u32 qpn; + __u32 qp_state; + __u32 ref_cnt; + + __u32 sip; + __u32 dip; + __u16 sport; + __u16 dport; + + __u16 qtype; /* Client or Server. 
*/ + __u16 origin_sport; + __u16 sq_depth; + __u16 rq_depth; + + __u32 cookie; + __u8 cc; + __u8 is_user; + __u8 sq_mtt_type; + __u8 rq_mtt_type; + + __u32 assoc_scqn; + __u32 assoc_rcqn; + + __u16 sqci; + __u16 sqpi; + __u16 rqci; + __u16 rqpi; + __u64 sqbuf_dma; + __u64 rqbuf_dma; + __u64 sqdbrec_dma; + __u64 rqdbrec_dma; + + __u32 pid; + char buf[TASK_COMM_LEN]; + __u8 rsvd0[15]; + __u8 hw_info_valid; + + struct { + __u32 page_size; + __u32 page_offset; + __u32 page_cnt; + __u32 mtt_nents; + __u64 mtt_entry[4]; + __u64 va; + __u64 len; + } sq_mem, rq_mem; + + __u8 sq_enable; + __u8 sqbuf_page_offset; + __u8 sqbuf_page_size; + __u8 sqbuf_depth; + __u16 hw_sq_ci; + __u16 hw_sq_pi; + + __u8 rq_enable; + __u8 rqbuf_page_offset; + __u8 rqbuf_page_size; + __u8 rqbuf_depth; + __u16 hw_rq_ci; + __u16 hw_rq_pi; + + __u16 last_comp_sqe_idx; + __u16 last_comp_rqe_idx; + __u16 scqe_counter; + __u16 rcqe_counter; + __u16 tx_pkts_cnt; + __u16 rx_pkts_cnt; + __u16 rx_error_drop_cnt; + __u16 rx_invalid_drop_cnt; + __u32 rto_retrans_cnt; + + __u32 pd; + __u16 fw_sq_pi; + __u16 fw_sq_ci; + __u16 fw_rq_ci; + __u8 sq_in_flush; + __u8 rq_in_flush; + + __u16 sq_flushed_pi; + __u16 rq_flushed_pi; + + __u64 sqbuf_addr; + __u64 rqbuf_addr; + __u64 sdbrec_addr; + __u64 rdbrec_addr; + __u64 sdbrec_val; + __u64 rdbrec_val; + + __u32 ip_src; + __u32 ip_dst; + __u16 srcport; + __u16 dstport; +}; + +struct erdma_cq_info { + __u32 cqn; + __u32 depth; + + __u32 assoc_eqn; + __u8 is_user; + __u8 rsvd0; + __u8 mtt_type; + __u8 hw_info_valid; + + __u64 qbuf_dma_addr; + __u32 ci; + __u32 cmdsn; + __u32 notify_cnt; + __u32 rsvd1; + + struct { + __u32 page_size; + __u32 page_offset; + __u32 page_cnt; + __u32 mtt_nents; + __u64 mtt_entry[4]; + __u64 va; + __u64 len; + } mtt; + + __u32 hw_pi; + __u8 enable; + __u8 log_depth; + __u8 cq_cur_ownership; + __u8 last_errdb_type; /* 0,dup db;1,out-order db */ + + __u32 last_errdb_ci; + __u8 out_order_db_cnt; + __u8 dup_db_cnt; + __u16 rsvd; + + __u64 cn_cq_db_addr; + __u64 cq_db_record; +}; + +struct erdma_eq_info { + __u32 eqn; + __u8 ready; + __u8 rsvd[2]; + __u8 hw_info_valid; + + __u64 event_cnt; + __u64 notify_cnt; + + __u32 depth; + __u32 ci; + __u64 qbuf_dma; + __u64 qbuf_va; + + __u16 hw_depth; + __u16 vector; + + __u8 int_suppression; + __u8 tail_owner; + __u8 head_owner; + __u8 overflow; + + __u32 head; + __u32 tail; + + __u64 cn_addr; + __u64 cn_db_addr; + __u64 eq_db_record; + +}; + +struct erdma_ext_attr_info { + __u8 cap; + __u8 rsvd[3]; + __u32 ext_cap; + __u32 attr_mask; + __u8 dack_count; +}; + +struct erdma_ioctl_inbuf { + __u32 opcode; + char ibdev_name[ERDMA_DEVICE_NAME_MAX_LEN + 1]; + union { + struct { + __u32 value; + __u32 is_set; + } config_req; + + struct { + __u32 qn; + __u32 qe_idx; + } dump_req; + struct { + __u32 qn; + __u32 max_result_cnt; + } info_req; + struct { + __u32 qn; + } stat_req; + }; +}; + +struct erdma_ioctl_outbuf { + __u32 status; + __u32 length; + union { + char data[4096]; + struct { + __u32 value; + } config_resp; + + __u32 allocted_qpn[1024]; + __u32 allocted_cqn[1024]; + + struct erdma_qp_info qp_info; + /* 0: AEQ, 1: Cmd-EQ, 2-32: Completion-EQ */ + struct erdma_eq_info eq_info[33]; + struct erdma_cq_info cq_info; + struct erdma_ext_attr_info ext_attr_info; + + __u32 version; + __u64 stats[512]; + }; +}; + +struct erdma_ioctl_msg { + struct erdma_ioctl_inbuf in; + struct erdma_ioctl_outbuf out; +}; + +/* Define the ioctl magic number. */ +#define ERDMA_IOC_MAGIC 'k' + +/* Define the ioctl commands. */ +#define ERDMA_DUMP _IOWR(ERDMA_IOC_MAGIC, EADM_DUMP_CMD, struct
erdma_ioctl_msg) +#define ERDMA_TEST _IOWR(ERDMA_IOC_MAGIC, EADM_TEST_CMD, struct erdma_ioctl_msg) +#define ERDMA_CTRL _IOWR(ERDMA_IOC_MAGIC, EADM_CTRL_CMD, struct erdma_ioctl_msg) +#define ERDMA_STAT _IOWR(ERDMA_IOC_MAGIC, EADM_STAT_CMD, struct erdma_ioctl_msg) +#define ERDMA_INFO _IOWR(ERDMA_IOC_MAGIC, EADM_INFO_CMD, struct erdma_ioctl_msg) +#define ERDMA_CONF _IOWR(ERDMA_IOC_MAGIC, EADM_CONF_CMD, struct erdma_ioctl_msg) +#define ERDMA_VER _IOWR(ERDMA_IOC_MAGIC, EADM_VER_CMD, struct erdma_ioctl_msg) + +#define ERDMA_IOC_MAXNR EADM_CMD_MAX + +#ifdef __KERNEL__ +long chardev_ioctl(struct file *filp, unsigned int cmd, unsigned long arg); +long do_ioctl(unsigned int cmd, unsigned long arg); +#else + +#endif +int exec_ioctl_cmd(char *dev_path, int cmd, struct erdma_ioctl_msg *msg); + +#endif diff --git a/drivers/infiniband/hw/erdma/erdma_main.c b/drivers/infiniband/hw/erdma/erdma_main.c index 0880c79a978c2873abcf6835559a457eeb7278a0..313eb5beae2fceadb3217ba77f88a85b173a7be6 100644 --- a/drivers/infiniband/hw/erdma/erdma_main.c +++ b/drivers/infiniband/hw/erdma/erdma_main.c @@ -4,34 +4,82 @@ /* Kai Shen */ /* Copyright (c) 2020-2022, Alibaba Group. */ +#include +#include #include #include -#include +#include #include "erdma.h" #include "erdma_cm.h" #include "erdma_verbs.h" MODULE_AUTHOR("Cheng Xu "); +MODULE_AUTHOR("Kai Shen "); MODULE_DESCRIPTION("Alibaba elasticRDMA adapter driver"); MODULE_LICENSE("Dual BSD/GPL"); +static unsigned int vector_num = ERDMA_NUM_MSIX_VEC; +module_param(vector_num, uint, 0444); +MODULE_PARM_DESC(vector_num, "number of compeletion vectors"); + +static int default_cc = -1; +module_param(default_cc, int, 0444); +MODULE_PARM_DESC(default_cc, "default cc method"); + +bool rand_qpn = true; +module_param(rand_qpn, bool, 0444); +MODULE_PARM_DESC(rand_qpn, "randomized qpn"); + +static LIST_HEAD(dev_list); + +static void erdma_add_dev_to_list(struct erdma_dev *dev) +{ + refcount_set(&dev->refcount, 1); + init_completion(&dev->unreg_completion); + + list_add_tail_rcu(&dev->dev_list, &dev_list); +} + +static void erdma_device_put(struct erdma_dev *dev) +{ + ibdev_dbg(&dev->ibdev, "%s: custom called.\n", __func__); + if (refcount_dec_and_test(&dev->refcount)) + complete(&dev->unreg_completion); +} + +static void erdma_remove_dev_from_list(struct erdma_dev *dev) +{ + list_del_rcu(&dev->dev_list); + /* + * We cannot move forward after a list_del_rcu until the + * grace period + */ + synchronize_rcu(); + erdma_device_put(dev); + wait_for_completion(&dev->unreg_completion); +} + static int erdma_netdev_event(struct notifier_block *nb, unsigned long event, void *arg) { struct net_device *netdev = netdev_notifier_info_to_dev(arg); struct erdma_dev *dev = container_of(nb, struct erdma_dev, netdev_nb); - if (dev->netdev == NULL || dev->netdev != netdev) + ibdev_dbg(&dev->ibdev, " netdev:%s,ns:%p: Event %lu to erdma_dev %p\n", + netdev->name, dev_net(netdev), event, dev); + + if (dev->netdev != netdev && + (dev->netdev || event != NETDEV_REGISTER)) goto done; switch (event) { case NETDEV_UP: - dev->state = IB_PORT_ACTIVE; + dev->port_state = IB_PORT_ACTIVE; erdma_port_event(dev, IB_EVENT_PORT_ACTIVE); break; case NETDEV_DOWN: - dev->state = IB_PORT_DOWN; + dev->port_state = IB_PORT_DOWN; erdma_port_event(dev, IB_EVENT_PORT_ERR); break; case NETDEV_CHANGEMTU: @@ -40,8 +88,24 @@ static int erdma_netdev_event(struct notifier_block *nb, unsigned long event, dev->mtu = netdev->mtu; } break; - case NETDEV_REGISTER: case NETDEV_UNREGISTER: + ib_device_set_netdev(&dev->ibdev, NULL, 1); + 
write_lock(&dev->netdev_lock); + dev->netdev = NULL; + write_unlock(&dev->netdev_lock); + break; + case NETDEV_REGISTER: + if (netdev->lower_level > 1) + break; + write_lock(&dev->netdev_lock); + if (dev->netdev == NULL && + ether_addr_equal_unaligned(netdev->perm_addr, + dev->attrs.peer_addr)) { + ib_device_set_netdev(&dev->ibdev, netdev, 1); + dev->netdev = netdev; + } + write_unlock(&dev->netdev_lock); + break; case NETDEV_CHANGEADDR: case NETDEV_GOING_DOWN: case NETDEV_CHANGE: @@ -56,57 +120,116 @@ static int erdma_netdev_event(struct notifier_block *nb, unsigned long event, static int erdma_enum_and_get_netdev(struct erdma_dev *dev) { struct net_device *netdev; - int ret = -EPROBE_DEFER; + struct net *net; + int ret = -ENODEV; /* Already binded to a net_device, so we skip. */ if (dev->netdev) return 0; rtnl_lock(); - for_each_netdev(&init_net, netdev) { - /* - * In erdma, the paired netdev and ibdev should have the same - * MAC address. erdma can get the value from its PCIe bar - * registers. Since erdma can not get the paired netdev - * reference directly, we do a traverse here to get the paired - * netdev. - */ - if (ether_addr_equal_unaligned(netdev->perm_addr, - dev->attrs.peer_addr)) { - ret = ib_device_set_netdev(&dev->ibdev, netdev, 1); - if (ret) { - rtnl_unlock(); - ibdev_warn(&dev->ibdev, - "failed (%d) to link netdev", ret); - return ret; + down_read(&net_rwsem); + + for_each_net(net) { + for_each_netdev(net, netdev) { + /* + * In erdma, the paired netdev and ibdev should have the same + * MAC address. erdma can get the value from its PCIe bar + * registers. Since erdma can not get the paired netdev + * reference directly, we do a traverse here to get the paired + * netdev. + */ + if (netdev->lower_level > 1) + continue; + if (ether_addr_equal_unaligned(netdev->perm_addr, + dev->attrs.peer_addr)) { + ret = ib_device_set_netdev(&dev->ibdev, netdev, 1); + if (ret) { + up_read(&net_rwsem); + rtnl_unlock(); + ibdev_warn(&dev->ibdev, + "failed (%d) to link netdev", ret); + return ret; + } + /* This is initialize flow, no need use rwlock to protect netdev */ + dev->netdev = netdev; + break; } - - dev->netdev = netdev; - break; } } + up_read(&net_rwsem); rtnl_unlock(); return ret; } +static void erdma_device_unregister(struct erdma_dev *dev) +{ + unregister_netdevice_notifier(&dev->netdev_nb); + + ib_unregister_device(&dev->ibdev); + + erdma_remove_dev_from_list(dev); +} + static int erdma_device_register(struct erdma_dev *dev) { struct ib_device *ibdev = &dev->ibdev; int ret; + memset(ibdev->name, 0, IB_DEVICE_NAME_MAX); + /* + * In Ali ECS environment, ENI's mac address is unique in VPC. + * So, generating the ibdev's name from mac address of the binded + * netdev. + */ + strscpy(ibdev->name, "erdma_%d", IB_DEVICE_NAME_MAX); + + ret = erdma_set_dack_count(dev, 0); + if (ret) + ibdev_warn(&dev->ibdev, "failed to disable dack err=%d.\n", ret); + + if (legacy_mode) { + ret = erdma_enable_legacy_mode(dev, 1); + if (ret) { + ibdev_err(&dev->ibdev, "failed to enable legacy mode err=%d.\n", ret); + return -EINVAL; + } + } + + rwlock_init(&dev->netdev_lock); ret = erdma_enum_and_get_netdev(dev); if (ret) - return ret; + return -EPROBE_DEFER; + + if (compat_mode) { + ret = attach_sw_dev(dev); + if (ret) + return ret == -ENOENT ? 
-EPROBE_DEFER : ret; + } dev->mtu = dev->netdev->mtu; + erdma_set_mtu(dev, dev->mtu); addrconf_addr_eui48((u8 *)&ibdev->node_guid, dev->netdev->dev_addr); - ret = ib_register_device(ibdev, "erdma_%d", &dev->pdev->dev); + ret = erdma_set_retrans_num(dev, ERDMA_DEFAULT_RETRANS_NUM); + if (ret) + dev->attrs.retrans_num = 0; + + erdma_add_dev_to_list(dev); + erdma_sync_info(dev); + + ret = ib_register_device(ibdev, ibdev->name, &dev->pdev->dev); if (ret) { dev_err(&dev->pdev->dev, - "ib_register_device failed: ret = %d\n", ret); + "ib_register_device(%s) failed: ret = %d\n", + ibdev->name, ret); + + erdma_remove_dev_from_list(dev); + if (compat_mode) + detach_sw_dev(dev); + return ret; } @@ -114,10 +237,27 @@ static int erdma_device_register(struct erdma_dev *dev) ret = register_netdevice_notifier(&dev->netdev_nb); if (ret) { ibdev_err(&dev->ibdev, "failed to register notifier.\n"); + ib_unregister_device(ibdev); + erdma_remove_dev_from_list(dev); + + if (compat_mode) + detach_sw_dev(dev); + + return ret; } - return ret; + ibdev_dbg( + &dev->ibdev, + " Registered '%s' for interface '%s',HWaddr=%02x.%02x.%02x.%02x.%02x.%02x\n", + ibdev->name, dev->netdev->name, *(__u8 *)dev->netdev->dev_addr, + *((__u8 *)dev->netdev->dev_addr + 1), + *((__u8 *)dev->netdev->dev_addr + 2), + *((__u8 *)dev->netdev->dev_addr + 3), + *((__u8 *)dev->netdev->dev_addr + 4), + *((__u8 *)dev->netdev->dev_addr + 5)); + + return 0; } static irqreturn_t erdma_comm_irq_handler(int irq, void *data) @@ -130,18 +270,49 @@ static irqreturn_t erdma_comm_irq_handler(int irq, void *data) return IRQ_HANDLED; } +static void erdma_dwqe_resource_init(struct erdma_dev *dev) +{ + int total_pages, type0, type1; + + dev->attrs.grp_num = erdma_reg_read32(dev, ERDMA_REGS_GRP_NUM_REG); + + if (dev->attrs.grp_num < 4) + dev->attrs.disable_dwqe = true; + else + dev->attrs.disable_dwqe = false; + + /* One page contains 4 goups. 
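When fewer than ERDMA_DWQE_MAX_GRP_CNT groups are present, roughly one third of those pages become type-1 pages (each carved into ERDMA_DWQE_TYPE1_CNT_PER_PAGE doorbell entries) and the remainder, minus one reserved page, become type-0 pages. Worked example (assuming ERDMA_DWQE_MAX_GRP_CNT is larger than 8): grp_num = 8 gives total_pages = 32, type1 = 32 / 3 = 10 and type0 = 32 - 10 - 1 = 21.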
*/ + total_pages = dev->attrs.grp_num * 4; + + if (dev->attrs.grp_num >= ERDMA_DWQE_MAX_GRP_CNT) { + dev->attrs.grp_num = ERDMA_DWQE_MAX_GRP_CNT; + type0 = ERDMA_DWQE_TYPE0_CNT; + type1 = ERDMA_DWQE_TYPE1_CNT / ERDMA_DWQE_TYPE1_CNT_PER_PAGE; + } else { + type1 = total_pages / 3; + type0 = total_pages - type1 - 1; + } + + dev->attrs.dwqe_pages = type0; + dev->attrs.dwqe_entries = type1 * ERDMA_DWQE_TYPE1_CNT_PER_PAGE; + + dev_info( + &dev->pdev->dev, + "grp_num:%d, total pages:%d, type0:%d, type1:%d, type1_db_cnt:%d\n", + dev->attrs.grp_num, total_pages, type0, type1, type1 * 16); +} + static int erdma_request_vectors(struct erdma_dev *dev) { - int expect_irq_num = min(num_possible_cpus() + 1, ERDMA_NUM_MSIX_VEC); - int ret; + int expect_irq_num = min(num_possible_cpus() + 1, vector_num); - ret = pci_alloc_irq_vectors(dev->pdev, 1, expect_irq_num, PCI_IRQ_MSIX); - if (ret < 0) { + dev->attrs.irq_num = pci_alloc_irq_vectors(dev->pdev, 1, expect_irq_num, + PCI_IRQ_MSIX); + if (dev->attrs.irq_num <= 0) { dev_err(&dev->pdev->dev, "request irq vectors failed(%d)\n", - ret); - return ret; + dev->attrs.irq_num); + return -ENOSPC; } - dev->attrs.irq_num = ret; return 0; } @@ -168,25 +339,50 @@ static void erdma_comm_irq_uninit(struct erdma_dev *dev) free_irq(dev->comm_irq.msix_vector, dev); } +static int erdma_hw_resp_pool_init(struct erdma_dev *dev) +{ + dev->resp_pool = + dma_pool_create("erdma_resp_pool", &dev->pdev->dev, + ERDMA_HW_RESP_SIZE, ERDMA_HW_RESP_SIZE, 0); + if (!dev->resp_pool) + return -ENOMEM; + + return 0; +} + +static void erdma_hw_resp_pool_destroy(struct erdma_dev *dev) +{ + dma_pool_destroy(dev->resp_pool); +} + static int erdma_device_init(struct erdma_dev *dev, struct pci_dev *pdev) { int ret; + erdma_dwqe_resource_init(dev); + + ret = erdma_hw_resp_pool_init(dev); + if (ret) + return ret; + ret = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(ERDMA_PCI_WIDTH)); if (ret) - return ret; + goto destroy_pool; dma_set_max_seg_size(&pdev->dev, UINT_MAX); return 0; + +destroy_pool: + erdma_hw_resp_pool_destroy(dev); + + return ret; } -static void erdma_hw_reset(struct erdma_dev *dev) +static void erdma_device_uninit(struct erdma_dev *dev) { - u32 ctrl = FIELD_PREP(ERDMA_REG_DEV_CTRL_RESET_MASK, 1); - - erdma_reg_write32(dev, ERDMA_REGS_DEV_CTRL_REG, ctrl); + erdma_hw_resp_pool_destroy(dev); } static int erdma_wait_hw_init_done(struct erdma_dev *dev) @@ -212,6 +408,47 @@ static int erdma_wait_hw_init_done(struct erdma_dev *dev) return 0; } +static int erdma_hw_stop(struct erdma_dev *dev, bool wait) +{ + u32 ctrl = FIELD_PREP(ERDMA_REG_DEV_CTRL_RESET_MASK, 1); + int i; + + erdma_reg_write32(dev, ERDMA_REGS_DEV_CTRL_REG, ctrl); + + if (!wait) + return 0; + + for (i = 0; i < 50; i++) { + if (erdma_reg_read32_filed(dev, ERDMA_REGS_DEV_ST_REG, + ERDMA_REG_DEV_ST_RESET_DONE_MASK)) + break; + + msleep(ERDMA_REG_ACCESS_WAIT_MS); + } + + if (i == 50) { + dev_err(&dev->pdev->dev, "wait reset done timeout.\n"); + return -ETIME; + } + + return 0; +} + +static int erdma_preinit_proc(struct erdma_dev *dev) +{ + u32 version = + be32_to_cpu(erdma_reg_read32(dev, ERDMA_REGS_VERSION_REG)); + + switch (version) { + case 0: + return -ENODEV; + case 2: + return erdma_hw_stop(dev, true); + default: + return 0; + } +} + static const struct pci_device_id erdma_pci_tbl[] = { { PCI_DEVICE(PCI_VENDOR_ID_ALIBABA, 0x107f) }, {} @@ -221,7 +458,6 @@ static int erdma_probe_dev(struct pci_dev *pdev) { struct erdma_dev *dev; int bars, err; - u32 version; err = pci_enable_device(pdev); if (err) { @@ -241,6 +477,7 
@@ static int erdma_probe_dev(struct pci_dev *pdev) pci_set_drvdata(pdev, dev); dev->pdev = pdev; dev->attrs.numa_node = dev_to_node(&pdev->dev); + dev->state = 0; bars = pci_select_bars(pdev, IORESOURCE_MEM); err = pci_request_selected_regions(pdev, bars, DRV_MODULE_NAME); @@ -260,12 +497,9 @@ static int erdma_probe_dev(struct pci_dev *pdev) goto err_release_bars; } - version = erdma_reg_read32(dev, ERDMA_REGS_VERSION_REG); - if (version == 0) { - /* we knows that it is a non-functional function. */ - err = -ENODEV; + err = erdma_preinit_proc(dev); + if (err) goto err_iounmap_func_bar; - } err = erdma_device_init(dev, pdev); if (err) @@ -273,7 +507,7 @@ static int erdma_probe_dev(struct pci_dev *pdev) err = erdma_request_vectors(dev); if (err) - goto err_iounmap_func_bar; + goto err_uninit_device; err = erdma_comm_irq_init(dev); if (err) @@ -293,14 +527,16 @@ static int erdma_probe_dev(struct pci_dev *pdev) err = erdma_ceqs_init(dev); if (err) - goto err_reset_hw; + goto err_stop_hw; + + msleep(500); erdma_finish_cmdq_init(dev); return 0; -err_reset_hw: - erdma_hw_reset(dev); +err_stop_hw: + erdma_hw_stop(dev, false); err_uninit_cmdq: erdma_cmdq_destroy(dev); @@ -314,6 +550,9 @@ static int erdma_probe_dev(struct pci_dev *pdev) err_free_vectors: pci_free_irq_vectors(dev->pdev); +err_uninit_device: + erdma_device_uninit(dev); + err_iounmap_func_bar: devm_iounmap(&pdev->dev, dev->func_bar); @@ -334,20 +573,37 @@ static void erdma_remove_dev(struct pci_dev *pdev) struct erdma_dev *dev = pci_get_drvdata(pdev); erdma_ceqs_uninit(dev); - erdma_hw_reset(dev); + erdma_hw_stop(dev, false); erdma_cmdq_destroy(dev); erdma_aeq_destroy(dev); erdma_comm_irq_uninit(dev); pci_free_irq_vectors(dev->pdev); - + erdma_device_uninit(dev); devm_iounmap(&pdev->dev, dev->func_bar); pci_release_selected_regions(pdev, ERDMA_BAR_MASK); - ib_dealloc_device(&dev->ibdev); - pci_disable_device(pdev); } +static void erdma_stats_init(struct erdma_dev *dev) +{ + atomic64_t *s = (atomic64_t *)&dev->stats; + int i; + + for (i = 0; i < sizeof(dev->stats) / sizeof(*s); i++, s++) + atomic64_set(s, 0); +} + +static int erdma_check_version(struct erdma_dev *dev) +{ + u8 fw_major = (dev->attrs.fw_version >> 16); + u8 fw_medium = (dev->attrs.fw_version >> 8); + + return (fw_major != ERDMA_MAJOR_VER || fw_medium != ERDMA_MEDIUM_VER) ? 
+ -1 : + 0; +} + #define ERDMA_GET_CAP(name, cap) FIELD_GET(ERDMA_CMD_DEV_CAP_##name##_MASK, cap) static int erdma_dev_attrs_init(struct erdma_dev *dev) @@ -368,7 +624,10 @@ static int erdma_dev_attrs_init(struct erdma_dev *dev) dev->attrs.max_mw = 1 << ERDMA_GET_CAP(MAX_MW, cap1); dev->attrs.max_recv_wr = 1 << ERDMA_GET_CAP(MAX_RECV_WR, cap0); dev->attrs.local_dma_key = ERDMA_GET_CAP(DMA_LOCAL_KEY, cap1); - dev->attrs.cc = ERDMA_GET_CAP(DEFAULT_CC, cap1); + if (default_cc >= 0 && default_cc < ERDMA_CC_METHODS_NUM) + dev->attrs.cc = default_cc; + else + dev->attrs.cc = ERDMA_GET_CAP(DEFAULT_CC, cap1); dev->attrs.max_qp = ERDMA_NQP_PER_QBLOCK * ERDMA_GET_CAP(QBLOCK, cap1); dev->attrs.max_mr = dev->attrs.max_qp << 1; dev->attrs.max_cq = dev->attrs.max_qp << 1; @@ -394,23 +653,7 @@ static int erdma_dev_attrs_init(struct erdma_dev *dev) dev->attrs.fw_version = FIELD_GET(ERDMA_CMD_INFO0_FW_VER_MASK, cap0); - return err; -} - -static int erdma_device_config(struct erdma_dev *dev) -{ - struct erdma_cmdq_config_device_req req = {}; - - if (!(dev->attrs.cap_flags & ERDMA_DEV_CAP_FLAGS_EXTEND_DB)) - return 0; - - erdma_cmdq_build_reqhdr(&req.hdr, CMDQ_SUBMOD_COMMON, - CMDQ_OPCODE_CONF_DEVICE); - - req.cfg = FIELD_PREP(ERDMA_CMD_CONFIG_DEVICE_PGSHIFT_MASK, PAGE_SHIFT) | - FIELD_PREP(ERDMA_CMD_CONFIG_DEVICE_PS_EN_MASK, 1); - - return erdma_post_cmd_wait(&dev->cmdq, &req, sizeof(req), NULL, NULL); + return erdma_check_version(dev); } static int erdma_res_cb_init(struct erdma_dev *dev) @@ -421,7 +664,8 @@ static int erdma_res_cb_init(struct erdma_dev *dev) dev->res_cb[i].next_alloc_idx = 1; spin_lock_init(&dev->res_cb[i].lock); dev->res_cb[i].bitmap = - bitmap_zalloc(dev->res_cb[i].max_cap, GFP_KERNEL); + kcalloc(BITS_TO_LONGS(dev->res_cb[i].max_cap), + sizeof(unsigned long), GFP_KERNEL); if (!dev->res_cb[i].bitmap) goto err; } @@ -430,7 +674,7 @@ static int erdma_res_cb_init(struct erdma_dev *dev) err: for (j = 0; j < i; j++) - bitmap_free(dev->res_cb[j].bitmap); + kfree(dev->res_cb[j].bitmap); return -ENOMEM; } @@ -440,14 +684,14 @@ static void erdma_res_cb_free(struct erdma_dev *dev) int i; for (i = 0; i < ERDMA_RES_CNT; i++) - bitmap_free(dev->res_cb[i].bitmap); + kfree(dev->res_cb[i].bitmap); } static const struct ib_device_ops erdma_device_ops = { .owner = THIS_MODULE, .driver_id = RDMA_DRIVER_ERDMA, .uverbs_abi_ver = ERDMA_ABI_VERSION, - + .alloc_hw_port_stats = erdma_alloc_hw_stats, .alloc_mr = erdma_ib_alloc_mr, .alloc_pd = erdma_alloc_pd, .alloc_ucontext = erdma_alloc_ucontext, @@ -459,6 +703,7 @@ static const struct ib_device_ops erdma_device_ops = { .destroy_cq = erdma_destroy_cq, .destroy_qp = erdma_destroy_qp, .get_dma_mr = erdma_get_dma_mr, + .get_hw_stats = erdma_get_hw_stats, .get_port_immutable = erdma_get_port_immutable, .iw_accept = erdma_accept, .iw_add_ref = erdma_qp_get_ref, @@ -481,6 +726,13 @@ static const struct ib_device_ops erdma_device_ops = { .query_qp = erdma_query_qp, .req_notify_cq = erdma_req_notify_cq, .reg_user_mr = erdma_reg_user_mr, + .get_netdev = erdma_get_netdev, + .query_pkey = erdma_query_pkey, + .modify_cq = erdma_modify_cq, + .create_ah = erdma_create_ah, + .destroy_ah = erdma_destroy_ah, + INIT_RDMA_OBJ_SIZE(ib_ah, sw_ah, ibah), + .get_vector_affinity = erdma_get_vector_affinity, INIT_RDMA_OBJ_SIZE(ib_cq, erdma_cq, ibcq), INIT_RDMA_OBJ_SIZE(ib_pd, erdma_pd, ibpd), @@ -488,22 +740,30 @@ static const struct ib_device_ops erdma_device_ops = { INIT_RDMA_OBJ_SIZE(ib_qp, erdma_qp, ibqp), }; +static const struct ib_device_ops erdma_compat_ops = { + .get_link_layer 
= erdma_get_link_layer, + .add_gid = erdma_add_gid, + .del_gid = erdma_del_gid, +}; + static int erdma_ib_device_add(struct pci_dev *pdev) { struct erdma_dev *dev = pci_get_drvdata(pdev); struct ib_device *ibdev = &dev->ibdev; + u32 tmp_idx; u64 mac; int ret; - ret = erdma_dev_attrs_init(dev); - if (ret) - return ret; + erdma_stats_init(dev); - ret = erdma_device_config(dev); + ret = erdma_dev_attrs_init(dev); if (ret) return ret; - ibdev->node_type = RDMA_NODE_RNIC; + if (compat_mode) + ibdev->node_type = RDMA_NODE_IB_CA; + else + ibdev->node_type = RDMA_NODE_RNIC; memcpy(ibdev->node_desc, ERDMA_NODE_DESC, sizeof(ERDMA_NODE_DESC)); /* @@ -512,9 +772,12 @@ static int erdma_ib_device_add(struct pci_dev *pdev) * per physical port. */ ibdev->phys_port_cnt = 1; - ibdev->num_comp_vectors = dev->attrs.irq_num - 1; + ibdev->num_comp_vectors = dev->attrs.max_ceqs - 1; + ibdev->dev.parent = &pdev->dev; ib_set_device_ops(ibdev, &erdma_device_ops); + if (compat_mode) + ib_set_device_ops(ibdev, &erdma_compat_ops); INIT_LIST_HEAD(&dev->cep_list); @@ -523,34 +786,62 @@ static int erdma_ib_device_add(struct pci_dev *pdev) xa_init_flags(&dev->cq_xa, XA_FLAGS_ALLOC1); dev->next_alloc_cqn = 1; dev->next_alloc_qpn = 1; + if (rand_qpn) { + get_random_bytes(&tmp_idx, sizeof(u32)); + dev->next_alloc_qpn = tmp_idx % dev->attrs.max_qp; + if (!dev->next_alloc_qpn) + dev->next_alloc_qpn = 1; + } ret = erdma_res_cb_init(dev); if (ret) return ret; + bitmap_zero(dev->sdb_page, ERDMA_DWQE_TYPE0_CNT); + bitmap_zero(dev->sdb_entry, ERDMA_DWQE_TYPE1_CNT); atomic_set(&dev->num_ctx, 0); mac = erdma_reg_read32(dev, ERDMA_REGS_NETDEV_MAC_L_REG); mac |= (u64)erdma_reg_read32(dev, ERDMA_REGS_NETDEV_MAC_H_REG) << 32; + dev_info(&dev->pdev->dev, "assoc netdev mac addr is 0x%llx.\n", mac); + u64_to_ether_addr(mac, dev->attrs.peer_addr); + dev->db_pool = dma_pool_create("erdma_db", &pdev->dev, ERDMA_DB_SIZE, + ERDMA_DB_SIZE, 0); + if (!dev->db_pool) { + ret = -ENOMEM; + goto err_out; + } + dev->reflush_wq = alloc_workqueue("erdma-reflush-wq", WQ_UNBOUND, WQ_UNBOUND_MAX_ACTIVE); if (!dev->reflush_wq) { ret = -ENOMEM; - goto err_alloc_workqueue; + goto free_pool; } ret = erdma_device_register(dev); if (ret) - goto err_register; + goto free_wq; + + ret = erdma_debugfs_files_create(dev); + if (ret) + goto device_unregister; + + dev->ibdev.use_cq_dim = false; return 0; -err_register: +device_unregister: + erdma_device_unregister(dev); +free_wq: destroy_workqueue(dev->reflush_wq); -err_alloc_workqueue: +free_pool: + dma_pool_destroy(dev->db_pool); +err_out: + xa_destroy(&dev->qp_xa); xa_destroy(&dev->cq_xa); @@ -563,13 +854,20 @@ static void erdma_ib_device_remove(struct pci_dev *pdev) { struct erdma_dev *dev = pci_get_drvdata(pdev); - unregister_netdevice_notifier(&dev->netdev_nb); - ib_unregister_device(&dev->ibdev); + erdma_debugfs_files_destroy(dev); + erdma_device_unregister(dev); + + WARN_ON(atomic_read(&dev->num_ctx)); + WARN_ON(atomic_read(&dev->num_cep)); + WARN_ON(!list_empty(&dev->cep_list)); - destroy_workqueue(dev->reflush_wq); erdma_res_cb_free(dev); xa_destroy(&dev->qp_xa); xa_destroy(&dev->cq_xa); + dma_pool_destroy(dev->db_pool); + destroy_workqueue(dev->reflush_wq); + if (compat_mode) + detach_sw_dev(dev); } static int erdma_probe(struct pci_dev *pdev, const struct pci_device_id *ent) @@ -608,13 +906,34 @@ static __init int erdma_init_module(void) { int ret; - ret = erdma_cm_init(); + erdma_debugfs_register(); + + ret = erdma_compat_init(); if (ret) return ret; - ret = pci_register_driver(&erdma_pci_driver); + ret = 
erdma_cm_init(); if (ret) - erdma_cm_exit(); + goto uninit_compat; + + ret = erdma_chrdev_init(); + if (ret) + goto uninit_cm; + + ret = pci_register_driver(&erdma_pci_driver); + if (ret) { + pr_err("Couldn't register erdma driver.\n"); + goto uninit_chrdev; + } + + return ret; + +uninit_chrdev: + erdma_chrdev_destroy(); +uninit_cm: + erdma_cm_exit(); +uninit_compat: + erdma_compat_exit(); return ret; } @@ -622,8 +941,10 @@ static __init int erdma_init_module(void) static void __exit erdma_exit_module(void) { pci_unregister_driver(&erdma_pci_driver); - + erdma_chrdev_destroy(); erdma_cm_exit(); + erdma_compat_exit(); + erdma_debugfs_unregister(); } module_init(erdma_init_module); diff --git a/drivers/infiniband/hw/erdma/erdma_qp.c b/drivers/infiniband/hw/erdma/erdma_qp.c index 6d0330badd68e86d1db20104803f166098590407..ce73bb8dc0f80dbe15e690beb3ad89a66a187aeb 100644 --- a/drivers/infiniband/hw/erdma/erdma_qp.c +++ b/drivers/infiniband/hw/erdma/erdma_qp.c @@ -6,9 +6,17 @@ /* Authors: Bernard Metzler */ /* Copyright (c) 2008-2019, IBM Corporation */ +#include "kcompat.h" + +#include + #include "erdma_cm.h" #include "erdma_verbs.h" +bool wwi_perf; +module_param(wwi_perf, bool, 0644); +MODULE_PARM_DESC(wwi_perf, "Write with Immediate optimize"); + void erdma_qp_llp_close(struct erdma_qp *qp) { struct erdma_qp_attrs qp_attrs; @@ -20,7 +28,7 @@ void erdma_qp_llp_close(struct erdma_qp *qp) case ERDMA_QP_STATE_RTR: case ERDMA_QP_STATE_IDLE: case ERDMA_QP_STATE_TERMINATE: - qp_attrs.state = ERDMA_QP_STATE_CLOSING; + qp_attrs.state = ERDMA_QP_STATE_ERROR; erdma_modify_qp_internal(qp, &qp_attrs, ERDMA_QP_ATTR_STATE); break; case ERDMA_QP_STATE_CLOSING: @@ -59,6 +67,34 @@ static int erdma_modify_qp_state_to_rts(struct erdma_qp *qp, struct erdma_cep *cep = qp->cep; struct sockaddr_storage local_addr, remote_addr; + if (qp->attrs.connect_without_cm) { + req.cookie = FIELD_PREP(ERDMA_CMD_MODIFY_QP_WWI_PERF_MASK, 1) | + FIELD_PREP(ERDMA_CMD_MODIFY_QP_RQPN_MASK, + qp->attrs.remote_qp_num); + if (((struct sockaddr_in *)&qp->attrs.raddr)->sin_family == + AF_INET) { + req.dip = qp->attrs.raddr.in.sin_addr.s_addr; + req.sip = qp->attrs.laddr.in.sin_addr.s_addr; + } else if (dev->attrs.cap_flags & ERDMA_DEV_CAP_FLAGS_IPV6) { + memcpy(req.ipv6_daddr, + &qp->attrs.raddr.in6.sin6_addr.s6_addr, + sizeof(struct in6_addr)); + memcpy(req.ipv6_saddr, + &qp->attrs.laddr.in6.sin6_addr.s6_addr, + sizeof(struct in6_addr)); + req.cookie |= + FIELD_PREP(ERDMA_CMD_MODIFY_QP_IPV6_MASK, 1); + req.flow_label = 0; + } else { + return -EAFNOSUPPORT; + } + req.dport = htons(qp->attrs.dport); + req.sport = htons(qp->attrs.sport); + req.send_nxt = 0; + req.recv_nxt = 0; + + goto without_cep; + } if (!(mask & ERDMA_QP_ATTR_LLP_HANDLE)) return -EINVAL; @@ -73,9 +109,63 @@ static int erdma_modify_qp_state_to_rts(struct erdma_qp *qp, if (ret < 0) return ret; + tp = tcp_sk(qp->cep->sock->sk); + + qp->attrs.remote_cookie = be32_to_cpu(qp->cep->mpa.ext_data.cookie); + + req.cookie = be32_to_cpu(qp->cep->mpa.ext_data.cookie); + if (qp->cep->sock->sk->sk_family == AF_INET) { + req.dip = to_sockaddr_in(remote_addr).sin_addr.s_addr; + req.sip = to_sockaddr_in(local_addr).sin_addr.s_addr; + req.dport = to_sockaddr_in(remote_addr).sin_port; + req.sport = to_sockaddr_in(local_addr).sin_port; + } else if (dev->attrs.cap_flags & ERDMA_DEV_CAP_FLAGS_IPV6) { + req.cookie = + FIELD_PREP(ERDMA_CMD_MODIFY_QP_IPV6_MASK, 1) | + FIELD_PREP(ERDMA_CMD_MODIFY_QP_RQPN_MASK, req.cookie); + memcpy(req.ipv6_daddr, &to_sockaddr_in6(remote_addr).sin6_addr, + 
sizeof(struct in6_addr)); + memcpy(req.ipv6_saddr, &to_sockaddr_in6(local_addr).sin6_addr, + sizeof(struct in6_addr)); + req.dport = to_sockaddr_in6(remote_addr).sin6_port; + req.sport = to_sockaddr_in6(local_addr).sin6_port; + req.flow_label = to_sockaddr_in6(remote_addr).sin6_flowinfo; + } else { + return -EAFNOSUPPORT; + } + + req.send_nxt = tp->snd_nxt; + /* rsvd tcp seq for mpa-rsp in server. */ + if (qp->attrs.qp_type == ERDMA_QP_PASSIVE) + req.send_nxt += MPA_DEFAULT_HDR_LEN + qp->attrs.pd_len; + req.recv_nxt = tp->rcv_nxt; + +without_cep: + qp->attrs.state = ERDMA_QP_STATE_RTS; - tp = tcp_sk(qp->cep->sock->sk); + erdma_cmdq_build_reqhdr(&req.hdr, CMDQ_SUBMOD_RDMA, + CMDQ_OPCODE_MODIFY_QP); + + req.cfg = FIELD_PREP(ERDMA_CMD_MODIFY_QP_STATE_MASK, qp->attrs.state) | + FIELD_PREP(ERDMA_CMD_MODIFY_QP_CC_MASK, qp->attrs.cc) | + FIELD_PREP(ERDMA_CMD_MODIFY_QP_QPN_MASK, QP_ID(qp)); + req.cookie |= FIELD_PREP(ERDMA_CMD_MODIFY_QP_TLP_MASK, 1); + + if (wwi_perf || compat_mode) + req.cookie |= FIELD_PREP(ERDMA_CMD_MODIFY_QP_WWI_PERF_MASK, 1); + + return erdma_post_cmd_wait(&dev->cmdq, &req, sizeof(req), NULL, NULL); +} + +static int erdma_modify_qp_state_to_rts_compat(struct erdma_qp *qp, + struct erdma_qp_attrs *attrs, + enum erdma_qp_attr_mask mask) +{ + struct erdma_dev *dev = qp->dev; + struct erdma_cmdq_modify_qp_req req; + + qp->attrs.state = ERDMA_QP_STATE_RTS; erdma_cmdq_build_reqhdr(&req.hdr, CMDQ_SUBMOD_RDMA, CMDQ_OPCODE_MODIFY_QP); @@ -83,18 +173,35 @@ static int erdma_modify_qp_state_to_rts(struct erdma_qp *qp, req.cfg = FIELD_PREP(ERDMA_CMD_MODIFY_QP_STATE_MASK, qp->attrs.state) | FIELD_PREP(ERDMA_CMD_MODIFY_QP_CC_MASK, qp->attrs.cc) | FIELD_PREP(ERDMA_CMD_MODIFY_QP_QPN_MASK, QP_ID(qp)); + req.cookie = + FIELD_PREP(ERDMA_CMD_MODIFY_QP_RQPN_MASK, qp->attrs.remote_qp_num) | + FIELD_PREP(ERDMA_CMD_MODIFY_QP_TLP_MASK, 1); + + if (((struct sockaddr_in *)&qp->attrs.raddr)->sin_family == AF_INET) { + req.dip = qp->attrs.raddr.in.sin_addr.s_addr; + req.sip = qp->attrs.laddr.in.sin_addr.s_addr; + } else if (dev->attrs.cap_flags & ERDMA_DEV_CAP_FLAGS_IPV6) { + req.cookie |= FIELD_PREP(ERDMA_CMD_MODIFY_QP_IPV6_MASK, 1); + memcpy(req.ipv6_daddr, &qp->attrs.raddr.in6.sin6_addr.s6_addr, + sizeof(struct in6_addr)); + memcpy(req.ipv6_saddr, &qp->attrs.laddr.in6.sin6_addr.s6_addr, + sizeof(struct in6_addr)); + req.flow_label = 0; + } else { + return -EAFNOSUPPORT; + } - req.cookie = be32_to_cpu(qp->cep->mpa.ext_data.cookie); - req.dip = to_sockaddr_in(remote_addr).sin_addr.s_addr; - req.sip = to_sockaddr_in(local_addr).sin_addr.s_addr; - req.dport = to_sockaddr_in(remote_addr).sin_port; - req.sport = to_sockaddr_in(local_addr).sin_port; + erdma_gen_port_from_qpn(req.sip, req.dip, QP_ID(qp), + qp->attrs.remote_qp_num, &req.sport, + &req.dport); + req.sport = htons(req.sport); + req.dport = htons(req.dport); - req.send_nxt = tp->snd_nxt; - /* rsvd tcp seq for mpa-rsp in server. 
*/ - if (qp->attrs.qp_type == ERDMA_QP_PASSIVE) - req.send_nxt += MPA_DEFAULT_HDR_LEN + qp->attrs.pd_len; - req.recv_nxt = tp->rcv_nxt; + req.send_nxt = req.sport * 4; + req.recv_nxt = req.dport * 4; + + if (wwi_perf || compat_mode) + req.cookie |= FIELD_PREP(ERDMA_CMD_MODIFY_QP_WWI_PERF_MASK, 1); return erdma_post_cmd_wait(&dev->cmdq, &req, sizeof(req), NULL, NULL); } @@ -120,8 +227,8 @@ static int erdma_modify_qp_state_to_stop(struct erdma_qp *qp, int erdma_modify_qp_internal(struct erdma_qp *qp, struct erdma_qp_attrs *attrs, enum erdma_qp_attr_mask mask) { - bool need_reflush = false; int drop_conn, ret = 0; + bool need_reflush = false; if (!mask) return 0; @@ -133,15 +240,23 @@ int erdma_modify_qp_internal(struct erdma_qp *qp, struct erdma_qp_attrs *attrs, case ERDMA_QP_STATE_IDLE: case ERDMA_QP_STATE_RTR: if (attrs->state == ERDMA_QP_STATE_RTS) { - ret = erdma_modify_qp_state_to_rts(qp, attrs, mask); + if (compat_mode) + ret = erdma_modify_qp_state_to_rts_compat( + qp, attrs, mask); + else + ret = erdma_modify_qp_state_to_rts(qp, attrs, + mask); } else if (attrs->state == ERDMA_QP_STATE_ERROR) { qp->attrs.state = ERDMA_QP_STATE_ERROR; - need_reflush = true; if (qp->cep) { erdma_cep_put(qp->cep); qp->cep = NULL; } + ret = erdma_modify_qp_state_to_stop(qp, attrs, mask); + /* We apply to kernel qp first. */ + if (rdma_is_kernel_res(&qp->ibqp.res)) + need_reflush = true; } break; case ERDMA_QP_STATE_RTS: @@ -150,9 +265,13 @@ int erdma_modify_qp_internal(struct erdma_qp *qp, struct erdma_qp_attrs *attrs, if (attrs->state == ERDMA_QP_STATE_CLOSING || attrs->state == ERDMA_QP_STATE_TERMINATE || attrs->state == ERDMA_QP_STATE_ERROR) { - ret = erdma_modify_qp_state_to_stop(qp, attrs, mask); drop_conn = 1; - need_reflush = true; + if (!(qp->attrs.flags & ERDMA_QP_IN_DESTROY)) + ret = erdma_modify_qp_state_to_stop(qp, attrs, + mask); + /* We apply to kernel qp first. 
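Only kernel-owned QPs are scheduled for the reflush work here; user-space QPs are presumably flushed by the user-space provider instead.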
*/ + if (rdma_is_kernel_res(&qp->ibqp.res)) + need_reflush = true; } if (drop_conn) @@ -177,7 +296,7 @@ int erdma_modify_qp_internal(struct erdma_qp *qp, struct erdma_qp_attrs *attrs, break; } - if (need_reflush && !ret && rdma_is_kernel_res(&qp->ibqp.res)) { + if (need_reflush && !ret) { qp->flags |= ERDMA_QP_IN_FLUSHING; mod_delayed_work(qp->dev->reflush_wq, &qp->reflush_dwork, usecs_to_jiffies(100)); @@ -398,10 +517,16 @@ static int erdma_push_one_sqe(struct erdma_qp *qp, u16 *pi, mr->access = ERDMA_MR_ACC_LR | to_erdma_access_flags(reg_wr(send_wr)->access); + if (compat_mode) + mr->access = mr->access | ERDMA_MR_ACC_RW; + regmr_sge->addr = cpu_to_le64(mr->ibmr.iova); regmr_sge->length = cpu_to_le32(mr->ibmr.length); regmr_sge->stag = cpu_to_le32(reg_wr(send_wr)->key); - attrs = FIELD_PREP(ERDMA_SQE_MR_ACCESS_MASK, mr->access) | + regmr_sge->attr1 = FIELD_PREP(ERDMA_SQE_MR_PGSZ_MASK, + ilog2(mr->ibmr.page_size)); + attrs = FIELD_PREP(ERDMA_SQE_MR_PGSZ_AVAIL_MASK, 1) | + FIELD_PREP(ERDMA_SQE_MR_ACCESS_MASK, mr->access) | FIELD_PREP(ERDMA_SQE_MR_MTT_CNT_MASK, mr->mem.mtt_nents); @@ -418,7 +543,7 @@ static int erdma_push_one_sqe(struct erdma_qp *qp, u16 *pi, wqe_size = sizeof(struct erdma_reg_mr_sqe); } - regmr_sge->attrs = cpu_to_le32(attrs); + regmr_sge->attr0 = cpu_to_le32(attrs); goto out; case IB_WR_LOCAL_INV: wqe_hdr |= FIELD_PREP(ERDMA_SQE_HDR_OPCODE_MASK, @@ -492,23 +617,26 @@ static void kick_sq_db(struct erdma_qp *qp, u16 pi) u64 db_data = FIELD_PREP(ERDMA_SQE_HDR_QPN_MASK, QP_ID(qp)) | FIELD_PREP(ERDMA_SQE_HDR_WQEBB_INDEX_MASK, pi); - *(u64 *)qp->kern_qp.sq_db_info = db_data; + *(u64 *)qp->kern_qp.sq_dbrec = db_data; writeq(db_data, qp->kern_qp.hw_sq_db); } int erdma_post_send(struct ib_qp *ibqp, const struct ib_send_wr *send_wr, const struct ib_send_wr **bad_send_wr) { + const struct ib_send_wr *wr = send_wr; struct erdma_qp *qp = to_eqp(ibqp); int ret = 0; - const struct ib_send_wr *wr = send_wr; unsigned long flags; u16 sq_pi; if (!send_wr) return -EINVAL; - spin_lock_irqsave(&qp->lock, flags); + if (compat_mode && unlikely(ibqp->qp_type == IB_QPT_GSI)) + return erdma_post_send_mad(ibqp, send_wr, bad_send_wr); + + spin_lock_irqsave(&qp->kern_qp.sq_lock, flags); sq_pi = qp->kern_qp.sq_pi; while (wr) { @@ -528,7 +656,7 @@ int erdma_post_send(struct ib_qp *ibqp, const struct ib_send_wr *send_wr, wr = wr->next; } - spin_unlock_irqrestore(&qp->lock, flags); + spin_unlock_irqrestore(&qp->kern_qp.sq_lock, flags); if (unlikely(qp->flags & ERDMA_QP_IN_FLUSHING)) mod_delayed_work(qp->dev->reflush_wq, &qp->reflush_dwork, @@ -540,13 +668,17 @@ int erdma_post_send(struct ib_qp *ibqp, const struct ib_send_wr *send_wr, static int erdma_post_recv_one(struct erdma_qp *qp, const struct ib_recv_wr *recv_wr) { - struct erdma_rqe *rqe = - get_queue_entry(qp->kern_qp.rq_buf, qp->kern_qp.rq_pi, - qp->attrs.rq_size, RQE_SHIFT); + struct erdma_rqe *rqe = get_queue_entry(qp->kern_qp.rq_buf, + qp->kern_qp.rq_pi, + qp->attrs.rq_size, RQE_SHIFT); rqe->qe_idx = cpu_to_le16(qp->kern_qp.rq_pi + 1); rqe->qpn = cpu_to_le32(QP_ID(qp)); + if ((u16)(qp->kern_qp.rq_pi - qp->kern_qp.rq_ci) == + (u16)qp->attrs.rq_size) + return -ENOMEM; + if (recv_wr->num_sge == 0) { rqe->length = 0; } else if (recv_wr->num_sge == 1) { @@ -557,7 +689,7 @@ static int erdma_post_recv_one(struct erdma_qp *qp, return -EINVAL; } - *(u64 *)qp->kern_qp.rq_db_info = *(u64 *)rqe; + *(u64 *)qp->kern_qp.rq_dbrec = *(u64 *)rqe; writeq(*(u64 *)rqe, qp->kern_qp.hw_rq_db); qp->kern_qp.rwr_tbl[qp->kern_qp.rq_pi & (qp->attrs.rq_size - 1)] = @@ 
-573,9 +705,12 @@ int erdma_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *recv_wr, const struct ib_recv_wr *wr = recv_wr; struct erdma_qp *qp = to_eqp(ibqp); unsigned long flags; - int ret; + int ret = 0; + + if (compat_mode && unlikely(ibqp->qp_type == IB_QPT_GSI)) + return erdma_post_recv_mad(ibqp, recv_wr, bad_recv_wr); - spin_lock_irqsave(&qp->lock, flags); + spin_lock_irqsave(&qp->kern_qp.rq_lock, flags); while (wr) { ret = erdma_post_recv_one(qp, wr); @@ -586,7 +721,7 @@ int erdma_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *recv_wr, wr = wr->next; } - spin_unlock_irqrestore(&qp->lock, flags); + spin_unlock_irqrestore(&qp->kern_qp.rq_lock, flags); if (unlikely(qp->flags & ERDMA_QP_IN_FLUSHING)) mod_delayed_work(qp->dev->reflush_wq, &qp->reflush_dwork, diff --git a/drivers/infiniband/hw/erdma/erdma_stats.c b/drivers/infiniband/hw/erdma/erdma_stats.c new file mode 100644 index 0000000000000000000000000000000000000000..feb7337178fbb996cff34b90faad5f0a82866980 --- /dev/null +++ b/drivers/infiniband/hw/erdma/erdma_stats.c @@ -0,0 +1,119 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB + +/* Authors: Cheng Xu */ +/* Kai Shen */ +/* Copyright (c) 2020-2022, Alibaba Group. */ + +#include "erdma.h" +#include "erdma_verbs.h" + +static const struct rdma_stat_desc erdma_hw_stat_descs[] = { + [ERDMA_STATS_IW_LISTEN_CREATE].name = "listen_create_cnt", + [ERDMA_STATS_IW_LISTEN_IPV6].name = "listen_ipv6_cnt", + [ERDMA_STATS_IW_LISTEN_SUCCESS].name = "listen_success_cnt", + [ERDMA_STATS_IW_LISTEN_FAILED].name = "listen_failed_cnt", + [ERDMA_STATS_IW_LISTEN_DESTROY].name = "listen_destroy_cnt", + [ERDMA_STATS_IW_ACCEPT].name = "accept_total_cnt", + [ERDMA_STATS_IW_ACCEPT_SUCCESS].name = "accept_success_cnt", + [ERDMA_STATS_IW_ACCEPT_FAILED].name = "accept_failed_cnt", + [ERDMA_STATS_IW_REJECT].name = "reject_cnt", + [ERDMA_STATS_IW_REJECT_FAILED].name = "reject_failed_cnt", + [ERDMA_STATS_IW_CONNECT].name = "connect_total_cnt", + [ERDMA_STATS_IW_CONNECT_SUCCESS].name = "connect_success_cnt", + [ERDMA_STATS_IW_CONNECT_FAILED].name = "connect_failed_cnt", + [ERDMA_STATS_IW_CONNECT_TIMEOUT].name = "connect_timeout_cnt", + [ERDMA_STATS_IW_CONNECT_RST].name = "connect_reset_cnt", + [ERDMA_STATS_CMDQ_SUBMITTED].name = "cmdq_submitted_cnt", + [ERDMA_STATS_CMDQ_COMP].name = "cmdq_comp_cnt", + [ERDMA_STATS_CMDQ_EQ_NOTIFY].name = "cmdq_eq_notify_cnt", + [ERDMA_STATS_CMDQ_EQ_EVENT].name = "cmdq_eq_event_cnt", + [ERDMA_STATS_CMDQ_CQ_ARMED].name = "cmdq_cq_armed_cnt", + + [ERDMA_STATS_AEQ_EVENT].name = "erdma_aeq_event_cnt", + [ERDMA_STATS_AEQ_NOTIFY].name = "erdma_aeq_notify_cnt", + + [ERDMA_STATS_CMD_ALLOC_MR].name = "verbs_alloc_mr_cnt", + [ERDMA_STATS_CMD_ALLOC_MR_FAILED].name = "verbs_alloc_mr_failed_cnt", + [ERDMA_STATS_CMD_ALLOC_PD].name = "verbs_alloc_pd_cnt", + [ERDMA_STATS_CMD_ALLOC_PD_FAILED].name = "verbs_alloc_pd_failed_cnt", + [ERDMA_STATS_CMD_ALLOC_UCTX].name = "verbs_alloc_uctx_cnt", + [ERDMA_STATS_CMD_ALLOC_UCTX_FAILED].name = + "verbs_alloc_uctx_failed_cnt", + + [ERDMA_STATS_CMD_CREATE_CQ].name = "verbs_create_cq_cnt", + [ERDMA_STATS_CMD_CREATE_CQ_FAILED].name = "verbs_create_cq_failed_cnt", + [ERDMA_STATS_CMD_CREATE_QP].name = "verbs_create_qp_cnt", + [ERDMA_STATS_CMD_CREATE_QP_FAILED].name = "verbs_create_qp_failed_cnt", + + [ERDMA_STATS_CMD_DESTROY_QP].name = "verbs_create_qp_failed_cnt", + [ERDMA_STATS_CMD_DESTROY_CQ].name = "verbs_create_cq_failed_cnt", + + [ERDMA_STATS_CMD_DEALLOC_PD].name = "verbs_dealloc_pd_cnt", + [ERDMA_STATS_CMD_DEALLOC_UCTX].name = 
"verbs_dealloc_uctx_cnt", + [ERDMA_STATS_CMD_DEREG_MR].name = "verbs_dereg_mr_cnt", + [ERDMA_STATS_CMD_DEREG_MR_FAILED].name = "verbs_dereg_mr_failed_cnt", + [ERDMA_STATS_CMD_DESTROY_CQ].name = "verbs_destroy_cq_cnt", + [ERDMA_STATS_CMD_DESTROY_CQ_FAILED].name = + "verbs_destroy_cq_failed_cnt", + [ERDMA_STATS_CMD_DESTROY_QP].name = "verbs_destroy_qp_cnt", + [ERDMA_STATS_CMD_DESTROY_QP_FAILED].name = + "verbs_destroy_qp_failed_cnt", + + [ERDMA_STATS_CMD_GET_DMA_MR].name = "verbs_get_dma_mr_cnt", + [ERDMA_STATS_CMD_GET_DMA_MR_FAILED].name = + "verbs_get_dma_mr_failed_cnt", + [ERDMA_STATS_CMD_REG_USR_MR].name = "verbs_reg_usr_mr_cnt", + [ERDMA_STATS_CMD_REG_USR_MR_FAILED].name = + "verbs_reg_usr_mr_failed_cnt", + + [ERDMA_STATS_TX_REQS_CNT].name = "hw_tx_reqs_cnt", + [ERDMA_STATS_TX_PACKETS_CNT].name = "hw_tx_packets_cnt", + [ERDMA_STATS_TX_BYTES_CNT].name = "hw_tx_bytes_cnt", + [ERDMA_STATS_TX_DISABLE_DROP_CNT].name = "hw_disable_drop_cnt", + [ERDMA_STATS_TX_BPS_METER_DROP_CNT].name = "hw_bps_limit_drop_cnt", + [ERDMA_STATS_TX_PPS_METER_DROP_CNT].name = "hw_pps_limit_drop_cnt", + [ERDMA_STATS_RX_PACKETS_CNT].name = "hw_rx_packets_cnt", + [ERDMA_STATS_RX_BYTES_CNT].name = "hw_rx_bytes_cnt", + [ERDMA_STATS_RX_DISABLE_DROP_CNT].name = "hw_rx_disable_drop_cnt", + [ERDMA_STATS_RX_BPS_METER_DROP_CNT].name = "hw_rx_bps_limit_drop_cnt", + [ERDMA_STATS_RX_PPS_METER_DROP_CNT].name = "hw_rx_pps_limit_drop_cnt", + +}; + +struct rdma_hw_stats *erdma_alloc_hw_stats(struct ib_device *ibdev, + port_t port_num) +{ + return rdma_alloc_hw_stats_struct(erdma_hw_stat_descs, ERDMA_STATS_MAX, + RDMA_HW_STATS_DEFAULT_LIFESPAN); +} + +int erdma_get_hw_stats(struct ib_device *ibdev, struct rdma_hw_stats *stats, + port_t port_num, int index) +{ + struct erdma_dev *dev = to_edev(ibdev); + int ret; + + ret = erdma_query_hw_stats(dev); + if (ret) + return ret; + + atomic64_set(&dev->stats.value[ERDMA_STATS_CMDQ_SUBMITTED], + dev->cmdq.sq.total_cmds); + atomic64_set(&dev->stats.value[ERDMA_STATS_CMDQ_COMP], + dev->cmdq.sq.total_comp_cmds); + atomic64_set(&dev->stats.value[ERDMA_STATS_CMDQ_EQ_NOTIFY], + atomic64_read(&dev->cmdq.eq.notify_num)); + atomic64_set(&dev->stats.value[ERDMA_STATS_CMDQ_EQ_EVENT], + atomic64_read(&dev->cmdq.eq.event_num)); + atomic64_set(&dev->stats.value[ERDMA_STATS_CMDQ_CQ_ARMED], + atomic64_read(&dev->cmdq.cq.armed_num)); + atomic64_set(&dev->stats.value[ERDMA_STATS_AEQ_EVENT], + atomic64_read(&dev->aeq.event_num)); + atomic64_set(&dev->stats.value[ERDMA_STATS_AEQ_NOTIFY], + atomic64_read(&dev->aeq.notify_num)); + + memcpy(&stats->value[0], &dev->stats.value[0], + sizeof(u64) * ERDMA_STATS_MAX); + + return stats->num_counters; +} diff --git a/drivers/infiniband/hw/erdma/erdma_stats.h b/drivers/infiniband/hw/erdma/erdma_stats.h new file mode 100644 index 0000000000000000000000000000000000000000..c4f7c950421b9562b759bc998fbdc8db5328d8b5 --- /dev/null +++ b/drivers/infiniband/hw/erdma/erdma_stats.h @@ -0,0 +1,91 @@ +/* SPDX-License-Identifier: GPL-2.0 or BSD-3-Clause */ + +/* Authors: Cheng Xu */ +/* Kai Shen */ +/* Copyright (c) 2020-2022, Alibaba Group. 
*/ + +#ifndef __ERDMA_STATS_H__ +#define __ERDMA_STATS_H__ + +#include "kcompat.h" +#include + +#define ERDMA_INC_CNT(dev, name) \ + atomic64_inc(&dev->stats.value[ERDMA_STATS_##name]) + +enum erdma_hw_stats_index { + ERDMA_STATS_IW_LISTEN_CREATE = 0, + ERDMA_STATS_IW_LISTEN_IPV6, + ERDMA_STATS_IW_LISTEN_SUCCESS, + ERDMA_STATS_IW_LISTEN_FAILED, + ERDMA_STATS_IW_LISTEN_DESTROY, + + ERDMA_STATS_IW_ACCEPT, + ERDMA_STATS_IW_ACCEPT_SUCCESS, + ERDMA_STATS_IW_ACCEPT_FAILED, + + ERDMA_STATS_IW_REJECT, + ERDMA_STATS_IW_REJECT_FAILED, + ERDMA_STATS_IW_CONNECT, + ERDMA_STATS_IW_CONNECT_SUCCESS, + ERDMA_STATS_IW_CONNECT_FAILED, + ERDMA_STATS_IW_CONNECT_TIMEOUT, + ERDMA_STATS_IW_CONNECT_RST, + + ERDMA_STATS_CMDQ_SUBMITTED, + ERDMA_STATS_CMDQ_COMP, + ERDMA_STATS_CMDQ_EQ_NOTIFY, + ERDMA_STATS_CMDQ_EQ_EVENT, + ERDMA_STATS_CMDQ_CQ_ARMED, + + ERDMA_STATS_AEQ_EVENT, + ERDMA_STATS_AEQ_NOTIFY, + + ERDMA_STATS_CMD_ALLOC_MR, + ERDMA_STATS_CMD_ALLOC_MR_FAILED, + ERDMA_STATS_CMD_ALLOC_PD, + ERDMA_STATS_CMD_ALLOC_PD_FAILED, + ERDMA_STATS_CMD_ALLOC_UCTX, + ERDMA_STATS_CMD_ALLOC_UCTX_FAILED, + + ERDMA_STATS_CMD_CREATE_CQ, + ERDMA_STATS_CMD_CREATE_CQ_FAILED, + ERDMA_STATS_CMD_CREATE_QP, + ERDMA_STATS_CMD_CREATE_QP_FAILED, + + ERDMA_STATS_CMD_DEALLOC_PD, + ERDMA_STATS_CMD_DEALLOC_UCTX, + ERDMA_STATS_CMD_DEREG_MR, + ERDMA_STATS_CMD_DEREG_MR_FAILED, + ERDMA_STATS_CMD_DESTROY_CQ, + ERDMA_STATS_CMD_DESTROY_CQ_FAILED, + ERDMA_STATS_CMD_DESTROY_QP, + ERDMA_STATS_CMD_DESTROY_QP_FAILED, + + ERDMA_STATS_CMD_GET_DMA_MR, + ERDMA_STATS_CMD_GET_DMA_MR_FAILED, + ERDMA_STATS_CMD_REG_USR_MR, + ERDMA_STATS_CMD_REG_USR_MR_FAILED, + + ERDMA_STATS_TX_REQS_CNT, + ERDMA_STATS_TX_PACKETS_CNT, + ERDMA_STATS_TX_BYTES_CNT, + ERDMA_STATS_TX_DISABLE_DROP_CNT, + ERDMA_STATS_TX_BPS_METER_DROP_CNT, + ERDMA_STATS_TX_PPS_METER_DROP_CNT, + + ERDMA_STATS_RX_PACKETS_CNT, + ERDMA_STATS_RX_BYTES_CNT, + ERDMA_STATS_RX_DISABLE_DROP_CNT, + ERDMA_STATS_RX_BPS_METER_DROP_CNT, + ERDMA_STATS_RX_PPS_METER_DROP_CNT, + + ERDMA_STATS_MAX +}; + +struct rdma_hw_stats *erdma_alloc_hw_stats(struct ib_device *ibdev, + port_t port_num); +int erdma_get_hw_stats(struct ib_device *ibdev, struct rdma_hw_stats *stats, + port_t port_num, int index); + +#endif diff --git a/drivers/infiniband/hw/erdma/erdma_verbs.c b/drivers/infiniband/hw/erdma/erdma_verbs.c index b010c4209ea38189c9ea2170a1de15bdf812ca46..fff10523c376e0bcb8b8589afb942b763c432edb 100644 --- a/drivers/infiniband/hw/erdma/erdma_verbs.c +++ b/drivers/infiniband/hw/erdma/erdma_verbs.c @@ -9,16 +9,22 @@ /* Copyright (c) 2013-2015, Mellanox Technologies. All rights reserved. 
*/ +#include "kcompat.h" + +#include +#include #include #include -#include #include #include +#include #include "erdma.h" #include "erdma_cm.h" #include "erdma_verbs.h" +extern bool compat_mode; + static void assemble_qbuf_mtt_for_cmd(struct erdma_mem *mem, u32 *cfg, u64 *addr0, u64 *addr1) { @@ -26,22 +32,50 @@ static void assemble_qbuf_mtt_for_cmd(struct erdma_mem *mem, u32 *cfg, if (mem->mtt_nents > ERDMA_MAX_INLINE_MTT_ENTRIES) { *addr0 = mtt->buf_dma; - *cfg |= FIELD_PREP(ERDMA_CMD_CREATE_QP_MTT_LEVEL_MASK, - ERDMA_MR_MTT_1LEVEL); + *cfg |= FIELD_PREP(ERDMA_CMD_CREATE_QP_MTT_TYPE_MASK, + ERDMA_MR_INDIRECT_MTT); } else { - *addr0 = mtt->buf[0]; - memcpy(addr1, mtt->buf + 1, MTT_SIZE(mem->mtt_nents - 1)); - *cfg |= FIELD_PREP(ERDMA_CMD_CREATE_QP_MTT_LEVEL_MASK, - ERDMA_MR_MTT_0LEVEL); + *addr0 = ((u64 *)(uintptr_t)mtt->buf)[0]; + memcpy(addr1, mtt->buf + sizeof(dma_addr_t), + MTT_SIZE(mem->mtt_nents - 1)); + *cfg |= FIELD_PREP(ERDMA_CMD_CREATE_QP_MTT_TYPE_MASK, + ERDMA_MR_INLINE_MTT); } } -static int create_qp_cmd(struct erdma_ucontext *uctx, struct erdma_qp *qp) +static void create_qp_mtt_cfg(struct erdma_cmdq_create_qp_req *req, + struct erdma_mem *sq_mem, + struct erdma_mem *rq_mem, u32 scqn, u32 rcqn) +{ + req->sq_cqn_mtt_cfg = + FIELD_PREP(ERDMA_CMD_CREATE_QP_PAGE_SIZE_MASK, + ilog2(sq_mem->page_size) - ERDMA_HW_PAGE_SHIFT); + req->sq_cqn_mtt_cfg |= FIELD_PREP(ERDMA_CMD_CREATE_QP_CQN_MASK, scqn); + + req->rq_cqn_mtt_cfg = + FIELD_PREP(ERDMA_CMD_CREATE_QP_PAGE_SIZE_MASK, + ilog2(rq_mem->page_size) - ERDMA_HW_PAGE_SHIFT); + req->rq_cqn_mtt_cfg |= FIELD_PREP(ERDMA_CMD_CREATE_QP_CQN_MASK, rcqn); + + req->sq_mtt_cfg = sq_mem->page_offset; + req->sq_mtt_cfg |= + FIELD_PREP(ERDMA_CMD_CREATE_QP_MTT_CNT_MASK, sq_mem->mtt_nents); + + req->rq_mtt_cfg = rq_mem->page_offset; + req->rq_mtt_cfg |= + FIELD_PREP(ERDMA_CMD_CREATE_QP_MTT_CNT_MASK, rq_mem->mtt_nents); + + assemble_qbuf_mtt_for_cmd(sq_mem, &req->sq_mtt_cfg, &req->sq_buf_addr, + req->sq_mtt_entry); + assemble_qbuf_mtt_for_cmd(rq_mem, &req->rq_mtt_cfg, &req->rq_buf_addr, + req->rq_mtt_entry); +} + +static int create_qp_cmd(struct erdma_dev *dev, struct erdma_qp *qp, + bool is_user) { - struct erdma_dev *dev = to_edev(qp->ibqp.device); struct erdma_pd *pd = to_epd(qp->ibqp.pd); struct erdma_cmdq_create_qp_req req; - struct erdma_uqp *user_qp; u64 resp0, resp1; int err; @@ -55,85 +89,31 @@ static int create_qp_cmd(struct erdma_ucontext *uctx, struct erdma_qp *qp) ilog2(qp->attrs.rq_size)) | FIELD_PREP(ERDMA_CMD_CREATE_QP_PD_MASK, pd->pdn); - if (rdma_is_kernel_res(&qp->ibqp.res)) { - u32 pgsz_range = ilog2(SZ_1M) - ERDMA_HW_PAGE_SHIFT; - - req.sq_cqn_mtt_cfg = - FIELD_PREP(ERDMA_CMD_CREATE_QP_PAGE_SIZE_MASK, - pgsz_range) | - FIELD_PREP(ERDMA_CMD_CREATE_QP_CQN_MASK, qp->scq->cqn); - req.rq_cqn_mtt_cfg = - FIELD_PREP(ERDMA_CMD_CREATE_QP_PAGE_SIZE_MASK, - pgsz_range) | - FIELD_PREP(ERDMA_CMD_CREATE_QP_CQN_MASK, qp->rcq->cqn); - - req.sq_mtt_cfg = - FIELD_PREP(ERDMA_CMD_CREATE_QP_PAGE_OFFSET_MASK, 0) | - FIELD_PREP(ERDMA_CMD_CREATE_QP_MTT_CNT_MASK, 1) | - FIELD_PREP(ERDMA_CMD_CREATE_QP_MTT_LEVEL_MASK, - ERDMA_MR_MTT_0LEVEL); - req.rq_mtt_cfg = req.sq_mtt_cfg; - - req.rq_buf_addr = qp->kern_qp.rq_buf_dma_addr; - req.sq_buf_addr = qp->kern_qp.sq_buf_dma_addr; - req.sq_db_info_dma_addr = qp->kern_qp.sq_buf_dma_addr + - (qp->attrs.sq_size << SQEBB_SHIFT); - req.rq_db_info_dma_addr = qp->kern_qp.rq_buf_dma_addr + - (qp->attrs.rq_size << RQE_SHIFT); - } else { - user_qp = &qp->user_qp; - req.sq_cqn_mtt_cfg = FIELD_PREP( - 
ERDMA_CMD_CREATE_QP_PAGE_SIZE_MASK, - ilog2(user_qp->sq_mem.page_size) - ERDMA_HW_PAGE_SHIFT); - req.sq_cqn_mtt_cfg |= - FIELD_PREP(ERDMA_CMD_CREATE_QP_CQN_MASK, qp->scq->cqn); - - req.rq_cqn_mtt_cfg = FIELD_PREP( - ERDMA_CMD_CREATE_QP_PAGE_SIZE_MASK, - ilog2(user_qp->rq_mem.page_size) - ERDMA_HW_PAGE_SHIFT); - req.rq_cqn_mtt_cfg |= - FIELD_PREP(ERDMA_CMD_CREATE_QP_CQN_MASK, qp->rcq->cqn); - - req.sq_mtt_cfg = user_qp->sq_mem.page_offset; - req.sq_mtt_cfg |= FIELD_PREP(ERDMA_CMD_CREATE_QP_MTT_CNT_MASK, - user_qp->sq_mem.mtt_nents); - - req.rq_mtt_cfg = user_qp->rq_mem.page_offset; - req.rq_mtt_cfg |= FIELD_PREP(ERDMA_CMD_CREATE_QP_MTT_CNT_MASK, - user_qp->rq_mem.mtt_nents); - - assemble_qbuf_mtt_for_cmd(&user_qp->sq_mem, &req.sq_mtt_cfg, - &req.sq_buf_addr, req.sq_mtt_entry); - assemble_qbuf_mtt_for_cmd(&user_qp->rq_mem, &req.rq_mtt_cfg, - &req.rq_buf_addr, req.rq_mtt_entry); - - req.sq_db_info_dma_addr = user_qp->sq_db_info_dma_addr; - req.rq_db_info_dma_addr = user_qp->rq_db_info_dma_addr; - - if (uctx->ext_db.enable) { - req.sq_cqn_mtt_cfg |= - FIELD_PREP(ERDMA_CMD_CREATE_QP_DB_CFG_MASK, 1); - req.db_cfg = - FIELD_PREP(ERDMA_CMD_CREATE_QP_SQDB_CFG_MASK, - uctx->ext_db.sdb_off) | - FIELD_PREP(ERDMA_CMD_CREATE_QP_RQDB_CFG_MASK, - uctx->ext_db.rdb_off); - } - } + create_qp_mtt_cfg(&req, + is_user ? &qp->user_qp.sq_mem : &qp->kern_qp.sq_mem, + is_user ? &qp->user_qp.rq_mem : &qp->kern_qp.rq_mem, + qp->scq->cqn, qp->rcq->cqn); + + req.sq_dbrec_dma = is_user ? qp->user_qp.sq_dbrec_dma : + qp->kern_qp.sq_dbrec_dma; + req.rq_dbrec_dma = is_user ? qp->user_qp.rq_dbrec_dma : + qp->kern_qp.rq_dbrec_dma; err = erdma_post_cmd_wait(&dev->cmdq, &req, sizeof(req), &resp0, &resp1); - if (!err) - qp->attrs.cookie = - FIELD_GET(ERDMA_CMDQ_CREATE_QP_RESP_COOKIE_MASK, resp0); + if (err) + return err; - return err; + qp->attrs.cookie = + FIELD_GET(ERDMA_CMDQ_CREATE_QP_RESP_COOKIE_MASK, resp0); + + return 0; } static int regmr_cmd(struct erdma_dev *dev, struct erdma_mr *mr) { struct erdma_pd *pd = to_epd(mr->ibmr.pd); - u32 mtt_level = ERDMA_MR_MTT_0LEVEL; + u32 mtt_type = ERDMA_MR_INLINE_MTT; struct erdma_cmdq_reg_mr_req req; erdma_cmdq_build_reqhdr(&req.hdr, CMDQ_SUBMOD_RDMA, CMDQ_OPCODE_REG_MR); @@ -142,10 +122,10 @@ static int regmr_cmd(struct erdma_dev *dev, struct erdma_mr *mr) mr->mem.page_cnt > ERDMA_MAX_INLINE_MTT_ENTRIES) { if (mr->mem.mtt->continuous) { req.phy_addr[0] = mr->mem.mtt->buf_dma; - mtt_level = ERDMA_MR_MTT_1LEVEL; + mtt_type = ERDMA_MR_INDIRECT_MTT; } else { - req.phy_addr[0] = sg_dma_address(mr->mem.mtt->sglist); - mtt_level = mr->mem.mtt->level; + req.phy_addr[0] = mr->mem.mtt->dma_addrs[0]; + mtt_type = mr->mem.mtt->level; } } else if (mr->type != ERDMA_MR_TYPE_DMA) { memcpy(req.phy_addr, mr->mem.mtt->buf, @@ -160,8 +140,10 @@ static int regmr_cmd(struct erdma_dev *dev, struct erdma_mr *mr) FIELD_PREP(ERDMA_CMD_REGMR_RIGHT_MASK, mr->access); req.cfg2 = FIELD_PREP(ERDMA_CMD_REGMR_PAGESIZE_MASK, ilog2(mr->mem.page_size)) | - FIELD_PREP(ERDMA_CMD_REGMR_MTT_LEVEL_MASK, mtt_level) | + FIELD_PREP(ERDMA_CMD_REGMR_MTT_TYPE_MASK, mtt_type) | FIELD_PREP(ERDMA_CMD_REGMR_MTT_CNT_MASK, mr->mem.page_cnt); + /* Clear this field because hardware will check it. 
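The request struct is declared on the stack without a zero initializer, and ERDMA_MR_TYPE_DMA jumps straight to post_cmd, so an uninitialized size would otherwise be sent to the device.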
*/ + req.size = 0; if (mr->type == ERDMA_MR_TYPE_DMA) goto post_cmd; @@ -173,22 +155,27 @@ static int regmr_cmd(struct erdma_dev *dev, struct erdma_mr *mr) if (!mr->mem.mtt->continuous && mr->mem.mtt->level > 1) { req.cfg0 |= FIELD_PREP(ERDMA_CMD_MR_VERSION_MASK, 1); - req.cfg2 |= FIELD_PREP(ERDMA_CMD_REGMR_MTT_PAGESIZE_MASK, + req.cfg2 |= FIELD_PREP(ERDMA_CMD_REGMR_PBL_PAGESIZE_MASK, PAGE_SHIFT - ERDMA_HW_PAGE_SHIFT); req.size_h = upper_32_bits(mr->mem.len); req.mtt_cnt_h = mr->mem.page_cnt >> 20; + ibdev_dbg(&dev->ibdev, + "cfg0 %x, cfg2 %x, size_h %u, mtt_cmt_h %u\n", + req.cfg0, req.cfg2, req.size_h, req.mtt_cnt_h); + ibdev_dbg(&dev->ibdev, "mtt_0_level: 0x%llx\n", + req.phy_addr[0]); } post_cmd: return erdma_post_cmd_wait(&dev->cmdq, &req, sizeof(req), NULL, NULL); } -static int create_cq_cmd(struct erdma_ucontext *uctx, struct erdma_cq *cq) +static int create_cq_cmd(struct erdma_dev *dev, struct erdma_cq *cq, + bool is_user) { - struct erdma_dev *dev = to_edev(cq->ibcq.device); struct erdma_cmdq_create_cq_req req; - struct erdma_mem *mem; - u32 page_size; + struct erdma_mem *mtt = is_user ? &cq->user_cq.qbuf_mtt : + &cq->kern_cq.qbuf_mtt; erdma_cmdq_build_reqhdr(&req.hdr, CMDQ_SUBMOD_RDMA, CMDQ_OPCODE_CREATE_CQ); @@ -197,51 +184,27 @@ static int create_cq_cmd(struct erdma_ucontext *uctx, struct erdma_cq *cq) FIELD_PREP(ERDMA_CMD_CREATE_CQ_DEPTH_MASK, ilog2(cq->depth)); req.cfg1 = FIELD_PREP(ERDMA_CMD_CREATE_CQ_EQN_MASK, cq->assoc_eqn); - if (rdma_is_kernel_res(&cq->ibcq.res)) { - page_size = SZ_32M; - req.cfg0 |= FIELD_PREP(ERDMA_CMD_CREATE_CQ_PAGESIZE_MASK, - ilog2(page_size) - ERDMA_HW_PAGE_SHIFT); - req.qbuf_addr_l = lower_32_bits(cq->kern_cq.qbuf_dma_addr); - req.qbuf_addr_h = upper_32_bits(cq->kern_cq.qbuf_dma_addr); - - req.cfg1 |= FIELD_PREP(ERDMA_CMD_CREATE_CQ_MTT_CNT_MASK, 1) | - FIELD_PREP(ERDMA_CMD_CREATE_CQ_MTT_LEVEL_MASK, - ERDMA_MR_MTT_0LEVEL); - - req.first_page_offset = 0; - req.cq_db_info_addr = - cq->kern_cq.qbuf_dma_addr + (cq->depth << CQE_SHIFT); + req.cfg0 |= FIELD_PREP(ERDMA_CMD_CREATE_CQ_PAGESIZE_MASK, + ilog2(mtt->page_size) - ERDMA_HW_PAGE_SHIFT); + if (mtt->mtt_nents == 1) { + req.qbuf_addr_l = + lower_32_bits(((u64 *)(uintptr_t)mtt->mtt->buf)[0]); + req.qbuf_addr_h = + upper_32_bits(((u64 *)(uintptr_t)mtt->mtt->buf)[0]); + req.cfg1 |= FIELD_PREP(ERDMA_CMD_CREATE_CQ_MTT_TYPE_MASK, + ERDMA_MR_INLINE_MTT); } else { - mem = &cq->user_cq.qbuf_mem; - req.cfg0 |= - FIELD_PREP(ERDMA_CMD_CREATE_CQ_PAGESIZE_MASK, - ilog2(mem->page_size) - ERDMA_HW_PAGE_SHIFT); - if (mem->mtt_nents == 1) { - req.qbuf_addr_l = lower_32_bits(mem->mtt->buf[0]); - req.qbuf_addr_h = upper_32_bits(mem->mtt->buf[0]); - req.cfg1 |= - FIELD_PREP(ERDMA_CMD_CREATE_CQ_MTT_LEVEL_MASK, - ERDMA_MR_MTT_0LEVEL); - } else { - req.qbuf_addr_l = lower_32_bits(mem->mtt->buf_dma); - req.qbuf_addr_h = upper_32_bits(mem->mtt->buf_dma); - req.cfg1 |= - FIELD_PREP(ERDMA_CMD_CREATE_CQ_MTT_LEVEL_MASK, - ERDMA_MR_MTT_1LEVEL); - } - req.cfg1 |= FIELD_PREP(ERDMA_CMD_CREATE_CQ_MTT_CNT_MASK, - mem->mtt_nents); - - req.first_page_offset = mem->page_offset; - req.cq_db_info_addr = cq->user_cq.db_info_dma_addr; - - if (uctx->ext_db.enable) { - req.cfg1 |= FIELD_PREP( - ERDMA_CMD_CREATE_CQ_MTT_DB_CFG_MASK, 1); - req.cfg2 = FIELD_PREP(ERDMA_CMD_CREATE_CQ_DB_CFG_MASK, - uctx->ext_db.cdb_off); - } + req.qbuf_addr_l = lower_32_bits(mtt->mtt->buf_dma); + req.qbuf_addr_h = upper_32_bits(mtt->mtt->buf_dma); + req.cfg1 |= FIELD_PREP(ERDMA_CMD_CREATE_CQ_MTT_TYPE_MASK, + ERDMA_MR_INDIRECT_MTT); } + req.cfg1 |= + 
FIELD_PREP(ERDMA_CMD_CREATE_CQ_MTT_CNT_MASK, mtt->mtt_nents); + + req.first_page_offset = mtt->page_offset; + req.cq_db_info_addr = is_user ? cq->user_cq.dbrec_dma : + cq->kern_cq.dbrec_dma; return erdma_post_cmd_wait(&dev->cmdq, &req, sizeof(req), NULL, NULL); } @@ -282,8 +245,8 @@ static inline void erdma_free_idx(struct erdma_resource_cb *res_cb, u32 idx) } static struct rdma_user_mmap_entry * -erdma_user_mmap_entry_insert(struct erdma_ucontext *uctx, void *address, - u32 size, u8 mmap_flag, u64 *mmap_offset) +erdma_user_mmap_entry_insert(struct ib_ucontext *uctx, u64 address, u32 size, + u8 mmap_flag, u64 *mmap_offset) { struct erdma_user_mmap_entry *entry = kzalloc(sizeof(*entry), GFP_KERNEL); @@ -297,8 +260,7 @@ erdma_user_mmap_entry_insert(struct erdma_ucontext *uctx, void *address, size = PAGE_ALIGN(size); - ret = rdma_user_mmap_entry_insert(&uctx->ibucontext, &entry->rdma_entry, - size); + ret = rdma_user_mmap_entry_insert(uctx, &entry->rdma_entry, size); if (ret) { kfree(entry); return NULL; @@ -339,19 +301,25 @@ int erdma_query_device(struct ib_device *ibdev, struct ib_device_attr *attr, attr->max_fast_reg_page_list_len = ERDMA_MAX_FRMR_PA; attr->page_size_cap = ERDMA_PAGE_SIZE_SUPPORT; - if (dev->attrs.cap_flags & ERDMA_DEV_CAP_FLAGS_ATOMIC) + if (dev->attrs.cap_flags & ERDMA_DEV_CAP_FLAGS_ATOMIC) { attr->atomic_cap = IB_ATOMIC_GLOB; + attr->masked_atomic_cap = IB_ATOMIC_GLOB; + } - attr->fw_ver = dev->attrs.fw_version; + attr->fw_ver = ((u64)(dev->attrs.fw_version >> 16) << 32) | + (((dev->attrs.fw_version >> 8) & 0xFF) << 16) | + ((dev->attrs.fw_version & 0xFF)); + read_lock(&dev->netdev_lock); if (dev->netdev) addrconf_addr_eui48((u8 *)&attr->sys_image_guid, dev->netdev->dev_addr); + read_unlock(&dev->netdev_lock); return 0; } -int erdma_query_gid(struct ib_device *ibdev, u32 port, int idx, +int erdma_query_gid(struct ib_device *ibdev, port_t port, int idx, union ib_gid *gid) { struct erdma_dev *dev = to_edev(ibdev); @@ -362,32 +330,27 @@ int erdma_query_gid(struct ib_device *ibdev, u32 port, int idx, return 0; } -int erdma_query_port(struct ib_device *ibdev, u32 port, +int erdma_query_port(struct ib_device *ibdev, port_t port, struct ib_port_attr *attr) { struct erdma_dev *dev = to_edev(ibdev); - struct net_device *ndev = dev->netdev; memset(attr, 0, sizeof(*attr)); - attr->gid_tbl_len = 1; - attr->port_cap_flags = IB_PORT_CM_SUP | IB_PORT_DEVICE_MGMT_SUP; - attr->max_msg_sz = -1; - - if (!ndev) - goto out; + attr->state = dev->port_state; + attr->active_speed = IB_SPEED_EDR; + attr->active_width = IB_WIDTH_4X; + attr->max_mtu = ib_mtu_int_to_enum(dev->mtu); + attr->active_mtu = ib_mtu_int_to_enum(dev->mtu); - ib_get_eth_speed(ibdev, port, &attr->active_speed, &attr->active_width); - attr->max_mtu = ib_mtu_int_to_enum(ndev->mtu); - attr->active_mtu = ib_mtu_int_to_enum(ndev->mtu); - if (netif_running(ndev) && netif_carrier_ok(ndev)) - dev->state = IB_PORT_ACTIVE; + if (compat_mode) + attr->gid_tbl_len = 16; else - dev->state = IB_PORT_DOWN; - attr->state = dev->state; - -out: - if (dev->state == IB_PORT_ACTIVE) + attr->gid_tbl_len = 1; + attr->pkey_tbl_len = 1; + attr->port_cap_flags = IB_PORT_CM_SUP | IB_PORT_DEVICE_MGMT_SUP; + attr->max_msg_sz = -1; + if (dev->port_state == IB_PORT_ACTIVE) attr->phys_state = IB_PORT_PHYS_STATE_LINK_UP; else attr->phys_state = IB_PORT_PHYS_STATE_DISABLED; @@ -395,11 +358,19 @@ int erdma_query_port(struct ib_device *ibdev, u32 port, return 0; } -int erdma_get_port_immutable(struct ib_device *ibdev, u32 port, +int 
erdma_get_port_immutable(struct ib_device *ibdev, port_t port, struct ib_port_immutable *port_immutable) { - port_immutable->gid_tbl_len = 1; - port_immutable->core_cap_flags = RDMA_CORE_PORT_IWARP; + if (compat_mode) { + port_immutable->gid_tbl_len = 16; + port_immutable->core_cap_flags = + RDMA_CORE_PORT_IBA_ROCE_UDP_ENCAP; + port_immutable->max_mad_size = IB_MGMT_MAD_SIZE; + port_immutable->pkey_tbl_len = 1; + } else { + port_immutable->gid_tbl_len = 1; + port_immutable->core_cap_flags = RDMA_CORE_PORT_IWARP; + } return 0; } @@ -410,9 +381,13 @@ int erdma_alloc_pd(struct ib_pd *ibpd, struct ib_udata *udata) struct erdma_dev *dev = to_edev(ibpd->device); int pdn; + ERDMA_INC_CNT(dev, CMD_ALLOC_PD); + pdn = erdma_alloc_idx(&dev->res_cb[ERDMA_RES_TYPE_PD]); - if (pdn < 0) + if (pdn < 0) { + ERDMA_INC_CNT(dev, CMD_ALLOC_PD_FAILED); return pdn; + } pd->pdn = pdn; @@ -421,11 +396,15 @@ int erdma_alloc_pd(struct ib_pd *ibpd, struct ib_udata *udata) int erdma_dealloc_pd(struct ib_pd *ibpd, struct ib_udata *udata) { - struct erdma_pd *pd = to_epd(ibpd); struct erdma_dev *dev = to_edev(ibpd->device); + struct erdma_pd *pd = to_epd(ibpd); - erdma_free_idx(&dev->res_cb[ERDMA_RES_TYPE_PD], pd->pdn); + ERDMA_INC_CNT(dev, CMD_DEALLOC_PD); + + if (compat_mode && pd->sw_pd) + detach_sw_pd(pd); + erdma_free_idx(&dev->res_cb[ERDMA_RES_TYPE_PD], pd->pdn); return 0; } @@ -447,6 +426,13 @@ static void erdma_flush_worker(struct work_struct *work) static int erdma_qp_validate_cap(struct erdma_dev *dev, struct ib_qp_init_attr *attrs) { + ibdev_dbg( + &dev->ibdev, + "create_qp_cap:send_wr(%u),recv_wr(%u),send_sge(%u),recv_sge(%u),inline(%u)\n", + attrs->cap.max_send_wr, attrs->cap.max_recv_wr, + attrs->cap.max_send_sge, attrs->cap.max_recv_sge, + attrs->cap.max_inline_data); + if ((attrs->cap.max_send_wr > dev->attrs.max_send_wr) || (attrs->cap.max_recv_wr > dev->attrs.max_recv_wr) || (attrs->cap.max_send_sge > dev->attrs.max_send_sge) || @@ -474,87 +460,49 @@ static int erdma_qp_validate_attr(struct erdma_dev *dev, return 0; } -static void free_kernel_qp(struct erdma_qp *qp) +static int update_kernel_qp_oob_attr(struct erdma_qp *qp, + struct ib_qp_attr *attr, int attr_mask) { - struct erdma_dev *dev = qp->dev; - - vfree(qp->kern_qp.swr_tbl); - vfree(qp->kern_qp.rwr_tbl); - - if (qp->kern_qp.sq_buf) - dma_free_coherent( - &dev->pdev->dev, - WARPPED_BUFSIZE(qp->attrs.sq_size << SQEBB_SHIFT), - qp->kern_qp.sq_buf, qp->kern_qp.sq_buf_dma_addr); - - if (qp->kern_qp.rq_buf) - dma_free_coherent( - &dev->pdev->dev, - WARPPED_BUFSIZE(qp->attrs.rq_size << RQE_SHIFT), - qp->kern_qp.rq_buf, qp->kern_qp.rq_buf_dma_addr); -} - -static int init_kernel_qp(struct erdma_dev *dev, struct erdma_qp *qp, - struct ib_qp_init_attr *attrs) -{ - struct erdma_kqp *kqp = &qp->kern_qp; - int size; - - if (attrs->sq_sig_type == IB_SIGNAL_ALL_WR) - kqp->sig_all = 1; - - kqp->sq_pi = 0; - kqp->sq_ci = 0; - kqp->rq_pi = 0; - kqp->rq_ci = 0; - kqp->hw_sq_db = - dev->func_bar + (ERDMA_SDB_SHARED_PAGE_INDEX << PAGE_SHIFT); - kqp->hw_rq_db = dev->func_bar + ERDMA_BAR_RQDB_SPACE_OFFSET; - - kqp->swr_tbl = vmalloc_array(qp->attrs.sq_size, sizeof(u64)); - kqp->rwr_tbl = vmalloc_array(qp->attrs.rq_size, sizeof(u64)); - if (!kqp->swr_tbl || !kqp->rwr_tbl) - goto err_out; + struct iw_ext_conn_param *param = + (struct iw_ext_conn_param *)(qp->ibqp.qp_context); - size = (qp->attrs.sq_size << SQEBB_SHIFT) + ERDMA_EXTRA_BUFFER_SIZE; - kqp->sq_buf = dma_alloc_coherent(&dev->pdev->dev, size, - &kqp->sq_buf_dma_addr, GFP_KERNEL); - if (!kqp->sq_buf) - goto 
err_out; + if (!qp->attrs.connect_without_cm) + return -EINVAL; - size = (qp->attrs.rq_size << RQE_SHIFT) + ERDMA_EXTRA_BUFFER_SIZE; - kqp->rq_buf = dma_alloc_coherent(&dev->pdev->dev, size, - &kqp->rq_buf_dma_addr, GFP_KERNEL); - if (!kqp->rq_buf) - goto err_out; + if (param == NULL) + return -EINVAL; - kqp->sq_db_info = kqp->sq_buf + (qp->attrs.sq_size << SQEBB_SHIFT); - kqp->rq_db_info = kqp->rq_buf + (qp->attrs.rq_size << RQE_SHIFT); + if (attr_mask & IB_QP_DEST_QPN) + qp->attrs.remote_qp_num = attr->dest_qp_num; + + if (param->sk_addr.family == AF_INET) { + ((struct sockaddr_in *)&qp->attrs.raddr)->sin_family = AF_INET; + ((struct sockaddr_in *)&qp->attrs.laddr)->sin_family = AF_INET; + qp->attrs.raddr.in.sin_addr.s_addr = param->sk_addr.daddr_v4; + qp->attrs.laddr.in.sin_addr.s_addr = param->sk_addr.saddr_v4; + } else if (param->sk_addr.family == AF_INET6) { + ((struct sockaddr_in6 *)&qp->attrs.raddr)->sin6_family = + AF_INET6; + ((struct sockaddr_in6 *)&qp->attrs.laddr)->sin6_family = + AF_INET6; + memcpy(&qp->attrs.raddr.in6.sin6_addr, ¶m->sk_addr.daddr_v6, + sizeof(struct in6_addr)); + memcpy(&qp->attrs.laddr.in6.sin6_addr, ¶m->sk_addr.saddr_v6, + sizeof(struct in6_addr)); + } else { + return -EINVAL; + } + qp->attrs.dport = ntohs(param->sk_addr.dport); + qp->attrs.sport = param->sk_addr.sport; return 0; - -err_out: - free_kernel_qp(qp); - return -ENOMEM; -} - -static void erdma_fill_bottom_mtt(struct erdma_dev *dev, struct erdma_mem *mem) -{ - struct erdma_mtt *mtt = mem->mtt; - struct ib_block_iter biter; - u32 idx = 0; - - while (mtt->low_level) - mtt = mtt->low_level; - - rdma_umem_for_each_dma_block(mem->umem, &biter, mem->page_size) - mtt->buf[idx++] = rdma_block_iter_dma_address(&biter); } static struct erdma_mtt *erdma_create_cont_mtt(struct erdma_dev *dev, size_t size) { struct erdma_mtt *mtt; + int ret = -ENOMEM; mtt = kzalloc(sizeof(*mtt), GFP_KERNEL); if (!mtt) @@ -571,6 +519,9 @@ static struct erdma_mtt *erdma_create_cont_mtt(struct erdma_dev *dev, if (dma_mapping_error(&dev->pdev->dev, mtt->buf_dma)) goto err_free_mtt_buf; + ibdev_dbg(&dev->ibdev, "map buffer: va:%p, pa:%llx, size:%lu\n", + mtt->buf, mtt->buf_dma, mtt->size); + return mtt; err_free_mtt_buf: @@ -579,71 +530,87 @@ static struct erdma_mtt *erdma_create_cont_mtt(struct erdma_dev *dev, err_free_mtt: kfree(mtt); - return ERR_PTR(-ENOMEM); -} - -static void erdma_destroy_mtt_buf_sg(struct erdma_dev *dev, - struct erdma_mtt *mtt) -{ - dma_unmap_sg(&dev->pdev->dev, mtt->sglist, mtt->nsg, DMA_TO_DEVICE); - vfree(mtt->sglist); + return ERR_PTR(ret); } -static void erdma_destroy_scatter_mtt(struct erdma_dev *dev, - struct erdma_mtt *mtt) +static u32 range_num_blocks(u64 start, u64 len, u64 blk_sz) { - erdma_destroy_mtt_buf_sg(dev, mtt); - vfree(mtt->buf); - kfree(mtt); + return (ALIGN(start + len, blk_sz) - ALIGN_DOWN(start, blk_sz)) / + blk_sz; } -static void erdma_init_middle_mtt(struct erdma_mtt *mtt, - struct erdma_mtt *low_mtt) +static void erdma_unmap_page_list(struct erdma_dev *dev, dma_addr_t *pg_dma, + int npages) { - struct scatterlist *sg; - u32 idx = 0, i; + int i; - for_each_sg(low_mtt->sglist, sg, low_mtt->nsg, i) - mtt->buf[idx++] = sg_dma_address(sg); + for (i = 0; i < npages; i++) + dma_unmap_page(&dev->pdev->dev, pg_dma[i], PAGE_SIZE, + DMA_BIDIRECTIONAL); } -static int erdma_create_mtt_buf_sg(struct erdma_dev *dev, struct erdma_mtt *mtt) +static u32 vmalloc_to_dma_addrs(struct erdma_dev *dev, dma_addr_t **dma_addrs, + void *buf, u64 len) { - struct scatterlist *sglist; - void *buf = mtt->buf; 
- u32 npages, i, nsg; + dma_addr_t *pg_dma; struct page *pg; + u32 npages, i; + void *addr; - /* Failed if buf is not page aligned */ - if ((uintptr_t)buf & ~PAGE_MASK) - return -EINVAL; - - npages = DIV_ROUND_UP(mtt->size, PAGE_SIZE); - sglist = vzalloc(npages * sizeof(*sglist)); - if (!sglist) - return -ENOMEM; + npages = range_num_blocks((u64)buf, len, PAGE_SIZE); + pg_dma = vzalloc(npages * sizeof(dma_addr_t)); + if (!pg_dma) + return 0; - sg_init_table(sglist, npages); + addr = buf; for (i = 0; i < npages; i++) { - pg = vmalloc_to_page(buf); + pg = vmalloc_to_page(addr); if (!pg) goto err; - sg_set_page(&sglist[i], pg, PAGE_SIZE, 0); - buf += PAGE_SIZE; + + pg_dma[i] = dma_map_page(&dev->pdev->dev, pg, 0, PAGE_SIZE, + DMA_BIDIRECTIONAL); + if (dma_mapping_error(&dev->pdev->dev, pg_dma[i])) + goto err; + + addr += PAGE_SIZE; } - nsg = dma_map_sg(&dev->pdev->dev, sglist, npages, DMA_TO_DEVICE); - if (!nsg) - goto err; + *dma_addrs = pg_dma; - mtt->sglist = sglist; - mtt->nsg = nsg; + return npages; +err: + erdma_unmap_page_list(dev, pg_dma, i); + vfree(pg_dma); return 0; -err: - vfree(sglist); +} + +static int erdma_create_mtt_buf_dma_addrs(struct erdma_dev *dev, + struct erdma_mtt *mtt) +{ + dma_addr_t *addrs; + u32 npages; - return -ENOMEM; + /* Failed if buf is not page aligned */ + if ((uintptr_t)mtt->buf & ~PAGE_MASK) + return -EINVAL; + + npages = vmalloc_to_dma_addrs(dev, &addrs, mtt->buf, mtt->size); + if (!npages) + return -ENOMEM; + + mtt->dma_addrs = addrs; + mtt->npages = npages; + + return 0; +} + +static void erdma_destroy_mtt_buf_dma_addrs(struct erdma_dev *dev, + struct erdma_mtt *mtt) +{ + erdma_unmap_page_list(dev, mtt->dma_addrs, mtt->npages); + vfree(mtt->dma_addrs); } static struct erdma_mtt *erdma_create_scatter_mtt(struct erdma_dev *dev, @@ -662,12 +629,12 @@ static struct erdma_mtt *erdma_create_scatter_mtt(struct erdma_dev *dev, if (!mtt->buf) goto err_free_mtt; - ret = erdma_create_mtt_buf_sg(dev, mtt); + ret = erdma_create_mtt_buf_dma_addrs(dev, mtt); if (ret) goto err_free_mtt_buf; - ibdev_dbg(&dev->ibdev, "create scatter mtt, size:%lu, nsg:%u\n", - mtt->size, mtt->nsg); + ibdev_dbg(&dev->ibdev, "create scatter mtt, size:%lu, npages:%u\n", + mtt->size, mtt->npages); return mtt; @@ -680,6 +647,24 @@ static struct erdma_mtt *erdma_create_scatter_mtt(struct erdma_dev *dev, return ERR_PTR(ret); } +static void erdma_destroy_scatter_mtt(struct erdma_dev *dev, + struct erdma_mtt *mtt) +{ + erdma_destroy_mtt_buf_dma_addrs(dev, mtt); + vfree(mtt->buf); + kfree(mtt); +} + +static void erdma_init_middle_mtt(struct erdma_mtt *mtt, + struct erdma_mtt *next_mtt) +{ + dma_addr_t *pg_addr = mtt->buf; + u32 i; + + for (i = 0; i < next_mtt->npages; i++) + pg_addr[i] = next_mtt->dma_addrs[i]; +} + static struct erdma_mtt *erdma_create_mtt(struct erdma_dev *dev, size_t size, bool force_continuous) { @@ -701,8 +686,8 @@ static struct erdma_mtt *erdma_create_mtt(struct erdma_dev *dev, size_t size, level = 1; /* convergence the mtt table. 
*/ - while (mtt->nsg != 1 && level <= 3) { - tmp_mtt = erdma_create_scatter_mtt(dev, MTT_SIZE(mtt->nsg)); + while (mtt->npages != 1 && level <= 3) { + tmp_mtt = erdma_create_scatter_mtt(dev, MTT_SIZE(mtt->npages)); if (IS_ERR(tmp_mtt)) { ret = PTR_ERR(tmp_mtt); goto err_free_mtt; @@ -720,9 +705,10 @@ static struct erdma_mtt *erdma_create_mtt(struct erdma_dev *dev, size_t size, mtt->level = level; ibdev_dbg(&dev->ibdev, "top mtt: level:%d, dma_addr 0x%llx\n", - mtt->level, mtt->sglist[0].dma_address); + mtt->level, mtt->dma_addrs[0]); return mtt; + err_free_mtt: while (mtt) { tmp_mtt = mtt->low_level; @@ -733,6 +719,31 @@ static struct erdma_mtt *erdma_create_mtt(struct erdma_dev *dev, size_t size, return ERR_PTR(ret); } +static void erdma_init_mtt_leaf(struct erdma_mem *mem, struct erdma_mtt *mtt) +{ + dma_addr_t *page_list = mtt->buf; + struct ib_block_iter biter; + u32 idx = 0; + + if (mem->type == ERDMA_UMEM) { + rdma_umem_for_each_dma_block(mem->umem, &biter, mem->page_size) + page_list[idx++] = rdma_block_iter_dma_address(&biter); + } else { + for (; idx < mem->kmem->npages; idx++) + page_list[idx] = mem->kmem->dma_addrs[idx]; + } +} + +static void erdma_init_bottom_mtt(struct erdma_dev *dev, struct erdma_mem *mem) +{ + struct erdma_mtt *mtt = mem->mtt; + + while (mtt->low_level) + mtt = mtt->low_level; + + erdma_init_mtt_leaf(mem, mtt); +} + static void erdma_destroy_mtt(struct erdma_dev *dev, struct erdma_mtt *mtt) { struct erdma_mtt *tmp_mtt; @@ -751,57 +762,224 @@ static void erdma_destroy_mtt(struct erdma_dev *dev, struct erdma_mtt *mtt) } } -static int get_mtt_entries(struct erdma_dev *dev, struct erdma_mem *mem, - u64 start, u64 len, int access, u64 virt, - unsigned long req_page_size, bool force_continuous) +static void erdma_mem_free(struct erdma_dev *dev, struct erdma_mem *mem) { - int ret = 0; + switch (mem->type) { + case ERDMA_UMEM: + if (mem->umem) { + ib_umem_release(mem->umem); + mem->umem = NULL; + } + break; + case ERDMA_KMEM: + if (mem->kmem) { + if (mem->kmem->dma_addrs) { + erdma_unmap_page_list(dev, mem->kmem->dma_addrs, + mem->kmem->npages); + vfree(mem->kmem->dma_addrs); + mem->kmem->dma_addrs = NULL; + } + kfree(mem->kmem); + mem->kmem = NULL; + } + break; + default: + break; + } +} - mem->umem = ib_umem_get(&dev->ibdev, start, len, access); - if (IS_ERR(mem->umem)) { - ret = PTR_ERR(mem->umem); - mem->umem = NULL; - return ret; +static int get_mtt_entries(void *data, struct erdma_ucontext *ctx, + struct erdma_mem *mem, u64 start, u64 len, + int access, u64 virt, unsigned long req_page_size, + bool is_mr) +{ + struct erdma_dev *dev = ctx ? to_edev(ctx->ibucontext.device) : + (struct erdma_dev *)data; + bool is_user = ctx ? 
true : false; + int ret; + + if (is_user) { + mem->type = ERDMA_UMEM; + mem->umem = ib_umem_get(&dev->ibdev, start, len, access); + if (IS_ERR(mem->umem)) { + ret = PTR_ERR(mem->umem); + mem->umem = NULL; + return ret; + } + + mem->page_size = + ib_umem_find_best_pgsz(mem->umem, req_page_size, virt); + mem->mtt_nents = + ib_umem_num_dma_blocks(mem->umem, mem->page_size); + + } else { + mem->type = ERDMA_KMEM; + mem->kmem = kzalloc(sizeof(struct erdma_kmem), GFP_KERNEL); + if (!mem->kmem) + return -ENOMEM; + + mem->kmem->npages = + vmalloc_to_dma_addrs(dev, &mem->kmem->dma_addrs, + (void *)(uintptr_t)start, len); + if (!mem->kmem->npages) { + kfree(mem->kmem); + mem->kmem = NULL; + return -ENOMEM; + } + + mem->page_size = PAGE_SIZE; + mem->mtt_nents = mem->kmem->npages; } mem->va = virt; mem->len = len; - mem->page_size = ib_umem_find_best_pgsz(mem->umem, req_page_size, virt); mem->page_offset = start & (mem->page_size - 1); - mem->mtt_nents = ib_umem_num_dma_blocks(mem->umem, mem->page_size); mem->page_cnt = mem->mtt_nents; - mem->mtt = erdma_create_mtt(dev, MTT_SIZE(mem->page_cnt), - force_continuous); + + ibdev_dbg(&dev->ibdev, "page_size:%u, page_offset:%u, mtt_nents:%u\n", + mem->page_size, mem->page_offset, mem->page_cnt); + + mem->mtt = erdma_create_mtt(dev, MTT_SIZE(mem->page_cnt), !is_mr); if (IS_ERR(mem->mtt)) { ret = PTR_ERR(mem->mtt); goto error_ret; } - erdma_fill_bottom_mtt(dev, mem); + erdma_init_bottom_mtt(dev, mem); return 0; error_ret: - if (mem->umem) { - ib_umem_release(mem->umem); - mem->umem = NULL; - } + erdma_mem_free(dev, mem); return ret; } static void put_mtt_entries(struct erdma_dev *dev, struct erdma_mem *mem) { - if (mem->mtt) + if (mem->mtt) { erdma_destroy_mtt(dev, mem->mtt); + mem->mtt = NULL; + } + + erdma_mem_free(dev, mem); +} - if (mem->umem) { - ib_umem_release(mem->umem); - mem->umem = NULL; +static void free_kernel_qp(struct erdma_qp *qp) +{ + struct erdma_dev *dev = qp->dev; + + vfree(qp->kern_qp.swr_tbl); + vfree(qp->kern_qp.rwr_tbl); + + if (qp->kern_qp.sq_buf) { + put_mtt_entries(dev, &qp->kern_qp.sq_mem); + vfree(qp->kern_qp.sq_buf); + qp->kern_qp.sq_buf = NULL; } + + if (qp->kern_qp.rq_buf) { + put_mtt_entries(dev, &qp->kern_qp.rq_mem); + vfree(qp->kern_qp.rq_buf); + qp->kern_qp.rq_buf = NULL; + } + + if (qp->kern_qp.sq_dbrec) + dma_pool_free(dev->db_pool, qp->kern_qp.sq_dbrec, + qp->kern_qp.sq_dbrec_dma); + + if (qp->kern_qp.rq_dbrec) + dma_pool_free(dev->db_pool, qp->kern_qp.rq_dbrec, + qp->kern_qp.rq_dbrec_dma); +} + +static int init_kernel_qp(struct erdma_dev *dev, struct erdma_qp *qp, + struct ib_qp_init_attr *attrs) +{ + struct erdma_kqp *kqp = &qp->kern_qp; + int ret = -ENOMEM; + + if (attrs->sq_sig_type == IB_SIGNAL_ALL_WR) + kqp->sig_all = 1; + + kqp->sq_pi = 0; + kqp->sq_ci = 0; + kqp->rq_pi = 0; + kqp->rq_ci = 0; + kqp->hw_sq_db = dev->func_bar + + (ERDMA_SDB_SHARED_PAGE_INDEX << ERDMA_HW_PAGE_SHIFT); + kqp->hw_rq_db = dev->func_bar + ERDMA_BAR_RQDB_SPACE_OFFSET; + + kqp->swr_tbl = vmalloc(qp->attrs.sq_size * sizeof(u64)); + kqp->rwr_tbl = vmalloc(qp->attrs.rq_size * sizeof(u64)); + if (!kqp->swr_tbl || !kqp->rwr_tbl) + goto err_out; + + kqp->sq_buf = vmalloc(qp->attrs.sq_size << SQEBB_SHIFT); + if (!kqp->sq_buf) + goto err_out; + + ret = get_mtt_entries(dev, NULL, &kqp->sq_mem, (u64)kqp->sq_buf, + qp->attrs.sq_size << SQEBB_SHIFT, 0, + (u64)kqp->sq_buf, 0, false); + if (ret) + goto err_out; + + kqp->rq_buf = vmalloc(qp->attrs.rq_size << RQE_SHIFT); + if (!kqp->rq_buf) + goto err_out; + + ret = get_mtt_entries(dev, NULL, &kqp->rq_mem, 
(u64)kqp->rq_buf, + qp->attrs.rq_size << RQE_SHIFT, 0, + (u64)kqp->rq_buf, 0, false); + if (ret) + goto err_out; + + kqp->sq_dbrec = + dma_pool_zalloc(dev->db_pool, GFP_KERNEL, &kqp->sq_dbrec_dma); + if (!kqp->sq_dbrec) + goto err_out; + + kqp->rq_dbrec = + dma_pool_zalloc(dev->db_pool, GFP_KERNEL, &kqp->rq_dbrec_dma); + if (!kqp->rq_dbrec) + goto err_out; + + if (attrs->create_flags & IB_QP_CREATE_IWARP_WITHOUT_CM) { + struct iw_ext_conn_param *param = + (struct iw_ext_conn_param *)(attrs->qp_context); + + if (param == NULL) { + ret = -EINVAL; + goto err_out; + } + if (param->sk_addr.family != PF_INET) { + ibdev_err_ratelimited( + &dev->ibdev, + "IPv4 address is required for connection without CM.\n"); + ret = -EINVAL; + goto err_out; + } + qp->attrs.connect_without_cm = true; + ((struct sockaddr_in *)&qp->attrs.raddr)->sin_family = AF_INET; + ((struct sockaddr_in *)&qp->attrs.laddr)->sin_family = AF_INET; + qp->attrs.raddr.in.sin_addr.s_addr = param->sk_addr.daddr_v4; + qp->attrs.laddr.in.sin_addr.s_addr = param->sk_addr.saddr_v4; + qp->attrs.dport = ntohs(param->sk_addr.dport); + qp->attrs.sport = param->sk_addr.sport; + } + spin_lock_init(&kqp->sq_lock); + spin_lock_init(&kqp->rq_lock); + + return 0; + +err_out: + free_kernel_qp(qp); + return ret; } -static int erdma_map_user_dbrecords(struct erdma_ucontext *ctx, +static int erdma_map_user_dbrecords(struct ib_udata *udata, + struct erdma_ucontext *uctx, u64 dbrecords_va, struct erdma_user_dbrecords_page **dbr_page, dma_addr_t *dma_addr) @@ -809,9 +987,9 @@ static int erdma_map_user_dbrecords(struct erdma_ucontext *ctx, struct erdma_user_dbrecords_page *page = NULL; int rv = 0; - mutex_lock(&ctx->dbrecords_page_mutex); + mutex_lock(&uctx->dbrecords_page_mutex); - list_for_each_entry(page, &ctx->dbrecords_page_list, list) + list_for_each_entry(page, &uctx->dbrecords_page_list, list) if (page->va == (dbrecords_va & PAGE_MASK)) goto found; @@ -824,7 +1002,7 @@ static int erdma_map_user_dbrecords(struct erdma_ucontext *ctx, page->va = (dbrecords_va & PAGE_MASK); page->refcnt = 0; - page->umem = ib_umem_get(ctx->ibucontext.device, + page->umem = ib_umem_get(uctx->ibucontext.device, dbrecords_va & PAGE_MASK, PAGE_SIZE, 0); if (IS_ERR(page->umem)) { rv = PTR_ERR(page->umem); @@ -832,7 +1010,7 @@ static int erdma_map_user_dbrecords(struct erdma_ucontext *ctx, goto out; } - list_add(&page->list, &ctx->dbrecords_page_list); + list_add(&page->list, &uctx->dbrecords_page_list); found: *dma_addr = sg_dma_address(page->umem->sgt_append.sgt.sgl) + @@ -841,7 +1019,7 @@ static int erdma_map_user_dbrecords(struct erdma_ucontext *ctx, page->refcnt++; out: - mutex_unlock(&ctx->dbrecords_page_mutex); + mutex_unlock(&uctx->dbrecords_page_mutex); return rv; } @@ -863,10 +1041,11 @@ erdma_unmap_user_dbrecords(struct erdma_ucontext *ctx, mutex_unlock(&ctx->dbrecords_page_mutex); } -static int init_user_qp(struct erdma_qp *qp, struct erdma_ucontext *uctx, - u64 va, u32 len, u64 db_info_va) +static int init_user_qp(struct erdma_qp *qp, struct ib_udata *udata, + struct erdma_ucontext *uctx, u64 va, u32 len, + u64 db_info_va) { - dma_addr_t db_info_dma_addr; + dma_addr_t dbrec_dma; u32 rq_offset; int ret; @@ -874,29 +1053,28 @@ static int init_user_qp(struct erdma_qp *qp, struct erdma_ucontext *uctx, qp->attrs.rq_size * RQE_SIZE)) return -EINVAL; - ret = get_mtt_entries(qp->dev, &qp->user_qp.sq_mem, va, + ret = get_mtt_entries(udata, uctx, &qp->user_qp.sq_mem, va, qp->attrs.sq_size << SQEBB_SHIFT, 0, va, - (SZ_1M - SZ_4K), true); + (SZ_1M - SZ_4K), false); if (ret) 
return ret; rq_offset = ALIGN(qp->attrs.sq_size << SQEBB_SHIFT, ERDMA_HW_PAGE_SIZE); qp->user_qp.rq_offset = rq_offset; - ret = get_mtt_entries(qp->dev, &qp->user_qp.rq_mem, va + rq_offset, + ret = get_mtt_entries(udata, uctx, &qp->user_qp.rq_mem, va + rq_offset, qp->attrs.rq_size << RQE_SHIFT, 0, va + rq_offset, - (SZ_1M - SZ_4K), true); + (SZ_1M - SZ_4K), false); if (ret) goto put_sq_mtt; - ret = erdma_map_user_dbrecords(uctx, db_info_va, - &qp->user_qp.user_dbr_page, - &db_info_dma_addr); + ret = erdma_map_user_dbrecords(udata, uctx, db_info_va, + &qp->user_qp.user_dbr_page, &dbrec_dma); if (ret) goto put_rq_mtt; - qp->user_qp.sq_db_info_dma_addr = db_info_dma_addr; - qp->user_qp.rq_db_info_dma_addr = db_info_dma_addr + ERDMA_DB_SIZE; + qp->user_qp.sq_dbrec_dma = dbrec_dma; + qp->user_qp.rq_dbrec_dma = dbrec_dma + ERDMA_DB_SIZE; return 0; @@ -916,17 +1094,67 @@ static void free_user_qp(struct erdma_qp *qp, struct erdma_ucontext *uctx) erdma_unmap_user_dbrecords(uctx, &qp->user_qp.user_dbr_page); } +static inline int erdma_alloc_qpn(struct erdma_dev *dev, int *qpn, void *qp) +{ + int ret; + + ret = xa_alloc_cyclic(&dev->qp_xa, qpn, qp, + XA_LIMIT(1, dev->attrs.max_qp - 1), + &dev->next_alloc_qpn, GFP_KERNEL); + if (ret < 0) + return ret; + + return 0; +} + +static inline int erdma_set_qpn(struct erdma_dev *dev, u32 qpn, void *qp) +{ + void *entry; + int ret = 0; + + entry = xa_store(&dev->qp_xa, qpn, qp, GFP_KERNEL); + if (xa_is_err(entry)) + ret = xa_err(entry); + if (ret < 0) + return ret; + + return 0; +} + +static inline void erdma_clear_qpn(struct erdma_dev *dev, u32 qpn) +{ + xa_erase(&dev->qp_xa, qpn); +} + int erdma_create_qp(struct ib_qp *ibqp, struct ib_qp_init_attr *attrs, struct ib_udata *udata) { - struct erdma_qp *qp = to_eqp(ibqp); struct erdma_dev *dev = to_edev(ibqp->device); - struct erdma_ucontext *uctx = rdma_udata_to_drv_context( - udata, struct erdma_ucontext, ibucontext); - struct erdma_ureq_create_qp ureq; struct erdma_uresp_create_qp uresp; + struct erdma_qp *qp = to_eqp(ibqp); + struct erdma_ureq_create_qp ureq; + struct erdma_ucontext *uctx; int ret; + if (compat_mode && unlikely(attrs->qp_type == IB_QPT_GSI)) { + QP_ID(qp) = 1; + + ret = erdma_set_qpn(dev, QP_ID(qp), qp); + if (ret < 0) + return ret; + + ret = erdma_create_mad_qp(ibqp, attrs, udata); + if (ret) + erdma_clear_qpn(dev, QP_ID(qp)); + + return ret; + } + + uctx = rdma_udata_to_drv_context(udata, struct erdma_ucontext, + ibucontext); + + ERDMA_INC_CNT(dev, CMD_CREATE_QP); + ret = erdma_qp_validate_cap(dev, attrs); if (ret) goto err_out; @@ -944,9 +1172,7 @@ int erdma_create_qp(struct ib_qp *ibqp, struct ib_qp_init_attr *attrs, kref_init(&qp->ref); init_completion(&qp->safe_free); - ret = xa_alloc_cyclic(&dev->qp_xa, &qp->ibqp.qp_num, qp, - XA_LIMIT(1, dev->attrs.max_qp - 1), - &dev->next_alloc_qpn, GFP_KERNEL); + ret = erdma_alloc_qpn(dev, &qp->ibqp.qp_num, qp); if (ret < 0) { ret = -ENOMEM; goto err_out; @@ -962,7 +1188,7 @@ int erdma_create_qp(struct ib_qp *ibqp, struct ib_qp_init_attr *attrs, if (ret) goto err_out_xa; - ret = init_user_qp(qp, uctx, ureq.qbuf_va, ureq.qbuf_len, + ret = init_user_qp(qp, udata, uctx, ureq.qbuf_va, ureq.qbuf_len, ureq.db_record_va); if (ret) goto err_out_xa; @@ -978,20 +1204,21 @@ int erdma_create_qp(struct ib_qp *ibqp, struct ib_qp_init_attr *attrs, if (ret) goto err_out_cmd; } else { - init_kernel_qp(dev, qp, attrs); + ret = init_kernel_qp(dev, qp, attrs); + if (ret) + goto err_out_xa; } + INIT_DELAYED_WORK(&qp->reflush_dwork, erdma_flush_worker); + 
qp->attrs.max_send_sge = attrs->cap.max_send_sge; qp->attrs.max_recv_sge = attrs->cap.max_recv_sge; qp->attrs.state = ERDMA_QP_STATE_IDLE; - INIT_DELAYED_WORK(&qp->reflush_dwork, erdma_flush_worker); - ret = create_qp_cmd(uctx, qp); + ret = create_qp_cmd(dev, qp, uctx ? true : false); if (ret) goto err_out_cmd; - spin_lock_init(&qp->lock); - return 0; err_out_cmd: @@ -1000,8 +1227,9 @@ int erdma_create_qp(struct ib_qp *ibqp, struct ib_qp_init_attr *attrs, else free_kernel_qp(qp); err_out_xa: - xa_erase(&dev->qp_xa, QP_ID(qp)); + erdma_clear_qpn(dev, QP_ID(qp)); err_out: + ERDMA_INC_CNT(dev, CMD_CREATE_QP_FAILED); return ret; } @@ -1021,15 +1249,18 @@ static int erdma_create_stag(struct erdma_dev *dev, u32 *stag) struct ib_mr *erdma_get_dma_mr(struct ib_pd *ibpd, int acc) { - struct erdma_dev *dev = to_edev(ibpd->device); struct erdma_mr *mr; - u32 stag; + struct erdma_dev *dev = to_edev(ibpd->device); int ret; + u32 stag; + + ERDMA_INC_CNT(dev, CMD_GET_DMA_MR); mr = kzalloc(sizeof(*mr), GFP_KERNEL); - if (!mr) + if (!mr) { + ERDMA_INC_CNT(dev, CMD_GET_DMA_MR_FAILED); return ERR_PTR(-ENOMEM); - + } ret = erdma_create_stag(dev, &stag); if (ret) goto out_free; @@ -1041,8 +1272,10 @@ struct ib_mr *erdma_get_dma_mr(struct ib_pd *ibpd, int acc) mr->ibmr.pd = ibpd; mr->access = ERDMA_MR_ACC_LR | to_erdma_access_flags(acc); ret = regmr_cmd(dev, mr); - if (ret) + if (ret) { + ret = -EIO; goto out_remove_stag; + } return &mr->ibmr; @@ -1053,6 +1286,7 @@ struct ib_mr *erdma_get_dma_mr(struct ib_pd *ibpd, int acc) out_free: kfree(mr); + ERDMA_INC_CNT(dev, CMD_GET_DMA_MR_FAILED); return ERR_PTR(ret); } @@ -1064,15 +1298,24 @@ struct ib_mr *erdma_ib_alloc_mr(struct ib_pd *ibpd, enum ib_mr_type mr_type, int ret; u32 stag; - if (mr_type != IB_MR_TYPE_MEM_REG) + ERDMA_INC_CNT(dev, CMD_ALLOC_MR); + + if (mr_type != IB_MR_TYPE_MEM_REG) { + ERDMA_INC_CNT(dev, CMD_ALLOC_MR_FAILED); return ERR_PTR(-EOPNOTSUPP); + } - if (max_num_sg > ERDMA_MR_MAX_MTT_CNT) + if (max_num_sg > ERDMA_MR_MAX_MTT_CNT) { + ibdev_err(&dev->ibdev, "max_num_sg too large:%u", max_num_sg); + ERDMA_INC_CNT(dev, CMD_ALLOC_MR_FAILED); return ERR_PTR(-EINVAL); + } mr = kzalloc(sizeof(*mr), GFP_KERNEL); - if (!mr) + if (!mr) { + ERDMA_INC_CNT(dev, CMD_ALLOC_MR_FAILED); return ERR_PTR(-ENOMEM); + } ret = erdma_create_stag(dev, &stag); if (ret) @@ -1089,6 +1332,7 @@ struct ib_mr *erdma_ib_alloc_mr(struct ib_pd *ibpd, enum ib_mr_type mr_type, mr->mem.page_size = PAGE_SIZE; /* update it later. 
*/ mr->mem.page_cnt = max_num_sg; + mr->mem.mtt = erdma_create_mtt(dev, MTT_SIZE(max_num_sg), true); if (IS_ERR(mr->mem.mtt)) { ret = PTR_ERR(mr->mem.mtt); @@ -1096,8 +1340,10 @@ struct ib_mr *erdma_ib_alloc_mr(struct ib_pd *ibpd, enum ib_mr_type mr_type, } ret = regmr_cmd(dev, mr); - if (ret) + if (ret) { + ret = -EIO; goto out_destroy_mtt; + } return &mr->ibmr; @@ -1111,17 +1357,20 @@ struct ib_mr *erdma_ib_alloc_mr(struct ib_pd *ibpd, enum ib_mr_type mr_type, out_free: kfree(mr); + ERDMA_INC_CNT(dev, CMD_ALLOC_MR_FAILED); + return ERR_PTR(ret); } static int erdma_set_page(struct ib_mr *ibmr, u64 addr) { struct erdma_mr *mr = to_emr(ibmr); + dma_addr_t *pg_dma = mr->mem.mtt->buf; if (mr->mem.mtt_nents >= mr->mem.page_cnt) return -1; - mr->mem.mtt->buf[mr->mem.mtt_nents] = addr; + pg_dma[mr->mem.mtt_nents] = addr; mr->mem.mtt_nents++; return 0; @@ -1144,20 +1393,33 @@ int erdma_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents, struct ib_mr *erdma_reg_user_mr(struct ib_pd *ibpd, u64 start, u64 len, u64 virt, int access, struct ib_udata *udata) { - struct erdma_mr *mr = NULL; struct erdma_dev *dev = to_edev(ibpd->device); + struct erdma_mr *mr = NULL; u32 stag; int ret; + struct erdma_ucontext *uctx = rdma_udata_to_drv_context( + udata, struct erdma_ucontext, ibucontext); + + ERDMA_INC_CNT(dev, CMD_REG_USR_MR); + + ibdev_dbg(&dev->ibdev, + "start:0x%llx, len:%llu, virt:0x%llx, access:0x%x\n", start, + len, virt, access); - if (!len || len > dev->attrs.max_mr_size) + if (!len || len > dev->attrs.max_mr_size) { + ibdev_err(&dev->ibdev, + "ERROR: Out of mr size: %llu, max %llu\n", len, + dev->attrs.max_mr_size); + ERDMA_INC_CNT(dev, CMD_REG_USR_MR_FAILED); return ERR_PTR(-EINVAL); + } mr = kzalloc(sizeof(*mr), GFP_KERNEL); if (!mr) return ERR_PTR(-ENOMEM); - ret = get_mtt_entries(dev, &mr->mem, start, len, access, virt, - SZ_2G - SZ_4K, false); + ret = get_mtt_entries(udata, uctx, &mr->mem, start, len, access, virt, + SZ_2G - SZ_4K, true); if (ret) goto err_out_free; @@ -1170,12 +1432,16 @@ struct ib_mr *erdma_reg_user_mr(struct ib_pd *ibpd, u64 start, u64 len, mr->mem.va = virt; mr->mem.len = len; mr->access = ERDMA_MR_ACC_LR | to_erdma_access_flags(access); + if (compat_mode) + mr->access = mr->access | ERDMA_MR_ACC_RW; mr->valid = 1; mr->type = ERDMA_MR_TYPE_NORMAL; ret = regmr_cmd(dev, mr); - if (ret) + if (ret) { + ret = -EIO; goto err_out_mr; + } return &mr->ibmr; @@ -1189,17 +1455,18 @@ struct ib_mr *erdma_reg_user_mr(struct ib_pd *ibpd, u64 start, u64 len, err_out_free: kfree(mr); + ERDMA_INC_CNT(dev, CMD_REG_USR_MR_FAILED); return ERR_PTR(ret); } int erdma_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata) { - struct erdma_mr *mr; struct erdma_dev *dev = to_edev(ibmr->device); + struct erdma_mr *mr = to_emr(ibmr); struct erdma_cmdq_dereg_mr_req req; int ret; - mr = to_emr(ibmr); + ERDMA_INC_CNT(dev, CMD_DEREG_MR); erdma_cmdq_build_reqhdr(&req.hdr, CMDQ_SUBMOD_RDMA, CMDQ_OPCODE_DEREG_MR); @@ -1208,8 +1475,13 @@ int erdma_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata) FIELD_PREP(ERDMA_CMD_MR_KEY_MASK, ibmr->lkey & 0xFF); ret = erdma_post_cmd_wait(&dev->cmdq, &req, sizeof(req), NULL, NULL); - if (ret) + if (ret) { + ERDMA_INC_CNT(dev, CMD_DEREG_MR_FAILED); + dev_err_ratelimited( + &dev->pdev->dev, + "ERROR: err code = %d, cmd of dereg mr failed.\n", ret); return ret; + } erdma_free_idx(&dev->res_cb[ERDMA_RES_TYPE_STAG_IDX], ibmr->lkey >> 8); @@ -1219,6 +1491,20 @@ int erdma_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata) return 0; } +static void 
free_kernel_cq(struct erdma_dev *dev, struct erdma_kcq_info *kcq) +{ + if (kcq->qbuf) { + put_mtt_entries(dev, &kcq->qbuf_mtt); + vfree(kcq->qbuf); + kcq->qbuf = NULL; + } + + if (kcq->dbrec) { + dma_pool_free(dev->db_pool, kcq->dbrec, kcq->dbrec_dma); + kcq->dbrec = NULL; + } +} + int erdma_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata) { struct erdma_cq *cq = to_ecq(ibcq); @@ -1228,28 +1514,97 @@ int erdma_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata) int err; struct erdma_cmdq_destroy_cq_req req; + if (compat_mode && cq->sw_cq) + detach_sw_cq(cq); + + ERDMA_INC_CNT(dev, CMD_DESTROY_CQ); + + hrtimer_cancel(&cq->dim.timer); + erdma_cmdq_build_reqhdr(&req.hdr, CMDQ_SUBMOD_RDMA, CMDQ_OPCODE_DESTROY_CQ); req.cqn = cq->cqn; err = erdma_post_cmd_wait(&dev->cmdq, &req, sizeof(req), NULL, NULL); - if (err) + if (err) { + dev_err_ratelimited( + &dev->pdev->dev, + "ERROR: err code = %d, cmd of destroy cq failed.\n", + err); + ERDMA_INC_CNT(dev, CMD_DESTROY_CQ_FAILED); return err; - + } if (rdma_is_kernel_res(&cq->ibcq.res)) { - dma_free_coherent(&dev->pdev->dev, - WARPPED_BUFSIZE(cq->depth << CQE_SHIFT), - cq->kern_cq.qbuf, cq->kern_cq.qbuf_dma_addr); + free_kernel_cq(dev, &cq->kern_cq); } else { erdma_unmap_user_dbrecords(ctx, &cq->user_cq.user_dbr_page); - put_mtt_entries(dev, &cq->user_cq.qbuf_mem); + put_mtt_entries(dev, &cq->user_cq.qbuf_mtt); } xa_erase(&dev->cq_xa, cq->cqn); - return 0; } +static void erdma_ib_lock_cqs(struct erdma_cq *send_cq, + struct erdma_cq *recv_cq) + __acquires(&send_cq->kern_cq.lock) __acquires(&recv_cq->kern_cq.lock) +{ + if (send_cq) { + if (recv_cq) { + if (send_cq->cqn < recv_cq->cqn) { + spin_lock(&send_cq->kern_cq.lock); + spin_lock_nested(&recv_cq->kern_cq.lock, + SINGLE_DEPTH_NESTING); + } else if (send_cq->cqn == recv_cq->cqn) { + spin_lock(&send_cq->kern_cq.lock); + __acquire(&recv_cq->kern_cq.lock); + } else { + spin_lock(&recv_cq->kern_cq.lock); + spin_lock_nested(&send_cq->kern_cq.lock, + SINGLE_DEPTH_NESTING); + } + } else { + spin_lock(&send_cq->kern_cq.lock); + __acquire(&recv_cq->kern_cq.lock); + } + } else if (recv_cq) { + spin_lock(&recv_cq->kern_cq.lock); + __acquire(&send_cq->kern_cq.lock); + } else { + __acquire(&send_cq->kern_cq.lock); + __acquire(&recv_cq->kern_cq.lock); + } +} + +static void erdma_ib_unlock_cqs(struct erdma_cq *send_cq, + struct erdma_cq *recv_cq) + __releases(&send_cq->kern_cq.lock) __releases(&recv_cq->kern_cq.lock) +{ + if (send_cq) { + if (recv_cq) { + if (send_cq->cqn < recv_cq->cqn) { + spin_unlock(&recv_cq->kern_cq.lock); + spin_unlock(&send_cq->kern_cq.lock); + } else if (send_cq->cqn == recv_cq->cqn) { + __release(&recv_cq->kern_cq.lock); + spin_unlock(&send_cq->kern_cq.lock); + } else { + spin_unlock(&send_cq->kern_cq.lock); + spin_unlock(&recv_cq->kern_cq.lock); + } + } else { + __release(&recv_cq->kern_cq.lock); + spin_unlock(&send_cq->kern_cq.lock); + } + } else if (recv_cq) { + __release(&send_cq->kern_cq.lock); + spin_unlock(&recv_cq->kern_cq.lock); + } else { + __release(&recv_cq->kern_cq.lock); + __release(&send_cq->kern_cq.lock); + } +} + int erdma_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata) { struct erdma_qp *qp = to_eqp(ibqp); @@ -1259,6 +1614,22 @@ int erdma_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata) struct erdma_qp_attrs qp_attrs; int err; struct erdma_cmdq_destroy_qp_req req; + unsigned long flags; + + if (rdma_is_kernel_res(&qp->ibqp.res)) { + local_irq_save(flags); + erdma_ib_lock_cqs(qp->scq, qp->rcq); + qp->attrs.flags |= ERDMA_QP_IN_DESTROY; + 
erdma_ib_unlock_cqs(qp->scq, qp->rcq); + local_irq_restore(flags); + } + + if (compat_mode && unlikely(ibqp->qp_type == IB_QPT_GSI)) { + erdma_destroy_mad_qp(ibqp); + goto free_idr; + } + + ERDMA_INC_CNT(dev, CMD_DESTROY_QP); down_write(&qp->state_lock); qp_attrs.state = ERDMA_QP_STATE_ERROR; @@ -1272,23 +1643,20 @@ int erdma_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata) req.qpn = QP_ID(qp); err = erdma_post_cmd_wait(&dev->cmdq, &req, sizeof(req), NULL, NULL); - if (err) + if (err) { + dev_err_ratelimited( + &dev->pdev->dev, + "ERROR: err code = %d, cmd of destroy qp failed.\n", + err); + ERDMA_INC_CNT(dev, CMD_DESTROY_QP_FAILED); return err; + } erdma_qp_put(qp); wait_for_completion(&qp->safe_free); if (rdma_is_kernel_res(&qp->ibqp.res)) { - vfree(qp->kern_qp.swr_tbl); - vfree(qp->kern_qp.rwr_tbl); - dma_free_coherent( - &dev->pdev->dev, - WARPPED_BUFSIZE(qp->attrs.rq_size << RQE_SHIFT), - qp->kern_qp.rq_buf, qp->kern_qp.rq_buf_dma_addr); - dma_free_coherent( - &dev->pdev->dev, - WARPPED_BUFSIZE(qp->attrs.sq_size << SQEBB_SHIFT), - qp->kern_qp.sq_buf, qp->kern_qp.sq_buf_dma_addr); + free_kernel_qp(qp); } else { put_mtt_entries(dev, &qp->user_qp.sq_mem); put_mtt_entries(dev, &qp->user_qp.rq_mem); @@ -1297,7 +1665,9 @@ int erdma_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata) if (qp->cep) erdma_cep_put(qp->cep); - xa_erase(&dev->qp_xa, QP_ID(qp)); + +free_idr: + erdma_clear_qpn(dev, QP_ID(qp)); return 0; } @@ -1317,7 +1687,7 @@ int erdma_mmap(struct ib_ucontext *ctx, struct vm_area_struct *vma) struct rdma_user_mmap_entry *rdma_entry; struct erdma_user_mmap_entry *entry; pgprot_t prot; - int err; + int err = -EINVAL; rdma_entry = rdma_user_mmap_entry_get(ctx, vma); if (!rdma_entry) @@ -1328,17 +1698,14 @@ int erdma_mmap(struct ib_ucontext *ctx, struct vm_area_struct *vma) switch (entry->mmap_flag) { case ERDMA_MMAP_IO_NC: /* map doorbell. */ - prot = pgprot_device(vma->vm_page_prot); + prot = pgprot_noncached(vma->vm_page_prot); + err = rdma_user_mmap_io(ctx, vma, PFN_DOWN(entry->address), + PAGE_SIZE, prot, rdma_entry); break; default: - err = -EINVAL; - goto put_entry; + return -EINVAL; } - err = rdma_user_mmap_io(ctx, vma, PFN_DOWN(entry->address), PAGE_SIZE, - prot, rdma_entry); - -put_entry: rdma_user_mmap_entry_put(rdma_entry); return err; } @@ -1350,73 +1717,56 @@ void erdma_mmap_free(struct rdma_user_mmap_entry *rdma_entry) kfree(entry); } -static int alloc_db_resources(struct erdma_dev *dev, struct erdma_ucontext *ctx, - bool ext_db_en) -{ - struct erdma_cmdq_ext_db_req req = {}; - u64 val0, val1; - int ret; +#define ERDMA_SDB_PAGE 0 +#define ERDMA_SDB_ENTRY 1 +#define ERDMA_SDB_SHARED 2 - /* - * CAP_SYS_RAWIO is required if hardware does not support extend - * doorbell mechanism. - */ - if (!ext_db_en && !capable(CAP_SYS_RAWIO)) - return -EPERM; - - if (!ext_db_en) { - ctx->sdb = dev->func_bar_addr + ERDMA_BAR_SQDB_SPACE_OFFSET; - ctx->rdb = dev->func_bar_addr + ERDMA_BAR_RQDB_SPACE_OFFSET; - ctx->cdb = dev->func_bar_addr + ERDMA_BAR_CQDB_SPACE_OFFSET; - return 0; - } +static void alloc_db_resources(struct erdma_dev *dev, + struct erdma_ucontext *ctx) +{ + struct erdma_devattr *attrs = &dev->attrs; + u32 bitmap_idx, hw_page_idx; - erdma_cmdq_build_reqhdr(&req.hdr, CMDQ_SUBMOD_COMMON, - CMDQ_OPCODE_ALLOC_DB); + if (attrs->disable_dwqe) + goto alloc_normal_db; - req.cfg = FIELD_PREP(ERDMA_CMD_EXT_DB_CQ_EN_MASK, 1) | - FIELD_PREP(ERDMA_CMD_EXT_DB_RQ_EN_MASK, 1) | - FIELD_PREP(ERDMA_CMD_EXT_DB_SQ_EN_MASK, 1); + /* Try to alloc independent SDB page. 
*/ + spin_lock(&dev->db_bitmap_lock); + bitmap_idx = find_first_zero_bit(dev->sdb_page, attrs->dwqe_pages); + if (bitmap_idx != attrs->dwqe_pages) { + set_bit(bitmap_idx, dev->sdb_page); + spin_unlock(&dev->db_bitmap_lock); - ret = erdma_post_cmd_wait(&dev->cmdq, &req, sizeof(req), &val0, &val1); - if (ret) - return ret; - - ctx->ext_db.enable = true; - ctx->ext_db.sdb_off = ERDMA_GET(val0, ALLOC_DB_RESP_SDB); - ctx->ext_db.rdb_off = ERDMA_GET(val0, ALLOC_DB_RESP_RDB); - ctx->ext_db.cdb_off = ERDMA_GET(val0, ALLOC_DB_RESP_CDB); + ctx->sdb_type = ERDMA_SDB_PAGE; + ctx->sdb_bitmap_idx = bitmap_idx; + ctx->sdb = dev->func_bar_addr + ERDMA_BAR_SQDB_SPACE_OFFSET + + (bitmap_idx << ERDMA_HW_PAGE_SHIFT); - ctx->sdb = dev->func_bar_addr + (ctx->ext_db.sdb_off << PAGE_SHIFT); - ctx->cdb = dev->func_bar_addr + (ctx->ext_db.rdb_off << PAGE_SHIFT); - ctx->rdb = dev->func_bar_addr + (ctx->ext_db.cdb_off << PAGE_SHIFT); + return; + } - return 0; -} + bitmap_idx = find_first_zero_bit(dev->sdb_entry, attrs->dwqe_entries); + if (bitmap_idx != attrs->dwqe_entries) { + set_bit(bitmap_idx, dev->sdb_entry); + spin_unlock(&dev->db_bitmap_lock); -static void free_db_resources(struct erdma_dev *dev, struct erdma_ucontext *ctx) -{ - struct erdma_cmdq_ext_db_req req = {}; - int ret; + ctx->sdb_type = ERDMA_SDB_ENTRY; + ctx->sdb_bitmap_idx = bitmap_idx; + hw_page_idx = attrs->dwqe_pages + + bitmap_idx / ERDMA_DWQE_TYPE1_CNT_PER_PAGE; + ctx->sdb_entid = bitmap_idx % ERDMA_DWQE_TYPE1_CNT_PER_PAGE; + ctx->sdb = dev->func_bar_addr + ERDMA_BAR_SQDB_SPACE_OFFSET + + (hw_page_idx << ERDMA_HW_PAGE_SHIFT); - if (!ctx->ext_db.enable) return; + } - erdma_cmdq_build_reqhdr(&req.hdr, CMDQ_SUBMOD_COMMON, - CMDQ_OPCODE_FREE_DB); - - req.cfg = FIELD_PREP(ERDMA_CMD_EXT_DB_CQ_EN_MASK, 1) | - FIELD_PREP(ERDMA_CMD_EXT_DB_RQ_EN_MASK, 1) | - FIELD_PREP(ERDMA_CMD_EXT_DB_SQ_EN_MASK, 1); - - req.sdb_off = ctx->ext_db.sdb_off; - req.rdb_off = ctx->ext_db.rdb_off; - req.cdb_off = ctx->ext_db.cdb_off; + spin_unlock(&dev->db_bitmap_lock); - ret = erdma_post_cmd_wait(&dev->cmdq, &req, sizeof(req), NULL, NULL); - if (ret) - ibdev_err_ratelimited(&dev->ibdev, - "free db resources failed %d", ret); +alloc_normal_db: + ctx->sdb_type = ERDMA_SDB_SHARED; + ctx->sdb = dev->func_bar_addr + + (ERDMA_SDB_SHARED_PAGE_INDEX << ERDMA_HW_PAGE_SHIFT); } static void erdma_uctx_user_mmap_entries_remove(struct erdma_ucontext *uctx) @@ -1428,77 +1778,87 @@ static void erdma_uctx_user_mmap_entries_remove(struct erdma_ucontext *uctx) int erdma_alloc_ucontext(struct ib_ucontext *ibctx, struct ib_udata *udata) { - struct erdma_ucontext *ctx = to_ectx(ibctx); struct erdma_dev *dev = to_edev(ibctx->device); - int ret; + struct erdma_ucontext *ctx = to_ectx(ibctx); struct erdma_uresp_alloc_ctx uresp = {}; + int ret; + + ERDMA_INC_CNT(dev, CMD_ALLOC_UCTX); if (atomic_inc_return(&dev->num_ctx) > ERDMA_MAX_CONTEXT) { ret = -ENOMEM; goto err_out; } - if (udata->outlen < sizeof(uresp)) { - ret = -EINVAL; - goto err_out; - } - INIT_LIST_HEAD(&ctx->dbrecords_page_list); mutex_init(&ctx->dbrecords_page_mutex); - ret = alloc_db_resources(dev, ctx, - !!(dev->attrs.cap_flags & - ERDMA_DEV_CAP_FLAGS_EXTEND_DB)); - if (ret) - goto err_out; + alloc_db_resources(dev, ctx); + + ctx->rdb = dev->func_bar_addr + ERDMA_BAR_RQDB_SPACE_OFFSET; + ctx->cdb = dev->func_bar_addr + ERDMA_BAR_CQDB_SPACE_OFFSET; ctx->sq_db_mmap_entry = erdma_user_mmap_entry_insert( - ctx, (void *)ctx->sdb, PAGE_SIZE, ERDMA_MMAP_IO_NC, &uresp.sdb); + ibctx, (u64)ctx->sdb, PAGE_SIZE, ERDMA_MMAP_IO_NC, &uresp.sdb); if 
(!ctx->sq_db_mmap_entry) { ret = -ENOMEM; - goto err_free_ext_db; + goto err_out; } ctx->rq_db_mmap_entry = erdma_user_mmap_entry_insert( - ctx, (void *)ctx->rdb, PAGE_SIZE, ERDMA_MMAP_IO_NC, &uresp.rdb); + ibctx, (u64)ctx->rdb, PAGE_SIZE, ERDMA_MMAP_IO_NC, &uresp.rdb); if (!ctx->rq_db_mmap_entry) { ret = -EINVAL; - goto err_put_mmap_entries; + goto err_out; } ctx->cq_db_mmap_entry = erdma_user_mmap_entry_insert( - ctx, (void *)ctx->cdb, PAGE_SIZE, ERDMA_MMAP_IO_NC, &uresp.cdb); + ibctx, (u64)ctx->cdb, PAGE_SIZE, ERDMA_MMAP_IO_NC, &uresp.cdb); if (!ctx->cq_db_mmap_entry) { ret = -EINVAL; - goto err_put_mmap_entries; + goto err_out; } uresp.dev_id = dev->pdev->device; - - ret = ib_copy_to_udata(udata, &uresp, sizeof(uresp)); + uresp.sdb_type = ctx->sdb_type; + uresp.sdb_entid = ctx->sdb_entid; + uresp.sdb_off = ctx->sdb & ~PAGE_MASK; + uresp.rdb_off = ctx->rdb & ~PAGE_MASK; + uresp.cdb_off = ctx->cdb & ~PAGE_MASK; + + ret = ib_copy_to_udata(udata, &uresp, + min(sizeof(uresp), udata->outlen)); if (ret) - goto err_put_mmap_entries; + goto err_out; return 0; -err_put_mmap_entries: +err_out: erdma_uctx_user_mmap_entries_remove(ctx); + atomic_dec(&dev->num_ctx); -err_free_ext_db: - free_db_resources(dev, ctx); + if (ret) + ERDMA_INC_CNT(dev, CMD_ALLOC_UCTX_FAILED); -err_out: - atomic_dec(&dev->num_ctx); return ret; } void erdma_dealloc_ucontext(struct ib_ucontext *ibctx) { - struct erdma_dev *dev = to_edev(ibctx->device); struct erdma_ucontext *ctx = to_ectx(ibctx); + struct erdma_dev *dev = to_edev(ibctx->device); + + ERDMA_INC_CNT(dev, CMD_DEALLOC_UCTX); + spin_lock(&dev->db_bitmap_lock); + if (ctx->sdb_type == ERDMA_SDB_PAGE) + clear_bit(ctx->sdb_bitmap_idx, dev->sdb_page); + else if (ctx->sdb_type == ERDMA_SDB_ENTRY) + clear_bit(ctx->sdb_bitmap_idx, dev->sdb_entry); erdma_uctx_user_mmap_entries_remove(ctx); - free_db_resources(dev, ctx); + + spin_unlock(&dev->db_bitmap_lock); + atomic_dec(&dev->num_ctx); } @@ -1512,22 +1872,36 @@ static int ib_qp_state_to_erdma_qp_state[IB_QPS_ERR + 1] = { [IB_QPS_ERR] = ERDMA_QP_STATE_ERROR }; +#define IB_QP_OOB_CONN_ATTR IB_QP_RESERVED1 int erdma_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask, struct ib_udata *udata) { - struct erdma_qp_attrs new_attrs; enum erdma_qp_attr_mask erdma_attr_mask = 0; struct erdma_qp *qp = to_eqp(ibqp); + struct erdma_qp_attrs new_attrs; int ret = 0; - if (attr_mask & ~IB_QP_ATTR_STANDARD_BITS) + if (attr_mask & ~(IB_QP_ATTR_STANDARD_BITS | IB_QP_OOB_CONN_ATTR)) return -EOPNOTSUPP; + if (compat_mode && ibqp->qp_type == IB_QPT_GSI) + return erdma_modify_mad_qp(ibqp, attr, attr_mask, udata); + + if (attr_mask & IB_QP_OOB_CONN_ATTR) { + ret = update_kernel_qp_oob_attr(qp, attr, attr_mask); + if (ret) + return ret; + } + + if (compat_mode) + erdma_handle_compat_attr(qp, attr, attr_mask); memset(&new_attrs, 0, sizeof(new_attrs)); if (attr_mask & IB_QP_STATE) { new_attrs.state = ib_qp_state_to_erdma_qp_state[attr->qp_state]; - + if ((qp->attrs.connect_without_cm || compat_mode) && + new_attrs.state == ERDMA_QP_STATE_RTR) + new_attrs.state = ERDMA_QP_STATE_RTS; erdma_attr_mask |= ERDMA_QP_ATTR_STATE; } @@ -1540,7 +1914,7 @@ int erdma_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask, return ret; } -static enum ib_qp_state query_qp_state(struct erdma_qp *qp) +static inline enum ib_qp_state query_qp_state(struct erdma_qp *qp) { switch (qp->attrs.state) { case ERDMA_QP_STATE_IDLE: @@ -1581,7 +1955,7 @@ int erdma_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, qp_attr->cap.max_send_sge = 
qp->attrs.max_send_sge; qp_attr->cap.max_recv_sge = qp->attrs.max_recv_sge; - qp_attr->path_mtu = ib_mtu_int_to_enum(dev->netdev->mtu); + qp_attr->path_mtu = ib_mtu_int_to_enum(dev->mtu); qp_attr->max_rd_atomic = qp->attrs.irq_size; qp_attr->max_dest_rd_atomic = qp->attrs.orq_size; @@ -1597,59 +1971,97 @@ int erdma_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, return 0; } -static int erdma_init_user_cq(struct erdma_ucontext *ctx, struct erdma_cq *cq, +static int erdma_init_user_cq(struct ib_udata *udata, + struct erdma_ucontext *uctx, struct erdma_cq *cq, struct erdma_ureq_create_cq *ureq) { - int ret; struct erdma_dev *dev = to_edev(cq->ibcq.device); + int ret; - ret = get_mtt_entries(dev, &cq->user_cq.qbuf_mem, ureq->qbuf_va, + ret = get_mtt_entries(udata, uctx, &cq->user_cq.qbuf_mtt, ureq->qbuf_va, ureq->qbuf_len, 0, ureq->qbuf_va, SZ_64M - SZ_4K, - true); + false); if (ret) return ret; - ret = erdma_map_user_dbrecords(ctx, ureq->db_record_va, + ret = erdma_map_user_dbrecords(udata, uctx, ureq->db_record_va, &cq->user_cq.user_dbr_page, - &cq->user_cq.db_info_dma_addr); + &cq->user_cq.dbrec_dma); if (ret) - put_mtt_entries(dev, &cq->user_cq.qbuf_mem); + put_mtt_entries(dev, &cq->user_cq.qbuf_mtt); return ret; } +static void init_cq_dbrec(struct erdma_cq *cq) +{ + u64 db_data = FIELD_PREP(ERDMA_CQDB_IDX_MASK, 0xFF) | + FIELD_PREP(ERDMA_CQDB_CQN_MASK, cq->cqn) | + FIELD_PREP(ERDMA_CQDB_CMDSN_MASK, 3); + + *cq->kern_cq.dbrec = db_data; +} + static int erdma_init_kernel_cq(struct erdma_cq *cq) { struct erdma_dev *dev = to_edev(cq->ibcq.device); + u64 cq_sz = cq->depth << CQE_SHIFT; + int ret; - cq->kern_cq.qbuf = - dma_alloc_coherent(&dev->pdev->dev, - WARPPED_BUFSIZE(cq->depth << CQE_SHIFT), - &cq->kern_cq.qbuf_dma_addr, GFP_KERNEL); - if (!cq->kern_cq.qbuf) - return -ENOMEM; + cq->kern_cq.qbuf = vzalloc(cq_sz); + if (!cq->kern_cq.qbuf) { + ret = -ENOMEM; + goto err_out; + } + + ret = get_mtt_entries(dev, NULL, &cq->kern_cq.qbuf_mtt, + (u64)cq->kern_cq.qbuf, cq_sz, 0, + (u64)cq->kern_cq.qbuf, 0, false); + if (ret) + goto err_free_qbuf; + + cq->kern_cq.dbrec = dma_pool_zalloc(dev->db_pool, GFP_KERNEL, + &cq->kern_cq.dbrec_dma); + if (!cq->kern_cq.dbrec) { + ret = -ENOMEM; + goto err_free_mtt; + } + init_cq_dbrec(cq); - cq->kern_cq.db_record = - (u64 *)(cq->kern_cq.qbuf + (cq->depth << CQE_SHIFT)); spin_lock_init(&cq->kern_cq.lock); /* use default cqdb addr */ cq->kern_cq.db = dev->func_bar + ERDMA_BAR_CQDB_SPACE_OFFSET; return 0; + +err_free_mtt: + put_mtt_entries(dev, &cq->kern_cq.qbuf_mtt); +err_free_qbuf: + vfree(cq->kern_cq.qbuf); + cq->kern_cq.qbuf = NULL; +err_out: + return ret; } int erdma_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr, struct ib_udata *udata) { - struct erdma_cq *cq = to_ecq(ibcq); struct erdma_dev *dev = to_edev(ibcq->device); + struct erdma_cq *cq = to_ecq(ibcq); unsigned int depth = attr->cqe; int ret; - struct erdma_ucontext *ctx = rdma_udata_to_drv_context( + struct erdma_ucontext *uctx = rdma_udata_to_drv_context( udata, struct erdma_ucontext, ibucontext); - if (depth > dev->attrs.max_cqe) + ERDMA_INC_CNT(dev, CMD_CREATE_CQ); + + if (depth > dev->attrs.max_cqe) { + dev_warn(&dev->pdev->dev, + "WARN: exceed cqe(%d) > capbility(%d)\n", depth, + dev->attrs.max_cqe); + ERDMA_INC_CNT(dev, CMD_CREATE_CQ_FAILED); return -EINVAL; + } depth = roundup_pow_of_two(depth); cq->ibcq.cqe = depth; @@ -1659,10 +2071,12 @@ int erdma_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr, ret = xa_alloc_cyclic(&dev->cq_xa, &cq->cqn, cq, 
XA_LIMIT(1, dev->attrs.max_cq - 1), &dev->next_alloc_cqn, GFP_KERNEL); - if (ret < 0) + if (ret < 0) { + ERDMA_INC_CNT(dev, CMD_CREATE_CQ_FAILED); return ret; + } - if (!rdma_is_kernel_res(&ibcq->res)) { + if (udata) { struct erdma_ureq_create_cq ureq; struct erdma_uresp_create_cq uresp; @@ -1671,7 +2085,7 @@ int erdma_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr, if (ret) goto err_out_xa; - ret = erdma_init_user_cq(ctx, cq, &ureq); + ret = erdma_init_user_cq(udata, uctx, cq, &ureq); if (ret) goto err_out_xa; @@ -1688,28 +2102,45 @@ int erdma_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr, goto err_out_xa; } - ret = create_cq_cmd(ctx, cq); + ret = create_cq_cmd(dev, cq, udata ? true : false); if (ret) goto err_free_res; + hrtimer_init(&cq->dim.timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); + cq->dim.timer.function = cq_timer_fn; + return 0; err_free_res: - if (!rdma_is_kernel_res(&ibcq->res)) { - erdma_unmap_user_dbrecords(ctx, &cq->user_cq.user_dbr_page); - put_mtt_entries(dev, &cq->user_cq.qbuf_mem); + if (udata) { + erdma_unmap_user_dbrecords(uctx, &cq->user_cq.user_dbr_page); + put_mtt_entries(dev, &cq->user_cq.qbuf_mtt); } else { - dma_free_coherent(&dev->pdev->dev, - WARPPED_BUFSIZE(depth << CQE_SHIFT), - cq->kern_cq.qbuf, cq->kern_cq.qbuf_dma_addr); + free_kernel_cq(dev, &cq->kern_cq); } err_out_xa: xa_erase(&dev->cq_xa, cq->cqn); - + ERDMA_INC_CNT(dev, CMD_CREATE_CQ_FAILED); return ret; } +struct net_device *erdma_get_netdev(struct ib_device *device, port_t port_num) +{ + struct erdma_dev *edev = to_edev(device); + + read_lock(&edev->netdev_lock); + if (edev->netdev) + dev_hold(edev->netdev); + read_unlock(&edev->netdev_lock); + + return edev->netdev; +} + +void erdma_disassociate_ucontext(struct ib_ucontext *ibcontext) +{ +} + void erdma_set_mtu(struct erdma_dev *dev, u32 mtu) { struct erdma_cmdq_config_mtu_req req; @@ -1721,6 +2152,25 @@ void erdma_set_mtu(struct erdma_dev *dev, u32 mtu) erdma_post_cmd_wait(&dev->cmdq, &req, sizeof(req), NULL, NULL); } +int erdma_set_retrans_num(struct erdma_dev *dev, u32 retrans_num) +{ + struct erdma_cmdq_set_retrans_num_req req; + int ret; + + if (retrans_num == 0 || retrans_num > 0xffUL) + return -EINVAL; + + erdma_cmdq_build_reqhdr(&req.hdr, CMDQ_SUBMOD_COMMON, + CMDQ_OPCODE_SET_RETRANS_NUM); + req.retrans_num = retrans_num; + + ret = erdma_post_cmd_wait(&dev->cmdq, &req, sizeof(req), NULL, NULL); + if (!ret) + dev->attrs.retrans_num = retrans_num; + + return ret; +} + void erdma_port_event(struct erdma_dev *dev, enum ib_event_type reason) { struct ib_event event; @@ -1731,3 +2181,49 @@ void erdma_port_event(struct erdma_dev *dev, enum ib_event_type reason) ib_dispatch_event(&event); } + +int erdma_query_hw_stats(struct erdma_dev *dev) +{ + struct erdma_cmdq_query_stats_resp *stats; + struct erdma_cmdq_query_req req; + dma_addr_t dma_addr; + int err; + + erdma_cmdq_build_reqhdr(&req.hdr, CMDQ_SUBMOD_COMMON, + CMDQ_OPCODE_GET_STATS); + + stats = dma_pool_zalloc(dev->resp_pool, GFP_KERNEL, &dma_addr); + if (!stats) + return -ENOMEM; + + req.target_addr = dma_addr; + req.target_length = ERDMA_HW_RESP_SIZE; + /* Clear the magic fileds. 
*/ + stats->hdr.magic = 0; + + err = erdma_post_cmd_wait(&dev->cmdq, &req, sizeof(req), NULL, NULL); + if (err) + goto out; + + if (stats->hdr.magic != 0x5566) { + err = -EINVAL; + goto out; + } + + memcpy(&dev->stats.value[ERDMA_STATS_TX_REQS_CNT], &stats->tx_req_cnt, + sizeof(__u64) * (ERDMA_STATS_RX_PPS_METER_DROP_CNT - + ERDMA_STATS_TX_REQS_CNT + 1)); + +out: + dma_pool_free(dev->resp_pool, stats, dma_addr); + + return err; +} + +const struct cpumask *erdma_get_vector_affinity(struct ib_device *ibdev, + int comp_vector) +{ + struct erdma_dev *dev = to_edev(ibdev); + + return &dev->ceqs[comp_vector].irq.affinity_hint_mask; +} diff --git a/drivers/infiniband/hw/erdma/erdma_verbs.h b/drivers/infiniband/hw/erdma/erdma_verbs.h index eb9c0f92fb6f0f99057d47ac85c87522e4ba0d68..7a7c820eb65fd2d1afbdaa5b46eb8572faf47ca7 100644 --- a/drivers/infiniband/hw/erdma/erdma_verbs.h +++ b/drivers/infiniband/hw/erdma/erdma_verbs.h @@ -20,6 +20,7 @@ #define ERDMA_MAX_RECV_SGE 1 #define ERDMA_MAX_INLINE (sizeof(struct erdma_sge) * (ERDMA_MAX_SEND_SGE)) #define ERDMA_MAX_FRMR_PA 512 +#define ERDMA_DEFAULT_RETRANS_NUM 24 enum { ERDMA_MMAP_IO_NC = 0, /* no cache */ @@ -31,18 +32,12 @@ struct erdma_user_mmap_entry { u8 mmap_flag; }; -struct erdma_ext_db_info { - bool enable; - u16 sdb_off; - u16 rdb_off; - u16 cdb_off; -}; - struct erdma_ucontext { struct ib_ucontext ibucontext; - struct erdma_ext_db_info ext_db; - + u32 sdb_type; + u32 sdb_bitmap_idx; + u32 sdb_entid; u64 sdb; u64 rdb; u64 cdb; @@ -59,13 +54,14 @@ struct erdma_ucontext { struct erdma_pd { struct ib_pd ibpd; u32 pdn; + struct sw_pd *sw_pd; }; /* * MemoryRegion definition. */ #define ERDMA_MAX_INLINE_MTT_ENTRIES 4 -#define MTT_SIZE(mtt_cnt) ((mtt_cnt) << 3) /* per mtt entry takes 8 Bytes. */ +#define MTT_SIZE(mtt_cnt) ((mtt_cnt) << 3) /* per mtt takes 8 Bytes. */ #define ERDMA_MR_MAX_MTT_CNT 524288 #define ERDMA_MTT_ENTRY_SIZE 8 @@ -73,8 +69,8 @@ struct erdma_pd { #define ERDMA_MR_TYPE_FRMR 1 #define ERDMA_MR_TYPE_DMA 2 -#define ERDMA_MR_MTT_0LEVEL 0 -#define ERDMA_MR_MTT_1LEVEL 1 +#define ERDMA_MR_INLINE_MTT 0 +#define ERDMA_MR_INDIRECT_MTT 1 #define ERDMA_MR_ACC_RA BIT(0) #define ERDMA_MR_ACC_LR BIT(1) @@ -90,17 +86,16 @@ static inline u8 to_erdma_access_flags(int access) (access & IB_ACCESS_REMOTE_ATOMIC ? 
ERDMA_MR_ACC_RA : 0); } -/* Hierarchical storage structure for MTT entries */ struct erdma_mtt { - u64 *buf; + void *buf; size_t size; bool continuous; union { dma_addr_t buf_dma; struct { - struct scatterlist *sglist; - u32 nsg; + dma_addr_t *dma_addrs; + u32 npages; u32 level; }; }; @@ -108,15 +103,29 @@ struct erdma_mtt { struct erdma_mtt *low_level; }; -struct erdma_mem { - struct ib_umem *umem; - struct erdma_mtt *mtt; +enum erdma_mem_type { + ERDMA_UMEM = 0, + ERDMA_KMEM = 1, +}; +struct erdma_kmem { + dma_addr_t *dma_addrs; + u64 npages; +}; + +struct erdma_mem { + enum erdma_mem_type type; + union { + struct ib_umem *umem; + struct erdma_kmem *kmem; + }; u32 page_size; u32 page_offset; u32 page_cnt; u32 mtt_nents; + struct erdma_mtt *mtt; + u64 va; u64 len; }; @@ -140,8 +149,8 @@ struct erdma_uqp { struct erdma_mem sq_mem; struct erdma_mem rq_mem; - dma_addr_t sq_db_info_dma_addr; - dma_addr_t rq_db_info_dma_addr; + dma_addr_t sq_dbrec_dma; + dma_addr_t rq_dbrec_dma; struct erdma_user_dbrecords_page *user_dbr_page; @@ -149,26 +158,27 @@ struct erdma_uqp { }; struct erdma_kqp { + spinlock_t sq_lock ____cacheline_aligned; u16 sq_pi; u16 sq_ci; + u64 *swr_tbl; + void *hw_sq_db; + void *sq_buf; + void *sq_dbrec; + spinlock_t rq_lock ____cacheline_aligned; u16 rq_pi; u16 rq_ci; - - u64 *swr_tbl; u64 *rwr_tbl; - - void __iomem *hw_sq_db; - void __iomem *hw_rq_db; - - void *sq_buf; - dma_addr_t sq_buf_dma_addr; - + void *hw_rq_db; void *rq_buf; - dma_addr_t rq_buf_dma_addr; + void *rq_dbrec; - void *sq_db_info; - void *rq_db_info; + struct erdma_mem sq_mem; + struct erdma_mem rq_mem; + + dma_addr_t sq_dbrec_dma; + dma_addr_t rq_dbrec_dma; u8 sig_all; }; @@ -195,7 +205,8 @@ enum erdma_qp_attr_mask { }; enum erdma_qp_flags { - ERDMA_QP_IN_FLUSHING = (1 << 0), + ERDMA_QP_IN_DESTROY = (1 << 0), + ERDMA_QP_IN_FLUSHING = (1 << 1), }; struct erdma_qp_attrs { @@ -208,14 +219,28 @@ struct erdma_qp_attrs { u32 max_send_sge; u32 max_recv_sge; u32 cookie; + u32 flags; + + u32 remote_cookie; #define ERDMA_QP_ACTIVE 0 #define ERDMA_QP_PASSIVE 1 u8 qp_type; u8 pd_len; + bool connect_without_cm; + __u16 sport; + __u16 dport; + union { + struct sockaddr_in6 in6; + struct sockaddr_in in; + } laddr, raddr; + u32 remote_qp_num; + u32 sq_psn; + u32 rq_psn; }; struct erdma_qp { struct ib_qp ibqp; + struct sw_qp *sw_qp; struct kref ref; struct completion safe_free; struct erdma_dev *dev; @@ -234,25 +259,32 @@ struct erdma_qp { struct erdma_cq *rcq; struct erdma_qp_attrs attrs; - spinlock_t lock; + }; struct erdma_kcq_info { void *qbuf; - dma_addr_t qbuf_dma_addr; + struct erdma_mem qbuf_mtt; + dma_addr_t dbrec_dma; u32 ci; u32 cmdsn; u32 notify_cnt; spinlock_t lock; u8 __iomem *db; - u64 *db_record; + u64 *dbrec; }; struct erdma_ucq_info { - struct erdma_mem qbuf_mem; + struct erdma_mem qbuf_mtt; struct erdma_user_dbrecords_page *user_dbr_page; - dma_addr_t db_info_dma_addr; + dma_addr_t dbrec_dma; +}; + +struct erdma_dim { + enum ib_cq_notify_flags flags; + struct hrtimer timer; + u16 timeout; }; struct erdma_cq { @@ -266,6 +298,10 @@ struct erdma_cq { struct erdma_kcq_info kern_cq; struct erdma_ucq_info user_cq; }; + + struct erdma_dim dim; + bool is_soft; + struct sw_cq *sw_cq; }; #define QP_ID(qp) ((qp)->ibqp.qp_num) @@ -318,17 +354,20 @@ to_emmap(struct rdma_user_mmap_entry *ibmmap) return container_of(ibmmap, struct erdma_user_mmap_entry, rdma_entry); } +enum hrtimer_restart cq_timer_fn(struct hrtimer *t); + int erdma_alloc_ucontext(struct ib_ucontext *ibctx, struct ib_udata *data); void 
erdma_dealloc_ucontext(struct ib_ucontext *ibctx); int erdma_query_device(struct ib_device *dev, struct ib_device_attr *attr, struct ib_udata *data); -int erdma_get_port_immutable(struct ib_device *dev, u32 port, +int erdma_get_port_immutable(struct ib_device *dev, port_t port, struct ib_port_immutable *ib_port_immutable); int erdma_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr, struct ib_udata *data); -int erdma_query_port(struct ib_device *dev, u32 port, + +int erdma_query_port(struct ib_device *dev, port_t port, struct ib_port_attr *attr); -int erdma_query_gid(struct ib_device *dev, u32 port, int idx, +int erdma_query_gid(struct ib_device *dev, port_t port, int idx, union ib_gid *gid); int erdma_alloc_pd(struct ib_pd *ibpd, struct ib_udata *data); int erdma_dealloc_pd(struct ib_pd *ibpd, struct ib_udata *udata); @@ -359,7 +398,22 @@ struct ib_mr *erdma_ib_alloc_mr(struct ib_pd *ibpd, enum ib_mr_type mr_type, u32 max_num_sg); int erdma_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents, unsigned int *sg_offset); +void erdma_disassociate_ucontext(struct ib_ucontext *ibcontext); void erdma_port_event(struct erdma_dev *dev, enum ib_event_type reason); void erdma_set_mtu(struct erdma_dev *dev, u32 mtu); +int erdma_set_retrans_num(struct erdma_dev *dev, u32 retrans_num); + +struct net_device *erdma_get_netdev(struct ib_device *device, port_t port_num); +enum rdma_link_layer erdma_get_link_layer(struct ib_device *dev, + port_t port_num); +int erdma_query_pkey(struct ib_device *ibdev, port_t port, u16 index, + u16 *pkey); +int erdma_modify_cq(struct ib_cq *ibcq, u16 cq_count, u16 cq_period); + +int erdma_query_hw_stats(struct erdma_dev *dev); +const struct cpumask *erdma_get_vector_affinity(struct ib_device *ibdev, + int comp_vector); + +#include "erdma_compat.h" #endif diff --git a/drivers/infiniband/hw/erdma/kcompat.h b/drivers/infiniband/hw/erdma/kcompat.h new file mode 100644 index 0000000000000000000000000000000000000000..a76a41ba5bb5224a4de4e808bd1abfd696d7c177 --- /dev/null +++ b/drivers/infiniband/hw/erdma/kcompat.h @@ -0,0 +1,39 @@ +/* SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause */ + +/* Authors: Cheng Xu */ +/* Kai Shen */ +/* Copyright (c) 2020-2022, Alibaba Group. */ + +/* + * Copyright 2018-2021 Amazon.com, Inc. or its affiliates. All rights reserved. 
+ */ + +#ifndef __KCOMPAT_H__ +#define __KCOMPAT_H__ + +#include +#include +#include +#include + +#define ERDMA_MAJOR_VER 0 +#define ERDMA_MEDIUM_VER 2 +#define ERDMA_MINOR_VER 38 + +#include +#define RDMA_DRIVER_ERDMA 19 + +#define upper_16_bits(n) ((u16)((n) >> 16)) +#define lower_16_bits(n) ((u16)((n) & 0xffff)) + +typedef u32 port_t; + +#include +#include +#include +#include +#include + +#define IB_QP_CREATE_IWARP_WITHOUT_CM (1 << 27) + +#endif diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 62f9d126a71ad15273748be598fc607af6927119..9f1b3a8a822d82047578773f9b53c4bd5c13bd19 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -1159,6 +1159,8 @@ enum ib_qp_create_flags { IB_UVERBS_QP_CREATE_PCI_WRITE_END_PADDING, /* reserve bits 26-31 for low level drivers' internal use */ IB_QP_CREATE_RESERVED_START = 1 << 26, + /* reserve for eRDMA OOB connection establishment */ + IB_QP_CREATE_IWARP_WITHOUT_CM = 1 << 27, IB_QP_CREATE_RESERVED_END = 1 << 31, }; @@ -2290,6 +2292,26 @@ struct uverbs_attr_bundle; struct iw_cm_id; struct iw_cm_conn_param; +struct iw_ext_conn_param { + struct { + union { + __be32 daddr_v4; +#if IS_ENABLED(CONFIG_IPV6) + struct in6_addr daddr_v6; +#endif + }; + union { + __be32 saddr_v4; +#if IS_ENABLED(CONFIG_IPV6) + struct in6_addr saddr_v6; +#endif + }; + __be16 dport; + __u16 sport; + unsigned short family; + } sk_addr; +}; + #define INIT_RDMA_OBJ_SIZE(ib_struct, drv_struct, member) \ .size_##ib_struct = \ (sizeof(struct drv_struct) + \ diff --git a/include/uapi/rdma/erdma-abi.h b/include/uapi/rdma/erdma-abi.h index b7a0222f978f6f18a7c7735938cc553f69c2c66b..455046415983589d36c3cdf6401fc2c70742f26a 100644 --- a/include/uapi/rdma/erdma-abi.h +++ b/include/uapi/rdma/erdma-abi.h @@ -1,4 +1,4 @@ -/* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause) */ +/* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR Linux-OpenIB) */ /* * Copyright (c) 2020-2022, Alibaba Group. */ @@ -40,10 +40,13 @@ struct erdma_uresp_alloc_ctx { __u32 dev_id; __u32 pad; __u32 sdb_type; - __u32 sdb_offset; + __u32 sdb_entid; __aligned_u64 sdb; __aligned_u64 rdb; __aligned_u64 cdb; + __u32 sdb_off; + __u32 rdb_off; + __u32 cdb_off; }; #endif
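For reference, a minimal sketch of how a kernel consumer might drive the CM-less ("OOB") connection path this patch wires up through IB_QP_CREATE_IWARP_WITHOUT_CM, struct iw_ext_conn_param and the IB_QP_RESERVED1 (IB_QP_OOB_CONN_ATTR) modify mask. This is illustrative, not part of the patch: the QP sizes and signaling mode are placeholders, and the exchange of the peer QPN is assumed to happen out of band.

#include <linux/socket.h>
#include <rdma/ib_verbs.h>

/* Illustrative only; values marked "assumed" are not defined by the patch. */
static struct ib_qp *erdma_oob_qp_create(struct ib_pd *pd, struct ib_cq *cq,
					 struct iw_ext_conn_param *param)
{
	struct ib_qp_init_attr init_attr = {
		.qp_type      = IB_QPT_RC,
		.send_cq      = cq,
		.recv_cq      = cq,
		.sq_sig_type  = IB_SIGNAL_REQ_WR,	/* assumed */
		.cap = {
			.max_send_wr  = 64,		/* assumed */
			.max_recv_wr  = 64,		/* assumed */
			.max_send_sge = 1,
			.max_recv_sge = 1,
		},
		/* init_kernel_qp() reads the peer addressing from qp_context. */
		.create_flags = IB_QP_CREATE_IWARP_WITHOUT_CM,
		.qp_context   = param,
	};

	return ib_create_qp(pd, &init_attr);
}

static int erdma_oob_qp_connect(struct ib_qp *qp, u32 remote_qpn)
{
	struct ib_qp_attr attr = {
		.qp_state    = IB_QPS_RTS,
		.dest_qp_num = remote_qpn,	/* exchanged out of band */
	};

	/* IB_QP_RESERVED1 is reused by erdma as IB_QP_OOB_CONN_ATTR. */
	return ib_modify_qp(qp, &attr,
			    IB_QP_STATE | IB_QP_DEST_QPN | IB_QP_RESERVED1);
}

Per init_kernel_qp() and update_kernel_qp_oob_attr(), the caller fills param->sk_addr with family = AF_INET, the IPv4 addresses in network byte order, dport in network byte order (the driver converts it with ntohs()) and sport copied as-is, and must keep the structure alive while the QP exists, since the driver re-reads it through qp_context at modify time.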
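The fw_ver repacking in erdma_query_device() spreads the 32-bit firmware word across the 64-bit ib_device_attr::fw_ver, assuming (from the shift expression) a 16-bit major component followed by two 8-bit components. A hypothetical decode helper that mirrors that packing:

/* Hypothetical helper; mirrors the packing done in erdma_query_device(). */
static void erdma_fw_ver_str(u64 fw_ver, char *buf, size_t len)
{
	snprintf(buf, len, "%u.%u.%u",
		 (u32)(fw_ver >> 32),		/* major:  fw_version[31:16] */
		 (u32)(fw_ver >> 16) & 0xffff,	/* medium: fw_version[15:8]  */
		 (u32)fw_ver & 0xffff);		/* minor:  fw_version[7:0]   */
}

For example, a raw firmware word of 0x00000226 becomes fw_ver 0x20026 and renders as "0.2.38" under this assumed layout.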
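The reworked erdma_create_mtt() keeps wrapping the scatter MTT in further indirection levels until the top-level buffer fits in a single page (mtt->npages == 1), with the loop capped at three extra levels. Rough sizing, assuming 4 KiB pages and the 8-byte ERDMA_MTT_ENTRY_SIZE: one MTT page holds 512 entries, so one level addresses 512 data pages (2 MiB at 4 KiB granularity), two levels about 256 K pages (1 GiB), three levels about 128 M pages (512 GiB). An illustrative helper mirroring that loop:

/* Illustrative only: how many MTT levels a region of page_cnt data pages
 * would need under the convergence loop in erdma_create_mtt().
 */
static u32 erdma_mtt_levels_needed(u64 page_cnt)
{
	u64 npages = DIV_ROUND_UP(page_cnt * ERDMA_MTT_ENTRY_SIZE, PAGE_SIZE);
	u32 level = 1;

	/* Mirrors "while (mtt->npages != 1 && level <= 3)" in the patch. */
	while (npages > 1 && level <= 3) {
		npages = DIV_ROUND_UP(npages,
				      PAGE_SIZE / ERDMA_MTT_ENTRY_SIZE);
		level++;
	}

	return level;
}

For instance, 512 data pages stay at level 1, while 513 pages need a second level, matching the sizes above.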
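The coherent queue buffers with trailing doorbell records are replaced by vmalloc'ed queues plus small doorbell records carved out of dev->db_pool via dma_pool_zalloc()/dma_pool_free(). The pool creation itself is not in these hunks; a plausible sketch, with the pool name, element size and alignment assumed to be ERDMA_DB_SIZE-based:

#include <linux/dmapool.h>

/* Assumed setup; the real name, size and alignment are chosen elsewhere. */
static int erdma_init_db_pool(struct erdma_dev *dev)
{
	dev->db_pool = dma_pool_create("erdma_db", &dev->pdev->dev,
				       ERDMA_DB_SIZE, ERDMA_DB_SIZE, 0);
	return dev->db_pool ? 0 : -ENOMEM;
}

static void erdma_fini_db_pool(struct erdma_dev *dev)
{
	dma_pool_destroy(dev->db_pool);
}

A per-device pool keeps each doorbell record in its own DMA-coherent slot, which is what lets free_kernel_qp()/free_kernel_cq() release them independently of the (now vmalloc'ed) queue buffers.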