diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c index 41cbc7c89c9d21ddcf0b6bd71e05cc9fbaa2917c..9b2c61f93539b8f242ece81531937073f92ccc0c 100644 --- a/net/smc/af_smc.c +++ b/net/smc/af_smc.c @@ -517,6 +517,13 @@ static void smc_link_save_peer_info(struct smc_link *link, memcpy(link->peer_mac, clc->r0.lcl.mac, sizeof(link->peer_mac)); link->peer_psn = ntoh24(clc->r0.psn); link->peer_mtu = clc->r0.qp_mtu; + link->credits_enable = clc->r0.init_credits ? 1 : 0; + if (link->credits_enable) { + atomic_set(&link->peer_rq_credits, clc->r0.init_credits); + // set peer rq credits watermark, if less than init_credits * 2/3, + // then credit announcement is needed. + link->peer_cr_watermark_low = max(clc->r0.init_credits * 2 / 3, 1); + } } static void smc_switch_to_fallback(struct smc_sock *smc) @@ -809,6 +816,11 @@ static int smc_connect_rdma(struct smc_sock *smc, goto connect_abort; } } else { + if (smc_llc_announce_credits(link, SMC_LLC_RESP, true)) { + reason_code = SMC_CLC_DECL_CREDITSERR; + goto connect_abort; + } + if (smcr_lgr_reg_rmbs(link, smc->conn.rmb_desc)) { reason_code = SMC_CLC_DECL_ERR_REGRMB; goto connect_abort; diff --git a/net/smc/smc_cdc.c b/net/smc/smc_cdc.c index 94503f36b9a6113513d34b8c9c880dcb172c350f..3156be1e5eb3e54164a002ffee4736b02364c0d5 100644 --- a/net/smc/smc_cdc.c +++ b/net/smc/smc_cdc.c @@ -101,25 +101,31 @@ int smc_cdc_msg_send(struct smc_connection *conn, struct smc_cdc_tx_pend *pend) { struct smc_link *link = conn->lnk; + struct smc_cdc_msg *cdc_msg = (struct smc_cdc_msg *)wr_buf; union smc_host_cursor cfed; + u8 saved_credits = 0; int rc; smc_cdc_add_pending_send(conn, pend); conn->tx_cdc_seq++; conn->local_tx_ctrl.seqno = conn->tx_cdc_seq; - smc_host_msg_to_cdc((struct smc_cdc_msg *)wr_buf, conn, &cfed); + smc_host_msg_to_cdc(cdc_msg, conn, &cfed); + if (smc_wr_rx_credits_need_announce_frequent(link)) + saved_credits = (u8)smc_wr_rx_get_credits(link); + cdc_msg->credits = saved_credits; atomic_inc(&conn->cdc_pend_tx_wr); smp_mb__after_atomic(); /* Make sure cdc_pend_tx_wr added before post */ rc = smc_wr_tx_send(link, (struct smc_wr_tx_pend_priv *)pend); - if (!rc) { + if (likely(!rc)) { smc_curs_copy(&conn->rx_curs_confirmed, &cfed, conn); conn->local_rx_ctrl.prod_flags.cons_curs_upd_req = 0; } else { conn->tx_cdc_seq--; conn->local_tx_ctrl.seqno = conn->tx_cdc_seq; + smc_wr_rx_put_credits(link, saved_credits); atomic_dec(&conn->cdc_pend_tx_wr); } @@ -430,6 +436,9 @@ static void smc_cdc_rx_handler(struct ib_wc *wc, void *buf) if (cdc->len != SMC_WR_TX_SIZE) return; /* invalid message */ + if (cdc->credits) + smc_wr_tx_put_credits(link, cdc->credits, true); + /* lookup connection */ lgr = smc_get_lgr(link); read_lock_bh(&lgr->conns_lock); diff --git a/net/smc/smc_cdc.h b/net/smc/smc_cdc.h index 696cc11f2303b95318f6750479bb8abffde3ca24..145ce7997e64207427b297d7f5c9bdd957d03704 100644 --- a/net/smc/smc_cdc.h +++ b/net/smc/smc_cdc.h @@ -47,7 +47,8 @@ struct smc_cdc_msg { union smc_cdc_cursor cons; /* piggy backed "ack" */ struct smc_cdc_producer_flags prod_flags; struct smc_cdc_conn_state_flags conn_state_flags; - u8 reserved[18]; + u8 credits; /* credits synced by every cdc msg */ + u8 reserved[17]; }; /* SMC-D cursor format */ diff --git a/net/smc/smc_clc.c b/net/smc/smc_clc.c index 5ee5b2ce29a6e2c99d4f745b6cd435079a385489..76fe3d5eb27a51899e2dc8211edf548b7db119ea 100644 --- a/net/smc/smc_clc.c +++ b/net/smc/smc_clc.c @@ -699,9 +699,12 @@ static int smc_clc_send_confirm_accept(struct smc_sock *smc, switch (clc->hdr.type) { case SMC_CLC_ACCEPT: clc->r0.qp_mtu = link->path_mtu; + clc->r0.init_credits = (u8)link->wr_rx_cnt; break; case SMC_CLC_CONFIRM: clc->r0.qp_mtu = min(link->path_mtu, link->peer_mtu); + clc->r0.init_credits = + link->credits_enable ? (u8)link->wr_rx_cnt : 0; break; } clc->r0.rmbe_size = conn->rmbe_size_short; diff --git a/net/smc/smc_clc.h b/net/smc/smc_clc.h index c579d1d5995a9e94c608eb4f3db521c97d471f5d..d152d134685f14b339edb61ccc7876ae4eba93e9 100644 --- a/net/smc/smc_clc.h +++ b/net/smc/smc_clc.h @@ -53,6 +53,7 @@ #define SMC_CLC_DECL_ERR_RTOK 0x09990001 /* rtoken handling failed */ #define SMC_CLC_DECL_ERR_RDYLNK 0x09990002 /* ib ready link failed */ #define SMC_CLC_DECL_ERR_REGRMB 0x09990003 /* reg rmb failed */ +#define SMC_CLC_DECL_CREDITSERR 0x09990004 /* announce credits failed */ #define SMC_FIRST_CONTACT_MASK 0b10 /* first contact bit within typev2 */ @@ -178,7 +179,7 @@ struct smcr_clc_msg_accept_confirm { /* SMCR accept/confirm */ u8 qp_mtu : 4, rmbe_size : 4; #endif - u8 reserved; + u8 init_credits; /* QP rq init credits for rq flowctrl */ __be64 rmb_dma_addr; /* RMB virtual address */ u8 reserved2; u8 psn[3]; /* packet sequence number */ diff --git a/net/smc/smc_core.h b/net/smc/smc_core.h index 9364d0f35ccecf7352ce59a9ae26ef2b7d7df686..c0f00f7198c3e6ed2db35ad54458bdf2e930b7f8 100644 --- a/net/smc/smc_core.h +++ b/net/smc/smc_core.h @@ -18,7 +18,12 @@ #include "smc.h" #include "smc_ib.h" -#define SMC_RMBS_PER_LGR_MAX 255 /* max. # of RMBs per link group */ +#define SMC_RMBS_PER_LGR_MAX 32 /* max. # of RMBs per link group. Correspondingly, + * SMC_WR_BUF_CNT should not be less than 2 * + * SMC_RMBS_PER_LGR_MAX, since every connection at + * least has two rq/sq credits in average, otherwise + * may result in waiting for credits in sending process. + */ struct smc_lgr_list { /* list of link group definition */ struct list_head list; @@ -72,6 +77,8 @@ struct smc_rdma_wr { /* work requests per message #define SMC_LGR_ID_SIZE 4 +#define SMC_LINKFLAG_ANNOUNCE_PENDING 0 + struct smc_link { struct smc_ib_device *smcibdev; /* ib-device */ u8 ibport; /* port - values 1 | 2 */ @@ -110,6 +117,15 @@ struct smc_link { atomic_t wr_reg_refcnt; /* reg refs to link */ enum smc_wr_reg_state wr_reg_state; /* state of wr_reg request */ + atomic_t peer_rq_credits; /* credits for peer rq flowctrl */ + atomic_t local_rq_credits; /* credits for local rq flowctrl */ + u8 credits_enable; /* credits enable flag, set when negotiation */ + u8 local_cr_watermark_high; /* local rq credits watermark */ + u8 peer_cr_watermark_low; /* peer rq credits watermark */ + u8 credits_update_limit; /* credits update limit for cdc msg */ + struct work_struct credits_announce_work; /* work for credits announcement */ + unsigned long flags; /* link flags, SMC_LINKFLAG_ANNOUNCE_PENDING .etc */ + u8 gid[SMC_GID_SIZE];/* gid matching used vlan id*/ u8 sgid_index; /* gid index for vlan id */ u32 peer_qpn; /* QP number of peer */ diff --git a/net/smc/smc_ib.c b/net/smc/smc_ib.c index f1ffbd414602ee7f9dfc42f4cfdc73151031ed80..4ec4c57a80b89ff91ea59df001aa411a2e36d9df 100644 --- a/net/smc/smc_ib.c +++ b/net/smc/smc_ib.c @@ -122,10 +122,11 @@ int smc_ib_ready_link(struct smc_link *lnk) if (rc) goto out; smc_wr_remember_qp_attr(lnk); - rc = ib_req_notify_cq(lnk->smcibdev->roce_cq_recv, - IB_CQ_SOLICITED_MASK); + rc = ib_req_notify_cq(lnk->smcibdev->roce_cq_send, + IB_CQ_NEXT_COMP | IB_CQ_REPORT_MISSED_EVENTS); if (rc) goto out; + rc = smc_wr_rx_post_init(lnk); if (rc) goto out; @@ -365,10 +366,12 @@ int smc_ib_create_queue_pair(struct smc_link *lnk) .srq = NULL, .cap = { /* include unsolicited rdma_writes as well, - * there are max. 2 RDMA_WRITE per 1 WR_SEND + * there are max. 2 RDMA_WRITE per 1 WR_SEND. + * RDMA_WRITE consumes send queue entities, + * without recv queue entities. */ .max_send_wr = SMC_WR_BUF_CNT * 3, - .max_recv_wr = SMC_WR_BUF_CNT * 3, + .max_recv_wr = SMC_WR_BUF_CNT, .max_send_sge = SMC_IB_MAX_SEND_SGE, .max_recv_sge = 1, }, diff --git a/net/smc/smc_llc.c b/net/smc/smc_llc.c index 0ef15f8fba902062aab7b81bb7c1410ead7763f8..e45cd3e7e5c240abe10a75715f7704d34f679e7e 100644 --- a/net/smc/smc_llc.c +++ b/net/smc/smc_llc.c @@ -67,7 +67,8 @@ struct smc_llc_msg_add_link { /* type 0x02 */ reserved3 : 4; #endif u8 initial_psn[3]; - u8 reserved[8]; + u8 init_credits; /* QP rq init credits for rq flowctrl */ + u8 reserved[7]; }; struct smc_llc_msg_add_link_cont_rt { @@ -135,6 +136,12 @@ struct smc_llc_msg_delete_rkey { /* type 0x09 */ u8 reserved2[4]; }; +struct smc_llc_msg_announce_credits { /* type 0x0A */ + struct smc_llc_hdr hd; + u8 credits; + u8 reserved[39]; +}; + union smc_llc_msg { struct smc_llc_msg_confirm_link confirm_link; struct smc_llc_msg_add_link add_link; @@ -145,6 +152,7 @@ union smc_llc_msg { struct smc_llc_msg_delete_rkey delete_rkey; struct smc_llc_msg_test_link test_link; + struct smc_llc_msg_announce_credits announce_credits; struct { struct smc_llc_hdr hdr; u8 data[SMC_LLC_DATA_LEN]; @@ -584,6 +592,46 @@ static int smc_llc_send_test_link(struct smc_link *link, u8 user_data[16]) return rc; } +/* send credits announce request or response */ +int smc_llc_announce_credits(struct smc_link *link, + enum smc_llc_reqresp reqresp, bool force) +{ + struct smc_llc_msg_announce_credits *announce_credits; + struct smc_wr_tx_pend_priv *pend; + struct smc_wr_buf *wr_buf; + int rc; + u8 saved_credits = 0; + + if (!link->credits_enable || + (!force && !smc_wr_rx_credits_need_announce(link))) + return 0; + + saved_credits = (u8)smc_wr_rx_get_credits(link); + if (!saved_credits) + /* maybe synced by cdc msg */ + return 0; + + rc = smc_llc_add_pending_send(link, &wr_buf, &pend); + if (rc) { + smc_wr_rx_put_credits(link, saved_credits); + return rc; + } + + announce_credits = (struct smc_llc_msg_announce_credits *)wr_buf; + memset(announce_credits, 0, sizeof(*announce_credits)); + announce_credits->hd.common.type = SMC_LLC_ANNOUNCE_CREDITS; + announce_credits->hd.length = sizeof(struct smc_llc_msg_announce_credits); + if (reqresp == SMC_LLC_RESP) + announce_credits->hd.flags |= SMC_LLC_FLAG_RESP; + announce_credits->credits = saved_credits; + /* send llc message */ + rc = smc_wr_tx_send(link, pend); + if (rc) + smc_wr_rx_put_credits(link, saved_credits); + + return rc; +} + /* schedule an llc send on link, may wait for buffers */ static int smc_llc_send_message(struct smc_link *link, void *llcbuf) { @@ -862,6 +910,13 @@ static void smc_llc_save_add_link_info(struct smc_link *link, memcpy(link->peer_mac, add_llc->sender_mac, ETH_ALEN); link->peer_psn = ntoh24(add_llc->initial_psn); link->peer_mtu = add_llc->qp_mtu; + link->credits_enable = add_llc->init_credits ? 1 : 0; + if (link->credits_enable) { + atomic_set(&link->peer_rq_credits, add_llc->init_credits); + // set peer rq credits watermark, if less than init_credits * 2/3, + // then credit announcement is needed. + link->peer_cr_watermark_low = max(add_llc->init_credits * 2 / 3, 1); + } } /* as an SMC client, process an add link request */ @@ -1620,6 +1675,10 @@ static void smc_llc_event_handler(struct smc_llc_qentry *qentry) smc_llc_flow_stop(lgr, &lgr->llc_flow_rmt); } return; + case SMC_LLC_ANNOUNCE_CREDITS: + if (smc_link_active(link)) + smc_wr_tx_put_credits(link, llc->announce_credits.credits, true); + break; default: smc_llc_protocol_violation(lgr, llc->raw.hdr.common.type); break; @@ -1688,6 +1747,10 @@ static void smc_llc_rx_response(struct smc_link *link, case SMC_LLC_CONFIRM_RKEY_CONT: /* not used because max links is 3 */ break; + case SMC_LLC_ANNOUNCE_CREDITS: + if (smc_link_active(link)) + smc_wr_tx_put_credits(link, qentry->msg.announce_credits.credits, true); + break; default: smc_llc_protocol_violation(link->lgr, llc_type); break; @@ -1774,6 +1837,27 @@ static void smc_llc_testlink_work(struct work_struct *work) schedule_delayed_work(&link->llc_testlink_wrk, next_interval); } +static void smc_llc_announce_credits_work(struct work_struct *work) +{ + struct smc_link *link = container_of(work, + struct smc_link, credits_announce_work); + int rc, retry = 0, agains = 0; + +again: + do { + rc = smc_llc_announce_credits(link, SMC_LLC_RESP, false); + } while ((rc == -EBUSY) && smc_link_sendable(link) && + (retry++ < SMC_LLC_ANNOUNCE_CR_MAX_RETRY)); + + if (smc_wr_rx_credits_need_announce(link) && + smc_link_sendable(link) && agains <= 5 && !rc) { + agains++; + goto again; + } + + clear_bit(SMC_LINKFLAG_ANNOUNCE_PENDING, &link->flags); +} + void smc_llc_lgr_init(struct smc_link_group *lgr, struct smc_sock *smc) { struct net *net = sock_net(smc->clcsock->sk); @@ -1809,6 +1893,7 @@ int smc_llc_link_init(struct smc_link *link) { init_completion(&link->llc_testlink_resp); INIT_DELAYED_WORK(&link->llc_testlink_wrk, smc_llc_testlink_work); + INIT_WORK(&link->credits_announce_work, smc_llc_announce_credits_work); return 0; } @@ -1840,6 +1925,7 @@ void smc_llc_link_clear(struct smc_link *link, bool log) link->smcibdev->ibdev->name, link->ibport); complete(&link->llc_testlink_resp); cancel_delayed_work_sync(&link->llc_testlink_wrk); + cancel_work_sync(&link->credits_announce_work); } /* register a new rtoken at the remote peer (for all links) */ @@ -1954,6 +2040,10 @@ static struct smc_wr_rx_handler smc_llc_rx_handlers[] = { .handler = smc_llc_rx_handler, .type = SMC_LLC_DELETE_RKEY }, + { + .handler = smc_llc_rx_handler, + .type = SMC_LLC_ANNOUNCE_CREDITS + }, { .handler = NULL, } diff --git a/net/smc/smc_llc.h b/net/smc/smc_llc.h index cc00a2ec4e92e8060bce1926169832f7f7d96eb2..7694ff573e3496f69783005ce8be83f855e0b153 100644 --- a/net/smc/smc_llc.h +++ b/net/smc/smc_llc.h @@ -20,6 +20,8 @@ #define SMC_LLC_WAIT_FIRST_TIME (5 * HZ) #define SMC_LLC_WAIT_TIME (2 * HZ) +#define SMC_LLC_ANNOUNCE_CR_MAX_RETRY (1) + enum smc_llc_reqresp { SMC_LLC_REQ, SMC_LLC_RESP @@ -34,6 +36,7 @@ enum smc_llc_msg_type { SMC_LLC_TEST_LINK = 0x07, SMC_LLC_CONFIRM_RKEY_CONT = 0x08, SMC_LLC_DELETE_RKEY = 0x09, + SMC_LLC_ANNOUNCE_CREDITS = 0X0A, }; #define smc_link_downing(state) \ @@ -77,6 +80,8 @@ int smc_llc_send_add_link(struct smc_link *link, u8 mac[], u8 gid[], int smc_llc_send_delete_link(struct smc_link *link, u8 link_del_id, enum smc_llc_reqresp reqresp, bool orderly, u32 reason); +int smc_llc_announce_credits(struct smc_link *link, + enum smc_llc_reqresp reqresp, bool force); void smc_llc_srv_delete_link_local(struct smc_link *link, u8 del_link_id); void smc_llc_lgr_init(struct smc_link_group *lgr, struct smc_sock *smc); void smc_llc_lgr_clear(struct smc_link_group *lgr); diff --git a/net/smc/smc_wr.c b/net/smc/smc_wr.c index 5a81f8c9ebf9050297163abe3e8093c1de3aabc0..ba47f261bb0604be5c403ed0bc773766e27b5437 100644 --- a/net/smc/smc_wr.c +++ b/net/smc/smc_wr.c @@ -115,7 +115,8 @@ static inline void smc_wr_tx_process_cqe(struct ib_wc *wc) } if (pnd_snd.handler) pnd_snd.handler(&pnd_snd.priv, link, wc->status); - wake_up(&link->wr_tx_wait); + if (wq_has_sleeper(&link->wr_tx_wait)) + wake_up(&link->wr_tx_wait); } static void smc_wr_tx_tasklet_fn(unsigned long data) @@ -158,11 +159,16 @@ static inline int smc_wr_tx_get_free_slot_index(struct smc_link *link, u32 *idx) *idx = link->wr_tx_cnt; if (!smc_link_sendable(link)) return -ENOLINK; + + if (!smc_wr_tx_get_credit(link)) + return -EBUSY; + for_each_clear_bit(*idx, link->wr_tx_mask, link->wr_tx_cnt) { if (!test_and_set_bit(*idx, link->wr_tx_mask)) return 0; } *idx = link->wr_tx_cnt; + smc_wr_tx_put_credits(link, 1, false); return -EBUSY; } @@ -241,7 +247,7 @@ int smc_wr_tx_put_slot(struct smc_link *link, memset(&link->wr_tx_bufs[idx], 0, sizeof(link->wr_tx_bufs[idx])); test_and_clear_bit(idx, link->wr_tx_mask); - wake_up(&link->wr_tx_wait); + smc_wr_tx_put_credits(link, 1, true); return 1; } @@ -256,8 +262,6 @@ int smc_wr_tx_send(struct smc_link *link, struct smc_wr_tx_pend_priv *priv) struct smc_wr_tx_pend *pend; int rc; - ib_req_notify_cq(link->smcibdev->roce_cq_send, - IB_CQ_NEXT_COMP | IB_CQ_REPORT_MISSED_EVENTS); pend = container_of(priv, struct smc_wr_tx_pend, priv); rc = ib_post_send(link->roce_qp, &link->wr_tx_ibs[pend->idx], NULL); if (rc) { @@ -405,6 +409,12 @@ static inline void smc_wr_rx_process_cqes(struct ib_wc wc[], int num) break; } } + + if (smc_wr_rx_credits_need_announce(link) && + !test_bit(SMC_LINKFLAG_ANNOUNCE_PENDING, &link->flags)) { + set_bit(SMC_LINKFLAG_ANNOUNCE_PENDING, &link->flags); + schedule_work(&link->credits_announce_work); + } } } @@ -447,6 +457,8 @@ int smc_wr_rx_post_init(struct smc_link *link) for (i = 0; i < link->wr_rx_cnt; i++) rc = smc_wr_rx_post(link); + // credits have already been announced to peer + atomic_set(&link->local_rq_credits, 0); return rc; } @@ -481,7 +493,7 @@ void smc_wr_remember_qp_attr(struct smc_link *lnk) lnk->wr_tx_cnt = min_t(size_t, SMC_WR_BUF_CNT, lnk->qp_attr.cap.max_send_wr); - lnk->wr_rx_cnt = min_t(size_t, SMC_WR_BUF_CNT * 3, + lnk->wr_rx_cnt = min_t(size_t, SMC_WR_BUF_CNT, lnk->qp_attr.cap.max_recv_wr); } @@ -592,7 +604,7 @@ int smc_wr_alloc_link_mem(struct smc_link *link) link->wr_tx_bufs = kcalloc(SMC_WR_BUF_CNT, SMC_WR_BUF_SIZE, GFP_KERNEL); if (!link->wr_tx_bufs) goto no_mem; - link->wr_rx_bufs = kcalloc(SMC_WR_BUF_CNT * 3, SMC_WR_BUF_SIZE, + link->wr_rx_bufs = kcalloc(SMC_WR_BUF_CNT, SMC_WR_BUF_SIZE, GFP_KERNEL); if (!link->wr_rx_bufs) goto no_mem_wr_tx_bufs; @@ -600,7 +612,7 @@ int smc_wr_alloc_link_mem(struct smc_link *link) GFP_KERNEL); if (!link->wr_tx_ibs) goto no_mem_wr_rx_bufs; - link->wr_rx_ibs = kcalloc(SMC_WR_BUF_CNT * 3, + link->wr_rx_ibs = kcalloc(SMC_WR_BUF_CNT, sizeof(link->wr_rx_ibs[0]), GFP_KERNEL); if (!link->wr_rx_ibs) @@ -619,7 +631,7 @@ int smc_wr_alloc_link_mem(struct smc_link *link) GFP_KERNEL); if (!link->wr_tx_sges) goto no_mem_wr_tx_rdma_sges; - link->wr_rx_sges = kcalloc(SMC_WR_BUF_CNT * 3, + link->wr_rx_sges = kcalloc(SMC_WR_BUF_CNT, sizeof(link->wr_rx_sges[0]), GFP_KERNEL); if (!link->wr_rx_sges) @@ -708,6 +720,16 @@ int smc_wr_create_link(struct smc_link *lnk) atomic_set(&lnk->wr_tx_refcnt, 0); init_waitqueue_head(&lnk->wr_reg_wait); atomic_set(&lnk->wr_reg_refcnt, 0); + atomic_set(&lnk->peer_rq_credits, 0); + atomic_set(&lnk->local_rq_credits, 0); + lnk->flags = 0; + lnk->local_cr_watermark_high = max(lnk->wr_rx_cnt / 3, 1U); + lnk->peer_cr_watermark_low = 0; + + /* if credits accumlated less than 10% of wr_rx_cnt(at least 5), + * will not be announced by cdc msg. + */ + lnk->credits_update_limit = max(lnk->wr_rx_cnt / 10, 5U); return rc; dma_unmap: diff --git a/net/smc/smc_wr.h b/net/smc/smc_wr.h index cb58e60078f57aabef19d4a8ad29a184246bffe0..119f1aa0076ee4b975f0afc59bb7432ca740995d 100644 --- a/net/smc/smc_wr.h +++ b/net/smc/smc_wr.h @@ -19,7 +19,12 @@ #include "smc.h" #include "smc_core.h" -#define SMC_WR_BUF_CNT 16 /* # of ctrl buffers per link */ +#define SMC_WR_BUF_CNT 64 /* # of ctrl buffers per link, SMC_WR_BUF_CNT + * should not be less than 2 * SMC_RMBS_PER_LGR_MAX, + * since every connection at least has two rq/sq + * credits in average, otherwise may result in + * waiting for credits in sending process. + */ #define SMC_WR_TX_WAIT_FREE_SLOT_TIME (10 * HZ) @@ -83,6 +88,62 @@ static inline void smc_wr_wakeup_reg_wait(struct smc_link *lnk) wake_up(&lnk->wr_reg_wait); } +// get one tx credit, and peer rq credits dec +static inline int smc_wr_tx_get_credit(struct smc_link *link) +{ + return !link->credits_enable || atomic_dec_if_positive(&link->peer_rq_credits) >= 0; +} + +// put tx credits, when some failures occurred after tx credits got +// or receive announce credits msgs +static inline void smc_wr_tx_put_credits(struct smc_link *link, int credits, bool wakeup) +{ + if (link->credits_enable && credits) { + atomic_add(credits, &link->peer_rq_credits); + if (wakeup && wq_has_sleeper(&link->wr_tx_wait)) + wake_up_nr(&link->wr_tx_wait, credits); + } +} + +// to check whether peer rq credits is lower than watermark. +static inline int smc_wr_tx_credits_need_announce(struct smc_link *link) +{ + return link->credits_enable && + atomic_read(&link->peer_rq_credits) <= link->peer_cr_watermark_low; +} + +// get local rq credits and set credits to zero. +// may called when announcing credits +static inline int smc_wr_rx_get_credits(struct smc_link *link) +{ + return link->credits_enable ? atomic_fetch_and(0, &link->local_rq_credits) : 0; +} + +// called when post_recv a rqe +static inline void smc_wr_rx_put_credits(struct smc_link *link, int credits) +{ + if (link->credits_enable && credits) + atomic_add(credits, &link->local_rq_credits); +} + +// to check whether local rq credits is higher than watermark. +static inline int smc_wr_rx_credits_need_announce(struct smc_link *link) +{ + return link->credits_enable && + atomic_read(&link->local_rq_credits) >= link->local_cr_watermark_high; +} + +static inline int smc_wr_rx_credits_need_announce_frequent(struct smc_link *link) +{ + /* announce when local rq credits accumulated more than credits_update_limit, or + * peer rq credits is empty. As peer credits empty and local credits is less than + * credits_update_limit, may results in credits deadlock. + */ + return link->credits_enable && + (atomic_read(&link->local_rq_credits) >= link->credits_update_limit || + !atomic_read(&link->peer_rq_credits)); +} + /* post a new receive work request to fill a completed old work request entry */ static inline int smc_wr_rx_post(struct smc_link *link) { @@ -95,6 +156,8 @@ static inline int smc_wr_rx_post(struct smc_link *link) index = do_div(temp_wr_id, link->wr_rx_cnt); link->wr_rx_ibs[index].wr_id = wr_id; rc = ib_post_recv(link->roce_qp, &link->wr_rx_ibs[index], NULL); + if (!rc) + smc_wr_rx_put_credits(link, 1); return rc; }