diff options
-rw-r--r-- | net/smc/af_smc.c | 66 | ||||
-rw-r--r-- | net/smc/smc_clc.c | 8 | ||||
-rw-r--r-- | net/smc/smc_clc.h | 2 | ||||
-rw-r--r-- | net/smc/smc_core.c | 213 | ||||
-rw-r--r-- | net/smc/smc_core.h | 10 | ||||
-rw-r--r-- | net/smc/smc_ib.c | 15 | ||||
-rw-r--r-- | net/smc/smc_llc.c | 33 | ||||
-rw-r--r-- | net/smc/smc_rx.c | 90 | ||||
-rw-r--r-- | net/smc/smc_tx.c | 9 |
9 files changed, 328 insertions, 118 deletions
diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c index 9497a3b9ad09..6e70d9c10b78 100644 --- a/net/smc/af_smc.c +++ b/net/smc/af_smc.c @@ -487,6 +487,29 @@ static void smc_copy_sock_settings_to_smc(struct smc_sock *smc) smc_copy_sock_settings(&smc->sk, smc->clcsock->sk, SK_FLAGS_CLC_TO_SMC); } +/* register the new vzalloced sndbuf on all links */ +static int smcr_lgr_reg_sndbufs(struct smc_link *link, + struct smc_buf_desc *snd_desc) +{ + struct smc_link_group *lgr = link->lgr; + int i, rc = 0; + + if (!snd_desc->is_vm) + return -EINVAL; + + /* protect against parallel smcr_link_reg_buf() */ + mutex_lock(&lgr->llc_conf_mutex); + for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) { + if (!smc_link_active(&lgr->lnk[i])) + continue; + rc = smcr_link_reg_buf(&lgr->lnk[i], snd_desc); + if (rc) + break; + } + mutex_unlock(&lgr->llc_conf_mutex); + return rc; +} + /* register the new rmb on all links */ static int smcr_lgr_reg_rmbs(struct smc_link *link, struct smc_buf_desc *rmb_desc) @@ -498,13 +521,13 @@ static int smcr_lgr_reg_rmbs(struct smc_link *link, if (rc) return rc; /* protect against parallel smc_llc_cli_rkey_exchange() and - * parallel smcr_link_reg_rmb() + * parallel smcr_link_reg_buf() */ mutex_lock(&lgr->llc_conf_mutex); for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) { if (!smc_link_active(&lgr->lnk[i])) continue; - rc = smcr_link_reg_rmb(&lgr->lnk[i], rmb_desc); + rc = smcr_link_reg_buf(&lgr->lnk[i], rmb_desc); if (rc) goto out; } @@ -550,8 +573,15 @@ static int smcr_clnt_conf_first_link(struct smc_sock *smc) smc_wr_remember_qp_attr(link); - if (smcr_link_reg_rmb(link, smc->conn.rmb_desc)) - return SMC_CLC_DECL_ERR_REGRMB; + /* reg the sndbuf if it was vzalloced */ + if (smc->conn.sndbuf_desc->is_vm) { + if (smcr_link_reg_buf(link, smc->conn.sndbuf_desc)) + return SMC_CLC_DECL_ERR_REGBUF; + } + + /* reg the rmb */ + if (smcr_link_reg_buf(link, smc->conn.rmb_desc)) + return SMC_CLC_DECL_ERR_REGBUF; /* confirm_rkey is implicit on 1st contact */ smc->conn.rmb_desc->is_conf_rkey = true; @@ -1221,8 +1251,15 @@ static int smc_connect_rdma(struct smc_sock *smc, goto connect_abort; } } else { + /* reg sendbufs if they were vzalloced */ + if (smc->conn.sndbuf_desc->is_vm) { + if (smcr_lgr_reg_sndbufs(link, smc->conn.sndbuf_desc)) { + reason_code = SMC_CLC_DECL_ERR_REGBUF; + goto connect_abort; + } + } if (smcr_lgr_reg_rmbs(link, smc->conn.rmb_desc)) { - reason_code = SMC_CLC_DECL_ERR_REGRMB; + reason_code = SMC_CLC_DECL_ERR_REGBUF; goto connect_abort; } } @@ -1749,8 +1786,15 @@ static int smcr_serv_conf_first_link(struct smc_sock *smc) struct smc_llc_qentry *qentry; int rc; - if (smcr_link_reg_rmb(link, smc->conn.rmb_desc)) - return SMC_CLC_DECL_ERR_REGRMB; + /* reg the sndbuf if it was vzalloced*/ + if (smc->conn.sndbuf_desc->is_vm) { + if (smcr_link_reg_buf(link, smc->conn.sndbuf_desc)) + return SMC_CLC_DECL_ERR_REGBUF; + } + + /* reg the rmb */ + if (smcr_link_reg_buf(link, smc->conn.rmb_desc)) + return SMC_CLC_DECL_ERR_REGBUF; /* send CONFIRM LINK request to client over the RoCE fabric */ rc = smc_llc_send_confirm_link(link, SMC_LLC_REQ); @@ -2109,8 +2153,14 @@ static int smc_listen_rdma_reg(struct smc_sock *new_smc, bool local_first) struct smc_connection *conn = &new_smc->conn; if (!local_first) { + /* reg sendbufs if they were vzalloced */ + if (conn->sndbuf_desc->is_vm) { + if (smcr_lgr_reg_sndbufs(conn->lnk, + conn->sndbuf_desc)) + return SMC_CLC_DECL_ERR_REGBUF; + } if (smcr_lgr_reg_rmbs(conn->lnk, conn->rmb_desc)) - return SMC_CLC_DECL_ERR_REGRMB; + return SMC_CLC_DECL_ERR_REGBUF; } return 0; diff --git a/net/smc/smc_clc.c b/net/smc/smc_clc.c index f9f3f59c79de..1472f31480d8 100644 --- a/net/smc/smc_clc.c +++ b/net/smc/smc_clc.c @@ -1034,7 +1034,7 @@ static int smc_clc_send_confirm_accept(struct smc_sock *smc, ETH_ALEN); hton24(clc->r0.qpn, link->roce_qp->qp_num); clc->r0.rmb_rkey = - htonl(conn->rmb_desc->mr_rx[link->link_idx]->rkey); + htonl(conn->rmb_desc->mr[link->link_idx]->rkey); clc->r0.rmbe_idx = 1; /* for now: 1 RMB = 1 RMBE */ clc->r0.rmbe_alert_token = htonl(conn->alert_token_local); switch (clc->hdr.type) { @@ -1046,8 +1046,10 @@ static int smc_clc_send_confirm_accept(struct smc_sock *smc, break; } clc->r0.rmbe_size = conn->rmbe_size_short; - clc->r0.rmb_dma_addr = cpu_to_be64((u64)sg_dma_address - (conn->rmb_desc->sgt[link->link_idx].sgl)); + clc->r0.rmb_dma_addr = conn->rmb_desc->is_vm ? + cpu_to_be64((uintptr_t)conn->rmb_desc->cpu_addr) : + cpu_to_be64((u64)sg_dma_address + (conn->rmb_desc->sgt[link->link_idx].sgl)); hton24(clc->r0.psn, link->psn_initial); if (version == SMC_V1) { clc->hdr.length = htons(SMCR_CLC_ACCEPT_CONFIRM_LEN); diff --git a/net/smc/smc_clc.h b/net/smc/smc_clc.h index 83f02f131fc0..5fee545c9a10 100644 --- a/net/smc/smc_clc.h +++ b/net/smc/smc_clc.h @@ -62,7 +62,7 @@ #define SMC_CLC_DECL_INTERR 0x09990000 /* internal error */ #define SMC_CLC_DECL_ERR_RTOK 0x09990001 /* rtoken handling failed */ #define SMC_CLC_DECL_ERR_RDYLNK 0x09990002 /* ib ready link failed */ -#define SMC_CLC_DECL_ERR_REGRMB 0x09990003 /* reg rmb failed */ +#define SMC_CLC_DECL_ERR_REGBUF 0x09990003 /* reg rdma bufs failed */ #define SMC_FIRST_CONTACT_MASK 0b10 /* first contact bit within typev2 */ diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c index 86afbbce4fca..f26770c29d78 100644 --- a/net/smc/smc_core.c +++ b/net/smc/smc_core.c @@ -1087,34 +1087,37 @@ err_out: return NULL; } -static void smcr_buf_unuse(struct smc_buf_desc *rmb_desc, +static void smcr_buf_unuse(struct smc_buf_desc *buf_desc, bool is_rmb, struct smc_link_group *lgr) { + struct mutex *lock; /* lock buffer list */ int rc; - if (rmb_desc->is_conf_rkey && !list_empty(&lgr->list)) { + if (is_rmb && buf_desc->is_conf_rkey && !list_empty(&lgr->list)) { /* unregister rmb with peer */ rc = smc_llc_flow_initiate(lgr, SMC_LLC_FLOW_RKEY); if (!rc) { /* protect against smc_llc_cli_rkey_exchange() */ mutex_lock(&lgr->llc_conf_mutex); - smc_llc_do_delete_rkey(lgr, rmb_desc); - rmb_desc->is_conf_rkey = false; + smc_llc_do_delete_rkey(lgr, buf_desc); + buf_desc->is_conf_rkey = false; mutex_unlock(&lgr->llc_conf_mutex); smc_llc_flow_stop(lgr, &lgr->llc_flow_lcl); } } - if (rmb_desc->is_reg_err) { + if (buf_desc->is_reg_err) { /* buf registration failed, reuse not possible */ - mutex_lock(&lgr->rmbs_lock); - list_del(&rmb_desc->list); - mutex_unlock(&lgr->rmbs_lock); + lock = is_rmb ? &lgr->rmbs_lock : + &lgr->sndbufs_lock; + mutex_lock(lock); + list_del(&buf_desc->list); + mutex_unlock(lock); - smc_buf_free(lgr, true, rmb_desc); + smc_buf_free(lgr, is_rmb, buf_desc); } else { - rmb_desc->used = 0; - memset(rmb_desc->cpu_addr, 0, rmb_desc->len); + buf_desc->used = 0; + memset(buf_desc->cpu_addr, 0, buf_desc->len); } } @@ -1122,15 +1125,23 @@ static void smc_buf_unuse(struct smc_connection *conn, struct smc_link_group *lgr) { if (conn->sndbuf_desc) { - conn->sndbuf_desc->used = 0; - memset(conn->sndbuf_desc->cpu_addr, 0, conn->sndbuf_desc->len); + if (!lgr->is_smcd && conn->sndbuf_desc->is_vm) { + smcr_buf_unuse(conn->sndbuf_desc, false, lgr); + } else { + conn->sndbuf_desc->used = 0; + memset(conn->sndbuf_desc->cpu_addr, 0, + conn->sndbuf_desc->len); + } } - if (conn->rmb_desc && lgr->is_smcd) { - conn->rmb_desc->used = 0; - memset(conn->rmb_desc->cpu_addr, 0, conn->rmb_desc->len + - sizeof(struct smcd_cdc_msg)); - } else if (conn->rmb_desc) { - smcr_buf_unuse(conn->rmb_desc, lgr); + if (conn->rmb_desc) { + if (!lgr->is_smcd) { + smcr_buf_unuse(conn->rmb_desc, true, lgr); + } else { + conn->rmb_desc->used = 0; + memset(conn->rmb_desc->cpu_addr, 0, + conn->rmb_desc->len + + sizeof(struct smcd_cdc_msg)); + } } } @@ -1178,20 +1189,21 @@ lgr_put: static void smcr_buf_unmap_link(struct smc_buf_desc *buf_desc, bool is_rmb, struct smc_link *lnk) { - if (is_rmb) + if (is_rmb || buf_desc->is_vm) buf_desc->is_reg_mr[lnk->link_idx] = false; if (!buf_desc->is_map_ib[lnk->link_idx]) return; - if (is_rmb) { - if (buf_desc->mr_rx[lnk->link_idx]) { - smc_ib_put_memory_region( - buf_desc->mr_rx[lnk->link_idx]); - buf_desc->mr_rx[lnk->link_idx] = NULL; - } + + if ((is_rmb || buf_desc->is_vm) && + buf_desc->mr[lnk->link_idx]) { + smc_ib_put_memory_region(buf_desc->mr[lnk->link_idx]); + buf_desc->mr[lnk->link_idx] = NULL; + } + if (is_rmb) smc_ib_buf_unmap_sg(lnk, buf_desc, DMA_FROM_DEVICE); - } else { + else smc_ib_buf_unmap_sg(lnk, buf_desc, DMA_TO_DEVICE); - } + sg_free_table(&buf_desc->sgt[lnk->link_idx]); buf_desc->is_map_ib[lnk->link_idx] = false; } @@ -1280,8 +1292,10 @@ static void smcr_buf_free(struct smc_link_group *lgr, bool is_rmb, for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) smcr_buf_unmap_link(buf_desc, is_rmb, &lgr->lnk[i]); - if (buf_desc->pages) + if (!buf_desc->is_vm && buf_desc->pages) __free_pages(buf_desc->pages, buf_desc->order); + else if (buf_desc->is_vm && buf_desc->cpu_addr) + vfree(buf_desc->cpu_addr); kfree(buf_desc); } @@ -1993,26 +2007,50 @@ static inline int smc_rmb_wnd_update_limit(int rmbe_size) return max_t(int, rmbe_size / 10, SOCK_MIN_SNDBUF / 2); } -/* map an rmb buf to a link */ +/* map an buf to a link */ static int smcr_buf_map_link(struct smc_buf_desc *buf_desc, bool is_rmb, struct smc_link *lnk) { - int rc; + int rc, i, nents, offset, buf_size, size, access_flags; + struct scatterlist *sg; + void *buf; if (buf_desc->is_map_ib[lnk->link_idx]) return 0; - rc = sg_alloc_table(&buf_desc->sgt[lnk->link_idx], 1, GFP_KERNEL); + if (buf_desc->is_vm) { + buf = buf_desc->cpu_addr; + buf_size = buf_desc->len; + offset = offset_in_page(buf_desc->cpu_addr); + nents = PAGE_ALIGN(buf_size + offset) / PAGE_SIZE; + } else { + nents = 1; + } + + rc = sg_alloc_table(&buf_desc->sgt[lnk->link_idx], nents, GFP_KERNEL); if (rc) return rc; - sg_set_buf(buf_desc->sgt[lnk->link_idx].sgl, - buf_desc->cpu_addr, buf_desc->len); + + if (buf_desc->is_vm) { + /* virtually contiguous buffer */ + for_each_sg(buf_desc->sgt[lnk->link_idx].sgl, sg, nents, i) { + size = min_t(int, PAGE_SIZE - offset, buf_size); + sg_set_page(sg, vmalloc_to_page(buf), size, offset); + buf += size / sizeof(*buf); + buf_size -= size; + offset = 0; + } + } else { + /* physically contiguous buffer */ + sg_set_buf(buf_desc->sgt[lnk->link_idx].sgl, + buf_desc->cpu_addr, buf_desc->len); + } /* map sg table to DMA address */ rc = smc_ib_buf_map_sg(lnk, buf_desc, is_rmb ? DMA_FROM_DEVICE : DMA_TO_DEVICE); /* SMC protocol depends on mapping to one DMA address only */ - if (rc != 1) { + if (rc != nents) { rc = -EAGAIN; goto free_table; } @@ -2020,15 +2058,18 @@ static int smcr_buf_map_link(struct smc_buf_desc *buf_desc, bool is_rmb, buf_desc->is_dma_need_sync |= smc_ib_is_sg_need_sync(lnk, buf_desc) << lnk->link_idx; - /* create a new memory region for the RMB */ - if (is_rmb) { - rc = smc_ib_get_memory_region(lnk->roce_pd, - IB_ACCESS_REMOTE_WRITE | - IB_ACCESS_LOCAL_WRITE, + if (is_rmb || buf_desc->is_vm) { + /* create a new memory region for the RMB or vzalloced sndbuf */ + access_flags = is_rmb ? + IB_ACCESS_REMOTE_WRITE | IB_ACCESS_LOCAL_WRITE : + IB_ACCESS_LOCAL_WRITE; + + rc = smc_ib_get_memory_region(lnk->roce_pd, access_flags, buf_desc, lnk->link_idx); if (rc) goto buf_unmap; - smc_ib_sync_sg_for_device(lnk, buf_desc, DMA_FROM_DEVICE); + smc_ib_sync_sg_for_device(lnk, buf_desc, + is_rmb ? DMA_FROM_DEVICE : DMA_TO_DEVICE); } buf_desc->is_map_ib[lnk->link_idx] = true; return 0; @@ -2041,20 +2082,23 @@ free_table: return rc; } -/* register a new rmb on IB device, +/* register a new buf on IB device, rmb or vzalloced sndbuf * must be called under lgr->llc_conf_mutex lock */ -int smcr_link_reg_rmb(struct smc_link *link, struct smc_buf_desc *rmb_desc) +int smcr_link_reg_buf(struct smc_link *link, struct smc_buf_desc *buf_desc) { if (list_empty(&link->lgr->list)) return -ENOLINK; - if (!rmb_desc->is_reg_mr[link->link_idx]) { - /* register memory region for new rmb */ - if (smc_wr_reg_send(link, rmb_desc->mr_rx[link->link_idx])) { - rmb_desc->is_reg_err = true; + if (!buf_desc->is_reg_mr[link->link_idx]) { + /* register memory region for new buf */ + if (buf_desc->is_vm) + buf_desc->mr[link->link_idx]->iova = + (uintptr_t)buf_desc->cpu_addr; + if (smc_wr_reg_send(link, buf_desc->mr[link->link_idx])) { + buf_desc->is_reg_err = true; return -EFAULT; } - rmb_desc->is_reg_mr[link->link_idx] = true; + buf_desc->is_reg_mr[link->link_idx] = true; } return 0; } @@ -2106,18 +2150,38 @@ int smcr_buf_reg_lgr(struct smc_link *lnk) struct smc_buf_desc *buf_desc, *bf; int i, rc = 0; + /* reg all RMBs for a new link */ mutex_lock(&lgr->rmbs_lock); for (i = 0; i < SMC_RMBE_SIZES; i++) { list_for_each_entry_safe(buf_desc, bf, &lgr->rmbs[i], list) { if (!buf_desc->used) continue; - rc = smcr_link_reg_rmb(lnk, buf_desc); - if (rc) - goto out; + rc = smcr_link_reg_buf(lnk, buf_desc); + if (rc) { + mutex_unlock(&lgr->rmbs_lock); + return rc; + } } } -out: mutex_unlock(&lgr->rmbs_lock); + + if (lgr->buf_type == SMCR_PHYS_CONT_BUFS) + return rc; + + /* reg all vzalloced sndbufs for a new link */ + mutex_lock(&lgr->sndbufs_lock); + for (i = 0; i < SMC_RMBE_SIZES; i++) { + list_for_each_entry_safe(buf_desc, bf, &lgr->sndbufs[i], list) { + if (!buf_desc->used || !buf_desc->is_vm) + continue; + rc = smcr_link_reg_buf(lnk, buf_desc); + if (rc) { + mutex_unlock(&lgr->sndbufs_lock); + return rc; + } + } + } + mutex_unlock(&lgr->sndbufs_lock); return rc; } @@ -2131,18 +2195,39 @@ static struct smc_buf_desc *smcr_new_buf_create(struct smc_link_group *lgr, if (!buf_desc) return ERR_PTR(-ENOMEM); - buf_desc->order = get_order(bufsize); - buf_desc->pages = alloc_pages(GFP_KERNEL | __GFP_NOWARN | - __GFP_NOMEMALLOC | __GFP_COMP | - __GFP_NORETRY | __GFP_ZERO, - buf_desc->order); - if (!buf_desc->pages) { - kfree(buf_desc); - return ERR_PTR(-EAGAIN); - } - buf_desc->cpu_addr = (void *)page_address(buf_desc->pages); - buf_desc->len = bufsize; + switch (lgr->buf_type) { + case SMCR_PHYS_CONT_BUFS: + case SMCR_MIXED_BUFS: + buf_desc->order = get_order(bufsize); + buf_desc->pages = alloc_pages(GFP_KERNEL | __GFP_NOWARN | + __GFP_NOMEMALLOC | __GFP_COMP | + __GFP_NORETRY | __GFP_ZERO, + buf_desc->order); + if (buf_desc->pages) { + buf_desc->cpu_addr = + (void *)page_address(buf_desc->pages); + buf_desc->len = bufsize; + buf_desc->is_vm = false; + break; + } + if (lgr->buf_type == SMCR_PHYS_CONT_BUFS) + goto out; + fallthrough; // try virtually continguous buf + case SMCR_VIRT_CONT_BUFS: + buf_desc->order = get_order(bufsize); + buf_desc->cpu_addr = vzalloc(PAGE_SIZE << buf_desc->order); + if (!buf_desc->cpu_addr) + goto out; + buf_desc->pages = NULL; + buf_desc->len = bufsize; + buf_desc->is_vm = true; + break; + } return buf_desc; + +out: + kfree(buf_desc); + return ERR_PTR(-EAGAIN); } /* map buf_desc on all usable links, @@ -2273,7 +2358,7 @@ static int __smc_buf_create(struct smc_sock *smc, bool is_smcd, bool is_rmb) if (!is_smcd) { if (smcr_buf_map_usable_links(lgr, buf_desc, is_rmb)) { - smcr_buf_unuse(buf_desc, lgr); + smcr_buf_unuse(buf_desc, is_rmb, lgr); return -ENOMEM; } } diff --git a/net/smc/smc_core.h b/net/smc/smc_core.h index 0261124a7a70..fe8b524ad846 100644 --- a/net/smc/smc_core.h +++ b/net/smc/smc_core.h @@ -168,9 +168,11 @@ struct smc_buf_desc { struct { /* SMC-R */ struct sg_table sgt[SMC_LINKS_PER_LGR_MAX]; /* virtual buffer */ - struct ib_mr *mr_rx[SMC_LINKS_PER_LGR_MAX]; - /* for rmb only: memory region + struct ib_mr *mr[SMC_LINKS_PER_LGR_MAX]; + /* memory region: for rmb and + * vzalloced sndbuf * incl. rkey provided to peer + * and lkey provided to local */ u32 order; /* allocation order */ @@ -183,6 +185,8 @@ struct smc_buf_desc { u8 is_dma_need_sync; u8 is_reg_err; /* buffer registration err */ + u8 is_vm; + /* virtually contiguous */ }; struct { /* SMC-D */ unsigned short sba_idx; @@ -543,7 +547,7 @@ int smcr_buf_reg_lgr(struct smc_link *lnk); void smcr_lgr_set_type(struct smc_link_group *lgr, enum smc_lgr_type new_type); void smcr_lgr_set_type_asym(struct smc_link_group *lgr, enum smc_lgr_type new_type, int asym_lnk_idx); -int smcr_link_reg_rmb(struct smc_link *link, struct smc_buf_desc *rmb_desc); +int smcr_link_reg_buf(struct smc_link *link, struct smc_buf_desc *rmb_desc); struct smc_link *smc_switch_conns(struct smc_link_group *lgr, struct smc_link *from_lnk, bool is_dev_err); void smcr_link_down_cond(struct smc_link *lnk); diff --git a/net/smc/smc_ib.c b/net/smc/smc_ib.c index 60e5095890b1..854772dd52fd 100644 --- a/net/smc/smc_ib.c +++ b/net/smc/smc_ib.c @@ -698,7 +698,7 @@ static int smc_ib_map_mr_sg(struct smc_buf_desc *buf_slot, u8 link_idx) int sg_num; /* map the largest prefix of a dma mapped SG list */ - sg_num = ib_map_mr_sg(buf_slot->mr_rx[link_idx], + sg_num = ib_map_mr_sg(buf_slot->mr[link_idx], buf_slot->sgt[link_idx].sgl, buf_slot->sgt[link_idx].orig_nents, &offset, PAGE_SIZE); @@ -710,20 +710,21 @@ static int smc_ib_map_mr_sg(struct smc_buf_desc *buf_slot, u8 link_idx) int smc_ib_get_memory_region(struct ib_pd *pd, int access_flags, struct smc_buf_desc *buf_slot, u8 link_idx) { - if (buf_slot->mr_rx[link_idx]) + if (buf_slot->mr[link_idx]) return 0; /* already done */ - buf_slot->mr_rx[link_idx] = + buf_slot->mr[link_idx] = ib_alloc_mr(pd, IB_MR_TYPE_MEM_REG, 1 << buf_slot->order); - if (IS_ERR(buf_slot->mr_rx[link_idx])) { + if (IS_ERR(buf_slot->mr[link_idx])) { int rc; - rc = PTR_ERR(buf_slot->mr_rx[link_idx]); - buf_slot->mr_rx[link_idx] = NULL; + rc = PTR_ERR(buf_slot->mr[link_idx]); + buf_slot->mr[link_idx] = NULL; return rc; } - if (smc_ib_map_mr_sg(buf_slot, link_idx) != 1) + if (smc_ib_map_mr_sg(buf_slot, link_idx) != + buf_slot->sgt[link_idx].orig_nents) return -EINVAL; return 0; diff --git a/net/smc/smc_llc.c b/net/smc/smc_llc.c index c4d057b2941d..1d83fa9ae56f 100644 --- a/net/smc/smc_llc.c +++ b/net/smc/smc_llc.c @@ -505,19 +505,22 @@ static int smc_llc_send_confirm_rkey(struct smc_link *send_link, if (smc_link_active(link) && link != send_link) { rkeyllc->rtoken[rtok_ix].link_id = link->link_id; rkeyllc->rtoken[rtok_ix].rmb_key = - htonl(rmb_desc->mr_rx[link->link_idx]->rkey); - rkeyllc->rtoken[rtok_ix].rmb_vaddr = cpu_to_be64( - (u64)sg_dma_address( - rmb_desc->sgt[link->link_idx].sgl)); + htonl(rmb_desc->mr[link->link_idx]->rkey); + rkeyllc->rtoken[rtok_ix].rmb_vaddr = rmb_desc->is_vm ? + cpu_to_be64((uintptr_t)rmb_desc->cpu_addr) : + cpu_to_be64((u64)sg_dma_address + (rmb_desc->sgt[link->link_idx].sgl)); rtok_ix++; } } /* rkey of send_link is in rtoken[0] */ rkeyllc->rtoken[0].num_rkeys = rtok_ix - 1; rkeyllc->rtoken[0].rmb_key = - htonl(rmb_desc->mr_rx[send_link->link_idx]->rkey); - rkeyllc->rtoken[0].rmb_vaddr = cpu_to_be64( - (u64)sg_dma_address(rmb_desc->sgt[send_link->link_idx].sgl)); + htonl(rmb_desc->mr[send_link->link_idx]->rkey); + rkeyllc->rtoken[0].rmb_vaddr = rmb_desc->is_vm ? + cpu_to_be64((uintptr_t)rmb_desc->cpu_addr) : + cpu_to_be64((u64)sg_dma_address + (rmb_desc->sgt[send_link->link_idx].sgl)); /* send llc message */ rc = smc_wr_tx_send(send_link, pend); put_out: @@ -544,7 +547,7 @@ static int smc_llc_send_delete_rkey(struct smc_link *link, rkeyllc->hd.common.llc_type = SMC_LLC_DELETE_RKEY; smc_llc_init_msg_hdr(&rkeyllc->hd, link->lgr, sizeof(*rkeyllc)); rkeyllc->num_rkeys = 1; - rkeyllc->rkey[0] = htonl(rmb_desc->mr_rx[link->link_idx]->rkey); + rkeyllc->rkey[0] = htonl(rmb_desc->mr[link->link_idx]->rkey); /* send llc message */ rc = smc_wr_tx_send(link, pend); put_out: @@ -614,9 +617,10 @@ static int smc_llc_fill_ext_v2(struct smc_llc_msg_add_link_v2_ext *ext, if (!buf_pos) break; rmb = buf_pos; - ext->rt[i].rmb_key = htonl(rmb->mr_rx[prim_lnk_idx]->rkey); - ext->rt[i].rmb_key_new = htonl(rmb->mr_rx[lnk_idx]->rkey); - ext->rt[i].rmb_vaddr_new = + ext->rt[i].rmb_key = htonl(rmb->mr[prim_lnk_idx]->rkey); + ext->rt[i].rmb_key_new = htonl(rmb->mr[lnk_idx]->rkey); + ext->rt[i].rmb_vaddr_new = rmb->is_vm ? + cpu_to_be64((uintptr_t)rmb->cpu_addr) : cpu_to_be64((u64)sg_dma_address(rmb->sgt[lnk_idx].sgl)); buf_pos = smc_llc_get_next_rmb(lgr, &buf_lst, buf_pos); while (buf_pos && !(buf_pos)->used) @@ -852,9 +856,10 @@ static int smc_llc_add_link_cont(struct smc_link *link, } rmb = *buf_pos; - addc_llc->rt[i].rmb_key = htonl(rmb->mr_rx[prim_lnk_idx]->rkey); - addc_llc->rt[i].rmb_key_new = htonl(rmb->mr_rx[lnk_idx]->rkey); - addc_llc->rt[i].rmb_vaddr_new = + addc_llc->rt[i].rmb_key = htonl(rmb->mr[prim_lnk_idx]->rkey); + addc_llc->rt[i].rmb_key_new = htonl(rmb->mr[lnk_idx]->rkey); + addc_llc->rt[i].rmb_vaddr_new = rmb->is_vm ? + cpu_to_be64((uintptr_t)rmb->cpu_addr) : cpu_to_be64((u64)sg_dma_address(rmb->sgt[lnk_idx].sgl)); (*num_rkeys_todo)--; diff --git a/net/smc/smc_rx.c b/net/smc/smc_rx.c index 00ad004835e6..17c5aee7ee4f 100644 --- a/net/smc/smc_rx.c +++ b/net/smc/smc_rx.c @@ -145,35 +145,93 @@ static void smc_rx_spd_release(struct splice_pipe_desc *spd, static int smc_rx_splice(struct pipe_inode_info *pipe, char *src, size_t len, struct smc_sock *smc) { + struct smc_link_group *lgr = smc->conn.lgr; + int offset = offset_in_page(src); + struct partial_page *partial; struct splice_pipe_desc spd; - struct partial_page partial; - struct smc_spd_priv *priv; - int bytes; + struct smc_spd_priv **priv; + struct page **pages; + int bytes, nr_pages; + int i; - priv = kzalloc(sizeof(*priv), GFP_KERNEL); + nr_pages = !lgr->is_smcd && smc->conn.rmb_desc->is_vm ? + PAGE_ALIGN(len + offset) / PAGE_SIZE : 1; + + pages = kcalloc(nr_pages, sizeof(*pages), GFP_KERNEL); + if (!pages) + goto out; + partial = kcalloc(nr_pages, sizeof(*partial), GFP_KERNEL); + if (!partial) + goto out_page; + priv = kcalloc(nr_pages, sizeof(*priv), GFP_KERNEL); if (!priv) - return -ENOMEM; - priv->len = len; - priv->smc = smc; - partial.offset = src - (char *)smc->conn.rmb_desc->cpu_addr; - partial.len = len; - partial.private = (unsigned long)priv; - - spd.nr_pages_max = 1; - spd.nr_pages = 1; - spd.pages = &smc->conn.rmb_desc->pages; - spd.partial = &partial; + goto out_part; + for (i = 0; i < nr_pages; i++) { + priv[i] = kzalloc(sizeof(**priv), GFP_KERNEL); + if (!priv[i]) + goto out_priv; + } + + if (lgr->is_smcd || + (!lgr->is_smcd && !smc->conn.rmb_desc->is_vm)) { + /* smcd or smcr that uses physically contiguous RMBs */ + priv[0]->len = len; + priv[0]->smc = smc; + partial[0].offset = src - (char *)smc->conn.rmb_desc->cpu_addr; + partial[0].len = len; + partial[0].private = (unsigned long)priv[0]; + pages[0] = smc->conn.rmb_desc->pages; + } else { + int size, left = len; + void *buf = src; + /* smcr that uses virtually contiguous RMBs*/ + for (i = 0; i < nr_pages; i++) { + size = min_t(int, PAGE_SIZE - offset, left); + priv[i]->len = size; + priv[i]->smc = smc; + pages[i] = vmalloc_to_page(buf); + partial[i].offset = offset; + partial[i].len = size; + partial[i].private = (unsigned long)priv[i]; + buf += size / sizeof(*buf); + left -= size; + offset = 0; + } + } + spd.nr_pages_max = nr_pages; + spd.nr_pages = nr_pages; + spd.pages = pages; + spd.partial = partial; spd.ops = &smc_pipe_ops; spd.spd_release = smc_rx_spd_release; bytes = splice_to_pipe(pipe, &spd); if (bytes > 0) { sock_hold(&smc->sk); - get_page(smc->conn.rmb_desc->pages); + if (!lgr->is_smcd && smc->conn.rmb_desc->is_vm) { + for (i = 0; i < PAGE_ALIGN(bytes + offset) / PAGE_SIZE; i++) + get_page(pages[i]); + } else { + get_page(smc->conn.rmb_desc->pages); + } atomic_add(bytes, &smc->conn.splice_pending); } + kfree(priv); + kfree(partial); + kfree(pages); return bytes; + +out_priv: + for (i = (i - 1); i >= 0; i--) + kfree(priv[i]); + kfree(priv); +out_part: + kfree(partial); +out_page: + kfree(pages); +out: + return -ENOMEM; } static int smc_rx_data_available_and_no_splice_pend(struct smc_connection *conn) diff --git a/net/smc/smc_tx.c b/net/smc/smc_tx.c index ca0d5f57908c..4e8377657a62 100644 --- a/net/smc/smc_tx.c +++ b/net/smc/smc_tx.c @@ -383,6 +383,7 @@ static int smcr_tx_rdma_writes(struct smc_connection *conn, size_t len, dma_addr_t dma_addr = sg_dma_address(conn->sndbuf_desc->sgt[link->link_idx].sgl); + u64 virt_addr = (uintptr_t)conn->sndbuf_desc->cpu_addr; int src_len_sum = src_len, dst_len_sum = dst_len; int sent_count = src_off; int srcchunk, dstchunk; @@ -395,7 +396,7 @@ static int smcr_tx_rdma_writes(struct smc_connection *conn, size_t len, u64 base_addr = dma_addr; if (dst_len < link->qp_attr.cap.max_inline_data) { - base_addr = (uintptr_t)conn->sndbuf_desc->cpu_addr; + base_addr = virt_addr; wr->wr.send_flags |= IB_SEND_INLINE; } else { wr->wr.send_flags &= ~IB_SEND_INLINE; @@ -403,8 +404,12 @@ static int smcr_tx_rdma_writes(struct smc_connection *conn, size_t len, num_sges = 0; for (srcchunk = 0; srcchunk < 2; srcchunk++) { - sge[srcchunk].addr = base_addr + src_off; + sge[srcchunk].addr = conn->sndbuf_desc->is_vm ? + (virt_addr + src_off) : (base_addr + src_off); sge[srcchunk].length = src_len; + if (conn->sndbuf_desc->is_vm) + sge[srcchunk].lkey = + conn->sndbuf_desc->mr[link->link_idx]->lkey; num_sges++; src_off += src_len; |