author     Linus Torvalds  2021-02-21 10:22:20 -0800
committer  Linus Torvalds  2021-02-21 10:22:20 -0800
commit     99f1a5872b706094ece117368170a92c66b2e242 (patch)
tree       0cac82e658e4ed5acd43db63ef3a381c42f4d4d4 /net/sunrpc
parent     681e2abe2191058b320716896cccda05b161eedc (diff)
parent     428a23d2bf0ca8fd4d364a464c3e468f0e81671e (diff)
Merge tag 'nfsd-5.12' of git://git.kernel.org/pub/scm/linux/kernel/git/cel/linux
Pull nfsd updates from Chuck Lever:
 - Update NFSv2 and NFSv3 XDR decoding functions
 - Further improve support for re-exporting NFS mounts
 - Convert NFSD stats to per-CPU counters (a sketch of the counter pattern follows this list)
 - Add batch Receive posting to the server's RPC/RDMA transport (a sketch of the batching idea follows the diff at the end)
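The per-CPU counter conversion replaces atomic_t statistics, whose increments all contend on a single shared cache line, with the kernel's generic percpu_counter API. A minimal sketch of that pattern, assuming a hypothetical counter named demo_stat (only the <linux/percpu_counter.h> calls are the ones the merged code relies on; nothing below is code from this merge):

	/*
	 * Sketch of the percpu_counter pattern used by the stats
	 * conversion. demo_stat and the demo_* helpers are hypothetical.
	 */
	#include <linux/percpu_counter.h>

	static struct percpu_counter demo_stat;

	static int demo_stat_setup(void)
	{
		/* Allocates per-CPU slots, so it can fail and must be checked. */
		return percpu_counter_init(&demo_stat, 0, GFP_KERNEL);
	}

	static void demo_stat_bump(void)
	{
		/* Hot path: touches only this CPU's slot, no shared cache line. */
		percpu_counter_inc(&demo_stat);
	}

	static s64 demo_stat_read(void)
	{
		/* Slow path: sums every CPU's slot, clamped below at zero. */
		return percpu_counter_sum_positive(&demo_stat);
	}

	static void demo_stat_teardown(void)
	{
		percpu_counter_destroy(&demo_stat);
	}

The trade-off is visible in the diff below: increments become cheap and contention-free, while reads (the sysctl handler) pay to sum all per-CPU slots via percpu_counter_sum_positive().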
* tag 'nfsd-5.12' of git://git.kernel.org/pub/scm/linux/kernel/git/cel/linux: (65 commits)
nfsd: skip some unnecessary stats in the v4 case
nfs: use change attribute for NFS re-exports
NFSv4_2: SSC helper should use its own config.
nfsd: cstate->session->se_client -> cstate->clp
nfsd: simplify nfsd4_check_open_reclaim
nfsd: remove unused set_client argument
nfsd: find_cpntf_state cleanup
nfsd: refactor set_client
nfsd: rename lookup_clientid->set_client
nfsd: simplify nfsd_renew
nfsd: simplify process_lock
nfsd4: simplify process_lookup1
SUNRPC: Correct a comment
svcrdma: DMA-sync the receive buffer in svc_rdma_recvfrom()
svcrdma: Reduce Receive doorbell rate
svcrdma: Deprecate stat variables that are no longer used
svcrdma: Restore read and write stats
svcrdma: Convert rdma_stat_sq_starve to a per-CPU counter
svcrdma: Convert rdma_stat_recv to a per-CPU counter
svcrdma: Refactor svc_rdma_init() and svc_rdma_clean_up()
...
Diffstat (limited to 'net/sunrpc')
-rw-r--r--   net/sunrpc/svc.c                            2
-rw-r--r--   net/sunrpc/xprtrdma/svc_rdma.c            198
-rw-r--r--   net/sunrpc/xprtrdma/svc_rdma_recvfrom.c    91
-rw-r--r--   net/sunrpc/xprtrdma/svc_rdma_rw.c           3
-rw-r--r--   net/sunrpc/xprtrdma/svc_rdma_sendto.c       2
5 files changed, 175 insertions, 121 deletions
diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c
index 4187745887f0..61fb8a18552c 100644
--- a/net/sunrpc/svc.c
+++ b/net/sunrpc/svc.c
@@ -559,7 +559,7 @@ EXPORT_SYMBOL_GPL(svc_destroy);
 
 /*
  * Allocate an RPC server's buffer space.
- * We allocate pages and place them in rq_argpages.
+ * We allocate pages and place them in rq_pages.
  */
 static int
 svc_init_buffer(struct svc_rqst *rqstp, unsigned int size, int node)
diff --git a/net/sunrpc/xprtrdma/svc_rdma.c b/net/sunrpc/xprtrdma/svc_rdma.c
index 526da5d4710b..5bc20e9d09cd 100644
--- a/net/sunrpc/xprtrdma/svc_rdma.c
+++ b/net/sunrpc/xprtrdma/svc_rdma.c
@@ -62,51 +62,47 @@ static unsigned int max_max_requests = 16384;
 unsigned int svcrdma_max_req_size = RPCRDMA_DEF_INLINE_THRESH;
 static unsigned int min_max_inline = RPCRDMA_DEF_INLINE_THRESH;
 static unsigned int max_max_inline = RPCRDMA_MAX_INLINE_THRESH;
+static unsigned int svcrdma_stat_unused;
+static unsigned int zero;
 
-atomic_t rdma_stat_recv;
-atomic_t rdma_stat_read;
-atomic_t rdma_stat_write;
-atomic_t rdma_stat_sq_starve;
-atomic_t rdma_stat_rq_starve;
-atomic_t rdma_stat_rq_poll;
-atomic_t rdma_stat_rq_prod;
-atomic_t rdma_stat_sq_poll;
-atomic_t rdma_stat_sq_prod;
+struct percpu_counter svcrdma_stat_read;
+struct percpu_counter svcrdma_stat_recv;
+struct percpu_counter svcrdma_stat_sq_starve;
+struct percpu_counter svcrdma_stat_write;
 
-/*
- * This function implements reading and resetting an atomic_t stat
- * variable through read/write to a proc file. Any write to the file
- * resets the associated statistic to zero. Any read returns it's
- * current value.
- */
-static int read_reset_stat(struct ctl_table *table, int write,
-			   void *buffer, size_t *lenp, loff_t *ppos)
+enum {
+	SVCRDMA_COUNTER_BUFSIZ = sizeof(unsigned long long),
+};
+
+static int svcrdma_counter_handler(struct ctl_table *table, int write,
+				   void *buffer, size_t *lenp, loff_t *ppos)
 {
-	atomic_t *stat = (atomic_t *)table->data;
-
-	if (!stat)
-		return -EINVAL;
-
-	if (write)
-		atomic_set(stat, 0);
-	else {
-		char str_buf[32];
-		int len = snprintf(str_buf, 32, "%d\n", atomic_read(stat));
-		if (len >= 32)
-			return -EFAULT;
-		len = strlen(str_buf);
-		if (*ppos > len) {
-			*lenp = 0;
-			return 0;
-		}
-		len -= *ppos;
-		if (len > *lenp)
-			len = *lenp;
-		if (len)
-			memcpy(buffer, str_buf, len);
-		*lenp = len;
-		*ppos += len;
+	struct percpu_counter *stat = (struct percpu_counter *)table->data;
+	char tmp[SVCRDMA_COUNTER_BUFSIZ + 1];
+	int len;
+
+	if (write) {
+		percpu_counter_set(stat, 0);
+		return 0;
 	}
+
+	len = snprintf(tmp, SVCRDMA_COUNTER_BUFSIZ, "%lld\n",
+		       percpu_counter_sum_positive(stat));
+	if (len >= SVCRDMA_COUNTER_BUFSIZ)
+		return -EFAULT;
+	len = strlen(tmp);
+	if (*ppos > len) {
+		*lenp = 0;
+		return 0;
+	}
+	len -= *ppos;
+	if (len > *lenp)
+		len = *lenp;
+	if (len)
+		memcpy(buffer, tmp, len);
+	*lenp = len;
+	*ppos += len;
 
 	return 0;
 }
@@ -142,66 +138,76 @@ static struct ctl_table svcrdma_parm_table[] = {
 	{
 		.procname	= "rdma_stat_read",
-		.data		= &rdma_stat_read,
-		.maxlen		= sizeof(atomic_t),
+		.data		= &svcrdma_stat_read,
+		.maxlen		= SVCRDMA_COUNTER_BUFSIZ,
 		.mode		= 0644,
-		.proc_handler	= read_reset_stat,
+		.proc_handler	= svcrdma_counter_handler,
 	},
 	{
 		.procname	= "rdma_stat_recv",
-		.data		= &rdma_stat_recv,
-		.maxlen		= sizeof(atomic_t),
+		.data		= &svcrdma_stat_recv,
+		.maxlen		= SVCRDMA_COUNTER_BUFSIZ,
 		.mode		= 0644,
-		.proc_handler	= read_reset_stat,
+		.proc_handler	= svcrdma_counter_handler,
 	},
 	{
 		.procname	= "rdma_stat_write",
-		.data		= &rdma_stat_write,
-		.maxlen		= sizeof(atomic_t),
+		.data		= &svcrdma_stat_write,
+		.maxlen		= SVCRDMA_COUNTER_BUFSIZ,
 		.mode		= 0644,
-		.proc_handler	= read_reset_stat,
+		.proc_handler	= svcrdma_counter_handler,
 	},
 	{
 		.procname	= "rdma_stat_sq_starve",
-		.data		= &rdma_stat_sq_starve,
-		.maxlen		= sizeof(atomic_t),
+		.data		= &svcrdma_stat_sq_starve,
+		.maxlen		= SVCRDMA_COUNTER_BUFSIZ,
 		.mode		= 0644,
-		.proc_handler	= read_reset_stat,
+		.proc_handler	= svcrdma_counter_handler,
 	},
 	{
 		.procname	= "rdma_stat_rq_starve",
-		.data		= &rdma_stat_rq_starve,
-		.maxlen		= sizeof(atomic_t),
+		.data		= &svcrdma_stat_unused,
+		.maxlen		= sizeof(unsigned int),
 		.mode		= 0644,
-		.proc_handler	= read_reset_stat,
+		.proc_handler	= proc_dointvec_minmax,
+		.extra1		= &zero,
+		.extra2		= &zero,
 	},
 	{
 		.procname	= "rdma_stat_rq_poll",
-		.data		= &rdma_stat_rq_poll,
-		.maxlen		= sizeof(atomic_t),
+		.data		= &svcrdma_stat_unused,
+		.maxlen		= sizeof(unsigned int),
 		.mode		= 0644,
-		.proc_handler	= read_reset_stat,
+		.proc_handler	= proc_dointvec_minmax,
+		.extra1		= &zero,
+		.extra2		= &zero,
 	},
 	{
 		.procname	= "rdma_stat_rq_prod",
-		.data		= &rdma_stat_rq_prod,
-		.maxlen		= sizeof(atomic_t),
+		.data		= &svcrdma_stat_unused,
+		.maxlen		= sizeof(unsigned int),
 		.mode		= 0644,
-		.proc_handler	= read_reset_stat,
+		.proc_handler	= proc_dointvec_minmax,
+		.extra1		= &zero,
+		.extra2		= &zero,
 	},
 	{
 		.procname	= "rdma_stat_sq_poll",
-		.data		= &rdma_stat_sq_poll,
-		.maxlen		= sizeof(atomic_t),
+		.data		= &svcrdma_stat_unused,
+		.maxlen		= sizeof(unsigned int),
 		.mode		= 0644,
-		.proc_handler	= read_reset_stat,
+		.proc_handler	= proc_dointvec_minmax,
+		.extra1		= &zero,
+		.extra2		= &zero,
 	},
 	{
 		.procname	= "rdma_stat_sq_prod",
-		.data		= &rdma_stat_sq_prod,
-		.maxlen		= sizeof(atomic_t),
+		.data		= &svcrdma_stat_unused,
+		.maxlen		= sizeof(unsigned int),
 		.mode		= 0644,
-		.proc_handler	= read_reset_stat,
+		.proc_handler	= proc_dointvec_minmax,
+		.extra1		= &zero,
+		.extra2		= &zero,
 	},
 	{ },
 };
@@ -224,27 +230,69 @@ static struct ctl_table svcrdma_root_table[] = {
 	{ },
 };
 
+static void svc_rdma_proc_cleanup(void)
+{
+	if (!svcrdma_table_header)
+		return;
+	unregister_sysctl_table(svcrdma_table_header);
+	svcrdma_table_header = NULL;
+
+	percpu_counter_destroy(&svcrdma_stat_write);
+	percpu_counter_destroy(&svcrdma_stat_sq_starve);
+	percpu_counter_destroy(&svcrdma_stat_recv);
+	percpu_counter_destroy(&svcrdma_stat_read);
+}
+
+static int svc_rdma_proc_init(void)
+{
+	int rc;
+
+	if (svcrdma_table_header)
+		return 0;
+
+	rc = percpu_counter_init(&svcrdma_stat_read, 0, GFP_KERNEL);
+	if (rc)
+		goto out_err;
+	rc = percpu_counter_init(&svcrdma_stat_recv, 0, GFP_KERNEL);
+	if (rc)
+		goto out_err;
+	rc = percpu_counter_init(&svcrdma_stat_sq_starve, 0, GFP_KERNEL);
+	if (rc)
+		goto out_err;
+	rc = percpu_counter_init(&svcrdma_stat_write, 0, GFP_KERNEL);
+	if (rc)
+		goto out_err;
+
+	svcrdma_table_header = register_sysctl_table(svcrdma_root_table);
+	return 0;
+
+out_err:
+	percpu_counter_destroy(&svcrdma_stat_sq_starve);
+	percpu_counter_destroy(&svcrdma_stat_recv);
+	percpu_counter_destroy(&svcrdma_stat_read);
+	return rc;
+}
+
 void svc_rdma_cleanup(void)
 {
 	dprintk("SVCRDMA Module Removed, deregister RPC RDMA transport\n");
-	if (svcrdma_table_header) {
-		unregister_sysctl_table(svcrdma_table_header);
-		svcrdma_table_header = NULL;
-	}
 	svc_unreg_xprt_class(&svc_rdma_class);
+	svc_rdma_proc_cleanup();
 }
 
 int svc_rdma_init(void)
 {
+	int rc;
+
 	dprintk("SVCRDMA Module Init, register RPC RDMA transport\n");
 	dprintk("\tsvcrdma_ord      : %d\n", svcrdma_ord);
 	dprintk("\tmax_requests     : %u\n", svcrdma_max_requests);
 	dprintk("\tmax_bc_requests  : %u\n", svcrdma_max_bc_requests);
 	dprintk("\tmax_inline       : %d\n", svcrdma_max_req_size);
-	if (!svcrdma_table_header)
-		svcrdma_table_header =
-			register_sysctl_table(svcrdma_root_table);
+
+	rc = svc_rdma_proc_init();
+	if (rc)
+		return rc;
 
 	/* Register RDMA with the SVC transport switch */
 	svc_reg_xprt_class(&svc_rdma_class);
diff --git a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
index cbdb71247755..6d28f23ceb35 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
@@ -266,33 +266,46 @@ void svc_rdma_release_rqst(struct svc_rqst *rqstp)
 		svc_rdma_recv_ctxt_put(rdma, ctxt);
 }
 
-static int __svc_rdma_post_recv(struct svcxprt_rdma *rdma,
-				struct svc_rdma_recv_ctxt *ctxt)
+static bool svc_rdma_refresh_recvs(struct svcxprt_rdma *rdma,
+				   unsigned int wanted, bool temp)
 {
+	const struct ib_recv_wr *bad_wr = NULL;
+	struct svc_rdma_recv_ctxt *ctxt;
+	struct ib_recv_wr *recv_chain;
 	int ret;
 
-	trace_svcrdma_post_recv(ctxt);
-	ret = ib_post_recv(rdma->sc_qp, &ctxt->rc_recv_wr, NULL);
+	recv_chain = NULL;
+	while (wanted--) {
+		ctxt = svc_rdma_recv_ctxt_get(rdma);
+		if (!ctxt)
+			break;
+
+		trace_svcrdma_post_recv(ctxt);
+		ctxt->rc_temp = temp;
+		ctxt->rc_recv_wr.next = recv_chain;
+		recv_chain = &ctxt->rc_recv_wr;
+		rdma->sc_pending_recvs++;
+	}
+	if (!recv_chain)
+		return false;
+
+	ret = ib_post_recv(rdma->sc_qp, recv_chain, &bad_wr);
 	if (ret)
 		goto err_post;
-	return 0;
+	return true;
 
 err_post:
-	trace_svcrdma_rq_post_err(rdma, ret);
-	svc_rdma_recv_ctxt_put(rdma, ctxt);
-	return ret;
-}
-
-static int svc_rdma_post_recv(struct svcxprt_rdma *rdma)
-{
-	struct svc_rdma_recv_ctxt *ctxt;
+	while (bad_wr) {
+		ctxt = container_of(bad_wr, struct svc_rdma_recv_ctxt,
+				    rc_recv_wr);
+		bad_wr = bad_wr->next;
+		svc_rdma_recv_ctxt_put(rdma, ctxt);
+	}
 
-	if (test_bit(XPT_CLOSE, &rdma->sc_xprt.xpt_flags))
-		return 0;
-	ctxt = svc_rdma_recv_ctxt_get(rdma);
-	if (!ctxt)
-		return -ENOMEM;
-	return __svc_rdma_post_recv(rdma, ctxt);
+	trace_svcrdma_rq_post_err(rdma, ret);
+	/* Since we're destroying the xprt, no need to reset
+	 * sc_pending_recvs. */
+	return false;
 }
 
 /**
@@ -303,20 +316,7 @@ static int svc_rdma_post_recv(struct svcxprt_rdma *rdma)
  */
 bool svc_rdma_post_recvs(struct svcxprt_rdma *rdma)
 {
-	struct svc_rdma_recv_ctxt *ctxt;
-	unsigned int i;
-	int ret;
-
-	for (i = 0; i < rdma->sc_max_requests; i++) {
-		ctxt = svc_rdma_recv_ctxt_get(rdma);
-		if (!ctxt)
-			return false;
-		ctxt->rc_temp = true;
-		ret = __svc_rdma_post_recv(rdma, ctxt);
-		if (ret)
-			return false;
-	}
-	return true;
+	return svc_rdma_refresh_recvs(rdma, rdma->sc_max_requests, true);
 }
 
 /**
@@ -324,8 +324,6 @@ bool svc_rdma_post_recvs(struct svcxprt_rdma *rdma)
  * @cq: Completion Queue context
  * @wc: Work Completion object
  *
- * NB: The svc_xprt/svcxprt_rdma is pinned whenever it's possible that
- * the Receive completion handler could be running.
  */
 static void svc_rdma_wc_receive(struct ib_cq *cq, struct ib_wc *wc)
 {
@@ -333,6 +331,8 @@ static void svc_rdma_wc_receive(struct ib_cq *cq, struct ib_wc *wc)
 	struct ib_cqe *cqe = wc->wr_cqe;
 	struct svc_rdma_recv_ctxt *ctxt;
 
+	rdma->sc_pending_recvs--;
+
 	/* WARNING: Only wc->wr_cqe and wc->status are reliable */
 	ctxt = container_of(cqe, struct svc_rdma_recv_ctxt, rc_cqe);
 
@@ -340,14 +340,8 @@ static void svc_rdma_wc_receive(struct ib_cq *cq, struct ib_wc *wc)
 	if (wc->status != IB_WC_SUCCESS)
 		goto flushed;
 
-	if (svc_rdma_post_recv(rdma))
-		goto post_err;
-
 	/* All wc fields are now known to be valid */
 	ctxt->rc_byte_len = wc->byte_len;
-	ib_dma_sync_single_for_cpu(rdma->sc_pd->device,
-				   ctxt->rc_recv_sge.addr,
-				   wc->byte_len, DMA_FROM_DEVICE);
 
 	spin_lock(&rdma->sc_rq_dto_lock);
 	list_add_tail(&ctxt->rc_list, &rdma->sc_rq_dto_q);
@@ -356,11 +350,18 @@ static void svc_rdma_wc_receive(struct ib_cq *cq, struct ib_wc *wc)
 	spin_unlock(&rdma->sc_rq_dto_lock);
 	if (!test_bit(RDMAXPRT_CONN_PENDING, &rdma->sc_flags))
 		svc_xprt_enqueue(&rdma->sc_xprt);
+
+	if (!test_bit(XPT_CLOSE, &rdma->sc_xprt.xpt_flags) &&
+	    rdma->sc_pending_recvs < rdma->sc_max_requests)
+		if (!svc_rdma_refresh_recvs(rdma, RPCRDMA_MAX_RECV_BATCH,
+					    false))
+			goto post_err;
+
 	return;
 
 flushed:
-post_err:
 	svc_rdma_recv_ctxt_put(rdma, ctxt);
+post_err:
 	set_bit(XPT_CLOSE, &rdma->sc_xprt.xpt_flags);
 	svc_xprt_enqueue(&rdma->sc_xprt);
 }
@@ -845,9 +846,11 @@ int svc_rdma_recvfrom(struct svc_rqst *rqstp)
 	}
 	list_del(&ctxt->rc_list);
 	spin_unlock(&rdma_xprt->sc_rq_dto_lock);
+	percpu_counter_inc(&svcrdma_stat_recv);
 
-	atomic_inc(&rdma_stat_recv);
-
+	ib_dma_sync_single_for_cpu(rdma_xprt->sc_pd->device,
+				   ctxt->rc_recv_sge.addr, ctxt->rc_byte_len,
+				   DMA_FROM_DEVICE);
 	svc_rdma_build_arg_xdr(rqstp, ctxt);
 
 	/* Prevent svc_xprt_release from releasing pages in rq_pages
diff --git a/net/sunrpc/xprtrdma/svc_rdma_rw.c b/net/sunrpc/xprtrdma/svc_rdma_rw.c
index 0b63e1321d74..693d139a8633 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_rw.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_rw.c
@@ -364,6 +364,7 @@ static int svc_rdma_post_chunk_ctxt(struct svc_rdma_chunk_ctxt *cc)
 			return 0;
 		}
 
+		percpu_counter_inc(&svcrdma_stat_sq_starve);
 		trace_svcrdma_sq_full(rdma);
 		atomic_add(cc->cc_sqecount, &rdma->sc_sq_avail);
 		wait_event(rdma->sc_send_wait,
@@ -468,6 +469,7 @@ svc_rdma_build_writes(struct svc_rdma_write_info *info,
 				       DMA_TO_DEVICE);
 		if (ret < 0)
 			return -EIO;
+		percpu_counter_inc(&svcrdma_stat_write);
 
 		list_add(&ctxt->rw_list, &cc->cc_rwctxts);
 		cc->cc_sqecount += ret;
@@ -718,6 +720,7 @@ static int svc_rdma_build_read_segment(struct svc_rdma_read_info *info,
 				       segment->rs_handle, DMA_FROM_DEVICE);
 	if (ret < 0)
 		return -EIO;
+	percpu_counter_inc(&svcrdma_stat_read);
 
 	list_add(&ctxt->rw_list, &cc->cc_rwctxts);
 	cc->cc_sqecount += ret;
diff --git a/net/sunrpc/xprtrdma/svc_rdma_sendto.c b/net/sunrpc/xprtrdma/svc_rdma_sendto.c
index 68af79d4f04f..52c759a8543e 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_sendto.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_sendto.c
@@ -317,7 +317,7 @@ int svc_rdma_send(struct svcxprt_rdma *rdma, struct svc_rdma_send_ctxt *ctxt)
 	/* If the SQ is full, wait until an SQ entry is available */
 	while (1) {
 		if ((atomic_dec_return(&rdma->sc_sq_avail) < 0)) {
-			atomic_inc(&rdma_stat_sq_starve);
+			percpu_counter_inc(&svcrdma_stat_sq_starve);
 			trace_svcrdma_sq_full(rdma);
 			atomic_inc(&rdma->sc_sq_avail);
 			wait_event(rdma->sc_send_wait,
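The "Reduce Receive doorbell rate" change above works by linking Receive WRs into a chain and posting the whole chain with a single ib_post_recv() call, so the provider gets one doorbell per batch instead of one per Receive. A minimal sketch of that chaining idea against the kernel verbs API, with a hypothetical demo_ctxt and demo_get_ctxt() standing in for svc_rdma_recv_ctxt and svc_rdma_recv_ctxt_get() (not code from this merge):

	/*
	 * Sketch of the Receive-batching idea behind
	 * svc_rdma_refresh_recvs(). demo_* names are hypothetical.
	 */
	#include <linux/slab.h>
	#include <rdma/ib_verbs.h>

	struct demo_ctxt {
		struct ib_recv_wr wr;	/* sg_list/num_sge setup omitted */
	};

	static struct demo_ctxt *demo_get_ctxt(void)
	{
		/* A real transport recycles contexts carrying pre-registered,
		 * DMA-mapped receive buffers; kzalloc is illustrative only.
		 */
		return kzalloc(sizeof(struct demo_ctxt), GFP_ATOMIC);
	}

	static bool demo_post_recv_batch(struct ib_qp *qp, unsigned int wanted)
	{
		const struct ib_recv_wr *bad_wr = NULL;
		struct ib_recv_wr *chain = NULL;
		struct demo_ctxt *ctxt;

		/* Build the chain head-first; no hardware interaction yet. */
		while (wanted--) {
			ctxt = demo_get_ctxt();
			if (!ctxt)
				break;
			ctxt->wr.next = chain;
			chain = &ctxt->wr;
		}
		if (!chain)
			return false;

		/* One verb call, one doorbell, for the entire chain. On
		 * failure, bad_wr points at the first WR the provider did
		 * not consume, so the caller can unwind the rest, as the
		 * merged err_post path does.
		 */
		return ib_post_recv(qp, chain, &bad_wr) == 0;
	}

Each ib_post_recv() ends with an MMIO doorbell write to the device; chaining amortizes that write across up to RPCRDMA_MAX_RECV_BATCH Receives, which is the point of svc_rdma_refresh_recvs() in the diff above.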