From 48bb6ec17cde09122931438add817a69a91c3ab6 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Wed, 10 Jun 2020 00:22:57 +0100 Subject: NFS: remove redundant pointer clnt The pointer clnt is being initialized with a value that is never read and so this assignment is redundant and can be removed. The pointer can be removed because it is being used as a temporary variable and it is clearer to make the direct assignment and remove it completely. Addresses-Coverity: ("Unused value") Signed-off-by: Colin Ian King Signed-off-by: Anna Schumaker --- fs/nfs/nfs4proc.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 6e95c85fe395..1d23dc640208 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -9693,7 +9693,6 @@ _nfs41_proc_secinfo_no_name(struct nfs_server *server, struct nfs_fh *fhandle, .rpc_argp = &args, .rpc_resp = &res, }; - struct rpc_clnt *clnt = server->client; struct nfs4_call_sync_data data = { .seq_server = server, .seq_args = &args.seq_args, @@ -9710,8 +9709,7 @@ _nfs41_proc_secinfo_no_name(struct nfs_server *server, struct nfs_fh *fhandle, int status; if (use_integrity) { - clnt = server->nfs_client->cl_rpcclient; - task_setup.rpc_client = clnt; + task_setup.rpc_client = server->nfs_client->cl_rpcclient; cred = nfs4_get_clid_cred(server->nfs_client); msg.rpc_cred = cred; -- cgit v1.2.3 From 82c596ebaa104f994d25256523ae2f9047323fe7 Mon Sep 17 00:00:00 2001 From: Chengguang Xu Date: Wed, 17 Jun 2020 09:09:39 +0800 Subject: nfs4: strengthen error check to avoid unexpected result The variable error is ssize_t, which is signed and will be cast to unsigned when compared with variable size, so add a check to avoid unexpected result in case of negative value of error. 
Signed-off-by: Chengguang Xu Signed-off-by: Anna Schumaker --- fs/nfs/nfs4proc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 1d23dc640208..aca52e52538f 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -7440,7 +7440,7 @@ nfs4_listxattr_nfs4_label(struct inode *inode, char *list, size_t list_len) if (nfs_server_capable(inode, NFS_CAP_SECURITY_LABEL)) { len = security_inode_listsecurity(inode, list, list_len); - if (list_len && len > list_len) + if (len >= 0 && list_len && len > list_len) return -ERANGE; } return len; -- cgit v1.2.3 From 88428cc4ae7abcc879295fbb19373dd76aad2bdd Mon Sep 17 00:00:00 2001 From: Olga Kornievskaia Date: Tue, 23 Jun 2020 11:24:09 -0400 Subject: SUNRPC dont update timeout value on connection reset Current behaviour: every time a v3 operation is re-sent to the server we update (double) the timeout. There is no distinction between whether or not the previous timer had expired before the re-send happened. Here's the scenario: 1. Client sends a v3 operation 2. Server RST-s the connection (prior to the timeout) (eg., connection is immediately reset) 3. Client re-sends a v3 operation but the timeout is now 120sec. As a result, an application sees 2mins pause before a retry in case server again does not reply. Whereas if a connection reset didn't change the timeout value, the client would have re-tried (the 3rd time) after 60secs. 
Signed-off-by: Olga Kornievskaia Signed-off-by: Anna Schumaker --- net/sunrpc/clnt.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index 62e0b6c1e8cf..35bbd9c16e87 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -2405,7 +2405,8 @@ call_status(struct rpc_task *task) goto out_exit; } task->tk_action = call_encode; - rpc_check_timeout(task); + if (status != -ECONNRESET && status != -ECONNABORTED) + rpc_check_timeout(task); return; out_exit: rpc_call_rpcerror(task, status); -- cgit v1.2.3 From e4378a0fdd43a676bb4dbd858bc9e61c6661193b Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 8 Jul 2020 16:09:05 -0400 Subject: SUNRPC: Remove trace_xprt_complete_rqst() Request completion is already recorded by an "rpc_task_wakeup queue=xprt_pending" trace record. A subsequent rpc_xdr_recvfrom trace record shows the number of bytes received. Signed-off-by: Chuck Lever Signed-off-by: Anna Schumaker --- include/trace/events/sunrpc.h | 1 - net/sunrpc/xprt.c | 2 -- 2 files changed, 3 deletions(-) diff --git a/include/trace/events/sunrpc.h b/include/trace/events/sunrpc.h index 65d7dfbbc9cd..75b5df2a02fa 100644 --- a/include/trace/events/sunrpc.h +++ b/include/trace/events/sunrpc.h @@ -969,7 +969,6 @@ DECLARE_EVENT_CLASS(rpc_xprt_event, DEFINE_RPC_XPRT_EVENT(timer); DEFINE_RPC_XPRT_EVENT(lookup_rqst); -DEFINE_RPC_XPRT_EVENT(complete_rqst); TRACE_EVENT(xprt_transmit, TP_PROTO( diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c index 5a8e47bbfb9f..7ad4009c9033 100644 --- a/net/sunrpc/xprt.c +++ b/net/sunrpc/xprt.c @@ -1131,8 +1131,6 @@ void xprt_complete_rqst(struct rpc_task *task, int copied) struct rpc_rqst *req = task->tk_rqstp; struct rpc_xprt *xprt = req->rq_xprt; - trace_xprt_complete_rqst(xprt, req->rq_xid, copied); - xprt->stat.recvs++; req->rq_private_buf.len = copied; -- cgit v1.2.3 From 06e234c6132784c56198423c653f1ad0e1e1fdc1 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 8 Jul 2020 16:09:11 -0400 
Subject: SUNRPC: Hoist trace_xprtrdma_op_allocate into generic code Introduce a tracepoint in call_allocate that reports the exact sizes in the RPC buffer allocation request and the status of the result. This helps catch problems with XDR buffer provisioning, and replaces transport-specific debugging instrumentation. Signed-off-by: Chuck Lever Signed-off-by: Anna Schumaker --- include/trace/events/rpcrdma.h | 30 ------------------------------ include/trace/events/sunrpc.h | 30 ++++++++++++++++++++++++++++++ net/sunrpc/clnt.c | 3 +-- net/sunrpc/sched.c | 2 -- net/sunrpc/xprtrdma/transport.c | 2 -- 5 files changed, 31 insertions(+), 36 deletions(-) diff --git a/include/trace/events/rpcrdma.h b/include/trace/events/rpcrdma.h index abe942225637..c187a9a8ead6 100644 --- a/include/trace/events/rpcrdma.h +++ b/include/trace/events/rpcrdma.h @@ -1191,36 +1191,6 @@ TRACE_EVENT(xprtrdma_decode_seg, ** Allocation/release of rpcrdma_reqs and rpcrdma_reps **/ -TRACE_EVENT(xprtrdma_op_allocate, - TP_PROTO( - const struct rpc_task *task, - const struct rpcrdma_req *req - ), - - TP_ARGS(task, req), - - TP_STRUCT__entry( - __field(unsigned int, task_id) - __field(unsigned int, client_id) - __field(const void *, req) - __field(size_t, callsize) - __field(size_t, rcvsize) - ), - - TP_fast_assign( - __entry->task_id = task->tk_pid; - __entry->client_id = task->tk_client->cl_clid; - __entry->req = req; - __entry->callsize = task->tk_rqstp->rq_callsize; - __entry->rcvsize = task->tk_rqstp->rq_rcvsize; - ), - - TP_printk("task:%u@%u req=%p (%zu, %zu)", - __entry->task_id, __entry->client_id, - __entry->req, __entry->callsize, __entry->rcvsize - ) -); - TRACE_EVENT(xprtrdma_op_free, TP_PROTO( const struct rpc_task *task, diff --git a/include/trace/events/sunrpc.h b/include/trace/events/sunrpc.h index 75b5df2a02fa..7addf7d1596b 100644 --- a/include/trace/events/sunrpc.h +++ b/include/trace/events/sunrpc.h @@ -517,6 +517,36 @@ DEFINE_RPC_REPLY_EVENT(stale_creds); 
DEFINE_RPC_REPLY_EVENT(bad_creds); DEFINE_RPC_REPLY_EVENT(auth_tooweak); +TRACE_EVENT(rpc_buf_alloc, + TP_PROTO( + const struct rpc_task *task, + int status + ), + + TP_ARGS(task, status), + + TP_STRUCT__entry( + __field(unsigned int, task_id) + __field(unsigned int, client_id) + __field(size_t, callsize) + __field(size_t, recvsize) + __field(int, status) + ), + + TP_fast_assign( + __entry->task_id = task->tk_pid; + __entry->client_id = task->tk_client->cl_clid; + __entry->callsize = task->tk_rqstp->rq_callsize; + __entry->recvsize = task->tk_rqstp->rq_rcvsize; + __entry->status = status; + ), + + TP_printk("task:%u@%u callsize=%zu recvsize=%zu status=%d", + __entry->task_id, __entry->client_id, + __entry->callsize, __entry->recvsize, __entry->status + ) +); + TRACE_EVENT(rpc_call_rpcerror, TP_PROTO( const struct rpc_task *task, diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index 35bbd9c16e87..3ab9e6a97898 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -1823,6 +1823,7 @@ call_allocate(struct rpc_task *task) req->rq_rcvsize <<= 2; status = xprt->ops->buf_alloc(task); + trace_rpc_buf_alloc(task, status); xprt_inject_disconnect(xprt); if (status == 0) return; @@ -1831,8 +1832,6 @@ call_allocate(struct rpc_task *task) return; } - dprintk("RPC: %5u rpc_buffer allocation failed\n", task->tk_pid); - if (RPC_IS_ASYNC(task) || !fatal_signal_pending(current)) { task->tk_action = call_allocate; rpc_delay(task, HZ>>4); diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c index 7eba20a88438..adce1e2ed10d 100644 --- a/net/sunrpc/sched.c +++ b/net/sunrpc/sched.c @@ -1036,8 +1036,6 @@ int rpc_malloc(struct rpc_task *task) return -ENOMEM; buf->len = size; - dprintk("RPC: %5u allocated buffer of size %zu at %p\n", - task->tk_pid, size, buf); rqst->rq_buffer = buf->data; rqst->rq_rbuffer = (char *)rqst->rq_buffer + rqst->rq_callsize; return 0; diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c index 053c8ab1265a..612b60f31302 100644 --- 
a/net/sunrpc/xprtrdma/transport.c +++ b/net/sunrpc/xprtrdma/transport.c @@ -586,11 +586,9 @@ xprt_rdma_allocate(struct rpc_task *task) rqst->rq_buffer = rdmab_data(req->rl_sendbuf); rqst->rq_rbuffer = rdmab_data(req->rl_recvbuf); - trace_xprtrdma_op_allocate(task, req); return 0; out_fail: - trace_xprtrdma_op_allocate(task, NULL); return -ENOMEM; } -- cgit v1.2.3 From 78069487539dbb18e10aae15644019b468829ab2 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 8 Jul 2020 16:09:16 -0400 Subject: SUNRPC: Remove debugging instrumentation from xprt_release These instruments don't appear to add any substantial value. We already have this at the termination of each RPC: iozone-2617 [002] 975.713126: rpc_stats_latency: task:418@5 xid=0x260eab5d nfsv3 LOOKUP backlog=15 rtt=32 execute=58 iozone-2617 [002] 975.713127: xprt_release_cong: task:418@5 snd_task:4294967295 cong=256 cwnd=16384 iozone-2617 [002] 975.713127: xprt_put_cong: task:418@5 snd_task:4294967295 cong=0 cwnd=16384 Signed-off-by: Chuck Lever Signed-off-by: Anna Schumaker --- include/trace/events/rpcrdma.h | 32 -------------------------------- net/sunrpc/sched.c | 3 --- net/sunrpc/xprt.c | 1 - net/sunrpc/xprtrdma/transport.c | 2 -- 4 files changed, 38 deletions(-) diff --git a/include/trace/events/rpcrdma.h b/include/trace/events/rpcrdma.h index c187a9a8ead6..1e17c2fc9640 100644 --- a/include/trace/events/rpcrdma.h +++ b/include/trace/events/rpcrdma.h @@ -1187,38 +1187,6 @@ TRACE_EVENT(xprtrdma_decode_seg, ) ); -/** - ** Allocation/release of rpcrdma_reqs and rpcrdma_reps - **/ - -TRACE_EVENT(xprtrdma_op_free, - TP_PROTO( - const struct rpc_task *task, - const struct rpcrdma_req *req - ), - - TP_ARGS(task, req), - - TP_STRUCT__entry( - __field(unsigned int, task_id) - __field(unsigned int, client_id) - __field(const void *, req) - __field(const void *, rep) - ), - - TP_fast_assign( - __entry->task_id = task->tk_pid; - __entry->client_id = task->tk_client->cl_clid; - __entry->req = req; - __entry->rep = 
req->rl_reply; - ), - - TP_printk("task:%u@%u req=%p rep=%p", - __entry->task_id, __entry->client_id, - __entry->req, __entry->rep - ) -); - /** ** Callback events **/ diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c index adce1e2ed10d..402b1c8869fd 100644 --- a/net/sunrpc/sched.c +++ b/net/sunrpc/sched.c @@ -1056,9 +1056,6 @@ void rpc_free(struct rpc_task *task) buf = container_of(buffer, struct rpc_buffer, data); size = buf->len; - dprintk("RPC: freeing buffer of size %zu at %p\n", - size, buf); - if (size <= RPC_BUFFER_MAXSIZE) mempool_free(buf, rpc_buffer_mempool); else diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c index 7ad4009c9033..2217dfed8f88 100644 --- a/net/sunrpc/xprt.c +++ b/net/sunrpc/xprt.c @@ -1836,7 +1836,6 @@ void xprt_release(struct rpc_task *task) if (req->rq_release_snd_buf) req->rq_release_snd_buf(req); - dprintk("RPC: %5u release request %p\n", task->tk_pid, req); if (likely(!bc_prealloc(req))) xprt->ops->free_slot(xprt, req); else diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c index 612b60f31302..819a922830da 100644 --- a/net/sunrpc/xprtrdma/transport.c +++ b/net/sunrpc/xprtrdma/transport.c @@ -605,8 +605,6 @@ xprt_rdma_free(struct rpc_task *task) struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(rqst->rq_xprt); struct rpcrdma_req *req = rpcr_to_rdmar(rqst); - trace_xprtrdma_op_free(task, req); - if (!list_empty(&req->rl_registered)) frwr_unmap_sync(r_xprt, req); -- cgit v1.2.3 From 09d2ba0cb1b311b2e3b144a0bbccc99f3c0d82d1 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 8 Jul 2020 16:09:21 -0400 Subject: SUNRPC: Update debugging instrumentation in xprt_do_reserve() Replace a dprintk() with a tracepoint. The tracepoint marks the point where an RPC request is assigned an XID. Additional clean up: Remove trace_xprt_enq_xmit, which reports much the same thing. That tracepoint was added for debugging commit 918f3c1fe83c ("SUNRPC: Improve latency for interactive tasks"). 
Signed-off-by: Chuck Lever Signed-off-by: Anna Schumaker --- include/trace/events/sunrpc.h | 55 +++++++++++++++++++------------------------ net/sunrpc/xprt.c | 8 ++----- 2 files changed, 26 insertions(+), 37 deletions(-) diff --git a/include/trace/events/sunrpc.h b/include/trace/events/sunrpc.h index 7addf7d1596b..781277bb9cb2 100644 --- a/include/trace/events/sunrpc.h +++ b/include/trace/events/sunrpc.h @@ -1031,37 +1031,6 @@ TRACE_EVENT(xprt_transmit, __entry->seqno, __entry->status) ); -TRACE_EVENT(xprt_enq_xmit, - TP_PROTO( - const struct rpc_task *task, - int stage - ), - - TP_ARGS(task, stage), - - TP_STRUCT__entry( - __field(unsigned int, task_id) - __field(unsigned int, client_id) - __field(u32, xid) - __field(u32, seqno) - __field(int, stage) - ), - - TP_fast_assign( - __entry->task_id = task->tk_pid; - __entry->client_id = task->tk_client ? - task->tk_client->cl_clid : -1; - __entry->xid = be32_to_cpu(task->tk_rqstp->rq_xid); - __entry->seqno = task->tk_rqstp->rq_seqno; - __entry->stage = stage; - ), - - TP_printk( - "task:%u@%u xid=0x%08x seqno=%u stage=%d", - __entry->task_id, __entry->client_id, __entry->xid, - __entry->seqno, __entry->stage) -); - TRACE_EVENT(xprt_ping, TP_PROTO(const struct rpc_xprt *xprt, int status), @@ -1176,6 +1145,30 @@ DEFINE_CONG_EVENT(release_cong); DEFINE_CONG_EVENT(get_cong); DEFINE_CONG_EVENT(put_cong); +TRACE_EVENT(xprt_reserve, + TP_PROTO( + const struct rpc_rqst *rqst + ), + + TP_ARGS(rqst), + + TP_STRUCT__entry( + __field(unsigned int, task_id) + __field(unsigned int, client_id) + __field(u32, xid) + ), + + TP_fast_assign( + __entry->task_id = rqst->rq_task->tk_pid; + __entry->client_id = rqst->rq_task->tk_client->cl_clid; + __entry->xid = be32_to_cpu(rqst->rq_xid); + ), + + TP_printk("task:%u@%u xid=0x%08x", + __entry->task_id, __entry->client_id, __entry->xid + ) +); + TRACE_EVENT(xs_stream_read_data, TP_PROTO(struct rpc_xprt *xprt, ssize_t err, size_t total), diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c index 
2217dfed8f88..0e4659bd72f4 100644 --- a/net/sunrpc/xprt.c +++ b/net/sunrpc/xprt.c @@ -1267,7 +1267,6 @@ xprt_request_enqueue_transmit(struct rpc_task *task) /* Note: req is added _before_ pos */ list_add_tail(&req->rq_xmit, &pos->rq_xmit); INIT_LIST_HEAD(&req->rq_xmit2); - trace_xprt_enq_xmit(task, 1); goto out; } } else if (RPC_IS_SWAPPER(task)) { @@ -1279,7 +1278,6 @@ xprt_request_enqueue_transmit(struct rpc_task *task) /* Note: req is added _before_ pos */ list_add_tail(&req->rq_xmit, &pos->rq_xmit); INIT_LIST_HEAD(&req->rq_xmit2); - trace_xprt_enq_xmit(task, 2); goto out; } } else if (!req->rq_seqno) { @@ -1288,13 +1286,11 @@ xprt_request_enqueue_transmit(struct rpc_task *task) continue; list_add_tail(&req->rq_xmit2, &pos->rq_xmit2); INIT_LIST_HEAD(&req->rq_xmit); - trace_xprt_enq_xmit(task, 3); goto out; } } list_add_tail(&req->rq_xmit, &xprt->xmit_queue); INIT_LIST_HEAD(&req->rq_xmit2); - trace_xprt_enq_xmit(task, 4); out: set_bit(RPC_TASK_NEED_XMIT, &task->tk_runstate); spin_unlock(&xprt->queue_lock); @@ -1745,8 +1741,8 @@ xprt_request_init(struct rpc_task *task) req->rq_rcv_buf.bvec = NULL; req->rq_release_snd_buf = NULL; xprt_init_majortimeo(task, req); - dprintk("RPC: %5u reserved req %p xid %08x\n", task->tk_pid, - req, ntohl(req->rq_xid)); + + trace_xprt_reserve(req); } static void -- cgit v1.2.3 From 9ce07ae5eb1d968c975a4dace0cac25d9e602c9a Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 8 Jul 2020 16:09:26 -0400 Subject: SUNRPC: Replace dprintk() call site in xprt_prepare_transmit Generate a trace event when an RPC request is queued without being sent immediately. 
Signed-off-by: Chuck Lever Signed-off-by: Anna Schumaker --- include/trace/events/sunrpc.h | 1 + net/sunrpc/xprt.c | 4 ++-- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/include/trace/events/sunrpc.h b/include/trace/events/sunrpc.h index 781277bb9cb2..9746a722c5a2 100644 --- a/include/trace/events/sunrpc.h +++ b/include/trace/events/sunrpc.h @@ -1093,6 +1093,7 @@ DECLARE_EVENT_CLASS(xprt_writelock_event, DEFINE_WRITELOCK_EVENT(reserve_xprt); DEFINE_WRITELOCK_EVENT(release_xprt); +DEFINE_WRITELOCK_EVENT(transmit_queued); DECLARE_EVENT_CLASS(xprt_cong_event, TP_PROTO( diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c index 0e4659bd72f4..9da7c6e72c2d 100644 --- a/net/sunrpc/xprt.c +++ b/net/sunrpc/xprt.c @@ -1408,9 +1408,9 @@ bool xprt_prepare_transmit(struct rpc_task *task) struct rpc_rqst *req = task->tk_rqstp; struct rpc_xprt *xprt = req->rq_xprt; - dprintk("RPC: %5u xprt_prepare_transmit\n", task->tk_pid); - if (!xprt_lock_write(xprt, task)) { + trace_xprt_transmit_queued(xprt, task); + /* Race breaker: someone may have transmitted us */ if (!test_bit(RPC_TASK_NEED_XMIT, &task->tk_runstate)) rpc_wake_up_queued_task_set_status(&xprt->sending, -- cgit v1.2.3 From 015747d296798510f94035a832252fcf477db434 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 8 Jul 2020 16:09:32 -0400 Subject: SUNRPC: Replace dprintk() call site in xs_nospace() "no socket space" is an exceptional and infrequent condition that troubleshooters want to know about. 
Signed-off-by: Chuck Lever Signed-off-by: Anna Schumaker --- include/trace/events/sunrpc.h | 28 ++++++++++++++++++++++++++++ net/sunrpc/xprtsock.c | 5 +---- 2 files changed, 29 insertions(+), 4 deletions(-) diff --git a/include/trace/events/sunrpc.h b/include/trace/events/sunrpc.h index 9746a722c5a2..90d7b9916fe3 100644 --- a/include/trace/events/sunrpc.h +++ b/include/trace/events/sunrpc.h @@ -898,6 +898,34 @@ DEFINE_RPC_SOCKET_EVENT_DONE(rpc_socket_reset_connection); DEFINE_RPC_SOCKET_EVENT(rpc_socket_close); DEFINE_RPC_SOCKET_EVENT(rpc_socket_shutdown); +TRACE_EVENT(rpc_socket_nospace, + TP_PROTO( + const struct rpc_rqst *rqst, + const struct sock_xprt *transport + ), + + TP_ARGS(rqst, transport), + + TP_STRUCT__entry( + __field(unsigned int, task_id) + __field(unsigned int, client_id) + __field(unsigned int, total) + __field(unsigned int, remaining) + ), + + TP_fast_assign( + __entry->task_id = rqst->rq_task->tk_pid; + __entry->client_id = rqst->rq_task->tk_client->cl_clid; + __entry->total = rqst->rq_slen; + __entry->remaining = rqst->rq_slen - transport->xmit.offset; + ), + + TP_printk("task:%u@%u total=%u remaining=%u", + __entry->task_id, __entry->client_id, + __entry->total, __entry->remaining + ) +); + TRACE_DEFINE_ENUM(XPRT_LOCKED); TRACE_DEFINE_ENUM(XPRT_CONNECTED); TRACE_DEFINE_ENUM(XPRT_CONNECTING); diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index 554e1bb4c1c7..7090bbee0ec5 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -762,10 +762,7 @@ static int xs_nospace(struct rpc_rqst *req) struct sock *sk = transport->inet; int ret = -EAGAIN; - dprintk("RPC: %5u xmit incomplete (%u left of %u)\n", - req->rq_task->tk_pid, - req->rq_slen - transport->xmit.offset, - req->rq_slen); + trace_rpc_socket_nospace(req, transport); /* Protect against races with write_space */ spin_lock(&xprt->transport_lock); -- cgit v1.2.3 From 6387039d6d753e02d9a6eed9a80a38a386d996d3 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 8 Jul 2020 
16:09:37 -0400 Subject: SUNRPC: Remove the dprint_status() macro Clean up: The rpc_task_run_action tracepoint serves the same purpose. Signed-off-by: Chuck Lever Signed-off-by: Anna Schumaker --- net/sunrpc/clnt.c | 31 +------------------------------ 1 file changed, 1 insertion(+), 30 deletions(-) diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index 3ab9e6a97898..b51e744206d2 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -47,10 +47,6 @@ # define RPCDBG_FACILITY RPCDBG_CALL #endif -#define dprint_status(t) \ - dprintk("RPC: %5u %s (status %d)\n", t->tk_pid, \ - __func__, t->tk_status) - /* * All RPC clients are linked into this list */ @@ -1658,8 +1654,6 @@ call_start(struct rpc_task *task) static void call_reserve(struct rpc_task *task) { - dprint_status(task); - task->tk_status = 0; task->tk_action = call_reserveresult; xprt_reserve(task); @@ -1675,8 +1669,6 @@ call_reserveresult(struct rpc_task *task) { int status = task->tk_status; - dprint_status(task); - /* * After a call to xprt_reserve(), we must have either * a request slot or else an error status. 
@@ -1717,8 +1709,6 @@ call_reserveresult(struct rpc_task *task) static void call_retry_reserve(struct rpc_task *task) { - dprint_status(task); - task->tk_status = 0; task->tk_action = call_reserveresult; xprt_retry_reserve(task); @@ -1730,8 +1720,6 @@ call_retry_reserve(struct rpc_task *task) static void call_refresh(struct rpc_task *task) { - dprint_status(task); - task->tk_action = call_refreshresult; task->tk_status = 0; task->tk_client->cl_stats->rpcauthrefresh++; @@ -1746,8 +1734,6 @@ call_refreshresult(struct rpc_task *task) { int status = task->tk_status; - dprint_status(task); - task->tk_status = 0; task->tk_action = call_refresh; switch (status) { @@ -1792,8 +1778,6 @@ call_allocate(struct rpc_task *task) const struct rpc_procinfo *proc = task->tk_msg.rpc_proc; int status; - dprint_status(task); - task->tk_status = 0; task->tk_action = call_encode; @@ -1882,7 +1866,7 @@ call_encode(struct rpc_task *task) { if (!rpc_task_need_encode(task)) goto out; - dprint_status(task); + /* Dequeue task from the receive queue while we're encoding */ xprt_request_dequeue_xprt(task); /* Encode here so that rpcsec_gss can use correct sequence number. 
*/ @@ -1959,8 +1943,6 @@ call_bind(struct rpc_task *task) return; } - dprint_status(task); - task->tk_action = call_bind_status; if (!xprt_prepare_transmit(task)) return; @@ -1982,7 +1964,6 @@ call_bind_status(struct rpc_task *task) return; } - dprint_status(task); trace_rpc_bind_status(task); if (task->tk_status >= 0) goto out_next; @@ -2109,7 +2090,6 @@ call_connect_status(struct rpc_task *task) return; } - dprint_status(task); trace_rpc_connect_status(task); if (task->tk_status == 0) { @@ -2177,8 +2157,6 @@ call_transmit(struct rpc_task *task) return; } - dprint_status(task); - task->tk_action = call_transmit_status; if (!xprt_prepare_transmit(task)) return; @@ -2213,7 +2191,6 @@ call_transmit_status(struct rpc_task *task) switch (task->tk_status) { default: - dprint_status(task); break; case -EBADMSG: task->tk_status = 0; @@ -2295,8 +2272,6 @@ call_bc_transmit_status(struct rpc_task *task) if (rpc_task_transmitted(task)) task->tk_status = 0; - dprint_status(task); - switch (task->tk_status) { case 0: /* Success */ @@ -2356,8 +2331,6 @@ call_status(struct rpc_task *task) if (!task->tk_msg.rpc_proc->p_proc) trace_xprt_ping(task->tk_xprt, task->tk_status); - dprint_status(task); - status = task->tk_status; if (status >= 0) { task->tk_action = call_decode; @@ -2492,8 +2465,6 @@ call_decode(struct rpc_task *task) struct xdr_stream xdr; int err; - dprint_status(task); - if (!task->tk_msg.rpc_proc->p_decode) { task->tk_action = rpc_exit_task; return; -- cgit v1.2.3 From 0ec36cc9cd991d0cd9055949abd582921bbcfea8 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 8 Jul 2020 16:09:42 -0400 Subject: SUNRPC: Remove dprintk call site in call_start() Clean up: The rpc_rpc_request tracepoint serves the same purpose. 
Signed-off-by: Chuck Lever Signed-off-by: Anna Schumaker --- net/sunrpc/clnt.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index b51e744206d2..7d97d18df9bd 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -1635,10 +1635,6 @@ call_start(struct rpc_task *task) int idx = task->tk_msg.rpc_proc->p_statidx; trace_rpc_request(task); - dprintk("RPC: %5u call_start %s%d proc %s (%s)\n", task->tk_pid, - clnt->cl_program->name, clnt->cl_vers, - rpc_proc_name(task), - (RPC_IS_ASYNC(task) ? "async" : "sync")); /* Increment call count (version might not be valid for ping) */ if (clnt->cl_program->version[clnt->cl_vers]) -- cgit v1.2.3 From db0a86c4266ac36769f54e2ce33fff0300c9bc00 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 8 Jul 2020 16:09:47 -0400 Subject: SUNRPC: Replace connect dprintk call sites with a tracepoint This trace event can be used to audit transport connections from the client. Signed-off-by: Chuck Lever Signed-off-by: Anna Schumaker --- include/trace/events/sunrpc.h | 1 + net/sunrpc/clnt.c | 4 ---- net/sunrpc/xprt.c | 3 +-- 3 files changed, 2 insertions(+), 6 deletions(-) diff --git a/include/trace/events/sunrpc.h b/include/trace/events/sunrpc.h index 90d7b9916fe3..0aa15cc3985d 100644 --- a/include/trace/events/sunrpc.h +++ b/include/trace/events/sunrpc.h @@ -983,6 +983,7 @@ DECLARE_EVENT_CLASS(rpc_xprt_lifetime_class, TP_ARGS(xprt)) DEFINE_RPC_XPRT_LIFETIME_EVENT(create); +DEFINE_RPC_XPRT_LIFETIME_EVENT(connect); DEFINE_RPC_XPRT_LIFETIME_EVENT(disconnect_auto); DEFINE_RPC_XPRT_LIFETIME_EVENT(disconnect_done); DEFINE_RPC_XPRT_LIFETIME_EVENT(disconnect_force); diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index 7d97d18df9bd..48d86814b942 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -2055,10 +2055,6 @@ call_connect(struct rpc_task *task) return; } - dprintk("RPC: %5u call_connect xprt %p %s connected\n", - task->tk_pid, xprt, - (xprt_connected(xprt) ? 
"is" : "is not")); - task->tk_action = call_connect_status; if (task->tk_status < 0) return; diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c index 9da7c6e72c2d..ceb190d1754d 100644 --- a/net/sunrpc/xprt.c +++ b/net/sunrpc/xprt.c @@ -834,8 +834,7 @@ void xprt_connect(struct rpc_task *task) { struct rpc_xprt *xprt = task->tk_rqstp->rq_xprt; - dprintk("RPC: %5u xprt_connect xprt %p %s connected\n", task->tk_pid, - xprt, (xprt_connected(xprt) ? "is" : "is not")); + trace_xprt_connect(xprt); if (!xprt_bound(xprt)) { task->tk_status = -EAGAIN; -- cgit v1.2.3 From 6f9f17287e78e5049931af2037b15b26d134a32a Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 8 Jul 2020 16:09:53 -0400 Subject: SUNRPC: Mitigate cond_resched() in xprt_transmit() The original purpose of this expensive call is to prevent a long queue of requests from blocking other work. The cond_resched() call is unnecessary after just a single send operation. For longer queues, instead of invoking the kernel scheduler, simply release the transport send lock and return to the RPC scheduler. 
Signed-off-by: Chuck Lever Signed-off-by: Anna Schumaker --- net/sunrpc/xprt.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c index ceb190d1754d..f6c17e75f20e 100644 --- a/net/sunrpc/xprt.c +++ b/net/sunrpc/xprt.c @@ -1513,10 +1513,13 @@ xprt_transmit(struct rpc_task *task) { struct rpc_rqst *next, *req = task->tk_rqstp; struct rpc_xprt *xprt = req->rq_xprt; - int status; + int counter, status; spin_lock(&xprt->queue_lock); + counter = 0; while (!list_empty(&xprt->xmit_queue)) { + if (++counter == 20) + break; next = list_first_entry(&xprt->xmit_queue, struct rpc_rqst, rq_xmit); xprt_pin_rqst(next); @@ -1524,7 +1527,6 @@ xprt_transmit(struct rpc_task *task) status = xprt_request_transmit(next, task); if (status == -EBADMSG && next != req) status = 0; - cond_resched(); spin_lock(&xprt->queue_lock); xprt_unpin_rqst(next); if (status == 0) { -- cgit v1.2.3 From 914cdcc78a668a90bc627542f82fc3a92525141c Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 8 Jul 2020 16:09:58 -0400 Subject: SUNRPC: Add trace_rpc_timeout_status() For a long while we've wanted a tracepoint that fires when a major timeout is reported in the system log. Such a tracepoint can be attached to other actions that can take place when a timeout is detected (eg, server or connection health assessment). 
Signed-off-by: Chuck Lever Signed-off-by: Anna Schumaker --- include/trace/events/sunrpc.h | 1 + net/sunrpc/clnt.c | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/include/trace/events/sunrpc.h b/include/trace/events/sunrpc.h index 0aa15cc3985d..edb41e187cf6 100644 --- a/include/trace/events/sunrpc.h +++ b/include/trace/events/sunrpc.h @@ -261,6 +261,7 @@ DECLARE_EVENT_CLASS(rpc_task_status, DEFINE_RPC_STATUS_EVENT(call); DEFINE_RPC_STATUS_EVENT(bind); DEFINE_RPC_STATUS_EVENT(connect); +DEFINE_RPC_STATUS_EVENT(timeout); TRACE_EVENT(rpc_request, TP_PROTO(const struct rpc_task *task), diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index 48d86814b942..cabde92f283d 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -2398,7 +2398,7 @@ rpc_check_timeout(struct rpc_task *task) if (xprt_adjust_timeout(task->tk_rqstp) == 0) return; - dprintk("RPC: %5u call_timeout (major)\n", task->tk_pid); + trace_rpc_timeout_status(task); task->tk_timeouts++; if (RPC_IS_SOFTCONN(task) && !rpc_check_connected(task->tk_rqstp)) { -- cgit v1.2.3 From 7c8099f6ada2654ef87aa992ec1b69b86249a353 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 8 Jul 2020 16:10:03 -0400 Subject: SUNRPC: Trace call_refresh events Clean up: Replace dprintk call sites. Note that rpc_call_rpcerror() already has a trace point, so perhaps adding trace_rpc_refresh_status() isn't necessary. However, it does report a particular category of error. 
Signed-off-by: Chuck Lever Signed-off-by: Anna Schumaker --- include/trace/events/sunrpc.h | 2 ++ net/sunrpc/clnt.c | 9 +++------ 2 files changed, 5 insertions(+), 6 deletions(-) diff --git a/include/trace/events/sunrpc.h b/include/trace/events/sunrpc.h index edb41e187cf6..3c5d707f7d54 100644 --- a/include/trace/events/sunrpc.h +++ b/include/trace/events/sunrpc.h @@ -262,6 +262,8 @@ DEFINE_RPC_STATUS_EVENT(call); DEFINE_RPC_STATUS_EVENT(bind); DEFINE_RPC_STATUS_EVENT(connect); DEFINE_RPC_STATUS_EVENT(timeout); +DEFINE_RPC_STATUS_EVENT(retry_refresh); +DEFINE_RPC_STATUS_EVENT(refresh); TRACE_EVENT(rpc_request, TP_PROTO(const struct rpc_task *task), diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index cabde92f283d..af7c88e41e11 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -1752,12 +1752,10 @@ call_refreshresult(struct rpc_task *task) if (!task->tk_cred_retry) break; task->tk_cred_retry--; - dprintk("RPC: %5u %s: retry refresh creds\n", - task->tk_pid, __func__); + trace_rpc_retry_refresh_status(task); return; } - dprintk("RPC: %5u %s: refresh creds failed with error %d\n", - task->tk_pid, __func__, status); + trace_rpc_refresh_status(task); rpc_call_rpcerror(task, status); } @@ -1881,8 +1879,7 @@ call_encode(struct rpc_task *task) } else { task->tk_action = call_refresh; task->tk_cred_retry--; - dprintk("RPC: %5u %s: retry refresh creds\n", - task->tk_pid, __func__); + trace_rpc_retry_refresh_status(task); } break; default: -- cgit v1.2.3 From fd66e2a79d96efcbea78e0dc33797e25fe353a71 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 8 Jul 2020 16:10:08 -0400 Subject: SUNRPC: Remove dprintk call site in call_decode Clean up. When enabled, this dprintk adds a line in /var/log/messages after every RPC that reports the task ID (no connection to on the wire XID values) and the RPC's result (no connection to the program, operation, or the arguments and results). Thus its value is pretty low. Let's remove it. 
Signed-off-by: Chuck Lever Signed-off-by: Anna Schumaker --- net/sunrpc/clnt.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index af7c88e41e11..e13db512b164 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -2497,8 +2497,6 @@ out: case 0: task->tk_action = rpc_exit_task; task->tk_status = rpcauth_unwrap_resp(task, &xdr); - dprintk("RPC: %5u %s result %d\n", - task->tk_pid, __func__, task->tk_status); return; case -EAGAIN: task->tk_status = 0; -- cgit v1.2.3 From 42ebfc2cbf22df0abf2a17414db256d1db87c154 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 8 Jul 2020 16:10:14 -0400 Subject: SUNRPC: Clean up call_bind_status() observability Time to remove dprintk call sites in here. Regarding the rpc_bind_status tracepoint: It's friendlier to administrators if they don't have to look up the error code to figure out what went wrong. Replace trace_rpc_bind_status with a set of tracepoints that report more specifically what the problem was, and what RPC program/version was being queried. 
Signed-off-by: Chuck Lever Signed-off-by: Anna Schumaker --- include/trace/events/sunrpc.h | 14 +++++++++++++- net/sunrpc/clnt.c | 20 ++++++-------------- 2 files changed, 19 insertions(+), 15 deletions(-) diff --git a/include/trace/events/sunrpc.h b/include/trace/events/sunrpc.h index 3c5d707f7d54..353d9a18e254 100644 --- a/include/trace/events/sunrpc.h +++ b/include/trace/events/sunrpc.h @@ -259,7 +259,6 @@ DECLARE_EVENT_CLASS(rpc_task_status, TP_ARGS(task)) DEFINE_RPC_STATUS_EVENT(call); -DEFINE_RPC_STATUS_EVENT(bind); DEFINE_RPC_STATUS_EVENT(connect); DEFINE_RPC_STATUS_EVENT(timeout); DEFINE_RPC_STATUS_EVENT(retry_refresh); @@ -520,6 +519,19 @@ DEFINE_RPC_REPLY_EVENT(stale_creds); DEFINE_RPC_REPLY_EVENT(bad_creds); DEFINE_RPC_REPLY_EVENT(auth_tooweak); +#define DEFINE_RPCB_ERROR_EVENT(name) \ + DEFINE_EVENT(rpc_reply_event, rpcb_##name##_err, \ + TP_PROTO( \ + const struct rpc_task *task \ + ), \ + TP_ARGS(task)) + +DEFINE_RPCB_ERROR_EVENT(prog_unavail); +DEFINE_RPCB_ERROR_EVENT(timeout); +DEFINE_RPCB_ERROR_EVENT(bind_version); +DEFINE_RPCB_ERROR_EVENT(unreachable); +DEFINE_RPCB_ERROR_EVENT(unrecognized); + TRACE_EVENT(rpc_buf_alloc, TP_PROTO( const struct rpc_task *task, diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index e13db512b164..3259120462ed 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -1957,7 +1957,6 @@ call_bind_status(struct rpc_task *task) return; } - trace_rpc_bind_status(task); if (task->tk_status >= 0) goto out_next; if (xprt_bound(xprt)) { @@ -1967,12 +1966,10 @@ call_bind_status(struct rpc_task *task) switch (task->tk_status) { case -ENOMEM: - dprintk("RPC: %5u rpcbind out of memory\n", task->tk_pid); rpc_delay(task, HZ >> 2); goto retry_timeout; case -EACCES: - dprintk("RPC: %5u remote rpcbind: RPC program/version " - "unavailable\n", task->tk_pid); + trace_rpcb_prog_unavail_err(task); /* fail immediately if this is an RPC ping */ if (task->tk_msg.rpc_proc->p_proc == 0) { status = -EOPNOTSUPP; @@ -1989,17 +1986,14 @@ 
call_bind_status(struct rpc_task *task) case -EAGAIN: goto retry_timeout; case -ETIMEDOUT: - dprintk("RPC: %5u rpcbind request timed out\n", - task->tk_pid); + trace_rpcb_timeout_err(task); goto retry_timeout; case -EPFNOSUPPORT: /* server doesn't support any rpcbind version we know of */ - dprintk("RPC: %5u unrecognized remote rpcbind service\n", - task->tk_pid); + trace_rpcb_bind_version_err(task); break; case -EPROTONOSUPPORT: - dprintk("RPC: %5u remote rpcbind version unavailable, retrying\n", - task->tk_pid); + trace_rpcb_bind_version_err(task); goto retry_timeout; case -ECONNREFUSED: /* connection problems */ case -ECONNRESET: @@ -2010,8 +2004,7 @@ call_bind_status(struct rpc_task *task) case -EHOSTUNREACH: case -ENETUNREACH: case -EPIPE: - dprintk("RPC: %5u remote rpcbind unreachable: %d\n", - task->tk_pid, task->tk_status); + trace_rpcb_unreachable_err(task); if (!RPC_IS_SOFTCONN(task)) { rpc_delay(task, 5*HZ); goto retry_timeout; @@ -2019,8 +2012,7 @@ call_bind_status(struct rpc_task *task) status = task->tk_status; break; default: - dprintk("RPC: %5u unrecognized rpcbind error (%d)\n", - task->tk_pid, -task->tk_status); + trace_rpcb_unrecognized_err(task); } rpc_call_rpcerror(task, status); -- cgit v1.2.3 From e465cc3fa86341121205d3faca26866bdf331ed5 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 8 Jul 2020 16:10:19 -0400 Subject: SUNRPC: Remove rpcb_getport_async dprintk call sites In many cases, tracepoints already report these errors. In others, the dprintks were mainly useful when this code was less mature. 
Signed-off-by: Chuck Lever Signed-off-by: Anna Schumaker --- include/trace/events/sunrpc.h | 35 +++++++++++++++++++++++++++++++++++ net/sunrpc/rpcb_clnt.c | 23 +++-------------------- 2 files changed, 38 insertions(+), 20 deletions(-) diff --git a/include/trace/events/sunrpc.h b/include/trace/events/sunrpc.h index 353d9a18e254..6e5eba54fd30 100644 --- a/include/trace/events/sunrpc.h +++ b/include/trace/events/sunrpc.h @@ -1269,6 +1269,41 @@ TRACE_EVENT(xs_stream_read_request, __entry->copied, __entry->reclen, __entry->offset) ); +TRACE_EVENT(rpcb_getport, + TP_PROTO( + const struct rpc_clnt *clnt, + const struct rpc_task *task, + unsigned int bind_version + ), + + TP_ARGS(clnt, task, bind_version), + + TP_STRUCT__entry( + __field(unsigned int, task_id) + __field(unsigned int, client_id) + __field(unsigned int, program) + __field(unsigned int, version) + __field(int, protocol) + __field(unsigned int, bind_version) + __string(servername, task->tk_xprt->servername) + ), + + TP_fast_assign( + __entry->task_id = task->tk_pid; + __entry->client_id = clnt->cl_clid; + __entry->program = clnt->cl_prog; + __entry->version = clnt->cl_vers; + __entry->protocol = task->tk_xprt->prot; + __entry->bind_version = bind_version; + __assign_str(servername, task->tk_xprt->servername); + ), + + TP_printk("task:%u@%u server=%s program=%u version=%u protocol=%d bind_version=%u", + __entry->task_id, __entry->client_id, __get_str(servername), + __entry->program, __entry->version, __entry->protocol, + __entry->bind_version + ) +); DECLARE_EVENT_CLASS(svc_xdr_buf_class, TP_PROTO( diff --git a/net/sunrpc/rpcb_clnt.c b/net/sunrpc/rpcb_clnt.c index 4a67685c83eb..e306aab89a0b 100644 --- a/net/sunrpc/rpcb_clnt.c +++ b/net/sunrpc/rpcb_clnt.c @@ -31,6 +31,8 @@ #include #include +#include + #include "netns.h" #if IS_ENABLED(CONFIG_SUNRPC_DEBUG) @@ -693,18 +695,12 @@ void rpcb_getport_async(struct rpc_task *task) rcu_read_unlock(); xprt = xprt_get(task->tk_xprt); - dprintk("RPC: %5u %s(%s, %u, %u, 
%d)\n", - task->tk_pid, __func__, - xprt->servername, clnt->cl_prog, clnt->cl_vers, xprt->prot); - /* Put self on the wait queue to ensure we get notified if * some other task is already attempting to bind the port */ rpc_sleep_on_timeout(&xprt->binding, task, NULL, jiffies + xprt->bind_timeout); if (xprt_test_and_set_binding(xprt)) { - dprintk("RPC: %5u %s: waiting for another binder\n", - task->tk_pid, __func__); xprt_put(xprt); return; } @@ -712,8 +708,6 @@ void rpcb_getport_async(struct rpc_task *task) /* Someone else may have bound if we slept */ if (xprt_bound(xprt)) { status = 0; - dprintk("RPC: %5u %s: already bound\n", - task->tk_pid, __func__); goto bailout_nofree; } @@ -732,20 +726,15 @@ void rpcb_getport_async(struct rpc_task *task) break; default: status = -EAFNOSUPPORT; - dprintk("RPC: %5u %s: bad address family\n", - task->tk_pid, __func__); goto bailout_nofree; } if (proc == NULL) { xprt->bind_index = 0; status = -EPFNOSUPPORT; - dprintk("RPC: %5u %s: no more getport versions available\n", - task->tk_pid, __func__); goto bailout_nofree; } - dprintk("RPC: %5u %s: trying rpcbind version %u\n", - task->tk_pid, __func__, bind_version); + trace_rpcb_getport(clnt, task, bind_version); rpcb_clnt = rpcb_create(xprt->xprt_net, clnt->cl_nodename, @@ -754,16 +743,12 @@ void rpcb_getport_async(struct rpc_task *task) clnt->cl_cred); if (IS_ERR(rpcb_clnt)) { status = PTR_ERR(rpcb_clnt); - dprintk("RPC: %5u %s: rpcb_create failed, error %ld\n", - task->tk_pid, __func__, PTR_ERR(rpcb_clnt)); goto bailout_nofree; } map = kzalloc(sizeof(struct rpcbind_args), GFP_NOFS); if (!map) { status = -ENOMEM; - dprintk("RPC: %5u %s: no memory available\n", - task->tk_pid, __func__); goto bailout_release_client; } map->r_prog = clnt->cl_prog; @@ -780,8 +765,6 @@ void rpcb_getport_async(struct rpc_task *task) map->r_addr = rpc_sockaddr2uaddr(sap, GFP_NOFS); if (!map->r_addr) { status = -ENOMEM; - dprintk("RPC: %5u %s: no memory available\n", - task->tk_pid, __func__); goto 
bailout_free_args; } map->r_owner = ""; -- cgit v1.2.3 From ac1ae534215b9b0a346547654b4720abd0882f15 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 8 Jul 2020 16:10:24 -0400 Subject: SUNRPC: Hoist trace_xprtrdma_op_setport into generic code Signed-off-by: Chuck Lever Signed-off-by: Anna Schumaker --- include/trace/events/rpcrdma.h | 1 - include/trace/events/sunrpc.h | 29 +++++++++++++++++++++++++++++ net/sunrpc/rpcb_clnt.c | 29 ++++++++++++++--------------- net/sunrpc/xprtrdma/transport.c | 3 --- 4 files changed, 43 insertions(+), 19 deletions(-) diff --git a/include/trace/events/rpcrdma.h b/include/trace/events/rpcrdma.h index 1e17c2fc9640..ad46bec3a65e 100644 --- a/include/trace/events/rpcrdma.h +++ b/include/trace/events/rpcrdma.h @@ -423,7 +423,6 @@ DEFINE_CONN_EVENT(connect); DEFINE_CONN_EVENT(disconnect); DEFINE_RXPRT_EVENT(xprtrdma_op_inject_dsc); -DEFINE_RXPRT_EVENT(xprtrdma_op_setport); TRACE_EVENT(xprtrdma_op_connect, TP_PROTO( diff --git a/include/trace/events/sunrpc.h b/include/trace/events/sunrpc.h index 6e5eba54fd30..05291ce099d6 100644 --- a/include/trace/events/sunrpc.h +++ b/include/trace/events/sunrpc.h @@ -1305,6 +1305,35 @@ TRACE_EVENT(rpcb_getport, ) ); +TRACE_EVENT(rpcb_setport, + TP_PROTO( + const struct rpc_task *task, + int status, + unsigned short port + ), + + TP_ARGS(task, status, port), + + TP_STRUCT__entry( + __field(unsigned int, task_id) + __field(unsigned int, client_id) + __field(int, status) + __field(unsigned short, port) + ), + + TP_fast_assign( + __entry->task_id = task->tk_pid; + __entry->client_id = task->tk_client->cl_clid; + __entry->status = status; + __entry->port = port; + ), + + TP_printk("task:%u@%u status=%d port=%u", + __entry->task_id, __entry->client_id, + __entry->status, __entry->port + ) +); + DECLARE_EVENT_CLASS(svc_xdr_buf_class, TP_PROTO( const struct svc_rqst *rqst, diff --git a/net/sunrpc/rpcb_clnt.c b/net/sunrpc/rpcb_clnt.c index e306aab89a0b..cdf86c0580d3 100644 --- a/net/sunrpc/rpcb_clnt.c +++ 
b/net/sunrpc/rpcb_clnt.c @@ -801,34 +801,33 @@ static void rpcb_getport_done(struct rpc_task *child, void *data) { struct rpcbind_args *map = data; struct rpc_xprt *xprt = map->r_xprt; - int status = child->tk_status; + + map->r_status = child->tk_status; /* Garbage reply: retry with a lesser rpcbind version */ - if (status == -EIO) - status = -EPROTONOSUPPORT; + if (map->r_status == -EIO) + map->r_status = -EPROTONOSUPPORT; /* rpcbind server doesn't support this rpcbind protocol version */ - if (status == -EPROTONOSUPPORT) + if (map->r_status == -EPROTONOSUPPORT) xprt->bind_index++; - if (status < 0) { + if (map->r_status < 0) { /* rpcbind server not available on remote host? */ - xprt->ops->set_port(xprt, 0); + map->r_port = 0; + } else if (map->r_port == 0) { /* Requested RPC service wasn't registered on remote host */ - xprt->ops->set_port(xprt, 0); - status = -EACCES; + map->r_status = -EACCES; } else { /* Succeeded */ - xprt->ops->set_port(xprt, map->r_port); - xprt_set_bound(xprt); - status = 0; + map->r_status = 0; } - dprintk("RPC: %5u rpcb_getport_done(status %d, port %u)\n", - child->tk_pid, status, map->r_port); - - map->r_status = status; + trace_rpcb_setport(child, map->r_status, map->r_port); + xprt->ops->set_port(xprt, map->r_port); + if (map->r_port) + xprt_set_bound(xprt); } /* diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c index 819a922830da..8915e42240d3 100644 --- a/net/sunrpc/xprtrdma/transport.c +++ b/net/sunrpc/xprtrdma/transport.c @@ -413,9 +413,6 @@ xprt_rdma_set_port(struct rpc_xprt *xprt, u16 port) kfree(xprt->address_strings[RPC_DISPLAY_HEX_PORT]); snprintf(buf, sizeof(buf), "%4hx", port); xprt->address_strings[RPC_DISPLAY_HEX_PORT] = kstrdup(buf, GFP_KERNEL); - - trace_xprtrdma_op_setport(container_of(xprt, struct rpcrdma_xprt, - rx_xprt)); } /** -- cgit v1.2.3 From 15a798d6ce7ea67680973541e8a690edee9a71e9 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 8 Jul 2020 16:10:29 -0400 Subject: SUNRPC: 
Remove dprintk call sites in rpcbind XDR functions Clean up: Other XDR functions no longer have dprintk call sites. These were added during development and can be removed now that the code is mature. Signed-off-by: Chuck Lever Signed-off-by: Anna Schumaker --- net/sunrpc/rpcb_clnt.c | 28 +--------------------------- 1 file changed, 1 insertion(+), 27 deletions(-) diff --git a/net/sunrpc/rpcb_clnt.c b/net/sunrpc/rpcb_clnt.c index cdf86c0580d3..6df12a13edc6 100644 --- a/net/sunrpc/rpcb_clnt.c +++ b/net/sunrpc/rpcb_clnt.c @@ -840,11 +840,6 @@ static void rpcb_enc_mapping(struct rpc_rqst *req, struct xdr_stream *xdr, const struct rpcbind_args *rpcb = data; __be32 *p; - dprintk("RPC: %5u encoding PMAP_%s call (%u, %u, %d, %u)\n", - req->rq_task->tk_pid, - req->rq_task->tk_msg.rpc_proc->p_name, - rpcb->r_prog, rpcb->r_vers, rpcb->r_prot, rpcb->r_port); - p = xdr_reserve_space(xdr, RPCB_mappingargs_sz << 2); *p++ = cpu_to_be32(rpcb->r_prog); *p++ = cpu_to_be32(rpcb->r_vers); @@ -866,8 +861,6 @@ static int rpcb_dec_getport(struct rpc_rqst *req, struct xdr_stream *xdr, return -EIO; port = be32_to_cpup(p); - dprintk("RPC: %5u PMAP_%s result: %lu\n", req->rq_task->tk_pid, - req->rq_task->tk_msg.rpc_proc->p_name, port); if (unlikely(port > USHRT_MAX)) return -EIO; @@ -888,11 +881,6 @@ static int rpcb_dec_set(struct rpc_rqst *req, struct xdr_stream *xdr, *boolp = 0; if (*p != xdr_zero) *boolp = 1; - - dprintk("RPC: %5u RPCB_%s call %s\n", - req->rq_task->tk_pid, - req->rq_task->tk_msg.rpc_proc->p_name, - (*boolp ? 
"succeeded" : "failed")); return 0; } @@ -917,12 +905,6 @@ static void rpcb_enc_getaddr(struct rpc_rqst *req, struct xdr_stream *xdr, const struct rpcbind_args *rpcb = data; __be32 *p; - dprintk("RPC: %5u encoding RPCB_%s call (%u, %u, '%s', '%s')\n", - req->rq_task->tk_pid, - req->rq_task->tk_msg.rpc_proc->p_name, - rpcb->r_prog, rpcb->r_vers, - rpcb->r_netid, rpcb->r_addr); - p = xdr_reserve_space(xdr, (RPCB_program_sz + RPCB_version_sz) << 2); *p++ = cpu_to_be32(rpcb->r_prog); *p = cpu_to_be32(rpcb->r_vers); @@ -952,11 +934,8 @@ static int rpcb_dec_getaddr(struct rpc_rqst *req, struct xdr_stream *xdr, * If the returned universal address is a null string, * the requested RPC service was not registered. */ - if (len == 0) { - dprintk("RPC: %5u RPCB reply: program not registered\n", - req->rq_task->tk_pid); + if (len == 0) return 0; - } if (unlikely(len > RPCBIND_MAXUADDRLEN)) goto out_fail; @@ -964,8 +943,6 @@ static int rpcb_dec_getaddr(struct rpc_rqst *req, struct xdr_stream *xdr, p = xdr_inline_decode(xdr, len); if (unlikely(p == NULL)) goto out_fail; - dprintk("RPC: %5u RPCB_%s reply: %*pE\n", req->rq_task->tk_pid, - req->rq_task->tk_msg.rpc_proc->p_name, len, (char *)p); if (rpc_uaddr2sockaddr(req->rq_xprt->xprt_net, (char *)p, len, sap, sizeof(address)) == 0) @@ -975,9 +952,6 @@ static int rpcb_dec_getaddr(struct rpc_rqst *req, struct xdr_stream *xdr, return 0; out_fail: - dprintk("RPC: %5u malformed RPCB_%s reply\n", - req->rq_task->tk_pid, - req->rq_task->tk_msg.rpc_proc->p_name); return -EIO; } -- cgit v1.2.3 From 1e664987a9165ada0dfb347a9e6cf935a6d495e7 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 8 Jul 2020 16:10:35 -0400 Subject: SUNRPC: Remove more dprintks in rpcb_clnt.c Clean up: These are superfluous now that rpc_create() and friends have tracepoints to report errors. 
Signed-off-by: Chuck Lever Signed-off-by: Anna Schumaker --- net/sunrpc/rpcb_clnt.c | 25 +++---------------------- 1 file changed, 3 insertions(+), 22 deletions(-) diff --git a/net/sunrpc/rpcb_clnt.c b/net/sunrpc/rpcb_clnt.c index 6df12a13edc6..af2882c62a3b 100644 --- a/net/sunrpc/rpcb_clnt.c +++ b/net/sunrpc/rpcb_clnt.c @@ -218,10 +218,6 @@ static void rpcb_set_local(struct net *net, struct rpc_clnt *clnt, sn->rpcb_is_af_local = is_af_local ? 1 : 0; smp_wmb(); sn->rpcb_users = 1; - dprintk("RPC: created new rpcb local clients (rpcb_local_clnt: " - "%p, rpcb_local_clnt4: %p) for net %x%s\n", - sn->rpcb_local_clnt, sn->rpcb_local_clnt4, - net->ns.inum, (net == &init_net) ? " (init_net)" : ""); } /* @@ -263,19 +259,13 @@ static int rpcb_create_local_unix(struct net *net) */ clnt = rpc_create(&args); if (IS_ERR(clnt)) { - dprintk("RPC: failed to create AF_LOCAL rpcbind " - "client (errno %ld).\n", PTR_ERR(clnt)); result = PTR_ERR(clnt); goto out; } clnt4 = rpc_bind_new_program(clnt, &rpcb_program, RPCBVERS_4); - if (IS_ERR(clnt4)) { - dprintk("RPC: failed to bind second program to " - "rpcbind v4 client (errno %ld).\n", - PTR_ERR(clnt4)); + if (IS_ERR(clnt4)) clnt4 = NULL; - } rpcb_set_local(net, clnt, clnt4, true); @@ -311,8 +301,6 @@ static int rpcb_create_local_net(struct net *net) clnt = rpc_create(&args); if (IS_ERR(clnt)) { - dprintk("RPC: failed to create local rpcbind " - "client (errno %ld).\n", PTR_ERR(clnt)); result = PTR_ERR(clnt); goto out; } @@ -323,12 +311,8 @@ static int rpcb_create_local_net(struct net *net) * v4 upcalls. 
*/ clnt4 = rpc_bind_new_program(clnt, &rpcb_program, RPCBVERS_4); - if (IS_ERR(clnt4)) { - dprintk("RPC: failed to bind second program to " - "rpcbind v4 client (errno %ld).\n", - PTR_ERR(clnt4)); + if (IS_ERR(clnt4)) clnt4 = NULL; - } rpcb_set_local(net, clnt, clnt4, false); @@ -405,11 +389,8 @@ static int rpcb_register_call(struct sunrpc_net *sn, struct rpc_clnt *clnt, stru msg->rpc_resp = &result; error = rpc_call_sync(clnt, msg, flags); - if (error < 0) { - dprintk("RPC: failed to contact local rpcbind " - "server (errno %d).\n", -error); + if (error < 0) return error; - } if (!result) return -EACCES; -- cgit v1.2.3 From c3adcc7dfbfcb1bd7e29fa95f03d1f96b514e03e Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 8 Jul 2020 16:10:40 -0400 Subject: SUNRPC: Replace rpcbind dprintk call sites with tracepoints In many cases, tracepoints already report these errors. In others, the dprintks were mainly useful when this code was less mature. Signed-off-by: Chuck Lever Signed-off-by: Anna Schumaker --- include/trace/events/sunrpc.h | 86 +++++++++++++++++++++++++++++++++++++++++++ net/sunrpc/rpcb_clnt.c | 24 ++---------- 2 files changed, 90 insertions(+), 20 deletions(-) diff --git a/include/trace/events/sunrpc.h b/include/trace/events/sunrpc.h index 05291ce099d6..b5d4cbbaf4b0 100644 --- a/include/trace/events/sunrpc.h +++ b/include/trace/events/sunrpc.h @@ -1334,6 +1334,92 @@ TRACE_EVENT(rpcb_setport, ) ); +TRACE_EVENT(pmap_register, + TP_PROTO( + u32 program, + u32 version, + int protocol, + unsigned short port + ), + + TP_ARGS(program, version, protocol, port), + + TP_STRUCT__entry( + __field(unsigned int, program) + __field(unsigned int, version) + __field(int, protocol) + __field(unsigned int, port) + ), + + TP_fast_assign( + __entry->program = program; + __entry->version = version; + __entry->protocol = protocol; + __entry->port = port; + ), + + TP_printk("program=%u version=%u protocol=%d port=%u", + __entry->program, __entry->version, + __entry->protocol, 
__entry->port + ) +); + +TRACE_EVENT(rpcb_register, + TP_PROTO( + u32 program, + u32 version, + const char *addr, + const char *netid + ), + + TP_ARGS(program, version, addr, netid), + + TP_STRUCT__entry( + __field(unsigned int, program) + __field(unsigned int, version) + __string(addr, addr) + __string(netid, netid) + ), + + TP_fast_assign( + __entry->program = program; + __entry->version = version; + __assign_str(addr, addr); + __assign_str(netid, netid); + ), + + TP_printk("program=%u version=%u addr=%s netid=%s", + __entry->program, __entry->version, + __get_str(addr), __get_str(netid) + ) +); + +TRACE_EVENT(rpcb_unregister, + TP_PROTO( + u32 program, + u32 version, + const char *netid + ), + + TP_ARGS(program, version, netid), + + TP_STRUCT__entry( + __field(unsigned int, program) + __field(unsigned int, version) + __string(netid, netid) + ), + + TP_fast_assign( + __entry->program = program; + __entry->version = version; + __assign_str(netid, netid); + ), + + TP_printk("program=%u version=%u netid=%s", + __entry->program, __entry->version, __get_str(netid) + ) +); + DECLARE_EVENT_CLASS(svc_xdr_buf_class, TP_PROTO( const struct svc_rqst *rqst, diff --git a/net/sunrpc/rpcb_clnt.c b/net/sunrpc/rpcb_clnt.c index af2882c62a3b..38fe2ce8a5aa 100644 --- a/net/sunrpc/rpcb_clnt.c +++ b/net/sunrpc/rpcb_clnt.c @@ -35,10 +35,6 @@ #include "netns.h" -#if IS_ENABLED(CONFIG_SUNRPC_DEBUG) -# define RPCDBG_FACILITY RPCDBG_BIND -#endif - #define RPCBIND_SOCK_PATHNAME "/var/run/rpcbind.sock" #define RPCBIND_PROGRAM (100000u) @@ -444,9 +440,7 @@ int rpcb_register(struct net *net, u32 prog, u32 vers, int prot, unsigned short struct sunrpc_net *sn = net_generic(net, sunrpc_net_id); bool is_set = false; - dprintk("RPC: %sregistering (%u, %u, %d, %u) with local " - "rpcbind\n", (port ? 
"" : "un"), - prog, vers, prot, port); + trace_pmap_register(prog, vers, prot, port); msg.rpc_proc = &rpcb_procedures2[RPCBPROC_UNSET]; if (port != 0) { @@ -472,11 +466,6 @@ static int rpcb_register_inet4(struct sunrpc_net *sn, map->r_addr = rpc_sockaddr2uaddr(sap, GFP_KERNEL); - dprintk("RPC: %sregistering [%u, %u, %s, '%s'] with " - "local rpcbind\n", (port ? "" : "un"), - map->r_prog, map->r_vers, - map->r_addr, map->r_netid); - msg->rpc_proc = &rpcb_procedures4[RPCBPROC_UNSET]; if (port != 0) { msg->rpc_proc = &rpcb_procedures4[RPCBPROC_SET]; @@ -503,11 +492,6 @@ static int rpcb_register_inet6(struct sunrpc_net *sn, map->r_addr = rpc_sockaddr2uaddr(sap, GFP_KERNEL); - dprintk("RPC: %sregistering [%u, %u, %s, '%s'] with " - "local rpcbind\n", (port ? "" : "un"), - map->r_prog, map->r_vers, - map->r_addr, map->r_netid); - msg->rpc_proc = &rpcb_procedures4[RPCBPROC_UNSET]; if (port != 0) { msg->rpc_proc = &rpcb_procedures4[RPCBPROC_SET]; @@ -524,9 +508,7 @@ static int rpcb_unregister_all_protofamilies(struct sunrpc_net *sn, { struct rpcbind_args *map = msg->rpc_argp; - dprintk("RPC: unregistering [%u, %u, '%s'] with " - "local rpcbind\n", - map->r_prog, map->r_vers, map->r_netid); + trace_rpcb_unregister(map->r_prog, map->r_vers, map->r_netid); map->r_addr = ""; msg->rpc_proc = &rpcb_procedures4[RPCBPROC_UNSET]; @@ -598,6 +580,8 @@ int rpcb_v4_register(struct net *net, const u32 program, const u32 version, if (address == NULL) return rpcb_unregister_all_protofamilies(sn, &msg); + trace_rpcb_register(map.r_prog, map.r_vers, map.r_addr, map.r_netid); + switch (address->sa_family) { case AF_INET: return rpcb_register_inet4(sn, address, &msg); -- cgit v1.2.3 From 1466c2216382fc392817fc8888e4ebefb2ef4816 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 8 Jul 2020 16:10:45 -0400 Subject: SUNRPC: Clean up RPC scheduler tracepoints Remove several redundant dprintk call sites, and replace a couple of potentially useful ones with tracepoints. 
Signed-off-by: Chuck Lever Signed-off-by: Anna Schumaker --- include/trace/events/sunrpc.h | 2 ++ net/sunrpc/sched.c | 15 +++------------ 2 files changed, 5 insertions(+), 12 deletions(-) diff --git a/include/trace/events/sunrpc.h b/include/trace/events/sunrpc.h index b5d4cbbaf4b0..11e5e52f4ce5 100644 --- a/include/trace/events/sunrpc.h +++ b/include/trace/events/sunrpc.h @@ -387,6 +387,8 @@ DECLARE_EVENT_CLASS(rpc_task_running, DEFINE_RPC_RUNNING_EVENT(begin); DEFINE_RPC_RUNNING_EVENT(run_action); +DEFINE_RPC_RUNNING_EVENT(sync_sleep); +DEFINE_RPC_RUNNING_EVENT(sync_wake); DEFINE_RPC_RUNNING_EVENT(complete); DEFINE_RPC_RUNNING_EVENT(signalled); DEFINE_RPC_RUNNING_EVENT(end); diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c index 402b1c8869fd..a0d5a98fbf32 100644 --- a/net/sunrpc/sched.c +++ b/net/sunrpc/sched.c @@ -885,9 +885,6 @@ static void __rpc_execute(struct rpc_task *task) int task_is_async = RPC_IS_ASYNC(task); int status = 0; - dprintk("RPC: %5u __rpc_execute flags=0x%x\n", - task->tk_pid, task->tk_flags); - WARN_ON_ONCE(RPC_IS_QUEUED(task)); if (RPC_IS_QUEUED(task)) return; @@ -947,7 +944,7 @@ static void __rpc_execute(struct rpc_task *task) return; /* sync task: sleep here */ - dprintk("RPC: %5u sync task going to sleep\n", task->tk_pid); + trace_rpc_task_sync_sleep(task, task->tk_action); status = out_of_line_wait_on_bit(&task->tk_runstate, RPC_TASK_QUEUED, rpc_wait_bit_killable, TASK_KILLABLE); @@ -963,11 +960,9 @@ static void __rpc_execute(struct rpc_task *task) task->tk_rpc_status = -ERESTARTSYS; rpc_exit(task, -ERESTARTSYS); } - dprintk("RPC: %5u sync task resuming\n", task->tk_pid); + trace_rpc_task_sync_wake(task, task->tk_action); } - dprintk("RPC: %5u return %d, status %d\n", task->tk_pid, status, - task->tk_status); /* Release all resources associated with the task */ rpc_release_task(task); } @@ -1146,10 +1141,8 @@ static void rpc_free_task(struct rpc_task *task) put_rpccred(task->tk_op_cred); rpc_release_calldata(task->tk_ops, 
task->tk_calldata); - if (tk_flags & RPC_TASK_DYNAMIC) { - dprintk("RPC: %5u freeing task\n", task->tk_pid); + if (tk_flags & RPC_TASK_DYNAMIC) mempool_free(task, rpc_task_mempool); - } } static void rpc_async_release(struct work_struct *work) @@ -1203,8 +1196,6 @@ EXPORT_SYMBOL_GPL(rpc_put_task_async); static void rpc_release_task(struct rpc_task *task) { - dprintk("RPC: %5u release task\n", task->tk_pid); - WARN_ON_ONCE(RPC_IS_QUEUED(task)); rpc_release_resources_task(task); -- cgit v1.2.3 From 721a1d388b5536adb220aba25775a256f09790c3 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 8 Jul 2020 16:10:50 -0400 Subject: SUNRPC: Remove dprintk call sites in RPC queuing functions Remove redundant call sites or call sites that are already covered by tracepoints. Signed-off-by: Chuck Lever Signed-off-by: Anna Schumaker --- include/trace/events/sunrpc.h | 1 + net/sunrpc/sched.c | 22 +--------------------- 2 files changed, 2 insertions(+), 21 deletions(-) diff --git a/include/trace/events/sunrpc.h b/include/trace/events/sunrpc.h index 11e5e52f4ce5..f45b3c01370c 100644 --- a/include/trace/events/sunrpc.h +++ b/include/trace/events/sunrpc.h @@ -390,6 +390,7 @@ DEFINE_RPC_RUNNING_EVENT(run_action); DEFINE_RPC_RUNNING_EVENT(sync_sleep); DEFINE_RPC_RUNNING_EVENT(sync_wake); DEFINE_RPC_RUNNING_EVENT(complete); +DEFINE_RPC_RUNNING_EVENT(timeout); DEFINE_RPC_RUNNING_EVENT(signalled); DEFINE_RPC_RUNNING_EVENT(end); diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c index a0d5a98fbf32..116b3abaed3f 100644 --- a/net/sunrpc/sched.c +++ b/net/sunrpc/sched.c @@ -85,7 +85,6 @@ __rpc_disable_timer(struct rpc_wait_queue *queue, struct rpc_task *task) { if (list_empty(&task->u.tk_wait.timer_list)) return; - dprintk("RPC: %5u disabling timer\n", task->tk_pid); task->tk_timeout = 0; list_del(&task->u.tk_wait.timer_list); if (list_empty(&queue->timer_list.list)) @@ -111,9 +110,6 @@ static void __rpc_add_timer(struct rpc_wait_queue *queue, struct rpc_task *task, unsigned long timeout) { 
- dprintk("RPC: %5u setting alarm for %u ms\n", - task->tk_pid, jiffies_to_msecs(timeout - jiffies)); - task->tk_timeout = timeout; if (list_empty(&queue->timer_list.list) || time_before(timeout, queue->timer_list.expires)) rpc_set_queue_timer(queue, timeout); @@ -216,9 +212,6 @@ static void __rpc_add_wait_queue(struct rpc_wait_queue *queue, /* barrier matches the read in rpc_wake_up_task_queue_locked() */ smp_wmb(); rpc_set_queued(task); - - dprintk("RPC: %5u added to queue %p \"%s\"\n", - task->tk_pid, queue, rpc_qname(queue)); } /* @@ -241,8 +234,6 @@ static void __rpc_remove_wait_queue(struct rpc_wait_queue *queue, struct rpc_tas else list_del(&task->u.tk_wait.list); queue->qlen--; - dprintk("RPC: %5u removed from queue %p \"%s\"\n", - task->tk_pid, queue, rpc_qname(queue)); } static void __rpc_init_priority_wait_queue(struct rpc_wait_queue *queue, const char *qname, unsigned char nr_queues) @@ -382,13 +373,9 @@ static void __rpc_do_sleep_on_priority(struct rpc_wait_queue *q, struct rpc_task *task, unsigned char queue_priority) { - dprintk("RPC: %5u sleep_on(queue \"%s\" time %lu)\n", - task->tk_pid, rpc_qname(q), jiffies); - trace_rpc_task_sleep(task, q); __rpc_add_wait_queue(q, task, queue_priority); - } static void __rpc_sleep_on_priority(struct rpc_wait_queue *q, @@ -510,9 +497,6 @@ static void __rpc_do_wake_up_task_on_wq(struct workqueue_struct *wq, struct rpc_wait_queue *queue, struct rpc_task *task) { - dprintk("RPC: %5u __rpc_wake_up_task (now %lu)\n", - task->tk_pid, jiffies); - /* Has the task been executed yet? If not, we cannot wake it up! 
*/ if (!RPC_IS_ACTIVATED(task)) { printk(KERN_ERR "RPC: Inactive task (%p) being woken up!\n", task); @@ -524,8 +508,6 @@ static void __rpc_do_wake_up_task_on_wq(struct workqueue_struct *wq, __rpc_remove_wait_queue(queue, task); rpc_make_runnable(wq, task); - - dprintk("RPC: __rpc_wake_up_task done\n"); } /* @@ -663,8 +645,6 @@ struct rpc_task *rpc_wake_up_first_on_wq(struct workqueue_struct *wq, { struct rpc_task *task = NULL; - dprintk("RPC: wake_up_first(%p \"%s\")\n", - queue, rpc_qname(queue)); spin_lock(&queue->lock); task = __rpc_find_next_queued(queue); if (task != NULL) @@ -770,7 +750,7 @@ static void __rpc_queue_timer_fn(struct work_struct *work) list_for_each_entry_safe(task, n, &queue->timer_list.list, u.tk_wait.timer_list) { timeo = task->tk_timeout; if (time_after_eq(now, timeo)) { - dprintk("RPC: %5u timeout\n", task->tk_pid); + trace_rpc_task_timeout(task, task->tk_action); task->tk_status = -ETIMEDOUT; rpc_wake_up_task_queue_locked(queue, task); continue; -- cgit v1.2.3 From 5589cc4778e23424c44c3a82cd0685f87904fe91 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 8 Jul 2020 16:10:56 -0400 Subject: SUNRPC: Remove remaining dprintks from sched.c Clean up. 
Signed-off-by: Chuck Lever Signed-off-by: Anna Schumaker --- net/sunrpc/sched.c | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c index 116b3abaed3f..f06d7c315017 100644 --- a/net/sunrpc/sched.c +++ b/net/sunrpc/sched.c @@ -27,10 +27,6 @@ #include "sunrpc.h" -#if IS_ENABLED(CONFIG_SUNRPC_DEBUG) -#define RPCDBG_FACILITY RPCDBG_SCHED -#endif - #define CREATE_TRACE_POINTS #include @@ -1065,9 +1061,6 @@ static void rpc_init_task(struct rpc_task *task, const struct rpc_task_setup *ta task->tk_action = rpc_prepare_task; rpc_init_task_statistics(task); - - dprintk("RPC: new task initialized, procpid %u\n", - task_pid_nr(current)); } static struct rpc_task * @@ -1091,7 +1084,6 @@ struct rpc_task *rpc_new_task(const struct rpc_task_setup *setup_data) rpc_init_task(task, setup_data); task->tk_flags |= flags; - dprintk("RPC: allocated task %p\n", task); return task; } @@ -1216,7 +1208,6 @@ static int rpciod_start(void) /* * Create the rpciod thread and wait for it to start. */ - dprintk("RPC: creating workqueue rpciod\n"); wq = alloc_workqueue("rpciod", WQ_MEM_RECLAIM | WQ_UNBOUND, 0); if (!wq) goto out_failed; @@ -1241,7 +1232,6 @@ static void rpciod_stop(void) if (rpciod_workqueue == NULL) return; - dprintk("RPC: destroying workqueue rpciod\n"); wq = rpciod_workqueue; rpciod_workqueue = NULL; -- cgit v1.2.3 From 1138ce1cf60954d1c0e2d7b4eba5b4df5813fd86 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Wed, 15 Jul 2020 18:31:30 -0700 Subject: sunrpc: fix duplicated word in <linux/sunrpc/cache.h> Change "time time" to "time expiry_time" to match the field name. Signed-off-by: Randy Dunlap Cc: "J. 
Bruce Fields" Cc: Chuck Lever Cc: Trond Myklebust Cc: Anna Schumaker Cc: linux-nfs@vger.kernel.org Signed-off-by: Anna Schumaker --- include/linux/sunrpc/cache.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/include/linux/sunrpc/cache.h b/include/linux/sunrpc/cache.h index 10891b70fc7b..d0965e2997b0 100644 --- a/include/linux/sunrpc/cache.h +++ b/include/linux/sunrpc/cache.h @@ -45,7 +45,8 @@ */ struct cache_head { struct hlist_node cache_list; - time64_t expiry_time; /* After time time, don't use the data */ + time64_t expiry_time; /* After time expiry_time, don't use + * the data */ time64_t last_refresh; /* If CACHE_PENDING, this is when upcall was * sent, else this is when update was * received, though it is alway set to -- cgit v1.2.3 From 0bdd4cea12a9fd79a7eb7de8493a5fef54d0eea6 Mon Sep 17 00:00:00 2001 From: Alexander A. Klimov Date: Tue, 7 Jul 2020 21:50:12 +0200 Subject: Replace HTTP links with HTTPS ones: NFS, SUNRPC, and LOCKD clients Rationale: Reduces attack surface on kernel devs opening the links for MITM as HTTPS traffic is much harder to manipulate. Deterministic algorithm: For each file: If not .svg: For each line: If doesn't contain `\bxmlns\b`: For each link, `\bhttp://[^# \t\r\n]*(?:\w|/)`: If both the HTTP and HTTPS versions return 200 OK and serve the same content: Replace HTTP with HTTPS. Signed-off-by: Alexander A. Klimov Signed-off-by: Anna Schumaker --- fs/lockd/mon.c | 2 +- include/linux/sunrpc/bc_xprt.h | 2 +- include/linux/sunrpc/msg_prot.h | 2 +- net/sunrpc/backchannel_rqst.c | 2 +- net/sunrpc/sunrpc.h | 2 +- 5 files changed, 5 insertions(+), 5 deletions(-) diff --git a/fs/lockd/mon.c b/fs/lockd/mon.c index 1eabd91870e6..1d9488cf0534 100644 --- a/fs/lockd/mon.c +++ b/fs/lockd/mon.c @@ -417,7 +417,7 @@ void nsm_release(struct nsm_handle *nsm) /* * XDR functions for NSM. 
* - * See http://www.opengroup.org/ for details on the Network + * See https://www.opengroup.org/ for details on the Network * Status Monitor wire protocol. */ diff --git a/include/linux/sunrpc/bc_xprt.h b/include/linux/sunrpc/bc_xprt.h index d796058cdff2..f07c334c599f 100644 --- a/include/linux/sunrpc/bc_xprt.h +++ b/include/linux/sunrpc/bc_xprt.h @@ -4,7 +4,7 @@ NetApp provides this source code under the GPL v2 License. The GPL v2 license is available at -http://opensource.org/licenses/gpl-license.php. +https://opensource.org/licenses/gpl-license.php. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT diff --git a/include/linux/sunrpc/msg_prot.h b/include/linux/sunrpc/msg_prot.h index bea40d9f03a1..43f854487539 100644 --- a/include/linux/sunrpc/msg_prot.h +++ b/include/linux/sunrpc/msg_prot.h @@ -143,7 +143,7 @@ typedef __be32 rpc_fraghdr; /* * Well-known netids. See: * - * http://www.iana.org/assignments/rpc-netids/rpc-netids.xhtml + * https://www.iana.org/assignments/rpc-netids/rpc-netids.xhtml */ #define RPCBIND_NETID_UDP "udp" #define RPCBIND_NETID_TCP "tcp" diff --git a/net/sunrpc/backchannel_rqst.c b/net/sunrpc/backchannel_rqst.c index 195b40c5dae4..3fecad369592 100644 --- a/net/sunrpc/backchannel_rqst.c +++ b/net/sunrpc/backchannel_rqst.c @@ -5,7 +5,7 @@ NetApp provides this source code under the GPL v2 License. The GPL v2 license is available at -http://opensource.org/licenses/gpl-license.php. +https://opensource.org/licenses/gpl-license.php. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT diff --git a/net/sunrpc/sunrpc.h b/net/sunrpc/sunrpc.h index f6fe2e6cd65a..2f59464e6524 100644 --- a/net/sunrpc/sunrpc.h +++ b/net/sunrpc/sunrpc.h @@ -4,7 +4,7 @@ NetApp provides this source code under the GPL v2 License. The GPL v2 license is available at -http://opensource.org/licenses/gpl-license.php. 
+https://opensource.org/licenses/gpl-license.php. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -- cgit v1.2.3 From d8a6ad913c286d4763ae20b14c02fe6f39d7cd9f Mon Sep 17 00:00:00 2001 From: Dave Wysochanski Date: Tue, 4 Aug 2020 12:11:47 -0400 Subject: NFS4: Fix oops when copy_file_range is attempted with NFS4.0 source The following oops is seen during xfstest/565 when the 'test' (source of the copy) is NFS4.0 and 'scratch' (destination) is NFS4.2 [ 59.692458] run fstests generic/565 at 2020-08-01 05:50:35 [ 60.613588] BUG: kernel NULL pointer dereference, address: 0000000000000008 [ 60.624970] #PF: supervisor read access in kernel mode [ 60.627671] #PF: error_code(0x0000) - not-present page [ 60.630347] PGD 0 P4D 0 [ 60.631853] Oops: 0000 [#1] SMP PTI [ 60.634086] CPU: 6 PID: 2828 Comm: xfs_io Kdump: loaded Not tainted 5.8.0-rc3 #1 [ 60.637676] Hardware name: Red Hat KVM, BIOS 0.5.1 01/01/2011 [ 60.639901] RIP: 0010:nfs4_check_serverowner_major_id+0x5/0x30 [nfsv4] [ 60.642719] Code: 89 ff e8 3e b3 b8 e1 e9 71 fe ff ff 41 bc da d8 ff ff e9 c3 fe ff ff e8 e9 9d 08 e2 66 0f 1f 84 00 00 00 00 00 66 66 66 66 90 <8b> 57 08 31 c0 3b 56 08 75 12 48 83 c6 0c 48 83 c7 0c e8 c4 97 bb [ 60.652629] RSP: 0018:ffffc265417f7e10 EFLAGS: 00010287 [ 60.655379] RAX: ffffa0664b066400 RBX: 0000000000000000 RCX: 0000000000000001 [ 60.658754] RDX: ffffa066725fb000 RSI: ffffa066725fd000 RDI: 0000000000000000 [ 60.662292] RBP: 0000000000020000 R08: 0000000000020000 R09: 0000000000000000 [ 60.666189] R10: 0000000000000003 R11: 0000000000000000 R12: ffffa06648258d00 [ 60.669914] R13: 0000000000000000 R14: 0000000000000000 R15: ffffa06648258100 [ 60.673645] FS: 00007faa9fb35800(0000) GS:ffffa06677d80000(0000) knlGS:0000000000000000 [ 60.677698] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 60.680773] CR2: 0000000000000008 CR3: 0000000203f14000 CR4: 00000000000406e0 [ 60.684476] Call Trace: [ 60.685809] 
nfs4_copy_file_range+0xfc/0x230 [nfsv4] [ 60.688704] vfs_copy_file_range+0x2ee/0x310 [ 60.691104] __x64_sys_copy_file_range+0xd6/0x210 [ 60.693527] do_syscall_64+0x4d/0x90 [ 60.695512] entry_SYSCALL_64_after_hwframe+0x44/0xa9 [ 60.698006] RIP: 0033:0x7faa9febc1bd Signed-off-by: Dave Wysochanski Signed-off-by: Anna Schumaker --- fs/nfs/nfs4file.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/nfs/nfs4file.c b/fs/nfs/nfs4file.c index fdfc77486ace..91be7f628e4a 100644 --- a/fs/nfs/nfs4file.c +++ b/fs/nfs/nfs4file.c @@ -145,7 +145,8 @@ static ssize_t __nfs4_copy_file_range(struct file *file_in, loff_t pos_in, /* Only offload copy if superblock is the same */ if (file_in->f_op != &nfs4_file_operations) return -EXDEV; - if (!nfs_server_capable(file_inode(file_out), NFS_CAP_COPY)) + if (!nfs_server_capable(file_inode(file_out), NFS_CAP_COPY) || + !nfs_server_capable(file_inode(file_in), NFS_CAP_COPY)) return -EOPNOTSUPP; if (file_inode(file_in) == file_inode(file_out)) return -EOPNOTSUPP; -- cgit v1.2.3 From cf65e49f89f2ccad54b1d560691cfa3cd371b2d2 Mon Sep 17 00:00:00 2001 From: Miaohe Lin Date: Thu, 20 Aug 2020 08:01:49 -0400 Subject: nfs: Convert to use the preferred fallthrough macro Convert the uses of fallthrough comments to fallthrough macro. Please see commit 294f69e662d1 ("compiler_attributes.h: Add 'fallthrough' pseudo keyword for switch/case use") for detail. 
Signed-off-by: Hongxiang Lou Signed-off-by: Miaohe Lin Signed-off-by: Anna Schumaker --- fs/nfs/super.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/nfs/super.c b/fs/nfs/super.c index 7a70287f21a2..d20326ee0475 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -889,7 +889,7 @@ static struct nfs_server *nfs_try_mount_request(struct fs_context *fc) default: if (rpcauth_get_gssinfo(flavor, &info) != 0) continue; - /* Fallthrough */ + fallthrough; } dfprintk(MOUNT, "NFS: attempting to use auth flavor %u\n", flavor); ctx->selected_flavor = flavor; -- cgit v1.2.3 From 68274f97aeb6ebcd74c391ddbff0b517b9b0ca0f Mon Sep 17 00:00:00 2001 From: Frank van der Linden Date: Thu, 27 Aug 2020 20:46:55 +0000 Subject: NFSv4.2: xattr cache: remove unused cache struct field The hash_lock field of the cache structure was a leftover of a previous iteration of the code. It is now unused, so remove it. Signed-off-by: Frank van der Linden Signed-off-by: Anna Schumaker --- fs/nfs/nfs42xattr.c | 1 - 1 file changed, 1 deletion(-) diff --git a/fs/nfs/nfs42xattr.c b/fs/nfs/nfs42xattr.c index 86777996cfec..22396a7eebe1 100644 --- a/fs/nfs/nfs42xattr.c +++ b/fs/nfs/nfs42xattr.c @@ -67,7 +67,6 @@ struct nfs4_xattr_bucket { struct nfs4_xattr_cache { struct kref ref; - spinlock_t hash_lock; /* protects hashtable and lru */ struct nfs4_xattr_bucket buckets[NFS4_XATTR_HASH_SIZE]; struct list_head lru; struct list_head dispose; -- cgit v1.2.3 From c0a1d129d3e01751d410343cb8e4a694716ca825 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 18 Sep 2020 15:29:58 -0400 Subject: pNFS/flexfiles: Ensure we initialise the mirror bsizes correctly on read While it is true that reading from an unmirrored source always uses index 0, that is no longer true for mirrored sources when we fail over. 
Fixes: 563c53e73b8b ("NFS: Fix flexfiles read failover") Signed-off-by: Trond Myklebust Signed-off-by: Anna Schumaker --- fs/nfs/flexfilelayout/flexfilelayout.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/fs/nfs/flexfilelayout/flexfilelayout.c b/fs/nfs/flexfilelayout/flexfilelayout.c index ff8965d1a4d4..1edeebd51937 100644 --- a/fs/nfs/flexfilelayout/flexfilelayout.c +++ b/fs/nfs/flexfilelayout/flexfilelayout.c @@ -838,6 +838,7 @@ ff_layout_pg_init_read(struct nfs_pageio_descriptor *pgio, struct nfs4_ff_layout_mirror *mirror; struct nfs4_pnfs_ds *ds; int ds_idx; + u32 i; retry: ff_layout_pg_check_layout(pgio, req); @@ -863,14 +864,14 @@ retry: goto retry; } - mirror = FF_LAYOUT_COMP(pgio->pg_lseg, ds_idx); + for (i = 0; i < pgio->pg_mirror_count; i++) { + mirror = FF_LAYOUT_COMP(pgio->pg_lseg, i); + pgm = &pgio->pg_mirrors[i]; + pgm->pg_bsize = mirror->mirror_ds->ds_versions[0].rsize; + } pgio->pg_mirror_idx = ds_idx; - /* read always uses only one mirror - idx 0 for pgio layer */ - pgm = &pgio->pg_mirrors[0]; - pgm->pg_bsize = mirror->mirror_ds->ds_versions[0].rsize; - if (NFS_SERVER(pgio->pg_inode)->flags & (NFS_MOUNT_SOFT|NFS_MOUNT_SOFTERR)) pgio->pg_maxretrans = io_maxretrans; -- cgit v1.2.3 From c754e137f55e075d6b6ad9b866c32e9aad260a83 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 18 Sep 2020 15:29:59 -0400 Subject: pNFS/flexfiles: Be consistent about mirror index types A mirror index is always of type u32. 
Signed-off-by: Trond Myklebust Signed-off-by: Anna Schumaker --- fs/nfs/flexfilelayout/flexfilelayout.c | 34 +++++++++++++++++----------------- include/linux/nfs_xdr.h | 4 ++-- 2 files changed, 19 insertions(+), 19 deletions(-) diff --git a/fs/nfs/flexfilelayout/flexfilelayout.c b/fs/nfs/flexfilelayout/flexfilelayout.c index 1edeebd51937..a163533446fa 100644 --- a/fs/nfs/flexfilelayout/flexfilelayout.c +++ b/fs/nfs/flexfilelayout/flexfilelayout.c @@ -715,7 +715,7 @@ nfs4_ff_layout_stat_io_end_write(struct rpc_task *task, } static void -ff_layout_mark_ds_unreachable(struct pnfs_layout_segment *lseg, int idx) +ff_layout_mark_ds_unreachable(struct pnfs_layout_segment *lseg, u32 idx) { struct nfs4_deviceid_node *devid = FF_LAYOUT_DEVID_NODE(lseg, idx); @@ -724,7 +724,7 @@ ff_layout_mark_ds_unreachable(struct pnfs_layout_segment *lseg, int idx) } static void -ff_layout_mark_ds_reachable(struct pnfs_layout_segment *lseg, int idx) +ff_layout_mark_ds_reachable(struct pnfs_layout_segment *lseg, u32 idx) { struct nfs4_deviceid_node *devid = FF_LAYOUT_DEVID_NODE(lseg, idx); @@ -734,14 +734,14 @@ ff_layout_mark_ds_reachable(struct pnfs_layout_segment *lseg, int idx) static struct nfs4_pnfs_ds * ff_layout_choose_ds_for_read(struct pnfs_layout_segment *lseg, - int start_idx, int *best_idx, + u32 start_idx, u32 *best_idx, bool check_device) { struct nfs4_ff_layout_segment *fls = FF_LAYOUT_LSEG(lseg); struct nfs4_ff_layout_mirror *mirror; struct nfs4_pnfs_ds *ds; bool fail_return = false; - int idx; + u32 idx; /* mirrors are initially sorted by efficiency */ for (idx = start_idx; idx < fls->mirror_array_cnt; idx++) { @@ -766,21 +766,21 @@ ff_layout_choose_ds_for_read(struct pnfs_layout_segment *lseg, static struct nfs4_pnfs_ds * ff_layout_choose_any_ds_for_read(struct pnfs_layout_segment *lseg, - int start_idx, int *best_idx) + u32 start_idx, u32 *best_idx) { return ff_layout_choose_ds_for_read(lseg, start_idx, best_idx, false); } static struct nfs4_pnfs_ds * 
ff_layout_choose_valid_ds_for_read(struct pnfs_layout_segment *lseg, - int start_idx, int *best_idx) + u32 start_idx, u32 *best_idx) { return ff_layout_choose_ds_for_read(lseg, start_idx, best_idx, true); } static struct nfs4_pnfs_ds * ff_layout_choose_best_ds_for_read(struct pnfs_layout_segment *lseg, - int start_idx, int *best_idx) + u32 start_idx, u32 *best_idx) { struct nfs4_pnfs_ds *ds; @@ -791,7 +791,8 @@ ff_layout_choose_best_ds_for_read(struct pnfs_layout_segment *lseg, } static struct nfs4_pnfs_ds * -ff_layout_get_ds_for_read(struct nfs_pageio_descriptor *pgio, int *best_idx) +ff_layout_get_ds_for_read(struct nfs_pageio_descriptor *pgio, + u32 *best_idx) { struct pnfs_layout_segment *lseg = pgio->pg_lseg; struct nfs4_pnfs_ds *ds; @@ -837,8 +838,7 @@ ff_layout_pg_init_read(struct nfs_pageio_descriptor *pgio, struct nfs_pgio_mirror *pgm; struct nfs4_ff_layout_mirror *mirror; struct nfs4_pnfs_ds *ds; - int ds_idx; - u32 i; + u32 ds_idx, i; retry: ff_layout_pg_check_layout(pgio, req); @@ -895,7 +895,7 @@ ff_layout_pg_init_write(struct nfs_pageio_descriptor *pgio, struct nfs4_ff_layout_mirror *mirror; struct nfs_pgio_mirror *pgm; struct nfs4_pnfs_ds *ds; - int i; + u32 i; retry: ff_layout_pg_check_layout(pgio, req); @@ -1039,7 +1039,7 @@ static void ff_layout_reset_write(struct nfs_pgio_header *hdr, bool retry_pnfs) static void ff_layout_resend_pnfs_read(struct nfs_pgio_header *hdr) { u32 idx = hdr->pgio_mirror_idx + 1; - int new_idx = 0; + u32 new_idx = 0; if (ff_layout_choose_any_ds_for_read(hdr->lseg, idx + 1, &new_idx)) ff_layout_send_layouterror(hdr->lseg); @@ -1076,7 +1076,7 @@ static int ff_layout_async_handle_error_v4(struct rpc_task *task, struct nfs4_state *state, struct nfs_client *clp, struct pnfs_layout_segment *lseg, - int idx) + u32 idx) { struct pnfs_layout_hdr *lo = lseg->pls_layout; struct inode *inode = lo->plh_inode; @@ -1150,7 +1150,7 @@ reset: /* Retry all errors through either pNFS or MDS except for -EJUKEBOX */ static int 
ff_layout_async_handle_error_v3(struct rpc_task *task, struct pnfs_layout_segment *lseg, - int idx) + u32 idx) { struct nfs4_deviceid_node *devid = FF_LAYOUT_DEVID_NODE(lseg, idx); @@ -1185,7 +1185,7 @@ static int ff_layout_async_handle_error(struct rpc_task *task, struct nfs4_state *state, struct nfs_client *clp, struct pnfs_layout_segment *lseg, - int idx) + u32 idx) { int vers = clp->cl_nfs_mod->rpc_vers->number; @@ -1212,7 +1212,7 @@ static int ff_layout_async_handle_error(struct rpc_task *task, } static void ff_layout_io_track_ds_error(struct pnfs_layout_segment *lseg, - int idx, u64 offset, u64 length, + u32 idx, u64 offset, u64 length, u32 *op_status, int opnum, int error) { struct nfs4_ff_layout_mirror *mirror; @@ -1810,7 +1810,7 @@ ff_layout_write_pagelist(struct nfs_pgio_header *hdr, int sync) loff_t offset = hdr->args.offset; int vers; struct nfs_fh *fh; - int idx = hdr->pgio_mirror_idx; + u32 idx = hdr->pgio_mirror_idx; mirror = FF_LAYOUT_COMP(lseg, idx); ds = nfs4_ff_layout_prepare_ds(lseg, mirror, true); diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 9408f3252c8e..69cb46f7b8d2 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -1611,8 +1611,8 @@ struct nfs_pgio_header { __u64 mds_offset; /* Filelayout dense stripe */ struct nfs_page_array page_array; struct nfs_client *ds_clp; /* pNFS data server */ - int ds_commit_idx; /* ds index if ds_clp is set */ - int pgio_mirror_idx;/* mirror index in pgio layer */ + u32 ds_commit_idx; /* ds index if ds_clp is set */ + u32 pgio_mirror_idx;/* mirror index in pgio layer */ }; struct nfs_mds_commit_info { -- cgit v1.2.3 From ed38c33f1cc5a4e6da63f71879106acc0027e286 Mon Sep 17 00:00:00 2001 From: Julia Lawall Date: Sun, 20 Sep 2020 13:26:20 +0200 Subject: xprtrdma: drop double zeroing sg_init_table zeroes its first argument, so the allocation of that argument doesn't have to. 
the semantic patch that makes this change is as follows: (http://coccinelle.lip6.fr/) // @@ expression x,n,flags; @@ x = - kcalloc + kmalloc_array (n,sizeof(*x),flags) ... sg_init_table(x,n) // Signed-off-by: Julia Lawall Acked-by: Chuck Lever Signed-off-by: Anna Schumaker --- net/sunrpc/xprtrdma/frwr_ops.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/sunrpc/xprtrdma/frwr_ops.c b/net/sunrpc/xprtrdma/frwr_ops.c index 7f94c9a19fd3..44888f5badef 100644 --- a/net/sunrpc/xprtrdma/frwr_ops.c +++ b/net/sunrpc/xprtrdma/frwr_ops.c @@ -124,7 +124,7 @@ int frwr_mr_init(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr *mr) if (IS_ERR(frmr)) goto out_mr_err; - sg = kcalloc(depth, sizeof(*sg), GFP_NOFS); + sg = kmalloc_array(depth, sizeof(*sg), GFP_NOFS); if (!sg) goto out_list_err; -- cgit v1.2.3 From 9f2664512711788e4e1d06f25a925eb7ac681582 Mon Sep 17 00:00:00 2001 From: Wang Qing Date: Thu, 17 Sep 2020 10:19:00 +0800 Subject: nfs: fix spellint typo in pnfs.c Change the comment typo: "manger" -> "manager". Signed-off-by: Wang Qing Signed-off-by: Anna Schumaker --- fs/nfs/pnfs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index 71f7741126b6..0e50b9d45c32 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -902,7 +902,7 @@ restart: } /* - * Called by the state manger to remove all layouts established under an + * Called by the state manager to remove all layouts established under an * expired lease. */ void -- cgit v1.2.3 From 76bd5c016ef49683d626a48748ef1764aaf8ba63 Mon Sep 17 00:00:00 2001 From: Olga Kornievskaia Date: Mon, 14 Sep 2020 17:05:08 -0400 Subject: NFSv4: make cache consistency bitmask dynamic Client uses static bitmask for GETATTR on CLOSE/WRITE/DELEGRETURN and ignores the fact that it might have some attributes marked invalid in its cache. 
Compared to v3 where all attributes are retrieved in postop attributes, v4's cache is frequently out of sync and leads to standalone GETATTRs being sent to the server. Instead, in addition to the minimum cache consistency attributes also check cache_validity and adjust the GETATTR request accordingly. Signed-off-by: Olga Kornievskaia Signed-off-by: Anna Schumaker --- fs/nfs/nfs4proc.c | 45 ++++++++++++++++++++++++++++++++++++++++++--- include/linux/nfs_xdr.h | 6 +++--- 2 files changed, 45 insertions(+), 6 deletions(-) diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index aca52e52538f..542961ffa529 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -107,6 +107,9 @@ static int nfs41_test_stateid(struct nfs_server *, nfs4_stateid *, static int nfs41_free_stateid(struct nfs_server *, const nfs4_stateid *, const struct cred *, bool); #endif +static void nfs4_bitmask_adjust(__u32 *bitmask, struct inode *inode, + struct nfs_server *server, + struct nfs4_label *label); #ifdef CONFIG_NFS_V4_SECURITY_LABEL static inline struct nfs4_label * @@ -3632,9 +3635,10 @@ static void nfs4_close_prepare(struct rpc_task *task, void *data) if (calldata->arg.fmode == 0 || calldata->arg.fmode == FMODE_READ) { /* Close-to-open cache consistency revalidation */ - if (!nfs4_have_delegation(inode, FMODE_READ)) + if (!nfs4_have_delegation(inode, FMODE_READ)) { calldata->arg.bitmask = NFS_SERVER(inode)->cache_consistency_bitmask; - else + nfs4_bitmask_adjust(calldata->arg.bitmask, inode, NFS_SERVER(inode), NULL); + } else calldata->arg.bitmask = NULL; } @@ -5360,6 +5364,38 @@ bool nfs4_write_need_cache_consistency_data(struct nfs_pgio_header *hdr) return nfs4_have_delegation(hdr->inode, FMODE_READ) == 0; } +static void nfs4_bitmask_adjust(__u32 *bitmask, struct inode *inode, + struct nfs_server *server, + struct nfs4_label *label) +{ + + unsigned long cache_validity = READ_ONCE(NFS_I(inode)->cache_validity); + + if ((cache_validity & NFS_INO_INVALID_DATA) || + (cache_validity & 
NFS_INO_REVAL_PAGECACHE) || + (cache_validity & NFS_INO_REVAL_FORCED) || + (cache_validity & NFS_INO_INVALID_OTHER)) + nfs4_bitmap_copy_adjust(bitmask, nfs4_bitmask(server, label), inode); + + if (cache_validity & NFS_INO_INVALID_ATIME) + bitmask[1] |= FATTR4_WORD1_TIME_ACCESS; + if (cache_validity & NFS_INO_INVALID_ACCESS) + bitmask[0] |= FATTR4_WORD1_MODE | FATTR4_WORD1_OWNER | + FATTR4_WORD1_OWNER_GROUP; + if (cache_validity & NFS_INO_INVALID_ACL) + bitmask[0] |= FATTR4_WORD0_ACL; + if (cache_validity & NFS_INO_INVALID_LABEL) + bitmask[2] |= FATTR4_WORD2_SECURITY_LABEL; + if (cache_validity & NFS_INO_INVALID_CTIME) + bitmask[0] |= FATTR4_WORD0_CHANGE; + if (cache_validity & NFS_INO_INVALID_MTIME) + bitmask[1] |= FATTR4_WORD1_TIME_MODIFY; + if (cache_validity & NFS_INO_INVALID_SIZE) + bitmask[0] |= FATTR4_WORD0_SIZE; + if (cache_validity & NFS_INO_INVALID_BLOCKS) + bitmask[1] |= FATTR4_WORD1_SPACE_USED; +} + static void nfs4_proc_write_setup(struct nfs_pgio_header *hdr, struct rpc_message *msg, struct rpc_clnt **clnt) @@ -5369,8 +5405,10 @@ static void nfs4_proc_write_setup(struct nfs_pgio_header *hdr, if (!nfs4_write_need_cache_consistency_data(hdr)) { hdr->args.bitmask = NULL; hdr->res.fattr = NULL; - } else + } else { hdr->args.bitmask = server->cache_consistency_bitmask; + nfs4_bitmask_adjust(hdr->args.bitmask, hdr->inode, server, NULL); + } if (!hdr->pgio_done_cb) hdr->pgio_done_cb = nfs4_write_done_cb; @@ -6406,6 +6444,7 @@ static int _nfs4_proc_delegreturn(struct inode *inode, const struct cred *cred, data->args.fhandle = &data->fh; data->args.stateid = &data->stateid; data->args.bitmask = server->cache_consistency_bitmask; + nfs4_bitmask_adjust(data->args.bitmask, inode, server, NULL); nfs_copy_fh(&data->fh, NFS_FH(inode)); nfs4_stateid_copy(&data->stateid, stateid); data->res.fattr = &data->fattr; diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 69cb46f7b8d2..0599efd57eb9 100644 --- a/include/linux/nfs_xdr.h +++ 
b/include/linux/nfs_xdr.h @@ -525,7 +525,7 @@ struct nfs_closeargs { struct nfs_seqid * seqid; fmode_t fmode; u32 share_access; - const u32 * bitmask; + u32 * bitmask; struct nfs4_layoutreturn_args *lr_args; }; @@ -608,7 +608,7 @@ struct nfs4_delegreturnargs { struct nfs4_sequence_args seq_args; const struct nfs_fh *fhandle; const nfs4_stateid *stateid; - const u32 * bitmask; + u32 * bitmask; struct nfs4_layoutreturn_args *lr_args; }; @@ -648,7 +648,7 @@ struct nfs_pgio_args { union { unsigned int replen; /* used by read */ struct { - const u32 * bitmask; /* used by write */ + u32 * bitmask; /* used by write */ enum nfs3_stable_how stable; /* used by write */ }; }; -- cgit v1.2.3 From fb08334bb38f56d308d3e15b47bca67529cffc87 Mon Sep 17 00:00:00 2001 From: Nick Desaulniers Date: Thu, 17 Sep 2020 14:45:45 -0700 Subject: nfs: remove incorrect fallthrough label There is no case after the default from which to fallthrough to. Clang will error in this case (unhelpfully without context, see link below) and GCC will with -Wswitch-unreachable. The previous commit should have just replaced the comment with a break statement. If we consider implicit fallthrough to be a design mistake of C, then all case statements should be terminated with one of the following statements: * break * continue * return * fallthrough * goto * (call of function with __attribute__(__noreturn__)) Fixes: 2a1390c95a69 ("nfs: Convert to use the preferred fallthrough macro") Link: https://bugs.llvm.org/show_bug.cgi?id=47539 Acked-by: Gustavo A. R. Silva Reviewed-by: Gustavo A. R. 
Silva Reviewed-by: Miaohe Lin Reviewed-by: Nathan Chancellor Suggested-by: Joe Perches Signed-off-by: Nick Desaulniers Signed-off-by: Anna Schumaker --- fs/nfs/super.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/nfs/super.c b/fs/nfs/super.c index d20326ee0475..eb2401079b04 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -889,7 +889,7 @@ static struct nfs_server *nfs_try_mount_request(struct fs_context *fc) default: if (rpcauth_get_gssinfo(flavor, &info) != 0) continue; - fallthrough; + break; } dfprintk(MOUNT, "NFS: attempting to use auth flavor %u\n", flavor); ctx->selected_flavor = flavor; -- cgit v1.2.3 From b4868b44c5628995fdd8ef2e24dda73cef963a75 Mon Sep 17 00:00:00 2001 From: Benjamin Coddington Date: Fri, 25 Sep 2020 15:48:39 -0400 Subject: NFSv4: Wait for stateid updates after CLOSE/OPEN_DOWNGRADE Since commit 0e0cb35b417f ("NFSv4: Handle NFS4ERR_OLD_STATEID in CLOSE/OPEN_DOWNGRADE") the following livelock may occur if a CLOSE races with the update of the nfs_state: Process 1 Process 2 Server ========= ========= ======== OPEN file OPEN file Reply OPEN (1) Reply OPEN (2) Update state (1) CLOSE file (1) Reply OLD_STATEID (1) CLOSE file (2) Reply CLOSE (-1) Update state (2) wait for state change OPEN file wake CLOSE file OPEN file wake CLOSE file ... ... We can avoid this situation by not issuing an immediate retry with a bumped seqid when CLOSE/OPEN_DOWNGRADE receives NFS4ERR_OLD_STATEID. Instead, take the same approach used by OPEN and wait at least 5 seconds for outstanding stateid updates to complete if we can detect that we're out of sequence. Note that after this change it is still possible (though unlikely) that CLOSE waits a full 5 seconds, bumps the seqid, and retries -- and that attempt races with another OPEN at the same time. 
In order to avoid this race (which would result in the livelock), update nfs_need_update_open_stateid() to handle the case where: - the state is NFS_OPEN_STATE, and - the stateid doesn't match the current open stateid Finally, nfs_need_update_open_stateid() is modified to be idempotent and renamed to better suit the purpose of signaling that the stateid passed is the next stateid in sequence. Fixes: 0e0cb35b417f ("NFSv4: Handle NFS4ERR_OLD_STATEID in CLOSE/OPEN_DOWNGRADE") Cc: stable@vger.kernel.org # v5.4+ Signed-off-by: Benjamin Coddington Signed-off-by: Anna Schumaker --- fs/nfs/nfs4_fs.h | 8 ++++++ fs/nfs/nfs4proc.c | 81 +++++++++++++++++++++++++++++++----------------------- fs/nfs/nfs4trace.h | 1 + 3 files changed, 56 insertions(+), 34 deletions(-) diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h index 0c9505dc852c..065cb04222a1 100644 --- a/fs/nfs/nfs4_fs.h +++ b/fs/nfs/nfs4_fs.h @@ -599,6 +599,14 @@ static inline bool nfs4_stateid_is_newer(const nfs4_stateid *s1, const nfs4_stat return (s32)(be32_to_cpu(s1->seqid) - be32_to_cpu(s2->seqid)) > 0; } +static inline bool nfs4_stateid_is_next(const nfs4_stateid *s1, const nfs4_stateid *s2) +{ + u32 seq1 = be32_to_cpu(s1->seqid); + u32 seq2 = be32_to_cpu(s2->seqid); + + return seq2 == seq1 + 1U || (seq2 == 1U && seq1 == 0xffffffffU); +} + static inline bool nfs4_stateid_match_or_older(const nfs4_stateid *dst, const nfs4_stateid *src) { return nfs4_stateid_match_other(dst, src) && diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 542961ffa529..f7ef2ca699a5 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -1550,19 +1550,6 @@ static void nfs_state_log_update_open_stateid(struct nfs4_state *state) wake_up_all(&state->waitq); } -static void nfs_state_log_out_of_order_open_stateid(struct nfs4_state *state, - const nfs4_stateid *stateid) -{ - u32 state_seqid = be32_to_cpu(state->open_stateid.seqid); - u32 stateid_seqid = be32_to_cpu(stateid->seqid); - - if (stateid_seqid == state_seqid + 1U || - 
(stateid_seqid == 1U && state_seqid == 0xffffffffU)) - nfs_state_log_update_open_stateid(state); - else - set_bit(NFS_STATE_CHANGE_WAIT, &state->flags); -} - static void nfs_test_and_clear_all_open_stateid(struct nfs4_state *state) { struct nfs_client *clp = state->owner->so_server->nfs_client; @@ -1588,21 +1575,19 @@ static void nfs_test_and_clear_all_open_stateid(struct nfs4_state *state) * i.e. The stateid seqids have to be initialised to 1, and * are then incremented on every state transition. */ -static bool nfs_need_update_open_stateid(struct nfs4_state *state, +static bool nfs_stateid_is_sequential(struct nfs4_state *state, const nfs4_stateid *stateid) { - if (test_bit(NFS_OPEN_STATE, &state->flags) == 0 || - !nfs4_stateid_match_other(stateid, &state->open_stateid)) { + if (test_bit(NFS_OPEN_STATE, &state->flags)) { + /* The common case - we're updating to a new sequence number */ + if (nfs4_stateid_match_other(stateid, &state->open_stateid) && + nfs4_stateid_is_next(&state->open_stateid, stateid)) { + return true; + } + } else { + /* This is the first OPEN in this generation */ if (stateid->seqid == cpu_to_be32(1)) - nfs_state_log_update_open_stateid(state); - else - set_bit(NFS_STATE_CHANGE_WAIT, &state->flags); - return true; - } - - if (nfs4_stateid_is_newer(stateid, &state->open_stateid)) { - nfs_state_log_out_of_order_open_stateid(state, stateid); - return true; + return true; } return false; } @@ -1676,16 +1661,16 @@ static void nfs_set_open_stateid_locked(struct nfs4_state *state, int status = 0; for (;;) { - if (!nfs_need_update_open_stateid(state, stateid)) - return; - if (!test_bit(NFS_STATE_CHANGE_WAIT, &state->flags)) + if (nfs_stateid_is_sequential(state, stateid)) break; + if (status) break; /* Rely on seqids for serialisation with NFSv4.0 */ if (!nfs4_has_session(NFS_SERVER(state->inode)->nfs_client)) break; + set_bit(NFS_STATE_CHANGE_WAIT, &state->flags); prepare_to_wait(&state->waitq, &wait, TASK_KILLABLE); /* * Ensure we process the state 
changes in the same order @@ -1696,6 +1681,7 @@ static void nfs_set_open_stateid_locked(struct nfs4_state *state, spin_unlock(&state->owner->so_lock); rcu_read_unlock(); trace_nfs4_open_stateid_update_wait(state->inode, stateid, 0); + if (!signal_pending(current)) { if (schedule_timeout(5*HZ) == 0) status = -EAGAIN; @@ -3438,7 +3424,8 @@ static bool nfs4_refresh_open_old_stateid(nfs4_stateid *dst, __be32 seqid_open; u32 dst_seqid; bool ret; - int seq; + int seq, status = -EAGAIN; + DEFINE_WAIT(wait); for (;;) { ret = false; @@ -3450,15 +3437,41 @@ static bool nfs4_refresh_open_old_stateid(nfs4_stateid *dst, continue; break; } + + write_seqlock(&state->seqlock); seqid_open = state->open_stateid.seqid; - if (read_seqretry(&state->seqlock, seq)) - continue; dst_seqid = be32_to_cpu(dst->seqid); - if ((s32)(dst_seqid - be32_to_cpu(seqid_open)) >= 0) - dst->seqid = cpu_to_be32(dst_seqid + 1); - else + + /* Did another OPEN bump the state's seqid? try again: */ + if ((s32)(be32_to_cpu(seqid_open) - dst_seqid) > 0) { dst->seqid = seqid_open; + write_sequnlock(&state->seqlock); + ret = true; + break; + } + + /* server says we're behind but we haven't seen the update yet */ + set_bit(NFS_STATE_CHANGE_WAIT, &state->flags); + prepare_to_wait(&state->waitq, &wait, TASK_KILLABLE); + write_sequnlock(&state->seqlock); + trace_nfs4_close_stateid_update_wait(state->inode, dst, 0); + + if (signal_pending(current)) + status = -EINTR; + else + if (schedule_timeout(5*HZ) != 0) + status = 0; + + finish_wait(&state->waitq, &wait); + + if (!status) + continue; + if (status == -EINTR) + break; + + /* we slept the whole 5 seconds, we must have lost a seqid */ + dst->seqid = cpu_to_be32(dst_seqid + 1); ret = true; break; } diff --git a/fs/nfs/nfs4trace.h b/fs/nfs/nfs4trace.h index b4f852d4d099..484c1da96dea 100644 --- a/fs/nfs/nfs4trace.h +++ b/fs/nfs/nfs4trace.h @@ -1511,6 +1511,7 @@ DEFINE_NFS4_INODE_STATEID_EVENT(nfs4_setattr); DEFINE_NFS4_INODE_STATEID_EVENT(nfs4_delegreturn); 
DEFINE_NFS4_INODE_STATEID_EVENT(nfs4_open_stateid_update); DEFINE_NFS4_INODE_STATEID_EVENT(nfs4_open_stateid_update_wait); +DEFINE_NFS4_INODE_STATEID_EVENT(nfs4_close_stateid_update_wait); DECLARE_EVENT_CLASS(nfs4_getattr_event, TP_PROTO( -- cgit v1.2.3 From 5904c16d2210b967caf66b04a0c26cfa6a7a0328 Mon Sep 17 00:00:00 2001 From: Yang Shi Date: Sun, 27 Sep 2020 04:42:20 -0700 Subject: fs: nfs: return per memcg count for xattr shrinkers The list_lru_count() returns the per node count, but the new xattr shrinkers are memcg aware, so the shrinkers should return per memcg count by calling list_lru_shrink_count() instead. Otherwise over-shrink might be experienced. The problem was spotted by visual code inspection. Cc: Trond Myklebust Cc: Anna Schumaker Cc: Frank van der Linden Signed-off-by: Yang Shi Reviewed-by: Frank van der Linden Signed-off-by: Anna Schumaker --- fs/nfs/nfs42xattr.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/nfs/nfs42xattr.c b/fs/nfs/nfs42xattr.c index 22396a7eebe1..b51424ff8159 100644 --- a/fs/nfs/nfs42xattr.c +++ b/fs/nfs/nfs42xattr.c @@ -881,7 +881,7 @@ nfs4_xattr_cache_count(struct shrinker *shrink, struct shrink_control *sc) { unsigned long count; - count = list_lru_count(&nfs4_xattr_cache_lru); + count = list_lru_shrink_count(&nfs4_xattr_cache_lru, sc); return vfs_pressure_ratio(count); } @@ -975,7 +975,7 @@ nfs4_xattr_entry_count(struct shrinker *shrink, struct shrink_control *sc) lru = (shrink == &nfs4_xattr_large_entry_shrinker) ? &nfs4_xattr_large_entry_lru : &nfs4_xattr_entry_lru; - count = list_lru_count(lru); + count = list_lru_shrink_count(lru, sc); return vfs_pressure_ratio(count); } -- cgit v1.2.3 From 247db73560bc3e5aef6db50c443c3c0db115bc93 Mon Sep 17 00:00:00 2001 From: Ashish Sangwan Date: Mon, 5 Oct 2020 02:22:43 -0700 Subject: NFS: fix nfs_path in case of a rename retry We are generating an incorrect path in case of a rename retry because we are restarting from the wrong dentry. 
We should restart from the dentry which was received in the call to nfs_path. CC: stable@vger.kernel.org Signed-off-by: Ashish Sangwan Signed-off-by: Anna Schumaker --- fs/nfs/namespace.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/fs/nfs/namespace.c b/fs/nfs/namespace.c index 6b063227e34e..2bcbe38afe2e 100644 --- a/fs/nfs/namespace.c +++ b/fs/nfs/namespace.c @@ -32,9 +32,9 @@ int nfs_mountpoint_expiry_timeout = 500 * HZ; /* * nfs_path - reconstruct the path given an arbitrary dentry * @base - used to return pointer to the end of devname part of path - * @dentry - pointer to dentry + * @dentry_in - pointer to dentry * @buffer - result buffer - * @buflen - length of buffer + * @buflen_in - length of buffer * @flags - options (see below) * * Helper function for constructing the server pathname @@ -49,15 +49,19 @@ int nfs_mountpoint_expiry_timeout = 500 * HZ; * the original device (export) name * (if unset, the original name is returned verbatim) */ -char *nfs_path(char **p, struct dentry *dentry, char *buffer, ssize_t buflen, - unsigned flags) +char *nfs_path(char **p, struct dentry *dentry_in, char *buffer, + ssize_t buflen_in, unsigned flags) { char *end; int namelen; unsigned seq; const char *base; + struct dentry *dentry; + ssize_t buflen; rename_retry: + buflen = buflen_in; + dentry = dentry_in; end = buffer+buflen; *--end = '\0'; buflen--; -- cgit v1.2.3 From f7d61ee414cadaeb05af3bf7a64fb99760b9c6e7 Mon Sep 17 00:00:00 2001 From: Anna Schumaker Date: Mon, 26 Jan 2015 17:26:19 -0500 Subject: SUNRPC: Split out a function for setting current page I'm going to need this bit of code in a few places for READ_PLUS decoding, so let's make it a helper function. 
Signed-off-by: Anna Schumaker --- net/sunrpc/xdr.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/net/sunrpc/xdr.c b/net/sunrpc/xdr.c index be11d672b5b9..fa7517c1d125 100644 --- a/net/sunrpc/xdr.c +++ b/net/sunrpc/xdr.c @@ -825,6 +825,13 @@ static int xdr_set_page_base(struct xdr_stream *xdr, return 0; } +static void xdr_set_page(struct xdr_stream *xdr, unsigned int base, + unsigned int len) +{ + if (xdr_set_page_base(xdr, base, len) < 0) + xdr_set_iov(xdr, xdr->buf->tail, xdr->nwords << 2); +} + static void xdr_set_next_page(struct xdr_stream *xdr) { unsigned int newbase; @@ -832,8 +839,7 @@ static void xdr_set_next_page(struct xdr_stream *xdr) newbase = (1 + xdr->page_ptr - xdr->buf->pages) << PAGE_SHIFT; newbase -= xdr->buf->page_base; - if (xdr_set_page_base(xdr, newbase, PAGE_SIZE) < 0) - xdr_set_iov(xdr, xdr->buf->tail, xdr->nwords << 2); + xdr_set_page(xdr, newbase, PAGE_SIZE); } static bool xdr_set_next_buffer(struct xdr_stream *xdr) @@ -841,8 +847,7 @@ static bool xdr_set_next_buffer(struct xdr_stream *xdr) if (xdr->page_ptr != NULL) xdr_set_next_page(xdr); else if (xdr->iov == xdr->buf->head) { - if (xdr_set_page_base(xdr, 0, PAGE_SIZE) < 0) - xdr_set_iov(xdr, xdr->buf->tail, xdr->nwords << 2); + xdr_set_page(xdr, 0, PAGE_SIZE); } return xdr->p != xdr->end; } -- cgit v1.2.3 From cf1f08cac375630af6b6307907a3fc20fcf847c7 Mon Sep 17 00:00:00 2001 From: Anna Schumaker Date: Fri, 17 Apr 2020 11:00:24 -0400 Subject: SUNRPC: Implement a xdr_page_pos() function I'll need this for READ_PLUS to help figure out the offset where page data is stored at, but it might also be useful for other things. 
Signed-off-by: Anna Schumaker --- include/linux/sunrpc/xdr.h | 1 + net/sunrpc/xdr.c | 13 +++++++++++++ 2 files changed, 14 insertions(+) diff --git a/include/linux/sunrpc/xdr.h b/include/linux/sunrpc/xdr.h index 5a6a81b7cd9f..25a68dd87ecf 100644 --- a/include/linux/sunrpc/xdr.h +++ b/include/linux/sunrpc/xdr.h @@ -240,6 +240,7 @@ extern int xdr_restrict_buflen(struct xdr_stream *xdr, int newbuflen); extern void xdr_write_pages(struct xdr_stream *xdr, struct page **pages, unsigned int base, unsigned int len); extern unsigned int xdr_stream_pos(const struct xdr_stream *xdr); +extern unsigned int xdr_page_pos(const struct xdr_stream *xdr); extern void xdr_init_decode(struct xdr_stream *xdr, struct xdr_buf *buf, __be32 *p, struct rpc_rqst *rqst); extern void xdr_init_decode_pages(struct xdr_stream *xdr, struct xdr_buf *buf, diff --git a/net/sunrpc/xdr.c b/net/sunrpc/xdr.c index fa7517c1d125..909920fab93b 100644 --- a/net/sunrpc/xdr.c +++ b/net/sunrpc/xdr.c @@ -505,6 +505,19 @@ unsigned int xdr_stream_pos(const struct xdr_stream *xdr) } EXPORT_SYMBOL_GPL(xdr_stream_pos); +/** + * xdr_page_pos - Return the current offset from the start of the xdr pages + * @xdr: pointer to struct xdr_stream + */ +unsigned int xdr_page_pos(const struct xdr_stream *xdr) +{ + unsigned int pos = xdr_stream_pos(xdr); + + WARN_ON(pos < xdr->buf->head[0].iov_len); + return pos - xdr->buf->head[0].iov_len; +} +EXPORT_SYMBOL_GPL(xdr_page_pos); + /** * xdr_init_encode - Initialize a struct xdr_stream for sending data. 
* @xdr: pointer to xdr_stream struct -- cgit v1.2.3 From a14a63594cc2e5bdcbb1543d29df945da71e380f Mon Sep 17 00:00:00 2001 From: Anna Schumaker Date: Fri, 17 Apr 2020 11:01:50 -0400 Subject: NFS: Use xdr_page_pos() in NFSv4 decode_getacl() Signed-off-by: Anna Schumaker --- fs/nfs/nfs4xdr.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index 0b3510f62623..3336ea3407a0 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -5308,7 +5308,6 @@ static int decode_getacl(struct xdr_stream *xdr, struct rpc_rqst *req, uint32_t attrlen, bitmap[3] = {0}; int status; - unsigned int pg_offset; res->acl_len = 0; if ((status = decode_op_hdr(xdr, OP_GETATTR)) != 0) @@ -5316,9 +5315,6 @@ static int decode_getacl(struct xdr_stream *xdr, struct rpc_rqst *req, xdr_enter_page(xdr, xdr->buf->page_len); - /* Calculate the offset of the page data */ - pg_offset = xdr->buf->head[0].iov_len; - if ((status = decode_attr_bitmap(xdr, bitmap)) != 0) goto out; if ((status = decode_attr_length(xdr, &attrlen, &savep)) != 0) @@ -5331,7 +5327,7 @@ static int decode_getacl(struct xdr_stream *xdr, struct rpc_rqst *req, /* The bitmap (xdr len + bitmaps) and the attr xdr len words * are stored with the acl data to handle the problem of * variable length bitmaps.*/ - res->acl_data_offset = xdr_stream_pos(xdr) - pg_offset; + res->acl_data_offset = xdr_page_pos(xdr); res->acl_len = attrlen; /* Check for receive buffer overflow */ -- cgit v1.2.3 From c567552612ece787b178e3b147b5854ad422a836 Mon Sep 17 00:00:00 2001 From: Anna Schumaker Date: Wed, 28 May 2014 13:41:22 -0400 Subject: NFS: Add READ_PLUS data segment support This patch adds client support for decoding a single NFS4_CONTENT_DATA segment returned by the server. This is the simplest implementation possible, since it does not account for any hole segments in the reply. 
Signed-off-by: Anna Schumaker --- fs/nfs/nfs42xdr.c | 141 ++++++++++++++++++++++++++++++++++++++++++++++ fs/nfs/nfs4client.c | 2 + fs/nfs/nfs4proc.c | 43 +++++++++++++- fs/nfs/nfs4xdr.c | 1 + include/linux/nfs4.h | 2 +- include/linux/nfs_fs_sb.h | 1 + include/linux/nfs_xdr.h | 2 +- 7 files changed, 187 insertions(+), 5 deletions(-) diff --git a/fs/nfs/nfs42xdr.c b/fs/nfs/nfs42xdr.c index cc50085e151c..930b4ca212c1 100644 --- a/fs/nfs/nfs42xdr.c +++ b/fs/nfs/nfs42xdr.c @@ -45,6 +45,15 @@ #define encode_deallocate_maxsz (op_encode_hdr_maxsz + \ encode_fallocate_maxsz) #define decode_deallocate_maxsz (op_decode_hdr_maxsz) +#define encode_read_plus_maxsz (op_encode_hdr_maxsz + \ + encode_stateid_maxsz + 3) +#define NFS42_READ_PLUS_SEGMENT_SIZE (1 /* data_content4 */ + \ + 2 /* data_info4.di_offset */ + \ + 2 /* data_info4.di_length */) +#define decode_read_plus_maxsz (op_decode_hdr_maxsz + \ + 1 /* rpr_eof */ + \ + 1 /* rpr_contents count */ + \ + NFS42_READ_PLUS_SEGMENT_SIZE) #define encode_seek_maxsz (op_encode_hdr_maxsz + \ encode_stateid_maxsz + \ 2 /* offset */ + \ @@ -128,6 +137,14 @@ decode_putfh_maxsz + \ decode_deallocate_maxsz + \ decode_getattr_maxsz) +#define NFS4_enc_read_plus_sz (compound_encode_hdr_maxsz + \ + encode_sequence_maxsz + \ + encode_putfh_maxsz + \ + encode_read_plus_maxsz) +#define NFS4_dec_read_plus_sz (compound_decode_hdr_maxsz + \ + decode_sequence_maxsz + \ + decode_putfh_maxsz + \ + decode_read_plus_maxsz) #define NFS4_enc_seek_sz (compound_encode_hdr_maxsz + \ encode_sequence_maxsz + \ encode_putfh_maxsz + \ @@ -324,6 +341,16 @@ static void encode_deallocate(struct xdr_stream *xdr, encode_fallocate(xdr, args); } +static void encode_read_plus(struct xdr_stream *xdr, + const struct nfs_pgio_args *args, + struct compound_hdr *hdr) +{ + encode_op_hdr(xdr, OP_READ_PLUS, decode_read_plus_maxsz, hdr); + encode_nfs4_stateid(xdr, &args->stateid); + encode_uint64(xdr, args->offset); + encode_uint32(xdr, args->count); +} + static void 
encode_seek(struct xdr_stream *xdr, const struct nfs42_seek_args *args, struct compound_hdr *hdr) @@ -722,6 +749,28 @@ static void nfs4_xdr_enc_deallocate(struct rpc_rqst *req, encode_nops(&hdr); } +/* + * Encode READ_PLUS request + */ +static void nfs4_xdr_enc_read_plus(struct rpc_rqst *req, + struct xdr_stream *xdr, + const void *data) +{ + const struct nfs_pgio_args *args = data; + struct compound_hdr hdr = { + .minorversion = nfs4_xdr_minorversion(&args->seq_args), + }; + + encode_compound_hdr(xdr, req, &hdr); + encode_sequence(xdr, &args->seq_args, &hdr); + encode_putfh(xdr, args->fh, &hdr); + encode_read_plus(xdr, args, &hdr); + + rpc_prepare_reply_pages(req, args->pages, args->pgbase, + args->count, hdr.replen); + encode_nops(&hdr); +} + /* * Encode SEEK request */ @@ -970,6 +1019,71 @@ static int decode_deallocate(struct xdr_stream *xdr, struct nfs42_falloc_res *re return decode_op_hdr(xdr, OP_DEALLOCATE); } +static int decode_read_plus_data(struct xdr_stream *xdr, struct nfs_pgio_res *res, + uint32_t *eof) +{ + uint32_t count, recvd; + uint64_t offset; + __be32 *p; + + p = xdr_inline_decode(xdr, 8 + 4); + if (unlikely(!p)) + return -EIO; + + p = xdr_decode_hyper(p, &offset); + count = be32_to_cpup(p); + recvd = xdr_read_pages(xdr, count); + res->count += recvd; + + if (count > recvd) { + dprintk("NFS: server cheating in read reply: " + "count %u > recvd %u\n", count, recvd); + *eof = 0; + return 1; + } + + return 0; +} + +static int decode_read_plus(struct xdr_stream *xdr, struct nfs_pgio_res *res) +{ + uint32_t eof, segments, type; + int status; + __be32 *p; + + status = decode_op_hdr(xdr, OP_READ_PLUS); + if (status) + return status; + + p = xdr_inline_decode(xdr, 4 + 4); + if (unlikely(!p)) + return -EIO; + + eof = be32_to_cpup(p++); + segments = be32_to_cpup(p++); + if (segments == 0) + goto out; + + p = xdr_inline_decode(xdr, 4); + if (unlikely(!p)) + return -EIO; + + type = be32_to_cpup(p++); + if (type == NFS4_CONTENT_DATA) + status = 
decode_read_plus_data(xdr, res, &eof); + else + return -EINVAL; + + if (status) + return status; + if (segments > 1) + eof = 0; + +out: + res->eof = eof; + return 0; +} + static int decode_seek(struct xdr_stream *xdr, struct nfs42_seek_res *res) { int status; @@ -1146,6 +1260,33 @@ out: return status; } +/* + * Decode READ_PLUS request + */ +static int nfs4_xdr_dec_read_plus(struct rpc_rqst *rqstp, + struct xdr_stream *xdr, + void *data) +{ + struct nfs_pgio_res *res = data; + struct compound_hdr hdr; + int status; + + status = decode_compound_hdr(xdr, &hdr); + if (status) + goto out; + status = decode_sequence(xdr, &res->seq_res, rqstp); + if (status) + goto out; + status = decode_putfh(xdr); + if (status) + goto out; + status = decode_read_plus(xdr, res); + if (!status) + status = res->count; +out: + return status; +} + /* * Decode SEEK request */ diff --git a/fs/nfs/nfs4client.c b/fs/nfs/nfs4client.c index daacc78a3d48..be7915c861ce 100644 --- a/fs/nfs/nfs4client.c +++ b/fs/nfs/nfs4client.c @@ -1045,6 +1045,8 @@ static int nfs4_server_common_setup(struct nfs_server *server, server->caps |= server->nfs_client->cl_mvops->init_caps; if (server->flags & NFS_MOUNT_NORDIRPLUS) server->caps &= ~NFS_CAP_READDIRPLUS; + if (server->nfs_client->cl_proto == XPRT_TRANSPORT_RDMA) + server->caps &= ~NFS_CAP_READ_PLUS; /* * Don't use NFS uid/gid mapping if we're using AUTH_SYS or lower * authentication. 
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index f7ef2ca699a5..d09fd3236820 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -70,6 +70,10 @@ #include "nfs4trace.h" +#ifdef CONFIG_NFS_V4_2 +#include "nfs42.h" +#endif /* CONFIG_NFS_V4_2 */ + #define NFSDBG_FACILITY NFSDBG_PROC #define NFS4_BITMASK_SZ 3 @@ -5272,28 +5276,60 @@ static bool nfs4_read_stateid_changed(struct rpc_task *task, return true; } -static int nfs4_read_done(struct rpc_task *task, struct nfs_pgio_header *hdr) +static bool nfs4_read_plus_not_supported(struct rpc_task *task, + struct nfs_pgio_header *hdr) { + struct nfs_server *server = NFS_SERVER(hdr->inode); + struct rpc_message *msg = &task->tk_msg; + if (msg->rpc_proc == &nfs4_procedures[NFSPROC4_CLNT_READ_PLUS] && + server->caps & NFS_CAP_READ_PLUS && task->tk_status == -ENOTSUPP) { + server->caps &= ~NFS_CAP_READ_PLUS; + msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_READ]; + rpc_restart_call_prepare(task); + return true; + } + return false; +} + +static int nfs4_read_done(struct rpc_task *task, struct nfs_pgio_header *hdr) +{ dprintk("--> %s\n", __func__); if (!nfs4_sequence_done(task, &hdr->res.seq_res)) return -EAGAIN; if (nfs4_read_stateid_changed(task, &hdr->args)) return -EAGAIN; + if (nfs4_read_plus_not_supported(task, hdr)) + return -EAGAIN; if (task->tk_status > 0) nfs_invalidate_atime(hdr->inode); return hdr->pgio_done_cb ? 
hdr->pgio_done_cb(task, hdr) : nfs4_read_done_cb(task, hdr); } +#ifdef CONFIG_NFS_V4_2 +static void nfs42_read_plus_support(struct nfs_server *server, struct rpc_message *msg) +{ + if (server->caps & NFS_CAP_READ_PLUS) + msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_READ_PLUS]; + else + msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_READ]; +} +#else +static void nfs42_read_plus_support(struct nfs_server *server, struct rpc_message *msg) +{ + msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_READ]; +} +#endif /* CONFIG_NFS_V4_2 */ + static void nfs4_proc_read_setup(struct nfs_pgio_header *hdr, struct rpc_message *msg) { hdr->timestamp = jiffies; if (!hdr->pgio_done_cb) hdr->pgio_done_cb = nfs4_read_done_cb; - msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_READ]; + nfs42_read_plus_support(NFS_SERVER(hdr->inode), msg); nfs4_init_sequence(&hdr->args.seq_args, &hdr->res.seq_res, 0, 0); } @@ -10215,7 +10251,8 @@ static const struct nfs4_minor_version_ops nfs_v4_2_minor_ops = { | NFS_CAP_SEEK | NFS_CAP_LAYOUTSTATS | NFS_CAP_CLONE - | NFS_CAP_LAYOUTERROR, + | NFS_CAP_LAYOUTERROR + | NFS_CAP_READ_PLUS, .init_client = nfs41_init_client, .shutdown_client = nfs41_shutdown_client, .match_stateid = nfs41_match_stateid, diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index 3336ea3407a0..c6dbfcae7517 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -7615,6 +7615,7 @@ const struct rpc_procinfo nfs4_procedures[] = { PROC42(SETXATTR, enc_setxattr, dec_setxattr), PROC42(LISTXATTRS, enc_listxattrs, dec_listxattrs), PROC42(REMOVEXATTR, enc_removexattr, dec_removexattr), + PROC42(READ_PLUS, enc_read_plus, dec_read_plus), }; static unsigned int nfs_version4_counts[ARRAY_SIZE(nfs4_procedures)]; diff --git a/include/linux/nfs4.h b/include/linux/nfs4.h index b8360be141da..9dc7eeac924f 100644 --- a/include/linux/nfs4.h +++ b/include/linux/nfs4.h @@ -551,13 +551,13 @@ enum { NFSPROC4_CLNT_LOOKUPP, NFSPROC4_CLNT_LAYOUTERROR, - NFSPROC4_CLNT_COPY_NOTIFY, NFSPROC4_CLNT_GETXATTR, 
NFSPROC4_CLNT_SETXATTR, NFSPROC4_CLNT_LISTXATTRS, NFSPROC4_CLNT_REMOVEXATTR, + NFSPROC4_CLNT_READ_PLUS, }; /* nfs41 types */ diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h index 7eae72a8762e..38e60ec742df 100644 --- a/include/linux/nfs_fs_sb.h +++ b/include/linux/nfs_fs_sb.h @@ -287,5 +287,6 @@ struct nfs_server { #define NFS_CAP_LAYOUTERROR (1U << 26) #define NFS_CAP_COPY_NOTIFY (1U << 27) #define NFS_CAP_XATTR (1U << 28) +#define NFS_CAP_READ_PLUS (1U << 29) #endif diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 0599efd57eb9..d63cb862d58e 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -657,7 +657,7 @@ struct nfs_pgio_args { struct nfs_pgio_res { struct nfs4_sequence_res seq_res; struct nfs_fattr * fattr; - __u32 count; + __u64 count; __u32 op_status; union { struct { -- cgit v1.2.3 From 06216ecbd93688f7acb617e186b9556a565a13bd Mon Sep 17 00:00:00 2001 From: Anna Schumaker Date: Mon, 20 Apr 2020 17:38:17 -0400 Subject: SUNRPC: Split out xdr_realign_pages() from xdr_align_pages() I don't need the entire align pages code for READ_PLUS, so split out the part I do need so I don't need to reimplement anything. 
Signed-off-by: Anna Schumaker --- net/sunrpc/xdr.c | 21 ++++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) diff --git a/net/sunrpc/xdr.c b/net/sunrpc/xdr.c index 909920fab93b..d93bcad5ba9f 100644 --- a/net/sunrpc/xdr.c +++ b/net/sunrpc/xdr.c @@ -997,26 +997,33 @@ out_overflow: } EXPORT_SYMBOL_GPL(xdr_inline_decode); -static unsigned int xdr_align_pages(struct xdr_stream *xdr, unsigned int len) +static void xdr_realign_pages(struct xdr_stream *xdr) { struct xdr_buf *buf = xdr->buf; - struct kvec *iov; - unsigned int nwords = XDR_QUADLEN(len); + struct kvec *iov = buf->head; unsigned int cur = xdr_stream_pos(xdr); unsigned int copied, offset; - if (xdr->nwords == 0) - return 0; - /* Realign pages to current pointer position */ - iov = buf->head; if (iov->iov_len > cur) { offset = iov->iov_len - cur; copied = xdr_shrink_bufhead(buf, offset); trace_rpc_xdr_alignment(xdr, offset, copied); xdr->nwords = XDR_QUADLEN(buf->len - cur); } +} + +static unsigned int xdr_align_pages(struct xdr_stream *xdr, unsigned int len) +{ + struct xdr_buf *buf = xdr->buf; + unsigned int nwords = XDR_QUADLEN(len); + unsigned int cur = xdr_stream_pos(xdr); + unsigned int copied, offset; + + if (xdr->nwords == 0) + return 0; + xdr_realign_pages(xdr); if (nwords > xdr->nwords) { nwords = xdr->nwords; len = nwords << 2; -- cgit v1.2.3 From 43f0f0816cdbe7361dd17db3b4c1033446033ba6 Mon Sep 17 00:00:00 2001 From: Anna Schumaker Date: Wed, 6 May 2020 13:21:30 -0400 Subject: SUNRPC: Split out _shift_data_right_tail() xdr_shrink_pagelen() is very similar to what we need for hole expansion, so split out the common code into its own function that can be used by both functions. 
Signed-off-by: Anna Schumaker --- net/sunrpc/xdr.c | 68 ++++++++++++++++++++++++++++++++++---------------------- 1 file changed, 41 insertions(+), 27 deletions(-) diff --git a/net/sunrpc/xdr.c b/net/sunrpc/xdr.c index d93bcad5ba9f..10a88a67206a 100644 --- a/net/sunrpc/xdr.c +++ b/net/sunrpc/xdr.c @@ -266,6 +266,46 @@ _shift_data_right_pages(struct page **pages, size_t pgto_base, } while ((len -= copy) != 0); } +static unsigned int +_shift_data_right_tail(struct xdr_buf *buf, unsigned int pgfrom, size_t len) +{ + struct kvec *tail = buf->tail; + unsigned int tailbuf_len; + unsigned int result = 0; + size_t copy; + + tailbuf_len = buf->buflen - buf->head->iov_len - buf->page_len; + + /* Shift the tail first */ + if (tailbuf_len != 0) { + unsigned int free_space = tailbuf_len - tail->iov_len; + + if (len < free_space) + free_space = len; + if (len > free_space) + len = free_space; + + tail->iov_len += free_space; + copy = len; + + if (tail->iov_len > len) { + char *p = (char *)tail->iov_base + len; + memmove(p, tail->iov_base, tail->iov_len - free_space); + result += tail->iov_len - free_space; + } else + copy = tail->iov_len; + + /* Copy from the inlined pages into the tail */ + _copy_from_pages((char *)tail->iov_base, + buf->pages, + buf->page_base + pgfrom, + copy); + result += copy; + } + + return result; +} + /** * _copy_to_pages * @pages: array of pages @@ -446,39 +486,13 @@ xdr_shrink_bufhead(struct xdr_buf *buf, size_t len) static unsigned int xdr_shrink_pagelen(struct xdr_buf *buf, size_t len) { - struct kvec *tail; - size_t copy; unsigned int pglen = buf->page_len; - unsigned int tailbuf_len; unsigned int result; - result = 0; - tail = buf->tail; if (len > buf->page_len) len = buf-> page_len; - tailbuf_len = buf->buflen - buf->head->iov_len - buf->page_len; - /* Shift the tail first */ - if (tailbuf_len != 0) { - unsigned int free_space = tailbuf_len - tail->iov_len; - - if (len < free_space) - free_space = len; - tail->iov_len += free_space; - - copy = len; 
- if (tail->iov_len > len) { - char *p = (char *)tail->iov_base + len; - memmove(p, tail->iov_base, tail->iov_len - len); - result += tail->iov_len - len; - } else - copy = tail->iov_len; - /* Copy from the inlined pages into the tail */ - _copy_from_pages((char *)tail->iov_base, - buf->pages, buf->page_base + pglen - len, - copy); - result += copy; - } + result = _shift_data_right_tail(buf, pglen - len, len); buf->page_len -= len; buf->buflen -= len; /* Have we truncated the message? */ -- cgit v1.2.3 From 84ce182ab85b8ad5002fb1125ba572df99dd0d1c Mon Sep 17 00:00:00 2001 From: Anna Schumaker Date: Wed, 28 May 2014 13:38:53 -0400 Subject: SUNRPC: Add the ability to expand holes in data pages This patch adds the ability to "read a hole" into a set of XDR data pages by taking the following steps: 1) Shift all data after the current xdr->p to the right, possibly into the tail, 2) Zero the specified range, and 3) Update xdr->p to point beyond the hole. Signed-off-by: Anna Schumaker --- include/linux/sunrpc/xdr.h | 1 + net/sunrpc/xdr.c | 69 ++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 70 insertions(+) diff --git a/include/linux/sunrpc/xdr.h b/include/linux/sunrpc/xdr.h index 25a68dd87ecf..f9636d2a6d54 100644 --- a/include/linux/sunrpc/xdr.h +++ b/include/linux/sunrpc/xdr.h @@ -250,6 +250,7 @@ extern __be32 *xdr_inline_decode(struct xdr_stream *xdr, size_t nbytes); extern unsigned int xdr_read_pages(struct xdr_stream *xdr, unsigned int len); extern void xdr_enter_page(struct xdr_stream *xdr, unsigned int len); extern int xdr_process_buf(struct xdr_buf *buf, unsigned int offset, unsigned int len, int (*actor)(struct scatterlist *, void *), void *data); +extern uint64_t xdr_expand_hole(struct xdr_stream *, uint64_t, uint64_t); /** * xdr_stream_remaining - Return the number of bytes remaining in the stream diff --git a/net/sunrpc/xdr.c b/net/sunrpc/xdr.c index 10a88a67206a..1052ccdb4e99 100644 --- a/net/sunrpc/xdr.c +++ b/net/sunrpc/xdr.c @@ -390,6 
+390,38 @@ _copy_from_pages(char *p, struct page **pages, size_t pgbase, size_t len) } EXPORT_SYMBOL_GPL(_copy_from_pages); +/** + * _zero_pages + * @pages: array of pages + * @pgbase: beginning page vector address + * @len: length + */ +static void +_zero_pages(struct page **pages, size_t pgbase, size_t len) +{ + struct page **page; + char *vpage; + size_t zero; + + page = pages + (pgbase >> PAGE_SHIFT); + pgbase &= ~PAGE_MASK; + + do { + zero = PAGE_SIZE - pgbase; + if (zero > len) + zero = len; + + vpage = kmap_atomic(*page); + memset(vpage + pgbase, 0, zero); + kunmap_atomic(vpage); + + flush_dcache_page(*page); + pgbase = 0; + page++; + + } while ((len -= zero) != 0); +} + /** * xdr_shrink_bufhead * @buf: xdr_buf @@ -1096,6 +1128,43 @@ unsigned int xdr_read_pages(struct xdr_stream *xdr, unsigned int len) } EXPORT_SYMBOL_GPL(xdr_read_pages); +uint64_t xdr_expand_hole(struct xdr_stream *xdr, uint64_t offset, uint64_t length) +{ + struct xdr_buf *buf = xdr->buf; + unsigned int bytes; + unsigned int from; + unsigned int truncated = 0; + + if ((offset + length) < offset || + (offset + length) > buf->page_len) + length = buf->page_len - offset; + + xdr_realign_pages(xdr); + from = xdr_page_pos(xdr); + bytes = xdr->nwords << 2; + + if (offset + length + bytes > buf->page_len) { + unsigned int shift = (offset + length + bytes) - buf->page_len; + unsigned int res = _shift_data_right_tail(buf, from + bytes - shift, shift); + truncated = shift - res; + xdr->nwords -= XDR_QUADLEN(truncated); + bytes -= shift; + } + + /* Now move the page data over and zero pages */ + if (bytes > 0) + _shift_data_right_pages(buf->pages, + buf->page_base + offset + length, + buf->page_base + from, + bytes); + _zero_pages(buf->pages, buf->page_base + offset, length); + + buf->len += length - (from - offset) - truncated; + xdr_set_page(xdr, offset + length, PAGE_SIZE); + return length; +} +EXPORT_SYMBOL_GPL(xdr_expand_hole); + /** * xdr_enter_page - decode data from the XDR page * @xdr: 
pointer to xdr_stream struct -- cgit v1.2.3 From c05eafad6b034772921e56de5c01df2326d9e3b3 Mon Sep 17 00:00:00 2001 From: Anna Schumaker Date: Thu, 28 Mar 2019 16:43:44 -0400 Subject: NFS: Add READ_PLUS hole segment decoding We keep things simple for now by only decoding a single hole or data segment returned by the server, even if they returned more to us. Signed-off-by: Anna Schumaker --- fs/nfs/nfs42xdr.c | 26 +++++++++++++++++++++++++- 1 file changed, 25 insertions(+), 1 deletion(-) diff --git a/fs/nfs/nfs42xdr.c b/fs/nfs/nfs42xdr.c index 930b4ca212c1..9720fedd2e57 100644 --- a/fs/nfs/nfs42xdr.c +++ b/fs/nfs/nfs42xdr.c @@ -53,7 +53,7 @@ #define decode_read_plus_maxsz (op_decode_hdr_maxsz + \ 1 /* rpr_eof */ + \ 1 /* rpr_contents count */ + \ - NFS42_READ_PLUS_SEGMENT_SIZE) + 2 * NFS42_READ_PLUS_SEGMENT_SIZE) #define encode_seek_maxsz (op_encode_hdr_maxsz + \ encode_stateid_maxsz + \ 2 /* offset */ + \ @@ -1045,6 +1045,28 @@ static int decode_read_plus_data(struct xdr_stream *xdr, struct nfs_pgio_res *re return 0; } +static int decode_read_plus_hole(struct xdr_stream *xdr, struct nfs_pgio_res *res, + uint32_t *eof) +{ + uint64_t offset, length, recvd; + __be32 *p; + + p = xdr_inline_decode(xdr, 8 + 8); + if (unlikely(!p)) + return -EIO; + + p = xdr_decode_hyper(p, &offset); + p = xdr_decode_hyper(p, &length); + recvd = xdr_expand_hole(xdr, 0, length); + res->count += recvd; + + if (recvd < length) { + *eof = 0; + return 1; + } + return 0; +} + static int decode_read_plus(struct xdr_stream *xdr, struct nfs_pgio_res *res) { uint32_t eof, segments, type; @@ -1071,6 +1093,8 @@ static int decode_read_plus(struct xdr_stream *xdr, struct nfs_pgio_res *res) type = be32_to_cpup(p++); if (type == NFS4_CONTENT_DATA) status = decode_read_plus_data(xdr, res, &eof); + else if (type == NFS4_CONTENT_HOLE) + status = decode_read_plus_hole(xdr, res, &eof); else return -EINVAL; -- cgit v1.2.3 From e6ac0accb27c6892b7ebc7799e7ce56b3390a678 Mon Sep 17 00:00:00 2001 From: Anna 
Schumaker Date: Tue, 21 Apr 2020 11:27:00 -0400 Subject: SUNRPC: Add an xdr_align_data() function For now, this function simply aligns the data at the beginning of the pages. This can eventually be expanded to shift data to the correct offsets when we're ready. Signed-off-by: Anna Schumaker --- include/linux/sunrpc/xdr.h | 1 + net/sunrpc/xdr.c | 121 +++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 122 insertions(+) diff --git a/include/linux/sunrpc/xdr.h b/include/linux/sunrpc/xdr.h index f9636d2a6d54..fe7ff7f5b584 100644 --- a/include/linux/sunrpc/xdr.h +++ b/include/linux/sunrpc/xdr.h @@ -250,6 +250,7 @@ extern __be32 *xdr_inline_decode(struct xdr_stream *xdr, size_t nbytes); extern unsigned int xdr_read_pages(struct xdr_stream *xdr, unsigned int len); extern void xdr_enter_page(struct xdr_stream *xdr, unsigned int len); extern int xdr_process_buf(struct xdr_buf *buf, unsigned int offset, unsigned int len, int (*actor)(struct scatterlist *, void *), void *data); +extern uint64_t xdr_align_data(struct xdr_stream *, uint64_t, uint32_t); extern uint64_t xdr_expand_hole(struct xdr_stream *, uint64_t, uint64_t); /** diff --git a/net/sunrpc/xdr.c b/net/sunrpc/xdr.c index 1052ccdb4e99..3feff529a764 100644 --- a/net/sunrpc/xdr.c +++ b/net/sunrpc/xdr.c @@ -19,6 +19,9 @@ #include #include +static void _copy_to_pages(struct page **, size_t, const char *, size_t); + + /* * XDR functions for basic NFS types */ @@ -201,6 +204,88 @@ EXPORT_SYMBOL_GPL(xdr_inline_pages); * Helper routines for doing 'memmove' like operations on a struct xdr_buf */ +/** + * _shift_data_left_pages + * @pages: vector of pages containing both the source and dest memory area. 
+ * @pgto_base: page vector address of destination + * @pgfrom_base: page vector address of source + * @len: number of bytes to copy + * + * Note: the addresses pgto_base and pgfrom_base are both calculated in + * the same way: + * if a memory area starts at byte 'base' in page 'pages[i]', + * then its address is given as (i << PAGE_CACHE_SHIFT) + base + * Alse note: pgto_base must be < pgfrom_base, but the memory areas + * they point to may overlap. + */ +static void +_shift_data_left_pages(struct page **pages, size_t pgto_base, + size_t pgfrom_base, size_t len) +{ + struct page **pgfrom, **pgto; + char *vfrom, *vto; + size_t copy; + + BUG_ON(pgfrom_base <= pgto_base); + + pgto = pages + (pgto_base >> PAGE_SHIFT); + pgfrom = pages + (pgfrom_base >> PAGE_SHIFT); + + pgto_base &= ~PAGE_MASK; + pgfrom_base &= ~PAGE_MASK; + + do { + if (pgto_base >= PAGE_SIZE) { + pgto_base = 0; + pgto++; + } + if (pgfrom_base >= PAGE_SIZE){ + pgfrom_base = 0; + pgfrom++; + } + + copy = len; + if (copy > (PAGE_SIZE - pgto_base)) + copy = PAGE_SIZE - pgto_base; + if (copy > (PAGE_SIZE - pgfrom_base)) + copy = PAGE_SIZE - pgfrom_base; + + vto = kmap_atomic(*pgto); + if (*pgto != *pgfrom) { + vfrom = kmap_atomic(*pgfrom); + memcpy(vto + pgto_base, vfrom + pgfrom_base, copy); + kunmap_atomic(vfrom); + } else + memmove(vto + pgto_base, vto + pgfrom_base, copy); + flush_dcache_page(*pgto); + kunmap_atomic(vto); + + pgto_base += copy; + pgfrom_base += copy; + + } while ((len -= copy) != 0); +} + +static void +_shift_data_left_tail(struct xdr_buf *buf, unsigned int pgto, size_t len) +{ + struct kvec *tail = buf->tail; + + if (len > tail->iov_len) + len = tail->iov_len; + + _copy_to_pages(buf->pages, + buf->page_base + pgto, + (char *)tail->iov_base, + len); + tail->iov_len -= len; + + if (tail->iov_len > 0) + memmove((char *)tail->iov_base, + tail->iov_base + len, + tail->iov_len); +} + /** * _shift_data_right_pages * @pages: vector of pages containing both the source and dest memory area. 
@@ -1128,6 +1213,42 @@ unsigned int xdr_read_pages(struct xdr_stream *xdr, unsigned int len) } EXPORT_SYMBOL_GPL(xdr_read_pages); +uint64_t xdr_align_data(struct xdr_stream *xdr, uint64_t offset, uint32_t length) +{ + struct xdr_buf *buf = xdr->buf; + unsigned int from, bytes; + unsigned int shift = 0; + + if ((offset + length) < offset || + (offset + length) > buf->page_len) + length = buf->page_len - offset; + + xdr_realign_pages(xdr); + from = xdr_page_pos(xdr); + bytes = xdr->nwords << 2; + if (length < bytes) + bytes = length; + + /* Move page data to the left */ + if (from > offset) { + shift = min_t(unsigned int, bytes, buf->page_len - from); + _shift_data_left_pages(buf->pages, + buf->page_base + offset, + buf->page_base + from, + shift); + bytes -= shift; + + /* Move tail data into the pages, if necessary */ + if (bytes > 0) + _shift_data_left_tail(buf, offset + shift, bytes); + } + + xdr->nwords -= XDR_QUADLEN(length); + xdr_set_page(xdr, from + length, PAGE_SIZE); + return length; +} +EXPORT_SYMBOL_GPL(xdr_align_data); + uint64_t xdr_expand_hole(struct xdr_stream *xdr, uint64_t offset, uint64_t length) { struct xdr_buf *buf = xdr->buf; -- cgit v1.2.3 From bff049a3b5001eb462f27eda98f32f3ff10f4ec2 Mon Sep 17 00:00:00 2001 From: Anna Schumaker Date: Wed, 1 Apr 2020 16:28:51 -0400 Subject: NFS: Decode a full READ_PLUS reply Decode multiple hole and data segments sent by the server, placing everything directly where they need to go in the xdr pages. 
Signed-off-by: Anna Schumaker --- fs/nfs/nfs42xdr.c | 36 +++++++++++++++++++----------------- 1 file changed, 19 insertions(+), 17 deletions(-) diff --git a/fs/nfs/nfs42xdr.c b/fs/nfs/nfs42xdr.c index 9720fedd2e57..0dc31ad2362e 100644 --- a/fs/nfs/nfs42xdr.c +++ b/fs/nfs/nfs42xdr.c @@ -1032,7 +1032,7 @@ static int decode_read_plus_data(struct xdr_stream *xdr, struct nfs_pgio_res *re p = xdr_decode_hyper(p, &offset); count = be32_to_cpup(p); - recvd = xdr_read_pages(xdr, count); + recvd = xdr_align_data(xdr, res->count, count); res->count += recvd; if (count > recvd) { @@ -1057,7 +1057,7 @@ static int decode_read_plus_hole(struct xdr_stream *xdr, struct nfs_pgio_res *re p = xdr_decode_hyper(p, &offset); p = xdr_decode_hyper(p, &length); - recvd = xdr_expand_hole(xdr, 0, length); + recvd = xdr_expand_hole(xdr, res->count, length); res->count += recvd; if (recvd < length) { @@ -1070,7 +1070,7 @@ static int decode_read_plus_hole(struct xdr_stream *xdr, struct nfs_pgio_res *re static int decode_read_plus(struct xdr_stream *xdr, struct nfs_pgio_res *res) { uint32_t eof, segments, type; - int status; + int status, i; __be32 *p; status = decode_op_hdr(xdr, OP_READ_PLUS); @@ -1086,22 +1086,24 @@ static int decode_read_plus(struct xdr_stream *xdr, struct nfs_pgio_res *res) if (segments == 0) goto out; - p = xdr_inline_decode(xdr, 4); - if (unlikely(!p)) - return -EIO; + for (i = 0; i < segments; i++) { + p = xdr_inline_decode(xdr, 4); + if (unlikely(!p)) + return -EIO; - type = be32_to_cpup(p++); - if (type == NFS4_CONTENT_DATA) - status = decode_read_plus_data(xdr, res, &eof); - else if (type == NFS4_CONTENT_HOLE) - status = decode_read_plus_hole(xdr, res, &eof); - else - return -EINVAL; + type = be32_to_cpup(p++); + if (type == NFS4_CONTENT_DATA) + status = decode_read_plus_data(xdr, res, &eof); + else if (type == NFS4_CONTENT_HOLE) + status = decode_read_plus_hole(xdr, res, &eof); + else + return -EINVAL; - if (status) - return status; - if (segments > 1) - eof = 0; + if 
(status < 0) + return status; + if (status > 0) + break; + } out: res->eof = eof; -- cgit v1.2.3 From 1aee551334cda1fed8b8112dbe38257397a55c78 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 7 Oct 2020 18:24:17 -0400 Subject: NFSv4: Clean up initialisation of uniquified client id strings When the user sets a uniquifier, then ensure we copy the string so that calls to strlen() etc are atomic with calls to snprintf(). Signed-off-by: Trond Myklebust Signed-off-by: Anna Schumaker --- fs/nfs/nfs4proc.c | 75 +++++++++++++++++++++++++------------------------------ 1 file changed, 34 insertions(+), 41 deletions(-) diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index d09fd3236820..50a96ca2c385 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -6093,9 +6093,22 @@ static void nfs4_init_boot_verifier(const struct nfs_client *clp, memcpy(bootverf->data, verf, sizeof(bootverf->data)); } +static size_t +nfs4_get_uniquifier(char *buf, size_t buflen) +{ + buf[0] = '\0'; + + if (nfs4_client_id_uniquifier[0] != '\0') + strscpy(buf, nfs4_client_id_uniquifier, buflen); + + return strlen(buf); +} + static int nfs4_init_nonuniform_client_string(struct nfs_client *clp) { + char buf[NFS4_CLIENT_ID_UNIQ_LEN]; + size_t buflen; size_t len; char *str; @@ -6109,8 +6122,11 @@ nfs4_init_nonuniform_client_string(struct nfs_client *clp) strlen(rpc_peeraddr2str(clp->cl_rpcclient, RPC_DISPLAY_ADDR)) + 1; rcu_read_unlock(); - if (nfs4_client_id_uniquifier[0] != '\0') - len += strlen(nfs4_client_id_uniquifier) + 1; + + buflen = nfs4_get_uniquifier(buf, sizeof(buf)); + if (buflen) + len += buflen + 1; + if (len > NFS4_OPAQUE_LIMIT + 1) return -EINVAL; @@ -6124,10 +6140,9 @@ nfs4_init_nonuniform_client_string(struct nfs_client *clp) return -ENOMEM; rcu_read_lock(); - if (nfs4_client_id_uniquifier[0] != '\0') + if (buflen) scnprintf(str, len, "Linux NFSv4.0 %s/%s/%s", - clp->cl_rpcclient->cl_nodename, - nfs4_client_id_uniquifier, + clp->cl_rpcclient->cl_nodename, buf, 
rpc_peeraddr2str(clp->cl_rpcclient, RPC_DISPLAY_ADDR)); else @@ -6141,51 +6156,24 @@ nfs4_init_nonuniform_client_string(struct nfs_client *clp) return 0; } -static int -nfs4_init_uniquifier_client_string(struct nfs_client *clp) -{ - size_t len; - char *str; - - len = 10 + 10 + 1 + 10 + 1 + - strlen(nfs4_client_id_uniquifier) + 1 + - strlen(clp->cl_rpcclient->cl_nodename) + 1; - - if (len > NFS4_OPAQUE_LIMIT + 1) - return -EINVAL; - - /* - * Since this string is allocated at mount time, and held until the - * nfs_client is destroyed, we can use GFP_KERNEL here w/o worrying - * about a memory-reclaim deadlock. - */ - str = kmalloc(len, GFP_KERNEL); - if (!str) - return -ENOMEM; - - scnprintf(str, len, "Linux NFSv%u.%u %s/%s", - clp->rpc_ops->version, clp->cl_minorversion, - nfs4_client_id_uniquifier, - clp->cl_rpcclient->cl_nodename); - clp->cl_owner_id = str; - return 0; -} - static int nfs4_init_uniform_client_string(struct nfs_client *clp) { + char buf[NFS4_CLIENT_ID_UNIQ_LEN]; + size_t buflen; size_t len; char *str; if (clp->cl_owner_id != NULL) return 0; - if (nfs4_client_id_uniquifier[0] != '\0') - return nfs4_init_uniquifier_client_string(clp); - len = 10 + 10 + 1 + 10 + 1 + strlen(clp->cl_rpcclient->cl_nodename) + 1; + buflen = nfs4_get_uniquifier(buf, sizeof(buf)); + if (buflen) + len += buflen + 1; + if (len > NFS4_OPAQUE_LIMIT + 1) return -EINVAL; @@ -6198,9 +6186,14 @@ nfs4_init_uniform_client_string(struct nfs_client *clp) if (!str) return -ENOMEM; - scnprintf(str, len, "Linux NFSv%u.%u %s", - clp->rpc_ops->version, clp->cl_minorversion, - clp->cl_rpcclient->cl_nodename); + if (buflen) + scnprintf(str, len, "Linux NFSv%u.%u %s/%s", + clp->rpc_ops->version, clp->cl_minorversion, + buf, clp->cl_rpcclient->cl_nodename); + else + scnprintf(str, len, "Linux NFSv%u.%u %s", + clp->rpc_ops->version, clp->cl_minorversion, + clp->cl_rpcclient->cl_nodename); clp->cl_owner_id = str; return 0; } -- cgit v1.2.3 From 39d43d164127da7fbc62d0ef73146e04e31a828d Mon Sep 17 
00:00:00 2001 From: Trond Myklebust Date: Wed, 7 Oct 2020 18:24:18 -0400 Subject: NFSv4: Use the net namespace uniquifier if it is set If a container sets a net namespace specific uniquifier, then use that in the setclientid/exchangeid process. Signed-off-by: Trond Myklebust Signed-off-by: Anna Schumaker --- fs/nfs/nfs4proc.c | 21 +++++++++++++++++---- 1 file changed, 17 insertions(+), 4 deletions(-) diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 50a96ca2c385..2e33995691f5 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -63,6 +63,7 @@ #include "callback.h" #include "pnfs.h" #include "netns.h" +#include "sysfs.h" #include "nfs4idmap.h" #include "nfs4session.h" #include "fscache.h" @@ -6094,11 +6095,23 @@ static void nfs4_init_boot_verifier(const struct nfs_client *clp, } static size_t -nfs4_get_uniquifier(char *buf, size_t buflen) +nfs4_get_uniquifier(struct nfs_client *clp, char *buf, size_t buflen) { + struct nfs_net *nn = net_generic(clp->cl_net, nfs_net_id); + struct nfs_netns_client *nn_clp = nn->nfs_client; + const char *id; + buf[0] = '\0'; - if (nfs4_client_id_uniquifier[0] != '\0') + if (nn_clp) { + rcu_read_lock(); + id = rcu_dereference(nn_clp->identifier); + if (id) + strscpy(buf, id, buflen); + rcu_read_unlock(); + } + + if (nfs4_client_id_uniquifier[0] != '\0' && buf[0] == '\0') strscpy(buf, nfs4_client_id_uniquifier, buflen); return strlen(buf); @@ -6123,7 +6136,7 @@ nfs4_init_nonuniform_client_string(struct nfs_client *clp) 1; rcu_read_unlock(); - buflen = nfs4_get_uniquifier(buf, sizeof(buf)); + buflen = nfs4_get_uniquifier(clp, buf, sizeof(buf)); if (buflen) len += buflen + 1; @@ -6170,7 +6183,7 @@ nfs4_init_uniform_client_string(struct nfs_client *clp) len = 10 + 10 + 1 + 10 + 1 + strlen(clp->cl_rpcclient->cl_nodename) + 1; - buflen = nfs4_get_uniquifier(buf, sizeof(buf)); + buflen = nfs4_get_uniquifier(clp, buf, sizeof(buf)); if (buflen) len += buflen + 1; -- cgit v1.2.3 From a2d24bcb97dc7b0be1cb891e60ae133bdf36c786 Mon Sep 17 
00:00:00 2001 From: Scott Mayhew Date: Sat, 10 Oct 2020 10:03:12 -0400 Subject: nfs: add missing "posix" local_lock constant table definition "mount -o local_lock=posix..." was broken by the mount API conversion due to the missing constant. Fixes: e38bb238ed8c ("NFS: Convert mount option parsing to use functionality from fs_parser.h") Signed-off-by: Scott Mayhew Signed-off-by: Anna Schumaker --- fs/nfs/fs_context.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/nfs/fs_context.c b/fs/nfs/fs_context.c index 524812984e2d..009987e69020 100644 --- a/fs/nfs/fs_context.c +++ b/fs/nfs/fs_context.c @@ -94,6 +94,7 @@ enum { static const struct constant_table nfs_param_enums_local_lock[] = { { "all", Opt_local_lock_all }, { "flock", Opt_local_lock_flock }, + { "posix", Opt_local_lock_posix }, { "none", Opt_local_lock_none }, {} }; -- cgit v1.2.3 From 61ca2c4afd9d108899e0fa48e7b1cfc9afe80596 Mon Sep 17 00:00:00 2001 From: Sargun Dhillon Date: Mon, 12 Oct 2020 14:43:39 -0700 Subject: NFS: Only reference user namespace from nfs4idmap struct instead of cred The nfs4idmapper only needs access to the user namespace, and not the entire cred struct. This replaces the struct cred* member with struct user_namespace*. This is mostly hygiene, so we don't have to hold onto the cred object, which has extraneous references to things like user_struct. This also makes switching away from init_user_ns more straightforward in the future. 
Signed-off-by: Sargun Dhillon Signed-off-by: Anna Schumaker --- fs/nfs/nfs4idmap.c | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/fs/nfs/nfs4idmap.c b/fs/nfs/nfs4idmap.c index 62e6eea5c516..8d8aba305ecc 100644 --- a/fs/nfs/nfs4idmap.c +++ b/fs/nfs/nfs4idmap.c @@ -46,6 +46,7 @@ #include #include #include +#include #include "internal.h" #include "netns.h" @@ -69,13 +70,13 @@ struct idmap { struct rpc_pipe *idmap_pipe; struct idmap_legacy_upcalldata *idmap_upcall_data; struct mutex idmap_mutex; - const struct cred *cred; + struct user_namespace *user_ns; }; static struct user_namespace *idmap_userns(const struct idmap *idmap) { - if (idmap && idmap->cred) - return idmap->cred->user_ns; + if (idmap && idmap->user_ns) + return idmap->user_ns; return &init_user_ns; } @@ -286,7 +287,7 @@ static struct key *nfs_idmap_request_key(const char *name, size_t namelen, if (ret < 0) return ERR_PTR(ret); - if (!idmap->cred || idmap->cred->user_ns == &init_user_ns) + if (!idmap->user_ns || idmap->user_ns == &init_user_ns) rkey = request_key(&key_type_id_resolver, desc, ""); if (IS_ERR(rkey)) { mutex_lock(&idmap->idmap_mutex); @@ -462,7 +463,7 @@ nfs_idmap_new(struct nfs_client *clp) return -ENOMEM; mutex_init(&idmap->idmap_mutex); - idmap->cred = get_cred(clp->cl_rpcclient->cl_cred); + idmap->user_ns = get_user_ns(clp->cl_rpcclient->cl_cred->user_ns); rpc_init_pipe_dir_object(&idmap->idmap_pdo, &nfs_idmap_pipe_dir_object_ops, @@ -486,7 +487,7 @@ nfs_idmap_new(struct nfs_client *clp) err_destroy_pipe: rpc_destroy_pipe_data(idmap->idmap_pipe); err: - put_cred(idmap->cred); + put_user_ns(idmap->user_ns); kfree(idmap); return error; } @@ -503,7 +504,7 @@ nfs_idmap_delete(struct nfs_client *clp) &clp->cl_rpcclient->cl_pipedir_objects, &idmap->idmap_pdo); rpc_destroy_pipe_data(idmap->idmap_pipe); - put_cred(idmap->cred); + put_user_ns(idmap->user_ns); kfree(idmap); } -- cgit v1.2.3 From 094eca37193c516106ef8ed7f60ed058ed9fc458 Mon Sep 17 00:00:00 2001 
From: Trond Myklebust Date: Wed, 14 Oct 2020 15:22:11 -0400 Subject: NFSv4: Fix up RCU annotations for struct nfs_netns_client The identifier is read as an RCU protected string. Its value may be changed during the lifetime of the network namespace by writing a new string into the sysfs pseudofile (at which point, we free the old string only after a call to synchronize_rcu()). Signed-off-by: Trond Myklebust Signed-off-by: Anna Schumaker --- fs/nfs/sysfs.c | 11 ++++++++--- fs/nfs/sysfs.h | 2 +- 2 files changed, 9 insertions(+), 4 deletions(-) diff --git a/fs/nfs/sysfs.c b/fs/nfs/sysfs.c index c489496b5659..8cb70755e3c9 100644 --- a/fs/nfs/sysfs.c +++ b/fs/nfs/sysfs.c @@ -79,7 +79,12 @@ static ssize_t nfs_netns_identifier_show(struct kobject *kobj, struct nfs_netns_client *c = container_of(kobj, struct nfs_netns_client, kobject); - return scnprintf(buf, PAGE_SIZE, "%s\n", c->identifier); + ssize_t ret; + + rcu_read_lock(); + ret = scnprintf(buf, PAGE_SIZE, "%s\n", rcu_dereference(c->identifier)); + rcu_read_unlock(); + return ret; } /* Strip trailing '\n' */ @@ -107,7 +112,7 @@ static ssize_t nfs_netns_identifier_store(struct kobject *kobj, p = kmemdup_nul(buf, len, GFP_KERNEL); if (!p) return -ENOMEM; - old = xchg(&c->identifier, p); + old = rcu_dereference_protected(xchg(&c->identifier, (char __rcu *)p), 1); if (old) { synchronize_rcu(); kfree(old); @@ -121,7 +126,7 @@ static void nfs_netns_client_release(struct kobject *kobj) struct nfs_netns_client, kobject); - kfree(c->identifier); + kfree(rcu_dereference_raw(c->identifier)); kfree(c); } diff --git a/fs/nfs/sysfs.h b/fs/nfs/sysfs.h index ebcbdc40483b..5501ef573c32 100644 --- a/fs/nfs/sysfs.h +++ b/fs/nfs/sysfs.h @@ -11,7 +11,7 @@ struct nfs_netns_client { struct kobject kobject; struct net *net; - const char *identifier; + const char __rcu *identifier; }; extern struct kobject *nfs_client_kobj; -- cgit v1.2.3 From 8c39076c276be0b31982e44654e2c2357473258a Mon Sep 17 00:00:00 2001 From: Olga Kornievskaia Date: Fri, 
16 Oct 2020 09:25:45 -0400 Subject: NFSv4.2: support EXCHGID4_FLAG_SUPP_FENCE_OPS 4.2 EXCHANGE_ID flag RFC 7862 introduced a new flag that either client or server is allowed to set: EXCHGID4_FLAG_SUPP_FENCE_OPS. Client needs to update its bitmask to allow for this flag value. v2: changed minor version argument to unsigned int Signed-off-by: Olga Kornievskaia CC: Signed-off-by: Anna Schumaker --- fs/nfs/nfs4proc.c | 9 ++++++--- include/uapi/linux/nfs4.h | 3 +++ 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 2e33995691f5..9e0ca9b2b210 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -8133,9 +8133,11 @@ int nfs4_proc_secinfo(struct inode *dir, const struct qstr *name, * both PNFS and NON_PNFS flags set, and not having one of NON_PNFS, PNFS, or * DS flags set. */ -static int nfs4_check_cl_exchange_flags(u32 flags) +static int nfs4_check_cl_exchange_flags(u32 flags, u32 version) { - if (flags & ~EXCHGID4_FLAG_MASK_R) + if (version >= 2 && (flags & ~EXCHGID4_2_FLAG_MASK_R)) + goto out_inval; + else if (version < 2 && (flags & ~EXCHGID4_FLAG_MASK_R)) goto out_inval; if ((flags & EXCHGID4_FLAG_USE_PNFS_MDS) && (flags & EXCHGID4_FLAG_USE_NON_PNFS)) @@ -8548,7 +8550,8 @@ static int _nfs4_proc_exchange_id(struct nfs_client *clp, const struct cred *cre if (status != 0) goto out; - status = nfs4_check_cl_exchange_flags(resp->flags); + status = nfs4_check_cl_exchange_flags(resp->flags, + clp->cl_mvops->minor_version); if (status != 0) goto out; diff --git a/include/uapi/linux/nfs4.h b/include/uapi/linux/nfs4.h index bf197e99b98f..ed5415e0f1c1 100644 --- a/include/uapi/linux/nfs4.h +++ b/include/uapi/linux/nfs4.h @@ -139,6 +139,8 @@ #define EXCHGID4_FLAG_UPD_CONFIRMED_REC_A 0x40000000 #define EXCHGID4_FLAG_CONFIRMED_R 0x80000000 + +#define EXCHGID4_FLAG_SUPP_FENCE_OPS 0x00000004 /* * Since the validity of these bits depends on whether * they're set in the argument or response, have separate @@ -146,6 +148,7 @@ */ 
#define EXCHGID4_FLAG_MASK_A 0x40070103 #define EXCHGID4_FLAG_MASK_R 0x80070103 +#define EXCHGID4_2_FLAG_MASK_R 0x80070107 #define SEQ4_STATUS_CB_PATH_DOWN 0x00000001 #define SEQ4_STATUS_CB_GSS_CONTEXTS_EXPIRING 0x00000002 -- cgit v1.2.3