diff options
author | Linus Torvalds | 2014-10-08 12:51:44 -0400 |
---|---|---|
committer | Linus Torvalds | 2014-10-08 12:51:44 -0400 |
commit | 6dea0737bc5e160efc77f4c39d393b94fd2746dc (patch) | |
tree | 5f4021fa66f86a9cf0700bdefb8fcdfb370161df /net/sunrpc | |
parent | 25641c0c8d72f3d235c022fd2c19181912c2ae8b (diff) | |
parent | 34549ab09e62db9703811c6ed4715f2ffa1fd7fb (diff) |
Merge branch 'for-3.18' of git://linux-nfs.org/~bfields/linux
Pull nfsd updates from Bruce Fields:
"Highlights:
- support the NFSv4.2 SEEK operation (allowing clients to support
SEEK_HOLE/SEEK_DATA), thanks to Anna.
- end the grace period early in a number of cases, mitigating a
long-standing annoyance, thanks to Jeff
- improve SMP scalability, thanks to Trond"
* 'for-3.18' of git://linux-nfs.org/~bfields/linux: (55 commits)
nfsd: eliminate "to_delegation" define
NFSD: Implement SEEK
NFSD: Add generic v4.2 infrastructure
svcrdma: advertise the correct max payload
nfsd: introduce nfsd4_callback_ops
nfsd: split nfsd4_callback initialization and use
nfsd: introduce a generic nfsd4_cb
nfsd: remove nfsd4_callback.cb_op
nfsd: do not clear rpc_resp in nfsd4_cb_done_sequence
nfsd: fix nfsd4_cb_recall_done error handling
nfsd4: clarify how grace period ends
nfsd4: stop grace_time update at end of grace period
nfsd: skip subsequent UMH "create" operations after the first one for v4.0 clients
nfsd: set and test NFSD4_CLIENT_STABLE bit to reduce nfsdcltrack upcalls
nfsd: serialize nfsdcltrack upcalls for a particular client
nfsd: pass extra info in env vars to upcalls to allow for early grace period end
nfsd: add a v4_end_grace file to /proc/fs/nfsd
lockd: add a /proc/fs/lockd/nlm_end_grace file
nfsd: reject reclaim request when client has already sent RECLAIM_COMPLETE
nfsd: remove redundant boot_time parm from grace_done client tracking op
...
Diffstat (limited to 'net/sunrpc')
-rw-r--r-- | net/sunrpc/svc.c | 2 | ||||
-rw-r--r-- | net/sunrpc/svc_xprt.c | 81 | ||||
-rw-r--r-- | net/sunrpc/svcsock.c | 25 | ||||
-rw-r--r-- | net/sunrpc/xprtrdma/svc_rdma_transport.c | 2 | ||||
-rw-r--r-- | net/sunrpc/xprtrdma/xprt_rdma.h | 7 |
5 files changed, 48 insertions, 69 deletions
diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c index 1db5007ddbce..ca8a7958f4e6 100644 --- a/net/sunrpc/svc.c +++ b/net/sunrpc/svc.c @@ -612,8 +612,6 @@ svc_prepare_thread(struct svc_serv *serv, struct svc_pool *pool, int node) if (!rqstp) goto out_enomem; - init_waitqueue_head(&rqstp->rq_wait); - serv->sv_nrthreads++; spin_lock_bh(&pool->sp_lock); pool->sp_nrthreads++; diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c index 6666c6745858..c179ca2a5aa4 100644 --- a/net/sunrpc/svc_xprt.c +++ b/net/sunrpc/svc_xprt.c @@ -346,20 +346,6 @@ static void svc_xprt_do_enqueue(struct svc_xprt *xprt) if (!svc_xprt_has_something_to_do(xprt)) return; - cpu = get_cpu(); - pool = svc_pool_for_cpu(xprt->xpt_server, cpu); - put_cpu(); - - spin_lock_bh(&pool->sp_lock); - - if (!list_empty(&pool->sp_threads) && - !list_empty(&pool->sp_sockets)) - printk(KERN_ERR - "svc_xprt_enqueue: " - "threads and transports both waiting??\n"); - - pool->sp_stats.packets++; - /* Mark transport as busy. It will remain in this state until * the provider calls svc_xprt_received. We update XPT_BUSY * atomically because it also guards against trying to enqueue @@ -368,9 +354,15 @@ static void svc_xprt_do_enqueue(struct svc_xprt *xprt) if (test_and_set_bit(XPT_BUSY, &xprt->xpt_flags)) { /* Don't enqueue transport while already enqueued */ dprintk("svc: transport %p busy, not enqueued\n", xprt); - goto out_unlock; + return; } + cpu = get_cpu(); + pool = svc_pool_for_cpu(xprt->xpt_server, cpu); + spin_lock_bh(&pool->sp_lock); + + pool->sp_stats.packets++; + if (!list_empty(&pool->sp_threads)) { rqstp = list_entry(pool->sp_threads.next, struct svc_rqst, @@ -382,18 +374,23 @@ static void svc_xprt_do_enqueue(struct svc_xprt *xprt) printk(KERN_ERR "svc_xprt_enqueue: server %p, rq_xprt=%p!\n", rqstp, rqstp->rq_xprt); - rqstp->rq_xprt = xprt; + /* Note the order of the following 3 lines: + * We want to assign xprt to rqstp->rq_xprt only _after_ + * we've woken up the process, so that we don't race with + * the lockless check in svc_get_next_xprt(). + */ svc_xprt_get(xprt); + wake_up_process(rqstp->rq_task); + rqstp->rq_xprt = xprt; pool->sp_stats.threads_woken++; - wake_up(&rqstp->rq_wait); } else { dprintk("svc: transport %p put into queue\n", xprt); list_add_tail(&xprt->xpt_ready, &pool->sp_sockets); pool->sp_stats.sockets_queued++; } -out_unlock: spin_unlock_bh(&pool->sp_lock); + put_cpu(); } /* @@ -509,7 +506,7 @@ void svc_wake_up(struct svc_serv *serv) svc_thread_dequeue(pool, rqstp); rqstp->rq_xprt = NULL; */ - wake_up(&rqstp->rq_wait); + wake_up_process(rqstp->rq_task); } else pool->sp_task_pending = 1; spin_unlock_bh(&pool->sp_lock); @@ -628,8 +625,7 @@ static struct svc_xprt *svc_get_next_xprt(struct svc_rqst *rqstp, long timeout) { struct svc_xprt *xprt; struct svc_pool *pool = rqstp->rq_pool; - DECLARE_WAITQUEUE(wait, current); - long time_left; + long time_left = 0; /* Normally we will wait up to 5 seconds for any required * cache information to be provided. @@ -651,40 +647,32 @@ static struct svc_xprt *svc_get_next_xprt(struct svc_rqst *rqstp, long timeout) } else { if (pool->sp_task_pending) { pool->sp_task_pending = 0; - spin_unlock_bh(&pool->sp_lock); - return ERR_PTR(-EAGAIN); + xprt = ERR_PTR(-EAGAIN); + goto out; } - /* No data pending. Go to sleep */ - svc_thread_enqueue(pool, rqstp); - /* * We have to be able to interrupt this wait * to bring down the daemons ... */ set_current_state(TASK_INTERRUPTIBLE); - /* - * checking kthread_should_stop() here allows us to avoid - * locking and signalling when stopping kthreads that call - * svc_recv. If the thread has already been woken up, then - * we can exit here without sleeping. If not, then it - * it'll be woken up quickly during the schedule_timeout - */ - if (kthread_should_stop()) { - set_current_state(TASK_RUNNING); - spin_unlock_bh(&pool->sp_lock); - return ERR_PTR(-EINTR); - } - - add_wait_queue(&rqstp->rq_wait, &wait); + /* No data pending. Go to sleep */ + svc_thread_enqueue(pool, rqstp); spin_unlock_bh(&pool->sp_lock); - time_left = schedule_timeout(timeout); + if (!(signalled() || kthread_should_stop())) { + time_left = schedule_timeout(timeout); + __set_current_state(TASK_RUNNING); - try_to_freeze(); + try_to_freeze(); + + xprt = rqstp->rq_xprt; + if (xprt != NULL) + return xprt; + } else + __set_current_state(TASK_RUNNING); spin_lock_bh(&pool->sp_lock); - remove_wait_queue(&rqstp->rq_wait, &wait); if (!time_left) pool->sp_stats.threads_timedout++; @@ -699,6 +687,7 @@ static struct svc_xprt *svc_get_next_xprt(struct svc_rqst *rqstp, long timeout) return ERR_PTR(-EAGAIN); } } +out: spin_unlock_bh(&pool->sp_lock); return xprt; } @@ -744,7 +733,7 @@ static int svc_handle_xprt(struct svc_rqst *rqstp, struct svc_xprt *xprt) svc_add_new_temp_xprt(serv, newxpt); else module_put(xprt->xpt_class->xcl_owner); - } else if (xprt->xpt_ops->xpo_has_wspace(xprt)) { + } else { /* XPT_DATA|XPT_DEFERRED case: */ dprintk("svc: server %p, pool %u, transport %p, inuse=%d\n", rqstp, rqstp->rq_pool->sp_id, xprt, @@ -781,10 +770,6 @@ int svc_recv(struct svc_rqst *rqstp, long timeout) printk(KERN_ERR "svc_recv: service %p, transport not NULL!\n", rqstp); - if (waitqueue_active(&rqstp->rq_wait)) - printk(KERN_ERR - "svc_recv: service %p, wait queue active!\n", - rqstp); err = svc_alloc_arg(rqstp); if (err) diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index c24a8ff33f8f..3f959c681885 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c @@ -312,19 +312,6 @@ static int svc_one_sock_name(struct svc_sock *svsk, char *buf, int remaining) } /* - * Check input queue length - */ -static int svc_recv_available(struct svc_sock *svsk) -{ - struct socket *sock = svsk->sk_sock; - int avail, err; - - err = kernel_sock_ioctl(sock, TIOCINQ, (unsigned long) &avail); - - return (err >= 0)? avail : err; -} - -/* * Generic recvfrom routine. */ static int svc_recvfrom(struct svc_rqst *rqstp, struct kvec *iov, int nr, @@ -339,8 +326,14 @@ static int svc_recvfrom(struct svc_rqst *rqstp, struct kvec *iov, int nr, rqstp->rq_xprt_hlen = 0; + clear_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags); len = kernel_recvmsg(svsk->sk_sock, &msg, iov, nr, buflen, msg.msg_flags); + /* If we read a full record, then assume there may be more + * data to read (stream based sockets only!) + */ + if (len == buflen) + set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags); dprintk("svc: socket %p recvfrom(%p, %Zu) = %d\n", svsk, iov[0].iov_base, iov[0].iov_len, len); @@ -980,8 +973,6 @@ static int svc_tcp_recv_record(struct svc_sock *svsk, struct svc_rqst *rqstp) unsigned int want; int len; - clear_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags); - if (svsk->sk_tcplen < sizeof(rpc_fraghdr)) { struct kvec iov; @@ -1036,7 +1027,7 @@ static int receive_cb_reply(struct svc_sock *svsk, struct svc_rqst *rqstp) "%s: Got unrecognized reply: " "calldir 0x%x xpt_bc_xprt %p xid %08x\n", __func__, ntohl(calldir), - bc_xprt, xid); + bc_xprt, ntohl(xid)); return -EAGAIN; } @@ -1073,8 +1064,6 @@ static int copy_pages_to_kvecs(struct kvec *vec, struct page **pages, int len) static void svc_tcp_fragment_received(struct svc_sock *svsk) { /* If we have more data, signal svc_xprt_enqueue() to try again */ - if (svc_recv_available(svsk) > sizeof(rpc_fraghdr)) - set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags); dprintk("svc: TCP %s record (%d bytes)\n", svc_sock_final_rec(svsk) ? "final" : "nonfinal", svc_sock_reclen(svsk)); diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c index 374feb44afea..4e618808bc98 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_transport.c +++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c @@ -91,7 +91,7 @@ struct svc_xprt_class svc_rdma_class = { .xcl_name = "rdma", .xcl_owner = THIS_MODULE, .xcl_ops = &svc_rdma_ops, - .xcl_max_payload = RPCSVC_MAXPAYLOAD_TCP, + .xcl_max_payload = RPCSVC_MAXPAYLOAD_RDMA, .xcl_ident = XPRT_TRANSPORT_RDMA, }; diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h index c419498b8f46..ac7fc9a31342 100644 --- a/net/sunrpc/xprtrdma/xprt_rdma.h +++ b/net/sunrpc/xprtrdma/xprt_rdma.h @@ -51,6 +51,7 @@ #include <linux/sunrpc/clnt.h> /* rpc_xprt */ #include <linux/sunrpc/rpc_rdma.h> /* RPC/RDMA protocol */ #include <linux/sunrpc/xprtrdma.h> /* xprt parameters */ +#include <linux/sunrpc/svc.h> /* RPCSVC_MAXPAYLOAD */ #define RDMA_RESOLVE_TIMEOUT (5000) /* 5 seconds */ #define RDMA_CONNECT_RETRY_MAX (2) /* retries if no listener backlog */ @@ -392,4 +393,10 @@ extern struct kmem_cache *svc_rdma_ctxt_cachep; /* Workqueue created in svc_rdma.c */ extern struct workqueue_struct *svc_rdma_wq; +#if RPCSVC_MAXPAYLOAD < (RPCRDMA_MAX_DATA_SEGS << PAGE_SHIFT) +#define RPCSVC_MAXPAYLOAD_RDMA RPCSVC_MAXPAYLOAD +#else +#define RPCSVC_MAXPAYLOAD_RDMA (RPCRDMA_MAX_DATA_SEGS << PAGE_SHIFT) +#endif + #endif /* _LINUX_SUNRPC_XPRT_RDMA_H */ |