aboutsummaryrefslogtreecommitdiff
path: root/net/sunrpc
diff options
context:
space:
mode:
authorLinus Torvalds2014-10-08 12:51:44 -0400
committerLinus Torvalds2014-10-08 12:51:44 -0400
commit6dea0737bc5e160efc77f4c39d393b94fd2746dc (patch)
tree5f4021fa66f86a9cf0700bdefb8fcdfb370161df /net/sunrpc
parent25641c0c8d72f3d235c022fd2c19181912c2ae8b (diff)
parent34549ab09e62db9703811c6ed4715f2ffa1fd7fb (diff)
Merge branch 'for-3.18' of git://linux-nfs.org/~bfields/linux
Pull nfsd updates from Bruce Fields: "Highlights: - support the NFSv4.2 SEEK operation (allowing clients to support SEEK_HOLE/SEEK_DATA), thanks to Anna. - end the grace period early in a number of cases, mitigating a long-standing annoyance, thanks to Jeff - improve SMP scalability, thanks to Trond" * 'for-3.18' of git://linux-nfs.org/~bfields/linux: (55 commits) nfsd: eliminate "to_delegation" define NFSD: Implement SEEK NFSD: Add generic v4.2 infrastructure svcrdma: advertise the correct max payload nfsd: introduce nfsd4_callback_ops nfsd: split nfsd4_callback initialization and use nfsd: introduce a generic nfsd4_cb nfsd: remove nfsd4_callback.cb_op nfsd: do not clear rpc_resp in nfsd4_cb_done_sequence nfsd: fix nfsd4_cb_recall_done error handling nfsd4: clarify how grace period ends nfsd4: stop grace_time update at end of grace period nfsd: skip subsequent UMH "create" operations after the first one for v4.0 clients nfsd: set and test NFSD4_CLIENT_STABLE bit to reduce nfsdcltrack upcalls nfsd: serialize nfsdcltrack upcalls for a particular client nfsd: pass extra info in env vars to upcalls to allow for early grace period end nfsd: add a v4_end_grace file to /proc/fs/nfsd lockd: add a /proc/fs/lockd/nlm_end_grace file nfsd: reject reclaim request when client has already sent RECLAIM_COMPLETE nfsd: remove redundant boot_time parm from grace_done client tracking op ...
Diffstat (limited to 'net/sunrpc')
-rw-r--r--net/sunrpc/svc.c2
-rw-r--r--net/sunrpc/svc_xprt.c81
-rw-r--r--net/sunrpc/svcsock.c25
-rw-r--r--net/sunrpc/xprtrdma/svc_rdma_transport.c2
-rw-r--r--net/sunrpc/xprtrdma/xprt_rdma.h7
5 files changed, 48 insertions, 69 deletions
diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c
index 1db5007ddbce..ca8a7958f4e6 100644
--- a/net/sunrpc/svc.c
+++ b/net/sunrpc/svc.c
@@ -612,8 +612,6 @@ svc_prepare_thread(struct svc_serv *serv, struct svc_pool *pool, int node)
if (!rqstp)
goto out_enomem;
- init_waitqueue_head(&rqstp->rq_wait);
-
serv->sv_nrthreads++;
spin_lock_bh(&pool->sp_lock);
pool->sp_nrthreads++;
diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c
index 6666c6745858..c179ca2a5aa4 100644
--- a/net/sunrpc/svc_xprt.c
+++ b/net/sunrpc/svc_xprt.c
@@ -346,20 +346,6 @@ static void svc_xprt_do_enqueue(struct svc_xprt *xprt)
if (!svc_xprt_has_something_to_do(xprt))
return;
- cpu = get_cpu();
- pool = svc_pool_for_cpu(xprt->xpt_server, cpu);
- put_cpu();
-
- spin_lock_bh(&pool->sp_lock);
-
- if (!list_empty(&pool->sp_threads) &&
- !list_empty(&pool->sp_sockets))
- printk(KERN_ERR
- "svc_xprt_enqueue: "
- "threads and transports both waiting??\n");
-
- pool->sp_stats.packets++;
-
/* Mark transport as busy. It will remain in this state until
* the provider calls svc_xprt_received. We update XPT_BUSY
* atomically because it also guards against trying to enqueue
@@ -368,9 +354,15 @@ static void svc_xprt_do_enqueue(struct svc_xprt *xprt)
if (test_and_set_bit(XPT_BUSY, &xprt->xpt_flags)) {
/* Don't enqueue transport while already enqueued */
dprintk("svc: transport %p busy, not enqueued\n", xprt);
- goto out_unlock;
+ return;
}
+ cpu = get_cpu();
+ pool = svc_pool_for_cpu(xprt->xpt_server, cpu);
+ spin_lock_bh(&pool->sp_lock);
+
+ pool->sp_stats.packets++;
+
if (!list_empty(&pool->sp_threads)) {
rqstp = list_entry(pool->sp_threads.next,
struct svc_rqst,
@@ -382,18 +374,23 @@ static void svc_xprt_do_enqueue(struct svc_xprt *xprt)
printk(KERN_ERR
"svc_xprt_enqueue: server %p, rq_xprt=%p!\n",
rqstp, rqstp->rq_xprt);
- rqstp->rq_xprt = xprt;
+ /* Note the order of the following 3 lines:
+ * We want to assign xprt to rqstp->rq_xprt only _after_
+ * we've woken up the process, so that we don't race with
+ * the lockless check in svc_get_next_xprt().
+ */
svc_xprt_get(xprt);
+ wake_up_process(rqstp->rq_task);
+ rqstp->rq_xprt = xprt;
pool->sp_stats.threads_woken++;
- wake_up(&rqstp->rq_wait);
} else {
dprintk("svc: transport %p put into queue\n", xprt);
list_add_tail(&xprt->xpt_ready, &pool->sp_sockets);
pool->sp_stats.sockets_queued++;
}
-out_unlock:
spin_unlock_bh(&pool->sp_lock);
+ put_cpu();
}
/*
@@ -509,7 +506,7 @@ void svc_wake_up(struct svc_serv *serv)
svc_thread_dequeue(pool, rqstp);
rqstp->rq_xprt = NULL;
*/
- wake_up(&rqstp->rq_wait);
+ wake_up_process(rqstp->rq_task);
} else
pool->sp_task_pending = 1;
spin_unlock_bh(&pool->sp_lock);
@@ -628,8 +625,7 @@ static struct svc_xprt *svc_get_next_xprt(struct svc_rqst *rqstp, long timeout)
{
struct svc_xprt *xprt;
struct svc_pool *pool = rqstp->rq_pool;
- DECLARE_WAITQUEUE(wait, current);
- long time_left;
+ long time_left = 0;
/* Normally we will wait up to 5 seconds for any required
* cache information to be provided.
@@ -651,40 +647,32 @@ static struct svc_xprt *svc_get_next_xprt(struct svc_rqst *rqstp, long timeout)
} else {
if (pool->sp_task_pending) {
pool->sp_task_pending = 0;
- spin_unlock_bh(&pool->sp_lock);
- return ERR_PTR(-EAGAIN);
+ xprt = ERR_PTR(-EAGAIN);
+ goto out;
}
- /* No data pending. Go to sleep */
- svc_thread_enqueue(pool, rqstp);
-
/*
* We have to be able to interrupt this wait
* to bring down the daemons ...
*/
set_current_state(TASK_INTERRUPTIBLE);
- /*
- * checking kthread_should_stop() here allows us to avoid
- * locking and signalling when stopping kthreads that call
- * svc_recv. If the thread has already been woken up, then
- * we can exit here without sleeping. If not, then it
- * it'll be woken up quickly during the schedule_timeout
- */
- if (kthread_should_stop()) {
- set_current_state(TASK_RUNNING);
- spin_unlock_bh(&pool->sp_lock);
- return ERR_PTR(-EINTR);
- }
-
- add_wait_queue(&rqstp->rq_wait, &wait);
+ /* No data pending. Go to sleep */
+ svc_thread_enqueue(pool, rqstp);
spin_unlock_bh(&pool->sp_lock);
- time_left = schedule_timeout(timeout);
+ if (!(signalled() || kthread_should_stop())) {
+ time_left = schedule_timeout(timeout);
+ __set_current_state(TASK_RUNNING);
- try_to_freeze();
+ try_to_freeze();
+
+ xprt = rqstp->rq_xprt;
+ if (xprt != NULL)
+ return xprt;
+ } else
+ __set_current_state(TASK_RUNNING);
spin_lock_bh(&pool->sp_lock);
- remove_wait_queue(&rqstp->rq_wait, &wait);
if (!time_left)
pool->sp_stats.threads_timedout++;
@@ -699,6 +687,7 @@ static struct svc_xprt *svc_get_next_xprt(struct svc_rqst *rqstp, long timeout)
return ERR_PTR(-EAGAIN);
}
}
+out:
spin_unlock_bh(&pool->sp_lock);
return xprt;
}
@@ -744,7 +733,7 @@ static int svc_handle_xprt(struct svc_rqst *rqstp, struct svc_xprt *xprt)
svc_add_new_temp_xprt(serv, newxpt);
else
module_put(xprt->xpt_class->xcl_owner);
- } else if (xprt->xpt_ops->xpo_has_wspace(xprt)) {
+ } else {
/* XPT_DATA|XPT_DEFERRED case: */
dprintk("svc: server %p, pool %u, transport %p, inuse=%d\n",
rqstp, rqstp->rq_pool->sp_id, xprt,
@@ -781,10 +770,6 @@ int svc_recv(struct svc_rqst *rqstp, long timeout)
printk(KERN_ERR
"svc_recv: service %p, transport not NULL!\n",
rqstp);
- if (waitqueue_active(&rqstp->rq_wait))
- printk(KERN_ERR
- "svc_recv: service %p, wait queue active!\n",
- rqstp);
err = svc_alloc_arg(rqstp);
if (err)
diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
index c24a8ff33f8f..3f959c681885 100644
--- a/net/sunrpc/svcsock.c
+++ b/net/sunrpc/svcsock.c
@@ -312,19 +312,6 @@ static int svc_one_sock_name(struct svc_sock *svsk, char *buf, int remaining)
}
/*
- * Check input queue length
- */
-static int svc_recv_available(struct svc_sock *svsk)
-{
- struct socket *sock = svsk->sk_sock;
- int avail, err;
-
- err = kernel_sock_ioctl(sock, TIOCINQ, (unsigned long) &avail);
-
- return (err >= 0)? avail : err;
-}
-
-/*
* Generic recvfrom routine.
*/
static int svc_recvfrom(struct svc_rqst *rqstp, struct kvec *iov, int nr,
@@ -339,8 +326,14 @@ static int svc_recvfrom(struct svc_rqst *rqstp, struct kvec *iov, int nr,
rqstp->rq_xprt_hlen = 0;
+ clear_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags);
len = kernel_recvmsg(svsk->sk_sock, &msg, iov, nr, buflen,
msg.msg_flags);
+ /* If we read a full record, then assume there may be more
+ * data to read (stream based sockets only!)
+ */
+ if (len == buflen)
+ set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags);
dprintk("svc: socket %p recvfrom(%p, %Zu) = %d\n",
svsk, iov[0].iov_base, iov[0].iov_len, len);
@@ -980,8 +973,6 @@ static int svc_tcp_recv_record(struct svc_sock *svsk, struct svc_rqst *rqstp)
unsigned int want;
int len;
- clear_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags);
-
if (svsk->sk_tcplen < sizeof(rpc_fraghdr)) {
struct kvec iov;
@@ -1036,7 +1027,7 @@ static int receive_cb_reply(struct svc_sock *svsk, struct svc_rqst *rqstp)
"%s: Got unrecognized reply: "
"calldir 0x%x xpt_bc_xprt %p xid %08x\n",
__func__, ntohl(calldir),
- bc_xprt, xid);
+ bc_xprt, ntohl(xid));
return -EAGAIN;
}
@@ -1073,8 +1064,6 @@ static int copy_pages_to_kvecs(struct kvec *vec, struct page **pages, int len)
static void svc_tcp_fragment_received(struct svc_sock *svsk)
{
/* If we have more data, signal svc_xprt_enqueue() to try again */
- if (svc_recv_available(svsk) > sizeof(rpc_fraghdr))
- set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags);
dprintk("svc: TCP %s record (%d bytes)\n",
svc_sock_final_rec(svsk) ? "final" : "nonfinal",
svc_sock_reclen(svsk));
diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c
index 374feb44afea..4e618808bc98 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_transport.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c
@@ -91,7 +91,7 @@ struct svc_xprt_class svc_rdma_class = {
.xcl_name = "rdma",
.xcl_owner = THIS_MODULE,
.xcl_ops = &svc_rdma_ops,
- .xcl_max_payload = RPCSVC_MAXPAYLOAD_TCP,
+ .xcl_max_payload = RPCSVC_MAXPAYLOAD_RDMA,
.xcl_ident = XPRT_TRANSPORT_RDMA,
};
diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h
index c419498b8f46..ac7fc9a31342 100644
--- a/net/sunrpc/xprtrdma/xprt_rdma.h
+++ b/net/sunrpc/xprtrdma/xprt_rdma.h
@@ -51,6 +51,7 @@
#include <linux/sunrpc/clnt.h> /* rpc_xprt */
#include <linux/sunrpc/rpc_rdma.h> /* RPC/RDMA protocol */
#include <linux/sunrpc/xprtrdma.h> /* xprt parameters */
+#include <linux/sunrpc/svc.h> /* RPCSVC_MAXPAYLOAD */
#define RDMA_RESOLVE_TIMEOUT (5000) /* 5 seconds */
#define RDMA_CONNECT_RETRY_MAX (2) /* retries if no listener backlog */
@@ -392,4 +393,10 @@ extern struct kmem_cache *svc_rdma_ctxt_cachep;
/* Workqueue created in svc_rdma.c */
extern struct workqueue_struct *svc_rdma_wq;
+#if RPCSVC_MAXPAYLOAD < (RPCRDMA_MAX_DATA_SEGS << PAGE_SHIFT)
+#define RPCSVC_MAXPAYLOAD_RDMA RPCSVC_MAXPAYLOAD
+#else
+#define RPCSVC_MAXPAYLOAD_RDMA (RPCRDMA_MAX_DATA_SEGS << PAGE_SHIFT)
+#endif
+
#endif /* _LINUX_SUNRPC_XPRT_RDMA_H */