From feb8ca37cc3d83c07fd042509ef1e176cfeb2cfa Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 3 Dec 2009 08:10:17 -0500 Subject: SUNRPC: Ensure that we honour autoclose before attempting to reconnect If the XPRT_CLOSE_WAIT flag is set, we need to ensure that we call xprt->ops->close() while holding xprt_lock_write() before we can start reconnecting. Signed-off-by: Trond Myklebust --- net/sunrpc/xprt.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'net/sunrpc') diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c index fd46d42afa89..469de292c23c 100644 --- a/net/sunrpc/xprt.c +++ b/net/sunrpc/xprt.c @@ -700,6 +700,10 @@ void xprt_connect(struct rpc_task *task) } if (!xprt_lock_write(xprt, task)) return; + + if (test_and_clear_bit(XPRT_CLOSE_WAIT, &xprt->state)) + xprt->ops->close(xprt); + if (xprt_connected(xprt)) xprt_release_write(xprt, task); else { -- cgit v1.2.3 From f0380f3d16df8f9e2fcd1d8c16fb0d94370bea99 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 3 Dec 2009 08:10:17 -0500 Subject: RPC: Fix two potential races in put_rpccred It is possible for rpcauth_destroy_credcache() to cause the rpc credentials to be unhashed while put_rpccred is waiting for the rpc_credcache_lock on another cpu. Should this happen, then we can end up calling hlist_del_rcu(&cred->cr_hash) a second time in put_rpccred, thus causing list corruption. Should the credential actually be hashed, it is also possible for rpcauth_lookup_credcache to find and reference it before we get round to unhashing it. In this case, the call to rpcauth_unhash_cred will fail, and so we should just exit without destroying the cred. Reported-by: Neil Brown Signed-off-by: Trond Myklebust --- net/sunrpc/auth.c | 39 +++++++++++++++++++++++---------------- 1 file changed, 23 insertions(+), 16 deletions(-) (limited to 'net/sunrpc') diff --git a/net/sunrpc/auth.c b/net/sunrpc/auth.c index 54a4e042f104..7ee6f7eaddfb 100644 --- a/net/sunrpc/auth.c +++ b/net/sunrpc/auth.c @@ -123,16 +123,19 @@ rpcauth_unhash_cred_locked(struct rpc_cred *cred) clear_bit(RPCAUTH_CRED_HASHED, &cred->cr_flags); } -static void +static int rpcauth_unhash_cred(struct rpc_cred *cred) { spinlock_t *cache_lock; + int ret; cache_lock = &cred->cr_auth->au_credcache->lock; spin_lock(cache_lock); - if (atomic_read(&cred->cr_count) == 0) + ret = atomic_read(&cred->cr_count) == 0; + if (ret) rpcauth_unhash_cred_locked(cred); spin_unlock(cache_lock); + return ret; } /* @@ -446,31 +449,35 @@ void put_rpccred(struct rpc_cred *cred) { /* Fast path for unhashed credentials */ - if (test_bit(RPCAUTH_CRED_HASHED, &cred->cr_flags) != 0) - goto need_lock; - - if (!atomic_dec_and_test(&cred->cr_count)) + if (test_bit(RPCAUTH_CRED_HASHED, &cred->cr_flags) == 0) { + if (atomic_dec_and_test(&cred->cr_count)) + cred->cr_ops->crdestroy(cred); return; - goto out_destroy; -need_lock: + } + if (!atomic_dec_and_lock(&cred->cr_count, &rpc_credcache_lock)) return; if (!list_empty(&cred->cr_lru)) { number_cred_unused--; list_del_init(&cred->cr_lru); } - if (test_bit(RPCAUTH_CRED_UPTODATE, &cred->cr_flags) == 0) - rpcauth_unhash_cred(cred); if (test_bit(RPCAUTH_CRED_HASHED, &cred->cr_flags) != 0) { - cred->cr_expire = jiffies; - list_add_tail(&cred->cr_lru, &cred_unused); - number_cred_unused++; - spin_unlock(&rpc_credcache_lock); - return; + if (test_bit(RPCAUTH_CRED_UPTODATE, &cred->cr_flags) != 0) { + cred->cr_expire = jiffies; + list_add_tail(&cred->cr_lru, &cred_unused); + number_cred_unused++; + goto out_nodestroy; + } + if (!rpcauth_unhash_cred(cred)) { + /* We were hashed and someone looked us up... */ + goto out_nodestroy; + } } spin_unlock(&rpc_credcache_lock); -out_destroy: cred->cr_ops->crdestroy(cred); + return; +out_nodestroy: + spin_unlock(&rpc_credcache_lock); } EXPORT_SYMBOL_GPL(put_rpccred); -- cgit v1.2.3 From dd1fd90fe65e2e642f0e58e2ff4849f317a6c43d Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 3 Dec 2009 15:58:56 -0500 Subject: SUNRPC: Display compressed (shorthand) IPv6 presentation addresses Recent changes to snprintf() introduced the %pI6c formatter, which can display an IPv6 address with standard shorthanding. Using a shorthanded address can save us a few bytes of memory for each stored presentation address, or a few bytes on the wire when sending these in a universal address. Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- net/sunrpc/addr.c | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) (limited to 'net/sunrpc') diff --git a/net/sunrpc/addr.c b/net/sunrpc/addr.c index c7450c8f0a7c..6dcdd2517819 100644 --- a/net/sunrpc/addr.c +++ b/net/sunrpc/addr.c @@ -55,16 +55,8 @@ static size_t rpc_ntop6_noscopeid(const struct sockaddr *sap, /* * RFC 4291, Section 2.2.1 - * - * To keep the result as short as possible, especially - * since we don't shorthand, we don't want leading zeros - * in each halfword, so avoid %pI6. */ - return snprintf(buf, buflen, "%x:%x:%x:%x:%x:%x:%x:%x", - ntohs(addr->s6_addr16[0]), ntohs(addr->s6_addr16[1]), - ntohs(addr->s6_addr16[2]), ntohs(addr->s6_addr16[3]), - ntohs(addr->s6_addr16[4]), ntohs(addr->s6_addr16[5]), - ntohs(addr->s6_addr16[6]), ntohs(addr->s6_addr16[7])); + return snprintf(buf, buflen, "%pI6c", addr); } static size_t rpc_ntop6(const struct sockaddr *sap, -- cgit v1.2.3 From 206a134b4d8abf57cd34dffacf993869355b9aac Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 3 Dec 2009 15:58:56 -0500 Subject: SUNRPC: Check explicitly for tk_status == 0 in call_transmit_status() The success case, where task->tk_status == 0, is by far the most frequent case in call_transmit_status(). The default: arm of the switch statement in call_transmit_status() handles the 0 case. default: was moved close to the top of the switch statement in call_transmit_status() under the theory that the compiler places object code for the earliest arms of a switch statement first, making the CPU do less work. The default: arm of a switch statement, however, is executed only after all the other cases have been checked. Even if the compiler rearranges the object code, the default: arm is the "last resort", meaning all of the other cases have been explicitly exhausted. That makes the current arrangement about as inefficient as it gets for the common case. To fix this, add an explicit check for zero before the switch statement. That forces the compiler to do the zero check first, no matter what optimizations it might try to do to the switch statement. Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- net/sunrpc/clnt.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'net/sunrpc') diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index 38829e20500b..7bcd931e06ee 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -1180,10 +1180,22 @@ static void call_transmit_status(struct rpc_task *task) { task->tk_action = call_status; + + /* + * Common case: success. Force the compiler to put this + * test first. + */ + if (task->tk_status == 0) { + xprt_end_transmit(task); + rpc_task_force_reencode(task); + return; + } + switch (task->tk_status) { case -EAGAIN: break; default: + dprint_status(task); xprt_end_transmit(task); /* * Special cases: if we've been waiting on the -- cgit v1.2.3 From 09a21c4102c8f7893368553273d39c0cadedf9af Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 3 Dec 2009 15:58:56 -0500 Subject: SUNRPC: Allow RPCs to fail quickly if the server is unreachable The kernel sometimes makes RPC calls to services that aren't running. Because the kernel's RPC client always assumes the hard retry semantic when reconnecting a connection-oriented RPC transport, the underlying reconnect logic takes a long while to time out, even though the remote may have responded immediately with ECONNREFUSED. In certain cases, like upcalls to our local rpcbind daemon, or for NFS mount requests, we'd like the kernel to fail immediately if the remote service isn't reachable. This allows another transport to be tried immediately, or the pending request can be abandoned quickly. Introduce a per-request flag which controls how call_transmit_status() behaves when request transmission fails because the server cannot be reached. We don't want soft connection semantics to apply to other errors. The default case of the switch statement in call_transmit_status() no longer falls through; the fall through code is copied to the default case, and a "break;" is added. The transport's connection re-establishment timeout is also ignored for such requests. We want the request to fail immediately, so the reconnect delay is skipped. Additionally, we don't want a connect failure here to further increase the reconnect timeout value, since this request will not be retried. Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- include/linux/sunrpc/sched.h | 2 ++ net/sunrpc/clnt.c | 11 +++++++++-- net/sunrpc/xprtsock.c | 2 +- 3 files changed, 12 insertions(+), 3 deletions(-) (limited to 'net/sunrpc') diff --git a/include/linux/sunrpc/sched.h b/include/linux/sunrpc/sched.h index 401097781fc0..1906782ec86b 100644 --- a/include/linux/sunrpc/sched.h +++ b/include/linux/sunrpc/sched.h @@ -130,12 +130,14 @@ struct rpc_task_setup { #define RPC_TASK_DYNAMIC 0x0080 /* task was kmalloc'ed */ #define RPC_TASK_KILLED 0x0100 /* task was killed */ #define RPC_TASK_SOFT 0x0200 /* Use soft timeouts */ +#define RPC_TASK_SOFTCONN 0x0400 /* Fail if can't connect */ #define RPC_IS_ASYNC(t) ((t)->tk_flags & RPC_TASK_ASYNC) #define RPC_IS_SWAPPER(t) ((t)->tk_flags & RPC_TASK_SWAPPER) #define RPC_DO_ROOTOVERRIDE(t) ((t)->tk_flags & RPC_TASK_ROOTCREDS) #define RPC_ASSASSINATED(t) ((t)->tk_flags & RPC_TASK_KILLED) #define RPC_IS_SOFT(t) ((t)->tk_flags & RPC_TASK_SOFT) +#define RPC_IS_SOFTCONN(t) ((t)->tk_flags & RPC_TASK_SOFTCONN) #define RPC_TASK_RUNNING 0 #define RPC_TASK_QUEUED 1 diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index 7bcd931e06ee..68a23583f44c 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -1197,6 +1197,8 @@ call_transmit_status(struct rpc_task *task) default: dprint_status(task); xprt_end_transmit(task); + rpc_task_force_reencode(task); + break; /* * Special cases: if we've been waiting on the * socket's write_space() callback, or if the @@ -1204,11 +1206,16 @@ call_transmit_status(struct rpc_task *task) * then hold onto the transport lock. */ case -ECONNREFUSED: - case -ECONNRESET: - case -ENOTCONN: case -EHOSTDOWN: case -EHOSTUNREACH: case -ENETUNREACH: + if (RPC_IS_SOFTCONN(task)) { + xprt_end_transmit(task); + rpc_exit(task, task->tk_status); + break; + } + case -ECONNRESET: + case -ENOTCONN: case -EPIPE: rpc_task_force_reencode(task); } diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index 37c5475ba258..ff312f8b018d 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -2033,7 +2033,7 @@ static void xs_connect(struct rpc_task *task) if (xprt_test_and_set_connecting(xprt)) return; - if (transport->sock != NULL) { + if (transport->sock != NULL && !RPC_IS_SOFTCONN(task)) { dprintk("RPC: xs_connect delayed xprt %p for %lu " "seconds\n", xprt, xprt->reestablish_timeout / HZ); -- cgit v1.2.3 From 5a46211540a83871196489247f57da2bdde58d87 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 3 Dec 2009 15:58:56 -0500 Subject: SUNRPC: Simplify synopsis of rpcb_local_clnt() Clean up: At one point, rpcb_local_clnt() handled IPv6 loopback addresses too, but it doesn't any more; only IPv4 loopback is used now. Get rid of the @addr and @addrlen arguments to rpcb_local_clnt(). Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- net/sunrpc/rpcb_clnt.c | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) (limited to 'net/sunrpc') diff --git a/net/sunrpc/rpcb_clnt.c b/net/sunrpc/rpcb_clnt.c index 830faf4d9997..28f50da60ceb 100644 --- a/net/sunrpc/rpcb_clnt.c +++ b/net/sunrpc/rpcb_clnt.c @@ -163,13 +163,12 @@ static const struct sockaddr_in rpcb_inaddr_loopback = { .sin_port = htons(RPCBIND_PORT), }; -static struct rpc_clnt *rpcb_create_local(struct sockaddr *addr, - size_t addrlen, u32 version) +static struct rpc_clnt *rpcb_create_local(u32 version) { struct rpc_create_args args = { .protocol = XPRT_TRANSPORT_UDP, - .address = addr, - .addrsize = addrlen, + .address = (struct sockaddr *)&rpcb_inaddr_loopback, + .addrsize = sizeof(rpcb_inaddr_loopback), .servername = "localhost", .program = &rpcb_program, .version = version, @@ -211,14 +210,12 @@ static struct rpc_clnt *rpcb_create(char *hostname, struct sockaddr *srvaddr, static int rpcb_register_call(const u32 version, struct rpc_message *msg) { - struct sockaddr *addr = (struct sockaddr *)&rpcb_inaddr_loopback; - size_t addrlen = sizeof(rpcb_inaddr_loopback); struct rpc_clnt *rpcb_clnt; int result, error = 0; msg->rpc_resp = &result; - rpcb_clnt = rpcb_create_local(addr, addrlen, version); + rpcb_clnt = rpcb_create_local(version); if (!IS_ERR(rpcb_clnt)) { error = rpc_call_sync(rpcb_clnt, msg, 0); rpc_shutdown_client(rpcb_clnt); -- cgit v1.2.3 From c526611dd631b2802b6b0221ffb306c5fa25c86c Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 3 Dec 2009 15:58:56 -0500 Subject: SUNRPC: Use a cached RPC client and transport for rpcbind upcalls The kernel's rpcbind client creates and deletes an rpc_clnt and its underlying transport socket for every upcall to the local rpcbind daemon. When starting a typical NFS server on IPv4 and IPv6, the NFS service itself does three upcalls (one per version) times two upcalls (one per transport) times two upcalls (one per address family), making 12, plus another one for the initial call to unregister previous NFS services. Starting the NLM service adds an additional 13 upcalls, for similar reasons. (Currently the NFS service doesn't start IPv6 listeners, but it will soon enough). Instead, let's create an rpc_clnt for rpcbind upcalls during the first local rpcbind query, and cache it. This saves the overhead of creating and destroying an rpc_clnt and a socket for every upcall. The new logic also prevents the kernel from attempting an RPCB_SET or RPCB_UNSET if it knows from the start that the local portmapper does not support rpcbind protocol version 4. This will cut down on the number of rpcbind upcalls in legacy environments. Signed-off-by: Chuck Lever --- net/sunrpc/rpcb_clnt.c | 93 +++++++++++++++++++++++++++++++++++++++--------- net/sunrpc/sunrpc_syms.c | 3 ++ 2 files changed, 80 insertions(+), 16 deletions(-) (limited to 'net/sunrpc') diff --git a/net/sunrpc/rpcb_clnt.c b/net/sunrpc/rpcb_clnt.c index 28f50da60ceb..116db74dd942 100644 --- a/net/sunrpc/rpcb_clnt.c +++ b/net/sunrpc/rpcb_clnt.c @@ -20,6 +20,7 @@ #include #include #include +#include #include #include @@ -110,6 +111,9 @@ static void rpcb_getport_done(struct rpc_task *, void *); static void rpcb_map_release(void *data); static struct rpc_program rpcb_program; +static struct rpc_clnt * rpcb_local_clnt; +static struct rpc_clnt * rpcb_local_clnt4; + struct rpcbind_args { struct rpc_xprt * r_xprt; @@ -163,7 +167,13 @@ static const struct sockaddr_in rpcb_inaddr_loopback = { .sin_port = htons(RPCBIND_PORT), }; -static struct rpc_clnt *rpcb_create_local(u32 version) +static DEFINE_MUTEX(rpcb_create_local_mutex); + +/* + * Returns zero on success, otherwise a negative errno value + * is returned. + */ +static int rpcb_create_local(void) { struct rpc_create_args args = { .protocol = XPRT_TRANSPORT_UDP, @@ -171,12 +181,46 @@ static struct rpc_clnt *rpcb_create_local(u32 version) .addrsize = sizeof(rpcb_inaddr_loopback), .servername = "localhost", .program = &rpcb_program, - .version = version, + .version = RPCBVERS_2, .authflavor = RPC_AUTH_UNIX, .flags = RPC_CLNT_CREATE_NOPING, }; + struct rpc_clnt *clnt, *clnt4; + int result = 0; + + if (rpcb_local_clnt) + return result; + + mutex_lock(&rpcb_create_local_mutex); + if (rpcb_local_clnt) + goto out; + + clnt = rpc_create(&args); + if (IS_ERR(clnt)) { + dprintk("RPC: failed to create local rpcbind " + "client (errno %ld).\n", PTR_ERR(clnt)); + result = -PTR_ERR(clnt); + goto out; + } - return rpc_create(&args); + /* + * This results in an RPC ping. On systems running portmapper, + * the v4 ping will fail. Proceed anyway, but disallow rpcb + * v4 upcalls. + */ + clnt4 = rpc_bind_new_program(clnt, &rpcb_program, RPCBVERS_4); + if (IS_ERR(clnt4)) { + dprintk("RPC: failed to create local rpcbind v4 " + "cleint (errno %ld).\n", PTR_ERR(clnt4)); + clnt4 = NULL; + } + + rpcb_local_clnt = clnt; + rpcb_local_clnt4 = clnt4; + +out: + mutex_unlock(&rpcb_create_local_mutex); + return result; } static struct rpc_clnt *rpcb_create(char *hostname, struct sockaddr *srvaddr, @@ -208,20 +252,13 @@ static struct rpc_clnt *rpcb_create(char *hostname, struct sockaddr *srvaddr, return rpc_create(&args); } -static int rpcb_register_call(const u32 version, struct rpc_message *msg) +static int rpcb_register_call(struct rpc_clnt *clnt, struct rpc_message *msg) { - struct rpc_clnt *rpcb_clnt; int result, error = 0; msg->rpc_resp = &result; - rpcb_clnt = rpcb_create_local(version); - if (!IS_ERR(rpcb_clnt)) { - error = rpc_call_sync(rpcb_clnt, msg, 0); - rpc_shutdown_client(rpcb_clnt); - } else - error = PTR_ERR(rpcb_clnt); - + error = rpc_call_sync(clnt, msg, 0); if (error < 0) { dprintk("RPC: failed to contact local rpcbind " "server (errno %d).\n", -error); @@ -276,6 +313,11 @@ int rpcb_register(u32 prog, u32 vers, int prot, unsigned short port) struct rpc_message msg = { .rpc_argp = &map, }; + int error; + + error = rpcb_create_local(); + if (error) + return error; dprintk("RPC: %sregistering (%u, %u, %d, %u) with local " "rpcbind\n", (port ? "" : "un"), @@ -285,7 +327,7 @@ int rpcb_register(u32 prog, u32 vers, int prot, unsigned short port) if (port) msg.rpc_proc = &rpcb_procedures2[RPCBPROC_SET]; - return rpcb_register_call(RPCBVERS_2, &msg); + return rpcb_register_call(rpcb_local_clnt, &msg); } /* @@ -310,7 +352,7 @@ static int rpcb_register_inet4(const struct sockaddr *sap, if (port) msg->rpc_proc = &rpcb_procedures4[RPCBPROC_SET]; - result = rpcb_register_call(RPCBVERS_4, msg); + result = rpcb_register_call(rpcb_local_clnt4, msg); kfree(map->r_addr); return result; } @@ -337,7 +379,7 @@ static int rpcb_register_inet6(const struct sockaddr *sap, if (port) msg->rpc_proc = &rpcb_procedures4[RPCBPROC_SET]; - result = rpcb_register_call(RPCBVERS_4, msg); + result = rpcb_register_call(rpcb_local_clnt4, msg); kfree(map->r_addr); return result; } @@ -353,7 +395,7 @@ static int rpcb_unregister_all_protofamilies(struct rpc_message *msg) map->r_addr = ""; msg->rpc_proc = &rpcb_procedures4[RPCBPROC_UNSET]; - return rpcb_register_call(RPCBVERS_4, msg); + return rpcb_register_call(rpcb_local_clnt4, msg); } /** @@ -411,6 +453,13 @@ int rpcb_v4_register(const u32 program, const u32 version, struct rpc_message msg = { .rpc_argp = &map, }; + int error; + + error = rpcb_create_local(); + if (error) + return error; + if (rpcb_local_clnt4 == NULL) + return -EPROTONOSUPPORT; if (address == NULL) return rpcb_unregister_all_protofamilies(&msg); @@ -1024,3 +1073,15 @@ static struct rpc_program rpcb_program = { .version = rpcb_version, .stats = &rpcb_stats, }; + +/** + * cleanup_rpcb_clnt - remove xprtsock's sysctls, unregister + * + */ +void cleanup_rpcb_clnt(void) +{ + if (rpcb_local_clnt4) + rpc_shutdown_client(rpcb_local_clnt4); + if (rpcb_local_clnt) + rpc_shutdown_client(rpcb_local_clnt); +} diff --git a/net/sunrpc/sunrpc_syms.c b/net/sunrpc/sunrpc_syms.c index 8cce92189019..f438347d817b 100644 --- a/net/sunrpc/sunrpc_syms.c +++ b/net/sunrpc/sunrpc_syms.c @@ -24,6 +24,8 @@ extern struct cache_detail ip_map_cache, unix_gid_cache; +extern void cleanup_rpcb_clnt(void); + static int __init init_sunrpc(void) { @@ -53,6 +55,7 @@ out: static void __exit cleanup_sunrpc(void) { + cleanup_rpcb_clnt(); rpcauth_remove_module(); cleanup_socket_xprt(); svc_cleanup_xprt_sock(); -- cgit v1.2.3 From 2a76b3bfa22993fc09166bd6a8db0dbe902b6813 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 3 Dec 2009 15:58:56 -0500 Subject: SUNRPC: Use TCP for local rpcbind upcalls Use TCP with the soft connect semantic for local rpcbind upcalls so the kernel can detect immediately if the local rpcbind daemon is not running. Signed-off-by: Chuck Lever --- net/sunrpc/rpcb_clnt.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net/sunrpc') diff --git a/net/sunrpc/rpcb_clnt.c b/net/sunrpc/rpcb_clnt.c index 116db74dd942..401154094ee1 100644 --- a/net/sunrpc/rpcb_clnt.c +++ b/net/sunrpc/rpcb_clnt.c @@ -176,7 +176,7 @@ static DEFINE_MUTEX(rpcb_create_local_mutex); static int rpcb_create_local(void) { struct rpc_create_args args = { - .protocol = XPRT_TRANSPORT_UDP, + .protocol = XPRT_TRANSPORT_TCP, .address = (struct sockaddr *)&rpcb_inaddr_loopback, .addrsize = sizeof(rpcb_inaddr_loopback), .servername = "localhost", @@ -258,7 +258,7 @@ static int rpcb_register_call(struct rpc_clnt *clnt, struct rpc_message *msg) msg->rpc_resp = &result; - error = rpc_call_sync(clnt, msg, 0); + error = rpc_call_sync(clnt, msg, RPC_TASK_SOFTCONN); if (error < 0) { dprintk("RPC: failed to contact local rpcbind " "server (errno %d).\n", -error); -- cgit v1.2.3 From 012da158f636347c4eb28fd1e1cca020fef5e54d Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 3 Dec 2009 15:58:56 -0500 Subject: SUNRPC: Use soft connects for autobinding over TCP Autobinding is handled by the rpciod process, not in user processes that are generating regular RPC requests. Thus autobinding is usually not affected by signals targetting user processes, such as KILL or timer expiration events. In addition, an RPC request generated by a user process that has RPC_TASK_SOFTCONN set and needs to perform an autobind will hang if the remote rpcbind service is not available. For rpcbind queries on connection-oriented transports, let's use the new soft connect semantic to return control to the user's process quickly, if the kernel's rpcbind client can't connect to the remote rpcbind service. Logic is introduced in call_bind_status() to handle connection errors that occurred during an asynchronous rpcbind query. The logic abandons the rpcbind query if the RPC request has SOFTCONN set, and retries after a few seconds in the normal case. Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- net/sunrpc/clnt.c | 17 ++++++++++++++++- net/sunrpc/rpcb_clnt.c | 2 +- 2 files changed, 17 insertions(+), 2 deletions(-) (limited to 'net/sunrpc') diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index 68a23583f44c..4b76ef9336c7 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -1060,7 +1060,7 @@ call_bind_status(struct rpc_task *task) goto retry_timeout; case -EPFNOSUPPORT: /* server doesn't support any rpcbind version we know of */ - dprintk("RPC: %5u remote rpcbind service unavailable\n", + dprintk("RPC: %5u unrecognized remote rpcbind service\n", task->tk_pid); break; case -EPROTONOSUPPORT: @@ -1069,6 +1069,21 @@ call_bind_status(struct rpc_task *task) task->tk_status = 0; task->tk_action = call_bind; return; + case -ECONNREFUSED: /* connection problems */ + case -ECONNRESET: + case -ENOTCONN: + case -EHOSTDOWN: + case -EHOSTUNREACH: + case -ENETUNREACH: + case -EPIPE: + dprintk("RPC: %5u remote rpcbind unreachable: %d\n", + task->tk_pid, task->tk_status); + if (!RPC_IS_SOFTCONN(task)) { + rpc_delay(task, 5*HZ); + goto retry_timeout; + } + status = task->tk_status; + break; default: dprintk("RPC: %5u unrecognized rpcbind error (%d)\n", task->tk_pid, -task->tk_status); diff --git a/net/sunrpc/rpcb_clnt.c b/net/sunrpc/rpcb_clnt.c index 401154094ee1..3e3772d8eb92 100644 --- a/net/sunrpc/rpcb_clnt.c +++ b/net/sunrpc/rpcb_clnt.c @@ -537,7 +537,7 @@ static struct rpc_task *rpcb_call_async(struct rpc_clnt *rpcb_clnt, struct rpcbi .rpc_message = &msg, .callback_ops = &rpcb_getport_ops, .callback_data = map, - .flags = RPC_TASK_ASYNC, + .flags = RPC_TASK_ASYNC | RPC_TASK_SOFTCONN, }; return rpc_run_task(&task_setup_data); -- cgit v1.2.3 From caabea8a565fb4e16f8e58e16bb9d535e591b709 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 3 Dec 2009 15:58:56 -0500 Subject: SUNRPC: Use soft connect semantics when performing RPC ping Currently, if a remote RPC service is unreachable, an RPC ping will hang until the underlying transport connect attempt times out. A more desirable behavior might be to have the ping fail immediately so upper layers can recover appropriately. In the case of an NFS mount, for instance, this would mean the mount(2) system call could fail immediately if the server isn't listening, rather than hanging uninterruptibly for more than 3 minutes. Change rpc_ping() so that it fails immediately for connection-oriented transports. rpc_create() will then fail immediately for such transports if an RPC ping was requested. Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- net/sunrpc/clnt.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'net/sunrpc') diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index 4b76ef9336c7..97931d9f8579 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -79,7 +79,7 @@ static void call_connect_status(struct rpc_task *task); static __be32 *rpc_encode_header(struct rpc_task *task); static __be32 *rpc_verify_header(struct rpc_task *task); -static int rpc_ping(struct rpc_clnt *clnt, int flags); +static int rpc_ping(struct rpc_clnt *clnt); static void rpc_register_client(struct rpc_clnt *clnt) { @@ -340,7 +340,7 @@ struct rpc_clnt *rpc_create(struct rpc_create_args *args) return clnt; if (!(args->flags & RPC_CLNT_CREATE_NOPING)) { - int err = rpc_ping(clnt, RPC_TASK_SOFT); + int err = rpc_ping(clnt); if (err != 0) { rpc_shutdown_client(clnt); return ERR_PTR(err); @@ -528,7 +528,7 @@ struct rpc_clnt *rpc_bind_new_program(struct rpc_clnt *old, clnt->cl_prog = program->number; clnt->cl_vers = version->number; clnt->cl_stats = program->stats; - err = rpc_ping(clnt, RPC_TASK_SOFT); + err = rpc_ping(clnt); if (err != 0) { rpc_shutdown_client(clnt); clnt = ERR_PTR(err); @@ -1709,14 +1709,14 @@ static struct rpc_procinfo rpcproc_null = { .p_decode = rpcproc_decode_null, }; -static int rpc_ping(struct rpc_clnt *clnt, int flags) +static int rpc_ping(struct rpc_clnt *clnt) { struct rpc_message msg = { .rpc_proc = &rpcproc_null, }; int err; msg.rpc_cred = authnull_ops.lookup_cred(NULL, NULL, 0); - err = rpc_call_sync(clnt, &msg, flags); + err = rpc_call_sync(clnt, &msg, RPC_TASK_SOFT | RPC_TASK_SOFTCONN); put_rpccred(msg.rpc_cred); return err; } -- cgit v1.2.3 From 3a28becc35e5c8f1fabb707bcd8a473712653de6 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 3 Dec 2009 15:58:56 -0500 Subject: SUNRPC: soft connect semantics for UDP Introduce soft connect behavior for UDP transports. In this case, a major timeout returns ETIMEDOUT instead of EIO. Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- net/sunrpc/clnt.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'net/sunrpc') diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index 97931d9f8579..154034b675bd 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -1380,6 +1380,10 @@ call_timeout(struct rpc_task *task) dprintk("RPC: %5u call_timeout (major)\n", task->tk_pid); task->tk_timeouts++; + if (RPC_IS_SOFTCONN(task)) { + rpc_exit(task, -ETIMEDOUT); + return; + } if (RPC_IS_SOFT(task)) { if (clnt->cl_chatty) printk(KERN_NOTICE "%s: server %s not responding, timed out\n", -- cgit v1.2.3 From 480e3243df156e39eea6c91057e2ae612a6bbe19 Mon Sep 17 00:00:00 2001 From: Roel Kluin Date: Tue, 8 Dec 2009 13:13:03 -0500 Subject: SUNRPC: IS_ERR/PTR_ERR confusion IS_ERR returns 1 or 0, PTR_ERR returns the error value. Signed-off-by: Roel Kluin Cc: stable@kernel.org Signed-off-by: Trond Myklebust --- net/sunrpc/auth_gss/auth_gss.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net/sunrpc') diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c index fc6a43ccd950..129d75ea25d2 100644 --- a/net/sunrpc/auth_gss/auth_gss.c +++ b/net/sunrpc/auth_gss/auth_gss.c @@ -485,7 +485,7 @@ gss_refresh_upcall(struct rpc_task *task) dprintk("RPC: %5u gss_refresh_upcall for uid %u\n", task->tk_pid, cred->cr_uid); gss_msg = gss_setup_upcall(task->tk_client, gss_auth, cred); - if (IS_ERR(gss_msg) == -EAGAIN) { + if (PTR_ERR(gss_msg) == -EAGAIN) { /* XXX: warning on the first, under the assumption we * shouldn't normally hit this case on a refresh. */ warn_gssd(); -- cgit v1.2.3 From 053e324f67b9921fe7de0c4cbc720d29cb4bf207 Mon Sep 17 00:00:00 2001 From: Suresh Jayaraman Date: Wed, 9 Dec 2009 23:15:22 +0530 Subject: rpc: remove unneeded function parameter in gss_add_msg() The pointer to struct gss_auth parameter in gss_add_msg is not really needed after commit 5b7ddd4a. Zap it. Signed-off-by: Suresh Jayaraman Signed-off-by: Trond Myklebust --- net/sunrpc/auth_gss/auth_gss.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net/sunrpc') diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c index 129d75ea25d2..3c3c50f38a1c 100644 --- a/net/sunrpc/auth_gss/auth_gss.c +++ b/net/sunrpc/auth_gss/auth_gss.c @@ -304,7 +304,7 @@ __gss_find_upcall(struct rpc_inode *rpci, uid_t uid) * to that upcall instead of adding the new upcall. */ static inline struct gss_upcall_msg * -gss_add_msg(struct gss_auth *gss_auth, struct gss_upcall_msg *gss_msg) +gss_add_msg(struct gss_upcall_msg *gss_msg) { struct rpc_inode *rpci = gss_msg->inode; struct inode *inode = &rpci->vfs_inode; @@ -445,7 +445,7 @@ gss_setup_upcall(struct rpc_clnt *clnt, struct gss_auth *gss_auth, struct rpc_cr gss_new = gss_alloc_msg(gss_auth, uid, clnt, gss_cred->gc_machine_cred); if (IS_ERR(gss_new)) return gss_new; - gss_msg = gss_add_msg(gss_auth, gss_new); + gss_msg = gss_add_msg(gss_new); if (gss_msg == gss_new) { struct inode *inode = &gss_new->inode->vfs_inode; int res = rpc_queue_upcall(inode, &gss_new->msg); -- cgit v1.2.3