From 2c6434888cef9e5f450d6c5b7df6d8c625ed27c1 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Thu, 7 Jan 2010 09:42:03 -0500 Subject: nfs4: handle -EKEYEXPIRED errors from RPC layer If a KRB5 TGT ticket expires, we don't want to return an error immediatel. If someone has a long running job and just forgets to run "kinit" in time then this will make it fail. Instead, we want to treat this situation as we would NFS4ERR_DELAY and retry the upcall after delaying a bit with an exponential backoff. This patch just makes any place that would handle NFS4ERR_DELAY also handle -EKEYEXPIRED the same way. In the future, we may want to be more sophisticated however and handle hard vs. soft mounts differently, or specify some upper limit on how long we'll wait for a new TGT to be acquired. Signed-off-by: Jeff Layton Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 11 +++++++++-- fs/nfs/nfs4state.c | 1 + 2 files changed, 10 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 375f0fae2c6a..8d0c3a977c36 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -281,6 +281,7 @@ static int nfs4_handle_exception(const struct nfs_server *server, int errorcode, } case -NFS4ERR_GRACE: case -NFS4ERR_DELAY: + case -EKEYEXPIRED: ret = nfs4_delay(server->client, &exception->timeout); if (ret != 0) break; @@ -1163,7 +1164,7 @@ static int nfs4_do_open_reclaim(struct nfs_open_context *ctx, struct nfs4_state int err; do { err = _nfs4_do_open_reclaim(ctx, state); - if (err != -NFS4ERR_DELAY) + if (err != -NFS4ERR_DELAY && err != -EKEYEXPIRED) break; nfs4_handle_exception(server, err, &exception); } while (exception.retry); @@ -1582,6 +1583,7 @@ static int nfs4_do_open_expired(struct nfs_open_context *ctx, struct nfs4_state goto out; case -NFS4ERR_GRACE: case -NFS4ERR_DELAY: + case -EKEYEXPIRED: nfs4_handle_exception(server, err, &exception); err = 0; } @@ -3452,6 +3454,7 @@ _nfs4_async_handle_error(struct rpc_task *task, const struct nfs_server *server, if (server) nfs_inc_server_stats(server, NFSIOS_DELAY); case -NFS4ERR_GRACE: + case -EKEYEXPIRED: rpc_delay(task, NFS4_POLL_RETRY_MAX); task->tk_status = 0; return -EAGAIN; @@ -3564,6 +3567,7 @@ int nfs4_proc_setclientid_confirm(struct nfs_client *clp, struct rpc_cred *cred) case -NFS4ERR_RESOURCE: /* The IBM lawyers misread another document! */ case -NFS4ERR_DELAY: + case -EKEYEXPIRED: err = nfs4_delay(clp->cl_rpcclient, &timeout); } } while (err == 0); @@ -4179,7 +4183,7 @@ static int nfs4_lock_reclaim(struct nfs4_state *state, struct file_lock *request if (test_bit(NFS_DELEGATED_STATE, &state->flags) != 0) return 0; err = _nfs4_do_setlk(state, F_SETLK, request, NFS_LOCK_RECLAIM); - if (err != -NFS4ERR_DELAY) + if (err != -NFS4ERR_DELAY && err != -EKEYEXPIRED) break; nfs4_handle_exception(server, err, &exception); } while (exception.retry); @@ -4204,6 +4208,7 @@ static int nfs4_lock_expired(struct nfs4_state *state, struct file_lock *request goto out; case -NFS4ERR_GRACE: case -NFS4ERR_DELAY: + case -EKEYEXPIRED: nfs4_handle_exception(server, err, &exception); err = 0; } @@ -4355,6 +4360,7 @@ int nfs4_lock_delegation_recall(struct nfs4_state *state, struct file_lock *fl) err = 0; goto out; case -NFS4ERR_DELAY: + case -EKEYEXPIRED: break; } err = nfs4_handle_exception(server, err, &exception); @@ -4554,6 +4560,7 @@ static void nfs4_get_lease_time_done(struct rpc_task *task, void *calldata) switch (task->tk_status) { case -NFS4ERR_DELAY: case -NFS4ERR_GRACE: + case -EKEYEXPIRED: dprintk("%s Retry: tk_status %d\n", __func__, task->tk_status); rpc_delay(task, NFS4_POLL_RETRY_MIN); task->tk_status = 0; diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index c1e2733f4fa4..8406cacd3240 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -1314,6 +1314,7 @@ static void nfs4_set_lease_expired(struct nfs_client *clp, int status) case -NFS4ERR_DELAY: case -NFS4ERR_CLID_INUSE: case -EAGAIN: + case -EKEYEXPIRED: break; case -NFS4ERR_NOT_SAME: /* FixMe: implement recovery -- cgit v1.2.3 From b68d69b8c6d19f4c2174f26fe8b750a0e82eb732 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Thu, 7 Jan 2010 09:42:04 -0500 Subject: nfs: handle NFSv3 -EKEYEXPIRED errors as we would -EJUKEBOX We're using -EKEYEXPIRED to indicate that a krb5 credcache contains an expired ticket and that we should have the NFS layer retry the RPC call instead of returning an error back to the caller. Handle this as we would an -EJUKEBOX error return. Signed-off-by: Jeff Layton Signed-off-by: Trond Myklebust --- fs/nfs/nfs3proc.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'fs') diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c index 3f8881d1a050..24992f0a29f2 100644 --- a/fs/nfs/nfs3proc.c +++ b/fs/nfs/nfs3proc.c @@ -22,14 +22,14 @@ #define NFSDBG_FACILITY NFSDBG_PROC -/* A wrapper to handle the EJUKEBOX error message */ +/* A wrapper to handle the EJUKEBOX and EKEYEXPIRED error messages */ static int nfs3_rpc_wrapper(struct rpc_clnt *clnt, struct rpc_message *msg, int flags) { int res; do { res = rpc_call_sync(clnt, msg, flags); - if (res != -EJUKEBOX) + if (res != -EJUKEBOX && res != -EKEYEXPIRED) break; schedule_timeout_killable(NFS_JUKEBOX_RETRY_TIME); res = -ERESTARTSYS; @@ -42,9 +42,10 @@ nfs3_rpc_wrapper(struct rpc_clnt *clnt, struct rpc_message *msg, int flags) static int nfs3_async_handle_jukebox(struct rpc_task *task, struct inode *inode) { - if (task->tk_status != -EJUKEBOX) + if (task->tk_status != -EJUKEBOX && task->tk_status != -EKEYEXPIRED) return 0; - nfs_inc_stats(inode, NFSIOS_DELAY); + if (task->tk_status == -EJUKEBOX) + nfs_inc_stats(inode, NFSIOS_DELAY); task->tk_status = 0; rpc_restart_call(task); rpc_delay(task, NFS_JUKEBOX_RETRY_TIME); -- cgit v1.2.3 From 97cefcc6d0aa6b4fc9ba67eb1ef4cc9e25f826f2 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Fri, 8 Jan 2010 12:17:21 -0500 Subject: nfs: handle NFSv2 -EKEYEXPIRED returns from RPC layer appropriately Add a wrapper around rpc_call_sync that handles -EKEYEXPIRED errors from the RPC layer as it would an -EJUKEBOX error if NFSv2 had such a thing. Also, add a handler for that error for async calls that makes it resubmit the RPC on -EKEYEXPIRED. Signed-off-by: Jeff Layton Signed-off-by: Trond Myklebust --- fs/nfs/proc.c | 41 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 41 insertions(+) (limited to 'fs') diff --git a/fs/nfs/proc.c b/fs/nfs/proc.c index ef583854d8d0..c752d944fe9e 100644 --- a/fs/nfs/proc.c +++ b/fs/nfs/proc.c @@ -46,6 +46,39 @@ #define NFSDBG_FACILITY NFSDBG_PROC +/* + * wrapper to handle the -EKEYEXPIRED error message. This should generally + * only happen if using krb5 auth and a user's TGT expires. NFSv2 doesn't + * support the NFSERR_JUKEBOX error code, but we handle this situation in the + * same way that we handle that error with NFSv3. + */ +static int +nfs_rpc_wrapper(struct rpc_clnt *clnt, struct rpc_message *msg, int flags) +{ + int res; + do { + res = rpc_call_sync(clnt, msg, flags); + if (res != -EKEYEXPIRED) + break; + schedule_timeout_killable(NFS_JUKEBOX_RETRY_TIME); + res = -ERESTARTSYS; + } while (!fatal_signal_pending(current)); + return res; +} + +#define rpc_call_sync(clnt, msg, flags) nfs_rpc_wrapper(clnt, msg, flags) + +static int +nfs_async_handle_expired_key(struct rpc_task *task) +{ + if (task->tk_status != -EKEYEXPIRED) + return 0; + task->tk_status = 0; + rpc_restart_call(task); + rpc_delay(task, NFS_JUKEBOX_RETRY_TIME); + return 1; +} + /* * Bare-bones access to getattr: this is for nfs_read_super. */ @@ -307,6 +340,8 @@ nfs_proc_unlink_setup(struct rpc_message *msg, struct inode *dir) static int nfs_proc_unlink_done(struct rpc_task *task, struct inode *dir) { + if (nfs_async_handle_expired_key(task)) + return 0; nfs_mark_for_revalidate(dir); return 1; } @@ -560,6 +595,9 @@ nfs_proc_pathconf(struct nfs_server *server, struct nfs_fh *fhandle, static int nfs_read_done(struct rpc_task *task, struct nfs_read_data *data) { + if (nfs_async_handle_expired_key(task)) + return -EAGAIN; + nfs_invalidate_atime(data->inode); if (task->tk_status >= 0) { nfs_refresh_inode(data->inode, data->res.fattr); @@ -579,6 +617,9 @@ static void nfs_proc_read_setup(struct nfs_read_data *data, struct rpc_message * static int nfs_write_done(struct rpc_task *task, struct nfs_write_data *data) { + if (nfs_async_handle_expired_key(task)) + return -EAGAIN; + if (task->tk_status >= 0) nfs_post_op_update_inode_force_wcc(data->inode, data->res.fattr); return 0; -- cgit v1.2.3 From 8e0d46e13833b06832395e7eacccae8af8743461 Mon Sep 17 00:00:00 2001 From: Mike Sager Date: Thu, 17 Dec 2009 12:06:26 -0500 Subject: nfs41: Adjust max cache response size value For the CREATE_SESSION attribute ca_maxresponsesize_cached, calculate the value based on the rpc reply header size plus the maximum nfs compound reply size. Signed-off-by: Mike Sager Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 6 ++---- fs/nfs/nfs4xdr.c | 10 +++++++++- 2 files changed, 11 insertions(+), 5 deletions(-) (limited to 'fs') diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 8d0c3a977c36..b829118c7e04 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -4805,16 +4805,14 @@ static void nfs4_init_channel_attrs(struct nfs41_create_session_args *args) args->fc_attrs.headerpadsz = 0; args->fc_attrs.max_rqst_sz = mxrqst_sz; args->fc_attrs.max_resp_sz = mxresp_sz; - args->fc_attrs.max_resp_sz_cached = mxresp_sz; args->fc_attrs.max_ops = NFS4_MAX_OPS; args->fc_attrs.max_reqs = session->clp->cl_rpcclient->cl_xprt->max_reqs; dprintk("%s: Fore Channel : max_rqst_sz=%u max_resp_sz=%u " - "max_resp_sz_cached=%u max_ops=%u max_reqs=%u\n", + "max_ops=%u max_reqs=%u\n", __func__, args->fc_attrs.max_rqst_sz, args->fc_attrs.max_resp_sz, - args->fc_attrs.max_resp_sz_cached, args->fc_attrs.max_ops, - args->fc_attrs.max_reqs); + args->fc_attrs.max_ops, args->fc_attrs.max_reqs); /* Back channel attributes */ args->bc_attrs.headerpadsz = 0; diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index e437fd6a819f..020ebf151184 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -1578,6 +1578,14 @@ static void encode_create_session(struct xdr_stream *xdr, char machine_name[NFS4_MAX_MACHINE_NAME_LEN]; uint32_t len; struct nfs_client *clp = args->client; + u32 max_resp_sz_cached; + + /* + * Assumes OPEN is the biggest non-idempotent compound. + * 2 is the verifier. + */ + max_resp_sz_cached = (NFS4_dec_open_sz + RPC_REPHDRSIZE + + RPC_MAX_AUTH_SIZE + 2) * XDR_UNIT; len = scnprintf(machine_name, sizeof(machine_name), "%s", clp->cl_ipaddr); @@ -1592,7 +1600,7 @@ static void encode_create_session(struct xdr_stream *xdr, *p++ = cpu_to_be32(args->fc_attrs.headerpadsz); /* header padding size */ *p++ = cpu_to_be32(args->fc_attrs.max_rqst_sz); /* max req size */ *p++ = cpu_to_be32(args->fc_attrs.max_resp_sz); /* max resp size */ - *p++ = cpu_to_be32(args->fc_attrs.max_resp_sz_cached); /* Max resp sz cached */ + *p++ = cpu_to_be32(max_resp_sz_cached); /* Max resp sz cached */ *p++ = cpu_to_be32(args->fc_attrs.max_ops); /* max operations */ *p++ = cpu_to_be32(args->fc_attrs.max_reqs); /* max requests */ *p++ = cpu_to_be32(0); /* rdmachannel_attrs */ -- cgit v1.2.3 From a7989c3e4702203baa5ddb3614f92bfc49a6e491 Mon Sep 17 00:00:00 2001 From: Mike Sager Date: Tue, 19 Jan 2010 12:54:40 -0500 Subject: nfs41: Check slot table for referring calls Traverse a list of referring calls and look for a session/slot/seq number match. Signed-off-by: Mike Sager Signed-off-by: Trond Myklebust --- fs/nfs/callback_proc.c | 55 ++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 55 insertions(+) (limited to 'fs') diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c index defa9b4c470e..631b44c1439b 100644 --- a/fs/nfs/callback_proc.c +++ b/fs/nfs/callback_proc.c @@ -225,6 +225,61 @@ validate_seqid(struct nfs4_slot_table *tbl, u32 slotid, u32 seqid) return NULL; } +/* + * For each referring call triple, check the session's slot table for + * a match. If the slot is in use and the sequence numbers match, the + * client is still waiting for a response to the original request. + */ +static bool referring_call_exists(struct nfs_client *clp, + uint32_t nrclists, + struct referring_call_list *rclists) +{ + bool status = 0; + int i, j; + struct nfs4_session *session; + struct nfs4_slot_table *tbl; + struct referring_call_list *rclist; + struct referring_call *ref; + + /* + * XXX When client trunking is implemented, this becomes + * a session lookup from within the loop + */ + session = clp->cl_session; + tbl = &session->fc_slot_table; + + for (i = 0; i < nrclists; i++) { + rclist = &rclists[i]; + if (memcmp(session->sess_id.data, + rclist->rcl_sessionid.data, + NFS4_MAX_SESSIONID_LEN) != 0) + continue; + + for (j = 0; j < rclist->rcl_nrefcalls; j++) { + ref = &rclist->rcl_refcalls[j]; + + dprintk("%s: sessionid %x:%x:%x:%x sequenceid %u " + "slotid %u\n", __func__, + ((u32 *)&rclist->rcl_sessionid.data)[0], + ((u32 *)&rclist->rcl_sessionid.data)[1], + ((u32 *)&rclist->rcl_sessionid.data)[2], + ((u32 *)&rclist->rcl_sessionid.data)[3], + ref->rc_sequenceid, ref->rc_slotid); + + spin_lock(&tbl->slot_tbl_lock); + status = (test_bit(ref->rc_slotid, tbl->used_slots) && + tbl->slots[ref->rc_slotid].seq_nr == + ref->rc_sequenceid); + spin_unlock(&tbl->slot_tbl_lock); + if (status) + goto out; + } + } + +out: + return status; +} + /* FIXME: referring calls should be processed */ unsigned nfs4_callback_sequence(struct cb_sequenceargs *args, struct cb_sequenceres *res) -- cgit v1.2.3 From 72ce2b3c064471fc511a9ca2fb6c38d90d2ab826 Mon Sep 17 00:00:00 2001 From: Mike Sager Date: Tue, 19 Jan 2010 12:54:41 -0500 Subject: nfs41: Process callback's referring call list If a CB_SEQUENCE referring call triple matches a slot table entry, the client is still waiting for a response to the original request. In this case, return NFS4ERR_DELAY as the response to the callback. Signed-off-by: Mike Sager Signed-off-by: Trond Myklebust --- fs/nfs/callback_proc.c | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) (limited to 'fs') diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c index 631b44c1439b..49c4b548b4d0 100644 --- a/fs/nfs/callback_proc.c +++ b/fs/nfs/callback_proc.c @@ -280,17 +280,12 @@ out: return status; } -/* FIXME: referring calls should be processed */ unsigned nfs4_callback_sequence(struct cb_sequenceargs *args, struct cb_sequenceres *res) { struct nfs_client *clp; int i, status; - for (i = 0; i < args->csa_nrclists; i++) - kfree(args->csa_rclists[i].rcl_refcalls); - kfree(args->csa_rclists); - status = htonl(NFS4ERR_BADSESSION); clp = find_client_with_session(args->csa_addr, 4, &args->csa_sessionid); if (clp == NULL) @@ -301,6 +296,16 @@ unsigned nfs4_callback_sequence(struct cb_sequenceargs *args, if (status) goto out_putclient; + /* + * Check for pending referring calls. If a match is found, a + * related callback was received before the response to the original + * call. + */ + if (referring_call_exists(clp, args->csa_nrclists, args->csa_rclists)) { + status = htonl(NFS4ERR_DELAY); + goto out_putclient; + } + memcpy(&res->csr_sessionid, &args->csa_sessionid, sizeof(res->csr_sessionid)); res->csr_sequenceid = args->csa_sequenceid; @@ -311,6 +316,10 @@ unsigned nfs4_callback_sequence(struct cb_sequenceargs *args, out_putclient: nfs_put_client(clp); out: + for (i = 0; i < args->csa_nrclists; i++) + kfree(args->csa_rclists[i].rcl_refcalls); + kfree(args->csa_rclists); + dprintk("%s: exit with status = %d\n", __func__, ntohl(status)); res->csr_status = status; return res->csr_status; -- cgit v1.2.3 From 31d2b4356b054537c35f4f8a7533e0b4a494dcc6 Mon Sep 17 00:00:00 2001 From: Andy Adamson Date: Thu, 14 Jan 2010 17:45:04 -0500 Subject: nfs41: fix wrong error on callback header xdr overflow Set NFS4ERR_RESOURCE as CB_COMPOUND status and do not return an op on decode_op_hdr or encode_op_hdr buffer overflow. NFS4ERR_RESOURCE is correct for v4.0. Will fix the return for v4.1 along with all the other NFS4ERR_RESOURCE errors in a later patch. Signed-off-by: Andy Adamson Signed-off-by: Trond Myklebust --- fs/nfs/callback_xdr.c | 28 ++++++++++++++++++---------- 1 file changed, 18 insertions(+), 10 deletions(-) (limited to 'fs') diff --git a/fs/nfs/callback_xdr.c b/fs/nfs/callback_xdr.c index 8e1a2511c8be..6ae327871b86 100644 --- a/fs/nfs/callback_xdr.c +++ b/fs/nfs/callback_xdr.c @@ -28,6 +28,9 @@ #define NFSDBG_FACILITY NFSDBG_CALLBACK +/* Internal error code */ +#define NFS4ERR_RESOURCE_HDR 11050 + typedef __be32 (*callback_process_op_t)(void *, void *); typedef __be32 (*callback_decode_arg_t)(struct svc_rqst *, struct xdr_stream *, void *); typedef __be32 (*callback_encode_res_t)(struct svc_rqst *, struct xdr_stream *, void *); @@ -173,7 +176,7 @@ static __be32 decode_op_hdr(struct xdr_stream *xdr, unsigned int *op) __be32 *p; p = read_buf(xdr, 4); if (unlikely(p == NULL)) - return htonl(NFS4ERR_RESOURCE); + return htonl(NFS4ERR_RESOURCE_HDR); *op = ntohl(*p); return 0; } @@ -465,7 +468,7 @@ static __be32 encode_op_hdr(struct xdr_stream *xdr, uint32_t op, __be32 res) p = xdr_reserve_space(xdr, 8); if (unlikely(p == NULL)) - return htonl(NFS4ERR_RESOURCE); + return htonl(NFS4ERR_RESOURCE_HDR); *p++ = htonl(op); *p = res; return 0; @@ -605,17 +608,15 @@ static __be32 process_op(uint32_t minorversion, int nop, struct xdr_stream *xdr_out, void *resp) { struct callback_op *op = &callback_ops[0]; - unsigned int op_nr = OP_CB_ILLEGAL; + unsigned int op_nr; __be32 status; long maxlen; __be32 res; dprintk("%s: start\n", __func__); status = decode_op_hdr(xdr_in, &op_nr); - if (unlikely(status)) { - status = htonl(NFS4ERR_OP_ILLEGAL); - goto out; - } + if (unlikely(status)) + return status; dprintk("%s: minorversion=%d nop=%d op_nr=%u\n", __func__, minorversion, nop, op_nr); @@ -624,7 +625,7 @@ static __be32 process_op(uint32_t minorversion, int nop, preprocess_nfs4_op(op_nr, &op); if (status == htonl(NFS4ERR_OP_ILLEGAL)) op_nr = OP_CB_ILLEGAL; -out: + maxlen = xdr_out->end - xdr_out->p; if (maxlen > 0 && maxlen < PAGE_SIZE) { if (likely(status == 0 && op->decode_args != NULL)) @@ -635,8 +636,8 @@ out: status = htonl(NFS4ERR_RESOURCE); res = encode_op_hdr(xdr_out, op_nr, status); - if (status == 0) - status = res; + if (unlikely(res)) + return res; if (op->encode_res != NULL && status == 0) status = op->encode_res(rqstp, xdr_out, resp); dprintk("%s: done, status = %d\n", __func__, ntohl(status)); @@ -677,6 +678,13 @@ static __be32 nfs4_callback_compound(struct svc_rqst *rqstp, void *argp, void *r nops++; } + /* Buffer overflow in decode_ops_hdr or encode_ops_hdr. Return + * resource error in cb_compound status without returning op */ + if (unlikely(status == htonl(NFS4ERR_RESOURCE_HDR))) { + status = htonl(NFS4ERR_RESOURCE); + nops--; + } + *hdr_res.status = status; *hdr_res.nops = htonl(nops); dprintk("%s: done, status = %u\n", __func__, ntohl(status)); -- cgit v1.2.3 From b92b30190093377828efcde5fc4cf7598fa1ee46 Mon Sep 17 00:00:00 2001 From: Andy Adamson Date: Thu, 14 Jan 2010 17:45:05 -0500 Subject: nfs41: directly encode back channel error Skip all other processing when error is encountered. Signed-off-by: Andy Adamson Signed-off-by: Trond Myklebust --- fs/nfs/callback_xdr.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/nfs/callback_xdr.c b/fs/nfs/callback_xdr.c index 6ae327871b86..d3e07f469949 100644 --- a/fs/nfs/callback_xdr.c +++ b/fs/nfs/callback_xdr.c @@ -625,16 +625,19 @@ static __be32 process_op(uint32_t minorversion, int nop, preprocess_nfs4_op(op_nr, &op); if (status == htonl(NFS4ERR_OP_ILLEGAL)) op_nr = OP_CB_ILLEGAL; + if (status) + goto encode_hdr; maxlen = xdr_out->end - xdr_out->p; if (maxlen > 0 && maxlen < PAGE_SIZE) { - if (likely(status == 0 && op->decode_args != NULL)) + if (likely(op->decode_args != NULL)) status = op->decode_args(rqstp, xdr_in, argp); if (likely(status == 0 && op->process_op != NULL)) status = op->process_op(argp, resp); } else status = htonl(NFS4ERR_RESOURCE); +encode_hdr: res = encode_op_hdr(xdr_out, op_nr, status); if (unlikely(res)) return res; -- cgit v1.2.3 From e95e60daee44fade63f32429ddcf1c2012a95632 Mon Sep 17 00:00:00 2001 From: Andy Adamson Date: Thu, 14 Jan 2010 17:45:06 -0500 Subject: nfs41: remove uneeded checks in callback processing All callback operations have arguments to decode and require processing. The preprocess_nfs4X_op functions catch unsupported or illegal ops so decode_args and process_op pointers are always non NULL. Signed-off-by: Andy Adamson Signed-off-by: Trond Myklebust --- fs/nfs/callback_xdr.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'fs') diff --git a/fs/nfs/callback_xdr.c b/fs/nfs/callback_xdr.c index d3e07f469949..a6f2ded72b17 100644 --- a/fs/nfs/callback_xdr.c +++ b/fs/nfs/callback_xdr.c @@ -630,9 +630,8 @@ static __be32 process_op(uint32_t minorversion, int nop, maxlen = xdr_out->end - xdr_out->p; if (maxlen > 0 && maxlen < PAGE_SIZE) { - if (likely(op->decode_args != NULL)) - status = op->decode_args(rqstp, xdr_in, argp); - if (likely(status == 0 && op->process_op != NULL)) + status = op->decode_args(rqstp, xdr_in, argp); + if (likely(status == 0)) status = op->process_op(argp, resp); } else status = htonl(NFS4ERR_RESOURCE); -- cgit v1.2.3 From b2f28bd78354b9bbcd178bf6bbf6b2277cd9b761 Mon Sep 17 00:00:00 2001 From: Andy Adamson Date: Thu, 14 Jan 2010 17:45:07 -0500 Subject: nfs41: prepare for back channel drc Make all cb_sequence arguments available to verify_seqid which will make replay decisions. Signed-off-by: Andy Adamson Signed-off-by: Trond Myklebust --- fs/nfs/callback_proc.c | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) (limited to 'fs') diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c index 49c4b548b4d0..3d7edd65577b 100644 --- a/fs/nfs/callback_proc.c +++ b/fs/nfs/callback_proc.c @@ -153,34 +153,34 @@ int nfs41_validate_delegation_stateid(struct nfs_delegation *delegation, const n * a single outstanding callback request at a time. */ static int -validate_seqid(struct nfs4_slot_table *tbl, u32 slotid, u32 seqid) +validate_seqid(struct nfs4_slot_table *tbl, struct cb_sequenceargs * args) { struct nfs4_slot *slot; dprintk("%s enter. slotid %d seqid %d\n", - __func__, slotid, seqid); + __func__, args->csa_slotid, args->csa_sequenceid); - if (slotid > NFS41_BC_MAX_CALLBACKS) + if (args->csa_slotid > NFS41_BC_MAX_CALLBACKS) return htonl(NFS4ERR_BADSLOT); - slot = tbl->slots + slotid; + slot = tbl->slots + args->csa_slotid; dprintk("%s slot table seqid: %d\n", __func__, slot->seq_nr); /* Normal */ - if (likely(seqid == slot->seq_nr + 1)) { + if (likely(args->csa_sequenceid == slot->seq_nr + 1)) { slot->seq_nr++; return htonl(NFS4_OK); } /* Replay */ - if (seqid == slot->seq_nr) { + if (args->csa_sequenceid == slot->seq_nr) { dprintk("%s seqid %d is a replay - no DRC available\n", - __func__, seqid); + __func__, args->csa_sequenceid); return htonl(NFS4_OK); } /* Wraparound */ - if (seqid == 1 && (slot->seq_nr + 1) == 0) { + if (args->csa_sequenceid == 1 && (slot->seq_nr + 1) == 0) { slot->seq_nr = 1; return htonl(NFS4_OK); } @@ -291,8 +291,7 @@ unsigned nfs4_callback_sequence(struct cb_sequenceargs *args, if (clp == NULL) goto out; - status = validate_seqid(&clp->cl_session->bc_slot_table, - args->csa_slotid, args->csa_sequenceid); + status = validate_seqid(&clp->cl_session->bc_slot_table, args); if (status) goto out_putclient; -- cgit v1.2.3 From 4911096f1a5df73c12c287a42ece4e7b5d9c19ec Mon Sep 17 00:00:00 2001 From: Andy Adamson Date: Thu, 14 Jan 2010 17:45:08 -0500 Subject: nfs41: back channel drc minimal implementation For now the back channel ca_maxresponsesize_cached is 0 and there is no backchannel DRC. Return NFS4ERR_REP_TOO_BIG_TO_CACHE when a cb_sequence cachethis is true. When it is false, return NFS4ERR_RETRY_UNCACHED_REP as the next operation error. Remember the replay error accross compound operation processing. Signed-off-by: Andy Adamson Signed-off-by: Trond Myklebust --- fs/nfs/callback_proc.c | 25 +++++++++++++++++-------- fs/nfs/callback_xdr.c | 19 +++++++++++++++---- 2 files changed, 32 insertions(+), 12 deletions(-) (limited to 'fs') diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c index 3d7edd65577b..4062f7690a33 100644 --- a/fs/nfs/callback_proc.c +++ b/fs/nfs/callback_proc.c @@ -143,9 +143,8 @@ int nfs41_validate_delegation_stateid(struct nfs_delegation *delegation, const n * Return success if the sequenceID is one more than what we last saw on * this slot, accounting for wraparound. Increments the slot's sequence. * - * We don't yet implement a duplicate request cache, so at this time - * we will log replays, and process them as if we had not seen them before, - * but we don't bump the sequence in the slot. Not too worried about it, + * We don't yet implement a duplicate request cache, instead we set the + * back channel ca_maxresponsesize_cached to zero. This is OK for now * since we only currently implement idempotent callbacks anyway. * * We have a single slot backchannel at this time, so we don't bother @@ -174,9 +173,15 @@ validate_seqid(struct nfs4_slot_table *tbl, struct cb_sequenceargs * args) /* Replay */ if (args->csa_sequenceid == slot->seq_nr) { - dprintk("%s seqid %d is a replay - no DRC available\n", + dprintk("%s seqid %d is a replay\n", __func__, args->csa_sequenceid); - return htonl(NFS4_OK); + /* Signal process_op to set this error on next op */ + if (args->csa_cachethis == 0) + return htonl(NFS4ERR_RETRY_UNCACHED_REP); + + /* The ca_maxresponsesize_cached is 0 with no DRC */ + else if (args->csa_cachethis == 1) + return htonl(NFS4ERR_REP_TOO_BIG_TO_CACHE); } /* Wraparound */ @@ -319,9 +324,13 @@ out: kfree(args->csa_rclists[i].rcl_refcalls); kfree(args->csa_rclists); - dprintk("%s: exit with status = %d\n", __func__, ntohl(status)); - res->csr_status = status; - return res->csr_status; + if (status == htonl(NFS4ERR_RETRY_UNCACHED_REP)) + res->csr_status = 0; + else + res->csr_status = status; + dprintk("%s: exit with status = %d res->csr_status %d\n", __func__, + ntohl(status), ntohl(res->csr_status)); + return status; } unsigned nfs4_callback_recallany(struct cb_recallanyargs *args, void *dummy) diff --git a/fs/nfs/callback_xdr.c b/fs/nfs/callback_xdr.c index a6f2ded72b17..08b430d922c4 100644 --- a/fs/nfs/callback_xdr.c +++ b/fs/nfs/callback_xdr.c @@ -605,7 +605,7 @@ preprocess_nfs4_op(unsigned int op_nr, struct callback_op **op) static __be32 process_op(uint32_t minorversion, int nop, struct svc_rqst *rqstp, struct xdr_stream *xdr_in, void *argp, - struct xdr_stream *xdr_out, void *resp) + struct xdr_stream *xdr_out, void *resp, int* drc_status) { struct callback_op *op = &callback_ops[0]; unsigned int op_nr; @@ -628,6 +628,11 @@ static __be32 process_op(uint32_t minorversion, int nop, if (status) goto encode_hdr; + if (*drc_status) { + status = *drc_status; + goto encode_hdr; + } + maxlen = xdr_out->end - xdr_out->p; if (maxlen > 0 && maxlen < PAGE_SIZE) { status = op->decode_args(rqstp, xdr_in, argp); @@ -636,6 +641,12 @@ static __be32 process_op(uint32_t minorversion, int nop, } else status = htonl(NFS4ERR_RESOURCE); + /* Only set by OP_CB_SEQUENCE processing */ + if (status == htonl(NFS4ERR_RETRY_UNCACHED_REP)) { + *drc_status = status; + status = 0; + } + encode_hdr: res = encode_op_hdr(xdr_out, op_nr, status); if (unlikely(res)) @@ -655,7 +666,7 @@ static __be32 nfs4_callback_compound(struct svc_rqst *rqstp, void *argp, void *r struct cb_compound_hdr_res hdr_res = { NULL }; struct xdr_stream xdr_in, xdr_out; __be32 *p; - __be32 status; + __be32 status, drc_status = 0; unsigned int nops = 0; dprintk("%s: start\n", __func__); @@ -675,8 +686,8 @@ static __be32 nfs4_callback_compound(struct svc_rqst *rqstp, void *argp, void *r return rpc_system_err; while (status == 0 && nops != hdr_arg.nops) { - status = process_op(hdr_arg.minorversion, nops, - rqstp, &xdr_in, argp, &xdr_out, resp); + status = process_op(hdr_arg.minorversion, nops, rqstp, + &xdr_in, argp, &xdr_out, resp, &drc_status); nops++; } -- cgit v1.2.3 From b9efa1b27e25b1286504973c0a6bf0f24106faa8 Mon Sep 17 00:00:00 2001 From: Andy Adamson Date: Wed, 20 Jan 2010 16:06:27 -0500 Subject: nfs41: implement cb_recall_slot Drain the fore channel and reset the max_slots to the new value. Signed-off-by: Andy Adamson Signed-off-by: Trond Myklebust --- fs/nfs/callback.h | 8 ++++++++ fs/nfs/callback_proc.c | 32 ++++++++++++++++++++++++++++++++ fs/nfs/callback_xdr.c | 22 +++++++++++++++++++++- fs/nfs/nfs4_fs.h | 2 ++ fs/nfs/nfs4state.c | 44 ++++++++++++++++++++++++++++++++++++++++++++ include/linux/nfs_fs_sb.h | 2 ++ 6 files changed, 109 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/nfs/callback.h b/fs/nfs/callback.h index d4036be0b589..85a7cfd1b8dd 100644 --- a/fs/nfs/callback.h +++ b/fs/nfs/callback.h @@ -119,6 +119,14 @@ struct cb_recallanyargs { }; extern unsigned nfs4_callback_recallany(struct cb_recallanyargs *args, void *dummy); + +struct cb_recallslotargs { + struct sockaddr *crsa_addr; + uint32_t crsa_target_max_slots; +}; +extern unsigned nfs4_callback_recallslot(struct cb_recallslotargs *args, + void *dummy); + #endif /* CONFIG_NFS_V4_1 */ extern __be32 nfs4_callback_getattr(struct cb_getattrargs *args, struct cb_getattrres *res); diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c index 4062f7690a33..e5155d9df595 100644 --- a/fs/nfs/callback_proc.c +++ b/fs/nfs/callback_proc.c @@ -361,4 +361,36 @@ out: dprintk("%s: exit with status = %d\n", __func__, ntohl(status)); return status; } + +/* Reduce the fore channel's max_slots to the target value */ +unsigned nfs4_callback_recallslot(struct cb_recallslotargs *args, void *dummy) +{ + struct nfs_client *clp; + struct nfs4_slot_table *fc_tbl; + int status; + + status = htonl(NFS4ERR_OP_NOT_IN_SESSION); + clp = nfs_find_client(args->crsa_addr, 4); + if (clp == NULL) + goto out; + + dprintk("NFS: CB_RECALL_SLOT request from %s target max slots %d\n", + rpc_peeraddr2str(clp->cl_rpcclient, RPC_DISPLAY_ADDR), + args->crsa_target_max_slots); + + fc_tbl = &clp->cl_session->fc_slot_table; + + status = htonl(NFS4ERR_BAD_HIGH_SLOT); + if (args->crsa_target_max_slots >= fc_tbl->max_slots || + args->crsa_target_max_slots < 1) + goto out; + + fc_tbl->target_max_slots = args->crsa_target_max_slots; + nfs41_handle_recall_slot(clp); + status = htonl(NFS4_OK); + nfs_put_client(clp); /* balance nfs_find_client */ +out: + dprintk("%s: exit with status = %d\n", __func__, ntohl(status)); + return status; +} #endif /* CONFIG_NFS_V4_1 */ diff --git a/fs/nfs/callback_xdr.c b/fs/nfs/callback_xdr.c index 08b430d922c4..8e66e20b59fd 100644 --- a/fs/nfs/callback_xdr.c +++ b/fs/nfs/callback_xdr.c @@ -24,6 +24,7 @@ #define CB_OP_SEQUENCE_RES_MAXSZ (CB_OP_HDR_RES_MAXSZ + \ 4 + 1 + 3) #define CB_OP_RECALLANY_RES_MAXSZ (CB_OP_HDR_RES_MAXSZ) +#define CB_OP_RECALLSLOT_RES_MAXSZ (CB_OP_HDR_RES_MAXSZ) #endif /* CONFIG_NFS_V4_1 */ #define NFSDBG_FACILITY NFSDBG_CALLBACK @@ -349,6 +350,20 @@ static unsigned decode_recallany_args(struct svc_rqst *rqstp, return 0; } +static unsigned decode_recallslot_args(struct svc_rqst *rqstp, + struct xdr_stream *xdr, + struct cb_recallslotargs *args) +{ + __be32 *p; + + args->crsa_addr = svc_addr(rqstp); + p = read_buf(xdr, 4); + if (unlikely(p == NULL)) + return htonl(NFS4ERR_BADXDR); + args->crsa_target_max_slots = ntohl(*p++); + return 0; +} + #endif /* CONFIG_NFS_V4_1 */ static __be32 encode_string(struct xdr_stream *xdr, unsigned int len, const char *str) @@ -557,6 +572,7 @@ preprocess_nfs41_op(int nop, unsigned int op_nr, struct callback_op **op) case OP_CB_RECALL: case OP_CB_SEQUENCE: case OP_CB_RECALL_ANY: + case OP_CB_RECALL_SLOT: *op = &callback_ops[op_nr]; break; @@ -565,7 +581,6 @@ preprocess_nfs41_op(int nop, unsigned int op_nr, struct callback_op **op) case OP_CB_NOTIFY: case OP_CB_PUSH_DELEG: case OP_CB_RECALLABLE_OBJ_AVAIL: - case OP_CB_RECALL_SLOT: case OP_CB_WANTS_CANCELLED: case OP_CB_NOTIFY_LOCK: return htonl(NFS4ERR_NOTSUPP); @@ -734,6 +749,11 @@ static struct callback_op callback_ops[] = { .decode_args = (callback_decode_arg_t)decode_recallany_args, .res_maxsize = CB_OP_RECALLANY_RES_MAXSZ, }, + [OP_CB_RECALL_SLOT] = { + .process_op = (callback_process_op_t)nfs4_callback_recallslot, + .decode_args = (callback_decode_arg_t)decode_recallslot_args, + .res_maxsize = CB_OP_RECALLSLOT_RES_MAXSZ, + }, #endif /* CONFIG_NFS_V4_1 */ }; diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h index 0c6fda33d66e..a187200a7aac 100644 --- a/fs/nfs/nfs4_fs.h +++ b/fs/nfs/nfs4_fs.h @@ -46,6 +46,7 @@ enum nfs4_client_state { NFS4CLNT_DELEGRETURN, NFS4CLNT_SESSION_RESET, NFS4CLNT_SESSION_DRAINING, + NFS4CLNT_RECALL_SLOT, }; /* @@ -280,6 +281,7 @@ extern void nfs4_schedule_state_manager(struct nfs_client *); extern int nfs4_state_mark_reclaim_nograce(struct nfs_client *clp, struct nfs4_state *state); extern int nfs4_state_mark_reclaim_reboot(struct nfs_client *clp, struct nfs4_state *state); extern void nfs41_handle_sequence_flag_errors(struct nfs_client *clp, u32 flags); +extern void nfs41_handle_recall_slot(struct nfs_client *clp); extern void nfs4_put_lock_state(struct nfs4_lock_state *lsp); extern int nfs4_set_lock_state(struct nfs4_state *state, struct file_lock *fl); extern void nfs4_copy_stateid(nfs4_stateid *, struct nfs4_state *, fl_owner_t); diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index 8406cacd3240..9164758c1ace 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -1249,6 +1249,12 @@ static int nfs4_reclaim_lease(struct nfs_client *clp) } #ifdef CONFIG_NFS_V4_1 +void nfs41_handle_recall_slot(struct nfs_client *clp) +{ + set_bit(NFS4CLNT_RECALL_SLOT, &clp->cl_state); + nfs4_schedule_state_recovery(clp); +} + void nfs41_handle_sequence_flag_errors(struct nfs_client *clp, u32 flags) { if (!flags) @@ -1299,9 +1305,38 @@ out: return status; } +static int nfs4_recall_slot(struct nfs_client *clp) +{ + struct nfs4_slot_table *fc_tbl = &clp->cl_session->fc_slot_table; + struct nfs4_channel_attrs *fc_attrs = &clp->cl_session->fc_attrs; + struct nfs4_slot *new, *old; + int i; + + nfs4_begin_drain_session(clp); + new = kmalloc(fc_tbl->target_max_slots * sizeof(struct nfs4_slot), + GFP_KERNEL); + if (!new) + return -ENOMEM; + + spin_lock(&fc_tbl->slot_tbl_lock); + for (i = 0; i < fc_tbl->target_max_slots; i++) + new[i].seq_nr = fc_tbl->slots[i].seq_nr; + old = fc_tbl->slots; + fc_tbl->slots = new; + fc_tbl->max_slots = fc_tbl->target_max_slots; + fc_tbl->target_max_slots = 0; + fc_attrs->max_reqs = fc_tbl->max_slots; + spin_unlock(&fc_tbl->slot_tbl_lock); + + kfree(old); + nfs4_end_drain_session(clp); + return 0; +} + #else /* CONFIG_NFS_V4_1 */ static int nfs4_reset_session(struct nfs_client *clp) { return 0; } static int nfs4_end_drain_session(struct nfs_client *clp) { return 0; } +static int nfs4_recall_slot(struct nfs_client *clp) { return 0; } #endif /* CONFIG_NFS_V4_1 */ /* Set NFS4CLNT_LEASE_EXPIRED for all v4.0 errors and for recoverable errors @@ -1398,6 +1433,15 @@ static void nfs4_state_manager(struct nfs_client *clp) nfs_client_return_marked_delegations(clp); continue; } + /* Recall session slots */ + if (test_and_clear_bit(NFS4CLNT_RECALL_SLOT, &clp->cl_state) + && nfs4_has_session(clp)) { + status = nfs4_recall_slot(clp); + if (status < 0) + goto out_error; + continue; + } + nfs4_clear_state_manager_bit(clp); /* Did we race with an attempt to give us more work? */ diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h index 34fc6be5bfcf..ecd9e6c74d06 100644 --- a/include/linux/nfs_fs_sb.h +++ b/include/linux/nfs_fs_sb.h @@ -193,6 +193,8 @@ struct nfs4_slot_table { int max_slots; /* # slots in table */ int highest_used_slotid; /* sent to server on each SEQ. * op for dynamic resizing */ + int target_max_slots; /* Set by CB_RECALL_SLOT as + * the new max_slots */ }; static inline int slot_idx(struct nfs4_slot_table *tbl, struct nfs4_slot *sp) -- cgit v1.2.3 From 104aeba484c9291cde2def6d037b836af46d8eb0 Mon Sep 17 00:00:00 2001 From: Andy Adamson Date: Thu, 14 Jan 2010 17:45:10 -0500 Subject: nfs41: resize slot table in reset When session is reset, client can renegotiate slot table size. Signed-off-by: Andy Adamson Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 40 +++++++++++++++++++++------------------- 1 file changed, 21 insertions(+), 19 deletions(-) (limited to 'fs') diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index b829118c7e04..84b53d38f50b 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -4618,26 +4618,32 @@ int nfs4_proc_get_lease_time(struct nfs_client *clp, struct nfs_fsinfo *fsinfo) /* * Reset a slot table */ -static int nfs4_reset_slot_table(struct nfs4_slot_table *tbl, int max_slots, - int old_max_slots, int ivalue) +static int nfs4_reset_slot_table(struct nfs4_slot_table *tbl, u32 max_reqs, + int ivalue) { + struct nfs4_slot *new = NULL; int i; int ret = 0; - dprintk("--> %s: max_reqs=%u, tbl %p\n", __func__, max_slots, tbl); + dprintk("--> %s: max_reqs=%u, tbl->max_slots %d\n", __func__, + max_reqs, tbl->max_slots); - /* - * Until we have dynamic slot table adjustment, insist - * upon the same slot table size - */ - if (max_slots != old_max_slots) { - dprintk("%s reset slot table does't match old\n", - __func__); - ret = -EINVAL; /*XXX NFS4ERR_REQ_TOO_BIG ? */ - goto out; + /* Does the newly negotiated max_reqs match the existing slot table? */ + if (max_reqs != tbl->max_slots) { + ret = -ENOMEM; + new = kmalloc(max_reqs * sizeof(struct nfs4_slot), + GFP_KERNEL); + if (!new) + goto out; + ret = 0; + kfree(tbl->slots); } spin_lock(&tbl->slot_tbl_lock); - for (i = 0; i < max_slots; ++i) + if (new) { + tbl->slots = new; + tbl->max_slots = max_reqs; + } + for (i = 0; i < tbl->max_slots; ++i) tbl->slots[i].seq_nr = ivalue; spin_unlock(&tbl->slot_tbl_lock); dprintk("%s: tbl=%p slots=%p max_slots=%d\n", __func__, @@ -4655,16 +4661,12 @@ static int nfs4_reset_slot_tables(struct nfs4_session *session) int status; status = nfs4_reset_slot_table(&session->fc_slot_table, - session->fc_attrs.max_reqs, - session->fc_slot_table.max_slots, - 1); + session->fc_attrs.max_reqs, 1); if (status) return status; status = nfs4_reset_slot_table(&session->bc_slot_table, - session->bc_attrs.max_reqs, - session->bc_slot_table.max_slots, - 0); + session->bc_attrs.max_reqs, 0); return status; } -- cgit v1.2.3 From bae0ac0ee1839e345a9b26d8c00eb3ef565caad1 Mon Sep 17 00:00:00 2001 From: Andy Adamson Date: Thu, 21 Jan 2010 14:19:16 -0500 Subject: nfs41: fix nfs4_callback_recallslot Return NFS4_OK if target high slotid equals enforced high slotid. Fix nfs_client reference leak. Signed-off-by: Andy Adamson Signed-off-by: Trond Myklebust --- fs/nfs/callback_proc.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) (limited to 'fs') diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c index e5155d9df595..c79e18cd0e15 100644 --- a/fs/nfs/callback_proc.c +++ b/fs/nfs/callback_proc.c @@ -381,13 +381,17 @@ unsigned nfs4_callback_recallslot(struct cb_recallslotargs *args, void *dummy) fc_tbl = &clp->cl_session->fc_slot_table; status = htonl(NFS4ERR_BAD_HIGH_SLOT); - if (args->crsa_target_max_slots >= fc_tbl->max_slots || + if (args->crsa_target_max_slots > fc_tbl->max_slots || args->crsa_target_max_slots < 1) - goto out; + goto out_putclient; + + status = htonl(NFS4_OK); + if (args->crsa_target_max_slots == fc_tbl->max_slots) + goto out_putclient; fc_tbl->target_max_slots = args->crsa_target_max_slots; nfs41_handle_recall_slot(clp); - status = htonl(NFS4_OK); +out_putclient: nfs_put_client(clp); /* balance nfs_find_client */ out: dprintk("%s: exit with status = %d\n", __func__, ntohl(status)); -- cgit v1.2.3 From 41f54a55480c752d9419cac5e647785cb794142e Mon Sep 17 00:00:00 2001 From: Andy Adamson Date: Thu, 21 Jan 2010 14:54:13 -0500 Subject: nfs41: clear NFS4CLNT_RECALL_SLOT bit on session reset Signed-off-by: Andy Adamson Signed-off-by: Trond Myklebust --- fs/nfs/nfs4state.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) (limited to 'fs') diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index 9164758c1ace..2931c46c4127 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -1291,17 +1291,17 @@ static int nfs4_reset_session(struct nfs_client *clp) memset(clp->cl_session->sess_id.data, 0, NFS4_MAX_SESSIONID_LEN); status = nfs4_proc_create_session(clp); - if (status) + if (status) { status = nfs4_recovery_handle_error(clp, status); + goto out; + } + /* create_session negotiated new slot table */ + clear_bit(NFS4CLNT_RECALL_SLOT, &clp->cl_state); -out: - /* - * Let the state manager reestablish state - */ - if (!test_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state) && - status == 0) + /* Let the state manager reestablish state */ + if (!test_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state)) nfs41_setup_state_renewal(clp); - +out: return status; } -- cgit v1.2.3 From 9733f0d9289cbcac4fa03db0cb5aec1ab01c6bc9 Mon Sep 17 00:00:00 2001 From: Andy Adamson Date: Fri, 22 Jan 2010 12:03:08 -0500 Subject: nfs41: cleanup callback code to use __be32 type Signed-off-by: Andy Adamson Signed-off-by: Trond Myklebust --- fs/nfs/callback_proc.c | 15 ++++++++------- fs/nfs/callback_xdr.c | 30 +++++++++++++++--------------- 2 files changed, 23 insertions(+), 22 deletions(-) (limited to 'fs') diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c index c79e18cd0e15..84761b5bb8e2 100644 --- a/fs/nfs/callback_proc.c +++ b/fs/nfs/callback_proc.c @@ -151,7 +151,7 @@ int nfs41_validate_delegation_stateid(struct nfs_delegation *delegation, const n * checking the used_slots bit array on the table. The lower layer guarantees * a single outstanding callback request at a time. */ -static int +static __be32 validate_seqid(struct nfs4_slot_table *tbl, struct cb_sequenceargs * args) { struct nfs4_slot *slot; @@ -285,11 +285,12 @@ out: return status; } -unsigned nfs4_callback_sequence(struct cb_sequenceargs *args, +__be32 nfs4_callback_sequence(struct cb_sequenceargs *args, struct cb_sequenceres *res) { struct nfs_client *clp; - int i, status; + int i; + __be32 status; status = htonl(NFS4ERR_BADSESSION); clp = find_client_with_session(args->csa_addr, 4, &args->csa_sessionid); @@ -333,10 +334,10 @@ out: return status; } -unsigned nfs4_callback_recallany(struct cb_recallanyargs *args, void *dummy) +__be32 nfs4_callback_recallany(struct cb_recallanyargs *args, void *dummy) { struct nfs_client *clp; - int status; + __be32 status; fmode_t flags = 0; status = htonl(NFS4ERR_OP_NOT_IN_SESSION); @@ -363,11 +364,11 @@ out: } /* Reduce the fore channel's max_slots to the target value */ -unsigned nfs4_callback_recallslot(struct cb_recallslotargs *args, void *dummy) +__be32 nfs4_callback_recallslot(struct cb_recallslotargs *args, void *dummy) { struct nfs_client *clp; struct nfs4_slot_table *fc_tbl; - int status; + __be32 status; status = htonl(NFS4ERR_OP_NOT_IN_SESSION); clp = nfs_find_client(args->crsa_addr, 4); diff --git a/fs/nfs/callback_xdr.c b/fs/nfs/callback_xdr.c index 8e66e20b59fd..db30c0b398b5 100644 --- a/fs/nfs/callback_xdr.c +++ b/fs/nfs/callback_xdr.c @@ -219,10 +219,10 @@ out: #if defined(CONFIG_NFS_V4_1) -static unsigned decode_sessionid(struct xdr_stream *xdr, +static __be32 decode_sessionid(struct xdr_stream *xdr, struct nfs4_sessionid *sid) { - uint32_t *p; + __be32 *p; int len = NFS4_MAX_SESSIONID_LEN; p = read_buf(xdr, len); @@ -233,12 +233,12 @@ static unsigned decode_sessionid(struct xdr_stream *xdr, return 0; } -static unsigned decode_rc_list(struct xdr_stream *xdr, +static __be32 decode_rc_list(struct xdr_stream *xdr, struct referring_call_list *rc_list) { - uint32_t *p; + __be32 *p; int i; - unsigned status; + __be32 status; status = decode_sessionid(xdr, &rc_list->rcl_sessionid); if (status) @@ -271,13 +271,13 @@ out: return status; } -static unsigned decode_cb_sequence_args(struct svc_rqst *rqstp, +static __be32 decode_cb_sequence_args(struct svc_rqst *rqstp, struct xdr_stream *xdr, struct cb_sequenceargs *args) { - uint32_t *p; + __be32 *p; int i; - unsigned status; + __be32 status; status = decode_sessionid(xdr, &args->csa_sessionid); if (status) @@ -331,11 +331,11 @@ out_free: goto out; } -static unsigned decode_recallany_args(struct svc_rqst *rqstp, +static __be32 decode_recallany_args(struct svc_rqst *rqstp, struct xdr_stream *xdr, struct cb_recallanyargs *args) { - uint32_t *p; + __be32 *p; args->craa_addr = svc_addr(rqstp); p = read_buf(xdr, 4); @@ -350,7 +350,7 @@ static unsigned decode_recallany_args(struct svc_rqst *rqstp, return 0; } -static unsigned decode_recallslot_args(struct svc_rqst *rqstp, +static __be32 decode_recallslot_args(struct svc_rqst *rqstp, struct xdr_stream *xdr, struct cb_recallslotargs *args) { @@ -517,10 +517,10 @@ out: #if defined(CONFIG_NFS_V4_1) -static unsigned encode_sessionid(struct xdr_stream *xdr, +static __be32 encode_sessionid(struct xdr_stream *xdr, const struct nfs4_sessionid *sid) { - uint32_t *p; + __be32 *p; int len = NFS4_MAX_SESSIONID_LEN; p = xdr_reserve_space(xdr, len); @@ -531,11 +531,11 @@ static unsigned encode_sessionid(struct xdr_stream *xdr, return 0; } -static unsigned encode_cb_sequence_res(struct svc_rqst *rqstp, +static __be32 encode_cb_sequence_res(struct svc_rqst *rqstp, struct xdr_stream *xdr, const struct cb_sequenceres *res) { - uint32_t *p; + __be32 *p; unsigned status = res->csr_status; if (unlikely(status != 0)) -- cgit v1.2.3 From c2459dc46269728e4a080ec8d5a316b2bba2e142 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Mon, 1 Feb 2010 14:17:14 -0500 Subject: NFS: Proper accounting for NFS VFS calls Nit: The VFSOPEN and VFSFLUSH counters are function call counters. Count every call to these routines. Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- fs/nfs/file.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/nfs/file.c b/fs/nfs/file.c index 63f2071d6445..57cf94f129ba 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c @@ -123,11 +123,11 @@ nfs_file_open(struct inode *inode, struct file *filp) filp->f_path.dentry->d_parent->d_name.name, filp->f_path.dentry->d_name.name); + nfs_inc_stats(inode, NFSIOS_VFSOPEN); res = nfs_check_flags(filp->f_flags); if (res) return res; - nfs_inc_stats(inode, NFSIOS_VFSOPEN); res = nfs_open(inode, filp); return res; } @@ -237,9 +237,9 @@ nfs_file_flush(struct file *file, fl_owner_t id) dentry->d_parent->d_name.name, dentry->d_name.name); + nfs_inc_stats(inode, NFSIOS_VFSFLUSH); if ((file->f_mode & FMODE_WRITE) == 0) return 0; - nfs_inc_stats(inode, NFSIOS_VFSFLUSH); /* Flush writes to the server and return any errors */ return nfs_do_fsync(ctx, inode); -- cgit v1.2.3 From 4184dcf2dbde481b34d370e1704f2b91a8c9f0d1 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Mon, 1 Feb 2010 14:17:23 -0500 Subject: NFS: Fix byte accounting for generic NFS reads Currently, the NFS I/O counters count the number of bytes requested by applications, rather than the number of bytes actually read by the system calls. The number of bytes requested for reads is actually not that useful, because the value is usually a buffer size for reads. That is, that requested number is usually a maximum, and frequently doesn't reflect the actual number of bytes read. Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- fs/nfs/file.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/nfs/file.c b/fs/nfs/file.c index 57cf94f129ba..7f4910c98c7c 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c @@ -262,9 +262,11 @@ nfs_file_read(struct kiocb *iocb, const struct iovec *iov, (unsigned long) count, (unsigned long) pos); result = nfs_revalidate_mapping(inode, iocb->ki_filp->f_mapping); - nfs_add_stats(inode, NFSIOS_NORMALREADBYTES, count); - if (!result) + if (!result) { result = generic_file_aio_read(iocb, iov, nr_segs, pos); + if (result > 0) + nfs_add_stats(inode, NFSIOS_NORMALREADBYTES, result); + } return result; } -- cgit v1.2.3 From aa2f1ef10e6ad65c9138ec576f82c08f32e6f32c Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Mon, 1 Feb 2010 14:17:32 -0500 Subject: NFS: Account for NFS bytes read via the splice API Bytes read via the splice API should be accounted for in the NFS performance statistics. Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- fs/nfs/file.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/nfs/file.c b/fs/nfs/file.c index 7f4910c98c7c..abbc20281ea4 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c @@ -284,8 +284,11 @@ nfs_file_splice_read(struct file *filp, loff_t *ppos, (unsigned long) count, (unsigned long long) *ppos); res = nfs_revalidate_mapping(inode, filp->f_mapping); - if (!res) + if (!res) { res = generic_file_splice_read(filp, ppos, pipe, count, flags); + if (res > 0) + nfs_add_stats(inode, NFSIOS_NORMALREADBYTES, res); + } return res; } -- cgit v1.2.3 From 7e381172cf6e0282a56374e50667515aed55166a Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Mon, 1 Feb 2010 14:17:41 -0500 Subject: NFS: Improve NFS iostat byte count accuracy for writes The bytes counted by the performance counters for NFS writes should reflect write and sync errors. If the write(2) system call reports an error, the bytes should not be counted. And, if the write is short, the actual number of bytes that was written should be counted, not the number of bytes that was requested. Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- fs/nfs/file.c | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) (limited to 'fs') diff --git a/fs/nfs/file.c b/fs/nfs/file.c index abbc20281ea4..ae8d02294e46 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c @@ -601,6 +601,7 @@ static ssize_t nfs_file_write(struct kiocb *iocb, const struct iovec *iov, { struct dentry * dentry = iocb->ki_filp->f_path.dentry; struct inode * inode = dentry->d_inode; + unsigned long written = 0; ssize_t result; size_t count = iov_length(iov, nr_segs); @@ -627,14 +628,18 @@ static ssize_t nfs_file_write(struct kiocb *iocb, const struct iovec *iov, if (!count) goto out; - nfs_add_stats(inode, NFSIOS_NORMALWRITTENBYTES, count); result = generic_file_aio_write(iocb, iov, nr_segs, pos); + if (result > 0) + written = result; + /* Return error values for O_DSYNC and IS_SYNC() */ if (result >= 0 && nfs_need_sync_write(iocb->ki_filp, inode)) { int err = nfs_do_fsync(nfs_file_open_context(iocb->ki_filp), inode); if (err < 0) result = err; } + if (result > 0) + nfs_add_stats(inode, NFSIOS_NORMALWRITTENBYTES, written); out: return result; @@ -649,6 +654,7 @@ static ssize_t nfs_file_splice_write(struct pipe_inode_info *pipe, { struct dentry *dentry = filp->f_path.dentry; struct inode *inode = dentry->d_inode; + unsigned long written = 0; ssize_t ret; dprintk("NFS splice_write(%s/%s, %lu@%llu)\n", @@ -659,14 +665,17 @@ static ssize_t nfs_file_splice_write(struct pipe_inode_info *pipe, * The combination of splice and an O_APPEND destination is disallowed. */ - nfs_add_stats(inode, NFSIOS_NORMALWRITTENBYTES, count); - ret = generic_file_splice_write(pipe, filp, ppos, count, flags); + if (ret > 0) + written = ret; + if (ret >= 0 && nfs_need_sync_write(filp, inode)) { int err = nfs_do_fsync(nfs_file_open_context(filp), inode); if (err < 0) ret = err; } + if (ret > 0) + nfs_add_stats(inode, NFSIOS_NORMALWRITTENBYTES, written); return ret; } -- cgit v1.2.3 From f895c53f8ace3c3e49ebf9def90e63fc6d46d2bf Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Mon, 1 Feb 2010 14:17:50 -0500 Subject: NFS: Make close(2) asynchronous when closing NFS O_DIRECT files For NFSv2 and v3: O_DIRECT writes are always synchronous, and aren't cached, so nothing should be flushed when closing an NFS O_DIRECT file descriptor. Thus there are no write errors to report on close(2). In addition, there's no cached data to verify on the next open(2), so we don't need clean GETATTR results at close time to compare with. Thus, there's no need for the nfs_revalidate_inode() call when closing an NFS O_DIRECT file. This reduces the number of synchronous on-the-wire requests for a simple open-write-close of an NFS O_DIRECT file by roughly 20%. For NFSv4: Call nfs4_do_close() with wait set to zero when closing an NFS O_DIRECT file. The CLOSE will go on the wire, but the application won't wait for it to complete. Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- fs/nfs/inode.c | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) (limited to 'fs') diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index f141bde7756a..87cca56846d6 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -620,11 +620,6 @@ void put_nfs_open_context(struct nfs_open_context *ctx) __put_nfs_open_context(ctx, 0); } -static void put_nfs_open_context_sync(struct nfs_open_context *ctx) -{ - __put_nfs_open_context(ctx, 1); -} - /* * Ensure that mmap has a recent RPC credential for use when writing out * shared pages @@ -671,7 +666,7 @@ static void nfs_file_clear_open_context(struct file *filp) spin_lock(&inode->i_lock); list_move_tail(&ctx->list, &NFS_I(inode)->open_files); spin_unlock(&inode->i_lock); - put_nfs_open_context_sync(ctx); + __put_nfs_open_context(ctx, filp->f_flags & O_DIRECT ? 0 : 1); } } -- cgit v1.2.3 From 888ef2e3f8b7b8daeb031bfb4ad1fd4fa817e193 Mon Sep 17 00:00:00 2001 From: Alexandros Batsakis Date: Fri, 5 Feb 2010 03:45:03 -0800 Subject: nfs: kill renewd before clearing client minor version renewd should be synchronously killed before we destroy the session in nfs4_clear_minor_version Signed-off-by: Alexandros Batsakis [Trond.Myklebust@netapp.com: clean up to remove 'unused function warning when !CONFIG_NFS_V4] Signed-off-by: Trond Myklebust --- fs/nfs/client.c | 48 ++++++++++++++++++++++++------------------------ 1 file changed, 24 insertions(+), 24 deletions(-) (limited to 'fs') diff --git a/fs/nfs/client.c b/fs/nfs/client.c index ee77713ce68b..2274f1737336 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c @@ -164,30 +164,7 @@ error_0: return ERR_PTR(err); } -static void nfs4_shutdown_client(struct nfs_client *clp) -{ -#ifdef CONFIG_NFS_V4 - if (__test_and_clear_bit(NFS_CS_RENEWD, &clp->cl_res_state)) - nfs4_kill_renewd(clp); - BUG_ON(!RB_EMPTY_ROOT(&clp->cl_state_owners)); - if (__test_and_clear_bit(NFS_CS_IDMAP, &clp->cl_res_state)) - nfs_idmap_delete(clp); - - rpc_destroy_wait_queue(&clp->cl_rpcwaitq); -#endif -} - -/* - * Destroy the NFS4 callback service - */ -static void nfs4_destroy_callback(struct nfs_client *clp) -{ #ifdef CONFIG_NFS_V4 - if (__test_and_clear_bit(NFS_CS_CALLBACK, &clp->cl_res_state)) - nfs_callback_down(clp->cl_minorversion); -#endif /* CONFIG_NFS_V4 */ -} - /* * Clears/puts all minor version specific parts from an nfs_client struct * reverting it to minorversion 0. @@ -202,9 +179,33 @@ static void nfs4_clear_client_minor_version(struct nfs_client *clp) clp->cl_call_sync = _nfs4_call_sync; #endif /* CONFIG_NFS_V4_1 */ +} +/* + * Destroy the NFS4 callback service + */ +static void nfs4_destroy_callback(struct nfs_client *clp) +{ + if (__test_and_clear_bit(NFS_CS_CALLBACK, &clp->cl_res_state)) + nfs_callback_down(clp->cl_minorversion); +} + +static void nfs4_shutdown_client(struct nfs_client *clp) +{ + if (__test_and_clear_bit(NFS_CS_RENEWD, &clp->cl_res_state)) + nfs4_kill_renewd(clp); + nfs4_clear_client_minor_version(clp); nfs4_destroy_callback(clp); + if (__test_and_clear_bit(NFS_CS_IDMAP, &clp->cl_res_state)) + nfs_idmap_delete(clp); + + rpc_destroy_wait_queue(&clp->cl_rpcwaitq); } +#else +static void nfs4_shutdown_client(struct nfs_client *clp) +{ +} +#endif /* CONFIG_NFS_V4 */ /* * Destroy a shared client record @@ -213,7 +214,6 @@ static void nfs_free_client(struct nfs_client *clp) { dprintk("--> nfs_free_client(%u)\n", clp->rpc_ops->version); - nfs4_clear_client_minor_version(clp); nfs4_shutdown_client(clp); nfs_fscache_release_client_cookie(clp); -- cgit v1.2.3 From dc96aef96a75348b4d1b01c4c0429ab52780683e Mon Sep 17 00:00:00 2001 From: Alexandros Batsakis Date: Fri, 5 Feb 2010 03:45:04 -0800 Subject: nfs: prevent backlogging of renewd requests If the renewd send queue gets backlogged (e.g., if the server goes down), we will keep filling the queue with periodic RENEW/SEQUENCE requests. This patch schedules a new renewd request if and only if the previous one returns (either success or failure) Signed-off-by: Alexandros Batsakis [Trond.Myklebust@netapp.com: moved nfs4_schedule_state_renewal() into separate nfs4_renew_release() and nfs41_sequence_release() callbacks to ensure correct behaviour on call setup failure] Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 24 ++++++++++++++++++++---- fs/nfs/nfs4renewd.c | 24 +++++++----------------- 2 files changed, 27 insertions(+), 21 deletions(-) (limited to 'fs') diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 84b53d38f50b..726bc195039d 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -3147,10 +3147,17 @@ static void nfs4_proc_commit_setup(struct nfs_write_data *data, struct rpc_messa * nfs4_proc_async_renew(): This is not one of the nfs_rpc_ops; it is a special * standalone procedure for queueing an asynchronous RENEW. */ +static void nfs4_renew_release(void *data) +{ + struct nfs_client *clp = data; + + nfs4_schedule_state_renewal(clp); +} + static void nfs4_renew_done(struct rpc_task *task, void *data) { - struct nfs_client *clp = (struct nfs_client *)task->tk_msg.rpc_argp; - unsigned long timestamp = (unsigned long)data; + struct nfs_client *clp = data; + unsigned long timestamp = task->tk_start; if (task->tk_status < 0) { /* Unless we're shutting down, schedule state recovery! */ @@ -3166,6 +3173,7 @@ static void nfs4_renew_done(struct rpc_task *task, void *data) static const struct rpc_call_ops nfs4_renew_ops = { .rpc_call_done = nfs4_renew_done, + .rpc_release = nfs4_renew_release, }; int nfs4_proc_async_renew(struct nfs_client *clp, struct rpc_cred *cred) @@ -3177,7 +3185,7 @@ int nfs4_proc_async_renew(struct nfs_client *clp, struct rpc_cred *cred) }; return rpc_call_async(clp->cl_rpcclient, &msg, RPC_TASK_SOFT, - &nfs4_renew_ops, (void *)jiffies); + &nfs4_renew_ops, clp); } int nfs4_proc_renew(struct nfs_client *clp, struct rpc_cred *cred) @@ -5023,7 +5031,14 @@ static int nfs4_proc_sequence(struct nfs_client *clp, struct rpc_cred *cred) &res, args.sa_cache_this, 1); } -void nfs41_sequence_call_done(struct rpc_task *task, void *data) +static void nfs41_sequence_release(void *data) +{ + struct nfs_client *clp = (struct nfs_client *)data; + + nfs4_schedule_state_renewal(clp); +} + +static void nfs41_sequence_call_done(struct rpc_task *task, void *data) { struct nfs_client *clp = (struct nfs_client *)data; @@ -5064,6 +5079,7 @@ static void nfs41_sequence_prepare(struct rpc_task *task, void *data) static const struct rpc_call_ops nfs41_sequence_ops = { .rpc_call_done = nfs41_sequence_call_done, .rpc_call_prepare = nfs41_sequence_prepare, + .rpc_release = nfs41_sequence_release, }; static int nfs41_proc_async_sequence(struct nfs_client *clp, diff --git a/fs/nfs/nfs4renewd.c b/fs/nfs/nfs4renewd.c index 0156c01c212c..d87f10327b72 100644 --- a/fs/nfs/nfs4renewd.c +++ b/fs/nfs/nfs4renewd.c @@ -36,11 +36,6 @@ * as an rpc_task, not a real kernel thread, so it always runs in rpciod's * context. There is one renewd per nfs_server. * - * TODO: If the send queue gets backlogged (e.g., if the server goes down), - * we will keep filling the queue with periodic RENEW requests. We need a - * mechanism for ensuring that if renewd successfully sends off a request, - * then it only wakes up when the request is finished. Maybe use the - * child task framework of the RPC layer? */ #include @@ -63,7 +58,7 @@ nfs4_renew_state(struct work_struct *work) struct nfs_client *clp = container_of(work, struct nfs_client, cl_renewd.work); struct rpc_cred *cred; - long lease, timeout; + long lease; unsigned long last, now; ops = nfs4_state_renewal_ops[clp->cl_minorversion]; @@ -75,7 +70,6 @@ nfs4_renew_state(struct work_struct *work) lease = clp->cl_lease_time; last = clp->cl_last_renewal; now = jiffies; - timeout = (2 * lease) / 3 + (long)last - (long)now; /* Are we close to a lease timeout? */ if (time_after(now, last + lease/3)) { cred = ops->get_state_renewal_cred_locked(clp); @@ -90,19 +84,15 @@ nfs4_renew_state(struct work_struct *work) /* Queue an asynchronous RENEW. */ ops->sched_state_renewal(clp, cred); put_rpccred(cred); + goto out_exp; } - timeout = (2 * lease) / 3; - spin_lock(&clp->cl_lock); - } else + } else { dprintk("%s: failed to call renewd. Reason: lease not expired \n", __func__); - if (timeout < 5 * HZ) /* safeguard */ - timeout = 5 * HZ; - dprintk("%s: requeueing work. Lease period = %ld\n", - __func__, (timeout + HZ - 1) / HZ); - cancel_delayed_work(&clp->cl_renewd); - schedule_delayed_work(&clp->cl_renewd, timeout); - spin_unlock(&clp->cl_lock); + spin_unlock(&clp->cl_lock); + } + nfs4_schedule_state_renewal(clp); +out_exp: nfs_expire_unreferenced_delegations(clp); out: dprintk("%s: done\n", __func__); -- cgit v1.2.3 From 7135840fc74699513d50e0c9c64922f2d38aa5e3 Mon Sep 17 00:00:00 2001 From: Alexandros Batsakis Date: Fri, 5 Feb 2010 03:45:05 -0800 Subject: nfs41: renewd sequence operations should take/put client reference renewd sends SEQUENCE requests to the NFS server in order to renew state. As the request is asynchronous, renewd should take a reference to the nfs_client to prevent concurrent umounts from freeing the session/client Signed-off-by: Alexandros Batsakis Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) (limited to 'fs') diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 726bc195039d..663ae0c36834 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -419,7 +419,8 @@ static void nfs41_sequence_done(struct nfs_client *clp, clp->cl_last_renewal = timestamp; spin_unlock(&clp->cl_lock); /* Check sequence flags */ - nfs41_handle_sequence_flag_errors(clp, res->sr_status_flags); + if (atomic_read(&clp->cl_count) > 1) + nfs41_handle_sequence_flag_errors(clp, res->sr_status_flags); } out: /* The session may be reset by one of the error handlers. */ @@ -5035,7 +5036,9 @@ static void nfs41_sequence_release(void *data) { struct nfs_client *clp = (struct nfs_client *)data; - nfs4_schedule_state_renewal(clp); + if (atomic_read(&clp->cl_count) > 1) + nfs4_schedule_state_renewal(clp); + nfs_put_client(clp); } static void nfs41_sequence_call_done(struct rpc_task *task, void *data) @@ -5046,6 +5049,8 @@ static void nfs41_sequence_call_done(struct rpc_task *task, void *data) if (task->tk_status < 0) { dprintk("%s ERROR %d\n", __func__, task->tk_status); + if (atomic_read(&clp->cl_count) == 1) + goto out; if (_nfs4_async_handle_error(task, NULL, clp, NULL) == -EAGAIN) { @@ -5054,7 +5059,7 @@ static void nfs41_sequence_call_done(struct rpc_task *task, void *data) } } dprintk("%s rpc_cred %p\n", __func__, task->tk_msg.rpc_cred); - +out: kfree(task->tk_msg.rpc_argp); kfree(task->tk_msg.rpc_resp); @@ -5092,12 +5097,13 @@ static int nfs41_proc_async_sequence(struct nfs_client *clp, .rpc_cred = cred, }; + if (!atomic_inc_not_zero(&clp->cl_count)) + return -EIO; args = kzalloc(sizeof(*args), GFP_KERNEL); - if (!args) - return -ENOMEM; res = kzalloc(sizeof(*res), GFP_KERNEL); - if (!res) { + if (!args || !res) { kfree(args); + nfs_put_client(clp); return -ENOMEM; } res->sr_slotid = NFS4_MAX_SLOT_TABLE; -- cgit v1.2.3 From 0851de06174e9800e76b26e4be0ca94294c09c8c Mon Sep 17 00:00:00 2001 From: Alexandros Batsakis Date: Fri, 5 Feb 2010 03:45:06 -0800 Subject: nfs4: renewd renew operations should take/put a client reference renewd sends RENEW requests to the NFS server in order to renew state. As the request is asynchronous, renewd should take a reference to the nfs_client to prevent concurrent umounts from freeing the client Signed-off-by: Alexandros Batsakis Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 663ae0c36834..68f1fe00c08c 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -3152,7 +3152,9 @@ static void nfs4_renew_release(void *data) { struct nfs_client *clp = data; - nfs4_schedule_state_renewal(clp); + if (atomic_read(&clp->cl_count) > 1) + nfs4_schedule_state_renewal(clp); + nfs_put_client(clp); } static void nfs4_renew_done(struct rpc_task *task, void *data) @@ -3185,6 +3187,8 @@ int nfs4_proc_async_renew(struct nfs_client *clp, struct rpc_cred *cred) .rpc_cred = cred, }; + if (!atomic_inc_not_zero(&clp->cl_count)) + return -EIO; return rpc_call_async(clp->cl_rpcclient, &msg, RPC_TASK_SOFT, &nfs4_renew_ops, clp); } -- cgit v1.2.3 From 0f79fd6f5c52e05918e44996b0a1b18383d0fbc2 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 2 Mar 2010 13:06:21 -0500 Subject: NFSv4.1: Various fixes to the sequence flag error handling Ensure that we change the EXCHANGE_ID verifier (i.e. clp->cl_boot_time) when we want to reset all state. This is mainly needed when the server tells us that it is revoking our open or lock stateids. Handle revoking of recallable state by expiring the delegations. Handle callback path issues by expiring the delegations and then resetting the session. Signed-off-by: Trond Myklebust --- fs/nfs/nfs4state.c | 57 ++++++++++++++++++++++++++++++++++++++++++------------ 1 file changed, 45 insertions(+), 12 deletions(-) (limited to 'fs') diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index 2931c46c4127..6c5ed51f105e 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -1255,26 +1255,59 @@ void nfs41_handle_recall_slot(struct nfs_client *clp) nfs4_schedule_state_recovery(clp); } +static void nfs4_reset_all_state(struct nfs_client *clp) +{ + if (test_and_set_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state) == 0) { + clp->cl_boot_time = CURRENT_TIME; + nfs4_state_start_reclaim_nograce(clp); + nfs4_schedule_state_recovery(clp); + } +} + +static void nfs41_handle_server_reboot(struct nfs_client *clp) +{ + if (test_and_set_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state) == 0) { + nfs4_state_start_reclaim_reboot(clp); + nfs4_schedule_state_recovery(clp); + } +} + +static void nfs41_handle_state_revoked(struct nfs_client *clp) +{ + /* Temporary */ + nfs4_reset_all_state(clp); +} + +static void nfs41_handle_recallable_state_revoked(struct nfs_client *clp) +{ + /* This will need to handle layouts too */ + nfs_expire_all_delegations(clp); +} + +static void nfs41_handle_cb_path_down(struct nfs_client *clp) +{ + nfs_expire_all_delegations(clp); + if (test_and_set_bit(NFS4CLNT_SESSION_RESET, &clp->cl_state) == 0) + nfs4_schedule_state_recovery(clp); +} + void nfs41_handle_sequence_flag_errors(struct nfs_client *clp, u32 flags) { if (!flags) return; - else if (flags & SEQ4_STATUS_RESTART_RECLAIM_NEEDED) { - set_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state); - nfs4_state_start_reclaim_reboot(clp); - nfs4_schedule_state_recovery(clp); - } else if (flags & (SEQ4_STATUS_EXPIRED_ALL_STATE_REVOKED | + else if (flags & SEQ4_STATUS_RESTART_RECLAIM_NEEDED) + nfs41_handle_server_reboot(clp); + else if (flags & (SEQ4_STATUS_EXPIRED_ALL_STATE_REVOKED | SEQ4_STATUS_EXPIRED_SOME_STATE_REVOKED | SEQ4_STATUS_ADMIN_STATE_REVOKED | - SEQ4_STATUS_RECALLABLE_STATE_REVOKED | - SEQ4_STATUS_LEASE_MOVED)) { - set_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state); - nfs4_state_start_reclaim_nograce(clp); - nfs4_schedule_state_recovery(clp); - } else if (flags & (SEQ4_STATUS_CB_PATH_DOWN | + SEQ4_STATUS_LEASE_MOVED)) + nfs41_handle_state_revoked(clp); + else if (flags & SEQ4_STATUS_RECALLABLE_STATE_REVOKED) + nfs41_handle_recallable_state_revoked(clp); + else if (flags & (SEQ4_STATUS_CB_PATH_DOWN | SEQ4_STATUS_BACKCHANNEL_FAULT | SEQ4_STATUS_CB_PATH_DOWN_SESSION)) - nfs_expire_all_delegations(clp); + nfs41_handle_cb_path_down(clp); } static int nfs4_reset_session(struct nfs_client *clp) -- cgit v1.2.3 From ebed9203b68a4f333ce5d17e874b26c3afcfeff1 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 2 Mar 2010 13:06:22 -0500 Subject: NFS: Fix an allocation-under-spinlock bug sunrpc_cache_update() will always call detail->update() from inside the detail->hash_lock, so it cannot allocate memory. Signed-off-by: Trond Myklebust Cc: stable@kernel.org --- fs/nfs/dns_resolve.c | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) (limited to 'fs') diff --git a/fs/nfs/dns_resolve.c b/fs/nfs/dns_resolve.c index 95e1ca765d47..3f0cd4dfddaf 100644 --- a/fs/nfs/dns_resolve.c +++ b/fs/nfs/dns_resolve.c @@ -36,6 +36,19 @@ struct nfs_dns_ent { }; +static void nfs_dns_ent_update(struct cache_head *cnew, + struct cache_head *ckey) +{ + struct nfs_dns_ent *new; + struct nfs_dns_ent *key; + + new = container_of(cnew, struct nfs_dns_ent, h); + key = container_of(ckey, struct nfs_dns_ent, h); + + memcpy(&new->addr, &key->addr, key->addrlen); + new->addrlen = key->addrlen; +} + static void nfs_dns_ent_init(struct cache_head *cnew, struct cache_head *ckey) { @@ -49,8 +62,7 @@ static void nfs_dns_ent_init(struct cache_head *cnew, new->hostname = kstrndup(key->hostname, key->namelen, GFP_KERNEL); if (new->hostname) { new->namelen = key->namelen; - memcpy(&new->addr, &key->addr, key->addrlen); - new->addrlen = key->addrlen; + nfs_dns_ent_update(cnew, ckey); } else { new->namelen = 0; new->addrlen = 0; @@ -234,7 +246,7 @@ static struct cache_detail nfs_dns_resolve = { .cache_show = nfs_dns_show, .match = nfs_dns_match, .init = nfs_dns_ent_init, - .update = nfs_dns_ent_init, + .update = nfs_dns_ent_update, .alloc = nfs_dns_ent_alloc, }; -- cgit v1.2.3 From 180b62a3d837613fcac3ce89576526423926c3c3 Mon Sep 17 00:00:00 2001 From: Andy Adamson Date: Tue, 2 Mar 2010 13:19:36 -0500 Subject: nfs41 fix NFS4ERR_CLID_INUSE for exchange id Signed-off-by: Andy Adamson Signed-off-by: Benny Halevy Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 68f1fe00c08c..adc116c57e14 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -4519,7 +4519,7 @@ int nfs4_proc_exchange_id(struct nfs_client *clp, struct rpc_cred *cred) status = rpc_call_sync(clp->cl_rpcclient, &msg, 0); - if (status != NFS4ERR_CLID_INUSE) + if (status != -NFS4ERR_CLID_INUSE) break; if (signalled()) -- cgit v1.2.3 From 8fc795f703c5138e1a8bfb88c69f52632031aa6a Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 19 Feb 2010 16:46:56 -0800 Subject: NFS: Cleanup - move nfs_write_inode() into fs/nfs/write.c The sole purpose of nfs_write_inode is to commit unstable writes, so move it into fs/nfs/write.c, and make nfs_commit_inode static. Signed-off-by: Trond Myklebust --- fs/nfs/inode.c | 12 ------------ fs/nfs/write.c | 24 +++++++++++++++++++++++- include/linux/nfs_fs.h | 7 ------- 3 files changed, 23 insertions(+), 20 deletions(-) (limited to 'fs') diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 7f9ecc46f3fb..89e98312599d 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -97,18 +97,6 @@ u64 nfs_compat_user_ino64(u64 fileid) return ino; } -int nfs_write_inode(struct inode *inode, struct writeback_control *wbc) -{ - int ret; - - ret = nfs_commit_inode(inode, - wbc->sync_mode == WB_SYNC_ALL ? FLUSH_SYNC : 0); - if (ret >= 0) - return 0; - __mark_inode_dirty(inode, I_DIRTY_DATASYNC); - return ret; -} - void nfs_clear_inode(struct inode *inode) { /* diff --git a/fs/nfs/write.c b/fs/nfs/write.c index d63d964a0392..09e97097baaa 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -1391,7 +1391,7 @@ static const struct rpc_call_ops nfs_commit_ops = { .rpc_release = nfs_commit_release, }; -int nfs_commit_inode(struct inode *inode, int how) +static int nfs_commit_inode(struct inode *inode, int how) { LIST_HEAD(head); int res; @@ -1406,13 +1406,35 @@ int nfs_commit_inode(struct inode *inode, int how) } return res; } + +static int nfs_commit_unstable_pages(struct inode *inode, struct writeback_control *wbc) +{ + int ret; + + ret = nfs_commit_inode(inode, + wbc->sync_mode == WB_SYNC_ALL ? FLUSH_SYNC : 0); + if (ret >= 0) + return 0; + __mark_inode_dirty(inode, I_DIRTY_DATASYNC); + return ret; +} #else static inline int nfs_commit_list(struct inode *inode, struct list_head *head, int how) { return 0; } + +static int nfs_commit_unstable_pages(struct inode *inode, struct writeback_control *wbc) +{ + return 0; +} #endif +int nfs_write_inode(struct inode *inode, struct writeback_control *wbc) +{ + return nfs_commit_unstable_pages(inode, wbc); +} + long nfs_sync_mapping_wait(struct address_space *mapping, struct writeback_control *wbc, int how) { struct inode *inode = mapping->host; diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index d09db1bc9083..384ea3ef2863 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -483,15 +483,8 @@ extern int nfs_wb_nocommit(struct inode *inode); extern int nfs_wb_page(struct inode *inode, struct page* page); extern int nfs_wb_page_cancel(struct inode *inode, struct page* page); #if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4) -extern int nfs_commit_inode(struct inode *, int); extern struct nfs_write_data *nfs_commitdata_alloc(void); extern void nfs_commit_free(struct nfs_write_data *wdata); -#else -static inline int -nfs_commit_inode(struct inode *inode, int how) -{ - return 0; -} #endif static inline int -- cgit v1.2.3 From ff778d02bf867e1733a09b34ad6dbb723b024814 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 19 Feb 2010 16:53:39 -0800 Subject: NFS: Add a count of the number of unstable writes carried by an inode In order to know when we should do opportunistic commits of the unstable writes, when the VM is doing a background flush, we add a field to count the number of unstable writes. Signed-off-by: Trond Myklebust --- fs/nfs/inode.c | 1 + fs/nfs/write.c | 14 ++++++++++---- include/linux/nfs_fs.h | 1 + 3 files changed, 12 insertions(+), 4 deletions(-) (limited to 'fs') diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 89e98312599d..aa5a831001ab 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -1404,6 +1404,7 @@ static void init_once(void *foo) INIT_LIST_HEAD(&nfsi->access_cache_inode_lru); INIT_RADIX_TREE(&nfsi->nfs_page_tree, GFP_ATOMIC); nfsi->npages = 0; + nfsi->ncommit = 0; atomic_set(&nfsi->silly_count, 1); INIT_HLIST_HEAD(&nfsi->silly_list); init_waitqueue_head(&nfsi->waitqueue); diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 09e97097baaa..dc08a6fbde67 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -438,6 +438,7 @@ nfs_mark_request_commit(struct nfs_page *req) radix_tree_tag_set(&nfsi->nfs_page_tree, req->wb_index, NFS_PAGE_TAG_COMMIT); + nfsi->ncommit++; spin_unlock(&inode->i_lock); inc_zone_page_state(req->wb_page, NR_UNSTABLE_NFS); inc_bdi_stat(req->wb_page->mapping->backing_dev_info, BDI_RECLAIMABLE); @@ -573,11 +574,15 @@ static int nfs_scan_commit(struct inode *inode, struct list_head *dst, pgoff_t idx_start, unsigned int npages) { struct nfs_inode *nfsi = NFS_I(inode); + int ret; if (!nfs_need_commit(nfsi)) return 0; - return nfs_scan_list(nfsi, dst, idx_start, npages, NFS_PAGE_TAG_COMMIT); + ret = nfs_scan_list(nfsi, dst, idx_start, npages, NFS_PAGE_TAG_COMMIT); + if (ret > 0) + nfsi->ncommit -= ret; + return ret; } #else static inline int nfs_need_commit(struct nfs_inode *nfsi) @@ -642,9 +647,10 @@ static struct nfs_page *nfs_try_to_update_request(struct inode *inode, spin_lock(&inode->i_lock); } - if (nfs_clear_request_commit(req)) - radix_tree_tag_clear(&NFS_I(inode)->nfs_page_tree, - req->wb_index, NFS_PAGE_TAG_COMMIT); + if (nfs_clear_request_commit(req) && + radix_tree_tag_clear(&NFS_I(inode)->nfs_page_tree, + req->wb_index, NFS_PAGE_TAG_COMMIT) != NULL) + NFS_I(inode)->ncommit--; /* Okay, the request matches. Update the region */ if (offset < req->wb_offset) { diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index 384ea3ef2863..309217f46e28 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -166,6 +166,7 @@ struct nfs_inode { struct radix_tree_root nfs_page_tree; unsigned long npages; + unsigned long ncommit; /* Open contexts for shared mmap writes */ struct list_head open_files; -- cgit v1.2.3 From 420e3646bb7d93a571734034249fbb1ae1a7a5c7 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 19 Feb 2010 17:00:02 -0800 Subject: NFS: Reduce the number of unnecessary COMMIT calls If the caller is doing a non-blocking flush, and there are still writebacks pending on the wire, we can usually defer the COMMIT call until those writes are done. Also ensure that we honour the wbc->nonblocking flag. Signed-off-by: Trond Myklebust --- fs/nfs/write.c | 26 ++++++++++++++++++++++---- 1 file changed, 22 insertions(+), 4 deletions(-) (limited to 'fs') diff --git a/fs/nfs/write.c b/fs/nfs/write.c index dc08a6fbde67..fc05e35da6a9 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -1415,12 +1415,30 @@ static int nfs_commit_inode(struct inode *inode, int how) static int nfs_commit_unstable_pages(struct inode *inode, struct writeback_control *wbc) { - int ret; + struct nfs_inode *nfsi = NFS_I(inode); + int flags = FLUSH_SYNC; + int ret = 0; - ret = nfs_commit_inode(inode, - wbc->sync_mode == WB_SYNC_ALL ? FLUSH_SYNC : 0); - if (ret >= 0) + /* Don't commit yet if this is a non-blocking flush and there are + * lots of outstanding writes for this mapping. + */ + if (wbc->sync_mode == WB_SYNC_NONE && + nfsi->ncommit <= (nfsi->npages >> 1)) + goto out_mark_dirty; + + if (wbc->nonblocking) + flags = 0; + ret = nfs_commit_inode(inode, flags); + if (ret >= 0) { + if (wbc->sync_mode == WB_SYNC_NONE) { + if (ret < wbc->nr_to_write) + wbc->nr_to_write -= ret; + else + wbc->nr_to_write = 0; + } return 0; + } +out_mark_dirty: __mark_inode_dirty(inode, I_DIRTY_DATASYNC); return ret; } -- cgit v1.2.3 From 5bad5abec4058c5214bfc72cec418348d6747977 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 19 Feb 2010 17:02:24 -0800 Subject: NFS: Run COMMIT as an asynchronous RPC call when wbc->for_background is set Signed-off-by: Trond Myklebust Acked-by: Peter Zijlstra Acked-by: Wu Fengguang --- fs/nfs/write.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/nfs/write.c b/fs/nfs/write.c index fc05e35da6a9..704e67d392e5 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -1426,7 +1426,7 @@ static int nfs_commit_unstable_pages(struct inode *inode, struct writeback_contr nfsi->ncommit <= (nfsi->npages >> 1)) goto out_mark_dirty; - if (wbc->nonblocking) + if (wbc->nonblocking || wbc->for_background) flags = 0; ret = nfs_commit_inode(inode, flags); if (ret >= 0) { -- cgit v1.2.3 From 2928db1ffeacc9717c2d5c230d450bcc377b3ae9 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 19 Feb 2010 17:03:18 -0800 Subject: NFS: Ensure inode is always marked I_DIRTY_DATASYNC, if it has unstable pages Since nfs_scan_list() doesn't wait for locked pages, we have a race in which it is possible to end up with an inode that needs to send a COMMIT, but which does not have the I_DIRTY_DATASYNC flag set. Signed-off-by: Trond Myklebust --- fs/nfs/write.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'fs') diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 704e67d392e5..e40e949598fd 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -582,6 +582,8 @@ nfs_scan_commit(struct inode *inode, struct list_head *dst, pgoff_t idx_start, u ret = nfs_scan_list(nfsi, dst, idx_start, npages, NFS_PAGE_TAG_COMMIT); if (ret > 0) nfsi->ncommit -= ret; + if (nfs_need_commit(NFS_I(inode))) + __mark_inode_dirty(inode, I_DIRTY_DATASYNC); return ret; } #else -- cgit v1.2.3 From c988950eb6dd6f8e6d98503ca094622729e9aa13 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 19 Feb 2010 17:03:21 -0800 Subject: NFS: Simplify nfs_wb_page_cancel() In all cases we should be able to just remove the request and call cancel_dirty_page(). Signed-off-by: Trond Myklebust --- fs/nfs/write.c | 39 +-------------------------------------- include/linux/nfs_fs.h | 2 -- 2 files changed, 1 insertion(+), 40 deletions(-) (limited to 'fs') diff --git a/fs/nfs/write.c b/fs/nfs/write.c index e40e949598fd..dc7f5e9a23b4 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -540,19 +540,6 @@ static int nfs_wait_on_requests_locked(struct inode *inode, pgoff_t idx_start, u return res; } -static void nfs_cancel_commit_list(struct list_head *head) -{ - struct nfs_page *req; - - while(!list_empty(head)) { - req = nfs_list_entry(head->next); - nfs_list_remove_request(req); - nfs_clear_request_commit(req); - nfs_inode_remove_request(req); - nfs_unlock_request(req); - } -} - #if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4) static int nfs_need_commit(struct nfs_inode *nfsi) @@ -1495,13 +1482,6 @@ long nfs_sync_mapping_wait(struct address_space *mapping, struct writeback_contr pages = nfs_scan_commit(inode, &head, idx_start, npages); if (pages == 0) break; - if (how & FLUSH_INVALIDATE) { - spin_unlock(&inode->i_lock); - nfs_cancel_commit_list(&head); - ret = pages; - spin_lock(&inode->i_lock); - continue; - } pages += nfs_scan_commit(inode, &head, 0, 0); spin_unlock(&inode->i_lock); ret = nfs_commit_list(inode, &head, how); @@ -1558,26 +1538,13 @@ int nfs_wb_nocommit(struct inode *inode) int nfs_wb_page_cancel(struct inode *inode, struct page *page) { struct nfs_page *req; - loff_t range_start = page_offset(page); - loff_t range_end = range_start + (loff_t)(PAGE_CACHE_SIZE - 1); - struct writeback_control wbc = { - .bdi = page->mapping->backing_dev_info, - .sync_mode = WB_SYNC_ALL, - .nr_to_write = LONG_MAX, - .range_start = range_start, - .range_end = range_end, - }; int ret = 0; BUG_ON(!PageLocked(page)); for (;;) { req = nfs_page_find_request(page); if (req == NULL) - goto out; - if (test_bit(PG_CLEAN, &req->wb_flags)) { - nfs_release_request(req); break; - } if (nfs_lock_request_dontget(req)) { nfs_inode_remove_request(req); /* @@ -1591,12 +1558,8 @@ int nfs_wb_page_cancel(struct inode *inode, struct page *page) ret = nfs_wait_on_request(req); nfs_release_request(req); if (ret < 0) - goto out; + break; } - if (!PagePrivate(page)) - return 0; - ret = nfs_sync_mapping_wait(page->mapping, &wbc, FLUSH_INVALIDATE); -out: return ret; } diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index 309217f46e28..1083134c02ff 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -34,8 +34,6 @@ #define FLUSH_LOWPRI 8 /* low priority background flush */ #define FLUSH_HIGHPRI 16 /* high priority memory reclaim flush */ #define FLUSH_NOCOMMIT 32 /* Don't send the NFSv3/v4 COMMIT */ -#define FLUSH_INVALIDATE 64 /* Invalidate the page cache */ -#define FLUSH_NOWRITEPAGE 128 /* Don't call writepage() */ #ifdef __KERNEL__ -- cgit v1.2.3 From acdc53b2146c7ee67feb1f02f7bc3020126514b8 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 19 Feb 2010 17:03:26 -0800 Subject: NFS: Replace __nfs_write_mapping with sync_inode() Now that we have correct COMMIT semantics in writeback_single_inode, we can reduce and simplify nfs_wb_all(). Also replace nfs_wb_nocommit() with a call to filemap_write_and_wait(), which doesn't need to hold the inode->i_mutex. With that done, we can eliminate nfs_write_mapping() altogether. Signed-off-by: Trond Myklebust --- fs/nfs/inode.c | 15 +++++---------- fs/nfs/write.c | 42 +++++------------------------------------- include/linux/nfs_fs.h | 2 -- 3 files changed, 10 insertions(+), 49 deletions(-) (limited to 'fs') diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index aa5a831001ab..443772df9b17 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -495,17 +495,11 @@ int nfs_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat) int need_atime = NFS_I(inode)->cache_validity & NFS_INO_INVALID_ATIME; int err; - /* - * Flush out writes to the server in order to update c/mtime. - * - * Hold the i_mutex to suspend application writes temporarily; - * this prevents long-running writing applications from blocking - * nfs_wb_nocommit. - */ + /* Flush out writes to the server in order to update c/mtime. */ if (S_ISREG(inode->i_mode)) { - mutex_lock(&inode->i_mutex); - nfs_wb_nocommit(inode); - mutex_unlock(&inode->i_mutex); + err = filemap_write_and_wait(inode->i_mapping); + if (err) + goto out; } /* @@ -529,6 +523,7 @@ int nfs_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat) generic_fillattr(inode, stat); stat->ino = nfs_compat_user_ino64(NFS_FILEID(inode)); } +out: return err; } diff --git a/fs/nfs/write.c b/fs/nfs/write.c index dc7f5e9a23b4..0b323091b481 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -1454,7 +1454,6 @@ long nfs_sync_mapping_wait(struct address_space *mapping, struct writeback_contr pgoff_t idx_start, idx_end; unsigned int npages = 0; LIST_HEAD(head); - int nocommit = how & FLUSH_NOCOMMIT; long pages, ret; /* FIXME */ @@ -1471,14 +1470,11 @@ long nfs_sync_mapping_wait(struct address_space *mapping, struct writeback_contr npages = 0; } } - how &= ~FLUSH_NOCOMMIT; spin_lock(&inode->i_lock); do { ret = nfs_wait_on_requests_locked(inode, idx_start, npages); if (ret != 0) continue; - if (nocommit) - break; pages = nfs_scan_commit(inode, &head, idx_start, npages); if (pages == 0) break; @@ -1492,47 +1488,19 @@ long nfs_sync_mapping_wait(struct address_space *mapping, struct writeback_contr return ret; } -static int __nfs_write_mapping(struct address_space *mapping, struct writeback_control *wbc, int how) -{ - int ret; - - ret = nfs_writepages(mapping, wbc); - if (ret < 0) - goto out; - ret = nfs_sync_mapping_wait(mapping, wbc, how); - if (ret < 0) - goto out; - return 0; -out: - __mark_inode_dirty(mapping->host, I_DIRTY_PAGES); - return ret; -} - -/* Two pass sync: first using WB_SYNC_NONE, then WB_SYNC_ALL */ -static int nfs_write_mapping(struct address_space *mapping, int how) +/* + * flush the inode to disk. + */ +int nfs_wb_all(struct inode *inode) { struct writeback_control wbc = { - .bdi = mapping->backing_dev_info, .sync_mode = WB_SYNC_ALL, .nr_to_write = LONG_MAX, .range_start = 0, .range_end = LLONG_MAX, }; - return __nfs_write_mapping(mapping, &wbc, how); -} - -/* - * flush the inode to disk. - */ -int nfs_wb_all(struct inode *inode) -{ - return nfs_write_mapping(inode->i_mapping, 0); -} - -int nfs_wb_nocommit(struct inode *inode) -{ - return nfs_write_mapping(inode->i_mapping, FLUSH_NOCOMMIT); + return sync_inode(inode, &wbc); } int nfs_wb_page_cancel(struct inode *inode, struct page *page) diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index 1083134c02ff..93f439e7c5bf 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -33,7 +33,6 @@ #define FLUSH_STABLE 4 /* commit to stable storage */ #define FLUSH_LOWPRI 8 /* low priority background flush */ #define FLUSH_HIGHPRI 16 /* high priority memory reclaim flush */ -#define FLUSH_NOCOMMIT 32 /* Don't send the NFSv3/v4 COMMIT */ #ifdef __KERNEL__ @@ -478,7 +477,6 @@ extern int nfs_writeback_done(struct rpc_task *, struct nfs_write_data *); */ extern long nfs_sync_mapping_wait(struct address_space *, struct writeback_control *, int); extern int nfs_wb_all(struct inode *inode); -extern int nfs_wb_nocommit(struct inode *inode); extern int nfs_wb_page(struct inode *inode, struct page* page); extern int nfs_wb_page_cancel(struct inode *inode, struct page* page); #if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4) -- cgit v1.2.3 From 7f2f12d963e7c33a93bfb0b22f0178eb1e6a4196 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 19 Feb 2010 17:03:28 -0800 Subject: NFS: Simplify nfs_wb_page() Signed-off-by: Trond Myklebust --- fs/nfs/write.c | 120 ++++++++++--------------------------------------- include/linux/nfs_fs.h | 1 - 2 files changed, 23 insertions(+), 98 deletions(-) (limited to 'fs') diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 0b323091b481..53ff70e23993 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -502,44 +502,6 @@ int nfs_reschedule_unstable_write(struct nfs_page *req) } #endif -/* - * Wait for a request to complete. - * - * Interruptible by fatal signals only. - */ -static int nfs_wait_on_requests_locked(struct inode *inode, pgoff_t idx_start, unsigned int npages) -{ - struct nfs_inode *nfsi = NFS_I(inode); - struct nfs_page *req; - pgoff_t idx_end, next; - unsigned int res = 0; - int error; - - if (npages == 0) - idx_end = ~0; - else - idx_end = idx_start + npages - 1; - - next = idx_start; - while (radix_tree_gang_lookup_tag(&nfsi->nfs_page_tree, (void **)&req, next, 1, NFS_PAGE_TAG_LOCKED)) { - if (req->wb_index > idx_end) - break; - - next = req->wb_index + 1; - BUG_ON(!NFS_WBACK_BUSY(req)); - - kref_get(&req->wb_kref); - spin_unlock(&inode->i_lock); - error = nfs_wait_on_request(req); - nfs_release_request(req); - spin_lock(&inode->i_lock); - if (error < 0) - return error; - res++; - } - return res; -} - #if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4) static int nfs_need_commit(struct nfs_inode *nfsi) @@ -1432,7 +1394,7 @@ out_mark_dirty: return ret; } #else -static inline int nfs_commit_list(struct inode *inode, struct list_head *head, int how) +static int nfs_commit_inode(struct inode *inode, int how) { return 0; } @@ -1448,46 +1410,6 @@ int nfs_write_inode(struct inode *inode, struct writeback_control *wbc) return nfs_commit_unstable_pages(inode, wbc); } -long nfs_sync_mapping_wait(struct address_space *mapping, struct writeback_control *wbc, int how) -{ - struct inode *inode = mapping->host; - pgoff_t idx_start, idx_end; - unsigned int npages = 0; - LIST_HEAD(head); - long pages, ret; - - /* FIXME */ - if (wbc->range_cyclic) - idx_start = 0; - else { - idx_start = wbc->range_start >> PAGE_CACHE_SHIFT; - idx_end = wbc->range_end >> PAGE_CACHE_SHIFT; - if (idx_end > idx_start) { - pgoff_t l_npages = 1 + idx_end - idx_start; - npages = l_npages; - if (sizeof(npages) != sizeof(l_npages) && - (pgoff_t)npages != l_npages) - npages = 0; - } - } - spin_lock(&inode->i_lock); - do { - ret = nfs_wait_on_requests_locked(inode, idx_start, npages); - if (ret != 0) - continue; - pages = nfs_scan_commit(inode, &head, idx_start, npages); - if (pages == 0) - break; - pages += nfs_scan_commit(inode, &head, 0, 0); - spin_unlock(&inode->i_lock); - ret = nfs_commit_list(inode, &head, how); - spin_lock(&inode->i_lock); - - } while (ret >= 0); - spin_unlock(&inode->i_lock); - return ret; -} - /* * flush the inode to disk. */ @@ -1531,45 +1453,49 @@ int nfs_wb_page_cancel(struct inode *inode, struct page *page) return ret; } -static int nfs_wb_page_priority(struct inode *inode, struct page *page, - int how) +/* + * Write back all requests on one page - we do this before reading it. + */ +int nfs_wb_page(struct inode *inode, struct page *page) { loff_t range_start = page_offset(page); loff_t range_end = range_start + (loff_t)(PAGE_CACHE_SIZE - 1); struct writeback_control wbc = { - .bdi = page->mapping->backing_dev_info, .sync_mode = WB_SYNC_ALL, - .nr_to_write = LONG_MAX, + .nr_to_write = 0, .range_start = range_start, .range_end = range_end, }; + struct nfs_page *req; + int need_commit; int ret; - do { + while(PagePrivate(page)) { if (clear_page_dirty_for_io(page)) { ret = nfs_writepage_locked(page, &wbc); if (ret < 0) goto out_error; - } else if (!PagePrivate(page)) + } + req = nfs_find_and_lock_request(page); + if (!req) break; - ret = nfs_sync_mapping_wait(page->mapping, &wbc, how); - if (ret < 0) + if (IS_ERR(req)) { + ret = PTR_ERR(req); goto out_error; - } while (PagePrivate(page)); + } + need_commit = test_bit(PG_CLEAN, &req->wb_flags); + nfs_clear_page_tag_locked(req); + if (need_commit) { + ret = nfs_commit_inode(inode, FLUSH_SYNC); + if (ret < 0) + goto out_error; + } + } return 0; out_error: - __mark_inode_dirty(inode, I_DIRTY_PAGES); return ret; } -/* - * Write back all requests on one page - we do this before reading it. - */ -int nfs_wb_page(struct inode *inode, struct page* page) -{ - return nfs_wb_page_priority(inode, page, FLUSH_STABLE); -} - #ifdef CONFIG_MIGRATION int nfs_migrate_page(struct address_space *mapping, struct page *newpage, struct page *page) diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index 93f439e7c5bf..b789d85bff82 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -475,7 +475,6 @@ extern int nfs_writeback_done(struct rpc_task *, struct nfs_write_data *); * Try to write back everything synchronously (but check the * return value!) */ -extern long nfs_sync_mapping_wait(struct address_space *, struct writeback_control *, int); extern int nfs_wb_all(struct inode *inode); extern int nfs_wb_page(struct inode *inode, struct page* page); extern int nfs_wb_page_cancel(struct inode *inode, struct page* page); -- cgit v1.2.3 From 5cf95214ccb915591e2214f81de4659302d3e452 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 19 Feb 2010 17:03:29 -0800 Subject: NFS: Clean up nfs_sync_mapping Remove the redundant call to filemap_write_and_wait(). Signed-off-by: Trond Myklebust --- fs/nfs/inode.c | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) (limited to 'fs') diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 443772df9b17..e8b41170d295 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -114,16 +114,12 @@ void nfs_clear_inode(struct inode *inode) */ int nfs_sync_mapping(struct address_space *mapping) { - int ret; + int ret = 0; - if (mapping->nrpages == 0) - return 0; - unmap_mapping_range(mapping, 0, 0, 0); - ret = filemap_write_and_wait(mapping); - if (ret != 0) - goto out; - ret = nfs_wb_all(mapping->host); -out: + if (mapping->nrpages != 0) { + unmap_mapping_range(mapping, 0, 0, 0); + ret = nfs_wb_all(mapping->host); + } return ret; } -- cgit v1.2.3 From 1cda707d52e51a6cafac0aef12d2bd7052d572e6 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 19 Feb 2010 17:03:30 -0800 Subject: NFS: Remove requirement for inode->i_mutex from nfs_invalidate_mapping Signed-off-by: Trond Myklebust --- fs/nfs/dir.c | 2 +- fs/nfs/inode.c | 41 +---------------------------------------- fs/nfs/symlink.c | 2 +- include/linux/nfs_fs.h | 1 - 4 files changed, 3 insertions(+), 43 deletions(-) (limited to 'fs') diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index 3c7f03b669fb..a1f6b4438fb1 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -560,7 +560,7 @@ static int nfs_readdir(struct file *filp, void *dirent, filldir_t filldir) desc->entry = &my_entry; nfs_block_sillyrename(dentry); - res = nfs_revalidate_mapping_nolock(inode, filp->f_mapping); + res = nfs_revalidate_mapping(inode, filp->f_mapping); if (res < 0) goto out; diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index e8b41170d295..dbaaf7d2a188 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -754,7 +754,7 @@ int nfs_revalidate_inode(struct nfs_server *server, struct inode *inode) return __nfs_revalidate_inode(server, inode); } -static int nfs_invalidate_mapping_nolock(struct inode *inode, struct address_space *mapping) +static int nfs_invalidate_mapping(struct inode *inode, struct address_space *mapping) { struct nfs_inode *nfsi = NFS_I(inode); @@ -775,49 +775,10 @@ static int nfs_invalidate_mapping_nolock(struct inode *inode, struct address_spa return 0; } -static int nfs_invalidate_mapping(struct inode *inode, struct address_space *mapping) -{ - int ret = 0; - - mutex_lock(&inode->i_mutex); - if (NFS_I(inode)->cache_validity & NFS_INO_INVALID_DATA) { - ret = nfs_sync_mapping(mapping); - if (ret == 0) - ret = nfs_invalidate_mapping_nolock(inode, mapping); - } - mutex_unlock(&inode->i_mutex); - return ret; -} - -/** - * nfs_revalidate_mapping_nolock - Revalidate the pagecache - * @inode - pointer to host inode - * @mapping - pointer to mapping - */ -int nfs_revalidate_mapping_nolock(struct inode *inode, struct address_space *mapping) -{ - struct nfs_inode *nfsi = NFS_I(inode); - int ret = 0; - - if ((nfsi->cache_validity & NFS_INO_REVAL_PAGECACHE) - || nfs_attribute_timeout(inode) || NFS_STALE(inode)) { - ret = __nfs_revalidate_inode(NFS_SERVER(inode), inode); - if (ret < 0) - goto out; - } - if (nfsi->cache_validity & NFS_INO_INVALID_DATA) - ret = nfs_invalidate_mapping_nolock(inode, mapping); -out: - return ret; -} - /** * nfs_revalidate_mapping - Revalidate the pagecache * @inode - pointer to host inode * @mapping - pointer to mapping - * - * This version of the function will take the inode->i_mutex and attempt to - * flush out all dirty data if it needs to invalidate the page cache. */ int nfs_revalidate_mapping(struct inode *inode, struct address_space *mapping) { diff --git a/fs/nfs/symlink.c b/fs/nfs/symlink.c index 412738dbfbc7..2ea9e5c27e55 100644 --- a/fs/nfs/symlink.c +++ b/fs/nfs/symlink.c @@ -50,7 +50,7 @@ static void *nfs_follow_link(struct dentry *dentry, struct nameidata *nd) struct page *page; void *err; - err = ERR_PTR(nfs_revalidate_mapping_nolock(inode, inode->i_mapping)); + err = ERR_PTR(nfs_revalidate_mapping(inode, inode->i_mapping)); if (err) goto read_failed; page = read_cache_page(&inode->i_data, 0, diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index b789d85bff82..1a0b85aa151e 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -347,7 +347,6 @@ extern int nfs_attribute_timeout(struct inode *inode); extern int nfs_revalidate_inode(struct nfs_server *server, struct inode *inode); extern int __nfs_revalidate_inode(struct nfs_server *, struct inode *); extern int nfs_revalidate_mapping(struct inode *inode, struct address_space *mapping); -extern int nfs_revalidate_mapping_nolock(struct inode *inode, struct address_space *mapping); extern int nfs_setattr(struct dentry *, struct iattr *); extern void nfs_setattr_update_inode(struct inode *inode, struct iattr *attr); extern struct nfs_open_context *get_nfs_open_context(struct nfs_open_context *ctx); -- cgit v1.2.3