diff options
author | Linus Torvalds | 2017-09-11 22:01:44 -0700 |
---|---|---|
committer | Linus Torvalds | 2017-09-11 22:01:44 -0700 |
commit | 8e7757d83d07cc77ee2661e9615a2f9f4ce540cd (patch) | |
tree | b6c15efae2c117e7fbbe56ad0aaa2859d1cd35a8 /fs/nfs | |
parent | dd198ce7141aa8dd9ffcc9549de422fb055508de (diff) | |
parent | 1bd5d6d08ea7ed0794c8a3908383d6d6fc202cdd (diff) |
Merge tag 'nfs-for-4.14-1' of git://git.linux-nfs.org/projects/trondmy/linux-nfs
Pull NFS client updates from Trond Myklebust:
"Hightlights include:
Stable bugfixes:
- Fix mirror allocation in the writeback code to avoid a use after
free
- Fix the O_DSYNC writes to use the correct byte range
- Fix 2 use after free issues in the I/O code
Features:
- Writeback fixes to split up the inode->i_lock in order to reduce
contention
- RPC client receive fixes to reduce the amount of time the
xprt->transport_lock is held when receiving data from a socket into
am XDR buffer.
- Ditto fixes to reduce contention between call side users of the
rdma rb_lock, and its use in rpcrdma_reply_handler.
- Re-arrange rdma stats to reduce false cacheline sharing.
- Various rdma cleanups and optimisations.
- Refactor the NFSv4.1 exchange id code and clean up the code.
- Const-ify all instances of struct rpc_xprt_ops
Bugfixes:
- Fix the NFSv2 'sec=' mount option.
- NFSv4.1: don't use machine credentials for CLOSE when using
'sec=sys'
- Fix the NFSv3 GRANT callback when the port changes on the server.
- Fix livelock issues with COMMIT
- NFSv4: Use correct inode in _nfs4_opendata_to_nfs4_state() when
doing and NFSv4.1 open by filehandle"
* tag 'nfs-for-4.14-1' of git://git.linux-nfs.org/projects/trondmy/linux-nfs: (69 commits)
NFS: Count the bytes of skipped subrequests in nfs_lock_and_join_requests()
NFS: Don't hold the group lock when calling nfs_release_request()
NFS: Remove pnfs_generic_transfer_commit_list()
NFS: nfs_lock_and_join_requests and nfs_scan_commit_list can deadlock
NFS: Fix 2 use after free issues in the I/O code
NFS: Sync the correct byte range during synchronous writes
lockd: Delete an error message for a failed memory allocation in reclaimer()
NFS: remove jiffies field from access cache
NFS: flush data when locking a file to ensure cache coherence for mmap.
SUNRPC: remove some dead code.
NFS: don't expect errors from mempool_alloc().
xprtrdma: Use xprt_pin_rqst in rpcrdma_reply_handler
xprtrdma: Re-arrange struct rx_stats
NFS: Fix NFSv2 security settings
NFSv4.1: don't use machine credentials for CLOSE when using 'sec=sys'
SUNRPC: ECONNREFUSED should cause a rebind.
NFS: Remove unused parameter gfp_flags from nfs_pageio_init()
NFSv4: Fix up mirror allocation
SUNRPC: Add a separate spinlock to protect the RPC request receive list
SUNRPC: Cleanup xs_tcp_read_common()
...
Diffstat (limited to 'fs/nfs')
-rw-r--r-- | fs/nfs/callback_proc.c | 2 | ||||
-rw-r--r-- | fs/nfs/delegation.c | 2 | ||||
-rw-r--r-- | fs/nfs/dir.c | 4 | ||||
-rw-r--r-- | fs/nfs/direct.c | 4 | ||||
-rw-r--r-- | fs/nfs/file.c | 17 | ||||
-rw-r--r-- | fs/nfs/inode.c | 10 | ||||
-rw-r--r-- | fs/nfs/internal.h | 1 | ||||
-rw-r--r-- | fs/nfs/nfs4_fs.h | 11 | ||||
-rw-r--r-- | fs/nfs/nfs4proc.c | 274 | ||||
-rw-r--r-- | fs/nfs/pagelist.c | 170 | ||||
-rw-r--r-- | fs/nfs/pnfs.c | 43 | ||||
-rw-r--r-- | fs/nfs/pnfs.h | 2 | ||||
-rw-r--r-- | fs/nfs/pnfs_nfs.c | 44 | ||||
-rw-r--r-- | fs/nfs/read.c | 2 | ||||
-rw-r--r-- | fs/nfs/super.c | 12 | ||||
-rw-r--r-- | fs/nfs/write.c | 457 |
16 files changed, 490 insertions, 565 deletions
diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c index 5427cdf04c5a..14358de173fb 100644 --- a/fs/nfs/callback_proc.c +++ b/fs/nfs/callback_proc.c @@ -51,7 +51,7 @@ __be32 nfs4_callback_getattr(void *argp, void *resp, goto out_iput; res->size = i_size_read(inode); res->change_attr = delegation->change_attr; - if (nfsi->nrequests != 0) + if (nfs_have_writebacks(inode)) res->change_attr++; res->ctime = inode->i_ctime; res->mtime = inode->i_mtime; diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c index d7df5e67b0c1..606dd3871f66 100644 --- a/fs/nfs/delegation.c +++ b/fs/nfs/delegation.c @@ -1089,7 +1089,7 @@ bool nfs4_delegation_flush_on_close(const struct inode *inode) delegation = rcu_dereference(nfsi->delegation); if (delegation == NULL || !(delegation->type & FMODE_WRITE)) goto out; - if (nfsi->nrequests < delegation->pagemod_limit) + if (atomic_long_read(&nfsi->nrequests) < delegation->pagemod_limit) ret = false; out: rcu_read_unlock(); diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index 3522b1249019..5ceaeb1f6fb6 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -2260,7 +2260,6 @@ static int nfs_access_get_cached(struct inode *inode, struct rpc_cred *cred, str spin_lock(&inode->i_lock); retry = false; } - res->jiffies = cache->jiffies; res->cred = cache->cred; res->mask = cache->mask; list_move_tail(&cache->lru, &nfsi->access_cache_entry_lru); @@ -2296,7 +2295,6 @@ static int nfs_access_get_cached_rcu(struct inode *inode, struct rpc_cred *cred, goto out; if (nfs_check_cache_invalid(inode, NFS_INO_INVALID_ACCESS)) goto out; - res->jiffies = cache->jiffies; res->cred = cache->cred; res->mask = cache->mask; err = 0; @@ -2344,7 +2342,6 @@ void nfs_access_add_cache(struct inode *inode, struct nfs_access_entry *set) if (cache == NULL) return; RB_CLEAR_NODE(&cache->rb_node); - cache->jiffies = set->jiffies; cache->cred = get_rpccred(set->cred); cache->mask = set->mask; @@ -2432,7 +2429,6 @@ static int nfs_do_access(struct inode *inode, struct rpc_cred *cred, int mask) cache.mask = NFS_MAY_LOOKUP | NFS_MAY_EXECUTE | NFS_MAY_WRITE | NFS_MAY_READ; cache.cred = cred; - cache.jiffies = jiffies; status = NFS_PROTO(inode)->access(inode, &cache); if (status != 0) { if (status == -ESTALE) { diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index 6fb9fad2d1e6..d2972d537469 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -616,13 +616,13 @@ nfs_direct_write_scan_commit_list(struct inode *inode, struct list_head *list, struct nfs_commit_info *cinfo) { - spin_lock(&cinfo->inode->i_lock); + mutex_lock(&NFS_I(cinfo->inode)->commit_mutex); #ifdef CONFIG_NFS_V4_1 if (cinfo->ds != NULL && cinfo->ds->nwritten != 0) NFS_SERVER(inode)->pnfs_curr_ld->recover_commit_reqs(list, cinfo); #endif nfs_scan_commit_list(&cinfo->mds->list, list, cinfo, 0); - spin_unlock(&cinfo->inode->i_lock); + mutex_unlock(&NFS_I(cinfo->inode)->commit_mutex); } static void nfs_direct_write_reschedule(struct nfs_direct_req *dreq) diff --git a/fs/nfs/file.c b/fs/nfs/file.c index af330c31f627..a385d1c3f146 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c @@ -631,11 +631,11 @@ ssize_t nfs_file_write(struct kiocb *iocb, struct iov_iter *from) if (result <= 0) goto out; - result = generic_write_sync(iocb, result); - if (result < 0) - goto out; written = result; iocb->ki_pos += written; + result = generic_write_sync(iocb, written); + if (result < 0) + goto out; /* Return error values */ if (nfs_need_check_write(file, inode)) { @@ -744,15 +744,18 @@ do_setlk(struct file *filp, int cmd, struct file_lock *fl, int is_local) goto out; /* - * Revalidate the cache if the server has time stamps granular - * enough to detect subsecond changes. Otherwise, clear the - * cache to prevent missing any changes. + * Invalidate cache to prevent missing any changes. If + * the file is mapped, clear the page cache as well so + * those mappings will be loaded. * * This makes locking act as a cache coherency point. */ nfs_sync_mapping(filp->f_mapping); - if (!NFS_PROTO(inode)->have_delegation(inode, FMODE_READ)) + if (!NFS_PROTO(inode)->have_delegation(inode, FMODE_READ)) { nfs_zap_caches(inode); + if (mapping_mapped(filp->f_mapping)) + nfs_revalidate_mapping(inode, filp->f_mapping); + } out: return status; } diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 109279d6d91b..134d9f560240 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -1285,7 +1285,6 @@ static bool nfs_file_has_buffered_writers(struct nfs_inode *nfsi) static unsigned long nfs_wcc_update_inode(struct inode *inode, struct nfs_fattr *fattr) { - struct nfs_inode *nfsi = NFS_I(inode); unsigned long ret = 0; if ((fattr->valid & NFS_ATTR_FATTR_PRECHANGE) @@ -1315,7 +1314,7 @@ static unsigned long nfs_wcc_update_inode(struct inode *inode, struct nfs_fattr if ((fattr->valid & NFS_ATTR_FATTR_PRESIZE) && (fattr->valid & NFS_ATTR_FATTR_SIZE) && i_size_read(inode) == nfs_size_to_loff_t(fattr->pre_size) - && nfsi->nrequests == 0) { + && !nfs_have_writebacks(inode)) { i_size_write(inode, nfs_size_to_loff_t(fattr->size)); ret |= NFS_INO_INVALID_ATTR; } @@ -1823,7 +1822,7 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) if (new_isize != cur_isize) { /* Do we perhaps have any outstanding writes, or has * the file grown beyond our last write? */ - if (nfsi->nrequests == 0 || new_isize > cur_isize) { + if (!nfs_have_writebacks(inode) || new_isize > cur_isize) { i_size_write(inode, new_isize); if (!have_writers) invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA; @@ -2012,10 +2011,11 @@ static void init_once(void *foo) INIT_LIST_HEAD(&nfsi->access_cache_entry_lru); INIT_LIST_HEAD(&nfsi->access_cache_inode_lru); INIT_LIST_HEAD(&nfsi->commit_info.list); - nfsi->nrequests = 0; - nfsi->commit_info.ncommit = 0; + atomic_long_set(&nfsi->nrequests, 0); + atomic_long_set(&nfsi->commit_info.ncommit, 0); atomic_set(&nfsi->commit_info.rpcs_out, 0); init_rwsem(&nfsi->rmdir_sem); + mutex_init(&nfsi->commit_mutex); nfs4_init_once(nfsi); } diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index dc456416d2be..68cc22083639 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -251,7 +251,6 @@ int nfs_iocounter_wait(struct nfs_lock_context *l_ctx); extern const struct nfs_pageio_ops nfs_pgio_rw_ops; struct nfs_pgio_header *nfs_pgio_header_alloc(const struct nfs_rw_ops *); void nfs_pgio_header_free(struct nfs_pgio_header *); -void nfs_pgio_data_destroy(struct nfs_pgio_header *); int nfs_generic_pgio(struct nfs_pageio_descriptor *, struct nfs_pgio_header *); int nfs_initiate_pgio(struct rpc_clnt *clnt, struct nfs_pgio_header *hdr, struct rpc_cred *cred, const struct nfs_rpc_ops *rpc_ops, diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h index 40bd05f05e74..ac4f10b7f6c1 100644 --- a/fs/nfs/nfs4_fs.h +++ b/fs/nfs/nfs4_fs.h @@ -303,6 +303,17 @@ _nfs4_state_protect(struct nfs_client *clp, unsigned long sp4_mode, struct rpc_cred *newcred = NULL; rpc_authflavor_t flavor; + if (sp4_mode == NFS_SP4_MACH_CRED_CLEANUP || + sp4_mode == NFS_SP4_MACH_CRED_PNFS_CLEANUP) { + /* Using machine creds for cleanup operations + * is only relevent if the client credentials + * might expire. So don't bother for + * RPC_AUTH_UNIX. If file was only exported to + * sec=sys, the PUTFH would fail anyway. + */ + if ((*clntp)->cl_auth->au_flavor == RPC_AUTH_UNIX) + return false; + } if (test_bit(sp4_mode, &clp->cl_sp4_flags)) { spin_lock(&clp->cl_lock); if (clp->cl_machine_cred != NULL) diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index d90132642340..6c61e2b99635 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -1659,12 +1659,52 @@ update: return state; } +static struct inode * +nfs4_opendata_get_inode(struct nfs4_opendata *data) +{ + struct inode *inode; + + switch (data->o_arg.claim) { + case NFS4_OPEN_CLAIM_NULL: + case NFS4_OPEN_CLAIM_DELEGATE_CUR: + case NFS4_OPEN_CLAIM_DELEGATE_PREV: + if (!(data->f_attr.valid & NFS_ATTR_FATTR)) + return ERR_PTR(-EAGAIN); + inode = nfs_fhget(data->dir->d_sb, &data->o_res.fh, + &data->f_attr, data->f_label); + break; + default: + inode = d_inode(data->dentry); + ihold(inode); + nfs_refresh_inode(inode, &data->f_attr); + } + return inode; +} + static struct nfs4_state * -_nfs4_opendata_to_nfs4_state(struct nfs4_opendata *data) +nfs4_opendata_find_nfs4_state(struct nfs4_opendata *data) { + struct nfs4_state *state; struct inode *inode; - struct nfs4_state *state = NULL; - int ret; + + inode = nfs4_opendata_get_inode(data); + if (IS_ERR(inode)) + return ERR_CAST(inode); + if (data->state != NULL && data->state->inode == inode) { + state = data->state; + atomic_inc(&state->count); + } else + state = nfs4_get_open_state(inode, data->owner); + iput(inode); + if (state == NULL) + state = ERR_PTR(-ENOMEM); + return state; +} + +static struct nfs4_state * +_nfs4_opendata_to_nfs4_state(struct nfs4_opendata *data) +{ + struct nfs4_state *state; if (!data->rpc_done) { state = nfs4_try_open_cached(data); @@ -1672,29 +1712,17 @@ _nfs4_opendata_to_nfs4_state(struct nfs4_opendata *data) goto out; } - ret = -EAGAIN; - if (!(data->f_attr.valid & NFS_ATTR_FATTR)) - goto err; - inode = nfs_fhget(data->dir->d_sb, &data->o_res.fh, &data->f_attr, data->f_label); - ret = PTR_ERR(inode); - if (IS_ERR(inode)) - goto err; - ret = -ENOMEM; - state = nfs4_get_open_state(inode, data->owner); - if (state == NULL) - goto err_put_inode; + state = nfs4_opendata_find_nfs4_state(data); + if (IS_ERR(state)) + goto out; + if (data->o_res.delegation_type != 0) nfs4_opendata_check_deleg(data, state); update_open_stateid(state, &data->o_res.stateid, NULL, data->o_arg.fmode); - iput(inode); out: nfs_release_seqid(data->o_arg.seqid); return state; -err_put_inode: - iput(inode); -err: - return ERR_PTR(ret); } static struct nfs4_state * @@ -2071,7 +2099,6 @@ static void nfs4_open_prepare(struct rpc_task *task, void *calldata) data->o_arg.open_bitmap = &nfs4_open_noattr_bitmap[0]; case NFS4_OPEN_CLAIM_FH: task->tk_msg.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_OPEN_NOATTR]; - nfs_copy_fh(&data->o_res.fh, data->o_arg.fh); } data->timestamp = jiffies; if (nfs4_setup_sequence(data->o_arg.server->nfs_client, @@ -2258,7 +2285,6 @@ static int nfs4_opendata_access(struct rpc_cred *cred, mask = NFS4_ACCESS_READ; cache.cred = cred; - cache.jiffies = jiffies; nfs_access_set_mask(&cache, opendata->o_res.access_result); nfs_access_add_cache(state->inode, &cache); @@ -7318,7 +7344,9 @@ static int nfs4_sp4_select_mode(struct nfs_client *clp, 1 << (OP_DESTROY_SESSION - 32) | 1 << (OP_DESTROY_CLIENTID - 32) }; + unsigned long flags = 0; unsigned int i; + int ret = 0; if (sp->how == SP4_MACH_CRED) { /* Print state protect result */ @@ -7334,7 +7362,8 @@ static int nfs4_sp4_select_mode(struct nfs_client *clp, for (i = 0; i < NFS4_OP_MAP_NUM_WORDS; i++) { if (sp->enforce.u.words[i] & ~supported_enforce[i]) { dfprintk(MOUNT, "sp4_mach_cred: disabled\n"); - return -EINVAL; + ret = -EINVAL; + goto out; } } @@ -7353,10 +7382,11 @@ static int nfs4_sp4_select_mode(struct nfs_client *clp, test_bit(OP_DESTROY_CLIENTID, sp->enforce.u.longs)) { dfprintk(MOUNT, "sp4_mach_cred:\n"); dfprintk(MOUNT, " minimal mode enabled\n"); - set_bit(NFS_SP4_MACH_CRED_MINIMAL, &clp->cl_sp4_flags); + __set_bit(NFS_SP4_MACH_CRED_MINIMAL, &flags); } else { dfprintk(MOUNT, "sp4_mach_cred: disabled\n"); - return -EINVAL; + ret = -EINVAL; + goto out; } if (test_bit(OP_CLOSE, sp->allow.u.longs) && @@ -7364,110 +7394,46 @@ static int nfs4_sp4_select_mode(struct nfs_client *clp, test_bit(OP_DELEGRETURN, sp->allow.u.longs) && test_bit(OP_LOCKU, sp->allow.u.longs)) { dfprintk(MOUNT, " cleanup mode enabled\n"); - set_bit(NFS_SP4_MACH_CRED_CLEANUP, &clp->cl_sp4_flags); + __set_bit(NFS_SP4_MACH_CRED_CLEANUP, &flags); } if (test_bit(OP_LAYOUTRETURN, sp->allow.u.longs)) { dfprintk(MOUNT, " pnfs cleanup mode enabled\n"); - set_bit(NFS_SP4_MACH_CRED_PNFS_CLEANUP, - &clp->cl_sp4_flags); + __set_bit(NFS_SP4_MACH_CRED_PNFS_CLEANUP, &flags); } if (test_bit(OP_SECINFO, sp->allow.u.longs) && test_bit(OP_SECINFO_NO_NAME, sp->allow.u.longs)) { dfprintk(MOUNT, " secinfo mode enabled\n"); - set_bit(NFS_SP4_MACH_CRED_SECINFO, &clp->cl_sp4_flags); + __set_bit(NFS_SP4_MACH_CRED_SECINFO, &flags); } if (test_bit(OP_TEST_STATEID, sp->allow.u.longs) && test_bit(OP_FREE_STATEID, sp->allow.u.longs)) { dfprintk(MOUNT, " stateid mode enabled\n"); - set_bit(NFS_SP4_MACH_CRED_STATEID, &clp->cl_sp4_flags); + __set_bit(NFS_SP4_MACH_CRED_STATEID, &flags); } if (test_bit(OP_WRITE, sp->allow.u.longs)) { dfprintk(MOUNT, " write mode enabled\n"); - set_bit(NFS_SP4_MACH_CRED_WRITE, &clp->cl_sp4_flags); + __set_bit(NFS_SP4_MACH_CRED_WRITE, &flags); } if (test_bit(OP_COMMIT, sp->allow.u.longs)) { dfprintk(MOUNT, " commit mode enabled\n"); - set_bit(NFS_SP4_MACH_CRED_COMMIT, &clp->cl_sp4_flags); + __set_bit(NFS_SP4_MACH_CRED_COMMIT, &flags); } } - +out: + clp->cl_sp4_flags = flags; return 0; } struct nfs41_exchange_id_data { struct nfs41_exchange_id_res res; struct nfs41_exchange_id_args args; - struct rpc_xprt *xprt; - int rpc_status; }; -static void nfs4_exchange_id_done(struct rpc_task *task, void *data) -{ - struct nfs41_exchange_id_data *cdata = - (struct nfs41_exchange_id_data *)data; - struct nfs_client *clp = cdata->args.client; - int status = task->tk_status; - - trace_nfs4_exchange_id(clp, status); - - if (status == 0) - status = nfs4_check_cl_exchange_flags(cdata->res.flags); - - if (cdata->xprt && status == 0) { - status = nfs4_detect_session_trunking(clp, &cdata->res, - cdata->xprt); - goto out; - } - - if (status == 0) - status = nfs4_sp4_select_mode(clp, &cdata->res.state_protect); - - if (status == 0) { - clp->cl_clientid = cdata->res.clientid; - clp->cl_exchange_flags = cdata->res.flags; - clp->cl_seqid = cdata->res.seqid; - /* Client ID is not confirmed */ - if (!(cdata->res.flags & EXCHGID4_FLAG_CONFIRMED_R)) - clear_bit(NFS4_SESSION_ESTABLISHED, - &clp->cl_session->session_state); - - kfree(clp->cl_serverowner); - clp->cl_serverowner = cdata->res.server_owner; - cdata->res.server_owner = NULL; - - /* use the most recent implementation id */ - kfree(clp->cl_implid); - clp->cl_implid = cdata->res.impl_id; - cdata->res.impl_id = NULL; - - if (clp->cl_serverscope != NULL && - !nfs41_same_server_scope(clp->cl_serverscope, - cdata->res.server_scope)) { - dprintk("%s: server_scope mismatch detected\n", - __func__); - set_bit(NFS4CLNT_SERVER_SCOPE_MISMATCH, &clp->cl_state); - kfree(clp->cl_serverscope); - clp->cl_serverscope = NULL; - } - - if (clp->cl_serverscope == NULL) { - clp->cl_serverscope = cdata->res.server_scope; - cdata->res.server_scope = NULL; - } - /* Save the EXCHANGE_ID verifier session trunk tests */ - memcpy(clp->cl_confirm.data, cdata->args.verifier.data, - sizeof(clp->cl_confirm.data)); - } -out: - cdata->rpc_status = status; - return; -} - static void nfs4_exchange_id_release(void *data) { struct nfs41_exchange_id_data *cdata = @@ -7481,7 +7447,6 @@ static void nfs4_exchange_id_release(void *data) } static const struct rpc_call_ops nfs4_exchange_id_call_ops = { - .rpc_call_done = nfs4_exchange_id_done, .rpc_release = nfs4_exchange_id_release, }; @@ -7490,7 +7455,8 @@ static const struct rpc_call_ops nfs4_exchange_id_call_ops = { * * Wrapper for EXCHANGE_ID operation. */ -static int _nfs4_proc_exchange_id(struct nfs_client *clp, struct rpc_cred *cred, +static struct rpc_task * +nfs4_run_exchange_id(struct nfs_client *clp, struct rpc_cred *cred, u32 sp4_how, struct rpc_xprt *xprt) { struct rpc_message msg = { @@ -7504,17 +7470,15 @@ static int _nfs4_proc_exchange_id(struct nfs_client *clp, struct rpc_cred *cred, .flags = RPC_TASK_TIMEOUT, }; struct nfs41_exchange_id_data *calldata; - struct rpc_task *task; int status; if (!atomic_inc_not_zero(&clp->cl_count)) - return -EIO; + return ERR_PTR(-EIO); + status = -ENOMEM; calldata = kzalloc(sizeof(*calldata), GFP_NOFS); - if (!calldata) { - nfs_put_client(clp); - return -ENOMEM; - } + if (!calldata) + goto out; nfs4_init_boot_verifier(clp, &calldata->args.verifier); @@ -7553,34 +7517,22 @@ static int _nfs4_proc_exchange_id(struct nfs_client *clp, struct rpc_cred *cred, goto out_impl_id; } if (xprt) { - calldata->xprt = xprt; task_setup_data.rpc_xprt = xprt; task_setup_data.flags |= RPC_TASK_SOFTCONN; memcpy(calldata->args.verifier.data, clp->cl_confirm.data, sizeof(calldata->args.verifier.data)); } calldata->args.client = clp; -#ifdef CONFIG_NFS_V4_1_MIGRATION calldata->args.flags = EXCHGID4_FLAG_SUPP_MOVED_REFER | - EXCHGID4_FLAG_BIND_PRINC_STATEID | - EXCHGID4_FLAG_SUPP_MOVED_MIGR, -#else - calldata->args.flags = EXCHGID4_FLAG_SUPP_MOVED_REFER | - EXCHGID4_FLAG_BIND_PRINC_STATEID, + EXCHGID4_FLAG_BIND_PRINC_STATEID; +#ifdef CONFIG_NFS_V4_1_MIGRATION + calldata->args.flags |= EXCHGID4_FLAG_SUPP_MOVED_MIGR; #endif msg.rpc_argp = &calldata->args; msg.rpc_resp = &calldata->res; task_setup_data.callback_data = calldata; - task = rpc_run_task(&task_setup_data); - if (IS_ERR(task)) - return PTR_ERR(task); - - status = calldata->rpc_status; - - rpc_put_task(task); -out: - return status; + return rpc_run_task(&task_setup_data); out_impl_id: kfree(calldata->res.impl_id); @@ -7590,8 +7542,69 @@ out_server_owner: kfree(calldata->res.server_owner); out_calldata: kfree(calldata); +out: nfs_put_client(clp); - goto out; + return ERR_PTR(status); +} + +/* + * _nfs4_proc_exchange_id() + * + * Wrapper for EXCHANGE_ID operation. + */ +static int _nfs4_proc_exchange_id(struct nfs_client *clp, struct rpc_cred *cred, + u32 sp4_how) +{ + struct rpc_task *task; + struct nfs41_exchange_id_args *argp; + struct nfs41_exchange_id_res *resp; + int status; + + task = nfs4_run_exchange_id(clp, cred, sp4_how, NULL); + if (IS_ERR(task)) + return PTR_ERR(task); + + argp = task->tk_msg.rpc_argp; + resp = task->tk_msg.rpc_resp; + status = task->tk_status; + if (status != 0) + goto out; + + status = nfs4_check_cl_exchange_flags(resp->flags); + if (status != 0) + goto out; + + status = nfs4_sp4_select_mode(clp, &resp->state_protect); + if (status != 0) + goto out; + + clp->cl_clientid = resp->clientid; + clp->cl_exchange_flags = resp->flags; + clp->cl_seqid = resp->seqid; + /* Client ID is not confirmed */ + if (!(resp->flags & EXCHGID4_FLAG_CONFIRMED_R)) + clear_bit(NFS4_SESSION_ESTABLISHED, + &clp->cl_session->session_state); + + if (clp->cl_serverscope != NULL && + !nfs41_same_server_scope(clp->cl_serverscope, + resp->server_scope)) { + dprintk("%s: server_scope mismatch detected\n", + __func__); + set_bit(NFS4CLNT_SERVER_SCOPE_MISMATCH, &clp->cl_state); + } + + swap(clp->cl_serverowner, resp->server_owner); + swap(clp->cl_serverscope, resp->server_scope); + swap(clp->cl_implid, resp->impl_id); + + /* Save the EXCHANGE_ID verifier session trunk tests */ + memcpy(clp->cl_confirm.data, argp->verifier.data, + sizeof(clp->cl_confirm.data)); +out: + trace_nfs4_exchange_id(clp, status); + rpc_put_task(task); + return status; } /* @@ -7614,13 +7627,13 @@ int nfs4_proc_exchange_id(struct nfs_client *clp, struct rpc_cred *cred) /* try SP4_MACH_CRED if krb5i/p */ if (authflavor == RPC_AUTH_GSS_KRB5I || authflavor == RPC_AUTH_GSS_KRB5P) { - status = _nfs4_proc_exchange_id(clp, cred, SP4_MACH_CRED, NULL); + status = _nfs4_proc_exchange_id(clp, cred, SP4_MACH_CRED); if (!status) return 0; } /* try SP4_NONE */ - return _nfs4_proc_exchange_id(clp, cred, SP4_NONE, NULL); + return _nfs4_proc_exchange_id(clp, cred, SP4_NONE); } /** @@ -7642,6 +7655,9 @@ int nfs4_test_session_trunk(struct rpc_clnt *clnt, struct rpc_xprt *xprt, void *data) { struct nfs4_add_xprt_data *adata = (struct nfs4_add_xprt_data *)data; + struct rpc_task *task; + int status; + u32 sp4_how; dprintk("--> %s try %s\n", __func__, @@ -7650,7 +7666,17 @@ int nfs4_test_session_trunk(struct rpc_clnt *clnt, struct rpc_xprt *xprt, sp4_how = (adata->clp->cl_sp4_flags == 0 ? SP4_NONE : SP4_MACH_CRED); /* Test connection for session trunking. Async exchange_id call */ - return _nfs4_proc_exchange_id(adata->clp, adata->cred, sp4_how, xprt); + task = nfs4_run_exchange_id(adata->clp, adata->cred, sp4_how, xprt); + if (IS_ERR(task)) + return PTR_ERR(task); + + status = task->tk_status; + if (status == 0) + status = nfs4_detect_session_trunking(adata->clp, + task->tk_msg.rpc_resp, xprt); + + rpc_put_task(task); + return status; } EXPORT_SYMBOL_GPL(nfs4_test_session_trunk); diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c index de9066a92c0d..bec120ec1967 100644 --- a/fs/nfs/pagelist.c +++ b/fs/nfs/pagelist.c @@ -134,19 +134,14 @@ EXPORT_SYMBOL_GPL(nfs_async_iocounter_wait); /* * nfs_page_group_lock - lock the head of the page group * @req - request in group that is to be locked - * @nonblock - if true don't block waiting for lock * - * this lock must be held if modifying the page group list + * this lock must be held when traversing or modifying the page + * group list * - * return 0 on success, < 0 on error: -EDELAY if nonblocking or the - * result from wait_on_bit_lock - * - * NOTE: calling with nonblock=false should always have set the - * lock bit (see fs/buffer.c and other uses of wait_on_bit_lock - * with TASK_UNINTERRUPTIBLE), so there is no need to check the result. + * return 0 on success, < 0 on error */ int -nfs_page_group_lock(struct nfs_page *req, bool nonblock) +nfs_page_group_lock(struct nfs_page *req) { struct nfs_page *head = req->wb_head; @@ -155,35 +150,10 @@ nfs_page_group_lock(struct nfs_page *req, bool nonblock) if (!test_and_set_bit(PG_HEADLOCK, &head->wb_flags)) return 0; - if (!nonblock) { - set_bit(PG_CONTENDED1, &head->wb_flags); - smp_mb__after_atomic(); - return wait_on_bit_lock(&head->wb_flags, PG_HEADLOCK, - TASK_UNINTERRUPTIBLE); - } - - return -EAGAIN; -} - -/* - * nfs_page_group_lock_wait - wait for the lock to clear, but don't grab it - * @req - a request in the group - * - * This is a blocking call to wait for the group lock to be cleared. - */ -void -nfs_page_group_lock_wait(struct nfs_page *req) -{ - struct nfs_page *head = req->wb_head; - - WARN_ON_ONCE(head != head->wb_head); - - if (!test_bit(PG_HEADLOCK, &head->wb_flags)) - return; set_bit(PG_CONTENDED1, &head->wb_flags); smp_mb__after_atomic(); - wait_on_bit(&head->wb_flags, PG_HEADLOCK, - TASK_UNINTERRUPTIBLE); + return wait_on_bit_lock(&head->wb_flags, PG_HEADLOCK, + TASK_UNINTERRUPTIBLE); } /* @@ -246,7 +216,7 @@ bool nfs_page_group_sync_on_bit(struct nfs_page *req, unsigned int bit) { bool ret; - nfs_page_group_lock(req, false); + nfs_page_group_lock(req); ret = nfs_page_group_sync_on_bit_locked(req, bit); nfs_page_group_unlock(req); @@ -288,9 +258,7 @@ nfs_page_group_init(struct nfs_page *req, struct nfs_page *prev) inode = page_file_mapping(req->wb_page)->host; set_bit(PG_INODE_REF, &req->wb_flags); kref_get(&req->wb_kref); - spin_lock(&inode->i_lock); - NFS_I(inode)->nrequests++; - spin_unlock(&inode->i_lock); + atomic_long_inc(&NFS_I(inode)->nrequests); } } } @@ -306,14 +274,11 @@ static void nfs_page_group_destroy(struct kref *kref) { struct nfs_page *req = container_of(kref, struct nfs_page, wb_kref); + struct nfs_page *head = req->wb_head; struct nfs_page *tmp, *next; - /* subrequests must release the ref on the head request */ - if (req->wb_head != req) - nfs_release_request(req->wb_head); - if (!nfs_page_group_sync_on_bit(req, PG_TEARDOWN)) - return; + goto out; tmp = req; do { @@ -324,6 +289,10 @@ nfs_page_group_destroy(struct kref *kref) nfs_free_request(tmp); tmp = next; } while (tmp != req); +out: + /* subrequests must release the ref on the head request */ + if (head != req) + nfs_release_request(head); } /** @@ -465,6 +434,7 @@ void nfs_release_request(struct nfs_page *req) { kref_put(&req->wb_kref, nfs_page_group_destroy); } +EXPORT_SYMBOL_GPL(nfs_release_request); /** * nfs_wait_on_request - Wait for a request to complete. @@ -483,6 +453,7 @@ nfs_wait_on_request(struct nfs_page *req) return wait_on_bit_io(&req->wb_flags, PG_BUSY, TASK_UNINTERRUPTIBLE); } +EXPORT_SYMBOL_GPL(nfs_wait_on_request); /* * nfs_generic_pg_test - determine if requests can be coalesced @@ -530,16 +501,6 @@ struct nfs_pgio_header *nfs_pgio_header_alloc(const struct nfs_rw_ops *ops) } EXPORT_SYMBOL_GPL(nfs_pgio_header_alloc); -/* - * nfs_pgio_header_free - Free a read or write header - * @hdr: The header to free - */ -void nfs_pgio_header_free(struct nfs_pgio_header *hdr) -{ - hdr->rw_ops->rw_free_header(hdr); -} -EXPORT_SYMBOL_GPL(nfs_pgio_header_free); - /** * nfs_pgio_data_destroy - make @hdr suitable for reuse * @@ -548,14 +509,24 @@ EXPORT_SYMBOL_GPL(nfs_pgio_header_free); * * @hdr: A header that has had nfs_generic_pgio called */ -void nfs_pgio_data_destroy(struct nfs_pgio_header *hdr) +static void nfs_pgio_data_destroy(struct nfs_pgio_header *hdr) { if (hdr->args.context) put_nfs_open_context(hdr->args.context); if (hdr->page_array.pagevec != hdr->page_array.page_array) kfree(hdr->page_array.pagevec); } -EXPORT_SYMBOL_GPL(nfs_pgio_data_destroy); + +/* + * nfs_pgio_header_free - Free a read or write header + * @hdr: The header to free + */ +void nfs_pgio_header_free(struct nfs_pgio_header *hdr) +{ + nfs_pgio_data_destroy(hdr); + hdr->rw_ops->rw_free_header(hdr); +} +EXPORT_SYMBOL_GPL(nfs_pgio_header_free); /** * nfs_pgio_rpcsetup - Set up arguments for a pageio call @@ -669,7 +640,6 @@ EXPORT_SYMBOL_GPL(nfs_initiate_pgio); static void nfs_pgio_error(struct nfs_pgio_header *hdr) { set_bit(NFS_IOHDR_REDO, &hdr->flags); - nfs_pgio_data_destroy(hdr); hdr->completion_ops->completion(hdr); } @@ -680,7 +650,6 @@ static void nfs_pgio_error(struct nfs_pgio_header *hdr) static void nfs_pgio_release(void *calldata) { struct nfs_pgio_header *hdr = calldata; - nfs_pgio_data_destroy(hdr); hdr->completion_ops->completion(hdr); } @@ -711,12 +680,8 @@ void nfs_pageio_init(struct nfs_pageio_descriptor *desc, const struct nfs_pgio_completion_ops *compl_ops, const struct nfs_rw_ops *rw_ops, size_t bsize, - int io_flags, - gfp_t gfp_flags) + int io_flags) { - struct nfs_pgio_mirror *new; - int i; - desc->pg_moreio = 0; desc->pg_inode = inode; desc->pg_ops = pg_ops; @@ -732,23 +697,10 @@ void nfs_pageio_init(struct nfs_pageio_descriptor *desc, desc->pg_mirror_count = 1; desc->pg_mirror_idx = 0; - if (pg_ops->pg_get_mirror_count) { - /* until we have a request, we don't have an lseg and no - * idea how many mirrors there will be */ - new = kcalloc(NFS_PAGEIO_DESCRIPTOR_MIRROR_MAX, - sizeof(struct nfs_pgio_mirror), gfp_flags); - desc->pg_mirrors_dynamic = new; - desc->pg_mirrors = new; - - for (i = 0; i < NFS_PAGEIO_DESCRIPTOR_MIRROR_MAX; i++) - nfs_pageio_mirror_init(&desc->pg_mirrors[i], bsize); - } else { - desc->pg_mirrors_dynamic = NULL; - desc->pg_mirrors = desc->pg_mirrors_static; - nfs_pageio_mirror_init(&desc->pg_mirrors[0], bsize); - } + desc->pg_mirrors_dynamic = NULL; + desc->pg_mirrors = desc->pg_mirrors_static; + nfs_pageio_mirror_init(&desc->pg_mirrors[0], bsize); } -EXPORT_SYMBOL_GPL(nfs_pageio_init); /** * nfs_pgio_result - Basic pageio error handling @@ -865,32 +817,52 @@ static int nfs_generic_pg_pgios(struct nfs_pageio_descriptor *desc) return ret; } +static struct nfs_pgio_mirror * +nfs_pageio_alloc_mirrors(struct nfs_pageio_descriptor *desc, + unsigned int mirror_count) +{ + struct nfs_pgio_mirror *ret; + unsigned int i; + + kfree(desc->pg_mirrors_dynamic); + desc->pg_mirrors_dynamic = NULL; + if (mirror_count == 1) + return desc->pg_mirrors_static; + ret = kmalloc_array(mirror_count, sizeof(*ret), GFP_NOFS); + if (ret != NULL) { + for (i = 0; i < mirror_count; i++) + nfs_pageio_mirror_init(&ret[i], desc->pg_bsize); + desc->pg_mirrors_dynamic = ret; + } + return ret; +} + /* * nfs_pageio_setup_mirroring - determine if mirroring is to be used * by calling the pg_get_mirror_count op */ -static int nfs_pageio_setup_mirroring(struct nfs_pageio_descriptor *pgio, +static void nfs_pageio_setup_mirroring(struct nfs_pageio_descriptor *pgio, struct nfs_page *req) { - int mirror_count = 1; - - if (!pgio->pg_ops->pg_get_mirror_count) - return 0; + unsigned int mirror_count = 1; - mirror_count = pgio->pg_ops->pg_get_mirror_count(pgio, req); - - if (pgio->pg_error < 0) - return pgio->pg_error; - - if (!mirror_count || mirror_count > NFS_PAGEIO_DESCRIPTOR_MIRROR_MAX) - return -EINVAL; + if (pgio->pg_ops->pg_get_mirror_count) + mirror_count = pgio->pg_ops->pg_get_mirror_count(pgio, req); + if (mirror_count == pgio->pg_mirror_count || pgio->pg_error < 0) + return; - if (WARN_ON_ONCE(!pgio->pg_mirrors_dynamic)) - return -EINVAL; + if (!mirror_count || mirror_count > NFS_PAGEIO_DESCRIPTOR_MIRROR_MAX) { + pgio->pg_error = -EINVAL; + return; + } + pgio->pg_mirrors = nfs_pageio_alloc_mirrors(pgio, mirror_count); + if (pgio->pg_mirrors == NULL) { + pgio->pg_error = -ENOMEM; + pgio->pg_mirrors = pgio->pg_mirrors_static; + mirror_count = 1; + } pgio->pg_mirror_count = mirror_count; - - return 0; } /* @@ -1036,7 +1008,7 @@ static int __nfs_pageio_add_request(struct nfs_pageio_descriptor *desc, unsigned int bytes_left = 0; unsigned int offset, pgbase; - nfs_page_group_lock(req, false); + nfs_page_group_lock(req); subreq = req; bytes_left = subreq->wb_bytes; @@ -1058,7 +1030,7 @@ static int __nfs_pageio_add_request(struct nfs_pageio_descriptor *desc, if (mirror->pg_recoalesce) return 0; /* retry add_request for this subreq */ - nfs_page_group_lock(req, false); + nfs_page_group_lock(req); continue; } @@ -1155,7 +1127,7 @@ int nfs_pageio_add_request(struct nfs_pageio_descriptor *desc, for (midx = 0; midx < desc->pg_mirror_count; midx++) { if (midx) { - nfs_page_group_lock(req, false); + nfs_page_group_lock(req); /* find the last request */ for (lastreq = req->wb_head; diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index c383d0913b54..7879ed8ceb76 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -529,47 +529,6 @@ pnfs_put_lseg(struct pnfs_layout_segment *lseg) } EXPORT_SYMBOL_GPL(pnfs_put_lseg); -static void pnfs_free_lseg_async_work(struct work_struct *work) -{ - struct pnfs_layout_segment *lseg; - struct pnfs_layout_hdr *lo; - - lseg = container_of(work, struct pnfs_layout_segment, pls_work); - lo = lseg->pls_layout; - - pnfs_free_lseg(lseg); - pnfs_put_layout_hdr(lo); -} - -static void pnfs_free_lseg_async(struct pnfs_layout_segment *lseg) -{ - INIT_WORK(&lseg->pls_work, pnfs_free_lseg_async_work); - schedule_work(&lseg->pls_work); -} - -void -pnfs_put_lseg_locked(struct pnfs_layout_segment *lseg) -{ - if (!lseg) - return; - - assert_spin_locked(&lseg->pls_layout->plh_inode->i_lock); - - dprintk("%s: lseg %p ref %d valid %d\n", __func__, lseg, - atomic_read(&lseg->pls_refcount), - test_bit(NFS_LSEG_VALID, &lseg->pls_flags)); - if (atomic_dec_and_test(&lseg->pls_refcount)) { - struct pnfs_layout_hdr *lo = lseg->pls_layout; - if (test_bit(NFS_LSEG_VALID, &lseg->pls_flags)) - return; - pnfs_layout_remove_lseg(lo, lseg); - if (!pnfs_cache_lseg_for_layoutreturn(lo, lseg)) { - pnfs_get_layout_hdr(lo); - pnfs_free_lseg_async(lseg); - } - } -} - /* * is l2 fully contained in l1? * start1 end1 @@ -2274,7 +2233,6 @@ pnfs_write_through_mds(struct nfs_pageio_descriptor *desc, nfs_pageio_reset_write_mds(desc); mirror->pg_recoalesce = 1; } - nfs_pgio_data_destroy(hdr); hdr->release(hdr); } @@ -2398,7 +2356,6 @@ pnfs_read_through_mds(struct nfs_pageio_descriptor *desc, nfs_pageio_reset_read_mds(desc); mirror->pg_recoalesce = 1; } - nfs_pgio_data_destroy(hdr); hdr->release(hdr); } diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h index 99731e3e332f..87f144f14d1e 100644 --- a/fs/nfs/pnfs.h +++ b/fs/nfs/pnfs.h @@ -67,7 +67,6 @@ struct pnfs_layout_segment { u32 pls_seq; unsigned long pls_flags; struct pnfs_layout_hdr *pls_layout; - struct work_struct pls_work; }; enum pnfs_try_status { @@ -230,7 +229,6 @@ extern int nfs4_proc_layoutreturn(struct nfs4_layoutreturn *lrp, bool sync); /* pnfs.c */ void pnfs_get_layout_hdr(struct pnfs_layout_hdr *lo); void pnfs_put_lseg(struct pnfs_layout_segment *lseg); -void pnfs_put_lseg_locked(struct pnfs_layout_segment *lseg); void set_pnfs_layoutdriver(struct nfs_server *, const struct nfs_fh *, struct nfs_fsinfo *); void unset_pnfs_layoutdriver(struct nfs_server *); diff --git a/fs/nfs/pnfs_nfs.c b/fs/nfs/pnfs_nfs.c index 25f28fa64c57..60da59be83b6 100644 --- a/fs/nfs/pnfs_nfs.c +++ b/fs/nfs/pnfs_nfs.c @@ -83,34 +83,11 @@ pnfs_generic_clear_request_commit(struct nfs_page *req, } out: nfs_request_remove_commit_list(req, cinfo); - pnfs_put_lseg_locked(freeme); + pnfs_put_lseg(freeme); } EXPORT_SYMBOL_GPL(pnfs_generic_clear_request_commit); static int -pnfs_generic_transfer_commit_list(struct list_head *src, struct list_head *dst, - struct nfs_commit_info *cinfo, int max) -{ - struct nfs_page *req, *tmp; - int ret = 0; - - list_for_each_entry_safe(req, tmp, src, wb_list) { - if (!nfs_lock_request(req)) - continue; - kref_get(&req->wb_kref); - if (cond_resched_lock(&cinfo->inode->i_lock)) - list_safe_reset_next(req, tmp, wb_list); - nfs_request_remove_commit_list(req, cinfo); - clear_bit(PG_COMMIT_TO_DS, &req->wb_flags); - nfs_list_add_request(req, dst); - ret++; - if ((ret == max) && !cinfo->dreq) - break; - } - return ret; -} - -static int pnfs_generic_scan_ds_commit_list(struct pnfs_commit_bucket *bucket, struct nfs_commit_info *cinfo, int max) @@ -119,15 +96,15 @@ pnfs_generic_scan_ds_commit_list(struct pnfs_commit_bucket *bucket, struct list_head *dst = &bucket->committing; int ret; - lockdep_assert_held(&cinfo->inode->i_lock); - ret = pnfs_generic_transfer_commit_list(src, dst, cinfo, max); + lockdep_assert_held(&NFS_I(cinfo->inode)->commit_mutex); + ret = nfs_scan_commit_list(src, dst, cinfo, max); if (ret) { cinfo->ds->nwritten -= ret; cinfo->ds->ncommitting += ret; if (bucket->clseg == NULL) bucket->clseg = pnfs_get_lseg(bucket->wlseg); if (list_empty(src)) { - pnfs_put_lseg_locked(bucket->wlseg); + pnfs_put_lseg(bucket->wlseg); bucket->wlseg = NULL; } } @@ -142,7 +119,7 @@ int pnfs_generic_scan_commit_lists(struct nfs_commit_info *cinfo, { int i, rv = 0, cnt; - lockdep_assert_held(&cinfo->inode->i_lock); + lockdep_assert_held(&NFS_I(cinfo->inode)->commit_mutex); for (i = 0; i < cinfo->ds->nbuckets && max != 0; i++) { cnt = pnfs_generic_scan_ds_commit_list(&cinfo->ds->buckets[i], cinfo, max); @@ -162,11 +139,10 @@ void pnfs_generic_recover_commit_reqs(struct list_head *dst, int nwritten; int i; - lockdep_assert_held(&cinfo->inode->i_lock); + lockdep_assert_held(&NFS_I(cinfo->inode)->commit_mutex); restart: for (i = 0, b = cinfo->ds->buckets; i < cinfo->ds->nbuckets; i++, b++) { - nwritten = pnfs_generic_transfer_commit_list(&b->written, - dst, cinfo, 0); + nwritten = nfs_scan_commit_list(&b->written, dst, cinfo, 0); if (!nwritten) continue; cinfo->ds->nwritten -= nwritten; @@ -953,12 +929,12 @@ pnfs_layout_mark_request_commit(struct nfs_page *req, struct list_head *list; struct pnfs_commit_bucket *buckets; - spin_lock(&cinfo->inode->i_lock); + mutex_lock(&NFS_I(cinfo->inode)->commit_mutex); buckets = cinfo->ds->buckets; list = &buckets[ds_commit_idx].written; if (list_empty(list)) { if (!pnfs_is_valid_lseg(lseg)) { - spin_unlock(&cinfo->inode->i_lock); + mutex_unlock(&NFS_I(cinfo->inode)->commit_mutex); cinfo->completion_ops->resched_write(cinfo, req); return; } @@ -975,7 +951,7 @@ pnfs_layout_mark_request_commit(struct nfs_page *req, cinfo->ds->nwritten++; nfs_request_add_commit_list_locked(req, list, cinfo); - spin_unlock(&cinfo->inode->i_lock); + mutex_unlock(&NFS_I(cinfo->inode)->commit_mutex); nfs_mark_page_unstable(req->wb_page, cinfo); } EXPORT_SYMBOL_GPL(pnfs_layout_mark_request_commit); diff --git a/fs/nfs/read.c b/fs/nfs/read.c index a8421d9dab6a..0d42573d423d 100644 --- a/fs/nfs/read.c +++ b/fs/nfs/read.c @@ -68,7 +68,7 @@ void nfs_pageio_init_read(struct nfs_pageio_descriptor *pgio, pg_ops = server->pnfs_curr_ld->pg_read_ops; #endif nfs_pageio_init(pgio, inode, pg_ops, compl_ops, &nfs_rw_read_ops, - server->rsize, 0, GFP_KERNEL); + server->rsize, 0); } EXPORT_SYMBOL_GPL(nfs_pageio_init_read); diff --git a/fs/nfs/super.c b/fs/nfs/super.c index d828ef88e7db..6b179af59b92 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -1691,8 +1691,8 @@ static int nfs_verify_authflavors(struct nfs_parsed_mount_data *args, rpc_authflavor_t *server_authlist, unsigned int count) { rpc_authflavor_t flavor = RPC_AUTH_MAXFLAVOR; + bool found_auth_null = false; unsigned int i; - int use_auth_null = false; /* * If the sec= mount option is used, the specified flavor or AUTH_NULL @@ -1701,6 +1701,10 @@ static int nfs_verify_authflavors(struct nfs_parsed_mount_data *args, * AUTH_NULL has a special meaning when it's in the server list - it * means that the server will ignore the rpc creds, so any flavor * can be used but still use the sec= that was specified. + * + * Note also that the MNT procedure in MNTv1 does not return a list + * of supported security flavors. In this case, nfs_mount() fabricates + * a security flavor list containing just AUTH_NULL. */ for (i = 0; i < count; i++) { flavor = server_authlist[i]; @@ -1709,11 +1713,11 @@ static int nfs_verify_authflavors(struct nfs_parsed_mount_data *args, goto out; if (flavor == RPC_AUTH_NULL) - use_auth_null = true; + found_auth_null = true; } - if (use_auth_null) { - flavor = RPC_AUTH_NULL; + if (found_auth_null) { + flavor = args->auth_info.flavors[0]; goto out; } diff --git a/fs/nfs/write.c b/fs/nfs/write.c index b1af5dee5e0a..f68083db63c8 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -102,10 +102,8 @@ static struct nfs_pgio_header *nfs_writehdr_alloc(void) { struct nfs_pgio_header *p = mempool_alloc(nfs_wdata_mempool, GFP_NOIO); - if (p) { - memset(p, 0, sizeof(*p)); - p->rw_mode = FMODE_WRITE; - } + memset(p, 0, sizeof(*p)); + p->rw_mode = FMODE_WRITE; return p; } @@ -154,6 +152,14 @@ static void nfs_context_set_write_error(struct nfs_open_context *ctx, int error) set_bit(NFS_CONTEXT_ERROR_WRITE, &ctx->flags); } +static struct nfs_page * +nfs_page_private_request(struct page *page) +{ + if (!PagePrivate(page)) + return NULL; + return (struct nfs_page *)page_private(page); +} + /* * nfs_page_find_head_request_locked - find head request associated with @page * @@ -162,21 +168,41 @@ static void nfs_context_set_write_error(struct nfs_open_context *ctx, int error) * returns matching head request with reference held, or NULL if not found. */ static struct nfs_page * -nfs_page_find_head_request_locked(struct nfs_inode *nfsi, struct page *page) +nfs_page_find_private_request(struct page *page) { - struct nfs_page *req = NULL; - - if (PagePrivate(page)) - req = (struct nfs_page *)page_private(page); - else if (unlikely(PageSwapCache(page))) - req = nfs_page_search_commits_for_head_request_locked(nfsi, - page); + struct address_space *mapping = page_file_mapping(page); + struct nfs_page *req; + if (!PagePrivate(page)) + return NULL; + spin_lock(&mapping->private_lock); + req = nfs_page_private_request(page); if (req) { WARN_ON_ONCE(req->wb_head != req); kref_get(&req->wb_kref); } + spin_unlock(&mapping->private_lock); + return req; +} +static struct nfs_page * +nfs_page_find_swap_request(struct page *page) +{ + struct inode *inode = page_file_mapping(page)->host; + struct nfs_inode *nfsi = NFS_I(inode); + struct nfs_page *req = NULL; + if (!PageSwapCache(page)) + return NULL; + mutex_lock(&nfsi->commit_mutex); + if (PageSwapCache(page)) { + req = nfs_page_search_commits_for_head_request_locked(nfsi, + page); + if (req) { + WARN_ON_ONCE(req->wb_head != req); + kref_get(&req->wb_kref); + } + } + mutex_unlock(&nfsi->commit_mutex); return req; } @@ -187,12 +213,11 @@ nfs_page_find_head_request_locked(struct nfs_inode *nfsi, struct page *page) */ static struct nfs_page *nfs_page_find_head_request(struct page *page) { - struct inode *inode = page_file_mapping(page)->host; - struct nfs_page *req = NULL; + struct nfs_page *req; - spin_lock(&inode->i_lock); - req = nfs_page_find_head_request_locked(NFS_I(inode), page); - spin_unlock(&inode->i_lock); + req = nfs_page_find_private_request(page); + if (!req) + req = nfs_page_find_swap_request(page); return req; } @@ -241,9 +266,6 @@ nfs_page_group_search_locked(struct nfs_page *head, unsigned int page_offset) { struct nfs_page *req; - WARN_ON_ONCE(head != head->wb_head); - WARN_ON_ONCE(!test_bit(PG_HEADLOCK, &head->wb_head->wb_flags)); - req = head; do { if (page_offset >= req->wb_pgbase && @@ -269,20 +291,17 @@ static bool nfs_page_group_covers_page(struct nfs_page *req) unsigned int pos = 0; unsigned int len = nfs_page_length(req->wb_page); - nfs_page_group_lock(req, false); + nfs_page_group_lock(req); - do { + for (;;) { tmp = nfs_page_group_search_locked(req->wb_head, pos); - if (tmp) { - /* no way this should happen */ - WARN_ON_ONCE(tmp->wb_pgbase != pos); - pos += tmp->wb_bytes - (pos - tmp->wb_pgbase); - } - } while (tmp && pos < len); + if (!tmp) + break; + pos = tmp->wb_pgbase + tmp->wb_bytes; + } nfs_page_group_unlock(req); - WARN_ON_ONCE(pos > len); - return pos == len; + return pos >= len; } /* We can set the PG_uptodate flag if we see that a write request @@ -333,8 +352,11 @@ static void nfs_end_page_writeback(struct nfs_page *req) { struct inode *inode = page_file_mapping(req->wb_page)->host; struct nfs_server *nfss = NFS_SERVER(inode); + bool is_done; - if (!nfs_page_group_sync_on_bit(req, PG_WB_END)) + is_done = nfs_page_group_sync_on_bit(req, PG_WB_END); + nfs_unlock_request(req); + if (!is_done) return; end_page_writeback(req->wb_page); @@ -342,22 +364,6 @@ static void nfs_end_page_writeback(struct nfs_page *req) clear_bdi_congested(inode_to_bdi(inode), BLK_RW_ASYNC); } - -/* nfs_page_group_clear_bits - * @req - an nfs request - * clears all page group related bits from @req - */ -static void -nfs_page_group_clear_bits(struct nfs_page *req) -{ - clear_bit(PG_TEARDOWN, &req->wb_flags); - clear_bit(PG_UNLOCKPAGE, &req->wb_flags); - clear_bit(PG_UPTODATE, &req->wb_flags); - clear_bit(PG_WB_END, &req->wb_flags); - clear_bit(PG_REMOVE, &req->wb_flags); -} - - /* * nfs_unroll_locks_and_wait - unlock all newly locked reqs and wait on @req * @@ -366,43 +372,24 @@ nfs_page_group_clear_bits(struct nfs_page *req) * @inode - inode associated with request page group, must be holding inode lock * @head - head request of page group, must be holding head lock * @req - request that couldn't lock and needs to wait on the req bit lock - * @nonblock - if true, don't actually wait * - * NOTE: this must be called holding page_group bit lock and inode spin lock - * and BOTH will be released before returning. + * NOTE: this must be called holding page_group bit lock + * which will be released before returning. * * returns 0 on success, < 0 on error. */ -static int -nfs_unroll_locks_and_wait(struct inode *inode, struct nfs_page *head, - struct nfs_page *req, bool nonblock) - __releases(&inode->i_lock) +static void +nfs_unroll_locks(struct inode *inode, struct nfs_page *head, + struct nfs_page *req) { struct nfs_page *tmp; - int ret; /* relinquish all the locks successfully grabbed this run */ - for (tmp = head ; tmp != req; tmp = tmp->wb_this_page) - nfs_unlock_request(tmp); - - WARN_ON_ONCE(test_bit(PG_TEARDOWN, &req->wb_flags)); - - /* grab a ref on the request that will be waited on */ - kref_get(&req->wb_kref); - - nfs_page_group_unlock(head); - spin_unlock(&inode->i_lock); - - /* release ref from nfs_page_find_head_request_locked */ - nfs_release_request(head); - - if (!nonblock) - ret = nfs_wait_on_request(req); - else - ret = -EAGAIN; - nfs_release_request(req); - - return ret; + for (tmp = head->wb_this_page ; tmp != req; tmp = tmp->wb_this_page) { + if (!kref_read(&tmp->wb_kref)) + continue; + nfs_unlock_and_release_request(tmp); + } } /* @@ -417,7 +404,8 @@ nfs_unroll_locks_and_wait(struct inode *inode, struct nfs_page *head, */ static void nfs_destroy_unlinked_subrequests(struct nfs_page *destroy_list, - struct nfs_page *old_head) + struct nfs_page *old_head, + struct inode *inode) { while (destroy_list) { struct nfs_page *subreq = destroy_list; @@ -428,33 +416,28 @@ nfs_destroy_unlinked_subrequests(struct nfs_page *destroy_list, WARN_ON_ONCE(old_head != subreq->wb_head); /* make sure old group is not used */ - subreq->wb_head = subreq; subreq->wb_this_page = subreq; - /* subreq is now totally disconnected from page group or any - * write / commit lists. last chance to wake any waiters */ - nfs_unlock_request(subreq); + clear_bit(PG_REMOVE, &subreq->wb_flags); - if (!test_bit(PG_TEARDOWN, &subreq->wb_flags)) { - /* release ref on old head request */ - nfs_release_request(old_head); + /* Note: races with nfs_page_group_destroy() */ + if (!kref_read(&subreq->wb_kref)) { + /* Check if we raced with nfs_page_group_destroy() */ + if (test_and_clear_bit(PG_TEARDOWN, &subreq->wb_flags)) + nfs_free_request(subreq); + continue; + } - nfs_page_group_clear_bits(subreq); + subreq->wb_head = subreq; - /* release the PG_INODE_REF reference */ - if (test_and_clear_bit(PG_INODE_REF, &subreq->wb_flags)) - nfs_release_request(subreq); - else - WARN_ON_ONCE(1); - } else { - WARN_ON_ONCE(test_bit(PG_CLEAN, &subreq->wb_flags)); - /* zombie requests have already released the last - * reference and were waiting on the rest of the - * group to complete. Since it's no longer part of a - * group, simply free the request */ - nfs_page_group_clear_bits(subreq); - nfs_free_request(subreq); + if (test_and_clear_bit(PG_INODE_REF, &subreq->wb_flags)) { + nfs_release_request(subreq); + atomic_long_dec(&NFS_I(inode)->nrequests); } + + /* subreq is now totally disconnected from page group or any + * write / commit lists. last chance to wake any waiters */ + nfs_unlock_and_release_request(subreq); } } @@ -464,7 +447,6 @@ nfs_destroy_unlinked_subrequests(struct nfs_page *destroy_list, * operations for this page. * * @page - the page used to lookup the "page group" of nfs_page structures - * @nonblock - if true, don't block waiting for request locks * * This function joins all sub requests to the head request by first * locking all requests in the group, cancelling any pending operations @@ -478,7 +460,7 @@ nfs_destroy_unlinked_subrequests(struct nfs_page *destroy_list, * error was encountered. */ static struct nfs_page * -nfs_lock_and_join_requests(struct page *page, bool nonblock) +nfs_lock_and_join_requests(struct page *page) { struct inode *inode = page_file_mapping(page)->host; struct nfs_page *head, *subreq; @@ -487,43 +469,63 @@ nfs_lock_and_join_requests(struct page *page, bool nonblock) int ret; try_again: - total_bytes = 0; - - WARN_ON_ONCE(destroy_list); - - spin_lock(&inode->i_lock); - /* * A reference is taken only on the head request which acts as a * reference to the whole page group - the group will not be destroyed * until the head reference is released. */ - head = nfs_page_find_head_request_locked(NFS_I(inode), page); - - if (!head) { - spin_unlock(&inode->i_lock); + head = nfs_page_find_head_request(page); + if (!head) return NULL; - } - /* holding inode lock, so always make a non-blocking call to try the - * page group lock */ - ret = nfs_page_group_lock(head, true); - if (ret < 0) { - spin_unlock(&inode->i_lock); + /* lock the page head first in order to avoid an ABBA inefficiency */ + if (!nfs_lock_request(head)) { + ret = nfs_wait_on_request(head); + nfs_release_request(head); + if (ret < 0) + return ERR_PTR(ret); + goto try_again; + } - if (!nonblock && ret == -EAGAIN) { - nfs_page_group_lock_wait(head); - nfs_release_request(head); - goto try_again; - } + /* Ensure that nobody removed the request before we locked it */ + if (head != nfs_page_private_request(page) && !PageSwapCache(page)) { + nfs_unlock_and_release_request(head); + goto try_again; + } - nfs_release_request(head); + ret = nfs_page_group_lock(head); + if (ret < 0) { + nfs_unlock_and_release_request(head); return ERR_PTR(ret); } /* lock each request in the page group */ - subreq = head; - do { + total_bytes = head->wb_bytes; + for (subreq = head->wb_this_page; subreq != head; + subreq = subreq->wb_this_page) { + + if (!kref_get_unless_zero(&subreq->wb_kref)) { + if (subreq->wb_offset == head->wb_offset + total_bytes) + total_bytes += subreq->wb_bytes; + continue; + } + + while (!nfs_lock_request(subreq)) { + /* + * Unlock page to allow nfs_page_group_sync_on_bit() + * to succeed + */ + nfs_page_group_unlock(head); + ret = nfs_wait_on_request(subreq); + if (!ret) + ret = nfs_page_group_lock(head); + if (ret < 0) { + nfs_unroll_locks(inode, head, subreq); + nfs_release_request(subreq); + nfs_unlock_and_release_request(head); + return ERR_PTR(ret); + } + } /* * Subrequests are always contiguous, non overlapping * and in order - but may be repeated (mirrored writes). @@ -535,24 +537,12 @@ try_again: ((subreq->wb_offset + subreq->wb_bytes) > (head->wb_offset + total_bytes)))) { nfs_page_group_unlock(head); - spin_unlock(&inode->i_lock); + nfs_unroll_locks(inode, head, subreq); + nfs_unlock_and_release_request(subreq); + nfs_unlock_and_release_request(head); return ERR_PTR(-EIO); } - - if (!nfs_lock_request(subreq)) { - /* releases page group bit lock and - * inode spin lock and all references */ - ret = nfs_unroll_locks_and_wait(inode, head, - subreq, nonblock); - - if (ret == 0) - goto try_again; - - return ERR_PTR(ret); - } - - subreq = subreq->wb_this_page; - } while (subreq != head); + } /* Now that all requests are locked, make sure they aren't on any list. * Commit list removal accounting is done after locks are dropped */ @@ -573,34 +563,30 @@ try_again: head->wb_bytes = total_bytes; } - /* - * prepare head request to be added to new pgio descriptor - */ - nfs_page_group_clear_bits(head); - - /* - * some part of the group was still on the inode list - otherwise - * the group wouldn't be involved in async write. - * grab a reference for the head request, iff it needs one. - */ - if (!test_and_set_bit(PG_INODE_REF, &head->wb_flags)) + /* Postpone destruction of this request */ + if (test_and_clear_bit(PG_REMOVE, &head->wb_flags)) { + set_bit(PG_INODE_REF, &head->wb_flags); kref_get(&head->wb_kref); + atomic_long_inc(&NFS_I(inode)->nrequests); + } nfs_page_group_unlock(head); - /* drop lock to clean uprequests on destroy list */ - spin_unlock(&inode->i_lock); + nfs_destroy_unlinked_subrequests(destroy_list, head, inode); - nfs_destroy_unlinked_subrequests(destroy_list, head); + /* Did we lose a race with nfs_inode_remove_request()? */ + if (!(PagePrivate(page) || PageSwapCache(page))) { + nfs_unlock_and_release_request(head); + return NULL; + } - /* still holds ref on head from nfs_page_find_head_request_locked + /* still holds ref on head from nfs_page_find_head_request * and still has lock on head from lock loop */ return head; } static void nfs_write_error_remove_page(struct nfs_page *req) { - nfs_unlock_request(req); nfs_end_page_writeback(req); generic_error_remove_page(page_file_mapping(req->wb_page), req->wb_page); @@ -624,12 +610,12 @@ nfs_error_is_fatal_on_server(int err) * May return an error if the user signalled nfs_wait_on_request(). */ static int nfs_page_async_flush(struct nfs_pageio_descriptor *pgio, - struct page *page, bool nonblock) + struct page *page) { struct nfs_page *req; int ret = 0; - req = nfs_lock_and_join_requests(page, nonblock); + req = nfs_lock_and_join_requests(page); if (!req) goto out; ret = PTR_ERR(req); @@ -672,7 +658,7 @@ static int nfs_do_writepage(struct page *page, struct writeback_control *wbc, int ret; nfs_pageio_cond_complete(pgio, page_index(page)); - ret = nfs_page_async_flush(pgio, page, wbc->sync_mode == WB_SYNC_NONE); + ret = nfs_page_async_flush(pgio, page); if (ret == -EAGAIN) { redirty_page_for_writepage(wbc, page); ret = 0; @@ -759,6 +745,7 @@ out_err: */ static void nfs_inode_add_request(struct inode *inode, struct nfs_page *req) { + struct address_space *mapping = page_file_mapping(req->wb_page); struct nfs_inode *nfsi = NFS_I(inode); WARN_ON_ONCE(req->wb_this_page != req); @@ -766,27 +753,30 @@ static void nfs_inode_add_request(struct inode *inode, struct nfs_page *req) /* Lock the request! */ nfs_lock_request(req); - spin_lock(&inode->i_lock); - if (!nfsi->nrequests && - NFS_PROTO(inode)->have_delegation(inode, FMODE_WRITE)) - inode->i_version++; /* * Swap-space should not get truncated. Hence no need to plug the race * with invalidate/truncate. */ + spin_lock(&mapping->private_lock); + if (!nfs_have_writebacks(inode) && + NFS_PROTO(inode)->have_delegation(inode, FMODE_WRITE)) { + spin_lock(&inode->i_lock); + inode->i_version++; + spin_unlock(&inode->i_lock); + } if (likely(!PageSwapCache(req->wb_page))) { set_bit(PG_MAPPED, &req->wb_flags); SetPagePrivate(req->wb_page); set_page_private(req->wb_page, (unsigned long)req); } - nfsi->nrequests++; + spin_unlock(&mapping->private_lock); + atomic_long_inc(&nfsi->nrequests); /* this a head request for a page group - mark it as having an * extra reference so sub groups can follow suit. * This flag also informs pgio layer when to bump nrequests when * adding subrequests. */ WARN_ON(test_and_set_bit(PG_INODE_REF, &req->wb_flags)); kref_get(&req->wb_kref); - spin_unlock(&inode->i_lock); } /* @@ -794,25 +784,22 @@ static void nfs_inode_add_request(struct inode *inode, struct nfs_page *req) */ static void nfs_inode_remove_request(struct nfs_page *req) { - struct inode *inode = d_inode(req->wb_context->dentry); + struct address_space *mapping = page_file_mapping(req->wb_page); + struct inode *inode = mapping->host; struct nfs_inode *nfsi = NFS_I(inode); struct nfs_page *head; + atomic_long_dec(&nfsi->nrequests); if (nfs_page_group_sync_on_bit(req, PG_REMOVE)) { head = req->wb_head; - spin_lock(&inode->i_lock); + spin_lock(&mapping->private_lock); if (likely(head->wb_page && !PageSwapCache(head->wb_page))) { set_page_private(head->wb_page, 0); ClearPagePrivate(head->wb_page); clear_bit(PG_MAPPED, &head->wb_flags); } - nfsi->nrequests--; - spin_unlock(&inode->i_lock); - } else { - spin_lock(&inode->i_lock); - nfsi->nrequests--; - spin_unlock(&inode->i_lock); + spin_unlock(&mapping->private_lock); } if (test_and_clear_bit(PG_INODE_REF, &req->wb_flags)) @@ -868,7 +855,8 @@ nfs_page_search_commits_for_head_request_locked(struct nfs_inode *nfsi, * number of outstanding requests requiring a commit as well as * the MM page stats. * - * The caller must hold cinfo->inode->i_lock, and the nfs_page lock. + * The caller must hold NFS_I(cinfo->inode)->commit_mutex, and the + * nfs_page lock. */ void nfs_request_add_commit_list_locked(struct nfs_page *req, struct list_head *dst, @@ -876,7 +864,7 @@ nfs_request_add_commit_list_locked(struct nfs_page *req, struct list_head *dst, { set_bit(PG_CLEAN, &req->wb_flags); nfs_list_add_request(req, dst); - cinfo->mds->ncommit++; + atomic_long_inc(&cinfo->mds->ncommit); } EXPORT_SYMBOL_GPL(nfs_request_add_commit_list_locked); @@ -896,9 +884,9 @@ EXPORT_SYMBOL_GPL(nfs_request_add_commit_list_locked); void nfs_request_add_commit_list(struct nfs_page *req, struct nfs_commit_info *cinfo) { - spin_lock(&cinfo->inode->i_lock); + mutex_lock(&NFS_I(cinfo->inode)->commit_mutex); nfs_request_add_commit_list_locked(req, &cinfo->mds->list, cinfo); - spin_unlock(&cinfo->inode->i_lock); + mutex_unlock(&NFS_I(cinfo->inode)->commit_mutex); if (req->wb_page) nfs_mark_page_unstable(req->wb_page, cinfo); } @@ -922,7 +910,7 @@ nfs_request_remove_commit_list(struct nfs_page *req, if (!test_and_clear_bit(PG_CLEAN, &(req)->wb_flags)) return; nfs_list_remove_request(req); - cinfo->mds->ncommit--; + atomic_long_dec(&cinfo->mds->ncommit); } EXPORT_SYMBOL_GPL(nfs_request_remove_commit_list); @@ -967,7 +955,7 @@ nfs_clear_page_commit(struct page *page) WB_RECLAIMABLE); } -/* Called holding inode (/cinfo) lock */ +/* Called holding the request lock on @req */ static void nfs_clear_request_commit(struct nfs_page *req) { @@ -976,9 +964,11 @@ nfs_clear_request_commit(struct nfs_page *req) struct nfs_commit_info cinfo; nfs_init_cinfo_from_inode(&cinfo, inode); + mutex_lock(&NFS_I(inode)->commit_mutex); if (!pnfs_clear_request_commit(req, &cinfo)) { nfs_request_remove_commit_list(req, &cinfo); } + mutex_unlock(&NFS_I(inode)->commit_mutex); nfs_clear_page_commit(req->wb_page); } } @@ -1023,7 +1013,6 @@ static void nfs_write_completion(struct nfs_pgio_header *hdr) remove_req: nfs_inode_remove_request(req); next: - nfs_unlock_request(req); nfs_end_page_writeback(req); nfs_release_request(req); } @@ -1035,10 +1024,10 @@ out: unsigned long nfs_reqs_to_commit(struct nfs_commit_info *cinfo) { - return cinfo->mds->ncommit; + return atomic_long_read(&cinfo->mds->ncommit); } -/* cinfo->inode->i_lock held by caller */ +/* NFS_I(cinfo->inode)->commit_mutex held by caller */ int nfs_scan_commit_list(struct list_head *src, struct list_head *dst, struct nfs_commit_info *cinfo, int max) @@ -1046,20 +1035,37 @@ nfs_scan_commit_list(struct list_head *src, struct list_head *dst, struct nfs_page *req, *tmp; int ret = 0; +restart: list_for_each_entry_safe(req, tmp, src, wb_list) { - if (!nfs_lock_request(req)) - continue; kref_get(&req->wb_kref); - if (cond_resched_lock(&cinfo->inode->i_lock)) - list_safe_reset_next(req, tmp, wb_list); + if (!nfs_lock_request(req)) { + int status; + + /* Prevent deadlock with nfs_lock_and_join_requests */ + if (!list_empty(dst)) { + nfs_release_request(req); + continue; + } + /* Ensure we make progress to prevent livelock */ + mutex_unlock(&NFS_I(cinfo->inode)->commit_mutex); + status = nfs_wait_on_request(req); + nfs_release_request(req); + mutex_lock(&NFS_I(cinfo->inode)->commit_mutex); + if (status < 0) + break; + goto restart; + } nfs_request_remove_commit_list(req, cinfo); + clear_bit(PG_COMMIT_TO_DS, &req->wb_flags); nfs_list_add_request(req, dst); ret++; if ((ret == max) && !cinfo->dreq) break; + cond_resched(); } return ret; } +EXPORT_SYMBOL_GPL(nfs_scan_commit_list); /* * nfs_scan_commit - Scan an inode for commit requests @@ -1076,15 +1082,17 @@ nfs_scan_commit(struct inode *inode, struct list_head *dst, { int ret = 0; - spin_lock(&cinfo->inode->i_lock); - if (cinfo->mds->ncommit > 0) { + if (!atomic_long_read(&cinfo->mds->ncommit)) + return 0; + mutex_lock(&NFS_I(cinfo->inode)->commit_mutex); + if (atomic_long_read(&cinfo->mds->ncommit) > 0) { const int max = INT_MAX; ret = nfs_scan_commit_list(&cinfo->mds->list, dst, cinfo, max); ret += pnfs_scan_commit_lists(inode, cinfo, max - ret); } - spin_unlock(&cinfo->inode->i_lock); + mutex_unlock(&NFS_I(cinfo->inode)->commit_mutex); return ret; } @@ -1105,43 +1113,21 @@ static struct nfs_page *nfs_try_to_update_request(struct inode *inode, unsigned int end; int error; - if (!PagePrivate(page)) - return NULL; - end = offset + bytes; - spin_lock(&inode->i_lock); - - for (;;) { - req = nfs_page_find_head_request_locked(NFS_I(inode), page); - if (req == NULL) - goto out_unlock; - /* should be handled by nfs_flush_incompatible */ - WARN_ON_ONCE(req->wb_head != req); - WARN_ON_ONCE(req->wb_this_page != req); - - rqend = req->wb_offset + req->wb_bytes; - /* - * Tell the caller to flush out the request if - * the offsets are non-contiguous. - * Note: nfs_flush_incompatible() will already - * have flushed out requests having wrong owners. - */ - if (offset > rqend - || end < req->wb_offset) - goto out_flushme; - - if (nfs_lock_request(req)) - break; + req = nfs_lock_and_join_requests(page); + if (IS_ERR_OR_NULL(req)) + return req; - /* The request is locked, so wait and then retry */ - spin_unlock(&inode->i_lock); - error = nfs_wait_on_request(req); - nfs_release_request(req); - if (error != 0) - goto out_err; - spin_lock(&inode->i_lock); - } + rqend = req->wb_offset + req->wb_bytes; + /* + * Tell the caller to flush out the request if + * the offsets are non-contiguous. + * Note: nfs_flush_incompatible() will already + * have flushed out requests having wrong owners. + */ + if (offset > rqend || end < req->wb_offset) + goto out_flushme; /* Okay, the request matches. Update the region */ if (offset < req->wb_offset) { @@ -1152,17 +1138,17 @@ static struct nfs_page *nfs_try_to_update_request(struct inode *inode, req->wb_bytes = end - req->wb_offset; else req->wb_bytes = rqend - req->wb_offset; -out_unlock: - if (req) - nfs_clear_request_commit(req); - spin_unlock(&inode->i_lock); return req; out_flushme: - spin_unlock(&inode->i_lock); - nfs_release_request(req); + /* + * Note: we mark the request dirty here because + * nfs_lock_and_join_requests() cannot preserve + * commit flags, so we have to replay the write. + */ + nfs_mark_request_dirty(req); + nfs_unlock_and_release_request(req); error = nfs_wb_page(inode, page); -out_err: - return ERR_PTR(error); + return (error < 0) ? ERR_PTR(error) : NULL; } /* @@ -1227,8 +1213,6 @@ int nfs_flush_incompatible(struct file *file, struct page *page) l_ctx = req->wb_lock_context; do_flush = req->wb_page != page || !nfs_match_open_context(req->wb_context, ctx); - /* for now, flush if more than 1 request in page_group */ - do_flush |= req->wb_this_page != req; if (l_ctx && flctx && !(list_empty_careful(&flctx->flc_posix) && list_empty_careful(&flctx->flc_flock))) { @@ -1412,7 +1396,6 @@ static void nfs_redirty_request(struct nfs_page *req) { nfs_mark_request_dirty(req); set_bit(NFS_CONTEXT_RESEND_WRITES, &req->wb_context->flags); - nfs_unlock_request(req); nfs_end_page_writeback(req); nfs_release_request(req); } @@ -1452,7 +1435,7 @@ void nfs_pageio_init_write(struct nfs_pageio_descriptor *pgio, pg_ops = server->pnfs_curr_ld->pg_write_ops; #endif nfs_pageio_init(pgio, inode, pg_ops, compl_ops, &nfs_rw_write_ops, - server->wsize, ioflags, GFP_NOIO); + server->wsize, ioflags); } EXPORT_SYMBOL_GPL(nfs_pageio_init_write); @@ -1934,7 +1917,7 @@ int nfs_write_inode(struct inode *inode, struct writeback_control *wbc) int ret = 0; /* no commits means nothing needs to be done */ - if (!nfsi->commit_info.ncommit) + if (!atomic_long_read(&nfsi->commit_info.ncommit)) return ret; if (wbc->sync_mode == WB_SYNC_NONE) { @@ -2015,7 +1998,7 @@ int nfs_wb_page_cancel(struct inode *inode, struct page *page) /* blocking call to cancel all requests and join to a single (head) * request */ - req = nfs_lock_and_join_requests(page, false); + req = nfs_lock_and_join_requests(page); if (IS_ERR(req)) { ret = PTR_ERR(req); |