diff options
author | Linus Torvalds | 2021-07-09 09:43:57 -0700 |
---|---|---|
committer | Linus Torvalds | 2021-07-09 09:43:57 -0700 |
commit | 96890bc2eaa1f6bfc1b194e0f0815a10824352a4 (patch) | |
tree | c26a6f72adc8aa5ce62cc7549d95e275109e5617 | |
parent | 227c4d507c71acb7bece298a98d83e5b44433f62 (diff) | |
parent | 878b3dfc42c4ddbf9e38cd9061e3ddd99a69747a (diff) |
Merge tag 'nfs-for-5.14-1' of git://git.linux-nfs.org/projects/trondmy/linux-nfs
Pull NFS client updates from Trond Myklebust:
"Highlights include:
Features:
- Multiple patches to add support for fcntl() leases over NFSv4.
- A sysfs interface to display more information about the various
  transport connections used by the RPC client
- A sysfs interface to allow a suitably privileged user to offline a
  transport that may no longer point to a valid server
- A sysfs interface to allow a suitably privileged user to change the
  server IP address used by the RPC client
Stable fixes:
- Two sunrpc fixes for deadlocks involving privileged rpc_wait_queues
Bugfixes:
- SUNRPC: Avoid a KASAN slab-out-of-bounds bug in xdr_set_page_base()
- SUNRPC: prevent port reuse on transports which don't request it.
- NFSv3: Fix memory leak in posix_acl_create()
- NFS: Various fixes to attribute revalidation timeouts
- NFSv4: Fix handling of non-atomic change attribute updates
- NFSv4: If a server is down, don't cause mounts to other servers to
  hang as well
- pNFS: Fix an Oops in pnfs_mark_request_commit() when doing O_DIRECT
- NFS: Fix mount failures due to incorrect setting of the
  has_sec_mnt_opts filesystem flag
- NFS: Ensure nfs_readpage returns promptly when an internal error
  occurs
- NFS: Fix fscache read from NFS after cache error
- pNFS: Various bugfixes around the LAYOUTGET operation"
* tag 'nfs-for-5.14-1' of git://git.linux-nfs.org/projects/trondmy/linux-nfs: (46 commits)
NFSv4/pNFS: Return an error if _nfs4_pnfs_v3_ds_connect can't load NFSv3
NFSv4/pNFS: Don't call _nfs4_pnfs_v3_ds_connect multiple times
NFSv4/pnfs: Clean up layout get on open
NFSv4/pnfs: Fix layoutget behaviour after invalidation
NFSv4/pnfs: Fix the layout barrier update
NFS: Fix fscache read from NFS after cache error
NFS: Ensure nfs_readpage returns promptly when internal error occurs
sunrpc: remove an offlined xprt using sysfs
sunrpc: provide showing transport's state info in the sysfs directory
sunrpc: display xprt's queuelen of assigned tasks via sysfs
sunrpc: provide multipath info in the sysfs directory
NFSv4.1 identify and mark RPC tasks that can move between transports
sunrpc: provide transport info in the sysfs directory
SUNRPC: take a xprt offline using sysfs
sunrpc: add dst_attr attributes to the sysfs xprt directory
SUNRPC for TCP display xprt's source port in sysfs xprt_info
SUNRPC query transport's source port
SUNRPC display xprt's main value in sysfs's xprt_info
SUNRPC mark the first transport
sunrpc: add add sysfs directory per xprt under each xprt_switch
...
34 files changed, 1140 insertions, 234 deletions
diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c index e6ec6f09ac6e..11118398f495 100644 --- a/fs/nfs/delegation.c +++ b/fs/nfs/delegation.c @@ -75,6 +75,13 @@ void nfs_mark_delegation_referenced(struct nfs_delegation *delegation) set_bit(NFS_DELEGATION_REFERENCED, &delegation->flags); } +static void nfs_mark_return_delegation(struct nfs_server *server, + struct nfs_delegation *delegation) +{ + set_bit(NFS_DELEGATION_RETURN, &delegation->flags); + set_bit(NFS4CLNT_DELEGRETURN, &server->nfs_client->cl_state); +} + static bool nfs4_is_valid_delegation(const struct nfs_delegation *delegation, fmode_t flags) @@ -293,6 +300,7 @@ nfs_start_delegation_return_locked(struct nfs_inode *nfsi) goto out; spin_lock(&delegation->lock); if (!test_and_set_bit(NFS_DELEGATION_RETURNING, &delegation->flags)) { + clear_bit(NFS_DELEGATION_RETURN_DELAYED, &delegation->flags); /* Refcount matched in nfs_end_delegation_return() */ ret = nfs_get_delegation(delegation); } @@ -314,16 +322,17 @@ nfs_start_delegation_return(struct nfs_inode *nfsi) return delegation; } -static void -nfs_abort_delegation_return(struct nfs_delegation *delegation, - struct nfs_client *clp) +static void nfs_abort_delegation_return(struct nfs_delegation *delegation, + struct nfs_client *clp, int err) { spin_lock(&delegation->lock); clear_bit(NFS_DELEGATION_RETURNING, &delegation->flags); - set_bit(NFS_DELEGATION_RETURN, &delegation->flags); + if (err == -EAGAIN) { + set_bit(NFS_DELEGATION_RETURN_DELAYED, &delegation->flags); + set_bit(NFS4CLNT_DELEGRETURN_DELAYED, &clp->cl_state); + } spin_unlock(&delegation->lock); - set_bit(NFS4CLNT_DELEGRETURN, &clp->cl_state); } static struct nfs_delegation * @@ -521,11 +530,18 @@ out: static int nfs_end_delegation_return(struct inode *inode, struct nfs_delegation *delegation, int issync) { struct nfs_client *clp = NFS_SERVER(inode)->nfs_client; + unsigned int mode = O_WRONLY | O_RDWR; int err = 0; if (delegation == NULL) return 0; - do { + + if (!issync) + mode |= 
O_NONBLOCK; + /* Recall of any remaining application leases */ + err = break_lease(inode, mode); + + while (err == 0) { if (test_bit(NFS_DELEGATION_REVOKED, &delegation->flags)) break; err = nfs_delegation_claim_opens(inode, &delegation->stateid, @@ -536,10 +552,10 @@ static int nfs_end_delegation_return(struct inode *inode, struct nfs_delegation * Guard against state recovery */ err = nfs4_wait_clnt_recover(clp); - } while (err == 0); + } if (err) { - nfs_abort_delegation_return(delegation, clp); + nfs_abort_delegation_return(delegation, clp, err); goto out; } @@ -568,6 +584,7 @@ static bool nfs_delegation_need_return(struct nfs_delegation *delegation) if (ret) clear_bit(NFS_DELEGATION_RETURN_IF_CLOSED, &delegation->flags); if (test_bit(NFS_DELEGATION_RETURNING, &delegation->flags) || + test_bit(NFS_DELEGATION_RETURN_DELAYED, &delegation->flags) || test_bit(NFS_DELEGATION_REVOKED, &delegation->flags)) ret = false; @@ -647,6 +664,38 @@ out: return err; } +static bool nfs_server_clear_delayed_delegations(struct nfs_server *server) +{ + struct nfs_delegation *d; + bool ret = false; + + list_for_each_entry_rcu (d, &server->delegations, super_list) { + if (!test_bit(NFS_DELEGATION_RETURN_DELAYED, &d->flags)) + continue; + nfs_mark_return_delegation(server, d); + clear_bit(NFS_DELEGATION_RETURN_DELAYED, &d->flags); + ret = true; + } + return ret; +} + +static bool nfs_client_clear_delayed_delegations(struct nfs_client *clp) +{ + struct nfs_server *server; + bool ret = false; + + if (!test_and_clear_bit(NFS4CLNT_DELEGRETURN_DELAYED, &clp->cl_state)) + goto out; + rcu_read_lock(); + list_for_each_entry_rcu (server, &clp->cl_superblocks, client_link) { + if (nfs_server_clear_delayed_delegations(server)) + ret = true; + } + rcu_read_unlock(); +out: + return ret; +} + /** * nfs_client_return_marked_delegations - return previously marked delegations * @clp: nfs_client to process @@ -659,8 +708,14 @@ out: */ int nfs_client_return_marked_delegations(struct nfs_client *clp) { - 
return nfs_client_for_each_server(clp, - nfs_server_return_marked_delegations, NULL); + int err = nfs_client_for_each_server( + clp, nfs_server_return_marked_delegations, NULL); + if (err) + return err; + /* If a return was delayed, sleep to prevent hard looping */ + if (nfs_client_clear_delayed_delegations(clp)) + ssleep(1); + return 0; } /** @@ -698,13 +753,14 @@ int nfs4_inode_return_delegation(struct inode *inode) { struct nfs_inode *nfsi = NFS_I(inode); struct nfs_delegation *delegation; - int err = 0; - nfs_wb_all(inode); delegation = nfs_start_delegation_return(nfsi); + /* Synchronous recall of any application leases */ + break_lease(inode, O_WRONLY | O_RDWR); + nfs_wb_all(inode); if (delegation != NULL) - err = nfs_end_delegation_return(inode, delegation, 1); - return err; + return nfs_end_delegation_return(inode, delegation, 1); + return 0; } /** @@ -775,13 +831,6 @@ static void nfs_mark_return_if_closed_delegation(struct nfs_server *server, set_bit(NFS4CLNT_DELEGRETURN, &server->nfs_client->cl_state); } -static void nfs_mark_return_delegation(struct nfs_server *server, - struct nfs_delegation *delegation) -{ - set_bit(NFS_DELEGATION_RETURN, &delegation->flags); - set_bit(NFS4CLNT_DELEGRETURN, &server->nfs_client->cl_state); -} - static bool nfs_server_mark_return_all_delegations(struct nfs_server *server) { struct nfs_delegation *delegation; @@ -1010,6 +1059,9 @@ int nfs_async_inode_return_delegation(struct inode *inode, nfs_mark_return_delegation(server, delegation); rcu_read_unlock(); + /* If there are any application leases or delegations, recall them */ + break_lease(inode, O_WRONLY | O_RDWR | O_NONBLOCK); + nfs_delegation_run_state_manager(clp); return 0; out_enoent: diff --git a/fs/nfs/delegation.h b/fs/nfs/delegation.h index c19b4fd20781..1c378992b7c0 100644 --- a/fs/nfs/delegation.h +++ b/fs/nfs/delegation.h @@ -36,6 +36,7 @@ enum { NFS_DELEGATION_REVOKED, NFS_DELEGATION_TEST_EXPIRED, NFS_DELEGATION_INODE_FREEING, + NFS_DELEGATION_RETURN_DELAYED, 
}; int nfs_inode_set_delegation(struct inode *inode, const struct cred *cred, diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index 2d30a4da49fa..2e894fec036b 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -700,8 +700,8 @@ static void nfs_direct_write_completion(struct nfs_pgio_header *hdr) { struct nfs_direct_req *dreq = hdr->dreq; struct nfs_commit_info cinfo; - bool request_commit = false; struct nfs_page *req = nfs_list_entry(hdr->pages.next); + int flags = NFS_ODIRECT_DONE; nfs_init_cinfo_from_dreq(&cinfo, dreq); @@ -713,15 +713,9 @@ static void nfs_direct_write_completion(struct nfs_pgio_header *hdr) nfs_direct_count_bytes(dreq, hdr); if (hdr->good_bytes != 0 && nfs_write_need_commit(hdr)) { - switch (dreq->flags) { - case 0: + if (!dreq->flags) dreq->flags = NFS_ODIRECT_DO_COMMIT; - request_commit = true; - break; - case NFS_ODIRECT_RESCHED_WRITES: - case NFS_ODIRECT_DO_COMMIT: - request_commit = true; - } + flags = dreq->flags; } spin_unlock(&dreq->lock); @@ -729,12 +723,15 @@ static void nfs_direct_write_completion(struct nfs_pgio_header *hdr) req = nfs_list_entry(hdr->pages.next); nfs_list_remove_request(req); - if (request_commit) { + if (flags == NFS_ODIRECT_DO_COMMIT) { kref_get(&req->wb_kref); memcpy(&req->wb_verf, &hdr->verf.verifier, sizeof(req->wb_verf)); nfs_mark_request_commit(req, hdr->lseg, &cinfo, hdr->ds_commit_idx); + } else if (flags == NFS_ODIRECT_RESCHED_WRITES) { + kref_get(&req->wb_kref); + nfs_mark_request_commit(req, NULL, &cinfo, 0); } nfs_unlock_and_release_request(req); } diff --git a/fs/nfs/fscache.c b/fs/nfs/fscache.c index c4c021c6ebbd..d743629e05e1 100644 --- a/fs/nfs/fscache.c +++ b/fs/nfs/fscache.c @@ -385,12 +385,15 @@ static void nfs_readpage_from_fscache_complete(struct page *page, "NFS: readpage_from_fscache_complete (0x%p/0x%p/%d)\n", page, context, error); - /* if the read completes with an error, we just unlock the page and let - * the VM reissue the readpage */ - if (!error) { + /* + * If the read completes with 
an error, mark the page with PG_checked, + * unlock the page, and let the VM reissue the readpage. + */ + if (!error) SetPageUptodate(page); - unlock_page(page); - } + else + SetPageChecked(page); + unlock_page(page); } /* @@ -405,6 +408,11 @@ int __nfs_readpage_from_fscache(struct nfs_open_context *ctx, "NFS: readpage_from_fscache(fsc:%p/p:%p(i:%lx f:%lx)/0x%p)\n", nfs_i_fscache(inode), page, page->index, page->flags, inode); + if (PageChecked(page)) { + ClearPageChecked(page); + return 1; + } + ret = fscache_read_or_alloc_page(nfs_i_fscache(inode), page, nfs_readpage_from_fscache_complete, diff --git a/fs/nfs/getroot.c b/fs/nfs/getroot.c index aaeeb4659bff..59355c106ece 100644 --- a/fs/nfs/getroot.c +++ b/fs/nfs/getroot.c @@ -67,7 +67,7 @@ static int nfs_superblock_set_dummy_root(struct super_block *sb, struct inode *i int nfs_get_root(struct super_block *s, struct fs_context *fc) { struct nfs_fs_context *ctx = nfs_fc2context(fc); - struct nfs_server *server = NFS_SB(s); + struct nfs_server *server = NFS_SB(s), *clone_server; struct nfs_fsinfo fsinfo; struct dentry *root; struct inode *inode; @@ -127,7 +127,7 @@ int nfs_get_root(struct super_block *s, struct fs_context *fc) } spin_unlock(&root->d_lock); fc->root = root; - if (NFS_SB(s)->caps & NFS_CAP_SECURITY_LABEL) + if (server->caps & NFS_CAP_SECURITY_LABEL) kflags |= SECURITY_LSM_NATIVE_LABELS; if (ctx->clone_data.sb) { if (d_inode(fc->root)->i_fop != &nfs_dir_operations) { @@ -137,15 +137,19 @@ int nfs_get_root(struct super_block *s, struct fs_context *fc) /* clone lsm security options from the parent to the new sb */ error = security_sb_clone_mnt_opts(ctx->clone_data.sb, s, kflags, &kflags_out); + if (error) + goto error_splat_root; + clone_server = NFS_SB(ctx->clone_data.sb); + server->has_sec_mnt_opts = clone_server->has_sec_mnt_opts; } else { error = security_sb_set_mnt_opts(s, fc->security, kflags, &kflags_out); } if (error) goto error_splat_root; - if (NFS_SB(s)->caps & NFS_CAP_SECURITY_LABEL && + if 
(server->caps & NFS_CAP_SECURITY_LABEL && !(kflags_out & SECURITY_LSM_NATIVE_LABELS)) - NFS_SB(s)->caps &= ~NFS_CAP_SECURITY_LABEL; + server->caps &= ~NFS_CAP_SECURITY_LABEL; nfs_setsecurity(inode, fsinfo.fattr, fsinfo.fattr->label); error = 0; diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 529c4099f482..853213b3a209 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -1101,6 +1101,7 @@ EXPORT_SYMBOL_GPL(nfs_inode_attach_open_context); void nfs_file_set_open_context(struct file *filp, struct nfs_open_context *ctx) { filp->private_data = get_nfs_open_context(ctx); + set_bit(NFS_CONTEXT_FILE_OPEN, &ctx->flags); if (list_empty(&ctx->list)) nfs_inode_attach_open_context(ctx); } @@ -1120,6 +1121,8 @@ struct nfs_open_context *nfs_find_open_context(struct inode *inode, const struct continue; if ((pos->mode & (FMODE_READ|FMODE_WRITE)) != mode) continue; + if (!test_bit(NFS_CONTEXT_FILE_OPEN, &pos->flags)) + continue; ctx = get_nfs_open_context(pos); if (ctx) break; @@ -1135,6 +1138,7 @@ void nfs_file_clear_open_context(struct file *filp) if (ctx) { struct inode *inode = d_inode(ctx->dentry); + clear_bit(NFS_CONTEXT_FILE_OPEN, &ctx->flags); /* * We fatal error on write before. Try to writeback * every page again. 
@@ -2055,35 +2059,33 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) | NFS_INO_INVALID_OTHER; if (S_ISDIR(inode->i_mode)) nfs_force_lookup_revalidate(inode); + attr_changed = true; dprintk("NFS: change_attr change on server for file %s/%ld\n", inode->i_sb->s_id, inode->i_ino); } else if (!have_delegation) nfsi->cache_validity |= NFS_INO_DATA_INVAL_DEFER; inode_set_iversion_raw(inode, fattr->change_attr); - attr_changed = true; } } else { nfsi->cache_validity |= save_cache_validity & NFS_INO_INVALID_CHANGE; - cache_revalidated = false; + if (!have_delegation || + (nfsi->cache_validity & NFS_INO_INVALID_CHANGE) != 0) + cache_revalidated = false; } - if (fattr->valid & NFS_ATTR_FATTR_MTIME) { + if (fattr->valid & NFS_ATTR_FATTR_MTIME) inode->i_mtime = fattr->mtime; - } else if (fattr_supported & NFS_ATTR_FATTR_MTIME) { + else if (fattr_supported & NFS_ATTR_FATTR_MTIME) nfsi->cache_validity |= save_cache_validity & NFS_INO_INVALID_MTIME; - cache_revalidated = false; - } - if (fattr->valid & NFS_ATTR_FATTR_CTIME) { + if (fattr->valid & NFS_ATTR_FATTR_CTIME) inode->i_ctime = fattr->ctime; - } else if (fattr_supported & NFS_ATTR_FATTR_CTIME) { + else if (fattr_supported & NFS_ATTR_FATTR_CTIME) nfsi->cache_validity |= save_cache_validity & NFS_INO_INVALID_CTIME; - cache_revalidated = false; - } /* Check if our cached file size is stale */ if (fattr->valid & NFS_ATTR_FATTR_SIZE) { @@ -2096,7 +2098,6 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) i_size_write(inode, new_isize); if (!have_writers) invalid |= NFS_INO_INVALID_DATA; - attr_changed = true; } dprintk("NFS: isize change on server for file %s/%ld " "(%Ld to %Ld)\n", @@ -2111,19 +2112,15 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) fattr->du.nfs3.used = 0; fattr->valid |= NFS_ATTR_FATTR_SPACE_USED; } - } else { + } else nfsi->cache_validity |= save_cache_validity & NFS_INO_INVALID_SIZE; - cache_revalidated = false; - } if 
(fattr->valid & NFS_ATTR_FATTR_ATIME) inode->i_atime = fattr->atime; - else if (fattr_supported & NFS_ATTR_FATTR_ATIME) { + else if (fattr_supported & NFS_ATTR_FATTR_ATIME) nfsi->cache_validity |= save_cache_validity & NFS_INO_INVALID_ATIME; - cache_revalidated = false; - } if (fattr->valid & NFS_ATTR_FATTR_MODE) { if ((inode->i_mode & S_IALLUGO) != (fattr->mode & S_IALLUGO)) { @@ -2132,71 +2129,55 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) inode->i_mode = newmode; invalid |= NFS_INO_INVALID_ACCESS | NFS_INO_INVALID_ACL; - attr_changed = true; } - } else if (fattr_supported & NFS_ATTR_FATTR_MODE) { + } else if (fattr_supported & NFS_ATTR_FATTR_MODE) nfsi->cache_validity |= save_cache_validity & NFS_INO_INVALID_MODE; - cache_revalidated = false; - } if (fattr->valid & NFS_ATTR_FATTR_OWNER) { if (!uid_eq(inode->i_uid, fattr->uid)) { invalid |= NFS_INO_INVALID_ACCESS | NFS_INO_INVALID_ACL; inode->i_uid = fattr->uid; - attr_changed = true; } - } else if (fattr_supported & NFS_ATTR_FATTR_OWNER) { + } else if (fattr_supported & NFS_ATTR_FATTR_OWNER) nfsi->cache_validity |= save_cache_validity & NFS_INO_INVALID_OTHER; - cache_revalidated = false; - } if (fattr->valid & NFS_ATTR_FATTR_GROUP) { if (!gid_eq(inode->i_gid, fattr->gid)) { invalid |= NFS_INO_INVALID_ACCESS | NFS_INO_INVALID_ACL; inode->i_gid = fattr->gid; - attr_changed = true; } - } else if (fattr_supported & NFS_ATTR_FATTR_GROUP) { + } else if (fattr_supported & NFS_ATTR_FATTR_GROUP) nfsi->cache_validity |= save_cache_validity & NFS_INO_INVALID_OTHER; - cache_revalidated = false; - } if (fattr->valid & NFS_ATTR_FATTR_NLINK) { if (inode->i_nlink != fattr->nlink) { if (S_ISDIR(inode->i_mode)) invalid |= NFS_INO_INVALID_DATA; set_nlink(inode, fattr->nlink); - attr_changed = true; } - } else if (fattr_supported & NFS_ATTR_FATTR_NLINK) { + } else if (fattr_supported & NFS_ATTR_FATTR_NLINK) nfsi->cache_validity |= save_cache_validity & NFS_INO_INVALID_NLINK; - cache_revalidated = 
false; - } if (fattr->valid & NFS_ATTR_FATTR_SPACE_USED) { /* * report the blocks in 512byte units */ inode->i_blocks = nfs_calc_block_size(fattr->du.nfs3.used); - } else if (fattr_supported & NFS_ATTR_FATTR_SPACE_USED) { + } else if (fattr_supported & NFS_ATTR_FATTR_SPACE_USED) nfsi->cache_validity |= save_cache_validity & NFS_INO_INVALID_BLOCKS; - cache_revalidated = false; - } - if (fattr->valid & NFS_ATTR_FATTR_BLOCKS_USED) { + if (fattr->valid & NFS_ATTR_FATTR_BLOCKS_USED) inode->i_blocks = fattr->du.nfs2.blocks; - } else if (fattr_supported & NFS_ATTR_FATTR_BLOCKS_USED) { + else if (fattr_supported & NFS_ATTR_FATTR_BLOCKS_USED) nfsi->cache_validity |= save_cache_validity & NFS_INO_INVALID_BLOCKS; - cache_revalidated = false; - } /* Update attrtimeo value if we're out of the unstable period */ if (attr_changed) { diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c index 5c4e23abc345..2299446b3b89 100644 --- a/fs/nfs/nfs3proc.c +++ b/fs/nfs/nfs3proc.c @@ -385,7 +385,7 @@ nfs3_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr, break; case NFS3_CREATE_UNCHECKED: - goto out; + goto out_release_acls; } nfs_fattr_init(data->res.dir_attr); nfs_fattr_init(data->res.fattr); @@ -751,7 +751,7 @@ nfs3_proc_mknod(struct inode *dir, struct dentry *dentry, struct iattr *sattr, break; default: status = -EINVAL; - goto out; + goto out_release_acls; } d_alias = nfs3_do_create(dir, dentry, data); diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h index 543d916f79ab..ba78df4b13d9 100644 --- a/fs/nfs/nfs4_fs.h +++ b/fs/nfs/nfs4_fs.h @@ -45,6 +45,7 @@ enum nfs4_client_state { NFS4CLNT_RECALL_RUNNING, NFS4CLNT_RECALL_ANY_LAYOUT_READ, NFS4CLNT_RECALL_ANY_LAYOUT_RW, + NFS4CLNT_DELEGRETURN_DELAYED, }; #define NFS4_RENEW_TIMEOUT 0x01 @@ -322,7 +323,8 @@ extern int update_open_stateid(struct nfs4_state *state, const nfs4_stateid *open_stateid, const nfs4_stateid *deleg_stateid, fmode_t fmode); - +extern int nfs4_proc_setlease(struct file *file, long arg, + struct 
file_lock **lease, void **priv); extern int nfs4_proc_get_lease_time(struct nfs_client *clp, struct nfs_fsinfo *fsinfo); extern void nfs4_update_changeattr(struct inode *dir, diff --git a/fs/nfs/nfs4client.c b/fs/nfs/nfs4client.c index 42719384e25f..28431acd1230 100644 --- a/fs/nfs/nfs4client.c +++ b/fs/nfs/nfs4client.c @@ -197,8 +197,11 @@ void nfs40_shutdown_client(struct nfs_client *clp) struct nfs_client *nfs4_alloc_client(const struct nfs_client_initdata *cl_init) { - int err; + char buf[INET6_ADDRSTRLEN + 1]; + const char *ip_addr = cl_init->ip_addr; struct nfs_client *clp = nfs_alloc_client(cl_init); + int err; + if (IS_ERR(clp)) return clp; @@ -222,6 +225,44 @@ struct nfs_client *nfs4_alloc_client(const struct nfs_client_initdata *cl_init) init_waitqueue_head(&clp->cl_lock_waitq); #endif INIT_LIST_HEAD(&clp->pending_cb_stateids); + + if (cl_init->minorversion != 0) + __set_bit(NFS_CS_INFINITE_SLOTS, &clp->cl_flags); + __set_bit(NFS_CS_DISCRTRY, &clp->cl_flags); + __set_bit(NFS_CS_NO_RETRANS_TIMEOUT, &clp->cl_flags); + + /* + * Set up the connection to the server before we add add to the + * global list. + */ + err = nfs_create_rpc_client(clp, cl_init, RPC_AUTH_GSS_KRB5I); + if (err == -EINVAL) + err = nfs_create_rpc_client(clp, cl_init, RPC_AUTH_UNIX); + if (err < 0) + goto error; + + /* If no clientaddr= option was specified, find a usable cb address */ + if (ip_addr == NULL) { + struct sockaddr_storage cb_addr; + struct sockaddr *sap = (struct sockaddr *)&cb_addr; + + err = rpc_localaddr(clp->cl_rpcclient, sap, sizeof(cb_addr)); + if (err < 0) + goto error; + err = rpc_ntop(sap, buf, sizeof(buf)); + if (err < 0) + goto error; + ip_addr = (const char *)buf; + } + strlcpy(clp->cl_ipaddr, ip_addr, sizeof(clp->cl_ipaddr)); + + err = nfs_idmap_new(clp); + if (err < 0) { + dprintk("%s: failed to create idmapper. 
Error = %d\n", + __func__, err); + goto error; + } + __set_bit(NFS_CS_IDMAP, &clp->cl_res_state); return clp; error: @@ -372,8 +413,6 @@ static int nfs4_init_client_minor_version(struct nfs_client *clp) struct nfs_client *nfs4_init_client(struct nfs_client *clp, const struct nfs_client_initdata *cl_init) { - char buf[INET6_ADDRSTRLEN + 1]; - const char *ip_addr = cl_init->ip_addr; struct nfs_client *old; int error; @@ -381,43 +420,6 @@ struct nfs_client *nfs4_init_client(struct nfs_client *clp, /* the client is initialised already */ return clp; - /* Check NFS protocol revision and initialize RPC op vector */ - clp->rpc_ops = &nfs_v4_clientops; - - if (clp->cl_minorversion != 0) - __set_bit(NFS_CS_INFINITE_SLOTS, &clp->cl_flags); - __set_bit(NFS_CS_DISCRTRY, &clp->cl_flags); - __set_bit(NFS_CS_NO_RETRANS_TIMEOUT, &clp->cl_flags); - - error = nfs_create_rpc_client(clp, cl_init, RPC_AUTH_GSS_KRB5I); - if (error == -EINVAL) - error = nfs_create_rpc_client(clp, cl_init, RPC_AUTH_UNIX); - if (error < 0) - goto error; - - /* If no clientaddr= option was specified, find a usable cb address */ - if (ip_addr == NULL) { - struct sockaddr_storage cb_addr; - struct sockaddr *sap = (struct sockaddr *)&cb_addr; - - error = rpc_localaddr(clp->cl_rpcclient, sap, sizeof(cb_addr)); - if (error < 0) - goto error; - error = rpc_ntop(sap, buf, sizeof(buf)); - if (error < 0) - goto error; - ip_addr = (const char *)buf; - } - strlcpy(clp->cl_ipaddr, ip_addr, sizeof(clp->cl_ipaddr)); - - error = nfs_idmap_new(clp); - if (error < 0) { - dprintk("%s: failed to create idmapper. 
Error = %d\n", - __func__, error); - goto error; - } - __set_bit(NFS_CS_IDMAP, &clp->cl_res_state); - error = nfs4_init_client_minor_version(clp); if (error < 0) goto error; diff --git a/fs/nfs/nfs4file.c b/fs/nfs/nfs4file.c index a1e5c6b85ded..c820de58a661 100644 --- a/fs/nfs/nfs4file.c +++ b/fs/nfs/nfs4file.c @@ -435,6 +435,12 @@ void nfs42_ssc_unregister_ops(void) } #endif /* CONFIG_NFS_V4_2 */ +static int nfs4_setlease(struct file *file, long arg, struct file_lock **lease, + void **priv) +{ + return nfs4_proc_setlease(file, arg, lease, priv); +} + const struct file_operations nfs4_file_operations = { .read_iter = nfs_file_read, .write_iter = nfs_file_write, @@ -448,7 +454,7 @@ const struct file_operations nfs4_file_operations = { .splice_read = generic_file_splice_read, .splice_write = iter_file_splice_write, .check_flags = nfs_check_flags, - .setlease = simple_nosetlease, + .setlease = nfs4_setlease, #ifdef CONFIG_NFS_V4_2 .copy_file_range = nfs4_copy_file_range, .llseek = nfs4_file_llseek, diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index e653654c10bc..e1214bb6b7ee 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -1155,7 +1155,11 @@ static int nfs4_call_sync_sequence(struct rpc_clnt *clnt, struct nfs4_sequence_args *args, struct nfs4_sequence_res *res) { - return nfs4_do_call_sync(clnt, server, msg, args, res, 0); + unsigned short task_flags = 0; + + if (server->nfs_client->cl_minorversion) + task_flags = RPC_TASK_MOVEABLE; + return nfs4_do_call_sync(clnt, server, msg, args, res, task_flags); } @@ -1205,12 +1209,12 @@ nfs4_update_changeattr_locked(struct inode *inode, u64 change_attr = inode_peek_iversion_raw(inode); cache_validity |= NFS_INO_INVALID_CTIME | NFS_INO_INVALID_MTIME; + if (S_ISDIR(inode->i_mode)) + cache_validity |= NFS_INO_INVALID_DATA; switch (NFS_SERVER(inode)->change_attr_type) { case NFS4_CHANGE_TYPE_IS_UNDEFINED: - break; - case NFS4_CHANGE_TYPE_IS_TIME_METADATA: - if ((s64)(change_attr - cinfo->after) > 0) + if 
(cinfo->after == change_attr) goto out; break; default: @@ -1218,24 +1222,21 @@ nfs4_update_changeattr_locked(struct inode *inode, goto out; } - if (cinfo->atomic && cinfo->before == change_attr) { - nfsi->attrtimeo_timestamp = jiffies; - } else { - if (S_ISDIR(inode->i_mode)) { - cache_validity |= NFS_INO_INVALID_DATA; + inode_set_iversion_raw(inode, cinfo->after); + if (!cinfo->atomic || cinfo->before != change_attr) { + if (S_ISDIR(inode->i_mode)) nfs_force_lookup_revalidate(inode); - } else { - if (!NFS_PROTO(inode)->have_delegation(inode, - FMODE_READ)) - cache_validity |= NFS_INO_REVAL_PAGECACHE; - } - if (cinfo->before != change_attr) - cache_validity |= NFS_INO_INVALID_ACCESS | - NFS_INO_INVALID_ACL | - NFS_INO_INVALID_XATTR; + if (!NFS_PROTO(inode)->have_delegation(inode, FMODE_READ)) + cache_validity |= + NFS_INO_INVALID_ACCESS | NFS_INO_INVALID_ACL | + NFS_INO_INVALID_SIZE | NFS_INO_INVALID_OTHER | + NFS_INO_INVALID_BLOCKS | NFS_INO_INVALID_NLINK | + NFS_INO_INVALID_MODE | NFS_INO_INVALID_XATTR | + NFS_INO_REVAL_PAGECACHE; + nfsi->attrtimeo = NFS_MINATTRTIMEO(inode); } - inode_set_iversion_raw(inode, cinfo->after); + nfsi->attrtimeo_timestamp = jiffies; nfsi->read_cache_jiffies = timestamp; nfsi->attr_gencount = nfs_inc_attr_generation_counter(); nfsi->cache_validity &= ~NFS_INO_INVALID_CHANGE; @@ -2569,6 +2570,9 @@ static int nfs4_run_open_task(struct nfs4_opendata *data, }; int status; + if (server->nfs_client->cl_minorversion) + task_setup_data.flags |= RPC_TASK_MOVEABLE; + kref_get(&data->kref); data->rpc_done = false; data->rpc_status = 0; @@ -3749,6 +3753,9 @@ int nfs4_do_close(struct nfs4_state *state, gfp_t gfp_mask, int wait) }; int status = -ENOMEM; + if (server->nfs_client->cl_minorversion) + task_setup_data.flags |= RPC_TASK_MOVEABLE; + nfs4_state_protect(server->nfs_client, NFS_SP4_MACH_CRED_CLEANUP, &task_setup_data.rpc_client, &msg); @@ -4188,6 +4195,9 @@ static int _nfs4_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle, }; 
unsigned short task_flags = 0; + if (nfs4_has_session(server->nfs_client)) + task_flags = RPC_TASK_MOVEABLE; + /* Is this is an attribute revalidation, subject to softreval? */ if (inode && (server->flags & NFS_MOUNT_SOFTREVAL)) task_flags |= RPC_TASK_TIMEOUT; @@ -4307,6 +4317,9 @@ static int _nfs4_proc_lookup(struct rpc_clnt *clnt, struct inode *dir, }; unsigned short task_flags = 0; + if (server->nfs_client->cl_minorversion) + task_flags = RPC_TASK_MOVEABLE; + /* Is this is an attribute revalidation, subject to softreval? */ if (nfs_lookup_is_soft_revalidate(dentry)) task_flags |= RPC_TASK_TIMEOUT; @@ -6538,7 +6551,7 @@ static int _nfs4_proc_delegreturn(struct inode *inode, const struct cred *cred, .rpc_client = server->client, .rpc_message = &msg, .callback_ops = &nfs4_delegreturn_ops, - .flags = RPC_TASK_ASYNC | RPC_TASK_TIMEOUT, + .flags = RPC_TASK_ASYNC | RPC_TASK_TIMEOUT | RPC_TASK_MOVEABLE, }; int status = 0; @@ -6856,6 +6869,11 @@ static struct rpc_task *nfs4_do_unlck(struct file_lock *fl, .workqueue = nfsiod_workqueue, .flags = RPC_TASK_ASYNC, }; + struct nfs_client *client = + NFS_SERVER(lsp->ls_state->inode)->nfs_client; + + if (client->cl_minorversion) + task_setup_data.flags |= RPC_TASK_MOVEABLE; nfs4_state_protect(NFS_SERVER(lsp->ls_state->inode)->nfs_client, NFS_SP4_MACH_CRED_CLEANUP, &task_setup_data.rpc_client, &msg); @@ -7130,6 +7148,10 @@ static int _nfs4_do_setlk(struct nfs4_state *state, int cmd, struct file_lock *f .flags = RPC_TASK_ASYNC | RPC_TASK_CRED_NOREF, }; int ret; + struct nfs_client *client = NFS_SERVER(state->inode)->nfs_client; + + if (client->cl_minorversion) + task_setup_data.flags |= RPC_TASK_MOVEABLE; dprintk("%s: begin!\n", __func__); data = nfs4_alloc_lockdata(fl, nfs_file_open_context(fl->fl_file), @@ -7438,6 +7460,43 @@ nfs4_proc_lock(struct file *filp, int cmd, struct file_lock *request) return nfs4_retry_setlk(state, cmd, request); } +static int nfs4_delete_lease(struct file *file, void **priv) +{ + return 
generic_setlease(file, F_UNLCK, NULL, priv); +} + +static int nfs4_add_lease(struct file *file, long arg, struct file_lock **lease, + void **priv) +{ + struct inode *inode = file_inode(file); + fmode_t type = arg == F_RDLCK ? FMODE_READ : FMODE_WRITE; + int ret; + + /* No delegation, no lease */ + if (!nfs4_have_delegation(inode, type)) + return -EAGAIN; + ret = generic_setlease(file, arg, lease, priv); + if (ret || nfs4_have_delegation(inode, type)) + return ret; + /* We raced with a delegation return */ + nfs4_delete_lease(file, priv); + return -EAGAIN; +} + +int nfs4_proc_setlease(struct file *file, long arg, struct file_lock **lease, + void **priv) +{ + switch (arg) { + case F_RDLCK: + case F_WRLCK: + return nfs4_add_lease(file, arg, lease, priv); + case F_UNLCK: + return nfs4_delete_lease(file, priv); + default: + return -EINVAL; + } +} + int nfs4_lock_delegation_recall(struct file_lock *fl, struct nfs4_state *state, const nfs4_stateid *stateid) { struct nfs_server *server = NFS_SERVER(state->inode); @@ -9186,7 +9245,7 @@ static struct rpc_task *_nfs41_proc_sequence(struct nfs_client *clp, .rpc_client = clp->cl_rpcclient, .rpc_message = &msg, .callback_ops = &nfs41_sequence_ops, - .flags = RPC_TASK_ASYNC | RPC_TASK_TIMEOUT, + .flags = RPC_TASK_ASYNC | RPC_TASK_TIMEOUT | RPC_TASK_MOVEABLE, }; struct rpc_task *ret; @@ -9385,7 +9444,7 @@ nfs4_layoutget_handle_exception(struct rpc_task *task, { struct inode *inode = lgp->args.inode; struct nfs_server *server = NFS_SERVER(inode); - struct pnfs_layout_hdr *lo; + struct pnfs_layout_hdr *lo = lgp->lo; int nfs4err = task->tk_status; int err, status = 0; LIST_HEAD(head); @@ -9437,7 +9496,6 @@ nfs4_layoutget_handle_exception(struct rpc_task *task, case -NFS4ERR_BAD_STATEID: exception->timeout = 0; spin_lock(&inode->i_lock); - lo = NFS_I(inode)->layout; /* If the open stateid was bad, then recover it. 
*/ if (!lo || test_bit(NFS_LAYOUT_INVALID_STID, &lo->plh_flags) || !nfs4_stateid_match_other(&lgp->args.stateid, &lo->plh_stateid)) { @@ -9509,7 +9567,8 @@ nfs4_proc_layoutget(struct nfs4_layoutget *lgp, long *timeout) .rpc_message = &msg, .callback_ops = &nfs4_layoutget_call_ops, .callback_data = lgp, - .flags = RPC_TASK_ASYNC | RPC_TASK_CRED_NOREF, + .flags = RPC_TASK_ASYNC | RPC_TASK_CRED_NOREF | + RPC_TASK_MOVEABLE, }; struct pnfs_layout_segment *lseg = NULL; struct nfs4_exception exception = { @@ -9520,9 +9579,6 @@ nfs4_proc_layoutget(struct nfs4_layoutget *lgp, long *timeout) dprintk("--> %s\n", __func__); - /* nfs4_layoutget_release calls pnfs_put_layout_hdr */ - pnfs_get_layout_hdr(NFS_I(inode)->layout); - nfs4_init_sequence(&lgp->args.seq_args, &lgp->res.seq_res, 0, 0); task = rpc_run_task(&task_setup_data); @@ -9650,6 +9706,7 @@ int nfs4_proc_layoutreturn(struct nfs4_layoutreturn *lrp, bool sync) .rpc_message = &msg, .callback_ops = &nfs4_layoutreturn_call_ops, .callback_data = lrp, + .flags = RPC_TASK_MOVEABLE, }; int status = 0; @@ -9804,6 +9861,7 @@ nfs4_proc_layoutcommit(struct nfs4_layoutcommit_data *data, bool sync) .rpc_message = &msg, .callback_ops = &nfs4_layoutcommit_ops, .callback_data = data, + .flags = RPC_TASK_MOVEABLE, }; struct rpc_task *task; int status = 0; @@ -10131,7 +10189,7 @@ static int nfs41_free_stateid(struct nfs_server *server, .rpc_client = server->client, .rpc_message = &msg, .callback_ops = &nfs41_free_stateid_ops, - .flags = RPC_TASK_ASYNC, + .flags = RPC_TASK_ASYNC | RPC_TASK_MOVEABLE, }; struct nfs_free_stateid_data *data; struct rpc_task *task; diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c index cf9cc62ec48e..cc232d1f16f2 100644 --- a/fs/nfs/pagelist.c +++ b/fs/nfs/pagelist.c @@ -954,6 +954,7 @@ static int nfs_generic_pg_pgios(struct nfs_pageio_descriptor *desc) { struct nfs_pgio_header *hdr; int ret; + unsigned short task_flags = 0; hdr = nfs_pgio_header_alloc(desc->pg_rw_ops); if (!hdr) { @@ -962,14 +963,17 @@ 
static int nfs_generic_pg_pgios(struct nfs_pageio_descriptor *desc) } nfs_pgheader_init(desc, hdr, nfs_pgio_header_free); ret = nfs_generic_pgio(desc, hdr); - if (ret == 0) + if (ret == 0) { + if (NFS_SERVER(hdr->inode)->nfs_client->cl_minorversion) + task_flags = RPC_TASK_MOVEABLE; ret = nfs_initiate_pgio(NFS_CLIENT(hdr->inode), hdr, hdr->cred, NFS_PROTO(hdr->inode), desc->pg_rpc_callops, desc->pg_ioflags, - RPC_TASK_CRED_NOREF); + RPC_TASK_CRED_NOREF | task_flags); + } return ret; } diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index 2c01ee805306..ef14ea0b6ab8 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -966,10 +966,8 @@ void pnfs_set_layout_stateid(struct pnfs_layout_hdr *lo, const nfs4_stateid *new, const struct cred *cred, bool update_barrier) { - u32 oldseq, newseq, new_barrier = 0; - - oldseq = be32_to_cpu(lo->plh_stateid.seqid); - newseq = be32_to_cpu(new->seqid); + u32 oldseq = be32_to_cpu(lo->plh_stateid.seqid); + u32 newseq = be32_to_cpu(new->seqid); if (!pnfs_layout_is_valid(lo)) { pnfs_set_layout_cred(lo, cred); @@ -979,19 +977,21 @@ pnfs_set_layout_stateid(struct pnfs_layout_hdr *lo, const nfs4_stateid *new, clear_bit(NFS_LAYOUT_INVALID_STID, &lo->plh_flags); return; } - if (pnfs_seqid_is_newer(newseq, oldseq)) { + + if (pnfs_seqid_is_newer(newseq, oldseq)) nfs4_stateid_copy(&lo->plh_stateid, new); - /* - * Because of wraparound, we want to keep the barrier - * "close" to the current seqids. - */ - new_barrier = newseq - atomic_read(&lo->plh_outstanding); - } - if (update_barrier) - new_barrier = be32_to_cpu(new->seqid); - else if (new_barrier == 0) + + if (update_barrier) { + pnfs_barrier_update(lo, newseq); return; - pnfs_barrier_update(lo, new_barrier); + } + /* + * Because of wraparound, we want to keep the barrier + * "close" to the current seqids. We really only want to + * get here from a layoutget call. 
+ */ + if (atomic_read(&lo->plh_outstanding) == 1) + pnfs_barrier_update(lo, be32_to_cpu(lo->plh_stateid.seqid)); } static bool @@ -1128,8 +1128,7 @@ void pnfs_layoutget_free(struct nfs4_layoutget *lgp) size_t max_pages = lgp->args.layout.pglen / PAGE_SIZE; nfs4_free_pages(lgp->args.layout.pages, max_pages); - if (lgp->args.inode) - pnfs_put_layout_hdr(NFS_I(lgp->args.inode)->layout); + pnfs_put_layout_hdr(lgp->lo); put_nfs_open_context(lgp->args.ctx); kfree(lgp); } @@ -2014,7 +2013,7 @@ lookup_again: * If the layout segment list is empty, but there are outstanding * layoutget calls, then they might be subject to a layoutrecall. */ - if (list_empty(&lo->plh_segs) && + if ((list_empty(&lo->plh_segs) || !pnfs_layout_is_valid(lo)) && atomic_read(&lo->plh_outstanding) != 0) { spin_unlock(&ino->i_lock); lseg = ERR_PTR(wait_var_event_killable(&lo->plh_outstanding, @@ -2124,6 +2123,9 @@ lookup_again: goto out_put_layout_hdr; } + lgp->lo = lo; + pnfs_get_layout_hdr(lo); + lseg = nfs4_proc_layoutget(lgp, &timeout); trace_pnfs_update_layout(ino, pos, count, iomode, lo, lseg, PNFS_UPDATE_LAYOUT_SEND_LAYOUTGET); @@ -2255,6 +2257,7 @@ static void _lgopen_prepare_attached(struct nfs4_opendata *data, pnfs_put_layout_hdr(lo); return; } + lgp->lo = lo; data->lgp = lgp; data->o_arg.lg_args = &lgp->args; data->o_res.lg_res = &lgp->res; @@ -2263,6 +2266,7 @@ static void _lgopen_prepare_attached(struct nfs4_opendata *data, static void _lgopen_prepare_floating(struct nfs4_opendata *data, struct nfs_open_context *ctx) { + struct inode *ino = data->dentry->d_inode; struct pnfs_layout_range rng = { .iomode = (data->o_arg.fmode & FMODE_WRITE) ? 
IOMODE_RW: IOMODE_READ, @@ -2271,7 +2275,7 @@ static void _lgopen_prepare_floating(struct nfs4_opendata *data, }; struct nfs4_layoutget *lgp; - lgp = pnfs_alloc_init_layoutget_args(NULL, ctx, &current_stateid, + lgp = pnfs_alloc_init_layoutget_args(ino, ctx, &current_stateid, &rng, GFP_KERNEL); if (!lgp) return; @@ -2291,6 +2295,8 @@ void pnfs_lgopen_prepare(struct nfs4_opendata *data, /* Could check on max_ops, but currently hardcoded high enough */ if (!nfs_server_capable(data->dir->d_inode, NFS_CAP_LGOPEN)) return; + if (data->lgp) + return; if (data->state) _lgopen_prepare_attached(data, ctx); else @@ -2330,13 +2336,13 @@ void pnfs_parse_lgopen(struct inode *ino, struct nfs4_layoutget *lgp, } return; } - if (!lgp->args.inode) { + if (!lgp->lo) { lo = _pnfs_grab_empty_layout(ino, ctx); if (!lo) return; - lgp->args.inode = ino; + lgp->lo = lo; } else - lo = NFS_I(lgp->args.inode)->layout; + lo = lgp->lo; lseg = pnfs_layout_process(lgp); if (!IS_ERR(lseg)) { @@ -2349,11 +2355,9 @@ void pnfs_parse_lgopen(struct inode *ino, struct nfs4_layoutget *lgp, void nfs4_lgopen_release(struct nfs4_layoutget *lgp) { if (lgp != NULL) { - struct inode *inode = lgp->args.inode; - if (inode) { - struct pnfs_layout_hdr *lo = NFS_I(inode)->layout; - pnfs_clear_first_layoutget(lo); - nfs_layoutget_end(lo); + if (lgp->lo) { + pnfs_clear_first_layoutget(lgp->lo); + nfs_layoutget_end(lgp->lo); } pnfs_layoutget_free(lgp); } @@ -2362,7 +2366,7 @@ void nfs4_lgopen_release(struct nfs4_layoutget *lgp) struct pnfs_layout_segment * pnfs_layout_process(struct nfs4_layoutget *lgp) { - struct pnfs_layout_hdr *lo = NFS_I(lgp->args.inode)->layout; + struct pnfs_layout_hdr *lo = lgp->lo; struct nfs4_layoutget_res *res = &lgp->res; struct pnfs_layout_segment *lseg; struct inode *ino = lo->plh_inode; @@ -2390,11 +2394,13 @@ pnfs_layout_process(struct nfs4_layoutget *lgp) goto out_forget; } + if (!pnfs_layout_is_valid(lo) && !pnfs_is_first_layoutget(lo)) + goto out_forget; + if
(nfs4_stateid_match_other(&lo->plh_stateid, &res->stateid)) { /* existing state ID, make sure the sequence number matches. */ if (pnfs_layout_stateid_blocked(lo, &res->stateid)) { - if (!pnfs_layout_is_valid(lo) && - pnfs_is_first_layoutget(lo)) + if (!pnfs_layout_is_valid(lo)) lo->plh_barrier = 0; dprintk("%s forget reply due to sequence\n", __func__); goto out_forget; @@ -2413,8 +2419,6 @@ pnfs_layout_process(struct nfs4_layoutget *lgp) goto out_forget; } else { /* We have a completely new layout */ - if (!pnfs_is_first_layoutget(lo)) - goto out_forget; pnfs_set_layout_stateid(lo, &res->stateid, lgp->cred, true); } diff --git a/fs/nfs/pnfs_nfs.c b/fs/nfs/pnfs_nfs.c index 49d3389bd813..cf19914fec81 100644 --- a/fs/nfs/pnfs_nfs.c +++ b/fs/nfs/pnfs_nfs.c @@ -805,19 +805,16 @@ out: } EXPORT_SYMBOL_GPL(nfs4_pnfs_ds_add); -static void nfs4_wait_ds_connect(struct nfs4_pnfs_ds *ds) +static int nfs4_wait_ds_connect(struct nfs4_pnfs_ds *ds) { might_sleep(); - wait_on_bit(&ds->ds_state, NFS4DS_CONNECTING, - TASK_KILLABLE); + return wait_on_bit(&ds->ds_state, NFS4DS_CONNECTING, TASK_KILLABLE); } static void nfs4_clear_ds_conn_bit(struct nfs4_pnfs_ds *ds) { smp_mb__before_atomic(); - clear_bit(NFS4DS_CONNECTING, &ds->ds_state); - smp_mb__after_atomic(); - wake_up_bit(&ds->ds_state, NFS4DS_CONNECTING); + clear_and_wake_up_bit(NFS4DS_CONNECTING, &ds->ds_state); } static struct nfs_client *(*get_v3_ds_connect)( @@ -858,7 +855,7 @@ static int _nfs4_pnfs_v3_ds_connect(struct nfs_server *mds_srv, dprintk("--> %s DS %s\n", __func__, ds->ds_remotestr); if (!load_v3_ds_connect()) - goto out; + return -EPROTONOSUPPORT; list_for_each_entry(da, &ds->ds_addrs, da_node) { dprintk("%s: DS %s: trying address %s\n", @@ -993,30 +990,33 @@ int nfs4_pnfs_ds_connect(struct nfs_server *mds_srv, struct nfs4_pnfs_ds *ds, { int err; -again: - err = 0; - if (test_and_set_bit(NFS4DS_CONNECTING, &ds->ds_state) == 0) { - if (version == 3) { - err = _nfs4_pnfs_v3_ds_connect(mds_srv, ds, timeo, - retrans); 
- } else if (version == 4) { - err = _nfs4_pnfs_v4_ds_connect(mds_srv, ds, timeo, - retrans, minor_version); - } else { - dprintk("%s: unsupported DS version %d\n", __func__, - version); - err = -EPROTONOSUPPORT; - } + do { + err = nfs4_wait_ds_connect(ds); + if (err || ds->ds_clp) + goto out; + if (nfs4_test_deviceid_unavailable(devid)) + return -ENODEV; + } while (test_and_set_bit(NFS4DS_CONNECTING, &ds->ds_state) != 0); - nfs4_clear_ds_conn_bit(ds); - } else { - nfs4_wait_ds_connect(ds); + if (ds->ds_clp) + goto connect_done; - /* what was waited on didn't connect AND didn't mark unavail */ - if (!ds->ds_clp && !nfs4_test_deviceid_unavailable(devid)) - goto again; + switch (version) { + case 3: + err = _nfs4_pnfs_v3_ds_connect(mds_srv, ds, timeo, retrans); + break; + case 4: + err = _nfs4_pnfs_v4_ds_connect(mds_srv, ds, timeo, retrans, + minor_version); + break; + default: + dprintk("%s: unsupported DS version %d\n", __func__, version); + err = -EPROTONOSUPPORT; } +connect_done: + nfs4_clear_ds_conn_bit(ds); +out: /* * At this point the ds->ds_clp should be ready, but it might have * hit an error. 
diff --git a/fs/nfs/read.c b/fs/nfs/read.c index d2b6dce1f99f..9f39e0a1a38b 100644 --- a/fs/nfs/read.c +++ b/fs/nfs/read.c @@ -74,8 +74,7 @@ void nfs_pageio_init_read(struct nfs_pageio_descriptor *pgio, } EXPORT_SYMBOL_GPL(nfs_pageio_init_read); -static void nfs_pageio_complete_read(struct nfs_pageio_descriptor *pgio, - struct inode *inode) +static void nfs_pageio_complete_read(struct nfs_pageio_descriptor *pgio) { struct nfs_pgio_mirror *pgm; unsigned long npages; @@ -86,9 +85,9 @@ static void nfs_pageio_complete_read(struct nfs_pageio_descriptor *pgio, WARN_ON_ONCE(pgio->pg_mirror_count != 1); pgm = &pgio->pg_mirrors[0]; - NFS_I(inode)->read_io += pgm->pg_bytes_written; + NFS_I(pgio->pg_inode)->read_io += pgm->pg_bytes_written; npages = (pgm->pg_bytes_written + PAGE_SIZE - 1) >> PAGE_SHIFT; - nfs_add_stats(inode, NFSIOS_READPAGES, npages); + nfs_add_stats(pgio->pg_inode, NFSIOS_READPAGES, npages); } @@ -363,22 +362,23 @@ int nfs_readpage(struct file *file, struct page *page) } else desc.ctx = get_nfs_open_context(nfs_file_open_context(file)); + xchg(&desc.ctx->error, 0); if (!IS_SYNC(inode)) { ret = nfs_readpage_from_fscache(desc.ctx, inode, page); if (ret == 0) - goto out; + goto out_wait; } - xchg(&desc.ctx->error, 0); nfs_pageio_init_read(&desc.pgio, inode, false, &nfs_async_read_completion_ops); ret = readpage_async_filler(&desc, page); + if (ret) + goto out; - if (!ret) - nfs_pageio_complete_read(&desc.pgio, inode); - + nfs_pageio_complete_read(&desc.pgio); ret = desc.pgio.pg_error < 0 ? 
desc.pgio.pg_error : 0; +out_wait: if (!ret) { ret = wait_on_page_locked_killable(page); if (!PageUptodate(page) && !ret) @@ -430,7 +430,7 @@ int nfs_readpages(struct file *file, struct address_space *mapping, ret = read_cache_pages(mapping, pages, readpage_async_filler, &desc); - nfs_pageio_complete_read(&desc.pgio, inode); + nfs_pageio_complete_read(&desc.pgio); read_complete: put_nfs_open_context(desc.ctx); diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 3bf82178166a..eae9bf114041 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -1810,6 +1810,7 @@ nfs_commit_list(struct inode *inode, struct list_head *head, int how, struct nfs_commit_info *cinfo) { struct nfs_commit_data *data; + unsigned short task_flags = 0; /* another commit raced with us */ if (list_empty(head)) @@ -1820,8 +1821,11 @@ nfs_commit_list(struct inode *inode, struct list_head *head, int how, /* Set up the argument struct */ nfs_init_commit(data, head, NULL, cinfo); atomic_inc(&cinfo->mds->rpcs_out); + if (NFS_SERVER(inode)->nfs_client->cl_minorversion) + task_flags = RPC_TASK_MOVEABLE; return nfs_initiate_commit(NFS_CLIENT(inode), data, NFS_PROTO(inode), - data->mds_ops, how, RPC_TASK_CRED_NOREF); + data->mds_ops, how, + RPC_TASK_CRED_NOREF | task_flags); } /* diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index ffba254d2098..ce6474594872 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -84,6 +84,7 @@ struct nfs_open_context { #define NFS_CONTEXT_RESEND_WRITES (1) #define NFS_CONTEXT_BAD (2) #define NFS_CONTEXT_UNLOCK (3) +#define NFS_CONTEXT_FILE_OPEN (4) int error; struct list_head list; diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 717ecc87c9e7..e9698b6278a5 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -277,6 +277,7 @@ struct nfs4_layoutget { struct nfs4_layoutget_args args; struct nfs4_layoutget_res res; const struct cred *cred; + struct pnfs_layout_hdr *lo; gfp_t gfp_flags; }; diff --git 
a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h index 02e7a5863d28..8b5d5c97553e 100644 --- a/include/linux/sunrpc/clnt.h +++ b/include/linux/sunrpc/clnt.h @@ -29,6 +29,7 @@ #include <linux/sunrpc/xprtmultipath.h> struct rpc_inode; +struct rpc_sysfs_client; /* * The high-level client handle @@ -71,6 +72,7 @@ struct rpc_clnt { #if IS_ENABLED(CONFIG_SUNRPC_DEBUG) struct dentry *cl_debugfs; /* debugfs directory */ #endif + struct rpc_sysfs_client *cl_sysfs; /* sysfs directory */ /* cl_work is only needed after cl_xpi is no longer used, * and that are of similar size */ diff --git a/include/linux/sunrpc/sched.h b/include/linux/sunrpc/sched.h index df696efdd675..a237b8dbf608 100644 --- a/include/linux/sunrpc/sched.h +++ b/include/linux/sunrpc/sched.h @@ -121,6 +121,7 @@ struct rpc_task_setup { */ #define RPC_TASK_ASYNC 0x0001 /* is an async task */ #define RPC_TASK_SWAPPER 0x0002 /* is swapping in/out */ +#define RPC_TASK_MOVEABLE 0x0004 /* nfs4.1+ rpc tasks */ #define RPC_TASK_NULLCREDS 0x0010 /* Use AUTH_NULL credential */ #define RPC_CALL_MAJORSEEN 0x0020 /* major timeout seen */ #define RPC_TASK_ROOTCREDS 0x0040 /* force root creds */ @@ -139,6 +140,7 @@ struct rpc_task_setup { #define RPC_IS_SOFT(t) ((t)->tk_flags & (RPC_TASK_SOFT|RPC_TASK_TIMEOUT)) #define RPC_IS_SOFTCONN(t) ((t)->tk_flags & RPC_TASK_SOFTCONN) #define RPC_WAS_SENT(t) ((t)->tk_flags & RPC_TASK_SENT) +#define RPC_IS_MOVEABLE(t) ((t)->tk_flags & RPC_TASK_MOVEABLE) #define RPC_TASK_RUNNING 0 #define RPC_TASK_QUEUED 1 diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h index 61b622e334ee..c8c39f22d3b1 100644 --- a/include/linux/sunrpc/xprt.h +++ b/include/linux/sunrpc/xprt.h @@ -53,6 +53,7 @@ enum rpc_display_format_t { struct rpc_task; struct rpc_xprt; +struct xprt_class; struct seq_file; struct svc_serv; struct net; @@ -182,9 +183,11 @@ enum xprt_transports { XPRT_TRANSPORT_LOCAL = 257, }; +struct rpc_sysfs_xprt; struct rpc_xprt { struct kref kref; /* Reference count */ 
const struct rpc_xprt_ops *ops; /* transport methods */ + unsigned int id; /* transport id */ const struct rpc_timeout *timeout; /* timeout parms */ struct sockaddr_storage addr; /* server address */ @@ -288,6 +291,9 @@ struct rpc_xprt { atomic_t inject_disconnect; #endif struct rcu_head rcu; + const struct xprt_class *xprt_class; + struct rpc_sysfs_xprt *xprt_sysfs; + bool main; /*mark if this is the 1st transport */ }; #if defined(CONFIG_SUNRPC_BACKCHANNEL) @@ -370,6 +376,7 @@ struct rpc_xprt * xprt_alloc(struct net *net, size_t size, void xprt_free(struct rpc_xprt *); void xprt_add_backlog(struct rpc_xprt *xprt, struct rpc_task *task); bool xprt_wake_up_backlog(struct rpc_xprt *xprt, struct rpc_rqst *req); +void xprt_cleanup_ids(void); static inline int xprt_enable_swap(struct rpc_xprt *xprt) @@ -408,6 +415,7 @@ void xprt_conditional_disconnect(struct rpc_xprt *xprt, unsigned int cookie); bool xprt_lock_connect(struct rpc_xprt *, struct rpc_task *, void *); void xprt_unlock_connect(struct rpc_xprt *, void *); +void xprt_release_write(struct rpc_xprt *, struct rpc_task *); /* * Reserved bit positions in xprt->state @@ -419,6 +427,8 @@ void xprt_unlock_connect(struct rpc_xprt *, void *); #define XPRT_BOUND (4) #define XPRT_BINDING (5) #define XPRT_CLOSING (6) +#define XPRT_OFFLINE (7) +#define XPRT_REMOVE (8) #define XPRT_CONGESTED (9) #define XPRT_CWND_WAIT (10) #define XPRT_WRITE_SPACE (11) diff --git a/include/linux/sunrpc/xprtmultipath.h b/include/linux/sunrpc/xprtmultipath.h index c6cce3fbf29d..b19addc8b715 100644 --- a/include/linux/sunrpc/xprtmultipath.h +++ b/include/linux/sunrpc/xprtmultipath.h @@ -10,10 +10,12 @@ #define _NET_SUNRPC_XPRTMULTIPATH_H struct rpc_xprt_iter_ops; +struct rpc_sysfs_xprt_switch; struct rpc_xprt_switch { spinlock_t xps_lock; struct kref xps_kref; + unsigned int xps_id; unsigned int xps_nxprts; unsigned int xps_nactive; atomic_long_t xps_queuelen; @@ -23,6 +25,7 @@ struct rpc_xprt_switch { const struct rpc_xprt_iter_ops 
*xps_iter_ops; + struct rpc_sysfs_xprt_switch *xps_sysfs; struct rcu_head xps_rcu; }; @@ -71,4 +74,7 @@ extern struct rpc_xprt *xprt_iter_get_next(struct rpc_xprt_iter *xpi); extern bool rpc_xprt_switch_has_addr(struct rpc_xprt_switch *xps, const struct sockaddr *sap); + +extern void xprt_multipath_cleanup_ids(void); + #endif diff --git a/include/linux/sunrpc/xprtsock.h b/include/linux/sunrpc/xprtsock.h index 3c1423ee74b4..8c2a712cb242 100644 --- a/include/linux/sunrpc/xprtsock.h +++ b/include/linux/sunrpc/xprtsock.h @@ -10,6 +10,7 @@ int init_socket_xprt(void); void cleanup_socket_xprt(void); +unsigned short get_srcport(struct rpc_xprt *); #define RPC_MIN_RESVPORT (1U) #define RPC_MAX_RESVPORT (65535U) diff --git a/net/sunrpc/Makefile b/net/sunrpc/Makefile index 9488600451e8..1c8de397d6ad 100644 --- a/net/sunrpc/Makefile +++ b/net/sunrpc/Makefile @@ -12,7 +12,7 @@ sunrpc-y := clnt.o xprt.o socklib.o xprtsock.o sched.o \ auth.o auth_null.o auth_unix.o \ svc.o svcsock.o svcauth.o svcauth_unix.o \ addr.o rpcb_clnt.o timer.o xdr.o \ - sunrpc_syms.o cache.o rpc_pipe.o \ + sunrpc_syms.o cache.o rpc_pipe.o sysfs.o \ svc_xprt.o \ xprtmultipath.o sunrpc-$(CONFIG_SUNRPC_DEBUG) += debugfs.o diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index 42623d6b8f0e..8b4de70e8ead 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -41,6 +41,7 @@ #include <trace/events/sunrpc.h> #include "sunrpc.h" +#include "sysfs.h" #include "netns.h" #if IS_ENABLED(CONFIG_SUNRPC_DEBUG) @@ -327,6 +328,7 @@ err_auth: out: if (pipefs_sb) rpc_put_sb_net(net); + rpc_sysfs_client_destroy(clnt); rpc_clnt_debugfs_unregister(clnt); return err; } @@ -410,6 +412,7 @@ static struct rpc_clnt * rpc_new_client(const struct rpc_create_args *args, } rpc_clnt_set_transport(clnt, xprt, timeout); + xprt->main = true; xprt_iter_init(&clnt->cl_xpi, xps); xprt_switch_put(xps); @@ -423,6 +426,7 @@ static struct rpc_clnt * rpc_new_client(const struct rpc_create_args *args, /* save the nodename */ 
rpc_clnt_set_nodename(clnt, nodename); + rpc_sysfs_client_setup(clnt, xps, rpc_net_ns(clnt)); err = rpc_client_register(clnt, args->authflavor, args->client_name); if (err) goto out_no_path; @@ -733,6 +737,7 @@ int rpc_switch_client_transport(struct rpc_clnt *clnt, rpc_unregister_client(clnt); __rpc_clnt_remove_pipedir(clnt); + rpc_sysfs_client_destroy(clnt); rpc_clnt_debugfs_unregister(clnt); /* @@ -879,6 +884,7 @@ static void rpc_free_client_work(struct work_struct *work) * so they cannot be called in rpciod, so they are handled separately * here. */ + rpc_sysfs_client_destroy(clnt); rpc_clnt_debugfs_unregister(clnt); rpc_free_clid(clnt); rpc_clnt_remove_pipedir(clnt); @@ -2100,6 +2106,30 @@ call_connect_status(struct rpc_task *task) case -ENOTCONN: case -EAGAIN: case -ETIMEDOUT: + if (!(task->tk_flags & RPC_TASK_NO_ROUND_ROBIN) && + (task->tk_flags & RPC_TASK_MOVEABLE) && + test_bit(XPRT_REMOVE, &xprt->state)) { + struct rpc_xprt *saved = task->tk_xprt; + struct rpc_xprt_switch *xps; + + rcu_read_lock(); + xps = xprt_switch_get(rcu_dereference(clnt->cl_xpi.xpi_xpswitch)); + rcu_read_unlock(); + if (xps->xps_nxprts > 1) { + long value; + + xprt_release(task); + value = atomic_long_dec_return(&xprt->queuelen); + if (value == 0) + rpc_xprt_switch_remove_xprt(xps, saved); + xprt_put(saved); + task->tk_xprt = NULL; + task->tk_action = call_start; + } + xprt_switch_put(xps); + if (!task->tk_xprt) + return; + } goto out_retry; case -ENOBUFS: rpc_delay(task, HZ >> 2); diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c index 39ed0e0afe6d..c045f63d11fa 100644 --- a/net/sunrpc/sched.c +++ b/net/sunrpc/sched.c @@ -592,10 +592,20 @@ static struct rpc_task *__rpc_find_next_queued_priority(struct rpc_wait_queue *q struct rpc_task *task; /* + * Service the privileged queue. 
+ */ + q = &queue->tasks[RPC_NR_PRIORITY - 1]; + if (queue->maxpriority > RPC_PRIORITY_PRIVILEGED && !list_empty(q)) { + task = list_first_entry(q, struct rpc_task, u.tk_wait.list); + goto out; + } + + /* * Service a batch of tasks from a single owner. */ q = &queue->tasks[queue->priority]; - if (!list_empty(q) && --queue->nr) { + if (!list_empty(q) && queue->nr) { + queue->nr--; task = list_first_entry(q, struct rpc_task, u.tk_wait.list); goto out; } diff --git a/net/sunrpc/sunrpc_syms.c b/net/sunrpc/sunrpc_syms.c index 236fadc4a439..691c0000e9ea 100644 --- a/net/sunrpc/sunrpc_syms.c +++ b/net/sunrpc/sunrpc_syms.c @@ -24,6 +24,7 @@ #include <linux/sunrpc/xprtsock.h> #include "sunrpc.h" +#include "sysfs.h" #include "netns.h" unsigned int sunrpc_net_id; @@ -103,6 +104,10 @@ init_sunrpc(void) if (err) goto out4; + err = rpc_sysfs_init(); + if (err) + goto out5; + sunrpc_debugfs_init(); #if IS_ENABLED(CONFIG_SUNRPC_DEBUG) rpc_register_sysctl(); @@ -111,6 +116,8 @@ init_sunrpc(void) init_socket_xprt(); /* clnt sock transport */ return 0; +out5: + unregister_rpc_pipefs(); out4: unregister_pernet_subsys(&sunrpc_net_ops); out3: @@ -124,7 +131,10 @@ out: static void __exit cleanup_sunrpc(void) { + rpc_sysfs_exit(); rpc_cleanup_clids(); + xprt_cleanup_ids(); + xprt_multipath_cleanup_ids(); rpcauth_remove_module(); cleanup_socket_xprt(); svc_cleanup_xprt_sock(); diff --git a/net/sunrpc/sysfs.c b/net/sunrpc/sysfs.c new file mode 100644 index 000000000000..64da3bfd28e6 --- /dev/null +++ b/net/sunrpc/sysfs.c @@ -0,0 +1,588 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (c) 2020 Anna Schumaker <Anna.Schumaker@Netapp.com> + */ +#include <linux/sunrpc/clnt.h> +#include <linux/kobject.h> +#include <linux/sunrpc/addr.h> +#include <linux/sunrpc/xprtsock.h> + +#include "sysfs.h" + +struct xprt_addr { + const char *addr; + struct rcu_head rcu; +}; + +static void free_xprt_addr(struct rcu_head *head) +{ + struct xprt_addr *addr = container_of(head, struct xprt_addr, rcu); + + 
kfree(addr->addr); + kfree(addr); +} + +static struct kset *rpc_sunrpc_kset; +static struct kobject *rpc_sunrpc_client_kobj, *rpc_sunrpc_xprt_switch_kobj; + +static void rpc_sysfs_object_release(struct kobject *kobj) +{ + kfree(kobj); +} + +static const struct kobj_ns_type_operations * +rpc_sysfs_object_child_ns_type(struct kobject *kobj) +{ + return &net_ns_type_operations; +} + +static struct kobj_type rpc_sysfs_object_type = { + .release = rpc_sysfs_object_release, + .sysfs_ops = &kobj_sysfs_ops, + .child_ns_type = rpc_sysfs_object_child_ns_type, +}; + +static struct kobject *rpc_sysfs_object_alloc(const char *name, + struct kset *kset, + struct kobject *parent) +{ + struct kobject *kobj; + + kobj = kzalloc(sizeof(*kobj), GFP_KERNEL); + if (kobj) { + kobj->kset = kset; + if (kobject_init_and_add(kobj, &rpc_sysfs_object_type, + parent, "%s", name) == 0) + return kobj; + kobject_put(kobj); + } + return NULL; +} + +static inline struct rpc_xprt * +rpc_sysfs_xprt_kobj_get_xprt(struct kobject *kobj) +{ + struct rpc_sysfs_xprt *x = container_of(kobj, + struct rpc_sysfs_xprt, kobject); + + return xprt_get(x->xprt); +} + +static inline struct rpc_xprt_switch * +rpc_sysfs_xprt_kobj_get_xprt_switch(struct kobject *kobj) +{ + struct rpc_sysfs_xprt *x = container_of(kobj, + struct rpc_sysfs_xprt, kobject); + + return xprt_switch_get(x->xprt_switch); +} + +static inline struct rpc_xprt_switch * +rpc_sysfs_xprt_switch_kobj_get_xprt(struct kobject *kobj) +{ + struct rpc_sysfs_xprt_switch *x = container_of(kobj, + struct rpc_sysfs_xprt_switch, kobject); + + return xprt_switch_get(x->xprt_switch); +} + +static ssize_t rpc_sysfs_xprt_dstaddr_show(struct kobject *kobj, + struct kobj_attribute *attr, + char *buf) +{ + struct rpc_xprt *xprt = rpc_sysfs_xprt_kobj_get_xprt(kobj); + ssize_t ret; + + if (!xprt) + return 0; + ret = sprintf(buf, "%s\n", xprt->address_strings[RPC_DISPLAY_ADDR]); + xprt_put(xprt); + return ret + 1; +} + +static ssize_t rpc_sysfs_xprt_info_show(struct 
kobject *kobj, + struct kobj_attribute *attr, + char *buf) +{ + struct rpc_xprt *xprt = rpc_sysfs_xprt_kobj_get_xprt(kobj); + ssize_t ret; + + if (!xprt) + return 0; + + ret = sprintf(buf, "last_used=%lu\ncur_cong=%lu\ncong_win=%lu\n" + "max_num_slots=%u\nmin_num_slots=%u\nnum_reqs=%u\n" + "binding_q_len=%u\nsending_q_len=%u\npending_q_len=%u\n" + "backlog_q_len=%u\nmain_xprt=%d\nsrc_port=%u\n" + "tasks_queuelen=%ld\n", + xprt->last_used, xprt->cong, xprt->cwnd, xprt->max_reqs, + xprt->min_reqs, xprt->num_reqs, xprt->binding.qlen, + xprt->sending.qlen, xprt->pending.qlen, + xprt->backlog.qlen, xprt->main, + (xprt->xprt_class->ident == XPRT_TRANSPORT_TCP) ? + get_srcport(xprt) : 0, + atomic_long_read(&xprt->queuelen)); + xprt_put(xprt); + return ret + 1; +} + +static ssize_t rpc_sysfs_xprt_state_show(struct kobject *kobj, + struct kobj_attribute *attr, + char *buf) +{ + struct rpc_xprt *xprt = rpc_sysfs_xprt_kobj_get_xprt(kobj); + ssize_t ret; + int locked, connected, connecting, close_wait, bound, binding, + closing, congested, cwnd_wait, write_space, offline, remove; + + if (!xprt) + return 0; + + if (!xprt->state) { + ret = sprintf(buf, "state=CLOSED\n"); + } else { + locked = test_bit(XPRT_LOCKED, &xprt->state); + connected = test_bit(XPRT_CONNECTED, &xprt->state); + connecting = test_bit(XPRT_CONNECTING, &xprt->state); + close_wait = test_bit(XPRT_CLOSE_WAIT, &xprt->state); + bound = test_bit(XPRT_BOUND, &xprt->state); + binding = test_bit(XPRT_BINDING, &xprt->state); + closing = test_bit(XPRT_CLOSING, &xprt->state); + congested = test_bit(XPRT_CONGESTED, &xprt->state); + cwnd_wait = test_bit(XPRT_CWND_WAIT, &xprt->state); + write_space = test_bit(XPRT_WRITE_SPACE, &xprt->state); + offline = test_bit(XPRT_OFFLINE, &xprt->state); + remove = test_bit(XPRT_REMOVE, &xprt->state); + + ret = sprintf(buf, "state=%s %s %s %s %s %s %s %s %s %s %s %s\n", + locked ? "LOCKED" : "", + connected ? "CONNECTED" : "", + connecting ? "CONNECTING" : "", + close_wait ? 
"CLOSE_WAIT" : "", + bound ? "BOUND" : "", + binding ? "BOUNDING" : "", + closing ? "CLOSING" : "", + congested ? "CONGESTED" : "", + cwnd_wait ? "CWND_WAIT" : "", + write_space ? "WRITE_SPACE" : "", + offline ? "OFFLINE" : "", + remove ? "REMOVE" : ""); + } + + xprt_put(xprt); + return ret + 1; +} + +static ssize_t rpc_sysfs_xprt_switch_info_show(struct kobject *kobj, + struct kobj_attribute *attr, + char *buf) +{ + struct rpc_xprt_switch *xprt_switch = + rpc_sysfs_xprt_switch_kobj_get_xprt(kobj); + ssize_t ret; + + if (!xprt_switch) + return 0; + ret = sprintf(buf, "num_xprts=%u\nnum_active=%u\nqueue_len=%ld\n", + xprt_switch->xps_nxprts, xprt_switch->xps_nactive, + atomic_long_read(&xprt_switch->xps_queuelen)); + xprt_switch_put(xprt_switch); + return ret + 1; +} + +static ssize_t rpc_sysfs_xprt_dstaddr_store(struct kobject *kobj, + struct kobj_attribute *attr, + const char *buf, size_t count) +{ + struct rpc_xprt *xprt = rpc_sysfs_xprt_kobj_get_xprt(kobj); + struct sockaddr *saddr; + char *dst_addr; + int port; + struct xprt_addr *saved_addr; + size_t buf_len; + + if (!xprt) + return 0; + if (!(xprt->xprt_class->ident == XPRT_TRANSPORT_TCP || + xprt->xprt_class->ident == XPRT_TRANSPORT_RDMA)) { + xprt_put(xprt); + return -EOPNOTSUPP; + } + + if (wait_on_bit_lock(&xprt->state, XPRT_LOCKED, TASK_KILLABLE)) { + count = -EINTR; + goto out_put; + } + saddr = (struct sockaddr *)&xprt->addr; + port = rpc_get_port(saddr); + + /* buf_len is the len until the first occurence of either + * '\n' or '\0' + */ + buf_len = strcspn(buf, "\n"); + + dst_addr = kstrndup(buf, buf_len, GFP_KERNEL); + if (!dst_addr) + goto out_err; + saved_addr = kzalloc(sizeof(*saved_addr), GFP_KERNEL); + if (!saved_addr) + goto out_err_free; + saved_addr->addr = + rcu_dereference_raw(xprt->address_strings[RPC_DISPLAY_ADDR]); + rcu_assign_pointer(xprt->address_strings[RPC_DISPLAY_ADDR], dst_addr); + call_rcu(&saved_addr->rcu, free_xprt_addr); + xprt->addrlen = rpc_pton(xprt->xprt_net, buf, buf_len, 
saddr, + sizeof(*saddr)); + rpc_set_port(saddr, port); + + xprt_force_disconnect(xprt); +out: + xprt_release_write(xprt, NULL); +out_put: + xprt_put(xprt); + return count; +out_err_free: + kfree(dst_addr); +out_err: + count = -ENOMEM; + goto out; +} + +static ssize_t rpc_sysfs_xprt_state_change(struct kobject *kobj, + struct kobj_attribute *attr, + const char *buf, size_t count) +{ + struct rpc_xprt *xprt = rpc_sysfs_xprt_kobj_get_xprt(kobj); + int offline = 0, online = 0, remove = 0; + struct rpc_xprt_switch *xps = rpc_sysfs_xprt_kobj_get_xprt_switch(kobj); + + if (!xprt) + return 0; + + if (!strncmp(buf, "offline", 7)) + offline = 1; + else if (!strncmp(buf, "online", 6)) + online = 1; + else if (!strncmp(buf, "remove", 6)) + remove = 1; + else + return -EINVAL; + + if (wait_on_bit_lock(&xprt->state, XPRT_LOCKED, TASK_KILLABLE)) { + count = -EINTR; + goto out_put; + } + if (xprt->main) { + count = -EINVAL; + goto release_tasks; + } + if (offline) { + set_bit(XPRT_OFFLINE, &xprt->state); + spin_lock(&xps->xps_lock); + xps->xps_nactive--; + spin_unlock(&xps->xps_lock); + } else if (online) { + clear_bit(XPRT_OFFLINE, &xprt->state); + spin_lock(&xps->xps_lock); + xps->xps_nactive++; + spin_unlock(&xps->xps_lock); + } else if (remove) { + if (test_bit(XPRT_OFFLINE, &xprt->state)) { + set_bit(XPRT_REMOVE, &xprt->state); + xprt_force_disconnect(xprt); + if (test_bit(XPRT_CONNECTED, &xprt->state)) { + if (!xprt->sending.qlen && + !xprt->pending.qlen && + !xprt->backlog.qlen && + !atomic_long_read(&xprt->queuelen)) + rpc_xprt_switch_remove_xprt(xps, xprt); + } + } else { + count = -EINVAL; + } + } + +release_tasks: + xprt_release_write(xprt, NULL); +out_put: + xprt_put(xprt); + xprt_switch_put(xps); + return count; +} + +int rpc_sysfs_init(void) +{ + rpc_sunrpc_kset = kset_create_and_add("sunrpc", NULL, kernel_kobj); + if (!rpc_sunrpc_kset) + return -ENOMEM; + rpc_sunrpc_client_kobj = + rpc_sysfs_object_alloc("rpc-clients", rpc_sunrpc_kset, NULL); + if 
(!rpc_sunrpc_client_kobj) + goto err_client; + rpc_sunrpc_xprt_switch_kobj = + rpc_sysfs_object_alloc("xprt-switches", rpc_sunrpc_kset, NULL); + if (!rpc_sunrpc_xprt_switch_kobj) + goto err_switch; + return 0; +err_switch: + kobject_put(rpc_sunrpc_client_kobj); + rpc_sunrpc_client_kobj = NULL; +err_client: + kset_unregister(rpc_sunrpc_kset); + rpc_sunrpc_kset = NULL; + return -ENOMEM; +} + +static void rpc_sysfs_client_release(struct kobject *kobj) +{ + struct rpc_sysfs_client *c; + + c = container_of(kobj, struct rpc_sysfs_client, kobject); + kfree(c); +} + +static void rpc_sysfs_xprt_switch_release(struct kobject *kobj) +{ + struct rpc_sysfs_xprt_switch *xprt_switch; + + xprt_switch = container_of(kobj, struct rpc_sysfs_xprt_switch, kobject); + kfree(xprt_switch); +} + +static void rpc_sysfs_xprt_release(struct kobject *kobj) +{ + struct rpc_sysfs_xprt *xprt; + + xprt = container_of(kobj, struct rpc_sysfs_xprt, kobject); + kfree(xprt); +} + +static const void *rpc_sysfs_client_namespace(struct kobject *kobj) +{ + return container_of(kobj, struct rpc_sysfs_client, kobject)->net; +} + +static const void *rpc_sysfs_xprt_switch_namespace(struct kobject *kobj) +{ + return container_of(kobj, struct rpc_sysfs_xprt_switch, kobject)->net; +} + +static const void *rpc_sysfs_xprt_namespace(struct kobject *kobj) +{ + return container_of(kobj, struct rpc_sysfs_xprt, + kobject)->xprt->xprt_net; +} + +static struct kobj_attribute rpc_sysfs_xprt_dstaddr = __ATTR(dstaddr, + 0644, rpc_sysfs_xprt_dstaddr_show, rpc_sysfs_xprt_dstaddr_store); + +static struct kobj_attribute rpc_sysfs_xprt_info = __ATTR(xprt_info, + 0444, rpc_sysfs_xprt_info_show, NULL); + +static struct kobj_attribute rpc_sysfs_xprt_change_state = __ATTR(xprt_state, + 0644, rpc_sysfs_xprt_state_show, rpc_sysfs_xprt_state_change); + +static struct attribute *rpc_sysfs_xprt_attrs[] = { + &rpc_sysfs_xprt_dstaddr.attr, + &rpc_sysfs_xprt_info.attr, + &rpc_sysfs_xprt_change_state.attr, + NULL, +}; + +static struct 
kobj_attribute rpc_sysfs_xprt_switch_info = + __ATTR(xprt_switch_info, 0444, rpc_sysfs_xprt_switch_info_show, NULL); + +static struct attribute *rpc_sysfs_xprt_switch_attrs[] = { + &rpc_sysfs_xprt_switch_info.attr, + NULL, +}; + +static struct kobj_type rpc_sysfs_client_type = { + .release = rpc_sysfs_client_release, + .sysfs_ops = &kobj_sysfs_ops, + .namespace = rpc_sysfs_client_namespace, +}; + +static struct kobj_type rpc_sysfs_xprt_switch_type = { + .release = rpc_sysfs_xprt_switch_release, + .default_attrs = rpc_sysfs_xprt_switch_attrs, + .sysfs_ops = &kobj_sysfs_ops, + .namespace = rpc_sysfs_xprt_switch_namespace, +}; + +static struct kobj_type rpc_sysfs_xprt_type = { + .release = rpc_sysfs_xprt_release, + .default_attrs = rpc_sysfs_xprt_attrs, + .sysfs_ops = &kobj_sysfs_ops, + .namespace = rpc_sysfs_xprt_namespace, +}; + +void rpc_sysfs_exit(void) +{ + kobject_put(rpc_sunrpc_client_kobj); + kobject_put(rpc_sunrpc_xprt_switch_kobj); + kset_unregister(rpc_sunrpc_kset); +} + +static struct rpc_sysfs_client *rpc_sysfs_client_alloc(struct kobject *parent, + struct net *net, + int clid) +{ + struct rpc_sysfs_client *p; + + p = kzalloc(sizeof(*p), GFP_KERNEL); + if (p) { + p->net = net; + p->kobject.kset = rpc_sunrpc_kset; + if (kobject_init_and_add(&p->kobject, &rpc_sysfs_client_type, + parent, "clnt-%d", clid) == 0) + return p; + kobject_put(&p->kobject); + } + return NULL; +} + +static struct rpc_sysfs_xprt_switch * +rpc_sysfs_xprt_switch_alloc(struct kobject *parent, + struct rpc_xprt_switch *xprt_switch, + struct net *net, + gfp_t gfp_flags) +{ + struct rpc_sysfs_xprt_switch *p; + + p = kzalloc(sizeof(*p), gfp_flags); + if (p) { + p->net = net; + p->kobject.kset = rpc_sunrpc_kset; + if (kobject_init_and_add(&p->kobject, + &rpc_sysfs_xprt_switch_type, + parent, "switch-%d", + xprt_switch->xps_id) == 0) + return p; + kobject_put(&p->kobject); + } + return NULL; +} + +static struct rpc_sysfs_xprt *rpc_sysfs_xprt_alloc(struct kobject *parent, + struct rpc_xprt 
*xprt, + gfp_t gfp_flags) +{ + struct rpc_sysfs_xprt *p; + + p = kzalloc(sizeof(*p), gfp_flags); + if (!p) + goto out; + p->kobject.kset = rpc_sunrpc_kset; + if (kobject_init_and_add(&p->kobject, &rpc_sysfs_xprt_type, + parent, "xprt-%d-%s", xprt->id, + xprt->address_strings[RPC_DISPLAY_PROTO]) == 0) + return p; + kobject_put(&p->kobject); +out: + return NULL; +} + +void rpc_sysfs_client_setup(struct rpc_clnt *clnt, + struct rpc_xprt_switch *xprt_switch, + struct net *net) +{ + struct rpc_sysfs_client *rpc_client; + + rpc_client = rpc_sysfs_client_alloc(rpc_sunrpc_client_kobj, + net, clnt->cl_clid); + if (rpc_client) { + char name[] = "switch"; + struct rpc_sysfs_xprt_switch *xswitch = + (struct rpc_sysfs_xprt_switch *)xprt_switch->xps_sysfs; + int ret; + + clnt->cl_sysfs = rpc_client; + rpc_client->clnt = clnt; + rpc_client->xprt_switch = xprt_switch; + kobject_uevent(&rpc_client->kobject, KOBJ_ADD); + ret = sysfs_create_link_nowarn(&rpc_client->kobject, + &xswitch->kobject, name); + if (ret) + pr_warn("can't create link to %s in sysfs (%d)\n", + name, ret); + } +} + +void rpc_sysfs_xprt_switch_setup(struct rpc_xprt_switch *xprt_switch, + struct rpc_xprt *xprt, + gfp_t gfp_flags) +{ + struct rpc_sysfs_xprt_switch *rpc_xprt_switch; + struct net *net; + + if (xprt_switch->xps_net) + net = xprt_switch->xps_net; + else + net = xprt->xprt_net; + rpc_xprt_switch = + rpc_sysfs_xprt_switch_alloc(rpc_sunrpc_xprt_switch_kobj, + xprt_switch, net, gfp_flags); + if (rpc_xprt_switch) { + xprt_switch->xps_sysfs = rpc_xprt_switch; + rpc_xprt_switch->xprt_switch = xprt_switch; + rpc_xprt_switch->xprt = xprt; + kobject_uevent(&rpc_xprt_switch->kobject, KOBJ_ADD); + } +} + +void rpc_sysfs_xprt_setup(struct rpc_xprt_switch *xprt_switch, + struct rpc_xprt *xprt, + gfp_t gfp_flags) +{ + struct rpc_sysfs_xprt *rpc_xprt; + struct rpc_sysfs_xprt_switch *switch_obj = + (struct rpc_sysfs_xprt_switch *)xprt_switch->xps_sysfs; + + rpc_xprt = rpc_sysfs_xprt_alloc(&switch_obj->kobject, xprt, 
gfp_flags); + if (rpc_xprt) { + xprt->xprt_sysfs = rpc_xprt; + rpc_xprt->xprt = xprt; + rpc_xprt->xprt_switch = xprt_switch; + kobject_uevent(&rpc_xprt->kobject, KOBJ_ADD); + } +} + +void rpc_sysfs_client_destroy(struct rpc_clnt *clnt) +{ + struct rpc_sysfs_client *rpc_client = clnt->cl_sysfs; + + if (rpc_client) { + char name[] = "switch"; + + sysfs_remove_link(&rpc_client->kobject, name); + kobject_uevent(&rpc_client->kobject, KOBJ_REMOVE); + kobject_del(&rpc_client->kobject); + kobject_put(&rpc_client->kobject); + clnt->cl_sysfs = NULL; + } +} + +void rpc_sysfs_xprt_switch_destroy(struct rpc_xprt_switch *xprt_switch) +{ + struct rpc_sysfs_xprt_switch *rpc_xprt_switch = xprt_switch->xps_sysfs; + + if (rpc_xprt_switch) { + kobject_uevent(&rpc_xprt_switch->kobject, KOBJ_REMOVE); + kobject_del(&rpc_xprt_switch->kobject); + kobject_put(&rpc_xprt_switch->kobject); + xprt_switch->xps_sysfs = NULL; + } +} + +void rpc_sysfs_xprt_destroy(struct rpc_xprt *xprt) +{ + struct rpc_sysfs_xprt *rpc_xprt = xprt->xprt_sysfs; + + if (rpc_xprt) { + kobject_uevent(&rpc_xprt->kobject, KOBJ_REMOVE); + kobject_del(&rpc_xprt->kobject); + kobject_put(&rpc_xprt->kobject); + xprt->xprt_sysfs = NULL; + } +} diff --git a/net/sunrpc/sysfs.h b/net/sunrpc/sysfs.h new file mode 100644 index 000000000000..6620cebd1037 --- /dev/null +++ b/net/sunrpc/sysfs.h @@ -0,0 +1,42 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (c) 2020 Anna Schumaker <Anna.Schumaker@Netapp.com> + */ +#ifndef __SUNRPC_SYSFS_H +#define __SUNRPC_SYSFS_H + +struct rpc_sysfs_client { + struct kobject kobject; + struct net *net; + struct rpc_clnt *clnt; + struct rpc_xprt_switch *xprt_switch; +}; + +struct rpc_sysfs_xprt_switch { + struct kobject kobject; + struct net *net; + struct rpc_xprt_switch *xprt_switch; + struct rpc_xprt *xprt; +}; + +struct rpc_sysfs_xprt { + struct kobject kobject; + struct rpc_xprt *xprt; + struct rpc_xprt_switch *xprt_switch; +}; + +int rpc_sysfs_init(void); +void rpc_sysfs_exit(void); + 
+void rpc_sysfs_client_setup(struct rpc_clnt *clnt, + struct rpc_xprt_switch *xprt_switch, + struct net *net); +void rpc_sysfs_client_destroy(struct rpc_clnt *clnt); +void rpc_sysfs_xprt_switch_setup(struct rpc_xprt_switch *xprt_switch, + struct rpc_xprt *xprt, gfp_t gfp_flags); +void rpc_sysfs_xprt_switch_destroy(struct rpc_xprt_switch *xprt); +void rpc_sysfs_xprt_setup(struct rpc_xprt_switch *xprt_switch, + struct rpc_xprt *xprt, gfp_t gfp_flags); +void rpc_sysfs_xprt_destroy(struct rpc_xprt *xprt); + +#endif diff --git a/net/sunrpc/xdr.c b/net/sunrpc/xdr.c index 3964ff74ee51..ca10ba2626f2 100644 --- a/net/sunrpc/xdr.c +++ b/net/sunrpc/xdr.c @@ -1230,10 +1230,9 @@ static unsigned int xdr_set_page_base(struct xdr_stream *xdr, void *kaddr; maxlen = xdr->buf->page_len; - if (base >= maxlen) { - base = maxlen; - maxlen = 0; - } else + if (base >= maxlen) + return 0; + else maxlen -= base; if (len > maxlen) len = maxlen; diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c index 3509a7f139b9..fb6db09725c7 100644 --- a/net/sunrpc/xprt.c +++ b/net/sunrpc/xprt.c @@ -55,6 +55,7 @@ #include <trace/events/sunrpc.h> #include "sunrpc.h" +#include "sysfs.h" /* * Local variables @@ -443,7 +444,7 @@ void xprt_release_xprt_cong(struct rpc_xprt *xprt, struct rpc_task *task) } EXPORT_SYMBOL_GPL(xprt_release_xprt_cong); -static inline void xprt_release_write(struct rpc_xprt *xprt, struct rpc_task *task) +void xprt_release_write(struct rpc_xprt *xprt, struct rpc_task *task) { if (xprt->snd_task != task) return; @@ -1746,6 +1747,30 @@ static void xprt_free_all_slots(struct rpc_xprt *xprt) } } +static DEFINE_IDA(rpc_xprt_ids); + +void xprt_cleanup_ids(void) +{ + ida_destroy(&rpc_xprt_ids); +} + +static int xprt_alloc_id(struct rpc_xprt *xprt) +{ + int id; + + id = ida_simple_get(&rpc_xprt_ids, 0, 0, GFP_KERNEL); + if (id < 0) + return id; + + xprt->id = id; + return 0; +} + +static void xprt_free_id(struct rpc_xprt *xprt) +{ + ida_simple_remove(&rpc_xprt_ids, xprt->id); +} + struct 
rpc_xprt *xprt_alloc(struct net *net, size_t size, unsigned int num_prealloc, unsigned int max_alloc) @@ -1758,6 +1783,7 @@ struct rpc_xprt *xprt_alloc(struct net *net, size_t size, if (xprt == NULL) goto out; + xprt_alloc_id(xprt); xprt_init(xprt, net); for (i = 0; i < num_prealloc; i++) { @@ -1786,6 +1812,8 @@ void xprt_free(struct rpc_xprt *xprt) { put_net(xprt->xprt_net); xprt_free_all_slots(xprt); + xprt_free_id(xprt); + rpc_sysfs_xprt_destroy(xprt); kfree_rcu(xprt, rcu); } EXPORT_SYMBOL_GPL(xprt_free); diff --git a/net/sunrpc/xprtmultipath.c b/net/sunrpc/xprtmultipath.c index 1b4073131c6f..c60820e45082 100644 --- a/net/sunrpc/xprtmultipath.c +++ b/net/sunrpc/xprtmultipath.c @@ -19,6 +19,8 @@ #include <linux/sunrpc/addr.h> #include <linux/sunrpc/xprtmultipath.h> +#include "sysfs.h" + typedef struct rpc_xprt *(*xprt_switch_find_xprt_t)(struct rpc_xprt_switch *xps, const struct rpc_xprt *cur); @@ -55,6 +57,7 @@ void rpc_xprt_switch_add_xprt(struct rpc_xprt_switch *xps, if (xps->xps_net == xprt->xprt_net || xps->xps_net == NULL) xprt_switch_add_xprt_locked(xps, xprt); spin_unlock(&xps->xps_lock); + rpc_sysfs_xprt_setup(xps, xprt, GFP_KERNEL); } static void xprt_switch_remove_xprt_locked(struct rpc_xprt_switch *xps, @@ -62,7 +65,8 @@ static void xprt_switch_remove_xprt_locked(struct rpc_xprt_switch *xps, { if (unlikely(xprt == NULL)) return; - xps->xps_nactive--; + if (!test_bit(XPRT_OFFLINE, &xprt->state)) + xps->xps_nactive--; xps->xps_nxprts--; if (xps->xps_nxprts == 0) xps->xps_net = NULL; @@ -86,6 +90,30 @@ void rpc_xprt_switch_remove_xprt(struct rpc_xprt_switch *xps, xprt_put(xprt); } +static DEFINE_IDA(rpc_xprtswitch_ids); + +void xprt_multipath_cleanup_ids(void) +{ + ida_destroy(&rpc_xprtswitch_ids); +} + +static int xprt_switch_alloc_id(struct rpc_xprt_switch *xps, gfp_t gfp_flags) +{ + int id; + + id = ida_simple_get(&rpc_xprtswitch_ids, 0, 0, gfp_flags); + if (id < 0) + return id; + + xps->xps_id = id; + return 0; +} + +static void 
xprt_switch_free_id(struct rpc_xprt_switch *xps) +{ + ida_simple_remove(&rpc_xprtswitch_ids, xps->xps_id); +} + /** * xprt_switch_alloc - Allocate a new struct rpc_xprt_switch * @xprt: pointer to struct rpc_xprt @@ -103,12 +131,15 @@ struct rpc_xprt_switch *xprt_switch_alloc(struct rpc_xprt *xprt, if (xps != NULL) { spin_lock_init(&xps->xps_lock); kref_init(&xps->xps_kref); + xprt_switch_alloc_id(xps, gfp_flags); xps->xps_nxprts = xps->xps_nactive = 0; atomic_long_set(&xps->xps_queuelen, 0); xps->xps_net = NULL; INIT_LIST_HEAD(&xps->xps_xprt_list); xps->xps_iter_ops = &rpc_xprt_iter_singular; + rpc_sysfs_xprt_switch_setup(xps, xprt, gfp_flags); xprt_switch_add_xprt_locked(xps, xprt); + rpc_sysfs_xprt_setup(xps, xprt, gfp_flags); } return xps; @@ -136,6 +167,8 @@ static void xprt_switch_free(struct kref *kref) struct rpc_xprt_switch, xps_kref); xprt_switch_free_entries(xps); + rpc_sysfs_xprt_switch_destroy(xps); + xprt_switch_free_id(xps); kfree_rcu(xps, xps_rcu); } @@ -198,7 +231,8 @@ void xprt_iter_default_rewind(struct rpc_xprt_iter *xpi) static bool xprt_is_active(const struct rpc_xprt *xprt) { - return kref_read(&xprt->kref) != 0; + return (kref_read(&xprt->kref) != 0 && + !test_bit(XPRT_OFFLINE, &xprt->state)); } static diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c index 19a49d26b1e4..9c2ffc67c0fd 100644 --- a/net/sunrpc/xprtrdma/transport.c +++ b/net/sunrpc/xprtrdma/transport.c @@ -73,6 +73,7 @@ unsigned int xprt_rdma_max_inline_read = RPCRDMA_DEF_INLINE; unsigned int xprt_rdma_max_inline_write = RPCRDMA_DEF_INLINE; unsigned int xprt_rdma_memreg_strategy = RPCRDMA_FRWR; int xprt_rdma_pad_optimize; +static struct xprt_class xprt_rdma; #if IS_ENABLED(CONFIG_SUNRPC_DEBUG) @@ -349,6 +350,7 @@ xprt_setup_rdma(struct xprt_create *args) /* Ensure xprt->addr holds valid server TCP (not RDMA) * address, for any side protocols which peek at it */ xprt->prot = IPPROTO_TCP; + xprt->xprt_class = &xprt_rdma; xprt->addrlen = args->addrlen; 
memcpy(&xprt->addr, sap, xprt->addrlen); diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index 316d04945587..e573dcecdd66 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -91,6 +91,11 @@ static unsigned int xprt_max_resvport_limit = RPC_MAX_RESVPORT; static struct ctl_table_header *sunrpc_table_header; +static struct xprt_class xs_local_transport; +static struct xprt_class xs_udp_transport; +static struct xprt_class xs_tcp_transport; +static struct xprt_class xs_bc_tcp_transport; + /* * FIXME: changing the UDP slot table size should also resize the UDP * socket buffers for existing UDP transports @@ -1648,6 +1653,13 @@ static int xs_get_srcport(struct sock_xprt *transport) return port; } +unsigned short get_srcport(struct rpc_xprt *xprt) +{ + struct sock_xprt *sock = container_of(xprt, struct sock_xprt, xprt); + return sock->srcport; +} +EXPORT_SYMBOL(get_srcport); + static unsigned short xs_next_srcport(struct sock_xprt *transport, unsigned short port) { if (transport->srcport != 0) @@ -1689,7 +1701,8 @@ static int xs_bind(struct sock_xprt *transport, struct socket *sock) err = kernel_bind(sock, (struct sockaddr *)&myaddr, transport->xprt.addrlen); if (err == 0) { - transport->srcport = port; + if (transport->xprt.reuseport) + transport->srcport = port; break; } last = port; @@ -2779,6 +2792,7 @@ static struct rpc_xprt *xs_setup_local(struct xprt_create *args) transport = container_of(xprt, struct sock_xprt, xprt); xprt->prot = 0; + xprt->xprt_class = &xs_local_transport; xprt->max_payload = RPC_MAX_FRAGMENT_SIZE; xprt->bind_timeout = XS_BIND_TO; @@ -2848,6 +2862,7 @@ static struct rpc_xprt *xs_setup_udp(struct xprt_create *args) transport = container_of(xprt, struct sock_xprt, xprt); xprt->prot = IPPROTO_UDP; + xprt->xprt_class = &xs_udp_transport; /* XXX: header size can vary due to auth type, IPv6, etc. 
*/ xprt->max_payload = (1U << 16) - (MAX_HEADER << 3); @@ -2928,6 +2943,7 @@ static struct rpc_xprt *xs_setup_tcp(struct xprt_create *args) transport = container_of(xprt, struct sock_xprt, xprt); xprt->prot = IPPROTO_TCP; + xprt->xprt_class = &xs_tcp_transport; xprt->max_payload = RPC_MAX_FRAGMENT_SIZE; xprt->bind_timeout = XS_BIND_TO; @@ -3001,6 +3017,7 @@ static struct rpc_xprt *xs_setup_bc_tcp(struct xprt_create *args) transport = container_of(xprt, struct sock_xprt, xprt); xprt->prot = IPPROTO_TCP; + xprt->xprt_class = &xs_bc_tcp_transport; xprt->max_payload = RPC_MAX_FRAGMENT_SIZE; xprt->timeout = &xs_tcp_default_timeout; |