From d737e5d418706abf32f6de68c3e09958516d422f Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 9 Feb 2021 16:04:15 -0500 Subject: SUNRPC: Set TCP_CORK until the transmit queue is empty When we have multiple RPC requests queued up, it makes sense to set the TCP_CORK option while the transmit queue is non-empty. Signed-off-by: Trond Myklebust --- include/linux/sunrpc/xprt.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h index d2e97ee802af..d81fe8b364d0 100644 --- a/include/linux/sunrpc/xprt.h +++ b/include/linux/sunrpc/xprt.h @@ -247,6 +247,7 @@ struct rpc_xprt { struct rpc_task * snd_task; /* Task blocked in send */ struct list_head xmit_queue; /* Send queue */ + atomic_long_t xmit_queuelen; struct svc_xprt *bc_xprt; /* NFSv4.1 backchannel */ #if defined(CONFIG_SUNRPC_BACKCHANNEL) -- cgit v1.2.3 From 332d1a0373be32a3a3c152756bca45ff4f4e11b5 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 25 Mar 2021 18:15:36 -0400 Subject: NFS: nfs4_bitmask_adjust() must not change the server global bitmasks As currently set, the calls to nfs4_bitmask_adjust() will end up overwriting the contents of the nfs_server cache_consistency_bitmask field. The intention here should be to modify a private copy of that mask in the close/delegreturn/write arguments. Fixes: 76bd5c016ef4 ("NFSv4: make cache consistency bitmask dynamic") Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 56 ++++++++++++++++++++++++++++++------------------- include/linux/nfs_xdr.h | 11 +++++++--- 2 files changed, 42 insertions(+), 25 deletions(-) (limited to 'include') diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index bca1726bcd55..1cf98c40e3b2 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -108,9 +108,10 @@ static int nfs41_test_stateid(struct nfs_server *, nfs4_stateid *, static int nfs41_free_stateid(struct nfs_server *, const nfs4_stateid *, const struct cred *, bool); #endif -static void nfs4_bitmask_adjust(__u32 *bitmask, struct inode *inode, - struct nfs_server *server, - struct nfs4_label *label); +static void nfs4_bitmask_set(__u32 bitmask[NFS4_BITMASK_SZ], + const __u32 *src, struct inode *inode, + struct nfs_server *server, + struct nfs4_label *label); #ifdef CONFIG_NFS_V4_SECURITY_LABEL static inline struct nfs4_label * @@ -3591,6 +3592,7 @@ static void nfs4_close_prepare(struct rpc_task *task, void *data) struct nfs4_closedata *calldata = data; struct nfs4_state *state = calldata->state; struct inode *inode = calldata->inode; + struct nfs_server *server = NFS_SERVER(inode); struct pnfs_layout_hdr *lo; bool is_rdonly, is_wronly, is_rdwr; int call_close = 0; @@ -3647,8 +3649,10 @@ static void nfs4_close_prepare(struct rpc_task *task, void *data) if (calldata->arg.fmode == 0 || calldata->arg.fmode == FMODE_READ) { /* Close-to-open cache consistency revalidation */ if (!nfs4_have_delegation(inode, FMODE_READ)) { - calldata->arg.bitmask = NFS_SERVER(inode)->cache_consistency_bitmask; - nfs4_bitmask_adjust(calldata->arg.bitmask, inode, NFS_SERVER(inode), NULL); + nfs4_bitmask_set(calldata->arg.bitmask_store, + server->cache_consistency_bitmask, + inode, server, NULL); + calldata->arg.bitmask = calldata->arg.bitmask_store; } else calldata->arg.bitmask = NULL; } @@ -5416,19 +5420,17 @@ bool nfs4_write_need_cache_consistency_data(struct nfs_pgio_header *hdr) return nfs4_have_delegation(hdr->inode, FMODE_READ) == 0; } -static void nfs4_bitmask_adjust(__u32 *bitmask, struct inode *inode, - struct nfs_server *server, - struct nfs4_label *label) +static void nfs4_bitmask_set(__u32 bitmask[NFS4_BITMASK_SZ], const __u32 *src, + struct inode *inode, struct nfs_server *server, + struct nfs4_label *label) { - unsigned long cache_validity = READ_ONCE(NFS_I(inode)->cache_validity); + unsigned int i; - if ((cache_validity & NFS_INO_INVALID_DATA) || - (cache_validity & NFS_INO_REVAL_PAGECACHE) || - (cache_validity & NFS_INO_REVAL_FORCED) || - (cache_validity & NFS_INO_INVALID_OTHER)) - nfs4_bitmap_copy_adjust(bitmask, nfs4_bitmask(server, label), inode); + memcpy(bitmask, src, sizeof(*bitmask) * NFS4_BITMASK_SZ); + if (cache_validity & (NFS_INO_INVALID_CHANGE | NFS_INO_REVAL_PAGECACHE)) + bitmask[0] |= FATTR4_WORD0_CHANGE; if (cache_validity & NFS_INO_INVALID_ATIME) bitmask[1] |= FATTR4_WORD1_TIME_ACCESS; if (cache_validity & NFS_INO_INVALID_OTHER) @@ -5437,16 +5439,22 @@ static void nfs4_bitmask_adjust(__u32 *bitmask, struct inode *inode, FATTR4_WORD1_NUMLINKS; if (label && label->len && cache_validity & NFS_INO_INVALID_LABEL) bitmask[2] |= FATTR4_WORD2_SECURITY_LABEL; - if (cache_validity & NFS_INO_INVALID_CHANGE) - bitmask[0] |= FATTR4_WORD0_CHANGE; if (cache_validity & NFS_INO_INVALID_CTIME) bitmask[1] |= FATTR4_WORD1_TIME_METADATA; if (cache_validity & NFS_INO_INVALID_MTIME) bitmask[1] |= FATTR4_WORD1_TIME_MODIFY; - if (cache_validity & NFS_INO_INVALID_SIZE) - bitmask[0] |= FATTR4_WORD0_SIZE; if (cache_validity & NFS_INO_INVALID_BLOCKS) bitmask[1] |= FATTR4_WORD1_SPACE_USED; + + if (nfs4_have_delegation(inode, FMODE_READ) && + !(cache_validity & NFS_INO_REVAL_FORCED)) + bitmask[0] &= ~FATTR4_WORD0_SIZE; + else if (cache_validity & + (NFS_INO_INVALID_SIZE | NFS_INO_REVAL_PAGECACHE)) + bitmask[0] |= FATTR4_WORD0_SIZE; + + for (i = 0; i < NFS4_BITMASK_SZ; i++) + bitmask[i] &= server->attr_bitmask[i]; } static void nfs4_proc_write_setup(struct nfs_pgio_header *hdr, @@ -5459,8 +5467,10 @@ static void nfs4_proc_write_setup(struct nfs_pgio_header *hdr, hdr->args.bitmask = NULL; hdr->res.fattr = NULL; } else { - hdr->args.bitmask = server->cache_consistency_bitmask; - nfs4_bitmask_adjust(hdr->args.bitmask, hdr->inode, server, NULL); + nfs4_bitmask_set(hdr->args.bitmask_store, + server->cache_consistency_bitmask, + hdr->inode, server, NULL); + hdr->args.bitmask = hdr->args.bitmask_store; } if (!hdr->pgio_done_cb) @@ -6502,8 +6512,10 @@ static int _nfs4_proc_delegreturn(struct inode *inode, const struct cred *cred, data->args.fhandle = &data->fh; data->args.stateid = &data->stateid; - data->args.bitmask = server->cache_consistency_bitmask; - nfs4_bitmask_adjust(data->args.bitmask, inode, server, NULL); + nfs4_bitmask_set(data->args.bitmask_store, + server->cache_consistency_bitmask, inode, server, + NULL); + data->args.bitmask = data->args.bitmask_store; nfs_copy_fh(&data->fh, NFS_FH(inode)); nfs4_stateid_copy(&data->stateid, stateid); data->res.fattr = &data->fattr; diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 3327239fa2f9..cc29dee508f7 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -15,6 +15,8 @@ #define NFS_DEF_FILE_IO_SIZE (4096U) #define NFS_MIN_FILE_IO_SIZE (1024U) +#define NFS_BITMASK_SZ 3 + struct nfs4_string { unsigned int len; char *data; @@ -525,7 +527,8 @@ struct nfs_closeargs { struct nfs_seqid * seqid; fmode_t fmode; u32 share_access; - u32 * bitmask; + const u32 * bitmask; + u32 bitmask_store[NFS_BITMASK_SZ]; struct nfs4_layoutreturn_args *lr_args; }; @@ -608,7 +611,8 @@ struct nfs4_delegreturnargs { struct nfs4_sequence_args seq_args; const struct nfs_fh *fhandle; const nfs4_stateid *stateid; - u32 * bitmask; + const u32 *bitmask; + u32 bitmask_store[NFS_BITMASK_SZ]; struct nfs4_layoutreturn_args *lr_args; }; @@ -648,7 +652,8 @@ struct nfs_pgio_args { union { unsigned int replen; /* used by read */ struct { - u32 * bitmask; /* used by write */ + const u32 * bitmask; /* used by write */ + u32 bitmask_store[NFS_BITMASK_SZ]; /* used by write */ enum nfs3_stable_how stable; /* used by write */ }; }; -- cgit v1.2.3 From 1f3208b2d6975f31b9c7c6bf174b84fe9c97492f Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 25 Mar 2021 11:04:34 -0400 Subject: NFS: Add a cache validity flag argument to nfs_revalidate_inode() Add an argument to nfs_revalidate_inode() to allow callers to specify which attributes they need to check for validity. Signed-off-by: Trond Myklebust --- fs/nfs/dir.c | 2 +- fs/nfs/export.c | 6 +----- fs/nfs/inode.c | 25 +++++++------------------ fs/nfs/nfs3acl.c | 2 +- fs/nfs/nfs4proc.c | 6 +++--- include/linux/nfs_fs.h | 2 +- 6 files changed, 14 insertions(+), 29 deletions(-) (limited to 'include') diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index 0cd7c59a6601..e924d65c125e 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -3006,7 +3006,7 @@ out_notsup: if (mask & MAY_NOT_BLOCK) return -ECHILD; - res = nfs_revalidate_inode(NFS_SERVER(inode), inode); + res = nfs_revalidate_inode(inode, NFS_INO_INVALID_OTHER); if (res == 0) res = generic_permission(&init_user_ns, inode, mask); goto out; diff --git a/fs/nfs/export.c b/fs/nfs/export.c index b347e3ce0cc8..37a1a88df771 100644 --- a/fs/nfs/export.c +++ b/fs/nfs/export.c @@ -169,11 +169,7 @@ out: static u64 nfs_fetch_iversion(struct inode *inode) { - struct nfs_server *server = NFS_SERVER(inode); - - if (nfs_check_cache_invalid(inode, NFS_INO_INVALID_CHANGE | - NFS_INO_REVAL_PAGECACHE)) - __nfs_revalidate_inode(server, inode); + nfs_revalidate_inode(inode, NFS_INO_INVALID_CHANGE); return inode_peek_iversion_raw(inode); } diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index d34da63202cc..b9aac408f03a 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -802,16 +802,6 @@ static void nfs_readdirplus_parent_cache_hit(struct dentry *dentry) dput(parent); } -static bool nfs_need_revalidate_inode(struct inode *inode) -{ - if (NFS_I(inode)->cache_validity & - (NFS_INO_INVALID_ATTR|NFS_INO_INVALID_LABEL)) - return true; - if (nfs_attribute_cache_expired(inode)) - return true; - return false; -} - static u32 nfs_get_valid_attrmask(struct inode *inode) { unsigned long cache_validity = READ_ONCE(NFS_I(inode)->cache_validity); @@ -1004,7 +994,6 @@ void nfs_close_context(struct nfs_open_context *ctx, int is_sync) { struct nfs_inode *nfsi; struct inode *inode; - struct nfs_server *server; if (!(ctx->mode & FMODE_WRITE)) return; @@ -1020,10 +1009,10 @@ void nfs_close_context(struct nfs_open_context *ctx, int is_sync) return; if (!list_empty(&nfsi->open_files)) return; - server = NFS_SERVER(inode); - if (server->flags & NFS_MOUNT_NOCTO) + if (NFS_SERVER(inode)->flags & NFS_MOUNT_NOCTO) return; - nfs_revalidate_inode(server, inode); + nfs_revalidate_inode(inode, + NFS_INO_INVALID_CHANGE | NFS_INO_INVALID_SIZE); } EXPORT_SYMBOL_GPL(nfs_close_context); @@ -1278,16 +1267,16 @@ int nfs_attribute_cache_expired(struct inode *inode) /** * nfs_revalidate_inode - Revalidate the inode attributes - * @server: pointer to nfs_server struct * @inode: pointer to inode struct + * @flags: cache flags to check * * Updates inode attribute information by retrieving the data from the server. */ -int nfs_revalidate_inode(struct nfs_server *server, struct inode *inode) +int nfs_revalidate_inode(struct inode *inode, unsigned long flags) { - if (!nfs_need_revalidate_inode(inode)) + if (!nfs_check_cache_invalid(inode, flags)) return NFS_STALE(inode) ? -ESTALE : 0; - return __nfs_revalidate_inode(server, inode); + return __nfs_revalidate_inode(NFS_SERVER(inode), inode); } EXPORT_SYMBOL_GPL(nfs_revalidate_inode); diff --git a/fs/nfs/nfs3acl.c b/fs/nfs/nfs3acl.c index bb386a691e69..9ec560aa4a50 100644 --- a/fs/nfs/nfs3acl.c +++ b/fs/nfs/nfs3acl.c @@ -65,7 +65,7 @@ struct posix_acl *nfs3_get_acl(struct inode *inode, int type) if (!nfs_server_capable(inode, NFS_CAP_ACLS)) return ERR_PTR(-EOPNOTSUPP); - status = nfs_revalidate_inode(server, inode); + status = nfs_revalidate_inode(inode, NFS_INO_INVALID_CHANGE); if (status < 0) return ERR_PTR(status); diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 1cf98c40e3b2..6b990fe5bc1f 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -5868,7 +5868,7 @@ static ssize_t nfs4_proc_get_acl(struct inode *inode, void *buf, size_t buflen) if (!nfs4_server_supports_acls(server)) return -EOPNOTSUPP; - ret = nfs_revalidate_inode(server, inode); + ret = nfs_revalidate_inode(inode, NFS_INO_INVALID_CHANGE); if (ret < 0) return ret; if (NFS_I(inode)->cache_validity & NFS_INO_INVALID_ACL) @@ -7619,7 +7619,7 @@ static int nfs4_xattr_get_nfs4_user(const struct xattr_handler *handler, return -EACCES; } - ret = nfs_revalidate_inode(NFS_SERVER(inode), inode); + ret = nfs_revalidate_inode(inode, NFS_INO_INVALID_CHANGE); if (ret) return ret; @@ -7650,7 +7650,7 @@ nfs4_listxattr_nfs4_user(struct inode *inode, char *list, size_t list_len) return 0; } - ret = nfs_revalidate_inode(NFS_SERVER(inode), inode); + ret = nfs_revalidate_inode(inode, NFS_INO_INVALID_CHANGE); if (ret) return ret; diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index eadaabd18dc7..624ffd47a9d4 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -386,7 +386,7 @@ extern void nfs_access_set_mask(struct nfs_access_entry *, u32); extern int nfs_permission(struct user_namespace *, struct inode *, int); extern int nfs_open(struct inode *, struct file *); extern int nfs_attribute_cache_expired(struct inode *inode); -extern int nfs_revalidate_inode(struct nfs_server *server, struct inode *inode); +extern int nfs_revalidate_inode(struct inode *inode, unsigned long flags); extern int __nfs_revalidate_inode(struct nfs_server *, struct inode *); extern int nfs_clear_invalid_mapping(struct address_space *mapping); extern bool nfs_mapping_need_revalidate_inode(struct inode *inode); -- cgit v1.2.3 From fabf2b341502e894001d70f91309dd6f3785e2dc Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 25 Mar 2021 13:14:42 -0400 Subject: NFS: Separate tracking of file nlinks cache validity from the mode/uid/gid Rename can cause us to revalidate the access cache, so lets track the nlinks separately from the mode/uid/gid. Signed-off-by: Trond Myklebust --- fs/nfs/dir.c | 2 +- fs/nfs/inode.c | 15 ++++++++++----- fs/nfs/nfs4proc.c | 13 +++++++------ fs/nfs/nfstrace.h | 4 +++- include/linux/nfs_fs.h | 2 ++ 5 files changed, 23 insertions(+), 13 deletions(-) (limited to 'include') diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index e924d65c125e..f748d2294261 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -1711,7 +1711,7 @@ static void nfs_drop_nlink(struct inode *inode) NFS_I(inode)->attr_gencount = nfs_inc_attr_generation_counter(); nfs_set_cache_invalid( inode, NFS_INO_INVALID_CHANGE | NFS_INO_INVALID_CTIME | - NFS_INO_INVALID_OTHER | NFS_INO_REVAL_FORCED); + NFS_INO_INVALID_NLINK | NFS_INO_REVAL_FORCED); spin_unlock(&inode->i_lock); } diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 3d18e66a4b8f..7bf9138330f2 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -538,7 +538,7 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr, st if (fattr->valid & NFS_ATTR_FATTR_NLINK) set_nlink(inode, fattr->nlink); else if (nfs_server_capable(inode, NFS_CAP_NLINK)) - nfs_set_cache_invalid(inode, NFS_INO_INVALID_OTHER); + nfs_set_cache_invalid(inode, NFS_INO_INVALID_NLINK); if (fattr->valid & NFS_ATTR_FATTR_OWNER) inode->i_uid = fattr->uid; else if (nfs_server_capable(inode, NFS_CAP_OWNER)) @@ -801,8 +801,10 @@ static u32 nfs_get_valid_attrmask(struct inode *inode) reply_mask |= STATX_MTIME; if (!(cache_validity & NFS_INO_INVALID_SIZE)) reply_mask |= STATX_SIZE; + if (!(cache_validity & NFS_INO_INVALID_NLINK)) + reply_mask |= STATX_NLINK; if (!(cache_validity & NFS_INO_INVALID_OTHER)) - reply_mask |= STATX_UID | STATX_GID | STATX_MODE | STATX_NLINK; + reply_mask |= STATX_UID | STATX_GID | STATX_MODE; if (!(cache_validity & NFS_INO_INVALID_BLOCKS)) reply_mask |= STATX_BLOCKS; return reply_mask; @@ -868,7 +870,9 @@ int nfs_getattr(struct user_namespace *mnt_userns, const struct path *path, do_update |= cache_validity & NFS_INO_INVALID_MTIME; if (request_mask & STATX_SIZE) do_update |= cache_validity & NFS_INO_INVALID_SIZE; - if (request_mask & (STATX_UID | STATX_GID | STATX_MODE | STATX_NLINK)) + if (request_mask & STATX_NLINK) + do_update |= cache_validity & NFS_INO_INVALID_NLINK; + if (request_mask & (STATX_UID | STATX_GID | STATX_MODE)) do_update |= cache_validity & NFS_INO_INVALID_OTHER; if (request_mask & STATX_BLOCKS) do_update |= cache_validity & NFS_INO_INVALID_BLOCKS; @@ -1518,7 +1522,7 @@ static int nfs_check_inode_attributes(struct inode *inode, struct nfs_fattr *fat /* Has the link count changed? */ if ((fattr->valid & NFS_ATTR_FATTR_NLINK) && inode->i_nlink != fattr->nlink) - invalid |= NFS_INO_INVALID_OTHER; + invalid |= NFS_INO_INVALID_NLINK; ts = inode->i_atime; if ((fattr->valid & NFS_ATTR_FATTR_ATIME) && !timespec64_equal(&ts, &fattr->atime)) @@ -1942,6 +1946,7 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) | NFS_INO_INVALID_MTIME | NFS_INO_INVALID_SIZE | NFS_INO_INVALID_BLOCKS + | NFS_INO_INVALID_NLINK | NFS_INO_INVALID_OTHER; if (S_ISDIR(inode->i_mode)) nfs_force_lookup_revalidate(inode); @@ -2074,7 +2079,7 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) } } else if (server->caps & NFS_CAP_NLINK) { nfsi->cache_validity |= save_cache_validity & - (NFS_INO_INVALID_OTHER + (NFS_INO_INVALID_NLINK | NFS_INO_REVAL_FORCED); cache_revalidated = false; } diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 8dc595ce40ca..a74c1c3c4192 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -1167,14 +1167,14 @@ int nfs4_call_sync(struct rpc_clnt *clnt, static void nfs4_inc_nlink_locked(struct inode *inode) { - nfs_set_cache_invalid(inode, NFS_INO_INVALID_OTHER); + nfs_set_cache_invalid(inode, NFS_INO_INVALID_NLINK); inc_nlink(inode); } static void nfs4_dec_nlink_locked(struct inode *inode) { - nfs_set_cache_invalid(inode, NFS_INO_INVALID_OTHER); + nfs_set_cache_invalid(inode, NFS_INO_INVALID_NLINK); drop_nlink(inode); } @@ -4717,11 +4717,11 @@ static int nfs4_proc_rename_done(struct rpc_task *task, struct inode *old_dir, /* Note: If we moved a directory, nlink will change */ nfs4_update_changeattr(old_dir, &res->old_cinfo, res->old_fattr->time_start, - NFS_INO_INVALID_OTHER | + NFS_INO_INVALID_NLINK | NFS_INO_INVALID_DATA); nfs4_update_changeattr(new_dir, &res->new_cinfo, res->new_fattr->time_start, - NFS_INO_INVALID_OTHER | + NFS_INO_INVALID_NLINK | NFS_INO_INVALID_DATA); } else nfs4_update_changeattr(old_dir, &res->old_cinfo, @@ -5433,8 +5433,9 @@ static void nfs4_bitmask_set(__u32 bitmask[NFS4_BITMASK_SZ], const __u32 *src, bitmask[1] |= FATTR4_WORD1_TIME_ACCESS; if (cache_validity & NFS_INO_INVALID_OTHER) bitmask[1] |= FATTR4_WORD1_MODE | FATTR4_WORD1_OWNER | - FATTR4_WORD1_OWNER_GROUP | - FATTR4_WORD1_NUMLINKS; + FATTR4_WORD1_OWNER_GROUP; + if (cache_validity & NFS_INO_INVALID_NLINK) + bitmask[1] |= FATTR4_WORD1_NUMLINKS; if (label && label->len && cache_validity & NFS_INO_INVALID_LABEL) bitmask[2] |= FATTR4_WORD2_SECURITY_LABEL; if (cache_validity & NFS_INO_INVALID_CTIME) diff --git a/fs/nfs/nfstrace.h b/fs/nfs/nfstrace.h index cdba6eebe3cb..a0ebc53160dd 100644 --- a/fs/nfs/nfstrace.h +++ b/fs/nfs/nfstrace.h @@ -48,6 +48,7 @@ TRACE_DEFINE_ENUM(NFS_INO_INVALID_OTHER); TRACE_DEFINE_ENUM(NFS_INO_DATA_INVAL_DEFER); TRACE_DEFINE_ENUM(NFS_INO_INVALID_BLOCKS); TRACE_DEFINE_ENUM(NFS_INO_INVALID_XATTR); +TRACE_DEFINE_ENUM(NFS_INO_INVALID_NLINK); #define nfs_show_cache_validity(v) \ __print_flags(v, "|", \ @@ -65,7 +66,8 @@ TRACE_DEFINE_ENUM(NFS_INO_INVALID_XATTR); { NFS_INO_INVALID_OTHER, "INVALID_OTHER" }, \ { NFS_INO_DATA_INVAL_DEFER, "DATA_INVAL_DEFER" }, \ { NFS_INO_INVALID_BLOCKS, "INVALID_BLOCKS" }, \ - { NFS_INO_INVALID_XATTR, "INVALID_XATTR" }) + { NFS_INO_INVALID_XATTR, "INVALID_XATTR" }, \ + { NFS_INO_INVALID_NLINK, "INVALID_NLINK" }) TRACE_DEFINE_ENUM(NFS_INO_ADVISE_RDPLUS); TRACE_DEFINE_ENUM(NFS_INO_STALE); diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index 624ffd47a9d4..41165b988dfb 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -246,11 +246,13 @@ struct nfs4_copy_state { BIT(13) /* Deferred cache invalidation */ #define NFS_INO_INVALID_BLOCKS BIT(14) /* cached blocks are invalid */ #define NFS_INO_INVALID_XATTR BIT(15) /* xattrs are invalid */ +#define NFS_INO_INVALID_NLINK BIT(16) /* cached nlinks is invalid */ #define NFS_INO_INVALID_ATTR (NFS_INO_INVALID_CHANGE \ | NFS_INO_INVALID_CTIME \ | NFS_INO_INVALID_MTIME \ | NFS_INO_INVALID_SIZE \ + | NFS_INO_INVALID_NLINK \ | NFS_INO_INVALID_OTHER) /* inode metadata is invalid */ /* -- cgit v1.2.3 From 720869eb19f3161980d6d4631d3df7e8c5355993 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 13 Apr 2021 09:41:16 -0400 Subject: NFS: Separate tracking of file mode cache validity from the uid/gid chown()/chgrp() and chmod() are separate operations, and in addition, there are mode operations that are performed automatically by the server. So let's track mode validity separately from the file ownership validity. Signed-off-by: Trond Myklebust --- fs/nfs/dir.c | 5 +++-- fs/nfs/inode.c | 18 ++++++++++++------ fs/nfs/nfs4proc.c | 14 +++++++++----- fs/nfs/nfstrace.h | 4 +++- fs/nfs/write.c | 2 +- include/linux/nfs_fs.h | 2 ++ 6 files changed, 30 insertions(+), 15 deletions(-) (limited to 'include') diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index f748d2294261..d2835d211a73 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -2948,7 +2948,7 @@ static int nfs_execute_ok(struct inode *inode, int mask) if (S_ISDIR(inode->i_mode)) return 0; - if (nfs_check_cache_invalid(inode, NFS_INO_INVALID_OTHER)) { + if (nfs_check_cache_invalid(inode, NFS_INO_INVALID_MODE)) { if (mask & MAY_NOT_BLOCK) return -ECHILD; ret = __nfs_revalidate_inode(server, inode); @@ -3006,7 +3006,8 @@ out_notsup: if (mask & MAY_NOT_BLOCK) return -ECHILD; - res = nfs_revalidate_inode(inode, NFS_INO_INVALID_OTHER); + res = nfs_revalidate_inode(inode, NFS_INO_INVALID_MODE | + NFS_INO_INVALID_OTHER); if (res == 0) res = generic_permission(&init_user_ns, inode, mask); goto out; diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 7bf9138330f2..81e3e140e923 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -199,7 +199,8 @@ void nfs_set_cache_invalid(struct inode *inode, unsigned long flags) if (have_delegation) { if (!(flags & NFS_INO_REVAL_FORCED)) - flags &= ~NFS_INO_INVALID_OTHER; + flags &= ~(NFS_INO_INVALID_MODE | + NFS_INO_INVALID_OTHER); flags &= ~(NFS_INO_INVALID_CHANGE | NFS_INO_INVALID_SIZE | NFS_INO_INVALID_XATTR); @@ -472,7 +473,7 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr, st nfsi->cache_validity = 0; if ((fattr->valid & NFS_ATTR_FATTR_MODE) == 0 && nfs_server_capable(inode, NFS_CAP_MODE)) - nfs_set_cache_invalid(inode, NFS_INO_INVALID_OTHER); + nfs_set_cache_invalid(inode, NFS_INO_INVALID_MODE); /* Why so? Because we want revalidate for devices/FIFOs, and * that's precisely what we have in nfs_file_inode_operations. */ @@ -803,8 +804,10 @@ static u32 nfs_get_valid_attrmask(struct inode *inode) reply_mask |= STATX_SIZE; if (!(cache_validity & NFS_INO_INVALID_NLINK)) reply_mask |= STATX_NLINK; + if (!(cache_validity & NFS_INO_INVALID_MODE)) + reply_mask |= STATX_MODE; if (!(cache_validity & NFS_INO_INVALID_OTHER)) - reply_mask |= STATX_UID | STATX_GID | STATX_MODE; + reply_mask |= STATX_UID | STATX_GID; if (!(cache_validity & NFS_INO_INVALID_BLOCKS)) reply_mask |= STATX_BLOCKS; return reply_mask; @@ -872,7 +875,9 @@ int nfs_getattr(struct user_namespace *mnt_userns, const struct path *path, do_update |= cache_validity & NFS_INO_INVALID_SIZE; if (request_mask & STATX_NLINK) do_update |= cache_validity & NFS_INO_INVALID_NLINK; - if (request_mask & (STATX_UID | STATX_GID | STATX_MODE)) + if (request_mask & STATX_MODE) + do_update |= cache_validity & NFS_INO_INVALID_MODE; + if (request_mask & (STATX_UID | STATX_GID)) do_update |= cache_validity & NFS_INO_INVALID_OTHER; if (request_mask & STATX_BLOCKS) do_update |= cache_validity & NFS_INO_INVALID_BLOCKS; @@ -1510,7 +1515,7 @@ static int nfs_check_inode_attributes(struct inode *inode, struct nfs_fattr *fat if ((fattr->valid & NFS_ATTR_FATTR_MODE) && (inode->i_mode & S_IALLUGO) != (fattr->mode & S_IALLUGO)) invalid |= NFS_INO_INVALID_ACCESS | NFS_INO_INVALID_ACL - | NFS_INO_INVALID_OTHER; + | NFS_INO_INVALID_MODE; if ((fattr->valid & NFS_ATTR_FATTR_OWNER) && !uid_eq(inode->i_uid, fattr->uid)) invalid |= NFS_INO_INVALID_ACCESS | NFS_INO_INVALID_ACL @@ -1947,6 +1952,7 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) | NFS_INO_INVALID_SIZE | NFS_INO_INVALID_BLOCKS | NFS_INO_INVALID_NLINK + | NFS_INO_INVALID_MODE | NFS_INO_INVALID_OTHER; if (S_ISDIR(inode->i_mode)) nfs_force_lookup_revalidate(inode); @@ -2037,7 +2043,7 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) } } else if (server->caps & NFS_CAP_MODE) { nfsi->cache_validity |= save_cache_validity & - (NFS_INO_INVALID_OTHER + (NFS_INO_INVALID_MODE | NFS_INO_REVAL_FORCED); cache_revalidated = false; } diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index a74c1c3c4192..bc90f2a12d5d 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -302,9 +302,10 @@ static void nfs4_bitmap_copy_adjust(__u32 *dst, const __u32 *src, if (!(cache_validity & NFS_INO_INVALID_CHANGE)) dst[0] &= ~FATTR4_WORD0_CHANGE; + if (!(cache_validity & NFS_INO_INVALID_MODE)) + dst[1] &= ~FATTR4_WORD1_MODE; if (!(cache_validity & NFS_INO_INVALID_OTHER)) - dst[1] &= ~(FATTR4_WORD1_MODE | FATTR4_WORD1_OWNER | - FATTR4_WORD1_OWNER_GROUP); + dst[1] &= ~(FATTR4_WORD1_OWNER | FATTR4_WORD1_OWNER_GROUP); } static void nfs4_setup_readdir(u64 cookie, __be32 *verifier, struct dentry *dentry, @@ -3344,7 +3345,9 @@ static int nfs4_do_setattr(struct inode *inode, const struct cred *cred, unsigned long adjust_flags = NFS_INO_INVALID_CHANGE; int err; - if (sattr->ia_valid & (ATTR_MODE|ATTR_UID|ATTR_GID)) + if (sattr->ia_valid & (ATTR_MODE | ATTR_KILL_SUID | ATTR_KILL_SGID)) + adjust_flags |= NFS_INO_INVALID_MODE; + if (sattr->ia_valid & (ATTR_UID | ATTR_GID)) adjust_flags |= NFS_INO_INVALID_OTHER; do { @@ -5431,9 +5434,10 @@ static void nfs4_bitmask_set(__u32 bitmask[NFS4_BITMASK_SZ], const __u32 *src, bitmask[0] |= FATTR4_WORD0_CHANGE; if (cache_validity & NFS_INO_INVALID_ATIME) bitmask[1] |= FATTR4_WORD1_TIME_ACCESS; + if (cache_validity & NFS_INO_INVALID_MODE) + bitmask[1] |= FATTR4_WORD1_MODE; if (cache_validity & NFS_INO_INVALID_OTHER) - bitmask[1] |= FATTR4_WORD1_MODE | FATTR4_WORD1_OWNER | - FATTR4_WORD1_OWNER_GROUP; + bitmask[1] |= FATTR4_WORD1_OWNER | FATTR4_WORD1_OWNER_GROUP; if (cache_validity & NFS_INO_INVALID_NLINK) bitmask[1] |= FATTR4_WORD1_NUMLINKS; if (label && label->len && cache_validity & NFS_INO_INVALID_LABEL) diff --git a/fs/nfs/nfstrace.h b/fs/nfs/nfstrace.h index a0ebc53160dd..41a161cd31f6 100644 --- a/fs/nfs/nfstrace.h +++ b/fs/nfs/nfstrace.h @@ -49,6 +49,7 @@ TRACE_DEFINE_ENUM(NFS_INO_DATA_INVAL_DEFER); TRACE_DEFINE_ENUM(NFS_INO_INVALID_BLOCKS); TRACE_DEFINE_ENUM(NFS_INO_INVALID_XATTR); TRACE_DEFINE_ENUM(NFS_INO_INVALID_NLINK); +TRACE_DEFINE_ENUM(NFS_INO_INVALID_MODE); #define nfs_show_cache_validity(v) \ __print_flags(v, "|", \ @@ -67,7 +68,8 @@ TRACE_DEFINE_ENUM(NFS_INO_INVALID_NLINK); { NFS_INO_DATA_INVAL_DEFER, "DATA_INVAL_DEFER" }, \ { NFS_INO_INVALID_BLOCKS, "INVALID_BLOCKS" }, \ { NFS_INO_INVALID_XATTR, "INVALID_XATTR" }, \ - { NFS_INO_INVALID_NLINK, "INVALID_NLINK" }) + { NFS_INO_INVALID_NLINK, "INVALID_NLINK" }, \ + { NFS_INO_INVALID_MODE, "INVALID_MODE" }) TRACE_DEFINE_ENUM(NFS_INO_ADVISE_RDPLUS); TRACE_DEFINE_ENUM(NFS_INO_STALE); diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 7a39b3d424da..61d1174935b6 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -1604,7 +1604,7 @@ static int nfs_writeback_done(struct rpc_task *task, /* Deal with the suid/sgid bit corner case */ if (nfs_should_remove_suid(inode)) { spin_lock(&inode->i_lock); - nfs_set_cache_invalid(inode, NFS_INO_INVALID_OTHER); + nfs_set_cache_invalid(inode, NFS_INO_INVALID_MODE); spin_unlock(&inode->i_lock); } return 0; diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index 41165b988dfb..ffba254d2098 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -247,12 +247,14 @@ struct nfs4_copy_state { #define NFS_INO_INVALID_BLOCKS BIT(14) /* cached blocks are invalid */ #define NFS_INO_INVALID_XATTR BIT(15) /* xattrs are invalid */ #define NFS_INO_INVALID_NLINK BIT(16) /* cached nlinks is invalid */ +#define NFS_INO_INVALID_MODE BIT(17) /* cached mode is invalid */ #define NFS_INO_INVALID_ATTR (NFS_INO_INVALID_CHANGE \ | NFS_INO_INVALID_CTIME \ | NFS_INO_INVALID_MTIME \ | NFS_INO_INVALID_SIZE \ | NFS_INO_INVALID_NLINK \ + | NFS_INO_INVALID_MODE \ | NFS_INO_INVALID_OTHER) /* inode metadata is invalid */ /* -- cgit v1.2.3 From 7f08a3359a3c1e39c2a118fbbe583d8c8db14ace Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 26 Mar 2021 09:50:19 -0400 Subject: NFSv4: Add support for the NFSv4.2 "change_attr_type" attribute The change_attr_type allows the server to provide a description of how the change attribute will behave. This again will allow the client to optimise its behaviour w.r.t. attribute revalidation. Signed-off-by: Trond Myklebust --- fs/nfs/client.c | 3 +++ fs/nfs/nfs3xdr.c | 1 + fs/nfs/nfs4proc.c | 1 + fs/nfs/nfs4xdr.c | 32 ++++++++++++++++++++++++++++++++ fs/nfs/proc.c | 1 + include/linux/nfs4.h | 9 +++++++++ include/linux/nfs_fs_sb.h | 3 +++ include/linux/nfs_xdr.h | 2 ++ 8 files changed, 52 insertions(+) (limited to 'include') diff --git a/fs/nfs/client.c b/fs/nfs/client.c index 399a8eb15397..2aeb4e52a4f1 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c @@ -792,6 +792,7 @@ static void nfs_server_set_fsinfo(struct nfs_server *server, server->maxfilesize = fsinfo->maxfilesize; server->time_delta = fsinfo->time_delta; + server->change_attr_type = fsinfo->change_attr_type; server->clone_blksize = fsinfo->clone_blksize; /* We're airborne Set socket buffersize */ @@ -933,6 +934,8 @@ struct nfs_server *nfs_alloc_server(void) return NULL; } + server->change_attr_type = NFS4_CHANGE_TYPE_IS_UNDEFINED; + ida_init(&server->openowner_id); ida_init(&server->lockowner_id); pnfs_init_server(server); diff --git a/fs/nfs/nfs3xdr.c b/fs/nfs/nfs3xdr.c index ed1c83738c30..83ad62c81fc7 100644 --- a/fs/nfs/nfs3xdr.c +++ b/fs/nfs/nfs3xdr.c @@ -2227,6 +2227,7 @@ static int decode_fsinfo3resok(struct xdr_stream *xdr, /* ignore properties */ result->lease_time = 0; + result->change_attr_type = NFS4_CHANGE_TYPE_IS_TIME_METADATA; return 0; } diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index bc90f2a12d5d..6992c88a25e7 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -264,6 +264,7 @@ const u32 nfs4_fsinfo_bitmap[3] = { FATTR4_WORD0_MAXFILESIZE | FATTR4_WORD1_FS_LAYOUT_TYPES, FATTR4_WORD2_LAYOUT_BLKSIZE | FATTR4_WORD2_CLONE_BLKSIZE + | FATTR4_WORD2_CHANGE_ATTR_TYPE | FATTR4_WORD2_XATTR_SUPPORT }; diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index d8a1911dd39e..edac4718dec1 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -153,6 +153,7 @@ static int decode_layoutget(struct xdr_stream *xdr, struct rpc_rqst *req, 5 /* fs layout types */ + \ 1 /* layout blksize */ + \ 1 /* clone blksize */ + \ + 1 /* change attr type */ + \ 1 /* xattr support */) #define encode_renew_maxsz (op_encode_hdr_maxsz + 3) #define decode_renew_maxsz (op_decode_hdr_maxsz) @@ -4846,6 +4847,32 @@ static int decode_attr_clone_blksize(struct xdr_stream *xdr, uint32_t *bitmap, return 0; } +static int decode_attr_change_attr_type(struct xdr_stream *xdr, + uint32_t *bitmap, + enum nfs4_change_attr_type *res) +{ + u32 tmp = NFS4_CHANGE_TYPE_IS_UNDEFINED; + + dprintk("%s: bitmap is %x\n", __func__, bitmap[2]); + if (bitmap[2] & FATTR4_WORD2_CHANGE_ATTR_TYPE) { + if (xdr_stream_decode_u32(xdr, &tmp)) + return -EIO; + bitmap[2] &= ~FATTR4_WORD2_CHANGE_ATTR_TYPE; + } + + switch(tmp) { + case NFS4_CHANGE_TYPE_IS_MONOTONIC_INCR: + case NFS4_CHANGE_TYPE_IS_VERSION_COUNTER: + case NFS4_CHANGE_TYPE_IS_VERSION_COUNTER_NOPNFS: + case NFS4_CHANGE_TYPE_IS_TIME_METADATA: + *res = tmp; + break; + default: + *res = NFS4_CHANGE_TYPE_IS_UNDEFINED; + } + return 0; +} + static int decode_fsinfo(struct xdr_stream *xdr, struct nfs_fsinfo *fsinfo) { unsigned int savep; @@ -4894,6 +4921,11 @@ static int decode_fsinfo(struct xdr_stream *xdr, struct nfs_fsinfo *fsinfo) if (status) goto xdr_error; + status = decode_attr_change_attr_type(xdr, bitmap, + &fsinfo->change_attr_type); + if (status) + goto xdr_error; + status = decode_attr_xattrsupport(xdr, bitmap, &fsinfo->xattr_support); if (status) diff --git a/fs/nfs/proc.c b/fs/nfs/proc.c index 73ab7c59d3a7..ea19dbf12301 100644 --- a/fs/nfs/proc.c +++ b/fs/nfs/proc.c @@ -91,6 +91,7 @@ nfs_proc_get_root(struct nfs_server *server, struct nfs_fh *fhandle, info->dtpref = fsinfo.tsize; info->maxfilesize = 0x7FFFFFFF; info->lease_time = 0; + info->change_attr_type = NFS4_CHANGE_TYPE_IS_TIME_METADATA; return 0; } diff --git a/include/linux/nfs4.h b/include/linux/nfs4.h index 5b4c67c91f56..15004c469807 100644 --- a/include/linux/nfs4.h +++ b/include/linux/nfs4.h @@ -452,6 +452,7 @@ enum lock_type4 { #define FATTR4_WORD2_LAYOUT_BLKSIZE (1UL << 1) #define FATTR4_WORD2_MDSTHRESHOLD (1UL << 4) #define FATTR4_WORD2_CLONE_BLKSIZE (1UL << 13) +#define FATTR4_WORD2_CHANGE_ATTR_TYPE (1UL << 15) #define FATTR4_WORD2_SECURITY_LABEL (1UL << 16) #define FATTR4_WORD2_MODE_UMASK (1UL << 17) #define FATTR4_WORD2_XATTR_SUPPORT (1UL << 18) @@ -709,6 +710,14 @@ struct nl4_server { } u; }; +enum nfs4_change_attr_type { + NFS4_CHANGE_TYPE_IS_MONOTONIC_INCR = 0, + NFS4_CHANGE_TYPE_IS_VERSION_COUNTER = 1, + NFS4_CHANGE_TYPE_IS_VERSION_COUNTER_NOPNFS = 2, + NFS4_CHANGE_TYPE_IS_TIME_METADATA = 3, + NFS4_CHANGE_TYPE_IS_UNDEFINED = 4, +}; + /* * Options for setxattr. These match the flags for setxattr(2). */ diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h index 6f76b32a0238..fbcdfd9f7a7f 100644 --- a/include/linux/nfs_fs_sb.h +++ b/include/linux/nfs_fs_sb.h @@ -180,6 +180,9 @@ struct nfs_server { #define NFS_OPTION_FSCACHE 0x00000001 /* - local caching enabled */ #define NFS_OPTION_MIGRATION 0x00000002 /* - NFSv4 migration enabled */ + enum nfs4_change_attr_type + change_attr_type;/* Description of change attribute */ + struct nfs_fsid fsid; __u64 maxfilesize; /* maximum file size */ struct timespec64 time_delta; /* smallest time granularity */ diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index cc29dee508f7..717ecc87c9e7 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -152,6 +152,8 @@ struct nfs_fsinfo { __u32 layouttype[NFS_MAX_LAYOUT_TYPES]; /* supported pnfs layout driver */ __u32 blksize; /* preferred pnfs io block size */ __u32 clone_blksize; /* granularity of a CLONE operation */ + enum nfs4_change_attr_type + change_attr_type; /* Info about change attr */ __u32 xattr_support; /* User xattrs supported */ }; -- cgit v1.2.3 From ce62b114bbad9346641d16853c528ba01513e1b0 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 5 Mar 2018 15:01:18 -0500 Subject: NFS: Split attribute support out from the server capabilities There are lots of attributes, and they are crowding out the bit space. Signed-off-by: Trond Myklebust --- fs/nfs/client.c | 15 +++++++++++--- fs/nfs/inode.c | 51 ++++++++++++++++++++++++++++------------------- fs/nfs/nfs4proc.c | 49 +++++++++++++++++++++++---------------------- include/linux/nfs_fs_sb.h | 11 ++-------- 4 files changed, 70 insertions(+), 56 deletions(-) (limited to 'include') diff --git a/fs/nfs/client.c b/fs/nfs/client.c index 2aeb4e52a4f1..cfeaadf56bf0 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c @@ -696,9 +696,18 @@ static int nfs_init_server(struct nfs_server *server, /* Initialise the client representation from the mount data */ server->flags = ctx->flags; server->options = ctx->options; - server->caps |= NFS_CAP_HARDLINKS|NFS_CAP_SYMLINKS|NFS_CAP_FILEID| - NFS_CAP_MODE|NFS_CAP_NLINK|NFS_CAP_OWNER|NFS_CAP_OWNER_GROUP| - NFS_CAP_ATIME|NFS_CAP_CTIME|NFS_CAP_MTIME; + server->caps |= NFS_CAP_HARDLINKS | NFS_CAP_SYMLINKS; + + switch (clp->rpc_ops->version) { + case 2: + server->fattr_valid = NFS_ATTR_FATTR_V2; + break; + case 3: + server->fattr_valid = NFS_ATTR_FATTR_V3; + break; + default: + server->fattr_valid = NFS_ATTR_FATTR_V4; + } if (ctx->rsize) server->rsize = nfs_block_size(ctx->rsize, NULL); diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 7fa914e24fc4..6d04ebb4f084 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -438,6 +438,7 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr, st .fattr = fattr }; struct inode *inode = ERR_PTR(-ENOENT); + u64 fattr_supported = NFS_SB(sb)->fattr_valid; unsigned long hash; nfs_attr_check_mountpoint(sb, fattr); @@ -470,7 +471,7 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr, st inode->i_mode = fattr->mode; nfsi->cache_validity = 0; if ((fattr->valid & NFS_ATTR_FATTR_MODE) == 0 - && nfs_server_capable(inode, NFS_CAP_MODE)) + && (fattr_supported & NFS_ATTR_FATTR_MODE)) nfs_set_cache_invalid(inode, NFS_INO_INVALID_MODE); /* Why so? Because we want revalidate for devices/FIFOs, and * that's precisely what we have in nfs_file_inode_operations. @@ -516,15 +517,15 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr, st nfsi->attr_gencount = fattr->gencount; if (fattr->valid & NFS_ATTR_FATTR_ATIME) inode->i_atime = fattr->atime; - else if (nfs_server_capable(inode, NFS_CAP_ATIME)) + else if (fattr_supported & NFS_ATTR_FATTR_ATIME) nfs_set_cache_invalid(inode, NFS_INO_INVALID_ATIME); if (fattr->valid & NFS_ATTR_FATTR_MTIME) inode->i_mtime = fattr->mtime; - else if (nfs_server_capable(inode, NFS_CAP_MTIME)) + else if (fattr_supported & NFS_ATTR_FATTR_MTIME) nfs_set_cache_invalid(inode, NFS_INO_INVALID_MTIME); if (fattr->valid & NFS_ATTR_FATTR_CTIME) inode->i_ctime = fattr->ctime; - else if (nfs_server_capable(inode, NFS_CAP_CTIME)) + else if (fattr_supported & NFS_ATTR_FATTR_CTIME) nfs_set_cache_invalid(inode, NFS_INO_INVALID_CTIME); if (fattr->valid & NFS_ATTR_FATTR_CHANGE) inode_set_iversion_raw(inode, fattr->change_attr); @@ -536,26 +537,30 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr, st nfs_set_cache_invalid(inode, NFS_INO_INVALID_SIZE); if (fattr->valid & NFS_ATTR_FATTR_NLINK) set_nlink(inode, fattr->nlink); - else if (nfs_server_capable(inode, NFS_CAP_NLINK)) + else if (fattr_supported & NFS_ATTR_FATTR_NLINK) nfs_set_cache_invalid(inode, NFS_INO_INVALID_NLINK); if (fattr->valid & NFS_ATTR_FATTR_OWNER) inode->i_uid = fattr->uid; - else if (nfs_server_capable(inode, NFS_CAP_OWNER)) + else if (fattr_supported & NFS_ATTR_FATTR_OWNER) nfs_set_cache_invalid(inode, NFS_INO_INVALID_OTHER); if (fattr->valid & NFS_ATTR_FATTR_GROUP) inode->i_gid = fattr->gid; - else if (nfs_server_capable(inode, NFS_CAP_OWNER_GROUP)) + else if (fattr_supported & NFS_ATTR_FATTR_GROUP) nfs_set_cache_invalid(inode, NFS_INO_INVALID_OTHER); if (nfs_server_capable(inode, NFS_CAP_XATTR)) nfs_set_cache_invalid(inode, NFS_INO_INVALID_XATTR); if (fattr->valid & NFS_ATTR_FATTR_BLOCKS_USED) inode->i_blocks = fattr->du.nfs2.blocks; - else if (fattr->valid & NFS_ATTR_FATTR_SPACE_USED) { + else if (fattr_supported & NFS_ATTR_FATTR_BLOCKS_USED && + fattr->size != 0) + nfs_set_cache_invalid(inode, NFS_INO_INVALID_BLOCKS); + if (fattr->valid & NFS_ATTR_FATTR_SPACE_USED) { /* * report the blocks in 512byte units */ inode->i_blocks = nfs_calc_block_size(fattr->du.nfs3.used); - } else if (fattr->size != 0) + } else if (fattr_supported & NFS_ATTR_FATTR_SPACE_USED && + fattr->size != 0) nfs_set_cache_invalid(inode, NFS_INO_INVALID_BLOCKS); nfs_setsecurity(inode, fattr, label); @@ -1952,9 +1957,10 @@ EXPORT_SYMBOL_GPL(nfs_post_op_update_inode_force_wcc); */ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) { - struct nfs_server *server; + struct nfs_server *server = NFS_SERVER(inode); struct nfs_inode *nfsi = NFS_I(inode); loff_t cur_isize, new_isize; + u64 fattr_supported = server->fattr_valid; unsigned long invalid = 0; unsigned long now = jiffies; unsigned long save_cache_validity; @@ -1998,7 +2004,6 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) goto out_err; } - server = NFS_SERVER(inode); /* Update the fsid? */ if (S_ISDIR(inode->i_mode) && (fattr->valid & NFS_ATTR_FATTR_FSID) && !nfs_fsid_equal(&server->fsid, &fattr->fsid) && @@ -2066,7 +2071,7 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) if (fattr->valid & NFS_ATTR_FATTR_MTIME) { inode->i_mtime = fattr->mtime; - } else if (server->caps & NFS_CAP_MTIME) { + } else if (fattr_supported & NFS_ATTR_FATTR_MTIME) { nfsi->cache_validity |= save_cache_validity & NFS_INO_INVALID_MTIME; cache_revalidated = false; @@ -2074,7 +2079,7 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) if (fattr->valid & NFS_ATTR_FATTR_CTIME) { inode->i_ctime = fattr->ctime; - } else if (server->caps & NFS_CAP_CTIME) { + } else if (fattr_supported & NFS_ATTR_FATTR_CTIME) { nfsi->cache_validity |= save_cache_validity & NFS_INO_INVALID_CTIME; cache_revalidated = false; @@ -2114,7 +2119,7 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) if (fattr->valid & NFS_ATTR_FATTR_ATIME) inode->i_atime = fattr->atime; - else if (server->caps & NFS_CAP_ATIME) { + else if (fattr_supported & NFS_ATTR_FATTR_ATIME) { nfsi->cache_validity |= save_cache_validity & NFS_INO_INVALID_ATIME; cache_revalidated = false; @@ -2129,7 +2134,7 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) | NFS_INO_INVALID_ACL; attr_changed = true; } - } else if (server->caps & NFS_CAP_MODE) { + } else if (fattr_supported & NFS_ATTR_FATTR_MODE) { nfsi->cache_validity |= save_cache_validity & NFS_INO_INVALID_MODE; cache_revalidated = false; @@ -2142,7 +2147,7 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) inode->i_uid = fattr->uid; attr_changed = true; } - } else if (server->caps & NFS_CAP_OWNER) { + } else if (fattr_supported & NFS_ATTR_FATTR_OWNER) { nfsi->cache_validity |= save_cache_validity & NFS_INO_INVALID_OTHER; cache_revalidated = false; @@ -2155,7 +2160,7 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) inode->i_gid = fattr->gid; attr_changed = true; } - } else if (server->caps & NFS_CAP_OWNER_GROUP) { + } else if (fattr_supported & NFS_ATTR_FATTR_GROUP) { nfsi->cache_validity |= save_cache_validity & NFS_INO_INVALID_OTHER; cache_revalidated = false; @@ -2168,7 +2173,7 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) set_nlink(inode, fattr->nlink); attr_changed = true; } - } else if (server->caps & NFS_CAP_NLINK) { + } else if (fattr_supported & NFS_ATTR_FATTR_NLINK) { nfsi->cache_validity |= save_cache_validity & NFS_INO_INVALID_NLINK; cache_revalidated = false; @@ -2179,9 +2184,15 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) * report the blocks in 512byte units */ inode->i_blocks = nfs_calc_block_size(fattr->du.nfs3.used); - } else if (fattr->valid & NFS_ATTR_FATTR_BLOCKS_USED) + } else if (fattr_supported & NFS_ATTR_FATTR_SPACE_USED) { + nfsi->cache_validity |= + save_cache_validity & NFS_INO_INVALID_BLOCKS; + cache_revalidated = false; + } + + if (fattr->valid & NFS_ATTR_FATTR_BLOCKS_USED) { inode->i_blocks = fattr->du.nfs2.blocks; - else { + } else if (fattr_supported & NFS_ATTR_FATTR_BLOCKS_USED) { nfsi->cache_validity |= save_cache_validity & NFS_INO_INVALID_BLOCKS; cache_revalidated = false; diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index bcbb057d5529..21c31aebb116 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -3868,12 +3868,9 @@ static int _nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *f res.attr_bitmask[2] &= FATTR4_WORD2_NFS42_MASK; } memcpy(server->attr_bitmask, res.attr_bitmask, sizeof(server->attr_bitmask)); - server->caps &= ~(NFS_CAP_ACLS|NFS_CAP_HARDLINKS| - NFS_CAP_SYMLINKS|NFS_CAP_FILEID| - NFS_CAP_MODE|NFS_CAP_NLINK|NFS_CAP_OWNER| - NFS_CAP_OWNER_GROUP|NFS_CAP_ATIME| - NFS_CAP_CTIME|NFS_CAP_MTIME| - NFS_CAP_SECURITY_LABEL); + server->caps &= ~(NFS_CAP_ACLS | NFS_CAP_HARDLINKS | + NFS_CAP_SYMLINKS| NFS_CAP_SECURITY_LABEL); + server->fattr_valid = NFS_ATTR_FATTR_V4; if (res.attr_bitmask[0] & FATTR4_WORD0_ACL && res.acl_bitmask & ACL4_SUPPORT_ALLOW_ACL) server->caps |= NFS_CAP_ACLS; @@ -3881,25 +3878,29 @@ static int _nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *f server->caps |= NFS_CAP_HARDLINKS; if (res.has_symlinks != 0) server->caps |= NFS_CAP_SYMLINKS; - if (res.attr_bitmask[0] & FATTR4_WORD0_FILEID) - server->caps |= NFS_CAP_FILEID; - if (res.attr_bitmask[1] & FATTR4_WORD1_MODE) - server->caps |= NFS_CAP_MODE; - if (res.attr_bitmask[1] & FATTR4_WORD1_NUMLINKS) - server->caps |= NFS_CAP_NLINK; - if (res.attr_bitmask[1] & FATTR4_WORD1_OWNER) - server->caps |= NFS_CAP_OWNER; - if (res.attr_bitmask[1] & FATTR4_WORD1_OWNER_GROUP) - server->caps |= NFS_CAP_OWNER_GROUP; - if (res.attr_bitmask[1] & FATTR4_WORD1_TIME_ACCESS) - server->caps |= NFS_CAP_ATIME; - if (res.attr_bitmask[1] & FATTR4_WORD1_TIME_METADATA) - server->caps |= NFS_CAP_CTIME; - if (res.attr_bitmask[1] & FATTR4_WORD1_TIME_MODIFY) - server->caps |= NFS_CAP_MTIME; + if (!(res.attr_bitmask[0] & FATTR4_WORD0_FILEID)) + server->fattr_valid &= ~NFS_ATTR_FATTR_FILEID; + if (!(res.attr_bitmask[1] & FATTR4_WORD1_MODE)) + server->fattr_valid &= ~NFS_ATTR_FATTR_MODE; + if (!(res.attr_bitmask[1] & FATTR4_WORD1_NUMLINKS)) + server->fattr_valid &= ~NFS_ATTR_FATTR_NLINK; + if (!(res.attr_bitmask[1] & FATTR4_WORD1_OWNER)) + server->fattr_valid &= ~(NFS_ATTR_FATTR_OWNER | + NFS_ATTR_FATTR_OWNER_NAME); + if (!(res.attr_bitmask[1] & FATTR4_WORD1_OWNER_GROUP)) + server->fattr_valid &= ~(NFS_ATTR_FATTR_GROUP | + NFS_ATTR_FATTR_GROUP_NAME); + if (!(res.attr_bitmask[1] & FATTR4_WORD1_SPACE_USED)) + server->fattr_valid &= ~NFS_ATTR_FATTR_SPACE_USED; + if (!(res.attr_bitmask[1] & FATTR4_WORD1_TIME_ACCESS)) + server->fattr_valid &= ~NFS_ATTR_FATTR_ATIME; + if (!(res.attr_bitmask[1] & FATTR4_WORD1_TIME_METADATA)) + server->fattr_valid &= ~NFS_ATTR_FATTR_CTIME; + if (!(res.attr_bitmask[1] & FATTR4_WORD1_TIME_MODIFY)) + server->fattr_valid &= ~NFS_ATTR_FATTR_MTIME; #ifdef CONFIG_NFS_V4_SECURITY_LABEL - if (res.attr_bitmask[2] & FATTR4_WORD2_SECURITY_LABEL) - server->caps |= NFS_CAP_SECURITY_LABEL; + if (!(res.attr_bitmask[2] & FATTR4_WORD2_SECURITY_LABEL)) + server->fattr_valid &= ~NFS_ATTR_FATTR_V4_SECURITY_LABEL; #endif memcpy(server->attr_bitmask_nl, res.attr_bitmask, sizeof(server->attr_bitmask)); diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h index fbcdfd9f7a7f..d28d7a62864f 100644 --- a/include/linux/nfs_fs_sb.h +++ b/include/linux/nfs_fs_sb.h @@ -191,6 +191,8 @@ struct nfs_server { dev_t s_dev; /* superblock dev numbers */ struct nfs_auth_info auth_info; /* parsed auth flavors */ + __u64 fattr_valid; /* Valid attributes */ + #ifdef CONFIG_NFS_FSCACHE struct nfs_fscache_key *fscache_key; /* unique key for superblock */ struct fscache_cookie *fscache; /* superblock cookie */ @@ -267,16 +269,7 @@ struct nfs_server { #define NFS_CAP_SYMLINKS (1U << 2) #define NFS_CAP_ACLS (1U << 3) #define NFS_CAP_ATOMIC_OPEN (1U << 4) -/* #define NFS_CAP_CHANGE_ATTR (1U << 5) */ #define NFS_CAP_LGOPEN (1U << 5) -#define NFS_CAP_FILEID (1U << 6) -#define NFS_CAP_MODE (1U << 7) -#define NFS_CAP_NLINK (1U << 8) -#define NFS_CAP_OWNER (1U << 9) -#define NFS_CAP_OWNER_GROUP (1U << 10) -#define NFS_CAP_ATIME (1U << 11) -#define NFS_CAP_CTIME (1U << 12) -#define NFS_CAP_MTIME (1U << 13) #define NFS_CAP_POSIX_LOCK (1U << 14) #define NFS_CAP_UIDGID_NOMAP (1U << 15) #define NFS_CAP_STATEID_NFSV41 (1U << 16) -- cgit v1.2.3 From e936a5970ef596ff48fca72aa8200955753c543f Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 31 Mar 2021 13:22:27 -0400 Subject: SUNRPC: Add tracepoint that fires when an RPC is retransmitted A separate tracepoint can be left enabled all the time to capture rare but important retransmission events. So for example: kworker/u26:3-568 [009] 156.967933: xprt_retransmit: task:44093@5 xid=0xa25dbc79 nfsv3 WRITE ntrans=2 Or, for example, enable all nfs and nfs4 tracepoints, and set up a trigger to disable tracing when xprt_retransmit fires to capture everything that leads up to it. Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- include/trace/events/sunrpc.h | 40 ++++++++++++++++++++++++++++++++++++++++ net/sunrpc/xprt.c | 4 +++- 2 files changed, 43 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/trace/events/sunrpc.h b/include/trace/events/sunrpc.h index 036eb1f5c133..430b42f2351f 100644 --- a/include/trace/events/sunrpc.h +++ b/include/trace/events/sunrpc.h @@ -1079,6 +1079,46 @@ TRACE_EVENT(xprt_transmit, __entry->seqno, __entry->status) ); +TRACE_EVENT(xprt_retransmit, + TP_PROTO( + const struct rpc_rqst *rqst + ), + + TP_ARGS(rqst), + + TP_STRUCT__entry( + __field(unsigned int, task_id) + __field(unsigned int, client_id) + __field(u32, xid) + __field(int, ntrans) + __field(int, version) + __string(progname, + rqst->rq_task->tk_client->cl_program->name) + __string(procedure, + rqst->rq_task->tk_msg.rpc_proc->p_name) + ), + + TP_fast_assign( + struct rpc_task *task = rqst->rq_task; + + __entry->task_id = task->tk_pid; + __entry->client_id = task->tk_client ? + task->tk_client->cl_clid : -1; + __entry->xid = be32_to_cpu(rqst->rq_xid); + __entry->ntrans = rqst->rq_ntrans; + __assign_str(progname, + task->tk_client->cl_program->name) + __entry->version = task->tk_client->cl_vers; + __assign_str(procedure, task->tk_msg.rpc_proc->p_name) + ), + + TP_printk( + "task:%u@%u xid=0x%08x %sv%d %s ntrans=%d", + __entry->task_id, __entry->client_id, __entry->xid, + __get_str(progname), __entry->version, __get_str(procedure), + __entry->ntrans) +); + TRACE_EVENT(xprt_ping, TP_PROTO(const struct rpc_xprt *xprt, int status), diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c index 80c94ead4aa0..67039ee443eb 100644 --- a/net/sunrpc/xprt.c +++ b/net/sunrpc/xprt.c @@ -1542,8 +1542,10 @@ xprt_request_transmit(struct rpc_rqst *req, struct rpc_task *snd_task) return status; } - if (is_retrans) + if (is_retrans) { task->tk_client->cl_stats->rpcretrans++; + trace_xprt_retransmit(req); + } xprt_inject_disconnect(xprt); -- cgit v1.2.3 From 6cf23783f750634e10daeede48b0f5f5d64ebf3a Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 31 Mar 2021 16:03:08 -0400 Subject: SUNRPC: Remove trace_xprt_transmit_queued This tracepoint can crash when dereferencing snd_task because when some transports connect, they put a cookie in that field instead of a pointer to an rpc_task. BUG: KASAN: use-after-free in trace_event_raw_event_xprt_writelock_event+0x141/0x18e [sunrpc] Read of size 2 at addr ffff8881a83bd3a0 by task git/331872 CPU: 11 PID: 331872 Comm: git Tainted: G S 5.12.0-rc2-00007-g3ab6e585a7f9 #1453 Hardware name: Supermicro SYS-6028R-T/X10DRi, BIOS 1.1a 10/16/2015 Call Trace: dump_stack+0x9c/0xcf print_address_description.constprop.0+0x18/0x239 kasan_report+0x174/0x1b0 trace_event_raw_event_xprt_writelock_event+0x141/0x18e [sunrpc] xprt_prepare_transmit+0x8e/0xc1 [sunrpc] call_transmit+0x4d/0xc6 [sunrpc] Fixes: 9ce07ae5eb1d ("SUNRPC: Replace dprintk() call site in xprt_prepare_transmit") Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- include/trace/events/sunrpc.h | 1 - net/sunrpc/xprt.c | 2 -- 2 files changed, 3 deletions(-) (limited to 'include') diff --git a/include/trace/events/sunrpc.h b/include/trace/events/sunrpc.h index 430b42f2351f..3c3c4c843c62 100644 --- a/include/trace/events/sunrpc.h +++ b/include/trace/events/sunrpc.h @@ -1181,7 +1181,6 @@ DECLARE_EVENT_CLASS(xprt_writelock_event, DEFINE_WRITELOCK_EVENT(reserve_xprt); DEFINE_WRITELOCK_EVENT(release_xprt); -DEFINE_WRITELOCK_EVENT(transmit_queued); DECLARE_EVENT_CLASS(xprt_cong_event, TP_PROTO( diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c index 67039ee443eb..baba7d14825a 100644 --- a/net/sunrpc/xprt.c +++ b/net/sunrpc/xprt.c @@ -1471,8 +1471,6 @@ bool xprt_prepare_transmit(struct rpc_task *task) struct rpc_xprt *xprt = req->rq_xprt; if (!xprt_lock_write(xprt, task)) { - trace_xprt_transmit_queued(xprt, task); - /* Race breaker: someone may have transmitted us */ if (!test_bit(RPC_TASK_NEED_XMIT, &task->tk_runstate)) rpc_wake_up_queued_task_set_status(&xprt->sending, -- cgit v1.2.3 From d99f2487e1de23a2e902d1a359a85a48bfd21fe7 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 21 Apr 2021 07:48:43 -0400 Subject: NFS: The 'fattr_valid' field in struct nfs_server should be unsigned int Fix up a static compiler warning: "fs/nfs/nfs4proc.c:3882 _nfs4_server_capabilities() warn: was expecting a 64 bit value instead of '(1 << 11)'" The fix is to convert the fattr_valid field to match the type of the 'valid' field in struct nfs_fattr. Reported-by: Dan Carpenter Signed-off-by: Trond Myklebust --- include/linux/nfs_fs_sb.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h index d28d7a62864f..70057b2e606e 100644 --- a/include/linux/nfs_fs_sb.h +++ b/include/linux/nfs_fs_sb.h @@ -156,6 +156,7 @@ struct nfs_server { #define NFS_MOUNT_WRITE_EAGER 0x01000000 #define NFS_MOUNT_WRITE_WAIT 0x02000000 + unsigned int fattr_valid; /* Valid attributes */ unsigned int caps; /* server capabilities */ unsigned int rsize; /* read size */ unsigned int rpages; /* read size (in pages) */ @@ -191,8 +192,6 @@ struct nfs_server { dev_t s_dev; /* superblock dev numbers */ struct nfs_auth_info auth_info; /* parsed auth flavors */ - __u64 fattr_valid; /* Valid attributes */ - #ifdef CONFIG_NFS_FSCACHE struct nfs_fscache_key *fscache_key; /* unique key for superblock */ struct fscache_cookie *fscache; /* superblock cookie */ -- cgit v1.2.3 From e4b52ca01315ad53df41877708428c1c41c1444d Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Mon, 19 Apr 2021 14:03:12 -0400 Subject: xprtrdma: Do not recycle MR after FastReg/LocalInv flushes Better not to touch MRs involved in a flush or post error until the Send and Receive Queues are drained and the transport is fully quiescent. Simply don't insert such MRs back onto the free list. They remain on mr_all and will be released when the connection is torn down. I had thought that recycling would prevent hardware resources from being tied up for a long time. However, since v5.7, a transport disconnect destroys the QP and other hardware-owned resources. The MRs get cleaned up nicely at that point. Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- include/trace/events/rpcrdma.h | 1 - net/sunrpc/xprtrdma/frwr_ops.c | 69 +++++++++++------------------------------- 2 files changed, 17 insertions(+), 53 deletions(-) (limited to 'include') diff --git a/include/trace/events/rpcrdma.h b/include/trace/events/rpcrdma.h index c838e7ac1c2d..e38e745d13b0 100644 --- a/include/trace/events/rpcrdma.h +++ b/include/trace/events/rpcrdma.h @@ -1014,7 +1014,6 @@ DEFINE_MR_EVENT(localinv); DEFINE_MR_EVENT(map); DEFINE_ANON_MR_EVENT(unmap); -DEFINE_ANON_MR_EVENT(recycle); TRACE_EVENT(xprtrdma_dma_maperr, TP_PROTO( diff --git a/net/sunrpc/xprtrdma/frwr_ops.c b/net/sunrpc/xprtrdma/frwr_ops.c index af85cec0ce31..27087dc8ba3c 100644 --- a/net/sunrpc/xprtrdma/frwr_ops.c +++ b/net/sunrpc/xprtrdma/frwr_ops.c @@ -49,6 +49,16 @@ # define RPCDBG_FACILITY RPCDBG_TRANS #endif +static void frwr_mr_unmap(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr *mr) +{ + if (mr->mr_device) { + trace_xprtrdma_mr_unmap(mr); + ib_dma_unmap_sg(mr->mr_device, mr->mr_sg, mr->mr_nents, + mr->mr_dir); + mr->mr_device = NULL; + } +} + /** * frwr_mr_release - Destroy one MR * @mr: MR allocated by frwr_mr_init @@ -58,6 +68,8 @@ void frwr_mr_release(struct rpcrdma_mr *mr) { int rc; + frwr_mr_unmap(mr->mr_xprt, mr); + rc = ib_dereg_mr(mr->frwr.fr_mr); if (rc) trace_xprtrdma_frwr_dereg(mr, rc); @@ -65,32 +77,6 @@ void frwr_mr_release(struct rpcrdma_mr *mr) kfree(mr); } -static void frwr_mr_unmap(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr *mr) -{ - if (mr->mr_device) { - trace_xprtrdma_mr_unmap(mr); - ib_dma_unmap_sg(mr->mr_device, mr->mr_sg, mr->mr_nents, - mr->mr_dir); - mr->mr_device = NULL; - } -} - -static void frwr_mr_recycle(struct rpcrdma_mr *mr) -{ - struct rpcrdma_xprt *r_xprt = mr->mr_xprt; - - trace_xprtrdma_mr_recycle(mr); - - frwr_mr_unmap(r_xprt, mr); - - spin_lock(&r_xprt->rx_buf.rb_lock); - list_del(&mr->mr_all); - r_xprt->rx_stats.mrs_recycled++; - spin_unlock(&r_xprt->rx_buf.rb_lock); - - frwr_mr_release(mr); -} - static void frwr_mr_put(struct rpcrdma_mr *mr) { frwr_mr_unmap(mr->mr_xprt, mr); @@ -365,6 +351,7 @@ out_mapmr_err: * @cq: completion queue * @wc: WCE for a completed FastReg WR * + * Each flushed MR gets destroyed after the QP has drained. */ static void frwr_wc_fastreg(struct ib_cq *cq, struct ib_wc *wc) { @@ -374,7 +361,6 @@ static void frwr_wc_fastreg(struct ib_cq *cq, struct ib_wc *wc) /* WARNING: Only wr_cqe and status are reliable at this point */ trace_xprtrdma_wc_fastreg(wc, &frwr->fr_cid); - /* The MR will get recycled when the associated req is retransmitted */ rpcrdma_flush_disconnect(cq->cq_context, wc); } @@ -448,9 +434,7 @@ void frwr_reminv(struct rpcrdma_rep *rep, struct list_head *mrs) static void frwr_mr_done(struct ib_wc *wc, struct rpcrdma_mr *mr) { - if (wc->status != IB_WC_SUCCESS) - frwr_mr_recycle(mr); - else + if (likely(wc->status == IB_WC_SUCCESS)) frwr_mr_put(mr); } @@ -567,17 +551,8 @@ void frwr_unmap_sync(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req) if (!rc) return; - /* Recycle MRs in the LOCAL_INV chain that did not get posted. - */ + /* On error, the MRs get destroyed once the QP has drained. */ trace_xprtrdma_post_linv_err(req, rc); - while (bad_wr) { - frwr = container_of(bad_wr, struct rpcrdma_frwr, - fr_invwr); - mr = container_of(frwr, struct rpcrdma_mr, frwr); - bad_wr = bad_wr->next; - - frwr_mr_recycle(mr); - } } /** @@ -621,7 +596,6 @@ void frwr_unmap_async(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req) { struct ib_send_wr *first, *last, **prev; struct rpcrdma_ep *ep = r_xprt->rx_ep; - const struct ib_send_wr *bad_wr; struct rpcrdma_frwr *frwr; struct rpcrdma_mr *mr; int rc; @@ -663,21 +637,12 @@ void frwr_unmap_async(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req) * replaces the QP. The RPC reply handler won't call us * unless re_id->qp is a valid pointer. */ - bad_wr = NULL; - rc = ib_post_send(ep->re_id->qp, first, &bad_wr); + rc = ib_post_send(ep->re_id->qp, first, NULL); if (!rc) return; - /* Recycle MRs in the LOCAL_INV chain that did not get posted. - */ + /* On error, the MRs get destroyed once the QP has drained. */ trace_xprtrdma_post_linv_err(req, rc); - while (bad_wr) { - frwr = container_of(bad_wr, struct rpcrdma_frwr, fr_invwr); - mr = container_of(frwr, struct rpcrdma_mr, frwr); - bad_wr = bad_wr->next; - - frwr_mr_recycle(mr); - } /* The final LOCAL_INV WR in the chain is supposed to * do the wake. If it was never posted, the wake will -- cgit v1.2.3 From 4ddd0fc32c94fbb77a8c0728dc507b2bdcc67edc Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Mon, 19 Apr 2021 14:03:31 -0400 Subject: xprtrdma: Add tracepoints showing FastReg WRs and remote invalidation The Send signaling logic is a little subtle, so add some observability around it. For every xprtrdma_mr_fastreg event, there should be an xprtrdma_mr_localinv or xprtrdma_mr_reminv event. When these tracepoints are enabled, we can see exactly when an MR is DMA-mapped, registered, invalidated (either locally or remotely) and then DMA-unmapped. kworker/u25:2-190 [000] 787.979512: xprtrdma_mr_map: task:351@5 mr.id=4 nents=2 5608@0x8679e0c8f6f56000:0x00000503 (TO_DEVICE) kworker/u25:2-190 [000] 787.979515: xprtrdma_chunk_read: task:351@5 pos=148 5608@0x8679e0c8f6f56000:0x00000503 (last) kworker/u25:2-190 [000] 787.979519: xprtrdma_marshal: task:351@5 xid=0x8679e0c8: hdr=52 xdr=148/5608/0 read list/inline kworker/u25:2-190 [000] 787.979525: xprtrdma_mr_fastreg: task:351@5 mr.id=4 nents=2 5608@0x8679e0c8f6f56000:0x00000503 (TO_DEVICE) kworker/u25:2-190 [000] 787.979526: xprtrdma_post_send: task:351@5 cq.id=0 cid=73 (2 SGEs) ... kworker/5:1H-219 [005] 787.980567: xprtrdma_wc_receive: cq.id=1 cid=161 status=SUCCESS (0/0x0) received=164 kworker/5:1H-219 [005] 787.980571: xprtrdma_post_recvs: peer=[192.168.100.55]:20049 r_xprt=0xffff8884974d4000: 0 new recvs, 70 active (rc 0) kworker/5:1H-219 [005] 787.980573: xprtrdma_reply: task:351@5 xid=0x8679e0c8 credits=64 kworker/5:1H-219 [005] 787.980576: xprtrdma_mr_reminv: task:351@5 mr.id=4 nents=2 5608@0x8679e0c8f6f56000:0x00000503 (TO_DEVICE) kworker/5:1H-219 [005] 787.980577: xprtrdma_mr_unmap: mr.id=4 nents=2 5608@0x8679e0c8f6f56000:0x00000503 (TO_DEVICE) Note that I've moved the xprtrdma_post_send tracepoint so that event always appears after the xprtrdma_mr_fastreg tracepoint. Otherwise the event log looks counterintuitive (FastReg is always supposed to happen before Send). Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- include/trace/events/rpcrdma.h | 2 ++ net/sunrpc/xprtrdma/frwr_ops.c | 2 ++ 2 files changed, 4 insertions(+) (limited to 'include') diff --git a/include/trace/events/rpcrdma.h b/include/trace/events/rpcrdma.h index e38e745d13b0..9462326b3535 100644 --- a/include/trace/events/rpcrdma.h +++ b/include/trace/events/rpcrdma.h @@ -1010,7 +1010,9 @@ TRACE_EVENT(xprtrdma_frwr_maperr, ) ); +DEFINE_MR_EVENT(fastreg); DEFINE_MR_EVENT(localinv); +DEFINE_MR_EVENT(reminv); DEFINE_MR_EVENT(map); DEFINE_ANON_MR_EVENT(unmap); diff --git a/net/sunrpc/xprtrdma/frwr_ops.c b/net/sunrpc/xprtrdma/frwr_ops.c index 43a412ea337a..0f47c1e24037 100644 --- a/net/sunrpc/xprtrdma/frwr_ops.c +++ b/net/sunrpc/xprtrdma/frwr_ops.c @@ -400,6 +400,7 @@ int frwr_send(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req) list_for_each_entry(mr, &req->rl_registered, mr_list) { struct rpcrdma_frwr *frwr; + trace_xprtrdma_mr_fastreg(mr); frwr = &mr->frwr; frwr->fr_cqe.done = frwr_wc_fastreg; @@ -440,6 +441,7 @@ void frwr_reminv(struct rpcrdma_rep *rep, struct list_head *mrs) list_for_each_entry(mr, mrs, mr_list) if (mr->mr_handle == rep->rr_inv_rkey) { list_del_init(&mr->mr_list); + trace_xprtrdma_mr_reminv(mr); frwr_mr_put(mr); break; /* only one invalidated MR per RPC */ } -- cgit v1.2.3 From 6b147ea7f442e1fb31dfa25e25b7a8ca3fb817f0 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Mon, 19 Apr 2021 14:03:38 -0400 Subject: xprtrdma: Add an rpcrdma_mr_completion_class I found it confusing that the MR_EVENT class displays the mr.id but the associated COMPLETION_EVENT class displays a cid (that happens to contain the mr.id!). To make it a little easier on humans who have to read and interpret these events, create an MR_COMPLETION class that displays the mr.id in the same way as the MR_EVENT class. Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- include/trace/events/rpcrdma.h | 48 ++++++++++++++++++++++++++++++++++++++---- 1 file changed, 44 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/trace/events/rpcrdma.h b/include/trace/events/rpcrdma.h index 9462326b3535..3e6e4c69b533 100644 --- a/include/trace/events/rpcrdma.h +++ b/include/trace/events/rpcrdma.h @@ -60,6 +60,46 @@ DECLARE_EVENT_CLASS(rpcrdma_completion_class, ), \ TP_ARGS(wc, cid)) +DECLARE_EVENT_CLASS(rpcrdma_mr_completion_class, + TP_PROTO( + const struct ib_wc *wc, + const struct rpc_rdma_cid *cid + ), + + TP_ARGS(wc, cid), + + TP_STRUCT__entry( + __field(u32, cq_id) + __field(int, completion_id) + __field(unsigned long, status) + __field(unsigned int, vendor_err) + ), + + TP_fast_assign( + __entry->cq_id = cid->ci_queue_id; + __entry->completion_id = cid->ci_completion_id; + __entry->status = wc->status; + if (wc->status) + __entry->vendor_err = wc->vendor_err; + else + __entry->vendor_err = 0; + ), + + TP_printk("cq.id=%u mr.id=%d status=%s (%lu/0x%x)", + __entry->cq_id, __entry->completion_id, + rdma_show_wc_status(__entry->status), + __entry->status, __entry->vendor_err + ) +); + +#define DEFINE_MR_COMPLETION_EVENT(name) \ + DEFINE_EVENT(rpcrdma_mr_completion_class, name, \ + TP_PROTO( \ + const struct ib_wc *wc, \ + const struct rpc_rdma_cid *cid \ + ), \ + TP_ARGS(wc, cid)) + DECLARE_EVENT_CLASS(rpcrdma_receive_completion_class, TP_PROTO( const struct ib_wc *wc, @@ -886,10 +926,10 @@ TRACE_EVENT(xprtrdma_post_linv_err, DEFINE_RECEIVE_COMPLETION_EVENT(xprtrdma_wc_receive); DEFINE_COMPLETION_EVENT(xprtrdma_wc_send); -DEFINE_COMPLETION_EVENT(xprtrdma_wc_fastreg); -DEFINE_COMPLETION_EVENT(xprtrdma_wc_li); -DEFINE_COMPLETION_EVENT(xprtrdma_wc_li_wake); -DEFINE_COMPLETION_EVENT(xprtrdma_wc_li_done); +DEFINE_MR_COMPLETION_EVENT(xprtrdma_wc_fastreg); +DEFINE_MR_COMPLETION_EVENT(xprtrdma_wc_li); +DEFINE_MR_COMPLETION_EVENT(xprtrdma_wc_li_wake); +DEFINE_MR_COMPLETION_EVENT(xprtrdma_wc_li_done); TRACE_EVENT(xprtrdma_frwr_alloc, TP_PROTO( -- cgit v1.2.3 From 83189d15115467061295c0b75334b39fc64c6142 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Mon, 19 Apr 2021 14:03:44 -0400 Subject: xprtrdma: Don't display r_xprt memory addresses in tracepoints The remote peer's IP address is sufficient, and does not expose details of the kernel's memory layout. Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- include/trace/events/rpcrdma.h | 51 ++++++++++++++++-------------------------- 1 file changed, 19 insertions(+), 32 deletions(-) (limited to 'include') diff --git a/include/trace/events/rpcrdma.h b/include/trace/events/rpcrdma.h index 3e6e4c69b533..e38b8e33be2d 100644 --- a/include/trace/events/rpcrdma.h +++ b/include/trace/events/rpcrdma.h @@ -190,19 +190,17 @@ DECLARE_EVENT_CLASS(xprtrdma_rxprt, TP_ARGS(r_xprt), TP_STRUCT__entry( - __field(const void *, r_xprt) __string(addr, rpcrdma_addrstr(r_xprt)) __string(port, rpcrdma_portstr(r_xprt)) ), TP_fast_assign( - __entry->r_xprt = r_xprt; __assign_str(addr, rpcrdma_addrstr(r_xprt)); __assign_str(port, rpcrdma_portstr(r_xprt)); ), - TP_printk("peer=[%s]:%s r_xprt=%p", - __get_str(addr), __get_str(port), __entry->r_xprt + TP_printk("peer=[%s]:%s", + __get_str(addr), __get_str(port) ) ); @@ -222,7 +220,6 @@ DECLARE_EVENT_CLASS(xprtrdma_connect_class, TP_ARGS(r_xprt, rc), TP_STRUCT__entry( - __field(const void *, r_xprt) __field(int, rc) __field(int, connect_status) __string(addr, rpcrdma_addrstr(r_xprt)) @@ -230,15 +227,14 @@ DECLARE_EVENT_CLASS(xprtrdma_connect_class, ), TP_fast_assign( - __entry->r_xprt = r_xprt; __entry->rc = rc; __entry->connect_status = r_xprt->rx_ep->re_connect_status; __assign_str(addr, rpcrdma_addrstr(r_xprt)); __assign_str(port, rpcrdma_portstr(r_xprt)); ), - TP_printk("peer=[%s]:%s r_xprt=%p: rc=%d connection status=%d", - __get_str(addr), __get_str(port), __entry->r_xprt, + TP_printk("peer=[%s]:%s rc=%d connection status=%d", + __get_str(addr), __get_str(port), __entry->rc, __entry->connect_status ) ); @@ -535,22 +531,19 @@ TRACE_EVENT(xprtrdma_op_connect, TP_ARGS(r_xprt, delay), TP_STRUCT__entry( - __field(const void *, r_xprt) __field(unsigned long, delay) __string(addr, rpcrdma_addrstr(r_xprt)) __string(port, rpcrdma_portstr(r_xprt)) ), TP_fast_assign( - __entry->r_xprt = r_xprt; __entry->delay = delay; __assign_str(addr, rpcrdma_addrstr(r_xprt)); __assign_str(port, rpcrdma_portstr(r_xprt)); ), - TP_printk("peer=[%s]:%s r_xprt=%p delay=%lu", - __get_str(addr), __get_str(port), __entry->r_xprt, - __entry->delay + TP_printk("peer=[%s]:%s delay=%lu", + __get_str(addr), __get_str(port), __entry->delay ) ); @@ -565,7 +558,6 @@ TRACE_EVENT(xprtrdma_op_set_cto, TP_ARGS(r_xprt, connect, reconnect), TP_STRUCT__entry( - __field(const void *, r_xprt) __field(unsigned long, connect) __field(unsigned long, reconnect) __string(addr, rpcrdma_addrstr(r_xprt)) @@ -573,15 +565,14 @@ TRACE_EVENT(xprtrdma_op_set_cto, ), TP_fast_assign( - __entry->r_xprt = r_xprt; __entry->connect = connect; __entry->reconnect = reconnect; __assign_str(addr, rpcrdma_addrstr(r_xprt)); __assign_str(port, rpcrdma_portstr(r_xprt)); ), - TP_printk("peer=[%s]:%s r_xprt=%p: connect=%lu reconnect=%lu", - __get_str(addr), __get_str(port), __entry->r_xprt, + TP_printk("peer=[%s]:%s connect=%lu reconnect=%lu", + __get_str(addr), __get_str(port), __entry->connect / HZ, __entry->reconnect / HZ ) ); @@ -631,22 +622,19 @@ TRACE_EVENT(xprtrdma_createmrs, TP_ARGS(r_xprt, count), TP_STRUCT__entry( - __field(const void *, r_xprt) __string(addr, rpcrdma_addrstr(r_xprt)) __string(port, rpcrdma_portstr(r_xprt)) __field(unsigned int, count) ), TP_fast_assign( - __entry->r_xprt = r_xprt; __entry->count = count; __assign_str(addr, rpcrdma_addrstr(r_xprt)); __assign_str(port, rpcrdma_portstr(r_xprt)); ), - TP_printk("peer=[%s]:%s r_xprt=%p: created %u MRs", - __get_str(addr), __get_str(port), __entry->r_xprt, - __entry->count + TP_printk("peer=[%s]:%s created %u MRs", + __get_str(addr), __get_str(port), __entry->count ) ); @@ -869,7 +857,7 @@ TRACE_EVENT(xprtrdma_post_recvs, TP_ARGS(r_xprt, count, status), TP_STRUCT__entry( - __field(const void *, r_xprt) + __field(u32, cq_id) __field(unsigned int, count) __field(int, status) __field(int, posted) @@ -878,16 +866,18 @@ TRACE_EVENT(xprtrdma_post_recvs, ), TP_fast_assign( - __entry->r_xprt = r_xprt; + const struct rpcrdma_ep *ep = r_xprt->rx_ep; + + __entry->cq_id = ep->re_attr.recv_cq->res.id; __entry->count = count; __entry->status = status; - __entry->posted = r_xprt->rx_ep->re_receive_count; + __entry->posted = ep->re_receive_count; __assign_str(addr, rpcrdma_addrstr(r_xprt)); __assign_str(port, rpcrdma_portstr(r_xprt)); ), - TP_printk("peer=[%s]:%s r_xprt=%p: %u new recvs, %d active (rc %d)", - __get_str(addr), __get_str(port), __entry->r_xprt, + TP_printk("peer=[%s]:%s cq.id=%d %u new recvs, %d active (rc %d)", + __get_str(addr), __get_str(port), __entry->cq_id, __entry->count, __entry->posted, __entry->status ) ); @@ -1289,22 +1279,19 @@ TRACE_EVENT(xprtrdma_cb_setup, TP_ARGS(r_xprt, reqs), TP_STRUCT__entry( - __field(const void *, r_xprt) __field(unsigned int, reqs) __string(addr, rpcrdma_addrstr(r_xprt)) __string(port, rpcrdma_portstr(r_xprt)) ), TP_fast_assign( - __entry->r_xprt = r_xprt; __entry->reqs = reqs; __assign_str(addr, rpcrdma_addrstr(r_xprt)); __assign_str(port, rpcrdma_portstr(r_xprt)); ), - TP_printk("peer=[%s]:%s r_xprt=%p: %u reqs", - __get_str(addr), __get_str(port), - __entry->r_xprt, __entry->reqs + TP_printk("peer=[%s]:%s %u reqs", + __get_str(addr), __get_str(port), __entry->reqs ) ); -- cgit v1.2.3 From e1648eb23d839bd4b9f2999296d5e81dcd93311f Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Mon, 19 Apr 2021 14:03:50 -0400 Subject: xprtrdma: Remove the RPC/RDMA QP event handler Clean up: The handler only recorded a trace event. If indeed no action is needed by the RPC/RDMA consumer, then the event can be ignored. Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- include/trace/events/rpcrdma.h | 32 -------------------------------- net/sunrpc/xprtrdma/verbs.c | 18 ------------------ 2 files changed, 50 deletions(-) (limited to 'include') diff --git a/include/trace/events/rpcrdma.h b/include/trace/events/rpcrdma.h index e38b8e33be2d..ef6166b840e7 100644 --- a/include/trace/events/rpcrdma.h +++ b/include/trace/events/rpcrdma.h @@ -577,38 +577,6 @@ TRACE_EVENT(xprtrdma_op_set_cto, ) ); -TRACE_EVENT(xprtrdma_qp_event, - TP_PROTO( - const struct rpcrdma_ep *ep, - const struct ib_event *event - ), - - TP_ARGS(ep, event), - - TP_STRUCT__entry( - __field(unsigned long, event) - __string(name, event->device->name) - __array(unsigned char, srcaddr, sizeof(struct sockaddr_in6)) - __array(unsigned char, dstaddr, sizeof(struct sockaddr_in6)) - ), - - TP_fast_assign( - const struct rdma_cm_id *id = ep->re_id; - - __entry->event = event->event; - __assign_str(name, event->device->name); - memcpy(__entry->srcaddr, &id->route.addr.src_addr, - sizeof(struct sockaddr_in6)); - memcpy(__entry->dstaddr, &id->route.addr.dst_addr, - sizeof(struct sockaddr_in6)); - ), - - TP_printk("%pISpc -> %pISpc device=%s %s (%lu)", - __entry->srcaddr, __entry->dstaddr, __get_str(name), - rdma_show_ib_event(__entry->event), __entry->event - ) -); - /** ** Call events **/ diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c index 72b24dca96c1..1e965a380896 100644 --- a/net/sunrpc/xprtrdma/verbs.c +++ b/net/sunrpc/xprtrdma/verbs.c @@ -120,22 +120,6 @@ static void rpcrdma_xprt_drain(struct rpcrdma_xprt *r_xprt) rpcrdma_ep_put(ep); } -/** - * rpcrdma_qp_event_handler - Handle one QP event (error notification) - * @event: details of the event - * @context: ep that owns QP where event occurred - * - * Called from the RDMA provider (device driver) possibly in an interrupt - * context. The QP is always destroyed before the ID, so the ID will be - * reliably available when this handler is invoked. - */ -static void rpcrdma_qp_event_handler(struct ib_event *event, void *context) -{ - struct rpcrdma_ep *ep = context; - - trace_xprtrdma_qp_event(ep, event); -} - /* Ensure xprt_force_disconnect() is invoked exactly once when a * connection is closed or lost. (The important thing is it needs * to be invoked "at least" once). @@ -431,8 +415,6 @@ static int rpcrdma_ep_create(struct rpcrdma_xprt *r_xprt) r_xprt->rx_buf.rb_max_requests = cpu_to_be32(ep->re_max_requests); - ep->re_attr.event_handler = rpcrdma_qp_event_handler; - ep->re_attr.qp_context = ep; ep->re_attr.srq = NULL; ep->re_attr.cap.max_inline_data = 0; ep->re_attr.sq_sig_type = IB_SIGNAL_REQ_WR; -- cgit v1.2.3 From 13bcf7e32a0181095cd62010579869e87aacb332 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Mon, 19 Apr 2021 14:04:21 -0400 Subject: xprtrdma: Move fr_mr field to struct rpcrdma_mr Clean up: The last remaining field in struct rpcrdma_frwr has been removed, so the struct can be eliminated. Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- include/trace/events/rpcrdma.h | 12 ++++++------ net/sunrpc/xprtrdma/frwr_ops.c | 8 ++++---- net/sunrpc/xprtrdma/xprt_rdma.h | 7 ++----- 3 files changed, 12 insertions(+), 15 deletions(-) (limited to 'include') diff --git a/include/trace/events/rpcrdma.h b/include/trace/events/rpcrdma.h index ef6166b840e7..bd55908c1bef 100644 --- a/include/trace/events/rpcrdma.h +++ b/include/trace/events/rpcrdma.h @@ -379,7 +379,7 @@ DECLARE_EVENT_CLASS(xprtrdma_mr_class, __entry->task_id = task->tk_pid; __entry->client_id = task->tk_client->cl_clid; - __entry->mr_id = mr->frwr.fr_mr->res.id; + __entry->mr_id = mr->mr_ibmr->res.id; __entry->nents = mr->mr_nents; __entry->handle = mr->mr_handle; __entry->length = mr->mr_length; @@ -420,7 +420,7 @@ DECLARE_EVENT_CLASS(xprtrdma_anonymous_mr_class, ), TP_fast_assign( - __entry->mr_id = mr->frwr.fr_mr->res.id; + __entry->mr_id = mr->mr_ibmr->res.id; __entry->nents = mr->mr_nents; __entry->handle = mr->mr_handle; __entry->length = mr->mr_length; @@ -903,7 +903,7 @@ TRACE_EVENT(xprtrdma_frwr_alloc, ), TP_fast_assign( - __entry->mr_id = mr->frwr.fr_mr->res.id; + __entry->mr_id = mr->mr_ibmr->res.id; __entry->rc = rc; ), @@ -931,7 +931,7 @@ TRACE_EVENT(xprtrdma_frwr_dereg, ), TP_fast_assign( - __entry->mr_id = mr->frwr.fr_mr->res.id; + __entry->mr_id = mr->mr_ibmr->res.id; __entry->nents = mr->mr_nents; __entry->handle = mr->mr_handle; __entry->length = mr->mr_length; @@ -964,7 +964,7 @@ TRACE_EVENT(xprtrdma_frwr_sgerr, ), TP_fast_assign( - __entry->mr_id = mr->frwr.fr_mr->res.id; + __entry->mr_id = mr->mr_ibmr->res.id; __entry->addr = mr->mr_sg->dma_address; __entry->dir = mr->mr_dir; __entry->nents = sg_nents; @@ -994,7 +994,7 @@ TRACE_EVENT(xprtrdma_frwr_maperr, ), TP_fast_assign( - __entry->mr_id = mr->frwr.fr_mr->res.id; + __entry->mr_id = mr->mr_ibmr->res.id; __entry->addr = mr->mr_sg->dma_address; __entry->dir = mr->mr_dir; __entry->num_mapped = num_mapped; diff --git a/net/sunrpc/xprtrdma/frwr_ops.c b/net/sunrpc/xprtrdma/frwr_ops.c index bb2b9c607c71..285d73246fc2 100644 --- a/net/sunrpc/xprtrdma/frwr_ops.c +++ b/net/sunrpc/xprtrdma/frwr_ops.c @@ -55,7 +55,7 @@ static void frwr_cid_init(struct rpcrdma_ep *ep, struct rpc_rdma_cid *cid = &mr->mr_cid; cid->ci_queue_id = ep->re_attr.send_cq->res.id; - cid->ci_completion_id = mr->frwr.fr_mr->res.id; + cid->ci_completion_id = mr->mr_ibmr->res.id; } static void frwr_mr_unmap(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr *mr) @@ -79,7 +79,7 @@ void frwr_mr_release(struct rpcrdma_mr *mr) frwr_mr_unmap(mr->mr_xprt, mr); - rc = ib_dereg_mr(mr->frwr.fr_mr); + rc = ib_dereg_mr(mr->mr_ibmr); if (rc) trace_xprtrdma_frwr_dereg(mr, rc); kfree(mr->mr_sg); @@ -139,7 +139,7 @@ int frwr_mr_init(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr *mr) goto out_list_err; mr->mr_xprt = r_xprt; - mr->frwr.fr_mr = frmr; + mr->mr_ibmr = frmr; mr->mr_device = NULL; INIT_LIST_HEAD(&mr->mr_list); init_completion(&mr->mr_linv_done); @@ -323,7 +323,7 @@ struct rpcrdma_mr_seg *frwr_map(struct rpcrdma_xprt *r_xprt, goto out_dmamap_err; mr->mr_device = ep->re_id->device; - ibmr = mr->frwr.fr_mr; + ibmr = mr->mr_ibmr; n = ib_map_mr_sg(ibmr, mr->mr_sg, dma_nents, NULL, PAGE_SIZE); if (n != dma_nents) goto out_mapmr_err; diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h index 99ef83d673d6..436ad7312614 100644 --- a/net/sunrpc/xprtrdma/xprt_rdma.h +++ b/net/sunrpc/xprtrdma/xprt_rdma.h @@ -229,14 +229,12 @@ struct rpcrdma_sendctx { * An external memory region is any buffer or page that is registered * on the fly (ie, not pre-registered). */ -struct rpcrdma_frwr { - struct ib_mr *fr_mr; -}; - struct rpcrdma_req; struct rpcrdma_mr { struct list_head mr_list; struct rpcrdma_req *mr_req; + + struct ib_mr *mr_ibmr; struct ib_device *mr_device; struct scatterlist *mr_sg; int mr_nents; @@ -247,7 +245,6 @@ struct rpcrdma_mr { struct ib_reg_wr mr_regwr; struct ib_send_wr mr_invwr; }; - struct rpcrdma_frwr frwr; struct rpcrdma_xprt *mr_xprt; u32 mr_handle; u32 mr_length; -- cgit v1.2.3