aboutsummaryrefslogtreecommitdiff
path: root/fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs')
-rw-r--r--fs/9p/acl.c8
-rw-r--r--fs/9p/v9fs.h3
-rw-r--r--fs/9p/v9fs_vfs.h3
-rw-r--r--fs/9p/vfs_inode.c57
-rw-r--r--fs/9p/vfs_inode_dotl.c39
-rw-r--r--fs/9p/xattr.c1
-rw-r--r--fs/adfs/adfs.h3
-rw-r--r--fs/adfs/inode.c5
-rw-r--r--fs/affs/affs.h24
-rw-r--r--fs/affs/inode.c7
-rw-r--r--fs/affs/namei.c15
-rw-r--r--fs/afs/dir.c34
-rw-r--r--fs/afs/inode.c9
-rw-r--r--fs/afs/internal.h7
-rw-r--r--fs/afs/security.c3
-rw-r--r--fs/afs/xattr.c2
-rw-r--r--fs/anon_inodes.c157
-rw-r--r--fs/attr.c126
-rw-r--r--fs/autofs/root.c17
-rw-r--r--fs/bad_inode.c36
-rw-r--r--fs/bfs/dir.c12
-rw-r--r--fs/block_dev.c2
-rw-r--r--fs/btrfs/acl.c6
-rw-r--r--fs/btrfs/ctree.h3
-rw-r--r--fs/btrfs/file.c2
-rw-r--r--fs/btrfs/inode.c46
-rw-r--r--fs/btrfs/ioctl.c27
-rw-r--r--fs/btrfs/tests/btrfs-tests.c2
-rw-r--r--fs/btrfs/xattr.c2
-rw-r--r--fs/buffer.c7
-rw-r--r--fs/cachefiles/interface.c4
-rw-r--r--fs/cachefiles/namei.c21
-rw-r--r--fs/cachefiles/xattr.c29
-rw-r--r--fs/ceph/acl.c6
-rw-r--r--fs/ceph/addr.c2
-rw-r--r--fs/ceph/caps.c70
-rw-r--r--fs/ceph/dir.c23
-rw-r--r--fs/ceph/inode.c79
-rw-r--r--fs/ceph/snap.c10
-rw-r--r--fs/ceph/super.h52
-rw-r--r--fs/ceph/xattr.c1
-rw-r--r--fs/cifs/cifs_debug.c125
-rw-r--r--fs/cifs/cifs_swn.c2
-rw-r--r--fs/cifs/cifsacl.c379
-rw-r--r--fs/cifs/cifsacl.h4
-rw-r--r--fs/cifs/cifsencrypt.c6
-rw-r--r--fs/cifs/cifsfs.c20
-rw-r--r--fs/cifs/cifsfs.h27
-rw-r--r--fs/cifs/cifsglob.h11
-rw-r--r--fs/cifs/cifsproto.h2
-rw-r--r--fs/cifs/cifssmb.c6
-rw-r--r--fs/cifs/connect.c301
-rw-r--r--fs/cifs/dfs_cache.c33
-rw-r--r--fs/cifs/dir.c8
-rw-r--r--fs/cifs/file.c2
-rw-r--r--fs/cifs/fs_context.c75
-rw-r--r--fs/cifs/fs_context.h6
-rw-r--r--fs/cifs/inode.c49
-rw-r--r--fs/cifs/link.c3
-rw-r--r--fs/cifs/sess.c2
-rw-r--r--fs/cifs/smb2ops.c109
-rw-r--r--fs/cifs/smb2pdu.c22
-rw-r--r--fs/cifs/trace.h36
-rw-r--r--fs/cifs/transport.c63
-rw-r--r--fs/cifs/xattr.c1
-rw-r--r--fs/coda/coda_linux.h8
-rw-r--r--fs/coda/dir.c18
-rw-r--r--fs/coda/inode.c9
-rw-r--r--fs/coda/pioctl.c6
-rw-r--r--fs/configfs/configfs_internal.h6
-rw-r--r--fs/configfs/dir.c3
-rw-r--r--fs/configfs/inode.c5
-rw-r--r--fs/configfs/symlink.c6
-rw-r--r--fs/coredump.c14
-rw-r--r--fs/cramfs/inode.c18
-rw-r--r--fs/crypto/policy.c2
-rw-r--r--fs/dcache.c92
-rw-r--r--fs/debugfs/inode.c14
-rw-r--r--fs/direct-io.c4
-rw-r--r--fs/ecryptfs/crypto.c4
-rw-r--r--fs/ecryptfs/inode.c84
-rw-r--r--fs/ecryptfs/main.c6
-rw-r--r--fs/ecryptfs/mmap.c4
-rw-r--r--fs/efivarfs/file.c2
-rw-r--r--fs/efivarfs/inode.c4
-rw-r--r--fs/erofs/inode.c7
-rw-r--r--fs/erofs/internal.h5
-rw-r--r--fs/erofs/namei.c4
-rw-r--r--fs/eventpoll.c4
-rw-r--r--fs/exec.c16
-rw-r--r--fs/exfat/balloc.c4
-rw-r--r--fs/exfat/exfat_fs.h10
-rw-r--r--fs/exfat/exfat_raw.h4
-rw-r--r--fs/exfat/fatent.c43
-rw-r--r--fs/exfat/file.c14
-rw-r--r--fs/exfat/namei.c14
-rw-r--r--fs/exfat/super.c31
-rw-r--r--fs/ext2/acl.c6
-rw-r--r--fs/ext2/acl.h3
-rw-r--r--fs/ext2/ext2.h5
-rw-r--r--fs/ext2/ialloc.c2
-rw-r--r--fs/ext2/inode.c15
-rw-r--r--fs/ext2/ioctl.c6
-rw-r--r--fs/ext2/namei.c22
-rw-r--r--fs/ext2/xattr_security.c1
-rw-r--r--fs/ext2/xattr_trusted.c1
-rw-r--r--fs/ext2/xattr_user.c1
-rw-r--r--fs/ext4/.kunitconfig3
-rw-r--r--fs/ext4/Kconfig3
-rw-r--r--fs/ext4/acl.c5
-rw-r--r--fs/ext4/acl.h3
-rw-r--r--fs/ext4/ext4.h22
-rw-r--r--fs/ext4/extents.c16
-rw-r--r--fs/ext4/fast_commit.c29
-rw-r--r--fs/ext4/ialloc.c7
-rw-r--r--fs/ext4/inode.c41
-rw-r--r--fs/ext4/ioctl.c20
-rw-r--r--fs/ext4/namei.c94
-rw-r--r--fs/ext4/super.c14
-rw-r--r--fs/ext4/xattr_hurd.c1
-rw-r--r--fs/ext4/xattr_security.c1
-rw-r--r--fs/ext4/xattr_trusted.c1
-rw-r--r--fs/ext4/xattr_user.c1
-rw-r--r--fs/f2fs/acl.c7
-rw-r--r--fs/f2fs/acl.h3
-rw-r--r--fs/f2fs/f2fs.h7
-rw-r--r--fs/f2fs/file.c36
-rw-r--r--fs/f2fs/namei.c23
-rw-r--r--fs/f2fs/super.c3
-rw-r--r--fs/f2fs/xattr.c4
-rw-r--r--fs/fat/fat.h6
-rw-r--r--fs/fat/file.c24
-rw-r--r--fs/fat/misc.c23
-rw-r--r--fs/fat/namei_msdos.c12
-rw-r--r--fs/fat/namei_vfat.c15
-rw-r--r--fs/fcntl.c3
-rw-r--r--fs/fhandle.c2
-rw-r--r--fs/fs-writeback.c116
-rw-r--r--fs/fuse/acl.c3
-rw-r--r--fs/fuse/dev.c6
-rw-r--r--fs/fuse/dir.c46
-rw-r--r--fs/fuse/fuse_i.h4
-rw-r--r--fs/fuse/xattr.c2
-rw-r--r--fs/gfs2/acl.c5
-rw-r--r--fs/gfs2/acl.h3
-rw-r--r--fs/gfs2/bmap.c10
-rw-r--r--fs/gfs2/file.c16
-rw-r--r--fs/gfs2/glock.c22
-rw-r--r--fs/gfs2/glock.h6
-rw-r--r--fs/gfs2/glops.c38
-rw-r--r--fs/gfs2/incore.h54
-rw-r--r--fs/gfs2/inode.c74
-rw-r--r--fs/gfs2/inode.h3
-rw-r--r--fs/gfs2/lock_dlm.c8
-rw-r--r--fs/gfs2/log.c525
-rw-r--r--fs/gfs2/log.h20
-rw-r--r--fs/gfs2/lops.c26
-rw-r--r--fs/gfs2/lops.h23
-rw-r--r--fs/gfs2/main.c4
-rw-r--r--fs/gfs2/ops_fstype.c71
-rw-r--r--fs/gfs2/recovery.c14
-rw-r--r--fs/gfs2/rgrp.c442
-rw-r--r--fs/gfs2/rgrp.h6
-rw-r--r--fs/gfs2/super.c75
-rw-r--r--fs/gfs2/super.h8
-rw-r--r--fs/gfs2/trace_gfs2.h37
-rw-r--r--fs/gfs2/trans.c102
-rw-r--r--fs/gfs2/trans.h5
-rw-r--r--fs/gfs2/util.c59
-rw-r--r--fs/gfs2/util.h3
-rw-r--r--fs/gfs2/xattr.c55
-rw-r--r--fs/hfs/attr.c1
-rw-r--r--fs/hfs/dir.c13
-rw-r--r--fs/hfs/hfs_fs.h3
-rw-r--r--fs/hfs/inode.c8
-rw-r--r--fs/hfsplus/dir.c22
-rw-r--r--fs/hfsplus/hfsplus_fs.h5
-rw-r--r--fs/hfsplus/inode.c16
-rw-r--r--fs/hfsplus/ioctl.c2
-rw-r--r--fs/hfsplus/xattr.c1
-rw-r--r--fs/hfsplus/xattr_security.c1
-rw-r--r--fs/hfsplus/xattr_trusted.c1
-rw-r--r--fs/hfsplus/xattr_user.c1
-rw-r--r--fs/hostfs/hostfs_kern.c29
-rw-r--r--fs/hpfs/hpfs_fn.h2
-rw-r--r--fs/hpfs/inode.c7
-rw-r--r--fs/hpfs/namei.c20
-rw-r--r--fs/hugetlbfs/inode.c107
-rw-r--r--fs/init.c24
-rw-r--r--fs/inode.c87
-rw-r--r--fs/internal.h2
-rw-r--r--fs/io_uring.c45
-rw-r--r--fs/iomap/seek.c125
-rw-r--r--fs/isofs/dir.c1
-rw-r--r--fs/isofs/inode.c9
-rw-r--r--fs/isofs/namei.c1
-rw-r--r--fs/jffs2/acl.c6
-rw-r--r--fs/jffs2/acl.h3
-rw-r--r--fs/jffs2/dir.c33
-rw-r--r--fs/jffs2/fs.c7
-rw-r--r--fs/jffs2/os-linux.h2
-rw-r--r--fs/jffs2/security.c1
-rw-r--r--fs/jffs2/xattr_trusted.c1
-rw-r--r--fs/jffs2/xattr_user.c1
-rw-r--r--fs/jfs/acl.c5
-rw-r--r--fs/jfs/file.c9
-rw-r--r--fs/jfs/ioctl.c2
-rw-r--r--fs/jfs/jfs_acl.h3
-rw-r--r--fs/jfs/jfs_inode.c2
-rw-r--r--fs/jfs/jfs_inode.h2
-rw-r--r--fs/jfs/namei.c21
-rw-r--r--fs/jfs/super.c1
-rw-r--r--fs/jfs/xattr.c2
-rw-r--r--fs/kernfs/dir.c6
-rw-r--r--fs/kernfs/inode.c19
-rw-r--r--fs/kernfs/kernfs-internal.h9
-rw-r--r--fs/libfs.c33
-rw-r--r--fs/minix/bitmap.c2
-rw-r--r--fs/minix/file.c7
-rw-r--r--fs/minix/inode.c6
-rw-r--r--fs/minix/minix.h3
-rw-r--r--fs/minix/namei.c24
-rw-r--r--fs/mount.h10
-rw-r--r--fs/namei.c511
-rw-r--r--fs/namespace.c537
-rw-r--r--fs/nfs/dir.c25
-rw-r--r--fs/nfs/file.c27
-rw-r--r--fs/nfs/fs_context.c35
-rw-r--r--fs/nfs/fscache.c4
-rw-r--r--fs/nfs/inode.c120
-rw-r--r--fs/nfs/internal.h14
-rw-r--r--fs/nfs/namespace.c15
-rw-r--r--fs/nfs/nfs3_fs.h3
-rw-r--r--fs/nfs/nfs3acl.c4
-rw-r--r--fs/nfs/nfs4client.c1
-rw-r--r--fs/nfs/nfs4proc.c24
-rw-r--r--fs/nfs/nfs4state.c1
-rw-r--r--fs/nfs/pnfs.c2
-rw-r--r--fs/nfs/read.c204
-rw-r--r--fs/nfs/super.c7
-rw-r--r--fs/nfs/write.c37
-rw-r--r--fs/nfsd/export.c12
-rw-r--r--fs/nfsd/nfs2acl.c6
-rw-r--r--fs/nfsd/nfs3acl.c6
-rw-r--r--fs/nfsd/nfs4acl.c5
-rw-r--r--fs/nfsd/nfs4recover.c6
-rw-r--r--fs/nfsd/nfsctl.c14
-rw-r--r--fs/nfsd/nfsfh.c3
-rw-r--r--fs/nfsd/nfsproc.c2
-rw-r--r--fs/nfsd/vfs.c50
-rw-r--r--fs/nilfs2/inode.c14
-rw-r--r--fs/nilfs2/ioctl.c2
-rw-r--r--fs/nilfs2/namei.c19
-rw-r--r--fs/nilfs2/nilfs.h6
-rw-r--r--fs/notify/fanotify/fanotify_user.c4
-rw-r--r--fs/notify/group.c25
-rw-r--r--fs/notify/inotify/inotify_user.c6
-rw-r--r--fs/ntfs/inode.c12
-rw-r--r--fs/ntfs/inode.h3
-rw-r--r--fs/ntfs/layout.h4
-rw-r--r--fs/ocfs2/acl.c6
-rw-r--r--fs/ocfs2/acl.h3
-rw-r--r--fs/ocfs2/cluster/heartbeat.c8
-rw-r--r--fs/ocfs2/dlm/dlmast.c10
-rw-r--r--fs/ocfs2/dlm/dlmcommon.h4
-rw-r--r--fs/ocfs2/dlmfs/dlmfs.c17
-rw-r--r--fs/ocfs2/file.c18
-rw-r--r--fs/ocfs2/file.h11
-rw-r--r--fs/ocfs2/ioctl.c2
-rw-r--r--fs/ocfs2/namei.c21
-rw-r--r--fs/ocfs2/refcounttree.c6
-rw-r--r--fs/ocfs2/super.c2
-rw-r--r--fs/ocfs2/xattr.c3
-rw-r--r--fs/omfs/dir.c13
-rw-r--r--fs/omfs/file.c7
-rw-r--r--fs/omfs/inode.c2
-rw-r--r--fs/open.c35
-rw-r--r--fs/orangefs/acl.c6
-rw-r--r--fs/orangefs/file.c5
-rw-r--r--fs/orangefs/inode.c20
-rw-r--r--fs/orangefs/namei.c12
-rw-r--r--fs/orangefs/orangefs-kernel.h13
-rw-r--r--fs/orangefs/xattr.c1
-rw-r--r--fs/overlayfs/copy_up.c22
-rw-r--r--fs/overlayfs/dir.c31
-rw-r--r--fs/overlayfs/file.c6
-rw-r--r--fs/overlayfs/inode.c27
-rw-r--r--fs/overlayfs/overlayfs.h45
-rw-r--r--fs/overlayfs/super.c21
-rw-r--r--fs/overlayfs/util.c4
-rw-r--r--fs/pipe.c2
-rw-r--r--fs/posix_acl.c103
-rw-r--r--fs/proc/base.c47
-rw-r--r--fs/proc/fd.c5
-rw-r--r--fs/proc/fd.h3
-rw-r--r--fs/proc/generic.c12
-rw-r--r--fs/proc/internal.h6
-rw-r--r--fs/proc/meminfo.c10
-rw-r--r--fs/proc/proc_net.c5
-rw-r--r--fs/proc/proc_sysctl.c19
-rw-r--r--fs/proc/root.c5
-rw-r--r--fs/proc/vmcore.c7
-rw-r--r--fs/proc_namespace.c3
-rw-r--r--fs/pstore/zone.c2
-rw-r--r--fs/quota/quota_v2.c11
-rw-r--r--fs/ramfs/file-nommu.c9
-rw-r--r--fs/ramfs/inode.c31
-rw-r--r--fs/reiserfs/acl.h3
-rw-r--r--fs/reiserfs/inode.c7
-rw-r--r--fs/reiserfs/ioctl.c4
-rw-r--r--fs/reiserfs/namei.c21
-rw-r--r--fs/reiserfs/reiserfs.h3
-rw-r--r--fs/reiserfs/xattr.c13
-rw-r--r--fs/reiserfs/xattr.h3
-rw-r--r--fs/reiserfs/xattr_acl.c8
-rw-r--r--fs/reiserfs/xattr_security.c3
-rw-r--r--fs/reiserfs/xattr_trusted.c3
-rw-r--r--fs/reiserfs/xattr_user.c3
-rw-r--r--fs/remap_range.c7
-rw-r--r--fs/seq_file.c5
-rw-r--r--fs/stat.c26
-rw-r--r--fs/super.c12
-rw-r--r--fs/sysfs/file.c11
-rw-r--r--fs/sysv/file.c7
-rw-r--r--fs/sysv/ialloc.c2
-rw-r--r--fs/sysv/itree.c6
-rw-r--r--fs/sysv/namei.c21
-rw-r--r--fs/sysv/sysv.h3
-rw-r--r--fs/tracefs/inode.c4
-rw-r--r--fs/ubifs/dir.c30
-rw-r--r--fs/ubifs/file.c5
-rw-r--r--fs/ubifs/ioctl.c2
-rw-r--r--fs/ubifs/ubifs.h5
-rw-r--r--fs/ubifs/xattr.c1
-rw-r--r--fs/udf/file.c9
-rw-r--r--fs/udf/ialloc.c2
-rw-r--r--fs/udf/inode.c9
-rw-r--r--fs/udf/namei.c24
-rw-r--r--fs/udf/super.c9
-rw-r--r--fs/udf/symlink.c7
-rw-r--r--fs/ufs/ialloc.c2
-rw-r--r--fs/ufs/inode.c7
-rw-r--r--fs/ufs/namei.c19
-rw-r--r--fs/ufs/ufs.h3
-rw-r--r--fs/userfaultfd.c19
-rw-r--r--fs/utimes.c3
-rw-r--r--fs/vboxsf/dir.c12
-rw-r--r--fs/vboxsf/utils.c9
-rw-r--r--fs/vboxsf/vfsmod.h8
-rw-r--r--fs/verity/enable.c2
-rw-r--r--fs/xattr.c139
-rw-r--r--fs/xfs/xfs_acl.c5
-rw-r--r--fs/xfs/xfs_acl.h3
-rw-r--r--fs/xfs/xfs_file.c4
-rw-r--r--fs/xfs/xfs_inode.c26
-rw-r--r--fs/xfs/xfs_inode.h16
-rw-r--r--fs/xfs/xfs_ioctl.c35
-rw-r--r--fs/xfs/xfs_ioctl32.c13
-rw-r--r--fs/xfs/xfs_iops.c101
-rw-r--r--fs/xfs/xfs_iops.h3
-rw-r--r--fs/xfs/xfs_itable.c17
-rw-r--r--fs/xfs/xfs_itable.h1
-rw-r--r--fs/xfs/xfs_qm.c3
-rw-r--r--fs/xfs/xfs_super.c2
-rw-r--r--fs/xfs/xfs_symlink.c5
-rw-r--r--fs/xfs/xfs_symlink.h5
-rw-r--r--fs/xfs/xfs_xattr.c7
-rw-r--r--fs/zonefs/Makefile2
-rw-r--r--fs/zonefs/super.c19
-rw-r--r--fs/zonefs/trace.h104
370 files changed, 5782 insertions, 3375 deletions
diff --git a/fs/9p/acl.c b/fs/9p/acl.c
index 6261719f6f2a..bb1b286c49ae 100644
--- a/fs/9p/acl.c
+++ b/fs/9p/acl.c
@@ -239,6 +239,7 @@ static int v9fs_xattr_get_acl(const struct xattr_handler *handler,
}
static int v9fs_xattr_set_acl(const struct xattr_handler *handler,
+ struct user_namespace *mnt_userns,
struct dentry *dentry, struct inode *inode,
const char *name, const void *value,
size_t size, int flags)
@@ -258,7 +259,7 @@ static int v9fs_xattr_set_acl(const struct xattr_handler *handler,
if (S_ISLNK(inode->i_mode))
return -EOPNOTSUPP;
- if (!inode_owner_or_capable(inode))
+ if (!inode_owner_or_capable(&init_user_ns, inode))
return -EPERM;
if (value) {
/* update the cached acl value */
@@ -279,7 +280,8 @@ static int v9fs_xattr_set_acl(const struct xattr_handler *handler,
struct iattr iattr = { 0 };
struct posix_acl *old_acl = acl;
- retval = posix_acl_update_mode(inode, &iattr.ia_mode, &acl);
+ retval = posix_acl_update_mode(&init_user_ns, inode,
+ &iattr.ia_mode, &acl);
if (retval)
goto err_out;
if (!acl) {
@@ -297,7 +299,7 @@ static int v9fs_xattr_set_acl(const struct xattr_handler *handler,
* What is the following setxattr update the
* mode ?
*/
- v9fs_vfs_setattr_dotl(dentry, &iattr);
+ v9fs_vfs_setattr_dotl(&init_user_ns, dentry, &iattr);
}
break;
case ACL_TYPE_DEFAULT:
diff --git a/fs/9p/v9fs.h b/fs/9p/v9fs.h
index 7b763776306e..4ca56c5dd637 100644
--- a/fs/9p/v9fs.h
+++ b/fs/9p/v9fs.h
@@ -135,7 +135,8 @@ extern struct dentry *v9fs_vfs_lookup(struct inode *dir, struct dentry *dentry,
unsigned int flags);
extern int v9fs_vfs_unlink(struct inode *i, struct dentry *d);
extern int v9fs_vfs_rmdir(struct inode *i, struct dentry *d);
-extern int v9fs_vfs_rename(struct inode *old_dir, struct dentry *old_dentry,
+extern int v9fs_vfs_rename(struct user_namespace *mnt_userns,
+ struct inode *old_dir, struct dentry *old_dentry,
struct inode *new_dir, struct dentry *new_dentry,
unsigned int flags);
extern struct inode *v9fs_inode_from_fid(struct v9fs_session_info *v9ses,
diff --git a/fs/9p/v9fs_vfs.h b/fs/9p/v9fs_vfs.h
index fd2a2b040250..d44ade76966a 100644
--- a/fs/9p/v9fs_vfs.h
+++ b/fs/9p/v9fs_vfs.h
@@ -59,7 +59,8 @@ void v9fs_inode2stat(struct inode *inode, struct p9_wstat *stat);
int v9fs_uflags2omode(int uflags, int extended);
void v9fs_blank_wstat(struct p9_wstat *wstat);
-int v9fs_vfs_setattr_dotl(struct dentry *, struct iattr *);
+int v9fs_vfs_setattr_dotl(struct user_namespace *, struct dentry *,
+ struct iattr *);
int v9fs_file_fsync_dotl(struct file *filp, loff_t start, loff_t end,
int datasync);
int v9fs_refresh_inode(struct p9_fid *fid, struct inode *inode);
diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c
index 4a937fac1acb..8d97f0b45e9c 100644
--- a/fs/9p/vfs_inode.c
+++ b/fs/9p/vfs_inode.c
@@ -251,7 +251,7 @@ int v9fs_init_inode(struct v9fs_session_info *v9ses,
{
int err = 0;
- inode_init_owner(inode, NULL, mode);
+ inode_init_owner(&init_user_ns,inode, NULL, mode);
inode->i_blocks = 0;
inode->i_rdev = rdev;
inode->i_atime = inode->i_mtime = inode->i_ctime = current_time(inode);
@@ -676,8 +676,8 @@ error:
*/
static int
-v9fs_vfs_create(struct inode *dir, struct dentry *dentry, umode_t mode,
- bool excl)
+v9fs_vfs_create(struct user_namespace *mnt_userns, struct inode *dir,
+ struct dentry *dentry, umode_t mode, bool excl)
{
struct v9fs_session_info *v9ses = v9fs_inode2v9ses(dir);
u32 perm = unixmode2p9mode(v9ses, mode);
@@ -702,7 +702,8 @@ v9fs_vfs_create(struct inode *dir, struct dentry *dentry, umode_t mode,
*
*/
-static int v9fs_vfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
+static int v9fs_vfs_mkdir(struct user_namespace *mnt_userns, struct inode *dir,
+ struct dentry *dentry, umode_t mode)
{
int err;
u32 perm;
@@ -907,9 +908,9 @@ int v9fs_vfs_rmdir(struct inode *i, struct dentry *d)
*/
int
-v9fs_vfs_rename(struct inode *old_dir, struct dentry *old_dentry,
- struct inode *new_dir, struct dentry *new_dentry,
- unsigned int flags)
+v9fs_vfs_rename(struct user_namespace *mnt_userns, struct inode *old_dir,
+ struct dentry *old_dentry, struct inode *new_dir,
+ struct dentry *new_dentry, unsigned int flags)
{
int retval;
struct inode *old_inode;
@@ -1016,8 +1017,8 @@ done:
*/
static int
-v9fs_vfs_getattr(const struct path *path, struct kstat *stat,
- u32 request_mask, unsigned int flags)
+v9fs_vfs_getattr(struct user_namespace *mnt_userns, const struct path *path,
+ struct kstat *stat, u32 request_mask, unsigned int flags)
{
struct dentry *dentry = path->dentry;
struct v9fs_session_info *v9ses;
@@ -1027,7 +1028,7 @@ v9fs_vfs_getattr(const struct path *path, struct kstat *stat,
p9_debug(P9_DEBUG_VFS, "dentry: %p\n", dentry);
v9ses = v9fs_dentry2v9ses(dentry);
if (v9ses->cache == CACHE_LOOSE || v9ses->cache == CACHE_FSCACHE) {
- generic_fillattr(d_inode(dentry), stat);
+ generic_fillattr(&init_user_ns, d_inode(dentry), stat);
return 0;
}
fid = v9fs_fid_lookup(dentry);
@@ -1040,7 +1041,7 @@ v9fs_vfs_getattr(const struct path *path, struct kstat *stat,
return PTR_ERR(st);
v9fs_stat2inode(st, d_inode(dentry), dentry->d_sb, 0);
- generic_fillattr(d_inode(dentry), stat);
+ generic_fillattr(&init_user_ns, d_inode(dentry), stat);
p9stat_free(st);
kfree(st);
@@ -1054,7 +1055,8 @@ v9fs_vfs_getattr(const struct path *path, struct kstat *stat,
*
*/
-static int v9fs_vfs_setattr(struct dentry *dentry, struct iattr *iattr)
+static int v9fs_vfs_setattr(struct user_namespace *mnt_userns,
+ struct dentry *dentry, struct iattr *iattr)
{
int retval, use_dentry = 0;
struct v9fs_session_info *v9ses;
@@ -1062,7 +1064,7 @@ static int v9fs_vfs_setattr(struct dentry *dentry, struct iattr *iattr)
struct p9_wstat wstat;
p9_debug(P9_DEBUG_VFS, "\n");
- retval = setattr_prepare(dentry, iattr);
+ retval = setattr_prepare(&init_user_ns, dentry, iattr);
if (retval)
return retval;
@@ -1118,7 +1120,7 @@ static int v9fs_vfs_setattr(struct dentry *dentry, struct iattr *iattr)
v9fs_invalidate_inode_attr(d_inode(dentry));
- setattr_copy(d_inode(dentry), iattr);
+ setattr_copy(&init_user_ns, d_inode(dentry), iattr);
mark_inode_dirty(d_inode(dentry));
return 0;
}
@@ -1137,9 +1139,6 @@ v9fs_stat2inode(struct p9_wstat *stat, struct inode *inode,
struct super_block *sb, unsigned int flags)
{
umode_t mode;
- char ext[32];
- char tag_name[14];
- unsigned int i_nlink;
struct v9fs_session_info *v9ses = sb->s_fs_info;
struct v9fs_inode *v9inode = V9FS_I(inode);
@@ -1157,18 +1156,18 @@ v9fs_stat2inode(struct p9_wstat *stat, struct inode *inode,
inode->i_gid = stat->n_gid;
}
if ((S_ISREG(inode->i_mode)) || (S_ISDIR(inode->i_mode))) {
- if (v9fs_proto_dotu(v9ses) && (stat->extension[0] != '\0')) {
+ if (v9fs_proto_dotu(v9ses)) {
+ unsigned int i_nlink;
/*
- * Hadlink support got added later to
- * to the .u extension. So there can be
- * server out there that doesn't support
- * this even with .u extension. So check
- * for non NULL stat->extension
+ * Hadlink support got added later to the .u extension.
+ * So there can be a server out there that doesn't
+ * support this even with .u extension. That would
+ * just leave us with stat->extension being an empty
+ * string, though.
*/
- strlcpy(ext, stat->extension, sizeof(ext));
/* HARDLINKCOUNT %u */
- sscanf(ext, "%13s %u", tag_name, &i_nlink);
- if (!strncmp(tag_name, "HARDLINKCOUNT", 13))
+ if (sscanf(stat->extension,
+ " HARDLINKCOUNT %u", &i_nlink) == 1)
set_nlink(inode, i_nlink);
}
}
@@ -1295,7 +1294,8 @@ static int v9fs_vfs_mkspecial(struct inode *dir, struct dentry *dentry,
*/
static int
-v9fs_vfs_symlink(struct inode *dir, struct dentry *dentry, const char *symname)
+v9fs_vfs_symlink(struct user_namespace *mnt_userns, struct inode *dir,
+ struct dentry *dentry, const char *symname)
{
p9_debug(P9_DEBUG_VFS, " %lu,%pd,%s\n",
dir->i_ino, dentry, symname);
@@ -1348,7 +1348,8 @@ v9fs_vfs_link(struct dentry *old_dentry, struct inode *dir,
*/
static int
-v9fs_vfs_mknod(struct inode *dir, struct dentry *dentry, umode_t mode, dev_t rdev)
+v9fs_vfs_mknod(struct user_namespace *mnt_userns, struct inode *dir,
+ struct dentry *dentry, umode_t mode, dev_t rdev)
{
struct v9fs_session_info *v9ses = v9fs_inode2v9ses(dir);
int retval;
diff --git a/fs/9p/vfs_inode_dotl.c b/fs/9p/vfs_inode_dotl.c
index 823c2eb5f1bf..1dc7af046615 100644
--- a/fs/9p/vfs_inode_dotl.c
+++ b/fs/9p/vfs_inode_dotl.c
@@ -33,8 +33,8 @@
#include "acl.h"
static int
-v9fs_vfs_mknod_dotl(struct inode *dir, struct dentry *dentry, umode_t omode,
- dev_t rdev);
+v9fs_vfs_mknod_dotl(struct user_namespace *mnt_userns, struct inode *dir,
+ struct dentry *dentry, umode_t omode, dev_t rdev);
/**
* v9fs_get_fsgid_for_create - Helper function to get the gid for creating a
@@ -218,10 +218,10 @@ int v9fs_open_to_dotl_flags(int flags)
*/
static int
-v9fs_vfs_create_dotl(struct inode *dir, struct dentry *dentry, umode_t omode,
- bool excl)
+v9fs_vfs_create_dotl(struct user_namespace *mnt_userns, struct inode *dir,
+ struct dentry *dentry, umode_t omode, bool excl)
{
- return v9fs_vfs_mknod_dotl(dir, dentry, omode, 0);
+ return v9fs_vfs_mknod_dotl(mnt_userns, dir, dentry, omode, 0);
}
static int
@@ -367,8 +367,9 @@ err_clunk_old_fid:
*
*/
-static int v9fs_vfs_mkdir_dotl(struct inode *dir,
- struct dentry *dentry, umode_t omode)
+static int v9fs_vfs_mkdir_dotl(struct user_namespace *mnt_userns,
+ struct inode *dir, struct dentry *dentry,
+ umode_t omode)
{
int err;
struct v9fs_session_info *v9ses;
@@ -457,8 +458,9 @@ error:
}
static int
-v9fs_vfs_getattr_dotl(const struct path *path, struct kstat *stat,
- u32 request_mask, unsigned int flags)
+v9fs_vfs_getattr_dotl(struct user_namespace *mnt_userns,
+ const struct path *path, struct kstat *stat,
+ u32 request_mask, unsigned int flags)
{
struct dentry *dentry = path->dentry;
struct v9fs_session_info *v9ses;
@@ -468,7 +470,7 @@ v9fs_vfs_getattr_dotl(const struct path *path, struct kstat *stat,
p9_debug(P9_DEBUG_VFS, "dentry: %p\n", dentry);
v9ses = v9fs_dentry2v9ses(dentry);
if (v9ses->cache == CACHE_LOOSE || v9ses->cache == CACHE_FSCACHE) {
- generic_fillattr(d_inode(dentry), stat);
+ generic_fillattr(&init_user_ns, d_inode(dentry), stat);
return 0;
}
fid = v9fs_fid_lookup(dentry);
@@ -485,7 +487,7 @@ v9fs_vfs_getattr_dotl(const struct path *path, struct kstat *stat,
return PTR_ERR(st);
v9fs_stat2inode_dotl(st, d_inode(dentry), 0);
- generic_fillattr(d_inode(dentry), stat);
+ generic_fillattr(&init_user_ns, d_inode(dentry), stat);
/* Change block size to what the server returned */
stat->blksize = st->st_blksize;
@@ -540,7 +542,8 @@ static int v9fs_mapped_iattr_valid(int iattr_valid)
*
*/
-int v9fs_vfs_setattr_dotl(struct dentry *dentry, struct iattr *iattr)
+int v9fs_vfs_setattr_dotl(struct user_namespace *mnt_userns,
+ struct dentry *dentry, struct iattr *iattr)
{
int retval, use_dentry = 0;
struct p9_fid *fid = NULL;
@@ -549,7 +552,7 @@ int v9fs_vfs_setattr_dotl(struct dentry *dentry, struct iattr *iattr)
p9_debug(P9_DEBUG_VFS, "\n");
- retval = setattr_prepare(dentry, iattr);
+ retval = setattr_prepare(&init_user_ns, dentry, iattr);
if (retval)
return retval;
@@ -590,7 +593,7 @@ int v9fs_vfs_setattr_dotl(struct dentry *dentry, struct iattr *iattr)
truncate_setsize(inode, iattr->ia_size);
v9fs_invalidate_inode_attr(inode);
- setattr_copy(inode, iattr);
+ setattr_copy(&init_user_ns, inode, iattr);
mark_inode_dirty(inode);
if (iattr->ia_valid & ATTR_MODE) {
/* We also want to update ACL when we update mode bits */
@@ -684,8 +687,8 @@ v9fs_stat2inode_dotl(struct p9_stat_dotl *stat, struct inode *inode,
}
static int
-v9fs_vfs_symlink_dotl(struct inode *dir, struct dentry *dentry,
- const char *symname)
+v9fs_vfs_symlink_dotl(struct user_namespace *mnt_userns, struct inode *dir,
+ struct dentry *dentry, const char *symname)
{
int err;
kgid_t gid;
@@ -824,8 +827,8 @@ v9fs_vfs_link_dotl(struct dentry *old_dentry, struct inode *dir,
*
*/
static int
-v9fs_vfs_mknod_dotl(struct inode *dir, struct dentry *dentry, umode_t omode,
- dev_t rdev)
+v9fs_vfs_mknod_dotl(struct user_namespace *mnt_userns, struct inode *dir,
+ struct dentry *dentry, umode_t omode, dev_t rdev)
{
int err;
kgid_t gid;
diff --git a/fs/9p/xattr.c b/fs/9p/xattr.c
index 87217dd0433e..ee331845e2c7 100644
--- a/fs/9p/xattr.c
+++ b/fs/9p/xattr.c
@@ -157,6 +157,7 @@ static int v9fs_xattr_handler_get(const struct xattr_handler *handler,
}
static int v9fs_xattr_handler_set(const struct xattr_handler *handler,
+ struct user_namespace *mnt_userns,
struct dentry *dentry, struct inode *inode,
const char *name, const void *value,
size_t size, int flags)
diff --git a/fs/adfs/adfs.h b/fs/adfs/adfs.h
index 699c4fa8b78b..06b7c92343ad 100644
--- a/fs/adfs/adfs.h
+++ b/fs/adfs/adfs.h
@@ -144,7 +144,8 @@ struct adfs_discmap {
/* Inode stuff */
struct inode *adfs_iget(struct super_block *sb, struct object_info *obj);
int adfs_write_inode(struct inode *inode, struct writeback_control *wbc);
-int adfs_notify_change(struct dentry *dentry, struct iattr *attr);
+int adfs_notify_change(struct user_namespace *mnt_userns, struct dentry *dentry,
+ struct iattr *attr);
/* map.c */
int adfs_map_lookup(struct super_block *sb, u32 frag_id, unsigned int offset);
diff --git a/fs/adfs/inode.c b/fs/adfs/inode.c
index 32620f4a7623..fb7ee026d101 100644
--- a/fs/adfs/inode.c
+++ b/fs/adfs/inode.c
@@ -292,14 +292,15 @@ out:
* later.
*/
int
-adfs_notify_change(struct dentry *dentry, struct iattr *attr)
+adfs_notify_change(struct user_namespace *mnt_userns, struct dentry *dentry,
+ struct iattr *attr)
{
struct inode *inode = d_inode(dentry);
struct super_block *sb = inode->i_sb;
unsigned int ia_valid = attr->ia_valid;
int error;
- error = setattr_prepare(dentry, attr);
+ error = setattr_prepare(&init_user_ns, dentry, attr);
/*
* we can't change the UID or GID of any file -
diff --git a/fs/affs/affs.h b/fs/affs/affs.h
index a755bef7c4c7..bfa89e131ead 100644
--- a/fs/affs/affs.h
+++ b/fs/affs/affs.h
@@ -167,27 +167,33 @@ extern const struct export_operations affs_export_ops;
extern int affs_hash_name(struct super_block *sb, const u8 *name, unsigned int len);
extern struct dentry *affs_lookup(struct inode *dir, struct dentry *dentry, unsigned int);
extern int affs_unlink(struct inode *dir, struct dentry *dentry);
-extern int affs_create(struct inode *dir, struct dentry *dentry, umode_t mode, bool);
-extern int affs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode);
+extern int affs_create(struct user_namespace *mnt_userns, struct inode *dir,
+ struct dentry *dentry, umode_t mode, bool);
+extern int affs_mkdir(struct user_namespace *mnt_userns, struct inode *dir,
+ struct dentry *dentry, umode_t mode);
extern int affs_rmdir(struct inode *dir, struct dentry *dentry);
extern int affs_link(struct dentry *olddentry, struct inode *dir,
struct dentry *dentry);
-extern int affs_symlink(struct inode *dir, struct dentry *dentry,
- const char *symname);
-extern int affs_rename2(struct inode *old_dir, struct dentry *old_dentry,
- struct inode *new_dir, struct dentry *new_dentry,
- unsigned int flags);
+extern int affs_symlink(struct user_namespace *mnt_userns,
+ struct inode *dir, struct dentry *dentry,
+ const char *symname);
+extern int affs_rename2(struct user_namespace *mnt_userns,
+ struct inode *old_dir, struct dentry *old_dentry,
+ struct inode *new_dir, struct dentry *new_dentry,
+ unsigned int flags);
/* inode.c */
extern struct inode *affs_new_inode(struct inode *dir);
-extern int affs_notify_change(struct dentry *dentry, struct iattr *attr);
+extern int affs_notify_change(struct user_namespace *mnt_userns,
+ struct dentry *dentry, struct iattr *attr);
extern void affs_evict_inode(struct inode *inode);
extern struct inode *affs_iget(struct super_block *sb,
unsigned long ino);
extern int affs_write_inode(struct inode *inode,
struct writeback_control *wbc);
-extern int affs_add_entry(struct inode *dir, struct inode *inode, struct dentry *dentry, s32 type);
+extern int affs_add_entry(struct inode *dir, struct inode *inode,
+ struct dentry *dentry, s32 type);
/* file.c */
diff --git a/fs/affs/inode.c b/fs/affs/inode.c
index 044412110b52..2352a75bd9d6 100644
--- a/fs/affs/inode.c
+++ b/fs/affs/inode.c
@@ -216,14 +216,15 @@ affs_write_inode(struct inode *inode, struct writeback_control *wbc)
}
int
-affs_notify_change(struct dentry *dentry, struct iattr *attr)
+affs_notify_change(struct user_namespace *mnt_userns, struct dentry *dentry,
+ struct iattr *attr)
{
struct inode *inode = d_inode(dentry);
int error;
pr_debug("notify_change(%lu,0x%x)\n", inode->i_ino, attr->ia_valid);
- error = setattr_prepare(dentry, attr);
+ error = setattr_prepare(&init_user_ns, dentry, attr);
if (error)
goto out;
@@ -249,7 +250,7 @@ affs_notify_change(struct dentry *dentry, struct iattr *attr)
affs_truncate(inode);
}
- setattr_copy(inode, attr);
+ setattr_copy(&init_user_ns, inode, attr);
mark_inode_dirty(inode);
if (attr->ia_valid & ATTR_MODE)
diff --git a/fs/affs/namei.c b/fs/affs/namei.c
index 5400a876d73f..bcab18956b4f 100644
--- a/fs/affs/namei.c
+++ b/fs/affs/namei.c
@@ -242,7 +242,8 @@ affs_unlink(struct inode *dir, struct dentry *dentry)
}
int
-affs_create(struct inode *dir, struct dentry *dentry, umode_t mode, bool excl)
+affs_create(struct user_namespace *mnt_userns, struct inode *dir,
+ struct dentry *dentry, umode_t mode, bool excl)
{
struct super_block *sb = dir->i_sb;
struct inode *inode;
@@ -273,7 +274,8 @@ affs_create(struct inode *dir, struct dentry *dentry, umode_t mode, bool excl)
}
int
-affs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
+affs_mkdir(struct user_namespace *mnt_userns, struct inode *dir,
+ struct dentry *dentry, umode_t mode)
{
struct inode *inode;
int error;
@@ -311,7 +313,8 @@ affs_rmdir(struct inode *dir, struct dentry *dentry)
}
int
-affs_symlink(struct inode *dir, struct dentry *dentry, const char *symname)
+affs_symlink(struct user_namespace *mnt_userns, struct inode *dir,
+ struct dentry *dentry, const char *symname)
{
struct super_block *sb = dir->i_sb;
struct buffer_head *bh;
@@ -500,9 +503,9 @@ done:
return retval;
}
-int affs_rename2(struct inode *old_dir, struct dentry *old_dentry,
- struct inode *new_dir, struct dentry *new_dentry,
- unsigned int flags)
+int affs_rename2(struct user_namespace *mnt_userns, struct inode *old_dir,
+ struct dentry *old_dentry, struct inode *new_dir,
+ struct dentry *new_dentry, unsigned int flags)
{
if (flags & ~(RENAME_NOREPLACE | RENAME_EXCHANGE))
diff --git a/fs/afs/dir.c b/fs/afs/dir.c
index 7bd659ad959e..714fcca9af99 100644
--- a/fs/afs/dir.c
+++ b/fs/afs/dir.c
@@ -28,18 +28,19 @@ static int afs_lookup_one_filldir(struct dir_context *ctx, const char *name, int
loff_t fpos, u64 ino, unsigned dtype);
static int afs_lookup_filldir(struct dir_context *ctx, const char *name, int nlen,
loff_t fpos, u64 ino, unsigned dtype);
-static int afs_create(struct inode *dir, struct dentry *dentry, umode_t mode,
- bool excl);
-static int afs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode);
+static int afs_create(struct user_namespace *mnt_userns, struct inode *dir,
+ struct dentry *dentry, umode_t mode, bool excl);
+static int afs_mkdir(struct user_namespace *mnt_userns, struct inode *dir,
+ struct dentry *dentry, umode_t mode);
static int afs_rmdir(struct inode *dir, struct dentry *dentry);
static int afs_unlink(struct inode *dir, struct dentry *dentry);
static int afs_link(struct dentry *from, struct inode *dir,
struct dentry *dentry);
-static int afs_symlink(struct inode *dir, struct dentry *dentry,
- const char *content);
-static int afs_rename(struct inode *old_dir, struct dentry *old_dentry,
- struct inode *new_dir, struct dentry *new_dentry,
- unsigned int flags);
+static int afs_symlink(struct user_namespace *mnt_userns, struct inode *dir,
+ struct dentry *dentry, const char *content);
+static int afs_rename(struct user_namespace *mnt_userns, struct inode *old_dir,
+ struct dentry *old_dentry, struct inode *new_dir,
+ struct dentry *new_dentry, unsigned int flags);
static int afs_dir_releasepage(struct page *page, gfp_t gfp_flags);
static void afs_dir_invalidatepage(struct page *page, unsigned int offset,
unsigned int length);
@@ -1325,7 +1326,8 @@ static const struct afs_operation_ops afs_mkdir_operation = {
/*
* create a directory on an AFS filesystem
*/
-static int afs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
+static int afs_mkdir(struct user_namespace *mnt_userns, struct inode *dir,
+ struct dentry *dentry, umode_t mode)
{
struct afs_operation *op;
struct afs_vnode *dvnode = AFS_FS_I(dir);
@@ -1619,8 +1621,8 @@ static const struct afs_operation_ops afs_create_operation = {
/*
* create a regular file on an AFS filesystem
*/
-static int afs_create(struct inode *dir, struct dentry *dentry, umode_t mode,
- bool excl)
+static int afs_create(struct user_namespace *mnt_userns, struct inode *dir,
+ struct dentry *dentry, umode_t mode, bool excl)
{
struct afs_operation *op;
struct afs_vnode *dvnode = AFS_FS_I(dir);
@@ -1741,8 +1743,8 @@ static const struct afs_operation_ops afs_symlink_operation = {
/*
* create a symlink in an AFS filesystem
*/
-static int afs_symlink(struct inode *dir, struct dentry *dentry,
- const char *content)
+static int afs_symlink(struct user_namespace *mnt_userns, struct inode *dir,
+ struct dentry *dentry, const char *content)
{
struct afs_operation *op;
struct afs_vnode *dvnode = AFS_FS_I(dir);
@@ -1876,9 +1878,9 @@ static const struct afs_operation_ops afs_rename_operation = {
/*
* rename a file in an AFS filesystem and/or move it between directories
*/
-static int afs_rename(struct inode *old_dir, struct dentry *old_dentry,
- struct inode *new_dir, struct dentry *new_dentry,
- unsigned int flags)
+static int afs_rename(struct user_namespace *mnt_userns, struct inode *old_dir,
+ struct dentry *old_dentry, struct inode *new_dir,
+ struct dentry *new_dentry, unsigned int flags)
{
struct afs_operation *op;
struct afs_vnode *orig_dvnode, *new_dvnode, *vnode;
diff --git a/fs/afs/inode.c b/fs/afs/inode.c
index b0d7b892090d..1156b2df28d3 100644
--- a/fs/afs/inode.c
+++ b/fs/afs/inode.c
@@ -734,8 +734,8 @@ error_unlock:
/*
* read the attributes of an inode
*/
-int afs_getattr(const struct path *path, struct kstat *stat,
- u32 request_mask, unsigned int query_flags)
+int afs_getattr(struct user_namespace *mnt_userns, const struct path *path,
+ struct kstat *stat, u32 request_mask, unsigned int query_flags)
{
struct inode *inode = d_inode(path->dentry);
struct afs_vnode *vnode = AFS_FS_I(inode);
@@ -745,7 +745,7 @@ int afs_getattr(const struct path *path, struct kstat *stat,
do {
read_seqbegin_or_lock(&vnode->cb_lock, &seq);
- generic_fillattr(inode, stat);
+ generic_fillattr(&init_user_ns, inode, stat);
if (test_bit(AFS_VNODE_SILLY_DELETED, &vnode->flags) &&
stat->nlink > 0)
stat->nlink -= 1;
@@ -857,7 +857,8 @@ static const struct afs_operation_ops afs_setattr_operation = {
/*
* set the attributes of an inode
*/
-int afs_setattr(struct dentry *dentry, struct iattr *attr)
+int afs_setattr(struct user_namespace *mnt_userns, struct dentry *dentry,
+ struct iattr *attr)
{
struct afs_operation *op;
struct afs_vnode *vnode = AFS_FS_I(d_inode(dentry));
diff --git a/fs/afs/internal.h b/fs/afs/internal.h
index 0d150a29e39e..b626e38e9ab5 100644
--- a/fs/afs/internal.h
+++ b/fs/afs/internal.h
@@ -1149,8 +1149,9 @@ extern struct inode *afs_iget(struct afs_operation *, struct afs_vnode_param *);
extern struct inode *afs_root_iget(struct super_block *, struct key *);
extern bool afs_check_validity(struct afs_vnode *);
extern int afs_validate(struct afs_vnode *, struct key *);
-extern int afs_getattr(const struct path *, struct kstat *, u32, unsigned int);
-extern int afs_setattr(struct dentry *, struct iattr *);
+extern int afs_getattr(struct user_namespace *mnt_userns, const struct path *,
+ struct kstat *, u32, unsigned int);
+extern int afs_setattr(struct user_namespace *mnt_userns, struct dentry *, struct iattr *);
extern void afs_evict_inode(struct inode *);
extern int afs_drop_inode(struct inode *);
@@ -1361,7 +1362,7 @@ extern void afs_zap_permits(struct rcu_head *);
extern struct key *afs_request_key(struct afs_cell *);
extern struct key *afs_request_key_rcu(struct afs_cell *);
extern int afs_check_permit(struct afs_vnode *, struct key *, afs_access_t *);
-extern int afs_permission(struct inode *, int);
+extern int afs_permission(struct user_namespace *, struct inode *, int);
extern void __exit afs_clean_up_permit_cache(void);
/*
diff --git a/fs/afs/security.c b/fs/afs/security.c
index 9cf3102f370c..3c7a8fc4f93f 100644
--- a/fs/afs/security.c
+++ b/fs/afs/security.c
@@ -396,7 +396,8 @@ int afs_check_permit(struct afs_vnode *vnode, struct key *key,
* - AFS ACLs are attached to directories only, and a file is controlled by its
* parent directory's ACL
*/
-int afs_permission(struct inode *inode, int mask)
+int afs_permission(struct user_namespace *mnt_userns, struct inode *inode,
+ int mask)
{
struct afs_vnode *vnode = AFS_FS_I(inode);
afs_access_t access;
diff --git a/fs/afs/xattr.c b/fs/afs/xattr.c
index 95c573dcda11..c629caae5002 100644
--- a/fs/afs/xattr.c
+++ b/fs/afs/xattr.c
@@ -120,6 +120,7 @@ static const struct afs_operation_ops afs_store_acl_operation = {
* Set a file's AFS3 ACL.
*/
static int afs_xattr_set_acl(const struct xattr_handler *handler,
+ struct user_namespace *mnt_userns,
struct dentry *dentry,
struct inode *inode, const char *name,
const void *buffer, size_t size, int flags)
@@ -248,6 +249,7 @@ static const struct afs_operation_ops yfs_store_opaque_acl2_operation = {
* Set a file's YFS ACL.
*/
static int afs_xattr_set_yfs(const struct xattr_handler *handler,
+ struct user_namespace *mnt_userns,
struct dentry *dentry,
struct inode *inode, const char *name,
const void *buffer, size_t size, int flags)
diff --git a/fs/anon_inodes.c b/fs/anon_inodes.c
index 89714308c25b..a280156138ed 100644
--- a/fs/anon_inodes.c
+++ b/fs/anon_inodes.c
@@ -55,61 +55,79 @@ static struct file_system_type anon_inode_fs_type = {
.kill_sb = kill_anon_super,
};
-/**
- * anon_inode_getfile - creates a new file instance by hooking it up to an
- * anonymous inode, and a dentry that describe the "class"
- * of the file
- *
- * @name: [in] name of the "class" of the new file
- * @fops: [in] file operations for the new file
- * @priv: [in] private data for the new file (will be file's private_data)
- * @flags: [in] flags
- *
- * Creates a new file by hooking it on a single inode. This is useful for files
- * that do not need to have a full-fledged inode in order to operate correctly.
- * All the files created with anon_inode_getfile() will share a single inode,
- * hence saving memory and avoiding code duplication for the file/inode/dentry
- * setup. Returns the newly created file* or an error pointer.
- */
-struct file *anon_inode_getfile(const char *name,
- const struct file_operations *fops,
- void *priv, int flags)
+static struct inode *anon_inode_make_secure_inode(
+ const char *name,
+ const struct inode *context_inode)
{
- struct file *file;
+ struct inode *inode;
+ const struct qstr qname = QSTR_INIT(name, strlen(name));
+ int error;
+
+ inode = alloc_anon_inode(anon_inode_mnt->mnt_sb);
+ if (IS_ERR(inode))
+ return inode;
+ inode->i_flags &= ~S_PRIVATE;
+ error = security_inode_init_security_anon(inode, &qname, context_inode);
+ if (error) {
+ iput(inode);
+ return ERR_PTR(error);
+ }
+ return inode;
+}
- if (IS_ERR(anon_inode_inode))
- return ERR_PTR(-ENODEV);
+static struct file *__anon_inode_getfile(const char *name,
+ const struct file_operations *fops,
+ void *priv, int flags,
+ const struct inode *context_inode,
+ bool secure)
+{
+ struct inode *inode;
+ struct file *file;
if (fops->owner && !try_module_get(fops->owner))
return ERR_PTR(-ENOENT);
- /*
- * We know the anon_inode inode count is always greater than zero,
- * so ihold() is safe.
- */
- ihold(anon_inode_inode);
- file = alloc_file_pseudo(anon_inode_inode, anon_inode_mnt, name,
+ if (secure) {
+ inode = anon_inode_make_secure_inode(name, context_inode);
+ if (IS_ERR(inode)) {
+ file = ERR_CAST(inode);
+ goto err;
+ }
+ } else {
+ inode = anon_inode_inode;
+ if (IS_ERR(inode)) {
+ file = ERR_PTR(-ENODEV);
+ goto err;
+ }
+ /*
+ * We know the anon_inode inode count is always
+ * greater than zero, so ihold() is safe.
+ */
+ ihold(inode);
+ }
+
+ file = alloc_file_pseudo(inode, anon_inode_mnt, name,
flags & (O_ACCMODE | O_NONBLOCK), fops);
if (IS_ERR(file))
- goto err;
+ goto err_iput;
- file->f_mapping = anon_inode_inode->i_mapping;
+ file->f_mapping = inode->i_mapping;
file->private_data = priv;
return file;
+err_iput:
+ iput(inode);
err:
- iput(anon_inode_inode);
module_put(fops->owner);
return file;
}
-EXPORT_SYMBOL_GPL(anon_inode_getfile);
/**
- * anon_inode_getfd - creates a new file instance by hooking it up to an
- * anonymous inode, and a dentry that describe the "class"
- * of the file
+ * anon_inode_getfile - creates a new file instance by hooking it up to an
+ * anonymous inode, and a dentry that describe the "class"
+ * of the file
*
* @name: [in] name of the "class" of the new file
* @fops: [in] file operations for the new file
@@ -118,12 +136,23 @@ EXPORT_SYMBOL_GPL(anon_inode_getfile);
*
* Creates a new file by hooking it on a single inode. This is useful for files
* that do not need to have a full-fledged inode in order to operate correctly.
- * All the files created with anon_inode_getfd() will share a single inode,
+ * All the files created with anon_inode_getfile() will share a single inode,
* hence saving memory and avoiding code duplication for the file/inode/dentry
- * setup. Returns new descriptor or an error code.
+ * setup. Returns the newly created file* or an error pointer.
*/
-int anon_inode_getfd(const char *name, const struct file_operations *fops,
- void *priv, int flags)
+struct file *anon_inode_getfile(const char *name,
+ const struct file_operations *fops,
+ void *priv, int flags)
+{
+ return __anon_inode_getfile(name, fops, priv, flags, NULL, false);
+}
+EXPORT_SYMBOL_GPL(anon_inode_getfile);
+
+static int __anon_inode_getfd(const char *name,
+ const struct file_operations *fops,
+ void *priv, int flags,
+ const struct inode *context_inode,
+ bool secure)
{
int error, fd;
struct file *file;
@@ -133,7 +162,8 @@ int anon_inode_getfd(const char *name, const struct file_operations *fops,
return error;
fd = error;
- file = anon_inode_getfile(name, fops, priv, flags);
+ file = __anon_inode_getfile(name, fops, priv, flags, context_inode,
+ secure);
if (IS_ERR(file)) {
error = PTR_ERR(file);
goto err_put_unused_fd;
@@ -146,8 +176,55 @@ err_put_unused_fd:
put_unused_fd(fd);
return error;
}
+
+/**
+ * anon_inode_getfd - creates a new file instance by hooking it up to
+ * an anonymous inode and a dentry that describe
+ * the "class" of the file
+ *
+ * @name: [in] name of the "class" of the new file
+ * @fops: [in] file operations for the new file
+ * @priv: [in] private data for the new file (will be file's private_data)
+ * @flags: [in] flags
+ *
+ * Creates a new file by hooking it on a single inode. This is
+ * useful for files that do not need to have a full-fledged inode in
+ * order to operate correctly. All the files created with
+ * anon_inode_getfd() will use the same singleton inode, reducing
+ * memory use and avoiding code duplication for the file/inode/dentry
+ * setup. Returns a newly created file descriptor or an error code.
+ */
+int anon_inode_getfd(const char *name, const struct file_operations *fops,
+ void *priv, int flags)
+{
+ return __anon_inode_getfd(name, fops, priv, flags, NULL, false);
+}
EXPORT_SYMBOL_GPL(anon_inode_getfd);
+/**
+ * anon_inode_getfd_secure - Like anon_inode_getfd(), but creates a new
+ * !S_PRIVATE anon inode rather than reuse the singleton anon inode, and calls
+ * the inode_init_security_anon() LSM hook. This allows the inode to have its
+ * own security context and for a LSM to reject creation of the inode.
+ *
+ * @name: [in] name of the "class" of the new file
+ * @fops: [in] file operations for the new file
+ * @priv: [in] private data for the new file (will be file's private_data)
+ * @flags: [in] flags
+ * @context_inode:
+ * [in] the logical relationship with the new inode (optional)
+ *
+ * The LSM may use @context_inode in inode_init_security_anon(), but a
+ * reference to it is not held.
+ */
+int anon_inode_getfd_secure(const char *name, const struct file_operations *fops,
+ void *priv, int flags,
+ const struct inode *context_inode)
+{
+ return __anon_inode_getfd(name, fops, priv, flags, context_inode, true);
+}
+EXPORT_SYMBOL_GPL(anon_inode_getfd_secure);
+
static int __init anon_inode_init(void)
{
anon_inode_mnt = kern_mount(&anon_inode_fs_type);
diff --git a/fs/attr.c b/fs/attr.c
index b4bbdbd4c8ca..87ef39db1c34 100644
--- a/fs/attr.c
+++ b/fs/attr.c
@@ -18,27 +18,55 @@
#include <linux/evm.h>
#include <linux/ima.h>
-static bool chown_ok(const struct inode *inode, kuid_t uid)
+/**
+ * chown_ok - verify permissions to chown inode
+ * @mnt_userns: user namespace of the mount @inode was found from
+ * @inode: inode to check permissions on
+ * @uid: uid to chown @inode to
+ *
+ * If the inode has been found through an idmapped mount the user namespace of
+ * the vfsmount must be passed through @mnt_userns. This function will then
+ * take care to map the inode according to @mnt_userns before checking
+ * permissions. On non-idmapped mounts or if permission checking is to be
+ * performed on the raw inode simply passs init_user_ns.
+ */
+static bool chown_ok(struct user_namespace *mnt_userns,
+ const struct inode *inode,
+ kuid_t uid)
{
- if (uid_eq(current_fsuid(), inode->i_uid) &&
- uid_eq(uid, inode->i_uid))
+ kuid_t kuid = i_uid_into_mnt(mnt_userns, inode);
+ if (uid_eq(current_fsuid(), kuid) && uid_eq(uid, kuid))
return true;
- if (capable_wrt_inode_uidgid(inode, CAP_CHOWN))
+ if (capable_wrt_inode_uidgid(mnt_userns, inode, CAP_CHOWN))
return true;
- if (uid_eq(inode->i_uid, INVALID_UID) &&
+ if (uid_eq(kuid, INVALID_UID) &&
ns_capable(inode->i_sb->s_user_ns, CAP_CHOWN))
return true;
return false;
}
-static bool chgrp_ok(const struct inode *inode, kgid_t gid)
+/**
+ * chgrp_ok - verify permissions to chgrp inode
+ * @mnt_userns: user namespace of the mount @inode was found from
+ * @inode: inode to check permissions on
+ * @gid: gid to chown @inode to
+ *
+ * If the inode has been found through an idmapped mount the user namespace of
+ * the vfsmount must be passed through @mnt_userns. This function will then
+ * take care to map the inode according to @mnt_userns before checking
+ * permissions. On non-idmapped mounts or if permission checking is to be
+ * performed on the raw inode simply passs init_user_ns.
+ */
+static bool chgrp_ok(struct user_namespace *mnt_userns,
+ const struct inode *inode, kgid_t gid)
{
- if (uid_eq(current_fsuid(), inode->i_uid) &&
- (in_group_p(gid) || gid_eq(gid, inode->i_gid)))
+ kgid_t kgid = i_gid_into_mnt(mnt_userns, inode);
+ if (uid_eq(current_fsuid(), i_uid_into_mnt(mnt_userns, inode)) &&
+ (in_group_p(gid) || gid_eq(gid, kgid)))
return true;
- if (capable_wrt_inode_uidgid(inode, CAP_CHOWN))
+ if (capable_wrt_inode_uidgid(mnt_userns, inode, CAP_CHOWN))
return true;
- if (gid_eq(inode->i_gid, INVALID_GID) &&
+ if (gid_eq(kgid, INVALID_GID) &&
ns_capable(inode->i_sb->s_user_ns, CAP_CHOWN))
return true;
return false;
@@ -46,6 +74,7 @@ static bool chgrp_ok(const struct inode *inode, kgid_t gid)
/**
* setattr_prepare - check if attribute changes to a dentry are allowed
+ * @mnt_userns: user namespace of the mount the inode was found from
* @dentry: dentry to check
* @attr: attributes to change
*
@@ -55,10 +84,17 @@ static bool chgrp_ok(const struct inode *inode, kgid_t gid)
* SGID bit from mode if user is not allowed to set it. Also file capabilities
* and IMA extended attributes are cleared if ATTR_KILL_PRIV is set.
*
+ * If the inode has been found through an idmapped mount the user namespace of
+ * the vfsmount must be passed through @mnt_userns. This function will then
+ * take care to map the inode according to @mnt_userns before checking
+ * permissions. On non-idmapped mounts or if permission checking is to be
+ * performed on the raw inode simply passs init_user_ns.
+ *
* Should be called as the first thing in ->setattr implementations,
* possibly after taking additional locks.
*/
-int setattr_prepare(struct dentry *dentry, struct iattr *attr)
+int setattr_prepare(struct user_namespace *mnt_userns, struct dentry *dentry,
+ struct iattr *attr)
{
struct inode *inode = d_inode(dentry);
unsigned int ia_valid = attr->ia_valid;
@@ -78,27 +114,27 @@ int setattr_prepare(struct dentry *dentry, struct iattr *attr)
goto kill_priv;
/* Make sure a caller can chown. */
- if ((ia_valid & ATTR_UID) && !chown_ok(inode, attr->ia_uid))
+ if ((ia_valid & ATTR_UID) && !chown_ok(mnt_userns, inode, attr->ia_uid))
return -EPERM;
/* Make sure caller can chgrp. */
- if ((ia_valid & ATTR_GID) && !chgrp_ok(inode, attr->ia_gid))
+ if ((ia_valid & ATTR_GID) && !chgrp_ok(mnt_userns, inode, attr->ia_gid))
return -EPERM;
/* Make sure a caller can chmod. */
if (ia_valid & ATTR_MODE) {
- if (!inode_owner_or_capable(inode))
+ if (!inode_owner_or_capable(mnt_userns, inode))
return -EPERM;
/* Also check the setgid bit! */
- if (!in_group_p((ia_valid & ATTR_GID) ? attr->ia_gid :
- inode->i_gid) &&
- !capable_wrt_inode_uidgid(inode, CAP_FSETID))
+ if (!in_group_p((ia_valid & ATTR_GID) ? attr->ia_gid :
+ i_gid_into_mnt(mnt_userns, inode)) &&
+ !capable_wrt_inode_uidgid(mnt_userns, inode, CAP_FSETID))
attr->ia_mode &= ~S_ISGID;
}
/* Check for setting the inode time. */
if (ia_valid & (ATTR_MTIME_SET | ATTR_ATIME_SET | ATTR_TIMES_SET)) {
- if (!inode_owner_or_capable(inode))
+ if (!inode_owner_or_capable(mnt_userns, inode))
return -EPERM;
}
@@ -107,7 +143,7 @@ kill_priv:
if (ia_valid & ATTR_KILL_PRIV) {
int error;
- error = security_inode_killpriv(dentry);
+ error = security_inode_killpriv(mnt_userns, dentry);
if (error)
return error;
}
@@ -162,20 +198,33 @@ EXPORT_SYMBOL(inode_newsize_ok);
/**
* setattr_copy - copy simple metadata updates into the generic inode
+ * @mnt_userns: user namespace of the mount the inode was found from
* @inode: the inode to be updated
* @attr: the new attributes
*
* setattr_copy must be called with i_mutex held.
*
* setattr_copy updates the inode's metadata with that specified
- * in attr. Noticeably missing is inode size update, which is more complex
+ * in attr on idmapped mounts. If file ownership is changed setattr_copy
+ * doesn't map ia_uid and ia_gid. It will asssume the caller has already
+ * provided the intended values. Necessary permission checks to determine
+ * whether or not the S_ISGID property needs to be removed are performed with
+ * the correct idmapped mount permission helpers.
+ * Noticeably missing is inode size update, which is more complex
* as it requires pagecache updates.
*
+ * If the inode has been found through an idmapped mount the user namespace of
+ * the vfsmount must be passed through @mnt_userns. This function will then
+ * take care to map the inode according to @mnt_userns before checking
+ * permissions. On non-idmapped mounts or if permission checking is to be
+ * performed on the raw inode simply passs init_user_ns.
+ *
* The inode is not marked as dirty after this operation. The rationale is
* that for "simple" filesystems, the struct inode is the inode storage.
* The caller is free to mark the inode dirty afterwards if needed.
*/
-void setattr_copy(struct inode *inode, const struct iattr *attr)
+void setattr_copy(struct user_namespace *mnt_userns, struct inode *inode,
+ const struct iattr *attr)
{
unsigned int ia_valid = attr->ia_valid;
@@ -191,9 +240,9 @@ void setattr_copy(struct inode *inode, const struct iattr *attr)
inode->i_ctime = attr->ia_ctime;
if (ia_valid & ATTR_MODE) {
umode_t mode = attr->ia_mode;
-
- if (!in_group_p(inode->i_gid) &&
- !capable_wrt_inode_uidgid(inode, CAP_FSETID))
+ kgid_t kgid = i_gid_into_mnt(mnt_userns, inode);
+ if (!in_group_p(kgid) &&
+ !capable_wrt_inode_uidgid(mnt_userns, inode, CAP_FSETID))
mode &= ~S_ISGID;
inode->i_mode = mode;
}
@@ -202,6 +251,7 @@ EXPORT_SYMBOL(setattr_copy);
/**
* notify_change - modify attributes of a filesytem object
+ * @mnt_userns: user namespace of the mount the inode was found from
* @dentry: object affected
* @attr: new attributes
* @delegated_inode: returns inode, if the inode is delegated
@@ -214,13 +264,23 @@ EXPORT_SYMBOL(setattr_copy);
* retry. Because breaking a delegation may take a long time, the
* caller should drop the i_mutex before doing so.
*
+ * If file ownership is changed notify_change() doesn't map ia_uid and
+ * ia_gid. It will asssume the caller has already provided the intended values.
+ *
* Alternatively, a caller may pass NULL for delegated_inode. This may
* be appropriate for callers that expect the underlying filesystem not
* to be NFS exported. Also, passing NULL is fine for callers holding
* the file open for write, as there can be no conflicting delegation in
* that case.
+ *
+ * If the inode has been found through an idmapped mount the user namespace of
+ * the vfsmount must be passed through @mnt_userns. This function will then
+ * take care to map the inode according to @mnt_userns before checking
+ * permissions. On non-idmapped mounts or if permission checking is to be
+ * performed on the raw inode simply passs init_user_ns.
*/
-int notify_change(struct dentry * dentry, struct iattr * attr, struct inode **delegated_inode)
+int notify_change(struct user_namespace *mnt_userns, struct dentry *dentry,
+ struct iattr *attr, struct inode **delegated_inode)
{
struct inode *inode = dentry->d_inode;
umode_t mode = inode->i_mode;
@@ -243,8 +303,8 @@ int notify_change(struct dentry * dentry, struct iattr * attr, struct inode **de
if (IS_IMMUTABLE(inode))
return -EPERM;
- if (!inode_owner_or_capable(inode)) {
- error = inode_permission(inode, MAY_WRITE);
+ if (!inode_owner_or_capable(mnt_userns, inode)) {
+ error = inode_permission(mnt_userns, inode, MAY_WRITE);
if (error)
return error;
}
@@ -320,9 +380,11 @@ int notify_change(struct dentry * dentry, struct iattr * attr, struct inode **de
/* Don't allow modifications of files with invalid uids or
* gids unless those uids & gids are being made valid.
*/
- if (!(ia_valid & ATTR_UID) && !uid_valid(inode->i_uid))
+ if (!(ia_valid & ATTR_UID) &&
+ !uid_valid(i_uid_into_mnt(mnt_userns, inode)))
return -EOVERFLOW;
- if (!(ia_valid & ATTR_GID) && !gid_valid(inode->i_gid))
+ if (!(ia_valid & ATTR_GID) &&
+ !gid_valid(i_gid_into_mnt(mnt_userns, inode)))
return -EOVERFLOW;
error = security_inode_setattr(dentry, attr);
@@ -333,13 +395,13 @@ int notify_change(struct dentry * dentry, struct iattr * attr, struct inode **de
return error;
if (inode->i_op->setattr)
- error = inode->i_op->setattr(dentry, attr);
+ error = inode->i_op->setattr(mnt_userns, dentry, attr);
else
- error = simple_setattr(dentry, attr);
+ error = simple_setattr(mnt_userns, dentry, attr);
if (!error) {
fsnotify_change(dentry, ia_valid);
- ima_inode_post_setattr(dentry);
+ ima_inode_post_setattr(mnt_userns, dentry);
evm_inode_post_setattr(dentry, ia_valid);
}
diff --git a/fs/autofs/root.c b/fs/autofs/root.c
index 5aaa1732bf1e..91fe4548c256 100644
--- a/fs/autofs/root.c
+++ b/fs/autofs/root.c
@@ -10,10 +10,12 @@
#include "autofs_i.h"
-static int autofs_dir_symlink(struct inode *, struct dentry *, const char *);
+static int autofs_dir_symlink(struct user_namespace *, struct inode *,
+ struct dentry *, const char *);
static int autofs_dir_unlink(struct inode *, struct dentry *);
static int autofs_dir_rmdir(struct inode *, struct dentry *);
-static int autofs_dir_mkdir(struct inode *, struct dentry *, umode_t);
+static int autofs_dir_mkdir(struct user_namespace *, struct inode *,
+ struct dentry *, umode_t);
static long autofs_root_ioctl(struct file *, unsigned int, unsigned long);
#ifdef CONFIG_COMPAT
static long autofs_root_compat_ioctl(struct file *,
@@ -524,9 +526,9 @@ static struct dentry *autofs_lookup(struct inode *dir,
return NULL;
}
-static int autofs_dir_symlink(struct inode *dir,
- struct dentry *dentry,
- const char *symname)
+static int autofs_dir_symlink(struct user_namespace *mnt_userns,
+ struct inode *dir, struct dentry *dentry,
+ const char *symname)
{
struct autofs_sb_info *sbi = autofs_sbi(dir->i_sb);
struct autofs_info *ino = autofs_dentry_ino(dentry);
@@ -715,8 +717,9 @@ static int autofs_dir_rmdir(struct inode *dir, struct dentry *dentry)
return 0;
}
-static int autofs_dir_mkdir(struct inode *dir,
- struct dentry *dentry, umode_t mode)
+static int autofs_dir_mkdir(struct user_namespace *mnt_userns,
+ struct inode *dir, struct dentry *dentry,
+ umode_t mode)
{
struct autofs_sb_info *sbi = autofs_sbi(dir->i_sb);
struct autofs_info *ino = autofs_dentry_ino(dentry);
diff --git a/fs/bad_inode.c b/fs/bad_inode.c
index 54f0ce444272..48e16144c1f7 100644
--- a/fs/bad_inode.c
+++ b/fs/bad_inode.c
@@ -27,8 +27,9 @@ static const struct file_operations bad_file_ops =
.open = bad_file_open,
};
-static int bad_inode_create (struct inode *dir, struct dentry *dentry,
- umode_t mode, bool excl)
+static int bad_inode_create(struct user_namespace *mnt_userns,
+ struct inode *dir, struct dentry *dentry,
+ umode_t mode, bool excl)
{
return -EIO;
}
@@ -50,14 +51,15 @@ static int bad_inode_unlink(struct inode *dir, struct dentry *dentry)
return -EIO;
}
-static int bad_inode_symlink (struct inode *dir, struct dentry *dentry,
- const char *symname)
+static int bad_inode_symlink(struct user_namespace *mnt_userns,
+ struct inode *dir, struct dentry *dentry,
+ const char *symname)
{
return -EIO;
}
-static int bad_inode_mkdir(struct inode *dir, struct dentry *dentry,
- umode_t mode)
+static int bad_inode_mkdir(struct user_namespace *mnt_userns, struct inode *dir,
+ struct dentry *dentry, umode_t mode)
{
return -EIO;
}
@@ -67,13 +69,14 @@ static int bad_inode_rmdir (struct inode *dir, struct dentry *dentry)
return -EIO;
}
-static int bad_inode_mknod (struct inode *dir, struct dentry *dentry,
- umode_t mode, dev_t rdev)
+static int bad_inode_mknod(struct user_namespace *mnt_userns, struct inode *dir,
+ struct dentry *dentry, umode_t mode, dev_t rdev)
{
return -EIO;
}
-static int bad_inode_rename2(struct inode *old_dir, struct dentry *old_dentry,
+static int bad_inode_rename2(struct user_namespace *mnt_userns,
+ struct inode *old_dir, struct dentry *old_dentry,
struct inode *new_dir, struct dentry *new_dentry,
unsigned int flags)
{
@@ -86,18 +89,21 @@ static int bad_inode_readlink(struct dentry *dentry, char __user *buffer,
return -EIO;
}
-static int bad_inode_permission(struct inode *inode, int mask)
+static int bad_inode_permission(struct user_namespace *mnt_userns,
+ struct inode *inode, int mask)
{
return -EIO;
}
-static int bad_inode_getattr(const struct path *path, struct kstat *stat,
+static int bad_inode_getattr(struct user_namespace *mnt_userns,
+ const struct path *path, struct kstat *stat,
u32 request_mask, unsigned int query_flags)
{
return -EIO;
}
-static int bad_inode_setattr(struct dentry *direntry, struct iattr *attrs)
+static int bad_inode_setattr(struct user_namespace *mnt_userns,
+ struct dentry *direntry, struct iattr *attrs)
{
return -EIO;
}
@@ -140,13 +146,15 @@ static int bad_inode_atomic_open(struct inode *inode, struct dentry *dentry,
return -EIO;
}
-static int bad_inode_tmpfile(struct inode *inode, struct dentry *dentry,
+static int bad_inode_tmpfile(struct user_namespace *mnt_userns,
+ struct inode *inode, struct dentry *dentry,
umode_t mode)
{
return -EIO;
}
-static int bad_inode_set_acl(struct inode *inode, struct posix_acl *acl,
+static int bad_inode_set_acl(struct user_namespace *mnt_userns,
+ struct inode *inode, struct posix_acl *acl,
int type)
{
return -EIO;
diff --git a/fs/bfs/dir.c b/fs/bfs/dir.c
index d8dfe3a0cb39..34d4f68f786b 100644
--- a/fs/bfs/dir.c
+++ b/fs/bfs/dir.c
@@ -75,8 +75,8 @@ const struct file_operations bfs_dir_operations = {
.llseek = generic_file_llseek,
};
-static int bfs_create(struct inode *dir, struct dentry *dentry, umode_t mode,
- bool excl)
+static int bfs_create(struct user_namespace *mnt_userns, struct inode *dir,
+ struct dentry *dentry, umode_t mode, bool excl)
{
int err;
struct inode *inode;
@@ -96,7 +96,7 @@ static int bfs_create(struct inode *dir, struct dentry *dentry, umode_t mode,
}
set_bit(ino, info->si_imap);
info->si_freei--;
- inode_init_owner(inode, dir, mode);
+ inode_init_owner(&init_user_ns, inode, dir, mode);
inode->i_mtime = inode->i_atime = inode->i_ctime = current_time(inode);
inode->i_blocks = 0;
inode->i_op = &bfs_file_inops;
@@ -199,9 +199,9 @@ out_brelse:
return error;
}
-static int bfs_rename(struct inode *old_dir, struct dentry *old_dentry,
- struct inode *new_dir, struct dentry *new_dentry,
- unsigned int flags)
+static int bfs_rename(struct user_namespace *mnt_userns, struct inode *old_dir,
+ struct dentry *old_dentry, struct inode *new_dir,
+ struct dentry *new_dentry, unsigned int flags)
{
struct inode *old_inode, *new_inode;
struct buffer_head *old_bh, *new_bh;
diff --git a/fs/block_dev.c b/fs/block_dev.c
index ec26179c8062..91ff9bfd7c1a 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -1270,7 +1270,7 @@ rescan:
return ret;
}
/*
- * Only exported for for loop and dasd for historic reasons. Don't use in new
+ * Only exported for loop and dasd for historic reasons. Don't use in new
* code!
*/
EXPORT_SYMBOL_GPL(bdev_disk_changed);
diff --git a/fs/btrfs/acl.c b/fs/btrfs/acl.c
index a0af1b952c4d..d95eb5c8cb37 100644
--- a/fs/btrfs/acl.c
+++ b/fs/btrfs/acl.c
@@ -107,13 +107,15 @@ out:
return ret;
}
-int btrfs_set_acl(struct inode *inode, struct posix_acl *acl, int type)
+int btrfs_set_acl(struct user_namespace *mnt_userns, struct inode *inode,
+ struct posix_acl *acl, int type)
{
int ret;
umode_t old_mode = inode->i_mode;
if (type == ACL_TYPE_ACCESS && acl) {
- ret = posix_acl_update_mode(inode, &inode->i_mode, &acl);
+ ret = posix_acl_update_mode(&init_user_ns, inode,
+ &inode->i_mode, &acl);
if (ret)
return ret;
}
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 3bc00aed13b2..bd659354d043 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -3635,7 +3635,8 @@ static inline int __btrfs_fs_compat_ro(struct btrfs_fs_info *fs_info, u64 flag)
/* acl.c */
#ifdef CONFIG_BTRFS_FS_POSIX_ACL
struct posix_acl *btrfs_get_acl(struct inode *inode, int type);
-int btrfs_set_acl(struct inode *inode, struct posix_acl *acl, int type);
+int btrfs_set_acl(struct user_namespace *mnt_userns, struct inode *inode,
+ struct posix_acl *acl, int type);
int btrfs_init_acl(struct btrfs_trans_handle *trans,
struct inode *inode, struct inode *dir);
#else
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index be9e3900cce8..bf2c51a9607a 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -3634,7 +3634,7 @@ static ssize_t btrfs_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
return ret;
}
- return generic_file_buffered_read(iocb, to, ret);
+ return filemap_read(iocb, to, ret);
}
const struct file_operations btrfs_file_operations = {
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 535abf898225..2e1c282c202d 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -5212,7 +5212,8 @@ static int btrfs_setsize(struct inode *inode, struct iattr *attr)
return ret;
}
-static int btrfs_setattr(struct dentry *dentry, struct iattr *attr)
+static int btrfs_setattr(struct user_namespace *mnt_userns, struct dentry *dentry,
+ struct iattr *attr)
{
struct inode *inode = d_inode(dentry);
struct btrfs_root *root = BTRFS_I(inode)->root;
@@ -5221,7 +5222,7 @@ static int btrfs_setattr(struct dentry *dentry, struct iattr *attr)
if (btrfs_root_readonly(root))
return -EROFS;
- err = setattr_prepare(dentry, attr);
+ err = setattr_prepare(&init_user_ns, dentry, attr);
if (err)
return err;
@@ -5232,12 +5233,13 @@ static int btrfs_setattr(struct dentry *dentry, struct iattr *attr)
}
if (attr->ia_valid) {
- setattr_copy(inode, attr);
+ setattr_copy(&init_user_ns, inode, attr);
inode_inc_iversion(inode);
err = btrfs_dirty_inode(inode);
if (!err && attr->ia_valid & ATTR_MODE)
- err = posix_acl_chmod(inode, inode->i_mode);
+ err = posix_acl_chmod(&init_user_ns, inode,
+ inode->i_mode);
}
return err;
@@ -6357,7 +6359,7 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans,
if (ret != 0)
goto fail_unlock;
- inode_init_owner(inode, dir, mode);
+ inode_init_owner(&init_user_ns, inode, dir, mode);
inode_set_bytes(inode, 0);
inode->i_mtime = current_time(inode);
@@ -6518,8 +6520,8 @@ static int btrfs_add_nondir(struct btrfs_trans_handle *trans,
return err;
}
-static int btrfs_mknod(struct inode *dir, struct dentry *dentry,
- umode_t mode, dev_t rdev)
+static int btrfs_mknod(struct user_namespace *mnt_userns, struct inode *dir,
+ struct dentry *dentry, umode_t mode, dev_t rdev)
{
struct btrfs_fs_info *fs_info = btrfs_sb(dir->i_sb);
struct btrfs_trans_handle *trans;
@@ -6582,8 +6584,8 @@ out_unlock:
return err;
}
-static int btrfs_create(struct inode *dir, struct dentry *dentry,
- umode_t mode, bool excl)
+static int btrfs_create(struct user_namespace *mnt_userns, struct inode *dir,
+ struct dentry *dentry, umode_t mode, bool excl)
{
struct btrfs_fs_info *fs_info = btrfs_sb(dir->i_sb);
struct btrfs_trans_handle *trans;
@@ -6727,7 +6729,8 @@ fail:
return err;
}
-static int btrfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
+static int btrfs_mkdir(struct user_namespace *mnt_userns, struct inode *dir,
+ struct dentry *dentry, umode_t mode)
{
struct btrfs_fs_info *fs_info = btrfs_sb(dir->i_sb);
struct inode *inode = NULL;
@@ -9017,7 +9020,8 @@ fail:
return -ENOMEM;
}
-static int btrfs_getattr(const struct path *path, struct kstat *stat,
+static int btrfs_getattr(struct user_namespace *mnt_userns,
+ const struct path *path, struct kstat *stat,
u32 request_mask, unsigned int flags)
{
u64 delalloc_bytes;
@@ -9043,7 +9047,7 @@ static int btrfs_getattr(const struct path *path, struct kstat *stat,
STATX_ATTR_IMMUTABLE |
STATX_ATTR_NODUMP);
- generic_fillattr(inode, stat);
+ generic_fillattr(&init_user_ns, inode, stat);
stat->dev = BTRFS_I(inode)->root->anon_dev;
spin_lock(&BTRFS_I(inode)->lock);
@@ -9534,9 +9538,9 @@ out_notrans:
return ret;
}
-static int btrfs_rename2(struct inode *old_dir, struct dentry *old_dentry,
- struct inode *new_dir, struct dentry *new_dentry,
- unsigned int flags)
+static int btrfs_rename2(struct user_namespace *mnt_userns, struct inode *old_dir,
+ struct dentry *old_dentry, struct inode *new_dir,
+ struct dentry *new_dentry, unsigned int flags)
{
if (flags & ~(RENAME_NOREPLACE | RENAME_EXCHANGE | RENAME_WHITEOUT))
return -EINVAL;
@@ -9744,8 +9748,8 @@ out:
return ret;
}
-static int btrfs_symlink(struct inode *dir, struct dentry *dentry,
- const char *symname)
+static int btrfs_symlink(struct user_namespace *mnt_userns, struct inode *dir,
+ struct dentry *dentry, const char *symname)
{
struct btrfs_fs_info *fs_info = btrfs_sb(dir->i_sb);
struct btrfs_trans_handle *trans;
@@ -10079,7 +10083,8 @@ static int btrfs_set_page_dirty(struct page *page)
return __set_page_dirty_nobuffers(page);
}
-static int btrfs_permission(struct inode *inode, int mask)
+static int btrfs_permission(struct user_namespace *mnt_userns,
+ struct inode *inode, int mask)
{
struct btrfs_root *root = BTRFS_I(inode)->root;
umode_t mode = inode->i_mode;
@@ -10091,10 +10096,11 @@ static int btrfs_permission(struct inode *inode, int mask)
if (BTRFS_I(inode)->flags & BTRFS_INODE_READONLY)
return -EACCES;
}
- return generic_permission(inode, mask);
+ return generic_permission(&init_user_ns, inode, mask);
}
-static int btrfs_tmpfile(struct inode *dir, struct dentry *dentry, umode_t mode)
+static int btrfs_tmpfile(struct user_namespace *mnt_userns, struct inode *dir,
+ struct dentry *dentry, umode_t mode)
{
struct btrfs_fs_info *fs_info = btrfs_sb(dir->i_sb);
struct btrfs_trans_handle *trans;
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index a8c60d46d19c..072e77726e94 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -213,7 +213,7 @@ static int btrfs_ioctl_setflags(struct file *file, void __user *arg)
const char *comp = NULL;
u32 binode_flags;
- if (!inode_owner_or_capable(inode))
+ if (!inode_owner_or_capable(&init_user_ns, inode))
return -EPERM;
if (btrfs_root_readonly(root))
@@ -429,7 +429,7 @@ static int btrfs_ioctl_fssetxattr(struct file *file, void __user *arg)
unsigned old_i_flags;
int ret = 0;
- if (!inode_owner_or_capable(inode))
+ if (!inode_owner_or_capable(&init_user_ns, inode))
return -EPERM;
if (btrfs_root_readonly(root))
@@ -925,13 +925,14 @@ static int btrfs_may_delete(struct inode *dir, struct dentry *victim, int isdir)
BUG_ON(d_inode(victim->d_parent) != dir);
audit_inode_child(dir, victim, AUDIT_TYPE_CHILD_DELETE);
- error = inode_permission(dir, MAY_WRITE | MAY_EXEC);
+ error = inode_permission(&init_user_ns, dir, MAY_WRITE | MAY_EXEC);
if (error)
return error;
if (IS_APPEND(dir))
return -EPERM;
- if (check_sticky(dir, d_inode(victim)) || IS_APPEND(d_inode(victim)) ||
- IS_IMMUTABLE(d_inode(victim)) || IS_SWAPFILE(d_inode(victim)))
+ if (check_sticky(&init_user_ns, dir, d_inode(victim)) ||
+ IS_APPEND(d_inode(victim)) || IS_IMMUTABLE(d_inode(victim)) ||
+ IS_SWAPFILE(d_inode(victim)))
return -EPERM;
if (isdir) {
if (!d_is_dir(victim))
@@ -954,7 +955,7 @@ static inline int btrfs_may_create(struct inode *dir, struct dentry *child)
return -EEXIST;
if (IS_DEADDIR(dir))
return -ENOENT;
- return inode_permission(dir, MAY_WRITE | MAY_EXEC);
+ return inode_permission(&init_user_ns, dir, MAY_WRITE | MAY_EXEC);
}
/*
@@ -1871,7 +1872,7 @@ static noinline int __btrfs_ioctl_snap_create(struct file *file,
btrfs_info(BTRFS_I(file_inode(file))->root->fs_info,
"Snapshot src from another FS");
ret = -EXDEV;
- } else if (!inode_owner_or_capable(src_inode)) {
+ } else if (!inode_owner_or_capable(&init_user_ns, src_inode)) {
/*
* Subvolume creation is not restricted, but snapshots
* are limited to own subvolumes only
@@ -1991,7 +1992,7 @@ static noinline int btrfs_ioctl_subvol_setflags(struct file *file,
u64 flags;
int ret = 0;
- if (!inode_owner_or_capable(inode))
+ if (!inode_owner_or_capable(&init_user_ns, inode))
return -EPERM;
ret = mnt_want_write_file(file);
@@ -2547,7 +2548,8 @@ static int btrfs_search_path_in_tree_user(struct inode *inode,
ret = PTR_ERR(temp_inode);
goto out_put;
}
- ret = inode_permission(temp_inode, MAY_READ | MAY_EXEC);
+ ret = inode_permission(&init_user_ns, temp_inode,
+ MAY_READ | MAY_EXEC);
iput(temp_inode);
if (ret) {
ret = -EACCES;
@@ -3077,7 +3079,8 @@ static noinline int btrfs_ioctl_snap_destroy(struct file *file,
if (root == dest)
goto out_dput;
- err = inode_permission(inode, MAY_WRITE | MAY_EXEC);
+ err = inode_permission(&init_user_ns, inode,
+ MAY_WRITE | MAY_EXEC);
if (err)
goto out_dput;
}
@@ -3148,7 +3151,7 @@ static int btrfs_ioctl_defrag(struct file *file, void __user *argp)
* running and allows defrag on files open in read-only mode.
*/
if (!capable(CAP_SYS_ADMIN) &&
- inode_permission(inode, MAY_WRITE)) {
+ inode_permission(&init_user_ns, inode, MAY_WRITE)) {
ret = -EPERM;
goto out;
}
@@ -4460,7 +4463,7 @@ static long _btrfs_ioctl_set_received_subvol(struct file *file,
int ret = 0;
int received_uuid_changed;
- if (!inode_owner_or_capable(inode))
+ if (!inode_owner_or_capable(&init_user_ns, inode))
return -EPERM;
ret = mnt_want_write_file(file);
diff --git a/fs/btrfs/tests/btrfs-tests.c b/fs/btrfs/tests/btrfs-tests.c
index 6bd97bd4cb37..3a4099a2bf05 100644
--- a/fs/btrfs/tests/btrfs-tests.c
+++ b/fs/btrfs/tests/btrfs-tests.c
@@ -62,7 +62,7 @@ struct inode *btrfs_new_test_inode(void)
BTRFS_I(inode)->location.type = BTRFS_INODE_ITEM_KEY;
BTRFS_I(inode)->location.objectid = BTRFS_FIRST_FREE_OBJECTID;
BTRFS_I(inode)->location.offset = 0;
- inode_init_owner(inode, NULL, S_IFREG);
+ inode_init_owner(&init_user_ns, inode, NULL, S_IFREG);
return inode;
}
diff --git a/fs/btrfs/xattr.c b/fs/btrfs/xattr.c
index af6246f36a9e..b025102e435f 100644
--- a/fs/btrfs/xattr.c
+++ b/fs/btrfs/xattr.c
@@ -362,6 +362,7 @@ static int btrfs_xattr_handler_get(const struct xattr_handler *handler,
}
static int btrfs_xattr_handler_set(const struct xattr_handler *handler,
+ struct user_namespace *mnt_userns,
struct dentry *unused, struct inode *inode,
const char *name, const void *buffer,
size_t size, int flags)
@@ -371,6 +372,7 @@ static int btrfs_xattr_handler_set(const struct xattr_handler *handler,
}
static int btrfs_xattr_handler_set_prop(const struct xattr_handler *handler,
+ struct user_namespace *mnt_userns,
struct dentry *unused, struct inode *inode,
const char *name, const void *value,
size_t size, int flags)
diff --git a/fs/buffer.c b/fs/buffer.c
index 32647d2011df..0cb7ffd4977c 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -847,7 +847,8 @@ struct buffer_head *alloc_page_buffers(struct page *page, unsigned long size,
if (retry)
gfp |= __GFP_NOFAIL;
- memcg = get_mem_cgroup_from_page(page);
+ /* The page lock pins the memcg */
+ memcg = page_memcg(page);
old_memcg = set_active_memcg(memcg);
head = NULL;
@@ -868,7 +869,6 @@ struct buffer_head *alloc_page_buffers(struct page *page, unsigned long size,
}
out:
set_active_memcg(old_memcg);
- mem_cgroup_put(memcg);
return head;
/*
* In case anything failed, we just free everything we got.
@@ -2083,7 +2083,8 @@ static int __block_commit_write(struct inode *inode, struct page *page,
set_buffer_uptodate(bh);
mark_buffer_dirty(bh);
}
- clear_buffer_new(bh);
+ if (buffer_new(bh))
+ clear_buffer_new(bh);
block_start = block_end;
bh = bh->b_this_page;
diff --git a/fs/cachefiles/interface.c b/fs/cachefiles/interface.c
index 4cea5fbf695e..5efa6a3702c0 100644
--- a/fs/cachefiles/interface.c
+++ b/fs/cachefiles/interface.c
@@ -470,14 +470,14 @@ static int cachefiles_attr_changed(struct fscache_object *_object)
_debug("discard tail %llx", oi_size);
newattrs.ia_valid = ATTR_SIZE;
newattrs.ia_size = oi_size & PAGE_MASK;
- ret = notify_change(object->backer, &newattrs, NULL);
+ ret = notify_change(&init_user_ns, object->backer, &newattrs, NULL);
if (ret < 0)
goto truncate_failed;
}
newattrs.ia_valid = ATTR_SIZE;
newattrs.ia_size = ni_size;
- ret = notify_change(object->backer, &newattrs, NULL);
+ ret = notify_change(&init_user_ns, object->backer, &newattrs, NULL);
truncate_failed:
inode_unlock(d_inode(object->backer));
diff --git a/fs/cachefiles/namei.c b/fs/cachefiles/namei.c
index ecc8ecbbfa5a..7bf0732ae25c 100644
--- a/fs/cachefiles/namei.c
+++ b/fs/cachefiles/namei.c
@@ -311,7 +311,8 @@ static int cachefiles_bury_object(struct cachefiles_cache *cache,
cachefiles_io_error(cache, "Unlink security error");
} else {
trace_cachefiles_unlink(object, rep, why);
- ret = vfs_unlink(d_inode(dir), rep, NULL);
+ ret = vfs_unlink(&init_user_ns, d_inode(dir), rep,
+ NULL);
if (preemptive)
cachefiles_mark_object_buried(cache, rep, why);
@@ -412,9 +413,16 @@ try_again:
if (ret < 0) {
cachefiles_io_error(cache, "Rename security error %d", ret);
} else {
+ struct renamedata rd = {
+ .old_mnt_userns = &init_user_ns,
+ .old_dir = d_inode(dir),
+ .old_dentry = rep,
+ .new_mnt_userns = &init_user_ns,
+ .new_dir = d_inode(cache->graveyard),
+ .new_dentry = grave,
+ };
trace_cachefiles_rename(object, rep, grave, why);
- ret = vfs_rename(d_inode(dir), rep,
- d_inode(cache->graveyard), grave, NULL, 0);
+ ret = vfs_rename(&rd);
if (ret != 0 && ret != -ENOMEM)
cachefiles_io_error(cache,
"Rename failed with error %d", ret);
@@ -561,7 +569,7 @@ lookup_again:
if (ret < 0)
goto create_error;
start = jiffies;
- ret = vfs_mkdir(d_inode(dir), next, 0);
+ ret = vfs_mkdir(&init_user_ns, d_inode(dir), next, 0);
cachefiles_hist(cachefiles_mkdir_histogram, start);
if (!key)
trace_cachefiles_mkdir(object, next, ret);
@@ -597,7 +605,8 @@ lookup_again:
if (ret < 0)
goto create_error;
start = jiffies;
- ret = vfs_create(d_inode(dir), next, S_IFREG, true);
+ ret = vfs_create(&init_user_ns, d_inode(dir), next,
+ S_IFREG, true);
cachefiles_hist(cachefiles_create_histogram, start);
trace_cachefiles_create(object, next, ret);
if (ret < 0)
@@ -791,7 +800,7 @@ retry:
ret = security_path_mkdir(&path, subdir, 0700);
if (ret < 0)
goto mkdir_error;
- ret = vfs_mkdir(d_inode(dir), subdir, 0700);
+ ret = vfs_mkdir(&init_user_ns, d_inode(dir), subdir, 0700);
if (ret < 0)
goto mkdir_error;
diff --git a/fs/cachefiles/xattr.c b/fs/cachefiles/xattr.c
index 72e42438f3d7..a591b5e09637 100644
--- a/fs/cachefiles/xattr.c
+++ b/fs/cachefiles/xattr.c
@@ -39,8 +39,8 @@ int cachefiles_check_object_type(struct cachefiles_object *object)
_enter("%p{%s}", object, type);
/* attempt to install a type label directly */
- ret = vfs_setxattr(dentry, cachefiles_xattr_cache, type, 2,
- XATTR_CREATE);
+ ret = vfs_setxattr(&init_user_ns, dentry, cachefiles_xattr_cache, type,
+ 2, XATTR_CREATE);
if (ret == 0) {
_debug("SET"); /* we succeeded */
goto error;
@@ -54,7 +54,8 @@ int cachefiles_check_object_type(struct cachefiles_object *object)
}
/* read the current type label */
- ret = vfs_getxattr(dentry, cachefiles_xattr_cache, xtype, 3);
+ ret = vfs_getxattr(&init_user_ns, dentry, cachefiles_xattr_cache, xtype,
+ 3);
if (ret < 0) {
if (ret == -ERANGE)
goto bad_type_length;
@@ -110,9 +111,8 @@ int cachefiles_set_object_xattr(struct cachefiles_object *object,
_debug("SET #%u", auxdata->len);
clear_bit(FSCACHE_COOKIE_AUX_UPDATED, &object->fscache.cookie->flags);
- ret = vfs_setxattr(dentry, cachefiles_xattr_cache,
- &auxdata->type, auxdata->len,
- XATTR_CREATE);
+ ret = vfs_setxattr(&init_user_ns, dentry, cachefiles_xattr_cache,
+ &auxdata->type, auxdata->len, XATTR_CREATE);
if (ret < 0 && ret != -ENOMEM)
cachefiles_io_error_obj(
object,
@@ -140,9 +140,8 @@ int cachefiles_update_object_xattr(struct cachefiles_object *object,
_debug("SET #%u", auxdata->len);
clear_bit(FSCACHE_COOKIE_AUX_UPDATED, &object->fscache.cookie->flags);
- ret = vfs_setxattr(dentry, cachefiles_xattr_cache,
- &auxdata->type, auxdata->len,
- XATTR_REPLACE);
+ ret = vfs_setxattr(&init_user_ns, dentry, cachefiles_xattr_cache,
+ &auxdata->type, auxdata->len, XATTR_REPLACE);
if (ret < 0 && ret != -ENOMEM)
cachefiles_io_error_obj(
object,
@@ -171,7 +170,7 @@ int cachefiles_check_auxdata(struct cachefiles_object *object)
if (!auxbuf)
return -ENOMEM;
- xlen = vfs_getxattr(dentry, cachefiles_xattr_cache,
+ xlen = vfs_getxattr(&init_user_ns, dentry, cachefiles_xattr_cache,
&auxbuf->type, 512 + 1);
ret = -ESTALE;
if (xlen < 1 ||
@@ -213,7 +212,7 @@ int cachefiles_check_object_xattr(struct cachefiles_object *object,
}
/* read the current type label */
- ret = vfs_getxattr(dentry, cachefiles_xattr_cache,
+ ret = vfs_getxattr(&init_user_ns, dentry, cachefiles_xattr_cache,
&auxbuf->type, 512 + 1);
if (ret < 0) {
if (ret == -ENODATA)
@@ -270,9 +269,9 @@ int cachefiles_check_object_xattr(struct cachefiles_object *object,
}
/* update the current label */
- ret = vfs_setxattr(dentry, cachefiles_xattr_cache,
- &auxdata->type, auxdata->len,
- XATTR_REPLACE);
+ ret = vfs_setxattr(&init_user_ns, dentry,
+ cachefiles_xattr_cache, &auxdata->type,
+ auxdata->len, XATTR_REPLACE);
if (ret < 0) {
cachefiles_io_error_obj(object,
"Can't update xattr on %lu"
@@ -309,7 +308,7 @@ int cachefiles_remove_object_xattr(struct cachefiles_cache *cache,
{
int ret;
- ret = vfs_removexattr(dentry, cachefiles_xattr_cache);
+ ret = vfs_removexattr(&init_user_ns, dentry, cachefiles_xattr_cache);
if (ret < 0) {
if (ret == -ENOENT || ret == -ENODATA)
ret = 0;
diff --git a/fs/ceph/acl.c b/fs/ceph/acl.c
index e0465741c591..529af59d9fd3 100644
--- a/fs/ceph/acl.c
+++ b/fs/ceph/acl.c
@@ -82,7 +82,8 @@ retry:
return acl;
}
-int ceph_set_acl(struct inode *inode, struct posix_acl *acl, int type)
+int ceph_set_acl(struct user_namespace *mnt_userns, struct inode *inode,
+ struct posix_acl *acl, int type)
{
int ret = 0, size = 0;
const char *name = NULL;
@@ -100,7 +101,8 @@ int ceph_set_acl(struct inode *inode, struct posix_acl *acl, int type)
case ACL_TYPE_ACCESS:
name = XATTR_NAME_POSIX_ACL_ACCESS;
if (acl) {
- ret = posix_acl_update_mode(inode, &new_mode, &acl);
+ ret = posix_acl_update_mode(&init_user_ns, inode,
+ &new_mode, &acl);
if (ret)
goto out;
}
diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c
index 950552944436..26e66436f005 100644
--- a/fs/ceph/addr.c
+++ b/fs/ceph/addr.c
@@ -1662,7 +1662,7 @@ static vm_fault_t ceph_page_mkwrite(struct vm_fault *vmf)
dout("page_mkwrite %p %llu~%zd dropping cap refs on %s ret %x\n",
inode, off, len, ceph_cap_string(got), ret);
- ceph_put_cap_refs(ci, got);
+ ceph_put_cap_refs_async(ci, got);
out_free:
ceph_restore_sigs(&oldset);
sb_end_pagefault(inode->i_sb);
diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
index 255a512f1277..570731c4d019 100644
--- a/fs/ceph/caps.c
+++ b/fs/ceph/caps.c
@@ -3027,6 +3027,12 @@ static int ceph_try_drop_cap_snap(struct ceph_inode_info *ci,
return 0;
}
+enum put_cap_refs_mode {
+ PUT_CAP_REFS_SYNC = 0,
+ PUT_CAP_REFS_NO_CHECK,
+ PUT_CAP_REFS_ASYNC,
+};
+
/*
* Release cap refs.
*
@@ -3037,10 +3043,11 @@ static int ceph_try_drop_cap_snap(struct ceph_inode_info *ci,
* cap_snap, and wake up any waiters.
*/
static void __ceph_put_cap_refs(struct ceph_inode_info *ci, int had,
- bool skip_checking_caps)
+ enum put_cap_refs_mode mode)
{
struct inode *inode = &ci->vfs_inode;
int last = 0, put = 0, flushsnaps = 0, wake = 0;
+ bool check_flushsnaps = false;
spin_lock(&ci->i_ceph_lock);
if (had & CEPH_CAP_PIN)
@@ -3057,26 +3064,17 @@ static void __ceph_put_cap_refs(struct ceph_inode_info *ci, int had,
if (had & CEPH_CAP_FILE_BUFFER) {
if (--ci->i_wb_ref == 0) {
last++;
+ /* put the ref held by ceph_take_cap_refs() */
put++;
+ check_flushsnaps = true;
}
dout("put_cap_refs %p wb %d -> %d (?)\n",
inode, ci->i_wb_ref+1, ci->i_wb_ref);
}
- if (had & CEPH_CAP_FILE_WR)
+ if (had & CEPH_CAP_FILE_WR) {
if (--ci->i_wr_ref == 0) {
last++;
- if (__ceph_have_pending_cap_snap(ci)) {
- struct ceph_cap_snap *capsnap =
- list_last_entry(&ci->i_cap_snaps,
- struct ceph_cap_snap,
- ci_item);
- capsnap->writing = 0;
- if (ceph_try_drop_cap_snap(ci, capsnap))
- put++;
- else if (__ceph_finish_cap_snap(ci, capsnap))
- flushsnaps = 1;
- wake = 1;
- }
+ check_flushsnaps = true;
if (ci->i_wrbuffer_ref_head == 0 &&
ci->i_dirty_caps == 0 &&
ci->i_flushing_caps == 0) {
@@ -3088,15 +3086,42 @@ static void __ceph_put_cap_refs(struct ceph_inode_info *ci, int had,
if (!__ceph_is_any_real_caps(ci) && ci->i_snap_realm)
drop_inode_snap_realm(ci);
}
+ }
+ if (check_flushsnaps && __ceph_have_pending_cap_snap(ci)) {
+ struct ceph_cap_snap *capsnap =
+ list_last_entry(&ci->i_cap_snaps,
+ struct ceph_cap_snap,
+ ci_item);
+
+ capsnap->writing = 0;
+ if (ceph_try_drop_cap_snap(ci, capsnap))
+ /* put the ref held by ceph_queue_cap_snap() */
+ put++;
+ else if (__ceph_finish_cap_snap(ci, capsnap))
+ flushsnaps = 1;
+ wake = 1;
+ }
spin_unlock(&ci->i_ceph_lock);
dout("put_cap_refs %p had %s%s%s\n", inode, ceph_cap_string(had),
last ? " last" : "", put ? " put" : "");
- if (last && !skip_checking_caps)
- ceph_check_caps(ci, 0, NULL);
- else if (flushsnaps)
- ceph_flush_snaps(ci, NULL);
+ switch (mode) {
+ case PUT_CAP_REFS_SYNC:
+ if (last)
+ ceph_check_caps(ci, 0, NULL);
+ else if (flushsnaps)
+ ceph_flush_snaps(ci, NULL);
+ break;
+ case PUT_CAP_REFS_ASYNC:
+ if (last)
+ ceph_queue_check_caps(inode);
+ else if (flushsnaps)
+ ceph_queue_flush_snaps(inode);
+ break;
+ default:
+ break;
+ }
if (wake)
wake_up_all(&ci->i_cap_wq);
while (put-- > 0)
@@ -3105,12 +3130,17 @@ static void __ceph_put_cap_refs(struct ceph_inode_info *ci, int had,
void ceph_put_cap_refs(struct ceph_inode_info *ci, int had)
{
- __ceph_put_cap_refs(ci, had, false);
+ __ceph_put_cap_refs(ci, had, PUT_CAP_REFS_SYNC);
+}
+
+void ceph_put_cap_refs_async(struct ceph_inode_info *ci, int had)
+{
+ __ceph_put_cap_refs(ci, had, PUT_CAP_REFS_ASYNC);
}
void ceph_put_cap_refs_no_check_caps(struct ceph_inode_info *ci, int had)
{
- __ceph_put_cap_refs(ci, had, true);
+ __ceph_put_cap_refs(ci, had, PUT_CAP_REFS_NO_CHECK);
}
/*
diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c
index 858ee7362ff5..83d9358854fb 100644
--- a/fs/ceph/dir.c
+++ b/fs/ceph/dir.c
@@ -823,8 +823,8 @@ int ceph_handle_notrace_create(struct inode *dir, struct dentry *dentry)
return PTR_ERR(result);
}
-static int ceph_mknod(struct inode *dir, struct dentry *dentry,
- umode_t mode, dev_t rdev)
+static int ceph_mknod(struct user_namespace *mnt_userns, struct inode *dir,
+ struct dentry *dentry, umode_t mode, dev_t rdev)
{
struct ceph_mds_client *mdsc = ceph_sb_to_mdsc(dir->i_sb);
struct ceph_mds_request *req;
@@ -878,14 +878,14 @@ out:
return err;
}
-static int ceph_create(struct inode *dir, struct dentry *dentry, umode_t mode,
- bool excl)
+static int ceph_create(struct user_namespace *mnt_userns, struct inode *dir,
+ struct dentry *dentry, umode_t mode, bool excl)
{
- return ceph_mknod(dir, dentry, mode, 0);
+ return ceph_mknod(mnt_userns, dir, dentry, mode, 0);
}
-static int ceph_symlink(struct inode *dir, struct dentry *dentry,
- const char *dest)
+static int ceph_symlink(struct user_namespace *mnt_userns, struct inode *dir,
+ struct dentry *dentry, const char *dest)
{
struct ceph_mds_client *mdsc = ceph_sb_to_mdsc(dir->i_sb);
struct ceph_mds_request *req;
@@ -937,7 +937,8 @@ out:
return err;
}
-static int ceph_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
+static int ceph_mkdir(struct user_namespace *mnt_userns, struct inode *dir,
+ struct dentry *dentry, umode_t mode)
{
struct ceph_mds_client *mdsc = ceph_sb_to_mdsc(dir->i_sb);
struct ceph_mds_request *req;
@@ -1183,9 +1184,9 @@ out:
return err;
}
-static int ceph_rename(struct inode *old_dir, struct dentry *old_dentry,
- struct inode *new_dir, struct dentry *new_dentry,
- unsigned int flags)
+static int ceph_rename(struct user_namespace *mnt_userns, struct inode *old_dir,
+ struct dentry *old_dentry, struct inode *new_dir,
+ struct dentry *new_dentry, unsigned int flags)
{
struct ceph_mds_client *mdsc = ceph_sb_to_mdsc(old_dir->i_sb);
struct ceph_mds_request *req;
diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c
index adc8fc3c5d85..156f849f5385 100644
--- a/fs/ceph/inode.c
+++ b/fs/ceph/inode.c
@@ -1816,60 +1816,17 @@ void ceph_async_iput(struct inode *inode)
}
}
-/*
- * Write back inode data in a worker thread. (This can't be done
- * in the message handler context.)
- */
-void ceph_queue_writeback(struct inode *inode)
-{
- struct ceph_inode_info *ci = ceph_inode(inode);
- set_bit(CEPH_I_WORK_WRITEBACK, &ci->i_work_mask);
-
- ihold(inode);
- if (queue_work(ceph_inode_to_client(inode)->inode_wq,
- &ci->i_work)) {
- dout("ceph_queue_writeback %p\n", inode);
- } else {
- dout("ceph_queue_writeback %p already queued, mask=%lx\n",
- inode, ci->i_work_mask);
- iput(inode);
- }
-}
-
-/*
- * queue an async invalidation
- */
-void ceph_queue_invalidate(struct inode *inode)
-{
- struct ceph_inode_info *ci = ceph_inode(inode);
- set_bit(CEPH_I_WORK_INVALIDATE_PAGES, &ci->i_work_mask);
-
- ihold(inode);
- if (queue_work(ceph_inode_to_client(inode)->inode_wq,
- &ceph_inode(inode)->i_work)) {
- dout("ceph_queue_invalidate %p\n", inode);
- } else {
- dout("ceph_queue_invalidate %p already queued, mask=%lx\n",
- inode, ci->i_work_mask);
- iput(inode);
- }
-}
-
-/*
- * Queue an async vmtruncate. If we fail to queue work, we will handle
- * the truncation the next time we call __ceph_do_pending_vmtruncate.
- */
-void ceph_queue_vmtruncate(struct inode *inode)
+void ceph_queue_inode_work(struct inode *inode, int work_bit)
{
+ struct ceph_fs_client *fsc = ceph_inode_to_client(inode);
struct ceph_inode_info *ci = ceph_inode(inode);
- set_bit(CEPH_I_WORK_VMTRUNCATE, &ci->i_work_mask);
+ set_bit(work_bit, &ci->i_work_mask);
ihold(inode);
- if (queue_work(ceph_inode_to_client(inode)->inode_wq,
- &ci->i_work)) {
- dout("ceph_queue_vmtruncate %p\n", inode);
+ if (queue_work(fsc->inode_wq, &ci->i_work)) {
+ dout("queue_inode_work %p, mask=%lx\n", inode, ci->i_work_mask);
} else {
- dout("ceph_queue_vmtruncate %p already queued, mask=%lx\n",
+ dout("queue_inode_work %p already queued, mask=%lx\n",
inode, ci->i_work_mask);
iput(inode);
}
@@ -2008,6 +1965,12 @@ static void ceph_inode_work(struct work_struct *work)
if (test_and_clear_bit(CEPH_I_WORK_VMTRUNCATE, &ci->i_work_mask))
__ceph_do_pending_vmtruncate(inode);
+ if (test_and_clear_bit(CEPH_I_WORK_CHECK_CAPS, &ci->i_work_mask))
+ ceph_check_caps(ci, 0, NULL);
+
+ if (test_and_clear_bit(CEPH_I_WORK_FLUSH_SNAPS, &ci->i_work_mask))
+ ceph_flush_snaps(ci, NULL);
+
iput(inode);
}
@@ -2238,7 +2201,8 @@ int __ceph_setattr(struct inode *inode, struct iattr *attr)
/*
* setattr
*/
-int ceph_setattr(struct dentry *dentry, struct iattr *attr)
+int ceph_setattr(struct user_namespace *mnt_userns, struct dentry *dentry,
+ struct iattr *attr)
{
struct inode *inode = d_inode(dentry);
struct ceph_fs_client *fsc = ceph_inode_to_client(inode);
@@ -2247,7 +2211,7 @@ int ceph_setattr(struct dentry *dentry, struct iattr *attr)
if (ceph_snap(inode) != CEPH_NOSNAP)
return -EROFS;
- err = setattr_prepare(dentry, attr);
+ err = setattr_prepare(&init_user_ns, dentry, attr);
if (err != 0)
return err;
@@ -2262,7 +2226,7 @@ int ceph_setattr(struct dentry *dentry, struct iattr *attr)
err = __ceph_setattr(inode, attr);
if (err >= 0 && (attr->ia_valid & ATTR_MODE))
- err = posix_acl_chmod(inode, attr->ia_mode);
+ err = posix_acl_chmod(&init_user_ns, inode, attr->ia_mode);
return err;
}
@@ -2321,7 +2285,8 @@ int __ceph_do_getattr(struct inode *inode, struct page *locked_page,
* Check inode permissions. We verify we have a valid value for
* the AUTH cap, then call the generic handler.
*/
-int ceph_permission(struct inode *inode, int mask)
+int ceph_permission(struct user_namespace *mnt_userns, struct inode *inode,
+ int mask)
{
int err;
@@ -2331,7 +2296,7 @@ int ceph_permission(struct inode *inode, int mask)
err = ceph_do_getattr(inode, CEPH_CAP_AUTH_SHARED, false);
if (!err)
- err = generic_permission(inode, mask);
+ err = generic_permission(&init_user_ns, inode, mask);
return err;
}
@@ -2368,8 +2333,8 @@ static int statx_to_caps(u32 want, umode_t mode)
* Get all the attributes. If we have sufficient caps for the requested attrs,
* then we can avoid talking to the MDS at all.
*/
-int ceph_getattr(const struct path *path, struct kstat *stat,
- u32 request_mask, unsigned int flags)
+int ceph_getattr(struct user_namespace *mnt_userns, const struct path *path,
+ struct kstat *stat, u32 request_mask, unsigned int flags)
{
struct inode *inode = d_inode(path->dentry);
struct ceph_inode_info *ci = ceph_inode(inode);
@@ -2385,7 +2350,7 @@ int ceph_getattr(const struct path *path, struct kstat *stat,
return err;
}
- generic_fillattr(inode, stat);
+ generic_fillattr(&init_user_ns, inode, stat);
stat->ino = ceph_present_inode(inode);
/*
diff --git a/fs/ceph/snap.c b/fs/ceph/snap.c
index b611f829cb61..0728b01d4d43 100644
--- a/fs/ceph/snap.c
+++ b/fs/ceph/snap.c
@@ -623,6 +623,16 @@ int __ceph_finish_cap_snap(struct ceph_inode_info *ci,
return 0;
}
+ /* Fb cap still in use, delay it */
+ if (ci->i_wb_ref) {
+ dout("finish_cap_snap %p cap_snap %p snapc %p %llu %s s=%llu "
+ "used WRBUFFER, delaying\n", inode, capsnap,
+ capsnap->context, capsnap->context->seq,
+ ceph_cap_string(capsnap->dirty), capsnap->size);
+ capsnap->writing = 1;
+ return 0;
+ }
+
ci->i_ceph_flags |= CEPH_I_FLUSH_SNAPS;
dout("finish_cap_snap %p cap_snap %p snapc %p %llu %s s=%llu\n",
inode, capsnap, capsnap->context,
diff --git a/fs/ceph/super.h b/fs/ceph/super.h
index b62d8fee3b86..c48bb30c8d70 100644
--- a/fs/ceph/super.h
+++ b/fs/ceph/super.h
@@ -562,9 +562,11 @@ static inline struct inode *ceph_find_inode(struct super_block *sb,
/*
* Masks of ceph inode work.
*/
-#define CEPH_I_WORK_WRITEBACK 0 /* writeback */
-#define CEPH_I_WORK_INVALIDATE_PAGES 1 /* invalidate pages */
-#define CEPH_I_WORK_VMTRUNCATE 2 /* vmtruncate */
+#define CEPH_I_WORK_WRITEBACK 0
+#define CEPH_I_WORK_INVALIDATE_PAGES 1
+#define CEPH_I_WORK_VMTRUNCATE 2
+#define CEPH_I_WORK_CHECK_CAPS 3
+#define CEPH_I_WORK_FLUSH_SNAPS 4
/*
* We set the ERROR_WRITE bit when we start seeing write errors on an inode
@@ -962,21 +964,49 @@ extern int ceph_inode_holds_cap(struct inode *inode, int mask);
extern bool ceph_inode_set_size(struct inode *inode, loff_t size);
extern void __ceph_do_pending_vmtruncate(struct inode *inode);
-extern void ceph_queue_vmtruncate(struct inode *inode);
-extern void ceph_queue_invalidate(struct inode *inode);
-extern void ceph_queue_writeback(struct inode *inode);
+
extern void ceph_async_iput(struct inode *inode);
+void ceph_queue_inode_work(struct inode *inode, int work_bit);
+
+static inline void ceph_queue_vmtruncate(struct inode *inode)
+{
+ ceph_queue_inode_work(inode, CEPH_I_WORK_VMTRUNCATE);
+}
+
+static inline void ceph_queue_invalidate(struct inode *inode)
+{
+ ceph_queue_inode_work(inode, CEPH_I_WORK_INVALIDATE_PAGES);
+}
+
+static inline void ceph_queue_writeback(struct inode *inode)
+{
+ ceph_queue_inode_work(inode, CEPH_I_WORK_WRITEBACK);
+}
+
+static inline void ceph_queue_check_caps(struct inode *inode)
+{
+ ceph_queue_inode_work(inode, CEPH_I_WORK_CHECK_CAPS);
+}
+
+static inline void ceph_queue_flush_snaps(struct inode *inode)
+{
+ ceph_queue_inode_work(inode, CEPH_I_WORK_FLUSH_SNAPS);
+}
+
extern int __ceph_do_getattr(struct inode *inode, struct page *locked_page,
int mask, bool force);
static inline int ceph_do_getattr(struct inode *inode, int mask, bool force)
{
return __ceph_do_getattr(inode, NULL, mask, force);
}
-extern int ceph_permission(struct inode *inode, int mask);
+extern int ceph_permission(struct user_namespace *mnt_userns,
+ struct inode *inode, int mask);
extern int __ceph_setattr(struct inode *inode, struct iattr *attr);
-extern int ceph_setattr(struct dentry *dentry, struct iattr *attr);
-extern int ceph_getattr(const struct path *path, struct kstat *stat,
+extern int ceph_setattr(struct user_namespace *mnt_userns,
+ struct dentry *dentry, struct iattr *attr);
+extern int ceph_getattr(struct user_namespace *mnt_userns,
+ const struct path *path, struct kstat *stat,
u32 request_mask, unsigned int flags);
/* xattr.c */
@@ -1037,7 +1067,8 @@ void ceph_release_acl_sec_ctx(struct ceph_acl_sec_ctx *as_ctx);
#ifdef CONFIG_CEPH_FS_POSIX_ACL
struct posix_acl *ceph_get_acl(struct inode *, int);
-int ceph_set_acl(struct inode *inode, struct posix_acl *acl, int type);
+int ceph_set_acl(struct user_namespace *mnt_userns,
+ struct inode *inode, struct posix_acl *acl, int type);
int ceph_pre_init_acls(struct inode *dir, umode_t *mode,
struct ceph_acl_sec_ctx *as_ctx);
void ceph_init_inode_acls(struct inode *inode,
@@ -1105,6 +1136,7 @@ extern void ceph_take_cap_refs(struct ceph_inode_info *ci, int caps,
bool snap_rwsem_locked);
extern void ceph_get_cap_refs(struct ceph_inode_info *ci, int caps);
extern void ceph_put_cap_refs(struct ceph_inode_info *ci, int had);
+extern void ceph_put_cap_refs_async(struct ceph_inode_info *ci, int had);
extern void ceph_put_cap_refs_no_check_caps(struct ceph_inode_info *ci,
int had);
extern void ceph_put_wrbuffer_cap_refs(struct ceph_inode_info *ci, int nr,
diff --git a/fs/ceph/xattr.c b/fs/ceph/xattr.c
index 24997982de01..02f59bcb4f27 100644
--- a/fs/ceph/xattr.c
+++ b/fs/ceph/xattr.c
@@ -1238,6 +1238,7 @@ static int ceph_get_xattr_handler(const struct xattr_handler *handler,
}
static int ceph_set_xattr_handler(const struct xattr_handler *handler,
+ struct user_namespace *mnt_userns,
struct dentry *unused, struct inode *inode,
const char *name, const void *value,
size_t size, int flags)
diff --git a/fs/cifs/cifs_debug.c b/fs/cifs/cifs_debug.c
index b231dcf1d1f9..3aedc484e440 100644
--- a/fs/cifs/cifs_debug.c
+++ b/fs/cifs/cifs_debug.c
@@ -133,11 +133,12 @@ cifs_dump_channel(struct seq_file *m, int i, struct cifs_chan *chan)
{
struct TCP_Server_Info *server = chan->server;
- seq_printf(m, "\t\tChannel %d Number of credits: %d Dialect 0x%x "
- "TCP status: %d Instance: %d Local Users To Server: %d "
- "SecMode: 0x%x Req On Wire: %d In Send: %d "
- "In MaxReq Wait: %d\n",
- i+1,
+ seq_printf(m, "\n\n\t\tChannel: %d ConnectionId: 0x%llx"
+ "\n\t\tNumber of credits: %d Dialect 0x%x"
+ "\n\t\tTCP status: %d Instance: %d"
+ "\n\t\tLocal Users To Server: %d SecMode: 0x%x Req On Wire: %d"
+ "\n\t\tIn Send: %d In MaxReq Wait: %d",
+ i+1, server->conn_id,
server->credits,
server->dialect,
server->tcpStatus,
@@ -197,14 +198,14 @@ static int cifs_debug_files_proc_show(struct seq_file *m, void *v)
cfile = list_entry(tmp2, struct cifsFileInfo,
tlist);
seq_printf(m,
- "0x%x 0x%llx 0x%x %d %d %d %s",
+ "0x%x 0x%llx 0x%x %d %d %d %pd",
tcon->tid,
cfile->fid.persistent_fid,
cfile->f_flags,
cfile->count,
cfile->pid,
from_kuid(&init_user_ns, cfile->uid),
- cfile->dentry->d_name.name);
+ cfile->dentry);
#ifdef CONFIG_CIFS_DEBUG2
seq_printf(m, " 0x%llx\n", cfile->fid.mid);
#else
@@ -227,7 +228,7 @@ static int cifs_debug_data_proc_show(struct seq_file *m, void *v)
struct TCP_Server_Info *server;
struct cifs_ses *ses;
struct cifs_tcon *tcon;
- int i, j;
+ int c, i, j;
seq_puts(m,
"Display Internal CIFS Data Structures for Debugging\n"
@@ -275,14 +276,25 @@ static int cifs_debug_data_proc_show(struct seq_file *m, void *v)
seq_putc(m, '\n');
seq_printf(m, "CIFSMaxBufSize: %d\n", CIFSMaxBufSize);
seq_printf(m, "Active VFS Requests: %d\n", GlobalTotalActiveXid);
- seq_printf(m, "Servers:");
- i = 0;
+ seq_printf(m, "\nServers: ");
+
+ c = 0;
spin_lock(&cifs_tcp_ses_lock);
list_for_each(tmp1, &cifs_tcp_ses_list) {
server = list_entry(tmp1, struct TCP_Server_Info,
tcp_ses_list);
+ /* channel info will be printed as a part of sessions below */
+ if (server->is_channel)
+ continue;
+
+ c++;
+ seq_printf(m, "\n%d) ConnectionId: 0x%llx ",
+ c, server->conn_id);
+
+ if (server->hostname)
+ seq_printf(m, "Hostname: %s ", server->hostname);
#ifdef CONFIG_CIFS_SMB_DIRECT
if (!server->rdma)
goto skip_rdma;
@@ -362,46 +374,48 @@ skip_rdma:
if (server->posix_ext_supported)
seq_printf(m, " posix");
- i++;
+ if (server->rdma)
+ seq_printf(m, "\nRDMA ");
+ seq_printf(m, "\nTCP status: %d Instance: %d"
+ "\nLocal Users To Server: %d SecMode: 0x%x Req On Wire: %d",
+ server->tcpStatus,
+ server->reconnect_instance,
+ server->srv_count,
+ server->sec_mode, in_flight(server));
+
+ seq_printf(m, "\nIn Send: %d In MaxReq Wait: %d",
+ atomic_read(&server->in_send),
+ atomic_read(&server->num_waiters));
+
+ seq_printf(m, "\n\n\tSessions: ");
+ i = 0;
list_for_each(tmp2, &server->smb_ses_list) {
ses = list_entry(tmp2, struct cifs_ses,
smb_ses_list);
+ i++;
if ((ses->serverDomain == NULL) ||
(ses->serverOS == NULL) ||
(ses->serverNOS == NULL)) {
- seq_printf(m, "\n%d) Name: %s Uses: %d Capability: 0x%x\tSession Status: %d ",
- i, ses->serverName, ses->ses_count,
+ seq_printf(m, "\n\t%d) Address: %s Uses: %d Capability: 0x%x\tSession Status: %d ",
+ i, ses->ip_addr, ses->ses_count,
ses->capabilities, ses->status);
if (ses->session_flags & SMB2_SESSION_FLAG_IS_GUEST)
- seq_printf(m, "Guest\t");
+ seq_printf(m, "Guest ");
else if (ses->session_flags & SMB2_SESSION_FLAG_IS_NULL)
- seq_printf(m, "Anonymous\t");
+ seq_printf(m, "Anonymous ");
} else {
seq_printf(m,
- "\n%d) Name: %s Domain: %s Uses: %d OS:"
- " %s\n\tNOS: %s\tCapability: 0x%x\n\tSMB"
- " session status: %d ",
- i, ses->serverName, ses->serverDomain,
+ "\n\t%d) Name: %s Domain: %s Uses: %d OS: %s "
+ "\n\tNOS: %s\tCapability: 0x%x"
+ "\n\tSMB session status: %d ",
+ i, ses->ip_addr, ses->serverDomain,
ses->ses_count, ses->serverOS, ses->serverNOS,
ses->capabilities, ses->status);
}
- seq_printf(m,"Security type: %s\n",
+ seq_printf(m, "\n\tSecurity type: %s ",
get_security_type_str(server->ops->select_sectype(server, ses->sectype)));
- if (server->rdma)
- seq_printf(m, "RDMA\n\t");
- seq_printf(m, "TCP status: %d Instance: %d\n\tLocal Users To "
- "Server: %d SecMode: 0x%x Req On Wire: %d",
- server->tcpStatus,
- server->reconnect_instance,
- server->srv_count,
- server->sec_mode, in_flight(server));
-
- seq_printf(m, " In Send: %d In MaxReq Wait: %d",
- atomic_read(&server->in_send),
- atomic_read(&server->num_waiters));
-
/* dump session id helpful for use with network trace */
seq_printf(m, " SessionId: 0x%llx", ses->Suid);
if (ses->session_flags & SMB2_SESSION_FLAG_ENCRYPT_DATA)
@@ -414,13 +428,13 @@ skip_rdma:
from_kuid(&init_user_ns, ses->cred_uid));
if (ses->chan_count > 1) {
- seq_printf(m, "\n\n\tExtra Channels: %zu\n",
+ seq_printf(m, "\n\n\tExtra Channels: %zu ",
ses->chan_count-1);
for (j = 1; j < ses->chan_count; j++)
cifs_dump_channel(m, j, &ses->chans[j]);
}
- seq_puts(m, "\n\n\tShares:");
+ seq_puts(m, "\n\n\tShares: ");
j = 0;
seq_printf(m, "\n\t%d) IPC: ", j);
@@ -437,38 +451,43 @@ skip_rdma:
cifs_debug_tcon(m, tcon);
}
- seq_puts(m, "\n\tMIDs:\n");
-
- spin_lock(&GlobalMid_Lock);
- list_for_each(tmp3, &server->pending_mid_q) {
- mid_entry = list_entry(tmp3, struct mid_q_entry,
- qhead);
- seq_printf(m, "\tState: %d com: %d pid:"
- " %d cbdata: %p mid %llu\n",
- mid_entry->mid_state,
- le16_to_cpu(mid_entry->command),
- mid_entry->pid,
- mid_entry->callback_data,
- mid_entry->mid);
- }
- spin_unlock(&GlobalMid_Lock);
-
spin_lock(&ses->iface_lock);
if (ses->iface_count)
- seq_printf(m, "\n\tServer interfaces: %zu\n",
+ seq_printf(m, "\n\n\tServer interfaces: %zu",
ses->iface_count);
for (j = 0; j < ses->iface_count; j++) {
struct cifs_server_iface *iface;
iface = &ses->iface_list[j];
- seq_printf(m, "\t%d)", j);
+ seq_printf(m, "\n\t%d)", j+1);
cifs_dump_iface(m, iface);
if (is_ses_using_iface(ses, iface))
seq_puts(m, "\t\t[CONNECTED]\n");
}
spin_unlock(&ses->iface_lock);
}
+ if (i == 0)
+ seq_printf(m, "\n\t\t[NONE]");
+
+ seq_puts(m, "\n\n\tMIDs: ");
+ spin_lock(&GlobalMid_Lock);
+ list_for_each(tmp3, &server->pending_mid_q) {
+ mid_entry = list_entry(tmp3, struct mid_q_entry,
+ qhead);
+ seq_printf(m, "\n\tState: %d com: %d pid:"
+ " %d cbdata: %p mid %llu\n",
+ mid_entry->mid_state,
+ le16_to_cpu(mid_entry->command),
+ mid_entry->pid,
+ mid_entry->callback_data,
+ mid_entry->mid);
+ }
+ spin_unlock(&GlobalMid_Lock);
+ seq_printf(m, "\n--\n");
}
+ if (c == 0)
+ seq_printf(m, "\n\t[NONE]");
+
spin_unlock(&cifs_tcp_ses_lock);
seq_putc(m, '\n');
diff --git a/fs/cifs/cifs_swn.c b/fs/cifs/cifs_swn.c
index d35f599aa00e..f2d730fffccb 100644
--- a/fs/cifs/cifs_swn.c
+++ b/fs/cifs/cifs_swn.c
@@ -272,7 +272,7 @@ static struct cifs_swn_reg *cifs_find_swn_reg(struct cifs_tcon *tcon)
if (IS_ERR(share_name)) {
int ret;
- ret = PTR_ERR(net_name);
+ ret = PTR_ERR(share_name);
cifs_dbg(VFS, "%s: failed to extract share name from target '%s': %d\n",
__func__, tcon->treeName, ret);
kfree(net_name);
diff --git a/fs/cifs/cifsacl.c b/fs/cifs/cifsacl.c
index 562913e2b3f2..9d29eb9660c2 100644
--- a/fs/cifs/cifsacl.c
+++ b/fs/cifs/cifsacl.c
@@ -267,10 +267,11 @@ is_well_known_sid(const struct cifs_sid *psid, uint32_t *puid, bool is_group)
return true; /* well known sid found, uid returned */
}
-static void
+static __u16
cifs_copy_sid(struct cifs_sid *dst, const struct cifs_sid *src)
{
int i;
+ __u16 size = 1 + 1 + 6;
dst->revision = src->revision;
dst->num_subauth = min_t(u8, src->num_subauth, SID_MAX_SUB_AUTHORITIES);
@@ -278,6 +279,9 @@ cifs_copy_sid(struct cifs_sid *dst, const struct cifs_sid *src)
dst->authority[i] = src->authority[i];
for (i = 0; i < dst->num_subauth; ++i)
dst->sub_auth[i] = src->sub_auth[i];
+ size += (dst->num_subauth * 4);
+
+ return size;
}
static int
@@ -521,8 +525,11 @@ exit_cifs_idmap(void)
}
/* copy ntsd, owner sid, and group sid from a security descriptor to another */
-static void copy_sec_desc(const struct cifs_ntsd *pntsd,
- struct cifs_ntsd *pnntsd, __u32 sidsoffset)
+static __u32 copy_sec_desc(const struct cifs_ntsd *pntsd,
+ struct cifs_ntsd *pnntsd,
+ __u32 sidsoffset,
+ struct cifs_sid *pownersid,
+ struct cifs_sid *pgrpsid)
{
struct cifs_sid *owner_sid_ptr, *group_sid_ptr;
struct cifs_sid *nowner_sid_ptr, *ngroup_sid_ptr;
@@ -536,19 +543,25 @@ static void copy_sec_desc(const struct cifs_ntsd *pntsd,
pnntsd->gsidoffset = cpu_to_le32(sidsoffset + sizeof(struct cifs_sid));
/* copy owner sid */
- owner_sid_ptr = (struct cifs_sid *)((char *)pntsd +
+ if (pownersid)
+ owner_sid_ptr = pownersid;
+ else
+ owner_sid_ptr = (struct cifs_sid *)((char *)pntsd +
le32_to_cpu(pntsd->osidoffset));
nowner_sid_ptr = (struct cifs_sid *)((char *)pnntsd + sidsoffset);
cifs_copy_sid(nowner_sid_ptr, owner_sid_ptr);
/* copy group sid */
- group_sid_ptr = (struct cifs_sid *)((char *)pntsd +
+ if (pgrpsid)
+ group_sid_ptr = pgrpsid;
+ else
+ group_sid_ptr = (struct cifs_sid *)((char *)pntsd +
le32_to_cpu(pntsd->gsidoffset));
ngroup_sid_ptr = (struct cifs_sid *)((char *)pnntsd + sidsoffset +
sizeof(struct cifs_sid));
cifs_copy_sid(ngroup_sid_ptr, group_sid_ptr);
- return;
+ return sidsoffset + (2 * sizeof(struct cifs_sid));
}
@@ -663,6 +676,25 @@ static void mode_to_access_flags(umode_t mode, umode_t bits_to_use,
return;
}
+static __u16 cifs_copy_ace(struct cifs_ace *dst, struct cifs_ace *src, struct cifs_sid *psid)
+{
+ __u16 size = 1 + 1 + 2 + 4;
+
+ dst->type = src->type;
+ dst->flags = src->flags;
+ dst->access_req = src->access_req;
+
+ /* Check if there's a replacement sid specified */
+ if (psid)
+ size += cifs_copy_sid(&dst->sid, psid);
+ else
+ size += cifs_copy_sid(&dst->sid, &src->sid);
+
+ dst->size = cpu_to_le16(size);
+
+ return size;
+}
+
static __u16 fill_ace_for_sid(struct cifs_ace *pntace,
const struct cifs_sid *psid, __u64 nmode,
umode_t bits, __u8 access_type,
@@ -907,29 +939,30 @@ unsigned int setup_special_user_owner_ACE(struct cifs_ace *pntace)
return ace_size;
}
-static int set_chmod_dacl(struct cifs_acl *pndacl, struct cifs_sid *pownersid,
- struct cifs_sid *pgrpsid, __u64 *pnmode, bool modefromsid)
+static void populate_new_aces(char *nacl_base,
+ struct cifs_sid *pownersid,
+ struct cifs_sid *pgrpsid,
+ __u64 *pnmode, u32 *pnum_aces, u16 *pnsize,
+ bool modefromsid)
{
- u16 size = 0;
- u32 num_aces = 0;
- struct cifs_acl *pnndacl;
__u64 nmode;
+ u32 num_aces = 0;
+ u16 nsize = 0;
__u64 user_mode;
__u64 group_mode;
__u64 other_mode;
__u64 deny_user_mode = 0;
__u64 deny_group_mode = 0;
bool sticky_set = false;
-
- pnndacl = (struct cifs_acl *)((char *)pndacl + sizeof(struct cifs_acl));
+ struct cifs_ace *pnntace = NULL;
nmode = *pnmode;
+ num_aces = *pnum_aces;
+ nsize = *pnsize;
if (modefromsid) {
- struct cifs_ace *pntace =
- (struct cifs_ace *)((char *)pnndacl + size);
-
- size += setup_special_mode_ACE(pntace, nmode);
+ pnntace = (struct cifs_ace *) (nacl_base + nsize);
+ nsize += setup_special_mode_ACE(pnntace, nmode);
num_aces++;
goto set_size;
}
@@ -966,40 +999,170 @@ static int set_chmod_dacl(struct cifs_acl *pndacl, struct cifs_sid *pownersid,
sticky_set = true;
if (deny_user_mode) {
- size += fill_ace_for_sid((struct cifs_ace *)((char *)pnndacl + size),
- pownersid, deny_user_mode, 0700, ACCESS_DENIED, false);
+ pnntace = (struct cifs_ace *) (nacl_base + nsize);
+ nsize += fill_ace_for_sid(pnntace, pownersid, deny_user_mode,
+ 0700, ACCESS_DENIED, false);
num_aces++;
}
+
/* Group DENY ACE does not conflict with owner ALLOW ACE. Keep in preferred order*/
if (deny_group_mode && !(deny_group_mode & (user_mode >> 3))) {
- size += fill_ace_for_sid((struct cifs_ace *)((char *)pnndacl + size),
- pgrpsid, deny_group_mode, 0070, ACCESS_DENIED, false);
+ pnntace = (struct cifs_ace *) (nacl_base + nsize);
+ nsize += fill_ace_for_sid(pnntace, pgrpsid, deny_group_mode,
+ 0070, ACCESS_DENIED, false);
num_aces++;
}
- size += fill_ace_for_sid((struct cifs_ace *) ((char *)pnndacl + size),
- pownersid, user_mode, 0700, ACCESS_ALLOWED, true);
+
+ pnntace = (struct cifs_ace *) (nacl_base + nsize);
+ nsize += fill_ace_for_sid(pnntace, pownersid, user_mode,
+ 0700, ACCESS_ALLOWED, true);
num_aces++;
+
/* Group DENY ACE conflicts with owner ALLOW ACE. So keep it after. */
if (deny_group_mode && (deny_group_mode & (user_mode >> 3))) {
- size += fill_ace_for_sid((struct cifs_ace *)((char *)pnndacl + size),
- pgrpsid, deny_group_mode, 0070, ACCESS_DENIED, false);
+ pnntace = (struct cifs_ace *) (nacl_base + nsize);
+ nsize += fill_ace_for_sid(pnntace, pgrpsid, deny_group_mode,
+ 0070, ACCESS_DENIED, false);
num_aces++;
}
- size += fill_ace_for_sid((struct cifs_ace *)((char *)pnndacl + size),
- pgrpsid, group_mode, 0070, ACCESS_ALLOWED, !sticky_set);
+
+ pnntace = (struct cifs_ace *) (nacl_base + nsize);
+ nsize += fill_ace_for_sid(pnntace, pgrpsid, group_mode,
+ 0070, ACCESS_ALLOWED, !sticky_set);
num_aces++;
- size += fill_ace_for_sid((struct cifs_ace *)((char *)pnndacl + size),
- &sid_everyone, other_mode, 0007, ACCESS_ALLOWED, !sticky_set);
+
+ pnntace = (struct cifs_ace *) (nacl_base + nsize);
+ nsize += fill_ace_for_sid(pnntace, &sid_everyone, other_mode,
+ 0007, ACCESS_ALLOWED, !sticky_set);
num_aces++;
set_size:
+ *pnum_aces = num_aces;
+ *pnsize = nsize;
+}
+
+static __u16 replace_sids_and_copy_aces(struct cifs_acl *pdacl, struct cifs_acl *pndacl,
+ struct cifs_sid *pownersid, struct cifs_sid *pgrpsid,
+ struct cifs_sid *pnownersid, struct cifs_sid *pngrpsid)
+{
+ int i;
+ u16 size = 0;
+ struct cifs_ace *pntace = NULL;
+ char *acl_base = NULL;
+ u32 src_num_aces = 0;
+ u16 nsize = 0;
+ struct cifs_ace *pnntace = NULL;
+ char *nacl_base = NULL;
+ u16 ace_size = 0;
+
+ acl_base = (char *)pdacl;
+ size = sizeof(struct cifs_acl);
+ src_num_aces = le32_to_cpu(pdacl->num_aces);
+
+ nacl_base = (char *)pndacl;
+ nsize = sizeof(struct cifs_acl);
+
+ /* Go through all the ACEs */
+ for (i = 0; i < src_num_aces; ++i) {
+ pntace = (struct cifs_ace *) (acl_base + size);
+ pnntace = (struct cifs_ace *) (nacl_base + nsize);
+
+ if (pnownersid && compare_sids(&pntace->sid, pownersid) == 0)
+ ace_size = cifs_copy_ace(pnntace, pntace, pnownersid);
+ else if (pngrpsid && compare_sids(&pntace->sid, pgrpsid) == 0)
+ ace_size = cifs_copy_ace(pnntace, pntace, pngrpsid);
+ else
+ ace_size = cifs_copy_ace(pnntace, pntace, NULL);
+
+ size += le16_to_cpu(pntace->size);
+ nsize += ace_size;
+ }
+
+ return nsize;
+}
+
+static int set_chmod_dacl(struct cifs_acl *pdacl, struct cifs_acl *pndacl,
+ struct cifs_sid *pownersid, struct cifs_sid *pgrpsid,
+ __u64 *pnmode, bool mode_from_sid)
+{
+ int i;
+ u16 size = 0;
+ struct cifs_ace *pntace = NULL;
+ char *acl_base = NULL;
+ u32 src_num_aces = 0;
+ u16 nsize = 0;
+ struct cifs_ace *pnntace = NULL;
+ char *nacl_base = NULL;
+ u32 num_aces = 0;
+ __u64 nmode;
+ bool new_aces_set = false;
+
+ /* Assuming that pndacl and pnmode are never NULL */
+ nmode = *pnmode;
+ nacl_base = (char *)pndacl;
+ nsize = sizeof(struct cifs_acl);
+
+ /* If pdacl is NULL, we don't have a src. Simply populate new ACL. */
+ if (!pdacl) {
+ populate_new_aces(nacl_base,
+ pownersid, pgrpsid,
+ pnmode, &num_aces, &nsize,
+ mode_from_sid);
+ goto finalize_dacl;
+ }
+
+ acl_base = (char *)pdacl;
+ size = sizeof(struct cifs_acl);
+ src_num_aces = le32_to_cpu(pdacl->num_aces);
+
+ /* Retain old ACEs which we can retain */
+ for (i = 0; i < src_num_aces; ++i) {
+ pntace = (struct cifs_ace *) (acl_base + size);
+ pnntace = (struct cifs_ace *) (nacl_base + nsize);
+
+ if (!new_aces_set && (pntace->flags & INHERITED_ACE)) {
+ /* Place the new ACEs in between existing explicit and inherited */
+ populate_new_aces(nacl_base,
+ pownersid, pgrpsid,
+ pnmode, &num_aces, &nsize,
+ mode_from_sid);
+
+ new_aces_set = true;
+ }
+
+ /* If it's any one of the ACE we're replacing, skip! */
+ if ((compare_sids(&pntace->sid, &sid_unix_NFS_mode) == 0) ||
+ (compare_sids(&pntace->sid, pownersid) == 0) ||
+ (compare_sids(&pntace->sid, pgrpsid) == 0) ||
+ (compare_sids(&pntace->sid, &sid_everyone) == 0) ||
+ (compare_sids(&pntace->sid, &sid_authusers) == 0)) {
+ goto next_ace;
+ }
+
+ nsize += cifs_copy_ace(pnntace, pntace, NULL);
+ num_aces++;
+
+next_ace:
+ size += le16_to_cpu(pntace->size);
+ }
+
+ /* If inherited ACEs are not present, place the new ones at the tail */
+ if (!new_aces_set) {
+ populate_new_aces(nacl_base,
+ pownersid, pgrpsid,
+ pnmode, &num_aces, &nsize,
+ mode_from_sid);
+
+ new_aces_set = true;
+ }
+
+finalize_dacl:
pndacl->num_aces = cpu_to_le32(num_aces);
- pndacl->size = cpu_to_le16(size + sizeof(struct cifs_acl));
+ pndacl->size = cpu_to_le16(nsize);
return 0;
}
-
static int parse_sid(struct cifs_sid *psid, char *end_of_acl)
{
/* BB need to add parm so we can store the SID BB */
@@ -1094,7 +1257,7 @@ static int parse_sec_desc(struct cifs_sb_info *cifs_sb,
/* Convert permission bits from mode to equivalent CIFS ACL */
static int build_sec_desc(struct cifs_ntsd *pntsd, struct cifs_ntsd *pnntsd,
- __u32 secdesclen, __u64 *pnmode, kuid_t uid, kgid_t gid,
+ __u32 secdesclen, __u32 *pnsecdesclen, __u64 *pnmode, kuid_t uid, kgid_t gid,
bool mode_from_sid, bool id_from_sid, int *aclflag)
{
int rc = 0;
@@ -1102,39 +1265,59 @@ static int build_sec_desc(struct cifs_ntsd *pntsd, struct cifs_ntsd *pnntsd,
__u32 ndacloffset;
__u32 sidsoffset;
struct cifs_sid *owner_sid_ptr, *group_sid_ptr;
- struct cifs_sid *nowner_sid_ptr, *ngroup_sid_ptr;
+ struct cifs_sid *nowner_sid_ptr = NULL, *ngroup_sid_ptr = NULL;
struct cifs_acl *dacl_ptr = NULL; /* no need for SACL ptr */
struct cifs_acl *ndacl_ptr = NULL; /* no need for SACL ptr */
+ char *end_of_acl = ((char *)pntsd) + secdesclen;
+ u16 size = 0;
- if (pnmode && *pnmode != NO_CHANGE_64) { /* chmod */
- owner_sid_ptr = (struct cifs_sid *)((char *)pntsd +
- le32_to_cpu(pntsd->osidoffset));
- group_sid_ptr = (struct cifs_sid *)((char *)pntsd +
- le32_to_cpu(pntsd->gsidoffset));
- dacloffset = le32_to_cpu(pntsd->dacloffset);
+ dacloffset = le32_to_cpu(pntsd->dacloffset);
+ if (dacloffset) {
dacl_ptr = (struct cifs_acl *)((char *)pntsd + dacloffset);
+ if (end_of_acl < (char *)dacl_ptr + le16_to_cpu(dacl_ptr->size)) {
+ cifs_dbg(VFS, "Server returned illegal ACL size\n");
+ return -EINVAL;
+ }
+ }
+
+ owner_sid_ptr = (struct cifs_sid *)((char *)pntsd +
+ le32_to_cpu(pntsd->osidoffset));
+ group_sid_ptr = (struct cifs_sid *)((char *)pntsd +
+ le32_to_cpu(pntsd->gsidoffset));
+
+ if (pnmode && *pnmode != NO_CHANGE_64) { /* chmod */
ndacloffset = sizeof(struct cifs_ntsd);
ndacl_ptr = (struct cifs_acl *)((char *)pnntsd + ndacloffset);
- ndacl_ptr->revision = dacl_ptr->revision;
- ndacl_ptr->size = 0;
- ndacl_ptr->num_aces = 0;
+ ndacl_ptr->revision =
+ dacloffset ? dacl_ptr->revision : cpu_to_le16(ACL_REVISION);
- rc = set_chmod_dacl(ndacl_ptr, owner_sid_ptr, group_sid_ptr,
+ ndacl_ptr->size = cpu_to_le16(0);
+ ndacl_ptr->num_aces = cpu_to_le32(0);
+
+ rc = set_chmod_dacl(dacl_ptr, ndacl_ptr, owner_sid_ptr, group_sid_ptr,
pnmode, mode_from_sid);
+
sidsoffset = ndacloffset + le16_to_cpu(ndacl_ptr->size);
- /* copy sec desc control portion & owner and group sids */
- copy_sec_desc(pntsd, pnntsd, sidsoffset);
- *aclflag = CIFS_ACL_DACL;
+ /* copy the non-dacl portion of secdesc */
+ *pnsecdesclen = copy_sec_desc(pntsd, pnntsd, sidsoffset,
+ NULL, NULL);
+
+ *aclflag |= CIFS_ACL_DACL;
} else {
- memcpy(pnntsd, pntsd, secdesclen);
+ ndacloffset = sizeof(struct cifs_ntsd);
+ ndacl_ptr = (struct cifs_acl *)((char *)pnntsd + ndacloffset);
+ ndacl_ptr->revision =
+ dacloffset ? dacl_ptr->revision : cpu_to_le16(ACL_REVISION);
+ ndacl_ptr->num_aces = dacl_ptr->num_aces;
+
if (uid_valid(uid)) { /* chown */
uid_t id;
- owner_sid_ptr = (struct cifs_sid *)((char *)pnntsd +
- le32_to_cpu(pnntsd->osidoffset));
nowner_sid_ptr = kmalloc(sizeof(struct cifs_sid),
GFP_KERNEL);
- if (!nowner_sid_ptr)
- return -ENOMEM;
+ if (!nowner_sid_ptr) {
+ rc = -ENOMEM;
+ goto chown_chgrp_exit;
+ }
id = from_kuid(&init_user_ns, uid);
if (id_from_sid) {
struct owner_sid *osid = (struct owner_sid *)nowner_sid_ptr;
@@ -1145,27 +1328,25 @@ static int build_sec_desc(struct cifs_ntsd *pntsd, struct cifs_ntsd *pnntsd,
osid->SubAuthorities[0] = cpu_to_le32(88);
osid->SubAuthorities[1] = cpu_to_le32(1);
osid->SubAuthorities[2] = cpu_to_le32(id);
+
} else { /* lookup sid with upcall */
rc = id_to_sid(id, SIDOWNER, nowner_sid_ptr);
if (rc) {
cifs_dbg(FYI, "%s: Mapping error %d for owner id %d\n",
__func__, rc, id);
- kfree(nowner_sid_ptr);
- return rc;
+ goto chown_chgrp_exit;
}
}
- cifs_copy_sid(owner_sid_ptr, nowner_sid_ptr);
- kfree(nowner_sid_ptr);
- *aclflag = CIFS_ACL_OWNER;
+ *aclflag |= CIFS_ACL_OWNER;
}
if (gid_valid(gid)) { /* chgrp */
gid_t id;
- group_sid_ptr = (struct cifs_sid *)((char *)pnntsd +
- le32_to_cpu(pnntsd->gsidoffset));
ngroup_sid_ptr = kmalloc(sizeof(struct cifs_sid),
GFP_KERNEL);
- if (!ngroup_sid_ptr)
- return -ENOMEM;
+ if (!ngroup_sid_ptr) {
+ rc = -ENOMEM;
+ goto chown_chgrp_exit;
+ }
id = from_kgid(&init_user_ns, gid);
if (id_from_sid) {
struct owner_sid *gsid = (struct owner_sid *)ngroup_sid_ptr;
@@ -1176,19 +1357,35 @@ static int build_sec_desc(struct cifs_ntsd *pntsd, struct cifs_ntsd *pnntsd,
gsid->SubAuthorities[0] = cpu_to_le32(88);
gsid->SubAuthorities[1] = cpu_to_le32(2);
gsid->SubAuthorities[2] = cpu_to_le32(id);
+
} else { /* lookup sid with upcall */
rc = id_to_sid(id, SIDGROUP, ngroup_sid_ptr);
if (rc) {
cifs_dbg(FYI, "%s: Mapping error %d for group id %d\n",
__func__, rc, id);
- kfree(ngroup_sid_ptr);
- return rc;
+ goto chown_chgrp_exit;
}
}
- cifs_copy_sid(group_sid_ptr, ngroup_sid_ptr);
- kfree(ngroup_sid_ptr);
- *aclflag = CIFS_ACL_GROUP;
+ *aclflag |= CIFS_ACL_GROUP;
+ }
+
+ if (dacloffset) {
+ /* Replace ACEs for old owner with new one */
+ size = replace_sids_and_copy_aces(dacl_ptr, ndacl_ptr,
+ owner_sid_ptr, group_sid_ptr,
+ nowner_sid_ptr, ngroup_sid_ptr);
+ ndacl_ptr->size = cpu_to_le16(size);
}
+
+ sidsoffset = ndacloffset + le16_to_cpu(ndacl_ptr->size);
+ /* copy the non-dacl portion of secdesc */
+ *pnsecdesclen = copy_sec_desc(pntsd, pnntsd, sidsoffset,
+ nowner_sid_ptr, ngroup_sid_ptr);
+
+chown_chgrp_exit:
+ /* errors could jump here. So make sure we return soon after this */
+ kfree(nowner_sid_ptr);
+ kfree(ngroup_sid_ptr);
}
return rc;
@@ -1384,6 +1581,9 @@ id_mode_to_cifs_acl(struct inode *inode, const char *path, __u64 *pnmode,
int rc = 0;
int aclflag = CIFS_ACL_DACL; /* default flag to set */
__u32 secdesclen = 0;
+ __u32 nsecdesclen = 0;
+ __u32 dacloffset = 0;
+ struct cifs_acl *dacl_ptr = NULL;
struct cifs_ntsd *pntsd = NULL; /* acl obtained from server */
struct cifs_ntsd *pnntsd = NULL; /* modified acl to be sent to server */
struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
@@ -1414,31 +1614,52 @@ id_mode_to_cifs_acl(struct inode *inode, const char *path, __u64 *pnmode,
return rc;
}
+ if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MODE_FROM_SID)
+ mode_from_sid = true;
+ else
+ mode_from_sid = false;
+
+ if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_UID_FROM_ACL)
+ id_from_sid = true;
+ else
+ id_from_sid = false;
+
+ /* Potentially, five new ACEs can be added to the ACL for U,G,O mapping */
+ nsecdesclen = secdesclen;
+ if (pnmode && *pnmode != NO_CHANGE_64) { /* chmod */
+ if (mode_from_sid)
+ nsecdesclen += sizeof(struct cifs_ace);
+ else /* cifsacl */
+ nsecdesclen += 5 * sizeof(struct cifs_ace);
+ } else { /* chown */
+ /* When ownership changes, changes new owner sid length could be different */
+ nsecdesclen = sizeof(struct cifs_ntsd) + (sizeof(struct cifs_sid) * 2);
+ dacloffset = le32_to_cpu(pntsd->dacloffset);
+ if (dacloffset) {
+ dacl_ptr = (struct cifs_acl *)((char *)pntsd + dacloffset);
+ if (mode_from_sid)
+ nsecdesclen +=
+ le32_to_cpu(dacl_ptr->num_aces) * sizeof(struct cifs_ace);
+ else /* cifsacl */
+ nsecdesclen += le16_to_cpu(dacl_ptr->size);
+ }
+ }
+
/*
* Add three ACEs for owner, group, everyone getting rid of other ACEs
* as chmod disables ACEs and set the security descriptor. Allocate
* memory for the smb header, set security descriptor request security
* descriptor parameters, and secuirty descriptor itself
*/
- secdesclen = max_t(u32, secdesclen, DEFAULT_SEC_DESC_LEN);
- pnntsd = kmalloc(secdesclen, GFP_KERNEL);
+ nsecdesclen = max_t(u32, nsecdesclen, DEFAULT_SEC_DESC_LEN);
+ pnntsd = kmalloc(nsecdesclen, GFP_KERNEL);
if (!pnntsd) {
kfree(pntsd);
cifs_put_tlink(tlink);
return -ENOMEM;
}
- if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MODE_FROM_SID)
- mode_from_sid = true;
- else
- mode_from_sid = false;
-
- if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_UID_FROM_ACL)
- id_from_sid = true;
- else
- id_from_sid = false;
-
- rc = build_sec_desc(pntsd, pnntsd, secdesclen, pnmode, uid, gid,
+ rc = build_sec_desc(pntsd, pnntsd, secdesclen, &nsecdesclen, pnmode, uid, gid,
mode_from_sid, id_from_sid, &aclflag);
cifs_dbg(NOISY, "build_sec_desc rc: %d\n", rc);
@@ -1448,7 +1669,7 @@ id_mode_to_cifs_acl(struct inode *inode, const char *path, __u64 *pnmode,
if (!rc) {
/* Set the security descriptor */
- rc = ops->set_acl(pnntsd, secdesclen, inode, path, aclflag);
+ rc = ops->set_acl(pnntsd, nsecdesclen, inode, path, aclflag);
cifs_dbg(NOISY, "set_cifs_acl rc: %d\n", rc);
}
cifs_put_tlink(tlink);
diff --git a/fs/cifs/cifsacl.h b/fs/cifs/cifsacl.h
index ff7fd0862e28..d9e704979d99 100644
--- a/fs/cifs/cifsacl.h
+++ b/fs/cifs/cifsacl.h
@@ -31,8 +31,8 @@
#define EXEC_BIT 0x1
#define ACL_OWNER_MASK 0700
-#define ACL_GROUP_MASK 0770
-#define ACL_EVERYONE_MASK 0777
+#define ACL_GROUP_MASK 0070
+#define ACL_EVERYONE_MASK 0007
#define UBITSHIFT 6
#define GBITSHIFT 3
diff --git a/fs/cifs/cifsencrypt.c b/fs/cifs/cifsencrypt.c
index 51d53e4bdf6b..b8f1ff9a83f3 100644
--- a/fs/cifs/cifsencrypt.c
+++ b/fs/cifs/cifsencrypt.c
@@ -568,15 +568,15 @@ static int calc_ntlmv2_hash(struct cifs_ses *ses, char *ntlmv2_hash,
return rc;
}
} else {
- /* We use ses->serverName if no domain name available */
- len = strlen(ses->serverName);
+ /* We use ses->ip_addr if no domain name available */
+ len = strlen(ses->ip_addr);
server = kmalloc(2 + (len * 2), GFP_KERNEL);
if (server == NULL) {
rc = -ENOMEM;
return rc;
}
- len = cifs_strtoUTF16((__le16 *)server, ses->serverName, len,
+ len = cifs_strtoUTF16((__le16 *)server, ses->ip_addr, len,
nls_cp);
rc =
crypto_shash_update(&ses->server->secmech.sdeschmacmd5->shash,
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index ab883e84e116..d43e935d2df4 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -305,7 +305,8 @@ static long cifs_fallocate(struct file *file, int mode, loff_t off, loff_t len)
return -EOPNOTSUPP;
}
-static int cifs_permission(struct inode *inode, int mask)
+static int cifs_permission(struct user_namespace *mnt_userns,
+ struct inode *inode, int mask)
{
struct cifs_sb_info *cifs_sb;
@@ -320,7 +321,7 @@ static int cifs_permission(struct inode *inode, int mask)
on the client (above and beyond ACL on servers) for
servers which do not support setting and viewing mode bits,
so allowing client to check permissions is useful */
- return generic_permission(inode, mask);
+ return generic_permission(&init_user_ns, inode, mask);
}
static struct kmem_cache *cifs_inode_cachep;
@@ -637,8 +638,18 @@ cifs_show_options(struct seq_file *s, struct dentry *root)
seq_printf(s, ",snapshot=%llu", tcon->snapshot_time);
if (tcon->handle_timeout)
seq_printf(s, ",handletimeout=%u", tcon->handle_timeout);
- /* convert actimeo and display it in seconds */
- seq_printf(s, ",actimeo=%lu", cifs_sb->ctx->actimeo / HZ);
+
+ /*
+ * Display file and directory attribute timeout in seconds.
+ * If file and directory attribute timeout the same then actimeo
+ * was likely specified on mount
+ */
+ if (cifs_sb->ctx->acdirmax == cifs_sb->ctx->acregmax)
+ seq_printf(s, ",actimeo=%lu", cifs_sb->ctx->acregmax / HZ);
+ else {
+ seq_printf(s, ",acdirmax=%lu", cifs_sb->ctx->acdirmax / HZ);
+ seq_printf(s, ",acregmax=%lu", cifs_sb->ctx->acregmax / HZ);
+ }
if (tcon->ses->chan_max > 1)
seq_printf(s, ",multichannel,max_channels=%zu",
@@ -1525,6 +1536,7 @@ init_cifs(void)
*/
atomic_set(&sesInfoAllocCount, 0);
atomic_set(&tconInfoAllocCount, 0);
+ atomic_set(&tcpSesNextId, 0);
atomic_set(&tcpSesAllocCount, 0);
atomic_set(&tcpSesReconnectCount, 0);
atomic_set(&tconInfoReconnectCount, 0);
diff --git a/fs/cifs/cifsfs.h b/fs/cifs/cifsfs.h
index 2307bb0f6147..0d7ef150dbb2 100644
--- a/fs/cifs/cifsfs.h
+++ b/fs/cifs/cifsfs.h
@@ -62,19 +62,22 @@ extern void cifs_sb_deactive(struct super_block *sb);
/* Functions related to inodes */
extern const struct inode_operations cifs_dir_inode_ops;
extern struct inode *cifs_root_iget(struct super_block *);
-extern int cifs_create(struct inode *, struct dentry *, umode_t,
- bool excl);
+extern int cifs_create(struct user_namespace *, struct inode *,
+ struct dentry *, umode_t, bool excl);
extern int cifs_atomic_open(struct inode *, struct dentry *,
struct file *, unsigned, umode_t);
extern struct dentry *cifs_lookup(struct inode *, struct dentry *,
unsigned int);
extern int cifs_unlink(struct inode *dir, struct dentry *dentry);
extern int cifs_hardlink(struct dentry *, struct inode *, struct dentry *);
-extern int cifs_mknod(struct inode *, struct dentry *, umode_t, dev_t);
-extern int cifs_mkdir(struct inode *, struct dentry *, umode_t);
+extern int cifs_mknod(struct user_namespace *, struct inode *, struct dentry *,
+ umode_t, dev_t);
+extern int cifs_mkdir(struct user_namespace *, struct inode *, struct dentry *,
+ umode_t);
extern int cifs_rmdir(struct inode *, struct dentry *);
-extern int cifs_rename2(struct inode *, struct dentry *, struct inode *,
- struct dentry *, unsigned int);
+extern int cifs_rename2(struct user_namespace *, struct inode *,
+ struct dentry *, struct inode *, struct dentry *,
+ unsigned int);
extern int cifs_revalidate_file_attr(struct file *filp);
extern int cifs_revalidate_dentry_attr(struct dentry *);
extern int cifs_revalidate_file(struct file *filp);
@@ -82,8 +85,10 @@ extern int cifs_revalidate_dentry(struct dentry *);
extern int cifs_invalidate_mapping(struct inode *inode);
extern int cifs_revalidate_mapping(struct inode *inode);
extern int cifs_zap_mapping(struct inode *inode);
-extern int cifs_getattr(const struct path *, struct kstat *, u32, unsigned int);
-extern int cifs_setattr(struct dentry *, struct iattr *);
+extern int cifs_getattr(struct user_namespace *, const struct path *,
+ struct kstat *, u32, unsigned int);
+extern int cifs_setattr(struct user_namespace *, struct dentry *,
+ struct iattr *);
extern int cifs_fiemap(struct inode *, struct fiemap_extent_info *, u64 start,
u64 len);
@@ -132,8 +137,8 @@ extern struct vfsmount *cifs_dfs_d_automount(struct path *path);
/* Functions related to symlinks */
extern const char *cifs_get_link(struct dentry *, struct inode *,
struct delayed_call *);
-extern int cifs_symlink(struct inode *inode, struct dentry *direntry,
- const char *symname);
+extern int cifs_symlink(struct user_namespace *mnt_userns, struct inode *inode,
+ struct dentry *direntry, const char *symname);
#ifdef CONFIG_CIFS_XATTR
extern const struct xattr_handler *cifs_xattr_handlers[];
@@ -160,5 +165,5 @@ extern struct dentry *cifs_smb3_do_mount(struct file_system_type *fs_type,
extern const struct export_operations cifs_export_ops;
#endif /* CONFIG_CIFS_NFSD_EXPORT */
-#define CIFS_VERSION "2.30"
+#define CIFS_VERSION "2.31"
#endif /* _CIFSFS_H */
diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h
index 50fcb65920e8..3de3c5908a72 100644
--- a/fs/cifs/cifsglob.h
+++ b/fs/cifs/cifsglob.h
@@ -21,6 +21,7 @@
#include <linux/in.h>
#include <linux/in6.h>
+#include <linux/inet.h>
#include <linux/slab.h>
#include <linux/mempool.h>
#include <linux/workqueue.h>
@@ -504,6 +505,8 @@ struct smb_version_operations {
loff_t (*llseek)(struct file *, struct cifs_tcon *, loff_t, int);
/* Check for STATUS_IO_TIMEOUT */
bool (*is_status_io_timeout)(char *buf);
+ /* Check for STATUS_NETWORK_NAME_DELETED */
+ void (*is_network_name_deleted)(char *buf, struct TCP_Server_Info *srv);
};
struct smb_version_values {
@@ -577,6 +580,7 @@ inc_rfc1001_len(void *buf, int count)
struct TCP_Server_Info {
struct list_head tcp_ses_list;
struct list_head smb_ses_list;
+ __u64 conn_id; /* connection identifier (useful for debugging) */
int srv_count; /* reference counter */
/* 15 character server name + 0x20 16th byte indicating type = srv */
char server_RFC1001_name[RFC1001_NAME_LEN_WITH_NULL];
@@ -901,7 +905,7 @@ struct cifs_ses {
kuid_t linux_uid; /* overriding owner of files on the mount */
kuid_t cred_uid; /* owner of credentials */
unsigned int capabilities;
- char serverName[SERVER_NAME_LEN_WITH_NULL];
+ char ip_addr[INET6_ADDRSTRLEN + 1]; /* Max ipv6 (or v4) addr string len */
char *user_name; /* must not be null except during init of sess
and after mount option parsing we fill it */
char *domainName;
@@ -1704,7 +1708,9 @@ static inline bool is_retryable_error(int error)
#define CIFS_ECHO_OP 0x080 /* echo request */
#define CIFS_OBREAK_OP 0x0100 /* oplock break request */
#define CIFS_NEG_OP 0x0200 /* negotiate request */
-#define CIFS_OP_MASK 0x0380 /* mask request type */
+/* Lower bitmask values are reserved by others below. */
+#define CIFS_SESS_OP 0x2000 /* session setup request */
+#define CIFS_OP_MASK 0x2380 /* mask request type */
#define CIFS_HAS_CREDITS 0x0400 /* already has credits */
#define CIFS_TRANSFORM_REQ 0x0800 /* transform request before sending */
@@ -1844,6 +1850,7 @@ GLOBAL_EXTERN spinlock_t GlobalMid_Lock; /* protects above & list operations */
*/
GLOBAL_EXTERN atomic_t sesInfoAllocCount;
GLOBAL_EXTERN atomic_t tconInfoAllocCount;
+GLOBAL_EXTERN atomic_t tcpSesNextId;
GLOBAL_EXTERN atomic_t tcpSesAllocCount;
GLOBAL_EXTERN atomic_t tcpSesReconnectCount;
GLOBAL_EXTERN atomic_t tconInfoReconnectCount;
diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h
index 32f7a013402e..75ce6f742b8d 100644
--- a/fs/cifs/cifsproto.h
+++ b/fs/cifs/cifsproto.h
@@ -232,6 +232,8 @@ extern unsigned int setup_special_user_owner_ACE(struct cifs_ace *pace);
extern void dequeue_mid(struct mid_q_entry *mid, bool malformed);
extern int cifs_read_from_socket(struct TCP_Server_Info *server, char *buf,
unsigned int to_read);
+extern ssize_t cifs_discard_from_socket(struct TCP_Server_Info *server,
+ size_t to_read);
extern int cifs_read_page_from_socket(struct TCP_Server_Info *server,
struct page *page,
unsigned int page_offset,
diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c
index 0496934feecb..c279527aae92 100644
--- a/fs/cifs/cifssmb.c
+++ b/fs/cifs/cifssmb.c
@@ -1451,9 +1451,9 @@ cifs_discard_remaining_data(struct TCP_Server_Info *server)
while (remaining > 0) {
int length;
- length = cifs_read_from_socket(server, server->bigbuf,
- min_t(unsigned int, remaining,
- CIFSMaxBufSize + MAX_HEADER_SIZE(server)));
+ length = cifs_discard_from_socket(server,
+ min_t(size_t, remaining,
+ CIFSMaxBufSize + MAX_HEADER_SIZE(server)));
if (length < 0)
return length;
server->total_read += length;
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index 4bb9decbbf27..112692300fb6 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -242,7 +242,7 @@ cifs_reconnect(struct TCP_Server_Info *server)
server->max_read = 0;
cifs_dbg(FYI, "Mark tcp session as need reconnect\n");
- trace_smb3_reconnect(server->CurrentMid, server->hostname);
+ trace_smb3_reconnect(server->CurrentMid, server->conn_id, server->hostname);
/* before reconnecting the tcp session, mark the smb session (uid)
and the tid bad so they are not used until reconnected */
@@ -564,6 +564,23 @@ cifs_read_from_socket(struct TCP_Server_Info *server, char *buf,
return cifs_readv_from_socket(server, &smb_msg);
}
+ssize_t
+cifs_discard_from_socket(struct TCP_Server_Info *server, size_t to_read)
+{
+ struct msghdr smb_msg;
+
+ /*
+ * iov_iter_discard already sets smb_msg.type and count and iov_offset
+ * and cifs_readv_from_socket sets msg_control and msg_controllen
+ * so little to initialize in struct msghdr
+ */
+ smb_msg.msg_name = NULL;
+ smb_msg.msg_namelen = 0;
+ iov_iter_discard(&smb_msg.msg_iter, READ, to_read);
+
+ return cifs_readv_from_socket(server, &smb_msg);
+}
+
int
cifs_read_page_from_socket(struct TCP_Server_Info *server, struct page *page,
unsigned int page_offset, unsigned int to_read)
@@ -846,7 +863,7 @@ static void
smb2_add_credits_from_hdr(char *buffer, struct TCP_Server_Info *server)
{
struct smb2_sync_hdr *shdr = (struct smb2_sync_hdr *)buffer;
- int scredits = server->credits;
+ int scredits, in_flight;
/*
* SMB1 does not use credits.
@@ -857,12 +874,14 @@ smb2_add_credits_from_hdr(char *buffer, struct TCP_Server_Info *server)
if (shdr->CreditRequest) {
spin_lock(&server->req_lock);
server->credits += le16_to_cpu(shdr->CreditRequest);
+ scredits = server->credits;
+ in_flight = server->in_flight;
spin_unlock(&server->req_lock);
wake_up(&server->request_q);
trace_smb3_add_credits(server->CurrentMid,
- server->hostname, scredits,
- le16_to_cpu(shdr->CreditRequest));
+ server->conn_id, server->hostname, scredits,
+ le16_to_cpu(shdr->CreditRequest), in_flight);
cifs_server_dbg(FYI, "%s: added %u credits total=%d\n",
__func__, le16_to_cpu(shdr->CreditRequest),
scredits);
@@ -993,6 +1012,10 @@ next_pdu:
if (mids[i] != NULL) {
mids[i]->resp_buf_size = server->pdu_size;
+ if (bufs[i] && server->ops->is_network_name_deleted)
+ server->ops->is_network_name_deleted(bufs[i],
+ server);
+
if (!mids[i]->multiRsp || mids[i]->multiEnd)
mids[i]->callback(mids[i]);
@@ -1317,6 +1340,7 @@ cifs_get_tcp_session(struct smb3_fs_context *ctx)
goto out_err_crypto_release;
}
+ tcp_ses->conn_id = atomic_inc_return(&tcpSesNextId);
tcp_ses->noblockcnt = ctx->rootfs;
tcp_ses->noblocksnd = ctx->noblocksnd || ctx->rootfs;
tcp_ses->noautotune = ctx->noautotune;
@@ -1838,9 +1862,9 @@ cifs_get_smb_ses(struct TCP_Server_Info *server, struct smb3_fs_context *ctx)
/* new SMB session uses our server ref */
ses->server = server;
if (server->dstaddr.ss_family == AF_INET6)
- sprintf(ses->serverName, "%pI6", &addr6->sin6_addr);
+ sprintf(ses->ip_addr, "%pI6", &addr6->sin6_addr);
else
- sprintf(ses->serverName, "%pI4", &addr->sin_addr);
+ sprintf(ses->ip_addr, "%pI4", &addr->sin_addr);
if (ctx->username) {
ses->user_name = kstrdup(ctx->username, GFP_KERNEL);
@@ -2269,7 +2293,9 @@ compare_mount_options(struct super_block *sb, struct cifs_mnt_data *mnt_data)
if (strcmp(old->local_nls->charset, new->local_nls->charset))
return 0;
- if (old->ctx->actimeo != new->ctx->actimeo)
+ if (old->ctx->acregmax != new->ctx->acregmax)
+ return 0;
+ if (old->ctx->acdirmax != new->ctx->acdirmax)
return 0;
return 1;
@@ -2911,7 +2937,7 @@ static int mount_setup_tlink(struct cifs_sb_info *cifs_sb, struct cifs_ses *ses,
#ifdef CONFIG_CIFS_DFS_UPCALL
/*
* cifs_build_path_to_root returns full path to root when we do not have an
- * exiting connection (tcon)
+ * existing connection (tcon)
*/
static char *
build_unc_path_to_root(const struct smb3_fs_context *ctx,
@@ -3038,96 +3064,91 @@ static int update_vol_info(const struct dfs_cache_tgt_iterator *tgt_it,
return 0;
}
-static int setup_dfs_tgt_conn(const char *path, const char *full_path,
- const struct dfs_cache_tgt_iterator *tgt_it,
- struct cifs_sb_info *cifs_sb, struct smb3_fs_context *ctx,
- unsigned int *xid, struct TCP_Server_Info **server,
- struct cifs_ses **ses, struct cifs_tcon **tcon)
-{
- int rc;
- struct dfs_info3_param ref = {0};
- char *mdata = NULL;
- struct smb3_fs_context fake_ctx = {NULL};
- char *fake_devname = NULL;
-
- cifs_dbg(FYI, "%s: dfs path: %s\n", __func__, path);
-
- rc = dfs_cache_get_tgt_referral(path, tgt_it, &ref);
- if (rc)
- return rc;
-
- mdata = cifs_compose_mount_options(cifs_sb->ctx->mount_options,
- full_path + 1, &ref,
- &fake_devname);
- free_dfs_info_param(&ref);
-
- if (IS_ERR(mdata)) {
- rc = PTR_ERR(mdata);
- mdata = NULL;
- } else
- rc = cifs_setup_volume_info(&fake_ctx, mdata, fake_devname);
-
- kfree(mdata);
- kfree(fake_devname);
-
- if (!rc) {
- /*
- * We use a 'fake_ctx' here because we need pass it down to the
- * mount_{get,put} functions to test connection against new DFS
- * targets.
- */
- mount_put_conns(cifs_sb, *xid, *server, *ses, *tcon);
- rc = mount_get_conns(&fake_ctx, cifs_sb, xid, server, ses,
- tcon);
- if (!rc || (*server && *ses)) {
- /*
- * We were able to connect to new target server.
- * Update current context with new target server.
- */
- rc = update_vol_info(tgt_it, &fake_ctx, ctx);
- }
- }
- smb3_cleanup_fs_context_contents(&fake_ctx);
- return rc;
-}
-
static int do_dfs_failover(const char *path, const char *full_path, struct cifs_sb_info *cifs_sb,
struct smb3_fs_context *ctx, struct cifs_ses *root_ses,
unsigned int *xid, struct TCP_Server_Info **server,
struct cifs_ses **ses, struct cifs_tcon **tcon)
{
int rc;
- struct dfs_cache_tgt_list tgt_list;
+ struct dfs_cache_tgt_list tgt_list = {0};
struct dfs_cache_tgt_iterator *tgt_it = NULL;
+ struct smb3_fs_context tmp_ctx = {NULL};
if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_DFS)
return -EOPNOTSUPP;
+ cifs_dbg(FYI, "%s: path=%s full_path=%s\n", __func__, path, full_path);
+
rc = dfs_cache_noreq_find(path, NULL, &tgt_list);
if (rc)
return rc;
+ /*
+ * We use a 'tmp_ctx' here because we need pass it down to the mount_{get,put} functions to
+ * test connection against new DFS targets.
+ */
+ rc = smb3_fs_context_dup(&tmp_ctx, ctx);
+ if (rc)
+ goto out;
for (;;) {
+ struct dfs_info3_param ref = {0};
+ char *fake_devname = NULL, *mdata = NULL;
+
/* Get next DFS target server - if any */
rc = get_next_dfs_tgt(path, &tgt_list, &tgt_it);
if (rc)
break;
- /* Connect to next DFS target */
- rc = setup_dfs_tgt_conn(path, full_path, tgt_it, cifs_sb, ctx, xid, server, ses,
- tcon);
- if (!rc || (*server && *ses))
+
+ rc = dfs_cache_get_tgt_referral(path, tgt_it, &ref);
+ if (rc)
+ break;
+
+ cifs_dbg(FYI, "%s: old ctx: UNC=%s prepath=%s\n", __func__, tmp_ctx.UNC,
+ tmp_ctx.prepath);
+
+ mdata = cifs_compose_mount_options(cifs_sb->ctx->mount_options, full_path + 1, &ref,
+ &fake_devname);
+ free_dfs_info_param(&ref);
+
+ if (IS_ERR(mdata)) {
+ rc = PTR_ERR(mdata);
+ mdata = NULL;
+ } else
+ rc = cifs_setup_volume_info(&tmp_ctx, mdata, fake_devname);
+
+ kfree(mdata);
+ kfree(fake_devname);
+
+ if (rc)
break;
+
+ cifs_dbg(FYI, "%s: new ctx: UNC=%s prepath=%s\n", __func__, tmp_ctx.UNC,
+ tmp_ctx.prepath);
+
+ mount_put_conns(cifs_sb, *xid, *server, *ses, *tcon);
+ rc = mount_get_conns(&tmp_ctx, cifs_sb, xid, server, ses, tcon);
+ if (!rc || (*server && *ses)) {
+ /*
+ * We were able to connect to new target server. Update current context with
+ * new target server.
+ */
+ rc = update_vol_info(tgt_it, &tmp_ctx, ctx);
+ break;
+ }
}
if (!rc) {
+ cifs_dbg(FYI, "%s: final ctx: UNC=%s prepath=%s\n", __func__, tmp_ctx.UNC,
+ tmp_ctx.prepath);
/*
- * Update DFS target hint in DFS referral cache with the target
- * server we successfully reconnected to.
+ * Update DFS target hint in DFS referral cache with the target server we
+ * successfully reconnected to.
*/
- rc = dfs_cache_update_tgthint(*xid, root_ses ? root_ses : *ses,
- cifs_sb->local_nls,
- cifs_remap(cifs_sb), path,
- tgt_it);
+ rc = dfs_cache_update_tgthint(*xid, root_ses ? root_ses : *ses, cifs_sb->local_nls,
+ cifs_remap(cifs_sb), path, tgt_it);
}
+
+out:
+ smb3_cleanup_fs_context_contents(&tmp_ctx);
dfs_cache_free_tgts(&tgt_list);
return rc;
}
@@ -3285,77 +3306,77 @@ static void put_root_ses(struct cifs_ses *ses)
cifs_put_smb_ses(ses);
}
-/* Check if a path component is remote and then update @dfs_path accordingly */
-static int check_dfs_prepath(struct cifs_sb_info *cifs_sb, struct smb3_fs_context *ctx,
- const unsigned int xid, struct TCP_Server_Info *server,
- struct cifs_tcon *tcon, char **dfs_path)
+/* Set up next dfs prefix path in @dfs_path */
+static int next_dfs_prepath(struct cifs_sb_info *cifs_sb, struct smb3_fs_context *ctx,
+ const unsigned int xid, struct TCP_Server_Info *server,
+ struct cifs_tcon *tcon, char **dfs_path)
{
- char *path, *s;
- char sep = CIFS_DIR_SEP(cifs_sb), tmp;
- char *npath;
- int rc = 0;
- int added_treename = tcon->Flags & SMB_SHARE_IS_IN_DFS;
- int skip = added_treename;
+ char *path, *npath;
+ int added_treename = is_tcon_dfs(tcon);
+ int rc;
path = cifs_build_path_to_root(ctx, cifs_sb, tcon, added_treename);
if (!path)
return -ENOMEM;
- /*
- * Walk through the path components in @path and check if they're accessible. In case any of
- * the components is -EREMOTE, then update @dfs_path with the next DFS referral request path
- * (NOT including the remaining components).
- */
- s = path;
- do {
- /* skip separators */
- while (*s && *s == sep)
- s++;
- if (!*s)
- break;
- /* next separator */
- while (*s && *s != sep)
- s++;
- /*
- * if the treename is added, we then have to skip the first
- * part within the separators
- */
- if (skip) {
- skip = 0;
- continue;
+ rc = is_path_remote(cifs_sb, ctx, xid, server, tcon);
+ if (rc == -EREMOTE) {
+ struct smb3_fs_context v = {NULL};
+ /* if @path contains a tree name, skip it in the prefix path */
+ if (added_treename) {
+ rc = smb3_parse_devname(path, &v);
+ if (rc)
+ goto out;
+ npath = build_unc_path_to_root(&v, cifs_sb, true);
+ smb3_cleanup_fs_context_contents(&v);
+ } else {
+ v.UNC = ctx->UNC;
+ v.prepath = path + 1;
+ npath = build_unc_path_to_root(&v, cifs_sb, true);
}
- tmp = *s;
- *s = 0;
- rc = server->ops->is_path_accessible(xid, tcon, cifs_sb, path);
- if (rc && rc == -EREMOTE) {
- struct smb3_fs_context v = {NULL};
- /* if @path contains a tree name, skip it in the prefix path */
- if (added_treename) {
- rc = smb3_parse_devname(path, &v);
- if (rc)
- break;
- rc = -EREMOTE;
- npath = build_unc_path_to_root(&v, cifs_sb, true);
- smb3_cleanup_fs_context_contents(&v);
- } else {
- v.UNC = ctx->UNC;
- v.prepath = path + 1;
- npath = build_unc_path_to_root(&v, cifs_sb, true);
- }
- if (IS_ERR(npath)) {
- rc = PTR_ERR(npath);
- break;
- }
- kfree(*dfs_path);
- *dfs_path = npath;
+
+ if (IS_ERR(npath)) {
+ rc = PTR_ERR(npath);
+ goto out;
}
- *s = tmp;
- } while (rc == 0);
+ kfree(*dfs_path);
+ *dfs_path = npath;
+ rc = -EREMOTE;
+ }
+
+out:
kfree(path);
return rc;
}
+/* Check if resolved targets can handle any DFS referrals */
+static int is_referral_server(const char *ref_path, struct cifs_tcon *tcon, bool *ref_server)
+{
+ int rc;
+ struct dfs_info3_param ref = {0};
+
+ if (is_tcon_dfs(tcon)) {
+ *ref_server = true;
+ } else {
+ cifs_dbg(FYI, "%s: ref_path=%s\n", __func__, ref_path);
+
+ rc = dfs_cache_noreq_find(ref_path, &ref, NULL);
+ if (rc) {
+ cifs_dbg(VFS, "%s: dfs_cache_noreq_find: failed (rc=%d)\n", __func__, rc);
+ return rc;
+ }
+ cifs_dbg(FYI, "%s: ref.flags=0x%x\n", __func__, ref.flags);
+ /*
+ * Check if all targets are capable of handling DFS referrals as per
+ * MS-DFSC 2.2.4 RESP_GET_DFS_REFERRAL.
+ */
+ *ref_server = !!(ref.flags & DFSREF_REFERRAL_SERVER);
+ free_dfs_info_param(&ref);
+ }
+ return 0;
+}
+
int cifs_mount(struct cifs_sb_info *cifs_sb, struct smb3_fs_context *ctx)
{
int rc = 0;
@@ -3367,18 +3388,19 @@ int cifs_mount(struct cifs_sb_info *cifs_sb, struct smb3_fs_context *ctx)
char *ref_path = NULL, *full_path = NULL;
char *oldmnt = NULL;
char *mntdata = NULL;
+ bool ref_server = false;
rc = mount_get_conns(ctx, cifs_sb, &xid, &server, &ses, &tcon);
/*
- * Unconditionally try to get an DFS referral (even cached) to determine whether it is an
- * DFS mount.
+ * If called with 'nodfs' mount option, then skip DFS resolving. Otherwise unconditionally
+ * try to get an DFS referral (even cached) to determine whether it is an DFS mount.
*
* Skip prefix path to provide support for DFS referrals from w2k8 servers which don't seem
* to respond with PATH_NOT_COVERED to requests that include the prefix.
*/
- if (dfs_cache_find(xid, ses, cifs_sb->local_nls, cifs_remap(cifs_sb), ctx->UNC + 1, NULL,
+ if ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_DFS) ||
+ dfs_cache_find(xid, ses, cifs_sb->local_nls, cifs_remap(cifs_sb), ctx->UNC + 1, NULL,
NULL)) {
- /* No DFS referral was returned. Looks like a regular share. */
if (rc)
goto error;
/* Check if it is fully accessible and then mount it */
@@ -3432,13 +3454,18 @@ int cifs_mount(struct cifs_sb_info *cifs_sb, struct smb3_fs_context *ctx)
break;
if (!tcon)
continue;
+
/* Make sure that requests go through new root servers */
- if (is_tcon_dfs(tcon)) {
+ rc = is_referral_server(ref_path + 1, tcon, &ref_server);
+ if (rc)
+ break;
+ if (ref_server) {
put_root_ses(root_ses);
set_root_ses(cifs_sb, ses, &root_ses);
}
- /* Check for remaining path components and then continue chasing them (-EREMOTE) */
- rc = check_dfs_prepath(cifs_sb, ctx, xid, server, tcon, &ref_path);
+
+ /* Get next dfs path and then continue chasing them if -EREMOTE */
+ rc = next_dfs_prepath(cifs_sb, ctx, xid, server, tcon, &ref_path);
/* Prevent recursion on broken link referrals */
if (rc == -EREMOTE && ++count > MAX_NESTED_LINKS)
rc = -ELOOP;
diff --git a/fs/cifs/dfs_cache.c b/fs/cifs/dfs_cache.c
index 4950ab0486ae..098b4bc8da59 100644
--- a/fs/cifs/dfs_cache.c
+++ b/fs/cifs/dfs_cache.c
@@ -37,11 +37,12 @@ struct cache_dfs_tgt {
struct cache_entry {
struct hlist_node hlist;
const char *path;
- int ttl;
- int srvtype;
- int flags;
+ int hdr_flags; /* RESP_GET_DFS_REFERRAL.ReferralHeaderFlags */
+ int ttl; /* DFS_REREFERRAL_V3.TimeToLive */
+ int srvtype; /* DFS_REREFERRAL_V3.ServerType */
+ int ref_flags; /* DFS_REREFERRAL_V3.ReferralEntryFlags */
struct timespec64 etime;
- int path_consumed;
+ int path_consumed; /* RESP_GET_DFS_REFERRAL.PathConsumed */
int numtgts;
struct list_head tlist;
struct cache_dfs_tgt *tgthint;
@@ -166,14 +167,11 @@ static int dfscache_proc_show(struct seq_file *m, void *v)
continue;
seq_printf(m,
- "cache entry: path=%s,type=%s,ttl=%d,etime=%ld,"
- "interlink=%s,path_consumed=%d,expired=%s\n",
- ce->path,
- ce->srvtype == DFS_TYPE_ROOT ? "root" : "link",
- ce->ttl, ce->etime.tv_nsec,
- IS_INTERLINK_SET(ce->flags) ? "yes" : "no",
- ce->path_consumed,
- cache_entry_expired(ce) ? "yes" : "no");
+ "cache entry: path=%s,type=%s,ttl=%d,etime=%ld,hdr_flags=0x%x,ref_flags=0x%x,interlink=%s,path_consumed=%d,expired=%s\n",
+ ce->path, ce->srvtype == DFS_TYPE_ROOT ? "root" : "link",
+ ce->ttl, ce->etime.tv_nsec, ce->ref_flags, ce->hdr_flags,
+ IS_INTERLINK_SET(ce->hdr_flags) ? "yes" : "no",
+ ce->path_consumed, cache_entry_expired(ce) ? "yes" : "no");
list_for_each_entry(t, &ce->tlist, list) {
seq_printf(m, " %s%s\n",
@@ -236,11 +234,12 @@ static inline void dump_tgts(const struct cache_entry *ce)
static inline void dump_ce(const struct cache_entry *ce)
{
- cifs_dbg(FYI, "cache entry: path=%s,type=%s,ttl=%d,etime=%ld,interlink=%s,path_consumed=%d,expired=%s\n",
+ cifs_dbg(FYI, "cache entry: path=%s,type=%s,ttl=%d,etime=%ld,hdr_flags=0x%x,ref_flags=0x%x,interlink=%s,path_consumed=%d,expired=%s\n",
ce->path,
ce->srvtype == DFS_TYPE_ROOT ? "root" : "link", ce->ttl,
ce->etime.tv_nsec,
- IS_INTERLINK_SET(ce->flags) ? "yes" : "no",
+ ce->hdr_flags, ce->ref_flags,
+ IS_INTERLINK_SET(ce->hdr_flags) ? "yes" : "no",
ce->path_consumed,
cache_entry_expired(ce) ? "yes" : "no");
dump_tgts(ce);
@@ -381,7 +380,8 @@ static int copy_ref_data(const struct dfs_info3_param *refs, int numrefs,
ce->ttl = refs[0].ttl;
ce->etime = get_expire_time(ce->ttl);
ce->srvtype = refs[0].server_type;
- ce->flags = refs[0].ref_flag;
+ ce->hdr_flags = refs[0].flags;
+ ce->ref_flags = refs[0].ref_flag;
ce->path_consumed = refs[0].path_consumed;
for (i = 0; i < numrefs; i++) {
@@ -799,7 +799,8 @@ static int setup_referral(const char *path, struct cache_entry *ce,
ref->path_consumed = ce->path_consumed;
ref->ttl = ce->ttl;
ref->server_type = ce->srvtype;
- ref->ref_flag = ce->flags;
+ ref->ref_flag = ce->ref_flags;
+ ref->flags = ce->hdr_flags;
return 0;
diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c
index 97ac363b5df1..a3fb81e0ba17 100644
--- a/fs/cifs/dir.c
+++ b/fs/cifs/dir.c
@@ -567,8 +567,8 @@ out_free_xid:
return rc;
}
-int cifs_create(struct inode *inode, struct dentry *direntry, umode_t mode,
- bool excl)
+int cifs_create(struct user_namespace *mnt_userns, struct inode *inode,
+ struct dentry *direntry, umode_t mode, bool excl)
{
int rc;
unsigned int xid = get_xid();
@@ -611,8 +611,8 @@ out_free_xid:
return rc;
}
-int cifs_mknod(struct inode *inode, struct dentry *direntry, umode_t mode,
- dev_t device_number)
+int cifs_mknod(struct user_namespace *mnt_userns, struct inode *inode,
+ struct dentry *direntry, umode_t mode, dev_t device_number)
{
int rc = -EPERM;
unsigned int xid;
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index 6d001905c8e5..26de4329d161 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -580,7 +580,7 @@ int cifs_open(struct inode *inode, struct file *file)
} else if ((rc == -EINVAL) || (rc == -EOPNOTSUPP)) {
if (tcon->ses->serverNOS)
cifs_dbg(VFS, "server %s of type %s returned unexpected error on SMB posix open, disabling posix open support. Check if server update available.\n",
- tcon->ses->serverName,
+ tcon->ses->ip_addr,
tcon->ses->serverNOS);
tcon->broken_posix_open = true;
} else if ((rc != -EIO) && (rc != -EREMOTE) &&
diff --git a/fs/cifs/fs_context.c b/fs/cifs/fs_context.c
index 12a5da0230b5..892f51a21278 100644
--- a/fs/cifs/fs_context.c
+++ b/fs/cifs/fs_context.c
@@ -140,6 +140,8 @@ const struct fs_parameter_spec smb3_fs_parameters[] = {
fsparam_u32("rsize", Opt_rsize),
fsparam_u32("wsize", Opt_wsize),
fsparam_u32("actimeo", Opt_actimeo),
+ fsparam_u32("acdirmax", Opt_acdirmax),
+ fsparam_u32("acregmax", Opt_acregmax),
fsparam_u32("echo_interval", Opt_echo_interval),
fsparam_u32("max_credits", Opt_max_credits),
fsparam_u32("handletimeout", Opt_handletimeout),
@@ -397,7 +399,7 @@ cifs_parse_smb_version(char *value, struct smb3_fs_context *ctx, bool is_smb3)
ctx->vals = &smb3any_values;
break;
case Smb_default:
- ctx->ops = &smb30_operations; /* currently identical with 3.0 */
+ ctx->ops = &smb30_operations;
ctx->vals = &smbdefault_values;
break;
default:
@@ -542,20 +544,37 @@ static int smb3_fs_context_parse_monolithic(struct fs_context *fc,
/* BB Need to add support for sep= here TBD */
while ((key = strsep(&options, ",")) != NULL) {
- if (*key) {
- size_t v_len = 0;
- char *value = strchr(key, '=');
-
- if (value) {
- if (value == key)
- continue;
- *value++ = 0;
- v_len = strlen(value);
- }
- ret = vfs_parse_fs_string(fc, key, value, v_len);
- if (ret < 0)
- break;
+ size_t len;
+ char *value;
+
+ if (*key == 0)
+ break;
+
+ /* Check if following character is the deliminator If yes,
+ * we have encountered a double deliminator reset the NULL
+ * character to the deliminator
+ */
+ while (options && options[0] == ',') {
+ len = strlen(key);
+ strcpy(key + len, options);
+ options = strchr(options, ',');
+ if (options)
+ *options++ = 0;
+ }
+
+
+ len = 0;
+ value = strchr(key, '=');
+ if (value) {
+ if (value == key)
+ continue;
+ *value++ = 0;
+ len = strlen(value);
}
+
+ ret = vfs_parse_fs_string(fc, key, value, len);
+ if (ret < 0)
+ break;
}
return ret;
@@ -929,12 +948,31 @@ static int smb3_fs_context_parse_param(struct fs_context *fc,
ctx->wsize = result.uint_32;
ctx->got_wsize = true;
break;
+ case Opt_acregmax:
+ ctx->acregmax = HZ * result.uint_32;
+ if (ctx->acregmax > CIFS_MAX_ACTIMEO) {
+ cifs_dbg(VFS, "acregmax too large\n");
+ goto cifs_parse_mount_err;
+ }
+ break;
+ case Opt_acdirmax:
+ ctx->acdirmax = HZ * result.uint_32;
+ if (ctx->acdirmax > CIFS_MAX_ACTIMEO) {
+ cifs_dbg(VFS, "acdirmax too large\n");
+ goto cifs_parse_mount_err;
+ }
+ break;
case Opt_actimeo:
- ctx->actimeo = HZ * result.uint_32;
- if (ctx->actimeo > CIFS_MAX_ACTIMEO) {
- cifs_dbg(VFS, "attribute cache timeout too large\n");
+ if (HZ * result.uint_32 > CIFS_MAX_ACTIMEO) {
+ cifs_dbg(VFS, "timeout too large\n");
goto cifs_parse_mount_err;
}
+ if ((ctx->acdirmax != CIFS_DEF_ACTIMEO) ||
+ (ctx->acregmax != CIFS_DEF_ACTIMEO)) {
+ cifs_dbg(VFS, "actimeo ignored since acregmax or acdirmax specified\n");
+ break;
+ }
+ ctx->acdirmax = ctx->acregmax = HZ * result.uint_32;
break;
case Opt_echo_interval:
ctx->echo_interval = result.uint_32;
@@ -1361,7 +1399,8 @@ int smb3_init_fs_context(struct fs_context *fc)
/* default is to use strict cifs caching semantics */
ctx->strict_io = true;
- ctx->actimeo = CIFS_DEF_ACTIMEO;
+ ctx->acregmax = CIFS_DEF_ACTIMEO;
+ ctx->acdirmax = CIFS_DEF_ACTIMEO;
/* Most clients set timeout to 0, allows server to use its default */
ctx->handle_timeout = 0; /* See MS-SMB2 spec section 2.2.14.2.12 */
diff --git a/fs/cifs/fs_context.h b/fs/cifs/fs_context.h
index 1c44a460e2c0..87dd1f7168f2 100644
--- a/fs/cifs/fs_context.h
+++ b/fs/cifs/fs_context.h
@@ -118,6 +118,8 @@ enum cifs_param {
Opt_rsize,
Opt_wsize,
Opt_actimeo,
+ Opt_acdirmax,
+ Opt_acregmax,
Opt_echo_interval,
Opt_max_credits,
Opt_snapshot,
@@ -232,7 +234,9 @@ struct smb3_fs_context {
unsigned int wsize;
unsigned int min_offload;
bool sockopt_tcp_nodelay:1;
- unsigned long actimeo; /* attribute cache timeout (jiffies) */
+ /* attribute cache timemout for files and directories in jiffies */
+ unsigned long acregmax;
+ unsigned long acdirmax;
struct smb_version_operations *ops;
struct smb_version_values *vals;
char *prepath;
diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c
index a83b3a8ffaac..7c61bc9573c0 100644
--- a/fs/cifs/inode.c
+++ b/fs/cifs/inode.c
@@ -1857,7 +1857,8 @@ posix_mkdir_get_info:
goto posix_mkdir_out;
}
-int cifs_mkdir(struct inode *inode, struct dentry *direntry, umode_t mode)
+int cifs_mkdir(struct user_namespace *mnt_userns, struct inode *inode,
+ struct dentry *direntry, umode_t mode)
{
int rc = 0;
unsigned int xid;
@@ -2067,9 +2068,9 @@ do_rename_exit:
}
int
-cifs_rename2(struct inode *source_dir, struct dentry *source_dentry,
- struct inode *target_dir, struct dentry *target_dentry,
- unsigned int flags)
+cifs_rename2(struct user_namespace *mnt_userns, struct inode *source_dir,
+ struct dentry *source_dentry, struct inode *target_dir,
+ struct dentry *target_dentry, unsigned int flags)
{
char *from_name = NULL;
char *to_name = NULL;
@@ -2198,12 +2199,23 @@ cifs_inode_needs_reval(struct inode *inode)
if (!lookupCacheEnabled)
return true;
- if (!cifs_sb->ctx->actimeo)
- return true;
-
- if (!time_in_range(jiffies, cifs_i->time,
- cifs_i->time + cifs_sb->ctx->actimeo))
- return true;
+ /*
+ * depending on inode type, check if attribute caching disabled for
+ * files or directories
+ */
+ if (S_ISDIR(inode->i_mode)) {
+ if (!cifs_sb->ctx->acdirmax)
+ return true;
+ if (!time_in_range(jiffies, cifs_i->time,
+ cifs_i->time + cifs_sb->ctx->acdirmax))
+ return true;
+ } else { /* file */
+ if (!cifs_sb->ctx->acregmax)
+ return true;
+ if (!time_in_range(jiffies, cifs_i->time,
+ cifs_i->time + cifs_sb->ctx->acregmax))
+ return true;
+ }
/* hardlinked files w/ noserverino get "special" treatment */
if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SERVER_INUM) &&
@@ -2370,8 +2382,8 @@ int cifs_revalidate_dentry(struct dentry *dentry)
return cifs_revalidate_mapping(inode);
}
-int cifs_getattr(const struct path *path, struct kstat *stat,
- u32 request_mask, unsigned int flags)
+int cifs_getattr(struct user_namespace *mnt_userns, const struct path *path,
+ struct kstat *stat, u32 request_mask, unsigned int flags)
{
struct dentry *dentry = path->dentry;
struct cifs_sb_info *cifs_sb = CIFS_SB(dentry->d_sb);
@@ -2408,7 +2420,7 @@ int cifs_getattr(const struct path *path, struct kstat *stat,
return rc;
}
- generic_fillattr(inode, stat);
+ generic_fillattr(&init_user_ns, inode, stat);
stat->blksize = cifs_sb->ctx->bsize;
stat->ino = CIFS_I(inode)->uniqueid;
@@ -2610,7 +2622,7 @@ cifs_setattr_unix(struct dentry *direntry, struct iattr *attrs)
if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_PERM)
attrs->ia_valid |= ATTR_FORCE;
- rc = setattr_prepare(direntry, attrs);
+ rc = setattr_prepare(&init_user_ns, direntry, attrs);
if (rc < 0)
goto out;
@@ -2715,7 +2727,7 @@ cifs_setattr_unix(struct dentry *direntry, struct iattr *attrs)
attrs->ia_size != i_size_read(inode))
truncate_setsize(inode, attrs->ia_size);
- setattr_copy(inode, attrs);
+ setattr_copy(&init_user_ns, inode, attrs);
mark_inode_dirty(inode);
/* force revalidate when any of these times are set since some
@@ -2757,7 +2769,7 @@ cifs_setattr_nounix(struct dentry *direntry, struct iattr *attrs)
if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_PERM)
attrs->ia_valid |= ATTR_FORCE;
- rc = setattr_prepare(direntry, attrs);
+ rc = setattr_prepare(&init_user_ns, direntry, attrs);
if (rc < 0) {
free_xid(xid);
return rc;
@@ -2913,7 +2925,7 @@ cifs_setattr_nounix(struct dentry *direntry, struct iattr *attrs)
attrs->ia_size != i_size_read(inode))
truncate_setsize(inode, attrs->ia_size);
- setattr_copy(inode, attrs);
+ setattr_copy(&init_user_ns, inode, attrs);
mark_inode_dirty(inode);
cifs_setattr_exit:
@@ -2923,7 +2935,8 @@ cifs_setattr_exit:
}
int
-cifs_setattr(struct dentry *direntry, struct iattr *attrs)
+cifs_setattr(struct user_namespace *mnt_userns, struct dentry *direntry,
+ struct iattr *attrs)
{
struct cifs_sb_info *cifs_sb = CIFS_SB(direntry->d_sb);
struct cifs_tcon *pTcon = cifs_sb_master_tcon(cifs_sb);
diff --git a/fs/cifs/link.c b/fs/cifs/link.c
index 94dab4309fbb..7c5878a645d9 100644
--- a/fs/cifs/link.c
+++ b/fs/cifs/link.c
@@ -661,7 +661,8 @@ cifs_get_link(struct dentry *direntry, struct inode *inode,
}
int
-cifs_symlink(struct inode *inode, struct dentry *direntry, const char *symname)
+cifs_symlink(struct user_namespace *mnt_userns, struct inode *inode,
+ struct dentry *direntry, const char *symname)
{
int rc = -EOPNOTSUPP;
unsigned int xid;
diff --git a/fs/cifs/sess.c b/fs/cifs/sess.c
index 213465718fa8..183a3a868d7b 100644
--- a/fs/cifs/sess.c
+++ b/fs/cifs/sess.c
@@ -218,7 +218,7 @@ cifs_ses_add_channel(struct cifs_sb_info *cifs_sb, struct cifs_ses *ses,
/* UNC and paths */
/* XXX: Use ses->server->hostname? */
- sprintf(unc, unc_fmt, ses->serverName);
+ sprintf(unc, unc_fmt, ses->ip_addr);
ctx.UNC = unc;
ctx.prepath = "";
diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c
index f19274857292..f5087295424c 100644
--- a/fs/cifs/smb2ops.c
+++ b/fs/cifs/smb2ops.c
@@ -63,17 +63,19 @@ smb2_add_credits(struct TCP_Server_Info *server,
const struct cifs_credits *credits, const int optype)
{
int *val, rc = -1;
+ int scredits, in_flight;
unsigned int add = credits->value;
unsigned int instance = credits->instance;
bool reconnect_detected = false;
+ bool reconnect_with_invalid_credits = false;
spin_lock(&server->req_lock);
val = server->ops->get_credits_field(server, optype);
/* eg found case where write overlapping reconnect messed up credits */
if (((optype & CIFS_OP_MASK) == CIFS_NEG_OP) && (*val != 0))
- trace_smb3_reconnect_with_invalid_credits(server->CurrentMid,
- server->hostname, *val, add);
+ reconnect_with_invalid_credits = true;
+
if ((instance == 0) || (instance == server->reconnect_instance))
*val += add;
else
@@ -84,7 +86,9 @@ smb2_add_credits(struct TCP_Server_Info *server,
pr_warn_once("server overflowed SMB3 credits\n");
}
server->in_flight--;
- if (server->in_flight == 0 && (optype & CIFS_OP_MASK) != CIFS_NEG_OP)
+ if (server->in_flight == 0 &&
+ ((optype & CIFS_OP_MASK) != CIFS_NEG_OP) &&
+ ((optype & CIFS_OP_MASK) != CIFS_SESS_OP))
rc = change_conf(server);
/*
* Sometimes server returns 0 credits on oplock break ack - we need to
@@ -97,14 +101,26 @@ smb2_add_credits(struct TCP_Server_Info *server,
server->oplock_credits++;
}
}
+ scredits = *val;
+ in_flight = server->in_flight;
spin_unlock(&server->req_lock);
wake_up(&server->request_q);
if (reconnect_detected) {
+ trace_smb3_reconnect_detected(server->CurrentMid,
+ server->conn_id, server->hostname, scredits, add, in_flight);
+
cifs_dbg(FYI, "trying to put %d credits from the old server instance %d\n",
add, instance);
}
+ if (reconnect_with_invalid_credits) {
+ trace_smb3_reconnect_with_invalid_credits(server->CurrentMid,
+ server->conn_id, server->hostname, scredits, add, in_flight);
+ cifs_dbg(FYI, "Negotiate operation when server credits is non-zero. Optype: %d, server credits: %d, credits added: %d\n",
+ optype, scredits, add);
+ }
+
if (server->tcpStatus == CifsNeedReconnect
|| server->tcpStatus == CifsExiting)
return;
@@ -123,23 +139,30 @@ smb2_add_credits(struct TCP_Server_Info *server,
cifs_dbg(FYI, "disabling oplocks\n");
break;
default:
- trace_smb3_add_credits(server->CurrentMid,
- server->hostname, rc, add);
- cifs_dbg(FYI, "%s: added %u credits total=%d\n", __func__, add, rc);
+ /* change_conf rebalanced credits for different types */
+ break;
}
+
+ trace_smb3_add_credits(server->CurrentMid,
+ server->conn_id, server->hostname, scredits, add, in_flight);
+ cifs_dbg(FYI, "%s: added %u credits total=%d\n", __func__, add, scredits);
}
static void
smb2_set_credits(struct TCP_Server_Info *server, const int val)
{
+ int scredits, in_flight;
+
spin_lock(&server->req_lock);
server->credits = val;
if (val == 1)
server->reconnect_instance++;
+ scredits = server->credits;
+ in_flight = server->in_flight;
spin_unlock(&server->req_lock);
trace_smb3_set_credits(server->CurrentMid,
- server->hostname, val, val);
+ server->conn_id, server->hostname, scredits, val, in_flight);
cifs_dbg(FYI, "%s: set %u credits\n", __func__, val);
/* don't log while holding the lock */
@@ -171,7 +194,7 @@ smb2_wait_mtu_credits(struct TCP_Server_Info *server, unsigned int size,
unsigned int *num, struct cifs_credits *credits)
{
int rc = 0;
- unsigned int scredits;
+ unsigned int scredits, in_flight;
spin_lock(&server->req_lock);
while (1) {
@@ -208,17 +231,18 @@ smb2_wait_mtu_credits(struct TCP_Server_Info *server, unsigned int size,
DIV_ROUND_UP(*num, SMB2_MAX_BUFFER_SIZE);
credits->instance = server->reconnect_instance;
server->credits -= credits->value;
- scredits = server->credits;
server->in_flight++;
if (server->in_flight > server->max_in_flight)
server->max_in_flight = server->in_flight;
break;
}
}
+ scredits = server->credits;
+ in_flight = server->in_flight;
spin_unlock(&server->req_lock);
trace_smb3_add_credits(server->CurrentMid,
- server->hostname, scredits, -(credits->value));
+ server->conn_id, server->hostname, scredits, -(credits->value), in_flight);
cifs_dbg(FYI, "%s: removed %u credits total=%d\n",
__func__, credits->value, scredits);
@@ -231,14 +255,14 @@ smb2_adjust_credits(struct TCP_Server_Info *server,
const unsigned int payload_size)
{
int new_val = DIV_ROUND_UP(payload_size, SMB2_MAX_BUFFER_SIZE);
- int scredits;
+ int scredits, in_flight;
if (!credits->value || credits->value == new_val)
return 0;
if (credits->value < new_val) {
trace_smb3_too_many_credits(server->CurrentMid,
- server->hostname, 0, credits->value - new_val);
+ server->conn_id, server->hostname, 0, credits->value - new_val, 0);
cifs_server_dbg(VFS, "request has less credits (%d) than required (%d)",
credits->value, new_val);
@@ -248,9 +272,13 @@ smb2_adjust_credits(struct TCP_Server_Info *server,
spin_lock(&server->req_lock);
if (server->reconnect_instance != credits->instance) {
+ scredits = server->credits;
+ in_flight = server->in_flight;
spin_unlock(&server->req_lock);
+
trace_smb3_reconnect_detected(server->CurrentMid,
- server->hostname, 0, 0);
+ server->conn_id, server->hostname, scredits,
+ credits->value - new_val, in_flight);
cifs_server_dbg(VFS, "trying to return %d credits to old session\n",
credits->value - new_val);
return -EAGAIN;
@@ -258,15 +286,18 @@ smb2_adjust_credits(struct TCP_Server_Info *server,
server->credits += credits->value - new_val;
scredits = server->credits;
+ in_flight = server->in_flight;
spin_unlock(&server->req_lock);
wake_up(&server->request_q);
- credits->value = new_val;
trace_smb3_add_credits(server->CurrentMid,
- server->hostname, scredits, credits->value - new_val);
+ server->conn_id, server->hostname, scredits,
+ credits->value - new_val, in_flight);
cifs_dbg(FYI, "%s: adjust added %u credits total=%d\n",
__func__, credits->value - new_val, scredits);
+ credits->value = new_val;
+
return 0;
}
@@ -2369,7 +2400,7 @@ static bool
smb2_is_status_pending(char *buf, struct TCP_Server_Info *server)
{
struct smb2_sync_hdr *shdr = (struct smb2_sync_hdr *)buf;
- int scredits;
+ int scredits, in_flight;
if (shdr->Status != STATUS_PENDING)
return false;
@@ -2378,11 +2409,13 @@ smb2_is_status_pending(char *buf, struct TCP_Server_Info *server)
spin_lock(&server->req_lock);
server->credits += le16_to_cpu(shdr->CreditRequest);
scredits = server->credits;
+ in_flight = server->in_flight;
spin_unlock(&server->req_lock);
wake_up(&server->request_q);
trace_smb3_add_credits(server->CurrentMid,
- server->hostname, scredits, le16_to_cpu(shdr->CreditRequest));
+ server->conn_id, server->hostname, scredits,
+ le16_to_cpu(shdr->CreditRequest), in_flight);
cifs_dbg(FYI, "%s: status pending add %u credits total=%d\n",
__func__, le16_to_cpu(shdr->CreditRequest), scredits);
}
@@ -2418,6 +2451,34 @@ smb2_is_status_io_timeout(char *buf)
return false;
}
+static void
+smb2_is_network_name_deleted(char *buf, struct TCP_Server_Info *server)
+{
+ struct smb2_sync_hdr *shdr = (struct smb2_sync_hdr *)buf;
+ struct list_head *tmp, *tmp1;
+ struct cifs_ses *ses;
+ struct cifs_tcon *tcon;
+
+ if (shdr->Status != STATUS_NETWORK_NAME_DELETED)
+ return;
+
+ spin_lock(&cifs_tcp_ses_lock);
+ list_for_each(tmp, &server->smb_ses_list) {
+ ses = list_entry(tmp, struct cifs_ses, smb_ses_list);
+ list_for_each(tmp1, &ses->tcon_list) {
+ tcon = list_entry(tmp1, struct cifs_tcon, tcon_list);
+ if (tcon->tid == shdr->TreeId) {
+ tcon->need_reconnect = true;
+ spin_unlock(&cifs_tcp_ses_lock);
+ pr_warn_once("Server share %s deleted.\n",
+ tcon->treeName);
+ return;
+ }
+ }
+ }
+ spin_unlock(&cifs_tcp_ses_lock);
+}
+
static int
smb2_oplock_response(struct cifs_tcon *tcon, struct cifs_fid *fid,
struct cifsInodeInfo *cinode)
@@ -4605,6 +4666,10 @@ static void smb2_decrypt_offload(struct work_struct *work)
#ifdef CONFIG_CIFS_STATS2
mid->when_received = jiffies;
#endif
+ if (dw->server->ops->is_network_name_deleted)
+ dw->server->ops->is_network_name_deleted(dw->buf,
+ dw->server);
+
mid->callback(mid);
} else {
spin_lock(&GlobalMid_Lock);
@@ -4723,6 +4788,12 @@ non_offloaded_decrypt:
rc = handle_read_data(server, *mid, buf,
server->vals->read_rsp_size,
pages, npages, len, false);
+ if (rc >= 0) {
+ if (server->ops->is_network_name_deleted) {
+ server->ops->is_network_name_deleted(buf,
+ server);
+ }
+ }
}
free_pages:
@@ -5072,6 +5143,7 @@ struct smb_version_operations smb20_operations = {
.fiemap = smb3_fiemap,
.llseek = smb3_llseek,
.is_status_io_timeout = smb2_is_status_io_timeout,
+ .is_network_name_deleted = smb2_is_network_name_deleted,
};
struct smb_version_operations smb21_operations = {
@@ -5173,6 +5245,7 @@ struct smb_version_operations smb21_operations = {
.fiemap = smb3_fiemap,
.llseek = smb3_llseek,
.is_status_io_timeout = smb2_is_status_io_timeout,
+ .is_network_name_deleted = smb2_is_network_name_deleted,
};
struct smb_version_operations smb30_operations = {
@@ -5286,6 +5359,7 @@ struct smb_version_operations smb30_operations = {
.fiemap = smb3_fiemap,
.llseek = smb3_llseek,
.is_status_io_timeout = smb2_is_status_io_timeout,
+ .is_network_name_deleted = smb2_is_network_name_deleted,
};
struct smb_version_operations smb311_operations = {
@@ -5399,6 +5473,7 @@ struct smb_version_operations smb311_operations = {
.fiemap = smb3_fiemap,
.llseek = smb3_llseek,
.is_status_io_timeout = smb2_is_status_io_timeout,
+ .is_network_name_deleted = smb2_is_network_name_deleted,
};
struct smb_version_values smb20_values = {
diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c
index 794fc3b68b4f..4bbb6126b14d 100644
--- a/fs/cifs/smb2pdu.c
+++ b/fs/cifs/smb2pdu.c
@@ -814,8 +814,9 @@ SMB2_negotiate(const unsigned int xid, struct cifs_ses *ses)
SMB3ANY_VERSION_STRING) == 0) {
req->Dialects[0] = cpu_to_le16(SMB30_PROT_ID);
req->Dialects[1] = cpu_to_le16(SMB302_PROT_ID);
- req->DialectCount = cpu_to_le16(2);
- total_len += 4;
+ req->Dialects[2] = cpu_to_le16(SMB311_PROT_ID);
+ req->DialectCount = cpu_to_le16(3);
+ total_len += 6;
} else if (strcmp(server->vals->version_string,
SMBDEFAULT_VERSION_STRING) == 0) {
req->Dialects[0] = cpu_to_le16(SMB21_PROT_ID);
@@ -849,6 +850,8 @@ SMB2_negotiate(const unsigned int xid, struct cifs_ses *ses)
SMB2_CLIENT_GUID_SIZE);
if ((server->vals->protocol_id == SMB311_PROT_ID) ||
(strcmp(server->vals->version_string,
+ SMB3ANY_VERSION_STRING) == 0) ||
+ (strcmp(server->vals->version_string,
SMBDEFAULT_VERSION_STRING) == 0))
assemble_neg_contexts(req, server, &total_len);
}
@@ -883,6 +886,10 @@ SMB2_negotiate(const unsigned int xid, struct cifs_ses *ses)
cifs_server_dbg(VFS,
"SMB2.1 dialect returned but not requested\n");
return -EIO;
+ } else if (rsp->DialectRevision == cpu_to_le16(SMB311_PROT_ID)) {
+ /* ops set to 3.0 by default for default so update */
+ server->ops = &smb311_operations;
+ server->vals = &smb311_values;
}
} else if (strcmp(server->vals->version_string,
SMBDEFAULT_VERSION_STRING) == 0) {
@@ -1042,10 +1049,11 @@ int smb3_validate_negotiate(const unsigned int xid, struct cifs_tcon *tcon)
SMB3ANY_VERSION_STRING) == 0) {
pneg_inbuf->Dialects[0] = cpu_to_le16(SMB30_PROT_ID);
pneg_inbuf->Dialects[1] = cpu_to_le16(SMB302_PROT_ID);
- pneg_inbuf->DialectCount = cpu_to_le16(2);
- /* structure is big enough for 3 dialects, sending only 2 */
+ pneg_inbuf->Dialects[2] = cpu_to_le16(SMB311_PROT_ID);
+ pneg_inbuf->DialectCount = cpu_to_le16(3);
+ /* SMB 2.1 not included so subtract one dialect from len */
inbuflen = sizeof(*pneg_inbuf) -
- (2 * sizeof(pneg_inbuf->Dialects[0]));
+ (sizeof(pneg_inbuf->Dialects[0]));
} else if (strcmp(server->vals->version_string,
SMBDEFAULT_VERSION_STRING) == 0) {
pneg_inbuf->Dialects[0] = cpu_to_le16(SMB21_PROT_ID);
@@ -1053,7 +1061,7 @@ int smb3_validate_negotiate(const unsigned int xid, struct cifs_tcon *tcon)
pneg_inbuf->Dialects[2] = cpu_to_le16(SMB302_PROT_ID);
pneg_inbuf->Dialects[3] = cpu_to_le16(SMB311_PROT_ID);
pneg_inbuf->DialectCount = cpu_to_le16(4);
- /* structure is big enough for 3 dialects */
+ /* structure is big enough for 4 dialects */
inbuflen = sizeof(*pneg_inbuf);
} else {
/* otherwise specific dialect was requested */
@@ -1253,7 +1261,7 @@ SMB2_sess_sendreceive(struct SMB2_sess_data *sess_data)
cifs_ses_server(sess_data->ses),
&rqst,
&sess_data->buf0_type,
- CIFS_LOG_ERROR | CIFS_NEG_OP, &rsp_iov);
+ CIFS_LOG_ERROR | CIFS_SESS_OP, &rsp_iov);
cifs_small_buf_release(sess_data->iov[0].iov_base);
memcpy(&sess_data->iov[0], &rsp_iov, sizeof(struct kvec));
diff --git a/fs/cifs/trace.h b/fs/cifs/trace.h
index c3d1a584f251..d6df908dccad 100644
--- a/fs/cifs/trace.h
+++ b/fs/cifs/trace.h
@@ -851,17 +851,21 @@ DEFINE_SMB3_LEASE_ERR_EVENT(lease_err);
DECLARE_EVENT_CLASS(smb3_reconnect_class,
TP_PROTO(__u64 currmid,
+ __u64 conn_id,
char *hostname),
- TP_ARGS(currmid, hostname),
+ TP_ARGS(currmid, conn_id, hostname),
TP_STRUCT__entry(
__field(__u64, currmid)
+ __field(__u64, conn_id)
__field(char *, hostname)
),
TP_fast_assign(
__entry->currmid = currmid;
+ __entry->conn_id = conn_id;
__entry->hostname = hostname;
),
- TP_printk("server=%s current_mid=0x%llx",
+ TP_printk("conn_id=0x%llx server=%s current_mid=%llu",
+ __entry->conn_id,
__entry->hostname,
__entry->currmid)
)
@@ -869,44 +873,56 @@ DECLARE_EVENT_CLASS(smb3_reconnect_class,
#define DEFINE_SMB3_RECONNECT_EVENT(name) \
DEFINE_EVENT(smb3_reconnect_class, smb3_##name, \
TP_PROTO(__u64 currmid, \
- char *hostname), \
- TP_ARGS(currmid, hostname))
+ __u64 conn_id, \
+ char *hostname), \
+ TP_ARGS(currmid, conn_id, hostname))
DEFINE_SMB3_RECONNECT_EVENT(reconnect);
DEFINE_SMB3_RECONNECT_EVENT(partial_send_reconnect);
DECLARE_EVENT_CLASS(smb3_credit_class,
TP_PROTO(__u64 currmid,
+ __u64 conn_id,
char *hostname,
int credits,
- int credits_to_add),
- TP_ARGS(currmid, hostname, credits, credits_to_add),
+ int credits_to_add,
+ int in_flight),
+ TP_ARGS(currmid, conn_id, hostname, credits, credits_to_add, in_flight),
TP_STRUCT__entry(
__field(__u64, currmid)
+ __field(__u64, conn_id)
__field(char *, hostname)
__field(int, credits)
__field(int, credits_to_add)
+ __field(int, in_flight)
),
TP_fast_assign(
__entry->currmid = currmid;
+ __entry->conn_id = conn_id;
__entry->hostname = hostname;
__entry->credits = credits;
__entry->credits_to_add = credits_to_add;
+ __entry->in_flight = in_flight;
),
- TP_printk("server=%s current_mid=0x%llx credits=%d credits_to_add=%d",
+ TP_printk("conn_id=0x%llx server=%s current_mid=%llu "
+ "credits=%d credit_change=%d in_flight=%d",
+ __entry->conn_id,
__entry->hostname,
__entry->currmid,
__entry->credits,
- __entry->credits_to_add)
+ __entry->credits_to_add,
+ __entry->in_flight)
)
#define DEFINE_SMB3_CREDIT_EVENT(name) \
DEFINE_EVENT(smb3_credit_class, smb3_##name, \
TP_PROTO(__u64 currmid, \
+ __u64 conn_id, \
char *hostname, \
int credits, \
- int credits_to_add), \
- TP_ARGS(currmid, hostname, credits, credits_to_add))
+ int credits_to_add, \
+ int in_flight), \
+ TP_ARGS(currmid, conn_id, hostname, credits, credits_to_add, in_flight))
DEFINE_SMB3_CREDIT_EVENT(reconnect_with_invalid_credits);
DEFINE_SMB3_CREDIT_EVENT(reconnect_detected);
diff --git a/fs/cifs/transport.c b/fs/cifs/transport.c
index 4a2b836eb017..e90a1d1380b0 100644
--- a/fs/cifs/transport.c
+++ b/fs/cifs/transport.c
@@ -445,7 +445,7 @@ unmask:
*/
server->tcpStatus = CifsNeedReconnect;
trace_smb3_partial_send_reconnect(server->CurrentMid,
- server->hostname);
+ server->conn_id, server->hostname);
}
smbd_done:
if (rc < 0 && rc != -EINTR)
@@ -527,7 +527,7 @@ wait_for_free_credits(struct TCP_Server_Info *server, const int num_credits,
int *credits;
int optype;
long int t;
- int scredits = server->credits;
+ int scredits, in_flight;
if (timeout < 0)
t = MAX_JIFFY_OFFSET;
@@ -551,23 +551,39 @@ wait_for_free_credits(struct TCP_Server_Info *server, const int num_credits,
server->max_in_flight = server->in_flight;
*credits -= 1;
*instance = server->reconnect_instance;
+ scredits = *credits;
+ in_flight = server->in_flight;
spin_unlock(&server->req_lock);
+
+ trace_smb3_add_credits(server->CurrentMid,
+ server->conn_id, server->hostname, scredits, -1, in_flight);
+ cifs_dbg(FYI, "%s: remove %u credits total=%d\n",
+ __func__, 1, scredits);
+
return 0;
}
while (1) {
if (*credits < num_credits) {
+ scredits = *credits;
spin_unlock(&server->req_lock);
+
cifs_num_waiters_inc(server);
rc = wait_event_killable_timeout(server->request_q,
has_credits(server, credits, num_credits), t);
cifs_num_waiters_dec(server);
if (!rc) {
+ spin_lock(&server->req_lock);
+ scredits = *credits;
+ in_flight = server->in_flight;
+ spin_unlock(&server->req_lock);
+
trace_smb3_credit_timeout(server->CurrentMid,
- server->hostname, num_credits, 0);
+ server->conn_id, server->hostname, scredits,
+ num_credits, in_flight);
cifs_server_dbg(VFS, "wait timed out after %d ms\n",
- timeout);
- return -ENOTSUPP;
+ timeout);
+ return -EBUSY;
}
if (rc == -ERESTARTSYS)
return -ERESTARTSYS;
@@ -595,6 +611,7 @@ wait_for_free_credits(struct TCP_Server_Info *server, const int num_credits,
server->in_flight > 2 * MAX_COMPOUND &&
*credits <= MAX_COMPOUND) {
spin_unlock(&server->req_lock);
+
cifs_num_waiters_inc(server);
rc = wait_event_killable_timeout(
server->request_q,
@@ -603,13 +620,18 @@ wait_for_free_credits(struct TCP_Server_Info *server, const int num_credits,
t);
cifs_num_waiters_dec(server);
if (!rc) {
+ spin_lock(&server->req_lock);
+ scredits = *credits;
+ in_flight = server->in_flight;
+ spin_unlock(&server->req_lock);
+
trace_smb3_credit_timeout(
- server->CurrentMid,
- server->hostname, num_credits,
- 0);
+ server->CurrentMid,
+ server->conn_id, server->hostname,
+ scredits, num_credits, in_flight);
cifs_server_dbg(VFS, "wait timed out after %d ms\n",
- timeout);
- return -ENOTSUPP;
+ timeout);
+ return -EBUSY;
}
if (rc == -ERESTARTSYS)
return -ERESTARTSYS;
@@ -625,16 +647,18 @@ wait_for_free_credits(struct TCP_Server_Info *server, const int num_credits,
/* update # of requests on the wire to server */
if ((flags & CIFS_TIMEOUT_MASK) != CIFS_BLOCKING_OP) {
*credits -= num_credits;
- scredits = *credits;
server->in_flight += num_credits;
if (server->in_flight > server->max_in_flight)
server->max_in_flight = server->in_flight;
*instance = server->reconnect_instance;
}
+ scredits = *credits;
+ in_flight = server->in_flight;
spin_unlock(&server->req_lock);
trace_smb3_add_credits(server->CurrentMid,
- server->hostname, scredits, -(num_credits));
+ server->conn_id, server->hostname, scredits,
+ -(num_credits), in_flight);
cifs_dbg(FYI, "%s: remove %u credits total=%d\n",
__func__, num_credits, scredits);
break;
@@ -656,13 +680,13 @@ wait_for_compound_request(struct TCP_Server_Info *server, int num,
const int flags, unsigned int *instance)
{
int *credits;
- int scredits, sin_flight;
+ int scredits, in_flight;
credits = server->ops->get_credits_field(server, flags & CIFS_OP_MASK);
spin_lock(&server->req_lock);
scredits = *credits;
- sin_flight = server->in_flight;
+ in_flight = server->in_flight;
if (*credits < num) {
/*
@@ -684,10 +708,11 @@ wait_for_compound_request(struct TCP_Server_Info *server, int num,
if (server->in_flight == 0) {
spin_unlock(&server->req_lock);
trace_smb3_insufficient_credits(server->CurrentMid,
- server->hostname, scredits, sin_flight);
+ server->conn_id, server->hostname, scredits,
+ num, in_flight);
cifs_dbg(FYI, "%s: %d requests in flight, needed %d total=%d\n",
- __func__, sin_flight, num, scredits);
- return -ENOTSUPP;
+ __func__, in_flight, num, scredits);
+ return -EDEADLK;
}
}
spin_unlock(&server->req_lock);
@@ -1171,7 +1196,7 @@ compound_send_recv(const unsigned int xid, struct cifs_ses *ses,
/*
* Compounding is never used during session establish.
*/
- if ((ses->status == CifsNew) || (optype & CIFS_NEG_OP))
+ if ((ses->status == CifsNew) || (optype & CIFS_NEG_OP) || (optype & CIFS_SESS_OP))
smb311_update_preauth_hash(ses, rqst[0].rq_iov,
rqst[0].rq_nvec);
@@ -1236,7 +1261,7 @@ compound_send_recv(const unsigned int xid, struct cifs_ses *ses,
/*
* Compounding is never used during session establish.
*/
- if ((ses->status == CifsNew) || (optype & CIFS_NEG_OP)) {
+ if ((ses->status == CifsNew) || (optype & CIFS_NEG_OP) || (optype & CIFS_SESS_OP)) {
struct kvec iov = {
.iov_base = resp_iov[0].iov_base,
.iov_len = resp_iov[0].iov_len
diff --git a/fs/cifs/xattr.c b/fs/cifs/xattr.c
index 6b658a1172ef..41a611e76bb7 100644
--- a/fs/cifs/xattr.c
+++ b/fs/cifs/xattr.c
@@ -101,6 +101,7 @@ static int cifs_creation_time_set(unsigned int xid, struct cifs_tcon *pTcon,
}
static int cifs_xattr_set(const struct xattr_handler *handler,
+ struct user_namespace *mnt_userns,
struct dentry *dentry, struct inode *inode,
const char *name, const void *value,
size_t size, int flags)
diff --git a/fs/coda/coda_linux.h b/fs/coda/coda_linux.h
index d5ebd36fb2cc..e7b27754ce78 100644
--- a/fs/coda/coda_linux.h
+++ b/fs/coda/coda_linux.h
@@ -46,10 +46,12 @@ extern const struct file_operations coda_ioctl_operations;
/* operations shared over more than one file */
int coda_open(struct inode *i, struct file *f);
int coda_release(struct inode *i, struct file *f);
-int coda_permission(struct inode *inode, int mask);
+int coda_permission(struct user_namespace *mnt_userns, struct inode *inode,
+ int mask);
int coda_revalidate_inode(struct inode *);
-int coda_getattr(const struct path *, struct kstat *, u32, unsigned int);
-int coda_setattr(struct dentry *, struct iattr *);
+int coda_getattr(struct user_namespace *, const struct path *, struct kstat *,
+ u32, unsigned int);
+int coda_setattr(struct user_namespace *, struct dentry *, struct iattr *);
/* this file: heloers */
char *coda_f2s(struct CodaFid *f);
diff --git a/fs/coda/dir.c b/fs/coda/dir.c
index ca40c2556ba6..d69989c1bac3 100644
--- a/fs/coda/dir.c
+++ b/fs/coda/dir.c
@@ -73,7 +73,8 @@ static struct dentry *coda_lookup(struct inode *dir, struct dentry *entry, unsig
}
-int coda_permission(struct inode *inode, int mask)
+int coda_permission(struct user_namespace *mnt_userns, struct inode *inode,
+ int mask)
{
int error;
@@ -132,7 +133,8 @@ static inline void coda_dir_drop_nlink(struct inode *dir)
}
/* creation routines: create, mknod, mkdir, link, symlink */
-static int coda_create(struct inode *dir, struct dentry *de, umode_t mode, bool excl)
+static int coda_create(struct user_namespace *mnt_userns, struct inode *dir,
+ struct dentry *de, umode_t mode, bool excl)
{
int error;
const char *name=de->d_name.name;
@@ -164,7 +166,8 @@ err_out:
return error;
}
-static int coda_mkdir(struct inode *dir, struct dentry *de, umode_t mode)
+static int coda_mkdir(struct user_namespace *mnt_userns, struct inode *dir,
+ struct dentry *de, umode_t mode)
{
struct inode *inode;
struct coda_vattr attrs;
@@ -225,7 +228,8 @@ static int coda_link(struct dentry *source_de, struct inode *dir_inode,
}
-static int coda_symlink(struct inode *dir_inode, struct dentry *de,
+static int coda_symlink(struct user_namespace *mnt_userns,
+ struct inode *dir_inode, struct dentry *de,
const char *symname)
{
const char *name = de->d_name.name;
@@ -291,9 +295,9 @@ static int coda_rmdir(struct inode *dir, struct dentry *de)
}
/* rename */
-static int coda_rename(struct inode *old_dir, struct dentry *old_dentry,
- struct inode *new_dir, struct dentry *new_dentry,
- unsigned int flags)
+static int coda_rename(struct user_namespace *mnt_userns, struct inode *old_dir,
+ struct dentry *old_dentry, struct inode *new_dir,
+ struct dentry *new_dentry, unsigned int flags)
{
const char *old_name = old_dentry->d_name.name;
const char *new_name = new_dentry->d_name.name;
diff --git a/fs/coda/inode.c b/fs/coda/inode.c
index b1c70e2b9b1e..d9f1bd7153df 100644
--- a/fs/coda/inode.c
+++ b/fs/coda/inode.c
@@ -251,16 +251,17 @@ static void coda_evict_inode(struct inode *inode)
coda_cache_clear_inode(inode);
}
-int coda_getattr(const struct path *path, struct kstat *stat,
- u32 request_mask, unsigned int flags)
+int coda_getattr(struct user_namespace *mnt_userns, const struct path *path,
+ struct kstat *stat, u32 request_mask, unsigned int flags)
{
int err = coda_revalidate_inode(d_inode(path->dentry));
if (!err)
- generic_fillattr(d_inode(path->dentry), stat);
+ generic_fillattr(&init_user_ns, d_inode(path->dentry), stat);
return err;
}
-int coda_setattr(struct dentry *de, struct iattr *iattr)
+int coda_setattr(struct user_namespace *mnt_userns, struct dentry *de,
+ struct iattr *iattr)
{
struct inode *inode = d_inode(de);
struct coda_vattr vattr;
diff --git a/fs/coda/pioctl.c b/fs/coda/pioctl.c
index 3aec27e5eb82..cb9fd59a688c 100644
--- a/fs/coda/pioctl.c
+++ b/fs/coda/pioctl.c
@@ -24,7 +24,8 @@
#include "coda_linux.h"
/* pioctl ops */
-static int coda_ioctl_permission(struct inode *inode, int mask);
+static int coda_ioctl_permission(struct user_namespace *mnt_userns,
+ struct inode *inode, int mask);
static long coda_pioctl(struct file *filp, unsigned int cmd,
unsigned long user_data);
@@ -40,7 +41,8 @@ const struct file_operations coda_ioctl_operations = {
};
/* the coda pioctl inode ops */
-static int coda_ioctl_permission(struct inode *inode, int mask)
+static int coda_ioctl_permission(struct user_namespace *mnt_userns,
+ struct inode *inode, int mask)
{
return (mask & MAY_EXEC) ? -EACCES : 0;
}
diff --git a/fs/configfs/configfs_internal.h b/fs/configfs/configfs_internal.h
index 22dce2d35a4b..9a3aed249692 100644
--- a/fs/configfs/configfs_internal.h
+++ b/fs/configfs/configfs_internal.h
@@ -79,7 +79,8 @@ extern void configfs_hash_and_remove(struct dentry * dir, const char * name);
extern const unsigned char * configfs_get_name(struct configfs_dirent *sd);
extern void configfs_drop_dentry(struct configfs_dirent *sd, struct dentry *parent);
-extern int configfs_setattr(struct dentry *dentry, struct iattr *iattr);
+extern int configfs_setattr(struct user_namespace *mnt_userns,
+ struct dentry *dentry, struct iattr *iattr);
extern struct dentry *configfs_pin_fs(void);
extern void configfs_release_fs(void);
@@ -92,7 +93,8 @@ extern const struct inode_operations configfs_root_inode_operations;
extern const struct inode_operations configfs_symlink_inode_operations;
extern const struct dentry_operations configfs_dentry_ops;
-extern int configfs_symlink(struct inode *dir, struct dentry *dentry,
+extern int configfs_symlink(struct user_namespace *mnt_userns,
+ struct inode *dir, struct dentry *dentry,
const char *symname);
extern int configfs_unlink(struct inode *dir, struct dentry *dentry);
diff --git a/fs/configfs/dir.c b/fs/configfs/dir.c
index b839dd1b459f..b6098e02e20b 100644
--- a/fs/configfs/dir.c
+++ b/fs/configfs/dir.c
@@ -1268,7 +1268,8 @@ out_root_unlock:
}
EXPORT_SYMBOL(configfs_depend_item_unlocked);
-static int configfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
+static int configfs_mkdir(struct user_namespace *mnt_userns, struct inode *dir,
+ struct dentry *dentry, umode_t mode)
{
int ret = 0;
int module_got = 0;
diff --git a/fs/configfs/inode.c b/fs/configfs/inode.c
index 8bd6a883c94c..42c348bb2903 100644
--- a/fs/configfs/inode.c
+++ b/fs/configfs/inode.c
@@ -40,7 +40,8 @@ static const struct inode_operations configfs_inode_operations ={
.setattr = configfs_setattr,
};
-int configfs_setattr(struct dentry * dentry, struct iattr * iattr)
+int configfs_setattr(struct user_namespace *mnt_userns, struct dentry *dentry,
+ struct iattr *iattr)
{
struct inode * inode = d_inode(dentry);
struct configfs_dirent * sd = dentry->d_fsdata;
@@ -67,7 +68,7 @@ int configfs_setattr(struct dentry * dentry, struct iattr * iattr)
}
/* attributes were changed atleast once in past */
- error = simple_setattr(dentry, iattr);
+ error = simple_setattr(mnt_userns, dentry, iattr);
if (error)
return error;
diff --git a/fs/configfs/symlink.c b/fs/configfs/symlink.c
index cb61467478ca..77c854364e60 100644
--- a/fs/configfs/symlink.c
+++ b/fs/configfs/symlink.c
@@ -139,7 +139,8 @@ static int get_target(const char *symname, struct path *path,
}
-int configfs_symlink(struct inode *dir, struct dentry *dentry, const char *symname)
+int configfs_symlink(struct user_namespace *mnt_userns, struct inode *dir,
+ struct dentry *dentry, const char *symname)
{
int ret;
struct path path;
@@ -197,7 +198,8 @@ int configfs_symlink(struct inode *dir, struct dentry *dentry, const char *symna
if (dentry->d_inode || d_unhashed(dentry))
ret = -EEXIST;
else
- ret = inode_permission(dir, MAY_WRITE | MAY_EXEC);
+ ret = inode_permission(&init_user_ns, dir,
+ MAY_WRITE | MAY_EXEC);
if (!ret)
ret = type->ct_item_ops->allow_link(parent_item, target_item);
if (!ret) {
diff --git a/fs/coredump.c b/fs/coredump.c
index a2f6ecc8e345..1c0fdc1aa70b 100644
--- a/fs/coredump.c
+++ b/fs/coredump.c
@@ -703,6 +703,7 @@ void do_coredump(const kernel_siginfo_t *siginfo)
goto close_fail;
}
} else {
+ struct user_namespace *mnt_userns;
struct inode *inode;
int open_flags = O_CREAT | O_RDWR | O_NOFOLLOW |
O_LARGEFILE | O_EXCL;
@@ -780,13 +781,15 @@ void do_coredump(const kernel_siginfo_t *siginfo)
* a process dumps core while its cwd is e.g. on a vfat
* filesystem.
*/
- if (!uid_eq(inode->i_uid, current_fsuid()))
+ mnt_userns = file_mnt_user_ns(cprm.file);
+ if (!uid_eq(i_uid_into_mnt(mnt_userns, inode), current_fsuid()))
goto close_fail;
if ((inode->i_mode & 0677) != 0600)
goto close_fail;
if (!(cprm.file->f_mode & FMODE_CAN_WRITE))
goto close_fail;
- if (do_truncate(cprm.file->f_path.dentry, 0, 0, cprm.file))
+ if (do_truncate(mnt_userns, cprm.file->f_path.dentry,
+ 0, 0, cprm.file))
goto close_fail;
}
@@ -894,10 +897,10 @@ int dump_user_range(struct coredump_params *cprm, unsigned long start,
*/
page = get_dump_page(addr);
if (page) {
- void *kaddr = kmap(page);
+ void *kaddr = kmap_local_page(page);
stop = !dump_emit(cprm, kaddr, PAGE_SIZE);
- kunmap(page);
+ kunmap_local(kaddr);
put_page(page);
} else {
stop = !dump_skip(cprm, PAGE_SIZE);
@@ -931,7 +934,8 @@ void dump_truncate(struct coredump_params *cprm)
if (file->f_op->llseek && file->f_op->llseek != no_llseek) {
offset = file->f_op->llseek(file, 0, SEEK_CUR);
if (i_size_read(file->f_mapping->host) < offset)
- do_truncate(file->f_path.dentry, offset, 0, file);
+ do_truncate(file_mnt_user_ns(file), file->f_path.dentry,
+ offset, 0, file);
}
}
EXPORT_SYMBOL(dump_truncate);
diff --git a/fs/cramfs/inode.c b/fs/cramfs/inode.c
index 4b90cfd1ec36..2be65269a987 100644
--- a/fs/cramfs/inode.c
+++ b/fs/cramfs/inode.c
@@ -392,8 +392,7 @@ static int cramfs_physmem_mmap(struct file *file, struct vm_area_struct *vma)
/* Don't map the last page if it contains some other data */
if (pgoff + pages == max_pages && cramfs_last_page_is_shared(inode)) {
- pr_debug("mmap: %s: last page is shared\n",
- file_dentry(file)->d_name.name);
+ pr_debug("mmap: %pD: last page is shared\n", file);
pages--;
}
@@ -430,16 +429,15 @@ static int cramfs_physmem_mmap(struct file *file, struct vm_area_struct *vma)
}
if (!ret)
- pr_debug("mapped %s[%lu] at 0x%08lx (%u/%lu pages) "
- "to vma 0x%08lx, page_prot 0x%llx\n",
- file_dentry(file)->d_name.name, pgoff,
- address, pages, vma_pages(vma), vma->vm_start,
+ pr_debug("mapped %pD[%lu] at 0x%08lx (%u/%lu pages) "
+ "to vma 0x%08lx, page_prot 0x%llx\n", file,
+ pgoff, address, pages, vma_pages(vma), vma->vm_start,
(unsigned long long)pgprot_val(vma->vm_page_prot));
return ret;
bailout:
- pr_debug("%s[%lu]: direct mmap impossible: %s\n",
- file_dentry(file)->d_name.name, pgoff, bailout_reason);
+ pr_debug("%pD[%lu]: direct mmap impossible: %s\n",
+ file, pgoff, bailout_reason);
/* Didn't manage any direct map, but normal paging is still possible */
return 0;
}
@@ -469,8 +467,8 @@ static unsigned long cramfs_physmem_get_unmapped_area(struct file *file,
if (!offset || block_pages != pages)
return -ENOSYS;
addr = sbi->linear_phys_addr + offset;
- pr_debug("get_unmapped for %s ofs %#lx siz %lu at 0x%08lx\n",
- file_dentry(file)->d_name.name, pgoff*PAGE_SIZE, len, addr);
+ pr_debug("get_unmapped for %pD ofs %#lx siz %lu at 0x%08lx\n",
+ file, pgoff*PAGE_SIZE, len, addr);
return addr;
}
diff --git a/fs/crypto/policy.c b/fs/crypto/policy.c
index a51cef6bd27f..ed3d623724cd 100644
--- a/fs/crypto/policy.c
+++ b/fs/crypto/policy.c
@@ -465,7 +465,7 @@ int fscrypt_ioctl_set_policy(struct file *filp, const void __user *arg)
return -EFAULT;
policy.version = version;
- if (!inode_owner_or_capable(inode))
+ if (!inode_owner_or_capable(&init_user_ns, inode))
return -EACCES;
ret = mnt_want_write_file(filp);
diff --git a/fs/dcache.c b/fs/dcache.c
index 97e81a844a96..7d24ff7eb206 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -456,23 +456,6 @@ static void d_lru_shrink_move(struct list_lru_one *lru, struct dentry *dentry,
list_lru_isolate_move(lru, &dentry->d_lru, list);
}
-/**
- * d_drop - drop a dentry
- * @dentry: dentry to drop
- *
- * d_drop() unhashes the entry from the parent dentry hashes, so that it won't
- * be found through a VFS lookup any more. Note that this is different from
- * deleting the dentry - d_delete will try to mark the dentry negative if
- * possible, giving a successful _negative_ lookup, while d_drop will
- * just make the cache lookup fail.
- *
- * d_drop() is used mainly for stuff that wants to invalidate a dentry for some
- * reason (NFS timeouts or autofs deletes).
- *
- * __d_drop requires dentry->d_lock
- * ___d_drop doesn't mark dentry as "unhashed"
- * (dentry->d_hash.pprev will be LIST_POISON2, not NULL).
- */
static void ___d_drop(struct dentry *dentry)
{
struct hlist_bl_head *b;
@@ -501,6 +484,24 @@ void __d_drop(struct dentry *dentry)
}
EXPORT_SYMBOL(__d_drop);
+/**
+ * d_drop - drop a dentry
+ * @dentry: dentry to drop
+ *
+ * d_drop() unhashes the entry from the parent dentry hashes, so that it won't
+ * be found through a VFS lookup any more. Note that this is different from
+ * deleting the dentry - d_delete will try to mark the dentry negative if
+ * possible, giving a successful _negative_ lookup, while d_drop will
+ * just make the cache lookup fail.
+ *
+ * d_drop() is used mainly for stuff that wants to invalidate a dentry for some
+ * reason (NFS timeouts or autofs deletes).
+ *
+ * __d_drop requires dentry->d_lock
+ *
+ * ___d_drop doesn't mark dentry as "unhashed"
+ * (dentry->d_hash.pprev will be LIST_POISON2, not NULL).
+ */
void d_drop(struct dentry *dentry)
{
spin_lock(&dentry->d_lock);
@@ -996,20 +997,6 @@ struct dentry *d_find_any_alias(struct inode *inode)
}
EXPORT_SYMBOL(d_find_any_alias);
-/**
- * d_find_alias - grab a hashed alias of inode
- * @inode: inode in question
- *
- * If inode has a hashed alias, or is a directory and has any alias,
- * acquire the reference to alias and return it. Otherwise return NULL.
- * Notice that if inode is a directory there can be only one alias and
- * it can be unhashed only if it has no children, or if it is the root
- * of a filesystem, or if the directory was renamed and d_revalidate
- * was the first vfs operation to notice.
- *
- * If the inode has an IS_ROOT, DCACHE_DISCONNECTED alias, then prefer
- * any other hashed alias over that one.
- */
static struct dentry *__d_find_alias(struct inode *inode)
{
struct dentry *alias;
@@ -1029,6 +1016,20 @@ static struct dentry *__d_find_alias(struct inode *inode)
return NULL;
}
+/**
+ * d_find_alias - grab a hashed alias of inode
+ * @inode: inode in question
+ *
+ * If inode has a hashed alias, or is a directory and has any alias,
+ * acquire the reference to alias and return it. Otherwise return NULL.
+ * Notice that if inode is a directory there can be only one alias and
+ * it can be unhashed only if it has no children, or if it is the root
+ * of a filesystem, or if the directory was renamed and d_revalidate
+ * was the first vfs operation to notice.
+ *
+ * If the inode has an IS_ROOT, DCACHE_DISCONNECTED alias, then prefer
+ * any other hashed alias over that one.
+ */
struct dentry *d_find_alias(struct inode *inode)
{
struct dentry *de = NULL;
@@ -1043,6 +1044,31 @@ struct dentry *d_find_alias(struct inode *inode)
EXPORT_SYMBOL(d_find_alias);
/*
+ * Caller MUST be holding rcu_read_lock() and be guaranteed
+ * that inode won't get freed until rcu_read_unlock().
+ */
+struct dentry *d_find_alias_rcu(struct inode *inode)
+{
+ struct hlist_head *l = &inode->i_dentry;
+ struct dentry *de = NULL;
+
+ spin_lock(&inode->i_lock);
+ // ->i_dentry and ->i_rcu are colocated, but the latter won't be
+ // used without having I_FREEING set, which means no aliases left
+ if (likely(!(inode->i_state & I_FREEING) && !hlist_empty(l))) {
+ if (S_ISDIR(inode->i_mode)) {
+ de = hlist_entry(l->first, struct dentry, d_u.d_alias);
+ } else {
+ hlist_for_each_entry(de, l, d_u.d_alias)
+ if (!d_unhashed(de))
+ break;
+ }
+ }
+ spin_unlock(&inode->i_lock);
+ return de;
+}
+
+/*
* Try to kill dentries associated with this inode.
* WARNING: you must own a reference to inode.
*/
@@ -2150,8 +2176,8 @@ EXPORT_SYMBOL(d_obtain_root);
* same inode, only the actual correct case is stored in the dcache for
* case-insensitive filesystems.
*
- * For a case-insensitive lookup match and if the the case-exact dentry
- * already exists in in the dcache, use it and return it.
+ * For a case-insensitive lookup match and if the case-exact dentry
+ * already exists in the dcache, use it and return it.
*
* If no entry exists with the exact case name, allocate new dentry with
* the exact case, and return the spliced entry.
diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c
index 2fcf66473436..22e86ae4dd5a 100644
--- a/fs/debugfs/inode.c
+++ b/fs/debugfs/inode.c
@@ -42,13 +42,14 @@ static unsigned int debugfs_allow = DEFAULT_DEBUGFS_ALLOW_BITS;
* so that we can use the file mode as part of a heuristic to determine whether
* to lock down individual files.
*/
-static int debugfs_setattr(struct dentry *dentry, struct iattr *ia)
+static int debugfs_setattr(struct user_namespace *mnt_userns,
+ struct dentry *dentry, struct iattr *ia)
{
int ret = security_locked_down(LOCKDOWN_DEBUGFS);
if (ret && (ia->ia_valid & (ATTR_MODE | ATTR_UID | ATTR_GID)))
return ret;
- return simple_setattr(dentry, ia);
+ return simple_setattr(&init_user_ns, dentry, ia);
}
static const struct inode_operations debugfs_file_inode_operations = {
@@ -297,7 +298,7 @@ struct dentry *debugfs_lookup(const char *name, struct dentry *parent)
{
struct dentry *dentry;
- if (IS_ERR(parent))
+ if (!debugfs_initialized() || IS_ERR_OR_NULL(name) || IS_ERR(parent))
return NULL;
if (!parent)
@@ -318,6 +319,9 @@ static struct dentry *start_creating(const char *name, struct dentry *parent)
if (!(debugfs_allow & DEBUGFS_ALLOW_API))
return ERR_PTR(-EPERM);
+ if (!debugfs_initialized())
+ return ERR_PTR(-ENOENT);
+
pr_debug("creating file '%s'\n", name);
if (IS_ERR(parent))
@@ -775,8 +779,8 @@ struct dentry *debugfs_rename(struct dentry *old_dir, struct dentry *old_dentry,
take_dentry_name_snapshot(&old_name, old_dentry);
- error = simple_rename(d_inode(old_dir), old_dentry, d_inode(new_dir),
- dentry, 0);
+ error = simple_rename(&init_user_ns, d_inode(old_dir), old_dentry,
+ d_inode(new_dir), dentry, 0);
if (error) {
release_dentry_name_snapshot(&old_name);
goto exit;
diff --git a/fs/direct-io.c b/fs/direct-io.c
index aa1083ecd623..0957e1bb8eb2 100644
--- a/fs/direct-io.c
+++ b/fs/direct-io.c
@@ -462,7 +462,7 @@ static inline void dio_cleanup(struct dio *dio, struct dio_submit *sdio)
* Wait for the next BIO to complete. Remove it and return it. NULL is
* returned once all BIOs have been completed. This must only be called once
* all bios have been issued so that dio->refcount can only decrease. This
- * requires that that the caller hold a reference on the dio.
+ * requires that the caller hold a reference on the dio.
*/
static struct bio *dio_await_one(struct dio *dio)
{
@@ -1279,7 +1279,7 @@ do_blockdev_direct_IO(struct kiocb *iocb, struct inode *inode,
if (retval == -ENOTBLK) {
/*
* The remaining part of the request will be
- * be handled by buffered I/O when we return
+ * handled by buffered I/O when we return
*/
retval = 0;
}
diff --git a/fs/ecryptfs/crypto.c b/fs/ecryptfs/crypto.c
index 0681540c48d9..943e523f4c9d 100644
--- a/fs/ecryptfs/crypto.c
+++ b/fs/ecryptfs/crypto.c
@@ -1110,8 +1110,8 @@ ecryptfs_write_metadata_to_xattr(struct dentry *ecryptfs_dentry,
}
inode_lock(lower_inode);
- rc = __vfs_setxattr(lower_dentry, lower_inode, ECRYPTFS_XATTR_NAME,
- page_virt, size, 0);
+ rc = __vfs_setxattr(&init_user_ns, lower_dentry, lower_inode,
+ ECRYPTFS_XATTR_NAME, page_virt, size, 0);
if (!rc && ecryptfs_inode)
fsstack_copy_attr_all(ecryptfs_inode, lower_inode);
inode_unlock(lower_inode);
diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c
index 58d0f7187997..18e9285fbb4c 100644
--- a/fs/ecryptfs/inode.c
+++ b/fs/ecryptfs/inode.c
@@ -141,7 +141,8 @@ static int ecryptfs_do_unlink(struct inode *dir, struct dentry *dentry,
else if (d_unhashed(lower_dentry))
rc = -EINVAL;
else
- rc = vfs_unlink(lower_dir_inode, lower_dentry, NULL);
+ rc = vfs_unlink(&init_user_ns, lower_dir_inode, lower_dentry,
+ NULL);
if (rc) {
printk(KERN_ERR "Error in vfs_unlink; rc = [%d]\n", rc);
goto out_unlock;
@@ -180,7 +181,8 @@ ecryptfs_do_create(struct inode *directory_inode,
lower_dentry = ecryptfs_dentry_to_lower(ecryptfs_dentry);
lower_dir_dentry = lock_parent(lower_dentry);
- rc = vfs_create(d_inode(lower_dir_dentry), lower_dentry, mode, true);
+ rc = vfs_create(&init_user_ns, d_inode(lower_dir_dentry), lower_dentry,
+ mode, true);
if (rc) {
printk(KERN_ERR "%s: Failure to create dentry in lower fs; "
"rc = [%d]\n", __func__, rc);
@@ -190,7 +192,8 @@ ecryptfs_do_create(struct inode *directory_inode,
inode = __ecryptfs_get_inode(d_inode(lower_dentry),
directory_inode->i_sb);
if (IS_ERR(inode)) {
- vfs_unlink(d_inode(lower_dir_dentry), lower_dentry, NULL);
+ vfs_unlink(&init_user_ns, d_inode(lower_dir_dentry),
+ lower_dentry, NULL);
goto out_lock;
}
fsstack_copy_attr_times(directory_inode, d_inode(lower_dir_dentry));
@@ -254,7 +257,8 @@ out:
* Returns zero on success; non-zero on error condition
*/
static int
-ecryptfs_create(struct inode *directory_inode, struct dentry *ecryptfs_dentry,
+ecryptfs_create(struct user_namespace *mnt_userns,
+ struct inode *directory_inode, struct dentry *ecryptfs_dentry,
umode_t mode, bool excl)
{
struct inode *ecryptfs_inode;
@@ -436,8 +440,8 @@ static int ecryptfs_link(struct dentry *old_dentry, struct inode *dir,
dget(lower_old_dentry);
dget(lower_new_dentry);
lower_dir_dentry = lock_parent(lower_new_dentry);
- rc = vfs_link(lower_old_dentry, d_inode(lower_dir_dentry),
- lower_new_dentry, NULL);
+ rc = vfs_link(lower_old_dentry, &init_user_ns,
+ d_inode(lower_dir_dentry), lower_new_dentry, NULL);
if (rc || d_really_is_negative(lower_new_dentry))
goto out_lock;
rc = ecryptfs_interpose(lower_new_dentry, new_dentry, dir->i_sb);
@@ -460,7 +464,8 @@ static int ecryptfs_unlink(struct inode *dir, struct dentry *dentry)
return ecryptfs_do_unlink(dir, dentry, d_inode(dentry));
}
-static int ecryptfs_symlink(struct inode *dir, struct dentry *dentry,
+static int ecryptfs_symlink(struct user_namespace *mnt_userns,
+ struct inode *dir, struct dentry *dentry,
const char *symname)
{
int rc;
@@ -481,7 +486,7 @@ static int ecryptfs_symlink(struct inode *dir, struct dentry *dentry,
strlen(symname));
if (rc)
goto out_lock;
- rc = vfs_symlink(d_inode(lower_dir_dentry), lower_dentry,
+ rc = vfs_symlink(&init_user_ns, d_inode(lower_dir_dentry), lower_dentry,
encoded_symname);
kfree(encoded_symname);
if (rc || d_really_is_negative(lower_dentry))
@@ -499,7 +504,8 @@ out_lock:
return rc;
}
-static int ecryptfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
+static int ecryptfs_mkdir(struct user_namespace *mnt_userns, struct inode *dir,
+ struct dentry *dentry, umode_t mode)
{
int rc;
struct dentry *lower_dentry;
@@ -507,7 +513,8 @@ static int ecryptfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode
lower_dentry = ecryptfs_dentry_to_lower(dentry);
lower_dir_dentry = lock_parent(lower_dentry);
- rc = vfs_mkdir(d_inode(lower_dir_dentry), lower_dentry, mode);
+ rc = vfs_mkdir(&init_user_ns, d_inode(lower_dir_dentry), lower_dentry,
+ mode);
if (rc || d_really_is_negative(lower_dentry))
goto out;
rc = ecryptfs_interpose(lower_dentry, dentry, dir->i_sb);
@@ -541,7 +548,7 @@ static int ecryptfs_rmdir(struct inode *dir, struct dentry *dentry)
else if (d_unhashed(lower_dentry))
rc = -EINVAL;
else
- rc = vfs_rmdir(lower_dir_inode, lower_dentry);
+ rc = vfs_rmdir(&init_user_ns, lower_dir_inode, lower_dentry);
if (!rc) {
clear_nlink(d_inode(dentry));
fsstack_copy_attr_times(dir, lower_dir_inode);
@@ -555,7 +562,8 @@ static int ecryptfs_rmdir(struct inode *dir, struct dentry *dentry)
}
static int
-ecryptfs_mknod(struct inode *dir, struct dentry *dentry, umode_t mode, dev_t dev)
+ecryptfs_mknod(struct user_namespace *mnt_userns, struct inode *dir,
+ struct dentry *dentry, umode_t mode, dev_t dev)
{
int rc;
struct dentry *lower_dentry;
@@ -563,7 +571,8 @@ ecryptfs_mknod(struct inode *dir, struct dentry *dentry, umode_t mode, dev_t dev
lower_dentry = ecryptfs_dentry_to_lower(dentry);
lower_dir_dentry = lock_parent(lower_dentry);
- rc = vfs_mknod(d_inode(lower_dir_dentry), lower_dentry, mode, dev);
+ rc = vfs_mknod(&init_user_ns, d_inode(lower_dir_dentry), lower_dentry,
+ mode, dev);
if (rc || d_really_is_negative(lower_dentry))
goto out;
rc = ecryptfs_interpose(lower_dentry, dentry, dir->i_sb);
@@ -579,9 +588,9 @@ out:
}
static int
-ecryptfs_rename(struct inode *old_dir, struct dentry *old_dentry,
- struct inode *new_dir, struct dentry *new_dentry,
- unsigned int flags)
+ecryptfs_rename(struct user_namespace *mnt_userns, struct inode *old_dir,
+ struct dentry *old_dentry, struct inode *new_dir,
+ struct dentry *new_dentry, unsigned int flags)
{
int rc;
struct dentry *lower_old_dentry;
@@ -590,6 +599,7 @@ ecryptfs_rename(struct inode *old_dir, struct dentry *old_dentry,
struct dentry *lower_new_dir_dentry;
struct dentry *trap;
struct inode *target_inode;
+ struct renamedata rd = {};
if (flags)
return -EINVAL;
@@ -619,9 +629,14 @@ ecryptfs_rename(struct inode *old_dir, struct dentry *old_dentry,
rc = -ENOTEMPTY;
goto out_lock;
}
- rc = vfs_rename(d_inode(lower_old_dir_dentry), lower_old_dentry,
- d_inode(lower_new_dir_dentry), lower_new_dentry,
- NULL, 0);
+
+ rd.old_mnt_userns = &init_user_ns;
+ rd.old_dir = d_inode(lower_old_dir_dentry);
+ rd.old_dentry = lower_old_dentry;
+ rd.new_mnt_userns = &init_user_ns;
+ rd.new_dir = d_inode(lower_new_dir_dentry);
+ rd.new_dentry = lower_new_dentry;
+ rc = vfs_rename(&rd);
if (rc)
goto out_lock;
if (target_inode)
@@ -855,16 +870,19 @@ int ecryptfs_truncate(struct dentry *dentry, loff_t new_length)
struct dentry *lower_dentry = ecryptfs_dentry_to_lower(dentry);
inode_lock(d_inode(lower_dentry));
- rc = notify_change(lower_dentry, &lower_ia, NULL);
+ rc = notify_change(&init_user_ns, lower_dentry,
+ &lower_ia, NULL);
inode_unlock(d_inode(lower_dentry));
}
return rc;
}
static int
-ecryptfs_permission(struct inode *inode, int mask)
+ecryptfs_permission(struct user_namespace *mnt_userns, struct inode *inode,
+ int mask)
{
- return inode_permission(ecryptfs_inode_to_lower(inode), mask);
+ return inode_permission(&init_user_ns,
+ ecryptfs_inode_to_lower(inode), mask);
}
/**
@@ -879,7 +897,8 @@ ecryptfs_permission(struct inode *inode, int mask)
* All other metadata changes will be passed right to the lower filesystem,
* and we will just update our inode to look like the lower.
*/
-static int ecryptfs_setattr(struct dentry *dentry, struct iattr *ia)
+static int ecryptfs_setattr(struct user_namespace *mnt_userns,
+ struct dentry *dentry, struct iattr *ia)
{
int rc = 0;
struct dentry *lower_dentry;
@@ -933,7 +952,7 @@ static int ecryptfs_setattr(struct dentry *dentry, struct iattr *ia)
}
mutex_unlock(&crypt_stat->cs_mutex);
- rc = setattr_prepare(dentry, ia);
+ rc = setattr_prepare(&init_user_ns, dentry, ia);
if (rc)
goto out;
if (ia->ia_valid & ATTR_SIZE) {
@@ -959,14 +978,15 @@ static int ecryptfs_setattr(struct dentry *dentry, struct iattr *ia)
lower_ia.ia_valid &= ~ATTR_MODE;
inode_lock(d_inode(lower_dentry));
- rc = notify_change(lower_dentry, &lower_ia, NULL);
+ rc = notify_change(&init_user_ns, lower_dentry, &lower_ia, NULL);
inode_unlock(d_inode(lower_dentry));
out:
fsstack_copy_attr_all(inode, lower_inode);
return rc;
}
-static int ecryptfs_getattr_link(const struct path *path, struct kstat *stat,
+static int ecryptfs_getattr_link(struct user_namespace *mnt_userns,
+ const struct path *path, struct kstat *stat,
u32 request_mask, unsigned int flags)
{
struct dentry *dentry = path->dentry;
@@ -975,7 +995,7 @@ static int ecryptfs_getattr_link(const struct path *path, struct kstat *stat,
mount_crypt_stat = &ecryptfs_superblock_to_private(
dentry->d_sb)->mount_crypt_stat;
- generic_fillattr(d_inode(dentry), stat);
+ generic_fillattr(&init_user_ns, d_inode(dentry), stat);
if (mount_crypt_stat->flags & ECRYPTFS_GLOBAL_ENCRYPT_FILENAMES) {
char *target;
size_t targetsiz;
@@ -991,7 +1011,8 @@ static int ecryptfs_getattr_link(const struct path *path, struct kstat *stat,
return rc;
}
-static int ecryptfs_getattr(const struct path *path, struct kstat *stat,
+static int ecryptfs_getattr(struct user_namespace *mnt_userns,
+ const struct path *path, struct kstat *stat,
u32 request_mask, unsigned int flags)
{
struct dentry *dentry = path->dentry;
@@ -1003,7 +1024,7 @@ static int ecryptfs_getattr(const struct path *path, struct kstat *stat,
if (!rc) {
fsstack_copy_attr_all(d_inode(dentry),
ecryptfs_inode_to_lower(d_inode(dentry)));
- generic_fillattr(d_inode(dentry), stat);
+ generic_fillattr(&init_user_ns, d_inode(dentry), stat);
stat->blocks = lower_stat.blocks;
}
return rc;
@@ -1025,7 +1046,7 @@ ecryptfs_setxattr(struct dentry *dentry, struct inode *inode,
goto out;
}
inode_lock(lower_inode);
- rc = __vfs_setxattr_locked(lower_dentry, name, value, size, flags, NULL);
+ rc = __vfs_setxattr_locked(&init_user_ns, lower_dentry, name, value, size, flags, NULL);
inode_unlock(lower_inode);
if (!rc && inode)
fsstack_copy_attr_all(inode, lower_inode);
@@ -1091,7 +1112,7 @@ static int ecryptfs_removexattr(struct dentry *dentry, struct inode *inode,
goto out;
}
inode_lock(lower_inode);
- rc = __vfs_removexattr(lower_dentry, name);
+ rc = __vfs_removexattr(&init_user_ns, lower_dentry, name);
inode_unlock(lower_inode);
out:
return rc;
@@ -1135,6 +1156,7 @@ static int ecryptfs_xattr_get(const struct xattr_handler *handler,
}
static int ecryptfs_xattr_set(const struct xattr_handler *handler,
+ struct user_namespace *mnt_userns,
struct dentry *dentry, struct inode *inode,
const char *name, const void *value, size_t size,
int flags)
diff --git a/fs/ecryptfs/main.c b/fs/ecryptfs/main.c
index e63259fdef28..cdf40a54a35d 100644
--- a/fs/ecryptfs/main.c
+++ b/fs/ecryptfs/main.c
@@ -531,6 +531,12 @@ static struct dentry *ecryptfs_mount(struct file_system_type *fs_type, int flags
goto out_free;
}
+ if (mnt_user_ns(path.mnt) != &init_user_ns) {
+ rc = -EINVAL;
+ printk(KERN_ERR "Mounting on idmapped mounts currently disallowed\n");
+ goto out_free;
+ }
+
if (check_ruid && !uid_eq(d_inode(path.dentry)->i_uid, current_uid())) {
rc = -EPERM;
printk(KERN_ERR "Mount of device (uid: %d) not owned by "
diff --git a/fs/ecryptfs/mmap.c b/fs/ecryptfs/mmap.c
index 019572c6b39a..2f333a40ff4d 100644
--- a/fs/ecryptfs/mmap.c
+++ b/fs/ecryptfs/mmap.c
@@ -426,8 +426,8 @@ static int ecryptfs_write_inode_size_to_xattr(struct inode *ecryptfs_inode)
if (size < 0)
size = 8;
put_unaligned_be64(i_size_read(ecryptfs_inode), xattr_virt);
- rc = __vfs_setxattr(lower_dentry, lower_inode, ECRYPTFS_XATTR_NAME,
- xattr_virt, size, 0);
+ rc = __vfs_setxattr(&init_user_ns, lower_dentry, lower_inode,
+ ECRYPTFS_XATTR_NAME, xattr_virt, size, 0);
inode_unlock(lower_inode);
if (rc)
printk(KERN_ERR "Error whilst attempting to write inode size "
diff --git a/fs/efivarfs/file.c b/fs/efivarfs/file.c
index feaa5e182b7b..e6bc0302643b 100644
--- a/fs/efivarfs/file.c
+++ b/fs/efivarfs/file.c
@@ -137,7 +137,7 @@ efivarfs_ioc_setxflags(struct file *file, void __user *arg)
unsigned int oldflags = efivarfs_getflags(inode);
int error;
- if (!inode_owner_or_capable(inode))
+ if (!inode_owner_or_capable(&init_user_ns, inode))
return -EACCES;
if (copy_from_user(&flags, arg, sizeof(flags)))
diff --git a/fs/efivarfs/inode.c b/fs/efivarfs/inode.c
index 0297ad95eb5c..14e2947975fd 100644
--- a/fs/efivarfs/inode.c
+++ b/fs/efivarfs/inode.c
@@ -66,8 +66,8 @@ bool efivarfs_valid_name(const char *str, int len)
return uuid_is_valid(s);
}
-static int efivarfs_create(struct inode *dir, struct dentry *dentry,
- umode_t mode, bool excl)
+static int efivarfs_create(struct user_namespace *mnt_userns, struct inode *dir,
+ struct dentry *dentry, umode_t mode, bool excl)
{
struct inode *inode = NULL;
struct efivar_entry *var;
diff --git a/fs/erofs/inode.c b/fs/erofs/inode.c
index 3e21c0e8adae..119fdce1b520 100644
--- a/fs/erofs/inode.c
+++ b/fs/erofs/inode.c
@@ -331,8 +331,9 @@ struct inode *erofs_iget(struct super_block *sb,
return inode;
}
-int erofs_getattr(const struct path *path, struct kstat *stat,
- u32 request_mask, unsigned int query_flags)
+int erofs_getattr(struct user_namespace *mnt_userns, const struct path *path,
+ struct kstat *stat, u32 request_mask,
+ unsigned int query_flags)
{
struct inode *const inode = d_inode(path->dentry);
@@ -343,7 +344,7 @@ int erofs_getattr(const struct path *path, struct kstat *stat,
stat->attributes_mask |= (STATX_ATTR_COMPRESSED |
STATX_ATTR_IMMUTABLE);
- generic_fillattr(inode, stat);
+ generic_fillattr(&init_user_ns, inode, stat);
return 0;
}
diff --git a/fs/erofs/internal.h b/fs/erofs/internal.h
index 67a7ec945686..351dae524a0c 100644
--- a/fs/erofs/internal.h
+++ b/fs/erofs/internal.h
@@ -373,8 +373,9 @@ extern const struct inode_operations erofs_symlink_iops;
extern const struct inode_operations erofs_fast_symlink_iops;
struct inode *erofs_iget(struct super_block *sb, erofs_nid_t nid, bool dir);
-int erofs_getattr(const struct path *path, struct kstat *stat,
- u32 request_mask, unsigned int query_flags);
+int erofs_getattr(struct user_namespace *mnt_userns, const struct path *path,
+ struct kstat *stat, u32 request_mask,
+ unsigned int query_flags);
/* namei.c */
extern const struct inode_operations erofs_dir_iops;
diff --git a/fs/erofs/namei.c b/fs/erofs/namei.c
index 5f8cc7346c69..3a81e1f7fc06 100644
--- a/fs/erofs/namei.c
+++ b/fs/erofs/namei.c
@@ -234,8 +234,8 @@ static struct dentry *erofs_lookup(struct inode *dir,
} else if (err) {
inode = ERR_PTR(err);
} else {
- erofs_dbg("%s, %s (nid %llu) found, d_type %u", __func__,
- dentry->d_name.name, nid, d_type);
+ erofs_dbg("%s, %pd (nid %llu) found, d_type %u", __func__,
+ dentry, nid, d_type);
inode = erofs_iget(dir->i_sb, nid, d_type == FT_DIR);
}
return d_splice_alias(inode, dentry);
diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index a829af074eb5..3196474cbe24 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -979,7 +979,7 @@ static struct epitem *ep_find(struct eventpoll *ep, struct file *file, int fd)
return epir;
}
-#ifdef CONFIG_CHECKPOINT_RESTORE
+#ifdef CONFIG_KCMP
static struct epitem *ep_find_tfd(struct eventpoll *ep, int tfd, unsigned long toff)
{
struct rb_node *rbp;
@@ -1021,7 +1021,7 @@ struct file *get_epoll_tfile_raw_ptr(struct file *file, int tfd,
return file_raw;
}
-#endif /* CONFIG_CHECKPOINT_RESTORE */
+#endif /* CONFIG_KCMP */
/**
* Adds a new entry to the tail of the list in a lockless way, i.e.
diff --git a/fs/exec.c b/fs/exec.c
index 5a853f03c233..18594f11c31f 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -1404,14 +1404,15 @@ EXPORT_SYMBOL(begin_new_exec);
void would_dump(struct linux_binprm *bprm, struct file *file)
{
struct inode *inode = file_inode(file);
- if (inode_permission(inode, MAY_READ) < 0) {
+ struct user_namespace *mnt_userns = file_mnt_user_ns(file);
+ if (inode_permission(mnt_userns, inode, MAY_READ) < 0) {
struct user_namespace *old, *user_ns;
bprm->interp_flags |= BINPRM_FLAGS_ENFORCE_NONDUMP;
/* Ensure mm->user_ns contains the executable */
user_ns = old = bprm->mm->user_ns;
while ((user_ns != &init_user_ns) &&
- !privileged_wrt_inode_uidgid(user_ns, inode))
+ !privileged_wrt_inode_uidgid(user_ns, mnt_userns, inode))
user_ns = user_ns->parent;
if (old != user_ns) {
@@ -1454,7 +1455,7 @@ EXPORT_SYMBOL(finalize_exec);
/*
* Prepare credentials and lock ->cred_guard_mutex.
* setup_new_exec() commits the new creds and drops the lock.
- * Or, if exec fails before, free_bprm() should release ->cred and
+ * Or, if exec fails before, free_bprm() should release ->cred
* and unlock.
*/
static int prepare_bprm_creds(struct linux_binprm *bprm)
@@ -1579,6 +1580,7 @@ static void check_unsafe_exec(struct linux_binprm *bprm)
static void bprm_fill_uid(struct linux_binprm *bprm, struct file *file)
{
/* Handle suid and sgid on files */
+ struct user_namespace *mnt_userns;
struct inode *inode;
unsigned int mode;
kuid_t uid;
@@ -1595,13 +1597,15 @@ static void bprm_fill_uid(struct linux_binprm *bprm, struct file *file)
if (!(mode & (S_ISUID|S_ISGID)))
return;
+ mnt_userns = file_mnt_user_ns(file);
+
/* Be careful if suid/sgid is set */
inode_lock(inode);
/* reload atomically mode/uid/gid now that lock held */
mode = inode->i_mode;
- uid = inode->i_uid;
- gid = inode->i_gid;
+ uid = i_uid_into_mnt(mnt_userns, inode);
+ gid = i_gid_into_mnt(mnt_userns, inode);
inode_unlock(inode);
/* We ignore suid/sgid if there are no mappings for them in the ns */
@@ -1837,7 +1841,7 @@ static int bprm_execve(struct linux_binprm *bprm,
out:
/*
- * If past the point of no return ensure the the code never
+ * If past the point of no return ensure the code never
* returns to the userspace process. Use an existing fatal
* signal if present otherwise terminate the process with
* SIGSEGV.
diff --git a/fs/exfat/balloc.c b/fs/exfat/balloc.c
index a987919686c0..761c79c3a4ba 100644
--- a/fs/exfat/balloc.c
+++ b/fs/exfat/balloc.c
@@ -166,7 +166,7 @@ int exfat_set_bitmap(struct inode *inode, unsigned int clu)
* If the value of "clu" is 0, it means cluster 2 which is the first cluster of
* the cluster heap.
*/
-void exfat_clear_bitmap(struct inode *inode, unsigned int clu)
+void exfat_clear_bitmap(struct inode *inode, unsigned int clu, bool sync)
{
int i, b;
unsigned int ent_idx;
@@ -180,7 +180,7 @@ void exfat_clear_bitmap(struct inode *inode, unsigned int clu)
b = BITMAP_OFFSET_BIT_IN_SECTOR(sb, ent_idx);
clear_bit_le(b, sbi->vol_amap[i]->b_data);
- exfat_update_bh(sbi->vol_amap[i], IS_DIRSYNC(inode));
+ exfat_update_bh(sbi->vol_amap[i], sync);
if (opts->discard) {
int ret_discard;
diff --git a/fs/exfat/exfat_fs.h b/fs/exfat/exfat_fs.h
index b8f0e829ecbd..fa21421a14d9 100644
--- a/fs/exfat/exfat_fs.h
+++ b/fs/exfat/exfat_fs.h
@@ -408,7 +408,7 @@ int exfat_count_num_clusters(struct super_block *sb,
int exfat_load_bitmap(struct super_block *sb);
void exfat_free_bitmap(struct exfat_sb_info *sbi);
int exfat_set_bitmap(struct inode *inode, unsigned int clu);
-void exfat_clear_bitmap(struct inode *inode, unsigned int clu);
+void exfat_clear_bitmap(struct inode *inode, unsigned int clu, bool sync);
unsigned int exfat_find_free_bitmap(struct super_block *sb, unsigned int clu);
int exfat_count_used_clusters(struct super_block *sb, unsigned int *ret_count);
@@ -416,9 +416,11 @@ int exfat_count_used_clusters(struct super_block *sb, unsigned int *ret_count);
extern const struct file_operations exfat_file_operations;
int __exfat_truncate(struct inode *inode, loff_t new_size);
void exfat_truncate(struct inode *inode, loff_t size);
-int exfat_setattr(struct dentry *dentry, struct iattr *attr);
-int exfat_getattr(const struct path *path, struct kstat *stat,
- unsigned int request_mask, unsigned int query_flags);
+int exfat_setattr(struct user_namespace *mnt_userns, struct dentry *dentry,
+ struct iattr *attr);
+int exfat_getattr(struct user_namespace *mnt_userns, const struct path *path,
+ struct kstat *stat, unsigned int request_mask,
+ unsigned int query_flags);
int exfat_file_fsync(struct file *file, loff_t start, loff_t end, int datasync);
/* namei.c */
diff --git a/fs/exfat/exfat_raw.h b/fs/exfat/exfat_raw.h
index 6aec6288e1f2..7f39b1c6469c 100644
--- a/fs/exfat/exfat_raw.h
+++ b/fs/exfat/exfat_raw.h
@@ -77,6 +77,10 @@
#define EXFAT_FILE_NAME_LEN 15
+#define EXFAT_MIN_SECT_SIZE_BITS 9
+#define EXFAT_MAX_SECT_SIZE_BITS 12
+#define EXFAT_MAX_SECT_PER_CLUS_BITS(x) (25 - (x)->sect_size_bits)
+
/* EXFAT: Main and Backup Boot Sector (512 bytes) */
struct boot_sector {
__u8 jmp_boot[BOOTSEC_JUMP_BOOT_LEN];
diff --git a/fs/exfat/fatent.c b/fs/exfat/fatent.c
index c3c9afee7418..7b2e8af17193 100644
--- a/fs/exfat/fatent.c
+++ b/fs/exfat/fatent.c
@@ -157,6 +157,7 @@ int exfat_free_cluster(struct inode *inode, struct exfat_chain *p_chain)
unsigned int clu;
struct super_block *sb = inode->i_sb;
struct exfat_sb_info *sbi = EXFAT_SB(sb);
+ int cur_cmap_i, next_cmap_i;
/* invalid cluster number */
if (p_chain->dir == EXFAT_FREE_CLUSTER ||
@@ -176,21 +177,51 @@ int exfat_free_cluster(struct inode *inode, struct exfat_chain *p_chain)
clu = p_chain->dir;
+ cur_cmap_i = next_cmap_i =
+ BITMAP_OFFSET_SECTOR_INDEX(sb, CLUSTER_TO_BITMAP_ENT(clu));
+
if (p_chain->flags == ALLOC_NO_FAT_CHAIN) {
+ unsigned int last_cluster = p_chain->dir + p_chain->size - 1;
do {
- exfat_clear_bitmap(inode, clu);
- clu++;
+ bool sync = false;
+
+ if (clu < last_cluster)
+ next_cmap_i =
+ BITMAP_OFFSET_SECTOR_INDEX(sb, CLUSTER_TO_BITMAP_ENT(clu+1));
+ /* flush bitmap only if index would be changed or for last cluster */
+ if (clu == last_cluster || cur_cmap_i != next_cmap_i) {
+ sync = true;
+ cur_cmap_i = next_cmap_i;
+ }
+
+ exfat_clear_bitmap(inode, clu, (sync && IS_DIRSYNC(inode)));
+ clu++;
num_clusters++;
} while (num_clusters < p_chain->size);
} else {
do {
- exfat_clear_bitmap(inode, clu);
-
- if (exfat_get_next_cluster(sb, &clu))
- goto dec_used_clus;
+ bool sync = false;
+ unsigned int n_clu = clu;
+ int err = exfat_get_next_cluster(sb, &n_clu);
+
+ if (err || n_clu == EXFAT_EOF_CLUSTER)
+ sync = true;
+ else
+ next_cmap_i =
+ BITMAP_OFFSET_SECTOR_INDEX(sb, CLUSTER_TO_BITMAP_ENT(n_clu));
+
+ if (cur_cmap_i != next_cmap_i) {
+ sync = true;
+ cur_cmap_i = next_cmap_i;
+ }
+ exfat_clear_bitmap(inode, clu, (sync && IS_DIRSYNC(inode)));
+ clu = n_clu;
num_clusters++;
+
+ if (err)
+ goto dec_used_clus;
} while (clu != EXFAT_EOF_CLUSTER);
}
diff --git a/fs/exfat/file.c b/fs/exfat/file.c
index 183ffdf4d43c..f783cf38dd8e 100644
--- a/fs/exfat/file.c
+++ b/fs/exfat/file.c
@@ -267,13 +267,14 @@ write_size:
mutex_unlock(&sbi->s_lock);
}
-int exfat_getattr(const struct path *path, struct kstat *stat,
- unsigned int request_mask, unsigned int query_flags)
+int exfat_getattr(struct user_namespace *mnt_uerns, const struct path *path,
+ struct kstat *stat, unsigned int request_mask,
+ unsigned int query_flags)
{
struct inode *inode = d_backing_inode(path->dentry);
struct exfat_inode_info *ei = EXFAT_I(inode);
- generic_fillattr(inode, stat);
+ generic_fillattr(&init_user_ns, inode, stat);
exfat_truncate_atime(&stat->atime);
stat->result_mask |= STATX_BTIME;
stat->btime.tv_sec = ei->i_crtime.tv_sec;
@@ -282,7 +283,8 @@ int exfat_getattr(const struct path *path, struct kstat *stat,
return 0;
}
-int exfat_setattr(struct dentry *dentry, struct iattr *attr)
+int exfat_setattr(struct user_namespace *mnt_userns, struct dentry *dentry,
+ struct iattr *attr)
{
struct exfat_sb_info *sbi = EXFAT_SB(dentry->d_sb);
struct inode *inode = dentry->d_inode;
@@ -305,7 +307,7 @@ int exfat_setattr(struct dentry *dentry, struct iattr *attr)
ATTR_TIMES_SET);
}
- error = setattr_prepare(dentry, attr);
+ error = setattr_prepare(&init_user_ns, dentry, attr);
attr->ia_valid = ia_valid;
if (error)
goto out;
@@ -340,7 +342,7 @@ int exfat_setattr(struct dentry *dentry, struct iattr *attr)
up_write(&EXFAT_I(inode)->truncate_lock);
}
- setattr_copy(inode, attr);
+ setattr_copy(&init_user_ns, inode, attr);
exfat_truncate_atime(&inode->i_atime);
mark_inode_dirty(inode);
diff --git a/fs/exfat/namei.c b/fs/exfat/namei.c
index 2932b23a3b6c..d9e8ec689c55 100644
--- a/fs/exfat/namei.c
+++ b/fs/exfat/namei.c
@@ -541,8 +541,8 @@ out:
return ret;
}
-static int exfat_create(struct inode *dir, struct dentry *dentry, umode_t mode,
- bool excl)
+static int exfat_create(struct user_namespace *mnt_userns, struct inode *dir,
+ struct dentry *dentry, umode_t mode, bool excl)
{
struct super_block *sb = dir->i_sb;
struct inode *inode;
@@ -827,7 +827,8 @@ unlock:
return err;
}
-static int exfat_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
+static int exfat_mkdir(struct user_namespace *mnt_userns, struct inode *dir,
+ struct dentry *dentry, umode_t mode)
{
struct super_block *sb = dir->i_sb;
struct inode *inode;
@@ -1318,9 +1319,10 @@ out:
return ret;
}
-static int exfat_rename(struct inode *old_dir, struct dentry *old_dentry,
- struct inode *new_dir, struct dentry *new_dentry,
- unsigned int flags)
+static int exfat_rename(struct user_namespace *mnt_userns,
+ struct inode *old_dir, struct dentry *old_dentry,
+ struct inode *new_dir, struct dentry *new_dentry,
+ unsigned int flags)
{
struct inode *old_inode, *new_inode;
struct super_block *sb = old_dir->i_sb;
diff --git a/fs/exfat/super.c b/fs/exfat/super.c
index 87be5bfc31eb..c6d8d2e53486 100644
--- a/fs/exfat/super.c
+++ b/fs/exfat/super.c
@@ -381,8 +381,7 @@ static int exfat_calibrate_blocksize(struct super_block *sb, int logical_sect)
{
struct exfat_sb_info *sbi = EXFAT_SB(sb);
- if (!is_power_of_2(logical_sect) ||
- logical_sect < 512 || logical_sect > 4096) {
+ if (!is_power_of_2(logical_sect)) {
exfat_err(sb, "bogus logical sector size %u", logical_sect);
return -EIO;
}
@@ -451,6 +450,25 @@ static int exfat_read_boot_sector(struct super_block *sb)
return -EINVAL;
}
+ /*
+ * sect_size_bits could be at least 9 and at most 12.
+ */
+ if (p_boot->sect_size_bits < EXFAT_MIN_SECT_SIZE_BITS ||
+ p_boot->sect_size_bits > EXFAT_MAX_SECT_SIZE_BITS) {
+ exfat_err(sb, "bogus sector size bits : %u\n",
+ p_boot->sect_size_bits);
+ return -EINVAL;
+ }
+
+ /*
+ * sect_per_clus_bits could be at least 0 and at most 25 - sect_size_bits.
+ */
+ if (p_boot->sect_per_clus_bits > EXFAT_MAX_SECT_PER_CLUS_BITS(p_boot)) {
+ exfat_err(sb, "bogus sectors bits per cluster : %u\n",
+ p_boot->sect_per_clus_bits);
+ return -EINVAL;
+ }
+
sbi->sect_per_clus = 1 << p_boot->sect_per_clus_bits;
sbi->sect_per_clus_bits = p_boot->sect_per_clus_bits;
sbi->cluster_size_bits = p_boot->sect_per_clus_bits +
@@ -477,16 +495,19 @@ static int exfat_read_boot_sector(struct super_block *sb)
sbi->used_clusters = EXFAT_CLUSTERS_UNTRACKED;
/* check consistencies */
- if (sbi->num_FAT_sectors << p_boot->sect_size_bits <
- sbi->num_clusters * 4) {
+ if ((u64)sbi->num_FAT_sectors << p_boot->sect_size_bits <
+ (u64)sbi->num_clusters * 4) {
exfat_err(sb, "bogus fat length");
return -EINVAL;
}
+
if (sbi->data_start_sector <
- sbi->FAT1_start_sector + sbi->num_FAT_sectors * p_boot->num_fats) {
+ (u64)sbi->FAT1_start_sector +
+ (u64)sbi->num_FAT_sectors * p_boot->num_fats) {
exfat_err(sb, "bogus data start sector");
return -EINVAL;
}
+
if (sbi->vol_flags & VOLUME_DIRTY)
exfat_warn(sb, "Volume was not properly unmounted. Some data may be corrupt. Please run fsck.");
if (sbi->vol_flags & MEDIA_FAILURE)
diff --git a/fs/ext2/acl.c b/fs/ext2/acl.c
index cf4c77f8dd08..b9a9db98e94b 100644
--- a/fs/ext2/acl.c
+++ b/fs/ext2/acl.c
@@ -216,14 +216,16 @@ __ext2_set_acl(struct inode *inode, struct posix_acl *acl, int type)
* inode->i_mutex: down
*/
int
-ext2_set_acl(struct inode *inode, struct posix_acl *acl, int type)
+ext2_set_acl(struct user_namespace *mnt_userns, struct inode *inode,
+ struct posix_acl *acl, int type)
{
int error;
int update_mode = 0;
umode_t mode = inode->i_mode;
if (type == ACL_TYPE_ACCESS && acl) {
- error = posix_acl_update_mode(inode, &mode, &acl);
+ error = posix_acl_update_mode(&init_user_ns, inode, &mode,
+ &acl);
if (error)
return error;
update_mode = 1;
diff --git a/fs/ext2/acl.h b/fs/ext2/acl.h
index 0f01c759daac..917db5f6630a 100644
--- a/fs/ext2/acl.h
+++ b/fs/ext2/acl.h
@@ -56,7 +56,8 @@ static inline int ext2_acl_count(size_t size)
/* acl.c */
extern struct posix_acl *ext2_get_acl(struct inode *inode, int type);
-extern int ext2_set_acl(struct inode *inode, struct posix_acl *acl, int type);
+extern int ext2_set_acl(struct user_namespace *mnt_userns, struct inode *inode,
+ struct posix_acl *acl, int type);
extern int ext2_init_acl (struct inode *, struct inode *);
#else
diff --git a/fs/ext2/ext2.h b/fs/ext2/ext2.h
index 2a4175fbaf5e..3309fb2d327a 100644
--- a/fs/ext2/ext2.h
+++ b/fs/ext2/ext2.h
@@ -764,8 +764,9 @@ extern struct inode *ext2_iget (struct super_block *, unsigned long);
extern int ext2_write_inode (struct inode *, struct writeback_control *);
extern void ext2_evict_inode(struct inode *);
extern int ext2_get_block(struct inode *, sector_t, struct buffer_head *, int);
-extern int ext2_setattr (struct dentry *, struct iattr *);
-extern int ext2_getattr (const struct path *, struct kstat *, u32, unsigned int);
+extern int ext2_setattr (struct user_namespace *, struct dentry *, struct iattr *);
+extern int ext2_getattr (struct user_namespace *, const struct path *,
+ struct kstat *, u32, unsigned int);
extern void ext2_set_inode_flags(struct inode *inode);
extern int ext2_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
u64 start, u64 len);
diff --git a/fs/ext2/ialloc.c b/fs/ext2/ialloc.c
index 432c3febea6d..df14e750e9fe 100644
--- a/fs/ext2/ialloc.c
+++ b/fs/ext2/ialloc.c
@@ -551,7 +551,7 @@ got:
inode->i_uid = current_fsuid();
inode->i_gid = dir->i_gid;
} else
- inode_init_owner(inode, dir, mode);
+ inode_init_owner(&init_user_ns, inode, dir, mode);
inode->i_ino = ino;
inode->i_blocks = 0;
diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c
index 78c417d3c898..68178b2234bd 100644
--- a/fs/ext2/inode.c
+++ b/fs/ext2/inode.c
@@ -1638,8 +1638,8 @@ int ext2_write_inode(struct inode *inode, struct writeback_control *wbc)
return __ext2_write_inode(inode, wbc->sync_mode == WB_SYNC_ALL);
}
-int ext2_getattr(const struct path *path, struct kstat *stat,
- u32 request_mask, unsigned int query_flags)
+int ext2_getattr(struct user_namespace *mnt_userns, const struct path *path,
+ struct kstat *stat, u32 request_mask, unsigned int query_flags)
{
struct inode *inode = d_inode(path->dentry);
struct ext2_inode_info *ei = EXT2_I(inode);
@@ -1660,16 +1660,17 @@ int ext2_getattr(const struct path *path, struct kstat *stat,
STATX_ATTR_IMMUTABLE |
STATX_ATTR_NODUMP);
- generic_fillattr(inode, stat);
+ generic_fillattr(&init_user_ns, inode, stat);
return 0;
}
-int ext2_setattr(struct dentry *dentry, struct iattr *iattr)
+int ext2_setattr(struct user_namespace *mnt_userns, struct dentry *dentry,
+ struct iattr *iattr)
{
struct inode *inode = d_inode(dentry);
int error;
- error = setattr_prepare(dentry, iattr);
+ error = setattr_prepare(&init_user_ns, dentry, iattr);
if (error)
return error;
@@ -1689,9 +1690,9 @@ int ext2_setattr(struct dentry *dentry, struct iattr *iattr)
if (error)
return error;
}
- setattr_copy(inode, iattr);
+ setattr_copy(&init_user_ns, inode, iattr);
if (iattr->ia_valid & ATTR_MODE)
- error = posix_acl_chmod(inode, inode->i_mode);
+ error = posix_acl_chmod(&init_user_ns, inode, inode->i_mode);
mark_inode_dirty(inode);
return error;
diff --git a/fs/ext2/ioctl.c b/fs/ext2/ioctl.c
index 32a8d10b579d..b399cbb7022d 100644
--- a/fs/ext2/ioctl.c
+++ b/fs/ext2/ioctl.c
@@ -39,7 +39,7 @@ long ext2_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
if (ret)
return ret;
- if (!inode_owner_or_capable(inode)) {
+ if (!inode_owner_or_capable(&init_user_ns, inode)) {
ret = -EACCES;
goto setflags_out;
}
@@ -84,7 +84,7 @@ setflags_out:
case EXT2_IOC_SETVERSION: {
__u32 generation;
- if (!inode_owner_or_capable(inode))
+ if (!inode_owner_or_capable(&init_user_ns, inode))
return -EPERM;
ret = mnt_want_write_file(filp);
if (ret)
@@ -117,7 +117,7 @@ setversion_out:
if (!test_opt(inode->i_sb, RESERVATION) ||!S_ISREG(inode->i_mode))
return -ENOTTY;
- if (!inode_owner_or_capable(inode))
+ if (!inode_owner_or_capable(&init_user_ns, inode))
return -EACCES;
if (get_user(rsv_window_size, (int __user *)arg))
diff --git a/fs/ext2/namei.c b/fs/ext2/namei.c
index ea980f1e2e99..3367384d344d 100644
--- a/fs/ext2/namei.c
+++ b/fs/ext2/namei.c
@@ -100,7 +100,9 @@ struct dentry *ext2_get_parent(struct dentry *child)
* If the create succeeds, we fill in the inode information
* with d_instantiate().
*/
-static int ext2_create (struct inode * dir, struct dentry * dentry, umode_t mode, bool excl)
+static int ext2_create (struct user_namespace * mnt_userns,
+ struct inode * dir, struct dentry * dentry,
+ umode_t mode, bool excl)
{
struct inode *inode;
int err;
@@ -118,7 +120,8 @@ static int ext2_create (struct inode * dir, struct dentry * dentry, umode_t mode
return ext2_add_nondir(dentry, inode);
}
-static int ext2_tmpfile(struct inode *dir, struct dentry *dentry, umode_t mode)
+static int ext2_tmpfile(struct user_namespace *mnt_userns, struct inode *dir,
+ struct dentry *dentry, umode_t mode)
{
struct inode *inode = ext2_new_inode(dir, mode, NULL);
if (IS_ERR(inode))
@@ -131,7 +134,8 @@ static int ext2_tmpfile(struct inode *dir, struct dentry *dentry, umode_t mode)
return 0;
}
-static int ext2_mknod (struct inode * dir, struct dentry *dentry, umode_t mode, dev_t rdev)
+static int ext2_mknod (struct user_namespace * mnt_userns, struct inode * dir,
+ struct dentry *dentry, umode_t mode, dev_t rdev)
{
struct inode * inode;
int err;
@@ -151,8 +155,8 @@ static int ext2_mknod (struct inode * dir, struct dentry *dentry, umode_t mode,
return err;
}
-static int ext2_symlink (struct inode * dir, struct dentry * dentry,
- const char * symname)
+static int ext2_symlink (struct user_namespace * mnt_userns, struct inode * dir,
+ struct dentry * dentry, const char * symname)
{
struct super_block * sb = dir->i_sb;
int err = -ENAMETOOLONG;
@@ -225,7 +229,8 @@ static int ext2_link (struct dentry * old_dentry, struct inode * dir,
return err;
}
-static int ext2_mkdir(struct inode * dir, struct dentry * dentry, umode_t mode)
+static int ext2_mkdir(struct user_namespace * mnt_userns,
+ struct inode * dir, struct dentry * dentry, umode_t mode)
{
struct inode * inode;
int err;
@@ -315,8 +320,9 @@ static int ext2_rmdir (struct inode * dir, struct dentry *dentry)
return err;
}
-static int ext2_rename (struct inode * old_dir, struct dentry * old_dentry,
- struct inode * new_dir, struct dentry * new_dentry,
+static int ext2_rename (struct user_namespace * mnt_userns,
+ struct inode * old_dir, struct dentry * old_dentry,
+ struct inode * new_dir, struct dentry * new_dentry,
unsigned int flags)
{
struct inode * old_inode = d_inode(old_dentry);
diff --git a/fs/ext2/xattr_security.c b/fs/ext2/xattr_security.c
index 9a682e440acb..ebade1f52451 100644
--- a/fs/ext2/xattr_security.c
+++ b/fs/ext2/xattr_security.c
@@ -19,6 +19,7 @@ ext2_xattr_security_get(const struct xattr_handler *handler,
static int
ext2_xattr_security_set(const struct xattr_handler *handler,
+ struct user_namespace *mnt_userns,
struct dentry *unused, struct inode *inode,
const char *name, const void *value,
size_t size, int flags)
diff --git a/fs/ext2/xattr_trusted.c b/fs/ext2/xattr_trusted.c
index 49add1107850..18a87d5dd1ab 100644
--- a/fs/ext2/xattr_trusted.c
+++ b/fs/ext2/xattr_trusted.c
@@ -26,6 +26,7 @@ ext2_xattr_trusted_get(const struct xattr_handler *handler,
static int
ext2_xattr_trusted_set(const struct xattr_handler *handler,
+ struct user_namespace *mnt_userns,
struct dentry *unused, struct inode *inode,
const char *name, const void *value,
size_t size, int flags)
diff --git a/fs/ext2/xattr_user.c b/fs/ext2/xattr_user.c
index c243a3b4d69d..58092449f8ff 100644
--- a/fs/ext2/xattr_user.c
+++ b/fs/ext2/xattr_user.c
@@ -30,6 +30,7 @@ ext2_xattr_user_get(const struct xattr_handler *handler,
static int
ext2_xattr_user_set(const struct xattr_handler *handler,
+ struct user_namespace *mnt_userns,
struct dentry *unused, struct inode *inode,
const char *name, const void *value,
size_t size, int flags)
diff --git a/fs/ext4/.kunitconfig b/fs/ext4/.kunitconfig
new file mode 100644
index 000000000000..bf51da7cd9fc
--- /dev/null
+++ b/fs/ext4/.kunitconfig
@@ -0,0 +1,3 @@
+CONFIG_KUNIT=y
+CONFIG_EXT4_FS=y
+CONFIG_EXT4_KUNIT_TESTS=y
diff --git a/fs/ext4/Kconfig b/fs/ext4/Kconfig
index 619dd35ddd48..86699c8cab28 100644
--- a/fs/ext4/Kconfig
+++ b/fs/ext4/Kconfig
@@ -103,8 +103,7 @@ config EXT4_DEBUG
config EXT4_KUNIT_TESTS
tristate "KUnit tests for ext4" if !KUNIT_ALL_TESTS
- select EXT4_FS
- depends on KUNIT
+ depends on EXT4_FS && KUNIT
default KUNIT_ALL_TESTS
help
This builds the ext4 KUnit tests.
diff --git a/fs/ext4/acl.c b/fs/ext4/acl.c
index 68aaed48315f..c5eaffccecc3 100644
--- a/fs/ext4/acl.c
+++ b/fs/ext4/acl.c
@@ -222,7 +222,8 @@ __ext4_set_acl(handle_t *handle, struct inode *inode, int type,
}
int
-ext4_set_acl(struct inode *inode, struct posix_acl *acl, int type)
+ext4_set_acl(struct user_namespace *mnt_userns, struct inode *inode,
+ struct posix_acl *acl, int type)
{
handle_t *handle;
int error, credits, retries = 0;
@@ -245,7 +246,7 @@ retry:
ext4_fc_start_update(inode);
if ((type == ACL_TYPE_ACCESS) && acl) {
- error = posix_acl_update_mode(inode, &mode, &acl);
+ error = posix_acl_update_mode(mnt_userns, inode, &mode, &acl);
if (error)
goto out_stop;
if (mode != inode->i_mode)
diff --git a/fs/ext4/acl.h b/fs/ext4/acl.h
index 9b63f5416a2f..84b8942a57f2 100644
--- a/fs/ext4/acl.h
+++ b/fs/ext4/acl.h
@@ -56,7 +56,8 @@ static inline int ext4_acl_count(size_t size)
/* acl.c */
struct posix_acl *ext4_get_acl(struct inode *inode, int type);
-int ext4_set_acl(struct inode *inode, struct posix_acl *acl, int type);
+int ext4_set_acl(struct user_namespace *mnt_userns, struct inode *inode,
+ struct posix_acl *acl, int type);
extern int ext4_init_acl(handle_t *, struct inode *, struct inode *);
#else /* CONFIG_EXT4_FS_POSIX_ACL */
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 2866d249f3d2..644fd69185d3 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -2755,18 +2755,19 @@ extern int ext4fs_dirhash(const struct inode *dir, const char *name, int len,
/* ialloc.c */
extern int ext4_mark_inode_used(struct super_block *sb, int ino);
-extern struct inode *__ext4_new_inode(handle_t *, struct inode *, umode_t,
+extern struct inode *__ext4_new_inode(struct user_namespace *, handle_t *,
+ struct inode *, umode_t,
const struct qstr *qstr, __u32 goal,
uid_t *owner, __u32 i_flags,
int handle_type, unsigned int line_no,
int nblocks);
-#define ext4_new_inode(handle, dir, mode, qstr, goal, owner, i_flags) \
- __ext4_new_inode((handle), (dir), (mode), (qstr), (goal), (owner), \
- i_flags, 0, 0, 0)
-#define ext4_new_inode_start_handle(dir, mode, qstr, goal, owner, \
+#define ext4_new_inode(handle, dir, mode, qstr, goal, owner, i_flags) \
+ __ext4_new_inode(&init_user_ns, (handle), (dir), (mode), (qstr), \
+ (goal), (owner), i_flags, 0, 0, 0)
+#define ext4_new_inode_start_handle(mnt_userns, dir, mode, qstr, goal, owner, \
type, nblocks) \
- __ext4_new_inode(NULL, (dir), (mode), (qstr), (goal), (owner), \
+ __ext4_new_inode((mnt_userns), NULL, (dir), (mode), (qstr), (goal), (owner), \
0, (type), __LINE__, (nblocks))
@@ -2877,11 +2878,14 @@ extern struct inode *__ext4_iget(struct super_block *sb, unsigned long ino,
__ext4_iget((sb), (ino), (flags), __func__, __LINE__)
extern int ext4_write_inode(struct inode *, struct writeback_control *);
-extern int ext4_setattr(struct dentry *, struct iattr *);
-extern int ext4_getattr(const struct path *, struct kstat *, u32, unsigned int);
+extern int ext4_setattr(struct user_namespace *, struct dentry *,
+ struct iattr *);
+extern int ext4_getattr(struct user_namespace *, const struct path *,
+ struct kstat *, u32, unsigned int);
extern void ext4_evict_inode(struct inode *);
extern void ext4_clear_inode(struct inode *);
-extern int ext4_file_getattr(const struct path *, struct kstat *, u32, unsigned int);
+extern int ext4_file_getattr(struct user_namespace *, const struct path *,
+ struct kstat *, u32, unsigned int);
extern int ext4_sync_inode(handle_t *, struct inode *);
extern void ext4_dirty_inode(struct inode *, int);
extern int ext4_change_inode_journal_flag(struct inode *, int);
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index 3960b7ec3ab7..77c7c8a54da7 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -4382,8 +4382,7 @@ static int ext4_alloc_file_blocks(struct file *file, ext4_lblk_t offset,
{
struct inode *inode = file_inode(file);
handle_t *handle;
- int ret = 0;
- int ret2 = 0, ret3 = 0;
+ int ret, ret2 = 0, ret3 = 0;
int retries = 0;
int depth = 0;
struct ext4_map_blocks map;
@@ -4408,7 +4407,7 @@ static int ext4_alloc_file_blocks(struct file *file, ext4_lblk_t offset,
depth = ext_depth(inode);
retry:
- while (ret >= 0 && len) {
+ while (len) {
/*
* Recalculate credits when extent tree depth changes.
*/
@@ -4430,9 +4429,13 @@ retry:
inode->i_ino, map.m_lblk,
map.m_len, ret);
ext4_mark_inode_dirty(handle, inode);
- ret2 = ext4_journal_stop(handle);
+ ext4_journal_stop(handle);
break;
}
+ /*
+ * allow a full retry cycle for any remaining allocations
+ */
+ retries = 0;
map.m_lblk += ret;
map.m_len = len = len - ret;
epos = (loff_t)map.m_lblk << inode->i_blkbits;
@@ -4450,11 +4453,8 @@ retry:
if (unlikely(ret2))
break;
}
- if (ret == -ENOSPC &&
- ext4_should_retry_alloc(inode->i_sb, &retries)) {
- ret = 0;
+ if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries))
goto retry;
- }
return ret > 0 ? ret2 : ret;
}
diff --git a/fs/ext4/fast_commit.c b/fs/ext4/fast_commit.c
index 6e8208acfc62..6c4f19b0a556 100644
--- a/fs/ext4/fast_commit.c
+++ b/fs/ext4/fast_commit.c
@@ -915,13 +915,11 @@ static int ext4_fc_submit_inode_data_all(journal_t *journal)
struct super_block *sb = (struct super_block *)(journal->j_private);
struct ext4_sb_info *sbi = EXT4_SB(sb);
struct ext4_inode_info *ei;
- struct list_head *pos;
int ret = 0;
spin_lock(&sbi->s_fc_lock);
ext4_set_mount_flag(sb, EXT4_MF_FC_COMMITTING);
- list_for_each(pos, &sbi->s_fc_q[FC_Q_MAIN]) {
- ei = list_entry(pos, struct ext4_inode_info, i_fc_list);
+ list_for_each_entry(ei, &sbi->s_fc_q[FC_Q_MAIN], i_fc_list) {
ext4_set_inode_state(&ei->vfs_inode, EXT4_STATE_FC_COMMITTING);
while (atomic_read(&ei->i_fc_updates)) {
DEFINE_WAIT(wait);
@@ -978,17 +976,15 @@ __releases(&sbi->s_fc_lock)
{
struct super_block *sb = (struct super_block *)(journal->j_private);
struct ext4_sb_info *sbi = EXT4_SB(sb);
- struct ext4_fc_dentry_update *fc_dentry;
+ struct ext4_fc_dentry_update *fc_dentry, *fc_dentry_n;
struct inode *inode;
- struct list_head *pos, *n, *fcd_pos, *fcd_n;
- struct ext4_inode_info *ei;
+ struct ext4_inode_info *ei, *ei_n;
int ret;
if (list_empty(&sbi->s_fc_dentry_q[FC_Q_MAIN]))
return 0;
- list_for_each_safe(fcd_pos, fcd_n, &sbi->s_fc_dentry_q[FC_Q_MAIN]) {
- fc_dentry = list_entry(fcd_pos, struct ext4_fc_dentry_update,
- fcd_list);
+ list_for_each_entry_safe(fc_dentry, fc_dentry_n,
+ &sbi->s_fc_dentry_q[FC_Q_MAIN], fcd_list) {
if (fc_dentry->fcd_op != EXT4_FC_TAG_CREAT) {
spin_unlock(&sbi->s_fc_lock);
if (!ext4_fc_add_dentry_tlv(
@@ -1004,8 +1000,8 @@ __releases(&sbi->s_fc_lock)
}
inode = NULL;
- list_for_each_safe(pos, n, &sbi->s_fc_q[FC_Q_MAIN]) {
- ei = list_entry(pos, struct ext4_inode_info, i_fc_list);
+ list_for_each_entry_safe(ei, ei_n, &sbi->s_fc_q[FC_Q_MAIN],
+ i_fc_list) {
if (ei->vfs_inode.i_ino == fc_dentry->fcd_ino) {
inode = &ei->vfs_inode;
break;
@@ -1057,7 +1053,6 @@ static int ext4_fc_perform_commit(journal_t *journal)
struct ext4_sb_info *sbi = EXT4_SB(sb);
struct ext4_inode_info *iter;
struct ext4_fc_head head;
- struct list_head *pos;
struct inode *inode;
struct blk_plug plug;
int ret = 0;
@@ -1099,8 +1094,7 @@ static int ext4_fc_perform_commit(journal_t *journal)
goto out;
}
- list_for_each(pos, &sbi->s_fc_q[FC_Q_MAIN]) {
- iter = list_entry(pos, struct ext4_inode_info, i_fc_list);
+ list_for_each_entry(iter, &sbi->s_fc_q[FC_Q_MAIN], i_fc_list) {
inode = &iter->vfs_inode;
if (!ext4_test_inode_state(inode, EXT4_STATE_FC_COMMITTING))
continue;
@@ -1226,9 +1220,8 @@ static void ext4_fc_cleanup(journal_t *journal, int full)
{
struct super_block *sb = journal->j_private;
struct ext4_sb_info *sbi = EXT4_SB(sb);
- struct ext4_inode_info *iter;
+ struct ext4_inode_info *iter, *iter_n;
struct ext4_fc_dentry_update *fc_dentry;
- struct list_head *pos, *n;
if (full && sbi->s_fc_bh)
sbi->s_fc_bh = NULL;
@@ -1236,8 +1229,8 @@ static void ext4_fc_cleanup(journal_t *journal, int full)
jbd2_fc_release_bufs(journal);
spin_lock(&sbi->s_fc_lock);
- list_for_each_safe(pos, n, &sbi->s_fc_q[FC_Q_MAIN]) {
- iter = list_entry(pos, struct ext4_inode_info, i_fc_list);
+ list_for_each_entry_safe(iter, iter_n, &sbi->s_fc_q[FC_Q_MAIN],
+ i_fc_list) {
list_del_init(&iter->i_fc_list);
ext4_clear_inode_state(&iter->vfs_inode,
EXT4_STATE_FC_COMMITTING);
diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c
index 20f2fcb799f5..633ae7becd61 100644
--- a/fs/ext4/ialloc.c
+++ b/fs/ext4/ialloc.c
@@ -919,7 +919,8 @@ static int ext4_xattr_credits_for_new_inode(struct inode *dir, mode_t mode,
* For other inodes, search forward from the parent directory's block
* group to find a free inode.
*/
-struct inode *__ext4_new_inode(handle_t *handle, struct inode *dir,
+struct inode *__ext4_new_inode(struct user_namespace *mnt_userns,
+ handle_t *handle, struct inode *dir,
umode_t mode, const struct qstr *qstr,
__u32 goal, uid_t *owner, __u32 i_flags,
int handle_type, unsigned int line_no,
@@ -969,10 +970,10 @@ struct inode *__ext4_new_inode(handle_t *handle, struct inode *dir,
i_gid_write(inode, owner[1]);
} else if (test_opt(sb, GRPID)) {
inode->i_mode = mode;
- inode->i_uid = current_fsuid();
+ inode->i_uid = fsuid_into_mnt(mnt_userns);
inode->i_gid = dir->i_gid;
} else
- inode_init_owner(inode, dir, mode);
+ inode_init_owner(mnt_userns, inode, dir, mode);
if (ext4_has_feature_project(sb) &&
ext4_test_inode_flag(dir, EXT4_INODE_PROJINHERIT))
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index c173c8405856..650c5acd2f2d 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -20,6 +20,7 @@
*/
#include <linux/fs.h>
+#include <linux/mount.h>
#include <linux/time.h>
#include <linux/highuid.h>
#include <linux/pagemap.h>
@@ -4961,15 +4962,11 @@ static void __ext4_update_other_inode_time(struct super_block *sb,
if (!inode)
return;
- if ((inode->i_state & (I_FREEING | I_WILL_FREE | I_NEW |
- I_DIRTY_INODE)) ||
- ((inode->i_state & I_DIRTY_TIME) == 0))
+ if (!inode_is_dirtytime_only(inode))
return;
spin_lock(&inode->i_lock);
- if (((inode->i_state & (I_FREEING | I_WILL_FREE | I_NEW |
- I_DIRTY_INODE)) == 0) &&
- (inode->i_state & I_DIRTY_TIME)) {
+ if (inode_is_dirtytime_only(inode)) {
struct ext4_inode_info *ei = EXT4_I(inode);
inode->i_state &= ~I_DIRTY_TIME;
@@ -5319,7 +5316,8 @@ static void ext4_wait_for_tail_page_commit(struct inode *inode)
*
* Called with inode->i_mutex down.
*/
-int ext4_setattr(struct dentry *dentry, struct iattr *attr)
+int ext4_setattr(struct user_namespace *mnt_userns, struct dentry *dentry,
+ struct iattr *attr)
{
struct inode *inode = d_inode(dentry);
int error, rc = 0;
@@ -5337,7 +5335,7 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr)
ATTR_GID | ATTR_TIMES_SET))))
return -EPERM;
- error = setattr_prepare(dentry, attr);
+ error = setattr_prepare(mnt_userns, dentry, attr);
if (error)
return error;
@@ -5512,7 +5510,7 @@ out_mmap_sem:
}
if (!error) {
- setattr_copy(inode, attr);
+ setattr_copy(mnt_userns, inode, attr);
mark_inode_dirty(inode);
}
@@ -5524,7 +5522,7 @@ out_mmap_sem:
ext4_orphan_del(NULL, inode);
if (!error && (ia_valid & ATTR_MODE))
- rc = posix_acl_chmod(inode, inode->i_mode);
+ rc = posix_acl_chmod(mnt_userns, inode, inode->i_mode);
err_out:
if (error)
@@ -5535,8 +5533,8 @@ err_out:
return error;
}
-int ext4_getattr(const struct path *path, struct kstat *stat,
- u32 request_mask, unsigned int query_flags)
+int ext4_getattr(struct user_namespace *mnt_userns, const struct path *path,
+ struct kstat *stat, u32 request_mask, unsigned int query_flags)
{
struct inode *inode = d_inode(path->dentry);
struct ext4_inode *raw_inode;
@@ -5571,17 +5569,18 @@ int ext4_getattr(const struct path *path, struct kstat *stat,
STATX_ATTR_NODUMP |
STATX_ATTR_VERITY);
- generic_fillattr(inode, stat);
+ generic_fillattr(mnt_userns, inode, stat);
return 0;
}
-int ext4_file_getattr(const struct path *path, struct kstat *stat,
+int ext4_file_getattr(struct user_namespace *mnt_userns,
+ const struct path *path, struct kstat *stat,
u32 request_mask, unsigned int query_flags)
{
struct inode *inode = d_inode(path->dentry);
u64 delalloc_blocks;
- ext4_getattr(path, stat, request_mask, query_flags);
+ ext4_getattr(mnt_userns, path, stat, request_mask, query_flags);
/*
* If there is inline data in the inode, the inode will normally not
@@ -5937,26 +5936,16 @@ out:
* If the inode is marked synchronous, we don't honour that here - doing
* so would cause a commit on atime updates, which we don't bother doing.
* We handle synchronous inodes at the highest possible level.
- *
- * If only the I_DIRTY_TIME flag is set, we can skip everything. If
- * I_DIRTY_TIME and I_DIRTY_SYNC is set, the only inode fields we need
- * to copy into the on-disk inode structure are the timestamp files.
*/
void ext4_dirty_inode(struct inode *inode, int flags)
{
handle_t *handle;
- if (flags == I_DIRTY_TIME)
- return;
handle = ext4_journal_start(inode, EXT4_HT_INODE, 2);
if (IS_ERR(handle))
- goto out;
-
+ return;
ext4_mark_inode_dirty(handle, inode);
-
ext4_journal_stop(handle);
-out:
- return;
}
int ext4_change_inode_journal_flag(struct inode *inode, int val)
diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c
index 713b1ae44c1a..a2cf35066f46 100644
--- a/fs/ext4/ioctl.c
+++ b/fs/ext4/ioctl.c
@@ -107,10 +107,12 @@ void ext4_reset_inode_seed(struct inode *inode)
* important fields of the inodes.
*
* @sb: the super block of the filesystem
+ * @mnt_userns: user namespace of the mount the inode was found from
* @inode: the inode to swap with EXT4_BOOT_LOADER_INO
*
*/
static long swap_inode_boot_loader(struct super_block *sb,
+ struct user_namespace *mnt_userns,
struct inode *inode)
{
handle_t *handle;
@@ -139,7 +141,8 @@ static long swap_inode_boot_loader(struct super_block *sb,
}
if (IS_RDONLY(inode) || IS_APPEND(inode) || IS_IMMUTABLE(inode) ||
- !inode_owner_or_capable(inode) || !capable(CAP_SYS_ADMIN)) {
+ !inode_owner_or_capable(mnt_userns, inode) ||
+ !capable(CAP_SYS_ADMIN)) {
err = -EPERM;
goto journal_err_out;
}
@@ -814,6 +817,7 @@ static long __ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
struct inode *inode = file_inode(filp);
struct super_block *sb = inode->i_sb;
struct ext4_inode_info *ei = EXT4_I(inode);
+ struct user_namespace *mnt_userns = file_mnt_user_ns(filp);
unsigned int flags;
ext4_debug("cmd = %u, arg = %lu\n", cmd, arg);
@@ -829,7 +833,7 @@ static long __ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
case FS_IOC_SETFLAGS: {
int err;
- if (!inode_owner_or_capable(inode))
+ if (!inode_owner_or_capable(mnt_userns, inode))
return -EACCES;
if (get_user(flags, (int __user *) arg))
@@ -871,7 +875,7 @@ static long __ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
__u32 generation;
int err;
- if (!inode_owner_or_capable(inode))
+ if (!inode_owner_or_capable(mnt_userns, inode))
return -EPERM;
if (ext4_has_metadata_csum(inode->i_sb)) {
@@ -1010,7 +1014,7 @@ mext_out:
case EXT4_IOC_MIGRATE:
{
int err;
- if (!inode_owner_or_capable(inode))
+ if (!inode_owner_or_capable(mnt_userns, inode))
return -EACCES;
err = mnt_want_write_file(filp);
@@ -1032,7 +1036,7 @@ mext_out:
case EXT4_IOC_ALLOC_DA_BLKS:
{
int err;
- if (!inode_owner_or_capable(inode))
+ if (!inode_owner_or_capable(mnt_userns, inode))
return -EACCES;
err = mnt_want_write_file(filp);
@@ -1051,7 +1055,7 @@ mext_out:
err = mnt_want_write_file(filp);
if (err)
return err;
- err = swap_inode_boot_loader(sb, inode);
+ err = swap_inode_boot_loader(sb, mnt_userns, inode);
mnt_drop_write_file(filp);
return err;
}
@@ -1217,7 +1221,7 @@ resizefs_out:
case EXT4_IOC_CLEAR_ES_CACHE:
{
- if (!inode_owner_or_capable(inode))
+ if (!inode_owner_or_capable(mnt_userns, inode))
return -EACCES;
ext4_clear_inode_es(inode);
return 0;
@@ -1263,7 +1267,7 @@ resizefs_out:
return -EFAULT;
/* Make sure caller has proper permission */
- if (!inode_owner_or_capable(inode))
+ if (!inode_owner_or_capable(mnt_userns, inode))
return -EACCES;
if (fa.fsx_xflags & ~EXT4_SUPPORTED_FS_XFLAGS)
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
index cf652ba3e74d..686bf982c84e 100644
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -731,6 +731,29 @@ struct stats dx_show_entries(struct dx_hash_info *hinfo, struct inode *dir,
(space/bcount)*100/blocksize);
return (struct stats) { names, space, bcount};
}
+
+/*
+ * Linear search cross check
+ */
+static inline void htree_rep_invariant_check(struct dx_entry *at,
+ struct dx_entry *target,
+ u32 hash, unsigned int n)
+{
+ while (n--) {
+ dxtrace(printk(KERN_CONT ","));
+ if (dx_get_hash(++at) > hash) {
+ at--;
+ break;
+ }
+ }
+ ASSERT(at == target - 1);
+}
+#else /* DX_DEBUG */
+static inline void htree_rep_invariant_check(struct dx_entry *at,
+ struct dx_entry *target,
+ u32 hash, unsigned int n)
+{
+}
#endif /* DX_DEBUG */
/*
@@ -827,20 +850,7 @@ dx_probe(struct ext4_filename *fname, struct inode *dir,
p = m + 1;
}
- if (0) { // linear search cross check
- unsigned n = count - 1;
- at = entries;
- while (n--)
- {
- dxtrace(printk(KERN_CONT ","));
- if (dx_get_hash(++at) > hash)
- {
- at--;
- break;
- }
- }
- ASSERT(at == p - 1);
- }
+ htree_rep_invariant_check(entries, p, hash, count - 1);
at = p - 1;
dxtrace(printk(KERN_CONT " %x->%u\n",
@@ -2401,11 +2411,10 @@ again:
(frame - 1)->bh);
if (err)
goto journal_error;
- if (restart) {
- err = ext4_handle_dirty_dx_node(handle, dir,
- frame->bh);
+ err = ext4_handle_dirty_dx_node(handle, dir,
+ frame->bh);
+ if (err)
goto journal_error;
- }
} else {
struct dx_root *dxroot;
memcpy((char *) entries2, (char *) entries,
@@ -2596,8 +2605,8 @@ static int ext4_add_nondir(handle_t *handle,
* If the create succeeds, we fill in the inode information
* with d_instantiate().
*/
-static int ext4_create(struct inode *dir, struct dentry *dentry, umode_t mode,
- bool excl)
+static int ext4_create(struct user_namespace *mnt_userns, struct inode *dir,
+ struct dentry *dentry, umode_t mode, bool excl)
{
handle_t *handle;
struct inode *inode;
@@ -2610,8 +2619,8 @@ static int ext4_create(struct inode *dir, struct dentry *dentry, umode_t mode,
credits = (EXT4_DATA_TRANS_BLOCKS(dir->i_sb) +
EXT4_INDEX_EXTRA_TRANS_BLOCKS + 3);
retry:
- inode = ext4_new_inode_start_handle(dir, mode, &dentry->d_name, 0,
- NULL, EXT4_HT_DIR, credits);
+ inode = ext4_new_inode_start_handle(mnt_userns, dir, mode, &dentry->d_name,
+ 0, NULL, EXT4_HT_DIR, credits);
handle = ext4_journal_current_handle();
err = PTR_ERR(inode);
if (!IS_ERR(inode)) {
@@ -2631,8 +2640,8 @@ retry:
return err;
}
-static int ext4_mknod(struct inode *dir, struct dentry *dentry,
- umode_t mode, dev_t rdev)
+static int ext4_mknod(struct user_namespace *mnt_userns, struct inode *dir,
+ struct dentry *dentry, umode_t mode, dev_t rdev)
{
handle_t *handle;
struct inode *inode;
@@ -2645,8 +2654,8 @@ static int ext4_mknod(struct inode *dir, struct dentry *dentry,
credits = (EXT4_DATA_TRANS_BLOCKS(dir->i_sb) +
EXT4_INDEX_EXTRA_TRANS_BLOCKS + 3);
retry:
- inode = ext4_new_inode_start_handle(dir, mode, &dentry->d_name, 0,
- NULL, EXT4_HT_DIR, credits);
+ inode = ext4_new_inode_start_handle(mnt_userns, dir, mode, &dentry->d_name,
+ 0, NULL, EXT4_HT_DIR, credits);
handle = ext4_journal_current_handle();
err = PTR_ERR(inode);
if (!IS_ERR(inode)) {
@@ -2665,7 +2674,8 @@ retry:
return err;
}
-static int ext4_tmpfile(struct inode *dir, struct dentry *dentry, umode_t mode)
+static int ext4_tmpfile(struct user_namespace *mnt_userns, struct inode *dir,
+ struct dentry *dentry, umode_t mode)
{
handle_t *handle;
struct inode *inode;
@@ -2676,7 +2686,7 @@ static int ext4_tmpfile(struct inode *dir, struct dentry *dentry, umode_t mode)
return err;
retry:
- inode = ext4_new_inode_start_handle(dir, mode,
+ inode = ext4_new_inode_start_handle(mnt_userns, dir, mode,
NULL, 0, NULL,
EXT4_HT_DIR,
EXT4_MAXQUOTAS_INIT_BLOCKS(dir->i_sb) +
@@ -2774,7 +2784,8 @@ out:
return err;
}
-static int ext4_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
+static int ext4_mkdir(struct user_namespace *mnt_userns, struct inode *dir,
+ struct dentry *dentry, umode_t mode)
{
handle_t *handle;
struct inode *inode;
@@ -2790,7 +2801,7 @@ static int ext4_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
credits = (EXT4_DATA_TRANS_BLOCKS(dir->i_sb) +
EXT4_INDEX_EXTRA_TRANS_BLOCKS + 3);
retry:
- inode = ext4_new_inode_start_handle(dir, S_IFDIR | mode,
+ inode = ext4_new_inode_start_handle(mnt_userns, dir, S_IFDIR | mode,
&dentry->d_name,
0, NULL, EXT4_HT_DIR, credits);
handle = ext4_journal_current_handle();
@@ -3292,7 +3303,7 @@ out_trace:
return retval;
}
-static int ext4_symlink(struct inode *dir,
+static int ext4_symlink(struct user_namespace *mnt_userns, struct inode *dir,
struct dentry *dentry, const char *symname)
{
handle_t *handle;
@@ -3333,7 +3344,7 @@ static int ext4_symlink(struct inode *dir,
EXT4_INDEX_EXTRA_TRANS_BLOCKS + 3;
}
- inode = ext4_new_inode_start_handle(dir, S_IFLNK|S_IRWXUGO,
+ inode = ext4_new_inode_start_handle(mnt_userns, dir, S_IFLNK|S_IRWXUGO,
&dentry->d_name, 0, NULL,
EXT4_HT_DIR, credits);
handle = ext4_journal_current_handle();
@@ -3662,7 +3673,8 @@ static void ext4_update_dir_count(handle_t *handle, struct ext4_renament *ent)
}
}
-static struct inode *ext4_whiteout_for_rename(struct ext4_renament *ent,
+static struct inode *ext4_whiteout_for_rename(struct user_namespace *mnt_userns,
+ struct ext4_renament *ent,
int credits, handle_t **h)
{
struct inode *wh;
@@ -3676,7 +3688,8 @@ static struct inode *ext4_whiteout_for_rename(struct ext4_renament *ent,
credits += (EXT4_MAXQUOTAS_TRANS_BLOCKS(ent->dir->i_sb) +
EXT4_XATTR_TRANS_BLOCKS + 4);
retry:
- wh = ext4_new_inode_start_handle(ent->dir, S_IFCHR | WHITEOUT_MODE,
+ wh = ext4_new_inode_start_handle(mnt_userns, ent->dir,
+ S_IFCHR | WHITEOUT_MODE,
&ent->dentry->d_name, 0, NULL,
EXT4_HT_DIR, credits);
@@ -3703,9 +3716,9 @@ retry:
* while new_{dentry,inode) refers to the destination dentry/inode
* This comes from rename(const char *oldpath, const char *newpath)
*/
-static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry,
- struct inode *new_dir, struct dentry *new_dentry,
- unsigned int flags)
+static int ext4_rename(struct user_namespace *mnt_userns, struct inode *old_dir,
+ struct dentry *old_dentry, struct inode *new_dir,
+ struct dentry *new_dentry, unsigned int flags)
{
handle_t *handle = NULL;
struct ext4_renament old = {
@@ -3789,7 +3802,7 @@ static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry,
goto end_rename;
}
} else {
- whiteout = ext4_whiteout_for_rename(&old, credits, &handle);
+ whiteout = ext4_whiteout_for_rename(mnt_userns, &old, credits, &handle);
if (IS_ERR(whiteout)) {
retval = PTR_ERR(whiteout);
whiteout = NULL;
@@ -4085,7 +4098,8 @@ end_rename:
return retval;
}
-static int ext4_rename2(struct inode *old_dir, struct dentry *old_dentry,
+static int ext4_rename2(struct user_namespace *mnt_userns,
+ struct inode *old_dir, struct dentry *old_dentry,
struct inode *new_dir, struct dentry *new_dentry,
unsigned int flags)
{
@@ -4107,7 +4121,7 @@ static int ext4_rename2(struct inode *old_dir, struct dentry *old_dentry,
new_dir, new_dentry);
}
- return ext4_rename(old_dir, old_dentry, new_dir, new_dentry, flags);
+ return ext4_rename(mnt_userns, old_dir, old_dentry, new_dir, new_dentry, flags);
}
/*
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index fb5985102c1d..ad34a37278cd 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -59,7 +59,7 @@
#include <trace/events/ext4.h>
static struct ext4_lazy_init *ext4_li_info;
-static struct mutex ext4_li_mtx;
+static DEFINE_MUTEX(ext4_li_mtx);
static struct ratelimit_state ext4_mount_msg_ratelimit;
static int ext4_load_journal(struct super_block *, struct ext4_super_block *,
@@ -4875,7 +4875,6 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
set_task_ioprio(sbi->s_journal->j_task, journal_ioprio);
- sbi->s_journal->j_commit_callback = ext4_journal_commit_callback;
sbi->s_journal->j_submit_inode_data_buffers =
ext4_journal_submit_inode_data_buffers;
sbi->s_journal->j_finish_inode_data_buffers =
@@ -4987,6 +4986,14 @@ no_journal:
goto failed_mount5;
}
+ /*
+ * We can only set up the journal commit callback once
+ * mballoc is initialized
+ */
+ if (sbi->s_journal)
+ sbi->s_journal->j_commit_callback =
+ ext4_journal_commit_callback;
+
block = ext4_count_free_clusters(sb);
ext4_free_blocks_count_set(sbi->s_es,
EXT4_C2B(sbi, block));
@@ -6654,7 +6661,7 @@ static struct file_system_type ext4_fs_type = {
.name = "ext4",
.mount = ext4_mount,
.kill_sb = kill_block_super,
- .fs_flags = FS_REQUIRES_DEV,
+ .fs_flags = FS_REQUIRES_DEV | FS_ALLOW_IDMAP,
};
MODULE_ALIAS_FS("ext4");
@@ -6667,7 +6674,6 @@ static int __init ext4_init_fs(void)
ratelimit_state_init(&ext4_mount_msg_ratelimit, 30 * HZ, 64);
ext4_li_info = NULL;
- mutex_init(&ext4_li_mtx);
/* Build-time check for flags consistency */
ext4_check_flag_values();
diff --git a/fs/ext4/xattr_hurd.c b/fs/ext4/xattr_hurd.c
index 8cfa74a56361..c78df5790377 100644
--- a/fs/ext4/xattr_hurd.c
+++ b/fs/ext4/xattr_hurd.c
@@ -32,6 +32,7 @@ ext4_xattr_hurd_get(const struct xattr_handler *handler,
static int
ext4_xattr_hurd_set(const struct xattr_handler *handler,
+ struct user_namespace *mnt_userns,
struct dentry *unused, struct inode *inode,
const char *name, const void *value,
size_t size, int flags)
diff --git a/fs/ext4/xattr_security.c b/fs/ext4/xattr_security.c
index 197a9d8a15ef..8213f66f7b2d 100644
--- a/fs/ext4/xattr_security.c
+++ b/fs/ext4/xattr_security.c
@@ -23,6 +23,7 @@ ext4_xattr_security_get(const struct xattr_handler *handler,
static int
ext4_xattr_security_set(const struct xattr_handler *handler,
+ struct user_namespace *mnt_userns,
struct dentry *unused, struct inode *inode,
const char *name, const void *value,
size_t size, int flags)
diff --git a/fs/ext4/xattr_trusted.c b/fs/ext4/xattr_trusted.c
index e9389e5d75c3..7c21ffb26d25 100644
--- a/fs/ext4/xattr_trusted.c
+++ b/fs/ext4/xattr_trusted.c
@@ -30,6 +30,7 @@ ext4_xattr_trusted_get(const struct xattr_handler *handler,
static int
ext4_xattr_trusted_set(const struct xattr_handler *handler,
+ struct user_namespace *mnt_userns,
struct dentry *unused, struct inode *inode,
const char *name, const void *value,
size_t size, int flags)
diff --git a/fs/ext4/xattr_user.c b/fs/ext4/xattr_user.c
index d4546184b34b..2fe7ff0a479c 100644
--- a/fs/ext4/xattr_user.c
+++ b/fs/ext4/xattr_user.c
@@ -31,6 +31,7 @@ ext4_xattr_user_get(const struct xattr_handler *handler,
static int
ext4_xattr_user_set(const struct xattr_handler *handler,
+ struct user_namespace *mnt_userns,
struct dentry *unused, struct inode *inode,
const char *name, const void *value,
size_t size, int flags)
diff --git a/fs/f2fs/acl.c b/fs/f2fs/acl.c
index 732ec10e7890..965037a9c205 100644
--- a/fs/f2fs/acl.c
+++ b/fs/f2fs/acl.c
@@ -214,8 +214,8 @@ static int f2fs_acl_update_mode(struct inode *inode, umode_t *mode_p,
return error;
if (error == 0)
*acl = NULL;
- if (!in_group_p(inode->i_gid) &&
- !capable_wrt_inode_uidgid(inode, CAP_FSETID))
+ if (!in_group_p(i_gid_into_mnt(&init_user_ns, inode)) &&
+ !capable_wrt_inode_uidgid(&init_user_ns, inode, CAP_FSETID))
mode &= ~S_ISGID;
*mode_p = mode;
return 0;
@@ -269,7 +269,8 @@ static int __f2fs_set_acl(struct inode *inode, int type,
return error;
}
-int f2fs_set_acl(struct inode *inode, struct posix_acl *acl, int type)
+int f2fs_set_acl(struct user_namespace *mnt_userns, struct inode *inode,
+ struct posix_acl *acl, int type)
{
if (unlikely(f2fs_cp_error(F2FS_I_SB(inode))))
return -EIO;
diff --git a/fs/f2fs/acl.h b/fs/f2fs/acl.h
index 124868c13f80..986fd1bc780b 100644
--- a/fs/f2fs/acl.h
+++ b/fs/f2fs/acl.h
@@ -34,7 +34,8 @@ struct f2fs_acl_header {
#ifdef CONFIG_F2FS_FS_POSIX_ACL
extern struct posix_acl *f2fs_get_acl(struct inode *, int);
-extern int f2fs_set_acl(struct inode *, struct posix_acl *, int);
+extern int f2fs_set_acl(struct user_namespace *, struct inode *,
+ struct posix_acl *, int);
extern int f2fs_init_acl(struct inode *, struct inode *, struct page *,
struct page *);
#else
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index 506c801880f3..e2d302ae3a46 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -3187,9 +3187,10 @@ void f2fs_truncate_data_blocks(struct dnode_of_data *dn);
int f2fs_do_truncate_blocks(struct inode *inode, u64 from, bool lock);
int f2fs_truncate_blocks(struct inode *inode, u64 from, bool lock);
int f2fs_truncate(struct inode *inode);
-int f2fs_getattr(const struct path *path, struct kstat *stat,
- u32 request_mask, unsigned int flags);
-int f2fs_setattr(struct dentry *dentry, struct iattr *attr);
+int f2fs_getattr(struct user_namespace *mnt_userns, const struct path *path,
+ struct kstat *stat, u32 request_mask, unsigned int flags);
+int f2fs_setattr(struct user_namespace *mnt_userns, struct dentry *dentry,
+ struct iattr *attr);
int f2fs_truncate_hole(struct inode *inode, pgoff_t pg_start, pgoff_t pg_end);
void f2fs_truncate_data_blocks_range(struct dnode_of_data *dn, int count);
int f2fs_precache_extents(struct inode *inode);
diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
index 471a6ff0c937..d26ff2ae3f5e 100644
--- a/fs/f2fs/file.c
+++ b/fs/f2fs/file.c
@@ -789,8 +789,8 @@ int f2fs_truncate(struct inode *inode)
return 0;
}
-int f2fs_getattr(const struct path *path, struct kstat *stat,
- u32 request_mask, unsigned int query_flags)
+int f2fs_getattr(struct user_namespace *mnt_userns, const struct path *path,
+ struct kstat *stat, u32 request_mask, unsigned int query_flags)
{
struct inode *inode = d_inode(path->dentry);
struct f2fs_inode_info *fi = F2FS_I(inode);
@@ -826,7 +826,7 @@ int f2fs_getattr(const struct path *path, struct kstat *stat,
STATX_ATTR_NODUMP |
STATX_ATTR_VERITY);
- generic_fillattr(inode, stat);
+ generic_fillattr(&init_user_ns, inode, stat);
/* we need to show initial sectors used for inline_data/dentries */
if ((S_ISREG(inode->i_mode) && f2fs_has_inline_data(inode)) ||
@@ -837,7 +837,8 @@ int f2fs_getattr(const struct path *path, struct kstat *stat,
}
#ifdef CONFIG_F2FS_FS_POSIX_ACL
-static void __setattr_copy(struct inode *inode, const struct iattr *attr)
+static void __setattr_copy(struct user_namespace *mnt_userns,
+ struct inode *inode, const struct iattr *attr)
{
unsigned int ia_valid = attr->ia_valid;
@@ -853,9 +854,9 @@ static void __setattr_copy(struct inode *inode, const struct iattr *attr)
inode->i_ctime = attr->ia_ctime;
if (ia_valid & ATTR_MODE) {
umode_t mode = attr->ia_mode;
+ kgid_t kgid = i_gid_into_mnt(mnt_userns, inode);
- if (!in_group_p(inode->i_gid) &&
- !capable_wrt_inode_uidgid(inode, CAP_FSETID))
+ if (!in_group_p(kgid) && !capable_wrt_inode_uidgid(mnt_userns, inode, CAP_FSETID))
mode &= ~S_ISGID;
set_acl_inode(inode, mode);
}
@@ -864,7 +865,8 @@ static void __setattr_copy(struct inode *inode, const struct iattr *attr)
#define __setattr_copy setattr_copy
#endif
-int f2fs_setattr(struct dentry *dentry, struct iattr *attr)
+int f2fs_setattr(struct user_namespace *mnt_userns, struct dentry *dentry,
+ struct iattr *attr)
{
struct inode *inode = d_inode(dentry);
int err;
@@ -884,7 +886,7 @@ int f2fs_setattr(struct dentry *dentry, struct iattr *attr)
!f2fs_is_compress_backend_ready(inode))
return -EOPNOTSUPP;
- err = setattr_prepare(dentry, attr);
+ err = setattr_prepare(&init_user_ns, dentry, attr);
if (err)
return err;
@@ -960,10 +962,10 @@ int f2fs_setattr(struct dentry *dentry, struct iattr *attr)
spin_unlock(&F2FS_I(inode)->i_size_lock);
}
- __setattr_copy(inode, attr);
+ __setattr_copy(&init_user_ns, inode, attr);
if (attr->ia_valid & ATTR_MODE) {
- err = posix_acl_chmod(inode, f2fs_get_inode_mode(inode));
+ err = posix_acl_chmod(&init_user_ns, inode, f2fs_get_inode_mode(inode));
if (is_inode_flag_set(inode, FI_ACL_MODE)) {
if (!err)
@@ -1978,7 +1980,7 @@ static int f2fs_ioc_setflags(struct file *filp, unsigned long arg)
u32 iflags;
int ret;
- if (!inode_owner_or_capable(inode))
+ if (!inode_owner_or_capable(&init_user_ns, inode))
return -EACCES;
if (get_user(fsflags, (int __user *)arg))
@@ -2025,7 +2027,7 @@ static int f2fs_ioc_start_atomic_write(struct file *filp)
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
int ret;
- if (!inode_owner_or_capable(inode))
+ if (!inode_owner_or_capable(&init_user_ns, inode))
return -EACCES;
if (!S_ISREG(inode->i_mode))
@@ -2092,7 +2094,7 @@ static int f2fs_ioc_commit_atomic_write(struct file *filp)
struct inode *inode = file_inode(filp);
int ret;
- if (!inode_owner_or_capable(inode))
+ if (!inode_owner_or_capable(&init_user_ns, inode))
return -EACCES;
ret = mnt_want_write_file(filp);
@@ -2134,7 +2136,7 @@ static int f2fs_ioc_start_volatile_write(struct file *filp)
struct inode *inode = file_inode(filp);
int ret;
- if (!inode_owner_or_capable(inode))
+ if (!inode_owner_or_capable(&init_user_ns, inode))
return -EACCES;
if (!S_ISREG(inode->i_mode))
@@ -2169,7 +2171,7 @@ static int f2fs_ioc_release_volatile_write(struct file *filp)
struct inode *inode = file_inode(filp);
int ret;
- if (!inode_owner_or_capable(inode))
+ if (!inode_owner_or_capable(&init_user_ns, inode))
return -EACCES;
ret = mnt_want_write_file(filp);
@@ -2198,7 +2200,7 @@ static int f2fs_ioc_abort_volatile_write(struct file *filp)
struct inode *inode = file_inode(filp);
int ret;
- if (!inode_owner_or_capable(inode))
+ if (!inode_owner_or_capable(&init_user_ns, inode))
return -EACCES;
ret = mnt_want_write_file(filp);
@@ -3175,7 +3177,7 @@ static int f2fs_ioc_fssetxattr(struct file *filp, unsigned long arg)
return -EFAULT;
/* Make sure caller has proper permission */
- if (!inode_owner_or_capable(inode))
+ if (!inode_owner_or_capable(&init_user_ns, inode))
return -EACCES;
if (fa.fsx_xflags & ~F2FS_SUPPORTED_XFLAGS)
diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c
index 887804968576..17bd072a5d39 100644
--- a/fs/f2fs/namei.c
+++ b/fs/f2fs/namei.c
@@ -46,7 +46,7 @@ static struct inode *f2fs_new_inode(struct inode *dir, umode_t mode)
nid_free = true;
- inode_init_owner(inode, dir, mode);
+ inode_init_owner(&init_user_ns, inode, dir, mode);
inode->i_ino = ino;
inode->i_blocks = 0;
@@ -314,8 +314,8 @@ static void set_compress_inode(struct f2fs_sb_info *sbi, struct inode *inode,
}
}
-static int f2fs_create(struct inode *dir, struct dentry *dentry, umode_t mode,
- bool excl)
+static int f2fs_create(struct user_namespace *mnt_userns, struct inode *dir,
+ struct dentry *dentry, umode_t mode, bool excl)
{
struct f2fs_sb_info *sbi = F2FS_I_SB(dir);
struct inode *inode;
@@ -637,8 +637,8 @@ static const char *f2fs_get_link(struct dentry *dentry,
return link;
}
-static int f2fs_symlink(struct inode *dir, struct dentry *dentry,
- const char *symname)
+static int f2fs_symlink(struct user_namespace *mnt_userns, struct inode *dir,
+ struct dentry *dentry, const char *symname)
{
struct f2fs_sb_info *sbi = F2FS_I_SB(dir);
struct inode *inode;
@@ -717,7 +717,8 @@ out_free_encrypted_link:
return err;
}
-static int f2fs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
+static int f2fs_mkdir(struct user_namespace *mnt_userns, struct inode *dir,
+ struct dentry *dentry, umode_t mode)
{
struct f2fs_sb_info *sbi = F2FS_I_SB(dir);
struct inode *inode;
@@ -770,8 +771,8 @@ static int f2fs_rmdir(struct inode *dir, struct dentry *dentry)
return -ENOTEMPTY;
}
-static int f2fs_mknod(struct inode *dir, struct dentry *dentry,
- umode_t mode, dev_t rdev)
+static int f2fs_mknod(struct user_namespace *mnt_userns, struct inode *dir,
+ struct dentry *dentry, umode_t mode, dev_t rdev)
{
struct f2fs_sb_info *sbi = F2FS_I_SB(dir);
struct inode *inode;
@@ -878,7 +879,8 @@ out:
return err;
}
-static int f2fs_tmpfile(struct inode *dir, struct dentry *dentry, umode_t mode)
+static int f2fs_tmpfile(struct user_namespace *mnt_userns, struct inode *dir,
+ struct dentry *dentry, umode_t mode)
{
struct f2fs_sb_info *sbi = F2FS_I_SB(dir);
@@ -1255,7 +1257,8 @@ out:
return err;
}
-static int f2fs_rename2(struct inode *old_dir, struct dentry *old_dentry,
+static int f2fs_rename2(struct user_namespace *mnt_userns,
+ struct inode *old_dir, struct dentry *old_dentry,
struct inode *new_dir, struct dentry *new_dentry,
unsigned int flags)
{
diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
index 4acfa7d36731..7069793752f1 100644
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -1300,9 +1300,6 @@ static void f2fs_dirty_inode(struct inode *inode, int flags)
inode->i_ino == F2FS_META_INO(sbi))
return;
- if (flags == I_DIRTY_TIME)
- return;
-
if (is_inode_flag_set(inode, FI_AUTO_RECOVER))
clear_inode_flag(inode, FI_AUTO_RECOVER);
diff --git a/fs/f2fs/xattr.c b/fs/f2fs/xattr.c
index 8159fae74b9a..490f843ec3bf 100644
--- a/fs/f2fs/xattr.c
+++ b/fs/f2fs/xattr.c
@@ -64,6 +64,7 @@ static int f2fs_xattr_generic_get(const struct xattr_handler *handler,
}
static int f2fs_xattr_generic_set(const struct xattr_handler *handler,
+ struct user_namespace *mnt_userns,
struct dentry *unused, struct inode *inode,
const char *name, const void *value,
size_t size, int flags)
@@ -107,6 +108,7 @@ static int f2fs_xattr_advise_get(const struct xattr_handler *handler,
}
static int f2fs_xattr_advise_set(const struct xattr_handler *handler,
+ struct user_namespace *mnt_userns,
struct dentry *unused, struct inode *inode,
const char *name, const void *value,
size_t size, int flags)
@@ -114,7 +116,7 @@ static int f2fs_xattr_advise_set(const struct xattr_handler *handler,
unsigned char old_advise = F2FS_I(inode)->i_advise;
unsigned char new_advise;
- if (!inode_owner_or_capable(inode))
+ if (!inode_owner_or_capable(&init_user_ns, inode))
return -EPERM;
if (value == NULL)
return -EINVAL;
diff --git a/fs/fat/fat.h b/fs/fat/fat.h
index 922a0c6ba46c..02d4d4234956 100644
--- a/fs/fat/fat.h
+++ b/fs/fat/fat.h
@@ -397,9 +397,11 @@ extern long fat_generic_ioctl(struct file *filp, unsigned int cmd,
unsigned long arg);
extern const struct file_operations fat_file_operations;
extern const struct inode_operations fat_file_inode_operations;
-extern int fat_setattr(struct dentry *dentry, struct iattr *attr);
+extern int fat_setattr(struct user_namespace *mnt_userns, struct dentry *dentry,
+ struct iattr *attr);
extern void fat_truncate_blocks(struct inode *inode, loff_t offset);
-extern int fat_getattr(const struct path *path, struct kstat *stat,
+extern int fat_getattr(struct user_namespace *mnt_userns,
+ const struct path *path, struct kstat *stat,
u32 request_mask, unsigned int flags);
extern int fat_file_fsync(struct file *file, loff_t start, loff_t end,
int datasync);
diff --git a/fs/fat/file.c b/fs/fat/file.c
index 5fee74f1ad61..13855ba49cd9 100644
--- a/fs/fat/file.c
+++ b/fs/fat/file.c
@@ -95,7 +95,7 @@ static int fat_ioctl_set_attributes(struct file *file, u32 __user *user_attr)
goto out_unlock_inode;
/* This MUST be done before doing anything irreversible... */
- err = fat_setattr(file->f_path.dentry, &ia);
+ err = fat_setattr(file_mnt_user_ns(file), file->f_path.dentry, &ia);
if (err)
goto out_unlock_inode;
@@ -394,11 +394,11 @@ void fat_truncate_blocks(struct inode *inode, loff_t offset)
fat_flush_inodes(inode->i_sb, inode, NULL);
}
-int fat_getattr(const struct path *path, struct kstat *stat,
- u32 request_mask, unsigned int flags)
+int fat_getattr(struct user_namespace *mnt_userns, const struct path *path,
+ struct kstat *stat, u32 request_mask, unsigned int flags)
{
struct inode *inode = d_inode(path->dentry);
- generic_fillattr(inode, stat);
+ generic_fillattr(mnt_userns, inode, stat);
stat->blksize = MSDOS_SB(inode->i_sb)->cluster_size;
if (MSDOS_SB(inode->i_sb)->options.nfs == FAT_NFS_NOSTALE_RO) {
@@ -447,12 +447,13 @@ static int fat_sanitize_mode(const struct msdos_sb_info *sbi,
return 0;
}
-static int fat_allow_set_time(struct msdos_sb_info *sbi, struct inode *inode)
+static int fat_allow_set_time(struct user_namespace *mnt_userns,
+ struct msdos_sb_info *sbi, struct inode *inode)
{
umode_t allow_utime = sbi->options.allow_utime;
- if (!uid_eq(current_fsuid(), inode->i_uid)) {
- if (in_group_p(inode->i_gid))
+ if (!uid_eq(current_fsuid(), i_uid_into_mnt(mnt_userns, inode))) {
+ if (in_group_p(i_gid_into_mnt(mnt_userns, inode)))
allow_utime >>= 3;
if (allow_utime & MAY_WRITE)
return 1;
@@ -466,7 +467,8 @@ static int fat_allow_set_time(struct msdos_sb_info *sbi, struct inode *inode)
/* valid file mode bits */
#define FAT_VALID_MODE (S_IFREG | S_IFDIR | S_IRWXUGO)
-int fat_setattr(struct dentry *dentry, struct iattr *attr)
+int fat_setattr(struct user_namespace *mnt_userns, struct dentry *dentry,
+ struct iattr *attr)
{
struct msdos_sb_info *sbi = MSDOS_SB(dentry->d_sb);
struct inode *inode = d_inode(dentry);
@@ -476,11 +478,11 @@ int fat_setattr(struct dentry *dentry, struct iattr *attr)
/* Check for setting the inode time. */
ia_valid = attr->ia_valid;
if (ia_valid & TIMES_SET_FLAGS) {
- if (fat_allow_set_time(sbi, inode))
+ if (fat_allow_set_time(mnt_userns, sbi, inode))
attr->ia_valid &= ~TIMES_SET_FLAGS;
}
- error = setattr_prepare(dentry, attr);
+ error = setattr_prepare(mnt_userns, dentry, attr);
attr->ia_valid = ia_valid;
if (error) {
if (sbi->options.quiet)
@@ -550,7 +552,7 @@ int fat_setattr(struct dentry *dentry, struct iattr *attr)
fat_truncate_time(inode, &attr->ia_mtime, S_MTIME);
attr->ia_valid &= ~(ATTR_ATIME|ATTR_CTIME|ATTR_MTIME);
- setattr_copy(inode, attr);
+ setattr_copy(mnt_userns, inode, attr);
mark_inode_dirty(inode);
out:
return error;
diff --git a/fs/fat/misc.c b/fs/fat/misc.c
index f1b2a1fc2a6a..18a50a46b57f 100644
--- a/fs/fat/misc.c
+++ b/fs/fat/misc.c
@@ -329,22 +329,23 @@ EXPORT_SYMBOL_GPL(fat_truncate_time);
int fat_update_time(struct inode *inode, struct timespec64 *now, int flags)
{
- int iflags = I_DIRTY_TIME;
- bool dirty = false;
+ int dirty_flags = 0;
if (inode->i_ino == MSDOS_ROOT_INO)
return 0;
- fat_truncate_time(inode, now, flags);
- if (flags & S_VERSION)
- dirty = inode_maybe_inc_iversion(inode, false);
- if ((flags & (S_ATIME | S_CTIME | S_MTIME)) &&
- !(inode->i_sb->s_flags & SB_LAZYTIME))
- dirty = true;
+ if (flags & (S_ATIME | S_CTIME | S_MTIME)) {
+ fat_truncate_time(inode, now, flags);
+ if (inode->i_sb->s_flags & SB_LAZYTIME)
+ dirty_flags |= I_DIRTY_TIME;
+ else
+ dirty_flags |= I_DIRTY_SYNC;
+ }
+
+ if ((flags & S_VERSION) && inode_maybe_inc_iversion(inode, false))
+ dirty_flags |= I_DIRTY_SYNC;
- if (dirty)
- iflags |= I_DIRTY_SYNC;
- __mark_inode_dirty(inode, iflags);
+ __mark_inode_dirty(inode, dirty_flags);
return 0;
}
EXPORT_SYMBOL_GPL(fat_update_time);
diff --git a/fs/fat/namei_msdos.c b/fs/fat/namei_msdos.c
index 9d062886fbc1..efba301d68ae 100644
--- a/fs/fat/namei_msdos.c
+++ b/fs/fat/namei_msdos.c
@@ -261,8 +261,8 @@ static int msdos_add_entry(struct inode *dir, const unsigned char *name,
}
/***** Create a file */
-static int msdos_create(struct inode *dir, struct dentry *dentry, umode_t mode,
- bool excl)
+static int msdos_create(struct user_namespace *mnt_userns, struct inode *dir,
+ struct dentry *dentry, umode_t mode, bool excl)
{
struct super_block *sb = dir->i_sb;
struct inode *inode = NULL;
@@ -339,7 +339,8 @@ out:
}
/***** Make a directory */
-static int msdos_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
+static int msdos_mkdir(struct user_namespace *mnt_userns, struct inode *dir,
+ struct dentry *dentry, umode_t mode)
{
struct super_block *sb = dir->i_sb;
struct fat_slot_info sinfo;
@@ -593,7 +594,8 @@ error_inode:
}
/***** Rename, a wrapper for rename_same_dir & rename_diff_dir */
-static int msdos_rename(struct inode *old_dir, struct dentry *old_dentry,
+static int msdos_rename(struct user_namespace *mnt_userns,
+ struct inode *old_dir, struct dentry *old_dentry,
struct inode *new_dir, struct dentry *new_dentry,
unsigned int flags)
{
@@ -665,7 +667,7 @@ static struct file_system_type msdos_fs_type = {
.name = "msdos",
.mount = msdos_mount,
.kill_sb = kill_block_super,
- .fs_flags = FS_REQUIRES_DEV,
+ .fs_flags = FS_REQUIRES_DEV | FS_ALLOW_IDMAP,
};
MODULE_ALIAS_FS("msdos");
diff --git a/fs/fat/namei_vfat.c b/fs/fat/namei_vfat.c
index 0cdd0fb9f742..5369d82e0bfb 100644
--- a/fs/fat/namei_vfat.c
+++ b/fs/fat/namei_vfat.c
@@ -756,8 +756,8 @@ error:
return ERR_PTR(err);
}
-static int vfat_create(struct inode *dir, struct dentry *dentry, umode_t mode,
- bool excl)
+static int vfat_create(struct user_namespace *mnt_userns, struct inode *dir,
+ struct dentry *dentry, umode_t mode, bool excl)
{
struct super_block *sb = dir->i_sb;
struct inode *inode;
@@ -846,7 +846,8 @@ out:
return err;
}
-static int vfat_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
+static int vfat_mkdir(struct user_namespace *mnt_userns, struct inode *dir,
+ struct dentry *dentry, umode_t mode)
{
struct super_block *sb = dir->i_sb;
struct inode *inode;
@@ -892,9 +893,9 @@ out:
return err;
}
-static int vfat_rename(struct inode *old_dir, struct dentry *old_dentry,
- struct inode *new_dir, struct dentry *new_dentry,
- unsigned int flags)
+static int vfat_rename(struct user_namespace *mnt_userns, struct inode *old_dir,
+ struct dentry *old_dentry, struct inode *new_dir,
+ struct dentry *new_dentry, unsigned int flags)
{
struct buffer_head *dotdot_bh;
struct msdos_dir_entry *dotdot_de;
@@ -1062,7 +1063,7 @@ static struct file_system_type vfat_fs_type = {
.name = "vfat",
.mount = vfat_mount,
.kill_sb = kill_block_super,
- .fs_flags = FS_REQUIRES_DEV,
+ .fs_flags = FS_REQUIRES_DEV | FS_ALLOW_IDMAP,
};
MODULE_ALIAS_FS("vfat");
diff --git a/fs/fcntl.c b/fs/fcntl.c
index 483ef8861376..dfc72f15be7f 100644
--- a/fs/fcntl.c
+++ b/fs/fcntl.c
@@ -25,6 +25,7 @@
#include <linux/user_namespace.h>
#include <linux/memfd.h>
#include <linux/compat.h>
+#include <linux/mount.h>
#include <linux/poll.h>
#include <asm/siginfo.h>
@@ -46,7 +47,7 @@ static int setfl(int fd, struct file * filp, unsigned long arg)
/* O_NOATIME can only be set by the owner or superuser */
if ((arg & O_NOATIME) && !(filp->f_flags & O_NOATIME))
- if (!inode_owner_or_capable(inode))
+ if (!inode_owner_or_capable(file_mnt_user_ns(filp), inode))
return -EPERM;
/* required for strict SunOS emulation */
diff --git a/fs/fhandle.c b/fs/fhandle.c
index 01263ffbc4c0..ec6feeccc276 100644
--- a/fs/fhandle.c
+++ b/fs/fhandle.c
@@ -173,7 +173,7 @@ static int handle_to_path(int mountdirfd, struct file_handle __user *ufh,
/*
* With handle we don't look at the execute bit on the
- * the directory. Ideally we would like CAP_DAC_SEARCH.
+ * directory. Ideally we would like CAP_DAC_SEARCH.
* But we don't have that
*/
if (!capable(CAP_DAC_READ_SEARCH)) {
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index c41cb887eb7d..e91980f49388 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -1442,9 +1442,15 @@ static void requeue_inode(struct inode *inode, struct bdi_writeback *wb,
}
/*
- * Write out an inode and its dirty pages. Do not update the writeback list
- * linkage. That is left to the caller. The caller is also responsible for
- * setting I_SYNC flag and calling inode_sync_complete() to clear it.
+ * Write out an inode and its dirty pages (or some of its dirty pages, depending
+ * on @wbc->nr_to_write), and clear the relevant dirty flags from i_state.
+ *
+ * This doesn't remove the inode from the writeback list it is on, except
+ * potentially to move it from b_dirty_time to b_dirty due to timestamp
+ * expiration. The caller is otherwise responsible for writeback list handling.
+ *
+ * The caller is also responsible for setting the I_SYNC flag beforehand and
+ * calling inode_sync_complete() to clear it afterwards.
*/
static int
__writeback_single_inode(struct inode *inode, struct writeback_control *wbc)
@@ -1479,7 +1485,7 @@ __writeback_single_inode(struct inode *inode, struct writeback_control *wbc)
* change I_DIRTY_TIME into I_DIRTY_SYNC.
*/
if ((inode->i_state & I_DIRTY_TIME) &&
- (wbc->sync_mode == WB_SYNC_ALL || wbc->for_sync ||
+ (wbc->sync_mode == WB_SYNC_ALL ||
time_after(jiffies, inode->dirtied_time_when +
dirtytime_expire_interval * HZ))) {
trace_writeback_lazytime(inode);
@@ -1487,9 +1493,10 @@ __writeback_single_inode(struct inode *inode, struct writeback_control *wbc)
}
/*
- * Some filesystems may redirty the inode during the writeback
- * due to delalloc, clear dirty metadata flags right before
- * write_inode()
+ * Get and clear the dirty flags from i_state. This needs to be done
+ * after calling writepages because some filesystems may redirty the
+ * inode during writepages due to delalloc. It also needs to be done
+ * after handling timestamp expiration, as that may dirty the inode too.
*/
spin_lock(&inode->i_lock);
dirty = inode->i_state & I_DIRTY;
@@ -1524,12 +1531,13 @@ __writeback_single_inode(struct inode *inode, struct writeback_control *wbc)
}
/*
- * Write out an inode's dirty pages. Either the caller has an active reference
- * on the inode or the inode has I_WILL_FREE set.
+ * Write out an inode's dirty data and metadata on-demand, i.e. separately from
+ * the regular batched writeback done by the flusher threads in
+ * writeback_sb_inodes(). @wbc controls various aspects of the write, such as
+ * whether it is a data-integrity sync (%WB_SYNC_ALL) or not (%WB_SYNC_NONE).
*
- * This function is designed to be called for writing back one inode which
- * we go e.g. from filesystem. Flusher thread uses __writeback_single_inode()
- * and does more profound writeback list handling in writeback_sb_inodes().
+ * To prevent the inode from going away, either the caller must have a reference
+ * to the inode, or the inode must have I_WILL_FREE or I_FREEING set.
*/
static int writeback_single_inode(struct inode *inode,
struct writeback_control *wbc)
@@ -1544,23 +1552,23 @@ static int writeback_single_inode(struct inode *inode,
WARN_ON(inode->i_state & I_WILL_FREE);
if (inode->i_state & I_SYNC) {
- if (wbc->sync_mode != WB_SYNC_ALL)
- goto out;
/*
- * It's a data-integrity sync. We must wait. Since callers hold
- * inode reference or inode has I_WILL_FREE set, it cannot go
- * away under us.
+ * Writeback is already running on the inode. For WB_SYNC_NONE,
+ * that's enough and we can just return. For WB_SYNC_ALL, we
+ * must wait for the existing writeback to complete, then do
+ * writeback again if there's anything left.
*/
+ if (wbc->sync_mode != WB_SYNC_ALL)
+ goto out;
__inode_wait_for_writeback(inode);
}
WARN_ON(inode->i_state & I_SYNC);
/*
- * Skip inode if it is clean and we have no outstanding writeback in
- * WB_SYNC_ALL mode. We don't want to mess with writeback lists in this
- * function since flusher thread may be doing for example sync in
- * parallel and if we move the inode, it could get skipped. So here we
- * make sure inode is on some writeback list and leave it there unless
- * we have completely cleaned the inode.
+ * If the inode is already fully clean, then there's nothing to do.
+ *
+ * For data-integrity syncs we also need to check whether any pages are
+ * still under writeback, e.g. due to prior WB_SYNC_NONE writeback. If
+ * there are any such pages, we'll need to wait for them.
*/
if (!(inode->i_state & I_DIRTY_ALL) &&
(wbc->sync_mode != WB_SYNC_ALL ||
@@ -1576,8 +1584,9 @@ static int writeback_single_inode(struct inode *inode,
wb = inode_to_wb_and_lock_list(inode);
spin_lock(&inode->i_lock);
/*
- * If inode is clean, remove it from writeback lists. Otherwise don't
- * touch it. See comment above for explanation.
+ * If the inode is now fully clean, then it can be safely removed from
+ * its writeback list (if any). Otherwise the flusher threads are
+ * responsible for the writeback lists.
*/
if (!(inode->i_state & I_DIRTY_ALL))
inode_io_list_del_locked(inode, wb);
@@ -2219,23 +2228,24 @@ static noinline void block_dump___mark_inode_dirty(struct inode *inode)
}
/**
- * __mark_inode_dirty - internal function
+ * __mark_inode_dirty - internal function to mark an inode dirty
*
* @inode: inode to mark
- * @flags: what kind of dirty (i.e. I_DIRTY_SYNC)
+ * @flags: what kind of dirty, e.g. I_DIRTY_SYNC. This can be a combination of
+ * multiple I_DIRTY_* flags, except that I_DIRTY_TIME can't be combined
+ * with I_DIRTY_PAGES.
*
- * Mark an inode as dirty. Callers should use mark_inode_dirty or
- * mark_inode_dirty_sync.
+ * Mark an inode as dirty. We notify the filesystem, then update the inode's
+ * dirty flags. Then, if needed we add the inode to the appropriate dirty list.
*
- * Put the inode on the super block's dirty list.
+ * Most callers should use mark_inode_dirty() or mark_inode_dirty_sync()
+ * instead of calling this directly.
*
- * CAREFUL! We mark it dirty unconditionally, but move it onto the
- * dirty list only if it is hashed or if it refers to a blockdev.
- * If it was not hashed, it will never be added to the dirty list
- * even if it is later hashed, as it will have been marked dirty already.
+ * CAREFUL! We only add the inode to the dirty list if it is hashed or if it
+ * refers to a blockdev. Unhashed inodes will never be added to the dirty list
+ * even if they are later hashed, as they will have been marked dirty already.
*
- * In short, make sure you hash any inodes _before_ you start marking
- * them dirty.
+ * In short, ensure you hash any inodes _before_ you start marking them dirty.
*
* Note that for blockdevs, inode->dirtied_when represents the dirtying time of
* the block-special inode (/dev/hda1) itself. And the ->dirtied_when field of
@@ -2247,25 +2257,34 @@ static noinline void block_dump___mark_inode_dirty(struct inode *inode)
void __mark_inode_dirty(struct inode *inode, int flags)
{
struct super_block *sb = inode->i_sb;
- int dirtytime;
+ int dirtytime = 0;
trace_writeback_mark_inode_dirty(inode, flags);
- /*
- * Don't do this for I_DIRTY_PAGES - that doesn't actually
- * dirty the inode itself
- */
- if (flags & (I_DIRTY_INODE | I_DIRTY_TIME)) {
+ if (flags & I_DIRTY_INODE) {
+ /*
+ * Notify the filesystem about the inode being dirtied, so that
+ * (if needed) it can update on-disk fields and journal the
+ * inode. This is only needed when the inode itself is being
+ * dirtied now. I.e. it's only needed for I_DIRTY_INODE, not
+ * for just I_DIRTY_PAGES or I_DIRTY_TIME.
+ */
trace_writeback_dirty_inode_start(inode, flags);
-
if (sb->s_op->dirty_inode)
- sb->s_op->dirty_inode(inode, flags);
-
+ sb->s_op->dirty_inode(inode, flags & I_DIRTY_INODE);
trace_writeback_dirty_inode(inode, flags);
- }
- if (flags & I_DIRTY_INODE)
+
+ /* I_DIRTY_INODE supersedes I_DIRTY_TIME. */
flags &= ~I_DIRTY_TIME;
- dirtytime = flags & I_DIRTY_TIME;
+ } else {
+ /*
+ * Else it's either I_DIRTY_PAGES, I_DIRTY_TIME, or nothing.
+ * (We don't support setting both I_DIRTY_PAGES and I_DIRTY_TIME
+ * in one call to __mark_inode_dirty().)
+ */
+ dirtytime = flags & I_DIRTY_TIME;
+ WARN_ON_ONCE(dirtytime && flags != I_DIRTY_TIME);
+ }
/*
* Paired with smp_mb() in __writeback_single_inode() for the
@@ -2288,6 +2307,7 @@ void __mark_inode_dirty(struct inode *inode, int flags)
inode_attach_wb(inode, NULL);
+ /* I_DIRTY_INODE supersedes I_DIRTY_TIME. */
if (flags & I_DIRTY_INODE)
inode->i_state &= ~I_DIRTY_TIME;
inode->i_state |= flags;
diff --git a/fs/fuse/acl.c b/fs/fuse/acl.c
index f529075a2ce8..e9c0f916349d 100644
--- a/fs/fuse/acl.c
+++ b/fs/fuse/acl.c
@@ -50,7 +50,8 @@ struct posix_acl *fuse_get_acl(struct inode *inode, int type)
return acl;
}
-int fuse_set_acl(struct inode *inode, struct posix_acl *acl, int type)
+int fuse_set_acl(struct user_namespace *mnt_userns, struct inode *inode,
+ struct posix_acl *acl, int type)
{
struct fuse_conn *fc = get_fuse_conn(inode);
const char *name;
diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c
index 588f8d1240aa..c6636b4c4ccf 100644
--- a/fs/fuse/dev.c
+++ b/fs/fuse/dev.c
@@ -844,11 +844,7 @@ static int fuse_try_move_page(struct fuse_copy_state *cs, struct page **pagep)
if (WARN_ON(PageMlocked(oldpage)))
goto out_fallback_unlock;
- err = replace_page_cache_page(oldpage, newpage, GFP_KERNEL);
- if (err) {
- unlock_page(newpage);
- goto out_put_old;
- }
+ replace_page_cache_page(oldpage, newpage);
get_page(newpage);
diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c
index 78f9f209078c..06a18700a845 100644
--- a/fs/fuse/dir.c
+++ b/fs/fuse/dir.c
@@ -605,7 +605,8 @@ out_err:
return err;
}
-static int fuse_mknod(struct inode *, struct dentry *, umode_t, dev_t);
+static int fuse_mknod(struct user_namespace *, struct inode *, struct dentry *,
+ umode_t, dev_t);
static int fuse_atomic_open(struct inode *dir, struct dentry *entry,
struct file *file, unsigned flags,
umode_t mode)
@@ -645,7 +646,7 @@ out_dput:
return err;
mknod:
- err = fuse_mknod(dir, entry, mode, 0);
+ err = fuse_mknod(&init_user_ns, dir, entry, mode, 0);
if (err)
goto out_dput;
no_open:
@@ -715,8 +716,8 @@ static int create_new_entry(struct fuse_mount *fm, struct fuse_args *args,
return err;
}
-static int fuse_mknod(struct inode *dir, struct dentry *entry, umode_t mode,
- dev_t rdev)
+static int fuse_mknod(struct user_namespace *mnt_userns, struct inode *dir,
+ struct dentry *entry, umode_t mode, dev_t rdev)
{
struct fuse_mknod_in inarg;
struct fuse_mount *fm = get_fuse_mount(dir);
@@ -738,13 +739,14 @@ static int fuse_mknod(struct inode *dir, struct dentry *entry, umode_t mode,
return create_new_entry(fm, &args, dir, entry, mode);
}
-static int fuse_create(struct inode *dir, struct dentry *entry, umode_t mode,
- bool excl)
+static int fuse_create(struct user_namespace *mnt_userns, struct inode *dir,
+ struct dentry *entry, umode_t mode, bool excl)
{
- return fuse_mknod(dir, entry, mode, 0);
+ return fuse_mknod(&init_user_ns, dir, entry, mode, 0);
}
-static int fuse_mkdir(struct inode *dir, struct dentry *entry, umode_t mode)
+static int fuse_mkdir(struct user_namespace *mnt_userns, struct inode *dir,
+ struct dentry *entry, umode_t mode)
{
struct fuse_mkdir_in inarg;
struct fuse_mount *fm = get_fuse_mount(dir);
@@ -765,8 +767,8 @@ static int fuse_mkdir(struct inode *dir, struct dentry *entry, umode_t mode)
return create_new_entry(fm, &args, dir, entry, S_IFDIR);
}
-static int fuse_symlink(struct inode *dir, struct dentry *entry,
- const char *link)
+static int fuse_symlink(struct user_namespace *mnt_userns, struct inode *dir,
+ struct dentry *entry, const char *link)
{
struct fuse_mount *fm = get_fuse_mount(dir);
unsigned len = strlen(link) + 1;
@@ -908,9 +910,9 @@ static int fuse_rename_common(struct inode *olddir, struct dentry *oldent,
return err;
}
-static int fuse_rename2(struct inode *olddir, struct dentry *oldent,
- struct inode *newdir, struct dentry *newent,
- unsigned int flags)
+static int fuse_rename2(struct user_namespace *mnt_userns, struct inode *olddir,
+ struct dentry *oldent, struct inode *newdir,
+ struct dentry *newent, unsigned int flags)
{
struct fuse_conn *fc = get_fuse_conn(olddir);
int err;
@@ -1087,7 +1089,7 @@ static int fuse_update_get_attr(struct inode *inode, struct file *file,
forget_all_cached_acls(inode);
err = fuse_do_getattr(inode, stat, file);
} else if (stat) {
- generic_fillattr(inode, stat);
+ generic_fillattr(&init_user_ns, inode, stat);
stat->mode = fi->orig_i_mode;
stat->ino = fi->orig_ino;
}
@@ -1249,7 +1251,8 @@ static int fuse_perm_getattr(struct inode *inode, int mask)
* access request is sent. Execute permission is still checked
* locally based on file mode.
*/
-static int fuse_permission(struct inode *inode, int mask)
+static int fuse_permission(struct user_namespace *mnt_userns,
+ struct inode *inode, int mask)
{
struct fuse_conn *fc = get_fuse_conn(inode);
bool refreshed = false;
@@ -1280,7 +1283,7 @@ static int fuse_permission(struct inode *inode, int mask)
}
if (fc->default_permissions) {
- err = generic_permission(inode, mask);
+ err = generic_permission(&init_user_ns, inode, mask);
/* If permission is denied, try to refresh file
attributes. This is also needed, because the root
@@ -1288,7 +1291,8 @@ static int fuse_permission(struct inode *inode, int mask)
if (err == -EACCES && !refreshed) {
err = fuse_perm_getattr(inode, mask);
if (!err)
- err = generic_permission(inode, mask);
+ err = generic_permission(&init_user_ns,
+ inode, mask);
}
/* Note: the opposite of the above test does not
@@ -1610,7 +1614,7 @@ int fuse_do_setattr(struct dentry *dentry, struct iattr *attr,
if (!fc->default_permissions)
attr->ia_valid |= ATTR_FORCE;
- err = setattr_prepare(dentry, attr);
+ err = setattr_prepare(&init_user_ns, dentry, attr);
if (err)
return err;
@@ -1756,7 +1760,8 @@ error:
return err;
}
-static int fuse_setattr(struct dentry *entry, struct iattr *attr)
+static int fuse_setattr(struct user_namespace *mnt_userns, struct dentry *entry,
+ struct iattr *attr)
{
struct inode *inode = d_inode(entry);
struct fuse_conn *fc = get_fuse_conn(inode);
@@ -1818,7 +1823,8 @@ static int fuse_setattr(struct dentry *entry, struct iattr *attr)
return ret;
}
-static int fuse_getattr(const struct path *path, struct kstat *stat,
+static int fuse_getattr(struct user_namespace *mnt_userns,
+ const struct path *path, struct kstat *stat,
u32 request_mask, unsigned int flags)
{
struct inode *inode = d_inode(path->dentry);
diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
index 7c4b8cb93f9f..68cca8d4db6e 100644
--- a/fs/fuse/fuse_i.h
+++ b/fs/fuse/fuse_i.h
@@ -1180,8 +1180,8 @@ extern const struct xattr_handler *fuse_no_acl_xattr_handlers[];
struct posix_acl;
struct posix_acl *fuse_get_acl(struct inode *inode, int type);
-int fuse_set_acl(struct inode *inode, struct posix_acl *acl, int type);
-
+int fuse_set_acl(struct user_namespace *mnt_userns, struct inode *inode,
+ struct posix_acl *acl, int type);
/* readdir.c */
int fuse_readdir(struct file *file, struct dir_context *ctx);
diff --git a/fs/fuse/xattr.c b/fs/fuse/xattr.c
index cdea18de94f7..1a7d7ace54e1 100644
--- a/fs/fuse/xattr.c
+++ b/fs/fuse/xattr.c
@@ -188,6 +188,7 @@ static int fuse_xattr_get(const struct xattr_handler *handler,
}
static int fuse_xattr_set(const struct xattr_handler *handler,
+ struct user_namespace *mnt_userns,
struct dentry *dentry, struct inode *inode,
const char *name, const void *value, size_t size,
int flags)
@@ -214,6 +215,7 @@ static int no_xattr_get(const struct xattr_handler *handler,
}
static int no_xattr_set(const struct xattr_handler *handler,
+ struct user_namespace *mnt_userns,
struct dentry *dentry, struct inode *nodee,
const char *name, const void *value,
size_t size, int flags)
diff --git a/fs/gfs2/acl.c b/fs/gfs2/acl.c
index 2e939f5fe751..9165d70ead07 100644
--- a/fs/gfs2/acl.c
+++ b/fs/gfs2/acl.c
@@ -106,7 +106,8 @@ out:
return error;
}
-int gfs2_set_acl(struct inode *inode, struct posix_acl *acl, int type)
+int gfs2_set_acl(struct user_namespace *mnt_userns, struct inode *inode,
+ struct posix_acl *acl, int type)
{
struct gfs2_inode *ip = GFS2_I(inode);
struct gfs2_holder gh;
@@ -130,7 +131,7 @@ int gfs2_set_acl(struct inode *inode, struct posix_acl *acl, int type)
mode = inode->i_mode;
if (type == ACL_TYPE_ACCESS && acl) {
- ret = posix_acl_update_mode(inode, &mode, &acl);
+ ret = posix_acl_update_mode(&init_user_ns, inode, &mode, &acl);
if (ret)
goto unlock;
}
diff --git a/fs/gfs2/acl.h b/fs/gfs2/acl.h
index 61353a1501c5..eccc6a43326c 100644
--- a/fs/gfs2/acl.h
+++ b/fs/gfs2/acl.h
@@ -13,6 +13,7 @@
extern struct posix_acl *gfs2_get_acl(struct inode *inode, int type);
extern int __gfs2_set_acl(struct inode *inode, struct posix_acl *acl, int type);
-extern int gfs2_set_acl(struct inode *inode, struct posix_acl *acl, int type);
+extern int gfs2_set_acl(struct user_namespace *mnt_userns, struct inode *inode,
+ struct posix_acl *acl, int type);
#endif /* __ACL_DOT_H__ */
diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c
index 62d9081d1e26..7a358ae05185 100644
--- a/fs/gfs2/bmap.c
+++ b/fs/gfs2/bmap.c
@@ -1230,6 +1230,9 @@ static int gfs2_iomap_end(struct inode *inode, loff_t pos, loff_t length,
gfs2_inplace_release(ip);
+ if (ip->i_qadata && ip->i_qadata->qa_qd_num)
+ gfs2_quota_unlock(ip);
+
if (length != written && (iomap->flags & IOMAP_F_NEW)) {
/* Deallocate blocks that were just allocated. */
loff_t blockmask = i_blocksize(inode) - 1;
@@ -1242,9 +1245,6 @@ static int gfs2_iomap_end(struct inode *inode, loff_t pos, loff_t length,
}
}
- if (ip->i_qadata && ip->i_qadata->qa_qd_num)
- gfs2_quota_unlock(ip);
-
if (unlikely(!written))
goto out_unlock;
@@ -1538,13 +1538,13 @@ more_rgrps:
goto out;
}
ret = gfs2_glock_nq_init(rgd->rd_gl, LM_ST_EXCLUSIVE,
- 0, rd_gh);
+ LM_FLAG_NODE_SCOPE, rd_gh);
if (ret)
goto out;
/* Must be done with the rgrp glock held: */
if (gfs2_rs_active(&ip->i_res) &&
- rgd == ip->i_res.rs_rbm.rgd)
+ rgd == ip->i_res.rs_rgd)
gfs2_rs_deltree(&ip->i_res);
}
diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c
index 89609c299717..2d500f90cdac 100644
--- a/fs/gfs2/file.c
+++ b/fs/gfs2/file.c
@@ -238,7 +238,7 @@ static int do_gfs2_set_flags(struct file *filp, u32 reqflags, u32 mask,
goto out;
error = -EACCES;
- if (!inode_owner_or_capable(inode))
+ if (!inode_owner_or_capable(&init_user_ns, inode))
goto out;
error = 0;
@@ -256,7 +256,7 @@ static int do_gfs2_set_flags(struct file *filp, u32 reqflags, u32 mask,
!capable(CAP_LINUX_IMMUTABLE))
goto out;
if (!IS_IMMUTABLE(inode)) {
- error = gfs2_permission(inode, MAY_WRITE);
+ error = gfs2_permission(&init_user_ns, inode, MAY_WRITE);
if (error)
goto out;
}
@@ -716,10 +716,10 @@ static int gfs2_release(struct inode *inode, struct file *file)
kfree(file->private_data);
file->private_data = NULL;
- if (file->f_mode & FMODE_WRITE) {
+ if (gfs2_rs_active(&ip->i_res))
gfs2_rs_delete(ip, &inode->i_writecount);
+ if (file->f_mode & FMODE_WRITE)
gfs2_qa_put(ip);
- }
return 0;
}
@@ -749,7 +749,7 @@ static int gfs2_fsync(struct file *file, loff_t start, loff_t end,
{
struct address_space *mapping = file->f_mapping;
struct inode *inode = mapping->host;
- int sync_state = inode->i_state & I_DIRTY_ALL;
+ int sync_state = inode->i_state & I_DIRTY;
struct gfs2_inode *ip = GFS2_I(inode);
int ret = 0, ret1 = 0;
@@ -762,7 +762,7 @@ static int gfs2_fsync(struct file *file, loff_t start, loff_t end,
if (!gfs2_is_jdata(ip))
sync_state &= ~I_DIRTY_PAGES;
if (datasync)
- sync_state &= ~(I_DIRTY_SYNC | I_DIRTY_TIME);
+ sync_state &= ~I_DIRTY_SYNC;
if (sync_state) {
ret = sync_inode_metadata(inode, 1);
@@ -1112,8 +1112,8 @@ static long __gfs2_fallocate(struct file *file, int mode, loff_t offset, loff_t
goto out_qunlock;
/* check if the selected rgrp limits our max_blks further */
- if (ap.allowed && ap.allowed < max_blks)
- max_blks = ap.allowed;
+ if (ip->i_res.rs_reserved < max_blks)
+ max_blks = ip->i_res.rs_reserved;
/* Almost done. Calculate bytes that can be written using
* max_blks. We also recompute max_bytes, data_blocks and
diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c
index d87a5bc3607b..9567520d79f7 100644
--- a/fs/gfs2/glock.c
+++ b/fs/gfs2/glock.c
@@ -313,9 +313,23 @@ void gfs2_glock_put(struct gfs2_glock *gl)
static inline int may_grant(const struct gfs2_glock *gl, const struct gfs2_holder *gh)
{
const struct gfs2_holder *gh_head = list_first_entry(&gl->gl_holders, const struct gfs2_holder, gh_list);
- if ((gh->gh_state == LM_ST_EXCLUSIVE ||
- gh_head->gh_state == LM_ST_EXCLUSIVE) && gh != gh_head)
- return 0;
+
+ if (gh != gh_head) {
+ /**
+ * Here we make a special exception to grant holders who agree
+ * to share the EX lock with other holders who also have the
+ * bit set. If the original holder has the LM_FLAG_NODE_SCOPE bit
+ * is set, we grant more holders with the bit set.
+ */
+ if (gh_head->gh_state == LM_ST_EXCLUSIVE &&
+ (gh_head->gh_flags & LM_FLAG_NODE_SCOPE) &&
+ gh->gh_state == LM_ST_EXCLUSIVE &&
+ (gh->gh_flags & LM_FLAG_NODE_SCOPE))
+ return 1;
+ if ((gh->gh_state == LM_ST_EXCLUSIVE ||
+ gh_head->gh_state == LM_ST_EXCLUSIVE))
+ return 0;
+ }
if (gl->gl_state == gh->gh_state)
return 1;
if (gh->gh_flags & GL_EXACT)
@@ -2030,6 +2044,8 @@ static const char *hflags2str(char *buf, u16 flags, unsigned long iflags)
*p++ = 'A';
if (flags & LM_FLAG_PRIORITY)
*p++ = 'p';
+ if (flags & LM_FLAG_NODE_SCOPE)
+ *p++ = 'n';
if (flags & GL_ASYNC)
*p++ = 'a';
if (flags & GL_EXACT)
diff --git a/fs/gfs2/glock.h b/fs/gfs2/glock.h
index 53813364517b..31a8f2f649b5 100644
--- a/fs/gfs2/glock.h
+++ b/fs/gfs2/glock.h
@@ -75,6 +75,11 @@ enum {
* request and directly join the other shared lock. A shared lock request
* without the priority flag might be forced to wait until the deferred
* requested had acquired and released the lock.
+ *
+ * LM_FLAG_NODE_SCOPE
+ * This holder agrees to share the lock within this node. In other words,
+ * the glock is held in EX mode according to DLM, but local holders on the
+ * same node can share it.
*/
#define LM_FLAG_TRY 0x0001
@@ -82,6 +87,7 @@ enum {
#define LM_FLAG_NOEXP 0x0004
#define LM_FLAG_ANY 0x0008
#define LM_FLAG_PRIORITY 0x0010
+#define LM_FLAG_NODE_SCOPE 0x0020
#define GL_ASYNC 0x0040
#define GL_EXACT 0x0080
#define GL_SKIP 0x0100
diff --git a/fs/gfs2/glops.c b/fs/gfs2/glops.c
index 3faa421568b0..8e32d569c8bf 100644
--- a/fs/gfs2/glops.c
+++ b/fs/gfs2/glops.c
@@ -86,16 +86,12 @@ static int gfs2_ail_empty_gl(struct gfs2_glock *gl)
{
struct gfs2_sbd *sdp = gl->gl_name.ln_sbd;
struct gfs2_trans tr;
+ unsigned int revokes;
int ret;
- memset(&tr, 0, sizeof(tr));
- INIT_LIST_HEAD(&tr.tr_buf);
- INIT_LIST_HEAD(&tr.tr_databuf);
- INIT_LIST_HEAD(&tr.tr_ail1_list);
- INIT_LIST_HEAD(&tr.tr_ail2_list);
- tr.tr_revokes = atomic_read(&gl->gl_ail_count);
+ revokes = atomic_read(&gl->gl_ail_count);
- if (!tr.tr_revokes) {
+ if (!revokes) {
bool have_revokes;
bool log_in_flight;
@@ -122,20 +118,14 @@ static int gfs2_ail_empty_gl(struct gfs2_glock *gl)
return 0;
}
- /* A shortened, inline version of gfs2_trans_begin()
- * tr->alloced is not set since the transaction structure is
- * on the stack */
- tr.tr_reserved = 1 + gfs2_struct2blk(sdp, tr.tr_revokes);
- tr.tr_ip = _RET_IP_;
- ret = gfs2_log_reserve(sdp, tr.tr_reserved);
- if (ret < 0)
- return ret;
- WARN_ON_ONCE(current->journal_info);
- current->journal_info = &tr;
-
- __gfs2_ail_flush(gl, 0, tr.tr_revokes);
-
+ memset(&tr, 0, sizeof(tr));
+ set_bit(TR_ONSTACK, &tr.tr_flags);
+ ret = __gfs2_trans_begin(&tr, sdp, 0, revokes, _RET_IP_);
+ if (ret)
+ goto flush;
+ __gfs2_ail_flush(gl, 0, revokes);
gfs2_trans_end(sdp);
+
flush:
gfs2_log_flush(sdp, NULL, GFS2_LOG_HEAD_FLUSH_NORMAL |
GFS2_LFC_AIL_EMPTY_GL);
@@ -146,19 +136,15 @@ void gfs2_ail_flush(struct gfs2_glock *gl, bool fsync)
{
struct gfs2_sbd *sdp = gl->gl_name.ln_sbd;
unsigned int revokes = atomic_read(&gl->gl_ail_count);
- unsigned int max_revokes = (sdp->sd_sb.sb_bsize - sizeof(struct gfs2_log_descriptor)) / sizeof(u64);
int ret;
if (!revokes)
return;
- while (revokes > max_revokes)
- max_revokes += (sdp->sd_sb.sb_bsize - sizeof(struct gfs2_meta_header)) / sizeof(u64);
-
- ret = gfs2_trans_begin(sdp, 0, max_revokes);
+ ret = gfs2_trans_begin(sdp, 0, revokes);
if (ret)
return;
- __gfs2_ail_flush(gl, fsync, max_revokes);
+ __gfs2_ail_flush(gl, fsync, revokes);
gfs2_trans_end(sdp);
gfs2_log_flush(sdp, NULL, GFS2_LOG_HEAD_FLUSH_NORMAL |
GFS2_LFC_AIL_FLUSH);
diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h
index 8e1ab8ed4abc..0957119f7744 100644
--- a/fs/gfs2/incore.h
+++ b/fs/gfs2/incore.h
@@ -20,6 +20,7 @@
#include <linux/percpu.h>
#include <linux/lockref.h>
#include <linux/rhashtable.h>
+#include <linux/mutex.h>
#define DIO_WAIT 0x00000010
#define DIO_METADATA 0x00000020
@@ -106,7 +107,8 @@ struct gfs2_rgrpd {
u32 rd_data; /* num of data blocks in rgrp */
u32 rd_bitbytes; /* number of bytes in data bitmaps */
u32 rd_free;
- u32 rd_reserved; /* number of blocks reserved */
+ u32 rd_requested; /* number of blocks in rd_rstree */
+ u32 rd_reserved; /* number of reserved blocks */
u32 rd_free_clone;
u32 rd_dinodes;
u64 rd_igeneration;
@@ -122,34 +124,10 @@ struct gfs2_rgrpd {
#define GFS2_RDF_PREFERRED 0x80000000 /* This rgrp is preferred */
#define GFS2_RDF_MASK 0xf0000000 /* mask for internal flags */
spinlock_t rd_rsspin; /* protects reservation related vars */
+ struct mutex rd_mutex;
struct rb_root rd_rstree; /* multi-block reservation tree */
};
-struct gfs2_rbm {
- struct gfs2_rgrpd *rgd;
- u32 offset; /* The offset is bitmap relative */
- int bii; /* Bitmap index */
-};
-
-static inline struct gfs2_bitmap *rbm_bi(const struct gfs2_rbm *rbm)
-{
- return rbm->rgd->rd_bits + rbm->bii;
-}
-
-static inline u64 gfs2_rbm_to_block(const struct gfs2_rbm *rbm)
-{
- BUG_ON(rbm->offset >= rbm->rgd->rd_data);
- return rbm->rgd->rd_data0 + (rbm_bi(rbm)->bi_start * GFS2_NBBY) +
- rbm->offset;
-}
-
-static inline bool gfs2_rbm_eq(const struct gfs2_rbm *rbm1,
- const struct gfs2_rbm *rbm2)
-{
- return (rbm1->rgd == rbm2->rgd) && (rbm1->bii == rbm2->bii) &&
- (rbm1->offset == rbm2->offset);
-}
-
enum gfs2_state_bits {
BH_Pinned = BH_PrivateStart,
BH_Escaped = BH_PrivateStart + 1,
@@ -313,9 +291,11 @@ struct gfs2_qadata { /* quota allocation data */
*/
struct gfs2_blkreserv {
- struct rb_node rs_node; /* link to other block reservations */
- struct gfs2_rbm rs_rbm; /* Start of reservation */
- u32 rs_free; /* how many blocks are still free */
+ struct rb_node rs_node; /* node within rd_rstree */
+ struct gfs2_rgrpd *rs_rgd;
+ u64 rs_start;
+ u32 rs_requested;
+ u32 rs_reserved; /* number of reserved blocks */
};
/*
@@ -490,7 +470,7 @@ struct gfs2_quota_data {
enum {
TR_TOUCHED = 1,
TR_ATTACHED = 2,
- TR_ALLOCED = 3,
+ TR_ONSTACK = 3,
};
struct gfs2_trans {
@@ -506,7 +486,6 @@ struct gfs2_trans {
unsigned int tr_num_buf_rm;
unsigned int tr_num_databuf_rm;
unsigned int tr_num_revoke;
- unsigned int tr_num_revoke_rm;
struct list_head tr_list;
struct list_head tr_databuf;
@@ -531,6 +510,7 @@ struct gfs2_jdesc {
unsigned int nr_extents;
struct work_struct jd_work;
struct inode *jd_inode;
+ struct bio *jd_log_bio;
unsigned long jd_flags;
#define JDF_RECOVERY 1
unsigned int jd_jid;
@@ -585,6 +565,7 @@ struct gfs2_args {
unsigned int ar_errors:2; /* errors=withdraw | panic */
unsigned int ar_nobarrier:1; /* do not send barriers */
unsigned int ar_rgrplvb:1; /* use lvbs for rgrp info */
+ unsigned int ar_got_rgrplvb:1; /* Was the rgrplvb opt given? */
unsigned int ar_loccookie:1; /* use location based readdir
cookies */
s32 ar_commit; /* Commit interval */
@@ -821,7 +802,6 @@ struct gfs2_sbd {
struct gfs2_trans *sd_log_tr;
unsigned int sd_log_blks_reserved;
- int sd_log_committed_revoke;
atomic_t sd_log_pinned;
unsigned int sd_log_num_revoke;
@@ -834,24 +814,22 @@ struct gfs2_sbd {
atomic_t sd_log_thresh2;
atomic_t sd_log_blks_free;
atomic_t sd_log_blks_needed;
+ atomic_t sd_log_revokes_available;
wait_queue_head_t sd_log_waitq;
wait_queue_head_t sd_logd_waitq;
u64 sd_log_sequence;
- unsigned int sd_log_head;
- unsigned int sd_log_tail;
int sd_log_idle;
struct rw_semaphore sd_log_flush_lock;
atomic_t sd_log_in_flight;
- struct bio *sd_log_bio;
wait_queue_head_t sd_log_flush_wait;
int sd_log_error; /* First log error */
wait_queue_head_t sd_withdraw_wait;
- atomic_t sd_reserving_log;
- wait_queue_head_t sd_reserving_log_wait;
-
+ unsigned int sd_log_tail;
+ unsigned int sd_log_flush_tail;
+ unsigned int sd_log_head;
unsigned int sd_log_flush_head;
spinlock_t sd_ail_lock;
diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c
index c1b77e8d6b1c..c9775d5c6594 100644
--- a/fs/gfs2/inode.c
+++ b/fs/gfs2/inode.c
@@ -325,7 +325,7 @@ struct inode *gfs2_lookupi(struct inode *dir, const struct qstr *name,
}
if (!is_root) {
- error = gfs2_permission(dir, MAY_EXEC);
+ error = gfs2_permission(&init_user_ns, dir, MAY_EXEC);
if (error)
goto out;
}
@@ -355,7 +355,8 @@ static int create_ok(struct gfs2_inode *dip, const struct qstr *name,
{
int error;
- error = gfs2_permission(&dip->i_inode, MAY_WRITE | MAY_EXEC);
+ error = gfs2_permission(&init_user_ns, &dip->i_inode,
+ MAY_WRITE | MAY_EXEC);
if (error)
return error;
@@ -490,8 +491,8 @@ static void init_dinode(struct gfs2_inode *dip, struct gfs2_inode *ip,
di = (struct gfs2_dinode *)dibh->b_data;
gfs2_dinode_out(ip, di);
- di->di_major = cpu_to_be32(MAJOR(ip->i_inode.i_rdev));
- di->di_minor = cpu_to_be32(MINOR(ip->i_inode.i_rdev));
+ di->di_major = cpu_to_be32(imajor(&ip->i_inode));
+ di->di_minor = cpu_to_be32(iminor(&ip->i_inode));
di->__pad1 = 0;
di->__pad2 = 0;
di->__pad3 = 0;
@@ -843,8 +844,8 @@ fail:
* Returns: errno
*/
-static int gfs2_create(struct inode *dir, struct dentry *dentry,
- umode_t mode, bool excl)
+static int gfs2_create(struct user_namespace *mnt_userns, struct inode *dir,
+ struct dentry *dentry, umode_t mode, bool excl)
{
return gfs2_create_inode(dir, dentry, NULL, S_IFREG | mode, 0, NULL, 0, excl);
}
@@ -951,7 +952,7 @@ static int gfs2_link(struct dentry *old_dentry, struct inode *dir,
if (inode->i_nlink == 0)
goto out_gunlock;
- error = gfs2_permission(dir, MAY_WRITE | MAY_EXEC);
+ error = gfs2_permission(&init_user_ns, dir, MAY_WRITE | MAY_EXEC);
if (error)
goto out_gunlock;
@@ -1068,7 +1069,8 @@ static int gfs2_unlink_ok(struct gfs2_inode *dip, const struct qstr *name,
if (IS_APPEND(&dip->i_inode))
return -EPERM;
- error = gfs2_permission(&dip->i_inode, MAY_WRITE | MAY_EXEC);
+ error = gfs2_permission(&init_user_ns, &dip->i_inode,
+ MAY_WRITE | MAY_EXEC);
if (error)
return error;
@@ -1145,7 +1147,7 @@ static int gfs2_unlink(struct inode *dir, struct dentry *dentry)
if (!rgd)
goto out_inodes;
- gfs2_holder_init(rgd->rd_gl, LM_ST_EXCLUSIVE, 0, ghs + 2);
+ gfs2_holder_init(rgd->rd_gl, LM_ST_EXCLUSIVE, LM_FLAG_NODE_SCOPE, ghs + 2);
error = gfs2_glock_nq(ghs); /* parent */
@@ -1204,8 +1206,8 @@ out_inodes:
* Returns: errno
*/
-static int gfs2_symlink(struct inode *dir, struct dentry *dentry,
- const char *symname)
+static int gfs2_symlink(struct user_namespace *mnt_userns, struct inode *dir,
+ struct dentry *dentry, const char *symname)
{
unsigned int size;
@@ -1225,7 +1227,8 @@ static int gfs2_symlink(struct inode *dir, struct dentry *dentry,
* Returns: errno
*/
-static int gfs2_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
+static int gfs2_mkdir(struct user_namespace *mnt_userns, struct inode *dir,
+ struct dentry *dentry, umode_t mode)
{
unsigned dsize = gfs2_max_stuffed_size(GFS2_I(dir));
return gfs2_create_inode(dir, dentry, NULL, S_IFDIR | mode, 0, NULL, dsize, 0);
@@ -1240,8 +1243,8 @@ static int gfs2_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
*
*/
-static int gfs2_mknod(struct inode *dir, struct dentry *dentry, umode_t mode,
- dev_t dev)
+static int gfs2_mknod(struct user_namespace *mnt_userns, struct inode *dir,
+ struct dentry *dentry, umode_t mode, dev_t dev)
{
return gfs2_create_inode(dir, dentry, NULL, mode, dev, NULL, 0, 0);
}
@@ -1450,8 +1453,8 @@ static int gfs2_rename(struct inode *odir, struct dentry *odentry,
error = -ENOENT;
goto out_gunlock;
}
- error = gfs2_glock_nq_init(nrgd->rd_gl, LM_ST_EXCLUSIVE, 0,
- &rd_gh);
+ error = gfs2_glock_nq_init(nrgd->rd_gl, LM_ST_EXCLUSIVE,
+ LM_FLAG_NODE_SCOPE, &rd_gh);
if (error)
goto out_gunlock;
}
@@ -1490,7 +1493,8 @@ static int gfs2_rename(struct inode *odir, struct dentry *odentry,
}
}
} else {
- error = gfs2_permission(ndir, MAY_WRITE | MAY_EXEC);
+ error = gfs2_permission(&init_user_ns, ndir,
+ MAY_WRITE | MAY_EXEC);
if (error)
goto out_gunlock;
@@ -1525,7 +1529,8 @@ static int gfs2_rename(struct inode *odir, struct dentry *odentry,
/* Check out the dir to be renamed */
if (dir_rename) {
- error = gfs2_permission(d_inode(odentry), MAY_WRITE);
+ error = gfs2_permission(&init_user_ns, d_inode(odentry),
+ MAY_WRITE);
if (error)
goto out_gunlock;
}
@@ -1688,12 +1693,14 @@ static int gfs2_exchange(struct inode *odir, struct dentry *odentry,
goto out_gunlock;
if (S_ISDIR(old_mode)) {
- error = gfs2_permission(odentry->d_inode, MAY_WRITE);
+ error = gfs2_permission(&init_user_ns, odentry->d_inode,
+ MAY_WRITE);
if (error)
goto out_gunlock;
}
if (S_ISDIR(new_mode)) {
- error = gfs2_permission(ndentry->d_inode, MAY_WRITE);
+ error = gfs2_permission(&init_user_ns, ndentry->d_inode,
+ MAY_WRITE);
if (error)
goto out_gunlock;
}
@@ -1747,9 +1754,9 @@ out:
return error;
}
-static int gfs2_rename2(struct inode *odir, struct dentry *odentry,
- struct inode *ndir, struct dentry *ndentry,
- unsigned int flags)
+static int gfs2_rename2(struct user_namespace *mnt_userns, struct inode *odir,
+ struct dentry *odentry, struct inode *ndir,
+ struct dentry *ndentry, unsigned int flags)
{
flags &= ~RENAME_NOREPLACE;
@@ -1833,7 +1840,8 @@ out:
* Returns: errno
*/
-int gfs2_permission(struct inode *inode, int mask)
+int gfs2_permission(struct user_namespace *mnt_userns, struct inode *inode,
+ int mask)
{
struct gfs2_inode *ip;
struct gfs2_holder i_gh;
@@ -1852,7 +1860,7 @@ int gfs2_permission(struct inode *inode, int mask)
if ((mask & MAY_WRITE) && IS_IMMUTABLE(inode))
error = -EPERM;
else
- error = generic_permission(inode, mask);
+ error = generic_permission(&init_user_ns, inode, mask);
if (gfs2_holder_initialized(&i_gh))
gfs2_glock_dq_uninit(&i_gh);
@@ -1861,7 +1869,7 @@ int gfs2_permission(struct inode *inode, int mask)
static int __gfs2_setattr_simple(struct inode *inode, struct iattr *attr)
{
- setattr_copy(inode, attr);
+ setattr_copy(&init_user_ns, inode, attr);
mark_inode_dirty(inode);
return 0;
}
@@ -1963,7 +1971,8 @@ out:
* Returns: errno
*/
-static int gfs2_setattr(struct dentry *dentry, struct iattr *attr)
+static int gfs2_setattr(struct user_namespace *mnt_userns,
+ struct dentry *dentry, struct iattr *attr)
{
struct inode *inode = d_inode(dentry);
struct gfs2_inode *ip = GFS2_I(inode);
@@ -1982,7 +1991,7 @@ static int gfs2_setattr(struct dentry *dentry, struct iattr *attr)
if (IS_IMMUTABLE(inode) || IS_APPEND(inode))
goto error;
- error = setattr_prepare(dentry, attr);
+ error = setattr_prepare(&init_user_ns, dentry, attr);
if (error)
goto error;
@@ -1993,7 +2002,8 @@ static int gfs2_setattr(struct dentry *dentry, struct iattr *attr)
else {
error = gfs2_setattr_simple(inode, attr);
if (!error && attr->ia_valid & ATTR_MODE)
- error = posix_acl_chmod(inode, inode->i_mode);
+ error = posix_acl_chmod(&init_user_ns, inode,
+ inode->i_mode);
}
error:
@@ -2007,6 +2017,7 @@ out:
/**
* gfs2_getattr - Read out an inode's attributes
+ * @mnt_userns: user namespace of the mount the inode was found from
* @path: Object to query
* @stat: The inode's stats
* @request_mask: Mask of STATX_xxx flags indicating the caller's interests
@@ -2021,7 +2032,8 @@ out:
* Returns: errno
*/
-static int gfs2_getattr(const struct path *path, struct kstat *stat,
+static int gfs2_getattr(struct user_namespace *mnt_userns,
+ const struct path *path, struct kstat *stat,
u32 request_mask, unsigned int flags)
{
struct inode *inode = d_inode(path->dentry);
@@ -2049,7 +2061,7 @@ static int gfs2_getattr(const struct path *path, struct kstat *stat,
STATX_ATTR_IMMUTABLE |
STATX_ATTR_NODUMP);
- generic_fillattr(inode, stat);
+ generic_fillattr(&init_user_ns, inode, stat);
if (gfs2_holder_initialized(&gh))
gfs2_glock_dq_uninit(&gh);
diff --git a/fs/gfs2/inode.h b/fs/gfs2/inode.h
index 8073b8d2c7fa..c447bd5b3017 100644
--- a/fs/gfs2/inode.h
+++ b/fs/gfs2/inode.h
@@ -99,7 +99,8 @@ extern int gfs2_inode_refresh(struct gfs2_inode *ip);
extern struct inode *gfs2_lookupi(struct inode *dir, const struct qstr *name,
int is_root);
-extern int gfs2_permission(struct inode *inode, int mask);
+extern int gfs2_permission(struct user_namespace *mnt_userns,
+ struct inode *inode, int mask);
extern int gfs2_setattr_simple(struct inode *inode, struct iattr *attr);
extern struct inode *gfs2_lookup_simple(struct inode *dip, const char *name);
extern void gfs2_dinode_out(const struct gfs2_inode *ip, void *buf);
diff --git a/fs/gfs2/lock_dlm.c b/fs/gfs2/lock_dlm.c
index 9f2b5609f225..153272f82984 100644
--- a/fs/gfs2/lock_dlm.c
+++ b/fs/gfs2/lock_dlm.c
@@ -284,7 +284,6 @@ static void gdlm_put_lock(struct gfs2_glock *gl)
{
struct gfs2_sbd *sdp = gl->gl_name.ln_sbd;
struct lm_lockstruct *ls = &sdp->sd_lockstruct;
- int lvb_needs_unlock = 0;
int error;
if (gl->gl_lksb.sb_lkid == 0) {
@@ -297,13 +296,10 @@ static void gdlm_put_lock(struct gfs2_glock *gl)
gfs2_sbstats_inc(gl, GFS2_LKS_DCOUNT);
gfs2_update_request_times(gl);
- /* don't want to skip dlm_unlock writing the lvb when lock is ex */
-
- if (gl->gl_lksb.sb_lvbptr && (gl->gl_state == LM_ST_EXCLUSIVE))
- lvb_needs_unlock = 1;
+ /* don't want to skip dlm_unlock writing the lvb when lock has one */
if (test_bit(SDF_SKIP_DLM_UNLOCK, &sdp->sd_flags) &&
- !lvb_needs_unlock) {
+ !gl->gl_lksb.sb_lvbptr) {
gfs2_glock_free(gl);
return;
}
diff --git a/fs/gfs2/log.c b/fs/gfs2/log.c
index 2e9314091c81..16937ebb2a3e 100644
--- a/fs/gfs2/log.c
+++ b/fs/gfs2/log.c
@@ -50,10 +50,12 @@ unsigned int gfs2_struct2blk(struct gfs2_sbd *sdp, unsigned int nstruct)
unsigned int blks;
unsigned int first, second;
+ /* The initial struct gfs2_log_descriptor block */
blks = 1;
first = sdp->sd_ldptrs;
if (nstruct > first) {
+ /* Subsequent struct gfs2_meta_header blocks */
second = sdp->sd_inptrs;
blks += DIV_ROUND_UP(nstruct - first, second);
}
@@ -89,7 +91,7 @@ void gfs2_remove_from_ail(struct gfs2_bufdata *bd)
static int gfs2_ail1_start_one(struct gfs2_sbd *sdp,
struct writeback_control *wbc,
- struct gfs2_trans *tr)
+ struct gfs2_trans *tr, struct blk_plug *plug)
__releases(&sdp->sd_ail_lock)
__acquires(&sdp->sd_ail_lock)
{
@@ -131,6 +133,11 @@ __acquires(&sdp->sd_ail_lock)
continue;
spin_unlock(&sdp->sd_ail_lock);
ret = generic_writepages(mapping, wbc);
+ if (need_resched()) {
+ blk_finish_plug(plug);
+ cond_resched();
+ blk_start_plug(plug);
+ }
spin_lock(&sdp->sd_ail_lock);
if (ret == -ENODATA) /* if a jdata write into a new hole */
ret = 0; /* ignore it */
@@ -205,7 +212,7 @@ restart:
list_for_each_entry_reverse(tr, head, tr_list) {
if (wbc->nr_to_write <= 0)
break;
- ret = gfs2_ail1_start_one(sdp, wbc, tr);
+ ret = gfs2_ail1_start_one(sdp, wbc, tr, &plug);
if (ret) {
if (ret == -EBUSY)
goto restart;
@@ -240,6 +247,45 @@ static void gfs2_ail1_start(struct gfs2_sbd *sdp)
return gfs2_ail1_flush(sdp, &wbc);
}
+static void gfs2_log_update_flush_tail(struct gfs2_sbd *sdp)
+{
+ unsigned int new_flush_tail = sdp->sd_log_head;
+ struct gfs2_trans *tr;
+
+ if (!list_empty(&sdp->sd_ail1_list)) {
+ tr = list_last_entry(&sdp->sd_ail1_list,
+ struct gfs2_trans, tr_list);
+ new_flush_tail = tr->tr_first;
+ }
+ sdp->sd_log_flush_tail = new_flush_tail;
+}
+
+static void gfs2_log_update_head(struct gfs2_sbd *sdp)
+{
+ unsigned int new_head = sdp->sd_log_flush_head;
+
+ if (sdp->sd_log_flush_tail == sdp->sd_log_head)
+ sdp->sd_log_flush_tail = new_head;
+ sdp->sd_log_head = new_head;
+}
+
+/**
+ * gfs2_ail_empty_tr - empty one of the ail lists of a transaction
+ */
+
+static void gfs2_ail_empty_tr(struct gfs2_sbd *sdp, struct gfs2_trans *tr,
+ struct list_head *head)
+{
+ struct gfs2_bufdata *bd;
+
+ while (!list_empty(head)) {
+ bd = list_first_entry(head, struct gfs2_bufdata,
+ bd_ail_st_list);
+ gfs2_assert(sdp, bd->bd_tr == tr);
+ gfs2_remove_from_ail(bd);
+ }
+}
+
/**
* gfs2_ail1_empty_one - Check whether or not a trans in the AIL has been synced
* @sdp: the filesystem
@@ -315,6 +361,7 @@ static int gfs2_ail1_empty(struct gfs2_sbd *sdp, int max_revokes)
else
oldest_tr = 0;
}
+ gfs2_log_update_flush_tail(sdp);
ret = list_empty(&sdp->sd_ail1_list);
spin_unlock(&sdp->sd_ail_lock);
@@ -348,47 +395,69 @@ static void gfs2_ail1_wait(struct gfs2_sbd *sdp)
spin_unlock(&sdp->sd_ail_lock);
}
-/**
- * gfs2_ail_empty_tr - empty one of the ail lists for a transaction
- */
-
-static void gfs2_ail_empty_tr(struct gfs2_sbd *sdp, struct gfs2_trans *tr,
- struct list_head *head)
+static void __ail2_empty(struct gfs2_sbd *sdp, struct gfs2_trans *tr)
{
- struct gfs2_bufdata *bd;
-
- while (!list_empty(head)) {
- bd = list_first_entry(head, struct gfs2_bufdata,
- bd_ail_st_list);
- gfs2_assert(sdp, bd->bd_tr == tr);
- gfs2_remove_from_ail(bd);
- }
+ gfs2_ail_empty_tr(sdp, tr, &tr->tr_ail2_list);
+ list_del(&tr->tr_list);
+ gfs2_assert_warn(sdp, list_empty(&tr->tr_ail1_list));
+ gfs2_assert_warn(sdp, list_empty(&tr->tr_ail2_list));
+ gfs2_trans_free(sdp, tr);
}
static void ail2_empty(struct gfs2_sbd *sdp, unsigned int new_tail)
{
- struct gfs2_trans *tr, *safe;
+ struct list_head *ail2_list = &sdp->sd_ail2_list;
unsigned int old_tail = sdp->sd_log_tail;
- int wrap = (new_tail < old_tail);
- int a, b, rm;
+ struct gfs2_trans *tr, *safe;
spin_lock(&sdp->sd_ail_lock);
+ if (old_tail <= new_tail) {
+ list_for_each_entry_safe(tr, safe, ail2_list, tr_list) {
+ if (old_tail <= tr->tr_first && tr->tr_first < new_tail)
+ __ail2_empty(sdp, tr);
+ }
+ } else {
+ list_for_each_entry_safe(tr, safe, ail2_list, tr_list) {
+ if (old_tail <= tr->tr_first || tr->tr_first < new_tail)
+ __ail2_empty(sdp, tr);
+ }
+ }
+ spin_unlock(&sdp->sd_ail_lock);
+}
- list_for_each_entry_safe(tr, safe, &sdp->sd_ail2_list, tr_list) {
- a = (old_tail <= tr->tr_first);
- b = (tr->tr_first < new_tail);
- rm = (wrap) ? (a || b) : (a && b);
- if (!rm)
- continue;
+/**
+ * gfs2_log_is_empty - Check if the log is empty
+ * @sdp: The GFS2 superblock
+ */
- gfs2_ail_empty_tr(sdp, tr, &tr->tr_ail2_list);
- list_del(&tr->tr_list);
- gfs2_assert_warn(sdp, list_empty(&tr->tr_ail1_list));
- gfs2_assert_warn(sdp, list_empty(&tr->tr_ail2_list));
- gfs2_trans_free(sdp, tr);
+bool gfs2_log_is_empty(struct gfs2_sbd *sdp) {
+ return atomic_read(&sdp->sd_log_blks_free) == sdp->sd_jdesc->jd_blocks;
+}
+
+static bool __gfs2_log_try_reserve_revokes(struct gfs2_sbd *sdp, unsigned int revokes)
+{
+ unsigned int available;
+
+ available = atomic_read(&sdp->sd_log_revokes_available);
+ while (available >= revokes) {
+ if (atomic_try_cmpxchg(&sdp->sd_log_revokes_available,
+ &available, available - revokes))
+ return true;
}
+ return false;
+}
- spin_unlock(&sdp->sd_ail_lock);
+/**
+ * gfs2_log_release_revokes - Release a given number of revokes
+ * @sdp: The GFS2 superblock
+ * @revokes: The number of revokes to release
+ *
+ * sdp->sd_log_flush_lock must be held.
+ */
+void gfs2_log_release_revokes(struct gfs2_sbd *sdp, unsigned int revokes)
+{
+ if (revokes)
+ atomic_add(revokes, &sdp->sd_log_revokes_available);
}
/**
@@ -400,86 +469,141 @@ static void ail2_empty(struct gfs2_sbd *sdp, unsigned int new_tail)
void gfs2_log_release(struct gfs2_sbd *sdp, unsigned int blks)
{
-
atomic_add(blks, &sdp->sd_log_blks_free);
trace_gfs2_log_blocks(sdp, blks);
gfs2_assert_withdraw(sdp, atomic_read(&sdp->sd_log_blks_free) <=
sdp->sd_jdesc->jd_blocks);
- up_read(&sdp->sd_log_flush_lock);
+ if (atomic_read(&sdp->sd_log_blks_needed))
+ wake_up(&sdp->sd_log_waitq);
}
/**
- * gfs2_log_reserve - Make a log reservation
+ * __gfs2_log_try_reserve - Try to make a log reservation
+ * @sdp: The GFS2 superblock
+ * @blks: The number of blocks to reserve
+ * @taboo_blks: The number of blocks to leave free
+ *
+ * Try to do the same as __gfs2_log_reserve(), but fail if no more log
+ * space is immediately available.
+ */
+static bool __gfs2_log_try_reserve(struct gfs2_sbd *sdp, unsigned int blks,
+ unsigned int taboo_blks)
+{
+ unsigned wanted = blks + taboo_blks;
+ unsigned int free_blocks;
+
+ free_blocks = atomic_read(&sdp->sd_log_blks_free);
+ while (free_blocks >= wanted) {
+ if (atomic_try_cmpxchg(&sdp->sd_log_blks_free, &free_blocks,
+ free_blocks - blks)) {
+ trace_gfs2_log_blocks(sdp, -blks);
+ return true;
+ }
+ }
+ return false;
+}
+
+/**
+ * __gfs2_log_reserve - Make a log reservation
* @sdp: The GFS2 superblock
* @blks: The number of blocks to reserve
+ * @taboo_blks: The number of blocks to leave free
*
- * Note that we never give out the last few blocks of the journal. Thats
- * due to the fact that there is a small number of header blocks
- * associated with each log flush. The exact number can't be known until
- * flush time, so we ensure that we have just enough free blocks at all
- * times to avoid running out during a log flush.
+ * @taboo_blks is set to 0 for logd, and to GFS2_LOG_FLUSH_MIN_BLOCKS
+ * for all other processes. This ensures that when the log is almost full,
+ * logd will still be able to call gfs2_log_flush one more time without
+ * blocking, which will advance the tail and make some more log space
+ * available.
*
* We no longer flush the log here, instead we wake up logd to do that
* for us. To avoid the thundering herd and to ensure that we deal fairly
* with queued waiters, we use an exclusive wait. This means that when we
* get woken with enough journal space to get our reservation, we need to
* wake the next waiter on the list.
- *
- * Returns: errno
*/
-int gfs2_log_reserve(struct gfs2_sbd *sdp, unsigned int blks)
+static void __gfs2_log_reserve(struct gfs2_sbd *sdp, unsigned int blks,
+ unsigned int taboo_blks)
{
- int ret = 0;
- unsigned reserved_blks = 7 * (4096 / sdp->sd_vfs->s_blocksize);
- unsigned wanted = blks + reserved_blks;
- DEFINE_WAIT(wait);
- int did_wait = 0;
+ unsigned wanted = blks + taboo_blks;
unsigned int free_blocks;
- if (gfs2_assert_warn(sdp, blks) ||
- gfs2_assert_warn(sdp, blks <= sdp->sd_jdesc->jd_blocks))
- return -EINVAL;
atomic_add(blks, &sdp->sd_log_blks_needed);
-retry:
- free_blocks = atomic_read(&sdp->sd_log_blks_free);
- if (unlikely(free_blocks <= wanted)) {
- do {
- prepare_to_wait_exclusive(&sdp->sd_log_waitq, &wait,
- TASK_UNINTERRUPTIBLE);
+ for (;;) {
+ if (current != sdp->sd_logd_process)
wake_up(&sdp->sd_logd_waitq);
- did_wait = 1;
- if (atomic_read(&sdp->sd_log_blks_free) <= wanted)
- io_schedule();
- free_blocks = atomic_read(&sdp->sd_log_blks_free);
- } while(free_blocks <= wanted);
- finish_wait(&sdp->sd_log_waitq, &wait);
- }
- atomic_inc(&sdp->sd_reserving_log);
- if (atomic_cmpxchg(&sdp->sd_log_blks_free, free_blocks,
- free_blocks - blks) != free_blocks) {
- if (atomic_dec_and_test(&sdp->sd_reserving_log))
- wake_up(&sdp->sd_reserving_log_wait);
- goto retry;
+ io_wait_event(sdp->sd_log_waitq,
+ (free_blocks = atomic_read(&sdp->sd_log_blks_free),
+ free_blocks >= wanted));
+ do {
+ if (atomic_try_cmpxchg(&sdp->sd_log_blks_free,
+ &free_blocks,
+ free_blocks - blks))
+ goto reserved;
+ } while (free_blocks >= wanted);
}
- atomic_sub(blks, &sdp->sd_log_blks_needed);
- trace_gfs2_log_blocks(sdp, -blks);
- /*
- * If we waited, then so might others, wake them up _after_ we get
- * our share of the log.
- */
- if (unlikely(did_wait))
+reserved:
+ trace_gfs2_log_blocks(sdp, -blks);
+ if (atomic_sub_return(blks, &sdp->sd_log_blks_needed))
wake_up(&sdp->sd_log_waitq);
+}
+
+/**
+ * gfs2_log_try_reserve - Try to make a log reservation
+ * @sdp: The GFS2 superblock
+ * @tr: The transaction
+ * @extra_revokes: The number of additional revokes reserved (output)
+ *
+ * This is similar to gfs2_log_reserve, but sdp->sd_log_flush_lock must be
+ * held for correct revoke accounting.
+ */
- down_read(&sdp->sd_log_flush_lock);
- if (unlikely(!test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags))) {
- gfs2_log_release(sdp, blks);
- ret = -EROFS;
+bool gfs2_log_try_reserve(struct gfs2_sbd *sdp, struct gfs2_trans *tr,
+ unsigned int *extra_revokes)
+{
+ unsigned int blks = tr->tr_reserved;
+ unsigned int revokes = tr->tr_revokes;
+ unsigned int revoke_blks = 0;
+
+ *extra_revokes = 0;
+ if (revokes && !__gfs2_log_try_reserve_revokes(sdp, revokes)) {
+ revoke_blks = DIV_ROUND_UP(revokes, sdp->sd_inptrs);
+ *extra_revokes = revoke_blks * sdp->sd_inptrs - revokes;
+ blks += revoke_blks;
}
- if (atomic_dec_and_test(&sdp->sd_reserving_log))
- wake_up(&sdp->sd_reserving_log_wait);
- return ret;
+ if (!blks)
+ return true;
+ if (__gfs2_log_try_reserve(sdp, blks, GFS2_LOG_FLUSH_MIN_BLOCKS))
+ return true;
+ if (!revoke_blks)
+ gfs2_log_release_revokes(sdp, revokes);
+ return false;
+}
+
+/**
+ * gfs2_log_reserve - Make a log reservation
+ * @sdp: The GFS2 superblock
+ * @tr: The transaction
+ * @extra_revokes: The number of additional revokes reserved (output)
+ *
+ * sdp->sd_log_flush_lock must not be held.
+ */
+
+void gfs2_log_reserve(struct gfs2_sbd *sdp, struct gfs2_trans *tr,
+ unsigned int *extra_revokes)
+{
+ unsigned int blks = tr->tr_reserved;
+ unsigned int revokes = tr->tr_revokes;
+ unsigned int revoke_blks = 0;
+
+ *extra_revokes = 0;
+ if (revokes) {
+ revoke_blks = DIV_ROUND_UP(revokes, sdp->sd_inptrs);
+ *extra_revokes = revoke_blks * sdp->sd_inptrs - revokes;
+ blks += revoke_blks;
+ }
+ __gfs2_log_reserve(sdp, blks, GFS2_LOG_FLUSH_MIN_BLOCKS);
}
/**
@@ -507,24 +631,20 @@ static inline unsigned int log_distance(struct gfs2_sbd *sdp, unsigned int newer
}
/**
- * calc_reserved - Calculate the number of blocks to reserve when
- * refunding a transaction's unused buffers.
+ * calc_reserved - Calculate the number of blocks to keep reserved
* @sdp: The GFS2 superblock
*
* This is complex. We need to reserve room for all our currently used
- * metadata buffers (e.g. normal file I/O rewriting file time stamps) and
- * all our journaled data buffers for journaled files (e.g. files in the
+ * metadata blocks (e.g. normal file I/O rewriting file time stamps) and
+ * all our journaled data blocks for journaled files (e.g. files in the
* meta_fs like rindex, or files for which chattr +j was done.)
- * If we don't reserve enough space, gfs2_log_refund and gfs2_log_flush
- * will count it as free space (sd_log_blks_free) and corruption will follow.
+ * If we don't reserve enough space, corruption will follow.
*
- * We can have metadata bufs and jdata bufs in the same journal. So each
- * type gets its own log header, for which we need to reserve a block.
- * In fact, each type has the potential for needing more than one header
- * in cases where we have more buffers than will fit on a journal page.
+ * We can have metadata blocks and jdata blocks in the same journal. Each
+ * type gets its own log descriptor, for which we need to reserve a block.
+ * In fact, each type has the potential for needing more than one log descriptor
+ * in cases where we have more blocks than will fit in a log descriptor.
* Metadata journal entries take up half the space of journaled buffer entries.
- * Thus, metadata entries have buf_limit (502) and journaled buffers have
- * databuf_limit (251) before they cause a wrap around.
*
* Also, we need to reserve blocks for revoke journal entries and one for an
* overall header for the lot.
@@ -533,59 +653,29 @@ static inline unsigned int log_distance(struct gfs2_sbd *sdp, unsigned int newer
*/
static unsigned int calc_reserved(struct gfs2_sbd *sdp)
{
- unsigned int reserved = 0;
- unsigned int mbuf;
- unsigned int dbuf;
+ unsigned int reserved = GFS2_LOG_FLUSH_MIN_BLOCKS;
+ unsigned int blocks;
struct gfs2_trans *tr = sdp->sd_log_tr;
if (tr) {
- mbuf = tr->tr_num_buf_new - tr->tr_num_buf_rm;
- dbuf = tr->tr_num_databuf_new - tr->tr_num_databuf_rm;
- reserved = mbuf + dbuf;
- /* Account for header blocks */
- reserved += DIV_ROUND_UP(mbuf, buf_limit(sdp));
- reserved += DIV_ROUND_UP(dbuf, databuf_limit(sdp));
+ blocks = tr->tr_num_buf_new - tr->tr_num_buf_rm;
+ reserved += blocks + DIV_ROUND_UP(blocks, buf_limit(sdp));
+ blocks = tr->tr_num_databuf_new - tr->tr_num_databuf_rm;
+ reserved += blocks + DIV_ROUND_UP(blocks, databuf_limit(sdp));
}
-
- if (sdp->sd_log_committed_revoke > 0)
- reserved += gfs2_struct2blk(sdp, sdp->sd_log_committed_revoke);
- /* One for the overall header */
- if (reserved)
- reserved++;
return reserved;
}
-static unsigned int current_tail(struct gfs2_sbd *sdp)
-{
- struct gfs2_trans *tr;
- unsigned int tail;
-
- spin_lock(&sdp->sd_ail_lock);
-
- if (list_empty(&sdp->sd_ail1_list)) {
- tail = sdp->sd_log_head;
- } else {
- tr = list_last_entry(&sdp->sd_ail1_list, struct gfs2_trans,
- tr_list);
- tail = tr->tr_first;
- }
-
- spin_unlock(&sdp->sd_ail_lock);
-
- return tail;
-}
-
-static void log_pull_tail(struct gfs2_sbd *sdp, unsigned int new_tail)
+static void log_pull_tail(struct gfs2_sbd *sdp)
{
- unsigned int dist = log_distance(sdp, new_tail, sdp->sd_log_tail);
+ unsigned int new_tail = sdp->sd_log_flush_tail;
+ unsigned int dist;
+ if (new_tail == sdp->sd_log_tail)
+ return;
+ dist = log_distance(sdp, new_tail, sdp->sd_log_tail);
ail2_empty(sdp, new_tail);
-
- atomic_add(dist, &sdp->sd_log_blks_free);
- trace_gfs2_log_blocks(sdp, dist);
- gfs2_assert_withdraw(sdp, atomic_read(&sdp->sd_log_blks_free) <=
- sdp->sd_jdesc->jd_blocks);
-
+ gfs2_log_release(sdp, dist);
sdp->sd_log_tail = new_tail;
}
@@ -698,7 +788,7 @@ void gfs2_glock_remove_revoke(struct gfs2_glock *gl)
}
/**
- * gfs2_write_revokes - Add as many revokes to the system transaction as we can
+ * gfs2_flush_revokes - Add as many revokes to the system transaction as we can
* @sdp: The GFS2 superblock
*
* Our usual strategy is to defer writing revokes as much as we can in the hope
@@ -709,38 +799,14 @@ void gfs2_glock_remove_revoke(struct gfs2_glock *gl)
* been written back. This will basically come at no cost now, and will save
* us from having to keep track of those blocks on the AIL2 list later.
*/
-void gfs2_write_revokes(struct gfs2_sbd *sdp)
+void gfs2_flush_revokes(struct gfs2_sbd *sdp)
{
/* number of revokes we still have room for */
- int max_revokes = (sdp->sd_sb.sb_bsize - sizeof(struct gfs2_log_descriptor)) / sizeof(u64);
+ unsigned int max_revokes = atomic_read(&sdp->sd_log_revokes_available);
gfs2_log_lock(sdp);
- while (sdp->sd_log_num_revoke > max_revokes)
- max_revokes += (sdp->sd_sb.sb_bsize - sizeof(struct gfs2_meta_header)) / sizeof(u64);
- max_revokes -= sdp->sd_log_num_revoke;
- if (!sdp->sd_log_num_revoke) {
- atomic_dec(&sdp->sd_log_blks_free);
- /* If no blocks have been reserved, we need to also
- * reserve a block for the header */
- if (!sdp->sd_log_blks_reserved) {
- atomic_dec(&sdp->sd_log_blks_free);
- trace_gfs2_log_blocks(sdp, -2);
- } else {
- trace_gfs2_log_blocks(sdp, -1);
- }
- }
gfs2_ail1_empty(sdp, max_revokes);
gfs2_log_unlock(sdp);
-
- if (!sdp->sd_log_num_revoke) {
- atomic_inc(&sdp->sd_log_blks_free);
- if (!sdp->sd_log_blks_reserved) {
- atomic_inc(&sdp->sd_log_blks_free);
- trace_gfs2_log_blocks(sdp, 2);
- } else {
- trace_gfs2_log_blocks(sdp, 1);
- }
- }
}
/**
@@ -769,7 +835,7 @@ void gfs2_write_log_header(struct gfs2_sbd *sdp, struct gfs2_jdesc *jd,
u64 dblock;
if (gfs2_withdrawn(sdp))
- goto out;
+ return;
page = mempool_alloc(gfs2_page_pool, GFP_NOIO);
lh = page_address(page);
@@ -822,10 +888,8 @@ void gfs2_write_log_header(struct gfs2_sbd *sdp, struct gfs2_jdesc *jd,
sb->s_blocksize - LH_V1_SIZE - 4);
lh->lh_crc = cpu_to_be32(crc);
- gfs2_log_write(sdp, page, sb->s_blocksize, 0, dblock);
- gfs2_log_submit_bio(&sdp->sd_log_bio, REQ_OP_WRITE | op_flags);
-out:
- log_flush_wait(sdp);
+ gfs2_log_write(sdp, jd, page, sb->s_blocksize, 0, dblock);
+ gfs2_log_submit_bio(&jd->jd_log_bio, REQ_OP_WRITE | op_flags);
}
/**
@@ -838,25 +902,24 @@ out:
static void log_write_header(struct gfs2_sbd *sdp, u32 flags)
{
- unsigned int tail;
int op_flags = REQ_PREFLUSH | REQ_FUA | REQ_META | REQ_SYNC;
enum gfs2_freeze_state state = atomic_read(&sdp->sd_freeze_state);
gfs2_assert_withdraw(sdp, (state != SFS_FROZEN));
- tail = current_tail(sdp);
if (test_bit(SDF_NOBARRIERS, &sdp->sd_flags)) {
gfs2_ordered_wait(sdp);
log_flush_wait(sdp);
op_flags = REQ_SYNC | REQ_META | REQ_PRIO;
}
- sdp->sd_log_idle = (tail == sdp->sd_log_flush_head);
- gfs2_write_log_header(sdp, sdp->sd_jdesc, sdp->sd_log_sequence++, tail,
- sdp->sd_log_flush_head, flags, op_flags);
+ sdp->sd_log_idle = (sdp->sd_log_flush_tail == sdp->sd_log_flush_head);
+ gfs2_write_log_header(sdp, sdp->sd_jdesc, sdp->sd_log_sequence++,
+ sdp->sd_log_flush_tail, sdp->sd_log_flush_head,
+ flags, op_flags);
gfs2_log_incr_head(sdp);
-
- if (sdp->sd_log_tail != tail)
- log_pull_tail(sdp, tail);
+ log_flush_wait(sdp);
+ log_pull_tail(sdp);
+ gfs2_log_update_head(sdp);
}
/**
@@ -956,10 +1019,15 @@ static void trans_drain(struct gfs2_trans *tr)
void gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl, u32 flags)
{
struct gfs2_trans *tr = NULL;
+ unsigned int reserved_blocks = 0, used_blocks = 0;
enum gfs2_freeze_state state = atomic_read(&sdp->sd_freeze_state);
+ unsigned int first_log_head;
+ unsigned int reserved_revokes = 0;
down_write(&sdp->sd_log_flush_lock);
+ trace_gfs2_log_flush(sdp, 1, flags);
+repeat:
/*
* Do this check while holding the log_flush_lock to prevent new
* buffers from being added to the ail via gfs2_pin()
@@ -970,28 +1038,47 @@ void gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl, u32 flags)
/* Log might have been flushed while we waited for the flush lock */
if (gl && !test_bit(GLF_LFLUSH, &gl->gl_flags))
goto out;
- trace_gfs2_log_flush(sdp, 1, flags);
- if (flags & GFS2_LOG_HEAD_FLUSH_SHUTDOWN)
- clear_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags);
+ first_log_head = sdp->sd_log_head;
+ sdp->sd_log_flush_head = first_log_head;
- sdp->sd_log_flush_head = sdp->sd_log_head;
tr = sdp->sd_log_tr;
- if (tr) {
- sdp->sd_log_tr = NULL;
- tr->tr_first = sdp->sd_log_flush_head;
- if (unlikely (state == SFS_FROZEN))
- if (gfs2_assert_withdraw_delayed(sdp,
- !tr->tr_num_buf_new && !tr->tr_num_databuf_new))
- goto out_withdraw;
+ if (tr || sdp->sd_log_num_revoke) {
+ if (reserved_blocks)
+ gfs2_log_release(sdp, reserved_blocks);
+ reserved_blocks = sdp->sd_log_blks_reserved;
+ reserved_revokes = sdp->sd_log_num_revoke;
+ if (tr) {
+ sdp->sd_log_tr = NULL;
+ tr->tr_first = first_log_head;
+ if (unlikely (state == SFS_FROZEN)) {
+ if (gfs2_assert_withdraw_delayed(sdp,
+ !tr->tr_num_buf_new && !tr->tr_num_databuf_new))
+ goto out_withdraw;
+ }
+ }
+ } else if (!reserved_blocks) {
+ unsigned int taboo_blocks = GFS2_LOG_FLUSH_MIN_BLOCKS;
+
+ reserved_blocks = GFS2_LOG_FLUSH_MIN_BLOCKS;
+ if (current == sdp->sd_logd_process)
+ taboo_blocks = 0;
+
+ if (!__gfs2_log_try_reserve(sdp, reserved_blocks, taboo_blocks)) {
+ up_write(&sdp->sd_log_flush_lock);
+ __gfs2_log_reserve(sdp, reserved_blocks, taboo_blocks);
+ down_write(&sdp->sd_log_flush_lock);
+ goto repeat;
+ }
+ BUG_ON(sdp->sd_log_num_revoke);
}
+ if (flags & GFS2_LOG_HEAD_FLUSH_SHUTDOWN)
+ clear_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags);
+
if (unlikely(state == SFS_FROZEN))
- if (gfs2_assert_withdraw_delayed(sdp, !sdp->sd_log_num_revoke))
+ if (gfs2_assert_withdraw_delayed(sdp, !reserved_revokes))
goto out_withdraw;
- if (gfs2_assert_withdraw_delayed(sdp,
- sdp->sd_log_num_revoke == sdp->sd_log_committed_revoke))
- goto out_withdraw;
gfs2_ordered_write(sdp);
if (gfs2_withdrawn(sdp))
@@ -999,16 +1086,13 @@ void gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl, u32 flags)
lops_before_commit(sdp, tr);
if (gfs2_withdrawn(sdp))
goto out_withdraw;
- gfs2_log_submit_bio(&sdp->sd_log_bio, REQ_OP_WRITE);
+ gfs2_log_submit_bio(&sdp->sd_jdesc->jd_log_bio, REQ_OP_WRITE);
if (gfs2_withdrawn(sdp))
goto out_withdraw;
if (sdp->sd_log_head != sdp->sd_log_flush_head) {
- log_flush_wait(sdp);
log_write_header(sdp, flags);
- } else if (sdp->sd_log_tail != current_tail(sdp) && !sdp->sd_log_idle){
- atomic_dec(&sdp->sd_log_blks_free); /* Adjust for unreserved buffer */
- trace_gfs2_log_blocks(sdp, -1);
+ } else if (sdp->sd_log_tail != sdp->sd_log_flush_tail && !sdp->sd_log_idle) {
log_write_header(sdp, flags);
}
if (gfs2_withdrawn(sdp))
@@ -1016,9 +1100,7 @@ void gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl, u32 flags)
lops_after_commit(sdp, tr);
gfs2_log_lock(sdp);
- sdp->sd_log_head = sdp->sd_log_flush_head;
sdp->sd_log_blks_reserved = 0;
- sdp->sd_log_committed_revoke = 0;
spin_lock(&sdp->sd_ail_lock);
if (tr && !list_empty(&tr->tr_ail1_list)) {
@@ -1033,10 +1115,7 @@ void gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl, u32 flags)
empty_ail1_list(sdp);
if (gfs2_withdrawn(sdp))
goto out_withdraw;
- atomic_dec(&sdp->sd_log_blks_free); /* Adjust for unreserved buffer */
- trace_gfs2_log_blocks(sdp, -1);
log_write_header(sdp, flags);
- sdp->sd_log_head = sdp->sd_log_flush_head;
}
if (flags & (GFS2_LOG_HEAD_FLUSH_SHUTDOWN |
GFS2_LOG_HEAD_FLUSH_FREEZE))
@@ -1046,12 +1125,22 @@ void gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl, u32 flags)
}
out_end:
- trace_gfs2_log_flush(sdp, 0, flags);
+ used_blocks = log_distance(sdp, sdp->sd_log_flush_head, first_log_head);
+ reserved_revokes += atomic_read(&sdp->sd_log_revokes_available);
+ atomic_set(&sdp->sd_log_revokes_available, sdp->sd_ldptrs);
+ gfs2_assert_withdraw(sdp, reserved_revokes % sdp->sd_inptrs == sdp->sd_ldptrs);
+ if (reserved_revokes > sdp->sd_ldptrs)
+ reserved_blocks += (reserved_revokes - sdp->sd_ldptrs) / sdp->sd_inptrs;
out:
+ if (used_blocks != reserved_blocks) {
+ gfs2_assert_withdraw_delayed(sdp, used_blocks < reserved_blocks);
+ gfs2_log_release(sdp, reserved_blocks - used_blocks);
+ }
up_write(&sdp->sd_log_flush_lock);
gfs2_trans_free(sdp, tr);
if (gfs2_withdrawing(sdp))
gfs2_withdraw(sdp);
+ trace_gfs2_log_flush(sdp, 0, flags);
return;
out_withdraw:
@@ -1087,8 +1176,8 @@ static void gfs2_merge_trans(struct gfs2_sbd *sdp, struct gfs2_trans *new)
old->tr_num_databuf_new += new->tr_num_databuf_new;
old->tr_num_buf_rm += new->tr_num_buf_rm;
old->tr_num_databuf_rm += new->tr_num_databuf_rm;
+ old->tr_revokes += new->tr_revokes;
old->tr_num_revoke += new->tr_num_revoke;
- old->tr_num_revoke_rm += new->tr_num_revoke_rm;
list_splice_tail_init(&new->tr_databuf, &old->tr_databuf);
list_splice_tail_init(&new->tr_buf, &old->tr_buf);
@@ -1110,20 +1199,17 @@ static void log_refund(struct gfs2_sbd *sdp, struct gfs2_trans *tr)
if (sdp->sd_log_tr) {
gfs2_merge_trans(sdp, tr);
} else if (tr->tr_num_buf_new || tr->tr_num_databuf_new) {
- gfs2_assert_withdraw(sdp, test_bit(TR_ALLOCED, &tr->tr_flags));
+ gfs2_assert_withdraw(sdp, !test_bit(TR_ONSTACK, &tr->tr_flags));
sdp->sd_log_tr = tr;
set_bit(TR_ATTACHED, &tr->tr_flags);
}
- sdp->sd_log_committed_revoke += tr->tr_num_revoke - tr->tr_num_revoke_rm;
reserved = calc_reserved(sdp);
maxres = sdp->sd_log_blks_reserved + tr->tr_reserved;
gfs2_assert_withdraw(sdp, maxres >= reserved);
unused = maxres - reserved;
- atomic_add(unused, &sdp->sd_log_blks_free);
- trace_gfs2_log_blocks(sdp, unused);
- gfs2_assert_withdraw(sdp, atomic_read(&sdp->sd_log_blks_free) <=
- sdp->sd_jdesc->jd_blocks);
+ if (unused)
+ gfs2_log_release(sdp, unused);
sdp->sd_log_blks_reserved = reserved;
gfs2_log_unlock(sdp);
@@ -1166,15 +1252,11 @@ static void gfs2_log_shutdown(struct gfs2_sbd *sdp)
gfs2_assert_withdraw(sdp, !sdp->sd_log_num_revoke);
gfs2_assert_withdraw(sdp, list_empty(&sdp->sd_ail1_list));
- sdp->sd_log_flush_head = sdp->sd_log_head;
-
log_write_header(sdp, GFS2_LOG_HEAD_UNMOUNT | GFS2_LFC_SHUTDOWN);
+ log_pull_tail(sdp);
gfs2_assert_warn(sdp, sdp->sd_log_head == sdp->sd_log_tail);
gfs2_assert_warn(sdp, list_empty(&sdp->sd_ail2_list));
-
- sdp->sd_log_head = sdp->sd_log_flush_head;
- sdp->sd_log_tail = sdp->sd_log_head;
}
static inline int gfs2_jrnl_flush_reqd(struct gfs2_sbd *sdp)
@@ -1208,7 +1290,6 @@ int gfs2_logd(void *data)
struct gfs2_sbd *sdp = data;
unsigned long t = 1;
DEFINE_WAIT(wait);
- bool did_flush;
while (!kthread_should_stop()) {
@@ -1227,12 +1308,10 @@ int gfs2_logd(void *data)
continue;
}
- did_flush = false;
if (gfs2_jrnl_flush_reqd(sdp) || t == 0) {
gfs2_ail1_empty(sdp, 0);
gfs2_log_flush(sdp, NULL, GFS2_LOG_HEAD_FLUSH_NORMAL |
- GFS2_LFC_LOGD_JFLUSH_REQD);
- did_flush = true;
+ GFS2_LFC_LOGD_JFLUSH_REQD);
}
if (gfs2_ail_flush_reqd(sdp)) {
@@ -1240,13 +1319,9 @@ int gfs2_logd(void *data)
gfs2_ail1_wait(sdp);
gfs2_ail1_empty(sdp, 0);
gfs2_log_flush(sdp, NULL, GFS2_LOG_HEAD_FLUSH_NORMAL |
- GFS2_LFC_LOGD_AIL_FLUSH_REQD);
- did_flush = true;
+ GFS2_LFC_LOGD_AIL_FLUSH_REQD);
}
- if (!gfs2_ail_flush_reqd(sdp) || did_flush)
- wake_up(&sdp->sd_log_waitq);
-
t = gfs2_tune_get(sdp, gt_logd_secs) * HZ;
try_to_freeze();
diff --git a/fs/gfs2/log.h b/fs/gfs2/log.h
index 79f97290146e..eea58015710e 100644
--- a/fs/gfs2/log.h
+++ b/fs/gfs2/log.h
@@ -13,6 +13,13 @@
#include "incore.h"
#include "inode.h"
+/*
+ * The minimum amount of log space required for a log flush is one block for
+ * revokes and one block for the log header. Log flushes other than
+ * GFS2_LOG_HEAD_FLUSH_NORMAL may write one or two more log headers.
+ */
+#define GFS2_LOG_FLUSH_MIN_BLOCKS 4
+
/**
* gfs2_log_lock - acquire the right to mess with the log manager
* @sdp: the filesystem
@@ -43,7 +50,9 @@ static inline void gfs2_log_pointers_init(struct gfs2_sbd *sdp,
if (++value == sdp->sd_jdesc->jd_blocks) {
value = 0;
}
- sdp->sd_log_head = sdp->sd_log_tail = value;
+ sdp->sd_log_tail = value;
+ sdp->sd_log_flush_tail = value;
+ sdp->sd_log_head = value;
}
static inline void gfs2_ordered_add_inode(struct gfs2_inode *ip)
@@ -64,8 +73,13 @@ static inline void gfs2_ordered_add_inode(struct gfs2_inode *ip)
extern void gfs2_ordered_del_inode(struct gfs2_inode *ip);
extern unsigned int gfs2_struct2blk(struct gfs2_sbd *sdp, unsigned int nstruct);
extern void gfs2_remove_from_ail(struct gfs2_bufdata *bd);
+extern bool gfs2_log_is_empty(struct gfs2_sbd *sdp);
+extern void gfs2_log_release_revokes(struct gfs2_sbd *sdp, unsigned int revokes);
extern void gfs2_log_release(struct gfs2_sbd *sdp, unsigned int blks);
-extern int gfs2_log_reserve(struct gfs2_sbd *sdp, unsigned int blks);
+extern bool gfs2_log_try_reserve(struct gfs2_sbd *sdp, struct gfs2_trans *tr,
+ unsigned int *extra_revokes);
+extern void gfs2_log_reserve(struct gfs2_sbd *sdp, struct gfs2_trans *tr,
+ unsigned int *extra_revokes);
extern void gfs2_write_log_header(struct gfs2_sbd *sdp, struct gfs2_jdesc *jd,
u64 seq, u32 tail, u32 lblock, u32 flags,
int op_flags);
@@ -78,6 +92,6 @@ extern void log_flush_wait(struct gfs2_sbd *sdp);
extern int gfs2_logd(void *data);
extern void gfs2_add_revoke(struct gfs2_sbd *sdp, struct gfs2_bufdata *bd);
extern void gfs2_glock_remove_revoke(struct gfs2_glock *gl);
-extern void gfs2_write_revokes(struct gfs2_sbd *sdp);
+extern void gfs2_flush_revokes(struct gfs2_sbd *sdp);
#endif /* __LOG_DOT_H__ */
diff --git a/fs/gfs2/lops.c b/fs/gfs2/lops.c
index 3922b26264f5..dc1b93a877c6 100644
--- a/fs/gfs2/lops.c
+++ b/fs/gfs2/lops.c
@@ -76,15 +76,20 @@ static void maybe_release_space(struct gfs2_bufdata *bd)
unsigned int index = bd->bd_bh->b_blocknr - gl->gl_name.ln_number;
struct gfs2_bitmap *bi = rgd->rd_bits + index;
+ rgrp_lock_local(rgd);
if (bi->bi_clone == NULL)
- return;
+ goto out;
if (sdp->sd_args.ar_discard)
gfs2_rgrp_send_discards(sdp, rgd->rd_data0, bd->bd_bh, bi, 1, NULL);
memcpy(bi->bi_clone + bi->bi_offset,
bd->bd_bh->b_data + bi->bi_offset, bi->bi_bytes);
clear_bit(GBF_FULL, &bi->bi_flags);
rgd->rd_free_clone = rgd->rd_free;
+ BUG_ON(rgd->rd_free_clone < rgd->rd_reserved);
rgd->rd_extfail_pt = rgd->rd_free;
+
+out:
+ rgrp_unlock_local(rgd);
}
/**
@@ -322,17 +327,18 @@ static struct bio *gfs2_log_get_bio(struct gfs2_sbd *sdp, u64 blkno,
* then add the page segment to that.
*/
-void gfs2_log_write(struct gfs2_sbd *sdp, struct page *page,
- unsigned size, unsigned offset, u64 blkno)
+void gfs2_log_write(struct gfs2_sbd *sdp, struct gfs2_jdesc *jd,
+ struct page *page, unsigned size, unsigned offset,
+ u64 blkno)
{
struct bio *bio;
int ret;
- bio = gfs2_log_get_bio(sdp, blkno, &sdp->sd_log_bio, REQ_OP_WRITE,
+ bio = gfs2_log_get_bio(sdp, blkno, &jd->jd_log_bio, REQ_OP_WRITE,
gfs2_end_log_write, false);
ret = bio_add_page(bio, page, size, offset);
if (ret == 0) {
- bio = gfs2_log_get_bio(sdp, blkno, &sdp->sd_log_bio,
+ bio = gfs2_log_get_bio(sdp, blkno, &jd->jd_log_bio,
REQ_OP_WRITE, gfs2_end_log_write, true);
ret = bio_add_page(bio, page, size, offset);
WARN_ON(ret == 0);
@@ -355,7 +361,8 @@ static void gfs2_log_write_bh(struct gfs2_sbd *sdp, struct buffer_head *bh)
dblock = gfs2_log_bmap(sdp->sd_jdesc, sdp->sd_log_flush_head);
gfs2_log_incr_head(sdp);
- gfs2_log_write(sdp, bh->b_page, bh->b_size, bh_offset(bh), dblock);
+ gfs2_log_write(sdp, sdp->sd_jdesc, bh->b_page, bh->b_size,
+ bh_offset(bh), dblock);
}
/**
@@ -369,14 +376,14 @@ static void gfs2_log_write_bh(struct gfs2_sbd *sdp, struct buffer_head *bh)
* the page may be freed at any time.
*/
-void gfs2_log_write_page(struct gfs2_sbd *sdp, struct page *page)
+static void gfs2_log_write_page(struct gfs2_sbd *sdp, struct page *page)
{
struct super_block *sb = sdp->sd_vfs;
u64 dblock;
dblock = gfs2_log_bmap(sdp->sd_jdesc, sdp->sd_log_flush_head);
gfs2_log_incr_head(sdp);
- gfs2_log_write(sdp, page, sb->s_blocksize, 0, dblock);
+ gfs2_log_write(sdp, sdp->sd_jdesc, page, sb->s_blocksize, 0, dblock);
}
/**
@@ -845,7 +852,7 @@ static void revoke_lo_before_commit(struct gfs2_sbd *sdp, struct gfs2_trans *tr)
struct page *page;
unsigned int length;
- gfs2_write_revokes(sdp);
+ gfs2_flush_revokes(sdp);
if (!sdp->sd_log_num_revoke)
return;
@@ -857,7 +864,6 @@ static void revoke_lo_before_commit(struct gfs2_sbd *sdp, struct gfs2_trans *tr)
sdp->sd_log_num_revoke--;
if (offset + sizeof(u64) > sdp->sd_sb.sb_bsize) {
-
gfs2_log_write_page(sdp, page);
page = mempool_alloc(gfs2_page_pool, GFP_NOIO);
mh = page_address(page);
diff --git a/fs/gfs2/lops.h b/fs/gfs2/lops.h
index fbdbb08dcec6..31b6dd0d2e5d 100644
--- a/fs/gfs2/lops.h
+++ b/fs/gfs2/lops.h
@@ -10,37 +10,24 @@
#include <linux/list.h>
#include "incore.h"
-#define BUF_OFFSET \
- ((sizeof(struct gfs2_log_descriptor) + sizeof(__be64) - 1) & \
- ~(sizeof(__be64) - 1))
-#define DATABUF_OFFSET \
- ((sizeof(struct gfs2_log_descriptor) + (2 * sizeof(__be64) - 1)) & \
- ~(2 * sizeof(__be64) - 1))
-
extern const struct gfs2_log_operations *gfs2_log_ops[];
extern void gfs2_log_incr_head(struct gfs2_sbd *sdp);
extern u64 gfs2_log_bmap(struct gfs2_jdesc *jd, unsigned int lbn);
-extern void gfs2_log_write(struct gfs2_sbd *sdp, struct page *page,
- unsigned size, unsigned offset, u64 blkno);
-extern void gfs2_log_write_page(struct gfs2_sbd *sdp, struct page *page);
+extern void gfs2_log_write(struct gfs2_sbd *sdp, struct gfs2_jdesc *jd,
+ struct page *page, unsigned size, unsigned offset,
+ u64 blkno);
extern void gfs2_log_submit_bio(struct bio **biop, int opf);
extern void gfs2_pin(struct gfs2_sbd *sdp, struct buffer_head *bh);
extern int gfs2_find_jhead(struct gfs2_jdesc *jd,
struct gfs2_log_header_host *head, bool keep_cache);
static inline unsigned int buf_limit(struct gfs2_sbd *sdp)
{
- unsigned int limit;
-
- limit = (sdp->sd_sb.sb_bsize - BUF_OFFSET) / sizeof(__be64);
- return limit;
+ return sdp->sd_ldptrs;
}
static inline unsigned int databuf_limit(struct gfs2_sbd *sdp)
{
- unsigned int limit;
-
- limit = (sdp->sd_sb.sb_bsize - DATABUF_OFFSET) / (2 * sizeof(__be64));
- return limit;
+ return sdp->sd_ldptrs / 2;
}
static inline void lops_before_commit(struct gfs2_sbd *sdp,
diff --git a/fs/gfs2/main.c b/fs/gfs2/main.c
index c7393ee9cf68..28d0eb23e18e 100644
--- a/fs/gfs2/main.c
+++ b/fs/gfs2/main.c
@@ -98,7 +98,7 @@ static int __init init_gfs2_fs(void)
error = -ENOMEM;
gfs2_glock_cachep = kmem_cache_create("gfs2_glock",
sizeof(struct gfs2_glock),
- 0, 0,
+ 0, SLAB_RECLAIM_ACCOUNT,
gfs2_init_glock_once);
if (!gfs2_glock_cachep)
goto fail_cachep1;
@@ -134,7 +134,7 @@ static int __init init_gfs2_fs(void)
gfs2_quotad_cachep = kmem_cache_create("gfs2_quotad",
sizeof(struct gfs2_quota_data),
- 0, 0, NULL);
+ 0, SLAB_RECLAIM_ACCOUNT, NULL);
if (!gfs2_quotad_cachep)
goto fail_cachep6;
diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c
index 61fce59cb4d3..74c7d01723b9 100644
--- a/fs/gfs2/ops_fstype.c
+++ b/fs/gfs2/ops_fstype.c
@@ -136,8 +136,6 @@ static struct gfs2_sbd *init_sbd(struct super_block *sb)
init_rwsem(&sdp->sd_log_flush_lock);
atomic_set(&sdp->sd_log_in_flight, 0);
- atomic_set(&sdp->sd_reserving_log, 0);
- init_waitqueue_head(&sdp->sd_reserving_log_wait);
init_waitqueue_head(&sdp->sd_log_flush_wait);
atomic_set(&sdp->sd_freeze_state, SFS_UNFROZEN);
mutex_init(&sdp->sd_freeze_mutex);
@@ -171,7 +169,8 @@ static int gfs2_check_sb(struct gfs2_sbd *sdp, int silent)
return -EINVAL;
}
- if (sb->sb_fs_format != GFS2_FORMAT_FS ||
+ if (sb->sb_fs_format < GFS2_FS_FORMAT_MIN ||
+ sb->sb_fs_format > GFS2_FS_FORMAT_MAX ||
sb->sb_multihost_format != GFS2_FORMAT_MULTI) {
fs_warn(sdp, "Unknown on-disk format, unable to mount\n");
return -EINVAL;
@@ -179,7 +178,7 @@ static int gfs2_check_sb(struct gfs2_sbd *sdp, int silent)
if (sb->sb_bsize < 512 || sb->sb_bsize > PAGE_SIZE ||
(sb->sb_bsize & (sb->sb_bsize - 1))) {
- pr_warn("Invalid superblock size\n");
+ pr_warn("Invalid block size\n");
return -EINVAL;
}
@@ -317,6 +316,13 @@ static int gfs2_read_sb(struct gfs2_sbd *sdp, int silent)
sizeof(struct gfs2_meta_header))
* GFS2_NBBY; /* not the rgrp bitmap, subsequent bitmaps only */
+ /*
+ * We always keep at least one block reserved for revokes in
+ * transactions. This greatly simplifies allocating additional
+ * revoke blocks.
+ */
+ atomic_set(&sdp->sd_log_revokes_available, sdp->sd_ldptrs);
+
/* Compute maximum reservation required to add a entry to a directory */
hash_blocks = DIV_ROUND_UP(sizeof(u64) * BIT(GFS2_DIR_MAX_DEPTH),
@@ -488,6 +494,19 @@ static int init_sb(struct gfs2_sbd *sdp, int silent)
goto out;
}
+ switch(sdp->sd_sb.sb_fs_format) {
+ case GFS2_FS_FORMAT_MAX:
+ sb->s_xattr = gfs2_xattr_handlers_max;
+ break;
+
+ case GFS2_FS_FORMAT_MIN:
+ sb->s_xattr = gfs2_xattr_handlers_min;
+ break;
+
+ default:
+ BUG();
+ }
+
/* Set up the buffer cache and SB for real */
if (sdp->sd_sb.sb_bsize < bdev_logical_block_size(sb->s_bdev)) {
ret = -EINVAL;
@@ -1032,13 +1051,14 @@ hostdata_error:
}
if (lm->lm_mount == NULL) {
- fs_info(sdp, "Now mounting FS...\n");
+ fs_info(sdp, "Now mounting FS (format %u)...\n", sdp->sd_sb.sb_fs_format);
complete_all(&sdp->sd_locking_init);
return 0;
}
ret = lm->lm_mount(sdp, table);
if (ret == 0)
- fs_info(sdp, "Joined cluster. Now mounting FS...\n");
+ fs_info(sdp, "Joined cluster. Now mounting FS (format %u)...\n",
+ sdp->sd_sb.sb_fs_format);
complete_all(&sdp->sd_locking_init);
return ret;
}
@@ -1084,6 +1104,7 @@ static int gfs2_fill_super(struct super_block *sb, struct fs_context *fc)
int silent = fc->sb_flags & SB_SILENT;
struct gfs2_sbd *sdp;
struct gfs2_holder mount_gh;
+ struct gfs2_holder freeze_gh;
int error;
sdp = init_sbd(sb);
@@ -1107,7 +1128,6 @@ static int gfs2_fill_super(struct super_block *sb, struct fs_context *fc)
sb->s_op = &gfs2_super_ops;
sb->s_d_op = &gfs2_dops;
sb->s_export_op = &gfs2_export_ops;
- sb->s_xattr = gfs2_xattr_handlers;
sb->s_qcop = &gfs2_quotactl_ops;
sb->s_quota_types = QTYPE_MASK_USR | QTYPE_MASK_GRP;
sb_dqopt(sb)->flags |= DQUOT_QUOTA_SYS_FILE;
@@ -1156,6 +1176,10 @@ static int gfs2_fill_super(struct super_block *sb, struct fs_context *fc)
if (error)
goto fail_locking;
+ /* Turn rgrplvb on by default if fs format is recent enough */
+ if (!sdp->sd_args.ar_got_rgrplvb && sdp->sd_sb.sb_fs_format > 1801)
+ sdp->sd_args.ar_rgrplvb = 1;
+
error = wait_on_journal(sdp);
if (error)
goto fail_sb;
@@ -1195,25 +1219,18 @@ static int gfs2_fill_super(struct super_block *sb, struct fs_context *fc)
goto fail_per_node;
}
- if (sb_rdonly(sb)) {
- struct gfs2_holder freeze_gh;
+ error = gfs2_freeze_lock(sdp, &freeze_gh, 0);
+ if (error)
+ goto fail_per_node;
- error = gfs2_glock_nq_init(sdp->sd_freeze_gl, LM_ST_SHARED,
- LM_FLAG_NOEXP | GL_EXACT,
- &freeze_gh);
- if (error) {
- fs_err(sdp, "can't make FS RO: %d\n", error);
- goto fail_per_node;
- }
- gfs2_glock_dq_uninit(&freeze_gh);
- } else {
+ if (!sb_rdonly(sb))
error = gfs2_make_fs_rw(sdp);
- if (error) {
- fs_err(sdp, "can't make FS RW: %d\n", error);
- goto fail_per_node;
- }
- }
+ gfs2_freeze_unlock(&freeze_gh);
+ if (error) {
+ fs_err(sdp, "can't make FS RW: %d\n", error);
+ goto fail_per_node;
+ }
gfs2_glock_dq_uninit(&mount_gh);
gfs2_online_uevent(sdp);
return 0;
@@ -1456,6 +1473,7 @@ static int gfs2_parse_param(struct fs_context *fc, struct fs_parameter *param)
break;
case Opt_rgrplvb:
args->ar_rgrplvb = result.boolean;
+ args->ar_got_rgrplvb = 1;
break;
case Opt_loccookie:
args->ar_loccookie = result.boolean;
@@ -1514,6 +1532,12 @@ static int gfs2_reconfigure(struct fs_context *fc)
fc->sb_flags |= SB_RDONLY;
if ((sb->s_flags ^ fc->sb_flags) & SB_RDONLY) {
+ struct gfs2_holder freeze_gh;
+
+ error = gfs2_freeze_lock(sdp, &freeze_gh, 0);
+ if (error)
+ return -EINVAL;
+
if (fc->sb_flags & SB_RDONLY) {
error = gfs2_make_fs_ro(sdp);
if (error)
@@ -1523,6 +1547,7 @@ static int gfs2_reconfigure(struct fs_context *fc)
if (error)
errorfc(fc, "unable to remount read-write");
}
+ gfs2_freeze_unlock(&freeze_gh);
}
sdp->sd_args = *newargs;
diff --git a/fs/gfs2/recovery.c b/fs/gfs2/recovery.c
index c26c68ebd29d..282173774005 100644
--- a/fs/gfs2/recovery.c
+++ b/fs/gfs2/recovery.c
@@ -470,9 +470,7 @@ void gfs2_recover_func(struct work_struct *work)
/* Acquire a shared hold on the freeze lock */
- error = gfs2_glock_nq_init(sdp->sd_freeze_gl, LM_ST_SHARED,
- LM_FLAG_NOEXP | LM_FLAG_PRIORITY |
- GL_EXACT, &thaw_gh);
+ error = gfs2_freeze_lock(sdp, &thaw_gh, LM_FLAG_PRIORITY);
if (error)
goto fail_gunlock_ji;
@@ -507,22 +505,24 @@ void gfs2_recover_func(struct work_struct *work)
/* We take the sd_log_flush_lock here primarily to prevent log
* flushes and simultaneous journal replays from stomping on
- * each other wrt sd_log_bio. */
+ * each other wrt jd_log_bio. */
down_read(&sdp->sd_log_flush_lock);
for (pass = 0; pass < 2; pass++) {
lops_before_scan(jd, &head, pass);
error = foreach_descriptor(jd, head.lh_tail,
head.lh_blkno, pass);
lops_after_scan(jd, error, pass);
- if (error)
+ if (error) {
+ up_read(&sdp->sd_log_flush_lock);
goto fail_gunlock_thaw;
+ }
}
recover_local_statfs(jd, &head);
clean_journal(jd, &head);
up_read(&sdp->sd_log_flush_lock);
- gfs2_glock_dq_uninit(&thaw_gh);
+ gfs2_freeze_unlock(&thaw_gh);
t_rep = ktime_get();
fs_info(sdp, "jid=%u: Journal replayed in %lldms [jlck:%lldms, "
"jhead:%lldms, tlck:%lldms, replay:%lldms]\n",
@@ -544,7 +544,7 @@ void gfs2_recover_func(struct work_struct *work)
goto done;
fail_gunlock_thaw:
- gfs2_glock_dq_uninit(&thaw_gh);
+ gfs2_freeze_unlock(&thaw_gh);
fail_gunlock_ji:
if (jlocked) {
gfs2_glock_dq_uninit(&ji_gh);
diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c
index 5e8eef9990e3..89c37a845e64 100644
--- a/fs/gfs2/rgrp.c
+++ b/fs/gfs2/rgrp.c
@@ -36,6 +36,24 @@
#define BFITNOENT ((u32)~0)
#define NO_BLOCK ((u64)~0)
+struct gfs2_rbm {
+ struct gfs2_rgrpd *rgd;
+ u32 offset; /* The offset is bitmap relative */
+ int bii; /* Bitmap index */
+};
+
+static inline struct gfs2_bitmap *rbm_bi(const struct gfs2_rbm *rbm)
+{
+ return rbm->rgd->rd_bits + rbm->bii;
+}
+
+static inline u64 gfs2_rbm_to_block(const struct gfs2_rbm *rbm)
+{
+ BUG_ON(rbm->offset >= rbm->rgd->rd_data);
+ return rbm->rgd->rd_data0 + (rbm_bi(rbm)->bi_start * GFS2_NBBY) +
+ rbm->offset;
+}
+
/*
* These routines are used by the resource group routines (rgrp.c)
* to keep track of block allocation. Each block is represented by two
@@ -61,7 +79,7 @@ static const char valid_change[16] = {
};
static int gfs2_rbm_find(struct gfs2_rbm *rbm, u8 state, u32 *minext,
- const struct gfs2_inode *ip, bool nowrap);
+ struct gfs2_blkreserv *rs, bool nowrap);
/**
@@ -175,7 +193,7 @@ static inline u64 gfs2_bit_search(const __le64 *ptr, u64 mask, u8 state)
/**
* rs_cmp - multi-block reservation range compare
- * @blk: absolute file system block number of the new reservation
+ * @start: start of the new reservation
* @len: number of blocks in the new reservation
* @rs: existing reservation to compare against
*
@@ -183,13 +201,11 @@ static inline u64 gfs2_bit_search(const __le64 *ptr, u64 mask, u8 state)
* -1 if the block range is before the start of the reservation
* 0 if the block range overlaps with the reservation
*/
-static inline int rs_cmp(u64 blk, u32 len, struct gfs2_blkreserv *rs)
+static inline int rs_cmp(u64 start, u32 len, struct gfs2_blkreserv *rs)
{
- u64 startblk = gfs2_rbm_to_block(&rs->rs_rbm);
-
- if (blk >= startblk + rs->rs_free)
+ if (start >= rs->rs_start + rs->rs_requested)
return 1;
- if (blk + len - 1 < startblk)
+ if (rs->rs_start >= start + len)
return -1;
return 0;
}
@@ -277,29 +293,38 @@ static int gfs2_rbm_from_block(struct gfs2_rbm *rbm, u64 block)
}
/**
- * gfs2_rbm_incr - increment an rbm structure
+ * gfs2_rbm_add - add a number of blocks to an rbm
* @rbm: The rbm with rgd already set correctly
+ * @blocks: The number of blocks to add to rpm
*
- * This function takes an existing rbm structure and increments it to the next
- * viable block offset.
- *
- * Returns: If incrementing the offset would cause the rbm to go past the
- * end of the rgrp, true is returned, otherwise false.
+ * This function takes an existing rbm structure and adds a number of blocks to
+ * it.
*
+ * Returns: True if the new rbm would point past the end of the rgrp.
*/
-static bool gfs2_rbm_incr(struct gfs2_rbm *rbm)
+static bool gfs2_rbm_add(struct gfs2_rbm *rbm, u32 blocks)
{
- if (rbm->offset + 1 < rbm_bi(rbm)->bi_blocks) { /* in the same bitmap */
- rbm->offset++;
+ struct gfs2_rgrpd *rgd = rbm->rgd;
+ struct gfs2_bitmap *bi = rgd->rd_bits + rbm->bii;
+
+ if (rbm->offset + blocks < bi->bi_blocks) {
+ rbm->offset += blocks;
return false;
}
- if (rbm->bii == rbm->rgd->rd_length - 1) /* at the last bitmap */
- return true;
+ blocks -= bi->bi_blocks - rbm->offset;
- rbm->offset = 0;
- rbm->bii++;
- return false;
+ for(;;) {
+ bi++;
+ if (bi == rgd->rd_bits + rgd->rd_length)
+ return true;
+ if (blocks < bi->bi_blocks) {
+ rbm->offset = blocks;
+ rbm->bii = bi - rgd->rd_bits;
+ return false;
+ }
+ blocks -= bi->bi_blocks;
+ }
}
/**
@@ -308,7 +333,8 @@ static bool gfs2_rbm_incr(struct gfs2_rbm *rbm)
* @n_unaligned: Number of unaligned blocks to check
* @len: Decremented for each block found (terminate on zero)
*
- * Returns: true if a non-free block is encountered
+ * Returns: true if a non-free block is encountered or the end of the resource
+ * group is reached.
*/
static bool gfs2_unaligned_extlen(struct gfs2_rbm *rbm, u32 n_unaligned, u32 *len)
@@ -323,7 +349,7 @@ static bool gfs2_unaligned_extlen(struct gfs2_rbm *rbm, u32 n_unaligned, u32 *le
(*len)--;
if (*len == 0)
return true;
- if (gfs2_rbm_incr(rbm))
+ if (gfs2_rbm_add(rbm, 1))
return true;
}
@@ -595,10 +621,11 @@ static void dump_rs(struct seq_file *seq, const struct gfs2_blkreserv *rs,
{
struct gfs2_inode *ip = container_of(rs, struct gfs2_inode, i_res);
- gfs2_print_dbg(seq, "%s B: n:%llu s:%llu b:%u f:%u\n", fs_id_buf,
+ gfs2_print_dbg(seq, "%s B: n:%llu s:%llu f:%u\n",
+ fs_id_buf,
(unsigned long long)ip->i_no_addr,
- (unsigned long long)gfs2_rbm_to_block(&rs->rs_rbm),
- rs->rs_rbm.offset, rs->rs_free);
+ (unsigned long long)rs->rs_start,
+ rs->rs_requested);
}
/**
@@ -613,33 +640,22 @@ static void __rs_deltree(struct gfs2_blkreserv *rs)
if (!gfs2_rs_active(rs))
return;
- rgd = rs->rs_rbm.rgd;
+ rgd = rs->rs_rgd;
trace_gfs2_rs(rs, TRACE_RS_TREEDEL);
rb_erase(&rs->rs_node, &rgd->rd_rstree);
RB_CLEAR_NODE(&rs->rs_node);
- if (rs->rs_free) {
- u64 last_block = gfs2_rbm_to_block(&rs->rs_rbm) +
- rs->rs_free - 1;
- struct gfs2_rbm last_rbm = { .rgd = rs->rs_rbm.rgd, };
- struct gfs2_bitmap *start, *last;
+ if (rs->rs_requested) {
+ /* return requested blocks to the rgrp */
+ BUG_ON(rs->rs_rgd->rd_requested < rs->rs_requested);
+ rs->rs_rgd->rd_requested -= rs->rs_requested;
- /* return reserved blocks to the rgrp */
- BUG_ON(rs->rs_rbm.rgd->rd_reserved < rs->rs_free);
- rs->rs_rbm.rgd->rd_reserved -= rs->rs_free;
/* The rgrp extent failure point is likely not to increase;
it will only do so if the freed blocks are somehow
contiguous with a span of free blocks that follows. Still,
it will force the number to be recalculated later. */
- rgd->rd_extfail_pt += rs->rs_free;
- rs->rs_free = 0;
- if (gfs2_rbm_from_block(&last_rbm, last_block))
- return;
- start = rbm_bi(&rs->rs_rbm);
- last = rbm_bi(&last_rbm);
- do
- clear_bit(GBF_FULL, &start->bi_flags);
- while (start++ != last);
+ rgd->rd_extfail_pt += rs->rs_requested;
+ rs->rs_requested = 0;
}
}
@@ -652,11 +668,11 @@ void gfs2_rs_deltree(struct gfs2_blkreserv *rs)
{
struct gfs2_rgrpd *rgd;
- rgd = rs->rs_rbm.rgd;
+ rgd = rs->rs_rgd;
if (rgd) {
spin_lock(&rgd->rd_rsspin);
__rs_deltree(rs);
- BUG_ON(rs->rs_free);
+ BUG_ON(rs->rs_requested);
spin_unlock(&rgd->rd_rsspin);
}
}
@@ -904,6 +920,7 @@ static int read_rindex_entry(struct gfs2_inode *ip)
rgd->rd_data = be32_to_cpu(buf.ri_data);
rgd->rd_bitbytes = be32_to_cpu(buf.ri_bitbytes);
spin_lock_init(&rgd->rd_rsspin);
+ mutex_init(&rgd->rd_mutex);
error = compute_bitstructs(rgd);
if (error)
@@ -1149,6 +1166,23 @@ static u32 count_unlinked(struct gfs2_rgrpd *rgd)
return count;
}
+static void rgrp_set_bitmap_flags(struct gfs2_rgrpd *rgd)
+{
+ struct gfs2_bitmap *bi;
+ int x;
+
+ if (rgd->rd_free) {
+ for (x = 0; x < rgd->rd_length; x++) {
+ bi = rgd->rd_bits + x;
+ clear_bit(GBF_FULL, &bi->bi_flags);
+ }
+ } else {
+ for (x = 0; x < rgd->rd_length; x++) {
+ bi = rgd->rd_bits + x;
+ set_bit(GBF_FULL, &bi->bi_flags);
+ }
+ }
+}
/**
* gfs2_rgrp_bh_get - Read in a RG's header and bitmaps
@@ -1192,11 +1226,11 @@ static int gfs2_rgrp_bh_get(struct gfs2_rgrpd *rgd)
}
if (!(rgd->rd_flags & GFS2_RDF_UPTODATE)) {
- for (x = 0; x < length; x++)
- clear_bit(GBF_FULL, &rgd->rd_bits[x].bi_flags);
gfs2_rgrp_in(rgd, (rgd->rd_bits[0].bi_bh)->b_data);
+ rgrp_set_bitmap_flags(rgd);
rgd->rd_flags |= (GFS2_RDF_UPTODATE | GFS2_RDF_CHECK);
rgd->rd_free_clone = rgd->rd_free;
+ BUG_ON(rgd->rd_reserved);
/* max out the rgrp allocation failure point */
rgd->rd_extfail_pt = rgd->rd_free;
}
@@ -1244,7 +1278,11 @@ static int update_rgrp_lvb(struct gfs2_rgrpd *rgd)
if (rgd->rd_rgl->rl_unlinked == 0)
rgd->rd_flags &= ~GFS2_RDF_CHECK;
rgd->rd_free = be32_to_cpu(rgd->rd_rgl->rl_free);
+ rgrp_set_bitmap_flags(rgd);
rgd->rd_free_clone = rgd->rd_free;
+ BUG_ON(rgd->rd_reserved);
+ /* max out the rgrp allocation failure point */
+ rgd->rd_extfail_pt = rgd->rd_free;
rgd->rd_dinodes = be32_to_cpu(rgd->rd_rgl->rl_dinodes);
rgd->rd_igeneration = be64_to_cpu(rgd->rd_rgl->rl_igeneration);
return 0;
@@ -1404,7 +1442,8 @@ int gfs2_fitrim(struct file *filp, void __user *argp)
while (1) {
- ret = gfs2_glock_nq_init(rgd->rd_gl, LM_ST_EXCLUSIVE, 0, &gh);
+ ret = gfs2_glock_nq_init(rgd->rd_gl, LM_ST_EXCLUSIVE,
+ LM_FLAG_NODE_SCOPE, &gh);
if (ret)
goto out;
@@ -1412,9 +1451,11 @@ int gfs2_fitrim(struct file *filp, void __user *argp)
/* Trim each bitmap in the rgrp */
for (x = 0; x < rgd->rd_length; x++) {
struct gfs2_bitmap *bi = rgd->rd_bits + x;
+ rgrp_lock_local(rgd);
ret = gfs2_rgrp_send_discards(sdp,
rgd->rd_data0, NULL, bi, minlen,
&amt);
+ rgrp_unlock_local(rgd);
if (ret) {
gfs2_glock_dq_uninit(&gh);
goto out;
@@ -1426,9 +1467,11 @@ int gfs2_fitrim(struct file *filp, void __user *argp)
ret = gfs2_trans_begin(sdp, RES_RG_HDR, 0);
if (ret == 0) {
bh = rgd->rd_bits[0].bi_bh;
+ rgrp_lock_local(rgd);
rgd->rd_flags |= GFS2_RGF_TRIMMED;
gfs2_trans_add_meta(rgd->rd_gl, bh);
gfs2_rgrp_out(rgd, bh->b_data);
+ rgrp_unlock_local(rgd);
gfs2_trans_end(sdp);
}
}
@@ -1458,8 +1501,7 @@ static void rs_insert(struct gfs2_inode *ip)
struct rb_node **newn, *parent = NULL;
int rc;
struct gfs2_blkreserv *rs = &ip->i_res;
- struct gfs2_rgrpd *rgd = rs->rs_rbm.rgd;
- u64 fsblock = gfs2_rbm_to_block(&rs->rs_rbm);
+ struct gfs2_rgrpd *rgd = rs->rs_rgd;
BUG_ON(gfs2_rs_active(rs));
@@ -1470,7 +1512,7 @@ static void rs_insert(struct gfs2_inode *ip)
rb_entry(*newn, struct gfs2_blkreserv, rs_node);
parent = *newn;
- rc = rs_cmp(fsblock, rs->rs_free, cur);
+ rc = rs_cmp(rs->rs_start, rs->rs_requested, cur);
if (rc > 0)
newn = &((*newn)->rb_right);
else if (rc < 0)
@@ -1486,7 +1528,7 @@ static void rs_insert(struct gfs2_inode *ip)
rb_insert_color(&rs->rs_node, &rgd->rd_rstree);
/* Do our rgrp accounting for the reservation */
- rgd->rd_reserved += rs->rs_free; /* blocks reserved */
+ rgd->rd_requested += rs->rs_requested; /* blocks requested */
spin_unlock(&rgd->rd_rsspin);
trace_gfs2_rs(rs, TRACE_RS_INSERT);
}
@@ -1507,9 +1549,9 @@ static inline u32 rgd_free(struct gfs2_rgrpd *rgd, struct gfs2_blkreserv *rs)
{
u32 tot_reserved, tot_free;
- if (WARN_ON_ONCE(rgd->rd_reserved < rs->rs_free))
+ if (WARN_ON_ONCE(rgd->rd_requested < rs->rs_requested))
return 0;
- tot_reserved = rgd->rd_reserved - rs->rs_free;
+ tot_reserved = rgd->rd_requested - rs->rs_requested;
if (rgd->rd_free_clone < tot_reserved)
tot_reserved = 0;
@@ -1534,17 +1576,26 @@ static void rg_mblk_search(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip,
u64 goal;
struct gfs2_blkreserv *rs = &ip->i_res;
u32 extlen;
- u32 free_blocks = rgd_free(rgd, rs);
+ u32 free_blocks, blocks_available;
int ret;
struct inode *inode = &ip->i_inode;
+ spin_lock(&rgd->rd_rsspin);
+ free_blocks = rgd_free(rgd, rs);
+ if (rgd->rd_free_clone < rgd->rd_requested)
+ free_blocks = 0;
+ blocks_available = rgd->rd_free_clone - rgd->rd_reserved;
+ if (rgd == rs->rs_rgd)
+ blocks_available += rs->rs_reserved;
+ spin_unlock(&rgd->rd_rsspin);
+
if (S_ISDIR(inode->i_mode))
extlen = 1;
else {
extlen = max_t(u32, atomic_read(&ip->i_sizehint), ap->target);
extlen = clamp(extlen, (u32)RGRP_RSRV_MINBLKS, free_blocks);
}
- if ((rgd->rd_free_clone < rgd->rd_reserved) || (free_blocks < extlen))
+ if (free_blocks < extlen || blocks_available < extlen)
return;
/* Find bitmap block that contains bits for goal block */
@@ -1556,10 +1607,10 @@ static void rg_mblk_search(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip,
if (WARN_ON(gfs2_rbm_from_block(&rbm, goal)))
return;
- ret = gfs2_rbm_find(&rbm, GFS2_BLKST_FREE, &extlen, ip, true);
+ ret = gfs2_rbm_find(&rbm, GFS2_BLKST_FREE, &extlen, &ip->i_res, true);
if (ret == 0) {
- rs->rs_rbm = rbm;
- rs->rs_free = extlen;
+ rs->rs_start = gfs2_rbm_to_block(&rbm);
+ rs->rs_requested = extlen;
rs_insert(ip);
} else {
if (goal == rgd->rd_last_alloc + rgd->rd_data0)
@@ -1572,7 +1623,7 @@ static void rg_mblk_search(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip,
* @rgd: The resource group
* @block: The starting block
* @length: The required length
- * @ip: Ignore any reservations for this inode
+ * @ignore_rs: Reservation to ignore
*
* If the block does not appear in any reservation, then return the
* block number unchanged. If it does appear in the reservation, then
@@ -1582,7 +1633,7 @@ static void rg_mblk_search(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip,
static u64 gfs2_next_unreserved_block(struct gfs2_rgrpd *rgd, u64 block,
u32 length,
- const struct gfs2_inode *ip)
+ struct gfs2_blkreserv *ignore_rs)
{
struct gfs2_blkreserv *rs;
struct rb_node *n;
@@ -1602,8 +1653,8 @@ static u64 gfs2_next_unreserved_block(struct gfs2_rgrpd *rgd, u64 block,
}
if (n) {
- while ((rs_cmp(block, length, rs) == 0) && (&ip->i_res != rs)) {
- block = gfs2_rbm_to_block(&rs->rs_rbm) + rs->rs_free;
+ while (rs_cmp(block, length, rs) == 0 && rs != ignore_rs) {
+ block = rs->rs_start + rs->rs_requested;
n = n->rb_right;
if (n == NULL)
break;
@@ -1618,7 +1669,7 @@ static u64 gfs2_next_unreserved_block(struct gfs2_rgrpd *rgd, u64 block,
/**
* gfs2_reservation_check_and_update - Check for reservations during block alloc
* @rbm: The current position in the resource group
- * @ip: The inode for which we are searching for blocks
+ * @rs: Our own reservation
* @minext: The minimum extent length
* @maxext: A pointer to the maximum extent structure
*
@@ -1632,20 +1683,19 @@ static u64 gfs2_next_unreserved_block(struct gfs2_rgrpd *rgd, u64 block,
*/
static int gfs2_reservation_check_and_update(struct gfs2_rbm *rbm,
- const struct gfs2_inode *ip,
+ struct gfs2_blkreserv *rs,
u32 minext,
struct gfs2_extent *maxext)
{
u64 block = gfs2_rbm_to_block(rbm);
u32 extlen = 1;
u64 nblock;
- int ret;
/*
* If we have a minimum extent length, then skip over any extent
* which is less than the min extent length in size.
*/
- if (minext) {
+ if (minext > 1) {
extlen = gfs2_free_extlen(rbm, minext);
if (extlen <= maxext->len)
goto fail;
@@ -1655,7 +1705,7 @@ static int gfs2_reservation_check_and_update(struct gfs2_rbm *rbm,
* Check the extent which has been found against the reservations
* and skip if parts of it are already reserved
*/
- nblock = gfs2_next_unreserved_block(rbm->rgd, block, extlen, ip);
+ nblock = gfs2_next_unreserved_block(rbm->rgd, block, extlen, rs);
if (nblock == block) {
if (!minext || extlen >= minext)
return 0;
@@ -1664,12 +1714,15 @@ static int gfs2_reservation_check_and_update(struct gfs2_rbm *rbm,
maxext->len = extlen;
maxext->rbm = *rbm;
}
-fail:
- nblock = block + extlen;
+ } else {
+ u64 len = nblock - block;
+ if (len >= (u64)1 << 32)
+ return -E2BIG;
+ extlen = len;
}
- ret = gfs2_rbm_from_block(rbm, nblock);
- if (ret < 0)
- return ret;
+fail:
+ if (gfs2_rbm_add(rbm, extlen))
+ return -E2BIG;
return 1;
}
@@ -1677,9 +1730,9 @@ fail:
* gfs2_rbm_find - Look for blocks of a particular state
* @rbm: Value/result starting position and final position
* @state: The state which we want to find
- * @minext: Pointer to the requested extent length (NULL for a single block)
+ * @minext: Pointer to the requested extent length
* This is updated to be the actual reservation size.
- * @ip: If set, check for reservations
+ * @rs: Our own reservation (NULL to skip checking for reservations)
* @nowrap: Stop looking at the end of the rgrp, rather than wrapping
* around until we've reached the starting point.
*
@@ -1693,7 +1746,7 @@ fail:
*/
static int gfs2_rbm_find(struct gfs2_rbm *rbm, u8 state, u32 *minext,
- const struct gfs2_inode *ip, bool nowrap)
+ struct gfs2_blkreserv *rs, bool nowrap)
{
bool scan_from_start = rbm->bii == 0 && rbm->offset == 0;
struct buffer_head *bh;
@@ -1714,8 +1767,7 @@ static int gfs2_rbm_find(struct gfs2_rbm *rbm, u8 state, u32 *minext,
while(1) {
bi = rbm_bi(rbm);
- if ((ip == NULL || !gfs2_rs_active(&ip->i_res)) &&
- test_bit(GBF_FULL, &bi->bi_flags) &&
+ if (test_bit(GBF_FULL, &bi->bi_flags) &&
(state == GFS2_BLKST_FREE))
goto next_bitmap;
@@ -1731,11 +1783,10 @@ static int gfs2_rbm_find(struct gfs2_rbm *rbm, u8 state, u32 *minext,
goto next_bitmap;
}
rbm->offset = offset;
- if (ip == NULL)
+ if (!rs)
return 0;
- ret = gfs2_reservation_check_and_update(rbm, ip,
- minext ? *minext : 0,
+ ret = gfs2_reservation_check_and_update(rbm, rs, *minext,
&maxext);
if (ret == 0)
return 0;
@@ -1767,7 +1818,7 @@ next_iter:
break;
}
- if (minext == NULL || state != GFS2_BLKST_FREE)
+ if (state != GFS2_BLKST_FREE)
return -ENOSPC;
/* If the extent was too small, and it's smaller than the smallest
@@ -1775,7 +1826,7 @@ next_iter:
useless to search this rgrp again for this amount or more. */
if (wrapped && (scan_from_start || rbm->bii > last_bii) &&
*minext < rbm->rgd->rd_extfail_pt)
- rbm->rgd->rd_extfail_pt = *minext;
+ rbm->rgd->rd_extfail_pt = *minext - 1;
/* If the maximum extent we found is big enough to fulfill the
minimum requirements, use it anyway. */
@@ -1938,7 +1989,7 @@ static bool gfs2_rgrp_used_recently(const struct gfs2_blkreserv *rs,
u64 tdiff;
tdiff = ktime_to_ns(ktime_sub(ktime_get_real(),
- rs->rs_rbm.rgd->rd_gl->gl_dstamp));
+ rs->rs_rgd->rd_gl->gl_dstamp));
return tdiff > (msecs * 1000 * 1000);
}
@@ -1993,8 +2044,7 @@ static inline int fast_to_acquire(struct gfs2_rgrpd *rgd)
* We try our best to find an rgrp that has at least ap->target blocks
* available. After a couple of passes (loops == 2), the prospects of finding
* such an rgrp diminish. At this stage, we return the first rgrp that has
- * at least ap->min_target blocks available. Either way, we set ap->allowed to
- * the number of blocks available in the chosen rgrp.
+ * at least ap->min_target blocks available.
*
* Returns: 0 on success,
* -ENOMEM if a suitable rgrp can't be found
@@ -2006,56 +2056,64 @@ int gfs2_inplace_reserve(struct gfs2_inode *ip, struct gfs2_alloc_parms *ap)
struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
struct gfs2_rgrpd *begin = NULL;
struct gfs2_blkreserv *rs = &ip->i_res;
- int error = 0, rg_locked, flags = 0;
+ int error = 0, flags = LM_FLAG_NODE_SCOPE;
+ bool rg_locked;
u64 last_unlinked = NO_BLOCK;
+ u32 target = ap->target;
int loops = 0;
- u32 free_blocks, skip = 0;
+ u32 free_blocks, blocks_available, skip = 0;
+
+ BUG_ON(rs->rs_reserved);
if (sdp->sd_args.ar_rgrplvb)
flags |= GL_SKIP;
- if (gfs2_assert_warn(sdp, ap->target))
+ if (gfs2_assert_warn(sdp, target))
return -EINVAL;
if (gfs2_rs_active(rs)) {
- begin = rs->rs_rbm.rgd;
- } else if (rs->rs_rbm.rgd &&
- rgrp_contains_block(rs->rs_rbm.rgd, ip->i_goal)) {
- begin = rs->rs_rbm.rgd;
+ begin = rs->rs_rgd;
+ } else if (rs->rs_rgd &&
+ rgrp_contains_block(rs->rs_rgd, ip->i_goal)) {
+ begin = rs->rs_rgd;
} else {
check_and_update_goal(ip);
- rs->rs_rbm.rgd = begin = gfs2_blk2rgrpd(sdp, ip->i_goal, 1);
+ rs->rs_rgd = begin = gfs2_blk2rgrpd(sdp, ip->i_goal, 1);
}
if (S_ISDIR(ip->i_inode.i_mode) && (ap->aflags & GFS2_AF_ORLOV))
skip = gfs2_orlov_skip(ip);
- if (rs->rs_rbm.rgd == NULL)
+ if (rs->rs_rgd == NULL)
return -EBADSLT;
while (loops < 3) {
- rg_locked = 1;
+ struct gfs2_rgrpd *rgd;
- if (!gfs2_glock_is_locked_by_me(rs->rs_rbm.rgd->rd_gl)) {
- rg_locked = 0;
+ rg_locked = gfs2_glock_is_locked_by_me(rs->rs_rgd->rd_gl);
+ if (rg_locked) {
+ rgrp_lock_local(rs->rs_rgd);
+ } else {
if (skip && skip--)
goto next_rgrp;
if (!gfs2_rs_active(rs)) {
if (loops == 0 &&
- !fast_to_acquire(rs->rs_rbm.rgd))
+ !fast_to_acquire(rs->rs_rgd))
goto next_rgrp;
if ((loops < 2) &&
gfs2_rgrp_used_recently(rs, 1000) &&
- gfs2_rgrp_congested(rs->rs_rbm.rgd, loops))
+ gfs2_rgrp_congested(rs->rs_rgd, loops))
goto next_rgrp;
}
- error = gfs2_glock_nq_init(rs->rs_rbm.rgd->rd_gl,
+ error = gfs2_glock_nq_init(rs->rs_rgd->rd_gl,
LM_ST_EXCLUSIVE, flags,
&ip->i_rgd_gh);
if (unlikely(error))
return error;
+ rgrp_lock_local(rs->rs_rgd);
if (!gfs2_rs_active(rs) && (loops < 2) &&
- gfs2_rgrp_congested(rs->rs_rbm.rgd, loops))
+ gfs2_rgrp_congested(rs->rs_rgd, loops))
goto skip_rgrp;
if (sdp->sd_args.ar_rgrplvb) {
- error = update_rgrp_lvb(rs->rs_rbm.rgd);
+ error = update_rgrp_lvb(rs->rs_rgd);
if (unlikely(error)) {
+ rgrp_unlock_local(rs->rs_rgd);
gfs2_glock_dq_uninit(&ip->i_rgd_gh);
return error;
}
@@ -2063,36 +2121,46 @@ int gfs2_inplace_reserve(struct gfs2_inode *ip, struct gfs2_alloc_parms *ap)
}
/* Skip unusable resource groups */
- if ((rs->rs_rbm.rgd->rd_flags & (GFS2_RGF_NOALLOC |
+ if ((rs->rs_rgd->rd_flags & (GFS2_RGF_NOALLOC |
GFS2_RDF_ERROR)) ||
- (loops == 0 && ap->target > rs->rs_rbm.rgd->rd_extfail_pt))
+ (loops == 0 && target > rs->rs_rgd->rd_extfail_pt))
goto skip_rgrp;
if (sdp->sd_args.ar_rgrplvb)
- gfs2_rgrp_bh_get(rs->rs_rbm.rgd);
+ gfs2_rgrp_bh_get(rs->rs_rgd);
/* Get a reservation if we don't already have one */
if (!gfs2_rs_active(rs))
- rg_mblk_search(rs->rs_rbm.rgd, ip, ap);
+ rg_mblk_search(rs->rs_rgd, ip, ap);
/* Skip rgrps when we can't get a reservation on first pass */
if (!gfs2_rs_active(rs) && (loops < 1))
goto check_rgrp;
/* If rgrp has enough free space, use it */
- free_blocks = rgd_free(rs->rs_rbm.rgd, rs);
- if (free_blocks >= ap->target ||
- (loops == 2 && ap->min_target &&
- free_blocks >= ap->min_target)) {
- ap->allowed = free_blocks;
- return 0;
+ rgd = rs->rs_rgd;
+ spin_lock(&rgd->rd_rsspin);
+ free_blocks = rgd_free(rgd, rs);
+ blocks_available = rgd->rd_free_clone - rgd->rd_reserved;
+ if (free_blocks < target || blocks_available < target) {
+ spin_unlock(&rgd->rd_rsspin);
+ goto check_rgrp;
}
+ rs->rs_reserved = ap->target;
+ if (rs->rs_reserved > blocks_available)
+ rs->rs_reserved = blocks_available;
+ rgd->rd_reserved += rs->rs_reserved;
+ spin_unlock(&rgd->rd_rsspin);
+ rgrp_unlock_local(rs->rs_rgd);
+ return 0;
check_rgrp:
/* Check for unlinked inodes which can be reclaimed */
- if (rs->rs_rbm.rgd->rd_flags & GFS2_RDF_CHECK)
- try_rgrp_unlink(rs->rs_rbm.rgd, &last_unlinked,
+ if (rs->rs_rgd->rd_flags & GFS2_RDF_CHECK)
+ try_rgrp_unlink(rs->rs_rgd, &last_unlinked,
ip->i_no_addr);
skip_rgrp:
+ rgrp_unlock_local(rs->rs_rgd);
+
/* Drop reservation, if we couldn't use reserved rgrp */
if (gfs2_rs_active(rs))
gfs2_rs_deltree(rs);
@@ -2102,7 +2170,7 @@ skip_rgrp:
gfs2_glock_dq_uninit(&ip->i_rgd_gh);
next_rgrp:
/* Find the next rgrp, and continue looking */
- if (gfs2_select_rgrp(&rs->rs_rbm.rgd, begin))
+ if (gfs2_select_rgrp(&rs->rs_rgd, begin))
continue;
if (skip)
continue;
@@ -2119,9 +2187,12 @@ next_rgrp:
return error;
}
/* Flushing the log may release space */
- if (loops == 2)
+ if (loops == 2) {
+ if (ap->min_target)
+ target = ap->min_target;
gfs2_log_flush(sdp, NULL, GFS2_LOG_HEAD_FLUSH_NORMAL |
GFS2_LFC_INPLACE_RESERVE);
+ }
}
return -ENOSPC;
@@ -2136,6 +2207,17 @@ next_rgrp:
void gfs2_inplace_release(struct gfs2_inode *ip)
{
+ struct gfs2_blkreserv *rs = &ip->i_res;
+
+ if (rs->rs_reserved) {
+ struct gfs2_rgrpd *rgd = rs->rs_rgd;
+
+ spin_lock(&rgd->rd_rsspin);
+ BUG_ON(rgd->rd_reserved < rs->rs_reserved);
+ rgd->rd_reserved -= rs->rs_reserved;
+ spin_unlock(&rgd->rd_rsspin);
+ rs->rs_reserved = 0;
+ }
if (gfs2_holder_initialized(&ip->i_rgd_gh))
gfs2_glock_dq_uninit(&ip->i_rgd_gh);
}
@@ -2205,7 +2287,7 @@ static void rgblk_free(struct gfs2_sbd *sdp, struct gfs2_rgrpd *rgd,
bi_prev = bi;
}
gfs2_setbit(&rbm, false, new_state);
- gfs2_rbm_incr(&rbm);
+ gfs2_rbm_add(&rbm, 1);
}
}
@@ -2223,11 +2305,12 @@ void gfs2_rgrp_dump(struct seq_file *seq, struct gfs2_rgrpd *rgd,
struct gfs2_blkreserv *trs;
const struct rb_node *n;
- gfs2_print_dbg(seq, "%s R: n:%llu f:%02x b:%u/%u i:%u r:%u e:%u\n",
+ spin_lock(&rgd->rd_rsspin);
+ gfs2_print_dbg(seq, "%s R: n:%llu f:%02x b:%u/%u i:%u q:%u r:%u e:%u\n",
fs_id_buf,
(unsigned long long)rgd->rd_addr, rgd->rd_flags,
rgd->rd_free, rgd->rd_free_clone, rgd->rd_dinodes,
- rgd->rd_reserved, rgd->rd_extfail_pt);
+ rgd->rd_requested, rgd->rd_reserved, rgd->rd_extfail_pt);
if (rgd->rd_sbd->sd_args.ar_rgrplvb) {
struct gfs2_rgrp_lvb *rgl = rgd->rd_rgl;
@@ -2236,7 +2319,6 @@ void gfs2_rgrp_dump(struct seq_file *seq, struct gfs2_rgrpd *rgd,
be32_to_cpu(rgl->rl_free),
be32_to_cpu(rgl->rl_dinodes));
}
- spin_lock(&rgd->rd_rsspin);
for (n = rb_first(&rgd->rd_rstree); n; n = rb_next(&trs->rs_node)) {
trs = rb_entry(n, struct gfs2_blkreserv, rs_node);
dump_rs(seq, trs, fs_id_buf);
@@ -2273,29 +2355,29 @@ static void gfs2_adjust_reservation(struct gfs2_inode *ip,
{
struct gfs2_blkreserv *rs = &ip->i_res;
struct gfs2_rgrpd *rgd = rbm->rgd;
- unsigned rlen;
- u64 block;
- int ret;
- spin_lock(&rgd->rd_rsspin);
+ BUG_ON(rs->rs_reserved < len);
+ rs->rs_reserved -= len;
if (gfs2_rs_active(rs)) {
- if (gfs2_rbm_eq(&rs->rs_rbm, rbm)) {
- block = gfs2_rbm_to_block(rbm);
- ret = gfs2_rbm_from_block(&rs->rs_rbm, block + len);
- rlen = min(rs->rs_free, len);
- rs->rs_free -= rlen;
- rgd->rd_reserved -= rlen;
+ u64 start = gfs2_rbm_to_block(rbm);
+
+ if (rs->rs_start == start) {
+ unsigned int rlen;
+
+ rs->rs_start += len;
+ rlen = min(rs->rs_requested, len);
+ rs->rs_requested -= rlen;
+ rgd->rd_requested -= rlen;
trace_gfs2_rs(rs, TRACE_RS_CLAIM);
- if (rs->rs_free && !ret)
- goto out;
+ if (rs->rs_start < rgd->rd_data0 + rgd->rd_data &&
+ rs->rs_requested)
+ return;
/* We used up our block reservation, so we should
reserve more blocks next time. */
atomic_add(RGRP_RSRV_ADDBLKS, &ip->i_sizehint);
}
__rs_deltree(rs);
}
-out:
- spin_unlock(&rgd->rd_rsspin);
}
/**
@@ -2315,15 +2397,13 @@ static void gfs2_set_alloc_start(struct gfs2_rbm *rbm,
u64 goal;
if (gfs2_rs_active(&ip->i_res)) {
- *rbm = ip->i_res.rs_rbm;
- return;
+ goal = ip->i_res.rs_start;
+ } else {
+ if (!dinode && rgrp_contains_block(rbm->rgd, ip->i_goal))
+ goal = ip->i_goal;
+ else
+ goal = rbm->rgd->rd_last_alloc + rbm->rgd->rd_data0;
}
-
- if (!dinode && rgrp_contains_block(rbm->rgd, ip->i_goal))
- goal = ip->i_goal;
- else
- goal = rbm->rgd->rd_last_alloc + rbm->rgd->rd_data0;
-
if (WARN_ON_ONCE(gfs2_rbm_from_block(rbm, goal))) {
rbm->bii = 0;
rbm->offset = 0;
@@ -2346,17 +2426,21 @@ int gfs2_alloc_blocks(struct gfs2_inode *ip, u64 *bn, unsigned int *nblocks,
{
struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
struct buffer_head *dibh;
- struct gfs2_rbm rbm = { .rgd = ip->i_res.rs_rbm.rgd, };
- unsigned int ndata;
+ struct gfs2_rbm rbm = { .rgd = ip->i_res.rs_rgd, };
u64 block; /* block, within the file system scope */
- int error;
+ u32 minext = 1;
+ int error = -ENOSPC;
- gfs2_set_alloc_start(&rbm, ip, dinode);
- error = gfs2_rbm_find(&rbm, GFS2_BLKST_FREE, NULL, ip, false);
+ BUG_ON(ip->i_res.rs_reserved < *nblocks);
+ rgrp_lock_local(rbm.rgd);
+ if (gfs2_rs_active(&ip->i_res)) {
+ gfs2_set_alloc_start(&rbm, ip, dinode);
+ error = gfs2_rbm_find(&rbm, GFS2_BLKST_FREE, &minext, &ip->i_res, false);
+ }
if (error == -ENOSPC) {
gfs2_set_alloc_start(&rbm, ip, dinode);
- error = gfs2_rbm_find(&rbm, GFS2_BLKST_FREE, NULL, NULL, false);
+ error = gfs2_rbm_find(&rbm, GFS2_BLKST_FREE, &minext, NULL, false);
}
/* Since all blocks are reserved in advance, this shouldn't happen */
@@ -2371,14 +2455,8 @@ int gfs2_alloc_blocks(struct gfs2_inode *ip, u64 *bn, unsigned int *nblocks,
gfs2_alloc_extent(&rbm, dinode, nblocks);
block = gfs2_rbm_to_block(&rbm);
rbm.rgd->rd_last_alloc = block - rbm.rgd->rd_data0;
- if (gfs2_rs_active(&ip->i_res))
- gfs2_adjust_reservation(ip, &rbm, *nblocks);
- ndata = *nblocks;
- if (dinode)
- ndata--;
-
if (!dinode) {
- ip->i_goal = block + ndata - 1;
+ ip->i_goal = block + *nblocks - 1;
error = gfs2_meta_inode_buffer(ip, &dibh);
if (error == 0) {
struct gfs2_dinode *di =
@@ -2389,12 +2467,20 @@ int gfs2_alloc_blocks(struct gfs2_inode *ip, u64 *bn, unsigned int *nblocks,
brelse(dibh);
}
}
- if (rbm.rgd->rd_free < *nblocks) {
+ spin_lock(&rbm.rgd->rd_rsspin);
+ gfs2_adjust_reservation(ip, &rbm, *nblocks);
+ if (rbm.rgd->rd_free < *nblocks || rbm.rgd->rd_reserved < *nblocks) {
fs_warn(sdp, "nblocks=%u\n", *nblocks);
+ spin_unlock(&rbm.rgd->rd_rsspin);
goto rgrp_error;
}
-
+ BUG_ON(rbm.rgd->rd_reserved < *nblocks);
+ BUG_ON(rbm.rgd->rd_free_clone < *nblocks);
+ BUG_ON(rbm.rgd->rd_free < *nblocks);
+ rbm.rgd->rd_reserved -= *nblocks;
+ rbm.rgd->rd_free_clone -= *nblocks;
rbm.rgd->rd_free -= *nblocks;
+ spin_unlock(&rbm.rgd->rd_rsspin);
if (dinode) {
rbm.rgd->rd_dinodes++;
*generation = rbm.rgd->rd_igeneration++;
@@ -2404,6 +2490,7 @@ int gfs2_alloc_blocks(struct gfs2_inode *ip, u64 *bn, unsigned int *nblocks,
gfs2_trans_add_meta(rbm.rgd->rd_gl, rbm.rgd->rd_bits[0].bi_bh);
gfs2_rgrp_out(rbm.rgd, rbm.rgd->rd_bits[0].bi_bh->b_data);
+ rgrp_unlock_local(rbm.rgd);
gfs2_statfs_change(sdp, 0, -(s64)*nblocks, dinode ? 1 : 0);
if (dinode)
@@ -2411,13 +2498,13 @@ int gfs2_alloc_blocks(struct gfs2_inode *ip, u64 *bn, unsigned int *nblocks,
gfs2_quota_change(ip, *nblocks, ip->i_inode.i_uid, ip->i_inode.i_gid);
- rbm.rgd->rd_free_clone -= *nblocks;
trace_gfs2_block_alloc(ip, rbm.rgd, block, *nblocks,
dinode ? GFS2_BLKST_DINODE : GFS2_BLKST_USED);
*bn = block;
return 0;
rgrp_error:
+ rgrp_unlock_local(rbm.rgd);
gfs2_rgrp_error(rbm.rgd);
return -EIO;
}
@@ -2437,12 +2524,14 @@ void __gfs2_free_blocks(struct gfs2_inode *ip, struct gfs2_rgrpd *rgd,
{
struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
+ rgrp_lock_local(rgd);
rgblk_free(sdp, rgd, bstart, blen, GFS2_BLKST_FREE);
trace_gfs2_block_alloc(ip, rgd, bstart, blen, GFS2_BLKST_FREE);
rgd->rd_free += blen;
rgd->rd_flags &= ~GFS2_RGF_TRIMMED;
gfs2_trans_add_meta(rgd->rd_gl, rgd->rd_bits[0].bi_bh);
gfs2_rgrp_out(rgd, rgd->rd_bits[0].bi_bh->b_data);
+ rgrp_unlock_local(rgd);
/* Directories keep their data in the metadata address space */
if (meta || ip->i_depth || gfs2_is_jdata(ip))
@@ -2478,17 +2567,20 @@ void gfs2_unlink_di(struct inode *inode)
rgd = gfs2_blk2rgrpd(sdp, blkno, true);
if (!rgd)
return;
+ rgrp_lock_local(rgd);
rgblk_free(sdp, rgd, blkno, 1, GFS2_BLKST_UNLINKED);
trace_gfs2_block_alloc(ip, rgd, blkno, 1, GFS2_BLKST_UNLINKED);
gfs2_trans_add_meta(rgd->rd_gl, rgd->rd_bits[0].bi_bh);
gfs2_rgrp_out(rgd, rgd->rd_bits[0].bi_bh->b_data);
be32_add_cpu(&rgd->rd_rgl->rl_unlinked, 1);
+ rgrp_unlock_local(rgd);
}
void gfs2_free_di(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip)
{
struct gfs2_sbd *sdp = rgd->rd_sbd;
+ rgrp_lock_local(rgd);
rgblk_free(sdp, rgd, ip->i_no_addr, 1, GFS2_BLKST_FREE);
if (!rgd->rd_dinodes)
gfs2_consist_rgrpd(rgd);
@@ -2497,6 +2589,7 @@ void gfs2_free_di(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip)
gfs2_trans_add_meta(rgd->rd_gl, rgd->rd_bits[0].bi_bh);
gfs2_rgrp_out(rgd, rgd->rd_bits[0].bi_bh->b_data);
+ rgrp_unlock_local(rgd);
be32_add_cpu(&rgd->rd_rgl->rl_unlinked, -1);
gfs2_statfs_change(sdp, 0, +1, -1);
@@ -2511,6 +2604,10 @@ void gfs2_free_di(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip)
* @no_addr: The block number to check
* @type: The block type we are looking for
*
+ * The inode glock of @no_addr must be held. The @type to check for is either
+ * GFS2_BLKST_DINODE or GFS2_BLKST_UNLINKED; checking for type GFS2_BLKST_FREE
+ * or GFS2_BLKST_USED would make no sense.
+ *
* Returns: 0 if the block type matches the expected type
* -ESTALE if it doesn't match
* or -ve errno if something went wrong while checking
@@ -2534,6 +2631,13 @@ int gfs2_check_blk_type(struct gfs2_sbd *sdp, u64 no_addr, unsigned int type)
rbm.rgd = rgd;
error = gfs2_rbm_from_block(&rbm, no_addr);
if (!WARN_ON_ONCE(error)) {
+ /*
+ * No need to take the local resource group lock here; the
+ * inode glock of @no_addr provides the necessary
+ * synchronization in case the block is an inode. (In case
+ * the block is not an inode, the block type will not match
+ * the @type we are looking for.)
+ */
if (gfs2_testbit(&rbm, false) != type)
error = -ESTALE;
}
@@ -2578,7 +2682,7 @@ void gfs2_rlist_add(struct gfs2_inode *ip, struct gfs2_rgrp_list *rlist,
return;
rgd = gfs2_blk2rgrpd(sdp, block, 1);
} else {
- rgd = ip->i_res.rs_rbm.rgd;
+ rgd = ip->i_res.rs_rgd;
if (!rgd || !rgrp_contains_block(rgd, block))
rgd = gfs2_blk2rgrpd(sdp, block, 1);
}
@@ -2633,9 +2737,8 @@ void gfs2_rlist_alloc(struct gfs2_rgrp_list *rlist)
sizeof(struct gfs2_holder),
GFP_NOFS | __GFP_NOFAIL);
for (x = 0; x < rlist->rl_rgrps; x++)
- gfs2_holder_init(rlist->rl_rgd[x]->rd_gl,
- LM_ST_EXCLUSIVE, 0,
- &rlist->rl_ghs[x]);
+ gfs2_holder_init(rlist->rl_rgd[x]->rd_gl, LM_ST_EXCLUSIVE,
+ LM_FLAG_NODE_SCOPE, &rlist->rl_ghs[x]);
}
/**
@@ -2658,3 +2761,14 @@ void gfs2_rlist_free(struct gfs2_rgrp_list *rlist)
}
}
+void rgrp_lock_local(struct gfs2_rgrpd *rgd)
+{
+ BUG_ON(!gfs2_glock_is_held_excl(rgd->rd_gl) &&
+ !test_bit(SDF_NORECOVERY, &rgd->rd_sbd->sd_flags));
+ mutex_lock(&rgd->rd_mutex);
+}
+
+void rgrp_unlock_local(struct gfs2_rgrpd *rgd)
+{
+ mutex_unlock(&rgd->rd_mutex);
+}
diff --git a/fs/gfs2/rgrp.h b/fs/gfs2/rgrp.h
index 9a587ada51ed..a6855fd796e0 100644
--- a/fs/gfs2/rgrp.h
+++ b/fs/gfs2/rgrp.h
@@ -77,7 +77,7 @@ extern int gfs2_fitrim(struct file *filp, void __user *argp);
/* This is how to tell if a reservation is in the rgrp tree: */
static inline bool gfs2_rs_active(const struct gfs2_blkreserv *rs)
{
- return rs && !RB_EMPTY_NODE(&rs->rs_node);
+ return !RB_EMPTY_NODE(&rs->rs_node);
}
static inline int rgrp_contains_block(struct gfs2_rgrpd *rgd, u64 block)
@@ -88,4 +88,8 @@ static inline int rgrp_contains_block(struct gfs2_rgrpd *rgd, u64 block)
}
extern void check_and_update_goal(struct gfs2_inode *ip);
+
+extern void rgrp_lock_local(struct gfs2_rgrpd *rgd);
+extern void rgrp_unlock_local(struct gfs2_rgrpd *rgd);
+
#endif /* __RGRP_DOT_H__ */
diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c
index 2f56acc41c04..861ed5fe02a5 100644
--- a/fs/gfs2/super.c
+++ b/fs/gfs2/super.c
@@ -81,19 +81,12 @@ void gfs2_jindex_free(struct gfs2_sbd *sdp)
static struct gfs2_jdesc *jdesc_find_i(struct list_head *head, unsigned int jid)
{
struct gfs2_jdesc *jd;
- int found = 0;
list_for_each_entry(jd, head, jd_list) {
- if (jd->jd_jid == jid) {
- found = 1;
- break;
- }
+ if (jd->jd_jid == jid)
+ return jd;
}
-
- if (!found)
- jd = NULL;
-
- return jd;
+ return NULL;
}
struct gfs2_jdesc *gfs2_jdesc_find(struct gfs2_sbd *sdp, unsigned int jid)
@@ -165,7 +158,6 @@ int gfs2_make_fs_rw(struct gfs2_sbd *sdp)
{
struct gfs2_inode *ip = GFS2_I(sdp->sd_jdesc->jd_inode);
struct gfs2_glock *j_gl = ip->i_gl;
- struct gfs2_holder freeze_gh;
struct gfs2_log_header_host head;
int error;
@@ -173,12 +165,6 @@ int gfs2_make_fs_rw(struct gfs2_sbd *sdp)
if (error)
return error;
- error = gfs2_glock_nq_init(sdp->sd_freeze_gl, LM_ST_SHARED,
- LM_FLAG_NOEXP | GL_EXACT,
- &freeze_gh);
- if (error)
- goto fail_threads;
-
j_gl->gl_ops->go_inval(j_gl, DIO_METADATA);
if (gfs2_withdrawn(sdp)) {
error = -EIO;
@@ -205,13 +191,9 @@ int gfs2_make_fs_rw(struct gfs2_sbd *sdp)
set_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags);
- gfs2_glock_dq_uninit(&freeze_gh);
-
return 0;
fail:
- gfs2_glock_dq_uninit(&freeze_gh);
-fail_threads:
if (sdp->sd_quotad_process)
kthread_stop(sdp->sd_quotad_process);
sdp->sd_quotad_process = NULL;
@@ -452,7 +434,7 @@ static int gfs2_lock_fs_check_clean(struct gfs2_sbd *sdp)
}
if (error)
- gfs2_glock_dq_uninit(&sdp->sd_freeze_gh);
+ gfs2_freeze_unlock(&sdp->sd_freeze_gh);
out:
while (!list_empty(&list)) {
@@ -562,8 +544,6 @@ static void gfs2_dirty_inode(struct inode *inode, int flags)
int need_endtrans = 0;
int ret;
- if (!(flags & I_DIRTY_INODE))
- return;
if (unlikely(gfs2_withdrawn(sdp)))
return;
if (!gfs2_glock_is_locked_by_me(ip->i_gl)) {
@@ -609,30 +589,9 @@ out:
int gfs2_make_fs_ro(struct gfs2_sbd *sdp)
{
- struct gfs2_holder freeze_gh;
int error = 0;
int log_write_allowed = test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags);
- gfs2_holder_mark_uninitialized(&freeze_gh);
- if (sdp->sd_freeze_gl &&
- !gfs2_glock_is_locked_by_me(sdp->sd_freeze_gl)) {
- if (!log_write_allowed) {
- error = gfs2_glock_nq_init(sdp->sd_freeze_gl,
- LM_ST_SHARED, LM_FLAG_TRY |
- LM_FLAG_NOEXP | GL_EXACT,
- &freeze_gh);
- if (error == GLR_TRYFAILED)
- error = 0;
- } else {
- error = gfs2_glock_nq_init(sdp->sd_freeze_gl,
- LM_ST_SHARED,
- LM_FLAG_NOEXP | GL_EXACT,
- &freeze_gh);
- if (error && !gfs2_withdrawn(sdp))
- return error;
- }
- }
-
gfs2_flush_delete_work(sdp);
if (!log_write_allowed && current == sdp->sd_quotad_process)
fs_warn(sdp, "The quotad daemon is withdrawing.\n");
@@ -652,18 +611,15 @@ int gfs2_make_fs_ro(struct gfs2_sbd *sdp)
gfs2_log_flush(sdp, NULL, GFS2_LOG_HEAD_FLUSH_SHUTDOWN |
GFS2_LFC_MAKE_FS_RO);
- wait_event(sdp->sd_reserving_log_wait,
- atomic_read(&sdp->sd_reserving_log) == 0);
- gfs2_assert_warn(sdp, atomic_read(&sdp->sd_log_blks_free) ==
- sdp->sd_jdesc->jd_blocks);
+ wait_event_timeout(sdp->sd_log_waitq,
+ gfs2_log_is_empty(sdp),
+ HZ * 5);
+ gfs2_assert_warn(sdp, gfs2_log_is_empty(sdp));
} else {
- wait_event_timeout(sdp->sd_reserving_log_wait,
- atomic_read(&sdp->sd_reserving_log) == 0,
+ wait_event_timeout(sdp->sd_log_waitq,
+ gfs2_log_is_empty(sdp),
HZ * 5);
}
- if (gfs2_holder_initialized(&freeze_gh))
- gfs2_glock_dq_uninit(&freeze_gh);
-
gfs2_quota_cleanup(sdp);
if (!log_write_allowed)
@@ -772,10 +728,8 @@ void gfs2_freeze_func(struct work_struct *work)
struct super_block *sb = sdp->sd_vfs;
atomic_inc(&sb->s_active);
- error = gfs2_glock_nq_init(sdp->sd_freeze_gl, LM_ST_SHARED,
- LM_FLAG_NOEXP | GL_EXACT, &freeze_gh);
+ error = gfs2_freeze_lock(sdp, &freeze_gh, 0);
if (error) {
- fs_info(sdp, "GFS2: couldn't get freeze lock : %d\n", error);
gfs2_assert_withdraw(sdp, 0);
} else {
atomic_set(&sdp->sd_freeze_state, SFS_UNFROZEN);
@@ -785,7 +739,7 @@ void gfs2_freeze_func(struct work_struct *work)
error);
gfs2_assert_withdraw(sdp, 0);
}
- gfs2_glock_dq_uninit(&freeze_gh);
+ gfs2_freeze_unlock(&freeze_gh);
}
deactivate_super(sb);
clear_bit_unlock(SDF_FS_FROZEN, &sdp->sd_flags);
@@ -853,7 +807,7 @@ static int gfs2_unfreeze(struct super_block *sb)
return 0;
}
- gfs2_glock_dq_uninit(&sdp->sd_freeze_gh);
+ gfs2_freeze_unlock(&sdp->sd_freeze_gh);
mutex_unlock(&sdp->sd_freeze_mutex);
return wait_on_bit(&sdp->sd_flags, SDF_FS_FROZEN, TASK_INTERRUPTIBLE);
}
@@ -1229,7 +1183,8 @@ static int gfs2_dinode_dealloc(struct gfs2_inode *ip)
goto out_qs;
}
- error = gfs2_glock_nq_init(rgd->rd_gl, LM_ST_EXCLUSIVE, 0, &gh);
+ error = gfs2_glock_nq_init(rgd->rd_gl, LM_ST_EXCLUSIVE,
+ LM_FLAG_NODE_SCOPE, &gh);
if (error)
goto out_qs;
diff --git a/fs/gfs2/super.h b/fs/gfs2/super.h
index c9fb2a654181..08e502dec7ec 100644
--- a/fs/gfs2/super.h
+++ b/fs/gfs2/super.h
@@ -11,6 +11,10 @@
#include <linux/dcache.h>
#include "incore.h"
+/* Supported fs format version range */
+#define GFS2_FS_FORMAT_MIN (1801)
+#define GFS2_FS_FORMAT_MAX (1802)
+
extern void gfs2_lm_unmount(struct gfs2_sbd *sdp);
static inline unsigned int gfs2_jindex_size(struct gfs2_sbd *sdp)
@@ -54,7 +58,9 @@ extern struct file_system_type gfs2meta_fs_type;
extern const struct export_operations gfs2_export_ops;
extern const struct super_operations gfs2_super_ops;
extern const struct dentry_operations gfs2_dops;
-extern const struct xattr_handler *gfs2_xattr_handlers[];
+
+extern const struct xattr_handler *gfs2_xattr_handlers_max[];
+extern const struct xattr_handler **gfs2_xattr_handlers_min;
#endif /* __SUPER_DOT_H__ */
diff --git a/fs/gfs2/trace_gfs2.h b/fs/gfs2/trace_gfs2.h
index 0b2f858d9a8c..bd6c8e9e49db 100644
--- a/fs/gfs2/trace_gfs2.h
+++ b/fs/gfs2/trace_gfs2.h
@@ -560,6 +560,7 @@ TRACE_EVENT(gfs2_block_alloc,
__field( u8, block_state )
__field( u64, rd_addr )
__field( u32, rd_free_clone )
+ __field( u32, rd_requested )
__field( u32, rd_reserved )
),
@@ -571,17 +572,20 @@ TRACE_EVENT(gfs2_block_alloc,
__entry->block_state = block_state;
__entry->rd_addr = rgd->rd_addr;
__entry->rd_free_clone = rgd->rd_free_clone;
+ __entry->rd_requested = rgd->rd_requested;
__entry->rd_reserved = rgd->rd_reserved;
),
- TP_printk("%u,%u bmap %llu alloc %llu/%lu %s rg:%llu rf:%u rr:%lu",
+ TP_printk("%u,%u bmap %llu alloc %llu/%lu %s rg:%llu rf:%u rq:%u rr:%u",
MAJOR(__entry->dev), MINOR(__entry->dev),
(unsigned long long)__entry->inum,
(unsigned long long)__entry->start,
(unsigned long)__entry->len,
block_state_name(__entry->block_state),
(unsigned long long)__entry->rd_addr,
- __entry->rd_free_clone, (unsigned long)__entry->rd_reserved)
+ __entry->rd_free_clone,
+ __entry->rd_requested,
+ __entry->rd_reserved)
);
/* Keep track of multi-block reservations as they are allocated/freed */
@@ -595,33 +599,40 @@ TRACE_EVENT(gfs2_rs,
__field( dev_t, dev )
__field( u64, rd_addr )
__field( u32, rd_free_clone )
+ __field( u32, rd_requested )
__field( u32, rd_reserved )
__field( u64, inum )
__field( u64, start )
- __field( u32, free )
+ __field( u32, requested )
+ __field( u32, reserved )
__field( u8, func )
),
TP_fast_assign(
- __entry->dev = rs->rs_rbm.rgd->rd_sbd->sd_vfs->s_dev;
- __entry->rd_addr = rs->rs_rbm.rgd->rd_addr;
- __entry->rd_free_clone = rs->rs_rbm.rgd->rd_free_clone;
- __entry->rd_reserved = rs->rs_rbm.rgd->rd_reserved;
+ __entry->dev = rs->rs_rgd->rd_sbd->sd_vfs->s_dev;
+ __entry->rd_addr = rs->rs_rgd->rd_addr;
+ __entry->rd_free_clone = rs->rs_rgd->rd_free_clone;
+ __entry->rd_requested = rs->rs_rgd->rd_requested;
+ __entry->rd_reserved = rs->rs_rgd->rd_reserved;
__entry->inum = container_of(rs, struct gfs2_inode,
i_res)->i_no_addr;
- __entry->start = gfs2_rbm_to_block(&rs->rs_rbm);
- __entry->free = rs->rs_free;
+ __entry->start = rs->rs_start;
+ __entry->requested = rs->rs_requested;
+ __entry->reserved = rs->rs_reserved;
__entry->func = func;
),
- TP_printk("%u,%u bmap %llu resrv %llu rg:%llu rf:%lu rr:%lu %s f:%lu",
+ TP_printk("%u,%u bmap %llu resrv %llu rg:%llu rf:%u rq:%u rr:%u %s q:%u r:%u",
MAJOR(__entry->dev), MINOR(__entry->dev),
(unsigned long long)__entry->inum,
(unsigned long long)__entry->start,
(unsigned long long)__entry->rd_addr,
- (unsigned long)__entry->rd_free_clone,
- (unsigned long)__entry->rd_reserved,
- rs_func_name(__entry->func), (unsigned long)__entry->free)
+ __entry->rd_free_clone,
+ __entry->rd_requested,
+ __entry->rd_reserved,
+ rs_func_name(__entry->func),
+ __entry->requested,
+ __entry->reserved)
);
#endif /* _TRACE_GFS2_H */
diff --git a/fs/gfs2/trans.c b/fs/gfs2/trans.c
index 6d4bf7ea7b3b..ab96cf0bf26b 100644
--- a/fs/gfs2/trans.c
+++ b/fs/gfs2/trans.c
@@ -31,17 +31,17 @@ static void gfs2_print_trans(struct gfs2_sbd *sdp, const struct gfs2_trans *tr)
fs_warn(sdp, "blocks=%u revokes=%u reserved=%u touched=%u\n",
tr->tr_blocks, tr->tr_revokes, tr->tr_reserved,
test_bit(TR_TOUCHED, &tr->tr_flags));
- fs_warn(sdp, "Buf %u/%u Databuf %u/%u Revoke %u/%u\n",
+ fs_warn(sdp, "Buf %u/%u Databuf %u/%u Revoke %u\n",
tr->tr_num_buf_new, tr->tr_num_buf_rm,
tr->tr_num_databuf_new, tr->tr_num_databuf_rm,
- tr->tr_num_revoke, tr->tr_num_revoke_rm);
+ tr->tr_num_revoke);
}
-int gfs2_trans_begin(struct gfs2_sbd *sdp, unsigned int blocks,
- unsigned int revokes)
+int __gfs2_trans_begin(struct gfs2_trans *tr, struct gfs2_sbd *sdp,
+ unsigned int blocks, unsigned int revokes,
+ unsigned long ip)
{
- struct gfs2_trans *tr;
- int error;
+ unsigned int extra_revokes;
if (current->journal_info) {
gfs2_print_trans(sdp, current->journal_info);
@@ -52,39 +52,72 @@ int gfs2_trans_begin(struct gfs2_sbd *sdp, unsigned int blocks,
if (!test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags))
return -EROFS;
- tr = kmem_cache_zalloc(gfs2_trans_cachep, GFP_NOFS);
- if (!tr)
- return -ENOMEM;
-
- tr->tr_ip = _RET_IP_;
+ tr->tr_ip = ip;
tr->tr_blocks = blocks;
tr->tr_revokes = revokes;
- tr->tr_reserved = 1;
- set_bit(TR_ALLOCED, &tr->tr_flags);
- if (blocks)
- tr->tr_reserved += 6 + blocks;
- if (revokes)
- tr->tr_reserved += gfs2_struct2blk(sdp, revokes);
+ tr->tr_reserved = GFS2_LOG_FLUSH_MIN_BLOCKS;
+ if (blocks) {
+ /*
+ * The reserved blocks are either used for data or metadata.
+ * We can have mixed data and metadata, each with its own log
+ * descriptor block; see calc_reserved().
+ */
+ tr->tr_reserved += blocks + 1 + DIV_ROUND_UP(blocks - 1, databuf_limit(sdp));
+ }
INIT_LIST_HEAD(&tr->tr_databuf);
INIT_LIST_HEAD(&tr->tr_buf);
INIT_LIST_HEAD(&tr->tr_list);
INIT_LIST_HEAD(&tr->tr_ail1_list);
INIT_LIST_HEAD(&tr->tr_ail2_list);
+ if (gfs2_assert_warn(sdp, tr->tr_reserved <= sdp->sd_jdesc->jd_blocks))
+ return -EINVAL;
+
sb_start_intwrite(sdp->sd_vfs);
- error = gfs2_log_reserve(sdp, tr->tr_reserved);
- if (error)
- goto fail;
+ /*
+ * Try the reservations under sd_log_flush_lock to prevent log flushes
+ * from creating inconsistencies between the number of allocated and
+ * reserved revokes. If that fails, do a full-block allocation outside
+ * of the lock to avoid stalling log flushes. Then, allot the
+ * appropriate number of blocks to revokes, use as many revokes locally
+ * as needed, and "release" the surplus into the revokes pool.
+ */
+
+ down_read(&sdp->sd_log_flush_lock);
+ if (gfs2_log_try_reserve(sdp, tr, &extra_revokes))
+ goto reserved;
+ up_read(&sdp->sd_log_flush_lock);
+ gfs2_log_reserve(sdp, tr, &extra_revokes);
+ down_read(&sdp->sd_log_flush_lock);
+
+reserved:
+ gfs2_log_release_revokes(sdp, extra_revokes);
+ if (unlikely(!test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags))) {
+ gfs2_log_release_revokes(sdp, tr->tr_revokes);
+ up_read(&sdp->sd_log_flush_lock);
+ gfs2_log_release(sdp, tr->tr_reserved);
+ sb_end_intwrite(sdp->sd_vfs);
+ return -EROFS;
+ }
current->journal_info = tr;
return 0;
+}
-fail:
- sb_end_intwrite(sdp->sd_vfs);
- kmem_cache_free(gfs2_trans_cachep, tr);
+int gfs2_trans_begin(struct gfs2_sbd *sdp, unsigned int blocks,
+ unsigned int revokes)
+{
+ struct gfs2_trans *tr;
+ int error;
+ tr = kmem_cache_zalloc(gfs2_trans_cachep, GFP_NOFS);
+ if (!tr)
+ return -ENOMEM;
+ error = __gfs2_trans_begin(tr, sdp, blocks, revokes, _RET_IP_);
+ if (error)
+ kmem_cache_free(gfs2_trans_cachep, tr);
return error;
}
@@ -92,37 +125,39 @@ void gfs2_trans_end(struct gfs2_sbd *sdp)
{
struct gfs2_trans *tr = current->journal_info;
s64 nbuf;
- int alloced = test_bit(TR_ALLOCED, &tr->tr_flags);
current->journal_info = NULL;
if (!test_bit(TR_TOUCHED, &tr->tr_flags)) {
+ gfs2_log_release_revokes(sdp, tr->tr_revokes);
+ up_read(&sdp->sd_log_flush_lock);
gfs2_log_release(sdp, tr->tr_reserved);
- if (alloced) {
+ if (!test_bit(TR_ONSTACK, &tr->tr_flags))
gfs2_trans_free(sdp, tr);
- sb_end_intwrite(sdp->sd_vfs);
- }
+ sb_end_intwrite(sdp->sd_vfs);
return;
}
+ gfs2_log_release_revokes(sdp, tr->tr_revokes - tr->tr_num_revoke);
+
nbuf = tr->tr_num_buf_new + tr->tr_num_databuf_new;
nbuf -= tr->tr_num_buf_rm;
nbuf -= tr->tr_num_databuf_rm;
- if (gfs2_assert_withdraw(sdp, (nbuf <= tr->tr_blocks) &&
- (tr->tr_num_revoke <= tr->tr_revokes)))
+ if (gfs2_assert_withdraw(sdp, nbuf <= tr->tr_blocks) ||
+ gfs2_assert_withdraw(sdp, tr->tr_num_revoke <= tr->tr_revokes))
gfs2_print_trans(sdp, tr);
gfs2_log_commit(sdp, tr);
- if (alloced && !test_bit(TR_ATTACHED, &tr->tr_flags))
+ if (!test_bit(TR_ONSTACK, &tr->tr_flags) &&
+ !test_bit(TR_ATTACHED, &tr->tr_flags))
gfs2_trans_free(sdp, tr);
up_read(&sdp->sd_log_flush_lock);
if (sdp->sd_vfs->s_flags & SB_SYNCHRONOUS)
gfs2_log_flush(sdp, NULL, GFS2_LOG_HEAD_FLUSH_NORMAL |
GFS2_LFC_TRANS_END);
- if (alloced)
- sb_end_intwrite(sdp->sd_vfs);
+ sb_end_intwrite(sdp->sd_vfs);
}
static struct gfs2_bufdata *gfs2_alloc_bufdata(struct gfs2_glock *gl,
@@ -262,7 +297,6 @@ void gfs2_trans_add_revoke(struct gfs2_sbd *sdp, struct gfs2_bufdata *bd)
void gfs2_trans_remove_revoke(struct gfs2_sbd *sdp, u64 blkno, unsigned int len)
{
struct gfs2_bufdata *bd, *tmp;
- struct gfs2_trans *tr = current->journal_info;
unsigned int n = len;
gfs2_log_lock(sdp);
@@ -274,7 +308,7 @@ void gfs2_trans_remove_revoke(struct gfs2_sbd *sdp, u64 blkno, unsigned int len)
if (bd->bd_gl)
gfs2_glock_remove_revoke(bd->bd_gl);
kmem_cache_free(gfs2_bufdata_cachep, bd);
- tr->tr_num_revoke_rm++;
+ gfs2_log_release_revokes(sdp, 1);
if (--n == 0)
break;
}
diff --git a/fs/gfs2/trans.h b/fs/gfs2/trans.h
index 83199ce5a5c5..c76ad9a4c75a 100644
--- a/fs/gfs2/trans.h
+++ b/fs/gfs2/trans.h
@@ -27,13 +27,16 @@ struct gfs2_glock;
* block, or all of the blocks in the rg, whichever is smaller */
static inline unsigned int gfs2_rg_blocks(const struct gfs2_inode *ip, unsigned requested)
{
- struct gfs2_rgrpd *rgd = ip->i_res.rs_rbm.rgd;
+ struct gfs2_rgrpd *rgd = ip->i_res.rs_rgd;
if (requested < rgd->rd_length)
return requested + 1;
return rgd->rd_length;
}
+extern int __gfs2_trans_begin(struct gfs2_trans *tr, struct gfs2_sbd *sdp,
+ unsigned int blocks, unsigned int revokes,
+ unsigned long ip);
extern int gfs2_trans_begin(struct gfs2_sbd *sdp, unsigned int blocks,
unsigned int revokes);
diff --git a/fs/gfs2/util.c b/fs/gfs2/util.c
index a374397f4273..8d3c670c990f 100644
--- a/fs/gfs2/util.c
+++ b/fs/gfs2/util.c
@@ -91,12 +91,39 @@ out_unlock:
return error;
}
+/**
+ * gfs2_freeze_lock - hold the freeze glock
+ * @sdp: the superblock
+ * @freeze_gh: pointer to the requested holder
+ * @caller_flags: any additional flags needed by the caller
+ */
+int gfs2_freeze_lock(struct gfs2_sbd *sdp, struct gfs2_holder *freeze_gh,
+ int caller_flags)
+{
+ int flags = LM_FLAG_NOEXP | GL_EXACT | caller_flags;
+ int error;
+
+ error = gfs2_glock_nq_init(sdp->sd_freeze_gl, LM_ST_SHARED, flags,
+ freeze_gh);
+ if (error && error != GLR_TRYFAILED)
+ fs_err(sdp, "can't lock the freeze lock: %d\n", error);
+ return error;
+}
+
+void gfs2_freeze_unlock(struct gfs2_holder *freeze_gh)
+{
+ if (gfs2_holder_initialized(freeze_gh))
+ gfs2_glock_dq_uninit(freeze_gh);
+}
+
static void signal_our_withdraw(struct gfs2_sbd *sdp)
{
- struct gfs2_glock *gl = sdp->sd_live_gh.gh_gl;
+ struct gfs2_glock *live_gl = sdp->sd_live_gh.gh_gl;
struct inode *inode = sdp->sd_jdesc->jd_inode;
struct gfs2_inode *ip = GFS2_I(inode);
+ struct gfs2_glock *i_gl = ip->i_gl;
u64 no_formal_ino = ip->i_no_formal_ino;
+ int log_write_allowed = test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags);
int ret = 0;
int tries;
@@ -117,8 +144,21 @@ static void signal_our_withdraw(struct gfs2_sbd *sdp)
* therefore we need to clear SDF_JOURNAL_LIVE manually.
*/
clear_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags);
- if (!sb_rdonly(sdp->sd_vfs))
- ret = gfs2_make_fs_ro(sdp);
+ if (!sb_rdonly(sdp->sd_vfs)) {
+ struct gfs2_holder freeze_gh;
+
+ gfs2_holder_mark_uninitialized(&freeze_gh);
+ if (sdp->sd_freeze_gl &&
+ !gfs2_glock_is_locked_by_me(sdp->sd_freeze_gl)) {
+ ret = gfs2_freeze_lock(sdp, &freeze_gh,
+ log_write_allowed ? 0 : LM_FLAG_TRY);
+ if (ret == GLR_TRYFAILED)
+ ret = 0;
+ }
+ if (!ret)
+ ret = gfs2_make_fs_ro(sdp);
+ gfs2_freeze_unlock(&freeze_gh);
+ }
if (sdp->sd_lockstruct.ls_ops->lm_lock == NULL) { /* lock_nolock */
if (!ret)
@@ -141,7 +181,8 @@ static void signal_our_withdraw(struct gfs2_sbd *sdp)
atomic_set(&sdp->sd_freeze_state, SFS_FROZEN);
thaw_super(sdp->sd_vfs);
} else {
- wait_on_bit(&gl->gl_flags, GLF_DEMOTE, TASK_UNINTERRUPTIBLE);
+ wait_on_bit(&i_gl->gl_flags, GLF_DEMOTE,
+ TASK_UNINTERRUPTIBLE);
}
/*
@@ -161,15 +202,15 @@ static void signal_our_withdraw(struct gfs2_sbd *sdp)
* on other nodes to be successful, otherwise we remain the owner of
* the glock as far as dlm is concerned.
*/
- if (gl->gl_ops->go_free) {
- set_bit(GLF_FREEING, &gl->gl_flags);
- wait_on_bit(&gl->gl_flags, GLF_FREEING, TASK_UNINTERRUPTIBLE);
+ if (i_gl->gl_ops->go_free) {
+ set_bit(GLF_FREEING, &i_gl->gl_flags);
+ wait_on_bit(&i_gl->gl_flags, GLF_FREEING, TASK_UNINTERRUPTIBLE);
}
/*
* Dequeue the "live" glock, but keep a reference so it's never freed.
*/
- gfs2_glock_hold(gl);
+ gfs2_glock_hold(live_gl);
gfs2_glock_dq_wait(&sdp->sd_live_gh);
/*
* We enqueue the "live" glock in EX so that all other nodes
@@ -208,7 +249,7 @@ static void signal_our_withdraw(struct gfs2_sbd *sdp)
gfs2_glock_nq(&sdp->sd_live_gh);
}
- gfs2_glock_queue_put(gl); /* drop the extra reference we acquired */
+ gfs2_glock_queue_put(live_gl); /* drop extra reference we acquired */
clear_bit(SDF_WITHDRAW_RECOVERY, &sdp->sd_flags);
/*
diff --git a/fs/gfs2/util.h b/fs/gfs2/util.h
index a4443dd8a94b..69e1a0ae5a4d 100644
--- a/fs/gfs2/util.h
+++ b/fs/gfs2/util.h
@@ -149,6 +149,9 @@ int gfs2_io_error_i(struct gfs2_sbd *sdp, const char *function,
extern int check_journal_clean(struct gfs2_sbd *sdp, struct gfs2_jdesc *jd,
bool verbose);
+extern int gfs2_freeze_lock(struct gfs2_sbd *sdp,
+ struct gfs2_holder *freeze_gh, int caller_flags);
+extern void gfs2_freeze_unlock(struct gfs2_holder *freeze_gh);
#define gfs2_io_error(sdp) \
gfs2_io_error_i((sdp), __func__, __FILE__, __LINE__)
diff --git a/fs/gfs2/xattr.c b/fs/gfs2/xattr.c
index 9d7667bc4292..124b3d5a7266 100644
--- a/fs/gfs2/xattr.c
+++ b/fs/gfs2/xattr.c
@@ -70,6 +70,20 @@ static int ea_check_size(struct gfs2_sbd *sdp, unsigned int nsize, size_t dsize)
return 0;
}
+static bool gfs2_eatype_valid(struct gfs2_sbd *sdp, u8 type)
+{
+ switch(sdp->sd_sb.sb_fs_format) {
+ case GFS2_FS_FORMAT_MAX:
+ return true;
+
+ case GFS2_FS_FORMAT_MIN:
+ return type <= GFS2_EATYPE_SECURITY;
+
+ default:
+ return false;
+ }
+}
+
typedef int (*ea_call_t) (struct gfs2_inode *ip, struct buffer_head *bh,
struct gfs2_ea_header *ea,
struct gfs2_ea_header *prev, void *private);
@@ -77,6 +91,7 @@ typedef int (*ea_call_t) (struct gfs2_inode *ip, struct buffer_head *bh,
static int ea_foreach_i(struct gfs2_inode *ip, struct buffer_head *bh,
ea_call_t ea_call, void *data)
{
+ struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
struct gfs2_ea_header *ea, *prev = NULL;
int error = 0;
@@ -89,9 +104,8 @@ static int ea_foreach_i(struct gfs2_inode *ip, struct buffer_head *bh,
if (!(bh->b_data <= (char *)ea && (char *)GFS2_EA2NEXT(ea) <=
bh->b_data + bh->b_size))
goto fail;
- if (!GFS2_EATYPE_VALID(ea->ea_type))
+ if (!gfs2_eatype_valid(sdp, ea->ea_type))
goto fail;
-
error = ea_call(ip, bh, ea, prev, data);
if (error)
return error;
@@ -259,7 +273,8 @@ static int ea_dealloc_unstuffed(struct gfs2_inode *ip, struct buffer_head *bh,
return -EIO;
}
- error = gfs2_glock_nq_init(rgd->rd_gl, LM_ST_EXCLUSIVE, 0, &rg_gh);
+ error = gfs2_glock_nq_init(rgd->rd_gl, LM_ST_EXCLUSIVE,
+ LM_FLAG_NODE_SCOPE, &rg_gh);
if (error)
return error;
@@ -344,6 +359,7 @@ static int ea_list_i(struct gfs2_inode *ip, struct buffer_head *bh,
struct gfs2_ea_header *ea, struct gfs2_ea_header *prev,
void *private)
{
+ struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
struct ea_list *ei = private;
struct gfs2_ea_request *er = ei->ei_er;
unsigned int ea_size;
@@ -353,6 +369,8 @@ static int ea_list_i(struct gfs2_inode *ip, struct buffer_head *bh,
if (ea->ea_type == GFS2_EATYPE_UNUSED)
return 0;
+ BUG_ON(ea->ea_type > GFS2_EATYPE_SECURITY &&
+ sdp->sd_sb.sb_fs_format == GFS2_FS_FORMAT_MIN);
switch (ea->ea_type) {
case GFS2_EATYPE_USR:
prefix = "user.";
@@ -366,8 +384,12 @@ static int ea_list_i(struct gfs2_inode *ip, struct buffer_head *bh,
prefix = "security.";
l = 9;
break;
+ case GFS2_EATYPE_TRUSTED:
+ prefix = "trusted.";
+ l = 8;
+ break;
default:
- BUG();
+ return 0;
}
ea_size = l + ea->ea_name_len + 1;
@@ -1214,6 +1236,7 @@ int __gfs2_xattr_set(struct inode *inode, const char *name,
}
static int gfs2_xattr_set(const struct xattr_handler *handler,
+ struct user_namespace *mnt_userns,
struct dentry *unused, struct inode *inode,
const char *name, const void *value,
size_t size, int flags)
@@ -1385,7 +1408,8 @@ static int ea_dealloc_block(struct gfs2_inode *ip)
return -EIO;
}
- error = gfs2_glock_nq_init(rgd->rd_gl, LM_ST_EXCLUSIVE, 0, &gh);
+ error = gfs2_glock_nq_init(rgd->rd_gl, LM_ST_EXCLUSIVE,
+ LM_FLAG_NODE_SCOPE, &gh);
if (error)
return error;
@@ -1463,7 +1487,25 @@ static const struct xattr_handler gfs2_xattr_security_handler = {
.set = gfs2_xattr_set,
};
-const struct xattr_handler *gfs2_xattr_handlers[] = {
+static bool
+gfs2_xattr_trusted_list(struct dentry *dentry)
+{
+ return capable(CAP_SYS_ADMIN);
+}
+
+static const struct xattr_handler gfs2_xattr_trusted_handler = {
+ .prefix = XATTR_TRUSTED_PREFIX,
+ .flags = GFS2_EATYPE_TRUSTED,
+ .list = gfs2_xattr_trusted_list,
+ .get = gfs2_xattr_get,
+ .set = gfs2_xattr_set,
+};
+
+const struct xattr_handler *gfs2_xattr_handlers_max[] = {
+ /* GFS2_FS_FORMAT_MAX */
+ &gfs2_xattr_trusted_handler,
+
+ /* GFS2_FS_FORMAT_MIN */
&gfs2_xattr_user_handler,
&gfs2_xattr_security_handler,
&posix_acl_access_xattr_handler,
@@ -1471,3 +1513,4 @@ const struct xattr_handler *gfs2_xattr_handlers[] = {
NULL,
};
+const struct xattr_handler **gfs2_xattr_handlers_min = gfs2_xattr_handlers_max + 1;
diff --git a/fs/hfs/attr.c b/fs/hfs/attr.c
index 74fa62643136..2bd54efaf416 100644
--- a/fs/hfs/attr.c
+++ b/fs/hfs/attr.c
@@ -121,6 +121,7 @@ static int hfs_xattr_get(const struct xattr_handler *handler,
}
static int hfs_xattr_set(const struct xattr_handler *handler,
+ struct user_namespace *mnt_userns,
struct dentry *unused, struct inode *inode,
const char *name, const void *value, size_t size,
int flags)
diff --git a/fs/hfs/dir.c b/fs/hfs/dir.c
index 3bf2ae0e467c..527f6e46cbe8 100644
--- a/fs/hfs/dir.c
+++ b/fs/hfs/dir.c
@@ -189,8 +189,8 @@ static int hfs_dir_release(struct inode *inode, struct file *file)
* a directory and return a corresponding inode, given the inode for
* the directory and the name (and its length) of the new file.
*/
-static int hfs_create(struct inode *dir, struct dentry *dentry, umode_t mode,
- bool excl)
+static int hfs_create(struct user_namespace *mnt_userns, struct inode *dir,
+ struct dentry *dentry, umode_t mode, bool excl)
{
struct inode *inode;
int res;
@@ -219,7 +219,8 @@ static int hfs_create(struct inode *dir, struct dentry *dentry, umode_t mode,
* in a directory, given the inode for the parent directory and the
* name (and its length) of the new directory.
*/
-static int hfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
+static int hfs_mkdir(struct user_namespace *mnt_userns, struct inode *dir,
+ struct dentry *dentry, umode_t mode)
{
struct inode *inode;
int res;
@@ -279,9 +280,9 @@ static int hfs_remove(struct inode *dir, struct dentry *dentry)
* new file/directory.
* XXX: how do you handle must_be dir?
*/
-static int hfs_rename(struct inode *old_dir, struct dentry *old_dentry,
- struct inode *new_dir, struct dentry *new_dentry,
- unsigned int flags)
+static int hfs_rename(struct user_namespace *mnt_userns, struct inode *old_dir,
+ struct dentry *old_dentry, struct inode *new_dir,
+ struct dentry *new_dentry, unsigned int flags)
{
int res;
diff --git a/fs/hfs/hfs_fs.h b/fs/hfs/hfs_fs.h
index f71c384064c8..b8eb0322a3e5 100644
--- a/fs/hfs/hfs_fs.h
+++ b/fs/hfs/hfs_fs.h
@@ -204,7 +204,8 @@ extern const struct address_space_operations hfs_btree_aops;
extern struct inode *hfs_new_inode(struct inode *, const struct qstr *, umode_t);
extern void hfs_inode_write_fork(struct inode *, struct hfs_extent *, __be32 *, __be32 *);
extern int hfs_write_inode(struct inode *, struct writeback_control *);
-extern int hfs_inode_setattr(struct dentry *, struct iattr *);
+extern int hfs_inode_setattr(struct user_namespace *, struct dentry *,
+ struct iattr *);
extern void hfs_inode_read_fork(struct inode *inode, struct hfs_extent *ext,
__be32 log_size, __be32 phys_size, u32 clump_size);
extern struct inode *hfs_iget(struct super_block *, struct hfs_cat_key *, hfs_cat_rec *);
diff --git a/fs/hfs/inode.c b/fs/hfs/inode.c
index f35a37c65e5f..3fc5cb346586 100644
--- a/fs/hfs/inode.c
+++ b/fs/hfs/inode.c
@@ -602,13 +602,15 @@ static int hfs_file_release(struct inode *inode, struct file *file)
* correspond to the same HFS file.
*/
-int hfs_inode_setattr(struct dentry *dentry, struct iattr * attr)
+int hfs_inode_setattr(struct user_namespace *mnt_userns, struct dentry *dentry,
+ struct iattr *attr)
{
struct inode *inode = d_inode(dentry);
struct hfs_sb_info *hsb = HFS_SB(inode->i_sb);
int error;
- error = setattr_prepare(dentry, attr); /* basic permission checks */
+ error = setattr_prepare(&init_user_ns, dentry,
+ attr); /* basic permission checks */
if (error)
return error;
@@ -647,7 +649,7 @@ int hfs_inode_setattr(struct dentry *dentry, struct iattr * attr)
current_time(inode);
}
- setattr_copy(inode, attr);
+ setattr_copy(&init_user_ns, inode, attr);
mark_inode_dirty(inode);
return 0;
}
diff --git a/fs/hfsplus/dir.c b/fs/hfsplus/dir.c
index 29a9dcfbe81f..03e6c046faf4 100644
--- a/fs/hfsplus/dir.c
+++ b/fs/hfsplus/dir.c
@@ -434,8 +434,8 @@ out:
return res;
}
-static int hfsplus_symlink(struct inode *dir, struct dentry *dentry,
- const char *symname)
+static int hfsplus_symlink(struct user_namespace *mnt_userns, struct inode *dir,
+ struct dentry *dentry, const char *symname)
{
struct hfsplus_sb_info *sbi = HFSPLUS_SB(dir->i_sb);
struct inode *inode;
@@ -476,8 +476,8 @@ out:
return res;
}
-static int hfsplus_mknod(struct inode *dir, struct dentry *dentry,
- umode_t mode, dev_t rdev)
+static int hfsplus_mknod(struct user_namespace *mnt_userns, struct inode *dir,
+ struct dentry *dentry, umode_t mode, dev_t rdev)
{
struct hfsplus_sb_info *sbi = HFSPLUS_SB(dir->i_sb);
struct inode *inode;
@@ -517,18 +517,20 @@ out:
return res;
}
-static int hfsplus_create(struct inode *dir, struct dentry *dentry, umode_t mode,
- bool excl)
+static int hfsplus_create(struct user_namespace *mnt_userns, struct inode *dir,
+ struct dentry *dentry, umode_t mode, bool excl)
{
- return hfsplus_mknod(dir, dentry, mode, 0);
+ return hfsplus_mknod(&init_user_ns, dir, dentry, mode, 0);
}
-static int hfsplus_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
+static int hfsplus_mkdir(struct user_namespace *mnt_userns, struct inode *dir,
+ struct dentry *dentry, umode_t mode)
{
- return hfsplus_mknod(dir, dentry, mode | S_IFDIR, 0);
+ return hfsplus_mknod(&init_user_ns, dir, dentry, mode | S_IFDIR, 0);
}
-static int hfsplus_rename(struct inode *old_dir, struct dentry *old_dentry,
+static int hfsplus_rename(struct user_namespace *mnt_userns,
+ struct inode *old_dir, struct dentry *old_dentry,
struct inode *new_dir, struct dentry *new_dentry,
unsigned int flags)
{
diff --git a/fs/hfsplus/hfsplus_fs.h b/fs/hfsplus/hfsplus_fs.h
index a92de5199ec3..12b20479ed2b 100644
--- a/fs/hfsplus/hfsplus_fs.h
+++ b/fs/hfsplus/hfsplus_fs.h
@@ -488,8 +488,9 @@ void hfsplus_inode_write_fork(struct inode *inode,
struct hfsplus_fork_raw *fork);
int hfsplus_cat_read_inode(struct inode *inode, struct hfs_find_data *fd);
int hfsplus_cat_write_inode(struct inode *inode);
-int hfsplus_getattr(const struct path *path, struct kstat *stat,
- u32 request_mask, unsigned int query_flags);
+int hfsplus_getattr(struct user_namespace *mnt_userns, const struct path *path,
+ struct kstat *stat, u32 request_mask,
+ unsigned int query_flags);
int hfsplus_file_fsync(struct file *file, loff_t start, loff_t end,
int datasync);
diff --git a/fs/hfsplus/inode.c b/fs/hfsplus/inode.c
index ca464328b79c..078c5c8a5156 100644
--- a/fs/hfsplus/inode.c
+++ b/fs/hfsplus/inode.c
@@ -241,12 +241,13 @@ static int hfsplus_file_release(struct inode *inode, struct file *file)
return 0;
}
-static int hfsplus_setattr(struct dentry *dentry, struct iattr *attr)
+static int hfsplus_setattr(struct user_namespace *mnt_userns,
+ struct dentry *dentry, struct iattr *attr)
{
struct inode *inode = d_inode(dentry);
int error;
- error = setattr_prepare(dentry, attr);
+ error = setattr_prepare(&init_user_ns, dentry, attr);
if (error)
return error;
@@ -264,14 +265,15 @@ static int hfsplus_setattr(struct dentry *dentry, struct iattr *attr)
inode->i_mtime = inode->i_ctime = current_time(inode);
}
- setattr_copy(inode, attr);
+ setattr_copy(&init_user_ns, inode, attr);
mark_inode_dirty(inode);
return 0;
}
-int hfsplus_getattr(const struct path *path, struct kstat *stat,
- u32 request_mask, unsigned int query_flags)
+int hfsplus_getattr(struct user_namespace *mnt_userns, const struct path *path,
+ struct kstat *stat, u32 request_mask,
+ unsigned int query_flags)
{
struct inode *inode = d_inode(path->dentry);
struct hfsplus_inode_info *hip = HFSPLUS_I(inode);
@@ -286,7 +288,7 @@ int hfsplus_getattr(const struct path *path, struct kstat *stat,
stat->attributes_mask |= STATX_ATTR_APPEND | STATX_ATTR_IMMUTABLE |
STATX_ATTR_NODUMP;
- generic_fillattr(inode, stat);
+ generic_fillattr(&init_user_ns, inode, stat);
return 0;
}
@@ -376,7 +378,7 @@ struct inode *hfsplus_new_inode(struct super_block *sb, struct inode *dir,
return NULL;
inode->i_ino = sbi->next_cnid++;
- inode_init_owner(inode, dir, mode);
+ inode_init_owner(&init_user_ns, inode, dir, mode);
set_nlink(inode, 1);
inode->i_mtime = inode->i_atime = inode->i_ctime = current_time(inode);
diff --git a/fs/hfsplus/ioctl.c b/fs/hfsplus/ioctl.c
index ce15b9496b77..3edb1926d127 100644
--- a/fs/hfsplus/ioctl.c
+++ b/fs/hfsplus/ioctl.c
@@ -91,7 +91,7 @@ static int hfsplus_ioctl_setflags(struct file *file, int __user *user_flags)
if (err)
goto out;
- if (!inode_owner_or_capable(inode)) {
+ if (!inode_owner_or_capable(&init_user_ns, inode)) {
err = -EACCES;
goto out_drop_write;
}
diff --git a/fs/hfsplus/xattr.c b/fs/hfsplus/xattr.c
index bb0b27d88e50..4d169c5a2673 100644
--- a/fs/hfsplus/xattr.c
+++ b/fs/hfsplus/xattr.c
@@ -858,6 +858,7 @@ static int hfsplus_osx_getxattr(const struct xattr_handler *handler,
}
static int hfsplus_osx_setxattr(const struct xattr_handler *handler,
+ struct user_namespace *mnt_userns,
struct dentry *unused, struct inode *inode,
const char *name, const void *buffer,
size_t size, int flags)
diff --git a/fs/hfsplus/xattr_security.c b/fs/hfsplus/xattr_security.c
index cfbe6a3bfb1e..c1c7a16cbf21 100644
--- a/fs/hfsplus/xattr_security.c
+++ b/fs/hfsplus/xattr_security.c
@@ -23,6 +23,7 @@ static int hfsplus_security_getxattr(const struct xattr_handler *handler,
}
static int hfsplus_security_setxattr(const struct xattr_handler *handler,
+ struct user_namespace *mnt_userns,
struct dentry *unused, struct inode *inode,
const char *name, const void *buffer,
size_t size, int flags)
diff --git a/fs/hfsplus/xattr_trusted.c b/fs/hfsplus/xattr_trusted.c
index fbad91e1dada..e150372ec564 100644
--- a/fs/hfsplus/xattr_trusted.c
+++ b/fs/hfsplus/xattr_trusted.c
@@ -22,6 +22,7 @@ static int hfsplus_trusted_getxattr(const struct xattr_handler *handler,
}
static int hfsplus_trusted_setxattr(const struct xattr_handler *handler,
+ struct user_namespace *mnt_userns,
struct dentry *unused, struct inode *inode,
const char *name, const void *buffer,
size_t size, int flags)
diff --git a/fs/hfsplus/xattr_user.c b/fs/hfsplus/xattr_user.c
index 74d19faf255e..a6b60b153916 100644
--- a/fs/hfsplus/xattr_user.c
+++ b/fs/hfsplus/xattr_user.c
@@ -22,6 +22,7 @@ static int hfsplus_user_getxattr(const struct xattr_handler *handler,
}
static int hfsplus_user_setxattr(const struct xattr_handler *handler,
+ struct user_namespace *mnt_userns,
struct dentry *unused, struct inode *inode,
const char *name, const void *buffer,
size_t size, int flags)
diff --git a/fs/hostfs/hostfs_kern.c b/fs/hostfs/hostfs_kern.c
index 4a5beca6eaa8..29e407762626 100644
--- a/fs/hostfs/hostfs_kern.c
+++ b/fs/hostfs/hostfs_kern.c
@@ -557,8 +557,8 @@ static int read_name(struct inode *ino, char *name)
return 0;
}
-static int hostfs_create(struct inode *dir, struct dentry *dentry, umode_t mode,
- bool excl)
+static int hostfs_create(struct user_namespace *mnt_userns, struct inode *dir,
+ struct dentry *dentry, umode_t mode, bool excl)
{
struct inode *inode;
char *name;
@@ -656,8 +656,8 @@ static int hostfs_unlink(struct inode *ino, struct dentry *dentry)
return err;
}
-static int hostfs_symlink(struct inode *ino, struct dentry *dentry,
- const char *to)
+static int hostfs_symlink(struct user_namespace *mnt_userns, struct inode *ino,
+ struct dentry *dentry, const char *to)
{
char *file;
int err;
@@ -669,7 +669,8 @@ static int hostfs_symlink(struct inode *ino, struct dentry *dentry,
return err;
}
-static int hostfs_mkdir(struct inode *ino, struct dentry *dentry, umode_t mode)
+static int hostfs_mkdir(struct user_namespace *mnt_userns, struct inode *ino,
+ struct dentry *dentry, umode_t mode)
{
char *file;
int err;
@@ -693,7 +694,8 @@ static int hostfs_rmdir(struct inode *ino, struct dentry *dentry)
return err;
}
-static int hostfs_mknod(struct inode *dir, struct dentry *dentry, umode_t mode, dev_t dev)
+static int hostfs_mknod(struct user_namespace *mnt_userns, struct inode *dir,
+ struct dentry *dentry, umode_t mode, dev_t dev)
{
struct inode *inode;
char *name;
@@ -731,7 +733,8 @@ static int hostfs_mknod(struct inode *dir, struct dentry *dentry, umode_t mode,
return err;
}
-static int hostfs_rename2(struct inode *old_dir, struct dentry *old_dentry,
+static int hostfs_rename2(struct user_namespace *mnt_userns,
+ struct inode *old_dir, struct dentry *old_dentry,
struct inode *new_dir, struct dentry *new_dentry,
unsigned int flags)
{
@@ -759,7 +762,8 @@ static int hostfs_rename2(struct inode *old_dir, struct dentry *old_dentry,
return err;
}
-static int hostfs_permission(struct inode *ino, int desired)
+static int hostfs_permission(struct user_namespace *mnt_userns,
+ struct inode *ino, int desired)
{
char *name;
int r = 0, w = 0, x = 0, err;
@@ -781,11 +785,12 @@ static int hostfs_permission(struct inode *ino, int desired)
err = access_file(name, r, w, x);
__putname(name);
if (!err)
- err = generic_permission(ino, desired);
+ err = generic_permission(&init_user_ns, ino, desired);
return err;
}
-static int hostfs_setattr(struct dentry *dentry, struct iattr *attr)
+static int hostfs_setattr(struct user_namespace *mnt_userns,
+ struct dentry *dentry, struct iattr *attr)
{
struct inode *inode = d_inode(dentry);
struct hostfs_iattr attrs;
@@ -794,7 +799,7 @@ static int hostfs_setattr(struct dentry *dentry, struct iattr *attr)
int fd = HOSTFS_I(inode)->fd;
- err = setattr_prepare(dentry, attr);
+ err = setattr_prepare(&init_user_ns, dentry, attr);
if (err)
return err;
@@ -851,7 +856,7 @@ static int hostfs_setattr(struct dentry *dentry, struct iattr *attr)
attr->ia_size != i_size_read(inode))
truncate_setsize(inode, attr->ia_size);
- setattr_copy(inode, attr);
+ setattr_copy(&init_user_ns, inode, attr);
mark_inode_dirty(inode);
return 0;
}
diff --git a/fs/hpfs/hpfs_fn.h b/fs/hpfs/hpfs_fn.h
index 1cca83218fb5..167ec6884642 100644
--- a/fs/hpfs/hpfs_fn.h
+++ b/fs/hpfs/hpfs_fn.h
@@ -280,7 +280,7 @@ void hpfs_init_inode(struct inode *);
void hpfs_read_inode(struct inode *);
void hpfs_write_inode(struct inode *);
void hpfs_write_inode_nolock(struct inode *);
-int hpfs_setattr(struct dentry *, struct iattr *);
+int hpfs_setattr(struct user_namespace *, struct dentry *, struct iattr *);
void hpfs_write_if_changed(struct inode *);
void hpfs_evict_inode(struct inode *);
diff --git a/fs/hpfs/inode.c b/fs/hpfs/inode.c
index eb8b4baf0f2e..82208cc28ebd 100644
--- a/fs/hpfs/inode.c
+++ b/fs/hpfs/inode.c
@@ -257,7 +257,8 @@ void hpfs_write_inode_nolock(struct inode *i)
brelse(bh);
}
-int hpfs_setattr(struct dentry *dentry, struct iattr *attr)
+int hpfs_setattr(struct user_namespace *mnt_userns, struct dentry *dentry,
+ struct iattr *attr)
{
struct inode *inode = d_inode(dentry);
int error = -EINVAL;
@@ -274,7 +275,7 @@ int hpfs_setattr(struct dentry *dentry, struct iattr *attr)
if ((attr->ia_valid & ATTR_SIZE) && attr->ia_size > inode->i_size)
goto out_unlock;
- error = setattr_prepare(dentry, attr);
+ error = setattr_prepare(&init_user_ns, dentry, attr);
if (error)
goto out_unlock;
@@ -288,7 +289,7 @@ int hpfs_setattr(struct dentry *dentry, struct iattr *attr)
hpfs_truncate(inode);
}
- setattr_copy(inode, attr);
+ setattr_copy(&init_user_ns, inode, attr);
hpfs_write_inode(inode);
diff --git a/fs/hpfs/namei.c b/fs/hpfs/namei.c
index 1aee39160ac5..d73f8a67168e 100644
--- a/fs/hpfs/namei.c
+++ b/fs/hpfs/namei.c
@@ -20,7 +20,8 @@ static void hpfs_update_directory_times(struct inode *dir)
hpfs_write_inode_nolock(dir);
}
-static int hpfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
+static int hpfs_mkdir(struct user_namespace *mnt_userns, struct inode *dir,
+ struct dentry *dentry, umode_t mode)
{
const unsigned char *name = dentry->d_name.name;
unsigned len = dentry->d_name.len;
@@ -128,7 +129,8 @@ bail:
return err;
}
-static int hpfs_create(struct inode *dir, struct dentry *dentry, umode_t mode, bool excl)
+static int hpfs_create(struct user_namespace *mnt_userns, struct inode *dir,
+ struct dentry *dentry, umode_t mode, bool excl)
{
const unsigned char *name = dentry->d_name.name;
unsigned len = dentry->d_name.len;
@@ -215,7 +217,8 @@ bail:
return err;
}
-static int hpfs_mknod(struct inode *dir, struct dentry *dentry, umode_t mode, dev_t rdev)
+static int hpfs_mknod(struct user_namespace *mnt_userns, struct inode *dir,
+ struct dentry *dentry, umode_t mode, dev_t rdev)
{
const unsigned char *name = dentry->d_name.name;
unsigned len = dentry->d_name.len;
@@ -289,7 +292,8 @@ bail:
return err;
}
-static int hpfs_symlink(struct inode *dir, struct dentry *dentry, const char *symlink)
+static int hpfs_symlink(struct user_namespace *mnt_userns, struct inode *dir,
+ struct dentry *dentry, const char *symlink)
{
const unsigned char *name = dentry->d_name.name;
unsigned len = dentry->d_name.len;
@@ -506,10 +510,10 @@ fail:
const struct address_space_operations hpfs_symlink_aops = {
.readpage = hpfs_symlink_readpage
};
-
-static int hpfs_rename(struct inode *old_dir, struct dentry *old_dentry,
- struct inode *new_dir, struct dentry *new_dentry,
- unsigned int flags)
+
+static int hpfs_rename(struct user_namespace *mnt_userns, struct inode *old_dir,
+ struct dentry *old_dentry, struct inode *new_dir,
+ struct dentry *new_dentry, unsigned int flags)
{
const unsigned char *old_name = old_dentry->d_name.name;
unsigned old_len = old_dentry->d_name.len;
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index 21c20fd5f9ee..701c82c36138 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -171,7 +171,7 @@ static int hugetlbfs_file_mmap(struct file *file, struct vm_area_struct *vma)
file_accessed(file);
ret = -ENOMEM;
- if (hugetlb_reserve_pages(inode,
+ if (!hugetlb_reserve_pages(inode,
vma->vm_pgoff >> huge_page_order(h),
len >> huge_page_shift(h), vma,
vma->vm_flags))
@@ -310,7 +310,7 @@ hugetlbfs_read_actor(struct page *page, unsigned long offset,
/*
* Support for read() - Find the page attached to f_mapping and copy out the
- * data. Its *very* similar to do_generic_mapping_read(), we can't use that
+ * data. Its *very* similar to generic_file_buffered_read(), we can't use that
* since it has PAGE_SIZE assumptions.
*/
static ssize_t hugetlbfs_read_iter(struct kiocb *iocb, struct iov_iter *to)
@@ -442,15 +442,15 @@ hugetlb_vmdelete_list(struct rb_root_cached *root, pgoff_t start, pgoff_t end)
*
* truncation is indicated by end of range being LLONG_MAX
* In this case, we first scan the range and release found pages.
- * After releasing pages, hugetlb_unreserve_pages cleans up region/reserv
+ * After releasing pages, hugetlb_unreserve_pages cleans up region/reserve
* maps and global counts. Page faults can not race with truncation
* in this routine. hugetlb_no_page() holds i_mmap_rwsem and prevents
* page faults in the truncated range by checking i_size. i_size is
* modified while holding i_mmap_rwsem.
* hole punch is indicated if end is not LLONG_MAX
* In the hole punch case we scan the range and release found pages.
- * Only when releasing a page is the associated region/reserv map
- * deleted. The region/reserv map for ranges without associated
+ * Only when releasing a page is the associated region/reserve map
+ * deleted. The region/reserve map for ranges without associated
* pages are not modified. Page faults can race with hole punch.
* This is indicated if we find a mapped page.
* Note: If the passed end of range value is beyond the end of file, but
@@ -567,7 +567,7 @@ static void hugetlbfs_evict_inode(struct inode *inode)
clear_inode(inode);
}
-static int hugetlb_vmtruncate(struct inode *inode, loff_t offset)
+static void hugetlb_vmtruncate(struct inode *inode, loff_t offset)
{
pgoff_t pgoff;
struct address_space *mapping = inode->i_mapping;
@@ -582,7 +582,6 @@ static int hugetlb_vmtruncate(struct inode *inode, loff_t offset)
hugetlb_vmdelete_list(&mapping->i_mmap, pgoff, 0);
i_mmap_unlock_write(mapping);
remove_inode_hugepages(inode, offset, LLONG_MAX);
- return 0;
}
static long hugetlbfs_punch_hole(struct inode *inode, loff_t offset, loff_t len)
@@ -604,7 +603,7 @@ static long hugetlbfs_punch_hole(struct inode *inode, loff_t offset, loff_t len)
inode_lock(inode);
- /* protected by i_mutex */
+ /* protected by i_rwsem */
if (info->seals & (F_SEAL_WRITE | F_SEAL_FUTURE_WRITE)) {
inode_unlock(inode);
return -EPERM;
@@ -680,7 +679,6 @@ static long hugetlbfs_fallocate(struct file *file, int mode, loff_t offset,
*/
struct page *page;
unsigned long addr;
- int avoid_reserve = 0;
cond_resched();
@@ -716,8 +714,15 @@ static long hugetlbfs_fallocate(struct file *file, int mode, loff_t offset,
continue;
}
- /* Allocate page and add to page cache */
- page = alloc_huge_page(&pseudo_vma, addr, avoid_reserve);
+ /*
+ * Allocate page without setting the avoid_reserve argument.
+ * There certainly are no reserves associated with the
+ * pseudo_vma. However, there could be shared mappings with
+ * reserves for the file at the inode level. If we fallocate
+ * pages in these areas, we need to consume the reserves
+ * to keep reservation accounting consistent.
+ */
+ page = alloc_huge_page(&pseudo_vma, addr, 0);
hugetlb_drop_vma_policy(&pseudo_vma);
if (IS_ERR(page)) {
mutex_unlock(&hugetlb_fault_mutex_table[hash]);
@@ -735,7 +740,7 @@ static long hugetlbfs_fallocate(struct file *file, int mode, loff_t offset,
mutex_unlock(&hugetlb_fault_mutex_table[hash]);
- set_page_huge_active(page);
+ SetHPageMigratable(page);
/*
* unlock_page because locked by add_to_page_cache()
* put_page() due to reference from alloc_huge_page()
@@ -752,7 +757,8 @@ out:
return error;
}
-static int hugetlbfs_setattr(struct dentry *dentry, struct iattr *attr)
+static int hugetlbfs_setattr(struct user_namespace *mnt_userns,
+ struct dentry *dentry, struct iattr *attr)
{
struct inode *inode = d_inode(dentry);
struct hstate *h = hstate_inode(inode);
@@ -760,9 +766,7 @@ static int hugetlbfs_setattr(struct dentry *dentry, struct iattr *attr)
unsigned int ia_valid = attr->ia_valid;
struct hugetlbfs_inode_info *info = HUGETLBFS_I(inode);
- BUG_ON(!inode);
-
- error = setattr_prepare(dentry, attr);
+ error = setattr_prepare(&init_user_ns, dentry, attr);
if (error)
return error;
@@ -772,16 +776,14 @@ static int hugetlbfs_setattr(struct dentry *dentry, struct iattr *attr)
if (newsize & ~huge_page_mask(h))
return -EINVAL;
- /* protected by i_mutex */
+ /* protected by i_rwsem */
if ((newsize < oldsize && (info->seals & F_SEAL_SHRINK)) ||
(newsize > oldsize && (info->seals & F_SEAL_GROW)))
return -EPERM;
- error = hugetlb_vmtruncate(inode, newsize);
- if (error)
- return error;
+ hugetlb_vmtruncate(inode, newsize);
}
- setattr_copy(inode, attr);
+ setattr_copy(&init_user_ns, inode, attr);
mark_inode_dirty(inode);
return 0;
}
@@ -837,7 +839,7 @@ static struct inode *hugetlbfs_get_inode(struct super_block *sb,
struct hugetlbfs_inode_info *info = HUGETLBFS_I(inode);
inode->i_ino = get_next_ino();
- inode_init_owner(inode, dir, mode);
+ inode_init_owner(&init_user_ns, inode, dir, mode);
lockdep_set_class(&inode->i_mapping->i_mmap_rwsem,
&hugetlbfs_i_mmap_rwsem_key);
inode->i_mapping->a_ops = &hugetlbfs_aops;
@@ -899,33 +901,39 @@ static int do_hugetlbfs_mknod(struct inode *dir,
return error;
}
-static int hugetlbfs_mknod(struct inode *dir,
- struct dentry *dentry, umode_t mode, dev_t dev)
+static int hugetlbfs_mknod(struct user_namespace *mnt_userns, struct inode *dir,
+ struct dentry *dentry, umode_t mode, dev_t dev)
{
return do_hugetlbfs_mknod(dir, dentry, mode, dev, false);
}
-static int hugetlbfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
+static int hugetlbfs_mkdir(struct user_namespace *mnt_userns, struct inode *dir,
+ struct dentry *dentry, umode_t mode)
{
- int retval = hugetlbfs_mknod(dir, dentry, mode | S_IFDIR, 0);
+ int retval = hugetlbfs_mknod(&init_user_ns, dir, dentry,
+ mode | S_IFDIR, 0);
if (!retval)
inc_nlink(dir);
return retval;
}
-static int hugetlbfs_create(struct inode *dir, struct dentry *dentry, umode_t mode, bool excl)
+static int hugetlbfs_create(struct user_namespace *mnt_userns,
+ struct inode *dir, struct dentry *dentry,
+ umode_t mode, bool excl)
{
- return hugetlbfs_mknod(dir, dentry, mode | S_IFREG, 0);
+ return hugetlbfs_mknod(&init_user_ns, dir, dentry, mode | S_IFREG, 0);
}
-static int hugetlbfs_tmpfile(struct inode *dir,
- struct dentry *dentry, umode_t mode)
+static int hugetlbfs_tmpfile(struct user_namespace *mnt_userns,
+ struct inode *dir, struct dentry *dentry,
+ umode_t mode)
{
return do_hugetlbfs_mknod(dir, dentry, mode | S_IFREG, 0, true);
}
-static int hugetlbfs_symlink(struct inode *dir,
- struct dentry *dentry, const char *symname)
+static int hugetlbfs_symlink(struct user_namespace *mnt_userns,
+ struct inode *dir, struct dentry *dentry,
+ const char *symname)
{
struct inode *inode;
int error = -ENOSPC;
@@ -945,17 +953,6 @@ static int hugetlbfs_symlink(struct inode *dir,
return error;
}
-/*
- * mark the head page dirty
- */
-static int hugetlbfs_set_page_dirty(struct page *page)
-{
- struct page *head = compound_head(page);
-
- SetPageDirty(head);
- return 0;
-}
-
static int hugetlbfs_migrate_page(struct address_space *mapping,
struct page *newpage, struct page *page,
enum migrate_mode mode)
@@ -966,15 +963,9 @@ static int hugetlbfs_migrate_page(struct address_space *mapping,
if (rc != MIGRATEPAGE_SUCCESS)
return rc;
- /*
- * page_private is subpool pointer in hugetlb pages. Transfer to
- * new page. PagePrivate is not associated with page_private for
- * hugetlb pages and can not be set here as only page_huge_active
- * pages can be migrated.
- */
- if (page_private(page)) {
- set_page_private(newpage, page_private(page));
- set_page_private(page, 0);
+ if (hugetlb_page_subpool(page)) {
+ hugetlb_set_page_subpool(newpage, hugetlb_page_subpool(page));
+ hugetlb_set_page_subpool(page, NULL);
}
if (mode != MIGRATE_SYNC_NO_COPY)
@@ -1149,7 +1140,7 @@ static void hugetlbfs_destroy_inode(struct inode *inode)
static const struct address_space_operations hugetlbfs_aops = {
.write_begin = hugetlbfs_write_begin,
.write_end = hugetlbfs_write_end,
- .set_page_dirty = hugetlbfs_set_page_dirty,
+ .set_page_dirty = __set_page_dirty_no_writeback,
.migratepage = hugetlbfs_migrate_page,
.error_remove_page = hugetlbfs_error_remove_page,
};
@@ -1349,7 +1340,7 @@ hugetlbfs_fill_super(struct super_block *sb, struct fs_context *fc)
/*
* Allocate and initialize subpool if maximum or minimum size is
- * specified. Any needed reservations (for minimim size) are taken
+ * specified. Any needed reservations (for minimum size) are taken
* taken when the subpool is created.
*/
if (ctx->max_hpages != -1 || ctx->min_hpages != -1) {
@@ -1492,7 +1483,7 @@ struct file *hugetlb_file_setup(const char *name, size_t size,
inode->i_size = size;
clear_nlink(inode);
- if (hugetlb_reserve_pages(inode, 0,
+ if (!hugetlb_reserve_pages(inode, 0,
size >> huge_page_shift(hstate_inode(inode)), NULL,
acctflag))
file = ERR_PTR(-ENOMEM);
@@ -1526,8 +1517,8 @@ static struct vfsmount *__init mount_one_hugetlbfs(struct hstate *h)
put_fs_context(fc);
}
if (IS_ERR(mnt))
- pr_err("Cannot mount internal hugetlbfs for page size %uK",
- 1U << (h->order + PAGE_SHIFT - 10));
+ pr_err("Cannot mount internal hugetlbfs for page size %luK",
+ huge_page_size(h) >> 10);
return mnt;
}
@@ -1555,7 +1546,7 @@ static int __init init_hugetlbfs_fs(void)
goto out_free;
/* default hstate mount is required */
- mnt = mount_one_hugetlbfs(&hstates[default_hstate_idx]);
+ mnt = mount_one_hugetlbfs(&default_hstate);
if (IS_ERR(mnt)) {
error = PTR_ERR(mnt);
goto out_unreg;
diff --git a/fs/init.c b/fs/init.c
index e9c320a48cf1..5c36adaa9b44 100644
--- a/fs/init.c
+++ b/fs/init.c
@@ -49,7 +49,7 @@ int __init init_chdir(const char *filename)
error = kern_path(filename, LOOKUP_FOLLOW | LOOKUP_DIRECTORY, &path);
if (error)
return error;
- error = inode_permission(path.dentry->d_inode, MAY_EXEC | MAY_CHDIR);
+ error = path_permission(&path, MAY_EXEC | MAY_CHDIR);
if (!error)
set_fs_pwd(current->fs, &path);
path_put(&path);
@@ -64,7 +64,7 @@ int __init init_chroot(const char *filename)
error = kern_path(filename, LOOKUP_FOLLOW | LOOKUP_DIRECTORY, &path);
if (error)
return error;
- error = inode_permission(path.dentry->d_inode, MAY_EXEC | MAY_CHDIR);
+ error = path_permission(&path, MAY_EXEC | MAY_CHDIR);
if (error)
goto dput_and_out;
error = -EPERM;
@@ -118,7 +118,7 @@ int __init init_eaccess(const char *filename)
error = kern_path(filename, LOOKUP_FOLLOW, &path);
if (error)
return error;
- error = inode_permission(d_inode(path.dentry), MAY_ACCESS);
+ error = path_permission(&path, MAY_ACCESS);
path_put(&path);
return error;
}
@@ -157,8 +157,8 @@ int __init init_mknod(const char *filename, umode_t mode, unsigned int dev)
mode &= ~current_umask();
error = security_path_mknod(&path, dentry, mode, dev);
if (!error)
- error = vfs_mknod(path.dentry->d_inode, dentry, mode,
- new_decode_dev(dev));
+ error = vfs_mknod(mnt_user_ns(path.mnt), path.dentry->d_inode,
+ dentry, mode, new_decode_dev(dev));
done_path_create(&path, dentry);
return error;
}
@@ -167,6 +167,7 @@ int __init init_link(const char *oldname, const char *newname)
{
struct dentry *new_dentry;
struct path old_path, new_path;
+ struct user_namespace *mnt_userns;
int error;
error = kern_path(oldname, 0, &old_path);
@@ -181,14 +182,15 @@ int __init init_link(const char *oldname, const char *newname)
error = -EXDEV;
if (old_path.mnt != new_path.mnt)
goto out_dput;
- error = may_linkat(&old_path);
+ mnt_userns = mnt_user_ns(new_path.mnt);
+ error = may_linkat(mnt_userns, &old_path);
if (unlikely(error))
goto out_dput;
error = security_path_link(old_path.dentry, &new_path, new_dentry);
if (error)
goto out_dput;
- error = vfs_link(old_path.dentry, new_path.dentry->d_inode, new_dentry,
- NULL);
+ error = vfs_link(old_path.dentry, mnt_userns, new_path.dentry->d_inode,
+ new_dentry, NULL);
out_dput:
done_path_create(&new_path, new_dentry);
out:
@@ -207,7 +209,8 @@ int __init init_symlink(const char *oldname, const char *newname)
return PTR_ERR(dentry);
error = security_path_symlink(&path, dentry, oldname);
if (!error)
- error = vfs_symlink(path.dentry->d_inode, dentry, oldname);
+ error = vfs_symlink(mnt_user_ns(path.mnt), path.dentry->d_inode,
+ dentry, oldname);
done_path_create(&path, dentry);
return error;
}
@@ -230,7 +233,8 @@ int __init init_mkdir(const char *pathname, umode_t mode)
mode &= ~current_umask();
error = security_path_mkdir(&path, dentry, mode);
if (!error)
- error = vfs_mkdir(path.dentry->d_inode, dentry, mode);
+ error = vfs_mkdir(mnt_user_ns(path.mnt), path.dentry->d_inode,
+ dentry, mode);
done_path_create(&path, dentry);
return error;
}
diff --git a/fs/inode.c b/fs/inode.c
index 6442d97d9a4a..a047ab306f9a 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -142,6 +142,7 @@ int inode_init_always(struct super_block *sb, struct inode *inode)
atomic_set(&inode->i_count, 1);
inode->i_op = &empty_iops;
inode->i_fop = &no_open_fops;
+ inode->i_ino = 0;
inode->__i_nlink = 1;
inode->i_opflags = 0;
if (sb->s_xattr)
@@ -1493,7 +1494,7 @@ struct inode *find_inode_rcu(struct super_block *sb, unsigned long hashval,
EXPORT_SYMBOL(find_inode_rcu);
/**
- * find_inode_by_rcu - Find an inode in the inode cache
+ * find_inode_by_ino_rcu - Find an inode in the inode cache
* @sb: Super block of file system to search
* @ino: The inode number to match
*
@@ -1743,24 +1744,26 @@ static int relatime_need_update(struct vfsmount *mnt, struct inode *inode,
int generic_update_time(struct inode *inode, struct timespec64 *time, int flags)
{
- int iflags = I_DIRTY_TIME;
- bool dirty = false;
-
- if (flags & S_ATIME)
- inode->i_atime = *time;
- if (flags & S_VERSION)
- dirty = inode_maybe_inc_iversion(inode, false);
- if (flags & S_CTIME)
- inode->i_ctime = *time;
- if (flags & S_MTIME)
- inode->i_mtime = *time;
- if ((flags & (S_ATIME | S_CTIME | S_MTIME)) &&
- !(inode->i_sb->s_flags & SB_LAZYTIME))
- dirty = true;
-
- if (dirty)
- iflags |= I_DIRTY_SYNC;
- __mark_inode_dirty(inode, iflags);
+ int dirty_flags = 0;
+
+ if (flags & (S_ATIME | S_CTIME | S_MTIME)) {
+ if (flags & S_ATIME)
+ inode->i_atime = *time;
+ if (flags & S_CTIME)
+ inode->i_ctime = *time;
+ if (flags & S_MTIME)
+ inode->i_mtime = *time;
+
+ if (inode->i_sb->s_flags & SB_LAZYTIME)
+ dirty_flags |= I_DIRTY_TIME;
+ else
+ dirty_flags |= I_DIRTY_SYNC;
+ }
+
+ if ((flags & S_VERSION) && inode_maybe_inc_iversion(inode, false))
+ dirty_flags |= I_DIRTY_SYNC;
+
+ __mark_inode_dirty(inode, dirty_flags);
return 0;
}
EXPORT_SYMBOL(generic_update_time);
@@ -1777,7 +1780,7 @@ static int update_time(struct inode *inode, struct timespec64 *time, int flags)
}
/**
- * touch_atime - update the access time
+ * atime_needs_update - update the access time
* @path: the &struct path to update
* @inode: inode to update
*
@@ -1796,7 +1799,7 @@ bool atime_needs_update(const struct path *path, struct inode *inode)
/* Atime updates will likely cause i_uid and i_gid to be written
* back improprely if their true value is unknown to the vfs.
*/
- if (HAS_UNMAPPED_ID(inode))
+ if (HAS_UNMAPPED_ID(mnt_user_ns(mnt), inode))
return false;
if (IS_NOATIME(inode))
@@ -1903,7 +1906,8 @@ int dentry_needs_remove_privs(struct dentry *dentry)
return mask;
}
-static int __remove_privs(struct dentry *dentry, int kill)
+static int __remove_privs(struct user_namespace *mnt_userns,
+ struct dentry *dentry, int kill)
{
struct iattr newattrs;
@@ -1912,7 +1916,7 @@ static int __remove_privs(struct dentry *dentry, int kill)
* Note we call this on write, so notify_change will not
* encounter any conflicting delegations:
*/
- return notify_change(dentry, &newattrs, NULL);
+ return notify_change(mnt_userns, dentry, &newattrs, NULL);
}
/*
@@ -1939,7 +1943,7 @@ int file_remove_privs(struct file *file)
if (kill < 0)
return kill;
if (kill)
- error = __remove_privs(dentry, kill);
+ error = __remove_privs(file_mnt_user_ns(file), dentry, kill);
if (!error)
inode_has_no_xattr(inode);
@@ -2130,14 +2134,21 @@ EXPORT_SYMBOL(init_special_inode);
/**
* inode_init_owner - Init uid,gid,mode for new inode according to posix standards
+ * @mnt_userns: User namespace of the mount the inode was created from
* @inode: New inode
* @dir: Directory inode
* @mode: mode of the new inode
+ *
+ * If the inode has been created through an idmapped mount the user namespace of
+ * the vfsmount must be passed through @mnt_userns. This function will then take
+ * care to map the inode according to @mnt_userns before checking permissions
+ * and initializing i_uid and i_gid. On non-idmapped mounts or if permission
+ * checking is to be performed on the raw inode simply passs init_user_ns.
*/
-void inode_init_owner(struct inode *inode, const struct inode *dir,
- umode_t mode)
+void inode_init_owner(struct user_namespace *mnt_userns, struct inode *inode,
+ const struct inode *dir, umode_t mode)
{
- inode->i_uid = current_fsuid();
+ inode->i_uid = fsuid_into_mnt(mnt_userns);
if (dir && dir->i_mode & S_ISGID) {
inode->i_gid = dir->i_gid;
@@ -2145,31 +2156,41 @@ void inode_init_owner(struct inode *inode, const struct inode *dir,
if (S_ISDIR(mode))
mode |= S_ISGID;
else if ((mode & (S_ISGID | S_IXGRP)) == (S_ISGID | S_IXGRP) &&
- !in_group_p(inode->i_gid) &&
- !capable_wrt_inode_uidgid(dir, CAP_FSETID))
+ !in_group_p(i_gid_into_mnt(mnt_userns, dir)) &&
+ !capable_wrt_inode_uidgid(mnt_userns, dir, CAP_FSETID))
mode &= ~S_ISGID;
} else
- inode->i_gid = current_fsgid();
+ inode->i_gid = fsgid_into_mnt(mnt_userns);
inode->i_mode = mode;
}
EXPORT_SYMBOL(inode_init_owner);
/**
* inode_owner_or_capable - check current task permissions to inode
+ * @mnt_userns: user namespace of the mount the inode was found from
* @inode: inode being checked
*
* Return true if current either has CAP_FOWNER in a namespace with the
* inode owner uid mapped, or owns the file.
+ *
+ * If the inode has been found through an idmapped mount the user namespace of
+ * the vfsmount must be passed through @mnt_userns. This function will then take
+ * care to map the inode according to @mnt_userns before checking permissions.
+ * On non-idmapped mounts or if permission checking is to be performed on the
+ * raw inode simply passs init_user_ns.
*/
-bool inode_owner_or_capable(const struct inode *inode)
+bool inode_owner_or_capable(struct user_namespace *mnt_userns,
+ const struct inode *inode)
{
+ kuid_t i_uid;
struct user_namespace *ns;
- if (uid_eq(current_fsuid(), inode->i_uid))
+ i_uid = i_uid_into_mnt(mnt_userns, inode);
+ if (uid_eq(current_fsuid(), i_uid))
return true;
ns = current_user_ns();
- if (kuid_has_mapping(ns, inode->i_uid) && ns_capable(ns, CAP_FOWNER))
+ if (kuid_has_mapping(ns, i_uid) && ns_capable(ns, CAP_FOWNER))
return true;
return false;
}
diff --git a/fs/internal.h b/fs/internal.h
index 49bfb3750b22..6aeae7ef3380 100644
--- a/fs/internal.h
+++ b/fs/internal.h
@@ -74,7 +74,7 @@ extern int vfs_path_lookup(struct dentry *, struct vfsmount *,
const char *, unsigned int, struct path *);
long do_rmdir(int dfd, struct filename *name);
long do_unlinkat(int dfd, struct filename *name);
-int may_linkat(struct path *link);
+int may_linkat(struct user_namespace *mnt_userns, struct path *link);
int do_renameat2(int olddfd, struct filename *oldname, int newdfd,
struct filename *newname, unsigned int flags);
diff --git a/fs/io_uring.c b/fs/io_uring.c
index 4d79732d7d6b..4a088581b0f2 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -1064,21 +1064,6 @@ static inline void io_set_resource_node(struct io_kiocb *req)
}
}
-static bool io_refs_resurrect(struct percpu_ref *ref, struct completion *compl)
-{
- if (!percpu_ref_tryget(ref)) {
- /* already at zero, wait for ->release() */
- if (!try_wait_for_completion(compl))
- synchronize_rcu();
- return false;
- }
-
- percpu_ref_resurrect(ref);
- reinit_completion(compl);
- percpu_ref_put(ref);
- return true;
-}
-
static bool io_match_task(struct io_kiocb *head,
struct task_struct *task,
struct files_struct *files)
@@ -2498,6 +2483,13 @@ static bool io_rw_reissue(struct io_kiocb *req)
return false;
if ((req->flags & REQ_F_NOWAIT) || io_wq_current_is_worker())
return false;
+ /*
+ * If ref is dying, we might be running poll reap from the exit work.
+ * Don't attempt to reissue from that path, just let it fail with
+ * -EAGAIN.
+ */
+ if (percpu_ref_is_dying(&req->ctx->refs))
+ return false;
lockdep_assert_held(&req->ctx->uring_lock);
@@ -7026,11 +7018,13 @@ static int io_rsrc_ref_quiesce(struct fixed_rsrc_data *data,
flush_delayed_work(&ctx->rsrc_put_work);
ret = wait_for_completion_interruptible(&data->done);
- if (!ret || !io_refs_resurrect(&data->refs, &data->done))
+ if (!ret)
break;
+ percpu_ref_resurrect(&data->refs);
io_sqe_rsrc_set_node(ctx, data, backup_node);
backup_node = NULL;
+ reinit_completion(&data->done);
mutex_unlock(&ctx->uring_lock);
ret = io_run_task_work_sig();
mutex_lock(&ctx->uring_lock);
@@ -8393,19 +8387,23 @@ static void io_req_cache_free(struct list_head *list, struct task_struct *tsk)
static void io_req_caches_free(struct io_ring_ctx *ctx, struct task_struct *tsk)
{
struct io_submit_state *submit_state = &ctx->submit_state;
+ struct io_comp_state *cs = &ctx->submit_state.comp;
mutex_lock(&ctx->uring_lock);
- if (submit_state->free_reqs)
+ if (submit_state->free_reqs) {
kmem_cache_free_bulk(req_cachep, submit_state->free_reqs,
submit_state->reqs);
-
- io_req_cache_free(&submit_state->comp.free_list, NULL);
+ submit_state->free_reqs = 0;
+ }
spin_lock_irq(&ctx->completion_lock);
- io_req_cache_free(&submit_state->comp.locked_free_list, NULL);
+ list_splice_init(&cs->locked_free_list, &cs->free_list);
+ cs->locked_free_nr = 0;
spin_unlock_irq(&ctx->completion_lock);
+ io_req_cache_free(&cs->free_list, NULL);
+
mutex_unlock(&ctx->uring_lock);
}
@@ -9769,8 +9767,10 @@ static int __io_uring_register(struct io_ring_ctx *ctx, unsigned opcode,
mutex_lock(&ctx->uring_lock);
- if (ret && io_refs_resurrect(&ctx->refs, &ctx->ref_comp))
- return ret;
+ if (ret) {
+ percpu_ref_resurrect(&ctx->refs);
+ goto out_quiesce;
+ }
}
if (ctx->restricted) {
@@ -9862,6 +9862,7 @@ out:
if (io_register_op_must_quiesce(opcode)) {
/* bring the ctx back to life */
percpu_ref_reinit(&ctx->refs);
+out_quiesce:
reinit_completion(&ctx->ref_comp);
}
return ret;
diff --git a/fs/iomap/seek.c b/fs/iomap/seek.c
index 107ee80c3568..dab1b02eba5b 100644
--- a/fs/iomap/seek.c
+++ b/fs/iomap/seek.c
@@ -10,122 +10,17 @@
#include <linux/pagemap.h>
#include <linux/pagevec.h>
-/*
- * Seek for SEEK_DATA / SEEK_HOLE within @page, starting at @lastoff.
- * Returns true if found and updates @lastoff to the offset in file.
- */
-static bool
-page_seek_hole_data(struct inode *inode, struct page *page, loff_t *lastoff,
- int whence)
-{
- const struct address_space_operations *ops = inode->i_mapping->a_ops;
- unsigned int bsize = i_blocksize(inode), off;
- bool seek_data = whence == SEEK_DATA;
- loff_t poff = page_offset(page);
-
- if (WARN_ON_ONCE(*lastoff >= poff + PAGE_SIZE))
- return false;
-
- if (*lastoff < poff) {
- /*
- * Last offset smaller than the start of the page means we found
- * a hole:
- */
- if (whence == SEEK_HOLE)
- return true;
- *lastoff = poff;
- }
-
- /*
- * Just check the page unless we can and should check block ranges:
- */
- if (bsize == PAGE_SIZE || !ops->is_partially_uptodate)
- return PageUptodate(page) == seek_data;
-
- lock_page(page);
- if (unlikely(page->mapping != inode->i_mapping))
- goto out_unlock_not_found;
-
- for (off = 0; off < PAGE_SIZE; off += bsize) {
- if (offset_in_page(*lastoff) >= off + bsize)
- continue;
- if (ops->is_partially_uptodate(page, off, bsize) == seek_data) {
- unlock_page(page);
- return true;
- }
- *lastoff = poff + off + bsize;
- }
-
-out_unlock_not_found:
- unlock_page(page);
- return false;
-}
-
-/*
- * Seek for SEEK_DATA / SEEK_HOLE in the page cache.
- *
- * Within unwritten extents, the page cache determines which parts are holes
- * and which are data: uptodate buffer heads count as data; everything else
- * counts as a hole.
- *
- * Returns the resulting offset on successs, and -ENOENT otherwise.
- */
static loff_t
-page_cache_seek_hole_data(struct inode *inode, loff_t offset, loff_t length,
- int whence)
-{
- pgoff_t index = offset >> PAGE_SHIFT;
- pgoff_t end = DIV_ROUND_UP(offset + length, PAGE_SIZE);
- loff_t lastoff = offset;
- struct pagevec pvec;
-
- if (length <= 0)
- return -ENOENT;
-
- pagevec_init(&pvec);
-
- do {
- unsigned nr_pages, i;
-
- nr_pages = pagevec_lookup_range(&pvec, inode->i_mapping, &index,
- end - 1);
- if (nr_pages == 0)
- break;
-
- for (i = 0; i < nr_pages; i++) {
- struct page *page = pvec.pages[i];
-
- if (page_seek_hole_data(inode, page, &lastoff, whence))
- goto check_range;
- lastoff = page_offset(page) + PAGE_SIZE;
- }
- pagevec_release(&pvec);
- } while (index < end);
-
- /* When no page at lastoff and we are not done, we found a hole. */
- if (whence != SEEK_HOLE)
- goto not_found;
-
-check_range:
- if (lastoff < offset + length)
- goto out;
-not_found:
- lastoff = -ENOENT;
-out:
- pagevec_release(&pvec);
- return lastoff;
-}
-
-
-static loff_t
-iomap_seek_hole_actor(struct inode *inode, loff_t offset, loff_t length,
+iomap_seek_hole_actor(struct inode *inode, loff_t start, loff_t length,
void *data, struct iomap *iomap, struct iomap *srcmap)
{
+ loff_t offset = start;
+
switch (iomap->type) {
case IOMAP_UNWRITTEN:
- offset = page_cache_seek_hole_data(inode, offset, length,
- SEEK_HOLE);
- if (offset < 0)
+ offset = mapping_seek_hole_data(inode->i_mapping, start,
+ start + length, SEEK_HOLE);
+ if (offset == start + length)
return length;
fallthrough;
case IOMAP_HOLE:
@@ -164,15 +59,17 @@ iomap_seek_hole(struct inode *inode, loff_t offset, const struct iomap_ops *ops)
EXPORT_SYMBOL_GPL(iomap_seek_hole);
static loff_t
-iomap_seek_data_actor(struct inode *inode, loff_t offset, loff_t length,
+iomap_seek_data_actor(struct inode *inode, loff_t start, loff_t length,
void *data, struct iomap *iomap, struct iomap *srcmap)
{
+ loff_t offset = start;
+
switch (iomap->type) {
case IOMAP_HOLE:
return length;
case IOMAP_UNWRITTEN:
- offset = page_cache_seek_hole_data(inode, offset, length,
- SEEK_DATA);
+ offset = mapping_seek_hole_data(inode->i_mapping, start,
+ start + length, SEEK_DATA);
if (offset < 0)
return length;
fallthrough;
diff --git a/fs/isofs/dir.c b/fs/isofs/dir.c
index f0fe641893a5..b9e6a7ec78be 100644
--- a/fs/isofs/dir.c
+++ b/fs/isofs/dir.c
@@ -152,6 +152,7 @@ static int do_isofs_readdir(struct inode *inode, struct file *file,
printk(KERN_NOTICE "iso9660: Corrupted directory entry"
" in block %lu of inode %lu\n", block,
inode->i_ino);
+ brelse(bh);
return -EIO;
}
diff --git a/fs/isofs/inode.c b/fs/isofs/inode.c
index ec90773527ee..21edc423b79f 100644
--- a/fs/isofs/inode.c
+++ b/fs/isofs/inode.c
@@ -339,6 +339,7 @@ static int parse_options(char *options, struct iso9660_options *popt)
{
char *p;
int option;
+ unsigned int uv;
popt->map = 'n';
popt->rock = 1;
@@ -434,17 +435,17 @@ static int parse_options(char *options, struct iso9660_options *popt)
case Opt_ignore:
break;
case Opt_uid:
- if (match_int(&args[0], &option))
+ if (match_uint(&args[0], &uv))
return 0;
- popt->uid = make_kuid(current_user_ns(), option);
+ popt->uid = make_kuid(current_user_ns(), uv);
if (!uid_valid(popt->uid))
return 0;
popt->uid_set = 1;
break;
case Opt_gid:
- if (match_int(&args[0], &option))
+ if (match_uint(&args[0], &uv))
return 0;
- popt->gid = make_kgid(current_user_ns(), option);
+ popt->gid = make_kgid(current_user_ns(), uv);
if (!gid_valid(popt->gid))
return 0;
popt->gid_set = 1;
diff --git a/fs/isofs/namei.c b/fs/isofs/namei.c
index 402769881c32..58f80e1b3ac0 100644
--- a/fs/isofs/namei.c
+++ b/fs/isofs/namei.c
@@ -102,6 +102,7 @@ isofs_find_entry(struct inode *dir, struct dentry *dentry,
printk(KERN_NOTICE "iso9660: Corrupted directory entry"
" in block %lu of inode %lu\n", block,
dir->i_ino);
+ brelse(bh);
return 0;
}
diff --git a/fs/jffs2/acl.c b/fs/jffs2/acl.c
index 093ffbd82395..55a79df70d24 100644
--- a/fs/jffs2/acl.c
+++ b/fs/jffs2/acl.c
@@ -226,7 +226,8 @@ static int __jffs2_set_acl(struct inode *inode, int xprefix, struct posix_acl *a
return rc;
}
-int jffs2_set_acl(struct inode *inode, struct posix_acl *acl, int type)
+int jffs2_set_acl(struct user_namespace *mnt_userns, struct inode *inode,
+ struct posix_acl *acl, int type)
{
int rc, xprefix;
@@ -236,7 +237,8 @@ int jffs2_set_acl(struct inode *inode, struct posix_acl *acl, int type)
if (acl) {
umode_t mode;
- rc = posix_acl_update_mode(inode, &mode, &acl);
+ rc = posix_acl_update_mode(&init_user_ns, inode, &mode,
+ &acl);
if (rc)
return rc;
if (inode->i_mode != mode) {
diff --git a/fs/jffs2/acl.h b/fs/jffs2/acl.h
index 12d0271bdde3..62c50da9d493 100644
--- a/fs/jffs2/acl.h
+++ b/fs/jffs2/acl.h
@@ -28,7 +28,8 @@ struct jffs2_acl_header {
#ifdef CONFIG_JFFS2_FS_POSIX_ACL
struct posix_acl *jffs2_get_acl(struct inode *inode, int type);
-int jffs2_set_acl(struct inode *inode, struct posix_acl *acl, int type);
+int jffs2_set_acl(struct user_namespace *mnt_userns, struct inode *inode,
+ struct posix_acl *acl, int type);
extern int jffs2_init_acl_pre(struct inode *, struct inode *, umode_t *);
extern int jffs2_init_acl_post(struct inode *);
diff --git a/fs/jffs2/dir.c b/fs/jffs2/dir.c
index 776493713153..c0aabbcbfd58 100644
--- a/fs/jffs2/dir.c
+++ b/fs/jffs2/dir.c
@@ -24,18 +24,21 @@
static int jffs2_readdir (struct file *, struct dir_context *);
-static int jffs2_create (struct inode *,struct dentry *,umode_t,
- bool);
+static int jffs2_create (struct user_namespace *, struct inode *,
+ struct dentry *, umode_t, bool);
static struct dentry *jffs2_lookup (struct inode *,struct dentry *,
unsigned int);
static int jffs2_link (struct dentry *,struct inode *,struct dentry *);
static int jffs2_unlink (struct inode *,struct dentry *);
-static int jffs2_symlink (struct inode *,struct dentry *,const char *);
-static int jffs2_mkdir (struct inode *,struct dentry *,umode_t);
+static int jffs2_symlink (struct user_namespace *, struct inode *,
+ struct dentry *, const char *);
+static int jffs2_mkdir (struct user_namespace *, struct inode *,struct dentry *,
+ umode_t);
static int jffs2_rmdir (struct inode *,struct dentry *);
-static int jffs2_mknod (struct inode *,struct dentry *,umode_t,dev_t);
-static int jffs2_rename (struct inode *, struct dentry *,
- struct inode *, struct dentry *,
+static int jffs2_mknod (struct user_namespace *, struct inode *,struct dentry *,
+ umode_t,dev_t);
+static int jffs2_rename (struct user_namespace *, struct inode *,
+ struct dentry *, struct inode *, struct dentry *,
unsigned int);
const struct file_operations jffs2_dir_operations =
@@ -157,8 +160,8 @@ static int jffs2_readdir(struct file *file, struct dir_context *ctx)
/***********************************************************************/
-static int jffs2_create(struct inode *dir_i, struct dentry *dentry,
- umode_t mode, bool excl)
+static int jffs2_create(struct user_namespace *mnt_userns, struct inode *dir_i,
+ struct dentry *dentry, umode_t mode, bool excl)
{
struct jffs2_raw_inode *ri;
struct jffs2_inode_info *f, *dir_f;
@@ -276,7 +279,8 @@ static int jffs2_link (struct dentry *old_dentry, struct inode *dir_i, struct de
/***********************************************************************/
-static int jffs2_symlink (struct inode *dir_i, struct dentry *dentry, const char *target)
+static int jffs2_symlink (struct user_namespace *mnt_userns, struct inode *dir_i,
+ struct dentry *dentry, const char *target)
{
struct jffs2_inode_info *f, *dir_f;
struct jffs2_sb_info *c;
@@ -438,7 +442,8 @@ static int jffs2_symlink (struct inode *dir_i, struct dentry *dentry, const char
}
-static int jffs2_mkdir (struct inode *dir_i, struct dentry *dentry, umode_t mode)
+static int jffs2_mkdir (struct user_namespace *mnt_userns, struct inode *dir_i,
+ struct dentry *dentry, umode_t mode)
{
struct jffs2_inode_info *f, *dir_f;
struct jffs2_sb_info *c;
@@ -609,7 +614,8 @@ static int jffs2_rmdir (struct inode *dir_i, struct dentry *dentry)
return ret;
}
-static int jffs2_mknod (struct inode *dir_i, struct dentry *dentry, umode_t mode, dev_t rdev)
+static int jffs2_mknod (struct user_namespace *mnt_userns, struct inode *dir_i,
+ struct dentry *dentry, umode_t mode, dev_t rdev)
{
struct jffs2_inode_info *f, *dir_f;
struct jffs2_sb_info *c;
@@ -756,7 +762,8 @@ static int jffs2_mknod (struct inode *dir_i, struct dentry *dentry, umode_t mode
return ret;
}
-static int jffs2_rename (struct inode *old_dir_i, struct dentry *old_dentry,
+static int jffs2_rename (struct user_namespace *mnt_userns,
+ struct inode *old_dir_i, struct dentry *old_dentry,
struct inode *new_dir_i, struct dentry *new_dentry,
unsigned int flags)
{
diff --git a/fs/jffs2/fs.c b/fs/jffs2/fs.c
index 78858f6e9583..2ac410477c4f 100644
--- a/fs/jffs2/fs.c
+++ b/fs/jffs2/fs.c
@@ -190,18 +190,19 @@ int jffs2_do_setattr (struct inode *inode, struct iattr *iattr)
return 0;
}
-int jffs2_setattr(struct dentry *dentry, struct iattr *iattr)
+int jffs2_setattr(struct user_namespace *mnt_userns, struct dentry *dentry,
+ struct iattr *iattr)
{
struct inode *inode = d_inode(dentry);
int rc;
- rc = setattr_prepare(dentry, iattr);
+ rc = setattr_prepare(&init_user_ns, dentry, iattr);
if (rc)
return rc;
rc = jffs2_do_setattr(inode, iattr);
if (!rc && (iattr->ia_valid & ATTR_MODE))
- rc = posix_acl_chmod(inode, inode->i_mode);
+ rc = posix_acl_chmod(&init_user_ns, inode, inode->i_mode);
return rc;
}
diff --git a/fs/jffs2/os-linux.h b/fs/jffs2/os-linux.h
index ef1cfa61549e..173eccac691d 100644
--- a/fs/jffs2/os-linux.h
+++ b/fs/jffs2/os-linux.h
@@ -164,7 +164,7 @@ long jffs2_ioctl(struct file *, unsigned int, unsigned long);
extern const struct inode_operations jffs2_symlink_inode_operations;
/* fs.c */
-int jffs2_setattr (struct dentry *, struct iattr *);
+int jffs2_setattr (struct user_namespace *, struct dentry *, struct iattr *);
int jffs2_do_setattr (struct inode *, struct iattr *);
struct inode *jffs2_iget(struct super_block *, unsigned long);
void jffs2_evict_inode (struct inode *);
diff --git a/fs/jffs2/security.c b/fs/jffs2/security.c
index c2332e30f218..aef5522551db 100644
--- a/fs/jffs2/security.c
+++ b/fs/jffs2/security.c
@@ -57,6 +57,7 @@ static int jffs2_security_getxattr(const struct xattr_handler *handler,
}
static int jffs2_security_setxattr(const struct xattr_handler *handler,
+ struct user_namespace *mnt_userns,
struct dentry *unused, struct inode *inode,
const char *name, const void *buffer,
size_t size, int flags)
diff --git a/fs/jffs2/xattr_trusted.c b/fs/jffs2/xattr_trusted.c
index 5d6030826c52..cc3f24883e7d 100644
--- a/fs/jffs2/xattr_trusted.c
+++ b/fs/jffs2/xattr_trusted.c
@@ -25,6 +25,7 @@ static int jffs2_trusted_getxattr(const struct xattr_handler *handler,
}
static int jffs2_trusted_setxattr(const struct xattr_handler *handler,
+ struct user_namespace *mnt_userns,
struct dentry *unused, struct inode *inode,
const char *name, const void *buffer,
size_t size, int flags)
diff --git a/fs/jffs2/xattr_user.c b/fs/jffs2/xattr_user.c
index 9d027b4abcf9..fb945977c013 100644
--- a/fs/jffs2/xattr_user.c
+++ b/fs/jffs2/xattr_user.c
@@ -25,6 +25,7 @@ static int jffs2_user_getxattr(const struct xattr_handler *handler,
}
static int jffs2_user_setxattr(const struct xattr_handler *handler,
+ struct user_namespace *mnt_userns,
struct dentry *unused, struct inode *inode,
const char *name, const void *buffer,
size_t size, int flags)
diff --git a/fs/jfs/acl.c b/fs/jfs/acl.c
index 92cc0ac2d1fc..43c285c3d2a7 100644
--- a/fs/jfs/acl.c
+++ b/fs/jfs/acl.c
@@ -91,7 +91,8 @@ out:
return rc;
}
-int jfs_set_acl(struct inode *inode, struct posix_acl *acl, int type)
+int jfs_set_acl(struct user_namespace *mnt_userns, struct inode *inode,
+ struct posix_acl *acl, int type)
{
int rc;
tid_t tid;
@@ -101,7 +102,7 @@ int jfs_set_acl(struct inode *inode, struct posix_acl *acl, int type)
tid = txBegin(inode->i_sb, 0);
mutex_lock(&JFS_IP(inode)->commit_mutex);
if (type == ACL_TYPE_ACCESS && acl) {
- rc = posix_acl_update_mode(inode, &mode, &acl);
+ rc = posix_acl_update_mode(&init_user_ns, inode, &mode, &acl);
if (rc)
goto end_tx;
if (mode != inode->i_mode)
diff --git a/fs/jfs/file.c b/fs/jfs/file.c
index 930d2701f206..28b70e7c7dd4 100644
--- a/fs/jfs/file.c
+++ b/fs/jfs/file.c
@@ -85,12 +85,13 @@ static int jfs_release(struct inode *inode, struct file *file)
return 0;
}
-int jfs_setattr(struct dentry *dentry, struct iattr *iattr)
+int jfs_setattr(struct user_namespace *mnt_userns, struct dentry *dentry,
+ struct iattr *iattr)
{
struct inode *inode = d_inode(dentry);
int rc;
- rc = setattr_prepare(dentry, iattr);
+ rc = setattr_prepare(&init_user_ns, dentry, iattr);
if (rc)
return rc;
@@ -118,11 +119,11 @@ int jfs_setattr(struct dentry *dentry, struct iattr *iattr)
jfs_truncate(inode);
}
- setattr_copy(inode, iattr);
+ setattr_copy(&init_user_ns, inode, iattr);
mark_inode_dirty(inode);
if (iattr->ia_valid & ATTR_MODE)
- rc = posix_acl_chmod(inode, inode->i_mode);
+ rc = posix_acl_chmod(&init_user_ns, inode, inode->i_mode);
return rc;
}
diff --git a/fs/jfs/ioctl.c b/fs/jfs/ioctl.c
index 10ee0ecca1a8..2581d4db58ff 100644
--- a/fs/jfs/ioctl.c
+++ b/fs/jfs/ioctl.c
@@ -76,7 +76,7 @@ long jfs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
if (err)
return err;
- if (!inode_owner_or_capable(inode)) {
+ if (!inode_owner_or_capable(&init_user_ns, inode)) {
err = -EACCES;
goto setflags_out;
}
diff --git a/fs/jfs/jfs_acl.h b/fs/jfs/jfs_acl.h
index 9f8f92dd6f84..7ae389a7a366 100644
--- a/fs/jfs/jfs_acl.h
+++ b/fs/jfs/jfs_acl.h
@@ -8,7 +8,8 @@
#ifdef CONFIG_JFS_POSIX_ACL
struct posix_acl *jfs_get_acl(struct inode *inode, int type);
-int jfs_set_acl(struct inode *inode, struct posix_acl *acl, int type);
+int jfs_set_acl(struct user_namespace *mnt_userns, struct inode *inode,
+ struct posix_acl *acl, int type);
int jfs_init_acl(tid_t, struct inode *, struct inode *);
#else
diff --git a/fs/jfs/jfs_inode.c b/fs/jfs/jfs_inode.c
index 4cef170630db..59379089e939 100644
--- a/fs/jfs/jfs_inode.c
+++ b/fs/jfs/jfs_inode.c
@@ -64,7 +64,7 @@ struct inode *ialloc(struct inode *parent, umode_t mode)
goto fail_put;
}
- inode_init_owner(inode, parent, mode);
+ inode_init_owner(&init_user_ns, inode, parent, mode);
/*
* New inodes need to save sane values on disk when
* uid & gid mount options are used
diff --git a/fs/jfs/jfs_inode.h b/fs/jfs/jfs_inode.h
index 70a0d12e427e..01daa0cb0ae5 100644
--- a/fs/jfs/jfs_inode.h
+++ b/fs/jfs/jfs_inode.h
@@ -26,7 +26,7 @@ extern struct dentry *jfs_fh_to_parent(struct super_block *sb, struct fid *fid,
int fh_len, int fh_type);
extern void jfs_set_inode_flags(struct inode *);
extern int jfs_get_block(struct inode *, sector_t, struct buffer_head *, int);
-extern int jfs_setattr(struct dentry *, struct iattr *);
+extern int jfs_setattr(struct user_namespace *, struct dentry *, struct iattr *);
extern const struct address_space_operations jfs_aops;
extern const struct inode_operations jfs_dir_inode_operations;
diff --git a/fs/jfs/namei.c b/fs/jfs/namei.c
index 7a55d14cc1af..9abed0d750e5 100644
--- a/fs/jfs/namei.c
+++ b/fs/jfs/namei.c
@@ -59,8 +59,8 @@ static inline void free_ea_wmap(struct inode *inode)
* RETURN: Errors from subroutines
*
*/
-static int jfs_create(struct inode *dip, struct dentry *dentry, umode_t mode,
- bool excl)
+static int jfs_create(struct user_namespace *mnt_userns, struct inode *dip,
+ struct dentry *dentry, umode_t mode, bool excl)
{
int rc = 0;
tid_t tid; /* transaction id */
@@ -192,7 +192,8 @@ static int jfs_create(struct inode *dip, struct dentry *dentry, umode_t mode,
* note:
* EACCES: user needs search+write permission on the parent directory
*/
-static int jfs_mkdir(struct inode *dip, struct dentry *dentry, umode_t mode)
+static int jfs_mkdir(struct user_namespace *mnt_userns, struct inode *dip,
+ struct dentry *dentry, umode_t mode)
{
int rc = 0;
tid_t tid; /* transaction id */
@@ -868,8 +869,8 @@ static int jfs_link(struct dentry *old_dentry,
* an intermediate result whose length exceeds PATH_MAX [XPG4.2]
*/
-static int jfs_symlink(struct inode *dip, struct dentry *dentry,
- const char *name)
+static int jfs_symlink(struct user_namespace *mnt_userns, struct inode *dip,
+ struct dentry *dentry, const char *name)
{
int rc;
tid_t tid;
@@ -1058,9 +1059,9 @@ static int jfs_symlink(struct inode *dip, struct dentry *dentry,
*
* FUNCTION: rename a file or directory
*/
-static int jfs_rename(struct inode *old_dir, struct dentry *old_dentry,
- struct inode *new_dir, struct dentry *new_dentry,
- unsigned int flags)
+static int jfs_rename(struct user_namespace *mnt_userns, struct inode *old_dir,
+ struct dentry *old_dentry, struct inode *new_dir,
+ struct dentry *new_dentry, unsigned int flags)
{
struct btstack btstack;
ino_t ino;
@@ -1344,8 +1345,8 @@ static int jfs_rename(struct inode *old_dir, struct dentry *old_dentry,
*
* FUNCTION: Create a special file (device)
*/
-static int jfs_mknod(struct inode *dir, struct dentry *dentry,
- umode_t mode, dev_t rdev)
+static int jfs_mknod(struct user_namespace *mnt_userns, struct inode *dir,
+ struct dentry *dentry, umode_t mode, dev_t rdev)
{
struct jfs_inode_info *jfs_ip;
struct btstack btstack;
diff --git a/fs/jfs/super.c b/fs/jfs/super.c
index b2dc4d1f9dcc..1f0ffabbde56 100644
--- a/fs/jfs/super.c
+++ b/fs/jfs/super.c
@@ -551,7 +551,6 @@ static int jfs_fill_super(struct super_block *sb, void *data, int silent)
ret = -ENOMEM;
goto out_unload;
}
- inode->i_ino = 0;
inode->i_size = i_size_read(sb->s_bdev->bd_inode);
inode->i_mapping->a_ops = &jfs_metapage_aops;
inode_fake_hash(inode);
diff --git a/fs/jfs/xattr.c b/fs/jfs/xattr.c
index db41e7803163..f9273f6901c8 100644
--- a/fs/jfs/xattr.c
+++ b/fs/jfs/xattr.c
@@ -932,6 +932,7 @@ static int jfs_xattr_get(const struct xattr_handler *handler,
}
static int jfs_xattr_set(const struct xattr_handler *handler,
+ struct user_namespace *mnt_userns,
struct dentry *unused, struct inode *inode,
const char *name, const void *value,
size_t size, int flags)
@@ -950,6 +951,7 @@ static int jfs_xattr_get_os2(const struct xattr_handler *handler,
}
static int jfs_xattr_set_os2(const struct xattr_handler *handler,
+ struct user_namespace *mnt_userns,
struct dentry *unused, struct inode *inode,
const char *name, const void *value,
size_t size, int flags)
diff --git a/fs/kernfs/dir.c b/fs/kernfs/dir.c
index 7a53eed69fef..7e0e62deab53 100644
--- a/fs/kernfs/dir.c
+++ b/fs/kernfs/dir.c
@@ -1110,7 +1110,8 @@ static struct dentry *kernfs_iop_lookup(struct inode *dir,
return ret;
}
-static int kernfs_iop_mkdir(struct inode *dir, struct dentry *dentry,
+static int kernfs_iop_mkdir(struct user_namespace *mnt_userns,
+ struct inode *dir, struct dentry *dentry,
umode_t mode)
{
struct kernfs_node *parent = dir->i_private;
@@ -1147,7 +1148,8 @@ static int kernfs_iop_rmdir(struct inode *dir, struct dentry *dentry)
return ret;
}
-static int kernfs_iop_rename(struct inode *old_dir, struct dentry *old_dentry,
+static int kernfs_iop_rename(struct user_namespace *mnt_userns,
+ struct inode *old_dir, struct dentry *old_dentry,
struct inode *new_dir, struct dentry *new_dentry,
unsigned int flags)
{
diff --git a/fs/kernfs/inode.c b/fs/kernfs/inode.c
index fc2469a20fed..d73950fc3d57 100644
--- a/fs/kernfs/inode.c
+++ b/fs/kernfs/inode.c
@@ -112,7 +112,8 @@ int kernfs_setattr(struct kernfs_node *kn, const struct iattr *iattr)
return ret;
}
-int kernfs_iop_setattr(struct dentry *dentry, struct iattr *iattr)
+int kernfs_iop_setattr(struct user_namespace *mnt_userns, struct dentry *dentry,
+ struct iattr *iattr)
{
struct inode *inode = d_inode(dentry);
struct kernfs_node *kn = inode->i_private;
@@ -122,7 +123,7 @@ int kernfs_iop_setattr(struct dentry *dentry, struct iattr *iattr)
return -EINVAL;
mutex_lock(&kernfs_mutex);
- error = setattr_prepare(dentry, iattr);
+ error = setattr_prepare(&init_user_ns, dentry, iattr);
if (error)
goto out;
@@ -131,7 +132,7 @@ int kernfs_iop_setattr(struct dentry *dentry, struct iattr *iattr)
goto out;
/* this ignores size changes */
- setattr_copy(inode, iattr);
+ setattr_copy(&init_user_ns, inode, iattr);
out:
mutex_unlock(&kernfs_mutex);
@@ -183,7 +184,8 @@ static void kernfs_refresh_inode(struct kernfs_node *kn, struct inode *inode)
set_nlink(inode, kn->dir.subdirs + 2);
}
-int kernfs_iop_getattr(const struct path *path, struct kstat *stat,
+int kernfs_iop_getattr(struct user_namespace *mnt_userns,
+ const struct path *path, struct kstat *stat,
u32 request_mask, unsigned int query_flags)
{
struct inode *inode = d_inode(path->dentry);
@@ -193,7 +195,7 @@ int kernfs_iop_getattr(const struct path *path, struct kstat *stat,
kernfs_refresh_inode(kn, inode);
mutex_unlock(&kernfs_mutex);
- generic_fillattr(inode, stat);
+ generic_fillattr(&init_user_ns, inode, stat);
return 0;
}
@@ -272,7 +274,8 @@ void kernfs_evict_inode(struct inode *inode)
kernfs_put(kn);
}
-int kernfs_iop_permission(struct inode *inode, int mask)
+int kernfs_iop_permission(struct user_namespace *mnt_userns,
+ struct inode *inode, int mask)
{
struct kernfs_node *kn;
@@ -285,7 +288,7 @@ int kernfs_iop_permission(struct inode *inode, int mask)
kernfs_refresh_inode(kn, inode);
mutex_unlock(&kernfs_mutex);
- return generic_permission(inode, mask);
+ return generic_permission(&init_user_ns, inode, mask);
}
int kernfs_xattr_get(struct kernfs_node *kn, const char *name,
@@ -319,6 +322,7 @@ static int kernfs_vfs_xattr_get(const struct xattr_handler *handler,
}
static int kernfs_vfs_xattr_set(const struct xattr_handler *handler,
+ struct user_namespace *mnt_userns,
struct dentry *unused, struct inode *inode,
const char *suffix, const void *value,
size_t size, int flags)
@@ -385,6 +389,7 @@ static int kernfs_vfs_user_xattr_rm(struct kernfs_node *kn,
}
static int kernfs_vfs_user_xattr_set(const struct xattr_handler *handler,
+ struct user_namespace *mnt_userns,
struct dentry *unused, struct inode *inode,
const char *suffix, const void *value,
size_t size, int flags)
diff --git a/fs/kernfs/kernfs-internal.h b/fs/kernfs/kernfs-internal.h
index 7ee97ef59184..ccc3b44f6306 100644
--- a/fs/kernfs/kernfs-internal.h
+++ b/fs/kernfs/kernfs-internal.h
@@ -89,9 +89,12 @@ extern struct kmem_cache *kernfs_node_cache, *kernfs_iattrs_cache;
*/
extern const struct xattr_handler *kernfs_xattr_handlers[];
void kernfs_evict_inode(struct inode *inode);
-int kernfs_iop_permission(struct inode *inode, int mask);
-int kernfs_iop_setattr(struct dentry *dentry, struct iattr *iattr);
-int kernfs_iop_getattr(const struct path *path, struct kstat *stat,
+int kernfs_iop_permission(struct user_namespace *mnt_userns,
+ struct inode *inode, int mask);
+int kernfs_iop_setattr(struct user_namespace *mnt_userns, struct dentry *dentry,
+ struct iattr *iattr);
+int kernfs_iop_getattr(struct user_namespace *mnt_userns,
+ const struct path *path, struct kstat *stat,
u32 request_mask, unsigned int query_flags);
ssize_t kernfs_iop_listxattr(struct dentry *dentry, char *buf, size_t size);
int __kernfs_setattr(struct kernfs_node *kn, const struct iattr *iattr);
diff --git a/fs/libfs.c b/fs/libfs.c
index abf7674fb437..e2de5401abca 100644
--- a/fs/libfs.c
+++ b/fs/libfs.c
@@ -27,11 +27,12 @@
#include "internal.h"
-int simple_getattr(const struct path *path, struct kstat *stat,
- u32 request_mask, unsigned int query_flags)
+int simple_getattr(struct user_namespace *mnt_userns, const struct path *path,
+ struct kstat *stat, u32 request_mask,
+ unsigned int query_flags)
{
struct inode *inode = d_inode(path->dentry);
- generic_fillattr(inode, stat);
+ generic_fillattr(&init_user_ns, inode, stat);
stat->blocks = inode->i_mapping->nrpages << (PAGE_SHIFT - 9);
return 0;
}
@@ -447,9 +448,9 @@ int simple_rmdir(struct inode *dir, struct dentry *dentry)
}
EXPORT_SYMBOL(simple_rmdir);
-int simple_rename(struct inode *old_dir, struct dentry *old_dentry,
- struct inode *new_dir, struct dentry *new_dentry,
- unsigned int flags)
+int simple_rename(struct user_namespace *mnt_userns, struct inode *old_dir,
+ struct dentry *old_dentry, struct inode *new_dir,
+ struct dentry *new_dentry, unsigned int flags)
{
struct inode *inode = d_inode(old_dentry);
int they_are_dirs = d_is_dir(old_dentry);
@@ -492,18 +493,19 @@ EXPORT_SYMBOL(simple_rename);
* on simple regular filesystems. Anything that needs to change on-disk
* or wire state on size changes needs its own setattr method.
*/
-int simple_setattr(struct dentry *dentry, struct iattr *iattr)
+int simple_setattr(struct user_namespace *mnt_userns, struct dentry *dentry,
+ struct iattr *iattr)
{
struct inode *inode = d_inode(dentry);
int error;
- error = setattr_prepare(dentry, iattr);
+ error = setattr_prepare(mnt_userns, dentry, iattr);
if (error)
return error;
if (iattr->ia_valid & ATTR_SIZE)
truncate_setsize(inode, iattr->ia_size);
- setattr_copy(inode, iattr);
+ setattr_copy(mnt_userns, inode, iattr);
mark_inode_dirty(inode);
return 0;
}
@@ -1214,11 +1216,6 @@ static int anon_set_page_dirty(struct page *page)
return 0;
};
-/*
- * A single inode exists for all anon_inode files. Contrary to pipes,
- * anon_inode inodes have no associated per-instance data, so we need
- * only allocate one of them.
- */
struct inode *alloc_anon_inode(struct super_block *s)
{
static const struct address_space_operations anon_aops = {
@@ -1300,15 +1297,17 @@ static struct dentry *empty_dir_lookup(struct inode *dir, struct dentry *dentry,
return ERR_PTR(-ENOENT);
}
-static int empty_dir_getattr(const struct path *path, struct kstat *stat,
+static int empty_dir_getattr(struct user_namespace *mnt_userns,
+ const struct path *path, struct kstat *stat,
u32 request_mask, unsigned int query_flags)
{
struct inode *inode = d_inode(path->dentry);
- generic_fillattr(inode, stat);
+ generic_fillattr(&init_user_ns, inode, stat);
return 0;
}
-static int empty_dir_setattr(struct dentry *dentry, struct iattr *attr)
+static int empty_dir_setattr(struct user_namespace *mnt_userns,
+ struct dentry *dentry, struct iattr *attr)
{
return -EPERM;
}
diff --git a/fs/minix/bitmap.c b/fs/minix/bitmap.c
index f4e5e5181a14..9115948c624e 100644
--- a/fs/minix/bitmap.c
+++ b/fs/minix/bitmap.c
@@ -252,7 +252,7 @@ struct inode *minix_new_inode(const struct inode *dir, umode_t mode, int *error)
iput(inode);
return NULL;
}
- inode_init_owner(inode, dir, mode);
+ inode_init_owner(&init_user_ns, inode, dir, mode);
inode->i_ino = j;
inode->i_mtime = inode->i_atime = inode->i_ctime = current_time(inode);
inode->i_blocks = 0;
diff --git a/fs/minix/file.c b/fs/minix/file.c
index c50b0a20fcd9..6a7bd2d9eec0 100644
--- a/fs/minix/file.c
+++ b/fs/minix/file.c
@@ -22,12 +22,13 @@ const struct file_operations minix_file_operations = {
.splice_read = generic_file_splice_read,
};
-static int minix_setattr(struct dentry *dentry, struct iattr *attr)
+static int minix_setattr(struct user_namespace *mnt_userns,
+ struct dentry *dentry, struct iattr *attr)
{
struct inode *inode = d_inode(dentry);
int error;
- error = setattr_prepare(dentry, attr);
+ error = setattr_prepare(&init_user_ns, dentry, attr);
if (error)
return error;
@@ -41,7 +42,7 @@ static int minix_setattr(struct dentry *dentry, struct iattr *attr)
minix_truncate(inode);
}
- setattr_copy(inode, attr);
+ setattr_copy(&init_user_ns, inode, attr);
mark_inode_dirty(inode);
return 0;
}
diff --git a/fs/minix/inode.c b/fs/minix/inode.c
index 34f546404aa1..a532a99bbe81 100644
--- a/fs/minix/inode.c
+++ b/fs/minix/inode.c
@@ -652,13 +652,13 @@ static int minix_write_inode(struct inode *inode, struct writeback_control *wbc)
return err;
}
-int minix_getattr(const struct path *path, struct kstat *stat,
- u32 request_mask, unsigned int flags)
+int minix_getattr(struct user_namespace *mnt_userns, const struct path *path,
+ struct kstat *stat, u32 request_mask, unsigned int flags)
{
struct super_block *sb = path->dentry->d_sb;
struct inode *inode = d_inode(path->dentry);
- generic_fillattr(inode, stat);
+ generic_fillattr(&init_user_ns, inode, stat);
if (INODE_VERSION(inode) == MINIX_V1)
stat->blocks = (BLOCK_SIZE / 512) * V1_minix_blocks(stat->size, sb);
else
diff --git a/fs/minix/minix.h b/fs/minix/minix.h
index 168d45d3de73..202173368025 100644
--- a/fs/minix/minix.h
+++ b/fs/minix/minix.h
@@ -51,7 +51,8 @@ extern unsigned long minix_count_free_inodes(struct super_block *sb);
extern int minix_new_block(struct inode * inode);
extern void minix_free_block(struct inode *inode, unsigned long block);
extern unsigned long minix_count_free_blocks(struct super_block *sb);
-extern int minix_getattr(const struct path *, struct kstat *, u32, unsigned int);
+extern int minix_getattr(struct user_namespace *, const struct path *,
+ struct kstat *, u32, unsigned int);
extern int minix_prepare_chunk(struct page *page, loff_t pos, unsigned len);
extern void V1_minix_truncate(struct inode *);
diff --git a/fs/minix/namei.c b/fs/minix/namei.c
index 1a6084d2b02e..937fa5fae2b8 100644
--- a/fs/minix/namei.c
+++ b/fs/minix/namei.c
@@ -33,7 +33,8 @@ static struct dentry *minix_lookup(struct inode * dir, struct dentry *dentry, un
return d_splice_alias(inode, dentry);
}
-static int minix_mknod(struct inode * dir, struct dentry *dentry, umode_t mode, dev_t rdev)
+static int minix_mknod(struct user_namespace *mnt_userns, struct inode *dir,
+ struct dentry *dentry, umode_t mode, dev_t rdev)
{
int error;
struct inode *inode;
@@ -51,7 +52,8 @@ static int minix_mknod(struct inode * dir, struct dentry *dentry, umode_t mode,
return error;
}
-static int minix_tmpfile(struct inode *dir, struct dentry *dentry, umode_t mode)
+static int minix_tmpfile(struct user_namespace *mnt_userns, struct inode *dir,
+ struct dentry *dentry, umode_t mode)
{
int error;
struct inode *inode = minix_new_inode(dir, mode, &error);
@@ -63,14 +65,14 @@ static int minix_tmpfile(struct inode *dir, struct dentry *dentry, umode_t mode)
return error;
}
-static int minix_create(struct inode *dir, struct dentry *dentry, umode_t mode,
- bool excl)
+static int minix_create(struct user_namespace *mnt_userns, struct inode *dir,
+ struct dentry *dentry, umode_t mode, bool excl)
{
- return minix_mknod(dir, dentry, mode, 0);
+ return minix_mknod(mnt_userns, dir, dentry, mode, 0);
}
-static int minix_symlink(struct inode * dir, struct dentry *dentry,
- const char * symname)
+static int minix_symlink(struct user_namespace *mnt_userns, struct inode *dir,
+ struct dentry *dentry, const char *symname)
{
int err = -ENAMETOOLONG;
int i = strlen(symname)+1;
@@ -109,7 +111,8 @@ static int minix_link(struct dentry * old_dentry, struct inode * dir,
return add_nondir(dentry, inode);
}
-static int minix_mkdir(struct inode * dir, struct dentry *dentry, umode_t mode)
+static int minix_mkdir(struct user_namespace *mnt_userns, struct inode *dir,
+ struct dentry *dentry, umode_t mode)
{
struct inode * inode;
int err;
@@ -181,8 +184,9 @@ static int minix_rmdir(struct inode * dir, struct dentry *dentry)
return err;
}
-static int minix_rename(struct inode * old_dir, struct dentry *old_dentry,
- struct inode * new_dir, struct dentry *new_dentry,
+static int minix_rename(struct user_namespace *mnt_userns,
+ struct inode *old_dir, struct dentry *old_dentry,
+ struct inode *new_dir, struct dentry *new_dentry,
unsigned int flags)
{
struct inode * old_inode = d_inode(old_dentry);
diff --git a/fs/mount.h b/fs/mount.h
index ce6c376e0bc2..0b6e08cf8afb 100644
--- a/fs/mount.h
+++ b/fs/mount.h
@@ -124,16 +124,6 @@ static inline void get_mnt_ns(struct mnt_namespace *ns)
extern seqlock_t mount_lock;
-static inline void lock_mount_hash(void)
-{
- write_seqlock(&mount_lock);
-}
-
-static inline void unlock_mount_hash(void)
-{
- write_sequnlock(&mount_lock);
-}
-
struct proc_mounts {
struct mnt_namespace *ns;
struct path root;
diff --git a/fs/namei.c b/fs/namei.c
index de74ad2bc6e2..216f16e74351 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -259,7 +259,24 @@ void putname(struct filename *name)
__putname(name);
}
-static int check_acl(struct inode *inode, int mask)
+/**
+ * check_acl - perform ACL permission checking
+ * @mnt_userns: user namespace of the mount the inode was found from
+ * @inode: inode to check permissions on
+ * @mask: right to check for (%MAY_READ, %MAY_WRITE, %MAY_EXEC ...)
+ *
+ * This function performs the ACL permission checking. Since this function
+ * retrieve POSIX acls it needs to know whether it is called from a blocking or
+ * non-blocking context and thus cares about the MAY_NOT_BLOCK bit.
+ *
+ * If the inode has been found through an idmapped mount the user namespace of
+ * the vfsmount must be passed through @mnt_userns. This function will then take
+ * care to map the inode according to @mnt_userns before checking permissions.
+ * On non-idmapped mounts or if permission checking is to be performed on the
+ * raw inode simply passs init_user_ns.
+ */
+static int check_acl(struct user_namespace *mnt_userns,
+ struct inode *inode, int mask)
{
#ifdef CONFIG_FS_POSIX_ACL
struct posix_acl *acl;
@@ -271,14 +288,14 @@ static int check_acl(struct inode *inode, int mask)
/* no ->get_acl() calls in RCU mode... */
if (is_uncached_acl(acl))
return -ECHILD;
- return posix_acl_permission(inode, acl, mask);
+ return posix_acl_permission(mnt_userns, inode, acl, mask);
}
acl = get_acl(inode, ACL_TYPE_ACCESS);
if (IS_ERR(acl))
return PTR_ERR(acl);
if (acl) {
- int error = posix_acl_permission(inode, acl, mask);
+ int error = posix_acl_permission(mnt_userns, inode, acl, mask);
posix_acl_release(acl);
return error;
}
@@ -287,18 +304,31 @@ static int check_acl(struct inode *inode, int mask)
return -EAGAIN;
}
-/*
- * This does the basic UNIX permission checking.
+/**
+ * acl_permission_check - perform basic UNIX permission checking
+ * @mnt_userns: user namespace of the mount the inode was found from
+ * @inode: inode to check permissions on
+ * @mask: right to check for (%MAY_READ, %MAY_WRITE, %MAY_EXEC ...)
+ *
+ * This function performs the basic UNIX permission checking. Since this
+ * function may retrieve POSIX acls it needs to know whether it is called from a
+ * blocking or non-blocking context and thus cares about the MAY_NOT_BLOCK bit.
*
- * Note that the POSIX ACL check cares about the MAY_NOT_BLOCK bit,
- * for RCU walking.
+ * If the inode has been found through an idmapped mount the user namespace of
+ * the vfsmount must be passed through @mnt_userns. This function will then take
+ * care to map the inode according to @mnt_userns before checking permissions.
+ * On non-idmapped mounts or if permission checking is to be performed on the
+ * raw inode simply passs init_user_ns.
*/
-static int acl_permission_check(struct inode *inode, int mask)
+static int acl_permission_check(struct user_namespace *mnt_userns,
+ struct inode *inode, int mask)
{
unsigned int mode = inode->i_mode;
+ kuid_t i_uid;
/* Are we the owner? If so, ACL's don't matter */
- if (likely(uid_eq(current_fsuid(), inode->i_uid))) {
+ i_uid = i_uid_into_mnt(mnt_userns, inode);
+ if (likely(uid_eq(current_fsuid(), i_uid))) {
mask &= 7;
mode >>= 6;
return (mask & ~mode) ? -EACCES : 0;
@@ -306,7 +336,7 @@ static int acl_permission_check(struct inode *inode, int mask)
/* Do we have ACL's? */
if (IS_POSIXACL(inode) && (mode & S_IRWXG)) {
- int error = check_acl(inode, mask);
+ int error = check_acl(mnt_userns, inode, mask);
if (error != -EAGAIN)
return error;
}
@@ -320,7 +350,8 @@ static int acl_permission_check(struct inode *inode, int mask)
* about? Need to check group ownership if so.
*/
if (mask & (mode ^ (mode >> 3))) {
- if (in_group_p(inode->i_gid))
+ kgid_t kgid = i_gid_into_mnt(mnt_userns, inode);
+ if (in_group_p(kgid))
mode >>= 3;
}
@@ -330,6 +361,7 @@ static int acl_permission_check(struct inode *inode, int mask)
/**
* generic_permission - check for access rights on a Posix-like filesystem
+ * @mnt_userns: user namespace of the mount the inode was found from
* @inode: inode to check access rights for
* @mask: right to check for (%MAY_READ, %MAY_WRITE, %MAY_EXEC,
* %MAY_NOT_BLOCK ...)
@@ -342,25 +374,33 @@ static int acl_permission_check(struct inode *inode, int mask)
* generic_permission is rcu-walk aware. It returns -ECHILD in case an rcu-walk
* request cannot be satisfied (eg. requires blocking or too much complexity).
* It would then be called again in ref-walk mode.
+ *
+ * If the inode has been found through an idmapped mount the user namespace of
+ * the vfsmount must be passed through @mnt_userns. This function will then take
+ * care to map the inode according to @mnt_userns before checking permissions.
+ * On non-idmapped mounts or if permission checking is to be performed on the
+ * raw inode simply passs init_user_ns.
*/
-int generic_permission(struct inode *inode, int mask)
+int generic_permission(struct user_namespace *mnt_userns, struct inode *inode,
+ int mask)
{
int ret;
/*
* Do the basic permission checks.
*/
- ret = acl_permission_check(inode, mask);
+ ret = acl_permission_check(mnt_userns, inode, mask);
if (ret != -EACCES)
return ret;
if (S_ISDIR(inode->i_mode)) {
/* DACs are overridable for directories */
if (!(mask & MAY_WRITE))
- if (capable_wrt_inode_uidgid(inode,
+ if (capable_wrt_inode_uidgid(mnt_userns, inode,
CAP_DAC_READ_SEARCH))
return 0;
- if (capable_wrt_inode_uidgid(inode, CAP_DAC_OVERRIDE))
+ if (capable_wrt_inode_uidgid(mnt_userns, inode,
+ CAP_DAC_OVERRIDE))
return 0;
return -EACCES;
}
@@ -370,7 +410,8 @@ int generic_permission(struct inode *inode, int mask)
*/
mask &= MAY_READ | MAY_WRITE | MAY_EXEC;
if (mask == MAY_READ)
- if (capable_wrt_inode_uidgid(inode, CAP_DAC_READ_SEARCH))
+ if (capable_wrt_inode_uidgid(mnt_userns, inode,
+ CAP_DAC_READ_SEARCH))
return 0;
/*
* Read/write DACs are always overridable.
@@ -378,31 +419,38 @@ int generic_permission(struct inode *inode, int mask)
* at least one exec bit set.
*/
if (!(mask & MAY_EXEC) || (inode->i_mode & S_IXUGO))
- if (capable_wrt_inode_uidgid(inode, CAP_DAC_OVERRIDE))
+ if (capable_wrt_inode_uidgid(mnt_userns, inode,
+ CAP_DAC_OVERRIDE))
return 0;
return -EACCES;
}
EXPORT_SYMBOL(generic_permission);
-/*
+/**
+ * do_inode_permission - UNIX permission checking
+ * @mnt_userns: user namespace of the mount the inode was found from
+ * @inode: inode to check permissions on
+ * @mask: right to check for (%MAY_READ, %MAY_WRITE, %MAY_EXEC ...)
+ *
* We _really_ want to just do "generic_permission()" without
* even looking at the inode->i_op values. So we keep a cache
* flag in inode->i_opflags, that says "this has not special
* permission function, use the fast case".
*/
-static inline int do_inode_permission(struct inode *inode, int mask)
+static inline int do_inode_permission(struct user_namespace *mnt_userns,
+ struct inode *inode, int mask)
{
if (unlikely(!(inode->i_opflags & IOP_FASTPERM))) {
if (likely(inode->i_op->permission))
- return inode->i_op->permission(inode, mask);
+ return inode->i_op->permission(mnt_userns, inode, mask);
/* This gets set once for the inode lifetime */
spin_lock(&inode->i_lock);
inode->i_opflags |= IOP_FASTPERM;
spin_unlock(&inode->i_lock);
}
- return generic_permission(inode, mask);
+ return generic_permission(mnt_userns, inode, mask);
}
/**
@@ -427,8 +475,9 @@ static int sb_permission(struct super_block *sb, struct inode *inode, int mask)
/**
* inode_permission - Check for access rights to a given inode
- * @inode: Inode to check permission on
- * @mask: Right to check for (%MAY_READ, %MAY_WRITE, %MAY_EXEC)
+ * @mnt_userns: User namespace of the mount the inode was found from
+ * @inode: Inode to check permission on
+ * @mask: Right to check for (%MAY_READ, %MAY_WRITE, %MAY_EXEC)
*
* Check for read/write/execute permissions on an inode. We use fs[ug]id for
* this, letting us set arbitrary permissions for filesystem access without
@@ -436,7 +485,8 @@ static int sb_permission(struct super_block *sb, struct inode *inode, int mask)
*
* When checking for MAY_APPEND, MAY_WRITE must also be set in @mask.
*/
-int inode_permission(struct inode *inode, int mask)
+int inode_permission(struct user_namespace *mnt_userns,
+ struct inode *inode, int mask)
{
int retval;
@@ -456,11 +506,11 @@ int inode_permission(struct inode *inode, int mask)
* written back improperly if their true value is unknown
* to the vfs.
*/
- if (HAS_UNMAPPED_ID(inode))
+ if (HAS_UNMAPPED_ID(mnt_userns, inode))
return -EACCES;
}
- retval = do_inode_permission(inode, mask);
+ retval = do_inode_permission(mnt_userns, inode, mask);
if (retval)
return retval;
@@ -960,11 +1010,16 @@ int sysctl_protected_regular __read_mostly;
*/
static inline int may_follow_link(struct nameidata *nd, const struct inode *inode)
{
+ struct user_namespace *mnt_userns;
+ kuid_t i_uid;
+
if (!sysctl_protected_symlinks)
return 0;
+ mnt_userns = mnt_user_ns(nd->path.mnt);
+ i_uid = i_uid_into_mnt(mnt_userns, inode);
/* Allowed if owner and follower match. */
- if (uid_eq(current_cred()->fsuid, inode->i_uid))
+ if (uid_eq(current_cred()->fsuid, i_uid))
return 0;
/* Allowed if parent directory not sticky and world-writable. */
@@ -972,7 +1027,7 @@ static inline int may_follow_link(struct nameidata *nd, const struct inode *inod
return 0;
/* Allowed if parent directory and link owner match. */
- if (uid_valid(nd->dir_uid) && uid_eq(nd->dir_uid, inode->i_uid))
+ if (uid_valid(nd->dir_uid) && uid_eq(nd->dir_uid, i_uid))
return 0;
if (nd->flags & LOOKUP_RCU)
@@ -985,6 +1040,7 @@ static inline int may_follow_link(struct nameidata *nd, const struct inode *inod
/**
* safe_hardlink_source - Check for safe hardlink conditions
+ * @mnt_userns: user namespace of the mount the inode was found from
* @inode: the source inode to hardlink from
*
* Return false if at least one of the following conditions:
@@ -995,7 +1051,8 @@ static inline int may_follow_link(struct nameidata *nd, const struct inode *inod
*
* Otherwise returns true.
*/
-static bool safe_hardlink_source(struct inode *inode)
+static bool safe_hardlink_source(struct user_namespace *mnt_userns,
+ struct inode *inode)
{
umode_t mode = inode->i_mode;
@@ -1012,7 +1069,7 @@ static bool safe_hardlink_source(struct inode *inode)
return false;
/* Hardlinking to unreadable or unwritable sources is dangerous. */
- if (inode_permission(inode, MAY_READ | MAY_WRITE))
+ if (inode_permission(mnt_userns, inode, MAY_READ | MAY_WRITE))
return false;
return true;
@@ -1020,6 +1077,7 @@ static bool safe_hardlink_source(struct inode *inode)
/**
* may_linkat - Check permissions for creating a hardlink
+ * @mnt_userns: user namespace of the mount the inode was found from
* @link: the source to hardlink from
*
* Block hardlink when all of:
@@ -1028,14 +1086,21 @@ static bool safe_hardlink_source(struct inode *inode)
* - hardlink source is unsafe (see safe_hardlink_source() above)
* - not CAP_FOWNER in a namespace with the inode owner uid mapped
*
+ * If the inode has been found through an idmapped mount the user namespace of
+ * the vfsmount must be passed through @mnt_userns. This function will then take
+ * care to map the inode according to @mnt_userns before checking permissions.
+ * On non-idmapped mounts or if permission checking is to be performed on the
+ * raw inode simply passs init_user_ns.
+ *
* Returns 0 if successful, -ve on error.
*/
-int may_linkat(struct path *link)
+int may_linkat(struct user_namespace *mnt_userns, struct path *link)
{
struct inode *inode = link->dentry->d_inode;
/* Inode writeback is not safe when the uid or gid are invalid. */
- if (!uid_valid(inode->i_uid) || !gid_valid(inode->i_gid))
+ if (!uid_valid(i_uid_into_mnt(mnt_userns, inode)) ||
+ !gid_valid(i_gid_into_mnt(mnt_userns, inode)))
return -EOVERFLOW;
if (!sysctl_protected_hardlinks)
@@ -1044,7 +1109,8 @@ int may_linkat(struct path *link)
/* Source inode owner (or CAP_FOWNER) can hardlink all they like,
* otherwise, it must be a safe source.
*/
- if (safe_hardlink_source(inode) || inode_owner_or_capable(inode))
+ if (safe_hardlink_source(mnt_userns, inode) ||
+ inode_owner_or_capable(mnt_userns, inode))
return 0;
audit_log_path_denied(AUDIT_ANOM_LINK, "linkat");
@@ -1055,6 +1121,7 @@ int may_linkat(struct path *link)
* may_create_in_sticky - Check whether an O_CREAT open in a sticky directory
* should be allowed, or not, on files that already
* exist.
+ * @mnt_userns: user namespace of the mount the inode was found from
* @dir_mode: mode bits of directory
* @dir_uid: owner of directory
* @inode: the inode of the file to open
@@ -1070,16 +1137,25 @@ int may_linkat(struct path *link)
* the directory doesn't have to be world writable: being group writable will
* be enough.
*
+ * If the inode has been found through an idmapped mount the user namespace of
+ * the vfsmount must be passed through @mnt_userns. This function will then take
+ * care to map the inode according to @mnt_userns before checking permissions.
+ * On non-idmapped mounts or if permission checking is to be performed on the
+ * raw inode simply passs init_user_ns.
+ *
* Returns 0 if the open is allowed, -ve on error.
*/
-static int may_create_in_sticky(umode_t dir_mode, kuid_t dir_uid,
- struct inode * const inode)
+static int may_create_in_sticky(struct user_namespace *mnt_userns,
+ struct nameidata *nd, struct inode *const inode)
{
+ umode_t dir_mode = nd->dir_mode;
+ kuid_t dir_uid = nd->dir_uid;
+
if ((!sysctl_protected_fifos && S_ISFIFO(inode->i_mode)) ||
(!sysctl_protected_regular && S_ISREG(inode->i_mode)) ||
likely(!(dir_mode & S_ISVTX)) ||
- uid_eq(inode->i_uid, dir_uid) ||
- uid_eq(current_fsuid(), inode->i_uid))
+ uid_eq(i_uid_into_mnt(mnt_userns, inode), dir_uid) ||
+ uid_eq(current_fsuid(), i_uid_into_mnt(mnt_userns, inode)))
return 0;
if (likely(dir_mode & 0002) ||
@@ -1569,14 +1645,15 @@ static struct dentry *lookup_slow(const struct qstr *name,
return res;
}
-static inline int may_lookup(struct nameidata *nd)
+static inline int may_lookup(struct user_namespace *mnt_userns,
+ struct nameidata *nd)
{
if (nd->flags & LOOKUP_RCU) {
- int err = inode_permission(nd->inode, MAY_EXEC|MAY_NOT_BLOCK);
+ int err = inode_permission(mnt_userns, nd->inode, MAY_EXEC|MAY_NOT_BLOCK);
if (err != -ECHILD || !try_to_unlazy(nd))
return err;
}
- return inode_permission(nd->inode, MAY_EXEC);
+ return inode_permission(mnt_userns, nd->inode, MAY_EXEC);
}
static int reserve_stack(struct nameidata *nd, struct path *link, unsigned seq)
@@ -2122,11 +2199,13 @@ static int link_path_walk(const char *name, struct nameidata *nd)
/* At this point we know we have a real path component. */
for(;;) {
+ struct user_namespace *mnt_userns;
const char *link;
u64 hash_len;
int type;
- err = may_lookup(nd);
+ mnt_userns = mnt_user_ns(nd->path.mnt);
+ err = may_lookup(mnt_userns, nd);
if (err)
return err;
@@ -2174,7 +2253,7 @@ static int link_path_walk(const char *name, struct nameidata *nd)
OK:
/* pathname or trailing symlink, done */
if (!depth) {
- nd->dir_uid = nd->inode->i_uid;
+ nd->dir_uid = i_uid_into_mnt(mnt_userns, nd->inode);
nd->dir_mode = nd->inode->i_mode;
nd->flags &= ~LOOKUP_PARENT;
return 0;
@@ -2511,7 +2590,7 @@ static int lookup_one_len_common(const char *name, struct dentry *base,
return err;
}
- return inode_permission(base->d_inode, MAY_EXEC);
+ return inode_permission(&init_user_ns, base->d_inode, MAY_EXEC);
}
/**
@@ -2656,15 +2735,16 @@ int user_path_at_empty(int dfd, const char __user *name, unsigned flags,
}
EXPORT_SYMBOL(user_path_at_empty);
-int __check_sticky(struct inode *dir, struct inode *inode)
+int __check_sticky(struct user_namespace *mnt_userns, struct inode *dir,
+ struct inode *inode)
{
kuid_t fsuid = current_fsuid();
- if (uid_eq(inode->i_uid, fsuid))
+ if (uid_eq(i_uid_into_mnt(mnt_userns, inode), fsuid))
return 0;
- if (uid_eq(dir->i_uid, fsuid))
+ if (uid_eq(i_uid_into_mnt(mnt_userns, dir), fsuid))
return 0;
- return !capable_wrt_inode_uidgid(inode, CAP_FOWNER);
+ return !capable_wrt_inode_uidgid(mnt_userns, inode, CAP_FOWNER);
}
EXPORT_SYMBOL(__check_sticky);
@@ -2688,7 +2768,8 @@ EXPORT_SYMBOL(__check_sticky);
* 11. We don't allow removal of NFS sillyrenamed files; it's handled by
* nfs_async_unlink().
*/
-static int may_delete(struct inode *dir, struct dentry *victim, bool isdir)
+static int may_delete(struct user_namespace *mnt_userns, struct inode *dir,
+ struct dentry *victim, bool isdir)
{
struct inode *inode = d_backing_inode(victim);
int error;
@@ -2700,19 +2781,21 @@ static int may_delete(struct inode *dir, struct dentry *victim, bool isdir)
BUG_ON(victim->d_parent->d_inode != dir);
/* Inode writeback is not safe when the uid or gid are invalid. */
- if (!uid_valid(inode->i_uid) || !gid_valid(inode->i_gid))
+ if (!uid_valid(i_uid_into_mnt(mnt_userns, inode)) ||
+ !gid_valid(i_gid_into_mnt(mnt_userns, inode)))
return -EOVERFLOW;
audit_inode_child(dir, victim, AUDIT_TYPE_CHILD_DELETE);
- error = inode_permission(dir, MAY_WRITE | MAY_EXEC);
+ error = inode_permission(mnt_userns, dir, MAY_WRITE | MAY_EXEC);
if (error)
return error;
if (IS_APPEND(dir))
return -EPERM;
- if (check_sticky(dir, inode) || IS_APPEND(inode) ||
- IS_IMMUTABLE(inode) || IS_SWAPFILE(inode) || HAS_UNMAPPED_ID(inode))
+ if (check_sticky(mnt_userns, dir, inode) || IS_APPEND(inode) ||
+ IS_IMMUTABLE(inode) || IS_SWAPFILE(inode) ||
+ HAS_UNMAPPED_ID(mnt_userns, inode))
return -EPERM;
if (isdir) {
if (!d_is_dir(victim))
@@ -2737,7 +2820,8 @@ static int may_delete(struct inode *dir, struct dentry *victim, bool isdir)
* 4. We should have write and exec permissions on dir
* 5. We can't do it if dir is immutable (done in permission())
*/
-static inline int may_create(struct inode *dir, struct dentry *child)
+static inline int may_create(struct user_namespace *mnt_userns,
+ struct inode *dir, struct dentry *child)
{
struct user_namespace *s_user_ns;
audit_inode_child(dir, child, AUDIT_TYPE_CHILD_CREATE);
@@ -2746,10 +2830,10 @@ static inline int may_create(struct inode *dir, struct dentry *child)
if (IS_DEADDIR(dir))
return -ENOENT;
s_user_ns = dir->i_sb->s_user_ns;
- if (!kuid_has_mapping(s_user_ns, current_fsuid()) ||
- !kgid_has_mapping(s_user_ns, current_fsgid()))
+ if (!kuid_has_mapping(s_user_ns, fsuid_into_mnt(mnt_userns)) ||
+ !kgid_has_mapping(s_user_ns, fsgid_into_mnt(mnt_userns)))
return -EOVERFLOW;
- return inode_permission(dir, MAY_WRITE | MAY_EXEC);
+ return inode_permission(mnt_userns, dir, MAY_WRITE | MAY_EXEC);
}
/*
@@ -2796,10 +2880,26 @@ void unlock_rename(struct dentry *p1, struct dentry *p2)
}
EXPORT_SYMBOL(unlock_rename);
-int vfs_create(struct inode *dir, struct dentry *dentry, umode_t mode,
- bool want_excl)
+/**
+ * vfs_create - create new file
+ * @mnt_userns: user namespace of the mount the inode was found from
+ * @dir: inode of @dentry
+ * @dentry: pointer to dentry of the base directory
+ * @mode: mode of the new file
+ * @want_excl: whether the file must not yet exist
+ *
+ * Create a new file.
+ *
+ * If the inode has been found through an idmapped mount the user namespace of
+ * the vfsmount must be passed through @mnt_userns. This function will then take
+ * care to map the inode according to @mnt_userns before checking permissions.
+ * On non-idmapped mounts or if permission checking is to be performed on the
+ * raw inode simply passs init_user_ns.
+ */
+int vfs_create(struct user_namespace *mnt_userns, struct inode *dir,
+ struct dentry *dentry, umode_t mode, bool want_excl)
{
- int error = may_create(dir, dentry);
+ int error = may_create(mnt_userns, dir, dentry);
if (error)
return error;
@@ -2810,7 +2910,7 @@ int vfs_create(struct inode *dir, struct dentry *dentry, umode_t mode,
error = security_inode_create(dir, dentry, mode);
if (error)
return error;
- error = dir->i_op->create(dir, dentry, mode, want_excl);
+ error = dir->i_op->create(mnt_userns, dir, dentry, mode, want_excl);
if (!error)
fsnotify_create(dir, dentry);
return error;
@@ -2822,7 +2922,7 @@ int vfs_mkobj(struct dentry *dentry, umode_t mode,
void *arg)
{
struct inode *dir = dentry->d_parent->d_inode;
- int error = may_create(dir, dentry);
+ int error = may_create(&init_user_ns, dir, dentry);
if (error)
return error;
@@ -2844,7 +2944,8 @@ bool may_open_dev(const struct path *path)
!(path->mnt->mnt_sb->s_iflags & SB_I_NODEV);
}
-static int may_open(const struct path *path, int acc_mode, int flag)
+static int may_open(struct user_namespace *mnt_userns, const struct path *path,
+ int acc_mode, int flag)
{
struct dentry *dentry = path->dentry;
struct inode *inode = dentry->d_inode;
@@ -2879,7 +2980,7 @@ static int may_open(const struct path *path, int acc_mode, int flag)
break;
}
- error = inode_permission(inode, MAY_OPEN | acc_mode);
+ error = inode_permission(mnt_userns, inode, MAY_OPEN | acc_mode);
if (error)
return error;
@@ -2894,13 +2995,13 @@ static int may_open(const struct path *path, int acc_mode, int flag)
}
/* O_NOATIME can only be set by the owner or superuser */
- if (flag & O_NOATIME && !inode_owner_or_capable(inode))
+ if (flag & O_NOATIME && !inode_owner_or_capable(mnt_userns, inode))
return -EPERM;
return 0;
}
-static int handle_truncate(struct file *filp)
+static int handle_truncate(struct user_namespace *mnt_userns, struct file *filp)
{
const struct path *path = &filp->f_path;
struct inode *inode = path->dentry->d_inode;
@@ -2914,7 +3015,7 @@ static int handle_truncate(struct file *filp)
if (!error)
error = security_path_truncate(path);
if (!error) {
- error = do_truncate(path->dentry, 0,
+ error = do_truncate(mnt_userns, path->dentry, 0,
ATTR_MTIME|ATTR_CTIME|ATTR_OPEN,
filp);
}
@@ -2929,7 +3030,9 @@ static inline int open_to_namei_flags(int flag)
return flag;
}
-static int may_o_create(const struct path *dir, struct dentry *dentry, umode_t mode)
+static int may_o_create(struct user_namespace *mnt_userns,
+ const struct path *dir, struct dentry *dentry,
+ umode_t mode)
{
struct user_namespace *s_user_ns;
int error = security_path_mknod(dir, dentry, mode, 0);
@@ -2937,11 +3040,12 @@ static int may_o_create(const struct path *dir, struct dentry *dentry, umode_t m
return error;
s_user_ns = dir->dentry->d_sb->s_user_ns;
- if (!kuid_has_mapping(s_user_ns, current_fsuid()) ||
- !kgid_has_mapping(s_user_ns, current_fsgid()))
+ if (!kuid_has_mapping(s_user_ns, fsuid_into_mnt(mnt_userns)) ||
+ !kgid_has_mapping(s_user_ns, fsgid_into_mnt(mnt_userns)))
return -EOVERFLOW;
- error = inode_permission(dir->dentry->d_inode, MAY_WRITE | MAY_EXEC);
+ error = inode_permission(mnt_userns, dir->dentry->d_inode,
+ MAY_WRITE | MAY_EXEC);
if (error)
return error;
@@ -3020,6 +3124,7 @@ static struct dentry *lookup_open(struct nameidata *nd, struct file *file,
const struct open_flags *op,
bool got_write)
{
+ struct user_namespace *mnt_userns;
struct dentry *dir = nd->path.dentry;
struct inode *dir_inode = dir->d_inode;
int open_flag = op->open_flag;
@@ -3067,13 +3172,15 @@ static struct dentry *lookup_open(struct nameidata *nd, struct file *file,
*/
if (unlikely(!got_write))
open_flag &= ~O_TRUNC;
+ mnt_userns = mnt_user_ns(nd->path.mnt);
if (open_flag & O_CREAT) {
if (open_flag & O_EXCL)
open_flag &= ~O_TRUNC;
if (!IS_POSIXACL(dir->d_inode))
mode &= ~current_umask();
if (likely(got_write))
- create_error = may_o_create(&nd->path, dentry, mode);
+ create_error = may_o_create(mnt_userns, &nd->path,
+ dentry, mode);
else
create_error = -EROFS;
}
@@ -3108,8 +3215,9 @@ static struct dentry *lookup_open(struct nameidata *nd, struct file *file,
error = -EACCES;
goto out_dput;
}
- error = dir_inode->i_op->create(dir_inode, dentry, mode,
- open_flag & O_EXCL);
+
+ error = dir_inode->i_op->create(mnt_userns, dir_inode, dentry,
+ mode, open_flag & O_EXCL);
if (error)
goto out_dput;
}
@@ -3213,6 +3321,7 @@ finish_lookup:
static int do_open(struct nameidata *nd,
struct file *file, const struct open_flags *op)
{
+ struct user_namespace *mnt_userns;
int open_flag = op->open_flag;
bool do_truncate;
int acc_mode;
@@ -3225,12 +3334,13 @@ static int do_open(struct nameidata *nd,
}
if (!(file->f_mode & FMODE_CREATED))
audit_inode(nd->name, nd->path.dentry, 0);
+ mnt_userns = mnt_user_ns(nd->path.mnt);
if (open_flag & O_CREAT) {
if ((open_flag & O_EXCL) && !(file->f_mode & FMODE_CREATED))
return -EEXIST;
if (d_is_dir(nd->path.dentry))
return -EISDIR;
- error = may_create_in_sticky(nd->dir_mode, nd->dir_uid,
+ error = may_create_in_sticky(mnt_userns, nd,
d_backing_inode(nd->path.dentry));
if (unlikely(error))
return error;
@@ -3250,13 +3360,13 @@ static int do_open(struct nameidata *nd,
return error;
do_truncate = true;
}
- error = may_open(&nd->path, acc_mode, open_flag);
+ error = may_open(mnt_userns, &nd->path, acc_mode, open_flag);
if (!error && !(file->f_mode & FMODE_OPENED))
error = vfs_open(&nd->path, file);
if (!error)
error = ima_file_check(file, op->acc_mode);
if (!error && do_truncate)
- error = handle_truncate(file);
+ error = handle_truncate(mnt_userns, file);
if (unlikely(error > 0)) {
WARN_ON(1);
error = -EINVAL;
@@ -3266,7 +3376,23 @@ static int do_open(struct nameidata *nd,
return error;
}
-struct dentry *vfs_tmpfile(struct dentry *dentry, umode_t mode, int open_flag)
+/**
+ * vfs_tmpfile - create tmpfile
+ * @mnt_userns: user namespace of the mount the inode was found from
+ * @dentry: pointer to dentry of the base directory
+ * @mode: mode of the new tmpfile
+ * @open_flags: flags
+ *
+ * Create a temporary file.
+ *
+ * If the inode has been found through an idmapped mount the user namespace of
+ * the vfsmount must be passed through @mnt_userns. This function will then take
+ * care to map the inode according to @mnt_userns before checking permissions.
+ * On non-idmapped mounts or if permission checking is to be performed on the
+ * raw inode simply passs init_user_ns.
+ */
+struct dentry *vfs_tmpfile(struct user_namespace *mnt_userns,
+ struct dentry *dentry, umode_t mode, int open_flag)
{
struct dentry *child = NULL;
struct inode *dir = dentry->d_inode;
@@ -3274,7 +3400,7 @@ struct dentry *vfs_tmpfile(struct dentry *dentry, umode_t mode, int open_flag)
int error;
/* we want directory to be writable */
- error = inode_permission(dir, MAY_WRITE | MAY_EXEC);
+ error = inode_permission(mnt_userns, dir, MAY_WRITE | MAY_EXEC);
if (error)
goto out_err;
error = -EOPNOTSUPP;
@@ -3284,7 +3410,7 @@ struct dentry *vfs_tmpfile(struct dentry *dentry, umode_t mode, int open_flag)
child = d_alloc(dentry, &slash_name);
if (unlikely(!child))
goto out_err;
- error = dir->i_op->tmpfile(dir, child, mode);
+ error = dir->i_op->tmpfile(mnt_userns, dir, child, mode);
if (error)
goto out_err;
error = -ENOENT;
@@ -3296,7 +3422,7 @@ struct dentry *vfs_tmpfile(struct dentry *dentry, umode_t mode, int open_flag)
inode->i_state |= I_LINKABLE;
spin_unlock(&inode->i_lock);
}
- ima_post_create_tmpfile(inode);
+ ima_post_create_tmpfile(mnt_userns, inode);
return child;
out_err:
@@ -3309,6 +3435,7 @@ static int do_tmpfile(struct nameidata *nd, unsigned flags,
const struct open_flags *op,
struct file *file)
{
+ struct user_namespace *mnt_userns;
struct dentry *child;
struct path path;
int error = path_lookupat(nd, flags | LOOKUP_DIRECTORY, &path);
@@ -3317,7 +3444,8 @@ static int do_tmpfile(struct nameidata *nd, unsigned flags,
error = mnt_want_write(path.mnt);
if (unlikely(error))
goto out;
- child = vfs_tmpfile(path.dentry, op->mode, op->open_flag);
+ mnt_userns = mnt_user_ns(path.mnt);
+ child = vfs_tmpfile(mnt_userns, path.dentry, op->mode, op->open_flag);
error = PTR_ERR(child);
if (IS_ERR(child))
goto out2;
@@ -3325,7 +3453,7 @@ static int do_tmpfile(struct nameidata *nd, unsigned flags,
path.dentry = child;
audit_inode(nd->name, child, 0);
/* Don't check for other permissions, the inode was just created */
- error = may_open(&path, 0, op->open_flag);
+ error = may_open(mnt_userns, &path, 0, op->open_flag);
if (!error)
error = vfs_open(&path, file);
out2:
@@ -3527,10 +3655,27 @@ inline struct dentry *user_path_create(int dfd, const char __user *pathname,
}
EXPORT_SYMBOL(user_path_create);
-int vfs_mknod(struct inode *dir, struct dentry *dentry, umode_t mode, dev_t dev)
+/**
+ * vfs_mknod - create device node or file
+ * @mnt_userns: user namespace of the mount the inode was found from
+ * @dir: inode of @dentry
+ * @dentry: pointer to dentry of the base directory
+ * @mode: mode of the new device node or file
+ * @dev: device number of device to create
+ *
+ * Create a device node or file.
+ *
+ * If the inode has been found through an idmapped mount the user namespace of
+ * the vfsmount must be passed through @mnt_userns. This function will then take
+ * care to map the inode according to @mnt_userns before checking permissions.
+ * On non-idmapped mounts or if permission checking is to be performed on the
+ * raw inode simply passs init_user_ns.
+ */
+int vfs_mknod(struct user_namespace *mnt_userns, struct inode *dir,
+ struct dentry *dentry, umode_t mode, dev_t dev)
{
bool is_whiteout = S_ISCHR(mode) && dev == WHITEOUT_DEV;
- int error = may_create(dir, dentry);
+ int error = may_create(mnt_userns, dir, dentry);
if (error)
return error;
@@ -3550,7 +3695,7 @@ int vfs_mknod(struct inode *dir, struct dentry *dentry, umode_t mode, dev_t dev)
if (error)
return error;
- error = dir->i_op->mknod(dir, dentry, mode, dev);
+ error = dir->i_op->mknod(mnt_userns, dir, dentry, mode, dev);
if (!error)
fsnotify_create(dir, dentry);
return error;
@@ -3577,6 +3722,7 @@ static int may_mknod(umode_t mode)
static long do_mknodat(int dfd, const char __user *filename, umode_t mode,
unsigned int dev)
{
+ struct user_namespace *mnt_userns;
struct dentry *dentry;
struct path path;
int error;
@@ -3595,18 +3741,22 @@ retry:
error = security_path_mknod(&path, dentry, mode, dev);
if (error)
goto out;
+
+ mnt_userns = mnt_user_ns(path.mnt);
switch (mode & S_IFMT) {
case 0: case S_IFREG:
- error = vfs_create(path.dentry->d_inode,dentry,mode,true);
+ error = vfs_create(mnt_userns, path.dentry->d_inode,
+ dentry, mode, true);
if (!error)
- ima_post_path_mknod(dentry);
+ ima_post_path_mknod(mnt_userns, dentry);
break;
case S_IFCHR: case S_IFBLK:
- error = vfs_mknod(path.dentry->d_inode,dentry,mode,
- new_decode_dev(dev));
+ error = vfs_mknod(mnt_userns, path.dentry->d_inode,
+ dentry, mode, new_decode_dev(dev));
break;
case S_IFIFO: case S_IFSOCK:
- error = vfs_mknod(path.dentry->d_inode,dentry,mode,0);
+ error = vfs_mknod(mnt_userns, path.dentry->d_inode,
+ dentry, mode, 0);
break;
}
out:
@@ -3629,9 +3779,25 @@ SYSCALL_DEFINE3(mknod, const char __user *, filename, umode_t, mode, unsigned, d
return do_mknodat(AT_FDCWD, filename, mode, dev);
}
-int vfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
+/**
+ * vfs_mkdir - create directory
+ * @mnt_userns: user namespace of the mount the inode was found from
+ * @dir: inode of @dentry
+ * @dentry: pointer to dentry of the base directory
+ * @mode: mode of the new directory
+ *
+ * Create a directory.
+ *
+ * If the inode has been found through an idmapped mount the user namespace of
+ * the vfsmount must be passed through @mnt_userns. This function will then take
+ * care to map the inode according to @mnt_userns before checking permissions.
+ * On non-idmapped mounts or if permission checking is to be performed on the
+ * raw inode simply passs init_user_ns.
+ */
+int vfs_mkdir(struct user_namespace *mnt_userns, struct inode *dir,
+ struct dentry *dentry, umode_t mode)
{
- int error = may_create(dir, dentry);
+ int error = may_create(mnt_userns, dir, dentry);
unsigned max_links = dir->i_sb->s_max_links;
if (error)
@@ -3648,7 +3814,7 @@ int vfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
if (max_links && dir->i_nlink >= max_links)
return -EMLINK;
- error = dir->i_op->mkdir(dir, dentry, mode);
+ error = dir->i_op->mkdir(mnt_userns, dir, dentry, mode);
if (!error)
fsnotify_mkdir(dir, dentry);
return error;
@@ -3670,8 +3836,12 @@ retry:
if (!IS_POSIXACL(path.dentry->d_inode))
mode &= ~current_umask();
error = security_path_mkdir(&path, dentry, mode);
- if (!error)
- error = vfs_mkdir(path.dentry->d_inode, dentry, mode);
+ if (!error) {
+ struct user_namespace *mnt_userns;
+ mnt_userns = mnt_user_ns(path.mnt);
+ error = vfs_mkdir(mnt_userns, path.dentry->d_inode, dentry,
+ mode);
+ }
done_path_create(&path, dentry);
if (retry_estale(error, lookup_flags)) {
lookup_flags |= LOOKUP_REVAL;
@@ -3690,9 +3860,24 @@ SYSCALL_DEFINE2(mkdir, const char __user *, pathname, umode_t, mode)
return do_mkdirat(AT_FDCWD, pathname, mode);
}
-int vfs_rmdir(struct inode *dir, struct dentry *dentry)
+/**
+ * vfs_rmdir - remove directory
+ * @mnt_userns: user namespace of the mount the inode was found from
+ * @dir: inode of @dentry
+ * @dentry: pointer to dentry of the base directory
+ *
+ * Remove a directory.
+ *
+ * If the inode has been found through an idmapped mount the user namespace of
+ * the vfsmount must be passed through @mnt_userns. This function will then take
+ * care to map the inode according to @mnt_userns before checking permissions.
+ * On non-idmapped mounts or if permission checking is to be performed on the
+ * raw inode simply passs init_user_ns.
+ */
+int vfs_rmdir(struct user_namespace *mnt_userns, struct inode *dir,
+ struct dentry *dentry)
{
- int error = may_delete(dir, dentry, 1);
+ int error = may_delete(mnt_userns, dir, dentry, 1);
if (error)
return error;
@@ -3732,6 +3917,7 @@ EXPORT_SYMBOL(vfs_rmdir);
long do_rmdir(int dfd, struct filename *name)
{
+ struct user_namespace *mnt_userns;
int error = 0;
struct dentry *dentry;
struct path path;
@@ -3772,7 +3958,8 @@ retry:
error = security_path_rmdir(&path, dentry);
if (error)
goto exit3;
- error = vfs_rmdir(path.dentry->d_inode, dentry);
+ mnt_userns = mnt_user_ns(path.mnt);
+ error = vfs_rmdir(mnt_userns, path.dentry->d_inode, dentry);
exit3:
dput(dentry);
exit2:
@@ -3795,6 +3982,7 @@ SYSCALL_DEFINE1(rmdir, const char __user *, pathname)
/**
* vfs_unlink - unlink a filesystem object
+ * @mnt_userns: user namespace of the mount the inode was found from
* @dir: parent directory
* @dentry: victim
* @delegated_inode: returns victim inode, if the inode is delegated.
@@ -3810,11 +3998,18 @@ SYSCALL_DEFINE1(rmdir, const char __user *, pathname)
* Alternatively, a caller may pass NULL for delegated_inode. This may
* be appropriate for callers that expect the underlying filesystem not
* to be NFS exported.
+ *
+ * If the inode has been found through an idmapped mount the user namespace of
+ * the vfsmount must be passed through @mnt_userns. This function will then take
+ * care to map the inode according to @mnt_userns before checking permissions.
+ * On non-idmapped mounts or if permission checking is to be performed on the
+ * raw inode simply passs init_user_ns.
*/
-int vfs_unlink(struct inode *dir, struct dentry *dentry, struct inode **delegated_inode)
+int vfs_unlink(struct user_namespace *mnt_userns, struct inode *dir,
+ struct dentry *dentry, struct inode **delegated_inode)
{
struct inode *target = dentry->d_inode;
- int error = may_delete(dir, dentry, 0);
+ int error = may_delete(mnt_userns, dir, dentry, 0);
if (error)
return error;
@@ -3885,6 +4080,8 @@ retry_deleg:
dentry = __lookup_hash(&last, path.dentry, lookup_flags);
error = PTR_ERR(dentry);
if (!IS_ERR(dentry)) {
+ struct user_namespace *mnt_userns;
+
/* Why not before? Because we want correct error value */
if (last.name[last.len])
goto slashes;
@@ -3895,7 +4092,9 @@ retry_deleg:
error = security_path_unlink(&path, dentry);
if (error)
goto exit2;
- error = vfs_unlink(path.dentry->d_inode, dentry, &delegated_inode);
+ mnt_userns = mnt_user_ns(path.mnt);
+ error = vfs_unlink(mnt_userns, path.dentry->d_inode, dentry,
+ &delegated_inode);
exit2:
dput(dentry);
}
@@ -3944,9 +4143,25 @@ SYSCALL_DEFINE1(unlink, const char __user *, pathname)
return do_unlinkat(AT_FDCWD, getname(pathname));
}
-int vfs_symlink(struct inode *dir, struct dentry *dentry, const char *oldname)
+/**
+ * vfs_symlink - create symlink
+ * @mnt_userns: user namespace of the mount the inode was found from
+ * @dir: inode of @dentry
+ * @dentry: pointer to dentry of the base directory
+ * @oldname: name of the file to link to
+ *
+ * Create a symlink.
+ *
+ * If the inode has been found through an idmapped mount the user namespace of
+ * the vfsmount must be passed through @mnt_userns. This function will then take
+ * care to map the inode according to @mnt_userns before checking permissions.
+ * On non-idmapped mounts or if permission checking is to be performed on the
+ * raw inode simply passs init_user_ns.
+ */
+int vfs_symlink(struct user_namespace *mnt_userns, struct inode *dir,
+ struct dentry *dentry, const char *oldname)
{
- int error = may_create(dir, dentry);
+ int error = may_create(mnt_userns, dir, dentry);
if (error)
return error;
@@ -3958,7 +4173,7 @@ int vfs_symlink(struct inode *dir, struct dentry *dentry, const char *oldname)
if (error)
return error;
- error = dir->i_op->symlink(dir, dentry, oldname);
+ error = dir->i_op->symlink(mnt_userns, dir, dentry, oldname);
if (!error)
fsnotify_create(dir, dentry);
return error;
@@ -3984,8 +4199,13 @@ retry:
goto out_putname;
error = security_path_symlink(&path, dentry, from->name);
- if (!error)
- error = vfs_symlink(path.dentry->d_inode, dentry, from->name);
+ if (!error) {
+ struct user_namespace *mnt_userns;
+
+ mnt_userns = mnt_user_ns(path.mnt);
+ error = vfs_symlink(mnt_userns, path.dentry->d_inode, dentry,
+ from->name);
+ }
done_path_create(&path, dentry);
if (retry_estale(error, lookup_flags)) {
lookup_flags |= LOOKUP_REVAL;
@@ -4010,6 +4230,7 @@ SYSCALL_DEFINE2(symlink, const char __user *, oldname, const char __user *, newn
/**
* vfs_link - create a new link
* @old_dentry: object to be linked
+ * @mnt_userns: the user namespace of the mount
* @dir: new parent
* @new_dentry: where to create the new link
* @delegated_inode: returns inode needing a delegation break
@@ -4025,8 +4246,16 @@ SYSCALL_DEFINE2(symlink, const char __user *, oldname, const char __user *, newn
* Alternatively, a caller may pass NULL for delegated_inode. This may
* be appropriate for callers that expect the underlying filesystem not
* to be NFS exported.
+ *
+ * If the inode has been found through an idmapped mount the user namespace of
+ * the vfsmount must be passed through @mnt_userns. This function will then take
+ * care to map the inode according to @mnt_userns before checking permissions.
+ * On non-idmapped mounts or if permission checking is to be performed on the
+ * raw inode simply passs init_user_ns.
*/
-int vfs_link(struct dentry *old_dentry, struct inode *dir, struct dentry *new_dentry, struct inode **delegated_inode)
+int vfs_link(struct dentry *old_dentry, struct user_namespace *mnt_userns,
+ struct inode *dir, struct dentry *new_dentry,
+ struct inode **delegated_inode)
{
struct inode *inode = old_dentry->d_inode;
unsigned max_links = dir->i_sb->s_max_links;
@@ -4035,7 +4264,7 @@ int vfs_link(struct dentry *old_dentry, struct inode *dir, struct dentry *new_de
if (!inode)
return -ENOENT;
- error = may_create(dir, new_dentry);
+ error = may_create(mnt_userns, dir, new_dentry);
if (error)
return error;
@@ -4052,7 +4281,7 @@ int vfs_link(struct dentry *old_dentry, struct inode *dir, struct dentry *new_de
* be writen back improperly if their true value is unknown to
* the vfs.
*/
- if (HAS_UNMAPPED_ID(inode))
+ if (HAS_UNMAPPED_ID(mnt_userns, inode))
return -EPERM;
if (!dir->i_op->link)
return -EPERM;
@@ -4099,6 +4328,7 @@ EXPORT_SYMBOL(vfs_link);
static int do_linkat(int olddfd, const char __user *oldname, int newdfd,
const char __user *newname, int flags)
{
+ struct user_namespace *mnt_userns;
struct dentry *new_dentry;
struct path old_path, new_path;
struct inode *delegated_inode = NULL;
@@ -4134,13 +4364,15 @@ retry:
error = -EXDEV;
if (old_path.mnt != new_path.mnt)
goto out_dput;
- error = may_linkat(&old_path);
+ mnt_userns = mnt_user_ns(new_path.mnt);
+ error = may_linkat(mnt_userns, &old_path);
if (unlikely(error))
goto out_dput;
error = security_path_link(old_path.dentry, &new_path, new_dentry);
if (error)
goto out_dput;
- error = vfs_link(old_path.dentry, new_path.dentry->d_inode, new_dentry, &delegated_inode);
+ error = vfs_link(old_path.dentry, mnt_userns, new_path.dentry->d_inode,
+ new_dentry, &delegated_inode);
out_dput:
done_path_create(&new_path, new_dentry);
if (delegated_inode) {
@@ -4174,12 +4406,14 @@ SYSCALL_DEFINE2(link, const char __user *, oldname, const char __user *, newname
/**
* vfs_rename - rename a filesystem object
- * @old_dir: parent of source
- * @old_dentry: source
- * @new_dir: parent of destination
- * @new_dentry: destination
- * @delegated_inode: returns an inode needing a delegation break
- * @flags: rename flags
+ * @old_mnt_userns: old user namespace of the mount the inode was found from
+ * @old_dir: parent of source
+ * @old_dentry: source
+ * @new_mnt_userns: new user namespace of the mount the inode was found from
+ * @new_dir: parent of destination
+ * @new_dentry: destination
+ * @delegated_inode: returns an inode needing a delegation break
+ * @flags: rename flags
*
* The caller must hold multiple mutexes--see lock_rename()).
*
@@ -4222,11 +4456,14 @@ SYSCALL_DEFINE2(link, const char __user *, oldname, const char __user *, newname
* ->i_mutex on parents, which works but leads to some truly excessive
* locking].
*/
-int vfs_rename(struct inode *old_dir, struct dentry *old_dentry,
- struct inode *new_dir, struct dentry *new_dentry,
- struct inode **delegated_inode, unsigned int flags)
+int vfs_rename(struct renamedata *rd)
{
int error;
+ struct inode *old_dir = rd->old_dir, *new_dir = rd->new_dir;
+ struct dentry *old_dentry = rd->old_dentry;
+ struct dentry *new_dentry = rd->new_dentry;
+ struct inode **delegated_inode = rd->delegated_inode;
+ unsigned int flags = rd->flags;
bool is_dir = d_is_dir(old_dentry);
struct inode *source = old_dentry->d_inode;
struct inode *target = new_dentry->d_inode;
@@ -4237,19 +4474,21 @@ int vfs_rename(struct inode *old_dir, struct dentry *old_dentry,
if (source == target)
return 0;
- error = may_delete(old_dir, old_dentry, is_dir);
+ error = may_delete(rd->old_mnt_userns, old_dir, old_dentry, is_dir);
if (error)
return error;
if (!target) {
- error = may_create(new_dir, new_dentry);
+ error = may_create(rd->new_mnt_userns, new_dir, new_dentry);
} else {
new_is_dir = d_is_dir(new_dentry);
if (!(flags & RENAME_EXCHANGE))
- error = may_delete(new_dir, new_dentry, is_dir);
+ error = may_delete(rd->new_mnt_userns, new_dir,
+ new_dentry, is_dir);
else
- error = may_delete(new_dir, new_dentry, new_is_dir);
+ error = may_delete(rd->new_mnt_userns, new_dir,
+ new_dentry, new_is_dir);
}
if (error)
return error;
@@ -4263,12 +4502,14 @@ int vfs_rename(struct inode *old_dir, struct dentry *old_dentry,
*/
if (new_dir != old_dir) {
if (is_dir) {
- error = inode_permission(source, MAY_WRITE);
+ error = inode_permission(rd->old_mnt_userns, source,
+ MAY_WRITE);
if (error)
return error;
}
if ((flags & RENAME_EXCHANGE) && new_is_dir) {
- error = inode_permission(target, MAY_WRITE);
+ error = inode_permission(rd->new_mnt_userns, target,
+ MAY_WRITE);
if (error)
return error;
}
@@ -4308,8 +4549,8 @@ int vfs_rename(struct inode *old_dir, struct dentry *old_dentry,
if (error)
goto out;
}
- error = old_dir->i_op->rename(old_dir, old_dentry,
- new_dir, new_dentry, flags);
+ error = old_dir->i_op->rename(rd->new_mnt_userns, old_dir, old_dentry,
+ new_dir, new_dentry, flags);
if (error)
goto out;
@@ -4350,6 +4591,7 @@ EXPORT_SYMBOL(vfs_rename);
int do_renameat2(int olddfd, struct filename *from, int newdfd,
struct filename *to, unsigned int flags)
{
+ struct renamedata rd;
struct dentry *old_dentry, *new_dentry;
struct dentry *trap;
struct path old_path, new_path;
@@ -4453,9 +4695,16 @@ retry_deleg:
&new_path, new_dentry, flags);
if (error)
goto exit5;
- error = vfs_rename(old_path.dentry->d_inode, old_dentry,
- new_path.dentry->d_inode, new_dentry,
- &delegated_inode, flags);
+
+ rd.old_dir = old_path.dentry->d_inode;
+ rd.old_dentry = old_dentry;
+ rd.old_mnt_userns = mnt_user_ns(old_path.mnt);
+ rd.new_dir = new_path.dentry->d_inode;
+ rd.new_dentry = new_dentry;
+ rd.new_mnt_userns = mnt_user_ns(new_path.mnt);
+ rd.delegated_inode = &delegated_inode;
+ rd.flags = flags;
+ error = vfs_rename(&rd);
exit5:
dput(new_dentry);
exit4:
diff --git a/fs/namespace.c b/fs/namespace.c
index 9d33909d0f9e..56bb5a5fdc0d 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -25,6 +25,7 @@
#include <linux/proc_ns.h>
#include <linux/magic.h>
#include <linux/memblock.h>
+#include <linux/proc_fs.h>
#include <linux/task_work.h>
#include <linux/sched/task.h>
#include <uapi/linux/mount.h>
@@ -73,6 +74,15 @@ static DECLARE_RWSEM(namespace_sem);
static HLIST_HEAD(unmounted); /* protected by namespace_sem */
static LIST_HEAD(ex_mountpoints); /* protected by namespace_sem */
+struct mount_kattr {
+ unsigned int attr_set;
+ unsigned int attr_clr;
+ unsigned int propagation;
+ unsigned int lookup_flags;
+ bool recurse;
+ struct user_namespace *mnt_userns;
+};
+
/* /sys/fs */
struct kobject *fs_kobj;
EXPORT_SYMBOL_GPL(fs_kobj);
@@ -87,6 +97,16 @@ EXPORT_SYMBOL_GPL(fs_kobj);
*/
__cacheline_aligned_in_smp DEFINE_SEQLOCK(mount_lock);
+static inline void lock_mount_hash(void)
+{
+ write_seqlock(&mount_lock);
+}
+
+static inline void unlock_mount_hash(void)
+{
+ write_sequnlock(&mount_lock);
+}
+
static inline struct hlist_head *m_hash(struct vfsmount *mnt, struct dentry *dentry)
{
unsigned long tmp = ((unsigned long)mnt / L1_CACHE_BYTES);
@@ -210,6 +230,7 @@ static struct mount *alloc_vfsmnt(const char *name)
INIT_HLIST_NODE(&mnt->mnt_mp_list);
INIT_LIST_HEAD(&mnt->mnt_umounting);
INIT_HLIST_HEAD(&mnt->mnt_stuck_children);
+ mnt->mnt.mnt_userns = &init_user_ns;
}
return mnt;
@@ -360,50 +381,36 @@ int mnt_want_write(struct vfsmount *m)
EXPORT_SYMBOL_GPL(mnt_want_write);
/**
- * mnt_clone_write - get write access to a mount
- * @mnt: the mount on which to take a write
- *
- * This is effectively like mnt_want_write, except
- * it must only be used to take an extra write reference
- * on a mountpoint that we already know has a write reference
- * on it. This allows some optimisation.
- *
- * After finished, mnt_drop_write must be called as usual to
- * drop the reference.
- */
-int mnt_clone_write(struct vfsmount *mnt)
-{
- /* superblock may be r/o */
- if (__mnt_is_readonly(mnt))
- return -EROFS;
- preempt_disable();
- mnt_inc_writers(real_mount(mnt));
- preempt_enable();
- return 0;
-}
-EXPORT_SYMBOL_GPL(mnt_clone_write);
-
-/**
* __mnt_want_write_file - get write access to a file's mount
* @file: the file who's mount on which to take a write
*
- * This is like __mnt_want_write, but it takes a file and can
- * do some optimisations if the file is open for write already
+ * This is like __mnt_want_write, but if the file is already open for writing it
+ * skips incrementing mnt_writers (since the open file already has a reference)
+ * and instead only does the check for emergency r/o remounts. This must be
+ * paired with __mnt_drop_write_file.
*/
int __mnt_want_write_file(struct file *file)
{
- if (!(file->f_mode & FMODE_WRITER))
- return __mnt_want_write(file->f_path.mnt);
- else
- return mnt_clone_write(file->f_path.mnt);
+ if (file->f_mode & FMODE_WRITER) {
+ /*
+ * Superblock may have become readonly while there are still
+ * writable fd's, e.g. due to a fs error with errors=remount-ro
+ */
+ if (__mnt_is_readonly(file->f_path.mnt))
+ return -EROFS;
+ return 0;
+ }
+ return __mnt_want_write(file->f_path.mnt);
}
/**
* mnt_want_write_file - get write access to a file's mount
* @file: the file who's mount on which to take a write
*
- * This is like mnt_want_write, but it takes a file and can
- * do some optimisations if the file is open for write already
+ * This is like mnt_want_write, but if the file is already open for writing it
+ * skips incrementing mnt_writers (since the open file already has a reference)
+ * and instead only does the freeze protection and the check for emergency r/o
+ * remounts. This must be paired with mnt_drop_write_file.
*/
int mnt_want_write_file(struct file *file)
{
@@ -449,7 +456,8 @@ EXPORT_SYMBOL_GPL(mnt_drop_write);
void __mnt_drop_write_file(struct file *file)
{
- __mnt_drop_write(file->f_path.mnt);
+ if (!(file->f_mode & FMODE_WRITER))
+ __mnt_drop_write(file->f_path.mnt);
}
void mnt_drop_write_file(struct file *file)
@@ -459,11 +467,8 @@ void mnt_drop_write_file(struct file *file)
}
EXPORT_SYMBOL(mnt_drop_write_file);
-static int mnt_make_readonly(struct mount *mnt)
+static inline int mnt_hold_writers(struct mount *mnt)
{
- int ret = 0;
-
- lock_mount_hash();
mnt->mnt.mnt_flags |= MNT_WRITE_HOLD;
/*
* After storing MNT_WRITE_HOLD, we'll read the counters. This store
@@ -488,25 +493,30 @@ static int mnt_make_readonly(struct mount *mnt)
* we're counting up here.
*/
if (mnt_get_writers(mnt) > 0)
- ret = -EBUSY;
- else
- mnt->mnt.mnt_flags |= MNT_READONLY;
+ return -EBUSY;
+
+ return 0;
+}
+
+static inline void mnt_unhold_writers(struct mount *mnt)
+{
/*
* MNT_READONLY must become visible before ~MNT_WRITE_HOLD, so writers
* that become unheld will see MNT_READONLY.
*/
smp_wmb();
mnt->mnt.mnt_flags &= ~MNT_WRITE_HOLD;
- unlock_mount_hash();
- return ret;
}
-static int __mnt_unmake_readonly(struct mount *mnt)
+static int mnt_make_readonly(struct mount *mnt)
{
- lock_mount_hash();
- mnt->mnt.mnt_flags &= ~MNT_READONLY;
- unlock_mount_hash();
- return 0;
+ int ret;
+
+ ret = mnt_hold_writers(mnt);
+ if (!ret)
+ mnt->mnt.mnt_flags |= MNT_READONLY;
+ mnt_unhold_writers(mnt);
+ return ret;
}
int sb_prepare_remount_readonly(struct super_block *sb)
@@ -547,6 +557,11 @@ int sb_prepare_remount_readonly(struct super_block *sb)
static void free_vfsmnt(struct mount *mnt)
{
+ struct user_namespace *mnt_userns;
+
+ mnt_userns = mnt_user_ns(&mnt->mnt);
+ if (mnt_userns != &init_user_ns)
+ put_user_ns(mnt_userns);
kfree_const(mnt->mnt_devname);
#ifdef CONFIG_SMP
free_percpu(mnt->mnt_pcp);
@@ -1055,6 +1070,9 @@ static struct mount *clone_mnt(struct mount *old, struct dentry *root,
mnt->mnt.mnt_flags &= ~(MNT_WRITE_HOLD|MNT_MARKED|MNT_INTERNAL);
atomic_inc(&sb->s_active);
+ mnt->mnt.mnt_userns = mnt_user_ns(&old->mnt);
+ if (mnt->mnt.mnt_userns != &init_user_ns)
+ mnt->mnt.mnt_userns = get_user_ns(mnt->mnt.mnt_userns);
mnt->mnt.mnt_sb = sb;
mnt->mnt.mnt_root = dget(root);
mnt->mnt_mountpoint = mnt->mnt.mnt_root;
@@ -2514,20 +2532,15 @@ static int change_mount_ro_state(struct mount *mnt, unsigned int mnt_flags)
if (readonly_request)
return mnt_make_readonly(mnt);
- return __mnt_unmake_readonly(mnt);
+ mnt->mnt.mnt_flags &= ~MNT_READONLY;
+ return 0;
}
-/*
- * Update the user-settable attributes on a mount. The caller must hold
- * sb->s_umount for writing.
- */
static void set_mount_attributes(struct mount *mnt, unsigned int mnt_flags)
{
- lock_mount_hash();
mnt_flags |= mnt->mnt.mnt_flags & ~MNT_USER_SETTABLE_MASK;
mnt->mnt.mnt_flags = mnt_flags;
touch_mnt_namespace(mnt->mnt_ns);
- unlock_mount_hash();
}
static void mnt_warn_timestamp_expiry(struct path *mountpoint, struct vfsmount *mnt)
@@ -2572,11 +2585,17 @@ static int do_reconfigure_mnt(struct path *path, unsigned int mnt_flags)
if (!can_change_locked_flags(mnt, mnt_flags))
return -EPERM;
- down_write(&sb->s_umount);
+ /*
+ * We're only checking whether the superblock is read-only not
+ * changing it, so only take down_read(&sb->s_umount).
+ */
+ down_read(&sb->s_umount);
+ lock_mount_hash();
ret = change_mount_ro_state(mnt, mnt_flags);
if (ret == 0)
set_mount_attributes(mnt, mnt_flags);
- up_write(&sb->s_umount);
+ unlock_mount_hash();
+ up_read(&sb->s_umount);
mnt_warn_timestamp_expiry(path, &mnt->mnt);
@@ -2616,8 +2635,11 @@ static int do_remount(struct path *path, int ms_flags, int sb_flags,
err = -EPERM;
if (ns_capable(sb->s_user_ns, CAP_SYS_ADMIN)) {
err = reconfigure_super(fc);
- if (!err)
+ if (!err) {
+ lock_mount_hash();
set_mount_attributes(mnt, mnt_flags);
+ unlock_mount_hash();
+ }
}
up_write(&sb->s_umount);
}
@@ -3440,6 +3462,33 @@ out_type:
return ret;
}
+#define FSMOUNT_VALID_FLAGS \
+ (MOUNT_ATTR_RDONLY | MOUNT_ATTR_NOSUID | MOUNT_ATTR_NODEV | \
+ MOUNT_ATTR_NOEXEC | MOUNT_ATTR__ATIME | MOUNT_ATTR_NODIRATIME)
+
+#define MOUNT_SETATTR_VALID_FLAGS (FSMOUNT_VALID_FLAGS | MOUNT_ATTR_IDMAP)
+
+#define MOUNT_SETATTR_PROPAGATION_FLAGS \
+ (MS_UNBINDABLE | MS_PRIVATE | MS_SLAVE | MS_SHARED)
+
+static unsigned int attr_flags_to_mnt_flags(u64 attr_flags)
+{
+ unsigned int mnt_flags = 0;
+
+ if (attr_flags & MOUNT_ATTR_RDONLY)
+ mnt_flags |= MNT_READONLY;
+ if (attr_flags & MOUNT_ATTR_NOSUID)
+ mnt_flags |= MNT_NOSUID;
+ if (attr_flags & MOUNT_ATTR_NODEV)
+ mnt_flags |= MNT_NODEV;
+ if (attr_flags & MOUNT_ATTR_NOEXEC)
+ mnt_flags |= MNT_NOEXEC;
+ if (attr_flags & MOUNT_ATTR_NODIRATIME)
+ mnt_flags |= MNT_NODIRATIME;
+
+ return mnt_flags;
+}
+
/*
* Create a kernel mount representation for a new, prepared superblock
* (specified by fs_fd) and attach to an open_tree-like file descriptor.
@@ -3462,24 +3511,10 @@ SYSCALL_DEFINE3(fsmount, int, fs_fd, unsigned int, flags,
if ((flags & ~(FSMOUNT_CLOEXEC)) != 0)
return -EINVAL;
- if (attr_flags & ~(MOUNT_ATTR_RDONLY |
- MOUNT_ATTR_NOSUID |
- MOUNT_ATTR_NODEV |
- MOUNT_ATTR_NOEXEC |
- MOUNT_ATTR__ATIME |
- MOUNT_ATTR_NODIRATIME))
+ if (attr_flags & ~FSMOUNT_VALID_FLAGS)
return -EINVAL;
- if (attr_flags & MOUNT_ATTR_RDONLY)
- mnt_flags |= MNT_READONLY;
- if (attr_flags & MOUNT_ATTR_NOSUID)
- mnt_flags |= MNT_NOSUID;
- if (attr_flags & MOUNT_ATTR_NODEV)
- mnt_flags |= MNT_NODEV;
- if (attr_flags & MOUNT_ATTR_NOEXEC)
- mnt_flags |= MNT_NOEXEC;
- if (attr_flags & MOUNT_ATTR_NODIRATIME)
- mnt_flags |= MNT_NODIRATIME;
+ mnt_flags = attr_flags_to_mnt_flags(attr_flags);
switch (attr_flags & MOUNT_ATTR__ATIME) {
case MOUNT_ATTR_STRICTATIME:
@@ -3787,6 +3822,362 @@ out0:
return error;
}
+static unsigned int recalc_flags(struct mount_kattr *kattr, struct mount *mnt)
+{
+ unsigned int flags = mnt->mnt.mnt_flags;
+
+ /* flags to clear */
+ flags &= ~kattr->attr_clr;
+ /* flags to raise */
+ flags |= kattr->attr_set;
+
+ return flags;
+}
+
+static int can_idmap_mount(const struct mount_kattr *kattr, struct mount *mnt)
+{
+ struct vfsmount *m = &mnt->mnt;
+
+ if (!kattr->mnt_userns)
+ return 0;
+
+ /*
+ * Once a mount has been idmapped we don't allow it to change its
+ * mapping. It makes things simpler and callers can just create
+ * another bind-mount they can idmap if they want to.
+ */
+ if (mnt_user_ns(m) != &init_user_ns)
+ return -EPERM;
+
+ /* The underlying filesystem doesn't support idmapped mounts yet. */
+ if (!(m->mnt_sb->s_type->fs_flags & FS_ALLOW_IDMAP))
+ return -EINVAL;
+
+ /* We're not controlling the superblock. */
+ if (!ns_capable(m->mnt_sb->s_user_ns, CAP_SYS_ADMIN))
+ return -EPERM;
+
+ /* Mount has already been visible in the filesystem hierarchy. */
+ if (!is_anon_ns(mnt->mnt_ns))
+ return -EINVAL;
+
+ return 0;
+}
+
+static struct mount *mount_setattr_prepare(struct mount_kattr *kattr,
+ struct mount *mnt, int *err)
+{
+ struct mount *m = mnt, *last = NULL;
+
+ if (!is_mounted(&m->mnt)) {
+ *err = -EINVAL;
+ goto out;
+ }
+
+ if (!(mnt_has_parent(m) ? check_mnt(m) : is_anon_ns(m->mnt_ns))) {
+ *err = -EINVAL;
+ goto out;
+ }
+
+ do {
+ unsigned int flags;
+
+ flags = recalc_flags(kattr, m);
+ if (!can_change_locked_flags(m, flags)) {
+ *err = -EPERM;
+ goto out;
+ }
+
+ *err = can_idmap_mount(kattr, m);
+ if (*err)
+ goto out;
+
+ last = m;
+
+ if ((kattr->attr_set & MNT_READONLY) &&
+ !(m->mnt.mnt_flags & MNT_READONLY)) {
+ *err = mnt_hold_writers(m);
+ if (*err)
+ goto out;
+ }
+ } while (kattr->recurse && (m = next_mnt(m, mnt)));
+
+out:
+ return last;
+}
+
+static void do_idmap_mount(const struct mount_kattr *kattr, struct mount *mnt)
+{
+ struct user_namespace *mnt_userns;
+
+ if (!kattr->mnt_userns)
+ return;
+
+ mnt_userns = get_user_ns(kattr->mnt_userns);
+ /* Pairs with smp_load_acquire() in mnt_user_ns(). */
+ smp_store_release(&mnt->mnt.mnt_userns, mnt_userns);
+}
+
+static void mount_setattr_commit(struct mount_kattr *kattr,
+ struct mount *mnt, struct mount *last,
+ int err)
+{
+ struct mount *m = mnt;
+
+ do {
+ if (!err) {
+ unsigned int flags;
+
+ do_idmap_mount(kattr, m);
+ flags = recalc_flags(kattr, m);
+ WRITE_ONCE(m->mnt.mnt_flags, flags);
+ }
+
+ /*
+ * We either set MNT_READONLY above so make it visible
+ * before ~MNT_WRITE_HOLD or we failed to recursively
+ * apply mount options.
+ */
+ if ((kattr->attr_set & MNT_READONLY) &&
+ (m->mnt.mnt_flags & MNT_WRITE_HOLD))
+ mnt_unhold_writers(m);
+
+ if (!err && kattr->propagation)
+ change_mnt_propagation(m, kattr->propagation);
+
+ /*
+ * On failure, only cleanup until we found the first mount
+ * we failed to handle.
+ */
+ if (err && m == last)
+ break;
+ } while (kattr->recurse && (m = next_mnt(m, mnt)));
+
+ if (!err)
+ touch_mnt_namespace(mnt->mnt_ns);
+}
+
+static int do_mount_setattr(struct path *path, struct mount_kattr *kattr)
+{
+ struct mount *mnt = real_mount(path->mnt), *last = NULL;
+ int err = 0;
+
+ if (path->dentry != mnt->mnt.mnt_root)
+ return -EINVAL;
+
+ if (kattr->propagation) {
+ /*
+ * Only take namespace_lock() if we're actually changing
+ * propagation.
+ */
+ namespace_lock();
+ if (kattr->propagation == MS_SHARED) {
+ err = invent_group_ids(mnt, kattr->recurse);
+ if (err) {
+ namespace_unlock();
+ return err;
+ }
+ }
+ }
+
+ lock_mount_hash();
+
+ /*
+ * Get the mount tree in a shape where we can change mount
+ * properties without failure.
+ */
+ last = mount_setattr_prepare(kattr, mnt, &err);
+ if (last) /* Commit all changes or revert to the old state. */
+ mount_setattr_commit(kattr, mnt, last, err);
+
+ unlock_mount_hash();
+
+ if (kattr->propagation) {
+ namespace_unlock();
+ if (err)
+ cleanup_group_ids(mnt, NULL);
+ }
+
+ return err;
+}
+
+static int build_mount_idmapped(const struct mount_attr *attr, size_t usize,
+ struct mount_kattr *kattr, unsigned int flags)
+{
+ int err = 0;
+ struct ns_common *ns;
+ struct user_namespace *mnt_userns;
+ struct file *file;
+
+ if (!((attr->attr_set | attr->attr_clr) & MOUNT_ATTR_IDMAP))
+ return 0;
+
+ /*
+ * We currently do not support clearing an idmapped mount. If this ever
+ * is a use-case we can revisit this but for now let's keep it simple
+ * and not allow it.
+ */
+ if (attr->attr_clr & MOUNT_ATTR_IDMAP)
+ return -EINVAL;
+
+ if (attr->userns_fd > INT_MAX)
+ return -EINVAL;
+
+ file = fget(attr->userns_fd);
+ if (!file)
+ return -EBADF;
+
+ if (!proc_ns_file(file)) {
+ err = -EINVAL;
+ goto out_fput;
+ }
+
+ ns = get_proc_ns(file_inode(file));
+ if (ns->ops->type != CLONE_NEWUSER) {
+ err = -EINVAL;
+ goto out_fput;
+ }
+
+ /*
+ * The init_user_ns is used to indicate that a vfsmount is not idmapped.
+ * This is simpler than just having to treat NULL as unmapped. Users
+ * wanting to idmap a mount to init_user_ns can just use a namespace
+ * with an identity mapping.
+ */
+ mnt_userns = container_of(ns, struct user_namespace, ns);
+ if (mnt_userns == &init_user_ns) {
+ err = -EPERM;
+ goto out_fput;
+ }
+ kattr->mnt_userns = get_user_ns(mnt_userns);
+
+out_fput:
+ fput(file);
+ return err;
+}
+
+static int build_mount_kattr(const struct mount_attr *attr, size_t usize,
+ struct mount_kattr *kattr, unsigned int flags)
+{
+ unsigned int lookup_flags = LOOKUP_AUTOMOUNT | LOOKUP_FOLLOW;
+
+ if (flags & AT_NO_AUTOMOUNT)
+ lookup_flags &= ~LOOKUP_AUTOMOUNT;
+ if (flags & AT_SYMLINK_NOFOLLOW)
+ lookup_flags &= ~LOOKUP_FOLLOW;
+ if (flags & AT_EMPTY_PATH)
+ lookup_flags |= LOOKUP_EMPTY;
+
+ *kattr = (struct mount_kattr) {
+ .lookup_flags = lookup_flags,
+ .recurse = !!(flags & AT_RECURSIVE),
+ };
+
+ if (attr->propagation & ~MOUNT_SETATTR_PROPAGATION_FLAGS)
+ return -EINVAL;
+ if (hweight32(attr->propagation & MOUNT_SETATTR_PROPAGATION_FLAGS) > 1)
+ return -EINVAL;
+ kattr->propagation = attr->propagation;
+
+ if ((attr->attr_set | attr->attr_clr) & ~MOUNT_SETATTR_VALID_FLAGS)
+ return -EINVAL;
+
+ kattr->attr_set = attr_flags_to_mnt_flags(attr->attr_set);
+ kattr->attr_clr = attr_flags_to_mnt_flags(attr->attr_clr);
+
+ /*
+ * Since the MOUNT_ATTR_<atime> values are an enum, not a bitmap,
+ * users wanting to transition to a different atime setting cannot
+ * simply specify the atime setting in @attr_set, but must also
+ * specify MOUNT_ATTR__ATIME in the @attr_clr field.
+ * So ensure that MOUNT_ATTR__ATIME can't be partially set in
+ * @attr_clr and that @attr_set can't have any atime bits set if
+ * MOUNT_ATTR__ATIME isn't set in @attr_clr.
+ */
+ if (attr->attr_clr & MOUNT_ATTR__ATIME) {
+ if ((attr->attr_clr & MOUNT_ATTR__ATIME) != MOUNT_ATTR__ATIME)
+ return -EINVAL;
+
+ /*
+ * Clear all previous time settings as they are mutually
+ * exclusive.
+ */
+ kattr->attr_clr |= MNT_RELATIME | MNT_NOATIME;
+ switch (attr->attr_set & MOUNT_ATTR__ATIME) {
+ case MOUNT_ATTR_RELATIME:
+ kattr->attr_set |= MNT_RELATIME;
+ break;
+ case MOUNT_ATTR_NOATIME:
+ kattr->attr_set |= MNT_NOATIME;
+ break;
+ case MOUNT_ATTR_STRICTATIME:
+ break;
+ default:
+ return -EINVAL;
+ }
+ } else {
+ if (attr->attr_set & MOUNT_ATTR__ATIME)
+ return -EINVAL;
+ }
+
+ return build_mount_idmapped(attr, usize, kattr, flags);
+}
+
+static void finish_mount_kattr(struct mount_kattr *kattr)
+{
+ put_user_ns(kattr->mnt_userns);
+ kattr->mnt_userns = NULL;
+}
+
+SYSCALL_DEFINE5(mount_setattr, int, dfd, const char __user *, path,
+ unsigned int, flags, struct mount_attr __user *, uattr,
+ size_t, usize)
+{
+ int err;
+ struct path target;
+ struct mount_attr attr;
+ struct mount_kattr kattr;
+
+ BUILD_BUG_ON(sizeof(struct mount_attr) != MOUNT_ATTR_SIZE_VER0);
+
+ if (flags & ~(AT_EMPTY_PATH |
+ AT_RECURSIVE |
+ AT_SYMLINK_NOFOLLOW |
+ AT_NO_AUTOMOUNT))
+ return -EINVAL;
+
+ if (unlikely(usize > PAGE_SIZE))
+ return -E2BIG;
+ if (unlikely(usize < MOUNT_ATTR_SIZE_VER0))
+ return -EINVAL;
+
+ if (!may_mount())
+ return -EPERM;
+
+ err = copy_struct_from_user(&attr, sizeof(attr), uattr, usize);
+ if (err)
+ return err;
+
+ /* Don't bother walking through the mounts if this is a nop. */
+ if (attr.attr_set == 0 &&
+ attr.attr_clr == 0 &&
+ attr.propagation == 0)
+ return 0;
+
+ err = build_mount_kattr(&attr, usize, &kattr, flags);
+ if (err)
+ return err;
+
+ err = user_path_at(dfd, path, kattr.lookup_flags, &target);
+ if (err)
+ return err;
+
+ err = do_mount_setattr(&target, &kattr);
+ finish_mount_kattr(&kattr);
+ path_put(&target);
+ return err;
+}
+
static void __init init_mount_tree(void)
{
struct vfsmount *mnt;
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index ef827ae193d2..19a9f434442f 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -2095,8 +2095,8 @@ EXPORT_SYMBOL_GPL(nfs_instantiate);
* that the operation succeeded on the server, but an error in the
* reply path made it appear to have failed.
*/
-int nfs_create(struct inode *dir, struct dentry *dentry,
- umode_t mode, bool excl)
+int nfs_create(struct user_namespace *mnt_userns, struct inode *dir,
+ struct dentry *dentry, umode_t mode, bool excl)
{
struct iattr attr;
int open_flags = excl ? O_CREAT | O_EXCL : O_CREAT;
@@ -2124,7 +2124,8 @@ EXPORT_SYMBOL_GPL(nfs_create);
* See comments for nfs_proc_create regarding failed operations.
*/
int
-nfs_mknod(struct inode *dir, struct dentry *dentry, umode_t mode, dev_t rdev)
+nfs_mknod(struct user_namespace *mnt_userns, struct inode *dir,
+ struct dentry *dentry, umode_t mode, dev_t rdev)
{
struct iattr attr;
int status;
@@ -2150,7 +2151,8 @@ EXPORT_SYMBOL_GPL(nfs_mknod);
/*
* See comments for nfs_proc_create regarding failed operations.
*/
-int nfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
+int nfs_mkdir(struct user_namespace *mnt_userns, struct inode *dir,
+ struct dentry *dentry, umode_t mode)
{
struct iattr attr;
int error;
@@ -2295,7 +2297,8 @@ EXPORT_SYMBOL_GPL(nfs_unlink);
* now have a new file handle and can instantiate an in-core NFS inode
* and move the raw page into its mapping.
*/
-int nfs_symlink(struct inode *dir, struct dentry *dentry, const char *symname)
+int nfs_symlink(struct user_namespace *mnt_userns, struct inode *dir,
+ struct dentry *dentry, const char *symname)
{
struct page *page;
char *kaddr;
@@ -2398,9 +2401,9 @@ EXPORT_SYMBOL_GPL(nfs_link);
* If these conditions are met, we can drop the dentries before doing
* the rename.
*/
-int nfs_rename(struct inode *old_dir, struct dentry *old_dentry,
- struct inode *new_dir, struct dentry *new_dentry,
- unsigned int flags)
+int nfs_rename(struct user_namespace *mnt_userns, struct inode *old_dir,
+ struct dentry *old_dentry, struct inode *new_dir,
+ struct dentry *new_dentry, unsigned int flags)
{
struct inode *old_inode = d_inode(old_dentry);
struct inode *new_inode = d_inode(new_dentry);
@@ -2939,7 +2942,9 @@ static int nfs_execute_ok(struct inode *inode, int mask)
return ret;
}
-int nfs_permission(struct inode *inode, int mask)
+int nfs_permission(struct user_namespace *mnt_userns,
+ struct inode *inode,
+ int mask)
{
const struct cred *cred = current_cred();
int res = 0;
@@ -2987,7 +2992,7 @@ out_notsup:
res = nfs_revalidate_inode(NFS_SERVER(inode), inode);
if (res == 0)
- res = generic_permission(inode, mask);
+ res = generic_permission(&init_user_ns, inode, mask);
goto out;
}
EXPORT_SYMBOL_GPL(nfs_permission);
diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index 63940a7a70be..16ad5050e046 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -89,7 +89,7 @@ nfs_file_release(struct inode *inode, struct file *filp)
EXPORT_SYMBOL_GPL(nfs_file_release);
/**
- * nfs_revalidate_size - Revalidate the file size
+ * nfs_revalidate_file_size - Revalidate the file size
* @inode: pointer to inode struct
* @filp: pointer to struct file
*
@@ -606,8 +606,8 @@ ssize_t nfs_file_write(struct kiocb *iocb, struct iov_iter *from)
{
struct file *file = iocb->ki_filp;
struct inode *inode = file_inode(file);
- unsigned long written = 0;
- ssize_t result;
+ unsigned int mntflags = NFS_SERVER(inode)->flags;
+ ssize_t result, written;
errseq_t since;
int error;
@@ -626,13 +626,13 @@ ssize_t nfs_file_write(struct kiocb *iocb, struct iov_iter *from)
/*
* O_APPEND implies that we must revalidate the file length.
*/
- if (iocb->ki_flags & IOCB_APPEND) {
+ if (iocb->ki_flags & IOCB_APPEND || iocb->ki_pos > i_size_read(inode)) {
result = nfs_revalidate_file_size(inode, file);
if (result)
goto out;
}
- if (iocb->ki_pos > i_size_read(inode))
- nfs_revalidate_mapping(inode, file->f_mapping);
+
+ nfs_clear_invalid_mapping(file->f_mapping);
since = filemap_sample_wb_err(file->f_mapping);
nfs_start_io_write(inode);
@@ -648,6 +648,21 @@ ssize_t nfs_file_write(struct kiocb *iocb, struct iov_iter *from)
written = result;
iocb->ki_pos += written;
+
+ if (mntflags & NFS_MOUNT_WRITE_EAGER) {
+ result = filemap_fdatawrite_range(file->f_mapping,
+ iocb->ki_pos - written,
+ iocb->ki_pos - 1);
+ if (result < 0)
+ goto out;
+ }
+ if (mntflags & NFS_MOUNT_WRITE_WAIT) {
+ result = filemap_fdatawait_range(file->f_mapping,
+ iocb->ki_pos - written,
+ iocb->ki_pos - 1);
+ if (result < 0)
+ goto out;
+ }
result = generic_write_sync(iocb, written);
if (result < 0)
goto out;
diff --git a/fs/nfs/fs_context.c b/fs/nfs/fs_context.c
index 06894bcdea2d..971a9251c1d9 100644
--- a/fs/nfs/fs_context.c
+++ b/fs/nfs/fs_context.c
@@ -82,6 +82,7 @@ enum nfs_param {
Opt_v,
Opt_vers,
Opt_wsize,
+ Opt_write,
};
enum {
@@ -113,6 +114,19 @@ static const struct constant_table nfs_param_enums_lookupcache[] = {
{}
};
+enum {
+ Opt_write_lazy,
+ Opt_write_eager,
+ Opt_write_wait,
+};
+
+static const struct constant_table nfs_param_enums_write[] = {
+ { "lazy", Opt_write_lazy },
+ { "eager", Opt_write_eager },
+ { "wait", Opt_write_wait },
+ {}
+};
+
static const struct fs_parameter_spec nfs_fs_parameters[] = {
fsparam_flag_no("ac", Opt_ac),
fsparam_u32 ("acdirmax", Opt_acdirmax),
@@ -171,6 +185,7 @@ static const struct fs_parameter_spec nfs_fs_parameters[] = {
fsparam_flag ("v4.1", Opt_v),
fsparam_flag ("v4.2", Opt_v),
fsparam_string("vers", Opt_vers),
+ fsparam_enum ("write", Opt_write, nfs_param_enums_write),
fsparam_u32 ("wsize", Opt_wsize),
{}
};
@@ -770,6 +785,24 @@ static int nfs_fs_context_parse_param(struct fs_context *fc,
goto out_invalid_value;
}
break;
+ case Opt_write:
+ switch (result.uint_32) {
+ case Opt_write_lazy:
+ ctx->flags &=
+ ~(NFS_MOUNT_WRITE_EAGER | NFS_MOUNT_WRITE_WAIT);
+ break;
+ case Opt_write_eager:
+ ctx->flags |= NFS_MOUNT_WRITE_EAGER;
+ ctx->flags &= ~NFS_MOUNT_WRITE_WAIT;
+ break;
+ case Opt_write_wait:
+ ctx->flags |=
+ NFS_MOUNT_WRITE_EAGER | NFS_MOUNT_WRITE_WAIT;
+ break;
+ default:
+ goto out_invalid_value;
+ }
+ break;
/*
* Special options
@@ -1479,6 +1512,8 @@ static int nfs_init_fs_context(struct fs_context *fc)
ctx->selected_flavor = RPC_AUTH_MAXFLAVOR;
ctx->minorversion = 0;
ctx->need_mount = true;
+
+ fc->s_iflags |= SB_I_STABLE_WRITES;
}
fc->fs_private = ctx;
fc->ops = &nfs_fs_context_ops;
diff --git a/fs/nfs/fscache.c b/fs/nfs/fscache.c
index a60df88efc40..c4c021c6ebbd 100644
--- a/fs/nfs/fscache.c
+++ b/fs/nfs/fscache.c
@@ -390,10 +390,6 @@ static void nfs_readpage_from_fscache_complete(struct page *page,
if (!error) {
SetPageUptodate(page);
unlock_page(page);
- } else {
- error = nfs_readpage_async(context, page->mapping->host, page);
- if (error)
- unlock_page(page);
}
}
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index 522aa10a1a3e..749bbea14d99 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -195,6 +195,18 @@ bool nfs_check_cache_invalid(struct inode *inode, unsigned long flags)
}
EXPORT_SYMBOL_GPL(nfs_check_cache_invalid);
+#ifdef CONFIG_NFS_V4_2
+static bool nfs_has_xattr_cache(const struct nfs_inode *nfsi)
+{
+ return nfsi->xattr_cache != NULL;
+}
+#else
+static bool nfs_has_xattr_cache(const struct nfs_inode *nfsi)
+{
+ return false;
+}
+#endif
+
static void nfs_set_cache_invalid(struct inode *inode, unsigned long flags)
{
struct nfs_inode *nfsi = NFS_I(inode);
@@ -209,6 +221,8 @@ static void nfs_set_cache_invalid(struct inode *inode, unsigned long flags)
| NFS_INO_INVALID_XATTR);
}
+ if (!nfs_has_xattr_cache(nfsi))
+ flags &= ~NFS_INO_INVALID_XATTR;
if (inode->i_mapping->nrpages == 0)
flags &= ~(NFS_INO_INVALID_DATA|NFS_INO_DATA_INVAL_DEFER);
nfsi->cache_validity |= flags;
@@ -594,7 +608,8 @@ EXPORT_SYMBOL_GPL(nfs_fhget);
#define NFS_VALID_ATTRS (ATTR_MODE|ATTR_UID|ATTR_GID|ATTR_SIZE|ATTR_ATIME|ATTR_ATIME_SET|ATTR_MTIME|ATTR_MTIME_SET|ATTR_FILE|ATTR_OPEN)
int
-nfs_setattr(struct dentry *dentry, struct iattr *attr)
+nfs_setattr(struct user_namespace *mnt_userns, struct dentry *dentry,
+ struct iattr *attr)
{
struct inode *inode = d_inode(dentry);
struct nfs_fattr *fattr;
@@ -787,8 +802,8 @@ static bool nfs_need_revalidate_inode(struct inode *inode)
return false;
}
-int nfs_getattr(const struct path *path, struct kstat *stat,
- u32 request_mask, unsigned int query_flags)
+int nfs_getattr(struct user_namespace *mnt_userns, const struct path *path,
+ struct kstat *stat, u32 request_mask, unsigned int query_flags)
{
struct inode *inode = d_inode(path->dentry);
struct nfs_server *server = NFS_SERVER(inode);
@@ -857,7 +872,7 @@ out_no_revalidate:
/* Only return attributes that were revalidated. */
stat->result_mask &= request_mask;
out_no_update:
- generic_fillattr(inode, stat);
+ generic_fillattr(&init_user_ns, inode, stat);
stat->ino = nfs_compat_user_ino64(NFS_FILEID(inode));
if (S_ISDIR(inode->i_mode))
stat->blksize = NFS_SERVER(inode)->dtsize;
@@ -1257,55 +1272,19 @@ static int nfs_invalidate_mapping(struct inode *inode, struct address_space *map
return 0;
}
-bool nfs_mapping_need_revalidate_inode(struct inode *inode)
-{
- return nfs_check_cache_invalid(inode, NFS_INO_REVAL_PAGECACHE) ||
- NFS_STALE(inode);
-}
-
-int nfs_revalidate_mapping_rcu(struct inode *inode)
-{
- struct nfs_inode *nfsi = NFS_I(inode);
- unsigned long *bitlock = &nfsi->flags;
- int ret = 0;
-
- if (IS_SWAPFILE(inode))
- goto out;
- if (nfs_mapping_need_revalidate_inode(inode)) {
- ret = -ECHILD;
- goto out;
- }
- spin_lock(&inode->i_lock);
- if (test_bit(NFS_INO_INVALIDATING, bitlock) ||
- (nfsi->cache_validity & NFS_INO_INVALID_DATA))
- ret = -ECHILD;
- spin_unlock(&inode->i_lock);
-out:
- return ret;
-}
-
/**
- * nfs_revalidate_mapping - Revalidate the pagecache
- * @inode: pointer to host inode
+ * nfs_clear_invalid_mapping - Conditionally clear a mapping
* @mapping: pointer to mapping
+ *
+ * If the NFS_INO_INVALID_DATA inode flag is set, clear the mapping.
*/
-int nfs_revalidate_mapping(struct inode *inode,
- struct address_space *mapping)
+int nfs_clear_invalid_mapping(struct address_space *mapping)
{
+ struct inode *inode = mapping->host;
struct nfs_inode *nfsi = NFS_I(inode);
unsigned long *bitlock = &nfsi->flags;
int ret = 0;
- /* swapfiles are not supposed to be shared. */
- if (IS_SWAPFILE(inode))
- goto out;
-
- if (nfs_mapping_need_revalidate_inode(inode)) {
- ret = __nfs_revalidate_inode(NFS_SERVER(inode), inode);
- if (ret < 0)
- goto out;
- }
-
/*
* We must clear NFS_INO_INVALID_DATA first to ensure that
* invalidations that come in while we're shooting down the mappings
@@ -1336,8 +1315,8 @@ int nfs_revalidate_mapping(struct inode *inode,
set_bit(NFS_INO_INVALIDATING, bitlock);
smp_wmb();
- nfsi->cache_validity &= ~(NFS_INO_INVALID_DATA|
- NFS_INO_DATA_INVAL_DEFER);
+ nfsi->cache_validity &=
+ ~(NFS_INO_INVALID_DATA | NFS_INO_DATA_INVAL_DEFER);
spin_unlock(&inode->i_lock);
trace_nfs_invalidate_mapping_enter(inode);
ret = nfs_invalidate_mapping(inode, mapping);
@@ -1350,6 +1329,53 @@ out:
return ret;
}
+bool nfs_mapping_need_revalidate_inode(struct inode *inode)
+{
+ return nfs_check_cache_invalid(inode, NFS_INO_REVAL_PAGECACHE) ||
+ NFS_STALE(inode);
+}
+
+int nfs_revalidate_mapping_rcu(struct inode *inode)
+{
+ struct nfs_inode *nfsi = NFS_I(inode);
+ unsigned long *bitlock = &nfsi->flags;
+ int ret = 0;
+
+ if (IS_SWAPFILE(inode))
+ goto out;
+ if (nfs_mapping_need_revalidate_inode(inode)) {
+ ret = -ECHILD;
+ goto out;
+ }
+ spin_lock(&inode->i_lock);
+ if (test_bit(NFS_INO_INVALIDATING, bitlock) ||
+ (nfsi->cache_validity & NFS_INO_INVALID_DATA))
+ ret = -ECHILD;
+ spin_unlock(&inode->i_lock);
+out:
+ return ret;
+}
+
+/**
+ * nfs_revalidate_mapping - Revalidate the pagecache
+ * @inode: pointer to host inode
+ * @mapping: pointer to mapping
+ */
+int nfs_revalidate_mapping(struct inode *inode, struct address_space *mapping)
+{
+ /* swapfiles are not supposed to be shared. */
+ if (IS_SWAPFILE(inode))
+ return 0;
+
+ if (nfs_mapping_need_revalidate_inode(inode)) {
+ int ret = __nfs_revalidate_inode(NFS_SERVER(inode), inode);
+ if (ret < 0)
+ return ret;
+ }
+
+ return nfs_clear_invalid_mapping(mapping);
+}
+
static bool nfs_file_has_writers(struct nfs_inode *nfsi)
{
struct inode *inode = &nfsi->vfs_inode;
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index 62d3189745cd..25fb43b69e5a 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -378,14 +378,18 @@ extern unsigned long nfs_access_cache_count(struct shrinker *shrink,
extern unsigned long nfs_access_cache_scan(struct shrinker *shrink,
struct shrink_control *sc);
struct dentry *nfs_lookup(struct inode *, struct dentry *, unsigned int);
-int nfs_create(struct inode *, struct dentry *, umode_t, bool);
-int nfs_mkdir(struct inode *, struct dentry *, umode_t);
+int nfs_create(struct user_namespace *, struct inode *, struct dentry *,
+ umode_t, bool);
+int nfs_mkdir(struct user_namespace *, struct inode *, struct dentry *,
+ umode_t);
int nfs_rmdir(struct inode *, struct dentry *);
int nfs_unlink(struct inode *, struct dentry *);
-int nfs_symlink(struct inode *, struct dentry *, const char *);
+int nfs_symlink(struct user_namespace *, struct inode *, struct dentry *,
+ const char *);
int nfs_link(struct dentry *, struct inode *, struct dentry *);
-int nfs_mknod(struct inode *, struct dentry *, umode_t, dev_t);
-int nfs_rename(struct inode *, struct dentry *,
+int nfs_mknod(struct user_namespace *, struct inode *, struct dentry *, umode_t,
+ dev_t);
+int nfs_rename(struct user_namespace *, struct inode *, struct dentry *,
struct inode *, struct dentry *, unsigned int);
/* file.c */
diff --git a/fs/nfs/namespace.c b/fs/nfs/namespace.c
index 2bcbe38afe2e..93e60e921f92 100644
--- a/fs/nfs/namespace.c
+++ b/fs/nfs/namespace.c
@@ -208,20 +208,23 @@ out_fc:
}
static int
-nfs_namespace_getattr(const struct path *path, struct kstat *stat,
- u32 request_mask, unsigned int query_flags)
+nfs_namespace_getattr(struct user_namespace *mnt_userns,
+ const struct path *path, struct kstat *stat,
+ u32 request_mask, unsigned int query_flags)
{
if (NFS_FH(d_inode(path->dentry))->size != 0)
- return nfs_getattr(path, stat, request_mask, query_flags);
- generic_fillattr(d_inode(path->dentry), stat);
+ return nfs_getattr(mnt_userns, path, stat, request_mask,
+ query_flags);
+ generic_fillattr(&init_user_ns, d_inode(path->dentry), stat);
return 0;
}
static int
-nfs_namespace_setattr(struct dentry *dentry, struct iattr *attr)
+nfs_namespace_setattr(struct user_namespace *mnt_userns, struct dentry *dentry,
+ struct iattr *attr)
{
if (NFS_FH(d_inode(dentry))->size != 0)
- return nfs_setattr(dentry, attr);
+ return nfs_setattr(mnt_userns, dentry, attr);
return -EACCES;
}
diff --git a/fs/nfs/nfs3_fs.h b/fs/nfs/nfs3_fs.h
index 1b950b66b3bb..c8a192802dda 100644
--- a/fs/nfs/nfs3_fs.h
+++ b/fs/nfs/nfs3_fs.h
@@ -12,7 +12,8 @@
*/
#ifdef CONFIG_NFS_V3_ACL
extern struct posix_acl *nfs3_get_acl(struct inode *inode, int type);
-extern int nfs3_set_acl(struct inode *inode, struct posix_acl *acl, int type);
+extern int nfs3_set_acl(struct user_namespace *mnt_userns, struct inode *inode,
+ struct posix_acl *acl, int type);
extern int nfs3_proc_setacls(struct inode *inode, struct posix_acl *acl,
struct posix_acl *dfacl);
extern ssize_t nfs3_listxattr(struct dentry *, char *, size_t);
diff --git a/fs/nfs/nfs3acl.c b/fs/nfs/nfs3acl.c
index c6c863382f37..bb386a691e69 100644
--- a/fs/nfs/nfs3acl.c
+++ b/fs/nfs/nfs3acl.c
@@ -111,6 +111,7 @@ struct posix_acl *nfs3_get_acl(struct inode *inode, int type)
fallthrough;
case -ENOTSUPP:
status = -EOPNOTSUPP;
+ goto getout;
default:
goto getout;
}
@@ -251,7 +252,8 @@ int nfs3_proc_setacls(struct inode *inode, struct posix_acl *acl,
}
-int nfs3_set_acl(struct inode *inode, struct posix_acl *acl, int type)
+int nfs3_set_acl(struct user_namespace *mnt_userns, struct inode *inode,
+ struct posix_acl *acl, int type)
{
struct posix_acl *orig = acl, *dfacl = NULL, *alloc;
int status;
diff --git a/fs/nfs/nfs4client.c b/fs/nfs/nfs4client.c
index 86acffe7335c..889a9f4c0310 100644
--- a/fs/nfs/nfs4client.c
+++ b/fs/nfs/nfs4client.c
@@ -609,6 +609,7 @@ found:
* changed. Schedule recovery!
*/
nfs4_schedule_path_down_recovery(pos);
+ goto out;
default:
goto out;
}
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 2f4679a62712..74bc5120013d 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -71,10 +71,6 @@
#include "nfs4trace.h"
-#ifdef CONFIG_NFS_V4_2
-#include "nfs42.h"
-#endif /* CONFIG_NFS_V4_2 */
-
#define NFSDBG_FACILITY NFSDBG_PROC
#define NFS4_BITMASK_SZ 3
@@ -2231,6 +2227,7 @@ static int nfs4_handle_delegation_recall_error(struct nfs_server *server, struct
default:
printk(KERN_ERR "NFS: %s: unhandled error "
"%d.\n", __func__, err);
+ fallthrough;
case 0:
case -ENOENT:
case -EAGAIN:
@@ -5438,15 +5435,16 @@ static void nfs4_bitmask_adjust(__u32 *bitmask, struct inode *inode,
if (cache_validity & NFS_INO_INVALID_ATIME)
bitmask[1] |= FATTR4_WORD1_TIME_ACCESS;
- if (cache_validity & NFS_INO_INVALID_ACCESS)
- bitmask[0] |= FATTR4_WORD1_MODE | FATTR4_WORD1_OWNER |
- FATTR4_WORD1_OWNER_GROUP;
- if (cache_validity & NFS_INO_INVALID_ACL)
- bitmask[0] |= FATTR4_WORD0_ACL;
- if (cache_validity & NFS_INO_INVALID_LABEL)
+ if (cache_validity & NFS_INO_INVALID_OTHER)
+ bitmask[1] |= FATTR4_WORD1_MODE | FATTR4_WORD1_OWNER |
+ FATTR4_WORD1_OWNER_GROUP |
+ FATTR4_WORD1_NUMLINKS;
+ if (label && label->len && cache_validity & NFS_INO_INVALID_LABEL)
bitmask[2] |= FATTR4_WORD2_SECURITY_LABEL;
- if (cache_validity & NFS_INO_INVALID_CTIME)
+ if (cache_validity & NFS_INO_INVALID_CHANGE)
bitmask[0] |= FATTR4_WORD0_CHANGE;
+ if (cache_validity & NFS_INO_INVALID_CTIME)
+ bitmask[1] |= FATTR4_WORD1_TIME_METADATA;
if (cache_validity & NFS_INO_INVALID_MTIME)
bitmask[1] |= FATTR4_WORD1_TIME_MODIFY;
if (cache_validity & NFS_INO_INVALID_SIZE)
@@ -7491,6 +7489,7 @@ nfs4_release_lockowner(struct nfs_server *server, struct nfs4_lock_state *lsp)
#define XATTR_NAME_NFSV4_ACL "system.nfs4_acl"
static int nfs4_xattr_set_nfs4_acl(const struct xattr_handler *handler,
+ struct user_namespace *mnt_userns,
struct dentry *unused, struct inode *inode,
const char *key, const void *buf,
size_t buflen, int flags)
@@ -7513,6 +7512,7 @@ static bool nfs4_xattr_list_nfs4_acl(struct dentry *dentry)
#ifdef CONFIG_NFS_V4_SECURITY_LABEL
static int nfs4_xattr_set_nfs4_label(const struct xattr_handler *handler,
+ struct user_namespace *mnt_userns,
struct dentry *unused, struct inode *inode,
const char *key, const void *buf,
size_t buflen, int flags)
@@ -7563,6 +7563,7 @@ nfs4_listxattr_nfs4_label(struct inode *inode, char *list, size_t list_len)
#ifdef CONFIG_NFS_V4_2
static int nfs4_xattr_set_nfs4_user(const struct xattr_handler *handler,
+ struct user_namespace *mnt_userns,
struct dentry *unused, struct inode *inode,
const char *key, const void *buf,
size_t buflen, int flags)
@@ -9705,6 +9706,7 @@ nfs4_layoutcommit_done(struct rpc_task *task, void *calldata)
case -NFS4ERR_BADLAYOUT: /* no layout */
case -NFS4ERR_GRACE: /* loca_recalim always false */
task->tk_status = 0;
+ break;
case 0:
break;
default:
diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
index 4bf10792cb5b..3a51351bdc6a 100644
--- a/fs/nfs/nfs4state.c
+++ b/fs/nfs/nfs4state.c
@@ -1125,6 +1125,7 @@ static void nfs_increment_seqid(int status, struct nfs_seqid *seqid)
" sequence-id error on an"
" unconfirmed sequence %p!\n",
seqid->sequence);
+ return;
case -NFS4ERR_STALE_CLIENTID:
case -NFS4ERR_STALE_STATEID:
case -NFS4ERR_BAD_STATEID:
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index af64b4e6fd1f..102b66e0bdef 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -2875,6 +2875,7 @@ pnfs_do_write(struct nfs_pageio_descriptor *desc,
switch (trypnfs) {
case PNFS_NOT_ATTEMPTED:
pnfs_write_through_mds(desc, hdr);
+ break;
case PNFS_ATTEMPTED:
break;
case PNFS_TRY_AGAIN:
@@ -3019,6 +3020,7 @@ pnfs_do_read(struct nfs_pageio_descriptor *desc, struct nfs_pgio_header *hdr)
switch (trypnfs) {
case PNFS_NOT_ATTEMPTED:
pnfs_read_through_mds(desc, hdr);
+ break;
case PNFS_ATTEMPTED:
break;
case PNFS_TRY_AGAIN:
diff --git a/fs/nfs/read.c b/fs/nfs/read.c
index eb854f1f86e2..d2b6dce1f99f 100644
--- a/fs/nfs/read.c
+++ b/fs/nfs/read.c
@@ -74,6 +74,24 @@ void nfs_pageio_init_read(struct nfs_pageio_descriptor *pgio,
}
EXPORT_SYMBOL_GPL(nfs_pageio_init_read);
+static void nfs_pageio_complete_read(struct nfs_pageio_descriptor *pgio,
+ struct inode *inode)
+{
+ struct nfs_pgio_mirror *pgm;
+ unsigned long npages;
+
+ nfs_pageio_complete(pgio);
+
+ /* It doesn't make sense to do mirrored reads! */
+ WARN_ON_ONCE(pgio->pg_mirror_count != 1);
+
+ pgm = &pgio->pg_mirrors[0];
+ NFS_I(inode)->read_io += pgm->pg_bytes_written;
+ npages = (pgm->pg_bytes_written + PAGE_SIZE - 1) >> PAGE_SHIFT;
+ nfs_add_stats(inode, NFSIOS_READPAGES, npages);
+}
+
+
void nfs_pageio_reset_read_mds(struct nfs_pageio_descriptor *pgio)
{
struct nfs_pgio_mirror *mirror;
@@ -114,41 +132,10 @@ static void nfs_readpage_release(struct nfs_page *req, int error)
nfs_release_request(req);
}
-int nfs_readpage_async(struct nfs_open_context *ctx, struct inode *inode,
- struct page *page)
-{
- struct nfs_page *new;
- unsigned int len;
+struct nfs_readdesc {
struct nfs_pageio_descriptor pgio;
- struct nfs_pgio_mirror *pgm;
-
- len = nfs_page_length(page);
- if (len == 0)
- return nfs_return_empty_page(page);
- new = nfs_create_request(ctx, page, 0, len);
- if (IS_ERR(new)) {
- unlock_page(page);
- return PTR_ERR(new);
- }
- if (len < PAGE_SIZE)
- zero_user_segment(page, len, PAGE_SIZE);
-
- nfs_pageio_init_read(&pgio, inode, false,
- &nfs_async_read_completion_ops);
- if (!nfs_pageio_add_request(&pgio, new)) {
- nfs_list_remove_request(new);
- nfs_readpage_release(new, pgio.pg_error);
- }
- nfs_pageio_complete(&pgio);
-
- /* It doesn't make sense to do mirrored reads! */
- WARN_ON_ONCE(pgio.pg_mirror_count != 1);
-
- pgm = &pgio.pg_mirrors[0];
- NFS_I(inode)->read_io += pgm->pg_bytes_written;
-
- return pgio.pg_error < 0 ? pgio.pg_error : 0;
-}
+ struct nfs_open_context *ctx;
+};
static void nfs_page_group_set_uptodate(struct nfs_page *req)
{
@@ -171,8 +158,7 @@ static void nfs_read_completion(struct nfs_pgio_header *hdr)
if (test_bit(NFS_IOHDR_EOF, &hdr->flags)) {
/* note: regions of the page not covered by a
- * request are zeroed in nfs_readpage_async /
- * readpage_async_filler */
+ * request are zeroed in readpage_async_filler */
if (bytes > hdr->good_bytes) {
/* nothing in this request was good, so zero
* the full extent of the request */
@@ -304,6 +290,38 @@ static void nfs_readpage_result(struct rpc_task *task,
nfs_readpage_retry(task, hdr);
}
+static int
+readpage_async_filler(void *data, struct page *page)
+{
+ struct nfs_readdesc *desc = data;
+ struct nfs_page *new;
+ unsigned int len;
+ int error;
+
+ len = nfs_page_length(page);
+ if (len == 0)
+ return nfs_return_empty_page(page);
+
+ new = nfs_create_request(desc->ctx, page, 0, len);
+ if (IS_ERR(new))
+ goto out_error;
+
+ if (len < PAGE_SIZE)
+ zero_user_segment(page, len, PAGE_SIZE);
+ if (!nfs_pageio_add_request(&desc->pgio, new)) {
+ nfs_list_remove_request(new);
+ error = desc->pgio.pg_error;
+ nfs_readpage_release(new, error);
+ goto out;
+ }
+ return 0;
+out_error:
+ error = PTR_ERR(new);
+ unlock_page(page);
+out:
+ return error;
+}
+
/*
* Read a page over NFS.
* We read the page synchronously in the following case:
@@ -312,14 +330,13 @@ static void nfs_readpage_result(struct rpc_task *task,
*/
int nfs_readpage(struct file *file, struct page *page)
{
- struct nfs_open_context *ctx;
+ struct nfs_readdesc desc;
struct inode *inode = page_file_mapping(page)->host;
- int error;
+ int ret;
dprintk("NFS: nfs_readpage (%p %ld@%lu)\n",
page, PAGE_SIZE, page_index(page));
nfs_inc_stats(inode, NFSIOS_VFSREADPAGE);
- nfs_add_stats(inode, NFSIOS_READPAGES, 1);
/*
* Try to flush any pending writes to the file..
@@ -328,93 +345,59 @@ int nfs_readpage(struct file *file, struct page *page)
* be any new pending writes generated at this point
* for this page (other pages can be written to).
*/
- error = nfs_wb_page(inode, page);
- if (error)
+ ret = nfs_wb_page(inode, page);
+ if (ret)
goto out_unlock;
if (PageUptodate(page))
goto out_unlock;
- error = -ESTALE;
+ ret = -ESTALE;
if (NFS_STALE(inode))
goto out_unlock;
if (file == NULL) {
- error = -EBADF;
- ctx = nfs_find_open_context(inode, NULL, FMODE_READ);
- if (ctx == NULL)
+ ret = -EBADF;
+ desc.ctx = nfs_find_open_context(inode, NULL, FMODE_READ);
+ if (desc.ctx == NULL)
goto out_unlock;
} else
- ctx = get_nfs_open_context(nfs_file_open_context(file));
+ desc.ctx = get_nfs_open_context(nfs_file_open_context(file));
if (!IS_SYNC(inode)) {
- error = nfs_readpage_from_fscache(ctx, inode, page);
- if (error == 0)
+ ret = nfs_readpage_from_fscache(desc.ctx, inode, page);
+ if (ret == 0)
goto out;
}
- xchg(&ctx->error, 0);
- error = nfs_readpage_async(ctx, inode, page);
- if (!error) {
- error = wait_on_page_locked_killable(page);
- if (!PageUptodate(page) && !error)
- error = xchg(&ctx->error, 0);
- }
-out:
- put_nfs_open_context(ctx);
- return error;
-out_unlock:
- unlock_page(page);
- return error;
-}
-
-struct nfs_readdesc {
- struct nfs_pageio_descriptor *pgio;
- struct nfs_open_context *ctx;
-};
-
-static int
-readpage_async_filler(void *data, struct page *page)
-{
- struct nfs_readdesc *desc = (struct nfs_readdesc *)data;
- struct nfs_page *new;
- unsigned int len;
- int error;
+ xchg(&desc.ctx->error, 0);
+ nfs_pageio_init_read(&desc.pgio, inode, false,
+ &nfs_async_read_completion_ops);
- len = nfs_page_length(page);
- if (len == 0)
- return nfs_return_empty_page(page);
+ ret = readpage_async_filler(&desc, page);
- new = nfs_create_request(desc->ctx, page, 0, len);
- if (IS_ERR(new))
- goto out_error;
+ if (!ret)
+ nfs_pageio_complete_read(&desc.pgio, inode);
- if (len < PAGE_SIZE)
- zero_user_segment(page, len, PAGE_SIZE);
- if (!nfs_pageio_add_request(desc->pgio, new)) {
- nfs_list_remove_request(new);
- error = desc->pgio->pg_error;
- nfs_readpage_release(new, error);
- goto out;
+ ret = desc.pgio.pg_error < 0 ? desc.pgio.pg_error : 0;
+ if (!ret) {
+ ret = wait_on_page_locked_killable(page);
+ if (!PageUptodate(page) && !ret)
+ ret = xchg(&desc.ctx->error, 0);
}
- return 0;
-out_error:
- error = PTR_ERR(new);
- unlock_page(page);
out:
- return error;
+ put_nfs_open_context(desc.ctx);
+ return ret;
+out_unlock:
+ unlock_page(page);
+ return ret;
}
-int nfs_readpages(struct file *filp, struct address_space *mapping,
+int nfs_readpages(struct file *file, struct address_space *mapping,
struct list_head *pages, unsigned nr_pages)
{
- struct nfs_pageio_descriptor pgio;
- struct nfs_pgio_mirror *pgm;
- struct nfs_readdesc desc = {
- .pgio = &pgio,
- };
+ struct nfs_readdesc desc;
struct inode *inode = mapping->host;
- unsigned long npages;
- int ret = -ESTALE;
+ int ret;
dprintk("NFS: nfs_readpages (%s/%Lu %d)\n",
inode->i_sb->s_id,
@@ -422,15 +405,17 @@ int nfs_readpages(struct file *filp, struct address_space *mapping,
nr_pages);
nfs_inc_stats(inode, NFSIOS_VFSREADPAGES);
+ ret = -ESTALE;
if (NFS_STALE(inode))
goto out;
- if (filp == NULL) {
+ if (file == NULL) {
+ ret = -EBADF;
desc.ctx = nfs_find_open_context(inode, NULL, FMODE_READ);
if (desc.ctx == NULL)
- return -EBADF;
+ goto out;
} else
- desc.ctx = get_nfs_open_context(nfs_file_open_context(filp));
+ desc.ctx = get_nfs_open_context(nfs_file_open_context(file));
/* attempt to read as many of the pages as possible from the cache
* - this returns -ENOBUFS immediately if the cookie is negative
@@ -440,20 +425,13 @@ int nfs_readpages(struct file *filp, struct address_space *mapping,
if (ret == 0)
goto read_complete; /* all pages were read */
- nfs_pageio_init_read(&pgio, inode, false,
+ nfs_pageio_init_read(&desc.pgio, inode, false,
&nfs_async_read_completion_ops);
ret = read_cache_pages(mapping, pages, readpage_async_filler, &desc);
- nfs_pageio_complete(&pgio);
- /* It doesn't make sense to do mirrored reads! */
- WARN_ON_ONCE(pgio.pg_mirror_count != 1);
+ nfs_pageio_complete_read(&desc.pgio, inode);
- pgm = &pgio.pg_mirrors[0];
- NFS_I(inode)->read_io += pgm->pg_bytes_written;
- npages = (pgm->pg_bytes_written + PAGE_SIZE - 1) >>
- PAGE_SHIFT;
- nfs_add_stats(inode, NFSIOS_READPAGES, npages);
read_complete:
put_nfs_open_context(desc.ctx);
out:
diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index c7a924580eec..94885c6f8f54 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -523,6 +523,13 @@ static void nfs_show_mount_options(struct seq_file *m, struct nfs_server *nfss,
seq_puts(m, ",local_lock=flock");
else
seq_puts(m, ",local_lock=posix");
+
+ if (nfss->flags & NFS_MOUNT_WRITE_EAGER) {
+ if (nfss->flags & NFS_MOUNT_WRITE_WAIT)
+ seq_puts(m, ",write=wait");
+ else
+ seq_puts(m, ",write=eager");
+ }
}
/*
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 639c34fec04a..82bdcb982186 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -712,16 +712,23 @@ int nfs_writepages(struct address_space *mapping, struct writeback_control *wbc)
{
struct inode *inode = mapping->host;
struct nfs_pageio_descriptor pgio;
- struct nfs_io_completion *ioc;
+ struct nfs_io_completion *ioc = NULL;
+ unsigned int mntflags = NFS_SERVER(inode)->flags;
+ int priority = 0;
int err;
nfs_inc_stats(inode, NFSIOS_VFSWRITEPAGES);
- ioc = nfs_io_completion_alloc(GFP_KERNEL);
- if (ioc)
- nfs_io_completion_init(ioc, nfs_io_completion_commit, inode);
+ if (!(mntflags & NFS_MOUNT_WRITE_EAGER) || wbc->for_kupdate ||
+ wbc->for_background || wbc->for_sync || wbc->for_reclaim) {
+ ioc = nfs_io_completion_alloc(GFP_KERNEL);
+ if (ioc)
+ nfs_io_completion_init(ioc, nfs_io_completion_commit,
+ inode);
+ priority = wb_priority(wbc);
+ }
- nfs_pageio_init_write(&pgio, inode, wb_priority(wbc), false,
+ nfs_pageio_init_write(&pgio, inode, priority, false,
&nfs_async_write_completion_ops);
pgio.pg_io_completion = ioc;
err = write_cache_pages(mapping, wbc, nfs_writepages_callback, &pgio);
@@ -1278,19 +1285,21 @@ bool nfs_ctx_key_to_expire(struct nfs_open_context *ctx, struct inode *inode)
* the PageUptodate() flag. In this case, we will need to turn off
* write optimisations that depend on the page contents being correct.
*/
-static bool nfs_write_pageuptodate(struct page *page, struct inode *inode)
+static bool nfs_write_pageuptodate(struct page *page, struct inode *inode,
+ unsigned int pagelen)
{
struct nfs_inode *nfsi = NFS_I(inode);
if (nfs_have_delegated_attributes(inode))
goto out;
- if (nfsi->cache_validity & NFS_INO_REVAL_PAGECACHE)
+ if (nfsi->cache_validity &
+ (NFS_INO_REVAL_PAGECACHE | NFS_INO_INVALID_SIZE))
return false;
smp_rmb();
- if (test_bit(NFS_INO_INVALIDATING, &nfsi->flags))
+ if (test_bit(NFS_INO_INVALIDATING, &nfsi->flags) && pagelen != 0)
return false;
out:
- if (nfsi->cache_validity & NFS_INO_INVALID_DATA)
+ if (nfsi->cache_validity & NFS_INO_INVALID_DATA && pagelen != 0)
return false;
return PageUptodate(page) != 0;
}
@@ -1310,7 +1319,8 @@ is_whole_file_wrlock(struct file_lock *fl)
* If the file is opened for synchronous writes then we can just skip the rest
* of the checks.
*/
-static int nfs_can_extend_write(struct file *file, struct page *page, struct inode *inode)
+static int nfs_can_extend_write(struct file *file, struct page *page,
+ struct inode *inode, unsigned int pagelen)
{
int ret;
struct file_lock_context *flctx = inode->i_flctx;
@@ -1318,7 +1328,7 @@ static int nfs_can_extend_write(struct file *file, struct page *page, struct ino
if (file->f_flags & O_DSYNC)
return 0;
- if (!nfs_write_pageuptodate(page, inode))
+ if (!nfs_write_pageuptodate(page, inode, pagelen))
return 0;
if (NFS_PROTO(inode)->have_delegation(inode, FMODE_WRITE))
return 1;
@@ -1356,6 +1366,7 @@ int nfs_updatepage(struct file *file, struct page *page,
struct nfs_open_context *ctx = nfs_file_open_context(file);
struct address_space *mapping = page_file_mapping(page);
struct inode *inode = mapping->host;
+ unsigned int pagelen = nfs_page_length(page);
int status = 0;
nfs_inc_stats(inode, NFSIOS_VFSUPDATEPAGE);
@@ -1366,8 +1377,8 @@ int nfs_updatepage(struct file *file, struct page *page,
if (!count)
goto out;
- if (nfs_can_extend_write(file, page, inode)) {
- count = max(count + offset, nfs_page_length(page));
+ if (nfs_can_extend_write(file, page, inode, pagelen)) {
+ count = max(count + offset, pagelen);
offset = 0;
}
diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c
index 7c863f2c21e0..9421dae22737 100644
--- a/fs/nfsd/export.c
+++ b/fs/nfsd/export.c
@@ -386,8 +386,9 @@ static struct svc_export *svc_export_update(struct svc_export *new,
struct svc_export *old);
static struct svc_export *svc_export_lookup(struct svc_export *);
-static int check_export(struct inode *inode, int *flags, unsigned char *uuid)
+static int check_export(struct path *path, int *flags, unsigned char *uuid)
{
+ struct inode *inode = d_inode(path->dentry);
/*
* We currently export only dirs, regular files, and (for v4
@@ -411,6 +412,7 @@ static int check_export(struct inode *inode, int *flags, unsigned char *uuid)
* or an FSID number (so NFSEXP_FSID or ->uuid is needed).
* 2: We must be able to find an inode from a filehandle.
* This means that s_export_op must be set.
+ * 3: We must not currently be on an idmapped mount.
*/
if (!(inode->i_sb->s_type->fs_flags & FS_REQUIRES_DEV) &&
!(*flags & NFSEXP_FSID) &&
@@ -425,6 +427,11 @@ static int check_export(struct inode *inode, int *flags, unsigned char *uuid)
return -EINVAL;
}
+ if (mnt_user_ns(path->mnt) != &init_user_ns) {
+ dprintk("exp_export: export of idmapped mounts not yet supported.\n");
+ return -EINVAL;
+ }
+
if (inode->i_sb->s_export_op->flags & EXPORT_OP_NOSUBTREECHK &&
!(*flags & NFSEXP_NOSUBTREECHECK)) {
dprintk("%s: %s does not support subtree checking!\n",
@@ -653,8 +660,7 @@ static int svc_export_parse(struct cache_detail *cd, char *mesg, int mlen)
goto out4;
}
- err = check_export(d_inode(exp.ex_path.dentry), &exp.ex_flags,
- exp.ex_uuid);
+ err = check_export(&exp.ex_path, &exp.ex_flags, exp.ex_uuid);
if (err)
goto out4;
/*
diff --git a/fs/nfsd/nfs2acl.c b/fs/nfsd/nfs2acl.c
index 7eeac5b81c20..855e17772eba 100644
--- a/fs/nfsd/nfs2acl.c
+++ b/fs/nfsd/nfs2acl.c
@@ -113,10 +113,12 @@ static __be32 nfsacld_proc_setacl(struct svc_rqst *rqstp)
fh_lock(fh);
- error = set_posix_acl(inode, ACL_TYPE_ACCESS, argp->acl_access);
+ error = set_posix_acl(&init_user_ns, inode, ACL_TYPE_ACCESS,
+ argp->acl_access);
if (error)
goto out_drop_lock;
- error = set_posix_acl(inode, ACL_TYPE_DEFAULT, argp->acl_default);
+ error = set_posix_acl(&init_user_ns, inode, ACL_TYPE_DEFAULT,
+ argp->acl_default);
if (error)
goto out_drop_lock;
diff --git a/fs/nfsd/nfs3acl.c b/fs/nfsd/nfs3acl.c
index a568b842e9eb..9a6f18d74d14 100644
--- a/fs/nfsd/nfs3acl.c
+++ b/fs/nfsd/nfs3acl.c
@@ -103,10 +103,12 @@ static __be32 nfsd3_proc_setacl(struct svc_rqst *rqstp)
fh_lock(fh);
- error = set_posix_acl(inode, ACL_TYPE_ACCESS, argp->acl_access);
+ error = set_posix_acl(&init_user_ns, inode, ACL_TYPE_ACCESS,
+ argp->acl_access);
if (error)
goto out_drop_lock;
- error = set_posix_acl(inode, ACL_TYPE_DEFAULT, argp->acl_default);
+ error = set_posix_acl(&init_user_ns, inode, ACL_TYPE_DEFAULT,
+ argp->acl_default);
out_drop_lock:
fh_unlock(fh);
diff --git a/fs/nfsd/nfs4acl.c b/fs/nfsd/nfs4acl.c
index 71292a0d6f09..eaa3a0cf38f1 100644
--- a/fs/nfsd/nfs4acl.c
+++ b/fs/nfsd/nfs4acl.c
@@ -781,12 +781,13 @@ nfsd4_set_nfs4_acl(struct svc_rqst *rqstp, struct svc_fh *fhp,
fh_lock(fhp);
- host_error = set_posix_acl(inode, ACL_TYPE_ACCESS, pacl);
+ host_error = set_posix_acl(&init_user_ns, inode, ACL_TYPE_ACCESS, pacl);
if (host_error < 0)
goto out_drop_lock;
if (S_ISDIR(inode->i_mode)) {
- host_error = set_posix_acl(inode, ACL_TYPE_DEFAULT, dpacl);
+ host_error = set_posix_acl(&init_user_ns, inode,
+ ACL_TYPE_DEFAULT, dpacl);
}
out_drop_lock:
diff --git a/fs/nfsd/nfs4recover.c b/fs/nfsd/nfs4recover.c
index 186fa2c2c6ba..891395c6c7d3 100644
--- a/fs/nfsd/nfs4recover.c
+++ b/fs/nfsd/nfs4recover.c
@@ -233,7 +233,7 @@ nfsd4_create_clid_dir(struct nfs4_client *clp)
* as well be forgiving and just succeed silently.
*/
goto out_put;
- status = vfs_mkdir(d_inode(dir), dentry, S_IRWXU);
+ status = vfs_mkdir(&init_user_ns, d_inode(dir), dentry, S_IRWXU);
out_put:
dput(dentry);
out_unlock:
@@ -353,7 +353,7 @@ nfsd4_unlink_clid_dir(char *name, int namlen, struct nfsd_net *nn)
status = -ENOENT;
if (d_really_is_negative(dentry))
goto out;
- status = vfs_rmdir(d_inode(dir), dentry);
+ status = vfs_rmdir(&init_user_ns, d_inode(dir), dentry);
out:
dput(dentry);
out_unlock:
@@ -443,7 +443,7 @@ purge_old(struct dentry *parent, struct dentry *child, struct nfsd_net *nn)
if (nfs4_has_reclaimed_state(name, nn))
goto out_free;
- status = vfs_rmdir(d_inode(parent), child);
+ status = vfs_rmdir(&init_user_ns, d_inode(parent), child);
if (status)
printk("failed to remove client recovery directory %pd\n",
child);
diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c
index 4f6e514192bd..ef86ed23af82 100644
--- a/fs/nfsd/nfsctl.c
+++ b/fs/nfsd/nfsctl.c
@@ -1525,12 +1525,9 @@ static int __init init_nfsd(void)
int retval;
printk(KERN_INFO "Installing knfsd (copyright (C) 1996 okir@monad.swb.de).\n");
- retval = register_pernet_subsys(&nfsd_net_ops);
- if (retval < 0)
- return retval;
retval = register_cld_notifier();
if (retval)
- goto out_unregister_pernet;
+ return retval;
retval = nfsd4_init_slabs();
if (retval)
goto out_unregister_notifier;
@@ -1549,9 +1546,14 @@ static int __init init_nfsd(void)
goto out_free_lockd;
retval = register_filesystem(&nfsd_fs_type);
if (retval)
+ goto out_free_exports;
+ retval = register_pernet_subsys(&nfsd_net_ops);
+ if (retval < 0)
goto out_free_all;
return 0;
out_free_all:
+ unregister_pernet_subsys(&nfsd_net_ops);
+out_free_exports:
remove_proc_entry("fs/nfs/exports", NULL);
remove_proc_entry("fs/nfs", NULL);
out_free_lockd:
@@ -1565,13 +1567,12 @@ out_free_slabs:
nfsd4_free_slabs();
out_unregister_notifier:
unregister_cld_notifier();
-out_unregister_pernet:
- unregister_pernet_subsys(&nfsd_net_ops);
return retval;
}
static void __exit exit_nfsd(void)
{
+ unregister_pernet_subsys(&nfsd_net_ops);
nfsd_drc_slab_free();
remove_proc_entry("fs/nfs/exports", NULL);
remove_proc_entry("fs/nfs", NULL);
@@ -1581,7 +1582,6 @@ static void __exit exit_nfsd(void)
nfsd4_exit_pnfs();
unregister_filesystem(&nfsd_fs_type);
unregister_cld_notifier();
- unregister_pernet_subsys(&nfsd_net_ops);
}
MODULE_AUTHOR("Olaf Kirch <okir@monad.swb.de>");
diff --git a/fs/nfsd/nfsfh.c b/fs/nfsd/nfsfh.c
index 4744a276058d..10b44421eace 100644
--- a/fs/nfsd/nfsfh.c
+++ b/fs/nfsd/nfsfh.c
@@ -40,7 +40,8 @@ static int nfsd_acceptable(void *expv, struct dentry *dentry)
/* make sure parents give x permission to user */
int err;
parent = dget_parent(tdentry);
- err = inode_permission(d_inode(parent), MAY_EXEC);
+ err = inode_permission(&init_user_ns,
+ d_inode(parent), MAY_EXEC);
if (err < 0) {
dput(parent);
break;
diff --git a/fs/nfsd/nfsproc.c b/fs/nfsd/nfsproc.c
index b2f8035f166b..a8d5449dd0e9 100644
--- a/fs/nfsd/nfsproc.c
+++ b/fs/nfsd/nfsproc.c
@@ -90,7 +90,7 @@ nfsd_proc_setattr(struct svc_rqst *rqstp)
if (delta < 0)
delta = -delta;
if (delta < MAX_TOUCH_TIME_ERROR &&
- setattr_prepare(fhp->fh_dentry, iap) != 0) {
+ setattr_prepare(&init_user_ns, fhp->fh_dentry, iap) != 0) {
/*
* Turn off ATTR_[AM]TIME_SET but leave ATTR_[AM]TIME.
* This will cause notify_change to set these times
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index d316e11923c5..fd6be35a1642 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -448,7 +448,7 @@ nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp, struct iattr *iap,
.ia_size = iap->ia_size,
};
- host_err = notify_change(dentry, &size_attr, NULL);
+ host_err = notify_change(&init_user_ns, dentry, &size_attr, NULL);
if (host_err)
goto out_unlock;
iap->ia_valid &= ~ATTR_SIZE;
@@ -463,7 +463,7 @@ nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp, struct iattr *iap,
}
iap->ia_valid |= ATTR_CTIME;
- host_err = notify_change(dentry, iap, NULL);
+ host_err = notify_change(&init_user_ns, dentry, iap, NULL);
out_unlock:
fh_unlock(fhp);
@@ -499,7 +499,8 @@ int nfsd4_is_junction(struct dentry *dentry)
return 0;
if (!(inode->i_mode & S_ISVTX))
return 0;
- if (vfs_getxattr(dentry, NFSD_JUNCTION_XATTR_NAME, NULL, 0) <= 0)
+ if (vfs_getxattr(&init_user_ns, dentry, NFSD_JUNCTION_XATTR_NAME,
+ NULL, 0) <= 0)
return 0;
return 1;
}
@@ -1254,12 +1255,12 @@ nfsd_create_locked(struct svc_rqst *rqstp, struct svc_fh *fhp,
host_err = 0;
switch (type) {
case S_IFREG:
- host_err = vfs_create(dirp, dchild, iap->ia_mode, true);
+ host_err = vfs_create(&init_user_ns, dirp, dchild, iap->ia_mode, true);
if (!host_err)
nfsd_check_ignore_resizing(iap);
break;
case S_IFDIR:
- host_err = vfs_mkdir(dirp, dchild, iap->ia_mode);
+ host_err = vfs_mkdir(&init_user_ns, dirp, dchild, iap->ia_mode);
if (!host_err && unlikely(d_unhashed(dchild))) {
struct dentry *d;
d = lookup_one_len(dchild->d_name.name,
@@ -1287,7 +1288,8 @@ nfsd_create_locked(struct svc_rqst *rqstp, struct svc_fh *fhp,
case S_IFBLK:
case S_IFIFO:
case S_IFSOCK:
- host_err = vfs_mknod(dirp, dchild, iap->ia_mode, rdev);
+ host_err = vfs_mknod(&init_user_ns, dirp, dchild,
+ iap->ia_mode, rdev);
break;
default:
printk(KERN_WARNING "nfsd: bad file type %o in nfsd_create\n",
@@ -1485,7 +1487,7 @@ do_nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp,
if (!IS_POSIXACL(dirp))
iap->ia_mode &= ~current_umask();
- host_err = vfs_create(dirp, dchild, iap->ia_mode, true);
+ host_err = vfs_create(&init_user_ns, dirp, dchild, iap->ia_mode, true);
if (host_err < 0) {
fh_drop_write(fhp);
goto out_nfserr;
@@ -1609,7 +1611,7 @@ nfsd_symlink(struct svc_rqst *rqstp, struct svc_fh *fhp,
if (IS_ERR(dnew))
goto out_nfserr;
- host_err = vfs_symlink(d_inode(dentry), dnew, path);
+ host_err = vfs_symlink(&init_user_ns, d_inode(dentry), dnew, path);
err = nfserrno(host_err);
if (!err)
err = nfserrno(commit_metadata(fhp));
@@ -1677,7 +1679,7 @@ nfsd_link(struct svc_rqst *rqstp, struct svc_fh *ffhp,
err = nfserr_noent;
if (d_really_is_negative(dold))
goto out_dput;
- host_err = vfs_link(dold, dirp, dnew, NULL);
+ host_err = vfs_link(dold, &init_user_ns, dirp, dnew, NULL);
if (!host_err) {
err = nfserrno(commit_metadata(ffhp));
if (!err)
@@ -1797,7 +1799,15 @@ retry:
close_cached = true;
goto out_dput_old;
} else {
- host_err = vfs_rename(fdir, odentry, tdir, ndentry, NULL, 0);
+ struct renamedata rd = {
+ .old_mnt_userns = &init_user_ns,
+ .old_dir = fdir,
+ .old_dentry = odentry,
+ .new_mnt_userns = &init_user_ns,
+ .new_dir = tdir,
+ .new_dentry = ndentry,
+ };
+ host_err = vfs_rename(&rd);
if (!host_err) {
host_err = commit_metadata(tfhp);
if (!host_err)
@@ -1884,9 +1894,9 @@ nfsd_unlink(struct svc_rqst *rqstp, struct svc_fh *fhp, int type,
if (type != S_IFDIR) {
if (rdentry->d_sb->s_export_op->flags & EXPORT_OP_CLOSE_BEFORE_UNLINK)
nfsd_close_cached_files(rdentry);
- host_err = vfs_unlink(dirp, rdentry, NULL);
+ host_err = vfs_unlink(&init_user_ns, dirp, rdentry, NULL);
} else {
- host_err = vfs_rmdir(dirp, rdentry);
+ host_err = vfs_rmdir(&init_user_ns, dirp, rdentry);
}
if (!host_err)
@@ -2149,7 +2159,7 @@ nfsd_getxattr(struct svc_rqst *rqstp, struct svc_fh *fhp, char *name,
inode_lock_shared(inode);
- len = vfs_getxattr(dentry, name, NULL, 0);
+ len = vfs_getxattr(&init_user_ns, dentry, name, NULL, 0);
/*
* Zero-length attribute, just return.
@@ -2176,7 +2186,7 @@ nfsd_getxattr(struct svc_rqst *rqstp, struct svc_fh *fhp, char *name,
goto out;
}
- len = vfs_getxattr(dentry, name, buf, len);
+ len = vfs_getxattr(&init_user_ns, dentry, name, buf, len);
if (len <= 0) {
kvfree(buf);
buf = NULL;
@@ -2283,7 +2293,8 @@ nfsd_removexattr(struct svc_rqst *rqstp, struct svc_fh *fhp, char *name)
fh_lock(fhp);
- ret = __vfs_removexattr_locked(fhp->fh_dentry, name, NULL);
+ ret = __vfs_removexattr_locked(&init_user_ns, fhp->fh_dentry,
+ name, NULL);
fh_unlock(fhp);
fh_drop_write(fhp);
@@ -2307,8 +2318,8 @@ nfsd_setxattr(struct svc_rqst *rqstp, struct svc_fh *fhp, char *name,
return nfserrno(ret);
fh_lock(fhp);
- ret = __vfs_setxattr_locked(fhp->fh_dentry, name, buf, len, flags,
- NULL);
+ ret = __vfs_setxattr_locked(&init_user_ns, fhp->fh_dentry, name, buf,
+ len, flags, NULL);
fh_unlock(fhp);
fh_drop_write(fhp);
@@ -2391,13 +2402,14 @@ nfsd_permission(struct svc_rqst *rqstp, struct svc_export *exp,
return 0;
/* This assumes NFSD_MAY_{READ,WRITE,EXEC} == MAY_{READ,WRITE,EXEC} */
- err = inode_permission(inode, acc & (MAY_READ|MAY_WRITE|MAY_EXEC));
+ err = inode_permission(&init_user_ns, inode,
+ acc & (MAY_READ | MAY_WRITE | MAY_EXEC));
/* Allow read access to binaries even when mode 111 */
if (err == -EACCES && S_ISREG(inode->i_mode) &&
(acc == (NFSD_MAY_READ | NFSD_MAY_OWNER_OVERRIDE) ||
acc == (NFSD_MAY_READ | NFSD_MAY_READ_IF_EXEC)))
- err = inode_permission(inode, MAY_EXEC);
+ err = inode_permission(&init_user_ns, inode, MAY_EXEC);
return err? nfserrno(err) : 0;
}
diff --git a/fs/nilfs2/inode.c b/fs/nilfs2/inode.c
index 745d371d6fea..2e8eb263cf0f 100644
--- a/fs/nilfs2/inode.c
+++ b/fs/nilfs2/inode.c
@@ -348,7 +348,7 @@ struct inode *nilfs_new_inode(struct inode *dir, umode_t mode)
/* reference count of i_bh inherits from nilfs_mdt_read_block() */
atomic64_inc(&root->inodes_count);
- inode_init_owner(inode, dir, mode);
+ inode_init_owner(&init_user_ns, inode, dir, mode);
inode->i_ino = ino;
inode->i_mtime = inode->i_atime = inode->i_ctime = current_time(inode);
@@ -805,14 +805,15 @@ void nilfs_evict_inode(struct inode *inode)
*/
}
-int nilfs_setattr(struct dentry *dentry, struct iattr *iattr)
+int nilfs_setattr(struct user_namespace *mnt_userns, struct dentry *dentry,
+ struct iattr *iattr)
{
struct nilfs_transaction_info ti;
struct inode *inode = d_inode(dentry);
struct super_block *sb = inode->i_sb;
int err;
- err = setattr_prepare(dentry, iattr);
+ err = setattr_prepare(&init_user_ns, dentry, iattr);
if (err)
return err;
@@ -827,7 +828,7 @@ int nilfs_setattr(struct dentry *dentry, struct iattr *iattr)
nilfs_truncate(inode);
}
- setattr_copy(inode, iattr);
+ setattr_copy(&init_user_ns, inode, iattr);
mark_inode_dirty(inode);
if (iattr->ia_valid & ATTR_MODE) {
@@ -843,7 +844,8 @@ out_err:
return err;
}
-int nilfs_permission(struct inode *inode, int mask)
+int nilfs_permission(struct user_namespace *mnt_userns, struct inode *inode,
+ int mask)
{
struct nilfs_root *root = NILFS_I(inode)->i_root;
@@ -851,7 +853,7 @@ int nilfs_permission(struct inode *inode, int mask)
root->cno != NILFS_CPTREE_CURRENT_CNO)
return -EROFS; /* snapshot is not writable */
- return generic_permission(inode, mask);
+ return generic_permission(&init_user_ns, inode, mask);
}
int nilfs_load_inode_block(struct inode *inode, struct buffer_head **pbh)
diff --git a/fs/nilfs2/ioctl.c b/fs/nilfs2/ioctl.c
index 07d26f61f22a..b053b40315bf 100644
--- a/fs/nilfs2/ioctl.c
+++ b/fs/nilfs2/ioctl.c
@@ -132,7 +132,7 @@ static int nilfs_ioctl_setflags(struct inode *inode, struct file *filp,
unsigned int flags, oldflags;
int ret;
- if (!inode_owner_or_capable(inode))
+ if (!inode_owner_or_capable(&init_user_ns, inode))
return -EACCES;
if (get_user(flags, (int __user *)argp))
diff --git a/fs/nilfs2/namei.c b/fs/nilfs2/namei.c
index a6ec7961d4f5..ecace5f96a95 100644
--- a/fs/nilfs2/namei.c
+++ b/fs/nilfs2/namei.c
@@ -72,8 +72,8 @@ nilfs_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags)
* If the create succeeds, we fill in the inode information
* with d_instantiate().
*/
-static int nilfs_create(struct inode *dir, struct dentry *dentry, umode_t mode,
- bool excl)
+static int nilfs_create(struct user_namespace *mnt_userns, struct inode *dir,
+ struct dentry *dentry, umode_t mode, bool excl)
{
struct inode *inode;
struct nilfs_transaction_info ti;
@@ -100,7 +100,8 @@ static int nilfs_create(struct inode *dir, struct dentry *dentry, umode_t mode,
}
static int
-nilfs_mknod(struct inode *dir, struct dentry *dentry, umode_t mode, dev_t rdev)
+nilfs_mknod(struct user_namespace *mnt_userns, struct inode *dir,
+ struct dentry *dentry, umode_t mode, dev_t rdev)
{
struct inode *inode;
struct nilfs_transaction_info ti;
@@ -124,8 +125,8 @@ nilfs_mknod(struct inode *dir, struct dentry *dentry, umode_t mode, dev_t rdev)
return err;
}
-static int nilfs_symlink(struct inode *dir, struct dentry *dentry,
- const char *symname)
+static int nilfs_symlink(struct user_namespace *mnt_userns, struct inode *dir,
+ struct dentry *dentry, const char *symname)
{
struct nilfs_transaction_info ti;
struct super_block *sb = dir->i_sb;
@@ -201,7 +202,8 @@ static int nilfs_link(struct dentry *old_dentry, struct inode *dir,
return err;
}
-static int nilfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
+static int nilfs_mkdir(struct user_namespace *mnt_userns, struct inode *dir,
+ struct dentry *dentry, umode_t mode)
{
struct inode *inode;
struct nilfs_transaction_info ti;
@@ -338,8 +340,9 @@ static int nilfs_rmdir(struct inode *dir, struct dentry *dentry)
return err;
}
-static int nilfs_rename(struct inode *old_dir, struct dentry *old_dentry,
- struct inode *new_dir, struct dentry *new_dentry,
+static int nilfs_rename(struct user_namespace *mnt_userns,
+ struct inode *old_dir, struct dentry *old_dentry,
+ struct inode *new_dir, struct dentry *new_dentry,
unsigned int flags)
{
struct inode *old_inode = d_inode(old_dentry);
diff --git a/fs/nilfs2/nilfs.h b/fs/nilfs2/nilfs.h
index f8450ee3fd06..c4a45a081ade 100644
--- a/fs/nilfs2/nilfs.h
+++ b/fs/nilfs2/nilfs.h
@@ -267,9 +267,11 @@ extern struct inode *nilfs_iget_for_gc(struct super_block *sb,
extern void nilfs_update_inode(struct inode *, struct buffer_head *, int);
extern void nilfs_truncate(struct inode *);
extern void nilfs_evict_inode(struct inode *);
-extern int nilfs_setattr(struct dentry *, struct iattr *);
+extern int nilfs_setattr(struct user_namespace *, struct dentry *,
+ struct iattr *);
extern void nilfs_write_failed(struct address_space *mapping, loff_t to);
-int nilfs_permission(struct inode *inode, int mask);
+int nilfs_permission(struct user_namespace *mnt_userns, struct inode *inode,
+ int mask);
int nilfs_load_inode_block(struct inode *inode, struct buffer_head **pbh);
extern int nilfs_inode_dirty(struct inode *);
int nilfs_set_file_dirty(struct inode *inode, unsigned int nr_dirty);
diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c
index dcab112e1f00..9e0c1afac8bd 100644
--- a/fs/notify/fanotify/fanotify_user.c
+++ b/fs/notify/fanotify/fanotify_user.c
@@ -702,7 +702,7 @@ static int fanotify_find_path(int dfd, const char __user *filename,
}
/* you can only watch an inode if you have read permissions on it */
- ret = inode_permission(path->dentry->d_inode, MAY_READ);
+ ret = path_permission(path, MAY_READ);
if (ret) {
path_put(path);
goto out;
@@ -976,7 +976,7 @@ SYSCALL_DEFINE2(fanotify_init, unsigned int, flags, unsigned int, event_f_flags)
f_flags |= O_NONBLOCK;
/* fsnotify_alloc_group takes a ref. Dropped in fanotify_release */
- group = fsnotify_alloc_group(&fanotify_fsnotify_ops);
+ group = fsnotify_alloc_user_group(&fanotify_fsnotify_ops);
if (IS_ERR(group)) {
free_uid(user);
return PTR_ERR(group);
diff --git a/fs/notify/group.c b/fs/notify/group.c
index a4a4b1c64d32..ffd723ffe46d 100644
--- a/fs/notify/group.c
+++ b/fs/notify/group.c
@@ -111,14 +111,12 @@ void fsnotify_put_group(struct fsnotify_group *group)
}
EXPORT_SYMBOL_GPL(fsnotify_put_group);
-/*
- * Create a new fsnotify_group and hold a reference for the group returned.
- */
-struct fsnotify_group *fsnotify_alloc_group(const struct fsnotify_ops *ops)
+static struct fsnotify_group *__fsnotify_alloc_group(
+ const struct fsnotify_ops *ops, gfp_t gfp)
{
struct fsnotify_group *group;
- group = kzalloc(sizeof(struct fsnotify_group), GFP_KERNEL);
+ group = kzalloc(sizeof(struct fsnotify_group), gfp);
if (!group)
return ERR_PTR(-ENOMEM);
@@ -139,8 +137,25 @@ struct fsnotify_group *fsnotify_alloc_group(const struct fsnotify_ops *ops)
return group;
}
+
+/*
+ * Create a new fsnotify_group and hold a reference for the group returned.
+ */
+struct fsnotify_group *fsnotify_alloc_group(const struct fsnotify_ops *ops)
+{
+ return __fsnotify_alloc_group(ops, GFP_KERNEL);
+}
EXPORT_SYMBOL_GPL(fsnotify_alloc_group);
+/*
+ * Create a new fsnotify_group and hold a reference for the group returned.
+ */
+struct fsnotify_group *fsnotify_alloc_user_group(const struct fsnotify_ops *ops)
+{
+ return __fsnotify_alloc_group(ops, GFP_KERNEL_ACCOUNT);
+}
+EXPORT_SYMBOL_GPL(fsnotify_alloc_user_group);
+
int fsnotify_fasync(int fd, struct file *file, int on)
{
struct fsnotify_group *group = file->private_data;
diff --git a/fs/notify/inotify/inotify_user.c b/fs/notify/inotify/inotify_user.c
index 59c177011a0f..c71be4fb7dc5 100644
--- a/fs/notify/inotify/inotify_user.c
+++ b/fs/notify/inotify/inotify_user.c
@@ -352,7 +352,7 @@ static int inotify_find_inode(const char __user *dirname, struct path *path,
if (error)
return error;
/* you can only watch an inode if you have read permissions on it */
- error = inode_permission(path->dentry->d_inode, MAY_READ);
+ error = path_permission(path, MAY_READ);
if (error) {
path_put(path);
return error;
@@ -632,11 +632,11 @@ static struct fsnotify_group *inotify_new_group(unsigned int max_events)
struct fsnotify_group *group;
struct inotify_event_info *oevent;
- group = fsnotify_alloc_group(&inotify_fsnotify_ops);
+ group = fsnotify_alloc_user_group(&inotify_fsnotify_ops);
if (IS_ERR(group))
return group;
- oevent = kmalloc(sizeof(struct inotify_event_info), GFP_KERNEL);
+ oevent = kmalloc(sizeof(struct inotify_event_info), GFP_KERNEL_ACCOUNT);
if (unlikely(!oevent)) {
fsnotify_destroy_group(group);
return ERR_PTR(-ENOMEM);
diff --git a/fs/ntfs/inode.c b/fs/ntfs/inode.c
index f7e4cbc26eaf..f5c058b3192c 100644
--- a/fs/ntfs/inode.c
+++ b/fs/ntfs/inode.c
@@ -629,6 +629,12 @@ static int ntfs_read_locked_inode(struct inode *vi)
}
a = ctx->attr;
/* Get the standard information attribute value. */
+ if ((u8 *)a + le16_to_cpu(a->data.resident.value_offset)
+ + le32_to_cpu(a->data.resident.value_length) >
+ (u8 *)ctx->mrec + vol->mft_record_size) {
+ ntfs_error(vi->i_sb, "Corrupt standard information attribute in inode.");
+ goto unm_err_out;
+ }
si = (STANDARD_INFORMATION*)((u8*)a +
le16_to_cpu(a->data.resident.value_offset));
@@ -2848,6 +2854,7 @@ void ntfs_truncate_vfs(struct inode *vi) {
/**
* ntfs_setattr - called from notify_change() when an attribute is being changed
+ * @mnt_userns: user namespace of the mount the inode was found from
* @dentry: dentry whose attributes to change
* @attr: structure describing the attributes and the changes
*
@@ -2860,13 +2867,14 @@ void ntfs_truncate_vfs(struct inode *vi) {
*
* Called with ->i_mutex held.
*/
-int ntfs_setattr(struct dentry *dentry, struct iattr *attr)
+int ntfs_setattr(struct user_namespace *mnt_userns, struct dentry *dentry,
+ struct iattr *attr)
{
struct inode *vi = d_inode(dentry);
int err;
unsigned int ia_valid = attr->ia_valid;
- err = setattr_prepare(dentry, attr);
+ err = setattr_prepare(&init_user_ns, dentry, attr);
if (err)
goto out;
/* We do not support NTFS ACLs yet. */
diff --git a/fs/ntfs/inode.h b/fs/ntfs/inode.h
index 363e4e820673..6f78ee00f57f 100644
--- a/fs/ntfs/inode.h
+++ b/fs/ntfs/inode.h
@@ -289,7 +289,8 @@ extern int ntfs_show_options(struct seq_file *sf, struct dentry *root);
extern int ntfs_truncate(struct inode *vi);
extern void ntfs_truncate_vfs(struct inode *vi);
-extern int ntfs_setattr(struct dentry *dentry, struct iattr *attr);
+extern int ntfs_setattr(struct user_namespace *mnt_userns,
+ struct dentry *dentry, struct iattr *attr);
extern int __ntfs_write_inode(struct inode *vi, int sync);
diff --git a/fs/ntfs/layout.h b/fs/ntfs/layout.h
index 85422761ff43..5d4bf7a3259f 100644
--- a/fs/ntfs/layout.h
+++ b/fs/ntfs/layout.h
@@ -703,7 +703,7 @@ typedef struct {
/* 14*/ le16 instance; /* The instance of this attribute record. This
number is unique within this mft record (see
MFT_RECORD/next_attribute_instance notes in
- in mft.h for more details). */
+ mft.h for more details). */
/* 16*/ union {
/* Resident attributes. */
struct {
@@ -1838,7 +1838,7 @@ typedef struct {
* Also, each security descriptor is stored twice in the $SDS stream with a
* fixed offset of 0x40000 bytes (256kib, the Windows cache manager's max size)
* between them; i.e. if a SDS_ENTRY specifies an offset of 0x51d0, then the
- * the first copy of the security descriptor will be at offset 0x51d0 in the
+ * first copy of the security descriptor will be at offset 0x51d0 in the
* $SDS data stream and the second copy will be at offset 0x451d0.
*/
typedef struct {
diff --git a/fs/ocfs2/acl.c b/fs/ocfs2/acl.c
index 7b07f5df3a29..5259badabb56 100644
--- a/fs/ocfs2/acl.c
+++ b/fs/ocfs2/acl.c
@@ -262,7 +262,8 @@ static int ocfs2_set_acl(handle_t *handle,
return ret;
}
-int ocfs2_iop_set_acl(struct inode *inode, struct posix_acl *acl, int type)
+int ocfs2_iop_set_acl(struct user_namespace *mnt_userns, struct inode *inode,
+ struct posix_acl *acl, int type)
{
struct buffer_head *bh = NULL;
int status, had_lock;
@@ -274,7 +275,8 @@ int ocfs2_iop_set_acl(struct inode *inode, struct posix_acl *acl, int type)
if (type == ACL_TYPE_ACCESS && acl) {
umode_t mode;
- status = posix_acl_update_mode(inode, &mode, &acl);
+ status = posix_acl_update_mode(&init_user_ns, inode, &mode,
+ &acl);
if (status)
goto unlock;
diff --git a/fs/ocfs2/acl.h b/fs/ocfs2/acl.h
index 127b13432146..4e86450917b2 100644
--- a/fs/ocfs2/acl.h
+++ b/fs/ocfs2/acl.h
@@ -19,7 +19,8 @@ struct ocfs2_acl_entry {
};
struct posix_acl *ocfs2_iop_get_acl(struct inode *inode, int type);
-int ocfs2_iop_set_acl(struct inode *inode, struct posix_acl *acl, int type);
+int ocfs2_iop_set_acl(struct user_namespace *mnt_userns, struct inode *inode,
+ struct posix_acl *acl, int type);
extern int ocfs2_acl_chmod(struct inode *, struct buffer_head *);
extern int ocfs2_init_acl(handle_t *, struct inode *, struct inode *,
struct buffer_head *, struct buffer_head *,
diff --git a/fs/ocfs2/cluster/heartbeat.c b/fs/ocfs2/cluster/heartbeat.c
index 0179a73a3fa2..12a7590601dd 100644
--- a/fs/ocfs2/cluster/heartbeat.c
+++ b/fs/ocfs2/cluster/heartbeat.c
@@ -2042,7 +2042,7 @@ static struct config_item *o2hb_heartbeat_group_make_item(struct config_group *g
o2hb_nego_timeout_handler,
reg, NULL, &reg->hr_handler_list);
if (ret)
- goto free;
+ goto remove_item;
ret = o2net_register_handler(O2HB_NEGO_APPROVE_MSG, reg->hr_key,
sizeof(struct o2hb_nego_msg),
@@ -2057,6 +2057,12 @@ static struct config_item *o2hb_heartbeat_group_make_item(struct config_group *g
unregister_handler:
o2net_unregister_handler_list(&reg->hr_handler_list);
+remove_item:
+ spin_lock(&o2hb_live_lock);
+ list_del(&reg->hr_all_item);
+ if (o2hb_global_heartbeat_active())
+ clear_bit(reg->hr_region_num, o2hb_region_bitmap);
+ spin_unlock(&o2hb_live_lock);
free:
kfree(reg);
return ERR_PTR(ret);
diff --git a/fs/ocfs2/dlm/dlmast.c b/fs/ocfs2/dlm/dlmast.c
index 6abaded3ff6b..70a10764f249 100644
--- a/fs/ocfs2/dlm/dlmast.c
+++ b/fs/ocfs2/dlm/dlmast.c
@@ -165,16 +165,6 @@ void __dlm_queue_bast(struct dlm_ctxt *dlm, struct dlm_lock *lock)
spin_unlock(&lock->spinlock);
}
-void dlm_queue_bast(struct dlm_ctxt *dlm, struct dlm_lock *lock)
-{
- BUG_ON(!dlm);
- BUG_ON(!lock);
-
- spin_lock(&dlm->ast_lock);
- __dlm_queue_bast(dlm, lock);
- spin_unlock(&dlm->ast_lock);
-}
-
static void dlm_update_lvb(struct dlm_ctxt *dlm, struct dlm_lock_resource *res,
struct dlm_lock *lock)
{
diff --git a/fs/ocfs2/dlm/dlmcommon.h b/fs/ocfs2/dlm/dlmcommon.h
index c8a444622faa..58d57e25d384 100644
--- a/fs/ocfs2/dlm/dlmcommon.h
+++ b/fs/ocfs2/dlm/dlmcommon.h
@@ -17,10 +17,7 @@
#define DLM_LOCKID_NAME_MAX 32
-#define DLM_DOMAIN_NAME_MAX_LEN 255
#define DLM_LOCK_RES_OWNER_UNKNOWN O2NM_MAX_NODES
-#define DLM_THREAD_SHUFFLE_INTERVAL 5 // flush everything every 5 passes
-#define DLM_THREAD_MS 200 // flush at least every 200 ms
#define DLM_HASH_SIZE_DEFAULT (1 << 17)
#if DLM_HASH_SIZE_DEFAULT < PAGE_SIZE
@@ -902,7 +899,6 @@ void __dlm_lockres_grab_inflight_worker(struct dlm_ctxt *dlm,
struct dlm_lock_resource *res);
void dlm_queue_ast(struct dlm_ctxt *dlm, struct dlm_lock *lock);
-void dlm_queue_bast(struct dlm_ctxt *dlm, struct dlm_lock *lock);
void __dlm_queue_ast(struct dlm_ctxt *dlm, struct dlm_lock *lock);
void __dlm_queue_bast(struct dlm_ctxt *dlm, struct dlm_lock *lock);
void dlm_do_local_ast(struct dlm_ctxt *dlm,
diff --git a/fs/ocfs2/dlmfs/dlmfs.c b/fs/ocfs2/dlmfs/dlmfs.c
index 583820ec63e2..b2870f1a31df 100644
--- a/fs/ocfs2/dlmfs/dlmfs.c
+++ b/fs/ocfs2/dlmfs/dlmfs.c
@@ -190,17 +190,18 @@ static int dlmfs_file_release(struct inode *inode,
* We do ->setattr() just to override size changes. Our size is the size
* of the LVB and nothing else.
*/
-static int dlmfs_file_setattr(struct dentry *dentry, struct iattr *attr)
+static int dlmfs_file_setattr(struct user_namespace *mnt_userns,
+ struct dentry *dentry, struct iattr *attr)
{
int error;
struct inode *inode = d_inode(dentry);
attr->ia_valid &= ~ATTR_SIZE;
- error = setattr_prepare(dentry, attr);
+ error = setattr_prepare(&init_user_ns, dentry, attr);
if (error)
return error;
- setattr_copy(inode, attr);
+ setattr_copy(&init_user_ns, inode, attr);
mark_inode_dirty(inode);
return 0;
}
@@ -329,7 +330,7 @@ static struct inode *dlmfs_get_root_inode(struct super_block *sb)
if (inode) {
inode->i_ino = get_next_ino();
- inode_init_owner(inode, NULL, mode);
+ inode_init_owner(&init_user_ns, inode, NULL, mode);
inode->i_atime = inode->i_mtime = inode->i_ctime = current_time(inode);
inc_nlink(inode);
@@ -352,7 +353,7 @@ static struct inode *dlmfs_get_inode(struct inode *parent,
return NULL;
inode->i_ino = get_next_ino();
- inode_init_owner(inode, parent, mode);
+ inode_init_owner(&init_user_ns, inode, parent, mode);
inode->i_atime = inode->i_mtime = inode->i_ctime = current_time(inode);
ip = DLMFS_I(inode);
@@ -395,7 +396,8 @@ static struct inode *dlmfs_get_inode(struct inode *parent,
* File creation. Allocate an inode, and we're done..
*/
/* SMP-safe */
-static int dlmfs_mkdir(struct inode * dir,
+static int dlmfs_mkdir(struct user_namespace * mnt_userns,
+ struct inode * dir,
struct dentry * dentry,
umode_t mode)
{
@@ -443,7 +445,8 @@ bail:
return status;
}
-static int dlmfs_create(struct inode *dir,
+static int dlmfs_create(struct user_namespace *mnt_userns,
+ struct inode *dir,
struct dentry *dentry,
umode_t mode,
bool excl)
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index df6d709d2ae3..6611c64ca0be 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -1112,7 +1112,8 @@ out:
return ret;
}
-int ocfs2_setattr(struct dentry *dentry, struct iattr *attr)
+int ocfs2_setattr(struct user_namespace *mnt_userns, struct dentry *dentry,
+ struct iattr *attr)
{
int status = 0, size_change;
int inode_locked = 0;
@@ -1142,7 +1143,7 @@ int ocfs2_setattr(struct dentry *dentry, struct iattr *attr)
if (!(attr->ia_valid & OCFS2_VALID_ATTRS))
return 0;
- status = setattr_prepare(dentry, attr);
+ status = setattr_prepare(&init_user_ns, dentry, attr);
if (status)
return status;
@@ -1263,7 +1264,7 @@ int ocfs2_setattr(struct dentry *dentry, struct iattr *attr)
}
}
- setattr_copy(inode, attr);
+ setattr_copy(&init_user_ns, inode, attr);
mark_inode_dirty(inode);
status = ocfs2_mark_inode_dirty(handle, inode, bh);
@@ -1298,8 +1299,8 @@ bail:
return status;
}
-int ocfs2_getattr(const struct path *path, struct kstat *stat,
- u32 request_mask, unsigned int flags)
+int ocfs2_getattr(struct user_namespace *mnt_userns, const struct path *path,
+ struct kstat *stat, u32 request_mask, unsigned int flags)
{
struct inode *inode = d_inode(path->dentry);
struct super_block *sb = path->dentry->d_sb;
@@ -1313,7 +1314,7 @@ int ocfs2_getattr(const struct path *path, struct kstat *stat,
goto bail;
}
- generic_fillattr(inode, stat);
+ generic_fillattr(&init_user_ns, inode, stat);
/*
* If there is inline data in the inode, the inode will normally not
* have data blocks allocated (it may have an external xattr block).
@@ -1330,7 +1331,8 @@ bail:
return err;
}
-int ocfs2_permission(struct inode *inode, int mask)
+int ocfs2_permission(struct user_namespace *mnt_userns, struct inode *inode,
+ int mask)
{
int ret, had_lock;
struct ocfs2_lock_holder oh;
@@ -1355,7 +1357,7 @@ int ocfs2_permission(struct inode *inode, int mask)
dump_stack();
}
- ret = generic_permission(inode, mask);
+ ret = generic_permission(&init_user_ns, inode, mask);
ocfs2_inode_unlock_tracker(inode, 0, &oh, had_lock);
out:
diff --git a/fs/ocfs2/file.h b/fs/ocfs2/file.h
index 4832cbceba5b..8536cec5f122 100644
--- a/fs/ocfs2/file.h
+++ b/fs/ocfs2/file.h
@@ -51,10 +51,13 @@ int ocfs2_extend_no_holes(struct inode *inode, struct buffer_head *di_bh,
u64 new_i_size, u64 zero_to);
int ocfs2_zero_extend(struct inode *inode, struct buffer_head *di_bh,
loff_t zero_to);
-int ocfs2_setattr(struct dentry *dentry, struct iattr *attr);
-int ocfs2_getattr(const struct path *path, struct kstat *stat,
- u32 request_mask, unsigned int flags);
-int ocfs2_permission(struct inode *inode, int mask);
+int ocfs2_setattr(struct user_namespace *mnt_userns, struct dentry *dentry,
+ struct iattr *attr);
+int ocfs2_getattr(struct user_namespace *mnt_userns, const struct path *path,
+ struct kstat *stat, u32 request_mask, unsigned int flags);
+int ocfs2_permission(struct user_namespace *mnt_userns,
+ struct inode *inode,
+ int mask);
int ocfs2_should_update_atime(struct inode *inode,
struct vfsmount *vfsmnt);
diff --git a/fs/ocfs2/ioctl.c b/fs/ocfs2/ioctl.c
index 89984172fc4a..50c9b30ee9f6 100644
--- a/fs/ocfs2/ioctl.c
+++ b/fs/ocfs2/ioctl.c
@@ -96,7 +96,7 @@ static int ocfs2_set_inode_attr(struct inode *inode, unsigned flags,
}
status = -EACCES;
- if (!inode_owner_or_capable(inode))
+ if (!inode_owner_or_capable(&init_user_ns, inode))
goto bail_unlock;
if (!S_ISDIR(inode->i_mode))
diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c
index 2a237ab00453..3abdd36da2e2 100644
--- a/fs/ocfs2/namei.c
+++ b/fs/ocfs2/namei.c
@@ -198,7 +198,7 @@ static struct inode *ocfs2_get_init_inode(struct inode *dir, umode_t mode)
* callers. */
if (S_ISDIR(mode))
set_nlink(inode, 2);
- inode_init_owner(inode, dir, mode);
+ inode_init_owner(&init_user_ns, inode, dir, mode);
status = dquot_initialize(inode);
if (status)
return ERR_PTR(status);
@@ -221,7 +221,8 @@ static void ocfs2_cleanup_add_entry_failure(struct ocfs2_super *osb,
iput(inode);
}
-static int ocfs2_mknod(struct inode *dir,
+static int ocfs2_mknod(struct user_namespace *mnt_userns,
+ struct inode *dir,
struct dentry *dentry,
umode_t mode,
dev_t dev)
@@ -645,7 +646,8 @@ static int ocfs2_mknod_locked(struct ocfs2_super *osb,
return status;
}
-static int ocfs2_mkdir(struct inode *dir,
+static int ocfs2_mkdir(struct user_namespace *mnt_userns,
+ struct inode *dir,
struct dentry *dentry,
umode_t mode)
{
@@ -653,14 +655,15 @@ static int ocfs2_mkdir(struct inode *dir,
trace_ocfs2_mkdir(dir, dentry, dentry->d_name.len, dentry->d_name.name,
OCFS2_I(dir)->ip_blkno, mode);
- ret = ocfs2_mknod(dir, dentry, mode | S_IFDIR, 0);
+ ret = ocfs2_mknod(&init_user_ns, dir, dentry, mode | S_IFDIR, 0);
if (ret)
mlog_errno(ret);
return ret;
}
-static int ocfs2_create(struct inode *dir,
+static int ocfs2_create(struct user_namespace *mnt_userns,
+ struct inode *dir,
struct dentry *dentry,
umode_t mode,
bool excl)
@@ -669,7 +672,7 @@ static int ocfs2_create(struct inode *dir,
trace_ocfs2_create(dir, dentry, dentry->d_name.len, dentry->d_name.name,
(unsigned long long)OCFS2_I(dir)->ip_blkno, mode);
- ret = ocfs2_mknod(dir, dentry, mode | S_IFREG, 0);
+ ret = ocfs2_mknod(&init_user_ns, dir, dentry, mode | S_IFREG, 0);
if (ret)
mlog_errno(ret);
@@ -1195,7 +1198,8 @@ static void ocfs2_double_unlock(struct inode *inode1, struct inode *inode2)
ocfs2_inode_unlock(inode2, 1);
}
-static int ocfs2_rename(struct inode *old_dir,
+static int ocfs2_rename(struct user_namespace *mnt_userns,
+ struct inode *old_dir,
struct dentry *old_dentry,
struct inode *new_dir,
struct dentry *new_dentry,
@@ -1784,7 +1788,8 @@ bail:
return status;
}
-static int ocfs2_symlink(struct inode *dir,
+static int ocfs2_symlink(struct user_namespace *mnt_userns,
+ struct inode *dir,
struct dentry *dentry,
const char *symname)
{
diff --git a/fs/ocfs2/refcounttree.c b/fs/ocfs2/refcounttree.c
index 3b397fa9c9e8..c19a463fac55 100644
--- a/fs/ocfs2/refcounttree.c
+++ b/fs/ocfs2/refcounttree.c
@@ -978,7 +978,7 @@ static int ocfs2_get_refcount_cpos_end(struct ocfs2_caching_info *ci,
return 0;
}
- if (!eb || (eb && !eb->h_next_leaf_blk)) {
+ if (!eb || !eb->h_next_leaf_blk) {
/*
* We are the last extent rec, so any high cpos should
* be stored in this leaf refcount block.
@@ -4346,7 +4346,7 @@ static inline int ocfs2_may_create(struct inode *dir, struct dentry *child)
return -EEXIST;
if (IS_DEADDIR(dir))
return -ENOENT;
- return inode_permission(dir, MAY_WRITE | MAY_EXEC);
+ return inode_permission(&init_user_ns, dir, MAY_WRITE | MAY_EXEC);
}
/**
@@ -4400,7 +4400,7 @@ static int ocfs2_vfs_reflink(struct dentry *old_dentry, struct inode *dir,
* file.
*/
if (!preserve) {
- error = inode_permission(inode, MAY_READ);
+ error = inode_permission(&init_user_ns, inode, MAY_READ);
if (error)
return error;
}
diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c
index 2febc76e9de7..079f8826993e 100644
--- a/fs/ocfs2/super.c
+++ b/fs/ocfs2/super.c
@@ -973,8 +973,6 @@ static void ocfs2_disable_quotas(struct ocfs2_super *osb)
* quota files */
dquot_disable(sb, type, DQUOT_USAGE_ENABLED |
DQUOT_LIMITS_ENABLED);
- if (!inode)
- continue;
iput(inode);
}
}
diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c
index 9ccd19d8f7b1..36ae47a4aef6 100644
--- a/fs/ocfs2/xattr.c
+++ b/fs/ocfs2/xattr.c
@@ -7249,6 +7249,7 @@ static int ocfs2_xattr_security_get(const struct xattr_handler *handler,
}
static int ocfs2_xattr_security_set(const struct xattr_handler *handler,
+ struct user_namespace *mnt_userns,
struct dentry *unused, struct inode *inode,
const char *name, const void *value,
size_t size, int flags)
@@ -7321,6 +7322,7 @@ static int ocfs2_xattr_trusted_get(const struct xattr_handler *handler,
}
static int ocfs2_xattr_trusted_set(const struct xattr_handler *handler,
+ struct user_namespace *mnt_userns,
struct dentry *unused, struct inode *inode,
const char *name, const void *value,
size_t size, int flags)
@@ -7351,6 +7353,7 @@ static int ocfs2_xattr_user_get(const struct xattr_handler *handler,
}
static int ocfs2_xattr_user_set(const struct xattr_handler *handler,
+ struct user_namespace *mnt_userns,
struct dentry *unused, struct inode *inode,
const char *name, const void *value,
size_t size, int flags)
diff --git a/fs/omfs/dir.c b/fs/omfs/dir.c
index a0f45651f3b7..c219f91f44e9 100644
--- a/fs/omfs/dir.c
+++ b/fs/omfs/dir.c
@@ -279,13 +279,14 @@ out_free_inode:
return err;
}
-static int omfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
+static int omfs_mkdir(struct user_namespace *mnt_userns, struct inode *dir,
+ struct dentry *dentry, umode_t mode)
{
return omfs_add_node(dir, dentry, mode | S_IFDIR);
}
-static int omfs_create(struct inode *dir, struct dentry *dentry, umode_t mode,
- bool excl)
+static int omfs_create(struct user_namespace *mnt_userns, struct inode *dir,
+ struct dentry *dentry, umode_t mode, bool excl)
{
return omfs_add_node(dir, dentry, mode | S_IFREG);
}
@@ -369,9 +370,9 @@ static bool omfs_fill_chain(struct inode *dir, struct dir_context *ctx,
return true;
}
-static int omfs_rename(struct inode *old_dir, struct dentry *old_dentry,
- struct inode *new_dir, struct dentry *new_dentry,
- unsigned int flags)
+static int omfs_rename(struct user_namespace *mnt_userns, struct inode *old_dir,
+ struct dentry *old_dentry, struct inode *new_dir,
+ struct dentry *new_dentry, unsigned int flags)
{
struct inode *new_inode = d_inode(new_dentry);
struct inode *old_inode = d_inode(old_dentry);
diff --git a/fs/omfs/file.c b/fs/omfs/file.c
index 2c7b70ee1388..11e733aab25d 100644
--- a/fs/omfs/file.c
+++ b/fs/omfs/file.c
@@ -343,12 +343,13 @@ const struct file_operations omfs_file_operations = {
.splice_read = generic_file_splice_read,
};
-static int omfs_setattr(struct dentry *dentry, struct iattr *attr)
+static int omfs_setattr(struct user_namespace *mnt_userns,
+ struct dentry *dentry, struct iattr *attr)
{
struct inode *inode = d_inode(dentry);
int error;
- error = setattr_prepare(dentry, attr);
+ error = setattr_prepare(&init_user_ns, dentry, attr);
if (error)
return error;
@@ -361,7 +362,7 @@ static int omfs_setattr(struct dentry *dentry, struct iattr *attr)
omfs_truncate(inode);
}
- setattr_copy(inode, attr);
+ setattr_copy(&init_user_ns, inode, attr);
mark_inode_dirty(inode);
return 0;
}
diff --git a/fs/omfs/inode.c b/fs/omfs/inode.c
index ce93ccca8639..2a0e83236c01 100644
--- a/fs/omfs/inode.c
+++ b/fs/omfs/inode.c
@@ -48,7 +48,7 @@ struct inode *omfs_new_inode(struct inode *dir, umode_t mode)
goto fail;
inode->i_ino = new_block;
- inode_init_owner(inode, NULL, mode);
+ inode_init_owner(&init_user_ns, inode, NULL, mode);
inode->i_mapping->a_ops = &omfs_aops;
inode->i_atime = inode->i_mtime = inode->i_ctime = current_time(inode);
diff --git a/fs/open.c b/fs/open.c
index ca5444733acd..e53af13b5835 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -35,8 +35,8 @@
#include "internal.h"
-int do_truncate(struct dentry *dentry, loff_t length, unsigned int time_attrs,
- struct file *filp)
+int do_truncate(struct user_namespace *mnt_userns, struct dentry *dentry,
+ loff_t length, unsigned int time_attrs, struct file *filp)
{
int ret;
struct iattr newattrs;
@@ -61,13 +61,14 @@ int do_truncate(struct dentry *dentry, loff_t length, unsigned int time_attrs,
inode_lock(dentry->d_inode);
/* Note any delegations or leases have already been broken: */
- ret = notify_change(dentry, &newattrs, NULL);
+ ret = notify_change(mnt_userns, dentry, &newattrs, NULL);
inode_unlock(dentry->d_inode);
return ret;
}
long vfs_truncate(const struct path *path, loff_t length)
{
+ struct user_namespace *mnt_userns;
struct inode *inode;
long error;
@@ -83,7 +84,8 @@ long vfs_truncate(const struct path *path, loff_t length)
if (error)
goto out;
- error = inode_permission(inode, MAY_WRITE);
+ mnt_userns = mnt_user_ns(path->mnt);
+ error = inode_permission(mnt_userns, inode, MAY_WRITE);
if (error)
goto mnt_drop_write_and_out;
@@ -107,7 +109,7 @@ long vfs_truncate(const struct path *path, loff_t length)
if (!error)
error = security_path_truncate(path);
if (!error)
- error = do_truncate(path->dentry, length, 0, NULL);
+ error = do_truncate(mnt_userns, path->dentry, length, 0, NULL);
put_write_and_out:
put_write_access(inode);
@@ -186,13 +188,13 @@ long do_sys_ftruncate(unsigned int fd, loff_t length, int small)
/* Check IS_APPEND on real upper inode */
if (IS_APPEND(file_inode(f.file)))
goto out_putf;
-
sb_start_write(inode->i_sb);
error = locks_verify_truncate(inode, f.file, length);
if (!error)
error = security_path_truncate(&f.file->f_path);
if (!error)
- error = do_truncate(dentry, length, ATTR_MTIME|ATTR_CTIME, f.file);
+ error = do_truncate(file_mnt_user_ns(f.file), dentry, length,
+ ATTR_MTIME | ATTR_CTIME, f.file);
sb_end_write(inode->i_sb);
out_putf:
fdput(f);
@@ -436,7 +438,7 @@ retry:
goto out_path_release;
}
- res = inode_permission(inode, mode | MAY_ACCESS);
+ res = inode_permission(mnt_user_ns(path.mnt), inode, mode | MAY_ACCESS);
/* SuS v2 requires we report a read only fs too */
if (res || !(mode & S_IWOTH) || special_file(inode->i_mode))
goto out_path_release;
@@ -492,7 +494,7 @@ retry:
if (error)
goto out;
- error = inode_permission(path.dentry->d_inode, MAY_EXEC | MAY_CHDIR);
+ error = path_permission(&path, MAY_EXEC | MAY_CHDIR);
if (error)
goto dput_and_out;
@@ -521,7 +523,7 @@ SYSCALL_DEFINE1(fchdir, unsigned int, fd)
if (!d_can_lookup(f.file->f_path.dentry))
goto out_putf;
- error = inode_permission(file_inode(f.file), MAY_EXEC | MAY_CHDIR);
+ error = file_permission(f.file, MAY_EXEC | MAY_CHDIR);
if (!error)
set_fs_pwd(current->fs, &f.file->f_path);
out_putf:
@@ -540,7 +542,7 @@ retry:
if (error)
goto out;
- error = inode_permission(path.dentry->d_inode, MAY_EXEC | MAY_CHDIR);
+ error = path_permission(&path, MAY_EXEC | MAY_CHDIR);
if (error)
goto dput_and_out;
@@ -580,7 +582,8 @@ retry_deleg:
goto out_unlock;
newattrs.ia_mode = (mode & S_IALLUGO) | (inode->i_mode & ~S_IALLUGO);
newattrs.ia_valid = ATTR_MODE | ATTR_CTIME;
- error = notify_change(path->dentry, &newattrs, &delegated_inode);
+ error = notify_change(mnt_user_ns(path->mnt), path->dentry,
+ &newattrs, &delegated_inode);
out_unlock:
inode_unlock(inode);
if (delegated_inode) {
@@ -641,6 +644,7 @@ SYSCALL_DEFINE2(chmod, const char __user *, filename, umode_t, mode)
int chown_common(const struct path *path, uid_t user, gid_t group)
{
+ struct user_namespace *mnt_userns;
struct inode *inode = path->dentry->d_inode;
struct inode *delegated_inode = NULL;
int error;
@@ -651,6 +655,10 @@ int chown_common(const struct path *path, uid_t user, gid_t group)
uid = make_kuid(current_user_ns(), user);
gid = make_kgid(current_user_ns(), group);
+ mnt_userns = mnt_user_ns(path->mnt);
+ uid = kuid_from_mnt(mnt_userns, uid);
+ gid = kgid_from_mnt(mnt_userns, gid);
+
retry_deleg:
newattrs.ia_valid = ATTR_CTIME;
if (user != (uid_t) -1) {
@@ -671,7 +679,8 @@ retry_deleg:
inode_lock(inode);
error = security_path_chown(path, uid, gid);
if (!error)
- error = notify_change(path->dentry, &newattrs, &delegated_inode);
+ error = notify_change(mnt_userns, path->dentry, &newattrs,
+ &delegated_inode);
inode_unlock(inode);
if (delegated_inode) {
error = break_deleg_wait(&delegated_inode);
diff --git a/fs/orangefs/acl.c b/fs/orangefs/acl.c
index a25e6c890975..18852b9ed82b 100644
--- a/fs/orangefs/acl.c
+++ b/fs/orangefs/acl.c
@@ -116,7 +116,8 @@ out:
return error;
}
-int orangefs_set_acl(struct inode *inode, struct posix_acl *acl, int type)
+int orangefs_set_acl(struct user_namespace *mnt_userns, struct inode *inode,
+ struct posix_acl *acl, int type)
{
int error;
struct iattr iattr;
@@ -132,7 +133,8 @@ int orangefs_set_acl(struct inode *inode, struct posix_acl *acl, int type)
* and "mode" to the new desired value. It is up to
* us to propagate the new mode back to the server...
*/
- error = posix_acl_update_mode(inode, &iattr.ia_mode, &acl);
+ error = posix_acl_update_mode(&init_user_ns, inode,
+ &iattr.ia_mode, &acl);
if (error) {
gossip_err("%s: posix_acl_update_mode err: %d\n",
__func__,
diff --git a/fs/orangefs/file.c b/fs/orangefs/file.c
index ec8ae4257975..9b28a7132466 100644
--- a/fs/orangefs/file.c
+++ b/fs/orangefs/file.c
@@ -487,10 +487,7 @@ static int orangefs_file_mmap(struct file *file, struct vm_area_struct *vma)
return ret;
gossip_debug(GOSSIP_FILE_DEBUG,
- "orangefs_file_mmap: called on %s\n",
- (file ?
- (char *)file->f_path.dentry->d_name.name :
- (char *)"Unknown"));
+ "orangefs_file_mmap: called on %pD\n", file);
/* set the sequential readahead hint */
vma->vm_flags |= VM_SEQ_READ;
diff --git a/fs/orangefs/inode.c b/fs/orangefs/inode.c
index 48f0547d4850..5079cfafa8d7 100644
--- a/fs/orangefs/inode.c
+++ b/fs/orangefs/inode.c
@@ -855,13 +855,13 @@ again:
ORANGEFS_I(inode)->attr_uid = current_fsuid();
ORANGEFS_I(inode)->attr_gid = current_fsgid();
}
- setattr_copy(inode, iattr);
+ setattr_copy(&init_user_ns, inode, iattr);
spin_unlock(&inode->i_lock);
mark_inode_dirty(inode);
if (iattr->ia_valid & ATTR_MODE)
/* change mod on a file that has ACLs */
- ret = posix_acl_chmod(inode, inode->i_mode);
+ ret = posix_acl_chmod(&init_user_ns, inode, inode->i_mode);
ret = 0;
out:
@@ -871,12 +871,13 @@ out:
/*
* Change attributes of an object referenced by dentry.
*/
-int orangefs_setattr(struct dentry *dentry, struct iattr *iattr)
+int orangefs_setattr(struct user_namespace *mnt_userns, struct dentry *dentry,
+ struct iattr *iattr)
{
int ret;
gossip_debug(GOSSIP_INODE_DEBUG, "__orangefs_setattr: called on %pd\n",
dentry);
- ret = setattr_prepare(dentry, iattr);
+ ret = setattr_prepare(&init_user_ns, dentry, iattr);
if (ret)
goto out;
ret = __orangefs_setattr(d_inode(dentry), iattr);
@@ -890,8 +891,8 @@ out:
/*
* Obtain attributes of an object given a dentry
*/
-int orangefs_getattr(const struct path *path, struct kstat *stat,
- u32 request_mask, unsigned int flags)
+int orangefs_getattr(struct user_namespace *mnt_userns, const struct path *path,
+ struct kstat *stat, u32 request_mask, unsigned int flags)
{
int ret;
struct inode *inode = path->dentry->d_inode;
@@ -903,7 +904,7 @@ int orangefs_getattr(const struct path *path, struct kstat *stat,
ret = orangefs_inode_getattr(inode,
request_mask & STATX_SIZE ? ORANGEFS_GETATTR_SIZE : 0);
if (ret == 0) {
- generic_fillattr(inode, stat);
+ generic_fillattr(&init_user_ns, inode, stat);
/* override block size reported to stat */
if (!(request_mask & STATX_SIZE))
@@ -919,7 +920,8 @@ int orangefs_getattr(const struct path *path, struct kstat *stat,
return ret;
}
-int orangefs_permission(struct inode *inode, int mask)
+int orangefs_permission(struct user_namespace *mnt_userns,
+ struct inode *inode, int mask)
{
int ret;
@@ -933,7 +935,7 @@ int orangefs_permission(struct inode *inode, int mask)
if (ret < 0)
return ret;
- return generic_permission(inode, mask);
+ return generic_permission(&init_user_ns, inode, mask);
}
int orangefs_update_time(struct inode *inode, struct timespec64 *time, int flags)
diff --git a/fs/orangefs/namei.c b/fs/orangefs/namei.c
index 3e7cf3d0a494..600e8eee541f 100644
--- a/fs/orangefs/namei.c
+++ b/fs/orangefs/namei.c
@@ -15,7 +15,8 @@
/*
* Get a newly allocated inode to go with a negative dentry.
*/
-static int orangefs_create(struct inode *dir,
+static int orangefs_create(struct user_namespace *mnt_userns,
+ struct inode *dir,
struct dentry *dentry,
umode_t mode,
bool exclusive)
@@ -215,7 +216,8 @@ static int orangefs_unlink(struct inode *dir, struct dentry *dentry)
return ret;
}
-static int orangefs_symlink(struct inode *dir,
+static int orangefs_symlink(struct user_namespace *mnt_userns,
+ struct inode *dir,
struct dentry *dentry,
const char *symname)
{
@@ -303,7 +305,8 @@ out:
return ret;
}
-static int orangefs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
+static int orangefs_mkdir(struct user_namespace *mnt_userns, struct inode *dir,
+ struct dentry *dentry, umode_t mode)
{
struct orangefs_inode_s *parent = ORANGEFS_I(dir);
struct orangefs_kernel_op_s *new_op;
@@ -372,7 +375,8 @@ out:
return ret;
}
-static int orangefs_rename(struct inode *old_dir,
+static int orangefs_rename(struct user_namespace *mnt_userns,
+ struct inode *old_dir,
struct dentry *old_dentry,
struct inode *new_dir,
struct dentry *new_dentry,
diff --git a/fs/orangefs/orangefs-kernel.h b/fs/orangefs/orangefs-kernel.h
index e12aeb9623d6..0e6b97682e41 100644
--- a/fs/orangefs/orangefs-kernel.h
+++ b/fs/orangefs/orangefs-kernel.h
@@ -107,7 +107,9 @@ extern int orangefs_init_acl(struct inode *inode, struct inode *dir);
extern const struct xattr_handler *orangefs_xattr_handlers[];
extern struct posix_acl *orangefs_get_acl(struct inode *inode, int type);
-extern int orangefs_set_acl(struct inode *inode, struct posix_acl *acl, int type);
+extern int orangefs_set_acl(struct user_namespace *mnt_userns,
+ struct inode *inode, struct posix_acl *acl,
+ int type);
/*
* orangefs data structures
@@ -359,12 +361,13 @@ struct inode *orangefs_new_inode(struct super_block *sb,
struct orangefs_object_kref *ref);
int __orangefs_setattr(struct inode *, struct iattr *);
-int orangefs_setattr(struct dentry *, struct iattr *);
+int orangefs_setattr(struct user_namespace *, struct dentry *, struct iattr *);
-int orangefs_getattr(const struct path *path, struct kstat *stat,
- u32 request_mask, unsigned int flags);
+int orangefs_getattr(struct user_namespace *mnt_userns, const struct path *path,
+ struct kstat *stat, u32 request_mask, unsigned int flags);
-int orangefs_permission(struct inode *inode, int mask);
+int orangefs_permission(struct user_namespace *mnt_userns,
+ struct inode *inode, int mask);
int orangefs_update_time(struct inode *, struct timespec64 *, int);
diff --git a/fs/orangefs/xattr.c b/fs/orangefs/xattr.c
index bdc285aea360..9a5b757fbd2f 100644
--- a/fs/orangefs/xattr.c
+++ b/fs/orangefs/xattr.c
@@ -526,6 +526,7 @@ out_unlock:
}
static int orangefs_xattr_set_default(const struct xattr_handler *handler,
+ struct user_namespace *mnt_userns,
struct dentry *unused,
struct inode *inode,
const char *name,
diff --git a/fs/overlayfs/copy_up.c b/fs/overlayfs/copy_up.c
index 0fed532efa68..0b2891c6c71e 100644
--- a/fs/overlayfs/copy_up.c
+++ b/fs/overlayfs/copy_up.c
@@ -93,9 +93,9 @@ int ovl_copy_xattr(struct super_block *sb, struct dentry *old,
continue; /* Discard */
}
retry:
- size = vfs_getxattr(old, name, value, value_size);
+ size = vfs_getxattr(&init_user_ns, old, name, value, value_size);
if (size == -ERANGE)
- size = vfs_getxattr(old, name, NULL, 0);
+ size = vfs_getxattr(&init_user_ns, old, name, NULL, 0);
if (size < 0) {
error = size;
@@ -115,7 +115,7 @@ retry:
goto retry;
}
- error = vfs_setxattr(new, name, value, size, 0);
+ error = vfs_setxattr(&init_user_ns, new, name, value, size, 0);
if (error) {
if (error != -EOPNOTSUPP || ovl_must_copy_xattr(name))
break;
@@ -236,7 +236,7 @@ static int ovl_set_size(struct dentry *upperdentry, struct kstat *stat)
.ia_size = stat->size,
};
- return notify_change(upperdentry, &attr, NULL);
+ return notify_change(&init_user_ns, upperdentry, &attr, NULL);
}
static int ovl_set_timestamps(struct dentry *upperdentry, struct kstat *stat)
@@ -248,7 +248,7 @@ static int ovl_set_timestamps(struct dentry *upperdentry, struct kstat *stat)
.ia_mtime = stat->mtime,
};
- return notify_change(upperdentry, &attr, NULL);
+ return notify_change(&init_user_ns, upperdentry, &attr, NULL);
}
int ovl_set_attr(struct dentry *upperdentry, struct kstat *stat)
@@ -260,7 +260,7 @@ int ovl_set_attr(struct dentry *upperdentry, struct kstat *stat)
.ia_valid = ATTR_MODE,
.ia_mode = stat->mode,
};
- err = notify_change(upperdentry, &attr, NULL);
+ err = notify_change(&init_user_ns, upperdentry, &attr, NULL);
}
if (!err) {
struct iattr attr = {
@@ -268,7 +268,7 @@ int ovl_set_attr(struct dentry *upperdentry, struct kstat *stat)
.ia_uid = stat->uid,
.ia_gid = stat->gid,
};
- err = notify_change(upperdentry, &attr, NULL);
+ err = notify_change(&init_user_ns, upperdentry, &attr, NULL);
}
if (!err)
ovl_set_timestamps(upperdentry, stat);
@@ -796,7 +796,7 @@ static ssize_t ovl_getxattr(struct dentry *dentry, char *name, char **value)
ssize_t res;
char *buf;
- res = vfs_getxattr(dentry, name, NULL, 0);
+ res = vfs_getxattr(&init_user_ns, dentry, name, NULL, 0);
if (res == -ENODATA || res == -EOPNOTSUPP)
res = 0;
@@ -805,7 +805,7 @@ static ssize_t ovl_getxattr(struct dentry *dentry, char *name, char **value)
if (!buf)
return -ENOMEM;
- res = vfs_getxattr(dentry, name, buf, res);
+ res = vfs_getxattr(&init_user_ns, dentry, name, buf, res);
if (res < 0)
kfree(buf);
else
@@ -847,8 +847,8 @@ static int ovl_copy_up_meta_inode_data(struct ovl_copy_up_ctx *c)
* don't want that to happen for normal copy-up operation.
*/
if (capability) {
- err = vfs_setxattr(upperpath.dentry, XATTR_NAME_CAPS,
- capability, cap_size, 0);
+ err = vfs_setxattr(&init_user_ns, upperpath.dentry,
+ XATTR_NAME_CAPS, capability, cap_size, 0);
if (err)
goto out_free;
}
diff --git a/fs/overlayfs/dir.c b/fs/overlayfs/dir.c
index d1efa3a5a503..836f14b9d3a6 100644
--- a/fs/overlayfs/dir.c
+++ b/fs/overlayfs/dir.c
@@ -449,7 +449,7 @@ static int ovl_set_upper_acl(struct dentry *upperdentry, const char *name,
if (err < 0)
goto out_free;
- err = vfs_setxattr(upperdentry, name, buffer, size, XATTR_CREATE);
+ err = vfs_setxattr(&init_user_ns, upperdentry, name, buffer, size, XATTR_CREATE);
out_free:
kfree(buffer);
return err;
@@ -508,7 +508,7 @@ static int ovl_create_over_whiteout(struct dentry *dentry, struct inode *inode,
.ia_mode = cattr->mode,
};
inode_lock(newdentry->d_inode);
- err = notify_change(newdentry, &attr, NULL);
+ err = notify_change(&init_user_ns, newdentry, &attr, NULL);
inode_unlock(newdentry->d_inode);
if (err)
goto out_cleanup;
@@ -636,7 +636,7 @@ static int ovl_create_object(struct dentry *dentry, int mode, dev_t rdev,
inode->i_state |= I_CREATING;
spin_unlock(&inode->i_lock);
- inode_init_owner(inode, dentry->d_parent->d_inode, mode);
+ inode_init_owner(&init_user_ns, inode, dentry->d_parent->d_inode, mode);
attr.mode = inode->i_mode;
err = ovl_create_or_link(dentry, inode, &attr, false);
@@ -650,19 +650,20 @@ out:
return err;
}
-static int ovl_create(struct inode *dir, struct dentry *dentry, umode_t mode,
- bool excl)
+static int ovl_create(struct user_namespace *mnt_userns, struct inode *dir,
+ struct dentry *dentry, umode_t mode, bool excl)
{
return ovl_create_object(dentry, (mode & 07777) | S_IFREG, 0, NULL);
}
-static int ovl_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
+static int ovl_mkdir(struct user_namespace *mnt_userns, struct inode *dir,
+ struct dentry *dentry, umode_t mode)
{
return ovl_create_object(dentry, (mode & 07777) | S_IFDIR, 0, NULL);
}
-static int ovl_mknod(struct inode *dir, struct dentry *dentry, umode_t mode,
- dev_t rdev)
+static int ovl_mknod(struct user_namespace *mnt_userns, struct inode *dir,
+ struct dentry *dentry, umode_t mode, dev_t rdev)
{
/* Don't allow creation of "whiteout" on overlay */
if (S_ISCHR(mode) && rdev == WHITEOUT_DEV)
@@ -671,8 +672,8 @@ static int ovl_mknod(struct inode *dir, struct dentry *dentry, umode_t mode,
return ovl_create_object(dentry, mode, rdev, NULL);
}
-static int ovl_symlink(struct inode *dir, struct dentry *dentry,
- const char *link)
+static int ovl_symlink(struct user_namespace *mnt_userns, struct inode *dir,
+ struct dentry *dentry, const char *link)
{
return ovl_create_object(dentry, S_IFLNK, 0, link);
}
@@ -821,9 +822,9 @@ static int ovl_remove_upper(struct dentry *dentry, bool is_dir,
goto out_dput_upper;
if (is_dir)
- err = vfs_rmdir(dir, upper);
+ err = vfs_rmdir(&init_user_ns, dir, upper);
else
- err = vfs_unlink(dir, upper, NULL);
+ err = vfs_unlink(&init_user_ns, dir, upper, NULL);
ovl_dir_modified(dentry->d_parent, ovl_type_origin(dentry));
/*
@@ -1069,9 +1070,9 @@ static int ovl_set_redirect(struct dentry *dentry, bool samedir)
return err;
}
-static int ovl_rename(struct inode *olddir, struct dentry *old,
- struct inode *newdir, struct dentry *new,
- unsigned int flags)
+static int ovl_rename(struct user_namespace *mnt_userns, struct inode *olddir,
+ struct dentry *old, struct inode *newdir,
+ struct dentry *new, unsigned int flags)
{
int err;
struct dentry *old_upperdir;
diff --git a/fs/overlayfs/file.c b/fs/overlayfs/file.c
index 077d3ad343f6..dbfb35fb0ff7 100644
--- a/fs/overlayfs/file.c
+++ b/fs/overlayfs/file.c
@@ -50,11 +50,11 @@ static struct file *ovl_open_realfile(const struct file *file,
acc_mode |= MAY_APPEND;
old_cred = ovl_override_creds(inode->i_sb);
- err = inode_permission(realinode, MAY_OPEN | acc_mode);
+ err = inode_permission(&init_user_ns, realinode, MAY_OPEN | acc_mode);
if (err) {
realfile = ERR_PTR(err);
} else {
- if (!inode_owner_or_capable(realinode))
+ if (!inode_owner_or_capable(&init_user_ns, realinode))
flags &= ~O_NOATIME;
realfile = open_with_fake_path(&file->f_path, flags, realinode,
@@ -521,7 +521,7 @@ static long ovl_ioctl_set_flags(struct file *file, unsigned int cmd,
long ret;
struct inode *inode = file_inode(file);
- if (!inode_owner_or_capable(inode))
+ if (!inode_owner_or_capable(&init_user_ns, inode))
return -EACCES;
ret = mnt_want_write_file(file);
diff --git a/fs/overlayfs/inode.c b/fs/overlayfs/inode.c
index cf41bcb664bc..003cf83bf78a 100644
--- a/fs/overlayfs/inode.c
+++ b/fs/overlayfs/inode.c
@@ -14,14 +14,15 @@
#include "overlayfs.h"
-int ovl_setattr(struct dentry *dentry, struct iattr *attr)
+int ovl_setattr(struct user_namespace *mnt_userns, struct dentry *dentry,
+ struct iattr *attr)
{
int err;
bool full_copy_up = false;
struct dentry *upperdentry;
const struct cred *old_cred;
- err = setattr_prepare(dentry, attr);
+ err = setattr_prepare(&init_user_ns, dentry, attr);
if (err)
return err;
@@ -79,7 +80,7 @@ int ovl_setattr(struct dentry *dentry, struct iattr *attr)
inode_lock(upperdentry->d_inode);
old_cred = ovl_override_creds(dentry->d_sb);
- err = notify_change(upperdentry, attr, NULL);
+ err = notify_change(&init_user_ns, upperdentry, attr, NULL);
revert_creds(old_cred);
if (!err)
ovl_copyattr(upperdentry->d_inode, dentry->d_inode);
@@ -154,8 +155,8 @@ static int ovl_map_dev_ino(struct dentry *dentry, struct kstat *stat, int fsid)
return 0;
}
-int ovl_getattr(const struct path *path, struct kstat *stat,
- u32 request_mask, unsigned int flags)
+int ovl_getattr(struct user_namespace *mnt_userns, const struct path *path,
+ struct kstat *stat, u32 request_mask, unsigned int flags)
{
struct dentry *dentry = path->dentry;
enum ovl_path_type type;
@@ -277,7 +278,8 @@ out:
return err;
}
-int ovl_permission(struct inode *inode, int mask)
+int ovl_permission(struct user_namespace *mnt_userns,
+ struct inode *inode, int mask)
{
struct inode *upperinode = ovl_inode_upper(inode);
struct inode *realinode = upperinode ?: ovl_inode_lower(inode);
@@ -294,7 +296,7 @@ int ovl_permission(struct inode *inode, int mask)
* Check overlay inode with the creds of task and underlying inode
* with creds of mounter
*/
- err = generic_permission(inode, mask);
+ err = generic_permission(&init_user_ns, inode, mask);
if (err)
return err;
@@ -305,7 +307,7 @@ int ovl_permission(struct inode *inode, int mask)
/* Make sure mounter can read file for copy up later */
mask |= MAY_READ;
}
- err = inode_permission(realinode, mask);
+ err = inode_permission(&init_user_ns, realinode, mask);
revert_creds(old_cred);
return err;
@@ -353,7 +355,7 @@ int ovl_xattr_set(struct dentry *dentry, struct inode *inode, const char *name,
if (!value && !upperdentry) {
old_cred = ovl_override_creds(dentry->d_sb);
- err = vfs_getxattr(realdentry, name, NULL, 0);
+ err = vfs_getxattr(&init_user_ns, realdentry, name, NULL, 0);
revert_creds(old_cred);
if (err < 0)
goto out_drop_write;
@@ -369,10 +371,11 @@ int ovl_xattr_set(struct dentry *dentry, struct inode *inode, const char *name,
old_cred = ovl_override_creds(dentry->d_sb);
if (value)
- err = vfs_setxattr(realdentry, name, value, size, flags);
+ err = vfs_setxattr(&init_user_ns, realdentry, name, value, size,
+ flags);
else {
WARN_ON(flags != XATTR_REPLACE);
- err = vfs_removexattr(realdentry, name);
+ err = vfs_removexattr(&init_user_ns, realdentry, name);
}
revert_creds(old_cred);
@@ -394,7 +397,7 @@ int ovl_xattr_get(struct dentry *dentry, struct inode *inode, const char *name,
ovl_i_dentry_upper(inode) ?: ovl_dentry_lower(dentry);
old_cred = ovl_override_creds(dentry->d_sb);
- res = vfs_getxattr(realdentry, name, value, size);
+ res = vfs_getxattr(&init_user_ns, realdentry, name, value, size);
revert_creds(old_cred);
return res;
}
diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h
index cb4e2d60ecf9..95cff83786a5 100644
--- a/fs/overlayfs/overlayfs.h
+++ b/fs/overlayfs/overlayfs.h
@@ -123,7 +123,7 @@ static inline const char *ovl_xattr(struct ovl_fs *ofs, enum ovl_xattr ox)
static inline int ovl_do_rmdir(struct inode *dir, struct dentry *dentry)
{
- int err = vfs_rmdir(dir, dentry);
+ int err = vfs_rmdir(&init_user_ns, dir, dentry);
pr_debug("rmdir(%pd2) = %i\n", dentry, err);
return err;
@@ -131,7 +131,7 @@ static inline int ovl_do_rmdir(struct inode *dir, struct dentry *dentry)
static inline int ovl_do_unlink(struct inode *dir, struct dentry *dentry)
{
- int err = vfs_unlink(dir, dentry, NULL);
+ int err = vfs_unlink(&init_user_ns, dir, dentry, NULL);
pr_debug("unlink(%pd2) = %i\n", dentry, err);
return err;
@@ -140,7 +140,7 @@ static inline int ovl_do_unlink(struct inode *dir, struct dentry *dentry)
static inline int ovl_do_link(struct dentry *old_dentry, struct inode *dir,
struct dentry *new_dentry)
{
- int err = vfs_link(old_dentry, dir, new_dentry, NULL);
+ int err = vfs_link(old_dentry, &init_user_ns, dir, new_dentry, NULL);
pr_debug("link(%pd2, %pd2) = %i\n", old_dentry, new_dentry, err);
return err;
@@ -149,7 +149,7 @@ static inline int ovl_do_link(struct dentry *old_dentry, struct inode *dir,
static inline int ovl_do_create(struct inode *dir, struct dentry *dentry,
umode_t mode)
{
- int err = vfs_create(dir, dentry, mode, true);
+ int err = vfs_create(&init_user_ns, dir, dentry, mode, true);
pr_debug("create(%pd2, 0%o) = %i\n", dentry, mode, err);
return err;
@@ -158,7 +158,7 @@ static inline int ovl_do_create(struct inode *dir, struct dentry *dentry,
static inline int ovl_do_mkdir(struct inode *dir, struct dentry *dentry,
umode_t mode)
{
- int err = vfs_mkdir(dir, dentry, mode);
+ int err = vfs_mkdir(&init_user_ns, dir, dentry, mode);
pr_debug("mkdir(%pd2, 0%o) = %i\n", dentry, mode, err);
return err;
}
@@ -166,7 +166,7 @@ static inline int ovl_do_mkdir(struct inode *dir, struct dentry *dentry,
static inline int ovl_do_mknod(struct inode *dir, struct dentry *dentry,
umode_t mode, dev_t dev)
{
- int err = vfs_mknod(dir, dentry, mode, dev);
+ int err = vfs_mknod(&init_user_ns, dir, dentry, mode, dev);
pr_debug("mknod(%pd2, 0%o, 0%o) = %i\n", dentry, mode, dev, err);
return err;
@@ -175,7 +175,7 @@ static inline int ovl_do_mknod(struct inode *dir, struct dentry *dentry,
static inline int ovl_do_symlink(struct inode *dir, struct dentry *dentry,
const char *oldname)
{
- int err = vfs_symlink(dir, dentry, oldname);
+ int err = vfs_symlink(&init_user_ns, dir, dentry, oldname);
pr_debug("symlink(\"%s\", %pd2) = %i\n", oldname, dentry, err);
return err;
@@ -186,7 +186,7 @@ static inline ssize_t ovl_do_getxattr(struct ovl_fs *ofs, struct dentry *dentry,
size_t size)
{
const char *name = ovl_xattr(ofs, ox);
- return vfs_getxattr(dentry, name, value, size);
+ return vfs_getxattr(&init_user_ns, dentry, name, value, size);
}
static inline int ovl_do_setxattr(struct ovl_fs *ofs, struct dentry *dentry,
@@ -194,7 +194,7 @@ static inline int ovl_do_setxattr(struct ovl_fs *ofs, struct dentry *dentry,
size_t size)
{
const char *name = ovl_xattr(ofs, ox);
- int err = vfs_setxattr(dentry, name, value, size, 0);
+ int err = vfs_setxattr(&init_user_ns, dentry, name, value, size, 0);
pr_debug("setxattr(%pd2, \"%s\", \"%*pE\", %zu, 0) = %i\n",
dentry, name, min((int)size, 48), value, size, err);
return err;
@@ -204,7 +204,7 @@ static inline int ovl_do_removexattr(struct ovl_fs *ofs, struct dentry *dentry,
enum ovl_xattr ox)
{
const char *name = ovl_xattr(ofs, ox);
- int err = vfs_removexattr(dentry, name);
+ int err = vfs_removexattr(&init_user_ns, dentry, name);
pr_debug("removexattr(%pd2, \"%s\") = %i\n", dentry, name, err);
return err;
}
@@ -214,9 +214,18 @@ static inline int ovl_do_rename(struct inode *olddir, struct dentry *olddentry,
unsigned int flags)
{
int err;
+ struct renamedata rd = {
+ .old_mnt_userns = &init_user_ns,
+ .old_dir = olddir,
+ .old_dentry = olddentry,
+ .new_mnt_userns = &init_user_ns,
+ .new_dir = newdir,
+ .new_dentry = newdentry,
+ .flags = flags,
+ };
pr_debug("rename(%pd2, %pd2, 0x%x)\n", olddentry, newdentry, flags);
- err = vfs_rename(olddir, olddentry, newdir, newdentry, NULL, flags);
+ err = vfs_rename(&rd);
if (err) {
pr_debug("...rename(%pd2, %pd2, ...) = %i\n",
olddentry, newdentry, err);
@@ -226,14 +235,14 @@ static inline int ovl_do_rename(struct inode *olddir, struct dentry *olddentry,
static inline int ovl_do_whiteout(struct inode *dir, struct dentry *dentry)
{
- int err = vfs_whiteout(dir, dentry);
+ int err = vfs_whiteout(&init_user_ns, dir, dentry);
pr_debug("whiteout(%pd2) = %i\n", dentry, err);
return err;
}
static inline struct dentry *ovl_do_tmpfile(struct dentry *dentry, umode_t mode)
{
- struct dentry *ret = vfs_tmpfile(dentry, mode, 0);
+ struct dentry *ret = vfs_tmpfile(&init_user_ns, dentry, mode, 0);
int err = PTR_ERR_OR_ZERO(ret);
pr_debug("tmpfile(%pd2, 0%o) = %i\n", dentry, mode, err);
@@ -436,10 +445,12 @@ int ovl_set_nlink_lower(struct dentry *dentry);
unsigned int ovl_get_nlink(struct ovl_fs *ofs, struct dentry *lowerdentry,
struct dentry *upperdentry,
unsigned int fallback);
-int ovl_setattr(struct dentry *dentry, struct iattr *attr);
-int ovl_getattr(const struct path *path, struct kstat *stat,
- u32 request_mask, unsigned int flags);
-int ovl_permission(struct inode *inode, int mask);
+int ovl_setattr(struct user_namespace *mnt_userns, struct dentry *dentry,
+ struct iattr *attr);
+int ovl_getattr(struct user_namespace *mnt_userns, const struct path *path,
+ struct kstat *stat, u32 request_mask, unsigned int flags);
+int ovl_permission(struct user_namespace *mnt_userns, struct inode *inode,
+ int mask);
int ovl_xattr_set(struct dentry *dentry, struct inode *inode, const char *name,
const void *value, size_t size, int flags);
int ovl_xattr_get(struct dentry *dentry, struct inode *inode, const char *name,
diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c
index d58b8f2bf9d0..fdd72f1a9c5e 100644
--- a/fs/overlayfs/super.c
+++ b/fs/overlayfs/super.c
@@ -803,17 +803,19 @@ retry:
* allowed as upper are limited to "normal" ones, where checking
* for the above two errors is sufficient.
*/
- err = vfs_removexattr(work, XATTR_NAME_POSIX_ACL_DEFAULT);
+ err = vfs_removexattr(&init_user_ns, work,
+ XATTR_NAME_POSIX_ACL_DEFAULT);
if (err && err != -ENODATA && err != -EOPNOTSUPP)
goto out_dput;
- err = vfs_removexattr(work, XATTR_NAME_POSIX_ACL_ACCESS);
+ err = vfs_removexattr(&init_user_ns, work,
+ XATTR_NAME_POSIX_ACL_ACCESS);
if (err && err != -ENODATA && err != -EOPNOTSUPP)
goto out_dput;
/* Clear any inherited mode bits */
inode_lock(work->d_inode);
- err = notify_change(work, &attr, NULL);
+ err = notify_change(&init_user_ns, work, &attr, NULL);
inode_unlock(work->d_inode);
if (err)
goto out_dput;
@@ -865,6 +867,10 @@ static int ovl_mount_dir_noesc(const char *name, struct path *path)
pr_err("filesystem on '%s' not supported\n", name);
goto out_put;
}
+ if (mnt_user_ns(path->mnt) != &init_user_ns) {
+ pr_err("idmapped layers are currently not supported\n");
+ goto out_put;
+ }
if (!d_is_dir(path->dentry)) {
pr_err("'%s' not a directory\n", name);
goto out_put;
@@ -989,6 +995,7 @@ ovl_posix_acl_xattr_get(const struct xattr_handler *handler,
static int __maybe_unused
ovl_posix_acl_xattr_set(const struct xattr_handler *handler,
+ struct user_namespace *mnt_userns,
struct dentry *dentry, struct inode *inode,
const char *name, const void *value,
size_t size, int flags)
@@ -1014,7 +1021,7 @@ ovl_posix_acl_xattr_set(const struct xattr_handler *handler,
goto out_acl_release;
}
err = -EPERM;
- if (!inode_owner_or_capable(inode))
+ if (!inode_owner_or_capable(&init_user_ns, inode))
goto out_acl_release;
posix_acl_release(acl);
@@ -1026,10 +1033,10 @@ ovl_posix_acl_xattr_set(const struct xattr_handler *handler,
if (unlikely(inode->i_mode & S_ISGID) &&
handler->flags == ACL_TYPE_ACCESS &&
!in_group_p(inode->i_gid) &&
- !capable_wrt_inode_uidgid(inode, CAP_FSETID)) {
+ !capable_wrt_inode_uidgid(&init_user_ns, inode, CAP_FSETID)) {
struct iattr iattr = { .ia_valid = ATTR_KILL_SGID };
- err = ovl_setattr(dentry, &iattr);
+ err = ovl_setattr(&init_user_ns, dentry, &iattr);
if (err)
return err;
}
@@ -1053,6 +1060,7 @@ static int ovl_own_xattr_get(const struct xattr_handler *handler,
}
static int ovl_own_xattr_set(const struct xattr_handler *handler,
+ struct user_namespace *mnt_userns,
struct dentry *dentry, struct inode *inode,
const char *name, const void *value,
size_t size, int flags)
@@ -1068,6 +1076,7 @@ static int ovl_other_xattr_get(const struct xattr_handler *handler,
}
static int ovl_other_xattr_set(const struct xattr_handler *handler,
+ struct user_namespace *mnt_userns,
struct dentry *dentry, struct inode *inode,
const char *name, const void *value,
size_t size, int flags)
diff --git a/fs/overlayfs/util.c b/fs/overlayfs/util.c
index 9826b003f1d2..7f5a01a11f97 100644
--- a/fs/overlayfs/util.c
+++ b/fs/overlayfs/util.c
@@ -479,12 +479,12 @@ struct file *ovl_path_open(struct path *path, int flags)
BUG();
}
- err = inode_permission(inode, acc_mode | MAY_OPEN);
+ err = inode_permission(&init_user_ns, inode, acc_mode | MAY_OPEN);
if (err)
return ERR_PTR(err);
/* O_NOATIME is an optimization, don't fail if not permitted */
- if (inode_owner_or_capable(inode))
+ if (inode_owner_or_capable(&init_user_ns, inode))
flags |= O_NOATIME;
return dentry_open(path, flags, current_cred());
diff --git a/fs/pipe.c b/fs/pipe.c
index 39c96845a72f..bfd946a9ad01 100644
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -171,7 +171,7 @@ EXPORT_SYMBOL(generic_pipe_buf_try_steal);
*
* Description:
* This function grabs an extra reference to @buf. It's used in
- * in the tee() system call, when we duplicate the buffers in one
+ * the tee() system call, when we duplicate the buffers in one
* pipe into another.
*/
bool generic_pipe_buf_get(struct pipe_inode_info *pipe, struct pipe_buffer *buf)
diff --git a/fs/posix_acl.c b/fs/posix_acl.c
index 95882b3f5f62..f3309a7edb49 100644
--- a/fs/posix_acl.c
+++ b/fs/posix_acl.c
@@ -345,10 +345,13 @@ EXPORT_SYMBOL(posix_acl_from_mode);
* by the acl. Returns -E... otherwise.
*/
int
-posix_acl_permission(struct inode *inode, const struct posix_acl *acl, int want)
+posix_acl_permission(struct user_namespace *mnt_userns, struct inode *inode,
+ const struct posix_acl *acl, int want)
{
const struct posix_acl_entry *pa, *pe, *mask_obj;
int found = 0;
+ kuid_t uid;
+ kgid_t gid;
want &= MAY_READ | MAY_WRITE | MAY_EXEC;
@@ -356,22 +359,26 @@ posix_acl_permission(struct inode *inode, const struct posix_acl *acl, int want)
switch(pa->e_tag) {
case ACL_USER_OBJ:
/* (May have been checked already) */
- if (uid_eq(inode->i_uid, current_fsuid()))
+ uid = i_uid_into_mnt(mnt_userns, inode);
+ if (uid_eq(uid, current_fsuid()))
goto check_perm;
break;
case ACL_USER:
- if (uid_eq(pa->e_uid, current_fsuid()))
+ uid = kuid_into_mnt(mnt_userns, pa->e_uid);
+ if (uid_eq(uid, current_fsuid()))
goto mask;
break;
case ACL_GROUP_OBJ:
- if (in_group_p(inode->i_gid)) {
+ gid = i_gid_into_mnt(mnt_userns, inode);
+ if (in_group_p(gid)) {
found = 1;
if ((pa->e_perm & want) == want)
goto mask;
}
break;
case ACL_GROUP:
- if (in_group_p(pa->e_gid)) {
+ gid = kgid_into_mnt(mnt_userns, pa->e_gid);
+ if (in_group_p(gid)) {
found = 1;
if ((pa->e_perm & want) == want)
goto mask;
@@ -551,8 +558,22 @@ __posix_acl_chmod(struct posix_acl **acl, gfp_t gfp, umode_t mode)
}
EXPORT_SYMBOL(__posix_acl_chmod);
+/**
+ * posix_acl_chmod - chmod a posix acl
+ *
+ * @mnt_userns: user namespace of the mount @inode was found from
+ * @inode: inode to check permissions on
+ * @mode: the new mode of @inode
+ *
+ * If the inode has been found through an idmapped mount the user namespace of
+ * the vfsmount must be passed through @mnt_userns. This function will then
+ * take care to map the inode according to @mnt_userns before checking
+ * permissions. On non-idmapped mounts or if permission checking is to be
+ * performed on the raw inode simply passs init_user_ns.
+ */
int
-posix_acl_chmod(struct inode *inode, umode_t mode)
+ posix_acl_chmod(struct user_namespace *mnt_userns, struct inode *inode,
+ umode_t mode)
{
struct posix_acl *acl;
int ret = 0;
@@ -572,7 +593,7 @@ posix_acl_chmod(struct inode *inode, umode_t mode)
ret = __posix_acl_chmod(&acl, GFP_KERNEL, mode);
if (ret)
return ret;
- ret = inode->i_op->set_acl(inode, acl, ACL_TYPE_ACCESS);
+ ret = inode->i_op->set_acl(mnt_userns, inode, acl, ACL_TYPE_ACCESS);
posix_acl_release(acl);
return ret;
}
@@ -631,9 +652,10 @@ EXPORT_SYMBOL_GPL(posix_acl_create);
/**
* posix_acl_update_mode - update mode in set_acl
- * @inode: target inode
- * @mode_p: mode (pointer) for update
- * @acl: acl pointer
+ * @mnt_userns: user namespace of the mount @inode was found from
+ * @inode: target inode
+ * @mode_p: mode (pointer) for update
+ * @acl: acl pointer
*
* Update the file mode when setting an ACL: compute the new file permission
* bits based on the ACL. In addition, if the ACL is equivalent to the new
@@ -642,9 +664,16 @@ EXPORT_SYMBOL_GPL(posix_acl_create);
* As with chmod, clear the setgid bit if the caller is not in the owning group
* or capable of CAP_FSETID (see inode_change_ok).
*
+ * If the inode has been found through an idmapped mount the user namespace of
+ * the vfsmount must be passed through @mnt_userns. This function will then
+ * take care to map the inode according to @mnt_userns before checking
+ * permissions. On non-idmapped mounts or if permission checking is to be
+ * performed on the raw inode simply passs init_user_ns.
+ *
* Called from set_acl inode operations.
*/
-int posix_acl_update_mode(struct inode *inode, umode_t *mode_p,
+int posix_acl_update_mode(struct user_namespace *mnt_userns,
+ struct inode *inode, umode_t *mode_p,
struct posix_acl **acl)
{
umode_t mode = inode->i_mode;
@@ -655,8 +684,8 @@ int posix_acl_update_mode(struct inode *inode, umode_t *mode_p,
return error;
if (error == 0)
*acl = NULL;
- if (!in_group_p(inode->i_gid) &&
- !capable_wrt_inode_uidgid(inode, CAP_FSETID))
+ if (!in_group_p(i_gid_into_mnt(mnt_userns, inode)) &&
+ !capable_wrt_inode_uidgid(mnt_userns, inode, CAP_FSETID))
mode &= ~S_ISGID;
*mode_p = mode;
return 0;
@@ -668,7 +697,8 @@ EXPORT_SYMBOL(posix_acl_update_mode);
*/
static void posix_acl_fix_xattr_userns(
struct user_namespace *to, struct user_namespace *from,
- void *value, size_t size)
+ struct user_namespace *mnt_userns,
+ void *value, size_t size, bool from_user)
{
struct posix_acl_xattr_header *header = value;
struct posix_acl_xattr_entry *entry = (void *)(header + 1), *end;
@@ -693,10 +723,18 @@ static void posix_acl_fix_xattr_userns(
switch(le16_to_cpu(entry->e_tag)) {
case ACL_USER:
uid = make_kuid(from, le32_to_cpu(entry->e_id));
+ if (from_user)
+ uid = kuid_from_mnt(mnt_userns, uid);
+ else
+ uid = kuid_into_mnt(mnt_userns, uid);
entry->e_id = cpu_to_le32(from_kuid(to, uid));
break;
case ACL_GROUP:
gid = make_kgid(from, le32_to_cpu(entry->e_id));
+ if (from_user)
+ gid = kgid_from_mnt(mnt_userns, gid);
+ else
+ gid = kgid_into_mnt(mnt_userns, gid);
entry->e_id = cpu_to_le32(from_kgid(to, gid));
break;
default:
@@ -705,20 +743,24 @@ static void posix_acl_fix_xattr_userns(
}
}
-void posix_acl_fix_xattr_from_user(void *value, size_t size)
+void posix_acl_fix_xattr_from_user(struct user_namespace *mnt_userns,
+ void *value, size_t size)
{
struct user_namespace *user_ns = current_user_ns();
- if (user_ns == &init_user_ns)
+ if ((user_ns == &init_user_ns) && (mnt_userns == &init_user_ns))
return;
- posix_acl_fix_xattr_userns(&init_user_ns, user_ns, value, size);
+ posix_acl_fix_xattr_userns(&init_user_ns, user_ns, mnt_userns, value,
+ size, true);
}
-void posix_acl_fix_xattr_to_user(void *value, size_t size)
+void posix_acl_fix_xattr_to_user(struct user_namespace *mnt_userns,
+ void *value, size_t size)
{
struct user_namespace *user_ns = current_user_ns();
- if (user_ns == &init_user_ns)
+ if ((user_ns == &init_user_ns) && (mnt_userns == &init_user_ns))
return;
- posix_acl_fix_xattr_userns(user_ns, &init_user_ns, value, size);
+ posix_acl_fix_xattr_userns(user_ns, &init_user_ns, mnt_userns, value,
+ size, false);
}
/*
@@ -858,7 +900,8 @@ posix_acl_xattr_get(const struct xattr_handler *handler,
}
int
-set_posix_acl(struct inode *inode, int type, struct posix_acl *acl)
+set_posix_acl(struct user_namespace *mnt_userns, struct inode *inode,
+ int type, struct posix_acl *acl)
{
if (!IS_POSIXACL(inode))
return -EOPNOTSUPP;
@@ -867,7 +910,7 @@ set_posix_acl(struct inode *inode, int type, struct posix_acl *acl)
if (type == ACL_TYPE_DEFAULT && !S_ISDIR(inode->i_mode))
return acl ? -EACCES : 0;
- if (!inode_owner_or_capable(inode))
+ if (!inode_owner_or_capable(mnt_userns, inode))
return -EPERM;
if (acl) {
@@ -875,15 +918,16 @@ set_posix_acl(struct inode *inode, int type, struct posix_acl *acl)
if (ret)
return ret;
}
- return inode->i_op->set_acl(inode, acl, type);
+ return inode->i_op->set_acl(mnt_userns, inode, acl, type);
}
EXPORT_SYMBOL(set_posix_acl);
static int
posix_acl_xattr_set(const struct xattr_handler *handler,
- struct dentry *unused, struct inode *inode,
- const char *name, const void *value,
- size_t size, int flags)
+ struct user_namespace *mnt_userns,
+ struct dentry *unused, struct inode *inode,
+ const char *name, const void *value, size_t size,
+ int flags)
{
struct posix_acl *acl = NULL;
int ret;
@@ -893,7 +937,7 @@ posix_acl_xattr_set(const struct xattr_handler *handler,
if (IS_ERR(acl))
return PTR_ERR(acl);
}
- ret = set_posix_acl(inode, handler->flags, acl);
+ ret = set_posix_acl(mnt_userns, inode, handler->flags, acl);
posix_acl_release(acl);
return ret;
}
@@ -922,12 +966,13 @@ const struct xattr_handler posix_acl_default_xattr_handler = {
};
EXPORT_SYMBOL_GPL(posix_acl_default_xattr_handler);
-int simple_set_acl(struct inode *inode, struct posix_acl *acl, int type)
+int simple_set_acl(struct user_namespace *mnt_userns, struct inode *inode,
+ struct posix_acl *acl, int type)
{
int error;
if (type == ACL_TYPE_ACCESS) {
- error = posix_acl_update_mode(inode,
+ error = posix_acl_update_mode(mnt_userns, inode,
&inode->i_mode, &acl);
if (error)
return error;
diff --git a/fs/proc/base.c b/fs/proc/base.c
index b3422cda2a91..3851bfcdba56 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -67,7 +67,6 @@
#include <linux/mm.h>
#include <linux/swap.h>
#include <linux/rcupdate.h>
-#include <linux/kallsyms.h>
#include <linux/stacktrace.h>
#include <linux/resource.h>
#include <linux/module.h>
@@ -386,19 +385,17 @@ static int proc_pid_wchan(struct seq_file *m, struct pid_namespace *ns,
struct pid *pid, struct task_struct *task)
{
unsigned long wchan;
- char symname[KSYM_NAME_LEN];
- if (!ptrace_may_access(task, PTRACE_MODE_READ_FSCREDS))
- goto print0;
+ if (ptrace_may_access(task, PTRACE_MODE_READ_FSCREDS))
+ wchan = get_wchan(task);
+ else
+ wchan = 0;
- wchan = get_wchan(task);
- if (wchan && !lookup_symbol_name(wchan, symname)) {
- seq_puts(m, symname);
- return 0;
- }
+ if (wchan)
+ seq_printf(m, "%ps", (void *) wchan);
+ else
+ seq_putc(m, '0');
-print0:
- seq_putc(m, '0');
return 0;
}
#endif /* CONFIG_KALLSYMS */
@@ -685,7 +682,8 @@ static int proc_fd_access_allowed(struct inode *inode)
return allowed;
}
-int proc_setattr(struct dentry *dentry, struct iattr *attr)
+int proc_setattr(struct user_namespace *mnt_userns, struct dentry *dentry,
+ struct iattr *attr)
{
int error;
struct inode *inode = d_inode(dentry);
@@ -693,11 +691,11 @@ int proc_setattr(struct dentry *dentry, struct iattr *attr)
if (attr->ia_valid & ATTR_MODE)
return -EPERM;
- error = setattr_prepare(dentry, attr);
+ error = setattr_prepare(&init_user_ns, dentry, attr);
if (error)
return error;
- setattr_copy(inode, attr);
+ setattr_copy(&init_user_ns, inode, attr);
mark_inode_dirty(inode);
return 0;
}
@@ -726,7 +724,8 @@ static bool has_pid_permissions(struct proc_fs_info *fs_info,
}
-static int proc_pid_permission(struct inode *inode, int mask)
+static int proc_pid_permission(struct user_namespace *mnt_userns,
+ struct inode *inode, int mask)
{
struct proc_fs_info *fs_info = proc_sb_info(inode->i_sb);
struct task_struct *task;
@@ -751,7 +750,7 @@ static int proc_pid_permission(struct inode *inode, int mask)
return -EPERM;
}
- return generic_permission(inode, mask);
+ return generic_permission(&init_user_ns, inode, mask);
}
@@ -1927,14 +1926,14 @@ out_unlock:
return NULL;
}
-int pid_getattr(const struct path *path, struct kstat *stat,
- u32 request_mask, unsigned int query_flags)
+int pid_getattr(struct user_namespace *mnt_userns, const struct path *path,
+ struct kstat *stat, u32 request_mask, unsigned int query_flags)
{
struct inode *inode = d_inode(path->dentry);
struct proc_fs_info *fs_info = proc_sb_info(inode->i_sb);
struct task_struct *task;
- generic_fillattr(inode, stat);
+ generic_fillattr(&init_user_ns, inode, stat);
stat->uid = GLOBAL_ROOT_UID;
stat->gid = GLOBAL_ROOT_GID;
@@ -3473,7 +3472,8 @@ int proc_pid_readdir(struct file *file, struct dir_context *ctx)
* This function makes sure that the node is always accessible for members of
* same thread group.
*/
-static int proc_tid_comm_permission(struct inode *inode, int mask)
+static int proc_tid_comm_permission(struct user_namespace *mnt_userns,
+ struct inode *inode, int mask)
{
bool is_same_tgroup;
struct task_struct *task;
@@ -3492,7 +3492,7 @@ static int proc_tid_comm_permission(struct inode *inode, int mask)
return 0;
}
- return generic_permission(inode, mask);
+ return generic_permission(&init_user_ns, inode, mask);
}
static const struct inode_operations proc_tid_comm_inode_operations = {
@@ -3798,12 +3798,13 @@ static int proc_task_readdir(struct file *file, struct dir_context *ctx)
return 0;
}
-static int proc_task_getattr(const struct path *path, struct kstat *stat,
+static int proc_task_getattr(struct user_namespace *mnt_userns,
+ const struct path *path, struct kstat *stat,
u32 request_mask, unsigned int query_flags)
{
struct inode *inode = d_inode(path->dentry);
struct task_struct *p = get_proc_task(inode);
- generic_fillattr(inode, stat);
+ generic_fillattr(&init_user_ns, inode, stat);
if (p) {
stat->nlink += get_nr_threads(p);
diff --git a/fs/proc/fd.c b/fs/proc/fd.c
index cb51763ed554..07fc4fad2602 100644
--- a/fs/proc/fd.c
+++ b/fs/proc/fd.c
@@ -276,12 +276,13 @@ static struct dentry *proc_lookupfd(struct inode *dir, struct dentry *dentry,
* /proc/pid/fd needs a special permission handler so that a process can still
* access /proc/self/fd after it has executed a setuid().
*/
-int proc_fd_permission(struct inode *inode, int mask)
+int proc_fd_permission(struct user_namespace *mnt_userns,
+ struct inode *inode, int mask)
{
struct task_struct *p;
int rv;
- rv = generic_permission(inode, mask);
+ rv = generic_permission(&init_user_ns, inode, mask);
if (rv == 0)
return rv;
diff --git a/fs/proc/fd.h b/fs/proc/fd.h
index f371a602bf58..c5a921a06a0b 100644
--- a/fs/proc/fd.h
+++ b/fs/proc/fd.h
@@ -10,7 +10,8 @@ extern const struct inode_operations proc_fd_inode_operations;
extern const struct file_operations proc_fdinfo_operations;
extern const struct inode_operations proc_fdinfo_inode_operations;
-extern int proc_fd_permission(struct inode *inode, int mask);
+extern int proc_fd_permission(struct user_namespace *mnt_userns,
+ struct inode *inode, int mask);
static inline unsigned int proc_fd(struct inode *inode)
{
diff --git a/fs/proc/generic.c b/fs/proc/generic.c
index 6c0a05f55d6b..bc86aa87cc41 100644
--- a/fs/proc/generic.c
+++ b/fs/proc/generic.c
@@ -115,17 +115,18 @@ static bool pde_subdir_insert(struct proc_dir_entry *dir,
return true;
}
-static int proc_notify_change(struct dentry *dentry, struct iattr *iattr)
+static int proc_notify_change(struct user_namespace *mnt_userns,
+ struct dentry *dentry, struct iattr *iattr)
{
struct inode *inode = d_inode(dentry);
struct proc_dir_entry *de = PDE(inode);
int error;
- error = setattr_prepare(dentry, iattr);
+ error = setattr_prepare(&init_user_ns, dentry, iattr);
if (error)
return error;
- setattr_copy(inode, iattr);
+ setattr_copy(&init_user_ns, inode, iattr);
mark_inode_dirty(inode);
proc_set_user(de, inode->i_uid, inode->i_gid);
@@ -133,7 +134,8 @@ static int proc_notify_change(struct dentry *dentry, struct iattr *iattr)
return 0;
}
-static int proc_getattr(const struct path *path, struct kstat *stat,
+static int proc_getattr(struct user_namespace *mnt_userns,
+ const struct path *path, struct kstat *stat,
u32 request_mask, unsigned int query_flags)
{
struct inode *inode = d_inode(path->dentry);
@@ -145,7 +147,7 @@ static int proc_getattr(const struct path *path, struct kstat *stat,
}
}
- generic_fillattr(inode, stat);
+ generic_fillattr(&init_user_ns, inode, stat);
return 0;
}
diff --git a/fs/proc/internal.h b/fs/proc/internal.h
index f60b379dcdc7..03415f3fb3a8 100644
--- a/fs/proc/internal.h
+++ b/fs/proc/internal.h
@@ -162,8 +162,10 @@ extern int proc_pid_statm(struct seq_file *, struct pid_namespace *,
* base.c
*/
extern const struct dentry_operations pid_dentry_operations;
-extern int pid_getattr(const struct path *, struct kstat *, u32, unsigned int);
-extern int proc_setattr(struct dentry *, struct iattr *);
+extern int pid_getattr(struct user_namespace *, const struct path *,
+ struct kstat *, u32, unsigned int);
+extern int proc_setattr(struct user_namespace *, struct dentry *,
+ struct iattr *);
extern void proc_pid_evict_inode(struct proc_inode *);
extern struct inode *proc_pid_make_inode(struct super_block *, struct task_struct *, umode_t);
extern void pid_update_inode(struct task_struct *, struct inode *);
diff --git a/fs/proc/meminfo.c b/fs/proc/meminfo.c
index d6fc74619625..6fa761c9cc78 100644
--- a/fs/proc/meminfo.c
+++ b/fs/proc/meminfo.c
@@ -129,15 +129,15 @@ static int meminfo_proc_show(struct seq_file *m, void *v)
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
show_val_kb(m, "AnonHugePages: ",
- global_node_page_state(NR_ANON_THPS) * HPAGE_PMD_NR);
+ global_node_page_state(NR_ANON_THPS));
show_val_kb(m, "ShmemHugePages: ",
- global_node_page_state(NR_SHMEM_THPS) * HPAGE_PMD_NR);
+ global_node_page_state(NR_SHMEM_THPS));
show_val_kb(m, "ShmemPmdMapped: ",
- global_node_page_state(NR_SHMEM_PMDMAPPED) * HPAGE_PMD_NR);
+ global_node_page_state(NR_SHMEM_PMDMAPPED));
show_val_kb(m, "FileHugePages: ",
- global_node_page_state(NR_FILE_THPS) * HPAGE_PMD_NR);
+ global_node_page_state(NR_FILE_THPS));
show_val_kb(m, "FilePmdMapped: ",
- global_node_page_state(NR_FILE_PMDMAPPED) * HPAGE_PMD_NR);
+ global_node_page_state(NR_FILE_PMDMAPPED));
#endif
#ifdef CONFIG_CMA
diff --git a/fs/proc/proc_net.c b/fs/proc/proc_net.c
index 18601042af99..15c2e55d2ed2 100644
--- a/fs/proc/proc_net.c
+++ b/fs/proc/proc_net.c
@@ -289,7 +289,8 @@ static struct dentry *proc_tgid_net_lookup(struct inode *dir,
return de;
}
-static int proc_tgid_net_getattr(const struct path *path, struct kstat *stat,
+static int proc_tgid_net_getattr(struct user_namespace *mnt_userns,
+ const struct path *path, struct kstat *stat,
u32 request_mask, unsigned int query_flags)
{
struct inode *inode = d_inode(path->dentry);
@@ -297,7 +298,7 @@ static int proc_tgid_net_getattr(const struct path *path, struct kstat *stat,
net = get_proc_task_net(inode);
- generic_fillattr(inode, stat);
+ generic_fillattr(&init_user_ns, inode, stat);
if (net != NULL) {
stat->nlink = net->proc_net->nlink;
diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c
index d2018f70d1fa..984e42f8cb11 100644
--- a/fs/proc/proc_sysctl.c
+++ b/fs/proc/proc_sysctl.c
@@ -571,7 +571,7 @@ static ssize_t proc_sys_call_handler(struct kiocb *iocb, struct iov_iter *iter,
error = -ENOMEM;
if (count >= KMALLOC_MAX_SIZE)
goto out;
- kbuf = kzalloc(count + 1, GFP_KERNEL);
+ kbuf = kvzalloc(count + 1, GFP_KERNEL);
if (!kbuf)
goto out;
@@ -600,7 +600,7 @@ static ssize_t proc_sys_call_handler(struct kiocb *iocb, struct iov_iter *iter,
error = count;
out_free_buf:
- kfree(kbuf);
+ kvfree(kbuf);
out:
sysctl_head_finish(head);
@@ -785,7 +785,8 @@ out:
return 0;
}
-static int proc_sys_permission(struct inode *inode, int mask)
+static int proc_sys_permission(struct user_namespace *mnt_userns,
+ struct inode *inode, int mask)
{
/*
* sysctl entries that are not writeable,
@@ -813,7 +814,8 @@ static int proc_sys_permission(struct inode *inode, int mask)
return error;
}
-static int proc_sys_setattr(struct dentry *dentry, struct iattr *attr)
+static int proc_sys_setattr(struct user_namespace *mnt_userns,
+ struct dentry *dentry, struct iattr *attr)
{
struct inode *inode = d_inode(dentry);
int error;
@@ -821,16 +823,17 @@ static int proc_sys_setattr(struct dentry *dentry, struct iattr *attr)
if (attr->ia_valid & (ATTR_MODE | ATTR_UID | ATTR_GID))
return -EPERM;
- error = setattr_prepare(dentry, attr);
+ error = setattr_prepare(&init_user_ns, dentry, attr);
if (error)
return error;
- setattr_copy(inode, attr);
+ setattr_copy(&init_user_ns, inode, attr);
mark_inode_dirty(inode);
return 0;
}
-static int proc_sys_getattr(const struct path *path, struct kstat *stat,
+static int proc_sys_getattr(struct user_namespace *mnt_userns,
+ const struct path *path, struct kstat *stat,
u32 request_mask, unsigned int query_flags)
{
struct inode *inode = d_inode(path->dentry);
@@ -840,7 +843,7 @@ static int proc_sys_getattr(const struct path *path, struct kstat *stat,
if (IS_ERR(head))
return PTR_ERR(head);
- generic_fillattr(inode, stat);
+ generic_fillattr(&init_user_ns, inode, stat);
if (table)
stat->mode = (stat->mode & S_IFMT) | table->mode;
diff --git a/fs/proc/root.c b/fs/proc/root.c
index 5e444d4f9717..c7e3b1350ef8 100644
--- a/fs/proc/root.c
+++ b/fs/proc/root.c
@@ -308,10 +308,11 @@ void __init proc_root_init(void)
register_filesystem(&proc_fs_type);
}
-static int proc_root_getattr(const struct path *path, struct kstat *stat,
+static int proc_root_getattr(struct user_namespace *mnt_userns,
+ const struct path *path, struct kstat *stat,
u32 request_mask, unsigned int query_flags)
{
- generic_fillattr(d_inode(path->dentry), stat);
+ generic_fillattr(&init_user_ns, d_inode(path->dentry), stat);
stat->nlink = proc_root.nlink + nr_processes();
return 0;
}
diff --git a/fs/proc/vmcore.c b/fs/proc/vmcore.c
index c3a345c28a93..9a15334da208 100644
--- a/fs/proc/vmcore.c
+++ b/fs/proc/vmcore.c
@@ -1503,11 +1503,8 @@ int vmcore_add_device_dump(struct vmcoredd_data *data)
return 0;
out_err:
- if (buf)
- vfree(buf);
-
- if (dump)
- vfree(dump);
+ vfree(buf);
+ vfree(dump);
return ret;
}
diff --git a/fs/proc_namespace.c b/fs/proc_namespace.c
index eafb75755fa3..392ef5162655 100644
--- a/fs/proc_namespace.c
+++ b/fs/proc_namespace.c
@@ -79,6 +79,9 @@ static void show_mnt_opts(struct seq_file *m, struct vfsmount *mnt)
if (mnt->mnt_flags & fs_infop->flag)
seq_puts(m, fs_infop->str);
}
+
+ if (mnt_user_ns(mnt) != &init_user_ns)
+ seq_puts(m, ",idmapped");
}
static inline void mangle(struct seq_file *m, const char *s)
diff --git a/fs/pstore/zone.c b/fs/pstore/zone.c
index 5266ccbec007..7c8f8feac6c3 100644
--- a/fs/pstore/zone.c
+++ b/fs/pstore/zone.c
@@ -23,7 +23,7 @@
#include "internal.h"
/**
- * struct psz_head - header of zone to flush to storage
+ * struct psz_buffer - header of zone to flush to storage
*
* @sig: signature to indicate header (PSZ_SIG xor PSZONE-type value)
* @datalen: length of data in @data
diff --git a/fs/quota/quota_v2.c b/fs/quota/quota_v2.c
index c21106557a37..b1467f3921c2 100644
--- a/fs/quota/quota_v2.c
+++ b/fs/quota/quota_v2.c
@@ -164,19 +164,24 @@ static int v2_read_file_info(struct super_block *sb, int type)
quota_error(sb, "Number of blocks too big for quota file size (%llu > %llu).",
(loff_t)qinfo->dqi_blocks << qinfo->dqi_blocksize_bits,
i_size_read(sb_dqopt(sb)->files[type]));
- goto out;
+ goto out_free;
}
if (qinfo->dqi_free_blk >= qinfo->dqi_blocks) {
quota_error(sb, "Free block number too big (%u >= %u).",
qinfo->dqi_free_blk, qinfo->dqi_blocks);
- goto out;
+ goto out_free;
}
if (qinfo->dqi_free_entry >= qinfo->dqi_blocks) {
quota_error(sb, "Block with free entry too big (%u >= %u).",
qinfo->dqi_free_entry, qinfo->dqi_blocks);
- goto out;
+ goto out_free;
}
ret = 0;
+out_free:
+ if (ret) {
+ kfree(info->dqi_priv);
+ info->dqi_priv = NULL;
+ }
out:
up_read(&dqopt->dqio_sem);
return ret;
diff --git a/fs/ramfs/file-nommu.c b/fs/ramfs/file-nommu.c
index 355523f4a4bf..ba3525ccc27e 100644
--- a/fs/ramfs/file-nommu.c
+++ b/fs/ramfs/file-nommu.c
@@ -22,7 +22,7 @@
#include <linux/uaccess.h>
#include "internal.h"
-static int ramfs_nommu_setattr(struct dentry *, struct iattr *);
+static int ramfs_nommu_setattr(struct user_namespace *, struct dentry *, struct iattr *);
static unsigned long ramfs_nommu_get_unmapped_area(struct file *file,
unsigned long addr,
unsigned long len,
@@ -158,14 +158,15 @@ static int ramfs_nommu_resize(struct inode *inode, loff_t newsize, loff_t size)
* handle a change of attributes
* - we're specifically interested in a change of size
*/
-static int ramfs_nommu_setattr(struct dentry *dentry, struct iattr *ia)
+static int ramfs_nommu_setattr(struct user_namespace *mnt_userns,
+ struct dentry *dentry, struct iattr *ia)
{
struct inode *inode = d_inode(dentry);
unsigned int old_ia_valid = ia->ia_valid;
int ret = 0;
/* POSIX UID/GID verification for setting inode attributes */
- ret = setattr_prepare(dentry, ia);
+ ret = setattr_prepare(&init_user_ns, dentry, ia);
if (ret)
return ret;
@@ -185,7 +186,7 @@ static int ramfs_nommu_setattr(struct dentry *dentry, struct iattr *ia)
}
}
- setattr_copy(inode, ia);
+ setattr_copy(&init_user_ns, inode, ia);
out:
ia->ia_valid = old_ia_valid;
return ret;
diff --git a/fs/ramfs/inode.c b/fs/ramfs/inode.c
index ee179a81b3da..9ebd17d7befb 100644
--- a/fs/ramfs/inode.c
+++ b/fs/ramfs/inode.c
@@ -67,7 +67,7 @@ struct inode *ramfs_get_inode(struct super_block *sb,
if (inode) {
inode->i_ino = get_next_ino();
- inode_init_owner(inode, dir, mode);
+ inode_init_owner(&init_user_ns, inode, dir, mode);
inode->i_mapping->a_ops = &ramfs_aops;
mapping_set_gfp_mask(inode->i_mapping, GFP_HIGHUSER);
mapping_set_unevictable(inode->i_mapping);
@@ -101,7 +101,8 @@ struct inode *ramfs_get_inode(struct super_block *sb,
*/
/* SMP-safe */
static int
-ramfs_mknod(struct inode *dir, struct dentry *dentry, umode_t mode, dev_t dev)
+ramfs_mknod(struct user_namespace *mnt_userns, struct inode *dir,
+ struct dentry *dentry, umode_t mode, dev_t dev)
{
struct inode * inode = ramfs_get_inode(dir->i_sb, dir, mode, dev);
int error = -ENOSPC;
@@ -115,20 +116,23 @@ ramfs_mknod(struct inode *dir, struct dentry *dentry, umode_t mode, dev_t dev)
return error;
}
-static int ramfs_mkdir(struct inode * dir, struct dentry * dentry, umode_t mode)
+static int ramfs_mkdir(struct user_namespace *mnt_userns, struct inode *dir,
+ struct dentry *dentry, umode_t mode)
{
- int retval = ramfs_mknod(dir, dentry, mode | S_IFDIR, 0);
+ int retval = ramfs_mknod(&init_user_ns, dir, dentry, mode | S_IFDIR, 0);
if (!retval)
inc_nlink(dir);
return retval;
}
-static int ramfs_create(struct inode *dir, struct dentry *dentry, umode_t mode, bool excl)
+static int ramfs_create(struct user_namespace *mnt_userns, struct inode *dir,
+ struct dentry *dentry, umode_t mode, bool excl)
{
- return ramfs_mknod(dir, dentry, mode | S_IFREG, 0);
+ return ramfs_mknod(&init_user_ns, dir, dentry, mode | S_IFREG, 0);
}
-static int ramfs_symlink(struct inode * dir, struct dentry *dentry, const char * symname)
+static int ramfs_symlink(struct user_namespace *mnt_userns, struct inode *dir,
+ struct dentry *dentry, const char *symname)
{
struct inode *inode;
int error = -ENOSPC;
@@ -147,6 +151,18 @@ static int ramfs_symlink(struct inode * dir, struct dentry *dentry, const char *
return error;
}
+static int ramfs_tmpfile(struct user_namespace *mnt_userns,
+ struct inode *dir, struct dentry *dentry, umode_t mode)
+{
+ struct inode *inode;
+
+ inode = ramfs_get_inode(dir->i_sb, dir, mode, 0);
+ if (!inode)
+ return -ENOSPC;
+ d_tmpfile(dentry, inode);
+ return 0;
+}
+
static const struct inode_operations ramfs_dir_inode_operations = {
.create = ramfs_create,
.lookup = simple_lookup,
@@ -157,6 +173,7 @@ static const struct inode_operations ramfs_dir_inode_operations = {
.rmdir = simple_rmdir,
.mknod = ramfs_mknod,
.rename = simple_rename,
+ .tmpfile = ramfs_tmpfile,
};
/*
diff --git a/fs/reiserfs/acl.h b/fs/reiserfs/acl.h
index 0c1c847f992f..fd58618da360 100644
--- a/fs/reiserfs/acl.h
+++ b/fs/reiserfs/acl.h
@@ -49,7 +49,8 @@ static inline int reiserfs_acl_count(size_t size)
#ifdef CONFIG_REISERFS_FS_POSIX_ACL
struct posix_acl *reiserfs_get_acl(struct inode *inode, int type);
-int reiserfs_set_acl(struct inode *inode, struct posix_acl *acl, int type);
+int reiserfs_set_acl(struct user_namespace *mnt_userns, struct inode *inode,
+ struct posix_acl *acl, int type);
int reiserfs_acl_chmod(struct inode *inode);
int reiserfs_inherit_default_acl(struct reiserfs_transaction_handle *th,
struct inode *dir, struct dentry *dentry,
diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c
index c76d563dec0e..780bb90c1804 100644
--- a/fs/reiserfs/inode.c
+++ b/fs/reiserfs/inode.c
@@ -3282,13 +3282,14 @@ static ssize_t reiserfs_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
return ret;
}
-int reiserfs_setattr(struct dentry *dentry, struct iattr *attr)
+int reiserfs_setattr(struct user_namespace *mnt_userns, struct dentry *dentry,
+ struct iattr *attr)
{
struct inode *inode = d_inode(dentry);
unsigned int ia_valid;
int error;
- error = setattr_prepare(dentry, attr);
+ error = setattr_prepare(&init_user_ns, dentry, attr);
if (error)
return error;
@@ -3413,7 +3414,7 @@ int reiserfs_setattr(struct dentry *dentry, struct iattr *attr)
}
if (!error) {
- setattr_copy(inode, attr);
+ setattr_copy(&init_user_ns, inode, attr);
mark_inode_dirty(inode);
}
diff --git a/fs/reiserfs/ioctl.c b/fs/reiserfs/ioctl.c
index adb21bea3d60..4f1cbd930179 100644
--- a/fs/reiserfs/ioctl.c
+++ b/fs/reiserfs/ioctl.c
@@ -59,7 +59,7 @@ long reiserfs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
if (err)
break;
- if (!inode_owner_or_capable(inode)) {
+ if (!inode_owner_or_capable(&init_user_ns, inode)) {
err = -EPERM;
goto setflags_out;
}
@@ -101,7 +101,7 @@ setflags_out:
err = put_user(inode->i_generation, (int __user *)arg);
break;
case REISERFS_IOC_SETVERSION:
- if (!inode_owner_or_capable(inode)) {
+ if (!inode_owner_or_capable(&init_user_ns, inode)) {
err = -EPERM;
break;
}
diff --git a/fs/reiserfs/namei.c b/fs/reiserfs/namei.c
index 1594687582f0..e6eb05e2b2f1 100644
--- a/fs/reiserfs/namei.c
+++ b/fs/reiserfs/namei.c
@@ -615,12 +615,12 @@ static int new_inode_init(struct inode *inode, struct inode *dir, umode_t mode)
* the quota init calls have to know who to charge the quota to, so
* we have to set uid and gid here
*/
- inode_init_owner(inode, dir, mode);
+ inode_init_owner(&init_user_ns, inode, dir, mode);
return dquot_initialize(inode);
}
-static int reiserfs_create(struct inode *dir, struct dentry *dentry, umode_t mode,
- bool excl)
+static int reiserfs_create(struct user_namespace *mnt_userns, struct inode *dir,
+ struct dentry *dentry, umode_t mode, bool excl)
{
int retval;
struct inode *inode;
@@ -698,8 +698,8 @@ out_failed:
return retval;
}
-static int reiserfs_mknod(struct inode *dir, struct dentry *dentry, umode_t mode,
- dev_t rdev)
+static int reiserfs_mknod(struct user_namespace *mnt_userns, struct inode *dir,
+ struct dentry *dentry, umode_t mode, dev_t rdev)
{
int retval;
struct inode *inode;
@@ -781,7 +781,8 @@ out_failed:
return retval;
}
-static int reiserfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
+static int reiserfs_mkdir(struct user_namespace *mnt_userns, struct inode *dir,
+ struct dentry *dentry, umode_t mode)
{
int retval;
struct inode *inode;
@@ -1094,8 +1095,9 @@ out_unlink:
return retval;
}
-static int reiserfs_symlink(struct inode *parent_dir,
- struct dentry *dentry, const char *symname)
+static int reiserfs_symlink(struct user_namespace *mnt_userns,
+ struct inode *parent_dir, struct dentry *dentry,
+ const char *symname)
{
int retval;
struct inode *inode;
@@ -1304,7 +1306,8 @@ static void set_ino_in_dir_entry(struct reiserfs_dir_entry *de,
* one path. If it holds 2 or more, it can get into endless waiting in
* get_empty_nodes or its clones
*/
-static int reiserfs_rename(struct inode *old_dir, struct dentry *old_dentry,
+static int reiserfs_rename(struct user_namespace *mnt_userns,
+ struct inode *old_dir, struct dentry *old_dentry,
struct inode *new_dir, struct dentry *new_dentry,
unsigned int flags)
{
diff --git a/fs/reiserfs/reiserfs.h b/fs/reiserfs/reiserfs.h
index f69871516167..0ca2ac62e534 100644
--- a/fs/reiserfs/reiserfs.h
+++ b/fs/reiserfs/reiserfs.h
@@ -3102,7 +3102,8 @@ static inline void reiserfs_update_sd(struct reiserfs_transaction_handle *th,
}
void sd_attrs_to_i_attrs(__u16 sd_attrs, struct inode *inode);
-int reiserfs_setattr(struct dentry *dentry, struct iattr *attr);
+int reiserfs_setattr(struct user_namespace *mnt_userns, struct dentry *dentry,
+ struct iattr *attr);
int __reiserfs_write_begin(struct page *page, unsigned from, unsigned len);
diff --git a/fs/reiserfs/xattr.c b/fs/reiserfs/xattr.c
index fe63a7c3e0da..bd073836e141 100644
--- a/fs/reiserfs/xattr.c
+++ b/fs/reiserfs/xattr.c
@@ -66,14 +66,14 @@
static int xattr_create(struct inode *dir, struct dentry *dentry, int mode)
{
BUG_ON(!inode_is_locked(dir));
- return dir->i_op->create(dir, dentry, mode, true);
+ return dir->i_op->create(&init_user_ns, dir, dentry, mode, true);
}
#endif
static int xattr_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
{
BUG_ON(!inode_is_locked(dir));
- return dir->i_op->mkdir(dir, dentry, mode);
+ return dir->i_op->mkdir(&init_user_ns, dir, dentry, mode);
}
/*
@@ -352,7 +352,7 @@ static int chown_one_xattr(struct dentry *dentry, void *data)
* ATTR_MODE is set.
*/
attrs->ia_valid &= (ATTR_UID|ATTR_GID);
- err = reiserfs_setattr(dentry, attrs);
+ err = reiserfs_setattr(&init_user_ns, dentry, attrs);
attrs->ia_valid = ia_valid;
return err;
@@ -604,7 +604,7 @@ reiserfs_xattr_set_handle(struct reiserfs_transaction_handle *th,
inode_lock_nested(d_inode(dentry), I_MUTEX_XATTR);
inode_dio_wait(d_inode(dentry));
- err = reiserfs_setattr(dentry, &newattrs);
+ err = reiserfs_setattr(&init_user_ns, dentry, &newattrs);
inode_unlock(d_inode(dentry));
} else
update_ctime(inode);
@@ -948,7 +948,8 @@ static int xattr_mount_check(struct super_block *s)
return 0;
}
-int reiserfs_permission(struct inode *inode, int mask)
+int reiserfs_permission(struct user_namespace *mnt_userns, struct inode *inode,
+ int mask)
{
/*
* We don't do permission checks on the internal objects.
@@ -957,7 +958,7 @@ int reiserfs_permission(struct inode *inode, int mask)
if (IS_PRIVATE(inode))
return 0;
- return generic_permission(inode, mask);
+ return generic_permission(&init_user_ns, inode, mask);
}
static int xattr_hide_revalidate(struct dentry *dentry, unsigned int flags)
diff --git a/fs/reiserfs/xattr.h b/fs/reiserfs/xattr.h
index c764352447ba..9b3b06da568c 100644
--- a/fs/reiserfs/xattr.h
+++ b/fs/reiserfs/xattr.h
@@ -16,7 +16,8 @@ int reiserfs_xattr_init(struct super_block *sb, int mount_flags);
int reiserfs_lookup_privroot(struct super_block *sb);
int reiserfs_delete_xattrs(struct inode *inode);
int reiserfs_chown_xattrs(struct inode *inode, struct iattr *attrs);
-int reiserfs_permission(struct inode *inode, int mask);
+int reiserfs_permission(struct user_namespace *mnt_userns,
+ struct inode *inode, int mask);
#ifdef CONFIG_REISERFS_FS_XATTR
#define has_xattr_dir(inode) (REISERFS_I(inode)->i_flags & i_has_xattr_dir)
diff --git a/fs/reiserfs/xattr_acl.c b/fs/reiserfs/xattr_acl.c
index ccd40df6eb45..a9547144a099 100644
--- a/fs/reiserfs/xattr_acl.c
+++ b/fs/reiserfs/xattr_acl.c
@@ -18,7 +18,8 @@ static int __reiserfs_set_acl(struct reiserfs_transaction_handle *th,
int
-reiserfs_set_acl(struct inode *inode, struct posix_acl *acl, int type)
+reiserfs_set_acl(struct user_namespace *mnt_userns, struct inode *inode,
+ struct posix_acl *acl, int type)
{
int error, error2;
struct reiserfs_transaction_handle th;
@@ -40,7 +41,8 @@ reiserfs_set_acl(struct inode *inode, struct posix_acl *acl, int type)
reiserfs_write_unlock(inode->i_sb);
if (error == 0) {
if (type == ACL_TYPE_ACCESS && acl) {
- error = posix_acl_update_mode(inode, &mode, &acl);
+ error = posix_acl_update_mode(&init_user_ns, inode,
+ &mode, &acl);
if (error)
goto unlock;
update_mode = 1;
@@ -399,5 +401,5 @@ int reiserfs_acl_chmod(struct inode *inode)
!reiserfs_posixacl(inode->i_sb))
return 0;
- return posix_acl_chmod(inode, inode->i_mode);
+ return posix_acl_chmod(&init_user_ns, inode, inode->i_mode);
}
diff --git a/fs/reiserfs/xattr_security.c b/fs/reiserfs/xattr_security.c
index 20be9a0e5870..8965c8e5e172 100644
--- a/fs/reiserfs/xattr_security.c
+++ b/fs/reiserfs/xattr_security.c
@@ -21,7 +21,8 @@ security_get(const struct xattr_handler *handler, struct dentry *unused,
}
static int
-security_set(const struct xattr_handler *handler, struct dentry *unused,
+security_set(const struct xattr_handler *handler,
+ struct user_namespace *mnt_userns, struct dentry *unused,
struct inode *inode, const char *name, const void *buffer,
size_t size, int flags)
{
diff --git a/fs/reiserfs/xattr_trusted.c b/fs/reiserfs/xattr_trusted.c
index 5ed48da3d02b..d853cea2afcd 100644
--- a/fs/reiserfs/xattr_trusted.c
+++ b/fs/reiserfs/xattr_trusted.c
@@ -20,7 +20,8 @@ trusted_get(const struct xattr_handler *handler, struct dentry *unused,
}
static int
-trusted_set(const struct xattr_handler *handler, struct dentry *unused,
+trusted_set(const struct xattr_handler *handler,
+ struct user_namespace *mnt_userns, struct dentry *unused,
struct inode *inode, const char *name, const void *buffer,
size_t size, int flags)
{
diff --git a/fs/reiserfs/xattr_user.c b/fs/reiserfs/xattr_user.c
index a573ca45bacc..65d9cd10a5ea 100644
--- a/fs/reiserfs/xattr_user.c
+++ b/fs/reiserfs/xattr_user.c
@@ -18,7 +18,8 @@ user_get(const struct xattr_handler *handler, struct dentry *unused,
}
static int
-user_set(const struct xattr_handler *handler, struct dentry *unused,
+user_set(const struct xattr_handler *handler, struct user_namespace *mnt_userns,
+ struct dentry *unused,
struct inode *inode, const char *name, const void *buffer,
size_t size, int flags)
{
diff --git a/fs/remap_range.c b/fs/remap_range.c
index 77dba3a49e65..e4a5fdd7ad7b 100644
--- a/fs/remap_range.c
+++ b/fs/remap_range.c
@@ -432,13 +432,16 @@ EXPORT_SYMBOL(vfs_clone_file_range);
/* Check whether we are allowed to dedupe the destination file */
static bool allow_file_dedupe(struct file *file)
{
+ struct user_namespace *mnt_userns = file_mnt_user_ns(file);
+ struct inode *inode = file_inode(file);
+
if (capable(CAP_SYS_ADMIN))
return true;
if (file->f_mode & FMODE_WRITE)
return true;
- if (uid_eq(current_fsuid(), file_inode(file)->i_uid))
+ if (uid_eq(current_fsuid(), i_uid_into_mnt(mnt_userns, inode)))
return true;
- if (!inode_permission(file_inode(file), MAY_WRITE))
+ if (!inode_permission(mnt_userns, inode, MAY_WRITE))
return true;
return false;
}
diff --git a/fs/seq_file.c b/fs/seq_file.c
index 03a369ccd28c..cb11a34fb871 100644
--- a/fs/seq_file.c
+++ b/fs/seq_file.c
@@ -669,7 +669,8 @@ void seq_puts(struct seq_file *m, const char *s)
EXPORT_SYMBOL(seq_puts);
/**
- * A helper routine for putting decimal numbers without rich format of printf().
+ * seq_put_decimal_ull_width - A helper routine for putting decimal numbers
+ * without rich format of printf().
* only 'unsigned long long' is supported.
* @m: seq_file identifying the buffer to which data should be written
* @delimiter: a string which is printed before the number
@@ -1044,7 +1045,7 @@ struct hlist_node *seq_hlist_next_rcu(void *v,
EXPORT_SYMBOL(seq_hlist_next_rcu);
/**
- * seq_hlist_start_precpu - start an iteration of a percpu hlist array
+ * seq_hlist_start_percpu - start an iteration of a percpu hlist array
* @head: pointer to percpu array of struct hlist_heads
* @cpu: pointer to cpu "cursor"
* @pos: start position of sequence
diff --git a/fs/stat.c b/fs/stat.c
index dacecdda2e79..fbc171d038aa 100644
--- a/fs/stat.c
+++ b/fs/stat.c
@@ -26,21 +26,29 @@
/**
* generic_fillattr - Fill in the basic attributes from the inode struct
- * @inode: Inode to use as the source
- * @stat: Where to fill in the attributes
+ * @mnt_userns: user namespace of the mount the inode was found from
+ * @inode: Inode to use as the source
+ * @stat: Where to fill in the attributes
*
* Fill in the basic attributes in the kstat structure from data that's to be
* found on the VFS inode structure. This is the default if no getattr inode
* operation is supplied.
+ *
+ * If the inode has been found through an idmapped mount the user namespace of
+ * the vfsmount must be passed through @mnt_userns. This function will then
+ * take care to map the inode according to @mnt_userns before filling in the
+ * uid and gid filds. On non-idmapped mounts or if permission checking is to be
+ * performed on the raw inode simply passs init_user_ns.
*/
-void generic_fillattr(struct inode *inode, struct kstat *stat)
+void generic_fillattr(struct user_namespace *mnt_userns, struct inode *inode,
+ struct kstat *stat)
{
stat->dev = inode->i_sb->s_dev;
stat->ino = inode->i_ino;
stat->mode = inode->i_mode;
stat->nlink = inode->i_nlink;
- stat->uid = inode->i_uid;
- stat->gid = inode->i_gid;
+ stat->uid = i_uid_into_mnt(mnt_userns, inode);
+ stat->gid = i_gid_into_mnt(mnt_userns, inode);
stat->rdev = inode->i_rdev;
stat->size = i_size_read(inode);
stat->atime = inode->i_atime;
@@ -67,6 +75,7 @@ EXPORT_SYMBOL(generic_fillattr);
int vfs_getattr_nosec(const struct path *path, struct kstat *stat,
u32 request_mask, unsigned int query_flags)
{
+ struct user_namespace *mnt_userns;
struct inode *inode = d_backing_inode(path->dentry);
memset(stat, 0, sizeof(*stat));
@@ -83,11 +92,12 @@ int vfs_getattr_nosec(const struct path *path, struct kstat *stat,
if (IS_DAX(inode))
stat->attributes |= STATX_ATTR_DAX;
+ mnt_userns = mnt_user_ns(path->mnt);
if (inode->i_op->getattr)
- return inode->i_op->getattr(path, stat, request_mask,
- query_flags);
+ return inode->i_op->getattr(mnt_userns, path, stat,
+ request_mask, query_flags);
- generic_fillattr(inode, stat);
+ generic_fillattr(mnt_userns, inode, stat);
return 0;
}
EXPORT_SYMBOL(vfs_getattr_nosec);
diff --git a/fs/super.c b/fs/super.c
index 5a1f384ffc74..8c1baca35c16 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -1719,12 +1719,6 @@ int freeze_super(struct super_block *sb)
}
EXPORT_SYMBOL(freeze_super);
-/**
- * thaw_super -- unlock filesystem
- * @sb: the super to thaw
- *
- * Unlocks the filesystem and marks it writeable again after freeze_super().
- */
static int thaw_super_locked(struct super_block *sb)
{
int error;
@@ -1760,6 +1754,12 @@ out:
return 0;
}
+/**
+ * thaw_super -- unlock filesystem
+ * @sb: the super to thaw
+ *
+ * Unlocks the filesystem and marks it writeable again after freeze_super().
+ */
int thaw_super(struct super_block *sb)
{
down_write(&sb->s_umount);
diff --git a/fs/sysfs/file.c b/fs/sysfs/file.c
index 96d0da65e088..9aefa7779b29 100644
--- a/fs/sysfs/file.c
+++ b/fs/sysfs/file.c
@@ -170,6 +170,16 @@ static int sysfs_kf_bin_mmap(struct kernfs_open_file *of,
return battr->mmap(of->file, kobj, battr, vma);
}
+static int sysfs_kf_bin_open(struct kernfs_open_file *of)
+{
+ struct bin_attribute *battr = of->kn->priv;
+
+ if (battr->mapping)
+ of->file->f_mapping = battr->mapping;
+
+ return 0;
+}
+
void sysfs_notify(struct kobject *kobj, const char *dir, const char *attr)
{
struct kernfs_node *kn = kobj->sd, *tmp;
@@ -241,6 +251,7 @@ static const struct kernfs_ops sysfs_bin_kfops_mmap = {
.read = sysfs_kf_bin_read,
.write = sysfs_kf_bin_write,
.mmap = sysfs_kf_bin_mmap,
+ .open = sysfs_kf_bin_open,
};
int sysfs_add_file_mode_ns(struct kernfs_node *parent,
diff --git a/fs/sysv/file.c b/fs/sysv/file.c
index 45fc79a18594..90e00124ea07 100644
--- a/fs/sysv/file.c
+++ b/fs/sysv/file.c
@@ -29,12 +29,13 @@ const struct file_operations sysv_file_operations = {
.splice_read = generic_file_splice_read,
};
-static int sysv_setattr(struct dentry *dentry, struct iattr *attr)
+static int sysv_setattr(struct user_namespace *mnt_userns,
+ struct dentry *dentry, struct iattr *attr)
{
struct inode *inode = d_inode(dentry);
int error;
- error = setattr_prepare(dentry, attr);
+ error = setattr_prepare(&init_user_ns, dentry, attr);
if (error)
return error;
@@ -47,7 +48,7 @@ static int sysv_setattr(struct dentry *dentry, struct iattr *attr)
sysv_truncate(inode);
}
- setattr_copy(inode, attr);
+ setattr_copy(&init_user_ns, inode, attr);
mark_inode_dirty(inode);
return 0;
}
diff --git a/fs/sysv/ialloc.c b/fs/sysv/ialloc.c
index 6c9801986af6..50df794a3c1f 100644
--- a/fs/sysv/ialloc.c
+++ b/fs/sysv/ialloc.c
@@ -163,7 +163,7 @@ struct inode * sysv_new_inode(const struct inode * dir, umode_t mode)
*sbi->s_sb_fic_count = cpu_to_fs16(sbi, count);
fs16_add(sbi, sbi->s_sb_total_free_inodes, -1);
dirty_sb(sb);
- inode_init_owner(inode, dir, mode);
+ inode_init_owner(&init_user_ns, inode, dir, mode);
inode->i_ino = fs16_to_cpu(sbi, ino);
inode->i_mtime = inode->i_atime = inode->i_ctime = current_time(inode);
inode->i_blocks = 0;
diff --git a/fs/sysv/itree.c b/fs/sysv/itree.c
index bcb67b0cabe7..8b2e99b7bc9f 100644
--- a/fs/sysv/itree.c
+++ b/fs/sysv/itree.c
@@ -441,11 +441,11 @@ static unsigned sysv_nblocks(struct super_block *s, loff_t size)
return blocks;
}
-int sysv_getattr(const struct path *path, struct kstat *stat,
- u32 request_mask, unsigned int flags)
+int sysv_getattr(struct user_namespace *mnt_userns, const struct path *path,
+ struct kstat *stat, u32 request_mask, unsigned int flags)
{
struct super_block *s = path->dentry->d_sb;
- generic_fillattr(d_inode(path->dentry), stat);
+ generic_fillattr(&init_user_ns, d_inode(path->dentry), stat);
stat->blocks = (s->s_blocksize / 512) * sysv_nblocks(s, stat->size);
stat->blksize = s->s_blocksize;
return 0;
diff --git a/fs/sysv/namei.c b/fs/sysv/namei.c
index ea2414b385ec..b2e6abc06a2d 100644
--- a/fs/sysv/namei.c
+++ b/fs/sysv/namei.c
@@ -41,7 +41,8 @@ static struct dentry *sysv_lookup(struct inode * dir, struct dentry * dentry, un
return d_splice_alias(inode, dentry);
}
-static int sysv_mknod(struct inode * dir, struct dentry * dentry, umode_t mode, dev_t rdev)
+static int sysv_mknod(struct user_namespace *mnt_userns, struct inode *dir,
+ struct dentry *dentry, umode_t mode, dev_t rdev)
{
struct inode * inode;
int err;
@@ -60,13 +61,14 @@ static int sysv_mknod(struct inode * dir, struct dentry * dentry, umode_t mode,
return err;
}
-static int sysv_create(struct inode * dir, struct dentry * dentry, umode_t mode, bool excl)
+static int sysv_create(struct user_namespace *mnt_userns, struct inode *dir,
+ struct dentry *dentry, umode_t mode, bool excl)
{
- return sysv_mknod(dir, dentry, mode, 0);
+ return sysv_mknod(&init_user_ns, dir, dentry, mode, 0);
}
-static int sysv_symlink(struct inode * dir, struct dentry * dentry,
- const char * symname)
+static int sysv_symlink(struct user_namespace *mnt_userns, struct inode *dir,
+ struct dentry *dentry, const char *symname)
{
int err = -ENAMETOOLONG;
int l = strlen(symname)+1;
@@ -108,7 +110,8 @@ static int sysv_link(struct dentry * old_dentry, struct inode * dir,
return add_nondir(dentry, inode);
}
-static int sysv_mkdir(struct inode * dir, struct dentry *dentry, umode_t mode)
+static int sysv_mkdir(struct user_namespace *mnt_userns, struct inode *dir,
+ struct dentry *dentry, umode_t mode)
{
struct inode * inode;
int err;
@@ -186,9 +189,9 @@ static int sysv_rmdir(struct inode * dir, struct dentry * dentry)
* Anybody can rename anything with this: the permission checks are left to the
* higher-level routines.
*/
-static int sysv_rename(struct inode * old_dir, struct dentry * old_dentry,
- struct inode * new_dir, struct dentry * new_dentry,
- unsigned int flags)
+static int sysv_rename(struct user_namespace *mnt_userns, struct inode *old_dir,
+ struct dentry *old_dentry, struct inode *new_dir,
+ struct dentry *new_dentry, unsigned int flags)
{
struct inode * old_inode = d_inode(old_dentry);
struct inode * new_inode = d_inode(new_dentry);
diff --git a/fs/sysv/sysv.h b/fs/sysv/sysv.h
index 1cff585526b1..99ddf033da4f 100644
--- a/fs/sysv/sysv.h
+++ b/fs/sysv/sysv.h
@@ -141,7 +141,8 @@ extern struct inode *sysv_iget(struct super_block *, unsigned int);
extern int sysv_write_inode(struct inode *, struct writeback_control *wbc);
extern int sysv_sync_inode(struct inode *);
extern void sysv_set_inode(struct inode *, dev_t);
-extern int sysv_getattr(const struct path *, struct kstat *, u32, unsigned int);
+extern int sysv_getattr(struct user_namespace *, const struct path *,
+ struct kstat *, u32, unsigned int);
extern int sysv_init_icache(void);
extern void sysv_destroy_icache(void);
diff --git a/fs/tracefs/inode.c b/fs/tracefs/inode.c
index 0ee8c6dfb036..4b83cbded559 100644
--- a/fs/tracefs/inode.c
+++ b/fs/tracefs/inode.c
@@ -67,7 +67,9 @@ static char *get_dname(struct dentry *dentry)
return name;
}
-static int tracefs_syscall_mkdir(struct inode *inode, struct dentry *dentry, umode_t mode)
+static int tracefs_syscall_mkdir(struct user_namespace *mnt_userns,
+ struct inode *inode, struct dentry *dentry,
+ umode_t mode)
{
char *name;
int ret;
diff --git a/fs/ubifs/dir.c b/fs/ubifs/dir.c
index 9a6b8660425a..d9d8d7794eff 100644
--- a/fs/ubifs/dir.c
+++ b/fs/ubifs/dir.c
@@ -94,7 +94,7 @@ struct inode *ubifs_new_inode(struct ubifs_info *c, struct inode *dir,
*/
inode->i_flags |= S_NOCMTIME;
- inode_init_owner(inode, dir, mode);
+ inode_init_owner(&init_user_ns, inode, dir, mode);
inode->i_mtime = inode->i_atime = inode->i_ctime =
current_time(inode);
inode->i_mapping->nrpages = 0;
@@ -280,8 +280,8 @@ static int ubifs_prepare_create(struct inode *dir, struct dentry *dentry,
return fscrypt_setup_filename(dir, &dentry->d_name, 0, nm);
}
-static int ubifs_create(struct inode *dir, struct dentry *dentry, umode_t mode,
- bool excl)
+static int ubifs_create(struct user_namespace *mnt_userns, struct inode *dir,
+ struct dentry *dentry, umode_t mode, bool excl)
{
struct inode *inode;
struct ubifs_info *c = dir->i_sb->s_fs_info;
@@ -441,8 +441,8 @@ out_budg:
return err;
}
-static int ubifs_tmpfile(struct inode *dir, struct dentry *dentry,
- umode_t mode)
+static int ubifs_tmpfile(struct user_namespace *mnt_userns, struct inode *dir,
+ struct dentry *dentry, umode_t mode)
{
return do_tmpfile(dir, dentry, mode, NULL);
}
@@ -942,7 +942,8 @@ out_fname:
return err;
}
-static int ubifs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
+static int ubifs_mkdir(struct user_namespace *mnt_userns, struct inode *dir,
+ struct dentry *dentry, umode_t mode)
{
struct inode *inode;
struct ubifs_inode *dir_ui = ubifs_inode(dir);
@@ -1013,8 +1014,8 @@ out_budg:
return err;
}
-static int ubifs_mknod(struct inode *dir, struct dentry *dentry,
- umode_t mode, dev_t rdev)
+static int ubifs_mknod(struct user_namespace *mnt_userns, struct inode *dir,
+ struct dentry *dentry, umode_t mode, dev_t rdev)
{
struct inode *inode;
struct ubifs_inode *ui;
@@ -1102,8 +1103,8 @@ out_budg:
return err;
}
-static int ubifs_symlink(struct inode *dir, struct dentry *dentry,
- const char *symname)
+static int ubifs_symlink(struct user_namespace *mnt_userns, struct inode *dir,
+ struct dentry *dentry, const char *symname)
{
struct inode *inode;
struct ubifs_inode *ui;
@@ -1542,7 +1543,8 @@ static int ubifs_xrename(struct inode *old_dir, struct dentry *old_dentry,
return err;
}
-static int ubifs_rename(struct inode *old_dir, struct dentry *old_dentry,
+static int ubifs_rename(struct user_namespace *mnt_userns,
+ struct inode *old_dir, struct dentry *old_dentry,
struct inode *new_dir, struct dentry *new_dentry,
unsigned int flags)
{
@@ -1566,8 +1568,8 @@ static int ubifs_rename(struct inode *old_dir, struct dentry *old_dentry,
return do_rename(old_dir, old_dentry, new_dir, new_dentry, flags);
}
-int ubifs_getattr(const struct path *path, struct kstat *stat,
- u32 request_mask, unsigned int flags)
+int ubifs_getattr(struct user_namespace *mnt_userns, const struct path *path,
+ struct kstat *stat, u32 request_mask, unsigned int flags)
{
loff_t size;
struct inode *inode = d_inode(path->dentry);
@@ -1589,7 +1591,7 @@ int ubifs_getattr(const struct path *path, struct kstat *stat,
STATX_ATTR_ENCRYPTED |
STATX_ATTR_IMMUTABLE);
- generic_fillattr(inode, stat);
+ generic_fillattr(&init_user_ns, inode, stat);
stat->blksize = UBIFS_BLOCK_SIZE;
stat->size = ui->ui_size;
diff --git a/fs/ubifs/file.c b/fs/ubifs/file.c
index 2bc7780d2963..0e4b4be3aa26 100644
--- a/fs/ubifs/file.c
+++ b/fs/ubifs/file.c
@@ -1257,7 +1257,8 @@ static int do_setattr(struct ubifs_info *c, struct inode *inode,
return err;
}
-int ubifs_setattr(struct dentry *dentry, struct iattr *attr)
+int ubifs_setattr(struct user_namespace *mnt_userns, struct dentry *dentry,
+ struct iattr *attr)
{
int err;
struct inode *inode = d_inode(dentry);
@@ -1265,7 +1266,7 @@ int ubifs_setattr(struct dentry *dentry, struct iattr *attr)
dbg_gen("ino %lu, mode %#x, ia_valid %#x",
inode->i_ino, inode->i_mode, attr->ia_valid);
- err = setattr_prepare(dentry, attr);
+ err = setattr_prepare(&init_user_ns, dentry, attr);
if (err)
return err;
diff --git a/fs/ubifs/ioctl.c b/fs/ubifs/ioctl.c
index 4363d85a3fd4..2326d5122beb 100644
--- a/fs/ubifs/ioctl.c
+++ b/fs/ubifs/ioctl.c
@@ -155,7 +155,7 @@ long ubifs_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
if (IS_RDONLY(inode))
return -EROFS;
- if (!inode_owner_or_capable(inode))
+ if (!inode_owner_or_capable(&init_user_ns, inode))
return -EACCES;
if (get_user(flags, (int __user *) arg))
diff --git a/fs/ubifs/ubifs.h b/fs/ubifs/ubifs.h
index fc2cdde3b549..7fdfdbda4b8a 100644
--- a/fs/ubifs/ubifs.h
+++ b/fs/ubifs/ubifs.h
@@ -1989,13 +1989,14 @@ int ubifs_calc_dark(const struct ubifs_info *c, int spc);
/* file.c */
int ubifs_fsync(struct file *file, loff_t start, loff_t end, int datasync);
-int ubifs_setattr(struct dentry *dentry, struct iattr *attr);
+int ubifs_setattr(struct user_namespace *mnt_userns, struct dentry *dentry,
+ struct iattr *attr);
int ubifs_update_time(struct inode *inode, struct timespec64 *time, int flags);
/* dir.c */
struct inode *ubifs_new_inode(struct ubifs_info *c, struct inode *dir,
umode_t mode);
-int ubifs_getattr(const struct path *path, struct kstat *stat,
+int ubifs_getattr(struct user_namespace *mnt_userns, const struct path *path, struct kstat *stat,
u32 request_mask, unsigned int flags);
int ubifs_check_dir_empty(struct inode *dir);
diff --git a/fs/ubifs/xattr.c b/fs/ubifs/xattr.c
index 842d5f14545d..6b1e9830b274 100644
--- a/fs/ubifs/xattr.c
+++ b/fs/ubifs/xattr.c
@@ -681,6 +681,7 @@ static int xattr_get(const struct xattr_handler *handler,
}
static int xattr_set(const struct xattr_handler *handler,
+ struct user_namespace *mnt_userns,
struct dentry *dentry, struct inode *inode,
const char *name, const void *value,
size_t size, int flags)
diff --git a/fs/udf/file.c b/fs/udf/file.c
index ad8eefad27d7..2846dcd92197 100644
--- a/fs/udf/file.c
+++ b/fs/udf/file.c
@@ -183,7 +183,7 @@ long udf_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
long old_block, new_block;
int result;
- if (inode_permission(inode, MAY_READ) != 0) {
+ if (file_permission(filp, MAY_READ) != 0) {
udf_debug("no permission to access inode %lu\n", inode->i_ino);
return -EPERM;
}
@@ -253,13 +253,14 @@ const struct file_operations udf_file_operations = {
.llseek = generic_file_llseek,
};
-static int udf_setattr(struct dentry *dentry, struct iattr *attr)
+static int udf_setattr(struct user_namespace *mnt_userns, struct dentry *dentry,
+ struct iattr *attr)
{
struct inode *inode = d_inode(dentry);
struct super_block *sb = inode->i_sb;
int error;
- error = setattr_prepare(dentry, attr);
+ error = setattr_prepare(&init_user_ns, dentry, attr);
if (error)
return error;
@@ -282,7 +283,7 @@ static int udf_setattr(struct dentry *dentry, struct iattr *attr)
if (attr->ia_valid & ATTR_MODE)
udf_update_extra_perms(inode, attr->ia_mode);
- setattr_copy(inode, attr);
+ setattr_copy(&init_user_ns, inode, attr);
mark_inode_dirty(inode);
return 0;
}
diff --git a/fs/udf/ialloc.c b/fs/udf/ialloc.c
index 84ed23edebfd..2ecf0e87660e 100644
--- a/fs/udf/ialloc.c
+++ b/fs/udf/ialloc.c
@@ -103,7 +103,7 @@ struct inode *udf_new_inode(struct inode *dir, umode_t mode)
mutex_unlock(&sbi->s_alloc_mutex);
}
- inode_init_owner(inode, dir, mode);
+ inode_init_owner(&init_user_ns, inode, dir, mode);
if (UDF_QUERY_FLAG(sb, UDF_FLAG_UID_SET))
inode->i_uid = sbi->s_uid;
if (UDF_QUERY_FLAG(sb, UDF_FLAG_GID_SET))
diff --git a/fs/udf/inode.c b/fs/udf/inode.c
index bb89c3e43212..0dd2f93ac048 100644
--- a/fs/udf/inode.c
+++ b/fs/udf/inode.c
@@ -544,11 +544,14 @@ static int udf_do_extend_file(struct inode *inode,
udf_write_aext(inode, last_pos, &last_ext->extLocation,
last_ext->extLength, 1);
+
/*
- * We've rewritten the last extent but there may be empty
- * indirect extent after it - enter it.
+ * We've rewritten the last extent. If we are going to add
+ * more extents, we may need to enter possible following
+ * empty indirect extent.
*/
- udf_next_aext(inode, last_pos, &tmploc, &tmplen, 0);
+ if (new_block_bytes || prealloc_len)
+ udf_next_aext(inode, last_pos, &tmploc, &tmplen, 0);
}
/* Managed to do everything necessary? */
diff --git a/fs/udf/namei.c b/fs/udf/namei.c
index e169d8fe35b5..f146b3089f3d 100644
--- a/fs/udf/namei.c
+++ b/fs/udf/namei.c
@@ -604,8 +604,8 @@ static int udf_add_nondir(struct dentry *dentry, struct inode *inode)
return 0;
}
-static int udf_create(struct inode *dir, struct dentry *dentry, umode_t mode,
- bool excl)
+static int udf_create(struct user_namespace *mnt_userns, struct inode *dir,
+ struct dentry *dentry, umode_t mode, bool excl)
{
struct inode *inode = udf_new_inode(dir, mode);
@@ -623,7 +623,8 @@ static int udf_create(struct inode *dir, struct dentry *dentry, umode_t mode,
return udf_add_nondir(dentry, inode);
}
-static int udf_tmpfile(struct inode *dir, struct dentry *dentry, umode_t mode)
+static int udf_tmpfile(struct user_namespace *mnt_userns, struct inode *dir,
+ struct dentry *dentry, umode_t mode)
{
struct inode *inode = udf_new_inode(dir, mode);
@@ -642,8 +643,8 @@ static int udf_tmpfile(struct inode *dir, struct dentry *dentry, umode_t mode)
return 0;
}
-static int udf_mknod(struct inode *dir, struct dentry *dentry, umode_t mode,
- dev_t rdev)
+static int udf_mknod(struct user_namespace *mnt_userns, struct inode *dir,
+ struct dentry *dentry, umode_t mode, dev_t rdev)
{
struct inode *inode;
@@ -658,7 +659,8 @@ static int udf_mknod(struct inode *dir, struct dentry *dentry, umode_t mode,
return udf_add_nondir(dentry, inode);
}
-static int udf_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
+static int udf_mkdir(struct user_namespace *mnt_userns, struct inode *dir,
+ struct dentry *dentry, umode_t mode)
{
struct inode *inode;
struct udf_fileident_bh fibh;
@@ -877,8 +879,8 @@ out:
return retval;
}
-static int udf_symlink(struct inode *dir, struct dentry *dentry,
- const char *symname)
+static int udf_symlink(struct user_namespace *mnt_userns, struct inode *dir,
+ struct dentry *dentry, const char *symname)
{
struct inode *inode = udf_new_inode(dir, S_IFLNK | 0777);
struct pathComponent *pc;
@@ -1065,9 +1067,9 @@ static int udf_link(struct dentry *old_dentry, struct inode *dir,
/* Anybody can rename anything with this: the permission checks are left to the
* higher-level routines.
*/
-static int udf_rename(struct inode *old_dir, struct dentry *old_dentry,
- struct inode *new_dir, struct dentry *new_dentry,
- unsigned int flags)
+static int udf_rename(struct user_namespace *mnt_userns, struct inode *old_dir,
+ struct dentry *old_dentry, struct inode *new_dir,
+ struct dentry *new_dentry, unsigned int flags)
{
struct inode *old_inode = d_inode(old_dentry);
struct inode *new_inode = d_inode(new_dentry);
diff --git a/fs/udf/super.c b/fs/udf/super.c
index d0df217f4712..2f83c1204e20 100644
--- a/fs/udf/super.c
+++ b/fs/udf/super.c
@@ -459,6 +459,7 @@ static int udf_parse_options(char *options, struct udf_options *uopt,
{
char *p;
int option;
+ unsigned int uv;
uopt->novrs = 0;
uopt->session = 0xFFFFFFFF;
@@ -508,17 +509,17 @@ static int udf_parse_options(char *options, struct udf_options *uopt,
uopt->flags &= ~(1 << UDF_FLAG_USE_SHORT_AD);
break;
case Opt_gid:
- if (match_int(args, &option))
+ if (match_uint(args, &uv))
return 0;
- uopt->gid = make_kgid(current_user_ns(), option);
+ uopt->gid = make_kgid(current_user_ns(), uv);
if (!gid_valid(uopt->gid))
return 0;
uopt->flags |= (1 << UDF_FLAG_GID_SET);
break;
case Opt_uid:
- if (match_int(args, &option))
+ if (match_uint(args, &uv))
return 0;
- uopt->uid = make_kuid(current_user_ns(), option);
+ uopt->uid = make_kuid(current_user_ns(), uv);
if (!uid_valid(uopt->uid))
return 0;
uopt->flags |= (1 << UDF_FLAG_UID_SET);
diff --git a/fs/udf/symlink.c b/fs/udf/symlink.c
index c973db239604..9b223421a3c5 100644
--- a/fs/udf/symlink.c
+++ b/fs/udf/symlink.c
@@ -152,14 +152,15 @@ out_unmap:
return err;
}
-static int udf_symlink_getattr(const struct path *path, struct kstat *stat,
- u32 request_mask, unsigned int flags)
+static int udf_symlink_getattr(struct user_namespace *mnt_userns,
+ const struct path *path, struct kstat *stat,
+ u32 request_mask, unsigned int flags)
{
struct dentry *dentry = path->dentry;
struct inode *inode = d_backing_inode(dentry);
struct page *page;
- generic_fillattr(inode, stat);
+ generic_fillattr(&init_user_ns, inode, stat);
page = read_mapping_page(inode->i_mapping, 0, NULL);
if (IS_ERR(page))
return PTR_ERR(page);
diff --git a/fs/ufs/ialloc.c b/fs/ufs/ialloc.c
index 969fd60436d3..7e3e08c0166f 100644
--- a/fs/ufs/ialloc.c
+++ b/fs/ufs/ialloc.c
@@ -289,7 +289,7 @@ cg_found:
ufs_mark_sb_dirty(sb);
inode->i_ino = cg * uspi->s_ipg + bit;
- inode_init_owner(inode, dir, mode);
+ inode_init_owner(&init_user_ns, inode, dir, mode);
inode->i_blocks = 0;
inode->i_generation = 0;
inode->i_mtime = inode->i_atime = inode->i_ctime = current_time(inode);
diff --git a/fs/ufs/inode.c b/fs/ufs/inode.c
index c843ec858cf7..debc282c1bb4 100644
--- a/fs/ufs/inode.c
+++ b/fs/ufs/inode.c
@@ -1211,13 +1211,14 @@ out:
return err;
}
-int ufs_setattr(struct dentry *dentry, struct iattr *attr)
+int ufs_setattr(struct user_namespace *mnt_userns, struct dentry *dentry,
+ struct iattr *attr)
{
struct inode *inode = d_inode(dentry);
unsigned int ia_valid = attr->ia_valid;
int error;
- error = setattr_prepare(dentry, attr);
+ error = setattr_prepare(&init_user_ns, dentry, attr);
if (error)
return error;
@@ -1227,7 +1228,7 @@ int ufs_setattr(struct dentry *dentry, struct iattr *attr)
return error;
}
- setattr_copy(inode, attr);
+ setattr_copy(&init_user_ns, inode, attr);
mark_inode_dirty(inode);
return 0;
}
diff --git a/fs/ufs/namei.c b/fs/ufs/namei.c
index 9ef40f100415..29d5a0e0c8f0 100644
--- a/fs/ufs/namei.c
+++ b/fs/ufs/namei.c
@@ -69,7 +69,8 @@ static struct dentry *ufs_lookup(struct inode * dir, struct dentry *dentry, unsi
* If the create succeeds, we fill in the inode information
* with d_instantiate().
*/
-static int ufs_create (struct inode * dir, struct dentry * dentry, umode_t mode,
+static int ufs_create (struct user_namespace * mnt_userns,
+ struct inode * dir, struct dentry * dentry, umode_t mode,
bool excl)
{
struct inode *inode;
@@ -85,7 +86,8 @@ static int ufs_create (struct inode * dir, struct dentry * dentry, umode_t mode,
return ufs_add_nondir(dentry, inode);
}
-static int ufs_mknod(struct inode *dir, struct dentry *dentry, umode_t mode, dev_t rdev)
+static int ufs_mknod(struct user_namespace *mnt_userns, struct inode *dir,
+ struct dentry *dentry, umode_t mode, dev_t rdev)
{
struct inode *inode;
int err;
@@ -104,8 +106,8 @@ static int ufs_mknod(struct inode *dir, struct dentry *dentry, umode_t mode, dev
return err;
}
-static int ufs_symlink (struct inode * dir, struct dentry * dentry,
- const char * symname)
+static int ufs_symlink (struct user_namespace * mnt_userns, struct inode * dir,
+ struct dentry * dentry, const char * symname)
{
struct super_block * sb = dir->i_sb;
int err;
@@ -164,7 +166,8 @@ static int ufs_link (struct dentry * old_dentry, struct inode * dir,
return error;
}
-static int ufs_mkdir(struct inode * dir, struct dentry * dentry, umode_t mode)
+static int ufs_mkdir(struct user_namespace * mnt_userns, struct inode * dir,
+ struct dentry * dentry, umode_t mode)
{
struct inode * inode;
int err;
@@ -240,9 +243,9 @@ static int ufs_rmdir (struct inode * dir, struct dentry *dentry)
return err;
}
-static int ufs_rename(struct inode *old_dir, struct dentry *old_dentry,
- struct inode *new_dir, struct dentry *new_dentry,
- unsigned int flags)
+static int ufs_rename(struct user_namespace *mnt_userns, struct inode *old_dir,
+ struct dentry *old_dentry, struct inode *new_dir,
+ struct dentry *new_dentry, unsigned int flags)
{
struct inode *old_inode = d_inode(old_dentry);
struct inode *new_inode = d_inode(new_dentry);
diff --git a/fs/ufs/ufs.h b/fs/ufs/ufs.h
index b49e0efdf3d7..550f7c5a3636 100644
--- a/fs/ufs/ufs.h
+++ b/fs/ufs/ufs.h
@@ -123,7 +123,8 @@ extern struct inode *ufs_iget(struct super_block *, unsigned long);
extern int ufs_write_inode (struct inode *, struct writeback_control *);
extern int ufs_sync_inode (struct inode *);
extern void ufs_evict_inode (struct inode *);
-extern int ufs_setattr(struct dentry *dentry, struct iattr *attr);
+extern int ufs_setattr(struct user_namespace *mnt_userns, struct dentry *dentry,
+ struct iattr *attr);
/* namei.c */
extern const struct file_operations ufs_dir_operations;
diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c
index 894cc28142e7..0be8cdd4425a 100644
--- a/fs/userfaultfd.c
+++ b/fs/userfaultfd.c
@@ -979,14 +979,14 @@ static __poll_t userfaultfd_poll(struct file *file, poll_table *wait)
static const struct file_operations userfaultfd_fops;
-static int resolve_userfault_fork(struct userfaultfd_ctx *ctx,
- struct userfaultfd_ctx *new,
+static int resolve_userfault_fork(struct userfaultfd_ctx *new,
+ struct inode *inode,
struct uffd_msg *msg)
{
int fd;
- fd = anon_inode_getfd("[userfaultfd]", &userfaultfd_fops, new,
- O_RDWR | (new->flags & UFFD_SHARED_FCNTL_FLAGS));
+ fd = anon_inode_getfd_secure("[userfaultfd]", &userfaultfd_fops, new,
+ O_RDWR | (new->flags & UFFD_SHARED_FCNTL_FLAGS), inode);
if (fd < 0)
return fd;
@@ -996,7 +996,7 @@ static int resolve_userfault_fork(struct userfaultfd_ctx *ctx,
}
static ssize_t userfaultfd_ctx_read(struct userfaultfd_ctx *ctx, int no_wait,
- struct uffd_msg *msg)
+ struct uffd_msg *msg, struct inode *inode)
{
ssize_t ret;
DECLARE_WAITQUEUE(wait, current);
@@ -1107,7 +1107,7 @@ static ssize_t userfaultfd_ctx_read(struct userfaultfd_ctx *ctx, int no_wait,
spin_unlock_irq(&ctx->fd_wqh.lock);
if (!ret && msg->event == UFFD_EVENT_FORK) {
- ret = resolve_userfault_fork(ctx, fork_nctx, msg);
+ ret = resolve_userfault_fork(fork_nctx, inode, msg);
spin_lock_irq(&ctx->event_wqh.lock);
if (!list_empty(&fork_event)) {
/*
@@ -1167,6 +1167,7 @@ static ssize_t userfaultfd_read(struct file *file, char __user *buf,
ssize_t _ret, ret = 0;
struct uffd_msg msg;
int no_wait = file->f_flags & O_NONBLOCK;
+ struct inode *inode = file_inode(file);
if (ctx->state == UFFD_STATE_WAIT_API)
return -EINVAL;
@@ -1174,7 +1175,7 @@ static ssize_t userfaultfd_read(struct file *file, char __user *buf,
for (;;) {
if (count < sizeof(msg))
return ret ? ret : -EINVAL;
- _ret = userfaultfd_ctx_read(ctx, no_wait, &msg);
+ _ret = userfaultfd_ctx_read(ctx, no_wait, &msg, inode);
if (_ret < 0)
return ret ? ret : _ret;
if (copy_to_user((__u64 __user *) buf, &msg, sizeof(msg)))
@@ -1999,8 +2000,8 @@ SYSCALL_DEFINE1(userfaultfd, int, flags)
/* prevent the mm struct to be freed */
mmgrab(ctx->mm);
- fd = anon_inode_getfd("[userfaultfd]", &userfaultfd_fops, ctx,
- O_RDWR | (flags & UFFD_SHARED_FCNTL_FLAGS));
+ fd = anon_inode_getfd_secure("[userfaultfd]", &userfaultfd_fops, ctx,
+ O_RDWR | (flags & UFFD_SHARED_FCNTL_FLAGS), NULL);
if (fd < 0) {
mmdrop(ctx->mm);
kmem_cache_free(userfaultfd_ctx_cachep, ctx);
diff --git a/fs/utimes.c b/fs/utimes.c
index fd3cc4226224..39f356017635 100644
--- a/fs/utimes.c
+++ b/fs/utimes.c
@@ -62,7 +62,8 @@ int vfs_utimes(const struct path *path, struct timespec64 *times)
}
retry_deleg:
inode_lock(inode);
- error = notify_change(path->dentry, &newattrs, &delegated_inode);
+ error = notify_change(mnt_user_ns(path->mnt), path->dentry, &newattrs,
+ &delegated_inode);
inode_unlock(inode);
if (delegated_inode) {
error = break_deleg_wait(&delegated_inode);
diff --git a/fs/vboxsf/dir.c b/fs/vboxsf/dir.c
index 4d569f14a8d8..7aee0ec63ade 100644
--- a/fs/vboxsf/dir.c
+++ b/fs/vboxsf/dir.c
@@ -288,13 +288,15 @@ static int vboxsf_dir_create(struct inode *parent, struct dentry *dentry,
return 0;
}
-static int vboxsf_dir_mkfile(struct inode *parent, struct dentry *dentry,
+static int vboxsf_dir_mkfile(struct user_namespace *mnt_userns,
+ struct inode *parent, struct dentry *dentry,
umode_t mode, bool excl)
{
return vboxsf_dir_create(parent, dentry, mode, 0);
}
-static int vboxsf_dir_mkdir(struct inode *parent, struct dentry *dentry,
+static int vboxsf_dir_mkdir(struct user_namespace *mnt_userns,
+ struct inode *parent, struct dentry *dentry,
umode_t mode)
{
return vboxsf_dir_create(parent, dentry, mode, 1);
@@ -332,7 +334,8 @@ static int vboxsf_dir_unlink(struct inode *parent, struct dentry *dentry)
return 0;
}
-static int vboxsf_dir_rename(struct inode *old_parent,
+static int vboxsf_dir_rename(struct user_namespace *mnt_userns,
+ struct inode *old_parent,
struct dentry *old_dentry,
struct inode *new_parent,
struct dentry *new_dentry,
@@ -374,7 +377,8 @@ err_put_old_path:
return err;
}
-static int vboxsf_dir_symlink(struct inode *parent, struct dentry *dentry,
+static int vboxsf_dir_symlink(struct user_namespace *mnt_userns,
+ struct inode *parent, struct dentry *dentry,
const char *symname)
{
struct vboxsf_inode *sf_parent_i = VBOXSF_I(parent);
diff --git a/fs/vboxsf/utils.c b/fs/vboxsf/utils.c
index 018057546067..3b847e3fba24 100644
--- a/fs/vboxsf/utils.c
+++ b/fs/vboxsf/utils.c
@@ -212,8 +212,8 @@ int vboxsf_inode_revalidate(struct dentry *dentry)
return 0;
}
-int vboxsf_getattr(const struct path *path, struct kstat *kstat,
- u32 request_mask, unsigned int flags)
+int vboxsf_getattr(struct user_namespace *mnt_userns, const struct path *path,
+ struct kstat *kstat, u32 request_mask, unsigned int flags)
{
int err;
struct dentry *dentry = path->dentry;
@@ -233,11 +233,12 @@ int vboxsf_getattr(const struct path *path, struct kstat *kstat,
if (err)
return err;
- generic_fillattr(d_inode(dentry), kstat);
+ generic_fillattr(&init_user_ns, d_inode(dentry), kstat);
return 0;
}
-int vboxsf_setattr(struct dentry *dentry, struct iattr *iattr)
+int vboxsf_setattr(struct user_namespace *mnt_userns, struct dentry *dentry,
+ struct iattr *iattr)
{
struct vboxsf_inode *sf_i = VBOXSF_I(d_inode(dentry));
struct vboxsf_sbi *sbi = VBOXSF_SBI(dentry->d_sb);
diff --git a/fs/vboxsf/vfsmod.h b/fs/vboxsf/vfsmod.h
index 18f95b00fc33..760524e78c88 100644
--- a/fs/vboxsf/vfsmod.h
+++ b/fs/vboxsf/vfsmod.h
@@ -90,9 +90,11 @@ int vboxsf_stat(struct vboxsf_sbi *sbi, struct shfl_string *path,
struct shfl_fsobjinfo *info);
int vboxsf_stat_dentry(struct dentry *dentry, struct shfl_fsobjinfo *info);
int vboxsf_inode_revalidate(struct dentry *dentry);
-int vboxsf_getattr(const struct path *path, struct kstat *kstat,
- u32 request_mask, unsigned int query_flags);
-int vboxsf_setattr(struct dentry *dentry, struct iattr *iattr);
+int vboxsf_getattr(struct user_namespace *mnt_userns, const struct path *path,
+ struct kstat *kstat, u32 request_mask,
+ unsigned int query_flags);
+int vboxsf_setattr(struct user_namespace *mnt_userns, struct dentry *dentry,
+ struct iattr *iattr);
struct shfl_string *vboxsf_path_from_dentry(struct vboxsf_sbi *sbi,
struct dentry *dentry);
int vboxsf_nlscpy(struct vboxsf_sbi *sbi, char *name, size_t name_bound_len,
diff --git a/fs/verity/enable.c b/fs/verity/enable.c
index f7e997a01ad0..77e159a0346b 100644
--- a/fs/verity/enable.c
+++ b/fs/verity/enable.c
@@ -369,7 +369,7 @@ int fsverity_ioctl_enable(struct file *filp, const void __user *uarg)
* has verity enabled, and to stabilize the data being hashed.
*/
- err = inode_permission(inode, MAY_WRITE);
+ err = file_permission(filp, MAY_WRITE);
if (err)
return err;
diff --git a/fs/xattr.c b/fs/xattr.c
index fd57153b1f61..b3444e06cded 100644
--- a/fs/xattr.c
+++ b/fs/xattr.c
@@ -83,7 +83,8 @@ xattr_resolve_name(struct inode *inode, const char **name)
* because different namespaces have very different rules.
*/
static int
-xattr_permission(struct inode *inode, const char *name, int mask)
+xattr_permission(struct user_namespace *mnt_userns, struct inode *inode,
+ const char *name, int mask)
{
/*
* We can never set or remove an extended attribute on a read-only
@@ -97,7 +98,7 @@ xattr_permission(struct inode *inode, const char *name, int mask)
* to be writen back improperly if their true value is
* unknown to the vfs.
*/
- if (HAS_UNMAPPED_ID(inode))
+ if (HAS_UNMAPPED_ID(mnt_userns, inode))
return -EPERM;
}
@@ -127,11 +128,12 @@ xattr_permission(struct inode *inode, const char *name, int mask)
if (!S_ISREG(inode->i_mode) && !S_ISDIR(inode->i_mode))
return (mask & MAY_WRITE) ? -EPERM : -ENODATA;
if (S_ISDIR(inode->i_mode) && (inode->i_mode & S_ISVTX) &&
- (mask & MAY_WRITE) && !inode_owner_or_capable(inode))
+ (mask & MAY_WRITE) &&
+ !inode_owner_or_capable(mnt_userns, inode))
return -EPERM;
}
- return inode_permission(inode, mask);
+ return inode_permission(mnt_userns, inode, mask);
}
/*
@@ -162,8 +164,9 @@ xattr_supported_namespace(struct inode *inode, const char *prefix)
EXPORT_SYMBOL(xattr_supported_namespace);
int
-__vfs_setxattr(struct dentry *dentry, struct inode *inode, const char *name,
- const void *value, size_t size, int flags)
+__vfs_setxattr(struct user_namespace *mnt_userns, struct dentry *dentry,
+ struct inode *inode, const char *name, const void *value,
+ size_t size, int flags)
{
const struct xattr_handler *handler;
@@ -174,7 +177,8 @@ __vfs_setxattr(struct dentry *dentry, struct inode *inode, const char *name,
return -EOPNOTSUPP;
if (size == 0)
value = ""; /* empty EA, do not remove */
- return handler->set(handler, dentry, inode, name, value, size, flags);
+ return handler->set(handler, mnt_userns, dentry, inode, name, value,
+ size, flags);
}
EXPORT_SYMBOL(__vfs_setxattr);
@@ -182,6 +186,7 @@ EXPORT_SYMBOL(__vfs_setxattr);
* __vfs_setxattr_noperm - perform setxattr operation without performing
* permission checks.
*
+ * @mnt_userns - user namespace of the mount the inode was found from
* @dentry - object to perform setxattr on
* @name - xattr name to set
* @value - value to set @name to
@@ -194,8 +199,9 @@ EXPORT_SYMBOL(__vfs_setxattr);
* is executed. It also assumes that the caller will make the appropriate
* permission checks.
*/
-int __vfs_setxattr_noperm(struct dentry *dentry, const char *name,
- const void *value, size_t size, int flags)
+int __vfs_setxattr_noperm(struct user_namespace *mnt_userns,
+ struct dentry *dentry, const char *name,
+ const void *value, size_t size, int flags)
{
struct inode *inode = dentry->d_inode;
int error = -EAGAIN;
@@ -205,7 +211,8 @@ int __vfs_setxattr_noperm(struct dentry *dentry, const char *name,
if (issec)
inode->i_flags &= ~S_NOSEC;
if (inode->i_opflags & IOP_XATTR) {
- error = __vfs_setxattr(dentry, inode, name, value, size, flags);
+ error = __vfs_setxattr(mnt_userns, dentry, inode, name, value,
+ size, flags);
if (!error) {
fsnotify_xattr(dentry);
security_inode_post_setxattr(dentry, name, value,
@@ -244,18 +251,19 @@ int __vfs_setxattr_noperm(struct dentry *dentry, const char *name,
* a delegation was broken on, NULL if none.
*/
int
-__vfs_setxattr_locked(struct dentry *dentry, const char *name,
- const void *value, size_t size, int flags,
- struct inode **delegated_inode)
+__vfs_setxattr_locked(struct user_namespace *mnt_userns, struct dentry *dentry,
+ const char *name, const void *value, size_t size,
+ int flags, struct inode **delegated_inode)
{
struct inode *inode = dentry->d_inode;
int error;
- error = xattr_permission(inode, name, MAY_WRITE);
+ error = xattr_permission(mnt_userns, inode, name, MAY_WRITE);
if (error)
return error;
- error = security_inode_setxattr(dentry, name, value, size, flags);
+ error = security_inode_setxattr(mnt_userns, dentry, name, value, size,
+ flags);
if (error)
goto out;
@@ -263,7 +271,8 @@ __vfs_setxattr_locked(struct dentry *dentry, const char *name,
if (error)
goto out;
- error = __vfs_setxattr_noperm(dentry, name, value, size, flags);
+ error = __vfs_setxattr_noperm(mnt_userns, dentry, name, value,
+ size, flags);
out:
return error;
@@ -271,8 +280,8 @@ out:
EXPORT_SYMBOL_GPL(__vfs_setxattr_locked);
int
-vfs_setxattr(struct dentry *dentry, const char *name, const void *value,
- size_t size, int flags)
+vfs_setxattr(struct user_namespace *mnt_userns, struct dentry *dentry,
+ const char *name, const void *value, size_t size, int flags)
{
struct inode *inode = dentry->d_inode;
struct inode *delegated_inode = NULL;
@@ -280,7 +289,7 @@ vfs_setxattr(struct dentry *dentry, const char *name, const void *value,
int error;
if (size && strcmp(name, XATTR_NAME_CAPS) == 0) {
- error = cap_convert_nscap(dentry, &value, size);
+ error = cap_convert_nscap(mnt_userns, dentry, &value, size);
if (error < 0)
return error;
size = error;
@@ -288,8 +297,8 @@ vfs_setxattr(struct dentry *dentry, const char *name, const void *value,
retry_deleg:
inode_lock(inode);
- error = __vfs_setxattr_locked(dentry, name, value, size, flags,
- &delegated_inode);
+ error = __vfs_setxattr_locked(mnt_userns, dentry, name, value, size,
+ flags, &delegated_inode);
inode_unlock(inode);
if (delegated_inode) {
@@ -305,18 +314,20 @@ retry_deleg:
EXPORT_SYMBOL_GPL(vfs_setxattr);
static ssize_t
-xattr_getsecurity(struct inode *inode, const char *name, void *value,
- size_t size)
+xattr_getsecurity(struct user_namespace *mnt_userns, struct inode *inode,
+ const char *name, void *value, size_t size)
{
void *buffer = NULL;
ssize_t len;
if (!value || !size) {
- len = security_inode_getsecurity(inode, name, &buffer, false);
+ len = security_inode_getsecurity(mnt_userns, inode, name,
+ &buffer, false);
goto out_noalloc;
}
- len = security_inode_getsecurity(inode, name, &buffer, true);
+ len = security_inode_getsecurity(mnt_userns, inode, name, &buffer,
+ true);
if (len < 0)
return len;
if (size < len) {
@@ -339,15 +350,16 @@ out_noalloc:
* Returns the result of alloc, if failed, or the getxattr operation.
*/
ssize_t
-vfs_getxattr_alloc(struct dentry *dentry, const char *name, char **xattr_value,
- size_t xattr_size, gfp_t flags)
+vfs_getxattr_alloc(struct user_namespace *mnt_userns, struct dentry *dentry,
+ const char *name, char **xattr_value, size_t xattr_size,
+ gfp_t flags)
{
const struct xattr_handler *handler;
struct inode *inode = dentry->d_inode;
char *value = *xattr_value;
int error;
- error = xattr_permission(inode, name, MAY_READ);
+ error = xattr_permission(mnt_userns, inode, name, MAY_READ);
if (error)
return error;
@@ -388,12 +400,13 @@ __vfs_getxattr(struct dentry *dentry, struct inode *inode, const char *name,
EXPORT_SYMBOL(__vfs_getxattr);
ssize_t
-vfs_getxattr(struct dentry *dentry, const char *name, void *value, size_t size)
+vfs_getxattr(struct user_namespace *mnt_userns, struct dentry *dentry,
+ const char *name, void *value, size_t size)
{
struct inode *inode = dentry->d_inode;
int error;
- error = xattr_permission(inode, name, MAY_READ);
+ error = xattr_permission(mnt_userns, inode, name, MAY_READ);
if (error)
return error;
@@ -404,7 +417,8 @@ vfs_getxattr(struct dentry *dentry, const char *name, void *value, size_t size)
if (!strncmp(name, XATTR_SECURITY_PREFIX,
XATTR_SECURITY_PREFIX_LEN)) {
const char *suffix = name + XATTR_SECURITY_PREFIX_LEN;
- int ret = xattr_getsecurity(inode, suffix, value, size);
+ int ret = xattr_getsecurity(mnt_userns, inode, suffix, value,
+ size);
/*
* Only overwrite the return value if a security module
* is actually active.
@@ -439,7 +453,8 @@ vfs_listxattr(struct dentry *dentry, char *list, size_t size)
EXPORT_SYMBOL_GPL(vfs_listxattr);
int
-__vfs_removexattr(struct dentry *dentry, const char *name)
+__vfs_removexattr(struct user_namespace *mnt_userns, struct dentry *dentry,
+ const char *name)
{
struct inode *inode = d_inode(dentry);
const struct xattr_handler *handler;
@@ -449,7 +464,8 @@ __vfs_removexattr(struct dentry *dentry, const char *name)
return PTR_ERR(handler);
if (!handler->set)
return -EOPNOTSUPP;
- return handler->set(handler, dentry, inode, name, NULL, 0, XATTR_REPLACE);
+ return handler->set(handler, mnt_userns, dentry, inode, name, NULL, 0,
+ XATTR_REPLACE);
}
EXPORT_SYMBOL(__vfs_removexattr);
@@ -463,17 +479,18 @@ EXPORT_SYMBOL(__vfs_removexattr);
* a delegation was broken on, NULL if none.
*/
int
-__vfs_removexattr_locked(struct dentry *dentry, const char *name,
- struct inode **delegated_inode)
+__vfs_removexattr_locked(struct user_namespace *mnt_userns,
+ struct dentry *dentry, const char *name,
+ struct inode **delegated_inode)
{
struct inode *inode = dentry->d_inode;
int error;
- error = xattr_permission(inode, name, MAY_WRITE);
+ error = xattr_permission(mnt_userns, inode, name, MAY_WRITE);
if (error)
return error;
- error = security_inode_removexattr(dentry, name);
+ error = security_inode_removexattr(mnt_userns, dentry, name);
if (error)
goto out;
@@ -481,7 +498,7 @@ __vfs_removexattr_locked(struct dentry *dentry, const char *name,
if (error)
goto out;
- error = __vfs_removexattr(dentry, name);
+ error = __vfs_removexattr(mnt_userns, dentry, name);
if (!error) {
fsnotify_xattr(dentry);
@@ -494,7 +511,8 @@ out:
EXPORT_SYMBOL_GPL(__vfs_removexattr_locked);
int
-vfs_removexattr(struct dentry *dentry, const char *name)
+vfs_removexattr(struct user_namespace *mnt_userns, struct dentry *dentry,
+ const char *name)
{
struct inode *inode = dentry->d_inode;
struct inode *delegated_inode = NULL;
@@ -502,7 +520,8 @@ vfs_removexattr(struct dentry *dentry, const char *name)
retry_deleg:
inode_lock(inode);
- error = __vfs_removexattr_locked(dentry, name, &delegated_inode);
+ error = __vfs_removexattr_locked(mnt_userns, dentry,
+ name, &delegated_inode);
inode_unlock(inode);
if (delegated_inode) {
@@ -519,8 +538,9 @@ EXPORT_SYMBOL_GPL(vfs_removexattr);
* Extended attribute SET operations
*/
static long
-setxattr(struct dentry *d, const char __user *name, const void __user *value,
- size_t size, int flags)
+setxattr(struct user_namespace *mnt_userns, struct dentry *d,
+ const char __user *name, const void __user *value, size_t size,
+ int flags)
{
int error;
void *kvalue = NULL;
@@ -547,10 +567,10 @@ setxattr(struct dentry *d, const char __user *name, const void __user *value,
}
if ((strcmp(kname, XATTR_NAME_POSIX_ACL_ACCESS) == 0) ||
(strcmp(kname, XATTR_NAME_POSIX_ACL_DEFAULT) == 0))
- posix_acl_fix_xattr_from_user(kvalue, size);
+ posix_acl_fix_xattr_from_user(mnt_userns, kvalue, size);
}
- error = vfs_setxattr(d, kname, kvalue, size, flags);
+ error = vfs_setxattr(mnt_userns, d, kname, kvalue, size, flags);
out:
kvfree(kvalue);
@@ -563,13 +583,15 @@ static int path_setxattr(const char __user *pathname,
{
struct path path;
int error;
+
retry:
error = user_path_at(AT_FDCWD, pathname, lookup_flags, &path);
if (error)
return error;
error = mnt_want_write(path.mnt);
if (!error) {
- error = setxattr(path.dentry, name, value, size, flags);
+ error = setxattr(mnt_user_ns(path.mnt), path.dentry, name,
+ value, size, flags);
mnt_drop_write(path.mnt);
}
path_put(&path);
@@ -605,7 +627,9 @@ SYSCALL_DEFINE5(fsetxattr, int, fd, const char __user *, name,
audit_file(f.file);
error = mnt_want_write_file(f.file);
if (!error) {
- error = setxattr(f.file->f_path.dentry, name, value, size, flags);
+ error = setxattr(file_mnt_user_ns(f.file),
+ f.file->f_path.dentry, name,
+ value, size, flags);
mnt_drop_write_file(f.file);
}
fdput(f);
@@ -616,8 +640,8 @@ SYSCALL_DEFINE5(fsetxattr, int, fd, const char __user *, name,
* Extended attribute GET operations
*/
static ssize_t
-getxattr(struct dentry *d, const char __user *name, void __user *value,
- size_t size)
+getxattr(struct user_namespace *mnt_userns, struct dentry *d,
+ const char __user *name, void __user *value, size_t size)
{
ssize_t error;
void *kvalue = NULL;
@@ -637,11 +661,11 @@ getxattr(struct dentry *d, const char __user *name, void __user *value,
return -ENOMEM;
}
- error = vfs_getxattr(d, kname, kvalue, size);
+ error = vfs_getxattr(mnt_userns, d, kname, kvalue, size);
if (error > 0) {
if ((strcmp(kname, XATTR_NAME_POSIX_ACL_ACCESS) == 0) ||
(strcmp(kname, XATTR_NAME_POSIX_ACL_DEFAULT) == 0))
- posix_acl_fix_xattr_to_user(kvalue, error);
+ posix_acl_fix_xattr_to_user(mnt_userns, kvalue, error);
if (size && copy_to_user(value, kvalue, error))
error = -EFAULT;
} else if (error == -ERANGE && size >= XATTR_SIZE_MAX) {
@@ -665,7 +689,7 @@ retry:
error = user_path_at(AT_FDCWD, pathname, lookup_flags, &path);
if (error)
return error;
- error = getxattr(path.dentry, name, value, size);
+ error = getxattr(mnt_user_ns(path.mnt), path.dentry, name, value, size);
path_put(&path);
if (retry_estale(error, lookup_flags)) {
lookup_flags |= LOOKUP_REVAL;
@@ -695,7 +719,8 @@ SYSCALL_DEFINE4(fgetxattr, int, fd, const char __user *, name,
if (!f.file)
return error;
audit_file(f.file);
- error = getxattr(f.file->f_path.dentry, name, value, size);
+ error = getxattr(file_mnt_user_ns(f.file), f.file->f_path.dentry,
+ name, value, size);
fdput(f);
return error;
}
@@ -779,7 +804,8 @@ SYSCALL_DEFINE3(flistxattr, int, fd, char __user *, list, size_t, size)
* Extended attribute REMOVE operations
*/
static long
-removexattr(struct dentry *d, const char __user *name)
+removexattr(struct user_namespace *mnt_userns, struct dentry *d,
+ const char __user *name)
{
int error;
char kname[XATTR_NAME_MAX + 1];
@@ -790,7 +816,7 @@ removexattr(struct dentry *d, const char __user *name)
if (error < 0)
return error;
- return vfs_removexattr(d, kname);
+ return vfs_removexattr(mnt_userns, d, kname);
}
static int path_removexattr(const char __user *pathname,
@@ -804,7 +830,7 @@ retry:
return error;
error = mnt_want_write(path.mnt);
if (!error) {
- error = removexattr(path.dentry, name);
+ error = removexattr(mnt_user_ns(path.mnt), path.dentry, name);
mnt_drop_write(path.mnt);
}
path_put(&path);
@@ -837,7 +863,8 @@ SYSCALL_DEFINE2(fremovexattr, int, fd, const char __user *, name)
audit_file(f.file);
error = mnt_want_write_file(f.file);
if (!error) {
- error = removexattr(f.file->f_path.dentry, name);
+ error = removexattr(file_mnt_user_ns(f.file),
+ f.file->f_path.dentry, name);
mnt_drop_write_file(f.file);
}
fdput(f);
diff --git a/fs/xfs/xfs_acl.c b/fs/xfs/xfs_acl.c
index 779cb73b3d00..d02bef24b32b 100644
--- a/fs/xfs/xfs_acl.c
+++ b/fs/xfs/xfs_acl.c
@@ -238,7 +238,8 @@ xfs_acl_set_mode(
}
int
-xfs_set_acl(struct inode *inode, struct posix_acl *acl, int type)
+xfs_set_acl(struct user_namespace *mnt_userns, struct inode *inode,
+ struct posix_acl *acl, int type)
{
umode_t mode;
bool set_mode = false;
@@ -252,7 +253,7 @@ xfs_set_acl(struct inode *inode, struct posix_acl *acl, int type)
return error;
if (type == ACL_TYPE_ACCESS) {
- error = posix_acl_update_mode(inode, &mode, &acl);
+ error = posix_acl_update_mode(mnt_userns, inode, &mode, &acl);
if (error)
return error;
set_mode = true;
diff --git a/fs/xfs/xfs_acl.h b/fs/xfs/xfs_acl.h
index c042c0868016..7bdb3a4ed798 100644
--- a/fs/xfs/xfs_acl.h
+++ b/fs/xfs/xfs_acl.h
@@ -11,7 +11,8 @@ struct posix_acl;
#ifdef CONFIG_XFS_POSIX_ACL
extern struct posix_acl *xfs_get_acl(struct inode *inode, int type);
-extern int xfs_set_acl(struct inode *inode, struct posix_acl *acl, int type);
+extern int xfs_set_acl(struct user_namespace *mnt_userns, struct inode *inode,
+ struct posix_acl *acl, int type);
extern int __xfs_set_acl(struct inode *inode, struct posix_acl *acl, int type);
void xfs_forget_acl(struct inode *inode, const char *name);
#else
diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index 68ca1b40d8c7..a007ca0711d9 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -29,6 +29,7 @@
#include <linux/backing-dev.h>
#include <linux/mman.h>
#include <linux/fadvise.h>
+#include <linux/mount.h>
static const struct vm_operations_struct xfs_file_vm_ops;
@@ -1051,7 +1052,8 @@ xfs_file_fallocate(
iattr.ia_valid = ATTR_SIZE;
iattr.ia_size = new_size;
- error = xfs_vn_setattr_size(file_dentry(file), &iattr);
+ error = xfs_vn_setattr_size(file_mnt_user_ns(file),
+ file_dentry(file), &iattr);
if (error)
goto out_unlock;
}
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index 636ac13b1df2..46a861d55e48 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -766,6 +766,7 @@ xfs_inode_inherit_flags2(
*/
static int
xfs_init_new_inode(
+ struct user_namespace *mnt_userns,
struct xfs_trans *tp,
struct xfs_inode *pip,
xfs_ino_t ino,
@@ -811,11 +812,11 @@ xfs_init_new_inode(
if (dir && !(dir->i_mode & S_ISGID) &&
(mp->m_flags & XFS_MOUNT_GRPID)) {
- inode->i_uid = current_fsuid();
+ inode->i_uid = fsuid_into_mnt(mnt_userns);
inode->i_gid = dir->i_gid;
inode->i_mode = mode;
} else {
- inode_init_owner(inode, dir, mode);
+ inode_init_owner(mnt_userns, inode, dir, mode);
}
/*
@@ -824,7 +825,8 @@ xfs_init_new_inode(
* (and only if the irix_sgid_inherit compatibility variable is set).
*/
if (irix_sgid_inherit &&
- (inode->i_mode & S_ISGID) && !in_group_p(inode->i_gid))
+ (inode->i_mode & S_ISGID) &&
+ !in_group_p(i_gid_into_mnt(mnt_userns, inode)))
inode->i_mode &= ~S_ISGID;
ip->i_d.di_size = 0;
@@ -901,6 +903,7 @@ xfs_init_new_inode(
*/
int
xfs_dir_ialloc(
+ struct user_namespace *mnt_userns,
struct xfs_trans **tpp,
struct xfs_inode *dp,
umode_t mode,
@@ -933,7 +936,8 @@ xfs_dir_ialloc(
return error;
ASSERT(ino != NULLFSINO);
- return xfs_init_new_inode(*tpp, dp, ino, mode, nlink, rdev, prid, ipp);
+ return xfs_init_new_inode(mnt_userns, *tpp, dp, ino, mode, nlink, rdev,
+ prid, ipp);
}
/*
@@ -973,6 +977,7 @@ xfs_bumplink(
int
xfs_create(
+ struct user_namespace *mnt_userns,
xfs_inode_t *dp,
struct xfs_name *name,
umode_t mode,
@@ -1046,7 +1051,8 @@ xfs_create(
* entry pointing to them, but a directory also the "." entry
* pointing to itself.
*/
- error = xfs_dir_ialloc(&tp, dp, mode, is_dir ? 2 : 1, rdev, prid, &ip);
+ error = xfs_dir_ialloc(mnt_userns, &tp, dp, mode, is_dir ? 2 : 1, rdev,
+ prid, &ip);
if (error)
goto out_trans_cancel;
@@ -1127,6 +1133,7 @@ xfs_create(
int
xfs_create_tmpfile(
+ struct user_namespace *mnt_userns,
struct xfs_inode *dp,
umode_t mode,
struct xfs_inode **ipp)
@@ -1164,7 +1171,7 @@ xfs_create_tmpfile(
if (error)
goto out_release_dquots;
- error = xfs_dir_ialloc(&tp, dp, mode, 0, 0, prid, &ip);
+ error = xfs_dir_ialloc(mnt_userns, &tp, dp, mode, 0, 0, prid, &ip);
if (error)
goto out_trans_cancel;
@@ -2977,13 +2984,15 @@ out_trans_abort:
*/
static int
xfs_rename_alloc_whiteout(
+ struct user_namespace *mnt_userns,
struct xfs_inode *dp,
struct xfs_inode **wip)
{
struct xfs_inode *tmpfile;
int error;
- error = xfs_create_tmpfile(dp, S_IFCHR | WHITEOUT_MODE, &tmpfile);
+ error = xfs_create_tmpfile(mnt_userns, dp, S_IFCHR | WHITEOUT_MODE,
+ &tmpfile);
if (error)
return error;
@@ -3005,6 +3014,7 @@ xfs_rename_alloc_whiteout(
*/
int
xfs_rename(
+ struct user_namespace *mnt_userns,
struct xfs_inode *src_dp,
struct xfs_name *src_name,
struct xfs_inode *src_ip,
@@ -3036,7 +3046,7 @@ xfs_rename(
*/
if (flags & RENAME_WHITEOUT) {
ASSERT(!(flags & (RENAME_NOREPLACE | RENAME_EXCHANGE)));
- error = xfs_rename_alloc_whiteout(target_dp, &wip);
+ error = xfs_rename_alloc_whiteout(mnt_userns, target_dp, &wip);
if (error)
return error;
diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h
index eca333f5f715..88ee4c3930ae 100644
--- a/fs/xfs/xfs_inode.h
+++ b/fs/xfs/xfs_inode.h
@@ -369,15 +369,18 @@ int xfs_release(struct xfs_inode *ip);
void xfs_inactive(struct xfs_inode *ip);
int xfs_lookup(struct xfs_inode *dp, struct xfs_name *name,
struct xfs_inode **ipp, struct xfs_name *ci_name);
-int xfs_create(struct xfs_inode *dp, struct xfs_name *name,
+int xfs_create(struct user_namespace *mnt_userns,
+ struct xfs_inode *dp, struct xfs_name *name,
umode_t mode, dev_t rdev, struct xfs_inode **ipp);
-int xfs_create_tmpfile(struct xfs_inode *dp, umode_t mode,
+int xfs_create_tmpfile(struct user_namespace *mnt_userns,
+ struct xfs_inode *dp, umode_t mode,
struct xfs_inode **ipp);
int xfs_remove(struct xfs_inode *dp, struct xfs_name *name,
struct xfs_inode *ip);
int xfs_link(struct xfs_inode *tdp, struct xfs_inode *sip,
struct xfs_name *target_name);
-int xfs_rename(struct xfs_inode *src_dp, struct xfs_name *src_name,
+int xfs_rename(struct user_namespace *mnt_userns,
+ struct xfs_inode *src_dp, struct xfs_name *src_name,
struct xfs_inode *src_ip, struct xfs_inode *target_dp,
struct xfs_name *target_name,
struct xfs_inode *target_ip, unsigned int flags);
@@ -407,9 +410,10 @@ void xfs_lock_two_inodes(struct xfs_inode *ip0, uint ip0_mode,
xfs_extlen_t xfs_get_extsz_hint(struct xfs_inode *ip);
xfs_extlen_t xfs_get_cowextsz_hint(struct xfs_inode *ip);
-int xfs_dir_ialloc(struct xfs_trans **tpp, struct xfs_inode *dp, umode_t mode,
- xfs_nlink_t nlink, dev_t dev, prid_t prid,
- struct xfs_inode **ipp);
+int xfs_dir_ialloc(struct user_namespace *mnt_userns,
+ struct xfs_trans **tpp, struct xfs_inode *dp,
+ umode_t mode, xfs_nlink_t nlink, dev_t dev,
+ prid_t prid, struct xfs_inode **ipp);
static inline int
xfs_itruncate_extents(
diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c
index 248083ea0276..99dfe89a8d08 100644
--- a/fs/xfs/xfs_ioctl.c
+++ b/fs/xfs/xfs_ioctl.c
@@ -693,7 +693,8 @@ xfs_ioc_space(
iattr.ia_valid = ATTR_SIZE;
iattr.ia_size = bf->l_start;
- error = xfs_vn_setattr_size(file_dentry(filp), &iattr);
+ error = xfs_vn_setattr_size(file_mnt_user_ns(filp), file_dentry(filp),
+ &iattr);
if (error)
goto out_unlock;
@@ -734,13 +735,15 @@ xfs_fsinumbers_fmt(
STATIC int
xfs_ioc_fsbulkstat(
- xfs_mount_t *mp,
+ struct file *file,
unsigned int cmd,
void __user *arg)
{
+ struct xfs_mount *mp = XFS_I(file_inode(file))->i_mount;
struct xfs_fsop_bulkreq bulkreq;
struct xfs_ibulk breq = {
.mp = mp,
+ .mnt_userns = file_mnt_user_ns(file),
.ocount = 0,
};
xfs_ino_t lastino;
@@ -908,13 +911,15 @@ xfs_bulk_ireq_teardown(
/* Handle the v5 bulkstat ioctl. */
STATIC int
xfs_ioc_bulkstat(
- struct xfs_mount *mp,
+ struct file *file,
unsigned int cmd,
struct xfs_bulkstat_req __user *arg)
{
+ struct xfs_mount *mp = XFS_I(file_inode(file))->i_mount;
struct xfs_bulk_ireq hdr;
struct xfs_ibulk breq = {
.mp = mp,
+ .mnt_userns = file_mnt_user_ns(file),
};
int error;
@@ -1275,9 +1280,10 @@ xfs_ioctl_setattr_prepare_dax(
*/
static struct xfs_trans *
xfs_ioctl_setattr_get_trans(
- struct xfs_inode *ip,
+ struct file *file,
struct xfs_dquot *pdqp)
{
+ struct xfs_inode *ip = XFS_I(file_inode(file));
struct xfs_mount *mp = ip->i_mount;
struct xfs_trans *tp;
int error = -EROFS;
@@ -1299,7 +1305,7 @@ xfs_ioctl_setattr_get_trans(
* The user ID of the calling process must be equal to the file owner
* ID, except in cases where the CAP_FSETID capability is applicable.
*/
- if (!inode_owner_or_capable(VFS_I(ip))) {
+ if (!inode_owner_or_capable(file_mnt_user_ns(file), VFS_I(ip))) {
error = -EPERM;
goto out_cancel;
}
@@ -1427,9 +1433,11 @@ xfs_ioctl_setattr_check_projid(
STATIC int
xfs_ioctl_setattr(
- xfs_inode_t *ip,
+ struct file *file,
struct fsxattr *fa)
{
+ struct user_namespace *mnt_userns = file_mnt_user_ns(file);
+ struct xfs_inode *ip = XFS_I(file_inode(file));
struct fsxattr old_fa;
struct xfs_mount *mp = ip->i_mount;
struct xfs_trans *tp;
@@ -1461,7 +1469,7 @@ xfs_ioctl_setattr(
xfs_ioctl_setattr_prepare_dax(ip, fa);
- tp = xfs_ioctl_setattr_get_trans(ip, pdqp);
+ tp = xfs_ioctl_setattr_get_trans(file, pdqp);
if (IS_ERR(tp)) {
error = PTR_ERR(tp);
goto error_free_dquots;
@@ -1493,7 +1501,7 @@ xfs_ioctl_setattr(
*/
if ((VFS_I(ip)->i_mode & (S_ISUID|S_ISGID)) &&
- !capable_wrt_inode_uidgid(VFS_I(ip), CAP_FSETID))
+ !capable_wrt_inode_uidgid(mnt_userns, VFS_I(ip), CAP_FSETID))
VFS_I(ip)->i_mode &= ~(S_ISUID|S_ISGID);
/* Change the ownerships and register project quota modifications */
@@ -1540,7 +1548,6 @@ error_free_dquots:
STATIC int
xfs_ioc_fssetxattr(
- xfs_inode_t *ip,
struct file *filp,
void __user *arg)
{
@@ -1553,7 +1560,7 @@ xfs_ioc_fssetxattr(
error = mnt_want_write_file(filp);
if (error)
return error;
- error = xfs_ioctl_setattr(ip, &fa);
+ error = xfs_ioctl_setattr(filp, &fa);
mnt_drop_write_file(filp);
return error;
}
@@ -1599,7 +1606,7 @@ xfs_ioc_setxflags(
xfs_ioctl_setattr_prepare_dax(ip, &fa);
- tp = xfs_ioctl_setattr_get_trans(ip, NULL);
+ tp = xfs_ioctl_setattr_get_trans(filp, NULL);
if (IS_ERR(tp)) {
error = PTR_ERR(tp);
goto out_drop_write;
@@ -2110,10 +2117,10 @@ xfs_file_ioctl(
case XFS_IOC_FSBULKSTAT_SINGLE:
case XFS_IOC_FSBULKSTAT:
case XFS_IOC_FSINUMBERS:
- return xfs_ioc_fsbulkstat(mp, cmd, arg);
+ return xfs_ioc_fsbulkstat(filp, cmd, arg);
case XFS_IOC_BULKSTAT:
- return xfs_ioc_bulkstat(mp, cmd, arg);
+ return xfs_ioc_bulkstat(filp, cmd, arg);
case XFS_IOC_INUMBERS:
return xfs_ioc_inumbers(mp, cmd, arg);
@@ -2135,7 +2142,7 @@ xfs_file_ioctl(
case XFS_IOC_FSGETXATTRA:
return xfs_ioc_fsgetxattr(ip, 1, arg);
case XFS_IOC_FSSETXATTR:
- return xfs_ioc_fssetxattr(ip, filp, arg);
+ return xfs_ioc_fssetxattr(filp, arg);
case XFS_IOC_GETXFLAGS:
return xfs_ioc_getxflags(ip, arg);
case XFS_IOC_SETXFLAGS:
diff --git a/fs/xfs/xfs_ioctl32.c b/fs/xfs/xfs_ioctl32.c
index c1771e728117..33c09ec8e6c0 100644
--- a/fs/xfs/xfs_ioctl32.c
+++ b/fs/xfs/xfs_ioctl32.c
@@ -209,14 +209,16 @@ xfs_fsbulkstat_one_fmt_compat(
/* copied from xfs_ioctl.c */
STATIC int
xfs_compat_ioc_fsbulkstat(
- xfs_mount_t *mp,
+ struct file *file,
unsigned int cmd,
struct compat_xfs_fsop_bulkreq __user *p32)
{
+ struct xfs_mount *mp = XFS_I(file_inode(file))->i_mount;
u32 addr;
struct xfs_fsop_bulkreq bulkreq;
struct xfs_ibulk breq = {
.mp = mp,
+ .mnt_userns = file_mnt_user_ns(file),
.ocount = 0,
};
xfs_ino_t lastino;
@@ -436,7 +438,6 @@ xfs_file_compat_ioctl(
{
struct inode *inode = file_inode(filp);
struct xfs_inode *ip = XFS_I(inode);
- struct xfs_mount *mp = ip->i_mount;
void __user *arg = compat_ptr(p);
int error;
@@ -456,7 +457,7 @@ xfs_file_compat_ioctl(
return xfs_ioc_space(filp, &bf);
}
case XFS_IOC_FSGEOMETRY_V1_32:
- return xfs_compat_ioc_fsgeometry_v1(mp, arg);
+ return xfs_compat_ioc_fsgeometry_v1(ip->i_mount, arg);
case XFS_IOC_FSGROWFSDATA_32: {
struct xfs_growfs_data in;
@@ -465,7 +466,7 @@ xfs_file_compat_ioctl(
error = mnt_want_write_file(filp);
if (error)
return error;
- error = xfs_growfs_data(mp, &in);
+ error = xfs_growfs_data(ip->i_mount, &in);
mnt_drop_write_file(filp);
return error;
}
@@ -477,7 +478,7 @@ xfs_file_compat_ioctl(
error = mnt_want_write_file(filp);
if (error)
return error;
- error = xfs_growfs_rt(mp, &in);
+ error = xfs_growfs_rt(ip->i_mount, &in);
mnt_drop_write_file(filp);
return error;
}
@@ -507,7 +508,7 @@ xfs_file_compat_ioctl(
case XFS_IOC_FSBULKSTAT_32:
case XFS_IOC_FSBULKSTAT_SINGLE_32:
case XFS_IOC_FSINUMBERS_32:
- return xfs_compat_ioc_fsbulkstat(mp, cmd, arg);
+ return xfs_compat_ioc_fsbulkstat(filp, cmd, arg);
case XFS_IOC_FD_TO_HANDLE_32:
case XFS_IOC_PATH_TO_HANDLE_32:
case XFS_IOC_PATH_TO_FSHANDLE_32: {
diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c
index 00369502fe25..66ebccb5a6ff 100644
--- a/fs/xfs/xfs_iops.c
+++ b/fs/xfs/xfs_iops.c
@@ -128,6 +128,7 @@ xfs_cleanup_inode(
STATIC int
xfs_generic_create(
+ struct user_namespace *mnt_userns,
struct inode *dir,
struct dentry *dentry,
umode_t mode,
@@ -161,9 +162,10 @@ xfs_generic_create(
goto out_free_acl;
if (!tmpfile) {
- error = xfs_create(XFS_I(dir), &name, mode, rdev, &ip);
+ error = xfs_create(mnt_userns, XFS_I(dir), &name, mode, rdev,
+ &ip);
} else {
- error = xfs_create_tmpfile(XFS_I(dir), mode, &ip);
+ error = xfs_create_tmpfile(mnt_userns, XFS_I(dir), mode, &ip);
}
if (unlikely(error))
goto out_free_acl;
@@ -220,31 +222,35 @@ xfs_generic_create(
STATIC int
xfs_vn_mknod(
- struct inode *dir,
- struct dentry *dentry,
- umode_t mode,
- dev_t rdev)
+ struct user_namespace *mnt_userns,
+ struct inode *dir,
+ struct dentry *dentry,
+ umode_t mode,
+ dev_t rdev)
{
- return xfs_generic_create(dir, dentry, mode, rdev, false);
+ return xfs_generic_create(mnt_userns, dir, dentry, mode, rdev, false);
}
STATIC int
xfs_vn_create(
- struct inode *dir,
- struct dentry *dentry,
- umode_t mode,
- bool flags)
+ struct user_namespace *mnt_userns,
+ struct inode *dir,
+ struct dentry *dentry,
+ umode_t mode,
+ bool flags)
{
- return xfs_generic_create(dir, dentry, mode, 0, false);
+ return xfs_generic_create(mnt_userns, dir, dentry, mode, 0, false);
}
STATIC int
xfs_vn_mkdir(
- struct inode *dir,
- struct dentry *dentry,
- umode_t mode)
+ struct user_namespace *mnt_userns,
+ struct inode *dir,
+ struct dentry *dentry,
+ umode_t mode)
{
- return xfs_generic_create(dir, dentry, mode | S_IFDIR, 0, false);
+ return xfs_generic_create(mnt_userns, dir, dentry, mode | S_IFDIR, 0,
+ false);
}
STATIC struct dentry *
@@ -361,9 +367,10 @@ xfs_vn_unlink(
STATIC int
xfs_vn_symlink(
- struct inode *dir,
- struct dentry *dentry,
- const char *symname)
+ struct user_namespace *mnt_userns,
+ struct inode *dir,
+ struct dentry *dentry,
+ const char *symname)
{
struct inode *inode;
struct xfs_inode *cip = NULL;
@@ -377,7 +384,7 @@ xfs_vn_symlink(
if (unlikely(error))
goto out;
- error = xfs_symlink(XFS_I(dir), &name, symname, mode, &cip);
+ error = xfs_symlink(mnt_userns, XFS_I(dir), &name, symname, mode, &cip);
if (unlikely(error))
goto out;
@@ -403,11 +410,12 @@ xfs_vn_symlink(
STATIC int
xfs_vn_rename(
- struct inode *odir,
- struct dentry *odentry,
- struct inode *ndir,
- struct dentry *ndentry,
- unsigned int flags)
+ struct user_namespace *mnt_userns,
+ struct inode *odir,
+ struct dentry *odentry,
+ struct inode *ndir,
+ struct dentry *ndentry,
+ unsigned int flags)
{
struct inode *new_inode = d_inode(ndentry);
int omode = 0;
@@ -431,8 +439,8 @@ xfs_vn_rename(
if (unlikely(error))
return error;
- return xfs_rename(XFS_I(odir), &oname, XFS_I(d_inode(odentry)),
- XFS_I(ndir), &nname,
+ return xfs_rename(mnt_userns, XFS_I(odir), &oname,
+ XFS_I(d_inode(odentry)), XFS_I(ndir), &nname,
new_inode ? XFS_I(new_inode) : NULL, flags);
}
@@ -529,6 +537,7 @@ xfs_stat_blksize(
STATIC int
xfs_vn_getattr(
+ struct user_namespace *mnt_userns,
const struct path *path,
struct kstat *stat,
u32 request_mask,
@@ -547,8 +556,8 @@ xfs_vn_getattr(
stat->dev = inode->i_sb->s_dev;
stat->mode = inode->i_mode;
stat->nlink = inode->i_nlink;
- stat->uid = inode->i_uid;
- stat->gid = inode->i_gid;
+ stat->uid = i_uid_into_mnt(mnt_userns, inode);
+ stat->gid = i_gid_into_mnt(mnt_userns, inode);
stat->ino = ip->i_ino;
stat->atime = inode->i_atime;
stat->mtime = inode->i_mtime;
@@ -626,8 +635,9 @@ xfs_setattr_time(
static int
xfs_vn_change_ok(
- struct dentry *dentry,
- struct iattr *iattr)
+ struct user_namespace *mnt_userns,
+ struct dentry *dentry,
+ struct iattr *iattr)
{
struct xfs_mount *mp = XFS_I(d_inode(dentry))->i_mount;
@@ -637,7 +647,7 @@ xfs_vn_change_ok(
if (XFS_FORCED_SHUTDOWN(mp))
return -EIO;
- return setattr_prepare(dentry, iattr);
+ return setattr_prepare(mnt_userns, dentry, iattr);
}
/*
@@ -648,6 +658,7 @@ xfs_vn_change_ok(
*/
static int
xfs_setattr_nonsize(
+ struct user_namespace *mnt_userns,
struct xfs_inode *ip,
struct iattr *iattr)
{
@@ -788,7 +799,7 @@ xfs_setattr_nonsize(
* Posix ACL code seems to care about this issue either.
*/
if (mask & ATTR_MODE) {
- error = posix_acl_chmod(inode, inode->i_mode);
+ error = posix_acl_chmod(mnt_userns, inode, inode->i_mode);
if (error)
return error;
}
@@ -809,6 +820,7 @@ out_dqrele:
*/
STATIC int
xfs_setattr_size(
+ struct user_namespace *mnt_userns,
struct xfs_inode *ip,
struct iattr *iattr)
{
@@ -840,7 +852,7 @@ xfs_setattr_size(
* Use the regular setattr path to update the timestamps.
*/
iattr->ia_valid &= ~ATTR_SIZE;
- return xfs_setattr_nonsize(ip, iattr);
+ return xfs_setattr_nonsize(mnt_userns, ip, iattr);
}
/*
@@ -1009,6 +1021,7 @@ out_trans_cancel:
int
xfs_vn_setattr_size(
+ struct user_namespace *mnt_userns,
struct dentry *dentry,
struct iattr *iattr)
{
@@ -1017,14 +1030,15 @@ xfs_vn_setattr_size(
trace_xfs_setattr(ip);
- error = xfs_vn_change_ok(dentry, iattr);
+ error = xfs_vn_change_ok(mnt_userns, dentry, iattr);
if (error)
return error;
- return xfs_setattr_size(ip, iattr);
+ return xfs_setattr_size(mnt_userns, ip, iattr);
}
STATIC int
xfs_vn_setattr(
+ struct user_namespace *mnt_userns,
struct dentry *dentry,
struct iattr *iattr)
{
@@ -1044,14 +1058,14 @@ xfs_vn_setattr(
return error;
}
- error = xfs_vn_setattr_size(dentry, iattr);
+ error = xfs_vn_setattr_size(mnt_userns, dentry, iattr);
xfs_iunlock(ip, XFS_MMAPLOCK_EXCL);
} else {
trace_xfs_setattr(ip);
- error = xfs_vn_change_ok(dentry, iattr);
+ error = xfs_vn_change_ok(mnt_userns, dentry, iattr);
if (!error)
- error = xfs_setattr_nonsize(ip, iattr);
+ error = xfs_setattr_nonsize(mnt_userns, ip, iattr);
}
return error;
@@ -1122,11 +1136,12 @@ xfs_vn_fiemap(
STATIC int
xfs_vn_tmpfile(
- struct inode *dir,
- struct dentry *dentry,
- umode_t mode)
+ struct user_namespace *mnt_userns,
+ struct inode *dir,
+ struct dentry *dentry,
+ umode_t mode)
{
- return xfs_generic_create(dir, dentry, mode, 0, true);
+ return xfs_generic_create(mnt_userns, dir, dentry, mode, 0, true);
}
static const struct inode_operations xfs_inode_operations = {
diff --git a/fs/xfs/xfs_iops.h b/fs/xfs/xfs_iops.h
index 99ca745c1071..278949056048 100644
--- a/fs/xfs/xfs_iops.h
+++ b/fs/xfs/xfs_iops.h
@@ -14,6 +14,7 @@ extern const struct file_operations xfs_dir_file_operations;
extern ssize_t xfs_vn_listxattr(struct dentry *, char *data, size_t size);
extern void xfs_setattr_time(struct xfs_inode *ip, struct iattr *iattr);
-extern int xfs_vn_setattr_size(struct dentry *dentry, struct iattr *vap);
+int xfs_vn_setattr_size(struct user_namespace *mnt_userns,
+ struct dentry *dentry, struct iattr *vap);
#endif /* __XFS_IOPS_H__ */
diff --git a/fs/xfs/xfs_itable.c b/fs/xfs/xfs_itable.c
index 16ca97a7ff00..ca310a125d1e 100644
--- a/fs/xfs/xfs_itable.c
+++ b/fs/xfs/xfs_itable.c
@@ -54,10 +54,12 @@ struct xfs_bstat_chunk {
STATIC int
xfs_bulkstat_one_int(
struct xfs_mount *mp,
+ struct user_namespace *mnt_userns,
struct xfs_trans *tp,
xfs_ino_t ino,
struct xfs_bstat_chunk *bc)
{
+ struct user_namespace *sb_userns = mp->m_super->s_user_ns;
struct xfs_icdinode *dic; /* dinode core info pointer */
struct xfs_inode *ip; /* incore inode pointer */
struct inode *inode;
@@ -86,8 +88,8 @@ xfs_bulkstat_one_int(
*/
buf->bs_projectid = ip->i_d.di_projid;
buf->bs_ino = ino;
- buf->bs_uid = i_uid_read(inode);
- buf->bs_gid = i_gid_read(inode);
+ buf->bs_uid = from_kuid(sb_userns, i_uid_into_mnt(mnt_userns, inode));
+ buf->bs_gid = from_kgid(sb_userns, i_gid_into_mnt(mnt_userns, inode));
buf->bs_size = dic->di_size;
buf->bs_nlink = inode->i_nlink;
@@ -173,7 +175,8 @@ xfs_bulkstat_one(
if (!bc.buf)
return -ENOMEM;
- error = xfs_bulkstat_one_int(breq->mp, NULL, breq->startino, &bc);
+ error = xfs_bulkstat_one_int(breq->mp, breq->mnt_userns, NULL,
+ breq->startino, &bc);
kmem_free(bc.buf);
@@ -194,9 +197,10 @@ xfs_bulkstat_iwalk(
xfs_ino_t ino,
void *data)
{
+ struct xfs_bstat_chunk *bc = data;
int error;
- error = xfs_bulkstat_one_int(mp, tp, ino, data);
+ error = xfs_bulkstat_one_int(mp, bc->breq->mnt_userns, tp, ino, data);
/* bulkstat just skips over missing inodes */
if (error == -ENOENT || error == -EINVAL)
return 0;
@@ -239,6 +243,11 @@ xfs_bulkstat(
};
int error;
+ if (breq->mnt_userns != &init_user_ns) {
+ xfs_warn_ratelimited(breq->mp,
+ "bulkstat not supported inside of idmapped mounts.");
+ return -EINVAL;
+ }
if (xfs_bulkstat_already_done(breq->mp, breq->startino))
return 0;
diff --git a/fs/xfs/xfs_itable.h b/fs/xfs/xfs_itable.h
index 96a1e2a9be3f..7078d10c9b12 100644
--- a/fs/xfs/xfs_itable.h
+++ b/fs/xfs/xfs_itable.h
@@ -8,6 +8,7 @@
/* In-memory representation of a userspace request for batch inode data. */
struct xfs_ibulk {
struct xfs_mount *mp;
+ struct user_namespace *mnt_userns;
void __user *ubuffer; /* user output buffer */
xfs_ino_t startino; /* start with this inode */
unsigned int icount; /* number of elements in ubuffer */
diff --git a/fs/xfs/xfs_qm.c b/fs/xfs/xfs_qm.c
index 742d1413e2d0..bfa4164990b1 100644
--- a/fs/xfs/xfs_qm.c
+++ b/fs/xfs/xfs_qm.c
@@ -787,7 +787,8 @@ xfs_qm_qino_alloc(
return error;
if (need_alloc) {
- error = xfs_dir_ialloc(&tp, NULL, S_IFREG, 1, 0, 0, ipp);
+ error = xfs_dir_ialloc(&init_user_ns, &tp, NULL, S_IFREG, 1, 0,
+ 0, ipp);
if (error) {
xfs_trans_cancel(tp);
return error;
diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c
index 586d42342a79..e5e0713bebcd 100644
--- a/fs/xfs/xfs_super.c
+++ b/fs/xfs/xfs_super.c
@@ -1881,7 +1881,7 @@ static struct file_system_type xfs_fs_type = {
.init_fs_context = xfs_init_fs_context,
.parameters = xfs_fs_parameters,
.kill_sb = kill_block_super,
- .fs_flags = FS_REQUIRES_DEV,
+ .fs_flags = FS_REQUIRES_DEV | FS_ALLOW_IDMAP,
};
MODULE_ALIAS_FS("xfs");
diff --git a/fs/xfs/xfs_symlink.c b/fs/xfs/xfs_symlink.c
index 8565663b16cd..1379013d74b8 100644
--- a/fs/xfs/xfs_symlink.c
+++ b/fs/xfs/xfs_symlink.c
@@ -134,6 +134,7 @@ xfs_readlink(
int
xfs_symlink(
+ struct user_namespace *mnt_userns,
struct xfs_inode *dp,
struct xfs_name *link_name,
const char *target_path,
@@ -221,8 +222,8 @@ xfs_symlink(
/*
* Allocate an inode for the symlink.
*/
- error = xfs_dir_ialloc(&tp, dp, S_IFLNK | (mode & ~S_IFMT), 1, 0,
- prid, &ip);
+ error = xfs_dir_ialloc(mnt_userns, &tp, dp, S_IFLNK | (mode & ~S_IFMT),
+ 1, 0, prid, &ip);
if (error)
goto out_trans_cancel;
diff --git a/fs/xfs/xfs_symlink.h b/fs/xfs/xfs_symlink.h
index b1fa091427e6..2586b7e393f3 100644
--- a/fs/xfs/xfs_symlink.h
+++ b/fs/xfs/xfs_symlink.h
@@ -7,8 +7,9 @@
/* Kernel only symlink definitions */
-int xfs_symlink(struct xfs_inode *dp, struct xfs_name *link_name,
- const char *target_path, umode_t mode, struct xfs_inode **ipp);
+int xfs_symlink(struct user_namespace *mnt_userns, struct xfs_inode *dp,
+ struct xfs_name *link_name, const char *target_path,
+ umode_t mode, struct xfs_inode **ipp);
int xfs_readlink_bmap_ilocked(struct xfs_inode *ip, char *link);
int xfs_readlink(struct xfs_inode *ip, char *link);
int xfs_inactive_symlink(struct xfs_inode *ip);
diff --git a/fs/xfs/xfs_xattr.c b/fs/xfs/xfs_xattr.c
index bca48b308c02..12be32f66dc1 100644
--- a/fs/xfs/xfs_xattr.c
+++ b/fs/xfs/xfs_xattr.c
@@ -38,9 +38,10 @@ xfs_xattr_get(const struct xattr_handler *handler, struct dentry *unused,
}
static int
-xfs_xattr_set(const struct xattr_handler *handler, struct dentry *unused,
- struct inode *inode, const char *name, const void *value,
- size_t size, int flags)
+xfs_xattr_set(const struct xattr_handler *handler,
+ struct user_namespace *mnt_userns, struct dentry *unused,
+ struct inode *inode, const char *name, const void *value,
+ size_t size, int flags)
{
struct xfs_da_args args = {
.dp = XFS_I(inode),
diff --git a/fs/zonefs/Makefile b/fs/zonefs/Makefile
index 75a380aa1ae1..33c1a4f1132e 100644
--- a/fs/zonefs/Makefile
+++ b/fs/zonefs/Makefile
@@ -1,4 +1,6 @@
# SPDX-License-Identifier: GPL-2.0
+ccflags-y += -I$(src)
+
obj-$(CONFIG_ZONEFS_FS) += zonefs.o
zonefs-y := super.o
diff --git a/fs/zonefs/super.c b/fs/zonefs/super.c
index a29653c0196b..b6ff4a21abac 100644
--- a/fs/zonefs/super.c
+++ b/fs/zonefs/super.c
@@ -24,6 +24,9 @@
#include "zonefs.h"
+#define CREATE_TRACE_POINTS
+#include "trace.h"
+
static inline int zonefs_zone_mgmt(struct inode *inode,
enum req_opf op)
{
@@ -32,6 +35,7 @@ static inline int zonefs_zone_mgmt(struct inode *inode,
lockdep_assert_held(&zi->i_truncate_mutex);
+ trace_zonefs_zone_mgmt(inode, op);
ret = blkdev_zone_mgmt(inode->i_sb->s_bdev, op, zi->i_zsector,
zi->i_zone_size >> SECTOR_SHIFT, GFP_NOFS);
if (ret) {
@@ -100,6 +104,8 @@ static int zonefs_iomap_begin(struct inode *inode, loff_t offset, loff_t length,
iomap->bdev = inode->i_sb->s_bdev;
iomap->addr = (zi->i_zsector << SECTOR_SHIFT) + iomap->offset;
+ trace_zonefs_iomap_begin(inode, iomap);
+
return 0;
}
@@ -250,6 +256,9 @@ static loff_t zonefs_check_zone_condition(struct inode *inode,
}
inode->i_mode &= ~0222;
return i_size_read(inode);
+ case BLK_ZONE_COND_FULL:
+ /* The write pointer of full zones is invalid. */
+ return zi->i_max_size;
default:
if (zi->i_ztype == ZONEFS_ZTYPE_CNV)
return zi->i_max_size;
@@ -480,7 +489,8 @@ unlock:
return ret;
}
-static int zonefs_inode_setattr(struct dentry *dentry, struct iattr *iattr)
+static int zonefs_inode_setattr(struct user_namespace *mnt_userns,
+ struct dentry *dentry, struct iattr *iattr)
{
struct inode *inode = d_inode(dentry);
int ret;
@@ -488,7 +498,7 @@ static int zonefs_inode_setattr(struct dentry *dentry, struct iattr *iattr)
if (unlikely(IS_IMMUTABLE(inode)))
return -EPERM;
- ret = setattr_prepare(dentry, iattr);
+ ret = setattr_prepare(&init_user_ns, dentry, iattr);
if (ret)
return ret;
@@ -516,7 +526,7 @@ static int zonefs_inode_setattr(struct dentry *dentry, struct iattr *iattr)
return ret;
}
- setattr_copy(inode, iattr);
+ setattr_copy(&init_user_ns, inode, iattr);
return 0;
}
@@ -703,6 +713,7 @@ static ssize_t zonefs_file_dio_append(struct kiocb *iocb, struct iov_iter *from)
ret = submit_bio_wait(bio);
zonefs_file_write_dio_end_io(iocb, size, ret, 0);
+ trace_zonefs_file_dio_append(inode, size, ret);
out_release:
bio_release_pages(bio, false);
@@ -1223,7 +1234,7 @@ static void zonefs_init_dir_inode(struct inode *parent, struct inode *inode,
struct super_block *sb = parent->i_sb;
inode->i_ino = blkdev_nr_zones(sb->s_bdev->bd_disk) + type + 1;
- inode_init_owner(inode, parent, S_IFDIR | 0555);
+ inode_init_owner(&init_user_ns, inode, parent, S_IFDIR | 0555);
inode->i_op = &zonefs_dir_inode_operations;
inode->i_fop = &simple_dir_operations;
set_nlink(inode, 2);
diff --git a/fs/zonefs/trace.h b/fs/zonefs/trace.h
new file mode 100644
index 000000000000..f369d7d50303
--- /dev/null
+++ b/fs/zonefs/trace.h
@@ -0,0 +1,104 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * zonefs filesystem driver tracepoints.
+ *
+ * Copyright (C) 2021 Western Digital Corporation or its affiliates.
+ */
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM zonefs
+
+#if !defined(_TRACE_ZONEFS_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_ZONEFS_H
+
+#include <linux/tracepoint.h>
+#include <linux/trace_seq.h>
+#include <linux/blkdev.h>
+
+#include "zonefs.h"
+
+#define show_dev(dev) MAJOR(dev), MINOR(dev)
+
+TRACE_EVENT(zonefs_zone_mgmt,
+ TP_PROTO(struct inode *inode, enum req_opf op),
+ TP_ARGS(inode, op),
+ TP_STRUCT__entry(
+ __field(dev_t, dev)
+ __field(ino_t, ino)
+ __field(int, op)
+ __field(sector_t, sector)
+ __field(sector_t, nr_sectors)
+ ),
+ TP_fast_assign(
+ __entry->dev = inode->i_sb->s_dev;
+ __entry->ino = inode->i_ino;
+ __entry->op = op;
+ __entry->sector = ZONEFS_I(inode)->i_zsector;
+ __entry->nr_sectors =
+ ZONEFS_I(inode)->i_zone_size >> SECTOR_SHIFT;
+ ),
+ TP_printk("bdev=(%d,%d), ino=%lu op=%s, sector=%llu, nr_sectors=%llu",
+ show_dev(__entry->dev), (unsigned long)__entry->ino,
+ blk_op_str(__entry->op), __entry->sector,
+ __entry->nr_sectors
+ )
+);
+
+TRACE_EVENT(zonefs_file_dio_append,
+ TP_PROTO(struct inode *inode, ssize_t size, ssize_t ret),
+ TP_ARGS(inode, size, ret),
+ TP_STRUCT__entry(
+ __field(dev_t, dev)
+ __field(ino_t, ino)
+ __field(sector_t, sector)
+ __field(ssize_t, size)
+ __field(loff_t, wpoffset)
+ __field(ssize_t, ret)
+ ),
+ TP_fast_assign(
+ __entry->dev = inode->i_sb->s_dev;
+ __entry->ino = inode->i_ino;
+ __entry->sector = ZONEFS_I(inode)->i_zsector;
+ __entry->size = size;
+ __entry->wpoffset = ZONEFS_I(inode)->i_wpoffset;
+ __entry->ret = ret;
+ ),
+ TP_printk("bdev=(%d, %d), ino=%lu, sector=%llu, size=%zu, wpoffset=%llu, ret=%zu",
+ show_dev(__entry->dev), (unsigned long)__entry->ino,
+ __entry->sector, __entry->size, __entry->wpoffset,
+ __entry->ret
+ )
+);
+
+TRACE_EVENT(zonefs_iomap_begin,
+ TP_PROTO(struct inode *inode, struct iomap *iomap),
+ TP_ARGS(inode, iomap),
+ TP_STRUCT__entry(
+ __field(dev_t, dev)
+ __field(ino_t, ino)
+ __field(u64, addr)
+ __field(loff_t, offset)
+ __field(u64, length)
+ ),
+ TP_fast_assign(
+ __entry->dev = inode->i_sb->s_dev;
+ __entry->ino = inode->i_ino;
+ __entry->addr = iomap->addr;
+ __entry->offset = iomap->offset;
+ __entry->length = iomap->length;
+ ),
+ TP_printk("bdev=(%d,%d), ino=%lu, addr=%llu, offset=%llu, length=%llu",
+ show_dev(__entry->dev), (unsigned long)__entry->ino,
+ __entry->addr, __entry->offset, __entry->length
+ )
+);
+
+#endif /* _TRACE_ZONEFS_H */
+
+#undef TRACE_INCLUDE_PATH
+#define TRACE_INCLUDE_PATH .
+#undef TRACE_INCLUDE_FILE
+#define TRACE_INCLUDE_FILE trace
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>