diff options
-rw-r--r-- | Documentation/filesystems/idmappings.rst | 72 | ||||
-rw-r--r-- | fs/cachefiles/bind.c | 2 | ||||
-rw-r--r-- | fs/ecryptfs/main.c | 2 | ||||
-rw-r--r-- | fs/ksmbd/smbacl.c | 19 | ||||
-rw-r--r-- | fs/ksmbd/smbacl.h | 5 | ||||
-rw-r--r-- | fs/namespace.c | 53 | ||||
-rw-r--r-- | fs/nfsd/export.c | 2 | ||||
-rw-r--r-- | fs/open.c | 8 | ||||
-rw-r--r-- | fs/overlayfs/super.c | 2 | ||||
-rw-r--r-- | fs/posix_acl.c | 17 | ||||
-rw-r--r-- | fs/proc_namespace.c | 2 | ||||
-rw-r--r-- | fs/xfs/xfs_inode.c | 8 | ||||
-rw-r--r-- | fs/xfs/xfs_linux.h | 1 | ||||
-rw-r--r-- | fs/xfs/xfs_symlink.c | 4 | ||||
-rw-r--r-- | include/linux/fs.h | 141 | ||||
-rw-r--r-- | include/linux/mnt_idmapping.h | 234 | ||||
-rw-r--r-- | security/commoncap.c | 15 |
17 files changed, 356 insertions, 231 deletions
diff --git a/Documentation/filesystems/idmappings.rst b/Documentation/filesystems/idmappings.rst index 1229a75ec75d..7a879ec3b6bf 100644 --- a/Documentation/filesystems/idmappings.rst +++ b/Documentation/filesystems/idmappings.rst @@ -952,75 +952,3 @@ The raw userspace id that is put on disk is ``u1000`` so when the user takes their home directory back to their home computer where they are assigned ``u1000`` using the initial idmapping and mount the filesystem with the initial idmapping they will see all those files owned by ``u1000``. - -Shortcircuting --------------- - -Currently, the implementation of idmapped mounts enforces that the filesystem -is mounted with the initial idmapping. The reason is simply that none of the -filesystems that we targeted were mountable with a non-initial idmapping. But -that might change soon enough. As we've seen above, thanks to the properties of -idmappings the translation works for both filesystems mounted with the initial -idmapping and filesystem with non-initial idmappings. - -Based on this current restriction to filesystem mounted with the initial -idmapping two noticeable shortcuts have been taken: - -1. We always stash a reference to the initial user namespace in ``struct - vfsmount``. Idmapped mounts are thus mounts that have a non-initial user - namespace attached to them. - - In order to support idmapped mounts this needs to be changed. Instead of - stashing the initial user namespace the user namespace the filesystem was - mounted with must be stashed. An idmapped mount is then any mount that has - a different user namespace attached then the filesystem was mounted with. - This has no user-visible consequences. - -2. The translation algorithms in ``mapped_fs*id()`` and ``i_*id_into_mnt()`` - are simplified. - - Let's consider ``mapped_fs*id()`` first. This function translates the - caller's kernel id into a kernel id in the filesystem's idmapping via - a mount's idmapping. The full algorithm is:: - - mapped_fsuid(kid): - /* Map the kernel id up into a userspace id in the mount's idmapping. */ - from_kuid(mount-idmapping, kid) = uid - - /* Map the userspace id down into a kernel id in the filesystem's idmapping. */ - make_kuid(filesystem-idmapping, uid) = kuid - - We know that the filesystem is always mounted with the initial idmapping as - we enforce this in ``mount_setattr()``. So this can be shortened to:: - - mapped_fsuid(kid): - /* Map the kernel id up into a userspace id in the mount's idmapping. */ - from_kuid(mount-idmapping, kid) = uid - - /* Map the userspace id down into a kernel id in the filesystem's idmapping. */ - KUIDT_INIT(uid) = kuid - - Similarly, for ``i_*id_into_mnt()`` which translated the filesystem's kernel - id into a mount's kernel id:: - - i_uid_into_mnt(kid): - /* Map the kernel id up into a userspace id in the filesystem's idmapping. */ - from_kuid(filesystem-idmapping, kid) = uid - - /* Map the userspace id down into a kernel id in the mounts's idmapping. */ - make_kuid(mount-idmapping, uid) = kuid - - Again, we know that the filesystem is always mounted with the initial - idmapping as we enforce this in ``mount_setattr()``. So this can be - shortened to:: - - i_uid_into_mnt(kid): - /* Map the kernel id up into a userspace id in the filesystem's idmapping. */ - __kuid_val(kid) = uid - - /* Map the userspace id down into a kernel id in the mounts's idmapping. */ - make_kuid(mount-idmapping, uid) = kuid - -Handling filesystems mounted with non-initial idmappings requires that the -translation functions be converted to their full form. They can still be -shortcircuited on non-idmapped mounts. This has no user-visible consequences. diff --git a/fs/cachefiles/bind.c b/fs/cachefiles/bind.c index d463d89f5db8..146291be6263 100644 --- a/fs/cachefiles/bind.c +++ b/fs/cachefiles/bind.c @@ -117,7 +117,7 @@ static int cachefiles_daemon_add_cache(struct cachefiles_cache *cache) root = path.dentry; ret = -EINVAL; - if (mnt_user_ns(path.mnt) != &init_user_ns) { + if (is_idmapped_mnt(path.mnt)) { pr_warn("File cache on idmapped mounts not supported"); goto error_unsupported; } diff --git a/fs/ecryptfs/main.c b/fs/ecryptfs/main.c index d66bbd2df191..2dd23a82e0de 100644 --- a/fs/ecryptfs/main.c +++ b/fs/ecryptfs/main.c @@ -537,7 +537,7 @@ static struct dentry *ecryptfs_mount(struct file_system_type *fs_type, int flags goto out_free; } - if (mnt_user_ns(path.mnt) != &init_user_ns) { + if (is_idmapped_mnt(path.mnt)) { rc = -EINVAL; printk(KERN_ERR "Mounting on idmapped mounts currently disallowed\n"); goto out_free; diff --git a/fs/ksmbd/smbacl.c b/fs/ksmbd/smbacl.c index bd792db32623..6ecf55ea1fed 100644 --- a/fs/ksmbd/smbacl.c +++ b/fs/ksmbd/smbacl.c @@ -9,6 +9,7 @@ #include <linux/fs.h> #include <linux/slab.h> #include <linux/string.h> +#include <linux/mnt_idmapping.h> #include "smbacl.h" #include "smb_common.h" @@ -274,14 +275,7 @@ static int sid_to_id(struct user_namespace *user_ns, uid_t id; id = le32_to_cpu(psid->sub_auth[psid->num_subauth - 1]); - /* - * Translate raw sid into kuid in the server's user - * namespace. - */ - uid = make_kuid(&init_user_ns, id); - - /* If this is an idmapped mount, apply the idmapping. */ - uid = kuid_from_mnt(user_ns, uid); + uid = mapped_kuid_user(user_ns, &init_user_ns, KUIDT_INIT(id)); if (uid_valid(uid)) { fattr->cf_uid = uid; rc = 0; @@ -291,14 +285,7 @@ static int sid_to_id(struct user_namespace *user_ns, gid_t id; id = le32_to_cpu(psid->sub_auth[psid->num_subauth - 1]); - /* - * Translate raw sid into kgid in the server's user - * namespace. - */ - gid = make_kgid(&init_user_ns, id); - - /* If this is an idmapped mount, apply the idmapping. */ - gid = kgid_from_mnt(user_ns, gid); + gid = mapped_kgid_user(user_ns, &init_user_ns, KGIDT_INIT(id)); if (gid_valid(gid)) { fattr->cf_gid = gid; rc = 0; diff --git a/fs/ksmbd/smbacl.h b/fs/ksmbd/smbacl.h index 73e08cad412b..811af3309429 100644 --- a/fs/ksmbd/smbacl.h +++ b/fs/ksmbd/smbacl.h @@ -11,6 +11,7 @@ #include <linux/fs.h> #include <linux/namei.h> #include <linux/posix_acl.h> +#include <linux/mnt_idmapping.h> #include "mgmt/tree_connect.h" @@ -216,7 +217,7 @@ static inline uid_t posix_acl_uid_translate(struct user_namespace *mnt_userns, kuid_t kuid; /* If this is an idmapped mount, apply the idmapping. */ - kuid = kuid_into_mnt(mnt_userns, pace->e_uid); + kuid = mapped_kuid_fs(mnt_userns, &init_user_ns, pace->e_uid); /* Translate the kuid into a userspace id ksmbd would see. */ return from_kuid(&init_user_ns, kuid); @@ -228,7 +229,7 @@ static inline gid_t posix_acl_gid_translate(struct user_namespace *mnt_userns, kgid_t kgid; /* If this is an idmapped mount, apply the idmapping. */ - kgid = kgid_into_mnt(mnt_userns, pace->e_gid); + kgid = mapped_kgid_fs(mnt_userns, &init_user_ns, pace->e_gid); /* Translate the kgid into a userspace id ksmbd would see. */ return from_kgid(&init_user_ns, kgid); diff --git a/fs/namespace.c b/fs/namespace.c index b696543adab8..dc31ad6b370f 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -31,6 +31,7 @@ #include <uapi/linux/mount.h> #include <linux/fs_context.h> #include <linux/shmem_fs.h> +#include <linux/mnt_idmapping.h> #include "pnode.h" #include "internal.h" @@ -561,7 +562,7 @@ static void free_vfsmnt(struct mount *mnt) struct user_namespace *mnt_userns; mnt_userns = mnt_user_ns(&mnt->mnt); - if (mnt_userns != &init_user_ns) + if (!initial_idmapping(mnt_userns)) put_user_ns(mnt_userns); kfree_const(mnt->mnt_devname); #ifdef CONFIG_SMP @@ -965,6 +966,7 @@ static struct mount *skip_mnt_tree(struct mount *p) struct vfsmount *vfs_create_mount(struct fs_context *fc) { struct mount *mnt; + struct user_namespace *fs_userns; if (!fc->root) return ERR_PTR(-EINVAL); @@ -982,6 +984,10 @@ struct vfsmount *vfs_create_mount(struct fs_context *fc) mnt->mnt_mountpoint = mnt->mnt.mnt_root; mnt->mnt_parent = mnt; + fs_userns = mnt->mnt.mnt_sb->s_user_ns; + if (!initial_idmapping(fs_userns)) + mnt->mnt.mnt_userns = get_user_ns(fs_userns); + lock_mount_hash(); list_add_tail(&mnt->mnt_instance, &mnt->mnt.mnt_sb->s_mounts); unlock_mount_hash(); @@ -1072,7 +1078,7 @@ static struct mount *clone_mnt(struct mount *old, struct dentry *root, atomic_inc(&sb->s_active); mnt->mnt.mnt_userns = mnt_user_ns(&old->mnt); - if (mnt->mnt.mnt_userns != &init_user_ns) + if (!initial_idmapping(mnt->mnt.mnt_userns)) mnt->mnt.mnt_userns = get_user_ns(mnt->mnt.mnt_userns); mnt->mnt.mnt_sb = sb; mnt->mnt.mnt_root = dget(root); @@ -3927,28 +3933,32 @@ static unsigned int recalc_flags(struct mount_kattr *kattr, struct mount *mnt) static int can_idmap_mount(const struct mount_kattr *kattr, struct mount *mnt) { struct vfsmount *m = &mnt->mnt; + struct user_namespace *fs_userns = m->mnt_sb->s_user_ns; if (!kattr->mnt_userns) return 0; /* + * Creating an idmapped mount with the filesystem wide idmapping + * doesn't make sense so block that. We don't allow mushy semantics. + */ + if (kattr->mnt_userns == fs_userns) + return -EINVAL; + + /* * Once a mount has been idmapped we don't allow it to change its * mapping. It makes things simpler and callers can just create * another bind-mount they can idmap if they want to. */ - if (mnt_user_ns(m) != &init_user_ns) + if (is_idmapped_mnt(m)) return -EPERM; /* The underlying filesystem doesn't support idmapped mounts yet. */ if (!(m->mnt_sb->s_type->fs_flags & FS_ALLOW_IDMAP)) return -EINVAL; - /* Don't yet support filesystem mountable in user namespaces. */ - if (m->mnt_sb->s_user_ns != &init_user_ns) - return -EINVAL; - /* We're not controlling the superblock. */ - if (!capable(CAP_SYS_ADMIN)) + if (!ns_capable(fs_userns, CAP_SYS_ADMIN)) return -EPERM; /* Mount has already been visible in the filesystem hierarchy. */ @@ -4002,14 +4012,27 @@ out: static void do_idmap_mount(const struct mount_kattr *kattr, struct mount *mnt) { - struct user_namespace *mnt_userns; + struct user_namespace *mnt_userns, *old_mnt_userns; if (!kattr->mnt_userns) return; + /* + * We're the only ones able to change the mount's idmapping. So + * mnt->mnt.mnt_userns is stable and we can retrieve it directly. + */ + old_mnt_userns = mnt->mnt.mnt_userns; + mnt_userns = get_user_ns(kattr->mnt_userns); /* Pairs with smp_load_acquire() in mnt_user_ns(). */ smp_store_release(&mnt->mnt.mnt_userns, mnt_userns); + + /* + * If this is an idmapped filesystem drop the reference we've taken + * in vfs_create_mount() before. + */ + if (!initial_idmapping(old_mnt_userns)) + put_user_ns(old_mnt_userns); } static void mount_setattr_commit(struct mount_kattr *kattr, @@ -4133,13 +4156,15 @@ static int build_mount_idmapped(const struct mount_attr *attr, size_t usize, } /* - * The init_user_ns is used to indicate that a vfsmount is not idmapped. - * This is simpler than just having to treat NULL as unmapped. Users - * wanting to idmap a mount to init_user_ns can just use a namespace - * with an identity mapping. + * The initial idmapping cannot be used to create an idmapped + * mount. We use the initial idmapping as an indicator of a mount + * that is not idmapped. It can simply be passed into helpers that + * are aware of idmapped mounts as a convenient shortcut. A user + * can just create a dedicated identity mapping to achieve the same + * result. */ mnt_userns = container_of(ns, struct user_namespace, ns); - if (mnt_userns == &init_user_ns) { + if (initial_idmapping(mnt_userns)) { err = -EPERM; goto out_fput; } diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c index 9421dae22737..668c7527b17e 100644 --- a/fs/nfsd/export.c +++ b/fs/nfsd/export.c @@ -427,7 +427,7 @@ static int check_export(struct path *path, int *flags, unsigned char *uuid) return -EINVAL; } - if (mnt_user_ns(path->mnt) != &init_user_ns) { + if (is_idmapped_mnt(path->mnt)) { dprintk("exp_export: export of idmapped mounts not yet supported.\n"); return -EINVAL; } diff --git a/fs/open.c b/fs/open.c index f732fb94600c..9ff2f621b760 100644 --- a/fs/open.c +++ b/fs/open.c @@ -32,6 +32,7 @@ #include <linux/ima.h> #include <linux/dnotify.h> #include <linux/compat.h> +#include <linux/mnt_idmapping.h> #include "internal.h" @@ -640,7 +641,7 @@ SYSCALL_DEFINE2(chmod, const char __user *, filename, umode_t, mode) int chown_common(const struct path *path, uid_t user, gid_t group) { - struct user_namespace *mnt_userns; + struct user_namespace *mnt_userns, *fs_userns; struct inode *inode = path->dentry->d_inode; struct inode *delegated_inode = NULL; int error; @@ -652,8 +653,9 @@ int chown_common(const struct path *path, uid_t user, gid_t group) gid = make_kgid(current_user_ns(), group); mnt_userns = mnt_user_ns(path->mnt); - uid = kuid_from_mnt(mnt_userns, uid); - gid = kgid_from_mnt(mnt_userns, gid); + fs_userns = i_user_ns(inode); + uid = mapped_kuid_user(mnt_userns, fs_userns, uid); + gid = mapped_kgid_user(mnt_userns, fs_userns, gid); retry_deleg: newattrs.ia_valid = ATTR_CTIME; diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c index 265181c110ae..7bb0a47cb615 100644 --- a/fs/overlayfs/super.c +++ b/fs/overlayfs/super.c @@ -873,7 +873,7 @@ static int ovl_mount_dir_noesc(const char *name, struct path *path) pr_err("filesystem on '%s' not supported\n", name); goto out_put; } - if (mnt_user_ns(path->mnt) != &init_user_ns) { + if (is_idmapped_mnt(path->mnt)) { pr_err("idmapped layers are currently not supported\n"); goto out_put; } diff --git a/fs/posix_acl.c b/fs/posix_acl.c index 9323a854a60a..80acb6885cf9 100644 --- a/fs/posix_acl.c +++ b/fs/posix_acl.c @@ -23,6 +23,7 @@ #include <linux/export.h> #include <linux/user_namespace.h> #include <linux/namei.h> +#include <linux/mnt_idmapping.h> static struct posix_acl **acl_by_type(struct inode *inode, int type) { @@ -374,7 +375,9 @@ posix_acl_permission(struct user_namespace *mnt_userns, struct inode *inode, goto check_perm; break; case ACL_USER: - uid = kuid_into_mnt(mnt_userns, pa->e_uid); + uid = mapped_kuid_fs(mnt_userns, + i_user_ns(inode), + pa->e_uid); if (uid_eq(uid, current_fsuid())) goto mask; break; @@ -387,7 +390,9 @@ posix_acl_permission(struct user_namespace *mnt_userns, struct inode *inode, } break; case ACL_GROUP: - gid = kgid_into_mnt(mnt_userns, pa->e_gid); + gid = mapped_kgid_fs(mnt_userns, + i_user_ns(inode), + pa->e_gid); if (in_group_p(gid)) { found = 1; if ((pa->e_perm & want) == want) @@ -734,17 +739,17 @@ static void posix_acl_fix_xattr_userns( case ACL_USER: uid = make_kuid(from, le32_to_cpu(entry->e_id)); if (from_user) - uid = kuid_from_mnt(mnt_userns, uid); + uid = mapped_kuid_user(mnt_userns, &init_user_ns, uid); else - uid = kuid_into_mnt(mnt_userns, uid); + uid = mapped_kuid_fs(mnt_userns, &init_user_ns, uid); entry->e_id = cpu_to_le32(from_kuid(to, uid)); break; case ACL_GROUP: gid = make_kgid(from, le32_to_cpu(entry->e_id)); if (from_user) - gid = kgid_from_mnt(mnt_userns, gid); + gid = mapped_kgid_user(mnt_userns, &init_user_ns, gid); else - gid = kgid_into_mnt(mnt_userns, gid); + gid = mapped_kgid_fs(mnt_userns, &init_user_ns, gid); entry->e_id = cpu_to_le32(from_kgid(to, gid)); break; default: diff --git a/fs/proc_namespace.c b/fs/proc_namespace.c index 392ef5162655..49650e54d2f8 100644 --- a/fs/proc_namespace.c +++ b/fs/proc_namespace.c @@ -80,7 +80,7 @@ static void show_mnt_opts(struct seq_file *m, struct vfsmount *mnt) seq_puts(m, fs_infop->str); } - if (mnt_user_ns(mnt) != &init_user_ns) + if (is_idmapped_mnt(mnt)) seq_puts(m, ",idmapped"); } diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index 6771f357ad2c..04bf467b1090 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -988,8 +988,8 @@ xfs_create( /* * Make sure that we have allocated dquot(s) on disk. */ - error = xfs_qm_vop_dqalloc(dp, mapped_fsuid(mnt_userns), - mapped_fsgid(mnt_userns), prid, + error = xfs_qm_vop_dqalloc(dp, mapped_fsuid(mnt_userns, &init_user_ns), + mapped_fsgid(mnt_userns, &init_user_ns), prid, XFS_QMOPT_QUOTALL | XFS_QMOPT_INHERIT, &udqp, &gdqp, &pdqp); if (error) @@ -1142,8 +1142,8 @@ xfs_create_tmpfile( /* * Make sure that we have allocated dquot(s) on disk. */ - error = xfs_qm_vop_dqalloc(dp, mapped_fsuid(mnt_userns), - mapped_fsgid(mnt_userns), prid, + error = xfs_qm_vop_dqalloc(dp, mapped_fsuid(mnt_userns, &init_user_ns), + mapped_fsgid(mnt_userns, &init_user_ns), prid, XFS_QMOPT_QUOTALL | XFS_QMOPT_INHERIT, &udqp, &gdqp, &pdqp); if (error) diff --git a/fs/xfs/xfs_linux.h b/fs/xfs/xfs_linux.h index c174262a074e..09a8fba84ff9 100644 --- a/fs/xfs/xfs_linux.h +++ b/fs/xfs/xfs_linux.h @@ -61,6 +61,7 @@ typedef __u32 xfs_nlink_t; #include <linux/ratelimit.h> #include <linux/rhashtable.h> #include <linux/xattr.h> +#include <linux/mnt_idmapping.h> #include <asm/page.h> #include <asm/div64.h> diff --git a/fs/xfs/xfs_symlink.c b/fs/xfs/xfs_symlink.c index fc2c6a404647..a31d2e5d0321 100644 --- a/fs/xfs/xfs_symlink.c +++ b/fs/xfs/xfs_symlink.c @@ -184,8 +184,8 @@ xfs_symlink( /* * Make sure that we have allocated dquot(s) on disk. */ - error = xfs_qm_vop_dqalloc(dp, mapped_fsuid(mnt_userns), - mapped_fsgid(mnt_userns), prid, + error = xfs_qm_vop_dqalloc(dp, mapped_fsuid(mnt_userns, &init_user_ns), + mapped_fsgid(mnt_userns, &init_user_ns), prid, XFS_QMOPT_QUOTALL | XFS_QMOPT_INHERIT, &udqp, &gdqp, &pdqp); if (error) diff --git a/include/linux/fs.h b/include/linux/fs.h index bbf812ce89a8..493b87e3616b 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -41,6 +41,7 @@ #include <linux/stddef.h> #include <linux/mount.h> #include <linux/cred.h> +#include <linux/mnt_idmapping.h> #include <asm/byteorder.h> #include <uapi/linux/fs.h> @@ -1599,6 +1600,11 @@ struct super_block { struct list_head s_inodes_wb; /* writeback inodes */ } __randomize_layout; +static inline struct user_namespace *i_user_ns(const struct inode *inode) +{ + return inode->i_sb->s_user_ns; +} + /* Helper functions so that in most cases filesystems will * not need to deal directly with kuid_t and kgid_t and can * instead deal with the raw numeric values that are stored @@ -1606,50 +1612,22 @@ struct super_block { */ static inline uid_t i_uid_read(const struct inode *inode) { - return from_kuid(inode->i_sb->s_user_ns, inode->i_uid); + return from_kuid(i_user_ns(inode), inode->i_uid); } static inline gid_t i_gid_read(const struct inode *inode) { - return from_kgid(inode->i_sb->s_user_ns, inode->i_gid); + return from_kgid(i_user_ns(inode), inode->i_gid); } static inline void i_uid_write(struct inode *inode, uid_t uid) { - inode->i_uid = make_kuid(inode->i_sb->s_user_ns, uid); + inode->i_uid = make_kuid(i_user_ns(inode), uid); } static inline void i_gid_write(struct inode *inode, gid_t gid) { - inode->i_gid = make_kgid(inode->i_sb->s_user_ns, gid); -} - -/** - * kuid_into_mnt - map a kuid down into a mnt_userns - * @mnt_userns: user namespace of the relevant mount - * @kuid: kuid to be mapped - * - * Return: @kuid mapped according to @mnt_userns. - * If @kuid has no mapping INVALID_UID is returned. - */ -static inline kuid_t kuid_into_mnt(struct user_namespace *mnt_userns, - kuid_t kuid) -{ - return make_kuid(mnt_userns, __kuid_val(kuid)); -} - -/** - * kgid_into_mnt - map a kgid down into a mnt_userns - * @mnt_userns: user namespace of the relevant mount - * @kgid: kgid to be mapped - * - * Return: @kgid mapped according to @mnt_userns. - * If @kgid has no mapping INVALID_GID is returned. - */ -static inline kgid_t kgid_into_mnt(struct user_namespace *mnt_userns, - kgid_t kgid) -{ - return make_kgid(mnt_userns, __kgid_val(kgid)); + inode->i_gid = make_kgid(i_user_ns(inode), gid); } /** @@ -1663,7 +1641,7 @@ static inline kgid_t kgid_into_mnt(struct user_namespace *mnt_userns, static inline kuid_t i_uid_into_mnt(struct user_namespace *mnt_userns, const struct inode *inode) { - return kuid_into_mnt(mnt_userns, inode->i_uid); + return mapped_kuid_fs(mnt_userns, i_user_ns(inode), inode->i_uid); } /** @@ -1677,69 +1655,7 @@ static inline kuid_t i_uid_into_mnt(struct user_namespace *mnt_userns, static inline kgid_t i_gid_into_mnt(struct user_namespace *mnt_userns, const struct inode *inode) { - return kgid_into_mnt(mnt_userns, inode->i_gid); -} - -/** - * kuid_from_mnt - map a kuid up into a mnt_userns - * @mnt_userns: user namespace of the relevant mount - * @kuid: kuid to be mapped - * - * Return: @kuid mapped up according to @mnt_userns. - * If @kuid has no mapping INVALID_UID is returned. - */ -static inline kuid_t kuid_from_mnt(struct user_namespace *mnt_userns, - kuid_t kuid) -{ - return KUIDT_INIT(from_kuid(mnt_userns, kuid)); -} - -/** - * kgid_from_mnt - map a kgid up into a mnt_userns - * @mnt_userns: user namespace of the relevant mount - * @kgid: kgid to be mapped - * - * Return: @kgid mapped up according to @mnt_userns. - * If @kgid has no mapping INVALID_GID is returned. - */ -static inline kgid_t kgid_from_mnt(struct user_namespace *mnt_userns, - kgid_t kgid) -{ - return KGIDT_INIT(from_kgid(mnt_userns, kgid)); -} - -/** - * mapped_fsuid - return caller's fsuid mapped up into a mnt_userns - * @mnt_userns: user namespace of the relevant mount - * - * Use this helper to initialize a new vfs or filesystem object based on - * the caller's fsuid. A common example is initializing the i_uid field of - * a newly allocated inode triggered by a creation event such as mkdir or - * O_CREAT. Other examples include the allocation of quotas for a specific - * user. - * - * Return: the caller's current fsuid mapped up according to @mnt_userns. - */ -static inline kuid_t mapped_fsuid(struct user_namespace *mnt_userns) -{ - return kuid_from_mnt(mnt_userns, current_fsuid()); -} - -/** - * mapped_fsgid - return caller's fsgid mapped up into a mnt_userns - * @mnt_userns: user namespace of the relevant mount - * - * Use this helper to initialize a new vfs or filesystem object based on - * the caller's fsgid. A common example is initializing the i_gid field of - * a newly allocated inode triggered by a creation event such as mkdir or - * O_CREAT. Other examples include the allocation of quotas for a specific - * user. - * - * Return: the caller's current fsgid mapped up according to @mnt_userns. - */ -static inline kgid_t mapped_fsgid(struct user_namespace *mnt_userns) -{ - return kgid_from_mnt(mnt_userns, current_fsgid()); + return mapped_kgid_fs(mnt_userns, i_user_ns(inode), inode->i_gid); } /** @@ -1753,7 +1669,7 @@ static inline kgid_t mapped_fsgid(struct user_namespace *mnt_userns) static inline void inode_fsuid_set(struct inode *inode, struct user_namespace *mnt_userns) { - inode->i_uid = mapped_fsuid(mnt_userns); + inode->i_uid = mapped_fsuid(mnt_userns, i_user_ns(inode)); } /** @@ -1767,7 +1683,7 @@ static inline void inode_fsuid_set(struct inode *inode, static inline void inode_fsgid_set(struct inode *inode, struct user_namespace *mnt_userns) { - inode->i_gid = mapped_fsgid(mnt_userns); + inode->i_gid = mapped_fsgid(mnt_userns, i_user_ns(inode)); } /** @@ -1784,10 +1700,18 @@ static inline void inode_fsgid_set(struct inode *inode, static inline bool fsuidgid_has_mapping(struct super_block *sb, struct user_namespace *mnt_userns) { - struct user_namespace *s_user_ns = sb->s_user_ns; + struct user_namespace *fs_userns = sb->s_user_ns; + kuid_t kuid; + kgid_t kgid; - return kuid_has_mapping(s_user_ns, mapped_fsuid(mnt_userns)) && - kgid_has_mapping(s_user_ns, mapped_fsgid(mnt_userns)); + kuid = mapped_fsuid(mnt_userns, fs_userns); + if (!uid_valid(kuid)) + return false; + kgid = mapped_fsgid(mnt_userns, fs_userns); + if (!gid_valid(kgid)) + return false; + return kuid_has_mapping(fs_userns, kuid) && + kgid_has_mapping(fs_userns, kgid); } extern struct timespec64 current_time(struct inode *inode); @@ -2724,6 +2648,21 @@ static inline struct user_namespace *file_mnt_user_ns(struct file *file) { return mnt_user_ns(file->f_path.mnt); } + +/** + * is_idmapped_mnt - check whether a mount is mapped + * @mnt: the mount to check + * + * If @mnt has an idmapping attached different from the + * filesystem's idmapping then @mnt is mapped. + * + * Return: true if mount is mapped, false if not. + */ +static inline bool is_idmapped_mnt(const struct vfsmount *mnt) +{ + return mnt_user_ns(mnt) != mnt->mnt_sb->s_user_ns; +} + extern long vfs_truncate(const struct path *, loff_t); int do_truncate(struct user_namespace *, struct dentry *, loff_t start, unsigned int time_attrs, struct file *filp); diff --git a/include/linux/mnt_idmapping.h b/include/linux/mnt_idmapping.h new file mode 100644 index 000000000000..ee5a217de2a8 --- /dev/null +++ b/include/linux/mnt_idmapping.h @@ -0,0 +1,234 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _LINUX_MNT_IDMAPPING_H +#define _LINUX_MNT_IDMAPPING_H + +#include <linux/types.h> +#include <linux/uidgid.h> + +struct user_namespace; +/* + * Carries the initial idmapping of 0:0:4294967295 which is an identity + * mapping. This means that {g,u}id 0 is mapped to {g,u}id 0, {g,u}id 1 is + * mapped to {g,u}id 1, [...], {g,u}id 1000 to {g,u}id 1000, [...]. + */ +extern struct user_namespace init_user_ns; + +/** + * initial_idmapping - check whether this is the initial mapping + * @ns: idmapping to check + * + * Check whether this is the initial mapping, mapping 0 to 0, 1 to 1, + * [...], 1000 to 1000 [...]. + * + * Return: true if this is the initial mapping, false if not. + */ +static inline bool initial_idmapping(const struct user_namespace *ns) +{ + return ns == &init_user_ns; +} + +/** + * no_idmapping - check whether we can skip remapping a kuid/gid + * @mnt_userns: the mount's idmapping + * @fs_userns: the filesystem's idmapping + * + * This function can be used to check whether a remapping between two + * idmappings is required. + * An idmapped mount is a mount that has an idmapping attached to it that + * is different from the filsystem's idmapping and the initial idmapping. + * If the initial mapping is used or the idmapping of the mount and the + * filesystem are identical no remapping is required. + * + * Return: true if remapping can be skipped, false if not. + */ +static inline bool no_idmapping(const struct user_namespace *mnt_userns, + const struct user_namespace *fs_userns) +{ + return initial_idmapping(mnt_userns) || mnt_userns == fs_userns; +} + +/** + * mapped_kuid_fs - map a filesystem kuid into a mnt_userns + * @mnt_userns: the mount's idmapping + * @fs_userns: the filesystem's idmapping + * @kuid : kuid to be mapped + * + * Take a @kuid and remap it from @fs_userns into @mnt_userns. Use this + * function when preparing a @kuid to be reported to userspace. + * + * If no_idmapping() determines that this is not an idmapped mount we can + * simply return @kuid unchanged. + * If initial_idmapping() tells us that the filesystem is not mounted with an + * idmapping we know the value of @kuid won't change when calling + * from_kuid() so we can simply retrieve the value via __kuid_val() + * directly. + * + * Return: @kuid mapped according to @mnt_userns. + * If @kuid has no mapping in either @mnt_userns or @fs_userns INVALID_UID is + * returned. + */ +static inline kuid_t mapped_kuid_fs(struct user_namespace *mnt_userns, + struct user_namespace *fs_userns, + kuid_t kuid) +{ + uid_t uid; + + if (no_idmapping(mnt_userns, fs_userns)) + return kuid; + if (initial_idmapping(fs_userns)) + uid = __kuid_val(kuid); + else + uid = from_kuid(fs_userns, kuid); + if (uid == (uid_t)-1) + return INVALID_UID; + return make_kuid(mnt_userns, uid); +} + +/** + * mapped_kgid_fs - map a filesystem kgid into a mnt_userns + * @mnt_userns: the mount's idmapping + * @fs_userns: the filesystem's idmapping + * @kgid : kgid to be mapped + * + * Take a @kgid and remap it from @fs_userns into @mnt_userns. Use this + * function when preparing a @kgid to be reported to userspace. + * + * If no_idmapping() determines that this is not an idmapped mount we can + * simply return @kgid unchanged. + * If initial_idmapping() tells us that the filesystem is not mounted with an + * idmapping we know the value of @kgid won't change when calling + * from_kgid() so we can simply retrieve the value via __kgid_val() + * directly. + * + * Return: @kgid mapped according to @mnt_userns. + * If @kgid has no mapping in either @mnt_userns or @fs_userns INVALID_GID is + * returned. + */ +static inline kgid_t mapped_kgid_fs(struct user_namespace *mnt_userns, + struct user_namespace *fs_userns, + kgid_t kgid) +{ + gid_t gid; + + if (no_idmapping(mnt_userns, fs_userns)) + return kgid; + if (initial_idmapping(fs_userns)) + gid = __kgid_val(kgid); + else + gid = from_kgid(fs_userns, kgid); + if (gid == (gid_t)-1) + return INVALID_GID; + return make_kgid(mnt_userns, gid); +} + +/** + * mapped_kuid_user - map a user kuid into a mnt_userns + * @mnt_userns: the mount's idmapping + * @fs_userns: the filesystem's idmapping + * @kuid : kuid to be mapped + * + * Use the idmapping of @mnt_userns to remap a @kuid into @fs_userns. Use this + * function when preparing a @kuid to be written to disk or inode. + * + * If no_idmapping() determines that this is not an idmapped mount we can + * simply return @kuid unchanged. + * If initial_idmapping() tells us that the filesystem is not mounted with an + * idmapping we know the value of @kuid won't change when calling + * make_kuid() so we can simply retrieve the value via KUIDT_INIT() + * directly. + * + * Return: @kuid mapped according to @mnt_userns. + * If @kuid has no mapping in either @mnt_userns or @fs_userns INVALID_UID is + * returned. + */ +static inline kuid_t mapped_kuid_user(struct user_namespace *mnt_userns, + struct user_namespace *fs_userns, + kuid_t kuid) +{ + uid_t uid; + + if (no_idmapping(mnt_userns, fs_userns)) + return kuid; + uid = from_kuid(mnt_userns, kuid); + if (uid == (uid_t)-1) + return INVALID_UID; + if (initial_idmapping(fs_userns)) + return KUIDT_INIT(uid); + return make_kuid(fs_userns, uid); +} + +/** + * mapped_kgid_user - map a user kgid into a mnt_userns + * @mnt_userns: the mount's idmapping + * @fs_userns: the filesystem's idmapping + * @kgid : kgid to be mapped + * + * Use the idmapping of @mnt_userns to remap a @kgid into @fs_userns. Use this + * function when preparing a @kgid to be written to disk or inode. + * + * If no_idmapping() determines that this is not an idmapped mount we can + * simply return @kgid unchanged. + * If initial_idmapping() tells us that the filesystem is not mounted with an + * idmapping we know the value of @kgid won't change when calling + * make_kgid() so we can simply retrieve the value via KGIDT_INIT() + * directly. + * + * Return: @kgid mapped according to @mnt_userns. + * If @kgid has no mapping in either @mnt_userns or @fs_userns INVALID_GID is + * returned. + */ +static inline kgid_t mapped_kgid_user(struct user_namespace *mnt_userns, + struct user_namespace *fs_userns, + kgid_t kgid) +{ + gid_t gid; + + if (no_idmapping(mnt_userns, fs_userns)) + return kgid; + gid = from_kgid(mnt_userns, kgid); + if (gid == (gid_t)-1) + return INVALID_GID; + if (initial_idmapping(fs_userns)) + return KGIDT_INIT(gid); + return make_kgid(fs_userns, gid); +} + +/** + * mapped_fsuid - return caller's fsuid mapped up into a mnt_userns + * @mnt_userns: the mount's idmapping + * @fs_userns: the filesystem's idmapping + * + * Use this helper to initialize a new vfs or filesystem object based on + * the caller's fsuid. A common example is initializing the i_uid field of + * a newly allocated inode triggered by a creation event such as mkdir or + * O_CREAT. Other examples include the allocation of quotas for a specific + * user. + * + * Return: the caller's current fsuid mapped up according to @mnt_userns. + */ +static inline kuid_t mapped_fsuid(struct user_namespace *mnt_userns, + struct user_namespace *fs_userns) +{ + return mapped_kuid_user(mnt_userns, fs_userns, current_fsuid()); +} + +/** + * mapped_fsgid - return caller's fsgid mapped up into a mnt_userns + * @mnt_userns: the mount's idmapping + * @fs_userns: the filesystem's idmapping + * + * Use this helper to initialize a new vfs or filesystem object based on + * the caller's fsgid. A common example is initializing the i_gid field of + * a newly allocated inode triggered by a creation event such as mkdir or + * O_CREAT. Other examples include the allocation of quotas for a specific + * user. + * + * Return: the caller's current fsgid mapped up according to @mnt_userns. + */ +static inline kgid_t mapped_fsgid(struct user_namespace *mnt_userns, + struct user_namespace *fs_userns) +{ + return mapped_kgid_user(mnt_userns, fs_userns, current_fsgid()); +} + +#endif /* _LINUX_MNT_IDMAPPING_H */ diff --git a/security/commoncap.c b/security/commoncap.c index 3f810d37b71b..5fc8986c3c77 100644 --- a/security/commoncap.c +++ b/security/commoncap.c @@ -24,6 +24,7 @@ #include <linux/user_namespace.h> #include <linux/binfmts.h> #include <linux/personality.h> +#include <linux/mnt_idmapping.h> /* * If a non-root user executes a setuid-root binary in @@ -418,7 +419,7 @@ int cap_inode_getsecurity(struct user_namespace *mnt_userns, kroot = make_kuid(fs_ns, root); /* If this is an idmapped mount shift the kuid. */ - kroot = kuid_into_mnt(mnt_userns, kroot); + kroot = mapped_kuid_fs(mnt_userns, fs_ns, kroot); /* If the root kuid maps to a valid uid in current ns, then return * this as a nscap. */ @@ -488,6 +489,7 @@ out_free: * @size: size of @ivalue * @task_ns: user namespace of the caller * @mnt_userns: user namespace of the mount the inode was found from + * @fs_userns: user namespace of the filesystem * * If the inode has been found through an idmapped mount the user namespace of * the vfsmount must be passed through @mnt_userns. This function will then @@ -497,7 +499,8 @@ out_free: */ static kuid_t rootid_from_xattr(const void *value, size_t size, struct user_namespace *task_ns, - struct user_namespace *mnt_userns) + struct user_namespace *mnt_userns, + struct user_namespace *fs_userns) { const struct vfs_ns_cap_data *nscap = value; kuid_t rootkid; @@ -507,7 +510,7 @@ static kuid_t rootid_from_xattr(const void *value, size_t size, rootid = le32_to_cpu(nscap->rootid); rootkid = make_kuid(task_ns, rootid); - return kuid_from_mnt(mnt_userns, rootkid); + return mapped_kuid_user(mnt_userns, fs_userns, rootkid); } static bool validheader(size_t size, const struct vfs_cap_data *cap) @@ -553,12 +556,12 @@ int cap_convert_nscap(struct user_namespace *mnt_userns, struct dentry *dentry, return -EINVAL; if (!capable_wrt_inode_uidgid(mnt_userns, inode, CAP_SETFCAP)) return -EPERM; - if (size == XATTR_CAPS_SZ_2 && (mnt_userns == &init_user_ns)) + if (size == XATTR_CAPS_SZ_2 && (mnt_userns == fs_ns)) if (ns_capable(inode->i_sb->s_user_ns, CAP_SETFCAP)) /* user is privileged, just write the v2 */ return size; - rootid = rootid_from_xattr(*ivalue, size, task_ns, mnt_userns); + rootid = rootid_from_xattr(*ivalue, size, task_ns, mnt_userns, fs_ns); if (!uid_valid(rootid)) return -EINVAL; @@ -699,7 +702,7 @@ int get_vfs_caps_from_disk(struct user_namespace *mnt_userns, /* Limit the caps to the mounter of the filesystem * or the more limited uid specified in the xattr. */ - rootkuid = kuid_into_mnt(mnt_userns, rootkuid); + rootkuid = mapped_kuid_fs(mnt_userns, fs_ns, rootkuid); if (!rootid_owns_currentns(rootkuid)) return -ENODATA; |