diff options
Diffstat (limited to 'fs')
-rw-r--r-- | fs/aio.c | 8 | ||||
-rw-r--r-- | fs/char_dev.c | 3 | ||||
-rw-r--r-- | fs/fs_context.c | 30 | ||||
-rw-r--r-- | fs/internal.h | 1 | ||||
-rw-r--r-- | fs/mount.h | 12 | ||||
-rw-r--r-- | fs/namei.c | 21 | ||||
-rw-r--r-- | fs/namespace.c | 91 | ||||
-rw-r--r-- | fs/open.c | 58 | ||||
-rw-r--r-- | fs/proc_namespace.c | 4 | ||||
-rw-r--r-- | fs/stat.c | 11 | ||||
-rw-r--r-- | fs/utimes.c | 6 |
11 files changed, 159 insertions, 86 deletions
@@ -176,6 +176,7 @@ struct fsync_iocb { struct file *file; struct work_struct work; bool datasync; + struct cred *creds; }; struct poll_iocb { @@ -1589,8 +1590,11 @@ static int aio_write(struct kiocb *req, const struct iocb *iocb, static void aio_fsync_work(struct work_struct *work) { struct aio_kiocb *iocb = container_of(work, struct aio_kiocb, fsync.work); + const struct cred *old_cred = override_creds(iocb->fsync.creds); iocb->ki_res.res = vfs_fsync(iocb->fsync.file, iocb->fsync.datasync); + revert_creds(old_cred); + put_cred(iocb->fsync.creds); iocb_put(iocb); } @@ -1604,6 +1608,10 @@ static int aio_fsync(struct fsync_iocb *req, const struct iocb *iocb, if (unlikely(!req->file->f_op->fsync)) return -EINVAL; + req->creds = prepare_creds(); + if (!req->creds) + return -ENOMEM; + req->datasync = datasync; INIT_WORK(&req->work, aio_fsync_work); schedule_work(&req->work); diff --git a/fs/char_dev.c b/fs/char_dev.c index c5e6eff5a381..ba0ded7842a7 100644 --- a/fs/char_dev.c +++ b/fs/char_dev.c @@ -483,6 +483,9 @@ int cdev_add(struct cdev *p, dev_t dev, unsigned count) p->dev = dev; p->count = count; + if (WARN_ON(dev == WHITEOUT_DEV)) + return -EBUSY; + error = kobj_map(cdev_map, dev, count, NULL, exact_match, exact_lock, p); if (error) diff --git a/fs/fs_context.c b/fs/fs_context.c index fc9f6ef93b55..7d5c5dd2b1d5 100644 --- a/fs/fs_context.c +++ b/fs/fs_context.c @@ -42,7 +42,6 @@ static const struct constant_table common_set_sb_flag[] = { { "dirsync", SB_DIRSYNC }, { "lazytime", SB_LAZYTIME }, { "mand", SB_MANDLOCK }, - { "posixacl", SB_POSIXACL }, { "ro", SB_RDONLY }, { "sync", SB_SYNCHRONOUS }, { }, @@ -53,44 +52,15 @@ static const struct constant_table common_clear_sb_flag[] = { { "nolazytime", SB_LAZYTIME }, { "nomand", SB_MANDLOCK }, { "rw", SB_RDONLY }, - { "silent", SB_SILENT }, { }, }; -static const char *const forbidden_sb_flag[] = { - "bind", - "dev", - "exec", - "move", - "noatime", - "nodev", - "nodiratime", - "noexec", - "norelatime", - "nostrictatime", - "nosuid", - "private", - "rec", - "relatime", - "remount", - "shared", - "slave", - "strictatime", - "suid", - "unbindable", -}; - /* * Check for a common mount option that manipulates s_flags. */ static int vfs_parse_sb_flag(struct fs_context *fc, const char *key) { unsigned int token; - unsigned int i; - - for (i = 0; i < ARRAY_SIZE(forbidden_sb_flag); i++) - if (strcmp(key, forbidden_sb_flag[i]) == 0) - return -EINVAL; token = lookup_constant(common_set_sb_flag, key, 0); if (token) { diff --git a/fs/internal.h b/fs/internal.h index aa5d45524e87..0d467e32dd7e 100644 --- a/fs/internal.h +++ b/fs/internal.h @@ -126,7 +126,6 @@ extern struct open_how build_open_how(int flags, umode_t mode); extern int build_open_flags(const struct open_how *how, struct open_flags *op); long do_sys_ftruncate(unsigned int fd, loff_t length, int small); -long do_faccessat(int dfd, const char __user *filename, int mode); int do_fchmodat(int dfd, const char __user *filename, umode_t mode); int do_fchownat(int dfd, const char __user *filename, uid_t user, gid_t group, int flag); diff --git a/fs/mount.h b/fs/mount.h index 711a4093e475..c7abb7b394d8 100644 --- a/fs/mount.h +++ b/fs/mount.h @@ -9,7 +9,13 @@ struct mnt_namespace { atomic_t count; struct ns_common ns; struct mount * root; + /* + * Traversal and modification of .list is protected by either + * - taking namespace_sem for write, OR + * - taking namespace_sem for read AND taking .ns_lock. + */ struct list_head list; + spinlock_t ns_lock; struct user_namespace *user_ns; struct ucounts *ucounts; u64 seq; /* Sequence number to prevent loops */ @@ -133,9 +139,7 @@ struct proc_mounts { struct mnt_namespace *ns; struct path root; int (*show)(struct seq_file *, struct vfsmount *); - void *cached_mount; - u64 cached_event; - loff_t cached_index; + struct mount cursor; }; extern const struct seq_operations mounts_op; @@ -153,3 +157,5 @@ static inline bool is_anon_ns(struct mnt_namespace *ns) { return ns->seq == 0; } + +extern void mnt_cursor_del(struct mnt_namespace *ns, struct mount *cursor); diff --git a/fs/namei.c b/fs/namei.c index a320371899cf..d81f73ff1a8b 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -3505,12 +3505,14 @@ EXPORT_SYMBOL(user_path_create); int vfs_mknod(struct inode *dir, struct dentry *dentry, umode_t mode, dev_t dev) { + bool is_whiteout = S_ISCHR(mode) && dev == WHITEOUT_DEV; int error = may_create(dir, dentry); if (error) return error; - if ((S_ISCHR(mode) || S_ISBLK(mode)) && !capable(CAP_MKNOD)) + if ((S_ISCHR(mode) || S_ISBLK(mode)) && !is_whiteout && + !capable(CAP_MKNOD)) return -EPERM; if (!dir->i_op->mknod) @@ -4345,9 +4347,6 @@ static int do_renameat2(int olddfd, const char __user *oldname, int newdfd, (flags & RENAME_EXCHANGE)) return -EINVAL; - if ((flags & RENAME_WHITEOUT) && !capable(CAP_MKNOD)) - return -EPERM; - if (flags & RENAME_EXCHANGE) target_flags = 0; @@ -4483,20 +4482,6 @@ SYSCALL_DEFINE2(rename, const char __user *, oldname, const char __user *, newna return do_renameat2(AT_FDCWD, oldname, AT_FDCWD, newname, 0); } -int vfs_whiteout(struct inode *dir, struct dentry *dentry) -{ - int error = may_create(dir, dentry); - if (error) - return error; - - if (!dir->i_op->mknod) - return -EPERM; - - return dir->i_op->mknod(dir, dentry, - S_IFCHR | WHITEOUT_MODE, WHITEOUT_DEV); -} -EXPORT_SYMBOL(vfs_whiteout); - int readlink_copy(char __user *buffer, int buflen, const char *link) { int len = PTR_ERR(link); diff --git a/fs/namespace.c b/fs/namespace.c index 5f036dc711b6..a6baee3c7904 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -648,6 +648,21 @@ struct vfsmount *lookup_mnt(const struct path *path) return m; } +static inline void lock_ns_list(struct mnt_namespace *ns) +{ + spin_lock(&ns->ns_lock); +} + +static inline void unlock_ns_list(struct mnt_namespace *ns) +{ + spin_unlock(&ns->ns_lock); +} + +static inline bool mnt_is_cursor(struct mount *mnt) +{ + return mnt->mnt.mnt_flags & MNT_CURSOR; +} + /* * __is_local_mountpoint - Test to see if dentry is a mountpoint in the * current mount namespace. @@ -673,11 +688,15 @@ bool __is_local_mountpoint(struct dentry *dentry) goto out; down_read(&namespace_sem); + lock_ns_list(ns); list_for_each_entry(mnt, &ns->list, mnt_list) { + if (mnt_is_cursor(mnt)) + continue; is_covered = (mnt->mnt_mountpoint == dentry); if (is_covered) break; } + unlock_ns_list(ns); up_read(&namespace_sem); out: return is_covered; @@ -1245,46 +1264,71 @@ struct vfsmount *mnt_clone_internal(const struct path *path) } #ifdef CONFIG_PROC_FS +static struct mount *mnt_list_next(struct mnt_namespace *ns, + struct list_head *p) +{ + struct mount *mnt, *ret = NULL; + + lock_ns_list(ns); + list_for_each_continue(p, &ns->list) { + mnt = list_entry(p, typeof(*mnt), mnt_list); + if (!mnt_is_cursor(mnt)) { + ret = mnt; + break; + } + } + unlock_ns_list(ns); + + return ret; +} + /* iterator; we want it to have access to namespace_sem, thus here... */ static void *m_start(struct seq_file *m, loff_t *pos) { struct proc_mounts *p = m->private; + struct list_head *prev; down_read(&namespace_sem); - if (p->cached_event == p->ns->event) { - void *v = p->cached_mount; - if (*pos == p->cached_index) - return v; - if (*pos == p->cached_index + 1) { - v = seq_list_next(v, &p->ns->list, &p->cached_index); - return p->cached_mount = v; - } + if (!*pos) { + prev = &p->ns->list; + } else { + prev = &p->cursor.mnt_list; + + /* Read after we'd reached the end? */ + if (list_empty(prev)) + return NULL; } - p->cached_event = p->ns->event; - p->cached_mount = seq_list_start(&p->ns->list, *pos); - p->cached_index = *pos; - return p->cached_mount; + return mnt_list_next(p->ns, prev); } static void *m_next(struct seq_file *m, void *v, loff_t *pos) { struct proc_mounts *p = m->private; + struct mount *mnt = v; - p->cached_mount = seq_list_next(v, &p->ns->list, pos); - p->cached_index = *pos; - return p->cached_mount; + ++*pos; + return mnt_list_next(p->ns, &mnt->mnt_list); } static void m_stop(struct seq_file *m, void *v) { + struct proc_mounts *p = m->private; + struct mount *mnt = v; + + lock_ns_list(p->ns); + if (mnt) + list_move_tail(&p->cursor.mnt_list, &mnt->mnt_list); + else + list_del_init(&p->cursor.mnt_list); + unlock_ns_list(p->ns); up_read(&namespace_sem); } static int m_show(struct seq_file *m, void *v) { struct proc_mounts *p = m->private; - struct mount *r = list_entry(v, struct mount, mnt_list); + struct mount *r = v; return p->show(m, &r->mnt); } @@ -1294,6 +1338,15 @@ const struct seq_operations mounts_op = { .stop = m_stop, .show = m_show, }; + +void mnt_cursor_del(struct mnt_namespace *ns, struct mount *cursor) +{ + down_read(&namespace_sem); + lock_ns_list(ns); + list_del(&cursor->mnt_list); + unlock_ns_list(ns); + up_read(&namespace_sem); +} #endif /* CONFIG_PROC_FS */ /** @@ -3202,6 +3255,7 @@ static struct mnt_namespace *alloc_mnt_ns(struct user_namespace *user_ns, bool a atomic_set(&new_ns->count, 1); INIT_LIST_HEAD(&new_ns->list); init_waitqueue_head(&new_ns->poll); + spin_lock_init(&new_ns->ns_lock); new_ns->user_ns = get_user_ns(user_ns); new_ns->ucounts = ucounts; return new_ns; @@ -3842,10 +3896,14 @@ static bool mnt_already_visible(struct mnt_namespace *ns, bool visible = false; down_read(&namespace_sem); + lock_ns_list(ns); list_for_each_entry(mnt, &ns->list, mnt_list) { struct mount *child; int mnt_flags; + if (mnt_is_cursor(mnt)) + continue; + if (mnt->mnt.mnt_sb->s_type != sb->s_type) continue; @@ -3893,6 +3951,7 @@ static bool mnt_already_visible(struct mnt_namespace *ns, next: ; } found: + unlock_ns_list(ns); up_read(&namespace_sem); return visible; } diff --git a/fs/open.c b/fs/open.c index 719b320ede52..e62b1db06638 100644 --- a/fs/open.c +++ b/fs/open.c @@ -345,21 +345,14 @@ SYSCALL_DEFINE4(fallocate, int, fd, int, mode, loff_t, offset, loff_t, len) * We do this by temporarily clearing all FS-related capabilities and * switching the fsuid/fsgid around to the real ones. */ -long do_faccessat(int dfd, const char __user *filename, int mode) +static const struct cred *access_override_creds(void) { const struct cred *old_cred; struct cred *override_cred; - struct path path; - struct inode *inode; - int res; - unsigned int lookup_flags = LOOKUP_FOLLOW; - - if (mode & ~S_IRWXO) /* where's F_OK, X_OK, W_OK, R_OK? */ - return -EINVAL; override_cred = prepare_creds(); if (!override_cred) - return -ENOMEM; + return NULL; override_cred->fsuid = override_cred->uid; override_cred->fsgid = override_cred->gid; @@ -394,6 +387,38 @@ long do_faccessat(int dfd, const char __user *filename, int mode) override_cred->non_rcu = 1; old_cred = override_creds(override_cred); + + /* override_cred() gets its own ref */ + put_cred(override_cred); + + return old_cred; +} + +long do_faccessat(int dfd, const char __user *filename, int mode, int flags) +{ + struct path path; + struct inode *inode; + int res; + unsigned int lookup_flags = LOOKUP_FOLLOW; + const struct cred *old_cred = NULL; + + if (mode & ~S_IRWXO) /* where's F_OK, X_OK, W_OK, R_OK? */ + return -EINVAL; + + if (flags & ~(AT_EACCESS | AT_SYMLINK_NOFOLLOW | AT_EMPTY_PATH)) + return -EINVAL; + + if (flags & AT_SYMLINK_NOFOLLOW) + lookup_flags &= ~LOOKUP_FOLLOW; + if (flags & AT_EMPTY_PATH) + lookup_flags |= LOOKUP_EMPTY; + + if (!(flags & AT_EACCESS)) { + old_cred = access_override_creds(); + if (!old_cred) + return -ENOMEM; + } + retry: res = user_path_at(dfd, filename, lookup_flags, &path); if (res) @@ -435,19 +460,26 @@ out_path_release: goto retry; } out: - revert_creds(old_cred); - put_cred(override_cred); + if (old_cred) + revert_creds(old_cred); + return res; } SYSCALL_DEFINE3(faccessat, int, dfd, const char __user *, filename, int, mode) { - return do_faccessat(dfd, filename, mode); + return do_faccessat(dfd, filename, mode, 0); +} + +SYSCALL_DEFINE4(faccessat2, int, dfd, const char __user *, filename, int, mode, + int, flags) +{ + return do_faccessat(dfd, filename, mode, flags); } SYSCALL_DEFINE2(access, const char __user *, filename, int, mode) { - return do_faccessat(AT_FDCWD, filename, mode); + return do_faccessat(AT_FDCWD, filename, mode, 0); } int ksys_chdir(const char __user *filename) diff --git a/fs/proc_namespace.c b/fs/proc_namespace.c index 273ee82d8aa9..e4d70c0dffe9 100644 --- a/fs/proc_namespace.c +++ b/fs/proc_namespace.c @@ -279,7 +279,8 @@ static int mounts_open_common(struct inode *inode, struct file *file, p->ns = ns; p->root = root; p->show = show; - p->cached_event = ~0ULL; + INIT_LIST_HEAD(&p->cursor.mnt_list); + p->cursor.mnt.mnt_flags = MNT_CURSOR; return 0; @@ -296,6 +297,7 @@ static int mounts_release(struct inode *inode, struct file *file) struct seq_file *m = file->private_data; struct proc_mounts *p = m->private; path_put(&p->root); + mnt_cursor_del(p->ns, &p->cursor); put_mnt_ns(p->ns); return seq_release_private(inode, file); } diff --git a/fs/stat.c b/fs/stat.c index 030008796479..b9faa6cafafe 100644 --- a/fs/stat.c +++ b/fs/stat.c @@ -22,6 +22,7 @@ #include <asm/unistd.h> #include "internal.h" +#include "mount.h" /** * generic_fillattr - Fill in the basic attributes from the inode struct @@ -70,11 +71,11 @@ int vfs_getattr_nosec(const struct path *path, struct kstat *stat, memset(stat, 0, sizeof(*stat)); stat->result_mask |= STATX_BASIC_STATS; - request_mask &= STATX_ALL; query_flags &= KSTAT_QUERY_FLAGS; /* allow the fs to override these if it really wants to */ - if (IS_NOATIME(inode)) + /* SB_NOATIME means filesystem supplies dummy atime value */ + if (inode->i_sb->s_flags & SB_NOATIME) stat->result_mask &= ~STATX_ATIME; if (IS_AUTOMOUNT(inode)) stat->attributes |= STATX_ATTR_AUTOMOUNT; @@ -199,6 +200,11 @@ retry: goto out; error = vfs_getattr(&path, stat, request_mask, flags); + stat->mnt_id = real_mount(path.mnt)->mnt_id; + stat->result_mask |= STATX_MNT_ID; + if (path.mnt->mnt_root == path.dentry) + stat->attributes |= STATX_ATTR_MOUNT_ROOT; + stat->attributes_mask |= STATX_ATTR_MOUNT_ROOT; path_put(&path); if (retry_estale(error, lookup_flags)) { lookup_flags |= LOOKUP_REVAL; @@ -563,6 +569,7 @@ cp_statx(const struct kstat *stat, struct statx __user *buffer) tmp.stx_rdev_minor = MINOR(stat->rdev); tmp.stx_dev_major = MAJOR(stat->dev); tmp.stx_dev_minor = MINOR(stat->dev); + tmp.stx_mnt_id = stat->mnt_id; return copy_to_user(buffer, &tmp, sizeof(tmp)) ? -EFAULT : 0; } diff --git a/fs/utimes.c b/fs/utimes.c index 1d17ce98cb80..b7b927502d6e 100644 --- a/fs/utimes.c +++ b/fs/utimes.c @@ -95,13 +95,13 @@ long do_utimes(int dfd, const char __user *filename, struct timespec64 *times, goto out; } - if (flags & ~AT_SYMLINK_NOFOLLOW) + if (flags & ~(AT_SYMLINK_NOFOLLOW | AT_EMPTY_PATH)) goto out; if (filename == NULL && dfd != AT_FDCWD) { struct fd f; - if (flags & AT_SYMLINK_NOFOLLOW) + if (flags) goto out; f = fdget(dfd); @@ -117,6 +117,8 @@ long do_utimes(int dfd, const char __user *filename, struct timespec64 *times, if (!(flags & AT_SYMLINK_NOFOLLOW)) lookup_flags |= LOOKUP_FOLLOW; + if (flags & AT_EMPTY_PATH) + lookup_flags |= LOOKUP_EMPTY; retry: error = user_path_at(dfd, filename, lookup_flags, &path); if (error) |