aboutsummaryrefslogtreecommitdiff
path: root/fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs')
-rw-r--r--fs/aio.c8
-rw-r--r--fs/char_dev.c3
-rw-r--r--fs/fs_context.c30
-rw-r--r--fs/internal.h1
-rw-r--r--fs/mount.h12
-rw-r--r--fs/namei.c21
-rw-r--r--fs/namespace.c91
-rw-r--r--fs/open.c58
-rw-r--r--fs/proc_namespace.c4
-rw-r--r--fs/stat.c11
-rw-r--r--fs/utimes.c6
11 files changed, 159 insertions, 86 deletions
diff --git a/fs/aio.c b/fs/aio.c
index 5f3d3d814928..6483f9274d5e 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -176,6 +176,7 @@ struct fsync_iocb {
struct file *file;
struct work_struct work;
bool datasync;
+ struct cred *creds;
};
struct poll_iocb {
@@ -1589,8 +1590,11 @@ static int aio_write(struct kiocb *req, const struct iocb *iocb,
static void aio_fsync_work(struct work_struct *work) /* work handler that performs the fsync for one aio_kiocb */
{
struct aio_kiocb *iocb = container_of(work, struct aio_kiocb, fsync.work);
+ const struct cred *old_cred = override_creds(iocb->fsync.creds); /* run the fsync under the creds stored in the request */
iocb->ki_res.res = vfs_fsync(iocb->fsync.file, iocb->fsync.datasync);
+ revert_creds(old_cred); /* restore the creds that were in effect before the override */
+ put_cred(iocb->fsync.creds); /* drop the stored reference; presumably the one aio_fsync() obtained via prepare_creds() */
iocb_put(iocb);
}
@@ -1604,6 +1608,10 @@ static int aio_fsync(struct fsync_iocb *req, const struct iocb *iocb,
if (unlikely(!req->file->f_op->fsync))
return -EINVAL;
+ req->creds = prepare_creds();
+ if (!req->creds)
+ return -ENOMEM;
+
req->datasync = datasync;
INIT_WORK(&req->work, aio_fsync_work);
schedule_work(&req->work);
diff --git a/fs/char_dev.c b/fs/char_dev.c
index c5e6eff5a381..ba0ded7842a7 100644
--- a/fs/char_dev.c
+++ b/fs/char_dev.c
@@ -483,6 +483,9 @@ int cdev_add(struct cdev *p, dev_t dev, unsigned count)
p->dev = dev;
p->count = count;
+ if (WARN_ON(dev == WHITEOUT_DEV))
+ return -EBUSY;
+
error = kobj_map(cdev_map, dev, count, NULL,
exact_match, exact_lock, p);
if (error)
diff --git a/fs/fs_context.c b/fs/fs_context.c
index fc9f6ef93b55..7d5c5dd2b1d5 100644
--- a/fs/fs_context.c
+++ b/fs/fs_context.c
@@ -42,7 +42,6 @@ static const struct constant_table common_set_sb_flag[] = {
{ "dirsync", SB_DIRSYNC },
{ "lazytime", SB_LAZYTIME },
{ "mand", SB_MANDLOCK },
- { "posixacl", SB_POSIXACL },
{ "ro", SB_RDONLY },
{ "sync", SB_SYNCHRONOUS },
{ },
@@ -53,44 +52,15 @@ static const struct constant_table common_clear_sb_flag[] = {
{ "nolazytime", SB_LAZYTIME },
{ "nomand", SB_MANDLOCK },
{ "rw", SB_RDONLY },
- { "silent", SB_SILENT },
{ },
};
-static const char *const forbidden_sb_flag[] = {
- "bind",
- "dev",
- "exec",
- "move",
- "noatime",
- "nodev",
- "nodiratime",
- "noexec",
- "norelatime",
- "nostrictatime",
- "nosuid",
- "private",
- "rec",
- "relatime",
- "remount",
- "shared",
- "slave",
- "strictatime",
- "suid",
- "unbindable",
-};
-
/*
* Check for a common mount option that manipulates s_flags.
*/
static int vfs_parse_sb_flag(struct fs_context *fc, const char *key)
{
unsigned int token;
- unsigned int i;
-
- for (i = 0; i < ARRAY_SIZE(forbidden_sb_flag); i++)
- if (strcmp(key, forbidden_sb_flag[i]) == 0)
- return -EINVAL;
token = lookup_constant(common_set_sb_flag, key, 0);
if (token) {
diff --git a/fs/internal.h b/fs/internal.h
index aa5d45524e87..0d467e32dd7e 100644
--- a/fs/internal.h
+++ b/fs/internal.h
@@ -126,7 +126,6 @@ extern struct open_how build_open_how(int flags, umode_t mode);
extern int build_open_flags(const struct open_how *how, struct open_flags *op);
long do_sys_ftruncate(unsigned int fd, loff_t length, int small);
-long do_faccessat(int dfd, const char __user *filename, int mode);
int do_fchmodat(int dfd, const char __user *filename, umode_t mode);
int do_fchownat(int dfd, const char __user *filename, uid_t user, gid_t group,
int flag);
diff --git a/fs/mount.h b/fs/mount.h
index 711a4093e475..c7abb7b394d8 100644
--- a/fs/mount.h
+++ b/fs/mount.h
@@ -9,7 +9,13 @@ struct mnt_namespace {
atomic_t count;
struct ns_common ns;
struct mount * root;
+ /*
+ * Traversal and modification of .list is protected by either
+ * - taking namespace_sem for write, OR
+ * - taking namespace_sem for read AND taking .ns_lock.
+ */
struct list_head list;
+ spinlock_t ns_lock;
struct user_namespace *user_ns;
struct ucounts *ucounts;
u64 seq; /* Sequence number to prevent loops */
@@ -133,9 +139,7 @@ struct proc_mounts {
struct mnt_namespace *ns;
struct path root;
int (*show)(struct seq_file *, struct vfsmount *);
- void *cached_mount;
- u64 cached_event;
- loff_t cached_index;
+ struct mount cursor;
};
extern const struct seq_operations mounts_op;
@@ -153,3 +157,5 @@ static inline bool is_anon_ns(struct mnt_namespace *ns)
{
return ns->seq == 0;
}
+
+extern void mnt_cursor_del(struct mnt_namespace *ns, struct mount *cursor);
diff --git a/fs/namei.c b/fs/namei.c
index a320371899cf..d81f73ff1a8b 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -3505,12 +3505,14 @@ EXPORT_SYMBOL(user_path_create);
int vfs_mknod(struct inode *dir, struct dentry *dentry, umode_t mode, dev_t dev)
{
+ bool is_whiteout = S_ISCHR(mode) && dev == WHITEOUT_DEV;
int error = may_create(dir, dentry);
if (error)
return error;
- if ((S_ISCHR(mode) || S_ISBLK(mode)) && !capable(CAP_MKNOD))
+ if ((S_ISCHR(mode) || S_ISBLK(mode)) && !is_whiteout &&
+ !capable(CAP_MKNOD))
return -EPERM;
if (!dir->i_op->mknod)
@@ -4345,9 +4347,6 @@ static int do_renameat2(int olddfd, const char __user *oldname, int newdfd,
(flags & RENAME_EXCHANGE))
return -EINVAL;
- if ((flags & RENAME_WHITEOUT) && !capable(CAP_MKNOD))
- return -EPERM;
-
if (flags & RENAME_EXCHANGE)
target_flags = 0;
@@ -4483,20 +4482,6 @@ SYSCALL_DEFINE2(rename, const char __user *, oldname, const char __user *, newna
return do_renameat2(AT_FDCWD, oldname, AT_FDCWD, newname, 0);
}
-int vfs_whiteout(struct inode *dir, struct dentry *dentry)
-{
- int error = may_create(dir, dentry);
- if (error)
- return error;
-
- if (!dir->i_op->mknod)
- return -EPERM;
-
- return dir->i_op->mknod(dir, dentry,
- S_IFCHR | WHITEOUT_MODE, WHITEOUT_DEV);
-}
-EXPORT_SYMBOL(vfs_whiteout);
-
int readlink_copy(char __user *buffer, int buflen, const char *link)
{
int len = PTR_ERR(link);
diff --git a/fs/namespace.c b/fs/namespace.c
index 5f036dc711b6..a6baee3c7904 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -648,6 +648,21 @@ struct vfsmount *lookup_mnt(const struct path *path)
return m;
}
+static inline void lock_ns_list(struct mnt_namespace *ns) /* take ns->ns_lock; pairs with unlock_ns_list() */
+{
+ spin_lock(&ns->ns_lock);
+}
+
+static inline void unlock_ns_list(struct mnt_namespace *ns) /* release ns->ns_lock taken by lock_ns_list() */
+{
+ spin_unlock(&ns->ns_lock);
+}
+
+static inline bool mnt_is_cursor(struct mount *mnt) /* true when mnt carries MNT_CURSOR in its mount flags */
+{
+ return mnt->mnt.mnt_flags & MNT_CURSOR;
+}
+
/*
* __is_local_mountpoint - Test to see if dentry is a mountpoint in the
* current mount namespace.
@@ -673,11 +688,15 @@ bool __is_local_mountpoint(struct dentry *dentry)
goto out;
down_read(&namespace_sem);
+ lock_ns_list(ns);
list_for_each_entry(mnt, &ns->list, mnt_list) {
+ if (mnt_is_cursor(mnt))
+ continue;
is_covered = (mnt->mnt_mountpoint == dentry);
if (is_covered)
break;
}
+ unlock_ns_list(ns);
up_read(&namespace_sem);
out:
return is_covered;
@@ -1245,46 +1264,71 @@ struct vfsmount *mnt_clone_internal(const struct path *path)
}
#ifdef CONFIG_PROC_FS
+static struct mount *mnt_list_next(struct mnt_namespace *ns, /* find the next non-cursor mount on ns->list, continuing from p */
+ struct list_head *p)
+{
+ struct mount *mnt, *ret = NULL; /* ret stays NULL when no non-cursor entry is found */
+
+ lock_ns_list(ns); /* ns_lock is held for the whole walk */
+ list_for_each_continue(p, &ns->list) {
+ mnt = list_entry(p, typeof(*mnt), mnt_list);
+ if (!mnt_is_cursor(mnt)) { /* skip entries flagged MNT_CURSOR */
+ ret = mnt;
+ break;
+ }
+ }
+ unlock_ns_list(ns);
+
+ return ret;
+}
+
/* iterator; we want it to have access to namespace_sem, thus here... */
static void *m_start(struct seq_file *m, loff_t *pos)
{
struct proc_mounts *p = m->private;
+ struct list_head *prev; /* list position the search continues from */
down_read(&namespace_sem); /* released in m_stop() */
- if (p->cached_event == p->ns->event) {
- void *v = p->cached_mount;
- if (*pos == p->cached_index)
- return v;
- if (*pos == p->cached_index + 1) {
- v = seq_list_next(v, &p->ns->list, &p->cached_index);
- return p->cached_mount = v;
- }
+ if (!*pos) { /* position 0: continue from the namespace's list head */
+ prev = &p->ns->list;
+ } else { /* non-zero position: continue from this reader's cursor */
+ prev = &p->cursor.mnt_list;
+
+ /* Read after we'd reached the end? */
+ if (list_empty(prev))
+ return NULL;
}
- p->cached_event = p->ns->event;
- p->cached_mount = seq_list_start(&p->ns->list, *pos);
- p->cached_index = *pos;
- return p->cached_mount;
+ return mnt_list_next(p->ns, prev); /* NULL when no non-cursor mount follows prev */
}
static void *m_next(struct seq_file *m, void *v, loff_t *pos)
{
struct proc_mounts *p = m->private;
+ struct mount *mnt = v; /* v is the struct mount returned by the previous m_start()/m_next() */
- p->cached_mount = seq_list_next(v, &p->ns->list, pos);
- p->cached_index = *pos;
- return p->cached_mount;
+ ++*pos; /* advance the position before looking up the next entry */
+ return mnt_list_next(p->ns, &mnt->mnt_list);
}
static void m_stop(struct seq_file *m, void *v)
{
+ struct proc_mounts *p = m->private;
+ struct mount *mnt = v; /* last entry handed out, or NULL when iteration reached the end */
+
+ lock_ns_list(p->ns);
+ if (mnt)
+ list_move_tail(&p->cursor.mnt_list, &mnt->mnt_list); /* re-link the cursor next to mnt so m_start() can continue from it */
+ else
+ list_del_init(&p->cursor.mnt_list); /* unlink and re-init the cursor so m_start()'s list_empty() check sees it as empty */
+ unlock_ns_list(p->ns);
up_read(&namespace_sem); /* pairs with down_read() in m_start() */
}
static int m_show(struct seq_file *m, void *v)
{
struct proc_mounts *p = m->private;
- struct mount *r = list_entry(v, struct mount, mnt_list);
+ struct mount *r = v; /* v is already a struct mount pointer, so no list_entry() conversion is needed */
return p->show(m, &r->mnt); /* delegate to the show callback stored in proc_mounts */
}
@@ -1294,6 +1338,15 @@ const struct seq_operations mounts_op = {
.stop = m_stop,
.show = m_show,
};
+
+void mnt_cursor_del(struct mnt_namespace *ns, struct mount *cursor) /* unlink a cursor entry from the mount list; called from mounts_release() */
+{
+ down_read(&namespace_sem); /* namespace_sem for read plus ns_lock, per the locking rule documented on mnt_namespace.list */
+ lock_ns_list(ns);
+ list_del(&cursor->mnt_list);
+ unlock_ns_list(ns);
+ up_read(&namespace_sem);
+}
#endif /* CONFIG_PROC_FS */
/**
@@ -3202,6 +3255,7 @@ static struct mnt_namespace *alloc_mnt_ns(struct user_namespace *user_ns, bool a
atomic_set(&new_ns->count, 1);
INIT_LIST_HEAD(&new_ns->list);
init_waitqueue_head(&new_ns->poll);
+ spin_lock_init(&new_ns->ns_lock);
new_ns->user_ns = get_user_ns(user_ns);
new_ns->ucounts = ucounts;
return new_ns;
@@ -3842,10 +3896,14 @@ static bool mnt_already_visible(struct mnt_namespace *ns,
bool visible = false;
down_read(&namespace_sem);
+ lock_ns_list(ns);
list_for_each_entry(mnt, &ns->list, mnt_list) {
struct mount *child;
int mnt_flags;
+ if (mnt_is_cursor(mnt))
+ continue;
+
if (mnt->mnt.mnt_sb->s_type != sb->s_type)
continue;
@@ -3893,6 +3951,7 @@ static bool mnt_already_visible(struct mnt_namespace *ns,
next: ;
}
found:
+ unlock_ns_list(ns);
up_read(&namespace_sem);
return visible;
}
diff --git a/fs/open.c b/fs/open.c
index 719b320ede52..e62b1db06638 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -345,21 +345,14 @@ SYSCALL_DEFINE4(fallocate, int, fd, int, mode, loff_t, offset, loff_t, len)
* We do this by temporarily clearing all FS-related capabilities and
* switching the fsuid/fsgid around to the real ones.
*/
-long do_faccessat(int dfd, const char __user *filename, int mode)
+static const struct cred *access_override_creds(void)
{
const struct cred *old_cred;
struct cred *override_cred;
- struct path path;
- struct inode *inode;
- int res;
- unsigned int lookup_flags = LOOKUP_FOLLOW;
-
- if (mode & ~S_IRWXO) /* where's F_OK, X_OK, W_OK, R_OK? */
- return -EINVAL;
override_cred = prepare_creds();
if (!override_cred)
- return -ENOMEM;
+ return NULL;
override_cred->fsuid = override_cred->uid;
override_cred->fsgid = override_cred->gid;
@@ -394,6 +387,38 @@ long do_faccessat(int dfd, const char __user *filename, int mode)
override_cred->non_rcu = 1;
old_cred = override_creds(override_cred);
+
+ /* override_creds() gets its own ref */
+ put_cred(override_cred);
+
+ return old_cred;
+}
+
+long do_faccessat(int dfd, const char __user *filename, int mode, int flags)
+{
+ struct path path;
+ struct inode *inode;
+ int res;
+ unsigned int lookup_flags = LOOKUP_FOLLOW;
+ const struct cred *old_cred = NULL;
+
+ if (mode & ~S_IRWXO) /* where's F_OK, X_OK, W_OK, R_OK? */
+ return -EINVAL;
+
+ if (flags & ~(AT_EACCESS | AT_SYMLINK_NOFOLLOW | AT_EMPTY_PATH))
+ return -EINVAL;
+
+ if (flags & AT_SYMLINK_NOFOLLOW)
+ lookup_flags &= ~LOOKUP_FOLLOW;
+ if (flags & AT_EMPTY_PATH)
+ lookup_flags |= LOOKUP_EMPTY;
+
+ if (!(flags & AT_EACCESS)) {
+ old_cred = access_override_creds();
+ if (!old_cred)
+ return -ENOMEM;
+ }
+
retry:
res = user_path_at(dfd, filename, lookup_flags, &path);
if (res)
@@ -435,19 +460,26 @@ out_path_release:
goto retry;
}
out:
- revert_creds(old_cred);
- put_cred(override_cred);
+ if (old_cred)
+ revert_creds(old_cred);
+
return res;
}
SYSCALL_DEFINE3(faccessat, int, dfd, const char __user *, filename, int, mode)
{
- return do_faccessat(dfd, filename, mode);
+ return do_faccessat(dfd, filename, mode, 0); /* this entry point takes no flags argument, so pass 0 */
+}
+
+SYSCALL_DEFINE4(faccessat2, int, dfd, const char __user *, filename, int, mode, /* faccessat variant that forwards a caller-supplied flags word */
+ int, flags)
+{
+ return do_faccessat(dfd, filename, mode, flags); /* flags are validated inside do_faccessat() */
}
SYSCALL_DEFINE2(access, const char __user *, filename, int, mode)
{
- return do_faccessat(AT_FDCWD, filename, mode);
+ return do_faccessat(AT_FDCWD, filename, mode, 0); /* AT_FDCWD as the directory fd, no flags */
}
int ksys_chdir(const char __user *filename)
diff --git a/fs/proc_namespace.c b/fs/proc_namespace.c
index 273ee82d8aa9..e4d70c0dffe9 100644
--- a/fs/proc_namespace.c
+++ b/fs/proc_namespace.c
@@ -279,7 +279,8 @@ static int mounts_open_common(struct inode *inode, struct file *file,
p->ns = ns;
p->root = root;
p->show = show;
- p->cached_event = ~0ULL;
+ INIT_LIST_HEAD(&p->cursor.mnt_list);
+ p->cursor.mnt.mnt_flags = MNT_CURSOR;
return 0;
@@ -296,6 +297,7 @@ static int mounts_release(struct inode *inode, struct file *file)
struct seq_file *m = file->private_data;
struct proc_mounts *p = m->private;
path_put(&p->root);
+ mnt_cursor_del(p->ns, &p->cursor);
put_mnt_ns(p->ns);
return seq_release_private(inode, file);
}
diff --git a/fs/stat.c b/fs/stat.c
index 030008796479..b9faa6cafafe 100644
--- a/fs/stat.c
+++ b/fs/stat.c
@@ -22,6 +22,7 @@
#include <asm/unistd.h>
#include "internal.h"
+#include "mount.h"
/**
* generic_fillattr - Fill in the basic attributes from the inode struct
@@ -70,11 +71,11 @@ int vfs_getattr_nosec(const struct path *path, struct kstat *stat,
memset(stat, 0, sizeof(*stat));
stat->result_mask |= STATX_BASIC_STATS;
- request_mask &= STATX_ALL;
query_flags &= KSTAT_QUERY_FLAGS;
/* allow the fs to override these if it really wants to */
- if (IS_NOATIME(inode))
+ /* SB_NOATIME means filesystem supplies dummy atime value */
+ if (inode->i_sb->s_flags & SB_NOATIME)
stat->result_mask &= ~STATX_ATIME;
if (IS_AUTOMOUNT(inode))
stat->attributes |= STATX_ATTR_AUTOMOUNT;
@@ -199,6 +200,11 @@ retry:
goto out;
error = vfs_getattr(&path, stat, request_mask, flags);
+ stat->mnt_id = real_mount(path.mnt)->mnt_id;
+ stat->result_mask |= STATX_MNT_ID;
+ if (path.mnt->mnt_root == path.dentry)
+ stat->attributes |= STATX_ATTR_MOUNT_ROOT;
+ stat->attributes_mask |= STATX_ATTR_MOUNT_ROOT;
path_put(&path);
if (retry_estale(error, lookup_flags)) {
lookup_flags |= LOOKUP_REVAL;
@@ -563,6 +569,7 @@ cp_statx(const struct kstat *stat, struct statx __user *buffer)
tmp.stx_rdev_minor = MINOR(stat->rdev);
tmp.stx_dev_major = MAJOR(stat->dev);
tmp.stx_dev_minor = MINOR(stat->dev);
+ tmp.stx_mnt_id = stat->mnt_id;
return copy_to_user(buffer, &tmp, sizeof(tmp)) ? -EFAULT : 0;
}
diff --git a/fs/utimes.c b/fs/utimes.c
index 1d17ce98cb80..b7b927502d6e 100644
--- a/fs/utimes.c
+++ b/fs/utimes.c
@@ -95,13 +95,13 @@ long do_utimes(int dfd, const char __user *filename, struct timespec64 *times,
goto out;
}
- if (flags & ~AT_SYMLINK_NOFOLLOW)
+ if (flags & ~(AT_SYMLINK_NOFOLLOW | AT_EMPTY_PATH))
goto out;
if (filename == NULL && dfd != AT_FDCWD) {
struct fd f;
- if (flags & AT_SYMLINK_NOFOLLOW)
+ if (flags)
goto out;
f = fdget(dfd);
@@ -117,6 +117,8 @@ long do_utimes(int dfd, const char __user *filename, struct timespec64 *times,
if (!(flags & AT_SYMLINK_NOFOLLOW))
lookup_flags |= LOOKUP_FOLLOW;
+ if (flags & AT_EMPTY_PATH)
+ lookup_flags |= LOOKUP_EMPTY;
retry:
error = user_path_at(dfd, filename, lookup_flags, &path);
if (error)