diff options
author | Pavel Tikhomirov | 2021-07-15 13:07:13 +0300 |
---|---|---|
committer | Christian Brauner | 2021-07-26 14:45:18 +0200 |
commit | 9ffb14ef61bab83fa818736bf3e7e6b6e182e8e2 (patch) | |
tree | 9c620397ea2922a585b986548ebbe6e5028aa1b8 /fs/namespace.c | |
parent | ff1176468d368232b684f75e82563369208bc371 (diff) |
move_mount: allow to add a mount into an existing group
Previously a sharing group (shared and master ids pair) can be only
inherited when mount is created via bindmount. This patch adds an
ability to add an existing private mount into an existing sharing group.
With this functionality one can first create the desired mount tree from
only private mounts (without the need to care about undesired mount
propagation or mount creation order implied by sharing group
dependencies), and next then setup any desired mount sharing between
those mounts in tree as needed.
This allows CRIU to restore any set of mount namespaces, mount trees and
sharing group trees for a container.
We have many issues with restoring mounts in CRIU related to sharing
groups and propagation:
- reverse sharing groups vs mount tree order requires complex mounts
reordering which mostly implies also using some temporary mounts
(please see https://lkml.org/lkml/2021/3/23/569 for more info)
- mount() syscall creates tons of mounts due to propagation
- mount re-parenting due to propagation
- "Mount Trap" due to propagation
- "Non Uniform" propagation, meaning that with different tricks with
mount order and temporary children-"lock" mounts one can create mount
trees which can't be restored without those tricks
(see https://www.linuxplumbersconf.org/event/7/contributions/640/)
With this new functionality we can resolve all the problems with
propagation at once.
Link: https://lore.kernel.org/r/20210715100714.120228-1-ptikhomirov@virtuozzo.com
Cc: Eric W. Biederman <ebiederm@xmission.com>
Cc: Alexander Viro <viro@zeniv.linux.org.uk>
Cc: Christian Brauner <christian.brauner@ubuntu.com>
Cc: Mattias Nissler <mnissler@chromium.org>
Cc: Aleksa Sarai <cyphar@cyphar.com>
Cc: Andrei Vagin <avagin@gmail.com>
Cc: linux-fsdevel@vger.kernel.org
Cc: linux-api@vger.kernel.org
Cc: lkml <linux-kernel@vger.kernel.org>
Co-developed-by: Andrei Vagin <avagin@gmail.com>
Acked-by: Christian Brauner <christian.brauner@ubuntu.com>
Signed-off-by: Pavel Tikhomirov <ptikhomirov@virtuozzo.com>
Signed-off-by: Andrei Vagin <avagin@gmail.com>
Signed-off-by: Christian Brauner <christian.brauner@ubuntu.com>
Diffstat (limited to 'fs/namespace.c')
-rw-r--r-- | fs/namespace.c | 77 |
1 files changed, 76 insertions, 1 deletions
diff --git a/fs/namespace.c b/fs/namespace.c index ab4174a3c802..5d0b477c2682 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -2684,6 +2684,78 @@ out: return ret; } +static int do_set_group(struct path *from_path, struct path *to_path) +{ + struct mount *from, *to; + int err; + + from = real_mount(from_path->mnt); + to = real_mount(to_path->mnt); + + namespace_lock(); + + err = -EINVAL; + /* To and From must be mounted */ + if (!is_mounted(&from->mnt)) + goto out; + if (!is_mounted(&to->mnt)) + goto out; + + err = -EPERM; + /* We should be allowed to modify mount namespaces of both mounts */ + if (!ns_capable(from->mnt_ns->user_ns, CAP_SYS_ADMIN)) + goto out; + if (!ns_capable(to->mnt_ns->user_ns, CAP_SYS_ADMIN)) + goto out; + + err = -EINVAL; + /* To and From paths should be mount roots */ + if (from_path->dentry != from_path->mnt->mnt_root) + goto out; + if (to_path->dentry != to_path->mnt->mnt_root) + goto out; + + /* Setting sharing groups is only allowed across same superblock */ + if (from->mnt.mnt_sb != to->mnt.mnt_sb) + goto out; + + /* From mount root should be wider than To mount root */ + if (!is_subdir(to->mnt.mnt_root, from->mnt.mnt_root)) + goto out; + + /* From mount should not have locked children in place of To's root */ + if (has_locked_children(from, to->mnt.mnt_root)) + goto out; + + /* Setting sharing groups is only allowed on private mounts */ + if (IS_MNT_SHARED(to) || IS_MNT_SLAVE(to)) + goto out; + + /* From should not be private */ + if (!IS_MNT_SHARED(from) && !IS_MNT_SLAVE(from)) + goto out; + + if (IS_MNT_SLAVE(from)) { + struct mount *m = from->mnt_master; + + list_add(&to->mnt_slave, &m->mnt_slave_list); + to->mnt_master = m; + } + + if (IS_MNT_SHARED(from)) { + to->mnt_group_id = from->mnt_group_id; + list_add(&to->mnt_share, &from->mnt_share); + lock_mount_hash(); + set_mnt_shared(to); + unlock_mount_hash(); + } + + err = 0; +out: + namespace_unlock(); + return err; +} + static int do_move_mount(struct path *old_path, struct path *new_path) { struct mnt_namespace *ns; @@ -3669,7 +3741,10 @@ SYSCALL_DEFINE5(move_mount, if (ret < 0) goto out_to; - ret = do_move_mount(&from_path, &to_path); + if (flags & MOVE_MOUNT_SET_GROUP) + ret = do_set_group(&from_path, &to_path); + else + ret = do_move_mount(&from_path, &to_path); out_to: path_put(&to_path); |