From 2b5f9dad32ed19e8db3b0f10a84aa824a219803b Mon Sep 17 00:00:00 2001 From: Andrei Vagin Date: Tue, 20 Sep 2022 17:31:19 -0700 Subject: fs/exec: switch timens when a task gets a new mm Changing a time namespace requires remapping a vvar page, so we don't want to allow doing that if any other tasks can use the same mm. Currently, we install a time namespace when a task is created with a new vm. exec() is another case when a task gets a new mm and so it can switch a time namespace safely, but it isn't handled now. One more issue of the current interface is that clone() with CLONE_VM isn't allowed if the current task has unshared a time namespace (timens_for_children doesn't match the current timens). Both these issues make some inconvenience for users. For example, Alexey and Florian reported that posix_spawn() uses vfork+exec and this pattern doesn't work with time namespaces due to the both described issues. LXC needed to workaround the exec() issue by calling setns. In the commit 133e2d3e81de5 ("fs/exec: allow to unshare a time namespace on vfork+exec"), we tried to fix these issues with minimal impact on UAPI. But it adds extra complexity and some undesirable side effects. Eric suggested fixing the issues properly because here are all the reasons to suppose that there are no users that depend on the old behavior. Cc: Alexey Izbyshev Cc: Christian Brauner Cc: Dmitry Safonov <0x7f454c46@gmail.com> Cc: "Eric W. Biederman" Cc: Florian Weimer Cc: Kees Cook Suggested-by: "Eric W. Biederman" Origin-author: "Eric W. Biederman" Signed-off-by: Andrei Vagin Signed-off-by: Kees Cook Link: https://lore.kernel.org/r/20220921003120.209637-1-avagin@google.com --- fs/exec.c | 5 +++++ include/linux/nsproxy.h | 1 + kernel/fork.c | 9 --------- kernel/nsproxy.c | 23 +++++++++++++++++++++-- 4 files changed, 27 insertions(+), 11 deletions(-) diff --git a/fs/exec.c b/fs/exec.c index 32dc8cf5fceb..34e6a2e53268 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -64,6 +64,7 @@ #include #include #include +#include #include #include @@ -1297,6 +1298,10 @@ int begin_new_exec(struct linux_binprm * bprm) bprm->mm = NULL; + retval = exec_task_namespaces(); + if (retval) + goto out_unlock; + #ifdef CONFIG_POSIX_TIMERS spin_lock_irq(&me->sighand->siglock); posix_cpu_timers_exit(me); diff --git a/include/linux/nsproxy.h b/include/linux/nsproxy.h index cdb171efc7cb..fee881cded01 100644 --- a/include/linux/nsproxy.h +++ b/include/linux/nsproxy.h @@ -94,6 +94,7 @@ static inline struct cred *nsset_cred(struct nsset *set) int copy_namespaces(unsigned long flags, struct task_struct *tsk); void exit_task_namespaces(struct task_struct *tsk); void switch_task_namespaces(struct task_struct *tsk, struct nsproxy *new); +int exec_task_namespaces(void); void free_nsproxy(struct nsproxy *ns); int unshare_nsproxy_namespaces(unsigned long, struct nsproxy **, struct cred *, struct fs_struct *); diff --git a/kernel/fork.c b/kernel/fork.c index 08969f5aa38d..44224a9b004c 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -2043,15 +2043,6 @@ static __latent_entropy struct task_struct *copy_process( return ERR_PTR(-EINVAL); } - /* - * If the new process will be in a different time namespace - * do not allow it to share VM or a thread group with the forking task. - */ - if (clone_flags & (CLONE_THREAD | CLONE_VM)) { - if (nsp->time_ns != nsp->time_ns_for_children) - return ERR_PTR(-EINVAL); - } - if (clone_flags & CLONE_PIDFD) { /* * - CLONE_DETACHED is blocked so that we can potentially diff --git a/kernel/nsproxy.c b/kernel/nsproxy.c index eec72ca962e2..a487ff24129b 100644 --- a/kernel/nsproxy.c +++ b/kernel/nsproxy.c @@ -157,7 +157,8 @@ int copy_namespaces(unsigned long flags, struct task_struct *tsk) if (likely(!(flags & (CLONE_NEWNS | CLONE_NEWUTS | CLONE_NEWIPC | CLONE_NEWPID | CLONE_NEWNET | CLONE_NEWCGROUP | CLONE_NEWTIME)))) { - if (likely(old_ns->time_ns_for_children == old_ns->time_ns)) { + if ((flags & CLONE_VM) || + likely(old_ns->time_ns_for_children == old_ns->time_ns)) { get_nsproxy(old_ns); return 0; } @@ -179,7 +180,8 @@ int copy_namespaces(unsigned long flags, struct task_struct *tsk) if (IS_ERR(new_ns)) return PTR_ERR(new_ns); - timens_on_fork(new_ns, tsk); + if ((flags & CLONE_VM) == 0) + timens_on_fork(new_ns, tsk); tsk->nsproxy = new_ns; return 0; @@ -254,6 +256,23 @@ void exit_task_namespaces(struct task_struct *p) switch_task_namespaces(p, NULL); } +int exec_task_namespaces(void) +{ + struct task_struct *tsk = current; + struct nsproxy *new; + + if (tsk->nsproxy->time_ns_for_children == tsk->nsproxy->time_ns) + return 0; + + new = create_new_namespaces(0, tsk, current_user_ns(), tsk->fs); + if (IS_ERR(new)) + return PTR_ERR(new); + + timens_on_fork(new, tsk); + switch_task_namespaces(tsk, new); + return 0; +} + static int check_setns_flags(unsigned long flags) { if (!flags || (flags & ~(CLONE_NEWNS | CLONE_NEWUTS | CLONE_NEWIPC | -- cgit v1.2.3 From af4fddffbe95855ef3b413158fa8e6e95899f2b2 Mon Sep 17 00:00:00 2001 From: Andrei Vagin Date: Thu, 13 Oct 2022 10:31:54 -0700 Subject: selftests/timens: add a test for vfork+exit * check that a child process is in parent's time namespace after vfork. * check that a child process is in the target namespace after exec. Output on success: 1..4 ok 1 parent before vfork ok 2 child after exec ok 3 wait for child ok 4 parent after vfork # Totals: pass:4 fail:0 xfail:0 xpass:0 skip:0 error:0 Signed-off-by: Andrei Vagin Signed-off-by: Kees Cook Link: https://lore.kernel.org/r/20221013173154.291597-1-avagin@google.com --- tools/testing/selftests/timens/.gitignore | 1 + tools/testing/selftests/timens/Makefile | 2 +- tools/testing/selftests/timens/vfork_exec.c | 139 ++++++++++++++++++++++++++++ 3 files changed, 141 insertions(+), 1 deletion(-) create mode 100644 tools/testing/selftests/timens/vfork_exec.c diff --git a/tools/testing/selftests/timens/.gitignore b/tools/testing/selftests/timens/.gitignore index fe1eb8271b35..cae8dca0fbff 100644 --- a/tools/testing/selftests/timens/.gitignore +++ b/tools/testing/selftests/timens/.gitignore @@ -8,3 +8,4 @@ procfs timens timer timerfd +vfork_exec diff --git a/tools/testing/selftests/timens/Makefile b/tools/testing/selftests/timens/Makefile index 3a5936cc10ab..f0d51d4d2c87 100644 --- a/tools/testing/selftests/timens/Makefile +++ b/tools/testing/selftests/timens/Makefile @@ -1,4 +1,4 @@ -TEST_GEN_PROGS := timens timerfd timer clock_nanosleep procfs exec futex +TEST_GEN_PROGS := timens timerfd timer clock_nanosleep procfs exec futex vfork_exec TEST_GEN_PROGS_EXTENDED := gettime_perf CFLAGS := -Wall -Werror -pthread diff --git a/tools/testing/selftests/timens/vfork_exec.c b/tools/testing/selftests/timens/vfork_exec.c new file mode 100644 index 000000000000..beb7614941fb --- /dev/null +++ b/tools/testing/selftests/timens/vfork_exec.c @@ -0,0 +1,139 @@ +// SPDX-License-Identifier: GPL-2.0 +#define _GNU_SOURCE +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "log.h" +#include "timens.h" + +#define OFFSET (36000) + +struct thread_args { + char *tst_name; + struct timespec *now; +}; + +static void *tcheck(void *_args) +{ + struct thread_args *args = _args; + struct timespec *now = args->now, tst; + int i; + + for (i = 0; i < 2; i++) { + _gettime(CLOCK_MONOTONIC, &tst, i); + if (abs(tst.tv_sec - now->tv_sec) > 5) { + pr_fail("%s: in-thread: unexpected value: %ld (%ld)\n", + args->tst_name, tst.tv_sec, now->tv_sec); + return (void *)1UL; + } + } + return NULL; +} + +static int check_in_thread(char *tst_name, struct timespec *now) +{ + struct thread_args args = { + .tst_name = tst_name, + .now = now, + }; + pthread_t th; + void *retval; + + if (pthread_create(&th, NULL, tcheck, &args)) + return pr_perror("thread"); + if (pthread_join(th, &retval)) + return pr_perror("pthread_join"); + return !(retval == NULL); +} + +static int check(char *tst_name, struct timespec *now) +{ + struct timespec tst; + int i; + + for (i = 0; i < 2; i++) { + _gettime(CLOCK_MONOTONIC, &tst, i); + if (abs(tst.tv_sec - now->tv_sec) > 5) + return pr_fail("%s: unexpected value: %ld (%ld)\n", + tst_name, tst.tv_sec, now->tv_sec); + } + if (check_in_thread(tst_name, now)) + return 1; + ksft_test_result_pass("%s\n", tst_name); + return 0; +} + +int main(int argc, char *argv[]) +{ + struct timespec now; + int status; + pid_t pid; + + if (argc > 1) { + char *endptr; + + ksft_cnt.ksft_pass = 1; + now.tv_sec = strtoul(argv[1], &endptr, 0); + if (*endptr != 0) + return pr_perror("strtoul"); + + return check("child after exec", &now); + } + + nscheck(); + + ksft_set_plan(4); + + clock_gettime(CLOCK_MONOTONIC, &now); + + if (unshare_timens()) + return 1; + + if (_settime(CLOCK_MONOTONIC, OFFSET)) + return 1; + + if (check("parent before vfork", &now)) + return 1; + + pid = vfork(); + if (pid < 0) + return pr_perror("fork"); + + if (pid == 0) { + char now_str[64]; + char *cargv[] = {"exec", now_str, NULL}; + char *cenv[] = {NULL}; + + /* Check for proper vvar offsets after execve. */ + snprintf(now_str, sizeof(now_str), "%ld", now.tv_sec + OFFSET); + execve("/proc/self/exe", cargv, cenv); + pr_perror("execve"); + _exit(1); + } + + if (waitpid(pid, &status, 0) != pid) + return pr_perror("waitpid"); + + if (status) + ksft_exit_fail(); + ksft_inc_pass_cnt(); + ksft_test_result_pass("wait for child\n"); + + /* Check that we are still in the source timens. */ + if (check("parent after vfork", &now)) + return 1; + + ksft_exit_pass(); + return 0; +} -- cgit v1.2.3 From 23a7aea5faf6500051c52dfaba46845c41b3abd4 Mon Sep 17 00:00:00 2001 From: Rolf Eike Beer Date: Tue, 4 Oct 2022 12:25:40 +0200 Subject: ELF uapi: add spaces before '{' When searching for a struct definition I often enough end up simply doing git grep 'struct foobar {' Sadly some of the ELF structs did not follow the usual coding style so they were invisible. Signed-off-by: Rolf Eike Beer Signed-off-by: Kees Cook Link: https://lore.kernel.org/r/11563980.Ss37MnutNL@mobilepool36.emlix.com --- include/uapi/linux/elf.h | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/include/uapi/linux/elf.h b/include/uapi/linux/elf.h index c7b056af9ef0..775a17b5b8ac 100644 --- a/include/uapi/linux/elf.h +++ b/include/uapi/linux/elf.h @@ -140,9 +140,9 @@ typedef __s64 Elf64_Sxword; #define ELF64_ST_BIND(x) ELF_ST_BIND(x) #define ELF64_ST_TYPE(x) ELF_ST_TYPE(x) -typedef struct dynamic{ +typedef struct dynamic { Elf32_Sword d_tag; - union{ + union { Elf32_Sword d_val; Elf32_Addr d_ptr; } d_un; @@ -173,7 +173,7 @@ typedef struct elf64_rel { Elf64_Xword r_info; /* index and type of relocation */ } Elf64_Rel; -typedef struct elf32_rela{ +typedef struct elf32_rela { Elf32_Addr r_offset; Elf32_Word r_info; Elf32_Sword r_addend; @@ -185,7 +185,7 @@ typedef struct elf64_rela { Elf64_Sxword r_addend; /* Constant addend used to compute value */ } Elf64_Rela; -typedef struct elf32_sym{ +typedef struct elf32_sym { Elf32_Word st_name; Elf32_Addr st_value; Elf32_Word st_size; @@ -206,7 +206,7 @@ typedef struct elf64_sym { #define EI_NIDENT 16 -typedef struct elf32_hdr{ +typedef struct elf32_hdr { unsigned char e_ident[EI_NIDENT]; Elf32_Half e_type; Elf32_Half e_machine; @@ -246,7 +246,7 @@ typedef struct elf64_hdr { #define PF_W 0x2 #define PF_X 0x1 -typedef struct elf32_phdr{ +typedef struct elf32_phdr { Elf32_Word p_type; Elf32_Off p_offset; Elf32_Addr p_vaddr; -- cgit v1.2.3 From 275498a98b1fe77deebddfc4f8986c0cf2c3ced7 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Tue, 18 Oct 2022 00:17:24 -0700 Subject: exec: Add comments on check_unsafe_exec() fs counting Add some comments about what the fs counting is doing in check_unsafe_exec() and how it relates to the call graph. Specifically, we can't force an unshare of the fs because of at least Chrome: https://lore.kernel.org/lkml/86CE201B-5632-4BB7-BCF6-7CB2C2895409@chromium.org/ Cc: Eric Biederman Cc: linux-fsdevel@vger.kernel.org Signed-off-by: Kees Cook Acked-by: Christian Brauner (Microsoft) Link: https://lore.kernel.org/r/20221018071537.never.662-kees@kernel.org --- fs/exec.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/fs/exec.c b/fs/exec.c index 34e6a2e53268..b3e87bd50962 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -1573,6 +1573,12 @@ static void check_unsafe_exec(struct linux_binprm *bprm) if (task_no_new_privs(current)) bprm->unsafe |= LSM_UNSAFE_NO_NEW_PRIVS; + /* + * If another task is sharing our fs, we cannot safely + * suid exec because the differently privileged task + * will be able to manipulate the current directory, etc. + * It would be nice to force an unshare instead... + */ t = p; n_fs = 1; spin_lock(&p->fs->lock); @@ -1753,6 +1759,7 @@ static int search_binary_handler(struct linux_binprm *bprm) return retval; } +/* binfmt handlers will call back into begin_new_exec() on success. */ static int exec_binprm(struct linux_binprm *bprm) { pid_t old_pid, old_vpid; @@ -1811,6 +1818,11 @@ static int bprm_execve(struct linux_binprm *bprm, if (retval) return retval; + /* + * Check for unsafe execution states before exec_binprm(), which + * will call back into begin_new_exec(), into bprm_creds_from_file(), + * where setuid-ness is evaluated. + */ check_unsafe_exec(bprm); current->in_execve = 1; -- cgit v1.2.3 From 8f6e3f9e5a0f58e458a348b7e36af11d0e9702af Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Tue, 18 Oct 2022 00:14:20 -0700 Subject: binfmt: Fix whitespace issues Fix the annoying whitespace issues that have been following these files around for years. Cc: linux-fsdevel@vger.kernel.org Signed-off-by: Kees Cook Acked-by: Christian Brauner (Microsoft) Link: https://lore.kernel.org/r/20221018071350.never.230-kees@kernel.org --- fs/binfmt_elf.c | 14 +++++++------- fs/binfmt_elf_fdpic.c | 2 +- fs/exec.c | 2 +- include/uapi/linux/elf.h | 2 +- 4 files changed, 10 insertions(+), 10 deletions(-) diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c index 6a11025e5850..38a9496f83f3 100644 --- a/fs/binfmt_elf.c +++ b/fs/binfmt_elf.c @@ -248,7 +248,7 @@ create_elf_tables(struct linux_binprm *bprm, const struct elfhdr *exec, } while (0) #ifdef ARCH_DLINFO - /* + /* * ARCH_DLINFO must come first so PPC can do its special alignment of * AUXV. * update AT_VECTOR_SIZE_ARCH if the number of NEW_AUX_ENT() in @@ -1020,7 +1020,7 @@ out_free_interp: executable_stack); if (retval < 0) goto out_free_dentry; - + elf_bss = 0; elf_brk = 0; @@ -1043,7 +1043,7 @@ out_free_interp: if (unlikely (elf_brk > elf_bss)) { unsigned long nbyte; - + /* There was a PT_LOAD segment with p_memsz > p_filesz before this one. Map anonymous pages, if needed, and clear the area. */ @@ -1521,7 +1521,7 @@ static void fill_elf_note_phdr(struct elf_phdr *phdr, int sz, loff_t offset) phdr->p_align = 0; } -static void fill_note(struct memelfnote *note, const char *name, int type, +static void fill_note(struct memelfnote *note, const char *name, int type, unsigned int sz, void *data) { note->name = name; @@ -2004,8 +2004,8 @@ static int elf_dump_thread_status(long signr, struct elf_thread_status *t) t->num_notes = 0; fill_prstatus(&t->prstatus.common, p, signr); - elf_core_copy_task_regs(p, &t->prstatus.pr_reg); - + elf_core_copy_task_regs(p, &t->prstatus.pr_reg); + fill_note(&t->notes[0], "CORE", NT_PRSTATUS, sizeof(t->prstatus), &(t->prstatus)); t->num_notes++; @@ -2295,7 +2295,7 @@ static int elf_core_dump(struct coredump_params *cprm) if (!elf_core_write_extra_phdrs(cprm, offset)) goto end_coredump; - /* write out the notes section */ + /* write out the notes section */ if (!write_note_info(&info, cprm)) goto end_coredump; diff --git a/fs/binfmt_elf_fdpic.c b/fs/binfmt_elf_fdpic.c index 08d0c8797828..e90c1192dec6 100644 --- a/fs/binfmt_elf_fdpic.c +++ b/fs/binfmt_elf_fdpic.c @@ -1603,7 +1603,7 @@ static int elf_fdpic_core_dump(struct coredump_params *cprm) if (!elf_core_write_extra_phdrs(cprm, offset)) goto end_coredump; - /* write out the notes section */ + /* write out the notes section */ if (!writenote(thread_list->notes, cprm)) goto end_coredump; if (!writenote(&psinfo_note, cprm)) diff --git a/fs/exec.c b/fs/exec.c index b3e87bd50962..1644bf10fb9d 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -172,7 +172,7 @@ SYSCALL_DEFINE1(uselib, const char __user *, library) exit: fput(file); out: - return error; + return error; } #endif /* #ifdef CONFIG_USELIB */ diff --git a/include/uapi/linux/elf.h b/include/uapi/linux/elf.h index 775a17b5b8ac..4c6a8fa5e7ed 100644 --- a/include/uapi/linux/elf.h +++ b/include/uapi/linux/elf.h @@ -91,7 +91,7 @@ typedef __s64 Elf64_Sxword; #define DT_INIT 12 #define DT_FINI 13 #define DT_SONAME 14 -#define DT_RPATH 15 +#define DT_RPATH 15 #define DT_SYMBOLIC 16 #define DT_REL 17 #define DT_RELSZ 18 -- cgit v1.2.3 From bfb4a2b95875a47a01234f2de113ec089d524e71 Mon Sep 17 00:00:00 2001 From: Rolf Eike Beer Date: Wed, 19 Oct 2022 09:32:35 +0200 Subject: exec: simplify initial stack size expansion I had a hard time trying to understand completely why it is using vm_end in one side of the expression and vm_start in the other one, and using something in the "if" clause that is not an exact copy of what is used below. The whole point is that the stack_size variable that was used in the "if" clause is the difference between vm_start and vm_end, which is not far away but makes this thing harder to read than it must be. Signed-off-by: Rolf Eike Beer Signed-off-by: Kees Cook Link: https://lore.kernel.org/r/2017429.gqNitNVd0C@mobilepool36.emlix.com --- fs/exec.c | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/fs/exec.c b/fs/exec.c index 1644bf10fb9d..9585bc1bc970 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -843,16 +843,13 @@ int setup_arg_pages(struct linux_binprm *bprm, * will align it up. */ rlim_stack = bprm->rlim_stack.rlim_cur & PAGE_MASK; + + stack_expand = min(rlim_stack, stack_size + stack_expand); + #ifdef CONFIG_STACK_GROWSUP - if (stack_size + stack_expand > rlim_stack) - stack_base = vma->vm_start + rlim_stack; - else - stack_base = vma->vm_end + stack_expand; + stack_base = vma->vm_start + stack_expand; #else - if (stack_size + stack_expand > rlim_stack) - stack_base = vma->vm_end - rlim_stack; - else - stack_base = vma->vm_start - stack_expand; + stack_base = vma->vm_end - stack_expand; #endif current->mm->start_stack = bprm->p; ret = expand_stack(vma, stack_base); -- cgit v1.2.3 From cfc46ca4fdcaef6f0215f56d89162400aee402e3 Mon Sep 17 00:00:00 2001 From: Rolf Eike Beer Date: Wed, 19 Oct 2022 09:43:01 +0200 Subject: binfmt_elf: fix documented return value for load_elf_phdrs() This function has never returned anything but a plain NULL. Fixes: 6a8d38945cf4 ("binfmt_elf: Hoist ELF program header loading to a function") Signed-off-by: Rolf Eike Beer Signed-off-by: Kees Cook Link: https://lore.kernel.org/r/2359389.EDbqzprbEW@mobilepool36.emlix.com --- fs/binfmt_elf.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c index 38a9496f83f3..71df1a3c2dac 100644 --- a/fs/binfmt_elf.c +++ b/fs/binfmt_elf.c @@ -456,7 +456,7 @@ static unsigned long maximum_alignment(struct elf_phdr *cmds, int nr) * * Loads ELF program headers from the binary file elf_file, which has the ELF * header pointed to by elf_ex, into a newly allocated array. The caller is - * responsible for freeing the allocated data. Returns an ERR_PTR upon failure. + * responsible for freeing the allocated data. Returns NULL upon failure. */ static struct elf_phdr *load_elf_phdrs(const struct elfhdr *elf_ex, struct file *elf_file) -- cgit v1.2.3 From ef20c5139c3157b5c6eda46f496952bddffe9ad6 Mon Sep 17 00:00:00 2001 From: Rolf Eike Beer Date: Wed, 19 Oct 2022 09:52:16 +0200 Subject: binfmt_elf: simplify error handling in load_elf_phdrs() The err variable was the same like retval, but capped to <= 0. This is the same as retval as elf_read() never returns positive values. Signed-off-by: Rolf Eike Beer Signed-off-by: Kees Cook Link: https://lore.kernel.org/r/4137126.7Qn9TF0dmF@mobilepool36.emlix.com --- fs/binfmt_elf.c | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c index 71df1a3c2dac..528e2ac8931f 100644 --- a/fs/binfmt_elf.c +++ b/fs/binfmt_elf.c @@ -462,7 +462,7 @@ static struct elf_phdr *load_elf_phdrs(const struct elfhdr *elf_ex, struct file *elf_file) { struct elf_phdr *elf_phdata = NULL; - int retval, err = -1; + int retval = -1; unsigned int size; /* @@ -484,15 +484,9 @@ static struct elf_phdr *load_elf_phdrs(const struct elfhdr *elf_ex, /* Read in the program headers */ retval = elf_read(elf_file, elf_phdata, size, elf_ex->e_phoff); - if (retval < 0) { - err = retval; - goto out; - } - /* Success! */ - err = 0; out: - if (err) { + if (retval) { kfree(elf_phdata); elf_phdata = NULL; } -- cgit v1.2.3 From dc64cc12bcd14219afb91b55d23192c3eb45aa43 Mon Sep 17 00:00:00 2001 From: Bo Liu Date: Mon, 14 Nov 2022 22:17:57 -0500 Subject: binfmt_elf: replace IS_ERR() with IS_ERR_VALUE() Avoid typecasts that are needed for IS_ERR() and use IS_ERR_VALUE() instead. Signed-off-by: Bo Liu Signed-off-by: Kees Cook Link: https://lore.kernel.org/r/20221115031757.2426-1-liubo03@inspur.com --- fs/binfmt_elf.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c index 528e2ac8931f..d9af34f816a9 100644 --- a/fs/binfmt_elf.c +++ b/fs/binfmt_elf.c @@ -1160,7 +1160,7 @@ out_free_interp: error = elf_map(bprm->file, load_bias + vaddr, elf_ppnt, elf_prot, elf_flags, total_size); if (BAD_ADDR(error)) { - retval = IS_ERR((void *)error) ? + retval = IS_ERR_VALUE(error) ? PTR_ERR((void*)error) : -EINVAL; goto out_free_dentry; } @@ -1245,7 +1245,7 @@ out_free_interp: interpreter, load_bias, interp_elf_phdata, &arch_state); - if (!IS_ERR((void *)elf_entry)) { + if (!IS_ERR_VALUE(elf_entry)) { /* * load_elf_interp() returns relocation * adjustment @@ -1254,7 +1254,7 @@ out_free_interp: elf_entry += interp_elf_ex->e_entry; } if (BAD_ADDR(elf_entry)) { - retval = IS_ERR((void *)elf_entry) ? + retval = IS_ERR_VALUE(elf_entry) ? (int)elf_entry : -EINVAL; goto out_free_dentry; } -- cgit v1.2.3 From cd57e443831d8eeb083c7165bce195d886e216d4 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Thu, 17 Nov 2022 16:31:55 -0800 Subject: exec: Remove FOLL_FORCE for stack setup It does not appear that FOLL_FORCE should be needed for setting up the stack pages. They are allocated using the nascent brpm->vma, which was newly created with VM_STACK_FLAGS, which an arch can override, but they all appear to include VM_WRITE | VM_MAYWRITE. Remove FOLL_FORCE. Cc: Eric Biederman Cc: David Hildenbrand Cc: Linus Torvalds Cc: Alexander Viro Cc: linux-fsdevel@vger.kernel.org Cc: linux-mm@kvack.org Link: https://lore.kernel.org/lkml/202211171439.CDE720EAD@keescook/ Signed-off-by: Kees Cook --- fs/exec.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/exec.c b/fs/exec.c index 9585bc1bc970..870a707b5d3b 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -200,7 +200,7 @@ static struct page *get_arg_page(struct linux_binprm *bprm, unsigned long pos, { struct page *page; int ret; - unsigned int gup_flags = FOLL_FORCE; + unsigned int gup_flags = 0; #ifdef CONFIG_STACK_GROWSUP if (write) { -- cgit v1.2.3 From e7f703ff2507f4e9f496da96cd4b78fd3026120c Mon Sep 17 00:00:00 2001 From: Wang Yufen Date: Fri, 2 Dec 2022 09:41:01 +0800 Subject: binfmt: Fix error return code in load_elf_fdpic_binary() Fix to return a negative error code from create_elf_fdpic_tables() instead of 0. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Cc: stable@vger.kernel.org Signed-off-by: Wang Yufen Signed-off-by: Kees Cook Link: https://lore.kernel.org/r/1669945261-30271-1-git-send-email-wangyufen@huawei.com --- fs/binfmt_elf_fdpic.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/fs/binfmt_elf_fdpic.c b/fs/binfmt_elf_fdpic.c index e90c1192dec6..096e3520a0b1 100644 --- a/fs/binfmt_elf_fdpic.c +++ b/fs/binfmt_elf_fdpic.c @@ -434,8 +434,9 @@ static int load_elf_fdpic_binary(struct linux_binprm *bprm) current->mm->start_stack = current->mm->start_brk + stack_size; #endif - if (create_elf_fdpic_tables(bprm, current->mm, - &exec_params, &interp_params) < 0) + retval = create_elf_fdpic_tables(bprm, current->mm, &exec_params, + &interp_params); + if (retval < 0) goto error; kdebug("- start_code %lx", current->mm->start_code); -- cgit v1.2.3 From 6a46bf558803dd2b959ca7435a5c143efe837217 Mon Sep 17 00:00:00 2001 From: Liu Shixin Date: Wed, 2 Nov 2022 10:51:23 +0800 Subject: binfmt_misc: fix shift-out-of-bounds in check_special_flags UBSAN reported a shift-out-of-bounds warning: left shift of 1 by 31 places cannot be represented in type 'int' Call Trace: __dump_stack lib/dump_stack.c:88 [inline] dump_stack_lvl+0x8d/0xcf lib/dump_stack.c:106 ubsan_epilogue+0xa/0x44 lib/ubsan.c:151 __ubsan_handle_shift_out_of_bounds+0x1e7/0x208 lib/ubsan.c:322 check_special_flags fs/binfmt_misc.c:241 [inline] create_entry fs/binfmt_misc.c:456 [inline] bm_register_write+0x9d3/0xa20 fs/binfmt_misc.c:654 vfs_write+0x11e/0x580 fs/read_write.c:582 ksys_write+0xcf/0x120 fs/read_write.c:637 do_syscall_x64 arch/x86/entry/common.c:50 [inline] do_syscall_64+0x34/0x80 arch/x86/entry/common.c:80 entry_SYSCALL_64_after_hwframe+0x63/0xcd RIP: 0033:0x4194e1 Since the type of Node's flags is unsigned long, we should define these macros with same type too. Signed-off-by: Liu Shixin Signed-off-by: Kees Cook Link: https://lore.kernel.org/r/20221102025123.1117184-1-liushixin2@huawei.com --- fs/binfmt_misc.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/fs/binfmt_misc.c b/fs/binfmt_misc.c index e1eae7ea823a..bb202ad369d5 100644 --- a/fs/binfmt_misc.c +++ b/fs/binfmt_misc.c @@ -44,10 +44,10 @@ static LIST_HEAD(entries); static int enabled = 1; enum {Enabled, Magic}; -#define MISC_FMT_PRESERVE_ARGV0 (1 << 31) -#define MISC_FMT_OPEN_BINARY (1 << 30) -#define MISC_FMT_CREDENTIALS (1 << 29) -#define MISC_FMT_OPEN_FILE (1 << 28) +#define MISC_FMT_PRESERVE_ARGV0 (1UL << 31) +#define MISC_FMT_OPEN_BINARY (1UL << 30) +#define MISC_FMT_CREDENTIALS (1UL << 29) +#define MISC_FMT_OPEN_FILE (1UL << 28) typedef struct { struct list_head list; -- cgit v1.2.3