diff options
author | Jakub Kicinski | 2023-07-13 19:13:24 -0700 |
---|---|---|
committer | Jakub Kicinski | 2023-07-13 19:13:24 -0700 |
commit | d2afa89f6690616b9fb55f3f74e6d2927589e43a (patch) | |
tree | 48b5b9b08b3b0049f6674f29e99e0b3eafe8a3f7 /arch | |
parent | eb1b24a9bb6982bb44b135e3490d3159ba0be4f2 (diff) | |
parent | 1cd0e7715cad848017e48033772791e8b9ee2932 (diff) |
Merge tag 'for-netdev' of https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next
Alexei Starovoitov says:
====================
pull-request: bpf-next 2023-07-13
We've added 67 non-merge commits during the last 15 day(s) which contain
a total of 106 files changed, 4444 insertions(+), 619 deletions(-).
The main changes are:
1) Fix bpftool build in presence of stale vmlinux.h,
from Alexander Lobakin.
2) Introduce bpf_me_mcache_free_rcu() and fix OOM under stress,
from Alexei Starovoitov.
3) Teach verifier actual bounds of bpf_get_smp_processor_id()
and fix perf+libbpf issue related to custom section handling,
from Andrii Nakryiko.
4) Introduce bpf map element count, from Anton Protopopov.
5) Check skb ownership against full socket, from Kui-Feng Lee.
6) Support for up to 12 arguments in BPF trampoline, from Menglong Dong.
7) Export rcu_request_urgent_qs_task, from Paul E. McKenney.
8) Fix BTF walking of unions, from Yafang Shao.
9) Extend link_info for kprobe_multi and perf_event links,
from Yafang Shao.
* tag 'for-netdev' of https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next: (67 commits)
selftests/bpf: Add selftest for PTR_UNTRUSTED
bpf: Fix an error in verifying a field in a union
selftests/bpf: Add selftests for nested_trust
bpf: Fix an error around PTR_UNTRUSTED
selftests/bpf: add testcase for TRACING with 6+ arguments
bpf, x86: allow function arguments up to 12 for TRACING
bpf, x86: save/restore regs with BPF_DW size
bpftool: Use "fallthrough;" keyword instead of comments
bpf: Add object leak check.
bpf: Convert bpf_cpumask to bpf_mem_cache_free_rcu.
bpf: Introduce bpf_mem_free_rcu() similar to kfree_rcu().
selftests/bpf: Improve test coverage of bpf_mem_alloc.
rcu: Export rcu_request_urgent_qs_task()
bpf: Allow reuse from waiting_for_gp_ttrace list.
bpf: Add a hint to allocated objects.
bpf: Change bpf_mem_cache draining process.
bpf: Further refactor alloc_bulk().
bpf: Factor out inc/dec of active flag into helpers.
bpf: Refactor alloc_bulk().
bpf: Let free_all() return the number of freed elements.
...
====================
Link: https://lore.kernel.org/r/20230714020910.80794-1-alexei.starovoitov@gmail.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Diffstat (limited to 'arch')
-rw-r--r-- | arch/x86/net/bpf_jit_comp.c | 246 |
1 files changed, 203 insertions, 43 deletions
diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c index 438adb695daa..5ab531be56ac 100644 --- a/arch/x86/net/bpf_jit_comp.c +++ b/arch/x86/net/bpf_jit_comp.c @@ -1857,59 +1857,177 @@ emit_jmp: return proglen; } -static void save_regs(const struct btf_func_model *m, u8 **prog, int nr_regs, - int stack_size) +static void clean_stack_garbage(const struct btf_func_model *m, + u8 **pprog, int nr_stack_slots, + int stack_size) { - int i, j, arg_size; - bool next_same_struct = false; + int arg_size, off; + u8 *prog; + + /* Generally speaking, the compiler will pass the arguments + * on-stack with "push" instruction, which will take 8-byte + * on the stack. In this case, there won't be garbage values + * while we copy the arguments from origin stack frame to current + * in BPF_DW. + * + * However, sometimes the compiler will only allocate 4-byte on + * the stack for the arguments. For now, this case will only + * happen if there is only one argument on-stack and its size + * not more than 4 byte. In this case, there will be garbage + * values on the upper 4-byte where we store the argument on + * current stack frame. + * + * arguments on origin stack: + * + * stack_arg_1(4-byte) xxx(4-byte) + * + * what we copy: + * + * stack_arg_1(8-byte): stack_arg_1(origin) xxx + * + * and the xxx is the garbage values which we should clean here. + */ + if (nr_stack_slots != 1) + return; + + /* the size of the last argument */ + arg_size = m->arg_size[m->nr_args - 1]; + if (arg_size <= 4) { + off = -(stack_size - 4); + prog = *pprog; + /* mov DWORD PTR [rbp + off], 0 */ + if (!is_imm8(off)) + EMIT2_off32(0xC7, 0x85, off); + else + EMIT3(0xC7, 0x45, off); + EMIT(0, 4); + *pprog = prog; + } +} + +/* get the count of the regs that are used to pass arguments */ +static int get_nr_used_regs(const struct btf_func_model *m) +{ + int i, arg_regs, nr_used_regs = 0; + + for (i = 0; i < min_t(int, m->nr_args, MAX_BPF_FUNC_ARGS); i++) { + arg_regs = (m->arg_size[i] + 7) / 8; + if (nr_used_regs + arg_regs <= 6) + nr_used_regs += arg_regs; + + if (nr_used_regs >= 6) + break; + } + + return nr_used_regs; +} + +static void save_args(const struct btf_func_model *m, u8 **prog, + int stack_size, bool for_call_origin) +{ + int arg_regs, first_off, nr_regs = 0, nr_stack_slots = 0; + int i, j; /* Store function arguments to stack. * For a function that accepts two pointers the sequence will be: * mov QWORD PTR [rbp-0x10],rdi * mov QWORD PTR [rbp-0x8],rsi */ - for (i = 0, j = 0; i < min(nr_regs, 6); i++) { - /* The arg_size is at most 16 bytes, enforced by the verifier. */ - arg_size = m->arg_size[j]; - if (arg_size > 8) { - arg_size = 8; - next_same_struct = !next_same_struct; - } + for (i = 0; i < min_t(int, m->nr_args, MAX_BPF_FUNC_ARGS); i++) { + arg_regs = (m->arg_size[i] + 7) / 8; - emit_stx(prog, bytes_to_bpf_size(arg_size), - BPF_REG_FP, - i == 5 ? X86_REG_R9 : BPF_REG_1 + i, - -(stack_size - i * 8)); + /* According to the research of Yonghong, struct members + * should be all in register or all on the stack. + * Meanwhile, the compiler will pass the argument on regs + * if the remaining regs can hold the argument. + * + * Disorder of the args can happen. For example: + * + * struct foo_struct { + * long a; + * int b; + * }; + * int foo(char, char, char, char, char, struct foo_struct, + * char); + * + * the arg1-5,arg7 will be passed by regs, and arg6 will + * by stack. + */ + if (nr_regs + arg_regs > 6) { + /* copy function arguments from origin stack frame + * into current stack frame. + * + * The starting address of the arguments on-stack + * is: + * rbp + 8(push rbp) + + * 8(return addr of origin call) + + * 8(return addr of the caller) + * which means: rbp + 24 + */ + for (j = 0; j < arg_regs; j++) { + emit_ldx(prog, BPF_DW, BPF_REG_0, BPF_REG_FP, + nr_stack_slots * 8 + 0x18); + emit_stx(prog, BPF_DW, BPF_REG_FP, BPF_REG_0, + -stack_size); + + if (!nr_stack_slots) + first_off = stack_size; + stack_size -= 8; + nr_stack_slots++; + } + } else { + /* Only copy the arguments on-stack to current + * 'stack_size' and ignore the regs, used to + * prepare the arguments on-stack for orign call. + */ + if (for_call_origin) { + nr_regs += arg_regs; + continue; + } - j = next_same_struct ? j : j + 1; + /* copy the arguments from regs into stack */ + for (j = 0; j < arg_regs; j++) { + emit_stx(prog, BPF_DW, BPF_REG_FP, + nr_regs == 5 ? X86_REG_R9 : BPF_REG_1 + nr_regs, + -stack_size); + stack_size -= 8; + nr_regs++; + } + } } + + clean_stack_garbage(m, prog, nr_stack_slots, first_off); } -static void restore_regs(const struct btf_func_model *m, u8 **prog, int nr_regs, +static void restore_regs(const struct btf_func_model *m, u8 **prog, int stack_size) { - int i, j, arg_size; - bool next_same_struct = false; + int i, j, arg_regs, nr_regs = 0; /* Restore function arguments from stack. * For a function that accepts two pointers the sequence will be: * EMIT4(0x48, 0x8B, 0x7D, 0xF0); mov rdi,QWORD PTR [rbp-0x10] * EMIT4(0x48, 0x8B, 0x75, 0xF8); mov rsi,QWORD PTR [rbp-0x8] + * + * The logic here is similar to what we do in save_args() */ - for (i = 0, j = 0; i < min(nr_regs, 6); i++) { - /* The arg_size is at most 16 bytes, enforced by the verifier. */ - arg_size = m->arg_size[j]; - if (arg_size > 8) { - arg_size = 8; - next_same_struct = !next_same_struct; + for (i = 0; i < min_t(int, m->nr_args, MAX_BPF_FUNC_ARGS); i++) { + arg_regs = (m->arg_size[i] + 7) / 8; + if (nr_regs + arg_regs <= 6) { + for (j = 0; j < arg_regs; j++) { + emit_ldx(prog, BPF_DW, + nr_regs == 5 ? X86_REG_R9 : BPF_REG_1 + nr_regs, + BPF_REG_FP, + -stack_size); + stack_size -= 8; + nr_regs++; + } + } else { + stack_size -= 8 * arg_regs; } - emit_ldx(prog, bytes_to_bpf_size(arg_size), - i == 5 ? X86_REG_R9 : BPF_REG_1 + i, - BPF_REG_FP, - -(stack_size - i * 8)); - - j = next_same_struct ? j : j + 1; + if (nr_regs >= 6) + break; } } @@ -1938,7 +2056,10 @@ static int invoke_bpf_prog(const struct btf_func_model *m, u8 **pprog, /* arg1: mov rdi, progs[i] */ emit_mov_imm64(&prog, BPF_REG_1, (long) p >> 32, (u32) (long) p); /* arg2: lea rsi, [rbp - ctx_cookie_off] */ - EMIT4(0x48, 0x8D, 0x75, -run_ctx_off); + if (!is_imm8(-run_ctx_off)) + EMIT3_off32(0x48, 0x8D, 0xB5, -run_ctx_off); + else + EMIT4(0x48, 0x8D, 0x75, -run_ctx_off); if (emit_rsb_call(&prog, bpf_trampoline_enter(p), prog)) return -EINVAL; @@ -1954,7 +2075,10 @@ static int invoke_bpf_prog(const struct btf_func_model *m, u8 **pprog, emit_nops(&prog, 2); /* arg1: lea rdi, [rbp - stack_size] */ - EMIT4(0x48, 0x8D, 0x7D, -stack_size); + if (!is_imm8(-stack_size)) + EMIT3_off32(0x48, 0x8D, 0xBD, -stack_size); + else + EMIT4(0x48, 0x8D, 0x7D, -stack_size); /* arg2: progs[i]->insnsi for interpreter */ if (!p->jited) emit_mov_imm64(&prog, BPF_REG_2, @@ -1984,7 +2108,10 @@ static int invoke_bpf_prog(const struct btf_func_model *m, u8 **pprog, /* arg2: mov rsi, rbx <- start time in nsec */ emit_mov_reg(&prog, true, BPF_REG_2, BPF_REG_6); /* arg3: lea rdx, [rbp - run_ctx_off] */ - EMIT4(0x48, 0x8D, 0x55, -run_ctx_off); + if (!is_imm8(-run_ctx_off)) + EMIT3_off32(0x48, 0x8D, 0x95, -run_ctx_off); + else + EMIT4(0x48, 0x8D, 0x55, -run_ctx_off); if (emit_rsb_call(&prog, bpf_trampoline_exit(p), prog)) return -EINVAL; @@ -2136,7 +2263,7 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *i void *func_addr) { int i, ret, nr_regs = m->nr_args, stack_size = 0; - int regs_off, nregs_off, ip_off, run_ctx_off; + int regs_off, nregs_off, ip_off, run_ctx_off, arg_stack_off, rbx_off; struct bpf_tramp_links *fentry = &tlinks[BPF_TRAMP_FENTRY]; struct bpf_tramp_links *fexit = &tlinks[BPF_TRAMP_FEXIT]; struct bpf_tramp_links *fmod_ret = &tlinks[BPF_TRAMP_MODIFY_RETURN]; @@ -2150,8 +2277,10 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *i if (m->arg_flags[i] & BTF_FMODEL_STRUCT_ARG) nr_regs += (m->arg_size[i] + 7) / 8 - 1; - /* x86-64 supports up to 6 arguments. 7+ can be added in the future */ - if (nr_regs > 6) + /* x86-64 supports up to MAX_BPF_FUNC_ARGS arguments. 1-6 + * are passed through regs, the remains are through stack. + */ + if (nr_regs > MAX_BPF_FUNC_ARGS) return -ENOTSUPP; /* Generated trampoline stack layout: @@ -2170,7 +2299,14 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *i * * RBP - ip_off [ traced function ] BPF_TRAMP_F_IP_ARG flag * + * RBP - rbx_off [ rbx value ] always + * * RBP - run_ctx_off [ bpf_tramp_run_ctx ] + * + * [ stack_argN ] BPF_TRAMP_F_CALL_ORIG + * [ ... ] + * [ stack_arg2 ] + * RBP - arg_stack_off [ stack_arg1 ] */ /* room for return value of orig_call or fentry prog */ @@ -2190,9 +2326,26 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *i ip_off = stack_size; + stack_size += 8; + rbx_off = stack_size; + stack_size += (sizeof(struct bpf_tramp_run_ctx) + 7) & ~0x7; run_ctx_off = stack_size; + if (nr_regs > 6 && (flags & BPF_TRAMP_F_CALL_ORIG)) { + /* the space that used to pass arguments on-stack */ + stack_size += (nr_regs - get_nr_used_regs(m)) * 8; + /* make sure the stack pointer is 16-byte aligned if we + * need pass arguments on stack, which means + * [stack_size + 8(rbp) + 8(rip) + 8(origin rip)] + * should be 16-byte aligned. Following code depend on + * that stack_size is already 8-byte aligned. + */ + stack_size += (stack_size % 16) ? 0 : 8; + } + + arg_stack_off = stack_size; + if (flags & BPF_TRAMP_F_SKIP_FRAME) { /* skip patched call instruction and point orig_call to actual * body of the kernel function. @@ -2212,8 +2365,14 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *i x86_call_depth_emit_accounting(&prog, NULL); EMIT1(0x55); /* push rbp */ EMIT3(0x48, 0x89, 0xE5); /* mov rbp, rsp */ - EMIT4(0x48, 0x83, 0xEC, stack_size); /* sub rsp, stack_size */ - EMIT1(0x53); /* push rbx */ + if (!is_imm8(stack_size)) + /* sub rsp, stack_size */ + EMIT3_off32(0x48, 0x81, 0xEC, stack_size); + else + /* sub rsp, stack_size */ + EMIT4(0x48, 0x83, 0xEC, stack_size); + /* mov QWORD PTR [rbp - rbx_off], rbx */ + emit_stx(&prog, BPF_DW, BPF_REG_FP, BPF_REG_6, -rbx_off); /* Store number of argument registers of the traced function: * mov rax, nr_regs @@ -2231,7 +2390,7 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *i emit_stx(&prog, BPF_DW, BPF_REG_FP, BPF_REG_0, -ip_off); } - save_regs(m, &prog, nr_regs, regs_off); + save_args(m, &prog, regs_off, false); if (flags & BPF_TRAMP_F_CALL_ORIG) { /* arg1: mov rdi, im */ @@ -2261,7 +2420,8 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *i } if (flags & BPF_TRAMP_F_CALL_ORIG) { - restore_regs(m, &prog, nr_regs, regs_off); + restore_regs(m, &prog, regs_off); + save_args(m, &prog, arg_stack_off, true); if (flags & BPF_TRAMP_F_ORIG_STACK) { emit_ldx(&prog, BPF_DW, BPF_REG_0, BPF_REG_FP, 8); @@ -2302,7 +2462,7 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *i } if (flags & BPF_TRAMP_F_RESTORE_REGS) - restore_regs(m, &prog, nr_regs, regs_off); + restore_regs(m, &prog, regs_off); /* This needs to be done regardless. If there were fmod_ret programs, * the return value is only updated on the stack and still needs to be @@ -2321,7 +2481,7 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *i if (save_ret) emit_ldx(&prog, BPF_DW, BPF_REG_0, BPF_REG_FP, -8); - EMIT1(0x5B); /* pop rbx */ + emit_ldx(&prog, BPF_DW, BPF_REG_6, BPF_REG_FP, -rbx_off); EMIT1(0xC9); /* leave */ if (flags & BPF_TRAMP_F_SKIP_FRAME) /* skip our return address and return to parent */ |