diff options
69 files changed, 3141 insertions, 866 deletions
diff --git a/Documentation/bpf/bpf_devel_QA.rst b/Documentation/bpf/bpf_devel_QA.rst index 2ed89abbf9a4..253496af8fef 100644 --- a/Documentation/bpf/bpf_devel_QA.rst +++ b/Documentation/bpf/bpf_devel_QA.rst @@ -29,7 +29,7 @@ list: This may also include issues related to XDP, BPF tracing, etc. Given netdev has a high volume of traffic, please also add the BPF -maintainers to Cc (from kernel MAINTAINERS_ file): +maintainers to Cc (from kernel ``MAINTAINERS`` file): * Alexei Starovoitov <ast@kernel.org> * Daniel Borkmann <daniel@iogearbox.net> @@ -234,11 +234,11 @@ be subject to change. Q: samples/bpf preference vs selftests? --------------------------------------- -Q: When should I add code to `samples/bpf/`_ and when to BPF kernel -selftests_ ? +Q: When should I add code to ``samples/bpf/`` and when to BPF kernel +selftests_? A: In general, we prefer additions to BPF kernel selftests_ rather than -`samples/bpf/`_. The rationale is very simple: kernel selftests are +``samples/bpf/``. The rationale is very simple: kernel selftests are regularly run by various bots to test for kernel regressions. The more test cases we add to BPF selftests, the better the coverage @@ -246,9 +246,9 @@ and the less likely it is that those could accidentally break. It is not that BPF kernel selftests cannot demo how a specific feature can be used. -That said, `samples/bpf/`_ may be a good place for people to get started, +That said, ``samples/bpf/`` may be a good place for people to get started, so it might be advisable that simple demos of features could go into -`samples/bpf/`_, but advanced functional and corner-case testing rather +``samples/bpf/``, but advanced functional and corner-case testing rather into kernel selftests. 
If your sample looks like a test case, then go for BPF kernel selftests @@ -449,6 +449,19 @@ from source at https://github.com/acmel/dwarves +pahole has used libbpf definitions and APIs since v1.13, after the +commit 21507cd3e97b ("pahole: add libbpf as submodule under lib/bpf"). +It works well with the git repository because the libbpf submodule is +fetched by "git submodule update --init --recursive". + +Unfortunately, the default github release source code does not contain +the libbpf submodule source code, and this will cause build issues. The +tarball from https://git.kernel.org/pub/scm/devel/pahole/pahole.git/ is the +same as the github one. You can get a source tarball with the corresponding +libbpf submodule code from + +https://fedorapeople.org/~acme/dwarves + Some distros have pahole version 1.16 packaged already, e.g. Fedora, Gentoo. @@ -645,10 +658,9 @@ when: .. Links .. _Documentation/process/: https://www.kernel.org/doc/html/latest/process/ -.. _MAINTAINERS: ../../MAINTAINERS .. _netdev-FAQ: ../networking/netdev-FAQ.rst -.. _samples/bpf/: ../../samples/bpf/ -.. _selftests: ../../tools/testing/selftests/bpf/ +.. _selftests: + https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/tools/testing/selftests/bpf/ .. _Documentation/dev-tools/kselftest.rst: https://www.kernel.org/doc/html/latest/dev-tools/kselftest.html .. 
_Documentation/bpf/btf.rst: btf.rst diff --git a/include/linux/bpf-cgroup.h b/include/linux/bpf-cgroup.h index 6a29fe11485d..8b77d08d4b47 100644 --- a/include/linux/bpf-cgroup.h +++ b/include/linux/bpf-cgroup.h @@ -458,7 +458,6 @@ int cgroup_bpf_prog_query(const union bpf_attr *attr, union bpf_attr __user *uattr); #else -struct bpf_prog; struct cgroup_bpf {}; static inline int cgroup_bpf_inherit(struct cgroup *cgrp) { return 0; } static inline void cgroup_bpf_offline(struct cgroup *cgrp) {} diff --git a/include/linux/bpf.h b/include/linux/bpf.h index c9b7a876b0c8..ad4bcf1cadbb 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -310,6 +310,7 @@ enum bpf_arg_type { ARG_PTR_TO_PERCPU_BTF_ID, /* pointer to in-kernel percpu type */ ARG_PTR_TO_FUNC, /* pointer to a bpf program function */ ARG_PTR_TO_STACK_OR_NULL, /* pointer to stack or NULL */ + ARG_PTR_TO_CONST_STR, /* pointer to a null terminated read-only string */ __BPF_ARG_TYPE_MAX, }; @@ -930,7 +931,6 @@ struct bpf_link_primer { }; struct bpf_struct_ops_value; -struct btf_type; struct btf_member; #define BPF_STRUCT_OPS_MAX_NR_MEMBERS 64 @@ -1955,6 +1955,7 @@ extern const struct bpf_func_proto bpf_skc_to_tcp_request_sock_proto; extern const struct bpf_func_proto bpf_skc_to_udp6_sock_proto; extern const struct bpf_func_proto bpf_copy_from_user_proto; extern const struct bpf_func_proto bpf_snprintf_btf_proto; +extern const struct bpf_func_proto bpf_snprintf_proto; extern const struct bpf_func_proto bpf_per_cpu_ptr_proto; extern const struct bpf_func_proto bpf_this_cpu_ptr_proto; extern const struct bpf_func_proto bpf_ktime_get_coarse_ns_proto; @@ -2080,4 +2081,24 @@ int bpf_arch_text_poke(void *ip, enum bpf_text_poke_type t, struct btf_id_set; bool btf_id_set_contains(const struct btf_id_set *set, u32 id); +enum bpf_printf_mod_type { + BPF_PRINTF_INT, + BPF_PRINTF_LONG, + BPF_PRINTF_LONG_LONG, +}; + +/* Workaround for getting va_list handling working with different argument type + * combinations 
generically for 32 and 64 bit archs. + */ +#define BPF_CAST_FMT_ARG(arg_nb, args, mod) \ + (mod[arg_nb] == BPF_PRINTF_LONG_LONG || \ + (mod[arg_nb] == BPF_PRINTF_LONG && __BITS_PER_LONG == 64) \ + ? (u64)args[arg_nb] \ + : (u32)args[arg_nb]) + +int bpf_printf_prepare(char *fmt, u32 fmt_size, const u64 *raw_args, + u64 *final_args, enum bpf_printf_mod_type *mod, + u32 num_args); +void bpf_printf_cleanup(void); + #endif /* _LINUX_BPF_H */ diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index 51c2ffa3d901..6023a1367853 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -487,6 +487,15 @@ static inline u64 bpf_trampoline_compute_key(const struct bpf_prog *tgt_prog, return ((u64)btf_obj_id(btf) << 32) | 0x80000000 | btf_id; } +/* unpack the IDs from the key as constructed above */ +static inline void bpf_trampoline_unpack_key(u64 key, u32 *obj_id, u32 *btf_id) +{ + if (obj_id) + *obj_id = key >> 32; + if (btf_id) + *btf_id = key & 0x7FFFFFFF; +} + int bpf_check_attach_target(struct bpf_verifier_log *log, const struct bpf_prog *prog, const struct bpf_prog *tgt_prog, diff --git a/include/linux/skmsg.h b/include/linux/skmsg.h index e242bf3d2b4a..aba0f0f429be 100644 --- a/include/linux/skmsg.h +++ b/include/linux/skmsg.h @@ -99,7 +99,8 @@ struct sk_psock { void (*saved_close)(struct sock *sk, long timeout); void (*saved_write_space)(struct sock *sk); void (*saved_data_ready)(struct sock *sk); - int (*psock_update_sk_prot)(struct sock *sk, bool restore); + int (*psock_update_sk_prot)(struct sock *sk, struct sk_psock *psock, + bool restore); struct proto *sk_proto; struct mutex work_mutex; struct sk_psock_work_state work_state; @@ -404,7 +405,7 @@ static inline void sk_psock_restore_proto(struct sock *sk, struct sk_psock *psock) { if (psock->psock_update_sk_prot) - psock->psock_update_sk_prot(sk, true); + psock->psock_update_sk_prot(sk, psock, true); } static inline void sk_psock_set_state(struct sk_psock *psock, diff --git 
a/include/net/sock.h b/include/net/sock.h index cadcc12cc316..42bc5e1a627f 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -1118,6 +1118,7 @@ struct inet_hashinfo; struct raw_hashinfo; struct smc_hashinfo; struct module; +struct sk_psock; /* * caches using SLAB_TYPESAFE_BY_RCU should let .next pointer from nulls nodes @@ -1189,7 +1190,9 @@ struct proto { void (*rehash)(struct sock *sk); int (*get_port)(struct sock *sk, unsigned short snum); #ifdef CONFIG_BPF_SYSCALL - int (*psock_update_sk_prot)(struct sock *sk, bool restore); + int (*psock_update_sk_prot)(struct sock *sk, + struct sk_psock *psock, + bool restore); #endif /* Keeping track of sockets in use */ diff --git a/include/net/tcp.h b/include/net/tcp.h index eaea43afcc97..d05193cb0d99 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -2215,7 +2215,7 @@ struct sk_psock; #ifdef CONFIG_BPF_SYSCALL struct proto *tcp_bpf_get_proto(struct sock *sk, struct sk_psock *psock); -int tcp_bpf_update_proto(struct sock *sk, bool restore); +int tcp_bpf_update_proto(struct sock *sk, struct sk_psock *psock, bool restore); void tcp_bpf_clone(const struct sock *sk, struct sock *newsk); #endif /* CONFIG_BPF_SYSCALL */ diff --git a/include/net/udp.h b/include/net/udp.h index f55aaeef7e91..360df454356c 100644 --- a/include/net/udp.h +++ b/include/net/udp.h @@ -543,7 +543,7 @@ static inline void udp_post_segment_fix_csum(struct sk_buff *skb) #ifdef CONFIG_BPF_SYSCALL struct sk_psock; struct proto *udp_bpf_get_proto(struct sock *sk, struct sk_psock *psock); -int udp_bpf_update_proto(struct sock *sk, bool restore); +int udp_bpf_update_proto(struct sock *sk, struct sk_psock *psock, bool restore); #endif #endif /* _UDP_H */ diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 49371eba98ba..ec6d85a81744 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -312,6 +312,27 @@ union bpf_iter_link_info { * *ctx_out*, *data_out* (for example, packet data), result of the * execution 
*retval*, and *duration* of the test run. * + * The sizes of the buffers provided as input and output + * parameters *ctx_in*, *ctx_out*, *data_in*, and *data_out* must + * be provided in the corresponding variables *ctx_size_in*, + * *ctx_size_out*, *data_size_in*, and/or *data_size_out*. If any + * of these parameters are not provided (ie set to NULL), the + * corresponding size field must be zero. + * + * Some program types have particular requirements: + * + * **BPF_PROG_TYPE_SK_LOOKUP** + * *data_in* and *data_out* must be NULL. + * + * **BPF_PROG_TYPE_XDP** + * *ctx_in* and *ctx_out* must be NULL. + * + * **BPF_PROG_TYPE_RAW_TRACEPOINT**, + * **BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE** + * + * *ctx_out*, *data_in* and *data_out* must be NULL. + * *repeat* must be zero. + * * Return * Returns zero on success. On error, -1 is returned and *errno* * is set appropriately. @@ -4061,12 +4082,20 @@ union bpf_attr { * of new data availability is sent. * If **BPF_RB_FORCE_WAKEUP** is specified in *flags*, notification * of new data availability is sent unconditionally. + * If **0** is specified in *flags*, an adaptive notification + * of new data availability is sent. + * + * An adaptive notification is a notification sent whenever the user-space + * process has caught up and consumed all available payloads. In case the user-space + * process is still processing a previous payload, then no notification is needed + * as it will process the newly added payload automatically. * Return * 0 on success, or a negative error in case of failure. * * void *bpf_ringbuf_reserve(void *ringbuf, u64 size, u64 flags) * Description * Reserve *size* bytes of payload in a ring buffer *ringbuf*. + * *flags* must be 0. * Return * Valid pointer with *size* bytes of memory available; NULL, * otherwise. @@ -4078,6 +4107,10 @@ union bpf_attr { * of new data availability is sent. 
* If **BPF_RB_FORCE_WAKEUP** is specified in *flags*, notification * of new data availability is sent unconditionally. + * If **0** is specified in *flags*, an adaptive notification + * of new data availability is sent. + * + * See 'bpf_ringbuf_output()' for the definition of adaptive notification. * Return * Nothing. Always succeeds. * @@ -4088,6 +4121,10 @@ union bpf_attr { * of new data availability is sent. * If **BPF_RB_FORCE_WAKEUP** is specified in *flags*, notification * of new data availability is sent unconditionally. + * If **0** is specified in *flags*, an adaptive notification + * of new data availability is sent. + * + * See 'bpf_ringbuf_output()' for the definition of adaptive notification. * Return * Nothing. Always succeeds. * @@ -4671,6 +4708,33 @@ union bpf_attr { * Return * The number of traversed map elements for success, **-EINVAL** for * invalid **flags**. + * + * long bpf_snprintf(char *str, u32 str_size, const char *fmt, u64 *data, u32 data_len) + * Description + * Outputs a string into the **str** buffer of size **str_size** + * based on a format string stored in a read-only map pointed by + * **fmt**. + * + * Each format specifier in **fmt** corresponds to one u64 element + * in the **data** array. For strings and pointers where pointees + * are accessed, only the pointer values are stored in the *data* + * array. The *data_len* is the size of *data* in bytes. + * + * Formats **%s** and **%p{i,I}{4,6}** require to read kernel + * memory. Reading kernel memory may fail due to either invalid + * address or valid address but requiring a major memory fault. If + * reading kernel memory fails, the string for **%s** will be an + * empty string, and the ip address for **%p{i,I}{4,6}** will be 0. + * Not returning error to bpf program is consistent with what + * **bpf_trace_printk**\ () does for now. + * + * Return + * The strictly positive length of the formatted string, including + * the trailing zero character. 
If the return value is greater than + * **str_size**, **str** contains a truncated string, guaranteed to + * be zero-terminated except when **str_size** is 0. + * + * Or **-EBUSY** if the per-CPU memory copy buffer is busy. */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -4838,6 +4902,7 @@ union bpf_attr { FN(sock_from_file), \ FN(check_mtu), \ FN(for_each_map_elem), \ + FN(snprintf), \ /* */ /* integer value in 'imm' field of BPF_CALL instruction selects which helper @@ -5379,6 +5444,8 @@ struct bpf_link_info { } raw_tracepoint; struct { __u32 attach_type; + __u32 target_obj_id; /* prog_id for PROG_EXT, otherwise btf object id */ + __u32 target_btf_id; /* BTF type id inside the object */ } tracing; struct { __u64 cgroup_id; diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index f5423251c118..5e31ee9f7512 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -1363,11 +1363,10 @@ u64 __weak bpf_probe_read_kernel(void *dst, u32 size, const void *unsafe_ptr) * __bpf_prog_run - run eBPF program on a given context * @regs: is the array of MAX_BPF_EXT_REG eBPF pseudo-registers * @insn: is the array of eBPF instructions - * @stack: is the eBPF storage stack * * Decode and execute eBPF instructions. 
*/ -static u64 ___bpf_prog_run(u64 *regs, const struct bpf_insn *insn, u64 *stack) +static u64 ___bpf_prog_run(u64 *regs, const struct bpf_insn *insn) { #define BPF_INSN_2_LBL(x, y) [BPF_##x | BPF_##y] = &&x##_##y #define BPF_INSN_3_LBL(x, y, z) [BPF_##x | BPF_##y | BPF_##z] = &&x##_##y##_##z @@ -1701,7 +1700,7 @@ static unsigned int PROG_NAME(stack_size)(const void *ctx, const struct bpf_insn \ FP = (u64) (unsigned long) &stack[ARRAY_SIZE(stack)]; \ ARG1 = (u64) (unsigned long) ctx; \ - return ___bpf_prog_run(regs, insn, stack); \ + return ___bpf_prog_run(regs, insn); \ } #define PROG_NAME_ARGS(stack_size) __bpf_prog_run_args##stack_size @@ -1718,7 +1717,7 @@ static u64 PROG_NAME_ARGS(stack_size)(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5, \ BPF_R3 = r3; \ BPF_R4 = r4; \ BPF_R5 = r5; \ - return ___bpf_prog_run(regs, insn, stack); \ + return ___bpf_prog_run(regs, insn); \ } #define EVAL1(FN, X) FN(X) diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c index f306611c4ddf..85b26ca5aacd 100644 --- a/kernel/bpf/helpers.c +++ b/kernel/bpf/helpers.c @@ -669,6 +669,310 @@ const struct bpf_func_proto bpf_this_cpu_ptr_proto = { .arg1_type = ARG_PTR_TO_PERCPU_BTF_ID, }; +static int bpf_trace_copy_string(char *buf, void *unsafe_ptr, char fmt_ptype, + size_t bufsz) +{ + void __user *user_ptr = (__force void __user *)unsafe_ptr; + + buf[0] = 0; + + switch (fmt_ptype) { + case 's': +#ifdef CONFIG_ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE + if ((unsigned long)unsafe_ptr < TASK_SIZE) + return strncpy_from_user_nofault(buf, user_ptr, bufsz); + fallthrough; +#endif + case 'k': + return strncpy_from_kernel_nofault(buf, unsafe_ptr, bufsz); + case 'u': + return strncpy_from_user_nofault(buf, user_ptr, bufsz); + } + + return -EINVAL; +} + +/* Per-cpu temp buffers which can be used by printf-like helpers for %s or %p + */ +#define MAX_PRINTF_BUF_LEN 512 + +struct bpf_printf_buf { + char tmp_buf[MAX_PRINTF_BUF_LEN]; +}; +static DEFINE_PER_CPU(struct bpf_printf_buf, bpf_printf_buf); +static 
DEFINE_PER_CPU(int, bpf_printf_buf_used); + +static int try_get_fmt_tmp_buf(char **tmp_buf) +{ + struct bpf_printf_buf *bufs; + int used; + + if (*tmp_buf) + return 0; + + preempt_disable(); + used = this_cpu_inc_return(bpf_printf_buf_used); + if (WARN_ON_ONCE(used > 1)) { + this_cpu_dec(bpf_printf_buf_used); + preempt_enable(); + return -EBUSY; + } + bufs = this_cpu_ptr(&bpf_printf_buf); + *tmp_buf = bufs->tmp_buf; + + return 0; +} + +void bpf_printf_cleanup(void) +{ + if (this_cpu_read(bpf_printf_buf_used)) { + this_cpu_dec(bpf_printf_buf_used); + preempt_enable(); + } +} + +/* + * bpf_printf_prepare - Generic pass on format strings for printf-like helpers + * + * Returns a negative value if fmt is an invalid format string or 0 otherwise. + * + * This can be used in two ways: + * - Format string verification only: when final_args and mod are NULL + * - Arguments preparation: in addition to the above verification, it writes in + * final_args a copy of raw_args where pointers from BPF have been sanitized + * into pointers safe to use by snprintf. This also writes in the mod array + * the size requirement of each argument, usable by BPF_CAST_FMT_ARG for example. + * + * In argument preparation mode, if 0 is returned, safe temporary buffers are + * allocated and bpf_printf_cleanup should be called to free them after use. 
+ */ +int bpf_printf_prepare(char *fmt, u32 fmt_size, const u64 *raw_args, + u64 *final_args, enum bpf_printf_mod_type *mod, + u32 num_args) +{ + char *unsafe_ptr = NULL, *tmp_buf = NULL, *fmt_end; + size_t tmp_buf_len = MAX_PRINTF_BUF_LEN; + int err, i, num_spec = 0, copy_size; + enum bpf_printf_mod_type cur_mod; + u64 cur_arg; + char fmt_ptype; + + if (!!final_args != !!mod) + return -EINVAL; + + fmt_end = strnchr(fmt, fmt_size, 0); + if (!fmt_end) + return -EINVAL; + fmt_size = fmt_end - fmt; + + for (i = 0; i < fmt_size; i++) { + if ((!isprint(fmt[i]) && !isspace(fmt[i])) || !isascii(fmt[i])) { + err = -EINVAL; + goto cleanup; + } + + if (fmt[i] != '%') + continue; + + if (fmt[i + 1] == '%') { + i++; + continue; + } + + if (num_spec >= num_args) { + err = -EINVAL; + goto cleanup; + } + + /* The string is zero-terminated so if fmt[i] != 0, we can + * always access fmt[i + 1], in the worst case it will be a 0 + */ + i++; + + /* skip optional "[0 +-][num]" width formatting field */ + while (fmt[i] == '0' || fmt[i] == '+' || fmt[i] == '-' || + fmt[i] == ' ') + i++; + if (fmt[i] >= '1' && fmt[i] <= '9') { + i++; + while (fmt[i] >= '0' && fmt[i] <= '9') + i++; + } + + if (fmt[i] == 'p') { + cur_mod = BPF_PRINTF_LONG; + + if ((fmt[i + 1] == 'k' || fmt[i + 1] == 'u') && + fmt[i + 2] == 's') { + fmt_ptype = fmt[i + 1]; + i += 2; + goto fmt_str; + } + + if (fmt[i + 1] == 0 || isspace(fmt[i + 1]) || + ispunct(fmt[i + 1]) || fmt[i + 1] == 'K' || + fmt[i + 1] == 'x' || fmt[i + 1] == 'B' || + fmt[i + 1] == 's' || fmt[i + 1] == 'S') { + /* just kernel pointers */ + if (final_args) + cur_arg = raw_args[num_spec]; + goto fmt_next; + } + + /* only support "%pI4", "%pi4", "%pI6" and "%pi6". */ + if ((fmt[i + 1] != 'i' && fmt[i + 1] != 'I') || + (fmt[i + 2] != '4' && fmt[i + 2] != '6')) { + err = -EINVAL; + goto cleanup; + } + + i += 2; + if (!final_args) + goto fmt_next; + + if (try_get_fmt_tmp_buf(&tmp_buf)) { + err = -EBUSY; + goto out; + } + + copy_size = (fmt[i + 2] == '4') ? 
4 : 16; + if (tmp_buf_len < copy_size) { + err = -ENOSPC; + goto cleanup; + } + + unsafe_ptr = (char *)(long)raw_args[num_spec]; + err = copy_from_kernel_nofault(tmp_buf, unsafe_ptr, + copy_size); + if (err < 0) + memset(tmp_buf, 0, copy_size); + cur_arg = (u64)(long)tmp_buf; + tmp_buf += copy_size; + tmp_buf_len -= copy_size; + + goto fmt_next; + } else if (fmt[i] == 's') { + cur_mod = BPF_PRINTF_LONG; + fmt_ptype = fmt[i]; +fmt_str: + if (fmt[i + 1] != 0 && + !isspace(fmt[i + 1]) && + !ispunct(fmt[i + 1])) { + err = -EINVAL; + goto cleanup; + } + + if (!final_args) + goto fmt_next; + + if (try_get_fmt_tmp_buf(&tmp_buf)) { + err = -EBUSY; + goto out; + } + + if (!tmp_buf_len) { + err = -ENOSPC; + goto cleanup; + } + + unsafe_ptr = (char *)(long)raw_args[num_spec]; + err = bpf_trace_copy_string(tmp_buf, unsafe_ptr, + fmt_ptype, tmp_buf_len); + if (err < 0) { + tmp_buf[0] = '\0'; + err = 1; + } + + cur_arg = (u64)(long)tmp_buf; + tmp_buf += err; + tmp_buf_len -= err; + + goto fmt_next; + } + + cur_mod = BPF_PRINTF_INT; + + if (fmt[i] == 'l') { + cur_mod = BPF_PRINTF_LONG; + i++; + } + if (fmt[i] == 'l') { + cur_mod = BPF_PRINTF_LONG_LONG; + i++; + } + + if (fmt[i] != 'i' && fmt[i] != 'd' && fmt[i] != 'u' && + fmt[i] != 'x' && fmt[i] != 'X') { + err = -EINVAL; + goto cleanup; + } + + if (final_args) + cur_arg = raw_args[num_spec]; +fmt_next: + if (final_args) { + mod[num_spec] = cur_mod; + final_args[num_spec] = cur_arg; + } + num_spec++; + } + + err = 0; +cleanup: + if (err) + bpf_printf_cleanup(); +out: + return err; +} + +#define MAX_SNPRINTF_VARARGS 12 + +BPF_CALL_5(bpf_snprintf, char *, str, u32, str_size, char *, fmt, + const void *, data, u32, data_len) +{ + enum bpf_printf_mod_type mod[MAX_SNPRINTF_VARARGS]; + u64 args[MAX_SNPRINTF_VARARGS]; + int err, num_args; + + if (data_len % 8 || data_len > MAX_SNPRINTF_VARARGS * 8 || + (data_len && !data)) + return -EINVAL; + num_args = data_len / 8; + + /* ARG_PTR_TO_CONST_STR guarantees that fmt is zero-terminated so 
we + * can safely give an unbounded size. + */ + err = bpf_printf_prepare(fmt, UINT_MAX, data, args, mod, num_args); + if (err < 0) + return err; + + /* Maximumly we can have MAX_SNPRINTF_VARARGS parameters, just give + * all of them to snprintf(). + */ + err = snprintf(str, str_size, fmt, BPF_CAST_FMT_ARG(0, args, mod), + BPF_CAST_FMT_ARG(1, args, mod), BPF_CAST_FMT_ARG(2, args, mod), + BPF_CAST_FMT_ARG(3, args, mod), BPF_CAST_FMT_ARG(4, args, mod), + BPF_CAST_FMT_ARG(5, args, mod), BPF_CAST_FMT_ARG(6, args, mod), + BPF_CAST_FMT_ARG(7, args, mod), BPF_CAST_FMT_ARG(8, args, mod), + BPF_CAST_FMT_ARG(9, args, mod), BPF_CAST_FMT_ARG(10, args, mod), + BPF_CAST_FMT_ARG(11, args, mod)); + + bpf_printf_cleanup(); + + return err + 1; +} + +const struct bpf_func_proto bpf_snprintf_proto = { + .func = bpf_snprintf, + .gpl_only = true, + .ret_type = RET_INTEGER, + .arg1_type = ARG_PTR_TO_MEM_OR_NULL, + .arg2_type = ARG_CONST_SIZE_OR_ZERO, + .arg3_type = ARG_PTR_TO_CONST_STR, + .arg4_type = ARG_PTR_TO_MEM_OR_NULL, + .arg5_type = ARG_CONST_SIZE_OR_ZERO, +}; + const struct bpf_func_proto bpf_get_current_task_proto __weak; const struct bpf_func_proto bpf_probe_read_user_proto __weak; const struct bpf_func_proto bpf_probe_read_user_str_proto __weak; @@ -757,6 +1061,8 @@ bpf_base_func_proto(enum bpf_func_id func_id) return &bpf_probe_read_kernel_str_proto; case BPF_FUNC_snprintf_btf: return &bpf_snprintf_btf_proto; + case BPF_FUNC_snprintf: + return &bpf_snprintf_proto; default: return NULL; } diff --git a/kernel/bpf/inode.c b/kernel/bpf/inode.c index d2de2abec35b..b4ebd60a6c16 100644 --- a/kernel/bpf/inode.c +++ b/kernel/bpf/inode.c @@ -816,8 +816,6 @@ static int __init bpf_init(void) { int ret; - mutex_init(&bpf_preload_lock); - ret = sysfs_create_mount_point(fs_kobj, "bpf"); if (ret) return ret; diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 6428634da57e..fd495190115e 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -2551,6 +2551,9 @@ static int 
bpf_tracing_link_fill_link_info(const struct bpf_link *link, container_of(link, struct bpf_tracing_link, link); info->tracing.attach_type = tr_link->attach_type; + bpf_trampoline_unpack_key(tr_link->trampoline->key, + &info->tracing.target_obj_id, + &info->tracing.target_btf_id); return 0; } diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 5682a02901d3..637462e9b6ee 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -4787,6 +4787,7 @@ static const struct bpf_reg_types spin_lock_types = { .types = { PTR_TO_MAP_VALU static const struct bpf_reg_types percpu_btf_ptr_types = { .types = { PTR_TO_PERCPU_BTF_ID } }; static const struct bpf_reg_types func_ptr_types = { .types = { PTR_TO_FUNC } }; static const struct bpf_reg_types stack_ptr_types = { .types = { PTR_TO_STACK } }; +static const struct bpf_reg_types const_str_ptr_types = { .types = { PTR_TO_MAP_VALUE } }; static const struct bpf_reg_types *compatible_reg_types[__BPF_ARG_TYPE_MAX] = { [ARG_PTR_TO_MAP_KEY] = &map_key_value_types, @@ -4817,6 +4818,7 @@ static const struct bpf_reg_types *compatible_reg_types[__BPF_ARG_TYPE_MAX] = { [ARG_PTR_TO_PERCPU_BTF_ID] = &percpu_btf_ptr_types, [ARG_PTR_TO_FUNC] = &func_ptr_types, [ARG_PTR_TO_STACK_OR_NULL] = &stack_ptr_types, + [ARG_PTR_TO_CONST_STR] = &const_str_ptr_types, }; static int check_reg_type(struct bpf_verifier_env *env, u32 regno, @@ -5067,6 +5069,44 @@ skip_type_check: if (err) return err; err = check_ptr_alignment(env, reg, 0, size, true); + } else if (arg_type == ARG_PTR_TO_CONST_STR) { + struct bpf_map *map = reg->map_ptr; + int map_off; + u64 map_addr; + char *str_ptr; + + if (!bpf_map_is_rdonly(map)) { + verbose(env, "R%d does not point to a readonly map'\n", regno); + return -EACCES; + } + + if (!tnum_is_const(reg->var_off)) { + verbose(env, "R%d is not a constant address'\n", regno); + return -EACCES; + } + + if (!map->ops->map_direct_value_addr) { + verbose(env, "no direct value access support for this map type\n"); + return 
-EACCES; + } + + err = check_map_access(env, regno, reg->off, + map->value_size - reg->off, false); + if (err) + return err; + + map_off = reg->off + reg->var_off.value; + err = map->ops->map_direct_value_addr(map, &map_addr, map_off); + if (err) { + verbose(env, "direct value access on string failed\n"); + return err; + } + + str_ptr = (char *)(long)(map_addr); + if (!strnchr(str_ptr + map_off, map->value_size - map_off, 0)) { + verbose(env, "string is not zero-terminated\n"); + return -EINVAL; + } } return err; @@ -5767,6 +5807,7 @@ static void do_refine_retval_range(struct bpf_reg_state *regs, int ret_type, if (ret_type != RET_INTEGER || (func_id != BPF_FUNC_get_stack && + func_id != BPF_FUNC_get_task_stack && func_id != BPF_FUNC_probe_read_str && func_id != BPF_FUNC_probe_read_kernel_str && func_id != BPF_FUNC_probe_read_user_str)) @@ -5877,6 +5918,43 @@ static int check_reference_leak(struct bpf_verifier_env *env) return state->acquired_refs ? -EINVAL : 0; } +static int check_bpf_snprintf_call(struct bpf_verifier_env *env, + struct bpf_reg_state *regs) +{ + struct bpf_reg_state *fmt_reg = &regs[BPF_REG_3]; + struct bpf_reg_state *data_len_reg = &regs[BPF_REG_5]; + struct bpf_map *fmt_map = fmt_reg->map_ptr; + int err, fmt_map_off, num_args; + u64 fmt_addr; + char *fmt; + + /* data must be an array of u64 */ + if (data_len_reg->var_off.value % 8) + return -EINVAL; + num_args = data_len_reg->var_off.value / 8; + + /* fmt being ARG_PTR_TO_CONST_STR guarantees that var_off is const + * and map_direct_value_addr is set. + */ + fmt_map_off = fmt_reg->off + fmt_reg->var_off.value; + err = fmt_map->ops->map_direct_value_addr(fmt_map, &fmt_addr, + fmt_map_off); + if (err) { + verbose(env, "verifier bug\n"); + return -EFAULT; + } + fmt = (char *)(long)fmt_addr + fmt_map_off; + + /* We are also guaranteed that fmt+fmt_map_off is NULL terminated, we + * can focus on validating the format specifiers. 
+ */ + err = bpf_printf_prepare(fmt, UINT_MAX, NULL, NULL, NULL, num_args); + if (err < 0) + verbose(env, "Invalid format string\n"); + + return err; +} + static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn, int *insn_idx_p) { @@ -5991,6 +6069,12 @@ static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn return -EINVAL; } + if (func_id == BPF_FUNC_snprintf) { + err = check_bpf_snprintf_call(env, regs); + if (err < 0) + return err; + } + /* reset caller saved regs */ for (i = 0; i < CALLER_SAVED_REGS; i++) { mark_reg_not_init(env, regs, caller_saved[i]); diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index 0d23755c2747..2a8bcdc927c7 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -372,188 +372,38 @@ static const struct bpf_func_proto *bpf_get_probe_write_proto(void) return &bpf_probe_write_user_proto; } -static void bpf_trace_copy_string(char *buf, void *unsafe_ptr, char fmt_ptype, - size_t bufsz) -{ - void __user *user_ptr = (__force void __user *)unsafe_ptr; - - buf[0] = 0; - - switch (fmt_ptype) { - case 's': -#ifdef CONFIG_ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE - if ((unsigned long)unsafe_ptr < TASK_SIZE) { - strncpy_from_user_nofault(buf, user_ptr, bufsz); - break; - } - fallthrough; -#endif - case 'k': - strncpy_from_kernel_nofault(buf, unsafe_ptr, bufsz); - break; - case 'u': - strncpy_from_user_nofault(buf, user_ptr, bufsz); - break; - } -} - static DEFINE_RAW_SPINLOCK(trace_printk_lock); -#define BPF_TRACE_PRINTK_SIZE 1024 +#define MAX_TRACE_PRINTK_VARARGS 3 +#define BPF_TRACE_PRINTK_SIZE 1024 -static __printf(1, 0) int bpf_do_trace_printk(const char *fmt, ...) 
+BPF_CALL_5(bpf_trace_printk, char *, fmt, u32, fmt_size, u64, arg1, + u64, arg2, u64, arg3) { + u64 args[MAX_TRACE_PRINTK_VARARGS] = { arg1, arg2, arg3 }; + enum bpf_printf_mod_type mod[MAX_TRACE_PRINTK_VARARGS]; static char buf[BPF_TRACE_PRINTK_SIZE]; unsigned long flags; - va_list ap; int ret; - raw_spin_lock_irqsave(&trace_printk_lock, flags); - va_start(ap, fmt); - ret = vsnprintf(buf, sizeof(buf), fmt, ap); - va_end(ap); - /* vsnprintf() will not append null for zero-length strings */ + ret = bpf_printf_prepare(fmt, fmt_size, args, args, mod, + MAX_TRACE_PRINTK_VARARGS); + if (ret < 0) + return ret; + + ret = snprintf(buf, sizeof(buf), fmt, BPF_CAST_FMT_ARG(0, args, mod), + BPF_CAST_FMT_ARG(1, args, mod), BPF_CAST_FMT_ARG(2, args, mod)); + /* snprintf() will not append null for zero-length strings */ if (ret == 0) buf[0] = '\0'; + + raw_spin_lock_irqsave(&trace_printk_lock, flags); trace_bpf_trace_printk(buf); raw_spin_unlock_irqrestore(&trace_printk_lock, flags); - return ret; -} - -/* - * Only limited trace_printk() conversion specifiers allowed: - * %d %i %u %x %ld %li %lu %lx %lld %lli %llu %llx %p %pB %pks %pus %s - */ -BPF_CALL_5(bpf_trace_printk, char *, fmt, u32, fmt_size, u64, arg1, - u64, arg2, u64, arg3) -{ - int i, mod[3] = {}, fmt_cnt = 0; - char buf[64], fmt_ptype; - void *unsafe_ptr = NULL; - bool str_seen = false; + bpf_printf_cleanup(); - /* - * bpf_check()->check_func_arg()->check_stack_boundary() - * guarantees that fmt points to bpf program stack, - * fmt_size bytes of it were initialized and fmt_size > 0 - */ - if (fmt[--fmt_size] != 0) - return -EINVAL; - - /* check format string for allowed specifiers */ - for (i = 0; i < fmt_size; i++) { - if ((!isprint(fmt[i]) && !isspace(fmt[i])) || !isascii(fmt[i])) - return -EINVAL; - - if (fmt[i] != '%') - continue; - - if (fmt_cnt >= 3) - return -EINVAL; - - /* fmt[i] != 0 && fmt[last] == 0, so we can access fmt[i + 1] */ - i++; - if (fmt[i] == 'l') { - mod[fmt_cnt]++; - i++; - } else if (fmt[i] 
== 'p') { - mod[fmt_cnt]++; - if ((fmt[i + 1] == 'k' || - fmt[i + 1] == 'u') && - fmt[i + 2] == 's') { - fmt_ptype = fmt[i + 1]; - i += 2; - goto fmt_str; - } - - if (fmt[i + 1] == 'B') { - i++; - goto fmt_next; - } - - /* disallow any further format extensions */ - if (fmt[i + 1] != 0 && - !isspace(fmt[i + 1]) && - !ispunct(fmt[i + 1])) - return -EINVAL; - - goto fmt_next; - } else if (fmt[i] == 's') { - mod[fmt_cnt]++; - fmt_ptype = fmt[i]; -fmt_str: - if (str_seen) - /* allow only one '%s' per fmt string */ - return -EINVAL; - str_seen = true; - - if (fmt[i + 1] != 0 && - !isspace(fmt[i + 1]) && - !ispunct(fmt[i + 1])) - return -EINVAL; - - switch (fmt_cnt) { - case 0: - unsafe_ptr = (void *)(long)arg1; - arg1 = (long)buf; - break; - case 1: - unsafe_ptr = (void *)(long)arg2; - arg2 = (long)buf; - break; - case 2: - unsafe_ptr = (void *)(long)arg3; - arg3 = (long)buf; - break; - } - - bpf_trace_copy_string(buf, unsafe_ptr, fmt_ptype, - sizeof(buf)); - goto fmt_next; - } - - if (fmt[i] == 'l') { - mod[fmt_cnt]++; - i++; - } - - if (fmt[i] != 'i' && fmt[i] != 'd' && - fmt[i] != 'u' && fmt[i] != 'x') - return -EINVAL; -fmt_next: - fmt_cnt++; - } - -/* Horrid workaround for getting va_list handling working with different - * argument type combinations generically for 32 and 64 bit archs. - */ -#define __BPF_TP_EMIT() __BPF_ARG3_TP() -#define __BPF_TP(...) \ - bpf_do_trace_printk(fmt, ##__VA_ARGS__) - -#define __BPF_ARG1_TP(...) \ - ((mod[0] == 2 || (mod[0] == 1 && __BITS_PER_LONG == 64)) \ - ? __BPF_TP(arg1, ##__VA_ARGS__) \ - : ((mod[0] == 1 || (mod[0] == 0 && __BITS_PER_LONG == 32)) \ - ? __BPF_TP((long)arg1, ##__VA_ARGS__) \ - : __BPF_TP((u32)arg1, ##__VA_ARGS__))) - -#define __BPF_ARG2_TP(...) \ - ((mod[1] == 2 || (mod[1] == 1 && __BITS_PER_LONG == 64)) \ - ? __BPF_ARG1_TP(arg2, ##__VA_ARGS__) \ - : ((mod[1] == 1 || (mod[1] == 0 && __BITS_PER_LONG == 32)) \ - ? 
__BPF_ARG1_TP((long)arg2, ##__VA_ARGS__) \ - : __BPF_ARG1_TP((u32)arg2, ##__VA_ARGS__))) - -#define __BPF_ARG3_TP(...) \ - ((mod[2] == 2 || (mod[2] == 1 && __BITS_PER_LONG == 64)) \ - ? __BPF_ARG2_TP(arg3, ##__VA_ARGS__) \ - : ((mod[2] == 1 || (mod[2] == 0 && __BITS_PER_LONG == 32)) \ - ? __BPF_ARG2_TP((long)arg3, ##__VA_ARGS__) \ - : __BPF_ARG2_TP((u32)arg3, ##__VA_ARGS__))) - - return __BPF_TP_EMIT(); + return ret; } static const struct bpf_func_proto bpf_trace_printk_proto = { @@ -581,184 +431,37 @@ const struct bpf_func_proto *bpf_get_trace_printk_proto(void) } #define MAX_SEQ_PRINTF_VARARGS 12 -#define MAX_SEQ_PRINTF_MAX_MEMCPY 6 -#define MAX_SEQ_PRINTF_STR_LEN 128 - -struct bpf_seq_printf_buf { - char buf[MAX_SEQ_PRINTF_MAX_MEMCPY][MAX_SEQ_PRINTF_STR_LEN]; -}; -static DEFINE_PER_CPU(struct bpf_seq_printf_buf, bpf_seq_printf_buf); -static DEFINE_PER_CPU(int, bpf_seq_printf_buf_used); BPF_CALL_5(bpf_seq_printf, struct seq_file *, m, char *, fmt, u32, fmt_size, const void *, data, u32, data_len) { - int err = -EINVAL, fmt_cnt = 0, memcpy_cnt = 0; - int i, buf_used, copy_size, num_args; - u64 params[MAX_SEQ_PRINTF_VARARGS]; - struct bpf_seq_printf_buf *bufs; - const u64 *args = data; - - buf_used = this_cpu_inc_return(bpf_seq_printf_buf_used); - if (WARN_ON_ONCE(buf_used > 1)) { - err = -EBUSY; - goto out; - } - - bufs = this_cpu_ptr(&bpf_seq_printf_buf); - - /* - * bpf_check()->check_func_arg()->check_stack_boundary() - * guarantees that fmt points to bpf program stack, - * fmt_size bytes of it were initialized and fmt_size > 0 - */ - if (fmt[--fmt_size] != 0) - goto out; - - if (data_len & 7) - goto out; - - for (i = 0; i < fmt_size; i++) { - if (fmt[i] == '%') { - if (fmt[i + 1] == '%') - i++; - else if (!data || !data_len) - goto out; - } - } + enum bpf_printf_mod_type mod[MAX_SEQ_PRINTF_VARARGS]; + u64 args[MAX_SEQ_PRINTF_VARARGS]; + int err, num_args; + if (data_len & 7 || data_len > MAX_SEQ_PRINTF_VARARGS * 8 || + (data_len && !data)) + return -EINVAL; 
num_args = data_len / 8; - /* check format string for allowed specifiers */ - for (i = 0; i < fmt_size; i++) { - /* only printable ascii for now. */ - if ((!isprint(fmt[i]) && !isspace(fmt[i])) || !isascii(fmt[i])) { - err = -EINVAL; - goto out; - } - - if (fmt[i] != '%') - continue; - - if (fmt[i + 1] == '%') { - i++; - continue; - } - - if (fmt_cnt >= MAX_SEQ_PRINTF_VARARGS) { - err = -E2BIG; - goto out; - } - - if (fmt_cnt >= num_args) { - err = -EINVAL; - goto out; - } - - /* fmt[i] != 0 && fmt[last] == 0, so we can access fmt[i + 1] */ - i++; - - /* skip optional "[0 +-][num]" width formating field */ - while (fmt[i] == '0' || fmt[i] == '+' || fmt[i] == '-' || - fmt[i] == ' ') - i++; - if (fmt[i] >= '1' && fmt[i] <= '9') { - i++; - while (fmt[i] >= '0' && fmt[i] <= '9') - i++; - } - - if (fmt[i] == 's') { - void *unsafe_ptr; - - /* try our best to copy */ - if (memcpy_cnt >= MAX_SEQ_PRINTF_MAX_MEMCPY) { - err = -E2BIG; - goto out; - } - - unsafe_ptr = (void *)(long)args[fmt_cnt]; - err = strncpy_from_kernel_nofault(bufs->buf[memcpy_cnt], - unsafe_ptr, MAX_SEQ_PRINTF_STR_LEN); - if (err < 0) - bufs->buf[memcpy_cnt][0] = '\0'; - params[fmt_cnt] = (u64)(long)bufs->buf[memcpy_cnt]; - - fmt_cnt++; - memcpy_cnt++; - continue; - } - - if (fmt[i] == 'p') { - if (fmt[i + 1] == 0 || - fmt[i + 1] == 'K' || - fmt[i + 1] == 'x' || - fmt[i + 1] == 'B') { - /* just kernel pointers */ - params[fmt_cnt] = args[fmt_cnt]; - fmt_cnt++; - continue; - } - - /* only support "%pI4", "%pi4", "%pI6" and "%pi6". */ - if (fmt[i + 1] != 'i' && fmt[i + 1] != 'I') { - err = -EINVAL; - goto out; - } - if (fmt[i + 2] != '4' && fmt[i + 2] != '6') { - err = -EINVAL; - goto out; - } - - if (memcpy_cnt >= MAX_SEQ_PRINTF_MAX_MEMCPY) { - err = -E2BIG; - goto out; - } - - - copy_size = (fmt[i + 2] == '4') ? 
4 : 16; - - err = copy_from_kernel_nofault(bufs->buf[memcpy_cnt], - (void *) (long) args[fmt_cnt], - copy_size); - if (err < 0) - memset(bufs->buf[memcpy_cnt], 0, copy_size); - params[fmt_cnt] = (u64)(long)bufs->buf[memcpy_cnt]; - - i += 2; - fmt_cnt++; - memcpy_cnt++; - continue; - } - - if (fmt[i] == 'l') { - i++; - if (fmt[i] == 'l') - i++; - } - - if (fmt[i] != 'i' && fmt[i] != 'd' && - fmt[i] != 'u' && fmt[i] != 'x' && - fmt[i] != 'X') { - err = -EINVAL; - goto out; - } - - params[fmt_cnt] = args[fmt_cnt]; - fmt_cnt++; - } + err = bpf_printf_prepare(fmt, fmt_size, data, args, mod, num_args); + if (err < 0) + return err; /* Maximumly we can have MAX_SEQ_PRINTF_VARARGS parameter, just give * all of them to seq_printf(). */ - seq_printf(m, fmt, params[0], params[1], params[2], params[3], - params[4], params[5], params[6], params[7], params[8], - params[9], params[10], params[11]); + seq_printf(m, fmt, BPF_CAST_FMT_ARG(0, args, mod), + BPF_CAST_FMT_ARG(1, args, mod), BPF_CAST_FMT_ARG(2, args, mod), + BPF_CAST_FMT_ARG(3, args, mod), BPF_CAST_FMT_ARG(4, args, mod), + BPF_CAST_FMT_ARG(5, args, mod), BPF_CAST_FMT_ARG(6, args, mod), + BPF_CAST_FMT_ARG(7, args, mod), BPF_CAST_FMT_ARG(8, args, mod), + BPF_CAST_FMT_ARG(9, args, mod), BPF_CAST_FMT_ARG(10, args, mod), + BPF_CAST_FMT_ARG(11, args, mod)); - err = seq_has_overflowed(m) ? -EOVERFLOW : 0; -out: - this_cpu_dec(bpf_seq_printf_buf_used); - return err; + bpf_printf_cleanup(); + + return seq_has_overflowed(m) ? 
-EOVERFLOW : 0; } BTF_ID_LIST_SINGLE(btf_seq_file_ids, struct, seq_file) @@ -1373,6 +1076,8 @@ bpf_tracing_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) return &bpf_task_storage_delete_proto; case BPF_FUNC_for_each_map_elem: return &bpf_for_each_map_elem_proto; + case BPF_FUNC_snprintf: + return &bpf_snprintf_proto; default: return NULL; } diff --git a/net/core/dev.c b/net/core/dev.c index d9bf63dbe4fd..eed028aec6a4 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -4723,10 +4723,10 @@ static u32 netif_receive_generic_xdp(struct sk_buff *skb, void *orig_data, *orig_data_end, *hard_start; struct netdev_rx_queue *rxqueue; u32 metalen, act = XDP_DROP; + bool orig_bcast, orig_host; u32 mac_len, frame_sz; __be16 orig_eth_type; struct ethhdr *eth; - bool orig_bcast; int off; /* Reinjected packets coming from act_mirred or similar should @@ -4773,6 +4773,7 @@ static u32 netif_receive_generic_xdp(struct sk_buff *skb, orig_data_end = xdp->data_end; orig_data = xdp->data; eth = (struct ethhdr *)xdp->data; + orig_host = ether_addr_equal_64bits(eth->h_dest, skb->dev->dev_addr); orig_bcast = is_multicast_ether_addr_64bits(eth->h_dest); orig_eth_type = eth->h_proto; @@ -4800,8 +4801,11 @@ static u32 netif_receive_generic_xdp(struct sk_buff *skb, /* check if XDP changed eth hdr such SKB needs update */ eth = (struct ethhdr *)xdp->data; if ((orig_eth_type != eth->h_proto) || + (orig_host != ether_addr_equal_64bits(eth->h_dest, + skb->dev->dev_addr)) || (orig_bcast != is_multicast_ether_addr_64bits(eth->h_dest))) { __skb_push(skb, ETH_HLEN); + skb->pkt_type = PACKET_HOST; skb->protocol = eth_type_trans(skb, skb->dev); } diff --git a/net/core/sock_map.c b/net/core/sock_map.c index 3d190d22b0d8..6f1b82b8ad49 100644 --- a/net/core/sock_map.c +++ b/net/core/sock_map.c @@ -188,7 +188,7 @@ static int sock_map_init_proto(struct sock *sk, struct sk_psock *psock) if (!sk->sk_prot->psock_update_sk_prot) return -EINVAL; psock->psock_update_sk_prot = 
sk->sk_prot->psock_update_sk_prot; - return sk->sk_prot->psock_update_sk_prot(sk, false); + return sk->sk_prot->psock_update_sk_prot(sk, psock, false); } static struct sk_psock *sock_map_psock_get_checked(struct sock *sk) @@ -1521,7 +1521,7 @@ void sock_map_close(struct sock *sk, long timeout) lock_sock(sk); rcu_read_lock(); - psock = sk_psock(sk); + psock = sk_psock_get(sk); if (unlikely(!psock)) { rcu_read_unlock(); release_sock(sk); @@ -1532,6 +1532,7 @@ void sock_map_close(struct sock *sk, long timeout) sock_map_remove_links(sk, psock); rcu_read_unlock(); sk_psock_stop(psock, true); + sk_psock_put(sk, psock); release_sock(sk); saved_close(sk, timeout); } diff --git a/net/ipv4/tcp_bpf.c b/net/ipv4/tcp_bpf.c index 4f49c12dae53..ad9d17923fc5 100644 --- a/net/ipv4/tcp_bpf.c +++ b/net/ipv4/tcp_bpf.c @@ -499,9 +499,8 @@ static int tcp_bpf_assert_proto_ops(struct proto *ops) ops->sendpage == tcp_sendpage ? 0 : -ENOTSUPP; } -int tcp_bpf_update_proto(struct sock *sk, bool restore) +int tcp_bpf_update_proto(struct sock *sk, struct sk_psock *psock, bool restore) { - struct sk_psock *psock = sk_psock(sk); int family = sk->sk_family == AF_INET6 ? TCP_BPF_IPV6 : TCP_BPF_IPV4; int config = psock->progs.msg_parser ? TCP_BPF_TX : TCP_BPF_BASE; diff --git a/net/ipv4/udp_bpf.c b/net/ipv4/udp_bpf.c index 7d5c4ebf42fe..954c4591a6fd 100644 --- a/net/ipv4/udp_bpf.c +++ b/net/ipv4/udp_bpf.c @@ -103,14 +103,12 @@ static int __init udp_bpf_v4_build_proto(void) } core_initcall(udp_bpf_v4_build_proto); -int udp_bpf_update_proto(struct sock *sk, bool restore) +int udp_bpf_update_proto(struct sock *sk, struct sk_psock *psock, bool restore) { int family = sk->sk_family == AF_INET ? 
UDP_BPF_IPV4 : UDP_BPF_IPV6; - struct sk_psock *psock = sk_psock(sk); if (restore) { sk->sk_write_space = psock->saved_write_space; - /* Pairs with lockless read in sk_clone_lock() */ WRITE_ONCE(sk->sk_prot, psock->sk_proto); return 0; } @@ -118,7 +116,6 @@ int udp_bpf_update_proto(struct sock *sk, bool restore) if (sk->sk_family == AF_INET6) udp_bpf_check_v6_needs_rebuild(psock->sk_proto); - /* Pairs with lockless read in sk_clone_lock() */ WRITE_ONCE(sk->sk_prot, &udp_bpf_prots[family]); return 0; } diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c index a71ed664da0a..cd62d4ba87a9 100644 --- a/net/xdp/xsk.c +++ b/net/xdp/xsk.c @@ -30,7 +30,7 @@ #include "xdp_umem.h" #include "xsk.h" -#define TX_BATCH_SIZE 16 +#define TX_BATCH_SIZE 32 static DEFINE_PER_CPU(struct list_head, xskmap_flush_list); diff --git a/samples/bpf/tracex1_kern.c b/samples/bpf/tracex1_kern.c index 3f4599c9a202..ef30d2b353b0 100644 --- a/samples/bpf/tracex1_kern.c +++ b/samples/bpf/tracex1_kern.c @@ -26,7 +26,7 @@ SEC("kprobe/__netif_receive_skb_core") int bpf_prog1(struct pt_regs *ctx) { - /* attaches to kprobe netif_receive_skb, + /* attaches to kprobe __netif_receive_skb_core, * looks for packets on loobpack device and prints them */ char devname[IFNAMSIZ]; @@ -35,7 +35,7 @@ int bpf_prog1(struct pt_regs *ctx) int len; /* non-portable! works for the given kernel only */ - skb = (struct sk_buff *) PT_REGS_PARM1(ctx); + bpf_probe_read_kernel(&skb, sizeof(skb), (void *)PT_REGS_PARM1(ctx)); dev = _(skb->dev); len = _(skb->len); diff --git a/scripts/link-vmlinux.sh b/scripts/link-vmlinux.sh index 3b261b0f74f0..667aacb9261c 100755 --- a/scripts/link-vmlinux.sh +++ b/scripts/link-vmlinux.sh @@ -213,6 +213,7 @@ vmlinux_link() gen_btf() { local pahole_ver + local extra_paholeopt= if ! 
[ -x "$(command -v ${PAHOLE})" ]; then echo >&2 "BTF: ${1}: pahole (${PAHOLE}) is not available" @@ -227,8 +228,12 @@ gen_btf() vmlinux_link ${1} + if [ "${pahole_ver}" -ge "121" ]; then + extra_paholeopt="${extra_paholeopt} --btf_gen_floats" + fi + info "BTF" ${2} - LLVM_OBJCOPY=${OBJCOPY} ${PAHOLE} -J ${1} + LLVM_OBJCOPY=${OBJCOPY} ${PAHOLE} -J ${extra_paholeopt} ${1} # Create ${2} which contains just .BTF section but no symbols. Add # SHF_ALLOC because .BTF will be part of the vmlinux image. --strip-all diff --git a/tools/bpf/bpftool/btf.c b/tools/bpf/bpftool/btf.c index 62953bbf68b4..385d5c955cf3 100644 --- a/tools/bpf/bpftool/btf.c +++ b/tools/bpf/bpftool/btf.c @@ -71,7 +71,9 @@ static const char *btf_var_linkage_str(__u32 linkage) case BTF_VAR_STATIC: return "static"; case BTF_VAR_GLOBAL_ALLOCATED: - return "global-alloc"; + return "global"; + case BTF_VAR_GLOBAL_EXTERN: + return "extern"; default: return "(unknown)"; } @@ -98,26 +100,28 @@ static const char *btf_str(const struct btf *btf, __u32 off) return btf__name_by_offset(btf, off) ? : "(invalid)"; } +static int btf_kind_safe(int kind) +{ + return kind <= BTF_KIND_MAX ? kind : BTF_KIND_UNKN; +} + static int dump_btf_type(const struct btf *btf, __u32 id, const struct btf_type *t) { json_writer_t *w = json_wtr; - int kind, safe_kind; - - kind = BTF_INFO_KIND(t->info); - safe_kind = kind <= BTF_KIND_MAX ? 
kind : BTF_KIND_UNKN; + int kind = btf_kind(t); if (json_output) { jsonw_start_object(w); jsonw_uint_field(w, "id", id); - jsonw_string_field(w, "kind", btf_kind_str[safe_kind]); + jsonw_string_field(w, "kind", btf_kind_str[btf_kind_safe(kind)]); jsonw_string_field(w, "name", btf_str(btf, t->name_off)); } else { - printf("[%u] %s '%s'", id, btf_kind_str[safe_kind], + printf("[%u] %s '%s'", id, btf_kind_str[btf_kind_safe(kind)], btf_str(btf, t->name_off)); } - switch (BTF_INFO_KIND(t->info)) { + switch (kind) { case BTF_KIND_INT: { __u32 v = *(__u32 *)(t + 1); const char *enc; @@ -300,7 +304,8 @@ static int dump_btf_type(const struct btf *btf, __u32 id, break; } case BTF_KIND_DATASEC: { - const struct btf_var_secinfo *v = (const void *)(t+1); + const struct btf_var_secinfo *v = (const void *)(t + 1); + const struct btf_type *vt; __u16 vlen = BTF_INFO_VLEN(t->info); int i; @@ -322,6 +327,13 @@ static int dump_btf_type(const struct btf *btf, __u32 id, } else { printf("\n\ttype_id=%u offset=%u size=%u", v->type, v->offset, v->size); + + if (v->type <= btf__get_nr_types(btf)) { + vt = btf__type_by_id(btf, v->type); + printf(" (%s '%s')", + btf_kind_str[btf_kind_safe(btf_kind(vt))], + btf_str(btf, vt->name_off)); + } } } if (json_output) diff --git a/tools/bpf/bpftool/net.c b/tools/bpf/bpftool/net.c index ff3aa0cf3997..f836d115d7d6 100644 --- a/tools/bpf/bpftool/net.c +++ b/tools/bpf/bpftool/net.c @@ -157,7 +157,7 @@ static int netlink_recv(int sock, __u32 nl_pid, __u32 seq, if (len == 0) break; - for (nh = (struct nlmsghdr *)buf; NLMSG_OK(nh, len); + for (nh = (struct nlmsghdr *)buf; NLMSG_OK(nh, (unsigned int)len); nh = NLMSG_NEXT(nh, len)) { if (nh->nlmsg_pid != nl_pid) { ret = -LIBBPF_ERRNO__WRNGPID; diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 69902603012c..ec6d85a81744 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -312,6 +312,27 @@ union bpf_iter_link_info { * *ctx_out*, *data_out* (for 
example, packet data), result of the * execution *retval*, and *duration* of the test run. * + * The sizes of the buffers provided as input and output + * parameters *ctx_in*, *ctx_out*, *data_in*, and *data_out* must + * be provided in the corresponding variables *ctx_size_in*, + * *ctx_size_out*, *data_size_in*, and/or *data_size_out*. If any + * of these parameters are not provided (ie set to NULL), the + * corresponding size field must be zero. + * + * Some program types have particular requirements: + * + * **BPF_PROG_TYPE_SK_LOOKUP** + * *data_in* and *data_out* must be NULL. + * + * **BPF_PROG_TYPE_XDP** + * *ctx_in* and *ctx_out* must be NULL. + * + * **BPF_PROG_TYPE_RAW_TRACEPOINT**, + * **BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE** + * + * *ctx_out*, *data_in* and *data_out* must be NULL. + * *repeat* must be zero. + * * Return * Returns zero on success. On error, -1 is returned and *errno* * is set appropriately. @@ -4061,12 +4082,20 @@ union bpf_attr { * of new data availability is sent. * If **BPF_RB_FORCE_WAKEUP** is specified in *flags*, notification * of new data availability is sent unconditionally. + * If **0** is specified in *flags*, an adaptive notification + * of new data availability is sent. + * + * An adaptive notification is a notification sent whenever the user-space + * process has caught up and consumed all available payloads. In case the user-space + * process is still processing a previous payload, then no notification is needed + * as it will process the newly added payload automatically. * Return * 0 on success, or a negative error in case of failure. * * void *bpf_ringbuf_reserve(void *ringbuf, u64 size, u64 flags) * Description * Reserve *size* bytes of payload in a ring buffer *ringbuf*. + * *flags* must be 0. * Return * Valid pointer with *size* bytes of memory available; NULL, * otherwise. @@ -4078,6 +4107,10 @@ union bpf_attr { * of new data availability is sent. 
* If **BPF_RB_FORCE_WAKEUP** is specified in *flags*, notification * of new data availability is sent unconditionally. + * If **0** is specified in *flags*, an adaptive notification + * of new data availability is sent. + * + * See 'bpf_ringbuf_output()' for the definition of adaptive notification. * Return * Nothing. Always succeeds. * @@ -4088,6 +4121,10 @@ union bpf_attr { * of new data availability is sent. * If **BPF_RB_FORCE_WAKEUP** is specified in *flags*, notification * of new data availability is sent unconditionally. + * If **0** is specified in *flags*, an adaptive notification + * of new data availability is sent. + * + * See 'bpf_ringbuf_output()' for the definition of adaptive notification. * Return * Nothing. Always succeeds. * @@ -4578,7 +4615,7 @@ union bpf_attr { * * long bpf_check_mtu(void *ctx, u32 ifindex, u32 *mtu_len, s32 len_diff, u64 flags) * Description - * Check ctx packet size against exceeding MTU of net device (based + * Check packet size against exceeding MTU of net device (based * on *ifindex*). This helper will likely be used in combination * with helpers that adjust/change the packet size. * @@ -4595,6 +4632,14 @@ union bpf_attr { * against the current net device. This is practical if this isn't * used prior to redirect. * + * On input *mtu_len* must be a valid pointer, else verifier will + * reject BPF program. If the value *mtu_len* is initialized to + * zero then the ctx packet size is use. When value *mtu_len* is + * provided as input this specify the L3 length that the MTU check + * is done against. Remember XDP and TC length operate at L2, but + * this value is L3 as this correlate to MTU and IP-header tot_len + * values which are L3 (similar behavior as bpf_fib_lookup). + * * The Linux kernel route table can configure MTUs on a more * specific per route level, which is not provided by this helper. 
* For route level MTU checks use the **bpf_fib_lookup**\ () @@ -4619,11 +4664,9 @@ union bpf_attr { * * On return *mtu_len* pointer contains the MTU value of the net * device. Remember the net device configured MTU is the L3 size, - * which is returned here and XDP and TX length operate at L2. + * which is returned here and XDP and TC length operate at L2. * Helper take this into account for you, but remember when using - * MTU value in your BPF-code. On input *mtu_len* must be a valid - * pointer and be initialized (to zero), else verifier will reject - * BPF program. + * MTU value in your BPF-code. * * Return * * 0 on success, and populate MTU value in *mtu_len* pointer. @@ -4665,6 +4708,33 @@ union bpf_attr { * Return * The number of traversed map elements for success, **-EINVAL** for * invalid **flags**. + * + * long bpf_snprintf(char *str, u32 str_size, const char *fmt, u64 *data, u32 data_len) + * Description + * Outputs a string into the **str** buffer of size **str_size** + * based on a format string stored in a read-only map pointed by + * **fmt**. + * + * Each format specifier in **fmt** corresponds to one u64 element + * in the **data** array. For strings and pointers where pointees + * are accessed, only the pointer values are stored in the *data* + * array. The *data_len* is the size of *data* in bytes. + * + * Formats **%s** and **%p{i,I}{4,6}** require to read kernel + * memory. Reading kernel memory may fail due to either invalid + * address or valid address but requiring a major memory fault. If + * reading kernel memory fails, the string for **%s** will be an + * empty string, and the ip address for **%p{i,I}{4,6}** will be 0. + * Not returning error to bpf program is consistent with what + * **bpf_trace_printk**\ () does for now. + * + * Return + * The strictly positive length of the formatted string, including + * the trailing zero character. 
If the return value is greater than + * **str_size**, **str** contains a truncated string, guaranteed to + * be zero-terminated except when **str_size** is 0. + * + * Or **-EBUSY** if the per-CPU memory copy buffer is busy. */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -4832,6 +4902,7 @@ union bpf_attr { FN(sock_from_file), \ FN(check_mtu), \ FN(for_each_map_elem), \ + FN(snprintf), \ /* */ /* integer value in 'imm' field of BPF_CALL instruction selects which helper @@ -5373,6 +5444,8 @@ struct bpf_link_info { } raw_tracepoint; struct { __u32 attach_type; + __u32 target_obj_id; /* prog_id for PROG_EXT, otherwise btf object id */ + __u32 target_btf_id; /* BTF type id inside the object */ } tracing; struct { __u64 cgroup_id; diff --git a/tools/lib/bpf/bpf_helpers.h b/tools/lib/bpf/bpf_helpers.h index cc2e51c64a54..9720dc0b4605 100644 --- a/tools/lib/bpf/bpf_helpers.h +++ b/tools/lib/bpf/bpf_helpers.h @@ -25,9 +25,16 @@ /* * Helper macro to place programs, maps, license in * different sections in elf_bpf file. Section names - * are interpreted by elf_bpf loader + * are interpreted by libbpf depending on the context (BPF programs, BPF maps, + * extern variables, etc). + * To allow use of SEC() with externs (e.g., for extern .maps declarations), + * make sure __attribute__((unused)) doesn't trigger compilation warning. */ -#define SEC(NAME) __attribute__((section(NAME), used)) +#define SEC(name) \ + _Pragma("GCC diagnostic push") \ + _Pragma("GCC diagnostic ignored \"-Wignored-attributes\"") \ + __attribute__((section(name), used)) \ + _Pragma("GCC diagnostic pop") \ /* Avoid 'linux/stddef.h' definition of '__always_inline'. 
*/ #undef __always_inline @@ -40,6 +47,14 @@ #define __weak __attribute__((weak)) #endif +/* + * Use __hidden attribute to mark a non-static BPF subprogram effectively + * static for BPF verifier's verification algorithm purposes, allowing more + * extensive and permissive BPF verification process, taking into account + * subprogram's caller context. + */ +#define __hidden __attribute__((visibility("hidden"))) + /* When utilizing vmlinux.h with BPF CO-RE, user BPF programs can't include * any system-level headers (such as stddef.h, linux/version.h, etc), and * commonly-used macros like NULL and KERNEL_VERSION aren't available through @@ -51,7 +66,7 @@ #endif #ifndef KERNEL_VERSION -#define KERNEL_VERSION(a,b,c) (((a) << 16) + ((b) << 8) + ((c) > 255 ? 255 : (c)) +#define KERNEL_VERSION(a, b, c) (((a) << 16) + ((b) << 8) + ((c) > 255 ? 255 : (c))) #endif /* diff --git a/tools/lib/bpf/bpf_tracing.h b/tools/lib/bpf/bpf_tracing.h index f9ef37707888..8c954ebc0c7c 100644 --- a/tools/lib/bpf/bpf_tracing.h +++ b/tools/lib/bpf/bpf_tracing.h @@ -413,20 +413,56 @@ typeof(name(0)) name(struct pt_regs *ctx) \ } \ static __always_inline typeof(name(0)) ____##name(struct pt_regs *ctx, ##args) +#define ___bpf_fill0(arr, p, x) do {} while (0) +#define ___bpf_fill1(arr, p, x) arr[p] = x +#define ___bpf_fill2(arr, p, x, args...) arr[p] = x; ___bpf_fill1(arr, p + 1, args) +#define ___bpf_fill3(arr, p, x, args...) arr[p] = x; ___bpf_fill2(arr, p + 1, args) +#define ___bpf_fill4(arr, p, x, args...) arr[p] = x; ___bpf_fill3(arr, p + 1, args) +#define ___bpf_fill5(arr, p, x, args...) arr[p] = x; ___bpf_fill4(arr, p + 1, args) +#define ___bpf_fill6(arr, p, x, args...) arr[p] = x; ___bpf_fill5(arr, p + 1, args) +#define ___bpf_fill7(arr, p, x, args...) arr[p] = x; ___bpf_fill6(arr, p + 1, args) +#define ___bpf_fill8(arr, p, x, args...) arr[p] = x; ___bpf_fill7(arr, p + 1, args) +#define ___bpf_fill9(arr, p, x, args...) 
arr[p] = x; ___bpf_fill8(arr, p + 1, args) +#define ___bpf_fill10(arr, p, x, args...) arr[p] = x; ___bpf_fill9(arr, p + 1, args) +#define ___bpf_fill11(arr, p, x, args...) arr[p] = x; ___bpf_fill10(arr, p + 1, args) +#define ___bpf_fill12(arr, p, x, args...) arr[p] = x; ___bpf_fill11(arr, p + 1, args) +#define ___bpf_fill(arr, args...) \ + ___bpf_apply(___bpf_fill, ___bpf_narg(args))(arr, 0, args) + /* * BPF_SEQ_PRINTF to wrap bpf_seq_printf to-be-printed values * in a structure. */ -#define BPF_SEQ_PRINTF(seq, fmt, args...) \ - ({ \ - _Pragma("GCC diagnostic push") \ - _Pragma("GCC diagnostic ignored \"-Wint-conversion\"") \ - static const char ___fmt[] = fmt; \ - unsigned long long ___param[] = { args }; \ - _Pragma("GCC diagnostic pop") \ - int ___ret = bpf_seq_printf(seq, ___fmt, sizeof(___fmt), \ - ___param, sizeof(___param)); \ - ___ret; \ - }) +#define BPF_SEQ_PRINTF(seq, fmt, args...) \ +({ \ + static const char ___fmt[] = fmt; \ + unsigned long long ___param[___bpf_narg(args)]; \ + \ + _Pragma("GCC diagnostic push") \ + _Pragma("GCC diagnostic ignored \"-Wint-conversion\"") \ + ___bpf_fill(___param, args); \ + _Pragma("GCC diagnostic pop") \ + \ + bpf_seq_printf(seq, ___fmt, sizeof(___fmt), \ + ___param, sizeof(___param)); \ +}) + +/* + * BPF_SNPRINTF wraps the bpf_snprintf helper with variadic arguments instead of + * an array of u64. + */ +#define BPF_SNPRINTF(out, out_size, fmt, args...) 
\ +({ \ + static const char ___fmt[] = fmt; \ + unsigned long long ___param[___bpf_narg(args)]; \ + \ + _Pragma("GCC diagnostic push") \ + _Pragma("GCC diagnostic ignored \"-Wint-conversion\"") \ + ___bpf_fill(___param, args); \ + _Pragma("GCC diagnostic pop") \ + \ + bpf_snprintf(out, out_size, ___fmt, \ + ___param, sizeof(___param)); \ +}) #endif diff --git a/tools/lib/bpf/btf.c b/tools/lib/bpf/btf.c index d30e67e7e1e5..d57e13a13798 100644 --- a/tools/lib/bpf/btf.c +++ b/tools/lib/bpf/btf.c @@ -1605,11 +1605,6 @@ static void *btf_add_type_mem(struct btf *btf, size_t add_sz) btf->hdr->type_len, UINT_MAX, add_sz); } -static __u32 btf_type_info(int kind, int vlen, int kflag) -{ - return (kflag << 31) | (kind << 24) | vlen; -} - static void btf_type_inc_vlen(struct btf_type *t) { t->info = btf_type_info(btf_kind(t), btf_vlen(t) + 1, btf_kflag(t)); diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 7aad78dbb4b4..a1cddd17af7d 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -69,8 +69,7 @@ #define __printf(a, b) __attribute__((format(printf, a, b))) static struct bpf_map *bpf_object__add_map(struct bpf_object *obj); -static const struct btf_type * -skip_mods_and_typedefs(const struct btf *btf, __u32 id, __u32 *res_id); +static bool prog_is_subprog(const struct bpf_object *obj, const struct bpf_program *prog); static int __base_pr(enum libbpf_print_level level, const char *format, va_list args) @@ -195,7 +194,6 @@ struct reloc_desc { int insn_idx; int map_idx; int sym_off; - bool processed; }; struct bpf_sec_def; @@ -275,6 +273,7 @@ struct bpf_program { bpf_program_clear_priv_t clear_priv; bool load; + bool mark_btf_static; enum bpf_prog_type type; enum bpf_attach_type expected_attach_type; int prog_ifindex; @@ -501,8 +500,6 @@ static Elf_Scn *elf_sec_by_name(const struct bpf_object *obj, const char *name); static int elf_sec_hdr(const struct bpf_object *obj, Elf_Scn *scn, GElf_Shdr *hdr); static const char *elf_sec_name(const struct 
bpf_object *obj, Elf_Scn *scn); static Elf_Data *elf_sec_data(const struct bpf_object *obj, Elf_Scn *scn); -static int elf_sym_by_sec_off(const struct bpf_object *obj, size_t sec_idx, - size_t off, __u32 sym_type, GElf_Sym *sym); void bpf_program__unload(struct bpf_program *prog) { @@ -643,25 +640,29 @@ static int bpf_object__add_programs(struct bpf_object *obj, Elf_Data *sec_data, const char *sec_name, int sec_idx) { + Elf_Data *symbols = obj->efile.symbols; struct bpf_program *prog, *progs; void *data = sec_data->d_buf; - size_t sec_sz = sec_data->d_size, sec_off, prog_sz; - int nr_progs, err; + size_t sec_sz = sec_data->d_size, sec_off, prog_sz, nr_syms; + int nr_progs, err, i; const char *name; GElf_Sym sym; progs = obj->programs; nr_progs = obj->nr_programs; + nr_syms = symbols->d_size / sizeof(GElf_Sym); sec_off = 0; - while (sec_off < sec_sz) { - if (elf_sym_by_sec_off(obj, sec_idx, sec_off, STT_FUNC, &sym)) { - pr_warn("sec '%s': failed to find program symbol at offset %zu\n", - sec_name, sec_off); - return -LIBBPF_ERRNO__FORMAT; - } + for (i = 0; i < nr_syms; i++) { + if (!gelf_getsym(symbols, i, &sym)) + continue; + if (sym.st_shndx != sec_idx) + continue; + if (GELF_ST_TYPE(sym.st_info) != STT_FUNC) + continue; prog_sz = sym.st_size; + sec_off = sym.st_value; name = elf_sym_str(obj, sym.st_name); if (!name) { @@ -699,10 +700,17 @@ bpf_object__add_programs(struct bpf_object *obj, Elf_Data *sec_data, if (err) return err; + /* if function is a global/weak symbol, but has hidden + * visibility (STV_HIDDEN), mark its BTF FUNC as static to + * enable more permissive BPF verification mode with more + * outside context available to BPF verifier + */ + if (GELF_ST_BIND(sym.st_info) != STB_LOCAL + && GELF_ST_VISIBILITY(sym.st_other) == STV_HIDDEN) + prog->mark_btf_static = true; + nr_progs++; obj->nr_programs = nr_progs; - - sec_off += prog_sz; } return 0; @@ -1896,7 +1904,7 @@ static int bpf_object__init_user_maps(struct bpf_object *obj, bool strict) return 0; } 
-static const struct btf_type * +const struct btf_type * skip_mods_and_typedefs(const struct btf *btf, __u32 id, __u32 *res_id) { const struct btf_type *t = btf__type_by_id(btf, id); @@ -1951,16 +1959,11 @@ static const char *__btf_kind_str(__u16 kind) } } -static const char *btf_kind_str(const struct btf_type *t) +const char *btf_kind_str(const struct btf_type *t) { return __btf_kind_str(btf_kind(t)); } -static enum btf_func_linkage btf_func_linkage(const struct btf_type *t) -{ - return (enum btf_func_linkage)BTF_INFO_VLEN(t->info); -} - /* * Fetch integer attribute of BTF map definition. Such attributes are * represented using a pointer to an array, in which dimensionality of array @@ -2015,254 +2018,262 @@ static int build_map_pin_path(struct bpf_map *map, const char *path) return bpf_map__set_pin_path(map, buf); } - -static int parse_btf_map_def(struct bpf_object *obj, - struct bpf_map *map, - const struct btf_type *def, - bool strict, bool is_inner, - const char *pin_root_path) +int parse_btf_map_def(const char *map_name, struct btf *btf, + const struct btf_type *def_t, bool strict, + struct btf_map_def *map_def, struct btf_map_def *inner_def) { const struct btf_type *t; const struct btf_member *m; + bool is_inner = inner_def == NULL; int vlen, i; - vlen = btf_vlen(def); - m = btf_members(def); + vlen = btf_vlen(def_t); + m = btf_members(def_t); for (i = 0; i < vlen; i++, m++) { - const char *name = btf__name_by_offset(obj->btf, m->name_off); + const char *name = btf__name_by_offset(btf, m->name_off); if (!name) { - pr_warn("map '%s': invalid field #%d.\n", map->name, i); + pr_warn("map '%s': invalid field #%d.\n", map_name, i); return -EINVAL; } if (strcmp(name, "type") == 0) { - if (!get_map_field_int(map->name, obj->btf, m, - &map->def.type)) + if (!get_map_field_int(map_name, btf, m, &map_def->map_type)) return -EINVAL; - pr_debug("map '%s': found type = %u.\n", - map->name, map->def.type); + map_def->parts |= MAP_DEF_MAP_TYPE; } else if (strcmp(name, 
"max_entries") == 0) { - if (!get_map_field_int(map->name, obj->btf, m, - &map->def.max_entries)) + if (!get_map_field_int(map_name, btf, m, &map_def->max_entries)) return -EINVAL; - pr_debug("map '%s': found max_entries = %u.\n", - map->name, map->def.max_entries); + map_def->parts |= MAP_DEF_MAX_ENTRIES; } else if (strcmp(name, "map_flags") == 0) { - if (!get_map_field_int(map->name, obj->btf, m, - &map->def.map_flags)) + if (!get_map_field_int(map_name, btf, m, &map_def->map_flags)) return -EINVAL; - pr_debug("map '%s': found map_flags = %u.\n", - map->name, map->def.map_flags); + map_def->parts |= MAP_DEF_MAP_FLAGS; } else if (strcmp(name, "numa_node") == 0) { - if (!get_map_field_int(map->name, obj->btf, m, &map->numa_node)) + if (!get_map_field_int(map_name, btf, m, &map_def->numa_node)) return -EINVAL; - pr_debug("map '%s': found numa_node = %u.\n", map->name, map->numa_node); + map_def->parts |= MAP_DEF_NUMA_NODE; } else if (strcmp(name, "key_size") == 0) { __u32 sz; - if (!get_map_field_int(map->name, obj->btf, m, &sz)) + if (!get_map_field_int(map_name, btf, m, &sz)) return -EINVAL; - pr_debug("map '%s': found key_size = %u.\n", - map->name, sz); - if (map->def.key_size && map->def.key_size != sz) { + if (map_def->key_size && map_def->key_size != sz) { pr_warn("map '%s': conflicting key size %u != %u.\n", - map->name, map->def.key_size, sz); + map_name, map_def->key_size, sz); return -EINVAL; } - map->def.key_size = sz; + map_def->key_size = sz; + map_def->parts |= MAP_DEF_KEY_SIZE; } else if (strcmp(name, "key") == 0) { __s64 sz; - t = btf__type_by_id(obj->btf, m->type); + t = btf__type_by_id(btf, m->type); if (!t) { pr_warn("map '%s': key type [%d] not found.\n", - map->name, m->type); + map_name, m->type); return -EINVAL; } if (!btf_is_ptr(t)) { pr_warn("map '%s': key spec is not PTR: %s.\n", - map->name, btf_kind_str(t)); + map_name, btf_kind_str(t)); return -EINVAL; } - sz = btf__resolve_size(obj->btf, t->type); + sz = btf__resolve_size(btf, 
t->type); if (sz < 0) { pr_warn("map '%s': can't determine key size for type [%u]: %zd.\n", - map->name, t->type, (ssize_t)sz); + map_name, t->type, (ssize_t)sz); return sz; } - pr_debug("map '%s': found key [%u], sz = %zd.\n", - map->name, t->type, (ssize_t)sz); - if (map->def.key_size && map->def.key_size != sz) { + if (map_def->key_size && map_def->key_size != sz) { pr_warn("map '%s': conflicting key size %u != %zd.\n", - map->name, map->def.key_size, (ssize_t)sz); + map_name, map_def->key_size, (ssize_t)sz); return -EINVAL; } - map->def.key_size = sz; - map->btf_key_type_id = t->type; + map_def->key_size = sz; + map_def->key_type_id = t->type; + map_def->parts |= MAP_DEF_KEY_SIZE | MAP_DEF_KEY_TYPE; } else if (strcmp(name, "value_size") == 0) { __u32 sz; - if (!get_map_field_int(map->name, obj->btf, m, &sz)) + if (!get_map_field_int(map_name, btf, m, &sz)) return -EINVAL; - pr_debug("map '%s': found value_size = %u.\n", - map->name, sz); - if (map->def.value_size && map->def.value_size != sz) { + if (map_def->value_size && map_def->value_size != sz) { pr_warn("map '%s': conflicting value size %u != %u.\n", - map->name, map->def.value_size, sz); + map_name, map_def->value_size, sz); return -EINVAL; } - map->def.value_size = sz; + map_def->value_size = sz; + map_def->parts |= MAP_DEF_VALUE_SIZE; } else if (strcmp(name, "value") == 0) { __s64 sz; - t = btf__type_by_id(obj->btf, m->type); + t = btf__type_by_id(btf, m->type); if (!t) { pr_warn("map '%s': value type [%d] not found.\n", - map->name, m->type); + map_name, m->type); return -EINVAL; } if (!btf_is_ptr(t)) { pr_warn("map '%s': value spec is not PTR: %s.\n", - map->name, btf_kind_str(t)); + map_name, btf_kind_str(t)); return -EINVAL; } - sz = btf__resolve_size(obj->btf, t->type); + sz = btf__resolve_size(btf, t->type); if (sz < 0) { pr_warn("map '%s': can't determine value size for type [%u]: %zd.\n", - map->name, t->type, (ssize_t)sz); + map_name, t->type, (ssize_t)sz); return sz; } - pr_debug("map '%s': 
found value [%u], sz = %zd.\n", - map->name, t->type, (ssize_t)sz); - if (map->def.value_size && map->def.value_size != sz) { + if (map_def->value_size && map_def->value_size != sz) { pr_warn("map '%s': conflicting value size %u != %zd.\n", - map->name, map->def.value_size, (ssize_t)sz); + map_name, map_def->value_size, (ssize_t)sz); return -EINVAL; } - map->def.value_size = sz; - map->btf_value_type_id = t->type; + map_def->value_size = sz; + map_def->value_type_id = t->type; + map_def->parts |= MAP_DEF_VALUE_SIZE | MAP_DEF_VALUE_TYPE; } else if (strcmp(name, "values") == 0) { + char inner_map_name[128]; int err; if (is_inner) { pr_warn("map '%s': multi-level inner maps not supported.\n", - map->name); + map_name); return -ENOTSUP; } if (i != vlen - 1) { pr_warn("map '%s': '%s' member should be last.\n", - map->name, name); + map_name, name); return -EINVAL; } - if (!bpf_map_type__is_map_in_map(map->def.type)) { + if (!bpf_map_type__is_map_in_map(map_def->map_type)) { pr_warn("map '%s': should be map-in-map.\n", - map->name); + map_name); return -ENOTSUP; } - if (map->def.value_size && map->def.value_size != 4) { + if (map_def->value_size && map_def->value_size != 4) { pr_warn("map '%s': conflicting value size %u != 4.\n", - map->name, map->def.value_size); + map_name, map_def->value_size); return -EINVAL; } - map->def.value_size = 4; - t = btf__type_by_id(obj->btf, m->type); + map_def->value_size = 4; + t = btf__type_by_id(btf, m->type); if (!t) { pr_warn("map '%s': map-in-map inner type [%d] not found.\n", - map->name, m->type); + map_name, m->type); return -EINVAL; } if (!btf_is_array(t) || btf_array(t)->nelems) { pr_warn("map '%s': map-in-map inner spec is not a zero-sized array.\n", - map->name); + map_name); return -EINVAL; } - t = skip_mods_and_typedefs(obj->btf, btf_array(t)->type, - NULL); + t = skip_mods_and_typedefs(btf, btf_array(t)->type, NULL); if (!btf_is_ptr(t)) { pr_warn("map '%s': map-in-map inner def is of unexpected kind %s.\n", - map->name, 
btf_kind_str(t)); + map_name, btf_kind_str(t)); return -EINVAL; } - t = skip_mods_and_typedefs(obj->btf, t->type, NULL); + t = skip_mods_and_typedefs(btf, t->type, NULL); if (!btf_is_struct(t)) { pr_warn("map '%s': map-in-map inner def is of unexpected kind %s.\n", - map->name, btf_kind_str(t)); + map_name, btf_kind_str(t)); return -EINVAL; } - map->inner_map = calloc(1, sizeof(*map->inner_map)); - if (!map->inner_map) - return -ENOMEM; - map->inner_map->sec_idx = obj->efile.btf_maps_shndx; - map->inner_map->name = malloc(strlen(map->name) + - sizeof(".inner") + 1); - if (!map->inner_map->name) - return -ENOMEM; - sprintf(map->inner_map->name, "%s.inner", map->name); - - err = parse_btf_map_def(obj, map->inner_map, t, strict, - true /* is_inner */, NULL); + snprintf(inner_map_name, sizeof(inner_map_name), "%s.inner", map_name); + err = parse_btf_map_def(inner_map_name, btf, t, strict, inner_def, NULL); if (err) return err; + + map_def->parts |= MAP_DEF_INNER_MAP; } else if (strcmp(name, "pinning") == 0) { __u32 val; - int err; if (is_inner) { - pr_debug("map '%s': inner def can't be pinned.\n", - map->name); + pr_warn("map '%s': inner def can't be pinned.\n", map_name); return -EINVAL; } - if (!get_map_field_int(map->name, obj->btf, m, &val)) + if (!get_map_field_int(map_name, btf, m, &val)) return -EINVAL; - pr_debug("map '%s': found pinning = %u.\n", - map->name, val); - - if (val != LIBBPF_PIN_NONE && - val != LIBBPF_PIN_BY_NAME) { + if (val != LIBBPF_PIN_NONE && val != LIBBPF_PIN_BY_NAME) { pr_warn("map '%s': invalid pinning value %u.\n", - map->name, val); + map_name, val); return -EINVAL; } - if (val == LIBBPF_PIN_BY_NAME) { - err = build_map_pin_path(map, pin_root_path); - if (err) { - pr_warn("map '%s': couldn't build pin path.\n", - map->name); - return err; - } - } + map_def->pinning = val; + map_def->parts |= MAP_DEF_PINNING; } else { if (strict) { - pr_warn("map '%s': unknown field '%s'.\n", - map->name, name); + pr_warn("map '%s': unknown field 
'%s'.\n", map_name, name); return -ENOTSUP; } - pr_debug("map '%s': ignoring unknown field '%s'.\n", - map->name, name); + pr_debug("map '%s': ignoring unknown field '%s'.\n", map_name, name); } } - if (map->def.type == BPF_MAP_TYPE_UNSPEC) { - pr_warn("map '%s': map type isn't specified.\n", map->name); + if (map_def->map_type == BPF_MAP_TYPE_UNSPEC) { + pr_warn("map '%s': map type isn't specified.\n", map_name); return -EINVAL; } return 0; } +static void fill_map_from_def(struct bpf_map *map, const struct btf_map_def *def) +{ + map->def.type = def->map_type; + map->def.key_size = def->key_size; + map->def.value_size = def->value_size; + map->def.max_entries = def->max_entries; + map->def.map_flags = def->map_flags; + + map->numa_node = def->numa_node; + map->btf_key_type_id = def->key_type_id; + map->btf_value_type_id = def->value_type_id; + + if (def->parts & MAP_DEF_MAP_TYPE) + pr_debug("map '%s': found type = %u.\n", map->name, def->map_type); + + if (def->parts & MAP_DEF_KEY_TYPE) + pr_debug("map '%s': found key [%u], sz = %u.\n", + map->name, def->key_type_id, def->key_size); + else if (def->parts & MAP_DEF_KEY_SIZE) + pr_debug("map '%s': found key_size = %u.\n", map->name, def->key_size); + + if (def->parts & MAP_DEF_VALUE_TYPE) + pr_debug("map '%s': found value [%u], sz = %u.\n", + map->name, def->value_type_id, def->value_size); + else if (def->parts & MAP_DEF_VALUE_SIZE) + pr_debug("map '%s': found value_size = %u.\n", map->name, def->value_size); + + if (def->parts & MAP_DEF_MAX_ENTRIES) + pr_debug("map '%s': found max_entries = %u.\n", map->name, def->max_entries); + if (def->parts & MAP_DEF_MAP_FLAGS) + pr_debug("map '%s': found map_flags = %u.\n", map->name, def->map_flags); + if (def->parts & MAP_DEF_PINNING) + pr_debug("map '%s': found pinning = %u.\n", map->name, def->pinning); + if (def->parts & MAP_DEF_NUMA_NODE) + pr_debug("map '%s': found numa_node = %u.\n", map->name, def->numa_node); + + if (def->parts & MAP_DEF_INNER_MAP) + pr_debug("map 
'%s': found inner map definition.\n", map->name); +} + static int bpf_object__init_user_btf_map(struct bpf_object *obj, const struct btf_type *sec, int var_idx, int sec_idx, const Elf_Data *data, bool strict, const char *pin_root_path) { + struct btf_map_def map_def = {}, inner_def = {}; const struct btf_type *var, *def; const struct btf_var_secinfo *vi; const struct btf_var *var_extra; const char *map_name; struct bpf_map *map; + int err; vi = btf_var_secinfos(sec) + var_idx; var = btf__type_by_id(obj->btf, vi->type); @@ -2316,7 +2327,35 @@ static int bpf_object__init_user_btf_map(struct bpf_object *obj, pr_debug("map '%s': at sec_idx %d, offset %zu.\n", map_name, map->sec_idx, map->sec_offset); - return parse_btf_map_def(obj, map, def, strict, false, pin_root_path); + err = parse_btf_map_def(map->name, obj->btf, def, strict, &map_def, &inner_def); + if (err) + return err; + + fill_map_from_def(map, &map_def); + + if (map_def.pinning == LIBBPF_PIN_BY_NAME) { + err = build_map_pin_path(map, pin_root_path); + if (err) { + pr_warn("map '%s': couldn't build pin path.\n", map->name); + return err; + } + } + + if (map_def.parts & MAP_DEF_INNER_MAP) { + map->inner_map = calloc(1, sizeof(*map->inner_map)); + if (!map->inner_map) + return -ENOMEM; + map->inner_map->fd = -1; + map->inner_map->sec_idx = sec_idx; + map->inner_map->name = malloc(strlen(map_name) + sizeof(".inner") + 1); + if (!map->inner_map->name) + return -ENOMEM; + sprintf(map->inner_map->name, "%s.inner", map_name); + + fill_map_from_def(map->inner_map, &inner_def); + } + + return 0; } static int bpf_object__init_user_btf_maps(struct bpf_object *obj, bool strict, @@ -2618,7 +2657,7 @@ static int bpf_object__sanitize_and_load_btf(struct bpf_object *obj) { struct btf *kern_btf = obj->btf; bool btf_mandatory, sanitize; - int err = 0; + int i, err = 0; if (!obj->btf) return 0; @@ -2632,6 +2671,38 @@ static int bpf_object__sanitize_and_load_btf(struct bpf_object *obj) return 0; } + /* Even though some subprogs 
are global/weak, user might prefer more + * permissive BPF verification process that BPF verifier performs for + * static functions, taking into account more context from the caller + * functions. In such case, they need to mark such subprogs with + * __attribute__((visibility("hidden"))) and libbpf will adjust + * corresponding FUNC BTF type to be marked as static and trigger more + * involved BPF verification process. + */ + for (i = 0; i < obj->nr_programs; i++) { + struct bpf_program *prog = &obj->programs[i]; + struct btf_type *t; + const char *name; + int j, n; + + if (!prog->mark_btf_static || !prog_is_subprog(obj, prog)) + continue; + + n = btf__get_nr_types(obj->btf); + for (j = 1; j <= n; j++) { + t = btf_type_by_id(obj->btf, j); + if (!btf_is_func(t) || btf_func_linkage(t) != BTF_FUNC_GLOBAL) + continue; + + name = btf__str_by_offset(obj->btf, t->name_off); + if (strcmp(name, prog->name) != 0) + continue; + + t->info = btf_type_info(BTF_KIND_FUNC, BTF_FUNC_STATIC, 0); + break; + } + } + sanitize = btf_needs_sanitization(obj); if (sanitize) { const void *raw_data; @@ -2782,26 +2853,6 @@ static Elf_Data *elf_sec_data(const struct bpf_object *obj, Elf_Scn *scn) return data; } -static int elf_sym_by_sec_off(const struct bpf_object *obj, size_t sec_idx, - size_t off, __u32 sym_type, GElf_Sym *sym) -{ - Elf_Data *symbols = obj->efile.symbols; - size_t n = symbols->d_size / sizeof(GElf_Sym); - int i; - - for (i = 0; i < n; i++) { - if (!gelf_getsym(symbols, i, sym)) - continue; - if (sym->st_shndx != sec_idx || sym->st_value != off) - continue; - if (GELF_ST_TYPE(sym->st_info) != sym_type) - continue; - return 0; - } - - return -ENOENT; -} - static bool is_sec_name_dwarf(const char *name) { /* approximation, but the actual list is too long */ @@ -3498,8 +3549,6 @@ static int bpf_program__record_reloc(struct bpf_program *prog, const char *sym_sec_name; struct bpf_map *map; - reloc_desc->processed = false; - if (!is_call_insn(insn) && !is_ldimm64_insn(insn)) { 
pr_warn("prog '%s': invalid relo against '%s' for insns[%d].code 0x%x\n", prog->name, sym_name, insn_idx, insn->code); @@ -3682,11 +3731,16 @@ bpf_object__collect_prog_relos(struct bpf_object *obj, GElf_Shdr *shdr, Elf_Data int err, i, nrels; const char *sym_name; __u32 insn_idx; + Elf_Scn *scn; + Elf_Data *scn_data; GElf_Sym sym; GElf_Rel rel; + scn = elf_sec_by_idx(obj, sec_idx); + scn_data = elf_sec_data(obj, scn); + relo_sec_name = elf_sec_str(obj, shdr->sh_name); - sec_name = elf_sec_name(obj, elf_sec_by_idx(obj, sec_idx)); + sec_name = elf_sec_name(obj, scn); if (!relo_sec_name || !sec_name) return -EINVAL; @@ -3704,7 +3758,8 @@ bpf_object__collect_prog_relos(struct bpf_object *obj, GElf_Shdr *shdr, Elf_Data relo_sec_name, (size_t)GELF_R_SYM(rel.r_info), i); return -LIBBPF_ERRNO__FORMAT; } - if (rel.r_offset % BPF_INSN_SZ) { + + if (rel.r_offset % BPF_INSN_SZ || rel.r_offset >= scn_data->d_size) { pr_warn("sec '%s': invalid offset 0x%zx for relo #%d\n", relo_sec_name, (size_t)GELF_R_SYM(rel.r_info), i); return -LIBBPF_ERRNO__FORMAT; @@ -3728,9 +3783,9 @@ bpf_object__collect_prog_relos(struct bpf_object *obj, GElf_Shdr *shdr, Elf_Data prog = find_prog_by_sec_insn(obj, sec_idx, insn_idx); if (!prog) { - pr_warn("sec '%s': relo #%d: program not found in section '%s' for insn #%u\n", + pr_debug("sec '%s': relo #%d: couldn't find program in section '%s' for insn #%u, probably overridden weak function, skipping...\n", relo_sec_name, i, sec_name, insn_idx); - return -LIBBPF_ERRNO__RELOC; + continue; } relos = libbpf_reallocarray(prog->reloc_desc, @@ -3845,6 +3900,14 @@ __u32 bpf_map__max_entries(const struct bpf_map *map) return map->def.max_entries; } +struct bpf_map *bpf_map__inner_map(struct bpf_map *map) +{ + if (!bpf_map_type__is_map_in_map(map->def.type)) + return NULL; + + return map->inner_map; +} + int bpf_map__set_max_entries(struct bpf_map *map, __u32 max_entries) { if (map->fd >= 0) @@ -6305,13 +6368,11 @@ bpf_object__relocate_data(struct bpf_object 
*obj, struct bpf_program *prog) case RELO_LD64: insn[0].src_reg = BPF_PSEUDO_MAP_FD; insn[0].imm = obj->maps[relo->map_idx].fd; - relo->processed = true; break; case RELO_DATA: insn[0].src_reg = BPF_PSEUDO_MAP_VALUE; insn[1].imm = insn[0].imm + relo->sym_off; insn[0].imm = obj->maps[relo->map_idx].fd; - relo->processed = true; break; case RELO_EXTERN_VAR: ext = &obj->externs[relo->sym_off]; @@ -6329,13 +6390,11 @@ bpf_object__relocate_data(struct bpf_object *obj, struct bpf_program *prog) insn[1].imm = ext->ksym.addr >> 32; } } - relo->processed = true; break; case RELO_EXTERN_FUNC: ext = &obj->externs[relo->sym_off]; insn[0].src_reg = BPF_PSEUDO_KFUNC_CALL; insn[0].imm = ext->ksym.kernel_btf_id; - relo->processed = true; break; case RELO_SUBPROG_ADDR: insn[0].src_reg = BPF_PSEUDO_FUNC; @@ -6621,9 +6680,6 @@ bpf_object__reloc_code(struct bpf_object *obj, struct bpf_program *main_prog, * different main programs */ insn->imm = subprog->sub_insn_off - (prog->sub_insn_off + insn_idx) - 1; - if (relo) - relo->processed = true; - pr_debug("prog '%s': insn #%zu relocated, imm %d points to subprog '%s' (now at %zu offset)\n", prog->name, insn_idx, insn->imm, subprog->name, subprog->sub_insn_off); } @@ -6716,7 +6772,7 @@ static int bpf_object__relocate_calls(struct bpf_object *obj, struct bpf_program *prog) { struct bpf_program *subprog; - int i, j, err; + int i, err; /* mark all subprogs as not relocated (yet) within the context of * current main program @@ -6727,9 +6783,6 @@ bpf_object__relocate_calls(struct bpf_object *obj, struct bpf_program *prog) continue; subprog->sub_insn_off = 0; - for (j = 0; j < subprog->nr_reloc; j++) - if (subprog->reloc_desc[j].type == RELO_CALL) - subprog->reloc_desc[j].processed = false; } err = bpf_object__reloc_code(obj, prog, prog); @@ -6976,7 +7029,7 @@ static bool insn_is_helper_call(struct bpf_insn *insn, enum bpf_func_id *func_id return false; } -static int bpf_object__sanitize_prog(struct bpf_object* obj, struct bpf_program *prog) 
+static int bpf_object__sanitize_prog(struct bpf_object *obj, struct bpf_program *prog) { struct bpf_insn *insn = prog->insns; enum bpf_func_id func_id; @@ -9476,6 +9529,7 @@ int bpf_map__set_inner_map_fd(struct bpf_map *map, int fd) pr_warn("error: inner_map_fd already specified\n"); return -EINVAL; } + zfree(&map->inner_map); map->inner_map_fd = fd; return 0; } diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h index f500621d28e5..bec4e6a6e31d 100644 --- a/tools/lib/bpf/libbpf.h +++ b/tools/lib/bpf/libbpf.h @@ -480,6 +480,7 @@ LIBBPF_API int bpf_map__pin(struct bpf_map *map, const char *path); LIBBPF_API int bpf_map__unpin(struct bpf_map *map, const char *path); LIBBPF_API int bpf_map__set_inner_map_fd(struct bpf_map *map, int fd); +LIBBPF_API struct bpf_map *bpf_map__inner_map(struct bpf_map *map); LIBBPF_API long libbpf_get_error(const void *ptr); diff --git a/tools/lib/bpf/libbpf.map b/tools/lib/bpf/libbpf.map index f5990f7208ce..b9b29baf1df8 100644 --- a/tools/lib/bpf/libbpf.map +++ b/tools/lib/bpf/libbpf.map @@ -359,5 +359,6 @@ LIBBPF_0.4.0 { bpf_linker__finalize; bpf_linker__free; bpf_linker__new; + bpf_map__inner_map; bpf_object__set_kversion; } LIBBPF_0.3.0; diff --git a/tools/lib/bpf/libbpf_internal.h b/tools/lib/bpf/libbpf_internal.h index 6017902c687e..ee426226928f 100644 --- a/tools/lib/bpf/libbpf_internal.h +++ b/tools/lib/bpf/libbpf_internal.h @@ -19,6 +19,7 @@ #pragma GCC poison reallocarray #include "libbpf.h" +#include "btf.h" #ifndef EM_BPF #define EM_BPF 247 @@ -131,6 +132,50 @@ struct btf; struct btf_type; struct btf_type *btf_type_by_id(struct btf *btf, __u32 type_id); +const char *btf_kind_str(const struct btf_type *t); +const struct btf_type *skip_mods_and_typedefs(const struct btf *btf, __u32 id, __u32 *res_id); + +static inline enum btf_func_linkage btf_func_linkage(const struct btf_type *t) +{ + return (enum btf_func_linkage)(int)btf_vlen(t); +} + +static inline __u32 btf_type_info(int kind, int vlen, int kflag) +{ + return 
(kflag << 31) | (kind << 24) | vlen; +} + +enum map_def_parts { + MAP_DEF_MAP_TYPE = 0x001, + MAP_DEF_KEY_TYPE = 0x002, + MAP_DEF_KEY_SIZE = 0x004, + MAP_DEF_VALUE_TYPE = 0x008, + MAP_DEF_VALUE_SIZE = 0x010, + MAP_DEF_MAX_ENTRIES = 0x020, + MAP_DEF_MAP_FLAGS = 0x040, + MAP_DEF_NUMA_NODE = 0x080, + MAP_DEF_PINNING = 0x100, + MAP_DEF_INNER_MAP = 0x200, + + MAP_DEF_ALL = 0x3ff, /* combination of all above */ +}; + +struct btf_map_def { + enum map_def_parts parts; + __u32 map_type; + __u32 key_type_id; + __u32 key_size; + __u32 value_type_id; + __u32 value_size; + __u32 max_entries; + __u32 map_flags; + __u32 numa_node; + __u32 pinning; +}; + +int parse_btf_map_def(const char *map_name, struct btf *btf, + const struct btf_type *def_t, bool strict, + struct btf_map_def *map_def, struct btf_map_def *inner_def); void *libbpf_add_mem(void **data, size_t *cap_cnt, size_t elem_sz, size_t cur_cnt, size_t max_cnt, size_t add_cnt); diff --git a/tools/lib/bpf/linker.c b/tools/lib/bpf/linker.c index 46b16cbdcda3..9de084b1c699 100644 --- a/tools/lib/bpf/linker.c +++ b/tools/lib/bpf/linker.c @@ -22,6 +22,8 @@ #include "libbpf_internal.h" #include "strset.h" +#define BTF_EXTERN_SEC ".extern" + struct src_sec { const char *sec_name; /* positional (not necessarily ELF) index in an array of sections */ @@ -74,11 +76,36 @@ struct btf_ext_sec_data { void *recs; }; +struct glob_sym { + /* ELF symbol index */ + int sym_idx; + /* associated section id for .ksyms, .kconfig, etc, but not .extern */ + int sec_id; + /* extern name offset in STRTAB */ + int name_off; + /* optional associated BTF type ID */ + int btf_id; + /* BTF type ID to which VAR/FUNC type is pointing to; used for + * rewriting types when extern VAR/FUNC is resolved to a concrete + * definition + */ + int underlying_btf_id; + /* sec_var index in the corresponding dst_sec, if exists */ + int var_idx; + + /* extern or resolved/global symbol */ + bool is_extern; + /* weak or strong symbol, never goes back from strong to weak */ 
+ bool is_weak; +}; + struct dst_sec { char *sec_name; /* positional (not necessarily ELF) index in an array of sections */ int id; + bool ephemeral; + /* ELF info */ size_t sec_idx; Elf_Scn *scn; @@ -120,22 +147,28 @@ struct bpf_linker { struct btf *btf; struct btf_ext *btf_ext; + + /* global (including extern) ELF symbols */ + int glob_sym_cnt; + struct glob_sym *glob_syms; }; #define pr_warn_elf(fmt, ...) \ -do { \ - libbpf_print(LIBBPF_WARN, "libbpf: " fmt ": %s\n", ##__VA_ARGS__, elf_errmsg(-1)); \ -} while (0) + libbpf_print(LIBBPF_WARN, "libbpf: " fmt ": %s\n", ##__VA_ARGS__, elf_errmsg(-1)) static int init_output_elf(struct bpf_linker *linker, const char *file); static int linker_load_obj_file(struct bpf_linker *linker, const char *filename, struct src_obj *obj); static int linker_sanity_check_elf(struct src_obj *obj); +static int linker_sanity_check_elf_symtab(struct src_obj *obj, struct src_sec *sec); +static int linker_sanity_check_elf_relos(struct src_obj *obj, struct src_sec *sec); static int linker_sanity_check_btf(struct src_obj *obj); static int linker_sanity_check_btf_ext(struct src_obj *obj); static int linker_fixup_btf(struct src_obj *obj); static int linker_append_sec_data(struct bpf_linker *linker, struct src_obj *obj); static int linker_append_elf_syms(struct bpf_linker *linker, struct src_obj *obj); +static int linker_append_elf_sym(struct bpf_linker *linker, struct src_obj *obj, + Elf64_Sym *sym, const char *sym_name, int src_sym_idx); static int linker_append_elf_relos(struct bpf_linker *linker, struct src_obj *obj); static int linker_append_btf(struct bpf_linker *linker, struct src_obj *obj); static int linker_append_btf_ext(struct bpf_linker *linker, struct src_obj *obj); @@ -282,7 +315,7 @@ static int init_output_elf(struct bpf_linker *linker, const char *file) /* ELF header */ linker->elf_hdr = elf64_newehdr(linker->elf); - if (!linker->elf_hdr){ + if (!linker->elf_hdr) { pr_warn_elf("failed to create ELF header"); return -EINVAL; } @@ 
-663,8 +696,8 @@ static bool is_pow_of_2(size_t x) static int linker_sanity_check_elf(struct src_obj *obj) { - struct src_sec *sec, *link_sec; - int i, j, n; + struct src_sec *sec; + int i, err; if (!obj->symtab_sec_idx) { pr_warn("ELF is missing SYMTAB section in %s\n", obj->filename); @@ -692,43 +725,11 @@ static int linker_sanity_check_elf(struct src_obj *obj) return -EINVAL; switch (sec->shdr->sh_type) { - case SHT_SYMTAB: { - Elf64_Sym *sym; - - if (sec->shdr->sh_entsize != sizeof(Elf64_Sym)) - return -EINVAL; - if (sec->shdr->sh_size % sec->shdr->sh_entsize != 0) - return -EINVAL; - - if (!sec->shdr->sh_link || sec->shdr->sh_link >= obj->sec_cnt) { - pr_warn("ELF SYMTAB section #%zu points to missing STRTAB section #%zu in %s\n", - sec->sec_idx, (size_t)sec->shdr->sh_link, obj->filename); - return -EINVAL; - } - link_sec = &obj->secs[sec->shdr->sh_link]; - if (link_sec->shdr->sh_type != SHT_STRTAB) { - pr_warn("ELF SYMTAB section #%zu points to invalid STRTAB section #%zu in %s\n", - sec->sec_idx, (size_t)sec->shdr->sh_link, obj->filename); - return -EINVAL; - } - - n = sec->shdr->sh_size / sec->shdr->sh_entsize; - sym = sec->data->d_buf; - for (j = 0; j < n; j++, sym++) { - if (sym->st_shndx - && sym->st_shndx < SHN_LORESERVE - && sym->st_shndx >= obj->sec_cnt) { - pr_warn("ELF sym #%d in section #%zu points to missing section #%zu in %s\n", - j, sec->sec_idx, (size_t)sym->st_shndx, obj->filename); - return -EINVAL; - } - if (ELF64_ST_TYPE(sym->st_info) == STT_SECTION) { - if (sym->st_value != 0) - return -EINVAL; - } - } + case SHT_SYMTAB: + err = linker_sanity_check_elf_symtab(obj, sec); + if (err) + return err; break; - } case SHT_STRTAB: break; case SHT_PROGBITS: @@ -739,87 +740,169 @@ static int linker_sanity_check_elf(struct src_obj *obj) break; case SHT_NOBITS: break; - case SHT_REL: { - Elf64_Rel *relo; - struct src_sec *sym_sec; + case SHT_REL: + err = linker_sanity_check_elf_relos(obj, sec); + if (err) + return err; + break; + case 
SHT_LLVM_ADDRSIG: + break; + default: + pr_warn("ELF section #%zu (%s) has unrecognized type %zu in %s\n", + sec->sec_idx, sec->sec_name, (size_t)sec->shdr->sh_type, obj->filename); + return -EINVAL; + } + } - if (sec->shdr->sh_entsize != sizeof(Elf64_Rel)) - return -EINVAL; - if (sec->shdr->sh_size % sec->shdr->sh_entsize != 0) - return -EINVAL; + return 0; +} - /* SHT_REL's sh_link should point to SYMTAB */ - if (sec->shdr->sh_link != obj->symtab_sec_idx) { - pr_warn("ELF relo section #%zu points to invalid SYMTAB section #%zu in %s\n", - sec->sec_idx, (size_t)sec->shdr->sh_link, obj->filename); - return -EINVAL; - } +static int linker_sanity_check_elf_symtab(struct src_obj *obj, struct src_sec *sec) +{ + struct src_sec *link_sec; + Elf64_Sym *sym; + int i, n; + + if (sec->shdr->sh_entsize != sizeof(Elf64_Sym)) + return -EINVAL; + if (sec->shdr->sh_size % sec->shdr->sh_entsize != 0) + return -EINVAL; + + if (!sec->shdr->sh_link || sec->shdr->sh_link >= obj->sec_cnt) { + pr_warn("ELF SYMTAB section #%zu points to missing STRTAB section #%zu in %s\n", + sec->sec_idx, (size_t)sec->shdr->sh_link, obj->filename); + return -EINVAL; + } + link_sec = &obj->secs[sec->shdr->sh_link]; + if (link_sec->shdr->sh_type != SHT_STRTAB) { + pr_warn("ELF SYMTAB section #%zu points to invalid STRTAB section #%zu in %s\n", + sec->sec_idx, (size_t)sec->shdr->sh_link, obj->filename); + return -EINVAL; + } - /* SHT_REL's sh_info points to relocated section */ - if (!sec->shdr->sh_info || sec->shdr->sh_info >= obj->sec_cnt) { - pr_warn("ELF relo section #%zu points to missing section #%zu in %s\n", - sec->sec_idx, (size_t)sec->shdr->sh_info, obj->filename); + n = sec->shdr->sh_size / sec->shdr->sh_entsize; + sym = sec->data->d_buf; + for (i = 0; i < n; i++, sym++) { + int sym_type = ELF64_ST_TYPE(sym->st_info); + int sym_bind = ELF64_ST_BIND(sym->st_info); + int sym_vis = ELF64_ST_VISIBILITY(sym->st_other); + + if (i == 0) { + if (sym->st_name != 0 || sym->st_info != 0 + || sym->st_other 
!= 0 || sym->st_shndx != 0 + || sym->st_value != 0 || sym->st_size != 0) { + pr_warn("ELF sym #0 is invalid in %s\n", obj->filename); return -EINVAL; } - link_sec = &obj->secs[sec->shdr->sh_info]; + continue; + } + if (sym_bind != STB_LOCAL && sym_bind != STB_GLOBAL && sym_bind != STB_WEAK) { + pr_warn("ELF sym #%d in section #%zu has unsupported symbol binding %d\n", + i, sec->sec_idx, sym_bind); + return -EINVAL; + } + if (sym_vis != STV_DEFAULT && sym_vis != STV_HIDDEN) { + pr_warn("ELF sym #%d in section #%zu has unsupported symbol visibility %d\n", + i, sec->sec_idx, sym_vis); + return -EINVAL; + } + if (sym->st_shndx == 0) { + if (sym_type != STT_NOTYPE || sym_bind == STB_LOCAL + || sym->st_value != 0 || sym->st_size != 0) { + pr_warn("ELF sym #%d is invalid extern symbol in %s\n", + i, obj->filename); - /* .rel<secname> -> <secname> pattern is followed */ - if (strncmp(sec->sec_name, ".rel", sizeof(".rel") - 1) != 0 - || strcmp(sec->sec_name + sizeof(".rel") - 1, link_sec->sec_name) != 0) { - pr_warn("ELF relo section #%zu name has invalid name in %s\n", - sec->sec_idx, obj->filename); return -EINVAL; } + continue; + } + if (sym->st_shndx < SHN_LORESERVE && sym->st_shndx >= obj->sec_cnt) { + pr_warn("ELF sym #%d in section #%zu points to missing section #%zu in %s\n", + i, sec->sec_idx, (size_t)sym->st_shndx, obj->filename); + return -EINVAL; + } + if (sym_type == STT_SECTION) { + if (sym->st_value != 0) + return -EINVAL; + continue; + } + } - /* don't further validate relocations for ignored sections */ - if (link_sec->skipped) - break; + return 0; +} - /* relocatable section is data or instructions */ - if (link_sec->shdr->sh_type != SHT_PROGBITS - && link_sec->shdr->sh_type != SHT_NOBITS) { - pr_warn("ELF relo section #%zu points to invalid section #%zu in %s\n", - sec->sec_idx, (size_t)sec->shdr->sh_info, obj->filename); - return -EINVAL; - } +static int linker_sanity_check_elf_relos(struct src_obj *obj, struct src_sec *sec) +{ + struct src_sec 
*link_sec, *sym_sec; + Elf64_Rel *relo; + int i, n; - /* check sanity of each relocation */ - n = sec->shdr->sh_size / sec->shdr->sh_entsize; - relo = sec->data->d_buf; - sym_sec = &obj->secs[obj->symtab_sec_idx]; - for (j = 0; j < n; j++, relo++) { - size_t sym_idx = ELF64_R_SYM(relo->r_info); - size_t sym_type = ELF64_R_TYPE(relo->r_info); - - if (sym_type != R_BPF_64_64 && sym_type != R_BPF_64_32) { - pr_warn("ELF relo #%d in section #%zu has unexpected type %zu in %s\n", - j, sec->sec_idx, sym_type, obj->filename); - return -EINVAL; - } + if (sec->shdr->sh_entsize != sizeof(Elf64_Rel)) + return -EINVAL; + if (sec->shdr->sh_size % sec->shdr->sh_entsize != 0) + return -EINVAL; - if (!sym_idx || sym_idx * sizeof(Elf64_Sym) >= sym_sec->shdr->sh_size) { - pr_warn("ELF relo #%d in section #%zu points to invalid symbol #%zu in %s\n", - j, sec->sec_idx, sym_idx, obj->filename); - return -EINVAL; - } + /* SHT_REL's sh_link should point to SYMTAB */ + if (sec->shdr->sh_link != obj->symtab_sec_idx) { + pr_warn("ELF relo section #%zu points to invalid SYMTAB section #%zu in %s\n", + sec->sec_idx, (size_t)sec->shdr->sh_link, obj->filename); + return -EINVAL; + } - if (link_sec->shdr->sh_flags & SHF_EXECINSTR) { - if (relo->r_offset % sizeof(struct bpf_insn) != 0) { - pr_warn("ELF relo #%d in section #%zu points to missing symbol #%zu in %s\n", - j, sec->sec_idx, sym_idx, obj->filename); - return -EINVAL; - } - } - } - break; + /* SHT_REL's sh_info points to relocated section */ + if (!sec->shdr->sh_info || sec->shdr->sh_info >= obj->sec_cnt) { + pr_warn("ELF relo section #%zu points to missing section #%zu in %s\n", + sec->sec_idx, (size_t)sec->shdr->sh_info, obj->filename); + return -EINVAL; + } + link_sec = &obj->secs[sec->shdr->sh_info]; + + /* .rel<secname> -> <secname> pattern is followed */ + if (strncmp(sec->sec_name, ".rel", sizeof(".rel") - 1) != 0 + || strcmp(sec->sec_name + sizeof(".rel") - 1, link_sec->sec_name) != 0) { + pr_warn("ELF relo section #%zu name has 
invalid name in %s\n", + sec->sec_idx, obj->filename); + return -EINVAL; + } + + /* don't further validate relocations for ignored sections */ + if (link_sec->skipped) + return 0; + + /* relocatable section is data or instructions */ + if (link_sec->shdr->sh_type != SHT_PROGBITS && link_sec->shdr->sh_type != SHT_NOBITS) { + pr_warn("ELF relo section #%zu points to invalid section #%zu in %s\n", + sec->sec_idx, (size_t)sec->shdr->sh_info, obj->filename); + return -EINVAL; + } + + /* check sanity of each relocation */ + n = sec->shdr->sh_size / sec->shdr->sh_entsize; + relo = sec->data->d_buf; + sym_sec = &obj->secs[obj->symtab_sec_idx]; + for (i = 0; i < n; i++, relo++) { + size_t sym_idx = ELF64_R_SYM(relo->r_info); + size_t sym_type = ELF64_R_TYPE(relo->r_info); + + if (sym_type != R_BPF_64_64 && sym_type != R_BPF_64_32) { + pr_warn("ELF relo #%d in section #%zu has unexpected type %zu in %s\n", + i, sec->sec_idx, sym_type, obj->filename); + return -EINVAL; } - case SHT_LLVM_ADDRSIG: - break; - default: - pr_warn("ELF section #%zu (%s) has unrecognized type %zu in %s\n", - sec->sec_idx, sec->sec_name, (size_t)sec->shdr->sh_type, obj->filename); + + if (!sym_idx || sym_idx * sizeof(Elf64_Sym) >= sym_sec->shdr->sh_size) { + pr_warn("ELF relo #%d in section #%zu points to invalid symbol #%zu in %s\n", + i, sec->sec_idx, sym_idx, obj->filename); return -EINVAL; } + + if (link_sec->shdr->sh_flags & SHF_EXECINSTR) { + if (relo->r_offset % sizeof(struct bpf_insn) != 0) { + pr_warn("ELF relo #%d in section #%zu points to missing symbol #%zu in %s\n", + i, sec->sec_idx, sym_idx, obj->filename); + return -EINVAL; + } + } } return 0; @@ -897,6 +980,7 @@ static int init_sec(struct bpf_linker *linker, struct dst_sec *dst_sec, struct s dst_sec->sec_sz = 0; dst_sec->sec_idx = 0; + dst_sec->ephemeral = src_sec->ephemeral; /* ephemeral sections are just thin section shells lacking most parts */ if (src_sec->ephemeral) @@ -904,13 +988,13 @@ static int init_sec(struct bpf_linker 
*linker, struct dst_sec *dst_sec, struct s scn = elf_newscn(linker->elf); if (!scn) - return -1; + return -ENOMEM; data = elf_newdata(scn); if (!data) - return -1; + return -ENOMEM; shdr = elf64_getshdr(scn); if (!shdr) - return -1; + return -ENOMEM; dst_sec->scn = scn; dst_sec->shdr = shdr; @@ -960,6 +1044,9 @@ static struct dst_sec *find_dst_sec_by_name(struct bpf_linker *linker, const cha static bool secs_match(struct dst_sec *dst, struct src_sec *src) { + if (dst->ephemeral || src->ephemeral) + return true; + if (dst->shdr->sh_type != src->shdr->sh_type) { pr_warn("sec %s types mismatch\n", dst->sec_name); return false; @@ -985,13 +1072,33 @@ static bool sec_content_is_same(struct dst_sec *dst_sec, struct src_sec *src_sec return true; } -static int extend_sec(struct dst_sec *dst, struct src_sec *src) +static int extend_sec(struct bpf_linker *linker, struct dst_sec *dst, struct src_sec *src) { void *tmp; - size_t dst_align = dst->shdr->sh_addralign; - size_t src_align = src->shdr->sh_addralign; + size_t dst_align, src_align; size_t dst_align_sz, dst_final_sz; + int err; + + /* Ephemeral source section doesn't contribute anything to ELF + * section data. + */ + if (src->ephemeral) + return 0; + + /* Some sections (like .maps) can contain both externs (and thus be + * ephemeral) and non-externs (map definitions). So it's possible that + * it has to be "upgraded" from ephemeral to non-ephemeral when the + * first non-ephemeral entity appears. In such case, we add ELF + * section, data, etc. 
+ */ + if (dst->ephemeral) { + err = init_sec(linker, dst, src); + if (err) + return err; + } + dst_align = dst->shdr->sh_addralign; + src_align = src->shdr->sh_addralign; if (dst_align == 0) dst_align = 1; if (dst_align < src_align) @@ -1087,10 +1194,7 @@ static int linker_append_sec_data(struct bpf_linker *linker, struct src_obj *obj /* record mapped section index */ src_sec->dst_id = dst_sec->id; - if (src_sec->ephemeral) - continue; - - err = extend_sec(dst_sec, src_sec); + err = extend_sec(linker, dst_sec, src_sec); if (err) return err; } @@ -1101,68 +1205,778 @@ static int linker_append_sec_data(struct bpf_linker *linker, struct src_obj *obj static int linker_append_elf_syms(struct bpf_linker *linker, struct src_obj *obj) { struct src_sec *symtab = &obj->secs[obj->symtab_sec_idx]; - Elf64_Sym *sym = symtab->data->d_buf, *dst_sym; - int i, n = symtab->shdr->sh_size / symtab->shdr->sh_entsize; + Elf64_Sym *sym = symtab->data->d_buf; + int i, n = symtab->shdr->sh_size / symtab->shdr->sh_entsize, err; int str_sec_idx = symtab->shdr->sh_link; + const char *sym_name; obj->sym_map = calloc(n + 1, sizeof(*obj->sym_map)); if (!obj->sym_map) return -ENOMEM; for (i = 0; i < n; i++, sym++) { - struct src_sec *src_sec = NULL; - struct dst_sec *dst_sec = NULL; - const char *sym_name; - size_t dst_sym_idx; - int name_off; - - /* we already have all-zero initial symbol */ - if (sym->st_name == 0 && sym->st_info == 0 && - sym->st_other == 0 && sym->st_shndx == SHN_UNDEF && - sym->st_value == 0 && sym->st_size ==0) + /* We already validated all-zero symbol #0 and we already + * appended it preventively to the final SYMTAB, so skip it. 
+ */ + if (i == 0) continue; sym_name = elf_strptr(obj->elf, str_sec_idx, sym->st_name); if (!sym_name) { pr_warn("can't fetch symbol name for symbol #%d in '%s'\n", i, obj->filename); - return -1; + return -EINVAL; + } + + err = linker_append_elf_sym(linker, obj, sym, sym_name, i); + if (err) + return err; + } + + return 0; +} + +static Elf64_Sym *get_sym_by_idx(struct bpf_linker *linker, size_t sym_idx) +{ + struct dst_sec *symtab = &linker->secs[linker->symtab_sec_idx]; + Elf64_Sym *syms = symtab->raw_data; + + return &syms[sym_idx]; +} + +static struct glob_sym *find_glob_sym(struct bpf_linker *linker, const char *sym_name) +{ + struct glob_sym *glob_sym; + const char *name; + int i; + + for (i = 0; i < linker->glob_sym_cnt; i++) { + glob_sym = &linker->glob_syms[i]; + name = strset__data(linker->strtab_strs) + glob_sym->name_off; + + if (strcmp(name, sym_name) == 0) + return glob_sym; + } + + return NULL; +} + +static struct glob_sym *add_glob_sym(struct bpf_linker *linker) +{ + struct glob_sym *syms, *sym; + + syms = libbpf_reallocarray(linker->glob_syms, linker->glob_sym_cnt + 1, + sizeof(*linker->glob_syms)); + if (!syms) + return NULL; + + sym = &syms[linker->glob_sym_cnt]; + memset(sym, 0, sizeof(*sym)); + sym->var_idx = -1; + + linker->glob_syms = syms; + linker->glob_sym_cnt++; + + return sym; +} + +static bool glob_sym_btf_matches(const char *sym_name, bool exact, + const struct btf *btf1, __u32 id1, + const struct btf *btf2, __u32 id2) +{ + const struct btf_type *t1, *t2; + bool is_static1, is_static2; + const char *n1, *n2; + int i, n; + +recur: + n1 = n2 = NULL; + t1 = skip_mods_and_typedefs(btf1, id1, &id1); + t2 = skip_mods_and_typedefs(btf2, id2, &id2); + + /* check if only one side is FWD, otherwise handle with common logic */ + if (!exact && btf_is_fwd(t1) != btf_is_fwd(t2)) { + n1 = btf__str_by_offset(btf1, t1->name_off); + n2 = btf__str_by_offset(btf2, t2->name_off); + if (strcmp(n1, n2) != 0) { + pr_warn("global '%s': incompatible forward 
declaration names '%s' and '%s'\n", + sym_name, n1, n2); + return false; } + /* validate if FWD kind matches concrete kind */ + if (btf_is_fwd(t1)) { + if (btf_kflag(t1) && btf_is_union(t2)) + return true; + if (!btf_kflag(t1) && btf_is_struct(t2)) + return true; + pr_warn("global '%s': incompatible %s forward declaration and concrete kind %s\n", + sym_name, btf_kflag(t1) ? "union" : "struct", btf_kind_str(t2)); + } else { + if (btf_kflag(t2) && btf_is_union(t1)) + return true; + if (!btf_kflag(t2) && btf_is_struct(t1)) + return true; + pr_warn("global '%s': incompatible %s forward declaration and concrete kind %s\n", + sym_name, btf_kflag(t2) ? "union" : "struct", btf_kind_str(t1)); + } + return false; + } + + if (btf_kind(t1) != btf_kind(t2)) { + pr_warn("global '%s': incompatible BTF kinds %s and %s\n", + sym_name, btf_kind_str(t1), btf_kind_str(t2)); + return false; + } + + switch (btf_kind(t1)) { + case BTF_KIND_STRUCT: + case BTF_KIND_UNION: + case BTF_KIND_ENUM: + case BTF_KIND_FWD: + case BTF_KIND_FUNC: + case BTF_KIND_VAR: + n1 = btf__str_by_offset(btf1, t1->name_off); + n2 = btf__str_by_offset(btf2, t2->name_off); + if (strcmp(n1, n2) != 0) { + pr_warn("global '%s': incompatible %s names '%s' and '%s'\n", + sym_name, btf_kind_str(t1), n1, n2); + return false; + } + break; + default: + break; + } + + switch (btf_kind(t1)) { + case BTF_KIND_UNKN: /* void */ + case BTF_KIND_FWD: + return true; + case BTF_KIND_INT: + case BTF_KIND_FLOAT: + case BTF_KIND_ENUM: + /* ignore encoding for int and enum values for enum */ + if (t1->size != t2->size) { + pr_warn("global '%s': incompatible %s '%s' size %u and %u\n", + sym_name, btf_kind_str(t1), n1, t1->size, t2->size); + return false; + } + return true; + case BTF_KIND_PTR: + /* just validate overall shape of the referenced type, so no + * contents comparison for struct/union, and allow fwd vs + * struct/union + */ + exact = false; + id1 = t1->type; + id2 = t2->type; + goto recur; + case BTF_KIND_ARRAY: + /* ignore 
index type and array size */ + id1 = btf_array(t1)->type; + id2 = btf_array(t2)->type; + goto recur; + case BTF_KIND_FUNC: + /* extern and global linkages are compatible */ + is_static1 = btf_func_linkage(t1) == BTF_FUNC_STATIC; + is_static2 = btf_func_linkage(t2) == BTF_FUNC_STATIC; + if (is_static1 != is_static2) { + pr_warn("global '%s': incompatible func '%s' linkage\n", sym_name, n1); + return false; + } + + id1 = t1->type; + id2 = t2->type; + goto recur; + case BTF_KIND_VAR: + /* extern and global linkages are compatible */ + is_static1 = btf_var(t1)->linkage == BTF_VAR_STATIC; + is_static2 = btf_var(t2)->linkage == BTF_VAR_STATIC; + if (is_static1 != is_static2) { + pr_warn("global '%s': incompatible var '%s' linkage\n", sym_name, n1); + return false; + } + + id1 = t1->type; + id2 = t2->type; + goto recur; + case BTF_KIND_STRUCT: + case BTF_KIND_UNION: { + const struct btf_member *m1, *m2; + + if (!exact) + return true; + + if (btf_vlen(t1) != btf_vlen(t2)) { + pr_warn("global '%s': incompatible number of %s fields %u and %u\n", + sym_name, btf_kind_str(t1), btf_vlen(t1), btf_vlen(t2)); + return false; + } + + n = btf_vlen(t1); + m1 = btf_members(t1); + m2 = btf_members(t2); + for (i = 0; i < n; i++, m1++, m2++) { + n1 = btf__str_by_offset(btf1, m1->name_off); + n2 = btf__str_by_offset(btf2, m2->name_off); + if (strcmp(n1, n2) != 0) { + pr_warn("global '%s': incompatible field #%d names '%s' and '%s'\n", + sym_name, i, n1, n2); + return false; + } + if (m1->offset != m2->offset) { + pr_warn("global '%s': incompatible field #%d ('%s') offsets\n", + sym_name, i, n1); + return false; + } + if (!glob_sym_btf_matches(sym_name, exact, btf1, m1->type, btf2, m2->type)) + return false; + } + + return true; + } + case BTF_KIND_FUNC_PROTO: { + const struct btf_param *m1, *m2; + + if (btf_vlen(t1) != btf_vlen(t2)) { + pr_warn("global '%s': incompatible number of %s params %u and %u\n", + sym_name, btf_kind_str(t1), btf_vlen(t1), btf_vlen(t2)); + return false; + } + + n 
= btf_vlen(t1); + m1 = btf_params(t1); + m2 = btf_params(t2); + for (i = 0; i < n; i++, m1++, m2++) { + /* ignore func arg names */ + if (!glob_sym_btf_matches(sym_name, exact, btf1, m1->type, btf2, m2->type)) + return false; + } + + /* now check return type as well */ + id1 = t1->type; + id2 = t2->type; + goto recur; + } + + /* skip_mods_and_typedefs() make this impossible */ + case BTF_KIND_TYPEDEF: + case BTF_KIND_VOLATILE: + case BTF_KIND_CONST: + case BTF_KIND_RESTRICT: + /* DATASECs are never compared with each other */ + case BTF_KIND_DATASEC: + default: + pr_warn("global '%s': unsupported BTF kind %s\n", + sym_name, btf_kind_str(t1)); + return false; + } +} + +static bool map_defs_match(const char *sym_name, + const struct btf *main_btf, + const struct btf_map_def *main_def, + const struct btf_map_def *main_inner_def, + const struct btf *extra_btf, + const struct btf_map_def *extra_def, + const struct btf_map_def *extra_inner_def) +{ + const char *reason; + + if (main_def->map_type != extra_def->map_type) { + reason = "type"; + goto mismatch; + } + + /* check key type/size match */ + if (main_def->key_size != extra_def->key_size) { + reason = "key_size"; + goto mismatch; + } + if (!!main_def->key_type_id != !!extra_def->key_type_id) { + reason = "key type"; + goto mismatch; + } + if ((main_def->parts & MAP_DEF_KEY_TYPE) + && !glob_sym_btf_matches(sym_name, true /*exact*/, + main_btf, main_def->key_type_id, + extra_btf, extra_def->key_type_id)) { + reason = "key type"; + goto mismatch; + } + + /* validate value type/size match */ + if (main_def->value_size != extra_def->value_size) { + reason = "value_size"; + goto mismatch; + } + if (!!main_def->value_type_id != !!extra_def->value_type_id) { + reason = "value type"; + goto mismatch; + } + if ((main_def->parts & MAP_DEF_VALUE_TYPE) + && !glob_sym_btf_matches(sym_name, true /*exact*/, + main_btf, main_def->value_type_id, + extra_btf, extra_def->value_type_id)) { + reason = "key type"; + goto mismatch; + } + + 
if (main_def->max_entries != extra_def->max_entries) { + reason = "max_entries"; + goto mismatch; + } + if (main_def->map_flags != extra_def->map_flags) { + reason = "map_flags"; + goto mismatch; + } + if (main_def->numa_node != extra_def->numa_node) { + reason = "numa_node"; + goto mismatch; + } + if (main_def->pinning != extra_def->pinning) { + reason = "pinning"; + goto mismatch; + } + + if ((main_def->parts & MAP_DEF_INNER_MAP) != (extra_def->parts & MAP_DEF_INNER_MAP)) { + reason = "inner map"; + goto mismatch; + } - if (sym->st_shndx && sym->st_shndx < SHN_LORESERVE) { - src_sec = &obj->secs[sym->st_shndx]; - if (src_sec->skipped) + if (main_def->parts & MAP_DEF_INNER_MAP) { + char inner_map_name[128]; + + snprintf(inner_map_name, sizeof(inner_map_name), "%s.inner", sym_name); + + return map_defs_match(inner_map_name, + main_btf, main_inner_def, NULL, + extra_btf, extra_inner_def, NULL); + } + + return true; + +mismatch: + pr_warn("global '%s': map %s mismatch\n", sym_name, reason); + return false; +} + +static bool glob_map_defs_match(const char *sym_name, + struct bpf_linker *linker, struct glob_sym *glob_sym, + struct src_obj *obj, Elf64_Sym *sym, int btf_id) +{ + struct btf_map_def dst_def = {}, dst_inner_def = {}; + struct btf_map_def src_def = {}, src_inner_def = {}; + const struct btf_type *t; + int err; + + t = btf__type_by_id(obj->btf, btf_id); + if (!btf_is_var(t)) { + pr_warn("global '%s': invalid map definition type [%d]\n", sym_name, btf_id); + return false; + } + t = skip_mods_and_typedefs(obj->btf, t->type, NULL); + + err = parse_btf_map_def(sym_name, obj->btf, t, true /*strict*/, &src_def, &src_inner_def); + if (err) { + pr_warn("global '%s': invalid map definition\n", sym_name); + return false; + } + + /* re-parse existing map definition */ + t = btf__type_by_id(linker->btf, glob_sym->btf_id); + t = skip_mods_and_typedefs(linker->btf, t->type, NULL); + err = parse_btf_map_def(sym_name, linker->btf, t, true /*strict*/, &dst_def, 
&dst_inner_def); + if (err) { + /* this should not happen, because we already validated it */ + pr_warn("global '%s': invalid dst map definition\n", sym_name); + return false; + } + + /* Currently extern map definition has to be complete and match + * concrete map definition exactly. This restriction might be lifted + * in the future. + */ + return map_defs_match(sym_name, linker->btf, &dst_def, &dst_inner_def, + obj->btf, &src_def, &src_inner_def); +} + +static bool glob_syms_match(const char *sym_name, + struct bpf_linker *linker, struct glob_sym *glob_sym, + struct src_obj *obj, Elf64_Sym *sym, size_t sym_idx, int btf_id) +{ + const struct btf_type *src_t; + + /* if we are dealing with externs, BTF types describing both global + * and extern VARs/FUNCs should be completely present in all files + */ + if (!glob_sym->btf_id || !btf_id) { + pr_warn("BTF info is missing for global symbol '%s'\n", sym_name); + return false; + } + + src_t = btf__type_by_id(obj->btf, btf_id); + if (!btf_is_var(src_t) && !btf_is_func(src_t)) { + pr_warn("only extern variables and functions are supported, but got '%s' for '%s'\n", + btf_kind_str(src_t), sym_name); + return false; + } + + /* deal with .maps definitions specially */ + if (glob_sym->sec_id && strcmp(linker->secs[glob_sym->sec_id].sec_name, MAPS_ELF_SEC) == 0) + return glob_map_defs_match(sym_name, linker, glob_sym, obj, sym, btf_id); + + if (!glob_sym_btf_matches(sym_name, true /*exact*/, + linker->btf, glob_sym->btf_id, obj->btf, btf_id)) + return false; + + return true; +} + +static bool btf_is_non_static(const struct btf_type *t) +{ + return (btf_is_var(t) && btf_var(t)->linkage != BTF_VAR_STATIC) + || (btf_is_func(t) && btf_func_linkage(t) != BTF_FUNC_STATIC); +} + +static int find_glob_sym_btf(struct src_obj *obj, Elf64_Sym *sym, const char *sym_name, + int *out_btf_sec_id, int *out_btf_id) +{ + int i, j, n = btf__get_nr_types(obj->btf), m, btf_id = 0; + const struct btf_type *t; + const struct btf_var_secinfo *vi; + 
const char *name; + + for (i = 1; i <= n; i++) { + t = btf__type_by_id(obj->btf, i); + + /* some global and extern FUNCs and VARs might not be associated with any + * DATASEC, so try to detect them in the same pass + */ + if (btf_is_non_static(t)) { + name = btf__str_by_offset(obj->btf, t->name_off); + if (strcmp(name, sym_name) != 0) continue; - dst_sec = &linker->secs[src_sec->dst_id]; - /* allow only one STT_SECTION symbol per section */ - if (ELF64_ST_TYPE(sym->st_info) == STT_SECTION && dst_sec->sec_sym_idx) { - obj->sym_map[i] = dst_sec->sec_sym_idx; + /* remember and still try to find DATASEC */ + btf_id = i; + continue; + } + + if (!btf_is_datasec(t)) + continue; + + vi = btf_var_secinfos(t); + for (j = 0, m = btf_vlen(t); j < m; j++, vi++) { + t = btf__type_by_id(obj->btf, vi->type); + name = btf__str_by_offset(obj->btf, t->name_off); + + if (strcmp(name, sym_name) != 0) + continue; + if (btf_is_var(t) && btf_var(t)->linkage == BTF_VAR_STATIC) continue; + if (btf_is_func(t) && btf_func_linkage(t) == BTF_FUNC_STATIC) + continue; + + if (btf_id && btf_id != vi->type) { + pr_warn("global/extern '%s' BTF is ambiguous: both types #%d and #%u match\n", + sym_name, btf_id, vi->type); + return -EINVAL; } + + *out_btf_sec_id = i; + *out_btf_id = vi->type; + + return 0; } + } + + /* free-floating extern or global FUNC */ + if (btf_id) { + *out_btf_sec_id = 0; + *out_btf_id = btf_id; + return 0; + } - name_off = strset__add_str(linker->strtab_strs, sym_name); - if (name_off < 0) - return name_off; + pr_warn("failed to find BTF info for global/extern symbol '%s'\n", sym_name); + return -ENOENT; +} - dst_sym = add_new_sym(linker, &dst_sym_idx); - if (!dst_sym) - return -ENOMEM; +static struct src_sec *find_src_sec_by_name(struct src_obj *obj, const char *sec_name) +{ + struct src_sec *sec; + int i; - dst_sym->st_name = name_off; - dst_sym->st_info = sym->st_info; - dst_sym->st_other = sym->st_other; - dst_sym->st_shndx = src_sec ? 
dst_sec->sec_idx : sym->st_shndx; - dst_sym->st_value = (src_sec ? src_sec->dst_off : 0) + sym->st_value; - dst_sym->st_size = sym->st_size; + for (i = 1; i < obj->sec_cnt; i++) { + sec = &obj->secs[i]; + + if (strcmp(sec->sec_name, sec_name) == 0) + return sec; + } + + return NULL; +} + +static int complete_extern_btf_info(struct btf *dst_btf, int dst_id, + struct btf *src_btf, int src_id) +{ + struct btf_type *dst_t = btf_type_by_id(dst_btf, dst_id); + struct btf_type *src_t = btf_type_by_id(src_btf, src_id); + struct btf_param *src_p, *dst_p; + const char *s; + int i, n, off; + + /* We already made sure that source and destination types (FUNC or + * VAR) match in terms of types and argument names. + */ + if (btf_is_var(dst_t)) { + btf_var(dst_t)->linkage = BTF_VAR_GLOBAL_ALLOCATED; + return 0; + } + + dst_t->info = btf_type_info(BTF_KIND_FUNC, BTF_FUNC_GLOBAL, 0); + + /* now onto FUNC_PROTO types */ + src_t = btf_type_by_id(src_btf, src_t->type); + dst_t = btf_type_by_id(dst_btf, dst_t->type); + + /* Fill in all the argument names, which for extern FUNCs are missing. + * We'll end up with two copies of FUNCs/VARs for externs, but that + * will be taken care of by BTF dedup at the very end. + * It might be that BTF types for extern in one file has less/more BTF + * information (e.g., FWD instead of full STRUCT/UNION information), + * but that should be (in most cases, subject to BTF dedup rules) + * handled and resolved by BTF dedup algorithm as well, so we won't + * worry about it. Our only job is to make sure that argument names + * are populated on both sides, otherwise BTF dedup will pedantically + * consider them different. 
+ */ + src_p = btf_params(src_t); + dst_p = btf_params(dst_t); + for (i = 0, n = btf_vlen(dst_t); i < n; i++, src_p++, dst_p++) { + if (!src_p->name_off) + continue; + + /* src_btf has more complete info, so add name to dst_btf */ + s = btf__str_by_offset(src_btf, src_p->name_off); + off = btf__add_str(dst_btf, s); + if (off < 0) + return off; + dst_p->name_off = off; + } + return 0; +} + +static void sym_update_bind(Elf64_Sym *sym, int sym_bind) +{ + sym->st_info = ELF64_ST_INFO(sym_bind, ELF64_ST_TYPE(sym->st_info)); +} + +static void sym_update_type(Elf64_Sym *sym, int sym_type) +{ + sym->st_info = ELF64_ST_INFO(ELF64_ST_BIND(sym->st_info), sym_type); +} + +static void sym_update_visibility(Elf64_Sym *sym, int sym_vis) +{ + /* libelf doesn't provide setters for ST_VISIBILITY, + * but it is stored in the lower 2 bits of st_other + */ + sym->st_other &= 0x03; + sym->st_other |= sym_vis; +} + +static int linker_append_elf_sym(struct bpf_linker *linker, struct src_obj *obj, + Elf64_Sym *sym, const char *sym_name, int src_sym_idx) +{ + struct src_sec *src_sec = NULL; + struct dst_sec *dst_sec = NULL; + struct glob_sym *glob_sym = NULL; + int name_off, sym_type, sym_bind, sym_vis, err; + int btf_sec_id = 0, btf_id = 0; + size_t dst_sym_idx; + Elf64_Sym *dst_sym; + bool sym_is_extern; + + sym_type = ELF64_ST_TYPE(sym->st_info); + sym_bind = ELF64_ST_BIND(sym->st_info); + sym_vis = ELF64_ST_VISIBILITY(sym->st_other); + sym_is_extern = sym->st_shndx == SHN_UNDEF; + + if (sym_is_extern) { + if (!obj->btf) { + pr_warn("externs without BTF info are not supported\n"); + return -ENOTSUP; + } + } else if (sym->st_shndx < SHN_LORESERVE) { + src_sec = &obj->secs[sym->st_shndx]; + if (src_sec->skipped) + return 0; + dst_sec = &linker->secs[src_sec->dst_id]; + + /* allow only one STT_SECTION symbol per section */ + if (sym_type == STT_SECTION && dst_sec->sec_sym_idx) { + obj->sym_map[src_sym_idx] = dst_sec->sec_sym_idx; + return 0; + } + } + + if (sym_bind == STB_LOCAL) + goto 
add_sym; + + /* find matching BTF info */ + err = find_glob_sym_btf(obj, sym, sym_name, &btf_sec_id, &btf_id); + if (err) + return err; + + if (sym_is_extern && btf_sec_id) { + const char *sec_name = NULL; + const struct btf_type *t; + + t = btf__type_by_id(obj->btf, btf_sec_id); + sec_name = btf__str_by_offset(obj->btf, t->name_off); + + /* Clang puts unannotated extern vars into + * '.extern' BTF DATASEC. Treat them the same + * as unannotated extern funcs (which are + * currently not put into any DATASECs). + * Those don't have associated src_sec/dst_sec. + */ + if (strcmp(sec_name, BTF_EXTERN_SEC) != 0) { + src_sec = find_src_sec_by_name(obj, sec_name); + if (!src_sec) { + pr_warn("failed to find matching ELF sec '%s'\n", sec_name); + return -ENOENT; + } + dst_sec = &linker->secs[src_sec->dst_id]; + } + } + + glob_sym = find_glob_sym(linker, sym_name); + if (glob_sym) { + /* Preventively resolve to existing symbol. This is + * needed for further relocation symbol remapping in + * the next step of linking. + */ + obj->sym_map[src_sym_idx] = glob_sym->sym_idx; + + /* If both symbols are non-externs, at least one of + * them has to be STB_WEAK, otherwise they are in + * a conflict with each other. + */ + if (!sym_is_extern && !glob_sym->is_extern + && !glob_sym->is_weak && sym_bind != STB_WEAK) { + pr_warn("conflicting non-weak symbol #%d (%s) definition in '%s'\n", + src_sym_idx, sym_name, obj->filename); + return -EINVAL; + } - obj->sym_map[i] = dst_sym_idx; + if (!glob_syms_match(sym_name, linker, glob_sym, obj, sym, src_sym_idx, btf_id)) + return -EINVAL; + + dst_sym = get_sym_by_idx(linker, glob_sym->sym_idx); - if (ELF64_ST_TYPE(sym->st_info) == STT_SECTION && dst_sym) { - dst_sec->sec_sym_idx = dst_sym_idx; - dst_sym->st_value = 0; + /* If new symbol is strong, then force dst_sym to be strong as + * well; this way a mix of weak and non-weak extern + * definitions will end up being strong. 
+ */ + if (sym_bind == STB_GLOBAL) { + /* We still need to preserve type (NOTYPE or + * OBJECT/FUNC, depending on whether the symbol is + * extern or not) + */ + sym_update_bind(dst_sym, STB_GLOBAL); + glob_sym->is_weak = false; } + /* Non-default visibility is "contaminating", with stricter + * visibility overwriting more permissive ones, even if more + * permissive visibility comes from just an extern definition. + * Currently only STV_DEFAULT and STV_HIDDEN are allowed and + * ensured by ELF symbol sanity checks above. + */ + if (sym_vis > ELF64_ST_VISIBILITY(dst_sym->st_other)) + sym_update_visibility(dst_sym, sym_vis); + + /* If the new symbol is extern, then regardless if + * existing symbol is extern or resolved global, just + * keep the existing one untouched. + */ + if (sym_is_extern) + return 0; + + /* If existing symbol is a strong resolved symbol, bail out, + * because we lost resolution battle and have nothing to + * contribute. We already checked above that there is no + * strong-strong conflict. We also already tightened binding + * and visibility, so nothing else to contribute at that point. + */ + if (!glob_sym->is_extern && sym_bind == STB_WEAK) + return 0; + + /* At this point, new symbol is strong non-extern, + * so overwrite glob_sym with new symbol information. + * Preserve binding and visibility. 
+ */ + sym_update_type(dst_sym, sym_type); + dst_sym->st_shndx = dst_sec->sec_idx; + dst_sym->st_value = src_sec->dst_off + sym->st_value; + dst_sym->st_size = sym->st_size; + + /* see comment below about dst_sec->id vs dst_sec->sec_idx */ + glob_sym->sec_id = dst_sec->id; + glob_sym->is_extern = false; + + if (complete_extern_btf_info(linker->btf, glob_sym->btf_id, + obj->btf, btf_id)) + return -EINVAL; + + /* request updating VAR's/FUNC's underlying BTF type when appending BTF type */ + glob_sym->underlying_btf_id = 0; + + obj->sym_map[src_sym_idx] = glob_sym->sym_idx; + return 0; + } + +add_sym: + name_off = strset__add_str(linker->strtab_strs, sym_name); + if (name_off < 0) + return name_off; + + dst_sym = add_new_sym(linker, &dst_sym_idx); + if (!dst_sym) + return -ENOMEM; + + dst_sym->st_name = name_off; + dst_sym->st_info = sym->st_info; + dst_sym->st_other = sym->st_other; + dst_sym->st_shndx = dst_sec ? dst_sec->sec_idx : sym->st_shndx; + dst_sym->st_value = (src_sec ? src_sec->dst_off : 0) + sym->st_value; + dst_sym->st_size = sym->st_size; + + obj->sym_map[src_sym_idx] = dst_sym_idx; + + if (sym_type == STT_SECTION && dst_sym) { + dst_sec->sec_sym_idx = dst_sym_idx; + dst_sym->st_value = 0; + } + + if (sym_bind != STB_LOCAL) { + glob_sym = add_glob_sym(linker); + if (!glob_sym) + return -ENOMEM; + + glob_sym->sym_idx = dst_sym_idx; + /* we use dst_sec->id (and not dst_sec->sec_idx), because + * ephemeral sections (.kconfig, .ksyms, etc) don't have + * sec_idx (as they don't have corresponding ELF section), but + * still have id. .extern doesn't have even ephemeral section + * associated with it, so dst_sec->id == dst_sec->sec_idx == 0. + */ + glob_sym->sec_id = dst_sec ? 
dst_sec->id : 0; + glob_sym->name_off = name_off; + /* we will fill btf_id in during BTF merging step */ + glob_sym->btf_id = 0; + glob_sym->is_extern = sym_is_extern; + glob_sym->is_weak = sym_bind == STB_WEAK; } return 0; @@ -1200,7 +2014,7 @@ static int linker_append_elf_relos(struct bpf_linker *linker, struct src_obj *ob return err; } } else if (!secs_match(dst_sec, src_sec)) { - pr_warn("Secs %s are not compatible\n", src_sec->sec_name); + pr_warn("sections %s are not compatible\n", src_sec->sec_name); return -1; } @@ -1212,7 +2026,7 @@ static int linker_append_elf_relos(struct bpf_linker *linker, struct src_obj *ob dst_sec->shdr->sh_info = dst_linked_sec->sec_idx; src_sec->dst_id = dst_sec->id; - err = extend_sec(dst_sec, src_sec); + err = extend_sec(linker, dst_sec, src_sec); if (err) return err; @@ -1265,21 +2079,6 @@ static int linker_append_elf_relos(struct bpf_linker *linker, struct src_obj *ob return 0; } -static struct src_sec *find_src_sec_by_name(struct src_obj *obj, const char *sec_name) -{ - struct src_sec *sec; - int i; - - for (i = 1; i < obj->sec_cnt; i++) { - sec = &obj->secs[i]; - - if (strcmp(sec->sec_name, sec_name) == 0) - return sec; - } - - return NULL; -} - static Elf64_Sym *find_sym_by_name(struct src_obj *obj, size_t sec_idx, int sym_type, const char *sym_name) { @@ -1334,12 +2133,32 @@ static int linker_fixup_btf(struct src_obj *obj) t->size = sec->shdr->sh_size; } else { /* BTF can have some sections that are not represented - * in ELF, e.g., .kconfig and .ksyms, which are used - * for special extern variables. Here we'll - * pre-create "section shells" for them to be able to - * keep track of extra per-section metadata later - * (e.g., BTF variables). + * in ELF, e.g., .kconfig, .ksyms, .extern, which are used + * for special extern variables. 
+ * + * For all but one such special (ephemeral) + * sections, we pre-create "section shells" to be able + * to keep track of extra per-section metadata later + * (e.g., those BTF extern variables). + * + * .extern is even more special, though, because it + * contains extern variables that need to be resolved + * by static linker, not libbpf and kernel. When such + * externs are resolved, we are going to remove them + * from .extern BTF section and might end up not + * needing it at all. Each resolved extern should have + * matching non-extern VAR/FUNC in other sections. + * + * We do support leaving some of the externs + * unresolved, though, to support cases of building + * libraries, which will later be linked against final + * BPF applications. So if at finalization we still + * see unresolved externs, we'll create .extern + * section on our own. */ + if (strcmp(sec_name, BTF_EXTERN_SEC) == 0) + continue; + sec = add_src_sec(obj, sec_name); if (!sec) return -ENOMEM; @@ -1379,6 +2198,13 @@ static int linker_fixup_btf(struct src_obj *obj) static int remap_type_id(__u32 *type_id, void *ctx) { int *id_map = ctx; + int new_id = id_map[*type_id]; + + /* Error out if the type wasn't remapped. Ignore VOID which stays VOID. 
*/ + if (new_id == 0 && *type_id != 0) { + pr_warn("failed to find new ID mapping for original BTF type ID %u\n", *type_id); + return -EINVAL; + } *type_id = id_map[*type_id]; @@ -1389,6 +2215,7 @@ static int linker_append_btf(struct bpf_linker *linker, struct src_obj *obj) { const struct btf_type *t; int i, j, n, start_id, id; + const char *name; if (!obj->btf) return 0; @@ -1401,12 +2228,44 @@ static int linker_append_btf(struct bpf_linker *linker, struct src_obj *obj) return -ENOMEM; for (i = 1; i <= n; i++) { + struct glob_sym *glob_sym = NULL; + t = btf__type_by_id(obj->btf, i); /* DATASECs are handled specially below */ if (btf_kind(t) == BTF_KIND_DATASEC) continue; + if (btf_is_non_static(t)) { + /* there should be glob_sym already */ + name = btf__str_by_offset(obj->btf, t->name_off); + glob_sym = find_glob_sym(linker, name); + + /* VARs without corresponding glob_sym are those that + * belong to skipped/deduplicated sections (i.e., + * license and version), so just skip them + */ + if (!glob_sym) + continue; + + /* linker_append_elf_sym() might have requested + * updating underlying type ID, if extern was resolved + * to strong symbol or weak got upgraded to non-weak + */ + if (glob_sym->underlying_btf_id == 0) + glob_sym->underlying_btf_id = -t->type; + + /* globals from previous object files that match our + * VAR/FUNC already have a corresponding associated + * BTF type, so just make sure to use it + */ + if (glob_sym->btf_id) { + /* reuse existing BTF type for global var/func */ + obj->btf_type_map[i] = glob_sym->btf_id; + continue; + } + } + id = btf__add_type(linker->btf, obj->btf, t); if (id < 0) { pr_warn("failed to append BTF type #%d from file '%s'\n", i, obj->filename); @@ -1414,6 +2273,12 @@ static int linker_append_btf(struct bpf_linker *linker, struct src_obj *obj) } obj->btf_type_map[i] = id; + + /* record just appended BTF type for var/func */ + if (glob_sym) { + glob_sym->btf_id = id; + glob_sym->underlying_btf_id = -t->type; + } } /* 
remap all the types except DATASECs */ @@ -1425,6 +2290,22 @@ static int linker_append_btf(struct bpf_linker *linker, struct src_obj *obj) return -EINVAL; } + /* Rewrite VAR/FUNC underlying types (i.e., FUNC's FUNC_PROTO and VAR's + * actual type), if necessary + */ + for (i = 0; i < linker->glob_sym_cnt; i++) { + struct glob_sym *glob_sym = &linker->glob_syms[i]; + struct btf_type *glob_t; + + if (glob_sym->underlying_btf_id >= 0) + continue; + + glob_sym->underlying_btf_id = obj->btf_type_map[-glob_sym->underlying_btf_id]; + + glob_t = btf_type_by_id(linker->btf, glob_sym->btf_id); + glob_t->type = glob_sym->underlying_btf_id; + } + /* append DATASEC info */ for (i = 1; i < obj->sec_cnt; i++) { struct src_sec *src_sec; @@ -1452,6 +2333,42 @@ static int linker_append_btf(struct bpf_linker *linker, struct src_obj *obj) n = btf_vlen(t); for (j = 0; j < n; j++, src_var++) { void *sec_vars = dst_sec->sec_vars; + int new_id = obj->btf_type_map[src_var->type]; + struct glob_sym *glob_sym = NULL; + + t = btf_type_by_id(linker->btf, new_id); + if (btf_is_non_static(t)) { + name = btf__str_by_offset(linker->btf, t->name_off); + glob_sym = find_glob_sym(linker, name); + if (glob_sym->sec_id != dst_sec->id) { + pr_warn("global '%s': section mismatch %d vs %d\n", + name, glob_sym->sec_id, dst_sec->id); + return -EINVAL; + } + } + + /* If there is already a member (VAR or FUNC) mapped + * to the same type, don't add a duplicate entry. + * This will happen when multiple object files define + * the same extern VARs/FUNCs. + */ + if (glob_sym && glob_sym->var_idx >= 0) { + __s64 sz; + + dst_var = &dst_sec->sec_vars[glob_sym->var_idx]; + /* Because underlying BTF type might have + * changed, so might its size have changed, so + * re-calculate and update it in sec_var. 
+ */ + sz = btf__resolve_size(linker->btf, glob_sym->underlying_btf_id); + if (sz < 0) { + pr_warn("global '%s': failed to resolve size of underlying type: %d\n", + name, (int)sz); + return -EINVAL; + } + dst_var->size = sz; + continue; + } sec_vars = libbpf_reallocarray(sec_vars, dst_sec->sec_var_cnt + 1, @@ -1466,6 +2383,9 @@ static int linker_append_btf(struct bpf_linker *linker, struct src_obj *obj) dst_var->type = obj->btf_type_map[src_var->type]; dst_var->size = src_var->size; dst_var->offset = src_sec->dst_off + src_var->offset; + + if (glob_sym) + glob_sym->var_idx = dst_sec->sec_var_cnt - 1; } } @@ -1895,7 +2815,7 @@ static int finalize_btf_ext(struct bpf_linker *linker) hdr->func_info_len = funcs_sz; hdr->line_info_off = funcs_sz; hdr->line_info_len = lines_sz; - hdr->core_relo_off = funcs_sz + lines_sz;; + hdr->core_relo_off = funcs_sz + lines_sz; hdr->core_relo_len = core_relos_sz; if (funcs_sz) { diff --git a/tools/scripts/Makefile.include b/tools/scripts/Makefile.include index a402f32a145c..91130648d8e6 100644 --- a/tools/scripts/Makefile.include +++ b/tools/scripts/Makefile.include @@ -39,8 +39,6 @@ EXTRA_WARNINGS += -Wundef EXTRA_WARNINGS += -Wwrite-strings EXTRA_WARNINGS += -Wformat -CC_NO_CLANG := $(shell $(CC) -dM -E -x c /dev/null | grep -Fq "__clang__"; echo $$?) - # Makefiles suck: This macro sets a default value of $(2) for the # variable named by $(1), unless the variable has been set by # environment or command line. This is necessary for CC and AR @@ -52,12 +50,22 @@ define allow-override $(eval $(1) = $(2))) endef +ifneq ($(LLVM),) +$(call allow-override,CC,clang) +$(call allow-override,AR,llvm-ar) +$(call allow-override,LD,ld.lld) +$(call allow-override,CXX,clang++) +$(call allow-override,STRIP,llvm-strip) +else # Allow setting various cross-compile vars or setting CROSS_COMPILE as a prefix. 
$(call allow-override,CC,$(CROSS_COMPILE)gcc) $(call allow-override,AR,$(CROSS_COMPILE)ar) $(call allow-override,LD,$(CROSS_COMPILE)ld) $(call allow-override,CXX,$(CROSS_COMPILE)g++) $(call allow-override,STRIP,$(CROSS_COMPILE)strip) +endif + +CC_NO_CLANG := $(shell $(CC) -dM -E -x c /dev/null | grep -Fq "__clang__"; echo $$?) ifneq ($(LLVM),) HOSTAR ?= llvm-ar diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile index 6448c626498f..283e5ad8385e 100644 --- a/tools/testing/selftests/bpf/Makefile +++ b/tools/testing/selftests/bpf/Makefile @@ -21,13 +21,18 @@ endif BPF_GCC ?= $(shell command -v bpf-gcc;) SAN_CFLAGS ?= -CFLAGS += -g -Og -rdynamic -Wall $(GENFLAGS) $(SAN_CFLAGS) \ +CFLAGS += -g -O0 -rdynamic -Wall $(GENFLAGS) $(SAN_CFLAGS) \ -I$(CURDIR) -I$(INCLUDE_DIR) -I$(GENDIR) -I$(LIBDIR) \ -I$(TOOLSINCDIR) -I$(APIDIR) -I$(OUTPUT) \ -Dbpf_prog_load=bpf_prog_test_load \ -Dbpf_load_program=bpf_test_load_program LDLIBS += -lcap -lelf -lz -lrt -lpthread +# Silence some warnings when compiled with clang +ifneq ($(LLVM),) +CFLAGS += -Wno-unused-command-line-argument +endif + # Order correspond to 'make run_tests' order TEST_GEN_PROGS = test_verifier test_tag test_maps test_lru_map test_lpm_map test_progs \ test_verifier_log test_dev_cgroup \ @@ -182,7 +187,6 @@ $(OUTPUT)/runqslower: $(BPFOBJ) | $(DEFAULT_BPFTOOL) cp $(SCRATCH_DIR)/runqslower $@ $(TEST_GEN_PROGS) $(TEST_GEN_PROGS_EXTENDED): $(OUTPUT)/test_stub.o $(BPFOBJ) -$(TEST_GEN_FILES): docs $(OUTPUT)/test_dev_cgroup: cgroup_helpers.c $(OUTPUT)/test_skb_cgroup_id_user: cgroup_helpers.c @@ -201,10 +205,12 @@ $(DEFAULT_BPFTOOL): $(wildcard $(BPFTOOLDIR)/*.[ch] $(BPFTOOLDIR)/Makefile) \ $(HOST_BPFOBJ) | $(HOST_BUILD_DIR)/bpftool $(Q)$(MAKE) $(submake_extras) -C $(BPFTOOLDIR) \ CC=$(HOSTCC) LD=$(HOSTLD) \ - EXTRA_CFLAGS='-g -Og' \ + EXTRA_CFLAGS='-g -O0' \ OUTPUT=$(HOST_BUILD_DIR)/bpftool/ \ prefix= DESTDIR=$(HOST_SCRATCH_DIR)/ install +all: docs + docs: 
$(Q)RST2MAN_OPTS="--exit-status=1" $(MAKE) $(submake_extras) \ -f Makefile.docs \ @@ -219,7 +225,7 @@ $(BPFOBJ): $(wildcard $(BPFDIR)/*.[ch] $(BPFDIR)/Makefile) \ ../../../include/uapi/linux/bpf.h \ | $(INCLUDE_DIR) $(BUILD_DIR)/libbpf $(Q)$(MAKE) $(submake_extras) -C $(BPFDIR) OUTPUT=$(BUILD_DIR)/libbpf/ \ - EXTRA_CFLAGS='-g -Og' \ + EXTRA_CFLAGS='-g -O0' \ DESTDIR=$(SCRATCH_DIR) prefix= all install_headers ifneq ($(BPFOBJ),$(HOST_BPFOBJ)) @@ -227,7 +233,7 @@ $(HOST_BPFOBJ): $(wildcard $(BPFDIR)/*.[ch] $(BPFDIR)/Makefile) \ ../../../include/uapi/linux/bpf.h \ | $(INCLUDE_DIR) $(HOST_BUILD_DIR)/libbpf $(Q)$(MAKE) $(submake_extras) -C $(BPFDIR) \ - EXTRA_CFLAGS='-g -Og' \ + EXTRA_CFLAGS='-g -O0' \ OUTPUT=$(HOST_BUILD_DIR)/libbpf/ CC=$(HOSTCC) LD=$(HOSTLD) \ DESTDIR=$(HOST_SCRATCH_DIR)/ prefix= all install_headers endif @@ -303,9 +309,15 @@ endef SKEL_BLACKLIST := btf__% test_pinning_invalid.c test_sk_assign.c -LINKED_SKELS := test_static_linked.skel.h +LINKED_SKELS := test_static_linked.skel.h linked_funcs.skel.h \ + linked_vars.skel.h linked_maps.skel.h test_static_linked.skel.h-deps := test_static_linked1.o test_static_linked2.o +linked_funcs.skel.h-deps := linked_funcs1.o linked_funcs2.o +linked_vars.skel.h-deps := linked_vars1.o linked_vars2.o +linked_maps.skel.h-deps := linked_maps1.o linked_maps2.o + +LINKED_BPF_SRCS := $(patsubst %.o,%.c,$(foreach skel,$(LINKED_SKELS),$($(skel)-deps))) # Set up extra TRUNNER_XXX "temporary" variables in the environment (relies on # $eval()) and pass control to DEFINE_TEST_RUNNER_RULES. 
@@ -325,7 +337,7 @@ TRUNNER_TESTS_HDR := $(TRUNNER_TESTS_DIR)/tests.h TRUNNER_BPF_SRCS := $$(notdir $$(wildcard $(TRUNNER_BPF_PROGS_DIR)/*.c)) TRUNNER_BPF_OBJS := $$(patsubst %.c,$$(TRUNNER_OUTPUT)/%.o, $$(TRUNNER_BPF_SRCS)) TRUNNER_BPF_SKELS := $$(patsubst %.c,$$(TRUNNER_OUTPUT)/%.skel.h, \ - $$(filter-out $(SKEL_BLACKLIST), \ + $$(filter-out $(SKEL_BLACKLIST) $(LINKED_BPF_SRCS),\ $$(TRUNNER_BPF_SRCS))) TRUNNER_BPF_SKELS_LINKED := $$(addprefix $$(TRUNNER_OUTPUT)/,$(LINKED_SKELS)) TEST_GEN_FILES += $$(TRUNNER_BPF_OBJS) @@ -481,7 +493,7 @@ $(OUTPUT)/test_verifier: test_verifier.c verifier/tests.h $(BPFOBJ) | $(OUTPUT) # Make sure we are able to include and link libbpf against c++. $(OUTPUT)/test_cpp: test_cpp.cpp $(OUTPUT)/test_core_extern.skel.h $(BPFOBJ) $(call msg,CXX,,$@) - $(Q)$(CXX) $(CFLAGS) $^ $(LDLIBS) -o $@ + $(Q)$(CXX) $(CFLAGS) $(filter %.a %.o %.cpp,$^) $(LDLIBS) -o $@ # Benchmark runner $(OUTPUT)/bench_%.o: benchs/bench_%.c bench.h diff --git a/tools/testing/selftests/bpf/README.rst b/tools/testing/selftests/bpf/README.rst index 65fe318d1e71..3353778c30f8 100644 --- a/tools/testing/selftests/bpf/README.rst +++ b/tools/testing/selftests/bpf/README.rst @@ -193,3 +193,12 @@ Without it, the error from compiling bpf selftests looks like: libbpf: failed to find BTF for extern 'tcp_slow_start' [25] section: -2 __ https://reviews.llvm.org/D93563 + +Clang dependencies for static linking tests +=========================================== + +linked_vars, linked_maps, and linked_funcs tests depend on `Clang fix`__ to +generate valid BTF information for weak variables. Please make sure you use +Clang that contains the fix. 
+ +__ https://reviews.llvm.org/D100362 diff --git a/tools/testing/selftests/bpf/config b/tools/testing/selftests/bpf/config index 37e1f303fc11..5192305159ec 100644 --- a/tools/testing/selftests/bpf/config +++ b/tools/testing/selftests/bpf/config @@ -44,3 +44,5 @@ CONFIG_SECURITYFS=y CONFIG_IMA_WRITE_POLICY=y CONFIG_IMA_READ_POLICY=y CONFIG_BLK_DEV_LOOP=y +CONFIG_FUNCTION_TRACER=y +CONFIG_DYNAMIC_FTRACE=y diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_iter.c b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c index 74c45d557a2b..2d3590cfb5e1 100644 --- a/tools/testing/selftests/bpf/prog_tests/bpf_iter.c +++ b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c @@ -147,6 +147,7 @@ static void test_task_stack(void) return; do_dummy_read(skel->progs.dump_task_stack); + do_dummy_read(skel->progs.get_task_user_stacks); bpf_iter_task_stack__destroy(skel); } diff --git a/tools/testing/selftests/bpf/prog_tests/fexit_bpf2bpf.c b/tools/testing/selftests/bpf/prog_tests/fexit_bpf2bpf.c index 5c0448910426..63990842d20f 100644 --- a/tools/testing/selftests/bpf/prog_tests/fexit_bpf2bpf.c +++ b/tools/testing/selftests/bpf/prog_tests/fexit_bpf2bpf.c @@ -58,42 +58,73 @@ static void test_fexit_bpf2bpf_common(const char *obj_file, test_cb cb) { struct bpf_object *obj = NULL, *tgt_obj; + __u32 retval, tgt_prog_id, info_len; + struct bpf_prog_info prog_info = {}; struct bpf_program **prog = NULL; struct bpf_link **link = NULL; - __u32 duration = 0, retval; int err, tgt_fd, i; + struct btf *btf; err = bpf_prog_load(target_obj_file, BPF_PROG_TYPE_UNSPEC, &tgt_obj, &tgt_fd); - if (CHECK(err, "tgt_prog_load", "file %s err %d errno %d\n", - target_obj_file, err, errno)) + if (!ASSERT_OK(err, "tgt_prog_load")) return; DECLARE_LIBBPF_OPTS(bpf_object_open_opts, opts, .attach_prog_fd = tgt_fd, ); + info_len = sizeof(prog_info); + err = bpf_obj_get_info_by_fd(tgt_fd, &prog_info, &info_len); + if (!ASSERT_OK(err, "tgt_fd_get_info")) + goto close_prog; + + tgt_prog_id = prog_info.id; + btf = 
bpf_object__btf(tgt_obj); + link = calloc(sizeof(struct bpf_link *), prog_cnt); + if (!ASSERT_OK_PTR(link, "link_ptr")) + goto close_prog; + prog = calloc(sizeof(struct bpf_program *), prog_cnt); - if (CHECK(!link || !prog, "alloc_memory", "failed to alloc memory")) + if (!ASSERT_OK_PTR(prog, "prog_ptr")) goto close_prog; obj = bpf_object__open_file(obj_file, &opts); - if (CHECK(IS_ERR_OR_NULL(obj), "obj_open", - "failed to open %s: %ld\n", obj_file, - PTR_ERR(obj))) + if (!ASSERT_OK_PTR(obj, "obj_open")) goto close_prog; err = bpf_object__load(obj); - if (CHECK(err, "obj_load", "err %d\n", err)) + if (!ASSERT_OK(err, "obj_load")) goto close_prog; for (i = 0; i < prog_cnt; i++) { + struct bpf_link_info link_info; + char *tgt_name; + __s32 btf_id; + + tgt_name = strstr(prog_name[i], "/"); + if (!ASSERT_OK_PTR(tgt_name, "tgt_name")) + goto close_prog; + btf_id = btf__find_by_name_kind(btf, tgt_name + 1, BTF_KIND_FUNC); + prog[i] = bpf_object__find_program_by_title(obj, prog_name[i]); - if (CHECK(!prog[i], "find_prog", "prog %s not found\n", prog_name[i])) + if (!ASSERT_OK_PTR(prog[i], prog_name[i])) goto close_prog; + link[i] = bpf_program__attach_trace(prog[i]); - if (CHECK(IS_ERR(link[i]), "attach_trace", "failed to link\n")) + if (!ASSERT_OK_PTR(link[i], "attach_trace")) goto close_prog; + + info_len = sizeof(link_info); + memset(&link_info, 0, sizeof(link_info)); + err = bpf_obj_get_info_by_fd(bpf_link__fd(link[i]), + &link_info, &info_len); + ASSERT_OK(err, "link_fd_get_info"); + ASSERT_EQ(link_info.tracing.attach_type, + bpf_program__get_expected_attach_type(prog[i]), + "link_attach_type"); + ASSERT_EQ(link_info.tracing.target_obj_id, tgt_prog_id, "link_tgt_obj_id"); + ASSERT_EQ(link_info.tracing.target_btf_id, btf_id, "link_tgt_btf_id"); } if (cb) { @@ -106,10 +137,9 @@ static void test_fexit_bpf2bpf_common(const char *obj_file, goto close_prog; err = bpf_prog_test_run(tgt_fd, 1, &pkt_v6, sizeof(pkt_v6), - NULL, NULL, &retval, &duration); - CHECK(err || 
retval, "ipv6", - "err %d errno %d retval %d duration %d\n", - err, errno, retval, duration); + NULL, NULL, &retval, NULL); + ASSERT_OK(err, "prog_run"); + ASSERT_EQ(retval, 0, "prog_run_ret"); if (check_data_map(obj, prog_cnt, false)) goto close_prog; diff --git a/tools/testing/selftests/bpf/prog_tests/fexit_sleep.c b/tools/testing/selftests/bpf/prog_tests/fexit_sleep.c index 6c4d42a2386f..ccc7e8a34ab6 100644 --- a/tools/testing/selftests/bpf/prog_tests/fexit_sleep.c +++ b/tools/testing/selftests/bpf/prog_tests/fexit_sleep.c @@ -39,7 +39,7 @@ void test_fexit_sleep(void) goto cleanup; cpid = clone(do_sleep, child_stack + STACK_SIZE, CLONE_FILES | SIGCHLD, fexit_skel); - if (CHECK(cpid == -1, "clone", strerror(errno))) + if (CHECK(cpid == -1, "clone", "%s\n", strerror(errno))) goto cleanup; /* wait until first sys_nanosleep ends and second sys_nanosleep starts */ @@ -65,7 +65,7 @@ void test_fexit_sleep(void) /* kill the thread to unwind sys_nanosleep stack through the trampoline */ kill(cpid, 9); - if (CHECK(waitpid(cpid, &wstatus, 0) == -1, "waitpid", strerror(errno))) + if (CHECK(waitpid(cpid, &wstatus, 0) == -1, "waitpid", "%s\n", strerror(errno))) goto cleanup; if (CHECK(WEXITSTATUS(wstatus) != 0, "exitstatus", "failed")) goto cleanup; diff --git a/tools/testing/selftests/bpf/prog_tests/linked_funcs.c b/tools/testing/selftests/bpf/prog_tests/linked_funcs.c new file mode 100644 index 000000000000..e9916f2817ec --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/linked_funcs.c @@ -0,0 +1,42 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2021 Facebook */ + +#include <test_progs.h> +#include <sys/syscall.h> +#include "linked_funcs.skel.h" + +void test_linked_funcs(void) +{ + int err; + struct linked_funcs *skel; + + skel = linked_funcs__open(); + if (!ASSERT_OK_PTR(skel, "skel_open")) + return; + + skel->rodata->my_tid = syscall(SYS_gettid); + skel->bss->syscall_id = SYS_getpgid; + + err = linked_funcs__load(skel); + if (!ASSERT_OK(err, 
"skel_load")) + goto cleanup; + + err = linked_funcs__attach(skel); + if (!ASSERT_OK(err, "skel_attach")) + goto cleanup; + + /* trigger */ + syscall(SYS_getpgid); + + ASSERT_EQ(skel->bss->output_val1, 2000 + 2000, "output_val1"); + ASSERT_EQ(skel->bss->output_ctx1, SYS_getpgid, "output_ctx1"); + ASSERT_EQ(skel->bss->output_weak1, 42, "output_weak1"); + + ASSERT_EQ(skel->bss->output_val2, 2 * 1000 + 2 * (2 * 1000), "output_val2"); + ASSERT_EQ(skel->bss->output_ctx2, SYS_getpgid, "output_ctx2"); + /* output_weak2 should never be updated */ + ASSERT_EQ(skel->bss->output_weak2, 0, "output_weak2"); + +cleanup: + linked_funcs__destroy(skel); +} diff --git a/tools/testing/selftests/bpf/prog_tests/linked_maps.c b/tools/testing/selftests/bpf/prog_tests/linked_maps.c new file mode 100644 index 000000000000..85dcaaaf2775 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/linked_maps.c @@ -0,0 +1,30 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2021 Facebook */ + +#include <test_progs.h> +#include <sys/syscall.h> +#include "linked_maps.skel.h" + +void test_linked_maps(void) +{ + int err; + struct linked_maps *skel; + + skel = linked_maps__open_and_load(); + if (!ASSERT_OK_PTR(skel, "skel_open")) + return; + + err = linked_maps__attach(skel); + if (!ASSERT_OK(err, "skel_attach")) + goto cleanup; + + /* trigger */ + syscall(SYS_getpgid); + + ASSERT_EQ(skel->bss->output_first1, 2000, "output_first1"); + ASSERT_EQ(skel->bss->output_second1, 2, "output_second1"); + ASSERT_EQ(skel->bss->output_weak1, 2, "output_weak1"); + +cleanup: + linked_maps__destroy(skel); +} diff --git a/tools/testing/selftests/bpf/prog_tests/linked_vars.c b/tools/testing/selftests/bpf/prog_tests/linked_vars.c new file mode 100644 index 000000000000..267166abe4c1 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/linked_vars.c @@ -0,0 +1,43 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2021 Facebook */ + +#include <test_progs.h> +#include <sys/syscall.h> +#include 
"linked_vars.skel.h" + +void test_linked_vars(void) +{ + int err; + struct linked_vars *skel; + + skel = linked_vars__open(); + if (!ASSERT_OK_PTR(skel, "skel_open")) + return; + + skel->bss->input_bss1 = 1000; + skel->bss->input_bss2 = 2000; + skel->bss->input_bss_weak = 3000; + + err = linked_vars__load(skel); + if (!ASSERT_OK(err, "skel_load")) + goto cleanup; + + err = linked_vars__attach(skel); + if (!ASSERT_OK(err, "skel_attach")) + goto cleanup; + + /* trigger */ + syscall(SYS_getpgid); + + ASSERT_EQ(skel->bss->output_bss1, 1000 + 2000 + 3000, "output_bss1"); + ASSERT_EQ(skel->bss->output_bss2, 1000 + 2000 + 3000, "output_bss2"); + /* 10 comes from "winner" input_data_weak in first obj file */ + ASSERT_EQ(skel->bss->output_data1, 1 + 2 + 10, "output_data1"); + ASSERT_EQ(skel->bss->output_data2, 1 + 2 + 10, "output_data2"); + /* 100 comes from "winner" input_rodata_weak in first obj file */ + ASSERT_EQ(skel->bss->output_rodata1, 11 + 22 + 100, "output_rodata1"); + ASSERT_EQ(skel->bss->output_rodata2, 11 + 22 + 100, "output_rodata2"); + +cleanup: + linked_vars__destroy(skel); +} diff --git a/tools/testing/selftests/bpf/prog_tests/map_ptr.c b/tools/testing/selftests/bpf/prog_tests/map_ptr.c index c230a573c373..4972f92205c7 100644 --- a/tools/testing/selftests/bpf/prog_tests/map_ptr.c +++ b/tools/testing/selftests/bpf/prog_tests/map_ptr.c @@ -12,11 +12,22 @@ void test_map_ptr(void) __u32 duration = 0, retval; char buf[128]; int err; + int page_size = getpagesize(); - skel = map_ptr_kern__open_and_load(); - if (CHECK(!skel, "skel_open_load", "open_load failed\n")) + skel = map_ptr_kern__open(); + if (!ASSERT_OK_PTR(skel, "skel_open")) return; + err = bpf_map__set_max_entries(skel->maps.m_ringbuf, page_size); + if (!ASSERT_OK(err, "bpf_map__set_max_entries")) + goto cleanup; + + err = map_ptr_kern__load(skel); + if (!ASSERT_OK(err, "skel_load")) + goto cleanup; + + skel->bss->page_size = page_size; + err = bpf_prog_test_run(bpf_program__fd(skel->progs.cg_skb), 1,
&pkt_v4, sizeof(pkt_v4), buf, NULL, &retval, NULL); diff --git a/tools/testing/selftests/bpf/prog_tests/mmap.c b/tools/testing/selftests/bpf/prog_tests/mmap.c index 9c3c5c0f068f..37b002ca1167 100644 --- a/tools/testing/selftests/bpf/prog_tests/mmap.c +++ b/tools/testing/selftests/bpf/prog_tests/mmap.c @@ -29,22 +29,36 @@ void test_mmap(void) struct test_mmap *skel; __u64 val = 0; - skel = test_mmap__open_and_load(); - if (CHECK(!skel, "skel_open_and_load", "skeleton open/load failed\n")) + skel = test_mmap__open(); + if (CHECK(!skel, "skel_open", "skeleton open failed\n")) return; + err = bpf_map__set_max_entries(skel->maps.rdonly_map, page_size); + if (CHECK(err != 0, "bpf_map__set_max_entries", "bpf_map__set_max_entries failed\n")) + goto cleanup; + + /* at least 4 pages of data */ + err = bpf_map__set_max_entries(skel->maps.data_map, + 4 * (page_size / sizeof(u64))); + if (CHECK(err != 0, "bpf_map__set_max_entries", "bpf_map__set_max_entries failed\n")) + goto cleanup; + + err = test_mmap__load(skel); + if (CHECK(err != 0, "skel_load", "skeleton load failed\n")) + goto cleanup; + bss_map = skel->maps.bss; data_map = skel->maps.data_map; data_map_fd = bpf_map__fd(data_map); rdmap_fd = bpf_map__fd(skel->maps.rdonly_map); - tmp1 = mmap(NULL, 4096, PROT_READ | PROT_WRITE, MAP_SHARED, rdmap_fd, 0); + tmp1 = mmap(NULL, page_size, PROT_READ | PROT_WRITE, MAP_SHARED, rdmap_fd, 0); if (CHECK(tmp1 != MAP_FAILED, "rdonly_write_mmap", "unexpected success\n")) { - munmap(tmp1, 4096); + munmap(tmp1, page_size); goto cleanup; } /* now double-check if it's mmap()'able at all */ - tmp1 = mmap(NULL, 4096, PROT_READ, MAP_SHARED, rdmap_fd, 0); + tmp1 = mmap(NULL, page_size, PROT_READ, MAP_SHARED, rdmap_fd, 0); if (CHECK(tmp1 == MAP_FAILED, "rdonly_read_mmap", "failed: %d\n", errno)) goto cleanup; diff --git a/tools/testing/selftests/bpf/prog_tests/ns_current_pid_tgid.c b/tools/testing/selftests/bpf/prog_tests/ns_current_pid_tgid.c index 31a3114906e2..2535788e135f 100644 --- 
a/tools/testing/selftests/bpf/prog_tests/ns_current_pid_tgid.c +++ b/tools/testing/selftests/bpf/prog_tests/ns_current_pid_tgid.c @@ -68,10 +68,10 @@ static void test_ns_current_pid_tgid_new_ns(void) cpid = clone(test_current_pid_tgid, child_stack + STACK_SIZE, CLONE_NEWPID | SIGCHLD, NULL); - if (CHECK(cpid == -1, "clone", strerror(errno))) + if (CHECK(cpid == -1, "clone", "%s\n", strerror(errno))) return; - if (CHECK(waitpid(cpid, &wstatus, 0) == -1, "waitpid", strerror(errno))) + if (CHECK(waitpid(cpid, &wstatus, 0) == -1, "waitpid", "%s\n", strerror(errno))) return; if (CHECK(WEXITSTATUS(wstatus) != 0, "newns_pidtgid", "failed")) diff --git a/tools/testing/selftests/bpf/prog_tests/ringbuf.c b/tools/testing/selftests/bpf/prog_tests/ringbuf.c index fddbc5db5d6a..de78617f6550 100644 --- a/tools/testing/selftests/bpf/prog_tests/ringbuf.c +++ b/tools/testing/selftests/bpf/prog_tests/ringbuf.c @@ -87,11 +87,20 @@ void test_ringbuf(void) pthread_t thread; long bg_ret = -1; int err, cnt; + int page_size = getpagesize(); - skel = test_ringbuf__open_and_load(); - if (CHECK(!skel, "skel_open_load", "skeleton open&load failed\n")) + skel = test_ringbuf__open(); + if (CHECK(!skel, "skel_open", "skeleton open failed\n")) return; + err = bpf_map__set_max_entries(skel->maps.ringbuf, page_size); + if (CHECK(err != 0, "bpf_map__set_max_entries", "bpf_map__set_max_entries failed\n")) + goto cleanup; + + err = test_ringbuf__load(skel); + if (CHECK(err != 0, "skel_load", "skeleton load failed\n")) + goto cleanup; + /* only trigger BPF program for current process */ skel->bss->pid = getpid(); @@ -110,9 +119,9 @@ void test_ringbuf(void) CHECK(skel->bss->avail_data != 3 * rec_sz, "err_avail_size", "exp %ld, got %ld\n", 3L * rec_sz, skel->bss->avail_data); - CHECK(skel->bss->ring_size != 4096, + CHECK(skel->bss->ring_size != page_size, "err_ring_size", "exp %ld, got %ld\n", - 4096L, skel->bss->ring_size); + (long)page_size, skel->bss->ring_size); CHECK(skel->bss->cons_pos != 0, 
"err_cons_pos", "exp %ld, got %ld\n", 0L, skel->bss->cons_pos); diff --git a/tools/testing/selftests/bpf/prog_tests/ringbuf_multi.c b/tools/testing/selftests/bpf/prog_tests/ringbuf_multi.c index d37161e59bb2..cef63e703924 100644 --- a/tools/testing/selftests/bpf/prog_tests/ringbuf_multi.c +++ b/tools/testing/selftests/bpf/prog_tests/ringbuf_multi.c @@ -41,13 +41,42 @@ static int process_sample(void *ctx, void *data, size_t len) void test_ringbuf_multi(void) { struct test_ringbuf_multi *skel; - struct ring_buffer *ringbuf; + struct ring_buffer *ringbuf = NULL; int err; + int page_size = getpagesize(); + int proto_fd = -1; - skel = test_ringbuf_multi__open_and_load(); - if (CHECK(!skel, "skel_open_load", "skeleton open&load failed\n")) + skel = test_ringbuf_multi__open(); + if (CHECK(!skel, "skel_open", "skeleton open failed\n")) return; + err = bpf_map__set_max_entries(skel->maps.ringbuf1, page_size); + if (CHECK(err != 0, "bpf_map__set_max_entries", "bpf_map__set_max_entries failed\n")) + goto cleanup; + + err = bpf_map__set_max_entries(skel->maps.ringbuf2, page_size); + if (CHECK(err != 0, "bpf_map__set_max_entries", "bpf_map__set_max_entries failed\n")) + goto cleanup; + + err = bpf_map__set_max_entries(bpf_map__inner_map(skel->maps.ringbuf_arr), page_size); + if (CHECK(err != 0, "bpf_map__set_max_entries", "bpf_map__set_max_entries failed\n")) + goto cleanup; + + proto_fd = bpf_create_map(BPF_MAP_TYPE_RINGBUF, 0, 0, page_size, 0); + if (CHECK(proto_fd == -1, "bpf_create_map", "bpf_create_map failed\n")) + goto cleanup; + + err = bpf_map__set_inner_map_fd(skel->maps.ringbuf_hash, proto_fd); + if (CHECK(err != 0, "bpf_map__set_inner_map_fd", "bpf_map__set_inner_map_fd failed\n")) + goto cleanup; + + err = test_ringbuf_multi__load(skel); + if (CHECK(err != 0, "skel_load", "skeleton load failed\n")) + goto cleanup; + + close(proto_fd); + proto_fd = -1; + /* only trigger BPF program for current process */ skel->bss->pid = getpid(); @@ -97,6 +126,8 @@ void 
test_ringbuf_multi(void) 2L, skel->bss->total); cleanup: + if (proto_fd >= 0) + close(proto_fd); ring_buffer__free(ringbuf); test_ringbuf_multi__destroy(skel); } diff --git a/tools/testing/selftests/bpf/prog_tests/snprintf.c b/tools/testing/selftests/bpf/prog_tests/snprintf.c new file mode 100644 index 000000000000..a958c22aec75 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/snprintf.c @@ -0,0 +1,125 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2021 Google LLC. */ + +#include <test_progs.h> +#include "test_snprintf.skel.h" +#include "test_snprintf_single.skel.h" + +#define EXP_NUM_OUT "-8 9 96 -424242 1337 DABBAD00" +#define EXP_NUM_RET sizeof(EXP_NUM_OUT) + +#define EXP_IP_OUT "127.000.000.001 0000:0000:0000:0000:0000:0000:0000:0001" +#define EXP_IP_RET sizeof(EXP_IP_OUT) + +/* The third specifier, %pB, depends on compiler inlining so don't check it */ +#define EXP_SYM_OUT "schedule schedule+0x0/" +#define MIN_SYM_RET sizeof(EXP_SYM_OUT) + +/* The third specifier, %p, is a hashed pointer which changes on every reboot */ +#define EXP_ADDR_OUT "0000000000000000 ffff00000add4e55 " +#define EXP_ADDR_RET sizeof(EXP_ADDR_OUT "unknownhashedptr") + +#define EXP_STR_OUT "str1 longstr" +#define EXP_STR_RET sizeof(EXP_STR_OUT) + +#define EXP_OVER_OUT "%over" +#define EXP_OVER_RET 10 + +#define EXP_PAD_OUT " 4 000" +#define EXP_PAD_RET 900007 + +#define EXP_NO_ARG_OUT "simple case" +#define EXP_NO_ARG_RET 12 + +#define EXP_NO_BUF_RET 29 + +void test_snprintf_positive(void) +{ + char exp_addr_out[] = EXP_ADDR_OUT; + char exp_sym_out[] = EXP_SYM_OUT; + struct test_snprintf *skel; + + skel = test_snprintf__open_and_load(); + if (!ASSERT_OK_PTR(skel, "skel_open")) + return; + + if (!ASSERT_OK(test_snprintf__attach(skel), "skel_attach")) + goto cleanup; + + /* trigger tracepoint */ + usleep(1); + + ASSERT_STREQ(skel->bss->num_out, EXP_NUM_OUT, "num_out"); + ASSERT_EQ(skel->bss->num_ret, EXP_NUM_RET, "num_ret"); + + ASSERT_STREQ(skel->bss->ip_out, 
EXP_IP_OUT, "ip_out"); + ASSERT_EQ(skel->bss->ip_ret, EXP_IP_RET, "ip_ret"); + + ASSERT_OK(memcmp(skel->bss->sym_out, exp_sym_out, + sizeof(exp_sym_out) - 1), "sym_out"); + ASSERT_LT(MIN_SYM_RET, skel->bss->sym_ret, "sym_ret"); + + ASSERT_OK(memcmp(skel->bss->addr_out, exp_addr_out, + sizeof(exp_addr_out) - 1), "addr_out"); + ASSERT_EQ(skel->bss->addr_ret, EXP_ADDR_RET, "addr_ret"); + + ASSERT_STREQ(skel->bss->str_out, EXP_STR_OUT, "str_out"); + ASSERT_EQ(skel->bss->str_ret, EXP_STR_RET, "str_ret"); + + ASSERT_STREQ(skel->bss->over_out, EXP_OVER_OUT, "over_out"); + ASSERT_EQ(skel->bss->over_ret, EXP_OVER_RET, "over_ret"); + + ASSERT_STREQ(skel->bss->pad_out, EXP_PAD_OUT, "pad_out"); + ASSERT_EQ(skel->bss->pad_ret, EXP_PAD_RET, "pad_ret"); + + ASSERT_STREQ(skel->bss->noarg_out, EXP_NO_ARG_OUT, "no_arg_out"); + ASSERT_EQ(skel->bss->noarg_ret, EXP_NO_ARG_RET, "no_arg_ret"); + + ASSERT_EQ(skel->bss->nobuf_ret, EXP_NO_BUF_RET, "no_buf_ret"); + +cleanup: + test_snprintf__destroy(skel); +} + +#define min(a, b) ((a) < (b) ? 
(a) : (b)) + +/* Loads an eBPF object calling bpf_snprintf with up to 10 characters of fmt */ +static int load_single_snprintf(char *fmt) +{ + struct test_snprintf_single *skel; + int ret; + + skel = test_snprintf_single__open(); + if (!skel) + return -EINVAL; + + memcpy(skel->rodata->fmt, fmt, min(strlen(fmt) + 1, 10)); + + ret = test_snprintf_single__load(skel); + test_snprintf_single__destroy(skel); + + return ret; +} + +void test_snprintf_negative(void) +{ + ASSERT_OK(load_single_snprintf("valid %d"), "valid usage"); + + ASSERT_ERR(load_single_snprintf("0123456789"), "no terminating zero"); + ASSERT_ERR(load_single_snprintf("%d %d"), "too many specifiers"); + ASSERT_ERR(load_single_snprintf("%pi5"), "invalid specifier 1"); + ASSERT_ERR(load_single_snprintf("%a"), "invalid specifier 2"); + ASSERT_ERR(load_single_snprintf("%"), "invalid specifier 3"); + ASSERT_ERR(load_single_snprintf("%12345678"), "invalid specifier 4"); + ASSERT_ERR(load_single_snprintf("%--------"), "invalid specifier 5"); + ASSERT_ERR(load_single_snprintf("\x80"), "non ascii character"); + ASSERT_ERR(load_single_snprintf("\x1"), "non printable character"); +} + +void test_snprintf(void) +{ + if (test__start_subtest("snprintf_positive")) + test_snprintf_positive(); + if (test__start_subtest("snprintf_negative")) + test_snprintf_negative(); +} diff --git a/tools/testing/selftests/bpf/prog_tests/sockopt_sk.c b/tools/testing/selftests/bpf/prog_tests/sockopt_sk.c index d5b44b135c00..4b937e5dbaca 100644 --- a/tools/testing/selftests/bpf/prog_tests/sockopt_sk.c +++ b/tools/testing/selftests/bpf/prog_tests/sockopt_sk.c @@ -3,6 +3,7 @@ #include "cgroup_helpers.h" #include <linux/tcp.h> +#include "sockopt_sk.skel.h" #ifndef SOL_TCP #define SOL_TCP IPPROTO_TCP @@ -191,60 +192,30 @@ err: return -1; } -static int prog_attach(struct bpf_object *obj, int cgroup_fd, const char *title) +static void run_test(int cgroup_fd) { - enum bpf_attach_type attach_type; - enum bpf_prog_type prog_type; - struct 
bpf_program *prog; - int err; + struct sockopt_sk *skel; - err = libbpf_prog_type_by_name(title, &prog_type, &attach_type); - if (err) { - log_err("Failed to deduct types for %s BPF program", title); - return -1; - } + skel = sockopt_sk__open_and_load(); + if (!ASSERT_OK_PTR(skel, "skel_load")) + goto cleanup; - prog = bpf_object__find_program_by_title(obj, title); - if (!prog) { - log_err("Failed to find %s BPF program", title); - return -1; - } - - err = bpf_prog_attach(bpf_program__fd(prog), cgroup_fd, - attach_type, 0); - if (err) { - log_err("Failed to attach %s BPF program", title); - return -1; - } - - return 0; -} - -static void run_test(int cgroup_fd) -{ - struct bpf_prog_load_attr attr = { - .file = "./sockopt_sk.o", - }; - struct bpf_object *obj; - int ignored; - int err; - - err = bpf_prog_load_xattr(&attr, &obj, &ignored); - if (CHECK_FAIL(err)) - return; + skel->bss->page_size = getpagesize(); - err = prog_attach(obj, cgroup_fd, "cgroup/getsockopt"); - if (CHECK_FAIL(err)) - goto close_bpf_object; + skel->links._setsockopt = + bpf_program__attach_cgroup(skel->progs._setsockopt, cgroup_fd); + if (!ASSERT_OK_PTR(skel->links._setsockopt, "setsockopt_link")) + goto cleanup; - err = prog_attach(obj, cgroup_fd, "cgroup/setsockopt"); - if (CHECK_FAIL(err)) - goto close_bpf_object; + skel->links._getsockopt = + bpf_program__attach_cgroup(skel->progs._getsockopt, cgroup_fd); + if (!ASSERT_OK_PTR(skel->links._getsockopt, "getsockopt_link")) + goto cleanup; - CHECK_FAIL(getsetsockopt()); + ASSERT_OK(getsetsockopt(), "getsetsockopt"); -close_bpf_object: - bpf_object__close(obj); +cleanup: + sockopt_sk__destroy(skel); } void test_sockopt_sk(void) diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_task_stack.c b/tools/testing/selftests/bpf/progs/bpf_iter_task_stack.c index 50e59a2e142e..43c36f5f7649 100644 --- a/tools/testing/selftests/bpf/progs/bpf_iter_task_stack.c +++ b/tools/testing/selftests/bpf/progs/bpf_iter_task_stack.c @@ -35,3 +35,30 @@ int 
dump_task_stack(struct bpf_iter__task *ctx) return 0; } + +SEC("iter/task") +int get_task_user_stacks(struct bpf_iter__task *ctx) +{ + struct seq_file *seq = ctx->meta->seq; + struct task_struct *task = ctx->task; + uint64_t buf_sz = 0; + int64_t res; + + if (task == (void *)0) + return 0; + + res = bpf_get_task_stack(task, entries, + MAX_STACK_TRACE_DEPTH * SIZE_OF_ULONG, BPF_F_USER_STACK); + if (res <= 0) + return 0; + + buf_sz += res; + + /* If the verifier doesn't refine bpf_get_task_stack res, and instead + * assumes res is entirely unknown, this program will fail to load as + * the verifier will believe that max buf_sz value allows reading + * past the end of entries in bpf_seq_write call + */ + bpf_seq_write(seq, &entries, buf_sz); + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/linked_funcs1.c b/tools/testing/selftests/bpf/progs/linked_funcs1.c new file mode 100644 index 000000000000..b964ec1390c2 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/linked_funcs1.c @@ -0,0 +1,73 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2021 Facebook */ + +#include "vmlinux.h" +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_tracing.h> + +/* weak and shared between two files */ +const volatile int my_tid __weak; +long syscall_id __weak; + +int output_val1; +int output_ctx1; +int output_weak1; + +/* same "subprog" name in all files, but it's ok because they all are static */ +static __noinline int subprog(int x) +{ + /* but different formula */ + return x * 1; +} + +/* Global functions can't be void */ +int set_output_val1(int x) +{ + output_val1 = x + subprog(x); + return x; +} + +/* This function can't be verified as global, as it assumes raw_tp/sys_enter + * context and accesses syscall id (second argument). So we mark it as + * __hidden, so that libbpf will mark it as static in the final object file, + * right before verifying it in the kernel. + * + * But we don't mark it as __hidden here, rather at extern site. 
__hidden is + * "contaminating" visibility, so it will get propagated from either extern or + * actual definition (including from the losing __weak definition). + */ +void set_output_ctx1(__u64 *ctx) +{ + output_ctx1 = ctx[1]; /* long id, same as in BPF_PROG below */ +} + +/* this weak instance should win because it's the first one */ +__weak int set_output_weak(int x) +{ + output_weak1 = x; + return x; +} + +extern int set_output_val2(int x); + +/* here we'll force set_output_ctx2() to be __hidden in the final obj file */ +__hidden extern void set_output_ctx2(__u64 *ctx); + +SEC("raw_tp/sys_enter") +int BPF_PROG(handler1, struct pt_regs *regs, long id) +{ + if (my_tid != (u32)bpf_get_current_pid_tgid() || id != syscall_id) + return 0; + + set_output_val2(1000); + set_output_ctx2(ctx); /* ctx definition is hidden in BPF_PROG macro */ + + /* keep input value the same across both files to avoid dependency on + * handler call order; differentiate by output_weak1 vs output_weak2. + */ + set_output_weak(42); + + return 0; +} + +char LICENSE[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/linked_funcs2.c b/tools/testing/selftests/bpf/progs/linked_funcs2.c new file mode 100644 index 000000000000..575e958e60b7 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/linked_funcs2.c @@ -0,0 +1,73 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2021 Facebook */ + +#include "vmlinux.h" +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_tracing.h> + +/* weak and shared between both files */ +const volatile int my_tid __weak; +long syscall_id __weak; + +int output_val2; +int output_ctx2; +int output_weak2; /* should stay zero */ + +/* same "subprog" name in all files, but it's ok because they all are static */ +static __noinline int subprog(int x) +{ + /* but different formula */ + return x * 2; +} + +/* Global functions can't be void */ +int set_output_val2(int x) +{ + output_val2 = 2 * x + 2 * subprog(x); + return 2 * x; +} + +/* This function 
can't be verified as global, as it assumes raw_tp/sys_enter + * context and accesses syscall id (second argument). So we mark it as + * __hidden, so that libbpf will mark it as static in the final object file, + * right before verifying it in the kernel. + * + * But we don't mark it as __hidden here, rather at extern site. __hidden is + * "contaminating" visibility, so it will get propagated from either extern or + * actual definition (including from the losing __weak definition). + */ +void set_output_ctx2(__u64 *ctx) +{ + output_ctx2 = ctx[1]; /* long id, same as in BPF_PROG below */ +} + +/* this weak instance should lose, because it will be processed second */ +__weak int set_output_weak(int x) +{ + output_weak2 = x; + return 2 * x; +} + +extern int set_output_val1(int x); + +/* here we'll force set_output_ctx1() to be __hidden in the final obj file */ +__hidden extern void set_output_ctx1(__u64 *ctx); + +SEC("raw_tp/sys_enter") +int BPF_PROG(handler2, struct pt_regs *regs, long id) +{ + if (my_tid != (u32)bpf_get_current_pid_tgid() || id != syscall_id) + return 0; + + set_output_val1(2000); + set_output_ctx1(ctx); /* ctx definition is hidden in BPF_PROG macro */ + + /* keep input value the same across both files to avoid dependency on + * handler call order; differentiate by output_weak1 vs output_weak2. 
+ */ + set_output_weak(42); + + return 0; +} + +char LICENSE[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/linked_maps1.c b/tools/testing/selftests/bpf/progs/linked_maps1.c new file mode 100644 index 000000000000..52291515cc72 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/linked_maps1.c @@ -0,0 +1,82 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2021 Facebook */ + +#include "vmlinux.h" +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_tracing.h> + +struct my_key { long x; }; +struct my_value { long x; }; + +struct { + __uint(type, BPF_MAP_TYPE_HASH); + __type(key, struct my_key); + __type(value, struct my_value); + __uint(max_entries, 16); +} map1 SEC(".maps"); + + /* Matches map2 definition in linked_maps2.c. Order of the attributes doesn't + * matter. + */ +typedef struct { + __uint(max_entries, 8); + __type(key, int); + __type(value, int); + __uint(type, BPF_MAP_TYPE_ARRAY); +} map2_t; + +extern map2_t map2 SEC(".maps"); + +/* This should be the winning map definition, but we have no way of verifying, + * so we just make sure that it links and works without errors + */ +struct { + __uint(type, BPF_MAP_TYPE_ARRAY); + __type(key, int); + __type(value, int); + __uint(max_entries, 16); +} map_weak __weak SEC(".maps"); + +int output_first1; +int output_second1; +int output_weak1; + +SEC("raw_tp/sys_enter") +int BPF_PROG(handler_enter1) +{ + /* update values with key = 1 */ + int key = 1, val = 1; + struct my_key key_struct = { .x = 1 }; + struct my_value val_struct = { .x = 1000 }; + + bpf_map_update_elem(&map1, &key_struct, &val_struct, 0); + bpf_map_update_elem(&map2, &key, &val, 0); + bpf_map_update_elem(&map_weak, &key, &val, 0); + + return 0; +} + +SEC("raw_tp/sys_exit") +int BPF_PROG(handler_exit1) +{ + /* lookup values with key = 2, set in another file */ + int key = 2, *val; + struct my_key key_struct = { .x = 2 }; + struct my_value *value_struct; + + value_struct = bpf_map_lookup_elem(&map1, &key_struct); + if 
(value_struct) + output_first1 = value_struct->x; + + val = bpf_map_lookup_elem(&map2, &key); + if (val) + output_second1 = *val; + + val = bpf_map_lookup_elem(&map_weak, &key); + if (val) + output_weak1 = *val; + + return 0; +} + +char LICENSE[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/linked_maps2.c b/tools/testing/selftests/bpf/progs/linked_maps2.c new file mode 100644 index 000000000000..0693687474ed --- /dev/null +++ b/tools/testing/selftests/bpf/progs/linked_maps2.c @@ -0,0 +1,76 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2021 Facebook */ + +#include "vmlinux.h" +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_tracing.h> + +/* modifiers and typedefs are ignored when comparing key/value types */ +typedef struct my_key { long x; } key_type; +typedef struct my_value { long x; } value_type; + +extern struct { + __uint(max_entries, 16); + __type(key, key_type); + __type(value, value_type); + __uint(type, BPF_MAP_TYPE_HASH); +} map1 SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_ARRAY); + __type(key, int); + __type(value, int); + __uint(max_entries, 8); +} map2 SEC(".maps"); + +/* this definition will lose, but it has to exactly match the winner */ +struct { + __uint(type, BPF_MAP_TYPE_ARRAY); + __type(key, int); + __type(value, int); + __uint(max_entries, 16); +} map_weak __weak SEC(".maps"); + +int output_first2; +int output_second2; +int output_weak2; + +SEC("raw_tp/sys_enter") +int BPF_PROG(handler_enter2) +{ + /* update values with key = 2 */ + int key = 2, val = 2; + key_type key_struct = { .x = 2 }; + value_type val_struct = { .x = 2000 }; + + bpf_map_update_elem(&map1, &key_struct, &val_struct, 0); + bpf_map_update_elem(&map2, &key, &val, 0); + bpf_map_update_elem(&map_weak, &key, &val, 0); + + return 0; +} + +SEC("raw_tp/sys_exit") +int BPF_PROG(handler_exit2) +{ + /* lookup values with key = 1, set in another file */ + int key = 1, *val; + key_type key_struct = { .x = 1 }; + value_type *value_struct; + + 
value_struct = bpf_map_lookup_elem(&map1, &key_struct); + if (value_struct) + output_first2 = value_struct->x; + + val = bpf_map_lookup_elem(&map2, &key); + if (val) + output_second2 = *val; + + val = bpf_map_lookup_elem(&map_weak, &key); + if (val) + output_weak2 = *val; + + return 0; +} + +char LICENSE[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/linked_vars1.c b/tools/testing/selftests/bpf/progs/linked_vars1.c new file mode 100644 index 000000000000..ef9e9d0bb0ca --- /dev/null +++ b/tools/testing/selftests/bpf/progs/linked_vars1.c @@ -0,0 +1,54 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2021 Facebook */ + +#include "vmlinux.h" +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_tracing.h> + +extern int LINUX_KERNEL_VERSION __kconfig; +/* this weak extern will be strict due to the other file's strong extern */ +extern bool CONFIG_BPF_SYSCALL __kconfig __weak; +extern const void bpf_link_fops __ksym __weak; + +int input_bss1; +int input_data1 = 1; +const volatile int input_rodata1 = 11; + +int input_bss_weak __weak; +/* these two definitions should win */ +int input_data_weak __weak = 10; +const volatile int input_rodata_weak __weak = 100; + +extern int input_bss2; +extern int input_data2; +extern const int input_rodata2; + +int output_bss1; +int output_data1; +int output_rodata1; + +long output_sink1; + +static __noinline int get_bss_res(void) +{ + /* just make sure all the relocations work against .text as well */ + return input_bss1 + input_bss2 + input_bss_weak; +} + +SEC("raw_tp/sys_enter") +int BPF_PROG(handler1) +{ + output_bss1 = get_bss_res(); + output_data1 = input_data1 + input_data2 + input_data_weak; + output_rodata1 = input_rodata1 + input_rodata2 + input_rodata_weak; + + /* make sure we actually use above special externs, otherwise compiler + * will optimize them out + */ + output_sink1 = LINUX_KERNEL_VERSION + + CONFIG_BPF_SYSCALL + + (long)&bpf_link_fops; + return 0; +} + +char LICENSE[] SEC("license") = 
"GPL"; diff --git a/tools/testing/selftests/bpf/progs/linked_vars2.c b/tools/testing/selftests/bpf/progs/linked_vars2.c new file mode 100644 index 000000000000..e4f5bd388a3c --- /dev/null +++ b/tools/testing/selftests/bpf/progs/linked_vars2.c @@ -0,0 +1,55 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2021 Facebook */ + +#include "vmlinux.h" +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_tracing.h> + +extern int LINUX_KERNEL_VERSION __kconfig; +/* when an extern is defined as both strong and weak, resulting symbol will be strong */ +extern bool CONFIG_BPF_SYSCALL __kconfig; +extern const void __start_BTF __ksym; + +int input_bss2; +int input_data2 = 2; +const volatile int input_rodata2 = 22; + +int input_bss_weak __weak; +/* these two weak variables should lose */ +int input_data_weak __weak = 20; +const volatile int input_rodata_weak __weak = 200; + +extern int input_bss1; +extern int input_data1; +extern const int input_rodata1; + +int output_bss2; +int output_data2; +int output_rodata2; + +int output_sink2; + +static __noinline int get_data_res(void) +{ + /* just make sure all the relocations work against .text as well */ + return input_data1 + input_data2 + input_data_weak; +} + +SEC("raw_tp/sys_enter") +int BPF_PROG(handler2) +{ + output_bss2 = input_bss1 + input_bss2 + input_bss_weak; + output_data2 = get_data_res(); + output_rodata2 = input_rodata1 + input_rodata2 + input_rodata_weak; + + /* make sure we actually use above special externs, otherwise compiler + * will optimize them out + */ + output_sink2 = LINUX_KERNEL_VERSION + + CONFIG_BPF_SYSCALL + + (long)&__start_BTF; + + return 0; +} + +char LICENSE[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/map_ptr_kern.c b/tools/testing/selftests/bpf/progs/map_ptr_kern.c index d8850bc6a9f1..d1d304c980f0 100644 --- a/tools/testing/selftests/bpf/progs/map_ptr_kern.c +++ b/tools/testing/selftests/bpf/progs/map_ptr_kern.c @@ -12,6 +12,7 @@ _Static_assert(MAX_ENTRIES < 
LOOP_BOUND, "MAX_ENTRIES must be < LOOP_BOUND"); enum bpf_map_type g_map_type = BPF_MAP_TYPE_UNSPEC; __u32 g_line = 0; +int page_size = 0; /* userspace should set it */ #define VERIFY_TYPE(type, func) ({ \ g_map_type = type; \ @@ -635,7 +636,6 @@ struct bpf_ringbuf_map { struct { __uint(type, BPF_MAP_TYPE_RINGBUF); - __uint(max_entries, 1 << 12); } m_ringbuf SEC(".maps"); static inline int check_ringbuf(void) @@ -643,7 +643,7 @@ static inline int check_ringbuf(void) struct bpf_ringbuf_map *ringbuf = (struct bpf_ringbuf_map *)&m_ringbuf; struct bpf_map *map = (struct bpf_map *)&m_ringbuf; - VERIFY(check(&ringbuf->map, map, 0, 0, 1 << 12)); + VERIFY(check(&ringbuf->map, map, 0, 0, page_size)); return 1; } diff --git a/tools/testing/selftests/bpf/progs/sockmap_tcp_msg_prog.c b/tools/testing/selftests/bpf/progs/sockmap_tcp_msg_prog.c index fdb4bf4408fa..eeaf6e75c9a2 100644 --- a/tools/testing/selftests/bpf/progs/sockmap_tcp_msg_prog.c +++ b/tools/testing/selftests/bpf/progs/sockmap_tcp_msg_prog.c @@ -8,18 +8,6 @@ int _version SEC("version") = 1; SEC("sk_msg1") int bpf_prog1(struct sk_msg_md *msg) { - void *data_end = (void *)(long) msg->data_end; - void *data = (void *)(long) msg->data; - - char *d; - - if (data + 8 > data_end) - return SK_DROP; - - bpf_printk("data length %i\n", (__u64)msg->data_end - (__u64)msg->data); - d = (char *)data; - bpf_printk("hello sendmsg hook %i %i\n", d[0], d[1]); - return SK_PASS; } diff --git a/tools/testing/selftests/bpf/progs/sockopt_sk.c b/tools/testing/selftests/bpf/progs/sockopt_sk.c index d3597f81e6e9..8acdb99b5959 100644 --- a/tools/testing/selftests/bpf/progs/sockopt_sk.c +++ b/tools/testing/selftests/bpf/progs/sockopt_sk.c @@ -6,11 +6,8 @@ #include <bpf/bpf_helpers.h> char _license[] SEC("license") = "GPL"; -__u32 _version SEC("version") = 1; -#ifndef PAGE_SIZE -#define PAGE_SIZE 4096 -#endif +int page_size = 0; /* userspace should set it */ #ifndef SOL_TCP #define SOL_TCP IPPROTO_TCP @@ -90,7 +87,7 @@ int _getsockopt(struct 
bpf_sockopt *ctx) * program can only see the first PAGE_SIZE * bytes of data. */ - if (optval_end - optval != PAGE_SIZE) + if (optval_end - optval != page_size) return 0; /* EPERM, unexpected data size */ return 1; @@ -161,7 +158,7 @@ int _setsockopt(struct bpf_sockopt *ctx) if (ctx->level == SOL_IP && ctx->optname == IP_FREEBIND) { /* Original optlen is larger than PAGE_SIZE. */ - if (ctx->optlen != PAGE_SIZE * 2) + if (ctx->optlen != page_size * 2) return 0; /* EPERM, unexpected data size */ if (optval + 1 > optval_end) @@ -175,7 +172,7 @@ int _setsockopt(struct bpf_sockopt *ctx) * program can only see the first PAGE_SIZE * bytes of data. */ - if (optval_end - optval != PAGE_SIZE) + if (optval_end - optval != page_size) return 0; /* EPERM, unexpected data size */ return 1; diff --git a/tools/testing/selftests/bpf/progs/test_mmap.c b/tools/testing/selftests/bpf/progs/test_mmap.c index 4eb42cff5fe9..5a5cc19a15bf 100644 --- a/tools/testing/selftests/bpf/progs/test_mmap.c +++ b/tools/testing/selftests/bpf/progs/test_mmap.c @@ -9,7 +9,6 @@ char _license[] SEC("license") = "GPL"; struct { __uint(type, BPF_MAP_TYPE_ARRAY); - __uint(max_entries, 4096); __uint(map_flags, BPF_F_MMAPABLE | BPF_F_RDONLY_PROG); __type(key, __u32); __type(value, char); @@ -17,7 +16,6 @@ struct { struct { __uint(type, BPF_MAP_TYPE_ARRAY); - __uint(max_entries, 512 * 4); /* at least 4 pages of data */ __uint(map_flags, BPF_F_MMAPABLE); __type(key, __u32); __type(value, __u64); diff --git a/tools/testing/selftests/bpf/progs/test_ringbuf.c b/tools/testing/selftests/bpf/progs/test_ringbuf.c index 8ba9959b036b..6b3f288b7c63 100644 --- a/tools/testing/selftests/bpf/progs/test_ringbuf.c +++ b/tools/testing/selftests/bpf/progs/test_ringbuf.c @@ -15,7 +15,6 @@ struct sample { struct { __uint(type, BPF_MAP_TYPE_RINGBUF); - __uint(max_entries, 1 << 12); } ringbuf SEC(".maps"); /* inputs */ diff --git a/tools/testing/selftests/bpf/progs/test_ringbuf_multi.c 
b/tools/testing/selftests/bpf/progs/test_ringbuf_multi.c index edf3b6953533..197b86546dca 100644 --- a/tools/testing/selftests/bpf/progs/test_ringbuf_multi.c +++ b/tools/testing/selftests/bpf/progs/test_ringbuf_multi.c @@ -15,7 +15,6 @@ struct sample { struct ringbuf_map { __uint(type, BPF_MAP_TYPE_RINGBUF); - __uint(max_entries, 1 << 12); } ringbuf1 SEC(".maps"), ringbuf2 SEC(".maps"); @@ -31,6 +30,17 @@ struct { }, }; +struct { + __uint(type, BPF_MAP_TYPE_HASH_OF_MAPS); + __uint(max_entries, 1); + __type(key, int); + __array(values, struct ringbuf_map); +} ringbuf_hash SEC(".maps") = { + .values = { + [0] = &ringbuf1, + }, +}; + /* inputs */ int pid = 0; int target_ring = 0; diff --git a/tools/testing/selftests/bpf/progs/test_snprintf.c b/tools/testing/selftests/bpf/progs/test_snprintf.c new file mode 100644 index 000000000000..951a0301c553 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_snprintf.c @@ -0,0 +1,73 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2021 Google LLC. 
*/ + +#include <linux/bpf.h> +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_tracing.h> + +char num_out[64] = {}; +long num_ret = 0; + +char ip_out[64] = {}; +long ip_ret = 0; + +char sym_out[64] = {}; +long sym_ret = 0; + +char addr_out[64] = {}; +long addr_ret = 0; + +char str_out[64] = {}; +long str_ret = 0; + +char over_out[6] = {}; +long over_ret = 0; + +char pad_out[10] = {}; +long pad_ret = 0; + +char noarg_out[64] = {}; +long noarg_ret = 0; + +long nobuf_ret = 0; + +extern const void schedule __ksym; + +SEC("raw_tp/sys_enter") +int handler(const void *ctx) +{ + /* Convenient values to pretty-print */ + const __u8 ex_ipv4[] = {127, 0, 0, 1}; + const __u8 ex_ipv6[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1}; + static const char str1[] = "str1"; + static const char longstr[] = "longstr"; + + /* Integer types */ + num_ret = BPF_SNPRINTF(num_out, sizeof(num_out), + "%d %u %x %li %llu %lX", + -8, 9, 150, -424242, 1337, 0xDABBAD00); + /* IP addresses */ + ip_ret = BPF_SNPRINTF(ip_out, sizeof(ip_out), "%pi4 %pI6", + &ex_ipv4, &ex_ipv6); + /* Symbol lookup formatting */ + sym_ret = BPF_SNPRINTF(sym_out, sizeof(sym_out), "%ps %pS %pB", + &schedule, &schedule, &schedule); + /* Kernel pointers */ + addr_ret = BPF_SNPRINTF(addr_out, sizeof(addr_out), "%pK %px %p", + 0, 0xFFFF00000ADD4E55, 0xFFFF00000ADD4E55); + /* Strings embedding */ + str_ret = BPF_SNPRINTF(str_out, sizeof(str_out), "%s %+05s", + str1, longstr); + /* Overflow */ + over_ret = BPF_SNPRINTF(over_out, sizeof(over_out), "%%overflow"); + /* Padding of fixed width numbers */ + pad_ret = BPF_SNPRINTF(pad_out, sizeof(pad_out), "%5d %0900000X", 4, 4); + /* No args */ + noarg_ret = BPF_SNPRINTF(noarg_out, sizeof(noarg_out), "simple case"); + /* No buffer */ + nobuf_ret = BPF_SNPRINTF(NULL, 0, "only interested in length %d", 60); + + return 0; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/test_snprintf_single.c 
b/tools/testing/selftests/bpf/progs/test_snprintf_single.c new file mode 100644 index 000000000000..402adaf344f9 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_snprintf_single.c @@ -0,0 +1,20 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2021 Google LLC. */ + +#include <linux/bpf.h> +#include <bpf/bpf_helpers.h> + +/* The format string is filled from the userspace such that loading fails */ +static const char fmt[10]; + +SEC("raw_tp/sys_enter") +int handler(const void *ctx) +{ + unsigned long long arg = 42; + + bpf_snprintf(NULL, 0, fmt, &arg, sizeof(arg)); + + return 0; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/test_tunnel_kern.c b/tools/testing/selftests/bpf/progs/test_tunnel_kern.c index ba6eadfec565..e7b673117436 100644 --- a/tools/testing/selftests/bpf/progs/test_tunnel_kern.c +++ b/tools/testing/selftests/bpf/progs/test_tunnel_kern.c @@ -396,7 +396,7 @@ int _ip6vxlan_get_tunnel(struct __sk_buff *skb) SEC("geneve_set_tunnel") int _geneve_set_tunnel(struct __sk_buff *skb) { - int ret, ret2; + int ret; struct bpf_tunnel_key key; struct geneve_opt gopt; diff --git a/tools/testing/selftests/bpf/test_progs.h b/tools/testing/selftests/bpf/test_progs.h index e87c8546230e..ee7e3b45182a 100644 --- a/tools/testing/selftests/bpf/test_progs.h +++ b/tools/testing/selftests/bpf/test_progs.h @@ -210,7 +210,7 @@ extern int test__join_cgroup(const char *path); #define ASSERT_ERR_PTR(ptr, name) ({ \ static int duration = 0; \ const void *___res = (ptr); \ - bool ___ok = IS_ERR(___res) \ + bool ___ok = IS_ERR(___res); \ CHECK(!___ok, (name), "unexpected pointer: %p\n", ___res); \ ___ok; \ }) diff --git a/tools/testing/selftests/bpf/verifier/bpf_get_stack.c b/tools/testing/selftests/bpf/verifier/bpf_get_stack.c index 69b048cf46d9..3e024c891178 100644 --- a/tools/testing/selftests/bpf/verifier/bpf_get_stack.c +++ b/tools/testing/selftests/bpf/verifier/bpf_get_stack.c @@ -42,3 +42,46 @@ .result = ACCEPT, 
.prog_type = BPF_PROG_TYPE_TRACEPOINT, }, +{ + "bpf_get_task_stack return R0 range is refined", + .insns = { + BPF_LDX_MEM(BPF_DW, BPF_REG_6, BPF_REG_1, 0), + BPF_LDX_MEM(BPF_DW, BPF_REG_6, BPF_REG_6, 0), // ctx->meta->seq + BPF_LDX_MEM(BPF_DW, BPF_REG_7, BPF_REG_1, 8), // ctx->task + BPF_LD_MAP_FD(BPF_REG_1, 0), // fixup_map_array_48b + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + BPF_JMP_IMM(BPF_JNE, BPF_REG_7, 0, 2), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + + BPF_MOV64_REG(BPF_REG_1, BPF_REG_7), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_0), + BPF_MOV64_REG(BPF_REG_9, BPF_REG_0), // keep buf for seq_write + BPF_MOV64_IMM(BPF_REG_3, 48), + BPF_MOV64_IMM(BPF_REG_4, 0), + BPF_EMIT_CALL(BPF_FUNC_get_task_stack), + BPF_JMP_IMM(BPF_JSGT, BPF_REG_0, 0, 2), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + + BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_9), + BPF_MOV64_REG(BPF_REG_3, BPF_REG_0), + BPF_EMIT_CALL(BPF_FUNC_seq_write), + + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_TRACING, + .expected_attach_type = BPF_TRACE_ITER, + .kfunc = "task", + .runs = -1, // Don't run, just load + .fixup_map_array_48b = { 3 }, +}, diff --git a/tools/testing/selftests/lib.mk b/tools/testing/selftests/lib.mk index a5ce26d548e4..9a41d8bb9ff1 100644 --- a/tools/testing/selftests/lib.mk +++ b/tools/testing/selftests/lib.mk @@ -1,6 +1,10 @@ # This mimics the top-level Makefile. We do it explicitly here so that this # Makefile can operate with or without the kbuild infrastructure. +ifneq ($(LLVM),) +CC := clang +else CC := $(CROSS_COMPILE)gcc +endif ifeq (0,$(MAKELEVEL)) ifeq ($(OUTPUT),) |