diff options
Diffstat (limited to 'tools')
180 files changed, 12460 insertions, 4068 deletions
diff --git a/tools/bpf/bpftool/Documentation/bpftool-cgroup.rst b/tools/bpf/bpftool/Documentation/bpftool-cgroup.rst index a17e9aa314fd..bd015ec9847b 100644 --- a/tools/bpf/bpftool/Documentation/bpftool-cgroup.rst +++ b/tools/bpf/bpftool/Documentation/bpftool-cgroup.rst @@ -31,11 +31,17 @@ CGROUP COMMANDS | **bpftool** **cgroup help** | | *PROG* := { **id** *PROG_ID* | **pinned** *FILE* | **tag** *PROG_TAG* } -| *ATTACH_TYPE* := { **ingress** | **egress** | **sock_create** | **sock_ops** | **device** | -| **bind4** | **bind6** | **post_bind4** | **post_bind6** | **connect4** | **connect6** | -| **getpeername4** | **getpeername6** | **getsockname4** | **getsockname6** | **sendmsg4** | -| **sendmsg6** | **recvmsg4** | **recvmsg6** | **sysctl** | **getsockopt** | **setsockopt** | -| **sock_release** } +| *ATTACH_TYPE* := { **cgroup_inet_ingress** | **cgroup_inet_egress** | +| **cgroup_inet_sock_create** | **cgroup_sock_ops** | +| **cgroup_device** | **cgroup_inet4_bind** | **cgroup_inet6_bind** | +| **cgroup_inet4_post_bind** | **cgroup_inet6_post_bind** | +| **cgroup_inet4_connect** | **cgroup_inet6_connect** | +| **cgroup_inet4_getpeername** | **cgroup_inet6_getpeername** | +| **cgroup_inet4_getsockname** | **cgroup_inet6_getsockname** | +| **cgroup_udp4_sendmsg** | **cgroup_udp6_sendmsg** | +| **cgroup_udp4_recvmsg** | **cgroup_udp6_recvmsg** | +| **cgroup_sysctl** | **cgroup_getsockopt** | **cgroup_setsockopt** | +| **cgroup_inet_sock_release** } | *ATTACH_FLAGS* := { **multi** | **override** } DESCRIPTION diff --git a/tools/bpf/bpftool/Documentation/bpftool-feature.rst b/tools/bpf/bpftool/Documentation/bpftool-feature.rst index 4ce9a77bc1e0..e44039f89be7 100644 --- a/tools/bpf/bpftool/Documentation/bpftool-feature.rst +++ b/tools/bpf/bpftool/Documentation/bpftool-feature.rst @@ -24,9 +24,11 @@ FEATURE COMMANDS ================ | **bpftool** **feature probe** [*COMPONENT*] [**full**] [**unprivileged**] [**macros** [**prefix** *PREFIX*]] +| **bpftool** **feature 
list_builtins** *GROUP* | **bpftool** **feature help** | | *COMPONENT* := { **kernel** | **dev** *NAME* } +| *GROUP* := { **prog_types** | **map_types** | **attach_types** | **link_types** | **helpers** } DESCRIPTION =========== @@ -70,6 +72,16 @@ DESCRIPTION The keywords **full**, **macros** and **prefix** have the same role as when probing the kernel. + **bpftool feature list_builtins** *GROUP* + List items known to bpftool. These can be BPF program types + (**prog_types**), BPF map types (**map_types**), attach types + (**attach_types**), link types (**link_types**), or BPF helper + functions (**helpers**). The command does not probe the system, but + simply lists the elements that bpftool knows from compilation time, + as provided from libbpf (for all object types) or from the BPF UAPI + header (list of helpers). This can be used in scripts to iterate over + BPF types or helpers. + **bpftool feature help** Print short help message. diff --git a/tools/bpf/bpftool/Documentation/bpftool-prog.rst b/tools/bpf/bpftool/Documentation/bpftool-prog.rst index a2e9359e554c..eb1b2a254eb1 100644 --- a/tools/bpf/bpftool/Documentation/bpftool-prog.rst +++ b/tools/bpf/bpftool/Documentation/bpftool-prog.rst @@ -53,8 +53,9 @@ PROG COMMANDS | **cgroup/getsockopt** | **cgroup/setsockopt** | **cgroup/sock_release** | | **struct_ops** | **fentry** | **fexit** | **freplace** | **sk_lookup** | } -| *ATTACH_TYPE* := { -| **msg_verdict** | **skb_verdict** | **stream_verdict** | **stream_parser** | **flow_dissector** +| *ATTACH_TYPE* := { +| **sk_msg_verdict** | **sk_skb_verdict** | **sk_skb_stream_verdict** | +| **sk_skb_stream_parser** | **flow_dissector** | } | *METRICs* := { | **cycles** | **instructions** | **l1d_loads** | **llc_misses** | diff --git a/tools/bpf/bpftool/Makefile b/tools/bpf/bpftool/Makefile index c6d2c77d0252..6b5b3a99f79d 100644 --- a/tools/bpf/bpftool/Makefile +++ b/tools/bpf/bpftool/Makefile @@ -53,7 +53,7 @@ $(LIBBPF_INTERNAL_HDRS): $(LIBBPF_HDRS_DIR)/%.h: 
$(BPF_DIR)/%.h | $(LIBBPF_HDRS_ $(LIBBPF_BOOTSTRAP): $(wildcard $(BPF_DIR)/*.[ch] $(BPF_DIR)/Makefile) | $(LIBBPF_BOOTSTRAP_OUTPUT) $(Q)$(MAKE) -C $(BPF_DIR) OUTPUT=$(LIBBPF_BOOTSTRAP_OUTPUT) \ DESTDIR=$(LIBBPF_BOOTSTRAP_DESTDIR:/=) prefix= \ - ARCH= CROSS_COMPILE= CC=$(HOSTCC) LD=$(HOSTLD) $@ install_headers + ARCH= CROSS_COMPILE= CC=$(HOSTCC) LD=$(HOSTLD) AR=$(HOSTAR) $@ install_headers $(LIBBPF_BOOTSTRAP_INTERNAL_HDRS): $(LIBBPF_BOOTSTRAP_HDRS_DIR)/%.h: $(BPF_DIR)/%.h | $(LIBBPF_BOOTSTRAP_HDRS_DIR) $(call QUIET_INSTALL, $@) @@ -93,10 +93,8 @@ INSTALL ?= install RM ?= rm -f FEATURE_USER = .bpftool -FEATURE_TESTS = libbfd disassembler-four-args zlib libcap \ - clang-bpf-co-re -FEATURE_DISPLAY = libbfd disassembler-four-args zlib libcap \ - clang-bpf-co-re +FEATURE_TESTS = libbfd disassembler-four-args libcap clang-bpf-co-re +FEATURE_DISPLAY = libbfd disassembler-four-args libcap clang-bpf-co-re check_feat := 1 NON_CHECK_FEAT_TARGETS := clean uninstall doc doc-clean doc-install doc-uninstall @@ -204,11 +202,6 @@ $(BOOTSTRAP_OUTPUT)disasm.o: $(srctree)/kernel/bpf/disasm.c $(OUTPUT)disasm.o: $(srctree)/kernel/bpf/disasm.c $(QUIET_CC)$(CC) $(CFLAGS) -c -MMD $< -o $@ -$(OUTPUT)feature.o: -ifneq ($(feature-zlib), 1) - $(error "No zlib found") -endif - $(BPFTOOL_BOOTSTRAP): $(BOOTSTRAP_OBJS) $(LIBBPF_BOOTSTRAP) $(QUIET_LINK)$(HOSTCC) $(HOST_CFLAGS) $(LDFLAGS) $(BOOTSTRAP_OBJS) $(LIBS_BOOTSTRAP) -o $@ diff --git a/tools/bpf/bpftool/bash-completion/bpftool b/tools/bpf/bpftool/bash-completion/bpftool index 5df8d72c5179..dc1641e3670e 100644 --- a/tools/bpf/bpftool/bash-completion/bpftool +++ b/tools/bpf/bpftool/bash-completion/bpftool @@ -407,8 +407,8 @@ _bpftool() return 0 ;; 5) - local BPFTOOL_PROG_ATTACH_TYPES='msg_verdict \ - skb_verdict stream_verdict stream_parser \ + local BPFTOOL_PROG_ATTACH_TYPES='sk_msg_verdict \ + sk_skb_verdict sk_skb_stream_verdict sk_skb_stream_parser \ flow_dissector' COMPREPLY=( $( compgen -W "$BPFTOOL_PROG_ATTACH_TYPES" -- "$cur" ) ) return 
0 @@ -703,15 +703,8 @@ _bpftool() return 0 ;; type) - local BPFTOOL_MAP_CREATE_TYPES='hash array \ - prog_array perf_event_array percpu_hash \ - percpu_array stack_trace cgroup_array lru_hash \ - lru_percpu_hash lpm_trie array_of_maps \ - hash_of_maps devmap devmap_hash sockmap cpumap \ - xskmap sockhash cgroup_storage reuseport_sockarray \ - percpu_cgroup_storage queue stack sk_storage \ - struct_ops ringbuf inode_storage task_storage \ - bloom_filter' + local BPFTOOL_MAP_CREATE_TYPES="$(bpftool feature list_builtins map_types 2>/dev/null | \ + grep -v '^unspec$')" COMPREPLY=( $( compgen -W "$BPFTOOL_MAP_CREATE_TYPES" -- "$cur" ) ) return 0 ;; @@ -1039,12 +1032,8 @@ _bpftool() return 0 ;; attach|detach) - local BPFTOOL_CGROUP_ATTACH_TYPES='ingress egress \ - sock_create sock_ops device \ - bind4 bind6 post_bind4 post_bind6 connect4 connect6 \ - getpeername4 getpeername6 getsockname4 getsockname6 \ - sendmsg4 sendmsg6 recvmsg4 recvmsg6 sysctl getsockopt \ - setsockopt sock_release' + local BPFTOOL_CGROUP_ATTACH_TYPES="$(bpftool feature list_builtins attach_types 2>/dev/null | \ + grep '^cgroup_')" local ATTACH_FLAGS='multi override' local PROG_TYPE='id pinned tag name' # Check for $prev = $command first @@ -1173,9 +1162,14 @@ _bpftool() _bpftool_once_attr 'full unprivileged' return 0 ;; + list_builtins) + [[ $prev != "$command" ]] && return 0 + COMPREPLY=( $( compgen -W 'prog_types map_types \ + attach_types link_types helpers' -- "$cur" ) ) + ;; *) [[ $prev == $object ]] && \ - COMPREPLY=( $( compgen -W 'help probe' -- "$cur" ) ) + COMPREPLY=( $( compgen -W 'help list_builtins probe' -- "$cur" ) ) ;; esac ;; diff --git a/tools/bpf/bpftool/btf.c b/tools/bpf/bpftool/btf.c index 7e6accb9d9f7..0744bd1150be 100644 --- a/tools/bpf/bpftool/btf.c +++ b/tools/bpf/bpftool/btf.c @@ -40,6 +40,7 @@ static const char * const btf_kind_str[NR_BTF_KINDS] = { [BTF_KIND_FLOAT] = "FLOAT", [BTF_KIND_DECL_TAG] = "DECL_TAG", [BTF_KIND_TYPE_TAG] = "TYPE_TAG", + [BTF_KIND_ENUM64] = 
"ENUM64", }; struct btf_attach_point { @@ -212,26 +213,76 @@ static int dump_btf_type(const struct btf *btf, __u32 id, case BTF_KIND_ENUM: { const struct btf_enum *v = (const void *)(t + 1); __u16 vlen = BTF_INFO_VLEN(t->info); + const char *encoding; int i; + encoding = btf_kflag(t) ? "SIGNED" : "UNSIGNED"; if (json_output) { + jsonw_string_field(w, "encoding", encoding); jsonw_uint_field(w, "size", t->size); jsonw_uint_field(w, "vlen", vlen); jsonw_name(w, "values"); jsonw_start_array(w); } else { - printf(" size=%u vlen=%u", t->size, vlen); + printf(" encoding=%s size=%u vlen=%u", encoding, t->size, vlen); + } + for (i = 0; i < vlen; i++, v++) { + const char *name = btf_str(btf, v->name_off); + + if (json_output) { + jsonw_start_object(w); + jsonw_string_field(w, "name", name); + if (btf_kflag(t)) + jsonw_int_field(w, "val", v->val); + else + jsonw_uint_field(w, "val", v->val); + jsonw_end_object(w); + } else { + if (btf_kflag(t)) + printf("\n\t'%s' val=%d", name, v->val); + else + printf("\n\t'%s' val=%u", name, v->val); + } + } + if (json_output) + jsonw_end_array(w); + break; + } + case BTF_KIND_ENUM64: { + const struct btf_enum64 *v = btf_enum64(t); + __u16 vlen = btf_vlen(t); + const char *encoding; + int i; + + encoding = btf_kflag(t) ? 
"SIGNED" : "UNSIGNED"; + if (json_output) { + jsonw_string_field(w, "encoding", encoding); + jsonw_uint_field(w, "size", t->size); + jsonw_uint_field(w, "vlen", vlen); + jsonw_name(w, "values"); + jsonw_start_array(w); + } else { + printf(" encoding=%s size=%u vlen=%u", encoding, t->size, vlen); } for (i = 0; i < vlen; i++, v++) { const char *name = btf_str(btf, v->name_off); + __u64 val = ((__u64)v->val_hi32 << 32) | v->val_lo32; if (json_output) { jsonw_start_object(w); jsonw_string_field(w, "name", name); - jsonw_uint_field(w, "val", v->val); + if (btf_kflag(t)) + jsonw_int_field(w, "val", val); + else + jsonw_uint_field(w, "val", val); jsonw_end_object(w); } else { - printf("\n\t'%s' val=%u", name, v->val); + if (btf_kflag(t)) + printf("\n\t'%s' val=%lldLL", name, + (unsigned long long)val); + else + printf("\n\t'%s' val=%lluULL", name, + (unsigned long long)val); } } if (json_output) diff --git a/tools/bpf/bpftool/btf_dumper.c b/tools/bpf/bpftool/btf_dumper.c index f5dddf8ef404..125798b0bc5d 100644 --- a/tools/bpf/bpftool/btf_dumper.c +++ b/tools/bpf/bpftool/btf_dumper.c @@ -182,6 +182,32 @@ static int btf_dumper_enum(const struct btf_dumper *d, return 0; } +static int btf_dumper_enum64(const struct btf_dumper *d, + const struct btf_type *t, + const void *data) +{ + const struct btf_enum64 *enums = btf_enum64(t); + __u32 val_lo32, val_hi32; + __u64 value; + __u16 i; + + value = *(__u64 *)data; + val_lo32 = (__u32)value; + val_hi32 = value >> 32; + + for (i = 0; i < btf_vlen(t); i++) { + if (val_lo32 == enums[i].val_lo32 && val_hi32 == enums[i].val_hi32) { + jsonw_string(d->jw, + btf__name_by_offset(d->btf, + enums[i].name_off)); + return 0; + } + } + + jsonw_int(d->jw, value); + return 0; +} + static bool is_str_array(const struct btf *btf, const struct btf_array *arr, const char *s) { @@ -542,6 +568,8 @@ static int btf_dumper_do_type(const struct btf_dumper *d, __u32 type_id, return btf_dumper_array(d, type_id, data); case BTF_KIND_ENUM: return 
btf_dumper_enum(d, t, data); + case BTF_KIND_ENUM64: + return btf_dumper_enum64(d, t, data); case BTF_KIND_PTR: btf_dumper_ptr(d, t, data); return 0; @@ -618,6 +646,7 @@ static int __btf_dumper_type_only(const struct btf *btf, __u32 type_id, btf__name_by_offset(btf, t->name_off)); break; case BTF_KIND_ENUM: + case BTF_KIND_ENUM64: BTF_PRINT_ARG("enum %s ", btf__name_by_offset(btf, t->name_off)); break; diff --git a/tools/bpf/bpftool/cgroup.c b/tools/bpf/bpftool/cgroup.c index effe136119d7..cced668fb2a3 100644 --- a/tools/bpf/bpftool/cgroup.c +++ b/tools/bpf/bpftool/cgroup.c @@ -15,43 +15,92 @@ #include <unistd.h> #include <bpf/bpf.h> +#include <bpf/btf.h> #include "main.h" #define HELP_SPEC_ATTACH_FLAGS \ "ATTACH_FLAGS := { multi | override }" -#define HELP_SPEC_ATTACH_TYPES \ - " ATTACH_TYPE := { ingress | egress | sock_create |\n" \ - " sock_ops | device | bind4 | bind6 |\n" \ - " post_bind4 | post_bind6 | connect4 |\n" \ - " connect6 | getpeername4 | getpeername6 |\n" \ - " getsockname4 | getsockname6 | sendmsg4 |\n" \ - " sendmsg6 | recvmsg4 | recvmsg6 |\n" \ - " sysctl | getsockopt | setsockopt |\n" \ - " sock_release }" +#define HELP_SPEC_ATTACH_TYPES \ + " ATTACH_TYPE := { cgroup_inet_ingress | cgroup_inet_egress |\n" \ + " cgroup_inet_sock_create | cgroup_sock_ops |\n" \ + " cgroup_device | cgroup_inet4_bind |\n" \ + " cgroup_inet6_bind | cgroup_inet4_post_bind |\n" \ + " cgroup_inet6_post_bind | cgroup_inet4_connect |\n" \ + " cgroup_inet6_connect | cgroup_inet4_getpeername |\n" \ + " cgroup_inet6_getpeername | cgroup_inet4_getsockname |\n" \ + " cgroup_inet6_getsockname | cgroup_udp4_sendmsg |\n" \ + " cgroup_udp6_sendmsg | cgroup_udp4_recvmsg |\n" \ + " cgroup_udp6_recvmsg | cgroup_sysctl |\n" \ + " cgroup_getsockopt | cgroup_setsockopt |\n" \ + " cgroup_inet_sock_release }" static unsigned int query_flags; +static struct btf *btf_vmlinux; +static __u32 btf_vmlinux_id; static enum bpf_attach_type parse_attach_type(const char *str) { + const char 
*attach_type_str; enum bpf_attach_type type; - for (type = 0; type < __MAX_BPF_ATTACH_TYPE; type++) { - if (attach_type_name[type] && - is_prefix(str, attach_type_name[type])) + for (type = 0; ; type++) { + attach_type_str = libbpf_bpf_attach_type_str(type); + if (!attach_type_str) + break; + if (!strcmp(str, attach_type_str)) + return type; + } + + /* Also check traditionally used attach type strings. For these we keep + * allowing prefixed usage. + */ + for (type = 0; ; type++) { + attach_type_str = bpf_attach_type_input_str(type); + if (!attach_type_str) + break; + if (is_prefix(str, attach_type_str)) return type; } return __MAX_BPF_ATTACH_TYPE; } +static void guess_vmlinux_btf_id(__u32 attach_btf_obj_id) +{ + struct bpf_btf_info btf_info = {}; + __u32 btf_len = sizeof(btf_info); + char name[16] = {}; + int err; + int fd; + + btf_info.name = ptr_to_u64(name); + btf_info.name_len = sizeof(name); + + fd = bpf_btf_get_fd_by_id(attach_btf_obj_id); + if (fd < 0) + return; + + err = bpf_obj_get_info_by_fd(fd, &btf_info, &btf_len); + if (err) + goto out; + + if (btf_info.kernel_btf && strncmp(name, "vmlinux", sizeof(name)) == 0) + btf_vmlinux_id = btf_info.id; + +out: + close(fd); +} + static int show_bpf_prog(int id, enum bpf_attach_type attach_type, const char *attach_flags_str, int level) { char prog_name[MAX_PROG_FULL_NAME]; + const char *attach_btf_name = NULL; struct bpf_prog_info info = {}; + const char *attach_type_str; __u32 info_len = sizeof(info); int prog_fd; @@ -64,26 +113,50 @@ static int show_bpf_prog(int id, enum bpf_attach_type attach_type, return -1; } + attach_type_str = libbpf_bpf_attach_type_str(attach_type); + + if (btf_vmlinux) { + if (!btf_vmlinux_id) + guess_vmlinux_btf_id(info.attach_btf_obj_id); + + if (btf_vmlinux_id == info.attach_btf_obj_id && + info.attach_btf_id < btf__type_cnt(btf_vmlinux)) { + const struct btf_type *t = + btf__type_by_id(btf_vmlinux, info.attach_btf_id); + attach_btf_name = + btf__name_by_offset(btf_vmlinux, 
t->name_off); + } + } + get_prog_full_name(&info, prog_fd, prog_name, sizeof(prog_name)); if (json_output) { jsonw_start_object(json_wtr); jsonw_uint_field(json_wtr, "id", info.id); - if (attach_type < ARRAY_SIZE(attach_type_name)) - jsonw_string_field(json_wtr, "attach_type", - attach_type_name[attach_type]); + if (attach_type_str) + jsonw_string_field(json_wtr, "attach_type", attach_type_str); else jsonw_uint_field(json_wtr, "attach_type", attach_type); jsonw_string_field(json_wtr, "attach_flags", attach_flags_str); jsonw_string_field(json_wtr, "name", prog_name); + if (attach_btf_name) + jsonw_string_field(json_wtr, "attach_btf_name", attach_btf_name); + jsonw_uint_field(json_wtr, "attach_btf_obj_id", info.attach_btf_obj_id); + jsonw_uint_field(json_wtr, "attach_btf_id", info.attach_btf_id); jsonw_end_object(json_wtr); } else { printf("%s%-8u ", level ? " " : "", info.id); - if (attach_type < ARRAY_SIZE(attach_type_name)) - printf("%-15s", attach_type_name[attach_type]); + if (attach_type_str) + printf("%-15s", attach_type_str); else printf("type %-10u", attach_type); - printf(" %-15s %-15s\n", attach_flags_str, prog_name); + printf(" %-15s %-15s", attach_flags_str, prog_name); + if (attach_btf_name) + printf(" %-15s", attach_btf_name); + else if (info.attach_btf_id) + printf(" attach_btf_obj_id=%d attach_btf_id=%d", + info.attach_btf_obj_id, info.attach_btf_id); + printf("\n"); } close(prog_fd); @@ -125,40 +198,49 @@ static int cgroup_has_attached_progs(int cgroup_fd) static int show_attached_bpf_progs(int cgroup_fd, enum bpf_attach_type type, int level) { + LIBBPF_OPTS(bpf_prog_query_opts, p); + __u32 prog_attach_flags[1024] = {0}; const char *attach_flags_str; __u32 prog_ids[1024] = {0}; - __u32 prog_cnt, iter; - __u32 attach_flags; char buf[32]; + __u32 iter; int ret; - prog_cnt = ARRAY_SIZE(prog_ids); - ret = bpf_prog_query(cgroup_fd, type, query_flags, &attach_flags, - prog_ids, &prog_cnt); + p.query_flags = query_flags; + p.prog_cnt = 
ARRAY_SIZE(prog_ids); + p.prog_ids = prog_ids; + p.prog_attach_flags = prog_attach_flags; + + ret = bpf_prog_query_opts(cgroup_fd, type, &p); if (ret) return ret; - if (prog_cnt == 0) + if (p.prog_cnt == 0) return 0; - switch (attach_flags) { - case BPF_F_ALLOW_MULTI: - attach_flags_str = "multi"; - break; - case BPF_F_ALLOW_OVERRIDE: - attach_flags_str = "override"; - break; - case 0: - attach_flags_str = ""; - break; - default: - snprintf(buf, sizeof(buf), "unknown(%x)", attach_flags); - attach_flags_str = buf; - } + for (iter = 0; iter < p.prog_cnt; iter++) { + __u32 attach_flags; + + attach_flags = prog_attach_flags[iter] ?: p.attach_flags; + + switch (attach_flags) { + case BPF_F_ALLOW_MULTI: + attach_flags_str = "multi"; + break; + case BPF_F_ALLOW_OVERRIDE: + attach_flags_str = "override"; + break; + case 0: + attach_flags_str = ""; + break; + default: + snprintf(buf, sizeof(buf), "unknown(%x)", attach_flags); + attach_flags_str = buf; + } - for (iter = 0; iter < prog_cnt; iter++) show_bpf_prog(prog_ids[iter], type, attach_flags_str, level); + } return 0; } @@ -214,6 +296,7 @@ static int do_show(int argc, char **argv) printf("%-8s %-15s %-15s %-15s\n", "ID", "AttachType", "AttachFlags", "Name"); + btf_vmlinux = libbpf_find_kernel_btf(); for (type = 0; type < __MAX_BPF_ATTACH_TYPE; type++) { /* * Not all attach types may be supported, so it's expected, @@ -277,6 +360,7 @@ static int do_show_tree_fn(const char *fpath, const struct stat *sb, printf("%s\n", fpath); } + btf_vmlinux = libbpf_find_kernel_btf(); for (type = 0; type < __MAX_BPF_ATTACH_TYPE; type++) show_attached_bpf_progs(cgroup_fd, type, ftw->level); diff --git a/tools/bpf/bpftool/common.c b/tools/bpf/bpftool/common.c index c740142c24d8..067e9ea59e3b 100644 --- a/tools/bpf/bpftool/common.c +++ b/tools/bpf/bpftool/common.c @@ -13,13 +13,17 @@ #include <stdlib.h> #include <string.h> #include <unistd.h> -#include <linux/limits.h> -#include <linux/magic.h> #include <net/if.h> #include <sys/mount.h> 
+#include <sys/resource.h> #include <sys/stat.h> #include <sys/vfs.h> +#include <linux/filter.h> +#include <linux/limits.h> +#include <linux/magic.h> +#include <linux/unistd.h> + #include <bpf/bpf.h> #include <bpf/hashmap.h> #include <bpf/libbpf.h> /* libbpf_num_possible_cpus */ @@ -31,52 +35,6 @@ #define BPF_FS_MAGIC 0xcafe4a11 #endif -const char * const attach_type_name[__MAX_BPF_ATTACH_TYPE] = { - [BPF_CGROUP_INET_INGRESS] = "ingress", - [BPF_CGROUP_INET_EGRESS] = "egress", - [BPF_CGROUP_INET_SOCK_CREATE] = "sock_create", - [BPF_CGROUP_INET_SOCK_RELEASE] = "sock_release", - [BPF_CGROUP_SOCK_OPS] = "sock_ops", - [BPF_CGROUP_DEVICE] = "device", - [BPF_CGROUP_INET4_BIND] = "bind4", - [BPF_CGROUP_INET6_BIND] = "bind6", - [BPF_CGROUP_INET4_CONNECT] = "connect4", - [BPF_CGROUP_INET6_CONNECT] = "connect6", - [BPF_CGROUP_INET4_POST_BIND] = "post_bind4", - [BPF_CGROUP_INET6_POST_BIND] = "post_bind6", - [BPF_CGROUP_INET4_GETPEERNAME] = "getpeername4", - [BPF_CGROUP_INET6_GETPEERNAME] = "getpeername6", - [BPF_CGROUP_INET4_GETSOCKNAME] = "getsockname4", - [BPF_CGROUP_INET6_GETSOCKNAME] = "getsockname6", - [BPF_CGROUP_UDP4_SENDMSG] = "sendmsg4", - [BPF_CGROUP_UDP6_SENDMSG] = "sendmsg6", - [BPF_CGROUP_SYSCTL] = "sysctl", - [BPF_CGROUP_UDP4_RECVMSG] = "recvmsg4", - [BPF_CGROUP_UDP6_RECVMSG] = "recvmsg6", - [BPF_CGROUP_GETSOCKOPT] = "getsockopt", - [BPF_CGROUP_SETSOCKOPT] = "setsockopt", - [BPF_SK_SKB_STREAM_PARSER] = "sk_skb_stream_parser", - [BPF_SK_SKB_STREAM_VERDICT] = "sk_skb_stream_verdict", - [BPF_SK_SKB_VERDICT] = "sk_skb_verdict", - [BPF_SK_MSG_VERDICT] = "sk_msg_verdict", - [BPF_LIRC_MODE2] = "lirc_mode2", - [BPF_FLOW_DISSECTOR] = "flow_dissector", - [BPF_TRACE_RAW_TP] = "raw_tp", - [BPF_TRACE_FENTRY] = "fentry", - [BPF_TRACE_FEXIT] = "fexit", - [BPF_MODIFY_RETURN] = "mod_ret", - [BPF_LSM_MAC] = "lsm_mac", - [BPF_SK_LOOKUP] = "sk_lookup", - [BPF_TRACE_ITER] = "trace_iter", - [BPF_XDP_DEVMAP] = "xdp_devmap", - [BPF_XDP_CPUMAP] = "xdp_cpumap", - [BPF_XDP] = "xdp", - 
[BPF_SK_REUSEPORT_SELECT] = "sk_skb_reuseport_select", - [BPF_SK_REUSEPORT_SELECT_OR_MIGRATE] = "sk_skb_reuseport_select_or_migrate", - [BPF_PERF_EVENT] = "perf_event", - [BPF_TRACE_KPROBE_MULTI] = "trace_kprobe_multi", -}; - void p_err(const char *fmt, ...) { va_list ap; @@ -118,6 +76,75 @@ static bool is_bpffs(char *path) return (unsigned long)st_fs.f_type == BPF_FS_MAGIC; } +/* Probe whether kernel switched from memlock-based (RLIMIT_MEMLOCK) to + * memcg-based memory accounting for BPF maps and programs. This was done in + * commit 97306be45fbe ("Merge branch 'switch to memcg-based memory + * accounting'"), in Linux 5.11. + * + * Libbpf also offers to probe for memcg-based accounting vs rlimit, but does + * so by checking for the availability of a given BPF helper and this has + * failed on some kernels with backports in the past, see commit 6b4384ff1088 + * ("Revert "bpftool: Use libbpf 1.0 API mode instead of RLIMIT_MEMLOCK""). + * Instead, we can probe by lowering the process-based rlimit to 0, trying to + * load a BPF object, and resetting the rlimit. If the load succeeds then + * memcg-based accounting is supported. + * + * This would be too dangerous to do in the library, because multithreaded + * applications might attempt to load items while the rlimit is at 0. Given + * that bpftool is single-threaded, this is fine to do here. + */ +static bool known_to_need_rlimit(void) +{ + struct rlimit rlim_init, rlim_cur_zero = {}; + struct bpf_insn insns[] = { + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }; + size_t insn_cnt = ARRAY_SIZE(insns); + union bpf_attr attr; + int prog_fd, err; + + memset(&attr, 0, sizeof(attr)); + attr.prog_type = BPF_PROG_TYPE_SOCKET_FILTER; + attr.insns = ptr_to_u64(insns); + attr.insn_cnt = insn_cnt; + attr.license = ptr_to_u64("GPL"); + + if (getrlimit(RLIMIT_MEMLOCK, &rlim_init)) + return false; + + /* Drop the soft limit to zero. 
We maintain the hard limit to its + * current value, because lowering it would be a permanent operation + * for unprivileged users. + */ + rlim_cur_zero.rlim_max = rlim_init.rlim_max; + if (setrlimit(RLIMIT_MEMLOCK, &rlim_cur_zero)) + return false; + + /* Do not use bpf_prog_load() from libbpf here, because it calls + * bump_rlimit_memlock(), interfering with the current probe. + */ + prog_fd = syscall(__NR_bpf, BPF_PROG_LOAD, &attr, sizeof(attr)); + err = errno; + + /* reset soft rlimit to its initial value */ + setrlimit(RLIMIT_MEMLOCK, &rlim_init); + + if (prog_fd < 0) + return err == EPERM; + + close(prog_fd); + return false; +} + +void set_max_rlimit(void) +{ + struct rlimit rinf = { RLIM_INFINITY, RLIM_INFINITY }; + + if (known_to_need_rlimit()) + setrlimit(RLIMIT_MEMLOCK, &rinf); +} + static int mnt_fs(const char *target, const char *type, char *buff, size_t bufflen) { @@ -289,6 +316,7 @@ const char *get_fd_type_name(enum bpf_obj_type type) [BPF_OBJ_UNKNOWN] = "unknown", [BPF_OBJ_PROG] = "prog", [BPF_OBJ_MAP] = "map", + [BPF_OBJ_LINK] = "link", }; if (type < 0 || type >= ARRAY_SIZE(names) || !names[type]) @@ -1009,3 +1037,39 @@ bool equal_fn_for_key_as_id(const void *k1, const void *k2, void *ctx) { return k1 == k2; } + +const char *bpf_attach_type_input_str(enum bpf_attach_type t) +{ + switch (t) { + case BPF_CGROUP_INET_INGRESS: return "ingress"; + case BPF_CGROUP_INET_EGRESS: return "egress"; + case BPF_CGROUP_INET_SOCK_CREATE: return "sock_create"; + case BPF_CGROUP_INET_SOCK_RELEASE: return "sock_release"; + case BPF_CGROUP_SOCK_OPS: return "sock_ops"; + case BPF_CGROUP_DEVICE: return "device"; + case BPF_CGROUP_INET4_BIND: return "bind4"; + case BPF_CGROUP_INET6_BIND: return "bind6"; + case BPF_CGROUP_INET4_CONNECT: return "connect4"; + case BPF_CGROUP_INET6_CONNECT: return "connect6"; + case BPF_CGROUP_INET4_POST_BIND: return "post_bind4"; + case BPF_CGROUP_INET6_POST_BIND: return "post_bind6"; + case BPF_CGROUP_INET4_GETPEERNAME: return 
"getpeername4"; + case BPF_CGROUP_INET6_GETPEERNAME: return "getpeername6"; + case BPF_CGROUP_INET4_GETSOCKNAME: return "getsockname4"; + case BPF_CGROUP_INET6_GETSOCKNAME: return "getsockname6"; + case BPF_CGROUP_UDP4_SENDMSG: return "sendmsg4"; + case BPF_CGROUP_UDP6_SENDMSG: return "sendmsg6"; + case BPF_CGROUP_SYSCTL: return "sysctl"; + case BPF_CGROUP_UDP4_RECVMSG: return "recvmsg4"; + case BPF_CGROUP_UDP6_RECVMSG: return "recvmsg6"; + case BPF_CGROUP_GETSOCKOPT: return "getsockopt"; + case BPF_CGROUP_SETSOCKOPT: return "setsockopt"; + case BPF_TRACE_RAW_TP: return "raw_tp"; + case BPF_TRACE_FENTRY: return "fentry"; + case BPF_TRACE_FEXIT: return "fexit"; + case BPF_MODIFY_RETURN: return "mod_ret"; + case BPF_SK_REUSEPORT_SELECT: return "sk_skb_reuseport_select"; + case BPF_SK_REUSEPORT_SELECT_OR_MIGRATE: return "sk_skb_reuseport_select_or_migrate"; + default: return libbpf_bpf_attach_type_str(t); + } +} diff --git a/tools/bpf/bpftool/feature.c b/tools/bpf/bpftool/feature.c index d12f46051aac..7ecabf7947fb 100644 --- a/tools/bpf/bpftool/feature.c +++ b/tools/bpf/bpftool/feature.c @@ -548,8 +548,8 @@ static bool probe_prog_type_ifindex(enum bpf_prog_type prog_type, __u32 ifindex) } static void -probe_prog_type(enum bpf_prog_type prog_type, bool *supported_types, - const char *define_prefix, __u32 ifindex) +probe_prog_type(enum bpf_prog_type prog_type, const char *prog_type_str, + bool *supported_types, const char *define_prefix, __u32 ifindex) { char feat_name[128], plain_desc[128], define_name[128]; const char *plain_comment = "eBPF program_type "; @@ -580,20 +580,16 @@ probe_prog_type(enum bpf_prog_type prog_type, bool *supported_types, supported_types[prog_type] |= res; - if (!prog_type_name[prog_type]) { - p_info("program type name not found (type %d)", prog_type); - return; - } maxlen = sizeof(plain_desc) - strlen(plain_comment) - 1; - if (strlen(prog_type_name[prog_type]) > maxlen) { + if (strlen(prog_type_str) > maxlen) { p_info("program type name too 
long"); return; } - sprintf(feat_name, "have_%s_prog_type", prog_type_name[prog_type]); - sprintf(define_name, "%s_prog_type", prog_type_name[prog_type]); + sprintf(feat_name, "have_%s_prog_type", prog_type_str); + sprintf(define_name, "%s_prog_type", prog_type_str); uppercase(define_name, sizeof(define_name)); - sprintf(plain_desc, "%s%s", plain_comment, prog_type_name[prog_type]); + sprintf(plain_desc, "%s%s", plain_comment, prog_type_str); print_bool_feature(feat_name, plain_desc, define_name, res, define_prefix); } @@ -619,8 +615,8 @@ static bool probe_map_type_ifindex(enum bpf_map_type map_type, __u32 ifindex) } static void -probe_map_type(enum bpf_map_type map_type, const char *define_prefix, - __u32 ifindex) +probe_map_type(enum bpf_map_type map_type, char const *map_type_str, + const char *define_prefix, __u32 ifindex) { char feat_name[128], plain_desc[128], define_name[128]; const char *plain_comment = "eBPF map_type "; @@ -645,20 +641,16 @@ probe_map_type(enum bpf_map_type map_type, const char *define_prefix, * check required for unprivileged users */ - if (!map_type_name[map_type]) { - p_info("map type name not found (type %d)", map_type); - return; - } maxlen = sizeof(plain_desc) - strlen(plain_comment) - 1; - if (strlen(map_type_name[map_type]) > maxlen) { + if (strlen(map_type_str) > maxlen) { p_info("map type name too long"); return; } - sprintf(feat_name, "have_%s_map_type", map_type_name[map_type]); - sprintf(define_name, "%s_map_type", map_type_name[map_type]); + sprintf(feat_name, "have_%s_map_type", map_type_str); + sprintf(define_name, "%s_map_type", map_type_str); uppercase(define_name, sizeof(define_name)); - sprintf(plain_desc, "%s%s", plain_comment, map_type_name[map_type]); + sprintf(plain_desc, "%s%s", plain_comment, map_type_str); print_bool_feature(feat_name, plain_desc, define_name, res, define_prefix); } @@ -728,10 +720,10 @@ probe_helper_for_progtype(enum bpf_prog_type prog_type, bool supported_type, } static void 
-probe_helpers_for_progtype(enum bpf_prog_type prog_type, bool supported_type, +probe_helpers_for_progtype(enum bpf_prog_type prog_type, + const char *prog_type_str, bool supported_type, const char *define_prefix, __u32 ifindex) { - const char *ptype_name = prog_type_name[prog_type]; char feat_name[128]; unsigned int id; bool probe_res = false; @@ -747,12 +739,12 @@ probe_helpers_for_progtype(enum bpf_prog_type prog_type, bool supported_type, } if (json_output) { - sprintf(feat_name, "%s_available_helpers", ptype_name); + sprintf(feat_name, "%s_available_helpers", prog_type_str); jsonw_name(json_wtr, feat_name); jsonw_start_array(json_wtr); } else if (!define_prefix) { printf("eBPF helpers supported for program type %s:", - ptype_name); + prog_type_str); } for (id = 1; id < ARRAY_SIZE(helper_name); id++) { @@ -768,7 +760,7 @@ probe_helpers_for_progtype(enum bpf_prog_type prog_type, bool supported_type, /* fallthrough */ default: probe_res |= probe_helper_for_progtype(prog_type, supported_type, - define_prefix, id, ptype_name, + define_prefix, id, prog_type_str, ifindex); } } @@ -943,30 +935,47 @@ static void section_program_types(bool *supported_types, const char *define_prefix, __u32 ifindex) { - unsigned int i; + unsigned int prog_type = BPF_PROG_TYPE_UNSPEC; + const char *prog_type_str; print_start_section("program_types", "Scanning eBPF program types...", "/*** eBPF program types ***/", define_prefix); - for (i = BPF_PROG_TYPE_UNSPEC + 1; i < prog_type_name_size; i++) - probe_prog_type(i, supported_types, define_prefix, ifindex); + while (true) { + prog_type++; + prog_type_str = libbpf_bpf_prog_type_str(prog_type); + /* libbpf will return NULL for variants unknown to it. 
*/ + if (!prog_type_str) + break; + + probe_prog_type(prog_type, prog_type_str, supported_types, define_prefix, + ifindex); + } print_end_section(); } static void section_map_types(const char *define_prefix, __u32 ifindex) { - unsigned int i; + unsigned int map_type = BPF_MAP_TYPE_UNSPEC; + const char *map_type_str; print_start_section("map_types", "Scanning eBPF map types...", "/*** eBPF map types ***/", define_prefix); - for (i = BPF_MAP_TYPE_UNSPEC + 1; i < map_type_name_size; i++) - probe_map_type(i, define_prefix, ifindex); + while (true) { + map_type++; + map_type_str = libbpf_bpf_map_type_str(map_type); + /* libbpf will return NULL for variants unknown to it. */ + if (!map_type_str) + break; + + probe_map_type(map_type, map_type_str, define_prefix, ifindex); + } print_end_section(); } @@ -974,7 +983,8 @@ static void section_map_types(const char *define_prefix, __u32 ifindex) static void section_helpers(bool *supported_types, const char *define_prefix, __u32 ifindex) { - unsigned int i; + unsigned int prog_type = BPF_PROG_TYPE_UNSPEC; + const char *prog_type_str; print_start_section("helpers", "Scanning eBPF helper functions...", @@ -996,9 +1006,18 @@ section_helpers(bool *supported_types, const char *define_prefix, __u32 ifindex) " %sBPF__PROG_TYPE_ ## prog_type ## __HELPER_ ## helper\n", define_prefix, define_prefix, define_prefix, define_prefix); - for (i = BPF_PROG_TYPE_UNSPEC + 1; i < prog_type_name_size; i++) - probe_helpers_for_progtype(i, supported_types[i], define_prefix, + while (true) { + prog_type++; + prog_type_str = libbpf_bpf_prog_type_str(prog_type); + /* libbpf will return NULL for variants unknown to it. 
*/ + if (!prog_type_str) + break; + + probe_helpers_for_progtype(prog_type, prog_type_str, + supported_types[prog_type], + define_prefix, ifindex); + } print_end_section(); } @@ -1148,6 +1167,8 @@ static int do_probe(int argc, char **argv) __u32 ifindex = 0; char *ifname; + set_max_rlimit(); + while (argc) { if (is_prefix(*argv, "kernel")) { if (target != COMPONENT_UNSPEC) { @@ -1237,6 +1258,58 @@ exit_close_json: return 0; } +static const char *get_helper_name(unsigned int id) +{ + if (id >= ARRAY_SIZE(helper_name)) + return NULL; + + return helper_name[id]; +} + +static int do_list_builtins(int argc, char **argv) +{ + const char *(*get_name)(unsigned int id); + unsigned int id = 0; + + if (argc < 1) + usage(); + + if (is_prefix(*argv, "prog_types")) { + get_name = (const char *(*)(unsigned int))libbpf_bpf_prog_type_str; + } else if (is_prefix(*argv, "map_types")) { + get_name = (const char *(*)(unsigned int))libbpf_bpf_map_type_str; + } else if (is_prefix(*argv, "attach_types")) { + get_name = (const char *(*)(unsigned int))libbpf_bpf_attach_type_str; + } else if (is_prefix(*argv, "link_types")) { + get_name = (const char *(*)(unsigned int))libbpf_bpf_link_type_str; + } else if (is_prefix(*argv, "helpers")) { + get_name = get_helper_name; + } else { + p_err("expected 'prog_types', 'map_types', 'attach_types', 'link_types' or 'helpers', got: %s", *argv); + return -1; + } + + if (json_output) + jsonw_start_array(json_wtr); /* root array */ + + while (true) { + const char *name; + + name = get_name(id++); + if (!name) + break; + if (json_output) + jsonw_string(json_wtr, name); + else + printf("%s\n", name); + } + + if (json_output) + jsonw_end_array(json_wtr); /* root array */ + + return 0; +} + static int do_help(int argc, char **argv) { if (json_output) { @@ -1246,9 +1319,11 @@ static int do_help(int argc, char **argv) fprintf(stderr, "Usage: %1$s %2$s probe [COMPONENT] [full] [unprivileged] [macros [prefix PREFIX]]\n" + " %1$s %2$s list_builtins GROUP\n" " %1$s 
%2$s help\n" "\n" " COMPONENT := { kernel | dev NAME }\n" + " GROUP := { prog_types | map_types | attach_types | link_types | helpers }\n" " " HELP_SPEC_OPTIONS " }\n" "", bin_name, argv[-2]); @@ -1257,8 +1332,9 @@ static int do_help(int argc, char **argv) } static const struct cmd cmds[] = { - { "probe", do_probe }, - { "help", do_help }, + { "probe", do_probe }, + { "list_builtins", do_list_builtins }, + { "help", do_help }, { 0 } }; diff --git a/tools/bpf/bpftool/gen.c b/tools/bpf/bpftool/gen.c index 4c9477ff748d..7070dcffa822 100644 --- a/tools/bpf/bpftool/gen.c +++ b/tools/bpf/bpftool/gen.c @@ -474,6 +474,9 @@ static void codegen_asserts(struct bpf_object *obj, const char *obj_name) const struct btf_type *sec; char map_ident[256], var_ident[256]; + if (!btf) + return; + codegen("\ \n\ __attribute__((unused)) static void \n\ @@ -1172,7 +1175,7 @@ static int do_skeleton(int argc, char **argv) static inline void \n\ %1$s__detach(struct %1$s *obj) \n\ { \n\ - return bpf_object__detach_skeleton(obj->skeleton); \n\ + bpf_object__detach_skeleton(obj->skeleton); \n\ } \n\ ", obj_name @@ -1747,6 +1750,7 @@ btfgen_mark_type(struct btfgen_info *info, unsigned int type_id, bool follow_poi case BTF_KIND_INT: case BTF_KIND_FLOAT: case BTF_KIND_ENUM: + case BTF_KIND_ENUM64: case BTF_KIND_STRUCT: case BTF_KIND_UNION: break; @@ -1758,6 +1762,7 @@ btfgen_mark_type(struct btfgen_info *info, unsigned int type_id, bool follow_poi } break; case BTF_KIND_CONST: + case BTF_KIND_RESTRICT: case BTF_KIND_VOLATILE: case BTF_KIND_TYPEDEF: err = btfgen_mark_type(info, btf_type->type, follow_pointers); @@ -1852,6 +1857,112 @@ static int btfgen_record_field_relo(struct btfgen_info *info, struct bpf_core_sp return 0; } +/* Mark types, members, and member types. Compared to btfgen_record_field_relo, + * this function does not rely on the target spec for inferring members, but + * uses the associated BTF. 
+ * + * The `behind_ptr` argument is used to stop marking of composite types reached + * through a pointer. This way, we can keep BTF size in check while providing + * reasonable match semantics. + */ +static int btfgen_mark_type_match(struct btfgen_info *info, __u32 type_id, bool behind_ptr) +{ + const struct btf_type *btf_type; + struct btf *btf = info->src_btf; + struct btf_type *cloned_type; + int i, err; + + if (type_id == 0) + return 0; + + btf_type = btf__type_by_id(btf, type_id); + /* mark type on cloned BTF as used */ + cloned_type = (struct btf_type *)btf__type_by_id(info->marked_btf, type_id); + cloned_type->name_off = MARKED; + + switch (btf_kind(btf_type)) { + case BTF_KIND_UNKN: + case BTF_KIND_INT: + case BTF_KIND_FLOAT: + case BTF_KIND_ENUM: + case BTF_KIND_ENUM64: + break; + case BTF_KIND_STRUCT: + case BTF_KIND_UNION: { + struct btf_member *m = btf_members(btf_type); + __u16 vlen = btf_vlen(btf_type); + + if (behind_ptr) + break; + + for (i = 0; i < vlen; i++, m++) { + /* mark member */ + btfgen_mark_member(info, type_id, i); + + /* mark member's type */ + err = btfgen_mark_type_match(info, m->type, false); + if (err) + return err; + } + break; + } + case BTF_KIND_CONST: + case BTF_KIND_FWD: + case BTF_KIND_RESTRICT: + case BTF_KIND_TYPEDEF: + case BTF_KIND_VOLATILE: + return btfgen_mark_type_match(info, btf_type->type, behind_ptr); + case BTF_KIND_PTR: + return btfgen_mark_type_match(info, btf_type->type, true); + case BTF_KIND_ARRAY: { + struct btf_array *array; + + array = btf_array(btf_type); + /* mark array type */ + err = btfgen_mark_type_match(info, array->type, false); + /* mark array's index type */ + err = err ? 
: btfgen_mark_type_match(info, array->index_type, false); + if (err) + return err; + break; + } + case BTF_KIND_FUNC_PROTO: { + __u16 vlen = btf_vlen(btf_type); + struct btf_param *param; + + /* mark ret type */ + err = btfgen_mark_type_match(info, btf_type->type, false); + if (err) + return err; + + /* mark parameters types */ + param = btf_params(btf_type); + for (i = 0; i < vlen; i++) { + err = btfgen_mark_type_match(info, param->type, false); + if (err) + return err; + param++; + } + break; + } + /* tells if some other type needs to be handled */ + default: + p_err("unsupported kind: %s (%d)", btf_kind_str(btf_type), type_id); + return -EINVAL; + } + + return 0; +} + +/* Mark types, members, and member types. Compared to btfgen_record_field_relo, + * this function does not rely on the target spec for inferring members, but + * uses the associated BTF. + */ +static int btfgen_record_type_match_relo(struct btfgen_info *info, struct bpf_core_spec *targ_spec) +{ + return btfgen_mark_type_match(info, targ_spec->root_type_id, false); +} + static int btfgen_record_type_relo(struct btfgen_info *info, struct bpf_core_spec *targ_spec) { return btfgen_mark_type(info, targ_spec->root_type_id, true); @@ -1878,6 +1989,8 @@ static int btfgen_record_reloc(struct btfgen_info *info, struct bpf_core_spec *r case BPF_CORE_TYPE_EXISTS: case BPF_CORE_TYPE_SIZE: return btfgen_record_type_relo(info, res); + case BPF_CORE_TYPE_MATCHES: + return btfgen_record_type_match_relo(info, res); case BPF_CORE_ENUMVAL_EXISTS: case BPF_CORE_ENUMVAL_VALUE: return btfgen_record_enumval_relo(info, res); diff --git a/tools/bpf/bpftool/link.c b/tools/bpf/bpftool/link.c index 6353a789322b..7a20931c3250 100644 --- a/tools/bpf/bpftool/link.c +++ b/tools/bpf/bpftool/link.c @@ -13,19 +13,6 @@ #include "json_writer.h" #include "main.h" -static const char * const link_type_name[] = { - [BPF_LINK_TYPE_UNSPEC] = "unspec", - [BPF_LINK_TYPE_RAW_TRACEPOINT] = "raw_tracepoint", - [BPF_LINK_TYPE_TRACING] = 
"tracing", - [BPF_LINK_TYPE_CGROUP] = "cgroup", - [BPF_LINK_TYPE_ITER] = "iter", - [BPF_LINK_TYPE_NETNS] = "netns", - [BPF_LINK_TYPE_XDP] = "xdp", - [BPF_LINK_TYPE_PERF_EVENT] = "perf_event", - [BPF_LINK_TYPE_KPROBE_MULTI] = "kprobe_multi", - [BPF_LINK_TYPE_STRUCT_OPS] = "struct_ops", -}; - static struct hashmap *link_table; static int link_parse_fd(int *argc, char ***argv) @@ -67,9 +54,12 @@ static int link_parse_fd(int *argc, char ***argv) static void show_link_header_json(struct bpf_link_info *info, json_writer_t *wtr) { + const char *link_type_str; + jsonw_uint_field(wtr, "id", info->id); - if (info->type < ARRAY_SIZE(link_type_name)) - jsonw_string_field(wtr, "type", link_type_name[info->type]); + link_type_str = libbpf_bpf_link_type_str(info->type); + if (link_type_str) + jsonw_string_field(wtr, "type", link_type_str); else jsonw_uint_field(wtr, "type", info->type); @@ -78,9 +68,11 @@ show_link_header_json(struct bpf_link_info *info, json_writer_t *wtr) static void show_link_attach_type_json(__u32 attach_type, json_writer_t *wtr) { - if (attach_type < ARRAY_SIZE(attach_type_name)) - jsonw_string_field(wtr, "attach_type", - attach_type_name[attach_type]); + const char *attach_type_str; + + attach_type_str = libbpf_bpf_attach_type_str(attach_type); + if (attach_type_str) + jsonw_string_field(wtr, "attach_type", attach_type_str); else jsonw_uint_field(wtr, "attach_type", attach_type); } @@ -121,6 +113,7 @@ static int get_prog_info(int prog_id, struct bpf_prog_info *info) static int show_link_close_json(int fd, struct bpf_link_info *info) { struct bpf_prog_info prog_info; + const char *prog_type_str; int err; jsonw_start_object(json_wtr); @@ -137,12 +130,12 @@ static int show_link_close_json(int fd, struct bpf_link_info *info) if (err) return err; - if (prog_info.type < prog_type_name_size) - jsonw_string_field(json_wtr, "prog_type", - prog_type_name[prog_info.type]); + prog_type_str = libbpf_bpf_prog_type_str(prog_info.type); + /* libbpf will return NULL for 
variants unknown to it. */ + if (prog_type_str) + jsonw_string_field(json_wtr, "prog_type", prog_type_str); else - jsonw_uint_field(json_wtr, "prog_type", - prog_info.type); + jsonw_uint_field(json_wtr, "prog_type", prog_info.type); show_link_attach_type_json(info->tracing.attach_type, json_wtr); @@ -184,9 +177,12 @@ static int show_link_close_json(int fd, struct bpf_link_info *info) static void show_link_header_plain(struct bpf_link_info *info) { + const char *link_type_str; + printf("%u: ", info->id); - if (info->type < ARRAY_SIZE(link_type_name)) - printf("%s ", link_type_name[info->type]); + link_type_str = libbpf_bpf_link_type_str(info->type); + if (link_type_str) + printf("%s ", link_type_str); else printf("type %u ", info->type); @@ -195,8 +191,11 @@ static void show_link_header_plain(struct bpf_link_info *info) static void show_link_attach_type_plain(__u32 attach_type) { - if (attach_type < ARRAY_SIZE(attach_type_name)) - printf("attach_type %s ", attach_type_name[attach_type]); + const char *attach_type_str; + + attach_type_str = libbpf_bpf_attach_type_str(attach_type); + if (attach_type_str) + printf("attach_type %s ", attach_type_str); else printf("attach_type %u ", attach_type); } @@ -214,6 +213,7 @@ static void show_iter_plain(struct bpf_link_info *info) static int show_link_close_plain(int fd, struct bpf_link_info *info) { struct bpf_prog_info prog_info; + const char *prog_type_str; int err; show_link_header_plain(info); @@ -228,9 +228,10 @@ static int show_link_close_plain(int fd, struct bpf_link_info *info) if (err) return err; - if (prog_info.type < prog_type_name_size) - printf("\n\tprog_type %s ", - prog_type_name[prog_info.type]); + prog_type_str = libbpf_bpf_prog_type_str(prog_info.type); + /* libbpf will return NULL for variants unknown to it. 
*/ + if (prog_type_str) + printf("\n\tprog_type %s ", prog_type_str); else printf("\n\tprog_type %u ", prog_info.type); diff --git a/tools/bpf/bpftool/main.c b/tools/bpf/bpftool/main.c index 9062ef2b8767..451cefc2d0da 100644 --- a/tools/bpf/bpftool/main.c +++ b/tools/bpf/bpftool/main.c @@ -508,8 +508,6 @@ int main(int argc, char **argv) * mode for loading generated skeleton. */ libbpf_set_strict_mode(LIBBPF_STRICT_ALL & ~LIBBPF_STRICT_MAP_DEFINITIONS); - } else { - libbpf_set_strict_mode(LIBBPF_STRICT_AUTO_RLIMIT_MEMLOCK); } argc -= optind; diff --git a/tools/bpf/bpftool/main.h b/tools/bpf/bpftool/main.h index aa99ffab451a..5e5060c2ac04 100644 --- a/tools/bpf/bpftool/main.h +++ b/tools/bpf/bpftool/main.h @@ -63,14 +63,6 @@ static inline void *u64_to_ptr(__u64 ptr) #define HELP_SPEC_LINK \ "LINK := { id LINK_ID | pinned FILE }" -extern const char * const prog_type_name[]; -extern const size_t prog_type_name_size; - -extern const char * const attach_type_name[__MAX_BPF_ATTACH_TYPE]; - -extern const char * const map_type_name[]; -extern const size_t map_type_name_size; - /* keep in sync with the definition in skeleton/pid_iter.bpf.c */ enum bpf_obj_type { BPF_OBJ_UNKNOWN, @@ -102,6 +94,8 @@ int detect_common_prefix(const char *arg, ...); void fprint_hex(FILE *f, void *arg, unsigned int n, const char *sep); void usage(void) __noreturn; +void set_max_rlimit(void); + int mount_tracefs(const char *target); struct obj_ref { @@ -249,6 +243,20 @@ int print_all_levels(__maybe_unused enum libbpf_print_level level, size_t hash_fn_for_key_as_id(const void *key, void *ctx); bool equal_fn_for_key_as_id(const void *k1, const void *k2, void *ctx); +/* bpf_attach_type_input_str - convert the provided attach type value into a + * textual representation that we accept for input purposes. 
+ * + * This function is similar in nature to libbpf_bpf_attach_type_str, but + * recognizes some attach type names that have been used by the program in the + * past and which do not follow the string inference scheme that libbpf uses. + * These textual representations should only be used for user input. + * + * @t: The attach type + * Returns a pointer to a static string identifying the attach type. NULL is + * returned for unknown bpf_attach_type values. + */ +const char *bpf_attach_type_input_str(enum bpf_attach_type t); + static inline void *u32_as_hash_field(__u32 x) { return (void *)(uintptr_t)x; diff --git a/tools/bpf/bpftool/map.c b/tools/bpf/bpftool/map.c index 877387ef79c7..38b6bc9c26c3 100644 --- a/tools/bpf/bpftool/map.c +++ b/tools/bpf/bpftool/map.c @@ -22,42 +22,6 @@ #include "json_writer.h" #include "main.h" -const char * const map_type_name[] = { - [BPF_MAP_TYPE_UNSPEC] = "unspec", - [BPF_MAP_TYPE_HASH] = "hash", - [BPF_MAP_TYPE_ARRAY] = "array", - [BPF_MAP_TYPE_PROG_ARRAY] = "prog_array", - [BPF_MAP_TYPE_PERF_EVENT_ARRAY] = "perf_event_array", - [BPF_MAP_TYPE_PERCPU_HASH] = "percpu_hash", - [BPF_MAP_TYPE_PERCPU_ARRAY] = "percpu_array", - [BPF_MAP_TYPE_STACK_TRACE] = "stack_trace", - [BPF_MAP_TYPE_CGROUP_ARRAY] = "cgroup_array", - [BPF_MAP_TYPE_LRU_HASH] = "lru_hash", - [BPF_MAP_TYPE_LRU_PERCPU_HASH] = "lru_percpu_hash", - [BPF_MAP_TYPE_LPM_TRIE] = "lpm_trie", - [BPF_MAP_TYPE_ARRAY_OF_MAPS] = "array_of_maps", - [BPF_MAP_TYPE_HASH_OF_MAPS] = "hash_of_maps", - [BPF_MAP_TYPE_DEVMAP] = "devmap", - [BPF_MAP_TYPE_DEVMAP_HASH] = "devmap_hash", - [BPF_MAP_TYPE_SOCKMAP] = "sockmap", - [BPF_MAP_TYPE_CPUMAP] = "cpumap", - [BPF_MAP_TYPE_XSKMAP] = "xskmap", - [BPF_MAP_TYPE_SOCKHASH] = "sockhash", - [BPF_MAP_TYPE_CGROUP_STORAGE] = "cgroup_storage", - [BPF_MAP_TYPE_REUSEPORT_SOCKARRAY] = "reuseport_sockarray", - [BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE] = "percpu_cgroup_storage", - [BPF_MAP_TYPE_QUEUE] = "queue", - [BPF_MAP_TYPE_STACK] = "stack", - 
[BPF_MAP_TYPE_SK_STORAGE] = "sk_storage", - [BPF_MAP_TYPE_STRUCT_OPS] = "struct_ops", - [BPF_MAP_TYPE_RINGBUF] = "ringbuf", - [BPF_MAP_TYPE_INODE_STORAGE] = "inode_storage", - [BPF_MAP_TYPE_TASK_STORAGE] = "task_storage", - [BPF_MAP_TYPE_BLOOM_FILTER] = "bloom_filter", -}; - -const size_t map_type_name_size = ARRAY_SIZE(map_type_name); - static struct hashmap *map_table; static bool map_is_per_cpu(__u32 type) @@ -81,12 +45,18 @@ static bool map_is_map_of_progs(__u32 type) static int map_type_from_str(const char *type) { + const char *map_type_str; unsigned int i; - for (i = 0; i < ARRAY_SIZE(map_type_name); i++) + for (i = 0; ; i++) { + map_type_str = libbpf_bpf_map_type_str(i); + if (!map_type_str) + break; + /* Don't allow prefixing in case of possible future shadowing */ - if (map_type_name[i] && !strcmp(map_type_name[i], type)) + if (!strcmp(map_type_str, type)) return i; + } return -1; } @@ -472,9 +442,12 @@ static int parse_elem(char **argv, struct bpf_map_info *info, static void show_map_header_json(struct bpf_map_info *info, json_writer_t *wtr) { + const char *map_type_str; + jsonw_uint_field(wtr, "id", info->id); - if (info->type < ARRAY_SIZE(map_type_name)) - jsonw_string_field(wtr, "type", map_type_name[info->type]); + map_type_str = libbpf_bpf_map_type_str(info->type); + if (map_type_str) + jsonw_string_field(wtr, "type", map_type_str); else jsonw_uint_field(wtr, "type", info->type); @@ -513,10 +486,12 @@ static int show_map_close_json(int fd, struct bpf_map_info *info) if (owner_prog_type) { unsigned int prog_type = atoi(owner_prog_type); + const char *prog_type_str; - if (prog_type < prog_type_name_size) + prog_type_str = libbpf_bpf_prog_type_str(prog_type); + if (prog_type_str) jsonw_string_field(json_wtr, "owner_prog_type", - prog_type_name[prog_type]); + prog_type_str); else jsonw_uint_field(json_wtr, "owner_prog_type", prog_type); @@ -559,9 +534,13 @@ static int show_map_close_json(int fd, struct bpf_map_info *info) static void 
show_map_header_plain(struct bpf_map_info *info) { + const char *map_type_str; + printf("%u: ", info->id); - if (info->type < ARRAY_SIZE(map_type_name)) - printf("%s ", map_type_name[info->type]); + + map_type_str = libbpf_bpf_map_type_str(info->type); + if (map_type_str) + printf("%s ", map_type_str); else printf("type %u ", info->type); @@ -597,10 +576,11 @@ static int show_map_close_plain(int fd, struct bpf_map_info *info) printf("\n\t"); if (owner_prog_type) { unsigned int prog_type = atoi(owner_prog_type); + const char *prog_type_str; - if (prog_type < prog_type_name_size) - printf("owner_prog_type %s ", - prog_type_name[prog_type]); + prog_type_str = libbpf_bpf_prog_type_str(prog_type); + if (prog_type_str) + printf("owner_prog_type %s ", prog_type_str); else printf("owner_prog_type %d ", prog_type); } @@ -876,9 +856,13 @@ map_dump(int fd, struct bpf_map_info *info, json_writer_t *wtr, } if (info->type == BPF_MAP_TYPE_REUSEPORT_SOCKARRAY && - info->value_size != 8) + info->value_size != 8) { + const char *map_type_str; + + map_type_str = libbpf_bpf_map_type_str(info->type); p_info("Warning: cannot read values from %s map with value_size != 8", - map_type_name[info->type]); + map_type_str); + } while (true) { err = bpf_map_get_next_key(fd, prev_key, key); if (err) { @@ -1342,6 +1326,8 @@ static int do_create(int argc, char **argv) goto exit; } + set_max_rlimit(); + fd = bpf_map_create(map_type, map_name, key_size, value_size, max_entries, &attr); if (fd < 0) { p_err("map create failed: %s", strerror(errno)); diff --git a/tools/bpf/bpftool/pids.c b/tools/bpf/bpftool/pids.c index e2d00d3cd868..bb6c969a114a 100644 --- a/tools/bpf/bpftool/pids.c +++ b/tools/bpf/bpftool/pids.c @@ -108,6 +108,7 @@ int build_obj_refs_table(struct hashmap **map, enum bpf_obj_type type) p_err("failed to create hashmap for PID references"); return -1; } + set_max_rlimit(); skel = pid_iter_bpf__open(); if (!skel) { diff --git a/tools/bpf/bpftool/prog.c b/tools/bpf/bpftool/prog.c index 
5c2c63df92e8..c81362a001ba 100644 --- a/tools/bpf/bpftool/prog.c +++ b/tools/bpf/bpftool/prog.c @@ -36,54 +36,28 @@ #define BPF_METADATA_PREFIX "bpf_metadata_" #define BPF_METADATA_PREFIX_LEN (sizeof(BPF_METADATA_PREFIX) - 1) -const char * const prog_type_name[] = { - [BPF_PROG_TYPE_UNSPEC] = "unspec", - [BPF_PROG_TYPE_SOCKET_FILTER] = "socket_filter", - [BPF_PROG_TYPE_KPROBE] = "kprobe", - [BPF_PROG_TYPE_SCHED_CLS] = "sched_cls", - [BPF_PROG_TYPE_SCHED_ACT] = "sched_act", - [BPF_PROG_TYPE_TRACEPOINT] = "tracepoint", - [BPF_PROG_TYPE_XDP] = "xdp", - [BPF_PROG_TYPE_PERF_EVENT] = "perf_event", - [BPF_PROG_TYPE_CGROUP_SKB] = "cgroup_skb", - [BPF_PROG_TYPE_CGROUP_SOCK] = "cgroup_sock", - [BPF_PROG_TYPE_LWT_IN] = "lwt_in", - [BPF_PROG_TYPE_LWT_OUT] = "lwt_out", - [BPF_PROG_TYPE_LWT_XMIT] = "lwt_xmit", - [BPF_PROG_TYPE_SOCK_OPS] = "sock_ops", - [BPF_PROG_TYPE_SK_SKB] = "sk_skb", - [BPF_PROG_TYPE_CGROUP_DEVICE] = "cgroup_device", - [BPF_PROG_TYPE_SK_MSG] = "sk_msg", - [BPF_PROG_TYPE_RAW_TRACEPOINT] = "raw_tracepoint", - [BPF_PROG_TYPE_CGROUP_SOCK_ADDR] = "cgroup_sock_addr", - [BPF_PROG_TYPE_LWT_SEG6LOCAL] = "lwt_seg6local", - [BPF_PROG_TYPE_LIRC_MODE2] = "lirc_mode2", - [BPF_PROG_TYPE_SK_REUSEPORT] = "sk_reuseport", - [BPF_PROG_TYPE_FLOW_DISSECTOR] = "flow_dissector", - [BPF_PROG_TYPE_CGROUP_SYSCTL] = "cgroup_sysctl", - [BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE] = "raw_tracepoint_writable", - [BPF_PROG_TYPE_CGROUP_SOCKOPT] = "cgroup_sockopt", - [BPF_PROG_TYPE_TRACING] = "tracing", - [BPF_PROG_TYPE_STRUCT_OPS] = "struct_ops", - [BPF_PROG_TYPE_EXT] = "ext", - [BPF_PROG_TYPE_LSM] = "lsm", - [BPF_PROG_TYPE_SK_LOOKUP] = "sk_lookup", - [BPF_PROG_TYPE_SYSCALL] = "syscall", -}; - -const size_t prog_type_name_size = ARRAY_SIZE(prog_type_name); - enum dump_mode { DUMP_JITED, DUMP_XLATED, }; +static const bool attach_types[] = { + [BPF_SK_SKB_STREAM_PARSER] = true, + [BPF_SK_SKB_STREAM_VERDICT] = true, + [BPF_SK_SKB_VERDICT] = true, + [BPF_SK_MSG_VERDICT] = true, + [BPF_FLOW_DISSECTOR] 
= true, + [__MAX_BPF_ATTACH_TYPE] = false, +}; + +/* Textual representations traditionally used by the program and kept around + * for the sake of backwards compatibility. + */ static const char * const attach_type_strings[] = { [BPF_SK_SKB_STREAM_PARSER] = "stream_parser", [BPF_SK_SKB_STREAM_VERDICT] = "stream_verdict", [BPF_SK_SKB_VERDICT] = "skb_verdict", [BPF_SK_MSG_VERDICT] = "msg_verdict", - [BPF_FLOW_DISSECTOR] = "flow_dissector", [__MAX_BPF_ATTACH_TYPE] = NULL, }; @@ -94,6 +68,14 @@ static enum bpf_attach_type parse_attach_type(const char *str) enum bpf_attach_type type; for (type = 0; type < __MAX_BPF_ATTACH_TYPE; type++) { + if (attach_types[type]) { + const char *attach_type_str; + + attach_type_str = libbpf_bpf_attach_type_str(type); + if (!strcmp(str, attach_type_str)) + return type; + } + if (attach_type_strings[type] && is_prefix(str, attach_type_strings[type])) return type; @@ -428,12 +410,14 @@ out_free: static void print_prog_header_json(struct bpf_prog_info *info, int fd) { + const char *prog_type_str; char prog_name[MAX_PROG_FULL_NAME]; jsonw_uint_field(json_wtr, "id", info->id); - if (info->type < ARRAY_SIZE(prog_type_name)) - jsonw_string_field(json_wtr, "type", - prog_type_name[info->type]); + prog_type_str = libbpf_bpf_prog_type_str(info->type); + + if (prog_type_str) + jsonw_string_field(json_wtr, "type", prog_type_str); else jsonw_uint_field(json_wtr, "type", info->type); @@ -515,11 +499,13 @@ static void print_prog_json(struct bpf_prog_info *info, int fd) static void print_prog_header_plain(struct bpf_prog_info *info, int fd) { + const char *prog_type_str; char prog_name[MAX_PROG_FULL_NAME]; printf("%u: ", info->id); - if (info->type < ARRAY_SIZE(prog_type_name)) - printf("%s ", prog_type_name[info->type]); + prog_type_str = libbpf_bpf_prog_type_str(info->type); + if (prog_type_str) + printf("%s ", prog_type_str); else printf("type %u ", info->type); @@ -1604,6 +1590,8 @@ static int load_with_options(int argc, char **argv, bool 
first_prog_only) } } + set_max_rlimit(); + if (verifier_logs) /* log_level1 + log_level2 + stats, but not stable UAPI */ open_opts.kernel_log_level = 1 + 2 + 4; @@ -1974,7 +1962,7 @@ static int profile_parse_metrics(int argc, char **argv) int selected_cnt = 0; unsigned int i; - metric_cnt = sizeof(metrics) / sizeof(struct profile_metric); + metric_cnt = ARRAY_SIZE(metrics); while (argc > 0) { for (i = 0; i < metric_cnt; i++) { @@ -2301,6 +2289,7 @@ static int do_profile(int argc, char **argv) } } + set_max_rlimit(); err = profiler_bpf__load(profile_obj); if (err) { p_err("failed to load profile_obj"); @@ -2374,8 +2363,8 @@ static int do_help(int argc, char **argv) " cgroup/sendmsg6 | cgroup/recvmsg4 | cgroup/recvmsg6 |\n" " cgroup/getsockopt | cgroup/setsockopt | cgroup/sock_release |\n" " struct_ops | fentry | fexit | freplace | sk_lookup }\n" - " ATTACH_TYPE := { msg_verdict | skb_verdict | stream_verdict |\n" - " stream_parser | flow_dissector }\n" + " ATTACH_TYPE := { sk_msg_verdict | sk_skb_verdict | sk_skb_stream_verdict |\n" + " sk_skb_stream_parser | flow_dissector }\n" " METRIC := { cycles | instructions | l1d_loads | llc_misses | itlb_misses | dtlb_misses }\n" " " HELP_SPEC_OPTIONS " |\n" " {-f|--bpffs} | {-m|--mapcompat} | {-n|--nomount} |\n" diff --git a/tools/bpf/bpftool/struct_ops.c b/tools/bpf/bpftool/struct_ops.c index 2535f079ed67..e08a6ff2866c 100644 --- a/tools/bpf/bpftool/struct_ops.c +++ b/tools/bpf/bpftool/struct_ops.c @@ -501,6 +501,8 @@ static int do_register(int argc, char **argv) if (libbpf_get_error(obj)) return -1; + set_max_rlimit(); + if (bpf_object__load(obj)) { bpf_object__close(obj); return -1; diff --git a/tools/bpf/resolve_btfids/main.c b/tools/bpf/resolve_btfids/main.c index 5d26f3c6f918..80cd7843c677 100644 --- a/tools/bpf/resolve_btfids/main.c +++ b/tools/bpf/resolve_btfids/main.c @@ -45,6 +45,19 @@ * .zero 4 * __BTF_ID__func__vfs_fallocate__4: * .zero 4 + * + * set8 - store symbol size into first 4 bytes and sort following + * 
ID list + * + * __BTF_ID__set8__list: + * .zero 8 + * list: + * __BTF_ID__func__vfs_getattr__3: + * .zero 4 + * .word (1 << 0) | (1 << 2) + * __BTF_ID__func__vfs_fallocate__5: + * .zero 4 + * .word (1 << 3) | (1 << 1) | (1 << 2) */ #define _GNU_SOURCE @@ -72,6 +85,7 @@ #define BTF_TYPEDEF "typedef" #define BTF_FUNC "func" #define BTF_SET "set" +#define BTF_SET8 "set8" #define ADDR_CNT 100 @@ -84,6 +98,7 @@ struct btf_id { }; int addr_cnt; bool is_set; + bool is_set8; Elf64_Addr addr[ADDR_CNT]; }; @@ -231,14 +246,14 @@ static char *get_id(const char *prefix_end) return id; } -static struct btf_id *add_set(struct object *obj, char *name) +static struct btf_id *add_set(struct object *obj, char *name, bool is_set8) { /* * __BTF_ID__set__name * name = ^ * id = ^ */ - char *id = name + sizeof(BTF_SET "__") - 1; + char *id = name + (is_set8 ? sizeof(BTF_SET8 "__") : sizeof(BTF_SET "__")) - 1; int len = strlen(name); if (id >= name + len) { @@ -444,9 +459,21 @@ static int symbols_collect(struct object *obj) } else if (!strncmp(prefix, BTF_FUNC, sizeof(BTF_FUNC) - 1)) { obj->nr_funcs++; id = add_symbol(&obj->funcs, prefix, sizeof(BTF_FUNC) - 1); + /* set8 */ + } else if (!strncmp(prefix, BTF_SET8, sizeof(BTF_SET8) - 1)) { + id = add_set(obj, prefix, true); + /* + * SET8 objects store list's count, which is encoded + * in symbol's size, together with 'cnt' field hence + * that - 1. 
+ */ + if (id) { + id->cnt = sym.st_size / sizeof(uint64_t) - 1; + id->is_set8 = true; + } /* set */ } else if (!strncmp(prefix, BTF_SET, sizeof(BTF_SET) - 1)) { - id = add_set(obj, prefix); + id = add_set(obj, prefix, false); /* * SET objects store list's count, which is encoded * in symbol's size, together with 'cnt' field hence @@ -571,7 +598,8 @@ static int id_patch(struct object *obj, struct btf_id *id) int *ptr = data->d_buf; int i; - if (!id->id && !id->is_set) + /* For set, set8, id->id may be 0 */ + if (!id->id && !id->is_set && !id->is_set8) pr_err("WARN: resolve_btfids: unresolved symbol %s\n", id->name); for (i = 0; i < id->addr_cnt; i++) { @@ -643,13 +671,13 @@ static int sets_patch(struct object *obj) } idx = idx / sizeof(int); - base = &ptr[idx] + 1; + base = &ptr[idx] + (id->is_set8 ? 2 : 1); cnt = ptr[idx]; pr_debug("sorting addr %5lu: cnt %6d [%s]\n", (idx + 1) * sizeof(int), cnt, id->name); - qsort(base, cnt, sizeof(int), cmp_id); + qsort(base, cnt, id->is_set8 ? sizeof(uint64_t) : sizeof(int), cmp_id); next = rb_next(next); } diff --git a/tools/bpf/runqslower/Makefile b/tools/bpf/runqslower/Makefile index da6de16a3dfb..8b3d87b82b7a 100644 --- a/tools/bpf/runqslower/Makefile +++ b/tools/bpf/runqslower/Makefile @@ -4,7 +4,7 @@ include ../../scripts/Makefile.include OUTPUT ?= $(abspath .output)/ BPFTOOL_OUTPUT := $(OUTPUT)bpftool/ -DEFAULT_BPFTOOL := $(BPFTOOL_OUTPUT)bpftool +DEFAULT_BPFTOOL := $(BPFTOOL_OUTPUT)bootstrap/bpftool BPFTOOL ?= $(DEFAULT_BPFTOOL) LIBBPF_SRC := $(abspath ../../lib/bpf) BPFOBJ_OUTPUT := $(OUTPUT)libbpf/ @@ -86,6 +86,5 @@ $(BPFOBJ): $(wildcard $(LIBBPF_SRC)/*.[ch] $(LIBBPF_SRC)/Makefile) | $(BPFOBJ_OU $(Q)$(MAKE) $(submake_extras) -C $(LIBBPF_SRC) OUTPUT=$(BPFOBJ_OUTPUT) \ DESTDIR=$(BPFOBJ_OUTPUT) prefix= $(abspath $@) install_headers -$(DEFAULT_BPFTOOL): $(BPFOBJ) | $(BPFTOOL_OUTPUT) - $(Q)$(MAKE) $(submake_extras) -C ../bpftool OUTPUT=$(BPFTOOL_OUTPUT) \ - ARCH= CROSS_COMPILE= CC=$(HOSTCC) LD=$(HOSTLD) 
+$(DEFAULT_BPFTOOL): | $(BPFTOOL_OUTPUT) + $(Q)$(MAKE) $(submake_extras) -C ../bpftool OUTPUT=$(BPFTOOL_OUTPUT) bootstrap diff --git a/tools/include/linux/btf_ids.h b/tools/include/linux/btf_ids.h index 57890b357f85..71e54b1e3796 100644 --- a/tools/include/linux/btf_ids.h +++ b/tools/include/linux/btf_ids.h @@ -73,7 +73,7 @@ asm( \ __BTF_ID_LIST(name, local) \ extern u32 name[]; -#define BTF_ID_LIST_GLOBAL(name) \ +#define BTF_ID_LIST_GLOBAL(name, n) \ __BTF_ID_LIST(name, globl) /* The BTF_ID_LIST_SINGLE macro defines a BTF_ID_LIST with @@ -82,6 +82,9 @@ __BTF_ID_LIST(name, globl) #define BTF_ID_LIST_SINGLE(name, prefix, typename) \ BTF_ID_LIST(name) \ BTF_ID(prefix, typename) +#define BTF_ID_LIST_GLOBAL_SINGLE(name, prefix, typename) \ + BTF_ID_LIST_GLOBAL(name, 1) \ + BTF_ID(prefix, typename) /* * The BTF_ID_UNUSED macro defines 4 zero bytes. @@ -143,13 +146,14 @@ extern struct btf_id_set name; #else -#define BTF_ID_LIST(name) static u32 name[5]; +#define BTF_ID_LIST(name) static u32 __maybe_unused name[5]; #define BTF_ID(prefix, name) #define BTF_ID_UNUSED -#define BTF_ID_LIST_GLOBAL(name) u32 name[1]; -#define BTF_ID_LIST_SINGLE(name, prefix, typename) static u32 name[1]; -#define BTF_SET_START(name) static struct btf_id_set name = { 0 }; -#define BTF_SET_START_GLOBAL(name) static struct btf_id_set name = { 0 }; +#define BTF_ID_LIST_GLOBAL(name, n) u32 __maybe_unused name[n]; +#define BTF_ID_LIST_SINGLE(name, prefix, typename) static u32 __maybe_unused name[1]; +#define BTF_ID_LIST_GLOBAL_SINGLE(name, prefix, typename) u32 __maybe_unused name[1]; +#define BTF_SET_START(name) static struct btf_id_set __maybe_unused name = { 0 }; +#define BTF_SET_START_GLOBAL(name) static struct btf_id_set __maybe_unused name = { 0 }; #define BTF_SET_END(name) #endif /* CONFIG_DEBUG_INFO_BTF */ @@ -172,7 +176,10 @@ extern struct btf_id_set name; BTF_SOCK_TYPE(BTF_SOCK_TYPE_TCP_TW, tcp_timewait_sock) \ BTF_SOCK_TYPE(BTF_SOCK_TYPE_TCP6, tcp6_sock) \ BTF_SOCK_TYPE(BTF_SOCK_TYPE_UDP, 
udp_sock) \ - BTF_SOCK_TYPE(BTF_SOCK_TYPE_UDP6, udp6_sock) + BTF_SOCK_TYPE(BTF_SOCK_TYPE_UDP6, udp6_sock) \ + BTF_SOCK_TYPE(BTF_SOCK_TYPE_UNIX, unix_sock) \ + BTF_SOCK_TYPE(BTF_SOCK_TYPE_MPTCP, mptcp_sock) \ + BTF_SOCK_TYPE(BTF_SOCK_TYPE_SOCKET, socket) enum { #define BTF_SOCK_TYPE(name, str) name, @@ -184,4 +191,18 @@ MAX_BTF_SOCK_TYPE, extern u32 btf_sock_ids[]; #endif +#define BTF_TRACING_TYPE_xxx \ + BTF_TRACING_TYPE(BTF_TRACING_TYPE_TASK, task_struct) \ + BTF_TRACING_TYPE(BTF_TRACING_TYPE_FILE, file) \ + BTF_TRACING_TYPE(BTF_TRACING_TYPE_VMA, vm_area_struct) + +enum { +#define BTF_TRACING_TYPE(name, type) name, +BTF_TRACING_TYPE_xxx +#undef BTF_TRACING_TYPE +MAX_BTF_TRACING_TYPE, +}; + +extern u32 btf_tracing_ids[]; + #endif diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index ef78e0e1a754..59a217ca2dfd 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -998,6 +998,7 @@ enum bpf_attach_type { BPF_SK_REUSEPORT_SELECT_OR_MIGRATE, BPF_PERF_EVENT, BPF_TRACE_KPROBE_MULTI, + BPF_LSM_CGROUP, __MAX_BPF_ATTACH_TYPE }; @@ -1431,6 +1432,7 @@ union bpf_attr { __u32 attach_flags; __aligned_u64 prog_ids; __u32 prog_cnt; + __aligned_u64 prog_attach_flags; /* output: per-program attach_flags */ } query; struct { /* anonymous struct used by BPF_RAW_TRACEPOINT_OPEN command */ @@ -2359,7 +2361,8 @@ union bpf_attr { * Pull in non-linear data in case the *skb* is non-linear and not * all of *len* are part of the linear section. Make *len* bytes * from *skb* readable and writable. If a zero value is passed for - * *len*, then the whole length of the *skb* is pulled. + * *len*, then all bytes in the linear part of *skb* will be made + * readable and writable. * * This helper is only needed for reading and writing with direct * packet access. 
@@ -3597,10 +3600,11 @@ union bpf_attr { * * *iph* points to the start of the IPv4 or IPv6 header, while * *iph_len* contains **sizeof**\ (**struct iphdr**) or - * **sizeof**\ (**struct ip6hdr**). + * **sizeof**\ (**struct ipv6hdr**). * * *th* points to the start of the TCP header, while *th_len* - * contains **sizeof**\ (**struct tcphdr**). + * contains the length of the TCP header (at least + * **sizeof**\ (**struct tcphdr**)). * Return * 0 if *iph* and *th* are a valid SYN cookie ACK, or a negative * error otherwise. @@ -3783,10 +3787,11 @@ union bpf_attr { * * *iph* points to the start of the IPv4 or IPv6 header, while * *iph_len* contains **sizeof**\ (**struct iphdr**) or - * **sizeof**\ (**struct ip6hdr**). + * **sizeof**\ (**struct ipv6hdr**). * * *th* points to the start of the TCP header, while *th_len* - * contains the length of the TCP header. + * contains the length of the TCP header with options (at least + * **sizeof**\ (**struct tcphdr**)). * Return * On success, lower 32 bits hold the generated SYN cookie in * followed by 16 bits which hold the MSS value for that cookie, @@ -5252,6 +5257,80 @@ union bpf_attr { * Pointer to the underlying dynptr data, NULL if the dynptr is * read-only, if the dynptr is invalid, or if the offset and length * is out of bounds. + * + * s64 bpf_tcp_raw_gen_syncookie_ipv4(struct iphdr *iph, struct tcphdr *th, u32 th_len) + * Description + * Try to issue a SYN cookie for the packet with corresponding + * IPv4/TCP headers, *iph* and *th*, without depending on a + * listening socket. + * + * *iph* points to the IPv4 header. + * + * *th* points to the start of the TCP header, while *th_len* + * contains the length of the TCP header (at least + * **sizeof**\ (**struct tcphdr**)). + * Return + * On success, lower 32 bits hold the generated SYN cookie in + * followed by 16 bits which hold the MSS value for that cookie, + * and the top 16 bits are unused. 
+ * + * On failure, the returned value is one of the following: + * + * **-EINVAL** if *th_len* is invalid. + * + * s64 bpf_tcp_raw_gen_syncookie_ipv6(struct ipv6hdr *iph, struct tcphdr *th, u32 th_len) + * Description + * Try to issue a SYN cookie for the packet with corresponding + * IPv6/TCP headers, *iph* and *th*, without depending on a + * listening socket. + * + * *iph* points to the IPv6 header. + * + * *th* points to the start of the TCP header, while *th_len* + * contains the length of the TCP header (at least + * **sizeof**\ (**struct tcphdr**)). + * Return + * On success, lower 32 bits hold the generated SYN cookie in + * followed by 16 bits which hold the MSS value for that cookie, + * and the top 16 bits are unused. + * + * On failure, the returned value is one of the following: + * + * **-EINVAL** if *th_len* is invalid. + * + * **-EPROTONOSUPPORT** if CONFIG_IPV6 is not builtin. + * + * long bpf_tcp_raw_check_syncookie_ipv4(struct iphdr *iph, struct tcphdr *th) + * Description + * Check whether *iph* and *th* contain a valid SYN cookie ACK + * without depending on a listening socket. + * + * *iph* points to the IPv4 header. + * + * *th* points to the TCP header. + * Return + * 0 if *iph* and *th* are a valid SYN cookie ACK. + * + * On failure, the returned value is one of the following: + * + * **-EACCES** if the SYN cookie is not valid. + * + * long bpf_tcp_raw_check_syncookie_ipv6(struct ipv6hdr *iph, struct tcphdr *th) + * Description + * Check whether *iph* and *th* contain a valid SYN cookie ACK + * without depending on a listening socket. + * + * *iph* points to the IPv6 header. + * + * *th* points to the TCP header. + * Return + * 0 if *iph* and *th* are a valid SYN cookie ACK. + * + * On failure, the returned value is one of the following: + * + * **-EACCES** if the SYN cookie is not valid. + * + * **-EPROTONOSUPPORT** if CONFIG_IPV6 is not builtin. 
*/ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -5458,6 +5537,10 @@ union bpf_attr { FN(dynptr_read), \ FN(dynptr_write), \ FN(dynptr_data), \ + FN(tcp_raw_gen_syncookie_ipv4), \ + FN(tcp_raw_gen_syncookie_ipv6), \ + FN(tcp_raw_check_syncookie_ipv4), \ + FN(tcp_raw_check_syncookie_ipv6), \ /* */ /* integer value in 'imm' field of BPF_CALL instruction selects which helper @@ -5998,6 +6081,8 @@ struct bpf_prog_info { __u64 run_cnt; __u64 recursion_misses; __u32 verified_insns; + __u32 attach_btf_obj_id; + __u32 attach_btf_id; } __attribute__((aligned(8))); struct bpf_map_info { @@ -6705,6 +6790,7 @@ enum bpf_core_relo_kind { BPF_CORE_TYPE_SIZE = 9, /* type size in bytes */ BPF_CORE_ENUMVAL_EXISTS = 10, /* enum value existence in target kernel */ BPF_CORE_ENUMVAL_VALUE = 11, /* enum value integer value */ + BPF_CORE_TYPE_MATCHES = 12, /* type match in target kernel */ }; /* diff --git a/tools/include/uapi/linux/btf.h b/tools/include/uapi/linux/btf.h index a9162a6c0284..ec1798b6d3ff 100644 --- a/tools/include/uapi/linux/btf.h +++ b/tools/include/uapi/linux/btf.h @@ -36,10 +36,10 @@ struct btf_type { * bits 24-28: kind (e.g. int, ptr, array...etc) * bits 29-30: unused * bit 31: kind_flag, currently used by - * struct, union and fwd + * struct, union, enum, fwd and enum64 */ __u32 info; - /* "size" is used by INT, ENUM, STRUCT, UNION and DATASEC. + /* "size" is used by INT, ENUM, STRUCT, UNION, DATASEC and ENUM64. * "size" tells the size of the type it is describing. 
* * "type" is used by PTR, TYPEDEF, VOLATILE, CONST, RESTRICT, @@ -63,7 +63,7 @@ enum { BTF_KIND_ARRAY = 3, /* Array */ BTF_KIND_STRUCT = 4, /* Struct */ BTF_KIND_UNION = 5, /* Union */ - BTF_KIND_ENUM = 6, /* Enumeration */ + BTF_KIND_ENUM = 6, /* Enumeration up to 32-bit values */ BTF_KIND_FWD = 7, /* Forward */ BTF_KIND_TYPEDEF = 8, /* Typedef */ BTF_KIND_VOLATILE = 9, /* Volatile */ @@ -76,6 +76,7 @@ enum { BTF_KIND_FLOAT = 16, /* Floating point */ BTF_KIND_DECL_TAG = 17, /* Decl Tag */ BTF_KIND_TYPE_TAG = 18, /* Type Tag */ + BTF_KIND_ENUM64 = 19, /* Enumeration up to 64-bit values */ NR_BTF_KINDS, BTF_KIND_MAX = NR_BTF_KINDS - 1, @@ -186,4 +187,14 @@ struct btf_decl_tag { __s32 component_idx; }; +/* BTF_KIND_ENUM64 is followed by multiple "struct btf_enum64". + * The exact number of btf_enum64 is stored in the vlen (of the + * info in "struct btf_type"). + */ +struct btf_enum64 { + __u32 name_off; + __u32 val_lo32; + __u32 val_hi32; +}; + #endif /* _UAPI__LINUX_BTF_H__ */ diff --git a/tools/include/uapi/linux/if_link.h b/tools/include/uapi/linux/if_link.h index b339bf2196ca..0242f31e339c 100644 --- a/tools/include/uapi/linux/if_link.h +++ b/tools/include/uapi/linux/if_link.h @@ -890,6 +890,7 @@ enum { IFLA_BOND_SLAVE_AD_AGGREGATOR_ID, IFLA_BOND_SLAVE_AD_ACTOR_OPER_PORT_STATE, IFLA_BOND_SLAVE_AD_PARTNER_OPER_PORT_STATE, + IFLA_BOND_SLAVE_PRIO, __IFLA_BOND_SLAVE_MAX, }; diff --git a/tools/lib/bpf/Build b/tools/lib/bpf/Build index 31a1a9015902..5a3dfb56d78f 100644 --- a/tools/lib/bpf/Build +++ b/tools/lib/bpf/Build @@ -1,4 +1,4 @@ libbpf-y := libbpf.o bpf.o nlattr.o btf.o libbpf_errno.o str_error.o \ - netlink.o bpf_prog_linfo.o libbpf_probes.o xsk.o hashmap.o \ + netlink.o bpf_prog_linfo.o libbpf_probes.o hashmap.o \ btf_dump.o ringbuf.o strset.o linker.o gen_loader.o relo_core.o \ usdt.o diff --git a/tools/lib/bpf/Makefile b/tools/lib/bpf/Makefile index a1265b152027..4c904ef0b47e 100644 --- a/tools/lib/bpf/Makefile +++ b/tools/lib/bpf/Makefile @@ -237,7 +237,7 
@@ install_lib: all_cmd $(call do_install_mkdir,$(libdir_SQ)); \ cp -fpR $(LIB_FILE) $(DESTDIR)$(libdir_SQ) -SRC_HDRS := bpf.h libbpf.h btf.h libbpf_common.h libbpf_legacy.h xsk.h \ +SRC_HDRS := bpf.h libbpf.h btf.h libbpf_common.h libbpf_legacy.h \ bpf_helpers.h bpf_tracing.h bpf_endian.h bpf_core_read.h \ skel_internal.h libbpf_version.h usdt.bpf.h GEN_HDRS := $(BPF_GENERATED) diff --git a/tools/lib/bpf/bpf.c b/tools/lib/bpf/bpf.c index 240186aac8e6..efcc06dafbd9 100644 --- a/tools/lib/bpf/bpf.c +++ b/tools/lib/bpf/bpf.c @@ -147,10 +147,6 @@ int bump_rlimit_memlock(void) { struct rlimit rlim; - /* this the default in libbpf 1.0, but for now user has to opt-in explicitly */ - if (!(libbpf_mode & LIBBPF_STRICT_AUTO_RLIMIT_MEMLOCK)) - return 0; - /* if kernel supports memcg-based accounting, skip bumping RLIMIT_MEMLOCK */ if (memlock_bumped || kernel_supports(NULL, FEAT_MEMCG_ACCOUNT)) return 0; @@ -233,11 +229,10 @@ alloc_zero_tailing_info(const void *orecord, __u32 cnt, return info; } -DEFAULT_VERSION(bpf_prog_load_v0_6_0, bpf_prog_load, LIBBPF_0.6.0) -int bpf_prog_load_v0_6_0(enum bpf_prog_type prog_type, - const char *prog_name, const char *license, - const struct bpf_insn *insns, size_t insn_cnt, - const struct bpf_prog_load_opts *opts) +int bpf_prog_load(enum bpf_prog_type prog_type, + const char *prog_name, const char *license, + const struct bpf_insn *insns, size_t insn_cnt, + const struct bpf_prog_load_opts *opts) { void *finfo = NULL, *linfo = NULL; const char *func_info, *line_info; @@ -384,94 +379,6 @@ done: return libbpf_err_errno(fd); } -__attribute__((alias("bpf_load_program_xattr2"))) -int bpf_load_program_xattr(const struct bpf_load_program_attr *load_attr, - char *log_buf, size_t log_buf_sz); - -static int bpf_load_program_xattr2(const struct bpf_load_program_attr *load_attr, - char *log_buf, size_t log_buf_sz) -{ - LIBBPF_OPTS(bpf_prog_load_opts, p); - - if (!load_attr || !log_buf != !log_buf_sz) - return libbpf_err(-EINVAL); - - 
p.expected_attach_type = load_attr->expected_attach_type; - switch (load_attr->prog_type) { - case BPF_PROG_TYPE_STRUCT_OPS: - case BPF_PROG_TYPE_LSM: - p.attach_btf_id = load_attr->attach_btf_id; - break; - case BPF_PROG_TYPE_TRACING: - case BPF_PROG_TYPE_EXT: - p.attach_btf_id = load_attr->attach_btf_id; - p.attach_prog_fd = load_attr->attach_prog_fd; - break; - default: - p.prog_ifindex = load_attr->prog_ifindex; - p.kern_version = load_attr->kern_version; - } - p.log_level = load_attr->log_level; - p.log_buf = log_buf; - p.log_size = log_buf_sz; - p.prog_btf_fd = load_attr->prog_btf_fd; - p.func_info_rec_size = load_attr->func_info_rec_size; - p.func_info_cnt = load_attr->func_info_cnt; - p.func_info = load_attr->func_info; - p.line_info_rec_size = load_attr->line_info_rec_size; - p.line_info_cnt = load_attr->line_info_cnt; - p.line_info = load_attr->line_info; - p.prog_flags = load_attr->prog_flags; - - return bpf_prog_load(load_attr->prog_type, load_attr->name, load_attr->license, - load_attr->insns, load_attr->insns_cnt, &p); -} - -int bpf_load_program(enum bpf_prog_type type, const struct bpf_insn *insns, - size_t insns_cnt, const char *license, - __u32 kern_version, char *log_buf, - size_t log_buf_sz) -{ - struct bpf_load_program_attr load_attr; - - memset(&load_attr, 0, sizeof(struct bpf_load_program_attr)); - load_attr.prog_type = type; - load_attr.expected_attach_type = 0; - load_attr.name = NULL; - load_attr.insns = insns; - load_attr.insns_cnt = insns_cnt; - load_attr.license = license; - load_attr.kern_version = kern_version; - - return bpf_load_program_xattr2(&load_attr, log_buf, log_buf_sz); -} - -int bpf_verify_program(enum bpf_prog_type type, const struct bpf_insn *insns, - size_t insns_cnt, __u32 prog_flags, const char *license, - __u32 kern_version, char *log_buf, size_t log_buf_sz, - int log_level) -{ - union bpf_attr attr; - int fd; - - bump_rlimit_memlock(); - - memset(&attr, 0, sizeof(attr)); - attr.prog_type = type; - attr.insn_cnt = 
(__u32)insns_cnt; - attr.insns = ptr_to_u64(insns); - attr.license = ptr_to_u64(license); - attr.log_buf = ptr_to_u64(log_buf); - attr.log_size = log_buf_sz; - attr.log_level = log_level; - log_buf[0] = 0; - attr.kern_version = kern_version; - attr.prog_flags = prog_flags; - - fd = sys_bpf_prog_load(&attr, sizeof(attr), PROG_LOAD_ATTEMPTS); - return libbpf_err_errno(fd); -} - int bpf_map_update_elem(int fd, const void *key, const void *value, __u64 flags) { @@ -672,11 +579,20 @@ int bpf_obj_pin(int fd, const char *pathname) int bpf_obj_get(const char *pathname) { + return bpf_obj_get_opts(pathname, NULL); +} + +int bpf_obj_get_opts(const char *pathname, const struct bpf_obj_get_opts *opts) +{ union bpf_attr attr; int fd; + if (!OPTS_VALID(opts, bpf_obj_get_opts)) + return libbpf_err(-EINVAL); + memset(&attr, 0, sizeof(attr)); attr.pathname = ptr_to_u64((void *)pathname); + attr.file_flags = OPTS_GET(opts, file_flags, 0); fd = sys_bpf_fd(BPF_OBJ_GET, &attr, sizeof(attr)); return libbpf_err_errno(fd); @@ -888,80 +804,48 @@ int bpf_iter_create(int link_fd) return libbpf_err_errno(fd); } -int bpf_prog_query(int target_fd, enum bpf_attach_type type, __u32 query_flags, - __u32 *attach_flags, __u32 *prog_ids, __u32 *prog_cnt) +int bpf_prog_query_opts(int target_fd, + enum bpf_attach_type type, + struct bpf_prog_query_opts *opts) { union bpf_attr attr; int ret; + if (!OPTS_VALID(opts, bpf_prog_query_opts)) + return libbpf_err(-EINVAL); + memset(&attr, 0, sizeof(attr)); + attr.query.target_fd = target_fd; attr.query.attach_type = type; - attr.query.query_flags = query_flags; - attr.query.prog_cnt = *prog_cnt; - attr.query.prog_ids = ptr_to_u64(prog_ids); + attr.query.query_flags = OPTS_GET(opts, query_flags, 0); + attr.query.prog_cnt = OPTS_GET(opts, prog_cnt, 0); + attr.query.prog_ids = ptr_to_u64(OPTS_GET(opts, prog_ids, NULL)); + attr.query.prog_attach_flags = ptr_to_u64(OPTS_GET(opts, prog_attach_flags, NULL)); ret = sys_bpf(BPF_PROG_QUERY, &attr, sizeof(attr)); - if 
(attach_flags) - *attach_flags = attr.query.attach_flags; - *prog_cnt = attr.query.prog_cnt; - - return libbpf_err_errno(ret); -} - -int bpf_prog_test_run(int prog_fd, int repeat, void *data, __u32 size, - void *data_out, __u32 *size_out, __u32 *retval, - __u32 *duration) -{ - union bpf_attr attr; - int ret; - - memset(&attr, 0, sizeof(attr)); - attr.test.prog_fd = prog_fd; - attr.test.data_in = ptr_to_u64(data); - attr.test.data_out = ptr_to_u64(data_out); - attr.test.data_size_in = size; - attr.test.repeat = repeat; - - ret = sys_bpf(BPF_PROG_TEST_RUN, &attr, sizeof(attr)); - - if (size_out) - *size_out = attr.test.data_size_out; - if (retval) - *retval = attr.test.retval; - if (duration) - *duration = attr.test.duration; + OPTS_SET(opts, attach_flags, attr.query.attach_flags); + OPTS_SET(opts, prog_cnt, attr.query.prog_cnt); return libbpf_err_errno(ret); } -int bpf_prog_test_run_xattr(struct bpf_prog_test_run_attr *test_attr) +int bpf_prog_query(int target_fd, enum bpf_attach_type type, __u32 query_flags, + __u32 *attach_flags, __u32 *prog_ids, __u32 *prog_cnt) { - union bpf_attr attr; + LIBBPF_OPTS(bpf_prog_query_opts, opts); int ret; - if (!test_attr->data_out && test_attr->data_size_out > 0) - return libbpf_err(-EINVAL); + opts.query_flags = query_flags; + opts.prog_ids = prog_ids; + opts.prog_cnt = *prog_cnt; - memset(&attr, 0, sizeof(attr)); - attr.test.prog_fd = test_attr->prog_fd; - attr.test.data_in = ptr_to_u64(test_attr->data_in); - attr.test.data_out = ptr_to_u64(test_attr->data_out); - attr.test.data_size_in = test_attr->data_size_in; - attr.test.data_size_out = test_attr->data_size_out; - attr.test.ctx_in = ptr_to_u64(test_attr->ctx_in); - attr.test.ctx_out = ptr_to_u64(test_attr->ctx_out); - attr.test.ctx_size_in = test_attr->ctx_size_in; - attr.test.ctx_size_out = test_attr->ctx_size_out; - attr.test.repeat = test_attr->repeat; + ret = bpf_prog_query_opts(target_fd, type, &opts); - ret = sys_bpf(BPF_PROG_TEST_RUN, &attr, sizeof(attr)); - - 
test_attr->data_size_out = attr.test.data_size_out; - test_attr->ctx_size_out = attr.test.ctx_size_out; - test_attr->retval = attr.test.retval; - test_attr->duration = attr.test.duration; + if (attach_flags) + *attach_flags = opts.attach_flags; + *prog_cnt = opts.prog_cnt; return libbpf_err_errno(ret); } @@ -1162,27 +1046,6 @@ int bpf_btf_load(const void *btf_data, size_t btf_size, const struct bpf_btf_loa return libbpf_err_errno(fd); } -int bpf_load_btf(const void *btf, __u32 btf_size, char *log_buf, __u32 log_buf_size, bool do_log) -{ - LIBBPF_OPTS(bpf_btf_load_opts, opts); - int fd; - -retry: - if (do_log && log_buf && log_buf_size) { - opts.log_buf = log_buf; - opts.log_size = log_buf_size; - opts.log_level = 1; - } - - fd = bpf_btf_load(btf, btf_size, &opts); - if (fd < 0 && !do_log && log_buf && log_buf_size) { - do_log = true; - goto retry; - } - - return libbpf_err_errno(fd); -} - int bpf_task_fd_query(int pid, int fd, __u32 flags, char *buf, __u32 *buf_len, __u32 *prog_id, __u32 *fd_type, __u64 *probe_offset, __u64 *probe_addr) diff --git a/tools/lib/bpf/bpf.h b/tools/lib/bpf/bpf.h index cabc03703e29..9c50beabdd14 100644 --- a/tools/lib/bpf/bpf.h +++ b/tools/lib/bpf/bpf.h @@ -103,54 +103,6 @@ LIBBPF_API int bpf_prog_load(enum bpf_prog_type prog_type, const char *prog_name, const char *license, const struct bpf_insn *insns, size_t insn_cnt, const struct bpf_prog_load_opts *opts); -/* this "specialization" should go away in libbpf 1.0 */ -LIBBPF_API int bpf_prog_load_v0_6_0(enum bpf_prog_type prog_type, - const char *prog_name, const char *license, - const struct bpf_insn *insns, size_t insn_cnt, - const struct bpf_prog_load_opts *opts); - -/* This is an elaborate way to not conflict with deprecated bpf_prog_load() - * API, defined in libbpf.h. Once we hit libbpf 1.0, all this will be gone. 
- * With this approach, if someone is calling bpf_prog_load() with - * 4 arguments, they will use the deprecated API, which keeps backwards - * compatibility (both source code and binary). If bpf_prog_load() is called - * with 6 arguments, though, it gets redirected to __bpf_prog_load. - * So looking forward to libbpf 1.0 when this hack will be gone and - * __bpf_prog_load() will be called just bpf_prog_load(). - */ -#ifndef bpf_prog_load -#define bpf_prog_load(...) ___libbpf_overload(___bpf_prog_load, __VA_ARGS__) -#define ___bpf_prog_load4(file, type, pobj, prog_fd) \ - bpf_prog_load_deprecated(file, type, pobj, prog_fd) -#define ___bpf_prog_load6(prog_type, prog_name, license, insns, insn_cnt, opts) \ - bpf_prog_load(prog_type, prog_name, license, insns, insn_cnt, opts) -#endif /* bpf_prog_load */ - -struct bpf_load_program_attr { - enum bpf_prog_type prog_type; - enum bpf_attach_type expected_attach_type; - const char *name; - const struct bpf_insn *insns; - size_t insns_cnt; - const char *license; - union { - __u32 kern_version; - __u32 attach_prog_fd; - }; - union { - __u32 prog_ifindex; - __u32 attach_btf_id; - }; - __u32 prog_btf_fd; - __u32 func_info_rec_size; - const void *func_info; - __u32 func_info_cnt; - __u32 line_info_rec_size; - const void *line_info; - __u32 line_info_cnt; - __u32 log_level; - __u32 prog_flags; -}; /* Flags to direct loading requirements */ #define MAPS_RELAX_COMPAT 0x01 @@ -158,22 +110,6 @@ struct bpf_load_program_attr { /* Recommended log buffer size */ #define BPF_LOG_BUF_SIZE (UINT32_MAX >> 8) /* verifier maximum in kernels <= 5.1 */ -LIBBPF_DEPRECATED_SINCE(0, 7, "use bpf_prog_load() instead") -LIBBPF_API int bpf_load_program_xattr(const struct bpf_load_program_attr *load_attr, - char *log_buf, size_t log_buf_sz); -LIBBPF_DEPRECATED_SINCE(0, 7, "use bpf_prog_load() instead") -LIBBPF_API int bpf_load_program(enum bpf_prog_type type, - const struct bpf_insn *insns, size_t insns_cnt, - const char *license, __u32 kern_version, - 
char *log_buf, size_t log_buf_sz); -LIBBPF_DEPRECATED_SINCE(0, 7, "use bpf_prog_load() instead") -LIBBPF_API int bpf_verify_program(enum bpf_prog_type type, - const struct bpf_insn *insns, - size_t insns_cnt, __u32 prog_flags, - const char *license, __u32 kern_version, - char *log_buf, size_t log_buf_sz, - int log_level); - struct bpf_btf_load_opts { size_t sz; /* size of this struct for forward/backward compatibility */ @@ -187,10 +123,6 @@ struct bpf_btf_load_opts { LIBBPF_API int bpf_btf_load(const void *btf_data, size_t btf_size, const struct bpf_btf_load_opts *opts); -LIBBPF_DEPRECATED_SINCE(0, 8, "use bpf_btf_load() instead") -LIBBPF_API int bpf_load_btf(const void *btf, __u32 btf_size, char *log_buf, - __u32 log_buf_size, bool do_log); - LIBBPF_API int bpf_map_update_elem(int fd, const void *key, const void *value, __u64 flags); @@ -338,8 +270,19 @@ LIBBPF_API int bpf_map_update_batch(int fd, const void *keys, const void *values __u32 *count, const struct bpf_map_batch_opts *opts); +struct bpf_obj_get_opts { + size_t sz; /* size of this struct for forward/backward compatibility */ + + __u32 file_flags; + + size_t :0; +}; +#define bpf_obj_get_opts__last_field file_flags + LIBBPF_API int bpf_obj_pin(int fd, const char *pathname); LIBBPF_API int bpf_obj_get(const char *pathname); +LIBBPF_API int bpf_obj_get_opts(const char *pathname, + const struct bpf_obj_get_opts *opts); struct bpf_prog_attach_opts { size_t sz; /* size of this struct for forward/backward compatibility */ @@ -353,10 +296,6 @@ LIBBPF_API int bpf_prog_attach(int prog_fd, int attachable_fd, LIBBPF_API int bpf_prog_attach_opts(int prog_fd, int attachable_fd, enum bpf_attach_type type, const struct bpf_prog_attach_opts *opts); -LIBBPF_DEPRECATED_SINCE(0, 8, "use bpf_prog_attach_opts() instead") -LIBBPF_API int bpf_prog_attach_xattr(int prog_fd, int attachable_fd, - enum bpf_attach_type type, - const struct bpf_prog_attach_opts *opts); LIBBPF_API int bpf_prog_detach(int attachable_fd, enum 
bpf_attach_type type); LIBBPF_API int bpf_prog_detach2(int prog_fd, int attachable_fd, enum bpf_attach_type type); @@ -422,17 +361,6 @@ struct bpf_prog_test_run_attr { * out: length of cxt_out */ }; -LIBBPF_DEPRECATED_SINCE(0, 7, "use bpf_prog_test_run_opts() instead") -LIBBPF_API int bpf_prog_test_run_xattr(struct bpf_prog_test_run_attr *test_attr); - -/* - * bpf_prog_test_run does not check that data_out is large enough. Consider - * using bpf_prog_test_run_opts instead. - */ -LIBBPF_DEPRECATED_SINCE(0, 7, "use bpf_prog_test_run_opts() instead") -LIBBPF_API int bpf_prog_test_run(int prog_fd, int repeat, void *data, - __u32 size, void *data_out, __u32 *size_out, - __u32 *retval, __u32 *duration); LIBBPF_API int bpf_prog_get_next_id(__u32 start_id, __u32 *next_id); LIBBPF_API int bpf_map_get_next_id(__u32 start_id, __u32 *next_id); LIBBPF_API int bpf_btf_get_next_id(__u32 start_id, __u32 *next_id); @@ -442,9 +370,24 @@ LIBBPF_API int bpf_map_get_fd_by_id(__u32 id); LIBBPF_API int bpf_btf_get_fd_by_id(__u32 id); LIBBPF_API int bpf_link_get_fd_by_id(__u32 id); LIBBPF_API int bpf_obj_get_info_by_fd(int bpf_fd, void *info, __u32 *info_len); + +struct bpf_prog_query_opts { + size_t sz; /* size of this struct for forward/backward compatibility */ + __u32 query_flags; + __u32 attach_flags; /* output argument */ + __u32 *prog_ids; + __u32 prog_cnt; /* input+output argument */ + __u32 *prog_attach_flags; +}; +#define bpf_prog_query_opts__last_field prog_attach_flags + +LIBBPF_API int bpf_prog_query_opts(int target_fd, + enum bpf_attach_type type, + struct bpf_prog_query_opts *opts); LIBBPF_API int bpf_prog_query(int target_fd, enum bpf_attach_type type, __u32 query_flags, __u32 *attach_flags, __u32 *prog_ids, __u32 *prog_cnt); + LIBBPF_API int bpf_raw_tracepoint_open(const char *name, int prog_fd); LIBBPF_API int bpf_task_fd_query(int pid, int fd, __u32 flags, char *buf, __u32 *buf_len, __u32 *prog_id, __u32 *fd_type, diff --git a/tools/lib/bpf/bpf_core_read.h 
b/tools/lib/bpf/bpf_core_read.h index fd48b1ff59ca..496e6a8ee0dc 100644 --- a/tools/lib/bpf/bpf_core_read.h +++ b/tools/lib/bpf/bpf_core_read.h @@ -29,6 +29,7 @@ enum bpf_type_id_kind { enum bpf_type_info_kind { BPF_TYPE_EXISTS = 0, /* type existence in target kernel */ BPF_TYPE_SIZE = 1, /* type size in target kernel */ + BPF_TYPE_MATCHES = 2, /* type match in target kernel */ }; /* second argument to __builtin_preserve_enum_value() built-in */ @@ -184,6 +185,16 @@ enum bpf_enum_value_kind { __builtin_preserve_type_info(*(typeof(type) *)0, BPF_TYPE_EXISTS) /* + * Convenience macro to check that provided named type + * (struct/union/enum/typedef) "matches" that in a target kernel. + * Returns: + * 1, if the type matches in the target kernel's BTF; + * 0, if the type does not match any in the target kernel + */ +#define bpf_core_type_matches(type) \ + __builtin_preserve_type_info(*(typeof(type) *)0, BPF_TYPE_MATCHES) + +/* * Convenience macro to get the byte size of a provided named type * (struct/union/enum/typedef) in a target kernel. * Returns: diff --git a/tools/lib/bpf/bpf_helpers.h b/tools/lib/bpf/bpf_helpers.h index fb04eaf367f1..7349b16b8e2f 100644 --- a/tools/lib/bpf/bpf_helpers.h +++ b/tools/lib/bpf/bpf_helpers.h @@ -22,12 +22,25 @@ * To allow use of SEC() with externs (e.g., for extern .maps declarations), * make sure __attribute__((unused)) doesn't trigger compilation warning. */ +#if __GNUC__ && !__clang__ + +/* + * Pragma macros are broken on GCC + * https://gcc.gnu.org/bugzilla/show_bug.cgi?id=55578 + * https://gcc.gnu.org/bugzilla/show_bug.cgi?id=90400 + */ +#define SEC(name) __attribute__((section(name), used)) + +#else + #define SEC(name) \ _Pragma("GCC diagnostic push") \ _Pragma("GCC diagnostic ignored \"-Wignored-attributes\"") \ __attribute__((section(name), used)) \ _Pragma("GCC diagnostic pop") \ +#endif + /* Avoid 'linux/stddef.h' definition of '__always_inline'. 
*/ #undef __always_inline #define __always_inline inline __attribute__((always_inline)) diff --git a/tools/lib/bpf/bpf_tracing.h b/tools/lib/bpf/bpf_tracing.h index 01ce121c302d..43ca3aff2292 100644 --- a/tools/lib/bpf/bpf_tracing.h +++ b/tools/lib/bpf/bpf_tracing.h @@ -2,6 +2,8 @@ #ifndef __BPF_TRACING_H__ #define __BPF_TRACING_H__ +#include <bpf/bpf_helpers.h> + /* Scan the ARCH passed in from ARCH env variable (see Makefile) */ #if defined(__TARGET_ARCH_x86) #define bpf_target_x86 @@ -140,7 +142,7 @@ struct pt_regs___s390 { #define __PT_RC_REG gprs[2] #define __PT_SP_REG gprs[15] #define __PT_IP_REG psw.addr -#define PT_REGS_PARM1_SYSCALL(x) ({ _Pragma("GCC error \"use PT_REGS_PARM1_CORE_SYSCALL() instead\""); 0l; }) +#define PT_REGS_PARM1_SYSCALL(x) PT_REGS_PARM1_CORE_SYSCALL(x) #define PT_REGS_PARM1_CORE_SYSCALL(x) BPF_CORE_READ((const struct pt_regs___s390 *)(x), orig_gpr2) #elif defined(bpf_target_arm) @@ -174,7 +176,7 @@ struct pt_regs___arm64 { #define __PT_RC_REG regs[0] #define __PT_SP_REG sp #define __PT_IP_REG pc -#define PT_REGS_PARM1_SYSCALL(x) ({ _Pragma("GCC error \"use PT_REGS_PARM1_CORE_SYSCALL() instead\""); 0l; }) +#define PT_REGS_PARM1_SYSCALL(x) PT_REGS_PARM1_CORE_SYSCALL(x) #define PT_REGS_PARM1_CORE_SYSCALL(x) BPF_CORE_READ((const struct pt_regs___arm64 *)(x), orig_x0) #elif defined(bpf_target_mips) @@ -233,7 +235,7 @@ struct pt_regs___arm64 { #define __PT_PARM5_REG a4 #define __PT_RET_REG ra #define __PT_FP_REG s0 -#define __PT_RC_REG a5 +#define __PT_RC_REG a0 #define __PT_SP_REG sp #define __PT_IP_REG pc /* riscv does not select ARCH_HAS_SYSCALL_WRAPPER. 
*/ @@ -493,39 +495,69 @@ typeof(name(0)) name(struct pt_regs *ctx) \ } \ static __always_inline typeof(name(0)) ____##name(struct pt_regs *ctx, ##args) +/* If kernel has CONFIG_ARCH_HAS_SYSCALL_WRAPPER, read pt_regs directly */ #define ___bpf_syscall_args0() ctx -#define ___bpf_syscall_args1(x) ___bpf_syscall_args0(), (void *)PT_REGS_PARM1_CORE_SYSCALL(regs) -#define ___bpf_syscall_args2(x, args...) ___bpf_syscall_args1(args), (void *)PT_REGS_PARM2_CORE_SYSCALL(regs) -#define ___bpf_syscall_args3(x, args...) ___bpf_syscall_args2(args), (void *)PT_REGS_PARM3_CORE_SYSCALL(regs) -#define ___bpf_syscall_args4(x, args...) ___bpf_syscall_args3(args), (void *)PT_REGS_PARM4_CORE_SYSCALL(regs) -#define ___bpf_syscall_args5(x, args...) ___bpf_syscall_args4(args), (void *)PT_REGS_PARM5_CORE_SYSCALL(regs) +#define ___bpf_syscall_args1(x) ___bpf_syscall_args0(), (void *)PT_REGS_PARM1_SYSCALL(regs) +#define ___bpf_syscall_args2(x, args...) ___bpf_syscall_args1(args), (void *)PT_REGS_PARM2_SYSCALL(regs) +#define ___bpf_syscall_args3(x, args...) ___bpf_syscall_args2(args), (void *)PT_REGS_PARM3_SYSCALL(regs) +#define ___bpf_syscall_args4(x, args...) ___bpf_syscall_args3(args), (void *)PT_REGS_PARM4_SYSCALL(regs) +#define ___bpf_syscall_args5(x, args...) ___bpf_syscall_args4(args), (void *)PT_REGS_PARM5_SYSCALL(regs) #define ___bpf_syscall_args(args...) ___bpf_apply(___bpf_syscall_args, ___bpf_narg(args))(args) +/* If kernel doesn't have CONFIG_ARCH_HAS_SYSCALL_WRAPPER, we have to BPF_CORE_READ from pt_regs */ +#define ___bpf_syswrap_args0() ctx +#define ___bpf_syswrap_args1(x) ___bpf_syswrap_args0(), (void *)PT_REGS_PARM1_CORE_SYSCALL(regs) +#define ___bpf_syswrap_args2(x, args...) ___bpf_syswrap_args1(args), (void *)PT_REGS_PARM2_CORE_SYSCALL(regs) +#define ___bpf_syswrap_args3(x, args...) ___bpf_syswrap_args2(args), (void *)PT_REGS_PARM3_CORE_SYSCALL(regs) +#define ___bpf_syswrap_args4(x, args...) 
___bpf_syswrap_args3(args), (void *)PT_REGS_PARM4_CORE_SYSCALL(regs) +#define ___bpf_syswrap_args5(x, args...) ___bpf_syswrap_args4(args), (void *)PT_REGS_PARM5_CORE_SYSCALL(regs) +#define ___bpf_syswrap_args(args...) ___bpf_apply(___bpf_syswrap_args, ___bpf_narg(args))(args) + /* - * BPF_KPROBE_SYSCALL is a variant of BPF_KPROBE, which is intended for + * BPF_KSYSCALL is a variant of BPF_KPROBE, which is intended for * tracing syscall functions, like __x64_sys_close. It hides the underlying * platform-specific low-level way of getting syscall input arguments from * struct pt_regs, and provides a familiar typed and named function arguments * syntax and semantics of accessing syscall input parameters. * - * Original struct pt_regs* context is preserved as 'ctx' argument. This might + * Original struct pt_regs * context is preserved as 'ctx' argument. This might * be necessary when using BPF helpers like bpf_perf_event_output(). * - * This macro relies on BPF CO-RE support. + * At the moment BPF_KSYSCALL does not transparently handle all the calling + * convention quirks for the following syscalls: + * + * - mmap(): __ARCH_WANT_SYS_OLD_MMAP. + * - clone(): CONFIG_CLONE_BACKWARDS, CONFIG_CLONE_BACKWARDS2 and + * CONFIG_CLONE_BACKWARDS3. + * - socket-related syscalls: __ARCH_WANT_SYS_SOCKETCALL. + * - compat syscalls. + * + * This may or may not change in the future. User needs to take extra measures + * to handle such quirks explicitly, if necessary. + * + * This macro relies on BPF CO-RE support and virtual __kconfig externs. */ -#define BPF_KPROBE_SYSCALL(name, args...) \ +#define BPF_KSYSCALL(name, args...) \ name(struct pt_regs *ctx); \ +extern _Bool LINUX_HAS_SYSCALL_WRAPPER __kconfig; \ static __attribute__((always_inline)) typeof(name(0)) \ ____##name(struct pt_regs *ctx, ##args); \ typeof(name(0)) name(struct pt_regs *ctx) \ { \ - struct pt_regs *regs = PT_REGS_SYSCALL_REGS(ctx); \ + struct pt_regs *regs = LINUX_HAS_SYSCALL_WRAPPER \ + ? 
(struct pt_regs *)PT_REGS_PARM1(ctx) \ + : ctx; \ _Pragma("GCC diagnostic push") \ _Pragma("GCC diagnostic ignored \"-Wint-conversion\"") \ - return ____##name(___bpf_syscall_args(args)); \ + if (LINUX_HAS_SYSCALL_WRAPPER) \ + return ____##name(___bpf_syswrap_args(args)); \ + else \ + return ____##name(___bpf_syscall_args(args)); \ _Pragma("GCC diagnostic pop") \ } \ static __attribute__((always_inline)) typeof(name(0)) \ ____##name(struct pt_regs *ctx, ##args) +#define BPF_KPROBE_SYSCALL BPF_KSYSCALL + #endif diff --git a/tools/lib/bpf/btf.c b/tools/lib/bpf/btf.c index bb1e06eb1eca..2d14f1a52d7a 100644 --- a/tools/lib/bpf/btf.c +++ b/tools/lib/bpf/btf.c @@ -130,7 +130,7 @@ static inline __u64 ptr_to_u64(const void *ptr) /* Ensure given dynamically allocated memory region pointed to by *data* with * capacity of *cap_cnt* elements each taking *elem_sz* bytes has enough - * memory to accomodate *add_cnt* new elements, assuming *cur_cnt* elements + * memory to accommodate *add_cnt* new elements, assuming *cur_cnt* elements * are already used. At most *max_cnt* elements can be ever allocated. 
* If necessary, memory is reallocated and all existing data is copied over, * new pointer to the memory region is stored at *data, new memory region @@ -305,6 +305,8 @@ static int btf_type_size(const struct btf_type *t) return base_size + sizeof(__u32); case BTF_KIND_ENUM: return base_size + vlen * sizeof(struct btf_enum); + case BTF_KIND_ENUM64: + return base_size + vlen * sizeof(struct btf_enum64); case BTF_KIND_ARRAY: return base_size + sizeof(struct btf_array); case BTF_KIND_STRUCT: @@ -334,6 +336,7 @@ static void btf_bswap_type_base(struct btf_type *t) static int btf_bswap_type_rest(struct btf_type *t) { struct btf_var_secinfo *v; + struct btf_enum64 *e64; struct btf_member *m; struct btf_array *a; struct btf_param *p; @@ -361,6 +364,13 @@ static int btf_bswap_type_rest(struct btf_type *t) e->val = bswap_32(e->val); } return 0; + case BTF_KIND_ENUM64: + for (i = 0, e64 = btf_enum64(t); i < vlen; i++, e64++) { + e64->name_off = bswap_32(e64->name_off); + e64->val_lo32 = bswap_32(e64->val_lo32); + e64->val_hi32 = bswap_32(e64->val_hi32); + } + return 0; case BTF_KIND_ARRAY: a = btf_array(t); a->type = bswap_32(a->type); @@ -438,11 +448,6 @@ static int btf_parse_type_sec(struct btf *btf) return 0; } -__u32 btf__get_nr_types(const struct btf *btf) -{ - return btf->start_id + btf->nr_types - 1; -} - __u32 btf__type_cnt(const struct btf *btf) { return btf->start_id + btf->nr_types; @@ -472,9 +477,22 @@ const struct btf_type *btf__type_by_id(const struct btf *btf, __u32 type_id) static int determine_ptr_size(const struct btf *btf) { + static const char * const long_aliases[] = { + "long", + "long int", + "int long", + "unsigned long", + "long unsigned", + "unsigned long int", + "unsigned int long", + "long unsigned int", + "long int unsigned", + "int unsigned long", + "int long unsigned", + }; const struct btf_type *t; const char *name; - int i, n; + int i, j, n; if (btf->base_btf && btf->base_btf->ptr_sz > 0) return btf->base_btf->ptr_sz; @@ -485,15 +503,16 @@ 
static int determine_ptr_size(const struct btf *btf) if (!btf_is_int(t)) continue; + if (t->size != 4 && t->size != 8) + continue; + name = btf__name_by_offset(btf, t->name_off); if (!name) continue; - if (strcmp(name, "long int") == 0 || - strcmp(name, "long unsigned int") == 0) { - if (t->size != 4 && t->size != 8) - continue; - return t->size; + for (j = 0; j < ARRAY_SIZE(long_aliases); j++) { + if (strcmp(name, long_aliases[j]) == 0) + return t->size; } } @@ -597,6 +616,7 @@ __s64 btf__resolve_size(const struct btf *btf, __u32 type_id) case BTF_KIND_STRUCT: case BTF_KIND_UNION: case BTF_KIND_ENUM: + case BTF_KIND_ENUM64: case BTF_KIND_DATASEC: case BTF_KIND_FLOAT: size = t->size; @@ -644,6 +664,7 @@ int btf__align_of(const struct btf *btf, __u32 id) switch (kind) { case BTF_KIND_INT: case BTF_KIND_ENUM: + case BTF_KIND_ENUM64: case BTF_KIND_FLOAT: return min(btf_ptr_sz(btf), (size_t)t->size); case BTF_KIND_PTR: @@ -1382,92 +1403,6 @@ struct btf *btf__load_from_kernel_by_id(__u32 id) return btf__load_from_kernel_by_id_split(id, NULL); } -int btf__get_from_id(__u32 id, struct btf **btf) -{ - struct btf *res; - int err; - - *btf = NULL; - res = btf__load_from_kernel_by_id(id); - err = libbpf_get_error(res); - - if (err) - return libbpf_err(err); - - *btf = res; - return 0; -} - -int btf__get_map_kv_tids(const struct btf *btf, const char *map_name, - __u32 expected_key_size, __u32 expected_value_size, - __u32 *key_type_id, __u32 *value_type_id) -{ - const struct btf_type *container_type; - const struct btf_member *key, *value; - const size_t max_name = 256; - char container_name[max_name]; - __s64 key_size, value_size; - __s32 container_id; - - if (snprintf(container_name, max_name, "____btf_map_%s", map_name) == max_name) { - pr_warn("map:%s length of '____btf_map_%s' is too long\n", - map_name, map_name); - return libbpf_err(-EINVAL); - } - - container_id = btf__find_by_name(btf, container_name); - if (container_id < 0) { - pr_debug("map:%s container_name:%s 
cannot be found in BTF. Missing BPF_ANNOTATE_KV_PAIR?\n", - map_name, container_name); - return libbpf_err(container_id); - } - - container_type = btf__type_by_id(btf, container_id); - if (!container_type) { - pr_warn("map:%s cannot find BTF type for container_id:%u\n", - map_name, container_id); - return libbpf_err(-EINVAL); - } - - if (!btf_is_struct(container_type) || btf_vlen(container_type) < 2) { - pr_warn("map:%s container_name:%s is an invalid container struct\n", - map_name, container_name); - return libbpf_err(-EINVAL); - } - - key = btf_members(container_type); - value = key + 1; - - key_size = btf__resolve_size(btf, key->type); - if (key_size < 0) { - pr_warn("map:%s invalid BTF key_type_size\n", map_name); - return libbpf_err(key_size); - } - - if (expected_key_size != key_size) { - pr_warn("map:%s btf_key_type_size:%u != map_def_key_size:%u\n", - map_name, (__u32)key_size, expected_key_size); - return libbpf_err(-EINVAL); - } - - value_size = btf__resolve_size(btf, value->type); - if (value_size < 0) { - pr_warn("map:%s invalid BTF value_type_size\n", map_name); - return libbpf_err(value_size); - } - - if (expected_value_size != value_size) { - pr_warn("map:%s btf_value_type_size:%u != map_def_value_size:%u\n", - map_name, (__u32)value_size, expected_value_size); - return libbpf_err(-EINVAL); - } - - *key_type_id = key->type; - *value_type_id = value->type; - - return 0; -} - static void btf_invalidate_raw_data(struct btf *btf) { if (btf->raw_data) { @@ -2115,20 +2050,8 @@ int btf__add_field(struct btf *btf, const char *name, int type_id, return 0; } -/* - * Append new BTF_KIND_ENUM type with: - * - *name* - name of the enum, can be NULL or empty for anonymous enums; - * - *byte_sz* - size of the enum, in bytes. - * - * Enum initially has no enum values in it (and corresponds to enum forward - * declaration). Enumerator values can be added by btf__add_enum_value() - * immediately after btf__add_enum() succeeds. 
- * - * Returns: - * - >0, type ID of newly added BTF type; - * - <0, on error. - */ -int btf__add_enum(struct btf *btf, const char *name, __u32 byte_sz) +static int btf_add_enum_common(struct btf *btf, const char *name, __u32 byte_sz, + bool is_signed, __u8 kind) { struct btf_type *t; int sz, name_off = 0; @@ -2153,13 +2076,35 @@ int btf__add_enum(struct btf *btf, const char *name, __u32 byte_sz) /* start out with vlen=0; it will be adjusted when adding enum values */ t->name_off = name_off; - t->info = btf_type_info(BTF_KIND_ENUM, 0, 0); + t->info = btf_type_info(kind, 0, is_signed); t->size = byte_sz; return btf_commit_type(btf, sz); } /* + * Append new BTF_KIND_ENUM type with: + * - *name* - name of the enum, can be NULL or empty for anonymous enums; + * - *byte_sz* - size of the enum, in bytes. + * + * Enum initially has no enum values in it (and corresponds to enum forward + * declaration). Enumerator values can be added by btf__add_enum_value() + * immediately after btf__add_enum() succeeds. + * + * Returns: + * - >0, type ID of newly added BTF type; + * - <0, on error. + */ +int btf__add_enum(struct btf *btf, const char *name, __u32 byte_sz) +{ + /* + * set the signedness to be unsigned, it will change to signed + * if any later enumerator is negative. 
+ */ + return btf_add_enum_common(btf, name, byte_sz, false, BTF_KIND_ENUM); +} + +/* * Append new enum value for the current ENUM type with: * - *name* - name of the enumerator value, can't be NULL or empty; * - *value* - integer value corresponding to enum value *name*; @@ -2206,6 +2151,82 @@ int btf__add_enum_value(struct btf *btf, const char *name, __s64 value) t = btf_last_type(btf); btf_type_inc_vlen(t); + /* if negative value, set signedness to signed */ + if (value < 0) + t->info = btf_type_info(btf_kind(t), btf_vlen(t), true); + + btf->hdr->type_len += sz; + btf->hdr->str_off += sz; + return 0; +} + +/* + * Append new BTF_KIND_ENUM64 type with: + * - *name* - name of the enum, can be NULL or empty for anonymous enums; + * - *byte_sz* - size of the enum, in bytes. + * - *is_signed* - whether the enum values are signed or not; + * + * Enum initially has no enum values in it (and corresponds to enum forward + * declaration). Enumerator values can be added by btf__add_enum64_value() + * immediately after btf__add_enum64() succeeds. + * + * Returns: + * - >0, type ID of newly added BTF type; + * - <0, on error. + */ +int btf__add_enum64(struct btf *btf, const char *name, __u32 byte_sz, + bool is_signed) +{ + return btf_add_enum_common(btf, name, byte_sz, is_signed, + BTF_KIND_ENUM64); +} + +/* + * Append new enum value for the current ENUM64 type with: + * - *name* - name of the enumerator value, can't be NULL or empty; + * - *value* - integer value corresponding to enum value *name*; + * Returns: + * - 0, on success; + * - <0, on error. 
+ */ +int btf__add_enum64_value(struct btf *btf, const char *name, __u64 value) +{ + struct btf_enum64 *v; + struct btf_type *t; + int sz, name_off; + + /* last type should be BTF_KIND_ENUM64 */ + if (btf->nr_types == 0) + return libbpf_err(-EINVAL); + t = btf_last_type(btf); + if (!btf_is_enum64(t)) + return libbpf_err(-EINVAL); + + /* non-empty name */ + if (!name || !name[0]) + return libbpf_err(-EINVAL); + + /* decompose and invalidate raw data */ + if (btf_ensure_modifiable(btf)) + return libbpf_err(-ENOMEM); + + sz = sizeof(struct btf_enum64); + v = btf_add_type_mem(btf, sz); + if (!v) + return libbpf_err(-ENOMEM); + + name_off = btf__add_str(btf, name); + if (name_off < 0) + return name_off; + + v->name_off = name_off; + v->val_lo32 = (__u32)value; + v->val_hi32 = value >> 32; + + /* update parent type's vlen */ + t = btf_last_type(btf); + btf_type_inc_vlen(t); + btf->hdr->type_len += sz; btf->hdr->str_off += sz; return 0; @@ -2853,81 +2874,6 @@ const void *btf_ext__get_raw_data(const struct btf_ext *btf_ext, __u32 *size) return btf_ext->data; } -static int btf_ext_reloc_info(const struct btf *btf, - const struct btf_ext_info *ext_info, - const char *sec_name, __u32 insns_cnt, - void **info, __u32 *cnt) -{ - __u32 sec_hdrlen = sizeof(struct btf_ext_info_sec); - __u32 i, record_size, existing_len, records_len; - struct btf_ext_info_sec *sinfo; - const char *info_sec_name; - __u64 remain_len; - void *data; - - record_size = ext_info->rec_size; - sinfo = ext_info->info; - remain_len = ext_info->len; - while (remain_len > 0) { - records_len = sinfo->num_info * record_size; - info_sec_name = btf__name_by_offset(btf, sinfo->sec_name_off); - if (strcmp(info_sec_name, sec_name)) { - remain_len -= sec_hdrlen + records_len; - sinfo = (void *)sinfo + sec_hdrlen + records_len; - continue; - } - - existing_len = (*cnt) * record_size; - data = realloc(*info, existing_len + records_len); - if (!data) - return libbpf_err(-ENOMEM); - - memcpy(data + existing_len, 
sinfo->data, records_len); - /* adjust insn_off only, the rest data will be passed - * to the kernel. - */ - for (i = 0; i < sinfo->num_info; i++) { - __u32 *insn_off; - - insn_off = data + existing_len + (i * record_size); - *insn_off = *insn_off / sizeof(struct bpf_insn) + insns_cnt; - } - *info = data; - *cnt += sinfo->num_info; - return 0; - } - - return libbpf_err(-ENOENT); -} - -int btf_ext__reloc_func_info(const struct btf *btf, - const struct btf_ext *btf_ext, - const char *sec_name, __u32 insns_cnt, - void **func_info, __u32 *cnt) -{ - return btf_ext_reloc_info(btf, &btf_ext->func_info, sec_name, - insns_cnt, func_info, cnt); -} - -int btf_ext__reloc_line_info(const struct btf *btf, - const struct btf_ext *btf_ext, - const char *sec_name, __u32 insns_cnt, - void **line_info, __u32 *cnt) -{ - return btf_ext_reloc_info(btf, &btf_ext->line_info, sec_name, - insns_cnt, line_info, cnt); -} - -__u32 btf_ext__func_info_rec_size(const struct btf_ext *btf_ext) -{ - return btf_ext->func_info.rec_size; -} - -__u32 btf_ext__line_info_rec_size(const struct btf_ext *btf_ext) -{ - return btf_ext->line_info.rec_size; -} - struct btf_dedup; static struct btf_dedup *btf_dedup_new(struct btf *btf, const struct btf_dedup_opts *opts); @@ -3077,9 +3023,7 @@ static int btf_dedup_remap_types(struct btf_dedup *d); * deduplicating structs/unions is described in greater details in comments for * `btf_dedup_is_equiv` function. 
*/ - -DEFAULT_VERSION(btf__dedup_v0_6_0, btf__dedup, LIBBPF_0.6.0) -int btf__dedup_v0_6_0(struct btf *btf, const struct btf_dedup_opts *opts) +int btf__dedup(struct btf *btf, const struct btf_dedup_opts *opts) { struct btf_dedup *d; int err; @@ -3139,19 +3083,6 @@ done: return libbpf_err(err); } -COMPAT_VERSION(btf__dedup_deprecated, btf__dedup, LIBBPF_0.0.2) -int btf__dedup_deprecated(struct btf *btf, struct btf_ext *btf_ext, const void *unused_opts) -{ - LIBBPF_OPTS(btf_dedup_opts, opts, .btf_ext = btf_ext); - - if (unused_opts) { - pr_warn("please use new version of btf__dedup() that supports options\n"); - return libbpf_err(-ENOTSUP); - } - - return btf__dedup(btf, &opts); -} - #define BTF_UNPROCESSED_ID ((__u32)-1) #define BTF_IN_PROGRESS_ID ((__u32)-2) @@ -3470,7 +3401,7 @@ static bool btf_equal_int_tag(struct btf_type *t1, struct btf_type *t2) return info1 == info2; } -/* Calculate type signature hash of ENUM. */ +/* Calculate type signature hash of ENUM/ENUM64. */ static long btf_hash_enum(struct btf_type *t) { long h; @@ -3504,9 +3435,31 @@ static bool btf_equal_enum(struct btf_type *t1, struct btf_type *t2) return true; } +static bool btf_equal_enum64(struct btf_type *t1, struct btf_type *t2) +{ + const struct btf_enum64 *m1, *m2; + __u16 vlen; + int i; + + if (!btf_equal_common(t1, t2)) + return false; + + vlen = btf_vlen(t1); + m1 = btf_enum64(t1); + m2 = btf_enum64(t2); + for (i = 0; i < vlen; i++) { + if (m1->name_off != m2->name_off || m1->val_lo32 != m2->val_lo32 || + m1->val_hi32 != m2->val_hi32) + return false; + m1++; + m2++; + } + return true; +} + static inline bool btf_is_enum_fwd(struct btf_type *t) { - return btf_is_enum(t) && btf_vlen(t) == 0; + return btf_is_any_enum(t) && btf_vlen(t) == 0; } static bool btf_compat_enum(struct btf_type *t1, struct btf_type *t2) @@ -3519,6 +3472,17 @@ static bool btf_compat_enum(struct btf_type *t1, struct btf_type *t2) t1->size == t2->size; } +static bool btf_compat_enum64(struct btf_type *t1, struct 
btf_type *t2) +{ + if (!btf_is_enum_fwd(t1) && !btf_is_enum_fwd(t2)) + return btf_equal_enum64(t1, t2); + + /* ignore vlen when comparing */ + return t1->name_off == t2->name_off && + (t1->info & ~0xffff) == (t2->info & ~0xffff) && + t1->size == t2->size; +} + /* * Calculate type signature hash of STRUCT/UNION, ignoring referenced type IDs, * as referenced type IDs equivalence is established separately during type @@ -3731,6 +3695,7 @@ static int btf_dedup_prep(struct btf_dedup *d) h = btf_hash_int_decl_tag(t); break; case BTF_KIND_ENUM: + case BTF_KIND_ENUM64: h = btf_hash_enum(t); break; case BTF_KIND_STRUCT: @@ -3820,6 +3785,27 @@ static int btf_dedup_prim_type(struct btf_dedup *d, __u32 type_id) } break; + case BTF_KIND_ENUM64: + h = btf_hash_enum(t); + for_each_dedup_cand(d, hash_entry, h) { + cand_id = (__u32)(long)hash_entry->value; + cand = btf_type_by_id(d->btf, cand_id); + if (btf_equal_enum64(t, cand)) { + new_id = cand_id; + break; + } + if (btf_compat_enum64(t, cand)) { + if (btf_is_enum_fwd(t)) { + /* resolve fwd to full enum */ + new_id = cand_id; + break; + } + /* resolve canonical enum fwd to full enum */ + d->map[cand_id] = type_id; + } + } + break; + case BTF_KIND_FWD: case BTF_KIND_FLOAT: h = btf_hash_common(t); @@ -4115,6 +4101,9 @@ static int btf_dedup_is_equiv(struct btf_dedup *d, __u32 cand_id, case BTF_KIND_ENUM: return btf_compat_enum(cand_type, canon_type); + case BTF_KIND_ENUM64: + return btf_compat_enum64(cand_type, canon_type); + case BTF_KIND_FWD: case BTF_KIND_FLOAT: return btf_equal_common(cand_type, canon_type); @@ -4717,6 +4706,7 @@ int btf_type_visit_type_ids(struct btf_type *t, type_id_visit_fn visit, void *ct case BTF_KIND_INT: case BTF_KIND_FLOAT: case BTF_KIND_ENUM: + case BTF_KIND_ENUM64: return 0; case BTF_KIND_FWD: @@ -4811,6 +4801,16 @@ int btf_type_visit_str_offs(struct btf_type *t, str_off_visit_fn visit, void *ct } break; } + case BTF_KIND_ENUM64: { + struct btf_enum64 *m = btf_enum64(t); + + for (i = 0, n = 
btf_vlen(t); i < n; i++, m++) { + err = visit(&m->name_off, ctx); + if (err) + return err; + } + break; + } case BTF_KIND_FUNC_PROTO: { struct btf_param *m = btf_params(t); diff --git a/tools/lib/bpf/btf.h b/tools/lib/bpf/btf.h index 951ac7475794..583760df83b4 100644 --- a/tools/lib/bpf/btf.h +++ b/tools/lib/bpf/btf.h @@ -120,20 +120,12 @@ LIBBPF_API struct btf *libbpf_find_kernel_btf(void); LIBBPF_API struct btf *btf__load_from_kernel_by_id(__u32 id); LIBBPF_API struct btf *btf__load_from_kernel_by_id_split(__u32 id, struct btf *base_btf); -LIBBPF_DEPRECATED_SINCE(0, 6, "use btf__load_from_kernel_by_id instead") -LIBBPF_API int btf__get_from_id(__u32 id, struct btf **btf); -LIBBPF_DEPRECATED_SINCE(0, 6, "intended for internal libbpf use only") -LIBBPF_API int btf__finalize_data(struct bpf_object *obj, struct btf *btf); -LIBBPF_DEPRECATED_SINCE(0, 6, "use btf__load_into_kernel instead") -LIBBPF_API int btf__load(struct btf *btf); LIBBPF_API int btf__load_into_kernel(struct btf *btf); LIBBPF_API __s32 btf__find_by_name(const struct btf *btf, const char *type_name); LIBBPF_API __s32 btf__find_by_name_kind(const struct btf *btf, const char *type_name, __u32 kind); -LIBBPF_DEPRECATED_SINCE(0, 7, "use btf__type_cnt() instead; note that btf__get_nr_types() == btf__type_cnt() - 1") -LIBBPF_API __u32 btf__get_nr_types(const struct btf *btf); LIBBPF_API __u32 btf__type_cnt(const struct btf *btf); LIBBPF_API const struct btf *btf__base_btf(const struct btf *btf); LIBBPF_API const struct btf_type *btf__type_by_id(const struct btf *btf, @@ -150,29 +142,10 @@ LIBBPF_API void btf__set_fd(struct btf *btf, int fd); LIBBPF_API const void *btf__raw_data(const struct btf *btf, __u32 *size); LIBBPF_API const char *btf__name_by_offset(const struct btf *btf, __u32 offset); LIBBPF_API const char *btf__str_by_offset(const struct btf *btf, __u32 offset); -LIBBPF_DEPRECATED_SINCE(0, 7, "this API is not necessary when BTF-defined maps are used") -LIBBPF_API int btf__get_map_kv_tids(const 
struct btf *btf, const char *map_name, - __u32 expected_key_size, - __u32 expected_value_size, - __u32 *key_type_id, __u32 *value_type_id); LIBBPF_API struct btf_ext *btf_ext__new(const __u8 *data, __u32 size); LIBBPF_API void btf_ext__free(struct btf_ext *btf_ext); LIBBPF_API const void *btf_ext__raw_data(const struct btf_ext *btf_ext, __u32 *size); -LIBBPF_API LIBBPF_DEPRECATED("btf_ext__reloc_func_info was never meant as a public API and has wrong assumptions embedded in it; it will be removed in the future libbpf versions") -int btf_ext__reloc_func_info(const struct btf *btf, - const struct btf_ext *btf_ext, - const char *sec_name, __u32 insns_cnt, - void **func_info, __u32 *cnt); -LIBBPF_API LIBBPF_DEPRECATED("btf_ext__reloc_line_info was never meant as a public API and has wrong assumptions embedded in it; it will be removed in the future libbpf versions") -int btf_ext__reloc_line_info(const struct btf *btf, - const struct btf_ext *btf_ext, - const char *sec_name, __u32 insns_cnt, - void **line_info, __u32 *cnt); -LIBBPF_API LIBBPF_DEPRECATED("btf_ext__reloc_func_info is deprecated; write custom func_info parsing to fetch rec_size") -__u32 btf_ext__func_info_rec_size(const struct btf_ext *btf_ext); -LIBBPF_API LIBBPF_DEPRECATED("btf_ext__reloc_line_info is deprecated; write custom line_info parsing to fetch rec_size") -__u32 btf_ext__line_info_rec_size(const struct btf_ext *btf_ext); LIBBPF_API int btf__find_str(struct btf *btf, const char *s); LIBBPF_API int btf__add_str(struct btf *btf, const char *s); @@ -215,6 +188,8 @@ LIBBPF_API int btf__add_field(struct btf *btf, const char *name, int field_type_ /* enum construction APIs */ LIBBPF_API int btf__add_enum(struct btf *btf, const char *name, __u32 bytes_sz); LIBBPF_API int btf__add_enum_value(struct btf *btf, const char *name, __s64 value); +LIBBPF_API int btf__add_enum64(struct btf *btf, const char *name, __u32 bytes_sz, bool is_signed); +LIBBPF_API int btf__add_enum64_value(struct btf *btf, const char 
*name, __u64 value); enum btf_fwd_kind { BTF_FWD_STRUCT = 0, @@ -257,22 +232,12 @@ struct btf_dedup_opts { LIBBPF_API int btf__dedup(struct btf *btf, const struct btf_dedup_opts *opts); -LIBBPF_API int btf__dedup_v0_6_0(struct btf *btf, const struct btf_dedup_opts *opts); - -LIBBPF_DEPRECATED_SINCE(0, 7, "use btf__dedup() instead") -LIBBPF_API int btf__dedup_deprecated(struct btf *btf, struct btf_ext *btf_ext, const void *opts); -#define btf__dedup(...) ___libbpf_overload(___btf_dedup, __VA_ARGS__) -#define ___btf_dedup3(btf, btf_ext, opts) btf__dedup_deprecated(btf, btf_ext, opts) -#define ___btf_dedup2(btf, opts) btf__dedup(btf, opts) - struct btf_dump; struct btf_dump_opts { - union { - size_t sz; - void *ctx; /* DEPRECATED: will be gone in v1.0 */ - }; + size_t sz; }; +#define btf_dump_opts__last_field sz typedef void (*btf_dump_printf_fn_t)(void *ctx, const char *fmt, va_list args); @@ -281,51 +246,6 @@ LIBBPF_API struct btf_dump *btf_dump__new(const struct btf *btf, void *ctx, const struct btf_dump_opts *opts); -LIBBPF_API struct btf_dump *btf_dump__new_v0_6_0(const struct btf *btf, - btf_dump_printf_fn_t printf_fn, - void *ctx, - const struct btf_dump_opts *opts); - -LIBBPF_API struct btf_dump *btf_dump__new_deprecated(const struct btf *btf, - const struct btf_ext *btf_ext, - const struct btf_dump_opts *opts, - btf_dump_printf_fn_t printf_fn); - -/* Choose either btf_dump__new() or btf_dump__new_deprecated() based on the - * type of 4th argument. If it's btf_dump's print callback, use deprecated - * API; otherwise, choose the new btf_dump__new(). ___libbpf_override() - * doesn't work here because both variants have 4 input arguments. 
- * - * (void *) casts are necessary to avoid compilation warnings about type - * mismatches, because even though __builtin_choose_expr() only ever evaluates - * one side the other side still has to satisfy type constraints (this is - * compiler implementation limitation which might be lifted eventually, - * according to the documentation). So passing struct btf_ext in place of - * btf_dump_printf_fn_t would be generating compilation warning. Casting to - * void * avoids this issue. - * - * Also, two type compatibility checks for a function and function pointer are - * required because passing function reference into btf_dump__new() as - * btf_dump__new(..., my_callback, ...) and as btf_dump__new(..., - * &my_callback, ...) (not explicit ampersand in the latter case) actually - * differs as far as __builtin_types_compatible_p() is concerned. Thus two - * checks are combined to detect callback argument. - * - * The rest works just like in case of ___libbpf_override() usage with symbol - * versioning. - * - * C++ compilers don't support __builtin_types_compatible_p(), so at least - * don't screw up compilation for them and let C++ users pick btf_dump__new - * vs btf_dump__new_deprecated explicitly. 
- */ -#ifndef __cplusplus -#define btf_dump__new(a1, a2, a3, a4) __builtin_choose_expr( \ - __builtin_types_compatible_p(typeof(a4), btf_dump_printf_fn_t) || \ - __builtin_types_compatible_p(typeof(a4), void(void *, const char *, va_list)), \ - btf_dump__new_deprecated((void *)a1, (void *)a2, (void *)a3, (void *)a4), \ - btf_dump__new((void *)a1, (void *)a2, (void *)a3, (void *)a4)) -#endif - LIBBPF_API void btf_dump__free(struct btf_dump *d); LIBBPF_API int btf_dump__dump_type(struct btf_dump *d, __u32 id); @@ -393,9 +313,10 @@ btf_dump__dump_type_data(struct btf_dump *d, __u32 id, #ifndef BTF_KIND_FLOAT #define BTF_KIND_FLOAT 16 /* Floating point */ #endif -/* The kernel header switched to enums, so these two were never #defined */ +/* The kernel header switched to enums, so the following were never #defined */ #define BTF_KIND_DECL_TAG 17 /* Decl Tag */ #define BTF_KIND_TYPE_TAG 18 /* Type Tag */ +#define BTF_KIND_ENUM64 19 /* Enum for up-to 64bit values */ static inline __u16 btf_kind(const struct btf_type *t) { @@ -454,6 +375,11 @@ static inline bool btf_is_enum(const struct btf_type *t) return btf_kind(t) == BTF_KIND_ENUM; } +static inline bool btf_is_enum64(const struct btf_type *t) +{ + return btf_kind(t) == BTF_KIND_ENUM64; +} + static inline bool btf_is_fwd(const struct btf_type *t) { return btf_kind(t) == BTF_KIND_FWD; @@ -524,6 +450,18 @@ static inline bool btf_is_type_tag(const struct btf_type *t) return btf_kind(t) == BTF_KIND_TYPE_TAG; } +static inline bool btf_is_any_enum(const struct btf_type *t) +{ + return btf_is_enum(t) || btf_is_enum64(t); +} + +static inline bool btf_kind_core_compat(const struct btf_type *t1, + const struct btf_type *t2) +{ + return btf_kind(t1) == btf_kind(t2) || + (btf_is_any_enum(t1) && btf_is_any_enum(t2)); +} + static inline __u8 btf_int_encoding(const struct btf_type *t) { return BTF_INT_ENCODING(*(__u32 *)(t + 1)); @@ -549,6 +487,16 @@ static inline struct btf_enum *btf_enum(const struct btf_type *t) return (struct 
btf_enum *)(t + 1); } +static inline struct btf_enum64 *btf_enum64(const struct btf_type *t) +{ + return (struct btf_enum64 *)(t + 1); +} + +static inline __u64 btf_enum64_value(const struct btf_enum64 *e) +{ + return ((__u64)e->val_hi32 << 32) | e->val_lo32; +} + static inline struct btf_member *btf_members(const struct btf_type *t) { return (struct btf_member *)(t + 1); diff --git a/tools/lib/bpf/btf_dump.c b/tools/lib/bpf/btf_dump.c index 6b1bc1f43728..627edb5bb6de 100644 --- a/tools/lib/bpf/btf_dump.c +++ b/tools/lib/bpf/btf_dump.c @@ -144,15 +144,17 @@ static void btf_dump_printf(const struct btf_dump *d, const char *fmt, ...) static int btf_dump_mark_referenced(struct btf_dump *d); static int btf_dump_resize(struct btf_dump *d); -DEFAULT_VERSION(btf_dump__new_v0_6_0, btf_dump__new, LIBBPF_0.6.0) -struct btf_dump *btf_dump__new_v0_6_0(const struct btf *btf, - btf_dump_printf_fn_t printf_fn, - void *ctx, - const struct btf_dump_opts *opts) +struct btf_dump *btf_dump__new(const struct btf *btf, + btf_dump_printf_fn_t printf_fn, + void *ctx, + const struct btf_dump_opts *opts) { struct btf_dump *d; int err; + if (!OPTS_VALID(opts, btf_dump_opts)) + return libbpf_err_ptr(-EINVAL); + if (!printf_fn) return libbpf_err_ptr(-EINVAL); @@ -188,17 +190,6 @@ err: return libbpf_err_ptr(err); } -COMPAT_VERSION(btf_dump__new_deprecated, btf_dump__new, LIBBPF_0.0.4) -struct btf_dump *btf_dump__new_deprecated(const struct btf *btf, - const struct btf_ext *btf_ext, - const struct btf_dump_opts *opts, - btf_dump_printf_fn_t printf_fn) -{ - if (!printf_fn) - return libbpf_err_ptr(-EINVAL); - return btf_dump__new_v0_6_0(btf, printf_fn, opts ? 
opts->ctx : NULL, opts); -} - static int btf_dump_resize(struct btf_dump *d) { int err, last_id = btf__type_cnt(d->btf) - 1; @@ -318,6 +309,7 @@ static int btf_dump_mark_referenced(struct btf_dump *d) switch (btf_kind(t)) { case BTF_KIND_INT: case BTF_KIND_ENUM: + case BTF_KIND_ENUM64: case BTF_KIND_FWD: case BTF_KIND_FLOAT: break; @@ -538,6 +530,7 @@ static int btf_dump_order_type(struct btf_dump *d, __u32 id, bool through_ptr) return 1; } case BTF_KIND_ENUM: + case BTF_KIND_ENUM64: case BTF_KIND_FWD: /* * non-anonymous or non-referenced enums are top-level @@ -739,6 +732,7 @@ static void btf_dump_emit_type(struct btf_dump *d, __u32 id, __u32 cont_id) tstate->emit_state = EMITTED; break; case BTF_KIND_ENUM: + case BTF_KIND_ENUM64: if (top_level_def) { btf_dump_emit_enum_def(d, id, t, 0); btf_dump_printf(d, ";\n\n"); @@ -989,38 +983,81 @@ static void btf_dump_emit_enum_fwd(struct btf_dump *d, __u32 id, btf_dump_printf(d, "enum %s", btf_dump_type_name(d, id)); } -static void btf_dump_emit_enum_def(struct btf_dump *d, __u32 id, - const struct btf_type *t, - int lvl) +static void btf_dump_emit_enum32_val(struct btf_dump *d, + const struct btf_type *t, + int lvl, __u16 vlen) { const struct btf_enum *v = btf_enum(t); - __u16 vlen = btf_vlen(t); + bool is_signed = btf_kflag(t); + const char *fmt_str; const char *name; size_t dup_cnt; int i; + for (i = 0; i < vlen; i++, v++) { + name = btf_name_of(d, v->name_off); + /* enumerators share namespace with typedef idents */ + dup_cnt = btf_dump_name_dups(d, d->ident_names, name); + if (dup_cnt > 1) { + fmt_str = is_signed ? "\n%s%s___%zd = %d," : "\n%s%s___%zd = %u,"; + btf_dump_printf(d, fmt_str, pfx(lvl + 1), name, dup_cnt, v->val); + } else { + fmt_str = is_signed ? 
"\n%s%s = %d," : "\n%s%s = %u,"; + btf_dump_printf(d, fmt_str, pfx(lvl + 1), name, v->val); + } + } +} + +static void btf_dump_emit_enum64_val(struct btf_dump *d, + const struct btf_type *t, + int lvl, __u16 vlen) +{ + const struct btf_enum64 *v = btf_enum64(t); + bool is_signed = btf_kflag(t); + const char *fmt_str; + const char *name; + size_t dup_cnt; + __u64 val; + int i; + + for (i = 0; i < vlen; i++, v++) { + name = btf_name_of(d, v->name_off); + dup_cnt = btf_dump_name_dups(d, d->ident_names, name); + val = btf_enum64_value(v); + if (dup_cnt > 1) { + fmt_str = is_signed ? "\n%s%s___%zd = %lldLL," + : "\n%s%s___%zd = %lluULL,"; + btf_dump_printf(d, fmt_str, + pfx(lvl + 1), name, dup_cnt, + (unsigned long long)val); + } else { + fmt_str = is_signed ? "\n%s%s = %lldLL," + : "\n%s%s = %lluULL,"; + btf_dump_printf(d, fmt_str, + pfx(lvl + 1), name, + (unsigned long long)val); + } + } +} +static void btf_dump_emit_enum_def(struct btf_dump *d, __u32 id, + const struct btf_type *t, + int lvl) +{ + __u16 vlen = btf_vlen(t); + btf_dump_printf(d, "enum%s%s", t->name_off ? 
" " : "", btf_dump_type_name(d, id)); - if (vlen) { - btf_dump_printf(d, " {"); - for (i = 0; i < vlen; i++, v++) { - name = btf_name_of(d, v->name_off); - /* enumerators share namespace with typedef idents */ - dup_cnt = btf_dump_name_dups(d, d->ident_names, name); - if (dup_cnt > 1) { - btf_dump_printf(d, "\n%s%s___%zu = %u,", - pfx(lvl + 1), name, dup_cnt, - (__u32)v->val); - } else { - btf_dump_printf(d, "\n%s%s = %u,", - pfx(lvl + 1), name, - (__u32)v->val); - } - } - btf_dump_printf(d, "\n%s}", pfx(lvl)); - } + if (!vlen) + return; + + btf_dump_printf(d, " {"); + if (btf_is_enum(t)) + btf_dump_emit_enum32_val(d, t, lvl, vlen); + else + btf_dump_emit_enum64_val(d, t, lvl, vlen); + btf_dump_printf(d, "\n%s}", pfx(lvl)); } static void btf_dump_emit_fwd_def(struct btf_dump *d, __u32 id, @@ -1178,6 +1215,7 @@ skip_mod: break; case BTF_KIND_INT: case BTF_KIND_ENUM: + case BTF_KIND_ENUM64: case BTF_KIND_FWD: case BTF_KIND_STRUCT: case BTF_KIND_UNION: @@ -1312,6 +1350,7 @@ static void btf_dump_emit_type_chain(struct btf_dump *d, btf_dump_emit_struct_fwd(d, id, t); break; case BTF_KIND_ENUM: + case BTF_KIND_ENUM64: btf_dump_emit_mods(d, decls); /* inline anonymous enum */ if (t->name_off == 0 && !d->skip_anon_defs) @@ -1988,7 +2027,8 @@ static int btf_dump_get_enum_value(struct btf_dump *d, __u32 id, __s64 *value) { - /* handle unaligned enum value */ + bool is_signed = btf_kflag(t); + if (!ptr_is_aligned(d->btf, id, data)) { __u64 val; int err; @@ -2005,13 +2045,13 @@ static int btf_dump_get_enum_value(struct btf_dump *d, *value = *(__s64 *)data; return 0; case 4: - *value = *(__s32 *)data; + *value = is_signed ? (__s64)*(__s32 *)data : *(__u32 *)data; return 0; case 2: - *value = *(__s16 *)data; + *value = is_signed ? *(__s16 *)data : *(__u16 *)data; return 0; case 1: - *value = *(__s8 *)data; + *value = is_signed ? 
*(__s8 *)data : *(__u8 *)data; return 0; default: pr_warn("unexpected size %d for enum, id:[%u]\n", t->size, id); @@ -2024,7 +2064,7 @@ static int btf_dump_enum_data(struct btf_dump *d, __u32 id, const void *data) { - const struct btf_enum *e; + bool is_signed; __s64 value; int i, err; @@ -2032,14 +2072,31 @@ static int btf_dump_enum_data(struct btf_dump *d, if (err) return err; - for (i = 0, e = btf_enum(t); i < btf_vlen(t); i++, e++) { - if (value != e->val) - continue; - btf_dump_type_values(d, "%s", btf_name_of(d, e->name_off)); - return 0; - } + is_signed = btf_kflag(t); + if (btf_is_enum(t)) { + const struct btf_enum *e; + + for (i = 0, e = btf_enum(t); i < btf_vlen(t); i++, e++) { + if (value != e->val) + continue; + btf_dump_type_values(d, "%s", btf_name_of(d, e->name_off)); + return 0; + } - btf_dump_type_values(d, "%d", value); + btf_dump_type_values(d, is_signed ? "%d" : "%u", value); + } else { + const struct btf_enum64 *e; + + for (i = 0, e = btf_enum64(t); i < btf_vlen(t); i++, e++) { + if (value != btf_enum64_value(e)) + continue; + btf_dump_type_values(d, "%s", btf_name_of(d, e->name_off)); + return 0; + } + + btf_dump_type_values(d, is_signed ? 
"%lldLL" : "%lluULL", + (unsigned long long)value); + } return 0; } @@ -2099,6 +2156,7 @@ static int btf_dump_type_data_check_overflow(struct btf_dump *d, case BTF_KIND_FLOAT: case BTF_KIND_PTR: case BTF_KIND_ENUM: + case BTF_KIND_ENUM64: if (data + bits_offset / 8 + size > d->typed_dump->data_end) return -E2BIG; break; @@ -2203,6 +2261,7 @@ static int btf_dump_type_data_check_zero(struct btf_dump *d, return -ENODATA; } case BTF_KIND_ENUM: + case BTF_KIND_ENUM64: err = btf_dump_get_enum_value(d, t, data, id, &value); if (err) return err; @@ -2275,6 +2334,7 @@ static int btf_dump_dump_type_data(struct btf_dump *d, err = btf_dump_struct_data(d, t, id, data); break; case BTF_KIND_ENUM: + case BTF_KIND_ENUM64: /* handle bitfield and int enum values */ if (bit_sz) { __u64 print_num; diff --git a/tools/lib/bpf/gen_loader.c b/tools/lib/bpf/gen_loader.c index 927745b08014..23f5c46708f8 100644 --- a/tools/lib/bpf/gen_loader.c +++ b/tools/lib/bpf/gen_loader.c @@ -533,7 +533,7 @@ void bpf_gen__record_attach_target(struct bpf_gen *gen, const char *attach_name, gen->attach_kind = kind; ret = snprintf(gen->attach_target, sizeof(gen->attach_target), "%s%s", prefix, attach_name); - if (ret == sizeof(gen->attach_target)) + if (ret >= sizeof(gen->attach_target)) gen->error = -ENOSPC; } diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index e89cc9c885b3..50d41815f431 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -31,7 +31,6 @@ #include <linux/bpf.h> #include <linux/btf.h> #include <linux/filter.h> -#include <linux/list.h> #include <linux/limits.h> #include <linux/perf_event.h> #include <linux/ring_buffer.h> @@ -72,6 +71,135 @@ static struct bpf_map *bpf_object__add_map(struct bpf_object *obj); static bool prog_is_subprog(const struct bpf_object *obj, const struct bpf_program *prog); +static const char * const attach_type_name[] = { + [BPF_CGROUP_INET_INGRESS] = "cgroup_inet_ingress", + [BPF_CGROUP_INET_EGRESS] = "cgroup_inet_egress", + 
[BPF_CGROUP_INET_SOCK_CREATE] = "cgroup_inet_sock_create", + [BPF_CGROUP_INET_SOCK_RELEASE] = "cgroup_inet_sock_release", + [BPF_CGROUP_SOCK_OPS] = "cgroup_sock_ops", + [BPF_CGROUP_DEVICE] = "cgroup_device", + [BPF_CGROUP_INET4_BIND] = "cgroup_inet4_bind", + [BPF_CGROUP_INET6_BIND] = "cgroup_inet6_bind", + [BPF_CGROUP_INET4_CONNECT] = "cgroup_inet4_connect", + [BPF_CGROUP_INET6_CONNECT] = "cgroup_inet6_connect", + [BPF_CGROUP_INET4_POST_BIND] = "cgroup_inet4_post_bind", + [BPF_CGROUP_INET6_POST_BIND] = "cgroup_inet6_post_bind", + [BPF_CGROUP_INET4_GETPEERNAME] = "cgroup_inet4_getpeername", + [BPF_CGROUP_INET6_GETPEERNAME] = "cgroup_inet6_getpeername", + [BPF_CGROUP_INET4_GETSOCKNAME] = "cgroup_inet4_getsockname", + [BPF_CGROUP_INET6_GETSOCKNAME] = "cgroup_inet6_getsockname", + [BPF_CGROUP_UDP4_SENDMSG] = "cgroup_udp4_sendmsg", + [BPF_CGROUP_UDP6_SENDMSG] = "cgroup_udp6_sendmsg", + [BPF_CGROUP_SYSCTL] = "cgroup_sysctl", + [BPF_CGROUP_UDP4_RECVMSG] = "cgroup_udp4_recvmsg", + [BPF_CGROUP_UDP6_RECVMSG] = "cgroup_udp6_recvmsg", + [BPF_CGROUP_GETSOCKOPT] = "cgroup_getsockopt", + [BPF_CGROUP_SETSOCKOPT] = "cgroup_setsockopt", + [BPF_SK_SKB_STREAM_PARSER] = "sk_skb_stream_parser", + [BPF_SK_SKB_STREAM_VERDICT] = "sk_skb_stream_verdict", + [BPF_SK_SKB_VERDICT] = "sk_skb_verdict", + [BPF_SK_MSG_VERDICT] = "sk_msg_verdict", + [BPF_LIRC_MODE2] = "lirc_mode2", + [BPF_FLOW_DISSECTOR] = "flow_dissector", + [BPF_TRACE_RAW_TP] = "trace_raw_tp", + [BPF_TRACE_FENTRY] = "trace_fentry", + [BPF_TRACE_FEXIT] = "trace_fexit", + [BPF_MODIFY_RETURN] = "modify_return", + [BPF_LSM_MAC] = "lsm_mac", + [BPF_LSM_CGROUP] = "lsm_cgroup", + [BPF_SK_LOOKUP] = "sk_lookup", + [BPF_TRACE_ITER] = "trace_iter", + [BPF_XDP_DEVMAP] = "xdp_devmap", + [BPF_XDP_CPUMAP] = "xdp_cpumap", + [BPF_XDP] = "xdp", + [BPF_SK_REUSEPORT_SELECT] = "sk_reuseport_select", + [BPF_SK_REUSEPORT_SELECT_OR_MIGRATE] = "sk_reuseport_select_or_migrate", + [BPF_PERF_EVENT] = "perf_event", + [BPF_TRACE_KPROBE_MULTI] = 
"trace_kprobe_multi", +}; + +static const char * const link_type_name[] = { + [BPF_LINK_TYPE_UNSPEC] = "unspec", + [BPF_LINK_TYPE_RAW_TRACEPOINT] = "raw_tracepoint", + [BPF_LINK_TYPE_TRACING] = "tracing", + [BPF_LINK_TYPE_CGROUP] = "cgroup", + [BPF_LINK_TYPE_ITER] = "iter", + [BPF_LINK_TYPE_NETNS] = "netns", + [BPF_LINK_TYPE_XDP] = "xdp", + [BPF_LINK_TYPE_PERF_EVENT] = "perf_event", + [BPF_LINK_TYPE_KPROBE_MULTI] = "kprobe_multi", + [BPF_LINK_TYPE_STRUCT_OPS] = "struct_ops", +}; + +static const char * const map_type_name[] = { + [BPF_MAP_TYPE_UNSPEC] = "unspec", + [BPF_MAP_TYPE_HASH] = "hash", + [BPF_MAP_TYPE_ARRAY] = "array", + [BPF_MAP_TYPE_PROG_ARRAY] = "prog_array", + [BPF_MAP_TYPE_PERF_EVENT_ARRAY] = "perf_event_array", + [BPF_MAP_TYPE_PERCPU_HASH] = "percpu_hash", + [BPF_MAP_TYPE_PERCPU_ARRAY] = "percpu_array", + [BPF_MAP_TYPE_STACK_TRACE] = "stack_trace", + [BPF_MAP_TYPE_CGROUP_ARRAY] = "cgroup_array", + [BPF_MAP_TYPE_LRU_HASH] = "lru_hash", + [BPF_MAP_TYPE_LRU_PERCPU_HASH] = "lru_percpu_hash", + [BPF_MAP_TYPE_LPM_TRIE] = "lpm_trie", + [BPF_MAP_TYPE_ARRAY_OF_MAPS] = "array_of_maps", + [BPF_MAP_TYPE_HASH_OF_MAPS] = "hash_of_maps", + [BPF_MAP_TYPE_DEVMAP] = "devmap", + [BPF_MAP_TYPE_DEVMAP_HASH] = "devmap_hash", + [BPF_MAP_TYPE_SOCKMAP] = "sockmap", + [BPF_MAP_TYPE_CPUMAP] = "cpumap", + [BPF_MAP_TYPE_XSKMAP] = "xskmap", + [BPF_MAP_TYPE_SOCKHASH] = "sockhash", + [BPF_MAP_TYPE_CGROUP_STORAGE] = "cgroup_storage", + [BPF_MAP_TYPE_REUSEPORT_SOCKARRAY] = "reuseport_sockarray", + [BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE] = "percpu_cgroup_storage", + [BPF_MAP_TYPE_QUEUE] = "queue", + [BPF_MAP_TYPE_STACK] = "stack", + [BPF_MAP_TYPE_SK_STORAGE] = "sk_storage", + [BPF_MAP_TYPE_STRUCT_OPS] = "struct_ops", + [BPF_MAP_TYPE_RINGBUF] = "ringbuf", + [BPF_MAP_TYPE_INODE_STORAGE] = "inode_storage", + [BPF_MAP_TYPE_TASK_STORAGE] = "task_storage", + [BPF_MAP_TYPE_BLOOM_FILTER] = "bloom_filter", +}; + +static const char * const prog_type_name[] = { + [BPF_PROG_TYPE_UNSPEC] = "unspec", + 
[BPF_PROG_TYPE_SOCKET_FILTER] = "socket_filter", + [BPF_PROG_TYPE_KPROBE] = "kprobe", + [BPF_PROG_TYPE_SCHED_CLS] = "sched_cls", + [BPF_PROG_TYPE_SCHED_ACT] = "sched_act", + [BPF_PROG_TYPE_TRACEPOINT] = "tracepoint", + [BPF_PROG_TYPE_XDP] = "xdp", + [BPF_PROG_TYPE_PERF_EVENT] = "perf_event", + [BPF_PROG_TYPE_CGROUP_SKB] = "cgroup_skb", + [BPF_PROG_TYPE_CGROUP_SOCK] = "cgroup_sock", + [BPF_PROG_TYPE_LWT_IN] = "lwt_in", + [BPF_PROG_TYPE_LWT_OUT] = "lwt_out", + [BPF_PROG_TYPE_LWT_XMIT] = "lwt_xmit", + [BPF_PROG_TYPE_SOCK_OPS] = "sock_ops", + [BPF_PROG_TYPE_SK_SKB] = "sk_skb", + [BPF_PROG_TYPE_CGROUP_DEVICE] = "cgroup_device", + [BPF_PROG_TYPE_SK_MSG] = "sk_msg", + [BPF_PROG_TYPE_RAW_TRACEPOINT] = "raw_tracepoint", + [BPF_PROG_TYPE_CGROUP_SOCK_ADDR] = "cgroup_sock_addr", + [BPF_PROG_TYPE_LWT_SEG6LOCAL] = "lwt_seg6local", + [BPF_PROG_TYPE_LIRC_MODE2] = "lirc_mode2", + [BPF_PROG_TYPE_SK_REUSEPORT] = "sk_reuseport", + [BPF_PROG_TYPE_FLOW_DISSECTOR] = "flow_dissector", + [BPF_PROG_TYPE_CGROUP_SYSCTL] = "cgroup_sysctl", + [BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE] = "raw_tracepoint_writable", + [BPF_PROG_TYPE_CGROUP_SOCKOPT] = "cgroup_sockopt", + [BPF_PROG_TYPE_TRACING] = "tracing", + [BPF_PROG_TYPE_STRUCT_OPS] = "struct_ops", + [BPF_PROG_TYPE_EXT] = "ext", + [BPF_PROG_TYPE_LSM] = "lsm", + [BPF_PROG_TYPE_SK_LOOKUP] = "sk_lookup", + [BPF_PROG_TYPE_SYSCALL] = "syscall", +}; + static int __base_pr(enum libbpf_print_level level, const char *format, va_list args) { @@ -151,12 +279,9 @@ static inline __u64 ptr_to_u64(const void *ptr) return (__u64) (unsigned long) ptr; } -/* this goes away in libbpf 1.0 */ -enum libbpf_strict_mode libbpf_mode = LIBBPF_STRICT_NONE; - int libbpf_set_strict_mode(enum libbpf_strict_mode mode) { - libbpf_mode = mode; + /* as of v1.0 libbpf_set_strict_mode() is a no-op */ return 0; } @@ -219,12 +344,8 @@ enum sec_def_flags { SEC_ATTACH_BTF = 4, /* BPF program type allows sleeping/blocking in kernel */ SEC_SLEEPABLE = 8, - /* allow non-strict prefix 
matching */ - SEC_SLOPPY_PFX = 16, /* BPF program support non-linear XDP buffer */ - SEC_XDP_FRAGS = 32, - /* deprecated sec definitions not supposed to be used */ - SEC_DEPRECATED = 64, + SEC_XDP_FRAGS = 16, }; struct bpf_sec_def { @@ -244,9 +365,10 @@ struct bpf_sec_def { * linux/filter.h. */ struct bpf_program { - const struct bpf_sec_def *sec_def; + char *name; char *sec_name; size_t sec_idx; + const struct bpf_sec_def *sec_def; /* this program's instruction offset (in number of instructions) * within its containing ELF section */ @@ -266,12 +388,6 @@ struct bpf_program { */ size_t sub_insn_off; - char *name; - /* name with / replaced by _; makes recursive pinning - * in bpf_object__pin_programs easier - */ - char *pin_name; - /* instructions that belong to BPF program; insns[0] is located at * sec_insn_off instruction within its ELF section in ELF file, so * when mapping ELF file instruction index to the local instruction, @@ -292,24 +408,19 @@ struct bpf_program { size_t log_size; __u32 log_level; - struct { - int nr; - int *fds; - } instances; - bpf_program_prep_t preprocessor; - struct bpf_object *obj; - void *priv; - bpf_program_clear_priv_t clear_priv; + int fd; bool autoload; bool mark_btf_static; enum bpf_prog_type type; enum bpf_attach_type expected_attach_type; + int prog_ifindex; __u32 attach_btf_obj_fd; __u32 attach_btf_id; __u32 attach_prog_fd; + void *func_info; __u32 func_info_rec_size; __u32 func_info_cnt; @@ -356,6 +467,14 @@ enum libbpf_map_type { LIBBPF_MAP_KCONFIG, }; +struct bpf_map_def { + unsigned int type; + unsigned int key_size; + unsigned int value_size; + unsigned int max_entries; + unsigned int map_flags; +}; + struct bpf_map { struct bpf_object *obj; char *name; @@ -376,8 +495,6 @@ struct bpf_map { __u32 btf_key_type_id; __u32 btf_value_type_id; __u32 btf_vmlinux_value_type_id; - void *priv; - bpf_map_clear_priv_t clear_priv; enum libbpf_map_type libbpf_type; void *mmaped; struct bpf_struct_ops *st_ops; @@ -440,8 +557,6 @@ struct 
extern_desc { }; }; -static LIST_HEAD(bpf_objects_list); - struct module_btf { struct btf *btf; char *name; @@ -510,12 +625,6 @@ struct bpf_object { /* Information when doing ELF related work. Only valid if efile.elf is not NULL */ struct elf_state efile; - /* - * All loaded bpf_object are linked in a list, which is - * hidden to caller. bpf_objects__<func> handlers deal with - * all objects. - */ - struct list_head list; struct btf *btf; struct btf_ext *btf_ext; @@ -541,9 +650,6 @@ struct bpf_object { size_t log_size; __u32 log_level; - void *priv; - bpf_object_clear_priv_t clear_priv; - int *fd_array; size_t fd_array_cap; size_t fd_array_cnt; @@ -565,25 +671,10 @@ static Elf64_Rel *elf_rel_by_idx(Elf_Data *data, size_t idx); void bpf_program__unload(struct bpf_program *prog) { - int i; - if (!prog) return; - /* - * If the object is opened but the program was never loaded, - * it is possible that prog->instances.nr == -1. - */ - if (prog->instances.nr > 0) { - for (i = 0; i < prog->instances.nr; i++) - zclose(prog->instances.fds[i]); - } else if (prog->instances.nr != -1) { - pr_warn("Internal error: instances.nr is %d\n", - prog->instances.nr); - } - - prog->instances.nr = -1; - zfree(&prog->instances.fds); + zclose(prog->fd); zfree(&prog->func_info); zfree(&prog->line_info); @@ -594,16 +685,9 @@ static void bpf_program__exit(struct bpf_program *prog) if (!prog) return; - if (prog->clear_priv) - prog->clear_priv(prog, prog->priv); - - prog->priv = NULL; - prog->clear_priv = NULL; - bpf_program__unload(prog); zfree(&prog->name); zfree(&prog->sec_name); - zfree(&prog->pin_name); zfree(&prog->insns); zfree(&prog->reloc_desc); @@ -612,26 +696,6 @@ static void bpf_program__exit(struct bpf_program *prog) prog->sec_idx = -1; } -static char *__bpf_program__pin_name(struct bpf_program *prog) -{ - char *name, *p; - - if (libbpf_mode & LIBBPF_STRICT_SEC_NAME) - name = strdup(prog->name); - else - name = strdup(prog->sec_name); - - if (!name) - return NULL; - - p = name; - - 
while ((p = strchr(p, '/'))) - *p = '_'; - - return name; -} - static bool insn_is_subprog_call(const struct bpf_insn *insn) { return BPF_CLASS(insn->code) == BPF_JMP && @@ -673,6 +737,7 @@ bpf_object__init_prog(struct bpf_object *obj, struct bpf_program *prog, prog->insns_cnt = prog->sec_insn_cnt; prog->type = BPF_PROG_TYPE_UNSPEC; + prog->fd = -1; /* libbpf's convention for SEC("?abc...") is that it's just like * SEC("abc...") but the corresponding bpf_program starts out with @@ -686,9 +751,6 @@ bpf_object__init_prog(struct bpf_object *obj, struct bpf_program *prog, prog->autoload = true; } - prog->instances.fds = NULL; - prog->instances.nr = -1; - /* inherit object's log_level */ prog->log_level = obj->log_level; @@ -700,10 +762,6 @@ bpf_object__init_prog(struct bpf_object *obj, struct bpf_program *prog, if (!prog->name) goto errout; - prog->pin_name = __bpf_program__pin_name(prog); - if (!prog->pin_name) - goto errout; - prog->insns = malloc(insn_data_sz); if (!prog->insns) goto errout; @@ -1185,7 +1243,6 @@ static struct bpf_object *bpf_object__new(const char *path, size_t obj_buf_sz, const char *obj_name) { - bool strict = (libbpf_mode & LIBBPF_STRICT_NO_OBJECT_LIST); struct bpf_object *obj; char *end; @@ -1223,9 +1280,6 @@ static struct bpf_object *bpf_object__new(const char *path, obj->kern_version = get_kernel_version(); obj->loaded = false; - INIT_LIST_HEAD(&obj->list); - if (!strict) - list_add(&obj->list, &bpf_objects_list); return obj; } @@ -1258,10 +1312,7 @@ static int bpf_object__elf_init(struct bpf_object *obj) } if (obj->efile.obj_buf_sz > 0) { - /* - * obj_buf should have been validated by - * bpf_object__open_buffer(). - */ + /* obj_buf should have been validated by bpf_object__open_mem(). 
*/ elf = elf_memory((char *)obj->efile.obj_buf, obj->efile.obj_buf_sz); } else { obj->efile.fd = open(obj->path, O_RDONLY | O_CLOEXEC); @@ -1643,7 +1694,7 @@ static int set_kcfg_value_tri(struct extern_desc *ext, void *ext_val, switch (ext->kcfg.type) { case KCFG_BOOL: if (value == 'm') { - pr_warn("extern (kcfg) %s=%c should be tristate or char\n", + pr_warn("extern (kcfg) '%s': value '%c' implies tristate or char type\n", ext->name, value); return -EINVAL; } @@ -1664,7 +1715,7 @@ static int set_kcfg_value_tri(struct extern_desc *ext, void *ext_val, case KCFG_INT: case KCFG_CHAR_ARR: default: - pr_warn("extern (kcfg) %s=%c should be bool, tristate, or char\n", + pr_warn("extern (kcfg) '%s': value '%c' implies bool, tristate, or char type\n", ext->name, value); return -EINVAL; } @@ -1678,7 +1729,8 @@ static int set_kcfg_value_str(struct extern_desc *ext, char *ext_val, size_t len; if (ext->kcfg.type != KCFG_CHAR_ARR) { - pr_warn("extern (kcfg) %s=%s should be char array\n", ext->name, value); + pr_warn("extern (kcfg) '%s': value '%s' implies char array type\n", + ext->name, value); return -EINVAL; } @@ -1692,7 +1744,7 @@ static int set_kcfg_value_str(struct extern_desc *ext, char *ext_val, /* strip quotes */ len -= 2; if (len >= ext->kcfg.sz) { - pr_warn("extern (kcfg) '%s': long string config %s of (%zu bytes) truncated to %d bytes\n", + pr_warn("extern (kcfg) '%s': long string '%s' of (%zu bytes) truncated to %d bytes\n", ext->name, value, len, ext->kcfg.sz - 1); len = ext->kcfg.sz - 1; } @@ -1749,13 +1801,20 @@ static bool is_kcfg_value_in_range(const struct extern_desc *ext, __u64 v) static int set_kcfg_value_num(struct extern_desc *ext, void *ext_val, __u64 value) { - if (ext->kcfg.type != KCFG_INT && ext->kcfg.type != KCFG_CHAR) { - pr_warn("extern (kcfg) %s=%llu should be integer\n", + if (ext->kcfg.type != KCFG_INT && ext->kcfg.type != KCFG_CHAR && + ext->kcfg.type != KCFG_BOOL) { + pr_warn("extern (kcfg) '%s': value '%llu' implies integer, char, or boolean 
type\n", ext->name, (unsigned long long)value); return -EINVAL; } + if (ext->kcfg.type == KCFG_BOOL && value > 1) { + pr_warn("extern (kcfg) '%s': value '%llu' isn't boolean compatible\n", + ext->name, (unsigned long long)value); + return -EINVAL; + + } if (!is_kcfg_value_in_range(ext, value)) { - pr_warn("extern (kcfg) %s=%llu value doesn't fit in %d bytes\n", + pr_warn("extern (kcfg) '%s': value '%llu' doesn't fit in %d bytes\n", ext->name, (unsigned long long)value, ext->kcfg.sz); return -ERANGE; } @@ -1819,16 +1878,19 @@ static int bpf_object__process_kconfig_line(struct bpf_object *obj, /* assume integer */ err = parse_u64(value, &num); if (err) { - pr_warn("extern (kcfg) %s=%s should be integer\n", - ext->name, value); + pr_warn("extern (kcfg) '%s': value '%s' isn't a valid integer\n", ext->name, value); return err; } + if (ext->kcfg.type != KCFG_INT && ext->kcfg.type != KCFG_CHAR) { + pr_warn("extern (kcfg) '%s': value '%s' implies integer type\n", ext->name, value); + return -EINVAL; + } err = set_kcfg_value_num(ext, ext_val, num); break; } if (err) return err; - pr_debug("extern (kcfg) %s=%s\n", ext->name, value); + pr_debug("extern (kcfg) '%s': set to %s\n", ext->name, value); return 0; } @@ -1924,143 +1986,6 @@ static int bpf_object__init_kconfig_map(struct bpf_object *obj) return 0; } -static int bpf_object__init_user_maps(struct bpf_object *obj, bool strict) -{ - Elf_Data *symbols = obj->efile.symbols; - int i, map_def_sz = 0, nr_maps = 0, nr_syms; - Elf_Data *data = NULL; - Elf_Scn *scn; - - if (obj->efile.maps_shndx < 0) - return 0; - - if (libbpf_mode & LIBBPF_STRICT_MAP_DEFINITIONS) { - pr_warn("legacy map definitions in SEC(\"maps\") are not supported\n"); - return -EOPNOTSUPP; - } - - if (!symbols) - return -EINVAL; - - scn = elf_sec_by_idx(obj, obj->efile.maps_shndx); - data = elf_sec_data(obj, scn); - if (!scn || !data) { - pr_warn("elf: failed to get legacy map definitions for %s\n", - obj->path); - return -EINVAL; - } - - /* - * Count number 
of maps. Each map has a name. - * Array of maps is not supported: only the first element is - * considered. - * - * TODO: Detect array of map and report error. - */ - nr_syms = symbols->d_size / sizeof(Elf64_Sym); - for (i = 0; i < nr_syms; i++) { - Elf64_Sym *sym = elf_sym_by_idx(obj, i); - - if (sym->st_shndx != obj->efile.maps_shndx) - continue; - if (ELF64_ST_TYPE(sym->st_info) == STT_SECTION) - continue; - nr_maps++; - } - /* Assume equally sized map definitions */ - pr_debug("elf: found %d legacy map definitions (%zd bytes) in %s\n", - nr_maps, data->d_size, obj->path); - - if (!data->d_size || nr_maps == 0 || (data->d_size % nr_maps) != 0) { - pr_warn("elf: unable to determine legacy map definition size in %s\n", - obj->path); - return -EINVAL; - } - map_def_sz = data->d_size / nr_maps; - - /* Fill obj->maps using data in "maps" section. */ - for (i = 0; i < nr_syms; i++) { - Elf64_Sym *sym = elf_sym_by_idx(obj, i); - const char *map_name; - struct bpf_map_def *def; - struct bpf_map *map; - - if (sym->st_shndx != obj->efile.maps_shndx) - continue; - if (ELF64_ST_TYPE(sym->st_info) == STT_SECTION) - continue; - - map = bpf_object__add_map(obj); - if (IS_ERR(map)) - return PTR_ERR(map); - - map_name = elf_sym_str(obj, sym->st_name); - if (!map_name) { - pr_warn("failed to get map #%d name sym string for obj %s\n", - i, obj->path); - return -LIBBPF_ERRNO__FORMAT; - } - - pr_warn("map '%s' (legacy): legacy map definitions are deprecated, use BTF-defined maps instead\n", map_name); - - if (ELF64_ST_BIND(sym->st_info) == STB_LOCAL) { - pr_warn("map '%s' (legacy): static maps are not supported\n", map_name); - return -ENOTSUP; - } - - map->libbpf_type = LIBBPF_MAP_UNSPEC; - map->sec_idx = sym->st_shndx; - map->sec_offset = sym->st_value; - pr_debug("map '%s' (legacy): at sec_idx %d, offset %zu.\n", - map_name, map->sec_idx, map->sec_offset); - if (sym->st_value + map_def_sz > data->d_size) { - pr_warn("corrupted maps section in %s: last map \"%s\" too small\n", - 
obj->path, map_name); - return -EINVAL; - } - - map->name = strdup(map_name); - if (!map->name) { - pr_warn("map '%s': failed to alloc map name\n", map_name); - return -ENOMEM; - } - pr_debug("map %d is \"%s\"\n", i, map->name); - def = (struct bpf_map_def *)(data->d_buf + sym->st_value); - /* - * If the definition of the map in the object file fits in - * bpf_map_def, copy it. Any extra fields in our version - * of bpf_map_def will default to zero as a result of the - * calloc above. - */ - if (map_def_sz <= sizeof(struct bpf_map_def)) { - memcpy(&map->def, def, map_def_sz); - } else { - /* - * Here the map structure being read is bigger than what - * we expect, truncate if the excess bits are all zero. - * If they are not zero, reject this map as - * incompatible. - */ - char *b; - - for (b = ((char *)def) + sizeof(struct bpf_map_def); - b < ((char *)def) + map_def_sz; b++) { - if (*b != 0) { - pr_warn("maps section in %s: \"%s\" has unrecognized, non-zero options\n", - obj->path, map_name); - if (strict) - return -EINVAL; - } - } - memcpy(&map->def, def, sizeof(struct bpf_map_def)); - } - - /* btf info may not exist but fill it in if it does exist */ - (void) bpf_map_find_btf_info(obj, map); - } - return 0; -} - const struct btf_type * skip_mods_and_typedefs(const struct btf *btf, __u32 id, __u32 *res_id) { @@ -2114,6 +2039,7 @@ static const char *__btf_kind_str(__u16 kind) case BTF_KIND_FLOAT: return "float"; case BTF_KIND_DECL_TAG: return "decl_tag"; case BTF_KIND_TYPE_TAG: return "type_tag"; + case BTF_KIND_ENUM64: return "enum64"; default: return "unknown"; } } @@ -2177,6 +2103,13 @@ static int build_map_pin_path(struct bpf_map *map, const char *path) return bpf_map__set_pin_path(map, buf); } +/* should match definition in bpf_helpers.h */ +enum libbpf_pin_type { + LIBBPF_PIN_NONE, + /* PIN_BY_NAME: pin maps by name (in /sys/fs/bpf by default) */ + LIBBPF_PIN_BY_NAME, +}; + int parse_btf_map_def(const char *map_name, struct btf *btf, const struct btf_type 
*def_t, bool strict, struct btf_map_def *map_def, struct btf_map_def *inner_def) @@ -2398,6 +2331,37 @@ int parse_btf_map_def(const char *map_name, struct btf *btf, return 0; } +static size_t adjust_ringbuf_sz(size_t sz) +{ + __u32 page_sz = sysconf(_SC_PAGE_SIZE); + __u32 mul; + + /* if user forgot to set any size, make sure they see error */ + if (sz == 0) + return 0; + /* Kernel expects BPF_MAP_TYPE_RINGBUF's max_entries to be + * a power-of-2 multiple of kernel's page size. If user diligently + * satisified these conditions, pass the size through. + */ + if ((sz % page_sz) == 0 && is_pow_of_2(sz / page_sz)) + return sz; + + /* Otherwise find closest (page_sz * power_of_2) product bigger than + * user-set size to satisfy both user size request and kernel + * requirements and substitute correct max_entries for map creation. + */ + for (mul = 1; mul <= UINT_MAX / page_sz; mul <<= 1) { + if (mul * page_sz > sz) + return mul * page_sz; + } + + /* if it's impossible to satisfy the conditions (i.e., user size is + * very close to UINT_MAX but is not a power-of-2 multiple of + * page_size) then just return original size and let kernel reject it + */ + return sz; +} + static void fill_map_from_def(struct bpf_map *map, const struct btf_map_def *def) { map->def.type = def->map_type; @@ -2411,6 +2375,10 @@ static void fill_map_from_def(struct bpf_map *map, const struct btf_map_def *def map->btf_key_type_id = def->key_type_id; map->btf_value_type_id = def->value_type_id; + /* auto-adjust BPF ringbuf map max_entries to be a multiple of page size */ + if (map->def.type == BPF_MAP_TYPE_RINGBUF) + map->def.max_entries = adjust_ringbuf_sz(map->def.max_entries); + if (def->parts & MAP_DEF_MAP_TYPE) pr_debug("map '%s': found type = %u.\n", map->name, def->map_type); @@ -2609,12 +2577,11 @@ static int bpf_object__init_maps(struct bpf_object *obj, { const char *pin_root_path; bool strict; - int err; + int err = 0; strict = !OPTS_GET(opts, relaxed_maps, false); pin_root_path = 
OPTS_GET(opts, pin_root_path, NULL); - err = bpf_object__init_user_maps(obj, strict); err = err ?: bpf_object__init_user_btf_maps(obj, strict, pin_root_path); err = err ?: bpf_object__init_global_data_maps(obj); err = err ?: bpf_object__init_kconfig_map(obj); @@ -2642,12 +2609,13 @@ static bool btf_needs_sanitization(struct bpf_object *obj) bool has_func = kernel_supports(obj, FEAT_BTF_FUNC); bool has_decl_tag = kernel_supports(obj, FEAT_BTF_DECL_TAG); bool has_type_tag = kernel_supports(obj, FEAT_BTF_TYPE_TAG); + bool has_enum64 = kernel_supports(obj, FEAT_BTF_ENUM64); return !has_func || !has_datasec || !has_func_global || !has_float || - !has_decl_tag || !has_type_tag; + !has_decl_tag || !has_type_tag || !has_enum64; } -static void bpf_object__sanitize_btf(struct bpf_object *obj, struct btf *btf) +static int bpf_object__sanitize_btf(struct bpf_object *obj, struct btf *btf) { bool has_func_global = kernel_supports(obj, FEAT_BTF_GLOBAL_FUNC); bool has_datasec = kernel_supports(obj, FEAT_BTF_DATASEC); @@ -2655,6 +2623,8 @@ static void bpf_object__sanitize_btf(struct bpf_object *obj, struct btf *btf) bool has_func = kernel_supports(obj, FEAT_BTF_FUNC); bool has_decl_tag = kernel_supports(obj, FEAT_BTF_DECL_TAG); bool has_type_tag = kernel_supports(obj, FEAT_BTF_TYPE_TAG); + bool has_enum64 = kernel_supports(obj, FEAT_BTF_ENUM64); + int enum64_placeholder_id = 0; struct btf_type *t; int i, j, vlen; @@ -2717,8 +2687,32 @@ static void bpf_object__sanitize_btf(struct bpf_object *obj, struct btf *btf) /* replace TYPE_TAG with a CONST */ t->name_off = 0; t->info = BTF_INFO_ENC(BTF_KIND_CONST, 0, 0); - } + } else if (!has_enum64 && btf_is_enum(t)) { + /* clear the kflag */ + t->info = btf_type_info(btf_kind(t), btf_vlen(t), false); + } else if (!has_enum64 && btf_is_enum64(t)) { + /* replace ENUM64 with a union */ + struct btf_member *m; + + if (enum64_placeholder_id == 0) { + enum64_placeholder_id = btf__add_int(btf, "enum64_placeholder", 1, 0); + if 
(enum64_placeholder_id < 0) + return enum64_placeholder_id; + + t = (struct btf_type *)btf__type_by_id(btf, i); + } + + m = btf_members(t); + vlen = btf_vlen(t); + t->info = BTF_INFO_ENC(BTF_KIND_UNION, 0, vlen); + for (j = 0; j < vlen; j++, m++) { + m->type = enum64_placeholder_id; + m->offset = 0; + } + } } + + return 0; } static bool libbpf_needs_btf(const struct bpf_object *obj) @@ -2905,11 +2899,6 @@ static int btf_finalize_data(struct bpf_object *obj, struct btf *btf) return libbpf_err(err); } -int btf__finalize_data(struct bpf_object *obj, struct btf *btf) -{ - return btf_finalize_data(obj, btf); -} - static int bpf_object__finalize_btf(struct bpf_object *obj) { int err; @@ -3056,7 +3045,9 @@ static int bpf_object__sanitize_and_load_btf(struct bpf_object *obj) /* enforce 8-byte pointers for BPF-targeted BTFs */ btf__set_pointer_size(obj->btf, 8); - bpf_object__sanitize_btf(obj, kern_btf); + err = bpf_object__sanitize_btf(obj, kern_btf); + if (err) + return err; } if (obj->gen_loader) { @@ -3563,6 +3554,10 @@ static enum kcfg_type find_kcfg_type(const struct btf *btf, int id, if (strcmp(name, "libbpf_tristate")) return KCFG_UNKNOWN; return KCFG_TRISTATE; + case BTF_KIND_ENUM64: + if (strcmp(name, "libbpf_tristate")) + return KCFG_UNKNOWN; + return KCFG_TRISTATE; case BTF_KIND_ARRAY: if (btf_array(t)->nelems == 0) return KCFG_UNKNOWN; @@ -3738,7 +3733,7 @@ static int bpf_object__collect_externs(struct bpf_object *obj) ext->kcfg.type = find_kcfg_type(obj->btf, t->type, &ext->kcfg.is_signed); if (ext->kcfg.type == KCFG_UNKNOWN) { - pr_warn("extern (kcfg) '%s' type is unsupported\n", ext_name); + pr_warn("extern (kcfg) '%s': type is unsupported\n", ext_name); return -ENOTSUP; } } else if (strcmp(sec_name, KSYMS_SEC) == 0) { @@ -3860,41 +3855,8 @@ static int bpf_object__collect_externs(struct bpf_object *obj) return 0; } -struct bpf_program * -bpf_object__find_program_by_title(const struct bpf_object *obj, - const char *title) +static bool prog_is_subprog(const 
struct bpf_object *obj, const struct bpf_program *prog) { - struct bpf_program *pos; - - bpf_object__for_each_program(pos, obj) { - if (pos->sec_name && !strcmp(pos->sec_name, title)) - return pos; - } - return errno = ENOENT, NULL; -} - -static bool prog_is_subprog(const struct bpf_object *obj, - const struct bpf_program *prog) -{ - /* For legacy reasons, libbpf supports an entry-point BPF programs - * without SEC() attribute, i.e., those in the .text section. But if - * there are 2 or more such programs in the .text section, they all - * must be subprograms called from entry-point BPF programs in - * designated SEC()'tions, otherwise there is no way to distinguish - * which of those programs should be loaded vs which are a subprogram. - * Similarly, if there is a function/program in .text and at least one - * other BPF program with custom SEC() attribute, then we just assume - * .text programs are subprograms (even if they are not called from - * other programs), because libbpf never explicitly supported mixing - * SEC()-designated BPF programs and .text entry-point BPF programs. - * - * In libbpf 1.0 strict mode, we always consider .text - * programs to be subprograms. - */ - - if (libbpf_mode & LIBBPF_STRICT_SEC_NAME) - return prog->sec_idx == obj->efile.text_shndx; - return prog->sec_idx == obj->efile.text_shndx && obj->nr_programs > 1; } @@ -4235,9 +4197,7 @@ bpf_object__collect_prog_relos(struct bpf_object *obj, Elf64_Shdr *shdr, Elf_Dat static int bpf_map_find_btf_info(struct bpf_object *obj, struct bpf_map *map) { - struct bpf_map_def *def = &map->def; - __u32 key_type_id = 0, value_type_id = 0; - int ret; + int id; if (!obj->btf) return -ENOENT; @@ -4246,31 +4206,22 @@ static int bpf_map_find_btf_info(struct bpf_object *obj, struct bpf_map *map) * For struct_ops map, it does not need btf_key_type_id and * btf_value_type_id. 
*/ - if (map->sec_idx == obj->efile.btf_maps_shndx || - bpf_map__is_struct_ops(map)) + if (map->sec_idx == obj->efile.btf_maps_shndx || bpf_map__is_struct_ops(map)) return 0; - if (!bpf_map__is_internal(map)) { - pr_warn("Use of BPF_ANNOTATE_KV_PAIR is deprecated, use BTF-defined maps in .maps section instead\n"); -#pragma GCC diagnostic push -#pragma GCC diagnostic ignored "-Wdeprecated-declarations" - ret = btf__get_map_kv_tids(obj->btf, map->name, def->key_size, - def->value_size, &key_type_id, - &value_type_id); -#pragma GCC diagnostic pop - } else { - /* - * LLVM annotates global data differently in BTF, that is, - * only as '.data', '.bss' or '.rodata'. - */ - ret = btf__find_by_name(obj->btf, map->real_name); - } - if (ret < 0) - return ret; + /* + * LLVM annotates global data differently in BTF, that is, + * only as '.data', '.bss' or '.rodata'. + */ + if (!bpf_map__is_internal(map)) + return -ENOENT; - map->btf_key_type_id = key_type_id; - map->btf_value_type_id = bpf_map__is_internal(map) ? 
- ret : value_type_id; + id = btf__find_by_name(obj->btf, map->real_name); + if (id < 0) + return id; + + map->btf_key_type_id = 0; + map->btf_value_type_id = id; return 0; } @@ -4327,7 +4278,7 @@ int bpf_map__set_autocreate(struct bpf_map *map, bool autocreate) int bpf_map__reuse_fd(struct bpf_map *map, int fd) { struct bpf_map_info info = {}; - __u32 len = sizeof(info); + __u32 len = sizeof(info), name_len; int new_fd, err; char *new_name; @@ -4337,7 +4288,12 @@ int bpf_map__reuse_fd(struct bpf_map *map, int fd) if (err) return libbpf_err(err); - new_name = strdup(info.name); + name_len = strlen(info.name); + if (name_len == BPF_OBJ_NAME_LEN - 1 && strncmp(map->name, info.name, name_len) == 0) + new_name = strdup(map->name); + else + new_name = strdup(info.name); + if (!new_name) return libbpf_err(-errno); @@ -4396,18 +4352,16 @@ struct bpf_map *bpf_map__inner_map(struct bpf_map *map) int bpf_map__set_max_entries(struct bpf_map *map, __u32 max_entries) { - if (map->fd >= 0) + if (map->obj->loaded) return libbpf_err(-EBUSY); + map->def.max_entries = max_entries; - return 0; -} -int bpf_map__resize(struct bpf_map *map, __u32 max_entries) -{ - if (!map || !max_entries) - return libbpf_err(-EINVAL); + /* auto-adjust BPF ringbuf map max_entries to be a multiple of page size */ + if (map->def.type == BPF_MAP_TYPE_RINGBUF) + map->def.max_entries = adjust_ringbuf_sz(map->def.max_entries); - return bpf_map__set_max_entries(map, max_entries); + return 0; } static int @@ -4746,6 +4700,19 @@ static int probe_kern_bpf_cookie(void) return probe_fd(ret); } +static int probe_kern_btf_enum64(void) +{ + static const char strs[] = "\0enum64"; + __u32 types[] = { + BTF_TYPE_ENC(1, BTF_INFO_ENC(BTF_KIND_ENUM64, 0, 0), 8), + }; + + return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types), + strs, sizeof(strs))); +} + +static int probe_kern_syscall_wrapper(void); + enum kern_feature_result { FEAT_UNKNOWN = 0, FEAT_SUPPORTED = 1, @@ -4811,6 +4778,12 @@ static struct 
kern_feature_desc { [FEAT_BPF_COOKIE] = { "BPF cookie support", probe_kern_bpf_cookie, }, + [FEAT_BTF_ENUM64] = { + "BTF_KIND_ENUM64 support", probe_kern_btf_enum64, + }, + [FEAT_SYSCALL_WRAPPER] = { + "Kernel using syscall wrapper", probe_kern_syscall_wrapper, + }, }; bool kernel_supports(const struct bpf_object *obj, enum kern_feature_id feat_id) @@ -4943,42 +4916,6 @@ bpf_object__populate_internal_map(struct bpf_object *obj, struct bpf_map *map) static void bpf_map__destroy(struct bpf_map *map); -static bool is_pow_of_2(size_t x) -{ - return x && (x & (x - 1)); -} - -static size_t adjust_ringbuf_sz(size_t sz) -{ - __u32 page_sz = sysconf(_SC_PAGE_SIZE); - __u32 mul; - - /* if user forgot to set any size, make sure they see error */ - if (sz == 0) - return 0; - /* Kernel expects BPF_MAP_TYPE_RINGBUF's max_entries to be - * a power-of-2 multiple of kernel's page size. If user diligently - * satisified these conditions, pass the size through. - */ - if ((sz % page_sz) == 0 && is_pow_of_2(sz / page_sz)) - return sz; - - /* Otherwise find closest (page_sz * power_of_2) product bigger than - * user-set size to satisfy both user size request and kernel - * requirements and substitute correct max_entries for map creation. 
- */ - for (mul = 1; mul <= UINT_MAX / page_sz; mul <<= 1) { - if (mul * page_sz > sz) - return mul * page_sz; - } - - /* if it's impossible to satisfy the conditions (i.e., user size is - * very close to UINT_MAX but is not a power-of-2 multiple of - * page_size) then just return original size and let kernel reject it - */ - return sz; -} - static int bpf_object__create_map(struct bpf_object *obj, struct bpf_map *map, bool is_inner) { LIBBPF_OPTS(bpf_map_create_opts, create_attr); @@ -5017,9 +4954,6 @@ static int bpf_object__create_map(struct bpf_object *obj, struct bpf_map *map, b } switch (def->type) { - case BPF_MAP_TYPE_RINGBUF: - map->def.max_entries = adjust_ringbuf_sz(map->def.max_entries); - /* fallthrough */ case BPF_MAP_TYPE_PERF_EVENT_ARRAY: case BPF_MAP_TYPE_CGROUP_ARRAY: case BPF_MAP_TYPE_STACK_TRACE: @@ -5353,7 +5287,7 @@ int bpf_core_add_cands(struct bpf_core_cand *local_cand, n = btf__type_cnt(targ_btf); for (i = targ_start_id; i < n; i++) { t = btf__type_by_id(targ_btf, i); - if (btf_kind(t) != btf_kind(local_t)) + if (!btf_kind_core_compat(t, local_t)) continue; targ_name = btf__name_by_offset(targ_btf, t->name_off); @@ -5561,76 +5495,13 @@ err_out: int bpf_core_types_are_compat(const struct btf *local_btf, __u32 local_id, const struct btf *targ_btf, __u32 targ_id) { - const struct btf_type *local_type, *targ_type; - int depth = 32; /* max recursion depth */ - - /* caller made sure that names match (ignoring flavor suffix) */ - local_type = btf__type_by_id(local_btf, local_id); - targ_type = btf__type_by_id(targ_btf, targ_id); - if (btf_kind(local_type) != btf_kind(targ_type)) - return 0; - -recur: - depth--; - if (depth < 0) - return -EINVAL; - - local_type = skip_mods_and_typedefs(local_btf, local_id, &local_id); - targ_type = skip_mods_and_typedefs(targ_btf, targ_id, &targ_id); - if (!local_type || !targ_type) - return -EINVAL; - - if (btf_kind(local_type) != btf_kind(targ_type)) - return 0; - - switch (btf_kind(local_type)) { - case 
BTF_KIND_UNKN: - case BTF_KIND_STRUCT: - case BTF_KIND_UNION: - case BTF_KIND_ENUM: - case BTF_KIND_FWD: - return 1; - case BTF_KIND_INT: - /* just reject deprecated bitfield-like integers; all other - * integers are by default compatible between each other - */ - return btf_int_offset(local_type) == 0 && btf_int_offset(targ_type) == 0; - case BTF_KIND_PTR: - local_id = local_type->type; - targ_id = targ_type->type; - goto recur; - case BTF_KIND_ARRAY: - local_id = btf_array(local_type)->type; - targ_id = btf_array(targ_type)->type; - goto recur; - case BTF_KIND_FUNC_PROTO: { - struct btf_param *local_p = btf_params(local_type); - struct btf_param *targ_p = btf_params(targ_type); - __u16 local_vlen = btf_vlen(local_type); - __u16 targ_vlen = btf_vlen(targ_type); - int i, err; - - if (local_vlen != targ_vlen) - return 0; - - for (i = 0; i < local_vlen; i++, local_p++, targ_p++) { - skip_mods_and_typedefs(local_btf, local_p->type, &local_id); - skip_mods_and_typedefs(targ_btf, targ_p->type, &targ_id); - err = bpf_core_types_are_compat(local_btf, local_id, targ_btf, targ_id); - if (err <= 0) - return err; - } + return __bpf_core_types_are_compat(local_btf, local_id, targ_btf, targ_id, 32); +} - /* tail recurse for return type check */ - skip_mods_and_typedefs(local_btf, local_type->type, &local_id); - skip_mods_and_typedefs(targ_btf, targ_type->type, &targ_id); - goto recur; - } - default: - pr_warn("unexpected kind %s relocated, local [%d], target [%d]\n", - btf_kind_str(local_type), local_id, targ_id); - return 0; - } +int bpf_core_types_match(const struct btf *local_btf, __u32 local_id, + const struct btf *targ_btf, __u32 targ_id) +{ + return __bpf_core_types_match(local_btf, local_id, targ_btf, targ_id, false, 32); } static size_t bpf_core_hash_fn(const void *key, void *ctx) @@ -6754,11 +6625,6 @@ static int libbpf_prepare_prog_load(struct bpf_program *prog, if (prog->type == BPF_PROG_TYPE_XDP && (def & SEC_XDP_FRAGS)) opts->prog_flags |= BPF_F_XDP_HAS_FRAGS; - if 
(def & SEC_DEPRECATED) { - pr_warn("SEC(\"%s\") is deprecated, please see https://github.com/libbpf/libbpf/wiki/Libbpf-1.0-migration-guide#bpf-program-sec-annotation-deprecations for details\n", - prog->sec_name); - } - if ((def & SEC_ATTACH_BTF) && !prog->attach_btf_id) { int btf_obj_fd = 0, btf_type_id = 0, err; const char *attach_name; @@ -6801,10 +6667,9 @@ static int libbpf_prepare_prog_load(struct bpf_program *prog, static void fixup_verifier_log(struct bpf_program *prog, char *buf, size_t buf_sz); -static int bpf_object_load_prog_instance(struct bpf_object *obj, struct bpf_program *prog, - struct bpf_insn *insns, int insns_cnt, - const char *license, __u32 kern_version, - int *prog_fd) +static int bpf_object_load_prog(struct bpf_object *obj, struct bpf_program *prog, + struct bpf_insn *insns, int insns_cnt, + const char *license, __u32 kern_version, int *prog_fd) { LIBBPF_OPTS(bpf_prog_load_opts, load_attr); const char *prog_name = NULL; @@ -7171,93 +7036,6 @@ static int bpf_program_record_relos(struct bpf_program *prog) return 0; } -static int bpf_object_load_prog(struct bpf_object *obj, struct bpf_program *prog, - const char *license, __u32 kern_ver) -{ - int err = 0, fd, i; - - if (obj->loaded) { - pr_warn("prog '%s': can't load after object was loaded\n", prog->name); - return libbpf_err(-EINVAL); - } - - if (prog->instances.nr < 0 || !prog->instances.fds) { - if (prog->preprocessor) { - pr_warn("Internal error: can't load program '%s'\n", - prog->name); - return libbpf_err(-LIBBPF_ERRNO__INTERNAL); - } - - prog->instances.fds = malloc(sizeof(int)); - if (!prog->instances.fds) { - pr_warn("Not enough memory for BPF fds\n"); - return libbpf_err(-ENOMEM); - } - prog->instances.nr = 1; - prog->instances.fds[0] = -1; - } - - if (!prog->preprocessor) { - if (prog->instances.nr != 1) { - pr_warn("prog '%s': inconsistent nr(%d) != 1\n", - prog->name, prog->instances.nr); - } - if (obj->gen_loader) - bpf_program_record_relos(prog); - err = 
bpf_object_load_prog_instance(obj, prog, - prog->insns, prog->insns_cnt, - license, kern_ver, &fd); - if (!err) - prog->instances.fds[0] = fd; - goto out; - } - - for (i = 0; i < prog->instances.nr; i++) { - struct bpf_prog_prep_result result; - bpf_program_prep_t preprocessor = prog->preprocessor; - - memset(&result, 0, sizeof(result)); - err = preprocessor(prog, i, prog->insns, - prog->insns_cnt, &result); - if (err) { - pr_warn("Preprocessing the %dth instance of program '%s' failed\n", - i, prog->name); - goto out; - } - - if (!result.new_insn_ptr || !result.new_insn_cnt) { - pr_debug("Skip loading the %dth instance of program '%s'\n", - i, prog->name); - prog->instances.fds[i] = -1; - if (result.pfd) - *result.pfd = -1; - continue; - } - - err = bpf_object_load_prog_instance(obj, prog, - result.new_insn_ptr, result.new_insn_cnt, - license, kern_ver, &fd); - if (err) { - pr_warn("Loading the %dth instance of program '%s' failed\n", - i, prog->name); - goto out; - } - - if (result.pfd) - *result.pfd = fd; - prog->instances.fds[i] = fd; - } -out: - if (err) - pr_warn("failed to load program '%s'\n", prog->name); - return libbpf_err(err); -} - -int bpf_program__load(struct bpf_program *prog, const char *license, __u32 kern_ver) -{ - return bpf_object_load_prog(prog->obj, prog, license, kern_ver); -} - static int bpf_object__load_progs(struct bpf_object *obj, int log_level) { @@ -7281,9 +7059,16 @@ bpf_object__load_progs(struct bpf_object *obj, int log_level) continue; } prog->log_level |= log_level; - err = bpf_object_load_prog(obj, prog, obj->license, obj->kern_version); - if (err) + + if (obj->gen_loader) + bpf_program_record_relos(prog); + + err = bpf_object_load_prog(obj, prog, prog->insns, prog->insns_cnt, + obj->license, obj->kern_version, &prog->fd); + if (err) { + pr_warn("prog '%s': failed to load: %d\n", prog->name, err); return err; + } } bpf_object__free_relocs(obj); @@ -7309,13 +7094,6 @@ static int bpf_object_init_progs(struct bpf_object *obj, const 
struct bpf_object prog->type = prog->sec_def->prog_type; prog->expected_attach_type = prog->sec_def->expected_attach_type; -#pragma GCC diagnostic push -#pragma GCC diagnostic ignored "-Wdeprecated-declarations" - if (prog->sec_def->prog_type == BPF_PROG_TYPE_TRACING || - prog->sec_def->prog_type == BPF_PROG_TYPE_EXT) - prog->attach_prog_fd = OPTS_GET(opts, attach_prog_fd, 0); -#pragma GCC diagnostic pop - /* sec_def can have custom callback which should be called * after bpf_program is initialized to adjust its properties */ @@ -7421,36 +7199,6 @@ out: return ERR_PTR(err); } -static struct bpf_object * -__bpf_object__open_xattr(struct bpf_object_open_attr *attr, int flags) -{ - DECLARE_LIBBPF_OPTS(bpf_object_open_opts, opts, - .relaxed_maps = flags & MAPS_RELAX_COMPAT, - ); - - /* param validation */ - if (!attr->file) - return NULL; - - pr_debug("loading %s\n", attr->file); - return bpf_object_open(attr->file, NULL, 0, &opts); -} - -struct bpf_object *bpf_object__open_xattr(struct bpf_object_open_attr *attr) -{ - return libbpf_ptr(__bpf_object__open_xattr(attr, 0)); -} - -struct bpf_object *bpf_object__open(const char *path) -{ - struct bpf_object_open_attr attr = { - .file = path, - .prog_type = BPF_PROG_TYPE_UNSPEC, - }; - - return libbpf_ptr(__bpf_object__open_xattr(&attr, 0)); -} - struct bpf_object * bpf_object__open_file(const char *path, const struct bpf_object_open_opts *opts) { @@ -7462,6 +7210,11 @@ bpf_object__open_file(const char *path, const struct bpf_object_open_opts *opts) return libbpf_ptr(bpf_object_open(path, NULL, 0, opts)); } +struct bpf_object *bpf_object__open(const char *path) +{ + return bpf_object__open_file(path, NULL); +} + struct bpf_object * bpf_object__open_mem(const void *obj_buf, size_t obj_buf_sz, const struct bpf_object_open_opts *opts) @@ -7472,23 +7225,6 @@ bpf_object__open_mem(const void *obj_buf, size_t obj_buf_sz, return libbpf_ptr(bpf_object_open(NULL, obj_buf, obj_buf_sz, opts)); } -struct bpf_object * 
-bpf_object__open_buffer(const void *obj_buf, size_t obj_buf_sz, - const char *name) -{ - DECLARE_LIBBPF_OPTS(bpf_object_open_opts, opts, - .object_name = name, - /* wrong default, but backwards-compatible */ - .relaxed_maps = true, - ); - - /* returning NULL is wrong, but backwards-compatible */ - if (!obj_buf || obj_buf_sz == 0) - return errno = EINVAL, NULL; - - return libbpf_ptr(bpf_object_open(NULL, obj_buf, obj_buf_sz, &opts)); -} - static int bpf_object_unload(struct bpf_object *obj) { size_t i; @@ -7574,14 +7310,14 @@ static int kallsyms_cb(unsigned long long sym_addr, char sym_type, return 0; if (ext->is_set && ext->ksym.addr != sym_addr) { - pr_warn("extern (ksym) '%s' resolution is ambiguous: 0x%llx or 0x%llx\n", + pr_warn("extern (ksym) '%s': resolution is ambiguous: 0x%llx or 0x%llx\n", sym_name, ext->ksym.addr, sym_addr); return -EINVAL; } if (!ext->is_set) { ext->is_set = true; ext->ksym.addr = sym_addr; - pr_debug("extern (ksym) %s=0x%llx\n", sym_name, sym_addr); + pr_debug("extern (ksym) '%s': set to 0x%llx\n", sym_name, sym_addr); } return 0; } @@ -7785,28 +7521,52 @@ static int bpf_object__resolve_externs(struct bpf_object *obj, for (i = 0; i < obj->nr_extern; i++) { ext = &obj->externs[i]; - if (ext->type == EXT_KCFG && - strcmp(ext->name, "LINUX_KERNEL_VERSION") == 0) { - void *ext_val = kcfg_data + ext->kcfg.data_off; - __u32 kver = get_kernel_version(); + if (ext->type == EXT_KSYM) { + if (ext->ksym.type_id) + need_vmlinux_btf = true; + else + need_kallsyms = true; + continue; + } else if (ext->type == EXT_KCFG) { + void *ext_ptr = kcfg_data + ext->kcfg.data_off; + __u64 value = 0; + + /* Kconfig externs need actual /proc/config.gz */ + if (str_has_pfx(ext->name, "CONFIG_")) { + need_config = true; + continue; + } - if (!kver) { - pr_warn("failed to get kernel version\n"); + /* Virtual kcfg externs are customly handled by libbpf */ + if (strcmp(ext->name, "LINUX_KERNEL_VERSION") == 0) { + value = get_kernel_version(); + if (!value) { + 
pr_warn("extern (kcfg) '%s': failed to get kernel version\n", ext->name); + return -EINVAL; + } + } else if (strcmp(ext->name, "LINUX_HAS_BPF_COOKIE") == 0) { + value = kernel_supports(obj, FEAT_BPF_COOKIE); + } else if (strcmp(ext->name, "LINUX_HAS_SYSCALL_WRAPPER") == 0) { + value = kernel_supports(obj, FEAT_SYSCALL_WRAPPER); + } else if (!str_has_pfx(ext->name, "LINUX_") || !ext->is_weak) { + /* Currently libbpf supports only CONFIG_ and LINUX_ prefixed + * __kconfig externs, where LINUX_ ones are virtual and filled out + * customly by libbpf (their values don't come from Kconfig). + * If LINUX_xxx variable is not recognized by libbpf, but is marked + * __weak, it defaults to zero value, just like for CONFIG_xxx + * externs. + */ + pr_warn("extern (kcfg) '%s': unrecognized virtual extern\n", ext->name); return -EINVAL; } - err = set_kcfg_value_num(ext, ext_val, kver); + + err = set_kcfg_value_num(ext, ext_ptr, value); if (err) return err; - pr_debug("extern (kcfg) %s=0x%x\n", ext->name, kver); - } else if (ext->type == EXT_KCFG && str_has_pfx(ext->name, "CONFIG_")) { - need_config = true; - } else if (ext->type == EXT_KSYM) { - if (ext->ksym.type_id) - need_vmlinux_btf = true; - else - need_kallsyms = true; + pr_debug("extern (kcfg) '%s': set to 0x%llx\n", + ext->name, (long long)value); } else { - pr_warn("unrecognized extern '%s'\n", ext->name); + pr_warn("extern '%s': unrecognized extern kind\n", ext->name); return -EINVAL; } } @@ -7842,10 +7602,10 @@ static int bpf_object__resolve_externs(struct bpf_object *obj, ext = &obj->externs[i]; if (!ext->is_set && !ext->is_weak) { - pr_warn("extern %s (strong) not resolved\n", ext->name); + pr_warn("extern '%s' (strong): not resolved\n", ext->name); return -ESRCH; } else if (!ext->is_set) { - pr_debug("extern %s (weak) not resolved, defaulting to zero\n", + pr_debug("extern '%s' (weak): not resolved, defaulting to zero\n", ext->name); } } @@ -7921,11 +7681,6 @@ out: return libbpf_err(err); } -int 
bpf_object__load_xattr(struct bpf_object_load_attr *attr) -{ - return bpf_object_load(attr->obj, attr->log_level, attr->target_btf_path); -} - int bpf_object__load(struct bpf_object *obj) { return bpf_object_load(obj, 0, NULL); @@ -7983,11 +7738,16 @@ static int check_path(const char *path) return err; } -static int bpf_program_pin_instance(struct bpf_program *prog, const char *path, int instance) +int bpf_program__pin(struct bpf_program *prog, const char *path) { char *cp, errmsg[STRERR_BUFSIZE]; int err; + if (prog->fd < 0) { + pr_warn("prog '%s': can't pin program that wasn't loaded\n", prog->name); + return libbpf_err(-EINVAL); + } + err = make_parent_dir(path); if (err) return libbpf_err(err); @@ -7996,170 +7756,35 @@ static int bpf_program_pin_instance(struct bpf_program *prog, const char *path, if (err) return libbpf_err(err); - if (prog == NULL) { - pr_warn("invalid program pointer\n"); - return libbpf_err(-EINVAL); - } - - if (instance < 0 || instance >= prog->instances.nr) { - pr_warn("invalid prog instance %d of prog %s (max %d)\n", - instance, prog->name, prog->instances.nr); - return libbpf_err(-EINVAL); - } - - if (bpf_obj_pin(prog->instances.fds[instance], path)) { + if (bpf_obj_pin(prog->fd, path)) { err = -errno; cp = libbpf_strerror_r(err, errmsg, sizeof(errmsg)); - pr_warn("failed to pin program: %s\n", cp); + pr_warn("prog '%s': failed to pin at '%s': %s\n", prog->name, path, cp); return libbpf_err(err); } - pr_debug("pinned program '%s'\n", path); + pr_debug("prog '%s': pinned at '%s'\n", prog->name, path); return 0; } -static int bpf_program_unpin_instance(struct bpf_program *prog, const char *path, int instance) +int bpf_program__unpin(struct bpf_program *prog, const char *path) { int err; - err = check_path(path); - if (err) - return libbpf_err(err); - - if (prog == NULL) { - pr_warn("invalid program pointer\n"); - return libbpf_err(-EINVAL); - } - - if (instance < 0 || instance >= prog->instances.nr) { - pr_warn("invalid prog instance %d of 
prog %s (max %d)\n", - instance, prog->name, prog->instances.nr); + if (prog->fd < 0) { + pr_warn("prog '%s': can't unpin program that wasn't loaded\n", prog->name); return libbpf_err(-EINVAL); } - err = unlink(path); - if (err != 0) - return libbpf_err(-errno); - - pr_debug("unpinned program '%s'\n", path); - - return 0; -} - -__attribute__((alias("bpf_program_pin_instance"))) -int bpf_object__pin_instance(struct bpf_program *prog, const char *path, int instance); - -__attribute__((alias("bpf_program_unpin_instance"))) -int bpf_program__unpin_instance(struct bpf_program *prog, const char *path, int instance); - -int bpf_program__pin(struct bpf_program *prog, const char *path) -{ - int i, err; - - err = make_parent_dir(path); - if (err) - return libbpf_err(err); - - err = check_path(path); - if (err) - return libbpf_err(err); - - if (prog == NULL) { - pr_warn("invalid program pointer\n"); - return libbpf_err(-EINVAL); - } - - if (prog->instances.nr <= 0) { - pr_warn("no instances of prog %s to pin\n", prog->name); - return libbpf_err(-EINVAL); - } - - if (prog->instances.nr == 1) { - /* don't create subdirs when pinning single instance */ - return bpf_program_pin_instance(prog, path, 0); - } - - for (i = 0; i < prog->instances.nr; i++) { - char buf[PATH_MAX]; - int len; - - len = snprintf(buf, PATH_MAX, "%s/%d", path, i); - if (len < 0) { - err = -EINVAL; - goto err_unpin; - } else if (len >= PATH_MAX) { - err = -ENAMETOOLONG; - goto err_unpin; - } - - err = bpf_program_pin_instance(prog, buf, i); - if (err) - goto err_unpin; - } - - return 0; - -err_unpin: - for (i = i - 1; i >= 0; i--) { - char buf[PATH_MAX]; - int len; - - len = snprintf(buf, PATH_MAX, "%s/%d", path, i); - if (len < 0) - continue; - else if (len >= PATH_MAX) - continue; - - bpf_program_unpin_instance(prog, buf, i); - } - - rmdir(path); - - return libbpf_err(err); -} - -int bpf_program__unpin(struct bpf_program *prog, const char *path) -{ - int i, err; - err = check_path(path); if (err) return 
libbpf_err(err); - if (prog == NULL) { - pr_warn("invalid program pointer\n"); - return libbpf_err(-EINVAL); - } - - if (prog->instances.nr <= 0) { - pr_warn("no instances of prog %s to pin\n", prog->name); - return libbpf_err(-EINVAL); - } - - if (prog->instances.nr == 1) { - /* don't create subdirs when pinning single instance */ - return bpf_program_unpin_instance(prog, path, 0); - } - - for (i = 0; i < prog->instances.nr; i++) { - char buf[PATH_MAX]; - int len; - - len = snprintf(buf, PATH_MAX, "%s/%d", path, i); - if (len < 0) - return libbpf_err(-EINVAL); - else if (len >= PATH_MAX) - return libbpf_err(-ENAMETOOLONG); - - err = bpf_program_unpin_instance(prog, buf, i); - if (err) - return err; - } - - err = rmdir(path); + err = unlink(path); if (err) return libbpf_err(-errno); + pr_debug("prog '%s': unpinned from '%s'\n", prog->name, path); return 0; } @@ -8406,8 +8031,7 @@ int bpf_object__pin_programs(struct bpf_object *obj, const char *path) char buf[PATH_MAX]; int len; - len = snprintf(buf, PATH_MAX, "%s/%s", path, - prog->pin_name); + len = snprintf(buf, PATH_MAX, "%s/%s", path, prog->name); if (len < 0) { err = -EINVAL; goto err_unpin_programs; @@ -8428,8 +8052,7 @@ err_unpin_programs: char buf[PATH_MAX]; int len; - len = snprintf(buf, PATH_MAX, "%s/%s", path, - prog->pin_name); + len = snprintf(buf, PATH_MAX, "%s/%s", path, prog->name); if (len < 0) continue; else if (len >= PATH_MAX) @@ -8453,8 +8076,7 @@ int bpf_object__unpin_programs(struct bpf_object *obj, const char *path) char buf[PATH_MAX]; int len; - len = snprintf(buf, PATH_MAX, "%s/%s", path, - prog->pin_name); + len = snprintf(buf, PATH_MAX, "%s/%s", path, prog->name); if (len < 0) return libbpf_err(-EINVAL); else if (len >= PATH_MAX) @@ -8487,11 +8109,6 @@ int bpf_object__pin(struct bpf_object *obj, const char *path) static void bpf_map__destroy(struct bpf_map *map) { - if (map->clear_priv) - map->clear_priv(map, map->priv); - map->priv = NULL; - map->clear_priv = NULL; - if (map->inner_map) 
{ bpf_map__destroy(map->inner_map); zfree(&map->inner_map); @@ -8527,9 +8144,6 @@ void bpf_object__close(struct bpf_object *obj) if (IS_ERR_OR_NULL(obj)) return; - if (obj->clear_priv) - obj->clear_priv(obj, obj->priv); - usdt_manager_free(obj->usdt_man); obj->usdt_man = NULL; @@ -8556,33 +8170,9 @@ void bpf_object__close(struct bpf_object *obj) } zfree(&obj->programs); - list_del(&obj->list); free(obj); } -struct bpf_object * -bpf_object__next(struct bpf_object *prev) -{ - struct bpf_object *next; - bool strict = (libbpf_mode & LIBBPF_STRICT_NO_OBJECT_LIST); - - if (strict) - return NULL; - - if (!prev) - next = list_first_entry(&bpf_objects_list, - struct bpf_object, - list); - else - next = list_next_entry(prev, list); - - /* Empty list is noticed here so don't need checking on entry. */ - if (&next->list == &bpf_objects_list) - return NULL; - - return next; -} - const char *bpf_object__name(const struct bpf_object *obj) { return obj ? obj->name : libbpf_err_ptr(-EINVAL); @@ -8613,22 +8203,6 @@ int bpf_object__set_kversion(struct bpf_object *obj, __u32 kern_version) return 0; } -int bpf_object__set_priv(struct bpf_object *obj, void *priv, - bpf_object_clear_priv_t clear_priv) -{ - if (obj->priv && obj->clear_priv) - obj->clear_priv(obj, obj->priv); - - obj->priv = priv; - obj->clear_priv = clear_priv; - return 0; -} - -void *bpf_object__priv(const struct bpf_object *obj) -{ - return obj ? 
obj->priv : libbpf_err_ptr(-EINVAL); -} - int bpf_object__gen_loader(struct bpf_object *obj, struct gen_loader_opts *opts) { struct bpf_gen *gen; @@ -8672,12 +8246,6 @@ __bpf_program__iter(const struct bpf_program *p, const struct bpf_object *obj, } struct bpf_program * -bpf_program__next(struct bpf_program *prev, const struct bpf_object *obj) -{ - return bpf_object__next_program(obj, prev); -} - -struct bpf_program * bpf_object__next_program(const struct bpf_object *obj, struct bpf_program *prev) { struct bpf_program *prog = prev; @@ -8690,12 +8258,6 @@ bpf_object__next_program(const struct bpf_object *obj, struct bpf_program *prev) } struct bpf_program * -bpf_program__prev(struct bpf_program *next, const struct bpf_object *obj) -{ - return bpf_object__prev_program(obj, next); -} - -struct bpf_program * bpf_object__prev_program(const struct bpf_object *obj, struct bpf_program *next) { struct bpf_program *prog = next; @@ -8707,22 +8269,6 @@ bpf_object__prev_program(const struct bpf_object *obj, struct bpf_program *next) return prog; } -int bpf_program__set_priv(struct bpf_program *prog, void *priv, - bpf_program_clear_priv_t clear_priv) -{ - if (prog->priv && prog->clear_priv) - prog->clear_priv(prog, prog->priv); - - prog->priv = priv; - prog->clear_priv = clear_priv; - return 0; -} - -void *bpf_program__priv(const struct bpf_program *prog) -{ - return prog ? 
prog->priv : libbpf_err_ptr(-EINVAL); -} - void bpf_program__set_ifindex(struct bpf_program *prog, __u32 ifindex) { prog->prog_ifindex = ifindex; @@ -8738,22 +8284,6 @@ const char *bpf_program__section_name(const struct bpf_program *prog) return prog->sec_name; } -const char *bpf_program__title(const struct bpf_program *prog, bool needs_copy) -{ - const char *title; - - title = prog->sec_name; - if (needs_copy) { - title = strdup(title); - if (!title) { - pr_warn("failed to strdup program title\n"); - return libbpf_err_ptr(-ENOMEM); - } - } - - return title; -} - bool bpf_program__autoload(const struct bpf_program *prog) { return prog->autoload; @@ -8768,18 +8298,6 @@ int bpf_program__set_autoload(struct bpf_program *prog, bool autoload) return 0; } -static int bpf_program_nth_fd(const struct bpf_program *prog, int n); - -int bpf_program__fd(const struct bpf_program *prog) -{ - return bpf_program_nth_fd(prog, 0); -} - -size_t bpf_program__size(const struct bpf_program *prog) -{ - return prog->insns_cnt * BPF_INSN_SZ; -} - const struct bpf_insn *bpf_program__insns(const struct bpf_program *prog) { return prog->insns; @@ -8810,58 +8328,15 @@ int bpf_program__set_insns(struct bpf_program *prog, return 0; } -int bpf_program__set_prep(struct bpf_program *prog, int nr_instances, - bpf_program_prep_t prep) -{ - int *instances_fds; - - if (nr_instances <= 0 || !prep) - return libbpf_err(-EINVAL); - - if (prog->instances.nr > 0 || prog->instances.fds) { - pr_warn("Can't set pre-processor after loading\n"); - return libbpf_err(-EINVAL); - } - - instances_fds = malloc(sizeof(int) * nr_instances); - if (!instances_fds) { - pr_warn("alloc memory failed for fds\n"); - return libbpf_err(-ENOMEM); - } - - /* fill all fd with -1 */ - memset(instances_fds, -1, sizeof(int) * nr_instances); - - prog->instances.nr = nr_instances; - prog->instances.fds = instances_fds; - prog->preprocessor = prep; - return 0; -} - -__attribute__((alias("bpf_program_nth_fd"))) -int 
bpf_program__nth_fd(const struct bpf_program *prog, int n); - -static int bpf_program_nth_fd(const struct bpf_program *prog, int n) +int bpf_program__fd(const struct bpf_program *prog) { - int fd; - if (!prog) return libbpf_err(-EINVAL); - if (n >= prog->instances.nr || n < 0) { - pr_warn("Can't get the %dth fd from program %s: only %d instances\n", - n, prog->name, prog->instances.nr); - return libbpf_err(-EINVAL); - } - - fd = prog->instances.fds[n]; - if (fd < 0) { - pr_warn("%dth instance of program '%s' is invalid\n", - n, prog->name); + if (prog->fd < 0) return libbpf_err(-ENOENT); - } - return fd; + return prog->fd; } __alias(bpf_program__type) @@ -8881,39 +8356,6 @@ int bpf_program__set_type(struct bpf_program *prog, enum bpf_prog_type type) return 0; } -static bool bpf_program__is_type(const struct bpf_program *prog, - enum bpf_prog_type type) -{ - return prog ? (prog->type == type) : false; -} - -#define BPF_PROG_TYPE_FNS(NAME, TYPE) \ -int bpf_program__set_##NAME(struct bpf_program *prog) \ -{ \ - if (!prog) \ - return libbpf_err(-EINVAL); \ - return bpf_program__set_type(prog, TYPE); \ -} \ - \ -bool bpf_program__is_##NAME(const struct bpf_program *prog) \ -{ \ - return bpf_program__is_type(prog, TYPE); \ -} \ - -BPF_PROG_TYPE_FNS(socket_filter, BPF_PROG_TYPE_SOCKET_FILTER); -BPF_PROG_TYPE_FNS(lsm, BPF_PROG_TYPE_LSM); -BPF_PROG_TYPE_FNS(kprobe, BPF_PROG_TYPE_KPROBE); -BPF_PROG_TYPE_FNS(sched_cls, BPF_PROG_TYPE_SCHED_CLS); -BPF_PROG_TYPE_FNS(sched_act, BPF_PROG_TYPE_SCHED_ACT); -BPF_PROG_TYPE_FNS(tracepoint, BPF_PROG_TYPE_TRACEPOINT); -BPF_PROG_TYPE_FNS(raw_tracepoint, BPF_PROG_TYPE_RAW_TRACEPOINT); -BPF_PROG_TYPE_FNS(xdp, BPF_PROG_TYPE_XDP); -BPF_PROG_TYPE_FNS(perf_event, BPF_PROG_TYPE_PERF_EVENT); -BPF_PROG_TYPE_FNS(tracing, BPF_PROG_TYPE_TRACING); -BPF_PROG_TYPE_FNS(struct_ops, BPF_PROG_TYPE_STRUCT_OPS); -BPF_PROG_TYPE_FNS(extension, BPF_PROG_TYPE_EXT); -BPF_PROG_TYPE_FNS(sk_lookup, BPF_PROG_TYPE_SK_LOOKUP); - 
__alias(bpf_program__expected_attach_type) enum bpf_attach_type bpf_program__get_expected_attach_type(const struct bpf_program *prog); @@ -8991,6 +8433,7 @@ int bpf_program__set_log_buf(struct bpf_program *prog, char *log_buf, size_t log static int attach_kprobe(const struct bpf_program *prog, long cookie, struct bpf_link **link); static int attach_uprobe(const struct bpf_program *prog, long cookie, struct bpf_link **link); +static int attach_ksyscall(const struct bpf_program *prog, long cookie, struct bpf_link **link); static int attach_usdt(const struct bpf_program *prog, long cookie, struct bpf_link **link); static int attach_tp(const struct bpf_program *prog, long cookie, struct bpf_link **link); static int attach_raw_tp(const struct bpf_program *prog, long cookie, struct bpf_link **link); @@ -9000,19 +8443,23 @@ static int attach_lsm(const struct bpf_program *prog, long cookie, struct bpf_li static int attach_iter(const struct bpf_program *prog, long cookie, struct bpf_link **link); static const struct bpf_sec_def section_defs[] = { - SEC_DEF("socket", SOCKET_FILTER, 0, SEC_NONE | SEC_SLOPPY_PFX), - SEC_DEF("sk_reuseport/migrate", SK_REUSEPORT, BPF_SK_REUSEPORT_SELECT_OR_MIGRATE, SEC_ATTACHABLE | SEC_SLOPPY_PFX), - SEC_DEF("sk_reuseport", SK_REUSEPORT, BPF_SK_REUSEPORT_SELECT, SEC_ATTACHABLE | SEC_SLOPPY_PFX), + SEC_DEF("socket", SOCKET_FILTER, 0, SEC_NONE), + SEC_DEF("sk_reuseport/migrate", SK_REUSEPORT, BPF_SK_REUSEPORT_SELECT_OR_MIGRATE, SEC_ATTACHABLE), + SEC_DEF("sk_reuseport", SK_REUSEPORT, BPF_SK_REUSEPORT_SELECT, SEC_ATTACHABLE), SEC_DEF("kprobe+", KPROBE, 0, SEC_NONE, attach_kprobe), SEC_DEF("uprobe+", KPROBE, 0, SEC_NONE, attach_uprobe), + SEC_DEF("uprobe.s+", KPROBE, 0, SEC_SLEEPABLE, attach_uprobe), SEC_DEF("kretprobe+", KPROBE, 0, SEC_NONE, attach_kprobe), SEC_DEF("uretprobe+", KPROBE, 0, SEC_NONE, attach_uprobe), + SEC_DEF("uretprobe.s+", KPROBE, 0, SEC_SLEEPABLE, attach_uprobe), SEC_DEF("kprobe.multi+", KPROBE, BPF_TRACE_KPROBE_MULTI, SEC_NONE, 
attach_kprobe_multi), SEC_DEF("kretprobe.multi+", KPROBE, BPF_TRACE_KPROBE_MULTI, SEC_NONE, attach_kprobe_multi), + SEC_DEF("ksyscall+", KPROBE, 0, SEC_NONE, attach_ksyscall), + SEC_DEF("kretsyscall+", KPROBE, 0, SEC_NONE, attach_ksyscall), SEC_DEF("usdt+", KPROBE, 0, SEC_NONE, attach_usdt), SEC_DEF("tc", SCHED_CLS, 0, SEC_NONE), - SEC_DEF("classifier", SCHED_CLS, 0, SEC_NONE | SEC_SLOPPY_PFX | SEC_DEPRECATED), - SEC_DEF("action", SCHED_ACT, 0, SEC_NONE | SEC_SLOPPY_PFX), + SEC_DEF("classifier", SCHED_CLS, 0, SEC_NONE), + SEC_DEF("action", SCHED_ACT, 0, SEC_NONE), SEC_DEF("tracepoint+", TRACEPOINT, 0, SEC_NONE, attach_tp), SEC_DEF("tp+", TRACEPOINT, 0, SEC_NONE, attach_tp), SEC_DEF("raw_tracepoint+", RAW_TRACEPOINT, 0, SEC_NONE, attach_raw_tp), @@ -9029,55 +8476,54 @@ static const struct bpf_sec_def section_defs[] = { SEC_DEF("freplace+", EXT, 0, SEC_ATTACH_BTF, attach_trace), SEC_DEF("lsm+", LSM, BPF_LSM_MAC, SEC_ATTACH_BTF, attach_lsm), SEC_DEF("lsm.s+", LSM, BPF_LSM_MAC, SEC_ATTACH_BTF | SEC_SLEEPABLE, attach_lsm), + SEC_DEF("lsm_cgroup+", LSM, BPF_LSM_CGROUP, SEC_ATTACH_BTF), SEC_DEF("iter+", TRACING, BPF_TRACE_ITER, SEC_ATTACH_BTF, attach_iter), SEC_DEF("iter.s+", TRACING, BPF_TRACE_ITER, SEC_ATTACH_BTF | SEC_SLEEPABLE, attach_iter), SEC_DEF("syscall", SYSCALL, 0, SEC_SLEEPABLE), SEC_DEF("xdp.frags/devmap", XDP, BPF_XDP_DEVMAP, SEC_XDP_FRAGS), SEC_DEF("xdp/devmap", XDP, BPF_XDP_DEVMAP, SEC_ATTACHABLE), - SEC_DEF("xdp_devmap/", XDP, BPF_XDP_DEVMAP, SEC_ATTACHABLE | SEC_DEPRECATED), SEC_DEF("xdp.frags/cpumap", XDP, BPF_XDP_CPUMAP, SEC_XDP_FRAGS), SEC_DEF("xdp/cpumap", XDP, BPF_XDP_CPUMAP, SEC_ATTACHABLE), - SEC_DEF("xdp_cpumap/", XDP, BPF_XDP_CPUMAP, SEC_ATTACHABLE | SEC_DEPRECATED), SEC_DEF("xdp.frags", XDP, BPF_XDP, SEC_XDP_FRAGS), - SEC_DEF("xdp", XDP, BPF_XDP, SEC_ATTACHABLE_OPT | SEC_SLOPPY_PFX), - SEC_DEF("perf_event", PERF_EVENT, 0, SEC_NONE | SEC_SLOPPY_PFX), - SEC_DEF("lwt_in", LWT_IN, 0, SEC_NONE | SEC_SLOPPY_PFX), - SEC_DEF("lwt_out", LWT_OUT, 0, 
SEC_NONE | SEC_SLOPPY_PFX), - SEC_DEF("lwt_xmit", LWT_XMIT, 0, SEC_NONE | SEC_SLOPPY_PFX), - SEC_DEF("lwt_seg6local", LWT_SEG6LOCAL, 0, SEC_NONE | SEC_SLOPPY_PFX), - SEC_DEF("cgroup_skb/ingress", CGROUP_SKB, BPF_CGROUP_INET_INGRESS, SEC_ATTACHABLE_OPT | SEC_SLOPPY_PFX), - SEC_DEF("cgroup_skb/egress", CGROUP_SKB, BPF_CGROUP_INET_EGRESS, SEC_ATTACHABLE_OPT | SEC_SLOPPY_PFX), - SEC_DEF("cgroup/skb", CGROUP_SKB, 0, SEC_NONE | SEC_SLOPPY_PFX), - SEC_DEF("cgroup/sock_create", CGROUP_SOCK, BPF_CGROUP_INET_SOCK_CREATE, SEC_ATTACHABLE | SEC_SLOPPY_PFX), - SEC_DEF("cgroup/sock_release", CGROUP_SOCK, BPF_CGROUP_INET_SOCK_RELEASE, SEC_ATTACHABLE | SEC_SLOPPY_PFX), - SEC_DEF("cgroup/sock", CGROUP_SOCK, BPF_CGROUP_INET_SOCK_CREATE, SEC_ATTACHABLE_OPT | SEC_SLOPPY_PFX), - SEC_DEF("cgroup/post_bind4", CGROUP_SOCK, BPF_CGROUP_INET4_POST_BIND, SEC_ATTACHABLE | SEC_SLOPPY_PFX), - SEC_DEF("cgroup/post_bind6", CGROUP_SOCK, BPF_CGROUP_INET6_POST_BIND, SEC_ATTACHABLE | SEC_SLOPPY_PFX), - SEC_DEF("cgroup/dev", CGROUP_DEVICE, BPF_CGROUP_DEVICE, SEC_ATTACHABLE_OPT | SEC_SLOPPY_PFX), - SEC_DEF("sockops", SOCK_OPS, BPF_CGROUP_SOCK_OPS, SEC_ATTACHABLE_OPT | SEC_SLOPPY_PFX), - SEC_DEF("sk_skb/stream_parser", SK_SKB, BPF_SK_SKB_STREAM_PARSER, SEC_ATTACHABLE_OPT | SEC_SLOPPY_PFX), - SEC_DEF("sk_skb/stream_verdict",SK_SKB, BPF_SK_SKB_STREAM_VERDICT, SEC_ATTACHABLE_OPT | SEC_SLOPPY_PFX), - SEC_DEF("sk_skb", SK_SKB, 0, SEC_NONE | SEC_SLOPPY_PFX), - SEC_DEF("sk_msg", SK_MSG, BPF_SK_MSG_VERDICT, SEC_ATTACHABLE_OPT | SEC_SLOPPY_PFX), - SEC_DEF("lirc_mode2", LIRC_MODE2, BPF_LIRC_MODE2, SEC_ATTACHABLE_OPT | SEC_SLOPPY_PFX), - SEC_DEF("flow_dissector", FLOW_DISSECTOR, BPF_FLOW_DISSECTOR, SEC_ATTACHABLE_OPT | SEC_SLOPPY_PFX), - SEC_DEF("cgroup/bind4", CGROUP_SOCK_ADDR, BPF_CGROUP_INET4_BIND, SEC_ATTACHABLE | SEC_SLOPPY_PFX), - SEC_DEF("cgroup/bind6", CGROUP_SOCK_ADDR, BPF_CGROUP_INET6_BIND, SEC_ATTACHABLE | SEC_SLOPPY_PFX), - SEC_DEF("cgroup/connect4", CGROUP_SOCK_ADDR, BPF_CGROUP_INET4_CONNECT, 
SEC_ATTACHABLE | SEC_SLOPPY_PFX), - SEC_DEF("cgroup/connect6", CGROUP_SOCK_ADDR, BPF_CGROUP_INET6_CONNECT, SEC_ATTACHABLE | SEC_SLOPPY_PFX), - SEC_DEF("cgroup/sendmsg4", CGROUP_SOCK_ADDR, BPF_CGROUP_UDP4_SENDMSG, SEC_ATTACHABLE | SEC_SLOPPY_PFX), - SEC_DEF("cgroup/sendmsg6", CGROUP_SOCK_ADDR, BPF_CGROUP_UDP6_SENDMSG, SEC_ATTACHABLE | SEC_SLOPPY_PFX), - SEC_DEF("cgroup/recvmsg4", CGROUP_SOCK_ADDR, BPF_CGROUP_UDP4_RECVMSG, SEC_ATTACHABLE | SEC_SLOPPY_PFX), - SEC_DEF("cgroup/recvmsg6", CGROUP_SOCK_ADDR, BPF_CGROUP_UDP6_RECVMSG, SEC_ATTACHABLE | SEC_SLOPPY_PFX), - SEC_DEF("cgroup/getpeername4", CGROUP_SOCK_ADDR, BPF_CGROUP_INET4_GETPEERNAME, SEC_ATTACHABLE | SEC_SLOPPY_PFX), - SEC_DEF("cgroup/getpeername6", CGROUP_SOCK_ADDR, BPF_CGROUP_INET6_GETPEERNAME, SEC_ATTACHABLE | SEC_SLOPPY_PFX), - SEC_DEF("cgroup/getsockname4", CGROUP_SOCK_ADDR, BPF_CGROUP_INET4_GETSOCKNAME, SEC_ATTACHABLE | SEC_SLOPPY_PFX), - SEC_DEF("cgroup/getsockname6", CGROUP_SOCK_ADDR, BPF_CGROUP_INET6_GETSOCKNAME, SEC_ATTACHABLE | SEC_SLOPPY_PFX), - SEC_DEF("cgroup/sysctl", CGROUP_SYSCTL, BPF_CGROUP_SYSCTL, SEC_ATTACHABLE | SEC_SLOPPY_PFX), - SEC_DEF("cgroup/getsockopt", CGROUP_SOCKOPT, BPF_CGROUP_GETSOCKOPT, SEC_ATTACHABLE | SEC_SLOPPY_PFX), - SEC_DEF("cgroup/setsockopt", CGROUP_SOCKOPT, BPF_CGROUP_SETSOCKOPT, SEC_ATTACHABLE | SEC_SLOPPY_PFX), + SEC_DEF("xdp", XDP, BPF_XDP, SEC_ATTACHABLE_OPT), + SEC_DEF("perf_event", PERF_EVENT, 0, SEC_NONE), + SEC_DEF("lwt_in", LWT_IN, 0, SEC_NONE), + SEC_DEF("lwt_out", LWT_OUT, 0, SEC_NONE), + SEC_DEF("lwt_xmit", LWT_XMIT, 0, SEC_NONE), + SEC_DEF("lwt_seg6local", LWT_SEG6LOCAL, 0, SEC_NONE), + SEC_DEF("sockops", SOCK_OPS, BPF_CGROUP_SOCK_OPS, SEC_ATTACHABLE_OPT), + SEC_DEF("sk_skb/stream_parser", SK_SKB, BPF_SK_SKB_STREAM_PARSER, SEC_ATTACHABLE_OPT), + SEC_DEF("sk_skb/stream_verdict",SK_SKB, BPF_SK_SKB_STREAM_VERDICT, SEC_ATTACHABLE_OPT), + SEC_DEF("sk_skb", SK_SKB, 0, SEC_NONE), + SEC_DEF("sk_msg", SK_MSG, BPF_SK_MSG_VERDICT, SEC_ATTACHABLE_OPT), + 
SEC_DEF("lirc_mode2", LIRC_MODE2, BPF_LIRC_MODE2, SEC_ATTACHABLE_OPT), + SEC_DEF("flow_dissector", FLOW_DISSECTOR, BPF_FLOW_DISSECTOR, SEC_ATTACHABLE_OPT), + SEC_DEF("cgroup_skb/ingress", CGROUP_SKB, BPF_CGROUP_INET_INGRESS, SEC_ATTACHABLE_OPT), + SEC_DEF("cgroup_skb/egress", CGROUP_SKB, BPF_CGROUP_INET_EGRESS, SEC_ATTACHABLE_OPT), + SEC_DEF("cgroup/skb", CGROUP_SKB, 0, SEC_NONE), + SEC_DEF("cgroup/sock_create", CGROUP_SOCK, BPF_CGROUP_INET_SOCK_CREATE, SEC_ATTACHABLE), + SEC_DEF("cgroup/sock_release", CGROUP_SOCK, BPF_CGROUP_INET_SOCK_RELEASE, SEC_ATTACHABLE), + SEC_DEF("cgroup/sock", CGROUP_SOCK, BPF_CGROUP_INET_SOCK_CREATE, SEC_ATTACHABLE_OPT), + SEC_DEF("cgroup/post_bind4", CGROUP_SOCK, BPF_CGROUP_INET4_POST_BIND, SEC_ATTACHABLE), + SEC_DEF("cgroup/post_bind6", CGROUP_SOCK, BPF_CGROUP_INET6_POST_BIND, SEC_ATTACHABLE), + SEC_DEF("cgroup/bind4", CGROUP_SOCK_ADDR, BPF_CGROUP_INET4_BIND, SEC_ATTACHABLE), + SEC_DEF("cgroup/bind6", CGROUP_SOCK_ADDR, BPF_CGROUP_INET6_BIND, SEC_ATTACHABLE), + SEC_DEF("cgroup/connect4", CGROUP_SOCK_ADDR, BPF_CGROUP_INET4_CONNECT, SEC_ATTACHABLE), + SEC_DEF("cgroup/connect6", CGROUP_SOCK_ADDR, BPF_CGROUP_INET6_CONNECT, SEC_ATTACHABLE), + SEC_DEF("cgroup/sendmsg4", CGROUP_SOCK_ADDR, BPF_CGROUP_UDP4_SENDMSG, SEC_ATTACHABLE), + SEC_DEF("cgroup/sendmsg6", CGROUP_SOCK_ADDR, BPF_CGROUP_UDP6_SENDMSG, SEC_ATTACHABLE), + SEC_DEF("cgroup/recvmsg4", CGROUP_SOCK_ADDR, BPF_CGROUP_UDP4_RECVMSG, SEC_ATTACHABLE), + SEC_DEF("cgroup/recvmsg6", CGROUP_SOCK_ADDR, BPF_CGROUP_UDP6_RECVMSG, SEC_ATTACHABLE), + SEC_DEF("cgroup/getpeername4", CGROUP_SOCK_ADDR, BPF_CGROUP_INET4_GETPEERNAME, SEC_ATTACHABLE), + SEC_DEF("cgroup/getpeername6", CGROUP_SOCK_ADDR, BPF_CGROUP_INET6_GETPEERNAME, SEC_ATTACHABLE), + SEC_DEF("cgroup/getsockname4", CGROUP_SOCK_ADDR, BPF_CGROUP_INET4_GETSOCKNAME, SEC_ATTACHABLE), + SEC_DEF("cgroup/getsockname6", CGROUP_SOCK_ADDR, BPF_CGROUP_INET6_GETSOCKNAME, SEC_ATTACHABLE), + SEC_DEF("cgroup/sysctl", CGROUP_SYSCTL, BPF_CGROUP_SYSCTL, 
SEC_ATTACHABLE), + SEC_DEF("cgroup/getsockopt", CGROUP_SOCKOPT, BPF_CGROUP_GETSOCKOPT, SEC_ATTACHABLE), + SEC_DEF("cgroup/setsockopt", CGROUP_SOCKOPT, BPF_CGROUP_SETSOCKOPT, SEC_ATTACHABLE), + SEC_DEF("cgroup/dev", CGROUP_DEVICE, BPF_CGROUP_DEVICE, SEC_ATTACHABLE_OPT), SEC_DEF("struct_ops+", STRUCT_OPS, 0, SEC_NONE), - SEC_DEF("sk_lookup", SK_LOOKUP, BPF_SK_LOOKUP, SEC_ATTACHABLE | SEC_SLOPPY_PFX), + SEC_DEF("sk_lookup", SK_LOOKUP, BPF_SK_LOOKUP, SEC_ATTACHABLE), }; static size_t custom_sec_def_cnt; @@ -9172,8 +8618,7 @@ int libbpf_unregister_prog_handler(int handler_id) return 0; } -static bool sec_def_matches(const struct bpf_sec_def *sec_def, const char *sec_name, - bool allow_sloppy) +static bool sec_def_matches(const struct bpf_sec_def *sec_def, const char *sec_name) { size_t len = strlen(sec_def->sec); @@ -9198,17 +8643,6 @@ static bool sec_def_matches(const struct bpf_sec_def *sec_def, const char *sec_n return false; } - /* SEC_SLOPPY_PFX definitions are allowed to be just prefix - * matches, unless strict section name mode - * (LIBBPF_STRICT_SEC_NAME) is enabled, in which case the - * match has to be exact. - */ - if (allow_sloppy && str_has_pfx(sec_name, sec_def->sec)) - return true; - - /* Definitions not marked SEC_SLOPPY_PFX (e.g., - * SEC("syscall")) are exact matches in both modes. 
- */ return strcmp(sec_name, sec_def->sec) == 0; } @@ -9216,20 +8650,18 @@ static const struct bpf_sec_def *find_sec_def(const char *sec_name) { const struct bpf_sec_def *sec_def; int i, n; - bool strict = libbpf_mode & LIBBPF_STRICT_SEC_NAME, allow_sloppy; n = custom_sec_def_cnt; for (i = 0; i < n; i++) { sec_def = &custom_sec_defs[i]; - if (sec_def_matches(sec_def, sec_name, false)) + if (sec_def_matches(sec_def, sec_name)) return sec_def; } n = ARRAY_SIZE(section_defs); for (i = 0; i < n; i++) { sec_def = §ion_defs[i]; - allow_sloppy = (sec_def->cookie & SEC_SLOPPY_PFX) && !strict; - if (sec_def_matches(sec_def, sec_name, allow_sloppy)) + if (sec_def_matches(sec_def, sec_name)) return sec_def; } @@ -9300,6 +8732,38 @@ int libbpf_prog_type_by_name(const char *name, enum bpf_prog_type *prog_type, return libbpf_err(-ESRCH); } +const char *libbpf_bpf_attach_type_str(enum bpf_attach_type t) +{ + if (t < 0 || t >= ARRAY_SIZE(attach_type_name)) + return NULL; + + return attach_type_name[t]; +} + +const char *libbpf_bpf_link_type_str(enum bpf_link_type t) +{ + if (t < 0 || t >= ARRAY_SIZE(link_type_name)) + return NULL; + + return link_type_name[t]; +} + +const char *libbpf_bpf_map_type_str(enum bpf_map_type t) +{ + if (t < 0 || t >= ARRAY_SIZE(map_type_name)) + return NULL; + + return map_type_name[t]; +} + +const char *libbpf_bpf_prog_type_str(enum bpf_prog_type t) +{ + if (t < 0 || t >= ARRAY_SIZE(prog_type_name)) + return NULL; + + return prog_type_name[t]; +} + static struct bpf_map *find_struct_ops_map_by_offset(struct bpf_object *obj, size_t offset) { @@ -9450,6 +8914,7 @@ void btf_get_kernel_prefix_kind(enum bpf_attach_type attach_type, *kind = BTF_KIND_TYPEDEF; break; case BPF_LSM_MAC: + case BPF_LSM_CGROUP: *prefix = BTF_LSM_PREFIX; *kind = BTF_KIND_FUNC; break; @@ -9653,11 +9118,6 @@ int bpf_map__fd(const struct bpf_map *map) return map ? map->fd : libbpf_err(-EINVAL); } -const struct bpf_map_def *bpf_map__def(const struct bpf_map *map) -{ - return map ? 
&map->def : libbpf_err_ptr(-EINVAL); -} - static bool map_uses_real_name(const struct bpf_map *map) { /* Since libbpf started to support custom .data.* and .rodata.* maps, @@ -9772,27 +9232,6 @@ __u32 bpf_map__btf_value_type_id(const struct bpf_map *map) return map ? map->btf_value_type_id : 0; } -int bpf_map__set_priv(struct bpf_map *map, void *priv, - bpf_map_clear_priv_t clear_priv) -{ - if (!map) - return libbpf_err(-EINVAL); - - if (map->priv) { - if (map->clear_priv) - map->clear_priv(map, map->priv); - } - - map->priv = priv; - map->clear_priv = clear_priv; - return 0; -} - -void *bpf_map__priv(const struct bpf_map *map) -{ - return map ? map->priv : libbpf_err_ptr(-EINVAL); -} - int bpf_map__set_initial_value(struct bpf_map *map, const void *data, size_t size) { @@ -9812,11 +9251,6 @@ const void *bpf_map__initial_value(struct bpf_map *map, size_t *psize) return map->mmaped; } -bool bpf_map__is_offload_neutral(const struct bpf_map *map) -{ - return map->def.type == BPF_MAP_TYPE_PERF_EVENT_ARRAY; -} - bool bpf_map__is_internal(const struct bpf_map *map) { return map->libbpf_type != LIBBPF_MAP_UNSPEC; @@ -9878,12 +9312,6 @@ __bpf_map__iter(const struct bpf_map *m, const struct bpf_object *obj, int i) } struct bpf_map * -bpf_map__next(const struct bpf_map *prev, const struct bpf_object *obj) -{ - return bpf_object__next_map(obj, prev); -} - -struct bpf_map * bpf_object__next_map(const struct bpf_object *obj, const struct bpf_map *prev) { if (prev == NULL) @@ -9893,12 +9321,6 @@ bpf_object__next_map(const struct bpf_object *obj, const struct bpf_map *prev) } struct bpf_map * -bpf_map__prev(const struct bpf_map *next, const struct bpf_object *obj) -{ - return bpf_object__prev_map(obj, next); -} - -struct bpf_map * bpf_object__prev_map(const struct bpf_object *obj, const struct bpf_map *next) { if (next == NULL) { @@ -9943,12 +9365,6 @@ bpf_object__find_map_fd_by_name(const struct bpf_object *obj, const char *name) return 
bpf_map__fd(bpf_object__find_map_by_name(obj, name)); } -struct bpf_map * -bpf_object__find_map_by_offset(struct bpf_object *obj, size_t offset) -{ - return libbpf_err_ptr(-ENOTSUP); -} - static int validate_map_op(const struct bpf_map *map, size_t key_sz, size_t value_sz, bool check_value_sz) { @@ -10069,95 +9485,6 @@ long libbpf_get_error(const void *ptr) return -errno; } -__attribute__((alias("bpf_prog_load_xattr2"))) -int bpf_prog_load_xattr(const struct bpf_prog_load_attr *attr, - struct bpf_object **pobj, int *prog_fd); - -static int bpf_prog_load_xattr2(const struct bpf_prog_load_attr *attr, - struct bpf_object **pobj, int *prog_fd) -{ - struct bpf_object_open_attr open_attr = {}; - struct bpf_program *prog, *first_prog = NULL; - struct bpf_object *obj; - struct bpf_map *map; - int err; - - if (!attr) - return libbpf_err(-EINVAL); - if (!attr->file) - return libbpf_err(-EINVAL); - - open_attr.file = attr->file; - open_attr.prog_type = attr->prog_type; - - obj = __bpf_object__open_xattr(&open_attr, 0); - err = libbpf_get_error(obj); - if (err) - return libbpf_err(-ENOENT); - - bpf_object__for_each_program(prog, obj) { - enum bpf_attach_type attach_type = attr->expected_attach_type; - /* - * to preserve backwards compatibility, bpf_prog_load treats - * attr->prog_type, if specified, as an override to whatever - * bpf_object__open guessed - */ - if (attr->prog_type != BPF_PROG_TYPE_UNSPEC) { - prog->type = attr->prog_type; - prog->expected_attach_type = attach_type; - } - if (bpf_program__type(prog) == BPF_PROG_TYPE_UNSPEC) { - /* - * we haven't guessed from section name and user - * didn't provide a fallback type, too bad... 
- */ - bpf_object__close(obj); - return libbpf_err(-EINVAL); - } - - prog->prog_ifindex = attr->ifindex; - prog->log_level = attr->log_level; - prog->prog_flags |= attr->prog_flags; - if (!first_prog) - first_prog = prog; - } - - bpf_object__for_each_map(map, obj) { - if (map->def.type != BPF_MAP_TYPE_PERF_EVENT_ARRAY) - map->map_ifindex = attr->ifindex; - } - - if (!first_prog) { - pr_warn("object file doesn't contain bpf program\n"); - bpf_object__close(obj); - return libbpf_err(-ENOENT); - } - - err = bpf_object__load(obj); - if (err) { - bpf_object__close(obj); - return libbpf_err(err); - } - - *pobj = obj; - *prog_fd = bpf_program__fd(first_prog); - return 0; -} - -COMPAT_VERSION(bpf_prog_load_deprecated, bpf_prog_load, LIBBPF_0.0.1) -int bpf_prog_load_deprecated(const char *file, enum bpf_prog_type type, - struct bpf_object **pobj, int *prog_fd) -{ - struct bpf_prog_load_attr attr; - - memset(&attr, 0, sizeof(struct bpf_prog_load_attr)); - attr.file = file; - attr.prog_type = type; - attr.expected_attach_type = 0; - - return bpf_prog_load_xattr2(&attr, pobj, prog_fd); -} - /* Replace link's underlying BPF program with the new one */ int bpf_link__update_program(struct bpf_link *link, struct bpf_program *prog) { @@ -10485,7 +9812,7 @@ static int perf_event_open_probe(bool uprobe, bool retprobe, const char *name, { struct perf_event_attr attr = {}; char errmsg[STRERR_BUFSIZE]; - int type, pfd, err; + int type, pfd; if (ref_ctr_off >= (1ULL << PERF_UPROBE_REF_CTR_OFFSET_BITS)) return -EINVAL; @@ -10521,14 +9848,7 @@ static int perf_event_open_probe(bool uprobe, bool retprobe, const char *name, pid < 0 ? -1 : pid /* pid */, pid == -1 ? 0 : -1 /* cpu */, -1 /* group_fd */, PERF_FLAG_FD_CLOEXEC); - if (pfd < 0) { - err = -errno; - pr_warn("%s perf_event_open() failed: %s\n", - uprobe ? "uprobe" : "kprobe", - libbpf_strerror_r(err, errmsg, sizeof(errmsg))); - return err; - } - return pfd; + return pfd >= 0 ? 
pfd : -errno; } static int append_to_file(const char *file, const char *fmt, ...) @@ -10551,6 +9871,34 @@ static int append_to_file(const char *file, const char *fmt, ...) return err; } +#define DEBUGFS "/sys/kernel/debug/tracing" +#define TRACEFS "/sys/kernel/tracing" + +static bool use_debugfs(void) +{ + static int has_debugfs = -1; + + if (has_debugfs < 0) + has_debugfs = access(DEBUGFS, F_OK) == 0; + + return has_debugfs == 1; +} + +static const char *tracefs_path(void) +{ + return use_debugfs() ? DEBUGFS : TRACEFS; +} + +static const char *tracefs_kprobe_events(void) +{ + return use_debugfs() ? DEBUGFS"/kprobe_events" : TRACEFS"/kprobe_events"; +} + +static const char *tracefs_uprobe_events(void) +{ + return use_debugfs() ? DEBUGFS"/uprobe_events" : TRACEFS"/uprobe_events"; +} + static void gen_kprobe_legacy_event_name(char *buf, size_t buf_sz, const char *kfunc_name, size_t offset) { @@ -10563,9 +9911,7 @@ static void gen_kprobe_legacy_event_name(char *buf, size_t buf_sz, static int add_kprobe_event_legacy(const char *probe_name, bool retprobe, const char *kfunc_name, size_t offset) { - const char *file = "/sys/kernel/debug/tracing/kprobe_events"; - - return append_to_file(file, "%c:%s/%s %s+0x%zx", + return append_to_file(tracefs_kprobe_events(), "%c:%s/%s %s+0x%zx", retprobe ? 'r' : 'p', retprobe ? "kretprobes" : "kprobes", probe_name, kfunc_name, offset); @@ -10573,18 +9919,16 @@ static int add_kprobe_event_legacy(const char *probe_name, bool retprobe, static int remove_kprobe_event_legacy(const char *probe_name, bool retprobe) { - const char *file = "/sys/kernel/debug/tracing/kprobe_events"; - - return append_to_file(file, "-:%s/%s", retprobe ? "kretprobes" : "kprobes", probe_name); + return append_to_file(tracefs_kprobe_events(), "-:%s/%s", + retprobe ? 
"kretprobes" : "kprobes", probe_name); } static int determine_kprobe_perf_type_legacy(const char *probe_name, bool retprobe) { char file[256]; - snprintf(file, sizeof(file), - "/sys/kernel/debug/tracing/events/%s/%s/id", - retprobe ? "kretprobes" : "kprobes", probe_name); + snprintf(file, sizeof(file), "%s/events/%s/%s/id", + tracefs_path(), retprobe ? "kretprobes" : "kprobes", probe_name); return parse_uint_from_file(file, "%d\n"); } @@ -10605,10 +9949,11 @@ static int perf_event_kprobe_open_legacy(const char *probe_name, bool retprobe, } type = determine_kprobe_perf_type_legacy(probe_name, retprobe); if (type < 0) { + err = type; pr_warn("failed to determine legacy kprobe event id for '%s+0x%zx': %s\n", kfunc_name, offset, - libbpf_strerror_r(type, errmsg, sizeof(errmsg))); - return type; + libbpf_strerror_r(err, errmsg, sizeof(errmsg))); + goto err_clean_legacy; } attr.size = sizeof(attr); attr.config = type; @@ -10622,9 +9967,72 @@ static int perf_event_kprobe_open_legacy(const char *probe_name, bool retprobe, err = -errno; pr_warn("legacy kprobe perf_event_open() failed: %s\n", libbpf_strerror_r(err, errmsg, sizeof(errmsg))); - return err; + goto err_clean_legacy; } return pfd; + +err_clean_legacy: + /* Clear the newly added legacy kprobe_event */ + remove_kprobe_event_legacy(probe_name, retprobe); + return err; +} + +static const char *arch_specific_syscall_pfx(void) +{ +#if defined(__x86_64__) + return "x64"; +#elif defined(__i386__) + return "ia32"; +#elif defined(__s390x__) + return "s390x"; +#elif defined(__s390__) + return "s390"; +#elif defined(__arm__) + return "arm"; +#elif defined(__aarch64__) + return "arm64"; +#elif defined(__mips__) + return "mips"; +#elif defined(__riscv) + return "riscv"; +#elif defined(__powerpc__) + return "powerpc"; +#elif defined(__powerpc64__) + return "powerpc64"; +#else + return NULL; +#endif +} + +static int probe_kern_syscall_wrapper(void) +{ + char syscall_name[64]; + const char *ksys_pfx; + + ksys_pfx = 
arch_specific_syscall_pfx(); + if (!ksys_pfx) + return 0; + + snprintf(syscall_name, sizeof(syscall_name), "__%s_sys_bpf", ksys_pfx); + + if (determine_kprobe_perf_type() >= 0) { + int pfd; + + pfd = perf_event_open_probe(false, false, syscall_name, 0, getpid(), 0); + if (pfd >= 0) + close(pfd); + + return pfd >= 0 ? 1 : 0; + } else { /* legacy mode */ + char probe_name[128]; + + gen_kprobe_legacy_event_name(probe_name, sizeof(probe_name), syscall_name, 0); + if (add_kprobe_event_legacy(probe_name, false, syscall_name, 0) < 0) + return 0; + + (void)remove_kprobe_event_legacy(probe_name, false); + return 1; + } } struct bpf_link * @@ -10681,7 +10089,7 @@ bpf_program__attach_kprobe_opts(const struct bpf_program *prog, prog->name, retprobe ? "kretprobe" : "kprobe", func_name, offset, libbpf_strerror_r(err, errmsg, sizeof(errmsg))); - goto err_out; + goto err_clean_legacy; } if (legacy) { struct bpf_link_perf *perf_link = container_of(link, struct bpf_link_perf, link); @@ -10692,6 +10100,10 @@ bpf_program__attach_kprobe_opts(const struct bpf_program *prog, } return link; + +err_clean_legacy: + if (legacy) + remove_kprobe_event_legacy(legacy_probe, retprobe); err_out: free(legacy_probe); return libbpf_err_ptr(err); @@ -10708,6 +10120,34 @@ struct bpf_link *bpf_program__attach_kprobe(const struct bpf_program *prog, return bpf_program__attach_kprobe_opts(prog, func_name, &opts); } +struct bpf_link *bpf_program__attach_ksyscall(const struct bpf_program *prog, + const char *syscall_name, + const struct bpf_ksyscall_opts *opts) +{ + LIBBPF_OPTS(bpf_kprobe_opts, kprobe_opts); + char func_name[128]; + + if (!OPTS_VALID(opts, bpf_ksyscall_opts)) + return libbpf_err_ptr(-EINVAL); + + if (kernel_supports(prog->obj, FEAT_SYSCALL_WRAPPER)) { + /* arch_specific_syscall_pfx() should never return NULL here + * because it is guarded by kernel_supports(). However, since + * compiler does not know that we have an explicit conditional + * as well. 
+ */ + snprintf(func_name, sizeof(func_name), "__%s_sys_%s", + arch_specific_syscall_pfx() ? : "", syscall_name); + } else { + snprintf(func_name, sizeof(func_name), "__se_sys_%s", syscall_name); + } + + kprobe_opts.retprobe = OPTS_GET(opts, retprobe, false); + kprobe_opts.bpf_cookie = OPTS_GET(opts, bpf_cookie, 0); + + return bpf_program__attach_kprobe_opts(prog, func_name, &kprobe_opts); +} + /* Adapted from perf/util/string.c */ static bool glob_match(const char *str, const char *pat) { @@ -10878,6 +10318,27 @@ static int attach_kprobe(const struct bpf_program *prog, long cookie, struct bpf return libbpf_get_error(*link); } +static int attach_ksyscall(const struct bpf_program *prog, long cookie, struct bpf_link **link) +{ + LIBBPF_OPTS(bpf_ksyscall_opts, opts); + const char *syscall_name; + + *link = NULL; + + /* no auto-attach for SEC("ksyscall") and SEC("kretsyscall") */ + if (strcmp(prog->sec_name, "ksyscall") == 0 || strcmp(prog->sec_name, "kretsyscall") == 0) + return 0; + + opts.retprobe = str_has_pfx(prog->sec_name, "kretsyscall/"); + if (opts.retprobe) + syscall_name = prog->sec_name + sizeof("kretsyscall/") - 1; + else + syscall_name = prog->sec_name + sizeof("ksyscall/") - 1; + + *link = bpf_program__attach_ksyscall(prog, syscall_name, &opts); + return *link ? 0 : -errno; +} + static int attach_kprobe_multi(const struct bpf_program *prog, long cookie, struct bpf_link **link) { LIBBPF_OPTS(bpf_kprobe_multi_opts, opts); @@ -10926,9 +10387,7 @@ static void gen_uprobe_legacy_event_name(char *buf, size_t buf_sz, static inline int add_uprobe_event_legacy(const char *probe_name, bool retprobe, const char *binary_path, size_t offset) { - const char *file = "/sys/kernel/debug/tracing/uprobe_events"; - - return append_to_file(file, "%c:%s/%s %s:0x%zx", + return append_to_file(tracefs_uprobe_events(), "%c:%s/%s %s:0x%zx", retprobe ? 'r' : 'p', retprobe ? 
"uretprobes" : "uprobes", probe_name, binary_path, offset); @@ -10936,18 +10395,16 @@ static inline int add_uprobe_event_legacy(const char *probe_name, bool retprobe, static inline int remove_uprobe_event_legacy(const char *probe_name, bool retprobe) { - const char *file = "/sys/kernel/debug/tracing/uprobe_events"; - - return append_to_file(file, "-:%s/%s", retprobe ? "uretprobes" : "uprobes", probe_name); + return append_to_file(tracefs_uprobe_events(), "-:%s/%s", + retprobe ? "uretprobes" : "uprobes", probe_name); } static int determine_uprobe_perf_type_legacy(const char *probe_name, bool retprobe) { char file[512]; - snprintf(file, sizeof(file), - "/sys/kernel/debug/tracing/events/%s/%s/id", - retprobe ? "uretprobes" : "uprobes", probe_name); + snprintf(file, sizeof(file), "%s/events/%s/%s/id", + tracefs_path(), retprobe ? "uretprobes" : "uprobes", probe_name); return parse_uint_from_file(file, "%d\n"); } @@ -10966,9 +10423,10 @@ static int perf_event_uprobe_open_legacy(const char *probe_name, bool retprobe, } type = determine_uprobe_perf_type_legacy(probe_name, retprobe); if (type < 0) { + err = type; pr_warn("failed to determine legacy uprobe event id for %s:0x%zx: %d\n", binary_path, offset, err); - return type; + goto err_clean_legacy; } memset(&attr, 0, sizeof(attr)); @@ -10983,46 +10441,14 @@ static int perf_event_uprobe_open_legacy(const char *probe_name, bool retprobe, if (pfd < 0) { err = -errno; pr_warn("legacy uprobe perf_event_open() failed: %d\n", err); - return err; + goto err_clean_legacy; } return pfd; -} -/* uprobes deal in relative offsets; subtract the base address associated with - * the mapped binary. See Documentation/trace/uprobetracer.rst for more - * details. 
- */ -static long elf_find_relative_offset(const char *filename, Elf *elf, long addr) -{ - size_t n; - int i; - - if (elf_getphdrnum(elf, &n)) { - pr_warn("elf: failed to find program headers for '%s': %s\n", filename, - elf_errmsg(-1)); - return -ENOENT; - } - - for (i = 0; i < n; i++) { - int seg_start, seg_end, seg_offset; - GElf_Phdr phdr; - - if (!gelf_getphdr(elf, i, &phdr)) { - pr_warn("elf: failed to get program header %d from '%s': %s\n", i, filename, - elf_errmsg(-1)); - return -ENOENT; - } - if (phdr.p_type != PT_LOAD || !(phdr.p_flags & PF_X)) - continue; - - seg_start = phdr.p_vaddr; - seg_end = seg_start + phdr.p_memsz; - seg_offset = phdr.p_offset; - if (addr >= seg_start && addr < seg_end) - return addr - seg_start + seg_offset; - } - pr_warn("elf: failed to find prog header containing 0x%lx in '%s'\n", addr, filename); - return -ENOENT; +err_clean_legacy: + /* Clear the newly added legacy uprobe_event */ + remove_uprobe_event_legacy(probe_name, retprobe); + return err; } /* Return next ELF section of sh_type after scn, or first of that type if scn is NULL. */ @@ -11111,6 +10537,8 @@ static long elf_find_func_offset(const char *binary_path, const char *name) for (idx = 0; idx < nr_syms; idx++) { int curr_bind; GElf_Sym sym; + Elf_Scn *sym_scn; + GElf_Shdr sym_sh; if (!gelf_getsym(symbols, idx, &sym)) continue; @@ -11148,12 +10576,28 @@ static long elf_find_func_offset(const char *binary_path, const char *name) continue; } } - ret = sym.st_value; + + /* Transform symbol's virtual address (absolute for + * binaries and relative for shared libs) into file + * offset, which is what kernel is expecting for + * uprobe/uretprobe attachment. + * See Documentation/trace/uprobetracer.rst for more + * details. + * This is done by looking up symbol's containing + * section's header and using it's virtual address + * (sh_addr) and corresponding file offset (sh_offset) + * to transform sym.st_value (virtual address) into + * desired final file offset. 
+ */ + sym_scn = elf_getscn(elf, sym.st_shndx); + if (!sym_scn) + continue; + if (!gelf_getshdr(sym_scn, &sym_sh)) + continue; + + ret = sym.st_value - sym_sh.sh_addr + sym_sh.sh_offset; last_bind = curr_bind; } - /* For binaries that are not shared libraries, we need relative offset */ - if (ret > 0 && !is_shared_lib) - ret = elf_find_relative_offset(binary_path, elf, ret); if (ret > 0) break; } @@ -11276,7 +10720,10 @@ bpf_program__attach_uprobe_opts(const struct bpf_program *prog, pid_t pid, ref_ctr_off = OPTS_GET(opts, ref_ctr_offset, 0); pe_opts.bpf_cookie = OPTS_GET(opts, bpf_cookie, 0); - if (binary_path && !strchr(binary_path, '/')) { + if (!binary_path) + return libbpf_err_ptr(-EINVAL); + + if (!strchr(binary_path, '/')) { err = resolve_full_path(binary_path, full_binary_path, sizeof(full_binary_path)); if (err) { @@ -11290,11 +10737,6 @@ bpf_program__attach_uprobe_opts(const struct bpf_program *prog, pid_t pid, if (func_name) { long sym_off; - if (!binary_path) { - pr_warn("prog '%s': name-based attach requires binary_path\n", - prog->name); - return libbpf_err_ptr(-EINVAL); - } sym_off = elf_find_func_offset(binary_path, func_name); if (sym_off < 0) return libbpf_err_ptr(sym_off); @@ -11338,7 +10780,7 @@ bpf_program__attach_uprobe_opts(const struct bpf_program *prog, pid_t pid, prog->name, retprobe ? 
"uretprobe" : "uprobe", binary_path, func_offset, libbpf_strerror_r(err, errmsg, sizeof(errmsg))); - goto err_out; + goto err_clean_legacy; } if (legacy) { struct bpf_link_perf *perf_link = container_of(link, struct bpf_link_perf, link); @@ -11348,10 +10790,13 @@ bpf_program__attach_uprobe_opts(const struct bpf_program *prog, pid_t pid, perf_link->legacy_is_retprobe = retprobe; } return link; + +err_clean_legacy: + if (legacy) + remove_uprobe_event_legacy(legacy_probe, retprobe); err_out: free(legacy_probe); return libbpf_err_ptr(err); - } /* Format of u[ret]probe section definition supporting auto-attach: @@ -11386,7 +10831,8 @@ static int attach_uprobe(const struct bpf_program *prog, long cookie, struct bpf break; case 3: case 4: - opts.retprobe = strcmp(probe_type, "uretprobe") == 0; + opts.retprobe = strcmp(probe_type, "uretprobe") == 0 || + strcmp(probe_type, "uretprobe.s") == 0; if (opts.retprobe && offset != 0) { pr_warn("prog '%s': uretprobes do not support offset specification\n", prog->name); @@ -11438,6 +10884,9 @@ struct bpf_link *bpf_program__attach_usdt(const struct bpf_program *prog, return libbpf_err_ptr(-EINVAL); } + if (!binary_path) + return libbpf_err_ptr(-EINVAL); + if (!strchr(binary_path, '/')) { err = resolve_full_path(binary_path, resolved_path, sizeof(resolved_path)); if (err) { @@ -11503,9 +10952,8 @@ static int determine_tracepoint_id(const char *tp_category, char file[PATH_MAX]; int ret; - ret = snprintf(file, sizeof(file), - "/sys/kernel/debug/tracing/events/%s/%s/id", - tp_category, tp_name); + ret = snprintf(file, sizeof(file), "%s/events/%s/%s/id", + tracefs_path(), tp_category, tp_name); if (ret < 0) return -errno; if (ret >= sizeof(file)) { @@ -11962,6 +11410,9 @@ struct bpf_link *bpf_map__attach_struct_ops(const struct bpf_map *map) return link; } +typedef enum bpf_perf_event_ret (*bpf_perf_event_print_t)(struct perf_event_header *hdr, + void *private_data); + static enum bpf_perf_event_ret perf_event_read_simple(void *mmap_mem, 
size_t mmap_size, size_t page_size, void **copy_mem, size_t *copy_size, @@ -12010,12 +11461,6 @@ perf_event_read_simple(void *mmap_mem, size_t mmap_size, size_t page_size, return libbpf_err(ret); } -__attribute__((alias("perf_event_read_simple"))) -enum bpf_perf_event_ret -bpf_perf_event_read_simple(void *mmap_mem, size_t mmap_size, size_t page_size, - void **copy_mem, size_t *copy_size, - bpf_perf_event_print_t fn, void *private_data); - struct perf_buffer; struct perf_buffer_params { @@ -12149,12 +11594,11 @@ error: static struct perf_buffer *__perf_buffer__new(int map_fd, size_t page_cnt, struct perf_buffer_params *p); -DEFAULT_VERSION(perf_buffer__new_v0_6_0, perf_buffer__new, LIBBPF_0.6.0) -struct perf_buffer *perf_buffer__new_v0_6_0(int map_fd, size_t page_cnt, - perf_buffer_sample_fn sample_cb, - perf_buffer_lost_fn lost_cb, - void *ctx, - const struct perf_buffer_opts *opts) +struct perf_buffer *perf_buffer__new(int map_fd, size_t page_cnt, + perf_buffer_sample_fn sample_cb, + perf_buffer_lost_fn lost_cb, + void *ctx, + const struct perf_buffer_opts *opts) { struct perf_buffer_params p = {}; struct perf_event_attr attr = {}; @@ -12176,22 +11620,10 @@ struct perf_buffer *perf_buffer__new_v0_6_0(int map_fd, size_t page_cnt, return libbpf_ptr(__perf_buffer__new(map_fd, page_cnt, &p)); } -COMPAT_VERSION(perf_buffer__new_deprecated, perf_buffer__new, LIBBPF_0.0.4) -struct perf_buffer *perf_buffer__new_deprecated(int map_fd, size_t page_cnt, - const struct perf_buffer_opts *opts) -{ - return perf_buffer__new_v0_6_0(map_fd, page_cnt, - opts ? opts->sample_cb : NULL, - opts ? opts->lost_cb : NULL, - opts ? 
opts->ctx : NULL, - NULL); -} - -DEFAULT_VERSION(perf_buffer__new_raw_v0_6_0, perf_buffer__new_raw, LIBBPF_0.6.0) -struct perf_buffer *perf_buffer__new_raw_v0_6_0(int map_fd, size_t page_cnt, - struct perf_event_attr *attr, - perf_buffer_event_fn event_cb, void *ctx, - const struct perf_buffer_raw_opts *opts) +struct perf_buffer *perf_buffer__new_raw(int map_fd, size_t page_cnt, + struct perf_event_attr *attr, + perf_buffer_event_fn event_cb, void *ctx, + const struct perf_buffer_raw_opts *opts) { struct perf_buffer_params p = {}; @@ -12211,20 +11643,6 @@ struct perf_buffer *perf_buffer__new_raw_v0_6_0(int map_fd, size_t page_cnt, return libbpf_ptr(__perf_buffer__new(map_fd, page_cnt, &p)); } -COMPAT_VERSION(perf_buffer__new_raw_deprecated, perf_buffer__new_raw, LIBBPF_0.0.4) -struct perf_buffer *perf_buffer__new_raw_deprecated(int map_fd, size_t page_cnt, - const struct perf_buffer_raw_opts *opts) -{ - LIBBPF_OPTS(perf_buffer_raw_opts, inner_opts, - .cpu_cnt = opts->cpu_cnt, - .cpus = opts->cpus, - .map_keys = opts->map_keys, - ); - - return perf_buffer__new_raw_v0_6_0(map_fd, page_cnt, opts->attr, - opts->event_cb, opts->ctx, &inner_opts); -} - static struct perf_buffer *__perf_buffer__new(int map_fd, size_t page_cnt, struct perf_buffer_params *p) { @@ -12485,6 +11903,22 @@ int perf_buffer__buffer_fd(const struct perf_buffer *pb, size_t buf_idx) return cpu_buf->fd; } +int perf_buffer__buffer(struct perf_buffer *pb, int buf_idx, void **buf, size_t *buf_size) +{ + struct perf_cpu_buf *cpu_buf; + + if (buf_idx >= pb->cpu_cnt) + return libbpf_err(-EINVAL); + + cpu_buf = pb->cpu_bufs[buf_idx]; + if (!cpu_buf) + return libbpf_err(-ENOENT); + + *buf = cpu_buf->base; + *buf_size = pb->mmap_size; + return 0; +} + /* * Consume data from perf ring buffer corresponding to slot *buf_idx* in * PERF_EVENT_ARRAY BPF map without waiting/polling. 
If there is no data to @@ -12526,254 +11960,6 @@ int perf_buffer__consume(struct perf_buffer *pb) return 0; } -struct bpf_prog_info_array_desc { - int array_offset; /* e.g. offset of jited_prog_insns */ - int count_offset; /* e.g. offset of jited_prog_len */ - int size_offset; /* > 0: offset of rec size, - * < 0: fix size of -size_offset - */ -}; - -static struct bpf_prog_info_array_desc bpf_prog_info_array_desc[] = { - [BPF_PROG_INFO_JITED_INSNS] = { - offsetof(struct bpf_prog_info, jited_prog_insns), - offsetof(struct bpf_prog_info, jited_prog_len), - -1, - }, - [BPF_PROG_INFO_XLATED_INSNS] = { - offsetof(struct bpf_prog_info, xlated_prog_insns), - offsetof(struct bpf_prog_info, xlated_prog_len), - -1, - }, - [BPF_PROG_INFO_MAP_IDS] = { - offsetof(struct bpf_prog_info, map_ids), - offsetof(struct bpf_prog_info, nr_map_ids), - -(int)sizeof(__u32), - }, - [BPF_PROG_INFO_JITED_KSYMS] = { - offsetof(struct bpf_prog_info, jited_ksyms), - offsetof(struct bpf_prog_info, nr_jited_ksyms), - -(int)sizeof(__u64), - }, - [BPF_PROG_INFO_JITED_FUNC_LENS] = { - offsetof(struct bpf_prog_info, jited_func_lens), - offsetof(struct bpf_prog_info, nr_jited_func_lens), - -(int)sizeof(__u32), - }, - [BPF_PROG_INFO_FUNC_INFO] = { - offsetof(struct bpf_prog_info, func_info), - offsetof(struct bpf_prog_info, nr_func_info), - offsetof(struct bpf_prog_info, func_info_rec_size), - }, - [BPF_PROG_INFO_LINE_INFO] = { - offsetof(struct bpf_prog_info, line_info), - offsetof(struct bpf_prog_info, nr_line_info), - offsetof(struct bpf_prog_info, line_info_rec_size), - }, - [BPF_PROG_INFO_JITED_LINE_INFO] = { - offsetof(struct bpf_prog_info, jited_line_info), - offsetof(struct bpf_prog_info, nr_jited_line_info), - offsetof(struct bpf_prog_info, jited_line_info_rec_size), - }, - [BPF_PROG_INFO_PROG_TAGS] = { - offsetof(struct bpf_prog_info, prog_tags), - offsetof(struct bpf_prog_info, nr_prog_tags), - -(int)sizeof(__u8) * BPF_TAG_SIZE, - }, - -}; - -static __u32 bpf_prog_info_read_offset_u32(struct 
bpf_prog_info *info, - int offset) -{ - __u32 *array = (__u32 *)info; - - if (offset >= 0) - return array[offset / sizeof(__u32)]; - return -(int)offset; -} - -static __u64 bpf_prog_info_read_offset_u64(struct bpf_prog_info *info, - int offset) -{ - __u64 *array = (__u64 *)info; - - if (offset >= 0) - return array[offset / sizeof(__u64)]; - return -(int)offset; -} - -static void bpf_prog_info_set_offset_u32(struct bpf_prog_info *info, int offset, - __u32 val) -{ - __u32 *array = (__u32 *)info; - - if (offset >= 0) - array[offset / sizeof(__u32)] = val; -} - -static void bpf_prog_info_set_offset_u64(struct bpf_prog_info *info, int offset, - __u64 val) -{ - __u64 *array = (__u64 *)info; - - if (offset >= 0) - array[offset / sizeof(__u64)] = val; -} - -struct bpf_prog_info_linear * -bpf_program__get_prog_info_linear(int fd, __u64 arrays) -{ - struct bpf_prog_info_linear *info_linear; - struct bpf_prog_info info = {}; - __u32 info_len = sizeof(info); - __u32 data_len = 0; - int i, err; - void *ptr; - - if (arrays >> BPF_PROG_INFO_LAST_ARRAY) - return libbpf_err_ptr(-EINVAL); - - /* step 1: get array dimensions */ - err = bpf_obj_get_info_by_fd(fd, &info, &info_len); - if (err) { - pr_debug("can't get prog info: %s", strerror(errno)); - return libbpf_err_ptr(-EFAULT); - } - - /* step 2: calculate total size of all arrays */ - for (i = BPF_PROG_INFO_FIRST_ARRAY; i < BPF_PROG_INFO_LAST_ARRAY; ++i) { - bool include_array = (arrays & (1UL << i)) > 0; - struct bpf_prog_info_array_desc *desc; - __u32 count, size; - - desc = bpf_prog_info_array_desc + i; - - /* kernel is too old to support this field */ - if (info_len < desc->array_offset + sizeof(__u32) || - info_len < desc->count_offset + sizeof(__u32) || - (desc->size_offset > 0 && info_len < desc->size_offset)) - include_array = false; - - if (!include_array) { - arrays &= ~(1UL << i); /* clear the bit */ - continue; - } - - count = bpf_prog_info_read_offset_u32(&info, desc->count_offset); - size = 
bpf_prog_info_read_offset_u32(&info, desc->size_offset); - - data_len += count * size; - } - - /* step 3: allocate continuous memory */ - data_len = roundup(data_len, sizeof(__u64)); - info_linear = malloc(sizeof(struct bpf_prog_info_linear) + data_len); - if (!info_linear) - return libbpf_err_ptr(-ENOMEM); - - /* step 4: fill data to info_linear->info */ - info_linear->arrays = arrays; - memset(&info_linear->info, 0, sizeof(info)); - ptr = info_linear->data; - - for (i = BPF_PROG_INFO_FIRST_ARRAY; i < BPF_PROG_INFO_LAST_ARRAY; ++i) { - struct bpf_prog_info_array_desc *desc; - __u32 count, size; - - if ((arrays & (1UL << i)) == 0) - continue; - - desc = bpf_prog_info_array_desc + i; - count = bpf_prog_info_read_offset_u32(&info, desc->count_offset); - size = bpf_prog_info_read_offset_u32(&info, desc->size_offset); - bpf_prog_info_set_offset_u32(&info_linear->info, - desc->count_offset, count); - bpf_prog_info_set_offset_u32(&info_linear->info, - desc->size_offset, size); - bpf_prog_info_set_offset_u64(&info_linear->info, - desc->array_offset, - ptr_to_u64(ptr)); - ptr += count * size; - } - - /* step 5: call syscall again to get required arrays */ - err = bpf_obj_get_info_by_fd(fd, &info_linear->info, &info_len); - if (err) { - pr_debug("can't get prog info: %s", strerror(errno)); - free(info_linear); - return libbpf_err_ptr(-EFAULT); - } - - /* step 6: verify the data */ - for (i = BPF_PROG_INFO_FIRST_ARRAY; i < BPF_PROG_INFO_LAST_ARRAY; ++i) { - struct bpf_prog_info_array_desc *desc; - __u32 v1, v2; - - if ((arrays & (1UL << i)) == 0) - continue; - - desc = bpf_prog_info_array_desc + i; - v1 = bpf_prog_info_read_offset_u32(&info, desc->count_offset); - v2 = bpf_prog_info_read_offset_u32(&info_linear->info, - desc->count_offset); - if (v1 != v2) - pr_warn("%s: mismatch in element count\n", __func__); - - v1 = bpf_prog_info_read_offset_u32(&info, desc->size_offset); - v2 = bpf_prog_info_read_offset_u32(&info_linear->info, - desc->size_offset); - if (v1 != v2) - 
pr_warn("%s: mismatch in rec size\n", __func__); - } - - /* step 7: update info_len and data_len */ - info_linear->info_len = sizeof(struct bpf_prog_info); - info_linear->data_len = data_len; - - return info_linear; -} - -void bpf_program__bpil_addr_to_offs(struct bpf_prog_info_linear *info_linear) -{ - int i; - - for (i = BPF_PROG_INFO_FIRST_ARRAY; i < BPF_PROG_INFO_LAST_ARRAY; ++i) { - struct bpf_prog_info_array_desc *desc; - __u64 addr, offs; - - if ((info_linear->arrays & (1UL << i)) == 0) - continue; - - desc = bpf_prog_info_array_desc + i; - addr = bpf_prog_info_read_offset_u64(&info_linear->info, - desc->array_offset); - offs = addr - ptr_to_u64(info_linear->data); - bpf_prog_info_set_offset_u64(&info_linear->info, - desc->array_offset, offs); - } -} - -void bpf_program__bpil_offs_to_addr(struct bpf_prog_info_linear *info_linear) -{ - int i; - - for (i = BPF_PROG_INFO_FIRST_ARRAY; i < BPF_PROG_INFO_LAST_ARRAY; ++i) { - struct bpf_prog_info_array_desc *desc; - __u64 addr, offs; - - if ((info_linear->arrays & (1UL << i)) == 0) - continue; - - desc = bpf_prog_info_array_desc + i; - offs = bpf_prog_info_read_offset_u64(&info_linear->info, - desc->array_offset); - addr = offs + ptr_to_u64(info_linear->data); - bpf_prog_info_set_offset_u64(&info_linear->info, - desc->array_offset, addr); - } -} - int bpf_program__set_attach_target(struct bpf_program *prog, int attach_prog_fd, const char *attach_func_name) diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h index 9e9a3fd3edd8..61493c4cddac 100644 --- a/tools/lib/bpf/libbpf.h +++ b/tools/lib/bpf/libbpf.h @@ -51,6 +51,42 @@ enum libbpf_errno { LIBBPF_API int libbpf_strerror(int err, char *buf, size_t size); +/** + * @brief **libbpf_bpf_attach_type_str()** converts the provided attach type + * value into a textual representation. + * @param t The attach type. + * @return Pointer to a static string identifying the attach type. NULL is + * returned for unknown **bpf_attach_type** values. 
+ */ +LIBBPF_API const char *libbpf_bpf_attach_type_str(enum bpf_attach_type t); + +/** + * @brief **libbpf_bpf_link_type_str()** converts the provided link type value + * into a textual representation. + * @param t The link type. + * @return Pointer to a static string identifying the link type. NULL is + * returned for unknown **bpf_link_type** values. + */ +LIBBPF_API const char *libbpf_bpf_link_type_str(enum bpf_link_type t); + +/** + * @brief **libbpf_bpf_map_type_str()** converts the provided map type value + * into a textual representation. + * @param t The map type. + * @return Pointer to a static string identifying the map type. NULL is + * returned for unknown **bpf_map_type** values. + */ +LIBBPF_API const char *libbpf_bpf_map_type_str(enum bpf_map_type t); + +/** + * @brief **libbpf_bpf_prog_type_str()** converts the provided program type + * value into a textual representation. + * @param t The program type. + * @return Pointer to a static string identifying the program type. NULL is + * returned for unknown **bpf_prog_type** values. + */ +LIBBPF_API const char *libbpf_bpf_prog_type_str(enum bpf_prog_type t); + enum libbpf_print_level { LIBBPF_WARN, LIBBPF_INFO, @@ -65,13 +101,8 @@ LIBBPF_API libbpf_print_fn_t libbpf_set_print(libbpf_print_fn_t fn); /* Hide internal to user */ struct bpf_object; -struct bpf_object_open_attr { - const char *file; - enum bpf_prog_type prog_type; -}; - struct bpf_object_open_opts { - /* size of this struct, for forward/backward compatiblity */ + /* size of this struct, for forward/backward compatibility */ size_t sz; /* object name override, if provided: * - for object open from file, this will override setting object @@ -82,21 +113,12 @@ struct bpf_object_open_opts { const char *object_name; /* parse map definitions non-strictly, allowing extra attributes/data */ bool relaxed_maps; - /* DEPRECATED: handle CO-RE relocations non-strictly, allowing failures. - * Value is ignored. 
Relocations always are processed non-strictly. - * Non-relocatable instructions are replaced with invalid ones to - * prevent accidental errors. - * */ - LIBBPF_DEPRECATED_SINCE(0, 6, "field has no effect") - bool relaxed_core_relocs; /* maps that set the 'pinning' attribute in their definition will have * their pin_path attribute set to a file in this directory, and be * auto-pinned to that path on load; defaults to "/sys/fs/bpf". */ const char *pin_root_path; - - LIBBPF_DEPRECATED_SINCE(0, 7, "use bpf_program__set_attach_target() on each individual bpf_program") - __u32 attach_prog_fd; + long :0; /* Additional kernel config content that augments and overrides * system Kconfig for CONFIG_xxx externs. */ @@ -179,20 +201,10 @@ LIBBPF_API struct bpf_object * bpf_object__open_mem(const void *obj_buf, size_t obj_buf_sz, const struct bpf_object_open_opts *opts); -/* deprecated bpf_object__open variants */ -LIBBPF_DEPRECATED_SINCE(0, 8, "use bpf_object__open_mem() instead") -LIBBPF_API struct bpf_object * -bpf_object__open_buffer(const void *obj_buf, size_t obj_buf_sz, - const char *name); -LIBBPF_DEPRECATED_SINCE(0, 7, "use bpf_object__open_file() instead") -LIBBPF_API struct bpf_object * -bpf_object__open_xattr(struct bpf_object_open_attr *attr); +/* Load/unload object into/from kernel */ +LIBBPF_API int bpf_object__load(struct bpf_object *obj); -enum libbpf_pin_type { - LIBBPF_PIN_NONE, - /* PIN_BY_NAME: pin maps by name (in /sys/fs/bpf by default) */ - LIBBPF_PIN_BY_NAME, -}; +LIBBPF_API void bpf_object__close(struct bpf_object *object); /* pin_maps and unpin_maps can both be called with a NULL path, in which case * they will use the pin_path attribute of each map (and ignore all maps that @@ -206,20 +218,6 @@ LIBBPF_API int bpf_object__pin_programs(struct bpf_object *obj, LIBBPF_API int bpf_object__unpin_programs(struct bpf_object *obj, const char *path); LIBBPF_API int bpf_object__pin(struct bpf_object *object, const char *path); -LIBBPF_API void 
bpf_object__close(struct bpf_object *object); - -struct bpf_object_load_attr { - struct bpf_object *obj; - int log_level; - const char *target_btf_path; -}; - -/* Load/unload object into/from kernel */ -LIBBPF_API int bpf_object__load(struct bpf_object *obj); -LIBBPF_DEPRECATED_SINCE(0, 8, "use bpf_object__load() instead") -LIBBPF_API int bpf_object__load_xattr(struct bpf_object_load_attr *attr); -LIBBPF_DEPRECATED_SINCE(0, 6, "bpf_object__unload() is deprecated, use bpf_object__close() instead") -LIBBPF_API int bpf_object__unload(struct bpf_object *obj); LIBBPF_API const char *bpf_object__name(const struct bpf_object *obj); LIBBPF_API unsigned int bpf_object__kversion(const struct bpf_object *obj); @@ -229,29 +227,10 @@ struct btf; LIBBPF_API struct btf *bpf_object__btf(const struct bpf_object *obj); LIBBPF_API int bpf_object__btf_fd(const struct bpf_object *obj); -LIBBPF_DEPRECATED_SINCE(0, 7, "use bpf_object__find_program_by_name() instead") -LIBBPF_API struct bpf_program * -bpf_object__find_program_by_title(const struct bpf_object *obj, - const char *title); LIBBPF_API struct bpf_program * bpf_object__find_program_by_name(const struct bpf_object *obj, const char *name); -LIBBPF_API LIBBPF_DEPRECATED_SINCE(0, 7, "track bpf_objects in application code instead") -struct bpf_object *bpf_object__next(struct bpf_object *prev); -#define bpf_object__for_each_safe(pos, tmp) \ - for ((pos) = bpf_object__next(NULL), \ - (tmp) = bpf_object__next(pos); \ - (pos) != NULL; \ - (pos) = (tmp), (tmp) = bpf_object__next(tmp)) - -typedef void (*bpf_object_clear_priv_t)(struct bpf_object *, void *); -LIBBPF_DEPRECATED_SINCE(0, 7, "storage via set_priv/priv is deprecated") -LIBBPF_API int bpf_object__set_priv(struct bpf_object *obj, void *priv, - bpf_object_clear_priv_t clear_priv); -LIBBPF_DEPRECATED_SINCE(0, 7, "storage via set_priv/priv is deprecated") -LIBBPF_API void *bpf_object__priv(const struct bpf_object *prog); - LIBBPF_API int libbpf_prog_type_by_name(const char *name, 
enum bpf_prog_type *prog_type, enum bpf_attach_type *expected_attach_type); @@ -262,9 +241,7 @@ LIBBPF_API int libbpf_find_vmlinux_btf_id(const char *name, /* Accessors of bpf_program */ struct bpf_program; -LIBBPF_API LIBBPF_DEPRECATED_SINCE(0, 7, "use bpf_object__next_program() instead") -struct bpf_program *bpf_program__next(struct bpf_program *prog, - const struct bpf_object *obj); + LIBBPF_API struct bpf_program * bpf_object__next_program(const struct bpf_object *obj, struct bpf_program *prog); @@ -273,33 +250,17 @@ bpf_object__next_program(const struct bpf_object *obj, struct bpf_program *prog) (pos) != NULL; \ (pos) = bpf_object__next_program((obj), (pos))) -LIBBPF_API LIBBPF_DEPRECATED_SINCE(0, 7, "use bpf_object__prev_program() instead") -struct bpf_program *bpf_program__prev(struct bpf_program *prog, - const struct bpf_object *obj); LIBBPF_API struct bpf_program * bpf_object__prev_program(const struct bpf_object *obj, struct bpf_program *prog); -typedef void (*bpf_program_clear_priv_t)(struct bpf_program *, void *); - -LIBBPF_DEPRECATED_SINCE(0, 7, "storage via set_priv/priv is deprecated") -LIBBPF_API int bpf_program__set_priv(struct bpf_program *prog, void *priv, - bpf_program_clear_priv_t clear_priv); -LIBBPF_DEPRECATED_SINCE(0, 7, "storage via set_priv/priv is deprecated") -LIBBPF_API void *bpf_program__priv(const struct bpf_program *prog); LIBBPF_API void bpf_program__set_ifindex(struct bpf_program *prog, __u32 ifindex); LIBBPF_API const char *bpf_program__name(const struct bpf_program *prog); LIBBPF_API const char *bpf_program__section_name(const struct bpf_program *prog); -LIBBPF_API LIBBPF_DEPRECATED("BPF program title is confusing term; please use bpf_program__section_name() instead") -const char *bpf_program__title(const struct bpf_program *prog, bool needs_copy); LIBBPF_API bool bpf_program__autoload(const struct bpf_program *prog); LIBBPF_API int bpf_program__set_autoload(struct bpf_program *prog, bool autoload); -/* returns program size in 
bytes */ -LIBBPF_DEPRECATED_SINCE(0, 7, "use bpf_program__insn_cnt() instead") -LIBBPF_API size_t bpf_program__size(const struct bpf_program *prog); - struct bpf_insn; /** @@ -352,17 +313,7 @@ LIBBPF_API int bpf_program__set_insns(struct bpf_program *prog, */ LIBBPF_API size_t bpf_program__insn_cnt(const struct bpf_program *prog); -LIBBPF_DEPRECATED_SINCE(0, 6, "use bpf_object__load() instead") -LIBBPF_API int bpf_program__load(struct bpf_program *prog, const char *license, __u32 kern_version); LIBBPF_API int bpf_program__fd(const struct bpf_program *prog); -LIBBPF_DEPRECATED_SINCE(0, 7, "multi-instance bpf_program support is deprecated") -LIBBPF_API int bpf_program__pin_instance(struct bpf_program *prog, - const char *path, - int instance); -LIBBPF_DEPRECATED_SINCE(0, 7, "multi-instance bpf_program support is deprecated") -LIBBPF_API int bpf_program__unpin_instance(struct bpf_program *prog, - const char *path, - int instance); /** * @brief **bpf_program__pin()** pins the BPF program to a file @@ -506,6 +457,52 @@ bpf_program__attach_kprobe_multi_opts(const struct bpf_program *prog, const char *pattern, const struct bpf_kprobe_multi_opts *opts); +struct bpf_ksyscall_opts { + /* size of this struct, for forward/backward compatiblity */ + size_t sz; + /* custom user-provided value fetchable through bpf_get_attach_cookie() */ + __u64 bpf_cookie; + /* attach as return probe? */ + bool retprobe; + size_t :0; +}; +#define bpf_ksyscall_opts__last_field retprobe + +/** + * @brief **bpf_program__attach_ksyscall()** attaches a BPF program + * to kernel syscall handler of a specified syscall. Optionally it's possible + * to request to install retprobe that will be triggered at syscall exit. It's + * also possible to associate BPF cookie (though options). 
+ * + * Libbpf automatically will determine correct full kernel function name, + * which depending on system architecture and kernel version/configuration + * could be of the form __<arch>_sys_<syscall> or __se_sys_<syscall>, and will + * attach specified program using kprobe/kretprobe mechanism. + * + * **bpf_program__attach_ksyscall()** is an API counterpart of declarative + * **SEC("ksyscall/<syscall>")** annotation of BPF programs. + * + * At the moment **SEC("ksyscall")** and **bpf_program__attach_ksyscall()** do + * not handle all the calling convention quirks for mmap(), clone() and compat + * syscalls. It also only attaches to "native" syscall interfaces. If host + * system supports compat syscalls or defines 32-bit syscalls in 64-bit + * kernel, such syscall interfaces won't be attached to by libbpf. + * + * These limitations may or may not change in the future. Therefore it is + * recommended to use SEC("kprobe") for these syscalls or if working with + * compat and 32-bit interfaces is required. + * + * @param prog BPF program to attach + * @param syscall_name Symbolic name of the syscall (e.g., "bpf") + * @param opts Additional options (see **struct bpf_ksyscall_opts**) + * @return Reference to the newly created BPF link; or NULL is returned on + * error, error code is stored in errno + */ +LIBBPF_API struct bpf_link * +bpf_program__attach_ksyscall(const struct bpf_program *prog, + const char *syscall_name, + const struct bpf_ksyscall_opts *opts); + struct bpf_uprobe_opts { /* size of this struct, for forward/backward compatiblity */ size_t sz; @@ -662,99 +659,6 @@ LIBBPF_API struct bpf_link * bpf_program__attach_iter(const struct bpf_program *prog, const struct bpf_iter_attach_opts *opts); -/* - * Libbpf allows callers to adjust BPF programs before being loaded - * into kernel. One program in an object file can be transformed into - * multiple variants to be attached to different hooks. 
- * - * bpf_program_prep_t, bpf_program__set_prep and bpf_program__nth_fd - * form an API for this purpose. - * - * - bpf_program_prep_t: - * Defines a 'preprocessor', which is a caller defined function - * passed to libbpf through bpf_program__set_prep(), and will be - * called before program is loaded. The processor should adjust - * the program one time for each instance according to the instance id - * passed to it. - * - * - bpf_program__set_prep: - * Attaches a preprocessor to a BPF program. The number of instances - * that should be created is also passed through this function. - * - * - bpf_program__nth_fd: - * After the program is loaded, get resulting FD of a given instance - * of the BPF program. - * - * If bpf_program__set_prep() is not used, the program would be loaded - * without adjustment during bpf_object__load(). The program has only - * one instance. In this case bpf_program__fd(prog) is equal to - * bpf_program__nth_fd(prog, 0). - */ -struct bpf_prog_prep_result { - /* - * If not NULL, load new instruction array. - * If set to NULL, don't load this instance. - */ - struct bpf_insn *new_insn_ptr; - int new_insn_cnt; - - /* If not NULL, result FD is written to it. */ - int *pfd; -}; - -/* - * Parameters of bpf_program_prep_t: - * - prog: The bpf_program being loaded. - * - n: Index of instance being generated. - * - insns: BPF instructions array. - * - insns_cnt:Number of instructions in insns. - * - res: Output parameter, result of transformation. - * - * Return value: - * - Zero: pre-processing success. - * - Non-zero: pre-processing error, stop loading. 
- */ -typedef int (*bpf_program_prep_t)(struct bpf_program *prog, int n, - struct bpf_insn *insns, int insns_cnt, - struct bpf_prog_prep_result *res); - -LIBBPF_DEPRECATED_SINCE(0, 7, "use bpf_program__insns() for getting bpf_program instructions") -LIBBPF_API int bpf_program__set_prep(struct bpf_program *prog, int nr_instance, - bpf_program_prep_t prep); - -LIBBPF_DEPRECATED_SINCE(0, 7, "multi-instance bpf_program support is deprecated") -LIBBPF_API int bpf_program__nth_fd(const struct bpf_program *prog, int n); - -/* - * Adjust type of BPF program. Default is kprobe. - */ -LIBBPF_DEPRECATED_SINCE(0, 8, "use bpf_program__set_type() instead") -LIBBPF_API int bpf_program__set_socket_filter(struct bpf_program *prog); -LIBBPF_DEPRECATED_SINCE(0, 8, "use bpf_program__set_type() instead") -LIBBPF_API int bpf_program__set_tracepoint(struct bpf_program *prog); -LIBBPF_DEPRECATED_SINCE(0, 8, "use bpf_program__set_type() instead") -LIBBPF_API int bpf_program__set_raw_tracepoint(struct bpf_program *prog); -LIBBPF_DEPRECATED_SINCE(0, 8, "use bpf_program__set_type() instead") -LIBBPF_API int bpf_program__set_kprobe(struct bpf_program *prog); -LIBBPF_DEPRECATED_SINCE(0, 8, "use bpf_program__set_type() instead") -LIBBPF_API int bpf_program__set_lsm(struct bpf_program *prog); -LIBBPF_DEPRECATED_SINCE(0, 8, "use bpf_program__set_type() instead") -LIBBPF_API int bpf_program__set_sched_cls(struct bpf_program *prog); -LIBBPF_DEPRECATED_SINCE(0, 8, "use bpf_program__set_type() instead") -LIBBPF_API int bpf_program__set_sched_act(struct bpf_program *prog); -LIBBPF_DEPRECATED_SINCE(0, 8, "use bpf_program__set_type() instead") -LIBBPF_API int bpf_program__set_xdp(struct bpf_program *prog); -LIBBPF_DEPRECATED_SINCE(0, 8, "use bpf_program__set_type() instead") -LIBBPF_API int bpf_program__set_perf_event(struct bpf_program *prog); -LIBBPF_DEPRECATED_SINCE(0, 8, "use bpf_program__set_type() instead") -LIBBPF_API int bpf_program__set_tracing(struct bpf_program *prog); 
-LIBBPF_DEPRECATED_SINCE(0, 8, "use bpf_program__set_type() instead") -LIBBPF_API int bpf_program__set_struct_ops(struct bpf_program *prog); -LIBBPF_DEPRECATED_SINCE(0, 8, "use bpf_program__set_type() instead") -LIBBPF_API int bpf_program__set_extension(struct bpf_program *prog); -LIBBPF_DEPRECATED_SINCE(0, 8, "use bpf_program__set_type() instead") -LIBBPF_API int bpf_program__set_sk_lookup(struct bpf_program *prog); - LIBBPF_API enum bpf_prog_type bpf_program__type(const struct bpf_program *prog); /** @@ -817,47 +721,6 @@ LIBBPF_API int bpf_program__set_attach_target(struct bpf_program *prog, int attach_prog_fd, const char *attach_func_name); -LIBBPF_DEPRECATED_SINCE(0, 8, "use bpf_program__type() instead") -LIBBPF_API bool bpf_program__is_socket_filter(const struct bpf_program *prog); -LIBBPF_DEPRECATED_SINCE(0, 8, "use bpf_program__type() instead") -LIBBPF_API bool bpf_program__is_tracepoint(const struct bpf_program *prog); -LIBBPF_DEPRECATED_SINCE(0, 8, "use bpf_program__type() instead") -LIBBPF_API bool bpf_program__is_raw_tracepoint(const struct bpf_program *prog); -LIBBPF_DEPRECATED_SINCE(0, 8, "use bpf_program__type() instead") -LIBBPF_API bool bpf_program__is_kprobe(const struct bpf_program *prog); -LIBBPF_DEPRECATED_SINCE(0, 8, "use bpf_program__type() instead") -LIBBPF_API bool bpf_program__is_lsm(const struct bpf_program *prog); -LIBBPF_DEPRECATED_SINCE(0, 8, "use bpf_program__type() instead") -LIBBPF_API bool bpf_program__is_sched_cls(const struct bpf_program *prog); -LIBBPF_DEPRECATED_SINCE(0, 8, "use bpf_program__type() instead") -LIBBPF_API bool bpf_program__is_sched_act(const struct bpf_program *prog); -LIBBPF_DEPRECATED_SINCE(0, 8, "use bpf_program__type() instead") -LIBBPF_API bool bpf_program__is_xdp(const struct bpf_program *prog); -LIBBPF_DEPRECATED_SINCE(0, 8, "use bpf_program__type() instead") -LIBBPF_API bool bpf_program__is_perf_event(const struct bpf_program *prog); -LIBBPF_DEPRECATED_SINCE(0, 8, "use bpf_program__type() instead") 
-LIBBPF_API bool bpf_program__is_tracing(const struct bpf_program *prog); -LIBBPF_DEPRECATED_SINCE(0, 8, "use bpf_program__type() instead") -LIBBPF_API bool bpf_program__is_struct_ops(const struct bpf_program *prog); -LIBBPF_DEPRECATED_SINCE(0, 8, "use bpf_program__type() instead") -LIBBPF_API bool bpf_program__is_extension(const struct bpf_program *prog); -LIBBPF_DEPRECATED_SINCE(0, 8, "use bpf_program__type() instead") -LIBBPF_API bool bpf_program__is_sk_lookup(const struct bpf_program *prog); - -/* - * No need for __attribute__((packed)), all members of 'bpf_map_def' - * are all aligned. In addition, using __attribute__((packed)) - * would trigger a -Wpacked warning message, and lead to an error - * if -Werror is set. - */ -struct bpf_map_def { - unsigned int type; - unsigned int key_size; - unsigned int value_size; - unsigned int max_entries; - unsigned int map_flags; -}; - /** * @brief **bpf_object__find_map_by_name()** returns BPF map of * the given name, if it exists within the passed BPF object @@ -872,16 +735,6 @@ bpf_object__find_map_by_name(const struct bpf_object *obj, const char *name); LIBBPF_API int bpf_object__find_map_fd_by_name(const struct bpf_object *obj, const char *name); -/* - * Get bpf_map through the offset of corresponding struct bpf_map_def - * in the BPF object file. 
- */ -LIBBPF_API LIBBPF_DEPRECATED_SINCE(0, 8, "use bpf_object__find_map_by_name() instead") -struct bpf_map * -bpf_object__find_map_by_offset(struct bpf_object *obj, size_t offset); - -LIBBPF_API LIBBPF_DEPRECATED_SINCE(0, 7, "use bpf_object__next_map() instead") -struct bpf_map *bpf_map__next(const struct bpf_map *map, const struct bpf_object *obj); LIBBPF_API struct bpf_map * bpf_object__next_map(const struct bpf_object *obj, const struct bpf_map *map); @@ -891,8 +744,6 @@ bpf_object__next_map(const struct bpf_object *obj, const struct bpf_map *map); (pos) = bpf_object__next_map((obj), (pos))) #define bpf_map__for_each bpf_object__for_each_map -LIBBPF_API LIBBPF_DEPRECATED_SINCE(0, 7, "use bpf_object__prev_map() instead") -struct bpf_map *bpf_map__prev(const struct bpf_map *map, const struct bpf_object *obj); LIBBPF_API struct bpf_map * bpf_object__prev_map(const struct bpf_object *obj, const struct bpf_map *map); @@ -926,9 +777,6 @@ LIBBPF_API bool bpf_map__autocreate(const struct bpf_map *map); */ LIBBPF_API int bpf_map__fd(const struct bpf_map *map); LIBBPF_API int bpf_map__reuse_fd(struct bpf_map *map, int fd); -/* get map definition */ -LIBBPF_API LIBBPF_DEPRECATED_SINCE(0, 8, "use appropriate getters or setters instead") -const struct bpf_map_def *bpf_map__def(const struct bpf_map *map); /* get map name */ LIBBPF_API const char *bpf_map__name(const struct bpf_map *map); /* get/set map type */ @@ -937,8 +785,6 @@ LIBBPF_API int bpf_map__set_type(struct bpf_map *map, enum bpf_map_type type); /* get/set map size (max_entries) */ LIBBPF_API __u32 bpf_map__max_entries(const struct bpf_map *map); LIBBPF_API int bpf_map__set_max_entries(struct bpf_map *map, __u32 max_entries); -LIBBPF_DEPRECATED_SINCE(0, 8, "use bpf_map__set_max_entries() instead") -LIBBPF_API int bpf_map__resize(struct bpf_map *map, __u32 max_entries); /* get/set map flags */ LIBBPF_API __u32 bpf_map__map_flags(const struct bpf_map *map); LIBBPF_API int bpf_map__set_map_flags(struct bpf_map 
*map, __u32 flags); @@ -961,17 +807,9 @@ LIBBPF_API int bpf_map__set_ifindex(struct bpf_map *map, __u32 ifindex); LIBBPF_API __u64 bpf_map__map_extra(const struct bpf_map *map); LIBBPF_API int bpf_map__set_map_extra(struct bpf_map *map, __u64 map_extra); -typedef void (*bpf_map_clear_priv_t)(struct bpf_map *, void *); -LIBBPF_DEPRECATED_SINCE(0, 7, "storage via set_priv/priv is deprecated") -LIBBPF_API int bpf_map__set_priv(struct bpf_map *map, void *priv, - bpf_map_clear_priv_t clear_priv); -LIBBPF_DEPRECATED_SINCE(0, 7, "storage via set_priv/priv is deprecated") -LIBBPF_API void *bpf_map__priv(const struct bpf_map *map); LIBBPF_API int bpf_map__set_initial_value(struct bpf_map *map, const void *data, size_t size); LIBBPF_API const void *bpf_map__initial_value(struct bpf_map *map, size_t *psize); -LIBBPF_DEPRECATED_SINCE(0, 8, "use bpf_map__type() instead") -LIBBPF_API bool bpf_map__is_offload_neutral(const struct bpf_map *map); /** * @brief **bpf_map__is_internal()** tells the caller whether or not the @@ -1094,65 +932,6 @@ LIBBPF_API int bpf_map__lookup_and_delete_elem(const struct bpf_map *map, LIBBPF_API int bpf_map__get_next_key(const struct bpf_map *map, const void *cur_key, void *next_key, size_t key_sz); -/** - * @brief **libbpf_get_error()** extracts the error code from the passed - * pointer - * @param ptr pointer returned from libbpf API function - * @return error code; or 0 if no error occured - * - * Many libbpf API functions which return pointers have logic to encode error - * codes as pointers, and do not return NULL. Meaning **libbpf_get_error()** - * should be used on the return value from these functions immediately after - * calling the API function, with no intervening calls that could clobber the - * `errno` variable. Consult the individual functions documentation to verify - * if this logic applies should be used. 
- * - * For these API functions, if `libbpf_set_strict_mode(LIBBPF_STRICT_CLEAN_PTRS)` - * is enabled, NULL is returned on error instead. - * - * If ptr is NULL, then errno should be already set by the failing - * API, because libbpf never returns NULL on success and it now always - * sets errno on error. - * - * Example usage: - * - * struct perf_buffer *pb; - * - * pb = perf_buffer__new(bpf_map__fd(obj->maps.events), PERF_BUFFER_PAGES, &opts); - * err = libbpf_get_error(pb); - * if (err) { - * pb = NULL; - * fprintf(stderr, "failed to open perf buffer: %d\n", err); - * goto cleanup; - * } - */ -LIBBPF_API long libbpf_get_error(const void *ptr); - -struct bpf_prog_load_attr { - const char *file; - enum bpf_prog_type prog_type; - enum bpf_attach_type expected_attach_type; - int ifindex; - int log_level; - int prog_flags; -}; - -LIBBPF_DEPRECATED_SINCE(0, 8, "use bpf_object__open() and bpf_object__load() instead") -LIBBPF_API int bpf_prog_load_xattr(const struct bpf_prog_load_attr *attr, - struct bpf_object **pobj, int *prog_fd); -LIBBPF_DEPRECATED_SINCE(0, 7, "use bpf_object__open() and bpf_object__load() instead") -LIBBPF_API int bpf_prog_load_deprecated(const char *file, enum bpf_prog_type type, - struct bpf_object **pobj, int *prog_fd); - -/* XDP related API */ -struct xdp_link_info { - __u32 prog_id; - __u32 drv_prog_id; - __u32 hw_prog_id; - __u32 skb_prog_id; - __u8 attach_mode; -}; - struct bpf_xdp_set_link_opts { size_t sz; int old_fd; @@ -1160,17 +939,6 @@ struct bpf_xdp_set_link_opts { }; #define bpf_xdp_set_link_opts__last_field old_fd -LIBBPF_DEPRECATED_SINCE(0, 8, "use bpf_xdp_attach() instead") -LIBBPF_API int bpf_set_link_xdp_fd(int ifindex, int fd, __u32 flags); -LIBBPF_DEPRECATED_SINCE(0, 8, "use bpf_xdp_attach() instead") -LIBBPF_API int bpf_set_link_xdp_fd_opts(int ifindex, int fd, __u32 flags, - const struct bpf_xdp_set_link_opts *opts); -LIBBPF_DEPRECATED_SINCE(0, 8, "use bpf_xdp_query_id() instead") -LIBBPF_API int bpf_get_link_xdp_id(int 
ifindex, __u32 *prog_id, __u32 flags); -LIBBPF_DEPRECATED_SINCE(0, 8, "use bpf_xdp_query() instead") -LIBBPF_API int bpf_get_link_xdp_info(int ifindex, struct xdp_link_info *info, - size_t info_size, __u32 flags); - struct bpf_xdp_attach_opts { size_t sz; int old_prog_fd; @@ -1269,17 +1037,7 @@ typedef void (*perf_buffer_lost_fn)(void *ctx, int cpu, __u64 cnt); /* common use perf buffer options */ struct perf_buffer_opts { - union { - size_t sz; - struct { /* DEPRECATED: will be removed in v1.0 */ - /* if specified, sample_cb is called for each sample */ - perf_buffer_sample_fn sample_cb; - /* if specified, lost_cb is called for each batch of lost samples */ - perf_buffer_lost_fn lost_cb; - /* ctx is provided to sample_cb and lost_cb */ - void *ctx; - }; - }; + size_t sz; }; #define perf_buffer_opts__last_field sz @@ -1300,21 +1058,6 @@ perf_buffer__new(int map_fd, size_t page_cnt, perf_buffer_sample_fn sample_cb, perf_buffer_lost_fn lost_cb, void *ctx, const struct perf_buffer_opts *opts); -LIBBPF_API struct perf_buffer * -perf_buffer__new_v0_6_0(int map_fd, size_t page_cnt, - perf_buffer_sample_fn sample_cb, perf_buffer_lost_fn lost_cb, void *ctx, - const struct perf_buffer_opts *opts); - -LIBBPF_API LIBBPF_DEPRECATED_SINCE(0, 7, "use new variant of perf_buffer__new() instead") -struct perf_buffer *perf_buffer__new_deprecated(int map_fd, size_t page_cnt, - const struct perf_buffer_opts *opts); - -#define perf_buffer__new(...) 
___libbpf_overload(___perf_buffer_new, __VA_ARGS__) -#define ___perf_buffer_new6(map_fd, page_cnt, sample_cb, lost_cb, ctx, opts) \ - perf_buffer__new(map_fd, page_cnt, sample_cb, lost_cb, ctx, opts) -#define ___perf_buffer_new3(map_fd, page_cnt, opts) \ - perf_buffer__new_deprecated(map_fd, page_cnt, opts) - enum bpf_perf_event_ret { LIBBPF_PERF_EVENT_DONE = 0, LIBBPF_PERF_EVENT_ERROR = -1, @@ -1328,21 +1071,9 @@ typedef enum bpf_perf_event_ret /* raw perf buffer options, giving most power and control */ struct perf_buffer_raw_opts { - union { - struct { - size_t sz; - long :0; - long :0; - }; - struct { /* DEPRECATED: will be removed in v1.0 */ - /* perf event attrs passed directly into perf_event_open() */ - struct perf_event_attr *attr; - /* raw event callback */ - perf_buffer_event_fn event_cb; - /* ctx is provided to event_cb */ - void *ctx; - }; - }; + size_t sz; + long :0; + long :0; /* if cpu_cnt == 0, open all on all possible CPUs (up to the number of * max_entries of given PERF_EVENT_ARRAY map) */ @@ -1354,26 +1085,13 @@ struct perf_buffer_raw_opts { }; #define perf_buffer_raw_opts__last_field map_keys +struct perf_event_attr; + LIBBPF_API struct perf_buffer * perf_buffer__new_raw(int map_fd, size_t page_cnt, struct perf_event_attr *attr, perf_buffer_event_fn event_cb, void *ctx, const struct perf_buffer_raw_opts *opts); -LIBBPF_API struct perf_buffer * -perf_buffer__new_raw_v0_6_0(int map_fd, size_t page_cnt, struct perf_event_attr *attr, - perf_buffer_event_fn event_cb, void *ctx, - const struct perf_buffer_raw_opts *opts); - -LIBBPF_API LIBBPF_DEPRECATED_SINCE(0, 7, "use new variant of perf_buffer__new_raw() instead") -struct perf_buffer *perf_buffer__new_raw_deprecated(int map_fd, size_t page_cnt, - const struct perf_buffer_raw_opts *opts); - -#define perf_buffer__new_raw(...) 
___libbpf_overload(___perf_buffer_new_raw, __VA_ARGS__) -#define ___perf_buffer_new_raw6(map_fd, page_cnt, attr, event_cb, ctx, opts) \ - perf_buffer__new_raw(map_fd, page_cnt, attr, event_cb, ctx, opts) -#define ___perf_buffer_new_raw3(map_fd, page_cnt, opts) \ - perf_buffer__new_raw_deprecated(map_fd, page_cnt, opts) - LIBBPF_API void perf_buffer__free(struct perf_buffer *pb); LIBBPF_API int perf_buffer__epoll_fd(const struct perf_buffer *pb); LIBBPF_API int perf_buffer__poll(struct perf_buffer *pb, int timeout_ms); @@ -1381,15 +1099,22 @@ LIBBPF_API int perf_buffer__consume(struct perf_buffer *pb); LIBBPF_API int perf_buffer__consume_buffer(struct perf_buffer *pb, size_t buf_idx); LIBBPF_API size_t perf_buffer__buffer_cnt(const struct perf_buffer *pb); LIBBPF_API int perf_buffer__buffer_fd(const struct perf_buffer *pb, size_t buf_idx); - -typedef enum bpf_perf_event_ret - (*bpf_perf_event_print_t)(struct perf_event_header *hdr, - void *private_data); -LIBBPF_DEPRECATED_SINCE(0, 8, "use perf_buffer__poll() or perf_buffer__consume() instead") -LIBBPF_API enum bpf_perf_event_ret -bpf_perf_event_read_simple(void *mmap_mem, size_t mmap_size, size_t page_size, - void **copy_mem, size_t *copy_size, - bpf_perf_event_print_t fn, void *private_data); +/** + * @brief **perf_buffer__buffer()** returns the per-cpu raw mmap()'ed underlying + * memory region of the ring buffer. + * This ring buffer can be used to implement a custom events consumer. + * The ring buffer starts with the *struct perf_event_mmap_page*, which + * holds the ring buffer managment fields, when accessing the header + * structure it's important to be SMP aware. + * You can refer to *perf_event_read_simple* for a simple example. 
+ * @param pb the perf buffer structure + * @param buf_idx the buffer index to retreive + * @param buf (out) gets the base pointer of the mmap()'ed memory + * @param buf_size (out) gets the size of the mmap()'ed region + * @return 0 on success, negative error code for failure + */ +LIBBPF_API int perf_buffer__buffer(struct perf_buffer *pb, int buf_idx, void **buf, + size_t *buf_size); struct bpf_prog_linfo; struct bpf_prog_info; @@ -1412,14 +1137,6 @@ bpf_prog_linfo__lfind(const struct bpf_prog_linfo *prog_linfo, * user, causing subsequent probes to fail. In this case, the caller may want * to adjust that limit with setrlimit(). */ -LIBBPF_DEPRECATED_SINCE(0, 8, "use libbpf_probe_bpf_prog_type() instead") -LIBBPF_API bool bpf_probe_prog_type(enum bpf_prog_type prog_type, __u32 ifindex); -LIBBPF_DEPRECATED_SINCE(0, 8, "use libbpf_probe_bpf_map_type() instead") -LIBBPF_API bool bpf_probe_map_type(enum bpf_map_type map_type, __u32 ifindex); -LIBBPF_DEPRECATED_SINCE(0, 8, "use libbpf_probe_bpf_helper() instead") -LIBBPF_API bool bpf_probe_helper(enum bpf_func_id id, enum bpf_prog_type prog_type, __u32 ifindex); -LIBBPF_DEPRECATED_SINCE(0, 8, "implement your own or use bpftool for feature detection") -LIBBPF_API bool bpf_probe_large_insn_limit(__u32 ifindex); /** * @brief **libbpf_probe_bpf_prog_type()** detects if host kernel supports @@ -1463,72 +1180,6 @@ LIBBPF_API int libbpf_probe_bpf_map_type(enum bpf_map_type map_type, const void LIBBPF_API int libbpf_probe_bpf_helper(enum bpf_prog_type prog_type, enum bpf_func_id helper_id, const void *opts); -/* - * Get bpf_prog_info in continuous memory - * - * struct bpf_prog_info has multiple arrays. The user has option to choose - * arrays to fetch from kernel. The following APIs provide an uniform way to - * fetch these data. All arrays in bpf_prog_info are stored in a single - * continuous memory region. This makes it easy to store the info in a - * file. 
- * - * Before writing bpf_prog_info_linear to files, it is necessary to - * translate pointers in bpf_prog_info to offsets. Helper functions - * bpf_program__bpil_addr_to_offs() and bpf_program__bpil_offs_to_addr() - * are introduced to switch between pointers and offsets. - * - * Examples: - * # To fetch map_ids and prog_tags: - * __u64 arrays = (1UL << BPF_PROG_INFO_MAP_IDS) | - * (1UL << BPF_PROG_INFO_PROG_TAGS); - * struct bpf_prog_info_linear *info_linear = - * bpf_program__get_prog_info_linear(fd, arrays); - * - * # To save data in file - * bpf_program__bpil_addr_to_offs(info_linear); - * write(f, info_linear, sizeof(*info_linear) + info_linear->data_len); - * - * # To read data from file - * read(f, info_linear, <proper_size>); - * bpf_program__bpil_offs_to_addr(info_linear); - */ -enum bpf_prog_info_array { - BPF_PROG_INFO_FIRST_ARRAY = 0, - BPF_PROG_INFO_JITED_INSNS = 0, - BPF_PROG_INFO_XLATED_INSNS, - BPF_PROG_INFO_MAP_IDS, - BPF_PROG_INFO_JITED_KSYMS, - BPF_PROG_INFO_JITED_FUNC_LENS, - BPF_PROG_INFO_FUNC_INFO, - BPF_PROG_INFO_LINE_INFO, - BPF_PROG_INFO_JITED_LINE_INFO, - BPF_PROG_INFO_PROG_TAGS, - BPF_PROG_INFO_LAST_ARRAY, -}; - -struct bpf_prog_info_linear { - /* size of struct bpf_prog_info, when the tool is compiled */ - __u32 info_len; - /* total bytes allocated for data, round up to 8 bytes */ - __u32 data_len; - /* which arrays are included in data */ - __u64 arrays; - struct bpf_prog_info info; - __u8 data[]; -}; - -LIBBPF_DEPRECATED_SINCE(0, 6, "use a custom linear prog_info wrapper") -LIBBPF_API struct bpf_prog_info_linear * -bpf_program__get_prog_info_linear(int fd, __u64 arrays); - -LIBBPF_DEPRECATED_SINCE(0, 6, "use a custom linear prog_info wrapper") -LIBBPF_API void -bpf_program__bpil_addr_to_offs(struct bpf_prog_info_linear *info_linear); - -LIBBPF_DEPRECATED_SINCE(0, 6, "use a custom linear prog_info wrapper") -LIBBPF_API void -bpf_program__bpil_offs_to_addr(struct bpf_prog_info_linear *info_linear); - /** * @brief 
**libbpf_num_possible_cpus()** is a helper function to get the * number of possible CPUs that the host kernel supports and expects. diff --git a/tools/lib/bpf/libbpf.map b/tools/lib/bpf/libbpf.map index 52973cffc20c..119e6e1ea7f1 100644 --- a/tools/lib/bpf/libbpf.map +++ b/tools/lib/bpf/libbpf.map @@ -1,29 +1,14 @@ LIBBPF_0.0.1 { global: bpf_btf_get_fd_by_id; - bpf_create_map; - bpf_create_map_in_map; - bpf_create_map_in_map_node; - bpf_create_map_name; - bpf_create_map_node; - bpf_create_map_xattr; - bpf_load_btf; - bpf_load_program; - bpf_load_program_xattr; bpf_map__btf_key_type_id; bpf_map__btf_value_type_id; - bpf_map__def; bpf_map__fd; - bpf_map__is_offload_neutral; bpf_map__name; - bpf_map__next; bpf_map__pin; - bpf_map__prev; - bpf_map__priv; bpf_map__reuse_fd; bpf_map__set_ifindex; bpf_map__set_inner_map_fd; - bpf_map__set_priv; bpf_map__unpin; bpf_map_delete_elem; bpf_map_get_fd_by_id; @@ -38,79 +23,37 @@ LIBBPF_0.0.1 { bpf_object__btf_fd; bpf_object__close; bpf_object__find_map_by_name; - bpf_object__find_map_by_offset; - bpf_object__find_program_by_title; bpf_object__kversion; bpf_object__load; bpf_object__name; - bpf_object__next; bpf_object__open; - bpf_object__open_buffer; - bpf_object__open_xattr; bpf_object__pin; bpf_object__pin_maps; bpf_object__pin_programs; - bpf_object__priv; - bpf_object__set_priv; - bpf_object__unload; bpf_object__unpin_maps; bpf_object__unpin_programs; - bpf_perf_event_read_simple; bpf_prog_attach; bpf_prog_detach; bpf_prog_detach2; bpf_prog_get_fd_by_id; bpf_prog_get_next_id; - bpf_prog_load; - bpf_prog_load_xattr; bpf_prog_query; - bpf_prog_test_run; - bpf_prog_test_run_xattr; bpf_program__fd; - bpf_program__is_kprobe; - bpf_program__is_perf_event; - bpf_program__is_raw_tracepoint; - bpf_program__is_sched_act; - bpf_program__is_sched_cls; - bpf_program__is_socket_filter; - bpf_program__is_tracepoint; - bpf_program__is_xdp; - bpf_program__load; - bpf_program__next; - bpf_program__nth_fd; bpf_program__pin; - 
bpf_program__pin_instance; - bpf_program__prev; - bpf_program__priv; bpf_program__set_expected_attach_type; bpf_program__set_ifindex; - bpf_program__set_kprobe; - bpf_program__set_perf_event; - bpf_program__set_prep; - bpf_program__set_priv; - bpf_program__set_raw_tracepoint; - bpf_program__set_sched_act; - bpf_program__set_sched_cls; - bpf_program__set_socket_filter; - bpf_program__set_tracepoint; bpf_program__set_type; - bpf_program__set_xdp; - bpf_program__title; bpf_program__unload; bpf_program__unpin; - bpf_program__unpin_instance; bpf_prog_linfo__free; bpf_prog_linfo__new; bpf_prog_linfo__lfind_addr_func; bpf_prog_linfo__lfind; bpf_raw_tracepoint_open; - bpf_set_link_xdp_fd; bpf_task_fd_query; - bpf_verify_program; btf__fd; btf__find_by_name; btf__free; - btf__get_from_id; btf__name_by_offset; btf__new; btf__resolve_size; @@ -127,48 +70,24 @@ LIBBPF_0.0.1 { LIBBPF_0.0.2 { global: - bpf_probe_helper; - bpf_probe_map_type; - bpf_probe_prog_type; - bpf_map__resize; bpf_map_lookup_elem_flags; bpf_object__btf; bpf_object__find_map_fd_by_name; - bpf_get_link_xdp_id; - btf__dedup; - btf__get_map_kv_tids; - btf__get_nr_types; btf__get_raw_data; - btf__load; btf_ext__free; - btf_ext__func_info_rec_size; btf_ext__get_raw_data; - btf_ext__line_info_rec_size; btf_ext__new; - btf_ext__reloc_func_info; - btf_ext__reloc_line_info; - xsk_umem__create; - xsk_socket__create; - xsk_umem__delete; - xsk_socket__delete; - xsk_umem__fd; - xsk_socket__fd; - bpf_program__get_prog_info_linear; - bpf_program__bpil_addr_to_offs; - bpf_program__bpil_offs_to_addr; } LIBBPF_0.0.1; LIBBPF_0.0.3 { global: bpf_map__is_internal; bpf_map_freeze; - btf__finalize_data; } LIBBPF_0.0.2; LIBBPF_0.0.4 { global: bpf_link__destroy; - bpf_object__load_xattr; bpf_program__attach_kprobe; bpf_program__attach_perf_event; bpf_program__attach_raw_tracepoint; @@ -176,14 +95,10 @@ LIBBPF_0.0.4 { bpf_program__attach_uprobe; btf_dump__dump_type; btf_dump__free; - btf_dump__new; btf__parse_elf; 
libbpf_num_possible_cpus; perf_buffer__free; - perf_buffer__new; - perf_buffer__new_raw; perf_buffer__poll; - xsk_umem__create; } LIBBPF_0.0.3; LIBBPF_0.0.5 { @@ -193,7 +108,6 @@ LIBBPF_0.0.5 { LIBBPF_0.0.6 { global: - bpf_get_link_xdp_info; bpf_map__get_pin_path; bpf_map__is_pinned; bpf_map__set_pin_path; @@ -202,9 +116,6 @@ LIBBPF_0.0.6 { bpf_program__attach_trace; bpf_program__get_expected_attach_type; bpf_program__get_type; - bpf_program__is_tracing; - bpf_program__set_tracing; - bpf_program__size; btf__find_by_name_kind; libbpf_find_vmlinux_btf_id; } LIBBPF_0.0.5; @@ -224,14 +135,8 @@ LIBBPF_0.0.7 { bpf_object__detach_skeleton; bpf_object__load_skeleton; bpf_object__open_skeleton; - bpf_probe_large_insn_limit; - bpf_prog_attach_xattr; bpf_program__attach; bpf_program__name; - bpf_program__is_extension; - bpf_program__is_struct_ops; - bpf_program__set_extension; - bpf_program__set_struct_ops; btf__align_of; libbpf_find_kernel_btf; } LIBBPF_0.0.6; @@ -250,10 +155,7 @@ LIBBPF_0.0.8 { bpf_prog_attach_opts; bpf_program__attach_cgroup; bpf_program__attach_lsm; - bpf_program__is_lsm; bpf_program__set_attach_target; - bpf_program__set_lsm; - bpf_set_link_xdp_fd_opts; } LIBBPF_0.0.7; LIBBPF_0.0.9 { @@ -291,9 +193,7 @@ LIBBPF_0.1.0 { bpf_map__value_size; bpf_program__attach_xdp; bpf_program__autoload; - bpf_program__is_sk_lookup; bpf_program__set_autoload; - bpf_program__set_sk_lookup; btf__parse; btf__parse_raw; btf__pointer_size; @@ -336,7 +236,6 @@ LIBBPF_0.2.0 { perf_buffer__buffer_fd; perf_buffer__epoll_fd; perf_buffer__consume_buffer; - xsk_socket__create_shared; } LIBBPF_0.1.0; LIBBPF_0.3.0 { @@ -348,8 +247,6 @@ LIBBPF_0.3.0 { btf__new_empty_split; btf__new_split; ring_buffer__epoll_fd; - xsk_setup_xdp_prog; - xsk_socket__update_xskmap; } LIBBPF_0.2.0; LIBBPF_0.4.0 { @@ -397,7 +294,6 @@ LIBBPF_0.6.0 { bpf_object__next_program; bpf_object__prev_map; bpf_object__prev_program; - bpf_prog_load_deprecated; bpf_prog_load; bpf_program__flags; bpf_program__insn_cnt; @@ 
-407,18 +303,14 @@ LIBBPF_0.6.0 { btf__add_decl_tag; btf__add_type_tag; btf__dedup; - btf__dedup_deprecated; btf__raw_data; btf__type_cnt; btf_dump__new; - btf_dump__new_deprecated; libbpf_major_version; libbpf_minor_version; libbpf_version_string; perf_buffer__new; - perf_buffer__new_deprecated; perf_buffer__new_raw; - perf_buffer__new_raw_deprecated; } LIBBPF_0.5.0; LIBBPF_0.7.0 { @@ -434,10 +326,11 @@ LIBBPF_0.7.0 { bpf_xdp_detach; bpf_xdp_query; bpf_xdp_query_id; + btf_ext__raw_data; libbpf_probe_bpf_helper; libbpf_probe_bpf_map_type; libbpf_probe_bpf_prog_type; - libbpf_set_memlock_rlim_max; + libbpf_set_memlock_rlim; } LIBBPF_0.6.0; LIBBPF_0.8.0 { @@ -461,5 +354,15 @@ LIBBPF_0.8.0 { } LIBBPF_0.7.0; LIBBPF_1.0.0 { - local: *; + global: + bpf_obj_get_opts; + bpf_prog_query_opts; + bpf_program__attach_ksyscall; + btf__add_enum64; + btf__add_enum64_value; + libbpf_bpf_attach_type_str; + libbpf_bpf_link_type_str; + libbpf_bpf_map_type_str; + libbpf_bpf_prog_type_str; + perf_buffer__buffer; }; diff --git a/tools/lib/bpf/libbpf_common.h b/tools/lib/bpf/libbpf_common.h index 000e37798ff2..9a7937f339df 100644 --- a/tools/lib/bpf/libbpf_common.h +++ b/tools/lib/bpf/libbpf_common.h @@ -30,20 +30,10 @@ /* Add checks for other versions below when planning deprecation of API symbols * with the LIBBPF_DEPRECATED_SINCE macro. 
*/ -#if __LIBBPF_CURRENT_VERSION_GEQ(0, 6) -#define __LIBBPF_MARK_DEPRECATED_0_6(X) X +#if __LIBBPF_CURRENT_VERSION_GEQ(1, 0) +#define __LIBBPF_MARK_DEPRECATED_1_0(X) X #else -#define __LIBBPF_MARK_DEPRECATED_0_6(X) -#endif -#if __LIBBPF_CURRENT_VERSION_GEQ(0, 7) -#define __LIBBPF_MARK_DEPRECATED_0_7(X) X -#else -#define __LIBBPF_MARK_DEPRECATED_0_7(X) -#endif -#if __LIBBPF_CURRENT_VERSION_GEQ(0, 8) -#define __LIBBPF_MARK_DEPRECATED_0_8(X) X -#else -#define __LIBBPF_MARK_DEPRECATED_0_8(X) +#define __LIBBPF_MARK_DEPRECATED_1_0(X) #endif /* This set of internal macros allows to do "function overloading" based on diff --git a/tools/lib/bpf/libbpf_internal.h b/tools/lib/bpf/libbpf_internal.h index 4abdbe2fea9d..4135ae0a2bc3 100644 --- a/tools/lib/bpf/libbpf_internal.h +++ b/tools/lib/bpf/libbpf_internal.h @@ -15,7 +15,6 @@ #include <linux/err.h> #include <fcntl.h> #include <unistd.h> -#include "libbpf_legacy.h" #include "relo_core.h" /* make sure libbpf doesn't use kernel-only integer typedefs */ @@ -109,9 +108,9 @@ static inline bool str_has_sfx(const char *str, const char *sfx) size_t str_len = strlen(str); size_t sfx_len = strlen(sfx); - if (sfx_len <= str_len) - return strcmp(str + str_len - sfx_len, sfx); - return false; + if (sfx_len > str_len) + return false; + return strcmp(str + str_len - sfx_len, sfx) == 0; } /* Symbol versioning is different between static and shared library. 
@@ -351,6 +350,10 @@ enum kern_feature_id { FEAT_MEMCG_ACCOUNT, /* BPF cookie (bpf_get_attach_cookie() BPF helper) support */ FEAT_BPF_COOKIE, + /* BTF_KIND_ENUM64 support and BTF_KIND_ENUM kflag support */ + FEAT_BTF_ENUM64, + /* Kernel uses syscall wrapper (CONFIG_ARCH_HAS_SYSCALL_WRAPPER) */ + FEAT_SYSCALL_WRAPPER, __FEAT_CNT, }; @@ -476,8 +479,6 @@ int btf_ext_visit_str_offs(struct btf_ext *btf_ext, str_off_visit_fn visit, void __s32 btf__find_by_name_kind_own(const struct btf *btf, const char *type_name, __u32 kind); -extern enum libbpf_strict_mode libbpf_mode; - typedef int (*kallsyms_cb_t)(unsigned long long sym_addr, char sym_type, const char *sym_name, void *ctx); @@ -496,12 +497,8 @@ static inline int libbpf_err(int ret) */ static inline int libbpf_err_errno(int ret) { - if (libbpf_mode & LIBBPF_STRICT_DIRECT_ERRS) - /* errno is already assumed to be set on error */ - return ret < 0 ? -errno : ret; - - /* legacy: on error return -1 directly and don't touch errno */ - return ret; + /* errno is already assumed to be set on error */ + return ret < 0 ? -errno : ret; } /* handle error for pointer-returning APIs, err is assumed to be < 0 always */ @@ -509,12 +506,7 @@ static inline void *libbpf_err_ptr(int err) { /* set errno on error, this doesn't break anything */ errno = -err; - - if (libbpf_mode & LIBBPF_STRICT_CLEAN_PTRS) - return NULL; - - /* legacy: encode err as ptr */ - return ERR_PTR(err); + return NULL; } /* handle pointer-returning APIs' error handling */ @@ -524,11 +516,7 @@ static inline void *libbpf_ptr(void *ret) if (IS_ERR(ret)) errno = -PTR_ERR(ret); - if (libbpf_mode & LIBBPF_STRICT_CLEAN_PTRS) - return IS_ERR(ret) ? NULL : ret; - - /* legacy: pass-through original pointer */ - return ret; + return IS_ERR(ret) ? 
NULL : ret; } static inline bool str_is_empty(const char *s) @@ -580,4 +568,9 @@ struct bpf_link * usdt_manager_attach_usdt(struct usdt_manager *man, const char *usdt_provider, const char *usdt_name, __u64 usdt_cookie); +static inline bool is_pow_of_2(size_t x) +{ + return x && (x & (x - 1)) == 0; +} + #endif /* __LIBBPF_LIBBPF_INTERNAL_H */ diff --git a/tools/lib/bpf/libbpf_legacy.h b/tools/lib/bpf/libbpf_legacy.h index d7bcbd01f66f..5b7e0155db6a 100644 --- a/tools/lib/bpf/libbpf_legacy.h +++ b/tools/lib/bpf/libbpf_legacy.h @@ -20,6 +20,11 @@ extern "C" { #endif +/* As of libbpf 1.0 libbpf_set_strict_mode() and enum libbpf_struct_mode have + * no effect. But they are left in libbpf_legacy.h so that applications that + * prepared for libbpf 1.0 before final release by using + * libbpf_set_strict_mode() still work with libbpf 1.0+ without any changes. + */ enum libbpf_strict_mode { /* Turn on all supported strict features of libbpf to simulate libbpf * v1.0 behavior. @@ -71,8 +76,8 @@ enum libbpf_strict_mode { * first BPF program or map creation operation. This is done only if * kernel is too old to support memcg-based memory accounting for BPF * subsystem. By default, RLIMIT_MEMLOCK limit is set to RLIM_INFINITY, - * but it can be overriden with libbpf_set_memlock_rlim_max() API. - * Note that libbpf_set_memlock_rlim_max() needs to be called before + * but it can be overriden with libbpf_set_memlock_rlim() API. + * Note that libbpf_set_memlock_rlim() needs to be called before * the very first bpf_prog_load(), bpf_map_create() or bpf_object__load() * operation. */ @@ -88,6 +93,25 @@ enum libbpf_strict_mode { LIBBPF_API int libbpf_set_strict_mode(enum libbpf_strict_mode mode); +/** + * @brief **libbpf_get_error()** extracts the error code from the passed + * pointer + * @param ptr pointer returned from libbpf API function + * @return error code; or 0 if no error occured + * + * Note, as of libbpf 1.0 this function is not necessary and not recommended + * to be used. 
Libbpf doesn't return error code embedded into the pointer + * itself. Instead, NULL is returned on error and error code is passed through + * thread-local errno variable. **libbpf_get_error()** is just returning -errno + * value if it receives NULL, which is correct only if errno hasn't been + * modified between libbpf API call and corresponding **libbpf_get_error()** + * call. Prefer to check return for NULL and use errno directly. + * + * This API is left in libbpf 1.0 to allow applications that were 1.0-ready + * before final libbpf 1.0 without needing to change them. + */ +LIBBPF_API long libbpf_get_error(const void *ptr); + #define DECLARE_LIBBPF_OPTS LIBBPF_OPTS /* "Discouraged" APIs which don't follow consistent libbpf naming patterns. diff --git a/tools/lib/bpf/libbpf_probes.c b/tools/lib/bpf/libbpf_probes.c index 97b06cede56f..0b5398786bf3 100644 --- a/tools/lib/bpf/libbpf_probes.c +++ b/tools/lib/bpf/libbpf_probes.c @@ -17,47 +17,14 @@ #include "libbpf.h" #include "libbpf_internal.h" -static bool grep(const char *buffer, const char *pattern) -{ - return !!strstr(buffer, pattern); -} - -static int get_vendor_id(int ifindex) -{ - char ifname[IF_NAMESIZE], path[64], buf[8]; - ssize_t len; - int fd; - - if (!if_indextoname(ifindex, ifname)) - return -1; - - snprintf(path, sizeof(path), "/sys/class/net/%s/device/vendor", ifname); - - fd = open(path, O_RDONLY | O_CLOEXEC); - if (fd < 0) - return -1; - - len = read(fd, buf, sizeof(buf)); - close(fd); - if (len < 0) - return -1; - if (len >= (ssize_t)sizeof(buf)) - return -1; - buf[len] = '\0'; - - return strtol(buf, NULL, 0); -} - static int probe_prog_load(enum bpf_prog_type prog_type, const struct bpf_insn *insns, size_t insns_cnt, - char *log_buf, size_t log_buf_sz, - __u32 ifindex) + char *log_buf, size_t log_buf_sz) { LIBBPF_OPTS(bpf_prog_load_opts, opts, .log_buf = log_buf, .log_size = log_buf_sz, .log_level = log_buf ? 
1 : 0, - .prog_ifindex = ifindex, ); int fd, err, exp_err = 0; const char *exp_msg = NULL; @@ -161,31 +128,10 @@ int libbpf_probe_bpf_prog_type(enum bpf_prog_type prog_type, const void *opts) if (opts) return libbpf_err(-EINVAL); - ret = probe_prog_load(prog_type, insns, insn_cnt, NULL, 0, 0); + ret = probe_prog_load(prog_type, insns, insn_cnt, NULL, 0); return libbpf_err(ret); } -bool bpf_probe_prog_type(enum bpf_prog_type prog_type, __u32 ifindex) -{ - struct bpf_insn insns[2] = { - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN() - }; - - /* prefer libbpf_probe_bpf_prog_type() unless offload is requested */ - if (ifindex == 0) - return libbpf_probe_bpf_prog_type(prog_type, NULL) == 1; - - if (ifindex && prog_type == BPF_PROG_TYPE_SCHED_CLS) - /* nfp returns -EINVAL on exit(0) with TC offload */ - insns[0].imm = 2; - - errno = 0; - probe_prog_load(prog_type, insns, ARRAY_SIZE(insns), NULL, 0, ifindex); - - return errno != EINVAL && errno != EOPNOTSUPP; -} - int libbpf__load_raw_btf(const char *raw_types, size_t types_len, const char *str_sec, size_t str_len) { @@ -242,15 +188,13 @@ static int load_local_storage_btf(void) strs, sizeof(strs)); } -static int probe_map_create(enum bpf_map_type map_type, __u32 ifindex) +static int probe_map_create(enum bpf_map_type map_type) { LIBBPF_OPTS(bpf_map_create_opts, opts); int key_size, value_size, max_entries; __u32 btf_key_type_id = 0, btf_value_type_id = 0; int fd = -1, btf_fd = -1, fd_inner = -1, exp_err = 0, err; - opts.map_ifindex = ifindex; - key_size = sizeof(__u32); value_size = sizeof(__u32); max_entries = 1; @@ -326,12 +270,6 @@ static int probe_map_create(enum bpf_map_type map_type, __u32 ifindex) if (map_type == BPF_MAP_TYPE_ARRAY_OF_MAPS || map_type == BPF_MAP_TYPE_HASH_OF_MAPS) { - /* TODO: probe for device, once libbpf has a function to create - * map-in-map for offload - */ - if (ifindex) - goto cleanup; - fd_inner = bpf_map_create(BPF_MAP_TYPE_HASH, NULL, sizeof(__u32), sizeof(__u32), 1, NULL); if (fd_inner < 
0) @@ -370,15 +308,10 @@ int libbpf_probe_bpf_map_type(enum bpf_map_type map_type, const void *opts) if (opts) return libbpf_err(-EINVAL); - ret = probe_map_create(map_type, 0); + ret = probe_map_create(map_type); return libbpf_err(ret); } -bool bpf_probe_map_type(enum bpf_map_type map_type, __u32 ifindex) -{ - return probe_map_create(map_type, ifindex) == 1; -} - int libbpf_probe_bpf_helper(enum bpf_prog_type prog_type, enum bpf_func_id helper_id, const void *opts) { @@ -407,7 +340,7 @@ int libbpf_probe_bpf_helper(enum bpf_prog_type prog_type, enum bpf_func_id helpe } buf[0] = '\0'; - ret = probe_prog_load(prog_type, insns, insn_cnt, buf, sizeof(buf), 0); + ret = probe_prog_load(prog_type, insns, insn_cnt, buf, sizeof(buf)); if (ret < 0) return libbpf_err(ret); @@ -427,51 +360,3 @@ int libbpf_probe_bpf_helper(enum bpf_prog_type prog_type, enum bpf_func_id helpe return 0; return 1; /* assume supported */ } - -bool bpf_probe_helper(enum bpf_func_id id, enum bpf_prog_type prog_type, - __u32 ifindex) -{ - struct bpf_insn insns[2] = { - BPF_EMIT_CALL(id), - BPF_EXIT_INSN() - }; - char buf[4096] = {}; - bool res; - - probe_prog_load(prog_type, insns, ARRAY_SIZE(insns), buf, sizeof(buf), ifindex); - res = !grep(buf, "invalid func ") && !grep(buf, "unknown func "); - - if (ifindex) { - switch (get_vendor_id(ifindex)) { - case 0x19ee: /* Netronome specific */ - res = res && !grep(buf, "not supported by FW") && - !grep(buf, "unsupported function id"); - break; - default: - break; - } - } - - return res; -} - -/* - * Probe for availability of kernel commit (5.3): - * - * c04c0d2b968a ("bpf: increase complexity limit and maximum program size") - */ -bool bpf_probe_large_insn_limit(__u32 ifindex) -{ - struct bpf_insn insns[BPF_MAXINSNS + 1]; - int i; - - for (i = 0; i < BPF_MAXINSNS; i++) - insns[i] = BPF_MOV64_IMM(BPF_REG_0, 1); - insns[BPF_MAXINSNS] = BPF_EXIT_INSN(); - - errno = 0; - probe_prog_load(BPF_PROG_TYPE_SCHED_CLS, insns, ARRAY_SIZE(insns), NULL, 0, - ifindex); - - 
return errno != E2BIG && errno != EINVAL; -} diff --git a/tools/lib/bpf/linker.c b/tools/lib/bpf/linker.c index 9aa016fb55aa..4ac02c28e152 100644 --- a/tools/lib/bpf/linker.c +++ b/tools/lib/bpf/linker.c @@ -697,11 +697,6 @@ static int linker_load_obj_file(struct bpf_linker *linker, const char *filename, return err; } -static bool is_pow_of_2(size_t x) -{ - return x && (x & (x - 1)) == 0; -} - static int linker_sanity_check_elf(struct src_obj *obj) { struct src_sec *sec; @@ -1340,6 +1335,7 @@ recur: case BTF_KIND_STRUCT: case BTF_KIND_UNION: case BTF_KIND_ENUM: + case BTF_KIND_ENUM64: case BTF_KIND_FWD: case BTF_KIND_FUNC: case BTF_KIND_VAR: @@ -1362,6 +1358,7 @@ recur: case BTF_KIND_INT: case BTF_KIND_FLOAT: case BTF_KIND_ENUM: + case BTF_KIND_ENUM64: /* ignore encoding for int and enum values for enum */ if (t1->size != t2->size) { pr_warn("global '%s': incompatible %s '%s' size %u and %u\n", diff --git a/tools/lib/bpf/netlink.c b/tools/lib/bpf/netlink.c index cbc8967d5402..6c013168032d 100644 --- a/tools/lib/bpf/netlink.c +++ b/tools/lib/bpf/netlink.c @@ -27,6 +27,14 @@ typedef int (*libbpf_dump_nlmsg_t)(void *cookie, void *msg, struct nlattr **tb); typedef int (*__dump_nlmsg_t)(struct nlmsghdr *nlmsg, libbpf_dump_nlmsg_t, void *cookie); +struct xdp_link_info { + __u32 prog_id; + __u32 drv_prog_id; + __u32 hw_prog_id; + __u32 skb_prog_id; + __u8 attach_mode; +}; + struct xdp_id_md { int ifindex; __u32 flags; @@ -288,31 +296,6 @@ int bpf_xdp_detach(int ifindex, __u32 flags, const struct bpf_xdp_attach_opts *o return bpf_xdp_attach(ifindex, -1, flags, opts); } -int bpf_set_link_xdp_fd_opts(int ifindex, int fd, __u32 flags, - const struct bpf_xdp_set_link_opts *opts) -{ - int old_fd = -1, ret; - - if (!OPTS_VALID(opts, bpf_xdp_set_link_opts)) - return libbpf_err(-EINVAL); - - if (OPTS_HAS(opts, old_fd)) { - old_fd = OPTS_GET(opts, old_fd, -1); - flags |= XDP_FLAGS_REPLACE; - } - - ret = __bpf_set_link_xdp_fd_replace(ifindex, fd, old_fd, flags); - return 
libbpf_err(ret); -} - -int bpf_set_link_xdp_fd(int ifindex, int fd, __u32 flags) -{ - int ret; - - ret = __bpf_set_link_xdp_fd_replace(ifindex, fd, 0, flags); - return libbpf_err(ret); -} - static int __dump_link_nlmsg(struct nlmsghdr *nlh, libbpf_dump_nlmsg_t dump_link_nlmsg, void *cookie) { @@ -413,30 +396,6 @@ int bpf_xdp_query(int ifindex, int xdp_flags, struct bpf_xdp_query_opts *opts) return 0; } -int bpf_get_link_xdp_info(int ifindex, struct xdp_link_info *info, - size_t info_size, __u32 flags) -{ - LIBBPF_OPTS(bpf_xdp_query_opts, opts); - size_t sz; - int err; - - if (!info_size) - return libbpf_err(-EINVAL); - - err = bpf_xdp_query(ifindex, flags, &opts); - if (err) - return libbpf_err(err); - - /* struct xdp_link_info field layout matches struct bpf_xdp_query_opts - * layout after sz field - */ - sz = min(info_size, offsetofend(struct xdp_link_info, attach_mode)); - memcpy(info, &opts.prog_id, sz); - memset((void *)info + sz, 0, info_size - sz); - - return 0; -} - int bpf_xdp_query_id(int ifindex, int flags, __u32 *prog_id) { LIBBPF_OPTS(bpf_xdp_query_opts, opts); @@ -463,11 +422,6 @@ int bpf_xdp_query_id(int ifindex, int flags, __u32 *prog_id) } -int bpf_get_link_xdp_id(int ifindex, __u32 *prog_id, __u32 flags) -{ - return bpf_xdp_query_id(ifindex, flags, prog_id); -} - typedef int (*qdisc_config_t)(struct libbpf_nla_req *req); static int clsact_config(struct libbpf_nla_req *req) diff --git a/tools/lib/bpf/relo_core.c b/tools/lib/bpf/relo_core.c index ba4453dfd1ed..c4b0e81ae293 100644 --- a/tools/lib/bpf/relo_core.c +++ b/tools/lib/bpf/relo_core.c @@ -95,6 +95,7 @@ static const char *core_relo_kind_str(enum bpf_core_relo_kind kind) case BPF_CORE_TYPE_ID_LOCAL: return "local_type_id"; case BPF_CORE_TYPE_ID_TARGET: return "target_type_id"; case BPF_CORE_TYPE_EXISTS: return "type_exists"; + case BPF_CORE_TYPE_MATCHES: return "type_matches"; case BPF_CORE_TYPE_SIZE: return "type_size"; case BPF_CORE_ENUMVAL_EXISTS: return "enumval_exists"; case 
BPF_CORE_ENUMVAL_VALUE: return "enumval_value"; @@ -123,6 +124,7 @@ static bool core_relo_is_type_based(enum bpf_core_relo_kind kind) case BPF_CORE_TYPE_ID_LOCAL: case BPF_CORE_TYPE_ID_TARGET: case BPF_CORE_TYPE_EXISTS: + case BPF_CORE_TYPE_MATCHES: case BPF_CORE_TYPE_SIZE: return true; default: @@ -141,6 +143,86 @@ static bool core_relo_is_enumval_based(enum bpf_core_relo_kind kind) } } +int __bpf_core_types_are_compat(const struct btf *local_btf, __u32 local_id, + const struct btf *targ_btf, __u32 targ_id, int level) +{ + const struct btf_type *local_type, *targ_type; + int depth = 32; /* max recursion depth */ + + /* caller made sure that names match (ignoring flavor suffix) */ + local_type = btf_type_by_id(local_btf, local_id); + targ_type = btf_type_by_id(targ_btf, targ_id); + if (!btf_kind_core_compat(local_type, targ_type)) + return 0; + +recur: + depth--; + if (depth < 0) + return -EINVAL; + + local_type = skip_mods_and_typedefs(local_btf, local_id, &local_id); + targ_type = skip_mods_and_typedefs(targ_btf, targ_id, &targ_id); + if (!local_type || !targ_type) + return -EINVAL; + + if (!btf_kind_core_compat(local_type, targ_type)) + return 0; + + switch (btf_kind(local_type)) { + case BTF_KIND_UNKN: + case BTF_KIND_STRUCT: + case BTF_KIND_UNION: + case BTF_KIND_ENUM: + case BTF_KIND_FWD: + case BTF_KIND_ENUM64: + return 1; + case BTF_KIND_INT: + /* just reject deprecated bitfield-like integers; all other + * integers are by default compatible between each other + */ + return btf_int_offset(local_type) == 0 && btf_int_offset(targ_type) == 0; + case BTF_KIND_PTR: + local_id = local_type->type; + targ_id = targ_type->type; + goto recur; + case BTF_KIND_ARRAY: + local_id = btf_array(local_type)->type; + targ_id = btf_array(targ_type)->type; + goto recur; + case BTF_KIND_FUNC_PROTO: { + struct btf_param *local_p = btf_params(local_type); + struct btf_param *targ_p = btf_params(targ_type); + __u16 local_vlen = btf_vlen(local_type); + __u16 targ_vlen = 
btf_vlen(targ_type); + int i, err; + + if (local_vlen != targ_vlen) + return 0; + + for (i = 0; i < local_vlen; i++, local_p++, targ_p++) { + if (level <= 0) + return -EINVAL; + + skip_mods_and_typedefs(local_btf, local_p->type, &local_id); + skip_mods_and_typedefs(targ_btf, targ_p->type, &targ_id); + err = __bpf_core_types_are_compat(local_btf, local_id, targ_btf, targ_id, + level - 1); + if (err <= 0) + return err; + } + + /* tail recurse for return type check */ + skip_mods_and_typedefs(local_btf, local_type->type, &local_id); + skip_mods_and_typedefs(targ_btf, targ_type->type, &targ_id); + goto recur; + } + default: + pr_warn("unexpected kind %s relocated, local [%d], target [%d]\n", + btf_kind_str(local_type), local_id, targ_id); + return 0; + } +} + /* * Turn bpf_core_relo into a low- and high-level spec representation, * validating correctness along the way, as well as calculating resulting @@ -167,11 +249,11 @@ static bool core_relo_is_enumval_based(enum bpf_core_relo_kind kind) * just a parsed access string representation): [0, 1, 2, 3]. * * High-level spec will capture only 3 points: - * - intial zero-index access by pointer (&s->... is the same as &s[0]...); + * - initial zero-index access by pointer (&s->... is the same as &s[0]...); * - field 'a' access (corresponds to '2' in low-level spec); * - array element #3 access (corresponds to '3' in low-level spec). * - * Type-based relocations (TYPE_EXISTS/TYPE_SIZE, + * Type-based relocations (TYPE_EXISTS/TYPE_MATCHES/TYPE_SIZE, * TYPE_ID_LOCAL/TYPE_ID_TARGET) don't capture any field information. Their * spec and raw_spec are kept empty. 
* @@ -186,7 +268,7 @@ int bpf_core_parse_spec(const char *prog_name, const struct btf *btf, struct bpf_core_accessor *acc; const struct btf_type *t; const char *name, *spec_str; - __u32 id; + __u32 id, name_off; __s64 sz; spec_str = btf__name_by_offset(btf, relo->access_str_off); @@ -231,11 +313,13 @@ int bpf_core_parse_spec(const char *prog_name, const struct btf *btf, spec->len++; if (core_relo_is_enumval_based(relo->kind)) { - if (!btf_is_enum(t) || spec->raw_len > 1 || access_idx >= btf_vlen(t)) + if (!btf_is_any_enum(t) || spec->raw_len > 1 || access_idx >= btf_vlen(t)) return -EINVAL; /* record enumerator name in a first accessor */ - acc->name = btf__name_by_offset(btf, btf_enum(t)[access_idx].name_off); + name_off = btf_is_enum(t) ? btf_enum(t)[access_idx].name_off + : btf_enum64(t)[access_idx].name_off; + acc->name = btf__name_by_offset(btf, name_off); return 0; } @@ -340,7 +424,7 @@ recur: if (btf_is_composite(local_type) && btf_is_composite(targ_type)) return 1; - if (btf_kind(local_type) != btf_kind(targ_type)) + if (!btf_kind_core_compat(local_type, targ_type)) return 0; switch (btf_kind(local_type)) { @@ -348,6 +432,7 @@ recur: case BTF_KIND_FLOAT: return 1; case BTF_KIND_FWD: + case BTF_KIND_ENUM64: case BTF_KIND_ENUM: { const char *local_name, *targ_name; size_t local_len, targ_len; @@ -477,6 +562,7 @@ static int bpf_core_spec_match(struct bpf_core_spec *local_spec, const struct bpf_core_accessor *local_acc; struct bpf_core_accessor *targ_acc; int i, sz, matched; + __u32 name_off; memset(targ_spec, 0, sizeof(*targ_spec)); targ_spec->btf = targ_btf; @@ -484,9 +570,14 @@ static int bpf_core_spec_match(struct bpf_core_spec *local_spec, targ_spec->relo_kind = local_spec->relo_kind; if (core_relo_is_type_based(local_spec->relo_kind)) { - return bpf_core_types_are_compat(local_spec->btf, - local_spec->root_type_id, - targ_btf, targ_id); + if (local_spec->relo_kind == BPF_CORE_TYPE_MATCHES) + return bpf_core_types_match(local_spec->btf, + 
local_spec->root_type_id, + targ_btf, targ_id); + else + return bpf_core_types_are_compat(local_spec->btf, + local_spec->root_type_id, + targ_btf, targ_id); } local_acc = &local_spec->spec[0]; @@ -494,18 +585,22 @@ static int bpf_core_spec_match(struct bpf_core_spec *local_spec, if (core_relo_is_enumval_based(local_spec->relo_kind)) { size_t local_essent_len, targ_essent_len; - const struct btf_enum *e; const char *targ_name; /* has to resolve to an enum */ targ_type = skip_mods_and_typedefs(targ_spec->btf, targ_id, &targ_id); - if (!btf_is_enum(targ_type)) + if (!btf_is_any_enum(targ_type)) return 0; local_essent_len = bpf_core_essential_name_len(local_acc->name); - for (i = 0, e = btf_enum(targ_type); i < btf_vlen(targ_type); i++, e++) { - targ_name = btf__name_by_offset(targ_spec->btf, e->name_off); + for (i = 0; i < btf_vlen(targ_type); i++) { + if (btf_is_enum(targ_type)) + name_off = btf_enum(targ_type)[i].name_off; + else + name_off = btf_enum64(targ_type)[i].name_off; + + targ_name = btf__name_by_offset(targ_spec->btf, name_off); targ_essent_len = bpf_core_essential_name_len(targ_name); if (targ_essent_len != local_essent_len) continue; @@ -583,7 +678,7 @@ static int bpf_core_spec_match(struct bpf_core_spec *local_spec, static int bpf_core_calc_field_relo(const char *prog_name, const struct bpf_core_relo *relo, const struct bpf_core_spec *spec, - __u32 *val, __u32 *field_sz, __u32 *type_id, + __u64 *val, __u32 *field_sz, __u32 *type_id, bool *validate) { const struct bpf_core_accessor *acc; @@ -680,8 +775,7 @@ static int bpf_core_calc_field_relo(const char *prog_name, *val = byte_sz; break; case BPF_CORE_FIELD_SIGNED: - /* enums will be assumed unsigned */ - *val = btf_is_enum(mt) || + *val = (btf_is_any_enum(mt) && BTF_INFO_KFLAG(mt->info)) || (btf_int_encoding(mt) & BTF_INT_SIGNED); if (validate) *validate = true; /* signedness is never ambiguous */ @@ -708,7 +802,7 @@ static int bpf_core_calc_field_relo(const char *prog_name, static int 
bpf_core_calc_type_relo(const struct bpf_core_relo *relo, const struct bpf_core_spec *spec, - __u32 *val, bool *validate) + __u64 *val, bool *validate) { __s64 sz; @@ -732,6 +826,7 @@ static int bpf_core_calc_type_relo(const struct bpf_core_relo *relo, *validate = false; break; case BPF_CORE_TYPE_EXISTS: + case BPF_CORE_TYPE_MATCHES: *val = 1; break; case BPF_CORE_TYPE_SIZE: @@ -751,10 +846,9 @@ static int bpf_core_calc_type_relo(const struct bpf_core_relo *relo, static int bpf_core_calc_enumval_relo(const struct bpf_core_relo *relo, const struct bpf_core_spec *spec, - __u32 *val) + __u64 *val) { const struct btf_type *t; - const struct btf_enum *e; switch (relo->kind) { case BPF_CORE_ENUMVAL_EXISTS: @@ -764,8 +858,10 @@ static int bpf_core_calc_enumval_relo(const struct bpf_core_relo *relo, if (!spec) return -EUCLEAN; /* request instruction poisoning */ t = btf_type_by_id(spec->btf, spec->spec[0].type_id); - e = btf_enum(t) + spec->spec[0].idx; - *val = e->val; + if (btf_is_enum(t)) + *val = btf_enum(t)[spec->spec[0].idx].val; + else + *val = btf_enum64_value(btf_enum64(t) + spec->spec[0].idx); break; default: return -EOPNOTSUPP; @@ -929,7 +1025,7 @@ int bpf_core_patch_insn(const char *prog_name, struct bpf_insn *insn, int insn_idx, const struct bpf_core_relo *relo, int relo_idx, const struct bpf_core_relo_res *res) { - __u32 orig_val, new_val; + __u64 orig_val, new_val; __u8 class; class = BPF_CLASS(insn->code); @@ -954,28 +1050,30 @@ poison: if (BPF_SRC(insn->code) != BPF_K) return -EINVAL; if (res->validate && insn->imm != orig_val) { - pr_warn("prog '%s': relo #%d: unexpected insn #%d (ALU/ALU64) value: got %u, exp %u -> %u\n", + pr_warn("prog '%s': relo #%d: unexpected insn #%d (ALU/ALU64) value: got %u, exp %llu -> %llu\n", prog_name, relo_idx, - insn_idx, insn->imm, orig_val, new_val); + insn_idx, insn->imm, (unsigned long long)orig_val, + (unsigned long long)new_val); return -EINVAL; } orig_val = insn->imm; insn->imm = new_val; - pr_debug("prog '%s': relo 
#%d: patched insn #%d (ALU/ALU64) imm %u -> %u\n", + pr_debug("prog '%s': relo #%d: patched insn #%d (ALU/ALU64) imm %llu -> %llu\n", prog_name, relo_idx, insn_idx, - orig_val, new_val); + (unsigned long long)orig_val, (unsigned long long)new_val); break; case BPF_LDX: case BPF_ST: case BPF_STX: if (res->validate && insn->off != orig_val) { - pr_warn("prog '%s': relo #%d: unexpected insn #%d (LDX/ST/STX) value: got %u, exp %u -> %u\n", - prog_name, relo_idx, insn_idx, insn->off, orig_val, new_val); + pr_warn("prog '%s': relo #%d: unexpected insn #%d (LDX/ST/STX) value: got %u, exp %llu -> %llu\n", + prog_name, relo_idx, insn_idx, insn->off, (unsigned long long)orig_val, + (unsigned long long)new_val); return -EINVAL; } if (new_val > SHRT_MAX) { - pr_warn("prog '%s': relo #%d: insn #%d (LDX/ST/STX) value too big: %u\n", - prog_name, relo_idx, insn_idx, new_val); + pr_warn("prog '%s': relo #%d: insn #%d (LDX/ST/STX) value too big: %llu\n", + prog_name, relo_idx, insn_idx, (unsigned long long)new_val); return -ERANGE; } if (res->fail_memsz_adjust) { @@ -987,8 +1085,9 @@ poison: orig_val = insn->off; insn->off = new_val; - pr_debug("prog '%s': relo #%d: patched insn #%d (LDX/ST/STX) off %u -> %u\n", - prog_name, relo_idx, insn_idx, orig_val, new_val); + pr_debug("prog '%s': relo #%d: patched insn #%d (LDX/ST/STX) off %llu -> %llu\n", + prog_name, relo_idx, insn_idx, (unsigned long long)orig_val, + (unsigned long long)new_val); if (res->new_sz != res->orig_sz) { int insn_bytes_sz, insn_bpf_sz; @@ -1024,20 +1123,20 @@ poison: return -EINVAL; } - imm = insn[0].imm + ((__u64)insn[1].imm << 32); + imm = (__u32)insn[0].imm | ((__u64)insn[1].imm << 32); if (res->validate && imm != orig_val) { - pr_warn("prog '%s': relo #%d: unexpected insn #%d (LDIMM64) value: got %llu, exp %u -> %u\n", + pr_warn("prog '%s': relo #%d: unexpected insn #%d (LDIMM64) value: got %llu, exp %llu -> %llu\n", prog_name, relo_idx, insn_idx, (unsigned long long)imm, - orig_val, new_val); + (unsigned 
long long)orig_val, (unsigned long long)new_val); return -EINVAL; } insn[0].imm = new_val; - insn[1].imm = 0; /* currently only 32-bit values are supported */ - pr_debug("prog '%s': relo #%d: patched insn #%d (LDIMM64) imm64 %llu -> %u\n", + insn[1].imm = new_val >> 32; + pr_debug("prog '%s': relo #%d: patched insn #%d (LDIMM64) imm64 %llu -> %llu\n", prog_name, relo_idx, insn_idx, - (unsigned long long)imm, new_val); + (unsigned long long)imm, (unsigned long long)new_val); break; } default: @@ -1057,7 +1156,6 @@ poison: int bpf_core_format_spec(char *buf, size_t buf_sz, const struct bpf_core_spec *spec) { const struct btf_type *t; - const struct btf_enum *e; const char *s; __u32 type_id; int i, len = 0; @@ -1086,10 +1184,23 @@ int bpf_core_format_spec(char *buf, size_t buf_sz, const struct bpf_core_spec *s if (core_relo_is_enumval_based(spec->relo_kind)) { t = skip_mods_and_typedefs(spec->btf, type_id, NULL); - e = btf_enum(t) + spec->raw_spec[0]; - s = btf__name_by_offset(spec->btf, e->name_off); + if (btf_is_enum(t)) { + const struct btf_enum *e; + const char *fmt_str; + + e = btf_enum(t) + spec->raw_spec[0]; + s = btf__name_by_offset(spec->btf, e->name_off); + fmt_str = BTF_INFO_KFLAG(t->info) ? "::%s = %d" : "::%s = %u"; + append_buf(fmt_str, s, e->val); + } else { + const struct btf_enum64 *e; + const char *fmt_str; - append_buf("::%s = %u", s, e->val); + e = btf_enum64(t) + spec->raw_spec[0]; + s = btf__name_by_offset(spec->btf, e->name_off); + fmt_str = BTF_INFO_KFLAG(t->info) ? "::%s = %lld" : "::%s = %llu"; + append_buf(fmt_str, s, (unsigned long long)btf_enum64_value(e)); + } return len; } @@ -1148,11 +1259,11 @@ int bpf_core_format_spec(char *buf, size_t buf_sz, const struct bpf_core_spec *s * 3. It is supported and expected that there might be multiple flavors * matching the spec. As long as all the specs resolve to the same set of * offsets across all candidates, there is no error. If there is any - * ambiguity, CO-RE relocation will fail. 
This is necessary to accomodate - * imprefection of BTF deduplication, which can cause slight duplication of + * ambiguity, CO-RE relocation will fail. This is necessary to accommodate + * imperfection of BTF deduplication, which can cause slight duplication of * the same BTF type, if some directly or indirectly referenced (by * pointer) type gets resolved to different actual types in different - * object files. If such situation occurs, deduplicated BTF will end up + * object files. If such a situation occurs, deduplicated BTF will end up * with two (or more) structurally identical types, which differ only in * types they refer to through pointer. This should be OK in most cases and * is not an error. @@ -1261,10 +1372,12 @@ int bpf_core_calc_relo_insn(const char *prog_name, * decision and value, otherwise it's dangerous to * proceed due to ambiguity */ - pr_warn("prog '%s': relo #%d: relocation decision ambiguity: %s %u != %s %u\n", + pr_warn("prog '%s': relo #%d: relocation decision ambiguity: %s %llu != %s %llu\n", prog_name, relo_idx, - cand_res.poison ? "failure" : "success", cand_res.new_val, - targ_res->poison ? "failure" : "success", targ_res->new_val); + cand_res.poison ? "failure" : "success", + (unsigned long long)cand_res.new_val, + targ_res->poison ? 
"failure" : "success", + (unsigned long long)targ_res->new_val); return -EINVAL; } @@ -1305,3 +1418,273 @@ int bpf_core_calc_relo_insn(const char *prog_name, return 0; } + +static bool bpf_core_names_match(const struct btf *local_btf, size_t local_name_off, + const struct btf *targ_btf, size_t targ_name_off) +{ + const char *local_n, *targ_n; + size_t local_len, targ_len; + + local_n = btf__name_by_offset(local_btf, local_name_off); + targ_n = btf__name_by_offset(targ_btf, targ_name_off); + + if (str_is_empty(targ_n)) + return str_is_empty(local_n); + + targ_len = bpf_core_essential_name_len(targ_n); + local_len = bpf_core_essential_name_len(local_n); + + return targ_len == local_len && strncmp(local_n, targ_n, local_len) == 0; +} + +static int bpf_core_enums_match(const struct btf *local_btf, const struct btf_type *local_t, + const struct btf *targ_btf, const struct btf_type *targ_t) +{ + __u16 local_vlen = btf_vlen(local_t); + __u16 targ_vlen = btf_vlen(targ_t); + int i, j; + + if (local_t->size != targ_t->size) + return 0; + + if (local_vlen > targ_vlen) + return 0; + + /* iterate over the local enum's variants and make sure each has + * a symbolic name correspondent in the target + */ + for (i = 0; i < local_vlen; i++) { + bool matched = false; + __u32 local_n_off, targ_n_off; + + local_n_off = btf_is_enum(local_t) ? btf_enum(local_t)[i].name_off : + btf_enum64(local_t)[i].name_off; + + for (j = 0; j < targ_vlen; j++) { + targ_n_off = btf_is_enum(targ_t) ? 
btf_enum(targ_t)[j].name_off : + btf_enum64(targ_t)[j].name_off; + + if (bpf_core_names_match(local_btf, local_n_off, targ_btf, targ_n_off)) { + matched = true; + break; + } + } + + if (!matched) + return 0; + } + return 1; +} + +static int bpf_core_composites_match(const struct btf *local_btf, const struct btf_type *local_t, + const struct btf *targ_btf, const struct btf_type *targ_t, + bool behind_ptr, int level) +{ + const struct btf_member *local_m = btf_members(local_t); + __u16 local_vlen = btf_vlen(local_t); + __u16 targ_vlen = btf_vlen(targ_t); + int i, j, err; + + if (local_vlen > targ_vlen) + return 0; + + /* check that all local members have a match in the target */ + for (i = 0; i < local_vlen; i++, local_m++) { + const struct btf_member *targ_m = btf_members(targ_t); + bool matched = false; + + for (j = 0; j < targ_vlen; j++, targ_m++) { + if (!bpf_core_names_match(local_btf, local_m->name_off, + targ_btf, targ_m->name_off)) + continue; + + err = __bpf_core_types_match(local_btf, local_m->type, targ_btf, + targ_m->type, behind_ptr, level - 1); + if (err < 0) + return err; + if (err > 0) { + matched = true; + break; + } + } + + if (!matched) + return 0; + } + return 1; +} + +/* Check that two types "match". This function assumes that root types were + * already checked for name match. + * + * The matching relation is defined as follows: + * - modifiers and typedefs are stripped (and, hence, effectively ignored) + * - generally speaking types need to be of same kind (struct vs. struct, union + * vs. union, etc.) + * - exceptions are struct/union behind a pointer which could also match a + * forward declaration of a struct or union, respectively, and enum vs. 
+ * enum64 (see below) + * Then, depending on type: + * - integers: + * - match if size and signedness match + * - arrays & pointers: + * - target types are recursively matched + * - structs & unions: + * - local members need to exist in target with the same name + * - for each member we recursively check match unless it is already behind a + * pointer, in which case we only check matching names and compatible kind + * - enums: + * - local variants have to have a match in target by symbolic name (but not + * numeric value) + * - size has to match (but enum may match enum64 and vice versa) + * - function pointers: + * - number and position of arguments in local type has to match target + * - for each argument and the return value we recursively check match + */ +int __bpf_core_types_match(const struct btf *local_btf, __u32 local_id, const struct btf *targ_btf, + __u32 targ_id, bool behind_ptr, int level) +{ + const struct btf_type *local_t, *targ_t; + int depth = 32; /* max recursion depth */ + __u16 local_k, targ_k; + + if (level <= 0) + return -EINVAL; + + local_t = btf_type_by_id(local_btf, local_id); + targ_t = btf_type_by_id(targ_btf, targ_id); + +recur: + depth--; + if (depth < 0) + return -EINVAL; + + local_t = skip_mods_and_typedefs(local_btf, local_id, &local_id); + targ_t = skip_mods_and_typedefs(targ_btf, targ_id, &targ_id); + if (!local_t || !targ_t) + return -EINVAL; + + /* While the name check happens after typedefs are skipped, root-level + * typedefs would still be name-matched as that's the contract with + * callers. 
+ */ + if (!bpf_core_names_match(local_btf, local_t->name_off, targ_btf, targ_t->name_off)) + return 0; + + local_k = btf_kind(local_t); + targ_k = btf_kind(targ_t); + + switch (local_k) { + case BTF_KIND_UNKN: + return local_k == targ_k; + case BTF_KIND_FWD: { + bool local_f = BTF_INFO_KFLAG(local_t->info); + + if (behind_ptr) { + if (local_k == targ_k) + return local_f == BTF_INFO_KFLAG(targ_t->info); + + /* for forward declarations kflag dictates whether the + * target is a struct (0) or union (1) + */ + return (targ_k == BTF_KIND_STRUCT && !local_f) || + (targ_k == BTF_KIND_UNION && local_f); + } else { + if (local_k != targ_k) + return 0; + + /* match if the forward declaration is for the same kind */ + return local_f == BTF_INFO_KFLAG(targ_t->info); + } + } + case BTF_KIND_ENUM: + case BTF_KIND_ENUM64: + if (!btf_is_any_enum(targ_t)) + return 0; + + return bpf_core_enums_match(local_btf, local_t, targ_btf, targ_t); + case BTF_KIND_STRUCT: + case BTF_KIND_UNION: + if (behind_ptr) { + bool targ_f = BTF_INFO_KFLAG(targ_t->info); + + if (local_k == targ_k) + return 1; + + if (targ_k != BTF_KIND_FWD) + return 0; + + return (local_k == BTF_KIND_UNION) == targ_f; + } else { + if (local_k != targ_k) + return 0; + + return bpf_core_composites_match(local_btf, local_t, targ_btf, targ_t, + behind_ptr, level); + } + case BTF_KIND_INT: { + __u8 local_sgn; + __u8 targ_sgn; + + if (local_k != targ_k) + return 0; + + local_sgn = btf_int_encoding(local_t) & BTF_INT_SIGNED; + targ_sgn = btf_int_encoding(targ_t) & BTF_INT_SIGNED; + + return local_t->size == targ_t->size && local_sgn == targ_sgn; + } + case BTF_KIND_PTR: + if (local_k != targ_k) + return 0; + + behind_ptr = true; + + local_id = local_t->type; + targ_id = targ_t->type; + goto recur; + case BTF_KIND_ARRAY: { + const struct btf_array *local_array = btf_array(local_t); + const struct btf_array *targ_array = btf_array(targ_t); + + if (local_k != targ_k) + return 0; + + if (local_array->nelems != targ_array->nelems) + 
return 0; + + local_id = local_array->type; + targ_id = targ_array->type; + goto recur; + } + case BTF_KIND_FUNC_PROTO: { + struct btf_param *local_p = btf_params(local_t); + struct btf_param *targ_p = btf_params(targ_t); + __u16 local_vlen = btf_vlen(local_t); + __u16 targ_vlen = btf_vlen(targ_t); + int i, err; + + if (local_k != targ_k) + return 0; + + if (local_vlen != targ_vlen) + return 0; + + for (i = 0; i < local_vlen; i++, local_p++, targ_p++) { + err = __bpf_core_types_match(local_btf, local_p->type, targ_btf, + targ_p->type, behind_ptr, level - 1); + if (err <= 0) + return err; + } + + /* tail recurse for return type check */ + local_id = local_t->type; + targ_id = targ_t->type; + goto recur; + } + default: + pr_warn("unexpected kind %s relocated, local [%d], target [%d]\n", + btf_kind_str(local_t), local_id, targ_id); + return 0; + } +} diff --git a/tools/lib/bpf/relo_core.h b/tools/lib/bpf/relo_core.h index 073039d8ca4f..1c0566daf8e8 100644 --- a/tools/lib/bpf/relo_core.h +++ b/tools/lib/bpf/relo_core.h @@ -46,9 +46,9 @@ struct bpf_core_spec { struct bpf_core_relo_res { /* expected value in the instruction, unless validate == false */ - __u32 orig_val; + __u64 orig_val; /* new value that needs to be patched up to */ - __u32 new_val; + __u64 new_val; /* relocation unsuccessful, poison instruction, but don't fail load */ bool poison; /* some relocations can't be validated against orig_val */ @@ -68,8 +68,14 @@ struct bpf_core_relo_res { __u32 new_type_id; }; +int __bpf_core_types_are_compat(const struct btf *local_btf, __u32 local_id, + const struct btf *targ_btf, __u32 targ_id, int level); int bpf_core_types_are_compat(const struct btf *local_btf, __u32 local_id, const struct btf *targ_btf, __u32 targ_id); +int __bpf_core_types_match(const struct btf *local_btf, __u32 local_id, const struct btf *targ_btf, + __u32 targ_id, bool behind_ptr, int level); +int bpf_core_types_match(const struct btf *local_btf, __u32 local_id, const struct btf *targ_btf, + 
__u32 targ_id); size_t bpf_core_essential_name_len(const char *name); diff --git a/tools/lib/bpf/usdt.bpf.h b/tools/lib/bpf/usdt.bpf.h index 4181fddb3687..4f2adc0bd6ca 100644 --- a/tools/lib/bpf/usdt.bpf.h +++ b/tools/lib/bpf/usdt.bpf.h @@ -6,7 +6,6 @@ #include <linux/errno.h> #include <bpf/bpf_helpers.h> #include <bpf/bpf_tracing.h> -#include <bpf/bpf_core_read.h> /* Below types and maps are internal implementation details of libbpf's USDT * support and are subjects to change. Also, bpf_usdt_xxx() API helpers should @@ -30,14 +29,6 @@ #ifndef BPF_USDT_MAX_IP_CNT #define BPF_USDT_MAX_IP_CNT (4 * BPF_USDT_MAX_SPEC_CNT) #endif -/* We use BPF CO-RE to detect support for BPF cookie from BPF side. This is - * the only dependency on CO-RE, so if it's undesirable, user can override - * BPF_USDT_HAS_BPF_COOKIE to specify whether to BPF cookie is supported or not. - */ -#ifndef BPF_USDT_HAS_BPF_COOKIE -#define BPF_USDT_HAS_BPF_COOKIE \ - bpf_core_enum_value_exists(enum bpf_func_id___usdt, BPF_FUNC_get_attach_cookie___usdt) -#endif enum __bpf_usdt_arg_type { BPF_USDT_ARG_CONST, @@ -83,15 +74,12 @@ struct { __type(value, __u32); } __bpf_usdt_ip_to_spec_id SEC(".maps") __weak; -/* don't rely on user's BPF code to have latest definition of bpf_func_id */ -enum bpf_func_id___usdt { - BPF_FUNC_get_attach_cookie___usdt = 0xBAD, /* value doesn't matter */ -}; +extern const _Bool LINUX_HAS_BPF_COOKIE __kconfig; static __always_inline int __bpf_usdt_spec_id(struct pt_regs *ctx) { - if (!BPF_USDT_HAS_BPF_COOKIE) { + if (!LINUX_HAS_BPF_COOKIE) { long ip = PT_REGS_IP(ctx); int *spec_id_ptr; diff --git a/tools/lib/bpf/usdt.c b/tools/lib/bpf/usdt.c index f1c9339cfbbc..d18e37982344 100644 --- a/tools/lib/bpf/usdt.c +++ b/tools/lib/bpf/usdt.c @@ -441,7 +441,7 @@ static int parse_elf_segs(Elf *elf, const char *path, struct elf_seg **segs, siz return 0; } -static int parse_lib_segs(int pid, const char *lib_path, struct elf_seg **segs, size_t *seg_cnt) +static int parse_vma_segs(int pid, const 
char *lib_path, struct elf_seg **segs, size_t *seg_cnt) { char path[PATH_MAX], line[PATH_MAX], mode[16]; size_t seg_start, seg_end, seg_off; @@ -531,35 +531,40 @@ err_out: return err; } -static struct elf_seg *find_elf_seg(struct elf_seg *segs, size_t seg_cnt, long addr, bool relative) +static struct elf_seg *find_elf_seg(struct elf_seg *segs, size_t seg_cnt, long virtaddr) { struct elf_seg *seg; int i; - if (relative) { - /* for shared libraries, address is relative offset and thus - * should be fall within logical offset-based range of - * [offset_start, offset_end) - */ - for (i = 0, seg = segs; i < seg_cnt; i++, seg++) { - if (seg->offset <= addr && addr < seg->offset + (seg->end - seg->start)) - return seg; - } - } else { - /* for binaries, address is absolute and thus should be within - * absolute address range of [seg_start, seg_end) - */ - for (i = 0, seg = segs; i < seg_cnt; i++, seg++) { - if (seg->start <= addr && addr < seg->end) - return seg; - } + /* for ELF binaries (both executables and shared libraries), we are + * given virtual address (absolute for executables, relative for + * libraries) which should match address range of [seg_start, seg_end) + */ + for (i = 0, seg = segs; i < seg_cnt; i++, seg++) { + if (seg->start <= virtaddr && virtaddr < seg->end) + return seg; } + return NULL; +} +static struct elf_seg *find_vma_seg(struct elf_seg *segs, size_t seg_cnt, long offset) +{ + struct elf_seg *seg; + int i; + + /* for VMA segments from /proc/<pid>/maps file, provided "address" is + * actually a file offset, so should be fall within logical + * offset-based range of [offset_start, offset_end) + */ + for (i = 0, seg = segs; i < seg_cnt; i++, seg++) { + if (seg->offset <= offset && offset < seg->offset + (seg->end - seg->start)) + return seg; + } return NULL; } -static int parse_usdt_note(Elf *elf, const char *path, long base_addr, - GElf_Nhdr *nhdr, const char *data, size_t name_off, size_t desc_off, +static int parse_usdt_note(Elf *elf, const char 
*path, GElf_Nhdr *nhdr, + const char *data, size_t name_off, size_t desc_off, struct usdt_note *usdt_note); static int parse_usdt_spec(struct usdt_spec *spec, const struct usdt_note *note, __u64 usdt_cookie); @@ -568,8 +573,8 @@ static int collect_usdt_targets(struct usdt_manager *man, Elf *elf, const char * const char *usdt_provider, const char *usdt_name, __u64 usdt_cookie, struct usdt_target **out_targets, size_t *out_target_cnt) { - size_t off, name_off, desc_off, seg_cnt = 0, lib_seg_cnt = 0, target_cnt = 0; - struct elf_seg *segs = NULL, *lib_segs = NULL; + size_t off, name_off, desc_off, seg_cnt = 0, vma_seg_cnt = 0, target_cnt = 0; + struct elf_seg *segs = NULL, *vma_segs = NULL; struct usdt_target *targets = NULL, *target; long base_addr = 0; Elf_Scn *notes_scn, *base_scn; @@ -613,8 +618,7 @@ static int collect_usdt_targets(struct usdt_manager *man, Elf *elf, const char * struct elf_seg *seg = NULL; void *tmp; - err = parse_usdt_note(elf, path, base_addr, &nhdr, - data->d_buf, name_off, desc_off, ¬e); + err = parse_usdt_note(elf, path, &nhdr, data->d_buf, name_off, desc_off, ¬e); if (err) goto err_out; @@ -648,36 +652,33 @@ static int collect_usdt_targets(struct usdt_manager *man, Elf *elf, const char * * * [0] https://sourceware.org/systemtap/wiki/UserSpaceProbeImplementation */ - usdt_rel_ip = usdt_abs_ip = note.loc_addr; - if (base_addr) { + usdt_abs_ip = note.loc_addr; + if (base_addr) usdt_abs_ip += base_addr - note.base_addr; - usdt_rel_ip += base_addr - note.base_addr; - } - if (ehdr.e_type == ET_EXEC) { - /* When attaching uprobes (which what USDTs basically - * are) kernel expects a relative IP to be specified, - * so if we are attaching to an executable ELF binary - * (i.e., not a shared library), we need to calculate - * proper relative IP based on ELF's load address - */ - seg = find_elf_seg(segs, seg_cnt, usdt_abs_ip, false /* relative */); - if (!seg) { - err = -ESRCH; - pr_warn("usdt: failed to find ELF program segment for '%s:%s' in '%s' at 
IP 0x%lx\n", - usdt_provider, usdt_name, path, usdt_abs_ip); - goto err_out; - } - if (!seg->is_exec) { - err = -ESRCH; - pr_warn("usdt: matched ELF binary '%s' segment [0x%lx, 0x%lx) for '%s:%s' at IP 0x%lx is not executable\n", - path, seg->start, seg->end, usdt_provider, usdt_name, - usdt_abs_ip); - goto err_out; - } + /* When attaching uprobes (which is what USDTs basically are) + * kernel expects file offset to be specified, not a relative + * virtual address, so we need to translate virtual address to + * file offset, for both ET_EXEC and ET_DYN binaries. + */ + seg = find_elf_seg(segs, seg_cnt, usdt_abs_ip); + if (!seg) { + err = -ESRCH; + pr_warn("usdt: failed to find ELF program segment for '%s:%s' in '%s' at IP 0x%lx\n", + usdt_provider, usdt_name, path, usdt_abs_ip); + goto err_out; + } + if (!seg->is_exec) { + err = -ESRCH; + pr_warn("usdt: matched ELF binary '%s' segment [0x%lx, 0x%lx) for '%s:%s' at IP 0x%lx is not executable\n", + path, seg->start, seg->end, usdt_provider, usdt_name, + usdt_abs_ip); + goto err_out; + } + /* translate from virtual address to file offset */ + usdt_rel_ip = usdt_abs_ip - seg->start + seg->offset; - usdt_rel_ip = usdt_abs_ip - (seg->start - seg->offset); - } else if (!man->has_bpf_cookie) { /* ehdr.e_type == ET_DYN */ + if (ehdr.e_type == ET_DYN && !man->has_bpf_cookie) { /* If we don't have BPF cookie support but need to * attach to a shared library, we'll need to know and * record absolute addresses of attach points due to @@ -697,9 +698,9 @@ static int collect_usdt_targets(struct usdt_manager *man, Elf *elf, const char * goto err_out; } - /* lib_segs are lazily initialized only if necessary */ - if (lib_seg_cnt == 0) { - err = parse_lib_segs(pid, path, &lib_segs, &lib_seg_cnt); + /* vma_segs are lazily initialized only if necessary */ + if (vma_seg_cnt == 0) { + err = parse_vma_segs(pid, path, &vma_segs, &vma_seg_cnt); if (err) { pr_warn("usdt: failed to get memory segments in PID %d for shared library '%s': %d\n", 
pid, path, err); @@ -707,7 +708,7 @@ static int collect_usdt_targets(struct usdt_manager *man, Elf *elf, const char * } } - seg = find_elf_seg(lib_segs, lib_seg_cnt, usdt_rel_ip, true /* relative */); + seg = find_vma_seg(vma_segs, vma_seg_cnt, usdt_rel_ip); if (!seg) { err = -ESRCH; pr_warn("usdt: failed to find shared lib memory segment for '%s:%s' in '%s' at relative IP 0x%lx\n", @@ -715,7 +716,7 @@ static int collect_usdt_targets(struct usdt_manager *man, Elf *elf, const char * goto err_out; } - usdt_abs_ip = seg->start + (usdt_rel_ip - seg->offset); + usdt_abs_ip = seg->start - seg->offset + usdt_rel_ip; } pr_debug("usdt: probe for '%s:%s' in %s '%s': addr 0x%lx base 0x%lx (resolved abs_ip 0x%lx rel_ip 0x%lx) args '%s' in segment [0x%lx, 0x%lx) at offset 0x%lx\n", @@ -723,7 +724,7 @@ static int collect_usdt_targets(struct usdt_manager *man, Elf *elf, const char * note.loc_addr, note.base_addr, usdt_abs_ip, usdt_rel_ip, note.args, seg ? seg->start : 0, seg ? seg->end : 0, seg ? seg->offset : 0); - /* Adjust semaphore address to be a relative offset */ + /* Adjust semaphore address to be a file offset */ if (note.sema_addr) { if (!man->has_sema_refcnt) { pr_warn("usdt: kernel doesn't support USDT semaphore refcounting for '%s:%s' in '%s'\n", @@ -732,7 +733,7 @@ static int collect_usdt_targets(struct usdt_manager *man, Elf *elf, const char * goto err_out; } - seg = find_elf_seg(segs, seg_cnt, note.sema_addr, false /* relative */); + seg = find_elf_seg(segs, seg_cnt, note.sema_addr); if (!seg) { err = -ESRCH; pr_warn("usdt: failed to find ELF loadable segment with semaphore of '%s:%s' in '%s' at 0x%lx\n", @@ -747,7 +748,7 @@ static int collect_usdt_targets(struct usdt_manager *man, Elf *elf, const char * goto err_out; } - usdt_sema_off = note.sema_addr - (seg->start - seg->offset); + usdt_sema_off = note.sema_addr - seg->start + seg->offset; pr_debug("usdt: sema for '%s:%s' in %s '%s': addr 0x%lx base 0x%lx (resolved 0x%lx) in segment [0x%lx, 0x%lx] at offset 
0x%lx\n", usdt_provider, usdt_name, ehdr.e_type == ET_EXEC ? "exec" : "lib ", @@ -770,7 +771,7 @@ static int collect_usdt_targets(struct usdt_manager *man, Elf *elf, const char * target->rel_ip = usdt_rel_ip; target->sema_off = usdt_sema_off; - /* notes->args references strings from Elf itself, so they can + /* notes.args references strings from Elf itself, so they can * be referenced safely until elf_end() call */ target->spec_str = note.args; @@ -788,7 +789,7 @@ static int collect_usdt_targets(struct usdt_manager *man, Elf *elf, const char * err_out: free(segs); - free(lib_segs); + free(vma_segs); if (err < 0) free(targets); return err; @@ -1089,8 +1090,8 @@ err_out: /* Parse out USDT ELF note from '.note.stapsdt' section. * Logic inspired by perf's code. */ -static int parse_usdt_note(Elf *elf, const char *path, long base_addr, - GElf_Nhdr *nhdr, const char *data, size_t name_off, size_t desc_off, +static int parse_usdt_note(Elf *elf, const char *path, GElf_Nhdr *nhdr, + const char *data, size_t name_off, size_t desc_off, struct usdt_note *note) { const char *provider, *name, *args; diff --git a/tools/perf/util/bpf-loader.c b/tools/perf/util/bpf-loader.c index cdd6463a5b68..d2c9b09ddb48 100644 --- a/tools/perf/util/bpf-loader.c +++ b/tools/perf/util/bpf-loader.c @@ -9,6 +9,7 @@ #include <linux/bpf.h> #include <bpf/libbpf.h> #include <bpf/bpf.h> +#include <linux/filter.h> #include <linux/err.h> #include <linux/kernel.h> #include <linux/string.h> @@ -49,6 +50,7 @@ struct bpf_prog_priv { struct bpf_insn *insns_buf; int nr_types; int *type_mapping; + int *prologue_fds; }; struct bpf_perf_object { @@ -56,6 +58,11 @@ struct bpf_perf_object { struct bpf_object *obj; }; +struct bpf_preproc_result { + struct bpf_insn *new_insn_ptr; + int new_insn_cnt; +}; + static LIST_HEAD(bpf_objects_list); static struct hashmap *bpf_program_hash; static struct hashmap *bpf_map_hash; @@ -82,6 +89,7 @@ bpf_perf_object__next(struct bpf_perf_object *prev) (perf_obj) = (tmp), (tmp) = 
bpf_perf_object__next(tmp)) static bool libbpf_initialized; +static int libbpf_sec_handler; static int bpf_perf_object__add(struct bpf_object *obj) { @@ -95,12 +103,76 @@ static int bpf_perf_object__add(struct bpf_object *obj) return perf_obj ? 0 : -ENOMEM; } +static void *program_priv(const struct bpf_program *prog) +{ + void *priv; + + if (IS_ERR_OR_NULL(bpf_program_hash)) + return NULL; + if (!hashmap__find(bpf_program_hash, prog, &priv)) + return NULL; + return priv; +} + +static struct bpf_insn prologue_init_insn[] = { + BPF_MOV64_IMM(BPF_REG_2, 0), + BPF_MOV64_IMM(BPF_REG_3, 0), + BPF_MOV64_IMM(BPF_REG_4, 0), + BPF_MOV64_IMM(BPF_REG_5, 0), +}; + +static int libbpf_prog_prepare_load_fn(struct bpf_program *prog, + struct bpf_prog_load_opts *opts __maybe_unused, + long cookie __maybe_unused) +{ + size_t init_size_cnt = ARRAY_SIZE(prologue_init_insn); + size_t orig_insn_cnt, insn_cnt, init_size, orig_size; + struct bpf_prog_priv *priv = program_priv(prog); + const struct bpf_insn *orig_insn; + struct bpf_insn *insn; + + if (IS_ERR_OR_NULL(priv)) { + pr_debug("bpf: failed to get private field\n"); + return -BPF_LOADER_ERRNO__INTERNAL; + } + + if (!priv->need_prologue) + return 0; + + /* prepend initialization code to program instructions */ + orig_insn = bpf_program__insns(prog); + orig_insn_cnt = bpf_program__insn_cnt(prog); + init_size = init_size_cnt * sizeof(*insn); + orig_size = orig_insn_cnt * sizeof(*insn); + + insn_cnt = orig_insn_cnt + init_size_cnt; + insn = malloc(insn_cnt * sizeof(*insn)); + if (!insn) + return -ENOMEM; + + memcpy(insn, prologue_init_insn, init_size); + memcpy((char *) insn + init_size, orig_insn, orig_size); + bpf_program__set_insns(prog, insn, insn_cnt); + return 0; +} + static int libbpf_init(void) { + LIBBPF_OPTS(libbpf_prog_handler_opts, handler_opts, + .prog_prepare_load_fn = libbpf_prog_prepare_load_fn, + ); + if (libbpf_initialized) return 0; libbpf_set_print(libbpf_perf_print); + libbpf_sec_handler = 
libbpf_register_prog_handler(NULL, BPF_PROG_TYPE_KPROBE, + 0, &handler_opts); + if (libbpf_sec_handler < 0) { + pr_debug("bpf: failed to register libbpf section handler: %d\n", + libbpf_sec_handler); + return -BPF_LOADER_ERRNO__INTERNAL; + } libbpf_initialized = true; return 0; } @@ -184,14 +256,31 @@ struct bpf_object *bpf__prepare_load(const char *filename, bool source) return obj; } +static void close_prologue_programs(struct bpf_prog_priv *priv) +{ + struct perf_probe_event *pev; + int i, fd; + + if (!priv->need_prologue) + return; + pev = &priv->pev; + for (i = 0; i < pev->ntevs; i++) { + fd = priv->prologue_fds[i]; + if (fd != -1) + close(fd); + } +} + static void clear_prog_priv(const struct bpf_program *prog __maybe_unused, void *_priv) { struct bpf_prog_priv *priv = _priv; + close_prologue_programs(priv); cleanup_perf_probe_events(&priv->pev, 1); zfree(&priv->insns_buf); + zfree(&priv->prologue_fds); zfree(&priv->type_mapping); zfree(&priv->sys_name); zfree(&priv->evt_name); @@ -239,17 +328,6 @@ static bool ptr_equal(const void *key1, const void *key2, return key1 == key2; } -static void *program_priv(const struct bpf_program *prog) -{ - void *priv; - - if (IS_ERR_OR_NULL(bpf_program_hash)) - return NULL; - if (!hashmap__find(bpf_program_hash, prog, &priv)) - return NULL; - return priv; -} - static int program_set_priv(struct bpf_program *prog, void *priv) { void *old_priv; @@ -554,8 +632,8 @@ static int bpf__prepare_probe(void) static int preproc_gen_prologue(struct bpf_program *prog, int n, - struct bpf_insn *orig_insns, int orig_insns_cnt, - struct bpf_prog_prep_result *res) + const struct bpf_insn *orig_insns, int orig_insns_cnt, + struct bpf_preproc_result *res) { struct bpf_prog_priv *priv = program_priv(prog); struct probe_trace_event *tev; @@ -603,7 +681,6 @@ preproc_gen_prologue(struct bpf_program *prog, int n, res->new_insn_ptr = buf; res->new_insn_cnt = prologue_cnt + orig_insns_cnt; - res->pfd = NULL; return 0; errout: @@ -711,7 +788,7 @@ 
static int hook_load_preprocessor(struct bpf_program *prog) struct bpf_prog_priv *priv = program_priv(prog); struct perf_probe_event *pev; bool need_prologue = false; - int err, i; + int i; if (IS_ERR_OR_NULL(priv)) { pr_debug("Internal error when hook preprocessor\n"); @@ -749,6 +826,13 @@ static int hook_load_preprocessor(struct bpf_program *prog) return -ENOMEM; } + priv->prologue_fds = malloc(sizeof(int) * pev->ntevs); + if (!priv->prologue_fds) { + pr_debug("Not enough memory: alloc prologue fds failed\n"); + return -ENOMEM; + } + memset(priv->prologue_fds, -1, sizeof(int) * pev->ntevs); + priv->type_mapping = malloc(sizeof(int) * pev->ntevs); if (!priv->type_mapping) { pr_debug("Not enough memory: alloc type_mapping failed\n"); @@ -757,13 +841,7 @@ static int hook_load_preprocessor(struct bpf_program *prog) memset(priv->type_mapping, -1, sizeof(int) * pev->ntevs); - err = map_prologue(pev, priv->type_mapping, &priv->nr_types); - if (err) - return err; - - err = bpf_program__set_prep(prog, priv->nr_types, - preproc_gen_prologue); - return err; + return map_prologue(pev, priv->type_mapping, &priv->nr_types); } int bpf__probe(struct bpf_object *obj) @@ -870,6 +948,77 @@ int bpf__unprobe(struct bpf_object *obj) return ret; } +static int bpf_object__load_prologue(struct bpf_object *obj) +{ + int init_cnt = ARRAY_SIZE(prologue_init_insn); + const struct bpf_insn *orig_insns; + struct bpf_preproc_result res; + struct perf_probe_event *pev; + struct bpf_program *prog; + int orig_insns_cnt; + + bpf_object__for_each_program(prog, obj) { + struct bpf_prog_priv *priv = program_priv(prog); + int err, i, fd; + + if (IS_ERR_OR_NULL(priv)) { + pr_debug("bpf: failed to get private field\n"); + return -BPF_LOADER_ERRNO__INTERNAL; + } + + if (!priv->need_prologue) + continue; + + /* + * For each program that needs prologue we do following: + * + * - take its current instructions and use them + * to generate the new code with prologue + * - load new instructions with 
bpf_prog_load + * and keep the fd in prologue_fds + * - new fd will be used in bpf__foreach_event + * to connect this program with perf evsel + */ + orig_insns = bpf_program__insns(prog); + orig_insns_cnt = bpf_program__insn_cnt(prog); + + pev = &priv->pev; + for (i = 0; i < pev->ntevs; i++) { + /* + * Skipping artificial prologue_init_insn instructions + * (init_cnt), so the prologue can be generated instead + * of them. + */ + err = preproc_gen_prologue(prog, i, + orig_insns + init_cnt, + orig_insns_cnt - init_cnt, + &res); + if (err) + return err; + + fd = bpf_prog_load(bpf_program__get_type(prog), + bpf_program__name(prog), "GPL", + res.new_insn_ptr, + res.new_insn_cnt, NULL); + if (fd < 0) { + char bf[128]; + + libbpf_strerror(-errno, bf, sizeof(bf)); + pr_debug("bpf: load objects with prologue failed: err=%d: (%s)\n", + -errno, bf); + return -errno; + } + priv->prologue_fds[i] = fd; + } + /* + * We no longer need the original program, + * we can unload it. + */ + bpf_program__unload(prog); + } + return 0; +} + int bpf__load(struct bpf_object *obj) { int err; @@ -881,7 +1030,7 @@ int bpf__load(struct bpf_object *obj) pr_debug("bpf: load objects failed: err=%d: (%s)\n", err, bf); return err; } - return 0; + return bpf_object__load_prologue(obj); } int bpf__foreach_event(struct bpf_object *obj, @@ -916,13 +1065,10 @@ int bpf__foreach_event(struct bpf_object *obj, for (i = 0; i < pev->ntevs; i++) { tev = &pev->tevs[i]; - if (priv->need_prologue) { - int type = priv->type_mapping[i]; - - fd = bpf_program__nth_fd(prog, type); - } else { + if (priv->need_prologue) + fd = priv->prologue_fds[i]; + else fd = bpf_program__fd(prog); - } if (fd < 0) { pr_debug("bpf: failed to get file descriptor\n"); diff --git a/tools/testing/selftests/bpf/.gitignore b/tools/testing/selftests/bpf/.gitignore index 595565eb68c0..3a8cb2404ea6 100644 --- a/tools/testing/selftests/bpf/.gitignore +++ b/tools/testing/selftests/bpf/.gitignore @@ -41,5 +41,6 @@ test_cpp /bench *.ko *.tmp 
-xdpxceiver +xskxceiver xdp_redirect_multi +xdp_synproxy diff --git a/tools/testing/selftests/bpf/DENYLIST b/tools/testing/selftests/bpf/DENYLIST new file mode 100644 index 000000000000..939de574fc7f --- /dev/null +++ b/tools/testing/selftests/bpf/DENYLIST @@ -0,0 +1,6 @@ +# TEMPORARY +get_stack_raw_tp # spams with kernel warnings until next bpf -> bpf-next merge +stacktrace_build_id_nmi +stacktrace_build_id +task_fd_query_rawtp +varlen diff --git a/tools/testing/selftests/bpf/DENYLIST.s390x b/tools/testing/selftests/bpf/DENYLIST.s390x new file mode 100644 index 000000000000..e33cab34d22f --- /dev/null +++ b/tools/testing/selftests/bpf/DENYLIST.s390x @@ -0,0 +1,67 @@ +# TEMPORARY +atomics # attach(add): actual -524 <= expected 0 (trampoline) +bpf_iter_setsockopt # JIT does not support calling kernel function (kfunc) +bloom_filter_map # failed to find kernel BTF type ID of '__x64_sys_getpgid': -3 (?) +bpf_tcp_ca # JIT does not support calling kernel function (kfunc) +bpf_loop # attaches to __x64_sys_nanosleep +bpf_mod_race # BPF trampoline +bpf_nf # JIT does not support calling kernel function +core_read_macros # unknown func bpf_probe_read#4 (overlapping) +d_path # failed to auto-attach program 'prog_stat': -524 (trampoline) +dummy_st_ops # test_run unexpected error: -524 (errno 524) (trampoline) +fentry_fexit # fentry attach failed: -524 (trampoline) +fentry_test # fentry_first_attach unexpected error: -524 (trampoline) +fexit_bpf2bpf # freplace_attach_trace unexpected error: -524 (trampoline) +fexit_sleep # fexit_skel_load fexit skeleton failed (trampoline) +fexit_stress # fexit attach failed prog 0 failed: -524 (trampoline) +fexit_test # fexit_first_attach unexpected error: -524 (trampoline) +get_func_args_test # trampoline +get_func_ip_test # get_func_ip_test__attach unexpected error: -524 (trampoline) +get_stack_raw_tp # user_stack corrupted user stack (no backchain userspace) +kfree_skb # attach fentry unexpected error: -524 (trampoline) +kfunc_call # 
'bpf_prog_active': not found in kernel BTF (?) +ksyms_module # test_ksyms_module__open_and_load unexpected error: -9 (?) +ksyms_module_libbpf # JIT does not support calling kernel function (kfunc) +ksyms_module_lskel # test_ksyms_module_lskel__open_and_load unexpected error: -9 (?) +modify_return # modify_return attach failed: -524 (trampoline) +module_attach # skel_attach skeleton attach failed: -524 (trampoline) +mptcp +kprobe_multi_test # relies on fentry +netcnt # failed to load BPF skeleton 'netcnt_prog': -7 (?) +probe_user # check_kprobe_res wrong kprobe res from probe read (?) +recursion # skel_attach unexpected error: -524 (trampoline) +ringbuf # skel_load skeleton load failed (?) +sk_assign # Can't read on server: Invalid argument (?) +sk_lookup # endianness problem +sk_storage_tracing # test_sk_storage_tracing__attach unexpected error: -524 (trampoline) +skc_to_unix_sock # could not attach BPF object unexpected error: -524 (trampoline) +socket_cookie # prog_attach unexpected error: -524 (trampoline) +stacktrace_build_id # compare_map_keys stackid_hmap vs. stackmap err -2 errno 2 (?) +tailcalls # tail_calls are not allowed in non-JITed programs with bpf-to-bpf calls (?) +task_local_storage # failed to auto-attach program 'trace_exit_creds': -524 (trampoline) +test_bpffs # bpffs test failed 255 (iterator) +test_bprm_opts # failed to auto-attach program 'secure_exec': -524 (trampoline) +test_ima # failed to auto-attach program 'ima': -524 (trampoline) +test_local_storage # failed to auto-attach program 'unlink_hook': -524 (trampoline) +test_lsm # failed to find kernel BTF type ID of '__x64_sys_setdomainname': -3 (?) 
+test_overhead # attach_fentry unexpected error: -524 (trampoline) +test_profiler # unknown func bpf_probe_read_str#45 (overlapping) +timer # failed to auto-attach program 'test1': -524 (trampoline) +timer_crash # trampoline +timer_mim # failed to auto-attach program 'test1': -524 (trampoline) +trace_ext # failed to auto-attach program 'test_pkt_md_access_new': -524 (trampoline) +trace_printk # trace_printk__load unexpected error: -2 (errno 2) (?) +trace_vprintk # trace_vprintk__open_and_load unexpected error: -9 (?) +trampoline_count # prog 'prog1': failed to attach: ERROR: strerror_r(-524)=22 (trampoline) +verif_stats # trace_vprintk__open_and_load unexpected error: -9 (?) +vmlinux # failed to auto-attach program 'handle__fentry': -524 (trampoline) +xdp_adjust_tail # case-128 err 0 errno 28 retval 1 size 128 expect-size 3520 (?) +xdp_bonding # failed to auto-attach program 'trace_on_entry': -524 (trampoline) +xdp_bpf2bpf # failed to auto-attach program 'trace_on_entry': -524 (trampoline) +map_kptr # failed to open_and_load program: -524 (trampoline) +bpf_cookie # failed to open_and_load program: -524 (trampoline) +xdp_do_redirect # prog_run_max_size unexpected error: -22 (errno 22) +send_signal # intermittently fails to receive signal +select_reuseport # intermittently fails on new s390x setup +xdp_synproxy # JIT does not support calling kernel function (kfunc) +unpriv_bpf_disabled # fentry diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile index 2d3c8c8f558a..8d59ec7f4c2d 100644 --- a/tools/testing/selftests/bpf/Makefile +++ b/tools/testing/selftests/bpf/Makefile @@ -82,7 +82,7 @@ TEST_PROGS_EXTENDED := with_addr.sh \ TEST_GEN_PROGS_EXTENDED = test_sock_addr test_skb_cgroup_id_user \ flow_dissector_load test_flow_dissector test_tcp_check_syncookie_user \ test_lirc_mode2_user xdping test_cpp runqslower bench bpf_testmod.ko \ - xdpxceiver xdp_redirect_multi + xskxceiver xdp_redirect_multi xdp_synproxy TEST_CUSTOM_PROGS = 
$(OUTPUT)/urandom_read @@ -168,17 +168,26 @@ $(OUTPUT)/%:%.c $(call msg,BINARY,,$@) $(Q)$(LINK.c) $^ $(LDLIBS) -o $@ +# LLVM's ld.lld doesn't support all the architectures, so use it only on x86 +ifeq ($(SRCARCH),x86) +LLD := lld +else +LLD := ld +endif + # Filter out -static for liburandom_read.so and its dependent targets so that static builds # do not fail. Static builds leave urandom_read relying on system-wide shared libraries. $(OUTPUT)/liburandom_read.so: urandom_read_lib1.c urandom_read_lib2.c $(call msg,LIB,,$@) - $(Q)$(CC) $(filter-out -static,$(CFLAGS) $(LDFLAGS)) $^ $(LDLIBS) -fPIC -shared -o $@ + $(Q)$(CLANG) $(filter-out -static,$(CFLAGS) $(LDFLAGS)) $^ $(LDLIBS) \ + -fuse-ld=$(LLD) -Wl,-znoseparate-code -fPIC -shared -o $@ $(OUTPUT)/urandom_read: urandom_read.c urandom_read_aux.c $(OUTPUT)/liburandom_read.so $(call msg,BINARY,,$@) - $(Q)$(CC) $(filter-out -static,$(CFLAGS) $(LDFLAGS)) $(filter %.c,$^) \ - liburandom_read.so $(LDLIBS) \ - -Wl,-rpath=. -Wl,--build-id=sha1 -o $@ + $(Q)$(CLANG) $(filter-out -static,$(CFLAGS) $(LDFLAGS)) $(filter %.c,$^) \ + liburandom_read.so $(LDLIBS) \ + -fuse-ld=$(LLD) -Wl,-znoseparate-code \ + -Wl,-rpath=. 
-Wl,--build-id=sha1 -o $@ $(OUTPUT)/bpf_testmod.ko: $(VMLINUX_BTF) $(wildcard bpf_testmod/Makefile bpf_testmod/*.[ch]) $(call msg,MOD,,$@) @@ -221,6 +230,8 @@ $(OUTPUT)/xdping: $(TESTING_HELPERS) $(OUTPUT)/flow_dissector_load: $(TESTING_HELPERS) $(OUTPUT)/test_maps: $(TESTING_HELPERS) $(OUTPUT)/test_verifier: $(TESTING_HELPERS) $(CAP_HELPERS) +$(OUTPUT)/xsk.o: $(BPFOBJ) +$(OUTPUT)/xskxceiver: $(OUTPUT)/xsk.o BPFTOOL ?= $(DEFAULT_BPFTOOL) $(DEFAULT_BPFTOOL): $(wildcard $(BPFTOOLDIR)/*.[ch] $(BPFTOOLDIR)/Makefile) \ @@ -502,6 +513,7 @@ TRUNNER_EXTRA_SOURCES := test_progs.c cgroup_helpers.c trace_helpers.c \ cap_helpers.c TRUNNER_EXTRA_FILES := $(OUTPUT)/urandom_read $(OUTPUT)/bpf_testmod.ko \ $(OUTPUT)/liburandom_read.so \ + $(OUTPUT)/xdp_synproxy \ ima_setup.sh \ $(wildcard progs/btf_dump_test_case_*.c) TRUNNER_BPF_BUILD_RULE := CLANG_BPF_BUILD_RULE @@ -560,6 +572,9 @@ $(OUTPUT)/bench_ringbufs.o: $(OUTPUT)/ringbuf_bench.skel.h \ $(OUTPUT)/bench_bloom_filter_map.o: $(OUTPUT)/bloom_filter_bench.skel.h $(OUTPUT)/bench_bpf_loop.o: $(OUTPUT)/bpf_loop_bench.skel.h $(OUTPUT)/bench_strncmp.o: $(OUTPUT)/strncmp_bench.skel.h +$(OUTPUT)/bench_bpf_hashmap_full_update.o: $(OUTPUT)/bpf_hashmap_full_update_bench.skel.h +$(OUTPUT)/bench_local_storage.o: $(OUTPUT)/local_storage_bench.skel.h +$(OUTPUT)/bench_local_storage_rcu_tasks_trace.o: $(OUTPUT)/local_storage_rcu_tasks_trace_bench.skel.h $(OUTPUT)/bench.o: bench.h testing_helpers.h $(BPFOBJ) $(OUTPUT)/bench: LDLIBS += -lm $(OUTPUT)/bench: $(OUTPUT)/bench.o \ @@ -571,13 +586,18 @@ $(OUTPUT)/bench: $(OUTPUT)/bench.o \ $(OUTPUT)/bench_ringbufs.o \ $(OUTPUT)/bench_bloom_filter_map.o \ $(OUTPUT)/bench_bpf_loop.o \ - $(OUTPUT)/bench_strncmp.o + $(OUTPUT)/bench_strncmp.o \ + $(OUTPUT)/bench_bpf_hashmap_full_update.o \ + $(OUTPUT)/bench_local_storage.o \ + $(OUTPUT)/bench_local_storage_rcu_tasks_trace.o $(call msg,BINARY,,$@) $(Q)$(CC) $(CFLAGS) $(LDFLAGS) $(filter %.a %.o,$^) $(LDLIBS) -o $@ EXTRA_CLEAN := $(TEST_CUSTOM_PROGS) 
$(SCRATCH_DIR) $(HOST_SCRATCH_DIR) \ prog_tests/tests.h map_tests/tests.h verifier/tests.h \ feature bpftool \ - $(addprefix $(OUTPUT)/,*.o *.skel.h *.lskel.h *.subskel.h no_alu32 bpf_gcc bpf_testmod.ko) + $(addprefix $(OUTPUT)/,*.o *.skel.h *.lskel.h *.subskel.h \ + no_alu32 bpf_gcc bpf_testmod.ko \ + liburandom_read.so) .PHONY: docs docs-clean diff --git a/tools/testing/selftests/bpf/bench.c b/tools/testing/selftests/bpf/bench.c index f061cc20e776..c1f20a147462 100644 --- a/tools/testing/selftests/bpf/bench.c +++ b/tools/testing/selftests/bpf/bench.c @@ -79,6 +79,43 @@ void hits_drops_report_progress(int iter, struct bench_res *res, long delta_ns) hits_per_sec, hits_per_prod, drops_per_sec, hits_per_sec + drops_per_sec); } +void +grace_period_latency_basic_stats(struct bench_res res[], int res_cnt, struct basic_stats *gp_stat) +{ + int i; + + memset(gp_stat, 0, sizeof(struct basic_stats)); + + for (i = 0; i < res_cnt; i++) + gp_stat->mean += res[i].gp_ns / 1000.0 / (double)res[i].gp_ct / (0.0 + res_cnt); + +#define IT_MEAN_DIFF (res[i].gp_ns / 1000.0 / (double)res[i].gp_ct - gp_stat->mean) + if (res_cnt > 1) { + for (i = 0; i < res_cnt; i++) + gp_stat->stddev += (IT_MEAN_DIFF * IT_MEAN_DIFF) / (res_cnt - 1.0); + } + gp_stat->stddev = sqrt(gp_stat->stddev); +#undef IT_MEAN_DIFF +} + +void +grace_period_ticks_basic_stats(struct bench_res res[], int res_cnt, struct basic_stats *gp_stat) +{ + int i; + + memset(gp_stat, 0, sizeof(struct basic_stats)); + for (i = 0; i < res_cnt; i++) + gp_stat->mean += res[i].stime / (double)res[i].gp_ct / (0.0 + res_cnt); + +#define IT_MEAN_DIFF (res[i].stime / (double)res[i].gp_ct - gp_stat->mean) + if (res_cnt > 1) { + for (i = 0; i < res_cnt; i++) + gp_stat->stddev += (IT_MEAN_DIFF * IT_MEAN_DIFF) / (res_cnt - 1.0); + } + gp_stat->stddev = sqrt(gp_stat->stddev); +#undef IT_MEAN_DIFF +} + void hits_drops_report_final(struct bench_res res[], int res_cnt) { int i; @@ -150,6 +187,53 @@ void ops_report_final(struct bench_res res[], int 
res_cnt) printf("latency %8.3lf ns/op\n", 1000.0 / hits_mean * env.producer_cnt); } +void local_storage_report_progress(int iter, struct bench_res *res, + long delta_ns) +{ + double important_hits_per_sec, hits_per_sec; + double delta_sec = delta_ns / 1000000000.0; + + hits_per_sec = res->hits / 1000000.0 / delta_sec; + important_hits_per_sec = res->important_hits / 1000000.0 / delta_sec; + + printf("Iter %3d (%7.3lfus): ", iter, (delta_ns - 1000000000) / 1000.0); + + printf("hits %8.3lfM/s ", hits_per_sec); + printf("important_hits %8.3lfM/s\n", important_hits_per_sec); +} + +void local_storage_report_final(struct bench_res res[], int res_cnt) +{ + double important_hits_mean = 0.0, important_hits_stddev = 0.0; + double hits_mean = 0.0, hits_stddev = 0.0; + int i; + + for (i = 0; i < res_cnt; i++) { + hits_mean += res[i].hits / 1000000.0 / (0.0 + res_cnt); + important_hits_mean += res[i].important_hits / 1000000.0 / (0.0 + res_cnt); + } + + if (res_cnt > 1) { + for (i = 0; i < res_cnt; i++) { + hits_stddev += (hits_mean - res[i].hits / 1000000.0) * + (hits_mean - res[i].hits / 1000000.0) / + (res_cnt - 1.0); + important_hits_stddev += + (important_hits_mean - res[i].important_hits / 1000000.0) * + (important_hits_mean - res[i].important_hits / 1000000.0) / + (res_cnt - 1.0); + } + + hits_stddev = sqrt(hits_stddev); + important_hits_stddev = sqrt(important_hits_stddev); + } + printf("Summary: hits throughput %8.3lf \u00B1 %5.3lf M ops/s, ", + hits_mean, hits_stddev); + printf("hits latency %8.3lf ns/op, ", 1000.0 / hits_mean); + printf("important_hits throughput %8.3lf \u00B1 %5.3lf M ops/s\n", + important_hits_mean, important_hits_stddev); +} + const char *argp_program_version = "benchmark"; const char *argp_program_bug_address = "<bpf@vger.kernel.org>"; const char argp_program_doc[] = @@ -188,13 +272,18 @@ static const struct argp_option opts[] = { extern struct argp bench_ringbufs_argp; extern struct argp bench_bloom_map_argp; extern struct argp 
bench_bpf_loop_argp; +extern struct argp bench_local_storage_argp; +extern struct argp bench_local_storage_rcu_tasks_trace_argp; extern struct argp bench_strncmp_argp; static const struct argp_child bench_parsers[] = { { &bench_ringbufs_argp, 0, "Ring buffers benchmark", 0 }, { &bench_bloom_map_argp, 0, "Bloom filter map benchmark", 0 }, { &bench_bpf_loop_argp, 0, "bpf_loop helper benchmark", 0 }, + { &bench_local_storage_argp, 0, "local_storage benchmark", 0 }, { &bench_strncmp_argp, 0, "bpf_strncmp helper benchmark", 0 }, + { &bench_local_storage_rcu_tasks_trace_argp, 0, + "local_storage RCU Tasks Trace slowdown benchmark", 0 }, {}, }; @@ -396,6 +485,11 @@ extern const struct bench bench_hashmap_with_bloom; extern const struct bench bench_bpf_loop; extern const struct bench bench_strncmp_no_helper; extern const struct bench bench_strncmp_helper; +extern const struct bench bench_bpf_hashmap_full_update; +extern const struct bench bench_local_storage_cache_seq_get; +extern const struct bench bench_local_storage_cache_interleaved_get; +extern const struct bench bench_local_storage_cache_hashmap_control; +extern const struct bench bench_local_storage_tasks_trace; static const struct bench *benchs[] = { &bench_count_global, @@ -430,6 +524,11 @@ static const struct bench *benchs[] = { &bench_bpf_loop, &bench_strncmp_no_helper, &bench_strncmp_helper, + &bench_bpf_hashmap_full_update, + &bench_local_storage_cache_seq_get, + &bench_local_storage_cache_interleaved_get, + &bench_local_storage_cache_hashmap_control, + &bench_local_storage_tasks_trace, }; static void setup_benchmark() diff --git a/tools/testing/selftests/bpf/bench.h b/tools/testing/selftests/bpf/bench.h index fb3e213df3dc..d748255877e2 100644 --- a/tools/testing/selftests/bpf/bench.h +++ b/tools/testing/selftests/bpf/bench.h @@ -30,10 +30,19 @@ struct env { struct cpu_set cons_cpus; }; +struct basic_stats { + double mean; + double stddev; +}; + struct bench_res { long hits; long drops; long false_hits; + long 
important_hits; + unsigned long gp_ns; + unsigned long gp_ct; + unsigned int stime; }; struct bench { @@ -61,6 +70,13 @@ void false_hits_report_progress(int iter, struct bench_res *res, long delta_ns); void false_hits_report_final(struct bench_res res[], int res_cnt); void ops_report_progress(int iter, struct bench_res *res, long delta_ns); void ops_report_final(struct bench_res res[], int res_cnt); +void local_storage_report_progress(int iter, struct bench_res *res, + long delta_ns); +void local_storage_report_final(struct bench_res res[], int res_cnt); +void grace_period_latency_basic_stats(struct bench_res res[], int res_cnt, + struct basic_stats *gp_stat); +void grace_period_ticks_basic_stats(struct bench_res res[], int res_cnt, + struct basic_stats *gp_stat); static inline __u64 get_time_ns(void) { diff --git a/tools/testing/selftests/bpf/benchs/bench_bpf_hashmap_full_update.c b/tools/testing/selftests/bpf/benchs/bench_bpf_hashmap_full_update.c new file mode 100644 index 000000000000..cec51e0ff4b8 --- /dev/null +++ b/tools/testing/selftests/bpf/benchs/bench_bpf_hashmap_full_update.c @@ -0,0 +1,96 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2022 Bytedance */ + +#include <argp.h> +#include "bench.h" +#include "bpf_hashmap_full_update_bench.skel.h" +#include "bpf_util.h" + +/* BPF triggering benchmarks */ +static struct ctx { + struct bpf_hashmap_full_update_bench *skel; +} ctx; + +#define MAX_LOOP_NUM 10000 + +static void validate(void) +{ + if (env.consumer_cnt != 1) { + fprintf(stderr, "benchmark doesn't support multi-consumer!\n"); + exit(1); + } +} + +static void *producer(void *input) +{ + while (true) { + /* trigger the bpf program */ + syscall(__NR_getpgid); + } + + return NULL; +} + +static void *consumer(void *input) +{ + return NULL; +} + +static void measure(struct bench_res *res) +{ +} + +static void setup(void) +{ + struct bpf_link *link; + int map_fd, i, max_entries; + + setup_libbpf(); + + ctx.skel = 
bpf_hashmap_full_update_bench__open_and_load(); + if (!ctx.skel) { + fprintf(stderr, "failed to open skeleton\n"); + exit(1); + } + + ctx.skel->bss->nr_loops = MAX_LOOP_NUM; + + link = bpf_program__attach(ctx.skel->progs.benchmark); + if (!link) { + fprintf(stderr, "failed to attach program!\n"); + exit(1); + } + + /* fill hash_map */ + map_fd = bpf_map__fd(ctx.skel->maps.hash_map_bench); + max_entries = bpf_map__max_entries(ctx.skel->maps.hash_map_bench); + for (i = 0; i < max_entries; i++) + bpf_map_update_elem(map_fd, &i, &i, BPF_ANY); +} + +void hashmap_report_final(struct bench_res res[], int res_cnt) +{ + unsigned int nr_cpus = bpf_num_possible_cpus(); + int i; + + for (i = 0; i < nr_cpus; i++) { + u64 time = ctx.skel->bss->percpu_time[i]; + + if (!time) + continue; + + printf("%d:hash_map_full_perf %lld events per sec\n", + i, ctx.skel->bss->nr_loops * 1000000000ll / time); + } +} + +const struct bench bench_bpf_hashmap_full_update = { + .name = "bpf-hashmap-ful-update", + .validate = validate, + .setup = setup, + .producer_thread = producer, + .consumer_thread = consumer, + .measure = measure, + .report_progress = NULL, + .report_final = hashmap_report_final, +}; diff --git a/tools/testing/selftests/bpf/benchs/bench_local_storage.c b/tools/testing/selftests/bpf/benchs/bench_local_storage.c new file mode 100644 index 000000000000..5a378c84e81f --- /dev/null +++ b/tools/testing/selftests/bpf/benchs/bench_local_storage.c @@ -0,0 +1,287 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. 
*/ + +#include <argp.h> +#include <linux/btf.h> + +#include "local_storage_bench.skel.h" +#include "bench.h" + +#include <test_btf.h> + +static struct { + __u32 nr_maps; + __u32 hashmap_nr_keys_used; +} args = { + .nr_maps = 1000, + .hashmap_nr_keys_used = 1000, +}; + +enum { + ARG_NR_MAPS = 6000, + ARG_HASHMAP_NR_KEYS_USED = 6001, +}; + +static const struct argp_option opts[] = { + { "nr_maps", ARG_NR_MAPS, "NR_MAPS", 0, + "Set number of local_storage maps"}, + { "hashmap_nr_keys_used", ARG_HASHMAP_NR_KEYS_USED, "NR_KEYS", + 0, "When doing hashmap test, set number of hashmap keys test uses"}, + {}, +}; + +static error_t parse_arg(int key, char *arg, struct argp_state *state) +{ + long ret; + + switch (key) { + case ARG_NR_MAPS: + ret = strtol(arg, NULL, 10); + if (ret < 1 || ret > UINT_MAX) { + fprintf(stderr, "invalid nr_maps"); + argp_usage(state); + } + args.nr_maps = ret; + break; + case ARG_HASHMAP_NR_KEYS_USED: + ret = strtol(arg, NULL, 10); + if (ret < 1 || ret > UINT_MAX) { + fprintf(stderr, "invalid hashmap_nr_keys_used"); + argp_usage(state); + } + args.hashmap_nr_keys_used = ret; + break; + default: + return ARGP_ERR_UNKNOWN; + } + + return 0; +} + +const struct argp bench_local_storage_argp = { + .options = opts, + .parser = parse_arg, +}; + +/* Keep in sync w/ array of maps in bpf */ +#define MAX_NR_MAPS 1000 +/* keep in sync w/ same define in bpf */ +#define HASHMAP_SZ 4194304 + +static void validate(void) +{ + if (env.producer_cnt != 1) { + fprintf(stderr, "benchmark doesn't support multi-producer!\n"); + exit(1); + } + if (env.consumer_cnt != 1) { + fprintf(stderr, "benchmark doesn't support multi-consumer!\n"); + exit(1); + } + + if (args.nr_maps > MAX_NR_MAPS) { + fprintf(stderr, "nr_maps must be <= 1000\n"); + exit(1); + } + + if (args.hashmap_nr_keys_used > HASHMAP_SZ) { + fprintf(stderr, "hashmap_nr_keys_used must be <= %u\n", HASHMAP_SZ); + exit(1); + } +} + +static struct { + struct local_storage_bench *skel; + void *bpf_obj; + struct 
bpf_map *array_of_maps; +} ctx; + +static void prepopulate_hashmap(int fd) +{ + int i, key, val; + + /* local_storage gets will have BPF_LOCAL_STORAGE_GET_F_CREATE flag set, so + * populate the hashmap for a similar comparison + */ + for (i = 0; i < HASHMAP_SZ; i++) { + key = val = i; + if (bpf_map_update_elem(fd, &key, &val, 0)) { + fprintf(stderr, "Error prepopulating hashmap (key %d)\n", key); + exit(1); + } + } +} + +static void __setup(struct bpf_program *prog, bool hashmap) +{ + struct bpf_map *inner_map; + int i, fd, mim_fd, err; + + LIBBPF_OPTS(bpf_map_create_opts, create_opts); + + if (!hashmap) + create_opts.map_flags = BPF_F_NO_PREALLOC; + + ctx.skel->rodata->num_maps = args.nr_maps; + ctx.skel->rodata->hashmap_num_keys = args.hashmap_nr_keys_used; + inner_map = bpf_map__inner_map(ctx.array_of_maps); + create_opts.btf_key_type_id = bpf_map__btf_key_type_id(inner_map); + create_opts.btf_value_type_id = bpf_map__btf_value_type_id(inner_map); + + err = local_storage_bench__load(ctx.skel); + if (err) { + fprintf(stderr, "Error loading skeleton\n"); + goto err_out; + } + + create_opts.btf_fd = bpf_object__btf_fd(ctx.skel->obj); + + mim_fd = bpf_map__fd(ctx.array_of_maps); + if (mim_fd < 0) { + fprintf(stderr, "Error getting map_in_map fd\n"); + goto err_out; + } + + for (i = 0; i < args.nr_maps; i++) { + if (hashmap) + fd = bpf_map_create(BPF_MAP_TYPE_HASH, NULL, sizeof(int), + sizeof(int), HASHMAP_SZ, &create_opts); + else + fd = bpf_map_create(BPF_MAP_TYPE_TASK_STORAGE, NULL, sizeof(int), + sizeof(int), 0, &create_opts); + if (fd < 0) { + fprintf(stderr, "Error creating map %d: %d\n", i, fd); + goto err_out; + } + + if (hashmap) + prepopulate_hashmap(fd); + + err = bpf_map_update_elem(mim_fd, &i, &fd, 0); + if (err) { + fprintf(stderr, "Error updating array-of-maps w/ map %d\n", i); + goto err_out; + } + } + + if (!bpf_program__attach(prog)) { + fprintf(stderr, "Error attaching bpf program\n"); + goto err_out; + } + + return; +err_out: + exit(1); +} + 
+static void hashmap_setup(void) +{ + struct local_storage_bench *skel; + + setup_libbpf(); + + skel = local_storage_bench__open(); + ctx.skel = skel; + ctx.array_of_maps = skel->maps.array_of_hash_maps; + skel->rodata->use_hashmap = 1; + skel->rodata->interleave = 0; + + __setup(skel->progs.get_local, true); +} + +static void local_storage_cache_get_setup(void) +{ + struct local_storage_bench *skel; + + setup_libbpf(); + + skel = local_storage_bench__open(); + ctx.skel = skel; + ctx.array_of_maps = skel->maps.array_of_local_storage_maps; + skel->rodata->use_hashmap = 0; + skel->rodata->interleave = 0; + + __setup(skel->progs.get_local, false); +} + +static void local_storage_cache_get_interleaved_setup(void) +{ + struct local_storage_bench *skel; + + setup_libbpf(); + + skel = local_storage_bench__open(); + ctx.skel = skel; + ctx.array_of_maps = skel->maps.array_of_local_storage_maps; + skel->rodata->use_hashmap = 0; + skel->rodata->interleave = 1; + + __setup(skel->progs.get_local, false); +} + +static void measure(struct bench_res *res) +{ + res->hits = atomic_swap(&ctx.skel->bss->hits, 0); + res->important_hits = atomic_swap(&ctx.skel->bss->important_hits, 0); +} + +static inline void trigger_bpf_program(void) +{ + syscall(__NR_getpgid); +} + +static void *consumer(void *input) +{ + return NULL; +} + +static void *producer(void *input) +{ + while (true) + trigger_bpf_program(); + + return NULL; +} + +/* cache sequential and interleaved get benchs test local_storage get + * performance, specifically they demonstrate performance cliff of + * current list-plus-cache local_storage model. + * + * cache sequential get: call bpf_task_storage_get on n maps in order + * cache interleaved get: like "sequential get", but interleave 4 calls to the + * 'important' map (idx 0 in array_of_maps) for every 10 calls. 
Goal + * is to mimic environment where many progs are accessing their local_storage + * maps, with 'our' prog needing to access its map more often than others + */ +const struct bench bench_local_storage_cache_seq_get = { + .name = "local-storage-cache-seq-get", + .validate = validate, + .setup = local_storage_cache_get_setup, + .producer_thread = producer, + .consumer_thread = consumer, + .measure = measure, + .report_progress = local_storage_report_progress, + .report_final = local_storage_report_final, +}; + +const struct bench bench_local_storage_cache_interleaved_get = { + .name = "local-storage-cache-int-get", + .validate = validate, + .setup = local_storage_cache_get_interleaved_setup, + .producer_thread = producer, + .consumer_thread = consumer, + .measure = measure, + .report_progress = local_storage_report_progress, + .report_final = local_storage_report_final, +}; + +const struct bench bench_local_storage_cache_hashmap_control = { + .name = "local-storage-cache-hashmap-control", + .validate = validate, + .setup = hashmap_setup, + .producer_thread = producer, + .consumer_thread = consumer, + .measure = measure, + .report_progress = local_storage_report_progress, + .report_final = local_storage_report_final, +}; diff --git a/tools/testing/selftests/bpf/benchs/bench_local_storage_rcu_tasks_trace.c b/tools/testing/selftests/bpf/benchs/bench_local_storage_rcu_tasks_trace.c new file mode 100644 index 000000000000..43f109d93130 --- /dev/null +++ b/tools/testing/selftests/bpf/benchs/bench_local_storage_rcu_tasks_trace.c @@ -0,0 +1,281 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. 
*/ + +#include <argp.h> + +#include <sys/prctl.h> +#include "local_storage_rcu_tasks_trace_bench.skel.h" +#include "bench.h" + +#include <signal.h> + +static struct { + __u32 nr_procs; + __u32 kthread_pid; + bool quiet; +} args = { + .nr_procs = 1000, + .kthread_pid = 0, + .quiet = false, +}; + +enum { + ARG_NR_PROCS = 7000, + ARG_KTHREAD_PID = 7001, + ARG_QUIET = 7002, +}; + +static const struct argp_option opts[] = { + { "nr_procs", ARG_NR_PROCS, "NR_PROCS", 0, + "Set number of user processes to spin up"}, + { "kthread_pid", ARG_KTHREAD_PID, "PID", 0, + "Pid of rcu_tasks_trace kthread for ticks tracking"}, + { "quiet", ARG_QUIET, "{0,1}", 0, + "If true, don't report progress"}, + {}, +}; + +static error_t parse_arg(int key, char *arg, struct argp_state *state) +{ + long ret; + + switch (key) { + case ARG_NR_PROCS: + ret = strtol(arg, NULL, 10); + if (ret < 1 || ret > UINT_MAX) { + fprintf(stderr, "invalid nr_procs\n"); + argp_usage(state); + } + args.nr_procs = ret; + break; + case ARG_KTHREAD_PID: + ret = strtol(arg, NULL, 10); + if (ret < 1) { + fprintf(stderr, "invalid kthread_pid\n"); + argp_usage(state); + } + args.kthread_pid = ret; + break; + case ARG_QUIET: + ret = strtol(arg, NULL, 10); + if (ret < 0 || ret > 1) { + fprintf(stderr, "invalid quiet %ld\n", ret); + argp_usage(state); + } + args.quiet = ret; + break; +break; + default: + return ARGP_ERR_UNKNOWN; + } + + return 0; +} + +const struct argp bench_local_storage_rcu_tasks_trace_argp = { + .options = opts, + .parser = parse_arg, +}; + +#define MAX_SLEEP_PROCS 150000 + +static void validate(void) +{ + if (env.producer_cnt != 1) { + fprintf(stderr, "benchmark doesn't support multi-producer!\n"); + exit(1); + } + if (env.consumer_cnt != 1) { + fprintf(stderr, "benchmark doesn't support multi-consumer!\n"); + exit(1); + } + + if (args.nr_procs > MAX_SLEEP_PROCS) { + fprintf(stderr, "benchmark supports up to %u sleeper procs!\n", + MAX_SLEEP_PROCS); + exit(1); + } +} + +static long 
kthread_pid_ticks(void) +{ + char procfs_path[100]; + long stime; + FILE *f; + + if (!args.kthread_pid) + return -1; + + sprintf(procfs_path, "/proc/%u/stat", args.kthread_pid); + f = fopen(procfs_path, "r"); + if (!f) { + fprintf(stderr, "couldn't open %s, exiting\n", procfs_path); + goto err_out; + } + if (fscanf(f, "%*s %*s %*s %*s %*s %*s %*s %*s %*s %*s %*s %*s %*s %*s %ld", &stime) != 1) { + fprintf(stderr, "fscanf of %s failed, exiting\n", procfs_path); + goto err_out; + } + fclose(f); + return stime; + +err_out: + if (f) + fclose(f); + exit(1); + return 0; +} + +static struct { + struct local_storage_rcu_tasks_trace_bench *skel; + long prev_kthread_stime; +} ctx; + +static void sleep_and_loop(void) +{ + while (true) { + sleep(rand() % 4); + syscall(__NR_getpgid); + } +} + +static void local_storage_tasks_trace_setup(void) +{ + int i, err, forkret, runner_pid; + + runner_pid = getpid(); + + for (i = 0; i < args.nr_procs; i++) { + forkret = fork(); + if (forkret < 0) { + fprintf(stderr, "Error forking sleeper proc %u of %u, exiting\n", i, + args.nr_procs); + goto err_out; + } + + if (!forkret) { + err = prctl(PR_SET_PDEATHSIG, SIGKILL); + if (err < 0) { + fprintf(stderr, "prctl failed with err %d, exiting\n", errno); + goto err_out; + } + + if (getppid() != runner_pid) { + fprintf(stderr, "Runner died while spinning up procs, exiting\n"); + goto err_out; + } + sleep_and_loop(); + } + } + printf("Spun up %u procs (our pid %d)\n", args.nr_procs, runner_pid); + + setup_libbpf(); + + ctx.skel = local_storage_rcu_tasks_trace_bench__open_and_load(); + if (!ctx.skel) { + fprintf(stderr, "Error doing open_and_load, exiting\n"); + goto err_out; + } + + ctx.prev_kthread_stime = kthread_pid_ticks(); + + if (!bpf_program__attach(ctx.skel->progs.get_local)) { + fprintf(stderr, "Error attaching bpf program\n"); + goto err_out; + } + + if (!bpf_program__attach(ctx.skel->progs.pregp_step)) { + fprintf(stderr, "Error attaching bpf program\n"); + goto err_out; + } + + if 
(!bpf_program__attach(ctx.skel->progs.postgp)) { + fprintf(stderr, "Error attaching bpf program\n"); + goto err_out; + } + + return; +err_out: + exit(1); +} + +static void measure(struct bench_res *res) +{ + long ticks; + + res->gp_ct = atomic_swap(&ctx.skel->bss->gp_hits, 0); + res->gp_ns = atomic_swap(&ctx.skel->bss->gp_times, 0); + ticks = kthread_pid_ticks(); + res->stime = ticks - ctx.prev_kthread_stime; + ctx.prev_kthread_stime = ticks; +} + +static void *consumer(void *input) +{ + return NULL; +} + +static void *producer(void *input) +{ + while (true) + syscall(__NR_getpgid); + return NULL; +} + +static void report_progress(int iter, struct bench_res *res, long delta_ns) +{ + if (ctx.skel->bss->unexpected) { + fprintf(stderr, "Error: Unexpected order of bpf prog calls (postgp after pregp)."); + fprintf(stderr, "Data can't be trusted, exiting\n"); + exit(1); + } + + if (args.quiet) + return; + + printf("Iter %d\t avg tasks_trace grace period latency\t%lf ns\n", + iter, res->gp_ns / (double)res->gp_ct); + printf("Iter %d\t avg ticks per tasks_trace grace period\t%lf\n", + iter, res->stime / (double)res->gp_ct); +} + +static void report_final(struct bench_res res[], int res_cnt) +{ + struct basic_stats gp_stat; + + grace_period_latency_basic_stats(res, res_cnt, &gp_stat); + printf("SUMMARY tasks_trace grace period latency"); + printf("\tavg %.3lf us\tstddev %.3lf us\n", gp_stat.mean, gp_stat.stddev); + grace_period_ticks_basic_stats(res, res_cnt, &gp_stat); + printf("SUMMARY ticks per tasks_trace grace period"); + printf("\tavg %.3lf\tstddev %.3lf\n", gp_stat.mean, gp_stat.stddev); +} + +/* local-storage-tasks-trace: Benchmark performance of BPF local_storage's use + * of RCU Tasks-Trace. + * + * Stress RCU Tasks Trace by forking many tasks, all of which do no work aside + * from sleep() loop, and creating/destroying BPF task-local storage on wakeup. + * The number of forked tasks is configurable. 
+ * + * exercising code paths which call call_rcu_tasks_trace while there are many + * thousands of tasks on the system should result in RCU Tasks-Trace having to + * do a noticeable amount of work. + * + * This should be observable by measuring rcu_tasks_trace_kthread CPU usage + * after the grace period has ended, or by measuring grace period latency. + * + * This benchmark uses both approaches, attaching to rcu_tasks_trace_pregp_step + * and rcu_tasks_trace_postgp functions to measure grace period latency and + * using /proc/PID/stat to measure rcu_tasks_trace_kthread kernel ticks + */ +const struct bench bench_local_storage_tasks_trace = { + .name = "local-storage-tasks-trace", + .validate = validate, + .setup = local_storage_tasks_trace_setup, + .producer_thread = producer, + .consumer_thread = consumer, + .measure = measure, + .report_progress = report_progress, + .report_final = report_final, +}; diff --git a/tools/testing/selftests/bpf/benchs/run_bench_bpf_hashmap_full_update.sh b/tools/testing/selftests/bpf/benchs/run_bench_bpf_hashmap_full_update.sh new file mode 100755 index 000000000000..1e2de838f9fa --- /dev/null +++ b/tools/testing/selftests/bpf/benchs/run_bench_bpf_hashmap_full_update.sh @@ -0,0 +1,11 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +source ./benchs/run_common.sh + +set -eufo pipefail + +nr_threads=`expr $(cat /proc/cpuinfo | grep "processor"| wc -l) - 1` +summary=$($RUN_BENCH -p $nr_threads bpf-hashmap-ful-update) +printf "$summary" +printf "\n" diff --git a/tools/testing/selftests/bpf/benchs/run_bench_local_storage.sh b/tools/testing/selftests/bpf/benchs/run_bench_local_storage.sh new file mode 100755 index 000000000000..2eb2b513a173 --- /dev/null +++ b/tools/testing/selftests/bpf/benchs/run_bench_local_storage.sh @@ -0,0 +1,24 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +source ./benchs/run_common.sh + +set -eufo pipefail + +header "Hashmap Control" +for i in 10 1000 10000 100000 4194304; do +subtitle "num keys: $i" 
+ summarize_local_storage "hashmap (control) sequential get: "\ + "$(./bench --nr_maps 1 --hashmap_nr_keys_used=$i local-storage-cache-hashmap-control)" + printf "\n" +done + +header "Local Storage" +for i in 1 10 16 17 24 32 100 1000; do +subtitle "num_maps: $i" + summarize_local_storage "local_storage cache sequential get: "\ + "$(./bench --nr_maps $i local-storage-cache-seq-get)" + summarize_local_storage "local_storage cache interleaved get: "\ + "$(./bench --nr_maps $i local-storage-cache-int-get)" + printf "\n" +done diff --git a/tools/testing/selftests/bpf/benchs/run_bench_local_storage_rcu_tasks_trace.sh b/tools/testing/selftests/bpf/benchs/run_bench_local_storage_rcu_tasks_trace.sh new file mode 100755 index 000000000000..5dac1f02892c --- /dev/null +++ b/tools/testing/selftests/bpf/benchs/run_bench_local_storage_rcu_tasks_trace.sh @@ -0,0 +1,11 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +kthread_pid=`pgrep rcu_tasks_trace_kthread` + +if [ -z $kthread_pid ]; then + echo "error: Couldn't find rcu_tasks_trace_kthread" + exit 1 +fi + +./bench --nr_procs 15000 --kthread_pid $kthread_pid -d 600 --quiet 1 local-storage-tasks-trace diff --git a/tools/testing/selftests/bpf/benchs/run_common.sh b/tools/testing/selftests/bpf/benchs/run_common.sh index 6c5e6023a69f..d9f40af82006 100644 --- a/tools/testing/selftests/bpf/benchs/run_common.sh +++ b/tools/testing/selftests/bpf/benchs/run_common.sh @@ -41,6 +41,16 @@ function ops() echo "$*" | sed -E "s/.*latency\s+([0-9]+\.[0-9]+\sns\/op).*/\1/" } +function local_storage() +{ + echo -n "hits throughput: " + echo -n "$*" | sed -E "s/.* hits throughput\s+([0-9]+\.[0-9]+ ± [0-9]+\.[0-9]+\sM\sops\/s).*/\1/" + echo -n -e ", hits latency: " + echo -n "$*" | sed -E "s/.* hits latency\s+([0-9]+\.[0-9]+\sns\/op).*/\1/" + echo -n ", important_hits throughput: " + echo "$*" | sed -E "s/.*important_hits throughput\s+([0-9]+\.[0-9]+ ± [0-9]+\.[0-9]+\sM\sops\/s).*/\1/" +} + function total() { echo "$*" | sed -E "s/.*total 
operations\s+([0-9]+\.[0-9]+ ± [0-9]+\.[0-9]+M\/s).*/\1/" @@ -67,6 +77,13 @@ function summarize_ops() printf "%-20s %s\n" "$bench" "$(ops $summary)" } +function summarize_local_storage() +{ + bench="$1" + summary=$(echo $2 | tail -n1) + printf "%-20s %s\n" "$bench" "$(local_storage $summary)" +} + function summarize_total() { bench="$1" diff --git a/tools/testing/selftests/bpf/bpf_legacy.h b/tools/testing/selftests/bpf/bpf_legacy.h index 719ab56cdb5d..845209581440 100644 --- a/tools/testing/selftests/bpf/bpf_legacy.h +++ b/tools/testing/selftests/bpf/bpf_legacy.h @@ -2,15 +2,6 @@ #ifndef __BPF_LEGACY__ #define __BPF_LEGACY__ -#define BPF_ANNOTATE_KV_PAIR(name, type_key, type_val) \ - struct ____btf_map_##name { \ - type_key key; \ - type_val value; \ - }; \ - struct ____btf_map_##name \ - __attribute__ ((section(".maps." #name), used)) \ - ____btf_map_##name = { } - /* llvm builtin functions that eBPF C program may use to * emit BPF_LD_ABS and BPF_LD_IND instructions */ diff --git a/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c b/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c index e585e1cefc77..792cb15bac40 100644 --- a/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c +++ b/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c @@ -148,13 +148,13 @@ static struct bin_attribute bin_attr_bpf_testmod_file __ro_after_init = { .write = bpf_testmod_test_write, }; -BTF_SET_START(bpf_testmod_check_kfunc_ids) -BTF_ID(func, bpf_testmod_test_mod_kfunc) -BTF_SET_END(bpf_testmod_check_kfunc_ids) +BTF_SET8_START(bpf_testmod_check_kfunc_ids) +BTF_ID_FLAGS(func, bpf_testmod_test_mod_kfunc) +BTF_SET8_END(bpf_testmod_check_kfunc_ids) static const struct btf_kfunc_id_set bpf_testmod_kfunc_set = { - .owner = THIS_MODULE, - .check_set = &bpf_testmod_check_kfunc_ids, + .owner = THIS_MODULE, + .set = &bpf_testmod_check_kfunc_ids, }; extern int bpf_fentry_test1(int a); diff --git a/tools/testing/selftests/bpf/btf_helpers.c b/tools/testing/selftests/bpf/btf_helpers.c 
index b5941d514e17..1c1c2c26690a 100644 --- a/tools/testing/selftests/bpf/btf_helpers.c +++ b/tools/testing/selftests/bpf/btf_helpers.c @@ -26,11 +26,12 @@ static const char * const btf_kind_str_mapping[] = { [BTF_KIND_FLOAT] = "FLOAT", [BTF_KIND_DECL_TAG] = "DECL_TAG", [BTF_KIND_TYPE_TAG] = "TYPE_TAG", + [BTF_KIND_ENUM64] = "ENUM64", }; static const char *btf_kind_str(__u16 kind) { - if (kind > BTF_KIND_TYPE_TAG) + if (kind > BTF_KIND_ENUM64) return "UNKNOWN"; return btf_kind_str_mapping[kind]; } @@ -139,14 +140,32 @@ int fprintf_btf_type_raw(FILE *out, const struct btf *btf, __u32 id) } case BTF_KIND_ENUM: { const struct btf_enum *v = btf_enum(t); + const char *fmt_str; - fprintf(out, " size=%u vlen=%u", t->size, vlen); + fmt_str = btf_kflag(t) ? "\n\t'%s' val=%d" : "\n\t'%s' val=%u"; + fprintf(out, " encoding=%s size=%u vlen=%u", + btf_kflag(t) ? "SIGNED" : "UNSIGNED", t->size, vlen); for (i = 0; i < vlen; i++, v++) { - fprintf(out, "\n\t'%s' val=%u", + fprintf(out, fmt_str, btf_str(btf, v->name_off), v->val); } break; } + case BTF_KIND_ENUM64: { + const struct btf_enum64 *v = btf_enum64(t); + const char *fmt_str; + + fmt_str = btf_kflag(t) ? "\n\t'%s' val=%lld" : "\n\t'%s' val=%llu"; + + fprintf(out, " encoding=%s size=%u vlen=%u", + btf_kflag(t) ? "SIGNED" : "UNSIGNED", t->size, vlen); + for (i = 0; i < vlen; i++, v++) { + fprintf(out, fmt_str, + btf_str(btf, v->name_off), + ((__u64)v->val_hi32 << 32) | v->val_lo32); + } + break; + } case BTF_KIND_FWD: fprintf(out, " fwd_kind=%s", btf_kflag(t) ? 
"union" : "struct"); break; diff --git a/tools/testing/selftests/bpf/config b/tools/testing/selftests/bpf/config index 3b3edc0fc8a6..fabf0c014349 100644 --- a/tools/testing/selftests/bpf/config +++ b/tools/testing/selftests/bpf/config @@ -1,59 +1,64 @@ +CONFIG_BLK_DEV_LOOP=y CONFIG_BPF=y -CONFIG_BPF_SYSCALL=y -CONFIG_NET_CLS_BPF=m CONFIG_BPF_EVENTS=y -CONFIG_TEST_BPF=m +CONFIG_BPF_JIT=y +CONFIG_BPF_LIRC_MODE2=y +CONFIG_BPF_LSM=y +CONFIG_BPF_STREAM_PARSER=y +CONFIG_BPF_SYSCALL=y CONFIG_CGROUP_BPF=y -CONFIG_NETDEVSIM=m -CONFIG_NET_CLS_ACT=y -CONFIG_NET_SCHED=y -CONFIG_NET_SCH_INGRESS=y -CONFIG_NET_IPIP=y -CONFIG_IPV6=y -CONFIG_NET_IPGRE_DEMUX=y -CONFIG_NET_IPGRE=y -CONFIG_IPV6_GRE=y -CONFIG_CRYPTO_USER_API_HASH=m CONFIG_CRYPTO_HMAC=m CONFIG_CRYPTO_SHA256=m -CONFIG_VXLAN=y -CONFIG_GENEVE=y -CONFIG_NET_CLS_FLOWER=m -CONFIG_LWTUNNEL=y -CONFIG_BPF_STREAM_PARSER=y -CONFIG_XDP_SOCKETS=y +CONFIG_CRYPTO_USER_API_HASH=m +CONFIG_DYNAMIC_FTRACE=y +CONFIG_FPROBE=y CONFIG_FTRACE_SYSCALLS=y -CONFIG_IPV6_TUNNEL=y +CONFIG_FUNCTION_TRACER=y +CONFIG_GENEVE=y +CONFIG_IKCONFIG=y +CONFIG_IKCONFIG_PROC=y +CONFIG_IMA=y +CONFIG_IMA_READ_POLICY=y +CONFIG_IMA_WRITE_POLICY=y +CONFIG_IP_NF_FILTER=y +CONFIG_IP_NF_RAW=y +CONFIG_IP_NF_TARGET_SYNPROXY=y +CONFIG_IPV6=y +CONFIG_IPV6_FOU=m +CONFIG_IPV6_FOU_TUNNEL=m CONFIG_IPV6_GRE=y CONFIG_IPV6_SEG6_BPF=y +CONFIG_IPV6_SIT=m +CONFIG_IPV6_TUNNEL=y +CONFIG_LIRC=y +CONFIG_LWTUNNEL=y +CONFIG_MPLS=y +CONFIG_MPLS_IPTUNNEL=m +CONFIG_MPLS_ROUTING=m +CONFIG_MPTCP=y +CONFIG_NET_CLS_ACT=y +CONFIG_NET_CLS_BPF=y +CONFIG_NET_CLS_FLOWER=m CONFIG_NET_FOU=m CONFIG_NET_FOU_IP_TUNNELS=y -CONFIG_IPV6_FOU=m -CONFIG_IPV6_FOU_TUNNEL=m -CONFIG_MPLS=y +CONFIG_NET_IPGRE=y +CONFIG_NET_IPGRE_DEMUX=y +CONFIG_NET_IPIP=y CONFIG_NET_MPLS_GSO=m -CONFIG_MPLS_ROUTING=m -CONFIG_MPLS_IPTUNNEL=m -CONFIG_IPV6_SIT=m -CONFIG_BPF_JIT=y -CONFIG_BPF_LSM=y -CONFIG_SECURITY=y -CONFIG_RC_CORE=y -CONFIG_LIRC=y -CONFIG_BPF_LIRC_MODE2=y -CONFIG_IMA=y -CONFIG_SECURITYFS=y -CONFIG_IMA_WRITE_POLICY=y 
-CONFIG_IMA_READ_POLICY=y -CONFIG_BLK_DEV_LOOP=y -CONFIG_FUNCTION_TRACER=y -CONFIG_DYNAMIC_FTRACE=y +CONFIG_NET_SCH_INGRESS=y +CONFIG_NET_SCHED=y +CONFIG_NETDEVSIM=m CONFIG_NETFILTER=y +CONFIG_NETFILTER_SYNPROXY=y +CONFIG_NETFILTER_XT_MATCH_STATE=y +CONFIG_NETFILTER_XT_TARGET_CT=y +CONFIG_NF_CONNTRACK=y CONFIG_NF_DEFRAG_IPV4=y CONFIG_NF_DEFRAG_IPV6=y -CONFIG_NF_CONNTRACK=y +CONFIG_RC_CORE=y +CONFIG_SECURITY=y +CONFIG_SECURITYFS=y +CONFIG_TEST_BPF=m CONFIG_USERFAULTFD=y -CONFIG_FPROBE=y -CONFIG_IKCONFIG=y -CONFIG_IKCONFIG_PROC=y -CONFIG_MPTCP=y +CONFIG_VXLAN=y +CONFIG_XDP_SOCKETS=y diff --git a/tools/testing/selftests/bpf/config.s390x b/tools/testing/selftests/bpf/config.s390x new file mode 100644 index 000000000000..f8a7a258a718 --- /dev/null +++ b/tools/testing/selftests/bpf/config.s390x @@ -0,0 +1,147 @@ +CONFIG_9P_FS=y +CONFIG_ARCH_WANT_DEFAULT_BPF_JIT=y +CONFIG_AUDIT=y +CONFIG_BLK_CGROUP=y +CONFIG_BLK_DEV_INITRD=y +CONFIG_BLK_DEV_IO_TRACE=y +CONFIG_BLK_DEV_RAM=y +CONFIG_BONDING=y +CONFIG_BPF_JIT_ALWAYS_ON=y +CONFIG_BPF_JIT_DEFAULT_ON=y +CONFIG_BPF_PRELOAD=y +CONFIG_BPF_PRELOAD_UMD=y +CONFIG_BPFILTER=y +CONFIG_CGROUP_CPUACCT=y +CONFIG_CGROUP_DEVICE=y +CONFIG_CGROUP_FREEZER=y +CONFIG_CGROUP_HUGETLB=y +CONFIG_CGROUP_NET_CLASSID=y +CONFIG_CGROUP_PERF=y +CONFIG_CGROUP_PIDS=y +CONFIG_CGROUP_SCHED=y +CONFIG_CGROUPS=y +CONFIG_CHECKPOINT_RESTORE=y +CONFIG_CPUSETS=y +CONFIG_CRASH_DUMP=y +CONFIG_CRYPTO_USER_API_RNG=y +CONFIG_CRYPTO_USER_API_SKCIPHER=y +CONFIG_DEBUG_ATOMIC_SLEEP=y +CONFIG_DEBUG_INFO_BTF=y +CONFIG_DEBUG_INFO_DWARF4=y +CONFIG_DEBUG_LIST=y +CONFIG_DEBUG_LOCKDEP=y +CONFIG_DEBUG_NOTIFIERS=y +CONFIG_DEBUG_PAGEALLOC=y +CONFIG_DEBUG_SECTION_MISMATCH=y +CONFIG_DEBUG_SG=y +CONFIG_DETECT_HUNG_TASK=y +CONFIG_DEVTMPFS=y +CONFIG_EXPERT=y +CONFIG_EXT4_FS=y +CONFIG_EXT4_FS_POSIX_ACL=y +CONFIG_EXT4_FS_SECURITY=y +CONFIG_FANOTIFY=y +CONFIG_FUNCTION_PROFILER=y +CONFIG_GDB_SCRIPTS=y +CONFIG_HAVE_EBPF_JIT=y +CONFIG_HAVE_KPROBES=y +CONFIG_HAVE_KPROBES_ON_FTRACE=y 
+CONFIG_HAVE_KRETPROBES=y +CONFIG_HAVE_MARCH_Z10_FEATURES=y +CONFIG_HAVE_MARCH_Z196_FEATURES=y +CONFIG_HEADERS_INSTALL=y +CONFIG_HIGH_RES_TIMERS=y +CONFIG_HUGETLBFS=y +CONFIG_HW_RANDOM=y +CONFIG_HZ_100=y +CONFIG_IDLE_PAGE_TRACKING=y +CONFIG_IKHEADERS=y +CONFIG_INET6_ESP=y +CONFIG_INET=y +CONFIG_INET_ESP=y +CONFIG_IP_ADVANCED_ROUTER=y +CONFIG_IP_MULTICAST=y +CONFIG_IP_MULTIPLE_TABLES=y +CONFIG_IP_NF_IPTABLES=y +CONFIG_IPV6_SEG6_LWTUNNEL=y +CONFIG_IPVLAN=y +CONFIG_JUMP_LABEL=y +CONFIG_KERNEL_UNCOMPRESSED=y +CONFIG_KPROBES=y +CONFIG_KPROBES_ON_FTRACE=y +CONFIG_KRETPROBES=y +CONFIG_KSM=y +CONFIG_LATENCYTOP=y +CONFIG_LIVEPATCH=y +CONFIG_LOCK_STAT=y +CONFIG_MACVLAN=y +CONFIG_MACVTAP=y +CONFIG_MAGIC_SYSRQ=y +CONFIG_MARCH_Z196=y +CONFIG_MARCH_Z196_TUNE=y +CONFIG_MEMCG=y +CONFIG_MEMORY_HOTPLUG=y +CONFIG_MEMORY_HOTREMOVE=y +CONFIG_MODULE_SIG=y +CONFIG_MODULE_UNLOAD=y +CONFIG_MODULES=y +CONFIG_NAMESPACES=y +CONFIG_NET=y +CONFIG_NET_9P=y +CONFIG_NET_9P_VIRTIO=y +CONFIG_NET_ACT_BPF=y +CONFIG_NET_ACT_GACT=y +CONFIG_NET_KEY=y +CONFIG_NET_SCH_FQ=y +CONFIG_NET_VRF=y +CONFIG_NETDEVICES=y +CONFIG_NETFILTER_XT_MATCH_BPF=y +CONFIG_NETFILTER_XT_TARGET_MARK=y +CONFIG_NF_TABLES=y +CONFIG_NO_HZ_IDLE=y +CONFIG_NR_CPUS=256 +CONFIG_NUMA=y +CONFIG_PACKET=y +CONFIG_PANIC_ON_OOPS=y +CONFIG_PARTITION_ADVANCED=y +CONFIG_PCI=y +CONFIG_POSIX_MQUEUE=y +CONFIG_PROC_KCORE=y +CONFIG_PROFILING=y +CONFIG_PROVE_LOCKING=y +CONFIG_PTDUMP_DEBUGFS=y +CONFIG_RC_DEVICES=y +CONFIG_RC_LOOPBACK=y +CONFIG_RT_GROUP_SCHED=y +CONFIG_SAMPLE_SECCOMP=y +CONFIG_SAMPLES=y +CONFIG_SCHED_TRACER=y +CONFIG_SCSI=y +CONFIG_SCSI_VIRTIO=y +CONFIG_SECURITY_NETWORK=y +CONFIG_STACK_TRACER=y +CONFIG_STATIC_KEYS_SELFTEST=y +CONFIG_SYSVIPC=y +CONFIG_TASK_DELAY_ACCT=y +CONFIG_TASK_IO_ACCOUNTING=y +CONFIG_TASK_XACCT=y +CONFIG_TASKSTATS=y +CONFIG_TCP_CONG_ADVANCED=y +CONFIG_TCP_CONG_DCTCP=y +CONFIG_TLS=y +CONFIG_TMPFS=y +CONFIG_TMPFS_POSIX_ACL=y +CONFIG_TRACER_SNAPSHOT_PER_CPU_SWAP=y +CONFIG_TRANSPARENT_HUGEPAGE=y +CONFIG_TUN=y 
+CONFIG_UNIX=y +CONFIG_UPROBES=y +CONFIG_USELIB=y +CONFIG_USER_NS=y +CONFIG_VETH=y +CONFIG_VIRTIO_BALLOON=y +CONFIG_VIRTIO_BLK=y +CONFIG_VIRTIO_NET=y +CONFIG_VIRTIO_PCI=y +CONFIG_VLAN_8021Q=y +CONFIG_XFRM_USER=y diff --git a/tools/testing/selftests/bpf/config.x86_64 b/tools/testing/selftests/bpf/config.x86_64 new file mode 100644 index 000000000000..f0859a1d37ab --- /dev/null +++ b/tools/testing/selftests/bpf/config.x86_64 @@ -0,0 +1,251 @@ +CONFIG_9P_FS=y +CONFIG_9P_FS_POSIX_ACL=y +CONFIG_9P_FS_SECURITY=y +CONFIG_AGP=y +CONFIG_AGP_AMD64=y +CONFIG_AGP_INTEL=y +CONFIG_AGP_SIS=y +CONFIG_AGP_VIA=y +CONFIG_AMIGA_PARTITION=y +CONFIG_AUDIT=y +CONFIG_BACKLIGHT_CLASS_DEVICE=y +CONFIG_BINFMT_MISC=y +CONFIG_BLK_CGROUP=y +CONFIG_BLK_CGROUP_IOLATENCY=y +CONFIG_BLK_DEV_BSGLIB=y +CONFIG_BLK_DEV_IO_TRACE=y +CONFIG_BLK_DEV_RAM=y +CONFIG_BLK_DEV_RAM_SIZE=16384 +CONFIG_BLK_DEV_THROTTLING=y +CONFIG_BONDING=y +CONFIG_BOOTPARAM_HARDLOCKUP_PANIC=y +CONFIG_BOOTTIME_TRACING=y +CONFIG_BPF_JIT_ALWAYS_ON=y +CONFIG_BPF_KPROBE_OVERRIDE=y +CONFIG_BPF_PRELOAD=y +CONFIG_BPF_PRELOAD_UMD=y +CONFIG_BPFILTER=y +CONFIG_BSD_DISKLABEL=y +CONFIG_BSD_PROCESS_ACCT=y +CONFIG_CFS_BANDWIDTH=y +CONFIG_CGROUP_CPUACCT=y +CONFIG_CGROUP_DEVICE=y +CONFIG_CGROUP_FREEZER=y +CONFIG_CGROUP_HUGETLB=y +CONFIG_CGROUP_PERF=y +CONFIG_CGROUP_SCHED=y +CONFIG_CGROUPS=y +CONFIG_CMA=y +CONFIG_CMA_AREAS=7 +CONFIG_COMPAT_32BIT_TIME=y +CONFIG_CPU_FREQ_DEFAULT_GOV_PERFORMANCE=y +CONFIG_CPU_FREQ_GOV_CONSERVATIVE=y +CONFIG_CPU_FREQ_GOV_ONDEMAND=y +CONFIG_CPU_FREQ_GOV_USERSPACE=y +CONFIG_CPU_FREQ_STAT=y +CONFIG_CPU_IDLE_GOV_LADDER=y +CONFIG_CPUSETS=y +CONFIG_CRC_T10DIF=y +CONFIG_CRYPTO_BLAKE2B=y +CONFIG_CRYPTO_DEV_VIRTIO=m +CONFIG_CRYPTO_SEQIV=y +CONFIG_CRYPTO_XXHASH=y +CONFIG_DCB=y +CONFIG_DEBUG_ATOMIC_SLEEP=y +CONFIG_DEBUG_CREDENTIALS=y +CONFIG_DEBUG_INFO_BTF=y +CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT=y +CONFIG_DEBUG_MEMORY_INIT=y +CONFIG_DEFAULT_FQ_CODEL=y +CONFIG_DEFAULT_RENO=y +CONFIG_DEFAULT_SECURITY_DAC=y +CONFIG_DEVTMPFS=y 
+CONFIG_DEVTMPFS_MOUNT=y +CONFIG_DMA_CMA=y +CONFIG_DNS_RESOLVER=y +CONFIG_EFI=y +CONFIG_EFI_STUB=y +CONFIG_EXPERT=y +CONFIG_EXT4_FS=y +CONFIG_EXT4_FS_POSIX_ACL=y +CONFIG_EXT4_FS_SECURITY=y +CONFIG_FAIL_FUNCTION=y +CONFIG_FAULT_INJECTION=y +CONFIG_FAULT_INJECTION_DEBUG_FS=y +CONFIG_FB=y +CONFIG_FB_MODE_HELPERS=y +CONFIG_FB_TILEBLITTING=y +CONFIG_FB_VESA=y +CONFIG_FONT_8x16=y +CONFIG_FONT_MINI_4x6=y +CONFIG_FONTS=y +CONFIG_FRAMEBUFFER_CONSOLE=y +CONFIG_FRAMEBUFFER_CONSOLE_DETECT_PRIMARY=y +CONFIG_FRAMEBUFFER_CONSOLE_ROTATION=y +CONFIG_FW_LOADER_USER_HELPER=y +CONFIG_GART_IOMMU=y +CONFIG_GENERIC_PHY=y +CONFIG_HARDLOCKUP_DETECTOR=y +CONFIG_HID_A4TECH=y +CONFIG_HID_BELKIN=y +CONFIG_HID_CHERRY=y +CONFIG_HID_CYPRESS=y +CONFIG_HID_DRAGONRISE=y +CONFIG_HID_EZKEY=y +CONFIG_HID_GREENASIA=y +CONFIG_HID_GYRATION=y +CONFIG_HID_KENSINGTON=y +CONFIG_HID_KYE=y +CONFIG_HID_MICROSOFT=y +CONFIG_HID_MONTEREY=y +CONFIG_HID_PANTHERLORD=y +CONFIG_HID_PETALYNX=y +CONFIG_HID_SMARTJOYPLUS=y +CONFIG_HID_SUNPLUS=y +CONFIG_HID_TOPSEED=y +CONFIG_HID_TWINHAN=y +CONFIG_HID_ZEROPLUS=y +CONFIG_HIGH_RES_TIMERS=y +CONFIG_HPET=y +CONFIG_HUGETLBFS=y +CONFIG_HWPOISON_INJECT=y +CONFIG_HZ_1000=y +CONFIG_INET=y +CONFIG_INPUT_EVDEV=y +CONFIG_INTEL_POWERCLAMP=y +CONFIG_IP6_NF_IPTABLES=y +CONFIG_IP_ADVANCED_ROUTER=y +CONFIG_IP_MROUTE=y +CONFIG_IP_MULTICAST=y +CONFIG_IP_MULTIPLE_TABLES=y +CONFIG_IP_NF_IPTABLES=y +CONFIG_IP_PIMSM_V1=y +CONFIG_IP_PIMSM_V2=y +CONFIG_IP_ROUTE_MULTIPATH=y +CONFIG_IP_ROUTE_VERBOSE=y +CONFIG_IPV6_MIP6=y +CONFIG_IPV6_ROUTE_INFO=y +CONFIG_IPV6_ROUTER_PREF=y +CONFIG_IPV6_SEG6_LWTUNNEL=y +CONFIG_IPV6_SUBTREES=y +CONFIG_IRQ_POLL=y +CONFIG_JUMP_LABEL=y +CONFIG_KARMA_PARTITION=y +CONFIG_KEXEC=y +CONFIG_KPROBES=y +CONFIG_KSM=y +CONFIG_LEGACY_VSYSCALL_NONE=y +CONFIG_LOG_BUF_SHIFT=21 +CONFIG_LOG_CPU_MAX_BUF_SHIFT=0 +CONFIG_LOGO=y +CONFIG_LSM="selinux,bpf,integrity" +CONFIG_MAC_PARTITION=y +CONFIG_MAGIC_SYSRQ=y +CONFIG_MCORE2=y +CONFIG_MEMCG=y +CONFIG_MEMORY_FAILURE=y 
+CONFIG_MINIX_SUBPARTITION=y +CONFIG_MODULE_SIG=y +CONFIG_MODULE_SRCVERSION_ALL=y +CONFIG_MODULE_UNLOAD=y +CONFIG_MODULES=y +CONFIG_MODVERSIONS=y +CONFIG_NAMESPACES=y +CONFIG_NET=y +CONFIG_NET_9P=y +CONFIG_NET_9P_VIRTIO=y +CONFIG_NET_ACT_BPF=y +CONFIG_NET_CLS_CGROUP=y +CONFIG_NET_EMATCH=y +CONFIG_NET_IPGRE_BROADCAST=y +CONFIG_NET_L3_MASTER_DEV=y +CONFIG_NET_SCH_DEFAULT=y +CONFIG_NET_SCH_FQ_CODEL=y +CONFIG_NET_TC_SKB_EXT=y +CONFIG_NET_VRF=y +CONFIG_NETDEVICES=y +CONFIG_NETFILTER_NETLINK_LOG=y +CONFIG_NETFILTER_NETLINK_QUEUE=y +CONFIG_NETFILTER_XT_MATCH_BPF=y +CONFIG_NETFILTER_XT_MATCH_STATISTIC=y +CONFIG_NETLABEL=y +CONFIG_NLS_ASCII=y +CONFIG_NLS_CODEPAGE_437=y +CONFIG_NLS_DEFAULT="utf8" +CONFIG_NO_HZ=y +CONFIG_NR_CPUS=128 +CONFIG_NUMA=y +CONFIG_NUMA_BALANCING=y +CONFIG_NVMEM=y +CONFIG_OSF_PARTITION=y +CONFIG_PACKET=y +CONFIG_PANIC_ON_OOPS=y +CONFIG_PARTITION_ADVANCED=y +CONFIG_PCI=y +CONFIG_PCI_IOV=y +CONFIG_PCI_MSI=y +CONFIG_PCIEPORTBUS=y +CONFIG_PHYSICAL_ALIGN=0x1000000 +CONFIG_POSIX_MQUEUE=y +CONFIG_POWER_SUPPLY=y +CONFIG_PREEMPT=y +CONFIG_PRINTK_TIME=y +CONFIG_PROC_KCORE=y +CONFIG_PROFILING=y +CONFIG_PROVE_LOCKING=y +CONFIG_PTP_1588_CLOCK=y +CONFIG_RC_DEVICES=y +CONFIG_RC_LOOPBACK=y +CONFIG_RCU_CPU_STALL_TIMEOUT=60 +CONFIG_SCHED_STACK_END_CHECK=y +CONFIG_SCHEDSTATS=y +CONFIG_SECURITY_NETWORK=y +CONFIG_SECURITY_SELINUX=y +CONFIG_SERIAL_8250=y +CONFIG_SERIAL_8250_CONSOLE=y +CONFIG_SERIAL_8250_DETECT_IRQ=y +CONFIG_SERIAL_8250_EXTENDED=y +CONFIG_SERIAL_8250_MANY_PORTS=y +CONFIG_SERIAL_8250_NR_UARTS=32 +CONFIG_SERIAL_8250_RSA=y +CONFIG_SERIAL_8250_SHARE_IRQ=y +CONFIG_SERIAL_NONSTANDARD=y +CONFIG_SERIO_LIBPS2=y +CONFIG_SGI_PARTITION=y +CONFIG_SMP=y +CONFIG_SOLARIS_X86_PARTITION=y +CONFIG_SUN_PARTITION=y +CONFIG_SYNC_FILE=y +CONFIG_SYSVIPC=y +CONFIG_TASK_DELAY_ACCT=y +CONFIG_TASK_IO_ACCOUNTING=y +CONFIG_TASK_XACCT=y +CONFIG_TASKSTATS=y +CONFIG_TCP_CONG_ADVANCED=y +CONFIG_TCP_MD5SIG=y +CONFIG_TLS=y +CONFIG_TMPFS=y +CONFIG_TMPFS_POSIX_ACL=y 
+CONFIG_TRANSPARENT_HUGEPAGE=y +CONFIG_TRANSPARENT_HUGEPAGE_MADVISE=y +CONFIG_TUN=y +CONFIG_UNIX=y +CONFIG_UNIXWARE_DISKLABEL=y +CONFIG_USER_NS=y +CONFIG_VALIDATE_FS_PARSER=y +CONFIG_VETH=y +CONFIG_VIRT_DRIVERS=y +CONFIG_VIRTIO_BALLOON=y +CONFIG_VIRTIO_BLK=y +CONFIG_VIRTIO_CONSOLE=y +CONFIG_VIRTIO_NET=y +CONFIG_VIRTIO_PCI=y +CONFIG_VLAN_8021Q=y +CONFIG_X86_ACPI_CPUFREQ=y +CONFIG_X86_CPUID=y +CONFIG_X86_MSR=y +CONFIG_X86_POWERNOW_K8=y +CONFIG_XDP_SOCKETS_DIAG=y +CONFIG_XFRM_SUB_POLICY=y +CONFIG_XFRM_USER=y +CONFIG_ZEROPLUS_FF=y diff --git a/tools/testing/selftests/bpf/network_helpers.c b/tools/testing/selftests/bpf/network_helpers.c index 59cf81ec55af..bec15558fd93 100644 --- a/tools/testing/selftests/bpf/network_helpers.c +++ b/tools/testing/selftests/bpf/network_helpers.c @@ -436,7 +436,7 @@ struct nstoken *open_netns(const char *name) int err; struct nstoken *token; - token = malloc(sizeof(struct nstoken)); + token = calloc(1, sizeof(struct nstoken)); if (!ASSERT_OK_PTR(token, "malloc token")) return NULL; diff --git a/tools/testing/selftests/bpf/prog_tests/attach_probe.c b/tools/testing/selftests/bpf/prog_tests/attach_probe.c index 08c0601b3e84..0b899d2d8ea7 100644 --- a/tools/testing/selftests/bpf/prog_tests/attach_probe.c +++ b/tools/testing/selftests/bpf/prog_tests/attach_probe.c @@ -17,6 +17,14 @@ static void trigger_func2(void) asm volatile (""); } +/* attach point for byname sleepable uprobe */ +static void trigger_func3(void) +{ + asm volatile (""); +} + +static char test_data[] = "test_data"; + void test_attach_probe(void) { DECLARE_LIBBPF_OPTS(bpf_uprobe_opts, uprobe_opts); @@ -49,9 +57,17 @@ void test_attach_probe(void) if (!ASSERT_GE(ref_ctr_offset, 0, "ref_ctr_offset")) return; - skel = test_attach_probe__open_and_load(); + skel = test_attach_probe__open(); if (!ASSERT_OK_PTR(skel, "skel_open")) return; + + /* sleepable kprobe test case needs flags set before loading */ + if (!ASSERT_OK(bpf_program__set_flags(skel->progs.handle_kprobe_sleepable, + 
BPF_F_SLEEPABLE), "kprobe_sleepable_flags")) + goto cleanup; + + if (!ASSERT_OK(test_attach_probe__load(skel), "skel_load")) + goto cleanup; if (!ASSERT_OK_PTR(skel->bss, "check_bss")) goto cleanup; @@ -151,6 +167,30 @@ void test_attach_probe(void) if (!ASSERT_OK_PTR(skel->links.handle_uretprobe_byname2, "attach_uretprobe_byname2")) goto cleanup; + /* sleepable kprobes should not attach successfully */ + skel->links.handle_kprobe_sleepable = bpf_program__attach(skel->progs.handle_kprobe_sleepable); + if (!ASSERT_ERR_PTR(skel->links.handle_kprobe_sleepable, "attach_kprobe_sleepable")) + goto cleanup; + + /* test sleepable uprobe and uretprobe variants */ + skel->links.handle_uprobe_byname3_sleepable = bpf_program__attach(skel->progs.handle_uprobe_byname3_sleepable); + if (!ASSERT_OK_PTR(skel->links.handle_uprobe_byname3_sleepable, "attach_uprobe_byname3_sleepable")) + goto cleanup; + + skel->links.handle_uprobe_byname3 = bpf_program__attach(skel->progs.handle_uprobe_byname3); + if (!ASSERT_OK_PTR(skel->links.handle_uprobe_byname3, "attach_uprobe_byname3")) + goto cleanup; + + skel->links.handle_uretprobe_byname3_sleepable = bpf_program__attach(skel->progs.handle_uretprobe_byname3_sleepable); + if (!ASSERT_OK_PTR(skel->links.handle_uretprobe_byname3_sleepable, "attach_uretprobe_byname3_sleepable")) + goto cleanup; + + skel->links.handle_uretprobe_byname3 = bpf_program__attach(skel->progs.handle_uretprobe_byname3); + if (!ASSERT_OK_PTR(skel->links.handle_uretprobe_byname3, "attach_uretprobe_byname3")) + goto cleanup; + + skel->bss->user_ptr = test_data; + /* trigger & validate kprobe && kretprobe */ usleep(1); @@ -164,6 +204,9 @@ void test_attach_probe(void) /* trigger & validate uprobe attached by name */ trigger_func2(); + /* trigger & validate sleepable uprobe attached by name */ + trigger_func3(); + ASSERT_EQ(skel->bss->kprobe_res, 1, "check_kprobe_res"); ASSERT_EQ(skel->bss->kprobe2_res, 11, "check_kprobe_auto_res"); ASSERT_EQ(skel->bss->kretprobe_res, 2, 
"check_kretprobe_res"); @@ -174,6 +217,10 @@ void test_attach_probe(void) ASSERT_EQ(skel->bss->uretprobe_byname_res, 6, "check_uretprobe_byname_res"); ASSERT_EQ(skel->bss->uprobe_byname2_res, 7, "check_uprobe_byname2_res"); ASSERT_EQ(skel->bss->uretprobe_byname2_res, 8, "check_uretprobe_byname2_res"); + ASSERT_EQ(skel->bss->uprobe_byname3_sleepable_res, 9, "check_uprobe_byname3_sleepable_res"); + ASSERT_EQ(skel->bss->uprobe_byname3_res, 10, "check_uprobe_byname3_res"); + ASSERT_EQ(skel->bss->uretprobe_byname3_sleepable_res, 11, "check_uretprobe_byname3_sleepable_res"); + ASSERT_EQ(skel->bss->uretprobe_byname3_res, 12, "check_uretprobe_byname3_res"); cleanup: test_attach_probe__destroy(skel); diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_iter.c b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c index 7ff5fa93d056..a33874b081b6 100644 --- a/tools/testing/selftests/bpf/prog_tests/bpf_iter.c +++ b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c @@ -27,6 +27,7 @@ #include "bpf_iter_test_kern5.skel.h" #include "bpf_iter_test_kern6.skel.h" #include "bpf_iter_bpf_link.skel.h" +#include "bpf_iter_ksym.skel.h" static int duration; @@ -1120,6 +1121,19 @@ static void test_link_iter(void) bpf_iter_bpf_link__destroy(skel); } +static void test_ksym_iter(void) +{ + struct bpf_iter_ksym *skel; + + skel = bpf_iter_ksym__open_and_load(); + if (!ASSERT_OK_PTR(skel, "bpf_iter_ksym__open_and_load")) + return; + + do_dummy_read(skel->progs.dump_ksym); + + bpf_iter_ksym__destroy(skel); +} + #define CMP_BUFFER_SIZE 1024 static char task_vma_output[CMP_BUFFER_SIZE]; static char proc_maps_output[CMP_BUFFER_SIZE]; @@ -1267,4 +1281,6 @@ void test_bpf_iter(void) test_buf_neg_offset(); if (test__start_subtest("link-iter")) test_link_iter(); + if (test__start_subtest("ksym")) + test_ksym_iter(); } diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_loop.c b/tools/testing/selftests/bpf/prog_tests/bpf_loop.c index 380d7a2072e3..4cd8a25afe68 100644 --- 
a/tools/testing/selftests/bpf/prog_tests/bpf_loop.c +++ b/tools/testing/selftests/bpf/prog_tests/bpf_loop.c @@ -120,6 +120,64 @@ static void check_nested_calls(struct bpf_loop *skel) bpf_link__destroy(link); } +static void check_non_constant_callback(struct bpf_loop *skel) +{ + struct bpf_link *link = + bpf_program__attach(skel->progs.prog_non_constant_callback); + + if (!ASSERT_OK_PTR(link, "link")) + return; + + skel->bss->callback_selector = 0x0F; + usleep(1); + ASSERT_EQ(skel->bss->g_output, 0x0F, "g_output #1"); + + skel->bss->callback_selector = 0xF0; + usleep(1); + ASSERT_EQ(skel->bss->g_output, 0xF0, "g_output #2"); + + bpf_link__destroy(link); +} + +static void check_stack(struct bpf_loop *skel) +{ + struct bpf_link *link = bpf_program__attach(skel->progs.stack_check); + const int max_key = 12; + int key; + int map_fd; + + if (!ASSERT_OK_PTR(link, "link")) + return; + + map_fd = bpf_map__fd(skel->maps.map1); + + if (!ASSERT_GE(map_fd, 0, "bpf_map__fd")) + goto out; + + for (key = 1; key <= max_key; ++key) { + int val = key; + int err = bpf_map_update_elem(map_fd, &key, &val, BPF_NOEXIST); + + if (!ASSERT_OK(err, "bpf_map_update_elem")) + goto out; + } + + usleep(1); + + for (key = 1; key <= max_key; ++key) { + int val; + int err = bpf_map_lookup_elem(map_fd, &key, &val); + + if (!ASSERT_OK(err, "bpf_map_lookup_elem")) + goto out; + if (!ASSERT_EQ(val, key + 1, "bad value in the map")) + goto out; + } + +out: + bpf_link__destroy(link); +} + void test_bpf_loop(void) { struct bpf_loop *skel; @@ -140,6 +198,10 @@ void test_bpf_loop(void) check_invalid_flags(skel); if (test__start_subtest("check_nested_calls")) check_nested_calls(skel); + if (test__start_subtest("check_non_constant_callback")) + check_non_constant_callback(skel); + if (test__start_subtest("check_stack")) + check_stack(skel); bpf_loop__destroy(skel); } diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_nf.c b/tools/testing/selftests/bpf/prog_tests/bpf_nf.c index dd30b1e3a67c..7a74a1579076 
100644 --- a/tools/testing/selftests/bpf/prog_tests/bpf_nf.c +++ b/tools/testing/selftests/bpf/prog_tests/bpf_nf.c @@ -2,13 +2,29 @@ #include <test_progs.h> #include <network_helpers.h> #include "test_bpf_nf.skel.h" +#include "test_bpf_nf_fail.skel.h" + +static char log_buf[1024 * 1024]; + +struct { + const char *prog_name; + const char *err_msg; +} test_bpf_nf_fail_tests[] = { + { "alloc_release", "kernel function bpf_ct_release args#0 expected pointer to STRUCT nf_conn but" }, + { "insert_insert", "kernel function bpf_ct_insert_entry args#0 expected pointer to STRUCT nf_conn___init but" }, + { "lookup_insert", "kernel function bpf_ct_insert_entry args#0 expected pointer to STRUCT nf_conn___init but" }, + { "set_timeout_after_insert", "kernel function bpf_ct_set_timeout args#0 expected pointer to STRUCT nf_conn___init but" }, + { "set_status_after_insert", "kernel function bpf_ct_set_status args#0 expected pointer to STRUCT nf_conn___init but" }, + { "change_timeout_after_alloc", "kernel function bpf_ct_change_timeout args#0 expected pointer to STRUCT nf_conn but" }, + { "change_status_after_alloc", "kernel function bpf_ct_change_status args#0 expected pointer to STRUCT nf_conn but" }, +}; enum { TEST_XDP, TEST_TC_BPF, }; -void test_bpf_nf_ct(int mode) +static void test_bpf_nf_ct(int mode) { struct test_bpf_nf *skel; int prog_fd, err; @@ -39,14 +55,60 @@ void test_bpf_nf_ct(int mode) ASSERT_EQ(skel->bss->test_enonet_netns_id, -ENONET, "Test ENONET for bad but valid netns_id"); ASSERT_EQ(skel->bss->test_enoent_lookup, -ENOENT, "Test ENOENT for failed lookup"); ASSERT_EQ(skel->bss->test_eafnosupport, -EAFNOSUPPORT, "Test EAFNOSUPPORT for invalid len__tuple"); + ASSERT_EQ(skel->data->test_alloc_entry, 0, "Test for alloc new entry"); + ASSERT_EQ(skel->data->test_insert_entry, 0, "Test for insert new entry"); + ASSERT_EQ(skel->data->test_succ_lookup, 0, "Test for successful lookup"); + /* allow some tolerance for test_delta_timeout value to avoid races. 
*/ + ASSERT_GT(skel->bss->test_delta_timeout, 8, "Test for min ct timeout update"); + ASSERT_LE(skel->bss->test_delta_timeout, 10, "Test for max ct timeout update"); + /* expected status is IPS_SEEN_REPLY */ + ASSERT_EQ(skel->bss->test_status, 2, "Test for ct status update "); end: test_bpf_nf__destroy(skel); } +static void test_bpf_nf_ct_fail(const char *prog_name, const char *err_msg) +{ + LIBBPF_OPTS(bpf_object_open_opts, opts, .kernel_log_buf = log_buf, + .kernel_log_size = sizeof(log_buf), + .kernel_log_level = 1); + struct test_bpf_nf_fail *skel; + struct bpf_program *prog; + int ret; + + skel = test_bpf_nf_fail__open_opts(&opts); + if (!ASSERT_OK_PTR(skel, "test_bpf_nf_fail__open")) + return; + + prog = bpf_object__find_program_by_name(skel->obj, prog_name); + if (!ASSERT_OK_PTR(prog, "bpf_object__find_program_by_name")) + goto end; + + bpf_program__set_autoload(prog, true); + + ret = test_bpf_nf_fail__load(skel); + if (!ASSERT_ERR(ret, "test_bpf_nf_fail__load must fail")) + goto end; + + if (!ASSERT_OK_PTR(strstr(log_buf, err_msg), "expected error message")) { + fprintf(stderr, "Expected: %s\n", err_msg); + fprintf(stderr, "Verifier: %s\n", log_buf); + } + +end: + test_bpf_nf_fail__destroy(skel); +} + void test_bpf_nf(void) { + int i; if (test__start_subtest("xdp-ct")) test_bpf_nf_ct(TEST_XDP); if (test__start_subtest("tc-bpf-ct")) test_bpf_nf_ct(TEST_TC_BPF); + for (i = 0; i < ARRAY_SIZE(test_bpf_nf_fail_tests); i++) { + if (test__start_subtest(test_bpf_nf_fail_tests[i].prog_name)) + test_bpf_nf_ct_fail(test_bpf_nf_fail_tests[i].prog_name, + test_bpf_nf_fail_tests[i].err_msg); + } } diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c b/tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c index e9a9a31b2ffe..2959a52ced06 100644 --- a/tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c +++ b/tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c @@ -9,6 +9,9 @@ #include "bpf_cubic.skel.h" #include "bpf_tcp_nogpl.skel.h" #include 
"bpf_dctcp_release.skel.h" +#include "tcp_ca_write_sk_pacing.skel.h" +#include "tcp_ca_incompl_cong_ops.skel.h" +#include "tcp_ca_unsupp_cong_op.skel.h" #ifndef ENOTSUPP #define ENOTSUPP 524 @@ -322,6 +325,58 @@ static void test_rel_setsockopt(void) bpf_dctcp_release__destroy(rel_skel); } +static void test_write_sk_pacing(void) +{ + struct tcp_ca_write_sk_pacing *skel; + struct bpf_link *link; + + skel = tcp_ca_write_sk_pacing__open_and_load(); + if (!ASSERT_OK_PTR(skel, "open_and_load")) + return; + + link = bpf_map__attach_struct_ops(skel->maps.write_sk_pacing); + ASSERT_OK_PTR(link, "attach_struct_ops"); + + bpf_link__destroy(link); + tcp_ca_write_sk_pacing__destroy(skel); +} + +static void test_incompl_cong_ops(void) +{ + struct tcp_ca_incompl_cong_ops *skel; + struct bpf_link *link; + + skel = tcp_ca_incompl_cong_ops__open_and_load(); + if (!ASSERT_OK_PTR(skel, "open_and_load")) + return; + + /* That cong_avoid() and cong_control() are missing is only reported at + * this point: + */ + link = bpf_map__attach_struct_ops(skel->maps.incompl_cong_ops); + ASSERT_ERR_PTR(link, "attach_struct_ops"); + + bpf_link__destroy(link); + tcp_ca_incompl_cong_ops__destroy(skel); +} + +static void test_unsupp_cong_op(void) +{ + libbpf_print_fn_t old_print_fn; + struct tcp_ca_unsupp_cong_op *skel; + + err_str = "attach to unsupported member get_info"; + found = false; + old_print_fn = libbpf_set_print(libbpf_debug_print); + + skel = tcp_ca_unsupp_cong_op__open_and_load(); + ASSERT_NULL(skel, "open_and_load"); + ASSERT_EQ(found, true, "expected_err_msg"); + + tcp_ca_unsupp_cong_op__destroy(skel); + libbpf_set_print(old_print_fn); +} + void test_bpf_tcp_ca(void) { if (test__start_subtest("dctcp")) @@ -334,4 +389,10 @@ void test_bpf_tcp_ca(void) test_dctcp_fallback(); if (test__start_subtest("rel_setsockopt")) test_rel_setsockopt(); + if (test__start_subtest("write_sk_pacing")) + test_write_sk_pacing(); + if (test__start_subtest("incompl_cong_ops")) + test_incompl_cong_ops(); + if 
(test__start_subtest("unsupp_cong_op")) + test_unsupp_cong_op(); } diff --git a/tools/testing/selftests/bpf/prog_tests/btf.c b/tools/testing/selftests/bpf/prog_tests/btf.c index ba5bde53d418..ef6528b8084c 100644 --- a/tools/testing/selftests/bpf/prog_tests/btf.c +++ b/tools/testing/selftests/bpf/prog_tests/btf.c @@ -34,7 +34,6 @@ static bool always_log; #undef CHECK #define CHECK(condition, format...) _CHECK(condition, "check", duration, format) -#define BTF_END_RAW 0xdeadbeef #define NAME_TBD 0xdeadb33f #define NAME_NTH(N) (0xfffe0000 | N) @@ -2897,26 +2896,6 @@ static struct btf_raw_test raw_tests[] = { }, { - .descr = "invalid enum kind_flag", - .raw_types = { - BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ - BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_ENUM, 1, 1), 4), /* [2] */ - BTF_ENUM_ENC(NAME_TBD, 0), - BTF_END_RAW, - }, - BTF_STR_SEC("\0A"), - .map_type = BPF_MAP_TYPE_ARRAY, - .map_name = "enum_type_check_btf", - .key_size = sizeof(int), - .value_size = sizeof(int), - .key_type_id = 1, - .value_type_id = 1, - .max_entries = 4, - .btf_load_err = true, - .err_str = "Invalid btf_info kind_flag", -}, - -{ .descr = "valid fwd kind_flag", .raw_types = { BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ @@ -4072,6 +4051,42 @@ static struct btf_raw_test raw_tests[] = { .btf_load_err = true, .err_str = "Type tags don't precede modifiers", }, +{ + .descr = "enum64 test #1, unsigned, size 8", + .raw_types = { + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_ENUM64, 0, 2), 8), /* [2] */ + BTF_ENUM64_ENC(NAME_TBD, 0, 0), + BTF_ENUM64_ENC(NAME_TBD, 1, 1), + BTF_END_RAW, + }, + BTF_STR_SEC("\0a\0b\0c"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "tag_type_check_btf", + .key_size = sizeof(int), + .value_size = 8, + .key_type_id = 1, + .value_type_id = 2, + .max_entries = 1, +}, +{ + .descr = "enum64 test #2, signed, size 4", + .raw_types = { + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] 
*/ + BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_ENUM64, 1, 2), 4), /* [2] */ + BTF_ENUM64_ENC(NAME_TBD, -1, 0), + BTF_ENUM64_ENC(NAME_TBD, 1, 0), + BTF_END_RAW, + }, + BTF_STR_SEC("\0a\0b\0c"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "tag_type_check_btf", + .key_size = sizeof(int), + .value_size = 4, + .key_type_id = 1, + .value_type_id = 2, + .max_entries = 1, +}, }; /* struct btf_raw_test raw_tests[] */ @@ -4636,7 +4651,6 @@ struct btf_file_test { }; static struct btf_file_test file_tests[] = { - { .file = "test_btf_haskv.o", }, { .file = "test_btf_newkv.o", }, { .file = "test_btf_nokv.o", .btf_kv_notfound = true, }, }; @@ -5324,7 +5338,7 @@ static void do_test_pprint(int test_num) ret = snprintf(pin_path, sizeof(pin_path), "%s/%s", "/sys/fs/bpf", test->map_name); - if (CHECK(ret == sizeof(pin_path), "pin_path %s/%s is too long", + if (CHECK(ret >= sizeof(pin_path), "pin_path %s/%s is too long", "/sys/fs/bpf", test->map_name)) { err = -1; goto done; @@ -7000,9 +7014,12 @@ static struct btf_dedup_test dedup_tests[] = { BTF_DECL_TAG_ENC(NAME_TBD, 13, 1), /* [16] decl_tag */ BTF_DECL_TAG_ENC(NAME_TBD, 7, -1), /* [17] decl_tag */ BTF_TYPE_TAG_ENC(NAME_TBD, 8), /* [18] type_tag */ + BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_ENUM64, 0, 2), 8), /* [19] enum64 */ + BTF_ENUM64_ENC(NAME_TBD, 0, 0), + BTF_ENUM64_ENC(NAME_TBD, 1, 1), BTF_END_RAW, }, - BTF_STR_SEC("\0A\0B\0C\0D\0E\0F\0G\0H\0I\0J\0K\0L\0M\0N\0O\0P\0Q\0R"), + BTF_STR_SEC("\0A\0B\0C\0D\0E\0F\0G\0H\0I\0J\0K\0L\0M\0N\0O\0P\0Q\0R\0S\0T\0U"), }, .expect = { .raw_types = { @@ -7030,9 +7047,12 @@ static struct btf_dedup_test dedup_tests[] = { BTF_DECL_TAG_ENC(NAME_TBD, 13, 1), /* [16] decl_tag */ BTF_DECL_TAG_ENC(NAME_TBD, 7, -1), /* [17] decl_tag */ BTF_TYPE_TAG_ENC(NAME_TBD, 8), /* [18] type_tag */ + BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_ENUM64, 0, 2), 8), /* [19] enum64 */ + BTF_ENUM64_ENC(NAME_TBD, 0, 0), + BTF_ENUM64_ENC(NAME_TBD, 1, 1), BTF_END_RAW, }, - 
BTF_STR_SEC("\0A\0B\0C\0D\0E\0F\0G\0H\0I\0J\0K\0L\0M\0N\0O\0P\0Q\0R"), + BTF_STR_SEC("\0A\0B\0C\0D\0E\0F\0G\0H\0I\0J\0K\0L\0M\0N\0O\0P\0Q\0R\0S\0T\0U"), }, }, { @@ -7493,6 +7513,91 @@ static struct btf_dedup_test dedup_tests[] = { BTF_STR_SEC("\0tag1\0t\0m"), }, }, +{ + .descr = "dedup: enum64, standalone", + .input = { + .raw_types = { + BTF_TYPE_ENC(NAME_NTH(1), BTF_INFO_ENC(BTF_KIND_ENUM64, 0, 1), 8), + BTF_ENUM64_ENC(NAME_NTH(2), 1, 123), + BTF_TYPE_ENC(NAME_NTH(1), BTF_INFO_ENC(BTF_KIND_ENUM64, 0, 1), 8), + BTF_ENUM64_ENC(NAME_NTH(2), 1, 123), + BTF_END_RAW, + }, + BTF_STR_SEC("\0e1\0e1_val"), + }, + .expect = { + .raw_types = { + BTF_TYPE_ENC(NAME_NTH(1), BTF_INFO_ENC(BTF_KIND_ENUM64, 0, 1), 8), + BTF_ENUM64_ENC(NAME_NTH(2), 1, 123), + BTF_END_RAW, + }, + BTF_STR_SEC("\0e1\0e1_val"), + }, +}, +{ + .descr = "dedup: enum64, fwd resolution", + .input = { + .raw_types = { + /* [1] fwd enum64 'e1' before full enum */ + BTF_TYPE_ENC(NAME_NTH(1), BTF_INFO_ENC(BTF_KIND_ENUM64, 0, 0), 8), + /* [2] full enum64 'e1' after fwd */ + BTF_TYPE_ENC(NAME_NTH(1), BTF_INFO_ENC(BTF_KIND_ENUM64, 0, 1), 8), + BTF_ENUM64_ENC(NAME_NTH(2), 1, 123), + /* [3] full enum64 'e2' before fwd */ + BTF_TYPE_ENC(NAME_NTH(3), BTF_INFO_ENC(BTF_KIND_ENUM64, 0, 1), 8), + BTF_ENUM64_ENC(NAME_NTH(4), 0, 456), + /* [4] fwd enum64 'e2' after full enum */ + BTF_TYPE_ENC(NAME_NTH(3), BTF_INFO_ENC(BTF_KIND_ENUM64, 0, 0), 8), + /* [5] incompatible full enum64 with different value */ + BTF_TYPE_ENC(NAME_NTH(1), BTF_INFO_ENC(BTF_KIND_ENUM64, 0, 1), 8), + BTF_ENUM64_ENC(NAME_NTH(2), 0, 321), + BTF_END_RAW, + }, + BTF_STR_SEC("\0e1\0e1_val\0e2\0e2_val"), + }, + .expect = { + .raw_types = { + /* [1] full enum64 'e1' */ + BTF_TYPE_ENC(NAME_NTH(1), BTF_INFO_ENC(BTF_KIND_ENUM64, 0, 1), 8), + BTF_ENUM64_ENC(NAME_NTH(2), 1, 123), + /* [2] full enum64 'e2' */ + BTF_TYPE_ENC(NAME_NTH(3), BTF_INFO_ENC(BTF_KIND_ENUM64, 0, 1), 8), + BTF_ENUM64_ENC(NAME_NTH(4), 0, 456), + /* [3] incompatible full enum64 with different 
value */ + BTF_TYPE_ENC(NAME_NTH(1), BTF_INFO_ENC(BTF_KIND_ENUM64, 0, 1), 8), + BTF_ENUM64_ENC(NAME_NTH(2), 0, 321), + BTF_END_RAW, + }, + BTF_STR_SEC("\0e1\0e1_val\0e2\0e2_val"), + }, +}, +{ + .descr = "dedup: enum and enum64, no dedup", + .input = { + .raw_types = { + /* [1] enum 'e1' */ + BTF_TYPE_ENC(NAME_NTH(1), BTF_INFO_ENC(BTF_KIND_ENUM, 0, 1), 4), + BTF_ENUM_ENC(NAME_NTH(2), 1), + /* [2] enum64 'e1' */ + BTF_TYPE_ENC(NAME_NTH(1), BTF_INFO_ENC(BTF_KIND_ENUM64, 0, 1), 4), + BTF_ENUM64_ENC(NAME_NTH(2), 1, 0), + BTF_END_RAW, + }, + BTF_STR_SEC("\0e1\0e1_val"), + }, + .expect = { + .raw_types = { + /* [1] enum 'e1' */ + BTF_TYPE_ENC(NAME_NTH(1), BTF_INFO_ENC(BTF_KIND_ENUM, 0, 1), 4), + BTF_ENUM_ENC(NAME_NTH(2), 1), + /* [2] enum64 'e1' */ + BTF_TYPE_ENC(NAME_NTH(1), BTF_INFO_ENC(BTF_KIND_ENUM64, 0, 1), 4), + BTF_ENUM64_ENC(NAME_NTH(2), 1, 0), + BTF_END_RAW, + }, + BTF_STR_SEC("\0e1\0e1_val"), + }, +}, }; @@ -7517,6 +7622,8 @@ static int btf_type_size(const struct btf_type *t) return base_size + sizeof(__u32); case BTF_KIND_ENUM: return base_size + vlen * sizeof(struct btf_enum); + case BTF_KIND_ENUM64: + return base_size + vlen * sizeof(struct btf_enum64); case BTF_KIND_ARRAY: return base_size + sizeof(struct btf_array); case BTF_KIND_STRUCT: diff --git a/tools/testing/selftests/bpf/prog_tests/btf_write.c b/tools/testing/selftests/bpf/prog_tests/btf_write.c index addf99c05896..6e36de1302fc 100644 --- a/tools/testing/selftests/bpf/prog_tests/btf_write.c +++ b/tools/testing/selftests/bpf/prog_tests/btf_write.c @@ -9,6 +9,7 @@ static void gen_btf(struct btf *btf) const struct btf_var_secinfo *vi; const struct btf_type *t; const struct btf_member *m; + const struct btf_enum64 *v64; const struct btf_enum *v; const struct btf_param *p; int id, err, str_off; @@ -171,7 +172,7 @@ static void gen_btf(struct btf *btf) ASSERT_STREQ(btf__str_by_offset(btf, v->name_off), "v2", "v2_name"); ASSERT_EQ(v->val, 2, "v2_val"); ASSERT_STREQ(btf_type_raw_dump(btf, 9), - "[9] ENUM 'e1' 
size=4 vlen=2\n" + "[9] ENUM 'e1' encoding=UNSIGNED size=4 vlen=2\n" "\t'v1' val=1\n" "\t'v2' val=2", "raw_dump"); @@ -202,7 +203,7 @@ static void gen_btf(struct btf *btf) ASSERT_EQ(btf_vlen(t), 0, "enum_fwd_kind"); ASSERT_EQ(t->size, 4, "enum_fwd_sz"); ASSERT_STREQ(btf_type_raw_dump(btf, 12), - "[12] ENUM 'enum_fwd' size=4 vlen=0", "raw_dump"); + "[12] ENUM 'enum_fwd' encoding=UNSIGNED size=4 vlen=0", "raw_dump"); /* TYPEDEF */ id = btf__add_typedef(btf, "typedef1", 1); @@ -307,6 +308,48 @@ static void gen_btf(struct btf *btf) ASSERT_EQ(t->type, 1, "tag_type"); ASSERT_STREQ(btf_type_raw_dump(btf, 20), "[20] TYPE_TAG 'tag1' type_id=1", "raw_dump"); + + /* ENUM64 */ + id = btf__add_enum64(btf, "e1", 8, true); + ASSERT_EQ(id, 21, "enum64_id"); + err = btf__add_enum64_value(btf, "v1", -1); + ASSERT_OK(err, "v1_res"); + err = btf__add_enum64_value(btf, "v2", 0x123456789); /* 4886718345 */ + ASSERT_OK(err, "v2_res"); + t = btf__type_by_id(btf, 21); + ASSERT_STREQ(btf__str_by_offset(btf, t->name_off), "e1", "enum64_name"); + ASSERT_EQ(btf_kind(t), BTF_KIND_ENUM64, "enum64_kind"); + ASSERT_EQ(btf_vlen(t), 2, "enum64_vlen"); + ASSERT_EQ(t->size, 8, "enum64_sz"); + v64 = btf_enum64(t) + 0; + ASSERT_STREQ(btf__str_by_offset(btf, v64->name_off), "v1", "v1_name"); + ASSERT_EQ(v64->val_hi32, 0xffffffff, "v1_val"); + ASSERT_EQ(v64->val_lo32, 0xffffffff, "v1_val"); + v64 = btf_enum64(t) + 1; + ASSERT_STREQ(btf__str_by_offset(btf, v64->name_off), "v2", "v2_name"); + ASSERT_EQ(v64->val_hi32, 0x1, "v2_val"); + ASSERT_EQ(v64->val_lo32, 0x23456789, "v2_val"); + ASSERT_STREQ(btf_type_raw_dump(btf, 21), + "[21] ENUM64 'e1' encoding=SIGNED size=8 vlen=2\n" + "\t'v1' val=-1\n" + "\t'v2' val=4886718345", "raw_dump"); + + id = btf__add_enum64(btf, "e1", 8, false); + ASSERT_EQ(id, 22, "enum64_id"); + err = btf__add_enum64_value(btf, "v1", 0xffffffffFFFFFFFF); /* 18446744073709551615 */ + ASSERT_OK(err, "v1_res"); + t = btf__type_by_id(btf, 22); + ASSERT_STREQ(btf__str_by_offset(btf, 
t->name_off), "e1", "enum64_name"); + ASSERT_EQ(btf_kind(t), BTF_KIND_ENUM64, "enum64_kind"); + ASSERT_EQ(btf_vlen(t), 1, "enum64_vlen"); + ASSERT_EQ(t->size, 8, "enum64_sz"); + v64 = btf_enum64(t) + 0; + ASSERT_STREQ(btf__str_by_offset(btf, v64->name_off), "v1", "v1_name"); + ASSERT_EQ(v64->val_hi32, 0xffffffff, "v1_val"); + ASSERT_EQ(v64->val_lo32, 0xffffffff, "v1_val"); + ASSERT_STREQ(btf_type_raw_dump(btf, 22), + "[22] ENUM64 'e1' encoding=UNSIGNED size=8 vlen=1\n" + "\t'v1' val=18446744073709551615", "raw_dump"); } static void test_btf_add() @@ -332,12 +375,12 @@ static void test_btf_add() "\t'f2' type_id=1 bits_offset=32 bitfield_size=16", "[8] UNION 'u1' size=8 vlen=1\n" "\t'f1' type_id=1 bits_offset=0 bitfield_size=16", - "[9] ENUM 'e1' size=4 vlen=2\n" + "[9] ENUM 'e1' encoding=UNSIGNED size=4 vlen=2\n" "\t'v1' val=1\n" "\t'v2' val=2", "[10] FWD 'struct_fwd' fwd_kind=struct", "[11] FWD 'union_fwd' fwd_kind=union", - "[12] ENUM 'enum_fwd' size=4 vlen=0", + "[12] ENUM 'enum_fwd' encoding=UNSIGNED size=4 vlen=0", "[13] TYPEDEF 'typedef1' type_id=1", "[14] FUNC 'func1' type_id=15 linkage=global", "[15] FUNC_PROTO '(anon)' ret_type_id=1 vlen=2\n" @@ -348,7 +391,12 @@ static void test_btf_add() "\ttype_id=1 offset=4 size=8", "[18] DECL_TAG 'tag1' type_id=16 component_idx=-1", "[19] DECL_TAG 'tag2' type_id=14 component_idx=1", - "[20] TYPE_TAG 'tag1' type_id=1"); + "[20] TYPE_TAG 'tag1' type_id=1", + "[21] ENUM64 'e1' encoding=SIGNED size=8 vlen=2\n" + "\t'v1' val=-1\n" + "\t'v2' val=4886718345", + "[22] ENUM64 'e1' encoding=UNSIGNED size=8 vlen=1\n" + "\t'v1' val=18446744073709551615"); btf__free(btf); } @@ -370,7 +418,7 @@ static void test_btf_add_btf() gen_btf(btf2); id = btf__add_btf(btf1, btf2); - if (!ASSERT_EQ(id, 21, "id")) + if (!ASSERT_EQ(id, 23, "id")) goto cleanup; VALIDATE_RAW_BTF( @@ -386,12 +434,12 @@ static void test_btf_add_btf() "\t'f2' type_id=1 bits_offset=32 bitfield_size=16", "[8] UNION 'u1' size=8 vlen=1\n" "\t'f1' type_id=1 bits_offset=0 
bitfield_size=16", - "[9] ENUM 'e1' size=4 vlen=2\n" + "[9] ENUM 'e1' encoding=UNSIGNED size=4 vlen=2\n" "\t'v1' val=1\n" "\t'v2' val=2", "[10] FWD 'struct_fwd' fwd_kind=struct", "[11] FWD 'union_fwd' fwd_kind=union", - "[12] ENUM 'enum_fwd' size=4 vlen=0", + "[12] ENUM 'enum_fwd' encoding=UNSIGNED size=4 vlen=0", "[13] TYPEDEF 'typedef1' type_id=1", "[14] FUNC 'func1' type_id=15 linkage=global", "[15] FUNC_PROTO '(anon)' ret_type_id=1 vlen=2\n" @@ -403,36 +451,46 @@ static void test_btf_add_btf() "[18] DECL_TAG 'tag1' type_id=16 component_idx=-1", "[19] DECL_TAG 'tag2' type_id=14 component_idx=1", "[20] TYPE_TAG 'tag1' type_id=1", + "[21] ENUM64 'e1' encoding=SIGNED size=8 vlen=2\n" + "\t'v1' val=-1\n" + "\t'v2' val=4886718345", + "[22] ENUM64 'e1' encoding=UNSIGNED size=8 vlen=1\n" + "\t'v1' val=18446744073709551615", /* types appended from the second BTF */ - "[21] INT 'int' size=4 bits_offset=0 nr_bits=32 encoding=SIGNED", - "[22] PTR '(anon)' type_id=21", - "[23] CONST '(anon)' type_id=25", - "[24] VOLATILE '(anon)' type_id=23", - "[25] RESTRICT '(anon)' type_id=24", - "[26] ARRAY '(anon)' type_id=22 index_type_id=21 nr_elems=10", - "[27] STRUCT 's1' size=8 vlen=2\n" - "\t'f1' type_id=21 bits_offset=0\n" - "\t'f2' type_id=21 bits_offset=32 bitfield_size=16", - "[28] UNION 'u1' size=8 vlen=1\n" - "\t'f1' type_id=21 bits_offset=0 bitfield_size=16", - "[29] ENUM 'e1' size=4 vlen=2\n" + "[23] INT 'int' size=4 bits_offset=0 nr_bits=32 encoding=SIGNED", + "[24] PTR '(anon)' type_id=23", + "[25] CONST '(anon)' type_id=27", + "[26] VOLATILE '(anon)' type_id=25", + "[27] RESTRICT '(anon)' type_id=26", + "[28] ARRAY '(anon)' type_id=24 index_type_id=23 nr_elems=10", + "[29] STRUCT 's1' size=8 vlen=2\n" + "\t'f1' type_id=23 bits_offset=0\n" + "\t'f2' type_id=23 bits_offset=32 bitfield_size=16", + "[30] UNION 'u1' size=8 vlen=1\n" + "\t'f1' type_id=23 bits_offset=0 bitfield_size=16", + "[31] ENUM 'e1' encoding=UNSIGNED size=4 vlen=2\n" "\t'v1' val=1\n" "\t'v2' val=2", - 
"[30] FWD 'struct_fwd' fwd_kind=struct", - "[31] FWD 'union_fwd' fwd_kind=union", - "[32] ENUM 'enum_fwd' size=4 vlen=0", - "[33] TYPEDEF 'typedef1' type_id=21", - "[34] FUNC 'func1' type_id=35 linkage=global", - "[35] FUNC_PROTO '(anon)' ret_type_id=21 vlen=2\n" - "\t'p1' type_id=21\n" - "\t'p2' type_id=22", - "[36] VAR 'var1' type_id=21, linkage=global-alloc", - "[37] DATASEC 'datasec1' size=12 vlen=1\n" - "\ttype_id=21 offset=4 size=8", - "[38] DECL_TAG 'tag1' type_id=36 component_idx=-1", - "[39] DECL_TAG 'tag2' type_id=34 component_idx=1", - "[40] TYPE_TAG 'tag1' type_id=21"); + "[32] FWD 'struct_fwd' fwd_kind=struct", + "[33] FWD 'union_fwd' fwd_kind=union", + "[34] ENUM 'enum_fwd' encoding=UNSIGNED size=4 vlen=0", + "[35] TYPEDEF 'typedef1' type_id=23", + "[36] FUNC 'func1' type_id=37 linkage=global", + "[37] FUNC_PROTO '(anon)' ret_type_id=23 vlen=2\n" + "\t'p1' type_id=23\n" + "\t'p2' type_id=24", + "[38] VAR 'var1' type_id=23, linkage=global-alloc", + "[39] DATASEC 'datasec1' size=12 vlen=1\n" + "\ttype_id=23 offset=4 size=8", + "[40] DECL_TAG 'tag1' type_id=38 component_idx=-1", + "[41] DECL_TAG 'tag2' type_id=36 component_idx=1", + "[42] TYPE_TAG 'tag1' type_id=23", + "[43] ENUM64 'e1' encoding=SIGNED size=8 vlen=2\n" + "\t'v1' val=-1\n" + "\t'v2' val=4886718345", + "[44] ENUM64 'e1' encoding=UNSIGNED size=8 vlen=1\n" + "\t'v1' val=18446744073709551615"); cleanup: btf__free(btf1); diff --git a/tools/testing/selftests/bpf/prog_tests/core_extern.c b/tools/testing/selftests/bpf/prog_tests/core_extern.c index 1931a158510e..63a51e9f3630 100644 --- a/tools/testing/selftests/bpf/prog_tests/core_extern.c +++ b/tools/testing/selftests/bpf/prog_tests/core_extern.c @@ -39,6 +39,7 @@ static struct test_case { "CONFIG_STR=\"abracad\"\n" "CONFIG_MISSING=0", .data = { + .unkn_virt_val = 0, .bpf_syscall = false, .tristate_val = TRI_MODULE, .bool_val = true, @@ -121,7 +122,7 @@ static struct test_case { void test_core_extern(void) { const uint32_t kern_ver = 
get_kernel_version(); - int err, duration = 0, i, j; + int err, i, j; struct test_core_extern *skel = NULL; uint64_t *got, *exp; int n = sizeof(*skel->data) / sizeof(uint64_t); @@ -136,19 +137,17 @@ void test_core_extern(void) continue; skel = test_core_extern__open_opts(&opts); - if (CHECK(!skel, "skel_open", "skeleton open failed\n")) + if (!ASSERT_OK_PTR(skel, "skel_open")) goto cleanup; err = test_core_extern__load(skel); if (t->fails) { - CHECK(!err, "skel_load", - "shouldn't succeed open/load of skeleton\n"); + ASSERT_ERR(err, "skel_load_should_fail"); goto cleanup; - } else if (CHECK(err, "skel_load", - "failed to open/load skeleton\n")) { + } else if (!ASSERT_OK(err, "skel_load")) { goto cleanup; } err = test_core_extern__attach(skel); - if (CHECK(err, "attach_raw_tp", "failed attach: %d\n", err)) + if (!ASSERT_OK(err, "attach_raw_tp")) goto cleanup; usleep(1); @@ -158,9 +157,7 @@ void test_core_extern(void) got = (uint64_t *)skel->data; exp = (uint64_t *)&t->data; for (j = 0; j < n; j++) { - CHECK(got[j] != exp[j], "check_res", - "result #%d: expected %llx, but got %llx\n", - j, (__u64)exp[j], (__u64)got[j]); + ASSERT_EQ(got[j], exp[j], "result"); } cleanup: test_core_extern__destroy(skel); diff --git a/tools/testing/selftests/bpf/prog_tests/core_reloc.c b/tools/testing/selftests/bpf/prog_tests/core_reloc.c index 3712dfe1be59..c8655ba9a88f 100644 --- a/tools/testing/selftests/bpf/prog_tests/core_reloc.c +++ b/tools/testing/selftests/bpf/prog_tests/core_reloc.c @@ -84,6 +84,7 @@ static int duration = 0; #define NESTING_ERR_CASE(name) { \ NESTING_CASE_COMMON(name), \ .fails = true, \ + .run_btfgen_fails = true, \ } #define ARRAYS_DATA(struct_name) STRUCT_TO_CHAR_PTR(struct_name) { \ @@ -258,12 +259,14 @@ static int duration = 0; BITFIELDS_CASE_COMMON("test_core_reloc_bitfields_probed.o", \ "probed:", name), \ .fails = true, \ + .run_btfgen_fails = true, \ .raw_tp_name = "sys_enter", \ .prog_name = "test_core_bitfields", \ }, { \ 
BITFIELDS_CASE_COMMON("test_core_reloc_bitfields_direct.o", \ "direct:", name), \ .fails = true, \ + .run_btfgen_fails = true, \ .prog_name = "test_core_bitfields_direct", \ } @@ -304,6 +307,7 @@ static int duration = 0; #define SIZE_ERR_CASE(name) { \ SIZE_CASE_COMMON(name), \ .fails = true, \ + .run_btfgen_fails = true, \ } #define TYPE_BASED_CASE_COMMON(name) \ @@ -363,6 +367,25 @@ static int duration = 0; .fails = true, \ } +#define ENUM64VAL_CASE_COMMON(name) \ + .case_name = #name, \ + .bpf_obj_file = "test_core_reloc_enum64val.o", \ + .btf_src_file = "btf__core_reloc_" #name ".o", \ + .raw_tp_name = "sys_enter", \ + .prog_name = "test_core_enum64val" + +#define ENUM64VAL_CASE(name, ...) { \ + ENUM64VAL_CASE_COMMON(name), \ + .output = STRUCT_TO_CHAR_PTR(core_reloc_enum64val_output) \ + __VA_ARGS__, \ + .output_len = sizeof(struct core_reloc_enum64val_output), \ +} + +#define ENUM64VAL_ERR_CASE(name) { \ + ENUM64VAL_CASE_COMMON(name), \ + .fails = true, \ +} + struct core_reloc_test_case; typedef int (*setup_test_fn)(struct core_reloc_test_case *test); @@ -377,6 +400,7 @@ struct core_reloc_test_case { const char *output; int output_len; bool fails; + bool run_btfgen_fails; bool needs_testmod; bool relaxed_core_relocs; const char *prog_name; @@ -519,7 +543,6 @@ static int __trigger_module_test_read(const struct core_reloc_test_case *test) return 0; } - static const struct core_reloc_test_case test_cases[] = { /* validate we can find kernel image and use its BTF for relocs */ { @@ -532,6 +555,7 @@ static const struct core_reloc_test_case test_cases[] = { .valid = { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, }, .comm = "test_progs", .comm_len = sizeof("test_progs"), + .local_task_struct_matches = true, }, .output_len = sizeof(struct core_reloc_kernel_output), .raw_tp_name = "sys_enter", @@ -728,9 +752,10 @@ static const struct core_reloc_test_case test_cases[] = { SIZE_CASE(size___diff_offs), SIZE_ERR_CASE(size___err_ambiguous), - /* validate type existence and size 
relocations */ + /* validate type existence, match, and size relocations */ TYPE_BASED_CASE(type_based, { .struct_exists = 1, + .complex_struct_exists = 1, .union_exists = 1, .enum_exists = 1, .typedef_named_struct_exists = 1, @@ -739,8 +764,24 @@ static const struct core_reloc_test_case test_cases[] = { .typedef_int_exists = 1, .typedef_enum_exists = 1, .typedef_void_ptr_exists = 1, + .typedef_restrict_ptr_exists = 1, .typedef_func_proto_exists = 1, .typedef_arr_exists = 1, + + .struct_matches = 1, + .complex_struct_matches = 1, + .union_matches = 1, + .enum_matches = 1, + .typedef_named_struct_matches = 1, + .typedef_anon_struct_matches = 1, + .typedef_struct_ptr_matches = 1, + .typedef_int_matches = 1, + .typedef_enum_matches = 1, + .typedef_void_ptr_matches = 1, + .typedef_restrict_ptr_matches = 1, + .typedef_func_proto_matches = 1, + .typedef_arr_matches = 1, + .struct_sz = sizeof(struct a_struct), .union_sz = sizeof(union a_union), .enum_sz = sizeof(enum an_enum), @@ -756,6 +797,45 @@ static const struct core_reloc_test_case test_cases[] = { TYPE_BASED_CASE(type_based___all_missing, { /* all zeros */ }), + TYPE_BASED_CASE(type_based___diff, { + .struct_exists = 1, + .complex_struct_exists = 1, + .union_exists = 1, + .enum_exists = 1, + .typedef_named_struct_exists = 1, + .typedef_anon_struct_exists = 1, + .typedef_struct_ptr_exists = 1, + .typedef_int_exists = 1, + .typedef_enum_exists = 1, + .typedef_void_ptr_exists = 1, + .typedef_func_proto_exists = 1, + .typedef_arr_exists = 1, + + .struct_matches = 1, + .complex_struct_matches = 1, + .union_matches = 1, + .enum_matches = 1, + .typedef_named_struct_matches = 1, + .typedef_anon_struct_matches = 1, + .typedef_struct_ptr_matches = 1, + .typedef_int_matches = 0, + .typedef_enum_matches = 1, + .typedef_void_ptr_matches = 1, + .typedef_func_proto_matches = 0, + .typedef_arr_matches = 0, + + .struct_sz = sizeof(struct a_struct___diff), + .union_sz = sizeof(union a_union___diff), + .enum_sz = sizeof(enum 
an_enum___diff), + .typedef_named_struct_sz = sizeof(named_struct_typedef___diff), + .typedef_anon_struct_sz = sizeof(anon_struct_typedef___diff), + .typedef_struct_ptr_sz = sizeof(struct_ptr_typedef___diff), + .typedef_int_sz = sizeof(int_typedef___diff), + .typedef_enum_sz = sizeof(enum_typedef___diff), + .typedef_void_ptr_sz = sizeof(void_ptr_typedef___diff), + .typedef_func_proto_sz = sizeof(func_proto_typedef___diff), + .typedef_arr_sz = sizeof(arr_typedef___diff), + }), TYPE_BASED_CASE(type_based___diff_sz, { .struct_exists = 1, .union_exists = 1, @@ -768,6 +848,19 @@ static const struct core_reloc_test_case test_cases[] = { .typedef_void_ptr_exists = 1, .typedef_func_proto_exists = 1, .typedef_arr_exists = 1, + + .struct_matches = 0, + .union_matches = 0, + .enum_matches = 0, + .typedef_named_struct_matches = 0, + .typedef_anon_struct_matches = 0, + .typedef_struct_ptr_matches = 1, + .typedef_int_matches = 0, + .typedef_enum_matches = 0, + .typedef_void_ptr_matches = 1, + .typedef_func_proto_matches = 0, + .typedef_arr_matches = 0, + .struct_sz = sizeof(struct a_struct___diff_sz), .union_sz = sizeof(union a_union___diff_sz), .enum_sz = sizeof(enum an_enum___diff_sz), @@ -782,10 +875,12 @@ static const struct core_reloc_test_case test_cases[] = { }), TYPE_BASED_CASE(type_based___incompat, { .enum_exists = 1, + .enum_matches = 1, .enum_sz = sizeof(enum an_enum), }), TYPE_BASED_CASE(type_based___fn_wrong_args, { .struct_exists = 1, + .struct_matches = 1, .struct_sz = sizeof(struct a_struct), }), @@ -831,6 +926,45 @@ static const struct core_reloc_test_case test_cases[] = { .anon_val2 = 0x222, }), ENUMVAL_ERR_CASE(enumval___err_missing), + + /* 64bit enumerator value existence and value relocations */ + ENUM64VAL_CASE(enum64val, { + .unsigned_val1_exists = true, + .unsigned_val2_exists = true, + .unsigned_val3_exists = true, + .signed_val1_exists = true, + .signed_val2_exists = true, + .signed_val3_exists = true, + .unsigned_val1 = 0x1ffffffffULL, + 
.unsigned_val2 = 0x2, + .signed_val1 = 0x1ffffffffLL, + .signed_val2 = -2, + }), + ENUM64VAL_CASE(enum64val___diff, { + .unsigned_val1_exists = true, + .unsigned_val2_exists = true, + .unsigned_val3_exists = true, + .signed_val1_exists = true, + .signed_val2_exists = true, + .signed_val3_exists = true, + .unsigned_val1 = 0x101ffffffffULL, + .unsigned_val2 = 0x202ffffffffULL, + .signed_val1 = -101, + .signed_val2 = -202, + }), + ENUM64VAL_CASE(enum64val___val3_missing, { + .unsigned_val1_exists = true, + .unsigned_val2_exists = true, + .unsigned_val3_exists = false, + .signed_val1_exists = true, + .signed_val2_exists = true, + .signed_val3_exists = false, + .unsigned_val1 = 0x111ffffffffULL, + .unsigned_val2 = 0x222, + .signed_val1 = 0x111ffffffffLL, + .signed_val2 = -222, + }), + ENUM64VAL_ERR_CASE(enum64val___err_missing), }; struct data { @@ -894,7 +1028,7 @@ static void run_core_reloc_tests(bool use_btfgen) /* generate a "minimal" BTF file and use it as source */ if (use_btfgen) { - if (!test_case->btf_src_file || test_case->fails) { + if (!test_case->btf_src_file || test_case->run_btfgen_fails) { test__skip(); continue; } diff --git a/tools/testing/selftests/bpf/prog_tests/fexit_stress.c b/tools/testing/selftests/bpf/prog_tests/fexit_stress.c index a7e74297f15f..5a7e6011f6bf 100644 --- a/tools/testing/selftests/bpf/prog_tests/fexit_stress.c +++ b/tools/testing/selftests/bpf/prog_tests/fexit_stress.c @@ -7,11 +7,9 @@ void serial_test_fexit_stress(void) { - char test_skb[128] = {}; int fexit_fd[CNT] = {}; int link_fd[CNT] = {}; - char error[4096]; - int err, i, filter_fd; + int err, i; const struct bpf_insn trace_program[] = { BPF_MOV64_IMM(BPF_REG_0, 0), @@ -20,25 +18,9 @@ void serial_test_fexit_stress(void) LIBBPF_OPTS(bpf_prog_load_opts, trace_opts, .expected_attach_type = BPF_TRACE_FEXIT, - .log_buf = error, - .log_size = sizeof(error), ); - const struct bpf_insn skb_program[] = { - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }; - - 
LIBBPF_OPTS(bpf_prog_load_opts, skb_opts, - .log_buf = error, - .log_size = sizeof(error), - ); - - LIBBPF_OPTS(bpf_test_run_opts, topts, - .data_in = test_skb, - .data_size_in = sizeof(test_skb), - .repeat = 1, - ); + LIBBPF_OPTS(bpf_test_run_opts, topts); err = libbpf_find_vmlinux_btf_id("bpf_fentry_test1", trace_opts.expected_attach_type); @@ -58,15 +40,9 @@ void serial_test_fexit_stress(void) goto out; } - filter_fd = bpf_prog_load(BPF_PROG_TYPE_SOCKET_FILTER, NULL, "GPL", - skb_program, sizeof(skb_program) / sizeof(struct bpf_insn), - &skb_opts); - if (!ASSERT_GE(filter_fd, 0, "test_program_loaded")) - goto out; + err = bpf_prog_test_run_opts(fexit_fd[0], &topts); + ASSERT_OK(err, "bpf_prog_test_run_opts"); - err = bpf_prog_test_run_opts(filter_fd, &topts); - close(filter_fd); - CHECK_FAIL(err); out: for (i = 0; i < CNT; i++) { if (link_fd[i]) diff --git a/tools/testing/selftests/bpf/prog_tests/kprobe_multi_test.c b/tools/testing/selftests/bpf/prog_tests/kprobe_multi_test.c index 5b93d5d0bd93..d457a55ff408 100644 --- a/tools/testing/selftests/bpf/prog_tests/kprobe_multi_test.c +++ b/tools/testing/selftests/bpf/prog_tests/kprobe_multi_test.c @@ -329,7 +329,7 @@ static int get_syms(char ***symsp, size_t *cntp) struct hashmap *map; char buf[256]; FILE *f; - int err; + int err = 0; /* * The available_filter_functions contains many duplicates, @@ -364,6 +364,8 @@ static int get_syms(char ***symsp, size_t *cntp) continue; if (!strncmp(name, "rcu_", 4)) continue; + if (!strcmp(name, "bpf_dispatcher_xdp_func")) + continue; if (!strncmp(name, "__ftrace_invalid_address__", sizeof("__ftrace_invalid_address__") - 1)) continue; @@ -407,7 +409,7 @@ static void test_bench_attach(void) double attach_delta, detach_delta; struct bpf_link *link = NULL; char **syms = NULL; - size_t cnt, i; + size_t cnt = 0, i; if (!ASSERT_OK(get_syms(&syms, &cnt), "get_syms")) return; diff --git a/tools/testing/selftests/bpf/prog_tests/libbpf_str.c 
b/tools/testing/selftests/bpf/prog_tests/libbpf_str.c new file mode 100644 index 000000000000..93e9cddaadcf --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/libbpf_str.c @@ -0,0 +1,207 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. */ + +#include <ctype.h> +#include <test_progs.h> +#include <bpf/btf.h> + +/* + * Utility function uppercasing an entire string. + */ +static void uppercase(char *s) +{ + for (; *s != '\0'; s++) + *s = toupper(*s); +} + +/* + * Test case to check that all bpf_attach_type variants are covered by + * libbpf_bpf_attach_type_str. + */ +static void test_libbpf_bpf_attach_type_str(void) +{ + struct btf *btf; + const struct btf_type *t; + const struct btf_enum *e; + int i, n, id; + + btf = btf__parse("/sys/kernel/btf/vmlinux", NULL); + if (!ASSERT_OK_PTR(btf, "btf_parse")) + return; + + /* find enum bpf_attach_type and enumerate each value */ + id = btf__find_by_name_kind(btf, "bpf_attach_type", BTF_KIND_ENUM); + if (!ASSERT_GT(id, 0, "bpf_attach_type_id")) + goto cleanup; + t = btf__type_by_id(btf, id); + e = btf_enum(t); + n = btf_vlen(t); + for (i = 0; i < n; e++, i++) { + enum bpf_attach_type attach_type = (enum bpf_attach_type)e->val; + const char *attach_type_name; + const char *attach_type_str; + char buf[256]; + + if (attach_type == __MAX_BPF_ATTACH_TYPE) + continue; + + attach_type_name = btf__str_by_offset(btf, e->name_off); + attach_type_str = libbpf_bpf_attach_type_str(attach_type); + ASSERT_OK_PTR(attach_type_str, attach_type_name); + + snprintf(buf, sizeof(buf), "BPF_%s", attach_type_str); + uppercase(buf); + + ASSERT_STREQ(buf, attach_type_name, "exp_str_value"); + } + +cleanup: + btf__free(btf); +} + +/* + * Test case to check that all bpf_link_type variants are covered by + * libbpf_bpf_link_type_str. 
+ */ +static void test_libbpf_bpf_link_type_str(void) +{ + struct btf *btf; + const struct btf_type *t; + const struct btf_enum *e; + int i, n, id; + + btf = btf__parse("/sys/kernel/btf/vmlinux", NULL); + if (!ASSERT_OK_PTR(btf, "btf_parse")) + return; + + /* find enum bpf_link_type and enumerate each value */ + id = btf__find_by_name_kind(btf, "bpf_link_type", BTF_KIND_ENUM); + if (!ASSERT_GT(id, 0, "bpf_link_type_id")) + goto cleanup; + t = btf__type_by_id(btf, id); + e = btf_enum(t); + n = btf_vlen(t); + for (i = 0; i < n; e++, i++) { + enum bpf_link_type link_type = (enum bpf_link_type)e->val; + const char *link_type_name; + const char *link_type_str; + char buf[256]; + + if (link_type == MAX_BPF_LINK_TYPE) + continue; + + link_type_name = btf__str_by_offset(btf, e->name_off); + link_type_str = libbpf_bpf_link_type_str(link_type); + ASSERT_OK_PTR(link_type_str, link_type_name); + + snprintf(buf, sizeof(buf), "BPF_LINK_TYPE_%s", link_type_str); + uppercase(buf); + + ASSERT_STREQ(buf, link_type_name, "exp_str_value"); + } + +cleanup: + btf__free(btf); +} + +/* + * Test case to check that all bpf_map_type variants are covered by + * libbpf_bpf_map_type_str. 
+ */ +static void test_libbpf_bpf_map_type_str(void) +{ + struct btf *btf; + const struct btf_type *t; + const struct btf_enum *e; + int i, n, id; + + btf = btf__parse("/sys/kernel/btf/vmlinux", NULL); + if (!ASSERT_OK_PTR(btf, "btf_parse")) + return; + + /* find enum bpf_map_type and enumerate each value */ + id = btf__find_by_name_kind(btf, "bpf_map_type", BTF_KIND_ENUM); + if (!ASSERT_GT(id, 0, "bpf_map_type_id")) + goto cleanup; + t = btf__type_by_id(btf, id); + e = btf_enum(t); + n = btf_vlen(t); + for (i = 0; i < n; e++, i++) { + enum bpf_map_type map_type = (enum bpf_map_type)e->val; + const char *map_type_name; + const char *map_type_str; + char buf[256]; + + map_type_name = btf__str_by_offset(btf, e->name_off); + map_type_str = libbpf_bpf_map_type_str(map_type); + ASSERT_OK_PTR(map_type_str, map_type_name); + + snprintf(buf, sizeof(buf), "BPF_MAP_TYPE_%s", map_type_str); + uppercase(buf); + + ASSERT_STREQ(buf, map_type_name, "exp_str_value"); + } + +cleanup: + btf__free(btf); +} + +/* + * Test case to check that all bpf_prog_type variants are covered by + * libbpf_bpf_prog_type_str. 
+ */ +static void test_libbpf_bpf_prog_type_str(void) +{ + struct btf *btf; + const struct btf_type *t; + const struct btf_enum *e; + int i, n, id; + + btf = btf__parse("/sys/kernel/btf/vmlinux", NULL); + if (!ASSERT_OK_PTR(btf, "btf_parse")) + return; + + /* find enum bpf_prog_type and enumerate each value */ + id = btf__find_by_name_kind(btf, "bpf_prog_type", BTF_KIND_ENUM); + if (!ASSERT_GT(id, 0, "bpf_prog_type_id")) + goto cleanup; + t = btf__type_by_id(btf, id); + e = btf_enum(t); + n = btf_vlen(t); + for (i = 0; i < n; e++, i++) { + enum bpf_prog_type prog_type = (enum bpf_prog_type)e->val; + const char *prog_type_name; + const char *prog_type_str; + char buf[256]; + + prog_type_name = btf__str_by_offset(btf, e->name_off); + prog_type_str = libbpf_bpf_prog_type_str(prog_type); + ASSERT_OK_PTR(prog_type_str, prog_type_name); + + snprintf(buf, sizeof(buf), "BPF_PROG_TYPE_%s", prog_type_str); + uppercase(buf); + + ASSERT_STREQ(buf, prog_type_name, "exp_str_value"); + } + +cleanup: + btf__free(btf); +} + +/* + * Run all libbpf str conversion tests. 
+ */ +void test_libbpf_str(void) +{ + if (test__start_subtest("bpf_attach_type_str")) + test_libbpf_bpf_attach_type_str(); + + if (test__start_subtest("bpf_link_type_str")) + test_libbpf_bpf_link_type_str(); + + if (test__start_subtest("bpf_map_type_str")) + test_libbpf_bpf_map_type_str(); + + if (test__start_subtest("bpf_prog_type_str")) + test_libbpf_bpf_prog_type_str(); +} diff --git a/tools/testing/selftests/bpf/prog_tests/lsm_cgroup.c b/tools/testing/selftests/bpf/prog_tests/lsm_cgroup.c new file mode 100644 index 000000000000..1102e4f42d2d --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/lsm_cgroup.c @@ -0,0 +1,313 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include <sys/types.h> +#include <sys/socket.h> +#include <test_progs.h> +#include <bpf/btf.h> + +#include "lsm_cgroup.skel.h" +#include "lsm_cgroup_nonvoid.skel.h" +#include "cgroup_helpers.h" +#include "network_helpers.h" + +#ifndef ENOTSUPP +#define ENOTSUPP 524 +#endif + +static struct btf *btf; + +static __u32 query_prog_cnt(int cgroup_fd, const char *attach_func) +{ + LIBBPF_OPTS(bpf_prog_query_opts, p); + int cnt = 0; + int i; + + ASSERT_OK(bpf_prog_query_opts(cgroup_fd, BPF_LSM_CGROUP, &p), "prog_query"); + + if (!attach_func) + return p.prog_cnt; + + /* When attach_func is provided, count the number of progs that + * attach to the given symbol. 
+ */ + + if (!btf) + btf = btf__load_vmlinux_btf(); + if (!ASSERT_OK(libbpf_get_error(btf), "btf_vmlinux")) + return -1; + + p.prog_ids = malloc(sizeof(u32) * p.prog_cnt); + p.prog_attach_flags = malloc(sizeof(u32) * p.prog_cnt); + ASSERT_OK(bpf_prog_query_opts(cgroup_fd, BPF_LSM_CGROUP, &p), "prog_query"); + + for (i = 0; i < p.prog_cnt; i++) { + struct bpf_prog_info info = {}; + __u32 info_len = sizeof(info); + int fd; + + fd = bpf_prog_get_fd_by_id(p.prog_ids[i]); + ASSERT_GE(fd, 0, "prog_get_fd_by_id"); + ASSERT_OK(bpf_obj_get_info_by_fd(fd, &info, &info_len), "prog_info_by_fd"); + close(fd); + + if (info.attach_btf_id == + btf__find_by_name_kind(btf, attach_func, BTF_KIND_FUNC)) + cnt++; + } + + free(p.prog_ids); + free(p.prog_attach_flags); + + return cnt; +} + +static void test_lsm_cgroup_functional(void) +{ + DECLARE_LIBBPF_OPTS(bpf_prog_attach_opts, attach_opts); + DECLARE_LIBBPF_OPTS(bpf_link_update_opts, update_opts); + int cgroup_fd = -1, cgroup_fd2 = -1, cgroup_fd3 = -1; + int listen_fd, client_fd, accepted_fd; + struct lsm_cgroup *skel = NULL; + int post_create_prog_fd2 = -1; + int post_create_prog_fd = -1; + int bind_link_fd2 = -1; + int bind_prog_fd2 = -1; + int alloc_prog_fd = -1; + int bind_prog_fd = -1; + int bind_link_fd = -1; + int clone_prog_fd = -1; + int err, fd, prio; + socklen_t socklen; + + cgroup_fd3 = test__join_cgroup("/sock_policy_empty"); + if (!ASSERT_GE(cgroup_fd3, 0, "create empty cgroup")) + goto close_cgroup; + + cgroup_fd2 = test__join_cgroup("/sock_policy_reuse"); + if (!ASSERT_GE(cgroup_fd2, 0, "create cgroup for reuse")) + goto close_cgroup; + + cgroup_fd = test__join_cgroup("/sock_policy"); + if (!ASSERT_GE(cgroup_fd, 0, "join_cgroup")) + goto close_cgroup; + + skel = lsm_cgroup__open_and_load(); + if (!ASSERT_OK_PTR(skel, "open_and_load")) + goto close_cgroup; + + post_create_prog_fd = bpf_program__fd(skel->progs.socket_post_create); + post_create_prog_fd2 = bpf_program__fd(skel->progs.socket_post_create2); + bind_prog_fd 
= bpf_program__fd(skel->progs.socket_bind); + bind_prog_fd2 = bpf_program__fd(skel->progs.socket_bind2); + alloc_prog_fd = bpf_program__fd(skel->progs.socket_alloc); + clone_prog_fd = bpf_program__fd(skel->progs.socket_clone); + + ASSERT_EQ(query_prog_cnt(cgroup_fd, "bpf_lsm_sk_alloc_security"), 0, "prog count"); + ASSERT_EQ(query_prog_cnt(cgroup_fd, NULL), 0, "total prog count"); + err = bpf_prog_attach(alloc_prog_fd, cgroup_fd, BPF_LSM_CGROUP, 0); + if (err == -ENOTSUPP) { + test__skip(); + goto close_cgroup; + } + if (!ASSERT_OK(err, "attach alloc_prog_fd")) + goto detach_cgroup; + ASSERT_EQ(query_prog_cnt(cgroup_fd, "bpf_lsm_sk_alloc_security"), 1, "prog count"); + ASSERT_EQ(query_prog_cnt(cgroup_fd, NULL), 1, "total prog count"); + + ASSERT_EQ(query_prog_cnt(cgroup_fd, "bpf_lsm_inet_csk_clone"), 0, "prog count"); + err = bpf_prog_attach(clone_prog_fd, cgroup_fd, BPF_LSM_CGROUP, 0); + if (!ASSERT_OK(err, "attach clone_prog_fd")) + goto detach_cgroup; + ASSERT_EQ(query_prog_cnt(cgroup_fd, "bpf_lsm_inet_csk_clone"), 1, "prog count"); + ASSERT_EQ(query_prog_cnt(cgroup_fd, NULL), 2, "total prog count"); + + /* Make sure replacing works. */ + + ASSERT_EQ(query_prog_cnt(cgroup_fd, "bpf_lsm_socket_post_create"), 0, "prog count"); + err = bpf_prog_attach(post_create_prog_fd, cgroup_fd, + BPF_LSM_CGROUP, 0); + if (!ASSERT_OK(err, "attach post_create_prog_fd")) + goto detach_cgroup; + ASSERT_EQ(query_prog_cnt(cgroup_fd, "bpf_lsm_socket_post_create"), 1, "prog count"); + ASSERT_EQ(query_prog_cnt(cgroup_fd, NULL), 3, "total prog count"); + + attach_opts.replace_prog_fd = post_create_prog_fd; + err = bpf_prog_attach_opts(post_create_prog_fd2, cgroup_fd, + BPF_LSM_CGROUP, &attach_opts); + if (!ASSERT_OK(err, "prog replace post_create_prog_fd")) + goto detach_cgroup; + ASSERT_EQ(query_prog_cnt(cgroup_fd, "bpf_lsm_socket_post_create"), 1, "prog count"); + ASSERT_EQ(query_prog_cnt(cgroup_fd, NULL), 3, "total prog count"); + + /* Try the same attach/replace via link API. 
*/ + + ASSERT_EQ(query_prog_cnt(cgroup_fd, "bpf_lsm_socket_bind"), 0, "prog count"); + bind_link_fd = bpf_link_create(bind_prog_fd, cgroup_fd, + BPF_LSM_CGROUP, NULL); + if (!ASSERT_GE(bind_link_fd, 0, "link create bind_prog_fd")) + goto detach_cgroup; + ASSERT_EQ(query_prog_cnt(cgroup_fd, "bpf_lsm_socket_bind"), 1, "prog count"); + ASSERT_EQ(query_prog_cnt(cgroup_fd, NULL), 4, "total prog count"); + + update_opts.old_prog_fd = bind_prog_fd; + update_opts.flags = BPF_F_REPLACE; + + err = bpf_link_update(bind_link_fd, bind_prog_fd2, &update_opts); + if (!ASSERT_OK(err, "link update bind_prog_fd")) + goto detach_cgroup; + ASSERT_EQ(query_prog_cnt(cgroup_fd, "bpf_lsm_socket_bind"), 1, "prog count"); + ASSERT_EQ(query_prog_cnt(cgroup_fd, NULL), 4, "total prog count"); + + /* Attach another instance of bind program to another cgroup. + * This should trigger the reuse of the trampoline shim (two + * programs attaching to the same btf_id). + */ + + ASSERT_EQ(query_prog_cnt(cgroup_fd, "bpf_lsm_socket_bind"), 1, "prog count"); + ASSERT_EQ(query_prog_cnt(cgroup_fd2, "bpf_lsm_socket_bind"), 0, "prog count"); + bind_link_fd2 = bpf_link_create(bind_prog_fd2, cgroup_fd2, + BPF_LSM_CGROUP, NULL); + if (!ASSERT_GE(bind_link_fd2, 0, "link create bind_prog_fd2")) + goto detach_cgroup; + ASSERT_EQ(query_prog_cnt(cgroup_fd2, "bpf_lsm_socket_bind"), 1, "prog count"); + ASSERT_EQ(query_prog_cnt(cgroup_fd, NULL), 4, "total prog count"); + ASSERT_EQ(query_prog_cnt(cgroup_fd2, NULL), 1, "total prog count"); + + /* AF_UNIX is prohibited. */ + + fd = socket(AF_UNIX, SOCK_STREAM, 0); + ASSERT_LT(fd, 0, "socket(AF_UNIX)"); + close(fd); + + /* AF_INET6 gets default policy (sk_priority). 
*/ + + fd = socket(AF_INET6, SOCK_STREAM, 0); + if (!ASSERT_GE(fd, 0, "socket(SOCK_STREAM)")) + goto detach_cgroup; + + prio = 0; + socklen = sizeof(prio); + ASSERT_GE(getsockopt(fd, SOL_SOCKET, SO_PRIORITY, &prio, &socklen), 0, + "getsockopt"); + ASSERT_EQ(prio, 123, "sk_priority"); + + close(fd); + + /* TX-only AF_PACKET is allowed. */ + + ASSERT_LT(socket(AF_PACKET, SOCK_RAW, htons(ETH_P_ALL)), 0, + "socket(AF_PACKET, ..., ETH_P_ALL)"); + + fd = socket(AF_PACKET, SOCK_RAW, 0); + ASSERT_GE(fd, 0, "socket(AF_PACKET, ..., 0)"); + + /* TX-only AF_PACKET can not be rebound. */ + + struct sockaddr_ll sa = { + .sll_family = AF_PACKET, + .sll_protocol = htons(ETH_P_ALL), + }; + ASSERT_LT(bind(fd, (struct sockaddr *)&sa, sizeof(sa)), 0, + "bind(ETH_P_ALL)"); + + close(fd); + + /* Trigger passive open. */ + + listen_fd = start_server(AF_INET6, SOCK_STREAM, "::1", 0, 0); + ASSERT_GE(listen_fd, 0, "start_server"); + client_fd = connect_to_fd(listen_fd, 0); + ASSERT_GE(client_fd, 0, "connect_to_fd"); + accepted_fd = accept(listen_fd, NULL, NULL); + ASSERT_GE(accepted_fd, 0, "accept"); + + prio = 0; + socklen = sizeof(prio); + ASSERT_GE(getsockopt(accepted_fd, SOL_SOCKET, SO_PRIORITY, &prio, &socklen), 0, + "getsockopt"); + ASSERT_EQ(prio, 234, "sk_priority"); + + /* These are replaced and never called. */ + ASSERT_EQ(skel->bss->called_socket_post_create, 0, "called_create"); + ASSERT_EQ(skel->bss->called_socket_bind, 0, "called_bind"); + + /* AF_INET6+SOCK_STREAM + * AF_PACKET+SOCK_RAW + * listen_fd + * client_fd + * accepted_fd + */ + ASSERT_EQ(skel->bss->called_socket_post_create2, 5, "called_create2"); + + /* start_server + * bind(ETH_P_ALL) + */ + ASSERT_EQ(skel->bss->called_socket_bind2, 2, "called_bind2"); + /* Single accept(). 
*/ + ASSERT_EQ(skel->bss->called_socket_clone, 1, "called_clone"); + + /* AF_UNIX+SOCK_STREAM (failed) + * AF_INET6+SOCK_STREAM + * AF_PACKET+SOCK_RAW (failed) + * AF_PACKET+SOCK_RAW + * listen_fd + * client_fd + * accepted_fd + */ + ASSERT_EQ(skel->bss->called_socket_alloc, 7, "called_alloc"); + + close(listen_fd); + close(client_fd); + close(accepted_fd); + + /* Make sure other cgroup doesn't trigger the programs. */ + + if (!ASSERT_OK(join_cgroup("/sock_policy_empty"), "join root cgroup")) + goto detach_cgroup; + + fd = socket(AF_INET6, SOCK_STREAM, 0); + if (!ASSERT_GE(fd, 0, "socket(SOCK_STREAM)")) + goto detach_cgroup; + + prio = 0; + socklen = sizeof(prio); + ASSERT_GE(getsockopt(fd, SOL_SOCKET, SO_PRIORITY, &prio, &socklen), 0, + "getsockopt"); + ASSERT_EQ(prio, 0, "sk_priority"); + + close(fd); + +detach_cgroup: + ASSERT_GE(bpf_prog_detach2(post_create_prog_fd2, cgroup_fd, + BPF_LSM_CGROUP), 0, "detach_create"); + close(bind_link_fd); + /* Don't close bind_link_fd2, exercise cgroup release cleanup. 
*/ + ASSERT_GE(bpf_prog_detach2(alloc_prog_fd, cgroup_fd, + BPF_LSM_CGROUP), 0, "detach_alloc"); + ASSERT_GE(bpf_prog_detach2(clone_prog_fd, cgroup_fd, + BPF_LSM_CGROUP), 0, "detach_clone"); + +close_cgroup: + close(cgroup_fd); + close(cgroup_fd2); + close(cgroup_fd3); + lsm_cgroup__destroy(skel); +} + +static void test_lsm_cgroup_nonvoid(void) +{ + struct lsm_cgroup_nonvoid *skel = NULL; + + skel = lsm_cgroup_nonvoid__open_and_load(); + ASSERT_NULL(skel, "open succeeds"); + lsm_cgroup_nonvoid__destroy(skel); +} + +void test_lsm_cgroup(void) +{ + if (test__start_subtest("functional")) + test_lsm_cgroup_functional(); + if (test__start_subtest("nonvoid")) + test_lsm_cgroup_nonvoid(); + btf__free(btf); +} diff --git a/tools/testing/selftests/bpf/prog_tests/probe_user.c b/tools/testing/selftests/bpf/prog_tests/probe_user.c index abf890d066eb..34dbd2adc157 100644 --- a/tools/testing/selftests/bpf/prog_tests/probe_user.c +++ b/tools/testing/selftests/bpf/prog_tests/probe_user.c @@ -4,25 +4,35 @@ /* TODO: corrupts other tests uses connect() */ void serial_test_probe_user(void) { - const char *prog_name = "handle_sys_connect"; + static const char *const prog_names[] = { + "handle_sys_connect", +#if defined(__s390x__) + "handle_sys_socketcall", +#endif + }; + enum { prog_count = ARRAY_SIZE(prog_names) }; const char *obj_file = "./test_probe_user.o"; DECLARE_LIBBPF_OPTS(bpf_object_open_opts, opts, ); int err, results_map_fd, sock_fd, duration = 0; struct sockaddr curr, orig, tmp; struct sockaddr_in *in = (struct sockaddr_in *)&curr; - struct bpf_link *kprobe_link = NULL; - struct bpf_program *kprobe_prog; + struct bpf_link *kprobe_links[prog_count] = {}; + struct bpf_program *kprobe_progs[prog_count]; struct bpf_object *obj; static const int zero = 0; + size_t i; obj = bpf_object__open_file(obj_file, &opts); if (!ASSERT_OK_PTR(obj, "obj_open_file")) return; - kprobe_prog = bpf_object__find_program_by_name(obj, prog_name); - if (CHECK(!kprobe_prog, "find_probe", - "prog '%s' 
not found\n", prog_name)) - goto cleanup; + for (i = 0; i < prog_count; i++) { + kprobe_progs[i] = + bpf_object__find_program_by_name(obj, prog_names[i]); + if (CHECK(!kprobe_progs[i], "find_probe", + "prog '%s' not found\n", prog_names[i])) + goto cleanup; + } err = bpf_object__load(obj); if (CHECK(err, "obj_load", "err %d\n", err)) @@ -33,9 +43,11 @@ void serial_test_probe_user(void) "err %d\n", results_map_fd)) goto cleanup; - kprobe_link = bpf_program__attach(kprobe_prog); - if (!ASSERT_OK_PTR(kprobe_link, "attach_kprobe")) - goto cleanup; + for (i = 0; i < prog_count; i++) { + kprobe_links[i] = bpf_program__attach(kprobe_progs[i]); + if (!ASSERT_OK_PTR(kprobe_links[i], "attach_kprobe")) + goto cleanup; + } memset(&curr, 0, sizeof(curr)); in->sin_family = AF_INET; @@ -69,6 +81,7 @@ void serial_test_probe_user(void) inet_ntoa(in->sin_addr), ntohs(in->sin_port))) goto cleanup; cleanup: - bpf_link__destroy(kprobe_link); + for (i = 0; i < prog_count; i++) + bpf_link__destroy(kprobe_links[i]); bpf_object__close(obj); } diff --git a/tools/testing/selftests/bpf/prog_tests/resolve_btfids.c b/tools/testing/selftests/bpf/prog_tests/resolve_btfids.c index f4a13d9dd5c8..c197261d02e2 100644 --- a/tools/testing/selftests/bpf/prog_tests/resolve_btfids.c +++ b/tools/testing/selftests/bpf/prog_tests/resolve_btfids.c @@ -44,7 +44,7 @@ BTF_ID(union, U) BTF_ID(func, func) extern __u32 test_list_global[]; -BTF_ID_LIST_GLOBAL(test_list_global) +BTF_ID_LIST_GLOBAL(test_list_global, 1) BTF_ID_UNUSED BTF_ID(typedef, S) BTF_ID(typedef, T) diff --git a/tools/testing/selftests/bpf/prog_tests/ringbuf_multi.c b/tools/testing/selftests/bpf/prog_tests/ringbuf_multi.c index eb5f7f5aa81a..1455911d9fcb 100644 --- a/tools/testing/selftests/bpf/prog_tests/ringbuf_multi.c +++ b/tools/testing/selftests/bpf/prog_tests/ringbuf_multi.c @@ -50,6 +50,13 @@ void test_ringbuf_multi(void) if (CHECK(!skel, "skel_open", "skeleton open failed\n")) return; + /* validate ringbuf size adjustment logic */ + 
ASSERT_EQ(bpf_map__max_entries(skel->maps.ringbuf1), page_size, "rb1_size_before"); + ASSERT_OK(bpf_map__set_max_entries(skel->maps.ringbuf1, page_size + 1), "rb1_resize"); + ASSERT_EQ(bpf_map__max_entries(skel->maps.ringbuf1), 2 * page_size, "rb1_size_after"); + ASSERT_OK(bpf_map__set_max_entries(skel->maps.ringbuf1, page_size), "rb1_reset"); + ASSERT_EQ(bpf_map__max_entries(skel->maps.ringbuf1), page_size, "rb1_size_final"); + proto_fd = bpf_map_create(BPF_MAP_TYPE_RINGBUF, NULL, 0, 0, page_size, NULL); if (CHECK(proto_fd < 0, "bpf_map_create", "bpf_map_create failed\n")) goto cleanup; @@ -65,6 +72,10 @@ void test_ringbuf_multi(void) close(proto_fd); proto_fd = -1; + /* make sure we can't resize ringbuf after object load */ + if (!ASSERT_ERR(bpf_map__set_max_entries(skel->maps.ringbuf1, 3 * page_size), "rb1_resize_after_load")) + goto cleanup; + /* only trigger BPF program for current process */ skel->bss->pid = getpid(); diff --git a/tools/testing/selftests/bpf/prog_tests/send_signal.c b/tools/testing/selftests/bpf/prog_tests/send_signal.c index d71226e34c34..d63a20fbed33 100644 --- a/tools/testing/selftests/bpf/prog_tests/send_signal.c +++ b/tools/testing/selftests/bpf/prog_tests/send_signal.c @@ -64,7 +64,7 @@ static void test_send_signal_common(struct perf_event_attr *attr, ASSERT_EQ(read(pipe_p2c[0], buf, 1), 1, "pipe_read"); /* wait a little for signal handler */ - for (int i = 0; i < 100000000 && !sigusr1_received; i++) + for (int i = 0; i < 1000000000 && !sigusr1_received; i++) j /= i + j + 1; buf[0] = sigusr1_received ? 
'2' : '0'; diff --git a/tools/testing/selftests/bpf/prog_tests/skeleton.c b/tools/testing/selftests/bpf/prog_tests/skeleton.c index 180afd632f4c..99dac5292b41 100644 --- a/tools/testing/selftests/bpf/prog_tests/skeleton.c +++ b/tools/testing/selftests/bpf/prog_tests/skeleton.c @@ -122,6 +122,8 @@ void test_skeleton(void) ASSERT_EQ(skel->bss->out_mostly_var, 123, "out_mostly_var"); + ASSERT_EQ(bss->huge_arr[ARRAY_SIZE(bss->huge_arr) - 1], 123, "huge_arr"); + elf_bytes = test_skeleton__elf_bytes(&elf_bytes_sz); ASSERT_OK_PTR(elf_bytes, "elf_bytes"); ASSERT_GE(elf_bytes_sz, 0, "elf_bytes_sz"); diff --git a/tools/testing/selftests/bpf/prog_tests/sock_fields.c b/tools/testing/selftests/bpf/prog_tests/sock_fields.c index 9d211b5c22c4..7d23166c77af 100644 --- a/tools/testing/selftests/bpf/prog_tests/sock_fields.c +++ b/tools/testing/selftests/bpf/prog_tests/sock_fields.c @@ -394,7 +394,6 @@ void serial_test_sock_fields(void) test(); done: - test_sock_fields__detach(skel); test_sock_fields__destroy(skel); if (child_cg_fd >= 0) close(child_cg_fd); diff --git a/tools/testing/selftests/bpf/prog_tests/tc_redirect.c b/tools/testing/selftests/bpf/prog_tests/tc_redirect.c index 958dae769c52..cb6a53b3e023 100644 --- a/tools/testing/selftests/bpf/prog_tests/tc_redirect.c +++ b/tools/testing/selftests/bpf/prog_tests/tc_redirect.c @@ -646,7 +646,7 @@ static void test_tcp_clear_dtime(struct test_tc_dtime *skel) __u32 *errs = skel->bss->errs[t]; skel->bss->test = t; - test_inet_dtime(AF_INET6, SOCK_STREAM, IP6_DST, 0); + test_inet_dtime(AF_INET6, SOCK_STREAM, IP6_DST, 50000 + t); ASSERT_EQ(dtimes[INGRESS_FWDNS_P100], 0, dtime_cnt_str(t, INGRESS_FWDNS_P100)); @@ -683,7 +683,7 @@ static void test_tcp_dtime(struct test_tc_dtime *skel, int family, bool bpf_fwd) errs = skel->bss->errs[t]; skel->bss->test = t; - test_inet_dtime(family, SOCK_STREAM, addr, 0); + test_inet_dtime(family, SOCK_STREAM, addr, 50000 + t); /* fwdns_prio100 prog does not read delivery_time_type, so * kernel puts the 
(rcv) timetamp in __sk_buff->tstamp @@ -715,13 +715,13 @@ static void test_udp_dtime(struct test_tc_dtime *skel, int family, bool bpf_fwd) errs = skel->bss->errs[t]; skel->bss->test = t; - test_inet_dtime(family, SOCK_DGRAM, addr, 0); + test_inet_dtime(family, SOCK_DGRAM, addr, 50000 + t); ASSERT_EQ(dtimes[INGRESS_FWDNS_P100], 0, dtime_cnt_str(t, INGRESS_FWDNS_P100)); /* non mono delivery time is not forwarded */ ASSERT_EQ(dtimes[INGRESS_FWDNS_P101], 0, - dtime_cnt_str(t, INGRESS_FWDNS_P100)); + dtime_cnt_str(t, INGRESS_FWDNS_P101)); for (i = EGRESS_FWDNS_P100; i < SET_DTIME; i++) ASSERT_GT(dtimes[i], 0, dtime_cnt_str(t, i)); diff --git a/tools/testing/selftests/bpf/prog_tests/test_tunnel.c b/tools/testing/selftests/bpf/prog_tests/test_tunnel.c index 3bba4a2a0530..eea274110267 100644 --- a/tools/testing/selftests/bpf/prog_tests/test_tunnel.c +++ b/tools/testing/selftests/bpf/prog_tests/test_tunnel.c @@ -82,6 +82,7 @@ #define MAC_TUNL_DEV0 "52:54:00:d9:01:00" #define MAC_TUNL_DEV1 "52:54:00:d9:02:00" +#define MAC_VETH1 "52:54:00:d9:03:00" #define VXLAN_TUNL_DEV0 "vxlan00" #define VXLAN_TUNL_DEV1 "vxlan11" @@ -108,10 +109,9 @@ static int config_device(void) { SYS("ip netns add at_ns0"); - SYS("ip link add veth0 type veth peer name veth1"); + SYS("ip link add veth0 address " MAC_VETH1 " type veth peer name veth1"); SYS("ip link set veth0 netns at_ns0"); SYS("ip addr add " IP4_ADDR1_VETH1 "/24 dev veth1"); - SYS("ip addr add " IP4_ADDR2_VETH1 "/24 dev veth1"); SYS("ip link set dev veth1 up mtu 1500"); SYS("ip netns exec at_ns0 ip addr add " IP4_ADDR_VETH0 "/24 dev veth0"); SYS("ip netns exec at_ns0 ip link set dev veth0 up mtu 1500"); @@ -140,6 +140,8 @@ static int add_vxlan_tunnel(void) VXLAN_TUNL_DEV0, IP4_ADDR_TUNL_DEV0); SYS("ip netns exec at_ns0 ip neigh add %s lladdr %s dev %s", IP4_ADDR_TUNL_DEV1, MAC_TUNL_DEV1, VXLAN_TUNL_DEV0); + SYS("ip netns exec at_ns0 ip neigh add %s lladdr %s dev veth0", + IP4_ADDR2_VETH1, MAC_VETH1); /* root namespace */ SYS("ip link add 
dev %s type vxlan external gbp dstport 4789", @@ -277,6 +279,17 @@ static void test_vxlan_tunnel(void) if (attach_tc_prog(&tc_hook, get_src_prog_fd, set_src_prog_fd)) goto done; + /* load and attach bpf prog to veth dev tc hook point */ + ifindex = if_nametoindex("veth1"); + if (!ASSERT_NEQ(ifindex, 0, "veth1 ifindex")) + goto done; + tc_hook.ifindex = ifindex; + set_dst_prog_fd = bpf_program__fd(skel->progs.veth_set_outer_dst); + if (!ASSERT_GE(set_dst_prog_fd, 0, "bpf_program__fd")) + goto done; + if (attach_tc_prog(&tc_hook, set_dst_prog_fd, -1)) + goto done; + /* load and attach prog set_md to tunnel dev tc hook point at_ns0 */ nstoken = open_netns("at_ns0"); if (!ASSERT_OK_PTR(nstoken, "setns src")) diff --git a/tools/testing/selftests/bpf/prog_tests/usdt.c b/tools/testing/selftests/bpf/prog_tests/usdt.c index 5f733d50b0d7..9ad9da0f215e 100644 --- a/tools/testing/selftests/bpf/prog_tests/usdt.c +++ b/tools/testing/selftests/bpf/prog_tests/usdt.c @@ -12,7 +12,7 @@ int lets_test_this(int); static volatile int idx = 2; static volatile __u64 bla = 0xFEDCBA9876543210ULL; -static volatile short nums[] = {-1, -2, -3, }; +static volatile short nums[] = {-1, -2, -3, -4}; static volatile struct { int x; diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_synproxy.c b/tools/testing/selftests/bpf/prog_tests/xdp_synproxy.c new file mode 100644 index 000000000000..874a846e298c --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/xdp_synproxy.c @@ -0,0 +1,183 @@ +// SPDX-License-Identifier: LGPL-2.1 OR BSD-2-Clause +/* Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */ + +#define _GNU_SOURCE +#include <test_progs.h> +#include <network_helpers.h> +#include <ctype.h> + +#define CMD_OUT_BUF_SIZE 1023 + +#define SYS(cmd) ({ \ + if (!ASSERT_OK(system(cmd), (cmd))) \ + goto out; \ +}) + +#define SYS_OUT(cmd, ...) 
({ \ + char buf[1024]; \ + snprintf(buf, sizeof(buf), (cmd), ##__VA_ARGS__); \ + FILE *f = popen(buf, "r"); \ + if (!ASSERT_OK_PTR(f, buf)) \ + goto out; \ + f; \ +}) + +/* out must be at least `size * 4 + 1` bytes long */ +static void escape_str(char *out, const char *in, size_t size) +{ + static const char *hex = "0123456789ABCDEF"; + size_t i; + + for (i = 0; i < size; i++) { + if (isprint(in[i]) && in[i] != '\\' && in[i] != '\'') { + *out++ = in[i]; + } else { + *out++ = '\\'; + *out++ = 'x'; + *out++ = hex[(in[i] >> 4) & 0xf]; + *out++ = hex[in[i] & 0xf]; + } + } + *out++ = '\0'; +} + +static bool expect_str(char *buf, size_t size, const char *str, const char *name) +{ + static char escbuf_expected[CMD_OUT_BUF_SIZE * 4]; + static char escbuf_actual[CMD_OUT_BUF_SIZE * 4]; + static int duration = 0; + bool ok; + + ok = size == strlen(str) && !memcmp(buf, str, size); + + if (!ok) { + escape_str(escbuf_expected, str, strlen(str)); + escape_str(escbuf_actual, buf, size); + } + CHECK(!ok, name, "unexpected %s: actual '%s' != expected '%s'\n", + name, escbuf_actual, escbuf_expected); + + return ok; +} + +static void test_synproxy(bool xdp) +{ + int server_fd = -1, client_fd = -1, accept_fd = -1; + char *prog_id = NULL, *prog_id_end; + struct nstoken *ns = NULL; + FILE *ctrl_file = NULL; + char buf[CMD_OUT_BUF_SIZE]; + size_t size; + + SYS("ip netns add synproxy"); + + SYS("ip link add tmp0 type veth peer name tmp1"); + SYS("ip link set tmp1 netns synproxy"); + SYS("ip link set tmp0 up"); + SYS("ip addr replace 198.18.0.1/24 dev tmp0"); + + /* When checksum offload is enabled, the XDP program sees wrong + * checksums and drops packets. + */ + SYS("ethtool -K tmp0 tx off"); + if (xdp) + /* Workaround required for veth. 
*/ + SYS("ip link set tmp0 xdp object xdp_dummy.o section xdp 2> /dev/null"); + + ns = open_netns("synproxy"); + if (!ASSERT_OK_PTR(ns, "setns")) + goto out; + + SYS("ip link set lo up"); + SYS("ip link set tmp1 up"); + SYS("ip addr replace 198.18.0.2/24 dev tmp1"); + SYS("sysctl -w net.ipv4.tcp_syncookies=2"); + SYS("sysctl -w net.ipv4.tcp_timestamps=1"); + SYS("sysctl -w net.netfilter.nf_conntrack_tcp_loose=0"); + SYS("iptables -t raw -I PREROUTING \ + -i tmp1 -p tcp -m tcp --syn --dport 8080 -j CT --notrack"); + SYS("iptables -t filter -A INPUT \ + -i tmp1 -p tcp -m tcp --dport 8080 -m state --state INVALID,UNTRACKED \ + -j SYNPROXY --sack-perm --timestamp --wscale 7 --mss 1460"); + SYS("iptables -t filter -A INPUT \ + -i tmp1 -m state --state INVALID -j DROP"); + + ctrl_file = SYS_OUT("./xdp_synproxy --iface tmp1 --ports 8080 \ + --single --mss4 1460 --mss6 1440 \ + --wscale 7 --ttl 64%s", xdp ? "" : " --tc"); + size = fread(buf, 1, sizeof(buf), ctrl_file); + pclose(ctrl_file); + if (!expect_str(buf, size, "Total SYNACKs generated: 0\n", + "initial SYNACKs")) + goto out; + + if (!xdp) { + ctrl_file = SYS_OUT("tc filter show dev tmp1 ingress"); + size = fread(buf, 1, sizeof(buf), ctrl_file); + pclose(ctrl_file); + prog_id = memmem(buf, size, " id ", 4); + if (!ASSERT_OK_PTR(prog_id, "find prog id")) + goto out; + prog_id += 4; + if (!ASSERT_LT(prog_id, buf + size, "find prog id begin")) + goto out; + prog_id_end = prog_id; + while (prog_id_end < buf + size && *prog_id_end >= '0' && + *prog_id_end <= '9') + prog_id_end++; + if (!ASSERT_LT(prog_id_end, buf + size, "find prog id end")) + goto out; + *prog_id_end = '\0'; + } + + server_fd = start_server(AF_INET, SOCK_STREAM, "198.18.0.2", 8080, 0); + if (!ASSERT_GE(server_fd, 0, "start_server")) + goto out; + + close_netns(ns); + ns = NULL; + + client_fd = connect_to_fd(server_fd, 10000); + if (!ASSERT_GE(client_fd, 0, "connect_to_fd")) + goto out; + + accept_fd = accept(server_fd, NULL, NULL); + if 
(!ASSERT_GE(accept_fd, 0, "accept")) + goto out; + + ns = open_netns("synproxy"); + if (!ASSERT_OK_PTR(ns, "setns")) + goto out; + + if (xdp) + ctrl_file = SYS_OUT("./xdp_synproxy --iface tmp1 --single"); + else + ctrl_file = SYS_OUT("./xdp_synproxy --prog %s --single", + prog_id); + size = fread(buf, 1, sizeof(buf), ctrl_file); + pclose(ctrl_file); + if (!expect_str(buf, size, "Total SYNACKs generated: 1\n", + "SYNACKs after connection")) + goto out; + +out: + if (accept_fd >= 0) + close(accept_fd); + if (client_fd >= 0) + close(client_fd); + if (server_fd >= 0) + close(server_fd); + if (ns) + close_netns(ns); + + system("ip link del tmp0"); + system("ip netns del synproxy"); +} + +void test_xdp_synproxy(void) +{ + if (test__start_subtest("xdp")) + test_synproxy(true); + if (test__start_subtest("tc")) + test_synproxy(false); +} diff --git a/tools/testing/selftests/bpf/progs/bpf_hashmap_full_update_bench.c b/tools/testing/selftests/bpf/progs/bpf_hashmap_full_update_bench.c new file mode 100644 index 000000000000..56957557e3e1 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/bpf_hashmap_full_update_bench.c @@ -0,0 +1,40 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2022 Bytedance */ + +#include "vmlinux.h" +#include <bpf/bpf_helpers.h> +#include "bpf_misc.h" + +char _license[] SEC("license") = "GPL"; + +#define MAX_ENTRIES 1000 + +struct { + __uint(type, BPF_MAP_TYPE_HASH); + __type(key, u32); + __type(value, u64); + __uint(max_entries, MAX_ENTRIES); +} hash_map_bench SEC(".maps"); + +u64 __attribute__((__aligned__(256))) percpu_time[256]; +u64 nr_loops; + +static int loop_update_callback(__u32 index, u32 *key) +{ + u64 init_val = 1; + + bpf_map_update_elem(&hash_map_bench, key, &init_val, BPF_ANY); + return 0; +} + +SEC("fentry/" SYS_PREFIX "sys_getpgid") +int benchmark(void *ctx) +{ + u32 cpu = bpf_get_smp_processor_id(); + u32 key = cpu + MAX_ENTRIES; + u64 start_time = bpf_ktime_get_ns(); + + bpf_loop(nr_loops, loop_update_callback, &key, 0); + 
percpu_time[cpu & 255] = bpf_ktime_get_ns() - start_time; + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/bpf_iter.h b/tools/testing/selftests/bpf/progs/bpf_iter.h index 97ec8bc76ae6..e9846606690d 100644 --- a/tools/testing/selftests/bpf/progs/bpf_iter.h +++ b/tools/testing/selftests/bpf/progs/bpf_iter.h @@ -22,6 +22,7 @@ #define BTF_F_NONAME BTF_F_NONAME___not_used #define BTF_F_PTR_RAW BTF_F_PTR_RAW___not_used #define BTF_F_ZERO BTF_F_ZERO___not_used +#define bpf_iter__ksym bpf_iter__ksym___not_used #include "vmlinux.h" #undef bpf_iter_meta #undef bpf_iter__bpf_map @@ -44,6 +45,7 @@ #undef BTF_F_NONAME #undef BTF_F_PTR_RAW #undef BTF_F_ZERO +#undef bpf_iter__ksym struct bpf_iter_meta { struct seq_file *seq; @@ -151,3 +153,8 @@ enum { BTF_F_PTR_RAW = (1ULL << 2), BTF_F_ZERO = (1ULL << 3), }; + +struct bpf_iter__ksym { + struct bpf_iter_meta *meta; + struct kallsym_iter *ksym; +}; diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_ksym.c b/tools/testing/selftests/bpf/progs/bpf_iter_ksym.c new file mode 100644 index 000000000000..285c008cbf9c --- /dev/null +++ b/tools/testing/selftests/bpf/progs/bpf_iter_ksym.c @@ -0,0 +1,74 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2022, Oracle and/or its affiliates. */ +#include "bpf_iter.h" +#include <bpf/bpf_helpers.h> + +char _license[] SEC("license") = "GPL"; + +unsigned long last_sym_value = 0; + +static inline char tolower(char c) +{ + if (c >= 'A' && c <= 'Z') + c += ('a' - 'A'); + return c; +} + +static inline char toupper(char c) +{ + if (c >= 'a' && c <= 'z') + c -= ('a' - 'A'); + return c; +} + +/* Dump symbols with max size; the latter is calculated by caching symbol N value + * and when iterating on symbol N+1, we can print max size of symbol N via + * address of N+1 - address of N. 
+ */ +SEC("iter/ksym") +int dump_ksym(struct bpf_iter__ksym *ctx) +{ + struct seq_file *seq = ctx->meta->seq; + struct kallsym_iter *iter = ctx->ksym; + __u32 seq_num = ctx->meta->seq_num; + unsigned long value; + char type; + int ret; + + if (!iter) + return 0; + + if (seq_num == 0) { + BPF_SEQ_PRINTF(seq, "ADDR TYPE NAME MODULE_NAME KIND MAX_SIZE\n"); + return 0; + } + if (last_sym_value) + BPF_SEQ_PRINTF(seq, "0x%x\n", iter->value - last_sym_value); + else + BPF_SEQ_PRINTF(seq, "\n"); + + value = iter->show_value ? iter->value : 0; + + last_sym_value = value; + + type = iter->type; + + if (iter->module_name[0]) { + type = iter->exported ? toupper(type) : tolower(type); + BPF_SEQ_PRINTF(seq, "0x%llx %c %s [ %s ] ", + value, type, iter->name, iter->module_name); + } else { + BPF_SEQ_PRINTF(seq, "0x%llx %c %s ", value, type, iter->name); + } + if (!iter->pos_arch_end || iter->pos_arch_end > iter->pos) + BPF_SEQ_PRINTF(seq, "CORE "); + else if (!iter->pos_mod_end || iter->pos_mod_end > iter->pos) + BPF_SEQ_PRINTF(seq, "MOD "); + else if (!iter->pos_ftrace_mod_end || iter->pos_ftrace_mod_end > iter->pos) + BPF_SEQ_PRINTF(seq, "FTRACE_MOD "); + else if (!iter->pos_bpf_end || iter->pos_bpf_end > iter->pos) + BPF_SEQ_PRINTF(seq, "BPF "); + else + BPF_SEQ_PRINTF(seq, "KPROBE "); + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/bpf_loop.c b/tools/testing/selftests/bpf/progs/bpf_loop.c index e08565282759..de1fc82d2710 100644 --- a/tools/testing/selftests/bpf/progs/bpf_loop.c +++ b/tools/testing/selftests/bpf/progs/bpf_loop.c @@ -11,11 +11,19 @@ struct callback_ctx { int output; }; +struct { + __uint(type, BPF_MAP_TYPE_HASH); + __uint(max_entries, 32); + __type(key, int); + __type(value, int); +} map1 SEC(".maps"); + /* These should be set by the user program */ u32 nested_callback_nr_loops; u32 stop_index = -1; u32 nr_loops; int pid; +int callback_selector; /* Making these global variables so that the userspace program * can verify the output through the 
skeleton @@ -111,3 +119,109 @@ int prog_nested_calls(void *ctx) return 0; } + +static int callback_set_f0(int i, void *ctx) +{ + g_output = 0xF0; + return 0; +} + +static int callback_set_0f(int i, void *ctx) +{ + g_output = 0x0F; + return 0; +} + +/* + * non-constant callback is a corner case for bpf_loop inline logic + */ +SEC("fentry/" SYS_PREFIX "sys_nanosleep") +int prog_non_constant_callback(void *ctx) +{ + struct callback_ctx data = {}; + + if (bpf_get_current_pid_tgid() >> 32 != pid) + return 0; + + int (*callback)(int i, void *ctx); + + g_output = 0; + + if (callback_selector == 0x0F) + callback = callback_set_0f; + else + callback = callback_set_f0; + + bpf_loop(1, callback, NULL, 0); + + return 0; +} + +static int stack_check_inner_callback(void *ctx) +{ + return 0; +} + +static int map1_lookup_elem(int key) +{ + int *val = bpf_map_lookup_elem(&map1, &key); + + return val ? *val : -1; +} + +static void map1_update_elem(int key, int val) +{ + bpf_map_update_elem(&map1, &key, &val, BPF_ANY); +} + +static int stack_check_outer_callback(void *ctx) +{ + int a = map1_lookup_elem(1); + int b = map1_lookup_elem(2); + int c = map1_lookup_elem(3); + int d = map1_lookup_elem(4); + int e = map1_lookup_elem(5); + int f = map1_lookup_elem(6); + + bpf_loop(1, stack_check_inner_callback, NULL, 0); + + map1_update_elem(1, a + 1); + map1_update_elem(2, b + 1); + map1_update_elem(3, c + 1); + map1_update_elem(4, d + 1); + map1_update_elem(5, e + 1); + map1_update_elem(6, f + 1); + + return 0; +} + +/* Some of the local variables in stack_check and + * stack_check_outer_callback would be allocated on stack by + * compiler. This test should verify that stack content for these + * variables is preserved between calls to bpf_loop (might be an issue + * if loop inlining allocates stack slots incorrectly). 
+ */ +SEC("fentry/" SYS_PREFIX "sys_nanosleep") +int stack_check(void *ctx) +{ + if (bpf_get_current_pid_tgid() >> 32 != pid) + return 0; + + int a = map1_lookup_elem(7); + int b = map1_lookup_elem(8); + int c = map1_lookup_elem(9); + int d = map1_lookup_elem(10); + int e = map1_lookup_elem(11); + int f = map1_lookup_elem(12); + + bpf_loop(1, stack_check_outer_callback, NULL, 0); + + map1_update_elem(7, a + 1); + map1_update_elem(8, b + 1); + map1_update_elem(9, c + 1); + map1_update_elem(10, d + 1); + map1_update_elem(11, e + 1); + map1_update_elem(12, f + 1); + + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/bpf_syscall_macro.c b/tools/testing/selftests/bpf/progs/bpf_syscall_macro.c index 05838ed9b89c..e1e11897e99b 100644 --- a/tools/testing/selftests/bpf/progs/bpf_syscall_macro.c +++ b/tools/testing/selftests/bpf/progs/bpf_syscall_macro.c @@ -64,9 +64,9 @@ int BPF_KPROBE(handle_sys_prctl) return 0; } -SEC("kprobe/" SYS_PREFIX "sys_prctl") -int BPF_KPROBE_SYSCALL(prctl_enter, int option, unsigned long arg2, - unsigned long arg3, unsigned long arg4, unsigned long arg5) +SEC("ksyscall/prctl") +int BPF_KSYSCALL(prctl_enter, int option, unsigned long arg2, + unsigned long arg3, unsigned long arg4, unsigned long arg5) { pid_t pid = bpf_get_current_pid_tgid() >> 32; diff --git a/tools/testing/selftests/bpf/progs/bpf_tracing_net.h b/tools/testing/selftests/bpf/progs/bpf_tracing_net.h index 1c1289ba5fc5..98dd2c4815f0 100644 --- a/tools/testing/selftests/bpf/progs/bpf_tracing_net.h +++ b/tools/testing/selftests/bpf/progs/bpf_tracing_net.h @@ -8,6 +8,7 @@ #define SOL_SOCKET 1 #define SO_SNDBUF 7 #define __SO_ACCEPTCON (1 << 16) +#define SO_PRIORITY 12 #define SOL_TCP 6 #define TCP_CONGESTION 13 diff --git a/tools/testing/selftests/bpf/progs/btf__core_reloc_enum64val.c b/tools/testing/selftests/bpf/progs/btf__core_reloc_enum64val.c new file mode 100644 index 000000000000..888e79db6a77 --- /dev/null +++ 
b/tools/testing/selftests/bpf/progs/btf__core_reloc_enum64val.c @@ -0,0 +1,3 @@ +#include "core_reloc_types.h" + +void f(struct core_reloc_enum64val x) {} diff --git a/tools/testing/selftests/bpf/progs/btf__core_reloc_enum64val___diff.c b/tools/testing/selftests/bpf/progs/btf__core_reloc_enum64val___diff.c new file mode 100644 index 000000000000..194749130d87 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/btf__core_reloc_enum64val___diff.c @@ -0,0 +1,3 @@ +#include "core_reloc_types.h" + +void f(struct core_reloc_enum64val___diff x) {} diff --git a/tools/testing/selftests/bpf/progs/btf__core_reloc_enum64val___err_missing.c b/tools/testing/selftests/bpf/progs/btf__core_reloc_enum64val___err_missing.c new file mode 100644 index 000000000000..3d732d4193e4 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/btf__core_reloc_enum64val___err_missing.c @@ -0,0 +1,3 @@ +#include "core_reloc_types.h" + +void f(struct core_reloc_enum64val___err_missing x) {} diff --git a/tools/testing/selftests/bpf/progs/btf__core_reloc_enum64val___val3_missing.c b/tools/testing/selftests/bpf/progs/btf__core_reloc_enum64val___val3_missing.c new file mode 100644 index 000000000000..17cf5d6a848d --- /dev/null +++ b/tools/testing/selftests/bpf/progs/btf__core_reloc_enum64val___val3_missing.c @@ -0,0 +1,3 @@ +#include "core_reloc_types.h" + +void f(struct core_reloc_enum64val___val3_missing x) {} diff --git a/tools/testing/selftests/bpf/progs/btf__core_reloc_type_based___diff.c b/tools/testing/selftests/bpf/progs/btf__core_reloc_type_based___diff.c new file mode 100644 index 000000000000..57ae2c258928 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/btf__core_reloc_type_based___diff.c @@ -0,0 +1,3 @@ +#include "core_reloc_types.h" + +void f(struct core_reloc_type_based___diff x) {} diff --git a/tools/testing/selftests/bpf/progs/core_reloc_types.h b/tools/testing/selftests/bpf/progs/core_reloc_types.h index f9dc9766546e..fd8e1b4c6762 100644 --- 
a/tools/testing/selftests/bpf/progs/core_reloc_types.h +++ b/tools/testing/selftests/bpf/progs/core_reloc_types.h @@ -13,6 +13,7 @@ struct core_reloc_kernel_output { int valid[10]; char comm[sizeof("test_progs")]; int comm_len; + bool local_task_struct_matches; }; /* @@ -860,10 +861,11 @@ struct core_reloc_size___err_ambiguous2 { }; /* - * TYPE EXISTENCE & SIZE + * TYPE EXISTENCE, MATCH & SIZE */ struct core_reloc_type_based_output { bool struct_exists; + bool complex_struct_exists; bool union_exists; bool enum_exists; bool typedef_named_struct_exists; @@ -872,9 +874,24 @@ struct core_reloc_type_based_output { bool typedef_int_exists; bool typedef_enum_exists; bool typedef_void_ptr_exists; + bool typedef_restrict_ptr_exists; bool typedef_func_proto_exists; bool typedef_arr_exists; + bool struct_matches; + bool complex_struct_matches; + bool union_matches; + bool enum_matches; + bool typedef_named_struct_matches; + bool typedef_anon_struct_matches; + bool typedef_struct_ptr_matches; + bool typedef_int_matches; + bool typedef_enum_matches; + bool typedef_void_ptr_matches; + bool typedef_restrict_ptr_matches; + bool typedef_func_proto_matches; + bool typedef_arr_matches; + int struct_sz; int union_sz; int enum_sz; @@ -892,6 +909,14 @@ struct a_struct { int x; }; +struct a_complex_struct { + union { + struct a_struct * restrict a; + void *b; + } x; + volatile long y; +}; + union a_union { int y; int z; @@ -916,6 +941,7 @@ typedef int int_typedef; typedef enum { TYPEDEF_ENUM_VAL1, TYPEDEF_ENUM_VAL2 } enum_typedef; typedef void *void_ptr_typedef; +typedef int *restrict restrict_ptr_typedef; typedef int (*func_proto_typedef)(long); @@ -923,22 +949,86 @@ typedef char arr_typedef[20]; struct core_reloc_type_based { struct a_struct f1; - union a_union f2; - enum an_enum f3; - named_struct_typedef f4; - anon_struct_typedef f5; - struct_ptr_typedef f6; - int_typedef f7; - enum_typedef f8; - void_ptr_typedef f9; - func_proto_typedef f10; - arr_typedef f11; + struct 
a_complex_struct f2; + union a_union f3; + enum an_enum f4; + named_struct_typedef f5; + anon_struct_typedef f6; + struct_ptr_typedef f7; + int_typedef f8; + enum_typedef f9; + void_ptr_typedef f10; + restrict_ptr_typedef f11; + func_proto_typedef f12; + arr_typedef f13; }; /* no types in target */ struct core_reloc_type_based___all_missing { }; +/* different member orders, enum variant values, signedness, etc */ +struct a_struct___diff { + int x; + int a; +}; + +struct a_struct___forward; + +struct a_complex_struct___diff { + union { + struct a_struct___forward *a; + void *b; + } x; + volatile long y; +}; + +union a_union___diff { + int z; + int y; +}; + +typedef struct a_struct___diff named_struct_typedef___diff; + +typedef struct { int z, x, y; } anon_struct_typedef___diff; + +typedef struct { + int c; + int b; + int a; +} *struct_ptr_typedef___diff; + +enum an_enum___diff { + AN_ENUM_VAL2___diff = 0, + AN_ENUM_VAL1___diff = 42, + AN_ENUM_VAL3___diff = 1, +}; + +typedef unsigned int int_typedef___diff; + +typedef enum { TYPEDEF_ENUM_VAL2___diff, TYPEDEF_ENUM_VAL1___diff = 50 } enum_typedef___diff; + +typedef const void *void_ptr_typedef___diff; + +typedef int_typedef___diff (*func_proto_typedef___diff)(long); + +typedef char arr_typedef___diff[3]; + +struct core_reloc_type_based___diff { + struct a_struct___diff f1; + struct a_complex_struct___diff f2; + union a_union___diff f3; + enum an_enum___diff f4; + named_struct_typedef___diff f5; + anon_struct_typedef___diff f6; + struct_ptr_typedef___diff f7; + int_typedef___diff f8; + enum_typedef___diff f9; + void_ptr_typedef___diff f10; + func_proto_typedef___diff f11; + arr_typedef___diff f12; +}; + /* different type sizes, extra modifiers, anon vs named enums, etc */ struct a_struct___diff_sz { long x; @@ -1117,6 +1207,20 @@ struct core_reloc_enumval_output { int anon_val2; }; +struct core_reloc_enum64val_output { + bool unsigned_val1_exists; + bool unsigned_val2_exists; + bool unsigned_val3_exists; + bool 
signed_val1_exists; + bool signed_val2_exists; + bool signed_val3_exists; + + long unsigned_val1; + long unsigned_val2; + long signed_val1; + long signed_val2; +}; + enum named_enum { NAMED_ENUM_VAL1 = 1, NAMED_ENUM_VAL2 = 2, @@ -1134,6 +1238,23 @@ struct core_reloc_enumval { anon_enum f2; }; +enum named_unsigned_enum64 { + UNSIGNED_ENUM64_VAL1 = 0x1ffffffffULL, + UNSIGNED_ENUM64_VAL2 = 0x2, + UNSIGNED_ENUM64_VAL3 = 0x3ffffffffULL, +}; + +enum named_signed_enum64 { + SIGNED_ENUM64_VAL1 = 0x1ffffffffLL, + SIGNED_ENUM64_VAL2 = -2, + SIGNED_ENUM64_VAL3 = 0x3ffffffffLL, +}; + +struct core_reloc_enum64val { + enum named_unsigned_enum64 f1; + enum named_signed_enum64 f2; +}; + /* differing enumerator values */ enum named_enum___diff { NAMED_ENUM_VAL1___diff = 101, @@ -1152,6 +1273,23 @@ struct core_reloc_enumval___diff { anon_enum___diff f2; }; +enum named_unsigned_enum64___diff { + UNSIGNED_ENUM64_VAL1___diff = 0x101ffffffffULL, + UNSIGNED_ENUM64_VAL2___diff = 0x202ffffffffULL, + UNSIGNED_ENUM64_VAL3___diff = 0x303ffffffffULL, +}; + +enum named_signed_enum64___diff { + SIGNED_ENUM64_VAL1___diff = -101, + SIGNED_ENUM64_VAL2___diff = -202, + SIGNED_ENUM64_VAL3___diff = -303, +}; + +struct core_reloc_enum64val___diff { + enum named_unsigned_enum64___diff f1; + enum named_signed_enum64___diff f2; +}; + /* missing (optional) third enum value */ enum named_enum___val3_missing { NAMED_ENUM_VAL1___val3_missing = 111, @@ -1168,6 +1306,21 @@ struct core_reloc_enumval___val3_missing { anon_enum___val3_missing f2; }; +enum named_unsigned_enum64___val3_missing { + UNSIGNED_ENUM64_VAL1___val3_missing = 0x111ffffffffULL, + UNSIGNED_ENUM64_VAL2___val3_missing = 0x222, +}; + +enum named_signed_enum64___val3_missing { + SIGNED_ENUM64_VAL1___val3_missing = 0x111ffffffffLL, + SIGNED_ENUM64_VAL2___val3_missing = -222, +}; + +struct core_reloc_enum64val___val3_missing { + enum named_unsigned_enum64___val3_missing f1; + enum named_signed_enum64___val3_missing f2; +}; + /* missing (mandatory) 
second enum value, should fail */ enum named_enum___err_missing { NAMED_ENUM_VAL1___err_missing = 1, @@ -1183,3 +1336,18 @@ struct core_reloc_enumval___err_missing { enum named_enum___err_missing f1; anon_enum___err_missing f2; }; + +enum named_unsigned_enum64___err_missing { + UNSIGNED_ENUM64_VAL1___err_missing = 0x1ffffffffULL, + UNSIGNED_ENUM64_VAL3___err_missing = 0x3ffffffffULL, +}; + +enum named_signed_enum64___err_missing { + SIGNED_ENUM64_VAL1___err_missing = 0x1ffffffffLL, + SIGNED_ENUM64_VAL3___err_missing = -3, +}; + +struct core_reloc_enum64val___err_missing { + enum named_unsigned_enum64___err_missing f1; + enum named_signed_enum64___err_missing f2; +}; diff --git a/tools/testing/selftests/bpf/progs/local_storage_bench.c b/tools/testing/selftests/bpf/progs/local_storage_bench.c new file mode 100644 index 000000000000..2c3234c5b73a --- /dev/null +++ b/tools/testing/selftests/bpf/progs/local_storage_bench.c @@ -0,0 +1,104 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. 
*/ + +#include "vmlinux.h" +#include <bpf/bpf_helpers.h> +#include "bpf_misc.h" + +#define HASHMAP_SZ 4194304 + +struct { + __uint(type, BPF_MAP_TYPE_ARRAY_OF_MAPS); + __uint(max_entries, 1000); + __type(key, int); + __type(value, int); + __array(values, struct { + __uint(type, BPF_MAP_TYPE_TASK_STORAGE); + __uint(map_flags, BPF_F_NO_PREALLOC); + __type(key, int); + __type(value, int); + }); +} array_of_local_storage_maps SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_ARRAY_OF_MAPS); + __uint(max_entries, 1000); + __type(key, int); + __type(value, int); + __array(values, struct { + __uint(type, BPF_MAP_TYPE_HASH); + __uint(max_entries, HASHMAP_SZ); + __type(key, int); + __type(value, int); + }); +} array_of_hash_maps SEC(".maps"); + +long important_hits; +long hits; + +/* set from user-space */ +const volatile unsigned int use_hashmap; +const volatile unsigned int hashmap_num_keys; +const volatile unsigned int num_maps; +const volatile unsigned int interleave; + +struct loop_ctx { + struct task_struct *task; + long loop_hits; + long loop_important_hits; +}; + +static int do_lookup(unsigned int elem, struct loop_ctx *lctx) +{ + void *map, *inner_map; + int idx = 0; + + if (use_hashmap) + map = &array_of_hash_maps; + else + map = &array_of_local_storage_maps; + + inner_map = bpf_map_lookup_elem(map, &elem); + if (!inner_map) + return -1; + + if (use_hashmap) { + idx = bpf_get_prandom_u32() % hashmap_num_keys; + bpf_map_lookup_elem(inner_map, &idx); + } else { + bpf_task_storage_get(inner_map, lctx->task, &idx, + BPF_LOCAL_STORAGE_GET_F_CREATE); + } + + lctx->loop_hits++; + if (!elem) + lctx->loop_important_hits++; + return 0; +} + +static long loop(u32 index, void *ctx) +{ + struct loop_ctx *lctx = (struct loop_ctx *)ctx; + unsigned int map_idx = index % num_maps; + + do_lookup(map_idx, lctx); + if (interleave && map_idx % 3 == 0) + do_lookup(0, lctx); + return 0; +} + +SEC("fentry/" SYS_PREFIX "sys_getpgid") +int get_local(void *ctx) +{ + struct loop_ctx 
lctx; + + lctx.task = bpf_get_current_task_btf(); + lctx.loop_hits = 0; + lctx.loop_important_hits = 0; + bpf_loop(10000, &loop, &lctx, 0); + __sync_add_and_fetch(&hits, lctx.loop_hits); + __sync_add_and_fetch(&important_hits, lctx.loop_important_hits); + return 0; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/local_storage_rcu_tasks_trace_bench.c b/tools/testing/selftests/bpf/progs/local_storage_rcu_tasks_trace_bench.c new file mode 100644 index 000000000000..03bf69f49075 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/local_storage_rcu_tasks_trace_bench.c @@ -0,0 +1,67 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. */ + +#include "vmlinux.h" +#include <bpf/bpf_helpers.h> +#include "bpf_misc.h" + +struct { + __uint(type, BPF_MAP_TYPE_TASK_STORAGE); + __uint(map_flags, BPF_F_NO_PREALLOC); + __type(key, int); + __type(value, int); +} task_storage SEC(".maps"); + +long hits; +long gp_hits; +long gp_times; +long current_gp_start; +long unexpected; +bool postgp_seen; + +SEC("fentry/" SYS_PREFIX "sys_getpgid") +int get_local(void *ctx) +{ + struct task_struct *task; + int idx; + int *s; + + idx = 0; + task = bpf_get_current_task_btf(); + s = bpf_task_storage_get(&task_storage, task, &idx, + BPF_LOCAL_STORAGE_GET_F_CREATE); + if (!s) + return 0; + + *s = 3; + bpf_task_storage_delete(&task_storage, task); + __sync_add_and_fetch(&hits, 1); + return 0; +} + +SEC("fentry/rcu_tasks_trace_pregp_step") +int pregp_step(struct pt_regs *ctx) +{ + current_gp_start = bpf_ktime_get_ns(); + return 0; +} + +SEC("fentry/rcu_tasks_trace_postgp") +int postgp(struct pt_regs *ctx) +{ + if (!current_gp_start && postgp_seen) { + /* Will only happen if prog tracing rcu_tasks_trace_pregp_step doesn't + * execute before this prog + */ + __sync_add_and_fetch(&unexpected, 1); + return 0; + } + + __sync_add_and_fetch(&gp_times, bpf_ktime_get_ns() - current_gp_start); + 
__sync_add_and_fetch(&gp_hits, 1); + current_gp_start = 0; + postgp_seen = true; + return 0; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/lsm_cgroup.c b/tools/testing/selftests/bpf/progs/lsm_cgroup.c new file mode 100644 index 000000000000..4f2d60b87b75 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/lsm_cgroup.c @@ -0,0 +1,180 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include "vmlinux.h" +#include "bpf_tracing_net.h" +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_tracing.h> + +char _license[] SEC("license") = "GPL"; + +#ifndef AF_PACKET +#define AF_PACKET 17 +#endif + +#ifndef AF_UNIX +#define AF_UNIX 1 +#endif + +#ifndef EPERM +#define EPERM 1 +#endif + +struct { + __uint(type, BPF_MAP_TYPE_CGROUP_STORAGE); + __type(key, __u64); + __type(value, __u64); +} cgroup_storage SEC(".maps"); + +int called_socket_post_create; +int called_socket_post_create2; +int called_socket_bind; +int called_socket_bind2; +int called_socket_alloc; +int called_socket_clone; + +static __always_inline int test_local_storage(void) +{ + __u64 *val; + + val = bpf_get_local_storage(&cgroup_storage, 0); + if (!val) + return 0; + *val += 1; + + return 1; +} + +static __always_inline int real_create(struct socket *sock, int family, + int protocol) +{ + struct sock *sk; + int prio = 123; + + /* Reject non-tx-only AF_PACKET. */ + if (family == AF_PACKET && protocol != 0) + return 0; /* EPERM */ + + sk = sock->sk; + if (!sk) + return 1; + + /* The rest of the sockets get default policy. */ + if (bpf_setsockopt(sk, SOL_SOCKET, SO_PRIORITY, &prio, sizeof(prio))) + return 0; /* EPERM */ + + /* Make sure bpf_getsockopt is allowed and works. */ + prio = 0; + if (bpf_getsockopt(sk, SOL_SOCKET, SO_PRIORITY, &prio, sizeof(prio))) + return 0; /* EPERM */ + if (prio != 123) + return 0; /* EPERM */ + + /* Can access cgroup local storage. 
*/ + if (!test_local_storage()) + return 0; /* EPERM */ + + return 1; +} + +/* __cgroup_bpf_run_lsm_socket */ +SEC("lsm_cgroup/socket_post_create") +int BPF_PROG(socket_post_create, struct socket *sock, int family, + int type, int protocol, int kern) +{ + called_socket_post_create++; + return real_create(sock, family, protocol); +} + +/* __cgroup_bpf_run_lsm_socket */ +SEC("lsm_cgroup/socket_post_create") +int BPF_PROG(socket_post_create2, struct socket *sock, int family, + int type, int protocol, int kern) +{ + called_socket_post_create2++; + return real_create(sock, family, protocol); +} + +static __always_inline int real_bind(struct socket *sock, + struct sockaddr *address, + int addrlen) +{ + struct sockaddr_ll sa = {}; + + if (sock->sk->__sk_common.skc_family != AF_PACKET) + return 1; + + if (sock->sk->sk_kern_sock) + return 1; + + bpf_probe_read_kernel(&sa, sizeof(sa), address); + if (sa.sll_protocol) + return 0; /* EPERM */ + + /* Can access cgroup local storage. */ + if (!test_local_storage()) + return 0; /* EPERM */ + + return 1; +} + +/* __cgroup_bpf_run_lsm_socket */ +SEC("lsm_cgroup/socket_bind") +int BPF_PROG(socket_bind, struct socket *sock, struct sockaddr *address, + int addrlen) +{ + called_socket_bind++; + return real_bind(sock, address, addrlen); +} + +/* __cgroup_bpf_run_lsm_socket */ +SEC("lsm_cgroup/socket_bind") +int BPF_PROG(socket_bind2, struct socket *sock, struct sockaddr *address, + int addrlen) +{ + called_socket_bind2++; + return real_bind(sock, address, addrlen); +} + +/* __cgroup_bpf_run_lsm_current (via bpf_lsm_current_hooks) */ +SEC("lsm_cgroup/sk_alloc_security") +int BPF_PROG(socket_alloc, struct sock *sk, int family, gfp_t priority) +{ + called_socket_alloc++; + if (family == AF_UNIX) + return 0; /* EPERM */ + + /* Can access cgroup local storage. 
*/ + if (!test_local_storage()) + return 0; /* EPERM */ + + return 1; +} + +/* __cgroup_bpf_run_lsm_sock */ +SEC("lsm_cgroup/inet_csk_clone") +int BPF_PROG(socket_clone, struct sock *newsk, const struct request_sock *req) +{ + int prio = 234; + + if (!newsk) + return 1; + + /* Accepted request sockets get a different priority. */ + if (bpf_setsockopt(newsk, SOL_SOCKET, SO_PRIORITY, &prio, sizeof(prio))) + return 1; + + /* Make sure bpf_getsockopt is allowed and works. */ + prio = 0; + if (bpf_getsockopt(newsk, SOL_SOCKET, SO_PRIORITY, &prio, sizeof(prio))) + return 1; + if (prio != 234) + return 1; + + /* Can access cgroup local storage. */ + if (!test_local_storage()) + return 1; + + called_socket_clone++; + + return 1; +} diff --git a/tools/testing/selftests/bpf/progs/lsm_cgroup_nonvoid.c b/tools/testing/selftests/bpf/progs/lsm_cgroup_nonvoid.c new file mode 100644 index 000000000000..6cb0f161f417 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/lsm_cgroup_nonvoid.c @@ -0,0 +1,14 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include "vmlinux.h" +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_tracing.h> + +char _license[] SEC("license") = "GPL"; + +SEC("lsm_cgroup/inet_csk_clone") +int BPF_PROG(nonvoid_socket_clone, struct sock *newsk, const struct request_sock *req) +{ + /* Can not return any errors from void LSM hooks. 
*/ + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/tcp_ca_incompl_cong_ops.c b/tools/testing/selftests/bpf/progs/tcp_ca_incompl_cong_ops.c new file mode 100644 index 000000000000..7bb872fb22dd --- /dev/null +++ b/tools/testing/selftests/bpf/progs/tcp_ca_incompl_cong_ops.c @@ -0,0 +1,35 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include "vmlinux.h" + +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_tracing.h> + +char _license[] SEC("license") = "GPL"; + +static inline struct tcp_sock *tcp_sk(const struct sock *sk) +{ + return (struct tcp_sock *)sk; +} + +SEC("struct_ops/incompl_cong_ops_ssthresh") +__u32 BPF_PROG(incompl_cong_ops_ssthresh, struct sock *sk) +{ + return tcp_sk(sk)->snd_ssthresh; +} + +SEC("struct_ops/incompl_cong_ops_undo_cwnd") +__u32 BPF_PROG(incompl_cong_ops_undo_cwnd, struct sock *sk) +{ + return tcp_sk(sk)->snd_cwnd; +} + +SEC(".struct_ops") +struct tcp_congestion_ops incompl_cong_ops = { + /* Intentionally leaving out any of the required cong_avoid() and + * cong_control() here. 
+ */ + .ssthresh = (void *)incompl_cong_ops_ssthresh, + .undo_cwnd = (void *)incompl_cong_ops_undo_cwnd, + .name = "bpf_incompl_ops", +}; diff --git a/tools/testing/selftests/bpf/progs/tcp_ca_unsupp_cong_op.c b/tools/testing/selftests/bpf/progs/tcp_ca_unsupp_cong_op.c new file mode 100644 index 000000000000..c06f4a41c21a --- /dev/null +++ b/tools/testing/selftests/bpf/progs/tcp_ca_unsupp_cong_op.c @@ -0,0 +1,21 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include "vmlinux.h" + +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_tracing.h> + +char _license[] SEC("license") = "GPL"; + +SEC("struct_ops/unsupp_cong_op_get_info") +size_t BPF_PROG(unsupp_cong_op_get_info, struct sock *sk, u32 ext, int *attr, + union tcp_cc_info *info) +{ + return 0; +} + +SEC(".struct_ops") +struct tcp_congestion_ops unsupp_cong_op = { + .get_info = (void *)unsupp_cong_op_get_info, + .name = "bpf_unsupp_op", +}; diff --git a/tools/testing/selftests/bpf/progs/tcp_ca_write_sk_pacing.c b/tools/testing/selftests/bpf/progs/tcp_ca_write_sk_pacing.c new file mode 100644 index 000000000000..43447704cf0e --- /dev/null +++ b/tools/testing/selftests/bpf/progs/tcp_ca_write_sk_pacing.c @@ -0,0 +1,60 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include "vmlinux.h" + +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_tracing.h> + +char _license[] SEC("license") = "GPL"; + +#define USEC_PER_SEC 1000000UL + +#define min(a, b) ((a) < (b) ? 
(a) : (b)) + +static inline struct tcp_sock *tcp_sk(const struct sock *sk) +{ + return (struct tcp_sock *)sk; +} + +SEC("struct_ops/write_sk_pacing_init") +void BPF_PROG(write_sk_pacing_init, struct sock *sk) +{ +#ifdef ENABLE_ATOMICS_TESTS + __sync_bool_compare_and_swap(&sk->sk_pacing_status, SK_PACING_NONE, + SK_PACING_NEEDED); +#else + sk->sk_pacing_status = SK_PACING_NEEDED; +#endif +} + +SEC("struct_ops/write_sk_pacing_cong_control") +void BPF_PROG(write_sk_pacing_cong_control, struct sock *sk, + const struct rate_sample *rs) +{ + const struct tcp_sock *tp = tcp_sk(sk); + unsigned long rate = + ((tp->snd_cwnd * tp->mss_cache * USEC_PER_SEC) << 3) / + (tp->srtt_us ?: 1U << 3); + sk->sk_pacing_rate = min(rate, sk->sk_max_pacing_rate); +} + +SEC("struct_ops/write_sk_pacing_ssthresh") +__u32 BPF_PROG(write_sk_pacing_ssthresh, struct sock *sk) +{ + return tcp_sk(sk)->snd_ssthresh; +} + +SEC("struct_ops/write_sk_pacing_undo_cwnd") +__u32 BPF_PROG(write_sk_pacing_undo_cwnd, struct sock *sk) +{ + return tcp_sk(sk)->snd_cwnd; +} + +SEC(".struct_ops") +struct tcp_congestion_ops write_sk_pacing = { + .init = (void *)write_sk_pacing_init, + .cong_control = (void *)write_sk_pacing_cong_control, + .ssthresh = (void *)write_sk_pacing_ssthresh, + .undo_cwnd = (void *)write_sk_pacing_undo_cwnd, + .name = "bpf_w_sk_pacing", +}; diff --git a/tools/testing/selftests/bpf/progs/test_attach_probe.c b/tools/testing/selftests/bpf/progs/test_attach_probe.c index ce9acf4db8d2..a1e45fec8938 100644 --- a/tools/testing/selftests/bpf/progs/test_attach_probe.c +++ b/tools/testing/selftests/bpf/progs/test_attach_probe.c @@ -1,10 +1,10 @@ // SPDX-License-Identifier: GPL-2.0 // Copyright (c) 2017 Facebook -#include <linux/ptrace.h> -#include <linux/bpf.h> +#include "vmlinux.h" #include <bpf/bpf_helpers.h> #include <bpf/bpf_tracing.h> +#include <bpf/bpf_core_read.h> #include "bpf_misc.h" int kprobe_res = 0; @@ -17,6 +17,11 @@ int uprobe_byname_res = 0; int uretprobe_byname_res = 0; int 
uprobe_byname2_res = 0; int uretprobe_byname2_res = 0; +int uprobe_byname3_sleepable_res = 0; +int uprobe_byname3_res = 0; +int uretprobe_byname3_sleepable_res = 0; +int uretprobe_byname3_res = 0; +void *user_ptr = 0; SEC("kprobe") int handle_kprobe(struct pt_regs *ctx) @@ -25,13 +30,24 @@ int handle_kprobe(struct pt_regs *ctx) return 0; } -SEC("kprobe/" SYS_PREFIX "sys_nanosleep") -int BPF_KPROBE(handle_kprobe_auto) +SEC("ksyscall/nanosleep") +int BPF_KSYSCALL(handle_kprobe_auto, struct __kernel_timespec *req, struct __kernel_timespec *rem) { kprobe2_res = 11; return 0; } +/** + * This program will be manually made sleepable on the userspace side + * and should thus be unattachable. + */ +SEC("kprobe/" SYS_PREFIX "sys_nanosleep") +int handle_kprobe_sleepable(struct pt_regs *ctx) +{ + kprobe_res = 2; + return 0; +} + SEC("kretprobe") int handle_kretprobe(struct pt_regs *ctx) { @@ -39,11 +55,11 @@ int handle_kretprobe(struct pt_regs *ctx) return 0; } -SEC("kretprobe/" SYS_PREFIX "sys_nanosleep") -int BPF_KRETPROBE(handle_kretprobe_auto) +SEC("kretsyscall/nanosleep") +int BPF_KRETPROBE(handle_kretprobe_auto, int ret) { kretprobe2_res = 22; - return 0; + return ret; } SEC("uprobe") @@ -93,4 +109,47 @@ int handle_uretprobe_byname2(struct pt_regs *ctx) return 0; } +static __always_inline bool verify_sleepable_user_copy(void) +{ + char data[9]; + + bpf_copy_from_user(data, sizeof(data), user_ptr); + return bpf_strncmp(data, sizeof(data), "test_data") == 0; +} + +SEC("uprobe.s//proc/self/exe:trigger_func3") +int handle_uprobe_byname3_sleepable(struct pt_regs *ctx) +{ + if (verify_sleepable_user_copy()) + uprobe_byname3_sleepable_res = 9; + return 0; +} + +/** + * same target as the uprobe.s above to force sleepable and non-sleepable + * programs in the same bpf_prog_array + */ +SEC("uprobe//proc/self/exe:trigger_func3") +int handle_uprobe_byname3(struct pt_regs *ctx) +{ + uprobe_byname3_res = 10; + return 0; +} + +SEC("uretprobe.s//proc/self/exe:trigger_func3") +int 
handle_uretprobe_byname3_sleepable(struct pt_regs *ctx) +{ + if (verify_sleepable_user_copy()) + uretprobe_byname3_sleepable_res = 11; + return 0; +} + +SEC("uretprobe//proc/self/exe:trigger_func3") +int handle_uretprobe_byname3(struct pt_regs *ctx) +{ + uretprobe_byname3_res = 12; + return 0; +} + + char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/test_bpf_nf.c b/tools/testing/selftests/bpf/progs/test_bpf_nf.c index f00a9731930e..196cd8dfe42a 100644 --- a/tools/testing/selftests/bpf/progs/test_bpf_nf.c +++ b/tools/testing/selftests/bpf/progs/test_bpf_nf.c @@ -8,6 +8,8 @@ #define EINVAL 22 #define ENOENT 2 +extern unsigned long CONFIG_HZ __kconfig; + int test_einval_bpf_tuple = 0; int test_einval_reserved = 0; int test_einval_netns_id = 0; @@ -16,6 +18,11 @@ int test_eproto_l4proto = 0; int test_enonet_netns_id = 0; int test_enoent_lookup = 0; int test_eafnosupport = 0; +int test_alloc_entry = -EINVAL; +int test_insert_entry = -EAFNOSUPPORT; +int test_succ_lookup = -ENOENT; +u32 test_delta_timeout = 0; +u32 test_status = 0; struct nf_conn; @@ -26,31 +33,44 @@ struct bpf_ct_opts___local { u8 reserved[3]; } __attribute__((preserve_access_index)); +struct nf_conn *bpf_xdp_ct_alloc(struct xdp_md *, struct bpf_sock_tuple *, u32, + struct bpf_ct_opts___local *, u32) __ksym; struct nf_conn *bpf_xdp_ct_lookup(struct xdp_md *, struct bpf_sock_tuple *, u32, struct bpf_ct_opts___local *, u32) __ksym; +struct nf_conn *bpf_skb_ct_alloc(struct __sk_buff *, struct bpf_sock_tuple *, u32, + struct bpf_ct_opts___local *, u32) __ksym; struct nf_conn *bpf_skb_ct_lookup(struct __sk_buff *, struct bpf_sock_tuple *, u32, struct bpf_ct_opts___local *, u32) __ksym; +struct nf_conn *bpf_ct_insert_entry(struct nf_conn *) __ksym; void bpf_ct_release(struct nf_conn *) __ksym; +void bpf_ct_set_timeout(struct nf_conn *, u32) __ksym; +int bpf_ct_change_timeout(struct nf_conn *, u32) __ksym; +int bpf_ct_set_status(struct nf_conn *, u32) __ksym; +int 
bpf_ct_change_status(struct nf_conn *, u32) __ksym; static __always_inline void -nf_ct_test(struct nf_conn *(*func)(void *, struct bpf_sock_tuple *, u32, - struct bpf_ct_opts___local *, u32), +nf_ct_test(struct nf_conn *(*lookup_fn)(void *, struct bpf_sock_tuple *, u32, + struct bpf_ct_opts___local *, u32), + struct nf_conn *(*alloc_fn)(void *, struct bpf_sock_tuple *, u32, + struct bpf_ct_opts___local *, u32), void *ctx) { struct bpf_ct_opts___local opts_def = { .l4proto = IPPROTO_TCP, .netns_id = -1 }; struct bpf_sock_tuple bpf_tuple; struct nf_conn *ct; + int err; __builtin_memset(&bpf_tuple, 0, sizeof(bpf_tuple.ipv4)); - ct = func(ctx, NULL, 0, &opts_def, sizeof(opts_def)); + ct = lookup_fn(ctx, NULL, 0, &opts_def, sizeof(opts_def)); if (ct) bpf_ct_release(ct); else test_einval_bpf_tuple = opts_def.error; opts_def.reserved[0] = 1; - ct = func(ctx, &bpf_tuple, sizeof(bpf_tuple.ipv4), &opts_def, sizeof(opts_def)); + ct = lookup_fn(ctx, &bpf_tuple, sizeof(bpf_tuple.ipv4), &opts_def, + sizeof(opts_def)); opts_def.reserved[0] = 0; opts_def.l4proto = IPPROTO_TCP; if (ct) @@ -59,21 +79,24 @@ nf_ct_test(struct nf_conn *(*func)(void *, struct bpf_sock_tuple *, u32, test_einval_reserved = opts_def.error; opts_def.netns_id = -2; - ct = func(ctx, &bpf_tuple, sizeof(bpf_tuple.ipv4), &opts_def, sizeof(opts_def)); + ct = lookup_fn(ctx, &bpf_tuple, sizeof(bpf_tuple.ipv4), &opts_def, + sizeof(opts_def)); opts_def.netns_id = -1; if (ct) bpf_ct_release(ct); else test_einval_netns_id = opts_def.error; - ct = func(ctx, &bpf_tuple, sizeof(bpf_tuple.ipv4), &opts_def, sizeof(opts_def) - 1); + ct = lookup_fn(ctx, &bpf_tuple, sizeof(bpf_tuple.ipv4), &opts_def, + sizeof(opts_def) - 1); if (ct) bpf_ct_release(ct); else test_einval_len_opts = opts_def.error; opts_def.l4proto = IPPROTO_ICMP; - ct = func(ctx, &bpf_tuple, sizeof(bpf_tuple.ipv4), &opts_def, sizeof(opts_def)); + ct = lookup_fn(ctx, &bpf_tuple, sizeof(bpf_tuple.ipv4), &opts_def, + sizeof(opts_def)); opts_def.l4proto = 
IPPROTO_TCP; if (ct) bpf_ct_release(ct); @@ -81,37 +104,75 @@ nf_ct_test(struct nf_conn *(*func)(void *, struct bpf_sock_tuple *, u32, test_eproto_l4proto = opts_def.error; opts_def.netns_id = 0xf00f; - ct = func(ctx, &bpf_tuple, sizeof(bpf_tuple.ipv4), &opts_def, sizeof(opts_def)); + ct = lookup_fn(ctx, &bpf_tuple, sizeof(bpf_tuple.ipv4), &opts_def, + sizeof(opts_def)); opts_def.netns_id = -1; if (ct) bpf_ct_release(ct); else test_enonet_netns_id = opts_def.error; - ct = func(ctx, &bpf_tuple, sizeof(bpf_tuple.ipv4), &opts_def, sizeof(opts_def)); + ct = lookup_fn(ctx, &bpf_tuple, sizeof(bpf_tuple.ipv4), &opts_def, + sizeof(opts_def)); if (ct) bpf_ct_release(ct); else test_enoent_lookup = opts_def.error; - ct = func(ctx, &bpf_tuple, sizeof(bpf_tuple.ipv4) - 1, &opts_def, sizeof(opts_def)); + ct = lookup_fn(ctx, &bpf_tuple, sizeof(bpf_tuple.ipv4) - 1, &opts_def, + sizeof(opts_def)); if (ct) bpf_ct_release(ct); else test_eafnosupport = opts_def.error; + + bpf_tuple.ipv4.saddr = bpf_get_prandom_u32(); /* src IP */ + bpf_tuple.ipv4.daddr = bpf_get_prandom_u32(); /* dst IP */ + bpf_tuple.ipv4.sport = bpf_get_prandom_u32(); /* src port */ + bpf_tuple.ipv4.dport = bpf_get_prandom_u32(); /* dst port */ + + ct = alloc_fn(ctx, &bpf_tuple, sizeof(bpf_tuple.ipv4), &opts_def, + sizeof(opts_def)); + if (ct) { + struct nf_conn *ct_ins; + + bpf_ct_set_timeout(ct, 10000); + bpf_ct_set_status(ct, IPS_CONFIRMED); + + ct_ins = bpf_ct_insert_entry(ct); + if (ct_ins) { + struct nf_conn *ct_lk; + + ct_lk = lookup_fn(ctx, &bpf_tuple, sizeof(bpf_tuple.ipv4), + &opts_def, sizeof(opts_def)); + if (ct_lk) { + /* update ct entry timeout */ + bpf_ct_change_timeout(ct_lk, 10000); + test_delta_timeout = ct_lk->timeout - bpf_jiffies64(); + test_delta_timeout /= CONFIG_HZ; + test_status = IPS_SEEN_REPLY; + bpf_ct_change_status(ct_lk, IPS_SEEN_REPLY); + bpf_ct_release(ct_lk); + test_succ_lookup = 0; + } + bpf_ct_release(ct_ins); + test_insert_entry = 0; + } + test_alloc_entry = 0; + } } SEC("xdp") 
int nf_xdp_ct_test(struct xdp_md *ctx) { - nf_ct_test((void *)bpf_xdp_ct_lookup, ctx); + nf_ct_test((void *)bpf_xdp_ct_lookup, (void *)bpf_xdp_ct_alloc, ctx); return 0; } SEC("tc") int nf_skb_ct_test(struct __sk_buff *ctx) { - nf_ct_test((void *)bpf_skb_ct_lookup, ctx); + nf_ct_test((void *)bpf_skb_ct_lookup, (void *)bpf_skb_ct_alloc, ctx); return 0; } diff --git a/tools/testing/selftests/bpf/progs/test_bpf_nf_fail.c b/tools/testing/selftests/bpf/progs/test_bpf_nf_fail.c new file mode 100644 index 000000000000..bf79af15c808 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_bpf_nf_fail.c @@ -0,0 +1,134 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <vmlinux.h> +#include <bpf/bpf_tracing.h> +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_core_read.h> + +struct nf_conn; + +struct bpf_ct_opts___local { + s32 netns_id; + s32 error; + u8 l4proto; + u8 reserved[3]; +} __attribute__((preserve_access_index)); + +struct nf_conn *bpf_skb_ct_alloc(struct __sk_buff *, struct bpf_sock_tuple *, u32, + struct bpf_ct_opts___local *, u32) __ksym; +struct nf_conn *bpf_skb_ct_lookup(struct __sk_buff *, struct bpf_sock_tuple *, u32, + struct bpf_ct_opts___local *, u32) __ksym; +struct nf_conn *bpf_ct_insert_entry(struct nf_conn *) __ksym; +void bpf_ct_release(struct nf_conn *) __ksym; +void bpf_ct_set_timeout(struct nf_conn *, u32) __ksym; +int bpf_ct_change_timeout(struct nf_conn *, u32) __ksym; +int bpf_ct_set_status(struct nf_conn *, u32) __ksym; +int bpf_ct_change_status(struct nf_conn *, u32) __ksym; + +SEC("?tc") +int alloc_release(struct __sk_buff *ctx) +{ + struct bpf_ct_opts___local opts = {}; + struct bpf_sock_tuple tup = {}; + struct nf_conn *ct; + + ct = bpf_skb_ct_alloc(ctx, &tup, sizeof(tup.ipv4), &opts, sizeof(opts)); + if (!ct) + return 0; + bpf_ct_release(ct); + return 0; +} + +SEC("?tc") +int insert_insert(struct __sk_buff *ctx) +{ + struct bpf_ct_opts___local opts = {}; + struct bpf_sock_tuple tup = {}; + struct nf_conn *ct; + + ct = 
bpf_skb_ct_alloc(ctx, &tup, sizeof(tup.ipv4), &opts, sizeof(opts)); + if (!ct) + return 0; + ct = bpf_ct_insert_entry(ct); + if (!ct) + return 0; + ct = bpf_ct_insert_entry(ct); + return 0; +} + +SEC("?tc") +int lookup_insert(struct __sk_buff *ctx) +{ + struct bpf_ct_opts___local opts = {}; + struct bpf_sock_tuple tup = {}; + struct nf_conn *ct; + + ct = bpf_skb_ct_lookup(ctx, &tup, sizeof(tup.ipv4), &opts, sizeof(opts)); + if (!ct) + return 0; + bpf_ct_insert_entry(ct); + return 0; +} + +SEC("?tc") +int set_timeout_after_insert(struct __sk_buff *ctx) +{ + struct bpf_ct_opts___local opts = {}; + struct bpf_sock_tuple tup = {}; + struct nf_conn *ct; + + ct = bpf_skb_ct_alloc(ctx, &tup, sizeof(tup.ipv4), &opts, sizeof(opts)); + if (!ct) + return 0; + ct = bpf_ct_insert_entry(ct); + if (!ct) + return 0; + bpf_ct_set_timeout(ct, 0); + return 0; +} + +SEC("?tc") +int set_status_after_insert(struct __sk_buff *ctx) +{ + struct bpf_ct_opts___local opts = {}; + struct bpf_sock_tuple tup = {}; + struct nf_conn *ct; + + ct = bpf_skb_ct_alloc(ctx, &tup, sizeof(tup.ipv4), &opts, sizeof(opts)); + if (!ct) + return 0; + ct = bpf_ct_insert_entry(ct); + if (!ct) + return 0; + bpf_ct_set_status(ct, 0); + return 0; +} + +SEC("?tc") +int change_timeout_after_alloc(struct __sk_buff *ctx) +{ + struct bpf_ct_opts___local opts = {}; + struct bpf_sock_tuple tup = {}; + struct nf_conn *ct; + + ct = bpf_skb_ct_alloc(ctx, &tup, sizeof(tup.ipv4), &opts, sizeof(opts)); + if (!ct) + return 0; + bpf_ct_change_timeout(ct, 0); + return 0; +} + +SEC("?tc") +int change_status_after_alloc(struct __sk_buff *ctx) +{ + struct bpf_ct_opts___local opts = {}; + struct bpf_sock_tuple tup = {}; + struct nf_conn *ct; + + ct = bpf_skb_ct_alloc(ctx, &tup, sizeof(tup.ipv4), &opts, sizeof(opts)); + if (!ct) + return 0; + bpf_ct_change_status(ct, 0); + return 0; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/test_btf_haskv.c 
b/tools/testing/selftests/bpf/progs/test_btf_haskv.c deleted file mode 100644 index 07c94df13660..000000000000 --- a/tools/testing/selftests/bpf/progs/test_btf_haskv.c +++ /dev/null @@ -1,51 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* Copyright (c) 2018 Facebook */ -#include <linux/bpf.h> -#include <bpf/bpf_helpers.h> -#include "bpf_legacy.h" - -struct ipv_counts { - unsigned int v4; - unsigned int v6; -}; - -#pragma GCC diagnostic push -#pragma GCC diagnostic ignored "-Wdeprecated-declarations" -struct bpf_map_def SEC("maps") btf_map = { - .type = BPF_MAP_TYPE_ARRAY, - .key_size = sizeof(int), - .value_size = sizeof(struct ipv_counts), - .max_entries = 4, -}; -#pragma GCC diagnostic pop - -BPF_ANNOTATE_KV_PAIR(btf_map, int, struct ipv_counts); - -__attribute__((noinline)) -int test_long_fname_2(void) -{ - struct ipv_counts *counts; - int key = 0; - - counts = bpf_map_lookup_elem(&btf_map, &key); - if (!counts) - return 0; - - counts->v6++; - - return 0; -} - -__attribute__((noinline)) -int test_long_fname_1(void) -{ - return test_long_fname_2(); -} - -SEC("dummy_tracepoint") -int _dummy_tracepoint(void *arg) -{ - return test_long_fname_1(); -} - -char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/test_btf_newkv.c b/tools/testing/selftests/bpf/progs/test_btf_newkv.c index 762671a2e90c..251854a041b5 100644 --- a/tools/testing/selftests/bpf/progs/test_btf_newkv.c +++ b/tools/testing/selftests/bpf/progs/test_btf_newkv.c @@ -9,19 +9,6 @@ struct ipv_counts { unsigned int v6; }; -#pragma GCC diagnostic push -#pragma GCC diagnostic ignored "-Wdeprecated-declarations" -/* just to validate we can handle maps in multiple sections */ -struct bpf_map_def SEC("maps") btf_map_legacy = { - .type = BPF_MAP_TYPE_ARRAY, - .key_size = sizeof(int), - .value_size = sizeof(long long), - .max_entries = 4, -}; -#pragma GCC diagnostic pop - -BPF_ANNOTATE_KV_PAIR(btf_map_legacy, int, struct ipv_counts); - struct { __uint(type, BPF_MAP_TYPE_ARRAY); 
__uint(max_entries, 4); @@ -41,11 +28,6 @@ int test_long_fname_2(void) counts->v6++; - /* just verify we can reference both maps */ - counts = bpf_map_lookup_elem(&btf_map_legacy, &key); - if (!counts) - return 0; - return 0; } diff --git a/tools/testing/selftests/bpf/progs/test_core_extern.c b/tools/testing/selftests/bpf/progs/test_core_extern.c index 3ac3603ad53d..a3c7c1042f35 100644 --- a/tools/testing/selftests/bpf/progs/test_core_extern.c +++ b/tools/testing/selftests/bpf/progs/test_core_extern.c @@ -11,6 +11,7 @@ static int (*bpf_missing_helper)(const void *arg1, int arg2) = (void *) 999; extern int LINUX_KERNEL_VERSION __kconfig; +extern int LINUX_UNKNOWN_VIRTUAL_EXTERN __kconfig __weak; extern bool CONFIG_BPF_SYSCALL __kconfig; /* strong */ extern enum libbpf_tristate CONFIG_TRISTATE __kconfig __weak; extern bool CONFIG_BOOL __kconfig __weak; @@ -22,6 +23,7 @@ extern const char CONFIG_STR[8] __kconfig __weak; extern uint64_t CONFIG_MISSING __kconfig __weak; uint64_t kern_ver = -1; +uint64_t unkn_virt_val = -1; uint64_t bpf_syscall = -1; uint64_t tristate_val = -1; uint64_t bool_val = -1; @@ -38,6 +40,7 @@ int handle_sys_enter(struct pt_regs *ctx) int i; kern_ver = LINUX_KERNEL_VERSION; + unkn_virt_val = LINUX_UNKNOWN_VIRTUAL_EXTERN; bpf_syscall = CONFIG_BPF_SYSCALL; tristate_val = CONFIG_TRISTATE; bool_val = CONFIG_BOOL; diff --git a/tools/testing/selftests/bpf/progs/test_core_reloc_enum64val.c b/tools/testing/selftests/bpf/progs/test_core_reloc_enum64val.c new file mode 100644 index 000000000000..63147fbfae6e --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_core_reloc_enum64val.c @@ -0,0 +1,70 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. 
*/ + +#include <linux/bpf.h> +#include <stdint.h> +#include <stdbool.h> +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_core_read.h> + +char _license[] SEC("license") = "GPL"; + +struct { + char in[256]; + char out[256]; + bool skip; +} data = {}; + +enum named_unsigned_enum64 { + UNSIGNED_ENUM64_VAL1 = 0x1ffffffffULL, + UNSIGNED_ENUM64_VAL2 = 0x2ffffffffULL, + UNSIGNED_ENUM64_VAL3 = 0x3ffffffffULL, +}; + +enum named_signed_enum64 { + SIGNED_ENUM64_VAL1 = 0x1ffffffffLL, + SIGNED_ENUM64_VAL2 = -2, + SIGNED_ENUM64_VAL3 = 0x3ffffffffLL, +}; + +struct core_reloc_enum64val_output { + bool unsigned_val1_exists; + bool unsigned_val2_exists; + bool unsigned_val3_exists; + bool signed_val1_exists; + bool signed_val2_exists; + bool signed_val3_exists; + + long unsigned_val1; + long unsigned_val2; + long signed_val1; + long signed_val2; +}; + +SEC("raw_tracepoint/sys_enter") +int test_core_enum64val(void *ctx) +{ +#if __clang_major__ >= 15 + struct core_reloc_enum64val_output *out = (void *)&data.out; + enum named_unsigned_enum64 named_unsigned = 0; + enum named_signed_enum64 named_signed = 0; + + out->unsigned_val1_exists = bpf_core_enum_value_exists(named_unsigned, UNSIGNED_ENUM64_VAL1); + out->unsigned_val2_exists = bpf_core_enum_value_exists(enum named_unsigned_enum64, UNSIGNED_ENUM64_VAL2); + out->unsigned_val3_exists = bpf_core_enum_value_exists(enum named_unsigned_enum64, UNSIGNED_ENUM64_VAL3); + out->signed_val1_exists = bpf_core_enum_value_exists(named_signed, SIGNED_ENUM64_VAL1); + out->signed_val2_exists = bpf_core_enum_value_exists(enum named_signed_enum64, SIGNED_ENUM64_VAL2); + out->signed_val3_exists = bpf_core_enum_value_exists(enum named_signed_enum64, SIGNED_ENUM64_VAL3); + + out->unsigned_val1 = bpf_core_enum_value(named_unsigned, UNSIGNED_ENUM64_VAL1); + out->unsigned_val2 = bpf_core_enum_value(named_unsigned, UNSIGNED_ENUM64_VAL2); + out->signed_val1 = bpf_core_enum_value(named_signed, SIGNED_ENUM64_VAL1); + out->signed_val2 = 
bpf_core_enum_value(named_signed, SIGNED_ENUM64_VAL2); + /* NAMED_ENUM64_VAL3 value is optional */ + +#else + data.skip = true; +#endif + + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/test_core_reloc_kernel.c b/tools/testing/selftests/bpf/progs/test_core_reloc_kernel.c index 145028b52ad8..a17dd83eae67 100644 --- a/tools/testing/selftests/bpf/progs/test_core_reloc_kernel.c +++ b/tools/testing/selftests/bpf/progs/test_core_reloc_kernel.c @@ -21,6 +21,7 @@ struct core_reloc_kernel_output { /* we have test_progs[-flavor], so cut flavor part */ char comm[sizeof("test_progs")]; int comm_len; + bool local_task_struct_matches; }; struct task_struct { @@ -30,11 +31,25 @@ struct task_struct { struct task_struct *group_leader; }; +struct mm_struct___wrong { + int abc_whatever_should_not_exist; +}; + +struct task_struct___local { + int pid; + struct mm_struct___wrong *mm; +}; + #define CORE_READ(dst, src) bpf_core_read(dst, sizeof(*(dst)), src) SEC("raw_tracepoint/sys_enter") int test_core_kernel(void *ctx) { + /* Support for the BPF_TYPE_MATCHES argument to the + * __builtin_preserve_type_info builtin was added at some point during + * development of clang 15 and it's what we require for this test. 
+ */ +#if __has_builtin(__builtin_preserve_type_info) && __clang_major__ >= 15 struct task_struct *task = (void *)bpf_get_current_task(); struct core_reloc_kernel_output *out = (void *)&data.out; uint64_t pid_tgid = bpf_get_current_pid_tgid(); @@ -93,6 +108,10 @@ int test_core_kernel(void *ctx) group_leader, group_leader, group_leader, group_leader, comm); + out->local_task_struct_matches = bpf_core_type_matches(struct task_struct___local); +#else + data.skip = true; +#endif return 0; } diff --git a/tools/testing/selftests/bpf/progs/test_core_reloc_type_based.c b/tools/testing/selftests/bpf/progs/test_core_reloc_type_based.c index fb60f8195c53..2edb4df35e6e 100644 --- a/tools/testing/selftests/bpf/progs/test_core_reloc_type_based.c +++ b/tools/testing/selftests/bpf/progs/test_core_reloc_type_based.c @@ -19,6 +19,14 @@ struct a_struct { int x; }; +struct a_complex_struct { + union { + struct a_struct *a; + void *b; + } x; + volatile long y; +}; + union a_union { int y; int z; @@ -43,6 +51,7 @@ typedef int int_typedef; typedef enum { TYPEDEF_ENUM_VAL1, TYPEDEF_ENUM_VAL2 } enum_typedef; typedef void *void_ptr_typedef; +typedef int *restrict restrict_ptr_typedef; typedef int (*func_proto_typedef)(long); @@ -50,6 +59,7 @@ typedef char arr_typedef[20]; struct core_reloc_type_based_output { bool struct_exists; + bool complex_struct_exists; bool union_exists; bool enum_exists; bool typedef_named_struct_exists; @@ -58,9 +68,24 @@ struct core_reloc_type_based_output { bool typedef_int_exists; bool typedef_enum_exists; bool typedef_void_ptr_exists; + bool typedef_restrict_ptr_exists; bool typedef_func_proto_exists; bool typedef_arr_exists; + bool struct_matches; + bool complex_struct_matches; + bool union_matches; + bool enum_matches; + bool typedef_named_struct_matches; + bool typedef_anon_struct_matches; + bool typedef_struct_ptr_matches; + bool typedef_int_matches; + bool typedef_enum_matches; + bool typedef_void_ptr_matches; + bool typedef_restrict_ptr_matches; + bool 
typedef_func_proto_matches; + bool typedef_arr_matches; + int struct_sz; int union_sz; int enum_sz; @@ -77,10 +102,17 @@ struct core_reloc_type_based_output { SEC("raw_tracepoint/sys_enter") int test_core_type_based(void *ctx) { -#if __has_builtin(__builtin_preserve_type_info) + /* Support for the BPF_TYPE_MATCHES argument to the + * __builtin_preserve_type_info builtin was added at some point during + * development of clang 15 and it's what we require for this test. Part of it + * could run with merely __builtin_preserve_type_info (which could be checked + * separately), but we have to find an upper bound. + */ +#if __has_builtin(__builtin_preserve_type_info) && __clang_major__ >= 15 struct core_reloc_type_based_output *out = (void *)&data.out; out->struct_exists = bpf_core_type_exists(struct a_struct); + out->complex_struct_exists = bpf_core_type_exists(struct a_complex_struct); out->union_exists = bpf_core_type_exists(union a_union); out->enum_exists = bpf_core_type_exists(enum an_enum); out->typedef_named_struct_exists = bpf_core_type_exists(named_struct_typedef); @@ -89,9 +121,24 @@ int test_core_type_based(void *ctx) out->typedef_int_exists = bpf_core_type_exists(int_typedef); out->typedef_enum_exists = bpf_core_type_exists(enum_typedef); out->typedef_void_ptr_exists = bpf_core_type_exists(void_ptr_typedef); + out->typedef_restrict_ptr_exists = bpf_core_type_exists(restrict_ptr_typedef); out->typedef_func_proto_exists = bpf_core_type_exists(func_proto_typedef); out->typedef_arr_exists = bpf_core_type_exists(arr_typedef); + out->struct_matches = bpf_core_type_matches(struct a_struct); + out->complex_struct_matches = bpf_core_type_matches(struct a_complex_struct); + out->union_matches = bpf_core_type_matches(union a_union); + out->enum_matches = bpf_core_type_matches(enum an_enum); + out->typedef_named_struct_matches = bpf_core_type_matches(named_struct_typedef); + out->typedef_anon_struct_matches = bpf_core_type_matches(anon_struct_typedef); + 
out->typedef_struct_ptr_matches = bpf_core_type_matches(struct_ptr_typedef); + out->typedef_int_matches = bpf_core_type_matches(int_typedef); + out->typedef_enum_matches = bpf_core_type_matches(enum_typedef); + out->typedef_void_ptr_matches = bpf_core_type_matches(void_ptr_typedef); + out->typedef_restrict_ptr_matches = bpf_core_type_matches(restrict_ptr_typedef); + out->typedef_func_proto_matches = bpf_core_type_matches(func_proto_typedef); + out->typedef_arr_matches = bpf_core_type_matches(arr_typedef); + out->struct_sz = bpf_core_type_size(struct a_struct); out->union_sz = bpf_core_type_size(union a_union); out->enum_sz = bpf_core_type_size(enum an_enum); diff --git a/tools/testing/selftests/bpf/progs/test_probe_user.c b/tools/testing/selftests/bpf/progs/test_probe_user.c index 702578a5e496..a8e501af9604 100644 --- a/tools/testing/selftests/bpf/progs/test_probe_user.c +++ b/tools/testing/selftests/bpf/progs/test_probe_user.c @@ -1,37 +1,47 @@ // SPDX-License-Identifier: GPL-2.0 - -#include <linux/ptrace.h> -#include <linux/bpf.h> - -#include <netinet/in.h> - +#include "vmlinux.h" #include <bpf/bpf_helpers.h> #include <bpf/bpf_tracing.h> +#include <bpf/bpf_core_read.h> #include "bpf_misc.h" static struct sockaddr_in old; -SEC("kprobe/" SYS_PREFIX "sys_connect") -int BPF_KPROBE(handle_sys_connect) +static int handle_sys_connect_common(struct sockaddr_in *uservaddr) { -#if SYSCALL_WRAPPER == 1 - struct pt_regs *real_regs; -#endif struct sockaddr_in new; - void *ptr; -#if SYSCALL_WRAPPER == 0 - ptr = (void *)PT_REGS_PARM2(ctx); -#else - real_regs = (struct pt_regs *)PT_REGS_PARM1(ctx); - bpf_probe_read_kernel(&ptr, sizeof(ptr), &PT_REGS_PARM2(real_regs)); + bpf_probe_read_user(&old, sizeof(old), uservaddr); + __builtin_memset(&new, 0xab, sizeof(new)); + bpf_probe_write_user(uservaddr, &new, sizeof(new)); + + return 0; +} + +SEC("ksyscall/connect") +int BPF_KSYSCALL(handle_sys_connect, int fd, struct sockaddr_in *uservaddr, + int addrlen) +{ + return 
handle_sys_connect_common(uservaddr); +} + +#if defined(bpf_target_s390) +#ifndef SYS_CONNECT +#define SYS_CONNECT 3 #endif - bpf_probe_read_user(&old, sizeof(old), ptr); - __builtin_memset(&new, 0xab, sizeof(new)); - bpf_probe_write_user(ptr, &new, sizeof(new)); +SEC("ksyscall/socketcall") +int BPF_KSYSCALL(handle_sys_socketcall, int call, unsigned long *args) +{ + if (call == SYS_CONNECT) { + struct sockaddr_in *uservaddr; + + bpf_probe_read_user(&uservaddr, sizeof(uservaddr), &args[1]); + return handle_sys_connect_common(uservaddr); + } return 0; } +#endif char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/test_skeleton.c b/tools/testing/selftests/bpf/progs/test_skeleton.c index 1b1187d2967b..1a4e93f6d9df 100644 --- a/tools/testing/selftests/bpf/progs/test_skeleton.c +++ b/tools/testing/selftests/bpf/progs/test_skeleton.c @@ -51,6 +51,8 @@ int out_dynarr[4] SEC(".data.dyn") = { 1, 2, 3, 4 }; int read_mostly_var __read_mostly; int out_mostly_var; +char huge_arr[16 * 1024 * 1024]; + SEC("raw_tp/sys_enter") int handler(const void *ctx) { @@ -71,6 +73,8 @@ int handler(const void *ctx) out_mostly_var = read_mostly_var; + huge_arr[sizeof(huge_arr) - 1] = 123; + return 0; } diff --git a/tools/testing/selftests/bpf/progs/test_tc_dtime.c b/tools/testing/selftests/bpf/progs/test_tc_dtime.c index 06f300d06dbd..b596479a9ebe 100644 --- a/tools/testing/selftests/bpf/progs/test_tc_dtime.c +++ b/tools/testing/selftests/bpf/progs/test_tc_dtime.c @@ -11,6 +11,8 @@ #include <linux/in.h> #include <linux/ip.h> #include <linux/ipv6.h> +#include <linux/tcp.h> +#include <linux/udp.h> #include <bpf/bpf_helpers.h> #include <bpf/bpf_endian.h> #include <sys/socket.h> @@ -115,6 +117,19 @@ static bool bpf_fwd(void) return test < TCP_IP4_RT_FWD; } +static __u8 get_proto(void) +{ + switch (test) { + case UDP_IP4: + case UDP_IP6: + case UDP_IP4_RT_FWD: + case UDP_IP6_RT_FWD: + return IPPROTO_UDP; + default: + return IPPROTO_TCP; + } +} + /* -1: parse error: 
TC_ACT_SHOT * 0: not testing traffic: TC_ACT_OK * >0: first byte is the inet_proto, second byte has the netns @@ -122,11 +137,16 @@ static bool bpf_fwd(void) */ static int skb_get_type(struct __sk_buff *skb) { + __u16 dst_ns_port = __bpf_htons(50000 + test); void *data_end = ctx_ptr(skb->data_end); void *data = ctx_ptr(skb->data); __u8 inet_proto = 0, ns = 0; struct ipv6hdr *ip6h; + __u16 sport, dport; struct iphdr *iph; + struct tcphdr *th; + struct udphdr *uh; + void *trans; switch (skb->protocol) { case __bpf_htons(ETH_P_IP): @@ -138,6 +158,7 @@ static int skb_get_type(struct __sk_buff *skb) else if (iph->saddr == ip4_dst) ns = DST_NS; inet_proto = iph->protocol; + trans = iph + 1; break; case __bpf_htons(ETH_P_IPV6): ip6h = data + sizeof(struct ethhdr); @@ -148,15 +169,43 @@ static int skb_get_type(struct __sk_buff *skb) else if (v6_equal(ip6h->saddr, (struct in6_addr)ip6_dst)) ns = DST_NS; inet_proto = ip6h->nexthdr; + trans = ip6h + 1; break; default: return 0; } - if ((inet_proto != IPPROTO_TCP && inet_proto != IPPROTO_UDP) || !ns) + /* skb is not from src_ns or dst_ns. + * skb is not the testing IPPROTO. 
+ */ + if (!ns || inet_proto != get_proto()) return 0; - return (ns << 8 | inet_proto); + switch (inet_proto) { + case IPPROTO_TCP: + th = trans; + if (th + 1 > data_end) + return -1; + sport = th->source; + dport = th->dest; + break; + case IPPROTO_UDP: + uh = trans; + if (uh + 1 > data_end) + return -1; + sport = uh->source; + dport = uh->dest; + break; + default: + return 0; + } + + /* The skb is the testing traffic */ + if ((ns == SRC_NS && dport == dst_ns_port) || + (ns == DST_NS && sport == dst_ns_port)) + return (ns << 8 | inet_proto); + + return 0; } /* format: direction@iface@netns diff --git a/tools/testing/selftests/bpf/progs/test_tunnel_kern.c b/tools/testing/selftests/bpf/progs/test_tunnel_kern.c index 17f2f325b3f3..df0673c4ecbe 100644 --- a/tools/testing/selftests/bpf/progs/test_tunnel_kern.c +++ b/tools/testing/selftests/bpf/progs/test_tunnel_kern.c @@ -14,15 +14,24 @@ #include <linux/if_packet.h> #include <linux/ip.h> #include <linux/ipv6.h> +#include <linux/icmp.h> #include <linux/types.h> #include <linux/socket.h> #include <linux/pkt_cls.h> #include <linux/erspan.h> +#include <linux/udp.h> #include <bpf/bpf_helpers.h> #include <bpf/bpf_endian.h> #define log_err(__ret) bpf_printk("ERROR line:%d ret:%d\n", __LINE__, __ret) +#define VXLAN_UDP_PORT 4789 + +/* Only IPv4 address assigned to veth1. 
+ * 172.16.1.200 + */ +#define ASSIGNED_ADDR_VETH1 0xac1001c8 + struct geneve_opt { __be16 opt_class; __u8 type; @@ -33,6 +42,11 @@ struct geneve_opt { __u8 opt_data[8]; /* hard-coded to 8 byte */ }; +struct vxlanhdr { + __be32 vx_flags; + __be32 vx_vni; +} __attribute__((packed)); + struct vxlan_metadata { __u32 gbp; }; @@ -369,14 +383,8 @@ int vxlan_get_tunnel_src(struct __sk_buff *skb) int ret; struct bpf_tunnel_key key; struct vxlan_metadata md; + __u32 orig_daddr; __u32 index = 0; - __u32 *local_ip = NULL; - - local_ip = bpf_map_lookup_elem(&local_ip_map, &index); - if (!local_ip) { - log_err(ret); - return TC_ACT_SHOT; - } ret = bpf_skb_get_tunnel_key(skb, &key, sizeof(key), 0); if (ret < 0) { @@ -390,11 +398,10 @@ int vxlan_get_tunnel_src(struct __sk_buff *skb) return TC_ACT_SHOT; } - if (key.local_ipv4 != *local_ip || md.gbp != 0x800FF) { + if (key.local_ipv4 != ASSIGNED_ADDR_VETH1 || md.gbp != 0x800FF) { bpf_printk("vxlan key %d local ip 0x%x remote ip 0x%x gbp 0x%x\n", key.tunnel_id, key.local_ipv4, key.remote_ipv4, md.gbp); - bpf_printk("local_ip 0x%x\n", *local_ip); log_err(ret); return TC_ACT_SHOT; } @@ -403,6 +410,61 @@ int vxlan_get_tunnel_src(struct __sk_buff *skb) } SEC("tc") +int veth_set_outer_dst(struct __sk_buff *skb) +{ + struct ethhdr *eth = (struct ethhdr *)(long)skb->data; + __u32 assigned_ip = bpf_htonl(ASSIGNED_ADDR_VETH1); + void *data_end = (void *)(long)skb->data_end; + struct udphdr *udph; + struct iphdr *iph; + __u32 index = 0; + int ret = 0; + int shrink; + __s64 csum; + + if ((void *)eth + sizeof(*eth) > data_end) { + log_err(ret); + return TC_ACT_SHOT; + } + + if (eth->h_proto != bpf_htons(ETH_P_IP)) + return TC_ACT_OK; + + iph = (struct iphdr *)(eth + 1); + if ((void *)iph + sizeof(*iph) > data_end) { + log_err(ret); + return TC_ACT_SHOT; + } + if (iph->protocol != IPPROTO_UDP) + return TC_ACT_OK; + + udph = (struct udphdr *)(iph + 1); + if ((void *)udph + sizeof(*udph) > data_end) { + log_err(ret); + return TC_ACT_SHOT; + } + if 
(udph->dest != bpf_htons(VXLAN_UDP_PORT)) + return TC_ACT_OK; + + if (iph->daddr != assigned_ip) { + csum = bpf_csum_diff(&iph->daddr, sizeof(__u32), &assigned_ip, + sizeof(__u32), 0); + if (bpf_skb_store_bytes(skb, ETH_HLEN + offsetof(struct iphdr, daddr), + &assigned_ip, sizeof(__u32), 0) < 0) { + log_err(ret); + return TC_ACT_SHOT; + } + if (bpf_l3_csum_replace(skb, ETH_HLEN + offsetof(struct iphdr, check), + 0, csum, 0) < 0) { + log_err(ret); + return TC_ACT_SHOT; + } + bpf_skb_change_type(skb, PACKET_HOST); + } + return TC_ACT_OK; +} + +SEC("tc") int ip6vxlan_set_tunnel_dst(struct __sk_buff *skb) { struct bpf_tunnel_key key; diff --git a/tools/testing/selftests/bpf/progs/test_varlen.c b/tools/testing/selftests/bpf/progs/test_varlen.c index 913acdffd90f..3987ff174f1f 100644 --- a/tools/testing/selftests/bpf/progs/test_varlen.c +++ b/tools/testing/selftests/bpf/progs/test_varlen.c @@ -41,20 +41,20 @@ int handler64_unsigned(void *regs) { int pid = bpf_get_current_pid_tgid() >> 32; void *payload = payload1; - u64 len; + long len; /* ignore irrelevant invocations */ if (test_pid != pid || !capture) return 0; len = bpf_probe_read_kernel_str(payload, MAX_LEN, &buf_in1[0]); - if (len <= MAX_LEN) { + if (len >= 0) { payload += len; payload1_len1 = len; } len = bpf_probe_read_kernel_str(payload, MAX_LEN, &buf_in2[0]); - if (len <= MAX_LEN) { + if (len >= 0) { payload += len; payload1_len2 = len; } @@ -123,7 +123,7 @@ int handler32_signed(void *regs) { int pid = bpf_get_current_pid_tgid() >> 32; void *payload = payload4; - int len; + long len; /* ignore irrelevant invocations */ if (test_pid != pid || !capture) diff --git a/tools/testing/selftests/bpf/progs/test_xdp_noinline.c b/tools/testing/selftests/bpf/progs/test_xdp_noinline.c index 125d872d7981..ba48fcb98ab2 100644 --- a/tools/testing/selftests/bpf/progs/test_xdp_noinline.c +++ b/tools/testing/selftests/bpf/progs/test_xdp_noinline.c @@ -239,7 +239,7 @@ bool parse_udp(void *data, void *data_end, udp = data + off; if 
(udp + 1 > data_end) - return 0; + return false; if (!is_icmp) { pckt->flow.port16[0] = udp->source; pckt->flow.port16[1] = udp->dest; @@ -247,7 +247,7 @@ bool parse_udp(void *data, void *data_end, pckt->flow.port16[0] = udp->dest; pckt->flow.port16[1] = udp->source; } - return 1; + return true; } static __attribute__ ((noinline)) @@ -261,7 +261,7 @@ bool parse_tcp(void *data, void *data_end, tcp = data + off; if (tcp + 1 > data_end) - return 0; + return false; if (tcp->syn) pckt->flags |= (1 << 1); if (!is_icmp) { @@ -271,7 +271,7 @@ bool parse_tcp(void *data, void *data_end, pckt->flow.port16[0] = tcp->dest; pckt->flow.port16[1] = tcp->source; } - return 1; + return true; } static __attribute__ ((noinline)) @@ -287,7 +287,7 @@ bool encap_v6(struct xdp_md *xdp, struct ctl_value *cval, void *data; if (bpf_xdp_adjust_head(xdp, 0 - (int)sizeof(struct ipv6hdr))) - return 0; + return false; data = (void *)(long)xdp->data; data_end = (void *)(long)xdp->data_end; new_eth = data; @@ -295,7 +295,7 @@ bool encap_v6(struct xdp_md *xdp, struct ctl_value *cval, old_eth = data + sizeof(struct ipv6hdr); if (new_eth + 1 > data_end || old_eth + 1 > data_end || ip6h + 1 > data_end) - return 0; + return false; memcpy(new_eth->eth_dest, cval->mac, 6); memcpy(new_eth->eth_source, old_eth->eth_dest, 6); new_eth->eth_proto = 56710; @@ -314,7 +314,7 @@ bool encap_v6(struct xdp_md *xdp, struct ctl_value *cval, ip6h->saddr.in6_u.u6_addr32[2] = 3; ip6h->saddr.in6_u.u6_addr32[3] = ip_suffix; memcpy(ip6h->daddr.in6_u.u6_addr32, dst->dstv6, 16); - return 1; + return true; } static __attribute__ ((noinline)) @@ -335,7 +335,7 @@ bool encap_v4(struct xdp_md *xdp, struct ctl_value *cval, ip_suffix <<= 15; ip_suffix ^= pckt->flow.src; if (bpf_xdp_adjust_head(xdp, 0 - (int)sizeof(struct iphdr))) - return 0; + return false; data = (void *)(long)xdp->data; data_end = (void *)(long)xdp->data_end; new_eth = data; @@ -343,7 +343,7 @@ bool encap_v4(struct xdp_md *xdp, struct ctl_value *cval, old_eth = 
data + sizeof(struct iphdr); if (new_eth + 1 > data_end || old_eth + 1 > data_end || iph + 1 > data_end) - return 0; + return false; memcpy(new_eth->eth_dest, cval->mac, 6); memcpy(new_eth->eth_source, old_eth->eth_dest, 6); new_eth->eth_proto = 8; @@ -367,8 +367,8 @@ bool encap_v4(struct xdp_md *xdp, struct ctl_value *cval, csum += *next_iph_u16++; iph->check = ~((csum & 0xffff) + (csum >> 16)); if (bpf_xdp_adjust_head(xdp, (int)sizeof(struct iphdr))) - return 0; - return 1; + return false; + return true; } static __attribute__ ((noinline)) @@ -386,10 +386,10 @@ bool decap_v6(struct xdp_md *xdp, void **data, void **data_end, bool inner_v4) else new_eth->eth_proto = 56710; if (bpf_xdp_adjust_head(xdp, (int)sizeof(struct ipv6hdr))) - return 0; + return false; *data = (void *)(long)xdp->data; *data_end = (void *)(long)xdp->data_end; - return 1; + return true; } static __attribute__ ((noinline)) @@ -404,10 +404,10 @@ bool decap_v4(struct xdp_md *xdp, void **data, void **data_end) memcpy(new_eth->eth_dest, old_eth->eth_dest, 6); new_eth->eth_proto = 8; if (bpf_xdp_adjust_head(xdp, (int)sizeof(struct iphdr))) - return 0; + return false; *data = (void *)(long)xdp->data; *data_end = (void *)(long)xdp->data_end; - return 1; + return true; } static __attribute__ ((noinline)) diff --git a/tools/testing/selftests/bpf/progs/xdp_synproxy_kern.c b/tools/testing/selftests/bpf/progs/xdp_synproxy_kern.c new file mode 100644 index 000000000000..736686e903f6 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/xdp_synproxy_kern.c @@ -0,0 +1,843 @@ +// SPDX-License-Identifier: LGPL-2.1 OR BSD-2-Clause +/* Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
*/ + +#include "vmlinux.h" + +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_endian.h> +#include <asm/errno.h> + +#define TC_ACT_OK 0 +#define TC_ACT_SHOT 2 + +#define NSEC_PER_SEC 1000000000L + +#define ETH_ALEN 6 +#define ETH_P_IP 0x0800 +#define ETH_P_IPV6 0x86DD + +#define tcp_flag_word(tp) (((union tcp_word_hdr *)(tp))->words[3]) + +#define IP_DF 0x4000 +#define IP_MF 0x2000 +#define IP_OFFSET 0x1fff + +#define NEXTHDR_TCP 6 + +#define TCPOPT_NOP 1 +#define TCPOPT_EOL 0 +#define TCPOPT_MSS 2 +#define TCPOPT_WINDOW 3 +#define TCPOPT_SACK_PERM 4 +#define TCPOPT_TIMESTAMP 8 + +#define TCPOLEN_MSS 4 +#define TCPOLEN_WINDOW 3 +#define TCPOLEN_SACK_PERM 2 +#define TCPOLEN_TIMESTAMP 10 + +#define TCP_TS_HZ 1000 +#define TS_OPT_WSCALE_MASK 0xf +#define TS_OPT_SACK (1 << 4) +#define TS_OPT_ECN (1 << 5) +#define TSBITS 6 +#define TSMASK (((__u32)1 << TSBITS) - 1) +#define TCP_MAX_WSCALE 14U + +#define IPV4_MAXLEN 60 +#define TCP_MAXLEN 60 + +#define DEFAULT_MSS4 1460 +#define DEFAULT_MSS6 1440 +#define DEFAULT_WSCALE 7 +#define DEFAULT_TTL 64 +#define MAX_ALLOWED_PORTS 8 + +#define swap(a, b) \ + do { typeof(a) __tmp = (a); (a) = (b); (b) = __tmp; } while (0) + +#define __get_unaligned_t(type, ptr) ({ \ + const struct { type x; } __attribute__((__packed__)) *__pptr = (typeof(__pptr))(ptr); \ + __pptr->x; \ +}) + +#define get_unaligned(ptr) __get_unaligned_t(typeof(*(ptr)), (ptr)) + +struct { + __uint(type, BPF_MAP_TYPE_ARRAY); + __type(key, __u32); + __type(value, __u64); + __uint(max_entries, 2); +} values SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_ARRAY); + __type(key, __u32); + __type(value, __u16); + __uint(max_entries, MAX_ALLOWED_PORTS); +} allowed_ports SEC(".maps"); + +/* Some symbols defined in net/netfilter/nf_conntrack_bpf.c are unavailable in + * vmlinux.h if CONFIG_NF_CONNTRACK=m, so they are redefined locally. 
+ */ + +struct bpf_ct_opts___local { + s32 netns_id; + s32 error; + u8 l4proto; + u8 dir; + u8 reserved[2]; +} __attribute__((preserve_access_index)); + +#define BPF_F_CURRENT_NETNS (-1) + +extern struct nf_conn *bpf_xdp_ct_lookup(struct xdp_md *xdp_ctx, + struct bpf_sock_tuple *bpf_tuple, + __u32 len_tuple, + struct bpf_ct_opts___local *opts, + __u32 len_opts) __ksym; + +extern struct nf_conn *bpf_skb_ct_lookup(struct __sk_buff *skb_ctx, + struct bpf_sock_tuple *bpf_tuple, + u32 len_tuple, + struct bpf_ct_opts___local *opts, + u32 len_opts) __ksym; + +extern void bpf_ct_release(struct nf_conn *ct) __ksym; + +static __always_inline void swap_eth_addr(__u8 *a, __u8 *b) +{ + __u8 tmp[ETH_ALEN]; + + __builtin_memcpy(tmp, a, ETH_ALEN); + __builtin_memcpy(a, b, ETH_ALEN); + __builtin_memcpy(b, tmp, ETH_ALEN); +} + +static __always_inline __u16 csum_fold(__u32 csum) +{ + csum = (csum & 0xffff) + (csum >> 16); + csum = (csum & 0xffff) + (csum >> 16); + return (__u16)~csum; +} + +static __always_inline __u16 csum_tcpudp_magic(__be32 saddr, __be32 daddr, + __u32 len, __u8 proto, + __u32 csum) +{ + __u64 s = csum; + + s += (__u32)saddr; + s += (__u32)daddr; +#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ + s += proto + len; +#elif __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ + s += (proto + len) << 8; +#else +#error Unknown endian +#endif + s = (s & 0xffffffff) + (s >> 32); + s = (s & 0xffffffff) + (s >> 32); + + return csum_fold((__u32)s); +} + +static __always_inline __u16 csum_ipv6_magic(const struct in6_addr *saddr, + const struct in6_addr *daddr, + __u32 len, __u8 proto, __u32 csum) +{ + __u64 sum = csum; + int i; + +#pragma unroll + for (i = 0; i < 4; i++) + sum += (__u32)saddr->in6_u.u6_addr32[i]; + +#pragma unroll + for (i = 0; i < 4; i++) + sum += (__u32)daddr->in6_u.u6_addr32[i]; + + /* Don't combine additions to avoid 32-bit overflow. 
*/ + sum += bpf_htonl(len); + sum += bpf_htonl(proto); + + sum = (sum & 0xffffffff) + (sum >> 32); + sum = (sum & 0xffffffff) + (sum >> 32); + + return csum_fold((__u32)sum); +} + +static __always_inline __u64 tcp_clock_ns(void) +{ + return bpf_ktime_get_ns(); +} + +static __always_inline __u32 tcp_ns_to_ts(__u64 ns) +{ + return ns / (NSEC_PER_SEC / TCP_TS_HZ); +} + +static __always_inline __u32 tcp_time_stamp_raw(void) +{ + return tcp_ns_to_ts(tcp_clock_ns()); +} + +struct tcpopt_context { + __u8 *ptr; + __u8 *end; + void *data_end; + __be32 *tsecr; + __u8 wscale; + bool option_timestamp; + bool option_sack; +}; + +static int tscookie_tcpopt_parse(struct tcpopt_context *ctx) +{ + __u8 opcode, opsize; + + if (ctx->ptr >= ctx->end) + return 1; + if (ctx->ptr >= ctx->data_end) + return 1; + + opcode = ctx->ptr[0]; + + if (opcode == TCPOPT_EOL) + return 1; + if (opcode == TCPOPT_NOP) { + ++ctx->ptr; + return 0; + } + + if (ctx->ptr + 1 >= ctx->end) + return 1; + if (ctx->ptr + 1 >= ctx->data_end) + return 1; + opsize = ctx->ptr[1]; + if (opsize < 2) + return 1; + + if (ctx->ptr + opsize > ctx->end) + return 1; + + switch (opcode) { + case TCPOPT_WINDOW: + if (opsize == TCPOLEN_WINDOW && ctx->ptr + TCPOLEN_WINDOW <= ctx->data_end) + ctx->wscale = ctx->ptr[2] < TCP_MAX_WSCALE ? ctx->ptr[2] : TCP_MAX_WSCALE; + break; + case TCPOPT_TIMESTAMP: + if (opsize == TCPOLEN_TIMESTAMP && ctx->ptr + TCPOLEN_TIMESTAMP <= ctx->data_end) { + ctx->option_timestamp = true; + /* Client's tsval becomes our tsecr. 
*/ + *ctx->tsecr = get_unaligned((__be32 *)(ctx->ptr + 2)); + } + break; + case TCPOPT_SACK_PERM: + if (opsize == TCPOLEN_SACK_PERM) + ctx->option_sack = true; + break; + } + + ctx->ptr += opsize; + + return 0; +} + +static int tscookie_tcpopt_parse_batch(__u32 index, void *context) +{ + int i; + + for (i = 0; i < 7; i++) + if (tscookie_tcpopt_parse(context)) + return 1; + return 0; +} + +static __always_inline bool tscookie_init(struct tcphdr *tcp_header, + __u16 tcp_len, __be32 *tsval, + __be32 *tsecr, void *data_end) +{ + struct tcpopt_context loop_ctx = { + .ptr = (__u8 *)(tcp_header + 1), + .end = (__u8 *)tcp_header + tcp_len, + .data_end = data_end, + .tsecr = tsecr, + .wscale = TS_OPT_WSCALE_MASK, + .option_timestamp = false, + .option_sack = false, + }; + u32 cookie; + + bpf_loop(6, tscookie_tcpopt_parse_batch, &loop_ctx, 0); + + if (!loop_ctx.option_timestamp) + return false; + + cookie = tcp_time_stamp_raw() & ~TSMASK; + cookie |= loop_ctx.wscale & TS_OPT_WSCALE_MASK; + if (loop_ctx.option_sack) + cookie |= TS_OPT_SACK; + if (tcp_header->ece && tcp_header->cwr) + cookie |= TS_OPT_ECN; + *tsval = bpf_htonl(cookie); + + return true; +} + +static __always_inline void values_get_tcpipopts(__u16 *mss, __u8 *wscale, + __u8 *ttl, bool ipv6) +{ + __u32 key = 0; + __u64 *value; + + value = bpf_map_lookup_elem(&values, &key); + if (value && *value != 0) { + if (ipv6) + *mss = (*value >> 32) & 0xffff; + else + *mss = *value & 0xffff; + *wscale = (*value >> 16) & 0xf; + *ttl = (*value >> 24) & 0xff; + return; + } + + *mss = ipv6 ? 
DEFAULT_MSS6 : DEFAULT_MSS4; + *wscale = DEFAULT_WSCALE; + *ttl = DEFAULT_TTL; +} + +/* Atomically increment the counter kept at index 1 of the values map; called after a SYNACK has been generated. */ +static __always_inline void values_inc_synacks(void) +{ + __u32 key = 1; + /* The values map stores __u64 entries, so the pointer must be 64-bit wide: a __u32 pointer would touch the wrong half on big-endian and drop the carry into the upper word. */ + __u64 *value; + + value = bpf_map_lookup_elem(&values, &key); + if (value) + __sync_fetch_and_add(value, 1); +} + +static __always_inline bool check_port_allowed(__u16 port) +{ + __u32 i; + + for (i = 0; i < MAX_ALLOWED_PORTS; i++) { + __u32 key = i; + __u16 *value; + + value = bpf_map_lookup_elem(&allowed_ports, &key); + + if (!value) + break; + /* 0 is a terminator value. Check it first to avoid matching on + * a forbidden port == 0 and returning true. + */ + if (*value == 0) + break; + + if (*value == port) + return true; + } + + return false; +} + +struct header_pointers { + struct ethhdr *eth; + struct iphdr *ipv4; + struct ipv6hdr *ipv6; + struct tcphdr *tcp; + __u16 tcp_len; +}; + +static __always_inline int tcp_dissect(void *data, void *data_end, + struct header_pointers *hdr) +{ + hdr->eth = data; + if (hdr->eth + 1 > data_end) + return XDP_DROP; + + switch (bpf_ntohs(hdr->eth->h_proto)) { + case ETH_P_IP: + hdr->ipv6 = NULL; + + hdr->ipv4 = (void *)hdr->eth + sizeof(*hdr->eth); + if (hdr->ipv4 + 1 > data_end) + return XDP_DROP; + if (hdr->ipv4->ihl * 4 < sizeof(*hdr->ipv4)) + return XDP_DROP; + if (hdr->ipv4->version != 4) + return XDP_DROP; + + if (hdr->ipv4->protocol != IPPROTO_TCP) + return XDP_PASS; + + hdr->tcp = (void *)hdr->ipv4 + hdr->ipv4->ihl * 4; + break; + case ETH_P_IPV6: + hdr->ipv4 = NULL; + + hdr->ipv6 = (void *)hdr->eth + sizeof(*hdr->eth); + if (hdr->ipv6 + 1 > data_end) + return XDP_DROP; + if (hdr->ipv6->version != 6) + return XDP_DROP; + + /* XXX: Extension headers are not supported and could circumvent + * XDP SYN flood protection. + */ + if (hdr->ipv6->nexthdr != NEXTHDR_TCP) + return XDP_PASS; + + hdr->tcp = (void *)hdr->ipv6 + sizeof(*hdr->ipv6); + break; + default: + /* XXX: VLANs will circumvent XDP SYN flood protection. 
*/ + return XDP_PASS; + } + + if (hdr->tcp + 1 > data_end) + return XDP_DROP; + hdr->tcp_len = hdr->tcp->doff * 4; + if (hdr->tcp_len < sizeof(*hdr->tcp)) + return XDP_DROP; + + return XDP_TX; +} + +static __always_inline int tcp_lookup(void *ctx, struct header_pointers *hdr, bool xdp) +{ + struct bpf_ct_opts___local ct_lookup_opts = { + .netns_id = BPF_F_CURRENT_NETNS, + .l4proto = IPPROTO_TCP, + }; + struct bpf_sock_tuple tup = {}; + struct nf_conn *ct; + __u32 tup_size; + + if (hdr->ipv4) { + /* TCP doesn't normally use fragments, and XDP can't reassemble + * them. + */ + if ((hdr->ipv4->frag_off & bpf_htons(IP_DF | IP_MF | IP_OFFSET)) != bpf_htons(IP_DF)) + return XDP_DROP; + + tup.ipv4.saddr = hdr->ipv4->saddr; + tup.ipv4.daddr = hdr->ipv4->daddr; + tup.ipv4.sport = hdr->tcp->source; + tup.ipv4.dport = hdr->tcp->dest; + tup_size = sizeof(tup.ipv4); + } else if (hdr->ipv6) { + __builtin_memcpy(tup.ipv6.saddr, &hdr->ipv6->saddr, sizeof(tup.ipv6.saddr)); + __builtin_memcpy(tup.ipv6.daddr, &hdr->ipv6->daddr, sizeof(tup.ipv6.daddr)); + tup.ipv6.sport = hdr->tcp->source; + tup.ipv6.dport = hdr->tcp->dest; + tup_size = sizeof(tup.ipv6); + } else { + /* The verifier can't track that either ipv4 or ipv6 is not + * NULL. 
+ */ + return XDP_ABORTED; + } + if (xdp) + ct = bpf_xdp_ct_lookup(ctx, &tup, tup_size, &ct_lookup_opts, sizeof(ct_lookup_opts)); + else + ct = bpf_skb_ct_lookup(ctx, &tup, tup_size, &ct_lookup_opts, sizeof(ct_lookup_opts)); + if (ct) { + unsigned long status = ct->status; + + bpf_ct_release(ct); + if (status & IPS_CONFIRMED_BIT) + return XDP_PASS; + } else if (ct_lookup_opts.error != -ENOENT) { + return XDP_ABORTED; + } + + /* error == -ENOENT || !(status & IPS_CONFIRMED_BIT) */ + return XDP_TX; +} + +static __always_inline __u8 tcp_mkoptions(__be32 *buf, __be32 *tsopt, __u16 mss, + __u8 wscale) +{ + __be32 *start = buf; + + *buf++ = bpf_htonl((TCPOPT_MSS << 24) | (TCPOLEN_MSS << 16) | mss); + + if (!tsopt) + return buf - start; + + if (tsopt[0] & bpf_htonl(1 << 4)) + *buf++ = bpf_htonl((TCPOPT_SACK_PERM << 24) | + (TCPOLEN_SACK_PERM << 16) | + (TCPOPT_TIMESTAMP << 8) | + TCPOLEN_TIMESTAMP); + else + *buf++ = bpf_htonl((TCPOPT_NOP << 24) | + (TCPOPT_NOP << 16) | + (TCPOPT_TIMESTAMP << 8) | + TCPOLEN_TIMESTAMP); + *buf++ = tsopt[0]; + *buf++ = tsopt[1]; + + if ((tsopt[0] & bpf_htonl(0xf)) != bpf_htonl(0xf)) + *buf++ = bpf_htonl((TCPOPT_NOP << 24) | + (TCPOPT_WINDOW << 16) | + (TCPOLEN_WINDOW << 8) | + wscale); + + return buf - start; +} + +static __always_inline void tcp_gen_synack(struct tcphdr *tcp_header, + __u32 cookie, __be32 *tsopt, + __u16 mss, __u8 wscale) +{ + void *tcp_options; + + tcp_flag_word(tcp_header) = TCP_FLAG_SYN | TCP_FLAG_ACK; + if (tsopt && (tsopt[0] & bpf_htonl(1 << 5))) + tcp_flag_word(tcp_header) |= TCP_FLAG_ECE; + tcp_header->doff = 5; /* doff is part of tcp_flag_word. */ + swap(tcp_header->source, tcp_header->dest); + tcp_header->ack_seq = bpf_htonl(bpf_ntohl(tcp_header->seq) + 1); + tcp_header->seq = bpf_htonl(cookie); + tcp_header->window = 0; + tcp_header->urg_ptr = 0; + tcp_header->check = 0; /* Calculate checksum later. 
*/ + + tcp_options = (void *)(tcp_header + 1); + tcp_header->doff += tcp_mkoptions(tcp_options, tsopt, mss, wscale); +} + +static __always_inline void tcpv4_gen_synack(struct header_pointers *hdr, + __u32 cookie, __be32 *tsopt) +{ + __u8 wscale; + __u16 mss; + __u8 ttl; + + values_get_tcpipopts(&mss, &wscale, &ttl, false); + + swap_eth_addr(hdr->eth->h_source, hdr->eth->h_dest); + + swap(hdr->ipv4->saddr, hdr->ipv4->daddr); + hdr->ipv4->check = 0; /* Calculate checksum later. */ + hdr->ipv4->tos = 0; + hdr->ipv4->id = 0; + hdr->ipv4->ttl = ttl; + + tcp_gen_synack(hdr->tcp, cookie, tsopt, mss, wscale); + + hdr->tcp_len = hdr->tcp->doff * 4; + hdr->ipv4->tot_len = bpf_htons(sizeof(*hdr->ipv4) + hdr->tcp_len); +} + +static __always_inline void tcpv6_gen_synack(struct header_pointers *hdr, + __u32 cookie, __be32 *tsopt) +{ + __u8 wscale; + __u16 mss; + __u8 ttl; + + values_get_tcpipopts(&mss, &wscale, &ttl, true); + + swap_eth_addr(hdr->eth->h_source, hdr->eth->h_dest); + + swap(hdr->ipv6->saddr, hdr->ipv6->daddr); + *(__be32 *)hdr->ipv6 = bpf_htonl(0x60000000); + hdr->ipv6->hop_limit = ttl; + + tcp_gen_synack(hdr->tcp, cookie, tsopt, mss, wscale); + + hdr->tcp_len = hdr->tcp->doff * 4; + hdr->ipv6->payload_len = bpf_htons(hdr->tcp_len); +} + +static __always_inline int syncookie_handle_syn(struct header_pointers *hdr, + void *ctx, + void *data, void *data_end, + bool xdp) +{ + __u32 old_pkt_size, new_pkt_size; + /* Unlike clang 10, clang 11 and 12 generate code that doesn't pass the + * BPF verifier if tsopt is not volatile. Volatile forces it to store + * the pointer value and use it directly, otherwise tcp_mkoptions is + * (mis)compiled like this: + * if (!tsopt) + * return buf - start; + * reg = stored_return_value_of_tscookie_init; + * if (reg) + * tsopt = tsopt_buf; + * else + * tsopt = NULL; + * ... + * *buf++ = tsopt[1]; + * It creates a dead branch where tsopt is assigned NULL, but the + * verifier can't prove it's dead and blocks the program. 
+ */ + __be32 * volatile tsopt = NULL; + __be32 tsopt_buf[2] = {}; + __u16 ip_len; + __u32 cookie; + __s64 value; + + /* Checksum is not yet verified, but both checksum failure and TCP + * header checks return XDP_DROP, so the order doesn't matter. + */ + if (hdr->tcp->fin || hdr->tcp->rst) + return XDP_DROP; + + /* Issue SYN cookies on allowed ports, drop SYN packets on blocked + * ports. + */ + if (!check_port_allowed(bpf_ntohs(hdr->tcp->dest))) + return XDP_DROP; + + if (hdr->ipv4) { + /* Check the IPv4 and TCP checksums before creating a SYNACK. */ + value = bpf_csum_diff(0, 0, (void *)hdr->ipv4, hdr->ipv4->ihl * 4, 0); + if (value < 0) + return XDP_ABORTED; + if (csum_fold(value) != 0) + return XDP_DROP; /* Bad IPv4 checksum. */ + + value = bpf_csum_diff(0, 0, (void *)hdr->tcp, hdr->tcp_len, 0); + if (value < 0) + return XDP_ABORTED; + if (csum_tcpudp_magic(hdr->ipv4->saddr, hdr->ipv4->daddr, + hdr->tcp_len, IPPROTO_TCP, value) != 0) + return XDP_DROP; /* Bad TCP checksum. */ + + ip_len = sizeof(*hdr->ipv4); + + value = bpf_tcp_raw_gen_syncookie_ipv4(hdr->ipv4, hdr->tcp, + hdr->tcp_len); + } else if (hdr->ipv6) { + /* Check the TCP checksum before creating a SYNACK. */ + value = bpf_csum_diff(0, 0, (void *)hdr->tcp, hdr->tcp_len, 0); + if (value < 0) + return XDP_ABORTED; + if (csum_ipv6_magic(&hdr->ipv6->saddr, &hdr->ipv6->daddr, + hdr->tcp_len, IPPROTO_TCP, value) != 0) + return XDP_DROP; /* Bad TCP checksum. */ + + ip_len = sizeof(*hdr->ipv6); + + value = bpf_tcp_raw_gen_syncookie_ipv6(hdr->ipv6, hdr->tcp, + hdr->tcp_len); + } else { + return XDP_ABORTED; + } + + if (value < 0) + return XDP_ABORTED; + cookie = (__u32)value; + + if (tscookie_init((void *)hdr->tcp, hdr->tcp_len, + &tsopt_buf[0], &tsopt_buf[1], data_end)) + tsopt = tsopt_buf; + + /* Check that there is enough space for a SYNACK. It also covers + * the check that the destination of the __builtin_memmove below + * doesn't overflow. 
+ */ + if (data + sizeof(*hdr->eth) + ip_len + TCP_MAXLEN > data_end) + return XDP_ABORTED; + + if (hdr->ipv4) { + if (hdr->ipv4->ihl * 4 > sizeof(*hdr->ipv4)) { + struct tcphdr *new_tcp_header; + + new_tcp_header = data + sizeof(*hdr->eth) + sizeof(*hdr->ipv4); + __builtin_memmove(new_tcp_header, hdr->tcp, sizeof(*hdr->tcp)); + hdr->tcp = new_tcp_header; + + hdr->ipv4->ihl = sizeof(*hdr->ipv4) / 4; + } + + tcpv4_gen_synack(hdr, cookie, tsopt); + } else if (hdr->ipv6) { + tcpv6_gen_synack(hdr, cookie, tsopt); + } else { + return XDP_ABORTED; + } + + /* Recalculate checksums. */ + hdr->tcp->check = 0; + value = bpf_csum_diff(0, 0, (void *)hdr->tcp, hdr->tcp_len, 0); + if (value < 0) + return XDP_ABORTED; + if (hdr->ipv4) { + hdr->tcp->check = csum_tcpudp_magic(hdr->ipv4->saddr, + hdr->ipv4->daddr, + hdr->tcp_len, + IPPROTO_TCP, + value); + + hdr->ipv4->check = 0; + value = bpf_csum_diff(0, 0, (void *)hdr->ipv4, sizeof(*hdr->ipv4), 0); + if (value < 0) + return XDP_ABORTED; + hdr->ipv4->check = csum_fold(value); + } else if (hdr->ipv6) { + hdr->tcp->check = csum_ipv6_magic(&hdr->ipv6->saddr, + &hdr->ipv6->daddr, + hdr->tcp_len, + IPPROTO_TCP, + value); + } else { + return XDP_ABORTED; + } + + /* Set the new packet size. 
*/ + old_pkt_size = data_end - data; + new_pkt_size = sizeof(*hdr->eth) + ip_len + hdr->tcp->doff * 4; + if (xdp) { + if (bpf_xdp_adjust_tail(ctx, new_pkt_size - old_pkt_size)) + return XDP_ABORTED; + } else { + if (bpf_skb_change_tail(ctx, new_pkt_size, 0)) + return XDP_ABORTED; + } + + values_inc_synacks(); + + return XDP_TX; +} + +static __always_inline int syncookie_handle_ack(struct header_pointers *hdr) +{ + int err; + + if (hdr->tcp->rst) + return XDP_DROP; + + if (hdr->ipv4) + err = bpf_tcp_raw_check_syncookie_ipv4(hdr->ipv4, hdr->tcp); + else if (hdr->ipv6) + err = bpf_tcp_raw_check_syncookie_ipv6(hdr->ipv6, hdr->tcp); + else + return XDP_ABORTED; + if (err) + return XDP_DROP; + + return XDP_PASS; +} + +static __always_inline int syncookie_part1(void *ctx, void *data, void *data_end, + struct header_pointers *hdr, bool xdp) +{ + int ret; + + ret = tcp_dissect(data, data_end, hdr); + if (ret != XDP_TX) + return ret; + + ret = tcp_lookup(ctx, hdr, xdp); + if (ret != XDP_TX) + return ret; + + /* Packet is TCP and doesn't belong to an established connection. */ + + if ((hdr->tcp->syn ^ hdr->tcp->ack) != 1) + return XDP_DROP; + + /* Grow the TCP header to TCP_MAXLEN to be able to pass any hdr->tcp_len + * to bpf_tcp_raw_gen_syncookie_ipv{4,6} and pass the verifier. + */ + if (xdp) { + if (bpf_xdp_adjust_tail(ctx, TCP_MAXLEN - hdr->tcp_len)) + return XDP_ABORTED; + } else { + /* Without volatile the verifier throws this error: + * R9 32-bit pointer arithmetic prohibited + */ + volatile u64 old_len = data_end - data; + + if (bpf_skb_change_tail(ctx, old_len + TCP_MAXLEN - hdr->tcp_len, 0)) + return XDP_ABORTED; + } + + return XDP_TX; +} + +static __always_inline int syncookie_part2(void *ctx, void *data, void *data_end, + struct header_pointers *hdr, bool xdp) +{ + if (hdr->ipv4) { + hdr->eth = data; + hdr->ipv4 = (void *)hdr->eth + sizeof(*hdr->eth); + /* IPV4_MAXLEN is needed when calculating checksum. 
+ * At least sizeof(struct iphdr) is needed here to access ihl. + */ + if ((void *)hdr->ipv4 + IPV4_MAXLEN > data_end) + return XDP_ABORTED; + hdr->tcp = (void *)hdr->ipv4 + hdr->ipv4->ihl * 4; + } else if (hdr->ipv6) { + hdr->eth = data; + hdr->ipv6 = (void *)hdr->eth + sizeof(*hdr->eth); + hdr->tcp = (void *)hdr->ipv6 + sizeof(*hdr->ipv6); + } else { + return XDP_ABORTED; + } + + if ((void *)hdr->tcp + TCP_MAXLEN > data_end) + return XDP_ABORTED; + + /* We run out of registers, tcp_len gets spilled to the stack, and the + * verifier forgets its min and max values checked above in tcp_dissect. + */ + hdr->tcp_len = hdr->tcp->doff * 4; + if (hdr->tcp_len < sizeof(*hdr->tcp)) + return XDP_ABORTED; + + return hdr->tcp->syn ? syncookie_handle_syn(hdr, ctx, data, data_end, xdp) : + syncookie_handle_ack(hdr); +} + +SEC("xdp") +int syncookie_xdp(struct xdp_md *ctx) +{ + void *data_end = (void *)(long)ctx->data_end; + void *data = (void *)(long)ctx->data; + struct header_pointers hdr; + int ret; + + ret = syncookie_part1(ctx, data, data_end, &hdr, true); + if (ret != XDP_TX) + return ret; + + data_end = (void *)(long)ctx->data_end; + data = (void *)(long)ctx->data; + + return syncookie_part2(ctx, data, data_end, &hdr, true); +} + +SEC("tc") +int syncookie_tc(struct __sk_buff *skb) +{ + void *data_end = (void *)(long)skb->data_end; + void *data = (void *)(long)skb->data; + struct header_pointers hdr; + int ret; + + ret = syncookie_part1(skb, data, data_end, &hdr, false); + if (ret != XDP_TX) + return ret == XDP_PASS ? 
TC_ACT_OK : TC_ACT_SHOT; + + data_end = (void *)(long)skb->data_end; + data = (void *)(long)skb->data; + + ret = syncookie_part2(skb, data, data_end, &hdr, false); + switch (ret) { + case XDP_PASS: + return TC_ACT_OK; + case XDP_TX: + return bpf_redirect(skb->ifindex, 0); + default: + return TC_ACT_SHOT; + } +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/test_bpftool_synctypes.py b/tools/testing/selftests/bpf/test_bpftool_synctypes.py index c0e7acd698ed..a6410bebe603 100755 --- a/tools/testing/selftests/bpf/test_bpftool_synctypes.py +++ b/tools/testing/selftests/bpf/test_bpftool_synctypes.py @@ -58,7 +58,7 @@ class BlockParser(object): class ArrayParser(BlockParser): """ - A parser for extracting dicionaries of values from some BPF-related arrays. + A parser for extracting a set of values from some BPF-related arrays. @reader: a pointer to the open file to parse @array_name: name of the array to parse """ @@ -66,7 +66,7 @@ class ArrayParser(BlockParser): def __init__(self, reader, array_name): self.array_name = array_name - self.start_marker = re.compile(f'(static )?const char \* const {self.array_name}\[.*\] = {{\n') + self.start_marker = re.compile(f'(static )?const bool {self.array_name}\[.*\] = {{\n') super().__init__(reader) def search_block(self): @@ -80,15 +80,15 @@ class ArrayParser(BlockParser): Parse a block and return data as a dictionary. Items to extract must be on separate lines in the file. 
""" - pattern = re.compile('\[(BPF_\w*)\]\s*= "(.*)",?$') - entries = {} + pattern = re.compile('\[(BPF_\w*)\]\s*= (true|false),?$') + entries = set() while True: line = self.reader.readline() if line == '' or re.match(self.end_marker, line): break capture = pattern.search(line) if capture: - entries[capture.group(1)] = capture.group(2) + entries |= {capture.group(1)} return entries class InlineListParser(BlockParser): @@ -115,7 +115,7 @@ class InlineListParser(BlockParser): class FileExtractor(object): """ A generic reader for extracting data from a given file. This class contains - several helper methods that wrap arround parser objects to extract values + several helper methods that wrap around parser objects to extract values from different structures. This class does not offer a way to set a filename, which is expected to be defined in children classes. @@ -139,21 +139,19 @@ class FileExtractor(object): def get_types_from_array(self, array_name): """ - Search for and parse an array associating names to BPF_* enum members, - for example: + Search for and parse a list of allowed BPF_* enum members, for example: - const char * const prog_type_name[] = { - [BPF_PROG_TYPE_UNSPEC] = "unspec", - [BPF_PROG_TYPE_SOCKET_FILTER] = "socket_filter", - [BPF_PROG_TYPE_KPROBE] = "kprobe", + const bool prog_type_name[] = { + [BPF_PROG_TYPE_UNSPEC] = true, + [BPF_PROG_TYPE_SOCKET_FILTER] = true, + [BPF_PROG_TYPE_KPROBE] = true, }; - Return a dictionary with the enum member names as keys and the - associated names as values, for example: + Return a set of the enum members, for example: - {'BPF_PROG_TYPE_UNSPEC': 'unspec', - 'BPF_PROG_TYPE_SOCKET_FILTER': 'socket_filter', - 'BPF_PROG_TYPE_KPROBE': 'kprobe'} + {'BPF_PROG_TYPE_UNSPEC', + 'BPF_PROG_TYPE_SOCKET_FILTER', + 'BPF_PROG_TYPE_KPROBE'} @array_name: name of the array to parse """ @@ -186,6 +184,27 @@ class FileExtractor(object): parser.search_block(start_marker) return parser.parse(pattern, end_marker) + def 
make_enum_map(self, names, enum_prefix): + """ + Search for and parse an enum containing BPF_* members, just as get_enum + does. However, instead of just returning a set of the variant names, + also generate a textual representation from them by (assuming and) + removing a provided prefix and lowercasing the remainder. Then return a + dict mapping from name to textual representation. + + @enum_values: a set of enum values; e.g., as retrieved by get_enum + @enum_prefix: the prefix to remove from each of the variants to infer + textual representation + """ + mapping = {} + for name in names: + if not name.startswith(enum_prefix): + raise Exception(f"enum variant {name} does not start with {enum_prefix}") + text = name[len(enum_prefix):].lower() + mapping[name] = text + + return mapping + def __get_description_list(self, start_marker, pattern, end_marker): parser = InlineListParser(self.reader) parser.search_block(start_marker) @@ -333,11 +352,9 @@ class ProgFileExtractor(SourceFileExtractor): """ filename = os.path.join(BPFTOOL_DIR, 'prog.c') - def get_prog_types(self): - return self.get_types_from_array('prog_type_name') - def get_attach_types(self): - return self.get_types_from_array('attach_type_strings') + types = self.get_types_from_array('attach_types') + return self.make_enum_map(types, 'BPF_') def get_prog_attach_help(self): return self.get_help_list('ATTACH_TYPE') @@ -348,9 +365,6 @@ class MapFileExtractor(SourceFileExtractor): """ filename = os.path.join(BPFTOOL_DIR, 'map.c') - def get_map_types(self): - return self.get_types_from_array('map_type_name') - def get_map_help(self): return self.get_help_list('TYPE') @@ -363,30 +377,6 @@ class CgroupFileExtractor(SourceFileExtractor): def get_prog_attach_help(self): return self.get_help_list('ATTACH_TYPE') -class CommonFileExtractor(SourceFileExtractor): - """ - An extractor for bpftool's common.c. 
- """ - filename = os.path.join(BPFTOOL_DIR, 'common.c') - - def __init__(self): - super().__init__() - self.attach_types = {} - - def get_attach_types(self): - if not self.attach_types: - self.attach_types = self.get_types_from_array('attach_type_name') - return self.attach_types - - def get_cgroup_attach_types(self): - if not self.attach_types: - self.get_attach_types() - cgroup_types = {} - for (key, value) in self.attach_types.items(): - if key.find('BPF_CGROUP') != -1: - cgroup_types[key] = value - return cgroup_types - class GenericSourceExtractor(SourceFileExtractor): """ An extractor for generic source code files. @@ -403,14 +393,28 @@ class BpfHeaderExtractor(FileExtractor): """ filename = os.path.join(INCLUDE_DIR, 'uapi/linux/bpf.h') + def __init__(self): + super().__init__() + self.attach_types = {} + def get_prog_types(self): return self.get_enum('bpf_prog_type') - def get_map_types(self): - return self.get_enum('bpf_map_type') + def get_map_type_map(self): + names = self.get_enum('bpf_map_type') + return self.make_enum_map(names, 'BPF_MAP_TYPE_') - def get_attach_types(self): - return self.get_enum('bpf_attach_type') + def get_attach_type_map(self): + if not self.attach_types: + names = self.get_enum('bpf_attach_type') + self.attach_types = self.make_enum_map(names, 'BPF_') + return self.attach_types + + def get_cgroup_attach_type_map(self): + if not self.attach_types: + self.get_attach_type_map() + return {name: text for name, text in self.attach_types.items() + if name.startswith('BPF_CGROUP')} class ManPageExtractor(FileExtractor): """ @@ -467,12 +471,6 @@ class BashcompExtractor(FileExtractor): def get_prog_attach_types(self): return self.get_bashcomp_list('BPFTOOL_PROG_ATTACH_TYPES') - def get_map_types(self): - return self.get_bashcomp_list('BPFTOOL_MAP_CREATE_TYPES') - - def get_cgroup_attach_types(self): - return self.get_bashcomp_list('BPFTOOL_CGROUP_ATTACH_TYPES') - def verify(first_set, second_set, message): """ Print all values that differ 
between two sets. @@ -495,21 +493,12 @@ def main(): """) args = argParser.parse_args() - # Map types (enum) - bpf_info = BpfHeaderExtractor() - ref = bpf_info.get_map_types() - - map_info = MapFileExtractor() - source_map_items = map_info.get_map_types() - map_types_enum = set(source_map_items.keys()) - - verify(ref, map_types_enum, - f'Comparing BPF header (enum bpf_map_type) and {MapFileExtractor.filename} (map_type_name):') # Map types (names) - source_map_types = set(source_map_items.values()) + map_info = MapFileExtractor() + source_map_types = set(bpf_info.get_map_type_map().values()) source_map_types.discard('unspec') help_map_types = map_info.get_map_help() @@ -521,41 +510,16 @@ def main(): man_map_types = man_map_info.get_map_types() man_map_info.close() - bashcomp_info = BashcompExtractor() - bashcomp_map_types = bashcomp_info.get_map_types() - verify(source_map_types, help_map_types, - f'Comparing {MapFileExtractor.filename} (map_type_name) and {MapFileExtractor.filename} (do_help() TYPE):') + f'Comparing {BpfHeaderExtractor.filename} (bpf_map_type) and {MapFileExtractor.filename} (do_help() TYPE):') verify(source_map_types, man_map_types, - f'Comparing {MapFileExtractor.filename} (map_type_name) and {ManMapExtractor.filename} (TYPE):') + f'Comparing {BpfHeaderExtractor.filename} (bpf_map_type) and {ManMapExtractor.filename} (TYPE):') verify(help_map_options, man_map_options, f'Comparing {MapFileExtractor.filename} (do_help() OPTIONS) and {ManMapExtractor.filename} (OPTIONS):') - verify(source_map_types, bashcomp_map_types, - f'Comparing {MapFileExtractor.filename} (map_type_name) and {BashcompExtractor.filename} (BPFTOOL_MAP_CREATE_TYPES):') - - # Program types (enum) - - ref = bpf_info.get_prog_types() - - prog_info = ProgFileExtractor() - prog_types = set(prog_info.get_prog_types().keys()) - - verify(ref, prog_types, - f'Comparing BPF header (enum bpf_prog_type) and {ProgFileExtractor.filename} (prog_type_name):') - - # Attach types (enum) - - ref = 
bpf_info.get_attach_types() - bpf_info.close() - - common_info = CommonFileExtractor() - attach_types = common_info.get_attach_types() - - verify(ref, attach_types, - f'Comparing BPF header (enum bpf_attach_type) and {CommonFileExtractor.filename} (attach_type_name):') # Attach types (names) + prog_info = ProgFileExtractor() source_prog_attach_types = set(prog_info.get_attach_types().values()) help_prog_attach_types = prog_info.get_prog_attach_help() @@ -567,22 +531,23 @@ def main(): man_prog_attach_types = man_prog_info.get_attach_types() man_prog_info.close() - bashcomp_info.reset_read() # We stopped at map types, rewind + + bashcomp_info = BashcompExtractor() bashcomp_prog_attach_types = bashcomp_info.get_prog_attach_types() + bashcomp_info.close() verify(source_prog_attach_types, help_prog_attach_types, - f'Comparing {ProgFileExtractor.filename} (attach_type_strings) and {ProgFileExtractor.filename} (do_help() ATTACH_TYPE):') + f'Comparing {ProgFileExtractor.filename} (bpf_attach_type) and {ProgFileExtractor.filename} (do_help() ATTACH_TYPE):') verify(source_prog_attach_types, man_prog_attach_types, - f'Comparing {ProgFileExtractor.filename} (attach_type_strings) and {ManProgExtractor.filename} (ATTACH_TYPE):') + f'Comparing {ProgFileExtractor.filename} (bpf_attach_type) and {ManProgExtractor.filename} (ATTACH_TYPE):') verify(help_prog_options, man_prog_options, f'Comparing {ProgFileExtractor.filename} (do_help() OPTIONS) and {ManProgExtractor.filename} (OPTIONS):') verify(source_prog_attach_types, bashcomp_prog_attach_types, - f'Comparing {ProgFileExtractor.filename} (attach_type_strings) and {BashcompExtractor.filename} (BPFTOOL_PROG_ATTACH_TYPES):') + f'Comparing {ProgFileExtractor.filename} (bpf_attach_type) and {BashcompExtractor.filename} (BPFTOOL_PROG_ATTACH_TYPES):') # Cgroup attach types - - source_cgroup_attach_types = set(common_info.get_cgroup_attach_types().values()) - common_info.close() + source_cgroup_attach_types = 
set(bpf_info.get_cgroup_attach_type_map().values()) + bpf_info.close() cgroup_info = CgroupFileExtractor() help_cgroup_attach_types = cgroup_info.get_prog_attach_help() @@ -594,17 +559,12 @@ def main(): man_cgroup_attach_types = man_cgroup_info.get_attach_types() man_cgroup_info.close() - bashcomp_cgroup_attach_types = bashcomp_info.get_cgroup_attach_types() - bashcomp_info.close() - verify(source_cgroup_attach_types, help_cgroup_attach_types, - f'Comparing {CommonFileExtractor.filename} (attach_type_strings) and {CgroupFileExtractor.filename} (do_help() ATTACH_TYPE):') + f'Comparing {BpfHeaderExtractor.filename} (bpf_attach_type) and {CgroupFileExtractor.filename} (do_help() ATTACH_TYPE):') verify(source_cgroup_attach_types, man_cgroup_attach_types, - f'Comparing {CommonFileExtractor.filename} (attach_type_strings) and {ManCgroupExtractor.filename} (ATTACH_TYPE):') + f'Comparing {BpfHeaderExtractor.filename} (bpf_attach_type) and {ManCgroupExtractor.filename} (ATTACH_TYPE):') verify(help_cgroup_options, man_cgroup_options, f'Comparing {CgroupFileExtractor.filename} (do_help() OPTIONS) and {ManCgroupExtractor.filename} (OPTIONS):') - verify(source_cgroup_attach_types, bashcomp_cgroup_attach_types, - f'Comparing {CommonFileExtractor.filename} (attach_type_strings) and {BashcompExtractor.filename} (BPFTOOL_CGROUP_ATTACH_TYPES):') # Options for remaining commands diff --git a/tools/testing/selftests/bpf/test_btf.h b/tools/testing/selftests/bpf/test_btf.h index 128989bed8b7..fb4f4714eeb4 100644 --- a/tools/testing/selftests/bpf/test_btf.h +++ b/tools/testing/selftests/bpf/test_btf.h @@ -4,6 +4,8 @@ #ifndef _TEST_BTF_H #define _TEST_BTF_H +#define BTF_END_RAW 0xdeadbeef + #define BTF_INFO_ENC(kind, kind_flag, vlen) \ ((!!(kind_flag) << 31) | ((kind) << 24) | ((vlen) & BTF_MAX_VLEN)) @@ -39,6 +41,7 @@ #define BTF_MEMBER_ENC(name, type, bits_offset) \ (name), (type), (bits_offset) #define BTF_ENUM_ENC(name, val) (name), (val) +#define BTF_ENUM64_ENC(name, val_lo32, 
val_hi32) (name), (val_lo32), (val_hi32) #define BTF_MEMBER_OFFSET(bitfield_size, bits_offset) \ ((bitfield_size) << 24 | (bits_offset)) diff --git a/tools/testing/selftests/bpf/test_progs.c b/tools/testing/selftests/bpf/test_progs.c index c639f2e56fc5..3561c97701f2 100644 --- a/tools/testing/selftests/bpf/test_progs.c +++ b/tools/testing/selftests/bpf/test_progs.c @@ -1604,11 +1604,8 @@ int main(int argc, char **argv) struct prog_test_def *test = &prog_test_defs[i]; test->test_num = i + 1; - if (should_run(&env.test_selector, - test->test_num, test->test_name)) - test->should_run = true; - else - test->should_run = false; + test->should_run = should_run(&env.test_selector, + test->test_num, test->test_name); if ((test->run_test == NULL && test->run_serial_test == NULL) || (test->run_test != NULL && test->run_serial_test != NULL)) { diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c index 372579c9f45e..f9d553fbf68a 100644 --- a/tools/testing/selftests/bpf/test_verifier.c +++ b/tools/testing/selftests/bpf/test_verifier.c @@ -51,12 +51,24 @@ #endif #define MAX_INSNS BPF_MAXINSNS +#define MAX_EXPECTED_INSNS 32 +#define MAX_UNEXPECTED_INSNS 32 #define MAX_TEST_INSNS 1000000 #define MAX_FIXUPS 8 #define MAX_NR_MAPS 23 #define MAX_TEST_RUNS 8 #define POINTER_VALUE 0xcafe4all #define TEST_DATA_LEN 64 +#define MAX_FUNC_INFOS 8 +#define MAX_BTF_STRINGS 256 +#define MAX_BTF_TYPES 256 + +#define INSN_OFF_MASK ((__s16)0xFFFF) +#define INSN_IMM_MASK ((__s32)0xFFFFFFFF) +#define SKIP_INSNS() BPF_RAW_INSN(0xde, 0xa, 0xd, 0xbeef, 0xdeadbeef) + +#define DEFAULT_LIBBPF_LOG_LEVEL 4 +#define VERBOSE_LIBBPF_LOG_LEVEL 1 #define F_NEEDS_EFFICIENT_UNALIGNED_ACCESS (1 << 0) #define F_LOAD_WITH_STRICT_ALIGNMENT (1 << 1) @@ -79,6 +91,23 @@ struct bpf_test { const char *descr; struct bpf_insn insns[MAX_INSNS]; struct bpf_insn *fill_insns; + /* If specified, test engine looks for this sequence of + * instructions in the BPF program after 
loading. Allows to + * test rewrites applied by verifier. Use values + * INSN_OFF_MASK and INSN_IMM_MASK to mask `off` and `imm` + * fields if content does not matter. The test case fails if + * specified instructions are not found. + * + * The sequence could be split into sub-sequences by adding + * SKIP_INSNS instruction at the end of each sub-sequence. In + * such case sub-sequences are searched for one after another. + */ + struct bpf_insn expected_insns[MAX_EXPECTED_INSNS]; + /* If specified, test engine applies same pattern matching + * logic as for `expected_insns`. If the specified pattern is + * matched test case is marked as failed. + */ + struct bpf_insn unexpected_insns[MAX_UNEXPECTED_INSNS]; int fixup_map_hash_8b[MAX_FIXUPS]; int fixup_map_hash_48b[MAX_FIXUPS]; int fixup_map_hash_16b[MAX_FIXUPS]; @@ -135,6 +164,14 @@ struct bpf_test { }; enum bpf_attach_type expected_attach_type; const char *kfunc; + struct bpf_func_info func_info[MAX_FUNC_INFOS]; + int func_info_cnt; + char btf_strings[MAX_BTF_STRINGS]; + /* A set of BTF types to load when specified, + * use macro definitions from test_btf.h, + * must end with BTF_END_RAW + */ + __u32 btf_types[MAX_BTF_TYPES]; }; /* Note we want this to be 64 bit aligned so that the end of our array is @@ -388,6 +425,45 @@ static void bpf_fill_torturous_jumps(struct bpf_test *self) } } +static void bpf_fill_big_prog_with_loop_1(struct bpf_test *self) +{ + struct bpf_insn *insn = self->fill_insns; + /* This test was added to catch a specific use after free + * error, which happened upon BPF program reallocation. + * Reallocation is handled by core.c:bpf_prog_realloc, which + * reuses old memory if page boundary is not crossed. The + * value of `len` is chosen to cross this boundary on bpf_loop + * patching. 
+ */ + const int len = getpagesize() - 25; + int callback_load_idx; + int callback_idx; + int i = 0; + + insn[i++] = BPF_ALU64_IMM(BPF_MOV, BPF_REG_1, 1); + callback_load_idx = i; + insn[i++] = BPF_RAW_INSN(BPF_LD | BPF_IMM | BPF_DW, + BPF_REG_2, BPF_PSEUDO_FUNC, 0, + 777 /* filled below */); + insn[i++] = BPF_RAW_INSN(0, 0, 0, 0, 0); + insn[i++] = BPF_ALU64_IMM(BPF_MOV, BPF_REG_3, 0); + insn[i++] = BPF_ALU64_IMM(BPF_MOV, BPF_REG_4, 0); + insn[i++] = BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_loop); + + while (i < len - 3) + insn[i++] = BPF_ALU64_IMM(BPF_MOV, BPF_REG_0, 0); + insn[i++] = BPF_EXIT_INSN(); + + callback_idx = i; + insn[i++] = BPF_ALU64_IMM(BPF_MOV, BPF_REG_0, 0); + insn[i++] = BPF_EXIT_INSN(); + + insn[callback_load_idx].imm = callback_idx - callback_load_idx - 1; + self->func_info[1].insn_off = callback_idx; + self->prog_len = i; + assert(i == len); +} + /* BPF_SK_LOOKUP contains 13 instructions, if you need to fix up maps */ #define BPF_SK_LOOKUP(func) \ /* struct bpf_sock_tuple tuple = {} */ \ @@ -664,34 +740,66 @@ static __u32 btf_raw_types[] = { BTF_MEMBER_ENC(71, 13, 128), /* struct prog_test_member __kptr_ref *ptr; */ }; -static int load_btf(void) +static char bpf_vlog[UINT_MAX >> 8]; + +static int load_btf_spec(__u32 *types, int types_len, + const char *strings, int strings_len) { struct btf_header hdr = { .magic = BTF_MAGIC, .version = BTF_VERSION, .hdr_len = sizeof(struct btf_header), - .type_len = sizeof(btf_raw_types), - .str_off = sizeof(btf_raw_types), - .str_len = sizeof(btf_str_sec), + .type_len = types_len, + .str_off = types_len, + .str_len = strings_len, }; void *ptr, *raw_btf; int btf_fd; + LIBBPF_OPTS(bpf_btf_load_opts, opts, + .log_buf = bpf_vlog, + .log_size = sizeof(bpf_vlog), + .log_level = (verbose + ? 
VERBOSE_LIBBPF_LOG_LEVEL + : DEFAULT_LIBBPF_LOG_LEVEL), + ); - ptr = raw_btf = malloc(sizeof(hdr) + sizeof(btf_raw_types) + - sizeof(btf_str_sec)); + raw_btf = malloc(sizeof(hdr) + types_len + strings_len); + ptr = raw_btf; memcpy(ptr, &hdr, sizeof(hdr)); ptr += sizeof(hdr); - memcpy(ptr, btf_raw_types, hdr.type_len); + memcpy(ptr, types, hdr.type_len); ptr += hdr.type_len; - memcpy(ptr, btf_str_sec, hdr.str_len); + memcpy(ptr, strings, hdr.str_len); ptr += hdr.str_len; - btf_fd = bpf_btf_load(raw_btf, ptr - raw_btf, NULL); - free(raw_btf); + btf_fd = bpf_btf_load(raw_btf, ptr - raw_btf, &opts); if (btf_fd < 0) - return -1; - return btf_fd; + printf("Failed to load BTF spec: '%s'\n", strerror(errno)); + + free(raw_btf); + + return btf_fd < 0 ? -1 : btf_fd; +} + +static int load_btf(void) +{ + return load_btf_spec(btf_raw_types, sizeof(btf_raw_types), + btf_str_sec, sizeof(btf_str_sec)); +} + +static int load_btf_for_test(struct bpf_test *test) +{ + int types_num = 0; + + while (types_num < MAX_BTF_TYPES && + test->btf_types[types_num] != BTF_END_RAW) + ++types_num; + + int types_len = types_num * sizeof(test->btf_types[0]); + + return load_btf_spec(test->btf_types, types_len, + test->btf_strings, sizeof(test->btf_strings)); } static int create_map_spin_lock(void) @@ -770,8 +878,6 @@ static int create_map_kptr(void) return fd; } -static char bpf_vlog[UINT_MAX >> 8]; - static void do_test_fixup(struct bpf_test *test, enum bpf_prog_type prog_type, struct bpf_insn *prog, int *map_fds) { @@ -1126,10 +1232,218 @@ static bool cmp_str_seq(const char *log, const char *exp) return true; } +static int get_xlated_program(int fd_prog, struct bpf_insn **buf, int *cnt) +{ + struct bpf_prog_info info = {}; + __u32 info_len = sizeof(info); + __u32 xlated_prog_len; + __u32 buf_element_size = sizeof(struct bpf_insn); + + if (bpf_obj_get_info_by_fd(fd_prog, &info, &info_len)) { + perror("bpf_obj_get_info_by_fd failed"); + return -1; + } + + xlated_prog_len = info.xlated_prog_len; + if 
(xlated_prog_len % buf_element_size) { + printf("Program length %d is not multiple of %d\n", + xlated_prog_len, buf_element_size); + return -1; + } + + *cnt = xlated_prog_len / buf_element_size; + *buf = calloc(*cnt, buf_element_size); + if (!buf) { + perror("can't allocate xlated program buffer"); + return -ENOMEM; + } + + bzero(&info, sizeof(info)); + info.xlated_prog_len = xlated_prog_len; + info.xlated_prog_insns = (__u64)*buf; + if (bpf_obj_get_info_by_fd(fd_prog, &info, &info_len)) { + perror("second bpf_obj_get_info_by_fd failed"); + goto out_free_buf; + } + + return 0; + +out_free_buf: + free(*buf); + return -1; +} + +static bool is_null_insn(struct bpf_insn *insn) +{ + struct bpf_insn null_insn = {}; + + return memcmp(insn, &null_insn, sizeof(null_insn)) == 0; +} + +static bool is_skip_insn(struct bpf_insn *insn) +{ + struct bpf_insn skip_insn = SKIP_INSNS(); + + return memcmp(insn, &skip_insn, sizeof(skip_insn)) == 0; +} + +static int null_terminated_insn_len(struct bpf_insn *seq, int max_len) +{ + int i; + + for (i = 0; i < max_len; ++i) { + if (is_null_insn(&seq[i])) + return i; + } + return max_len; +} + +static bool compare_masked_insn(struct bpf_insn *orig, struct bpf_insn *masked) +{ + struct bpf_insn orig_masked; + + memcpy(&orig_masked, orig, sizeof(orig_masked)); + if (masked->imm == INSN_IMM_MASK) + orig_masked.imm = INSN_IMM_MASK; + if (masked->off == INSN_OFF_MASK) + orig_masked.off = INSN_OFF_MASK; + + return memcmp(&orig_masked, masked, sizeof(orig_masked)) == 0; +} + +static int find_insn_subseq(struct bpf_insn *seq, struct bpf_insn *subseq, + int seq_len, int subseq_len) +{ + int i, j; + + if (subseq_len > seq_len) + return -1; + + for (i = 0; i < seq_len - subseq_len + 1; ++i) { + bool found = true; + + for (j = 0; j < subseq_len; ++j) { + if (!compare_masked_insn(&seq[i + j], &subseq[j])) { + found = false; + break; + } + } + if (found) + return i; + } + + return -1; +} + +static int find_skip_insn_marker(struct bpf_insn *seq, int len) 
+{ + int i; + + for (i = 0; i < len; ++i) + if (is_skip_insn(&seq[i])) + return i; + + return -1; +} + +/* Return true if all sub-sequences in `subseqs` could be found in + * `seq` one after another. Sub-sequences are separated by a single + * SKIP_INSNS instruction. + */ +static bool find_all_insn_subseqs(struct bpf_insn *seq, struct bpf_insn *subseqs, + int seq_len, int max_subseqs_len) +{ + int subseqs_len = null_terminated_insn_len(subseqs, max_subseqs_len); + + while (subseqs_len > 0) { + int skip_idx = find_skip_insn_marker(subseqs, subseqs_len); + int cur_subseq_len = skip_idx < 0 ? subseqs_len : skip_idx; + int subseq_idx = find_insn_subseq(seq, subseqs, + seq_len, cur_subseq_len); + + if (subseq_idx < 0) + return false; + seq += subseq_idx + cur_subseq_len; + seq_len -= subseq_idx + cur_subseq_len; + subseqs += cur_subseq_len + 1; + subseqs_len -= cur_subseq_len + 1; + } + + return true; +} + +static void print_insn(struct bpf_insn *buf, int cnt) +{ + int i; + + printf(" addr op d s off imm\n"); + for (i = 0; i < cnt; ++i) { + struct bpf_insn *insn = &buf[i]; + + if (is_null_insn(insn)) + break; + + if (is_skip_insn(insn)) + printf(" ...\n"); + else + printf(" %04x: %02x %1x %x %04hx %08x\n", + i, insn->code, insn->dst_reg, + insn->src_reg, insn->off, insn->imm); + } +} + +static bool check_xlated_program(struct bpf_test *test, int fd_prog) +{ + struct bpf_insn *buf; + int cnt; + bool result = true; + bool check_expected = !is_null_insn(test->expected_insns); + bool check_unexpected = !is_null_insn(test->unexpected_insns); + + if (!check_expected && !check_unexpected) + goto out; + + if (get_xlated_program(fd_prog, &buf, &cnt)) { + printf("FAIL: can't get xlated program\n"); + result = false; + goto out; + } + + if (check_expected && + !find_all_insn_subseqs(buf, test->expected_insns, + cnt, MAX_EXPECTED_INSNS)) { + printf("FAIL: can't find expected subsequence of instructions\n"); + result = false; + if (verbose) { + printf("Program:\n"); + print_insn(buf, cnt); 
+ printf("Expected subsequence:\n"); + print_insn(test->expected_insns, MAX_EXPECTED_INSNS); + } + } + + if (check_unexpected && + find_all_insn_subseqs(buf, test->unexpected_insns, + cnt, MAX_UNEXPECTED_INSNS)) { + printf("FAIL: found unexpected subsequence of instructions\n"); + result = false; + if (verbose) { + printf("Program:\n"); + print_insn(buf, cnt); + printf("Un-expected subsequence:\n"); + print_insn(test->unexpected_insns, MAX_UNEXPECTED_INSNS); + } + } + + free(buf); + out: + return result; +} + static void do_test_single(struct bpf_test *test, bool unpriv, int *passes, int *errors) { - int fd_prog, expected_ret, alignment_prevented_execution; + int fd_prog, btf_fd, expected_ret, alignment_prevented_execution; int prog_len, prog_type = test->prog_type; struct bpf_insn *prog = test->insns; LIBBPF_OPTS(bpf_prog_load_opts, opts); @@ -1141,8 +1455,10 @@ static void do_test_single(struct bpf_test *test, bool unpriv, __u32 pflags; int i, err; + fd_prog = -1; for (i = 0; i < MAX_NR_MAPS; i++) map_fds[i] = -1; + btf_fd = -1; if (!prog_type) prog_type = BPF_PROG_TYPE_SOCKET_FILTER; @@ -1175,11 +1491,11 @@ static void do_test_single(struct bpf_test *test, bool unpriv, opts.expected_attach_type = test->expected_attach_type; if (verbose) - opts.log_level = 1; + opts.log_level = VERBOSE_LIBBPF_LOG_LEVEL; else if (expected_ret == VERBOSE_ACCEPT) opts.log_level = 2; else - opts.log_level = 4; + opts.log_level = DEFAULT_LIBBPF_LOG_LEVEL; opts.prog_flags = pflags; if (prog_type == BPF_PROG_TYPE_TRACING && test->kfunc) { @@ -1197,6 +1513,19 @@ static void do_test_single(struct bpf_test *test, bool unpriv, opts.attach_btf_id = attach_btf_id; } + if (test->btf_types[0] != 0) { + btf_fd = load_btf_for_test(test); + if (btf_fd < 0) + goto fail_log; + opts.prog_btf_fd = btf_fd; + } + + if (test->func_info_cnt != 0) { + opts.func_info = test->func_info; + opts.func_info_cnt = test->func_info_cnt; + opts.func_info_rec_size = sizeof(test->func_info[0]); + } + opts.log_buf = 
bpf_vlog; opts.log_size = sizeof(bpf_vlog); fd_prog = bpf_prog_load(prog_type, NULL, "GPL", prog, prog_len, &opts); @@ -1262,6 +1591,9 @@ static void do_test_single(struct bpf_test *test, bool unpriv, if (verbose) printf(", verifier log:\n%s", bpf_vlog); + if (!check_xlated_program(test, fd_prog)) + goto fail_log; + run_errs = 0; run_successes = 0; if (!alignment_prevented_execution && fd_prog >= 0 && test->runs >= 0) { @@ -1305,6 +1637,7 @@ close_fds: if (test->fill_insns) free(test->fill_insns); close(fd_prog); + close(btf_fd); for (i = 0; i < MAX_NR_MAPS; i++) close(map_fds[i]); sched_yield(); diff --git a/tools/testing/selftests/bpf/test_xdp_veth.sh b/tools/testing/selftests/bpf/test_xdp_veth.sh index 392d28cc4e58..49936c4c8567 100755 --- a/tools/testing/selftests/bpf/test_xdp_veth.sh +++ b/tools/testing/selftests/bpf/test_xdp_veth.sh @@ -106,9 +106,9 @@ bpftool prog loadall \ bpftool map update pinned $BPF_DIR/maps/tx_port key 0 0 0 0 value 122 0 0 0 bpftool map update pinned $BPF_DIR/maps/tx_port key 1 0 0 0 value 133 0 0 0 bpftool map update pinned $BPF_DIR/maps/tx_port key 2 0 0 0 value 111 0 0 0 -ip link set dev veth1 xdp pinned $BPF_DIR/progs/redirect_map_0 -ip link set dev veth2 xdp pinned $BPF_DIR/progs/redirect_map_1 -ip link set dev veth3 xdp pinned $BPF_DIR/progs/redirect_map_2 +ip link set dev veth1 xdp pinned $BPF_DIR/progs/xdp_redirect_map_0 +ip link set dev veth2 xdp pinned $BPF_DIR/progs/xdp_redirect_map_1 +ip link set dev veth3 xdp pinned $BPF_DIR/progs/xdp_redirect_map_2 ip -n ${NS1} link set dev veth11 xdp obj xdp_dummy.o sec xdp ip -n ${NS2} link set dev veth22 xdp obj xdp_tx.o sec xdp diff --git a/tools/testing/selftests/bpf/test_xdping.sh b/tools/testing/selftests/bpf/test_xdping.sh index c2f0ddb45531..c3d82e0a7378 100755 --- a/tools/testing/selftests/bpf/test_xdping.sh +++ b/tools/testing/selftests/bpf/test_xdping.sh @@ -95,5 +95,9 @@ for server_args in "" "-I veth0 -s -S" ; do test "$client_args" "$server_args" done +# Test drv mode 
+test "-I veth1 -N" "-I veth0 -s -N" +test "-I veth1 -N -c 10" "-I veth0 -s -N" + echo "OK. All tests passed" exit 0 diff --git a/tools/testing/selftests/bpf/test_xsk.sh b/tools/testing/selftests/bpf/test_xsk.sh index 567500299231..096a957594cd 100755 --- a/tools/testing/selftests/bpf/test_xsk.sh +++ b/tools/testing/selftests/bpf/test_xsk.sh @@ -47,7 +47,7 @@ # conflict with any existing interface # * tests the veth and xsk layers of the topology # -# See the source xdpxceiver.c for information on each test +# See the source xskxceiver.c for information on each test # # Kernel configuration: # --------------------- @@ -160,14 +160,14 @@ statusList=() TEST_NAME="XSK_SELFTESTS_SOFTIRQ" -execxdpxceiver +exec_xskxceiver cleanup_exit ${VETH0} ${VETH1} ${NS1} TEST_NAME="XSK_SELFTESTS_BUSY_POLL" busy_poll=1 setup_vethPairs -execxdpxceiver +exec_xskxceiver ## END TESTS diff --git a/tools/testing/selftests/bpf/verifier/bpf_loop_inline.c b/tools/testing/selftests/bpf/verifier/bpf_loop_inline.c new file mode 100644 index 000000000000..a535d41dc20d --- /dev/null +++ b/tools/testing/selftests/bpf/verifier/bpf_loop_inline.c @@ -0,0 +1,264 @@ +#define BTF_TYPES \ + .btf_strings = "\0int\0i\0ctx\0callback\0main\0", \ + .btf_types = { \ + /* 1: int */ BTF_TYPE_INT_ENC(1, BTF_INT_SIGNED, 0, 32, 4), \ + /* 2: int* */ BTF_PTR_ENC(1), \ + /* 3: void* */ BTF_PTR_ENC(0), \ + /* 4: int __(void*) */ BTF_FUNC_PROTO_ENC(1, 1), \ + BTF_FUNC_PROTO_ARG_ENC(7, 3), \ + /* 5: int __(int, int*) */ BTF_FUNC_PROTO_ENC(1, 2), \ + BTF_FUNC_PROTO_ARG_ENC(5, 1), \ + BTF_FUNC_PROTO_ARG_ENC(7, 2), \ + /* 6: main */ BTF_FUNC_ENC(20, 4), \ + /* 7: callback */ BTF_FUNC_ENC(11, 5), \ + BTF_END_RAW \ + } + +#define MAIN_TYPE 6 +#define CALLBACK_TYPE 7 + +/* can't use BPF_CALL_REL, jit_subprogs adjusts IMM & OFF + * fields for pseudo calls + */ +#define PSEUDO_CALL_INSN() \ + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, BPF_PSEUDO_CALL, \ + INSN_OFF_MASK, INSN_IMM_MASK) + +/* can't use BPF_FUNC_loop constant, + * 
do_misc_fixups adjusts the IMM field + */ +#define HELPER_CALL_INSN() \ + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, INSN_OFF_MASK, INSN_IMM_MASK) + +{ + "inline simple bpf_loop call", + .insns = { + /* main */ + /* force verifier state branching to verify logic on first and + * subsequent bpf_loop insn processing steps + */ + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_jiffies64), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 777, 2), + BPF_ALU64_IMM(BPF_MOV, BPF_REG_1, 1), + BPF_JMP_IMM(BPF_JA, 0, 0, 1), + BPF_ALU64_IMM(BPF_MOV, BPF_REG_1, 2), + + BPF_RAW_INSN(BPF_LD | BPF_IMM | BPF_DW, BPF_REG_2, BPF_PSEUDO_FUNC, 0, 6), + BPF_RAW_INSN(0, 0, 0, 0, 0), + BPF_ALU64_IMM(BPF_MOV, BPF_REG_3, 0), + BPF_ALU64_IMM(BPF_MOV, BPF_REG_4, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_loop), + BPF_ALU64_IMM(BPF_MOV, BPF_REG_0, 0), + BPF_EXIT_INSN(), + /* callback */ + BPF_ALU64_IMM(BPF_MOV, BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .expected_insns = { PSEUDO_CALL_INSN() }, + .unexpected_insns = { HELPER_CALL_INSN() }, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, + .result = ACCEPT, + .runs = 0, + .func_info = { { 0, MAIN_TYPE }, { 12, CALLBACK_TYPE } }, + .func_info_cnt = 2, + BTF_TYPES +}, +{ + "don't inline bpf_loop call, flags non-zero", + .insns = { + /* main */ + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_jiffies64), + BPF_ALU64_REG(BPF_MOV, BPF_REG_6, BPF_REG_0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_jiffies64), + BPF_ALU64_REG(BPF_MOV, BPF_REG_7, BPF_REG_0), + BPF_JMP_IMM(BPF_JNE, BPF_REG_6, 0, 9), + BPF_ALU64_IMM(BPF_MOV, BPF_REG_4, 0), + BPF_JMP_IMM(BPF_JNE, BPF_REG_7, 0, 0), + BPF_ALU64_IMM(BPF_MOV, BPF_REG_1, 1), + BPF_RAW_INSN(BPF_LD | BPF_IMM | BPF_DW, BPF_REG_2, BPF_PSEUDO_FUNC, 0, 7), + BPF_RAW_INSN(0, 0, 0, 0, 0), + BPF_ALU64_IMM(BPF_MOV, BPF_REG_3, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_loop), + BPF_ALU64_IMM(BPF_MOV, BPF_REG_0, 0), + BPF_EXIT_INSN(), + BPF_ALU64_IMM(BPF_MOV, BPF_REG_4, 1), + BPF_JMP_IMM(BPF_JA, 0, 0, -10), 
+ /* callback */ + BPF_ALU64_IMM(BPF_MOV, BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .expected_insns = { HELPER_CALL_INSN() }, + .unexpected_insns = { PSEUDO_CALL_INSN() }, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, + .result = ACCEPT, + .runs = 0, + .func_info = { { 0, MAIN_TYPE }, { 16, CALLBACK_TYPE } }, + .func_info_cnt = 2, + BTF_TYPES +}, +{ + "don't inline bpf_loop call, callback non-constant", + .insns = { + /* main */ + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_jiffies64), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 777, 4), /* pick a random callback */ + + BPF_ALU64_IMM(BPF_MOV, BPF_REG_1, 1), + BPF_RAW_INSN(BPF_LD | BPF_IMM | BPF_DW, BPF_REG_2, BPF_PSEUDO_FUNC, 0, 10), + BPF_RAW_INSN(0, 0, 0, 0, 0), + BPF_JMP_IMM(BPF_JA, 0, 0, 3), + + BPF_ALU64_IMM(BPF_MOV, BPF_REG_1, 1), + BPF_RAW_INSN(BPF_LD | BPF_IMM | BPF_DW, BPF_REG_2, BPF_PSEUDO_FUNC, 0, 8), + BPF_RAW_INSN(0, 0, 0, 0, 0), + + BPF_ALU64_IMM(BPF_MOV, BPF_REG_3, 0), + BPF_ALU64_IMM(BPF_MOV, BPF_REG_4, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_loop), + BPF_ALU64_IMM(BPF_MOV, BPF_REG_0, 0), + BPF_EXIT_INSN(), + /* callback */ + BPF_ALU64_IMM(BPF_MOV, BPF_REG_0, 1), + BPF_EXIT_INSN(), + /* callback #2 */ + BPF_ALU64_IMM(BPF_MOV, BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .expected_insns = { HELPER_CALL_INSN() }, + .unexpected_insns = { PSEUDO_CALL_INSN() }, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, + .result = ACCEPT, + .runs = 0, + .func_info = { + { 0, MAIN_TYPE }, + { 14, CALLBACK_TYPE }, + { 16, CALLBACK_TYPE } + }, + .func_info_cnt = 3, + BTF_TYPES +}, +{ + "bpf_loop_inline and a dead func", + .insns = { + /* main */ + + /* A reference to callback #1 to make verifier count it as a func. + * This reference is overwritten below and callback #1 is dead. 
+ */ + BPF_RAW_INSN(BPF_LD | BPF_IMM | BPF_DW, BPF_REG_2, BPF_PSEUDO_FUNC, 0, 9), + BPF_RAW_INSN(0, 0, 0, 0, 0), + BPF_ALU64_IMM(BPF_MOV, BPF_REG_1, 1), + BPF_RAW_INSN(BPF_LD | BPF_IMM | BPF_DW, BPF_REG_2, BPF_PSEUDO_FUNC, 0, 8), + BPF_RAW_INSN(0, 0, 0, 0, 0), + BPF_ALU64_IMM(BPF_MOV, BPF_REG_3, 0), + BPF_ALU64_IMM(BPF_MOV, BPF_REG_4, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_loop), + BPF_ALU64_IMM(BPF_MOV, BPF_REG_0, 0), + BPF_EXIT_INSN(), + /* callback */ + BPF_ALU64_IMM(BPF_MOV, BPF_REG_0, 1), + BPF_EXIT_INSN(), + /* callback #2 */ + BPF_ALU64_IMM(BPF_MOV, BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .expected_insns = { PSEUDO_CALL_INSN() }, + .unexpected_insns = { HELPER_CALL_INSN() }, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, + .result = ACCEPT, + .runs = 0, + .func_info = { + { 0, MAIN_TYPE }, + { 10, CALLBACK_TYPE }, + { 12, CALLBACK_TYPE } + }, + .func_info_cnt = 3, + BTF_TYPES +}, +{ + "bpf_loop_inline stack locations for loop vars", + .insns = { + /* main */ + BPF_ST_MEM(BPF_W, BPF_REG_10, -12, 0x77), + /* bpf_loop call #1 */ + BPF_ALU64_IMM(BPF_MOV, BPF_REG_1, 1), + BPF_RAW_INSN(BPF_LD | BPF_IMM | BPF_DW, BPF_REG_2, BPF_PSEUDO_FUNC, 0, 22), + BPF_RAW_INSN(0, 0, 0, 0, 0), + BPF_ALU64_IMM(BPF_MOV, BPF_REG_3, 0), + BPF_ALU64_IMM(BPF_MOV, BPF_REG_4, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_loop), + /* bpf_loop call #2 */ + BPF_ALU64_IMM(BPF_MOV, BPF_REG_1, 2), + BPF_RAW_INSN(BPF_LD | BPF_IMM | BPF_DW, BPF_REG_2, BPF_PSEUDO_FUNC, 0, 16), + BPF_RAW_INSN(0, 0, 0, 0, 0), + BPF_ALU64_IMM(BPF_MOV, BPF_REG_3, 0), + BPF_ALU64_IMM(BPF_MOV, BPF_REG_4, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_loop), + /* call func and exit */ + BPF_CALL_REL(2), + BPF_ALU64_IMM(BPF_MOV, BPF_REG_0, 0), + BPF_EXIT_INSN(), + /* func */ + BPF_ST_MEM(BPF_DW, BPF_REG_10, -32, 0x55), + BPF_ALU64_IMM(BPF_MOV, BPF_REG_1, 2), + BPF_RAW_INSN(BPF_LD | BPF_IMM | BPF_DW, BPF_REG_2, BPF_PSEUDO_FUNC, 0, 6), + BPF_RAW_INSN(0, 0, 0, 0, 0), + 
BPF_ALU64_IMM(BPF_MOV, BPF_REG_3, 0), + BPF_ALU64_IMM(BPF_MOV, BPF_REG_4, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_loop), + BPF_ALU64_IMM(BPF_MOV, BPF_REG_0, 0), + BPF_EXIT_INSN(), + /* callback */ + BPF_ALU64_IMM(BPF_MOV, BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .expected_insns = { + BPF_ST_MEM(BPF_W, BPF_REG_10, -12, 0x77), + SKIP_INSNS(), + BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_6, -40), + BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_7, -32), + BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_8, -24), + SKIP_INSNS(), + /* offsets are the same as in the first call */ + BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_6, -40), + BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_7, -32), + BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_8, -24), + SKIP_INSNS(), + BPF_ST_MEM(BPF_DW, BPF_REG_10, -32, 0x55), + SKIP_INSNS(), + /* offsets differ from main because of different offset + * in BPF_ST_MEM instruction + */ + BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_6, -56), + BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_7, -48), + BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_8, -40), + }, + .unexpected_insns = { HELPER_CALL_INSN() }, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, + .result = ACCEPT, + .func_info = { + { 0, MAIN_TYPE }, + { 16, MAIN_TYPE }, + { 25, CALLBACK_TYPE }, + }, + .func_info_cnt = 3, + BTF_TYPES +}, +{ + "inline bpf_loop call in a big program", + .insns = {}, + .fill_helper = bpf_fill_big_prog_with_loop_1, + .expected_insns = { PSEUDO_CALL_INSN() }, + .unexpected_insns = { HELPER_CALL_INSN() }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, + .func_info = { { 0, MAIN_TYPE }, { 16, CALLBACK_TYPE } }, + .func_info_cnt = 2, + BTF_TYPES +}, + +#undef HELPER_CALL_INSN +#undef PSEUDO_CALL_INSN +#undef CALLBACK_TYPE +#undef MAIN_TYPE +#undef BTF_TYPES diff --git a/tools/testing/selftests/bpf/verifier/calls.c b/tools/testing/selftests/bpf/verifier/calls.c index 743ed34c1238..3fb4f69b1962 100644 --- a/tools/testing/selftests/bpf/verifier/calls.c +++ 
b/tools/testing/selftests/bpf/verifier/calls.c @@ -219,6 +219,59 @@ .errstr = "variable ptr_ access var_off=(0x0; 0x7) disallowed", }, { + "calls: invalid kfunc call: referenced arg needs refcounted PTR_TO_BTF_ID", + .insns = { + BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8), + BPF_ST_MEM(BPF_DW, BPF_REG_1, 0, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, BPF_PSEUDO_KFUNC_CALL, 0, 0), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1), + BPF_EXIT_INSN(), + BPF_MOV64_REG(BPF_REG_6, BPF_REG_0), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, BPF_PSEUDO_KFUNC_CALL, 0, 0), + BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_6, 16), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, BPF_PSEUDO_KFUNC_CALL, 0, 0), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .fixup_kfunc_btf_id = { + { "bpf_kfunc_call_test_acquire", 3 }, + { "bpf_kfunc_call_test_ref", 8 }, + { "bpf_kfunc_call_test_ref", 10 }, + }, + .result_unpriv = REJECT, + .result = REJECT, + .errstr = "R1 must be referenced", +}, +{ + "calls: valid kfunc call: referenced arg needs refcounted PTR_TO_BTF_ID", + .insns = { + BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8), + BPF_ST_MEM(BPF_DW, BPF_REG_1, 0, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, BPF_PSEUDO_KFUNC_CALL, 0, 0), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1), + BPF_EXIT_INSN(), + BPF_MOV64_REG(BPF_REG_6, BPF_REG_0), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, BPF_PSEUDO_KFUNC_CALL, 0, 0), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, BPF_PSEUDO_KFUNC_CALL, 0, 0), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .fixup_kfunc_btf_id = { + { "bpf_kfunc_call_test_acquire", 3 }, + { "bpf_kfunc_call_test_ref", 8 }, + { "bpf_kfunc_call_test_release", 10 }, + }, + .result_unpriv = REJECT, + .result = ACCEPT, +}, +{ "calls: basic sanity", .insns = { 
BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 2), diff --git a/tools/testing/selftests/bpf/vmtest.sh b/tools/testing/selftests/bpf/vmtest.sh index e0bb04a97e10..b86ae4a2e5c5 100755 --- a/tools/testing/selftests/bpf/vmtest.sh +++ b/tools/testing/selftests/bpf/vmtest.sh @@ -30,8 +30,7 @@ DEFAULT_COMMAND="./test_progs" MOUNT_DIR="mnt" ROOTFS_IMAGE="root.img" OUTPUT_DIR="$HOME/.bpf_selftests" -KCONFIG_URL="https://raw.githubusercontent.com/libbpf/libbpf/master/travis-ci/vmtest/configs/config-latest.${ARCH}" -KCONFIG_API_URL="https://api.github.com/repos/libbpf/libbpf/contents/travis-ci/vmtest/configs/config-latest.${ARCH}" +KCONFIG_REL_PATHS=("tools/testing/selftests/bpf/config" "tools/testing/selftests/bpf/config.${ARCH}") INDEX_URL="https://raw.githubusercontent.com/libbpf/ci/master/INDEX" NUM_COMPILE_JOBS="$(nproc)" LOG_FILE_BASE="$(date +"bpf_selftests.%Y-%m-%d_%H-%M-%S")" @@ -269,26 +268,42 @@ is_rel_path() [[ ${path:0:1} != "/" ]] } +do_update_kconfig() +{ + local kernel_checkout="$1" + local kconfig_file="$2" + + rm -f "$kconfig_file" 2> /dev/null + + for config in "${KCONFIG_REL_PATHS[@]}"; do + local kconfig_src="${kernel_checkout}/${config}" + cat "$kconfig_src" >> "$kconfig_file" + done +} + update_kconfig() { - local kconfig_file="$1" - local update_command="curl -sLf ${KCONFIG_URL} -o ${kconfig_file}" - # Github does not return the "last-modified" header when retrieving the - # raw contents of the file. Use the API call to get the last-modified - # time of the kernel config and only update the config if it has been - # updated after the previously cached config was created. This avoids - # unnecessarily compiling the kernel and selftests. 
- if [[ -f "${kconfig_file}" ]]; then - local last_modified_date="$(curl -sL -D - "${KCONFIG_API_URL}" -o /dev/null | \ - grep "last-modified" | awk -F ': ' '{print $2}')" - local remote_modified_timestamp="$(date -d "${last_modified_date}" +"%s")" - local local_creation_timestamp="$(stat -c %Y "${kconfig_file}")" + local kernel_checkout="$1" + local kconfig_file="$2" - if [[ "${remote_modified_timestamp}" -gt "${local_creation_timestamp}" ]]; then - ${update_command} - fi + if [[ -f "${kconfig_file}" ]]; then + local local_modified="$(stat -c %Y "${kconfig_file}")" + + for config in "${KCONFIG_REL_PATHS[@]}"; do + local kconfig_src="${kernel_checkout}/${config}" + local src_modified="$(stat -c %Y "${kconfig_src}")" + # Only update the config if it has been updated after the + # previously cached config was created. This avoids + # unnecessarily compiling the kernel and selftests. + if [[ "${src_modified}" -gt "${local_modified}" ]]; then + do_update_kconfig "$kernel_checkout" "$kconfig_file" + # Once we have found one outdated configuration + # there is no need to check other ones. + break + fi + done else - ${update_command} + do_update_kconfig "$kernel_checkout" "$kconfig_file" fi } @@ -372,7 +387,7 @@ main() mkdir -p "${OUTPUT_DIR}" mkdir -p "${mount_dir}" - update_kconfig "${kconfig_file}" + update_kconfig "${kernel_checkout}" "${kconfig_file}" recompile_kernel "${kernel_checkout}" "${make_command}" diff --git a/tools/testing/selftests/bpf/xdp_synproxy.c b/tools/testing/selftests/bpf/xdp_synproxy.c new file mode 100644 index 000000000000..d874ddfb39c4 --- /dev/null +++ b/tools/testing/selftests/bpf/xdp_synproxy.c @@ -0,0 +1,466 @@ +// SPDX-License-Identifier: LGPL-2.1 OR BSD-2-Clause +/* Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
*/ + +#include <stdnoreturn.h> +#include <stdlib.h> +#include <stdio.h> +#include <string.h> +#include <errno.h> +#include <unistd.h> +#include <getopt.h> +#include <signal.h> +#include <sys/types.h> +#include <bpf/bpf.h> +#include <bpf/libbpf.h> +#include <net/if.h> +#include <linux/if_link.h> +#include <linux/limits.h> + +static unsigned int ifindex; +static __u32 attached_prog_id; +static bool attached_tc; + +static void noreturn cleanup(int sig) +{ + LIBBPF_OPTS(bpf_xdp_attach_opts, opts); + int prog_fd; + int err; + + if (attached_prog_id == 0) + exit(0); + + if (attached_tc) { + LIBBPF_OPTS(bpf_tc_hook, hook, + .ifindex = ifindex, + .attach_point = BPF_TC_INGRESS); + + err = bpf_tc_hook_destroy(&hook); + if (err < 0) { + fprintf(stderr, "Error: bpf_tc_hook_destroy: %s\n", strerror(-err)); + fprintf(stderr, "Failed to destroy the TC hook\n"); + exit(1); + } + exit(0); + } + + prog_fd = bpf_prog_get_fd_by_id(attached_prog_id); + if (prog_fd < 0) { + fprintf(stderr, "Error: bpf_prog_get_fd_by_id: %s\n", strerror(-prog_fd)); + err = bpf_xdp_attach(ifindex, -1, 0, NULL); + if (err < 0) { + fprintf(stderr, "Error: bpf_set_link_xdp_fd: %s\n", strerror(-err)); + fprintf(stderr, "Failed to detach XDP program\n"); + exit(1); + } + } else { + opts.old_prog_fd = prog_fd; + err = bpf_xdp_attach(ifindex, -1, XDP_FLAGS_REPLACE, &opts); + close(prog_fd); + if (err < 0) { + fprintf(stderr, "Error: bpf_set_link_xdp_fd_opts: %s\n", strerror(-err)); + /* Not an error if already replaced by someone else. */ + if (err != -EEXIST) { + fprintf(stderr, "Failed to detach XDP program\n"); + exit(1); + } + } + } + exit(0); +} + +static noreturn void usage(const char *progname) +{ + fprintf(stderr, "Usage: %s [--iface <iface>|--prog <prog_id>] [--mss4 <mss ipv4> --mss6 <mss ipv6> --wscale <wscale> --ttl <ttl>] [--ports <port1>,<port2>,...] 
[--single] [--tc]\n", + progname); + exit(1); +} + +static unsigned long parse_arg_ul(const char *progname, const char *arg, unsigned long limit) +{ + unsigned long res; + char *endptr; + + errno = 0; + res = strtoul(arg, &endptr, 10); + if (errno != 0 || *endptr != '\0' || arg[0] == '\0' || res > limit) + usage(progname); + + return res; +} + +static void parse_options(int argc, char *argv[], unsigned int *ifindex, __u32 *prog_id, + __u64 *tcpipopts, char **ports, bool *single, bool *tc) +{ + static struct option long_options[] = { + { "help", no_argument, NULL, 'h' }, + { "iface", required_argument, NULL, 'i' }, + { "prog", required_argument, NULL, 'x' }, + { "mss4", required_argument, NULL, 4 }, + { "mss6", required_argument, NULL, 6 }, + { "wscale", required_argument, NULL, 'w' }, + { "ttl", required_argument, NULL, 't' }, + { "ports", required_argument, NULL, 'p' }, + { "single", no_argument, NULL, 's' }, + { "tc", no_argument, NULL, 'c' }, + { NULL, 0, NULL, 0 }, + }; + unsigned long mss4, mss6, wscale, ttl; + unsigned int tcpipopts_mask = 0; + + if (argc < 2) + usage(argv[0]); + + *ifindex = 0; + *prog_id = 0; + *tcpipopts = 0; + *ports = NULL; + *single = false; + + while (true) { + int opt; + + opt = getopt_long(argc, argv, "", long_options, NULL); + if (opt == -1) + break; + + switch (opt) { + case 'h': + usage(argv[0]); + break; + case 'i': + *ifindex = if_nametoindex(optarg); + if (*ifindex == 0) + usage(argv[0]); + break; + case 'x': + *prog_id = parse_arg_ul(argv[0], optarg, UINT32_MAX); + if (*prog_id == 0) + usage(argv[0]); + break; + case 4: + mss4 = parse_arg_ul(argv[0], optarg, UINT16_MAX); + tcpipopts_mask |= 1 << 0; + break; + case 6: + mss6 = parse_arg_ul(argv[0], optarg, UINT16_MAX); + tcpipopts_mask |= 1 << 1; + break; + case 'w': + wscale = parse_arg_ul(argv[0], optarg, 14); + tcpipopts_mask |= 1 << 2; + break; + case 't': + ttl = parse_arg_ul(argv[0], optarg, UINT8_MAX); + tcpipopts_mask |= 1 << 3; + break; + case 'p': + *ports = optarg; + 
break; + case 's': + *single = true; + break; + case 'c': + *tc = true; + break; + default: + usage(argv[0]); + } + } + if (optind < argc) + usage(argv[0]); + + if (tcpipopts_mask == 0xf) { + if (mss4 == 0 || mss6 == 0 || wscale == 0 || ttl == 0) + usage(argv[0]); + /* widen before shifting: unsigned long is only 32 bits on 32-bit targets */ + *tcpipopts = ((__u64)mss6 << 32) | (ttl << 24) | (wscale << 16) | mss4; + } else if (tcpipopts_mask != 0) { + usage(argv[0]); + } + + if (*ifindex != 0 && *prog_id != 0) + usage(argv[0]); + if (*ifindex == 0 && *prog_id == 0) + usage(argv[0]); +} + +/* + * Open and load "<argv0>_kern.o", pick the syncookie_tc or syncookie_xdp + * program depending on tc, and attach it to ifindex (TC ingress or XDP). + * On success the program id is stored in attached_prog_id and cleanup() is + * installed as the SIGINT/SIGTERM handler. Returns 0 or a negative error. + */ +static int syncookie_attach(const char *argv0, unsigned int ifindex, bool tc) +{ + struct bpf_prog_info info = {}; + __u32 info_len = sizeof(info); + char xdp_filename[PATH_MAX]; + struct bpf_program *prog; + struct bpf_object *obj; + int prog_fd; + int err; + + snprintf(xdp_filename, sizeof(xdp_filename), "%s_kern.o", argv0); + obj = bpf_object__open_file(xdp_filename, NULL); + err = libbpf_get_error(obj); + if (err < 0) { + fprintf(stderr, "Error: bpf_object__open_file: %s\n", strerror(-err)); + return err; + } + + err = bpf_object__load(obj); + if (err < 0) { + fprintf(stderr, "Error: bpf_object__load: %s\n", strerror(-err)); + /* obj is open from here on, so every exit goes through out: to close it */ + goto out; + } + + prog = bpf_object__find_program_by_name(obj, tc ? 
"syncookie_tc" : "syncookie_xdp"); + if (!prog) { + fprintf(stderr, "Error: bpf_object__find_program_by_name: program was not found\n"); + err = -ENOENT; + goto out; + } + + prog_fd = bpf_program__fd(prog); + + err = bpf_obj_get_info_by_fd(prog_fd, &info, &info_len); + if (err < 0) { + fprintf(stderr, "Error: bpf_obj_get_info_by_fd: %s\n", strerror(-err)); + goto out; + } + attached_tc = tc; + attached_prog_id = info.id; + signal(SIGINT, cleanup); + signal(SIGTERM, cleanup); + if (tc) { + LIBBPF_OPTS(bpf_tc_hook, hook, + .ifindex = ifindex, + .attach_point = BPF_TC_INGRESS); + LIBBPF_OPTS(bpf_tc_opts, opts, + .handle = 1, + .priority = 1, + .prog_fd = prog_fd); + + err = bpf_tc_hook_create(&hook); + if (err < 0) { + fprintf(stderr, "Error: bpf_tc_hook_create: %s\n", + strerror(-err)); + goto fail; + } + err = bpf_tc_attach(&hook, &opts); + if (err < 0) { + fprintf(stderr, "Error: bpf_tc_attach: %s\n", + strerror(-err)); + goto fail; + } + + } else { + err = bpf_xdp_attach(ifindex, prog_fd, + XDP_FLAGS_UPDATE_IF_NOEXIST, NULL); + if (err < 0) { + fprintf(stderr, "Error: bpf_xdp_attach: %s\n", + strerror(-err)); + goto fail; + } + } + err = 0; +out: + bpf_object__close(obj); + return err; +fail: + signal(SIGINT, SIG_DFL); + signal(SIGTERM, SIG_DFL); + attached_prog_id = 0; + goto out; +} + +static int syncookie_open_bpf_maps(__u32 prog_id, int *values_map_fd, int *ports_map_fd) +{ + struct bpf_prog_info prog_info; + __u32 map_ids[8]; + __u32 info_len; + int prog_fd; + int err; + int i; + + *values_map_fd = -1; + *ports_map_fd = -1; + + prog_fd = bpf_prog_get_fd_by_id(prog_id); + if (prog_fd < 0) { + fprintf(stderr, "Error: bpf_prog_get_fd_by_id: %s\n", strerror(-prog_fd)); + return prog_fd; + } + + prog_info = (struct bpf_prog_info) { + .nr_map_ids = 8, + .map_ids = (__u64)(unsigned long)map_ids, + }; + info_len = sizeof(prog_info); + + err = bpf_obj_get_info_by_fd(prog_fd, &prog_info, &info_len); + if (err != 0) { + fprintf(stderr, "Error: bpf_obj_get_info_by_fd: %s\n", strerror(-err)); 
+ goto out; + } + + if (prog_info.nr_map_ids < 2) { + fprintf(stderr, "Error: Found %u BPF maps, expected at least 2\n", + prog_info.nr_map_ids); + err = -ENOENT; + goto out; + } + + /* the reported nr_map_ids may exceed what map_ids[] can hold; never index past the buffer */ + for (i = 0; i < prog_info.nr_map_ids && i < (int)(sizeof(map_ids) / sizeof(map_ids[0])); i++) { + struct bpf_map_info map_info = {}; + int map_fd; + + err = bpf_map_get_fd_by_id(map_ids[i]); + if (err < 0) { + fprintf(stderr, "Error: bpf_map_get_fd_by_id: %s\n", strerror(-err)); + goto err_close_map_fds; + } + map_fd = err; + + info_len = sizeof(map_info); + err = bpf_obj_get_info_by_fd(map_fd, &map_info, &info_len); + if (err != 0) { + fprintf(stderr, "Error: bpf_obj_get_info_by_fd: %s\n", strerror(-err)); + close(map_fd); + goto err_close_map_fds; + } + if (strcmp(map_info.name, "values") == 0) { + *values_map_fd = map_fd; + continue; + } + if (strcmp(map_info.name, "allowed_ports") == 0) { + *ports_map_fd = map_fd; + continue; + } + close(map_fd); + } + + if (*values_map_fd != -1 && *ports_map_fd != -1) { + err = 0; + goto out; + } + + err = -ENOENT; + +err_close_map_fds: + if (*values_map_fd != -1) + close(*values_map_fd); + if (*ports_map_fd != -1) + close(*ports_map_fd); + *values_map_fd = -1; + *ports_map_fd = -1; + +out: + close(prog_fd); + return err; +} + +int main(int argc, char *argv[]) +{ + int values_map_fd, ports_map_fd; + __u64 tcpipopts; + bool firstiter; + __u64 prevcnt; + __u32 prog_id; + char *ports; + bool single; + int err = 0; + bool tc = false; + + parse_options(argc, argv, &ifindex, &prog_id, &tcpipopts, &ports, + &single, &tc); + + if (prog_id == 0) { + if (!tc) { + err = bpf_xdp_query_id(ifindex, 0, &prog_id); + if (err < 0) { + fprintf(stderr, "Error: bpf_xdp_query_id: %s\n", + strerror(-err)); + goto out; + } + } + if (prog_id == 0) { + err = syncookie_attach(argv[0], ifindex, tc); + if (err < 0) + goto out; + prog_id = attached_prog_id; + } + } + + err = syncookie_open_bpf_maps(prog_id, &values_map_fd, &ports_map_fd); + if (err < 0) + goto out; + + if (ports) { + __u16 port_last = 0; + __u32 port_idx = 0; + 
char *p = ports; + + fprintf(stderr, "Replacing allowed ports\n"); + + while (p && *p != '\0') { + char *token = strsep(&p, ","); + __u16 port; + + port = parse_arg_ul(argv[0], token, UINT16_MAX); + err = bpf_map_update_elem(ports_map_fd, &port_idx, &port, BPF_ANY); + if (err != 0) { + fprintf(stderr, "Error: bpf_map_update_elem: %s\n", strerror(-err)); + fprintf(stderr, "Failed to add port %u (index %u)\n", + port, port_idx); + goto out_close_maps; + } + fprintf(stderr, "Added port %u\n", port); + port_idx++; + } + err = bpf_map_update_elem(ports_map_fd, &port_idx, &port_last, BPF_ANY); + if (err != 0) { + fprintf(stderr, "Error: bpf_map_update_elem: %s\n", strerror(-err)); + fprintf(stderr, "Failed to add the terminator value 0 (index %u)\n", + port_idx); + goto out_close_maps; + } + } + + if (tcpipopts) { + __u32 key = 0; + + fprintf(stderr, "Replacing TCP/IP options\n"); + + err = bpf_map_update_elem(values_map_fd, &key, &tcpipopts, BPF_ANY); + if (err != 0) { + fprintf(stderr, "Error: bpf_map_update_elem: %s\n", strerror(-err)); + goto out_close_maps; + } + } + + if ((ports || tcpipopts) && attached_prog_id == 0 && !single) + goto out_close_maps; + + prevcnt = 0; + firstiter = true; + while (true) { + __u32 key = 1; + __u64 value; + + err = bpf_map_lookup_elem(values_map_fd, &key, &value); + if (err != 0) { + fprintf(stderr, "Error: bpf_map_lookup_elem: %s\n", strerror(-err)); + goto out_close_maps; + } + if (firstiter) { + prevcnt = value; + firstiter = false; + } + if (single) { + printf("Total SYNACKs generated: %llu\n", value); + break; + } + printf("SYNACKs generated: %llu (total %llu)\n", value - prevcnt, value); + prevcnt = value; + sleep(1); + } + +out_close_maps: + close(values_map_fd); + close(ports_map_fd); +out: + return err == 0 ? 
0 : 1; +} diff --git a/tools/lib/bpf/xsk.c b/tools/testing/selftests/bpf/xsk.c index af136f73b09d..f2721a4ae7c5 100644 --- a/tools/lib/bpf/xsk.c +++ b/tools/testing/selftests/bpf/xsk.c @@ -30,16 +30,10 @@ #include <sys/types.h> #include <linux/if_link.h> -#include "bpf.h" -#include "libbpf.h" -#include "libbpf_internal.h" +#include <bpf/bpf.h> +#include <bpf/libbpf.h> #include "xsk.h" -/* entire xsk.h and xsk.c is going away in libbpf 1.0, so ignore all internal - * uses of deprecated APIs - */ -#pragma GCC diagnostic ignored "-Wdeprecated-declarations" - #ifndef SOL_XDP #define SOL_XDP 283 #endif @@ -52,6 +46,8 @@ #define PF_XDP AF_XDP #endif +#define pr_warn(fmt, ...) fprintf(stderr, fmt, ##__VA_ARGS__) + enum xsk_prog { XSK_PROG_FALLBACK, XSK_PROG_REDIRECT_FLAGS, @@ -286,11 +282,10 @@ out_mmap: return err; } -DEFAULT_VERSION(xsk_umem__create_v0_0_4, xsk_umem__create, LIBBPF_0.0.4) -int xsk_umem__create_v0_0_4(struct xsk_umem **umem_ptr, void *umem_area, - __u64 size, struct xsk_ring_prod *fill, - struct xsk_ring_cons *comp, - const struct xsk_umem_config *usr_config) +int xsk_umem__create(struct xsk_umem **umem_ptr, void *umem_area, + __u64 size, struct xsk_ring_prod *fill, + struct xsk_ring_cons *comp, + const struct xsk_umem_config *usr_config) { struct xdp_umem_reg mr; struct xsk_umem *umem; @@ -351,25 +346,9 @@ struct xsk_umem_config_v1 { __u32 frame_headroom; }; -COMPAT_VERSION(xsk_umem__create_v0_0_2, xsk_umem__create, LIBBPF_0.0.2) -int xsk_umem__create_v0_0_2(struct xsk_umem **umem_ptr, void *umem_area, - __u64 size, struct xsk_ring_prod *fill, - struct xsk_ring_cons *comp, - const struct xsk_umem_config *usr_config) -{ - struct xsk_umem_config config; - - memcpy(&config, usr_config, sizeof(struct xsk_umem_config_v1)); - config.flags = 0; - - return xsk_umem__create_v0_0_4(umem_ptr, umem_area, size, fill, comp, - &config); -} - static enum xsk_prog get_xsk_prog(void) { enum xsk_prog detected = XSK_PROG_FALLBACK; - __u32 size_out, retval, duration; char 
data_in = 0, data_out; struct bpf_insn insns[] = { BPF_LD_MAP_FD(BPF_REG_1, 0), @@ -378,6 +357,12 @@ static enum xsk_prog get_xsk_prog(void) BPF_EMIT_CALL(BPF_FUNC_redirect_map), BPF_EXIT_INSN(), }; + LIBBPF_OPTS(bpf_test_run_opts, opts, + .data_in = &data_in, + .data_size_in = 1, + .data_out = &data_out, + ); + int prog_fd, map_fd, ret, insn_cnt = ARRAY_SIZE(insns); map_fd = bpf_map_create(BPF_MAP_TYPE_XSKMAP, NULL, sizeof(int), sizeof(int), 1, NULL); @@ -392,8 +377,8 @@ static enum xsk_prog get_xsk_prog(void) return detected; } - ret = bpf_prog_test_run(prog_fd, 0, &data_in, 1, &data_out, &size_out, &retval, &duration); - if (!ret && retval == XDP_PASS) + ret = bpf_prog_test_run_opts(prog_fd, &opts); + if (!ret && opts.retval == XDP_PASS) detected = XSK_PROG_REDIRECT_FLAGS; close(prog_fd); close(map_fd); @@ -510,7 +495,7 @@ static int xsk_create_bpf_link(struct xsk_socket *xsk) int link_fd; int err; - err = bpf_get_link_xdp_id(ctx->ifindex, &prog_id, xsk->config.xdp_flags); + err = bpf_xdp_query_id(ctx->ifindex, xsk->config.xdp_flags, &prog_id); if (err) { pr_warn("getting XDP prog id failed\n"); return err; @@ -536,6 +521,25 @@ static int xsk_create_bpf_link(struct xsk_socket *xsk) return 0; } +/* Copy up to sz - 1 bytes from zero-terminated src string and ensure that dst + * is zero-terminated string no matter what (unless sz == 0, in which case + * it's a no-op). It's conceptually close to FreeBSD's strlcpy(), but differs + * in what is returned. Given this is internal helper, it's trivial to extend + * this, when necessary. Use this instead of strncpy inside libbpf source code. 
+ */ +static inline void libbpf_strlcpy(char *dst, const char *src, size_t sz) +{ + size_t i; + + if (sz == 0) + return; + + sz--; + for (i = 0; i < sz && src[i]; i++) + dst[i] = src[i]; + dst[i] = '\0'; +} + static int xsk_get_max_queues(struct xsk_socket *xsk) { struct ethtool_channels channels = { .cmd = ETHTOOL_GCHANNELS }; @@ -792,8 +796,8 @@ static int xsk_init_xdp_res(struct xsk_socket *xsk, if (ctx->has_bpf_link) err = xsk_create_bpf_link(xsk); else - err = bpf_set_link_xdp_fd(xsk->ctx->ifindex, ctx->prog_fd, - xsk->config.xdp_flags); + err = bpf_xdp_attach(xsk->ctx->ifindex, ctx->prog_fd, + xsk->config.xdp_flags, NULL); if (err) goto err_attach_xdp_prog; @@ -811,7 +815,7 @@ err_set_bpf_maps: if (ctx->has_bpf_link) close(ctx->link_fd); else - bpf_set_link_xdp_fd(ctx->ifindex, -1, 0); + bpf_xdp_detach(ctx->ifindex, 0, NULL); err_attach_xdp_prog: close(ctx->prog_fd); err_load_xdp_prog: @@ -862,7 +866,7 @@ static int __xsk_setup_xdp_prog(struct xsk_socket *_xdp, int *xsks_map_fd) if (ctx->has_bpf_link) err = xsk_link_lookup(ctx->ifindex, &prog_id, &ctx->link_fd); else - err = bpf_get_link_xdp_id(ctx->ifindex, &prog_id, xsk->config.xdp_flags); + err = bpf_xdp_query_id(ctx->ifindex, xsk->config.xdp_flags, &prog_id); if (err) return err; @@ -876,6 +880,11 @@ static int __xsk_setup_xdp_prog(struct xsk_socket *_xdp, int *xsks_map_fd) return err; } +int xsk_setup_xdp_prog_xsk(struct xsk_socket *xsk, int *xsks_map_fd) +{ + return __xsk_setup_xdp_prog(xsk, xsks_map_fd); +} + static struct xsk_ctx *xsk_get_ctx(struct xsk_umem *umem, int ifindex, __u32 queue_id) { @@ -954,6 +963,7 @@ static struct xsk_ctx *xsk_create_ctx(struct xsk_socket *xsk, ctx->fill = fill; ctx->comp = comp; list_add(&ctx->list, &umem->ctx_list); + ctx->has_bpf_link = xsk_probe_bpf_link(); return ctx; } @@ -1055,7 +1065,6 @@ int xsk_socket__create_shared(struct xsk_socket **xsk_ptr, } } xsk->ctx = ctx; - xsk->ctx->has_bpf_link = xsk_probe_bpf_link(); if (rx && !rx_setup_done) { err = 
setsockopt(xsk->fd, SOL_XDP, XDP_RX_RING, @@ -1147,8 +1156,6 @@ int xsk_socket__create_shared(struct xsk_socket **xsk_ptr, goto out_mmap_tx; } - ctx->prog_fd = -1; - if (!(xsk->config.libbpf_flags & XSK_LIBBPF_FLAGS__INHIBIT_PROG_LOAD)) { err = __xsk_setup_xdp_prog(xsk, NULL); if (err) @@ -1229,7 +1236,10 @@ void xsk_socket__delete(struct xsk_socket *xsk) ctx = xsk->ctx; umem = ctx->umem; - if (ctx->prog_fd != -1) { + + xsk_put_ctx(ctx, true); + + if (!ctx->refcount) { xsk_delete_bpf_maps(xsk); close(ctx->prog_fd); if (ctx->has_bpf_link) @@ -1248,8 +1258,6 @@ void xsk_socket__delete(struct xsk_socket *xsk) } } - xsk_put_ctx(ctx, true); - umem->refcount--; /* Do not close an fd that also has an associated umem connected * to it. diff --git a/tools/lib/bpf/xsk.h b/tools/testing/selftests/bpf/xsk.h index 64e9c57fd792..997723b0bfb2 100644 --- a/tools/lib/bpf/xsk.h +++ b/tools/testing/selftests/bpf/xsk.h @@ -9,15 +9,15 @@ * Author(s): Magnus Karlsson <magnus.karlsson@intel.com> */ -#ifndef __LIBBPF_XSK_H -#define __LIBBPF_XSK_H +#ifndef __XSK_H +#define __XSK_H #include <stdio.h> #include <stdint.h> #include <stdbool.h> #include <linux/if_xdp.h> -#include "libbpf.h" +#include <bpf/libbpf.h> #ifdef __cplusplus extern "C" { @@ -251,9 +251,7 @@ static inline __u64 xsk_umem__add_offset_to_addr(__u64 addr) return xsk_umem__extract_addr(addr) + xsk_umem__extract_offset(addr); } -LIBBPF_API LIBBPF_DEPRECATED_SINCE(0, 7, "AF_XDP support deprecated and moved to libxdp") int xsk_umem__fd(const struct xsk_umem *umem); -LIBBPF_API LIBBPF_DEPRECATED_SINCE(0, 7, "AF_XDP support deprecated and moved to libxdp") int xsk_socket__fd(const struct xsk_socket *xsk); #define XSK_RING_CONS__DEFAULT_NUM_DESCS 2048 @@ -271,9 +269,8 @@ struct xsk_umem_config { __u32 flags; }; -LIBBPF_API LIBBPF_DEPRECATED_SINCE(0, 7, "AF_XDP support deprecated and moved to libxdp") +int xsk_setup_xdp_prog_xsk(struct xsk_socket *xsk, int *xsks_map_fd); int xsk_setup_xdp_prog(int ifindex, int *xsks_map_fd); 
-LIBBPF_API LIBBPF_DEPRECATED_SINCE(0, 7, "AF_XDP support deprecated and moved to libxdp") int xsk_socket__update_xskmap(struct xsk_socket *xsk, int xsks_map_fd); /* Flags for the libbpf_flags field. */ @@ -288,32 +285,17 @@ struct xsk_socket_config { }; /* Set config to NULL to get the default configuration. */ -LIBBPF_API LIBBPF_DEPRECATED_SINCE(0, 7, "AF_XDP support deprecated and moved to libxdp") int xsk_umem__create(struct xsk_umem **umem, void *umem_area, __u64 size, struct xsk_ring_prod *fill, struct xsk_ring_cons *comp, const struct xsk_umem_config *config); -LIBBPF_API LIBBPF_DEPRECATED_SINCE(0, 7, "AF_XDP support deprecated and moved to libxdp") -int xsk_umem__create_v0_0_2(struct xsk_umem **umem, - void *umem_area, __u64 size, - struct xsk_ring_prod *fill, - struct xsk_ring_cons *comp, - const struct xsk_umem_config *config); -LIBBPF_API LIBBPF_DEPRECATED_SINCE(0, 7, "AF_XDP support deprecated and moved to libxdp") -int xsk_umem__create_v0_0_4(struct xsk_umem **umem, - void *umem_area, __u64 size, - struct xsk_ring_prod *fill, - struct xsk_ring_cons *comp, - const struct xsk_umem_config *config); -LIBBPF_API LIBBPF_DEPRECATED_SINCE(0, 7, "AF_XDP support deprecated and moved to libxdp") int xsk_socket__create(struct xsk_socket **xsk, const char *ifname, __u32 queue_id, struct xsk_umem *umem, struct xsk_ring_cons *rx, struct xsk_ring_prod *tx, const struct xsk_socket_config *config); -LIBBPF_API LIBBPF_DEPRECATED_SINCE(0, 7, "AF_XDP support deprecated and moved to libxdp") int xsk_socket__create_shared(struct xsk_socket **xsk_ptr, const char *ifname, __u32 queue_id, struct xsk_umem *umem, @@ -324,13 +306,11 @@ int xsk_socket__create_shared(struct xsk_socket **xsk_ptr, const struct xsk_socket_config *config); /* Returns 0 for success and -EBUSY if the umem is still in use. 
*/ -LIBBPF_API LIBBPF_DEPRECATED_SINCE(0, 7, "AF_XDP support deprecated and moved to libxdp") int xsk_umem__delete(struct xsk_umem *umem); -LIBBPF_API LIBBPF_DEPRECATED_SINCE(0, 7, "AF_XDP support deprecated and moved to libxdp") void xsk_socket__delete(struct xsk_socket *xsk); #ifdef __cplusplus } /* extern "C" */ #endif -#endif /* __LIBBPF_XSK_H */ +#endif /* __XSK_H */ diff --git a/tools/testing/selftests/bpf/xsk_prereqs.sh b/tools/testing/selftests/bpf/xsk_prereqs.sh index 684e813803ec..a0b71723a818 100755 --- a/tools/testing/selftests/bpf/xsk_prereqs.sh +++ b/tools/testing/selftests/bpf/xsk_prereqs.sh @@ -8,7 +8,7 @@ ksft_xfail=2 ksft_xpass=3 ksft_skip=4 -XSKOBJ=xdpxceiver +XSKOBJ=xskxceiver validate_root_exec() { @@ -77,7 +77,7 @@ validate_ip_utility() [ ! $(type -P ip) ] && { echo "'ip' not found. Skipping tests."; test_exit $ksft_skip; } } -execxdpxceiver() +exec_xskxceiver() { if [[ $busy_poll -eq 1 ]]; then ARGS+="-b " diff --git a/tools/testing/selftests/bpf/xdpxceiver.c b/tools/testing/selftests/bpf/xskxceiver.c index e5992a6b5e09..74d56d971baf 100644 --- a/tools/testing/selftests/bpf/xdpxceiver.c +++ b/tools/testing/selftests/bpf/xskxceiver.c @@ -97,12 +97,12 @@ #include <time.h> #include <unistd.h> #include <stdatomic.h> -#include <bpf/xsk.h> -#include "xdpxceiver.h" +#include "xsk.h" +#include "xskxceiver.h" #include "../kselftest.h" /* AF_XDP APIs were moved into libxdp and marked as deprecated in libbpf. 
- * Until xdpxceiver is either moved or re-writed into libxdp, suppress + * Until xskxceiver is either moved or rewritten into libxdp, suppress * deprecation warnings in this file */ #pragma GCC diagnostic ignored "-Wdeprecated-declarations" @@ -1085,6 +1085,7 @@ static void thread_common_ops(struct test_spec *test, struct ifobject *ifobject) { u64 umem_sz = ifobject->umem->num_frames * ifobject->umem->frame_size; int mmap_flags = MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE; + LIBBPF_OPTS(bpf_xdp_query_opts, opts); int ret, ifindex; void *bufs; u32 i; @@ -1130,10 +1131,26 @@ static void thread_common_ops(struct test_spec *test, struct ifobject *ifobject) if (!ifindex) exit_with_error(errno); - ret = xsk_setup_xdp_prog(ifindex, &ifobject->xsk_map_fd); + ret = xsk_setup_xdp_prog_xsk(ifobject->xsk->xsk, &ifobject->xsk_map_fd); if (ret) exit_with_error(-ret); + ret = bpf_xdp_query(ifindex, ifobject->xdp_flags, &opts); + if (ret) + exit_with_error(-ret); + + if (ifobject->xdp_flags & XDP_FLAGS_SKB_MODE) { + if (opts.attach_mode != XDP_ATTACHED_SKB) { + ksft_print_msg("ERROR: [%s] XDP prog not in SKB mode\n", __func__); + exit_with_error(-EINVAL); + } + } else if (ifobject->xdp_flags & XDP_FLAGS_DRV_MODE) { + if (opts.attach_mode != XDP_ATTACHED_DRV) { + ksft_print_msg("ERROR: [%s] XDP prog not in DRV mode\n", __func__); + exit_with_error(-EINVAL); + } + } + ret = xsk_socket__update_xskmap(ifobject->xsk->xsk, ifobject->xsk_map_fd); if (ret) exit_with_error(-ret); diff --git a/tools/testing/selftests/bpf/xdpxceiver.h b/tools/testing/selftests/bpf/xskxceiver.h index 8f672b0fe0e1..3d17053f98e5 100644 --- a/tools/testing/selftests/bpf/xdpxceiver.h +++ b/tools/testing/selftests/bpf/xskxceiver.h @@ -2,8 +2,8 @@ * Copyright(c) 2020 Intel Corporation. 
*/ -#ifndef XDPXCEIVER_H_ -#define XDPXCEIVER_H_ +#ifndef XSKXCEIVER_H_ +#define XSKXCEIVER_H_ #ifndef SOL_XDP #define SOL_XDP 283 @@ -169,4 +169,4 @@ pthread_cond_t pacing_cond = PTHREAD_COND_INITIALIZER; int pkts_in_flight; -#endif /* XDPXCEIVER_H */ +#endif /* XSKXCEIVER_H_ */ diff --git a/tools/testing/selftests/drivers/net/dsa/Makefile b/tools/testing/selftests/drivers/net/dsa/Makefile new file mode 100644 index 000000000000..2a731d5c6d85 --- /dev/null +++ b/tools/testing/selftests/drivers/net/dsa/Makefile @@ -0,0 +1,17 @@ +# SPDX-License-Identifier: GPL-2.0+ OR MIT + +TEST_PROGS = bridge_locked_port.sh \ + bridge_mdb.sh \ + bridge_mld.sh \ + bridge_vlan_aware.sh \ + bridge_vlan_mcast.sh \ + bridge_vlan_unaware.sh \ + local_termination.sh \ + no_forwarding.sh \ + test_bridge_fdb_stress.sh + +TEST_PROGS_EXTENDED := lib.sh + +TEST_FILES := forwarding.config + +include ../../../lib.mk diff --git a/tools/testing/selftests/drivers/net/mlxsw/devlink_linecard.sh b/tools/testing/selftests/drivers/net/mlxsw/devlink_linecard.sh index 08a922d8b86a..224ca3695c89 100755 --- a/tools/testing/selftests/drivers/net/mlxsw/devlink_linecard.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/devlink_linecard.sh @@ -84,6 +84,13 @@ lc_wait_until_port_count_is() busywait "$timeout" until_lc_port_count_is "$port_count" lc_port_count_get "$lc" } +lc_nested_devlink_dev_get() +{ + local lc=$1 + + devlink lc show $DEVLINK_DEV lc $lc -j | jq -e -r ".[][][].nested_devlink" +} + PROV_UNPROV_TIMEOUT=8000 # ms POST_PROV_ACT_TIMEOUT=2000 # ms PROV_PORTS_INSTANTIATION_TIMEOUT=15000 # ms @@ -191,12 +198,30 @@ ports_check() check_err $? "Unexpected port count linecard $lc (got $port_count, expected $expected_port_count)" } +lc_dev_info_provisioned_check() +{ + local lc=$1 + local nested_devlink_dev=$2 + local fixed_hw_revision + local running_ini_version + + fixed_hw_revision=$(devlink dev info $nested_devlink_dev -j | \ + jq -e -r '.[][].versions.fixed."hw.revision"') + check_err $? 
"Failed to get linecard $lc fixed.hw.revision" + log_info "Linecard $lc fixed.hw.revision: \"$fixed_hw_revision\"" + running_ini_version=$(devlink dev info $nested_devlink_dev -j | \ + jq -e -r '.[][].versions.running."ini.version"') + check_err $? "Failed to get linecard $lc running.ini.version" + log_info "Linecard $lc running.ini.version: \"$running_ini_version\"" +} + provision_test() { RET=0 local lc local type local state + local nested_devlink_dev lc=$LC_SLOT supported_types_check $lc @@ -207,6 +232,11 @@ provision_test() fi provision_one $lc $LC_16X100G_TYPE ports_check $lc $LC_16X100G_PORT_COUNT + + nested_devlink_dev=$(lc_nested_devlink_dev_get $lc) + check_err $? "Failed to get nested devlink handle of linecard $lc" + lc_dev_info_provisioned_check $lc $nested_devlink_dev + log_test "Provision" } @@ -220,12 +250,32 @@ interface_check() setup_wait } +lc_dev_info_active_check() +{ + local lc=$1 + local nested_devlink_dev=$2 + local fixed_device_fw_psid + local running_device_fw + + fixed_device_fw_psid=$(devlink dev info $nested_devlink_dev -j | \ + jq -e -r ".[][].versions.fixed" | \ + jq -e -r '."fw.psid"') + check_err $? "Failed to get linecard $lc fixed fw PSID" + log_info "Linecard $lc fixed.fw.psid: \"$fixed_device_fw_psid\"" + + running_device_fw=$(devlink dev info $nested_devlink_dev -j | \ + jq -e -r ".[][].versions.running.fw") + check_err $? "Failed to get linecard $lc running.fw.version" + log_info "Linecard $lc running.fw: \"$running_device_fw\"" +} + activation_16x100G_test() { RET=0 local lc local type local state + local nested_devlink_dev lc=$LC_SLOT type=$LC_16X100G_TYPE @@ -238,6 +288,10 @@ activation_16x100G_test() interface_check + nested_devlink_dev=$(lc_nested_devlink_dev_get $lc) + check_err $? 
"Failed to get nested devlink handle of linecard $lc" + lc_dev_info_active_check $lc $nested_devlink_dev + log_test "Activation 16x100G" } diff --git a/tools/testing/selftests/drivers/net/mlxsw/rif_counter_scale.sh b/tools/testing/selftests/drivers/net/mlxsw/rif_counter_scale.sh new file mode 100644 index 000000000000..a43a9926e690 --- /dev/null +++ b/tools/testing/selftests/drivers/net/mlxsw/rif_counter_scale.sh @@ -0,0 +1,107 @@ +# SPDX-License-Identifier: GPL-2.0 + +RIF_COUNTER_NUM_NETIFS=2 + +rif_counter_addr4() +{ + local i=$1; shift + local p=$1; shift + + printf 192.0.%d.%d $((i / 64)) $(((4 * i % 256) + p)) +} + +rif_counter_addr4pfx() +{ + rif_counter_addr4 $@ + printf /30 +} + +rif_counter_h1_create() +{ + simple_if_init $h1 +} + +rif_counter_h1_destroy() +{ + simple_if_fini $h1 +} + +rif_counter_h2_create() +{ + simple_if_init $h2 +} + +rif_counter_h2_destroy() +{ + simple_if_fini $h2 +} + +rif_counter_setup_prepare() +{ + h1=${NETIFS[p1]} + h2=${NETIFS[p2]} + + vrf_prepare + + rif_counter_h1_create + rif_counter_h2_create +} + +rif_counter_cleanup() +{ + local count=$1; shift + + pre_cleanup + + for ((i = 1; i <= count; i++)); do + vlan_destroy $h2 $i + done + + rif_counter_h2_destroy + rif_counter_h1_destroy + + vrf_cleanup + + if [[ -v RIF_COUNTER_BATCH_FILE ]]; then + rm -f $RIF_COUNTER_BATCH_FILE + fi +} + + +rif_counter_test() +{ + local count=$1; shift + local should_fail=$1; shift + + RIF_COUNTER_BATCH_FILE="$(mktemp)" + + for ((i = 1; i <= count; i++)); do + vlan_create $h2 $i v$h2 $(rif_counter_addr4pfx $i 2) + done + for ((i = 1; i <= count; i++)); do + cat >> $RIF_COUNTER_BATCH_FILE <<-EOF + stats set dev $h2.$i l3_stats on + EOF + done + + ip -b $RIF_COUNTER_BATCH_FILE + check_err_fail $should_fail $? 
"RIF counter enablement" +} + +rif_counter_traffic_test() +{ + local count=$1; shift + local i; + + for ((i = count; i > 0; i /= 2)); do + $MZ $h1 -Q $i -c 1 -d 20msec -p 100 -a own -b $(mac_get $h2) \ + -A $(rif_counter_addr4 $i 1) \ + -B $(rif_counter_addr4 $i 2) \ + -q -t udp sp=54321,dp=12345 + done + for ((i = count; i > 0; i /= 2)); do + busywait "$TC_HIT_TIMEOUT" until_counter_is "== 1" \ + hw_stats_get l3_stats $h2.$i rx packets > /dev/null + check_err $? "Traffic not seen at RIF $h2.$i" + done +} diff --git a/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/resource_scale.sh b/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/resource_scale.sh index e9f65bd2e299..688338bbeb97 100755 --- a/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/resource_scale.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/resource_scale.sh @@ -25,7 +25,16 @@ cleanup() trap cleanup EXIT -ALL_TESTS="router tc_flower mirror_gre tc_police port rif_mac_profile" +ALL_TESTS=" + router + tc_flower + mirror_gre + tc_police + port + rif_mac_profile + rif_counter +" + for current_test in ${TESTS:-$ALL_TESTS}; do RET_FIN=0 source ${current_test}_scale.sh @@ -36,16 +45,32 @@ for current_test in ${TESTS:-$ALL_TESTS}; do for should_fail in 0 1; do RET=0 target=$(${current_test}_get_target "$should_fail") + if ((target == 0)); then + log_test_skip "'$current_test' should_fail=$should_fail test" + continue + fi + ${current_test}_setup_prepare setup_wait $num_netifs + # Update target in case occupancy of a certain resource changed + # following the test setup. 
+ target=$(${current_test}_get_target "$should_fail") ${current_test}_test "$target" "$should_fail" - ${current_test}_cleanup - devlink_reload if [[ "$should_fail" -eq 0 ]]; then log_test "'$current_test' $target" + + if ((!RET)); then + tt=${current_test}_traffic_test + if [[ $(type -t $tt) == "function" ]]; then + $tt "$target" + log_test "'$current_test' $target traffic test" + fi + fi else log_test "'$current_test' overflow $target" fi + ${current_test}_cleanup $target + devlink_reload RET_FIN=$(( RET_FIN || RET )) done done diff --git a/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/rif_counter_scale.sh b/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/rif_counter_scale.sh new file mode 120000 index 000000000000..1f5752e8ffc0 --- /dev/null +++ b/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/rif_counter_scale.sh @@ -0,0 +1 @@ +../spectrum/rif_counter_scale.sh
\ No newline at end of file diff --git a/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/tc_flower_scale.sh b/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/tc_flower_scale.sh index efd798a85931..4444bbace1a9 100644 --- a/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/tc_flower_scale.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/tc_flower_scale.sh @@ -4,17 +4,22 @@ source ../tc_flower_scale.sh tc_flower_get_target() { local should_fail=$1; shift + local max_cnts # The driver associates a counter with each tc filter, which means the # number of supported filters is bounded by the number of available # counters. - # Currently, the driver supports 30K (30,720) flow counters and six of - # these are used for multicast routing. - local target=30714 + max_cnts=$(devlink_resource_size_get counters flow) + + # Remove already allocated counters. + ((max_cnts -= $(devlink_resource_occ_get counters flow))) + + # Each rule uses two counters, for packets and bytes. + ((max_cnts /= 2)) if ((! 
should_fail)); then - echo $target + echo $max_cnts else - echo $((target + 1)) + echo $((max_cnts + 1)) fi } diff --git a/tools/testing/selftests/drivers/net/mlxsw/spectrum/resource_scale.sh b/tools/testing/selftests/drivers/net/mlxsw/spectrum/resource_scale.sh index dea33dc93790..95d9f710a630 100755 --- a/tools/testing/selftests/drivers/net/mlxsw/spectrum/resource_scale.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/spectrum/resource_scale.sh @@ -22,7 +22,16 @@ cleanup() devlink_sp_read_kvd_defaults trap cleanup EXIT -ALL_TESTS="router tc_flower mirror_gre tc_police port rif_mac_profile" +ALL_TESTS=" + router + tc_flower + mirror_gre + tc_police + port + rif_mac_profile + rif_counter +" + for current_test in ${TESTS:-$ALL_TESTS}; do RET_FIN=0 source ${current_test}_scale.sh @@ -41,15 +50,31 @@ for current_test in ${TESTS:-$ALL_TESTS}; do for should_fail in 0 1; do RET=0 target=$(${current_test}_get_target "$should_fail") + if ((target == 0)); then + log_test_skip "'$current_test' [$profile] should_fail=$should_fail test" + continue + fi ${current_test}_setup_prepare setup_wait $num_netifs + # Update target in case occupancy of a certain resource + # changed following the test setup. 
+ target=$(${current_test}_get_target "$should_fail") ${current_test}_test "$target" "$should_fail" - ${current_test}_cleanup if [[ "$should_fail" -eq 0 ]]; then log_test "'$current_test' [$profile] $target" + + if ((!RET)); then + tt=${current_test}_traffic_test + if [[ $(type -t $tt) == "function" ]] + then + $tt "$target" + log_test "'$current_test' [$profile] $target traffic test" + fi + fi else log_test "'$current_test' [$profile] overflow $target" fi + ${current_test}_cleanup $target RET_FIN=$(( RET_FIN || RET )) done done diff --git a/tools/testing/selftests/drivers/net/mlxsw/spectrum/rif_counter_scale.sh b/tools/testing/selftests/drivers/net/mlxsw/spectrum/rif_counter_scale.sh new file mode 100644 index 000000000000..d44536276e8a --- /dev/null +++ b/tools/testing/selftests/drivers/net/mlxsw/spectrum/rif_counter_scale.sh @@ -0,0 +1,34 @@ +# SPDX-License-Identifier: GPL-2.0 +source ../rif_counter_scale.sh + +rif_counter_get_target() +{ + local should_fail=$1; shift + local max_cnts + local max_rifs + local target + + max_rifs=$(devlink_resource_size_get rifs) + max_cnts=$(devlink_resource_size_get counters rif) + + # Remove already allocated RIFs. + ((max_rifs -= $(devlink_resource_occ_get rifs))) + + # 10 KVD slots per counter, ingress+egress counters per RIF + ((max_cnts /= 20)) + + # Pointless to run the overflow test if we don't have enough RIFs to + # host all the counters. + if ((max_cnts > max_rifs && should_fail)); then + echo 0 + return + fi + + target=$((max_rifs < max_cnts ? max_rifs : max_cnts)) + + if ((! 
should_fail)); then + echo $target + else + echo $((target + 1)) + fi +} diff --git a/tools/testing/selftests/drivers/net/mlxsw/tc_flower_scale.sh b/tools/testing/selftests/drivers/net/mlxsw/tc_flower_scale.sh index aa74be9f47c8..d3d9e60d6ddf 100644 --- a/tools/testing/selftests/drivers/net/mlxsw/tc_flower_scale.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/tc_flower_scale.sh @@ -77,6 +77,7 @@ tc_flower_rules_create() filter add dev $h2 ingress \ prot ipv6 \ pref 1000 \ + handle 42$i \ flower $tcflags dst_ip $(tc_flower_addr $i) \ action drop EOF @@ -121,3 +122,19 @@ tc_flower_test() tcflags="skip_sw" __tc_flower_test $count $should_fail } + +tc_flower_traffic_test() +{ + local count=$1; shift + local i; + + for ((i = count - 1; i > 0; i /= 2)); do + $MZ -6 $h1 -c 1 -d 20msec -p 100 -a own -b $(mac_get $h2) \ + -A $(tc_flower_addr 0) -B $(tc_flower_addr $i) \ + -q -t udp sp=54321,dp=12345 + done + for ((i = count - 1; i > 0; i /= 2)); do + tc_check_packets "dev $h2 ingress" 42$i 1 + check_err $? 
"Traffic not seen at rule #$i" + done +} diff --git a/tools/testing/selftests/drivers/net/netdevsim/fib.sh b/tools/testing/selftests/drivers/net/netdevsim/fib.sh index fc794cd30389..6800de816e8b 100755 --- a/tools/testing/selftests/drivers/net/netdevsim/fib.sh +++ b/tools/testing/selftests/drivers/net/netdevsim/fib.sh @@ -16,6 +16,7 @@ ALL_TESTS=" ipv4_replay ipv4_flush ipv4_error_path + ipv4_delete_fail ipv6_add ipv6_metric ipv6_append_single @@ -29,11 +30,13 @@ ALL_TESTS=" ipv6_replay_single ipv6_replay_multipath ipv6_error_path + ipv6_delete_fail " NETDEVSIM_PATH=/sys/bus/netdevsim/ DEV_ADDR=1337 DEV=netdevsim${DEV_ADDR} SYSFS_NET_DIR=/sys/bus/netdevsim/devices/$DEV/net/ +DEBUGFS_DIR=/sys/kernel/debug/netdevsim/$DEV/ NUM_NETIFS=0 source $lib_dir/lib.sh source $lib_dir/fib_offload_lib.sh @@ -157,6 +160,27 @@ ipv4_error_path() ipv4_error_path_replay } +ipv4_delete_fail() +{ + RET=0 + + echo "y" > $DEBUGFS_DIR/fib/fail_route_delete + + ip -n testns1 link add name dummy1 type dummy + ip -n testns1 link set dev dummy1 up + + ip -n testns1 route add 192.0.2.0/24 dev dummy1 + ip -n testns1 route del 192.0.2.0/24 dev dummy1 &> /dev/null + + # We should not be able to delete the netdev if we are leaking a + # reference. + ip -n testns1 link del dev dummy1 + + log_test "IPv4 route delete failure" + + echo "n" > $DEBUGFS_DIR/fib/fail_route_delete +} + ipv6_add() { fib_ipv6_add_test "testns1" @@ -304,6 +328,27 @@ ipv6_error_path() ipv6_error_path_replay } +ipv6_delete_fail() +{ + RET=0 + + echo "y" > $DEBUGFS_DIR/fib/fail_route_delete + + ip -n testns1 link add name dummy1 type dummy + ip -n testns1 link set dev dummy1 up + + ip -n testns1 route add 2001:db8:1::/64 dev dummy1 + ip -n testns1 route del 2001:db8:1::/64 dev dummy1 &> /dev/null + + # We should not be able to delete the netdev if we are leaking a + # reference. 
+ ip -n testns1 link del dev dummy1 + + log_test "IPv6 route delete failure" + + echo "n" > $DEBUGFS_DIR/fib/fail_route_delete +} + fib_notify_on_flag_change_set() { local notify=$1; shift diff --git a/tools/testing/selftests/net/.gitignore b/tools/testing/selftests/net/.gitignore index ffc35a22e914..892306bdb47d 100644 --- a/tools/testing/selftests/net/.gitignore +++ b/tools/testing/selftests/net/.gitignore @@ -38,3 +38,4 @@ ioam6_parser toeplitz tun cmsg_sender +unix_connect
\ No newline at end of file diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile index d0460a969060..e2dfef8b78a7 100644 --- a/tools/testing/selftests/net/Makefile +++ b/tools/testing/selftests/net/Makefile @@ -35,9 +35,12 @@ TEST_PROGS += cmsg_time.sh cmsg_ipv6.sh TEST_PROGS += srv6_end_dt46_l3vpn_test.sh TEST_PROGS += srv6_end_dt4_l3vpn_test.sh TEST_PROGS += srv6_end_dt6_l3vpn_test.sh +TEST_PROGS += srv6_hencap_red_l3vpn_test.sh +TEST_PROGS += srv6_hl2encap_red_l2vpn_test.sh TEST_PROGS += vrf_strict_mode_test.sh TEST_PROGS += arp_ndisc_evict_nocarrier.sh TEST_PROGS += ndisc_unsolicited_na_test.sh +TEST_PROGS += arp_ndisc_untracked_subnets.sh TEST_PROGS += stress_reuseport_listen.sh TEST_PROGS_EXTENDED := in_netns.sh setup_loopback.sh setup_veth.sh TEST_PROGS_EXTENDED += toeplitz_client.sh toeplitz.sh diff --git a/tools/testing/selftests/net/af_unix/Makefile b/tools/testing/selftests/net/af_unix/Makefile index df341648f818..969620ae9928 100644 --- a/tools/testing/selftests/net/af_unix/Makefile +++ b/tools/testing/selftests/net/af_unix/Makefile @@ -1,2 +1,3 @@ -TEST_GEN_PROGS := test_unix_oob +TEST_GEN_PROGS := test_unix_oob unix_connect + include ../../lib.mk diff --git a/tools/testing/selftests/net/af_unix/unix_connect.c b/tools/testing/selftests/net/af_unix/unix_connect.c new file mode 100644 index 000000000000..d799fd8f5c7c --- /dev/null +++ b/tools/testing/selftests/net/af_unix/unix_connect.c @@ -0,0 +1,148 @@ +// SPDX-License-Identifier: GPL-2.0 + +#define _GNU_SOURCE +#include <sched.h> + +#include <stddef.h> +#include <stdio.h> +#include <unistd.h> + +#include <sys/socket.h> +#include <sys/un.h> + +#include "../../kselftest_harness.h" + +FIXTURE(unix_connect) +{ + int server, client; + int family; +}; + +FIXTURE_VARIANT(unix_connect) +{ + int type; + char sun_path[8]; + int len; + int flags; + int err; +}; + +FIXTURE_VARIANT_ADD(unix_connect, stream_pathname) +{ + .type = SOCK_STREAM, + .sun_path = "test", + .len = 4 + 1, 
+ .flags = 0, + .err = 0, +}; + +FIXTURE_VARIANT_ADD(unix_connect, stream_abstract) +{ + .type = SOCK_STREAM, + .sun_path = "\0test", + .len = 5, + .flags = 0, + .err = 0, +}; + +FIXTURE_VARIANT_ADD(unix_connect, stream_pathname_netns) +{ + .type = SOCK_STREAM, + .sun_path = "test", + .len = 4 + 1, + .flags = CLONE_NEWNET, + .err = 0, +}; + +FIXTURE_VARIANT_ADD(unix_connect, stream_abstract_netns) +{ + .type = SOCK_STREAM, + .sun_path = "\0test", + .len = 5, + .flags = CLONE_NEWNET, + .err = ECONNREFUSED, +}; + +FIXTURE_VARIANT_ADD(unix_connect, dgram_pathname) +{ + .type = SOCK_DGRAM, + .sun_path = "test", + .len = 4 + 1, + .flags = 0, + .err = 0, +}; + +FIXTURE_VARIANT_ADD(unix_connect, dgram_abstract) +{ + .type = SOCK_DGRAM, + .sun_path = "\0test", + .len = 5, + .flags = 0, + .err = 0, +}; + +FIXTURE_VARIANT_ADD(unix_connect, dgram_pathname_netns) +{ + .type = SOCK_DGRAM, + .sun_path = "test", + .len = 4 + 1, + .flags = CLONE_NEWNET, + .err = 0, +}; + +FIXTURE_VARIANT_ADD(unix_connect, dgram_abstract_netns) +{ + .type = SOCK_DGRAM, + .sun_path = "\0test", + .len = 5, + .flags = CLONE_NEWNET, + .err = ECONNREFUSED, +}; + +FIXTURE_SETUP(unix_connect) +{ + self->family = AF_UNIX; +} + +FIXTURE_TEARDOWN(unix_connect) +{ + close(self->server); + close(self->client); + + if (variant->sun_path[0]) + remove("test"); +} + +TEST_F(unix_connect, test) +{ + socklen_t addrlen; + struct sockaddr_un addr = { + .sun_family = self->family, + }; + int err; + + self->server = socket(self->family, variant->type, 0); + ASSERT_NE(-1, self->server); + + addrlen = offsetof(struct sockaddr_un, sun_path) + variant->len; + memcpy(&addr.sun_path, variant->sun_path, variant->len); + + err = bind(self->server, (struct sockaddr *)&addr, addrlen); + ASSERT_EQ(0, err); + + if (variant->type == SOCK_STREAM) { + err = listen(self->server, 32); + ASSERT_EQ(0, err); + } + + err = unshare(variant->flags); + ASSERT_EQ(0, err); + + self->client = socket(self->family, variant->type, 0); + ASSERT_LT(0, 
self->client); + + err = connect(self->client, (struct sockaddr *)&addr, addrlen); + ASSERT_EQ(variant->err, err == -1 ? errno : 0); +} + +TEST_HARNESS_MAIN diff --git a/tools/testing/selftests/net/arp_ndisc_untracked_subnets.sh b/tools/testing/selftests/net/arp_ndisc_untracked_subnets.sh new file mode 100755 index 000000000000..c899b446acb6 --- /dev/null +++ b/tools/testing/selftests/net/arp_ndisc_untracked_subnets.sh @@ -0,0 +1,308 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# 2 namespaces: one host and one router. Use arping from the host to send a +# garp to the router. Router accepts or ignores based on its arp_accept +# or accept_untracked_na configuration. + +TESTS="arp ndisc" + +ROUTER_NS="ns-router" +ROUTER_NS_V6="ns-router-v6" +ROUTER_INTF="veth-router" +ROUTER_ADDR="10.0.10.1" +ROUTER_ADDR_V6="2001:db8:abcd:0012::1" + +HOST_NS="ns-host" +HOST_NS_V6="ns-host-v6" +HOST_INTF="veth-host" +HOST_ADDR="10.0.10.2" +HOST_ADDR_V6="2001:db8:abcd:0012::2" + +SUBNET_WIDTH=24 +PREFIX_WIDTH_V6=64 + +cleanup() { + ip netns del ${HOST_NS} + ip netns del ${ROUTER_NS} +} + +cleanup_v6() { + ip netns del ${HOST_NS_V6} + ip netns del ${ROUTER_NS_V6} +} + +setup() { + set -e + local arp_accept=$1 + + # Set up two namespaces + ip netns add ${ROUTER_NS} + ip netns add ${HOST_NS} + + # Set up interfaces veth0 and veth1, which are pairs in separate + # namespaces. veth0 is veth-router, veth1 is veth-host. 
+ # first, set up the interface's link to the namespace + # then, set the interface "up" + ip netns exec ${ROUTER_NS} ip link add name ${ROUTER_INTF} \ + type veth peer name ${HOST_INTF} + + ip netns exec ${ROUTER_NS} ip link set dev ${ROUTER_INTF} up + ip netns exec ${ROUTER_NS} ip link set dev ${HOST_INTF} netns ${HOST_NS} + + ip netns exec ${HOST_NS} ip link set dev ${HOST_INTF} up + ip netns exec ${ROUTER_NS} ip addr add ${ROUTER_ADDR}/${SUBNET_WIDTH} \ + dev ${ROUTER_INTF} + + ip netns exec ${HOST_NS} ip addr add ${HOST_ADDR}/${SUBNET_WIDTH} \ + dev ${HOST_INTF} + ip netns exec ${HOST_NS} ip route add default via ${HOST_ADDR} \ + dev ${HOST_INTF} + ip netns exec ${ROUTER_NS} ip route add default via ${ROUTER_ADDR} \ + dev ${ROUTER_INTF} + + ROUTER_CONF=net.ipv4.conf.${ROUTER_INTF} + ip netns exec ${ROUTER_NS} sysctl -w \ + ${ROUTER_CONF}.arp_accept=${arp_accept} >/dev/null 2>&1 + set +e +} + +setup_v6() { + set -e + local accept_untracked_na=$1 + + # Set up two namespaces + ip netns add ${ROUTER_NS_V6} + ip netns add ${HOST_NS_V6} + + # Set up interfaces veth0 and veth1, which are pairs in separate + # namespaces. veth0 is veth-router, veth1 is veth-host. 
+ # first, set up the interface's link to the namespace + # then, set the interface "up" + ip -6 -netns ${ROUTER_NS_V6} link add name ${ROUTER_INTF} \ + type veth peer name ${HOST_INTF} + + ip -6 -netns ${ROUTER_NS_V6} link set dev ${ROUTER_INTF} up + ip -6 -netns ${ROUTER_NS_V6} link set dev ${HOST_INTF} netns \ + ${HOST_NS_V6} + + ip -6 -netns ${HOST_NS_V6} link set dev ${HOST_INTF} up + ip -6 -netns ${ROUTER_NS_V6} addr add \ + ${ROUTER_ADDR_V6}/${PREFIX_WIDTH_V6} dev ${ROUTER_INTF} nodad + + HOST_CONF=net.ipv6.conf.${HOST_INTF} + ip netns exec ${HOST_NS_V6} sysctl -qw ${HOST_CONF}.ndisc_notify=1 + ip netns exec ${HOST_NS_V6} sysctl -qw ${HOST_CONF}.disable_ipv6=0 + ip -6 -netns ${HOST_NS_V6} addr add ${HOST_ADDR_V6}/${PREFIX_WIDTH_V6} \ + dev ${HOST_INTF} + + ROUTER_CONF=net.ipv6.conf.${ROUTER_INTF} + + ip netns exec ${ROUTER_NS_V6} sysctl -w \ + ${ROUTER_CONF}.forwarding=1 >/dev/null 2>&1 + ip netns exec ${ROUTER_NS_V6} sysctl -w \ + ${ROUTER_CONF}.drop_unsolicited_na=0 >/dev/null 2>&1 + ip netns exec ${ROUTER_NS_V6} sysctl -w \ + ${ROUTER_CONF}.accept_untracked_na=${accept_untracked_na} \ + >/dev/null 2>&1 + set +e +} + +verify_arp() { + local arp_accept=$1 + local same_subnet=$2 + + neigh_show_output=$(ip netns exec ${ROUTER_NS} ip neigh get \ + ${HOST_ADDR} dev ${ROUTER_INTF} 2>/dev/null) + + if [ ${arp_accept} -eq 1 ]; then + # Neighbor entries expected + [[ ${neigh_show_output} ]] + elif [ ${arp_accept} -eq 2 ]; then + if [ ${same_subnet} -eq 1 ]; then + # Neighbor entries expected + [[ ${neigh_show_output} ]] + else + [[ -z "${neigh_show_output}" ]] + fi + else + [[ -z "${neigh_show_output}" ]] + fi + } + +arp_test_gratuitous() { + set -e + local arp_accept=$1 + local same_subnet=$2 + + if [ ${arp_accept} -eq 2 ]; then + test_msg=("test_arp: " + "accept_arp=$1 " + "same_subnet=$2") + if [ ${same_subnet} -eq 0 ]; then + HOST_ADDR=10.0.11.3 + else + HOST_ADDR=10.0.10.3 + fi + else + test_msg=("test_arp: " + "accept_arp=$1") + fi + # Supply arp_accept option 
to set up which sets it in sysctl + setup ${arp_accept} + ip netns exec ${HOST_NS} arping -A -U ${HOST_ADDR} -c1 2>&1 >/dev/null + + if verify_arp $1 $2; then + printf " TEST: %-60s [ OK ]\n" "${test_msg[*]}" + else + printf " TEST: %-60s [FAIL]\n" "${test_msg[*]}" + fi + cleanup + set +e +} + +arp_test_gratuitous_combinations() { + arp_test_gratuitous 0 + arp_test_gratuitous 1 + arp_test_gratuitous 2 0 # Second entry indicates subnet or not + arp_test_gratuitous 2 1 +} + +cleanup_tcpdump() { + set -e + [[ ! -z ${tcpdump_stdout} ]] && rm -f ${tcpdump_stdout} + [[ ! -z ${tcpdump_stderr} ]] && rm -f ${tcpdump_stderr} + tcpdump_stdout= + tcpdump_stderr= + set +e +} + +start_tcpdump() { + set -e + tcpdump_stdout=`mktemp` + tcpdump_stderr=`mktemp` + ip netns exec ${ROUTER_NS_V6} timeout 15s \ + tcpdump --immediate-mode -tpni ${ROUTER_INTF} -c 1 \ + "icmp6 && icmp6[0] == 136 && src ${HOST_ADDR_V6}" \ + > ${tcpdump_stdout} 2> /dev/null + set +e +} + +verify_ndisc() { + local accept_untracked_na=$1 + local same_subnet=$2 + + neigh_show_output=$(ip -6 -netns ${ROUTER_NS_V6} neigh show \ + to ${HOST_ADDR_V6} dev ${ROUTER_INTF} nud stale) + + if [ ${accept_untracked_na} -eq 1 ]; then + # Neighbour entry expected to be present + [[ ${neigh_show_output} ]] + elif [ ${accept_untracked_na} -eq 2 ]; then + if [ ${same_subnet} -eq 1 ]; then + [[ ${neigh_show_output} ]] + else + [[ -z "${neigh_show_output}" ]] + fi + else + # Neighbour entry expected to be absent for all other cases + [[ -z "${neigh_show_output}" ]] + fi +} + +ndisc_test_untracked_advertisements() { + set -e + test_msg=("test_ndisc: " + "accept_untracked_na=$1") + + local accept_untracked_na=$1 + local same_subnet=$2 + if [ ${accept_untracked_na} -eq 2 ]; then + test_msg=("test_ndisc: " + "accept_untracked_na=$1 " + "same_subnet=$2") + if [ ${same_subnet} -eq 0 ]; then + # Not same subnet + HOST_ADDR_V6=2000:db8:abcd:0013::4 + else + HOST_ADDR_V6=2001:db8:abcd:0012::3 + fi + fi + setup_v6 $1 $2 + start_tcpdump + + 
if verify_ndisc $1 $2; then + printf " TEST: %-60s [ OK ]\n" "${test_msg[*]}" + else + printf " TEST: %-60s [FAIL]\n" "${test_msg[*]}" + fi + + cleanup_tcpdump + cleanup_v6 + set +e +} + +ndisc_test_untracked_combinations() { + ndisc_test_untracked_advertisements 0 + ndisc_test_untracked_advertisements 1 + ndisc_test_untracked_advertisements 2 0 + ndisc_test_untracked_advertisements 2 1 +} + +################################################################################ +# usage + +usage() +{ + cat <<EOF +usage: ${0##*/} OPTS + + -t <test> Test(s) to run (default: all) + (options: $TESTS) +EOF +} + +################################################################################ +# main + +while getopts ":t:h" opt; do + case $opt in + t) TESTS=$OPTARG;; + h) usage; exit 0;; + *) usage; exit 1;; + esac +done + +if [ "$(id -u)" -ne 0 ];then + echo "SKIP: Need root privileges" + exit $ksft_skip; +fi + +if [ ! -x "$(command -v ip)" ]; then + echo "SKIP: Could not run test without ip tool" + exit $ksft_skip +fi + +if [ ! -x "$(command -v tcpdump)" ]; then + echo "SKIP: Could not run test without tcpdump tool" + exit $ksft_skip +fi + +if [ ! 
-x "$(command -v arping)" ]; then + echo "SKIP: Could not run test without arping tool" + exit $ksft_skip +fi + +# start clean +cleanup &> /dev/null +cleanup_v6 &> /dev/null + +for t in $TESTS +do + case $t in + arp_test_gratuitous_combinations|arp) arp_test_gratuitous_combinations;; + ndisc_test_untracked_combinations|ndisc) \ + ndisc_test_untracked_combinations;; + help) echo "Test names: $TESTS"; exit 0;; +esac +done diff --git a/tools/testing/selftests/net/cmsg_sender.c b/tools/testing/selftests/net/cmsg_sender.c index bc2162909a1a..75dd83e39207 100644 --- a/tools/testing/selftests/net/cmsg_sender.c +++ b/tools/testing/selftests/net/cmsg_sender.c @@ -456,7 +456,7 @@ int main(int argc, char *argv[]) buf[1] = 0; } else if (opt.sock.type == SOCK_RAW) { struct udphdr hdr = { 1, 2, htons(opt.size), 0 }; - struct sockaddr_in6 *sin6 = (void *)ai->ai_addr;; + struct sockaddr_in6 *sin6 = (void *)ai->ai_addr; memcpy(buf, &hdr, sizeof(hdr)); sin6->sin6_port = htons(opt.sock.proto); diff --git a/tools/testing/selftests/net/fib_rule_tests.sh b/tools/testing/selftests/net/fib_rule_tests.sh index bbe3b379927a..c245476fa29d 100755 --- a/tools/testing/selftests/net/fib_rule_tests.sh +++ b/tools/testing/selftests/net/fib_rule_tests.sh @@ -303,6 +303,29 @@ run_fibrule_tests() log_section "IPv6 fib rule" fib_rule6_test } +################################################################################ +# usage + +usage() +{ + cat <<EOF +usage: ${0##*/} OPTS + + -t <test> Test(s) to run (default: all) + (options: $TESTS) +EOF +} + +################################################################################ +# main + +while getopts ":t:h" opt; do + case $opt in + t) TESTS=$OPTARG;; + h) usage; exit 0;; + *) usage; exit 1;; + esac +done if [ "$(id -u)" -ne 0 ];then echo "SKIP: Need root privileges" diff --git a/tools/testing/selftests/net/forwarding/Makefile b/tools/testing/selftests/net/forwarding/Makefile index 57b84e0c879e..a9c5c1be5088 100644 --- 
a/tools/testing/selftests/net/forwarding/Makefile +++ b/tools/testing/selftests/net/forwarding/Makefile @@ -3,6 +3,7 @@ TEST_PROGS = bridge_igmp.sh \ bridge_locked_port.sh \ bridge_mdb.sh \ + bridge_mdb_port_down.sh \ bridge_mld.sh \ bridge_port_isolation.sh \ bridge_sticky_fdb.sh \ diff --git a/tools/testing/selftests/net/forwarding/bridge_mdb_port_down.sh b/tools/testing/selftests/net/forwarding/bridge_mdb_port_down.sh new file mode 100755 index 000000000000..1a0480e71d83 --- /dev/null +++ b/tools/testing/selftests/net/forwarding/bridge_mdb_port_down.sh @@ -0,0 +1,118 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# Verify that permanent mdb entries can be added to and deleted from bridge +# interfaces that are down, and works correctly when done so. + +ALL_TESTS="add_del_to_port_down" +NUM_NETIFS=4 + +TEST_GROUP="239.10.10.10" +TEST_GROUP_MAC="01:00:5e:0a:0a:0a" + +source lib.sh + + +add_del_to_port_down() { + RET=0 + + ip link set dev $swp2 down + bridge mdb add dev br0 port "$swp2" grp $TEST_GROUP permanent 2>/dev/null + check_err $? "Failed adding mdb entry" + + ip link set dev $swp2 up + setup_wait_dev $swp2 + mcast_packet_test $TEST_GROUP_MAC 192.0.2.1 $TEST_GROUP $h1 $h2 + check_fail $? "Traffic to $TEST_GROUP wasn't forwarded" + + ip link set dev $swp2 down + bridge mdb show dev br0 | grep -q "$TEST_GROUP permanent" 2>/dev/null + check_err $? "MDB entry did not persist after link up/down" + + bridge mdb del dev br0 port "$swp2" grp $TEST_GROUP 2>/dev/null + check_err $? "Failed deleting mdb entry" + + ip link set dev $swp2 up + setup_wait_dev $swp2 + mcast_packet_test $TEST_GROUP_MAC 192.0.2.1 $TEST_GROUP $h1 $h2 + check_err $? 
"Traffic to $TEST_GROUP was forwarded after entry removed" + + log_test "MDB add/del entry to port with state down " +} + +h1_create() +{ + simple_if_init $h1 192.0.2.1/24 2001:db8:1::1/64 +} + +h1_destroy() +{ + simple_if_fini $h1 192.0.2.1/24 2001:db8:1::1/64 +} + +h2_create() +{ + simple_if_init $h2 192.0.2.2/24 2001:db8:1::2/64 +} + +h2_destroy() +{ + simple_if_fini $h2 192.0.2.2/24 2001:db8:1::2/64 +} + +switch_create() +{ + # Enable multicast filtering + ip link add dev br0 type bridge mcast_snooping 1 mcast_querier 1 + + ip link set dev $swp1 master br0 + ip link set dev $swp2 master br0 + + ip link set dev br0 up + ip link set dev $swp1 up + + bridge link set dev $swp2 mcast_flood off + # Bridge currently has a "grace time" at creation time before it + # forwards multicast according to the mdb. Since we disable the + # mcast_flood setting per port + sleep 10 +} + +switch_destroy() +{ + ip link set dev $swp1 down + ip link set dev $swp2 down + ip link del dev br0 +} + +setup_prepare() +{ + h1=${NETIFS[p1]} + swp1=${NETIFS[p2]} + + swp2=${NETIFS[p3]} + h2=${NETIFS[p4]} + + vrf_prepare + + h1_create + h2_create + switch_create +} + +cleanup() +{ + pre_cleanup + + switch_destroy + h1_destroy + h2_destroy + + vrf_cleanup +} + +trap cleanup EXIT + +setup_prepare +tests_run +exit $EXIT_STATUS diff --git a/tools/testing/selftests/net/forwarding/ethtool_extended_state.sh b/tools/testing/selftests/net/forwarding/ethtool_extended_state.sh index 4b42dfd4efd1..072faa77f53b 100755 --- a/tools/testing/selftests/net/forwarding/ethtool_extended_state.sh +++ b/tools/testing/selftests/net/forwarding/ethtool_extended_state.sh @@ -11,6 +11,8 @@ NUM_NETIFS=2 source lib.sh source ethtool_lib.sh +TIMEOUT=$((WAIT_TIMEOUT * 1000)) # ms + setup_prepare() { swp1=${NETIFS[p1]} @@ -18,7 +20,7 @@ setup_prepare() swp3=$NETIF_NO_CABLE } -ethtool_extended_state_check() +ethtool_ext_state() { local dev=$1; shift local expected_ext_state=$1; shift @@ -30,21 +32,27 @@ 
ethtool_extended_state_check() | sed -e 's/^[[:space:]]*//') ext_state=$(echo $ext_state | cut -d "," -f1) - [[ $ext_state == $expected_ext_state ]] - check_err $? "Expected \"$expected_ext_state\", got \"$ext_state\"" - - [[ $ext_substate == $expected_ext_substate ]] - check_err $? "Expected \"$expected_ext_substate\", got \"$ext_substate\"" + if [[ $ext_state != $expected_ext_state ]]; then + echo "Expected \"$expected_ext_state\", got \"$ext_state\"" + return 1 + fi + if [[ $ext_substate != $expected_ext_substate ]]; then + echo "Expected \"$expected_ext_substate\", got \"$ext_substate\"" + return 1 + fi } autoneg() { + local msg + RET=0 ip link set dev $swp1 up - sleep 4 - ethtool_extended_state_check $swp1 "Autoneg" "No partner detected" + msg=$(busywait $TIMEOUT ethtool_ext_state $swp1 \ + "Autoneg" "No partner detected") + check_err $? "$msg" log_test "Autoneg, No partner detected" @@ -53,6 +61,8 @@ autoneg() autoneg_force_mode() { + local msg + RET=0 ip link set dev $swp1 up @@ -65,12 +75,13 @@ autoneg_force_mode() ethtool_set $swp1 speed $speed1 autoneg off ethtool_set $swp2 speed $speed2 autoneg off - sleep 4 - ethtool_extended_state_check $swp1 "Autoneg" \ - "No partner detected during force mode" + msg=$(busywait $TIMEOUT ethtool_ext_state $swp1 \ + "Autoneg" "No partner detected during force mode") + check_err $? "$msg" - ethtool_extended_state_check $swp2 "Autoneg" \ - "No partner detected during force mode" + msg=$(busywait $TIMEOUT ethtool_ext_state $swp2 \ + "Autoneg" "No partner detected during force mode") + check_err $? "$msg" log_test "Autoneg, No partner detected during force mode" @@ -83,12 +94,14 @@ autoneg_force_mode() no_cable() { + local msg + RET=0 ip link set dev $swp3 up - sleep 1 - ethtool_extended_state_check $swp3 "No cable" + msg=$(busywait $TIMEOUT ethtool_ext_state $swp3 "No cable") + check_err $? 
"$msg" log_test "No cable" diff --git a/tools/testing/selftests/net/forwarding/mirror_gre_bridge_1q_lag.sh b/tools/testing/selftests/net/forwarding/mirror_gre_bridge_1q_lag.sh index 28d568c48a73..91e431cd919e 100755 --- a/tools/testing/selftests/net/forwarding/mirror_gre_bridge_1q_lag.sh +++ b/tools/testing/selftests/net/forwarding/mirror_gre_bridge_1q_lag.sh @@ -141,12 +141,13 @@ switch_create() ip link set dev $swp4 up ip link add name br1 type bridge vlan_filtering 1 - ip link set dev br1 up - __addr_add_del br1 add 192.0.2.129/32 - ip -4 route add 192.0.2.130/32 dev br1 team_create lag loadbalance $swp3 $swp4 ip link set dev lag master br1 + + ip link set dev br1 up + __addr_add_del br1 add 192.0.2.129/32 + ip -4 route add 192.0.2.130/32 dev br1 } switch_destroy() diff --git a/tools/testing/selftests/net/forwarding/vxlan_asymmetric.sh b/tools/testing/selftests/net/forwarding/vxlan_asymmetric.sh index 0727e2012b68..43469c7de118 100755 --- a/tools/testing/selftests/net/forwarding/vxlan_asymmetric.sh +++ b/tools/testing/selftests/net/forwarding/vxlan_asymmetric.sh @@ -525,7 +525,7 @@ arp_suppression() log_test "neigh_suppress: on / neigh exists: yes" - # Delete the neighbour from the the SVI. A single ARP request should be + # Delete the neighbour from the SVI. A single ARP request should be # received by the remote VTEP RET=0 diff --git a/tools/testing/selftests/net/ioam6.sh b/tools/testing/selftests/net/ioam6.sh index a2b9fad5a9a6..4ceb401da1bf 100755 --- a/tools/testing/selftests/net/ioam6.sh +++ b/tools/testing/selftests/net/ioam6.sh @@ -117,6 +117,8 @@ # | Schema Data | | # +-----------------------------------------------------------+ +# Kselftest framework requirement - SKIP code is 4. 
+ksft_skip=4 ################################################################################ # # @@ -211,7 +213,7 @@ check_kernel_compatibility() echo "SKIP: kernel version probably too old, missing ioam support" ip link del veth0 2>/dev/null || true ip netns del ioam-tmp-node || true - exit 1 + exit $ksft_skip fi ip -netns ioam-tmp-node route add db02::/64 encap ioam6 mode inline \ @@ -227,7 +229,7 @@ check_kernel_compatibility() "without CONFIG_IPV6_IOAM6_LWTUNNEL?" ip link del veth0 2>/dev/null || true ip netns del ioam-tmp-node || true - exit 1 + exit $ksft_skip fi ip link del veth0 2>/dev/null || true @@ -752,20 +754,20 @@ nfailed=0 if [ "$(id -u)" -ne 0 ] then echo "SKIP: Need root privileges" - exit 1 + exit $ksft_skip fi if [ ! -x "$(command -v ip)" ] then echo "SKIP: Could not run test without ip tool" - exit 1 + exit $ksft_skip fi ip ioam &>/dev/null if [ $? = 1 ] then echo "SKIP: iproute2 too old, missing ioam command" - exit 1 + exit $ksft_skip fi check_kernel_compatibility diff --git a/tools/testing/selftests/net/ipv6_flowlabel.c b/tools/testing/selftests/net/ipv6_flowlabel.c index a7c41375374f..708a9822259d 100644 --- a/tools/testing/selftests/net/ipv6_flowlabel.c +++ b/tools/testing/selftests/net/ipv6_flowlabel.c @@ -9,6 +9,7 @@ #include <errno.h> #include <fcntl.h> #include <limits.h> +#include <linux/icmpv6.h> #include <linux/in6.h> #include <stdbool.h> #include <stdio.h> @@ -29,26 +30,48 @@ #ifndef IPV6_FLOWLABEL_MGR #define IPV6_FLOWLABEL_MGR 32 #endif +#ifndef IPV6_FLOWINFO_SEND +#define IPV6_FLOWINFO_SEND 33 +#endif #define FLOWLABEL_WILDCARD ((uint32_t) -1) static const char cfg_data[] = "a"; static uint32_t cfg_label = 1; +static bool use_ping; +static bool use_flowinfo_send; + +static struct icmp6hdr icmp6 = { + .icmp6_type = ICMPV6_ECHO_REQUEST +}; + +static struct sockaddr_in6 addr = { + .sin6_family = AF_INET6, + .sin6_addr = IN6ADDR_LOOPBACK_INIT, +}; static void do_send(int fd, bool with_flowlabel, uint32_t flowlabel) { char 
control[CMSG_SPACE(sizeof(flowlabel))] = {0}; struct msghdr msg = {0}; - struct iovec iov = {0}; + struct iovec iov = { + .iov_base = (char *)cfg_data, + .iov_len = sizeof(cfg_data) + }; int ret; - iov.iov_base = (char *)cfg_data; - iov.iov_len = sizeof(cfg_data); + if (use_ping) { + iov.iov_base = &icmp6; + iov.iov_len = sizeof(icmp6); + } msg.msg_iov = &iov; msg.msg_iovlen = 1; - if (with_flowlabel) { + if (use_flowinfo_send) { + msg.msg_name = &addr; + msg.msg_namelen = sizeof(addr); + } else if (with_flowlabel) { struct cmsghdr *cm; cm = (void *)control; @@ -94,6 +117,8 @@ static void do_recv(int fd, bool with_flowlabel, uint32_t expect) ret = recvmsg(fd, &msg, 0); if (ret == -1) error(1, errno, "recv"); + if (use_ping) + goto parse_cmsg; if (msg.msg_flags & (MSG_TRUNC | MSG_CTRUNC)) error(1, 0, "recv: truncated"); if (ret != sizeof(cfg_data)) @@ -101,6 +126,7 @@ static void do_recv(int fd, bool with_flowlabel, uint32_t expect) if (memcmp(data, cfg_data, sizeof(data))) error(1, 0, "recv: data mismatch"); +parse_cmsg: cm = CMSG_FIRSTHDR(&msg); if (with_flowlabel) { if (!cm) @@ -114,9 +140,11 @@ static void do_recv(int fd, bool with_flowlabel, uint32_t expect) flowlabel = ntohl(*(uint32_t *)CMSG_DATA(cm)); fprintf(stderr, "recv with label %u\n", flowlabel); - if (expect != FLOWLABEL_WILDCARD && expect != flowlabel) + if (expect != FLOWLABEL_WILDCARD && expect != flowlabel) { fprintf(stderr, "recv: incorrect flowlabel %u != %u\n", flowlabel, expect); + error(1, 0, "recv: flowlabel is wrong"); + } } else { fprintf(stderr, "recv without label\n"); @@ -165,11 +193,17 @@ static void parse_opts(int argc, char **argv) { int c; - while ((c = getopt(argc, argv, "l:")) != -1) { + while ((c = getopt(argc, argv, "l:ps")) != -1) { switch (c) { case 'l': cfg_label = strtoul(optarg, NULL, 0); break; + case 'p': + use_ping = true; + break; + case 's': + use_flowinfo_send = true; + break; default: error(1, 0, "%s: parse error", argv[0]); } @@ -178,27 +212,30 @@ static void 
parse_opts(int argc, char **argv) int main(int argc, char **argv) { - struct sockaddr_in6 addr = { - .sin6_family = AF_INET6, - .sin6_port = htons(8000), - .sin6_addr = IN6ADDR_LOOPBACK_INIT, - }; const int one = 1; int fdt, fdr; + int prot = 0; + + addr.sin6_port = htons(8000); parse_opts(argc, argv); - fdt = socket(PF_INET6, SOCK_DGRAM, 0); + if (use_ping) { + fprintf(stderr, "attempting to use ping sockets\n"); + prot = IPPROTO_ICMPV6; + } + + fdt = socket(PF_INET6, SOCK_DGRAM, prot); if (fdt == -1) error(1, errno, "socket t"); - fdr = socket(PF_INET6, SOCK_DGRAM, 0); + fdr = use_ping ? fdt : socket(PF_INET6, SOCK_DGRAM, 0); if (fdr == -1) error(1, errno, "socket r"); if (connect(fdt, (void *)&addr, sizeof(addr))) error(1, errno, "connect"); - if (bind(fdr, (void *)&addr, sizeof(addr))) + if (!use_ping && bind(fdr, (void *)&addr, sizeof(addr))) error(1, errno, "bind"); flowlabel_get(fdt, cfg_label, IPV6_FL_S_EXCL, IPV6_FL_F_CREATE); @@ -216,13 +253,21 @@ int main(int argc, char **argv) do_recv(fdr, false, 0); } + if (use_flowinfo_send) { + fprintf(stderr, "using IPV6_FLOWINFO_SEND to send label\n"); + addr.sin6_flowinfo = htonl(cfg_label); + if (setsockopt(fdt, SOL_IPV6, IPV6_FLOWINFO_SEND, &one, + sizeof(one)) == -1) + error(1, errno, "setsockopt flowinfo_send"); + } + fprintf(stderr, "send label\n"); do_send(fdt, true, cfg_label); do_recv(fdr, true, cfg_label); if (close(fdr)) error(1, errno, "close r"); - if (close(fdt)) + if (!use_ping && close(fdt)) error(1, errno, "close t"); return 0; diff --git a/tools/testing/selftests/net/ipv6_flowlabel.sh b/tools/testing/selftests/net/ipv6_flowlabel.sh index d3bc6442704e..cee95e252bee 100755 --- a/tools/testing/selftests/net/ipv6_flowlabel.sh +++ b/tools/testing/selftests/net/ipv6_flowlabel.sh @@ -18,4 +18,20 @@ echo "TEST datapath (with auto-flowlabels)" ./in_netns.sh \ sh -c 'sysctl -q -w net.ipv6.auto_flowlabels=1 && ./ipv6_flowlabel -l 1' +echo "TEST datapath (with ping-sockets)" +./in_netns.sh \ + sh -c 'sysctl 
-q -w net.ipv6.flowlabel_reflect=4 && \ + sysctl -q -w net.ipv4.ping_group_range="0 2147483647" && \ + ./ipv6_flowlabel -l 1 -p' + +echo "TEST datapath (with flowinfo-send)" +./in_netns.sh \ + sh -c './ipv6_flowlabel -l 1 -s' + +echo "TEST datapath (with ping-sockets flowinfo-send)" +./in_netns.sh \ + sh -c 'sysctl -q -w net.ipv6.flowlabel_reflect=4 && \ + sysctl -q -w net.ipv4.ping_group_range="0 2147483647" && \ + ./ipv6_flowlabel -l 1 -p -s' + echo OK. All tests passed diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh index a4406b7a8064..ff83ef426df5 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_join.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh @@ -455,6 +455,12 @@ wait_mpj() done } +kill_wait() +{ + kill $1 > /dev/null 2>&1 + wait $1 2>/dev/null +} + pm_nl_set_limits() { local ns=$1 @@ -654,6 +660,11 @@ do_transfer() local port=$((10000 + TEST_COUNT - 1)) local cappid + local userspace_pm=0 + local evts_ns1 + local evts_ns1_pid + local evts_ns2 + local evts_ns2_pid :> "$cout" :> "$sout" @@ -690,10 +701,29 @@ do_transfer() extra_args="-r ${speed:6}" fi + if [[ "${addr_nr_ns1}" = "userspace_"* ]]; then + userspace_pm=1 + addr_nr_ns1=${addr_nr_ns1:10} + fi + if [[ "${addr_nr_ns2}" = "fastclose_"* ]]; then # disconnect extra_args="$extra_args -I ${addr_nr_ns2:10}" addr_nr_ns2=0 + elif [[ "${addr_nr_ns2}" = "userspace_"* ]]; then + userspace_pm=1 + addr_nr_ns2=${addr_nr_ns2:10} + fi + + if [ $userspace_pm -eq 1 ]; then + evts_ns1=$(mktemp) + evts_ns2=$(mktemp) + :> "$evts_ns1" + :> "$evts_ns2" + ip netns exec ${listener_ns} ./pm_nl_ctl events >> "$evts_ns1" 2>&1 & + evts_ns1_pid=$! + ip netns exec ${connector_ns} ./pm_nl_ctl events >> "$evts_ns2" 2>&1 & + evts_ns2_pid=$! 
fi local local_addr @@ -748,6 +778,8 @@ do_transfer() if [ $addr_nr_ns1 -gt 0 ]; then local counter=2 local add_nr_ns1=${addr_nr_ns1} + local id=10 + local tk while [ $add_nr_ns1 -gt 0 ]; do local addr if is_v6 "${connect_addr}"; then @@ -755,9 +787,18 @@ do_transfer() else addr="10.0.$counter.1" fi - pm_nl_add_endpoint $ns1 $addr flags signal + if [ $userspace_pm -eq 0 ]; then + pm_nl_add_endpoint $ns1 $addr flags signal + else + tk=$(sed -n 's/.*\(token:\)\([[:digit:]]*\).*$/\2/p;q' "$evts_ns1") + ip netns exec ${listener_ns} ./pm_nl_ctl ann $addr token $tk id $id + sleep 1 + ip netns exec ${listener_ns} ./pm_nl_ctl rem token $tk id $id + fi + counter=$((counter + 1)) add_nr_ns1=$((add_nr_ns1 - 1)) + id=$((id + 1)) done elif [ $addr_nr_ns1 -lt 0 ]; then local rm_nr_ns1=$((-addr_nr_ns1)) @@ -804,6 +845,8 @@ do_transfer() if [ $addr_nr_ns2 -gt 0 ]; then local add_nr_ns2=${addr_nr_ns2} local counter=3 + local id=20 + local tk da dp sp while [ $add_nr_ns2 -gt 0 ]; do local addr if is_v6 "${connect_addr}"; then @@ -811,9 +854,23 @@ do_transfer() else addr="10.0.$counter.2" fi - pm_nl_add_endpoint $ns2 $addr flags $flags + if [ $userspace_pm -eq 0 ]; then + pm_nl_add_endpoint $ns2 $addr flags $flags + else + tk=$(sed -n 's/.*\(token:\)\([[:digit:]]*\).*$/\2/p;q' "$evts_ns2") + da=$(sed -n 's/.*\(daddr4:\)\([0-9.]*\).*$/\2/p;q' "$evts_ns2") + dp=$(sed -n 's/.*\(dport:\)\([[:digit:]]*\).*$/\2/p;q' "$evts_ns2") + ip netns exec ${connector_ns} ./pm_nl_ctl csf lip $addr lid $id \ + rip $da rport $dp token $tk + sleep 1 + sp=$(grep "type:10" "$evts_ns2" | + sed -n 's/.*\(sport:\)\([[:digit:]]*\).*$/\2/p;q') + ip netns exec ${connector_ns} ./pm_nl_ctl dsf lip $addr lport $sp \ + rip $da rport $dp token $tk + fi counter=$((counter + 1)) add_nr_ns2=$((add_nr_ns2 - 1)) + id=$((id + 1)) done elif [ $addr_nr_ns2 -lt 0 ]; then local rm_nr_ns2=$((-addr_nr_ns2)) @@ -890,6 +947,12 @@ do_transfer() kill $cappid fi + if [ $userspace_pm -eq 1 ]; then + kill_wait $evts_ns1_pid + kill_wait 
$evts_ns2_pid + rm -rf $evts_ns1 $evts_ns2 + fi + NSTAT_HISTORY=/tmp/${listener_ns}.nstat ip netns exec ${listener_ns} \ nstat | grep Tcp > /tmp/${listener_ns}.out NSTAT_HISTORY=/tmp/${connector_ns}.nstat ip netns exec ${connector_ns} \ @@ -2365,6 +2428,36 @@ backup_tests() chk_add_nr 1 1 chk_prio_nr 1 1 fi + + if reset "mpc backup"; then + pm_nl_add_endpoint $ns2 10.0.1.2 flags subflow,backup + run_tests $ns1 $ns2 10.0.1.1 0 0 0 slow + chk_join_nr 0 0 0 + chk_prio_nr 0 1 + fi + + if reset "mpc backup both sides"; then + pm_nl_add_endpoint $ns1 10.0.1.1 flags subflow,backup + pm_nl_add_endpoint $ns2 10.0.1.2 flags subflow,backup + run_tests $ns1 $ns2 10.0.1.1 0 0 0 slow + chk_join_nr 0 0 0 + chk_prio_nr 1 1 + fi + + if reset "mpc switch to backup"; then + pm_nl_add_endpoint $ns2 10.0.1.2 flags subflow + run_tests $ns1 $ns2 10.0.1.1 0 0 0 slow backup + chk_join_nr 0 0 0 + chk_prio_nr 0 1 + fi + + if reset "mpc switch to backup both sides"; then + pm_nl_add_endpoint $ns1 10.0.1.1 flags subflow + pm_nl_add_endpoint $ns2 10.0.1.2 flags subflow + run_tests $ns1 $ns2 10.0.1.1 0 0 0 slow backup + chk_join_nr 0 0 0 + chk_prio_nr 1 1 + fi } add_addr_ports_tests() @@ -2810,6 +2903,25 @@ userspace_tests() chk_join_nr 0 0 0 chk_rm_nr 0 0 fi + + # userspace pm add & remove address + if reset "userspace pm add & remove address"; then + set_userspace_pm $ns1 + pm_nl_set_limits $ns2 1 1 + run_tests $ns1 $ns2 10.0.1.1 0 userspace_1 0 slow + chk_join_nr 1 1 1 + chk_add_nr 1 1 + chk_rm_nr 1 1 invert + fi + + # userspace pm create destroy subflow + if reset "userspace pm create destroy subflow"; then + set_userspace_pm $ns2 + pm_nl_set_limits $ns1 0 1 + run_tests $ns1 $ns2 10.0.1.1 0 0 userspace_1 slow + chk_join_nr 1 1 1 + chk_rm_nr 0 1 + fi } endpoint_tests() diff --git a/tools/testing/selftests/net/mptcp/pm_nl_ctl.c b/tools/testing/selftests/net/mptcp/pm_nl_ctl.c index cb79f0719e3b..abddf4c63e79 100644 --- a/tools/testing/selftests/net/mptcp/pm_nl_ctl.c +++ 
b/tools/testing/selftests/net/mptcp/pm_nl_ctl.c @@ -31,7 +31,7 @@ static void syntax(char *argv[]) { - fprintf(stderr, "%s add|get|set|del|flush|dump|accept [<args>]\n", argv[0]); + fprintf(stderr, "%s add|ann|rem|csf|dsf|get|set|del|flush|dump|events|listen|accept [<args>]\n", argv[0]); fprintf(stderr, "\tadd [flags signal|subflow|backup|fullmesh] [id <nr>] [dev <name>] <ip>\n"); fprintf(stderr, "\tann <local-ip> id <local-id> token <token> [port <local-port>] [dev <name>]\n"); fprintf(stderr, "\trem id <local-id> token <token>\n"); diff --git a/tools/testing/selftests/net/mptcp/simult_flows.sh b/tools/testing/selftests/net/mptcp/simult_flows.sh index f441ff7904fc..ffa13a957a36 100755 --- a/tools/testing/selftests/net/mptcp/simult_flows.sh +++ b/tools/testing/selftests/net/mptcp/simult_flows.sh @@ -12,6 +12,7 @@ timeout_test=$((timeout_poll * 2 + 1)) test_cnt=1 ret=0 bail=0 +slack=50 usage() { echo "Usage: $0 [ -b ] [ -c ] [ -d ]" @@ -52,6 +53,7 @@ setup() cout=$(mktemp) capout=$(mktemp) size=$((2 * 2048 * 4096)) + dd if=/dev/zero of=$small bs=4096 count=20 >/dev/null 2>&1 dd if=/dev/zero of=$large bs=4096 count=$((size / 4096)) >/dev/null 2>&1 @@ -104,6 +106,16 @@ setup() ip -net "$ns3" route add default via dead:beef:3::2 ip netns exec "$ns3" ./pm_nl_ctl limits 1 1 + + # debug build can slow down measurably the test program + # we use quite tight time limit on the run-time, to ensure + # maximum B/W usage. + # Use kmemleak/lockdep/kasan/prove_locking presence as a rough + # estimate for this being a debug kernel and increase the + # maximum run-time accordingly. Observed run times for CI builds + # running selftests, including kbuild, were used to determine the + # amount of time to add. 
+ grep -q ' kmemleak_init$\| lockdep_init$\| kasan_init$\| prove_locking$' /proc/kallsyms && slack=$((slack+550)) } # $1: ns, $2: port @@ -241,7 +253,7 @@ run_test() # mptcp_connect will do some sleeps to allow the mp_join handshake # completion (see mptcp_connect): 200ms on each side, add some slack - time=$((time + 450)) + time=$((time + 400 + slack)) printf "%-60s" "$msg" do_transfer $small $large $time diff --git a/tools/testing/selftests/net/mptcp/userspace_pm.sh b/tools/testing/selftests/net/mptcp/userspace_pm.sh index abe3d4ebe554..3229725b64b0 100755 --- a/tools/testing/selftests/net/mptcp/userspace_pm.sh +++ b/tools/testing/selftests/net/mptcp/userspace_pm.sh @@ -37,6 +37,12 @@ rndh=$(stdbuf -o0 -e0 printf %x "$sec")-$(mktemp -u XXXXXX) ns1="ns1-$rndh" ns2="ns2-$rndh" +kill_wait() +{ + kill $1 > /dev/null 2>&1 + wait $1 2>/dev/null +} + cleanup() { echo "cleanup" @@ -48,16 +54,16 @@ cleanup() kill -SIGUSR1 $client4_pid > /dev/null 2>&1 fi if [ $server4_pid -ne 0 ]; then - kill $server4_pid > /dev/null 2>&1 + kill_wait $server4_pid fi if [ $client6_pid -ne 0 ]; then kill -SIGUSR1 $client6_pid > /dev/null 2>&1 fi if [ $server6_pid -ne 0 ]; then - kill $server6_pid > /dev/null 2>&1 + kill_wait $server6_pid fi if [ $evts_pid -ne 0 ]; then - kill $evts_pid > /dev/null 2>&1 + kill_wait $evts_pid fi local netns for netns in "$ns1" "$ns2" ;do @@ -153,7 +159,7 @@ make_connection() sleep 1 # Capture client/server attributes from MPTCP connection netlink events - kill $client_evts_pid + kill_wait $client_evts_pid local client_token local client_port @@ -165,7 +171,7 @@ make_connection() client_port=$(sed --unbuffered -n 's/.*\(sport:\)\([[:digit:]]*\).*$/\2/p;q' "$client_evts") client_serverside=$(sed --unbuffered -n 's/.*\(server_side:\)\([[:digit:]]*\).*$/\2/p;q'\ "$client_evts") - kill $server_evts_pid + kill_wait $server_evts_pid server_token=$(sed --unbuffered -n 's/.*\(token:\)\([[:digit:]]*\).*$/\2/p;q' "$server_evts") server_serverside=$(sed --unbuffered -n 
's/.*\(server_side:\)\([[:digit:]]*\).*$/\2/p;q'\ "$server_evts") @@ -286,7 +292,7 @@ test_announce() verify_announce_event "$evts" "$ANNOUNCED" "$server4_token" "10.0.2.2"\ "$client_addr_id" "$new4_port" - kill $evts_pid + kill_wait $evts_pid # Capture events on the network namespace running the client :>"$evts" @@ -321,7 +327,7 @@ test_announce() verify_announce_event "$evts" "$ANNOUNCED" "$client4_token" "10.0.2.1"\ "$server_addr_id" "$new4_port" - kill $evts_pid + kill_wait $evts_pid rm -f "$evts" } @@ -416,7 +422,7 @@ test_remove() sleep 0.5 verify_remove_event "$evts" "$REMOVED" "$server6_token" "$client_addr_id" - kill $evts_pid + kill_wait $evts_pid # Capture events on the network namespace running the client :>"$evts" @@ -449,7 +455,7 @@ test_remove() sleep 0.5 verify_remove_event "$evts" "$REMOVED" "$client6_token" "$server_addr_id" - kill $evts_pid + kill_wait $evts_pid rm -f "$evts" } @@ -553,7 +559,7 @@ test_subflows() "10.0.2.2" "$client4_port" "23" "$client_addr_id" "ns1" "ns2" # Delete the listener from the client ns, if one was created - kill $listener_pid > /dev/null 2>&1 + kill_wait $listener_pid local sport sport=$(sed --unbuffered -n 's/.*\(sport:\)\([[:digit:]]*\).*$/\2/p;q' "$evts") @@ -592,7 +598,7 @@ test_subflows() "$client_addr_id" "ns1" "ns2" # Delete the listener from the client ns, if one was created - kill $listener_pid > /dev/null 2>&1 + kill_wait $listener_pid sport=$(sed --unbuffered -n 's/.*\(sport:\)\([[:digit:]]*\).*$/\2/p;q' "$evts") @@ -631,7 +637,7 @@ test_subflows() "$client_addr_id" "ns1" "ns2" # Delete the listener from the client ns, if one was created - kill $listener_pid > /dev/null 2>&1 + kill_wait $listener_pid sport=$(sed --unbuffered -n 's/.*\(sport:\)\([[:digit:]]*\).*$/\2/p;q' "$evts") @@ -647,7 +653,7 @@ test_subflows() ip netns exec "$ns2" ./pm_nl_ctl rem id $client_addr_id token\ "$client4_token" > /dev/null 2>&1 - kill $evts_pid + kill_wait $evts_pid # Capture events on the network namespace running the client 
:>"$evts" @@ -674,7 +680,7 @@ test_subflows() "10.0.2.1" "$app4_port" "23" "$server_addr_id" "ns2" "ns1" # Delete the listener from the server ns, if one was created - kill $listener_pid> /dev/null 2>&1 + kill_wait $listener_pid sport=$(sed --unbuffered -n 's/.*\(sport:\)\([[:digit:]]*\).*$/\2/p;q' "$evts") @@ -713,7 +719,7 @@ test_subflows() "$server_addr_id" "ns2" "ns1" # Delete the listener from the server ns, if one was created - kill $listener_pid > /dev/null 2>&1 + kill_wait $listener_pid sport=$(sed --unbuffered -n 's/.*\(sport:\)\([[:digit:]]*\).*$/\2/p;q' "$evts") @@ -750,7 +756,7 @@ test_subflows() "10.0.2.2" "10.0.2.1" "$new4_port" "23" "$server_addr_id" "ns2" "ns1" # Delete the listener from the server ns, if one was created - kill $listener_pid > /dev/null 2>&1 + kill_wait $listener_pid sport=$(sed --unbuffered -n 's/.*\(sport:\)\([[:digit:]]*\).*$/\2/p;q' "$evts") @@ -766,7 +772,7 @@ test_subflows() ip netns exec "$ns1" ./pm_nl_ctl rem id $server_addr_id token\ "$server4_token" > /dev/null 2>&1 - kill $evts_pid + kill_wait $evts_pid rm -f "$evts" } diff --git a/tools/testing/selftests/net/srv6_hencap_red_l3vpn_test.sh b/tools/testing/selftests/net/srv6_hencap_red_l3vpn_test.sh new file mode 100755 index 000000000000..28a775654b92 --- /dev/null +++ b/tools/testing/selftests/net/srv6_hencap_red_l3vpn_test.sh @@ -0,0 +1,879 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# author: Andrea Mayer <andrea.mayer@uniroma2.it> +# +# This script is designed for testing the SRv6 H.Encaps.Red behavior. +# +# Below is depicted the IPv6 network of an operator which offers advanced +# IPv4/IPv6 VPN services to hosts, enabling them to communicate with each +# other. +# In this example, hosts hs-1 and hs-2 are connected through an IPv4/IPv6 VPN +# service, while hs-3 and hs-4 are connected using an IPv6 only VPN. +# +# Routers rt-1,rt-2,rt-3 and rt-4 implement IPv4/IPv6 L3 VPN services +# leveraging the SRv6 architecture. 
The key components for such VPNs are: +# +# i) The SRv6 H.Encaps.Red behavior applies SRv6 Policies on traffic received +# by connected hosts, initiating the VPN tunnel. Such a behavior is an +# optimization of the SRv6 H.Encap aiming to reduce the length of the SID +# List carried in the pushed SRH. Specifically, the H.Encaps.Red removes +# the first SID contained in the SID List (i.e. SRv6 Policy) by storing it +# into the IPv6 Destination Address. When a SRv6 Policy is made of only one +# SID, the SRv6 H.Encaps.Red behavior omits the SRH at all and pushes that +# SID directly into the IPv6 DA; +# +# ii) The SRv6 End behavior advances the active SID in the SID List carried by +# the SRH; +# +# iii) The SRv6 End.DT46 behavior is used for removing the SRv6 Policy and, +# thus, it terminates the VPN tunnel. Such a behavior is capable of +# handling, at the same time, both tunneled IPv4 and IPv6 traffic. +# +# +# cafe::1 cafe::2 +# 10.0.0.1 10.0.0.2 +# +--------+ +--------+ +# | | | | +# | hs-1 | | hs-2 | +# | | | | +# +---+----+ +--- +---+ +# cafe::/64 | | cafe::/64 +# 10.0.0.0/24 | | 10.0.0.0/24 +# +---+----+ +----+---+ +# | | fcf0:0:1:2::/64 | | +# | rt-1 +-------------------+ rt-2 | +# | | | | +# +---+----+ +----+---+ +# | . . | +# | fcf0:0:1:3::/64 . | +# | . . | +# | . . | +# fcf0:0:1:4::/64 | . | fcf0:0:2:3::/64 +# | . . | +# | . . | +# | fcf0:0:2:4::/64 . | +# | . . | +# +---+----+ +----+---+ +# | | | | +# | rt-4 +-------------------+ rt-3 | +# | | fcf0:0:3:4::/64 | | +# +---+----+ +----+---+ +# cafe::/64 | | cafe::/64 +# 10.0.0.0/24 | | 10.0.0.0/24 +# +---+----+ +--- +---+ +# | | | | +# | hs-4 | | hs-3 | +# | | | | +# +--------+ +--------+ +# cafe::4 cafe::3 +# 10.0.0.4 10.0.0.3 +# +# +# Every fcf0:0:x:y::/64 network interconnects the SRv6 routers rt-x with rt-y +# in the IPv6 operator network. +# +# Local SID table +# =============== +# +# Each SRv6 router is configured with a Local SID table in which SIDs are +# stored. 
Considering the given SRv6 router rt-x, at least two SIDs are +# configured in the Local SID table: +# +# Local SID table for SRv6 router rt-x +# +----------------------------------------------------------+ +# |fcff:x::e is associated with the SRv6 End behavior | +# |fcff:x::d46 is associated with the SRv6 End.DT46 behavior | +# +----------------------------------------------------------+ +# +# The fcff::/16 prefix is reserved by the operator for implementing SRv6 VPN +# services. Reachability of SIDs is ensured by proper configuration of the IPv6 +# operator's network and SRv6 routers. +# +# SRv6 Policies +# ============= +# +# An SRv6 ingress router applies SRv6 policies to the traffic received from a +# connected host. SRv6 policy enforcement consists of encapsulating the +# received traffic into a new IPv6 packet with a given SID List contained in +# the SRH. +# +# IPv4/IPv6 VPN between hs-1 and hs-2 +# ----------------------------------- +# +# Hosts hs-1 and hs-2 are connected using dedicated IPv4/IPv6 VPNs. +# Specifically, packets generated from hs-1 and directed towards hs-2 are +# handled by rt-1 which applies the following SRv6 Policies: +# +# i.a) IPv6 traffic, SID List=fcff:3::e,fcff:4::e,fcff:2::d46 +# ii.a) IPv4 traffic, SID List=fcff:2::d46 +# +# Policy (i.a) steers tunneled IPv6 traffic through SRv6 routers +# rt-3,rt-4,rt-2. Instead, Policy (ii.a) steers tunneled IPv4 traffic through +# rt-2. +# The H.Encaps.Red reduces the SID List (i.a) carried in SRH by removing the +# first SID (fcff:3::e) and pushing it into the IPv6 DA. In case of IPv4 +# traffic, the H.Encaps.Red omits the presence of SRH at all, since the SID +# List (ii.a) consists of only one SID that can be stored directly in the IPv6 +# DA. +# +# On the reverse path (i.e.
from hs-2 to hs-1), rt-2 applies the following +# policies: +# +# i.b) IPv6 traffic, SID List=fcff:1::d46 +# ii.b) IPv4 traffic, SID List=fcff:4::e,fcff:3::e,fcff:1::d46 +# +# Policy (i.b) steers tunneled IPv6 traffic through the SRv6 router rt-1. +# Conversely, Policy (ii.b) steers tunneled IPv4 traffic through SRv6 routers +# rt-4,rt-3,rt-1. +# The H.Encaps.Red omits the SRH at all in case of (i.b) by pushing the single +# SID (fcff:1::d46) inside the IPv6 DA. +# The H.Encaps.Red reduces the SID List (ii.b) in the SRH by removing the first +# SID (fcff:4::e) and pushing it into the IPv6 DA. +# +# In summary: +# hs-1->hs-2 |IPv6 DA=fcff:3::e|SRH SIDs=fcff:4::e,fcff:2::d46|IPv6|...| (i.a) +# hs-1->hs-2 |IPv6 DA=fcff:2::d46|IPv4|...| (ii.a) +# +# hs-2->hs-1 |IPv6 DA=fcff:1::d46|IPv6|...| (i.b) +# hs-2->hs-1 |IPv6 DA=fcff:4::e|SRH SIDs=fcff:3::e,fcff:1::d46|IPv4|...| (ii.b) +# +# +# IPv6 VPN between hs-3 and hs-4 +# ------------------------------ +# +# Hosts hs-3 and hs-4 are connected using a dedicated IPv6 only VPN. +# Specifically, packets generated from hs-3 and directed towards hs-4 are +# handled by rt-3 which applies the following SRv6 Policy: +# +# i.c) IPv6 traffic, SID List=fcff:2::e,fcff:4::d46 +# +# Policy (i.c) steers tunneled IPv6 traffic through SRv6 routers rt-2,rt-4. +# The H.Encaps.Red reduces the SID List (i.c) carried in SRH by pushing the +# first SID (fcff:2::e) in the IPv6 DA. +# +# On the reverse path (i.e. from hs-4 to hs-3) the router rt-4 applies the +# following SRv6 Policy: +# +# i.d) IPv6 traffic, SID List=fcff:1::e,fcff:3::d46. +# +# Policy (i.d) steers tunneled IPv6 traffic through SRv6 routers rt-1,rt-3. +# The H.Encaps.Red reduces the SID List (i.d) carried in SRH by pushing the +# first SID (fcff:1::e) in the IPv6 DA. +# +# In summary: +# hs-3->hs-4 |IPv6 DA=fcff:2::e|SRH SIDs=fcff:4::d46|IPv6|...| (i.c) +# hs-4->hs-3 |IPv6 DA=fcff:1::e|SRH SIDs=fcff:3::d46|IPv6|...| (i.d) +# + +# Kselftest framework requirement - SKIP code is 4.
+readonly ksft_skip=4 + +readonly RDMSUFF="$(mktemp -u XXXXXXXX)" +readonly VRF_TID=100 +readonly VRF_DEVNAME="vrf-${VRF_TID}" +readonly RT2HS_DEVNAME="veth-t${VRF_TID}" +readonly LOCALSID_TABLE_ID=90 +readonly IPv6_RT_NETWORK=fcf0:0 +readonly IPv6_HS_NETWORK=cafe +readonly IPv4_HS_NETWORK=10.0.0 +readonly VPN_LOCATOR_SERVICE=fcff +readonly END_FUNC=000e +readonly DT46_FUNC=0d46 + +PING_TIMEOUT_SEC=4 +PAUSE_ON_FAIL=${PAUSE_ON_FAIL:=no} + +# IDs of routers and hosts are initialized during the setup of the testing +# network +ROUTERS='' +HOSTS='' + +SETUP_ERR=1 + +ret=${ksft_skip} +nsuccess=0 +nfail=0 + +log_test() +{ + local rc="$1" + local expected="$2" + local msg="$3" + + if [ "${rc}" -eq "${expected}" ]; then + nsuccess=$((nsuccess+1)) + printf "\n TEST: %-60s [ OK ]\n" "${msg}" + else + ret=1 + nfail=$((nfail+1)) + printf "\n TEST: %-60s [FAIL]\n" "${msg}" + if [ "${PAUSE_ON_FAIL}" = "yes" ]; then + echo + echo "hit enter to continue, 'q' to quit" + read a + [ "$a" = "q" ] && exit 1 + fi + fi +} + +print_log_test_results() +{ + printf "\nTests passed: %3d\n" "${nsuccess}" + printf "Tests failed: %3d\n" "${nfail}" + + # when a test fails, the value of 'ret' is set to 1 (error code). + # Conversely, when all tests are passed successfully, the 'ret' value + # is set to 0 (success code). + if [ "${ret}" -ne 1 ]; then + ret=0 + fi +} + +log_section() +{ + echo + echo "################################################################################" + echo "TEST SECTION: $*" + echo "################################################################################" +} + +test_command_or_ksft_skip() +{ + local cmd="$1" + + if [ ! 
-x "$(command -v "${cmd}")" ]; then + echo "SKIP: Could not run test without \"${cmd}\" tool"; + exit "${ksft_skip}" + fi +} + +get_nodename() +{ + local name="$1" + + echo "${name}-${RDMSUFF}" +} + +get_rtname() +{ + local rtid="$1" + + get_nodename "rt-${rtid}" +} + +get_hsname() +{ + local hsid="$1" + + get_nodename "hs-${hsid}" +} + +__create_namespace() +{ + local name="$1" + + ip netns add "${name}" +} + +create_router() +{ + local rtid="$1" + local nsname + + nsname="$(get_rtname "${rtid}")" + + __create_namespace "${nsname}" +} + +create_host() +{ + local hsid="$1" + local nsname + + nsname="$(get_hsname "${hsid}")" + + __create_namespace "${nsname}" +} + +cleanup() +{ + local nsname + local i + + # destroy routers + for i in ${ROUTERS}; do + nsname="$(get_rtname "${i}")" + + ip netns del "${nsname}" &>/dev/null || true + done + + # destroy hosts + for i in ${HOSTS}; do + nsname="$(get_hsname "${i}")" + + ip netns del "${nsname}" &>/dev/null || true + done + + # check whether the setup phase was completed successfully or not. In + # case of an error during the setup phase of the testing environment, + # the selftest is considered as "skipped". 
+ if [ "${SETUP_ERR}" -ne 0 ]; then + echo "SKIP: Setting up the testing environment failed" + exit "${ksft_skip}" + fi + + exit "${ret}" +} + +add_link_rt_pairs() +{ + local rt="$1" + local rt_neighs="$2" + local neigh + local nsname + local neigh_nsname + + nsname="$(get_rtname "${rt}")" + + for neigh in ${rt_neighs}; do + neigh_nsname="$(get_rtname "${neigh}")" + + ip link add "veth-rt-${rt}-${neigh}" netns "${nsname}" \ + type veth peer name "veth-rt-${neigh}-${rt}" \ + netns "${neigh_nsname}" + done +} + +get_network_prefix() +{ + local rt="$1" + local neigh="$2" + local p="${rt}" + local q="${neigh}" + + if [ "${p}" -gt "${q}" ]; then + p="${q}"; q="${rt}" + fi + + echo "${IPv6_RT_NETWORK}:${p}:${q}" +} + +# Setup the basic networking for the routers +setup_rt_networking() +{ + local rt="$1" + local rt_neighs="$2" + local nsname + local net_prefix + local devname + local neigh + + nsname="$(get_rtname "${rt}")" + + for neigh in ${rt_neighs}; do + devname="veth-rt-${rt}-${neigh}" + + net_prefix="$(get_network_prefix "${rt}" "${neigh}")" + + ip -netns "${nsname}" addr \ + add "${net_prefix}::${rt}/64" dev "${devname}" nodad + + ip -netns "${nsname}" link set "${devname}" up + done + + ip -netns "${nsname}" link set lo up + + ip netns exec "${nsname}" sysctl -wq net.ipv6.conf.all.accept_dad=0 + ip netns exec "${nsname}" sysctl -wq net.ipv6.conf.default.accept_dad=0 + ip netns exec "${nsname}" sysctl -wq net.ipv6.conf.all.forwarding=1 + + ip netns exec "${nsname}" sysctl -wq net.ipv4.conf.all.rp_filter=0 + ip netns exec "${nsname}" sysctl -wq net.ipv4.conf.default.rp_filter=0 + ip netns exec "${nsname}" sysctl -wq net.ipv4.ip_forward=1 +} + +# Setup local SIDs for an SRv6 router +setup_rt_local_sids() +{ + local rt="$1" + local rt_neighs="$2" + local net_prefix + local devname + local nsname + local neigh + + nsname="$(get_rtname "${rt}")" + + for neigh in ${rt_neighs}; do + devname="veth-rt-${rt}-${neigh}" + + net_prefix="$(get_network_prefix "${rt}" 
"${neigh}")" + + # set underlay network routes for SIDs reachability + ip -netns "${nsname}" -6 route \ + add "${VPN_LOCATOR_SERVICE}:${neigh}::/32" \ + table "${LOCALSID_TABLE_ID}" \ + via "${net_prefix}::${neigh}" dev "${devname}" + done + + # Local End behavior (note that "dev" is dummy and the VRF is chosen + # for the sake of simplicity). + ip -netns "${nsname}" -6 route \ + add "${VPN_LOCATOR_SERVICE}:${rt}::${END_FUNC}" \ + table "${LOCALSID_TABLE_ID}" \ + encap seg6local action End dev "${VRF_DEVNAME}" + + # Local End.DT46 behavior + ip -netns "${nsname}" -6 route \ + add "${VPN_LOCATOR_SERVICE}:${rt}::${DT46_FUNC}" \ + table "${LOCALSID_TABLE_ID}" \ + encap seg6local action End.DT46 vrftable "${VRF_TID}" \ + dev "${VRF_DEVNAME}" + + # all SIDs for VPNs start with a common locator. Routes and SRv6 + # Endpoint behavior instances are grouped together in the 'localsid' + # table. + ip -netns "${nsname}" -6 rule \ + add to "${VPN_LOCATOR_SERVICE}::/16" \ + lookup "${LOCALSID_TABLE_ID}" prio 999 + + # set default routes to unreachable for both ipv4 and ipv6 + ip -netns "${nsname}" -6 route \ + add unreachable default metric 4278198272 \ + vrf "${VRF_DEVNAME}" + + ip -netns "${nsname}" -4 route \ + add unreachable default metric 4278198272 \ + vrf "${VRF_DEVNAME}" +} + +# build and install the SRv6 policy into the ingress SRv6 router. +# args: +# $1 - destination host (i.e.
cafe::x host) +# $2 - SRv6 router configured for enforcing the SRv6 Policy +# $3 - SRv6 routers configured for steering traffic (End behaviors) +# $4 - SRv6 router configured for removing the SRv6 Policy (router connected +# to the destination host) +# $5 - encap mode (full or red) +# $6 - traffic type (IPv6 or IPv4) +__setup_rt_policy() +{ + local dst="$1" + local encap_rt="$2" + local end_rts="$3" + local dec_rt="$4" + local mode="$5" + local traffic="$6" + local nsname + local policy='' + local n + + nsname="$(get_rtname "${encap_rt}")" + + for n in ${end_rts}; do + policy="${policy}${VPN_LOCATOR_SERVICE}:${n}::${END_FUNC}," + done + + policy="${policy}${VPN_LOCATOR_SERVICE}:${dec_rt}::${DT46_FUNC}" + + # add SRv6 policy to incoming traffic sent by connected hosts + if [ "${traffic}" -eq 6 ]; then + ip -netns "${nsname}" -6 route \ + add "${IPv6_HS_NETWORK}::${dst}" vrf "${VRF_DEVNAME}" \ + encap seg6 mode "${mode}" segs "${policy}" \ + dev "${VRF_DEVNAME}" + + ip -netns "${nsname}" -6 neigh \ + add proxy "${IPv6_HS_NETWORK}::${dst}" \ + dev "${RT2HS_DEVNAME}" + else + # "dev" must be different from the one where the packet is + # received, otherwise the proxy arp does not work. 
+ ip -netns "${nsname}" -4 route \ + add "${IPv4_HS_NETWORK}.${dst}" vrf "${VRF_DEVNAME}" \ + encap seg6 mode "${mode}" segs "${policy}" \ + dev "${VRF_DEVNAME}" + fi +} + +# see __setup_rt_policy +setup_rt_policy_ipv6() +{ + __setup_rt_policy "$1" "$2" "$3" "$4" "$5" 6 +} + +#see __setup_rt_policy +setup_rt_policy_ipv4() +{ + __setup_rt_policy "$1" "$2" "$3" "$4" "$5" 4 +} + +setup_hs() +{ + local hs="$1" + local rt="$2" + local hsname + local rtname + + hsname="$(get_hsname "${hs}")" + rtname="$(get_rtname "${rt}")" + + ip netns exec "${hsname}" sysctl -wq net.ipv6.conf.all.accept_dad=0 + ip netns exec "${hsname}" sysctl -wq net.ipv6.conf.default.accept_dad=0 + + ip -netns "${hsname}" link add veth0 type veth \ + peer name "${RT2HS_DEVNAME}" netns "${rtname}" + + ip -netns "${hsname}" addr \ + add "${IPv6_HS_NETWORK}::${hs}/64" dev veth0 nodad + ip -netns "${hsname}" addr add "${IPv4_HS_NETWORK}.${hs}/24" dev veth0 + + ip -netns "${hsname}" link set veth0 up + ip -netns "${hsname}" link set lo up + + # configure the VRF on the router which is directly connected to the + # source host. + ip -netns "${rtname}" link \ + add "${VRF_DEVNAME}" type vrf table "${VRF_TID}" + ip -netns "${rtname}" link set "${VRF_DEVNAME}" up + + # enslave the veth interface connecting the router with the host to the + # VRF in the access router + ip -netns "${rtname}" link \ + set "${RT2HS_DEVNAME}" master "${VRF_DEVNAME}" + + ip -netns "${rtname}" addr \ + add "${IPv6_HS_NETWORK}::254/64" dev "${RT2HS_DEVNAME}" nodad + ip -netns "${rtname}" addr \ + add "${IPv4_HS_NETWORK}.254/24" dev "${RT2HS_DEVNAME}" + + ip -netns "${rtname}" link set "${RT2HS_DEVNAME}" up + + ip netns exec "${rtname}" \ + sysctl -wq net.ipv6.conf."${RT2HS_DEVNAME}".proxy_ndp=1 + ip netns exec "${rtname}" \ + sysctl -wq net.ipv4.conf."${RT2HS_DEVNAME}".proxy_arp=1 + + # disable the rp_filter otherwise the kernel gets confused about how + # to route decap ipv4 packets. 
+ ip netns exec "${rtname}" \ + sysctl -wq net.ipv4.conf."${RT2HS_DEVNAME}".rp_filter=0 + + ip netns exec "${rtname}" sh -c "echo 1 > /proc/sys/net/vrf/strict_mode" +} + +setup() +{ + local i + + # create routers + ROUTERS="1 2 3 4"; readonly ROUTERS + for i in ${ROUTERS}; do + create_router "${i}" + done + + # create hosts + HOSTS="1 2 3 4"; readonly HOSTS + for i in ${HOSTS}; do + create_host "${i}" + done + + # set up the links for connecting routers + add_link_rt_pairs 1 "2 3 4" + add_link_rt_pairs 2 "3 4" + add_link_rt_pairs 3 "4" + + # set up the basic connectivity of routers and routes required for + # reachability of SIDs. + setup_rt_networking 1 "2 3 4" + setup_rt_networking 2 "1 3 4" + setup_rt_networking 3 "1 2 4" + setup_rt_networking 4 "1 2 3" + + # set up the hosts connected to routers + setup_hs 1 1 + setup_hs 2 2 + setup_hs 3 3 + setup_hs 4 4 + + # set up default SRv6 Endpoints (i.e. SRv6 End and SRv6 End.DT46) + setup_rt_local_sids 1 "2 3 4" + setup_rt_local_sids 2 "1 3 4" + setup_rt_local_sids 3 "1 2 4" + setup_rt_local_sids 4 "1 2 3" + + # set up SRv6 policies + + # create an IPv6 VPN between hosts hs-1 and hs-2. + # the network path between hs-1 and hs-2 traverses several routers + # depending on the direction of traffic. + # + # Direction hs-1 -> hs-2 (H.Encaps.Red) + # - rt-3,rt-4 (SRv6 End behaviors) + # - rt-2 (SRv6 End.DT46 behavior) + # + # Direction hs-2 -> hs-1 (H.Encaps.Red) + # - rt-1 (SRv6 End.DT46 behavior) + setup_rt_policy_ipv6 2 1 "3 4" 2 encap.red + setup_rt_policy_ipv6 1 2 "" 1 encap.red + + # create an IPv4 VPN between hosts hs-1 and hs-2 + # the network path between hs-1 and hs-2 traverses several routers + # depending on the direction of traffic. 
+ # + # Direction hs-1 -> hs-2 (H.Encaps.Red) + # - rt-2 (SRv6 End.DT46 behavior) + # + # Direction hs-2 -> hs-1 (H.Encaps.Red) + # - rt-4,rt-3 (SRv6 End behaviors) + # - rt-1 (SRv6 End.DT46 behavior) + setup_rt_policy_ipv4 2 1 "" 2 encap.red + setup_rt_policy_ipv4 1 2 "4 3" 1 encap.red + + # create an IPv6 VPN between hosts hs-3 and hs-4 + # the network path between hs-3 and hs-4 traverses several routers + # depending on the direction of traffic. + # + # Direction hs-3 -> hs-4 (H.Encaps.Red) + # - rt-2 (SRv6 End Behavior) + # - rt-4 (SRv6 End.DT46 behavior) + # + # Direction hs-4 -> hs-3 (H.Encaps.Red) + # - rt-1 (SRv6 End behavior) + # - rt-3 (SRv6 End.DT46 behavior) + setup_rt_policy_ipv6 4 3 "2" 4 encap.red + setup_rt_policy_ipv6 3 4 "1" 3 encap.red + + # testing environment was set up successfully + SETUP_ERR=0 +} + +check_rt_connectivity() +{ + local rtsrc="$1" + local rtdst="$2" + local prefix + local rtsrc_nsname + + rtsrc_nsname="$(get_rtname "${rtsrc}")" + + prefix="$(get_network_prefix "${rtsrc}" "${rtdst}")" + + ip netns exec "${rtsrc_nsname}" ping -c 1 -W "${PING_TIMEOUT_SEC}" \ + "${prefix}::${rtdst}" >/dev/null 2>&1 +} + +check_and_log_rt_connectivity() +{ + local rtsrc="$1" + local rtdst="$2" + + check_rt_connectivity "${rtsrc}" "${rtdst}" + log_test $? 0 "Routers connectivity: rt-${rtsrc} -> rt-${rtdst}" +} + +check_hs_ipv6_connectivity() +{ + local hssrc="$1" + local hsdst="$2" + local hssrc_nsname + + hssrc_nsname="$(get_hsname "${hssrc}")" + + ip netns exec "${hssrc_nsname}" ping -c 1 -W "${PING_TIMEOUT_SEC}" \ + "${IPv6_HS_NETWORK}::${hsdst}" >/dev/null 2>&1 +} + +check_hs_ipv4_connectivity() +{ + local hssrc="$1" + local hsdst="$2" + local hssrc_nsname + + hssrc_nsname="$(get_hsname "${hssrc}")" + + ip netns exec "${hssrc_nsname}" ping -c 1 -W "${PING_TIMEOUT_SEC}" \ + "${IPv4_HS_NETWORK}.${hsdst}" >/dev/null 2>&1 +} + +check_and_log_hs2gw_connectivity() +{ + local hssrc="$1" + + check_hs_ipv6_connectivity "${hssrc}" 254 + log_test $? 
0 "IPv6 Hosts connectivity: hs-${hssrc} -> gw" + + check_hs_ipv4_connectivity "${hssrc}" 254 + log_test $? 0 "IPv4 Hosts connectivity: hs-${hssrc} -> gw" +} + +check_and_log_hs_ipv6_connectivity() +{ + local hssrc="$1" + local hsdst="$2" + + check_hs_ipv6_connectivity "${hssrc}" "${hsdst}" + log_test $? 0 "IPv6 Hosts connectivity: hs-${hssrc} -> hs-${hsdst}" +} + +check_and_log_hs_ipv4_connectivity() +{ + local hssrc="$1" + local hsdst="$2" + + check_hs_ipv4_connectivity "${hssrc}" "${hsdst}" + log_test $? 0 "IPv4 Hosts connectivity: hs-${hssrc} -> hs-${hsdst}" +} + +check_and_log_hs_connectivity() +{ + local hssrc="$1" + local hsdst="$2" + + check_and_log_hs_ipv4_connectivity "${hssrc}" "${hsdst}" + check_and_log_hs_ipv6_connectivity "${hssrc}" "${hsdst}" +} + +check_and_log_hs_ipv6_isolation() +{ + local hssrc="$1" + local hsdst="$2" + + # in this case, the connectivity test must fail + check_hs_ipv6_connectivity "${hssrc}" "${hsdst}" + log_test $? 1 "IPv6 Hosts isolation: hs-${hssrc} -X-> hs-${hsdst}" +} + +check_and_log_hs_ipv4_isolation() +{ + local hssrc="$1" + local hsdst="$2" + + # in this case, the connectivity test must fail + check_hs_ipv4_connectivity "${hssrc}" "${hsdst}" + log_test $? 
1 "IPv4 Hosts isolation: hs-${hssrc} -X-> hs-${hsdst}" +} + +check_and_log_hs_isolation() +{ + local hssrc="$1" + local hsdst="$2" + + check_and_log_hs_ipv6_isolation "${hssrc}" "${hsdst}" + check_and_log_hs_ipv4_isolation "${hssrc}" "${hsdst}" +} + +router_tests() +{ + local i + local j + + log_section "IPv6 routers connectivity test" + + for i in ${ROUTERS}; do + for j in ${ROUTERS}; do + if [ "${i}" -eq "${j}" ]; then + continue + fi + + check_and_log_rt_connectivity "${i}" "${j}" + done + done +} + +host2gateway_tests() +{ + local hs + + log_section "IPv4/IPv6 connectivity test among hosts and gateways" + + for hs in ${HOSTS}; do + check_and_log_hs2gw_connectivity "${hs}" + done +} + +host_vpn_tests() +{ + log_section "SRv6 VPN connectivity test hosts (h1 <-> h2, IPv4/IPv6)" + + check_and_log_hs_connectivity 1 2 + check_and_log_hs_connectivity 2 1 + + log_section "SRv6 VPN connectivity test hosts (h3 <-> h4, IPv6 only)" + + check_and_log_hs_ipv6_connectivity 3 4 + check_and_log_hs_ipv6_connectivity 4 3 +} + +host_vpn_isolation_tests() +{ + local l1="1 2" + local l2="3 4" + local tmp + local i + local j + local k + + log_section "SRv6 VPN isolation test among hosts" + + for k in 0 1; do + for i in ${l1}; do + for j in ${l2}; do + check_and_log_hs_isolation "${i}" "${j}" + done + done + + # let us test the reverse path + tmp="${l1}"; l1="${l2}"; l2="${tmp}" + done + + log_section "SRv6 VPN isolation test among hosts (h2 <-> h4, IPv4 only)" + + check_and_log_hs_ipv4_isolation 2 4 + check_and_log_hs_ipv4_isolation 4 2 +} + +test_iproute2_supp_or_ksft_skip() +{ + if ! ip route help 2>&1 | grep -qo "encap.red"; then + echo "SKIP: Missing SRv6 encap.red support in iproute2" + exit "${ksft_skip}" + fi +} + +test_vrf_or_ksft_skip() +{ + modprobe vrf &>/dev/null || true + if [ ! 
-e /proc/sys/net/vrf/strict_mode ]; then + echo "SKIP: vrf sysctl does not exist" + exit "${ksft_skip}" + fi +} + +if [ "$(id -u)" -ne 0 ]; then + echo "SKIP: Need root privileges" + exit "${ksft_skip}" +fi + +# required programs to carry out this selftest +test_command_or_ksft_skip ip +test_command_or_ksft_skip ping +test_command_or_ksft_skip sysctl +test_command_or_ksft_skip grep + +test_iproute2_supp_or_ksft_skip +test_vrf_or_ksft_skip + +set -e +trap cleanup EXIT + +setup +set +e + +router_tests +host2gateway_tests +host_vpn_tests +host_vpn_isolation_tests + +print_log_test_results diff --git a/tools/testing/selftests/net/srv6_hl2encap_red_l2vpn_test.sh b/tools/testing/selftests/net/srv6_hl2encap_red_l2vpn_test.sh new file mode 100755 index 000000000000..cb4177d41b21 --- /dev/null +++ b/tools/testing/selftests/net/srv6_hl2encap_red_l2vpn_test.sh @@ -0,0 +1,821 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# author: Andrea Mayer <andrea.mayer@uniroma2.it> +# +# This script is designed for testing the SRv6 H.L2Encaps.Red behavior. +# +# Below is depicted the IPv6 network of an operator which offers L2 VPN +# services to hosts, enabling them to communicate with each other. +# In this example, hosts hs-1 and hs-2 are connected through an L2 VPN service. +# Currently, the SRv6 subsystem in Linux allows hosts hs-1 and hs-2 to exchange +# full L2 frames as long as they carry IPv4/IPv6. +# +# Routers rt-1,rt-2,rt-3 and rt-4 implement L2 VPN services +# leveraging the SRv6 architecture. The key components for such VPNs are: +# +# i) The SRv6 H.L2Encaps.Red behavior applies SRv6 Policies on traffic +# received by connected hosts, initiating the VPN tunnel. Such a behavior +# is an optimization of the SRv6 H.L2Encap aiming to reduce the +# length of the SID List carried in the pushed SRH. Specifically, the +# H.L2Encaps.Red removes the first SID contained in the SID List (i.e. SRv6 +# Policy) by storing it into the IPv6 Destination Address. 
When a SRv6 +# Policy is made of only one SID, the SRv6 H.L2Encaps.Red behavior omits +# the SRH at all and pushes that SID directly into the IPv6 DA; +# +# ii) The SRv6 End behavior advances the active SID in the SID List +# carried by the SRH; +# +# iii) The SRv6 End.DX2 behavior is used for removing the SRv6 Policy +# and, thus, it terminates the VPN tunnel. The decapsulated L2 frame is +# sent over the interface connected with the destination host. +# +# cafe::1 cafe::2 +# 10.0.0.1 10.0.0.2 +# +--------+ +--------+ +# | | | | +# | hs-1 | | hs-2 | +# | | | | +# +---+----+ +--- +---+ +# cafe::/64 | | cafe::/64 +# 10.0.0.0/24 | | 10.0.0.0/24 +# +---+----+ +----+---+ +# | | fcf0:0:1:2::/64 | | +# | rt-1 +-------------------+ rt-2 | +# | | | | +# +---+----+ +----+---+ +# | . . | +# | fcf0:0:1:3::/64 . | +# | . . | +# | . . | +# fcf0:0:1:4::/64 | . | fcf0:0:2:3::/64 +# | . . | +# | . . | +# | fcf0:0:2:4::/64 . | +# | . . | +# +---+----+ +----+---+ +# | | | | +# | rt-4 +-------------------+ rt-3 | +# | | fcf0:0:3:4::/64 | | +# +---+----+ +----+---+ +# +# +# Every fcf0:0:x:y::/64 network interconnects the SRv6 routers rt-x with rt-y +# in the IPv6 operator network. +# +# Local SID table +# =============== +# +# Each SRv6 router is configured with a Local SID table in which SIDs are +# stored. Considering the given SRv6 router rt-x, at least two SIDs are +# configured in the Local SID table: +# +# Local SID table for SRv6 router rt-x +# +----------------------------------------------------------+ +# |fcff:x::e is associated with the SRv6 End behavior | +# |fcff:x::d2 is associated with the SRv6 End.DX2 behavior | +# +----------------------------------------------------------+ +# +# The fcff::/16 prefix is reserved by the operator for implementing SRv6 VPN +# services. Reachability of SIDs is ensured by proper configuration of the IPv6 +# operator's network and SRv6 routers. 
+# +# SRv6 Policies +# ============= +# +# An SRv6 ingress router applies SRv6 policies to the traffic received from a +# connected host. SRv6 policy enforcement consists of encapsulating the +# received traffic into a new IPv6 packet with a given SID List contained in +# the SRH. +# +# L2 VPN between hs-1 and hs-2 +# ---------------------------- +# +# Hosts hs-1 and hs-2 are connected using a dedicated L2 VPN. +# Specifically, packets generated from hs-1 and directed towards hs-2 are +# handled by rt-1 which applies the following SRv6 Policies: +# +# i.a) L2 traffic, SID List=fcff:2::d2 +# +# Policy (i.a) steers tunneled L2 traffic through SRv6 router rt-2. +# The H.L2Encaps.Red omits the SRH altogether, since the SID List +# consists of only one SID (fcff:2::d2) that can be stored directly in the IPv6 +# DA. +# +# On the reverse path (i.e. from hs-2 to hs-1), rt-2 applies the following +# policies: +# +# i.b) L2 traffic, SID List=fcff:4::e,fcff:3::e,fcff:1::d2 +# +# Policy (i.b) steers tunneled L2 traffic through the SRv6 routers +# rt-4,rt-3,rt-1. The H.L2Encaps.Red reduces the SID List in the SRH by removing +# the first SID (fcff:4::e) and pushing it into the IPv6 DA. +# +# In summary: +# hs-1->hs-2 |IPv6 DA=fcff:2::d2|eth|...| (i.a) +# hs-2->hs-1 |IPv6 DA=fcff:4::e|SRH SIDs=fcff:3::e,fcff:1::d2|eth|...| (i.b) +# + +# Kselftest framework requirement - SKIP code is 4.
+readonly ksft_skip=4 + +readonly RDMSUFF="$(mktemp -u XXXXXXXX)" +readonly DUMMY_DEVNAME="dum0" +readonly RT2HS_DEVNAME="veth-hs" +readonly HS_VETH_NAME="veth0" +readonly LOCALSID_TABLE_ID=90 +readonly IPv6_RT_NETWORK=fcf0:0 +readonly IPv6_HS_NETWORK=cafe +readonly IPv4_HS_NETWORK=10.0.0 +readonly VPN_LOCATOR_SERVICE=fcff +readonly MAC_PREFIX=00:00:00:c0:01 +readonly END_FUNC=000e +readonly DX2_FUNC=00d2 + +PING_TIMEOUT_SEC=4 +PAUSE_ON_FAIL=${PAUSE_ON_FAIL:=no} + +# IDs of routers and hosts are initialized during the setup of the testing +# network +ROUTERS='' +HOSTS='' + +SETUP_ERR=1 + +ret=${ksft_skip} +nsuccess=0 +nfail=0 + +log_test() +{ + local rc="$1" + local expected="$2" + local msg="$3" + + if [ "${rc}" -eq "${expected}" ]; then + nsuccess=$((nsuccess+1)) + printf "\n TEST: %-60s [ OK ]\n" "${msg}" + else + ret=1 + nfail=$((nfail+1)) + printf "\n TEST: %-60s [FAIL]\n" "${msg}" + if [ "${PAUSE_ON_FAIL}" = "yes" ]; then + echo + echo "hit enter to continue, 'q' to quit" + read a + [ "$a" = "q" ] && exit 1 + fi + fi +} + +print_log_test_results() +{ + printf "\nTests passed: %3d\n" "${nsuccess}" + printf "Tests failed: %3d\n" "${nfail}" + + # when a test fails, the value of 'ret' is set to 1 (error code). + # Conversely, when all tests are passed successfully, the 'ret' value + # is set to 0 (success code). + if [ "${ret}" -ne 1 ]; then + ret=0 + fi +} + +log_section() +{ + echo + echo "################################################################################" + echo "TEST SECTION: $*" + echo "################################################################################" +} + +test_command_or_ksft_skip() +{ + local cmd="$1" + + if [ ! 
-x "$(command -v "${cmd}")" ]; then + echo "SKIP: Could not run test without \"${cmd}\" tool"; + exit "${ksft_skip}" + fi +} + +get_nodename() +{ + local name="$1" + + echo "${name}-${RDMSUFF}" +} + +get_rtname() +{ + local rtid="$1" + + get_nodename "rt-${rtid}" +} + +get_hsname() +{ + local hsid="$1" + + get_nodename "hs-${hsid}" +} + +__create_namespace() +{ + local name="$1" + + ip netns add "${name}" +} + +create_router() +{ + local rtid="$1" + local nsname + + nsname="$(get_rtname "${rtid}")" + + __create_namespace "${nsname}" +} + +create_host() +{ + local hsid="$1" + local nsname + + nsname="$(get_hsname "${hsid}")" + + __create_namespace "${nsname}" +} + +cleanup() +{ + local nsname + local i + + # destroy routers + for i in ${ROUTERS}; do + nsname="$(get_rtname "${i}")" + + ip netns del "${nsname}" &>/dev/null || true + done + + # destroy hosts + for i in ${HOSTS}; do + nsname="$(get_hsname "${i}")" + + ip netns del "${nsname}" &>/dev/null || true + done + + # check whether the setup phase was completed successfully or not. In + # case of an error during the setup phase of the testing environment, + # the selftest is considered as "skipped". 
+ if [ "${SETUP_ERR}" -ne 0 ]; then + echo "SKIP: Setting up the testing environment failed" + exit "${ksft_skip}" + fi + + exit "${ret}" +} + +add_link_rt_pairs() +{ + local rt="$1" + local rt_neighs="$2" + local neigh + local nsname + local neigh_nsname + + nsname="$(get_rtname "${rt}")" + + for neigh in ${rt_neighs}; do + neigh_nsname="$(get_rtname "${neigh}")" + + ip link add "veth-rt-${rt}-${neigh}" netns "${nsname}" \ + type veth peer name "veth-rt-${neigh}-${rt}" \ + netns "${neigh_nsname}" + done +} + +get_network_prefix() +{ + local rt="$1" + local neigh="$2" + local p="${rt}" + local q="${neigh}" + + if [ "${p}" -gt "${q}" ]; then + p="${q}"; q="${rt}" + fi + + echo "${IPv6_RT_NETWORK}:${p}:${q}" +} + +# Setup the basic networking for the routers +setup_rt_networking() +{ + local rt="$1" + local rt_neighs="$2" + local nsname + local net_prefix + local devname + local neigh + + nsname="$(get_rtname "${rt}")" + + for neigh in ${rt_neighs}; do + devname="veth-rt-${rt}-${neigh}" + + net_prefix="$(get_network_prefix "${rt}" "${neigh}")" + + ip -netns "${nsname}" addr \ + add "${net_prefix}::${rt}/64" dev "${devname}" nodad + + ip -netns "${nsname}" link set "${devname}" up + done + + ip -netns "${nsname}" link add "${DUMMY_DEVNAME}" type dummy + + ip -netns "${nsname}" link set "${DUMMY_DEVNAME}" up + ip -netns "${nsname}" link set lo up + + ip netns exec "${nsname}" sysctl -wq net.ipv6.conf.all.accept_dad=0 + ip netns exec "${nsname}" sysctl -wq net.ipv6.conf.default.accept_dad=0 + ip netns exec "${nsname}" sysctl -wq net.ipv6.conf.all.forwarding=1 + + ip netns exec "${nsname}" sysctl -wq net.ipv4.conf.all.rp_filter=0 + ip netns exec "${nsname}" sysctl -wq net.ipv4.conf.default.rp_filter=0 + ip netns exec "${nsname}" sysctl -wq net.ipv4.ip_forward=1 +} + +# Setup local SIDs for an SRv6 router +setup_rt_local_sids() +{ + local rt="$1" + local rt_neighs="$2" + local net_prefix + local devname + local nsname + local neigh + + nsname="$(get_rtname "${rt}")" + + 
for neigh in ${rt_neighs}; do + devname="veth-rt-${rt}-${neigh}" + + net_prefix="$(get_network_prefix "${rt}" "${neigh}")" + + # set underlay network routes for SIDs reachability + ip -netns "${nsname}" -6 route \ + add "${VPN_LOCATOR_SERVICE}:${neigh}::/32" \ + table "${LOCALSID_TABLE_ID}" \ + via "${net_prefix}::${neigh}" dev "${devname}" + done + + # Local End behavior (note that dev "${DUMMY_DEVNAME}" is a dummy + # interface) + ip -netns "${nsname}" -6 route \ + add "${VPN_LOCATOR_SERVICE}:${rt}::${END_FUNC}" \ + table "${LOCALSID_TABLE_ID}" \ + encap seg6local action End dev "${DUMMY_DEVNAME}" + + # all SIDs for VPNs start with a common locator. Routes and SRv6 + # Endpoint behaviors instances are grouped together in the 'localsid' + # table. + ip -netns "${nsname}" -6 rule add \ + to "${VPN_LOCATOR_SERVICE}::/16" \ + lookup "${LOCALSID_TABLE_ID}" prio 999 +} + +# build and install the SRv6 policy into the ingress SRv6 router. +# args: +# $1 - destination host (i.e. cafe::x host) +# $2 - SRv6 router configured for enforcing the SRv6 Policy +# $3 - SRv6 routers configured for steering traffic (End behaviors) +# $4 - SRv6 router configured for removing the SRv6 Policy (router connected +# to the destination host) +# $5 - encap mode (full or red) +# $6 - traffic type (IPv6 or IPv4) +__setup_rt_policy() +{ + local dst="$1" + local encap_rt="$2" + local end_rts="$3" + local dec_rt="$4" + local mode="$5" + local traffic="$6" + local nsname + local policy='' + local n + + nsname="$(get_rtname "${encap_rt}")" + + for n in ${end_rts}; do + policy="${policy}${VPN_LOCATOR_SERVICE}:${n}::${END_FUNC}," + done + + policy="${policy}${VPN_LOCATOR_SERVICE}:${dec_rt}::${DX2_FUNC}" + + # add SRv6 policy to incoming traffic sent by connected hosts + if [ "${traffic}" -eq 6 ]; then + ip -netns "${nsname}" -6 route \ + add "${IPv6_HS_NETWORK}::${dst}" \ + encap seg6 mode "${mode}" segs "${policy}" \ + dev dum0 + else + ip -netns "${nsname}" -4 route \ + add
"${IPv4_HS_NETWORK}.${dst}" \ + encap seg6 mode "${mode}" segs "${policy}" \ + dev dum0 + fi +} + +# see __setup_rt_policy +setup_rt_policy_ipv6() +{ + __setup_rt_policy "$1" "$2" "$3" "$4" "$5" 6 +} + +#see __setup_rt_policy +setup_rt_policy_ipv4() +{ + __setup_rt_policy "$1" "$2" "$3" "$4" "$5" 4 +} + +setup_decap() +{ + local rt="$1" + local nsname + + nsname="$(get_rtname "${rt}")" + + # Local End.DX2 behavior + ip -netns "${nsname}" -6 route \ + add "${VPN_LOCATOR_SERVICE}:${rt}::${DX2_FUNC}" \ + table "${LOCALSID_TABLE_ID}" \ + encap seg6local action End.DX2 oif "${RT2HS_DEVNAME}" \ + dev "${RT2HS_DEVNAME}" +} + +setup_hs() +{ + local hs="$1" + local rt="$2" + local hsname + local rtname + + hsname="$(get_hsname "${hs}")" + rtname="$(get_rtname "${rt}")" + + ip netns exec "${hsname}" sysctl -wq net.ipv6.conf.all.accept_dad=0 + ip netns exec "${hsname}" sysctl -wq net.ipv6.conf.default.accept_dad=0 + + ip -netns "${hsname}" link add "${HS_VETH_NAME}" type veth \ + peer name "${RT2HS_DEVNAME}" netns "${rtname}" + + ip -netns "${hsname}" addr add "${IPv6_HS_NETWORK}::${hs}/64" \ + dev "${HS_VETH_NAME}" nodad + ip -netns "${hsname}" addr add "${IPv4_HS_NETWORK}.${hs}/24" \ + dev "${HS_VETH_NAME}" + + ip -netns "${hsname}" link set "${HS_VETH_NAME}" up + ip -netns "${hsname}" link set lo up + + ip -netns "${rtname}" addr add "${IPv6_HS_NETWORK}::254/64" \ + dev "${RT2HS_DEVNAME}" nodad + ip -netns "${rtname}" addr \ + add "${IPv4_HS_NETWORK}.254/24" dev "${RT2HS_DEVNAME}" + + ip -netns "${rtname}" link set "${RT2HS_DEVNAME}" up + + # disable the rp_filter otherwise the kernel gets confused about how + # to route decap ipv4 packets. 
+ ip netns exec "${rtname}" \ + sysctl -wq net.ipv4.conf."${RT2HS_DEVNAME}".rp_filter=0 +} + +# set an auto-generated mac address +# args: +# $1 - name of the node (e.g.: hs-1, rt-3, etc) +# $2 - id of the node (e.g.: 1 for hs-1, 3 for rt-3, etc) +# $3 - host part of the IPv6 network address +# $4 - name of the network interface to which the generated mac address must +# be set. +set_mac_address() +{ + local nodename="$1" + local nodeid="$2" + local host="$3" + local ifname="$4" + local nsname + + nsname=$(get_nodename "${nodename}") + + ip -netns "${nsname}" link set dev "${ifname}" down + + ip -netns "${nsname}" link set address "${MAC_PREFIX}:${nodeid}" \ + dev "${ifname}" + + # the IPv6 address must be set once again after the MAC address has + # been changed. + ip -netns "${nsname}" addr add "${IPv6_HS_NETWORK}::${host}/64" \ + dev "${ifname}" nodad + + ip -netns "${nsname}" link set dev "${ifname}" up +} + +set_host_l2peer() +{ + local hssrc="$1" + local hsdst="$2" + local ipprefix="$3" + local proto="$4" + local hssrc_name + local ipaddr + + hssrc_name="$(get_hsname "${hssrc}")" + + if [ "${proto}" -eq 6 ]; then + ipaddr="${ipprefix}::${hsdst}" + else + ipaddr="${ipprefix}.${hsdst}" + fi + + ip -netns "${hssrc_name}" route add "${ipaddr}" dev "${HS_VETH_NAME}" + + ip -netns "${hssrc_name}" neigh \ + add "${ipaddr}" lladdr "${MAC_PREFIX}:${hsdst}" \ + dev "${HS_VETH_NAME}" +} + +# setup an SRv6 L2 VPN between host hs-x and hs-y (currently, the SRv6 +# subsystem only supports L2 frames whose layer-3 is IPv4/IPv6). 
+# args: +# $1 - source host +# $2 - SRv6 routers configured for steering tunneled traffic +# $3 - destination host +setup_l2vpn() +{ + local hssrc="$1" + local end_rts="$2" + local hsdst="$3" + local rtsrc="${hssrc}" + local rtdst="${hsdst}" + + # set fixed mac for source node and the neigh MAC address + set_mac_address "hs-${hssrc}" "${hssrc}" "${hssrc}" "${HS_VETH_NAME}" + set_host_l2peer "${hssrc}" "${hsdst}" "${IPv6_HS_NETWORK}" 6 + set_host_l2peer "${hssrc}" "${hsdst}" "${IPv4_HS_NETWORK}" 4 + + # we have to set the mac address of the veth-host (on ingress router) + # to the mac address of the remote peer (L2 VPN destination host). + # Otherwise, traffic coming from the source host is dropped at the + # ingress router. + set_mac_address "rt-${rtsrc}" "${hsdst}" 254 "${RT2HS_DEVNAME}" + + # set the SRv6 Policies at the ingress router + setup_rt_policy_ipv6 "${hsdst}" "${rtsrc}" "${end_rts}" "${rtdst}" \ + l2encap.red 6 + setup_rt_policy_ipv4 "${hsdst}" "${rtsrc}" "${end_rts}" "${rtdst}" \ + l2encap.red 4 + + # set the decap behavior + setup_decap "${rtsrc}" +} + +setup() +{ + local i + + # create routers + ROUTERS="1 2 3 4"; readonly ROUTERS + for i in ${ROUTERS}; do + create_router "${i}" + done + + # create hosts + HOSTS="1 2"; readonly HOSTS + for i in ${HOSTS}; do + create_host "${i}" + done + + # set up the links for connecting routers + add_link_rt_pairs 1 "2 3 4" + add_link_rt_pairs 2 "3 4" + add_link_rt_pairs 3 "4" + + # set up the basic connectivity of routers and routes required for + # reachability of SIDs. + setup_rt_networking 1 "2 3 4" + setup_rt_networking 2 "1 3 4" + setup_rt_networking 3 "1 2 4" + setup_rt_networking 4 "1 2 3" + + # set up the hosts connected to routers + setup_hs 1 1 + setup_hs 2 2 + + # set up default SRv6 Endpoints (i.e. SRv6 End and SRv6 End.DX2) + setup_rt_local_sids 1 "2 3 4" + setup_rt_local_sids 2 "1 3 4" + setup_rt_local_sids 3 "1 2 4" + setup_rt_local_sids 4 "1 2 3" + + # create a L2 VPN between hs-1 and hs-2. 
+ # NB: currently, H.L2Encap* enables tunneling of L2 frames whose + # layer-3 is IPv4/IPv6. + # + # the network path between hs-1 and hs-2 traverses several routers + # depending on the direction of traffic. + # + # Direction hs-1 -> hs-2 (H.L2Encaps.Red) + # - rt-2 (SRv6 End.DX2 behavior) + # + # Direction hs-2 -> hs-1 (H.L2Encaps.Red) + # - rt-4,rt-3 (SRv6 End behaviors) + # - rt-1 (SRv6 End.DX2 behavior) + setup_l2vpn 1 "" 2 + setup_l2vpn 2 "4 3" 1 + + # testing environment was set up successfully + SETUP_ERR=0 +} + +check_rt_connectivity() +{ + local rtsrc="$1" + local rtdst="$2" + local prefix + local rtsrc_nsname + + rtsrc_nsname="$(get_rtname "${rtsrc}")" + + prefix="$(get_network_prefix "${rtsrc}" "${rtdst}")" + + ip netns exec "${rtsrc_nsname}" ping -c 1 -W "${PING_TIMEOUT_SEC}" \ + "${prefix}::${rtdst}" >/dev/null 2>&1 +} + +check_and_log_rt_connectivity() +{ + local rtsrc="$1" + local rtdst="$2" + + check_rt_connectivity "${rtsrc}" "${rtdst}" + log_test $? 0 "Routers connectivity: rt-${rtsrc} -> rt-${rtdst}" +} + +check_hs_ipv6_connectivity() +{ + local hssrc="$1" + local hsdst="$2" + local hssrc_nsname + + hssrc_nsname="$(get_hsname "${hssrc}")" + + ip netns exec "${hssrc_nsname}" ping -c 1 -W "${PING_TIMEOUT_SEC}" \ + "${IPv6_HS_NETWORK}::${hsdst}" >/dev/null 2>&1 +} + +check_hs_ipv4_connectivity() +{ + local hssrc="$1" + local hsdst="$2" + local hssrc_nsname + + hssrc_nsname="$(get_hsname "${hssrc}")" + + ip netns exec "${hssrc_nsname}" ping -c 1 -W "${PING_TIMEOUT_SEC}" \ + "${IPv4_HS_NETWORK}.${hsdst}" >/dev/null 2>&1 +} + +check_and_log_hs2gw_connectivity() +{ + local hssrc="$1" + + check_hs_ipv6_connectivity "${hssrc}" 254 + log_test $? 0 "IPv6 Hosts connectivity: hs-${hssrc} -> gw" + + check_hs_ipv4_connectivity "${hssrc}" 254 + log_test $? 0 "IPv4 Hosts connectivity: hs-${hssrc} -> gw" +} + +check_and_log_hs_ipv6_connectivity() +{ + local hssrc="$1" + local hsdst="$2" + + check_hs_ipv6_connectivity "${hssrc}" "${hsdst}" + log_test $? 
0 "IPv6 Hosts connectivity: hs-${hssrc} -> hs-${hsdst}" +} + +check_and_log_hs_ipv4_connectivity() +{ + local hssrc="$1" + local hsdst="$2" + + check_hs_ipv4_connectivity "${hssrc}" "${hsdst}" + log_test $? 0 "IPv4 Hosts connectivity: hs-${hssrc} -> hs-${hsdst}" +} + +check_and_log_hs_connectivity() +{ + local hssrc="$1" + local hsdst="$2" + + check_and_log_hs_ipv4_connectivity "${hssrc}" "${hsdst}" + check_and_log_hs_ipv6_connectivity "${hssrc}" "${hsdst}" +} + +router_tests() +{ + local i + local j + + log_section "IPv6 routers connectivity test" + + for i in ${ROUTERS}; do + for j in ${ROUTERS}; do + if [ "${i}" -eq "${j}" ]; then + continue + fi + + check_and_log_rt_connectivity "${i}" "${j}" + done + done +} + +host2gateway_tests() +{ + local hs + + log_section "IPv4/IPv6 connectivity test among hosts and gateways" + + for hs in ${HOSTS}; do + check_and_log_hs2gw_connectivity "${hs}" + done +} + +host_vpn_tests() +{ + log_section "SRv6 L2 VPN connectivity test hosts (h1 <-> h2)" + + check_and_log_hs_connectivity 1 2 + check_and_log_hs_connectivity 2 1 +} + +test_dummy_dev_or_ksft_skip() +{ + local test_netns + + test_netns="dummy-$(mktemp -u XXXXXXXX)" + + if ! ip netns add "${test_netns}"; then + echo "SKIP: Cannot set up netns for testing dummy dev support" + exit "${ksft_skip}" + fi + + modprobe dummy &>/dev/null || true + if ! ip -netns "${test_netns}" link \ + add "${DUMMY_DEVNAME}" type dummy; then + echo "SKIP: dummy dev not supported" + + ip netns del "${test_netns}" + exit "${ksft_skip}" + fi + + ip netns del "${test_netns}" +} + +test_iproute2_supp_or_ksft_skip() +{ + if ! 
ip route help 2>&1 | grep -qo "l2encap.red"; then + echo "SKIP: Missing SRv6 l2encap.red support in iproute2" + exit "${ksft_skip}" + fi +} + +if [ "$(id -u)" -ne 0 ]; then + echo "SKIP: Need root privileges" + exit "${ksft_skip}" +fi + +# required programs to carry out this selftest +test_command_or_ksft_skip ip +test_command_or_ksft_skip ping +test_command_or_ksft_skip sysctl +test_command_or_ksft_skip grep + +test_iproute2_supp_or_ksft_skip +test_dummy_dev_or_ksft_skip + +set -e +trap cleanup EXIT + +setup +set +e + +router_tests +host2gateway_tests +host_vpn_tests + +print_log_test_results diff --git a/tools/testing/selftests/net/tls.c b/tools/testing/selftests/net/tls.c index 5d70b04c482c..2cbb12736596 100644 --- a/tools/testing/selftests/net/tls.c +++ b/tools/testing/selftests/net/tls.c @@ -235,6 +235,7 @@ FIXTURE_VARIANT(tls) { uint16_t tls_version; uint16_t cipher_type; + bool nopad; }; FIXTURE_VARIANT_ADD(tls, 12_aes_gcm) @@ -297,9 +298,17 @@ FIXTURE_VARIANT_ADD(tls, 13_aes_gcm_256) .cipher_type = TLS_CIPHER_AES_GCM_256, }; +FIXTURE_VARIANT_ADD(tls, 13_nopad) +{ + .tls_version = TLS_1_3_VERSION, + .cipher_type = TLS_CIPHER_AES_GCM_128, + .nopad = true, +}; + FIXTURE_SETUP(tls) { struct tls_crypto_info_keys tls12; + int one = 1; int ret; tls_crypto_info_init(variant->tls_version, variant->cipher_type, @@ -315,6 +324,12 @@ FIXTURE_SETUP(tls) ret = setsockopt(self->cfd, SOL_TLS, TLS_RX, &tls12, tls12.len); ASSERT_EQ(ret, 0); + + if (variant->nopad) { + ret = setsockopt(self->cfd, SOL_TLS, TLS_RX_EXPECT_NO_PAD, + (void *)&one, sizeof(one)); + ASSERT_EQ(ret, 0); + } } FIXTURE_TEARDOWN(tls) @@ -629,12 +644,14 @@ TEST_F(tls, splice_from_pipe2) int p2[2]; int p[2]; + memrnd(mem_send, sizeof(mem_send)); + ASSERT_GE(pipe(p), 0); ASSERT_GE(pipe(p2), 0); - EXPECT_GE(write(p[1], mem_send, 8000), 0); - EXPECT_GE(splice(p[0], NULL, self->fd, NULL, 8000, 0), 0); - EXPECT_GE(write(p2[1], mem_send + 8000, 8000), 0); - EXPECT_GE(splice(p2[0], NULL, self->fd, NULL, 8000, 0), 
0); + EXPECT_EQ(write(p[1], mem_send, 8000), 8000); + EXPECT_EQ(splice(p[0], NULL, self->fd, NULL, 8000, 0), 8000); + EXPECT_EQ(write(p2[1], mem_send + 8000, 8000), 8000); + EXPECT_EQ(splice(p2[0], NULL, self->fd, NULL, 8000, 0), 8000); EXPECT_EQ(recv(self->cfd, mem_recv, send_len, MSG_WAITALL), send_len); EXPECT_EQ(memcmp(mem_send, mem_recv, send_len), 0); } @@ -668,10 +685,12 @@ TEST_F(tls, splice_to_pipe) char mem_recv[TLS_PAYLOAD_MAX_LEN]; int p[2]; + memrnd(mem_send, sizeof(mem_send)); + ASSERT_GE(pipe(p), 0); - EXPECT_GE(send(self->fd, mem_send, send_len, 0), 0); - EXPECT_GE(splice(self->cfd, NULL, p[1], NULL, send_len, 0), 0); - EXPECT_GE(read(p[0], mem_recv, send_len), 0); + EXPECT_EQ(send(self->fd, mem_send, send_len, 0), send_len); + EXPECT_EQ(splice(self->cfd, NULL, p[1], NULL, send_len, 0), send_len); + EXPECT_EQ(read(p[0], mem_recv, send_len), send_len); EXPECT_EQ(memcmp(mem_send, mem_recv, send_len), 0); } @@ -860,6 +879,8 @@ TEST_F(tls, multiple_send_single_recv) char recv_mem[2 * 10]; char send_mem[10]; + memrnd(send_mem, sizeof(send_mem)); + EXPECT_GE(send(self->fd, send_mem, send_len, 0), 0); EXPECT_GE(send(self->fd, send_mem, send_len, 0), 0); memset(recv_mem, 0, total_len); @@ -876,6 +897,8 @@ TEST_F(tls, single_send_multiple_recv_non_align) char recv_mem[recv_len * 2]; char send_mem[total_len]; + memrnd(send_mem, sizeof(send_mem)); + EXPECT_GE(send(self->fd, send_mem, total_len, 0), 0); memset(recv_mem, 0, total_len); @@ -921,10 +944,10 @@ TEST_F(tls, recv_peek) char buf[15]; EXPECT_EQ(send(self->fd, test_str, send_len, 0), send_len); - EXPECT_NE(recv(self->cfd, buf, send_len, MSG_PEEK), -1); + EXPECT_EQ(recv(self->cfd, buf, send_len, MSG_PEEK), send_len); EXPECT_EQ(memcmp(test_str, buf, send_len), 0); memset(buf, 0, sizeof(buf)); - EXPECT_NE(recv(self->cfd, buf, send_len, 0), -1); + EXPECT_EQ(recv(self->cfd, buf, send_len, 0), send_len); EXPECT_EQ(memcmp(test_str, buf, send_len), 0); } @@ -1582,6 +1605,38 @@ TEST_F(tls_err, bad_cmsg) 
EXPECT_EQ(errno, EBADMSG); } +TEST_F(tls_err, timeo) +{ + struct timeval tv = { .tv_usec = 10000, }; + char buf[128]; + int ret; + + if (self->notls) + SKIP(return, "no TLS support"); + + ret = setsockopt(self->cfd2, SOL_SOCKET, SO_RCVTIMEO, &tv, sizeof(tv)); + ASSERT_EQ(ret, 0); + + ret = fork(); + ASSERT_GE(ret, 0); + + if (ret) { + usleep(1000); /* Give child a head start */ + + EXPECT_EQ(recv(self->cfd2, buf, sizeof(buf), 0), -1); + EXPECT_EQ(errno, EAGAIN); + + EXPECT_EQ(recv(self->cfd2, buf, sizeof(buf), 0), -1); + EXPECT_EQ(errno, EAGAIN); + + wait(&ret); + } else { + EXPECT_EQ(recv(self->cfd2, buf, sizeof(buf), 0), -1); + EXPECT_EQ(errno, EAGAIN); + exit(0); + } +} + TEST(non_established) { struct tls12_crypto_info_aes_gcm_256 tls12; struct sockaddr_in addr; @@ -1659,6 +1714,57 @@ TEST(keysizes) { close(cfd); } +TEST(no_pad) { + struct tls12_crypto_info_aes_gcm_256 tls12; + int ret, fd, cfd, val; + socklen_t len; + bool notls; + + memset(&tls12, 0, sizeof(tls12)); + tls12.info.version = TLS_1_3_VERSION; + tls12.info.cipher_type = TLS_CIPHER_AES_GCM_256; + + ulp_sock_pair(_metadata, &fd, &cfd, &notls); + + if (notls) + exit(KSFT_SKIP); + + ret = setsockopt(fd, SOL_TLS, TLS_TX, &tls12, sizeof(tls12)); + EXPECT_EQ(ret, 0); + + ret = setsockopt(cfd, SOL_TLS, TLS_RX, &tls12, sizeof(tls12)); + EXPECT_EQ(ret, 0); + + val = 1; + ret = setsockopt(cfd, SOL_TLS, TLS_RX_EXPECT_NO_PAD, + (void *)&val, sizeof(val)); + EXPECT_EQ(ret, 0); + + len = sizeof(val); + val = 2; + ret = getsockopt(cfd, SOL_TLS, TLS_RX_EXPECT_NO_PAD, + (void *)&val, &len); + EXPECT_EQ(ret, 0); + EXPECT_EQ(val, 1); + EXPECT_EQ(len, 4); + + val = 0; + ret = setsockopt(cfd, SOL_TLS, TLS_RX_EXPECT_NO_PAD, + (void *)&val, sizeof(val)); + EXPECT_EQ(ret, 0); + + len = sizeof(val); + val = 2; + ret = getsockopt(cfd, SOL_TLS, TLS_RX_EXPECT_NO_PAD, + (void *)&val, &len); + EXPECT_EQ(ret, 0); + EXPECT_EQ(val, 0); + EXPECT_EQ(len, 4); + + close(fd); + close(cfd); +} + TEST(tls_v6ops) { struct
tls_crypto_info_keys tls12; struct sockaddr_in6 addr, addr2; diff --git a/tools/testing/selftests/tc-testing/.gitignore b/tools/testing/selftests/tc-testing/.gitignore index d52f65de23b4..9fe1cef72728 100644 --- a/tools/testing/selftests/tc-testing/.gitignore +++ b/tools/testing/selftests/tc-testing/.gitignore @@ -1,7 +1,6 @@ # SPDX-License-Identifier: GPL-2.0-only __pycache__/ *.pyc -plugins/ *.xml *.tap tdc_config_local.py diff --git a/tools/testing/selftests/wireguard/qemu/Makefile b/tools/testing/selftests/wireguard/qemu/Makefile index 9700358e4337..fda76282d34b 100644 --- a/tools/testing/selftests/wireguard/qemu/Makefile +++ b/tools/testing/selftests/wireguard/qemu/Makefile @@ -248,8 +248,13 @@ QEMU_MACHINE := -cpu host,accel=kvm -machine s390-ccw-virtio -append $(KERNEL_CM else QEMU_MACHINE := -cpu max -machine s390-ccw-virtio -append $(KERNEL_CMDLINE) endif +else ifeq ($(ARCH),um) +CHOST := $(HOST_ARCH)-linux-musl +KERNEL_BZIMAGE := $(KERNEL_BUILD_PATH)/vmlinux +KERNEL_ARCH := um +KERNEL_CMDLINE := $(shell sed -n 's/CONFIG_CMDLINE=\(.*\)/\1/p' arch/um.config) else -$(error I only build: x86_64, i686, arm, armeb, aarch64, aarch64_be, mips, mipsel, mips64, mips64el, powerpc64, powerpc64le, powerpc, m68k, riscv64, riscv32, s390x) +$(error I only build: x86_64, i686, arm, armeb, aarch64, aarch64_be, mips, mipsel, mips64, mips64el, powerpc64, powerpc64le, powerpc, m68k, riscv64, riscv32, s390x, um) endif TOOLCHAIN_FILENAME := $(CHOST)-cross.tgz @@ -262,7 +267,9 @@ $(eval $(call file_download,$(TOOLCHAIN_FILENAME),$(TOOLCHAIN_DIR),,$(DISTFILES_ STRIP := $(CHOST)-strip CROSS_COMPILE_FLAG := --build=$(CBUILD) --host=$(CHOST) $(info Building for $(CHOST) using $(CBUILD)) +ifneq ($(ARCH),um) export CROSS_COMPILE := $(CHOST)- +endif export PATH := $(TOOLCHAIN_PATH)/bin:$(PATH) export CC := $(CHOST)-gcc CCACHE_PATH := $(shell which ccache 2>/dev/null) @@ -279,6 +286,7 @@ comma := , build: $(KERNEL_BZIMAGE) qemu: $(KERNEL_BZIMAGE) rm -f $(BUILD_PATH)/result +ifneq 
($(ARCH),um) timeout --foreground 20m qemu-system-$(QEMU_ARCH) \ -nodefaults \ -nographic \ @@ -291,6 +299,13 @@ qemu: $(KERNEL_BZIMAGE) -no-reboot \ -monitor none \ -kernel $< +else + timeout --foreground 20m $< \ + $(KERNEL_CMDLINE) \ + mem=$$(grep -q CONFIG_DEBUG_KMEMLEAK=y $(KERNEL_BUILD_PATH)/.config && echo 1G || echo 256M) \ + noreboot \ + con1=fd:51 51>$(BUILD_PATH)/result </dev/null 2>&1 | cat +endif grep -Fq success $(BUILD_PATH)/result $(BUILD_PATH)/init-cpio-spec.txt: $(TOOLCHAIN_PATH)/.installed $(BUILD_PATH)/init diff --git a/tools/testing/selftests/wireguard/qemu/arch/um.config b/tools/testing/selftests/wireguard/qemu/arch/um.config new file mode 100644 index 000000000000..c8b229e0810e --- /dev/null +++ b/tools/testing/selftests/wireguard/qemu/arch/um.config @@ -0,0 +1,3 @@ +CONFIG_64BIT=y +CONFIG_CMDLINE="wg.success=tty1 panic_on_warn=1" +CONFIG_FRAME_WARN=1280 diff --git a/tools/testing/selftests/wireguard/qemu/debug.config b/tools/testing/selftests/wireguard/qemu/debug.config index 2b321b8a96cf..9d172210e2c6 100644 --- a/tools/testing/selftests/wireguard/qemu/debug.config +++ b/tools/testing/selftests/wireguard/qemu/debug.config @@ -18,15 +18,12 @@ CONFIG_DEBUG_VM=y CONFIG_DEBUG_MEMORY_INIT=y CONFIG_HAVE_DEBUG_STACKOVERFLOW=y CONFIG_DEBUG_STACKOVERFLOW=y -CONFIG_HAVE_ARCH_KMEMCHECK=y CONFIG_HAVE_ARCH_KASAN=y CONFIG_KASAN=y CONFIG_KASAN_INLINE=y CONFIG_UBSAN=y CONFIG_UBSAN_SANITIZE_ALL=y -CONFIG_UBSAN_NULL=y CONFIG_DEBUG_KMEMLEAK=y -CONFIG_DEBUG_KMEMLEAK_EARLY_LOG_SIZE=8192 CONFIG_DEBUG_STACK_USAGE=y CONFIG_DEBUG_SHIRQ=y CONFIG_WQ_WATCHDOG=y @@ -35,7 +32,6 @@ CONFIG_SCHED_INFO=y CONFIG_SCHEDSTATS=y CONFIG_SCHED_STACK_END_CHECK=y CONFIG_DEBUG_TIMEKEEPING=y -CONFIG_TIMER_STATS=y CONFIG_DEBUG_PREEMPT=y CONFIG_DEBUG_RT_MUTEXES=y CONFIG_DEBUG_SPINLOCK=y @@ -49,7 +45,6 @@ CONFIG_DEBUG_BUGVERBOSE=y CONFIG_DEBUG_LIST=y CONFIG_DEBUG_PLIST=y CONFIG_PROVE_RCU=y -CONFIG_SPARSE_RCU_POINTER=y CONFIG_RCU_CPU_STALL_TIMEOUT=21 CONFIG_RCU_TRACE=y 
CONFIG_RCU_EQS_DEBUG=y diff --git a/tools/testing/selftests/wireguard/qemu/kernel.config b/tools/testing/selftests/wireguard/qemu/kernel.config index e1858ce7003f..ce2a04717300 100644 --- a/tools/testing/selftests/wireguard/qemu/kernel.config +++ b/tools/testing/selftests/wireguard/qemu/kernel.config @@ -19,7 +19,6 @@ CONFIG_NETFILTER_XTABLES=y CONFIG_NETFILTER_XT_NAT=y CONFIG_NETFILTER_XT_MATCH_LENGTH=y CONFIG_NETFILTER_XT_MARK=y -CONFIG_NF_NAT_IPV4=y CONFIG_IP_NF_IPTABLES=y CONFIG_IP_NF_FILTER=y CONFIG_IP_NF_MANGLE=y |