aboutsummaryrefslogtreecommitdiff
path: root/kernel/bpf
diff options
context:
space:
mode:
authorDavid S. Miller2019-11-20 18:11:23 -0800
committerDavid S. Miller2019-11-20 18:11:23 -0800
commitee5a489fd9645104925e5cdf8f8e455d833730b9 (patch)
tree1e46a8c460e1d51d465fe472e42cf1c16f92f9c7 /kernel/bpf
parente2193c9334291ecdc437cdbd9fe9ac35c14fffa8 (diff)
parent196e8ca74886c433dcfc64a809707074b936aaf5 (diff)
Merge git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next
Daniel Borkmann says: ==================== pull-request: bpf-next 2019-11-20 The following pull-request contains BPF updates for your *net-next* tree. We've added 81 non-merge commits during the last 17 day(s) which contain a total of 120 files changed, 4958 insertions(+), 1081 deletions(-). There are 3 trivial conflicts, resolve it by always taking the chunk from 196e8ca74886c433: <<<<<<< HEAD ======= void *bpf_map_area_mmapable_alloc(u64 size, int numa_node); >>>>>>> 196e8ca74886c433dcfc64a809707074b936aaf5 <<<<<<< HEAD void *bpf_map_area_alloc(u64 size, int numa_node) ======= static void *__bpf_map_area_alloc(u64 size, int numa_node, bool mmapable) >>>>>>> 196e8ca74886c433dcfc64a809707074b936aaf5 <<<<<<< HEAD if (size <= (PAGE_SIZE << PAGE_ALLOC_COSTLY_ORDER)) { ======= /* kmalloc()'ed memory can't be mmap()'ed */ if (!mmapable && size <= (PAGE_SIZE << PAGE_ALLOC_COSTLY_ORDER)) { >>>>>>> 196e8ca74886c433dcfc64a809707074b936aaf5 The main changes are: 1) Addition of BPF trampoline which works as a bridge between kernel functions, BPF programs and other BPF programs along with two new use cases: i) fentry/fexit BPF programs for tracing with practically zero overhead to call into BPF (as opposed to k[ret]probes) and ii) attachment of the former to networking related programs to see input/output of networking programs (covering xdpdump use case), from Alexei Starovoitov. 2) BPF array map mmap support and use in libbpf for global data maps; also a big batch of libbpf improvements, among others, support for reading bitfields in a relocatable manner (via libbpf's CO-RE helper API), from Andrii Nakryiko. 3) Extend s390x JIT with usage of relative long jumps and loads in order to lift the current 64/512k size limits on JITed BPF programs there, from Ilya Leoshkevich. 4) Add BPF audit support and emit messages upon successful prog load and unload in order to have a timeline of events, from Daniel Borkmann and Jiri Olsa. 5) Extension to libbpf and xdpsock sample programs to demo the shared umem mode (XDP_SHARED_UMEM) as well as RX-only and TX-only sockets, from Magnus Karlsson. 6) Several follow-up bug fixes for libbpf's auto-pinning code and a new API call named bpf_get_link_xdp_info() for retrieving the full set of prog IDs attached to XDP, from Toke Høiland-Jørgensen. 7) Add BTF support for array of int, array of struct and multidimensional arrays and enable it for skb->cb[] access in kfree_skb test, from Martin KaFai Lau. 8) Fix AF_XDP by using the correct number of channels from ethtool, from Luigi Rizzo. 9) Two fixes for BPF selftest to get rid of a hang in test_tc_tunnel and to avoid xdping to be run as standalone, from Jiri Benc. 10) Various BPF selftest fixes when run with latest LLVM trunk, from Yonghong Song. 11) Fix a memory leak in BPF fentry test run data, from Colin Ian King. 12) Various smaller misc cleanups and improvements mostly all over BPF selftests and samples, from Daniel T. Lee, Andre Guedes, Anders Roxell, Mao Wenan, Yue Haibing. ==================== Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'kernel/bpf')
-rw-r--r--kernel/bpf/Makefile1
-rw-r--r--kernel/bpf/arraymap.c58
-rw-r--r--kernel/bpf/btf.c558
-rw-r--r--kernel/bpf/core.c15
-rw-r--r--kernel/bpf/inode.c7
-rw-r--r--kernel/bpf/map_in_map.c2
-rw-r--r--kernel/bpf/syscall.c282
-rw-r--r--kernel/bpf/trampoline.c253
-rw-r--r--kernel/bpf/verifier.c137
-rw-r--r--kernel/bpf/xskmap.c6
10 files changed, 1184 insertions, 135 deletions
diff --git a/kernel/bpf/Makefile b/kernel/bpf/Makefile
index e1d9adb212f9..3f671bf617e8 100644
--- a/kernel/bpf/Makefile
+++ b/kernel/bpf/Makefile
@@ -6,6 +6,7 @@ obj-$(CONFIG_BPF_SYSCALL) += syscall.o verifier.o inode.o helpers.o tnum.o
obj-$(CONFIG_BPF_SYSCALL) += hashtab.o arraymap.o percpu_freelist.o bpf_lru_list.o lpm_trie.o map_in_map.o
obj-$(CONFIG_BPF_SYSCALL) += local_storage.o queue_stack_maps.o
obj-$(CONFIG_BPF_SYSCALL) += disasm.o
+obj-$(CONFIG_BPF_JIT) += trampoline.o
obj-$(CONFIG_BPF_SYSCALL) += btf.o
ifeq ($(CONFIG_NET),y)
obj-$(CONFIG_BPF_SYSCALL) += devmap.o
diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c
index 1c65ce0098a9..633c8c701ff6 100644
--- a/kernel/bpf/arraymap.c
+++ b/kernel/bpf/arraymap.c
@@ -14,7 +14,7 @@
#include "map_in_map.h"
#define ARRAY_CREATE_FLAG_MASK \
- (BPF_F_NUMA_NODE | BPF_F_ACCESS_MASK)
+ (BPF_F_NUMA_NODE | BPF_F_MMAPABLE | BPF_F_ACCESS_MASK)
static void bpf_array_free_percpu(struct bpf_array *array)
{
@@ -59,6 +59,10 @@ int array_map_alloc_check(union bpf_attr *attr)
(percpu && numa_node != NUMA_NO_NODE))
return -EINVAL;
+ if (attr->map_type != BPF_MAP_TYPE_ARRAY &&
+ attr->map_flags & BPF_F_MMAPABLE)
+ return -EINVAL;
+
if (attr->value_size > KMALLOC_MAX_SIZE)
/* if value_size is bigger, the user space won't be able to
* access the elements.
@@ -102,10 +106,19 @@ static struct bpf_map *array_map_alloc(union bpf_attr *attr)
}
array_size = sizeof(*array);
- if (percpu)
+ if (percpu) {
array_size += (u64) max_entries * sizeof(void *);
- else
- array_size += (u64) max_entries * elem_size;
+ } else {
+ /* rely on vmalloc() to return page-aligned memory and
+ * ensure array->value is exactly page-aligned
+ */
+ if (attr->map_flags & BPF_F_MMAPABLE) {
+ array_size = PAGE_ALIGN(array_size);
+ array_size += PAGE_ALIGN((u64) max_entries * elem_size);
+ } else {
+ array_size += (u64) max_entries * elem_size;
+ }
+ }
/* make sure there is no u32 overflow later in round_up() */
cost = array_size;
@@ -117,7 +130,20 @@ static struct bpf_map *array_map_alloc(union bpf_attr *attr)
return ERR_PTR(ret);
/* allocate all map elements and zero-initialize them */
- array = bpf_map_area_alloc(array_size, numa_node);
+ if (attr->map_flags & BPF_F_MMAPABLE) {
+ void *data;
+
+ /* kmalloc'ed memory can't be mmap'ed, use explicit vmalloc */
+ data = bpf_map_area_mmapable_alloc(array_size, numa_node);
+ if (!data) {
+ bpf_map_charge_finish(&mem);
+ return ERR_PTR(-ENOMEM);
+ }
+ array = data + PAGE_ALIGN(sizeof(struct bpf_array))
+ - offsetof(struct bpf_array, value);
+ } else {
+ array = bpf_map_area_alloc(array_size, numa_node);
+ }
if (!array) {
bpf_map_charge_finish(&mem);
return ERR_PTR(-ENOMEM);
@@ -350,6 +376,11 @@ static int array_map_delete_elem(struct bpf_map *map, void *key)
return -EINVAL;
}
+static void *array_map_vmalloc_addr(struct bpf_array *array)
+{
+ return (void *)round_down((unsigned long)array, PAGE_SIZE);
+}
+
/* Called when map->refcnt goes to zero, either from workqueue or from syscall */
static void array_map_free(struct bpf_map *map)
{
@@ -365,7 +396,10 @@ static void array_map_free(struct bpf_map *map)
if (array->map.map_type == BPF_MAP_TYPE_PERCPU_ARRAY)
bpf_array_free_percpu(array);
- bpf_map_area_free(array);
+ if (array->map.map_flags & BPF_F_MMAPABLE)
+ bpf_map_area_free(array_map_vmalloc_addr(array));
+ else
+ bpf_map_area_free(array);
}
static void array_map_seq_show_elem(struct bpf_map *map, void *key,
@@ -444,6 +478,17 @@ static int array_map_check_btf(const struct bpf_map *map,
return 0;
}
+static int array_map_mmap(struct bpf_map *map, struct vm_area_struct *vma)
+{
+ struct bpf_array *array = container_of(map, struct bpf_array, map);
+ pgoff_t pgoff = PAGE_ALIGN(sizeof(*array)) >> PAGE_SHIFT;
+
+ if (!(map->map_flags & BPF_F_MMAPABLE))
+ return -EINVAL;
+
+ return remap_vmalloc_range(vma, array_map_vmalloc_addr(array), pgoff);
+}
+
const struct bpf_map_ops array_map_ops = {
.map_alloc_check = array_map_alloc_check,
.map_alloc = array_map_alloc,
@@ -455,6 +500,7 @@ const struct bpf_map_ops array_map_ops = {
.map_gen_lookup = array_map_gen_lookup,
.map_direct_value_addr = array_map_direct_value_addr,
.map_direct_value_meta = array_map_direct_value_meta,
+ .map_mmap = array_map_mmap,
.map_seq_show_elem = array_map_seq_show_elem,
.map_check_btf = array_map_check_btf,
};
diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c
index 128d89601d73..40efde5eedcb 100644
--- a/kernel/bpf/btf.c
+++ b/kernel/bpf/btf.c
@@ -2,6 +2,8 @@
/* Copyright (c) 2018 Facebook */
#include <uapi/linux/btf.h>
+#include <uapi/linux/bpf.h>
+#include <uapi/linux/bpf_perf_event.h>
#include <uapi/linux/types.h>
#include <linux/seq_file.h>
#include <linux/compiler.h>
@@ -16,6 +18,9 @@
#include <linux/sort.h>
#include <linux/bpf_verifier.h>
#include <linux/btf.h>
+#include <linux/skmsg.h>
+#include <linux/perf_event.h>
+#include <net/sock.h>
/* BTF (BPF Type Format) is the meta data format which describes
* the data types of BPF program/map. Hence, it basically focus
@@ -1036,6 +1041,82 @@ static const struct resolve_vertex *env_stack_peak(struct btf_verifier_env *env)
return env->top_stack ? &env->stack[env->top_stack - 1] : NULL;
}
+/* Resolve the size of a passed-in "type"
+ *
+ * type: is an array (e.g. u32 array[x][y])
+ * return type: type "u32[x][y]", i.e. BTF_KIND_ARRAY,
+ * *type_size: (x * y * sizeof(u32)). Hence, *type_size always
+ * corresponds to the return type.
+ * *elem_type: u32
+ * *total_nelems: (x * y). Hence, individual elem size is
+ * (*type_size / *total_nelems)
+ *
+ * type: is not an array (e.g. const struct X)
+ * return type: type "struct X"
+ * *type_size: sizeof(struct X)
+ * *elem_type: same as return type ("struct X")
+ * *total_nelems: 1
+ */
+static const struct btf_type *
+btf_resolve_size(const struct btf *btf, const struct btf_type *type,
+ u32 *type_size, const struct btf_type **elem_type,
+ u32 *total_nelems)
+{
+ const struct btf_type *array_type = NULL;
+ const struct btf_array *array;
+ u32 i, size, nelems = 1;
+
+ for (i = 0; i < MAX_RESOLVE_DEPTH; i++) {
+ switch (BTF_INFO_KIND(type->info)) {
+ /* type->size can be used */
+ case BTF_KIND_INT:
+ case BTF_KIND_STRUCT:
+ case BTF_KIND_UNION:
+ case BTF_KIND_ENUM:
+ size = type->size;
+ goto resolved;
+
+ case BTF_KIND_PTR:
+ size = sizeof(void *);
+ goto resolved;
+
+ /* Modifiers */
+ case BTF_KIND_TYPEDEF:
+ case BTF_KIND_VOLATILE:
+ case BTF_KIND_CONST:
+ case BTF_KIND_RESTRICT:
+ type = btf_type_by_id(btf, type->type);
+ break;
+
+ case BTF_KIND_ARRAY:
+ if (!array_type)
+ array_type = type;
+ array = btf_type_array(type);
+ if (nelems && array->nelems > U32_MAX / nelems)
+ return ERR_PTR(-EINVAL);
+ nelems *= array->nelems;
+ type = btf_type_by_id(btf, array->type);
+ break;
+
+ /* type without size */
+ default:
+ return ERR_PTR(-EINVAL);
+ }
+ }
+
+ return ERR_PTR(-EINVAL);
+
+resolved:
+ if (nelems && size > U32_MAX / nelems)
+ return ERR_PTR(-EINVAL);
+
+ *type_size = nelems * size;
+ *total_nelems = nelems;
+ *elem_type = type;
+
+ return array_type ? : type;
+}
+
/* The input param "type_id" must point to a needs_resolve type */
static const struct btf_type *btf_type_id_resolve(const struct btf *btf,
u32 *type_id)
@@ -3363,13 +3444,112 @@ errout:
extern char __weak _binary__btf_vmlinux_bin_start[];
extern char __weak _binary__btf_vmlinux_bin_end[];
+extern struct btf *btf_vmlinux;
+
+#define BPF_MAP_TYPE(_id, _ops)
+static union {
+ struct bpf_ctx_convert {
+#define BPF_PROG_TYPE(_id, _name, prog_ctx_type, kern_ctx_type) \
+ prog_ctx_type _id##_prog; \
+ kern_ctx_type _id##_kern;
+#include <linux/bpf_types.h>
+#undef BPF_PROG_TYPE
+ } *__t;
+ /* 't' is written once under lock. Read many times. */
+ const struct btf_type *t;
+} bpf_ctx_convert;
+enum {
+#define BPF_PROG_TYPE(_id, _name, prog_ctx_type, kern_ctx_type) \
+ __ctx_convert##_id,
+#include <linux/bpf_types.h>
+#undef BPF_PROG_TYPE
+};
+static u8 bpf_ctx_convert_map[] = {
+#define BPF_PROG_TYPE(_id, _name, prog_ctx_type, kern_ctx_type) \
+ [_id] = __ctx_convert##_id,
+#include <linux/bpf_types.h>
+#undef BPF_PROG_TYPE
+};
+#undef BPF_MAP_TYPE
+
+static const struct btf_member *
+btf_get_prog_ctx_type(struct bpf_verifier_log *log, struct btf *btf,
+ const struct btf_type *t, enum bpf_prog_type prog_type)
+{
+ const struct btf_type *conv_struct;
+ const struct btf_type *ctx_struct;
+ const struct btf_member *ctx_type;
+ const char *tname, *ctx_tname;
+
+ conv_struct = bpf_ctx_convert.t;
+ if (!conv_struct) {
+ bpf_log(log, "btf_vmlinux is malformed\n");
+ return NULL;
+ }
+ t = btf_type_by_id(btf, t->type);
+ while (btf_type_is_modifier(t))
+ t = btf_type_by_id(btf, t->type);
+ if (!btf_type_is_struct(t)) {
+ /* Only pointer to struct is supported for now.
+ * That means that BPF_PROG_TYPE_TRACEPOINT with BTF
+ * is not supported yet.
+ * BPF_PROG_TYPE_RAW_TRACEPOINT is fine.
+ */
+ bpf_log(log, "BPF program ctx type is not a struct\n");
+ return NULL;
+ }
+ tname = btf_name_by_offset(btf, t->name_off);
+ if (!tname) {
+ bpf_log(log, "BPF program ctx struct doesn't have a name\n");
+ return NULL;
+ }
+ /* prog_type is valid bpf program type. No need for bounds check. */
+ ctx_type = btf_type_member(conv_struct) + bpf_ctx_convert_map[prog_type] * 2;
+ /* ctx_struct is a pointer to prog_ctx_type in vmlinux.
+ * Like 'struct __sk_buff'
+ */
+ ctx_struct = btf_type_by_id(btf_vmlinux, ctx_type->type);
+ if (!ctx_struct)
+ /* should not happen */
+ return NULL;
+ ctx_tname = btf_name_by_offset(btf_vmlinux, ctx_struct->name_off);
+ if (!ctx_tname) {
+ /* should not happen */
+ bpf_log(log, "Please fix kernel include/linux/bpf_types.h\n");
+ return NULL;
+ }
+ /* only compare that prog's ctx type name is the same as
+ * kernel expects. No need to compare field by field.
+ * It's ok for bpf prog to do:
+ * struct __sk_buff {};
+ * int socket_filter_bpf_prog(struct __sk_buff *skb)
+ * { // no fields of skb are ever used }
+ */
+ if (strcmp(ctx_tname, tname))
+ return NULL;
+ return ctx_type;
+}
+
+static int btf_translate_to_vmlinux(struct bpf_verifier_log *log,
+ struct btf *btf,
+ const struct btf_type *t,
+ enum bpf_prog_type prog_type)
+{
+ const struct btf_member *prog_ctx_type, *kern_ctx_type;
+
+ prog_ctx_type = btf_get_prog_ctx_type(log, btf, t, prog_type);
+ if (!prog_ctx_type)
+ return -ENOENT;
+ kern_ctx_type = prog_ctx_type + 1;
+ return kern_ctx_type->type;
+}
struct btf *btf_parse_vmlinux(void)
{
struct btf_verifier_env *env = NULL;
struct bpf_verifier_log *log;
struct btf *btf = NULL;
- int err;
+ int err, i;
env = kzalloc(sizeof(*env), GFP_KERNEL | __GFP_NOWARN);
if (!env)
@@ -3403,6 +3583,26 @@ struct btf *btf_parse_vmlinux(void)
if (err)
goto errout;
+ /* find struct bpf_ctx_convert for type checking later */
+ for (i = 1; i <= btf->nr_types; i++) {
+ const struct btf_type *t;
+ const char *tname;
+
+ t = btf_type_by_id(btf, i);
+ if (!__btf_type_is_struct(t))
+ continue;
+ tname = __btf_name_by_offset(btf, t->name_off);
+ if (!strcmp(tname, "bpf_ctx_convert")) {
+ /* btf_parse_vmlinux() runs under bpf_verifier_lock */
+ bpf_ctx_convert.t = t;
+ break;
+ }
+ }
+ if (i > btf->nr_types) {
+ err = -ENOENT;
+ goto errout;
+ }
+
btf_verifier_env_free(env);
refcount_set(&btf->refcnt, 1);
return btf;
@@ -3416,17 +3616,29 @@ errout:
return ERR_PTR(err);
}
-extern struct btf *btf_vmlinux;
+struct btf *bpf_prog_get_target_btf(const struct bpf_prog *prog)
+{
+ struct bpf_prog *tgt_prog = prog->aux->linked_prog;
+
+ if (tgt_prog) {
+ return tgt_prog->aux->btf;
+ } else {
+ return btf_vmlinux;
+ }
+}
bool btf_ctx_access(int off, int size, enum bpf_access_type type,
const struct bpf_prog *prog,
struct bpf_insn_access_aux *info)
{
const struct btf_type *t = prog->aux->attach_func_proto;
+ struct bpf_prog *tgt_prog = prog->aux->linked_prog;
+ struct btf *btf = bpf_prog_get_target_btf(prog);
const char *tname = prog->aux->attach_func_name;
struct bpf_verifier_log *log = info->log;
const struct btf_param *args;
u32 nr_args, arg;
+ int ret;
if (off % 8) {
bpf_log(log, "func '%s' offset %d is not multiple of 8\n",
@@ -3435,22 +3647,34 @@ bool btf_ctx_access(int off, int size, enum bpf_access_type type,
}
arg = off / 8;
args = (const struct btf_param *)(t + 1);
- nr_args = btf_type_vlen(t);
+ /* if (t == NULL) Fall back to default BPF prog with 5 u64 arguments */
+ nr_args = t ? btf_type_vlen(t) : 5;
if (prog->aux->attach_btf_trace) {
/* skip first 'void *__data' argument in btf_trace_##name typedef */
args++;
nr_args--;
}
- if (arg >= nr_args) {
+
+ if (prog->expected_attach_type == BPF_TRACE_FEXIT &&
+ arg == nr_args) {
+ if (!t)
+ /* Default prog with 5 args. 6th arg is retval. */
+ return true;
+ /* function return type */
+ t = btf_type_by_id(btf, t->type);
+ } else if (arg >= nr_args) {
bpf_log(log, "func '%s' doesn't have %d-th argument\n",
- tname, arg);
+ tname, arg + 1);
return false;
+ } else {
+ if (!t)
+ /* Default prog with 5 args */
+ return true;
+ t = btf_type_by_id(btf, args[arg].type);
}
-
- t = btf_type_by_id(btf_vmlinux, args[arg].type);
/* skip modifiers */
while (btf_type_is_modifier(t))
- t = btf_type_by_id(btf_vmlinux, t->type);
+ t = btf_type_by_id(btf, t->type);
if (btf_type_is_int(t))
/* accessing a scalar */
return true;
@@ -3458,7 +3682,7 @@ bool btf_ctx_access(int off, int size, enum bpf_access_type type,
bpf_log(log,
"func '%s' arg%d '%s' has type %s. Only pointer access is allowed\n",
tname, arg,
- __btf_name_by_offset(btf_vmlinux, t->name_off),
+ __btf_name_by_offset(btf, t->name_off),
btf_kind_str[BTF_INFO_KIND(t->info)]);
return false;
}
@@ -3473,10 +3697,19 @@ bool btf_ctx_access(int off, int size, enum bpf_access_type type,
info->reg_type = PTR_TO_BTF_ID;
info->btf_id = t->type;
- t = btf_type_by_id(btf_vmlinux, t->type);
+ if (tgt_prog) {
+ ret = btf_translate_to_vmlinux(log, btf, t, tgt_prog->type);
+ if (ret > 0) {
+ info->btf_id = ret;
+ return true;
+ } else {
+ return false;
+ }
+ }
+ t = btf_type_by_id(btf, t->type);
/* skip modifiers */
while (btf_type_is_modifier(t))
- t = btf_type_by_id(btf_vmlinux, t->type);
+ t = btf_type_by_id(btf, t->type);
if (!btf_type_is_struct(t)) {
bpf_log(log,
"func '%s' arg%d type %s is not a struct\n",
@@ -3485,7 +3718,7 @@ bool btf_ctx_access(int off, int size, enum bpf_access_type type,
}
bpf_log(log, "func '%s' arg%d has btf_id %d type %s '%s'\n",
tname, arg, info->btf_id, btf_kind_str[BTF_INFO_KIND(t->info)],
- __btf_name_by_offset(btf_vmlinux, t->name_off));
+ __btf_name_by_offset(btf, t->name_off));
return true;
}
@@ -3494,10 +3727,10 @@ int btf_struct_access(struct bpf_verifier_log *log,
enum bpf_access_type atype,
u32 *next_btf_id)
{
+ u32 i, moff, mtrue_end, msize = 0, total_nelems = 0;
+ const struct btf_type *mtype, *elem_type = NULL;
const struct btf_member *member;
- const struct btf_type *mtype;
const char *tname, *mname;
- int i, moff = 0, msize;
again:
tname = __btf_name_by_offset(btf_vmlinux, t->name_off);
@@ -3507,40 +3740,88 @@ again:
}
for_each_member(i, t, member) {
- /* offset of the field in bits */
- moff = btf_member_bit_offset(t, member);
-
if (btf_member_bitfield_size(t, member))
/* bitfields are not supported yet */
continue;
- if (off + size <= moff / 8)
+ /* offset of the field in bytes */
+ moff = btf_member_bit_offset(t, member) / 8;
+ if (off + size <= moff)
/* won't find anything, field is already too far */
break;
+ /* In case of "off" is pointing to holes of a struct */
+ if (off < moff)
+ continue;
/* type of the field */
mtype = btf_type_by_id(btf_vmlinux, member->type);
mname = __btf_name_by_offset(btf_vmlinux, member->name_off);
- /* skip modifiers */
- while (btf_type_is_modifier(mtype))
- mtype = btf_type_by_id(btf_vmlinux, mtype->type);
-
- if (btf_type_is_array(mtype))
- /* array deref is not supported yet */
- continue;
-
- if (!btf_type_has_size(mtype) && !btf_type_is_ptr(mtype)) {
+ mtype = btf_resolve_size(btf_vmlinux, mtype, &msize,
+ &elem_type, &total_nelems);
+ if (IS_ERR(mtype)) {
bpf_log(log, "field %s doesn't have size\n", mname);
return -EFAULT;
}
- if (btf_type_is_ptr(mtype))
- msize = 8;
- else
- msize = mtype->size;
- if (off >= moff / 8 + msize)
+
+ mtrue_end = moff + msize;
+ if (off >= mtrue_end)
/* no overlap with member, keep iterating */
continue;
+
+ if (btf_type_is_array(mtype)) {
+ u32 elem_idx;
+
+ /* btf_resolve_size() above helps to
+ * linearize a multi-dimensional array.
+ *
+ * The logic here is treating an array
+ * in a struct as the following way:
+ *
+ * struct outer {
+ * struct inner array[2][2];
+ * };
+ *
+ * looks like:
+ *
+ * struct outer {
+ * struct inner array_elem0;
+ * struct inner array_elem1;
+ * struct inner array_elem2;
+ * struct inner array_elem3;
+ * };
+ *
+ * When accessing outer->array[1][0], it moves
+ * moff to "array_elem2", set mtype to
+ * "struct inner", and msize also becomes
+ * sizeof(struct inner). Then most of the
+ * remaining logic will fall through without
+ * caring the current member is an array or
+ * not.
+ *
+ * Unlike mtype/msize/moff, mtrue_end does not
+ * change. The naming difference ("_true") tells
+ * that it is not always corresponding to
+ * the current mtype/msize/moff.
+ * It is the true end of the current
+ * member (i.e. array in this case). That
+ * will allow an int array to be accessed like
+ * a scratch space,
+ * i.e. allow access beyond the size of
+ * the array's element as long as it is
+ * within the mtrue_end boundary.
+ */
+
+ /* skip empty array */
+ if (moff == mtrue_end)
+ continue;
+
+ msize /= total_nelems;
+ elem_idx = (off - moff) / msize;
+ moff += elem_idx * msize;
+ mtype = elem_type;
+ }
+
/* the 'off' we're looking for is either equal to start
* of this field or inside of this struct
*/
@@ -3549,20 +3830,20 @@ again:
t = mtype;
/* adjust offset we're looking for */
- off -= moff / 8;
+ off -= moff;
goto again;
}
- if (msize != size) {
- /* field access size doesn't match */
- bpf_log(log,
- "cannot access %d bytes in struct %s field %s that has size %d\n",
- size, tname, mname, msize);
- return -EACCES;
- }
if (btf_type_is_ptr(mtype)) {
const struct btf_type *stype;
+ if (msize != size || off != moff) {
+ bpf_log(log,
+ "cannot access ptr member %s with moff %u in struct %s with off %u size %u\n",
+ mname, moff, tname, off, size);
+ return -EACCES;
+ }
+
stype = btf_type_by_id(btf_vmlinux, mtype->type);
/* skip modifiers */
while (btf_type_is_modifier(stype))
@@ -3572,14 +3853,28 @@ again:
return PTR_TO_BTF_ID;
}
}
- /* all other fields are treated as scalars */
+
+ /* Allow more flexible access within an int as long as
+ * it is within mtrue_end.
+ * Since mtrue_end could be the end of an array,
+ * that also allows using an array of int as a scratch
+ * space. e.g. skb->cb[].
+ */
+ if (off + size > mtrue_end) {
+ bpf_log(log,
+ "access beyond the end of member %s (mend:%u) in struct %s with off %u size %u\n",
+ mname, mtrue_end, tname, off, size);
+ return -EACCES;
+ }
+
return SCALAR_VALUE;
}
bpf_log(log, "struct %s doesn't have field at offset %d\n", tname, off);
return -EINVAL;
}
-u32 btf_resolve_helper_id(struct bpf_verifier_log *log, void *fn, int arg)
+static int __btf_resolve_helper_id(struct bpf_verifier_log *log, void *fn,
+ int arg)
{
char fnname[KSYM_SYMBOL_LEN + 4] = "btf_";
const struct btf_param *args;
@@ -3647,6 +3942,185 @@ u32 btf_resolve_helper_id(struct bpf_verifier_log *log, void *fn, int arg)
return btf_id;
}
+int btf_resolve_helper_id(struct bpf_verifier_log *log,
+ const struct bpf_func_proto *fn, int arg)
+{
+ int *btf_id = &fn->btf_id[arg];
+ int ret;
+
+ if (fn->arg_type[arg] != ARG_PTR_TO_BTF_ID)
+ return -EINVAL;
+
+ ret = READ_ONCE(*btf_id);
+ if (ret)
+ return ret;
+ /* ok to race the search. The result is the same */
+ ret = __btf_resolve_helper_id(log, fn->func, arg);
+ if (!ret) {
+ /* Function argument cannot be type 'void' */
+ bpf_log(log, "BTF resolution bug\n");
+ return -EFAULT;
+ }
+ WRITE_ONCE(*btf_id, ret);
+ return ret;
+}
+
+static int __get_type_size(struct btf *btf, u32 btf_id,
+ const struct btf_type **bad_type)
+{
+ const struct btf_type *t;
+
+ if (!btf_id)
+ /* void */
+ return 0;
+ t = btf_type_by_id(btf, btf_id);
+ while (t && btf_type_is_modifier(t))
+ t = btf_type_by_id(btf, t->type);
+ if (!t)
+ return -EINVAL;
+ if (btf_type_is_ptr(t))
+ /* kernel size of pointer. Not BPF's size of pointer*/
+ return sizeof(void *);
+ if (btf_type_is_int(t) || btf_type_is_enum(t))
+ return t->size;
+ *bad_type = t;
+ return -EINVAL;
+}
+
+int btf_distill_func_proto(struct bpf_verifier_log *log,
+ struct btf *btf,
+ const struct btf_type *func,
+ const char *tname,
+ struct btf_func_model *m)
+{
+ const struct btf_param *args;
+ const struct btf_type *t;
+ u32 i, nargs;
+ int ret;
+
+ if (!func) {
+ /* BTF function prototype doesn't match the verifier types.
+ * Fall back to 5 u64 args.
+ */
+ for (i = 0; i < 5; i++)
+ m->arg_size[i] = 8;
+ m->ret_size = 8;
+ m->nr_args = 5;
+ return 0;
+ }
+ args = (const struct btf_param *)(func + 1);
+ nargs = btf_type_vlen(func);
+ if (nargs >= MAX_BPF_FUNC_ARGS) {
+ bpf_log(log,
+ "The function %s has %d arguments. Too many.\n",
+ tname, nargs);
+ return -EINVAL;
+ }
+ ret = __get_type_size(btf, func->type, &t);
+ if (ret < 0) {
+ bpf_log(log,
+ "The function %s return type %s is unsupported.\n",
+ tname, btf_kind_str[BTF_INFO_KIND(t->info)]);
+ return -EINVAL;
+ }
+ m->ret_size = ret;
+
+ for (i = 0; i < nargs; i++) {
+ ret = __get_type_size(btf, args[i].type, &t);
+ if (ret < 0) {
+ bpf_log(log,
+ "The function %s arg%d type %s is unsupported.\n",
+ tname, i, btf_kind_str[BTF_INFO_KIND(t->info)]);
+ return -EINVAL;
+ }
+ m->arg_size[i] = ret;
+ }
+ m->nr_args = nargs;
+ return 0;
+}
+
+int btf_check_func_arg_match(struct bpf_verifier_env *env, int subprog)
+{
+ struct bpf_verifier_state *st = env->cur_state;
+ struct bpf_func_state *func = st->frame[st->curframe];
+ struct bpf_reg_state *reg = func->regs;
+ struct bpf_verifier_log *log = &env->log;
+ struct bpf_prog *prog = env->prog;
+ struct btf *btf = prog->aux->btf;
+ const struct btf_param *args;
+ const struct btf_type *t;
+ u32 i, nargs, btf_id;
+ const char *tname;
+
+ if (!prog->aux->func_info)
+ return 0;
+
+ btf_id = prog->aux->func_info[subprog].type_id;
+ if (!btf_id)
+ return 0;
+
+ if (prog->aux->func_info_aux[subprog].unreliable)
+ return 0;
+
+ t = btf_type_by_id(btf, btf_id);
+ if (!t || !btf_type_is_func(t)) {
+ bpf_log(log, "BTF of subprog %d doesn't point to KIND_FUNC\n",
+ subprog);
+ return -EINVAL;
+ }
+ tname = btf_name_by_offset(btf, t->name_off);
+
+ t = btf_type_by_id(btf, t->type);
+ if (!t || !btf_type_is_func_proto(t)) {
+ bpf_log(log, "Invalid type of func %s\n", tname);
+ return -EINVAL;
+ }
+ args = (const struct btf_param *)(t + 1);
+ nargs = btf_type_vlen(t);
+ if (nargs > 5) {
+ bpf_log(log, "Function %s has %d > 5 args\n", tname, nargs);
+ goto out;
+ }
+ /* check that BTF function arguments match actual types that the
+ * verifier sees.
+ */
+ for (i = 0; i < nargs; i++) {
+ t = btf_type_by_id(btf, args[i].type);
+ while (btf_type_is_modifier(t))
+ t = btf_type_by_id(btf, t->type);
+ if (btf_type_is_int(t) || btf_type_is_enum(t)) {
+ if (reg[i + 1].type == SCALAR_VALUE)
+ continue;
+ bpf_log(log, "R%d is not a scalar\n", i + 1);
+ goto out;
+ }
+ if (btf_type_is_ptr(t)) {
+ if (reg[i + 1].type == SCALAR_VALUE) {
+ bpf_log(log, "R%d is not a pointer\n", i + 1);
+ goto out;
+ }
+ /* If program is passing PTR_TO_CTX into subprogram
+ * check that BTF type matches.
+ */
+ if (reg[i + 1].type == PTR_TO_CTX &&
+ !btf_get_prog_ctx_type(log, btf, t, prog->type))
+ goto out;
+ /* All other pointers are ok */
+ continue;
+ }
+ bpf_log(log, "Unrecognized argument type %s\n",
+ btf_kind_str[BTF_INFO_KIND(t->info)]);
+ goto out;
+ }
+ return 0;
+out:
+ /* LLVM optimizations can remove arguments from static functions. */
+ bpf_log(log,
+ "Type info disagrees with actual arguments due to compiler optimizations\n");
+ prog->aux->func_info_aux[subprog].unreliable = true;
+ return 0;
+}
+
void btf_type_seq_show(const struct btf *btf, u32 type_id, void *obj,
struct seq_file *m)
{
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index 97e37d82a1cc..b5945c3aaa8e 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -31,6 +31,7 @@
#include <linux/rcupdate.h>
#include <linux/perf_event.h>
#include <linux/extable.h>
+#include <linux/log2.h>
#include <asm/unaligned.h>
/* Registers */
@@ -815,6 +816,9 @@ bpf_jit_binary_alloc(unsigned int proglen, u8 **image_ptr,
struct bpf_binary_header *hdr;
u32 size, hole, start, pages;
+ WARN_ON_ONCE(!is_power_of_2(alignment) ||
+ alignment > BPF_IMAGE_ALIGNMENT);
+
/* Most of BPF filters are really small, but if some of them
* fill a page, allow at least 128 extra bytes to insert a
* random section of illegal instructions.
@@ -1569,7 +1573,7 @@ out:
#undef LDST
#define LDX_PROBE(SIZEOP, SIZE) \
LDX_PROBE_MEM_##SIZEOP: \
- bpf_probe_read_kernel(&DST, SIZE, (const void *)(long) SRC); \
+ bpf_probe_read_kernel(&DST, SIZE, (const void *)(long) (SRC + insn->off)); \
CONT;
LDX_PROBE(B, 1)
LDX_PROBE(H, 2)
@@ -2011,6 +2015,7 @@ static void bpf_prog_free_deferred(struct work_struct *work)
if (aux->prog->has_callchain_buf)
put_callchain_buffers();
#endif
+ bpf_trampoline_put(aux->trampoline);
for (i = 0; i < aux->func_cnt; i++)
bpf_jit_free(aux->func[i]);
if (aux->func_cnt) {
@@ -2026,6 +2031,8 @@ void bpf_prog_free(struct bpf_prog *fp)
{
struct bpf_prog_aux *aux = fp->aux;
+ if (aux->linked_prog)
+ bpf_prog_put(aux->linked_prog);
INIT_WORK(&aux->work, bpf_prog_free_deferred);
schedule_work(&aux->work);
}
@@ -2140,6 +2147,12 @@ int __weak skb_copy_bits(const struct sk_buff *skb, int offset, void *to,
return -EFAULT;
}
+int __weak bpf_arch_text_poke(void *ip, enum bpf_text_poke_type t,
+ void *addr1, void *addr2)
+{
+ return -ENOTSUPP;
+}
+
DEFINE_STATIC_KEY_FALSE(bpf_stats_enabled_key);
EXPORT_SYMBOL(bpf_stats_enabled_key);
diff --git a/kernel/bpf/inode.c b/kernel/bpf/inode.c
index a70f7209cda3..ecf42bec38c0 100644
--- a/kernel/bpf/inode.c
+++ b/kernel/bpf/inode.c
@@ -31,10 +31,10 @@ static void *bpf_any_get(void *raw, enum bpf_type type)
{
switch (type) {
case BPF_TYPE_PROG:
- raw = bpf_prog_inc(raw);
+ bpf_prog_inc(raw);
break;
case BPF_TYPE_MAP:
- raw = bpf_map_inc(raw, true);
+ bpf_map_inc_with_uref(raw);
break;
default:
WARN_ON_ONCE(1);
@@ -534,7 +534,8 @@ static struct bpf_prog *__get_prog_inode(struct inode *inode, enum bpf_prog_type
if (!bpf_prog_get_ok(prog, &type, false))
return ERR_PTR(-EINVAL);
- return bpf_prog_inc(prog);
+ bpf_prog_inc(prog);
+ return prog;
}
struct bpf_prog *bpf_prog_get_type_path(const char *name, enum bpf_prog_type type)
diff --git a/kernel/bpf/map_in_map.c b/kernel/bpf/map_in_map.c
index fab4fb134547..4cbe987be35b 100644
--- a/kernel/bpf/map_in_map.c
+++ b/kernel/bpf/map_in_map.c
@@ -98,7 +98,7 @@ void *bpf_map_fd_get_ptr(struct bpf_map *map,
return inner_map;
if (bpf_map_meta_equal(map->inner_map_meta, inner_map))
- inner_map = bpf_map_inc(inner_map, false);
+ bpf_map_inc(inner_map);
else
inner_map = ERR_PTR(-EINVAL);
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index d447b5e343bf..b51ecb9644d0 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -23,6 +23,7 @@
#include <linux/timekeeping.h>
#include <linux/ctype.h>
#include <linux/nospec.h>
+#include <linux/audit.h>
#include <uapi/linux/btf.h>
#define IS_FD_ARRAY(map) ((map)->map_type == BPF_MAP_TYPE_PROG_ARRAY || \
@@ -43,7 +44,7 @@ static DEFINE_SPINLOCK(map_idr_lock);
int sysctl_unprivileged_bpf_disabled __read_mostly;
static const struct bpf_map_ops * const bpf_map_types[] = {
-#define BPF_PROG_TYPE(_id, _ops)
+#define BPF_PROG_TYPE(_id, _name, prog_ctx_type, kern_ctx_type)
#define BPF_MAP_TYPE(_id, _ops) \
[_id] = &_ops,
#include <linux/bpf_types.h>
@@ -127,7 +128,7 @@ static struct bpf_map *find_and_alloc_map(union bpf_attr *attr)
return map;
}
-void *bpf_map_area_alloc(u64 size, int numa_node)
+static void *__bpf_map_area_alloc(u64 size, int numa_node, bool mmapable)
{
/* We really just want to fail instead of triggering OOM killer
* under memory pressure, therefore we set __GFP_NORETRY to kmalloc,
@@ -145,18 +146,33 @@ void *bpf_map_area_alloc(u64 size, int numa_node)
if (size >= SIZE_MAX)
return NULL;
- if (size <= (PAGE_SIZE << PAGE_ALLOC_COSTLY_ORDER)) {
+ /* kmalloc()'ed memory can't be mmap()'ed */
+ if (!mmapable && size <= (PAGE_SIZE << PAGE_ALLOC_COSTLY_ORDER)) {
area = kmalloc_node(size, GFP_USER | __GFP_NORETRY | flags,
numa_node);
if (area != NULL)
return area;
}
-
+ if (mmapable) {
+ BUG_ON(!PAGE_ALIGNED(size));
+ return vmalloc_user_node_flags(size, numa_node, GFP_KERNEL |
+ __GFP_RETRY_MAYFAIL | flags);
+ }
return __vmalloc_node_flags_caller(size, numa_node,
GFP_KERNEL | __GFP_RETRY_MAYFAIL |
flags, __builtin_return_address(0));
}
+void *bpf_map_area_alloc(u64 size, int numa_node)
+{
+ return __bpf_map_area_alloc(size, numa_node, false);
+}
+
+void *bpf_map_area_mmapable_alloc(u64 size, int numa_node)
+{
+ return __bpf_map_area_alloc(size, numa_node, true);
+}
+
void bpf_map_area_free(void *area)
{
kvfree(area);
@@ -314,7 +330,7 @@ static void bpf_map_free_deferred(struct work_struct *work)
static void bpf_map_put_uref(struct bpf_map *map)
{
- if (atomic_dec_and_test(&map->usercnt)) {
+ if (atomic64_dec_and_test(&map->usercnt)) {
if (map->ops->map_release_uref)
map->ops->map_release_uref(map);
}
@@ -325,7 +341,7 @@ static void bpf_map_put_uref(struct bpf_map *map)
*/
static void __bpf_map_put(struct bpf_map *map, bool do_idr_lock)
{
- if (atomic_dec_and_test(&map->refcnt)) {
+ if (atomic64_dec_and_test(&map->refcnt)) {
/* bpf_map_free_id() must be called first */
bpf_map_free_id(map, do_idr_lock);
btf_put(map->btf);
@@ -428,6 +444,74 @@ static ssize_t bpf_dummy_write(struct file *filp, const char __user *buf,
return -EINVAL;
}
+/* called for any extra memory-mapped regions (except initial) */
+static void bpf_map_mmap_open(struct vm_area_struct *vma)
+{
+ struct bpf_map *map = vma->vm_file->private_data;
+
+ bpf_map_inc_with_uref(map);
+
+ if (vma->vm_flags & VM_WRITE) {
+ mutex_lock(&map->freeze_mutex);
+ map->writecnt++;
+ mutex_unlock(&map->freeze_mutex);
+ }
+}
+
+/* called for all unmapped memory region (including initial) */
+static void bpf_map_mmap_close(struct vm_area_struct *vma)
+{
+ struct bpf_map *map = vma->vm_file->private_data;
+
+ if (vma->vm_flags & VM_WRITE) {
+ mutex_lock(&map->freeze_mutex);
+ map->writecnt--;
+ mutex_unlock(&map->freeze_mutex);
+ }
+
+ bpf_map_put_with_uref(map);
+}
+
+static const struct vm_operations_struct bpf_map_default_vmops = {
+ .open = bpf_map_mmap_open,
+ .close = bpf_map_mmap_close,
+};
+
+static int bpf_map_mmap(struct file *filp, struct vm_area_struct *vma)
+{
+ struct bpf_map *map = filp->private_data;
+ int err;
+
+ if (!map->ops->map_mmap || map_value_has_spin_lock(map))
+ return -ENOTSUPP;
+
+ if (!(vma->vm_flags & VM_SHARED))
+ return -EINVAL;
+
+ mutex_lock(&map->freeze_mutex);
+
+ if ((vma->vm_flags & VM_WRITE) && map->frozen) {
+ err = -EPERM;
+ goto out;
+ }
+
+ /* set default open/close callbacks */
+ vma->vm_ops = &bpf_map_default_vmops;
+ vma->vm_private_data = map;
+
+ err = map->ops->map_mmap(map, vma);
+ if (err)
+ goto out;
+
+ bpf_map_inc_with_uref(map);
+
+ if (vma->vm_flags & VM_WRITE)
+ map->writecnt++;
+out:
+ mutex_unlock(&map->freeze_mutex);
+ return err;
+}
+
const struct file_operations bpf_map_fops = {
#ifdef CONFIG_PROC_FS
.show_fdinfo = bpf_map_show_fdinfo,
@@ -435,6 +519,7 @@ const struct file_operations bpf_map_fops = {
.release = bpf_map_release,
.read = bpf_dummy_read,
.write = bpf_dummy_write,
+ .mmap = bpf_map_mmap,
};
int bpf_map_new_fd(struct bpf_map *map, int flags)
@@ -578,8 +663,9 @@ static int map_create(union bpf_attr *attr)
if (err)
goto free_map;
- atomic_set(&map->refcnt, 1);
- atomic_set(&map->usercnt, 1);
+ atomic64_set(&map->refcnt, 1);
+ atomic64_set(&map->usercnt, 1);
+ mutex_init(&map->freeze_mutex);
if (attr->btf_key_type_id || attr->btf_value_type_id) {
struct btf *btf;
@@ -656,21 +742,19 @@ struct bpf_map *__bpf_map_get(struct fd f)
return f.file->private_data;
}
-/* prog's and map's refcnt limit */
-#define BPF_MAX_REFCNT 32768
-
-struct bpf_map *bpf_map_inc(struct bpf_map *map, bool uref)
+void bpf_map_inc(struct bpf_map *map)
{
- if (atomic_inc_return(&map->refcnt) > BPF_MAX_REFCNT) {
- atomic_dec(&map->refcnt);
- return ERR_PTR(-EBUSY);
- }
- if (uref)
- atomic_inc(&map->usercnt);
- return map;
+ atomic64_inc(&map->refcnt);
}
EXPORT_SYMBOL_GPL(bpf_map_inc);
+void bpf_map_inc_with_uref(struct bpf_map *map)
+{
+ atomic64_inc(&map->refcnt);
+ atomic64_inc(&map->usercnt);
+}
+EXPORT_SYMBOL_GPL(bpf_map_inc_with_uref);
+
struct bpf_map *bpf_map_get_with_uref(u32 ufd)
{
struct fd f = fdget(ufd);
@@ -680,38 +764,30 @@ struct bpf_map *bpf_map_get_with_uref(u32 ufd)
if (IS_ERR(map))
return map;
- map = bpf_map_inc(map, true);
+ bpf_map_inc_with_uref(map);
fdput(f);
return map;
}
/* map_idr_lock should have been held */
-static struct bpf_map *__bpf_map_inc_not_zero(struct bpf_map *map,
- bool uref)
+static struct bpf_map *__bpf_map_inc_not_zero(struct bpf_map *map, bool uref)
{
int refold;
- refold = atomic_fetch_add_unless(&map->refcnt, 1, 0);
-
- if (refold >= BPF_MAX_REFCNT) {
- __bpf_map_put(map, false);
- return ERR_PTR(-EBUSY);
- }
-
+ refold = atomic64_fetch_add_unless(&map->refcnt, 1, 0);
if (!refold)
return ERR_PTR(-ENOENT);
-
if (uref)
- atomic_inc(&map->usercnt);
+ atomic64_inc(&map->usercnt);
return map;
}
-struct bpf_map *bpf_map_inc_not_zero(struct bpf_map *map, bool uref)
+struct bpf_map *bpf_map_inc_not_zero(struct bpf_map *map)
{
spin_lock_bh(&map_idr_lock);
- map = __bpf_map_inc_not_zero(map, uref);
+ map = __bpf_map_inc_not_zero(map, false);
spin_unlock_bh(&map_idr_lock);
return map;
@@ -1176,6 +1252,13 @@ static int map_freeze(const union bpf_attr *attr)
map = __bpf_map_get(f);
if (IS_ERR(map))
return PTR_ERR(map);
+
+ mutex_lock(&map->freeze_mutex);
+
+ if (map->writecnt) {
+ err = -EBUSY;
+ goto err_put;
+ }
if (READ_ONCE(map->frozen)) {
err = -EBUSY;
goto err_put;
@@ -1187,12 +1270,13 @@ static int map_freeze(const union bpf_attr *attr)
WRITE_ONCE(map->frozen, true);
err_put:
+ mutex_unlock(&map->freeze_mutex);
fdput(f);
return err;
}
static const struct bpf_prog_ops * const bpf_prog_types[] = {
-#define BPF_PROG_TYPE(_id, _name) \
+#define BPF_PROG_TYPE(_id, _name, prog_ctx_type, kern_ctx_type) \
[_id] = & _name ## _prog_ops,
#define BPF_MAP_TYPE(_id, _ops)
#include <linux/bpf_types.h>
@@ -1238,6 +1322,34 @@ static void free_used_maps(struct bpf_prog_aux *aux)
kfree(aux->used_maps);
}
+enum bpf_event {
+ BPF_EVENT_LOAD,
+ BPF_EVENT_UNLOAD,
+};
+
+static const char * const bpf_event_audit_str[] = {
+ [BPF_EVENT_LOAD] = "LOAD",
+ [BPF_EVENT_UNLOAD] = "UNLOAD",
+};
+
+static void bpf_audit_prog(const struct bpf_prog *prog, enum bpf_event event)
+{
+ bool has_task_context = event == BPF_EVENT_LOAD;
+ struct audit_buffer *ab;
+
+ if (audit_enabled == AUDIT_OFF)
+ return;
+ ab = audit_log_start(audit_context(), GFP_ATOMIC, AUDIT_BPF);
+ if (unlikely(!ab))
+ return;
+ if (has_task_context)
+ audit_log_task(ab);
+ audit_log_format(ab, "%sprog-id=%u event=%s",
+ has_task_context ? " " : "",
+ prog->aux->id, bpf_event_audit_str[event]);
+ audit_log_end(ab);
+}
+
int __bpf_prog_charge(struct user_struct *user, u32 pages)
{
unsigned long memlock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
@@ -1331,6 +1443,7 @@ static void __bpf_prog_put_rcu(struct rcu_head *rcu)
struct bpf_prog_aux *aux = container_of(rcu, struct bpf_prog_aux, rcu);
kvfree(aux->func_info);
+ kfree(aux->func_info_aux);
free_used_maps(aux);
bpf_prog_uncharge_memlock(aux->prog);
security_bpf_prog_free(aux);
@@ -1351,8 +1464,9 @@ static void __bpf_prog_put_noref(struct bpf_prog *prog, bool deferred)
static void __bpf_prog_put(struct bpf_prog *prog, bool do_idr_lock)
{
- if (atomic_dec_and_test(&prog->aux->refcnt)) {
+ if (atomic64_dec_and_test(&prog->aux->refcnt)) {
perf_event_bpf_event(prog, PERF_BPF_EVENT_PROG_UNLOAD, 0);
+ bpf_audit_prog(prog, BPF_EVENT_UNLOAD);
/* bpf_prog_free_id() must be called first */
bpf_prog_free_id(prog, do_idr_lock);
__bpf_prog_put_noref(prog, true);
@@ -1457,13 +1571,9 @@ static struct bpf_prog *____bpf_prog_get(struct fd f)
return f.file->private_data;
}
-struct bpf_prog *bpf_prog_add(struct bpf_prog *prog, int i)
+void bpf_prog_add(struct bpf_prog *prog, int i)
{
- if (atomic_add_return(i, &prog->aux->refcnt) > BPF_MAX_REFCNT) {
- atomic_sub(i, &prog->aux->refcnt);
- return ERR_PTR(-EBUSY);
- }
- return prog;
+ atomic64_add(i, &prog->aux->refcnt);
}
EXPORT_SYMBOL_GPL(bpf_prog_add);
@@ -1474,13 +1584,13 @@ void bpf_prog_sub(struct bpf_prog *prog, int i)
* path holds a reference to the program, thus atomic_sub() can
* be safely used in such cases!
*/
- WARN_ON(atomic_sub_return(i, &prog->aux->refcnt) == 0);
+ WARN_ON(atomic64_sub_return(i, &prog->aux->refcnt) == 0);
}
EXPORT_SYMBOL_GPL(bpf_prog_sub);
-struct bpf_prog *bpf_prog_inc(struct bpf_prog *prog)
+void bpf_prog_inc(struct bpf_prog *prog)
{
- return bpf_prog_add(prog, 1);
+ atomic64_inc(&prog->aux->refcnt);
}
EXPORT_SYMBOL_GPL(bpf_prog_inc);
@@ -1489,12 +1599,7 @@ struct bpf_prog *bpf_prog_inc_not_zero(struct bpf_prog *prog)
{
int refold;
- refold = atomic_fetch_add_unless(&prog->aux->refcnt, 1, 0);
-
- if (refold >= BPF_MAX_REFCNT) {
- __bpf_prog_put(prog, false);
- return ERR_PTR(-EBUSY);
- }
+ refold = atomic64_fetch_add_unless(&prog->aux->refcnt, 1, 0);
if (!refold)
return ERR_PTR(-ENOENT);
@@ -1532,7 +1637,7 @@ static struct bpf_prog *__bpf_prog_get(u32 ufd, enum bpf_prog_type *attach_type,
goto out;
}
- prog = bpf_prog_inc(prog);
+ bpf_prog_inc(prog);
out:
fdput(f);
return prog;
@@ -1579,7 +1684,7 @@ static void bpf_prog_load_fixup_attach_type(union bpf_attr *attr)
static int
bpf_prog_load_check_attach(enum bpf_prog_type prog_type,
enum bpf_attach_type expected_attach_type,
- u32 btf_id)
+ u32 btf_id, u32 prog_fd)
{
switch (prog_type) {
case BPF_PROG_TYPE_TRACING:
@@ -1587,7 +1692,7 @@ bpf_prog_load_check_attach(enum bpf_prog_type prog_type,
return -EINVAL;
break;
default:
- if (btf_id)
+ if (btf_id || prog_fd)
return -EINVAL;
break;
}
@@ -1638,7 +1743,7 @@ bpf_prog_load_check_attach(enum bpf_prog_type prog_type,
}
/* last field in 'union bpf_attr' used by this command */
-#define BPF_PROG_LOAD_LAST_FIELD attach_btf_id
+#define BPF_PROG_LOAD_LAST_FIELD attach_prog_fd
static int bpf_prog_load(union bpf_attr *attr, union bpf_attr __user *uattr)
{
@@ -1681,7 +1786,8 @@ static int bpf_prog_load(union bpf_attr *attr, union bpf_attr __user *uattr)
bpf_prog_load_fixup_attach_type(attr);
if (bpf_prog_load_check_attach(type, attr->expected_attach_type,
- attr->attach_btf_id))
+ attr->attach_btf_id,
+ attr->attach_prog_fd))
return -EINVAL;
/* plain bpf_prog allocation */
@@ -1691,6 +1797,16 @@ static int bpf_prog_load(union bpf_attr *attr, union bpf_attr __user *uattr)
prog->expected_attach_type = attr->expected_attach_type;
prog->aux->attach_btf_id = attr->attach_btf_id;
+ if (attr->attach_prog_fd) {
+ struct bpf_prog *tgt_prog;
+
+ tgt_prog = bpf_prog_get(attr->attach_prog_fd);
+ if (IS_ERR(tgt_prog)) {
+ err = PTR_ERR(tgt_prog);
+ goto free_prog_nouncharge;
+ }
+ prog->aux->linked_prog = tgt_prog;
+ }
prog->aux->offload_requested = !!attr->prog_ifindex;
@@ -1712,7 +1828,7 @@ static int bpf_prog_load(union bpf_attr *attr, union bpf_attr __user *uattr)
prog->orig_prog = NULL;
prog->jited = 0;
- atomic_set(&prog->aux->refcnt, 1);
+ atomic64_set(&prog->aux->refcnt, 1);
prog->gpl_compatible = is_gpl ? 1 : 0;
if (bpf_prog_is_dev_bound(prog->aux)) {
@@ -1760,6 +1876,7 @@ static int bpf_prog_load(union bpf_attr *attr, union bpf_attr __user *uattr)
*/
bpf_prog_kallsyms_add(prog);
perf_event_bpf_event(prog, PERF_BPF_EVENT_PROG_LOAD, 0);
+ bpf_audit_prog(prog, BPF_EVENT_LOAD);
err = bpf_prog_new_fd(prog);
if (err < 0)
@@ -1802,6 +1919,49 @@ static int bpf_obj_get(const union bpf_attr *attr)
attr->file_flags);
}
+static int bpf_tracing_prog_release(struct inode *inode, struct file *filp)
+{
+ struct bpf_prog *prog = filp->private_data;
+
+ WARN_ON_ONCE(bpf_trampoline_unlink_prog(prog));
+ bpf_prog_put(prog);
+ return 0;
+}
+
+static const struct file_operations bpf_tracing_prog_fops = {
+ .release = bpf_tracing_prog_release,
+ .read = bpf_dummy_read,
+ .write = bpf_dummy_write,
+};
+
+static int bpf_tracing_prog_attach(struct bpf_prog *prog)
+{
+ int tr_fd, err;
+
+ if (prog->expected_attach_type != BPF_TRACE_FENTRY &&
+ prog->expected_attach_type != BPF_TRACE_FEXIT) {
+ err = -EINVAL;
+ goto out_put_prog;
+ }
+
+ err = bpf_trampoline_link_prog(prog);
+ if (err)
+ goto out_put_prog;
+
+ tr_fd = anon_inode_getfd("bpf-tracing-prog", &bpf_tracing_prog_fops,
+ prog, O_CLOEXEC);
+ if (tr_fd < 0) {
+ WARN_ON_ONCE(bpf_trampoline_unlink_prog(prog));
+ err = tr_fd;
+ goto out_put_prog;
+ }
+ return tr_fd;
+
+out_put_prog:
+ bpf_prog_put(prog);
+ return err;
+}
+
struct bpf_raw_tracepoint {
struct bpf_raw_event_map *btp;
struct bpf_prog *prog;
@@ -1853,14 +2013,16 @@ static int bpf_raw_tracepoint_open(const union bpf_attr *attr)
if (prog->type == BPF_PROG_TYPE_TRACING) {
if (attr->raw_tracepoint.name) {
- /* raw_tp name should not be specified in raw_tp
- * programs that were verified via in-kernel BTF info
+ /* The attach point for this category of programs
+ * should be specified via btf_id during program load.
*/
err = -EINVAL;
goto out_put_prog;
}
- /* raw_tp name is taken from type name instead */
- tp_name = prog->aux->attach_func_name;
+ if (prog->expected_attach_type == BPF_TRACE_RAW_TP)
+ tp_name = prog->aux->attach_func_name;
+ else
+ return bpf_tracing_prog_attach(prog);
} else {
if (strncpy_from_user(buf,
u64_to_user_ptr(attr->raw_tracepoint.name),
diff --git a/kernel/bpf/trampoline.c b/kernel/bpf/trampoline.c
new file mode 100644
index 000000000000..10ae59d65f13
--- /dev/null
+++ b/kernel/bpf/trampoline.c
@@ -0,0 +1,253 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Copyright (c) 2019 Facebook */
+#include <linux/hash.h>
+#include <linux/bpf.h>
+#include <linux/filter.h>
+
+/* btf_vmlinux has ~22k attachable functions. 1k htab is enough. */
+#define TRAMPOLINE_HASH_BITS 10
+#define TRAMPOLINE_TABLE_SIZE (1 << TRAMPOLINE_HASH_BITS)
+
+static struct hlist_head trampoline_table[TRAMPOLINE_TABLE_SIZE];
+
+/* serializes access to trampoline_table */
+static DEFINE_MUTEX(trampoline_mutex);
+
+struct bpf_trampoline *bpf_trampoline_lookup(u64 key)
+{
+ struct bpf_trampoline *tr;
+ struct hlist_head *head;
+ void *image;
+ int i;
+
+ mutex_lock(&trampoline_mutex);
+ head = &trampoline_table[hash_64(key, TRAMPOLINE_HASH_BITS)];
+ hlist_for_each_entry(tr, head, hlist) {
+ if (tr->key == key) {
+ refcount_inc(&tr->refcnt);
+ goto out;
+ }
+ }
+ tr = kzalloc(sizeof(*tr), GFP_KERNEL);
+ if (!tr)
+ goto out;
+
+ /* is_root was checked earlier. No need for bpf_jit_charge_modmem() */
+ image = bpf_jit_alloc_exec(PAGE_SIZE);
+ if (!image) {
+ kfree(tr);
+ tr = NULL;
+ goto out;
+ }
+
+ tr->key = key;
+ INIT_HLIST_NODE(&tr->hlist);
+ hlist_add_head(&tr->hlist, head);
+ refcount_set(&tr->refcnt, 1);
+ mutex_init(&tr->mutex);
+ for (i = 0; i < BPF_TRAMP_MAX; i++)
+ INIT_HLIST_HEAD(&tr->progs_hlist[i]);
+
+ set_vm_flush_reset_perms(image);
+ /* Keep image as writeable. The alternative is to keep flipping ro/rw
+ * everytime new program is attached or detached.
+ */
+ set_memory_x((long)image, 1);
+ tr->image = image;
+out:
+ mutex_unlock(&trampoline_mutex);
+ return tr;
+}
+
+/* Each call __bpf_prog_enter + call bpf_func + call __bpf_prog_exit is ~50
+ * bytes on x86. Pick a number to fit into PAGE_SIZE / 2
+ */
+#define BPF_MAX_TRAMP_PROGS 40
+
+static int bpf_trampoline_update(struct bpf_trampoline *tr)
+{
+ void *old_image = tr->image + ((tr->selector + 1) & 1) * PAGE_SIZE/2;
+ void *new_image = tr->image + (tr->selector & 1) * PAGE_SIZE/2;
+ struct bpf_prog *progs_to_run[BPF_MAX_TRAMP_PROGS];
+ int fentry_cnt = tr->progs_cnt[BPF_TRAMP_FENTRY];
+ int fexit_cnt = tr->progs_cnt[BPF_TRAMP_FEXIT];
+ struct bpf_prog **progs, **fentry, **fexit;
+ u32 flags = BPF_TRAMP_F_RESTORE_REGS;
+ struct bpf_prog_aux *aux;
+ int err;
+
+ if (fentry_cnt + fexit_cnt == 0) {
+ err = bpf_arch_text_poke(tr->func.addr, BPF_MOD_CALL_TO_NOP,
+ old_image, NULL);
+ tr->selector = 0;
+ goto out;
+ }
+
+ /* populate fentry progs */
+ fentry = progs = progs_to_run;
+ hlist_for_each_entry(aux, &tr->progs_hlist[BPF_TRAMP_FENTRY], tramp_hlist)
+ *progs++ = aux->prog;
+
+ /* populate fexit progs */
+ fexit = progs;
+ hlist_for_each_entry(aux, &tr->progs_hlist[BPF_TRAMP_FEXIT], tramp_hlist)
+ *progs++ = aux->prog;
+
+ if (fexit_cnt)
+ flags = BPF_TRAMP_F_CALL_ORIG | BPF_TRAMP_F_SKIP_FRAME;
+
+ err = arch_prepare_bpf_trampoline(new_image, &tr->func.model, flags,
+ fentry, fentry_cnt,
+ fexit, fexit_cnt,
+ tr->func.addr);
+ if (err)
+ goto out;
+
+ if (tr->selector)
+ /* progs already running at this address */
+ err = bpf_arch_text_poke(tr->func.addr, BPF_MOD_CALL_TO_CALL,
+ old_image, new_image);
+ else
+ /* first time registering */
+ err = bpf_arch_text_poke(tr->func.addr, BPF_MOD_NOP_TO_CALL,
+ NULL, new_image);
+ if (err)
+ goto out;
+ tr->selector++;
+out:
+ return err;
+}
+
+static enum bpf_tramp_prog_type bpf_attach_type_to_tramp(enum bpf_attach_type t)
+{
+ switch (t) {
+ case BPF_TRACE_FENTRY:
+ return BPF_TRAMP_FENTRY;
+ default:
+ return BPF_TRAMP_FEXIT;
+ }
+}
+
+int bpf_trampoline_link_prog(struct bpf_prog *prog)
+{
+ enum bpf_tramp_prog_type kind;
+ struct bpf_trampoline *tr;
+ int err = 0;
+
+ tr = prog->aux->trampoline;
+ kind = bpf_attach_type_to_tramp(prog->expected_attach_type);
+ mutex_lock(&tr->mutex);
+ if (tr->progs_cnt[BPF_TRAMP_FENTRY] + tr->progs_cnt[BPF_TRAMP_FEXIT]
+ >= BPF_MAX_TRAMP_PROGS) {
+ err = -E2BIG;
+ goto out;
+ }
+ if (!hlist_unhashed(&prog->aux->tramp_hlist)) {
+ /* prog already linked */
+ err = -EBUSY;
+ goto out;
+ }
+ hlist_add_head(&prog->aux->tramp_hlist, &tr->progs_hlist[kind]);
+ tr->progs_cnt[kind]++;
+ err = bpf_trampoline_update(prog->aux->trampoline);
+ if (err) {
+ hlist_del(&prog->aux->tramp_hlist);
+ tr->progs_cnt[kind]--;
+ }
+out:
+ mutex_unlock(&tr->mutex);
+ return err;
+}
+
+/* bpf_trampoline_unlink_prog() should never fail. */
+int bpf_trampoline_unlink_prog(struct bpf_prog *prog)
+{
+ enum bpf_tramp_prog_type kind;
+ struct bpf_trampoline *tr;
+ int err;
+
+ tr = prog->aux->trampoline;
+ kind = bpf_attach_type_to_tramp(prog->expected_attach_type);
+ mutex_lock(&tr->mutex);
+ hlist_del(&prog->aux->tramp_hlist);
+ tr->progs_cnt[kind]--;
+ err = bpf_trampoline_update(prog->aux->trampoline);
+ mutex_unlock(&tr->mutex);
+ return err;
+}
+
+void bpf_trampoline_put(struct bpf_trampoline *tr)
+{
+ if (!tr)
+ return;
+ mutex_lock(&trampoline_mutex);
+ if (!refcount_dec_and_test(&tr->refcnt))
+ goto out;
+ WARN_ON_ONCE(mutex_is_locked(&tr->mutex));
+ if (WARN_ON_ONCE(!hlist_empty(&tr->progs_hlist[BPF_TRAMP_FENTRY])))
+ goto out;
+ if (WARN_ON_ONCE(!hlist_empty(&tr->progs_hlist[BPF_TRAMP_FEXIT])))
+ goto out;
+ bpf_jit_free_exec(tr->image);
+ hlist_del(&tr->hlist);
+ kfree(tr);
+out:
+ mutex_unlock(&trampoline_mutex);
+}
+
+/* The logic is similar to BPF_PROG_RUN, but with explicit rcu and preempt that
+ * are needed for trampoline. The macro is split into
+ * call _bpf_prog_enter
+ * call prog->bpf_func
+ * call __bpf_prog_exit
+ */
+u64 notrace __bpf_prog_enter(void)
+{
+ u64 start = 0;
+
+ rcu_read_lock();
+ preempt_disable();
+ if (static_branch_unlikely(&bpf_stats_enabled_key))
+ start = sched_clock();
+ return start;
+}
+
+void notrace __bpf_prog_exit(struct bpf_prog *prog, u64 start)
+{
+ struct bpf_prog_stats *stats;
+
+ if (static_branch_unlikely(&bpf_stats_enabled_key) &&
+ /* static_key could be enabled in __bpf_prog_enter
+ * and disabled in __bpf_prog_exit.
+ * And vice versa.
+ * Hence check that 'start' is not zero.
+ */
+ start) {
+ stats = this_cpu_ptr(prog->aux->stats);
+ u64_stats_update_begin(&stats->syncp);
+ stats->cnt++;
+ stats->nsecs += sched_clock() - start;
+ u64_stats_update_end(&stats->syncp);
+ }
+ preempt_enable();
+ rcu_read_unlock();
+}
+
+int __weak
+arch_prepare_bpf_trampoline(void *image, struct btf_func_model *m, u32 flags,
+ struct bpf_prog **fentry_progs, int fentry_cnt,
+ struct bpf_prog **fexit_progs, int fexit_cnt,
+ void *orig_call)
+{
+ return -ENOTSUPP;
+}
+
+static int __init init_trampolines(void)
+{
+ int i;
+
+ for (i = 0; i < TRAMPOLINE_TABLE_SIZE; i++)
+ INIT_HLIST_HEAD(&trampoline_table[i]);
+ return 0;
+}
+late_initcall(init_trampolines);
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 2f2374967b36..9f59f7a19dd0 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -23,7 +23,7 @@
#include "disasm.h"
static const struct bpf_verifier_ops * const bpf_verifier_ops[] = {
-#define BPF_PROG_TYPE(_id, _name) \
+#define BPF_PROG_TYPE(_id, _name, prog_ctx_type, kern_ctx_type) \
[_id] = & _name ## _verifier_ops,
#define BPF_MAP_TYPE(_id, _ops)
#include <linux/bpf_types.h>
@@ -3970,6 +3970,9 @@ static int check_func_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
/* only increment it after check_reg_arg() finished */
state->curframe++;
+ if (btf_check_func_arg_match(env, subprog))
+ return -EINVAL;
+
/* and go analyze first insn of the callee */
*insn_idx = target_insn;
@@ -4147,11 +4150,9 @@ static int check_helper_call(struct bpf_verifier_env *env, int func_id, int insn
meta.func_id = func_id;
/* check args */
for (i = 0; i < 5; i++) {
- if (fn->arg_type[i] == ARG_PTR_TO_BTF_ID) {
- if (!fn->btf_id[i])
- fn->btf_id[i] = btf_resolve_helper_id(&env->log, fn->func, i);
- meta.btf_id = fn->btf_id[i];
- }
+ err = btf_resolve_helper_id(&env->log, fn, i);
+ if (err > 0)
+ meta.btf_id = err;
err = check_func_arg(env, BPF_REG_1 + i, fn->arg_type[i], &meta);
if (err)
return err;
@@ -6566,6 +6567,7 @@ static int check_btf_func(struct bpf_verifier_env *env,
u32 i, nfuncs, urec_size, min_size;
u32 krec_size = sizeof(struct bpf_func_info);
struct bpf_func_info *krecord;
+ struct bpf_func_info_aux *info_aux = NULL;
const struct btf_type *type;
struct bpf_prog *prog;
const struct btf *btf;
@@ -6599,6 +6601,9 @@ static int check_btf_func(struct bpf_verifier_env *env,
krecord = kvcalloc(nfuncs, krec_size, GFP_KERNEL | __GFP_NOWARN);
if (!krecord)
return -ENOMEM;
+ info_aux = kcalloc(nfuncs, sizeof(*info_aux), GFP_KERNEL | __GFP_NOWARN);
+ if (!info_aux)
+ goto err_free;
for (i = 0; i < nfuncs; i++) {
ret = bpf_check_uarg_tail_zero(urecord, krec_size, urec_size);
@@ -6650,29 +6655,31 @@ static int check_btf_func(struct bpf_verifier_env *env,
ret = -EINVAL;
goto err_free;
}
-
prev_offset = krecord[i].insn_off;
urecord += urec_size;
}
prog->aux->func_info = krecord;
prog->aux->func_info_cnt = nfuncs;
+ prog->aux->func_info_aux = info_aux;
return 0;
err_free:
kvfree(krecord);
+ kfree(info_aux);
return ret;
}
static void adjust_btf_func(struct bpf_verifier_env *env)
{
+ struct bpf_prog_aux *aux = env->prog->aux;
int i;
- if (!env->prog->aux->func_info)
+ if (!aux->func_info)
return;
for (i = 0; i < env->subprog_cnt; i++)
- env->prog->aux->func_info[i].insn_off = env->subprog_info[i].start;
+ aux->func_info[i].insn_off = env->subprog_info[i].start;
}
#define MIN_BPF_LINEINFO_SIZE (offsetof(struct bpf_line_info, line_col) + \
@@ -7653,6 +7660,9 @@ static int do_check(struct bpf_verifier_env *env)
0 /* frameno */,
0 /* subprogno, zero == main subprog */);
+ if (btf_check_func_arg_match(env, 0))
+ return -EINVAL;
+
for (;;) {
struct bpf_insn *insn;
u8 class;
@@ -8169,11 +8179,7 @@ static int replace_map_fd_with_map_ptr(struct bpf_verifier_env *env)
* will be used by the valid program until it's unloaded
* and all maps are released in free_used_maps()
*/
- map = bpf_map_inc(map, false);
- if (IS_ERR(map)) {
- fdput(f);
- return PTR_ERR(map);
- }
+ bpf_map_inc(map);
aux->map_index = env->used_map_cnt;
env->used_maps[env->used_map_cnt++] = map;
@@ -9380,10 +9386,17 @@ static void print_verification_stats(struct bpf_verifier_env *env)
static int check_attach_btf_id(struct bpf_verifier_env *env)
{
struct bpf_prog *prog = env->prog;
+ struct bpf_prog *tgt_prog = prog->aux->linked_prog;
u32 btf_id = prog->aux->attach_btf_id;
const char prefix[] = "btf_trace_";
+ int ret = 0, subprog = -1, i;
+ struct bpf_trampoline *tr;
const struct btf_type *t;
+ bool conservative = true;
const char *tname;
+ struct btf *btf;
+ long addr;
+ u64 key;
if (prog->type != BPF_PROG_TYPE_TRACING)
return 0;
@@ -9392,19 +9405,47 @@ static int check_attach_btf_id(struct bpf_verifier_env *env)
verbose(env, "Tracing programs must provide btf_id\n");
return -EINVAL;
}
- t = btf_type_by_id(btf_vmlinux, btf_id);
+ btf = bpf_prog_get_target_btf(prog);
+ if (!btf) {
+ verbose(env,
+ "FENTRY/FEXIT program can only be attached to another program annotated with BTF\n");
+ return -EINVAL;
+ }
+ t = btf_type_by_id(btf, btf_id);
if (!t) {
verbose(env, "attach_btf_id %u is invalid\n", btf_id);
return -EINVAL;
}
- tname = btf_name_by_offset(btf_vmlinux, t->name_off);
+ tname = btf_name_by_offset(btf, t->name_off);
if (!tname) {
verbose(env, "attach_btf_id %u doesn't have a name\n", btf_id);
return -EINVAL;
}
+ if (tgt_prog) {
+ struct bpf_prog_aux *aux = tgt_prog->aux;
+
+ for (i = 0; i < aux->func_info_cnt; i++)
+ if (aux->func_info[i].type_id == btf_id) {
+ subprog = i;
+ break;
+ }
+ if (subprog == -1) {
+ verbose(env, "Subprog %s doesn't exist\n", tname);
+ return -EINVAL;
+ }
+ conservative = aux->func_info_aux[subprog].unreliable;
+ key = ((u64)aux->id) << 32 | btf_id;
+ } else {
+ key = btf_id;
+ }
switch (prog->expected_attach_type) {
case BPF_TRACE_RAW_TP:
+ if (tgt_prog) {
+ verbose(env,
+ "Only FENTRY/FEXIT progs are attachable to another BPF prog\n");
+ return -EINVAL;
+ }
if (!btf_type_is_typedef(t)) {
verbose(env, "attach_btf_id %u is not a typedef\n",
btf_id);
@@ -9416,11 +9457,11 @@ static int check_attach_btf_id(struct bpf_verifier_env *env)
return -EINVAL;
}
tname += sizeof(prefix) - 1;
- t = btf_type_by_id(btf_vmlinux, t->type);
+ t = btf_type_by_id(btf, t->type);
if (!btf_type_is_ptr(t))
/* should never happen in valid vmlinux build */
return -EINVAL;
- t = btf_type_by_id(btf_vmlinux, t->type);
+ t = btf_type_by_id(btf, t->type);
if (!btf_type_is_func_proto(t))
/* should never happen in valid vmlinux build */
return -EINVAL;
@@ -9432,6 +9473,66 @@ static int check_attach_btf_id(struct bpf_verifier_env *env)
prog->aux->attach_func_proto = t;
prog->aux->attach_btf_trace = true;
return 0;
+ case BPF_TRACE_FENTRY:
+ case BPF_TRACE_FEXIT:
+ if (!btf_type_is_func(t)) {
+ verbose(env, "attach_btf_id %u is not a function\n",
+ btf_id);
+ return -EINVAL;
+ }
+ t = btf_type_by_id(btf, t->type);
+ if (!btf_type_is_func_proto(t))
+ return -EINVAL;
+ tr = bpf_trampoline_lookup(key);
+ if (!tr)
+ return -ENOMEM;
+ prog->aux->attach_func_name = tname;
+ /* t is either vmlinux type or another program's type */
+ prog->aux->attach_func_proto = t;
+ mutex_lock(&tr->mutex);
+ if (tr->func.addr) {
+ prog->aux->trampoline = tr;
+ goto out;
+ }
+ if (tgt_prog && conservative) {
+ prog->aux->attach_func_proto = NULL;
+ t = NULL;
+ }
+ ret = btf_distill_func_proto(&env->log, btf, t,
+ tname, &tr->func.model);
+ if (ret < 0)
+ goto out;
+ if (tgt_prog) {
+ if (!tgt_prog->jited) {
+ /* for now */
+ verbose(env, "Can trace only JITed BPF progs\n");
+ ret = -EINVAL;
+ goto out;
+ }
+ if (tgt_prog->type == BPF_PROG_TYPE_TRACING) {
+ /* prevent cycles */
+ verbose(env, "Cannot recursively attach\n");
+ ret = -EINVAL;
+ goto out;
+ }
+ addr = (long) tgt_prog->aux->func[subprog]->bpf_func;
+ } else {
+ addr = kallsyms_lookup_name(tname);
+ if (!addr) {
+ verbose(env,
+ "The address of function %s cannot be found\n",
+ tname);
+ ret = -ENOENT;
+ goto out;
+ }
+ }
+ tr->func.addr = (void *)addr;
+ prog->aux->trampoline = tr;
+out:
+ mutex_unlock(&tr->mutex);
+ if (ret)
+ bpf_trampoline_put(tr);
+ return ret;
default:
return -EINVAL;
}
diff --git a/kernel/bpf/xskmap.c b/kernel/bpf/xskmap.c
index da16c30868f3..90c4fce1c981 100644
--- a/kernel/bpf/xskmap.c
+++ b/kernel/bpf/xskmap.c
@@ -11,10 +11,8 @@
int xsk_map_inc(struct xsk_map *map)
{
- struct bpf_map *m = &map->map;
-
- m = bpf_map_inc(m, false);
- return PTR_ERR_OR_ZERO(m);
+ bpf_map_inc(&map->map);
+ return 0;
}
void xsk_map_put(struct xsk_map *map)