aboutsummaryrefslogtreecommitdiff
path: root/kernel
diff options
context:
space:
mode:
authorDaniel Borkmann2016-09-09 02:45:31 +0200
committerDavid S. Miller2016-09-09 19:36:04 -0700
commitf3694e00123802d688180e7ae90b240669910e3c (patch)
tree321a9b95e9df3e64adbc8340a5f63a778db69e70 /kernel
parent374fb54eeaaa6b2cb82bca73a11273687bb2a96a (diff)
bpf: add BPF_CALL_x macros for declaring helpers
This work adds BPF_CALL_<n>() macros and converts all the eBPF helper functions to use them, in a similar fashion like we do with SYSCALL_DEFINE<n>() macros that are used today. Motivation for this is to hide all the register handling and all necessary casts from the user, so that it is done automatically in the background when adding a BPF_CALL_<n>() call. This makes current helpers easier to review, eases to write future helpers, avoids getting the casting mess wrong, and allows for extending all helpers at once (f.e. build time checks, etc). It also helps detecting more easily in code reviews that unused registers are not instrumented in the code by accident, breaking compatibility with existing programs. BPF_CALL_<n>() internals are quite similar to SYSCALL_DEFINE<n>() ones with some fundamental differences, for example, for generating the actual helper function that carries all u64 regs, we need to fill unused regs, so that we always end up with 5 u64 regs as an argument. I reviewed several 0-5 generated BPF_CALL_<n>() variants of the .i results and they look all as expected. No sparse issue spotted. We let this also sit for a few days with Fengguang's kbuild test robot, and there were no issues seen. On s390, it barked on the "uses dynamic stack allocation" notice, which is an old one from bpf_perf_event_output{,_tp}() reappearing here due to the conversion to the call wrapper, just telling that the perf raw record/frag sits on stack (gcc with s390's -mwarn-dynamicstack), but that's all. Did various runtime tests and they were fine as well. All eBPF helpers are now converted to use these macros, getting rid of a good chunk of all the raw castings. Signed-off-by: Daniel Borkmann <daniel@iogearbox.net> Acked-by: Alexei Starovoitov <ast@kernel.org> Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'kernel')
-rw-r--r--kernel/bpf/core.c2
-rw-r--r--kernel/bpf/helpers.c46
-rw-r--r--kernel/bpf/stackmap.c5
-rw-r--r--kernel/trace/bpf_trace.c75
4 files changed, 51 insertions, 77 deletions
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index 03fd23d4d587..7b7baaed9ed4 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -1018,7 +1018,7 @@ void bpf_user_rnd_init_once(void)
prandom_init_once(&bpf_user_rnd_state);
}
-u64 bpf_user_rnd_u32(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
+BPF_CALL_0(bpf_user_rnd_u32)
{
/* Should someone ever have the rather unwise idea to use some
* of the registers passed into this function, then note that
diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c
index 6df73bd1ba34..a5b8bf8cfcfd 100644
--- a/kernel/bpf/helpers.c
+++ b/kernel/bpf/helpers.c
@@ -16,6 +16,7 @@
#include <linux/ktime.h>
#include <linux/sched.h>
#include <linux/uidgid.h>
+#include <linux/filter.h>
/* If kernel subsystem is allowing eBPF programs to call this function,
* inside its own verifier_ops->get_func_proto() callback it should return
@@ -26,24 +27,10 @@
* if program is allowed to access maps, so check rcu_read_lock_held in
* all three functions.
*/
-static u64 bpf_map_lookup_elem(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
+BPF_CALL_2(bpf_map_lookup_elem, struct bpf_map *, map, void *, key)
{
- /* verifier checked that R1 contains a valid pointer to bpf_map
- * and R2 points to a program stack and map->key_size bytes were
- * initialized
- */
- struct bpf_map *map = (struct bpf_map *) (unsigned long) r1;
- void *key = (void *) (unsigned long) r2;
- void *value;
-
WARN_ON_ONCE(!rcu_read_lock_held());
-
- value = map->ops->map_lookup_elem(map, key);
-
- /* lookup() returns either pointer to element value or NULL
- * which is the meaning of PTR_TO_MAP_VALUE_OR_NULL type
- */
- return (unsigned long) value;
+ return (unsigned long) map->ops->map_lookup_elem(map, key);
}
const struct bpf_func_proto bpf_map_lookup_elem_proto = {
@@ -54,15 +41,11 @@ const struct bpf_func_proto bpf_map_lookup_elem_proto = {
.arg2_type = ARG_PTR_TO_MAP_KEY,
};
-static u64 bpf_map_update_elem(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
+BPF_CALL_4(bpf_map_update_elem, struct bpf_map *, map, void *, key,
+ void *, value, u64, flags)
{
- struct bpf_map *map = (struct bpf_map *) (unsigned long) r1;
- void *key = (void *) (unsigned long) r2;
- void *value = (void *) (unsigned long) r3;
-
WARN_ON_ONCE(!rcu_read_lock_held());
-
- return map->ops->map_update_elem(map, key, value, r4);
+ return map->ops->map_update_elem(map, key, value, flags);
}
const struct bpf_func_proto bpf_map_update_elem_proto = {
@@ -75,13 +58,9 @@ const struct bpf_func_proto bpf_map_update_elem_proto = {
.arg4_type = ARG_ANYTHING,
};
-static u64 bpf_map_delete_elem(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
+BPF_CALL_2(bpf_map_delete_elem, struct bpf_map *, map, void *, key)
{
- struct bpf_map *map = (struct bpf_map *) (unsigned long) r1;
- void *key = (void *) (unsigned long) r2;
-
WARN_ON_ONCE(!rcu_read_lock_held());
-
return map->ops->map_delete_elem(map, key);
}
@@ -99,7 +78,7 @@ const struct bpf_func_proto bpf_get_prandom_u32_proto = {
.ret_type = RET_INTEGER,
};
-static u64 bpf_get_smp_processor_id(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
+BPF_CALL_0(bpf_get_smp_processor_id)
{
return smp_processor_id();
}
@@ -110,7 +89,7 @@ const struct bpf_func_proto bpf_get_smp_processor_id_proto = {
.ret_type = RET_INTEGER,
};
-static u64 bpf_ktime_get_ns(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
+BPF_CALL_0(bpf_ktime_get_ns)
{
/* NMI safe access to clock monotonic */
return ktime_get_mono_fast_ns();
@@ -122,7 +101,7 @@ const struct bpf_func_proto bpf_ktime_get_ns_proto = {
.ret_type = RET_INTEGER,
};
-static u64 bpf_get_current_pid_tgid(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
+BPF_CALL_0(bpf_get_current_pid_tgid)
{
struct task_struct *task = current;
@@ -138,7 +117,7 @@ const struct bpf_func_proto bpf_get_current_pid_tgid_proto = {
.ret_type = RET_INTEGER,
};
-static u64 bpf_get_current_uid_gid(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
+BPF_CALL_0(bpf_get_current_uid_gid)
{
struct task_struct *task = current;
kuid_t uid;
@@ -158,10 +137,9 @@ const struct bpf_func_proto bpf_get_current_uid_gid_proto = {
.ret_type = RET_INTEGER,
};
-static u64 bpf_get_current_comm(u64 r1, u64 size, u64 r3, u64 r4, u64 r5)
+BPF_CALL_2(bpf_get_current_comm, char *, buf, u32, size)
{
struct task_struct *task = current;
- char *buf = (char *) (long) r1;
if (unlikely(!task))
goto err_clear;
diff --git a/kernel/bpf/stackmap.c b/kernel/bpf/stackmap.c
index bf4495fcd25d..732ae16d12b7 100644
--- a/kernel/bpf/stackmap.c
+++ b/kernel/bpf/stackmap.c
@@ -116,10 +116,9 @@ free_smap:
return ERR_PTR(err);
}
-u64 bpf_get_stackid(u64 r1, u64 r2, u64 flags, u64 r4, u64 r5)
+BPF_CALL_3(bpf_get_stackid, struct pt_regs *, regs, struct bpf_map *, map,
+ u64, flags)
{
- struct pt_regs *regs = (struct pt_regs *) (long) r1;
- struct bpf_map *map = (struct bpf_map *) (long) r2;
struct bpf_stack_map *smap = container_of(map, struct bpf_stack_map, map);
struct perf_callchain_entry *trace;
struct stack_map_bucket *bucket, *new_bucket, *old_bucket;
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index e63d7d435796..5dcb99281259 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -61,11 +61,9 @@ unsigned int trace_call_bpf(struct bpf_prog *prog, void *ctx)
}
EXPORT_SYMBOL_GPL(trace_call_bpf);
-static u64 bpf_probe_read(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
+BPF_CALL_3(bpf_probe_read, void *, dst, u32, size, const void *, unsafe_ptr)
{
- void *dst = (void *) (long) r1;
- int ret, size = (int) r2;
- void *unsafe_ptr = (void *) (long) r3;
+ int ret;
ret = probe_kernel_read(dst, unsafe_ptr, size);
if (unlikely(ret < 0))
@@ -83,12 +81,9 @@ static const struct bpf_func_proto bpf_probe_read_proto = {
.arg3_type = ARG_ANYTHING,
};
-static u64 bpf_probe_write_user(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
+BPF_CALL_3(bpf_probe_write_user, void *, unsafe_ptr, const void *, src,
+ u32, size)
{
- void *unsafe_ptr = (void *) (long) r1;
- void *src = (void *) (long) r2;
- int size = (int) r3;
-
/*
* Ensure we're in user context which is safe for the helper to
* run. This helper has no business in a kthread.
@@ -130,9 +125,9 @@ static const struct bpf_func_proto *bpf_get_probe_write_proto(void)
* limited trace_printk()
* only %d %u %x %ld %lu %lx %lld %llu %llx %p %s conversion specifiers allowed
*/
-static u64 bpf_trace_printk(u64 r1, u64 fmt_size, u64 r3, u64 r4, u64 r5)
+BPF_CALL_5(bpf_trace_printk, char *, fmt, u32, fmt_size, u64, arg1,
+ u64, arg2, u64, arg3)
{
- char *fmt = (char *) (long) r1;
bool str_seen = false;
int mod[3] = {};
int fmt_cnt = 0;
@@ -178,16 +173,16 @@ static u64 bpf_trace_printk(u64 r1, u64 fmt_size, u64 r3, u64 r4, u64 r5)
switch (fmt_cnt) {
case 1:
- unsafe_addr = r3;
- r3 = (long) buf;
+ unsafe_addr = arg1;
+ arg1 = (long) buf;
break;
case 2:
- unsafe_addr = r4;
- r4 = (long) buf;
+ unsafe_addr = arg2;
+ arg2 = (long) buf;
break;
case 3:
- unsafe_addr = r5;
- r5 = (long) buf;
+ unsafe_addr = arg3;
+ arg3 = (long) buf;
break;
}
buf[0] = 0;
@@ -209,9 +204,9 @@ static u64 bpf_trace_printk(u64 r1, u64 fmt_size, u64 r3, u64 r4, u64 r5)
}
return __trace_printk(1/* fake ip will not be printed */, fmt,
- mod[0] == 2 ? r3 : mod[0] == 1 ? (long) r3 : (u32) r3,
- mod[1] == 2 ? r4 : mod[1] == 1 ? (long) r4 : (u32) r4,
- mod[2] == 2 ? r5 : mod[2] == 1 ? (long) r5 : (u32) r5);
+ mod[0] == 2 ? arg1 : mod[0] == 1 ? (long) arg1 : (u32) arg1,
+ mod[1] == 2 ? arg2 : mod[1] == 1 ? (long) arg2 : (u32) arg2,
+ mod[2] == 2 ? arg3 : mod[2] == 1 ? (long) arg3 : (u32) arg3);
}
static const struct bpf_func_proto bpf_trace_printk_proto = {
@@ -233,9 +228,8 @@ const struct bpf_func_proto *bpf_get_trace_printk_proto(void)
return &bpf_trace_printk_proto;
}
-static u64 bpf_perf_event_read(u64 r1, u64 flags, u64 r3, u64 r4, u64 r5)
+BPF_CALL_2(bpf_perf_event_read, struct bpf_map *, map, u64, flags)
{
- struct bpf_map *map = (struct bpf_map *) (unsigned long) r1;
struct bpf_array *array = container_of(map, struct bpf_array, map);
unsigned int cpu = smp_processor_id();
u64 index = flags & BPF_F_INDEX_MASK;
@@ -312,11 +306,9 @@ __bpf_perf_event_output(struct pt_regs *regs, struct bpf_map *map,
return 0;
}
-static u64 bpf_perf_event_output(u64 r1, u64 r2, u64 flags, u64 r4, u64 size)
+BPF_CALL_5(bpf_perf_event_output, struct pt_regs *, regs, struct bpf_map *, map,
+ u64, flags, void *, data, u64, size)
{
- struct pt_regs *regs = (struct pt_regs *)(long) r1;
- struct bpf_map *map = (struct bpf_map *)(long) r2;
- void *data = (void *)(long) r4;
struct perf_raw_record raw = {
.frag = {
.size = size,
@@ -367,7 +359,7 @@ u64 bpf_event_output(struct bpf_map *map, u64 flags, void *meta, u64 meta_size,
return __bpf_perf_event_output(regs, map, flags, &raw);
}
-static u64 bpf_get_current_task(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
+BPF_CALL_0(bpf_get_current_task)
{
return (long) current;
}
@@ -378,16 +370,13 @@ static const struct bpf_func_proto bpf_get_current_task_proto = {
.ret_type = RET_INTEGER,
};
-static u64 bpf_current_task_under_cgroup(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
+BPF_CALL_2(bpf_current_task_under_cgroup, struct bpf_map *, map, u32, idx)
{
- struct bpf_map *map = (struct bpf_map *)(long)r1;
struct bpf_array *array = container_of(map, struct bpf_array, map);
struct cgroup *cgrp;
- u32 idx = (u32)r2;
if (unlikely(in_interrupt()))
return -EINVAL;
-
if (unlikely(idx >= array->map.max_entries))
return -E2BIG;
@@ -481,16 +470,17 @@ static struct bpf_prog_type_list kprobe_tl = {
.type = BPF_PROG_TYPE_KPROBE,
};
-static u64 bpf_perf_event_output_tp(u64 r1, u64 r2, u64 index, u64 r4, u64 size)
+BPF_CALL_5(bpf_perf_event_output_tp, void *, tp_buff, struct bpf_map *, map,
+ u64, flags, void *, data, u64, size)
{
+ struct pt_regs *regs = *(struct pt_regs **)tp_buff;
+
/*
* r1 points to perf tracepoint buffer where first 8 bytes are hidden
* from bpf program and contain a pointer to 'struct pt_regs'. Fetch it
- * from there and call the same bpf_perf_event_output() helper
+ * from there and call the same bpf_perf_event_output() helper inline.
*/
- u64 ctx = *(long *)(uintptr_t)r1;
-
- return bpf_perf_event_output(ctx, r2, index, r4, size);
+ return ____bpf_perf_event_output(regs, map, flags, data, size);
}
static const struct bpf_func_proto bpf_perf_event_output_proto_tp = {
@@ -504,11 +494,18 @@ static const struct bpf_func_proto bpf_perf_event_output_proto_tp = {
.arg5_type = ARG_CONST_STACK_SIZE,
};
-static u64 bpf_get_stackid_tp(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
+BPF_CALL_3(bpf_get_stackid_tp, void *, tp_buff, struct bpf_map *, map,
+ u64, flags)
{
- u64 ctx = *(long *)(uintptr_t)r1;
+ struct pt_regs *regs = *(struct pt_regs **)tp_buff;
- return bpf_get_stackid(ctx, r2, r3, r4, r5);
+ /*
+ * Same comment as in bpf_perf_event_output_tp(), only that this time
+ * the other helper's function body cannot be inlined due to being
+ * external, thus we need to call raw helper function.
+ */
+ return bpf_get_stackid((unsigned long) regs, (unsigned long) map,
+ flags, 0, 0);
}
static const struct bpf_func_proto bpf_get_stackid_proto_tp = {