aboutsummaryrefslogtreecommitdiff
path: root/kernel
diff options
context:
space:
mode:
Diffstat (limited to 'kernel')
-rw-r--r--kernel/bpf/syscall.c11
-rw-r--r--kernel/bpf/verifier.c106
-rw-r--r--kernel/sysctl.c13
3 files changed, 116 insertions, 14 deletions
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index c868cafbc00c..83697bc8e574 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -18,6 +18,8 @@
#include <linux/filter.h>
#include <linux/version.h>
+int sysctl_unprivileged_bpf_disabled __read_mostly;
+
static LIST_HEAD(bpf_map_types);
static struct bpf_map *find_and_alloc_map(union bpf_attr *attr)
@@ -544,6 +546,9 @@ static int bpf_prog_load(union bpf_attr *attr)
attr->kern_version != LINUX_VERSION_CODE)
return -EINVAL;
+ if (type != BPF_PROG_TYPE_SOCKET_FILTER && !capable(CAP_SYS_ADMIN))
+ return -EPERM;
+
/* plain bpf_prog allocation */
prog = bpf_prog_alloc(bpf_prog_size(attr->insn_cnt), GFP_USER);
if (!prog)
@@ -599,11 +604,7 @@ SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, siz
union bpf_attr attr = {};
int err;
- /* the syscall is limited to root temporarily. This restriction will be
- * lifted when security audit is clean. Note that eBPF+tracing must have
- * this restriction, since it may pass kernel data to user space
- */
- if (!capable(CAP_SYS_ADMIN))
+ if (!capable(CAP_SYS_ADMIN) && sysctl_unprivileged_bpf_disabled)
return -EPERM;
if (!access_ok(VERIFY_READ, uattr, 1))
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index f8da034c2258..1d6b97be79e1 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -199,6 +199,7 @@ struct verifier_env {
struct verifier_state_list **explored_states; /* search pruning optimization */
struct bpf_map *used_maps[MAX_USED_MAPS]; /* array of map's used by eBPF program */
u32 used_map_cnt; /* number of used maps */
+ bool allow_ptr_leaks;
};
/* verbose verifier prints what it's seeing
@@ -538,6 +539,21 @@ static int bpf_size_to_bytes(int bpf_size)
return -EINVAL;
}
+static bool is_spillable_regtype(enum bpf_reg_type type)
+{
+ switch (type) {
+ case PTR_TO_MAP_VALUE:
+ case PTR_TO_MAP_VALUE_OR_NULL:
+ case PTR_TO_STACK:
+ case PTR_TO_CTX:
+ case FRAME_PTR:
+ case CONST_PTR_TO_MAP:
+ return true;
+ default:
+ return false;
+ }
+}
+
/* check_stack_read/write functions track spill/fill of registers,
* stack boundary and alignment are checked in check_mem_access()
*/
@@ -550,9 +566,7 @@ static int check_stack_write(struct verifier_state *state, int off, int size,
*/
if (value_regno >= 0 &&
- (state->regs[value_regno].type == PTR_TO_MAP_VALUE ||
- state->regs[value_regno].type == PTR_TO_STACK ||
- state->regs[value_regno].type == PTR_TO_CTX)) {
+ is_spillable_regtype(state->regs[value_regno].type)) {
/* register containing pointer is being spilled into stack */
if (size != BPF_REG_SIZE) {
@@ -643,6 +657,20 @@ static int check_ctx_access(struct verifier_env *env, int off, int size,
return -EACCES;
}
+static bool is_pointer_value(struct verifier_env *env, int regno)
+{
+ if (env->allow_ptr_leaks)
+ return false;
+
+ switch (env->cur_state.regs[regno].type) {
+ case UNKNOWN_VALUE:
+ case CONST_IMM:
+ return false;
+ default:
+ return true;
+ }
+}
+
/* check whether memory at (regno + off) is accessible for t = (read | write)
* if t==write, value_regno is a register which value is stored into memory
* if t==read, value_regno is a register which will receive the value from memory
@@ -669,11 +697,21 @@ static int check_mem_access(struct verifier_env *env, u32 regno, int off,
}
if (state->regs[regno].type == PTR_TO_MAP_VALUE) {
+ if (t == BPF_WRITE && value_regno >= 0 &&
+ is_pointer_value(env, value_regno)) {
+ verbose("R%d leaks addr into map\n", value_regno);
+ return -EACCES;
+ }
err = check_map_access(env, regno, off, size);
if (!err && t == BPF_READ && value_regno >= 0)
mark_reg_unknown_value(state->regs, value_regno);
} else if (state->regs[regno].type == PTR_TO_CTX) {
+ if (t == BPF_WRITE && value_regno >= 0 &&
+ is_pointer_value(env, value_regno)) {
+ verbose("R%d leaks addr into ctx\n", value_regno);
+ return -EACCES;
+ }
err = check_ctx_access(env, off, size, t);
if (!err && t == BPF_READ && value_regno >= 0)
mark_reg_unknown_value(state->regs, value_regno);
@@ -684,10 +722,17 @@ static int check_mem_access(struct verifier_env *env, u32 regno, int off,
verbose("invalid stack off=%d size=%d\n", off, size);
return -EACCES;
}
- if (t == BPF_WRITE)
+ if (t == BPF_WRITE) {
+ if (!env->allow_ptr_leaks &&
+ state->stack_slot_type[MAX_BPF_STACK + off] == STACK_SPILL &&
+ size != BPF_REG_SIZE) {
+ verbose("attempt to corrupt spilled pointer on stack\n");
+ return -EACCES;
+ }
err = check_stack_write(state, off, size, value_regno);
- else
+ } else {
err = check_stack_read(state, off, size, value_regno);
+ }
} else {
verbose("R%d invalid mem access '%s'\n",
regno, reg_type_str[state->regs[regno].type]);
@@ -775,8 +820,13 @@ static int check_func_arg(struct verifier_env *env, u32 regno,
return -EACCES;
}
- if (arg_type == ARG_ANYTHING)
+ if (arg_type == ARG_ANYTHING) {
+ if (is_pointer_value(env, regno)) {
+ verbose("R%d leaks addr into helper function\n", regno);
+ return -EACCES;
+ }
return 0;
+ }
if (arg_type == ARG_PTR_TO_STACK || arg_type == ARG_PTR_TO_MAP_KEY ||
arg_type == ARG_PTR_TO_MAP_VALUE) {
@@ -950,8 +1000,9 @@ static int check_call(struct verifier_env *env, int func_id)
}
/* check validity of 32-bit and 64-bit arithmetic operations */
-static int check_alu_op(struct reg_state *regs, struct bpf_insn *insn)
+static int check_alu_op(struct verifier_env *env, struct bpf_insn *insn)
{
+ struct reg_state *regs = env->cur_state.regs;
u8 opcode = BPF_OP(insn->code);
int err;
@@ -976,6 +1027,12 @@ static int check_alu_op(struct reg_state *regs, struct bpf_insn *insn)
if (err)
return err;
+ if (is_pointer_value(env, insn->dst_reg)) {
+ verbose("R%d pointer arithmetic prohibited\n",
+ insn->dst_reg);
+ return -EACCES;
+ }
+
/* check dest operand */
err = check_reg_arg(regs, insn->dst_reg, DST_OP);
if (err)
@@ -1012,6 +1069,11 @@ static int check_alu_op(struct reg_state *regs, struct bpf_insn *insn)
*/
regs[insn->dst_reg] = regs[insn->src_reg];
} else {
+ if (is_pointer_value(env, insn->src_reg)) {
+ verbose("R%d partial copy of pointer\n",
+ insn->src_reg);
+ return -EACCES;
+ }
regs[insn->dst_reg].type = UNKNOWN_VALUE;
regs[insn->dst_reg].map_ptr = NULL;
}
@@ -1061,8 +1123,18 @@ static int check_alu_op(struct reg_state *regs, struct bpf_insn *insn)
/* pattern match 'bpf_add Rx, imm' instruction */
if (opcode == BPF_ADD && BPF_CLASS(insn->code) == BPF_ALU64 &&
regs[insn->dst_reg].type == FRAME_PTR &&
- BPF_SRC(insn->code) == BPF_K)
+ BPF_SRC(insn->code) == BPF_K) {
stack_relative = true;
+ } else if (is_pointer_value(env, insn->dst_reg)) {
+ verbose("R%d pointer arithmetic prohibited\n",
+ insn->dst_reg);
+ return -EACCES;
+ } else if (BPF_SRC(insn->code) == BPF_X &&
+ is_pointer_value(env, insn->src_reg)) {
+ verbose("R%d pointer arithmetic prohibited\n",
+ insn->src_reg);
+ return -EACCES;
+ }
/* check dest operand */
err = check_reg_arg(regs, insn->dst_reg, DST_OP);
@@ -1101,6 +1173,12 @@ static int check_cond_jmp_op(struct verifier_env *env,
err = check_reg_arg(regs, insn->src_reg, SRC_OP);
if (err)
return err;
+
+ if (is_pointer_value(env, insn->src_reg)) {
+ verbose("R%d pointer comparison prohibited\n",
+ insn->src_reg);
+ return -EACCES;
+ }
} else {
if (insn->src_reg != BPF_REG_0) {
verbose("BPF_JMP uses reserved fields\n");
@@ -1155,6 +1233,9 @@ static int check_cond_jmp_op(struct verifier_env *env,
regs[insn->dst_reg].type = CONST_IMM;
regs[insn->dst_reg].imm = 0;
}
+ } else if (is_pointer_value(env, insn->dst_reg)) {
+ verbose("R%d pointer comparison prohibited\n", insn->dst_reg);
+ return -EACCES;
} else if (BPF_SRC(insn->code) == BPF_K &&
(opcode == BPF_JEQ || opcode == BPF_JNE)) {
@@ -1658,7 +1739,7 @@ static int do_check(struct verifier_env *env)
}
if (class == BPF_ALU || class == BPF_ALU64) {
- err = check_alu_op(regs, insn);
+ err = check_alu_op(env, insn);
if (err)
return err;
@@ -1816,6 +1897,11 @@ static int do_check(struct verifier_env *env)
if (err)
return err;
+ if (is_pointer_value(env, BPF_REG_0)) {
+ verbose("R0 leaks addr as return value\n");
+ return -EACCES;
+ }
+
process_bpf_exit:
insn_idx = pop_stack(env, &prev_insn_idx);
if (insn_idx < 0) {
@@ -2144,6 +2230,8 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr)
if (ret < 0)
goto skip_full_check;
+ env->allow_ptr_leaks = capable(CAP_SYS_ADMIN);
+
ret = do_check(env);
skip_full_check:
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index e69201d8094e..96c856b04081 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -64,6 +64,7 @@
#include <linux/binfmts.h>
#include <linux/sched/sysctl.h>
#include <linux/kexec.h>
+#include <linux/bpf.h>
#include <asm/uaccess.h>
#include <asm/processor.h>
@@ -1139,6 +1140,18 @@ static struct ctl_table kern_table[] = {
.proc_handler = timer_migration_handler,
},
#endif
+#ifdef CONFIG_BPF_SYSCALL
+ {
+ .procname = "unprivileged_bpf_disabled",
+ .data = &sysctl_unprivileged_bpf_disabled,
+ .maxlen = sizeof(sysctl_unprivileged_bpf_disabled),
+ .mode = 0644,
+ /* only handle a transition from default "0" to "1" */
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = &one,
+ .extra2 = &one,
+ },
+#endif
{ }
};