diff options
Diffstat (limited to 'kernel')
-rw-r--r-- | kernel/bpf/btf.c | 148 | ||||
-rw-r--r-- | kernel/bpf/cgroup.c | 9 | ||||
-rw-r--r-- | kernel/bpf/core.c | 11 | ||||
-rw-r--r-- | kernel/bpf/helpers.c | 113 | ||||
-rw-r--r-- | kernel/bpf/map_in_map.c | 15 | ||||
-rw-r--r-- | kernel/bpf/syscall.c | 30 | ||||
-rw-r--r-- | kernel/bpf/verifier.c | 355 |
7 files changed, 375 insertions, 306 deletions
diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c index 2c2d1fb9f410..6b682b8e4b50 100644 --- a/kernel/bpf/btf.c +++ b/kernel/bpf/btf.c @@ -25,6 +25,9 @@ #include <linux/bsearch.h> #include <linux/kobject.h> #include <linux/sysfs.h> + +#include <net/netfilter/nf_bpf_link.h> + #include <net/sock.h> #include "../tools/lib/bpf/relo_core.h" @@ -212,6 +215,7 @@ enum btf_kfunc_hook { BTF_KFUNC_HOOK_SK_SKB, BTF_KFUNC_HOOK_SOCKET_FILTER, BTF_KFUNC_HOOK_LWT, + BTF_KFUNC_HOOK_NETFILTER, BTF_KFUNC_HOOK_MAX, }; @@ -577,8 +581,8 @@ static s32 bpf_find_btf_id(const char *name, u32 kind, struct btf **btf_p) *btf_p = btf; return ret; } - spin_lock_bh(&btf_idr_lock); btf_put(btf); + spin_lock_bh(&btf_idr_lock); } spin_unlock_bh(&btf_idr_lock); return ret; @@ -1666,10 +1670,8 @@ static void btf_struct_metas_free(struct btf_struct_metas *tab) if (!tab) return; - for (i = 0; i < tab->cnt; i++) { + for (i = 0; i < tab->cnt; i++) btf_record_free(tab->types[i].record); - kfree(tab->types[i].field_offs); - } kfree(tab); } @@ -3393,6 +3395,7 @@ static int btf_get_field_type(const char *name, u32 field_mask, u32 *seen_mask, field_mask_test_name(BPF_LIST_NODE, "bpf_list_node"); field_mask_test_name(BPF_RB_ROOT, "bpf_rb_root"); field_mask_test_name(BPF_RB_NODE, "bpf_rb_node"); + field_mask_test_name(BPF_REFCOUNT, "bpf_refcount"); /* Only return BPF_KPTR when all other types with matchable names fail */ if (field_mask & BPF_KPTR) { @@ -3441,6 +3444,7 @@ static int btf_find_struct_field(const struct btf *btf, case BPF_TIMER: case BPF_LIST_NODE: case BPF_RB_NODE: + case BPF_REFCOUNT: ret = btf_find_struct(btf, member_type, off, sz, field_type, idx < info_cnt ? &info[idx] : &tmp); if (ret < 0) @@ -3506,6 +3510,7 @@ static int btf_find_datasec_var(const struct btf *btf, const struct btf_type *t, case BPF_TIMER: case BPF_LIST_NODE: case BPF_RB_NODE: + case BPF_REFCOUNT: ret = btf_find_struct(btf, var_type, off, sz, field_type, idx < info_cnt ? &info[idx] : &tmp); if (ret < 0) @@ -3700,12 +3705,24 @@ static int btf_parse_rb_root(const struct btf *btf, struct btf_field *field, __alignof__(struct bpf_rb_node)); } +static int btf_field_cmp(const void *_a, const void *_b, const void *priv) +{ + const struct btf_field *a = (const struct btf_field *)_a; + const struct btf_field *b = (const struct btf_field *)_b; + + if (a->offset < b->offset) + return -1; + else if (a->offset > b->offset) + return 1; + return 0; +} + struct btf_record *btf_parse_fields(const struct btf *btf, const struct btf_type *t, u32 field_mask, u32 value_size) { struct btf_field_info info_arr[BTF_FIELDS_MAX]; + u32 next_off = 0, field_type_size; struct btf_record *rec; - u32 next_off = 0; int ret, i, cnt; ret = btf_find_field(btf, t, field_mask, info_arr, ARRAY_SIZE(info_arr)); @@ -3724,8 +3741,10 @@ struct btf_record *btf_parse_fields(const struct btf *btf, const struct btf_type rec->spin_lock_off = -EINVAL; rec->timer_off = -EINVAL; + rec->refcount_off = -EINVAL; for (i = 0; i < cnt; i++) { - if (info_arr[i].off + btf_field_type_size(info_arr[i].type) > value_size) { + field_type_size = btf_field_type_size(info_arr[i].type); + if (info_arr[i].off + field_type_size > value_size) { WARN_ONCE(1, "verifier bug off %d size %d", info_arr[i].off, value_size); ret = -EFAULT; goto end; @@ -3734,11 +3753,12 @@ struct btf_record *btf_parse_fields(const struct btf *btf, const struct btf_type ret = -EEXIST; goto end; } - next_off = info_arr[i].off + btf_field_type_size(info_arr[i].type); + next_off = info_arr[i].off + field_type_size; rec->field_mask |= info_arr[i].type; rec->fields[i].offset = info_arr[i].off; rec->fields[i].type = info_arr[i].type; + rec->fields[i].size = field_type_size; switch (info_arr[i].type) { case BPF_SPIN_LOCK: @@ -3751,6 +3771,11 @@ struct btf_record *btf_parse_fields(const struct btf *btf, const struct btf_type /* Cache offset for faster lookup at runtime */ rec->timer_off = rec->fields[i].offset; break; + case BPF_REFCOUNT: + WARN_ON_ONCE(rec->refcount_off >= 0); + /* Cache offset for faster lookup at runtime */ + rec->refcount_off = rec->fields[i].offset; + break; case BPF_KPTR_UNREF: case BPF_KPTR_REF: ret = btf_parse_kptr(btf, &rec->fields[i], &info_arr[i]); @@ -3784,30 +3809,16 @@ struct btf_record *btf_parse_fields(const struct btf *btf, const struct btf_type goto end; } - /* need collection identity for non-owning refs before allowing this - * - * Consider a node type w/ both list and rb_node fields: - * struct node { - * struct bpf_list_node l; - * struct bpf_rb_node r; - * } - * - * Used like so: - * struct node *n = bpf_obj_new(....); - * bpf_list_push_front(&list_head, &n->l); - * bpf_rbtree_remove(&rb_root, &n->r); - * - * It should not be possible to rbtree_remove the node since it hasn't - * been added to a tree. But push_front converts n to a non-owning - * reference, and rbtree_remove accepts the non-owning reference to - * a type w/ bpf_rb_node field. - */ - if (btf_record_has_field(rec, BPF_LIST_NODE) && + if (rec->refcount_off < 0 && + btf_record_has_field(rec, BPF_LIST_NODE) && btf_record_has_field(rec, BPF_RB_NODE)) { ret = -EINVAL; goto end; } + sort_r(rec->fields, rec->cnt, sizeof(struct btf_field), btf_field_cmp, + NULL, rec); + return rec; end: btf_record_free(rec); @@ -3889,61 +3900,6 @@ int btf_check_and_fixup_fields(const struct btf *btf, struct btf_record *rec) return 0; } -static int btf_field_offs_cmp(const void *_a, const void *_b, const void *priv) -{ - const u32 a = *(const u32 *)_a; - const u32 b = *(const u32 *)_b; - - if (a < b) - return -1; - else if (a > b) - return 1; - return 0; -} - -static void btf_field_offs_swap(void *_a, void *_b, int size, const void *priv) -{ - struct btf_field_offs *foffs = (void *)priv; - u32 *off_base = foffs->field_off; - u32 *a = _a, *b = _b; - u8 *sz_a, *sz_b; - - sz_a = foffs->field_sz + (a - off_base); - sz_b = foffs->field_sz + (b - off_base); - - swap(*a, *b); - swap(*sz_a, *sz_b); -} - -struct btf_field_offs *btf_parse_field_offs(struct btf_record *rec) -{ - struct btf_field_offs *foffs; - u32 i, *off; - u8 *sz; - - BUILD_BUG_ON(ARRAY_SIZE(foffs->field_off) != ARRAY_SIZE(foffs->field_sz)); - if (IS_ERR_OR_NULL(rec)) - return NULL; - - foffs = kzalloc(sizeof(*foffs), GFP_KERNEL | __GFP_NOWARN); - if (!foffs) - return ERR_PTR(-ENOMEM); - - off = foffs->field_off; - sz = foffs->field_sz; - for (i = 0; i < rec->cnt; i++) { - off[i] = rec->fields[i].offset; - sz[i] = btf_field_type_size(rec->fields[i].type); - } - foffs->cnt = rec->cnt; - - if (foffs->cnt == 1) - return foffs; - sort_r(foffs->field_off, foffs->cnt, sizeof(foffs->field_off[0]), - btf_field_offs_cmp, btf_field_offs_swap, foffs); - return foffs; -} - static void __btf_struct_show(const struct btf *btf, const struct btf_type *t, u32 type_id, void *data, u8 bits_offset, struct btf_show *show) @@ -5348,6 +5304,7 @@ static const char *alloc_obj_fields[] = { "bpf_list_node", "bpf_rb_root", "bpf_rb_node", + "bpf_refcount", }; static struct btf_struct_metas * @@ -5386,7 +5343,6 @@ btf_parse_struct_metas(struct bpf_verifier_log *log, struct btf *btf) for (i = 1; i < n; i++) { struct btf_struct_metas *new_tab; const struct btf_member *member; - struct btf_field_offs *foffs; struct btf_struct_meta *type; struct btf_record *record; const struct btf_type *t; @@ -5422,23 +5378,13 @@ btf_parse_struct_metas(struct bpf_verifier_log *log, struct btf *btf) type = &tab->types[tab->cnt]; type->btf_id = i; record = btf_parse_fields(btf, t, BPF_SPIN_LOCK | BPF_LIST_HEAD | BPF_LIST_NODE | - BPF_RB_ROOT | BPF_RB_NODE, t->size); + BPF_RB_ROOT | BPF_RB_NODE | BPF_REFCOUNT, t->size); /* The record cannot be unset, treat it as an error if so */ if (IS_ERR_OR_NULL(record)) { ret = PTR_ERR_OR_ZERO(record) ?: -EFAULT; goto free; } - foffs = btf_parse_field_offs(record); - /* We need the field_offs to be valid for a valid record, - * either both should be set or both should be unset. - */ - if (IS_ERR_OR_NULL(foffs)) { - btf_record_free(record); - ret = -EFAULT; - goto free; - } type->record = record; - type->field_offs = foffs; tab->cnt++; } return tab; @@ -6215,11 +6161,13 @@ again: if (off < moff) goto error; - /* Only allow structure for now, can be relaxed for - * other types later. - */ + /* allow structure and integer */ t = btf_type_skip_modifiers(btf, array_elem->type, NULL); + + if (btf_type_is_int(t)) + return WALK_SCALAR; + if (!btf_type_is_struct(t)) goto error; @@ -7858,6 +7806,8 @@ static int bpf_prog_type_to_kfunc_hook(enum bpf_prog_type prog_type) case BPF_PROG_TYPE_LWT_XMIT: case BPF_PROG_TYPE_LWT_SEG6LOCAL: return BTF_KFUNC_HOOK_LWT; + case BPF_PROG_TYPE_NETFILTER: + return BTF_KFUNC_HOOK_NETFILTER; default: return BTF_KFUNC_HOOK_MAX; } @@ -8410,12 +8360,10 @@ check_modules: btf_get(mod_btf); spin_unlock_bh(&btf_idr_lock); cands = bpf_core_add_cands(cands, mod_btf, btf_nr_types(main_btf)); - if (IS_ERR(cands)) { - btf_put(mod_btf); + btf_put(mod_btf); + if (IS_ERR(cands)) return ERR_CAST(cands); - } spin_lock_bh(&btf_idr_lock); - btf_put(mod_btf); } spin_unlock_bh(&btf_idr_lock); /* cands is a pointer to kmalloced memory here if cands->cnt > 0 diff --git a/kernel/bpf/cgroup.c b/kernel/bpf/cgroup.c index 53edb8ad2471..a06e118a9be5 100644 --- a/kernel/bpf/cgroup.c +++ b/kernel/bpf/cgroup.c @@ -1921,14 +1921,17 @@ int __cgroup_bpf_run_filter_getsockopt(struct sock *sk, int level, if (ret < 0) goto out; - if (ctx.optlen > max_optlen || ctx.optlen < 0) { + if (optval && (ctx.optlen > max_optlen || ctx.optlen < 0)) { ret = -EFAULT; goto out; } if (ctx.optlen != 0) { - if (copy_to_user(optval, ctx.optval, ctx.optlen) || - put_user(ctx.optlen, optlen)) { + if (optval && copy_to_user(optval, ctx.optval, ctx.optlen)) { + ret = -EFAULT; + goto out; + } + if (put_user(ctx.optlen, optlen)) { ret = -EFAULT; goto out; } diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index e2d256c82072..7421487422d4 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -1187,6 +1187,7 @@ int bpf_jit_get_func_addr(const struct bpf_prog *prog, s16 off = insn->off; s32 imm = insn->imm; u8 *addr; + int err; *func_addr_fixed = insn->src_reg != BPF_PSEUDO_CALL; if (!*func_addr_fixed) { @@ -1201,6 +1202,11 @@ int bpf_jit_get_func_addr(const struct bpf_prog *prog, addr = (u8 *)prog->aux->func[off]->bpf_func; else return -EINVAL; + } else if (insn->src_reg == BPF_PSEUDO_KFUNC_CALL && + bpf_jit_supports_far_kfunc_call()) { + err = bpf_get_kfunc_addr(prog, insn->imm, insn->off, &addr); + if (err) + return err; } else { /* Address of a BPF helper call. Since part of the core * kernel, it's always at a fixed location. __bpf_call_base @@ -2732,6 +2738,11 @@ bool __weak bpf_jit_supports_kfunc_call(void) return false; } +bool __weak bpf_jit_supports_far_kfunc_call(void) +{ + return false; +} + /* To execute LD_ABS/LD_IND instructions __bpf_prog_run() may call * skb_copy_bits(), so provide a weak definition of it for NET-less config. */ diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c index f04e60a4847f..8d368fa353f9 100644 --- a/kernel/bpf/helpers.c +++ b/kernel/bpf/helpers.c @@ -1798,6 +1798,8 @@ bpf_base_func_proto(enum bpf_func_id func_id) } } +void __bpf_obj_drop_impl(void *p, const struct btf_record *rec); + void bpf_list_head_free(const struct btf_field *field, void *list_head, struct bpf_spin_lock *spin_lock) { @@ -1828,13 +1830,8 @@ unlock: /* The contained type can also have resources, including a * bpf_list_head which needs to be freed. */ - bpf_obj_free_fields(field->graph_root.value_rec, obj); - /* bpf_mem_free requires migrate_disable(), since we can be - * called from map free path as well apart from BPF program (as - * part of map ops doing bpf_obj_free_fields). - */ migrate_disable(); - bpf_mem_free(&bpf_global_ma, obj); + __bpf_obj_drop_impl(obj, field->graph_root.value_rec); migrate_enable(); } } @@ -1871,10 +1868,9 @@ void bpf_rb_root_free(const struct btf_field *field, void *rb_root, obj = pos; obj -= field->graph_root.node_offset; - bpf_obj_free_fields(field->graph_root.value_rec, obj); migrate_disable(); - bpf_mem_free(&bpf_global_ma, obj); + __bpf_obj_drop_impl(obj, field->graph_root.value_rec); migrate_enable(); } } @@ -1893,12 +1889,21 @@ __bpf_kfunc void *bpf_obj_new_impl(u64 local_type_id__k, void *meta__ign) if (!p) return NULL; if (meta) - bpf_obj_init(meta->field_offs, p); + bpf_obj_init(meta->record, p); return p; } +/* Must be called under migrate_disable(), as required by bpf_mem_free */ void __bpf_obj_drop_impl(void *p, const struct btf_record *rec) { + if (rec && rec->refcount_off >= 0 && + !refcount_dec_and_test((refcount_t *)(p + rec->refcount_off))) { + /* Object is refcounted and refcount_dec didn't result in 0 + * refcount. Return without freeing the object + */ + return; + } + if (rec) bpf_obj_free_fields(rec, p); bpf_mem_free(&bpf_global_ma, p); @@ -1912,31 +1917,68 @@ __bpf_kfunc void bpf_obj_drop_impl(void *p__alloc, void *meta__ign) __bpf_obj_drop_impl(p, meta ? meta->record : NULL); } -static void __bpf_list_add(struct bpf_list_node *node, struct bpf_list_head *head, bool tail) +__bpf_kfunc void *bpf_refcount_acquire_impl(void *p__refcounted_kptr, void *meta__ign) +{ + struct btf_struct_meta *meta = meta__ign; + struct bpf_refcount *ref; + + /* Could just cast directly to refcount_t *, but need some code using + * bpf_refcount type so that it is emitted in vmlinux BTF + */ + ref = (struct bpf_refcount *)(p__refcounted_kptr + meta->record->refcount_off); + + refcount_inc((refcount_t *)ref); + return (void *)p__refcounted_kptr; +} + +static int __bpf_list_add(struct bpf_list_node *node, struct bpf_list_head *head, + bool tail, struct btf_record *rec, u64 off) { struct list_head *n = (void *)node, *h = (void *)head; + /* If list_head was 0-initialized by map, bpf_obj_init_field wasn't + * called on its fields, so init here + */ if (unlikely(!h->next)) INIT_LIST_HEAD(h); - if (unlikely(!n->next)) - INIT_LIST_HEAD(n); + if (!list_empty(n)) { + /* Only called from BPF prog, no need to migrate_disable */ + __bpf_obj_drop_impl(n - off, rec); + return -EINVAL; + } + tail ? list_add_tail(n, h) : list_add(n, h); + + return 0; } -__bpf_kfunc void bpf_list_push_front(struct bpf_list_head *head, struct bpf_list_node *node) +__bpf_kfunc int bpf_list_push_front_impl(struct bpf_list_head *head, + struct bpf_list_node *node, + void *meta__ign, u64 off) { - return __bpf_list_add(node, head, false); + struct btf_struct_meta *meta = meta__ign; + + return __bpf_list_add(node, head, false, + meta ? meta->record : NULL, off); } -__bpf_kfunc void bpf_list_push_back(struct bpf_list_head *head, struct bpf_list_node *node) +__bpf_kfunc int bpf_list_push_back_impl(struct bpf_list_head *head, + struct bpf_list_node *node, + void *meta__ign, u64 off) { - return __bpf_list_add(node, head, true); + struct btf_struct_meta *meta = meta__ign; + + return __bpf_list_add(node, head, true, + meta ? meta->record : NULL, off); } static struct bpf_list_node *__bpf_list_del(struct bpf_list_head *head, bool tail) { struct list_head *n, *h = (void *)head; + /* If list_head was 0-initialized by map, bpf_obj_init_field wasn't + * called on its fields, so init here + */ if (unlikely(!h->next)) INIT_LIST_HEAD(h); if (list_empty(h)) @@ -1962,6 +2004,9 @@ __bpf_kfunc struct bpf_rb_node *bpf_rbtree_remove(struct bpf_rb_root *root, struct rb_root_cached *r = (struct rb_root_cached *)root; struct rb_node *n = (struct rb_node *)node; + if (RB_EMPTY_NODE(n)) + return NULL; + rb_erase_cached(n, r); RB_CLEAR_NODE(n); return (struct bpf_rb_node *)n; @@ -1970,14 +2015,20 @@ __bpf_kfunc struct bpf_rb_node *bpf_rbtree_remove(struct bpf_rb_root *root, /* Need to copy rbtree_add_cached's logic here because our 'less' is a BPF * program */ -static void __bpf_rbtree_add(struct bpf_rb_root *root, struct bpf_rb_node *node, - void *less) +static int __bpf_rbtree_add(struct bpf_rb_root *root, struct bpf_rb_node *node, + void *less, struct btf_record *rec, u64 off) { struct rb_node **link = &((struct rb_root_cached *)root)->rb_root.rb_node; + struct rb_node *parent = NULL, *n = (struct rb_node *)node; bpf_callback_t cb = (bpf_callback_t)less; - struct rb_node *parent = NULL; bool leftmost = true; + if (!RB_EMPTY_NODE(n)) { + /* Only called from BPF prog, no need to migrate_disable */ + __bpf_obj_drop_impl(n - off, rec); + return -EINVAL; + } + while (*link) { parent = *link; if (cb((uintptr_t)node, (uintptr_t)parent, 0, 0, 0)) { @@ -1988,15 +2039,18 @@ static void __bpf_rbtree_add(struct bpf_rb_root *root, struct bpf_rb_node *node, } } - rb_link_node((struct rb_node *)node, parent, link); - rb_insert_color_cached((struct rb_node *)node, - (struct rb_root_cached *)root, leftmost); + rb_link_node(n, parent, link); + rb_insert_color_cached(n, (struct rb_root_cached *)root, leftmost); + return 0; } -__bpf_kfunc void bpf_rbtree_add(struct bpf_rb_root *root, struct bpf_rb_node *node, - bool (less)(struct bpf_rb_node *a, const struct bpf_rb_node *b)) +__bpf_kfunc int bpf_rbtree_add_impl(struct bpf_rb_root *root, struct bpf_rb_node *node, + bool (less)(struct bpf_rb_node *a, const struct bpf_rb_node *b), + void *meta__ign, u64 off) { - __bpf_rbtree_add(root, node, (void *)less); + struct btf_struct_meta *meta = meta__ign; + + return __bpf_rbtree_add(root, node, (void *)less, meta ? meta->record : NULL, off); } __bpf_kfunc struct bpf_rb_node *bpf_rbtree_first(struct bpf_rb_root *root) @@ -2271,14 +2325,15 @@ BTF_ID_FLAGS(func, crash_kexec, KF_DESTRUCTIVE) #endif BTF_ID_FLAGS(func, bpf_obj_new_impl, KF_ACQUIRE | KF_RET_NULL) BTF_ID_FLAGS(func, bpf_obj_drop_impl, KF_RELEASE) -BTF_ID_FLAGS(func, bpf_list_push_front) -BTF_ID_FLAGS(func, bpf_list_push_back) +BTF_ID_FLAGS(func, bpf_refcount_acquire_impl, KF_ACQUIRE) +BTF_ID_FLAGS(func, bpf_list_push_front_impl) +BTF_ID_FLAGS(func, bpf_list_push_back_impl) BTF_ID_FLAGS(func, bpf_list_pop_front, KF_ACQUIRE | KF_RET_NULL) BTF_ID_FLAGS(func, bpf_list_pop_back, KF_ACQUIRE | KF_RET_NULL) BTF_ID_FLAGS(func, bpf_task_acquire, KF_ACQUIRE | KF_RCU | KF_RET_NULL) BTF_ID_FLAGS(func, bpf_task_release, KF_RELEASE) -BTF_ID_FLAGS(func, bpf_rbtree_remove, KF_ACQUIRE) -BTF_ID_FLAGS(func, bpf_rbtree_add) +BTF_ID_FLAGS(func, bpf_rbtree_remove, KF_ACQUIRE | KF_RET_NULL) +BTF_ID_FLAGS(func, bpf_rbtree_add_impl) BTF_ID_FLAGS(func, bpf_rbtree_first, KF_RET_NULL) #ifdef CONFIG_CGROUPS diff --git a/kernel/bpf/map_in_map.c b/kernel/bpf/map_in_map.c index 38136ec4e095..2c5c64c2a53b 100644 --- a/kernel/bpf/map_in_map.c +++ b/kernel/bpf/map_in_map.c @@ -56,18 +56,6 @@ struct bpf_map *bpf_map_meta_alloc(int inner_map_ufd) ret = PTR_ERR(inner_map_meta->record); goto free; } - if (inner_map_meta->record) { - struct btf_field_offs *field_offs; - /* If btf_record is !IS_ERR_OR_NULL, then field_offs is always - * valid. - */ - field_offs = kmemdup(inner_map->field_offs, sizeof(*inner_map->field_offs), GFP_KERNEL | __GFP_NOWARN); - if (!field_offs) { - ret = -ENOMEM; - goto free_rec; - } - inner_map_meta->field_offs = field_offs; - } /* Note: We must use the same BTF, as we also used btf_record_dup above * which relies on BTF being same for both maps, as some members like * record->fields.list_head have pointers like value_rec pointing into @@ -88,8 +76,6 @@ struct bpf_map *bpf_map_meta_alloc(int inner_map_ufd) fdput(f); return inner_map_meta; -free_rec: - btf_record_free(inner_map_meta->record); free: kfree(inner_map_meta); put: @@ -99,7 +85,6 @@ put: void bpf_map_meta_free(struct bpf_map *map_meta) { - kfree(map_meta->field_offs); bpf_map_free_record(map_meta); btf_put(map_meta->btf); kfree(map_meta); diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 6d575505f89c..14f39c1e573e 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -35,6 +35,7 @@ #include <linux/rcupdate_trace.h> #include <linux/memcontrol.h> #include <linux/trace_events.h> +#include <net/netfilter/nf_bpf_link.h> #define IS_FD_ARRAY(map) ((map)->map_type == BPF_MAP_TYPE_PERF_EVENT_ARRAY || \ (map)->map_type == BPF_MAP_TYPE_CGROUP_ARRAY || \ @@ -552,6 +553,7 @@ void btf_record_free(struct btf_record *rec) case BPF_RB_NODE: case BPF_SPIN_LOCK: case BPF_TIMER: + case BPF_REFCOUNT: /* Nothing to release */ break; default: @@ -599,6 +601,7 @@ struct btf_record *btf_record_dup(const struct btf_record *rec) case BPF_RB_NODE: case BPF_SPIN_LOCK: case BPF_TIMER: + case BPF_REFCOUNT: /* Nothing to acquire */ break; default: @@ -705,6 +708,7 @@ void bpf_obj_free_fields(const struct btf_record *rec, void *obj) break; case BPF_LIST_NODE: case BPF_RB_NODE: + case BPF_REFCOUNT: break; default: WARN_ON_ONCE(1); @@ -717,14 +721,13 @@ void bpf_obj_free_fields(const struct btf_record *rec, void *obj) static void bpf_map_free_deferred(struct work_struct *work) { struct bpf_map *map = container_of(work, struct bpf_map, work); - struct btf_field_offs *foffs = map->field_offs; struct btf_record *rec = map->record; security_bpf_map_free(map); bpf_map_release_memcg(map); /* implementation dependent freeing */ map->ops->map_free(map); - /* Delay freeing of field_offs and btf_record for maps, as map_free + /* Delay freeing of btf_record for maps, as map_free * callback usually needs access to them. It is better to do it here * than require each callback to do the free itself manually. * @@ -733,7 +736,6 @@ static void bpf_map_free_deferred(struct work_struct *work) * eventually calls bpf_map_free_meta, since inner_map_meta is only a * template bpf_map struct used during verification. */ - kfree(foffs); btf_record_free(rec); } @@ -1034,7 +1036,7 @@ static int map_check_btf(struct bpf_map *map, const struct btf *btf, map->record = btf_parse_fields(btf, value_type, BPF_SPIN_LOCK | BPF_TIMER | BPF_KPTR | BPF_LIST_HEAD | - BPF_RB_ROOT, + BPF_RB_ROOT | BPF_REFCOUNT, map->value_size); if (!IS_ERR_OR_NULL(map->record)) { int i; @@ -1073,6 +1075,7 @@ static int map_check_btf(struct bpf_map *map, const struct btf *btf, break; case BPF_KPTR_UNREF: case BPF_KPTR_REF: + case BPF_REFCOUNT: if (map->map_type != BPF_MAP_TYPE_HASH && map->map_type != BPF_MAP_TYPE_PERCPU_HASH && map->map_type != BPF_MAP_TYPE_LRU_HASH && @@ -1125,7 +1128,6 @@ free_map_tab: static int map_create(union bpf_attr *attr) { int numa_node = bpf_map_attr_numa_node(attr); - struct btf_field_offs *foffs; struct bpf_map *map; int f_flags; int err; @@ -1205,17 +1207,9 @@ static int map_create(union bpf_attr *attr) attr->btf_vmlinux_value_type_id; } - - foffs = btf_parse_field_offs(map->record); - if (IS_ERR(foffs)) { - err = PTR_ERR(foffs); - goto free_map; - } - map->field_offs = foffs; - err = security_bpf_map_alloc(map); if (err) - goto free_map_field_offs; + goto free_map; err = bpf_map_alloc_id(map); if (err) @@ -1239,8 +1233,6 @@ static int map_create(union bpf_attr *attr) free_map_sec: security_bpf_map_free(map); -free_map_field_offs: - kfree(map->field_offs); free_map: btf_put(map->btf); map->ops->map_free(map); @@ -2463,7 +2455,6 @@ static bool is_net_admin_prog_type(enum bpf_prog_type prog_type) case BPF_PROG_TYPE_LWT_SEG6LOCAL: case BPF_PROG_TYPE_SK_SKB: case BPF_PROG_TYPE_SK_MSG: - case BPF_PROG_TYPE_LIRC_MODE2: case BPF_PROG_TYPE_FLOW_DISSECTOR: case BPF_PROG_TYPE_CGROUP_DEVICE: case BPF_PROG_TYPE_CGROUP_SOCK: @@ -2472,6 +2463,7 @@ static bool is_net_admin_prog_type(enum bpf_prog_type prog_type) case BPF_PROG_TYPE_CGROUP_SYSCTL: case BPF_PROG_TYPE_SOCK_OPS: case BPF_PROG_TYPE_EXT: /* extends any prog */ + case BPF_PROG_TYPE_NETFILTER: return true; case BPF_PROG_TYPE_CGROUP_SKB: /* always unpriv */ @@ -4598,6 +4590,7 @@ static int link_create(union bpf_attr *attr, bpfptr_t uattr) switch (prog->type) { case BPF_PROG_TYPE_EXT: + case BPF_PROG_TYPE_NETFILTER: break; case BPF_PROG_TYPE_PERF_EVENT: case BPF_PROG_TYPE_TRACEPOINT: @@ -4664,6 +4657,9 @@ static int link_create(union bpf_attr *attr, bpfptr_t uattr) case BPF_PROG_TYPE_XDP: ret = bpf_xdp_link_attach(attr, prog); break; + case BPF_PROG_TYPE_NETFILTER: + ret = bpf_nf_link_attach(attr, prog); + break; #endif case BPF_PROG_TYPE_PERF_EVENT: case BPF_PROG_TYPE_TRACEPOINT: diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 6e12cc296ad8..0d73139ee4d8 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -195,6 +195,8 @@ static void invalidate_non_owning_refs(struct bpf_verifier_env *env); static bool in_rbtree_lock_required_cb(struct bpf_verifier_env *env); static int ref_set_non_owning(struct bpf_verifier_env *env, struct bpf_reg_state *reg); +static void specialize_kfunc(struct bpf_verifier_env *env, + u32 func_id, u16 offset, unsigned long *addr); static bool bpf_map_ptr_poisoned(const struct bpf_insn_aux_data *aux) { @@ -271,6 +273,11 @@ struct bpf_call_arg_meta { struct btf_field *kptr_field; }; +struct btf_and_id { + struct btf *btf; + u32 btf_id; +}; + struct bpf_kfunc_call_arg_meta { /* In parameters */ struct btf *btf; @@ -289,10 +296,10 @@ struct bpf_kfunc_call_arg_meta { u64 value; bool found; } arg_constant; - struct { - struct btf *btf; - u32 btf_id; - } arg_obj_drop; + union { + struct btf_and_id arg_obj_drop; + struct btf_and_id arg_refcount_acquire; + }; struct { struct btf_field *field; } arg_list_head; @@ -2374,6 +2381,7 @@ struct bpf_kfunc_desc { u32 func_id; s32 imm; u16 offset; + unsigned long addr; }; struct bpf_kfunc_btf { @@ -2383,6 +2391,11 @@ struct bpf_kfunc_btf { }; struct bpf_kfunc_desc_tab { + /* Sorted by func_id (BTF ID) and offset (fd_array offset) during + * verification. JITs do lookups by bpf_insn, where func_id may not be + * available, therefore at the end of verification do_misc_fixups() + * sorts this by imm and offset. + */ struct bpf_kfunc_desc descs[MAX_KFUNC_DESCS]; u32 nr_descs; }; @@ -2423,6 +2436,19 @@ find_kfunc_desc(const struct bpf_prog *prog, u32 func_id, u16 offset) sizeof(tab->descs[0]), kfunc_desc_cmp_by_id_off); } +int bpf_get_kfunc_addr(const struct bpf_prog *prog, u32 func_id, + u16 btf_fd_idx, u8 **func_addr) +{ + const struct bpf_kfunc_desc *desc; + + desc = find_kfunc_desc(prog, func_id, btf_fd_idx); + if (!desc) + return -EFAULT; + + *func_addr = (u8 *)desc->addr; + return 0; +} + static struct btf *__find_kfunc_desc_btf(struct bpf_verifier_env *env, s16 offset) { @@ -2602,13 +2628,18 @@ static int add_kfunc_call(struct bpf_verifier_env *env, u32 func_id, s16 offset) func_name); return -EINVAL; } + specialize_kfunc(env, func_id, offset, &addr); - call_imm = BPF_CALL_IMM(addr); - /* Check whether or not the relative offset overflows desc->imm */ - if ((unsigned long)(s32)call_imm != call_imm) { - verbose(env, "address of kernel function %s is out of range\n", - func_name); - return -EINVAL; + if (bpf_jit_supports_far_kfunc_call()) { + call_imm = func_id; + } else { + call_imm = BPF_CALL_IMM(addr); + /* Check whether the relative offset overflows desc->imm */ + if ((unsigned long)(s32)call_imm != call_imm) { + verbose(env, "address of kernel function %s is out of range\n", + func_name); + return -EINVAL; + } } if (bpf_dev_bound_kfunc_id(func_id)) { @@ -2621,6 +2652,7 @@ static int add_kfunc_call(struct bpf_verifier_env *env, u32 func_id, s16 offset) desc->func_id = func_id; desc->imm = call_imm; desc->offset = offset; + desc->addr = addr; err = btf_distill_func_proto(&env->log, desc_btf, func_proto, func_name, &desc->func_model); @@ -2630,19 +2662,19 @@ static int add_kfunc_call(struct bpf_verifier_env *env, u32 func_id, s16 offset) return err; } -static int kfunc_desc_cmp_by_imm(const void *a, const void *b) +static int kfunc_desc_cmp_by_imm_off(const void *a, const void *b) { const struct bpf_kfunc_desc *d0 = a; const struct bpf_kfunc_desc *d1 = b; - if (d0->imm > d1->imm) - return 1; - else if (d0->imm < d1->imm) - return -1; + if (d0->imm != d1->imm) + return d0->imm < d1->imm ? -1 : 1; + if (d0->offset != d1->offset) + return d0->offset < d1->offset ? -1 : 1; return 0; } -static void sort_kfunc_descs_by_imm(struct bpf_prog *prog) +static void sort_kfunc_descs_by_imm_off(struct bpf_prog *prog) { struct bpf_kfunc_desc_tab *tab; @@ -2651,7 +2683,7 @@ static void sort_kfunc_descs_by_imm(struct bpf_prog *prog) return; sort(tab->descs, tab->nr_descs, sizeof(tab->descs[0]), - kfunc_desc_cmp_by_imm, NULL); + kfunc_desc_cmp_by_imm_off, NULL); } bool bpf_prog_has_kfunc_call(const struct bpf_prog *prog) @@ -2665,13 +2697,14 @@ bpf_jit_find_kfunc_model(const struct bpf_prog *prog, { const struct bpf_kfunc_desc desc = { .imm = insn->imm, + .offset = insn->off, }; const struct bpf_kfunc_desc *res; struct bpf_kfunc_desc_tab *tab; tab = prog->aux->kfunc_tab; res = bsearch(&desc, tab->descs, tab->nr_descs, - sizeof(tab->descs[0]), kfunc_desc_cmp_by_imm); + sizeof(tab->descs[0]), kfunc_desc_cmp_by_imm_off); return res ? &res->func_model : NULL; } @@ -8482,10 +8515,10 @@ static int set_rbtree_add_callback_state(struct bpf_verifier_env *env, struct bpf_func_state *callee, int insn_idx) { - /* void bpf_rbtree_add(struct bpf_rb_root *root, struct bpf_rb_node *node, + /* void bpf_rbtree_add_impl(struct bpf_rb_root *root, struct bpf_rb_node *node, * bool (less)(struct bpf_rb_node *a, const struct bpf_rb_node *b)); * - * 'struct bpf_rb_node *node' arg to bpf_rbtree_add is the same PTR_TO_BTF_ID w/ offset + * 'struct bpf_rb_node *node' arg to bpf_rbtree_add_impl is the same PTR_TO_BTF_ID w/ offset * that 'less' callback args will be receiving. However, 'node' arg was release_reference'd * by this point, so look at 'root' */ @@ -9321,11 +9354,6 @@ static bool is_kfunc_rcu(struct bpf_kfunc_call_arg_meta *meta) return meta->kfunc_flags & KF_RCU; } -static bool is_kfunc_arg_kptr_get(struct bpf_kfunc_call_arg_meta *meta, int arg) -{ - return arg == 0 && (meta->kfunc_flags & KF_KPTR_GET); -} - static bool __kfunc_param_match_suffix(const struct btf *btf, const struct btf_param *arg, const char *suffix) @@ -9390,6 +9418,11 @@ static bool is_kfunc_arg_uninit(const struct btf *btf, const struct btf_param *a return __kfunc_param_match_suffix(btf, arg, "__uninit"); } +static bool is_kfunc_arg_refcounted_kptr(const struct btf *btf, const struct btf_param *arg) +{ + return __kfunc_param_match_suffix(btf, arg, "__refcounted_kptr"); +} + static bool is_kfunc_arg_scalar_with_name(const struct btf *btf, const struct btf_param *arg, const char *name) @@ -9529,15 +9562,15 @@ static u32 *reg2btf_ids[__BPF_REG_TYPE_MAX] = { enum kfunc_ptr_arg_type { KF_ARG_PTR_TO_CTX, - KF_ARG_PTR_TO_ALLOC_BTF_ID, /* Allocated object */ - KF_ARG_PTR_TO_KPTR, /* PTR_TO_KPTR but type specific */ + KF_ARG_PTR_TO_ALLOC_BTF_ID, /* Allocated object */ + KF_ARG_PTR_TO_REFCOUNTED_KPTR, /* Refcounted local kptr */ KF_ARG_PTR_TO_DYNPTR, KF_ARG_PTR_TO_ITER, KF_ARG_PTR_TO_LIST_HEAD, KF_ARG_PTR_TO_LIST_NODE, - KF_ARG_PTR_TO_BTF_ID, /* Also covers reg2btf_ids conversions */ + KF_ARG_PTR_TO_BTF_ID, /* Also covers reg2btf_ids conversions */ KF_ARG_PTR_TO_MEM, - KF_ARG_PTR_TO_MEM_SIZE, /* Size derived from next argument, skip it */ + KF_ARG_PTR_TO_MEM_SIZE, /* Size derived from next argument, skip it */ KF_ARG_PTR_TO_CALLBACK, KF_ARG_PTR_TO_RB_ROOT, KF_ARG_PTR_TO_RB_NODE, @@ -9546,8 +9579,9 @@ enum kfunc_ptr_arg_type { enum special_kfunc_type { KF_bpf_obj_new_impl, KF_bpf_obj_drop_impl, - KF_bpf_list_push_front, - KF_bpf_list_push_back, + KF_bpf_refcount_acquire_impl, + KF_bpf_list_push_front_impl, + KF_bpf_list_push_back_impl, KF_bpf_list_pop_front, KF_bpf_list_pop_back, KF_bpf_cast_to_kern_ctx, @@ -9555,7 +9589,7 @@ enum special_kfunc_type { KF_bpf_rcu_read_lock, KF_bpf_rcu_read_unlock, KF_bpf_rbtree_remove, - KF_bpf_rbtree_add, + KF_bpf_rbtree_add_impl, KF_bpf_rbtree_first, KF_bpf_dynptr_from_skb, KF_bpf_dynptr_from_xdp, @@ -9566,14 +9600,15 @@ enum special_kfunc_type { BTF_SET_START(special_kfunc_set) BTF_ID(func, bpf_obj_new_impl) BTF_ID(func, bpf_obj_drop_impl) -BTF_ID(func, bpf_list_push_front) -BTF_ID(func, bpf_list_push_back) +BTF_ID(func, bpf_refcount_acquire_impl) +BTF_ID(func, bpf_list_push_front_impl) +BTF_ID(func, bpf_list_push_back_impl) BTF_ID(func, bpf_list_pop_front) BTF_ID(func, bpf_list_pop_back) BTF_ID(func, bpf_cast_to_kern_ctx) BTF_ID(func, bpf_rdonly_cast) BTF_ID(func, bpf_rbtree_remove) -BTF_ID(func, bpf_rbtree_add) +BTF_ID(func, bpf_rbtree_add_impl) BTF_ID(func, bpf_rbtree_first) BTF_ID(func, bpf_dynptr_from_skb) BTF_ID(func, bpf_dynptr_from_xdp) @@ -9584,8 +9619,9 @@ BTF_SET_END(special_kfunc_set) BTF_ID_LIST(special_kfunc_list) BTF_ID(func, bpf_obj_new_impl) BTF_ID(func, bpf_obj_drop_impl) -BTF_ID(func, bpf_list_push_front) -BTF_ID(func, bpf_list_push_back) +BTF_ID(func, bpf_refcount_acquire_impl) +BTF_ID(func, bpf_list_push_front_impl) +BTF_ID(func, bpf_list_push_back_impl) BTF_ID(func, bpf_list_pop_front) BTF_ID(func, bpf_list_pop_back) BTF_ID(func, bpf_cast_to_kern_ctx) @@ -9593,7 +9629,7 @@ BTF_ID(func, bpf_rdonly_cast) BTF_ID(func, bpf_rcu_read_lock) BTF_ID(func, bpf_rcu_read_unlock) BTF_ID(func, bpf_rbtree_remove) -BTF_ID(func, bpf_rbtree_add) +BTF_ID(func, bpf_rbtree_add_impl) BTF_ID(func, bpf_rbtree_first) BTF_ID(func, bpf_dynptr_from_skb) BTF_ID(func, bpf_dynptr_from_xdp) @@ -9636,20 +9672,8 @@ get_kfunc_ptr_arg_type(struct bpf_verifier_env *env, if (is_kfunc_arg_alloc_obj(meta->btf, &args[argno])) return KF_ARG_PTR_TO_ALLOC_BTF_ID; - if (is_kfunc_arg_kptr_get(meta, argno)) { - if (!btf_type_is_ptr(ref_t)) { - verbose(env, "arg#0 BTF type must be a double pointer for kptr_get kfunc\n"); - return -EINVAL; - } - ref_t = btf_type_by_id(meta->btf, ref_t->type); - ref_tname = btf_name_by_offset(meta->btf, ref_t->name_off); - if (!btf_type_is_struct(ref_t)) { - verbose(env, "kernel function %s args#0 pointer type %s %s is not supported\n", - meta->func_name, btf_type_str(ref_t), ref_tname); - return -EINVAL; - } - return KF_ARG_PTR_TO_KPTR; - } + if (is_kfunc_arg_refcounted_kptr(meta->btf, &args[argno])) + return KF_ARG_PTR_TO_REFCOUNTED_KPTR; if (is_kfunc_arg_dynptr(meta->btf, &args[argno])) return KF_ARG_PTR_TO_DYNPTR; @@ -9764,40 +9788,6 @@ static int process_kf_arg_ptr_to_btf_id(struct bpf_verifier_env *env, return 0; } -static int process_kf_arg_ptr_to_kptr(struct bpf_verifier_env *env, - struct bpf_reg_state *reg, - const struct btf_type *ref_t, - const char *ref_tname, - struct bpf_kfunc_call_arg_meta *meta, - int argno) -{ - struct btf_field *kptr_field; - - /* check_func_arg_reg_off allows var_off for - * PTR_TO_MAP_VALUE, but we need fixed offset to find - * off_desc. - */ - if (!tnum_is_const(reg->var_off)) { - verbose(env, "arg#0 must have constant offset\n"); - return -EINVAL; - } - - kptr_field = btf_record_find(reg->map_ptr->record, reg->off + reg->var_off.value, BPF_KPTR); - if (!kptr_field || kptr_field->type != BPF_KPTR_REF) { - verbose(env, "arg#0 no referenced kptr at map value offset=%llu\n", - reg->off + reg->var_off.value); - return -EINVAL; - } - - if (!btf_struct_ids_match(&env->log, meta->btf, ref_t->type, 0, kptr_field->kptr.btf, - kptr_field->kptr.btf_id, true)) { - verbose(env, "kernel function %s args#%d expected pointer to %s %s\n", - meta->func_name, argno, btf_type_str(ref_t), ref_tname); - return -EINVAL; - } - return 0; -} - static int ref_set_non_owning(struct bpf_verifier_env *env, struct bpf_reg_state *reg) { struct bpf_verifier_state *state = env->cur_state; @@ -9924,27 +9914,28 @@ static int check_reg_allocation_locked(struct bpf_verifier_env *env, struct bpf_ static bool is_bpf_list_api_kfunc(u32 btf_id) { - return btf_id == special_kfunc_list[KF_bpf_list_push_front] || - btf_id == special_kfunc_list[KF_bpf_list_push_back] || + return btf_id == special_kfunc_list[KF_bpf_list_push_front_impl] || + btf_id == special_kfunc_list[KF_bpf_list_push_back_impl] || btf_id == special_kfunc_list[KF_bpf_list_pop_front] || btf_id == special_kfunc_list[KF_bpf_list_pop_back]; } static bool is_bpf_rbtree_api_kfunc(u32 btf_id) { - return btf_id == special_kfunc_list[KF_bpf_rbtree_add] || + return btf_id == special_kfunc_list[KF_bpf_rbtree_add_impl] || btf_id == special_kfunc_list[KF_bpf_rbtree_remove] || btf_id == special_kfunc_list[KF_bpf_rbtree_first]; } static bool is_bpf_graph_api_kfunc(u32 btf_id) { - return is_bpf_list_api_kfunc(btf_id) || is_bpf_rbtree_api_kfunc(btf_id); + return is_bpf_list_api_kfunc(btf_id) || is_bpf_rbtree_api_kfunc(btf_id) || + btf_id == special_kfunc_list[KF_bpf_refcount_acquire_impl]; } static bool is_callback_calling_kfunc(u32 btf_id) { - return btf_id == special_kfunc_list[KF_bpf_rbtree_add]; + return btf_id == special_kfunc_list[KF_bpf_rbtree_add_impl]; } static bool is_rbtree_lock_required_kfunc(u32 btf_id) @@ -9985,12 +9976,12 @@ static bool check_kfunc_is_graph_node_api(struct bpf_verifier_env *env, switch (node_field_type) { case BPF_LIST_NODE: - ret = (kfunc_btf_id == special_kfunc_list[KF_bpf_list_push_front] || - kfunc_btf_id == special_kfunc_list[KF_bpf_list_push_back]); + ret = (kfunc_btf_id == special_kfunc_list[KF_bpf_list_push_front_impl] || + kfunc_btf_id == special_kfunc_list[KF_bpf_list_push_back_impl]); break; case BPF_RB_NODE: ret = (kfunc_btf_id == special_kfunc_list[KF_bpf_rbtree_remove] || - kfunc_btf_id == special_kfunc_list[KF_bpf_rbtree_add]); + kfunc_btf_id == special_kfunc_list[KF_bpf_rbtree_add_impl]); break; default: verbose(env, "verifier internal error: unexpected graph node argument type %s\n", @@ -10158,6 +10149,7 @@ static int check_kfunc_args(struct bpf_verifier_env *env, struct bpf_kfunc_call_ const char *func_name = meta->func_name, *ref_tname; const struct btf *btf = meta->btf; const struct btf_param *args; + struct btf_record *rec; u32 i, nargs; int ret; @@ -10283,7 +10275,6 @@ static int check_kfunc_args(struct bpf_verifier_env *env, struct bpf_kfunc_call_ /* Trusted arguments have the same offset checks as release arguments */ arg_type |= OBJ_RELEASE; break; - case KF_ARG_PTR_TO_KPTR: case KF_ARG_PTR_TO_DYNPTR: case KF_ARG_PTR_TO_ITER: case KF_ARG_PTR_TO_LIST_HEAD: @@ -10293,6 +10284,7 @@ static int check_kfunc_args(struct bpf_verifier_env *env, struct bpf_kfunc_call_ case KF_ARG_PTR_TO_MEM: case KF_ARG_PTR_TO_MEM_SIZE: case KF_ARG_PTR_TO_CALLBACK: + case KF_ARG_PTR_TO_REFCOUNTED_KPTR: /* Trusted by default */ break; default: @@ -10335,15 +10327,6 @@ static int check_kfunc_args(struct bpf_verifier_env *env, struct bpf_kfunc_call_ meta->arg_obj_drop.btf_id = reg->btf_id; } break; - case KF_ARG_PTR_TO_KPTR: - if (reg->type != PTR_TO_MAP_VALUE) { - verbose(env, "arg#0 expected pointer to map value\n"); - return -EINVAL; - } - ret = process_kf_arg_ptr_to_kptr(env, reg, ref_t, ref_tname, meta, i); - if (ret < 0) - return ret; - break; case KF_ARG_PTR_TO_DYNPTR: { enum bpf_arg_type dynptr_arg_type = ARG_PTR_TO_DYNPTR; @@ -10510,6 +10493,26 @@ static int check_kfunc_args(struct bpf_verifier_env *env, struct bpf_kfunc_call_ case KF_ARG_PTR_TO_CALLBACK: meta->subprogno = reg->subprogno; break; + case KF_ARG_PTR_TO_REFCOUNTED_KPTR: + if (!type_is_ptr_alloc_obj(reg->type) && !type_is_non_owning_ref(reg->type)) { + verbose(env, "arg#%d is neither owning or non-owning ref\n", i); + return -EINVAL; + } + + rec = reg_btf_record(reg); + if (!rec) { + verbose(env, "verifier internal error: Couldn't find btf_record\n"); + return -EFAULT; + } + + if (rec->refcount_off < 0) { + verbose(env, "arg#%d doesn't point to a type with bpf_refcount field\n", i); + return -EINVAL; + } + + meta->arg_refcount_acquire.btf = reg->btf; + meta->arg_refcount_acquire.btf_id = reg->btf_id; + break; } } @@ -10649,10 +10652,11 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn, } } - if (meta.func_id == special_kfunc_list[KF_bpf_list_push_front] || - meta.func_id == special_kfunc_list[KF_bpf_list_push_back] || - meta.func_id == special_kfunc_list[KF_bpf_rbtree_add]) { + if (meta.func_id == special_kfunc_list[KF_bpf_list_push_front_impl] || + meta.func_id == special_kfunc_list[KF_bpf_list_push_back_impl] || + meta.func_id == special_kfunc_list[KF_bpf_rbtree_add_impl]) { release_ref_obj_id = regs[BPF_REG_2].ref_obj_id; + insn_aux->insert_off = regs[BPF_REG_2].off; err = ref_convert_owning_non_owning(env, release_ref_obj_id); if (err) { verbose(env, "kfunc %s#%d conversion of owning ref to non-owning failed\n", @@ -10668,7 +10672,7 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn, } } - if (meta.func_id == special_kfunc_list[KF_bpf_rbtree_add]) { + if (meta.func_id == special_kfunc_list[KF_bpf_rbtree_add_impl]) { err = __check_func_call(env, insn, insn_idx_p, meta.subprogno, set_rbtree_add_callback_state); if (err) { @@ -10686,7 +10690,9 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn, if (is_kfunc_acquire(&meta) && !btf_type_is_struct_ptr(meta.btf, t)) { /* Only exception is bpf_obj_new_impl */ - if (meta.btf != btf_vmlinux || meta.func_id != special_kfunc_list[KF_bpf_obj_new_impl]) { + if (meta.btf != btf_vmlinux || + (meta.func_id != special_kfunc_list[KF_bpf_obj_new_impl] && + meta.func_id != special_kfunc_list[KF_bpf_refcount_acquire_impl])) { verbose(env, "acquire kernel function does not return PTR_TO_BTF_ID\n"); return -EINVAL; } @@ -10734,6 +10740,15 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn, insn_aux->obj_new_size = ret_t->size; insn_aux->kptr_struct_meta = btf_find_struct_meta(ret_btf, ret_btf_id); + } else if (meta.func_id == special_kfunc_list[KF_bpf_refcount_acquire_impl]) { + mark_reg_known_zero(env, regs, BPF_REG_0); + regs[BPF_REG_0].type = PTR_TO_BTF_ID | MEM_ALLOC; + regs[BPF_REG_0].btf = meta.arg_refcount_acquire.btf; + regs[BPF_REG_0].btf_id = meta.arg_refcount_acquire.btf_id; + + insn_aux->kptr_struct_meta = + btf_find_struct_meta(meta.arg_refcount_acquire.btf, + meta.arg_refcount_acquire.btf_id); } else if (meta.func_id == special_kfunc_list[KF_bpf_list_pop_front] || meta.func_id == special_kfunc_list[KF_bpf_list_pop_back]) { struct btf_field *field = meta.arg_list_head.field; @@ -10857,9 +10872,6 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn, ref_set_non_owning(env, ®s[BPF_REG_0]); } - if (meta.func_id == special_kfunc_list[KF_bpf_rbtree_remove]) - invalidate_non_owning_refs(env); - if (reg_may_point_to_spin_lock(®s[BPF_REG_0]) && !regs[BPF_REG_0].id) regs[BPF_REG_0].id = ++env->id_gen; } else if (btf_type_is_void(t)) { @@ -12424,12 +12436,17 @@ static int check_alu_op(struct bpf_verifier_env *env, struct bpf_insn *insn) insn->src_reg); return -EACCES; } else if (src_reg->type == SCALAR_VALUE) { + bool is_src_reg_u32 = src_reg->umax_value <= U32_MAX; + + if (is_src_reg_u32 && !src_reg->id) + src_reg->id = ++env->id_gen; copy_register_state(dst_reg, src_reg); - /* Make sure ID is cleared otherwise + /* Make sure ID is cleared if src_reg is not in u32 range otherwise * dst_reg min/max could be incorrectly * propagated into src_reg by find_equal_scalars() */ - dst_reg->id = 0; + if (!is_src_reg_u32) + dst_reg->id = 0; dst_reg->live |= REG_LIVE_WRITTEN; dst_reg->subreg_def = env->insn_idx + 1; } else { @@ -13814,6 +13831,9 @@ static int check_return_code(struct bpf_verifier_env *env) } break; + case BPF_PROG_TYPE_NETFILTER: + range = tnum_range(NF_DROP, NF_ACCEPT); + break; case BPF_PROG_TYPE_EXT: /* freplace program can return anything as its return value * depends on the to-be-replaced kernel func or bpf program. @@ -14700,7 +14720,7 @@ static bool regs_exact(const struct bpf_reg_state *rold, const struct bpf_reg_state *rcur, struct bpf_id_pair *idmap) { - return memcmp(rold, rcur, offsetof(struct bpf_reg_state, id)) == 0 && + return memcmp(rold, rcur, offsetof(struct bpf_reg_state, id)) == 0 && check_ids(rold->id, rcur->id, idmap) && check_ids(rold->ref_obj_id, rcur->ref_obj_id, idmap); } @@ -17308,11 +17328,62 @@ static int fixup_call_args(struct bpf_verifier_env *env) return err; } +/* replace a generic kfunc with a specialized version if necessary */ +static void specialize_kfunc(struct bpf_verifier_env *env, + u32 func_id, u16 offset, unsigned long *addr) +{ + struct bpf_prog *prog = env->prog; + bool seen_direct_write; + void *xdp_kfunc; + bool is_rdonly; + + if (bpf_dev_bound_kfunc_id(func_id)) { + xdp_kfunc = bpf_dev_bound_resolve_kfunc(prog, func_id); + if (xdp_kfunc) { + *addr = (unsigned long)xdp_kfunc; + return; + } + /* fallback to default kfunc when not supported by netdev */ + } + + if (offset) + return; + + if (func_id == special_kfunc_list[KF_bpf_dynptr_from_skb]) { + seen_direct_write = env->seen_direct_write; + is_rdonly = !may_access_direct_pkt_data(env, NULL, BPF_WRITE); + + if (is_rdonly) + *addr = (unsigned long)bpf_dynptr_from_skb_rdonly; + + /* restore env->seen_direct_write to its original value, since + * may_access_direct_pkt_data mutates it + */ + env->seen_direct_write = seen_direct_write; + } +} + +static void __fixup_collection_insert_kfunc(struct bpf_insn_aux_data *insn_aux, + u16 struct_meta_reg, + u16 node_offset_reg, + struct bpf_insn *insn, + struct bpf_insn *insn_buf, + int *cnt) +{ + struct btf_struct_meta *kptr_struct_meta = insn_aux->kptr_struct_meta; + struct bpf_insn addr[2] = { BPF_LD_IMM64(struct_meta_reg, (long)kptr_struct_meta) }; + + insn_buf[0] = addr[0]; + insn_buf[1] = addr[1]; + insn_buf[2] = BPF_MOV64_IMM(node_offset_reg, insn_aux->insert_off); + insn_buf[3] = *insn; + *cnt = 4; +} + static int fixup_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn, struct bpf_insn *insn_buf, int insn_idx, int *cnt) { const struct bpf_kfunc_desc *desc; - void *xdp_kfunc; if (!insn->imm) { verbose(env, "invalid kernel function call not eliminated in verifier pass\n"); @@ -17321,18 +17392,9 @@ static int fixup_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn, *cnt = 0; - if (bpf_dev_bound_kfunc_id(insn->imm)) { - xdp_kfunc = bpf_dev_bound_resolve_kfunc(env->prog, insn->imm); - if (xdp_kfunc) { - insn->imm = BPF_CALL_IMM(xdp_kfunc); - return 0; - } - - /* fallback to default kfunc when not supported by netdev */ - } - - /* insn->imm has the btf func_id. Replace it with - * an address (relative to __bpf_call_base). + /* insn->imm has the btf func_id. Replace it with an offset relative to + * __bpf_call_base, unless the JIT needs to call functions that are + * further than 32 bits away (bpf_jit_supports_far_kfunc_call()). */ desc = find_kfunc_desc(env->prog, insn->imm, insn->off); if (!desc) { @@ -17341,7 +17403,8 @@ static int fixup_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn, return -EFAULT; } - insn->imm = desc->imm; + if (!bpf_jit_supports_far_kfunc_call()) + insn->imm = BPF_CALL_IMM(desc->addr); if (insn->off) return 0; if (desc->func_id == special_kfunc_list[KF_bpf_obj_new_impl]) { @@ -17354,7 +17417,8 @@ static int fixup_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn, insn_buf[2] = addr[1]; insn_buf[3] = *insn; *cnt = 4; - } else if (desc->func_id == special_kfunc_list[KF_bpf_obj_drop_impl]) { + } else if (desc->func_id == special_kfunc_list[KF_bpf_obj_drop_impl] || + desc->func_id == special_kfunc_list[KF_bpf_refcount_acquire_impl]) { struct btf_struct_meta *kptr_struct_meta = env->insn_aux_data[insn_idx].kptr_struct_meta; struct bpf_insn addr[2] = { BPF_LD_IMM64(BPF_REG_2, (long)kptr_struct_meta) }; @@ -17362,21 +17426,24 @@ static int fixup_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn, insn_buf[1] = addr[1]; insn_buf[2] = *insn; *cnt = 3; + } else if (desc->func_id == special_kfunc_list[KF_bpf_list_push_back_impl] || + desc->func_id == special_kfunc_list[KF_bpf_list_push_front_impl] || + desc->func_id == special_kfunc_list[KF_bpf_rbtree_add_impl]) { + int struct_meta_reg = BPF_REG_3; + int node_offset_reg = BPF_REG_4; + + /* rbtree_add has extra 'less' arg, so args-to-fixup are in diff regs */ + if (desc->func_id == special_kfunc_list[KF_bpf_rbtree_add_impl]) { + struct_meta_reg = BPF_REG_4; + node_offset_reg = BPF_REG_5; + } + + __fixup_collection_insert_kfunc(&env->insn_aux_data[insn_idx], struct_meta_reg, + node_offset_reg, insn, insn_buf, cnt); } else if (desc->func_id == special_kfunc_list[KF_bpf_cast_to_kern_ctx] || desc->func_id == special_kfunc_list[KF_bpf_rdonly_cast]) { insn_buf[0] = BPF_MOV64_REG(BPF_REG_0, BPF_REG_1); *cnt = 1; - } else if (desc->func_id == special_kfunc_list[KF_bpf_dynptr_from_skb]) { - bool seen_direct_write = env->seen_direct_write; - bool is_rdonly = !may_access_direct_pkt_data(env, NULL, BPF_WRITE); - - if (is_rdonly) - insn->imm = BPF_CALL_IMM(bpf_dynptr_from_skb_rdonly); - - /* restore env->seen_direct_write to its original value, since - * may_access_direct_pkt_data mutates it - */ - env->seen_direct_write = seen_direct_write; } return 0; } @@ -17906,7 +17973,7 @@ patch_call_imm: } } - sort_kfunc_descs_by_imm(env->prog); + sort_kfunc_descs_by_imm_off(env->prog); return 0; } @@ -18597,6 +18664,10 @@ BTF_ID(func, migrate_enable) #if !defined CONFIG_PREEMPT_RCU && !defined CONFIG_TINY_RCU BTF_ID(func, rcu_read_unlock_strict) #endif +#if defined(CONFIG_DEBUG_PREEMPT) || defined(CONFIG_TRACE_PREEMPT_TOGGLE) +BTF_ID(func, preempt_count_add) +BTF_ID(func, preempt_count_sub) +#endif BTF_SET_END(btf_id_deny) static bool can_be_sleepable(struct bpf_prog *prog) |