aboutsummaryrefslogtreecommitdiff
path: root/kernel
diff options
context:
space:
mode:
Diffstat (limited to 'kernel')
-rw-r--r--kernel/bpf/btf.c148
-rw-r--r--kernel/bpf/cgroup.c9
-rw-r--r--kernel/bpf/core.c11
-rw-r--r--kernel/bpf/helpers.c113
-rw-r--r--kernel/bpf/map_in_map.c15
-rw-r--r--kernel/bpf/syscall.c30
-rw-r--r--kernel/bpf/verifier.c355
7 files changed, 375 insertions, 306 deletions
diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c
index 2c2d1fb9f410..6b682b8e4b50 100644
--- a/kernel/bpf/btf.c
+++ b/kernel/bpf/btf.c
@@ -25,6 +25,9 @@
#include <linux/bsearch.h>
#include <linux/kobject.h>
#include <linux/sysfs.h>
+
+#include <net/netfilter/nf_bpf_link.h>
+
#include <net/sock.h>
#include "../tools/lib/bpf/relo_core.h"
@@ -212,6 +215,7 @@ enum btf_kfunc_hook {
BTF_KFUNC_HOOK_SK_SKB,
BTF_KFUNC_HOOK_SOCKET_FILTER,
BTF_KFUNC_HOOK_LWT,
+ BTF_KFUNC_HOOK_NETFILTER,
BTF_KFUNC_HOOK_MAX,
};
@@ -577,8 +581,8 @@ static s32 bpf_find_btf_id(const char *name, u32 kind, struct btf **btf_p)
*btf_p = btf;
return ret;
}
- spin_lock_bh(&btf_idr_lock);
btf_put(btf);
+ spin_lock_bh(&btf_idr_lock);
}
spin_unlock_bh(&btf_idr_lock);
return ret;
@@ -1666,10 +1670,8 @@ static void btf_struct_metas_free(struct btf_struct_metas *tab)
if (!tab)
return;
- for (i = 0; i < tab->cnt; i++) {
+ for (i = 0; i < tab->cnt; i++)
btf_record_free(tab->types[i].record);
- kfree(tab->types[i].field_offs);
- }
kfree(tab);
}
@@ -3393,6 +3395,7 @@ static int btf_get_field_type(const char *name, u32 field_mask, u32 *seen_mask,
field_mask_test_name(BPF_LIST_NODE, "bpf_list_node");
field_mask_test_name(BPF_RB_ROOT, "bpf_rb_root");
field_mask_test_name(BPF_RB_NODE, "bpf_rb_node");
+ field_mask_test_name(BPF_REFCOUNT, "bpf_refcount");
/* Only return BPF_KPTR when all other types with matchable names fail */
if (field_mask & BPF_KPTR) {
@@ -3441,6 +3444,7 @@ static int btf_find_struct_field(const struct btf *btf,
case BPF_TIMER:
case BPF_LIST_NODE:
case BPF_RB_NODE:
+ case BPF_REFCOUNT:
ret = btf_find_struct(btf, member_type, off, sz, field_type,
idx < info_cnt ? &info[idx] : &tmp);
if (ret < 0)
@@ -3506,6 +3510,7 @@ static int btf_find_datasec_var(const struct btf *btf, const struct btf_type *t,
case BPF_TIMER:
case BPF_LIST_NODE:
case BPF_RB_NODE:
+ case BPF_REFCOUNT:
ret = btf_find_struct(btf, var_type, off, sz, field_type,
idx < info_cnt ? &info[idx] : &tmp);
if (ret < 0)
@@ -3700,12 +3705,24 @@ static int btf_parse_rb_root(const struct btf *btf, struct btf_field *field,
__alignof__(struct bpf_rb_node));
}
+static int btf_field_cmp(const void *_a, const void *_b, const void *priv)
+{
+ const struct btf_field *a = (const struct btf_field *)_a;
+ const struct btf_field *b = (const struct btf_field *)_b;
+
+ if (a->offset < b->offset)
+ return -1;
+ else if (a->offset > b->offset)
+ return 1;
+ return 0;
+}
+
struct btf_record *btf_parse_fields(const struct btf *btf, const struct btf_type *t,
u32 field_mask, u32 value_size)
{
struct btf_field_info info_arr[BTF_FIELDS_MAX];
+ u32 next_off = 0, field_type_size;
struct btf_record *rec;
- u32 next_off = 0;
int ret, i, cnt;
ret = btf_find_field(btf, t, field_mask, info_arr, ARRAY_SIZE(info_arr));
@@ -3724,8 +3741,10 @@ struct btf_record *btf_parse_fields(const struct btf *btf, const struct btf_type
rec->spin_lock_off = -EINVAL;
rec->timer_off = -EINVAL;
+ rec->refcount_off = -EINVAL;
for (i = 0; i < cnt; i++) {
- if (info_arr[i].off + btf_field_type_size(info_arr[i].type) > value_size) {
+ field_type_size = btf_field_type_size(info_arr[i].type);
+ if (info_arr[i].off + field_type_size > value_size) {
WARN_ONCE(1, "verifier bug off %d size %d", info_arr[i].off, value_size);
ret = -EFAULT;
goto end;
@@ -3734,11 +3753,12 @@ struct btf_record *btf_parse_fields(const struct btf *btf, const struct btf_type
ret = -EEXIST;
goto end;
}
- next_off = info_arr[i].off + btf_field_type_size(info_arr[i].type);
+ next_off = info_arr[i].off + field_type_size;
rec->field_mask |= info_arr[i].type;
rec->fields[i].offset = info_arr[i].off;
rec->fields[i].type = info_arr[i].type;
+ rec->fields[i].size = field_type_size;
switch (info_arr[i].type) {
case BPF_SPIN_LOCK:
@@ -3751,6 +3771,11 @@ struct btf_record *btf_parse_fields(const struct btf *btf, const struct btf_type
/* Cache offset for faster lookup at runtime */
rec->timer_off = rec->fields[i].offset;
break;
+ case BPF_REFCOUNT:
+ WARN_ON_ONCE(rec->refcount_off >= 0);
+ /* Cache offset for faster lookup at runtime */
+ rec->refcount_off = rec->fields[i].offset;
+ break;
case BPF_KPTR_UNREF:
case BPF_KPTR_REF:
ret = btf_parse_kptr(btf, &rec->fields[i], &info_arr[i]);
@@ -3784,30 +3809,16 @@ struct btf_record *btf_parse_fields(const struct btf *btf, const struct btf_type
goto end;
}
- /* need collection identity for non-owning refs before allowing this
- *
- * Consider a node type w/ both list and rb_node fields:
- * struct node {
- * struct bpf_list_node l;
- * struct bpf_rb_node r;
- * }
- *
- * Used like so:
- * struct node *n = bpf_obj_new(....);
- * bpf_list_push_front(&list_head, &n->l);
- * bpf_rbtree_remove(&rb_root, &n->r);
- *
- * It should not be possible to rbtree_remove the node since it hasn't
- * been added to a tree. But push_front converts n to a non-owning
- * reference, and rbtree_remove accepts the non-owning reference to
- * a type w/ bpf_rb_node field.
- */
- if (btf_record_has_field(rec, BPF_LIST_NODE) &&
+ if (rec->refcount_off < 0 &&
+ btf_record_has_field(rec, BPF_LIST_NODE) &&
btf_record_has_field(rec, BPF_RB_NODE)) {
ret = -EINVAL;
goto end;
}
+ sort_r(rec->fields, rec->cnt, sizeof(struct btf_field), btf_field_cmp,
+ NULL, rec);
+
return rec;
end:
btf_record_free(rec);
@@ -3889,61 +3900,6 @@ int btf_check_and_fixup_fields(const struct btf *btf, struct btf_record *rec)
return 0;
}
-static int btf_field_offs_cmp(const void *_a, const void *_b, const void *priv)
-{
- const u32 a = *(const u32 *)_a;
- const u32 b = *(const u32 *)_b;
-
- if (a < b)
- return -1;
- else if (a > b)
- return 1;
- return 0;
-}
-
-static void btf_field_offs_swap(void *_a, void *_b, int size, const void *priv)
-{
- struct btf_field_offs *foffs = (void *)priv;
- u32 *off_base = foffs->field_off;
- u32 *a = _a, *b = _b;
- u8 *sz_a, *sz_b;
-
- sz_a = foffs->field_sz + (a - off_base);
- sz_b = foffs->field_sz + (b - off_base);
-
- swap(*a, *b);
- swap(*sz_a, *sz_b);
-}
-
-struct btf_field_offs *btf_parse_field_offs(struct btf_record *rec)
-{
- struct btf_field_offs *foffs;
- u32 i, *off;
- u8 *sz;
-
- BUILD_BUG_ON(ARRAY_SIZE(foffs->field_off) != ARRAY_SIZE(foffs->field_sz));
- if (IS_ERR_OR_NULL(rec))
- return NULL;
-
- foffs = kzalloc(sizeof(*foffs), GFP_KERNEL | __GFP_NOWARN);
- if (!foffs)
- return ERR_PTR(-ENOMEM);
-
- off = foffs->field_off;
- sz = foffs->field_sz;
- for (i = 0; i < rec->cnt; i++) {
- off[i] = rec->fields[i].offset;
- sz[i] = btf_field_type_size(rec->fields[i].type);
- }
- foffs->cnt = rec->cnt;
-
- if (foffs->cnt == 1)
- return foffs;
- sort_r(foffs->field_off, foffs->cnt, sizeof(foffs->field_off[0]),
- btf_field_offs_cmp, btf_field_offs_swap, foffs);
- return foffs;
-}
-
static void __btf_struct_show(const struct btf *btf, const struct btf_type *t,
u32 type_id, void *data, u8 bits_offset,
struct btf_show *show)
@@ -5348,6 +5304,7 @@ static const char *alloc_obj_fields[] = {
"bpf_list_node",
"bpf_rb_root",
"bpf_rb_node",
+ "bpf_refcount",
};
static struct btf_struct_metas *
@@ -5386,7 +5343,6 @@ btf_parse_struct_metas(struct bpf_verifier_log *log, struct btf *btf)
for (i = 1; i < n; i++) {
struct btf_struct_metas *new_tab;
const struct btf_member *member;
- struct btf_field_offs *foffs;
struct btf_struct_meta *type;
struct btf_record *record;
const struct btf_type *t;
@@ -5422,23 +5378,13 @@ btf_parse_struct_metas(struct bpf_verifier_log *log, struct btf *btf)
type = &tab->types[tab->cnt];
type->btf_id = i;
record = btf_parse_fields(btf, t, BPF_SPIN_LOCK | BPF_LIST_HEAD | BPF_LIST_NODE |
- BPF_RB_ROOT | BPF_RB_NODE, t->size);
+ BPF_RB_ROOT | BPF_RB_NODE | BPF_REFCOUNT, t->size);
/* The record cannot be unset, treat it as an error if so */
if (IS_ERR_OR_NULL(record)) {
ret = PTR_ERR_OR_ZERO(record) ?: -EFAULT;
goto free;
}
- foffs = btf_parse_field_offs(record);
- /* We need the field_offs to be valid for a valid record,
- * either both should be set or both should be unset.
- */
- if (IS_ERR_OR_NULL(foffs)) {
- btf_record_free(record);
- ret = -EFAULT;
- goto free;
- }
type->record = record;
- type->field_offs = foffs;
tab->cnt++;
}
return tab;
@@ -6215,11 +6161,13 @@ again:
if (off < moff)
goto error;
- /* Only allow structure for now, can be relaxed for
- * other types later.
- */
+ /* allow structure and integer */
t = btf_type_skip_modifiers(btf, array_elem->type,
NULL);
+
+ if (btf_type_is_int(t))
+ return WALK_SCALAR;
+
if (!btf_type_is_struct(t))
goto error;
@@ -7858,6 +7806,8 @@ static int bpf_prog_type_to_kfunc_hook(enum bpf_prog_type prog_type)
case BPF_PROG_TYPE_LWT_XMIT:
case BPF_PROG_TYPE_LWT_SEG6LOCAL:
return BTF_KFUNC_HOOK_LWT;
+ case BPF_PROG_TYPE_NETFILTER:
+ return BTF_KFUNC_HOOK_NETFILTER;
default:
return BTF_KFUNC_HOOK_MAX;
}
@@ -8410,12 +8360,10 @@ check_modules:
btf_get(mod_btf);
spin_unlock_bh(&btf_idr_lock);
cands = bpf_core_add_cands(cands, mod_btf, btf_nr_types(main_btf));
- if (IS_ERR(cands)) {
- btf_put(mod_btf);
+ btf_put(mod_btf);
+ if (IS_ERR(cands))
return ERR_CAST(cands);
- }
spin_lock_bh(&btf_idr_lock);
- btf_put(mod_btf);
}
spin_unlock_bh(&btf_idr_lock);
/* cands is a pointer to kmalloced memory here if cands->cnt > 0
diff --git a/kernel/bpf/cgroup.c b/kernel/bpf/cgroup.c
index 53edb8ad2471..a06e118a9be5 100644
--- a/kernel/bpf/cgroup.c
+++ b/kernel/bpf/cgroup.c
@@ -1921,14 +1921,17 @@ int __cgroup_bpf_run_filter_getsockopt(struct sock *sk, int level,
if (ret < 0)
goto out;
- if (ctx.optlen > max_optlen || ctx.optlen < 0) {
+ if (optval && (ctx.optlen > max_optlen || ctx.optlen < 0)) {
ret = -EFAULT;
goto out;
}
if (ctx.optlen != 0) {
- if (copy_to_user(optval, ctx.optval, ctx.optlen) ||
- put_user(ctx.optlen, optlen)) {
+ if (optval && copy_to_user(optval, ctx.optval, ctx.optlen)) {
+ ret = -EFAULT;
+ goto out;
+ }
+ if (put_user(ctx.optlen, optlen)) {
ret = -EFAULT;
goto out;
}
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index e2d256c82072..7421487422d4 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -1187,6 +1187,7 @@ int bpf_jit_get_func_addr(const struct bpf_prog *prog,
s16 off = insn->off;
s32 imm = insn->imm;
u8 *addr;
+ int err;
*func_addr_fixed = insn->src_reg != BPF_PSEUDO_CALL;
if (!*func_addr_fixed) {
@@ -1201,6 +1202,11 @@ int bpf_jit_get_func_addr(const struct bpf_prog *prog,
addr = (u8 *)prog->aux->func[off]->bpf_func;
else
return -EINVAL;
+ } else if (insn->src_reg == BPF_PSEUDO_KFUNC_CALL &&
+ bpf_jit_supports_far_kfunc_call()) {
+ err = bpf_get_kfunc_addr(prog, insn->imm, insn->off, &addr);
+ if (err)
+ return err;
} else {
/* Address of a BPF helper call. Since part of the core
* kernel, it's always at a fixed location. __bpf_call_base
@@ -2732,6 +2738,11 @@ bool __weak bpf_jit_supports_kfunc_call(void)
return false;
}
+bool __weak bpf_jit_supports_far_kfunc_call(void)
+{
+ return false;
+}
+
/* To execute LD_ABS/LD_IND instructions __bpf_prog_run() may call
* skb_copy_bits(), so provide a weak definition of it for NET-less config.
*/
diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c
index f04e60a4847f..8d368fa353f9 100644
--- a/kernel/bpf/helpers.c
+++ b/kernel/bpf/helpers.c
@@ -1798,6 +1798,8 @@ bpf_base_func_proto(enum bpf_func_id func_id)
}
}
+void __bpf_obj_drop_impl(void *p, const struct btf_record *rec);
+
void bpf_list_head_free(const struct btf_field *field, void *list_head,
struct bpf_spin_lock *spin_lock)
{
@@ -1828,13 +1830,8 @@ unlock:
/* The contained type can also have resources, including a
* bpf_list_head which needs to be freed.
*/
- bpf_obj_free_fields(field->graph_root.value_rec, obj);
- /* bpf_mem_free requires migrate_disable(), since we can be
- * called from map free path as well apart from BPF program (as
- * part of map ops doing bpf_obj_free_fields).
- */
migrate_disable();
- bpf_mem_free(&bpf_global_ma, obj);
+ __bpf_obj_drop_impl(obj, field->graph_root.value_rec);
migrate_enable();
}
}
@@ -1871,10 +1868,9 @@ void bpf_rb_root_free(const struct btf_field *field, void *rb_root,
obj = pos;
obj -= field->graph_root.node_offset;
- bpf_obj_free_fields(field->graph_root.value_rec, obj);
migrate_disable();
- bpf_mem_free(&bpf_global_ma, obj);
+ __bpf_obj_drop_impl(obj, field->graph_root.value_rec);
migrate_enable();
}
}
@@ -1893,12 +1889,21 @@ __bpf_kfunc void *bpf_obj_new_impl(u64 local_type_id__k, void *meta__ign)
if (!p)
return NULL;
if (meta)
- bpf_obj_init(meta->field_offs, p);
+ bpf_obj_init(meta->record, p);
return p;
}
+/* Must be called under migrate_disable(), as required by bpf_mem_free */
void __bpf_obj_drop_impl(void *p, const struct btf_record *rec)
{
+ if (rec && rec->refcount_off >= 0 &&
+ !refcount_dec_and_test((refcount_t *)(p + rec->refcount_off))) {
+ /* Object is refcounted and refcount_dec didn't result in 0
+ * refcount. Return without freeing the object
+ */
+ return;
+ }
+
if (rec)
bpf_obj_free_fields(rec, p);
bpf_mem_free(&bpf_global_ma, p);
@@ -1912,31 +1917,68 @@ __bpf_kfunc void bpf_obj_drop_impl(void *p__alloc, void *meta__ign)
__bpf_obj_drop_impl(p, meta ? meta->record : NULL);
}
-static void __bpf_list_add(struct bpf_list_node *node, struct bpf_list_head *head, bool tail)
+__bpf_kfunc void *bpf_refcount_acquire_impl(void *p__refcounted_kptr, void *meta__ign)
+{
+ struct btf_struct_meta *meta = meta__ign;
+ struct bpf_refcount *ref;
+
+ /* Could just cast directly to refcount_t *, but need some code using
+ * bpf_refcount type so that it is emitted in vmlinux BTF
+ */
+ ref = (struct bpf_refcount *)(p__refcounted_kptr + meta->record->refcount_off);
+
+ refcount_inc((refcount_t *)ref);
+ return (void *)p__refcounted_kptr;
+}
+
+static int __bpf_list_add(struct bpf_list_node *node, struct bpf_list_head *head,
+ bool tail, struct btf_record *rec, u64 off)
{
struct list_head *n = (void *)node, *h = (void *)head;
+ /* If list_head was 0-initialized by map, bpf_obj_init_field wasn't
+ * called on its fields, so init here
+ */
if (unlikely(!h->next))
INIT_LIST_HEAD(h);
- if (unlikely(!n->next))
- INIT_LIST_HEAD(n);
+ if (!list_empty(n)) {
+ /* Only called from BPF prog, no need to migrate_disable */
+ __bpf_obj_drop_impl(n - off, rec);
+ return -EINVAL;
+ }
+
tail ? list_add_tail(n, h) : list_add(n, h);
+
+ return 0;
}
-__bpf_kfunc void bpf_list_push_front(struct bpf_list_head *head, struct bpf_list_node *node)
+__bpf_kfunc int bpf_list_push_front_impl(struct bpf_list_head *head,
+ struct bpf_list_node *node,
+ void *meta__ign, u64 off)
{
- return __bpf_list_add(node, head, false);
+ struct btf_struct_meta *meta = meta__ign;
+
+ return __bpf_list_add(node, head, false,
+ meta ? meta->record : NULL, off);
}
-__bpf_kfunc void bpf_list_push_back(struct bpf_list_head *head, struct bpf_list_node *node)
+__bpf_kfunc int bpf_list_push_back_impl(struct bpf_list_head *head,
+ struct bpf_list_node *node,
+ void *meta__ign, u64 off)
{
- return __bpf_list_add(node, head, true);
+ struct btf_struct_meta *meta = meta__ign;
+
+ return __bpf_list_add(node, head, true,
+ meta ? meta->record : NULL, off);
}
static struct bpf_list_node *__bpf_list_del(struct bpf_list_head *head, bool tail)
{
struct list_head *n, *h = (void *)head;
+ /* If list_head was 0-initialized by map, bpf_obj_init_field wasn't
+ * called on its fields, so init here
+ */
if (unlikely(!h->next))
INIT_LIST_HEAD(h);
if (list_empty(h))
@@ -1962,6 +2004,9 @@ __bpf_kfunc struct bpf_rb_node *bpf_rbtree_remove(struct bpf_rb_root *root,
struct rb_root_cached *r = (struct rb_root_cached *)root;
struct rb_node *n = (struct rb_node *)node;
+ if (RB_EMPTY_NODE(n))
+ return NULL;
+
rb_erase_cached(n, r);
RB_CLEAR_NODE(n);
return (struct bpf_rb_node *)n;
@@ -1970,14 +2015,20 @@ __bpf_kfunc struct bpf_rb_node *bpf_rbtree_remove(struct bpf_rb_root *root,
/* Need to copy rbtree_add_cached's logic here because our 'less' is a BPF
* program
*/
-static void __bpf_rbtree_add(struct bpf_rb_root *root, struct bpf_rb_node *node,
- void *less)
+static int __bpf_rbtree_add(struct bpf_rb_root *root, struct bpf_rb_node *node,
+ void *less, struct btf_record *rec, u64 off)
{
struct rb_node **link = &((struct rb_root_cached *)root)->rb_root.rb_node;
+ struct rb_node *parent = NULL, *n = (struct rb_node *)node;
bpf_callback_t cb = (bpf_callback_t)less;
- struct rb_node *parent = NULL;
bool leftmost = true;
+ if (!RB_EMPTY_NODE(n)) {
+ /* Only called from BPF prog, no need to migrate_disable */
+ __bpf_obj_drop_impl(n - off, rec);
+ return -EINVAL;
+ }
+
while (*link) {
parent = *link;
if (cb((uintptr_t)node, (uintptr_t)parent, 0, 0, 0)) {
@@ -1988,15 +2039,18 @@ static void __bpf_rbtree_add(struct bpf_rb_root *root, struct bpf_rb_node *node,
}
}
- rb_link_node((struct rb_node *)node, parent, link);
- rb_insert_color_cached((struct rb_node *)node,
- (struct rb_root_cached *)root, leftmost);
+ rb_link_node(n, parent, link);
+ rb_insert_color_cached(n, (struct rb_root_cached *)root, leftmost);
+ return 0;
}
-__bpf_kfunc void bpf_rbtree_add(struct bpf_rb_root *root, struct bpf_rb_node *node,
- bool (less)(struct bpf_rb_node *a, const struct bpf_rb_node *b))
+__bpf_kfunc int bpf_rbtree_add_impl(struct bpf_rb_root *root, struct bpf_rb_node *node,
+ bool (less)(struct bpf_rb_node *a, const struct bpf_rb_node *b),
+ void *meta__ign, u64 off)
{
- __bpf_rbtree_add(root, node, (void *)less);
+ struct btf_struct_meta *meta = meta__ign;
+
+ return __bpf_rbtree_add(root, node, (void *)less, meta ? meta->record : NULL, off);
}
__bpf_kfunc struct bpf_rb_node *bpf_rbtree_first(struct bpf_rb_root *root)
@@ -2271,14 +2325,15 @@ BTF_ID_FLAGS(func, crash_kexec, KF_DESTRUCTIVE)
#endif
BTF_ID_FLAGS(func, bpf_obj_new_impl, KF_ACQUIRE | KF_RET_NULL)
BTF_ID_FLAGS(func, bpf_obj_drop_impl, KF_RELEASE)
-BTF_ID_FLAGS(func, bpf_list_push_front)
-BTF_ID_FLAGS(func, bpf_list_push_back)
+BTF_ID_FLAGS(func, bpf_refcount_acquire_impl, KF_ACQUIRE)
+BTF_ID_FLAGS(func, bpf_list_push_front_impl)
+BTF_ID_FLAGS(func, bpf_list_push_back_impl)
BTF_ID_FLAGS(func, bpf_list_pop_front, KF_ACQUIRE | KF_RET_NULL)
BTF_ID_FLAGS(func, bpf_list_pop_back, KF_ACQUIRE | KF_RET_NULL)
BTF_ID_FLAGS(func, bpf_task_acquire, KF_ACQUIRE | KF_RCU | KF_RET_NULL)
BTF_ID_FLAGS(func, bpf_task_release, KF_RELEASE)
-BTF_ID_FLAGS(func, bpf_rbtree_remove, KF_ACQUIRE)
-BTF_ID_FLAGS(func, bpf_rbtree_add)
+BTF_ID_FLAGS(func, bpf_rbtree_remove, KF_ACQUIRE | KF_RET_NULL)
+BTF_ID_FLAGS(func, bpf_rbtree_add_impl)
BTF_ID_FLAGS(func, bpf_rbtree_first, KF_RET_NULL)
#ifdef CONFIG_CGROUPS
diff --git a/kernel/bpf/map_in_map.c b/kernel/bpf/map_in_map.c
index 38136ec4e095..2c5c64c2a53b 100644
--- a/kernel/bpf/map_in_map.c
+++ b/kernel/bpf/map_in_map.c
@@ -56,18 +56,6 @@ struct bpf_map *bpf_map_meta_alloc(int inner_map_ufd)
ret = PTR_ERR(inner_map_meta->record);
goto free;
}
- if (inner_map_meta->record) {
- struct btf_field_offs *field_offs;
- /* If btf_record is !IS_ERR_OR_NULL, then field_offs is always
- * valid.
- */
- field_offs = kmemdup(inner_map->field_offs, sizeof(*inner_map->field_offs), GFP_KERNEL | __GFP_NOWARN);
- if (!field_offs) {
- ret = -ENOMEM;
- goto free_rec;
- }
- inner_map_meta->field_offs = field_offs;
- }
/* Note: We must use the same BTF, as we also used btf_record_dup above
* which relies on BTF being same for both maps, as some members like
* record->fields.list_head have pointers like value_rec pointing into
@@ -88,8 +76,6 @@ struct bpf_map *bpf_map_meta_alloc(int inner_map_ufd)
fdput(f);
return inner_map_meta;
-free_rec:
- btf_record_free(inner_map_meta->record);
free:
kfree(inner_map_meta);
put:
@@ -99,7 +85,6 @@ put:
void bpf_map_meta_free(struct bpf_map *map_meta)
{
- kfree(map_meta->field_offs);
bpf_map_free_record(map_meta);
btf_put(map_meta->btf);
kfree(map_meta);
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 6d575505f89c..14f39c1e573e 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -35,6 +35,7 @@
#include <linux/rcupdate_trace.h>
#include <linux/memcontrol.h>
#include <linux/trace_events.h>
+#include <net/netfilter/nf_bpf_link.h>
#define IS_FD_ARRAY(map) ((map)->map_type == BPF_MAP_TYPE_PERF_EVENT_ARRAY || \
(map)->map_type == BPF_MAP_TYPE_CGROUP_ARRAY || \
@@ -552,6 +553,7 @@ void btf_record_free(struct btf_record *rec)
case BPF_RB_NODE:
case BPF_SPIN_LOCK:
case BPF_TIMER:
+ case BPF_REFCOUNT:
/* Nothing to release */
break;
default:
@@ -599,6 +601,7 @@ struct btf_record *btf_record_dup(const struct btf_record *rec)
case BPF_RB_NODE:
case BPF_SPIN_LOCK:
case BPF_TIMER:
+ case BPF_REFCOUNT:
/* Nothing to acquire */
break;
default:
@@ -705,6 +708,7 @@ void bpf_obj_free_fields(const struct btf_record *rec, void *obj)
break;
case BPF_LIST_NODE:
case BPF_RB_NODE:
+ case BPF_REFCOUNT:
break;
default:
WARN_ON_ONCE(1);
@@ -717,14 +721,13 @@ void bpf_obj_free_fields(const struct btf_record *rec, void *obj)
static void bpf_map_free_deferred(struct work_struct *work)
{
struct bpf_map *map = container_of(work, struct bpf_map, work);
- struct btf_field_offs *foffs = map->field_offs;
struct btf_record *rec = map->record;
security_bpf_map_free(map);
bpf_map_release_memcg(map);
/* implementation dependent freeing */
map->ops->map_free(map);
- /* Delay freeing of field_offs and btf_record for maps, as map_free
+ /* Delay freeing of btf_record for maps, as map_free
* callback usually needs access to them. It is better to do it here
* than require each callback to do the free itself manually.
*
@@ -733,7 +736,6 @@ static void bpf_map_free_deferred(struct work_struct *work)
* eventually calls bpf_map_free_meta, since inner_map_meta is only a
* template bpf_map struct used during verification.
*/
- kfree(foffs);
btf_record_free(rec);
}
@@ -1034,7 +1036,7 @@ static int map_check_btf(struct bpf_map *map, const struct btf *btf,
map->record = btf_parse_fields(btf, value_type,
BPF_SPIN_LOCK | BPF_TIMER | BPF_KPTR | BPF_LIST_HEAD |
- BPF_RB_ROOT,
+ BPF_RB_ROOT | BPF_REFCOUNT,
map->value_size);
if (!IS_ERR_OR_NULL(map->record)) {
int i;
@@ -1073,6 +1075,7 @@ static int map_check_btf(struct bpf_map *map, const struct btf *btf,
break;
case BPF_KPTR_UNREF:
case BPF_KPTR_REF:
+ case BPF_REFCOUNT:
if (map->map_type != BPF_MAP_TYPE_HASH &&
map->map_type != BPF_MAP_TYPE_PERCPU_HASH &&
map->map_type != BPF_MAP_TYPE_LRU_HASH &&
@@ -1125,7 +1128,6 @@ free_map_tab:
static int map_create(union bpf_attr *attr)
{
int numa_node = bpf_map_attr_numa_node(attr);
- struct btf_field_offs *foffs;
struct bpf_map *map;
int f_flags;
int err;
@@ -1205,17 +1207,9 @@ static int map_create(union bpf_attr *attr)
attr->btf_vmlinux_value_type_id;
}
-
- foffs = btf_parse_field_offs(map->record);
- if (IS_ERR(foffs)) {
- err = PTR_ERR(foffs);
- goto free_map;
- }
- map->field_offs = foffs;
-
err = security_bpf_map_alloc(map);
if (err)
- goto free_map_field_offs;
+ goto free_map;
err = bpf_map_alloc_id(map);
if (err)
@@ -1239,8 +1233,6 @@ static int map_create(union bpf_attr *attr)
free_map_sec:
security_bpf_map_free(map);
-free_map_field_offs:
- kfree(map->field_offs);
free_map:
btf_put(map->btf);
map->ops->map_free(map);
@@ -2463,7 +2455,6 @@ static bool is_net_admin_prog_type(enum bpf_prog_type prog_type)
case BPF_PROG_TYPE_LWT_SEG6LOCAL:
case BPF_PROG_TYPE_SK_SKB:
case BPF_PROG_TYPE_SK_MSG:
- case BPF_PROG_TYPE_LIRC_MODE2:
case BPF_PROG_TYPE_FLOW_DISSECTOR:
case BPF_PROG_TYPE_CGROUP_DEVICE:
case BPF_PROG_TYPE_CGROUP_SOCK:
@@ -2472,6 +2463,7 @@ static bool is_net_admin_prog_type(enum bpf_prog_type prog_type)
case BPF_PROG_TYPE_CGROUP_SYSCTL:
case BPF_PROG_TYPE_SOCK_OPS:
case BPF_PROG_TYPE_EXT: /* extends any prog */
+ case BPF_PROG_TYPE_NETFILTER:
return true;
case BPF_PROG_TYPE_CGROUP_SKB:
/* always unpriv */
@@ -4598,6 +4590,7 @@ static int link_create(union bpf_attr *attr, bpfptr_t uattr)
switch (prog->type) {
case BPF_PROG_TYPE_EXT:
+ case BPF_PROG_TYPE_NETFILTER:
break;
case BPF_PROG_TYPE_PERF_EVENT:
case BPF_PROG_TYPE_TRACEPOINT:
@@ -4664,6 +4657,9 @@ static int link_create(union bpf_attr *attr, bpfptr_t uattr)
case BPF_PROG_TYPE_XDP:
ret = bpf_xdp_link_attach(attr, prog);
break;
+ case BPF_PROG_TYPE_NETFILTER:
+ ret = bpf_nf_link_attach(attr, prog);
+ break;
#endif
case BPF_PROG_TYPE_PERF_EVENT:
case BPF_PROG_TYPE_TRACEPOINT:
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 6e12cc296ad8..0d73139ee4d8 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -195,6 +195,8 @@ static void invalidate_non_owning_refs(struct bpf_verifier_env *env);
static bool in_rbtree_lock_required_cb(struct bpf_verifier_env *env);
static int ref_set_non_owning(struct bpf_verifier_env *env,
struct bpf_reg_state *reg);
+static void specialize_kfunc(struct bpf_verifier_env *env,
+ u32 func_id, u16 offset, unsigned long *addr);
static bool bpf_map_ptr_poisoned(const struct bpf_insn_aux_data *aux)
{
@@ -271,6 +273,11 @@ struct bpf_call_arg_meta {
struct btf_field *kptr_field;
};
+struct btf_and_id {
+ struct btf *btf;
+ u32 btf_id;
+};
+
struct bpf_kfunc_call_arg_meta {
/* In parameters */
struct btf *btf;
@@ -289,10 +296,10 @@ struct bpf_kfunc_call_arg_meta {
u64 value;
bool found;
} arg_constant;
- struct {
- struct btf *btf;
- u32 btf_id;
- } arg_obj_drop;
+ union {
+ struct btf_and_id arg_obj_drop;
+ struct btf_and_id arg_refcount_acquire;
+ };
struct {
struct btf_field *field;
} arg_list_head;
@@ -2374,6 +2381,7 @@ struct bpf_kfunc_desc {
u32 func_id;
s32 imm;
u16 offset;
+ unsigned long addr;
};
struct bpf_kfunc_btf {
@@ -2383,6 +2391,11 @@ struct bpf_kfunc_btf {
};
struct bpf_kfunc_desc_tab {
+ /* Sorted by func_id (BTF ID) and offset (fd_array offset) during
+ * verification. JITs do lookups by bpf_insn, where func_id may not be
+ * available, therefore at the end of verification do_misc_fixups()
+ * sorts this by imm and offset.
+ */
struct bpf_kfunc_desc descs[MAX_KFUNC_DESCS];
u32 nr_descs;
};
@@ -2423,6 +2436,19 @@ find_kfunc_desc(const struct bpf_prog *prog, u32 func_id, u16 offset)
sizeof(tab->descs[0]), kfunc_desc_cmp_by_id_off);
}
+int bpf_get_kfunc_addr(const struct bpf_prog *prog, u32 func_id,
+ u16 btf_fd_idx, u8 **func_addr)
+{
+ const struct bpf_kfunc_desc *desc;
+
+ desc = find_kfunc_desc(prog, func_id, btf_fd_idx);
+ if (!desc)
+ return -EFAULT;
+
+ *func_addr = (u8 *)desc->addr;
+ return 0;
+}
+
static struct btf *__find_kfunc_desc_btf(struct bpf_verifier_env *env,
s16 offset)
{
@@ -2602,13 +2628,18 @@ static int add_kfunc_call(struct bpf_verifier_env *env, u32 func_id, s16 offset)
func_name);
return -EINVAL;
}
+ specialize_kfunc(env, func_id, offset, &addr);
- call_imm = BPF_CALL_IMM(addr);
- /* Check whether or not the relative offset overflows desc->imm */
- if ((unsigned long)(s32)call_imm != call_imm) {
- verbose(env, "address of kernel function %s is out of range\n",
- func_name);
- return -EINVAL;
+ if (bpf_jit_supports_far_kfunc_call()) {
+ call_imm = func_id;
+ } else {
+ call_imm = BPF_CALL_IMM(addr);
+ /* Check whether the relative offset overflows desc->imm */
+ if ((unsigned long)(s32)call_imm != call_imm) {
+ verbose(env, "address of kernel function %s is out of range\n",
+ func_name);
+ return -EINVAL;
+ }
}
if (bpf_dev_bound_kfunc_id(func_id)) {
@@ -2621,6 +2652,7 @@ static int add_kfunc_call(struct bpf_verifier_env *env, u32 func_id, s16 offset)
desc->func_id = func_id;
desc->imm = call_imm;
desc->offset = offset;
+ desc->addr = addr;
err = btf_distill_func_proto(&env->log, desc_btf,
func_proto, func_name,
&desc->func_model);
@@ -2630,19 +2662,19 @@ static int add_kfunc_call(struct bpf_verifier_env *env, u32 func_id, s16 offset)
return err;
}
-static int kfunc_desc_cmp_by_imm(const void *a, const void *b)
+static int kfunc_desc_cmp_by_imm_off(const void *a, const void *b)
{
const struct bpf_kfunc_desc *d0 = a;
const struct bpf_kfunc_desc *d1 = b;
- if (d0->imm > d1->imm)
- return 1;
- else if (d0->imm < d1->imm)
- return -1;
+ if (d0->imm != d1->imm)
+ return d0->imm < d1->imm ? -1 : 1;
+ if (d0->offset != d1->offset)
+ return d0->offset < d1->offset ? -1 : 1;
return 0;
}
-static void sort_kfunc_descs_by_imm(struct bpf_prog *prog)
+static void sort_kfunc_descs_by_imm_off(struct bpf_prog *prog)
{
struct bpf_kfunc_desc_tab *tab;
@@ -2651,7 +2683,7 @@ static void sort_kfunc_descs_by_imm(struct bpf_prog *prog)
return;
sort(tab->descs, tab->nr_descs, sizeof(tab->descs[0]),
- kfunc_desc_cmp_by_imm, NULL);
+ kfunc_desc_cmp_by_imm_off, NULL);
}
bool bpf_prog_has_kfunc_call(const struct bpf_prog *prog)
@@ -2665,13 +2697,14 @@ bpf_jit_find_kfunc_model(const struct bpf_prog *prog,
{
const struct bpf_kfunc_desc desc = {
.imm = insn->imm,
+ .offset = insn->off,
};
const struct bpf_kfunc_desc *res;
struct bpf_kfunc_desc_tab *tab;
tab = prog->aux->kfunc_tab;
res = bsearch(&desc, tab->descs, tab->nr_descs,
- sizeof(tab->descs[0]), kfunc_desc_cmp_by_imm);
+ sizeof(tab->descs[0]), kfunc_desc_cmp_by_imm_off);
return res ? &res->func_model : NULL;
}
@@ -8482,10 +8515,10 @@ static int set_rbtree_add_callback_state(struct bpf_verifier_env *env,
struct bpf_func_state *callee,
int insn_idx)
{
- /* void bpf_rbtree_add(struct bpf_rb_root *root, struct bpf_rb_node *node,
+ /* void bpf_rbtree_add_impl(struct bpf_rb_root *root, struct bpf_rb_node *node,
* bool (less)(struct bpf_rb_node *a, const struct bpf_rb_node *b));
*
- * 'struct bpf_rb_node *node' arg to bpf_rbtree_add is the same PTR_TO_BTF_ID w/ offset
+ * 'struct bpf_rb_node *node' arg to bpf_rbtree_add_impl is the same PTR_TO_BTF_ID w/ offset
* that 'less' callback args will be receiving. However, 'node' arg was release_reference'd
* by this point, so look at 'root'
*/
@@ -9321,11 +9354,6 @@ static bool is_kfunc_rcu(struct bpf_kfunc_call_arg_meta *meta)
return meta->kfunc_flags & KF_RCU;
}
-static bool is_kfunc_arg_kptr_get(struct bpf_kfunc_call_arg_meta *meta, int arg)
-{
- return arg == 0 && (meta->kfunc_flags & KF_KPTR_GET);
-}
-
static bool __kfunc_param_match_suffix(const struct btf *btf,
const struct btf_param *arg,
const char *suffix)
@@ -9390,6 +9418,11 @@ static bool is_kfunc_arg_uninit(const struct btf *btf, const struct btf_param *a
return __kfunc_param_match_suffix(btf, arg, "__uninit");
}
+static bool is_kfunc_arg_refcounted_kptr(const struct btf *btf, const struct btf_param *arg)
+{
+ return __kfunc_param_match_suffix(btf, arg, "__refcounted_kptr");
+}
+
static bool is_kfunc_arg_scalar_with_name(const struct btf *btf,
const struct btf_param *arg,
const char *name)
@@ -9529,15 +9562,15 @@ static u32 *reg2btf_ids[__BPF_REG_TYPE_MAX] = {
enum kfunc_ptr_arg_type {
KF_ARG_PTR_TO_CTX,
- KF_ARG_PTR_TO_ALLOC_BTF_ID, /* Allocated object */
- KF_ARG_PTR_TO_KPTR, /* PTR_TO_KPTR but type specific */
+ KF_ARG_PTR_TO_ALLOC_BTF_ID, /* Allocated object */
+ KF_ARG_PTR_TO_REFCOUNTED_KPTR, /* Refcounted local kptr */
KF_ARG_PTR_TO_DYNPTR,
KF_ARG_PTR_TO_ITER,
KF_ARG_PTR_TO_LIST_HEAD,
KF_ARG_PTR_TO_LIST_NODE,
- KF_ARG_PTR_TO_BTF_ID, /* Also covers reg2btf_ids conversions */
+ KF_ARG_PTR_TO_BTF_ID, /* Also covers reg2btf_ids conversions */
KF_ARG_PTR_TO_MEM,
- KF_ARG_PTR_TO_MEM_SIZE, /* Size derived from next argument, skip it */
+ KF_ARG_PTR_TO_MEM_SIZE, /* Size derived from next argument, skip it */
KF_ARG_PTR_TO_CALLBACK,
KF_ARG_PTR_TO_RB_ROOT,
KF_ARG_PTR_TO_RB_NODE,
@@ -9546,8 +9579,9 @@ enum kfunc_ptr_arg_type {
enum special_kfunc_type {
KF_bpf_obj_new_impl,
KF_bpf_obj_drop_impl,
- KF_bpf_list_push_front,
- KF_bpf_list_push_back,
+ KF_bpf_refcount_acquire_impl,
+ KF_bpf_list_push_front_impl,
+ KF_bpf_list_push_back_impl,
KF_bpf_list_pop_front,
KF_bpf_list_pop_back,
KF_bpf_cast_to_kern_ctx,
@@ -9555,7 +9589,7 @@ enum special_kfunc_type {
KF_bpf_rcu_read_lock,
KF_bpf_rcu_read_unlock,
KF_bpf_rbtree_remove,
- KF_bpf_rbtree_add,
+ KF_bpf_rbtree_add_impl,
KF_bpf_rbtree_first,
KF_bpf_dynptr_from_skb,
KF_bpf_dynptr_from_xdp,
@@ -9566,14 +9600,15 @@ enum special_kfunc_type {
BTF_SET_START(special_kfunc_set)
BTF_ID(func, bpf_obj_new_impl)
BTF_ID(func, bpf_obj_drop_impl)
-BTF_ID(func, bpf_list_push_front)
-BTF_ID(func, bpf_list_push_back)
+BTF_ID(func, bpf_refcount_acquire_impl)
+BTF_ID(func, bpf_list_push_front_impl)
+BTF_ID(func, bpf_list_push_back_impl)
BTF_ID(func, bpf_list_pop_front)
BTF_ID(func, bpf_list_pop_back)
BTF_ID(func, bpf_cast_to_kern_ctx)
BTF_ID(func, bpf_rdonly_cast)
BTF_ID(func, bpf_rbtree_remove)
-BTF_ID(func, bpf_rbtree_add)
+BTF_ID(func, bpf_rbtree_add_impl)
BTF_ID(func, bpf_rbtree_first)
BTF_ID(func, bpf_dynptr_from_skb)
BTF_ID(func, bpf_dynptr_from_xdp)
@@ -9584,8 +9619,9 @@ BTF_SET_END(special_kfunc_set)
BTF_ID_LIST(special_kfunc_list)
BTF_ID(func, bpf_obj_new_impl)
BTF_ID(func, bpf_obj_drop_impl)
-BTF_ID(func, bpf_list_push_front)
-BTF_ID(func, bpf_list_push_back)
+BTF_ID(func, bpf_refcount_acquire_impl)
+BTF_ID(func, bpf_list_push_front_impl)
+BTF_ID(func, bpf_list_push_back_impl)
BTF_ID(func, bpf_list_pop_front)
BTF_ID(func, bpf_list_pop_back)
BTF_ID(func, bpf_cast_to_kern_ctx)
@@ -9593,7 +9629,7 @@ BTF_ID(func, bpf_rdonly_cast)
BTF_ID(func, bpf_rcu_read_lock)
BTF_ID(func, bpf_rcu_read_unlock)
BTF_ID(func, bpf_rbtree_remove)
-BTF_ID(func, bpf_rbtree_add)
+BTF_ID(func, bpf_rbtree_add_impl)
BTF_ID(func, bpf_rbtree_first)
BTF_ID(func, bpf_dynptr_from_skb)
BTF_ID(func, bpf_dynptr_from_xdp)
@@ -9636,20 +9672,8 @@ get_kfunc_ptr_arg_type(struct bpf_verifier_env *env,
if (is_kfunc_arg_alloc_obj(meta->btf, &args[argno]))
return KF_ARG_PTR_TO_ALLOC_BTF_ID;
- if (is_kfunc_arg_kptr_get(meta, argno)) {
- if (!btf_type_is_ptr(ref_t)) {
- verbose(env, "arg#0 BTF type must be a double pointer for kptr_get kfunc\n");
- return -EINVAL;
- }
- ref_t = btf_type_by_id(meta->btf, ref_t->type);
- ref_tname = btf_name_by_offset(meta->btf, ref_t->name_off);
- if (!btf_type_is_struct(ref_t)) {
- verbose(env, "kernel function %s args#0 pointer type %s %s is not supported\n",
- meta->func_name, btf_type_str(ref_t), ref_tname);
- return -EINVAL;
- }
- return KF_ARG_PTR_TO_KPTR;
- }
+ if (is_kfunc_arg_refcounted_kptr(meta->btf, &args[argno]))
+ return KF_ARG_PTR_TO_REFCOUNTED_KPTR;
if (is_kfunc_arg_dynptr(meta->btf, &args[argno]))
return KF_ARG_PTR_TO_DYNPTR;
@@ -9764,40 +9788,6 @@ static int process_kf_arg_ptr_to_btf_id(struct bpf_verifier_env *env,
return 0;
}
-static int process_kf_arg_ptr_to_kptr(struct bpf_verifier_env *env,
- struct bpf_reg_state *reg,
- const struct btf_type *ref_t,
- const char *ref_tname,
- struct bpf_kfunc_call_arg_meta *meta,
- int argno)
-{
- struct btf_field *kptr_field;
-
- /* check_func_arg_reg_off allows var_off for
- * PTR_TO_MAP_VALUE, but we need fixed offset to find
- * off_desc.
- */
- if (!tnum_is_const(reg->var_off)) {
- verbose(env, "arg#0 must have constant offset\n");
- return -EINVAL;
- }
-
- kptr_field = btf_record_find(reg->map_ptr->record, reg->off + reg->var_off.value, BPF_KPTR);
- if (!kptr_field || kptr_field->type != BPF_KPTR_REF) {
- verbose(env, "arg#0 no referenced kptr at map value offset=%llu\n",
- reg->off + reg->var_off.value);
- return -EINVAL;
- }
-
- if (!btf_struct_ids_match(&env->log, meta->btf, ref_t->type, 0, kptr_field->kptr.btf,
- kptr_field->kptr.btf_id, true)) {
- verbose(env, "kernel function %s args#%d expected pointer to %s %s\n",
- meta->func_name, argno, btf_type_str(ref_t), ref_tname);
- return -EINVAL;
- }
- return 0;
-}
-
static int ref_set_non_owning(struct bpf_verifier_env *env, struct bpf_reg_state *reg)
{
struct bpf_verifier_state *state = env->cur_state;
@@ -9924,27 +9914,28 @@ static int check_reg_allocation_locked(struct bpf_verifier_env *env, struct bpf_
static bool is_bpf_list_api_kfunc(u32 btf_id)
{
- return btf_id == special_kfunc_list[KF_bpf_list_push_front] ||
- btf_id == special_kfunc_list[KF_bpf_list_push_back] ||
+ return btf_id == special_kfunc_list[KF_bpf_list_push_front_impl] ||
+ btf_id == special_kfunc_list[KF_bpf_list_push_back_impl] ||
btf_id == special_kfunc_list[KF_bpf_list_pop_front] ||
btf_id == special_kfunc_list[KF_bpf_list_pop_back];
}
static bool is_bpf_rbtree_api_kfunc(u32 btf_id)
{
- return btf_id == special_kfunc_list[KF_bpf_rbtree_add] ||
+ return btf_id == special_kfunc_list[KF_bpf_rbtree_add_impl] ||
btf_id == special_kfunc_list[KF_bpf_rbtree_remove] ||
btf_id == special_kfunc_list[KF_bpf_rbtree_first];
}
static bool is_bpf_graph_api_kfunc(u32 btf_id)
{
- return is_bpf_list_api_kfunc(btf_id) || is_bpf_rbtree_api_kfunc(btf_id);
+ return is_bpf_list_api_kfunc(btf_id) || is_bpf_rbtree_api_kfunc(btf_id) ||
+ btf_id == special_kfunc_list[KF_bpf_refcount_acquire_impl];
}
static bool is_callback_calling_kfunc(u32 btf_id)
{
- return btf_id == special_kfunc_list[KF_bpf_rbtree_add];
+ return btf_id == special_kfunc_list[KF_bpf_rbtree_add_impl];
}
static bool is_rbtree_lock_required_kfunc(u32 btf_id)
@@ -9985,12 +9976,12 @@ static bool check_kfunc_is_graph_node_api(struct bpf_verifier_env *env,
switch (node_field_type) {
case BPF_LIST_NODE:
- ret = (kfunc_btf_id == special_kfunc_list[KF_bpf_list_push_front] ||
- kfunc_btf_id == special_kfunc_list[KF_bpf_list_push_back]);
+ ret = (kfunc_btf_id == special_kfunc_list[KF_bpf_list_push_front_impl] ||
+ kfunc_btf_id == special_kfunc_list[KF_bpf_list_push_back_impl]);
break;
case BPF_RB_NODE:
ret = (kfunc_btf_id == special_kfunc_list[KF_bpf_rbtree_remove] ||
- kfunc_btf_id == special_kfunc_list[KF_bpf_rbtree_add]);
+ kfunc_btf_id == special_kfunc_list[KF_bpf_rbtree_add_impl]);
break;
default:
verbose(env, "verifier internal error: unexpected graph node argument type %s\n",
@@ -10158,6 +10149,7 @@ static int check_kfunc_args(struct bpf_verifier_env *env, struct bpf_kfunc_call_
const char *func_name = meta->func_name, *ref_tname;
const struct btf *btf = meta->btf;
const struct btf_param *args;
+ struct btf_record *rec;
u32 i, nargs;
int ret;
@@ -10283,7 +10275,6 @@ static int check_kfunc_args(struct bpf_verifier_env *env, struct bpf_kfunc_call_
/* Trusted arguments have the same offset checks as release arguments */
arg_type |= OBJ_RELEASE;
break;
- case KF_ARG_PTR_TO_KPTR:
case KF_ARG_PTR_TO_DYNPTR:
case KF_ARG_PTR_TO_ITER:
case KF_ARG_PTR_TO_LIST_HEAD:
@@ -10293,6 +10284,7 @@ static int check_kfunc_args(struct bpf_verifier_env *env, struct bpf_kfunc_call_
case KF_ARG_PTR_TO_MEM:
case KF_ARG_PTR_TO_MEM_SIZE:
case KF_ARG_PTR_TO_CALLBACK:
+ case KF_ARG_PTR_TO_REFCOUNTED_KPTR:
/* Trusted by default */
break;
default:
@@ -10335,15 +10327,6 @@ static int check_kfunc_args(struct bpf_verifier_env *env, struct bpf_kfunc_call_
meta->arg_obj_drop.btf_id = reg->btf_id;
}
break;
- case KF_ARG_PTR_TO_KPTR:
- if (reg->type != PTR_TO_MAP_VALUE) {
- verbose(env, "arg#0 expected pointer to map value\n");
- return -EINVAL;
- }
- ret = process_kf_arg_ptr_to_kptr(env, reg, ref_t, ref_tname, meta, i);
- if (ret < 0)
- return ret;
- break;
case KF_ARG_PTR_TO_DYNPTR:
{
enum bpf_arg_type dynptr_arg_type = ARG_PTR_TO_DYNPTR;
@@ -10510,6 +10493,26 @@ static int check_kfunc_args(struct bpf_verifier_env *env, struct bpf_kfunc_call_
case KF_ARG_PTR_TO_CALLBACK:
meta->subprogno = reg->subprogno;
break;
+ case KF_ARG_PTR_TO_REFCOUNTED_KPTR:
+ if (!type_is_ptr_alloc_obj(reg->type) && !type_is_non_owning_ref(reg->type)) {
+ verbose(env, "arg#%d is neither owning or non-owning ref\n", i);
+ return -EINVAL;
+ }
+
+ rec = reg_btf_record(reg);
+ if (!rec) {
+ verbose(env, "verifier internal error: Couldn't find btf_record\n");
+ return -EFAULT;
+ }
+
+ if (rec->refcount_off < 0) {
+ verbose(env, "arg#%d doesn't point to a type with bpf_refcount field\n", i);
+ return -EINVAL;
+ }
+
+ meta->arg_refcount_acquire.btf = reg->btf;
+ meta->arg_refcount_acquire.btf_id = reg->btf_id;
+ break;
}
}
@@ -10649,10 +10652,11 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
}
}
- if (meta.func_id == special_kfunc_list[KF_bpf_list_push_front] ||
- meta.func_id == special_kfunc_list[KF_bpf_list_push_back] ||
- meta.func_id == special_kfunc_list[KF_bpf_rbtree_add]) {
+ if (meta.func_id == special_kfunc_list[KF_bpf_list_push_front_impl] ||
+ meta.func_id == special_kfunc_list[KF_bpf_list_push_back_impl] ||
+ meta.func_id == special_kfunc_list[KF_bpf_rbtree_add_impl]) {
release_ref_obj_id = regs[BPF_REG_2].ref_obj_id;
+ insn_aux->insert_off = regs[BPF_REG_2].off;
err = ref_convert_owning_non_owning(env, release_ref_obj_id);
if (err) {
verbose(env, "kfunc %s#%d conversion of owning ref to non-owning failed\n",
@@ -10668,7 +10672,7 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
}
}
- if (meta.func_id == special_kfunc_list[KF_bpf_rbtree_add]) {
+ if (meta.func_id == special_kfunc_list[KF_bpf_rbtree_add_impl]) {
err = __check_func_call(env, insn, insn_idx_p, meta.subprogno,
set_rbtree_add_callback_state);
if (err) {
@@ -10686,7 +10690,9 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
if (is_kfunc_acquire(&meta) && !btf_type_is_struct_ptr(meta.btf, t)) {
/* Only exception is bpf_obj_new_impl */
- if (meta.btf != btf_vmlinux || meta.func_id != special_kfunc_list[KF_bpf_obj_new_impl]) {
+ if (meta.btf != btf_vmlinux ||
+ (meta.func_id != special_kfunc_list[KF_bpf_obj_new_impl] &&
+ meta.func_id != special_kfunc_list[KF_bpf_refcount_acquire_impl])) {
verbose(env, "acquire kernel function does not return PTR_TO_BTF_ID\n");
return -EINVAL;
}
@@ -10734,6 +10740,15 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
insn_aux->obj_new_size = ret_t->size;
insn_aux->kptr_struct_meta =
btf_find_struct_meta(ret_btf, ret_btf_id);
+ } else if (meta.func_id == special_kfunc_list[KF_bpf_refcount_acquire_impl]) {
+ mark_reg_known_zero(env, regs, BPF_REG_0);
+ regs[BPF_REG_0].type = PTR_TO_BTF_ID | MEM_ALLOC;
+ regs[BPF_REG_0].btf = meta.arg_refcount_acquire.btf;
+ regs[BPF_REG_0].btf_id = meta.arg_refcount_acquire.btf_id;
+
+ insn_aux->kptr_struct_meta =
+ btf_find_struct_meta(meta.arg_refcount_acquire.btf,
+ meta.arg_refcount_acquire.btf_id);
} else if (meta.func_id == special_kfunc_list[KF_bpf_list_pop_front] ||
meta.func_id == special_kfunc_list[KF_bpf_list_pop_back]) {
struct btf_field *field = meta.arg_list_head.field;
@@ -10857,9 +10872,6 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
ref_set_non_owning(env, &regs[BPF_REG_0]);
}
- if (meta.func_id == special_kfunc_list[KF_bpf_rbtree_remove])
- invalidate_non_owning_refs(env);
-
if (reg_may_point_to_spin_lock(&regs[BPF_REG_0]) && !regs[BPF_REG_0].id)
regs[BPF_REG_0].id = ++env->id_gen;
} else if (btf_type_is_void(t)) {
@@ -12424,12 +12436,17 @@ static int check_alu_op(struct bpf_verifier_env *env, struct bpf_insn *insn)
insn->src_reg);
return -EACCES;
} else if (src_reg->type == SCALAR_VALUE) {
+ bool is_src_reg_u32 = src_reg->umax_value <= U32_MAX;
+
+ if (is_src_reg_u32 && !src_reg->id)
+ src_reg->id = ++env->id_gen;
copy_register_state(dst_reg, src_reg);
- /* Make sure ID is cleared otherwise
+ /* Make sure ID is cleared if src_reg is not in u32 range otherwise
* dst_reg min/max could be incorrectly
* propagated into src_reg by find_equal_scalars()
*/
- dst_reg->id = 0;
+ if (!is_src_reg_u32)
+ dst_reg->id = 0;
dst_reg->live |= REG_LIVE_WRITTEN;
dst_reg->subreg_def = env->insn_idx + 1;
} else {
@@ -13814,6 +13831,9 @@ static int check_return_code(struct bpf_verifier_env *env)
}
break;
+ case BPF_PROG_TYPE_NETFILTER:
+ range = tnum_range(NF_DROP, NF_ACCEPT);
+ break;
case BPF_PROG_TYPE_EXT:
/* freplace program can return anything as its return value
* depends on the to-be-replaced kernel func or bpf program.
@@ -14700,7 +14720,7 @@ static bool regs_exact(const struct bpf_reg_state *rold,
const struct bpf_reg_state *rcur,
struct bpf_id_pair *idmap)
{
- return memcmp(rold, rcur, offsetof(struct bpf_reg_state, id)) == 0 &&
+ return memcmp(rold, rcur, offsetof(struct bpf_reg_state, id)) == 0 &&
check_ids(rold->id, rcur->id, idmap) &&
check_ids(rold->ref_obj_id, rcur->ref_obj_id, idmap);
}
@@ -17308,11 +17328,62 @@ static int fixup_call_args(struct bpf_verifier_env *env)
return err;
}
+/* replace a generic kfunc with a specialized version if necessary */
+static void specialize_kfunc(struct bpf_verifier_env *env,
+ u32 func_id, u16 offset, unsigned long *addr)
+{
+ struct bpf_prog *prog = env->prog;
+ bool seen_direct_write;
+ void *xdp_kfunc;
+ bool is_rdonly;
+
+ if (bpf_dev_bound_kfunc_id(func_id)) {
+ xdp_kfunc = bpf_dev_bound_resolve_kfunc(prog, func_id);
+ if (xdp_kfunc) {
+ *addr = (unsigned long)xdp_kfunc;
+ return;
+ }
+ /* fallback to default kfunc when not supported by netdev */
+ }
+
+ if (offset)
+ return;
+
+ if (func_id == special_kfunc_list[KF_bpf_dynptr_from_skb]) {
+ seen_direct_write = env->seen_direct_write;
+ is_rdonly = !may_access_direct_pkt_data(env, NULL, BPF_WRITE);
+
+ if (is_rdonly)
+ *addr = (unsigned long)bpf_dynptr_from_skb_rdonly;
+
+ /* restore env->seen_direct_write to its original value, since
+ * may_access_direct_pkt_data mutates it
+ */
+ env->seen_direct_write = seen_direct_write;
+ }
+}
+
+static void __fixup_collection_insert_kfunc(struct bpf_insn_aux_data *insn_aux,
+ u16 struct_meta_reg,
+ u16 node_offset_reg,
+ struct bpf_insn *insn,
+ struct bpf_insn *insn_buf,
+ int *cnt)
+{
+ struct btf_struct_meta *kptr_struct_meta = insn_aux->kptr_struct_meta;
+ struct bpf_insn addr[2] = { BPF_LD_IMM64(struct_meta_reg, (long)kptr_struct_meta) };
+
+ insn_buf[0] = addr[0];
+ insn_buf[1] = addr[1];
+ insn_buf[2] = BPF_MOV64_IMM(node_offset_reg, insn_aux->insert_off);
+ insn_buf[3] = *insn;
+ *cnt = 4;
+}
+
static int fixup_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
struct bpf_insn *insn_buf, int insn_idx, int *cnt)
{
const struct bpf_kfunc_desc *desc;
- void *xdp_kfunc;
if (!insn->imm) {
verbose(env, "invalid kernel function call not eliminated in verifier pass\n");
@@ -17321,18 +17392,9 @@ static int fixup_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
*cnt = 0;
- if (bpf_dev_bound_kfunc_id(insn->imm)) {
- xdp_kfunc = bpf_dev_bound_resolve_kfunc(env->prog, insn->imm);
- if (xdp_kfunc) {
- insn->imm = BPF_CALL_IMM(xdp_kfunc);
- return 0;
- }
-
- /* fallback to default kfunc when not supported by netdev */
- }
-
- /* insn->imm has the btf func_id. Replace it with
- * an address (relative to __bpf_call_base).
+ /* insn->imm has the btf func_id. Replace it with an offset relative to
+ * __bpf_call_base, unless the JIT needs to call functions that are
+ * further than 32 bits away (bpf_jit_supports_far_kfunc_call()).
*/
desc = find_kfunc_desc(env->prog, insn->imm, insn->off);
if (!desc) {
@@ -17341,7 +17403,8 @@ static int fixup_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
return -EFAULT;
}
- insn->imm = desc->imm;
+ if (!bpf_jit_supports_far_kfunc_call())
+ insn->imm = BPF_CALL_IMM(desc->addr);
if (insn->off)
return 0;
if (desc->func_id == special_kfunc_list[KF_bpf_obj_new_impl]) {
@@ -17354,7 +17417,8 @@ static int fixup_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
insn_buf[2] = addr[1];
insn_buf[3] = *insn;
*cnt = 4;
- } else if (desc->func_id == special_kfunc_list[KF_bpf_obj_drop_impl]) {
+ } else if (desc->func_id == special_kfunc_list[KF_bpf_obj_drop_impl] ||
+ desc->func_id == special_kfunc_list[KF_bpf_refcount_acquire_impl]) {
struct btf_struct_meta *kptr_struct_meta = env->insn_aux_data[insn_idx].kptr_struct_meta;
struct bpf_insn addr[2] = { BPF_LD_IMM64(BPF_REG_2, (long)kptr_struct_meta) };
@@ -17362,21 +17426,24 @@ static int fixup_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
insn_buf[1] = addr[1];
insn_buf[2] = *insn;
*cnt = 3;
+ } else if (desc->func_id == special_kfunc_list[KF_bpf_list_push_back_impl] ||
+ desc->func_id == special_kfunc_list[KF_bpf_list_push_front_impl] ||
+ desc->func_id == special_kfunc_list[KF_bpf_rbtree_add_impl]) {
+ int struct_meta_reg = BPF_REG_3;
+ int node_offset_reg = BPF_REG_4;
+
+ /* rbtree_add has extra 'less' arg, so args-to-fixup are in diff regs */
+ if (desc->func_id == special_kfunc_list[KF_bpf_rbtree_add_impl]) {
+ struct_meta_reg = BPF_REG_4;
+ node_offset_reg = BPF_REG_5;
+ }
+
+ __fixup_collection_insert_kfunc(&env->insn_aux_data[insn_idx], struct_meta_reg,
+ node_offset_reg, insn, insn_buf, cnt);
} else if (desc->func_id == special_kfunc_list[KF_bpf_cast_to_kern_ctx] ||
desc->func_id == special_kfunc_list[KF_bpf_rdonly_cast]) {
insn_buf[0] = BPF_MOV64_REG(BPF_REG_0, BPF_REG_1);
*cnt = 1;
- } else if (desc->func_id == special_kfunc_list[KF_bpf_dynptr_from_skb]) {
- bool seen_direct_write = env->seen_direct_write;
- bool is_rdonly = !may_access_direct_pkt_data(env, NULL, BPF_WRITE);
-
- if (is_rdonly)
- insn->imm = BPF_CALL_IMM(bpf_dynptr_from_skb_rdonly);
-
- /* restore env->seen_direct_write to its original value, since
- * may_access_direct_pkt_data mutates it
- */
- env->seen_direct_write = seen_direct_write;
}
return 0;
}
@@ -17906,7 +17973,7 @@ patch_call_imm:
}
}
- sort_kfunc_descs_by_imm(env->prog);
+ sort_kfunc_descs_by_imm_off(env->prog);
return 0;
}
@@ -18597,6 +18664,10 @@ BTF_ID(func, migrate_enable)
#if !defined CONFIG_PREEMPT_RCU && !defined CONFIG_TINY_RCU
BTF_ID(func, rcu_read_unlock_strict)
#endif
+#if defined(CONFIG_DEBUG_PREEMPT) || defined(CONFIG_TRACE_PREEMPT_TOGGLE)
+BTF_ID(func, preempt_count_add)
+BTF_ID(func, preempt_count_sub)
+#endif
BTF_SET_END(btf_id_deny)
static bool can_be_sleepable(struct bpf_prog *prog)