author    | Alexei Starovoitov | 2023-02-15 15:40:06 -0800
committer | Alexei Starovoitov | 2023-02-15 15:40:06 -0800
commit    | 3538a0fbbd81bc131afe48b4cf02895735944359 (patch)
tree      | c36bec94b00c337da6266687758715ec9eaa0f92
parent    | b2d9002ee9a65aa0eaf01ae494876fe16389d535 (diff)
parent    | f88da2d46cc9a19b0c233285339659cae36c5d9a (diff)
Merge branch 'Use __GFP_ZERO in bpf memory allocator'
Hou Tao says:
====================
From: Hou Tao <houtao1@huawei.com>
Hi,
The patchset fixes the hard-lockup problem found when checking how htab
handles element reuse in the bpf memory allocator. The immediate reuse of
freed elements will reinitialize special fields (e.g., bpf_spin_lock) in
the htab map value, which may corrupt a concurrent lookup done with the
BPF_F_LOCK flag: such a lookup acquires the bpf_spin_lock during value
copying, so reinitializing the lock underneath it leads to the hard-lockup
shown in patch #2. Patch #1 fixes it by using __GFP_ZERO when allocating
the object from the slab, making the behavior match the preallocated
hash-table case. Please see the individual patches for more details, and
comments are always welcome.
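For illustration, the problematic pattern can be driven entirely from user
space, as in the minimal sketch below. It mirrors what the selftest added
later in this series automates; 'map_fd', 'hammer' and the value layout are
illustrative assumptions for a BPF_F_NO_PREALLOC hash map whose value embeds
a struct bpf_spin_lock, not code from this series:

	#include <linux/bpf.h>
	#include <bpf/bpf.h>

	struct htab_val {
		unsigned int lock;	/* overlays struct bpf_spin_lock */
		unsigned int data;
	};

	/* Hypothetical reproducer sketch: locked lookups copy the value
	 * under the element's bpf_spin_lock, while updates and deletes
	 * recycle the element in the bpf memory allocator.
	 */
	static void hammer(int map_fd)
	{
		struct htab_val value = { .lock = 0, .data = 1 };
		unsigned int key = 7;

		bpf_map_update_elem(map_fd, &key, &value, BPF_F_LOCK);
		bpf_map_lookup_elem_flags(map_fd, &key, &value, BPF_F_LOCK);
		bpf_map_delete_elem(map_fd, &key);
	}

Run concurrently from several threads, a lookup can end up spinning on a
lock word that element reuse has just reinitialized to zero or to another
element's state, which is the hang the selftest provokes.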
Regards,
Change Log:
v1:
* Use __GFP_ZERO instead of ctor to avoid retpoline overhead (from Alexei)
* Add comments for check_and_init_map_value() (from Alexei)
* Split the __GFP_ZERO patches out of the original patchset to unblock
  the development work of others.
RFC: https://lore.kernel.org/bpf/20221230041151.1231169-1-houtao@huaweicloud.com
====================
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
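The first changelog item is worth a note: the RFC zeroed the special fields
through a per-object constructor callback, and invoking that constructor
costs one indirect call per allocation, which is expensive on kernels built
with retpolines. A rough sketch of the discarded shape follows; it is a
hypothetical reconstruction only, and the 'ctor' field does not exist in the
merged struct bpf_mem_cache:

	/* Hypothetical reconstruction of the RFC's ctor approach. */
	static void *__alloc_with_ctor(struct bpf_mem_cache *c, int node,
				       gfp_t flags)
	{
		void *obj = kmalloc_node(c->unit_size, flags, node);

		/* One indirect call per allocation: retpoline overhead. */
		if (obj && c->ctor)
			c->ctor(c, obj);
		return obj;
	}

Folding __GFP_ZERO into the GFP flags instead, as the memalloc.c hunk below
does, lets the slab allocator clear the object with no callback at all.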
-rw-r--r-- | include/linux/bpf.h                                 |   7
-rw-r--r-- | kernel/bpf/hashtab.c                                |   4
-rw-r--r-- | kernel/bpf/memalloc.c                               |   2
-rw-r--r-- | tools/testing/selftests/bpf/prog_tests/htab_reuse.c | 101
-rw-r--r-- | tools/testing/selftests/bpf/progs/htab_reuse.c      |  19
5 files changed, 130 insertions, 3 deletions
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index be34f7deb6c3..520b238abd5a 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -363,6 +363,13 @@ static inline void bpf_obj_init(const struct btf_field_offs *foffs, void *obj)
 		memset(obj + foffs->field_off[i], 0, foffs->field_sz[i]);
 }
 
+/* 'dst' must be a temporary buffer and should not point to memory that is being
+ * used in parallel by a bpf program or bpf syscall, otherwise the access from
+ * the bpf program or bpf syscall may be corrupted by the reinitialization,
+ * leading to weird problems. Even 'dst' is newly-allocated from bpf memory
+ * allocator, it is still possible for 'dst' to be used in parallel by a bpf
+ * program or bpf syscall.
+ */
 static inline void check_and_init_map_value(struct bpf_map *map, void *dst)
 {
 	bpf_obj_init(map->field_offs, dst);
diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c
index 66bded144377..5dfcb5ad0d06 100644
--- a/kernel/bpf/hashtab.c
+++ b/kernel/bpf/hashtab.c
@@ -1004,8 +1004,6 @@ static struct htab_elem *alloc_htab_elem(struct bpf_htab *htab, void *key,
 			l_new = ERR_PTR(-ENOMEM);
 			goto dec_count;
 		}
-		check_and_init_map_value(&htab->map,
-					 l_new->key + round_up(key_size, 8));
 	}
 
 	memcpy(l_new->key, key, key_size);
@@ -1592,6 +1590,7 @@ static int __htab_map_lookup_and_delete_elem(struct bpf_map *map, void *key,
 		else
 			copy_map_value(map, value, l->key +
 				       roundup_key_size);
+		/* Zeroing special fields in the temp buffer */
 		check_and_init_map_value(map, value);
 	}
 
@@ -1792,6 +1791,7 @@ again_nocopy:
 						      true);
 			else
 				copy_map_value(map, dst_val, value);
+			/* Zeroing special fields in the temp buffer */
 			check_and_init_map_value(map, dst_val);
 		}
 		if (do_delete) {
diff --git a/kernel/bpf/memalloc.c b/kernel/bpf/memalloc.c
index 490d03a4581a..5fcdacbb8439 100644
--- a/kernel/bpf/memalloc.c
+++ b/kernel/bpf/memalloc.c
@@ -143,7 +143,7 @@ static void *__alloc(struct bpf_mem_cache *c, int node)
 		return obj;
 	}
 
-	return kmalloc_node(c->unit_size, flags, node);
+	return kmalloc_node(c->unit_size, flags | __GFP_ZERO, node);
 }
 
 static struct mem_cgroup *get_memcg(const struct bpf_mem_cache *c)
diff --git a/tools/testing/selftests/bpf/prog_tests/htab_reuse.c b/tools/testing/selftests/bpf/prog_tests/htab_reuse.c
new file mode 100644
index 000000000000..a742dd994d60
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/htab_reuse.c
@@ -0,0 +1,101 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (C) 2023. Huawei Technologies Co., Ltd */
+#define _GNU_SOURCE
+#include <sched.h>
+#include <stdbool.h>
+#include <test_progs.h>
+#include "htab_reuse.skel.h"
+
+struct htab_op_ctx {
+	int fd;
+	int loop;
+	bool stop;
+};
+
+struct htab_val {
+	unsigned int lock;
+	unsigned int data;
+};
+
+static void *htab_lookup_fn(void *arg)
+{
+	struct htab_op_ctx *ctx = arg;
+	int i = 0;
+
+	while (i++ < ctx->loop && !ctx->stop) {
+		struct htab_val value;
+		unsigned int key;
+
+		/* Use BPF_F_LOCK to use spin-lock in map value. */
+		key = 7;
+		bpf_map_lookup_elem_flags(ctx->fd, &key, &value, BPF_F_LOCK);
+	}
+
+	return NULL;
+}
+
+static void *htab_update_fn(void *arg)
+{
+	struct htab_op_ctx *ctx = arg;
+	int i = 0;
+
+	while (i++ < ctx->loop && !ctx->stop) {
+		struct htab_val value;
+		unsigned int key;
+
+		key = 7;
+		value.lock = 0;
+		value.data = key;
+		bpf_map_update_elem(ctx->fd, &key, &value, BPF_F_LOCK);
+		bpf_map_delete_elem(ctx->fd, &key);
+
+		key = 24;
+		value.lock = 0;
+		value.data = key;
+		bpf_map_update_elem(ctx->fd, &key, &value, BPF_F_LOCK);
+		bpf_map_delete_elem(ctx->fd, &key);
+	}
+
+	return NULL;
+}
+
+void test_htab_reuse(void)
+{
+	unsigned int i, wr_nr = 1, rd_nr = 4;
+	pthread_t tids[wr_nr + rd_nr];
+	struct htab_reuse *skel;
+	struct htab_op_ctx ctx;
+	int err;
+
+	skel = htab_reuse__open_and_load();
+	if (!ASSERT_OK_PTR(skel, "htab_reuse__open_and_load"))
+		return;
+
+	ctx.fd = bpf_map__fd(skel->maps.htab);
+	ctx.loop = 500;
+	ctx.stop = false;
+
+	memset(tids, 0, sizeof(tids));
+	for (i = 0; i < wr_nr; i++) {
+		err = pthread_create(&tids[i], NULL, htab_update_fn, &ctx);
+		if (!ASSERT_OK(err, "pthread_create")) {
+			ctx.stop = true;
+			goto reap;
+		}
+	}
+	for (i = 0; i < rd_nr; i++) {
+		err = pthread_create(&tids[i + wr_nr], NULL, htab_lookup_fn, &ctx);
+		if (!ASSERT_OK(err, "pthread_create")) {
+			ctx.stop = true;
+			goto reap;
+		}
+	}
+
+reap:
+	for (i = 0; i < wr_nr + rd_nr; i++) {
+		if (!tids[i])
+			continue;
+		pthread_join(tids[i], NULL);
+	}
+	htab_reuse__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/progs/htab_reuse.c b/tools/testing/selftests/bpf/progs/htab_reuse.c
new file mode 100644
index 000000000000..7f7368cb3095
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/htab_reuse.c
@@ -0,0 +1,19 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (C) 2023. Huawei Technologies Co., Ltd */
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+
+char _license[] SEC("license") = "GPL";
+
+struct htab_val {
+	struct bpf_spin_lock lock;
+	unsigned int data;
+};
+
+struct {
+	__uint(type, BPF_MAP_TYPE_HASH);
+	__uint(max_entries, 64);
+	__type(key, unsigned int);
+	__type(value, struct htab_val);
+	__uint(map_flags, BPF_F_NO_PREALLOC);
+} htab SEC(".maps");
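Assuming a standard selftests build, the new test can be exercised on its
own through the test_progs runner:

	cd tools/testing/selftests/bpf && make
	./test_progs -t htab_reuse

The test spawns one update/delete thread against four BPF_F_LOCK lookup
threads on keys 7 and 24; before the __GFP_ZERO fix, a lookup could end up
spinning forever on a lock word that element reuse had just reinitialized.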