diff options
-rw-r--r-- | MAINTAINERS | 1 | ||||
-rw-r--r-- | include/net/xsk_buff_pool.h | 2 | ||||
-rw-r--r-- | include/uapi/linux/bpf.h | 7 | ||||
-rw-r--r-- | kernel/bpf/btf.c | 2 | ||||
-rw-r--r-- | kernel/bpf/cgroup.c | 28 | ||||
-rw-r--r-- | kernel/bpf/helpers.c | 28 | ||||
-rw-r--r-- | kernel/bpf/syscall.c | 2 | ||||
-rw-r--r-- | net/xdp/xsk.c | 26 | ||||
-rw-r--r-- | net/xdp/xsk_buff_pool.c | 5 | ||||
-rw-r--r-- | net/xdp/xsk_queue.h | 22 | ||||
-rw-r--r-- | tools/bpf/bpftool/cgroup.c | 54 | ||||
-rw-r--r-- | tools/include/uapi/linux/bpf.h | 7 | ||||
-rw-r--r-- | tools/testing/selftests/bpf/prog_tests/cgroup_link.c | 11 | ||||
-rw-r--r-- | tools/testing/selftests/bpf/xskxceiver.c | 4 |
14 files changed, 130 insertions, 69 deletions
diff --git a/MAINTAINERS b/MAINTAINERS index 17abc6483100..6105ee47d5de 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -3825,6 +3825,7 @@ F: kernel/bpf/dispatcher.c F: kernel/bpf/trampoline.c F: include/linux/bpf* F: include/linux/filter.h +F: include/linux/tnum.h BPF [BTF] M: Martin KaFai Lau <martin.lau@linux.dev> diff --git a/include/net/xsk_buff_pool.h b/include/net/xsk_buff_pool.h index 647722e847b4..f787c3f524b0 100644 --- a/include/net/xsk_buff_pool.h +++ b/include/net/xsk_buff_pool.h @@ -95,7 +95,7 @@ struct xsk_buff_pool *xp_create_and_assign_umem(struct xdp_sock *xs, struct xdp_umem *umem); int xp_assign_dev(struct xsk_buff_pool *pool, struct net_device *dev, u16 queue_id, u16 flags); -int xp_assign_dev_shared(struct xsk_buff_pool *pool, struct xdp_umem *umem, +int xp_assign_dev_shared(struct xsk_buff_pool *pool, struct xdp_sock *umem_xs, struct net_device *dev, u16 queue_id); int xp_alloc_tx_descs(struct xsk_buff_pool *pool, struct xdp_sock *xs); void xp_destroy(struct xsk_buff_pool *pool); diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 59a217ca2dfd..4eff7fc7ae58 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -1233,7 +1233,7 @@ enum { /* Query effective (directly attached + inherited from ancestor cgroups) * programs that will be executed for events within a cgroup. - * attach_flags with this flag are returned only for directly attached programs. + * attach_flags with this flag are always returned 0. */ #define BPF_F_QUERY_EFFECTIVE (1U << 0) @@ -1432,7 +1432,10 @@ union bpf_attr { __u32 attach_flags; __aligned_u64 prog_ids; __u32 prog_cnt; - __aligned_u64 prog_attach_flags; /* output: per-program attach_flags */ + /* output: per-program attach_flags. + * not allowed to be set during effective query. + */ + __aligned_u64 prog_attach_flags; } query; struct { /* anonymous struct used by BPF_RAW_TRACEPOINT_OPEN command */ diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c index 7e64447659f3..36fd4b509294 100644 --- a/kernel/bpf/btf.c +++ b/kernel/bpf/btf.c @@ -3128,7 +3128,7 @@ static int btf_struct_resolve(struct btf_verifier_env *env, if (v->next_member) { const struct btf_type *last_member_type; const struct btf_member *last_member; - u16 last_member_type_id; + u32 last_member_type_id; last_member = btf_type_member(v->t) + v->next_member - 1; last_member_type_id = last_member->type; diff --git a/kernel/bpf/cgroup.c b/kernel/bpf/cgroup.c index 4a400cd63731..22888aaa68b6 100644 --- a/kernel/bpf/cgroup.c +++ b/kernel/bpf/cgroup.c @@ -1020,6 +1020,7 @@ static int __cgroup_bpf_query(struct cgroup *cgrp, const union bpf_attr *attr, union bpf_attr __user *uattr) { __u32 __user *prog_attach_flags = u64_to_user_ptr(attr->query.prog_attach_flags); + bool effective_query = attr->query.query_flags & BPF_F_QUERY_EFFECTIVE; __u32 __user *prog_ids = u64_to_user_ptr(attr->query.prog_ids); enum bpf_attach_type type = attr->query.attach_type; enum cgroup_bpf_attach_type from_atype, to_atype; @@ -1029,8 +1030,12 @@ static int __cgroup_bpf_query(struct cgroup *cgrp, const union bpf_attr *attr, int total_cnt = 0; u32 flags; + if (effective_query && prog_attach_flags) + return -EINVAL; + if (type == BPF_LSM_CGROUP) { - if (attr->query.prog_cnt && prog_ids && !prog_attach_flags) + if (!effective_query && attr->query.prog_cnt && + prog_ids && !prog_attach_flags) return -EINVAL; from_atype = CGROUP_LSM_START; @@ -1045,7 +1050,7 @@ static int __cgroup_bpf_query(struct cgroup *cgrp, const union bpf_attr *attr, } for (atype = from_atype; atype <= to_atype; atype++) { - if (attr->query.query_flags & BPF_F_QUERY_EFFECTIVE) { + if (effective_query) { effective = rcu_dereference_protected(cgrp->bpf.effective[atype], lockdep_is_held(&cgroup_mutex)); total_cnt += bpf_prog_array_length(effective); @@ -1054,6 +1059,8 @@ static int __cgroup_bpf_query(struct cgroup *cgrp, const union bpf_attr *attr, } } + /* always output uattr->query.attach_flags as 0 during effective query */ + flags = effective_query ? 0 : flags; if (copy_to_user(&uattr->query.attach_flags, &flags, sizeof(flags))) return -EFAULT; if (copy_to_user(&uattr->query.prog_cnt, &total_cnt, sizeof(total_cnt))) @@ -1068,7 +1075,7 @@ static int __cgroup_bpf_query(struct cgroup *cgrp, const union bpf_attr *attr, } for (atype = from_atype; atype <= to_atype && total_cnt; atype++) { - if (attr->query.query_flags & BPF_F_QUERY_EFFECTIVE) { + if (effective_query) { effective = rcu_dereference_protected(cgrp->bpf.effective[atype], lockdep_is_held(&cgroup_mutex)); cnt = min_t(int, bpf_prog_array_length(effective), total_cnt); @@ -1090,15 +1097,16 @@ static int __cgroup_bpf_query(struct cgroup *cgrp, const union bpf_attr *attr, if (++i == cnt) break; } - } - if (prog_attach_flags) { - flags = cgrp->bpf.flags[atype]; + if (prog_attach_flags) { + flags = cgrp->bpf.flags[atype]; - for (i = 0; i < cnt; i++) - if (copy_to_user(prog_attach_flags + i, &flags, sizeof(flags))) - return -EFAULT; - prog_attach_flags += cnt; + for (i = 0; i < cnt; i++) + if (copy_to_user(prog_attach_flags + i, + &flags, sizeof(flags))) + return -EFAULT; + prog_attach_flags += cnt; + } } prog_ids += cnt; diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c index 1f961f9982d2..3814b0fd3a2c 100644 --- a/kernel/bpf/helpers.c +++ b/kernel/bpf/helpers.c @@ -1627,26 +1627,12 @@ bpf_base_func_proto(enum bpf_func_id func_id) return &bpf_ringbuf_discard_proto; case BPF_FUNC_ringbuf_query: return &bpf_ringbuf_query_proto; - case BPF_FUNC_ringbuf_reserve_dynptr: - return &bpf_ringbuf_reserve_dynptr_proto; - case BPF_FUNC_ringbuf_submit_dynptr: - return &bpf_ringbuf_submit_dynptr_proto; - case BPF_FUNC_ringbuf_discard_dynptr: - return &bpf_ringbuf_discard_dynptr_proto; case BPF_FUNC_for_each_map_elem: return &bpf_for_each_map_elem_proto; case BPF_FUNC_loop: return &bpf_loop_proto; case BPF_FUNC_strncmp: return &bpf_strncmp_proto; - case BPF_FUNC_dynptr_from_mem: - return &bpf_dynptr_from_mem_proto; - case BPF_FUNC_dynptr_read: - return &bpf_dynptr_read_proto; - case BPF_FUNC_dynptr_write: - return &bpf_dynptr_write_proto; - case BPF_FUNC_dynptr_data: - return &bpf_dynptr_data_proto; default: break; } @@ -1675,6 +1661,20 @@ bpf_base_func_proto(enum bpf_func_id func_id) return &bpf_timer_cancel_proto; case BPF_FUNC_kptr_xchg: return &bpf_kptr_xchg_proto; + case BPF_FUNC_ringbuf_reserve_dynptr: + return &bpf_ringbuf_reserve_dynptr_proto; + case BPF_FUNC_ringbuf_submit_dynptr: + return &bpf_ringbuf_submit_dynptr_proto; + case BPF_FUNC_ringbuf_discard_dynptr: + return &bpf_ringbuf_discard_dynptr_proto; + case BPF_FUNC_dynptr_from_mem: + return &bpf_dynptr_from_mem_proto; + case BPF_FUNC_dynptr_read: + return &bpf_dynptr_read_proto; + case BPF_FUNC_dynptr_write: + return &bpf_dynptr_write_proto; + case BPF_FUNC_dynptr_data: + return &bpf_dynptr_data_proto; default: break; } diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 27760627370d..1bd18af8af83 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -4395,7 +4395,9 @@ static int bpf_task_fd_query(const union bpf_attr *attr, if (attr->task_fd_query.flags != 0) return -EINVAL; + rcu_read_lock(); task = get_pid_task(find_vpid(pid), PIDTYPE_PID); + rcu_read_unlock(); if (!task) return -ENOENT; diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c index 5b4ce6ba1bc7..9f0561b67c12 100644 --- a/net/xdp/xsk.c +++ b/net/xdp/xsk.c @@ -355,16 +355,15 @@ static u32 xsk_tx_peek_release_fallback(struct xsk_buff_pool *pool, u32 max_entr return nb_pkts; } -u32 xsk_tx_peek_release_desc_batch(struct xsk_buff_pool *pool, u32 max_entries) +u32 xsk_tx_peek_release_desc_batch(struct xsk_buff_pool *pool, u32 nb_pkts) { struct xdp_sock *xs; - u32 nb_pkts; rcu_read_lock(); if (!list_is_singular(&pool->xsk_tx_list)) { /* Fallback to the non-batched version */ rcu_read_unlock(); - return xsk_tx_peek_release_fallback(pool, max_entries); + return xsk_tx_peek_release_fallback(pool, nb_pkts); } xs = list_first_or_null_rcu(&pool->xsk_tx_list, struct xdp_sock, tx_list); @@ -373,12 +372,7 @@ u32 xsk_tx_peek_release_desc_batch(struct xsk_buff_pool *pool, u32 max_entries) goto out; } - max_entries = xskq_cons_nb_entries(xs->tx, max_entries); - nb_pkts = xskq_cons_read_desc_batch(xs->tx, pool, max_entries); - if (!nb_pkts) { - xs->tx->queue_empty_descs++; - goto out; - } + nb_pkts = xskq_cons_nb_entries(xs->tx, nb_pkts); /* This is the backpressure mechanism for the Tx path. Try to * reserve space in the completion queue for all packets, but @@ -386,12 +380,18 @@ u32 xsk_tx_peek_release_desc_batch(struct xsk_buff_pool *pool, u32 max_entries) * packets. This avoids having to implement any buffering in * the Tx path. */ - nb_pkts = xskq_prod_reserve_addr_batch(pool->cq, pool->tx_descs, nb_pkts); + nb_pkts = xskq_prod_nb_free(pool->cq, nb_pkts); if (!nb_pkts) goto out; - xskq_cons_release_n(xs->tx, max_entries); + nb_pkts = xskq_cons_read_desc_batch(xs->tx, pool, nb_pkts); + if (!nb_pkts) { + xs->tx->queue_empty_descs++; + goto out; + } + __xskq_cons_release(xs->tx); + xskq_prod_write_addr_batch(pool->cq, pool->tx_descs, nb_pkts); xs->sk.sk_write_space(&xs->sk); out: @@ -954,8 +954,8 @@ static int xsk_bind(struct socket *sock, struct sockaddr *addr, int addr_len) goto out_unlock; } - err = xp_assign_dev_shared(xs->pool, umem_xs->umem, - dev, qid); + err = xp_assign_dev_shared(xs->pool, umem_xs, dev, + qid); if (err) { xp_destroy(xs->pool); xs->pool = NULL; diff --git a/net/xdp/xsk_buff_pool.c b/net/xdp/xsk_buff_pool.c index a71a8c6edf55..ed6c71826d31 100644 --- a/net/xdp/xsk_buff_pool.c +++ b/net/xdp/xsk_buff_pool.c @@ -212,17 +212,18 @@ err_unreg_pool: return err; } -int xp_assign_dev_shared(struct xsk_buff_pool *pool, struct xdp_umem *umem, +int xp_assign_dev_shared(struct xsk_buff_pool *pool, struct xdp_sock *umem_xs, struct net_device *dev, u16 queue_id) { u16 flags; + struct xdp_umem *umem = umem_xs->umem; /* One fill and completion ring required for each queue id. */ if (!pool->fq || !pool->cq) return -EINVAL; flags = umem->zc ? XDP_ZEROCOPY : XDP_COPY; - if (pool->uses_need_wakeup) + if (umem_xs->pool->uses_need_wakeup) flags |= XDP_USE_NEED_WAKEUP; return xp_assign_dev(pool, dev, queue_id, flags); diff --git a/net/xdp/xsk_queue.h b/net/xdp/xsk_queue.h index fb20bf7207cf..c6fb6b763658 100644 --- a/net/xdp/xsk_queue.h +++ b/net/xdp/xsk_queue.h @@ -205,6 +205,11 @@ static inline bool xskq_cons_read_desc(struct xsk_queue *q, return false; } +static inline void xskq_cons_release_n(struct xsk_queue *q, u32 cnt) +{ + q->cached_cons += cnt; +} + static inline u32 xskq_cons_read_desc_batch(struct xsk_queue *q, struct xsk_buff_pool *pool, u32 max) { @@ -226,6 +231,8 @@ static inline u32 xskq_cons_read_desc_batch(struct xsk_queue *q, struct xsk_buff cached_cons++; } + /* Release valid plus any invalid entries */ + xskq_cons_release_n(q, cached_cons - q->cached_cons); return nb_entries; } @@ -291,11 +298,6 @@ static inline void xskq_cons_release(struct xsk_queue *q) q->cached_cons++; } -static inline void xskq_cons_release_n(struct xsk_queue *q, u32 cnt) -{ - q->cached_cons += cnt; -} - static inline u32 xskq_cons_present_entries(struct xsk_queue *q) { /* No barriers needed since data is not accessed */ @@ -350,21 +352,17 @@ static inline int xskq_prod_reserve_addr(struct xsk_queue *q, u64 addr) return 0; } -static inline u32 xskq_prod_reserve_addr_batch(struct xsk_queue *q, struct xdp_desc *descs, - u32 max) +static inline void xskq_prod_write_addr_batch(struct xsk_queue *q, struct xdp_desc *descs, + u32 nb_entries) { struct xdp_umem_ring *ring = (struct xdp_umem_ring *)q->ring; - u32 nb_entries, i, cached_prod; - - nb_entries = xskq_prod_nb_free(q, max); + u32 i, cached_prod; /* A, matches D */ cached_prod = q->cached_prod; for (i = 0; i < nb_entries; i++) ring->desc[cached_prod++ & q->ring_mask] = descs[i].addr; q->cached_prod = cached_prod; - - return nb_entries; } static inline int xskq_prod_reserve_desc(struct xsk_queue *q, diff --git a/tools/bpf/bpftool/cgroup.c b/tools/bpf/bpftool/cgroup.c index cced668fb2a3..b46a998d8f8d 100644 --- a/tools/bpf/bpftool/cgroup.c +++ b/tools/bpf/bpftool/cgroup.c @@ -136,8 +136,8 @@ static int show_bpf_prog(int id, enum bpf_attach_type attach_type, jsonw_string_field(json_wtr, "attach_type", attach_type_str); else jsonw_uint_field(json_wtr, "attach_type", attach_type); - jsonw_string_field(json_wtr, "attach_flags", - attach_flags_str); + if (!(query_flags & BPF_F_QUERY_EFFECTIVE)) + jsonw_string_field(json_wtr, "attach_flags", attach_flags_str); jsonw_string_field(json_wtr, "name", prog_name); if (attach_btf_name) jsonw_string_field(json_wtr, "attach_btf_name", attach_btf_name); @@ -150,7 +150,10 @@ static int show_bpf_prog(int id, enum bpf_attach_type attach_type, printf("%-15s", attach_type_str); else printf("type %-10u", attach_type); - printf(" %-15s %-15s", attach_flags_str, prog_name); + if (query_flags & BPF_F_QUERY_EFFECTIVE) + printf(" %-15s", prog_name); + else + printf(" %-15s %-15s", attach_flags_str, prog_name); if (attach_btf_name) printf(" %-15s", attach_btf_name); else if (info.attach_btf_id) @@ -195,6 +198,32 @@ static int cgroup_has_attached_progs(int cgroup_fd) return no_prog ? 0 : 1; } + +static int show_effective_bpf_progs(int cgroup_fd, enum bpf_attach_type type, + int level) +{ + LIBBPF_OPTS(bpf_prog_query_opts, p); + __u32 prog_ids[1024] = {0}; + __u32 iter; + int ret; + + p.query_flags = query_flags; + p.prog_cnt = ARRAY_SIZE(prog_ids); + p.prog_ids = prog_ids; + + ret = bpf_prog_query_opts(cgroup_fd, type, &p); + if (ret) + return ret; + + if (p.prog_cnt == 0) + return 0; + + for (iter = 0; iter < p.prog_cnt; iter++) + show_bpf_prog(prog_ids[iter], type, NULL, level); + + return 0; +} + static int show_attached_bpf_progs(int cgroup_fd, enum bpf_attach_type type, int level) { @@ -245,6 +274,14 @@ static int show_attached_bpf_progs(int cgroup_fd, enum bpf_attach_type type, return 0; } +static int show_bpf_progs(int cgroup_fd, enum bpf_attach_type type, + int level) +{ + return query_flags & BPF_F_QUERY_EFFECTIVE ? + show_effective_bpf_progs(cgroup_fd, type, level) : + show_attached_bpf_progs(cgroup_fd, type, level); +} + static int do_show(int argc, char **argv) { enum bpf_attach_type type; @@ -292,6 +329,8 @@ static int do_show(int argc, char **argv) if (json_output) jsonw_start_array(json_wtr); + else if (query_flags & BPF_F_QUERY_EFFECTIVE) + printf("%-8s %-15s %-15s\n", "ID", "AttachType", "Name"); else printf("%-8s %-15s %-15s %-15s\n", "ID", "AttachType", "AttachFlags", "Name"); @@ -304,7 +343,7 @@ static int do_show(int argc, char **argv) * If we were able to get the show for at least one * attach type, let's return 0. */ - if (show_attached_bpf_progs(cgroup_fd, type, 0) == 0) + if (show_bpf_progs(cgroup_fd, type, 0) == 0) ret = 0; } @@ -362,7 +401,7 @@ static int do_show_tree_fn(const char *fpath, const struct stat *sb, btf_vmlinux = libbpf_find_kernel_btf(); for (type = 0; type < __MAX_BPF_ATTACH_TYPE; type++) - show_attached_bpf_progs(cgroup_fd, type, ftw->level); + show_bpf_progs(cgroup_fd, type, ftw->level); if (errno == EINVAL) /* Last attach type does not support query. @@ -436,6 +475,11 @@ static int do_show_tree(int argc, char **argv) if (json_output) jsonw_start_array(json_wtr); + else if (query_flags & BPF_F_QUERY_EFFECTIVE) + printf("%s\n" + "%-8s %-15s %-15s\n", + "CgroupPath", + "ID", "AttachType", "Name"); else printf("%s\n" "%-8s %-15s %-15s %-15s\n", diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 59a217ca2dfd..4eff7fc7ae58 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -1233,7 +1233,7 @@ enum { /* Query effective (directly attached + inherited from ancestor cgroups) * programs that will be executed for events within a cgroup. - * attach_flags with this flag are returned only for directly attached programs. + * attach_flags with this flag are always returned 0. */ #define BPF_F_QUERY_EFFECTIVE (1U << 0) @@ -1432,7 +1432,10 @@ union bpf_attr { __u32 attach_flags; __aligned_u64 prog_ids; __u32 prog_cnt; - __aligned_u64 prog_attach_flags; /* output: per-program attach_flags */ + /* output: per-program attach_flags. + * not allowed to be set during effective query. + */ + __aligned_u64 prog_attach_flags; } query; struct { /* anonymous struct used by BPF_RAW_TRACEPOINT_OPEN command */ diff --git a/tools/testing/selftests/bpf/prog_tests/cgroup_link.c b/tools/testing/selftests/bpf/prog_tests/cgroup_link.c index 9e6e6aad347c..15093a69510e 100644 --- a/tools/testing/selftests/bpf/prog_tests/cgroup_link.c +++ b/tools/testing/selftests/bpf/prog_tests/cgroup_link.c @@ -71,10 +71,9 @@ void serial_test_cgroup_link(void) ping_and_check(cg_nr, 0); - /* query the number of effective progs and attach flags in root cg */ + /* query the number of attached progs and attach flags in root cg */ err = bpf_prog_query(cgs[0].fd, BPF_CGROUP_INET_EGRESS, - BPF_F_QUERY_EFFECTIVE, &attach_flags, NULL, - &prog_cnt); + 0, &attach_flags, NULL, &prog_cnt); CHECK_FAIL(err); CHECK_FAIL(attach_flags != BPF_F_ALLOW_MULTI); if (CHECK(prog_cnt != 1, "effect_cnt", "exp %d, got %d\n", 1, prog_cnt)) @@ -85,17 +84,15 @@ void serial_test_cgroup_link(void) BPF_F_QUERY_EFFECTIVE, NULL, NULL, &prog_cnt); CHECK_FAIL(err); - CHECK_FAIL(attach_flags != BPF_F_ALLOW_MULTI); if (CHECK(prog_cnt != cg_nr, "effect_cnt", "exp %d, got %d\n", cg_nr, prog_cnt)) goto cleanup; /* query the effective prog IDs in last cg */ err = bpf_prog_query(cgs[last_cg].fd, BPF_CGROUP_INET_EGRESS, - BPF_F_QUERY_EFFECTIVE, &attach_flags, - prog_ids, &prog_cnt); + BPF_F_QUERY_EFFECTIVE, NULL, prog_ids, + &prog_cnt); CHECK_FAIL(err); - CHECK_FAIL(attach_flags != BPF_F_ALLOW_MULTI); if (CHECK(prog_cnt != cg_nr, "effect_cnt", "exp %d, got %d\n", cg_nr, prog_cnt)) goto cleanup; diff --git a/tools/testing/selftests/bpf/xskxceiver.c b/tools/testing/selftests/bpf/xskxceiver.c index 74d56d971baf..091402dc5390 100644 --- a/tools/testing/selftests/bpf/xskxceiver.c +++ b/tools/testing/selftests/bpf/xskxceiver.c @@ -1606,6 +1606,8 @@ static struct ifobject *ifobject_create(void) if (!ifobj->umem) goto out_umem; + ifobj->ns_fd = -1; + return ifobj; out_umem: @@ -1617,6 +1619,8 @@ out_xsk_arr: static void ifobject_delete(struct ifobject *ifobj) { + if (ifobj->ns_fd != -1) + close(ifobj->ns_fd); free(ifobj->umem); free(ifobj->xsk_arr); free(ifobj); |