diff options
author | Jakub Kicinski | 2023-02-20 15:38:41 -0800 |
---|---|---|
committer | Jakub Kicinski | 2023-02-20 16:31:14 -0800 |
commit | ee8d72a157ebb4b8c4b8b664f5a78a341fede2ef (patch) | |
tree | 4c6abf1db03b7bffaf8d9a41ecda74b9041312fb /net | |
parent | 01bb11ad828b320749764fa93ad078db20d08a9e (diff) | |
parent | 168de0233586fb06c5c5c56304aa9a928a09b0ba (diff) |
Merge tag 'for-netdev' of https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next
Daniel Borkmann says:
====================
pull-request: bpf-next 2023-02-17
We've added 64 non-merge commits during the last 7 day(s) which contain
a total of 158 files changed, 4190 insertions(+), 988 deletions(-).
The main changes are:
1) Add a rbtree data structure following the "next-gen data structure"
precedent set by recently-added linked-list, that is, by using
kfunc + kptr instead of adding a new BPF map type, from Dave Marchevsky.
2) Add a new benchmark for hashmap lookups to BPF selftests,
from Anton Protopopov.
3) Fix bpf_fib_lookup to only return valid neighbors and add an option
to skip the neigh table lookup, from Martin KaFai Lau.
4) Add cgroup.memory=nobpf kernel parameter option to disable BPF memory
accouting for container environments, from Yafang Shao.
5) Batch of ice multi-buffer and driver performance fixes,
from Alexander Lobakin.
6) Fix a bug in determining whether global subprog's argument is
PTR_TO_CTX, which is based on type names which breaks kprobe progs,
from Andrii Nakryiko.
7) Prep work for future -mcpu=v4 LLVM option which includes usage of
BPF_ST insn. Thus improve BPF_ST-related value tracking in verifier,
from Eduard Zingerman.
8) More prep work for later building selftests with Memory Sanitizer
in order to detect usages of undefined memory, from Ilya Leoshkevich.
9) Fix xsk sockets to check IFF_UP earlier to avoid a NULL pointer
dereference via sendmsg(), from Maciej Fijalkowski.
10) Implement BPF trampoline for RV64 JIT compiler, from Pu Lehui.
11) Fix BPF memory allocator in combination with BPF hashtab where it could
corrupt special fields e.g. used in bpf_spin_lock, from Hou Tao.
12) Fix LoongArch BPF JIT to always use 4 instructions for function
address so that instruction sequences don't change between passes,
from Hengqi Chen.
* tag 'for-netdev' of https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next: (64 commits)
selftests/bpf: Add bpf_fib_lookup test
bpf: Add BPF_FIB_LOOKUP_SKIP_NEIGH for bpf_fib_lookup
riscv, bpf: Add bpf trampoline support for RV64
riscv, bpf: Add bpf_arch_text_poke support for RV64
riscv, bpf: Factor out emit_call for kernel and bpf context
riscv: Extend patch_text for multiple instructions
Revert "bpf, test_run: fix &xdp_frame misplacement for LIVE_FRAMES"
selftests/bpf: Add global subprog context passing tests
selftests/bpf: Convert test_global_funcs test to test_loader framework
bpf: Fix global subprog context argument resolution logic
LoongArch, bpf: Use 4 instructions for function address in JIT
bpf: bpf_fib_lookup should not return neigh in NUD_FAILED state
bpf: Disable bh in bpf_test_run for xdp and tc prog
xsk: check IFF_UP earlier in Tx path
Fix typos in selftest/bpf files
selftests/bpf: Use bpf_{btf,link,map,prog}_get_info_by_fd()
samples/bpf: Use bpf_{btf,link,map,prog}_get_info_by_fd()
bpftool: Use bpf_{btf,link,map,prog}_get_info_by_fd()
libbpf: Use bpf_{btf,link,map,prog}_get_info_by_fd()
libbpf: Introduce bpf_{btf,link,map,prog}_get_info_by_fd()
...
====================
Link: https://lore.kernel.org/r/20230217221737.31122-1-daniel@iogearbox.net
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Diffstat (limited to 'net')
-rw-r--r-- | net/bpf/test_run.c | 2 | ||||
-rw-r--r-- | net/core/filter.c | 43 | ||||
-rw-r--r-- | net/xdp/xsk.c | 59 |
3 files changed, 63 insertions, 41 deletions
diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c index b766a84c8536..6f3d654b3339 100644 --- a/net/bpf/test_run.c +++ b/net/bpf/test_run.c @@ -396,10 +396,12 @@ static int bpf_test_run(struct bpf_prog *prog, void *ctx, u32 repeat, old_ctx = bpf_set_run_ctx(&run_ctx.run_ctx); do { run_ctx.prog_item = &item; + local_bh_disable(); if (xdp) *retval = bpf_prog_run_xdp(prog, ctx); else *retval = bpf_prog_run(prog, ctx); + local_bh_enable(); } while (bpf_test_timer_continue(&t, 1, repeat, &ret, time)); bpf_reset_run_ctx(old_ctx); bpf_test_timer_leave(&t); diff --git a/net/core/filter.c b/net/core/filter.c index 2ce06a72a5ba..1d6f165923bf 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -5722,12 +5722,8 @@ static const struct bpf_func_proto bpf_skb_get_xfrm_state_proto = { #endif #if IS_ENABLED(CONFIG_INET) || IS_ENABLED(CONFIG_IPV6) -static int bpf_fib_set_fwd_params(struct bpf_fib_lookup *params, - const struct neighbour *neigh, - const struct net_device *dev, u32 mtu) +static int bpf_fib_set_fwd_params(struct bpf_fib_lookup *params, u32 mtu) { - memcpy(params->dmac, neigh->ha, ETH_ALEN); - memcpy(params->smac, dev->dev_addr, ETH_ALEN); params->h_vlan_TCI = 0; params->h_vlan_proto = 0; if (mtu) @@ -5838,21 +5834,29 @@ static int bpf_ipv4_fib_lookup(struct net *net, struct bpf_fib_lookup *params, if (likely(nhc->nhc_gw_family != AF_INET6)) { if (nhc->nhc_gw_family) params->ipv4_dst = nhc->nhc_gw.ipv4; - - neigh = __ipv4_neigh_lookup_noref(dev, - (__force u32)params->ipv4_dst); } else { struct in6_addr *dst = (struct in6_addr *)params->ipv6_dst; params->family = AF_INET6; *dst = nhc->nhc_gw.ipv6; - neigh = __ipv6_neigh_lookup_noref_stub(dev, dst); } - if (!neigh) + if (flags & BPF_FIB_LOOKUP_SKIP_NEIGH) + goto set_fwd_params; + + if (likely(nhc->nhc_gw_family != AF_INET6)) + neigh = __ipv4_neigh_lookup_noref(dev, + (__force u32)params->ipv4_dst); + else + neigh = __ipv6_neigh_lookup_noref_stub(dev, params->ipv6_dst); + + if (!neigh || !(neigh->nud_state & NUD_VALID)) return BPF_FIB_LKUP_RET_NO_NEIGH; + memcpy(params->dmac, neigh->ha, ETH_ALEN); + memcpy(params->smac, dev->dev_addr, ETH_ALEN); - return bpf_fib_set_fwd_params(params, neigh, dev, mtu); +set_fwd_params: + return bpf_fib_set_fwd_params(params, mtu); } #endif @@ -5960,24 +5964,33 @@ static int bpf_ipv6_fib_lookup(struct net *net, struct bpf_fib_lookup *params, params->rt_metric = res.f6i->fib6_metric; params->ifindex = dev->ifindex; + if (flags & BPF_FIB_LOOKUP_SKIP_NEIGH) + goto set_fwd_params; + /* xdp and cls_bpf programs are run in RCU-bh so rcu_read_lock_bh is * not needed here. */ neigh = __ipv6_neigh_lookup_noref_stub(dev, dst); - if (!neigh) + if (!neigh || !(neigh->nud_state & NUD_VALID)) return BPF_FIB_LKUP_RET_NO_NEIGH; + memcpy(params->dmac, neigh->ha, ETH_ALEN); + memcpy(params->smac, dev->dev_addr, ETH_ALEN); - return bpf_fib_set_fwd_params(params, neigh, dev, mtu); +set_fwd_params: + return bpf_fib_set_fwd_params(params, mtu); } #endif +#define BPF_FIB_LOOKUP_MASK (BPF_FIB_LOOKUP_DIRECT | BPF_FIB_LOOKUP_OUTPUT | \ + BPF_FIB_LOOKUP_SKIP_NEIGH) + BPF_CALL_4(bpf_xdp_fib_lookup, struct xdp_buff *, ctx, struct bpf_fib_lookup *, params, int, plen, u32, flags) { if (plen < sizeof(*params)) return -EINVAL; - if (flags & ~(BPF_FIB_LOOKUP_DIRECT | BPF_FIB_LOOKUP_OUTPUT)) + if (flags & ~BPF_FIB_LOOKUP_MASK) return -EINVAL; switch (params->family) { @@ -6015,7 +6028,7 @@ BPF_CALL_4(bpf_skb_fib_lookup, struct sk_buff *, skb, if (plen < sizeof(*params)) return -EINVAL; - if (flags & ~(BPF_FIB_LOOKUP_DIRECT | BPF_FIB_LOOKUP_OUTPUT)) + if (flags & ~BPF_FIB_LOOKUP_MASK) return -EINVAL; if (params->tot_len) diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c index 63c82e8bcd8e..45eef5af0a51 100644 --- a/net/xdp/xsk.c +++ b/net/xdp/xsk.c @@ -511,7 +511,7 @@ static struct sk_buff *xsk_build_skb(struct xdp_sock *xs, return skb; } -static int xsk_generic_xmit(struct sock *sk) +static int __xsk_generic_xmit(struct sock *sk) { struct xdp_sock *xs = xdp_sk(sk); u32 max_batch = TX_BATCH_SIZE; @@ -594,22 +594,13 @@ out: return err; } -static int xsk_xmit(struct sock *sk) +static int xsk_generic_xmit(struct sock *sk) { - struct xdp_sock *xs = xdp_sk(sk); int ret; - if (unlikely(!(xs->dev->flags & IFF_UP))) - return -ENETDOWN; - if (unlikely(!xs->tx)) - return -ENOBUFS; - - if (xs->zc) - return xsk_wakeup(xs, XDP_WAKEUP_TX); - /* Drop the RCU lock since the SKB path might sleep. */ rcu_read_unlock(); - ret = xsk_generic_xmit(sk); + ret = __xsk_generic_xmit(sk); /* Reaquire RCU lock before going into common code. */ rcu_read_lock(); @@ -627,17 +618,31 @@ static bool xsk_no_wakeup(struct sock *sk) #endif } +static int xsk_check_common(struct xdp_sock *xs) +{ + if (unlikely(!xsk_is_bound(xs))) + return -ENXIO; + if (unlikely(!(xs->dev->flags & IFF_UP))) + return -ENETDOWN; + + return 0; +} + static int __xsk_sendmsg(struct socket *sock, struct msghdr *m, size_t total_len) { bool need_wait = !(m->msg_flags & MSG_DONTWAIT); struct sock *sk = sock->sk; struct xdp_sock *xs = xdp_sk(sk); struct xsk_buff_pool *pool; + int err; - if (unlikely(!xsk_is_bound(xs))) - return -ENXIO; + err = xsk_check_common(xs); + if (err) + return err; if (unlikely(need_wait)) return -EOPNOTSUPP; + if (unlikely(!xs->tx)) + return -ENOBUFS; if (sk_can_busy_loop(sk)) { if (xs->zc) @@ -649,8 +654,11 @@ static int __xsk_sendmsg(struct socket *sock, struct msghdr *m, size_t total_len return 0; pool = xs->pool; - if (pool->cached_need_wakeup & XDP_WAKEUP_TX) - return xsk_xmit(sk); + if (pool->cached_need_wakeup & XDP_WAKEUP_TX) { + if (xs->zc) + return xsk_wakeup(xs, XDP_WAKEUP_TX); + return xsk_generic_xmit(sk); + } return 0; } @@ -670,11 +678,11 @@ static int __xsk_recvmsg(struct socket *sock, struct msghdr *m, size_t len, int bool need_wait = !(flags & MSG_DONTWAIT); struct sock *sk = sock->sk; struct xdp_sock *xs = xdp_sk(sk); + int err; - if (unlikely(!xsk_is_bound(xs))) - return -ENXIO; - if (unlikely(!(xs->dev->flags & IFF_UP))) - return -ENETDOWN; + err = xsk_check_common(xs); + if (err) + return err; if (unlikely(!xs->rx)) return -ENOBUFS; if (unlikely(need_wait)) @@ -713,21 +721,20 @@ static __poll_t xsk_poll(struct file *file, struct socket *sock, sock_poll_wait(file, sock, wait); rcu_read_lock(); - if (unlikely(!xsk_is_bound(xs))) { - rcu_read_unlock(); - return mask; - } + if (xsk_check_common(xs)) + goto skip_tx; pool = xs->pool; if (pool->cached_need_wakeup) { if (xs->zc) xsk_wakeup(xs, pool->cached_need_wakeup); - else + else if (xs->tx) /* Poll needs to drive Tx also in copy mode */ - xsk_xmit(sk); + xsk_generic_xmit(sk); } +skip_tx: if (xs->rx && !xskq_prod_is_empty(xs->rx)) mask |= EPOLLIN | EPOLLRDNORM; if (xs->tx && xsk_tx_writeable(xs)) |