diff options
author | Sebastian Andrzej Siewior | 2024-06-28 12:18:55 +0200 |
---|---|---|
committer | Paolo Abeni | 2024-07-02 15:26:57 +0200 |
commit | d839a73179ae91c07f5f2f97ccb9c69b2b7c3306 (patch) | |
tree | 78ca317c579749b037de74a2a509e97b4cfbac89 | |
parent | 2896624be30b049601ec3ef9b08df184d0c70495 (diff) |
net: Optimize xdp_do_flush() with bpf_net_context infos.
Every NIC driver utilizing XDP should invoke xdp_do_flush() after
processing all packages. With the introduction of the bpf_net_context
logic the flush lists (for dev, CPU-map and xsk) are lazy initialized
only if used. However xdp_do_flush() tries to flush all three of them so
all three lists are always initialized and the likely empty lists are
"iterated".
Without the usage of XDP but with CONFIG_DEBUG_NET the lists are also
initialized due to xdp_do_check_flushed().
Jakub suggest to utilize the hints in bpf_net_context and avoid invoking
the flush function. This will also avoiding initializing the lists which
are otherwise unused.
Introduce bpf_net_ctx_get_all_used_flush_lists() to return the
individual list if not-empty. Use the logic in xdp_do_flush() and
xdp_do_check_flushed(). Remove the not needed .*_check_flush().
Suggested-by: Jakub Kicinski <kuba@kernel.org>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Reviewed-by: Jakub Kicinski <kuba@kernel.org>
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
-rw-r--r-- | include/linux/bpf.h | 10 | ||||
-rw-r--r-- | include/linux/filter.h | 27 | ||||
-rw-r--r-- | include/net/xdp_sock.h | 14 | ||||
-rw-r--r-- | kernel/bpf/cpumap.c | 13 | ||||
-rw-r--r-- | kernel/bpf/devmap.c | 13 | ||||
-rw-r--r-- | net/core/filter.c | 33 | ||||
-rw-r--r-- | net/xdp/xsk.c | 13 |
7 files changed, 61 insertions, 62 deletions
diff --git a/include/linux/bpf.h b/include/linux/bpf.h index a834f4b761bc..f5c6bc9093a6 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -2494,7 +2494,7 @@ struct sk_buff; struct bpf_dtab_netdev; struct bpf_cpu_map_entry; -void __dev_flush(void); +void __dev_flush(struct list_head *flush_list); int dev_xdp_enqueue(struct net_device *dev, struct xdp_frame *xdpf, struct net_device *dev_rx); int dev_map_enqueue(struct bpf_dtab_netdev *dst, struct xdp_frame *xdpf, @@ -2507,7 +2507,7 @@ int dev_map_redirect_multi(struct net_device *dev, struct sk_buff *skb, struct bpf_prog *xdp_prog, struct bpf_map *map, bool exclude_ingress); -void __cpu_map_flush(void); +void __cpu_map_flush(struct list_head *flush_list); int cpu_map_enqueue(struct bpf_cpu_map_entry *rcpu, struct xdp_frame *xdpf, struct net_device *dev_rx); int cpu_map_generic_redirect(struct bpf_cpu_map_entry *rcpu, @@ -2644,8 +2644,6 @@ void bpf_dynptr_init(struct bpf_dynptr_kern *ptr, void *data, void bpf_dynptr_set_null(struct bpf_dynptr_kern *ptr); void bpf_dynptr_set_rdonly(struct bpf_dynptr_kern *ptr); -bool dev_check_flush(void); -bool cpu_map_check_flush(void); #else /* !CONFIG_BPF_SYSCALL */ static inline struct bpf_prog *bpf_prog_get(u32 ufd) { @@ -2738,7 +2736,7 @@ static inline struct bpf_token *bpf_token_get_from_fd(u32 ufd) return ERR_PTR(-EOPNOTSUPP); } -static inline void __dev_flush(void) +static inline void __dev_flush(struct list_head *flush_list) { } @@ -2784,7 +2782,7 @@ int dev_map_redirect_multi(struct net_device *dev, struct sk_buff *skb, return 0; } -static inline void __cpu_map_flush(void) +static inline void __cpu_map_flush(struct list_head *flush_list) { } diff --git a/include/linux/filter.h b/include/linux/filter.h index c0349522de8f..02ddcfdf94c4 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -829,6 +829,33 @@ static inline struct list_head *bpf_net_ctx_get_xskmap_flush_list(void) return &bpf_net_ctx->xskmap_map_flush_list; } +static inline void bpf_net_ctx_get_all_used_flush_lists(struct list_head **lh_map, + struct list_head **lh_dev, + struct list_head **lh_xsk) +{ + struct bpf_net_context *bpf_net_ctx = bpf_net_ctx_get(); + u32 kern_flags = bpf_net_ctx->ri.kern_flags; + struct list_head *lh; + + *lh_map = *lh_dev = *lh_xsk = NULL; + + if (!IS_ENABLED(CONFIG_BPF_SYSCALL)) + return; + + lh = &bpf_net_ctx->dev_map_flush_list; + if (kern_flags & BPF_RI_F_DEV_MAP_INIT && !list_empty(lh)) + *lh_dev = lh; + + lh = &bpf_net_ctx->cpu_map_flush_list; + if (kern_flags & BPF_RI_F_CPU_MAP_INIT && !list_empty(lh)) + *lh_map = lh; + + lh = &bpf_net_ctx->xskmap_map_flush_list; + if (IS_ENABLED(CONFIG_XDP_SOCKETS) && + kern_flags & BPF_RI_F_XSK_MAP_INIT && !list_empty(lh)) + *lh_xsk = lh; +} + /* Compute the linear packet data range [data, data_end) which * will be accessed by various program types (cls_bpf, act_bpf, * lwt, ...). Subsystems allowing direct data access must (!) diff --git a/include/net/xdp_sock.h b/include/net/xdp_sock.h index 3d54de168a6d..bfe625b55d55 100644 --- a/include/net/xdp_sock.h +++ b/include/net/xdp_sock.h @@ -121,7 +121,7 @@ struct xsk_tx_metadata_ops { int xsk_generic_rcv(struct xdp_sock *xs, struct xdp_buff *xdp); int __xsk_map_redirect(struct xdp_sock *xs, struct xdp_buff *xdp); -void __xsk_map_flush(void); +void __xsk_map_flush(struct list_head *flush_list); /** * xsk_tx_metadata_to_compl - Save enough relevant metadata information @@ -206,7 +206,7 @@ static inline int __xsk_map_redirect(struct xdp_sock *xs, struct xdp_buff *xdp) return -EOPNOTSUPP; } -static inline void __xsk_map_flush(void) +static inline void __xsk_map_flush(struct list_head *flush_list) { } @@ -228,14 +228,4 @@ static inline void xsk_tx_metadata_complete(struct xsk_tx_metadata_compl *compl, } #endif /* CONFIG_XDP_SOCKETS */ - -#if defined(CONFIG_XDP_SOCKETS) && defined(CONFIG_DEBUG_NET) -bool xsk_map_check_flush(void); -#else -static inline bool xsk_map_check_flush(void) -{ - return false; -} -#endif - #endif /* _LINUX_XDP_SOCK_H */ diff --git a/kernel/bpf/cpumap.c b/kernel/bpf/cpumap.c index 068e994ed781..4acf90cd79eb 100644 --- a/kernel/bpf/cpumap.c +++ b/kernel/bpf/cpumap.c @@ -757,9 +757,8 @@ trace: return ret; } -void __cpu_map_flush(void) +void __cpu_map_flush(struct list_head *flush_list) { - struct list_head *flush_list = bpf_net_ctx_get_cpu_map_flush_list(); struct xdp_bulk_queue *bq, *tmp; list_for_each_entry_safe(bq, tmp, flush_list, flush_node) { @@ -769,13 +768,3 @@ void __cpu_map_flush(void) wake_up_process(bq->obj->kthread); } } - -#ifdef CONFIG_DEBUG_NET -bool cpu_map_check_flush(void) -{ - if (list_empty(bpf_net_ctx_get_cpu_map_flush_list())) - return false; - __cpu_map_flush(); - return true; -} -#endif diff --git a/kernel/bpf/devmap.c b/kernel/bpf/devmap.c index 317ac2d66ebd..9ca47eaacdd5 100644 --- a/kernel/bpf/devmap.c +++ b/kernel/bpf/devmap.c @@ -412,9 +412,8 @@ out: * driver before returning from its napi->poll() routine. See the comment above * xdp_do_flush() in filter.c. */ -void __dev_flush(void) +void __dev_flush(struct list_head *flush_list) { - struct list_head *flush_list = bpf_net_ctx_get_dev_flush_list(); struct xdp_dev_bulk_queue *bq, *tmp; list_for_each_entry_safe(bq, tmp, flush_list, flush_node) { @@ -425,16 +424,6 @@ void __dev_flush(void) } } -#ifdef CONFIG_DEBUG_NET -bool dev_check_flush(void) -{ - if (list_empty(bpf_net_ctx_get_dev_flush_list())) - return false; - __dev_flush(); - return true; -} -#endif - /* Elements are kept alive by RCU; either by rcu_read_lock() (from syscall) or * by local_bh_disable() (from XDP calls inside NAPI). The * rcu_read_lock_bh_held() below makes lockdep accept both. diff --git a/net/core/filter.c b/net/core/filter.c index eb1c4425c06f..403d23faf22e 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -4277,22 +4277,39 @@ static const struct bpf_func_proto bpf_xdp_adjust_meta_proto = { */ void xdp_do_flush(void) { - __dev_flush(); - __cpu_map_flush(); - __xsk_map_flush(); + struct list_head *lh_map, *lh_dev, *lh_xsk; + + bpf_net_ctx_get_all_used_flush_lists(&lh_map, &lh_dev, &lh_xsk); + if (lh_dev) + __dev_flush(lh_dev); + if (lh_map) + __cpu_map_flush(lh_map); + if (lh_xsk) + __xsk_map_flush(lh_xsk); } EXPORT_SYMBOL_GPL(xdp_do_flush); #if defined(CONFIG_DEBUG_NET) && defined(CONFIG_BPF_SYSCALL) void xdp_do_check_flushed(struct napi_struct *napi) { - bool ret; + struct list_head *lh_map, *lh_dev, *lh_xsk; + bool missed = false; - ret = dev_check_flush(); - ret |= cpu_map_check_flush(); - ret |= xsk_map_check_flush(); + bpf_net_ctx_get_all_used_flush_lists(&lh_map, &lh_dev, &lh_xsk); + if (lh_dev) { + __dev_flush(lh_dev); + missed = true; + } + if (lh_map) { + __cpu_map_flush(lh_map); + missed = true; + } + if (lh_xsk) { + __xsk_map_flush(lh_xsk); + missed = true; + } - WARN_ONCE(ret, "Missing xdp_do_flush() invocation after NAPI by %ps\n", + WARN_ONCE(missed, "Missing xdp_do_flush() invocation after NAPI by %ps\n", napi->poll); } #endif diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c index ed062e038389..de9c0322bc29 100644 --- a/net/xdp/xsk.c +++ b/net/xdp/xsk.c @@ -383,9 +383,8 @@ int __xsk_map_redirect(struct xdp_sock *xs, struct xdp_buff *xdp) return 0; } -void __xsk_map_flush(void) +void __xsk_map_flush(struct list_head *flush_list) { - struct list_head *flush_list = bpf_net_ctx_get_xskmap_flush_list(); struct xdp_sock *xs, *tmp; list_for_each_entry_safe(xs, tmp, flush_list, flush_node) { @@ -394,16 +393,6 @@ void __xsk_map_flush(void) } } -#ifdef CONFIG_DEBUG_NET -bool xsk_map_check_flush(void) -{ - if (list_empty(bpf_net_ctx_get_xskmap_flush_list())) - return false; - __xsk_map_flush(); - return true; -} -#endif - void xsk_tx_completed(struct xsk_buff_pool *pool, u32 nb_entries) { xskq_prod_submit_n(pool->cq, nb_entries); |