diff options
128 files changed, 4990 insertions, 895 deletions
diff --git a/Documentation/bpf/btf.rst b/Documentation/bpf/btf.rst index 1ebf4c5c7ddc..ab08852e53ae 100644 --- a/Documentation/bpf/btf.rst +++ b/Documentation/bpf/btf.rst @@ -565,18 +565,15 @@ A map can be created with ``btf_fd`` and specified key/value type id.:: In libbpf, the map can be defined with extra annotation like below: :: - struct bpf_map_def SEC("maps") btf_map = { - .type = BPF_MAP_TYPE_ARRAY, - .key_size = sizeof(int), - .value_size = sizeof(struct ipv_counts), - .max_entries = 4, - }; - BPF_ANNOTATE_KV_PAIR(btf_map, int, struct ipv_counts); + struct { + __uint(type, BPF_MAP_TYPE_ARRAY); + __type(key, int); + __type(value, struct ipv_counts); + __uint(max_entries, 4); + } btf_map SEC(".maps"); -Here, the parameters for macro BPF_ANNOTATE_KV_PAIR are map name, key and -value types for the map. During ELF parsing, libbpf is able to extract -key/value type_id's and assign them to BPF_MAP_CREATE attributes -automatically. +During ELF parsing, libbpf is able to extract key/value type_id's and assign +them to BPF_MAP_CREATE attributes automatically. .. _BPF_Prog_Load: @@ -824,13 +821,12 @@ structure has bitfields. For example, for the following map,:: ___A b1:4; enum A b2:4; }; - struct bpf_map_def SEC("maps") tmpmap = { - .type = BPF_MAP_TYPE_ARRAY, - .key_size = sizeof(__u32), - .value_size = sizeof(struct tmp_t), - .max_entries = 1, - }; - BPF_ANNOTATE_KV_PAIR(tmpmap, int, struct tmp_t); + struct { + __uint(type, BPF_MAP_TYPE_ARRAY); + __type(key, int); + __type(value, struct tmp_t); + __uint(max_entries, 1); + } tmpmap SEC(".maps"); bpftool is able to pretty print like below: :: diff --git a/drivers/net/ethernet/marvell/mvneta.c b/drivers/net/ethernet/marvell/mvneta.c index 83c8908f0cc7..309bd3e97ea0 100644 --- a/drivers/net/ethernet/marvell/mvneta.c +++ b/drivers/net/ethernet/marvell/mvneta.c @@ -1884,8 +1884,8 @@ static void mvneta_txq_bufs_free(struct mvneta_port *pp, bytes_compl += buf->skb->len; pkts_compl++; dev_kfree_skb_any(buf->skb); - } else if (buf->type == MVNETA_TYPE_XDP_TX || - buf->type == MVNETA_TYPE_XDP_NDO) { + } else if ((buf->type == MVNETA_TYPE_XDP_TX || + buf->type == MVNETA_TYPE_XDP_NDO) && buf->xdpf) { if (napi && buf->type == MVNETA_TYPE_XDP_TX) xdp_return_frame_rx_napi(buf->xdpf); else @@ -2060,61 +2060,106 @@ int mvneta_rx_refill_queue(struct mvneta_port *pp, struct mvneta_rx_queue *rxq) static void mvneta_xdp_put_buff(struct mvneta_port *pp, struct mvneta_rx_queue *rxq, - struct xdp_buff *xdp, struct skb_shared_info *sinfo, - int sync_len) + struct xdp_buff *xdp, int sync_len) { + struct skb_shared_info *sinfo = xdp_get_shared_info_from_buff(xdp); int i; + if (likely(!xdp_buff_has_frags(xdp))) + goto out; + for (i = 0; i < sinfo->nr_frags; i++) page_pool_put_full_page(rxq->page_pool, skb_frag_page(&sinfo->frags[i]), true); + +out: page_pool_put_page(rxq->page_pool, virt_to_head_page(xdp->data), sync_len, true); } static int mvneta_xdp_submit_frame(struct mvneta_port *pp, struct mvneta_tx_queue *txq, - struct xdp_frame *xdpf, bool dma_map) + struct xdp_frame *xdpf, int *nxmit_byte, bool dma_map) { - struct mvneta_tx_desc *tx_desc; - struct mvneta_tx_buf *buf; - dma_addr_t dma_addr; + struct skb_shared_info *sinfo = xdp_get_shared_info_from_frame(xdpf); + struct device *dev = pp->dev->dev.parent; + struct mvneta_tx_desc *tx_desc = NULL; + int i, num_frames = 1; + struct page *page; + + if (unlikely(xdp_frame_has_frags(xdpf))) + num_frames += sinfo->nr_frags; - if (txq->count >= txq->tx_stop_threshold) + if (txq->count + num_frames >= txq->size) return MVNETA_XDP_DROPPED; - tx_desc = mvneta_txq_next_desc_get(txq); + for (i = 0; i < num_frames; i++) { + struct mvneta_tx_buf *buf = &txq->buf[txq->txq_put_index]; + skb_frag_t *frag = NULL; + int len = xdpf->len; + dma_addr_t dma_addr; - buf = &txq->buf[txq->txq_put_index]; - if (dma_map) { - /* ndo_xdp_xmit */ - dma_addr = dma_map_single(pp->dev->dev.parent, xdpf->data, - xdpf->len, DMA_TO_DEVICE); - if (dma_mapping_error(pp->dev->dev.parent, dma_addr)) { - mvneta_txq_desc_put(txq); - return MVNETA_XDP_DROPPED; + if (unlikely(i)) { /* paged area */ + frag = &sinfo->frags[i - 1]; + len = skb_frag_size(frag); } - buf->type = MVNETA_TYPE_XDP_NDO; - } else { - struct page *page = virt_to_page(xdpf->data); - dma_addr = page_pool_get_dma_addr(page) + - sizeof(*xdpf) + xdpf->headroom; - dma_sync_single_for_device(pp->dev->dev.parent, dma_addr, - xdpf->len, DMA_BIDIRECTIONAL); - buf->type = MVNETA_TYPE_XDP_TX; + tx_desc = mvneta_txq_next_desc_get(txq); + if (dma_map) { + /* ndo_xdp_xmit */ + void *data; + + data = unlikely(frag) ? skb_frag_address(frag) + : xdpf->data; + dma_addr = dma_map_single(dev, data, len, + DMA_TO_DEVICE); + if (dma_mapping_error(dev, dma_addr)) { + mvneta_txq_desc_put(txq); + goto unmap; + } + + buf->type = MVNETA_TYPE_XDP_NDO; + } else { + page = unlikely(frag) ? skb_frag_page(frag) + : virt_to_page(xdpf->data); + dma_addr = page_pool_get_dma_addr(page); + if (unlikely(frag)) + dma_addr += skb_frag_off(frag); + else + dma_addr += sizeof(*xdpf) + xdpf->headroom; + dma_sync_single_for_device(dev, dma_addr, len, + DMA_BIDIRECTIONAL); + buf->type = MVNETA_TYPE_XDP_TX; + } + buf->xdpf = unlikely(i) ? NULL : xdpf; + + tx_desc->command = unlikely(i) ? 0 : MVNETA_TXD_F_DESC; + tx_desc->buf_phys_addr = dma_addr; + tx_desc->data_size = len; + *nxmit_byte += len; + + mvneta_txq_inc_put(txq); } - buf->xdpf = xdpf; - tx_desc->command = MVNETA_TXD_FLZ_DESC; - tx_desc->buf_phys_addr = dma_addr; - tx_desc->data_size = xdpf->len; + /*last descriptor */ + if (likely(tx_desc)) + tx_desc->command |= MVNETA_TXD_L_DESC | MVNETA_TXD_Z_PAD; - mvneta_txq_inc_put(txq); - txq->pending++; - txq->count++; + txq->pending += num_frames; + txq->count += num_frames; return MVNETA_XDP_TX; + +unmap: + for (i--; i >= 0; i--) { + mvneta_txq_desc_put(txq); + tx_desc = txq->descs + txq->next_desc_to_proc; + dma_unmap_single(dev, tx_desc->buf_phys_addr, + tx_desc->data_size, + DMA_TO_DEVICE); + } + + return MVNETA_XDP_DROPPED; } static int @@ -2123,8 +2168,8 @@ mvneta_xdp_xmit_back(struct mvneta_port *pp, struct xdp_buff *xdp) struct mvneta_pcpu_stats *stats = this_cpu_ptr(pp->stats); struct mvneta_tx_queue *txq; struct netdev_queue *nq; + int cpu, nxmit_byte = 0; struct xdp_frame *xdpf; - int cpu; u32 ret; xdpf = xdp_convert_buff_to_frame(xdp); @@ -2136,10 +2181,10 @@ mvneta_xdp_xmit_back(struct mvneta_port *pp, struct xdp_buff *xdp) nq = netdev_get_tx_queue(pp->dev, txq->id); __netif_tx_lock(nq, cpu); - ret = mvneta_xdp_submit_frame(pp, txq, xdpf, false); + ret = mvneta_xdp_submit_frame(pp, txq, xdpf, &nxmit_byte, false); if (ret == MVNETA_XDP_TX) { u64_stats_update_begin(&stats->syncp); - stats->es.ps.tx_bytes += xdpf->len; + stats->es.ps.tx_bytes += nxmit_byte; stats->es.ps.tx_packets++; stats->es.ps.xdp_tx++; u64_stats_update_end(&stats->syncp); @@ -2178,11 +2223,11 @@ mvneta_xdp_xmit(struct net_device *dev, int num_frame, __netif_tx_lock(nq, cpu); for (i = 0; i < num_frame; i++) { - ret = mvneta_xdp_submit_frame(pp, txq, frames[i], true); + ret = mvneta_xdp_submit_frame(pp, txq, frames[i], &nxmit_byte, + true); if (ret != MVNETA_XDP_TX) break; - nxmit_byte += frames[i]->len; nxmit++; } @@ -2205,7 +2250,6 @@ mvneta_run_xdp(struct mvneta_port *pp, struct mvneta_rx_queue *rxq, struct bpf_prog *prog, struct xdp_buff *xdp, u32 frame_sz, struct mvneta_stats *stats) { - struct skb_shared_info *sinfo = xdp_get_shared_info_from_buff(xdp); unsigned int len, data_len, sync; u32 ret, act; @@ -2226,7 +2270,7 @@ mvneta_run_xdp(struct mvneta_port *pp, struct mvneta_rx_queue *rxq, err = xdp_do_redirect(pp->dev, xdp, prog); if (unlikely(err)) { - mvneta_xdp_put_buff(pp, rxq, xdp, sinfo, sync); + mvneta_xdp_put_buff(pp, rxq, xdp, sync); ret = MVNETA_XDP_DROPPED; } else { ret = MVNETA_XDP_REDIR; @@ -2237,7 +2281,7 @@ mvneta_run_xdp(struct mvneta_port *pp, struct mvneta_rx_queue *rxq, case XDP_TX: ret = mvneta_xdp_xmit_back(pp, xdp); if (ret != MVNETA_XDP_TX) - mvneta_xdp_put_buff(pp, rxq, xdp, sinfo, sync); + mvneta_xdp_put_buff(pp, rxq, xdp, sync); break; default: bpf_warn_invalid_xdp_action(pp->dev, prog, act); @@ -2246,7 +2290,7 @@ mvneta_run_xdp(struct mvneta_port *pp, struct mvneta_rx_queue *rxq, trace_xdp_exception(pp->dev, prog, act); fallthrough; case XDP_DROP: - mvneta_xdp_put_buff(pp, rxq, xdp, sinfo, sync); + mvneta_xdp_put_buff(pp, rxq, xdp, sync); ret = MVNETA_XDP_DROPPED; stats->xdp_drop++; break; @@ -2269,7 +2313,6 @@ mvneta_swbm_rx_frame(struct mvneta_port *pp, int data_len = -MVNETA_MH_SIZE, len; struct net_device *dev = pp->dev; enum dma_data_direction dma_dir; - struct skb_shared_info *sinfo; if (*size > MVNETA_MAX_RX_BUF_SIZE) { len = MVNETA_MAX_RX_BUF_SIZE; @@ -2289,11 +2332,9 @@ mvneta_swbm_rx_frame(struct mvneta_port *pp, /* Prefetch header */ prefetch(data); + xdp_buff_clear_frags_flag(xdp); xdp_prepare_buff(xdp, data, pp->rx_offset_correction + MVNETA_MH_SIZE, data_len, false); - - sinfo = xdp_get_shared_info_from_buff(xdp); - sinfo->nr_frags = 0; } static void @@ -2301,9 +2342,9 @@ mvneta_swbm_add_rx_fragment(struct mvneta_port *pp, struct mvneta_rx_desc *rx_desc, struct mvneta_rx_queue *rxq, struct xdp_buff *xdp, int *size, - struct skb_shared_info *xdp_sinfo, struct page *page) { + struct skb_shared_info *sinfo = xdp_get_shared_info_from_buff(xdp); struct net_device *dev = pp->dev; enum dma_data_direction dma_dir; int data_len, len; @@ -2321,25 +2362,25 @@ mvneta_swbm_add_rx_fragment(struct mvneta_port *pp, len, dma_dir); rx_desc->buf_phys_addr = 0; - if (data_len > 0 && xdp_sinfo->nr_frags < MAX_SKB_FRAGS) { - skb_frag_t *frag = &xdp_sinfo->frags[xdp_sinfo->nr_frags++]; + if (!xdp_buff_has_frags(xdp)) + sinfo->nr_frags = 0; + + if (data_len > 0 && sinfo->nr_frags < MAX_SKB_FRAGS) { + skb_frag_t *frag = &sinfo->frags[sinfo->nr_frags++]; skb_frag_off_set(frag, pp->rx_offset_correction); skb_frag_size_set(frag, data_len); __skb_frag_set_page(frag, page); + + if (!xdp_buff_has_frags(xdp)) { + sinfo->xdp_frags_size = *size; + xdp_buff_set_frags_flag(xdp); + } + if (page_is_pfmemalloc(page)) + xdp_buff_set_frag_pfmemalloc(xdp); } else { page_pool_put_full_page(rxq->page_pool, page, true); } - - /* last fragment */ - if (len == *size) { - struct skb_shared_info *sinfo; - - sinfo = xdp_get_shared_info_from_buff(xdp); - sinfo->nr_frags = xdp_sinfo->nr_frags; - memcpy(sinfo->frags, xdp_sinfo->frags, - sinfo->nr_frags * sizeof(skb_frag_t)); - } *size -= len; } @@ -2348,8 +2389,11 @@ mvneta_swbm_build_skb(struct mvneta_port *pp, struct page_pool *pool, struct xdp_buff *xdp, u32 desc_status) { struct skb_shared_info *sinfo = xdp_get_shared_info_from_buff(xdp); - int i, num_frags = sinfo->nr_frags; struct sk_buff *skb; + u8 num_frags; + + if (unlikely(xdp_buff_has_frags(xdp))) + num_frags = sinfo->nr_frags; skb = build_skb(xdp->data_hard_start, PAGE_SIZE); if (!skb) @@ -2361,13 +2405,11 @@ mvneta_swbm_build_skb(struct mvneta_port *pp, struct page_pool *pool, skb_put(skb, xdp->data_end - xdp->data); skb->ip_summed = mvneta_rx_csum(pp, desc_status); - for (i = 0; i < num_frags; i++) { - skb_frag_t *frag = &sinfo->frags[i]; - - skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, - skb_frag_page(frag), skb_frag_off(frag), - skb_frag_size(frag), PAGE_SIZE); - } + if (unlikely(xdp_buff_has_frags(xdp))) + xdp_update_skb_shared_info(skb, num_frags, + sinfo->xdp_frags_size, + num_frags * xdp->frame_sz, + xdp_buff_is_frag_pfmemalloc(xdp)); return skb; } @@ -2379,7 +2421,6 @@ static int mvneta_rx_swbm(struct napi_struct *napi, { int rx_proc = 0, rx_todo, refill, size = 0; struct net_device *dev = pp->dev; - struct skb_shared_info sinfo; struct mvneta_stats ps = {}; struct bpf_prog *xdp_prog; u32 desc_status, frame_sz; @@ -2388,8 +2429,6 @@ static int mvneta_rx_swbm(struct napi_struct *napi, xdp_init_buff(&xdp_buf, PAGE_SIZE, &rxq->xdp_rxq); xdp_buf.data_hard_start = NULL; - sinfo.nr_frags = 0; - /* Get number of received packets */ rx_todo = mvneta_rxq_busy_desc_num_get(pp, rxq); @@ -2431,7 +2470,7 @@ static int mvneta_rx_swbm(struct napi_struct *napi, } mvneta_swbm_add_rx_fragment(pp, rx_desc, rxq, &xdp_buf, - &size, &sinfo, page); + &size, page); } /* Middle or Last descriptor */ if (!(rx_status & MVNETA_RXD_LAST_DESC)) @@ -2439,7 +2478,7 @@ static int mvneta_rx_swbm(struct napi_struct *napi, continue; if (size) { - mvneta_xdp_put_buff(pp, rxq, &xdp_buf, &sinfo, -1); + mvneta_xdp_put_buff(pp, rxq, &xdp_buf, -1); goto next; } @@ -2451,7 +2490,7 @@ static int mvneta_rx_swbm(struct napi_struct *napi, if (IS_ERR(skb)) { struct mvneta_pcpu_stats *stats = this_cpu_ptr(pp->stats); - mvneta_xdp_put_buff(pp, rxq, &xdp_buf, &sinfo, -1); + mvneta_xdp_put_buff(pp, rxq, &xdp_buf, -1); u64_stats_update_begin(&stats->syncp); stats->es.skb_alloc_error++; @@ -2468,11 +2507,10 @@ static int mvneta_rx_swbm(struct napi_struct *napi, napi_gro_receive(napi, skb); next: xdp_buf.data_hard_start = NULL; - sinfo.nr_frags = 0; } if (xdp_buf.data_hard_start) - mvneta_xdp_put_buff(pp, rxq, &xdp_buf, &sinfo, -1); + mvneta_xdp_put_buff(pp, rxq, &xdp_buf, -1); if (ps.xdp_redirect) xdp_do_flush_map(); @@ -3260,7 +3298,8 @@ static int mvneta_create_page_pool(struct mvneta_port *pp, return err; } - err = xdp_rxq_info_reg(&rxq->xdp_rxq, pp->dev, rxq->id, 0); + err = __xdp_rxq_info_reg(&rxq->xdp_rxq, pp->dev, rxq->id, 0, + PAGE_SIZE); if (err < 0) goto err_free_pp; @@ -3740,6 +3779,7 @@ static void mvneta_percpu_disable(void *arg) static int mvneta_change_mtu(struct net_device *dev, int mtu) { struct mvneta_port *pp = netdev_priv(dev); + struct bpf_prog *prog = pp->xdp_prog; int ret; if (!IS_ALIGNED(MVNETA_RX_PKT_SIZE(mtu), 8)) { @@ -3748,8 +3788,11 @@ static int mvneta_change_mtu(struct net_device *dev, int mtu) mtu = ALIGN(MVNETA_RX_PKT_SIZE(mtu), 8); } - if (pp->xdp_prog && mtu > MVNETA_MAX_RX_BUF_SIZE) { - netdev_info(dev, "Illegal MTU value %d for XDP mode\n", mtu); + if (prog && !prog->aux->xdp_has_frags && + mtu > MVNETA_MAX_RX_BUF_SIZE) { + netdev_info(dev, "Illegal MTU %d for XDP prog without frags\n", + mtu); + return -EINVAL; } @@ -4490,8 +4533,9 @@ static int mvneta_xdp_setup(struct net_device *dev, struct bpf_prog *prog, struct mvneta_port *pp = netdev_priv(dev); struct bpf_prog *old_prog; - if (prog && dev->mtu > MVNETA_MAX_RX_BUF_SIZE) { - NL_SET_ERR_MSG_MOD(extack, "MTU too large for XDP"); + if (prog && !prog->aux->xdp_has_frags && + dev->mtu > MVNETA_MAX_RX_BUF_SIZE) { + NL_SET_ERR_MSG_MOD(extack, "prog does not support XDP frags"); return -EOPNOTSUPP; } diff --git a/include/linux/bpf.h b/include/linux/bpf.h index fa517ae604ad..8c92c974bd12 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -194,6 +194,17 @@ struct bpf_map { struct work_struct work; struct mutex freeze_mutex; atomic64_t writecnt; + /* 'Ownership' of program-containing map is claimed by the first program + * that is going to use this map or by the first program which FD is + * stored in the map to make sure that all callers and callees have the + * same prog type, JITed flag and xdp_has_frags flag. + */ + struct { + spinlock_t lock; + enum bpf_prog_type type; + bool jited; + bool xdp_has_frags; + } owner; }; static inline bool map_value_has_spin_lock(const struct bpf_map *map) @@ -578,7 +589,6 @@ struct bpf_verifier_ops { const struct btf_type *t, int off, int size, enum bpf_access_type atype, u32 *next_btf_id); - bool (*check_kfunc_call)(u32 kfunc_btf_id, struct module *owner); }; struct bpf_prog_offload_ops { @@ -939,6 +949,7 @@ struct bpf_prog_aux { bool func_proto_unreliable; bool sleepable; bool tail_call_reachable; + bool xdp_has_frags; struct hlist_node tramp_hlist; /* BTF_KIND_FUNC_PROTO for valid attach_btf_id */ const struct btf_type *attach_func_proto; @@ -999,16 +1010,6 @@ struct bpf_prog_aux { }; struct bpf_array_aux { - /* 'Ownership' of prog array is claimed by the first program that - * is going to use this map or by the first program which FD is - * stored in the map to make sure that all callers and callees have - * the same prog type and JITed flag. - */ - struct { - spinlock_t lock; - enum bpf_prog_type type; - bool jited; - } owner; /* Programs with direct jumps into programs part of this array. */ struct list_head poke_progs; struct bpf_map *map; @@ -1183,7 +1184,14 @@ struct bpf_event_entry { struct rcu_head rcu; }; -bool bpf_prog_array_compatible(struct bpf_array *array, const struct bpf_prog *fp); +static inline bool map_type_contains_progs(struct bpf_map *map) +{ + return map->map_type == BPF_MAP_TYPE_PROG_ARRAY || + map->map_type == BPF_MAP_TYPE_DEVMAP || + map->map_type == BPF_MAP_TYPE_CPUMAP; +} + +bool bpf_prog_map_compatible(struct bpf_map *map, const struct bpf_prog *fp); int bpf_prog_calc_tag(struct bpf_prog *fp); const struct bpf_func_proto *bpf_get_trace_printk_proto(void); @@ -1251,6 +1259,7 @@ struct bpf_run_ctx {}; struct bpf_cg_run_ctx { struct bpf_run_ctx run_ctx; const struct bpf_prog_array_item *prog_item; + int retval; }; struct bpf_trace_run_ctx { @@ -1283,19 +1292,19 @@ static inline void bpf_reset_run_ctx(struct bpf_run_ctx *old_ctx) typedef u32 (*bpf_prog_run_fn)(const struct bpf_prog *prog, const void *ctx); -static __always_inline u32 +static __always_inline int BPF_PROG_RUN_ARRAY_CG_FLAGS(const struct bpf_prog_array __rcu *array_rcu, const void *ctx, bpf_prog_run_fn run_prog, - u32 *ret_flags) + int retval, u32 *ret_flags) { const struct bpf_prog_array_item *item; const struct bpf_prog *prog; const struct bpf_prog_array *array; struct bpf_run_ctx *old_run_ctx; struct bpf_cg_run_ctx run_ctx; - u32 ret = 1; u32 func_ret; + run_ctx.retval = retval; migrate_disable(); rcu_read_lock(); array = rcu_dereference(array_rcu); @@ -1304,27 +1313,29 @@ BPF_PROG_RUN_ARRAY_CG_FLAGS(const struct bpf_prog_array __rcu *array_rcu, while ((prog = READ_ONCE(item->prog))) { run_ctx.prog_item = item; func_ret = run_prog(prog, ctx); - ret &= (func_ret & 1); + if (!(func_ret & 1) && !IS_ERR_VALUE((long)run_ctx.retval)) + run_ctx.retval = -EPERM; *(ret_flags) |= (func_ret >> 1); item++; } bpf_reset_run_ctx(old_run_ctx); rcu_read_unlock(); migrate_enable(); - return ret; + return run_ctx.retval; } -static __always_inline u32 +static __always_inline int BPF_PROG_RUN_ARRAY_CG(const struct bpf_prog_array __rcu *array_rcu, - const void *ctx, bpf_prog_run_fn run_prog) + const void *ctx, bpf_prog_run_fn run_prog, + int retval) { const struct bpf_prog_array_item *item; const struct bpf_prog *prog; const struct bpf_prog_array *array; struct bpf_run_ctx *old_run_ctx; struct bpf_cg_run_ctx run_ctx; - u32 ret = 1; + run_ctx.retval = retval; migrate_disable(); rcu_read_lock(); array = rcu_dereference(array_rcu); @@ -1332,13 +1343,14 @@ BPF_PROG_RUN_ARRAY_CG(const struct bpf_prog_array __rcu *array_rcu, old_run_ctx = bpf_set_run_ctx(&run_ctx.run_ctx); while ((prog = READ_ONCE(item->prog))) { run_ctx.prog_item = item; - ret &= run_prog(prog, ctx); + if (!run_prog(prog, ctx) && !IS_ERR_VALUE((long)run_ctx.retval)) + run_ctx.retval = -EPERM; item++; } bpf_reset_run_ctx(old_run_ctx); rcu_read_unlock(); migrate_enable(); - return ret; + return run_ctx.retval; } static __always_inline u32 @@ -1391,19 +1403,21 @@ out: * 0: NET_XMIT_SUCCESS skb should be transmitted * 1: NET_XMIT_DROP skb should be dropped and cn * 2: NET_XMIT_CN skb should be transmitted and cn - * 3: -EPERM skb should be dropped + * 3: -err skb should be dropped */ #define BPF_PROG_CGROUP_INET_EGRESS_RUN_ARRAY(array, ctx, func) \ ({ \ u32 _flags = 0; \ bool _cn; \ u32 _ret; \ - _ret = BPF_PROG_RUN_ARRAY_CG_FLAGS(array, ctx, func, &_flags); \ + _ret = BPF_PROG_RUN_ARRAY_CG_FLAGS(array, ctx, func, 0, &_flags); \ _cn = _flags & BPF_RET_SET_CN; \ - if (_ret) \ + if (_ret && !IS_ERR_VALUE((long)_ret)) \ + _ret = -EFAULT; \ + if (!_ret) \ _ret = (_cn ? NET_XMIT_CN : NET_XMIT_SUCCESS); \ else \ - _ret = (_cn ? NET_XMIT_DROP : -EPERM); \ + _ret = (_cn ? NET_XMIT_DROP : _ret); \ _ret; \ }) @@ -1724,7 +1738,6 @@ int bpf_prog_test_run_raw_tp(struct bpf_prog *prog, int bpf_prog_test_run_sk_lookup(struct bpf_prog *prog, const union bpf_attr *kattr, union bpf_attr __user *uattr); -bool bpf_prog_test_check_kfunc_call(u32 kfunc_id, struct module *owner); bool btf_ctx_access(int off, int size, enum bpf_access_type type, const struct bpf_prog *prog, struct bpf_insn_access_aux *info); @@ -1976,12 +1989,6 @@ static inline int bpf_prog_test_run_sk_lookup(struct bpf_prog *prog, return -ENOTSUPP; } -static inline bool bpf_prog_test_check_kfunc_call(u32 kfunc_id, - struct module *owner) -{ - return false; -} - static inline void bpf_map_put(struct bpf_map *map) { } @@ -2076,6 +2083,9 @@ int bpf_prog_test_run_syscall(struct bpf_prog *prog, int sock_map_get_from_fd(const union bpf_attr *attr, struct bpf_prog *prog); int sock_map_prog_detach(const union bpf_attr *attr, enum bpf_prog_type ptype); int sock_map_update_elem_sys(struct bpf_map *map, void *key, void *value, u64 flags); +int sock_map_bpf_prog_query(const union bpf_attr *attr, + union bpf_attr __user *uattr); + void sock_map_unhash(struct sock *sk); void sock_map_close(struct sock *sk, long timeout); #else @@ -2129,6 +2139,12 @@ static inline int sock_map_update_elem_sys(struct bpf_map *map, void *key, void { return -EOPNOTSUPP; } + +static inline int sock_map_bpf_prog_query(const union bpf_attr *attr, + union bpf_attr __user *uattr) +{ + return -EINVAL; +} #endif /* CONFIG_BPF_SYSCALL */ #endif /* CONFIG_NET && CONFIG_BPF_SYSCALL */ diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index e9993172f892..7a7be8c057f2 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -521,6 +521,8 @@ bpf_prog_offload_remove_insns(struct bpf_verifier_env *env, u32 off, u32 cnt); int check_ptr_off_reg(struct bpf_verifier_env *env, const struct bpf_reg_state *reg, int regno); +int check_kfunc_mem_size_reg(struct bpf_verifier_env *env, struct bpf_reg_state *reg, + u32 regno); int check_mem_reg(struct bpf_verifier_env *env, struct bpf_reg_state *reg, u32 regno, u32 mem_size); @@ -564,4 +566,9 @@ static inline u32 type_flag(u32 type) return type & ~BPF_BASE_TYPE_MASK; } +static inline enum bpf_prog_type resolve_prog_type(struct bpf_prog *prog) +{ + return prog->aux->dst_prog ? prog->aux->dst_prog->type : prog->type; +} + #endif /* _LINUX_BPF_VERIFIER_H */ diff --git a/include/linux/btf.h b/include/linux/btf.h index 0c74348cbc9d..b12cfe3b12bb 100644 --- a/include/linux/btf.h +++ b/include/linux/btf.h @@ -12,11 +12,33 @@ #define BTF_TYPE_EMIT(type) ((void)(type *)0) #define BTF_TYPE_EMIT_ENUM(enum_val) ((void)enum_val) +enum btf_kfunc_type { + BTF_KFUNC_TYPE_CHECK, + BTF_KFUNC_TYPE_ACQUIRE, + BTF_KFUNC_TYPE_RELEASE, + BTF_KFUNC_TYPE_RET_NULL, + BTF_KFUNC_TYPE_MAX, +}; + struct btf; struct btf_member; struct btf_type; union bpf_attr; struct btf_show; +struct btf_id_set; + +struct btf_kfunc_id_set { + struct module *owner; + union { + struct { + struct btf_id_set *check_set; + struct btf_id_set *acquire_set; + struct btf_id_set *release_set; + struct btf_id_set *ret_null_set; + }; + struct btf_id_set *sets[BTF_KFUNC_TYPE_MAX]; + }; +}; extern const struct file_operations btf_fops; @@ -307,6 +329,11 @@ const struct btf_type *btf_type_by_id(const struct btf *btf, u32 type_id); const char *btf_name_by_offset(const struct btf *btf, u32 offset); struct btf *btf_parse_vmlinux(void); struct btf *bpf_prog_get_target_btf(const struct bpf_prog *prog); +bool btf_kfunc_id_set_contains(const struct btf *btf, + enum bpf_prog_type prog_type, + enum btf_kfunc_type type, u32 kfunc_btf_id); +int register_btf_kfunc_id_set(enum bpf_prog_type prog_type, + const struct btf_kfunc_id_set *s); #else static inline const struct btf_type *btf_type_by_id(const struct btf *btf, u32 type_id) @@ -318,50 +345,18 @@ static inline const char *btf_name_by_offset(const struct btf *btf, { return NULL; } -#endif - -struct kfunc_btf_id_set { - struct list_head list; - struct btf_id_set *set; - struct module *owner; -}; - -struct kfunc_btf_id_list { - struct list_head list; - struct mutex mutex; -}; - -#ifdef CONFIG_DEBUG_INFO_BTF_MODULES -void register_kfunc_btf_id_set(struct kfunc_btf_id_list *l, - struct kfunc_btf_id_set *s); -void unregister_kfunc_btf_id_set(struct kfunc_btf_id_list *l, - struct kfunc_btf_id_set *s); -bool bpf_check_mod_kfunc_call(struct kfunc_btf_id_list *klist, u32 kfunc_id, - struct module *owner); - -extern struct kfunc_btf_id_list bpf_tcp_ca_kfunc_list; -extern struct kfunc_btf_id_list prog_test_kfunc_list; -#else -static inline void register_kfunc_btf_id_set(struct kfunc_btf_id_list *l, - struct kfunc_btf_id_set *s) -{ -} -static inline void unregister_kfunc_btf_id_set(struct kfunc_btf_id_list *l, - struct kfunc_btf_id_set *s) +static inline bool btf_kfunc_id_set_contains(const struct btf *btf, + enum bpf_prog_type prog_type, + enum btf_kfunc_type type, + u32 kfunc_btf_id) { + return false; } -static inline bool bpf_check_mod_kfunc_call(struct kfunc_btf_id_list *klist, - u32 kfunc_id, struct module *owner) +static inline int register_btf_kfunc_id_set(enum bpf_prog_type prog_type, + const struct btf_kfunc_id_set *s) { - return false; + return 0; } - -static struct kfunc_btf_id_list bpf_tcp_ca_kfunc_list __maybe_unused; -static struct kfunc_btf_id_list prog_test_kfunc_list __maybe_unused; #endif -#define DEFINE_KFUNC_BTF_ID_SET(set, name) \ - struct kfunc_btf_id_set name = { LIST_HEAD_INIT(name.list), (set), \ - THIS_MODULE } - #endif diff --git a/include/linux/btf_ids.h b/include/linux/btf_ids.h index 919c0fde1c51..bc5d9cc34e4c 100644 --- a/include/linux/btf_ids.h +++ b/include/linux/btf_ids.h @@ -11,6 +11,7 @@ struct btf_id_set { #ifdef CONFIG_DEBUG_INFO_BTF #include <linux/compiler.h> /* for __PASTE */ +#include <linux/compiler_attributes.h> /* for __maybe_unused */ /* * Following macros help to define lists of BTF IDs placed @@ -146,14 +147,14 @@ extern struct btf_id_set name; #else -#define BTF_ID_LIST(name) static u32 name[5]; +#define BTF_ID_LIST(name) static u32 __maybe_unused name[5]; #define BTF_ID(prefix, name) #define BTF_ID_UNUSED -#define BTF_ID_LIST_GLOBAL(name, n) u32 name[n]; -#define BTF_ID_LIST_SINGLE(name, prefix, typename) static u32 name[1]; -#define BTF_ID_LIST_GLOBAL_SINGLE(name, prefix, typename) u32 name[1]; -#define BTF_SET_START(name) static struct btf_id_set name = { 0 }; -#define BTF_SET_START_GLOBAL(name) static struct btf_id_set name = { 0 }; +#define BTF_ID_LIST_GLOBAL(name, n) u32 __maybe_unused name[n]; +#define BTF_ID_LIST_SINGLE(name, prefix, typename) static u32 __maybe_unused name[1]; +#define BTF_ID_LIST_GLOBAL_SINGLE(name, prefix, typename) u32 __maybe_unused name[1]; +#define BTF_SET_START(name) static struct btf_id_set __maybe_unused name = { 0 }; +#define BTF_SET_START_GLOBAL(name) static struct btf_id_set __maybe_unused name = { 0 }; #define BTF_SET_END(name) #endif /* CONFIG_DEBUG_INFO_BTF */ diff --git a/include/linux/filter.h b/include/linux/filter.h index 71fa57b88bfc..d23e999dc032 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -1356,7 +1356,10 @@ struct bpf_sockopt_kern { s32 level; s32 optname; s32 optlen; - s32 retval; + /* for retval in struct bpf_cg_run_ctx */ + struct task_struct *current_task; + /* Temporary "register" for indirect stores to ppos. */ + u64 tmp_reg; }; int copy_bpf_fprog_from_user(struct sock_fprog *dst, sockptr_t src, int len); diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index bf11e1fbd69b..8131d0de7559 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -557,6 +557,7 @@ struct skb_shared_info { * Warning : all fields before dataref are cleared in __alloc_skb() */ atomic_t dataref; + unsigned int xdp_frags_size; /* Intermediate layers must ensure that destructor_arg * remains valid until skb destructor */ diff --git a/include/net/netfilter/nf_conntrack_bpf.h b/include/net/netfilter/nf_conntrack_bpf.h new file mode 100644 index 000000000000..a473b56842c5 --- /dev/null +++ b/include/net/netfilter/nf_conntrack_bpf.h @@ -0,0 +1,23 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#ifndef _NF_CONNTRACK_BPF_H +#define _NF_CONNTRACK_BPF_H + +#include <linux/btf.h> +#include <linux/kconfig.h> + +#if (IS_BUILTIN(CONFIG_NF_CONNTRACK) && IS_ENABLED(CONFIG_DEBUG_INFO_BTF)) || \ + (IS_MODULE(CONFIG_NF_CONNTRACK) && IS_ENABLED(CONFIG_DEBUG_INFO_BTF_MODULES)) + +extern int register_nf_conntrack_bpf(void); + +#else + +static inline int register_nf_conntrack_bpf(void) +{ + return 0; +} + +#endif + +#endif /* _NF_CONNTRACK_BPF_H */ diff --git a/include/net/xdp.h b/include/net/xdp.h index 8f0812e4996d..b7721c3e4d1f 100644 --- a/include/net/xdp.h +++ b/include/net/xdp.h @@ -60,12 +60,20 @@ struct xdp_rxq_info { u32 reg_state; struct xdp_mem_info mem; unsigned int napi_id; + u32 frag_size; } ____cacheline_aligned; /* perf critical, avoid false-sharing */ struct xdp_txq_info { struct net_device *dev; }; +enum xdp_buff_flags { + XDP_FLAGS_HAS_FRAGS = BIT(0), /* non-linear xdp buff */ + XDP_FLAGS_FRAGS_PF_MEMALLOC = BIT(1), /* xdp paged memory is under + * pressure + */ +}; + struct xdp_buff { void *data; void *data_end; @@ -74,13 +82,40 @@ struct xdp_buff { struct xdp_rxq_info *rxq; struct xdp_txq_info *txq; u32 frame_sz; /* frame size to deduce data_hard_end/reserved tailroom*/ + u32 flags; /* supported values defined in xdp_buff_flags */ }; +static __always_inline bool xdp_buff_has_frags(struct xdp_buff *xdp) +{ + return !!(xdp->flags & XDP_FLAGS_HAS_FRAGS); +} + +static __always_inline void xdp_buff_set_frags_flag(struct xdp_buff *xdp) +{ + xdp->flags |= XDP_FLAGS_HAS_FRAGS; +} + +static __always_inline void xdp_buff_clear_frags_flag(struct xdp_buff *xdp) +{ + xdp->flags &= ~XDP_FLAGS_HAS_FRAGS; +} + +static __always_inline bool xdp_buff_is_frag_pfmemalloc(struct xdp_buff *xdp) +{ + return !!(xdp->flags & XDP_FLAGS_FRAGS_PF_MEMALLOC); +} + +static __always_inline void xdp_buff_set_frag_pfmemalloc(struct xdp_buff *xdp) +{ + xdp->flags |= XDP_FLAGS_FRAGS_PF_MEMALLOC; +} + static __always_inline void xdp_init_buff(struct xdp_buff *xdp, u32 frame_sz, struct xdp_rxq_info *rxq) { xdp->frame_sz = frame_sz; xdp->rxq = rxq; + xdp->flags = 0; } static __always_inline void @@ -111,6 +146,20 @@ xdp_get_shared_info_from_buff(struct xdp_buff *xdp) return (struct skb_shared_info *)xdp_data_hard_end(xdp); } +static __always_inline unsigned int xdp_get_buff_len(struct xdp_buff *xdp) +{ + unsigned int len = xdp->data_end - xdp->data; + struct skb_shared_info *sinfo; + + if (likely(!xdp_buff_has_frags(xdp))) + goto out; + + sinfo = xdp_get_shared_info_from_buff(xdp); + len += sinfo->xdp_frags_size; +out: + return len; +} + struct xdp_frame { void *data; u16 len; @@ -122,8 +171,19 @@ struct xdp_frame { */ struct xdp_mem_info mem; struct net_device *dev_rx; /* used by cpumap */ + u32 flags; /* supported values defined in xdp_buff_flags */ }; +static __always_inline bool xdp_frame_has_frags(struct xdp_frame *frame) +{ + return !!(frame->flags & XDP_FLAGS_HAS_FRAGS); +} + +static __always_inline bool xdp_frame_is_frag_pfmemalloc(struct xdp_frame *frame) +{ + return !!(frame->flags & XDP_FLAGS_FRAGS_PF_MEMALLOC); +} + #define XDP_BULK_QUEUE_SIZE 16 struct xdp_frame_bulk { int count; @@ -159,6 +219,19 @@ static inline void xdp_scrub_frame(struct xdp_frame *frame) frame->dev_rx = NULL; } +static inline void +xdp_update_skb_shared_info(struct sk_buff *skb, u8 nr_frags, + unsigned int size, unsigned int truesize, + bool pfmemalloc) +{ + skb_shinfo(skb)->nr_frags = nr_frags; + + skb->len += size; + skb->data_len += size; + skb->truesize += truesize; + skb->pfmemalloc |= pfmemalloc; +} + /* Avoids inlining WARN macro in fast-path */ void xdp_warn(const char *msg, const char *func, const int line); #define XDP_WARN(msg) xdp_warn(msg, __func__, __LINE__) @@ -180,6 +253,7 @@ void xdp_convert_frame_to_buff(struct xdp_frame *frame, struct xdp_buff *xdp) xdp->data_end = frame->data + frame->len; xdp->data_meta = frame->data - frame->metasize; xdp->frame_sz = frame->frame_sz; + xdp->flags = frame->flags; } static inline @@ -206,6 +280,7 @@ int xdp_update_frame_from_buff(struct xdp_buff *xdp, xdp_frame->headroom = headroom - sizeof(*xdp_frame); xdp_frame->metasize = metasize; xdp_frame->frame_sz = xdp->frame_sz; + xdp_frame->flags = xdp->flags; return 0; } @@ -230,6 +305,8 @@ struct xdp_frame *xdp_convert_buff_to_frame(struct xdp_buff *xdp) return xdp_frame; } +void __xdp_return(void *data, struct xdp_mem_info *mem, bool napi_direct, + struct xdp_buff *xdp); void xdp_return_frame(struct xdp_frame *xdpf); void xdp_return_frame_rx_napi(struct xdp_frame *xdpf); void xdp_return_buff(struct xdp_buff *xdp); @@ -246,14 +323,37 @@ void __xdp_release_frame(void *data, struct xdp_mem_info *mem); static inline void xdp_release_frame(struct xdp_frame *xdpf) { struct xdp_mem_info *mem = &xdpf->mem; + struct skb_shared_info *sinfo; + int i; /* Curr only page_pool needs this */ - if (mem->type == MEM_TYPE_PAGE_POOL) - __xdp_release_frame(xdpf->data, mem); + if (mem->type != MEM_TYPE_PAGE_POOL) + return; + + if (likely(!xdp_frame_has_frags(xdpf))) + goto out; + + sinfo = xdp_get_shared_info_from_frame(xdpf); + for (i = 0; i < sinfo->nr_frags; i++) { + struct page *page = skb_frag_page(&sinfo->frags[i]); + + __xdp_release_frame(page_address(page), mem); + } +out: + __xdp_release_frame(xdpf->data, mem); +} + +int __xdp_rxq_info_reg(struct xdp_rxq_info *xdp_rxq, + struct net_device *dev, u32 queue_index, + unsigned int napi_id, u32 frag_size); +static inline int +xdp_rxq_info_reg(struct xdp_rxq_info *xdp_rxq, + struct net_device *dev, u32 queue_index, + unsigned int napi_id) +{ + return __xdp_rxq_info_reg(xdp_rxq, dev, queue_index, napi_id, 0); } -int xdp_rxq_info_reg(struct xdp_rxq_info *xdp_rxq, - struct net_device *dev, u32 queue_index, unsigned int napi_id); void xdp_rxq_info_unreg(struct xdp_rxq_info *xdp_rxq); void xdp_rxq_info_unused(struct xdp_rxq_info *xdp_rxq); bool xdp_rxq_info_is_reg(struct xdp_rxq_info *xdp_rxq); diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index b0383d371b9a..16a7574292a5 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -330,6 +330,8 @@ union bpf_iter_link_info { * *ctx_out*, *data_in* and *data_out* must be NULL. * *repeat* must be zero. * + * BPF_PROG_RUN is an alias for BPF_PROG_TEST_RUN. + * * Return * Returns zero on success. On error, -1 is returned and *errno* * is set appropriately. @@ -1111,6 +1113,11 @@ enum bpf_link_type { */ #define BPF_F_SLEEPABLE (1U << 4) +/* If BPF_F_XDP_HAS_FRAGS is used in BPF_PROG_LOAD command, the loaded program + * fully support xdp frags. + */ +#define BPF_F_XDP_HAS_FRAGS (1U << 5) + /* When BPF ldimm64's insn[0].src_reg != 0 then this can have * the following extensions: * @@ -1775,6 +1782,8 @@ union bpf_attr { * 0 on success, or a negative error in case of failure. * * u64 bpf_get_current_pid_tgid(void) + * Description + * Get the current pid and tgid. * Return * A 64-bit integer containing the current tgid and pid, and * created as such: @@ -1782,6 +1791,8 @@ union bpf_attr { * *current_task*\ **->pid**. * * u64 bpf_get_current_uid_gid(void) + * Description + * Get the current uid and gid. * Return * A 64-bit integer containing the current GID and UID, and * created as such: *current_gid* **<< 32 \|** *current_uid*. @@ -2256,6 +2267,8 @@ union bpf_attr { * The 32-bit hash. * * u64 bpf_get_current_task(void) + * Description + * Get the current task. * Return * A pointer to the current task struct. * @@ -2369,6 +2382,8 @@ union bpf_attr { * indicate that the hash is outdated and to trigger a * recalculation the next time the kernel tries to access this * hash or when the **bpf_get_hash_recalc**\ () helper is called. + * Return + * void. * * long bpf_get_numa_node_id(void) * Description @@ -2466,6 +2481,8 @@ union bpf_attr { * A 8-byte long unique number or 0 if *sk* is NULL. * * u32 bpf_get_socket_uid(struct sk_buff *skb) + * Description + * Get the owner UID of the socked associated to *skb*. * Return * The owner UID of the socket associated to *skb*. If the socket * is **NULL**, or if it is not a full socket (i.e. if it is a @@ -3240,6 +3257,9 @@ union bpf_attr { * The id is returned or 0 in case the id could not be retrieved. * * u64 bpf_get_current_cgroup_id(void) + * Description + * Get the current cgroup id based on the cgroup within which + * the current task is running. * Return * A 64-bit integer containing the current cgroup id based * on the cgroup within which the current task is running. @@ -5018,6 +5038,44 @@ union bpf_attr { * * Return * The number of arguments of the traced function. + * + * int bpf_get_retval(void) + * Description + * Get the syscall's return value that will be returned to userspace. + * + * This helper is currently supported by cgroup programs only. + * Return + * The syscall's return value. + * + * int bpf_set_retval(int retval) + * Description + * Set the syscall's return value that will be returned to userspace. + * + * This helper is currently supported by cgroup programs only. + * Return + * 0 on success, or a negative error in case of failure. + * + * u64 bpf_xdp_get_buff_len(struct xdp_buff *xdp_md) + * Description + * Get the total size of a given xdp buff (linear and paged area) + * Return + * The total size of a given xdp buffer. + * + * long bpf_xdp_load_bytes(struct xdp_buff *xdp_md, u32 offset, void *buf, u32 len) + * Description + * This helper is provided as an easy way to load data from a + * xdp buffer. It can be used to load *len* bytes from *offset* from + * the frame associated to *xdp_md*, into the buffer pointed by + * *buf*. + * Return + * 0 on success, or a negative error in case of failure. + * + * long bpf_xdp_store_bytes(struct xdp_buff *xdp_md, u32 offset, void *buf, u32 len) + * Description + * Store *len* bytes from buffer *buf* into the frame + * associated to *xdp_md*, at *offset*. + * Return + * 0 on success, or a negative error in case of failure. */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -5206,6 +5264,11 @@ union bpf_attr { FN(get_func_arg), \ FN(get_func_ret), \ FN(get_func_arg_cnt), \ + FN(get_retval), \ + FN(set_retval), \ + FN(xdp_get_buff_len), \ + FN(xdp_load_bytes), \ + FN(xdp_store_bytes), \ /* */ /* integer value in 'imm' field of BPF_CALL instruction selects which helper diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c index c7a5be3bf8be..7f145aefbff8 100644 --- a/kernel/bpf/arraymap.c +++ b/kernel/bpf/arraymap.c @@ -837,13 +837,12 @@ static int fd_array_map_delete_elem(struct bpf_map *map, void *key) static void *prog_fd_array_get_ptr(struct bpf_map *map, struct file *map_file, int fd) { - struct bpf_array *array = container_of(map, struct bpf_array, map); struct bpf_prog *prog = bpf_prog_get(fd); if (IS_ERR(prog)) return prog; - if (!bpf_prog_array_compatible(array, prog)) { + if (!bpf_prog_map_compatible(map, prog)) { bpf_prog_put(prog); return ERR_PTR(-EINVAL); } @@ -1071,7 +1070,6 @@ static struct bpf_map *prog_array_map_alloc(union bpf_attr *attr) INIT_WORK(&aux->work, prog_array_map_clear_deferred); INIT_LIST_HEAD(&aux->poke_progs); mutex_init(&aux->poke_mutex); - spin_lock_init(&aux->owner.lock); map = array_map_alloc(attr); if (IS_ERR(map)) { diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c index e16dafeb2450..a1c44c17ea9c 100644 --- a/kernel/bpf/btf.c +++ b/kernel/bpf/btf.c @@ -198,6 +198,21 @@ DEFINE_IDR(btf_idr); DEFINE_SPINLOCK(btf_idr_lock); +enum btf_kfunc_hook { + BTF_KFUNC_HOOK_XDP, + BTF_KFUNC_HOOK_TC, + BTF_KFUNC_HOOK_STRUCT_OPS, + BTF_KFUNC_HOOK_MAX, +}; + +enum { + BTF_KFUNC_SET_MAX_CNT = 32, +}; + +struct btf_kfunc_set_tab { + struct btf_id_set *sets[BTF_KFUNC_HOOK_MAX][BTF_KFUNC_TYPE_MAX]; +}; + struct btf { void *data; struct btf_type **types; @@ -212,6 +227,7 @@ struct btf { refcount_t refcnt; u32 id; struct rcu_head rcu; + struct btf_kfunc_set_tab *kfunc_set_tab; /* split BTF support */ struct btf *base_btf; @@ -1531,8 +1547,30 @@ static void btf_free_id(struct btf *btf) spin_unlock_irqrestore(&btf_idr_lock, flags); } +static void btf_free_kfunc_set_tab(struct btf *btf) +{ + struct btf_kfunc_set_tab *tab = btf->kfunc_set_tab; + int hook, type; + + if (!tab) + return; + /* For module BTF, we directly assign the sets being registered, so + * there is nothing to free except kfunc_set_tab. + */ + if (btf_is_module(btf)) + goto free_tab; + for (hook = 0; hook < ARRAY_SIZE(tab->sets); hook++) { + for (type = 0; type < ARRAY_SIZE(tab->sets[0]); type++) + kfree(tab->sets[hook][type]); + } +free_tab: + kfree(tab); + btf->kfunc_set_tab = NULL; +} + static void btf_free(struct btf *btf) { + btf_free_kfunc_set_tab(btf); kvfree(btf->types); kvfree(btf->resolved_sizes); kvfree(btf->resolved_ids); @@ -5616,17 +5654,45 @@ static bool __btf_type_is_scalar_struct(struct bpf_verifier_log *log, return true; } +static bool is_kfunc_arg_mem_size(const struct btf *btf, + const struct btf_param *arg, + const struct bpf_reg_state *reg) +{ + int len, sfx_len = sizeof("__sz") - 1; + const struct btf_type *t; + const char *param_name; + + t = btf_type_skip_modifiers(btf, arg->type, NULL); + if (!btf_type_is_scalar(t) || reg->type != SCALAR_VALUE) + return false; + + /* In the future, this can be ported to use BTF tagging */ + param_name = btf_name_by_offset(btf, arg->name_off); + if (str_is_empty(param_name)) + return false; + len = strlen(param_name); + if (len < sfx_len) + return false; + param_name += len - sfx_len; + if (strncmp(param_name, "__sz", sfx_len)) + return false; + + return true; +} + static int btf_check_func_arg_match(struct bpf_verifier_env *env, const struct btf *btf, u32 func_id, struct bpf_reg_state *regs, bool ptr_to_mem_ok) { struct bpf_verifier_log *log = &env->log; + u32 i, nargs, ref_id, ref_obj_id = 0; bool is_kfunc = btf_is_kernel(btf); const char *func_name, *ref_tname; const struct btf_type *t, *ref_t; const struct btf_param *args; - u32 i, nargs, ref_id; + int ref_regno = 0; + bool rel = false; t = btf_type_by_id(btf, func_id); if (!t || !btf_type_is_func(t)) { @@ -5704,6 +5770,16 @@ static int btf_check_func_arg_match(struct bpf_verifier_env *env, if (reg->type == PTR_TO_BTF_ID) { reg_btf = reg->btf; reg_ref_id = reg->btf_id; + /* Ensure only one argument is referenced PTR_TO_BTF_ID */ + if (reg->ref_obj_id) { + if (ref_obj_id) { + bpf_log(log, "verifier internal error: more than one arg with ref_obj_id R%d %u %u\n", + regno, reg->ref_obj_id, ref_obj_id); + return -EFAULT; + } + ref_regno = regno; + ref_obj_id = reg->ref_obj_id; + } } else { reg_btf = btf_vmlinux; reg_ref_id = *reg2btf_ids[reg->type]; @@ -5727,17 +5803,33 @@ static int btf_check_func_arg_match(struct bpf_verifier_env *env, u32 type_size; if (is_kfunc) { + bool arg_mem_size = i + 1 < nargs && is_kfunc_arg_mem_size(btf, &args[i + 1], ®s[regno + 1]); + /* Permit pointer to mem, but only when argument * type is pointer to scalar, or struct composed * (recursively) of scalars. + * When arg_mem_size is true, the pointer can be + * void *. */ if (!btf_type_is_scalar(ref_t) && - !__btf_type_is_scalar_struct(log, btf, ref_t, 0)) { + !__btf_type_is_scalar_struct(log, btf, ref_t, 0) && + (arg_mem_size ? !btf_type_is_void(ref_t) : 1)) { bpf_log(log, - "arg#%d pointer type %s %s must point to scalar or struct with scalar\n", - i, btf_type_str(ref_t), ref_tname); + "arg#%d pointer type %s %s must point to %sscalar, or struct with scalar\n", + i, btf_type_str(ref_t), ref_tname, arg_mem_size ? "void, " : ""); return -EINVAL; } + + /* Check for mem, len pair */ + if (arg_mem_size) { + if (check_kfunc_mem_size_reg(env, ®s[regno + 1], regno + 1)) { + bpf_log(log, "arg#%d arg#%d memory, len pair leads to invalid memory access\n", + i, i + 1); + return -EINVAL; + } + i++; + continue; + } } resolve_ret = btf_resolve_size(btf, ref_t, &type_size); @@ -5758,7 +5850,23 @@ static int btf_check_func_arg_match(struct bpf_verifier_env *env, } } - return 0; + /* Either both are set, or neither */ + WARN_ON_ONCE((ref_obj_id && !ref_regno) || (!ref_obj_id && ref_regno)); + if (is_kfunc) { + rel = btf_kfunc_id_set_contains(btf, resolve_prog_type(env->prog), + BTF_KFUNC_TYPE_RELEASE, func_id); + /* We already made sure ref_obj_id is set only for one argument */ + if (rel && !ref_obj_id) { + bpf_log(log, "release kernel function %s expects refcounted PTR_TO_BTF_ID\n", + func_name); + return -EINVAL; + } + /* Allow (!rel && ref_obj_id), so that passing such referenced PTR_TO_BTF_ID to + * other kfuncs works + */ + } + /* returns argument register number > 0 in case of reference release kfunc */ + return rel ? ref_regno : 0; } /* Compare BTF of a function with given bpf_reg_state. @@ -6200,12 +6308,17 @@ bool btf_id_set_contains(const struct btf_id_set *set, u32 id) return bsearch(&id, set->ids, set->cnt, sizeof(u32), btf_id_cmp_func) != NULL; } +enum { + BTF_MODULE_F_LIVE = (1 << 0), +}; + #ifdef CONFIG_DEBUG_INFO_BTF_MODULES struct btf_module { struct list_head list; struct module *module; struct btf *btf; struct bin_attribute *sysfs_attr; + int flags; }; static LIST_HEAD(btf_modules); @@ -6233,7 +6346,8 @@ static int btf_module_notify(struct notifier_block *nb, unsigned long op, int err = 0; if (mod->btf_data_size == 0 || - (op != MODULE_STATE_COMING && op != MODULE_STATE_GOING)) + (op != MODULE_STATE_COMING && op != MODULE_STATE_LIVE && + op != MODULE_STATE_GOING)) goto out; switch (op) { @@ -6292,6 +6406,17 @@ static int btf_module_notify(struct notifier_block *nb, unsigned long op, } break; + case MODULE_STATE_LIVE: + mutex_lock(&btf_module_mutex); + list_for_each_entry_safe(btf_mod, tmp, &btf_modules, list) { + if (btf_mod->module != module) + continue; + + btf_mod->flags |= BTF_MODULE_F_LIVE; + break; + } + mutex_unlock(&btf_module_mutex); + break; case MODULE_STATE_GOING: mutex_lock(&btf_module_mutex); list_for_each_entry_safe(btf_mod, tmp, &btf_modules, list) { @@ -6338,7 +6463,12 @@ struct module *btf_try_get_module(const struct btf *btf) if (btf_mod->btf != btf) continue; - if (try_module_get(btf_mod->module)) + /* We must only consider module whose __init routine has + * finished, hence we must check for BTF_MODULE_F_LIVE flag, + * which is set from the notifier callback for + * MODULE_STATE_LIVE. + */ + if ((btf_mod->flags & BTF_MODULE_F_LIVE) && try_module_get(btf_mod->module)) res = btf_mod->module; break; @@ -6349,6 +6479,36 @@ struct module *btf_try_get_module(const struct btf *btf) return res; } +/* Returns struct btf corresponding to the struct module + * + * This function can return NULL or ERR_PTR. Note that caller must + * release reference for struct btf iff btf_is_module is true. + */ +static struct btf *btf_get_module_btf(const struct module *module) +{ + struct btf *btf = NULL; +#ifdef CONFIG_DEBUG_INFO_BTF_MODULES + struct btf_module *btf_mod, *tmp; +#endif + + if (!module) + return bpf_get_btf_vmlinux(); +#ifdef CONFIG_DEBUG_INFO_BTF_MODULES + mutex_lock(&btf_module_mutex); + list_for_each_entry_safe(btf_mod, tmp, &btf_modules, list) { + if (btf_mod->module != module) + continue; + + btf_get(btf_mod->btf); + btf = btf_mod->btf; + break; + } + mutex_unlock(&btf_module_mutex); +#endif + + return btf; +} + BPF_CALL_4(bpf_btf_find_by_name_kind, char *, name, int, name_sz, u32, kind, int, flags) { struct btf *btf; @@ -6416,53 +6576,181 @@ BTF_ID_LIST_GLOBAL(btf_tracing_ids, MAX_BTF_TRACING_TYPE) BTF_TRACING_TYPE_xxx #undef BTF_TRACING_TYPE -/* BTF ID set registration API for modules */ - -#ifdef CONFIG_DEBUG_INFO_BTF_MODULES +/* Kernel Function (kfunc) BTF ID set registration API */ -void register_kfunc_btf_id_set(struct kfunc_btf_id_list *l, - struct kfunc_btf_id_set *s) +static int __btf_populate_kfunc_set(struct btf *btf, enum btf_kfunc_hook hook, + enum btf_kfunc_type type, + struct btf_id_set *add_set, bool vmlinux_set) { - mutex_lock(&l->mutex); - list_add(&s->list, &l->list); - mutex_unlock(&l->mutex); + struct btf_kfunc_set_tab *tab; + struct btf_id_set *set; + u32 set_cnt; + int ret; + + if (hook >= BTF_KFUNC_HOOK_MAX || type >= BTF_KFUNC_TYPE_MAX) { + ret = -EINVAL; + goto end; + } + + if (!add_set->cnt) + return 0; + + tab = btf->kfunc_set_tab; + if (!tab) { + tab = kzalloc(sizeof(*tab), GFP_KERNEL | __GFP_NOWARN); + if (!tab) + return -ENOMEM; + btf->kfunc_set_tab = tab; + } + + set = tab->sets[hook][type]; + /* Warn when register_btf_kfunc_id_set is called twice for the same hook + * for module sets. + */ + if (WARN_ON_ONCE(set && !vmlinux_set)) { + ret = -EINVAL; + goto end; + } + + /* We don't need to allocate, concatenate, and sort module sets, because + * only one is allowed per hook. Hence, we can directly assign the + * pointer and return. + */ + if (!vmlinux_set) { + tab->sets[hook][type] = add_set; + return 0; + } + + /* In case of vmlinux sets, there may be more than one set being + * registered per hook. To create a unified set, we allocate a new set + * and concatenate all individual sets being registered. While each set + * is individually sorted, they may become unsorted when concatenated, + * hence re-sorting the final set again is required to make binary + * searching the set using btf_id_set_contains function work. + */ + set_cnt = set ? set->cnt : 0; + + if (set_cnt > U32_MAX - add_set->cnt) { + ret = -EOVERFLOW; + goto end; + } + + if (set_cnt + add_set->cnt > BTF_KFUNC_SET_MAX_CNT) { + ret = -E2BIG; + goto end; + } + + /* Grow set */ + set = krealloc(tab->sets[hook][type], + offsetof(struct btf_id_set, ids[set_cnt + add_set->cnt]), + GFP_KERNEL | __GFP_NOWARN); + if (!set) { + ret = -ENOMEM; + goto end; + } + + /* For newly allocated set, initialize set->cnt to 0 */ + if (!tab->sets[hook][type]) + set->cnt = 0; + tab->sets[hook][type] = set; + + /* Concatenate the two sets */ + memcpy(set->ids + set->cnt, add_set->ids, add_set->cnt * sizeof(set->ids[0])); + set->cnt += add_set->cnt; + + sort(set->ids, set->cnt, sizeof(set->ids[0]), btf_id_cmp_func, NULL); + + return 0; +end: + btf_free_kfunc_set_tab(btf); + return ret; } -EXPORT_SYMBOL_GPL(register_kfunc_btf_id_set); -void unregister_kfunc_btf_id_set(struct kfunc_btf_id_list *l, - struct kfunc_btf_id_set *s) +static int btf_populate_kfunc_set(struct btf *btf, enum btf_kfunc_hook hook, + const struct btf_kfunc_id_set *kset) { - mutex_lock(&l->mutex); - list_del_init(&s->list); - mutex_unlock(&l->mutex); + bool vmlinux_set = !btf_is_module(btf); + int type, ret; + + for (type = 0; type < ARRAY_SIZE(kset->sets); type++) { + if (!kset->sets[type]) + continue; + + ret = __btf_populate_kfunc_set(btf, hook, type, kset->sets[type], vmlinux_set); + if (ret) + break; + } + return ret; } -EXPORT_SYMBOL_GPL(unregister_kfunc_btf_id_set); -bool bpf_check_mod_kfunc_call(struct kfunc_btf_id_list *klist, u32 kfunc_id, - struct module *owner) +static bool __btf_kfunc_id_set_contains(const struct btf *btf, + enum btf_kfunc_hook hook, + enum btf_kfunc_type type, + u32 kfunc_btf_id) { - struct kfunc_btf_id_set *s; + struct btf_id_set *set; - mutex_lock(&klist->mutex); - list_for_each_entry(s, &klist->list, list) { - if (s->owner == owner && btf_id_set_contains(s->set, kfunc_id)) { - mutex_unlock(&klist->mutex); - return true; - } + if (hook >= BTF_KFUNC_HOOK_MAX || type >= BTF_KFUNC_TYPE_MAX) + return false; + if (!btf->kfunc_set_tab) + return false; + set = btf->kfunc_set_tab->sets[hook][type]; + if (!set) + return false; + return btf_id_set_contains(set, kfunc_btf_id); +} + +static int bpf_prog_type_to_kfunc_hook(enum bpf_prog_type prog_type) +{ + switch (prog_type) { + case BPF_PROG_TYPE_XDP: + return BTF_KFUNC_HOOK_XDP; + case BPF_PROG_TYPE_SCHED_CLS: + return BTF_KFUNC_HOOK_TC; + case BPF_PROG_TYPE_STRUCT_OPS: + return BTF_KFUNC_HOOK_STRUCT_OPS; + default: + return BTF_KFUNC_HOOK_MAX; } - mutex_unlock(&klist->mutex); - return false; } -#define DEFINE_KFUNC_BTF_ID_LIST(name) \ - struct kfunc_btf_id_list name = { LIST_HEAD_INIT(name.list), \ - __MUTEX_INITIALIZER(name.mutex) }; \ - EXPORT_SYMBOL_GPL(name) +/* Caution: + * Reference to the module (obtained using btf_try_get_module) corresponding to + * the struct btf *MUST* be held when calling this function from verifier + * context. This is usually true as we stash references in prog's kfunc_btf_tab; + * keeping the reference for the duration of the call provides the necessary + * protection for looking up a well-formed btf->kfunc_set_tab. + */ +bool btf_kfunc_id_set_contains(const struct btf *btf, + enum bpf_prog_type prog_type, + enum btf_kfunc_type type, u32 kfunc_btf_id) +{ + enum btf_kfunc_hook hook; -DEFINE_KFUNC_BTF_ID_LIST(bpf_tcp_ca_kfunc_list); -DEFINE_KFUNC_BTF_ID_LIST(prog_test_kfunc_list); + hook = bpf_prog_type_to_kfunc_hook(prog_type); + return __btf_kfunc_id_set_contains(btf, hook, type, kfunc_btf_id); +} -#endif +/* This function must be invoked only from initcalls/module init functions */ +int register_btf_kfunc_id_set(enum bpf_prog_type prog_type, + const struct btf_kfunc_id_set *kset) +{ + enum btf_kfunc_hook hook; + struct btf *btf; + int ret; + + btf = btf_get_module_btf(kset->owner); + if (IS_ERR_OR_NULL(btf)) + return btf ? PTR_ERR(btf) : -ENOENT; + + hook = bpf_prog_type_to_kfunc_hook(prog_type); + ret = btf_populate_kfunc_set(btf, hook, kset); + /* reference is only taken for module BTF */ + if (btf_is_module(btf)) + btf_put(btf); + return ret; +} +EXPORT_SYMBOL_GPL(register_btf_kfunc_id_set); int bpf_core_types_are_compat(const struct btf *local_btf, __u32 local_id, const struct btf *targ_btf, __u32 targ_id) diff --git a/kernel/bpf/cgroup.c b/kernel/bpf/cgroup.c index 514b4681a90a..279ebbed75a5 100644 --- a/kernel/bpf/cgroup.c +++ b/kernel/bpf/cgroup.c @@ -1044,7 +1044,7 @@ int cgroup_bpf_prog_query(const union bpf_attr *attr, * NET_XMIT_DROP (1) - drop packet and notify TCP to call cwr * NET_XMIT_CN (2) - continue with packet output and notify TCP * to call cwr - * -EPERM - drop packet + * -err - drop packet * * For ingress packets, this function will return -EPERM if any * attached program was found and if it returned != 1 during execution. @@ -1079,8 +1079,9 @@ int __cgroup_bpf_run_filter_skb(struct sock *sk, cgrp->bpf.effective[atype], skb, __bpf_prog_run_save_cb); } else { ret = BPF_PROG_RUN_ARRAY_CG(cgrp->bpf.effective[atype], skb, - __bpf_prog_run_save_cb); - ret = (ret == 1 ? 0 : -EPERM); + __bpf_prog_run_save_cb, 0); + if (ret && !IS_ERR_VALUE((long)ret)) + ret = -EFAULT; } bpf_restore_data_end(skb, saved_data_end); __skb_pull(skb, offset); @@ -1107,10 +1108,9 @@ int __cgroup_bpf_run_filter_sk(struct sock *sk, enum cgroup_bpf_attach_type atype) { struct cgroup *cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data); - int ret; - ret = BPF_PROG_RUN_ARRAY_CG(cgrp->bpf.effective[atype], sk, bpf_prog_run); - return ret == 1 ? 0 : -EPERM; + return BPF_PROG_RUN_ARRAY_CG(cgrp->bpf.effective[atype], sk, + bpf_prog_run, 0); } EXPORT_SYMBOL(__cgroup_bpf_run_filter_sk); @@ -1142,7 +1142,6 @@ int __cgroup_bpf_run_filter_sock_addr(struct sock *sk, }; struct sockaddr_storage unspec; struct cgroup *cgrp; - int ret; /* Check socket family since not all sockets represent network * endpoint (e.g. AF_UNIX). @@ -1156,10 +1155,8 @@ int __cgroup_bpf_run_filter_sock_addr(struct sock *sk, } cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data); - ret = BPF_PROG_RUN_ARRAY_CG_FLAGS(cgrp->bpf.effective[atype], &ctx, - bpf_prog_run, flags); - - return ret == 1 ? 0 : -EPERM; + return BPF_PROG_RUN_ARRAY_CG_FLAGS(cgrp->bpf.effective[atype], &ctx, + bpf_prog_run, 0, flags); } EXPORT_SYMBOL(__cgroup_bpf_run_filter_sock_addr); @@ -1184,11 +1181,9 @@ int __cgroup_bpf_run_filter_sock_ops(struct sock *sk, enum cgroup_bpf_attach_type atype) { struct cgroup *cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data); - int ret; - ret = BPF_PROG_RUN_ARRAY_CG(cgrp->bpf.effective[atype], sock_ops, - bpf_prog_run); - return ret == 1 ? 0 : -EPERM; + return BPF_PROG_RUN_ARRAY_CG(cgrp->bpf.effective[atype], sock_ops, + bpf_prog_run, 0); } EXPORT_SYMBOL(__cgroup_bpf_run_filter_sock_ops); @@ -1201,17 +1196,47 @@ int __cgroup_bpf_check_dev_permission(short dev_type, u32 major, u32 minor, .major = major, .minor = minor, }; - int allow; + int ret; rcu_read_lock(); cgrp = task_dfl_cgroup(current); - allow = BPF_PROG_RUN_ARRAY_CG(cgrp->bpf.effective[atype], &ctx, - bpf_prog_run); + ret = BPF_PROG_RUN_ARRAY_CG(cgrp->bpf.effective[atype], &ctx, + bpf_prog_run, 0); rcu_read_unlock(); - return !allow; + return ret; } +BPF_CALL_0(bpf_get_retval) +{ + struct bpf_cg_run_ctx *ctx = + container_of(current->bpf_ctx, struct bpf_cg_run_ctx, run_ctx); + + return ctx->retval; +} + +static const struct bpf_func_proto bpf_get_retval_proto = { + .func = bpf_get_retval, + .gpl_only = false, + .ret_type = RET_INTEGER, +}; + +BPF_CALL_1(bpf_set_retval, int, retval) +{ + struct bpf_cg_run_ctx *ctx = + container_of(current->bpf_ctx, struct bpf_cg_run_ctx, run_ctx); + + ctx->retval = retval; + return 0; +} + +static const struct bpf_func_proto bpf_set_retval_proto = { + .func = bpf_set_retval, + .gpl_only = false, + .ret_type = RET_INTEGER, + .arg1_type = ARG_ANYTHING, +}; + static const struct bpf_func_proto * cgroup_base_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) { @@ -1224,6 +1249,10 @@ cgroup_base_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) return &bpf_get_current_cgroup_id_proto; case BPF_FUNC_perf_event_output: return &bpf_event_output_data_proto; + case BPF_FUNC_get_retval: + return &bpf_get_retval_proto; + case BPF_FUNC_set_retval: + return &bpf_set_retval_proto; default: return bpf_base_func_proto(func_id); } @@ -1337,7 +1366,8 @@ int __cgroup_bpf_run_filter_sysctl(struct ctl_table_header *head, rcu_read_lock(); cgrp = task_dfl_cgroup(current); - ret = BPF_PROG_RUN_ARRAY_CG(cgrp->bpf.effective[atype], &ctx, bpf_prog_run); + ret = BPF_PROG_RUN_ARRAY_CG(cgrp->bpf.effective[atype], &ctx, + bpf_prog_run, 0); rcu_read_unlock(); kfree(ctx.cur_val); @@ -1350,7 +1380,7 @@ int __cgroup_bpf_run_filter_sysctl(struct ctl_table_header *head, kfree(ctx.new_val); } - return ret == 1 ? 0 : -EPERM; + return ret; } #ifdef CONFIG_NET @@ -1452,13 +1482,11 @@ int __cgroup_bpf_run_filter_setsockopt(struct sock *sk, int *level, lock_sock(sk); ret = BPF_PROG_RUN_ARRAY_CG(cgrp->bpf.effective[CGROUP_SETSOCKOPT], - &ctx, bpf_prog_run); + &ctx, bpf_prog_run, 0); release_sock(sk); - if (!ret) { - ret = -EPERM; + if (ret) goto out; - } if (ctx.optlen == -1) { /* optlen set to -1, bypass kernel */ @@ -1518,7 +1546,7 @@ int __cgroup_bpf_run_filter_getsockopt(struct sock *sk, int level, .sk = sk, .level = level, .optname = optname, - .retval = retval, + .current_task = current, }; int ret; @@ -1562,27 +1590,17 @@ int __cgroup_bpf_run_filter_getsockopt(struct sock *sk, int level, lock_sock(sk); ret = BPF_PROG_RUN_ARRAY_CG(cgrp->bpf.effective[CGROUP_GETSOCKOPT], - &ctx, bpf_prog_run); + &ctx, bpf_prog_run, retval); release_sock(sk); - if (!ret) { - ret = -EPERM; + if (ret < 0) goto out; - } if (ctx.optlen > max_optlen || ctx.optlen < 0) { ret = -EFAULT; goto out; } - /* BPF programs only allowed to set retval to 0, not some - * arbitrary value. - */ - if (ctx.retval != 0 && ctx.retval != retval) { - ret = -EFAULT; - goto out; - } - if (ctx.optlen != 0) { if (copy_to_user(optval, ctx.optval, ctx.optlen) || put_user(ctx.optlen, optlen)) { @@ -1591,8 +1609,6 @@ int __cgroup_bpf_run_filter_getsockopt(struct sock *sk, int level, } } - ret = ctx.retval; - out: sockopt_free_buf(&ctx, &buf); return ret; @@ -1607,10 +1623,10 @@ int __cgroup_bpf_run_filter_getsockopt_kern(struct sock *sk, int level, .sk = sk, .level = level, .optname = optname, - .retval = retval, .optlen = *optlen, .optval = optval, .optval_end = optval + *optlen, + .current_task = current, }; int ret; @@ -1623,25 +1639,19 @@ int __cgroup_bpf_run_filter_getsockopt_kern(struct sock *sk, int level, */ ret = BPF_PROG_RUN_ARRAY_CG(cgrp->bpf.effective[CGROUP_GETSOCKOPT], - &ctx, bpf_prog_run); - if (!ret) - return -EPERM; + &ctx, bpf_prog_run, retval); + if (ret < 0) + return ret; if (ctx.optlen > *optlen) return -EFAULT; - /* BPF programs only allowed to set retval to 0, not some - * arbitrary value. - */ - if (ctx.retval != 0 && ctx.retval != retval) - return -EFAULT; - /* BPF programs can shrink the buffer, export the modifications. */ if (ctx.optlen != 0) *optlen = ctx.optlen; - return ctx.retval; + return ret; } #endif @@ -2057,10 +2067,39 @@ static u32 cg_sockopt_convert_ctx_access(enum bpf_access_type type, *insn++ = CG_SOCKOPT_ACCESS_FIELD(BPF_LDX_MEM, optlen); break; case offsetof(struct bpf_sockopt, retval): - if (type == BPF_WRITE) - *insn++ = CG_SOCKOPT_ACCESS_FIELD(BPF_STX_MEM, retval); - else - *insn++ = CG_SOCKOPT_ACCESS_FIELD(BPF_LDX_MEM, retval); + BUILD_BUG_ON(offsetof(struct bpf_cg_run_ctx, run_ctx) != 0); + + if (type == BPF_WRITE) { + int treg = BPF_REG_9; + + if (si->src_reg == treg || si->dst_reg == treg) + --treg; + if (si->src_reg == treg || si->dst_reg == treg) + --treg; + *insn++ = BPF_STX_MEM(BPF_DW, si->dst_reg, treg, + offsetof(struct bpf_sockopt_kern, tmp_reg)); + *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct bpf_sockopt_kern, current_task), + treg, si->dst_reg, + offsetof(struct bpf_sockopt_kern, current_task)); + *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct task_struct, bpf_ctx), + treg, treg, + offsetof(struct task_struct, bpf_ctx)); + *insn++ = BPF_STX_MEM(BPF_FIELD_SIZEOF(struct bpf_cg_run_ctx, retval), + treg, si->src_reg, + offsetof(struct bpf_cg_run_ctx, retval)); + *insn++ = BPF_LDX_MEM(BPF_DW, treg, si->dst_reg, + offsetof(struct bpf_sockopt_kern, tmp_reg)); + } else { + *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct bpf_sockopt_kern, current_task), + si->dst_reg, si->src_reg, + offsetof(struct bpf_sockopt_kern, current_task)); + *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct task_struct, bpf_ctx), + si->dst_reg, si->dst_reg, + offsetof(struct task_struct, bpf_ctx)); + *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct bpf_cg_run_ctx, retval), + si->dst_reg, si->dst_reg, + offsetof(struct bpf_cg_run_ctx, retval)); + } break; case offsetof(struct bpf_sockopt, optval): *insn++ = CG_SOCKOPT_ACCESS_FIELD(BPF_LDX_MEM, optval); diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index de3e5bc6781f..0a1cfd8544b9 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -1829,28 +1829,30 @@ static unsigned int __bpf_prog_ret0_warn(const void *ctx, } #endif -bool bpf_prog_array_compatible(struct bpf_array *array, - const struct bpf_prog *fp) +bool bpf_prog_map_compatible(struct bpf_map *map, + const struct bpf_prog *fp) { bool ret; if (fp->kprobe_override) return false; - spin_lock(&array->aux->owner.lock); - - if (!array->aux->owner.type) { + spin_lock(&map->owner.lock); + if (!map->owner.type) { /* There's no owner yet where we could check for * compatibility. */ - array->aux->owner.type = fp->type; - array->aux->owner.jited = fp->jited; + map->owner.type = fp->type; + map->owner.jited = fp->jited; + map->owner.xdp_has_frags = fp->aux->xdp_has_frags; ret = true; } else { - ret = array->aux->owner.type == fp->type && - array->aux->owner.jited == fp->jited; + ret = map->owner.type == fp->type && + map->owner.jited == fp->jited && + map->owner.xdp_has_frags == fp->aux->xdp_has_frags; } - spin_unlock(&array->aux->owner.lock); + spin_unlock(&map->owner.lock); + return ret; } @@ -1862,13 +1864,11 @@ static int bpf_check_tail_call(const struct bpf_prog *fp) mutex_lock(&aux->used_maps_mutex); for (i = 0; i < aux->used_map_cnt; i++) { struct bpf_map *map = aux->used_maps[i]; - struct bpf_array *array; - if (map->map_type != BPF_MAP_TYPE_PROG_ARRAY) + if (!map_type_contains_progs(map)) continue; - array = container_of(map, struct bpf_array, map); - if (!bpf_prog_array_compatible(array, fp)) { + if (!bpf_prog_map_compatible(map, fp)) { ret = -EINVAL; goto out; } diff --git a/kernel/bpf/cpumap.c b/kernel/bpf/cpumap.c index b3e6b9422238..650e5d21f90d 100644 --- a/kernel/bpf/cpumap.c +++ b/kernel/bpf/cpumap.c @@ -397,7 +397,8 @@ static int cpu_map_kthread_run(void *data) return 0; } -static int __cpu_map_load_bpf_program(struct bpf_cpu_map_entry *rcpu, int fd) +static int __cpu_map_load_bpf_program(struct bpf_cpu_map_entry *rcpu, + struct bpf_map *map, int fd) { struct bpf_prog *prog; @@ -405,7 +406,8 @@ static int __cpu_map_load_bpf_program(struct bpf_cpu_map_entry *rcpu, int fd) if (IS_ERR(prog)) return PTR_ERR(prog); - if (prog->expected_attach_type != BPF_XDP_CPUMAP) { + if (prog->expected_attach_type != BPF_XDP_CPUMAP || + !bpf_prog_map_compatible(map, prog)) { bpf_prog_put(prog); return -EINVAL; } @@ -457,7 +459,7 @@ __cpu_map_entry_alloc(struct bpf_map *map, struct bpf_cpumap_val *value, rcpu->map_id = map->id; rcpu->value.qsize = value->qsize; - if (fd > 0 && __cpu_map_load_bpf_program(rcpu, fd)) + if (fd > 0 && __cpu_map_load_bpf_program(rcpu, map, fd)) goto free_ptr_ring; /* Setup kthread */ diff --git a/kernel/bpf/devmap.c b/kernel/bpf/devmap.c index fe019dbdb3f0..038f6d7a83e4 100644 --- a/kernel/bpf/devmap.c +++ b/kernel/bpf/devmap.c @@ -858,7 +858,8 @@ static struct bpf_dtab_netdev *__dev_map_alloc_node(struct net *net, BPF_PROG_TYPE_XDP, false); if (IS_ERR(prog)) goto err_put_dev; - if (prog->expected_attach_type != BPF_XDP_DEVMAP) + if (prog->expected_attach_type != BPF_XDP_DEVMAP || + !bpf_prog_map_compatible(&dtab->map, prog)) goto err_put_prog; } diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index fa4505f9b611..72ce1edde950 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -556,16 +556,14 @@ static unsigned long bpf_map_memory_footprint(const struct bpf_map *map) static void bpf_map_show_fdinfo(struct seq_file *m, struct file *filp) { - const struct bpf_map *map = filp->private_data; - const struct bpf_array *array; + struct bpf_map *map = filp->private_data; u32 type = 0, jited = 0; - if (map->map_type == BPF_MAP_TYPE_PROG_ARRAY) { - array = container_of(map, struct bpf_array, map); - spin_lock(&array->aux->owner.lock); - type = array->aux->owner.type; - jited = array->aux->owner.jited; - spin_unlock(&array->aux->owner.lock); + if (map_type_contains_progs(map)) { + spin_lock(&map->owner.lock); + type = map->owner.type; + jited = map->owner.jited; + spin_unlock(&map->owner.lock); } seq_printf(m, @@ -874,6 +872,7 @@ static int map_create(union bpf_attr *attr) atomic64_set(&map->refcnt, 1); atomic64_set(&map->usercnt, 1); mutex_init(&map->freeze_mutex); + spin_lock_init(&map->owner.lock); map->spin_lock_off = -EINVAL; map->timer_off = -EINVAL; @@ -2217,7 +2216,8 @@ static int bpf_prog_load(union bpf_attr *attr, bpfptr_t uattr) BPF_F_ANY_ALIGNMENT | BPF_F_TEST_STATE_FREQ | BPF_F_SLEEPABLE | - BPF_F_TEST_RND_HI32)) + BPF_F_TEST_RND_HI32 | + BPF_F_XDP_HAS_FRAGS)) return -EINVAL; if (!IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) && @@ -2303,6 +2303,7 @@ static int bpf_prog_load(union bpf_attr *attr, bpfptr_t uattr) prog->aux->dst_prog = dst_prog; prog->aux->offload_requested = !!attr->prog_ifindex; prog->aux->sleepable = attr->prog_flags & BPF_F_SLEEPABLE; + prog->aux->xdp_has_frags = attr->prog_flags & BPF_F_XDP_HAS_FRAGS; err = security_bpf_prog_alloc(prog->aux); if (err) @@ -3318,6 +3319,11 @@ static int bpf_prog_query(const union bpf_attr *attr, case BPF_FLOW_DISSECTOR: case BPF_SK_LOOKUP: return netns_bpf_prog_query(attr, uattr); + case BPF_SK_SKB_STREAM_PARSER: + case BPF_SK_SKB_STREAM_VERDICT: + case BPF_SK_MSG_VERDICT: + case BPF_SK_SKB_VERDICT: + return sock_map_bpf_prog_query(attr, uattr); default: return -EINVAL; } diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index a39eedecc93a..dcf065ec2774 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -452,7 +452,8 @@ static bool reg_type_may_be_refcounted_or_null(enum bpf_reg_type type) { return base_type(type) == PTR_TO_SOCKET || base_type(type) == PTR_TO_TCP_SOCK || - base_type(type) == PTR_TO_MEM; + base_type(type) == PTR_TO_MEM || + base_type(type) == PTR_TO_BTF_ID; } static bool type_is_rdonly_mem(u32 type) @@ -1743,7 +1744,7 @@ find_kfunc_desc(const struct bpf_prog *prog, u32 func_id, u16 offset) } static struct btf *__find_kfunc_desc_btf(struct bpf_verifier_env *env, - s16 offset, struct module **btf_modp) + s16 offset) { struct bpf_kfunc_btf kf_btf = { .offset = offset }; struct bpf_kfunc_btf_tab *tab; @@ -1797,8 +1798,6 @@ static struct btf *__find_kfunc_desc_btf(struct bpf_verifier_env *env, sort(tab->descs, tab->nr_descs, sizeof(tab->descs[0]), kfunc_btf_cmp_by_off, NULL); } - if (btf_modp) - *btf_modp = b->module; return b->btf; } @@ -1815,8 +1814,7 @@ void bpf_free_kfunc_btf_tab(struct bpf_kfunc_btf_tab *tab) } static struct btf *find_kfunc_desc_btf(struct bpf_verifier_env *env, - u32 func_id, s16 offset, - struct module **btf_modp) + u32 func_id, s16 offset) { if (offset) { if (offset < 0) { @@ -1827,7 +1825,7 @@ static struct btf *find_kfunc_desc_btf(struct bpf_verifier_env *env, return ERR_PTR(-EINVAL); } - return __find_kfunc_desc_btf(env, offset, btf_modp); + return __find_kfunc_desc_btf(env, offset); } return btf_vmlinux ?: ERR_PTR(-ENOENT); } @@ -1890,7 +1888,7 @@ static int add_kfunc_call(struct bpf_verifier_env *env, u32 func_id, s16 offset) prog_aux->kfunc_btf_tab = btf_tab; } - desc_btf = find_kfunc_desc_btf(env, func_id, offset, NULL); + desc_btf = find_kfunc_desc_btf(env, func_id, offset); if (IS_ERR(desc_btf)) { verbose(env, "failed to find BTF for kernel function\n"); return PTR_ERR(desc_btf); @@ -2351,7 +2349,7 @@ static const char *disasm_kfunc_name(void *data, const struct bpf_insn *insn) if (insn->src_reg != BPF_PSEUDO_KFUNC_CALL) return NULL; - desc_btf = find_kfunc_desc_btf(data, insn->imm, insn->off, NULL); + desc_btf = find_kfunc_desc_btf(data, insn->imm, insn->off); if (IS_ERR(desc_btf)) return "<error>"; @@ -3498,11 +3496,6 @@ static int check_map_access(struct bpf_verifier_env *env, u32 regno, #define MAX_PACKET_OFF 0xffff -static enum bpf_prog_type resolve_prog_type(struct bpf_prog *prog) -{ - return prog->aux->dst_prog ? prog->aux->dst_prog->type : prog->type; -} - static bool may_access_direct_pkt_data(struct bpf_verifier_env *env, const struct bpf_call_arg_meta *meta, enum bpf_access_type t) @@ -4877,6 +4870,62 @@ static int check_helper_mem_access(struct bpf_verifier_env *env, int regno, } } +static int check_mem_size_reg(struct bpf_verifier_env *env, + struct bpf_reg_state *reg, u32 regno, + bool zero_size_allowed, + struct bpf_call_arg_meta *meta) +{ + int err; + + /* This is used to refine r0 return value bounds for helpers + * that enforce this value as an upper bound on return values. + * See do_refine_retval_range() for helpers that can refine + * the return value. C type of helper is u32 so we pull register + * bound from umax_value however, if negative verifier errors + * out. Only upper bounds can be learned because retval is an + * int type and negative retvals are allowed. + */ + if (meta) + meta->msize_max_value = reg->umax_value; + + /* The register is SCALAR_VALUE; the access check + * happens using its boundaries. + */ + if (!tnum_is_const(reg->var_off)) + /* For unprivileged variable accesses, disable raw + * mode so that the program is required to + * initialize all the memory that the helper could + * just partially fill up. + */ + meta = NULL; + + if (reg->smin_value < 0) { + verbose(env, "R%d min value is negative, either use unsigned or 'var &= const'\n", + regno); + return -EACCES; + } + + if (reg->umin_value == 0) { + err = check_helper_mem_access(env, regno - 1, 0, + zero_size_allowed, + meta); + if (err) + return err; + } + + if (reg->umax_value >= BPF_MAX_VAR_SIZ) { + verbose(env, "R%d unbounded memory access, use 'var &= const' or 'if (var < const)'\n", + regno); + return -EACCES; + } + err = check_helper_mem_access(env, regno - 1, + reg->umax_value, + zero_size_allowed, meta); + if (!err) + err = mark_chain_precision(env, regno); + return err; +} + int check_mem_reg(struct bpf_verifier_env *env, struct bpf_reg_state *reg, u32 regno, u32 mem_size) { @@ -4900,6 +4949,28 @@ int check_mem_reg(struct bpf_verifier_env *env, struct bpf_reg_state *reg, return check_helper_mem_access(env, regno, mem_size, true, NULL); } +int check_kfunc_mem_size_reg(struct bpf_verifier_env *env, struct bpf_reg_state *reg, + u32 regno) +{ + struct bpf_reg_state *mem_reg = &cur_regs(env)[regno - 1]; + bool may_be_null = type_may_be_null(mem_reg->type); + struct bpf_reg_state saved_reg; + int err; + + WARN_ON_ONCE(regno < BPF_REG_2 || regno > BPF_REG_5); + + if (may_be_null) { + saved_reg = *mem_reg; + mark_ptr_not_null_reg(mem_reg); + } + + err = check_mem_size_reg(env, reg, regno, true, NULL); + + if (may_be_null) + *mem_reg = saved_reg; + return err; +} + /* Implementation details: * bpf_map_lookup returns PTR_TO_MAP_VALUE_OR_NULL * Two bpf_map_lookups (even with the same key) will have different reg->id. @@ -5439,51 +5510,7 @@ skip_type_check: } else if (arg_type_is_mem_size(arg_type)) { bool zero_size_allowed = (arg_type == ARG_CONST_SIZE_OR_ZERO); - /* This is used to refine r0 return value bounds for helpers - * that enforce this value as an upper bound on return values. - * See do_refine_retval_range() for helpers that can refine - * the return value. C type of helper is u32 so we pull register - * bound from umax_value however, if negative verifier errors - * out. Only upper bounds can be learned because retval is an - * int type and negative retvals are allowed. - */ - meta->msize_max_value = reg->umax_value; - - /* The register is SCALAR_VALUE; the access check - * happens using its boundaries. - */ - if (!tnum_is_const(reg->var_off)) - /* For unprivileged variable accesses, disable raw - * mode so that the program is required to - * initialize all the memory that the helper could - * just partially fill up. - */ - meta = NULL; - - if (reg->smin_value < 0) { - verbose(env, "R%d min value is negative, either use unsigned or 'var &= const'\n", - regno); - return -EACCES; - } - - if (reg->umin_value == 0) { - err = check_helper_mem_access(env, regno - 1, 0, - zero_size_allowed, - meta); - if (err) - return err; - } - - if (reg->umax_value >= BPF_MAX_VAR_SIZ) { - verbose(env, "R%d unbounded memory access, use 'var &= const' or 'if (var < const)'\n", - regno); - return -EACCES; - } - err = check_helper_mem_access(env, regno - 1, - reg->umax_value, - zero_size_allowed, meta); - if (!err) - err = mark_chain_precision(env, regno); + err = check_mem_size_reg(env, reg, regno, zero_size_allowed, meta); } else if (arg_type_is_alloc_size(arg_type)) { if (!tnum_is_const(reg->var_off)) { verbose(env, "R%d is not a known constant'\n", @@ -6842,22 +6869,23 @@ static void mark_btf_func_reg_size(struct bpf_verifier_env *env, u32 regno, } } -static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn) +static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn, + int *insn_idx_p) { const struct btf_type *t, *func, *func_proto, *ptr_type; struct bpf_reg_state *regs = cur_regs(env); const char *func_name, *ptr_type_name; u32 i, nargs, func_id, ptr_type_id; - struct module *btf_mod = NULL; + int err, insn_idx = *insn_idx_p; const struct btf_param *args; struct btf *desc_btf; - int err; + bool acq; /* skip for now, but return error when we find this in fixup_kfunc_call */ if (!insn->imm) return 0; - desc_btf = find_kfunc_desc_btf(env, insn->imm, insn->off, &btf_mod); + desc_btf = find_kfunc_desc_btf(env, insn->imm, insn->off); if (IS_ERR(desc_btf)) return PTR_ERR(desc_btf); @@ -6866,23 +6894,43 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn) func_name = btf_name_by_offset(desc_btf, func->name_off); func_proto = btf_type_by_id(desc_btf, func->type); - if (!env->ops->check_kfunc_call || - !env->ops->check_kfunc_call(func_id, btf_mod)) { + if (!btf_kfunc_id_set_contains(desc_btf, resolve_prog_type(env->prog), + BTF_KFUNC_TYPE_CHECK, func_id)) { verbose(env, "calling kernel function %s is not allowed\n", func_name); return -EACCES; } + acq = btf_kfunc_id_set_contains(desc_btf, resolve_prog_type(env->prog), + BTF_KFUNC_TYPE_ACQUIRE, func_id); + /* Check the arguments */ err = btf_check_kfunc_arg_match(env, desc_btf, func_id, regs); - if (err) + if (err < 0) return err; + /* In case of release function, we get register number of refcounted + * PTR_TO_BTF_ID back from btf_check_kfunc_arg_match, do the release now + */ + if (err) { + err = release_reference(env, regs[err].ref_obj_id); + if (err) { + verbose(env, "kfunc %s#%d reference has not been acquired before\n", + func_name, func_id); + return err; + } + } for (i = 0; i < CALLER_SAVED_REGS; i++) mark_reg_not_init(env, regs, caller_saved[i]); /* Check return type */ t = btf_type_skip_modifiers(desc_btf, func_proto->type, NULL); + + if (acq && !btf_type_is_ptr(t)) { + verbose(env, "acquire kernel function does not return PTR_TO_BTF_ID\n"); + return -EINVAL; + } + if (btf_type_is_scalar(t)) { mark_reg_unknown(env, regs, BPF_REG_0); mark_btf_func_reg_size(env, BPF_REG_0, t->size); @@ -6901,7 +6949,21 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn) regs[BPF_REG_0].btf = desc_btf; regs[BPF_REG_0].type = PTR_TO_BTF_ID; regs[BPF_REG_0].btf_id = ptr_type_id; + if (btf_kfunc_id_set_contains(desc_btf, resolve_prog_type(env->prog), + BTF_KFUNC_TYPE_RET_NULL, func_id)) { + regs[BPF_REG_0].type |= PTR_MAYBE_NULL; + /* For mark_ptr_or_null_reg, see 93c230e3f5bd6 */ + regs[BPF_REG_0].id = ++env->id_gen; + } mark_btf_func_reg_size(env, BPF_REG_0, sizeof(void *)); + if (acq) { + int id = acquire_reference_state(env, insn_idx); + + if (id < 0) + return id; + regs[BPF_REG_0].id = id; + regs[BPF_REG_0].ref_obj_id = id; + } } /* else { add_kfunc_call() ensures it is btf_type_is_void(t) } */ nargs = btf_type_vlen(func_proto); @@ -11549,7 +11611,7 @@ static int do_check(struct bpf_verifier_env *env) if (insn->src_reg == BPF_PSEUDO_CALL) err = check_func_call(env, insn, &env->insn_idx); else if (insn->src_reg == BPF_PSEUDO_KFUNC_CALL) - err = check_kfunc_call(env, insn); + err = check_kfunc_call(env, insn, &env->insn_idx); else err = check_helper_call(env, insn, &env->insn_idx); if (err) diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index 21aa30644219..06a9e220069e 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -1562,6 +1562,7 @@ static const struct bpf_func_proto bpf_perf_event_output_proto_raw_tp = { extern const struct bpf_func_proto bpf_skb_output_proto; extern const struct bpf_func_proto bpf_xdp_output_proto; +extern const struct bpf_func_proto bpf_xdp_get_buff_len_trace_proto; BPF_CALL_3(bpf_get_stackid_raw_tp, struct bpf_raw_tracepoint_args *, args, struct bpf_map *, map, u64, flags) @@ -1661,6 +1662,8 @@ tracing_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) return &bpf_sock_from_file_proto; case BPF_FUNC_get_socket_cookie: return &bpf_get_socket_ptr_cookie_proto; + case BPF_FUNC_xdp_get_buff_len: + return &bpf_xdp_get_buff_len_trace_proto; #endif case BPF_FUNC_seq_printf: return prog->expected_attach_type == BPF_TRACE_ITER ? diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c index 46dd95755967..65b52b4bd6e1 100644 --- a/net/bpf/test_run.c +++ b/net/bpf/test_run.c @@ -5,6 +5,7 @@ #include <linux/btf.h> #include <linux/btf_ids.h> #include <linux/slab.h> +#include <linux/init.h> #include <linux/vmalloc.h> #include <linux/etherdevice.h> #include <linux/filter.h> @@ -130,7 +131,8 @@ static int bpf_test_run(struct bpf_prog *prog, void *ctx, u32 repeat, static int bpf_test_finish(const union bpf_attr *kattr, union bpf_attr __user *uattr, const void *data, - u32 size, u32 retval, u32 duration) + struct skb_shared_info *sinfo, u32 size, + u32 retval, u32 duration) { void __user *data_out = u64_to_user_ptr(kattr->test.data_out); int err = -EFAULT; @@ -145,8 +147,36 @@ static int bpf_test_finish(const union bpf_attr *kattr, err = -ENOSPC; } - if (data_out && copy_to_user(data_out, data, copy_size)) - goto out; + if (data_out) { + int len = sinfo ? copy_size - sinfo->xdp_frags_size : copy_size; + + if (copy_to_user(data_out, data, len)) + goto out; + + if (sinfo) { + int i, offset = len, data_len; + + for (i = 0; i < sinfo->nr_frags; i++) { + skb_frag_t *frag = &sinfo->frags[i]; + + if (offset >= copy_size) { + err = -ENOSPC; + break; + } + + data_len = min_t(int, copy_size - offset, + skb_frag_size(frag)); + + if (copy_to_user(data_out + offset, + skb_frag_address(frag), + data_len)) + goto out; + + offset += data_len; + } + } + } + if (copy_to_user(&uattr->test.data_size_out, &size, sizeof(size))) goto out; if (copy_to_user(&uattr->test.retval, &retval, sizeof(retval))) @@ -171,6 +201,8 @@ int noinline bpf_fentry_test1(int a) { return a + 1; } +EXPORT_SYMBOL_GPL(bpf_fentry_test1); +ALLOW_ERROR_INJECTION(bpf_fentry_test1, ERRNO); int noinline bpf_fentry_test2(int a, u64 b) { @@ -232,28 +264,142 @@ struct sock * noinline bpf_kfunc_call_test3(struct sock *sk) return sk; } +struct prog_test_ref_kfunc { + int a; + int b; + struct prog_test_ref_kfunc *next; +}; + +static struct prog_test_ref_kfunc prog_test_struct = { + .a = 42, + .b = 108, + .next = &prog_test_struct, +}; + +noinline struct prog_test_ref_kfunc * +bpf_kfunc_call_test_acquire(unsigned long *scalar_ptr) +{ + /* randomly return NULL */ + if (get_jiffies_64() % 2) + return NULL; + return &prog_test_struct; +} + +noinline void bpf_kfunc_call_test_release(struct prog_test_ref_kfunc *p) +{ +} + +struct prog_test_pass1 { + int x0; + struct { + int x1; + struct { + int x2; + struct { + int x3; + }; + }; + }; +}; + +struct prog_test_pass2 { + int len; + short arr1[4]; + struct { + char arr2[4]; + unsigned long arr3[8]; + } x; +}; + +struct prog_test_fail1 { + void *p; + int x; +}; + +struct prog_test_fail2 { + int x8; + struct prog_test_pass1 x; +}; + +struct prog_test_fail3 { + int len; + char arr1[2]; + char arr2[]; +}; + +noinline void bpf_kfunc_call_test_pass_ctx(struct __sk_buff *skb) +{ +} + +noinline void bpf_kfunc_call_test_pass1(struct prog_test_pass1 *p) +{ +} + +noinline void bpf_kfunc_call_test_pass2(struct prog_test_pass2 *p) +{ +} + +noinline void bpf_kfunc_call_test_fail1(struct prog_test_fail1 *p) +{ +} + +noinline void bpf_kfunc_call_test_fail2(struct prog_test_fail2 *p) +{ +} + +noinline void bpf_kfunc_call_test_fail3(struct prog_test_fail3 *p) +{ +} + +noinline void bpf_kfunc_call_test_mem_len_pass1(void *mem, int mem__sz) +{ +} + +noinline void bpf_kfunc_call_test_mem_len_fail1(void *mem, int len) +{ +} + +noinline void bpf_kfunc_call_test_mem_len_fail2(u64 *mem, int len) +{ +} + __diag_pop(); ALLOW_ERROR_INJECTION(bpf_modify_return_test, ERRNO); -BTF_SET_START(test_sk_kfunc_ids) +BTF_SET_START(test_sk_check_kfunc_ids) BTF_ID(func, bpf_kfunc_call_test1) BTF_ID(func, bpf_kfunc_call_test2) BTF_ID(func, bpf_kfunc_call_test3) -BTF_SET_END(test_sk_kfunc_ids) - -bool bpf_prog_test_check_kfunc_call(u32 kfunc_id, struct module *owner) -{ - if (btf_id_set_contains(&test_sk_kfunc_ids, kfunc_id)) - return true; - return bpf_check_mod_kfunc_call(&prog_test_kfunc_list, kfunc_id, owner); -} - -static void *bpf_test_init(const union bpf_attr *kattr, u32 size, - u32 headroom, u32 tailroom) +BTF_ID(func, bpf_kfunc_call_test_acquire) +BTF_ID(func, bpf_kfunc_call_test_release) +BTF_ID(func, bpf_kfunc_call_test_pass_ctx) +BTF_ID(func, bpf_kfunc_call_test_pass1) +BTF_ID(func, bpf_kfunc_call_test_pass2) +BTF_ID(func, bpf_kfunc_call_test_fail1) +BTF_ID(func, bpf_kfunc_call_test_fail2) +BTF_ID(func, bpf_kfunc_call_test_fail3) +BTF_ID(func, bpf_kfunc_call_test_mem_len_pass1) +BTF_ID(func, bpf_kfunc_call_test_mem_len_fail1) +BTF_ID(func, bpf_kfunc_call_test_mem_len_fail2) +BTF_SET_END(test_sk_check_kfunc_ids) + +BTF_SET_START(test_sk_acquire_kfunc_ids) +BTF_ID(func, bpf_kfunc_call_test_acquire) +BTF_SET_END(test_sk_acquire_kfunc_ids) + +BTF_SET_START(test_sk_release_kfunc_ids) +BTF_ID(func, bpf_kfunc_call_test_release) +BTF_SET_END(test_sk_release_kfunc_ids) + +BTF_SET_START(test_sk_ret_null_kfunc_ids) +BTF_ID(func, bpf_kfunc_call_test_acquire) +BTF_SET_END(test_sk_ret_null_kfunc_ids) + +static void *bpf_test_init(const union bpf_attr *kattr, u32 user_size, + u32 size, u32 headroom, u32 tailroom) { void __user *data_in = u64_to_user_ptr(kattr->test.data_in); - u32 user_size = kattr->test.data_size_in; void *data; if (size < ETH_HLEN || size > PAGE_SIZE - headroom - tailroom) @@ -581,7 +727,8 @@ int bpf_prog_test_run_skb(struct bpf_prog *prog, const union bpf_attr *kattr, if (kattr->test.flags || kattr->test.cpu) return -EINVAL; - data = bpf_test_init(kattr, size, NET_SKB_PAD + NET_IP_ALIGN, + data = bpf_test_init(kattr, kattr->test.data_size_in, + size, NET_SKB_PAD + NET_IP_ALIGN, SKB_DATA_ALIGN(sizeof(struct skb_shared_info))); if (IS_ERR(data)) return PTR_ERR(data); @@ -683,7 +830,8 @@ int bpf_prog_test_run_skb(struct bpf_prog *prog, const union bpf_attr *kattr, /* bpf program can never convert linear skb to non-linear */ if (WARN_ON_ONCE(skb_is_nonlinear(skb))) size = skb_headlen(skb); - ret = bpf_test_finish(kattr, uattr, skb->data, size, retval, duration); + ret = bpf_test_finish(kattr, uattr, skb->data, NULL, size, retval, + duration); if (!ret) ret = bpf_ctx_finish(kattr, uattr, ctx, sizeof(struct __sk_buff)); @@ -758,16 +906,16 @@ int bpf_prog_test_run_xdp(struct bpf_prog *prog, const union bpf_attr *kattr, union bpf_attr __user *uattr) { u32 tailroom = SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); - u32 headroom = XDP_PACKET_HEADROOM; u32 size = kattr->test.data_size_in; + u32 headroom = XDP_PACKET_HEADROOM; + u32 retval, duration, max_data_sz; u32 repeat = kattr->test.repeat; struct netdev_rx_queue *rxqueue; + struct skb_shared_info *sinfo; struct xdp_buff xdp = {}; - u32 retval, duration; + int i, ret = -EINVAL; struct xdp_md *ctx; - u32 max_data_sz; void *data; - int ret = -EINVAL; if (prog->expected_attach_type == BPF_XDP_DEVMAP || prog->expected_attach_type == BPF_XDP_CPUMAP) @@ -787,26 +935,60 @@ int bpf_prog_test_run_xdp(struct bpf_prog *prog, const union bpf_attr *kattr, headroom -= ctx->data; } - /* XDP have extra tailroom as (most) drivers use full page */ max_data_sz = 4096 - headroom - tailroom; + size = min_t(u32, size, max_data_sz); - data = bpf_test_init(kattr, max_data_sz, headroom, tailroom); + data = bpf_test_init(kattr, size, max_data_sz, headroom, tailroom); if (IS_ERR(data)) { ret = PTR_ERR(data); goto free_ctx; } rxqueue = __netif_get_rx_queue(current->nsproxy->net_ns->loopback_dev, 0); - xdp_init_buff(&xdp, headroom + max_data_sz + tailroom, - &rxqueue->xdp_rxq); + rxqueue->xdp_rxq.frag_size = headroom + max_data_sz + tailroom; + xdp_init_buff(&xdp, rxqueue->xdp_rxq.frag_size, &rxqueue->xdp_rxq); xdp_prepare_buff(&xdp, data, headroom, size, true); + sinfo = xdp_get_shared_info_from_buff(&xdp); ret = xdp_convert_md_to_buff(ctx, &xdp); if (ret) goto free_data; + if (unlikely(kattr->test.data_size_in > size)) { + void __user *data_in = u64_to_user_ptr(kattr->test.data_in); + + while (size < kattr->test.data_size_in) { + struct page *page; + skb_frag_t *frag; + int data_len; + + page = alloc_page(GFP_KERNEL); + if (!page) { + ret = -ENOMEM; + goto out; + } + + frag = &sinfo->frags[sinfo->nr_frags++]; + __skb_frag_set_page(frag, page); + + data_len = min_t(int, kattr->test.data_size_in - size, + PAGE_SIZE); + skb_frag_size_set(frag, data_len); + + if (copy_from_user(page_address(page), data_in + size, + data_len)) { + ret = -EFAULT; + goto out; + } + sinfo->xdp_frags_size += data_len; + size += data_len; + } + xdp_buff_set_frags_flag(&xdp); + } + if (repeat > 1) bpf_prog_change_xdp(NULL, prog); + ret = bpf_test_run(prog, &xdp, repeat, &retval, &duration, true); /* We convert the xdp_buff back to an xdp_md before checking the return * code so the reference count of any held netdevice will be decremented @@ -816,12 +998,9 @@ int bpf_prog_test_run_xdp(struct bpf_prog *prog, const union bpf_attr *kattr, if (ret) goto out; - if (xdp.data_meta != data + headroom || - xdp.data_end != xdp.data_meta + size) - size = xdp.data_end - xdp.data_meta; - - ret = bpf_test_finish(kattr, uattr, xdp.data_meta, size, retval, - duration); + size = xdp.data_end - xdp.data_meta + sinfo->xdp_frags_size; + ret = bpf_test_finish(kattr, uattr, xdp.data_meta, sinfo, size, + retval, duration); if (!ret) ret = bpf_ctx_finish(kattr, uattr, ctx, sizeof(struct xdp_md)); @@ -830,6 +1009,8 @@ out: if (repeat > 1) bpf_prog_change_xdp(prog, NULL); free_data: + for (i = 0; i < sinfo->nr_frags; i++) + __free_page(skb_frag_page(&sinfo->frags[i])); kfree(data); free_ctx: kfree(ctx); @@ -876,7 +1057,7 @@ int bpf_prog_test_run_flow_dissector(struct bpf_prog *prog, if (size < ETH_HLEN) return -EINVAL; - data = bpf_test_init(kattr, size, 0, 0); + data = bpf_test_init(kattr, kattr->test.data_size_in, size, 0, 0); if (IS_ERR(data)) return PTR_ERR(data); @@ -911,8 +1092,8 @@ int bpf_prog_test_run_flow_dissector(struct bpf_prog *prog, if (ret < 0) goto out; - ret = bpf_test_finish(kattr, uattr, &flow_keys, sizeof(flow_keys), - retval, duration); + ret = bpf_test_finish(kattr, uattr, &flow_keys, NULL, + sizeof(flow_keys), retval, duration); if (!ret) ret = bpf_ctx_finish(kattr, uattr, user_ctx, sizeof(struct bpf_flow_keys)); @@ -1016,7 +1197,7 @@ int bpf_prog_test_run_sk_lookup(struct bpf_prog *prog, const union bpf_attr *kat user_ctx->cookie = sock_gen_cookie(ctx.selected_sk); } - ret = bpf_test_finish(kattr, uattr, NULL, 0, retval, duration); + ret = bpf_test_finish(kattr, uattr, NULL, NULL, 0, retval, duration); if (!ret) ret = bpf_ctx_finish(kattr, uattr, user_ctx, sizeof(*user_ctx)); @@ -1067,3 +1248,17 @@ out: kfree(ctx); return err; } + +static const struct btf_kfunc_id_set bpf_prog_test_kfunc_set = { + .owner = THIS_MODULE, + .check_set = &test_sk_check_kfunc_ids, + .acquire_set = &test_sk_acquire_kfunc_ids, + .release_set = &test_sk_release_kfunc_ids, + .ret_null_set = &test_sk_ret_null_kfunc_ids, +}; + +static int __init bpf_prog_test_run_init(void) +{ + return register_btf_kfunc_id_set(BPF_PROG_TYPE_SCHED_CLS, &bpf_prog_test_kfunc_set); +} +late_initcall(bpf_prog_test_run_init); diff --git a/net/core/filter.c b/net/core/filter.c index 4603b7cd3cd1..a06931c27eeb 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -3783,6 +3783,28 @@ static const struct bpf_func_proto sk_skb_change_head_proto = { .arg2_type = ARG_ANYTHING, .arg3_type = ARG_ANYTHING, }; + +BPF_CALL_1(bpf_xdp_get_buff_len, struct xdp_buff*, xdp) +{ + return xdp_get_buff_len(xdp); +} + +static const struct bpf_func_proto bpf_xdp_get_buff_len_proto = { + .func = bpf_xdp_get_buff_len, + .gpl_only = false, + .ret_type = RET_INTEGER, + .arg1_type = ARG_PTR_TO_CTX, +}; + +BTF_ID_LIST_SINGLE(bpf_xdp_get_buff_len_bpf_ids, struct, xdp_buff) + +const struct bpf_func_proto bpf_xdp_get_buff_len_trace_proto = { + .func = bpf_xdp_get_buff_len, + .gpl_only = false, + .arg1_type = ARG_PTR_TO_BTF_ID, + .arg1_btf_id = &bpf_xdp_get_buff_len_bpf_ids[0], +}; + static unsigned long xdp_get_metalen(const struct xdp_buff *xdp) { return xdp_data_meta_unsupported(xdp) ? 0 : @@ -3817,11 +3839,208 @@ static const struct bpf_func_proto bpf_xdp_adjust_head_proto = { .arg2_type = ARG_ANYTHING, }; +static void bpf_xdp_copy_buf(struct xdp_buff *xdp, unsigned long off, + void *buf, unsigned long len, bool flush) +{ + unsigned long ptr_len, ptr_off = 0; + skb_frag_t *next_frag, *end_frag; + struct skb_shared_info *sinfo; + void *src, *dst; + u8 *ptr_buf; + + if (likely(xdp->data_end - xdp->data >= off + len)) { + src = flush ? buf : xdp->data + off; + dst = flush ? xdp->data + off : buf; + memcpy(dst, src, len); + return; + } + + sinfo = xdp_get_shared_info_from_buff(xdp); + end_frag = &sinfo->frags[sinfo->nr_frags]; + next_frag = &sinfo->frags[0]; + + ptr_len = xdp->data_end - xdp->data; + ptr_buf = xdp->data; + + while (true) { + if (off < ptr_off + ptr_len) { + unsigned long copy_off = off - ptr_off; + unsigned long copy_len = min(len, ptr_len - copy_off); + + src = flush ? buf : ptr_buf + copy_off; + dst = flush ? ptr_buf + copy_off : buf; + memcpy(dst, src, copy_len); + + off += copy_len; + len -= copy_len; + buf += copy_len; + } + + if (!len || next_frag == end_frag) + break; + + ptr_off += ptr_len; + ptr_buf = skb_frag_address(next_frag); + ptr_len = skb_frag_size(next_frag); + next_frag++; + } +} + +static void *bpf_xdp_pointer(struct xdp_buff *xdp, u32 offset, u32 len) +{ + struct skb_shared_info *sinfo = xdp_get_shared_info_from_buff(xdp); + u32 size = xdp->data_end - xdp->data; + void *addr = xdp->data; + int i; + + if (unlikely(offset > 0xffff || len > 0xffff)) + return ERR_PTR(-EFAULT); + + if (offset + len > xdp_get_buff_len(xdp)) + return ERR_PTR(-EINVAL); + + if (offset < size) /* linear area */ + goto out; + + offset -= size; + for (i = 0; i < sinfo->nr_frags; i++) { /* paged area */ + u32 frag_size = skb_frag_size(&sinfo->frags[i]); + + if (offset < frag_size) { + addr = skb_frag_address(&sinfo->frags[i]); + size = frag_size; + break; + } + offset -= frag_size; + } +out: + return offset + len < size ? addr + offset : NULL; +} + +BPF_CALL_4(bpf_xdp_load_bytes, struct xdp_buff *, xdp, u32, offset, + void *, buf, u32, len) +{ + void *ptr; + + ptr = bpf_xdp_pointer(xdp, offset, len); + if (IS_ERR(ptr)) + return PTR_ERR(ptr); + + if (!ptr) + bpf_xdp_copy_buf(xdp, offset, buf, len, false); + else + memcpy(buf, ptr, len); + + return 0; +} + +static const struct bpf_func_proto bpf_xdp_load_bytes_proto = { + .func = bpf_xdp_load_bytes, + .gpl_only = false, + .ret_type = RET_INTEGER, + .arg1_type = ARG_PTR_TO_CTX, + .arg2_type = ARG_ANYTHING, + .arg3_type = ARG_PTR_TO_UNINIT_MEM, + .arg4_type = ARG_CONST_SIZE, +}; + +BPF_CALL_4(bpf_xdp_store_bytes, struct xdp_buff *, xdp, u32, offset, + void *, buf, u32, len) +{ + void *ptr; + + ptr = bpf_xdp_pointer(xdp, offset, len); + if (IS_ERR(ptr)) + return PTR_ERR(ptr); + + if (!ptr) + bpf_xdp_copy_buf(xdp, offset, buf, len, true); + else + memcpy(ptr, buf, len); + + return 0; +} + +static const struct bpf_func_proto bpf_xdp_store_bytes_proto = { + .func = bpf_xdp_store_bytes, + .gpl_only = false, + .ret_type = RET_INTEGER, + .arg1_type = ARG_PTR_TO_CTX, + .arg2_type = ARG_ANYTHING, + .arg3_type = ARG_PTR_TO_UNINIT_MEM, + .arg4_type = ARG_CONST_SIZE, +}; + +static int bpf_xdp_frags_increase_tail(struct xdp_buff *xdp, int offset) +{ + struct skb_shared_info *sinfo = xdp_get_shared_info_from_buff(xdp); + skb_frag_t *frag = &sinfo->frags[sinfo->nr_frags - 1]; + struct xdp_rxq_info *rxq = xdp->rxq; + unsigned int tailroom; + + if (!rxq->frag_size || rxq->frag_size > xdp->frame_sz) + return -EOPNOTSUPP; + + tailroom = rxq->frag_size - skb_frag_size(frag) - skb_frag_off(frag); + if (unlikely(offset > tailroom)) + return -EINVAL; + + memset(skb_frag_address(frag) + skb_frag_size(frag), 0, offset); + skb_frag_size_add(frag, offset); + sinfo->xdp_frags_size += offset; + + return 0; +} + +static int bpf_xdp_frags_shrink_tail(struct xdp_buff *xdp, int offset) +{ + struct skb_shared_info *sinfo = xdp_get_shared_info_from_buff(xdp); + int i, n_frags_free = 0, len_free = 0; + + if (unlikely(offset > (int)xdp_get_buff_len(xdp) - ETH_HLEN)) + return -EINVAL; + + for (i = sinfo->nr_frags - 1; i >= 0 && offset > 0; i--) { + skb_frag_t *frag = &sinfo->frags[i]; + int shrink = min_t(int, offset, skb_frag_size(frag)); + + len_free += shrink; + offset -= shrink; + + if (skb_frag_size(frag) == shrink) { + struct page *page = skb_frag_page(frag); + + __xdp_return(page_address(page), &xdp->rxq->mem, + false, NULL); + n_frags_free++; + } else { + skb_frag_size_sub(frag, shrink); + break; + } + } + sinfo->nr_frags -= n_frags_free; + sinfo->xdp_frags_size -= len_free; + + if (unlikely(!sinfo->nr_frags)) { + xdp_buff_clear_frags_flag(xdp); + xdp->data_end -= offset; + } + + return 0; +} + BPF_CALL_2(bpf_xdp_adjust_tail, struct xdp_buff *, xdp, int, offset) { void *data_hard_end = xdp_data_hard_end(xdp); /* use xdp->frame_sz */ void *data_end = xdp->data_end + offset; + if (unlikely(xdp_buff_has_frags(xdp))) { /* non-linear xdp buff */ + if (offset < 0) + return bpf_xdp_frags_shrink_tail(xdp, -offset); + + return bpf_xdp_frags_increase_tail(xdp, offset); + } + /* Notice that xdp_data_hard_end have reserved some tailroom */ if (unlikely(data_end > data_hard_end)) return -EINVAL; @@ -4047,6 +4266,14 @@ int xdp_do_redirect(struct net_device *dev, struct xdp_buff *xdp, struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info); enum bpf_map_type map_type = ri->map_type; + /* XDP_REDIRECT is not fully supported yet for xdp frags since + * not all XDP capable drivers can map non-linear xdp_frame in + * ndo_xdp_xmit. + */ + if (unlikely(xdp_buff_has_frags(xdp) && + map_type != BPF_MAP_TYPE_CPUMAP)) + return -EOPNOTSUPP; + if (map_type == BPF_MAP_TYPE_XSKMAP) return __xdp_do_redirect_xsk(ri, dev, xdp, xdp_prog); @@ -4590,10 +4817,12 @@ static const struct bpf_func_proto bpf_sk_ancestor_cgroup_id_proto = { }; #endif -static unsigned long bpf_xdp_copy(void *dst_buff, const void *src_buff, +static unsigned long bpf_xdp_copy(void *dst, const void *ctx, unsigned long off, unsigned long len) { - memcpy(dst_buff, src_buff + off, len); + struct xdp_buff *xdp = (struct xdp_buff *)ctx; + + bpf_xdp_copy_buf(xdp, off, dst, len, false); return 0; } @@ -4604,11 +4833,11 @@ BPF_CALL_5(bpf_xdp_event_output, struct xdp_buff *, xdp, struct bpf_map *, map, if (unlikely(flags & ~(BPF_F_CTXLEN_MASK | BPF_F_INDEX_MASK))) return -EINVAL; - if (unlikely(!xdp || - xdp_size > (unsigned long)(xdp->data_end - xdp->data))) + + if (unlikely(!xdp || xdp_size > xdp_get_buff_len(xdp))) return -EFAULT; - return bpf_event_output(map, flags, meta, meta_size, xdp->data, + return bpf_event_output(map, flags, meta, meta_size, xdp, xdp_size, bpf_xdp_copy); } @@ -7533,6 +7762,12 @@ xdp_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) return &bpf_xdp_redirect_map_proto; case BPF_FUNC_xdp_adjust_tail: return &bpf_xdp_adjust_tail_proto; + case BPF_FUNC_xdp_get_buff_len: + return &bpf_xdp_get_buff_len_proto; + case BPF_FUNC_xdp_load_bytes: + return &bpf_xdp_load_bytes_proto; + case BPF_FUNC_xdp_store_bytes: + return &bpf_xdp_store_bytes_proto; case BPF_FUNC_fib_lookup: return &bpf_xdp_fib_lookup_proto; case BPF_FUNC_check_mtu: @@ -10062,7 +10297,6 @@ const struct bpf_verifier_ops tc_cls_act_verifier_ops = { .convert_ctx_access = tc_cls_act_convert_ctx_access, .gen_prologue = tc_cls_act_prologue, .gen_ld_abs = bpf_gen_ld_abs, - .check_kfunc_call = bpf_prog_test_check_kfunc_call, }; const struct bpf_prog_ops tc_cls_act_prog_ops = { diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c index a5b5bb99c644..c53d9aab38ab 100644 --- a/net/core/net_namespace.c +++ b/net/core/net_namespace.c @@ -301,6 +301,7 @@ struct net *get_net_ns_by_id(const struct net *net, int id) return peer; } +EXPORT_SYMBOL_GPL(get_net_ns_by_id); /* * setup_net runs the initializers for the network namespace object. diff --git a/net/core/sock_map.c b/net/core/sock_map.c index 1827669eedd6..2d213c4011db 100644 --- a/net/core/sock_map.c +++ b/net/core/sock_map.c @@ -1416,38 +1416,50 @@ static struct sk_psock_progs *sock_map_progs(struct bpf_map *map) return NULL; } -static int sock_map_prog_update(struct bpf_map *map, struct bpf_prog *prog, - struct bpf_prog *old, u32 which) +static int sock_map_prog_lookup(struct bpf_map *map, struct bpf_prog ***pprog, + u32 which) { struct sk_psock_progs *progs = sock_map_progs(map); - struct bpf_prog **pprog; if (!progs) return -EOPNOTSUPP; switch (which) { case BPF_SK_MSG_VERDICT: - pprog = &progs->msg_parser; + *pprog = &progs->msg_parser; break; #if IS_ENABLED(CONFIG_BPF_STREAM_PARSER) case BPF_SK_SKB_STREAM_PARSER: - pprog = &progs->stream_parser; + *pprog = &progs->stream_parser; break; #endif case BPF_SK_SKB_STREAM_VERDICT: if (progs->skb_verdict) return -EBUSY; - pprog = &progs->stream_verdict; + *pprog = &progs->stream_verdict; break; case BPF_SK_SKB_VERDICT: if (progs->stream_verdict) return -EBUSY; - pprog = &progs->skb_verdict; + *pprog = &progs->skb_verdict; break; default: return -EOPNOTSUPP; } + return 0; +} + +static int sock_map_prog_update(struct bpf_map *map, struct bpf_prog *prog, + struct bpf_prog *old, u32 which) +{ + struct bpf_prog **pprog; + int ret; + + ret = sock_map_prog_lookup(map, &pprog, which); + if (ret) + return ret; + if (old) return psock_replace_prog(pprog, prog, old); @@ -1455,6 +1467,57 @@ static int sock_map_prog_update(struct bpf_map *map, struct bpf_prog *prog, return 0; } +int sock_map_bpf_prog_query(const union bpf_attr *attr, + union bpf_attr __user *uattr) +{ + __u32 __user *prog_ids = u64_to_user_ptr(attr->query.prog_ids); + u32 prog_cnt = 0, flags = 0, ufd = attr->target_fd; + struct bpf_prog **pprog; + struct bpf_prog *prog; + struct bpf_map *map; + struct fd f; + u32 id = 0; + int ret; + + if (attr->query.query_flags) + return -EINVAL; + + f = fdget(ufd); + map = __bpf_map_get(f); + if (IS_ERR(map)) + return PTR_ERR(map); + + rcu_read_lock(); + + ret = sock_map_prog_lookup(map, &pprog, attr->query.attach_type); + if (ret) + goto end; + + prog = *pprog; + prog_cnt = !prog ? 0 : 1; + + if (!attr->query.prog_cnt || !prog_ids || !prog_cnt) + goto end; + + /* we do not hold the refcnt, the bpf prog may be released + * asynchronously and the id would be set to 0. + */ + id = data_race(prog->aux->id); + if (id == 0) + prog_cnt = 0; + +end: + rcu_read_unlock(); + + if (copy_to_user(&uattr->query.attach_flags, &flags, sizeof(flags)) || + (id != 0 && copy_to_user(prog_ids, &id, sizeof(u32))) || + copy_to_user(&uattr->query.prog_cnt, &prog_cnt, sizeof(prog_cnt))) + ret = -EFAULT; + + fdput(f); + return ret; +} + static void sock_map_unlink(struct sock *sk, struct sk_psock_link *link) { switch (link->map->map_type) { diff --git a/net/core/xdp.c b/net/core/xdp.c index 7aba35504986..361df312ee7f 100644 --- a/net/core/xdp.c +++ b/net/core/xdp.c @@ -162,8 +162,9 @@ static void xdp_rxq_info_init(struct xdp_rxq_info *xdp_rxq) } /* Returns 0 on success, negative on failure */ -int xdp_rxq_info_reg(struct xdp_rxq_info *xdp_rxq, - struct net_device *dev, u32 queue_index, unsigned int napi_id) +int __xdp_rxq_info_reg(struct xdp_rxq_info *xdp_rxq, + struct net_device *dev, u32 queue_index, + unsigned int napi_id, u32 frag_size) { if (!dev) { WARN(1, "Missing net_device from driver"); @@ -185,11 +186,12 @@ int xdp_rxq_info_reg(struct xdp_rxq_info *xdp_rxq, xdp_rxq->dev = dev; xdp_rxq->queue_index = queue_index; xdp_rxq->napi_id = napi_id; + xdp_rxq->frag_size = frag_size; xdp_rxq->reg_state = REG_STATE_REGISTERED; return 0; } -EXPORT_SYMBOL_GPL(xdp_rxq_info_reg); +EXPORT_SYMBOL_GPL(__xdp_rxq_info_reg); void xdp_rxq_info_unused(struct xdp_rxq_info *xdp_rxq) { @@ -369,8 +371,8 @@ EXPORT_SYMBOL_GPL(xdp_rxq_info_reg_mem_model); * is used for those calls sites. Thus, allowing for faster recycling * of xdp_frames/pages in those cases. */ -static void __xdp_return(void *data, struct xdp_mem_info *mem, bool napi_direct, - struct xdp_buff *xdp) +void __xdp_return(void *data, struct xdp_mem_info *mem, bool napi_direct, + struct xdp_buff *xdp) { struct xdp_mem_allocator *xa; struct page *page; @@ -406,12 +408,38 @@ static void __xdp_return(void *data, struct xdp_mem_info *mem, bool napi_direct, void xdp_return_frame(struct xdp_frame *xdpf) { + struct skb_shared_info *sinfo; + int i; + + if (likely(!xdp_frame_has_frags(xdpf))) + goto out; + + sinfo = xdp_get_shared_info_from_frame(xdpf); + for (i = 0; i < sinfo->nr_frags; i++) { + struct page *page = skb_frag_page(&sinfo->frags[i]); + + __xdp_return(page_address(page), &xdpf->mem, false, NULL); + } +out: __xdp_return(xdpf->data, &xdpf->mem, false, NULL); } EXPORT_SYMBOL_GPL(xdp_return_frame); void xdp_return_frame_rx_napi(struct xdp_frame *xdpf) { + struct skb_shared_info *sinfo; + int i; + + if (likely(!xdp_frame_has_frags(xdpf))) + goto out; + + sinfo = xdp_get_shared_info_from_frame(xdpf); + for (i = 0; i < sinfo->nr_frags; i++) { + struct page *page = skb_frag_page(&sinfo->frags[i]); + + __xdp_return(page_address(page), &xdpf->mem, true, NULL); + } +out: __xdp_return(xdpf->data, &xdpf->mem, true, NULL); } EXPORT_SYMBOL_GPL(xdp_return_frame_rx_napi); @@ -447,7 +475,7 @@ void xdp_return_frame_bulk(struct xdp_frame *xdpf, struct xdp_mem_allocator *xa; if (mem->type != MEM_TYPE_PAGE_POOL) { - __xdp_return(xdpf->data, &xdpf->mem, false, NULL); + xdp_return_frame(xdpf); return; } @@ -466,12 +494,38 @@ void xdp_return_frame_bulk(struct xdp_frame *xdpf, bq->xa = rhashtable_lookup(mem_id_ht, &mem->id, mem_id_rht_params); } + if (unlikely(xdp_frame_has_frags(xdpf))) { + struct skb_shared_info *sinfo; + int i; + + sinfo = xdp_get_shared_info_from_frame(xdpf); + for (i = 0; i < sinfo->nr_frags; i++) { + skb_frag_t *frag = &sinfo->frags[i]; + + bq->q[bq->count++] = skb_frag_address(frag); + if (bq->count == XDP_BULK_QUEUE_SIZE) + xdp_flush_frame_bulk(bq); + } + } bq->q[bq->count++] = xdpf->data; } EXPORT_SYMBOL_GPL(xdp_return_frame_bulk); void xdp_return_buff(struct xdp_buff *xdp) { + struct skb_shared_info *sinfo; + int i; + + if (likely(!xdp_buff_has_frags(xdp))) + goto out; + + sinfo = xdp_get_shared_info_from_buff(xdp); + for (i = 0; i < sinfo->nr_frags; i++) { + struct page *page = skb_frag_page(&sinfo->frags[i]); + + __xdp_return(page_address(page), &xdp->rxq->mem, true, xdp); + } +out: __xdp_return(xdp->data, &xdp->rxq->mem, true, xdp); } @@ -561,8 +615,14 @@ struct sk_buff *__xdp_build_skb_from_frame(struct xdp_frame *xdpf, struct sk_buff *skb, struct net_device *dev) { + struct skb_shared_info *sinfo = xdp_get_shared_info_from_frame(xdpf); unsigned int headroom, frame_size; void *hard_start; + u8 nr_frags; + + /* xdp frags frame */ + if (unlikely(xdp_frame_has_frags(xdpf))) + nr_frags = sinfo->nr_frags; /* Part of headroom was reserved to xdpf */ headroom = sizeof(*xdpf) + xdpf->headroom; @@ -582,6 +642,12 @@ struct sk_buff *__xdp_build_skb_from_frame(struct xdp_frame *xdpf, if (xdpf->metasize) skb_metadata_set(skb, xdpf->metasize); + if (unlikely(xdp_frame_has_frags(xdpf))) + xdp_update_skb_shared_info(skb, nr_frags, + sinfo->xdp_frags_size, + nr_frags * xdpf->frame_sz, + xdp_frame_is_frag_pfmemalloc(xdpf)); + /* Essential SKB info: protocol and skb->dev */ skb->protocol = eth_type_trans(skb, dev); diff --git a/net/ipv4/bpf_tcp_ca.c b/net/ipv4/bpf_tcp_ca.c index de610cb83694..b60c9fd7147e 100644 --- a/net/ipv4/bpf_tcp_ca.c +++ b/net/ipv4/bpf_tcp_ca.c @@ -1,6 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 /* Copyright (c) 2019 Facebook */ +#include <linux/init.h> #include <linux/types.h> #include <linux/bpf_verifier.h> #include <linux/bpf.h> @@ -212,26 +213,23 @@ bpf_tcp_ca_get_func_proto(enum bpf_func_id func_id, } } -BTF_SET_START(bpf_tcp_ca_kfunc_ids) +BTF_SET_START(bpf_tcp_ca_check_kfunc_ids) BTF_ID(func, tcp_reno_ssthresh) BTF_ID(func, tcp_reno_cong_avoid) BTF_ID(func, tcp_reno_undo_cwnd) BTF_ID(func, tcp_slow_start) BTF_ID(func, tcp_cong_avoid_ai) -BTF_SET_END(bpf_tcp_ca_kfunc_ids) +BTF_SET_END(bpf_tcp_ca_check_kfunc_ids) -static bool bpf_tcp_ca_check_kfunc_call(u32 kfunc_btf_id, struct module *owner) -{ - if (btf_id_set_contains(&bpf_tcp_ca_kfunc_ids, kfunc_btf_id)) - return true; - return bpf_check_mod_kfunc_call(&bpf_tcp_ca_kfunc_list, kfunc_btf_id, owner); -} +static const struct btf_kfunc_id_set bpf_tcp_ca_kfunc_set = { + .owner = THIS_MODULE, + .check_set = &bpf_tcp_ca_check_kfunc_ids, +}; static const struct bpf_verifier_ops bpf_tcp_ca_verifier_ops = { .get_func_proto = bpf_tcp_ca_get_func_proto, .is_valid_access = bpf_tcp_ca_is_valid_access, .btf_struct_access = bpf_tcp_ca_btf_struct_access, - .check_kfunc_call = bpf_tcp_ca_check_kfunc_call, }; static int bpf_tcp_ca_init_member(const struct btf_type *t, @@ -300,3 +298,9 @@ struct bpf_struct_ops bpf_tcp_congestion_ops = { .init = bpf_tcp_ca_init, .name = "tcp_congestion_ops", }; + +static int __init bpf_tcp_ca_kfunc_init(void) +{ + return register_btf_kfunc_id_set(BPF_PROG_TYPE_STRUCT_OPS, &bpf_tcp_ca_kfunc_set); +} +late_initcall(bpf_tcp_ca_kfunc_init); diff --git a/net/ipv4/tcp_bbr.c b/net/ipv4/tcp_bbr.c index ec5550089b4d..02e8626ccb27 100644 --- a/net/ipv4/tcp_bbr.c +++ b/net/ipv4/tcp_bbr.c @@ -1154,7 +1154,7 @@ static struct tcp_congestion_ops tcp_bbr_cong_ops __read_mostly = { .set_state = bbr_set_state, }; -BTF_SET_START(tcp_bbr_kfunc_ids) +BTF_SET_START(tcp_bbr_check_kfunc_ids) #ifdef CONFIG_X86 #ifdef CONFIG_DYNAMIC_FTRACE BTF_ID(func, bbr_init) @@ -1167,25 +1167,27 @@ BTF_ID(func, bbr_min_tso_segs) BTF_ID(func, bbr_set_state) #endif #endif -BTF_SET_END(tcp_bbr_kfunc_ids) +BTF_SET_END(tcp_bbr_check_kfunc_ids) -static DEFINE_KFUNC_BTF_ID_SET(&tcp_bbr_kfunc_ids, tcp_bbr_kfunc_btf_set); +static const struct btf_kfunc_id_set tcp_bbr_kfunc_set = { + .owner = THIS_MODULE, + .check_set = &tcp_bbr_check_kfunc_ids, +}; static int __init bbr_register(void) { int ret; BUILD_BUG_ON(sizeof(struct bbr) > ICSK_CA_PRIV_SIZE); - ret = tcp_register_congestion_control(&tcp_bbr_cong_ops); - if (ret) + + ret = register_btf_kfunc_id_set(BPF_PROG_TYPE_STRUCT_OPS, &tcp_bbr_kfunc_set); + if (ret < 0) return ret; - register_kfunc_btf_id_set(&bpf_tcp_ca_kfunc_list, &tcp_bbr_kfunc_btf_set); - return 0; + return tcp_register_congestion_control(&tcp_bbr_cong_ops); } static void __exit bbr_unregister(void) { - unregister_kfunc_btf_id_set(&bpf_tcp_ca_kfunc_list, &tcp_bbr_kfunc_btf_set); tcp_unregister_congestion_control(&tcp_bbr_cong_ops); } diff --git a/net/ipv4/tcp_cubic.c b/net/ipv4/tcp_cubic.c index e07837e23b3f..24d562dd6225 100644 --- a/net/ipv4/tcp_cubic.c +++ b/net/ipv4/tcp_cubic.c @@ -485,7 +485,7 @@ static struct tcp_congestion_ops cubictcp __read_mostly = { .name = "cubic", }; -BTF_SET_START(tcp_cubic_kfunc_ids) +BTF_SET_START(tcp_cubic_check_kfunc_ids) #ifdef CONFIG_X86 #ifdef CONFIG_DYNAMIC_FTRACE BTF_ID(func, cubictcp_init) @@ -496,9 +496,12 @@ BTF_ID(func, cubictcp_cwnd_event) BTF_ID(func, cubictcp_acked) #endif #endif -BTF_SET_END(tcp_cubic_kfunc_ids) +BTF_SET_END(tcp_cubic_check_kfunc_ids) -static DEFINE_KFUNC_BTF_ID_SET(&tcp_cubic_kfunc_ids, tcp_cubic_kfunc_btf_set); +static const struct btf_kfunc_id_set tcp_cubic_kfunc_set = { + .owner = THIS_MODULE, + .check_set = &tcp_cubic_check_kfunc_ids, +}; static int __init cubictcp_register(void) { @@ -534,16 +537,14 @@ static int __init cubictcp_register(void) /* divide by bic_scale and by constant Srtt (100ms) */ do_div(cube_factor, bic_scale * 10); - ret = tcp_register_congestion_control(&cubictcp); - if (ret) + ret = register_btf_kfunc_id_set(BPF_PROG_TYPE_STRUCT_OPS, &tcp_cubic_kfunc_set); + if (ret < 0) return ret; - register_kfunc_btf_id_set(&bpf_tcp_ca_kfunc_list, &tcp_cubic_kfunc_btf_set); - return 0; + return tcp_register_congestion_control(&cubictcp); } static void __exit cubictcp_unregister(void) { - unregister_kfunc_btf_id_set(&bpf_tcp_ca_kfunc_list, &tcp_cubic_kfunc_btf_set); tcp_unregister_congestion_control(&cubictcp); } diff --git a/net/ipv4/tcp_dctcp.c b/net/ipv4/tcp_dctcp.c index 0d7ab3cc7b61..1943a6630341 100644 --- a/net/ipv4/tcp_dctcp.c +++ b/net/ipv4/tcp_dctcp.c @@ -238,7 +238,7 @@ static struct tcp_congestion_ops dctcp_reno __read_mostly = { .name = "dctcp-reno", }; -BTF_SET_START(tcp_dctcp_kfunc_ids) +BTF_SET_START(tcp_dctcp_check_kfunc_ids) #ifdef CONFIG_X86 #ifdef CONFIG_DYNAMIC_FTRACE BTF_ID(func, dctcp_init) @@ -249,25 +249,27 @@ BTF_ID(func, dctcp_cwnd_undo) BTF_ID(func, dctcp_state) #endif #endif -BTF_SET_END(tcp_dctcp_kfunc_ids) +BTF_SET_END(tcp_dctcp_check_kfunc_ids) -static DEFINE_KFUNC_BTF_ID_SET(&tcp_dctcp_kfunc_ids, tcp_dctcp_kfunc_btf_set); +static const struct btf_kfunc_id_set tcp_dctcp_kfunc_set = { + .owner = THIS_MODULE, + .check_set = &tcp_dctcp_check_kfunc_ids, +}; static int __init dctcp_register(void) { int ret; BUILD_BUG_ON(sizeof(struct dctcp) > ICSK_CA_PRIV_SIZE); - ret = tcp_register_congestion_control(&dctcp); - if (ret) + + ret = register_btf_kfunc_id_set(BPF_PROG_TYPE_STRUCT_OPS, &tcp_dctcp_kfunc_set); + if (ret < 0) return ret; - register_kfunc_btf_id_set(&bpf_tcp_ca_kfunc_list, &tcp_dctcp_kfunc_btf_set); - return 0; + return tcp_register_congestion_control(&dctcp); } static void __exit dctcp_unregister(void) { - unregister_kfunc_btf_id_set(&bpf_tcp_ca_kfunc_list, &tcp_dctcp_kfunc_btf_set); tcp_unregister_congestion_control(&dctcp); } diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile index a135b1a46014..238b6a620e88 100644 --- a/net/netfilter/Makefile +++ b/net/netfilter/Makefile @@ -14,6 +14,11 @@ nf_conntrack-$(CONFIG_NF_CONNTRACK_LABELS) += nf_conntrack_labels.o nf_conntrack-$(CONFIG_NF_CT_PROTO_DCCP) += nf_conntrack_proto_dccp.o nf_conntrack-$(CONFIG_NF_CT_PROTO_SCTP) += nf_conntrack_proto_sctp.o nf_conntrack-$(CONFIG_NF_CT_PROTO_GRE) += nf_conntrack_proto_gre.o +ifeq ($(CONFIG_NF_CONNTRACK),m) +nf_conntrack-$(CONFIG_DEBUG_INFO_BTF_MODULES) += nf_conntrack_bpf.o +else ifeq ($(CONFIG_NF_CONNTRACK),y) +nf_conntrack-$(CONFIG_DEBUG_INFO_BTF) += nf_conntrack_bpf.o +endif obj-$(CONFIG_NETFILTER) = netfilter.o diff --git a/net/netfilter/nf_conntrack_bpf.c b/net/netfilter/nf_conntrack_bpf.c new file mode 100644 index 000000000000..8ad3f52579f3 --- /dev/null +++ b/net/netfilter/nf_conntrack_bpf.c @@ -0,0 +1,257 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* Unstable Conntrack Helpers for XDP and TC-BPF hook + * + * These are called from the XDP and SCHED_CLS BPF programs. Note that it is + * allowed to break compatibility for these functions since the interface they + * are exposed through to BPF programs is explicitly unstable. + */ + +#include <linux/bpf.h> +#include <linux/btf.h> +#include <linux/types.h> +#include <linux/btf_ids.h> +#include <linux/net_namespace.h> +#include <net/netfilter/nf_conntrack.h> +#include <net/netfilter/nf_conntrack_core.h> + +/* bpf_ct_opts - Options for CT lookup helpers + * + * Members: + * @netns_id - Specify the network namespace for lookup + * Values: + * BPF_F_CURRENT_NETNS (-1) + * Use namespace associated with ctx (xdp_md, __sk_buff) + * [0, S32_MAX] + * Network Namespace ID + * @error - Out parameter, set for any errors encountered + * Values: + * -EINVAL - Passed NULL for bpf_tuple pointer + * -EINVAL - opts->reserved is not 0 + * -EINVAL - netns_id is less than -1 + * -EINVAL - opts__sz isn't NF_BPF_CT_OPTS_SZ (12) + * -EPROTO - l4proto isn't one of IPPROTO_TCP or IPPROTO_UDP + * -ENONET - No network namespace found for netns_id + * -ENOENT - Conntrack lookup could not find entry for tuple + * -EAFNOSUPPORT - tuple__sz isn't one of sizeof(tuple->ipv4) + * or sizeof(tuple->ipv6) + * @l4proto - Layer 4 protocol + * Values: + * IPPROTO_TCP, IPPROTO_UDP + * @reserved - Reserved member, will be reused for more options in future + * Values: + * 0 + */ +struct bpf_ct_opts { + s32 netns_id; + s32 error; + u8 l4proto; + u8 reserved[3]; +}; + +enum { + NF_BPF_CT_OPTS_SZ = 12, +}; + +static struct nf_conn *__bpf_nf_ct_lookup(struct net *net, + struct bpf_sock_tuple *bpf_tuple, + u32 tuple_len, u8 protonum, + s32 netns_id) +{ + struct nf_conntrack_tuple_hash *hash; + struct nf_conntrack_tuple tuple; + + if (unlikely(protonum != IPPROTO_TCP && protonum != IPPROTO_UDP)) + return ERR_PTR(-EPROTO); + if (unlikely(netns_id < BPF_F_CURRENT_NETNS)) + return ERR_PTR(-EINVAL); + + memset(&tuple, 0, sizeof(tuple)); + switch (tuple_len) { + case sizeof(bpf_tuple->ipv4): + tuple.src.l3num = AF_INET; + tuple.src.u3.ip = bpf_tuple->ipv4.saddr; + tuple.src.u.tcp.port = bpf_tuple->ipv4.sport; + tuple.dst.u3.ip = bpf_tuple->ipv4.daddr; + tuple.dst.u.tcp.port = bpf_tuple->ipv4.dport; + break; + case sizeof(bpf_tuple->ipv6): + tuple.src.l3num = AF_INET6; + memcpy(tuple.src.u3.ip6, bpf_tuple->ipv6.saddr, sizeof(bpf_tuple->ipv6.saddr)); + tuple.src.u.tcp.port = bpf_tuple->ipv6.sport; + memcpy(tuple.dst.u3.ip6, bpf_tuple->ipv6.daddr, sizeof(bpf_tuple->ipv6.daddr)); + tuple.dst.u.tcp.port = bpf_tuple->ipv6.dport; + break; + default: + return ERR_PTR(-EAFNOSUPPORT); + } + + tuple.dst.protonum = protonum; + + if (netns_id >= 0) { + net = get_net_ns_by_id(net, netns_id); + if (unlikely(!net)) + return ERR_PTR(-ENONET); + } + + hash = nf_conntrack_find_get(net, &nf_ct_zone_dflt, &tuple); + if (netns_id >= 0) + put_net(net); + if (!hash) + return ERR_PTR(-ENOENT); + return nf_ct_tuplehash_to_ctrack(hash); +} + +__diag_push(); +__diag_ignore(GCC, 8, "-Wmissing-prototypes", + "Global functions as their definitions will be in nf_conntrack BTF"); + +/* bpf_xdp_ct_lookup - Lookup CT entry for the given tuple, and acquire a + * reference to it + * + * Parameters: + * @xdp_ctx - Pointer to ctx (xdp_md) in XDP program + * Cannot be NULL + * @bpf_tuple - Pointer to memory representing the tuple to look up + * Cannot be NULL + * @tuple__sz - Length of the tuple structure + * Must be one of sizeof(bpf_tuple->ipv4) or + * sizeof(bpf_tuple->ipv6) + * @opts - Additional options for lookup (documented above) + * Cannot be NULL + * @opts__sz - Length of the bpf_ct_opts structure + * Must be NF_BPF_CT_OPTS_SZ (12) + */ +struct nf_conn * +bpf_xdp_ct_lookup(struct xdp_md *xdp_ctx, struct bpf_sock_tuple *bpf_tuple, + u32 tuple__sz, struct bpf_ct_opts *opts, u32 opts__sz) +{ + struct xdp_buff *ctx = (struct xdp_buff *)xdp_ctx; + struct net *caller_net; + struct nf_conn *nfct; + + BUILD_BUG_ON(sizeof(struct bpf_ct_opts) != NF_BPF_CT_OPTS_SZ); + + if (!opts) + return NULL; + if (!bpf_tuple || opts->reserved[0] || opts->reserved[1] || + opts->reserved[2] || opts__sz != NF_BPF_CT_OPTS_SZ) { + opts->error = -EINVAL; + return NULL; + } + caller_net = dev_net(ctx->rxq->dev); + nfct = __bpf_nf_ct_lookup(caller_net, bpf_tuple, tuple__sz, opts->l4proto, + opts->netns_id); + if (IS_ERR(nfct)) { + opts->error = PTR_ERR(nfct); + return NULL; + } + return nfct; +} + +/* bpf_skb_ct_lookup - Lookup CT entry for the given tuple, and acquire a + * reference to it + * + * Parameters: + * @skb_ctx - Pointer to ctx (__sk_buff) in TC program + * Cannot be NULL + * @bpf_tuple - Pointer to memory representing the tuple to look up + * Cannot be NULL + * @tuple__sz - Length of the tuple structure + * Must be one of sizeof(bpf_tuple->ipv4) or + * sizeof(bpf_tuple->ipv6) + * @opts - Additional options for lookup (documented above) + * Cannot be NULL + * @opts__sz - Length of the bpf_ct_opts structure + * Must be NF_BPF_CT_OPTS_SZ (12) + */ +struct nf_conn * +bpf_skb_ct_lookup(struct __sk_buff *skb_ctx, struct bpf_sock_tuple *bpf_tuple, + u32 tuple__sz, struct bpf_ct_opts *opts, u32 opts__sz) +{ + struct sk_buff *skb = (struct sk_buff *)skb_ctx; + struct net *caller_net; + struct nf_conn *nfct; + + BUILD_BUG_ON(sizeof(struct bpf_ct_opts) != NF_BPF_CT_OPTS_SZ); + + if (!opts) + return NULL; + if (!bpf_tuple || opts->reserved[0] || opts->reserved[1] || + opts->reserved[2] || opts__sz != NF_BPF_CT_OPTS_SZ) { + opts->error = -EINVAL; + return NULL; + } + caller_net = skb->dev ? dev_net(skb->dev) : sock_net(skb->sk); + nfct = __bpf_nf_ct_lookup(caller_net, bpf_tuple, tuple__sz, opts->l4proto, + opts->netns_id); + if (IS_ERR(nfct)) { + opts->error = PTR_ERR(nfct); + return NULL; + } + return nfct; +} + +/* bpf_ct_release - Release acquired nf_conn object + * + * This must be invoked for referenced PTR_TO_BTF_ID, and the verifier rejects + * the program if any references remain in the program in all of the explored + * states. + * + * Parameters: + * @nf_conn - Pointer to referenced nf_conn object, obtained using + * bpf_xdp_ct_lookup or bpf_skb_ct_lookup. + */ +void bpf_ct_release(struct nf_conn *nfct) +{ + if (!nfct) + return; + nf_ct_put(nfct); +} + +__diag_pop() + +BTF_SET_START(nf_ct_xdp_check_kfunc_ids) +BTF_ID(func, bpf_xdp_ct_lookup) +BTF_ID(func, bpf_ct_release) +BTF_SET_END(nf_ct_xdp_check_kfunc_ids) + +BTF_SET_START(nf_ct_tc_check_kfunc_ids) +BTF_ID(func, bpf_skb_ct_lookup) +BTF_ID(func, bpf_ct_release) +BTF_SET_END(nf_ct_tc_check_kfunc_ids) + +BTF_SET_START(nf_ct_acquire_kfunc_ids) +BTF_ID(func, bpf_xdp_ct_lookup) +BTF_ID(func, bpf_skb_ct_lookup) +BTF_SET_END(nf_ct_acquire_kfunc_ids) + +BTF_SET_START(nf_ct_release_kfunc_ids) +BTF_ID(func, bpf_ct_release) +BTF_SET_END(nf_ct_release_kfunc_ids) + +/* Both sets are identical */ +#define nf_ct_ret_null_kfunc_ids nf_ct_acquire_kfunc_ids + +static const struct btf_kfunc_id_set nf_conntrack_xdp_kfunc_set = { + .owner = THIS_MODULE, + .check_set = &nf_ct_xdp_check_kfunc_ids, + .acquire_set = &nf_ct_acquire_kfunc_ids, + .release_set = &nf_ct_release_kfunc_ids, + .ret_null_set = &nf_ct_ret_null_kfunc_ids, +}; + +static const struct btf_kfunc_id_set nf_conntrack_tc_kfunc_set = { + .owner = THIS_MODULE, + .check_set = &nf_ct_tc_check_kfunc_ids, + .acquire_set = &nf_ct_acquire_kfunc_ids, + .release_set = &nf_ct_release_kfunc_ids, + .ret_null_set = &nf_ct_ret_null_kfunc_ids, +}; + +int register_nf_conntrack_bpf(void) +{ + int ret; + + ret = register_btf_kfunc_id_set(BPF_PROG_TYPE_XDP, &nf_conntrack_xdp_kfunc_set); + return ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_SCHED_CLS, &nf_conntrack_tc_kfunc_set); +} diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index d6aa5b47031e..d38d689de23c 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -34,6 +34,7 @@ #include <linux/rculist_nulls.h> #include <net/netfilter/nf_conntrack.h> +#include <net/netfilter/nf_conntrack_bpf.h> #include <net/netfilter/nf_conntrack_l4proto.h> #include <net/netfilter/nf_conntrack_expect.h> #include <net/netfilter/nf_conntrack_helper.h> @@ -2750,8 +2751,15 @@ int nf_conntrack_init_start(void) conntrack_gc_work_init(&conntrack_gc_work); queue_delayed_work(system_power_efficient_wq, &conntrack_gc_work.dwork, HZ); + ret = register_nf_conntrack_bpf(); + if (ret < 0) + goto err_kfunc; + return 0; +err_kfunc: + cancel_delayed_work_sync(&conntrack_gc_work.dwork); + nf_conntrack_proto_fini(); err_proto: nf_conntrack_seqadj_fini(); err_seqadj: diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index c19569819866..3e0d6281fd1e 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -3240,49 +3240,58 @@ static struct sock *unix_from_bucket(struct seq_file *seq, loff_t *pos) return sk; } -static struct sock *unix_next_socket(struct seq_file *seq, - struct sock *sk, - loff_t *pos) +static struct sock *unix_get_first(struct seq_file *seq, loff_t *pos) { unsigned long bucket = get_bucket(*pos); + struct sock *sk; - while (sk > (struct sock *)SEQ_START_TOKEN) { - sk = sk_next(sk); - if (!sk) - goto next_bucket; - if (sock_net(sk) == seq_file_net(seq)) - return sk; - } - - do { + while (bucket < ARRAY_SIZE(unix_socket_table)) { spin_lock(&unix_table_locks[bucket]); + sk = unix_from_bucket(seq, pos); if (sk) return sk; -next_bucket: - spin_unlock(&unix_table_locks[bucket++]); - *pos = set_bucket_offset(bucket, 1); - } while (bucket < ARRAY_SIZE(unix_socket_table)); + spin_unlock(&unix_table_locks[bucket]); + + *pos = set_bucket_offset(++bucket, 1); + } return NULL; } +static struct sock *unix_get_next(struct seq_file *seq, struct sock *sk, + loff_t *pos) +{ + unsigned long bucket = get_bucket(*pos); + + for (sk = sk_next(sk); sk; sk = sk_next(sk)) + if (sock_net(sk) == seq_file_net(seq)) + return sk; + + spin_unlock(&unix_table_locks[bucket]); + + *pos = set_bucket_offset(++bucket, 1); + + return unix_get_first(seq, pos); +} + static void *unix_seq_start(struct seq_file *seq, loff_t *pos) { if (!*pos) return SEQ_START_TOKEN; - if (get_bucket(*pos) >= ARRAY_SIZE(unix_socket_table)) - return NULL; - - return unix_next_socket(seq, NULL, pos); + return unix_get_first(seq, pos); } static void *unix_seq_next(struct seq_file *seq, void *v, loff_t *pos) { ++*pos; - return unix_next_socket(seq, v, pos); + + if (v == SEQ_START_TOKEN) + return unix_get_first(seq, pos); + + return unix_get_next(seq, v, pos); } static void unix_seq_stop(struct seq_file *seq, void *v) @@ -3347,6 +3356,15 @@ static const struct seq_operations unix_seq_ops = { }; #if IS_BUILTIN(CONFIG_UNIX) && defined(CONFIG_BPF_SYSCALL) +struct bpf_unix_iter_state { + struct seq_net_private p; + unsigned int cur_sk; + unsigned int end_sk; + unsigned int max_sk; + struct sock **batch; + bool st_bucket_done; +}; + struct bpf_iter__unix { __bpf_md_ptr(struct bpf_iter_meta *, meta); __bpf_md_ptr(struct unix_sock *, unix_sk); @@ -3365,24 +3383,156 @@ static int unix_prog_seq_show(struct bpf_prog *prog, struct bpf_iter_meta *meta, return bpf_iter_run_prog(prog, &ctx); } +static int bpf_iter_unix_hold_batch(struct seq_file *seq, struct sock *start_sk) + +{ + struct bpf_unix_iter_state *iter = seq->private; + unsigned int expected = 1; + struct sock *sk; + + sock_hold(start_sk); + iter->batch[iter->end_sk++] = start_sk; + + for (sk = sk_next(start_sk); sk; sk = sk_next(sk)) { + if (sock_net(sk) != seq_file_net(seq)) + continue; + + if (iter->end_sk < iter->max_sk) { + sock_hold(sk); + iter->batch[iter->end_sk++] = sk; + } + + expected++; + } + + spin_unlock(&unix_table_locks[start_sk->sk_hash]); + + return expected; +} + +static void bpf_iter_unix_put_batch(struct bpf_unix_iter_state *iter) +{ + while (iter->cur_sk < iter->end_sk) + sock_put(iter->batch[iter->cur_sk++]); +} + +static int bpf_iter_unix_realloc_batch(struct bpf_unix_iter_state *iter, + unsigned int new_batch_sz) +{ + struct sock **new_batch; + + new_batch = kvmalloc(sizeof(*new_batch) * new_batch_sz, + GFP_USER | __GFP_NOWARN); + if (!new_batch) + return -ENOMEM; + + bpf_iter_unix_put_batch(iter); + kvfree(iter->batch); + iter->batch = new_batch; + iter->max_sk = new_batch_sz; + + return 0; +} + +static struct sock *bpf_iter_unix_batch(struct seq_file *seq, + loff_t *pos) +{ + struct bpf_unix_iter_state *iter = seq->private; + unsigned int expected; + bool resized = false; + struct sock *sk; + + if (iter->st_bucket_done) + *pos = set_bucket_offset(get_bucket(*pos) + 1, 1); + +again: + /* Get a new batch */ + iter->cur_sk = 0; + iter->end_sk = 0; + + sk = unix_get_first(seq, pos); + if (!sk) + return NULL; /* Done */ + + expected = bpf_iter_unix_hold_batch(seq, sk); + + if (iter->end_sk == expected) { + iter->st_bucket_done = true; + return sk; + } + + if (!resized && !bpf_iter_unix_realloc_batch(iter, expected * 3 / 2)) { + resized = true; + goto again; + } + + return sk; +} + +static void *bpf_iter_unix_seq_start(struct seq_file *seq, loff_t *pos) +{ + if (!*pos) + return SEQ_START_TOKEN; + + /* bpf iter does not support lseek, so it always + * continue from where it was stop()-ped. + */ + return bpf_iter_unix_batch(seq, pos); +} + +static void *bpf_iter_unix_seq_next(struct seq_file *seq, void *v, loff_t *pos) +{ + struct bpf_unix_iter_state *iter = seq->private; + struct sock *sk; + + /* Whenever seq_next() is called, the iter->cur_sk is + * done with seq_show(), so advance to the next sk in + * the batch. + */ + if (iter->cur_sk < iter->end_sk) + sock_put(iter->batch[iter->cur_sk++]); + + ++*pos; + + if (iter->cur_sk < iter->end_sk) + sk = iter->batch[iter->cur_sk]; + else + sk = bpf_iter_unix_batch(seq, pos); + + return sk; +} + static int bpf_iter_unix_seq_show(struct seq_file *seq, void *v) { struct bpf_iter_meta meta; struct bpf_prog *prog; struct sock *sk = v; uid_t uid; + bool slow; + int ret; if (v == SEQ_START_TOKEN) return 0; + slow = lock_sock_fast(sk); + + if (unlikely(sk_unhashed(sk))) { + ret = SEQ_SKIP; + goto unlock; + } + uid = from_kuid_munged(seq_user_ns(seq), sock_i_uid(sk)); meta.seq = seq; prog = bpf_iter_get_info(&meta, false); - return unix_prog_seq_show(prog, &meta, v, uid); + ret = unix_prog_seq_show(prog, &meta, v, uid); +unlock: + unlock_sock_fast(sk, slow); + return ret; } static void bpf_iter_unix_seq_stop(struct seq_file *seq, void *v) { + struct bpf_unix_iter_state *iter = seq->private; struct bpf_iter_meta meta; struct bpf_prog *prog; @@ -3393,12 +3543,13 @@ static void bpf_iter_unix_seq_stop(struct seq_file *seq, void *v) (void)unix_prog_seq_show(prog, &meta, v, 0); } - unix_seq_stop(seq, v); + if (iter->cur_sk < iter->end_sk) + bpf_iter_unix_put_batch(iter); } static const struct seq_operations bpf_iter_unix_seq_ops = { - .start = unix_seq_start, - .next = unix_seq_next, + .start = bpf_iter_unix_seq_start, + .next = bpf_iter_unix_seq_next, .stop = bpf_iter_unix_seq_stop, .show = bpf_iter_unix_seq_show, }; @@ -3447,13 +3598,55 @@ static struct pernet_operations unix_net_ops = { DEFINE_BPF_ITER_FUNC(unix, struct bpf_iter_meta *meta, struct unix_sock *unix_sk, uid_t uid) +#define INIT_BATCH_SZ 16 + +static int bpf_iter_init_unix(void *priv_data, struct bpf_iter_aux_info *aux) +{ + struct bpf_unix_iter_state *iter = priv_data; + int err; + + err = bpf_iter_init_seq_net(priv_data, aux); + if (err) + return err; + + err = bpf_iter_unix_realloc_batch(iter, INIT_BATCH_SZ); + if (err) { + bpf_iter_fini_seq_net(priv_data); + return err; + } + + return 0; +} + +static void bpf_iter_fini_unix(void *priv_data) +{ + struct bpf_unix_iter_state *iter = priv_data; + + bpf_iter_fini_seq_net(priv_data); + kvfree(iter->batch); +} + static const struct bpf_iter_seq_info unix_seq_info = { .seq_ops = &bpf_iter_unix_seq_ops, - .init_seq_private = bpf_iter_init_seq_net, - .fini_seq_private = bpf_iter_fini_seq_net, - .seq_priv_size = sizeof(struct seq_net_private), + .init_seq_private = bpf_iter_init_unix, + .fini_seq_private = bpf_iter_fini_unix, + .seq_priv_size = sizeof(struct bpf_unix_iter_state), }; +static const struct bpf_func_proto * +bpf_iter_unix_get_func_proto(enum bpf_func_id func_id, + const struct bpf_prog *prog) +{ + switch (func_id) { + case BPF_FUNC_setsockopt: + return &bpf_sk_setsockopt_proto; + case BPF_FUNC_getsockopt: + return &bpf_sk_getsockopt_proto; + default: + return NULL; + } +} + static struct bpf_iter_reg unix_reg_info = { .target = "unix", .ctx_arg_info_size = 1, @@ -3461,6 +3654,7 @@ static struct bpf_iter_reg unix_reg_info = { { offsetof(struct bpf_iter__unix, unix_sk), PTR_TO_BTF_ID_OR_NULL }, }, + .get_func_proto = bpf_iter_unix_get_func_proto, .seq_info = &unix_seq_info, }; diff --git a/samples/bpf/xdp1_user.c b/samples/bpf/xdp1_user.c index 8675fa5273df..3ec8ad9c1750 100644 --- a/samples/bpf/xdp1_user.c +++ b/samples/bpf/xdp1_user.c @@ -26,12 +26,12 @@ static void int_exit(int sig) { __u32 curr_prog_id = 0; - if (bpf_get_link_xdp_id(ifindex, &curr_prog_id, xdp_flags)) { - printf("bpf_get_link_xdp_id failed\n"); + if (bpf_xdp_query_id(ifindex, xdp_flags, &curr_prog_id)) { + printf("bpf_xdp_query_id failed\n"); exit(1); } if (prog_id == curr_prog_id) - bpf_set_link_xdp_fd(ifindex, -1, xdp_flags); + bpf_xdp_detach(ifindex, xdp_flags, NULL); else if (!curr_prog_id) printf("couldn't find a prog id on a given interface\n"); else @@ -143,7 +143,7 @@ int main(int argc, char **argv) signal(SIGINT, int_exit); signal(SIGTERM, int_exit); - if (bpf_set_link_xdp_fd(ifindex, prog_fd, xdp_flags) < 0) { + if (bpf_xdp_attach(ifindex, prog_fd, xdp_flags, NULL) < 0) { printf("link set xdp fd failed\n"); return 1; } diff --git a/samples/bpf/xdp_adjust_tail_user.c b/samples/bpf/xdp_adjust_tail_user.c index a70b094c8ec5..6c61d5f570fb 100644 --- a/samples/bpf/xdp_adjust_tail_user.c +++ b/samples/bpf/xdp_adjust_tail_user.c @@ -34,12 +34,12 @@ static void int_exit(int sig) __u32 curr_prog_id = 0; if (ifindex > -1) { - if (bpf_get_link_xdp_id(ifindex, &curr_prog_id, xdp_flags)) { - printf("bpf_get_link_xdp_id failed\n"); + if (bpf_xdp_query_id(ifindex, xdp_flags, &curr_prog_id)) { + printf("bpf_xdp_query_id failed\n"); exit(1); } if (prog_id == curr_prog_id) - bpf_set_link_xdp_fd(ifindex, -1, xdp_flags); + bpf_xdp_detach(ifindex, xdp_flags, NULL); else if (!curr_prog_id) printf("couldn't find a prog id on a given iface\n"); else @@ -173,7 +173,7 @@ int main(int argc, char **argv) signal(SIGINT, int_exit); signal(SIGTERM, int_exit); - if (bpf_set_link_xdp_fd(ifindex, prog_fd, xdp_flags) < 0) { + if (bpf_xdp_attach(ifindex, prog_fd, xdp_flags, NULL) < 0) { printf("link set xdp fd failed\n"); return 1; } diff --git a/samples/bpf/xdp_fwd_user.c b/samples/bpf/xdp_fwd_user.c index 4ad896782f77..79ccd9891924 100644 --- a/samples/bpf/xdp_fwd_user.c +++ b/samples/bpf/xdp_fwd_user.c @@ -33,7 +33,7 @@ static int do_attach(int idx, int prog_fd, int map_fd, const char *name) { int err; - err = bpf_set_link_xdp_fd(idx, prog_fd, xdp_flags); + err = bpf_xdp_attach(idx, prog_fd, xdp_flags, NULL); if (err < 0) { printf("ERROR: failed to attach program to %s\n", name); return err; @@ -51,7 +51,7 @@ static int do_detach(int idx, const char *name) { int err; - err = bpf_set_link_xdp_fd(idx, -1, xdp_flags); + err = bpf_xdp_detach(idx, xdp_flags, NULL); if (err < 0) printf("ERROR: failed to detach program from %s\n", name); diff --git a/samples/bpf/xdp_router_ipv4_user.c b/samples/bpf/xdp_router_ipv4_user.c index cfaf7e50e431..2d565ba54b8c 100644 --- a/samples/bpf/xdp_router_ipv4_user.c +++ b/samples/bpf/xdp_router_ipv4_user.c @@ -43,13 +43,13 @@ static void int_exit(int sig) int i = 0; for (i = 0; i < total_ifindex; i++) { - if (bpf_get_link_xdp_id(ifindex_list[i], &prog_id, flags)) { - printf("bpf_get_link_xdp_id on iface %d failed\n", + if (bpf_xdp_query_id(ifindex_list[i], flags, &prog_id)) { + printf("bpf_xdp_query_id on iface %d failed\n", ifindex_list[i]); exit(1); } if (prog_id_list[i] == prog_id) - bpf_set_link_xdp_fd(ifindex_list[i], -1, flags); + bpf_xdp_detach(ifindex_list[i], flags, NULL); else if (!prog_id) printf("couldn't find a prog id on iface %d\n", ifindex_list[i]); @@ -716,12 +716,12 @@ int main(int ac, char **argv) } prog_id_list = (__u32 *)calloc(total_ifindex, sizeof(__u32 *)); for (i = 0; i < total_ifindex; i++) { - if (bpf_set_link_xdp_fd(ifindex_list[i], prog_fd, flags) < 0) { + if (bpf_xdp_attach(ifindex_list[i], prog_fd, flags, NULL) < 0) { printf("link set xdp fd failed\n"); int recovery_index = i; for (i = 0; i < recovery_index; i++) - bpf_set_link_xdp_fd(ifindex_list[i], -1, flags); + bpf_xdp_detach(ifindex_list[i], flags, NULL); return 1; } diff --git a/samples/bpf/xdp_rxq_info_user.c b/samples/bpf/xdp_rxq_info_user.c index 74a2926eba08..fb2532d13aac 100644 --- a/samples/bpf/xdp_rxq_info_user.c +++ b/samples/bpf/xdp_rxq_info_user.c @@ -62,15 +62,15 @@ static void int_exit(int sig) __u32 curr_prog_id = 0; if (ifindex > -1) { - if (bpf_get_link_xdp_id(ifindex, &curr_prog_id, xdp_flags)) { - printf("bpf_get_link_xdp_id failed\n"); + if (bpf_xdp_query_id(ifindex, xdp_flags, &curr_prog_id)) { + printf("bpf_xdp_query_id failed\n"); exit(EXIT_FAIL); } if (prog_id == curr_prog_id) { fprintf(stderr, "Interrupted: Removing XDP program on ifindex:%d device:%s\n", ifindex, ifname); - bpf_set_link_xdp_fd(ifindex, -1, xdp_flags); + bpf_xdp_detach(ifindex, xdp_flags, NULL); } else if (!curr_prog_id) { printf("couldn't find a prog id on a given iface\n"); } else { @@ -209,7 +209,7 @@ static struct datarec *alloc_record_per_cpu(void) static struct record *alloc_record_per_rxq(void) { - unsigned int nr_rxqs = bpf_map__def(rx_queue_index_map)->max_entries; + unsigned int nr_rxqs = bpf_map__max_entries(rx_queue_index_map); struct record *array; array = calloc(nr_rxqs, sizeof(struct record)); @@ -222,7 +222,7 @@ static struct record *alloc_record_per_rxq(void) static struct stats_record *alloc_stats_record(void) { - unsigned int nr_rxqs = bpf_map__def(rx_queue_index_map)->max_entries; + unsigned int nr_rxqs = bpf_map__max_entries(rx_queue_index_map); struct stats_record *rec; int i; @@ -241,7 +241,7 @@ static struct stats_record *alloc_stats_record(void) static void free_stats_record(struct stats_record *r) { - unsigned int nr_rxqs = bpf_map__def(rx_queue_index_map)->max_entries; + unsigned int nr_rxqs = bpf_map__max_entries(rx_queue_index_map); int i; for (i = 0; i < nr_rxqs; i++) @@ -289,7 +289,7 @@ static void stats_collect(struct stats_record *rec) map_collect_percpu(fd, 0, &rec->stats); fd = bpf_map__fd(rx_queue_index_map); - max_rxqs = bpf_map__def(rx_queue_index_map)->max_entries; + max_rxqs = bpf_map__max_entries(rx_queue_index_map); for (i = 0; i < max_rxqs; i++) map_collect_percpu(fd, i, &rec->rxq[i]); } @@ -335,7 +335,7 @@ static void stats_print(struct stats_record *stats_rec, struct stats_record *stats_prev, int action, __u32 cfg_opt) { - unsigned int nr_rxqs = bpf_map__def(rx_queue_index_map)->max_entries; + unsigned int nr_rxqs = bpf_map__max_entries(rx_queue_index_map); unsigned int nr_cpus = bpf_num_possible_cpus(); double pps = 0, err = 0; struct record *rec, *prev; @@ -582,7 +582,7 @@ int main(int argc, char **argv) signal(SIGINT, int_exit); signal(SIGTERM, int_exit); - if (bpf_set_link_xdp_fd(ifindex, prog_fd, xdp_flags) < 0) { + if (bpf_xdp_attach(ifindex, prog_fd, xdp_flags, NULL) < 0) { fprintf(stderr, "link set xdp fd failed\n"); return EXIT_FAIL_XDP; } diff --git a/samples/bpf/xdp_sample_pkts_user.c b/samples/bpf/xdp_sample_pkts_user.c index 587eacb49103..0a2b3e997aed 100644 --- a/samples/bpf/xdp_sample_pkts_user.c +++ b/samples/bpf/xdp_sample_pkts_user.c @@ -30,7 +30,7 @@ static int do_attach(int idx, int fd, const char *name) __u32 info_len = sizeof(info); int err; - err = bpf_set_link_xdp_fd(idx, fd, xdp_flags); + err = bpf_xdp_attach(idx, fd, xdp_flags, NULL); if (err < 0) { printf("ERROR: failed to attach program to %s\n", name); return err; @@ -51,13 +51,13 @@ static int do_detach(int idx, const char *name) __u32 curr_prog_id = 0; int err = 0; - err = bpf_get_link_xdp_id(idx, &curr_prog_id, xdp_flags); + err = bpf_xdp_query_id(idx, xdp_flags, &curr_prog_id); if (err) { - printf("bpf_get_link_xdp_id failed\n"); + printf("bpf_xdp_query_id failed\n"); return err; } if (prog_id == curr_prog_id) { - err = bpf_set_link_xdp_fd(idx, -1, xdp_flags); + err = bpf_xdp_detach(idx, xdp_flags, NULL); if (err < 0) printf("ERROR: failed to detach prog from %s\n", name); } else if (!curr_prog_id) { diff --git a/samples/bpf/xdp_sample_user.c b/samples/bpf/xdp_sample_user.c index 8740838e7767..ae70a7943d85 100644 --- a/samples/bpf/xdp_sample_user.c +++ b/samples/bpf/xdp_sample_user.c @@ -1265,7 +1265,7 @@ static int __sample_remove_xdp(int ifindex, __u32 prog_id, int xdp_flags) int ret; if (prog_id) { - ret = bpf_get_link_xdp_id(ifindex, &cur_prog_id, xdp_flags); + ret = bpf_xdp_query_id(ifindex, xdp_flags, &cur_prog_id); if (ret < 0) return -errno; @@ -1278,7 +1278,7 @@ static int __sample_remove_xdp(int ifindex, __u32 prog_id, int xdp_flags) } } - return bpf_set_link_xdp_fd(ifindex, -1, xdp_flags); + return bpf_xdp_detach(ifindex, xdp_flags, NULL); } int sample_install_xdp(struct bpf_program *xdp_prog, int ifindex, bool generic, @@ -1295,8 +1295,7 @@ int sample_install_xdp(struct bpf_program *xdp_prog, int ifindex, bool generic, xdp_flags |= !force ? XDP_FLAGS_UPDATE_IF_NOEXIST : 0; xdp_flags |= generic ? XDP_FLAGS_SKB_MODE : XDP_FLAGS_DRV_MODE; - ret = bpf_set_link_xdp_fd(ifindex, bpf_program__fd(xdp_prog), - xdp_flags); + ret = bpf_xdp_attach(ifindex, bpf_program__fd(xdp_prog), xdp_flags, NULL); if (ret < 0) { ret = -errno; fprintf(stderr, @@ -1308,7 +1307,7 @@ int sample_install_xdp(struct bpf_program *xdp_prog, int ifindex, bool generic, return ret; } - ret = bpf_get_link_xdp_id(ifindex, &prog_id, xdp_flags); + ret = bpf_xdp_query_id(ifindex, xdp_flags, &prog_id); if (ret < 0) { ret = -errno; fprintf(stderr, diff --git a/samples/bpf/xdp_tx_iptunnel_user.c b/samples/bpf/xdp_tx_iptunnel_user.c index 1d4f305d02aa..7370c03c96fc 100644 --- a/samples/bpf/xdp_tx_iptunnel_user.c +++ b/samples/bpf/xdp_tx_iptunnel_user.c @@ -32,12 +32,12 @@ static void int_exit(int sig) __u32 curr_prog_id = 0; if (ifindex > -1) { - if (bpf_get_link_xdp_id(ifindex, &curr_prog_id, xdp_flags)) { - printf("bpf_get_link_xdp_id failed\n"); + if (bpf_xdp_query_id(ifindex, xdp_flags, &curr_prog_id)) { + printf("bpf_xdp_query_id failed\n"); exit(1); } if (prog_id == curr_prog_id) - bpf_set_link_xdp_fd(ifindex, -1, xdp_flags); + bpf_xdp_detach(ifindex, xdp_flags, NULL); else if (!curr_prog_id) printf("couldn't find a prog id on a given iface\n"); else @@ -288,7 +288,7 @@ int main(int argc, char **argv) } } - if (bpf_set_link_xdp_fd(ifindex, prog_fd, xdp_flags) < 0) { + if (bpf_xdp_attach(ifindex, prog_fd, xdp_flags, NULL) < 0) { printf("link set xdp fd failed\n"); return 1; } @@ -302,7 +302,7 @@ int main(int argc, char **argv) poll_stats(kill_after_s); - bpf_set_link_xdp_fd(ifindex, -1, xdp_flags); + bpf_xdp_detach(ifindex, xdp_flags, NULL); return 0; } diff --git a/samples/bpf/xdpsock_ctrl_proc.c b/samples/bpf/xdpsock_ctrl_proc.c index cc4408797ab7..28b5f2a9fa08 100644 --- a/samples/bpf/xdpsock_ctrl_proc.c +++ b/samples/bpf/xdpsock_ctrl_proc.c @@ -173,7 +173,7 @@ main(int argc, char **argv) unlink(SOCKET_NAME); /* Unset fd for given ifindex */ - err = bpf_set_link_xdp_fd(ifindex, -1, 0); + err = bpf_xdp_detach(ifindex, 0, NULL); if (err) { fprintf(stderr, "Error when unsetting bpf prog_fd for ifindex(%d)\n", ifindex); return err; diff --git a/samples/bpf/xdpsock_user.c b/samples/bpf/xdpsock_user.c index aa50864e4415..19288a2bbc75 100644 --- a/samples/bpf/xdpsock_user.c +++ b/samples/bpf/xdpsock_user.c @@ -571,13 +571,13 @@ static void remove_xdp_program(void) { u32 curr_prog_id = 0; - if (bpf_get_link_xdp_id(opt_ifindex, &curr_prog_id, opt_xdp_flags)) { - printf("bpf_get_link_xdp_id failed\n"); + if (bpf_xdp_query_id(opt_ifindex, opt_xdp_flags, &curr_prog_id)) { + printf("bpf_xdp_query_id failed\n"); exit(EXIT_FAILURE); } if (prog_id == curr_prog_id) - bpf_set_link_xdp_fd(opt_ifindex, -1, opt_xdp_flags); + bpf_xdp_detach(opt_ifindex, opt_xdp_flags, NULL); else if (!curr_prog_id) printf("couldn't find a prog id on a given interface\n"); else @@ -1027,7 +1027,7 @@ static struct xsk_socket_info *xsk_configure_socket(struct xsk_umem_info *umem, if (ret) exit_with_error(-ret); - ret = bpf_get_link_xdp_id(opt_ifindex, &prog_id, opt_xdp_flags); + ret = bpf_xdp_query_id(opt_ifindex, opt_xdp_flags, &prog_id); if (ret) exit_with_error(-ret); @@ -1760,7 +1760,7 @@ static void load_xdp_program(char **argv, struct bpf_object **obj) exit(EXIT_FAILURE); } - if (bpf_set_link_xdp_fd(opt_ifindex, prog_fd, opt_xdp_flags) < 0) { + if (bpf_xdp_attach(opt_ifindex, prog_fd, opt_xdp_flags, NULL) < 0) { fprintf(stderr, "ERROR: link set xdp fd failed\n"); exit(EXIT_FAILURE); } diff --git a/samples/bpf/xsk_fwd.c b/samples/bpf/xsk_fwd.c index 52e7c4ffd228..2220509588a0 100644 --- a/samples/bpf/xsk_fwd.c +++ b/samples/bpf/xsk_fwd.c @@ -974,8 +974,8 @@ static void remove_xdp_program(void) int i; for (i = 0 ; i < n_ports; i++) - bpf_set_link_xdp_fd(if_nametoindex(port_params[i].iface), -1, - port_params[i].xsk_cfg.xdp_flags); + bpf_xdp_detach(if_nametoindex(port_params[i].iface), + port_params[i].xsk_cfg.xdp_flags, NULL); } int main(int argc, char **argv) diff --git a/scripts/bpf_doc.py b/scripts/bpf_doc.py index a6403ddf5de7..096625242475 100755 --- a/scripts/bpf_doc.py +++ b/scripts/bpf_doc.py @@ -87,21 +87,25 @@ class HeaderParser(object): self.line = '' self.helpers = [] self.commands = [] + self.desc_unique_helpers = set() + self.define_unique_helpers = [] + self.desc_syscalls = [] + self.enum_syscalls = [] def parse_element(self): proto = self.parse_symbol() - desc = self.parse_desc() - ret = self.parse_ret() + desc = self.parse_desc(proto) + ret = self.parse_ret(proto) return APIElement(proto=proto, desc=desc, ret=ret) def parse_helper(self): proto = self.parse_proto() - desc = self.parse_desc() - ret = self.parse_ret() + desc = self.parse_desc(proto) + ret = self.parse_ret(proto) return Helper(proto=proto, desc=desc, ret=ret) def parse_symbol(self): - p = re.compile(' \* ?(.+)$') + p = re.compile(' \* ?(BPF\w+)$') capture = p.match(self.line) if not capture: raise NoSyscallCommandFound @@ -127,16 +131,15 @@ class HeaderParser(object): self.line = self.reader.readline() return capture.group(1) - def parse_desc(self): + def parse_desc(self, proto): p = re.compile(' \* ?(?:\t| {5,8})Description$') capture = p.match(self.line) if not capture: - # Helper can have empty description and we might be parsing another - # attribute: return but do not consume. - return '' + raise Exception("No description section found for " + proto) # Description can be several lines, some of them possibly empty, and it # stops when another subsection title is met. desc = '' + desc_present = False while True: self.line = self.reader.readline() if self.line == ' *\n': @@ -145,21 +148,24 @@ class HeaderParser(object): p = re.compile(' \* ?(?:\t| {5,8})(?:\t| {8})(.*)') capture = p.match(self.line) if capture: + desc_present = True desc += capture.group(1) + '\n' else: break + + if not desc_present: + raise Exception("No description found for " + proto) return desc - def parse_ret(self): + def parse_ret(self, proto): p = re.compile(' \* ?(?:\t| {5,8})Return$') capture = p.match(self.line) if not capture: - # Helper can have empty retval and we might be parsing another - # attribute: return but do not consume. - return '' + raise Exception("No return section found for " + proto) # Return value description can be several lines, some of them possibly # empty, and it stops when another subsection title is met. ret = '' + ret_present = False while True: self.line = self.reader.readline() if self.line == ' *\n': @@ -168,44 +174,101 @@ class HeaderParser(object): p = re.compile(' \* ?(?:\t| {5,8})(?:\t| {8})(.*)') capture = p.match(self.line) if capture: + ret_present = True ret += capture.group(1) + '\n' else: break + + if not ret_present: + raise Exception("No return found for " + proto) return ret - def seek_to(self, target, help_message): + def seek_to(self, target, help_message, discard_lines = 1): self.reader.seek(0) offset = self.reader.read().find(target) if offset == -1: raise Exception(help_message) self.reader.seek(offset) self.reader.readline() - self.reader.readline() + for _ in range(discard_lines): + self.reader.readline() self.line = self.reader.readline() - def parse_syscall(self): + def parse_desc_syscall(self): self.seek_to('* DOC: eBPF Syscall Commands', 'Could not find start of eBPF syscall descriptions list') while True: try: command = self.parse_element() self.commands.append(command) + self.desc_syscalls.append(command.proto) + except NoSyscallCommandFound: break - def parse_helpers(self): + def parse_enum_syscall(self): + self.seek_to('enum bpf_cmd {', + 'Could not find start of bpf_cmd enum', 0) + # Searches for either one or more BPF\w+ enums + bpf_p = re.compile('\s*(BPF\w+)+') + # Searches for an enum entry assigned to another entry, + # for e.g. BPF_PROG_RUN = BPF_PROG_TEST_RUN, which is + # not documented hence should be skipped in check to + # determine if the right number of syscalls are documented + assign_p = re.compile('\s*(BPF\w+)\s*=\s*(BPF\w+)') + bpf_cmd_str = '' + while True: + capture = assign_p.match(self.line) + if capture: + # Skip line if an enum entry is assigned to another entry + self.line = self.reader.readline() + continue + capture = bpf_p.match(self.line) + if capture: + bpf_cmd_str += self.line + else: + break + self.line = self.reader.readline() + # Find the number of occurences of BPF\w+ + self.enum_syscalls = re.findall('(BPF\w+)+', bpf_cmd_str) + + def parse_desc_helpers(self): self.seek_to('* Start of BPF helper function descriptions:', 'Could not find start of eBPF helper descriptions list') while True: try: helper = self.parse_helper() self.helpers.append(helper) + proto = helper.proto_break_down() + self.desc_unique_helpers.add(proto['name']) except NoHelperFound: break + def parse_define_helpers(self): + # Parse the number of FN(...) in #define __BPF_FUNC_MAPPER to compare + # later with the number of unique function names present in description. + # Note: seek_to(..) discards the first line below the target search text, + # resulting in FN(unspec) being skipped and not added to self.define_unique_helpers. + self.seek_to('#define __BPF_FUNC_MAPPER(FN)', + 'Could not find start of eBPF helper definition list') + # Searches for either one or more FN(\w+) defines or a backslash for newline + p = re.compile('\s*(FN\(\w+\))+|\\\\') + fn_defines_str = '' + while True: + capture = p.match(self.line) + if capture: + fn_defines_str += self.line + else: + break + self.line = self.reader.readline() + # Find the number of occurences of FN(\w+) + self.define_unique_helpers = re.findall('FN\(\w+\)', fn_defines_str) + def run(self): - self.parse_syscall() - self.parse_helpers() + self.parse_desc_syscall() + self.parse_enum_syscall() + self.parse_desc_helpers() + self.parse_define_helpers() self.reader.close() ############################################################################### @@ -235,6 +298,25 @@ class Printer(object): self.print_one(elem) self.print_footer() + def elem_number_check(self, desc_unique_elem, define_unique_elem, type, instance): + """ + Checks the number of helpers/syscalls documented within the header file + description with those defined as part of enum/macro and raise an + Exception if they don't match. + """ + nr_desc_unique_elem = len(desc_unique_elem) + nr_define_unique_elem = len(define_unique_elem) + if nr_desc_unique_elem != nr_define_unique_elem: + exception_msg = ''' +The number of unique %s in description (%d) doesn\'t match the number of unique %s defined in %s (%d) +''' % (type, nr_desc_unique_elem, type, instance, nr_define_unique_elem) + if nr_desc_unique_elem < nr_define_unique_elem: + # Function description is parsed until no helper is found (which can be due to + # misformatting). Hence, only print the first missing/misformatted helper/enum. + exception_msg += ''' +The description for %s is not present or formatted correctly. +''' % (define_unique_elem[nr_desc_unique_elem]) + raise Exception(exception_msg) class PrinterRST(Printer): """ @@ -295,7 +377,6 @@ class PrinterRST(Printer): print('') - class PrinterHelpersRST(PrinterRST): """ A printer for dumping collected information about helpers as a ReStructured @@ -305,6 +386,7 @@ class PrinterHelpersRST(PrinterRST): """ def __init__(self, parser): self.elements = parser.helpers + self.elem_number_check(parser.desc_unique_helpers, parser.define_unique_helpers, 'helper', '__BPF_FUNC_MAPPER') def print_header(self): header = '''\ @@ -478,6 +560,7 @@ class PrinterSyscallRST(PrinterRST): """ def __init__(self, parser): self.elements = parser.commands + self.elem_number_check(parser.desc_syscalls, parser.enum_syscalls, 'syscall', 'bpf_cmd') def print_header(self): header = '''\ @@ -509,6 +592,7 @@ class PrinterHelpers(Printer): """ def __init__(self, parser): self.elements = parser.helpers + self.elem_number_check(parser.desc_unique_helpers, parser.define_unique_helpers, 'helper', '__BPF_FUNC_MAPPER') type_fwds = [ 'struct bpf_fib_lookup', diff --git a/security/device_cgroup.c b/security/device_cgroup.c index 842889f3dcb7..a9f8c63a96d1 100644 --- a/security/device_cgroup.c +++ b/security/device_cgroup.c @@ -838,7 +838,7 @@ int devcgroup_check_permission(short type, u32 major, u32 minor, short access) int rc = BPF_CGROUP_RUN_PROG_DEVICE_CGROUP(type, major, minor, access); if (rc) - return -EPERM; + return rc; #ifdef CONFIG_CGROUP_DEVICE return devcgroup_legacy_check_permission(type, major, minor, access); diff --git a/tools/bpf/bpftool/btf.c b/tools/bpf/bpftool/btf.c index 59833125ac0a..a2c665beda87 100644 --- a/tools/bpf/bpftool/btf.c +++ b/tools/bpf/bpftool/btf.c @@ -902,7 +902,7 @@ static int do_show(int argc, char **argv) equal_fn_for_key_as_id, NULL); btf_map_table = hashmap__new(hash_fn_for_key_as_id, equal_fn_for_key_as_id, NULL); - if (!btf_prog_table || !btf_map_table) { + if (IS_ERR(btf_prog_table) || IS_ERR(btf_map_table)) { hashmap__free(btf_prog_table); hashmap__free(btf_map_table); if (fd >= 0) diff --git a/tools/bpf/bpftool/cgroup.c b/tools/bpf/bpftool/cgroup.c index 3571a281c43f..effe136119d7 100644 --- a/tools/bpf/bpftool/cgroup.c +++ b/tools/bpf/bpftool/cgroup.c @@ -50,6 +50,7 @@ static int show_bpf_prog(int id, enum bpf_attach_type attach_type, const char *attach_flags_str, int level) { + char prog_name[MAX_PROG_FULL_NAME]; struct bpf_prog_info info = {}; __u32 info_len = sizeof(info); int prog_fd; @@ -63,6 +64,7 @@ static int show_bpf_prog(int id, enum bpf_attach_type attach_type, return -1; } + get_prog_full_name(&info, prog_fd, prog_name, sizeof(prog_name)); if (json_output) { jsonw_start_object(json_wtr); jsonw_uint_field(json_wtr, "id", info.id); @@ -73,7 +75,7 @@ static int show_bpf_prog(int id, enum bpf_attach_type attach_type, jsonw_uint_field(json_wtr, "attach_type", attach_type); jsonw_string_field(json_wtr, "attach_flags", attach_flags_str); - jsonw_string_field(json_wtr, "name", info.name); + jsonw_string_field(json_wtr, "name", prog_name); jsonw_end_object(json_wtr); } else { printf("%s%-8u ", level ? " " : "", info.id); @@ -81,7 +83,7 @@ static int show_bpf_prog(int id, enum bpf_attach_type attach_type, printf("%-15s", attach_type_name[attach_type]); else printf("type %-10u", attach_type); - printf(" %-15s %-15s\n", attach_flags_str, info.name); + printf(" %-15s %-15s\n", attach_flags_str, prog_name); } close(prog_fd); diff --git a/tools/bpf/bpftool/common.c b/tools/bpf/bpftool/common.c index fa8eb8134344..111dff809c7b 100644 --- a/tools/bpf/bpftool/common.c +++ b/tools/bpf/bpftool/common.c @@ -24,6 +24,7 @@ #include <bpf/bpf.h> #include <bpf/hashmap.h> #include <bpf/libbpf.h> /* libbpf_num_possible_cpus */ +#include <bpf/btf.h> #include "main.h" @@ -304,6 +305,49 @@ const char *get_fd_type_name(enum bpf_obj_type type) return names[type]; } +void get_prog_full_name(const struct bpf_prog_info *prog_info, int prog_fd, + char *name_buff, size_t buff_len) +{ + const char *prog_name = prog_info->name; + const struct btf_type *func_type; + const struct bpf_func_info finfo; + struct bpf_prog_info info = {}; + __u32 info_len = sizeof(info); + struct btf *prog_btf = NULL; + + if (buff_len <= BPF_OBJ_NAME_LEN || + strlen(prog_info->name) < BPF_OBJ_NAME_LEN - 1) + goto copy_name; + + if (!prog_info->btf_id || prog_info->nr_func_info == 0) + goto copy_name; + + info.nr_func_info = 1; + info.func_info_rec_size = prog_info->func_info_rec_size; + if (info.func_info_rec_size > sizeof(finfo)) + info.func_info_rec_size = sizeof(finfo); + info.func_info = ptr_to_u64(&finfo); + + if (bpf_obj_get_info_by_fd(prog_fd, &info, &info_len)) + goto copy_name; + + prog_btf = btf__load_from_kernel_by_id(info.btf_id); + if (!prog_btf) + goto copy_name; + + func_type = btf__type_by_id(prog_btf, finfo.type_id); + if (!func_type || !btf_is_func(func_type)) + goto copy_name; + + prog_name = btf__name_by_offset(prog_btf, func_type->name_off); + +copy_name: + snprintf(name_buff, buff_len, "%s", prog_name); + + if (prog_btf) + btf__free(prog_btf); +} + int get_fd_type(int fd) { char path[PATH_MAX]; diff --git a/tools/bpf/bpftool/gen.c b/tools/bpf/bpftool/gen.c index b4695df2ea3d..43e3f8700ecc 100644 --- a/tools/bpf/bpftool/gen.c +++ b/tools/bpf/bpftool/gen.c @@ -227,7 +227,7 @@ static int codegen_datasecs(struct bpf_object *obj, const char *obj_name) /* only generate definitions for memory-mapped internal maps */ if (!bpf_map__is_internal(map)) continue; - if (!(bpf_map__def(map)->map_flags & BPF_F_MMAPABLE)) + if (!(bpf_map__map_flags(map) & BPF_F_MMAPABLE)) continue; if (!get_map_ident(map, map_ident, sizeof(map_ident))) @@ -468,7 +468,7 @@ static void codegen_destroy(struct bpf_object *obj, const char *obj_name) if (!get_map_ident(map, ident, sizeof(ident))) continue; if (bpf_map__is_internal(map) && - (bpf_map__def(map)->map_flags & BPF_F_MMAPABLE)) + (bpf_map__map_flags(map) & BPF_F_MMAPABLE)) printf("\tmunmap(skel->%1$s, %2$zd);\n", ident, bpf_map_mmap_sz(map)); codegen("\ @@ -536,7 +536,7 @@ static int gen_trace(struct bpf_object *obj, const char *obj_name, const char *h continue; if (!bpf_map__is_internal(map) || - !(bpf_map__def(map)->map_flags & BPF_F_MMAPABLE)) + !(bpf_map__map_flags(map) & BPF_F_MMAPABLE)) continue; codegen("\ @@ -600,10 +600,10 @@ static int gen_trace(struct bpf_object *obj, const char *obj_name, const char *h continue; if (!bpf_map__is_internal(map) || - !(bpf_map__def(map)->map_flags & BPF_F_MMAPABLE)) + !(bpf_map__map_flags(map) & BPF_F_MMAPABLE)) continue; - if (bpf_map__def(map)->map_flags & BPF_F_RDONLY_PROG) + if (bpf_map__map_flags(map) & BPF_F_RDONLY_PROG) mmap_flags = "PROT_READ"; else mmap_flags = "PROT_READ | PROT_WRITE"; @@ -927,7 +927,6 @@ static int do_skeleton(int argc, char **argv) s = (struct bpf_object_skeleton *)calloc(1, sizeof(*s));\n\ if (!s) \n\ goto err; \n\ - obj->skeleton = s; \n\ \n\ s->sz = sizeof(*s); \n\ s->name = \"%1$s\"; \n\ @@ -962,7 +961,7 @@ static int do_skeleton(int argc, char **argv) i, bpf_map__name(map), i, ident); /* memory-mapped internal maps */ if (bpf_map__is_internal(map) && - (bpf_map__def(map)->map_flags & BPF_F_MMAPABLE)) { + (bpf_map__map_flags(map) & BPF_F_MMAPABLE)) { printf("\ts->maps[%zu].mmaped = (void **)&obj->%s;\n", i, ident); } @@ -1000,6 +999,7 @@ static int do_skeleton(int argc, char **argv) \n\ s->data = (void *)%2$s__elf_bytes(&s->data_sz); \n\ \n\ + obj->skeleton = s; \n\ return 0; \n\ err: \n\ bpf_object__destroy_skeleton(s); \n\ diff --git a/tools/bpf/bpftool/link.c b/tools/bpf/bpftool/link.c index 2c258db0d352..97dec81950e5 100644 --- a/tools/bpf/bpftool/link.c +++ b/tools/bpf/bpftool/link.c @@ -2,6 +2,7 @@ /* Copyright (C) 2020 Facebook */ #include <errno.h> +#include <linux/err.h> #include <net/if.h> #include <stdio.h> #include <unistd.h> @@ -306,7 +307,7 @@ static int do_show(int argc, char **argv) if (show_pinned) { link_table = hashmap__new(hash_fn_for_key_as_id, equal_fn_for_key_as_id, NULL); - if (!link_table) { + if (IS_ERR(link_table)) { p_err("failed to create hashmap for pinned paths"); return -1; } diff --git a/tools/bpf/bpftool/main.c b/tools/bpf/bpftool/main.c index 020e91a542d5..9d01fa9de033 100644 --- a/tools/bpf/bpftool/main.c +++ b/tools/bpf/bpftool/main.c @@ -478,7 +478,14 @@ int main(int argc, char **argv) } if (!legacy_libbpf) { - ret = libbpf_set_strict_mode(LIBBPF_STRICT_ALL); + enum libbpf_strict_mode mode; + + /* Allow legacy map definitions for skeleton generation. + * It will still be rejected if users use LIBBPF_STRICT_ALL + * mode for loading generated skeleton. + */ + mode = (__LIBBPF_STRICT_LAST - 1) & ~LIBBPF_STRICT_MAP_DEFINITIONS; + ret = libbpf_set_strict_mode(mode); if (ret) p_err("failed to enable libbpf strict mode: %d", ret); } diff --git a/tools/bpf/bpftool/main.h b/tools/bpf/bpftool/main.h index 8d76d937a62b..0c3840596b5a 100644 --- a/tools/bpf/bpftool/main.h +++ b/tools/bpf/bpftool/main.h @@ -140,6 +140,10 @@ struct cmd { int cmd_select(const struct cmd *cmds, int argc, char **argv, int (*help)(int argc, char **argv)); +#define MAX_PROG_FULL_NAME 128 +void get_prog_full_name(const struct bpf_prog_info *prog_info, int prog_fd, + char *name_buff, size_t buff_len); + int get_fd_type(int fd); const char *get_fd_type_name(enum bpf_obj_type type); char *get_fdinfo(int fd, const char *key); diff --git a/tools/bpf/bpftool/map.c b/tools/bpf/bpftool/map.c index cc530a229812..c66a3c979b7a 100644 --- a/tools/bpf/bpftool/map.c +++ b/tools/bpf/bpftool/map.c @@ -699,7 +699,7 @@ static int do_show(int argc, char **argv) if (show_pinned) { map_table = hashmap__new(hash_fn_for_key_as_id, equal_fn_for_key_as_id, NULL); - if (!map_table) { + if (IS_ERR(map_table)) { p_err("failed to create hashmap for pinned paths"); return -1; } diff --git a/tools/bpf/bpftool/net.c b/tools/bpf/bpftool/net.c index 649053704bd7..526a332c48e6 100644 --- a/tools/bpf/bpftool/net.c +++ b/tools/bpf/bpftool/net.c @@ -551,7 +551,7 @@ static int do_attach_detach_xdp(int progfd, enum net_attach_type attach_type, if (attach_type == NET_ATTACH_TYPE_XDP_OFFLOAD) flags |= XDP_FLAGS_HW_MODE; - return bpf_set_link_xdp_fd(ifindex, progfd, flags); + return bpf_xdp_attach(ifindex, progfd, flags, NULL); } static int do_attach(int argc, char **argv) diff --git a/tools/bpf/bpftool/pids.c b/tools/bpf/bpftool/pids.c index 56b598eee043..7c384d10e95f 100644 --- a/tools/bpf/bpftool/pids.c +++ b/tools/bpf/bpftool/pids.c @@ -1,6 +1,7 @@ // SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) /* Copyright (C) 2020 Facebook */ #include <errno.h> +#include <linux/err.h> #include <stdbool.h> #include <stdio.h> #include <stdlib.h> @@ -101,7 +102,7 @@ int build_obj_refs_table(struct hashmap **map, enum bpf_obj_type type) libbpf_print_fn_t default_print; *map = hashmap__new(hash_fn_for_key_as_id, equal_fn_for_key_as_id, NULL); - if (!*map) { + if (IS_ERR(*map)) { p_err("failed to create hashmap for PID references"); return -1; } diff --git a/tools/bpf/bpftool/prog.c b/tools/bpf/bpftool/prog.c index 2a21d50516bc..cf935c63e6f5 100644 --- a/tools/bpf/bpftool/prog.c +++ b/tools/bpf/bpftool/prog.c @@ -424,8 +424,10 @@ out_free: free(value); } -static void print_prog_header_json(struct bpf_prog_info *info) +static void print_prog_header_json(struct bpf_prog_info *info, int fd) { + char prog_name[MAX_PROG_FULL_NAME]; + jsonw_uint_field(json_wtr, "id", info->id); if (info->type < ARRAY_SIZE(prog_type_name)) jsonw_string_field(json_wtr, "type", @@ -433,8 +435,10 @@ static void print_prog_header_json(struct bpf_prog_info *info) else jsonw_uint_field(json_wtr, "type", info->type); - if (*info->name) - jsonw_string_field(json_wtr, "name", info->name); + if (*info->name) { + get_prog_full_name(info, fd, prog_name, sizeof(prog_name)); + jsonw_string_field(json_wtr, "name", prog_name); + } jsonw_name(json_wtr, "tag"); jsonw_printf(json_wtr, "\"" BPF_TAG_FMT "\"", @@ -455,7 +459,7 @@ static void print_prog_json(struct bpf_prog_info *info, int fd) char *memlock; jsonw_start_object(json_wtr); - print_prog_header_json(info); + print_prog_header_json(info, fd); print_dev_json(info->ifindex, info->netns_dev, info->netns_ino); if (info->load_time) { @@ -507,16 +511,20 @@ static void print_prog_json(struct bpf_prog_info *info, int fd) jsonw_end_object(json_wtr); } -static void print_prog_header_plain(struct bpf_prog_info *info) +static void print_prog_header_plain(struct bpf_prog_info *info, int fd) { + char prog_name[MAX_PROG_FULL_NAME]; + printf("%u: ", info->id); if (info->type < ARRAY_SIZE(prog_type_name)) printf("%s ", prog_type_name[info->type]); else printf("type %u ", info->type); - if (*info->name) - printf("name %s ", info->name); + if (*info->name) { + get_prog_full_name(info, fd, prog_name, sizeof(prog_name)); + printf("name %s ", prog_name); + } printf("tag "); fprint_hex(stdout, info->tag, BPF_TAG_SIZE, ""); @@ -534,7 +542,7 @@ static void print_prog_plain(struct bpf_prog_info *info, int fd) { char *memlock; - print_prog_header_plain(info); + print_prog_header_plain(info, fd); if (info->load_time) { char buf[32]; @@ -641,7 +649,7 @@ static int do_show(int argc, char **argv) if (show_pinned) { prog_table = hashmap__new(hash_fn_for_key_as_id, equal_fn_for_key_as_id, NULL); - if (!prog_table) { + if (IS_ERR(prog_table)) { p_err("failed to create hashmap for pinned paths"); return -1; } @@ -972,10 +980,10 @@ static int do_dump(int argc, char **argv) if (json_output && nb_fds > 1) { jsonw_start_object(json_wtr); /* prog object */ - print_prog_header_json(&info); + print_prog_header_json(&info, fds[i]); jsonw_name(json_wtr, "insns"); } else if (nb_fds > 1) { - print_prog_header_plain(&info); + print_prog_header_plain(&info, fds[i]); } err = prog_dump(&info, mode, filepath, opcodes, visual, linum); diff --git a/tools/bpf/bpftool/struct_ops.c b/tools/bpf/bpftool/struct_ops.c index 2f693b082bdb..e08a6ff2866c 100644 --- a/tools/bpf/bpftool/struct_ops.c +++ b/tools/bpf/bpftool/struct_ops.c @@ -480,7 +480,6 @@ static int do_unregister(int argc, char **argv) static int do_register(int argc, char **argv) { LIBBPF_OPTS(bpf_object_open_opts, open_opts); - const struct bpf_map_def *def; struct bpf_map_info info = {}; __u32 info_len = sizeof(info); int nr_errs = 0, nr_maps = 0; @@ -510,8 +509,7 @@ static int do_register(int argc, char **argv) } bpf_object__for_each_map(map, obj) { - def = bpf_map__def(map); - if (def->type != BPF_MAP_TYPE_STRUCT_OPS) + if (bpf_map__type(map) != BPF_MAP_TYPE_STRUCT_OPS) continue; link = bpf_map__attach_struct_ops(map); diff --git a/tools/bpf/resolve_btfids/Makefile b/tools/bpf/resolve_btfids/Makefile index 9ddeca947635..a7f87cdf11da 100644 --- a/tools/bpf/resolve_btfids/Makefile +++ b/tools/bpf/resolve_btfids/Makefile @@ -20,6 +20,8 @@ LD = $(HOSTLD) ARCH = $(HOSTARCH) RM ?= rm CROSS_COMPILE = +CFLAGS := $(KBUILD_HOSTCFLAGS) +LDFLAGS := $(KBUILD_HOSTLDFLAGS) OUTPUT ?= $(srctree)/tools/bpf/resolve_btfids/ @@ -47,10 +49,10 @@ $(SUBCMDOBJ): fixdep FORCE | $(OUTPUT)/libsubcmd $(BPFOBJ): $(wildcard $(LIBBPF_SRC)/*.[ch] $(LIBBPF_SRC)/Makefile) | $(LIBBPF_OUT) $(Q)$(MAKE) $(submake_extras) -C $(LIBBPF_SRC) OUTPUT=$(LIBBPF_OUT) \ - DESTDIR=$(LIBBPF_DESTDIR) prefix= \ + DESTDIR=$(LIBBPF_DESTDIR) prefix= EXTRA_CFLAGS="$(CFLAGS)" \ $(abspath $@) install_headers -CFLAGS := -g \ +CFLAGS += -g \ -I$(srctree)/tools/include \ -I$(srctree)/tools/include/uapi \ -I$(LIBBPF_INCLUDE) \ diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index b0383d371b9a..16a7574292a5 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -330,6 +330,8 @@ union bpf_iter_link_info { * *ctx_out*, *data_in* and *data_out* must be NULL. * *repeat* must be zero. * + * BPF_PROG_RUN is an alias for BPF_PROG_TEST_RUN. + * * Return * Returns zero on success. On error, -1 is returned and *errno* * is set appropriately. @@ -1111,6 +1113,11 @@ enum bpf_link_type { */ #define BPF_F_SLEEPABLE (1U << 4) +/* If BPF_F_XDP_HAS_FRAGS is used in BPF_PROG_LOAD command, the loaded program + * fully support xdp frags. + */ +#define BPF_F_XDP_HAS_FRAGS (1U << 5) + /* When BPF ldimm64's insn[0].src_reg != 0 then this can have * the following extensions: * @@ -1775,6 +1782,8 @@ union bpf_attr { * 0 on success, or a negative error in case of failure. * * u64 bpf_get_current_pid_tgid(void) + * Description + * Get the current pid and tgid. * Return * A 64-bit integer containing the current tgid and pid, and * created as such: @@ -1782,6 +1791,8 @@ union bpf_attr { * *current_task*\ **->pid**. * * u64 bpf_get_current_uid_gid(void) + * Description + * Get the current uid and gid. * Return * A 64-bit integer containing the current GID and UID, and * created as such: *current_gid* **<< 32 \|** *current_uid*. @@ -2256,6 +2267,8 @@ union bpf_attr { * The 32-bit hash. * * u64 bpf_get_current_task(void) + * Description + * Get the current task. * Return * A pointer to the current task struct. * @@ -2369,6 +2382,8 @@ union bpf_attr { * indicate that the hash is outdated and to trigger a * recalculation the next time the kernel tries to access this * hash or when the **bpf_get_hash_recalc**\ () helper is called. + * Return + * void. * * long bpf_get_numa_node_id(void) * Description @@ -2466,6 +2481,8 @@ union bpf_attr { * A 8-byte long unique number or 0 if *sk* is NULL. * * u32 bpf_get_socket_uid(struct sk_buff *skb) + * Description + * Get the owner UID of the socked associated to *skb*. * Return * The owner UID of the socket associated to *skb*. If the socket * is **NULL**, or if it is not a full socket (i.e. if it is a @@ -3240,6 +3257,9 @@ union bpf_attr { * The id is returned or 0 in case the id could not be retrieved. * * u64 bpf_get_current_cgroup_id(void) + * Description + * Get the current cgroup id based on the cgroup within which + * the current task is running. * Return * A 64-bit integer containing the current cgroup id based * on the cgroup within which the current task is running. @@ -5018,6 +5038,44 @@ union bpf_attr { * * Return * The number of arguments of the traced function. + * + * int bpf_get_retval(void) + * Description + * Get the syscall's return value that will be returned to userspace. + * + * This helper is currently supported by cgroup programs only. + * Return + * The syscall's return value. + * + * int bpf_set_retval(int retval) + * Description + * Set the syscall's return value that will be returned to userspace. + * + * This helper is currently supported by cgroup programs only. + * Return + * 0 on success, or a negative error in case of failure. + * + * u64 bpf_xdp_get_buff_len(struct xdp_buff *xdp_md) + * Description + * Get the total size of a given xdp buff (linear and paged area) + * Return + * The total size of a given xdp buffer. + * + * long bpf_xdp_load_bytes(struct xdp_buff *xdp_md, u32 offset, void *buf, u32 len) + * Description + * This helper is provided as an easy way to load data from a + * xdp buffer. It can be used to load *len* bytes from *offset* from + * the frame associated to *xdp_md*, into the buffer pointed by + * *buf*. + * Return + * 0 on success, or a negative error in case of failure. + * + * long bpf_xdp_store_bytes(struct xdp_buff *xdp_md, u32 offset, void *buf, u32 len) + * Description + * Store *len* bytes from buffer *buf* into the frame + * associated to *xdp_md*, at *offset*. + * Return + * 0 on success, or a negative error in case of failure. */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -5206,6 +5264,11 @@ union bpf_attr { FN(get_func_arg), \ FN(get_func_ret), \ FN(get_func_arg_cnt), \ + FN(get_retval), \ + FN(set_retval), \ + FN(xdp_get_buff_len), \ + FN(xdp_load_bytes), \ + FN(xdp_store_bytes), \ /* */ /* integer value in 'imm' field of BPF_CALL instruction selects which helper diff --git a/tools/lib/bpf/bpf.c b/tools/lib/bpf/bpf.c index 550b4cbb6c99..418b259166f8 100644 --- a/tools/lib/bpf/bpf.c +++ b/tools/lib/bpf/bpf.c @@ -754,10 +754,10 @@ int bpf_prog_attach(int prog_fd, int target_fd, enum bpf_attach_type type, .flags = flags, ); - return bpf_prog_attach_xattr(prog_fd, target_fd, type, &opts); + return bpf_prog_attach_opts(prog_fd, target_fd, type, &opts); } -int bpf_prog_attach_xattr(int prog_fd, int target_fd, +int bpf_prog_attach_opts(int prog_fd, int target_fd, enum bpf_attach_type type, const struct bpf_prog_attach_opts *opts) { @@ -778,6 +778,11 @@ int bpf_prog_attach_xattr(int prog_fd, int target_fd, return libbpf_err_errno(ret); } +__attribute__((alias("bpf_prog_attach_opts"))) +int bpf_prog_attach_xattr(int prog_fd, int target_fd, + enum bpf_attach_type type, + const struct bpf_prog_attach_opts *opts); + int bpf_prog_detach(int target_fd, enum bpf_attach_type type) { union bpf_attr attr; diff --git a/tools/lib/bpf/bpf.h b/tools/lib/bpf/bpf.h index 14e0d97ad2cf..c2e8327010f9 100644 --- a/tools/lib/bpf/bpf.h +++ b/tools/lib/bpf/bpf.h @@ -391,6 +391,10 @@ struct bpf_prog_attach_opts { LIBBPF_API int bpf_prog_attach(int prog_fd, int attachable_fd, enum bpf_attach_type type, unsigned int flags); +LIBBPF_API int bpf_prog_attach_opts(int prog_fd, int attachable_fd, + enum bpf_attach_type type, + const struct bpf_prog_attach_opts *opts); +LIBBPF_DEPRECATED_SINCE(0, 8, "use bpf_prog_attach_opts() instead") LIBBPF_API int bpf_prog_attach_xattr(int prog_fd, int attachable_fd, enum bpf_attach_type type, const struct bpf_prog_attach_opts *opts); diff --git a/tools/lib/bpf/bpf_helpers.h b/tools/lib/bpf/bpf_helpers.h index 963b1060d944..44df982d2a5c 100644 --- a/tools/lib/bpf/bpf_helpers.h +++ b/tools/lib/bpf/bpf_helpers.h @@ -133,7 +133,7 @@ struct bpf_map_def { unsigned int value_size; unsigned int max_entries; unsigned int map_flags; -}; +} __attribute__((deprecated("use BTF-defined maps in .maps section"))); enum libbpf_pin_type { LIBBPF_PIN_NONE, diff --git a/tools/lib/bpf/btf.c b/tools/lib/bpf/btf.c index 9aa19c89f758..1383e26c5d1f 100644 --- a/tools/lib/bpf/btf.c +++ b/tools/lib/bpf/btf.c @@ -1620,20 +1620,37 @@ static int btf_commit_type(struct btf *btf, int data_sz) struct btf_pipe { const struct btf *src; struct btf *dst; + struct hashmap *str_off_map; /* map string offsets from src to dst */ }; static int btf_rewrite_str(__u32 *str_off, void *ctx) { struct btf_pipe *p = ctx; - int off; + void *mapped_off; + int off, err; if (!*str_off) /* nothing to do for empty strings */ return 0; + if (p->str_off_map && + hashmap__find(p->str_off_map, (void *)(long)*str_off, &mapped_off)) { + *str_off = (__u32)(long)mapped_off; + return 0; + } + off = btf__add_str(p->dst, btf__str_by_offset(p->src, *str_off)); if (off < 0) return off; + /* Remember string mapping from src to dst. It avoids + * performing expensive string comparisons. + */ + if (p->str_off_map) { + err = hashmap__append(p->str_off_map, (void *)(long)*str_off, (void *)(long)off); + if (err) + return err; + } + *str_off = off; return 0; } @@ -1680,6 +1697,9 @@ static int btf_rewrite_type_ids(__u32 *type_id, void *ctx) return 0; } +static size_t btf_dedup_identity_hash_fn(const void *key, void *ctx); +static bool btf_dedup_equal_fn(const void *k1, const void *k2, void *ctx); + int btf__add_btf(struct btf *btf, const struct btf *src_btf) { struct btf_pipe p = { .src = src_btf, .dst = btf }; @@ -1713,6 +1733,11 @@ int btf__add_btf(struct btf *btf, const struct btf *src_btf) if (!off) return libbpf_err(-ENOMEM); + /* Map the string offsets from src_btf to the offsets from btf to improve performance */ + p.str_off_map = hashmap__new(btf_dedup_identity_hash_fn, btf_dedup_equal_fn, NULL); + if (IS_ERR(p.str_off_map)) + return libbpf_err(-ENOMEM); + /* bulk copy types data for all types from src_btf */ memcpy(t, src_btf->types_data, data_sz); @@ -1754,6 +1779,8 @@ int btf__add_btf(struct btf *btf, const struct btf *src_btf) btf->hdr->str_off += data_sz; btf->nr_types += cnt; + hashmap__free(p.str_off_map); + /* return type ID of the first added BTF type */ return btf->start_id + btf->nr_types - cnt; err_out: @@ -1767,6 +1794,8 @@ err_out: * wasn't modified, so doesn't need restoring, see big comment above */ btf->hdr->str_len = old_strs_len; + hashmap__free(p.str_off_map); + return libbpf_err(err); } diff --git a/tools/lib/bpf/btf.h b/tools/lib/bpf/btf.h index 061839f04525..51862fdee850 100644 --- a/tools/lib/bpf/btf.h +++ b/tools/lib/bpf/btf.h @@ -375,8 +375,28 @@ btf_dump__dump_type_data(struct btf_dump *d, __u32 id, const struct btf_dump_type_data_opts *opts); /* - * A set of helpers for easier BTF types handling + * A set of helpers for easier BTF types handling. + * + * The inline functions below rely on constants from the kernel headers which + * may not be available for applications including this header file. To avoid + * compilation errors, we define all the constants here that were added after + * the initial introduction of the BTF_KIND* constants. */ +#ifndef BTF_KIND_FUNC +#define BTF_KIND_FUNC 12 /* Function */ +#define BTF_KIND_FUNC_PROTO 13 /* Function Proto */ +#endif +#ifndef BTF_KIND_VAR +#define BTF_KIND_VAR 14 /* Variable */ +#define BTF_KIND_DATASEC 15 /* Section */ +#endif +#ifndef BTF_KIND_FLOAT +#define BTF_KIND_FLOAT 16 /* Floating point */ +#endif +/* The kernel header switched to enums, so these two were never #defined */ +#define BTF_KIND_DECL_TAG 17 /* Decl Tag */ +#define BTF_KIND_TYPE_TAG 18 /* Type Tag */ + static inline __u16 btf_kind(const struct btf_type *t) { return BTF_INFO_KIND(t->info); diff --git a/tools/lib/bpf/hashmap.c b/tools/lib/bpf/hashmap.c index 3c20b126d60d..aeb09c288716 100644 --- a/tools/lib/bpf/hashmap.c +++ b/tools/lib/bpf/hashmap.c @@ -75,7 +75,7 @@ void hashmap__clear(struct hashmap *map) void hashmap__free(struct hashmap *map) { - if (!map) + if (IS_ERR_OR_NULL(map)) return; hashmap__clear(map); @@ -238,4 +238,3 @@ bool hashmap__delete(struct hashmap *map, const void *key, return true; } - diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 7f10dd501a52..a8c750373ad5 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -235,6 +235,8 @@ enum sec_def_flags { SEC_SLEEPABLE = 8, /* allow non-strict prefix matching */ SEC_SLOPPY_PFX = 16, + /* BPF program support non-linear XDP buffer */ + SEC_XDP_FRAGS = 32, }; struct bpf_sec_def { @@ -1937,6 +1939,11 @@ static int bpf_object__init_user_maps(struct bpf_object *obj, bool strict) if (obj->efile.maps_shndx < 0) return 0; + if (libbpf_mode & LIBBPF_STRICT_MAP_DEFINITIONS) { + pr_warn("legacy map definitions in SEC(\"maps\") are not supported\n"); + return -EOPNOTSUPP; + } + if (!symbols) return -EINVAL; @@ -1999,6 +2006,8 @@ static int bpf_object__init_user_maps(struct bpf_object *obj, bool strict) return -LIBBPF_ERRNO__FORMAT; } + pr_warn("map '%s' (legacy): legacy map definitions are deprecated, use BTF-defined maps instead\n", map_name); + if (ELF64_ST_BIND(sym->st_info) == STB_LOCAL) { pr_warn("map '%s' (legacy): static maps are not supported\n", map_name); return -ENOTSUP; @@ -4190,6 +4199,7 @@ static int bpf_map_find_btf_info(struct bpf_object *obj, struct bpf_map *map) return 0; if (!bpf_map__is_internal(map)) { + pr_warn("Use of BPF_ANNOTATE_KV_PAIR is deprecated, use BTF-defined maps in .maps section instead\n"); ret = btf__get_map_kv_tids(obj->btf, map->name, def->key_size, def->value_size, &key_type_id, &value_type_id); @@ -6562,6 +6572,9 @@ static int libbpf_preload_prog(struct bpf_program *prog, if (def & SEC_SLEEPABLE) opts->prog_flags |= BPF_F_SLEEPABLE; + if (prog->type == BPF_PROG_TYPE_XDP && (def & SEC_XDP_FRAGS)) + opts->prog_flags |= BPF_F_XDP_HAS_FRAGS; + if ((prog->type == BPF_PROG_TYPE_TRACING || prog->type == BPF_PROG_TYPE_LSM || prog->type == BPF_PROG_TYPE_EXT) && !prog->attach_btf_id) { @@ -8600,8 +8613,11 @@ static const struct bpf_sec_def section_defs[] = { SEC_DEF("lsm.s/", LSM, BPF_LSM_MAC, SEC_ATTACH_BTF | SEC_SLEEPABLE, attach_lsm), SEC_DEF("iter/", TRACING, BPF_TRACE_ITER, SEC_ATTACH_BTF, attach_iter), SEC_DEF("syscall", SYSCALL, 0, SEC_SLEEPABLE), + SEC_DEF("xdp.frags/devmap", XDP, BPF_XDP_DEVMAP, SEC_XDP_FRAGS), SEC_DEF("xdp_devmap/", XDP, BPF_XDP_DEVMAP, SEC_ATTACHABLE), + SEC_DEF("xdp.frags/cpumap", XDP, BPF_XDP_CPUMAP, SEC_XDP_FRAGS), SEC_DEF("xdp_cpumap/", XDP, BPF_XDP_CPUMAP, SEC_ATTACHABLE), + SEC_DEF("xdp.frags", XDP, BPF_XDP, SEC_XDP_FRAGS), SEC_DEF("xdp", XDP, BPF_XDP, SEC_ATTACHABLE_OPT | SEC_SLOPPY_PFX), SEC_DEF("perf_event", PERF_EVENT, 0, SEC_NONE | SEC_SLOPPY_PFX), SEC_DEF("lwt_in", LWT_IN, 0, SEC_NONE | SEC_SLOPPY_PFX), @@ -11795,6 +11811,9 @@ void bpf_object__detach_skeleton(struct bpf_object_skeleton *s) void bpf_object__destroy_skeleton(struct bpf_object_skeleton *s) { + if (!s) + return; + if (s->progs) bpf_object__detach_skeleton(s); if (s->obj) diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h index 8b9bc5e90c2b..94670066de62 100644 --- a/tools/lib/bpf/libbpf.h +++ b/tools/lib/bpf/libbpf.h @@ -706,7 +706,8 @@ bpf_object__prev_map(const struct bpf_object *obj, const struct bpf_map *map); LIBBPF_API int bpf_map__fd(const struct bpf_map *map); LIBBPF_API int bpf_map__reuse_fd(struct bpf_map *map, int fd); /* get map definition */ -LIBBPF_API const struct bpf_map_def *bpf_map__def(const struct bpf_map *map); +LIBBPF_API LIBBPF_DEPRECATED_SINCE(0, 8, "use appropriate getters or setters instead") +const struct bpf_map_def *bpf_map__def(const struct bpf_map *map); /* get map name */ LIBBPF_API const char *bpf_map__name(const struct bpf_map *map); /* get/set map type */ @@ -832,13 +833,42 @@ struct bpf_xdp_set_link_opts { }; #define bpf_xdp_set_link_opts__last_field old_fd +LIBBPF_DEPRECATED_SINCE(0, 8, "use bpf_xdp_attach() instead") LIBBPF_API int bpf_set_link_xdp_fd(int ifindex, int fd, __u32 flags); +LIBBPF_DEPRECATED_SINCE(0, 8, "use bpf_xdp_attach() instead") LIBBPF_API int bpf_set_link_xdp_fd_opts(int ifindex, int fd, __u32 flags, const struct bpf_xdp_set_link_opts *opts); +LIBBPF_DEPRECATED_SINCE(0, 8, "use bpf_xdp_query_id() instead") LIBBPF_API int bpf_get_link_xdp_id(int ifindex, __u32 *prog_id, __u32 flags); +LIBBPF_DEPRECATED_SINCE(0, 8, "use bpf_xdp_query() instead") LIBBPF_API int bpf_get_link_xdp_info(int ifindex, struct xdp_link_info *info, size_t info_size, __u32 flags); +struct bpf_xdp_attach_opts { + size_t sz; + int old_prog_fd; + size_t :0; +}; +#define bpf_xdp_attach_opts__last_field old_prog_fd + +struct bpf_xdp_query_opts { + size_t sz; + __u32 prog_id; /* output */ + __u32 drv_prog_id; /* output */ + __u32 hw_prog_id; /* output */ + __u32 skb_prog_id; /* output */ + __u8 attach_mode; /* output */ + size_t :0; +}; +#define bpf_xdp_query_opts__last_field attach_mode + +LIBBPF_API int bpf_xdp_attach(int ifindex, int prog_fd, __u32 flags, + const struct bpf_xdp_attach_opts *opts); +LIBBPF_API int bpf_xdp_detach(int ifindex, __u32 flags, + const struct bpf_xdp_attach_opts *opts); +LIBBPF_API int bpf_xdp_query(int ifindex, int flags, struct bpf_xdp_query_opts *opts); +LIBBPF_API int bpf_xdp_query_id(int ifindex, int flags, __u32 *prog_id); + /* TC related API */ enum bpf_tc_attach_point { BPF_TC_INGRESS = 1 << 0, diff --git a/tools/lib/bpf/libbpf.map b/tools/lib/bpf/libbpf.map index 529783967793..e10f0822845a 100644 --- a/tools/lib/bpf/libbpf.map +++ b/tools/lib/bpf/libbpf.map @@ -247,6 +247,7 @@ LIBBPF_0.0.8 { bpf_link_create; bpf_link_update; bpf_map__set_initial_value; + bpf_prog_attach_opts; bpf_program__attach_cgroup; bpf_program__attach_lsm; bpf_program__is_lsm; @@ -427,6 +428,10 @@ LIBBPF_0.7.0 { bpf_program__log_level; bpf_program__set_log_buf; bpf_program__set_log_level; + bpf_xdp_attach; + bpf_xdp_detach; + bpf_xdp_query; + bpf_xdp_query_id; libbpf_probe_bpf_helper; libbpf_probe_bpf_map_type; libbpf_probe_bpf_prog_type; diff --git a/tools/lib/bpf/libbpf_legacy.h b/tools/lib/bpf/libbpf_legacy.h index 79131f761a27..3c2b281c2bc3 100644 --- a/tools/lib/bpf/libbpf_legacy.h +++ b/tools/lib/bpf/libbpf_legacy.h @@ -73,6 +73,11 @@ enum libbpf_strict_mode { * operation. */ LIBBPF_STRICT_AUTO_RLIMIT_MEMLOCK = 0x10, + /* + * Error out on any SEC("maps") map definition, which are deprecated + * in favor of BTF-defined map definitions in SEC(".maps"). + */ + LIBBPF_STRICT_MAP_DEFINITIONS = 0x20, __LIBBPF_STRICT_LAST, }; diff --git a/tools/lib/bpf/netlink.c b/tools/lib/bpf/netlink.c index 39f25e09b51e..c39c37f99d5c 100644 --- a/tools/lib/bpf/netlink.c +++ b/tools/lib/bpf/netlink.c @@ -217,6 +217,28 @@ static int __bpf_set_link_xdp_fd_replace(int ifindex, int fd, int old_fd, return libbpf_netlink_send_recv(&req, NULL, NULL, NULL); } +int bpf_xdp_attach(int ifindex, int prog_fd, __u32 flags, const struct bpf_xdp_attach_opts *opts) +{ + int old_prog_fd, err; + + if (!OPTS_VALID(opts, bpf_xdp_attach_opts)) + return libbpf_err(-EINVAL); + + old_prog_fd = OPTS_GET(opts, old_prog_fd, 0); + if (old_prog_fd) + flags |= XDP_FLAGS_REPLACE; + else + old_prog_fd = -1; + + err = __bpf_set_link_xdp_fd_replace(ifindex, prog_fd, old_prog_fd, flags); + return libbpf_err(err); +} + +int bpf_xdp_detach(int ifindex, __u32 flags, const struct bpf_xdp_attach_opts *opts) +{ + return bpf_xdp_attach(ifindex, -1, flags, opts); +} + int bpf_set_link_xdp_fd_opts(int ifindex, int fd, __u32 flags, const struct bpf_xdp_set_link_opts *opts) { @@ -303,69 +325,98 @@ static int get_xdp_info(void *cookie, void *msg, struct nlattr **tb) return 0; } -int bpf_get_link_xdp_info(int ifindex, struct xdp_link_info *info, - size_t info_size, __u32 flags) +int bpf_xdp_query(int ifindex, int xdp_flags, struct bpf_xdp_query_opts *opts) { - struct xdp_id_md xdp_id = {}; - __u32 mask; - int ret; struct libbpf_nla_req req = { .nh.nlmsg_len = NLMSG_LENGTH(sizeof(struct ifinfomsg)), .nh.nlmsg_type = RTM_GETLINK, .nh.nlmsg_flags = NLM_F_DUMP | NLM_F_REQUEST, .ifinfo.ifi_family = AF_PACKET, }; + struct xdp_id_md xdp_id = {}; + int err; - if (flags & ~XDP_FLAGS_MASK || !info_size) + if (!OPTS_VALID(opts, bpf_xdp_query_opts)) + return libbpf_err(-EINVAL); + + if (xdp_flags & ~XDP_FLAGS_MASK) return libbpf_err(-EINVAL); /* Check whether the single {HW,DRV,SKB} mode is set */ - flags &= (XDP_FLAGS_SKB_MODE | XDP_FLAGS_DRV_MODE | XDP_FLAGS_HW_MODE); - mask = flags - 1; - if (flags && flags & mask) + xdp_flags &= XDP_FLAGS_SKB_MODE | XDP_FLAGS_DRV_MODE | XDP_FLAGS_HW_MODE; + if (xdp_flags & (xdp_flags - 1)) return libbpf_err(-EINVAL); xdp_id.ifindex = ifindex; - xdp_id.flags = flags; + xdp_id.flags = xdp_flags; - ret = libbpf_netlink_send_recv(&req, __dump_link_nlmsg, + err = libbpf_netlink_send_recv(&req, __dump_link_nlmsg, get_xdp_info, &xdp_id); - if (!ret) { - size_t sz = min(info_size, sizeof(xdp_id.info)); + if (err) + return libbpf_err(err); - memcpy(info, &xdp_id.info, sz); - memset((void *) info + sz, 0, info_size - sz); - } + OPTS_SET(opts, prog_id, xdp_id.info.prog_id); + OPTS_SET(opts, drv_prog_id, xdp_id.info.drv_prog_id); + OPTS_SET(opts, hw_prog_id, xdp_id.info.hw_prog_id); + OPTS_SET(opts, skb_prog_id, xdp_id.info.skb_prog_id); + OPTS_SET(opts, attach_mode, xdp_id.info.attach_mode); - return libbpf_err(ret); + return 0; } -static __u32 get_xdp_id(struct xdp_link_info *info, __u32 flags) +int bpf_get_link_xdp_info(int ifindex, struct xdp_link_info *info, + size_t info_size, __u32 flags) { - flags &= XDP_FLAGS_MODES; + LIBBPF_OPTS(bpf_xdp_query_opts, opts); + size_t sz; + int err; + + if (!info_size) + return libbpf_err(-EINVAL); - if (info->attach_mode != XDP_ATTACHED_MULTI && !flags) - return info->prog_id; - if (flags & XDP_FLAGS_DRV_MODE) - return info->drv_prog_id; - if (flags & XDP_FLAGS_HW_MODE) - return info->hw_prog_id; - if (flags & XDP_FLAGS_SKB_MODE) - return info->skb_prog_id; + err = bpf_xdp_query(ifindex, flags, &opts); + if (err) + return libbpf_err(err); + + /* struct xdp_link_info field layout matches struct bpf_xdp_query_opts + * layout after sz field + */ + sz = min(info_size, offsetofend(struct xdp_link_info, attach_mode)); + memcpy(info, &opts.prog_id, sz); + memset((void *)info + sz, 0, info_size - sz); return 0; } -int bpf_get_link_xdp_id(int ifindex, __u32 *prog_id, __u32 flags) +int bpf_xdp_query_id(int ifindex, int flags, __u32 *prog_id) { - struct xdp_link_info info; + LIBBPF_OPTS(bpf_xdp_query_opts, opts); int ret; - ret = bpf_get_link_xdp_info(ifindex, &info, sizeof(info), flags); - if (!ret) - *prog_id = get_xdp_id(&info, flags); + ret = bpf_xdp_query(ifindex, flags, &opts); + if (ret) + return libbpf_err(ret); + + flags &= XDP_FLAGS_MODES; - return libbpf_err(ret); + if (opts.attach_mode != XDP_ATTACHED_MULTI && !flags) + *prog_id = opts.prog_id; + else if (flags & XDP_FLAGS_DRV_MODE) + *prog_id = opts.drv_prog_id; + else if (flags & XDP_FLAGS_HW_MODE) + *prog_id = opts.hw_prog_id; + else if (flags & XDP_FLAGS_SKB_MODE) + *prog_id = opts.skb_prog_id; + else + *prog_id = 0; + + return 0; +} + + +int bpf_get_link_xdp_id(int ifindex, __u32 *prog_id, __u32 flags) +{ + return bpf_xdp_query_id(ifindex, flags, prog_id); } typedef int (*qdisc_config_t)(struct libbpf_nla_req *req); diff --git a/tools/perf/util/bpf-loader.c b/tools/perf/util/bpf-loader.c index 7ecfaac7536a..ef2832b4d5cc 100644 --- a/tools/perf/util/bpf-loader.c +++ b/tools/perf/util/bpf-loader.c @@ -1005,24 +1005,22 @@ __bpf_map__config_value(struct bpf_map *map, { struct bpf_map_op *op; const char *map_name = bpf_map__name(map); - const struct bpf_map_def *def = bpf_map__def(map); - if (IS_ERR(def)) { - pr_debug("Unable to get map definition from '%s'\n", - map_name); + if (!map) { + pr_debug("Map '%s' is invalid\n", map_name); return -BPF_LOADER_ERRNO__INTERNAL; } - if (def->type != BPF_MAP_TYPE_ARRAY) { + if (bpf_map__type(map) != BPF_MAP_TYPE_ARRAY) { pr_debug("Map %s type is not BPF_MAP_TYPE_ARRAY\n", map_name); return -BPF_LOADER_ERRNO__OBJCONF_MAP_TYPE; } - if (def->key_size < sizeof(unsigned int)) { + if (bpf_map__key_size(map) < sizeof(unsigned int)) { pr_debug("Map %s has incorrect key size\n", map_name); return -BPF_LOADER_ERRNO__OBJCONF_MAP_KEYSIZE; } - switch (def->value_size) { + switch (bpf_map__value_size(map)) { case 1: case 2: case 4: @@ -1064,7 +1062,6 @@ __bpf_map__config_event(struct bpf_map *map, struct parse_events_term *term, struct evlist *evlist) { - const struct bpf_map_def *def; struct bpf_map_op *op; const char *map_name = bpf_map__name(map); struct evsel *evsel = evlist__find_evsel_by_str(evlist, term->val.str); @@ -1075,18 +1072,16 @@ __bpf_map__config_event(struct bpf_map *map, return -BPF_LOADER_ERRNO__OBJCONF_MAP_NOEVT; } - def = bpf_map__def(map); - if (IS_ERR(def)) { - pr_debug("Unable to get map definition from '%s'\n", - map_name); - return PTR_ERR(def); + if (!map) { + pr_debug("Map '%s' is invalid\n", map_name); + return PTR_ERR(map); } /* * No need to check key_size and value_size: * kernel has already checked them. */ - if (def->type != BPF_MAP_TYPE_PERF_EVENT_ARRAY) { + if (bpf_map__type(map) != BPF_MAP_TYPE_PERF_EVENT_ARRAY) { pr_debug("Map %s type is not BPF_MAP_TYPE_PERF_EVENT_ARRAY\n", map_name); return -BPF_LOADER_ERRNO__OBJCONF_MAP_TYPE; @@ -1135,7 +1130,6 @@ config_map_indices_range_check(struct parse_events_term *term, const char *map_name) { struct parse_events_array *array = &term->array; - const struct bpf_map_def *def; unsigned int i; if (!array->nr_ranges) @@ -1146,10 +1140,8 @@ config_map_indices_range_check(struct parse_events_term *term, return -BPF_LOADER_ERRNO__INTERNAL; } - def = bpf_map__def(map); - if (IS_ERR(def)) { - pr_debug("ERROR: Unable to get map definition from '%s'\n", - map_name); + if (!map) { + pr_debug("Map '%s' is invalid\n", map_name); return -BPF_LOADER_ERRNO__INTERNAL; } @@ -1158,7 +1150,7 @@ config_map_indices_range_check(struct parse_events_term *term, size_t length = array->ranges[i].length; unsigned int idx = start + length - 1; - if (idx >= def->max_entries) { + if (idx >= bpf_map__max_entries(map)) { pr_debug("ERROR: index %d too large\n", idx); return -BPF_LOADER_ERRNO__OBJCONF_MAP_IDX2BIG; } @@ -1251,21 +1243,21 @@ out: } typedef int (*map_config_func_t)(const char *name, int map_fd, - const struct bpf_map_def *pdef, + const struct bpf_map *map, struct bpf_map_op *op, void *pkey, void *arg); static int foreach_key_array_all(map_config_func_t func, void *arg, const char *name, - int map_fd, const struct bpf_map_def *pdef, + int map_fd, const struct bpf_map *map, struct bpf_map_op *op) { unsigned int i; int err; - for (i = 0; i < pdef->max_entries; i++) { - err = func(name, map_fd, pdef, op, &i, arg); + for (i = 0; i < bpf_map__max_entries(map); i++) { + err = func(name, map_fd, map, op, &i, arg); if (err) { pr_debug("ERROR: failed to insert value to %s[%u]\n", name, i); @@ -1278,7 +1270,7 @@ foreach_key_array_all(map_config_func_t func, static int foreach_key_array_ranges(map_config_func_t func, void *arg, const char *name, int map_fd, - const struct bpf_map_def *pdef, + const struct bpf_map *map, struct bpf_map_op *op) { unsigned int i, j; @@ -1291,7 +1283,7 @@ foreach_key_array_ranges(map_config_func_t func, void *arg, for (j = 0; j < length; j++) { unsigned int idx = start + j; - err = func(name, map_fd, pdef, op, &idx, arg); + err = func(name, map_fd, map, op, &idx, arg); if (err) { pr_debug("ERROR: failed to insert value to %s[%u]\n", name, idx); @@ -1307,9 +1299,8 @@ bpf_map_config_foreach_key(struct bpf_map *map, map_config_func_t func, void *arg) { - int err, map_fd; + int err, map_fd, type; struct bpf_map_op *op; - const struct bpf_map_def *def; const char *name = bpf_map__name(map); struct bpf_map_priv *priv = bpf_map__priv(map); @@ -1322,9 +1313,8 @@ bpf_map_config_foreach_key(struct bpf_map *map, return 0; } - def = bpf_map__def(map); - if (IS_ERR(def)) { - pr_debug("ERROR: failed to get definition from map %s\n", name); + if (!map) { + pr_debug("Map '%s' is invalid\n", name); return -BPF_LOADER_ERRNO__INTERNAL; } map_fd = bpf_map__fd(map); @@ -1333,19 +1323,19 @@ bpf_map_config_foreach_key(struct bpf_map *map, return map_fd; } + type = bpf_map__type(map); list_for_each_entry(op, &priv->ops_list, list) { - switch (def->type) { + switch (type) { case BPF_MAP_TYPE_ARRAY: case BPF_MAP_TYPE_PERF_EVENT_ARRAY: switch (op->key_type) { case BPF_MAP_KEY_ALL: err = foreach_key_array_all(func, arg, name, - map_fd, def, op); + map_fd, map, op); break; case BPF_MAP_KEY_RANGES: err = foreach_key_array_ranges(func, arg, name, - map_fd, def, - op); + map_fd, map, op); break; default: pr_debug("ERROR: keytype for map '%s' invalid\n", @@ -1454,7 +1444,7 @@ apply_config_evsel_for_key(const char *name, int map_fd, void *pkey, static int apply_obj_config_map_for_key(const char *name, int map_fd, - const struct bpf_map_def *pdef, + const struct bpf_map *map, struct bpf_map_op *op, void *pkey, void *arg __maybe_unused) { @@ -1463,7 +1453,7 @@ apply_obj_config_map_for_key(const char *name, int map_fd, switch (op->op_type) { case BPF_MAP_OP_SET_VALUE: err = apply_config_value_for_key(map_fd, pkey, - pdef->value_size, + bpf_map__value_size(map), op->v.value); break; case BPF_MAP_OP_SET_EVSEL: diff --git a/tools/perf/util/bpf_map.c b/tools/perf/util/bpf_map.c index eb853ca67cf4..c863ae0c5cb5 100644 --- a/tools/perf/util/bpf_map.c +++ b/tools/perf/util/bpf_map.c @@ -9,25 +9,25 @@ #include <stdlib.h> #include <unistd.h> -static bool bpf_map_def__is_per_cpu(const struct bpf_map_def *def) +static bool bpf_map__is_per_cpu(enum bpf_map_type type) { - return def->type == BPF_MAP_TYPE_PERCPU_HASH || - def->type == BPF_MAP_TYPE_PERCPU_ARRAY || - def->type == BPF_MAP_TYPE_LRU_PERCPU_HASH || - def->type == BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE; + return type == BPF_MAP_TYPE_PERCPU_HASH || + type == BPF_MAP_TYPE_PERCPU_ARRAY || + type == BPF_MAP_TYPE_LRU_PERCPU_HASH || + type == BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE; } -static void *bpf_map_def__alloc_value(const struct bpf_map_def *def) +static void *bpf_map__alloc_value(const struct bpf_map *map) { - if (bpf_map_def__is_per_cpu(def)) - return malloc(round_up(def->value_size, 8) * sysconf(_SC_NPROCESSORS_CONF)); + if (bpf_map__is_per_cpu(bpf_map__type(map))) + return malloc(round_up(bpf_map__value_size(map), 8) * + sysconf(_SC_NPROCESSORS_CONF)); - return malloc(def->value_size); + return malloc(bpf_map__value_size(map)); } int bpf_map__fprintf(struct bpf_map *map, FILE *fp) { - const struct bpf_map_def *def = bpf_map__def(map); void *prev_key = NULL, *key, *value; int fd = bpf_map__fd(map), err; int printed = 0; @@ -35,15 +35,15 @@ int bpf_map__fprintf(struct bpf_map *map, FILE *fp) if (fd < 0) return fd; - if (IS_ERR(def)) - return PTR_ERR(def); + if (!map) + return PTR_ERR(map); err = -ENOMEM; - key = malloc(def->key_size); + key = malloc(bpf_map__key_size(map)); if (key == NULL) goto out; - value = bpf_map_def__alloc_value(def); + value = bpf_map__alloc_value(map); if (value == NULL) goto out_free_key; diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile index 42ffc24e9e71..945f92d71db3 100644 --- a/tools/testing/selftests/bpf/Makefile +++ b/tools/testing/selftests/bpf/Makefile @@ -21,7 +21,7 @@ endif BPF_GCC ?= $(shell command -v bpf-gcc;) SAN_CFLAGS ?= -CFLAGS += -g -O0 -rdynamic -Wall $(GENFLAGS) $(SAN_CFLAGS) \ +CFLAGS += -g -O0 -rdynamic -Wall -Werror $(GENFLAGS) $(SAN_CFLAGS) \ -I$(CURDIR) -I$(INCLUDE_DIR) -I$(GENDIR) -I$(LIBDIR) \ -I$(TOOLSINCDIR) -I$(APIDIR) -I$(OUTPUT) LDFLAGS += $(SAN_CFLAGS) @@ -292,7 +292,7 @@ IS_LITTLE_ENDIAN = $(shell $(CC) -dM -E - </dev/null | \ MENDIAN=$(if $(IS_LITTLE_ENDIAN),-mlittle-endian,-mbig-endian) CLANG_SYS_INCLUDES = $(call get_sys_includes,$(CLANG)) -BPF_CFLAGS = -g -D__TARGET_ARCH_$(SRCARCH) $(MENDIAN) \ +BPF_CFLAGS = -g -Werror -D__TARGET_ARCH_$(SRCARCH) $(MENDIAN) \ -I$(INCLUDE_DIR) -I$(CURDIR) -I$(APIDIR) \ -I$(abspath $(OUTPUT)/../usr/include) diff --git a/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c b/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c index df3b292a8ffe..bdbacf5adcd2 100644 --- a/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c +++ b/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c @@ -109,26 +109,31 @@ static struct bin_attribute bin_attr_bpf_testmod_file __ro_after_init = { .write = bpf_testmod_test_write, }; -BTF_SET_START(bpf_testmod_kfunc_ids) +BTF_SET_START(bpf_testmod_check_kfunc_ids) BTF_ID(func, bpf_testmod_test_mod_kfunc) -BTF_SET_END(bpf_testmod_kfunc_ids) +BTF_SET_END(bpf_testmod_check_kfunc_ids) -static DEFINE_KFUNC_BTF_ID_SET(&bpf_testmod_kfunc_ids, bpf_testmod_kfunc_btf_set); +static const struct btf_kfunc_id_set bpf_testmod_kfunc_set = { + .owner = THIS_MODULE, + .check_set = &bpf_testmod_check_kfunc_ids, +}; + +extern int bpf_fentry_test1(int a); static int bpf_testmod_init(void) { int ret; - ret = sysfs_create_bin_file(kernel_kobj, &bin_attr_bpf_testmod_file); - if (ret) + ret = register_btf_kfunc_id_set(BPF_PROG_TYPE_SCHED_CLS, &bpf_testmod_kfunc_set); + if (ret < 0) return ret; - register_kfunc_btf_id_set(&prog_test_kfunc_list, &bpf_testmod_kfunc_btf_set); - return 0; + if (bpf_fentry_test1(0) < 0) + return -EINVAL; + return sysfs_create_bin_file(kernel_kobj, &bin_attr_bpf_testmod_file); } static void bpf_testmod_exit(void) { - unregister_kfunc_btf_id_set(&prog_test_kfunc_list, &bpf_testmod_kfunc_btf_set); return sysfs_remove_bin_file(kernel_kobj, &bin_attr_bpf_testmod_file); } diff --git a/tools/testing/selftests/bpf/config b/tools/testing/selftests/bpf/config index f6287132fa89..763db63a3890 100644 --- a/tools/testing/selftests/bpf/config +++ b/tools/testing/selftests/bpf/config @@ -48,3 +48,8 @@ CONFIG_IMA_READ_POLICY=y CONFIG_BLK_DEV_LOOP=y CONFIG_FUNCTION_TRACER=y CONFIG_DYNAMIC_FTRACE=y +CONFIG_NETFILTER=y +CONFIG_NF_DEFRAG_IPV4=y +CONFIG_NF_DEFRAG_IPV6=y +CONFIG_NF_CONNTRACK=y +CONFIG_USERFAULTFD=y diff --git a/tools/testing/selftests/bpf/prog_tests/bind_perm.c b/tools/testing/selftests/bpf/prog_tests/bind_perm.c index d0f06e40c16d..eac71fbb24ce 100644 --- a/tools/testing/selftests/bpf/prog_tests/bind_perm.c +++ b/tools/testing/selftests/bpf/prog_tests/bind_perm.c @@ -1,13 +1,24 @@ // SPDX-License-Identifier: GPL-2.0 -#include <test_progs.h> -#include "bind_perm.skel.h" - +#define _GNU_SOURCE +#include <sched.h> +#include <stdlib.h> #include <sys/types.h> #include <sys/socket.h> #include <sys/capability.h> +#include "test_progs.h" +#include "bind_perm.skel.h" + static int duration; +static int create_netns(void) +{ + if (!ASSERT_OK(unshare(CLONE_NEWNET), "create netns")) + return -1; + + return 0; +} + void try_bind(int family, int port, int expected_errno) { struct sockaddr_storage addr = {}; @@ -75,6 +86,9 @@ void test_bind_perm(void) struct bind_perm *skel; int cgroup_fd; + if (create_netns()) + return; + cgroup_fd = test__join_cgroup("/bind_perm"); if (CHECK(cgroup_fd < 0, "cg-join", "errno %d", errno)) return; diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_iter_setsockopt_unix.c b/tools/testing/selftests/bpf/prog_tests/bpf_iter_setsockopt_unix.c new file mode 100644 index 000000000000..ee725d4d98a5 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/bpf_iter_setsockopt_unix.c @@ -0,0 +1,100 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright Amazon.com Inc. or its affiliates. */ +#include <sys/socket.h> +#include <sys/un.h> +#include <test_progs.h> +#include "bpf_iter_setsockopt_unix.skel.h" + +#define NR_CASES 5 + +static int create_unix_socket(struct bpf_iter_setsockopt_unix *skel) +{ + struct sockaddr_un addr = { + .sun_family = AF_UNIX, + .sun_path = "", + }; + socklen_t len; + int fd, err; + + fd = socket(AF_UNIX, SOCK_STREAM, 0); + if (!ASSERT_NEQ(fd, -1, "socket")) + return -1; + + len = offsetof(struct sockaddr_un, sun_path); + err = bind(fd, (struct sockaddr *)&addr, len); + if (!ASSERT_OK(err, "bind")) + return -1; + + len = sizeof(addr); + err = getsockname(fd, (struct sockaddr *)&addr, &len); + if (!ASSERT_OK(err, "getsockname")) + return -1; + + memcpy(&skel->bss->sun_path, &addr.sun_path, + len - offsetof(struct sockaddr_un, sun_path)); + + return fd; +} + +static void test_sndbuf(struct bpf_iter_setsockopt_unix *skel, int fd) +{ + socklen_t optlen; + int i, err; + + for (i = 0; i < NR_CASES; i++) { + if (!ASSERT_NEQ(skel->data->sndbuf_getsockopt[i], -1, + "bpf_(get|set)sockopt")) + return; + + err = setsockopt(fd, SOL_SOCKET, SO_SNDBUF, + &(skel->data->sndbuf_setsockopt[i]), + sizeof(skel->data->sndbuf_setsockopt[i])); + if (!ASSERT_OK(err, "setsockopt")) + return; + + optlen = sizeof(skel->bss->sndbuf_getsockopt_expected[i]); + err = getsockopt(fd, SOL_SOCKET, SO_SNDBUF, + &(skel->bss->sndbuf_getsockopt_expected[i]), + &optlen); + if (!ASSERT_OK(err, "getsockopt")) + return; + + if (!ASSERT_EQ(skel->data->sndbuf_getsockopt[i], + skel->bss->sndbuf_getsockopt_expected[i], + "bpf_(get|set)sockopt")) + return; + } +} + +void test_bpf_iter_setsockopt_unix(void) +{ + struct bpf_iter_setsockopt_unix *skel; + int err, unix_fd, iter_fd; + char buf; + + skel = bpf_iter_setsockopt_unix__open_and_load(); + if (!ASSERT_OK_PTR(skel, "open_and_load")) + return; + + unix_fd = create_unix_socket(skel); + if (!ASSERT_NEQ(unix_fd, -1, "create_unix_server")) + goto destroy; + + skel->links.change_sndbuf = bpf_program__attach_iter(skel->progs.change_sndbuf, NULL); + if (!ASSERT_OK_PTR(skel->links.change_sndbuf, "bpf_program__attach_iter")) + goto destroy; + + iter_fd = bpf_iter_create(bpf_link__fd(skel->links.change_sndbuf)); + if (!ASSERT_GE(iter_fd, 0, "bpf_iter_create")) + goto destroy; + + while ((err = read(iter_fd, &buf, sizeof(buf))) == -1 && + errno == EAGAIN) + ; + if (!ASSERT_OK(err, "read iter error")) + goto destroy; + + test_sndbuf(skel, unix_fd); +destroy: + bpf_iter_setsockopt_unix__destroy(skel); +} diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_mod_race.c b/tools/testing/selftests/bpf/prog_tests/bpf_mod_race.c new file mode 100644 index 000000000000..d43f548c572c --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/bpf_mod_race.c @@ -0,0 +1,230 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <unistd.h> +#include <pthread.h> +#include <sys/mman.h> +#include <stdatomic.h> +#include <test_progs.h> +#include <sys/syscall.h> +#include <linux/module.h> +#include <linux/userfaultfd.h> + +#include "ksym_race.skel.h" +#include "bpf_mod_race.skel.h" +#include "kfunc_call_race.skel.h" + +/* This test crafts a race between btf_try_get_module and do_init_module, and + * checks whether btf_try_get_module handles the invocation for a well-formed + * but uninitialized module correctly. Unless the module has completed its + * initcalls, the verifier should fail the program load and return ENXIO. + * + * userfaultfd is used to trigger a fault in an fmod_ret program, and make it + * sleep, then the BPF program is loaded and the return value from verifier is + * inspected. After this, the userfaultfd is closed so that the module loading + * thread makes forward progress, and fmod_ret injects an error so that the + * module load fails and it is freed. + * + * If the verifier succeeded in loading the supplied program, it will end up + * taking reference to freed module, and trigger a crash when the program fd + * is closed later. This is true for both kfuncs and ksyms. In both cases, + * the crash is triggered inside bpf_prog_free_deferred, when module reference + * is finally released. + */ + +struct test_config { + const char *str_open; + void *(*bpf_open_and_load)(); + void (*bpf_destroy)(void *); +}; + +enum test_state { + _TS_INVALID, + TS_MODULE_LOAD, + TS_MODULE_LOAD_FAIL, +}; + +static _Atomic enum test_state state = _TS_INVALID; + +static int sys_finit_module(int fd, const char *param_values, int flags) +{ + return syscall(__NR_finit_module, fd, param_values, flags); +} + +static int sys_delete_module(const char *name, unsigned int flags) +{ + return syscall(__NR_delete_module, name, flags); +} + +static int load_module(const char *mod) +{ + int ret, fd; + + fd = open("bpf_testmod.ko", O_RDONLY); + if (fd < 0) + return fd; + + ret = sys_finit_module(fd, "", 0); + close(fd); + if (ret < 0) + return ret; + return 0; +} + +static void *load_module_thread(void *p) +{ + + if (!ASSERT_NEQ(load_module("bpf_testmod.ko"), 0, "load_module_thread must fail")) + atomic_store(&state, TS_MODULE_LOAD); + else + atomic_store(&state, TS_MODULE_LOAD_FAIL); + return p; +} + +static int sys_userfaultfd(int flags) +{ + return syscall(__NR_userfaultfd, flags); +} + +static int test_setup_uffd(void *fault_addr) +{ + struct uffdio_register uffd_register = {}; + struct uffdio_api uffd_api = {}; + int uffd; + + uffd = sys_userfaultfd(O_CLOEXEC); + if (uffd < 0) + return -errno; + + uffd_api.api = UFFD_API; + uffd_api.features = 0; + if (ioctl(uffd, UFFDIO_API, &uffd_api)) { + close(uffd); + return -1; + } + + uffd_register.range.start = (unsigned long)fault_addr; + uffd_register.range.len = 4096; + uffd_register.mode = UFFDIO_REGISTER_MODE_MISSING; + if (ioctl(uffd, UFFDIO_REGISTER, &uffd_register)) { + close(uffd); + return -1; + } + return uffd; +} + +static void test_bpf_mod_race_config(const struct test_config *config) +{ + void *fault_addr, *skel_fail; + struct bpf_mod_race *skel; + struct uffd_msg uffd_msg; + pthread_t load_mod_thrd; + _Atomic int *blockingp; + int uffd, ret; + + fault_addr = mmap(0, 4096, PROT_READ, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + if (!ASSERT_NEQ(fault_addr, MAP_FAILED, "mmap for uffd registration")) + return; + + if (!ASSERT_OK(sys_delete_module("bpf_testmod", 0), "unload bpf_testmod")) + goto end_mmap; + + skel = bpf_mod_race__open(); + if (!ASSERT_OK_PTR(skel, "bpf_mod_kfunc_race__open")) + goto end_module; + + skel->rodata->bpf_mod_race_config.tgid = getpid(); + skel->rodata->bpf_mod_race_config.inject_error = -4242; + skel->rodata->bpf_mod_race_config.fault_addr = fault_addr; + if (!ASSERT_OK(bpf_mod_race__load(skel), "bpf_mod___load")) + goto end_destroy; + blockingp = (_Atomic int *)&skel->bss->bpf_blocking; + + if (!ASSERT_OK(bpf_mod_race__attach(skel), "bpf_mod_kfunc_race__attach")) + goto end_destroy; + + uffd = test_setup_uffd(fault_addr); + if (!ASSERT_GE(uffd, 0, "userfaultfd open + register address")) + goto end_destroy; + + if (!ASSERT_OK(pthread_create(&load_mod_thrd, NULL, load_module_thread, NULL), + "load module thread")) + goto end_uffd; + + /* Now, we either fail loading module, or block in bpf prog, spin to find out */ + while (!atomic_load(&state) && !atomic_load(blockingp)) + ; + if (!ASSERT_EQ(state, _TS_INVALID, "module load should block")) + goto end_join; + if (!ASSERT_EQ(*blockingp, 1, "module load blocked")) { + pthread_kill(load_mod_thrd, SIGKILL); + goto end_uffd; + } + + /* We might have set bpf_blocking to 1, but may have not blocked in + * bpf_copy_from_user. Read userfaultfd descriptor to verify that. + */ + if (!ASSERT_EQ(read(uffd, &uffd_msg, sizeof(uffd_msg)), sizeof(uffd_msg), + "read uffd block event")) + goto end_join; + if (!ASSERT_EQ(uffd_msg.event, UFFD_EVENT_PAGEFAULT, "read uffd event is pagefault")) + goto end_join; + + /* We know that load_mod_thrd is blocked in the fmod_ret program, the + * module state is still MODULE_STATE_COMING because mod->init hasn't + * returned. This is the time we try to load a program calling kfunc and + * check if we get ENXIO from verifier. + */ + skel_fail = config->bpf_open_and_load(); + ret = errno; + if (!ASSERT_EQ(skel_fail, NULL, config->str_open)) { + /* Close uffd to unblock load_mod_thrd */ + close(uffd); + uffd = -1; + while (atomic_load(blockingp) != 2) + ; + ASSERT_OK(kern_sync_rcu(), "kern_sync_rcu"); + config->bpf_destroy(skel_fail); + goto end_join; + + } + ASSERT_EQ(ret, ENXIO, "verifier returns ENXIO"); + ASSERT_EQ(skel->data->res_try_get_module, false, "btf_try_get_module == false"); + + close(uffd); + uffd = -1; +end_join: + pthread_join(load_mod_thrd, NULL); + if (uffd < 0) + ASSERT_EQ(atomic_load(&state), TS_MODULE_LOAD_FAIL, "load_mod_thrd success"); +end_uffd: + if (uffd >= 0) + close(uffd); +end_destroy: + bpf_mod_race__destroy(skel); + ASSERT_OK(kern_sync_rcu(), "kern_sync_rcu"); +end_module: + sys_delete_module("bpf_testmod", 0); + ASSERT_OK(load_module("bpf_testmod.ko"), "restore bpf_testmod"); +end_mmap: + munmap(fault_addr, 4096); + atomic_store(&state, _TS_INVALID); +} + +static const struct test_config ksym_config = { + .str_open = "ksym_race__open_and_load", + .bpf_open_and_load = (void *)ksym_race__open_and_load, + .bpf_destroy = (void *)ksym_race__destroy, +}; + +static const struct test_config kfunc_config = { + .str_open = "kfunc_call_race__open_and_load", + .bpf_open_and_load = (void *)kfunc_call_race__open_and_load, + .bpf_destroy = (void *)kfunc_call_race__destroy, +}; + +void serial_test_bpf_mod_race(void) +{ + if (test__start_subtest("ksym (used_btfs UAF)")) + test_bpf_mod_race_config(&ksym_config); + if (test__start_subtest("kfunc (kfunc_btf_tab UAF)")) + test_bpf_mod_race_config(&kfunc_config); +} diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_nf.c b/tools/testing/selftests/bpf/prog_tests/bpf_nf.c new file mode 100644 index 000000000000..e3166a81e989 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/bpf_nf.c @@ -0,0 +1,48 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <test_progs.h> +#include <network_helpers.h> +#include "test_bpf_nf.skel.h" + +enum { + TEST_XDP, + TEST_TC_BPF, +}; + +void test_bpf_nf_ct(int mode) +{ + struct test_bpf_nf *skel; + int prog_fd, err, retval; + + skel = test_bpf_nf__open_and_load(); + if (!ASSERT_OK_PTR(skel, "test_bpf_nf__open_and_load")) + return; + + if (mode == TEST_XDP) + prog_fd = bpf_program__fd(skel->progs.nf_xdp_ct_test); + else + prog_fd = bpf_program__fd(skel->progs.nf_skb_ct_test); + + err = bpf_prog_test_run(prog_fd, 1, &pkt_v4, sizeof(pkt_v4), NULL, NULL, + (__u32 *)&retval, NULL); + if (!ASSERT_OK(err, "bpf_prog_test_run")) + goto end; + + ASSERT_EQ(skel->bss->test_einval_bpf_tuple, -EINVAL, "Test EINVAL for NULL bpf_tuple"); + ASSERT_EQ(skel->bss->test_einval_reserved, -EINVAL, "Test EINVAL for reserved not set to 0"); + ASSERT_EQ(skel->bss->test_einval_netns_id, -EINVAL, "Test EINVAL for netns_id < -1"); + ASSERT_EQ(skel->bss->test_einval_len_opts, -EINVAL, "Test EINVAL for len__opts != NF_BPF_CT_OPTS_SZ"); + ASSERT_EQ(skel->bss->test_eproto_l4proto, -EPROTO, "Test EPROTO for l4proto != TCP or UDP"); + ASSERT_EQ(skel->bss->test_enonet_netns_id, -ENONET, "Test ENONET for bad but valid netns_id"); + ASSERT_EQ(skel->bss->test_enoent_lookup, -ENOENT, "Test ENOENT for failed lookup"); + ASSERT_EQ(skel->bss->test_eafnosupport, -EAFNOSUPPORT, "Test EAFNOSUPPORT for invalid len__tuple"); +end: + test_bpf_nf__destroy(skel); +} + +void test_bpf_nf(void) +{ + if (test__start_subtest("xdp-ct")) + test_bpf_nf_ct(TEST_XDP); + if (test__start_subtest("tc-bpf-ct")) + test_bpf_nf_ct(TEST_TC_BPF); +} diff --git a/tools/testing/selftests/bpf/prog_tests/btf.c b/tools/testing/selftests/bpf/prog_tests/btf.c index 8ba53acf9eb4..14f9b6136794 100644 --- a/tools/testing/selftests/bpf/prog_tests/btf.c +++ b/tools/testing/selftests/bpf/prog_tests/btf.c @@ -4560,6 +4560,8 @@ static void do_test_file(unsigned int test_num) has_btf_ext = btf_ext != NULL; btf_ext__free(btf_ext); + /* temporary disable LIBBPF_STRICT_MAP_DEFINITIONS to test legacy maps */ + libbpf_set_strict_mode((__LIBBPF_STRICT_LAST - 1) & ~LIBBPF_STRICT_MAP_DEFINITIONS); obj = bpf_object__open(test->file); err = libbpf_get_error(obj); if (CHECK(err, "obj: %d", err)) @@ -4684,6 +4686,8 @@ skip: fprintf(stderr, "OK"); done: + libbpf_set_strict_mode(LIBBPF_STRICT_ALL); + btf__free(btf); free(func_info); bpf_object__close(obj); diff --git a/tools/testing/selftests/bpf/prog_tests/cgroup_attach_multi.c b/tools/testing/selftests/bpf/prog_tests/cgroup_attach_multi.c index d3e8f729c623..38b3c47293da 100644 --- a/tools/testing/selftests/bpf/prog_tests/cgroup_attach_multi.c +++ b/tools/testing/selftests/bpf/prog_tests/cgroup_attach_multi.c @@ -194,14 +194,14 @@ void serial_test_cgroup_attach_multi(void) attach_opts.flags = BPF_F_ALLOW_OVERRIDE | BPF_F_REPLACE; attach_opts.replace_prog_fd = allow_prog[0]; - if (CHECK(!bpf_prog_attach_xattr(allow_prog[6], cg1, + if (CHECK(!bpf_prog_attach_opts(allow_prog[6], cg1, BPF_CGROUP_INET_EGRESS, &attach_opts), "fail_prog_replace_override", "unexpected success\n")) goto err; CHECK_FAIL(errno != EINVAL); attach_opts.flags = BPF_F_REPLACE; - if (CHECK(!bpf_prog_attach_xattr(allow_prog[6], cg1, + if (CHECK(!bpf_prog_attach_opts(allow_prog[6], cg1, BPF_CGROUP_INET_EGRESS, &attach_opts), "fail_prog_replace_no_multi", "unexpected success\n")) goto err; @@ -209,7 +209,7 @@ void serial_test_cgroup_attach_multi(void) attach_opts.flags = BPF_F_ALLOW_MULTI | BPF_F_REPLACE; attach_opts.replace_prog_fd = -1; - if (CHECK(!bpf_prog_attach_xattr(allow_prog[6], cg1, + if (CHECK(!bpf_prog_attach_opts(allow_prog[6], cg1, BPF_CGROUP_INET_EGRESS, &attach_opts), "fail_prog_replace_bad_fd", "unexpected success\n")) goto err; @@ -217,7 +217,7 @@ void serial_test_cgroup_attach_multi(void) /* replacing a program that is not attached to cgroup should fail */ attach_opts.replace_prog_fd = allow_prog[3]; - if (CHECK(!bpf_prog_attach_xattr(allow_prog[6], cg1, + if (CHECK(!bpf_prog_attach_opts(allow_prog[6], cg1, BPF_CGROUP_INET_EGRESS, &attach_opts), "fail_prog_replace_no_ent", "unexpected success\n")) goto err; @@ -225,14 +225,14 @@ void serial_test_cgroup_attach_multi(void) /* replace 1st from the top program */ attach_opts.replace_prog_fd = allow_prog[0]; - if (CHECK(bpf_prog_attach_xattr(allow_prog[6], cg1, + if (CHECK(bpf_prog_attach_opts(allow_prog[6], cg1, BPF_CGROUP_INET_EGRESS, &attach_opts), "prog_replace", "errno=%d\n", errno)) goto err; /* replace program with itself */ attach_opts.replace_prog_fd = allow_prog[6]; - if (CHECK(bpf_prog_attach_xattr(allow_prog[6], cg1, + if (CHECK(bpf_prog_attach_opts(allow_prog[6], cg1, BPF_CGROUP_INET_EGRESS, &attach_opts), "prog_replace", "errno=%d\n", errno)) goto err; diff --git a/tools/testing/selftests/bpf/prog_tests/cgroup_getset_retval.c b/tools/testing/selftests/bpf/prog_tests/cgroup_getset_retval.c new file mode 100644 index 000000000000..0b47c3c000c7 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/cgroup_getset_retval.c @@ -0,0 +1,481 @@ +// SPDX-License-Identifier: GPL-2.0-only + +/* + * Copyright 2021 Google LLC. + */ + +#include <test_progs.h> +#include <cgroup_helpers.h> +#include <network_helpers.h> + +#include "cgroup_getset_retval_setsockopt.skel.h" +#include "cgroup_getset_retval_getsockopt.skel.h" + +#define SOL_CUSTOM 0xdeadbeef + +static int zero; + +static void test_setsockopt_set(int cgroup_fd, int sock_fd) +{ + struct cgroup_getset_retval_setsockopt *obj; + struct bpf_link *link_set_eunatch = NULL; + + obj = cgroup_getset_retval_setsockopt__open_and_load(); + if (!ASSERT_OK_PTR(obj, "skel-load")) + return; + + /* Attach setsockopt that sets EUNATCH, assert that + * we actually get that error when we run setsockopt() + */ + link_set_eunatch = bpf_program__attach_cgroup(obj->progs.set_eunatch, + cgroup_fd); + if (!ASSERT_OK_PTR(link_set_eunatch, "cg-attach-set_eunatch")) + goto close_bpf_object; + + if (!ASSERT_ERR(setsockopt(sock_fd, SOL_SOCKET, SO_REUSEADDR, + &zero, sizeof(int)), "setsockopt")) + goto close_bpf_object; + if (!ASSERT_EQ(errno, EUNATCH, "setsockopt-errno")) + goto close_bpf_object; + + if (!ASSERT_EQ(obj->bss->invocations, 1, "invocations")) + goto close_bpf_object; + if (!ASSERT_FALSE(obj->bss->assertion_error, "assertion_error")) + goto close_bpf_object; + +close_bpf_object: + bpf_link__destroy(link_set_eunatch); + + cgroup_getset_retval_setsockopt__destroy(obj); +} + +static void test_setsockopt_set_and_get(int cgroup_fd, int sock_fd) +{ + struct cgroup_getset_retval_setsockopt *obj; + struct bpf_link *link_set_eunatch = NULL, *link_get_retval = NULL; + + obj = cgroup_getset_retval_setsockopt__open_and_load(); + if (!ASSERT_OK_PTR(obj, "skel-load")) + return; + + /* Attach setsockopt that sets EUNATCH, and one that gets the + * previously set errno. Assert that we get the same errno back. + */ + link_set_eunatch = bpf_program__attach_cgroup(obj->progs.set_eunatch, + cgroup_fd); + if (!ASSERT_OK_PTR(link_set_eunatch, "cg-attach-set_eunatch")) + goto close_bpf_object; + link_get_retval = bpf_program__attach_cgroup(obj->progs.get_retval, + cgroup_fd); + if (!ASSERT_OK_PTR(link_get_retval, "cg-attach-get_retval")) + goto close_bpf_object; + + if (!ASSERT_ERR(setsockopt(sock_fd, SOL_SOCKET, SO_REUSEADDR, + &zero, sizeof(int)), "setsockopt")) + goto close_bpf_object; + if (!ASSERT_EQ(errno, EUNATCH, "setsockopt-errno")) + goto close_bpf_object; + + if (!ASSERT_EQ(obj->bss->invocations, 2, "invocations")) + goto close_bpf_object; + if (!ASSERT_FALSE(obj->bss->assertion_error, "assertion_error")) + goto close_bpf_object; + if (!ASSERT_EQ(obj->bss->retval_value, -EUNATCH, "retval_value")) + goto close_bpf_object; + +close_bpf_object: + bpf_link__destroy(link_set_eunatch); + bpf_link__destroy(link_get_retval); + + cgroup_getset_retval_setsockopt__destroy(obj); +} + +static void test_setsockopt_default_zero(int cgroup_fd, int sock_fd) +{ + struct cgroup_getset_retval_setsockopt *obj; + struct bpf_link *link_get_retval = NULL; + + obj = cgroup_getset_retval_setsockopt__open_and_load(); + if (!ASSERT_OK_PTR(obj, "skel-load")) + return; + + /* Attach setsockopt that gets the previously set errno. + * Assert that, without anything setting one, we get 0. + */ + link_get_retval = bpf_program__attach_cgroup(obj->progs.get_retval, + cgroup_fd); + if (!ASSERT_OK_PTR(link_get_retval, "cg-attach-get_retval")) + goto close_bpf_object; + + if (!ASSERT_OK(setsockopt(sock_fd, SOL_SOCKET, SO_REUSEADDR, + &zero, sizeof(int)), "setsockopt")) + goto close_bpf_object; + + if (!ASSERT_EQ(obj->bss->invocations, 1, "invocations")) + goto close_bpf_object; + if (!ASSERT_FALSE(obj->bss->assertion_error, "assertion_error")) + goto close_bpf_object; + if (!ASSERT_EQ(obj->bss->retval_value, 0, "retval_value")) + goto close_bpf_object; + +close_bpf_object: + bpf_link__destroy(link_get_retval); + + cgroup_getset_retval_setsockopt__destroy(obj); +} + +static void test_setsockopt_default_zero_and_set(int cgroup_fd, int sock_fd) +{ + struct cgroup_getset_retval_setsockopt *obj; + struct bpf_link *link_get_retval = NULL, *link_set_eunatch = NULL; + + obj = cgroup_getset_retval_setsockopt__open_and_load(); + if (!ASSERT_OK_PTR(obj, "skel-load")) + return; + + /* Attach setsockopt that gets the previously set errno, and then + * one that sets the errno to EUNATCH. Assert that the get does not + * see EUNATCH set later, and does not prevent EUNATCH from being set. + */ + link_get_retval = bpf_program__attach_cgroup(obj->progs.get_retval, + cgroup_fd); + if (!ASSERT_OK_PTR(link_get_retval, "cg-attach-get_retval")) + goto close_bpf_object; + link_set_eunatch = bpf_program__attach_cgroup(obj->progs.set_eunatch, + cgroup_fd); + if (!ASSERT_OK_PTR(link_set_eunatch, "cg-attach-set_eunatch")) + goto close_bpf_object; + + if (!ASSERT_ERR(setsockopt(sock_fd, SOL_SOCKET, SO_REUSEADDR, + &zero, sizeof(int)), "setsockopt")) + goto close_bpf_object; + if (!ASSERT_EQ(errno, EUNATCH, "setsockopt-errno")) + goto close_bpf_object; + + if (!ASSERT_EQ(obj->bss->invocations, 2, "invocations")) + goto close_bpf_object; + if (!ASSERT_FALSE(obj->bss->assertion_error, "assertion_error")) + goto close_bpf_object; + if (!ASSERT_EQ(obj->bss->retval_value, 0, "retval_value")) + goto close_bpf_object; + +close_bpf_object: + bpf_link__destroy(link_get_retval); + bpf_link__destroy(link_set_eunatch); + + cgroup_getset_retval_setsockopt__destroy(obj); +} + +static void test_setsockopt_override(int cgroup_fd, int sock_fd) +{ + struct cgroup_getset_retval_setsockopt *obj; + struct bpf_link *link_set_eunatch = NULL, *link_set_eisconn = NULL; + struct bpf_link *link_get_retval = NULL; + + obj = cgroup_getset_retval_setsockopt__open_and_load(); + if (!ASSERT_OK_PTR(obj, "skel-load")) + return; + + /* Attach setsockopt that sets EUNATCH, then one that sets EISCONN, + * and then one that gets the exported errno. Assert both the syscall + * and the helper sees the last set errno. + */ + link_set_eunatch = bpf_program__attach_cgroup(obj->progs.set_eunatch, + cgroup_fd); + if (!ASSERT_OK_PTR(link_set_eunatch, "cg-attach-set_eunatch")) + goto close_bpf_object; + link_set_eisconn = bpf_program__attach_cgroup(obj->progs.set_eisconn, + cgroup_fd); + if (!ASSERT_OK_PTR(link_set_eisconn, "cg-attach-set_eisconn")) + goto close_bpf_object; + link_get_retval = bpf_program__attach_cgroup(obj->progs.get_retval, + cgroup_fd); + if (!ASSERT_OK_PTR(link_get_retval, "cg-attach-get_retval")) + goto close_bpf_object; + + if (!ASSERT_ERR(setsockopt(sock_fd, SOL_SOCKET, SO_REUSEADDR, + &zero, sizeof(int)), "setsockopt")) + goto close_bpf_object; + if (!ASSERT_EQ(errno, EISCONN, "setsockopt-errno")) + goto close_bpf_object; + + if (!ASSERT_EQ(obj->bss->invocations, 3, "invocations")) + goto close_bpf_object; + if (!ASSERT_FALSE(obj->bss->assertion_error, "assertion_error")) + goto close_bpf_object; + if (!ASSERT_EQ(obj->bss->retval_value, -EISCONN, "retval_value")) + goto close_bpf_object; + +close_bpf_object: + bpf_link__destroy(link_set_eunatch); + bpf_link__destroy(link_set_eisconn); + bpf_link__destroy(link_get_retval); + + cgroup_getset_retval_setsockopt__destroy(obj); +} + +static void test_setsockopt_legacy_eperm(int cgroup_fd, int sock_fd) +{ + struct cgroup_getset_retval_setsockopt *obj; + struct bpf_link *link_legacy_eperm = NULL, *link_get_retval = NULL; + + obj = cgroup_getset_retval_setsockopt__open_and_load(); + if (!ASSERT_OK_PTR(obj, "skel-load")) + return; + + /* Attach setsockopt that return a reject without setting errno + * (legacy reject), and one that gets the errno. Assert that for + * backward compatibility the syscall result in EPERM, and this + * is also visible to the helper. + */ + link_legacy_eperm = bpf_program__attach_cgroup(obj->progs.legacy_eperm, + cgroup_fd); + if (!ASSERT_OK_PTR(link_legacy_eperm, "cg-attach-legacy_eperm")) + goto close_bpf_object; + link_get_retval = bpf_program__attach_cgroup(obj->progs.get_retval, + cgroup_fd); + if (!ASSERT_OK_PTR(link_get_retval, "cg-attach-get_retval")) + goto close_bpf_object; + + if (!ASSERT_ERR(setsockopt(sock_fd, SOL_SOCKET, SO_REUSEADDR, + &zero, sizeof(int)), "setsockopt")) + goto close_bpf_object; + if (!ASSERT_EQ(errno, EPERM, "setsockopt-errno")) + goto close_bpf_object; + + if (!ASSERT_EQ(obj->bss->invocations, 2, "invocations")) + goto close_bpf_object; + if (!ASSERT_FALSE(obj->bss->assertion_error, "assertion_error")) + goto close_bpf_object; + if (!ASSERT_EQ(obj->bss->retval_value, -EPERM, "retval_value")) + goto close_bpf_object; + +close_bpf_object: + bpf_link__destroy(link_legacy_eperm); + bpf_link__destroy(link_get_retval); + + cgroup_getset_retval_setsockopt__destroy(obj); +} + +static void test_setsockopt_legacy_no_override(int cgroup_fd, int sock_fd) +{ + struct cgroup_getset_retval_setsockopt *obj; + struct bpf_link *link_set_eunatch = NULL, *link_legacy_eperm = NULL; + struct bpf_link *link_get_retval = NULL; + + obj = cgroup_getset_retval_setsockopt__open_and_load(); + if (!ASSERT_OK_PTR(obj, "skel-load")) + return; + + /* Attach setsockopt that sets EUNATCH, then one that return a reject + * without setting errno, and then one that gets the exported errno. + * Assert both the syscall and the helper's errno are unaffected by + * the second prog (i.e. legacy rejects does not override the errno + * to EPERM). + */ + link_set_eunatch = bpf_program__attach_cgroup(obj->progs.set_eunatch, + cgroup_fd); + if (!ASSERT_OK_PTR(link_set_eunatch, "cg-attach-set_eunatch")) + goto close_bpf_object; + link_legacy_eperm = bpf_program__attach_cgroup(obj->progs.legacy_eperm, + cgroup_fd); + if (!ASSERT_OK_PTR(link_legacy_eperm, "cg-attach-legacy_eperm")) + goto close_bpf_object; + link_get_retval = bpf_program__attach_cgroup(obj->progs.get_retval, + cgroup_fd); + if (!ASSERT_OK_PTR(link_get_retval, "cg-attach-get_retval")) + goto close_bpf_object; + + if (!ASSERT_ERR(setsockopt(sock_fd, SOL_SOCKET, SO_REUSEADDR, + &zero, sizeof(int)), "setsockopt")) + goto close_bpf_object; + if (!ASSERT_EQ(errno, EUNATCH, "setsockopt-errno")) + goto close_bpf_object; + + if (!ASSERT_EQ(obj->bss->invocations, 3, "invocations")) + goto close_bpf_object; + if (!ASSERT_FALSE(obj->bss->assertion_error, "assertion_error")) + goto close_bpf_object; + if (!ASSERT_EQ(obj->bss->retval_value, -EUNATCH, "retval_value")) + goto close_bpf_object; + +close_bpf_object: + bpf_link__destroy(link_set_eunatch); + bpf_link__destroy(link_legacy_eperm); + bpf_link__destroy(link_get_retval); + + cgroup_getset_retval_setsockopt__destroy(obj); +} + +static void test_getsockopt_get(int cgroup_fd, int sock_fd) +{ + struct cgroup_getset_retval_getsockopt *obj; + struct bpf_link *link_get_retval = NULL; + int buf; + socklen_t optlen = sizeof(buf); + + obj = cgroup_getset_retval_getsockopt__open_and_load(); + if (!ASSERT_OK_PTR(obj, "skel-load")) + return; + + /* Attach getsockopt that gets previously set errno. Assert that the + * error from kernel is in both ctx_retval_value and retval_value. + */ + link_get_retval = bpf_program__attach_cgroup(obj->progs.get_retval, + cgroup_fd); + if (!ASSERT_OK_PTR(link_get_retval, "cg-attach-get_retval")) + goto close_bpf_object; + + if (!ASSERT_ERR(getsockopt(sock_fd, SOL_CUSTOM, 0, + &buf, &optlen), "getsockopt")) + goto close_bpf_object; + if (!ASSERT_EQ(errno, EOPNOTSUPP, "getsockopt-errno")) + goto close_bpf_object; + + if (!ASSERT_EQ(obj->bss->invocations, 1, "invocations")) + goto close_bpf_object; + if (!ASSERT_FALSE(obj->bss->assertion_error, "assertion_error")) + goto close_bpf_object; + if (!ASSERT_EQ(obj->bss->retval_value, -EOPNOTSUPP, "retval_value")) + goto close_bpf_object; + if (!ASSERT_EQ(obj->bss->ctx_retval_value, -EOPNOTSUPP, "ctx_retval_value")) + goto close_bpf_object; + +close_bpf_object: + bpf_link__destroy(link_get_retval); + + cgroup_getset_retval_getsockopt__destroy(obj); +} + +static void test_getsockopt_override(int cgroup_fd, int sock_fd) +{ + struct cgroup_getset_retval_getsockopt *obj; + struct bpf_link *link_set_eisconn = NULL; + int buf; + socklen_t optlen = sizeof(buf); + + obj = cgroup_getset_retval_getsockopt__open_and_load(); + if (!ASSERT_OK_PTR(obj, "skel-load")) + return; + + /* Attach getsockopt that sets retval to -EISCONN. Assert that this + * overrides the value from kernel. + */ + link_set_eisconn = bpf_program__attach_cgroup(obj->progs.set_eisconn, + cgroup_fd); + if (!ASSERT_OK_PTR(link_set_eisconn, "cg-attach-set_eisconn")) + goto close_bpf_object; + + if (!ASSERT_ERR(getsockopt(sock_fd, SOL_CUSTOM, 0, + &buf, &optlen), "getsockopt")) + goto close_bpf_object; + if (!ASSERT_EQ(errno, EISCONN, "getsockopt-errno")) + goto close_bpf_object; + + if (!ASSERT_EQ(obj->bss->invocations, 1, "invocations")) + goto close_bpf_object; + if (!ASSERT_FALSE(obj->bss->assertion_error, "assertion_error")) + goto close_bpf_object; + +close_bpf_object: + bpf_link__destroy(link_set_eisconn); + + cgroup_getset_retval_getsockopt__destroy(obj); +} + +static void test_getsockopt_retval_sync(int cgroup_fd, int sock_fd) +{ + struct cgroup_getset_retval_getsockopt *obj; + struct bpf_link *link_set_eisconn = NULL, *link_clear_retval = NULL; + struct bpf_link *link_get_retval = NULL; + int buf; + socklen_t optlen = sizeof(buf); + + obj = cgroup_getset_retval_getsockopt__open_and_load(); + if (!ASSERT_OK_PTR(obj, "skel-load")) + return; + + /* Attach getsockopt that sets retval to -EISCONN, and one that clears + * ctx retval. Assert that the clearing ctx retval is synced to helper + * and clears any errors both from kernel and BPF.. + */ + link_set_eisconn = bpf_program__attach_cgroup(obj->progs.set_eisconn, + cgroup_fd); + if (!ASSERT_OK_PTR(link_set_eisconn, "cg-attach-set_eisconn")) + goto close_bpf_object; + link_clear_retval = bpf_program__attach_cgroup(obj->progs.clear_retval, + cgroup_fd); + if (!ASSERT_OK_PTR(link_clear_retval, "cg-attach-clear_retval")) + goto close_bpf_object; + link_get_retval = bpf_program__attach_cgroup(obj->progs.get_retval, + cgroup_fd); + if (!ASSERT_OK_PTR(link_get_retval, "cg-attach-get_retval")) + goto close_bpf_object; + + if (!ASSERT_OK(getsockopt(sock_fd, SOL_CUSTOM, 0, + &buf, &optlen), "getsockopt")) + goto close_bpf_object; + + if (!ASSERT_EQ(obj->bss->invocations, 3, "invocations")) + goto close_bpf_object; + if (!ASSERT_FALSE(obj->bss->assertion_error, "assertion_error")) + goto close_bpf_object; + if (!ASSERT_EQ(obj->bss->retval_value, 0, "retval_value")) + goto close_bpf_object; + if (!ASSERT_EQ(obj->bss->ctx_retval_value, 0, "ctx_retval_value")) + goto close_bpf_object; + +close_bpf_object: + bpf_link__destroy(link_set_eisconn); + bpf_link__destroy(link_clear_retval); + bpf_link__destroy(link_get_retval); + + cgroup_getset_retval_getsockopt__destroy(obj); +} + +void test_cgroup_getset_retval(void) +{ + int cgroup_fd = -1; + int sock_fd = -1; + + cgroup_fd = test__join_cgroup("/cgroup_getset_retval"); + if (!ASSERT_GE(cgroup_fd, 0, "cg-create")) + goto close_fd; + + sock_fd = start_server(AF_INET, SOCK_DGRAM, NULL, 0, 0); + if (!ASSERT_GE(sock_fd, 0, "start-server")) + goto close_fd; + + if (test__start_subtest("setsockopt-set")) + test_setsockopt_set(cgroup_fd, sock_fd); + + if (test__start_subtest("setsockopt-set_and_get")) + test_setsockopt_set_and_get(cgroup_fd, sock_fd); + + if (test__start_subtest("setsockopt-default_zero")) + test_setsockopt_default_zero(cgroup_fd, sock_fd); + + if (test__start_subtest("setsockopt-default_zero_and_set")) + test_setsockopt_default_zero_and_set(cgroup_fd, sock_fd); + + if (test__start_subtest("setsockopt-override")) + test_setsockopt_override(cgroup_fd, sock_fd); + + if (test__start_subtest("setsockopt-legacy_eperm")) + test_setsockopt_legacy_eperm(cgroup_fd, sock_fd); + + if (test__start_subtest("setsockopt-legacy_no_override")) + test_setsockopt_legacy_no_override(cgroup_fd, sock_fd); + + if (test__start_subtest("getsockopt-get")) + test_getsockopt_get(cgroup_fd, sock_fd); + + if (test__start_subtest("getsockopt-override")) + test_getsockopt_override(cgroup_fd, sock_fd); + + if (test__start_subtest("getsockopt-retval_sync")) + test_getsockopt_retval_sync(cgroup_fd, sock_fd); + +close_fd: + close(cgroup_fd); +} diff --git a/tools/testing/selftests/bpf/prog_tests/flow_dissector.c b/tools/testing/selftests/bpf/prog_tests/flow_dissector.c index ac54e3f91d42..dfafd62df50b 100644 --- a/tools/testing/selftests/bpf/prog_tests/flow_dissector.c +++ b/tools/testing/selftests/bpf/prog_tests/flow_dissector.c @@ -457,7 +457,7 @@ static int init_prog_array(struct bpf_object *obj, struct bpf_map *prog_array) if (map_fd < 0) return -1; - for (i = 0; i < bpf_map__def(prog_array)->max_entries; i++) { + for (i = 0; i < bpf_map__max_entries(prog_array); i++) { snprintf(prog_name, sizeof(prog_name), "flow_dissector_%d", i); prog = bpf_object__find_program_by_name(obj, prog_name); diff --git a/tools/testing/selftests/bpf/prog_tests/global_data.c b/tools/testing/selftests/bpf/prog_tests/global_data.c index 9da131b32e13..917165e04427 100644 --- a/tools/testing/selftests/bpf/prog_tests/global_data.c +++ b/tools/testing/selftests/bpf/prog_tests/global_data.c @@ -121,7 +121,7 @@ static void test_global_data_rdonly(struct bpf_object *obj, __u32 duration) if (CHECK_FAIL(map_fd < 0)) return; - buff = malloc(bpf_map__def(map)->value_size); + buff = malloc(bpf_map__value_size(map)); if (buff) err = bpf_map_update_elem(map_fd, &zero, buff, 0); free(buff); diff --git a/tools/testing/selftests/bpf/prog_tests/global_data_init.c b/tools/testing/selftests/bpf/prog_tests/global_data_init.c index 1db86eab101b..57331c606964 100644 --- a/tools/testing/selftests/bpf/prog_tests/global_data_init.c +++ b/tools/testing/selftests/bpf/prog_tests/global_data_init.c @@ -20,7 +20,7 @@ void test_global_data_init(void) if (CHECK_FAIL(!map || !bpf_map__is_internal(map))) goto out; - sz = bpf_map__def(map)->value_size; + sz = bpf_map__value_size(map); newval = malloc(sz); if (CHECK_FAIL(!newval)) goto out; diff --git a/tools/testing/selftests/bpf/prog_tests/kfunc_call.c b/tools/testing/selftests/bpf/prog_tests/kfunc_call.c index 7d7445ccc141..b39a4f09aefd 100644 --- a/tools/testing/selftests/bpf/prog_tests/kfunc_call.c +++ b/tools/testing/selftests/bpf/prog_tests/kfunc_call.c @@ -27,6 +27,12 @@ static void test_main(void) ASSERT_OK(err, "bpf_prog_test_run(test2)"); ASSERT_EQ(retval, 3, "test2-retval"); + prog_fd = skel->progs.kfunc_call_test_ref_btf_id.prog_fd; + err = bpf_prog_test_run(prog_fd, 1, &pkt_v4, sizeof(pkt_v4), + NULL, NULL, (__u32 *)&retval, NULL); + ASSERT_OK(err, "bpf_prog_test_run(test_ref_btf_id)"); + ASSERT_EQ(retval, 0, "test_ref_btf_id-retval"); + kfunc_call_test_lskel__destroy(skel); } diff --git a/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c b/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c index 85db0f4cdd95..b97a8f236b3a 100644 --- a/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c +++ b/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c @@ -8,6 +8,7 @@ #include "test_sockmap_update.skel.h" #include "test_sockmap_invalid_update.skel.h" #include "test_sockmap_skb_verdict_attach.skel.h" +#include "test_sockmap_progs_query.skel.h" #include "bpf_iter_sockmap.skel.h" #define TCP_REPAIR 19 /* TCP sock is under repair right now */ @@ -315,6 +316,63 @@ out: test_sockmap_skb_verdict_attach__destroy(skel); } +static __u32 query_prog_id(int prog_fd) +{ + struct bpf_prog_info info = {}; + __u32 info_len = sizeof(info); + int err; + + err = bpf_obj_get_info_by_fd(prog_fd, &info, &info_len); + if (!ASSERT_OK(err, "bpf_obj_get_info_by_fd") || + !ASSERT_EQ(info_len, sizeof(info), "bpf_obj_get_info_by_fd")) + return 0; + + return info.id; +} + +static void test_sockmap_progs_query(enum bpf_attach_type attach_type) +{ + struct test_sockmap_progs_query *skel; + int err, map_fd, verdict_fd; + __u32 attach_flags = 0; + __u32 prog_ids[3] = {}; + __u32 prog_cnt = 3; + + skel = test_sockmap_progs_query__open_and_load(); + if (!ASSERT_OK_PTR(skel, "test_sockmap_progs_query__open_and_load")) + return; + + map_fd = bpf_map__fd(skel->maps.sock_map); + + if (attach_type == BPF_SK_MSG_VERDICT) + verdict_fd = bpf_program__fd(skel->progs.prog_skmsg_verdict); + else + verdict_fd = bpf_program__fd(skel->progs.prog_skb_verdict); + + err = bpf_prog_query(map_fd, attach_type, 0 /* query flags */, + &attach_flags, prog_ids, &prog_cnt); + ASSERT_OK(err, "bpf_prog_query failed"); + ASSERT_EQ(attach_flags, 0, "wrong attach_flags on query"); + ASSERT_EQ(prog_cnt, 0, "wrong program count on query"); + + err = bpf_prog_attach(verdict_fd, map_fd, attach_type, 0); + if (!ASSERT_OK(err, "bpf_prog_attach failed")) + goto out; + + prog_cnt = 1; + err = bpf_prog_query(map_fd, attach_type, 0 /* query flags */, + &attach_flags, prog_ids, &prog_cnt); + ASSERT_OK(err, "bpf_prog_query failed"); + ASSERT_EQ(attach_flags, 0, "wrong attach_flags on query"); + ASSERT_EQ(prog_cnt, 1, "wrong program count on query"); + ASSERT_EQ(prog_ids[0], query_prog_id(verdict_fd), + "wrong prog_ids on query"); + + bpf_prog_detach2(verdict_fd, map_fd, attach_type); +out: + test_sockmap_progs_query__destroy(skel); +} + void test_sockmap_basic(void) { if (test__start_subtest("sockmap create_update_free")) @@ -341,4 +399,12 @@ void test_sockmap_basic(void) test_sockmap_skb_verdict_attach(BPF_SK_SKB_STREAM_VERDICT, BPF_SK_SKB_VERDICT); } + if (test__start_subtest("sockmap msg_verdict progs query")) + test_sockmap_progs_query(BPF_SK_MSG_VERDICT); + if (test__start_subtest("sockmap stream_parser progs query")) + test_sockmap_progs_query(BPF_SK_SKB_STREAM_PARSER); + if (test__start_subtest("sockmap stream_verdict progs query")) + test_sockmap_progs_query(BPF_SK_SKB_STREAM_VERDICT); + if (test__start_subtest("sockmap skb_verdict progs query")) + test_sockmap_progs_query(BPF_SK_SKB_VERDICT); } diff --git a/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c b/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c index 7e21bfab6358..2cf0c7a3fe23 100644 --- a/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c +++ b/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c @@ -1413,14 +1413,12 @@ close_srv1: static void test_ops_cleanup(const struct bpf_map *map) { - const struct bpf_map_def *def; int err, mapfd; u32 key; - def = bpf_map__def(map); mapfd = bpf_map__fd(map); - for (key = 0; key < def->max_entries; key++) { + for (key = 0; key < bpf_map__max_entries(map); key++) { err = bpf_map_delete_elem(mapfd, &key); if (err && errno != EINVAL && errno != ENOENT) FAIL_ERRNO("map_delete: expected EINVAL/ENOENT"); @@ -1443,13 +1441,13 @@ static const char *family_str(sa_family_t family) static const char *map_type_str(const struct bpf_map *map) { - const struct bpf_map_def *def; + int type; - def = bpf_map__def(map); - if (IS_ERR(def)) + if (!map) return "invalid"; + type = bpf_map__type(map); - switch (def->type) { + switch (type) { case BPF_MAP_TYPE_SOCKMAP: return "sockmap"; case BPF_MAP_TYPE_SOCKHASH: diff --git a/tools/testing/selftests/bpf/prog_tests/sockopt_sk.c b/tools/testing/selftests/bpf/prog_tests/sockopt_sk.c index 4b937e5dbaca..30a99d2ed5c6 100644 --- a/tools/testing/selftests/bpf/prog_tests/sockopt_sk.c +++ b/tools/testing/selftests/bpf/prog_tests/sockopt_sk.c @@ -173,11 +173,11 @@ static int getsetsockopt(void) } memset(&buf, 0, sizeof(buf)); - buf.zc.address = 12345; /* rejected by BPF */ + buf.zc.address = 12345; /* Not page aligned. Rejected by tcp_zerocopy_receive() */ optlen = sizeof(buf.zc); errno = 0; err = getsockopt(fd, SOL_TCP, TCP_ZEROCOPY_RECEIVE, &buf, &optlen); - if (errno != EPERM) { + if (errno != EINVAL) { log_err("Unexpected getsockopt(TCP_ZEROCOPY_RECEIVE) err=%d errno=%d", err, errno); goto err; diff --git a/tools/testing/selftests/bpf/prog_tests/tailcalls.c b/tools/testing/selftests/bpf/prog_tests/tailcalls.c index 5dc0f425bd11..796f231582f8 100644 --- a/tools/testing/selftests/bpf/prog_tests/tailcalls.c +++ b/tools/testing/selftests/bpf/prog_tests/tailcalls.c @@ -37,7 +37,7 @@ static void test_tailcall_1(void) if (CHECK_FAIL(map_fd < 0)) goto out; - for (i = 0; i < bpf_map__def(prog_array)->max_entries; i++) { + for (i = 0; i < bpf_map__max_entries(prog_array); i++) { snprintf(prog_name, sizeof(prog_name), "classifier_%d", i); prog = bpf_object__find_program_by_name(obj, prog_name); @@ -53,7 +53,7 @@ static void test_tailcall_1(void) goto out; } - for (i = 0; i < bpf_map__def(prog_array)->max_entries; i++) { + for (i = 0; i < bpf_map__max_entries(prog_array); i++) { err = bpf_prog_test_run(main_fd, 1, buff, sizeof(buff), 0, &duration, &retval, NULL); CHECK(err || retval != i, "tailcall", @@ -69,7 +69,7 @@ static void test_tailcall_1(void) CHECK(err || retval != 3, "tailcall", "err %d errno %d retval %d\n", err, errno, retval); - for (i = 0; i < bpf_map__def(prog_array)->max_entries; i++) { + for (i = 0; i < bpf_map__max_entries(prog_array); i++) { snprintf(prog_name, sizeof(prog_name), "classifier_%d", i); prog = bpf_object__find_program_by_name(obj, prog_name); @@ -90,8 +90,8 @@ static void test_tailcall_1(void) CHECK(err || retval != 0, "tailcall", "err %d errno %d retval %d\n", err, errno, retval); - for (i = 0; i < bpf_map__def(prog_array)->max_entries; i++) { - j = bpf_map__def(prog_array)->max_entries - 1 - i; + for (i = 0; i < bpf_map__max_entries(prog_array); i++) { + j = bpf_map__max_entries(prog_array) - 1 - i; snprintf(prog_name, sizeof(prog_name), "classifier_%d", j); prog = bpf_object__find_program_by_name(obj, prog_name); @@ -107,8 +107,8 @@ static void test_tailcall_1(void) goto out; } - for (i = 0; i < bpf_map__def(prog_array)->max_entries; i++) { - j = bpf_map__def(prog_array)->max_entries - 1 - i; + for (i = 0; i < bpf_map__max_entries(prog_array); i++) { + j = bpf_map__max_entries(prog_array) - 1 - i; err = bpf_prog_test_run(main_fd, 1, buff, sizeof(buff), 0, &duration, &retval, NULL); @@ -125,7 +125,7 @@ static void test_tailcall_1(void) CHECK(err || retval != 3, "tailcall", "err %d errno %d retval %d\n", err, errno, retval); - for (i = 0; i < bpf_map__def(prog_array)->max_entries; i++) { + for (i = 0; i < bpf_map__max_entries(prog_array); i++) { err = bpf_map_delete_elem(map_fd, &i); if (CHECK_FAIL(err >= 0 || errno != ENOENT)) goto out; @@ -175,7 +175,7 @@ static void test_tailcall_2(void) if (CHECK_FAIL(map_fd < 0)) goto out; - for (i = 0; i < bpf_map__def(prog_array)->max_entries; i++) { + for (i = 0; i < bpf_map__max_entries(prog_array); i++) { snprintf(prog_name, sizeof(prog_name), "classifier_%d", i); prog = bpf_object__find_program_by_name(obj, prog_name); @@ -353,7 +353,7 @@ static void test_tailcall_4(void) if (CHECK_FAIL(map_fd < 0)) return; - for (i = 0; i < bpf_map__def(prog_array)->max_entries; i++) { + for (i = 0; i < bpf_map__max_entries(prog_array); i++) { snprintf(prog_name, sizeof(prog_name), "classifier_%d", i); prog = bpf_object__find_program_by_name(obj, prog_name); @@ -369,7 +369,7 @@ static void test_tailcall_4(void) goto out; } - for (i = 0; i < bpf_map__def(prog_array)->max_entries; i++) { + for (i = 0; i < bpf_map__max_entries(prog_array); i++) { err = bpf_map_update_elem(data_fd, &zero, &i, BPF_ANY); if (CHECK_FAIL(err)) goto out; @@ -380,7 +380,7 @@ static void test_tailcall_4(void) "err %d errno %d retval %d\n", err, errno, retval); } - for (i = 0; i < bpf_map__def(prog_array)->max_entries; i++) { + for (i = 0; i < bpf_map__max_entries(prog_array); i++) { err = bpf_map_update_elem(data_fd, &zero, &i, BPF_ANY); if (CHECK_FAIL(err)) goto out; @@ -441,7 +441,7 @@ static void test_tailcall_5(void) if (CHECK_FAIL(map_fd < 0)) return; - for (i = 0; i < bpf_map__def(prog_array)->max_entries; i++) { + for (i = 0; i < bpf_map__max_entries(prog_array); i++) { snprintf(prog_name, sizeof(prog_name), "classifier_%d", i); prog = bpf_object__find_program_by_name(obj, prog_name); @@ -457,7 +457,7 @@ static void test_tailcall_5(void) goto out; } - for (i = 0; i < bpf_map__def(prog_array)->max_entries; i++) { + for (i = 0; i < bpf_map__max_entries(prog_array); i++) { err = bpf_map_update_elem(data_fd, &zero, &key[i], BPF_ANY); if (CHECK_FAIL(err)) goto out; @@ -468,7 +468,7 @@ static void test_tailcall_5(void) "err %d errno %d retval %d\n", err, errno, retval); } - for (i = 0; i < bpf_map__def(prog_array)->max_entries; i++) { + for (i = 0; i < bpf_map__max_entries(prog_array); i++) { err = bpf_map_update_elem(data_fd, &zero, &key[i], BPF_ANY); if (CHECK_FAIL(err)) goto out; @@ -520,7 +520,7 @@ static void test_tailcall_bpf2bpf_1(void) goto out; /* nop -> jmp */ - for (i = 0; i < bpf_map__def(prog_array)->max_entries; i++) { + for (i = 0; i < bpf_map__max_entries(prog_array); i++) { snprintf(prog_name, sizeof(prog_name), "classifier_%d", i); prog = bpf_object__find_program_by_name(obj, prog_name); @@ -681,7 +681,7 @@ static void test_tailcall_bpf2bpf_3(void) if (CHECK_FAIL(map_fd < 0)) goto out; - for (i = 0; i < bpf_map__def(prog_array)->max_entries; i++) { + for (i = 0; i < bpf_map__max_entries(prog_array); i++) { snprintf(prog_name, sizeof(prog_name), "classifier_%d", i); prog = bpf_object__find_program_by_name(obj, prog_name); @@ -778,7 +778,7 @@ static void test_tailcall_bpf2bpf_4(bool noise) if (CHECK_FAIL(map_fd < 0)) goto out; - for (i = 0; i < bpf_map__def(prog_array)->max_entries; i++) { + for (i = 0; i < bpf_map__max_entries(prog_array); i++) { snprintf(prog_name, sizeof(prog_name), "classifier_%d", i); prog = bpf_object__find_program_by_name(obj, prog_name); diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_adjust_frags.c b/tools/testing/selftests/bpf/prog_tests/xdp_adjust_frags.c new file mode 100644 index 000000000000..31c188666e81 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/xdp_adjust_frags.c @@ -0,0 +1,104 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <test_progs.h> +#include <network_helpers.h> + +void test_xdp_update_frags(void) +{ + const char *file = "./test_xdp_update_frags.o"; + __u32 duration, retval, size; + struct bpf_program *prog; + struct bpf_object *obj; + int err, prog_fd; + __u32 *offset; + __u8 *buf; + + obj = bpf_object__open(file); + if (libbpf_get_error(obj)) + return; + + prog = bpf_object__next_program(obj, NULL); + if (bpf_object__load(obj)) + return; + + prog_fd = bpf_program__fd(prog); + + buf = malloc(128); + if (!ASSERT_OK_PTR(buf, "alloc buf 128b")) + goto out; + + memset(buf, 0, 128); + offset = (__u32 *)buf; + *offset = 16; + buf[*offset] = 0xaa; /* marker at offset 16 (head) */ + buf[*offset + 15] = 0xaa; /* marker at offset 31 (head) */ + + err = bpf_prog_test_run(prog_fd, 1, buf, 128, + buf, &size, &retval, &duration); + + /* test_xdp_update_frags: buf[16,31]: 0xaa -> 0xbb */ + ASSERT_OK(err, "xdp_update_frag"); + ASSERT_EQ(retval, XDP_PASS, "xdp_update_frag retval"); + ASSERT_EQ(buf[16], 0xbb, "xdp_update_frag buf[16]"); + ASSERT_EQ(buf[31], 0xbb, "xdp_update_frag buf[31]"); + + free(buf); + + buf = malloc(9000); + if (!ASSERT_OK_PTR(buf, "alloc buf 9Kb")) + goto out; + + memset(buf, 0, 9000); + offset = (__u32 *)buf; + *offset = 5000; + buf[*offset] = 0xaa; /* marker at offset 5000 (frag0) */ + buf[*offset + 15] = 0xaa; /* marker at offset 5015 (frag0) */ + + err = bpf_prog_test_run(prog_fd, 1, buf, 9000, + buf, &size, &retval, &duration); + + /* test_xdp_update_frags: buf[5000,5015]: 0xaa -> 0xbb */ + ASSERT_OK(err, "xdp_update_frag"); + ASSERT_EQ(retval, XDP_PASS, "xdp_update_frag retval"); + ASSERT_EQ(buf[5000], 0xbb, "xdp_update_frag buf[5000]"); + ASSERT_EQ(buf[5015], 0xbb, "xdp_update_frag buf[5015]"); + + memset(buf, 0, 9000); + offset = (__u32 *)buf; + *offset = 3510; + buf[*offset] = 0xaa; /* marker at offset 3510 (head) */ + buf[*offset + 15] = 0xaa; /* marker at offset 3525 (frag0) */ + + err = bpf_prog_test_run(prog_fd, 1, buf, 9000, + buf, &size, &retval, &duration); + + /* test_xdp_update_frags: buf[3510,3525]: 0xaa -> 0xbb */ + ASSERT_OK(err, "xdp_update_frag"); + ASSERT_EQ(retval, XDP_PASS, "xdp_update_frag retval"); + ASSERT_EQ(buf[3510], 0xbb, "xdp_update_frag buf[3510]"); + ASSERT_EQ(buf[3525], 0xbb, "xdp_update_frag buf[3525]"); + + memset(buf, 0, 9000); + offset = (__u32 *)buf; + *offset = 7606; + buf[*offset] = 0xaa; /* marker at offset 7606 (frag0) */ + buf[*offset + 15] = 0xaa; /* marker at offset 7621 (frag1) */ + + err = bpf_prog_test_run(prog_fd, 1, buf, 9000, + buf, &size, &retval, &duration); + + /* test_xdp_update_frags: buf[7606,7621]: 0xaa -> 0xbb */ + ASSERT_OK(err, "xdp_update_frag"); + ASSERT_EQ(retval, XDP_PASS, "xdp_update_frag retval"); + ASSERT_EQ(buf[7606], 0xbb, "xdp_update_frag buf[7606]"); + ASSERT_EQ(buf[7621], 0xbb, "xdp_update_frag buf[7621]"); + + free(buf); +out: + bpf_object__close(obj); +} + +void test_xdp_adjust_frags(void) +{ + if (test__start_subtest("xdp_adjust_frags")) + test_xdp_update_frags(); +} diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_adjust_tail.c b/tools/testing/selftests/bpf/prog_tests/xdp_adjust_tail.c index 3f5a17c38be5..ccc9e63254a8 100644 --- a/tools/testing/selftests/bpf/prog_tests/xdp_adjust_tail.c +++ b/tools/testing/selftests/bpf/prog_tests/xdp_adjust_tail.c @@ -11,22 +11,21 @@ static void test_xdp_adjust_tail_shrink(void) char buf[128]; err = bpf_prog_test_load(file, BPF_PROG_TYPE_XDP, &obj, &prog_fd); - if (CHECK_FAIL(err)) + if (ASSERT_OK(err, "test_xdp_adjust_tail_shrink")) return; err = bpf_prog_test_run(prog_fd, 1, &pkt_v4, sizeof(pkt_v4), buf, &size, &retval, &duration); - - CHECK(err || retval != XDP_DROP, - "ipv4", "err %d errno %d retval %d size %d\n", - err, errno, retval, size); + ASSERT_OK(err, "ipv4"); + ASSERT_EQ(retval, XDP_DROP, "ipv4 retval"); expect_sz = sizeof(pkt_v6) - 20; /* Test shrink with 20 bytes */ err = bpf_prog_test_run(prog_fd, 1, &pkt_v6, sizeof(pkt_v6), buf, &size, &retval, &duration); - CHECK(err || retval != XDP_TX || size != expect_sz, - "ipv6", "err %d errno %d retval %d size %d expect-size %d\n", - err, errno, retval, size, expect_sz); + ASSERT_OK(err, "ipv6"); + ASSERT_EQ(retval, XDP_TX, "ipv6 retval"); + ASSERT_EQ(size, expect_sz, "ipv6 size"); + bpf_object__close(obj); } @@ -39,21 +38,20 @@ static void test_xdp_adjust_tail_grow(void) int err, prog_fd; err = bpf_prog_test_load(file, BPF_PROG_TYPE_XDP, &obj, &prog_fd); - if (CHECK_FAIL(err)) + if (ASSERT_OK(err, "test_xdp_adjust_tail_grow")) return; err = bpf_prog_test_run(prog_fd, 1, &pkt_v4, sizeof(pkt_v4), buf, &size, &retval, &duration); - CHECK(err || retval != XDP_DROP, - "ipv4", "err %d errno %d retval %d size %d\n", - err, errno, retval, size); + ASSERT_OK(err, "ipv4"); + ASSERT_EQ(retval, XDP_DROP, "ipv4 retval"); expect_sz = sizeof(pkt_v6) + 40; /* Test grow with 40 bytes */ err = bpf_prog_test_run(prog_fd, 1, &pkt_v6, sizeof(pkt_v6) /* 74 */, buf, &size, &retval, &duration); - CHECK(err || retval != XDP_TX || size != expect_sz, - "ipv6", "err %d errno %d retval %d size %d expect-size %d\n", - err, errno, retval, size, expect_sz); + ASSERT_OK(err, "ipv6"); + ASSERT_EQ(retval, XDP_TX, "ipv6 retval"); + ASSERT_EQ(size, expect_sz, "ipv6 size"); bpf_object__close(obj); } @@ -76,7 +74,7 @@ static void test_xdp_adjust_tail_grow2(void) }; err = bpf_prog_test_load(file, BPF_PROG_TYPE_XDP, &obj, &tattr.prog_fd); - if (CHECK_ATTR(err, "load", "err %d errno %d\n", err, errno)) + if (ASSERT_OK(err, "test_xdp_adjust_tail_grow")) return; /* Test case-64 */ @@ -86,21 +84,17 @@ static void test_xdp_adjust_tail_grow2(void) /* Kernel side alloc packet memory area that is zero init */ err = bpf_prog_test_run_xattr(&tattr); - CHECK_ATTR(errno != ENOSPC /* Due limit copy_size in bpf_test_finish */ - || tattr.retval != XDP_TX - || tattr.data_size_out != 192, /* Expected grow size */ - "case-64", - "err %d errno %d retval %d size %d\n", - err, errno, tattr.retval, tattr.data_size_out); + ASSERT_EQ(errno, ENOSPC, "case-64 errno"); /* Due limit copy_size in bpf_test_finish */ + ASSERT_EQ(tattr.retval, XDP_TX, "case-64 retval"); + ASSERT_EQ(tattr.data_size_out, 192, "case-64 data_size_out"); /* Expected grow size */ /* Extra checks for data contents */ - CHECK_ATTR(tattr.data_size_out != 192 - || buf[0] != 1 || buf[63] != 1 /* 0-63 memset to 1 */ - || buf[64] != 0 || buf[127] != 0 /* 64-127 memset to 0 */ - || buf[128] != 1 || buf[191] != 1, /*128-191 memset to 1 */ - "case-64-data", - "err %d errno %d retval %d size %d\n", - err, errno, tattr.retval, tattr.data_size_out); + ASSERT_EQ(buf[0], 1, "case-64-data buf[0]"); /* 0-63 memset to 1 */ + ASSERT_EQ(buf[63], 1, "case-64-data buf[63]"); + ASSERT_EQ(buf[64], 0, "case-64-data buf[64]"); /* 64-127 memset to 0 */ + ASSERT_EQ(buf[127], 0, "case-64-data buf[127]"); + ASSERT_EQ(buf[128], 1, "case-64-data buf[128]"); /* 128-191 memset to 1 */ + ASSERT_EQ(buf[191], 1, "case-64-data buf[191]"); /* Test case-128 */ memset(buf, 2, sizeof(buf)); @@ -109,24 +103,139 @@ static void test_xdp_adjust_tail_grow2(void) err = bpf_prog_test_run_xattr(&tattr); max_grow = 4096 - XDP_PACKET_HEADROOM - tailroom; /* 3520 */ - CHECK_ATTR(err - || tattr.retval != XDP_TX - || tattr.data_size_out != max_grow,/* Expect max grow size */ - "case-128", - "err %d errno %d retval %d size %d expect-size %d\n", - err, errno, tattr.retval, tattr.data_size_out, max_grow); + ASSERT_OK(err, "case-128"); + ASSERT_EQ(tattr.retval, XDP_TX, "case-128 retval"); + ASSERT_EQ(tattr.data_size_out, max_grow, "case-128 data_size_out"); /* Expect max grow */ /* Extra checks for data content: Count grow size, will contain zeros */ for (i = 0, cnt = 0; i < sizeof(buf); i++) { if (buf[i] == 0) cnt++; } - CHECK_ATTR((cnt != (max_grow - tattr.data_size_in)) /* Grow increase */ - || tattr.data_size_out != max_grow, /* Total grow size */ - "case-128-data", - "err %d errno %d retval %d size %d grow-size %d\n", - err, errno, tattr.retval, tattr.data_size_out, cnt); + ASSERT_EQ(cnt, max_grow - tattr.data_size_in, "case-128-data cnt"); /* Grow increase */ + ASSERT_EQ(tattr.data_size_out, max_grow, "case-128-data data_size_out"); /* Total grow */ + + bpf_object__close(obj); +} + +void test_xdp_adjust_frags_tail_shrink(void) +{ + const char *file = "./test_xdp_adjust_tail_shrink.o"; + __u32 duration, retval, size, exp_size; + struct bpf_program *prog; + struct bpf_object *obj; + int err, prog_fd; + __u8 *buf; + + /* For the individual test cases, the first byte in the packet + * indicates which test will be run. + */ + obj = bpf_object__open(file); + if (libbpf_get_error(obj)) + return; + + prog = bpf_object__next_program(obj, NULL); + if (bpf_object__load(obj)) + return; + + prog_fd = bpf_program__fd(prog); + + buf = malloc(9000); + if (!ASSERT_OK_PTR(buf, "alloc buf 9Kb")) + goto out; + + memset(buf, 0, 9000); + + /* Test case removing 10 bytes from last frag, NOT freeing it */ + exp_size = 8990; /* 9000 - 10 */ + err = bpf_prog_test_run(prog_fd, 1, buf, 9000, + buf, &size, &retval, &duration); + + ASSERT_OK(err, "9Kb-10b"); + ASSERT_EQ(retval, XDP_TX, "9Kb-10b retval"); + ASSERT_EQ(size, exp_size, "9Kb-10b size"); + + /* Test case removing one of two pages, assuming 4K pages */ + buf[0] = 1; + exp_size = 4900; /* 9000 - 4100 */ + err = bpf_prog_test_run(prog_fd, 1, buf, 9000, + buf, &size, &retval, &duration); + + ASSERT_OK(err, "9Kb-4Kb"); + ASSERT_EQ(retval, XDP_TX, "9Kb-4Kb retval"); + ASSERT_EQ(size, exp_size, "9Kb-4Kb size"); + + /* Test case removing two pages resulting in a linear xdp_buff */ + buf[0] = 2; + exp_size = 800; /* 9000 - 8200 */ + err = bpf_prog_test_run(prog_fd, 1, buf, 9000, + buf, &size, &retval, &duration); + + ASSERT_OK(err, "9Kb-9Kb"); + ASSERT_EQ(retval, XDP_TX, "9Kb-9Kb retval"); + ASSERT_EQ(size, exp_size, "9Kb-9Kb size"); + + free(buf); +out: + bpf_object__close(obj); +} + +void test_xdp_adjust_frags_tail_grow(void) +{ + const char *file = "./test_xdp_adjust_tail_grow.o"; + __u32 duration, retval, size, exp_size; + struct bpf_program *prog; + struct bpf_object *obj; + int err, i, prog_fd; + __u8 *buf; + + obj = bpf_object__open(file); + if (libbpf_get_error(obj)) + return; + + prog = bpf_object__next_program(obj, NULL); + if (bpf_object__load(obj)) + return; + + prog_fd = bpf_program__fd(prog); + + buf = malloc(16384); + if (!ASSERT_OK_PTR(buf, "alloc buf 16Kb")) + goto out; + + /* Test case add 10 bytes to last frag */ + memset(buf, 1, 16384); + size = 9000; + exp_size = size + 10; + err = bpf_prog_test_run(prog_fd, 1, buf, size, + buf, &size, &retval, &duration); + + ASSERT_OK(err, "9Kb+10b"); + ASSERT_EQ(retval, XDP_TX, "9Kb+10b retval"); + ASSERT_EQ(size, exp_size, "9Kb+10b size"); + + for (i = 0; i < 9000; i++) + ASSERT_EQ(buf[i], 1, "9Kb+10b-old"); + + for (i = 9000; i < 9010; i++) + ASSERT_EQ(buf[i], 0, "9Kb+10b-new"); + + for (i = 9010; i < 16384; i++) + ASSERT_EQ(buf[i], 1, "9Kb+10b-untouched"); + + /* Test a too large grow */ + memset(buf, 1, 16384); + size = 9001; + exp_size = size; + err = bpf_prog_test_run(prog_fd, 1, buf, size, + buf, &size, &retval, &duration); + + ASSERT_OK(err, "9Kb+10b"); + ASSERT_EQ(retval, XDP_DROP, "9Kb+10b retval"); + ASSERT_EQ(size, exp_size, "9Kb+10b size"); + free(buf); +out: bpf_object__close(obj); } @@ -138,4 +247,8 @@ void test_xdp_adjust_tail(void) test_xdp_adjust_tail_grow(); if (test__start_subtest("xdp_adjust_tail_grow2")) test_xdp_adjust_tail_grow2(); + if (test__start_subtest("xdp_adjust_frags_tail_shrink")) + test_xdp_adjust_frags_tail_shrink(); + if (test__start_subtest("xdp_adjust_frags_tail_grow")) + test_xdp_adjust_frags_tail_grow(); } diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_bpf2bpf.c b/tools/testing/selftests/bpf/prog_tests/xdp_bpf2bpf.c index c98a897ad692..9c395ea680c6 100644 --- a/tools/testing/selftests/bpf/prog_tests/xdp_bpf2bpf.c +++ b/tools/testing/selftests/bpf/prog_tests/xdp_bpf2bpf.c @@ -10,40 +10,97 @@ struct meta { int pkt_len; }; +struct test_ctx_s { + bool passed; + int pkt_size; +}; + +struct test_ctx_s test_ctx; + static void on_sample(void *ctx, int cpu, void *data, __u32 size) { - int duration = 0; struct meta *meta = (struct meta *)data; struct ipv4_packet *trace_pkt_v4 = data + sizeof(*meta); + unsigned char *raw_pkt = data + sizeof(*meta); + struct test_ctx_s *tst_ctx = ctx; + + ASSERT_GE(size, sizeof(pkt_v4) + sizeof(*meta), "check_size"); + ASSERT_EQ(meta->ifindex, if_nametoindex("lo"), "check_meta_ifindex"); + ASSERT_EQ(meta->pkt_len, tst_ctx->pkt_size, "check_meta_pkt_len"); + ASSERT_EQ(memcmp(trace_pkt_v4, &pkt_v4, sizeof(pkt_v4)), 0, + "check_packet_content"); + + if (meta->pkt_len > sizeof(pkt_v4)) { + for (int i = 0; i < meta->pkt_len - sizeof(pkt_v4); i++) + ASSERT_EQ(raw_pkt[i + sizeof(pkt_v4)], (unsigned char)i, + "check_packet_content"); + } + + tst_ctx->passed = true; +} - if (CHECK(size < sizeof(pkt_v4) + sizeof(*meta), - "check_size", "size %u < %zu\n", - size, sizeof(pkt_v4) + sizeof(*meta))) - return; +#define BUF_SZ 9000 - if (CHECK(meta->ifindex != if_nametoindex("lo"), "check_meta_ifindex", - "meta->ifindex = %d\n", meta->ifindex)) +static void run_xdp_bpf2bpf_pkt_size(int pkt_fd, struct perf_buffer *pb, + struct test_xdp_bpf2bpf *ftrace_skel, + int pkt_size) +{ + __u32 duration = 0, retval, size; + __u8 *buf, *buf_in; + int err; + + if (!ASSERT_LE(pkt_size, BUF_SZ, "pkt_size") || + !ASSERT_GE(pkt_size, sizeof(pkt_v4), "pkt_size")) return; - if (CHECK(meta->pkt_len != sizeof(pkt_v4), "check_meta_pkt_len", - "meta->pkt_len = %zd\n", sizeof(pkt_v4))) + buf_in = malloc(BUF_SZ); + if (!ASSERT_OK_PTR(buf_in, "buf_in malloc()")) return; - if (CHECK(memcmp(trace_pkt_v4, &pkt_v4, sizeof(pkt_v4)), - "check_packet_content", "content not the same\n")) + buf = malloc(BUF_SZ); + if (!ASSERT_OK_PTR(buf, "buf malloc()")) { + free(buf_in); return; + } + + test_ctx.passed = false; + test_ctx.pkt_size = pkt_size; + + memcpy(buf_in, &pkt_v4, sizeof(pkt_v4)); + if (pkt_size > sizeof(pkt_v4)) { + for (int i = 0; i < (pkt_size - sizeof(pkt_v4)); i++) + buf_in[i + sizeof(pkt_v4)] = i; + } + + /* Run test program */ + err = bpf_prog_test_run(pkt_fd, 1, buf_in, pkt_size, + buf, &size, &retval, &duration); + + ASSERT_OK(err, "ipv4"); + ASSERT_EQ(retval, XDP_PASS, "ipv4 retval"); + ASSERT_EQ(size, pkt_size, "ipv4 size"); + + /* Make sure bpf_xdp_output() was triggered and it sent the expected + * data to the perf ring buffer. + */ + err = perf_buffer__poll(pb, 100); - *(bool *)ctx = true; + ASSERT_GE(err, 0, "perf_buffer__poll"); + ASSERT_TRUE(test_ctx.passed, "test passed"); + /* Verify test results */ + ASSERT_EQ(ftrace_skel->bss->test_result_fentry, if_nametoindex("lo"), + "fentry result"); + ASSERT_EQ(ftrace_skel->bss->test_result_fexit, XDP_PASS, "fexit result"); + + free(buf); + free(buf_in); } void test_xdp_bpf2bpf(void) { - __u32 duration = 0, retval, size; - char buf[128]; int err, pkt_fd, map_fd; - bool passed = false; - struct iphdr iph; - struct iptnl_info value4 = {.family = AF_INET}; + int pkt_sizes[] = {sizeof(pkt_v4), 1024, 4100, 8200}; + struct iptnl_info value4 = {.family = AF_INET6}; struct test_xdp *pkt_skel = NULL; struct test_xdp_bpf2bpf *ftrace_skel = NULL; struct vip key4 = {.protocol = 6, .family = AF_INET}; @@ -52,7 +109,7 @@ void test_xdp_bpf2bpf(void) /* Load XDP program to introspect */ pkt_skel = test_xdp__open_and_load(); - if (CHECK(!pkt_skel, "pkt_skel_load", "test_xdp skeleton failed\n")) + if (!ASSERT_OK_PTR(pkt_skel, "test_xdp__open_and_load")) return; pkt_fd = bpf_program__fd(pkt_skel->progs._xdp_tx_iptunnel); @@ -62,7 +119,7 @@ void test_xdp_bpf2bpf(void) /* Load trace program */ ftrace_skel = test_xdp_bpf2bpf__open(); - if (CHECK(!ftrace_skel, "__open", "ftrace skeleton failed\n")) + if (!ASSERT_OK_PTR(ftrace_skel, "test_xdp_bpf2bpf__open")) goto out; /* Demonstrate the bpf_program__set_attach_target() API rather than @@ -77,50 +134,24 @@ void test_xdp_bpf2bpf(void) bpf_program__set_attach_target(prog, pkt_fd, "_xdp_tx_iptunnel"); err = test_xdp_bpf2bpf__load(ftrace_skel); - if (CHECK(err, "__load", "ftrace skeleton failed\n")) + if (!ASSERT_OK(err, "test_xdp_bpf2bpf__load")) goto out; err = test_xdp_bpf2bpf__attach(ftrace_skel); - if (CHECK(err, "ftrace_attach", "ftrace attach failed: %d\n", err)) + if (!ASSERT_OK(err, "test_xdp_bpf2bpf__attach")) goto out; /* Set up perf buffer */ - pb = perf_buffer__new(bpf_map__fd(ftrace_skel->maps.perf_buf_map), 1, - on_sample, NULL, &passed, NULL); + pb = perf_buffer__new(bpf_map__fd(ftrace_skel->maps.perf_buf_map), 8, + on_sample, NULL, &test_ctx, NULL); if (!ASSERT_OK_PTR(pb, "perf_buf__new")) goto out; - /* Run test program */ - err = bpf_prog_test_run(pkt_fd, 1, &pkt_v4, sizeof(pkt_v4), - buf, &size, &retval, &duration); - memcpy(&iph, buf + sizeof(struct ethhdr), sizeof(iph)); - if (CHECK(err || retval != XDP_TX || size != 74 || - iph.protocol != IPPROTO_IPIP, "ipv4", - "err %d errno %d retval %d size %d\n", - err, errno, retval, size)) - goto out; - - /* Make sure bpf_xdp_output() was triggered and it sent the expected - * data to the perf ring buffer. - */ - err = perf_buffer__poll(pb, 100); - if (CHECK(err < 0, "perf_buffer__poll", "err %d\n", err)) - goto out; - - CHECK_FAIL(!passed); - - /* Verify test results */ - if (CHECK(ftrace_skel->bss->test_result_fentry != if_nametoindex("lo"), - "result", "fentry failed err %llu\n", - ftrace_skel->bss->test_result_fentry)) - goto out; - - CHECK(ftrace_skel->bss->test_result_fexit != XDP_TX, "result", - "fexit failed err %llu\n", ftrace_skel->bss->test_result_fexit); - + for (int i = 0; i < ARRAY_SIZE(pkt_sizes); i++) + run_xdp_bpf2bpf_pkt_size(pkt_fd, pb, ftrace_skel, + pkt_sizes[i]); out: - if (pb) - perf_buffer__free(pb); + perf_buffer__free(pb); test_xdp__destroy(pkt_skel); test_xdp_bpf2bpf__destroy(ftrace_skel); } diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_cpumap_attach.c b/tools/testing/selftests/bpf/prog_tests/xdp_cpumap_attach.c index fd812bd43600..13aabb3b6cf2 100644 --- a/tools/testing/selftests/bpf/prog_tests/xdp_cpumap_attach.c +++ b/tools/testing/selftests/bpf/prog_tests/xdp_cpumap_attach.c @@ -3,11 +3,12 @@ #include <linux/if_link.h> #include <test_progs.h> +#include "test_xdp_with_cpumap_frags_helpers.skel.h" #include "test_xdp_with_cpumap_helpers.skel.h" #define IFINDEX_LO 1 -void serial_test_xdp_cpumap_attach(void) +void test_xdp_with_cpumap_helpers(void) { struct test_xdp_with_cpumap_helpers *skel; struct bpf_prog_info info = {}; @@ -54,6 +55,67 @@ void serial_test_xdp_cpumap_attach(void) err = bpf_map_update_elem(map_fd, &idx, &val, 0); ASSERT_NEQ(err, 0, "Add non-BPF_XDP_CPUMAP program to cpumap entry"); + /* Try to attach BPF_XDP program with frags to cpumap when we have + * already loaded a BPF_XDP program on the map + */ + idx = 1; + val.qsize = 192; + val.bpf_prog.fd = bpf_program__fd(skel->progs.xdp_dummy_cm_frags); + err = bpf_map_update_elem(map_fd, &idx, &val, 0); + ASSERT_NEQ(err, 0, "Add BPF_XDP program with frags to cpumap entry"); + out_close: test_xdp_with_cpumap_helpers__destroy(skel); } + +void test_xdp_with_cpumap_frags_helpers(void) +{ + struct test_xdp_with_cpumap_frags_helpers *skel; + struct bpf_prog_info info = {}; + __u32 len = sizeof(info); + struct bpf_cpumap_val val = { + .qsize = 192, + }; + int err, frags_prog_fd, map_fd; + __u32 idx = 0; + + skel = test_xdp_with_cpumap_frags_helpers__open_and_load(); + if (!ASSERT_OK_PTR(skel, "test_xdp_with_cpumap_helpers__open_and_load")) + return; + + frags_prog_fd = bpf_program__fd(skel->progs.xdp_dummy_cm_frags); + map_fd = bpf_map__fd(skel->maps.cpu_map); + err = bpf_obj_get_info_by_fd(frags_prog_fd, &info, &len); + if (!ASSERT_OK(err, "bpf_obj_get_info_by_fd")) + goto out_close; + + val.bpf_prog.fd = frags_prog_fd; + err = bpf_map_update_elem(map_fd, &idx, &val, 0); + ASSERT_OK(err, "Add program to cpumap entry"); + + err = bpf_map_lookup_elem(map_fd, &idx, &val); + ASSERT_OK(err, "Read cpumap entry"); + ASSERT_EQ(info.id, val.bpf_prog.id, + "Match program id to cpumap entry prog_id"); + + /* Try to attach BPF_XDP program to cpumap when we have + * already loaded a BPF_XDP program with frags on the map + */ + idx = 1; + val.qsize = 192; + val.bpf_prog.fd = bpf_program__fd(skel->progs.xdp_dummy_cm); + err = bpf_map_update_elem(map_fd, &idx, &val, 0); + ASSERT_NEQ(err, 0, "Add BPF_XDP program to cpumap entry"); + +out_close: + test_xdp_with_cpumap_frags_helpers__destroy(skel); +} + +void serial_test_xdp_cpumap_attach(void) +{ + if (test__start_subtest("CPUMAP with programs in entries")) + test_xdp_with_cpumap_helpers(); + + if (test__start_subtest("CPUMAP with frags programs in entries")) + test_xdp_with_cpumap_frags_helpers(); +} diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_devmap_attach.c b/tools/testing/selftests/bpf/prog_tests/xdp_devmap_attach.c index 3079d5568f8f..2a784ccd3136 100644 --- a/tools/testing/selftests/bpf/prog_tests/xdp_devmap_attach.c +++ b/tools/testing/selftests/bpf/prog_tests/xdp_devmap_attach.c @@ -4,6 +4,7 @@ #include <test_progs.h> #include "test_xdp_devmap_helpers.skel.h" +#include "test_xdp_with_devmap_frags_helpers.skel.h" #include "test_xdp_with_devmap_helpers.skel.h" #define IFINDEX_LO 1 @@ -56,6 +57,15 @@ static void test_xdp_with_devmap_helpers(void) err = bpf_map_update_elem(map_fd, &idx, &val, 0); ASSERT_NEQ(err, 0, "Add non-BPF_XDP_DEVMAP program to devmap entry"); + /* Try to attach BPF_XDP program with frags to devmap when we have + * already loaded a BPF_XDP program on the map + */ + idx = 1; + val.ifindex = 1; + val.bpf_prog.fd = bpf_program__fd(skel->progs.xdp_dummy_dm_frags); + err = bpf_map_update_elem(map_fd, &idx, &val, 0); + ASSERT_NEQ(err, 0, "Add BPF_XDP program with frags to devmap entry"); + out_close: test_xdp_with_devmap_helpers__destroy(skel); } @@ -71,12 +81,57 @@ static void test_neg_xdp_devmap_helpers(void) } } +void test_xdp_with_devmap_frags_helpers(void) +{ + struct test_xdp_with_devmap_frags_helpers *skel; + struct bpf_prog_info info = {}; + struct bpf_devmap_val val = { + .ifindex = IFINDEX_LO, + }; + __u32 len = sizeof(info); + int err, dm_fd_frags, map_fd; + __u32 idx = 0; + + skel = test_xdp_with_devmap_frags_helpers__open_and_load(); + if (!ASSERT_OK_PTR(skel, "test_xdp_with_devmap_helpers__open_and_load")) + return; + + dm_fd_frags = bpf_program__fd(skel->progs.xdp_dummy_dm_frags); + map_fd = bpf_map__fd(skel->maps.dm_ports); + err = bpf_obj_get_info_by_fd(dm_fd_frags, &info, &len); + if (!ASSERT_OK(err, "bpf_obj_get_info_by_fd")) + goto out_close; + + val.bpf_prog.fd = dm_fd_frags; + err = bpf_map_update_elem(map_fd, &idx, &val, 0); + ASSERT_OK(err, "Add frags program to devmap entry"); + + err = bpf_map_lookup_elem(map_fd, &idx, &val); + ASSERT_OK(err, "Read devmap entry"); + ASSERT_EQ(info.id, val.bpf_prog.id, + "Match program id to devmap entry prog_id"); + + /* Try to attach BPF_XDP program to devmap when we have + * already loaded a BPF_XDP program with frags on the map + */ + idx = 1; + val.ifindex = 1; + val.bpf_prog.fd = bpf_program__fd(skel->progs.xdp_dummy_dm); + err = bpf_map_update_elem(map_fd, &idx, &val, 0); + ASSERT_NEQ(err, 0, "Add BPF_XDP program to devmap entry"); + +out_close: + test_xdp_with_devmap_frags_helpers__destroy(skel); +} void serial_test_xdp_devmap_attach(void) { if (test__start_subtest("DEVMAP with programs in entries")) test_xdp_with_devmap_helpers(); + if (test__start_subtest("DEVMAP with frags programs in entries")) + test_xdp_with_devmap_frags_helpers(); + if (test__start_subtest("Verifier check of DEVMAP programs")) test_neg_xdp_devmap_helpers(); } diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_setsockopt_unix.c b/tools/testing/selftests/bpf/progs/bpf_iter_setsockopt_unix.c new file mode 100644 index 000000000000..eafc877ea460 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/bpf_iter_setsockopt_unix.c @@ -0,0 +1,60 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright Amazon.com Inc. or its affiliates. */ +#include "bpf_iter.h" +#include "bpf_tracing_net.h" +#include <bpf/bpf_helpers.h> +#include <limits.h> + +#define AUTOBIND_LEN 6 +char sun_path[AUTOBIND_LEN]; + +#define NR_CASES 5 +int sndbuf_setsockopt[NR_CASES] = {-1, 0, 8192, INT_MAX / 2, INT_MAX}; +int sndbuf_getsockopt[NR_CASES] = {-1, -1, -1, -1, -1}; +int sndbuf_getsockopt_expected[NR_CASES]; + +static inline int cmpname(struct unix_sock *unix_sk) +{ + int i; + + for (i = 0; i < AUTOBIND_LEN; i++) { + if (unix_sk->addr->name->sun_path[i] != sun_path[i]) + return -1; + } + + return 0; +} + +SEC("iter/unix") +int change_sndbuf(struct bpf_iter__unix *ctx) +{ + struct unix_sock *unix_sk = ctx->unix_sk; + int i, err; + + if (!unix_sk || !unix_sk->addr) + return 0; + + if (unix_sk->addr->name->sun_path[0]) + return 0; + + if (cmpname(unix_sk)) + return 0; + + for (i = 0; i < NR_CASES; i++) { + err = bpf_setsockopt(unix_sk, SOL_SOCKET, SO_SNDBUF, + &sndbuf_setsockopt[i], + sizeof(sndbuf_setsockopt[i])); + if (err) + break; + + err = bpf_getsockopt(unix_sk, SOL_SOCKET, SO_SNDBUF, + &sndbuf_getsockopt[i], + sizeof(sndbuf_getsockopt[i])); + if (err) + break; + } + + return 0; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_unix.c b/tools/testing/selftests/bpf/progs/bpf_iter_unix.c index c21e3f545371..e6aefae38894 100644 --- a/tools/testing/selftests/bpf/progs/bpf_iter_unix.c +++ b/tools/testing/selftests/bpf/progs/bpf_iter_unix.c @@ -63,7 +63,7 @@ int dump_unix(struct bpf_iter__unix *ctx) BPF_SEQ_PRINTF(seq, " @"); for (i = 1; i < len; i++) { - /* unix_mkname() tests this upper bound. */ + /* unix_validate_addr() tests this upper bound. */ if (i >= sizeof(struct sockaddr_un)) break; diff --git a/tools/testing/selftests/bpf/progs/bpf_mod_race.c b/tools/testing/selftests/bpf/progs/bpf_mod_race.c new file mode 100644 index 000000000000..82a5c6c6ba83 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/bpf_mod_race.c @@ -0,0 +1,100 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <vmlinux.h> +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_tracing.h> + +const volatile struct { + /* thread to activate trace programs for */ + pid_t tgid; + /* return error from __init function */ + int inject_error; + /* uffd monitored range start address */ + void *fault_addr; +} bpf_mod_race_config = { -1 }; + +int bpf_blocking = 0; +int res_try_get_module = -1; + +static __always_inline bool check_thread_id(void) +{ + struct task_struct *task = bpf_get_current_task_btf(); + + return task->tgid == bpf_mod_race_config.tgid; +} + +/* The trace of execution is something like this: + * + * finit_module() + * load_module() + * prepare_coming_module() + * notifier_call(MODULE_STATE_COMING) + * btf_parse_module() + * btf_alloc_id() // Visible to userspace at this point + * list_add(btf_mod->list, &btf_modules) + * do_init_module() + * freeinit = kmalloc() + * ret = mod->init() + * bpf_prog_widen_race() + * bpf_copy_from_user() + * ...<sleep>... + * if (ret < 0) + * ... + * free_module() + * return ret + * + * At this point, module loading thread is blocked, we now load the program: + * + * bpf_check + * add_kfunc_call/check_pseudo_btf_id + * btf_try_get_module + * try_get_module_live == false + * return -ENXIO + * + * Without the fix (try_get_module_live in btf_try_get_module): + * + * bpf_check + * add_kfunc_call/check_pseudo_btf_id + * btf_try_get_module + * try_get_module == true + * <store module reference in btf_kfunc_tab or used_btf array> + * ... + * return fd + * + * Now, if we inject an error in the blocked program, our module will be freed + * (going straight from MODULE_STATE_COMING to MODULE_STATE_GOING). + * Later, when bpf program is freed, it will try to module_put already freed + * module. This is why try_get_module_live returns false if mod->state is not + * MODULE_STATE_LIVE. + */ + +SEC("fmod_ret.s/bpf_fentry_test1") +int BPF_PROG(widen_race, int a, int ret) +{ + char dst; + + if (!check_thread_id()) + return 0; + /* Indicate that we will attempt to block */ + bpf_blocking = 1; + bpf_copy_from_user(&dst, 1, bpf_mod_race_config.fault_addr); + return bpf_mod_race_config.inject_error; +} + +SEC("fexit/do_init_module") +int BPF_PROG(fexit_init_module, struct module *mod, int ret) +{ + if (!check_thread_id()) + return 0; + /* Indicate that we finished blocking */ + bpf_blocking = 2; + return 0; +} + +SEC("fexit/btf_try_get_module") +int BPF_PROG(fexit_module_get, const struct btf *btf, struct module *mod) +{ + res_try_get_module = !!mod; + return 0; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/bpf_tracing_net.h b/tools/testing/selftests/bpf/progs/bpf_tracing_net.h index e0f42601be9b..1c1289ba5fc5 100644 --- a/tools/testing/selftests/bpf/progs/bpf_tracing_net.h +++ b/tools/testing/selftests/bpf/progs/bpf_tracing_net.h @@ -5,6 +5,8 @@ #define AF_INET 2 #define AF_INET6 10 +#define SOL_SOCKET 1 +#define SO_SNDBUF 7 #define __SO_ACCEPTCON (1 << 16) #define SOL_TCP 6 diff --git a/tools/testing/selftests/bpf/progs/cgroup_getset_retval_getsockopt.c b/tools/testing/selftests/bpf/progs/cgroup_getset_retval_getsockopt.c new file mode 100644 index 000000000000..b2a409e6382a --- /dev/null +++ b/tools/testing/selftests/bpf/progs/cgroup_getset_retval_getsockopt.c @@ -0,0 +1,45 @@ +// SPDX-License-Identifier: GPL-2.0-only + +/* + * Copyright 2021 Google LLC. + */ + +#include <errno.h> +#include <linux/bpf.h> +#include <bpf/bpf_helpers.h> + +__u32 invocations = 0; +__u32 assertion_error = 0; +__u32 retval_value = 0; +__u32 ctx_retval_value = 0; + +SEC("cgroup/getsockopt") +int get_retval(struct bpf_sockopt *ctx) +{ + retval_value = bpf_get_retval(); + ctx_retval_value = ctx->retval; + __sync_fetch_and_add(&invocations, 1); + + return 1; +} + +SEC("cgroup/getsockopt") +int set_eisconn(struct bpf_sockopt *ctx) +{ + __sync_fetch_and_add(&invocations, 1); + + if (bpf_set_retval(-EISCONN)) + assertion_error = 1; + + return 1; +} + +SEC("cgroup/getsockopt") +int clear_retval(struct bpf_sockopt *ctx) +{ + __sync_fetch_and_add(&invocations, 1); + + ctx->retval = 0; + + return 1; +} diff --git a/tools/testing/selftests/bpf/progs/cgroup_getset_retval_setsockopt.c b/tools/testing/selftests/bpf/progs/cgroup_getset_retval_setsockopt.c new file mode 100644 index 000000000000..d6e5903e06ba --- /dev/null +++ b/tools/testing/selftests/bpf/progs/cgroup_getset_retval_setsockopt.c @@ -0,0 +1,52 @@ +// SPDX-License-Identifier: GPL-2.0-only + +/* + * Copyright 2021 Google LLC. + */ + +#include <errno.h> +#include <linux/bpf.h> +#include <bpf/bpf_helpers.h> + +__u32 invocations = 0; +__u32 assertion_error = 0; +__u32 retval_value = 0; + +SEC("cgroup/setsockopt") +int get_retval(struct bpf_sockopt *ctx) +{ + retval_value = bpf_get_retval(); + __sync_fetch_and_add(&invocations, 1); + + return 1; +} + +SEC("cgroup/setsockopt") +int set_eunatch(struct bpf_sockopt *ctx) +{ + __sync_fetch_and_add(&invocations, 1); + + if (bpf_set_retval(-EUNATCH)) + assertion_error = 1; + + return 0; +} + +SEC("cgroup/setsockopt") +int set_eisconn(struct bpf_sockopt *ctx) +{ + __sync_fetch_and_add(&invocations, 1); + + if (bpf_set_retval(-EISCONN)) + assertion_error = 1; + + return 0; +} + +SEC("cgroup/setsockopt") +int legacy_eperm(struct bpf_sockopt *ctx) +{ + __sync_fetch_and_add(&invocations, 1); + + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/freplace_cls_redirect.c b/tools/testing/selftests/bpf/progs/freplace_cls_redirect.c index 68a5a9db928a..7e94412d47a5 100644 --- a/tools/testing/selftests/bpf/progs/freplace_cls_redirect.c +++ b/tools/testing/selftests/bpf/progs/freplace_cls_redirect.c @@ -7,12 +7,12 @@ #include <bpf/bpf_endian.h> #include <bpf/bpf_helpers.h> -struct bpf_map_def SEC("maps") sock_map = { - .type = BPF_MAP_TYPE_SOCKMAP, - .key_size = sizeof(int), - .value_size = sizeof(int), - .max_entries = 2, -}; +struct { + __uint(type, BPF_MAP_TYPE_SOCKMAP); + __type(key, int); + __type(value, int); + __uint(max_entries, 2); +} sock_map SEC(".maps"); SEC("freplace/cls_redirect") int freplace_cls_redirect_test(struct __sk_buff *skb) diff --git a/tools/testing/selftests/bpf/progs/kfunc_call_race.c b/tools/testing/selftests/bpf/progs/kfunc_call_race.c new file mode 100644 index 000000000000..4e8fed75a4e0 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/kfunc_call_race.c @@ -0,0 +1,14 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <vmlinux.h> +#include <bpf/bpf_helpers.h> + +extern void bpf_testmod_test_mod_kfunc(int i) __ksym; + +SEC("tc") +int kfunc_call_fail(struct __sk_buff *ctx) +{ + bpf_testmod_test_mod_kfunc(0); + return 0; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/kfunc_call_test.c b/tools/testing/selftests/bpf/progs/kfunc_call_test.c index 8a8cf59017aa..5aecbb9fdc68 100644 --- a/tools/testing/selftests/bpf/progs/kfunc_call_test.c +++ b/tools/testing/selftests/bpf/progs/kfunc_call_test.c @@ -1,13 +1,20 @@ // SPDX-License-Identifier: GPL-2.0 /* Copyright (c) 2021 Facebook */ -#include <linux/bpf.h> +#include <vmlinux.h> #include <bpf/bpf_helpers.h> -#include "bpf_tcp_helpers.h" extern int bpf_kfunc_call_test2(struct sock *sk, __u32 a, __u32 b) __ksym; extern __u64 bpf_kfunc_call_test1(struct sock *sk, __u32 a, __u64 b, __u32 c, __u64 d) __ksym; +extern struct prog_test_ref_kfunc *bpf_kfunc_call_test_acquire(unsigned long *sp) __ksym; +extern void bpf_kfunc_call_test_release(struct prog_test_ref_kfunc *p) __ksym; +extern void bpf_kfunc_call_test_pass_ctx(struct __sk_buff *skb) __ksym; +extern void bpf_kfunc_call_test_pass1(struct prog_test_pass1 *p) __ksym; +extern void bpf_kfunc_call_test_pass2(struct prog_test_pass2 *p) __ksym; +extern void bpf_kfunc_call_test_mem_len_pass1(void *mem, int len) __ksym; +extern void bpf_kfunc_call_test_mem_len_fail2(__u64 *mem, int len) __ksym; + SEC("tc") int kfunc_call_test2(struct __sk_buff *skb) { @@ -44,4 +51,45 @@ int kfunc_call_test1(struct __sk_buff *skb) return ret; } +SEC("tc") +int kfunc_call_test_ref_btf_id(struct __sk_buff *skb) +{ + struct prog_test_ref_kfunc *pt; + unsigned long s = 0; + int ret = 0; + + pt = bpf_kfunc_call_test_acquire(&s); + if (pt) { + if (pt->a != 42 || pt->b != 108) + ret = -1; + bpf_kfunc_call_test_release(pt); + } + return ret; +} + +SEC("tc") +int kfunc_call_test_pass(struct __sk_buff *skb) +{ + struct prog_test_pass1 p1 = {}; + struct prog_test_pass2 p2 = {}; + short a = 0; + __u64 b = 0; + long c = 0; + char d = 0; + int e = 0; + + bpf_kfunc_call_test_pass_ctx(skb); + bpf_kfunc_call_test_pass1(&p1); + bpf_kfunc_call_test_pass2(&p2); + + bpf_kfunc_call_test_mem_len_pass1(&a, sizeof(a)); + bpf_kfunc_call_test_mem_len_pass1(&b, sizeof(b)); + bpf_kfunc_call_test_mem_len_pass1(&c, sizeof(c)); + bpf_kfunc_call_test_mem_len_pass1(&d, sizeof(d)); + bpf_kfunc_call_test_mem_len_pass1(&e, sizeof(e)); + bpf_kfunc_call_test_mem_len_fail2(&b, -1); + + return 0; +} + char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/ksym_race.c b/tools/testing/selftests/bpf/progs/ksym_race.c new file mode 100644 index 000000000000..def97f2fed90 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/ksym_race.c @@ -0,0 +1,13 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <vmlinux.h> +#include <bpf/bpf_helpers.h> + +extern int bpf_testmod_ksym_percpu __ksym; + +SEC("tc") +int ksym_fail(struct __sk_buff *ctx) +{ + return *(int *)bpf_this_cpu_ptr(&bpf_testmod_ksym_percpu); +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/sample_map_ret0.c b/tools/testing/selftests/bpf/progs/sample_map_ret0.c index 1612a32007b6..495990d355ef 100644 --- a/tools/testing/selftests/bpf/progs/sample_map_ret0.c +++ b/tools/testing/selftests/bpf/progs/sample_map_ret0.c @@ -2,19 +2,19 @@ #include <linux/bpf.h> #include <bpf/bpf_helpers.h> -struct bpf_map_def SEC("maps") htab = { - .type = BPF_MAP_TYPE_HASH, - .key_size = sizeof(__u32), - .value_size = sizeof(long), - .max_entries = 2, -}; +struct { + __uint(type, BPF_MAP_TYPE_HASH); + __type(key, __u32); + __type(value, long); + __uint(max_entries, 2); +} htab SEC(".maps"); -struct bpf_map_def SEC("maps") array = { - .type = BPF_MAP_TYPE_ARRAY, - .key_size = sizeof(__u32), - .value_size = sizeof(long), - .max_entries = 2, -}; +struct { + __uint(type, BPF_MAP_TYPE_ARRAY); + __type(key, __u32); + __type(value, long); + __uint(max_entries, 2); +} array SEC(".maps"); /* Sample program which should always load for testing control paths. */ SEC(".text") int func() diff --git a/tools/testing/selftests/bpf/progs/sockmap_parse_prog.c b/tools/testing/selftests/bpf/progs/sockmap_parse_prog.c index 95d5b941bc1f..c9abfe3a11af 100644 --- a/tools/testing/selftests/bpf/progs/sockmap_parse_prog.c +++ b/tools/testing/selftests/bpf/progs/sockmap_parse_prog.c @@ -7,8 +7,6 @@ int bpf_prog1(struct __sk_buff *skb) { void *data_end = (void *)(long) skb->data_end; void *data = (void *)(long) skb->data; - __u32 lport = skb->local_port; - __u32 rport = skb->remote_port; __u8 *d = data; int err; diff --git a/tools/testing/selftests/bpf/progs/sockopt_sk.c b/tools/testing/selftests/bpf/progs/sockopt_sk.c index 79c8139b63b8..d0298dccedcd 100644 --- a/tools/testing/selftests/bpf/progs/sockopt_sk.c +++ b/tools/testing/selftests/bpf/progs/sockopt_sk.c @@ -73,17 +73,17 @@ int _getsockopt(struct bpf_sockopt *ctx) */ if (optval + sizeof(struct tcp_zerocopy_receive) > optval_end) - return 0; /* EPERM, bounds check */ + return 0; /* bounds check */ if (((struct tcp_zerocopy_receive *)optval)->address != 0) - return 0; /* EPERM, unexpected data */ + return 0; /* unexpected data */ return 1; } if (ctx->level == SOL_IP && ctx->optname == IP_FREEBIND) { if (optval + 1 > optval_end) - return 0; /* EPERM, bounds check */ + return 0; /* bounds check */ ctx->retval = 0; /* Reset system call return value to zero */ @@ -96,24 +96,24 @@ int _getsockopt(struct bpf_sockopt *ctx) * bytes of data. */ if (optval_end - optval != page_size) - return 0; /* EPERM, unexpected data size */ + return 0; /* unexpected data size */ return 1; } if (ctx->level != SOL_CUSTOM) - return 0; /* EPERM, deny everything except custom level */ + return 0; /* deny everything except custom level */ if (optval + 1 > optval_end) - return 0; /* EPERM, bounds check */ + return 0; /* bounds check */ storage = bpf_sk_storage_get(&socket_storage_map, ctx->sk, 0, BPF_SK_STORAGE_GET_F_CREATE); if (!storage) - return 0; /* EPERM, couldn't get sk storage */ + return 0; /* couldn't get sk storage */ if (!ctx->retval) - return 0; /* EPERM, kernel should not have handled + return 0; /* kernel should not have handled * SOL_CUSTOM, something is wrong! */ ctx->retval = 0; /* Reset system call return value to zero */ @@ -152,7 +152,7 @@ int _setsockopt(struct bpf_sockopt *ctx) /* Overwrite SO_SNDBUF value */ if (optval + sizeof(__u32) > optval_end) - return 0; /* EPERM, bounds check */ + return 0; /* bounds check */ *(__u32 *)optval = 0x55AA; ctx->optlen = 4; @@ -164,7 +164,7 @@ int _setsockopt(struct bpf_sockopt *ctx) /* Always use cubic */ if (optval + 5 > optval_end) - return 0; /* EPERM, bounds check */ + return 0; /* bounds check */ memcpy(optval, "cubic", 5); ctx->optlen = 5; @@ -175,10 +175,10 @@ int _setsockopt(struct bpf_sockopt *ctx) if (ctx->level == SOL_IP && ctx->optname == IP_FREEBIND) { /* Original optlen is larger than PAGE_SIZE. */ if (ctx->optlen != page_size * 2) - return 0; /* EPERM, unexpected data size */ + return 0; /* unexpected data size */ if (optval + 1 > optval_end) - return 0; /* EPERM, bounds check */ + return 0; /* bounds check */ /* Make sure we can trim the buffer. */ optval[0] = 0; @@ -189,21 +189,21 @@ int _setsockopt(struct bpf_sockopt *ctx) * bytes of data. */ if (optval_end - optval != page_size) - return 0; /* EPERM, unexpected data size */ + return 0; /* unexpected data size */ return 1; } if (ctx->level != SOL_CUSTOM) - return 0; /* EPERM, deny everything except custom level */ + return 0; /* deny everything except custom level */ if (optval + 1 > optval_end) - return 0; /* EPERM, bounds check */ + return 0; /* bounds check */ storage = bpf_sk_storage_get(&socket_storage_map, ctx->sk, 0, BPF_SK_STORAGE_GET_F_CREATE); if (!storage) - return 0; /* EPERM, couldn't get sk storage */ + return 0; /* couldn't get sk storage */ storage->val = optval[0]; ctx->optlen = -1; /* BPF has consumed this option, don't call kernel diff --git a/tools/testing/selftests/bpf/progs/test_bpf_nf.c b/tools/testing/selftests/bpf/progs/test_bpf_nf.c new file mode 100644 index 000000000000..f00a9731930e --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_bpf_nf.c @@ -0,0 +1,118 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <vmlinux.h> +#include <bpf/bpf_helpers.h> + +#define EAFNOSUPPORT 97 +#define EPROTO 71 +#define ENONET 64 +#define EINVAL 22 +#define ENOENT 2 + +int test_einval_bpf_tuple = 0; +int test_einval_reserved = 0; +int test_einval_netns_id = 0; +int test_einval_len_opts = 0; +int test_eproto_l4proto = 0; +int test_enonet_netns_id = 0; +int test_enoent_lookup = 0; +int test_eafnosupport = 0; + +struct nf_conn; + +struct bpf_ct_opts___local { + s32 netns_id; + s32 error; + u8 l4proto; + u8 reserved[3]; +} __attribute__((preserve_access_index)); + +struct nf_conn *bpf_xdp_ct_lookup(struct xdp_md *, struct bpf_sock_tuple *, u32, + struct bpf_ct_opts___local *, u32) __ksym; +struct nf_conn *bpf_skb_ct_lookup(struct __sk_buff *, struct bpf_sock_tuple *, u32, + struct bpf_ct_opts___local *, u32) __ksym; +void bpf_ct_release(struct nf_conn *) __ksym; + +static __always_inline void +nf_ct_test(struct nf_conn *(*func)(void *, struct bpf_sock_tuple *, u32, + struct bpf_ct_opts___local *, u32), + void *ctx) +{ + struct bpf_ct_opts___local opts_def = { .l4proto = IPPROTO_TCP, .netns_id = -1 }; + struct bpf_sock_tuple bpf_tuple; + struct nf_conn *ct; + + __builtin_memset(&bpf_tuple, 0, sizeof(bpf_tuple.ipv4)); + + ct = func(ctx, NULL, 0, &opts_def, sizeof(opts_def)); + if (ct) + bpf_ct_release(ct); + else + test_einval_bpf_tuple = opts_def.error; + + opts_def.reserved[0] = 1; + ct = func(ctx, &bpf_tuple, sizeof(bpf_tuple.ipv4), &opts_def, sizeof(opts_def)); + opts_def.reserved[0] = 0; + opts_def.l4proto = IPPROTO_TCP; + if (ct) + bpf_ct_release(ct); + else + test_einval_reserved = opts_def.error; + + opts_def.netns_id = -2; + ct = func(ctx, &bpf_tuple, sizeof(bpf_tuple.ipv4), &opts_def, sizeof(opts_def)); + opts_def.netns_id = -1; + if (ct) + bpf_ct_release(ct); + else + test_einval_netns_id = opts_def.error; + + ct = func(ctx, &bpf_tuple, sizeof(bpf_tuple.ipv4), &opts_def, sizeof(opts_def) - 1); + if (ct) + bpf_ct_release(ct); + else + test_einval_len_opts = opts_def.error; + + opts_def.l4proto = IPPROTO_ICMP; + ct = func(ctx, &bpf_tuple, sizeof(bpf_tuple.ipv4), &opts_def, sizeof(opts_def)); + opts_def.l4proto = IPPROTO_TCP; + if (ct) + bpf_ct_release(ct); + else + test_eproto_l4proto = opts_def.error; + + opts_def.netns_id = 0xf00f; + ct = func(ctx, &bpf_tuple, sizeof(bpf_tuple.ipv4), &opts_def, sizeof(opts_def)); + opts_def.netns_id = -1; + if (ct) + bpf_ct_release(ct); + else + test_enonet_netns_id = opts_def.error; + + ct = func(ctx, &bpf_tuple, sizeof(bpf_tuple.ipv4), &opts_def, sizeof(opts_def)); + if (ct) + bpf_ct_release(ct); + else + test_enoent_lookup = opts_def.error; + + ct = func(ctx, &bpf_tuple, sizeof(bpf_tuple.ipv4) - 1, &opts_def, sizeof(opts_def)); + if (ct) + bpf_ct_release(ct); + else + test_eafnosupport = opts_def.error; +} + +SEC("xdp") +int nf_xdp_ct_test(struct xdp_md *ctx) +{ + nf_ct_test((void *)bpf_xdp_ct_lookup, ctx); + return 0; +} + +SEC("tc") +int nf_skb_ct_test(struct __sk_buff *ctx) +{ + nf_ct_test((void *)bpf_skb_ct_lookup, ctx); + return 0; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/test_btf_haskv.c b/tools/testing/selftests/bpf/progs/test_btf_haskv.c index 160ead6c67b2..07c94df13660 100644 --- a/tools/testing/selftests/bpf/progs/test_btf_haskv.c +++ b/tools/testing/selftests/bpf/progs/test_btf_haskv.c @@ -9,12 +9,15 @@ struct ipv_counts { unsigned int v6; }; +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wdeprecated-declarations" struct bpf_map_def SEC("maps") btf_map = { .type = BPF_MAP_TYPE_ARRAY, .key_size = sizeof(int), .value_size = sizeof(struct ipv_counts), .max_entries = 4, }; +#pragma GCC diagnostic pop BPF_ANNOTATE_KV_PAIR(btf_map, int, struct ipv_counts); diff --git a/tools/testing/selftests/bpf/progs/test_btf_newkv.c b/tools/testing/selftests/bpf/progs/test_btf_newkv.c index 1884a5bd10f5..762671a2e90c 100644 --- a/tools/testing/selftests/bpf/progs/test_btf_newkv.c +++ b/tools/testing/selftests/bpf/progs/test_btf_newkv.c @@ -9,6 +9,8 @@ struct ipv_counts { unsigned int v6; }; +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wdeprecated-declarations" /* just to validate we can handle maps in multiple sections */ struct bpf_map_def SEC("maps") btf_map_legacy = { .type = BPF_MAP_TYPE_ARRAY, @@ -16,6 +18,7 @@ struct bpf_map_def SEC("maps") btf_map_legacy = { .value_size = sizeof(long long), .max_entries = 4, }; +#pragma GCC diagnostic pop BPF_ANNOTATE_KV_PAIR(btf_map_legacy, int, struct ipv_counts); diff --git a/tools/testing/selftests/bpf/progs/test_btf_nokv.c b/tools/testing/selftests/bpf/progs/test_btf_nokv.c index 15e0f9945fe4..1dabb88f8cb4 100644 --- a/tools/testing/selftests/bpf/progs/test_btf_nokv.c +++ b/tools/testing/selftests/bpf/progs/test_btf_nokv.c @@ -8,12 +8,12 @@ struct ipv_counts { unsigned int v6; }; -struct bpf_map_def SEC("maps") btf_map = { - .type = BPF_MAP_TYPE_ARRAY, - .key_size = sizeof(int), - .value_size = sizeof(struct ipv_counts), - .max_entries = 4, -}; +struct { + __uint(type, BPF_MAP_TYPE_ARRAY); + __uint(key_size, sizeof(int)); + __uint(value_size, sizeof(struct ipv_counts)); + __uint(max_entries, 4); +} btf_map SEC(".maps"); __attribute__((noinline)) int test_long_fname_2(void) diff --git a/tools/testing/selftests/bpf/progs/test_skb_cgroup_id_kern.c b/tools/testing/selftests/bpf/progs/test_skb_cgroup_id_kern.c index c304cd5b8cad..37aacc66cd68 100644 --- a/tools/testing/selftests/bpf/progs/test_skb_cgroup_id_kern.c +++ b/tools/testing/selftests/bpf/progs/test_skb_cgroup_id_kern.c @@ -10,12 +10,12 @@ #define NUM_CGROUP_LEVELS 4 -struct bpf_map_def SEC("maps") cgroup_ids = { - .type = BPF_MAP_TYPE_ARRAY, - .key_size = sizeof(__u32), - .value_size = sizeof(__u64), - .max_entries = NUM_CGROUP_LEVELS, -}; +struct { + __uint(type, BPF_MAP_TYPE_ARRAY); + __type(key, __u32); + __type(value, __u64); + __uint(max_entries, NUM_CGROUP_LEVELS); +} cgroup_ids SEC(".maps"); static __always_inline void log_nth_level(struct __sk_buff *skb, __u32 level) { diff --git a/tools/testing/selftests/bpf/progs/test_sockmap_progs_query.c b/tools/testing/selftests/bpf/progs/test_sockmap_progs_query.c new file mode 100644 index 000000000000..9d58d61c0dee --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_sockmap_progs_query.c @@ -0,0 +1,24 @@ +// SPDX-License-Identifier: GPL-2.0 +#include "vmlinux.h" +#include <bpf/bpf_helpers.h> + +struct { + __uint(type, BPF_MAP_TYPE_SOCKMAP); + __uint(max_entries, 1); + __type(key, __u32); + __type(value, __u64); +} sock_map SEC(".maps"); + +SEC("sk_skb") +int prog_skb_verdict(struct __sk_buff *skb) +{ + return SK_PASS; +} + +SEC("sk_msg") +int prog_skmsg_verdict(struct sk_msg_md *msg) +{ + return SK_PASS; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/test_tc_edt.c b/tools/testing/selftests/bpf/progs/test_tc_edt.c index bf28814bfde5..950a70b61e74 100644 --- a/tools/testing/selftests/bpf/progs/test_tc_edt.c +++ b/tools/testing/selftests/bpf/progs/test_tc_edt.c @@ -17,12 +17,12 @@ #define THROTTLE_RATE_BPS (5 * 1000 * 1000) /* flow_key => last_tstamp timestamp used */ -struct bpf_map_def SEC("maps") flow_map = { - .type = BPF_MAP_TYPE_HASH, - .key_size = sizeof(uint32_t), - .value_size = sizeof(uint64_t), - .max_entries = 1, -}; +struct { + __uint(type, BPF_MAP_TYPE_HASH); + __type(key, uint32_t); + __type(value, uint64_t); + __uint(max_entries, 1); +} flow_map SEC(".maps"); static inline int throttle_flow(struct __sk_buff *skb) { diff --git a/tools/testing/selftests/bpf/progs/test_tcp_check_syncookie_kern.c b/tools/testing/selftests/bpf/progs/test_tcp_check_syncookie_kern.c index cd747cd93dbe..6edebce563b5 100644 --- a/tools/testing/selftests/bpf/progs/test_tcp_check_syncookie_kern.c +++ b/tools/testing/selftests/bpf/progs/test_tcp_check_syncookie_kern.c @@ -16,12 +16,12 @@ #include <bpf/bpf_helpers.h> #include <bpf/bpf_endian.h> -struct bpf_map_def SEC("maps") results = { - .type = BPF_MAP_TYPE_ARRAY, - .key_size = sizeof(__u32), - .value_size = sizeof(__u32), - .max_entries = 3, -}; +struct { + __uint(type, BPF_MAP_TYPE_ARRAY); + __type(key, __u32); + __type(value, __u32); + __uint(max_entries, 3); +} results SEC(".maps"); static __always_inline __s64 gen_syncookie(void *data_end, struct bpf_sock *sk, void *iph, __u32 ip_size, diff --git a/tools/testing/selftests/bpf/progs/test_xdp_adjust_tail_grow.c b/tools/testing/selftests/bpf/progs/test_xdp_adjust_tail_grow.c index 199c61b7d062..53b64c999450 100644 --- a/tools/testing/selftests/bpf/progs/test_xdp_adjust_tail_grow.c +++ b/tools/testing/selftests/bpf/progs/test_xdp_adjust_tail_grow.c @@ -7,11 +7,10 @@ int _xdp_adjust_tail_grow(struct xdp_md *xdp) { void *data_end = (void *)(long)xdp->data_end; void *data = (void *)(long)xdp->data; - unsigned int data_len; + int data_len = bpf_xdp_get_buff_len(xdp); int offset = 0; /* Data length determine test case */ - data_len = data_end - data; if (data_len == 54) { /* sizeof(pkt_v4) */ offset = 4096; /* test too large offset */ @@ -20,7 +19,12 @@ int _xdp_adjust_tail_grow(struct xdp_md *xdp) } else if (data_len == 64) { offset = 128; } else if (data_len == 128) { - offset = 4096 - 256 - 320 - data_len; /* Max tail grow 3520 */ + /* Max tail grow 3520 */ + offset = 4096 - 256 - 320 - data_len; + } else if (data_len == 9000) { + offset = 10; + } else if (data_len == 9001) { + offset = 4096; } else { return XDP_ABORTED; /* No matching test */ } diff --git a/tools/testing/selftests/bpf/progs/test_xdp_adjust_tail_shrink.c b/tools/testing/selftests/bpf/progs/test_xdp_adjust_tail_shrink.c index b7448253d135..ca68c038357c 100644 --- a/tools/testing/selftests/bpf/progs/test_xdp_adjust_tail_shrink.c +++ b/tools/testing/selftests/bpf/progs/test_xdp_adjust_tail_shrink.c @@ -12,14 +12,38 @@ SEC("xdp") int _xdp_adjust_tail_shrink(struct xdp_md *xdp) { - void *data_end = (void *)(long)xdp->data_end; - void *data = (void *)(long)xdp->data; + __u8 *data_end = (void *)(long)xdp->data_end; + __u8 *data = (void *)(long)xdp->data; int offset = 0; - if (data_end - data == 54) /* sizeof(pkt_v4) */ + switch (bpf_xdp_get_buff_len(xdp)) { + case 54: + /* sizeof(pkt_v4) */ offset = 256; /* shrink too much */ - else + break; + case 9000: + /* non-linear buff test cases */ + if (data + 1 > data_end) + return XDP_DROP; + + switch (data[0]) { + case 0: + offset = 10; + break; + case 1: + offset = 4100; + break; + case 2: + offset = 8200; + break; + default: + return XDP_DROP; + } + break; + default: offset = 20; + break; + } if (bpf_xdp_adjust_tail(xdp, 0 - offset)) return XDP_DROP; return XDP_TX; diff --git a/tools/testing/selftests/bpf/progs/test_xdp_bpf2bpf.c b/tools/testing/selftests/bpf/progs/test_xdp_bpf2bpf.c index 58cf4345f5cc..3379d303f41a 100644 --- a/tools/testing/selftests/bpf/progs/test_xdp_bpf2bpf.c +++ b/tools/testing/selftests/bpf/progs/test_xdp_bpf2bpf.c @@ -49,7 +49,7 @@ int BPF_PROG(trace_on_entry, struct xdp_buff *xdp) void *data = (void *)(long)xdp->data; meta.ifindex = xdp->rxq->dev->ifindex; - meta.pkt_len = data_end - data; + meta.pkt_len = bpf_xdp_get_buff_len((struct xdp_md *)xdp); bpf_xdp_output(xdp, &perf_buf_map, ((__u64) meta.pkt_len << 32) | BPF_F_CURRENT_CPU, diff --git a/tools/testing/selftests/bpf/progs/test_xdp_update_frags.c b/tools/testing/selftests/bpf/progs/test_xdp_update_frags.c new file mode 100644 index 000000000000..2a3496d8e327 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_xdp_update_frags.c @@ -0,0 +1,42 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + */ +#include <linux/bpf.h> +#include <linux/if_ether.h> +#include <bpf/bpf_helpers.h> + +int _version SEC("version") = 1; + +SEC("xdp.frags") +int xdp_adjust_frags(struct xdp_md *xdp) +{ + __u8 *data_end = (void *)(long)xdp->data_end; + __u8 *data = (void *)(long)xdp->data; + __u8 val[16] = {}; + __u32 offset; + int err; + + if (data + sizeof(__u32) > data_end) + return XDP_DROP; + + offset = *(__u32 *)data; + err = bpf_xdp_load_bytes(xdp, offset, val, sizeof(val)); + if (err < 0) + return XDP_DROP; + + if (val[0] != 0xaa || val[15] != 0xaa) /* marker */ + return XDP_DROP; + + val[0] = 0xbb; /* update the marker */ + val[15] = 0xbb; + err = bpf_xdp_store_bytes(xdp, offset, val, sizeof(val)); + if (err < 0) + return XDP_DROP; + + return XDP_PASS; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/test_xdp_with_cpumap_frags_helpers.c b/tools/testing/selftests/bpf/progs/test_xdp_with_cpumap_frags_helpers.c new file mode 100644 index 000000000000..62fb7cd4d87a --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_xdp_with_cpumap_frags_helpers.c @@ -0,0 +1,27 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include <linux/bpf.h> +#include <bpf/bpf_helpers.h> + +#define IFINDEX_LO 1 + +struct { + __uint(type, BPF_MAP_TYPE_CPUMAP); + __uint(key_size, sizeof(__u32)); + __uint(value_size, sizeof(struct bpf_cpumap_val)); + __uint(max_entries, 4); +} cpu_map SEC(".maps"); + +SEC("xdp_cpumap/dummy_cm") +int xdp_dummy_cm(struct xdp_md *ctx) +{ + return XDP_PASS; +} + +SEC("xdp.frags/cpumap") +int xdp_dummy_cm_frags(struct xdp_md *ctx) +{ + return XDP_PASS; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/test_xdp_with_cpumap_helpers.c b/tools/testing/selftests/bpf/progs/test_xdp_with_cpumap_helpers.c index 532025057711..48007f17dfa8 100644 --- a/tools/testing/selftests/bpf/progs/test_xdp_with_cpumap_helpers.c +++ b/tools/testing/selftests/bpf/progs/test_xdp_with_cpumap_helpers.c @@ -33,4 +33,10 @@ int xdp_dummy_cm(struct xdp_md *ctx) return XDP_PASS; } +SEC("xdp.frags/cpumap") +int xdp_dummy_cm_frags(struct xdp_md *ctx) +{ + return XDP_PASS; +} + char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/test_xdp_with_devmap_frags_helpers.c b/tools/testing/selftests/bpf/progs/test_xdp_with_devmap_frags_helpers.c new file mode 100644 index 000000000000..e1caf510b7d2 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_xdp_with_devmap_frags_helpers.c @@ -0,0 +1,27 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <linux/bpf.h> +#include <bpf/bpf_helpers.h> + +struct { + __uint(type, BPF_MAP_TYPE_DEVMAP); + __uint(key_size, sizeof(__u32)); + __uint(value_size, sizeof(struct bpf_devmap_val)); + __uint(max_entries, 4); +} dm_ports SEC(".maps"); + +/* valid program on DEVMAP entry via SEC name; + * has access to egress and ingress ifindex + */ +SEC("xdp_devmap/map_prog") +int xdp_dummy_dm(struct xdp_md *ctx) +{ + return XDP_PASS; +} + +SEC("xdp.frags/devmap") +int xdp_dummy_dm_frags(struct xdp_md *ctx) +{ + return XDP_PASS; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/test_xdp_with_devmap_helpers.c b/tools/testing/selftests/bpf/progs/test_xdp_with_devmap_helpers.c index 1e6b9c38ea6d..8ae11fab8316 100644 --- a/tools/testing/selftests/bpf/progs/test_xdp_with_devmap_helpers.c +++ b/tools/testing/selftests/bpf/progs/test_xdp_with_devmap_helpers.c @@ -40,4 +40,11 @@ int xdp_dummy_dm(struct xdp_md *ctx) return XDP_PASS; } + +SEC("xdp.frags/devmap") +int xdp_dummy_dm_frags(struct xdp_md *ctx) +{ + return XDP_PASS; +} + char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c index 76cd903117af..29bbaa58233c 100644 --- a/tools/testing/selftests/bpf/test_verifier.c +++ b/tools/testing/selftests/bpf/test_verifier.c @@ -31,6 +31,7 @@ #include <linux/if_ether.h> #include <linux/btf.h> +#include <bpf/btf.h> #include <bpf/bpf.h> #include <bpf/libbpf.h> @@ -66,6 +67,11 @@ static bool unpriv_disabled = false; static int skips; static bool verbose = false; +struct kfunc_btf_id_pair { + const char *kfunc; + int insn_idx; +}; + struct bpf_test { const char *descr; struct bpf_insn insns[MAX_INSNS]; @@ -92,6 +98,7 @@ struct bpf_test { int fixup_map_reuseport_array[MAX_FIXUPS]; int fixup_map_ringbuf[MAX_FIXUPS]; int fixup_map_timer[MAX_FIXUPS]; + struct kfunc_btf_id_pair fixup_kfunc_btf_id[MAX_FIXUPS]; /* Expected verifier log output for result REJECT or VERBOSE_ACCEPT. * Can be a tab-separated sequence of expected strings. An empty string * means no log verification. @@ -744,6 +751,7 @@ static void do_test_fixup(struct bpf_test *test, enum bpf_prog_type prog_type, int *fixup_map_reuseport_array = test->fixup_map_reuseport_array; int *fixup_map_ringbuf = test->fixup_map_ringbuf; int *fixup_map_timer = test->fixup_map_timer; + struct kfunc_btf_id_pair *fixup_kfunc_btf_id = test->fixup_kfunc_btf_id; if (test->fill_helper) { test->fill_insns = calloc(MAX_TEST_INSNS, sizeof(struct bpf_insn)); @@ -936,6 +944,26 @@ static void do_test_fixup(struct bpf_test *test, enum bpf_prog_type prog_type, fixup_map_timer++; } while (*fixup_map_timer); } + + /* Patch in kfunc BTF IDs */ + if (fixup_kfunc_btf_id->kfunc) { + struct btf *btf; + int btf_id; + + do { + btf_id = 0; + btf = btf__load_vmlinux_btf(); + if (btf) { + btf_id = btf__find_by_name_kind(btf, + fixup_kfunc_btf_id->kfunc, + BTF_KIND_FUNC); + btf_id = btf_id < 0 ? 0 : btf_id; + } + btf__free(btf); + prog[fixup_kfunc_btf_id->insn_idx].imm = btf_id; + fixup_kfunc_btf_id++; + } while (fixup_kfunc_btf_id->kfunc); + } } struct libcap { diff --git a/tools/testing/selftests/bpf/verifier/calls.c b/tools/testing/selftests/bpf/verifier/calls.c index d7b74eb28333..829be2b9e08e 100644 --- a/tools/testing/selftests/bpf/verifier/calls.c +++ b/tools/testing/selftests/bpf/verifier/calls.c @@ -22,6 +22,81 @@ .result = ACCEPT, }, { + "calls: invalid kfunc call: ptr_to_mem to struct with non-scalar", + .insns = { + BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, BPF_PSEUDO_KFUNC_CALL, 0, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .result = REJECT, + .errstr = "arg#0 pointer type STRUCT prog_test_fail1 must point to scalar", + .fixup_kfunc_btf_id = { + { "bpf_kfunc_call_test_fail1", 2 }, + }, +}, +{ + "calls: invalid kfunc call: ptr_to_mem to struct with nesting depth > 4", + .insns = { + BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, BPF_PSEUDO_KFUNC_CALL, 0, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .result = REJECT, + .errstr = "max struct nesting depth exceeded\narg#0 pointer type STRUCT prog_test_fail2", + .fixup_kfunc_btf_id = { + { "bpf_kfunc_call_test_fail2", 2 }, + }, +}, +{ + "calls: invalid kfunc call: ptr_to_mem to struct with FAM", + .insns = { + BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, BPF_PSEUDO_KFUNC_CALL, 0, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .result = REJECT, + .errstr = "arg#0 pointer type STRUCT prog_test_fail3 must point to scalar", + .fixup_kfunc_btf_id = { + { "bpf_kfunc_call_test_fail3", 2 }, + }, +}, +{ + "calls: invalid kfunc call: reg->type != PTR_TO_CTX", + .insns = { + BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, BPF_PSEUDO_KFUNC_CALL, 0, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .result = REJECT, + .errstr = "arg#0 expected pointer to ctx, but got PTR", + .fixup_kfunc_btf_id = { + { "bpf_kfunc_call_test_pass_ctx", 2 }, + }, +}, +{ + "calls: invalid kfunc call: void * not allowed in func proto without mem size arg", + .insns = { + BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, BPF_PSEUDO_KFUNC_CALL, 0, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .result = REJECT, + .errstr = "arg#0 pointer type UNKNOWN must point to scalar", + .fixup_kfunc_btf_id = { + { "bpf_kfunc_call_test_mem_len_fail1", 2 }, + }, +}, +{ "calls: basic sanity", .insns = { BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 2), diff --git a/tools/testing/selftests/bpf/xdpxceiver.c b/tools/testing/selftests/bpf/xdpxceiver.c index 0a5d23da486d..ffa5502ad95e 100644 --- a/tools/testing/selftests/bpf/xdpxceiver.c +++ b/tools/testing/selftests/bpf/xdpxceiver.c @@ -906,7 +906,10 @@ static bool rx_stats_are_valid(struct ifobject *ifobject) return true; case STAT_TEST_RX_FULL: xsk_stat = stats.rx_ring_full; - expected_stat -= RX_FULL_RXQSIZE; + if (ifobject->umem->num_frames < XSK_RING_PROD__DEFAULT_NUM_DESCS) + expected_stat = ifobject->umem->num_frames - RX_FULL_RXQSIZE; + else + expected_stat = XSK_RING_PROD__DEFAULT_NUM_DESCS - RX_FULL_RXQSIZE; break; case STAT_TEST_RX_FILL_EMPTY: xsk_stat = stats.rx_fill_ring_empty_descs; |