diff options
Diffstat (limited to 'samples')
97 files changed, 3130 insertions, 463 deletions
diff --git a/samples/auxdisplay/Makefile b/samples/auxdisplay/Makefile index 05e471feb6e5..0273bab27233 100644 --- a/samples/auxdisplay/Makefile +++ b/samples/auxdisplay/Makefile @@ -1,3 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0 CC := $(CROSS_COMPILE)gcc CFLAGS := -I../../usr/include diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile index 6c7468eb3684..9b4a66e3363e 100644 --- a/samples/bpf/Makefile +++ b/samples/bpf/Makefile @@ -1,3 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0 # kbuild trick to avoid linker error. Can be omitted if a module is built. obj- := dummy.o @@ -36,6 +37,11 @@ hostprogs-y += lwt_len_hist hostprogs-y += xdp_tx_iptunnel hostprogs-y += test_map_in_map hostprogs-y += per_socket_stats_example +hostprogs-y += load_sock_ops +hostprogs-y += xdp_redirect +hostprogs-y += xdp_redirect_map +hostprogs-y += xdp_monitor +hostprogs-y += syscall_tp # Libbpf dependencies LIBBPF := ../../tools/lib/bpf/bpf.o @@ -52,6 +58,7 @@ tracex3-objs := bpf_load.o $(LIBBPF) tracex3_user.o tracex4-objs := bpf_load.o $(LIBBPF) tracex4_user.o tracex5-objs := bpf_load.o $(LIBBPF) tracex5_user.o tracex6-objs := bpf_load.o $(LIBBPF) tracex6_user.o +load_sock_ops-objs := bpf_load.o $(LIBBPF) load_sock_ops.o test_probe_write_user-objs := bpf_load.o $(LIBBPF) test_probe_write_user_user.o trace_output-objs := bpf_load.o $(LIBBPF) trace_output_user.o lathist-objs := bpf_load.o $(LIBBPF) lathist_user.o @@ -76,6 +83,10 @@ lwt_len_hist-objs := bpf_load.o $(LIBBPF) lwt_len_hist_user.o xdp_tx_iptunnel-objs := bpf_load.o $(LIBBPF) xdp_tx_iptunnel_user.o test_map_in_map-objs := bpf_load.o $(LIBBPF) test_map_in_map_user.o per_socket_stats_example-objs := $(LIBBPF) cookie_uid_helper_example.o +xdp_redirect-objs := bpf_load.o $(LIBBPF) xdp_redirect_user.o +xdp_redirect_map-objs := bpf_load.o $(LIBBPF) xdp_redirect_map_user.o +xdp_monitor-objs := bpf_load.o $(LIBBPF) xdp_monitor_user.o +syscall_tp-objs := bpf_load.o $(LIBBPF) syscall_tp_user.o # Tell kbuild to always build the programs always := $(hostprogs-y) @@ -111,6 +122,16 @@ always += lwt_len_hist_kern.o always += xdp_tx_iptunnel_kern.o always += test_map_in_map_kern.o always += cookie_uid_helper_example.o +always += tcp_synrto_kern.o +always += tcp_rwnd_kern.o +always += tcp_bufs_kern.o +always += tcp_cong_kern.o +always += tcp_iw_kern.o +always += tcp_clamp_kern.o +always += xdp_redirect_kern.o +always += xdp_redirect_map_kern.o +always += xdp_monitor_kern.o +always += syscall_tp_kern.o HOSTCFLAGS += -I$(objtree)/usr/include HOSTCFLAGS += -I$(srctree)/tools/lib/ @@ -130,6 +151,7 @@ HOSTLOADLIBES_tracex4 += -lelf -lrt HOSTLOADLIBES_tracex5 += -lelf HOSTLOADLIBES_tracex6 += -lelf HOSTLOADLIBES_test_cgrp2_sock2 += -lelf +HOSTLOADLIBES_load_sock_ops += -lelf HOSTLOADLIBES_test_probe_write_user += -lelf HOSTLOADLIBES_trace_output += -lelf -lrt HOSTLOADLIBES_lathist += -lelf @@ -146,6 +168,10 @@ HOSTLOADLIBES_tc_l2_redirect += -l elf HOSTLOADLIBES_lwt_len_hist += -l elf HOSTLOADLIBES_xdp_tx_iptunnel += -lelf HOSTLOADLIBES_test_map_in_map += -lelf +HOSTLOADLIBES_xdp_redirect += -lelf +HOSTLOADLIBES_xdp_redirect_map += -lelf +HOSTLOADLIBES_xdp_monitor += -lelf +HOSTLOADLIBES_syscall_tp += -lelf # Allows pointing LLC/CLANG to a LLVM backend with bpf support, redefine on cmdline: # make samples/bpf/ LLC=~/git/llvm/build/bin/llc CLANG=~/git/llvm/build/bin/clang @@ -160,6 +186,17 @@ clean: $(MAKE) -C ../../ M=$(CURDIR) clean @rm -f *~ +$(obj)/syscall_nrs.s: $(src)/syscall_nrs.c + $(call if_changed_dep,cc_s_c) + +$(obj)/syscall_nrs.h: $(obj)/syscall_nrs.s FORCE + $(call filechk,offsets,__SYSCALL_NRS_H__) + +clean-files += syscall_nrs.h + +FORCE: + + # Verify LLVM compiler tools are available and bpf target is supported by llc .PHONY: verify_cmds verify_target_bpf $(CLANG) $(LLC) @@ -180,11 +217,14 @@ verify_target_bpf: verify_cmds $(src)/*.c: verify_target_bpf +$(obj)/tracex5_kern.o: $(obj)/syscall_nrs.h + # asm/sysreg.h - inline assembly used by it is incompatible with llvm. # But, there is no easy way to fix it, so just exclude it since it is # useless for BPF samples. $(obj)/%.o: $(src)/%.c - $(CLANG) $(NOSTDINC_FLAGS) $(LINUXINCLUDE) $(EXTRA_CFLAGS) \ + $(CLANG) $(NOSTDINC_FLAGS) $(LINUXINCLUDE) $(EXTRA_CFLAGS) -I$(obj) \ + -I$(srctree)/tools/testing/selftests/bpf/ \ -D__KERNEL__ -D__ASM_SYSREG_H -Wno-unused-value -Wno-pointer-sign \ -Wno-compare-distinct-pointer-types \ -Wno-gnu-variable-sized-type-not-at-end \ diff --git a/samples/bpf/bpf_helpers.h b/samples/bpf/bpf_helpers.h deleted file mode 100644 index 9a9c95f2c9fb..000000000000 --- a/samples/bpf/bpf_helpers.h +++ /dev/null @@ -1,181 +0,0 @@ -#ifndef __BPF_HELPERS_H -#define __BPF_HELPERS_H - -/* helper macro to place programs, maps, license in - * different sections in elf_bpf file. Section names - * are interpreted by elf_bpf loader - */ -#define SEC(NAME) __attribute__((section(NAME), used)) - -/* helper functions called from eBPF programs written in C */ -static void *(*bpf_map_lookup_elem)(void *map, void *key) = - (void *) BPF_FUNC_map_lookup_elem; -static int (*bpf_map_update_elem)(void *map, void *key, void *value, - unsigned long long flags) = - (void *) BPF_FUNC_map_update_elem; -static int (*bpf_map_delete_elem)(void *map, void *key) = - (void *) BPF_FUNC_map_delete_elem; -static int (*bpf_probe_read)(void *dst, int size, void *unsafe_ptr) = - (void *) BPF_FUNC_probe_read; -static unsigned long long (*bpf_ktime_get_ns)(void) = - (void *) BPF_FUNC_ktime_get_ns; -static int (*bpf_trace_printk)(const char *fmt, int fmt_size, ...) = - (void *) BPF_FUNC_trace_printk; -static void (*bpf_tail_call)(void *ctx, void *map, int index) = - (void *) BPF_FUNC_tail_call; -static unsigned long long (*bpf_get_smp_processor_id)(void) = - (void *) BPF_FUNC_get_smp_processor_id; -static unsigned long long (*bpf_get_current_pid_tgid)(void) = - (void *) BPF_FUNC_get_current_pid_tgid; -static unsigned long long (*bpf_get_current_uid_gid)(void) = - (void *) BPF_FUNC_get_current_uid_gid; -static int (*bpf_get_current_comm)(void *buf, int buf_size) = - (void *) BPF_FUNC_get_current_comm; -static int (*bpf_perf_event_read)(void *map, int index) = - (void *) BPF_FUNC_perf_event_read; -static int (*bpf_clone_redirect)(void *ctx, int ifindex, int flags) = - (void *) BPF_FUNC_clone_redirect; -static int (*bpf_redirect)(int ifindex, int flags) = - (void *) BPF_FUNC_redirect; -static int (*bpf_perf_event_output)(void *ctx, void *map, - unsigned long long flags, void *data, - int size) = - (void *) BPF_FUNC_perf_event_output; -static int (*bpf_get_stackid)(void *ctx, void *map, int flags) = - (void *) BPF_FUNC_get_stackid; -static int (*bpf_probe_write_user)(void *dst, void *src, int size) = - (void *) BPF_FUNC_probe_write_user; -static int (*bpf_current_task_under_cgroup)(void *map, int index) = - (void *) BPF_FUNC_current_task_under_cgroup; -static int (*bpf_skb_get_tunnel_key)(void *ctx, void *key, int size, int flags) = - (void *) BPF_FUNC_skb_get_tunnel_key; -static int (*bpf_skb_set_tunnel_key)(void *ctx, void *key, int size, int flags) = - (void *) BPF_FUNC_skb_set_tunnel_key; -static int (*bpf_skb_get_tunnel_opt)(void *ctx, void *md, int size) = - (void *) BPF_FUNC_skb_get_tunnel_opt; -static int (*bpf_skb_set_tunnel_opt)(void *ctx, void *md, int size) = - (void *) BPF_FUNC_skb_set_tunnel_opt; -static unsigned long long (*bpf_get_prandom_u32)(void) = - (void *) BPF_FUNC_get_prandom_u32; -static int (*bpf_xdp_adjust_head)(void *ctx, int offset) = - (void *) BPF_FUNC_xdp_adjust_head; - -/* llvm builtin functions that eBPF C program may use to - * emit BPF_LD_ABS and BPF_LD_IND instructions - */ -struct sk_buff; -unsigned long long load_byte(void *skb, - unsigned long long off) asm("llvm.bpf.load.byte"); -unsigned long long load_half(void *skb, - unsigned long long off) asm("llvm.bpf.load.half"); -unsigned long long load_word(void *skb, - unsigned long long off) asm("llvm.bpf.load.word"); - -/* a helper structure used by eBPF C program - * to describe map attributes to elf_bpf loader - */ -struct bpf_map_def { - unsigned int type; - unsigned int key_size; - unsigned int value_size; - unsigned int max_entries; - unsigned int map_flags; - unsigned int inner_map_idx; -}; - -static int (*bpf_skb_load_bytes)(void *ctx, int off, void *to, int len) = - (void *) BPF_FUNC_skb_load_bytes; -static int (*bpf_skb_store_bytes)(void *ctx, int off, void *from, int len, int flags) = - (void *) BPF_FUNC_skb_store_bytes; -static int (*bpf_l3_csum_replace)(void *ctx, int off, int from, int to, int flags) = - (void *) BPF_FUNC_l3_csum_replace; -static int (*bpf_l4_csum_replace)(void *ctx, int off, int from, int to, int flags) = - (void *) BPF_FUNC_l4_csum_replace; -static int (*bpf_skb_under_cgroup)(void *ctx, void *map, int index) = - (void *) BPF_FUNC_skb_under_cgroup; -static int (*bpf_skb_change_head)(void *, int len, int flags) = - (void *) BPF_FUNC_skb_change_head; - -#if defined(__x86_64__) - -#define PT_REGS_PARM1(x) ((x)->di) -#define PT_REGS_PARM2(x) ((x)->si) -#define PT_REGS_PARM3(x) ((x)->dx) -#define PT_REGS_PARM4(x) ((x)->cx) -#define PT_REGS_PARM5(x) ((x)->r8) -#define PT_REGS_RET(x) ((x)->sp) -#define PT_REGS_FP(x) ((x)->bp) -#define PT_REGS_RC(x) ((x)->ax) -#define PT_REGS_SP(x) ((x)->sp) -#define PT_REGS_IP(x) ((x)->ip) - -#elif defined(__s390x__) - -#define PT_REGS_PARM1(x) ((x)->gprs[2]) -#define PT_REGS_PARM2(x) ((x)->gprs[3]) -#define PT_REGS_PARM3(x) ((x)->gprs[4]) -#define PT_REGS_PARM4(x) ((x)->gprs[5]) -#define PT_REGS_PARM5(x) ((x)->gprs[6]) -#define PT_REGS_RET(x) ((x)->gprs[14]) -#define PT_REGS_FP(x) ((x)->gprs[11]) /* Works only with CONFIG_FRAME_POINTER */ -#define PT_REGS_RC(x) ((x)->gprs[2]) -#define PT_REGS_SP(x) ((x)->gprs[15]) -#define PT_REGS_IP(x) ((x)->psw.addr) - -#elif defined(__aarch64__) - -#define PT_REGS_PARM1(x) ((x)->regs[0]) -#define PT_REGS_PARM2(x) ((x)->regs[1]) -#define PT_REGS_PARM3(x) ((x)->regs[2]) -#define PT_REGS_PARM4(x) ((x)->regs[3]) -#define PT_REGS_PARM5(x) ((x)->regs[4]) -#define PT_REGS_RET(x) ((x)->regs[30]) -#define PT_REGS_FP(x) ((x)->regs[29]) /* Works only with CONFIG_FRAME_POINTER */ -#define PT_REGS_RC(x) ((x)->regs[0]) -#define PT_REGS_SP(x) ((x)->sp) -#define PT_REGS_IP(x) ((x)->pc) - -#elif defined(__powerpc__) - -#define PT_REGS_PARM1(x) ((x)->gpr[3]) -#define PT_REGS_PARM2(x) ((x)->gpr[4]) -#define PT_REGS_PARM3(x) ((x)->gpr[5]) -#define PT_REGS_PARM4(x) ((x)->gpr[6]) -#define PT_REGS_PARM5(x) ((x)->gpr[7]) -#define PT_REGS_RC(x) ((x)->gpr[3]) -#define PT_REGS_SP(x) ((x)->sp) -#define PT_REGS_IP(x) ((x)->nip) - -#elif defined(__sparc__) - -#define PT_REGS_PARM1(x) ((x)->u_regs[UREG_I0]) -#define PT_REGS_PARM2(x) ((x)->u_regs[UREG_I1]) -#define PT_REGS_PARM3(x) ((x)->u_regs[UREG_I2]) -#define PT_REGS_PARM4(x) ((x)->u_regs[UREG_I3]) -#define PT_REGS_PARM5(x) ((x)->u_regs[UREG_I4]) -#define PT_REGS_RET(x) ((x)->u_regs[UREG_I7]) -#define PT_REGS_RC(x) ((x)->u_regs[UREG_I0]) -#define PT_REGS_SP(x) ((x)->u_regs[UREG_FP]) -#if defined(__arch64__) -#define PT_REGS_IP(x) ((x)->tpc) -#else -#define PT_REGS_IP(x) ((x)->pc) -#endif - -#endif - -#ifdef __powerpc__ -#define BPF_KPROBE_READ_RET_IP(ip, ctx) ({ (ip) = (ctx)->link; }) -#define BPF_KRETPROBE_READ_RET_IP BPF_KPROBE_READ_RET_IP -#elif defined(__sparc__) -#define BPF_KPROBE_READ_RET_IP(ip, ctx) ({ (ip) = PT_REGS_RET(ctx); }) -#define BPF_KRETPROBE_READ_RET_IP BPF_KPROBE_READ_RET_IP -#else -#define BPF_KPROBE_READ_RET_IP(ip, ctx) ({ \ - bpf_probe_read(&(ip), sizeof(ip), (void *)PT_REGS_RET(ctx)); }) -#define BPF_KRETPROBE_READ_RET_IP(ip, ctx) ({ \ - bpf_probe_read(&(ip), sizeof(ip), \ - (void *)(PT_REGS_FP(ctx) + sizeof(ip))); }) -#endif - -#endif diff --git a/samples/bpf/bpf_load.c b/samples/bpf/bpf_load.c index 74456b3eb89a..2325d7ad76df 100644 --- a/samples/bpf/bpf_load.c +++ b/samples/bpf/bpf_load.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 #include <stdio.h> #include <sys/types.h> #include <sys/stat.h> @@ -64,6 +65,8 @@ static int load_and_attach(const char *event, struct bpf_insn *prog, int size) bool is_perf_event = strncmp(event, "perf_event", 10) == 0; bool is_cgroup_skb = strncmp(event, "cgroup/skb", 10) == 0; bool is_cgroup_sk = strncmp(event, "cgroup/sock", 11) == 0; + bool is_sockops = strncmp(event, "sockops", 7) == 0; + bool is_sk_skb = strncmp(event, "sk_skb", 6) == 0; size_t insns_cnt = size / sizeof(struct bpf_insn); enum bpf_prog_type prog_type; char buf[256]; @@ -89,6 +92,10 @@ static int load_and_attach(const char *event, struct bpf_insn *prog, int size) prog_type = BPF_PROG_TYPE_CGROUP_SKB; } else if (is_cgroup_sk) { prog_type = BPF_PROG_TYPE_CGROUP_SOCK; + } else if (is_sockops) { + prog_type = BPF_PROG_TYPE_SOCK_OPS; + } else if (is_sk_skb) { + prog_type = BPF_PROG_TYPE_SK_SKB; } else { printf("Unknown event '%s'\n", event); return -1; @@ -106,8 +113,11 @@ static int load_and_attach(const char *event, struct bpf_insn *prog, int size) if (is_xdp || is_perf_event || is_cgroup_skb || is_cgroup_sk) return 0; - if (is_socket) { - event += 6; + if (is_socket || is_sockops || is_sk_skb) { + if (is_socket) + event += 6; + else + event += 7; if (*event != '/') return 0; event++; @@ -192,7 +202,7 @@ static int load_and_attach(const char *event, struct bpf_insn *prog, int size) static int load_maps(struct bpf_map_data *maps, int nr_maps, fixup_map_cb fixup_map) { - int i; + int i, numa_node; for (i = 0; i < nr_maps; i++) { if (fixup_map) { @@ -204,21 +214,26 @@ static int load_maps(struct bpf_map_data *maps, int nr_maps, } } + numa_node = maps[i].def.map_flags & BPF_F_NUMA_NODE ? + maps[i].def.numa_node : -1; + if (maps[i].def.type == BPF_MAP_TYPE_ARRAY_OF_MAPS || maps[i].def.type == BPF_MAP_TYPE_HASH_OF_MAPS) { int inner_map_fd = map_fd[maps[i].def.inner_map_idx]; - map_fd[i] = bpf_create_map_in_map(maps[i].def.type, + map_fd[i] = bpf_create_map_in_map_node(maps[i].def.type, maps[i].def.key_size, inner_map_fd, maps[i].def.max_entries, - maps[i].def.map_flags); + maps[i].def.map_flags, + numa_node); } else { - map_fd[i] = bpf_create_map(maps[i].def.type, - maps[i].def.key_size, - maps[i].def.value_size, - maps[i].def.max_entries, - maps[i].def.map_flags); + map_fd[i] = bpf_create_map_node(maps[i].def.type, + maps[i].def.key_size, + maps[i].def.value_size, + maps[i].def.max_entries, + maps[i].def.map_flags, + numa_node); } if (map_fd[i] < 0) { printf("failed to create a map: %d %s\n", @@ -516,16 +531,18 @@ static int do_load_bpf_file(const char *path, fixup_map_cb fixup_map) processed_sec[maps_shndx] = true; } - /* load programs that need map fixup (relocations) */ + /* process all relo sections, and rewrite bpf insns for maps */ for (i = 1; i < ehdr.e_shnum; i++) { if (processed_sec[i]) continue; if (get_sec(elf, i, &ehdr, &shname, &shdr, &data)) continue; + if (shdr.sh_type == SHT_REL) { struct bpf_insn *insns; + /* locate prog sec that need map fixup (relocations) */ if (get_sec(elf, shdr.sh_info, &ehdr, &shname_prog, &shdr_prog, &data_prog)) continue; @@ -535,26 +552,15 @@ static int do_load_bpf_file(const char *path, fixup_map_cb fixup_map) continue; insns = (struct bpf_insn *) data_prog->d_buf; - - processed_sec[shdr.sh_info] = true; - processed_sec[i] = true; + processed_sec[i] = true; /* relo section */ if (parse_relo_and_apply(data, symbols, &shdr, insns, map_data, nr_maps)) continue; - - if (memcmp(shname_prog, "kprobe/", 7) == 0 || - memcmp(shname_prog, "kretprobe/", 10) == 0 || - memcmp(shname_prog, "tracepoint/", 11) == 0 || - memcmp(shname_prog, "xdp", 3) == 0 || - memcmp(shname_prog, "perf_event", 10) == 0 || - memcmp(shname_prog, "socket", 6) == 0 || - memcmp(shname_prog, "cgroup/", 7) == 0) - load_and_attach(shname_prog, insns, data_prog->d_size); } } - /* load programs that don't use maps */ + /* load programs */ for (i = 1; i < ehdr.e_shnum; i++) { if (processed_sec[i]) @@ -569,8 +575,14 @@ static int do_load_bpf_file(const char *path, fixup_map_cb fixup_map) memcmp(shname, "xdp", 3) == 0 || memcmp(shname, "perf_event", 10) == 0 || memcmp(shname, "socket", 6) == 0 || - memcmp(shname, "cgroup/", 7) == 0) - load_and_attach(shname, data->d_buf, data->d_size); + memcmp(shname, "cgroup/", 7) == 0 || + memcmp(shname, "sockops", 7) == 0 || + memcmp(shname, "sk_skb", 6) == 0) { + ret = load_and_attach(shname, data->d_buf, + data->d_size); + if (ret != 0) + goto done; + } } ret = 0; diff --git a/samples/bpf/bpf_load.h b/samples/bpf/bpf_load.h index ca0563d04744..7d57a4248893 100644 --- a/samples/bpf/bpf_load.h +++ b/samples/bpf/bpf_load.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __BPF_LOAD_H #define __BPF_LOAD_H @@ -13,6 +14,7 @@ struct bpf_map_def { unsigned int max_entries; unsigned int map_flags; unsigned int inner_map_idx; + unsigned int numa_node; }; struct bpf_map_data { diff --git a/samples/bpf/cgroup_helpers.c b/samples/bpf/cgroup_helpers.c index 9d1be9426401..09afaddfc9ba 100644 --- a/samples/bpf/cgroup_helpers.c +++ b/samples/bpf/cgroup_helpers.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 #define _GNU_SOURCE #include <sched.h> #include <sys/mount.h> diff --git a/samples/bpf/cgroup_helpers.h b/samples/bpf/cgroup_helpers.h index 78c55207b6bd..06485e0002b3 100644 --- a/samples/bpf/cgroup_helpers.h +++ b/samples/bpf/cgroup_helpers.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __CGROUP_HELPERS_H #define __CGROUP_HELPERS_H #include <errno.h> diff --git a/samples/bpf/libbpf.h b/samples/bpf/libbpf.h index 8ab36a04c174..18bfee5aab6b 100644 --- a/samples/bpf/libbpf.h +++ b/samples/bpf/libbpf.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ /* eBPF mini library */ #ifndef __LIBBPF_H #define __LIBBPF_H diff --git a/samples/bpf/load_sock_ops.c b/samples/bpf/load_sock_ops.c new file mode 100644 index 000000000000..e5da6cf71a3e --- /dev/null +++ b/samples/bpf/load_sock_ops.c @@ -0,0 +1,97 @@ +/* Copyright (c) 2017 Facebook + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + */ +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <linux/bpf.h> +#include "libbpf.h" +#include "bpf_load.h" +#include <unistd.h> +#include <errno.h> +#include <fcntl.h> +#include <linux/unistd.h> + +static void usage(char *pname) +{ + printf("USAGE:\n %s [-l] <cg-path> <prog filename>\n", pname); + printf("\tLoad and attach a sock_ops program to the specified " + "cgroup\n"); + printf("\tIf \"-l\" is used, the program will continue to run\n"); + printf("\tprinting the BPF log buffer\n"); + printf("\tIf the specified filename does not end in \".o\", it\n"); + printf("\tappends \"_kern.o\" to the name\n"); + printf("\n"); + printf(" %s -r <cg-path>\n", pname); + printf("\tDetaches the currently attached sock_ops program\n"); + printf("\tfrom the specified cgroup\n"); + printf("\n"); + exit(1); +} + +int main(int argc, char **argv) +{ + int logFlag = 0; + int error = 0; + char *cg_path; + char fn[500]; + char *prog; + int cg_fd; + + if (argc < 3) + usage(argv[0]); + + if (!strcmp(argv[1], "-r")) { + cg_path = argv[2]; + cg_fd = open(cg_path, O_DIRECTORY, O_RDONLY); + error = bpf_prog_detach(cg_fd, BPF_CGROUP_SOCK_OPS); + if (error) { + printf("ERROR: bpf_prog_detach: %d (%s)\n", + error, strerror(errno)); + return 2; + } + return 0; + } else if (!strcmp(argv[1], "-h")) { + usage(argv[0]); + } else if (!strcmp(argv[1], "-l")) { + logFlag = 1; + if (argc < 4) + usage(argv[0]); + } + + prog = argv[argc - 1]; + cg_path = argv[argc - 2]; + if (strlen(prog) > 480) { + fprintf(stderr, "ERROR: program name too long (> 480 chars)\n"); + return 3; + } + cg_fd = open(cg_path, O_DIRECTORY, O_RDONLY); + + if (!strcmp(prog + strlen(prog)-2, ".o")) + strcpy(fn, prog); + else + sprintf(fn, "%s_kern.o", prog); + if (logFlag) + printf("loading bpf file:%s\n", fn); + if (load_bpf_file(fn)) { + printf("ERROR: load_bpf_file failed for: %s\n", fn); + printf("%s", bpf_log_buf); + return 4; + } + if (logFlag) + printf("TCP BPF Loaded %s\n", fn); + + error = bpf_prog_attach(prog_fd[0], cg_fd, BPF_CGROUP_SOCK_OPS, 0); + if (error) { + printf("ERROR: bpf_prog_attach: %d (%s)\n", + error, strerror(errno)); + return 5; + } else if (logFlag) { + read_trace_pipe(); + } + + return error; +} diff --git a/samples/bpf/lwt_len_hist.sh b/samples/bpf/lwt_len_hist.sh index 7d567744c7fa..090b96eaf7f7 100644 --- a/samples/bpf/lwt_len_hist.sh +++ b/samples/bpf/lwt_len_hist.sh @@ -1,4 +1,5 @@ #!/bin/bash +# SPDX-License-Identifier: GPL-2.0 NS1=lwt_ns1 VETH0=tst_lwt1a diff --git a/samples/bpf/lwt_len_hist_user.c b/samples/bpf/lwt_len_hist_user.c index ec8f3bbcbef3..7fcb94c09112 100644 --- a/samples/bpf/lwt_len_hist_user.c +++ b/samples/bpf/lwt_len_hist_user.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 #include <linux/unistd.h> #include <linux/bpf.h> diff --git a/samples/bpf/map_perf_test_kern.c b/samples/bpf/map_perf_test_kern.c index 245165817fbe..098c857f1eda 100644 --- a/samples/bpf/map_perf_test_kern.c +++ b/samples/bpf/map_perf_test_kern.c @@ -40,6 +40,8 @@ struct bpf_map_def SEC("maps") inner_lru_hash_map = { .key_size = sizeof(u32), .value_size = sizeof(long), .max_entries = MAX_ENTRIES, + .map_flags = BPF_F_NUMA_NODE, + .numa_node = 0, }; struct bpf_map_def SEC("maps") array_of_lru_hashs = { @@ -86,6 +88,13 @@ struct bpf_map_def SEC("maps") array_map = { .max_entries = MAX_ENTRIES, }; +struct bpf_map_def SEC("maps") lru_hash_lookup_map = { + .type = BPF_MAP_TYPE_LRU_HASH, + .key_size = sizeof(u32), + .value_size = sizeof(long), + .max_entries = MAX_ENTRIES, +}; + SEC("kprobe/sys_getuid") int stress_hmap(struct pt_regs *ctx) { @@ -146,12 +155,23 @@ int stress_percpu_hmap_alloc(struct pt_regs *ctx) SEC("kprobe/sys_connect") int stress_lru_hmap_alloc(struct pt_regs *ctx) { + char fmt[] = "Failed at stress_lru_hmap_alloc. ret:%dn"; + union { + u16 dst6[8]; + struct { + u16 magic0; + u16 magic1; + u16 tcase; + u16 unused16; + u32 unused32; + u32 key; + }; + } test_params; struct sockaddr_in6 *in6; - u16 test_case, dst6[8]; + u16 test_case; int addrlen, ret; - char fmt[] = "Failed at stress_lru_hmap_alloc. ret:%d\n"; long val = 1; - u32 key = bpf_get_prandom_u32(); + u32 key = 0; in6 = (struct sockaddr_in6 *)PT_REGS_PARM2(ctx); addrlen = (int)PT_REGS_PARM3(ctx); @@ -159,14 +179,18 @@ int stress_lru_hmap_alloc(struct pt_regs *ctx) if (addrlen != sizeof(*in6)) return 0; - ret = bpf_probe_read(dst6, sizeof(dst6), &in6->sin6_addr); + ret = bpf_probe_read(test_params.dst6, sizeof(test_params.dst6), + &in6->sin6_addr); if (ret) goto done; - if (dst6[0] != 0xdead || dst6[1] != 0xbeef) + if (test_params.magic0 != 0xdead || + test_params.magic1 != 0xbeef) return 0; - test_case = dst6[7]; + test_case = test_params.tcase; + if (test_case != 3) + key = bpf_get_prandom_u32(); if (test_case == 0) { ret = bpf_map_update_elem(&lru_hash_map, &key, &val, BPF_ANY); @@ -186,6 +210,16 @@ int stress_lru_hmap_alloc(struct pt_regs *ctx) ret = bpf_map_update_elem(nolocal_lru_map, &key, &val, BPF_ANY); + } else if (test_case == 3) { + u32 i; + + key = test_params.key; + +#pragma clang loop unroll(full) + for (i = 0; i < 32; i++) { + bpf_map_lookup_elem(&lru_hash_lookup_map, &key); + key++; + } } else { ret = -EINVAL; } diff --git a/samples/bpf/map_perf_test_user.c b/samples/bpf/map_perf_test_user.c index 1a8894b5ac51..f388254896f6 100644 --- a/samples/bpf/map_perf_test_user.c +++ b/samples/bpf/map_perf_test_user.c @@ -46,6 +46,7 @@ enum test_type { HASH_LOOKUP, ARRAY_LOOKUP, INNER_LRU_HASH_PREALLOC, + LRU_HASH_LOOKUP, NR_TESTS, }; @@ -60,6 +61,7 @@ const char *test_map_names[NR_TESTS] = { [HASH_LOOKUP] = "hash_map", [ARRAY_LOOKUP] = "array_map", [INNER_LRU_HASH_PREALLOC] = "inner_lru_hash_map", + [LRU_HASH_LOOKUP] = "lru_hash_lookup_map", }; static int test_flags = ~0; @@ -67,6 +69,8 @@ static uint32_t num_map_entries; static uint32_t inner_lru_hash_size; static int inner_lru_hash_idx = -1; static int array_of_lru_hashs_idx = -1; +static int lru_hash_lookup_idx = -1; +static int lru_hash_lookup_test_entries = 32; static uint32_t max_cnt = 1000000; static int check_test_flags(enum test_type t) @@ -86,6 +90,32 @@ static void test_hash_prealloc(int cpu) cpu, max_cnt * 1000000000ll / (time_get_ns() - start_time)); } +static int pre_test_lru_hash_lookup(int tasks) +{ + int fd = map_fd[lru_hash_lookup_idx]; + uint32_t key; + long val = 1; + int ret; + + if (num_map_entries > lru_hash_lookup_test_entries) + lru_hash_lookup_test_entries = num_map_entries; + + /* Populate the lru_hash_map for LRU_HASH_LOOKUP perf test. + * + * It is fine that the user requests for a map with + * num_map_entries < 32 and some of the later lru hash lookup + * may return not found. For LRU map, we are not interested + * in such small map performance. + */ + for (key = 0; key < lru_hash_lookup_test_entries; key++) { + ret = bpf_map_update_elem(fd, &key, &val, BPF_NOEXIST); + if (ret) + return ret; + } + + return 0; +} + static void do_test_lru(enum test_type test, int cpu) { static int inner_lru_map_fds[MAX_NR_CPUS]; @@ -97,14 +127,20 @@ static void do_test_lru(enum test_type test, int cpu) if (test == INNER_LRU_HASH_PREALLOC) { int outer_fd = map_fd[array_of_lru_hashs_idx]; + unsigned int mycpu, mynode; assert(cpu < MAX_NR_CPUS); if (cpu) { + ret = syscall(__NR_getcpu, &mycpu, &mynode, NULL); + assert(!ret); + inner_lru_map_fds[cpu] = - bpf_create_map(BPF_MAP_TYPE_LRU_HASH, - sizeof(uint32_t), sizeof(long), - inner_lru_hash_size, 0); + bpf_create_map_node(BPF_MAP_TYPE_LRU_HASH, + sizeof(uint32_t), + sizeof(long), + inner_lru_hash_size, 0, + mynode); if (inner_lru_map_fds[cpu] == -1) { printf("cannot create BPF_MAP_TYPE_LRU_HASH %s(%d)\n", strerror(errno), errno); @@ -129,13 +165,17 @@ static void do_test_lru(enum test_type test, int cpu) if (test == LRU_HASH_PREALLOC) { test_name = "lru_hash_map_perf"; - in6.sin6_addr.s6_addr16[7] = 0; + in6.sin6_addr.s6_addr16[2] = 0; } else if (test == NOCOMMON_LRU_HASH_PREALLOC) { test_name = "nocommon_lru_hash_map_perf"; - in6.sin6_addr.s6_addr16[7] = 1; + in6.sin6_addr.s6_addr16[2] = 1; } else if (test == INNER_LRU_HASH_PREALLOC) { test_name = "inner_lru_hash_map_perf"; - in6.sin6_addr.s6_addr16[7] = 2; + in6.sin6_addr.s6_addr16[2] = 2; + } else if (test == LRU_HASH_LOOKUP) { + test_name = "lru_hash_lookup_perf"; + in6.sin6_addr.s6_addr16[2] = 3; + in6.sin6_addr.s6_addr32[3] = 0; } else { assert(0); } @@ -144,6 +184,11 @@ static void do_test_lru(enum test_type test, int cpu) for (i = 0; i < max_cnt; i++) { ret = connect(-1, (const struct sockaddr *)&in6, sizeof(in6)); assert(ret == -1 && errno == EBADF); + if (in6.sin6_addr.s6_addr32[3] < + lru_hash_lookup_test_entries - 32) + in6.sin6_addr.s6_addr32[3] += 32; + else + in6.sin6_addr.s6_addr32[3] = 0; } printf("%d:%s pre-alloc %lld events per sec\n", cpu, test_name, @@ -165,6 +210,11 @@ static void test_inner_lru_hash_prealloc(int cpu) do_test_lru(INNER_LRU_HASH_PREALLOC, cpu); } +static void test_lru_hash_lookup(int cpu) +{ + do_test_lru(LRU_HASH_LOOKUP, cpu); +} + static void test_percpu_hash_prealloc(int cpu) { __u64 start_time; @@ -237,6 +287,11 @@ static void test_array_lookup(int cpu) cpu, max_cnt * 1000000000ll * 64 / (time_get_ns() - start_time)); } +typedef int (*pre_test_func)(int tasks); +const pre_test_func pre_test_funcs[] = { + [LRU_HASH_LOOKUP] = pre_test_lru_hash_lookup, +}; + typedef void (*test_func)(int cpu); const test_func test_funcs[] = { [HASH_PREALLOC] = test_hash_prealloc, @@ -249,8 +304,25 @@ const test_func test_funcs[] = { [HASH_LOOKUP] = test_hash_lookup, [ARRAY_LOOKUP] = test_array_lookup, [INNER_LRU_HASH_PREALLOC] = test_inner_lru_hash_prealloc, + [LRU_HASH_LOOKUP] = test_lru_hash_lookup, }; +static int pre_test(int tasks) +{ + int i; + + for (i = 0; i < NR_TESTS; i++) { + if (pre_test_funcs[i] && check_test_flags(i)) { + int ret = pre_test_funcs[i](tasks); + + if (ret) + return ret; + } + } + + return 0; +} + static void loop(int cpu) { cpu_set_t cpuset; @@ -271,6 +343,8 @@ static void run_perf_test(int tasks) pid_t pid[tasks]; int i; + assert(!pre_test(tasks)); + for (i = 0; i < tasks; i++) { pid[i] = fork(); if (pid[i] == 0) { @@ -338,6 +412,9 @@ static void fixup_map(struct bpf_map_data *map, int idx) array_of_lru_hashs_idx = idx; } + if (!strcmp("lru_hash_lookup_map", map->name)) + lru_hash_lookup_idx = idx; + if (num_map_entries <= 0) return; diff --git a/samples/bpf/run_cookie_uid_helper_example.sh b/samples/bpf/run_cookie_uid_helper_example.sh index f898cfa2b1aa..fc6bc0451ab4 100755 --- a/samples/bpf/run_cookie_uid_helper_example.sh +++ b/samples/bpf/run_cookie_uid_helper_example.sh @@ -1,4 +1,5 @@ #!/bin/bash +# SPDX-License-Identifier: GPL-2.0 local_dir="$(pwd)" root_dir=$local_dir/../.. mnt_dir=$(mktemp -d --tmp) diff --git a/samples/bpf/sock_example.h b/samples/bpf/sock_example.h index d8014065d479..772d5dad8465 100644 --- a/samples/bpf/sock_example.h +++ b/samples/bpf/sock_example.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #include <stdlib.h> #include <stdio.h> #include <linux/unistd.h> diff --git a/samples/bpf/sock_flags_kern.c b/samples/bpf/sock_flags_kern.c index 533dd11a6baa..05dcdf8a4baa 100644 --- a/samples/bpf/sock_flags_kern.c +++ b/samples/bpf/sock_flags_kern.c @@ -9,8 +9,13 @@ SEC("cgroup/sock1") int bpf_prog1(struct bpf_sock *sk) { char fmt[] = "socket: family %d type %d protocol %d\n"; + char fmt2[] = "socket: uid %u gid %u\n"; + __u64 gid_uid = bpf_get_current_uid_gid(); + __u32 uid = gid_uid & 0xffffffff; + __u32 gid = gid_uid >> 32; bpf_trace_printk(fmt, sizeof(fmt), sk->family, sk->type, sk->protocol); + bpf_trace_printk(fmt2, sizeof(fmt2), uid, gid); /* block PF_INET6, SOCK_RAW, IPPROTO_ICMPV6 sockets * ie., make ping6 fail diff --git a/samples/bpf/sockex1_user.c b/samples/bpf/sockex1_user.c index 6cd2feb3e9b3..2be935c2627d 100644 --- a/samples/bpf/sockex1_user.c +++ b/samples/bpf/sockex1_user.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 #include <stdio.h> #include <assert.h> #include <linux/bpf.h> diff --git a/samples/bpf/sockex2_user.c b/samples/bpf/sockex2_user.c index 0e0207c90841..44fe0805b087 100644 --- a/samples/bpf/sockex2_user.c +++ b/samples/bpf/sockex2_user.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 #include <stdio.h> #include <assert.h> #include <linux/bpf.h> diff --git a/samples/bpf/sockex3_user.c b/samples/bpf/sockex3_user.c index b5524d417eb5..495ee02e2fb7 100644 --- a/samples/bpf/sockex3_user.c +++ b/samples/bpf/sockex3_user.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 #include <stdio.h> #include <assert.h> #include <linux/bpf.h> @@ -8,6 +9,10 @@ #include <arpa/inet.h> #include <sys/resource.h> +#define PARSE_IP 3 +#define PARSE_IP_PROG_FD (prog_fd[0]) +#define PROG_ARRAY_FD (map_fd[0]) + struct bpf_flow_keys { __be32 src; __be32 dst; @@ -28,7 +33,9 @@ int main(int argc, char **argv) struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY}; char filename[256]; FILE *f; - int i, sock; + int i, sock, err, id, key = PARSE_IP; + struct bpf_prog_info info = {}; + uint32_t info_len = sizeof(info); snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]); setrlimit(RLIMIT_MEMLOCK, &r); @@ -38,6 +45,13 @@ int main(int argc, char **argv) return 1; } + /* Test fd array lookup which returns the id of the bpf_prog */ + err = bpf_obj_get_info_by_fd(PARSE_IP_PROG_FD, &info, &info_len); + assert(!err); + err = bpf_map_lookup_elem(PROG_ARRAY_FD, &key, &id); + assert(!err); + assert(id == info.id); + sock = open_raw_sock("lo"); assert(setsockopt(sock, SOL_SOCKET, SO_ATTACH_BPF, &prog_fd[4], diff --git a/samples/bpf/spintest_user.c b/samples/bpf/spintest_user.c index 80676c25fa50..3d736219a31c 100644 --- a/samples/bpf/spintest_user.c +++ b/samples/bpf/spintest_user.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 #include <stdio.h> #include <unistd.h> #include <linux/bpf.h> diff --git a/samples/bpf/syscall_nrs.c b/samples/bpf/syscall_nrs.c new file mode 100644 index 000000000000..516e255cbe8f --- /dev/null +++ b/samples/bpf/syscall_nrs.c @@ -0,0 +1,13 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <uapi/linux/unistd.h> +#include <linux/kbuild.h> + +#define SYSNR(_NR) DEFINE(SYS ## _NR, _NR) + +void syscall_defines(void) +{ + COMMENT("Linux system call numbers."); + SYSNR(__NR_write); + SYSNR(__NR_read); + SYSNR(__NR_mmap); +} diff --git a/samples/bpf/syscall_tp_kern.c b/samples/bpf/syscall_tp_kern.c new file mode 100644 index 000000000000..9149c524d279 --- /dev/null +++ b/samples/bpf/syscall_tp_kern.c @@ -0,0 +1,62 @@ +/* Copyright (c) 2017 Facebook + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + */ +#include <uapi/linux/bpf.h> +#include "bpf_helpers.h" + +struct syscalls_enter_open_args { + unsigned long long unused; + long syscall_nr; + long filename_ptr; + long flags; + long mode; +}; + +struct syscalls_exit_open_args { + unsigned long long unused; + long syscall_nr; + long ret; +}; + +struct bpf_map_def SEC("maps") enter_open_map = { + .type = BPF_MAP_TYPE_ARRAY, + .key_size = sizeof(u32), + .value_size = sizeof(u32), + .max_entries = 1, +}; + +struct bpf_map_def SEC("maps") exit_open_map = { + .type = BPF_MAP_TYPE_ARRAY, + .key_size = sizeof(u32), + .value_size = sizeof(u32), + .max_entries = 1, +}; + +static __always_inline void count(void *map) +{ + u32 key = 0; + u32 *value, init_val = 1; + + value = bpf_map_lookup_elem(map, &key); + if (value) + *value += 1; + else + bpf_map_update_elem(map, &key, &init_val, BPF_NOEXIST); +} + +SEC("tracepoint/syscalls/sys_enter_open") +int trace_enter_open(struct syscalls_enter_open_args *ctx) +{ + count((void *)&enter_open_map); + return 0; +} + +SEC("tracepoint/syscalls/sys_exit_open") +int trace_enter_exit(struct syscalls_exit_open_args *ctx) +{ + count((void *)&exit_open_map); + return 0; +} diff --git a/samples/bpf/syscall_tp_user.c b/samples/bpf/syscall_tp_user.c new file mode 100644 index 000000000000..a3cb91ebf4e7 --- /dev/null +++ b/samples/bpf/syscall_tp_user.c @@ -0,0 +1,71 @@ +/* Copyright (c) 2017 Facebook + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + */ +#include <stdio.h> +#include <unistd.h> +#include <fcntl.h> +#include <stdlib.h> +#include <signal.h> +#include <linux/bpf.h> +#include <string.h> +#include <linux/perf_event.h> +#include <errno.h> +#include <assert.h> +#include <stdbool.h> +#include <sys/resource.h> +#include "libbpf.h" +#include "bpf_load.h" + +/* This program verifies bpf attachment to tracepoint sys_enter_* and sys_exit_*. + * This requires kernel CONFIG_FTRACE_SYSCALLS to be set. + */ + +static void verify_map(int map_id) +{ + __u32 key = 0; + __u32 val; + + if (bpf_map_lookup_elem(map_id, &key, &val) != 0) { + fprintf(stderr, "map_lookup failed: %s\n", strerror(errno)); + return; + } + if (val == 0) + fprintf(stderr, "failed: map #%d returns value 0\n", map_id); +} + +int main(int argc, char **argv) +{ + struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY}; + char filename[256]; + int fd; + + setrlimit(RLIMIT_MEMLOCK, &r); + snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]); + + if (load_bpf_file(filename)) { + fprintf(stderr, "%s", bpf_log_buf); + return 1; + } + + /* current load_bpf_file has perf_event_open default pid = -1 + * and cpu = 0, which permits attached bpf execution on + * all cpus for all pid's. bpf program execution ignores + * cpu affinity. + */ + /* trigger some "open" operations */ + fd = open(filename, O_RDONLY); + if (fd < 0) { + fprintf(stderr, "open failed: %s\n", strerror(errno)); + return 1; + } + close(fd); + + /* verify the map */ + verify_map(map_fd[0]); + verify_map(map_fd[1]); + + return 0; +} diff --git a/samples/bpf/tc_l2_redirect.sh b/samples/bpf/tc_l2_redirect.sh index 80a05591a140..37d95ef3c20f 100755 --- a/samples/bpf/tc_l2_redirect.sh +++ b/samples/bpf/tc_l2_redirect.sh @@ -1,4 +1,5 @@ #!/bin/bash +# SPDX-License-Identifier: GPL-2.0 [[ -z $TC ]] && TC='tc' [[ -z $IP ]] && IP='ip' diff --git a/samples/bpf/tcbpf2_kern.c b/samples/bpf/tcbpf2_kern.c index 9c823a609e75..370b749f5ee6 100644 --- a/samples/bpf/tcbpf2_kern.c +++ b/samples/bpf/tcbpf2_kern.c @@ -17,6 +17,7 @@ #include <uapi/linux/pkt_cls.h> #include <net/ipv6.h> #include "bpf_helpers.h" +#include "bpf_endian.h" #define _htonl __builtin_bswap32 #define ERROR(ret) do {\ @@ -38,6 +39,10 @@ struct vxlan_metadata { u32 gbp; }; +struct erspan_metadata { + __be32 index; +}; + SEC("gre_set_tunnel") int _gre_set_tunnel(struct __sk_buff *skb) { @@ -76,6 +81,63 @@ int _gre_get_tunnel(struct __sk_buff *skb) return TC_ACT_OK; } +SEC("erspan_set_tunnel") +int _erspan_set_tunnel(struct __sk_buff *skb) +{ + struct bpf_tunnel_key key; + struct erspan_metadata md; + int ret; + + __builtin_memset(&key, 0x0, sizeof(key)); + key.remote_ipv4 = 0xac100164; /* 172.16.1.100 */ + key.tunnel_id = 2; + key.tunnel_tos = 0; + key.tunnel_ttl = 64; + + ret = bpf_skb_set_tunnel_key(skb, &key, sizeof(key), BPF_F_ZERO_CSUM_TX); + if (ret < 0) { + ERROR(ret); + return TC_ACT_SHOT; + } + + md.index = htonl(123); + ret = bpf_skb_set_tunnel_opt(skb, &md, sizeof(md)); + if (ret < 0) { + ERROR(ret); + return TC_ACT_SHOT; + } + + return TC_ACT_OK; +} + +SEC("erspan_get_tunnel") +int _erspan_get_tunnel(struct __sk_buff *skb) +{ + char fmt[] = "key %d remote ip 0x%x erspan index 0x%x\n"; + struct bpf_tunnel_key key; + struct erspan_metadata md; + u32 index; + int ret; + + ret = bpf_skb_get_tunnel_key(skb, &key, sizeof(key), 0); + if (ret < 0) { + ERROR(ret); + return TC_ACT_SHOT; + } + + ret = bpf_skb_get_tunnel_opt(skb, &md, sizeof(md)); + if (ret < 0) { + ERROR(ret); + return TC_ACT_SHOT; + } + + index = bpf_ntohl(md.index); + bpf_trace_printk(fmt, sizeof(fmt), + key.tunnel_id, key.remote_ipv4, index); + + return TC_ACT_OK; +} + SEC("vxlan_set_tunnel") int _vxlan_set_tunnel(struct __sk_buff *skb) { @@ -147,9 +209,9 @@ int _geneve_set_tunnel(struct __sk_buff *skb) __builtin_memset(&gopt, 0x0, sizeof(gopt)); gopt.opt_class = 0x102; /* Open Virtual Networking (OVN) */ gopt.type = 0x08; - gopt.r1 = 1; + gopt.r1 = 0; gopt.r2 = 0; - gopt.r3 = 1; + gopt.r3 = 0; gopt.length = 2; /* 4-byte multiple */ *(int *) &gopt.opt_data = 0xdeadbeef; @@ -378,5 +440,4 @@ int _ip6ip6_get_tunnel(struct __sk_buff *skb) return TC_ACT_OK; } - char _license[] SEC("license") = "GPL"; diff --git a/samples/bpf/tcp_bufs_kern.c b/samples/bpf/tcp_bufs_kern.c new file mode 100644 index 000000000000..ee83bbabd17c --- /dev/null +++ b/samples/bpf/tcp_bufs_kern.c @@ -0,0 +1,86 @@ +/* Copyright (c) 2017 Facebook + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * BPF program to set initial receive window to 40 packets and send + * and receive buffers to 1.5MB. This would usually be done after + * doing appropriate checks that indicate the hosts are far enough + * away (i.e. large RTT). + * + * Use load_sock_ops to load this BPF program. + */ + +#include <uapi/linux/bpf.h> +#include <uapi/linux/if_ether.h> +#include <uapi/linux/if_packet.h> +#include <uapi/linux/ip.h> +#include <linux/socket.h> +#include "bpf_helpers.h" +#include "bpf_endian.h" + +#define DEBUG 1 + +#define bpf_printk(fmt, ...) \ +({ \ + char ____fmt[] = fmt; \ + bpf_trace_printk(____fmt, sizeof(____fmt), \ + ##__VA_ARGS__); \ +}) + +SEC("sockops") +int bpf_bufs(struct bpf_sock_ops *skops) +{ + int bufsize = 1500000; + int rwnd_init = 40; + int rv = 0; + int op; + + /* For testing purposes, only execute rest of BPF program + * if neither port numberis 55601 + */ + if (bpf_ntohl(skops->remote_port) != 55601 && + skops->local_port != 55601) + return -1; + + op = (int) skops->op; + +#ifdef DEBUG + bpf_printk("Returning %d\n", rv); +#endif + + /* Usually there would be a check to insure the hosts are far + * from each other so it makes sense to increase buffer sizes + */ + switch (op) { + case BPF_SOCK_OPS_RWND_INIT: + rv = rwnd_init; + break; + case BPF_SOCK_OPS_TCP_CONNECT_CB: + /* Set sndbuf and rcvbuf of active connections */ + rv = bpf_setsockopt(skops, SOL_SOCKET, SO_SNDBUF, &bufsize, + sizeof(bufsize)); + rv = rv*100 + bpf_setsockopt(skops, SOL_SOCKET, SO_RCVBUF, + &bufsize, sizeof(bufsize)); + break; + case BPF_SOCK_OPS_ACTIVE_ESTABLISHED_CB: + /* Nothing to do */ + break; + case BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB: + /* Set sndbuf and rcvbuf of passive connections */ + rv = bpf_setsockopt(skops, SOL_SOCKET, SO_SNDBUF, &bufsize, + sizeof(bufsize)); + rv = rv*100 + bpf_setsockopt(skops, SOL_SOCKET, SO_RCVBUF, + &bufsize, sizeof(bufsize)); + break; + default: + rv = -1; + } +#ifdef DEBUG + bpf_printk("Returning %d\n", rv); +#endif + skops->reply = rv; + return 1; +} +char _license[] SEC("license") = "GPL"; diff --git a/samples/bpf/tcp_clamp_kern.c b/samples/bpf/tcp_clamp_kern.c new file mode 100644 index 000000000000..d68eadd9ca2d --- /dev/null +++ b/samples/bpf/tcp_clamp_kern.c @@ -0,0 +1,102 @@ +/* Copyright (c) 2017 Facebook + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * Sample BPF program to set send and receive buffers to 150KB, sndcwnd clamp + * to 100 packets and SYN and SYN_ACK RTOs to 10ms when both hosts are within + * the same datacenter. For his example, we assume they are within the same + * datacenter when the first 5.5 bytes of their IPv6 addresses are the same. + * + * Use load_sock_ops to load this BPF program. + */ + +#include <uapi/linux/bpf.h> +#include <uapi/linux/if_ether.h> +#include <uapi/linux/if_packet.h> +#include <uapi/linux/ip.h> +#include <linux/socket.h> +#include "bpf_helpers.h" +#include "bpf_endian.h" + +#define DEBUG 1 + +#define bpf_printk(fmt, ...) \ +({ \ + char ____fmt[] = fmt; \ + bpf_trace_printk(____fmt, sizeof(____fmt), \ + ##__VA_ARGS__); \ +}) + +SEC("sockops") +int bpf_clamp(struct bpf_sock_ops *skops) +{ + int bufsize = 150000; + int to_init = 10; + int clamp = 100; + int rv = 0; + int op; + + /* For testing purposes, only execute rest of BPF program + * if neither port numberis 55601 + */ + if (bpf_ntohl(skops->remote_port) != 55601 && skops->local_port != 55601) + return -1; + + op = (int) skops->op; + +#ifdef DEBUG + bpf_printk("BPF command: %d\n", op); +#endif + + /* Check that both hosts are within same datacenter. For this example + * it is the case when the first 5.5 bytes of their IPv6 addresses are + * the same. + */ + if (skops->family == AF_INET6 && + skops->local_ip6[0] == skops->remote_ip6[0] && + (bpf_ntohl(skops->local_ip6[1]) & 0xfff00000) == + (bpf_ntohl(skops->remote_ip6[1]) & 0xfff00000)) { + switch (op) { + case BPF_SOCK_OPS_TIMEOUT_INIT: + rv = to_init; + break; + case BPF_SOCK_OPS_TCP_CONNECT_CB: + /* Set sndbuf and rcvbuf of active connections */ + rv = bpf_setsockopt(skops, SOL_SOCKET, SO_SNDBUF, + &bufsize, sizeof(bufsize)); + rv = rv*100 + bpf_setsockopt(skops, SOL_SOCKET, + SO_RCVBUF, &bufsize, + sizeof(bufsize)); + break; + case BPF_SOCK_OPS_ACTIVE_ESTABLISHED_CB: + rv = bpf_setsockopt(skops, SOL_TCP, + TCP_BPF_SNDCWND_CLAMP, + &clamp, sizeof(clamp)); + break; + case BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB: + /* Set sndbuf and rcvbuf of passive connections */ + rv = bpf_setsockopt(skops, SOL_TCP, + TCP_BPF_SNDCWND_CLAMP, + &clamp, sizeof(clamp)); + rv = rv*100 + bpf_setsockopt(skops, SOL_SOCKET, + SO_SNDBUF, &bufsize, + sizeof(bufsize)); + rv = rv*100 + bpf_setsockopt(skops, SOL_SOCKET, + SO_RCVBUF, &bufsize, + sizeof(bufsize)); + break; + default: + rv = -1; + } + } else { + rv = -1; + } +#ifdef DEBUG + bpf_printk("Returning %d\n", rv); +#endif + skops->reply = rv; + return 1; +} +char _license[] SEC("license") = "GPL"; diff --git a/samples/bpf/tcp_cong_kern.c b/samples/bpf/tcp_cong_kern.c new file mode 100644 index 000000000000..dac15bce1fa9 --- /dev/null +++ b/samples/bpf/tcp_cong_kern.c @@ -0,0 +1,83 @@ +/* Copyright (c) 2017 Facebook + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * BPF program to set congestion control to dctcp when both hosts are + * in the same datacenter (as deteremined by IPv6 prefix). + * + * Use load_sock_ops to load this BPF program. + */ + +#include <uapi/linux/bpf.h> +#include <uapi/linux/tcp.h> +#include <uapi/linux/if_ether.h> +#include <uapi/linux/if_packet.h> +#include <uapi/linux/ip.h> +#include <linux/socket.h> +#include "bpf_helpers.h" +#include "bpf_endian.h" + +#define DEBUG 1 + +#define bpf_printk(fmt, ...) \ +({ \ + char ____fmt[] = fmt; \ + bpf_trace_printk(____fmt, sizeof(____fmt), \ + ##__VA_ARGS__); \ +}) + +SEC("sockops") +int bpf_cong(struct bpf_sock_ops *skops) +{ + char cong[] = "dctcp"; + int rv = 0; + int op; + + /* For testing purposes, only execute rest of BPF program + * if neither port numberis 55601 + */ + if (bpf_ntohl(skops->remote_port) != 55601 && + skops->local_port != 55601) + return -1; + + op = (int) skops->op; + +#ifdef DEBUG + bpf_printk("BPF command: %d\n", op); +#endif + + /* Check if both hosts are in the same datacenter. For this + * example they are if the 1st 5.5 bytes in the IPv6 address + * are the same. + */ + if (skops->family == AF_INET6 && + skops->local_ip6[0] == skops->remote_ip6[0] && + (bpf_ntohl(skops->local_ip6[1]) & 0xfff00000) == + (bpf_ntohl(skops->remote_ip6[1]) & 0xfff00000)) { + switch (op) { + case BPF_SOCK_OPS_NEEDS_ECN: + rv = 1; + break; + case BPF_SOCK_OPS_ACTIVE_ESTABLISHED_CB: + rv = bpf_setsockopt(skops, SOL_TCP, TCP_CONGESTION, + cong, sizeof(cong)); + break; + case BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB: + rv = bpf_setsockopt(skops, SOL_TCP, TCP_CONGESTION, + cong, sizeof(cong)); + break; + default: + rv = -1; + } + } else { + rv = -1; + } +#ifdef DEBUG + bpf_printk("Returning %d\n", rv); +#endif + skops->reply = rv; + return 1; +} +char _license[] SEC("license") = "GPL"; diff --git a/samples/bpf/tcp_iw_kern.c b/samples/bpf/tcp_iw_kern.c new file mode 100644 index 000000000000..23c5122ef819 --- /dev/null +++ b/samples/bpf/tcp_iw_kern.c @@ -0,0 +1,88 @@ +/* Copyright (c) 2017 Facebook + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * BPF program to set initial congestion window and initial receive + * window to 40 packets and send and receive buffers to 1.5MB. This + * would usually be done after doing appropriate checks that indicate + * the hosts are far enough away (i.e. large RTT). + * + * Use load_sock_ops to load this BPF program. + */ + +#include <uapi/linux/bpf.h> +#include <uapi/linux/if_ether.h> +#include <uapi/linux/if_packet.h> +#include <uapi/linux/ip.h> +#include <linux/socket.h> +#include "bpf_helpers.h" +#include "bpf_endian.h" + +#define DEBUG 1 + +#define bpf_printk(fmt, ...) \ +({ \ + char ____fmt[] = fmt; \ + bpf_trace_printk(____fmt, sizeof(____fmt), \ + ##__VA_ARGS__); \ +}) + +SEC("sockops") +int bpf_iw(struct bpf_sock_ops *skops) +{ + int bufsize = 1500000; + int rwnd_init = 40; + int iw = 40; + int rv = 0; + int op; + + /* For testing purposes, only execute rest of BPF program + * if neither port numberis 55601 + */ + if (bpf_ntohl(skops->remote_port) != 55601 && + skops->local_port != 55601) + return -1; + + op = (int) skops->op; + +#ifdef DEBUG + bpf_printk("BPF command: %d\n", op); +#endif + + /* Usually there would be a check to insure the hosts are far + * from each other so it makes sense to increase buffer sizes + */ + switch (op) { + case BPF_SOCK_OPS_RWND_INIT: + rv = rwnd_init; + break; + case BPF_SOCK_OPS_TCP_CONNECT_CB: + /* Set sndbuf and rcvbuf of active connections */ + rv = bpf_setsockopt(skops, SOL_SOCKET, SO_SNDBUF, &bufsize, + sizeof(bufsize)); + rv = rv*100 + bpf_setsockopt(skops, SOL_SOCKET, SO_RCVBUF, + &bufsize, sizeof(bufsize)); + break; + case BPF_SOCK_OPS_ACTIVE_ESTABLISHED_CB: + rv = bpf_setsockopt(skops, SOL_TCP, TCP_BPF_IW, &iw, + sizeof(iw)); + break; + case BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB: + /* Set sndbuf and rcvbuf of passive connections */ + rv = bpf_setsockopt(skops, SOL_SOCKET, SO_SNDBUF, &bufsize, + sizeof(bufsize)); + rv = rv*100 + bpf_setsockopt(skops, SOL_SOCKET, SO_RCVBUF, + &bufsize, sizeof(bufsize)); + break; + default: + rv = -1; + } +#ifdef DEBUG + bpf_printk("Returning %d\n", rv); +#endif + skops->reply = rv; + return 1; +} +char _license[] SEC("license") = "GPL"; diff --git a/samples/bpf/tcp_rwnd_kern.c b/samples/bpf/tcp_rwnd_kern.c new file mode 100644 index 000000000000..3f2a228f81ce --- /dev/null +++ b/samples/bpf/tcp_rwnd_kern.c @@ -0,0 +1,69 @@ +/* Copyright (c) 2017 Facebook + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * BPF program to set initial receive window to 40 packets when using IPv6 + * and the first 5.5 bytes of the IPv6 addresses are not the same (in this + * example that means both hosts are not the same datacenter). + * + * Use load_sock_ops to load this BPF program. + */ + +#include <uapi/linux/bpf.h> +#include <uapi/linux/if_ether.h> +#include <uapi/linux/if_packet.h> +#include <uapi/linux/ip.h> +#include <linux/socket.h> +#include "bpf_helpers.h" +#include "bpf_endian.h" + +#define DEBUG 1 + +#define bpf_printk(fmt, ...) \ +({ \ + char ____fmt[] = fmt; \ + bpf_trace_printk(____fmt, sizeof(____fmt), \ + ##__VA_ARGS__); \ +}) + +SEC("sockops") +int bpf_rwnd(struct bpf_sock_ops *skops) +{ + int rv = -1; + int op; + + /* For testing purposes, only execute rest of BPF program + * if neither port numberis 55601 + */ + if (bpf_ntohl(skops->remote_port) != + 55601 && skops->local_port != 55601) + return -1; + + op = (int) skops->op; + +#ifdef DEBUG + bpf_printk("BPF command: %d\n", op); +#endif + + /* Check for RWND_INIT operation and IPv6 addresses */ + if (op == BPF_SOCK_OPS_RWND_INIT && + skops->family == AF_INET6) { + + /* If the first 5.5 bytes of the IPv6 address are not the same + * then both hosts are not in the same datacenter + * so use a larger initial advertized window (40 packets) + */ + if (skops->local_ip6[0] != skops->remote_ip6[0] || + (bpf_ntohl(skops->local_ip6[1]) & 0xfffff000) != + (bpf_ntohl(skops->remote_ip6[1]) & 0xfffff000)) + rv = 40; + } +#ifdef DEBUG + bpf_printk("Returning %d\n", rv); +#endif + skops->reply = rv; + return 1; +} +char _license[] SEC("license") = "GPL"; diff --git a/samples/bpf/tcp_synrto_kern.c b/samples/bpf/tcp_synrto_kern.c new file mode 100644 index 000000000000..3c3fc83d81cb --- /dev/null +++ b/samples/bpf/tcp_synrto_kern.c @@ -0,0 +1,69 @@ +/* Copyright (c) 2017 Facebook + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * BPF program to set SYN and SYN-ACK RTOs to 10ms when using IPv6 addresses + * and the first 5.5 bytes of the IPv6 addresses are the same (in this example + * that means both hosts are in the same datacenter). + * + * Use load_sock_ops to load this BPF program. + */ + +#include <uapi/linux/bpf.h> +#include <uapi/linux/if_ether.h> +#include <uapi/linux/if_packet.h> +#include <uapi/linux/ip.h> +#include <linux/socket.h> +#include "bpf_helpers.h" +#include "bpf_endian.h" + +#define DEBUG 1 + +#define bpf_printk(fmt, ...) \ +({ \ + char ____fmt[] = fmt; \ + bpf_trace_printk(____fmt, sizeof(____fmt), \ + ##__VA_ARGS__); \ +}) + +SEC("sockops") +int bpf_synrto(struct bpf_sock_ops *skops) +{ + int rv = -1; + int op; + + /* For testing purposes, only execute rest of BPF program + * if neither port numberis 55601 + */ + if (bpf_ntohl(skops->remote_port) != 55601 && + skops->local_port != 55601) + return -1; + + op = (int) skops->op; + +#ifdef DEBUG + bpf_printk("BPF command: %d\n", op); +#endif + + /* Check for TIMEOUT_INIT operation and IPv6 addresses */ + if (op == BPF_SOCK_OPS_TIMEOUT_INIT && + skops->family == AF_INET6) { + + /* If the first 5.5 bytes of the IPv6 address are the same + * then both hosts are in the same datacenter + * so use an RTO of 10ms + */ + if (skops->local_ip6[0] == skops->remote_ip6[0] && + (bpf_ntohl(skops->local_ip6[1]) & 0xfff00000) == + (bpf_ntohl(skops->remote_ip6[1]) & 0xfff00000)) + rv = 10; + } +#ifdef DEBUG + bpf_printk("Returning %d\n", rv); +#endif + skops->reply = rv; + return 1; +} +char _license[] SEC("license") = "GPL"; diff --git a/samples/bpf/test_cgrp2_sock.c b/samples/bpf/test_cgrp2_sock.c index c3cfb23e23b5..e79594dd629b 100644 --- a/samples/bpf/test_cgrp2_sock.c +++ b/samples/bpf/test_cgrp2_sock.c @@ -19,68 +19,271 @@ #include <errno.h> #include <fcntl.h> #include <net/if.h> +#include <inttypes.h> #include <linux/bpf.h> #include "libbpf.h" char bpf_log_buf[BPF_LOG_BUF_SIZE]; -static int prog_load(int idx) +static int prog_load(__u32 idx, __u32 mark, __u32 prio) { - struct bpf_insn prog[] = { + /* save pointer to context */ + struct bpf_insn prog_start[] = { BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), + }; + struct bpf_insn prog_end[] = { + BPF_MOV64_IMM(BPF_REG_0, 1), /* r0 = verdict */ + BPF_EXIT_INSN(), + }; + + /* set sk_bound_dev_if on socket */ + struct bpf_insn prog_dev[] = { BPF_MOV64_IMM(BPF_REG_3, idx), BPF_MOV64_IMM(BPF_REG_2, offsetof(struct bpf_sock, bound_dev_if)), BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_3, offsetof(struct bpf_sock, bound_dev_if)), - BPF_MOV64_IMM(BPF_REG_0, 1), /* r0 = verdict */ - BPF_EXIT_INSN(), }; - size_t insns_cnt = sizeof(prog) / sizeof(struct bpf_insn); - return bpf_load_program(BPF_PROG_TYPE_CGROUP_SOCK, prog, insns_cnt, + /* set mark on socket */ + struct bpf_insn prog_mark[] = { + /* get uid of process */ + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_get_current_uid_gid), + BPF_ALU64_IMM(BPF_AND, BPF_REG_0, 0xffffffff), + + /* if uid is 0, use given mark, else use the uid as the mark */ + BPF_MOV64_REG(BPF_REG_3, BPF_REG_0), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1), + BPF_MOV64_IMM(BPF_REG_3, mark), + + /* set the mark on the new socket */ + BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), + BPF_MOV64_IMM(BPF_REG_2, offsetof(struct bpf_sock, mark)), + BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_3, offsetof(struct bpf_sock, mark)), + }; + + /* set priority on socket */ + struct bpf_insn prog_prio[] = { + BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), + BPF_MOV64_IMM(BPF_REG_3, prio), + BPF_MOV64_IMM(BPF_REG_2, offsetof(struct bpf_sock, priority)), + BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_3, offsetof(struct bpf_sock, priority)), + }; + + struct bpf_insn *prog; + size_t insns_cnt; + void *p; + int ret; + + insns_cnt = sizeof(prog_start) + sizeof(prog_end); + if (idx) + insns_cnt += sizeof(prog_dev); + + if (mark) + insns_cnt += sizeof(prog_mark); + + if (prio) + insns_cnt += sizeof(prog_prio); + + p = prog = malloc(insns_cnt); + if (!prog) { + fprintf(stderr, "Failed to allocate memory for instructions\n"); + return EXIT_FAILURE; + } + + memcpy(p, prog_start, sizeof(prog_start)); + p += sizeof(prog_start); + + if (idx) { + memcpy(p, prog_dev, sizeof(prog_dev)); + p += sizeof(prog_dev); + } + + if (mark) { + memcpy(p, prog_mark, sizeof(prog_mark)); + p += sizeof(prog_mark); + } + + if (prio) { + memcpy(p, prog_prio, sizeof(prog_prio)); + p += sizeof(prog_prio); + } + + memcpy(p, prog_end, sizeof(prog_end)); + p += sizeof(prog_end); + + insns_cnt /= sizeof(struct bpf_insn); + + ret = bpf_load_program(BPF_PROG_TYPE_CGROUP_SOCK, prog, insns_cnt, "GPL", 0, bpf_log_buf, BPF_LOG_BUF_SIZE); + + free(prog); + + return ret; +} + +static int get_bind_to_device(int sd, char *name, size_t len) +{ + socklen_t optlen = len; + int rc; + + name[0] = '\0'; + rc = getsockopt(sd, SOL_SOCKET, SO_BINDTODEVICE, name, &optlen); + if (rc < 0) + perror("setsockopt(SO_BINDTODEVICE)"); + + return rc; +} + +static unsigned int get_somark(int sd) +{ + unsigned int mark = 0; + socklen_t optlen = sizeof(mark); + int rc; + + rc = getsockopt(sd, SOL_SOCKET, SO_MARK, &mark, &optlen); + if (rc < 0) + perror("getsockopt(SO_MARK)"); + + return mark; +} + +static unsigned int get_priority(int sd) +{ + unsigned int prio = 0; + socklen_t optlen = sizeof(prio); + int rc; + + rc = getsockopt(sd, SOL_SOCKET, SO_PRIORITY, &prio, &optlen); + if (rc < 0) + perror("getsockopt(SO_PRIORITY)"); + + return prio; +} + +static int show_sockopts(int family) +{ + unsigned int mark, prio; + char name[16]; + int sd; + + sd = socket(family, SOCK_DGRAM, 17); + if (sd < 0) { + perror("socket"); + return 1; + } + + if (get_bind_to_device(sd, name, sizeof(name)) < 0) + return 1; + + mark = get_somark(sd); + prio = get_priority(sd); + + close(sd); + + printf("sd %d: dev %s, mark %u, priority %u\n", sd, name, mark, prio); + + return 0; } static int usage(const char *argv0) { - printf("Usage: %s cg-path device-index\n", argv0); + printf("Usage:\n"); + printf(" Attach a program\n"); + printf(" %s -b bind-to-dev -m mark -p prio cg-path\n", argv0); + printf("\n"); + printf(" Detach a program\n"); + printf(" %s -d cg-path\n", argv0); + printf("\n"); + printf(" Show inherited socket settings (mark, priority, and device)\n"); + printf(" %s [-6]\n", argv0); return EXIT_FAILURE; } int main(int argc, char **argv) { + __u32 idx = 0, mark = 0, prio = 0; + const char *cgrp_path = NULL; int cg_fd, prog_fd, ret; - unsigned int idx; + int family = PF_INET; + int do_attach = 1; + int rc; + + while ((rc = getopt(argc, argv, "db:m:p:6")) != -1) { + switch (rc) { + case 'd': + do_attach = 0; + break; + case 'b': + idx = if_nametoindex(optarg); + if (!idx) { + idx = strtoumax(optarg, NULL, 0); + if (!idx) { + printf("Invalid device name\n"); + return EXIT_FAILURE; + } + } + break; + case 'm': + mark = strtoumax(optarg, NULL, 0); + break; + case 'p': + prio = strtoumax(optarg, NULL, 0); + break; + case '6': + family = PF_INET6; + break; + default: + return usage(argv[0]); + } + } - if (argc < 2) - return usage(argv[0]); + if (optind == argc) + return show_sockopts(family); - idx = if_nametoindex(argv[2]); - if (!idx) { - printf("Invalid device name\n"); + cgrp_path = argv[optind]; + if (!cgrp_path) { + fprintf(stderr, "cgroup path not given\n"); return EXIT_FAILURE; } - cg_fd = open(argv[1], O_DIRECTORY | O_RDONLY); - if (cg_fd < 0) { - printf("Failed to open cgroup path: '%s'\n", strerror(errno)); + if (do_attach && !idx && !mark && !prio) { + fprintf(stderr, + "One of device, mark or priority must be given\n"); return EXIT_FAILURE; } - prog_fd = prog_load(idx); - printf("Output from kernel verifier:\n%s\n-------\n", bpf_log_buf); - - if (prog_fd < 0) { - printf("Failed to load prog: '%s'\n", strerror(errno)); + cg_fd = open(cgrp_path, O_DIRECTORY | O_RDONLY); + if (cg_fd < 0) { + printf("Failed to open cgroup path: '%s'\n", strerror(errno)); return EXIT_FAILURE; } - ret = bpf_prog_attach(prog_fd, cg_fd, BPF_CGROUP_INET_SOCK_CREATE, 0); - if (ret < 0) { - printf("Failed to attach prog to cgroup: '%s'\n", - strerror(errno)); - return EXIT_FAILURE; + if (do_attach) { + prog_fd = prog_load(idx, mark, prio); + if (prog_fd < 0) { + printf("Failed to load prog: '%s'\n", strerror(errno)); + printf("Output from kernel verifier:\n%s\n-------\n", + bpf_log_buf); + return EXIT_FAILURE; + } + + ret = bpf_prog_attach(prog_fd, cg_fd, + BPF_CGROUP_INET_SOCK_CREATE, 0); + if (ret < 0) { + printf("Failed to attach prog to cgroup: '%s'\n", + strerror(errno)); + return EXIT_FAILURE; + } + } else { + ret = bpf_prog_detach(cg_fd, BPF_CGROUP_INET_SOCK_CREATE); + if (ret < 0) { + printf("Failed to detach prog from cgroup: '%s'\n", + strerror(errno)); + return EXIT_FAILURE; + } } + close(cg_fd); return EXIT_SUCCESS; } diff --git a/samples/bpf/test_cgrp2_sock.sh b/samples/bpf/test_cgrp2_sock.sh index 925fd467c7cc..8ee0371a100a 100755 --- a/samples/bpf/test_cgrp2_sock.sh +++ b/samples/bpf/test_cgrp2_sock.sh @@ -1,47 +1,134 @@ -#!/bin/bash - -function config_device { - ip netns add at_ns0 - ip link add veth0 type veth peer name veth0b - ip link set veth0b up - ip link set veth0 netns at_ns0 - ip netns exec at_ns0 ip addr add 172.16.1.100/24 dev veth0 - ip netns exec at_ns0 ip addr add 2401:db00::1/64 dev veth0 nodad - ip netns exec at_ns0 ip link set dev veth0 up - ip link add foo type vrf table 1234 - ip link set foo up - ip addr add 172.16.1.101/24 dev veth0b - ip addr add 2401:db00::2/64 dev veth0b nodad - ip link set veth0b master foo +#!/bin/sh +# SPDX-License-Identifier: GPL-2.0 + +# Test various socket options that can be set by attaching programs to cgroups. + +CGRP_MNT="/tmp/cgroupv2-test_cgrp2_sock" + +################################################################################ +# +print_result() +{ + local rc=$1 + local status=" OK " + + [ $rc -ne 0 ] && status="FAIL" + + printf "%-50s [%4s]\n" "$2" "$status" } -function attach_bpf { - rm -rf /tmp/cgroupv2 - mkdir -p /tmp/cgroupv2 - mount -t cgroup2 none /tmp/cgroupv2 - mkdir -p /tmp/cgroupv2/foo - test_cgrp2_sock /tmp/cgroupv2/foo foo - echo $$ >> /tmp/cgroupv2/foo/cgroup.procs +check_sock() +{ + out=$(test_cgrp2_sock) + echo $out | grep -q "$1" + if [ $? -ne 0 ]; then + print_result 1 "IPv4: $2" + echo " expected: $1" + echo " have: $out" + rc=1 + else + print_result 0 "IPv4: $2" + fi } -function cleanup { - set +ex - ip netns delete at_ns0 - ip link del veth0 - ip link del foo - umount /tmp/cgroupv2 - rm -rf /tmp/cgroupv2 - set -ex +check_sock6() +{ + out=$(test_cgrp2_sock -6) + echo $out | grep -q "$1" + if [ $? -ne 0 ]; then + print_result 1 "IPv6: $2" + echo " expected: $1" + echo " have: $out" + rc=1 + else + print_result 0 "IPv6: $2" + fi } -function do_test { - ping -c1 -w1 172.16.1.100 - ping6 -c1 -w1 2401:db00::1 +################################################################################ +# + +cleanup() +{ + echo $$ >> ${CGRP_MNT}/cgroup.procs + rmdir ${CGRP_MNT}/sockopts } +cleanup_and_exit() +{ + local rc=$1 + local msg="$2" + + [ -n "$msg" ] && echo "ERROR: $msg" + + ip li del cgrp2_sock + umount ${CGRP_MNT} + + exit $rc +} + + +################################################################################ +# main + +rc=0 + +ip li add cgrp2_sock type dummy 2>/dev/null + +set -e +mkdir -p ${CGRP_MNT} +mount -t cgroup2 none ${CGRP_MNT} +set +e + + +# make sure we have a known start point cleanup 2>/dev/null -config_device -attach_bpf -do_test -cleanup -echo "*** PASS ***" + +mkdir -p ${CGRP_MNT}/sockopts +[ $? -ne 0 ] && cleanup_and_exit 1 "Failed to create cgroup hierarchy" + + +# set pid into cgroup +echo $$ > ${CGRP_MNT}/sockopts/cgroup.procs + +# no bpf program attached, so socket should show no settings +check_sock "dev , mark 0, priority 0" "No programs attached" +check_sock6 "dev , mark 0, priority 0" "No programs attached" + +# verify device is set +# +test_cgrp2_sock -b cgrp2_sock ${CGRP_MNT}/sockopts +if [ $? -ne 0 ]; then + cleanup_and_exit 1 "Failed to install program to set device" +fi +check_sock "dev cgrp2_sock, mark 0, priority 0" "Device set" +check_sock6 "dev cgrp2_sock, mark 0, priority 0" "Device set" + +# verify mark is set +# +test_cgrp2_sock -m 666 ${CGRP_MNT}/sockopts +if [ $? -ne 0 ]; then + cleanup_and_exit 1 "Failed to install program to set mark" +fi +check_sock "dev , mark 666, priority 0" "Mark set" +check_sock6 "dev , mark 666, priority 0" "Mark set" + +# verify priority is set +# +test_cgrp2_sock -p 123 ${CGRP_MNT}/sockopts +if [ $? -ne 0 ]; then + cleanup_and_exit 1 "Failed to install program to set priority" +fi +check_sock "dev , mark 0, priority 123" "Priority set" +check_sock6 "dev , mark 0, priority 123" "Priority set" + +# all 3 at once +# +test_cgrp2_sock -b cgrp2_sock -m 666 -p 123 ${CGRP_MNT}/sockopts +if [ $? -ne 0 ]; then + cleanup_and_exit 1 "Failed to install program to set device, mark and priority" +fi +check_sock "dev cgrp2_sock, mark 666, priority 123" "Priority set" +check_sock6 "dev cgrp2_sock, mark 666, priority 123" "Priority set" + +cleanup_and_exit $rc diff --git a/samples/bpf/test_cgrp2_sock2.c b/samples/bpf/test_cgrp2_sock2.c index db036077b644..e53f1f6f0867 100644 --- a/samples/bpf/test_cgrp2_sock2.c +++ b/samples/bpf/test_cgrp2_sock2.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* eBPF example program: * * - Loads eBPF program diff --git a/samples/bpf/test_cgrp2_sock2.sh b/samples/bpf/test_cgrp2_sock2.sh index 891f12a0e26f..fc4e64d00cb3 100755 --- a/samples/bpf/test_cgrp2_sock2.sh +++ b/samples/bpf/test_cgrp2_sock2.sh @@ -1,4 +1,5 @@ #!/bin/bash +# SPDX-License-Identifier: GPL-2.0 function config_device { ip netns add at_ns0 diff --git a/samples/bpf/test_cgrp2_tc.sh b/samples/bpf/test_cgrp2_tc.sh index 0b119eeaf85c..12faf5847e22 100755 --- a/samples/bpf/test_cgrp2_tc.sh +++ b/samples/bpf/test_cgrp2_tc.sh @@ -1,4 +1,5 @@ #!/bin/bash +# SPDX-License-Identifier: GPL-2.0 MY_DIR=$(dirname $0) # Details on the bpf prog diff --git a/samples/bpf/test_cls_bpf.sh b/samples/bpf/test_cls_bpf.sh index 0365d5ee512c..aaddd67b37ff 100755 --- a/samples/bpf/test_cls_bpf.sh +++ b/samples/bpf/test_cls_bpf.sh @@ -1,4 +1,5 @@ #!/bin/bash +# SPDX-License-Identifier: GPL-2.0 function pktgen { ../pktgen/pktgen_bench_xmit_mode_netif_receive.sh -i $IFC -s 64 \ diff --git a/samples/bpf/test_ipip.sh b/samples/bpf/test_ipip.sh index 196925403ab4..9e507c305c6e 100755 --- a/samples/bpf/test_ipip.sh +++ b/samples/bpf/test_ipip.sh @@ -1,4 +1,5 @@ #!/bin/bash +# SPDX-License-Identifier: GPL-2.0 function config_device { ip netns add at_ns0 diff --git a/samples/bpf/test_lwt_bpf.sh b/samples/bpf/test_lwt_bpf.sh index a695ae268c40..65a976058dd3 100644 --- a/samples/bpf/test_lwt_bpf.sh +++ b/samples/bpf/test_lwt_bpf.sh @@ -1,4 +1,5 @@ #!/bin/bash +# SPDX-License-Identifier: GPL-2.0 # Uncomment to see generated bytecode #VERBOSE=verbose diff --git a/samples/bpf/test_map_in_map_user.c b/samples/bpf/test_map_in_map_user.c index f62fdc2bd428..1aca18539d8d 100644 --- a/samples/bpf/test_map_in_map_user.c +++ b/samples/bpf/test_map_in_map_user.c @@ -32,6 +32,20 @@ static const char * const test_names[] = { #define NR_TESTS (sizeof(test_names) / sizeof(*test_names)) +static void check_map_id(int inner_map_fd, int map_in_map_fd, uint32_t key) +{ + struct bpf_map_info info = {}; + uint32_t info_len = sizeof(info); + int ret, id; + + ret = bpf_obj_get_info_by_fd(inner_map_fd, &info, &info_len); + assert(!ret); + + ret = bpf_map_lookup_elem(map_in_map_fd, &key, &id); + assert(!ret); + assert(id == info.id); +} + static void populate_map(uint32_t port_key, int magic_result) { int ret; @@ -45,12 +59,15 @@ static void populate_map(uint32_t port_key, int magic_result) ret = bpf_map_update_elem(A_OF_PORT_A, &port_key, &PORT_A, BPF_ANY); assert(!ret); + check_map_id(PORT_A, A_OF_PORT_A, port_key); ret = bpf_map_update_elem(H_OF_PORT_A, &port_key, &PORT_A, BPF_NOEXIST); assert(!ret); + check_map_id(PORT_A, H_OF_PORT_A, port_key); ret = bpf_map_update_elem(H_OF_PORT_H, &port_key, &PORT_H, BPF_NOEXIST); assert(!ret); + check_map_id(PORT_H, H_OF_PORT_H, port_key); } static void test_map_in_map(void) diff --git a/samples/bpf/test_probe_write_user_user.c b/samples/bpf/test_probe_write_user_user.c index b5bf178a6ecc..bf8e3a9f3067 100644 --- a/samples/bpf/test_probe_write_user_user.c +++ b/samples/bpf/test_probe_write_user_user.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 #include <stdio.h> #include <assert.h> #include <linux/bpf.h> diff --git a/samples/bpf/test_tunnel_bpf.sh b/samples/bpf/test_tunnel_bpf.sh index 1ff634f187b7..312e1722a39f 100755 --- a/samples/bpf/test_tunnel_bpf.sh +++ b/samples/bpf/test_tunnel_bpf.sh @@ -1,4 +1,5 @@ #!/bin/bash +# SPDX-License-Identifier: GPL-2.0 # In Namespace 0 (at_ns0) using native tunnel # Overlay IP: 10.1.1.100 # local 192.16.1.100 remote 192.16.1.200 @@ -32,6 +33,19 @@ function add_gre_tunnel { ip addr add dev $DEV 10.1.1.200/24 } +function add_erspan_tunnel { + # in namespace + ip netns exec at_ns0 \ + ip link add dev $DEV_NS type $TYPE seq key 2 local 172.16.1.100 remote 172.16.1.200 erspan 123 + ip netns exec at_ns0 ip link set dev $DEV_NS up + ip netns exec at_ns0 ip addr add dev $DEV_NS 10.1.1.100/24 + + # out of namespace + ip link add dev $DEV type $TYPE external + ip link set dev $DEV up + ip addr add dev $DEV 10.1.1.200/24 +} + function add_vxlan_tunnel { # Set static ARP entry here because iptables set-mark works # on L3 packet, as a result not applying to ARP packets, @@ -99,6 +113,18 @@ function test_gre { cleanup } +function test_erspan { + TYPE=erspan + DEV_NS=erspan00 + DEV=erspan11 + config_device + add_erspan_tunnel + attach_bpf $DEV erspan_set_tunnel erspan_get_tunnel + ping -c 1 10.1.1.100 + ip netns exec at_ns0 ping -c 1 10.1.1.200 + cleanup +} + function test_vxlan { TYPE=vxlan DEV_NS=vxlan00 @@ -149,15 +175,20 @@ function cleanup { ip link del veth1 ip link del ipip11 ip link del gretap11 + ip link del vxlan11 ip link del geneve11 + ip link del erspan11 pkill tcpdump pkill cat set -ex } +trap cleanup 0 2 3 6 9 cleanup echo "Testing GRE tunnel..." test_gre +echo "Testing ERSPAN tunnel..." +test_erspan echo "Testing VXLAN tunnel..." test_vxlan echo "Testing GENEVE tunnel..." diff --git a/samples/bpf/trace_event_user.c b/samples/bpf/trace_event_user.c index fa4336423da5..7bd827b84a67 100644 --- a/samples/bpf/trace_event_user.c +++ b/samples/bpf/trace_event_user.c @@ -75,7 +75,10 @@ static void print_stack(struct key_t *key, __u64 count) for (i = PERF_MAX_STACK_DEPTH - 1; i >= 0; i--) print_addr(ip[i]); } - printf("\n"); + if (count < 6) + printf("\r"); + else + printf("\n"); if (key->kernstack == -EEXIST && !warned) { printf("stackmap collisions seen. Consider increasing size\n"); @@ -105,7 +108,7 @@ static void print_stacks(void) bpf_map_delete_elem(fd, &next_key); key = next_key; } - + printf("\n"); if (!sys_read_seen || !sys_write_seen) { printf("BUG kernel stack doesn't contain sys_read() and sys_write()\n"); int_exit(0); @@ -122,24 +125,29 @@ static void test_perf_event_all_cpu(struct perf_event_attr *attr) { int nr_cpus = sysconf(_SC_NPROCESSORS_CONF); int *pmu_fd = malloc(nr_cpus * sizeof(int)); - int i; + int i, error = 0; /* open perf_event on all cpus */ for (i = 0; i < nr_cpus; i++) { pmu_fd[i] = sys_perf_event_open(attr, -1, i, -1, 0); if (pmu_fd[i] < 0) { printf("sys_perf_event_open failed\n"); + error = 1; goto all_cpu_err; } assert(ioctl(pmu_fd[i], PERF_EVENT_IOC_SET_BPF, prog_fd[0]) == 0); - assert(ioctl(pmu_fd[i], PERF_EVENT_IOC_ENABLE, 0) == 0); + assert(ioctl(pmu_fd[i], PERF_EVENT_IOC_ENABLE) == 0); } - system("dd if=/dev/zero of=/dev/null count=5000k"); + system("dd if=/dev/zero of=/dev/null count=5000k status=none"); print_stacks(); all_cpu_err: - for (i--; i >= 0; i--) + for (i--; i >= 0; i--) { + ioctl(pmu_fd[i], PERF_EVENT_IOC_DISABLE); close(pmu_fd[i]); + } free(pmu_fd); + if (error) + int_exit(0); } static void test_perf_event_task(struct perf_event_attr *attr) @@ -150,12 +158,13 @@ static void test_perf_event_task(struct perf_event_attr *attr) pmu_fd = sys_perf_event_open(attr, 0, -1, -1, 0); if (pmu_fd < 0) { printf("sys_perf_event_open failed\n"); - return; + int_exit(0); } assert(ioctl(pmu_fd, PERF_EVENT_IOC_SET_BPF, prog_fd[0]) == 0); - assert(ioctl(pmu_fd, PERF_EVENT_IOC_ENABLE, 0) == 0); - system("dd if=/dev/zero of=/dev/null count=5000k"); + assert(ioctl(pmu_fd, PERF_EVENT_IOC_ENABLE) == 0); + system("dd if=/dev/zero of=/dev/null count=5000k status=none"); print_stacks(); + ioctl(pmu_fd, PERF_EVENT_IOC_DISABLE); close(pmu_fd); } @@ -175,11 +184,56 @@ static void test_bpf_perf_event(void) .config = PERF_COUNT_SW_CPU_CLOCK, .inherit = 1, }; + struct perf_event_attr attr_hw_cache_l1d = { + .sample_freq = SAMPLE_FREQ, + .freq = 1, + .type = PERF_TYPE_HW_CACHE, + .config = + PERF_COUNT_HW_CACHE_L1D | + (PERF_COUNT_HW_CACHE_OP_READ << 8) | + (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16), + .inherit = 1, + }; + struct perf_event_attr attr_hw_cache_branch_miss = { + .sample_freq = SAMPLE_FREQ, + .freq = 1, + .type = PERF_TYPE_HW_CACHE, + .config = + PERF_COUNT_HW_CACHE_BPU | + (PERF_COUNT_HW_CACHE_OP_READ << 8) | + (PERF_COUNT_HW_CACHE_RESULT_MISS << 16), + .inherit = 1, + }; + struct perf_event_attr attr_type_raw = { + .sample_freq = SAMPLE_FREQ, + .freq = 1, + .type = PERF_TYPE_RAW, + /* Intel Instruction Retired */ + .config = 0xc0, + .inherit = 1, + }; + printf("Test HW_CPU_CYCLES\n"); test_perf_event_all_cpu(&attr_type_hw); test_perf_event_task(&attr_type_hw); + + printf("Test SW_CPU_CLOCK\n"); test_perf_event_all_cpu(&attr_type_sw); test_perf_event_task(&attr_type_sw); + + printf("Test HW_CACHE_L1D\n"); + test_perf_event_all_cpu(&attr_hw_cache_l1d); + test_perf_event_task(&attr_hw_cache_l1d); + + printf("Test HW_CACHE_BPU\n"); + test_perf_event_all_cpu(&attr_hw_cache_branch_miss); + test_perf_event_task(&attr_hw_cache_branch_miss); + + printf("Test Instruction Retired\n"); + test_perf_event_all_cpu(&attr_type_raw); + test_perf_event_task(&attr_type_raw); + + printf("*** PASS ***\n"); } @@ -209,7 +263,6 @@ int main(int argc, char **argv) return 0; } test_bpf_perf_event(); - int_exit(0); return 0; } diff --git a/samples/bpf/tracex1_user.c b/samples/bpf/tracex1_user.c index 31a48183beea..3dcb475fb135 100644 --- a/samples/bpf/tracex1_user.c +++ b/samples/bpf/tracex1_user.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 #include <stdio.h> #include <linux/bpf.h> #include <unistd.h> diff --git a/samples/bpf/tracex2_user.c b/samples/bpf/tracex2_user.c index 7321a3f253c9..efb5e61918df 100644 --- a/samples/bpf/tracex2_user.c +++ b/samples/bpf/tracex2_user.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 #include <stdio.h> #include <unistd.h> #include <stdlib.h> diff --git a/samples/bpf/tracex5_kern.c b/samples/bpf/tracex5_kern.c index 7e4cf74553ff..f57f4e1ea1ec 100644 --- a/samples/bpf/tracex5_kern.c +++ b/samples/bpf/tracex5_kern.c @@ -9,6 +9,7 @@ #include <uapi/linux/bpf.h> #include <uapi/linux/seccomp.h> #include <uapi/linux/unistd.h> +#include "syscall_nrs.h" #include "bpf_helpers.h" #define PROG(F) SEC("kprobe/"__stringify(F)) int bpf_func_##F @@ -17,7 +18,11 @@ struct bpf_map_def SEC("maps") progs = { .type = BPF_MAP_TYPE_PROG_ARRAY, .key_size = sizeof(u32), .value_size = sizeof(u32), +#ifdef __mips__ + .max_entries = 6000, /* MIPS n64 syscalls start at 5000 */ +#else .max_entries = 1024, +#endif }; SEC("kprobe/__seccomp_filter") @@ -37,7 +42,7 @@ int bpf_prog1(struct pt_regs *ctx) } /* we jump here when syscall number == __NR_write */ -PROG(__NR_write)(struct pt_regs *ctx) +PROG(SYS__NR_write)(struct pt_regs *ctx) { struct seccomp_data sd; @@ -50,7 +55,7 @@ PROG(__NR_write)(struct pt_regs *ctx) return 0; } -PROG(__NR_read)(struct pt_regs *ctx) +PROG(SYS__NR_read)(struct pt_regs *ctx) { struct seccomp_data sd; @@ -63,7 +68,7 @@ PROG(__NR_read)(struct pt_regs *ctx) return 0; } -PROG(__NR_mmap)(struct pt_regs *ctx) +PROG(SYS__NR_mmap)(struct pt_regs *ctx) { char fmt[] = "mmap\n"; bpf_trace_printk(fmt, sizeof(fmt)); diff --git a/samples/bpf/tracex5_user.c b/samples/bpf/tracex5_user.c index 36b5925bb137..4e2774b731f0 100644 --- a/samples/bpf/tracex5_user.c +++ b/samples/bpf/tracex5_user.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 #include <stdio.h> #include <linux/bpf.h> #include <unistd.h> diff --git a/samples/bpf/tracex6_kern.c b/samples/bpf/tracex6_kern.c index be479c4af9e2..e7d180305974 100644 --- a/samples/bpf/tracex6_kern.c +++ b/samples/bpf/tracex6_kern.c @@ -3,22 +3,36 @@ #include <uapi/linux/bpf.h> #include "bpf_helpers.h" -struct bpf_map_def SEC("maps") my_map = { +struct bpf_map_def SEC("maps") counters = { .type = BPF_MAP_TYPE_PERF_EVENT_ARRAY, .key_size = sizeof(int), .value_size = sizeof(u32), - .max_entries = 32, + .max_entries = 64, +}; +struct bpf_map_def SEC("maps") values = { + .type = BPF_MAP_TYPE_HASH, + .key_size = sizeof(int), + .value_size = sizeof(u64), + .max_entries = 64, }; -SEC("kprobe/sys_write") +SEC("kprobe/htab_map_get_next_key") int bpf_prog1(struct pt_regs *ctx) { - u64 count; u32 key = bpf_get_smp_processor_id(); - char fmt[] = "CPU-%d %llu\n"; + u64 count, *val; + s64 error; + + count = bpf_perf_event_read(&counters, key); + error = (s64)count; + if (error <= -2 && error >= -22) + return 0; - count = bpf_perf_event_read(&my_map, key); - bpf_trace_printk(fmt, sizeof(fmt), key, count); + val = bpf_map_lookup_elem(&values, &key); + if (val) + *val = count; + else + bpf_map_update_elem(&values, &key, &count, BPF_NOEXIST); return 0; } diff --git a/samples/bpf/tracex6_user.c b/samples/bpf/tracex6_user.c index ca7874ed77f4..a8c22dcf8e4e 100644 --- a/samples/bpf/tracex6_user.c +++ b/samples/bpf/tracex6_user.c @@ -1,73 +1,178 @@ -#include <stdio.h> -#include <unistd.h> -#include <stdlib.h> -#include <stdbool.h> -#include <string.h> +// SPDX-License-Identifier: GPL-2.0 +#define _GNU_SOURCE + +#include <assert.h> #include <fcntl.h> -#include <poll.h> -#include <sys/ioctl.h> #include <linux/perf_event.h> #include <linux/bpf.h> -#include "libbpf.h" +#include <sched.h> +#include <stdio.h> +#include <stdlib.h> +#include <sys/ioctl.h> +#include <sys/resource.h> +#include <sys/time.h> +#include <sys/types.h> +#include <sys/wait.h> +#include <unistd.h> + #include "bpf_load.h" +#include "libbpf.h" #include "perf-sys.h" #define SAMPLE_PERIOD 0x7fffffffffffffffULL -static void test_bpf_perf_event(void) +static void check_on_cpu(int cpu, struct perf_event_attr *attr) { - int nr_cpus = sysconf(_SC_NPROCESSORS_CONF); - int *pmu_fd = malloc(nr_cpus * sizeof(int)); - int status, i; + int pmu_fd, error = 0; + cpu_set_t set; + __u64 value; - struct perf_event_attr attr_insn_pmu = { + /* Move to target CPU */ + CPU_ZERO(&set); + CPU_SET(cpu, &set); + assert(sched_setaffinity(0, sizeof(set), &set) == 0); + /* Open perf event and attach to the perf_event_array */ + pmu_fd = sys_perf_event_open(attr, -1/*pid*/, cpu/*cpu*/, -1/*group_fd*/, 0); + if (pmu_fd < 0) { + fprintf(stderr, "sys_perf_event_open failed on CPU %d\n", cpu); + error = 1; + goto on_exit; + } + assert(bpf_map_update_elem(map_fd[0], &cpu, &pmu_fd, BPF_ANY) == 0); + assert(ioctl(pmu_fd, PERF_EVENT_IOC_ENABLE, 0) == 0); + /* Trigger the kprobe */ + bpf_map_get_next_key(map_fd[1], &cpu, NULL); + /* Check the value */ + if (bpf_map_lookup_elem(map_fd[1], &cpu, &value)) { + fprintf(stderr, "Value missing for CPU %d\n", cpu); + error = 1; + goto on_exit; + } + fprintf(stderr, "CPU %d: %llu\n", cpu, value); + +on_exit: + assert(bpf_map_delete_elem(map_fd[0], &cpu) == 0 || error); + assert(ioctl(pmu_fd, PERF_EVENT_IOC_DISABLE, 0) == 0 || error); + assert(close(pmu_fd) == 0 || error); + assert(bpf_map_delete_elem(map_fd[1], &cpu) == 0 || error); + exit(error); +} + +static void test_perf_event_array(struct perf_event_attr *attr, + const char *name) +{ + int i, status, nr_cpus = sysconf(_SC_NPROCESSORS_CONF); + pid_t pid[nr_cpus]; + int err = 0; + + printf("Test reading %s counters\n", name); + + for (i = 0; i < nr_cpus; i++) { + pid[i] = fork(); + assert(pid[i] >= 0); + if (pid[i] == 0) { + check_on_cpu(i, attr); + exit(1); + } + } + + for (i = 0; i < nr_cpus; i++) { + assert(waitpid(pid[i], &status, 0) == pid[i]); + err |= status; + } + + if (err) + printf("Test: %s FAILED\n", name); +} + +static void test_bpf_perf_event(void) +{ + struct perf_event_attr attr_cycles = { .freq = 0, .sample_period = SAMPLE_PERIOD, .inherit = 0, .type = PERF_TYPE_HARDWARE, .read_format = 0, .sample_type = 0, - .config = 0,/* PMU: cycles */ + .config = PERF_COUNT_HW_CPU_CYCLES, + }; + struct perf_event_attr attr_clock = { + .freq = 0, + .sample_period = SAMPLE_PERIOD, + .inherit = 0, + .type = PERF_TYPE_SOFTWARE, + .read_format = 0, + .sample_type = 0, + .config = PERF_COUNT_SW_CPU_CLOCK, + }; + struct perf_event_attr attr_raw = { + .freq = 0, + .sample_period = SAMPLE_PERIOD, + .inherit = 0, + .type = PERF_TYPE_RAW, + .read_format = 0, + .sample_type = 0, + /* Intel Instruction Retired */ + .config = 0xc0, + }; + struct perf_event_attr attr_l1d_load = { + .freq = 0, + .sample_period = SAMPLE_PERIOD, + .inherit = 0, + .type = PERF_TYPE_HW_CACHE, + .read_format = 0, + .sample_type = 0, + .config = + PERF_COUNT_HW_CACHE_L1D | + (PERF_COUNT_HW_CACHE_OP_READ << 8) | + (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16), + }; + struct perf_event_attr attr_llc_miss = { + .freq = 0, + .sample_period = SAMPLE_PERIOD, + .inherit = 0, + .type = PERF_TYPE_HW_CACHE, + .read_format = 0, + .sample_type = 0, + .config = + PERF_COUNT_HW_CACHE_LL | + (PERF_COUNT_HW_CACHE_OP_READ << 8) | + (PERF_COUNT_HW_CACHE_RESULT_MISS << 16), + }; + struct perf_event_attr attr_msr_tsc = { + .freq = 0, + .sample_period = 0, + .inherit = 0, + /* From /sys/bus/event_source/devices/msr/ */ + .type = 7, + .read_format = 0, + .sample_type = 0, + .config = 0, }; - for (i = 0; i < nr_cpus; i++) { - pmu_fd[i] = sys_perf_event_open(&attr_insn_pmu, -1/*pid*/, i/*cpu*/, -1/*group_fd*/, 0); - if (pmu_fd[i] < 0) { - printf("event syscall failed\n"); - goto exit; - } - - bpf_map_update_elem(map_fd[0], &i, &pmu_fd[i], BPF_ANY); - ioctl(pmu_fd[i], PERF_EVENT_IOC_ENABLE, 0); - } + test_perf_event_array(&attr_cycles, "HARDWARE-cycles"); + test_perf_event_array(&attr_clock, "SOFTWARE-clock"); + test_perf_event_array(&attr_raw, "RAW-instruction-retired"); + test_perf_event_array(&attr_l1d_load, "HW_CACHE-L1D-load"); - status = system("ls > /dev/null"); - if (status) - goto exit; - status = system("sleep 2"); - if (status) - goto exit; - -exit: - for (i = 0; i < nr_cpus; i++) - close(pmu_fd[i]); - close(map_fd[0]); - free(pmu_fd); + /* below tests may fail in qemu */ + test_perf_event_array(&attr_llc_miss, "HW_CACHE-LLC-miss"); + test_perf_event_array(&attr_msr_tsc, "Dynamic-msr-tsc"); } int main(int argc, char **argv) { + struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY}; char filename[256]; snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]); + setrlimit(RLIMIT_MEMLOCK, &r); if (load_bpf_file(filename)) { printf("%s", bpf_log_buf); return 1; } test_bpf_perf_event(); - read_trace_pipe(); - return 0; } diff --git a/samples/bpf/xdp_monitor_kern.c b/samples/bpf/xdp_monitor_kern.c new file mode 100644 index 000000000000..74f3fd8ed729 --- /dev/null +++ b/samples/bpf/xdp_monitor_kern.c @@ -0,0 +1,88 @@ +/* XDP monitor tool, based on tracepoints + * + * Copyright(c) 2017 Jesper Dangaard Brouer, Red Hat Inc. + */ +#include <uapi/linux/bpf.h> +#include "bpf_helpers.h" + +struct bpf_map_def SEC("maps") redirect_err_cnt = { + .type = BPF_MAP_TYPE_PERCPU_ARRAY, + .key_size = sizeof(u32), + .value_size = sizeof(u64), + .max_entries = 2, + /* TODO: have entries for all possible errno's */ +}; + +/* Tracepoint format: /sys/kernel/debug/tracing/events/xdp/xdp_redirect/format + * Code in: kernel/include/trace/events/xdp.h + */ +struct xdp_redirect_ctx { + unsigned short common_type; // offset:0; size:2; signed:0; + unsigned char common_flags; // offset:2; size:1; signed:0; + unsigned char common_preempt_count;// offset:3; size:1; signed:0; + int common_pid; // offset:4; size:4; signed:1; + + int prog_id; // offset:8; size:4; signed:1; + u32 act; // offset:12 size:4; signed:0; + int ifindex; // offset:16 size:4; signed:1; + int err; // offset:20 size:4; signed:1; + int to_ifindex; // offset:24 size:4; signed:1; + u32 map_id; // offset:28 size:4; signed:0; + int map_index; // offset:32 size:4; signed:1; +}; // offset:36 + +enum { + XDP_REDIRECT_SUCCESS = 0, + XDP_REDIRECT_ERROR = 1 +}; + +static __always_inline +int xdp_redirect_collect_stat(struct xdp_redirect_ctx *ctx) +{ + u32 key = XDP_REDIRECT_ERROR; + int err = ctx->err; + u64 *cnt; + + if (!err) + key = XDP_REDIRECT_SUCCESS; + + cnt = bpf_map_lookup_elem(&redirect_err_cnt, &key); + if (!cnt) + return 0; + *cnt += 1; + + return 0; /* Indicate event was filtered (no further processing)*/ + /* + * Returning 1 here would allow e.g. a perf-record tracepoint + * to see and record these events, but it doesn't work well + * in-practice as stopping perf-record also unload this + * bpf_prog. Plus, there is additional overhead of doing so. + */ +} + +SEC("tracepoint/xdp/xdp_redirect_err") +int trace_xdp_redirect_err(struct xdp_redirect_ctx *ctx) +{ + return xdp_redirect_collect_stat(ctx); +} + + +SEC("tracepoint/xdp/xdp_redirect_map_err") +int trace_xdp_redirect_map_err(struct xdp_redirect_ctx *ctx) +{ + return xdp_redirect_collect_stat(ctx); +} + +/* Likely unloaded when prog starts */ +SEC("tracepoint/xdp/xdp_redirect") +int trace_xdp_redirect(struct xdp_redirect_ctx *ctx) +{ + return xdp_redirect_collect_stat(ctx); +} + +/* Likely unloaded when prog starts */ +SEC("tracepoint/xdp/xdp_redirect_map") +int trace_xdp_redirect_map(struct xdp_redirect_ctx *ctx) +{ + return xdp_redirect_collect_stat(ctx); +} diff --git a/samples/bpf/xdp_monitor_user.c b/samples/bpf/xdp_monitor_user.c new file mode 100644 index 000000000000..b51b4f5e3257 --- /dev/null +++ b/samples/bpf/xdp_monitor_user.c @@ -0,0 +1,295 @@ +/* Copyright(c) 2017 Jesper Dangaard Brouer, Red Hat, Inc. + */ +static const char *__doc__= + "XDP monitor tool, based on tracepoints\n" +; + +static const char *__doc_err_only__= + " NOTICE: Only tracking XDP redirect errors\n" + " Enable TX success stats via '--stats'\n" + " (which comes with a per packet processing overhead)\n" +; + +#include <errno.h> +#include <stdio.h> +#include <stdlib.h> +#include <stdbool.h> +#include <stdint.h> +#include <string.h> +#include <ctype.h> +#include <unistd.h> +#include <locale.h> + +#include <getopt.h> +#include <net/if.h> +#include <time.h> + +#include "libbpf.h" +#include "bpf_load.h" +#include "bpf_util.h" + +static int verbose = 1; +static bool debug = false; + +static const struct option long_options[] = { + {"help", no_argument, NULL, 'h' }, + {"debug", no_argument, NULL, 'D' }, + {"stats", no_argument, NULL, 'S' }, + {"sec", required_argument, NULL, 's' }, + {0, 0, NULL, 0 } +}; + +static void usage(char *argv[]) +{ + int i; + printf("\nDOCUMENTATION:\n%s\n", __doc__); + printf("\n"); + printf(" Usage: %s (options-see-below)\n", + argv[0]); + printf(" Listing options:\n"); + for (i = 0; long_options[i].name != 0; i++) { + printf(" --%-15s", long_options[i].name); + if (long_options[i].flag != NULL) + printf(" flag (internal value:%d)", + *long_options[i].flag); + else + printf("(internal short-option: -%c)", + long_options[i].val); + printf("\n"); + } + printf("\n"); +} + +#define NANOSEC_PER_SEC 1000000000 /* 10^9 */ +__u64 gettime(void) +{ + struct timespec t; + int res; + + res = clock_gettime(CLOCK_MONOTONIC, &t); + if (res < 0) { + fprintf(stderr, "Error with gettimeofday! (%i)\n", res); + exit(EXIT_FAILURE); + } + return (__u64) t.tv_sec * NANOSEC_PER_SEC + t.tv_nsec; +} + +enum { + REDIR_SUCCESS = 0, + REDIR_ERROR = 1, +}; +#define REDIR_RES_MAX 2 +static const char *redir_names[REDIR_RES_MAX] = { + [REDIR_SUCCESS] = "Success", + [REDIR_ERROR] = "Error", +}; +static const char *err2str(int err) +{ + if (err < REDIR_RES_MAX) + return redir_names[err]; + return NULL; +} + +struct record { + __u64 counter; + __u64 timestamp; +}; + +struct stats_record { + struct record xdp_redir[REDIR_RES_MAX]; +}; + +static void stats_print_headers(bool err_only) +{ + if (err_only) + printf("\n%s\n", __doc_err_only__); + + printf("%-14s %-10s %-18s %-9s\n", + "XDP_REDIRECT", "pps ", "pps-human-readable", "measure-period"); +} + +static void stats_print(struct stats_record *rec, + struct stats_record *prev, + bool err_only) +{ + int i = 0; + + if (err_only) + i = REDIR_ERROR; + + for (; i < REDIR_RES_MAX; i++) { + struct record *r = &rec->xdp_redir[i]; + struct record *p = &prev->xdp_redir[i]; + __u64 period = 0; + __u64 packets = 0; + double pps = 0; + double period_ = 0; + + if (p->timestamp) { + packets = r->counter - p->counter; + period = r->timestamp - p->timestamp; + if (period > 0) { + period_ = ((double) period / NANOSEC_PER_SEC); + pps = packets / period_; + } + } + + printf("%-14s %-10.0f %'-18.0f %f\n", + err2str(i), pps, pps, period_); + } +} + +static __u64 get_key32_value64_percpu(int fd, __u32 key) +{ + /* For percpu maps, userspace gets a value per possible CPU */ + unsigned int nr_cpus = bpf_num_possible_cpus(); + __u64 values[nr_cpus]; + __u64 sum = 0; + int i; + + if ((bpf_map_lookup_elem(fd, &key, values)) != 0) { + fprintf(stderr, + "ERR: bpf_map_lookup_elem failed key:0x%X\n", key); + return 0; + } + + /* Sum values from each CPU */ + for (i = 0; i < nr_cpus; i++) { + sum += values[i]; + } + return sum; +} + +static bool stats_collect(int fd, struct stats_record *rec) +{ + int i; + + /* TODO: Detect if someone unloaded the perf event_fd's, as + * this can happen by someone running perf-record -e + */ + + for (i = 0; i < REDIR_RES_MAX; i++) { + rec->xdp_redir[i].timestamp = gettime(); + rec->xdp_redir[i].counter = get_key32_value64_percpu(fd, i); + } + return true; +} + +static void stats_poll(int interval, bool err_only) +{ + struct stats_record rec, prev; + int map_fd; + + memset(&rec, 0, sizeof(rec)); + + /* Trick to pretty printf with thousands separators use %' */ + setlocale(LC_NUMERIC, "en_US"); + + /* Header */ + if (verbose) + printf("\n%s", __doc__); + + /* TODO Need more advanced stats on error types */ + if (verbose) + printf(" - Stats map: %s\n", map_data[0].name); + map_fd = map_data[0].fd; + + stats_print_headers(err_only); + fflush(stdout); + + while (1) { + memcpy(&prev, &rec, sizeof(rec)); + stats_collect(map_fd, &rec); + stats_print(&rec, &prev, err_only); + fflush(stdout); + sleep(interval); + } +} + +void print_bpf_prog_info(void) +{ + int i; + + /* Prog info */ + printf("Loaded BPF prog have %d bpf program(s)\n", prog_cnt); + for (i = 0; i < prog_cnt; i++) { + printf(" - prog_fd[%d] = fd(%d)\n", i, prog_fd[i]); + } + + /* Maps info */ + printf("Loaded BPF prog have %d map(s)\n", map_data_count); + for (i = 0; i < map_data_count; i++) { + char *name = map_data[i].name; + int fd = map_data[i].fd; + + printf(" - map_data[%d] = fd(%d) name:%s\n", i, fd, name); + } + + /* Event info */ + printf("Searching for (max:%d) event file descriptor(s)\n", prog_cnt); + for (i = 0; i < prog_cnt; i++) { + if (event_fd[i] != -1) + printf(" - event_fd[%d] = fd(%d)\n", i, event_fd[i]); + } +} + +int main(int argc, char **argv) +{ + int longindex = 0, opt; + int ret = EXIT_SUCCESS; + char bpf_obj_file[256]; + + /* Default settings: */ + bool errors_only = true; + int interval = 2; + + snprintf(bpf_obj_file, sizeof(bpf_obj_file), "%s_kern.o", argv[0]); + + /* Parse commands line args */ + while ((opt = getopt_long(argc, argv, "h", + long_options, &longindex)) != -1) { + switch (opt) { + case 'D': + debug = true; + break; + case 'S': + errors_only = false; + break; + case 's': + interval = atoi(optarg); + break; + case 'h': + default: + usage(argv); + return EXIT_FAILURE; + } + } + + if (load_bpf_file(bpf_obj_file)) { + printf("ERROR - bpf_log_buf: %s", bpf_log_buf); + return 1; + } + if (!prog_fd[0]) { + printf("ERROR - load_bpf_file: %s\n", strerror(errno)); + return 1; + } + + if (debug) { + print_bpf_prog_info(); + } + + /* Unload/stop tracepoint event by closing fd's */ + if (errors_only) { + /* The prog_fd[i] and event_fd[i] depend on the + * order the functions was defined in _kern.c + */ + close(event_fd[2]); /* tracepoint/xdp/xdp_redirect */ + close(prog_fd[2]); /* func: trace_xdp_redirect */ + close(event_fd[3]); /* tracepoint/xdp/xdp_redirect_map */ + close(prog_fd[3]); /* func: trace_xdp_redirect_map */ + } + + stats_poll(interval, errors_only); + + return ret; +} diff --git a/samples/bpf/xdp_redirect_kern.c b/samples/bpf/xdp_redirect_kern.c new file mode 100644 index 000000000000..8abb151e385f --- /dev/null +++ b/samples/bpf/xdp_redirect_kern.c @@ -0,0 +1,90 @@ +/* Copyright (c) 2016 John Fastabend <john.r.fastabend@intel.com> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + */ +#define KBUILD_MODNAME "foo" +#include <uapi/linux/bpf.h> +#include <linux/in.h> +#include <linux/if_ether.h> +#include <linux/if_packet.h> +#include <linux/if_vlan.h> +#include <linux/ip.h> +#include <linux/ipv6.h> +#include "bpf_helpers.h" + +struct bpf_map_def SEC("maps") tx_port = { + .type = BPF_MAP_TYPE_ARRAY, + .key_size = sizeof(int), + .value_size = sizeof(int), + .max_entries = 1, +}; + +/* Count RX packets, as XDP bpf_prog doesn't get direct TX-success + * feedback. Redirect TX errors can be caught via a tracepoint. + */ +struct bpf_map_def SEC("maps") rxcnt = { + .type = BPF_MAP_TYPE_PERCPU_ARRAY, + .key_size = sizeof(u32), + .value_size = sizeof(long), + .max_entries = 1, +}; + +static void swap_src_dst_mac(void *data) +{ + unsigned short *p = data; + unsigned short dst[3]; + + dst[0] = p[0]; + dst[1] = p[1]; + dst[2] = p[2]; + p[0] = p[3]; + p[1] = p[4]; + p[2] = p[5]; + p[3] = dst[0]; + p[4] = dst[1]; + p[5] = dst[2]; +} + +SEC("xdp_redirect") +int xdp_redirect_prog(struct xdp_md *ctx) +{ + void *data_end = (void *)(long)ctx->data_end; + void *data = (void *)(long)ctx->data; + struct ethhdr *eth = data; + int rc = XDP_DROP; + int *ifindex, port = 0; + long *value; + u32 key = 0; + u64 nh_off; + + nh_off = sizeof(*eth); + if (data + nh_off > data_end) + return rc; + + ifindex = bpf_map_lookup_elem(&tx_port, &port); + if (!ifindex) + return rc; + + value = bpf_map_lookup_elem(&rxcnt, &key); + if (value) + *value += 1; + + swap_src_dst_mac(data); + return bpf_redirect(*ifindex, 0); +} + +/* Redirect require an XDP bpf_prog loaded on the TX device */ +SEC("xdp_redirect_dummy") +int xdp_redirect_dummy_prog(struct xdp_md *ctx) +{ + return XDP_PASS; +} + +char _license[] SEC("license") = "GPL"; diff --git a/samples/bpf/xdp_redirect_map_kern.c b/samples/bpf/xdp_redirect_map_kern.c new file mode 100644 index 000000000000..740a529ba84f --- /dev/null +++ b/samples/bpf/xdp_redirect_map_kern.c @@ -0,0 +1,92 @@ +/* Copyright (c) 2017 Covalent IO, Inc. http://covalent.io + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + */ +#define KBUILD_MODNAME "foo" +#include <uapi/linux/bpf.h> +#include <linux/in.h> +#include <linux/if_ether.h> +#include <linux/if_packet.h> +#include <linux/if_vlan.h> +#include <linux/ip.h> +#include <linux/ipv6.h> +#include "bpf_helpers.h" + +struct bpf_map_def SEC("maps") tx_port = { + .type = BPF_MAP_TYPE_DEVMAP, + .key_size = sizeof(int), + .value_size = sizeof(int), + .max_entries = 100, +}; + +/* Count RX packets, as XDP bpf_prog doesn't get direct TX-success + * feedback. Redirect TX errors can be caught via a tracepoint. + */ +struct bpf_map_def SEC("maps") rxcnt = { + .type = BPF_MAP_TYPE_PERCPU_ARRAY, + .key_size = sizeof(u32), + .value_size = sizeof(long), + .max_entries = 1, +}; + +static void swap_src_dst_mac(void *data) +{ + unsigned short *p = data; + unsigned short dst[3]; + + dst[0] = p[0]; + dst[1] = p[1]; + dst[2] = p[2]; + p[0] = p[3]; + p[1] = p[4]; + p[2] = p[5]; + p[3] = dst[0]; + p[4] = dst[1]; + p[5] = dst[2]; +} + +SEC("xdp_redirect_map") +int xdp_redirect_map_prog(struct xdp_md *ctx) +{ + void *data_end = (void *)(long)ctx->data_end; + void *data = (void *)(long)ctx->data; + struct ethhdr *eth = data; + int rc = XDP_DROP; + int vport, port = 0, m = 0; + long *value; + u32 key = 0; + u64 nh_off; + + nh_off = sizeof(*eth); + if (data + nh_off > data_end) + return rc; + + /* constant virtual port */ + vport = 0; + + /* count packet in global counter */ + value = bpf_map_lookup_elem(&rxcnt, &key); + if (value) + *value += 1; + + swap_src_dst_mac(data); + + /* send packet out physical port */ + return bpf_redirect_map(&tx_port, vport, 0); +} + +/* Redirect require an XDP bpf_prog loaded on the TX device */ +SEC("xdp_redirect_dummy") +int xdp_redirect_dummy_prog(struct xdp_md *ctx) +{ + return XDP_PASS; +} + +char _license[] SEC("license") = "GPL"; diff --git a/samples/bpf/xdp_redirect_map_user.c b/samples/bpf/xdp_redirect_map_user.c new file mode 100644 index 000000000000..d4d86a273fba --- /dev/null +++ b/samples/bpf/xdp_redirect_map_user.c @@ -0,0 +1,145 @@ +/* Copyright (c) 2017 Covalent IO, Inc. http://covalent.io + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + */ +#include <linux/bpf.h> +#include <linux/if_link.h> +#include <assert.h> +#include <errno.h> +#include <signal.h> +#include <stdio.h> +#include <stdlib.h> +#include <stdbool.h> +#include <string.h> +#include <unistd.h> +#include <libgen.h> + +#include "bpf_load.h" +#include "bpf_util.h" +#include "libbpf.h" + +static int ifindex_in; +static int ifindex_out; +static bool ifindex_out_xdp_dummy_attached = true; + +static __u32 xdp_flags; + +static void int_exit(int sig) +{ + set_link_xdp_fd(ifindex_in, -1, xdp_flags); + if (ifindex_out_xdp_dummy_attached) + set_link_xdp_fd(ifindex_out, -1, xdp_flags); + exit(0); +} + +static void poll_stats(int interval, int ifindex) +{ + unsigned int nr_cpus = bpf_num_possible_cpus(); + __u64 values[nr_cpus], prev[nr_cpus]; + + memset(prev, 0, sizeof(prev)); + + while (1) { + __u64 sum = 0; + __u32 key = 0; + int i; + + sleep(interval); + assert(bpf_map_lookup_elem(map_fd[1], &key, values) == 0); + for (i = 0; i < nr_cpus; i++) + sum += (values[i] - prev[i]); + if (sum) + printf("ifindex %i: %10llu pkt/s\n", + ifindex, sum / interval); + memcpy(prev, values, sizeof(values)); + } +} + +static void usage(const char *prog) +{ + fprintf(stderr, + "usage: %s [OPTS] IFINDEX_IN IFINDEX_OUT\n\n" + "OPTS:\n" + " -S use skb-mode\n" + " -N enforce native mode\n", + prog); +} + +int main(int argc, char **argv) +{ + const char *optstr = "SN"; + char filename[256]; + int ret, opt, key = 0; + + while ((opt = getopt(argc, argv, optstr)) != -1) { + switch (opt) { + case 'S': + xdp_flags |= XDP_FLAGS_SKB_MODE; + break; + case 'N': + xdp_flags |= XDP_FLAGS_DRV_MODE; + break; + default: + usage(basename(argv[0])); + return 1; + } + } + + if (optind == argc) { + printf("usage: %s IFINDEX_IN IFINDEX_OUT\n", argv[0]); + return 1; + } + + ifindex_in = strtoul(argv[optind], NULL, 0); + ifindex_out = strtoul(argv[optind + 1], NULL, 0); + printf("input: %d output: %d\n", ifindex_in, ifindex_out); + + snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]); + + if (load_bpf_file(filename)) { + printf("%s", bpf_log_buf); + return 1; + } + + if (!prog_fd[0]) { + printf("load_bpf_file: %s\n", strerror(errno)); + return 1; + } + + if (set_link_xdp_fd(ifindex_in, prog_fd[0], xdp_flags) < 0) { + printf("ERROR: link set xdp fd failed on %d\n", ifindex_in); + return 1; + } + + /* Loading dummy XDP prog on out-device */ + if (set_link_xdp_fd(ifindex_out, prog_fd[1], + (xdp_flags | XDP_FLAGS_UPDATE_IF_NOEXIST)) < 0) { + printf("WARN: link set xdp fd failed on %d\n", ifindex_out); + ifindex_out_xdp_dummy_attached = false; + } + + signal(SIGINT, int_exit); + signal(SIGTERM, int_exit); + + printf("map[0] (vports) = %i, map[1] (map) = %i, map[2] (count) = %i\n", + map_fd[0], map_fd[1], map_fd[2]); + + /* populate virtual to physical port map */ + ret = bpf_map_update_elem(map_fd[0], &key, &ifindex_out, 0); + if (ret) { + perror("bpf_update_elem"); + goto out; + } + + poll_stats(2, ifindex_out); + +out: + return 0; +} diff --git a/samples/bpf/xdp_redirect_user.c b/samples/bpf/xdp_redirect_user.c new file mode 100644 index 000000000000..4475d837bf2c --- /dev/null +++ b/samples/bpf/xdp_redirect_user.c @@ -0,0 +1,143 @@ +/* Copyright (c) 2016 John Fastabend <john.r.fastabend@intel.com> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + */ +#include <linux/bpf.h> +#include <linux/if_link.h> +#include <assert.h> +#include <errno.h> +#include <signal.h> +#include <stdio.h> +#include <stdlib.h> +#include <stdbool.h> +#include <string.h> +#include <unistd.h> +#include <libgen.h> + +#include "bpf_load.h" +#include "bpf_util.h" +#include "libbpf.h" + +static int ifindex_in; +static int ifindex_out; +static bool ifindex_out_xdp_dummy_attached = true; + +static __u32 xdp_flags; + +static void int_exit(int sig) +{ + set_link_xdp_fd(ifindex_in, -1, xdp_flags); + if (ifindex_out_xdp_dummy_attached) + set_link_xdp_fd(ifindex_out, -1, xdp_flags); + exit(0); +} + +static void poll_stats(int interval, int ifindex) +{ + unsigned int nr_cpus = bpf_num_possible_cpus(); + __u64 values[nr_cpus], prev[nr_cpus]; + + memset(prev, 0, sizeof(prev)); + + while (1) { + __u64 sum = 0; + __u32 key = 0; + int i; + + sleep(interval); + assert(bpf_map_lookup_elem(map_fd[1], &key, values) == 0); + for (i = 0; i < nr_cpus; i++) + sum += (values[i] - prev[i]); + if (sum) + printf("ifindex %i: %10llu pkt/s\n", + ifindex, sum / interval); + memcpy(prev, values, sizeof(values)); + } +} + +static void usage(const char *prog) +{ + fprintf(stderr, + "usage: %s [OPTS] IFINDEX_IN IFINDEX_OUT\n\n" + "OPTS:\n" + " -S use skb-mode\n" + " -N enforce native mode\n", + prog); +} + + +int main(int argc, char **argv) +{ + const char *optstr = "SN"; + char filename[256]; + int ret, opt, key = 0; + + while ((opt = getopt(argc, argv, optstr)) != -1) { + switch (opt) { + case 'S': + xdp_flags |= XDP_FLAGS_SKB_MODE; + break; + case 'N': + xdp_flags |= XDP_FLAGS_DRV_MODE; + break; + default: + usage(basename(argv[0])); + return 1; + } + } + + if (optind == argc) { + printf("usage: %s IFINDEX_IN IFINDEX_OUT\n", argv[0]); + return 1; + } + + ifindex_in = strtoul(argv[optind], NULL, 0); + ifindex_out = strtoul(argv[optind + 1], NULL, 0); + printf("input: %d output: %d\n", ifindex_in, ifindex_out); + + snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]); + + if (load_bpf_file(filename)) { + printf("%s", bpf_log_buf); + return 1; + } + + if (!prog_fd[0]) { + printf("load_bpf_file: %s\n", strerror(errno)); + return 1; + } + + if (set_link_xdp_fd(ifindex_in, prog_fd[0], xdp_flags) < 0) { + printf("ERROR: link set xdp fd failed on %d\n", ifindex_in); + return 1; + } + + /* Loading dummy XDP prog on out-device */ + if (set_link_xdp_fd(ifindex_out, prog_fd[1], + (xdp_flags | XDP_FLAGS_UPDATE_IF_NOEXIST)) < 0) { + printf("WARN: link set xdp fd failed on %d\n", ifindex_out); + ifindex_out_xdp_dummy_attached = false; + } + + signal(SIGINT, int_exit); + signal(SIGTERM, int_exit); + + /* bpf redirect port */ + ret = bpf_map_update_elem(map_fd[0], &key, &ifindex_out, 0); + if (ret) { + perror("bpf_update_elem"); + goto out; + } + + poll_stats(2, ifindex_out); + +out: + return 0; +} diff --git a/samples/configfs/configfs_sample.c b/samples/configfs/configfs_sample.c index 1ea33119e532..004a4e201476 100644 --- a/samples/configfs/configfs_sample.c +++ b/samples/configfs/configfs_sample.c @@ -115,7 +115,7 @@ static struct configfs_attribute *childless_attrs[] = { NULL, }; -static struct config_item_type childless_type = { +static const struct config_item_type childless_type = { .ct_attrs = childless_attrs, .ct_owner = THIS_MODULE, }; @@ -193,7 +193,7 @@ static struct configfs_item_operations simple_child_item_ops = { .release = simple_child_release, }; -static struct config_item_type simple_child_type = { +static const struct config_item_type simple_child_type = { .ct_item_ops = &simple_child_item_ops, .ct_attrs = simple_child_attrs, .ct_owner = THIS_MODULE, @@ -261,7 +261,7 @@ static struct configfs_group_operations simple_children_group_ops = { .make_item = simple_children_make_item, }; -static struct config_item_type simple_children_type = { +static const struct config_item_type simple_children_type = { .ct_item_ops = &simple_children_item_ops, .ct_group_ops = &simple_children_group_ops, .ct_attrs = simple_children_attrs, @@ -331,7 +331,7 @@ static struct configfs_group_operations group_children_group_ops = { .make_group = group_children_make_group, }; -static struct config_item_type group_children_type = { +static const struct config_item_type group_children_type = { .ct_group_ops = &group_children_group_ops, .ct_attrs = group_children_attrs, .ct_owner = THIS_MODULE, diff --git a/samples/connector/Makefile b/samples/connector/Makefile index 91762d946a53..fe3c8542ae4a 100644 --- a/samples/connector/Makefile +++ b/samples/connector/Makefile @@ -1,3 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0 obj-$(CONFIG_SAMPLE_CONNECTOR) += cn_test.o # List of programs to build diff --git a/samples/connector/cn_test.c b/samples/connector/cn_test.c index d12cc944b696..95cd06f4ec1e 100644 --- a/samples/connector/cn_test.c +++ b/samples/connector/cn_test.c @@ -125,12 +125,12 @@ static int cn_test_want_notify(void) #endif static u32 cn_test_timer_counter; -static void cn_test_timer_func(unsigned long __data) +static void cn_test_timer_func(struct timer_list *unused) { struct cn_msg *m; char data[32]; - pr_debug("%s: timer fired with data %lu\n", __func__, __data); + pr_debug("%s: timer fired\n", __func__); m = kzalloc(sizeof(*m) + sizeof(data), GFP_ATOMIC); if (m) { @@ -168,7 +168,7 @@ static int cn_test_init(void) goto err_out; } - setup_timer(&cn_test_timer, cn_test_timer_func, 0); + timer_setup(&cn_test_timer, cn_test_timer_func, 0); mod_timer(&cn_test_timer, jiffies + msecs_to_jiffies(1000)); pr_info("initialized with id={%u.%u}\n", diff --git a/samples/hidraw/Makefile b/samples/hidraw/Makefile index a9ab96188fbe..f5c3012ffa79 100644 --- a/samples/hidraw/Makefile +++ b/samples/hidraw/Makefile @@ -1,3 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0 # kbuild trick to avoid linker error. Can be omitted if a module is built. obj- := dummy.o diff --git a/samples/hidraw/hid-example.c b/samples/hidraw/hid-example.c index 92e6c1511910..9bfd8ff6de82 100644 --- a/samples/hidraw/hid-example.c +++ b/samples/hidraw/hid-example.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Hidraw Userspace Example * diff --git a/samples/kfifo/dma-example.c b/samples/kfifo/dma-example.c index aa243db93f01..be0d4a5fdf53 100644 --- a/samples/kfifo/dma-example.c +++ b/samples/kfifo/dma-example.c @@ -75,8 +75,8 @@ static int __init example_init(void) for (i = 0; i < nents; i++) { printk(KERN_INFO "sg[%d] -> " - "page_link 0x%.8lx offset 0x%.8x length 0x%.8x\n", - i, sg[i].page_link, sg[i].offset, sg[i].length); + "page %p offset 0x%.8x length 0x%.8x\n", + i, sg_page(&sg[i]), sg[i].offset, sg[i].length); if (sg_is_last(&sg[i])) break; @@ -104,8 +104,8 @@ static int __init example_init(void) for (i = 0; i < nents; i++) { printk(KERN_INFO "sg[%d] -> " - "page_link 0x%.8lx offset 0x%.8x length 0x%.8x\n", - i, sg[i].page_link, sg[i].offset, sg[i].length); + "page %p offset 0x%.8x length 0x%.8x\n", + i, sg_page(&sg[i]), sg[i].offset, sg[i].length); if (sg_is_last(&sg[i])) break; diff --git a/samples/kprobes/Makefile b/samples/kprobes/Makefile index 68739bc4fc6a..880e54d2c082 100644 --- a/samples/kprobes/Makefile +++ b/samples/kprobes/Makefile @@ -1,5 +1,5 @@ # builds the kprobes example kernel modules; # then to use one (as root): insmod <module_name.ko> -obj-$(CONFIG_SAMPLE_KPROBES) += kprobe_example.o jprobe_example.o +obj-$(CONFIG_SAMPLE_KPROBES) += kprobe_example.o obj-$(CONFIG_SAMPLE_KRETPROBES) += kretprobe_example.o diff --git a/samples/kprobes/jprobe_example.c b/samples/kprobes/jprobe_example.c deleted file mode 100644 index e3c0a40909f7..000000000000 --- a/samples/kprobes/jprobe_example.c +++ /dev/null @@ -1,67 +0,0 @@ -/* - * Here's a sample kernel module showing the use of jprobes to dump - * the arguments of _do_fork(). - * - * For more information on theory of operation of jprobes, see - * Documentation/kprobes.txt - * - * Build and insert the kernel module as done in the kprobe example. - * You will see the trace data in /var/log/messages and on the - * console whenever _do_fork() is invoked to create a new process. - * (Some messages may be suppressed if syslogd is configured to - * eliminate duplicate messages.) - */ - -#include <linux/kernel.h> -#include <linux/module.h> -#include <linux/kprobes.h> - -/* - * Jumper probe for _do_fork. - * Mirror principle enables access to arguments of the probed routine - * from the probe handler. - */ - -/* Proxy routine having the same arguments as actual _do_fork() routine */ -static long j_do_fork(unsigned long clone_flags, unsigned long stack_start, - unsigned long stack_size, int __user *parent_tidptr, - int __user *child_tidptr, unsigned long tls) -{ - pr_info("jprobe: clone_flags = 0x%lx, stack_start = 0x%lx " - "stack_size = 0x%lx\n", clone_flags, stack_start, stack_size); - - /* Always end with a call to jprobe_return(). */ - jprobe_return(); - return 0; -} - -static struct jprobe my_jprobe = { - .entry = j_do_fork, - .kp = { - .symbol_name = "_do_fork", - }, -}; - -static int __init jprobe_init(void) -{ - int ret; - - ret = register_jprobe(&my_jprobe); - if (ret < 0) { - pr_err("register_jprobe failed, returned %d\n", ret); - return -1; - } - pr_info("Planted jprobe at %p, handler addr %p\n", - my_jprobe.kp.addr, my_jprobe.entry); - return 0; -} - -static void __exit jprobe_exit(void) -{ - unregister_jprobe(&my_jprobe); - pr_info("jprobe at %p unregistered\n", my_jprobe.kp.addr); -} - -module_init(jprobe_init) -module_exit(jprobe_exit) -MODULE_LICENSE("GPL"); diff --git a/samples/kprobes/kprobe_example.c b/samples/kprobes/kprobe_example.c index 88b3e2d227ae..67de3b774bc9 100644 --- a/samples/kprobes/kprobe_example.c +++ b/samples/kprobes/kprobe_example.c @@ -47,6 +47,10 @@ static int handler_pre(struct kprobe *p, struct pt_regs *regs) " pstate = 0x%lx\n", p->symbol_name, p->addr, (long)regs->pc, (long)regs->pstate); #endif +#ifdef CONFIG_S390 + pr_info("<%s> pre_handler: p->addr, 0x%p, ip = 0x%lx, flags = 0x%lx\n", + p->symbol_name, p->addr, regs->psw.addr, regs->flags); +#endif /* A dump_stack() here will give a stack backtrace */ return 0; @@ -76,6 +80,10 @@ static void handler_post(struct kprobe *p, struct pt_regs *regs, pr_info("<%s> post_handler: p->addr = 0x%p, pstate = 0x%lx\n", p->symbol_name, p->addr, (long)regs->pstate); #endif +#ifdef CONFIG_S390 + pr_info("<%s> pre_handler: p->addr, 0x%p, flags = 0x%lx\n", + p->symbol_name, p->addr, regs->flags); +#endif } /* diff --git a/samples/mei/Makefile b/samples/mei/Makefile index 7aac216dc420..c7e52e9e92ca 100644 --- a/samples/mei/Makefile +++ b/samples/mei/Makefile @@ -1,3 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0 CC := $(CROSS_COMPILE)gcc CFLAGS := -I../../usr/include diff --git a/samples/mic/mpssd/Makefile b/samples/mic/mpssd/Makefile index 3e3ef91fed6b..a7a6e0c70424 100644 --- a/samples/mic/mpssd/Makefile +++ b/samples/mic/mpssd/Makefile @@ -1,3 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0 ifndef CROSS_COMPILE uname_M := $(shell uname -m 2>/dev/null || echo not) ARCH ?= $(shell echo $(uname_M) | sed -e s/i.86/x86/ -e s/x86_64/x86/) diff --git a/samples/mic/mpssd/mpssd.c b/samples/mic/mpssd/mpssd.c index 49db1def1721..f42ce551bb48 100644 --- a/samples/mic/mpssd/mpssd.c +++ b/samples/mic/mpssd/mpssd.c @@ -65,7 +65,7 @@ static struct mic_info mic_list; /* to align the pointer to the (next) page boundary */ #define PAGE_ALIGN(addr) _ALIGN(addr, PAGE_SIZE) -#define ACCESS_ONCE(x) (*(volatile typeof(x) *)&(x)) +#define READ_ONCE(x) (*(volatile typeof(x) *)&(x)) #define GSO_ENABLED 1 #define MAX_GSO_SIZE (64 * 1024) @@ -382,7 +382,7 @@ disp_iovec(struct mic_info *mic, struct mic_copy_desc *copy, static inline __u16 read_avail_idx(struct mic_vring *vr) { - return ACCESS_ONCE(vr->info->avail_idx); + return READ_ONCE(vr->info->avail_idx); } static inline void txrx_prepare(int type, bool tx, struct mic_vring *vr, @@ -523,7 +523,7 @@ spin_for_descriptors(struct mic_info *mic, struct mic_vring *vr) { __u16 avail_idx = read_avail_idx(vr); - while (avail_idx == le16toh(ACCESS_ONCE(vr->vr.avail->idx))) { + while (avail_idx == le16toh(READ_ONCE(vr->vr.avail->idx))) { #ifdef DEBUG mpsslog("%s %s waiting for desc avail %d info_avail %d\n", mic->name, __func__, diff --git a/samples/pktgen/README.rst b/samples/pktgen/README.rst index 8365c4e5c513..ff8929da61c5 100644 --- a/samples/pktgen/README.rst +++ b/samples/pktgen/README.rst @@ -21,7 +21,9 @@ across the sample scripts. Usage example is printed on errors:: -d : ($DEST_IP) destination IP -m : ($DST_MAC) destination MAC-addr -t : ($THREADS) threads to start + -f : ($F_THREAD) index of first thread (zero indexed CPU number) -c : ($SKB_CLONE) SKB clones send before alloc new SKB + -n : ($COUNT) num messages to send per thread, 0 means indefinitely -b : ($BURST) HW level bursting of SKBs -v : ($VERBOSE) verbose -x : ($DEBUG) debug diff --git a/samples/pktgen/parameters.sh b/samples/pktgen/parameters.sh index f70ea7dd5660..72fc562876e2 100644 --- a/samples/pktgen/parameters.sh +++ b/samples/pktgen/parameters.sh @@ -1,4 +1,5 @@ # +# SPDX-License-Identifier: GPL-2.0 # Common parameter parsing for pktgen scripts # @@ -10,7 +11,9 @@ function usage() { echo " -d : (\$DEST_IP) destination IP" echo " -m : (\$DST_MAC) destination MAC-addr" echo " -t : (\$THREADS) threads to start" + echo " -f : (\$F_THREAD) index of first thread (zero indexed CPU number)" echo " -c : (\$SKB_CLONE) SKB clones send before alloc new SKB" + echo " -n : (\$COUNT) num messages to send per thread, 0 means indefinitely" echo " -b : (\$BURST) HW level bursting of SKBs" echo " -v : (\$VERBOSE) verbose" echo " -x : (\$DEBUG) debug" @@ -20,7 +23,7 @@ function usage() { ## --- Parse command line arguments / parameters --- ## echo "Commandline options:" -while getopts "s:i:d:m:t:c:b:vxh6" option; do +while getopts "s:i:d:m:f:t:c:n:b:vxh6" option; do case $option in i) # interface export DEV=$OPTARG @@ -38,16 +41,22 @@ while getopts "s:i:d:m:t:c:b:vxh6" option; do export DST_MAC=$OPTARG info "Destination MAC set to: DST_MAC=$DST_MAC" ;; + f) + export F_THREAD=$OPTARG + info "Index of first thread (zero indexed CPU number): $F_THREAD" + ;; t) export THREADS=$OPTARG - export CPU_THREADS=$OPTARG - let "CPU_THREADS -= 1" - info "Number of threads to start: $THREADS (0 to $CPU_THREADS)" + info "Number of threads to start: $THREADS" ;; c) export CLONE_SKB=$OPTARG info "CLONE_SKB=$CLONE_SKB" ;; + n) + export COUNT=$OPTARG + info "COUNT=$COUNT" + ;; b) export BURST=$OPTARG info "SKB bursting: BURST=$BURST" @@ -77,12 +86,17 @@ if [ -z "$PKT_SIZE" ]; then info "Default packet size set to: set to: $PKT_SIZE bytes" fi +if [ -z "$F_THREAD" ]; then + # First thread (F_THREAD) reference the zero indexed CPU number + export F_THREAD=0 +fi + if [ -z "$THREADS" ]; then - # Zero CPU threads means one thread, because CPU numbers are zero indexed - export CPU_THREADS=0 export THREADS=1 fi +export L_THREAD=$(( THREADS + F_THREAD - 1 )) + if [ -z "$DEV" ]; then usage err 2 "Please specify output device" diff --git a/samples/pktgen/pktgen.conf-1-1-ip6 b/samples/pktgen/pktgen.conf-1-1-ip6 index 0b9ffd47fd41..62426afeef84 100755 --- a/samples/pktgen/pktgen.conf-1-1-ip6 +++ b/samples/pktgen/pktgen.conf-1-1-ip6 @@ -1,4 +1,5 @@ #!/bin/bash +# SPDX-License-Identifier: GPL-2.0 #modprobe pktgen diff --git a/samples/pktgen/pktgen.conf-1-1-ip6-rdos b/samples/pktgen/pktgen.conf-1-1-ip6-rdos index ad98e5f40776..3ac3eb1f3504 100755 --- a/samples/pktgen/pktgen.conf-1-1-ip6-rdos +++ b/samples/pktgen/pktgen.conf-1-1-ip6-rdos @@ -1,4 +1,5 @@ #!/bin/bash +# SPDX-License-Identifier: GPL-2.0 #modprobe pktgen diff --git a/samples/pktgen/pktgen.conf-1-2 b/samples/pktgen/pktgen.conf-1-2 index ba4eb26e168d..a85552760762 100755 --- a/samples/pktgen/pktgen.conf-1-2 +++ b/samples/pktgen/pktgen.conf-1-2 @@ -1,4 +1,5 @@ #!/bin/bash +# SPDX-License-Identifier: GPL-2.0 #modprobe pktgen diff --git a/samples/pktgen/pktgen_bench_xmit_mode_netif_receive.sh b/samples/pktgen/pktgen_bench_xmit_mode_netif_receive.sh index f3e1bedfd77f..2839f7d315cf 100755 --- a/samples/pktgen/pktgen_bench_xmit_mode_netif_receive.sh +++ b/samples/pktgen/pktgen_bench_xmit_mode_netif_receive.sh @@ -1,4 +1,5 @@ #!/bin/bash +# SPDX-License-Identifier: GPL-2.0 # # Benchmark script: # - developed for benchmarking ingress qdisc path @@ -39,16 +40,16 @@ if [ -z "$DEST_IP" ]; then fi [ -z "$DST_MAC" ] && DST_MAC="90:e2:ba:ff:ff:ff" [ -z "$BURST" ] && BURST=1024 +[ -z "$COUNT" ] && COUNT="10000000" # Zero means indefinitely # Base Config DELAY="0" # Zero means max speed -COUNT="10000000" # Zero means indefinitely # General cleanup everything since last run pg_ctrl "reset" # Threads are specified with parameter -t value in $THREADS -for ((thread = 0; thread < $THREADS; thread++)); do +for ((thread = $F_THREAD; thread <= $L_THREAD; thread++)); do # The device name is extended with @name, using thread number to # make then unique, but any name will do. dev=${DEV}@${thread} @@ -81,7 +82,7 @@ pg_ctrl "start" echo "Done" >&2 # Print results -for ((thread = 0; thread < $THREADS; thread++)); do +for ((thread = $F_THREAD; thread <= $L_THREAD; thread++)); do dev=${DEV}@${thread} echo "Device: $dev" cat /proc/net/pktgen/$dev | grep -A2 "Result:" diff --git a/samples/pktgen/pktgen_bench_xmit_mode_queue_xmit.sh b/samples/pktgen/pktgen_bench_xmit_mode_queue_xmit.sh index cc102e923241..e1ee54465def 100755 --- a/samples/pktgen/pktgen_bench_xmit_mode_queue_xmit.sh +++ b/samples/pktgen/pktgen_bench_xmit_mode_queue_xmit.sh @@ -1,4 +1,5 @@ #!/bin/bash +# SPDX-License-Identifier: GPL-2.0 # # Benchmark script: # - developed for benchmarking egress qdisc path, derived (more @@ -22,16 +23,16 @@ fi if [[ -n "$BURST" ]]; then err 1 "Bursting not supported for this mode" fi +[ -z "$COUNT" ] && COUNT="10000000" # Zero means indefinitely # Base Config DELAY="0" # Zero means max speed -COUNT="10000000" # Zero means indefinitely # General cleanup everything since last run pg_ctrl "reset" # Threads are specified with parameter -t value in $THREADS -for ((thread = 0; thread < $THREADS; thread++)); do +for ((thread = $F_THREAD; thread <= $L_THREAD; thread++)); do # The device name is extended with @name, using thread number to # make then unique, but any name will do. dev=${DEV}@${thread} @@ -61,7 +62,7 @@ pg_ctrl "start" echo "Done" >&2 # Print results -for ((thread = 0; thread < $THREADS; thread++)); do +for ((thread = $F_THREAD; thread <= $L_THREAD; thread++)); do dev=${DEV}@${thread} echo "Device: $dev" cat /proc/net/pktgen/$dev | grep -A2 "Result:" diff --git a/samples/pktgen/pktgen_sample01_simple.sh b/samples/pktgen/pktgen_sample01_simple.sh index 29ef4ba50796..e9ab4edba2d7 100755 --- a/samples/pktgen/pktgen_sample01_simple.sh +++ b/samples/pktgen/pktgen_sample01_simple.sh @@ -1,4 +1,5 @@ #!/bin/bash +# SPDX-License-Identifier: GPL-2.0 # # Simple example: # * pktgen sending with single thread and single interface @@ -20,10 +21,10 @@ fi [ -z "$CLONE_SKB" ] && CLONE_SKB="0" # Example enforce param "-m" for dst_mac [ -z "$DST_MAC" ] && usage && err 2 "Must specify -m dst_mac" +[ -z "$COUNT" ] && COUNT="100000" # Zero means indefinitely # Base Config DELAY="0" # Zero means max speed -COUNT="100000" # Zero means indefinitely # Flow variation random source port between min and max UDP_MIN=9 diff --git a/samples/pktgen/pktgen_sample02_multiqueue.sh b/samples/pktgen/pktgen_sample02_multiqueue.sh index c88a161d3e6f..99f740ae9857 100755 --- a/samples/pktgen/pktgen_sample02_multiqueue.sh +++ b/samples/pktgen/pktgen_sample02_multiqueue.sh @@ -1,4 +1,5 @@ #!/bin/bash +# SPDX-License-Identifier: GPL-2.0 # # Multiqueue: Using pktgen threads for sending on multiple CPUs # * adding devices to kernel threads @@ -13,9 +14,10 @@ root_check_run_with_sudo "$@" # Required param: -i dev in $DEV source ${basedir}/parameters.sh +[ -z "$COUNT" ] && COUNT="100000" # Zero means indefinitely + # Base Config DELAY="0" # Zero means max speed -COUNT="100000" # Zero means indefinitely [ -z "$CLONE_SKB" ] && CLONE_SKB="0" # Flow variation random source port between min and max @@ -32,7 +34,7 @@ fi pg_ctrl "reset" # Threads are specified with parameter -t value in $THREADS -for ((thread = 0; thread < $THREADS; thread++)); do +for ((thread = $F_THREAD; thread <= $L_THREAD; thread++)); do # The device name is extended with @name, using thread number to # make then unique, but any name will do. dev=${DEV}@${thread} @@ -70,7 +72,7 @@ pg_ctrl "start" echo "Done" >&2 # Print results -for ((thread = 0; thread < $THREADS; thread++)); do +for ((thread = $F_THREAD; thread <= $L_THREAD; thread++)); do dev=${DEV}@${thread} echo "Device: $dev" cat /proc/net/pktgen/$dev | grep -A2 "Result:" diff --git a/samples/pktgen/pktgen_sample03_burst_single_flow.sh b/samples/pktgen/pktgen_sample03_burst_single_flow.sh index 80cf8f5ba6b2..4c2e4217638a 100755 --- a/samples/pktgen/pktgen_sample03_burst_single_flow.sh +++ b/samples/pktgen/pktgen_sample03_burst_single_flow.sh @@ -1,4 +1,5 @@ #!/bin/bash +# SPDX-License-Identifier: GPL-2.0 # # Script for max single flow performance # - If correctly tuned[1], single CPU 10G wirespeed small pkts is possible[2] @@ -31,16 +32,16 @@ fi [ -z "$DST_MAC" ] && DST_MAC="90:e2:ba:ff:ff:ff" [ -z "$BURST" ] && BURST=32 [ -z "$CLONE_SKB" ] && CLONE_SKB="100000" +[ -z "$COUNT" ] && COUNT="0" # Zero means indefinitely # Base Config DELAY="0" # Zero means max speed -COUNT="0" # Zero means indefinitely # General cleanup everything since last run pg_ctrl "reset" # Threads are specified with parameter -t value in $THREADS -for ((thread = 0; thread < $THREADS; thread++)); do +for ((thread = $F_THREAD; thread <= $L_THREAD; thread++)); do dev=${DEV}@${thread} # Add remove all other devices and add_device $dev to thread @@ -71,7 +72,7 @@ done # Run if user hits control-c function control_c() { # Print results - for ((thread = 0; thread < $THREADS; thread++)); do + for ((thread = $F_THREAD; thread <= $L_THREAD; thread++)); do dev=${DEV}@${thread} echo "Device: $dev" cat /proc/net/pktgen/$dev | grep -A2 "Result:" diff --git a/samples/pktgen/pktgen_sample04_many_flows.sh b/samples/pktgen/pktgen_sample04_many_flows.sh index f60412e445bb..4df92b7176da 100755 --- a/samples/pktgen/pktgen_sample04_many_flows.sh +++ b/samples/pktgen/pktgen_sample04_many_flows.sh @@ -1,4 +1,5 @@ #!/bin/bash +# SPDX-License-Identifier: GPL-2.0 # # Script example for many flows testing # @@ -15,6 +16,7 @@ source ${basedir}/parameters.sh [ -z "$DEST_IP" ] && DEST_IP="198.18.0.42" [ -z "$DST_MAC" ] && DST_MAC="90:e2:ba:ff:ff:ff" [ -z "$CLONE_SKB" ] && CLONE_SKB="0" +[ -z "$COUNT" ] && COUNT="0" # Zero means indefinitely # NOTICE: Script specific settings # ======= @@ -26,7 +28,6 @@ source ${basedir}/parameters.sh # Base Config DELAY="0" # Zero means max speed -COUNT="0" # Zero means indefinitely if [[ -n "$BURST" ]]; then err 1 "Bursting not supported for this mode" @@ -36,7 +37,7 @@ fi pg_ctrl "reset" # Threads are specified with parameter -t value in $THREADS -for ((thread = 0; thread < $THREADS; thread++)); do +for ((thread = $F_THREAD; thread <= $L_THREAD; thread++)); do dev=${DEV}@${thread} # Add remove all other devices and add_device $dev to thread @@ -78,7 +79,7 @@ done # Run if user hits control-c function print_result() { # Print results - for ((thread = 0; thread < $THREADS; thread++)); do + for ((thread = $F_THREAD; thread <= $L_THREAD; thread++)); do dev=${DEV}@${thread} echo "Device: $dev" cat /proc/net/pktgen/$dev | grep -A2 "Result:" diff --git a/samples/pktgen/pktgen_sample05_flow_per_thread.sh b/samples/pktgen/pktgen_sample05_flow_per_thread.sh index 32ad818e2829..7f8b5e59f01e 100755 --- a/samples/pktgen/pktgen_sample05_flow_per_thread.sh +++ b/samples/pktgen/pktgen_sample05_flow_per_thread.sh @@ -1,4 +1,5 @@ #!/bin/bash +# SPDX-License-Identifier: GPL-2.0 # # Script will generate one flow per thread (-t N) # - Same destination IP @@ -20,17 +21,17 @@ source ${basedir}/parameters.sh [ -z "$DST_MAC" ] && DST_MAC="90:e2:ba:ff:ff:ff" [ -z "$CLONE_SKB" ] && CLONE_SKB="0" [ -z "$BURST" ] && BURST=32 +[ -z "$COUNT" ] && COUNT="0" # Zero means indefinitely # Base Config DELAY="0" # Zero means max speed -COUNT="0" # Zero means indefinitely # General cleanup everything since last run pg_ctrl "reset" # Threads are specified with parameter -t value in $THREADS -for ((thread = 0; thread < $THREADS; thread++)); do +for ((thread = $F_THREAD; thread <= $L_THREAD; thread++)); do dev=${DEV}@${thread} # Add remove all other devices and add_device $dev to thread @@ -66,7 +67,7 @@ done # Run if user hits control-c function print_result() { # Print results - for ((thread = 0; thread < $THREADS; thread++)); do + for ((thread = $F_THREAD; thread <= $L_THREAD; thread++)); do dev=${DEV}@${thread} echo "Device: $dev" cat /proc/net/pktgen/$dev | grep -A2 "Result:" diff --git a/samples/seccomp/Makefile b/samples/seccomp/Makefile index bf7cc6b0dc19..19a870eed82b 100644 --- a/samples/seccomp/Makefile +++ b/samples/seccomp/Makefile @@ -1,3 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0 # kbuild trick to avoid linker error. Can be omitted if a module is built. obj- := dummy.o diff --git a/samples/seccomp/bpf-direct.c b/samples/seccomp/bpf-direct.c index 151ec3f52189..c09e4a17ac1a 100644 --- a/samples/seccomp/bpf-direct.c +++ b/samples/seccomp/bpf-direct.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Seccomp filter example for x86 (32-bit and 64-bit) with BPF macros * diff --git a/samples/seccomp/bpf-fancy.c b/samples/seccomp/bpf-fancy.c index e8b24f443709..1ccb435025b6 100644 --- a/samples/seccomp/bpf-fancy.c +++ b/samples/seccomp/bpf-fancy.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Seccomp BPF example using a macro-based generator. * diff --git a/samples/seccomp/bpf-helper.c b/samples/seccomp/bpf-helper.c index 1ef0f4d72898..ae260d77a868 100644 --- a/samples/seccomp/bpf-helper.c +++ b/samples/seccomp/bpf-helper.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Seccomp BPF helper functions * diff --git a/samples/seccomp/bpf-helper.h b/samples/seccomp/bpf-helper.h index 1d8de9edd858..0cc9816fe8e8 100644 --- a/samples/seccomp/bpf-helper.h +++ b/samples/seccomp/bpf-helper.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ /* * Example wrapper around BPF macros. * diff --git a/samples/seccomp/dropper.c b/samples/seccomp/dropper.c index 68325ca5e71c..cc0648eb389e 100644 --- a/samples/seccomp/dropper.c +++ b/samples/seccomp/dropper.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Naive system call dropper built on seccomp_filter. * diff --git a/samples/sockmap/Makefile b/samples/sockmap/Makefile new file mode 100644 index 000000000000..9291ab8e0f8c --- /dev/null +++ b/samples/sockmap/Makefile @@ -0,0 +1,78 @@ +# kbuild trick to avoid linker error. Can be omitted if a module is built. +obj- := dummy.o + +# List of programs to build +hostprogs-y := sockmap + +# Libbpf dependencies +LIBBPF := ../../tools/lib/bpf/bpf.o + +HOSTCFLAGS += -I$(objtree)/usr/include +HOSTCFLAGS += -I$(srctree)/tools/lib/ +HOSTCFLAGS += -I$(srctree)/tools/testing/selftests/bpf/ +HOSTCFLAGS += -I$(srctree)/tools/lib/ -I$(srctree)/tools/include +HOSTCFLAGS += -I$(srctree)/tools/perf + +sockmap-objs := ../bpf/bpf_load.o $(LIBBPF) sockmap_user.o + +# Tell kbuild to always build the programs +always := $(hostprogs-y) +always += sockmap_kern.o + +HOSTLOADLIBES_sockmap += -lelf -lpthread + +# Allows pointing LLC/CLANG to a LLVM backend with bpf support, redefine on cmdline: +# make samples/bpf/ LLC=~/git/llvm/build/bin/llc CLANG=~/git/llvm/build/bin/clang +LLC ?= llc +CLANG ?= clang + +# Trick to allow make to be run from this directory +all: + $(MAKE) -C ../../ $(CURDIR)/ + +clean: + $(MAKE) -C ../../ M=$(CURDIR) clean + @rm -f *~ + +$(obj)/syscall_nrs.s: $(src)/syscall_nrs.c + $(call if_changed_dep,cc_s_c) + +$(obj)/syscall_nrs.h: $(obj)/syscall_nrs.s FORCE + $(call filechk,offsets,__SYSCALL_NRS_H__) + +clean-files += syscall_nrs.h + +FORCE: + + +# Verify LLVM compiler tools are available and bpf target is supported by llc +.PHONY: verify_cmds verify_target_bpf $(CLANG) $(LLC) + +verify_cmds: $(CLANG) $(LLC) + @for TOOL in $^ ; do \ + if ! (which -- "$${TOOL}" > /dev/null 2>&1); then \ + echo "*** ERROR: Cannot find LLVM tool $${TOOL}" ;\ + exit 1; \ + else true; fi; \ + done + +verify_target_bpf: verify_cmds + @if ! (${LLC} -march=bpf -mattr=help > /dev/null 2>&1); then \ + echo "*** ERROR: LLVM (${LLC}) does not support 'bpf' target" ;\ + echo " NOTICE: LLVM version >= 3.7.1 required" ;\ + exit 2; \ + else true; fi + +$(src)/*.c: verify_target_bpf + +# asm/sysreg.h - inline assembly used by it is incompatible with llvm. +# But, there is no easy way to fix it, so just exclude it since it is +# useless for BPF samples. +$(obj)/%.o: $(src)/%.c + $(CLANG) $(NOSTDINC_FLAGS) $(LINUXINCLUDE) $(EXTRA_CFLAGS) -I$(obj) \ + -D__KERNEL__ -D__ASM_SYSREG_H -Wno-unused-value -Wno-pointer-sign \ + -Wno-compare-distinct-pointer-types \ + -Wno-gnu-variable-sized-type-not-at-end \ + -Wno-address-of-packed-member -Wno-tautological-compare \ + -Wno-unknown-warning-option \ + -O2 -emit-llvm -c $< -o -| $(LLC) -march=bpf -filetype=obj -o $@ diff --git a/samples/sockmap/sockmap_kern.c b/samples/sockmap/sockmap_kern.c new file mode 100644 index 000000000000..52b0053274f4 --- /dev/null +++ b/samples/sockmap/sockmap_kern.c @@ -0,0 +1,108 @@ +/* Copyright (c) 2017 Covalent IO, Inc. http://covalent.io + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + */ +#include <uapi/linux/bpf.h> +#include <uapi/linux/if_ether.h> +#include <uapi/linux/if_packet.h> +#include <uapi/linux/ip.h> +#include "../../tools/testing/selftests/bpf/bpf_helpers.h" +#include "../../tools/testing/selftests/bpf/bpf_endian.h" + +/* Sockmap sample program connects a client and a backend together + * using cgroups. + * + * client:X <---> frontend:80 client:X <---> backend:80 + * + * For simplicity we hard code values here and bind 1:1. The hard + * coded values are part of the setup in sockmap.sh script that + * is associated with this BPF program. + * + * The bpf_printk is verbose and prints information as connections + * are established and verdicts are decided. + */ + +#define bpf_printk(fmt, ...) \ +({ \ + char ____fmt[] = fmt; \ + bpf_trace_printk(____fmt, sizeof(____fmt), \ + ##__VA_ARGS__); \ +}) + +struct bpf_map_def SEC("maps") sock_map = { + .type = BPF_MAP_TYPE_SOCKMAP, + .key_size = sizeof(int), + .value_size = sizeof(int), + .max_entries = 20, +}; + +SEC("sk_skb1") +int bpf_prog1(struct __sk_buff *skb) +{ + return skb->len; +} + +SEC("sk_skb2") +int bpf_prog2(struct __sk_buff *skb) +{ + __u32 lport = skb->local_port; + __u32 rport = skb->remote_port; + int ret = 0; + + if (lport == 10000) + ret = 10; + else + ret = 1; + + bpf_printk("sockmap: %d -> %d @ %d\n", lport, bpf_ntohl(rport), ret); + return bpf_sk_redirect_map(skb, &sock_map, ret, 0); +} + +SEC("sockops") +int bpf_sockmap(struct bpf_sock_ops *skops) +{ + __u32 lport, rport; + int op, err = 0, index, key, ret; + + + op = (int) skops->op; + + switch (op) { + case BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB: + lport = skops->local_port; + rport = skops->remote_port; + + if (lport == 10000) { + ret = 1; + err = bpf_sock_map_update(skops, &sock_map, &ret, + BPF_NOEXIST); + bpf_printk("passive(%i -> %i) map ctx update err: %d\n", + lport, bpf_ntohl(rport), err); + } + break; + case BPF_SOCK_OPS_ACTIVE_ESTABLISHED_CB: + lport = skops->local_port; + rport = skops->remote_port; + + if (bpf_ntohl(rport) == 10001) { + ret = 10; + err = bpf_sock_map_update(skops, &sock_map, &ret, + BPF_NOEXIST); + bpf_printk("active(%i -> %i) map ctx update err: %d\n", + lport, bpf_ntohl(rport), err); + } + break; + default: + break; + } + + return 0; +} +char _license[] SEC("license") = "GPL"; diff --git a/samples/sockmap/sockmap_user.c b/samples/sockmap/sockmap_user.c new file mode 100644 index 000000000000..7cc9d228216f --- /dev/null +++ b/samples/sockmap/sockmap_user.c @@ -0,0 +1,294 @@ +/* Copyright (c) 2017 Covalent IO, Inc. http://covalent.io + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + */ +#include <stdio.h> +#include <stdlib.h> +#include <sys/socket.h> +#include <sys/ioctl.h> +#include <sys/select.h> +#include <netinet/in.h> +#include <arpa/inet.h> +#include <unistd.h> +#include <string.h> +#include <errno.h> +#include <sys/ioctl.h> +#include <stdbool.h> +#include <signal.h> +#include <fcntl.h> + +#include <sys/time.h> +#include <sys/types.h> + +#include <linux/netlink.h> +#include <linux/socket.h> +#include <linux/sock_diag.h> +#include <linux/bpf.h> +#include <linux/if_link.h> +#include <assert.h> +#include <libgen.h> + +#include "../bpf/bpf_load.h" +#include "../bpf/bpf_util.h" +#include "../bpf/libbpf.h" + +int running; +void running_handler(int a); + +/* randomly selected ports for testing on lo */ +#define S1_PORT 10000 +#define S2_PORT 10001 + +static int sockmap_test_sockets(int rate, int dot) +{ + int i, sc, err, max_fd, one = 1; + int s1, s2, c1, c2, p1, p2; + struct sockaddr_in addr; + struct timeval timeout; + char buf[1024] = {0}; + int *fds[4] = {&s1, &s2, &c1, &c2}; + fd_set w; + + s1 = s2 = p1 = p2 = c1 = c2 = 0; + + /* Init sockets */ + for (i = 0; i < 4; i++) { + *fds[i] = socket(AF_INET, SOCK_STREAM, 0); + if (*fds[i] < 0) { + perror("socket s1 failed()"); + err = *fds[i]; + goto out; + } + } + + /* Allow reuse */ + for (i = 0; i < 2; i++) { + err = setsockopt(*fds[i], SOL_SOCKET, SO_REUSEADDR, + (char *)&one, sizeof(one)); + if (err) { + perror("setsockopt failed()"); + goto out; + } + } + + /* Non-blocking sockets */ + for (i = 0; i < 4; i++) { + err = ioctl(*fds[i], FIONBIO, (char *)&one); + if (err < 0) { + perror("ioctl s1 failed()"); + goto out; + } + } + + /* Bind server sockets */ + memset(&addr, 0, sizeof(struct sockaddr_in)); + addr.sin_family = AF_INET; + addr.sin_addr.s_addr = inet_addr("127.0.0.1"); + + addr.sin_port = htons(S1_PORT); + err = bind(s1, (struct sockaddr *)&addr, sizeof(addr)); + if (err < 0) { + perror("bind s1 failed()\n"); + goto out; + } + + addr.sin_port = htons(S2_PORT); + err = bind(s2, (struct sockaddr *)&addr, sizeof(addr)); + if (err < 0) { + perror("bind s2 failed()\n"); + goto out; + } + + /* Listen server sockets */ + addr.sin_port = htons(S1_PORT); + err = listen(s1, 32); + if (err < 0) { + perror("listen s1 failed()\n"); + goto out; + } + + addr.sin_port = htons(S2_PORT); + err = listen(s2, 32); + if (err < 0) { + perror("listen s1 failed()\n"); + goto out; + } + + /* Initiate Connect */ + addr.sin_port = htons(S1_PORT); + err = connect(c1, (struct sockaddr *)&addr, sizeof(addr)); + if (err < 0 && errno != EINPROGRESS) { + perror("connect c1 failed()\n"); + goto out; + } + + addr.sin_port = htons(S2_PORT); + err = connect(c2, (struct sockaddr *)&addr, sizeof(addr)); + if (err < 0 && errno != EINPROGRESS) { + perror("connect c2 failed()\n"); + goto out; + } + + /* Accept Connecrtions */ + p1 = accept(s1, NULL, NULL); + if (p1 < 0) { + perror("accept s1 failed()\n"); + goto out; + } + + p2 = accept(s2, NULL, NULL); + if (p2 < 0) { + perror("accept s1 failed()\n"); + goto out; + } + + max_fd = p2; + timeout.tv_sec = 10; + timeout.tv_usec = 0; + + printf("connected sockets: c1 <-> p1, c2 <-> p2\n"); + printf("cgroups binding: c1(%i) <-> s1(%i) - - - c2(%i) <-> s2(%i)\n", + c1, s1, c2, s2); + + /* Ping/Pong data from client to server */ + sc = send(c1, buf, sizeof(buf), 0); + if (sc < 0) { + perror("send failed()\n"); + goto out; + } + + do { + int s, rc, i; + + /* FD sets */ + FD_ZERO(&w); + FD_SET(c1, &w); + FD_SET(c2, &w); + FD_SET(p1, &w); + FD_SET(p2, &w); + + s = select(max_fd + 1, &w, NULL, NULL, &timeout); + if (s == -1) { + perror("select()"); + break; + } else if (!s) { + fprintf(stderr, "unexpected timeout\n"); + break; + } + + for (i = 0; i <= max_fd && s > 0; ++i) { + if (!FD_ISSET(i, &w)) + continue; + + s--; + + rc = recv(i, buf, sizeof(buf), 0); + if (rc < 0) { + if (errno != EWOULDBLOCK) { + perror("recv failed()\n"); + break; + } + } + + if (rc == 0) { + close(i); + break; + } + + sc = send(i, buf, rc, 0); + if (sc < 0) { + perror("send failed()\n"); + break; + } + } + sleep(rate); + if (dot) { + printf("."); + fflush(stdout); + + } + } while (running); + +out: + close(s1); + close(s2); + close(p1); + close(p2); + close(c1); + close(c2); + return err; +} + +int main(int argc, char **argv) +{ + int rate = 1, dot = 1; + char filename[256]; + int err, cg_fd; + char *cg_path; + + cg_path = argv[argc - 1]; + snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]); + + running = 1; + + /* catch SIGINT */ + signal(SIGINT, running_handler); + + if (load_bpf_file(filename)) { + fprintf(stderr, "load_bpf_file: (%s) %s\n", + filename, strerror(errno)); + return 1; + } + + /* Cgroup configuration */ + cg_fd = open(cg_path, O_DIRECTORY, O_RDONLY); + if (cg_fd < 0) { + fprintf(stderr, "ERROR: (%i) open cg path failed: %s\n", + cg_fd, cg_path); + return cg_fd; + } + + /* Attach programs to sockmap */ + err = bpf_prog_attach(prog_fd[0], map_fd[0], + BPF_SK_SKB_STREAM_PARSER, 0); + if (err) { + fprintf(stderr, "ERROR: bpf_prog_attach (sockmap): %d (%s)\n", + err, strerror(errno)); + return err; + } + + err = bpf_prog_attach(prog_fd[1], map_fd[0], + BPF_SK_SKB_STREAM_VERDICT, 0); + if (err) { + fprintf(stderr, "ERROR: bpf_prog_attach (sockmap): %d (%s)\n", + err, strerror(errno)); + return err; + } + + /* Attach to cgroups */ + err = bpf_prog_attach(prog_fd[2], cg_fd, BPF_CGROUP_SOCK_OPS, 0); + if (err) { + fprintf(stderr, "ERROR: bpf_prog_attach (groups): %d (%s)\n", + err, strerror(errno)); + return err; + } + + err = sockmap_test_sockets(rate, dot); + if (err) { + fprintf(stderr, "ERROR: test socket failed: %d\n", err); + return err; + } + return 0; +} + +void running_handler(int a) +{ + running = 0; +} diff --git a/samples/timers/Makefile b/samples/timers/Makefile index a5c3c4a35ca1..f9fa07460802 100644 --- a/samples/timers/Makefile +++ b/samples/timers/Makefile @@ -1,3 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0 ifndef CROSS_COMPILE uname_M := $(shell uname -m 2>/dev/null || echo not) ARCH ?= $(shell echo $(uname_M) | sed -e s/i.86/x86/ -e s/x86_64/x86/) diff --git a/samples/timers/hpet_example.c b/samples/timers/hpet_example.c index 3ab4993d85e0..f1cb622f6ec0 100644 --- a/samples/timers/hpet_example.c +++ b/samples/timers/hpet_example.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 #include <stdio.h> #include <stdlib.h> #include <unistd.h> diff --git a/samples/trace_events/trace-events-sample.c b/samples/trace_events/trace-events-sample.c index bc7fcf010a5b..5522692100ba 100644 --- a/samples/trace_events/trace-events-sample.c +++ b/samples/trace_events/trace-events-sample.c @@ -78,29 +78,37 @@ static int simple_thread_fn(void *arg) } static DEFINE_MUTEX(thread_mutex); +static int simple_thread_cnt; int foo_bar_reg(void) { + mutex_lock(&thread_mutex); + if (simple_thread_cnt++) + goto out; + pr_info("Starting thread for foo_bar_fn\n"); /* * We shouldn't be able to start a trace when the module is * unloading (there's other locks to prevent that). But * for consistency sake, we still take the thread_mutex. */ - mutex_lock(&thread_mutex); simple_tsk_fn = kthread_run(simple_thread_fn, NULL, "event-sample-fn"); + out: mutex_unlock(&thread_mutex); return 0; } void foo_bar_unreg(void) { - pr_info("Killing thread for foo_bar_fn\n"); - /* protect against module unloading */ mutex_lock(&thread_mutex); + if (--simple_thread_cnt) + goto out; + + pr_info("Killing thread for foo_bar_fn\n"); if (simple_tsk_fn) kthread_stop(simple_tsk_fn); simple_tsk_fn = NULL; + out: mutex_unlock(&thread_mutex); } diff --git a/samples/trace_events/trace-events-sample.h b/samples/trace_events/trace-events-sample.h index 76a75ab7a608..80b4a70315b6 100644 --- a/samples/trace_events/trace-events-sample.h +++ b/samples/trace_events/trace-events-sample.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ /* * If TRACE_SYSTEM is defined, that will be the directory created * in the ftrace directory under /sys/kernel/tracing/events/<system> @@ -95,7 +96,7 @@ * __entry->bar.x = y; * __array: There are three fields (type, name, size). The type is the - * type of elements in teh array, the name is the name of the array. + * type of elements in the array, the name is the name of the array. * size is the number of items in the array (not the total size). * * __array( char, foo, 10) is the same as saying: char foo[10]; @@ -112,7 +113,7 @@ * type is the type of the element, name is the name of the array. * The size is different than __array. It is not a static number, * but the algorithm to figure out the length of the array for the - * specific instance of tracepoint. Again, size is the numebr of + * specific instance of tracepoint. Again, size is the number of * items in the array, not the total length in bytes. * * __dynamic_array( int, foo, bar) is similar to: int foo[bar]; @@ -125,9 +126,9 @@ * Notice, that "__entry" is not needed here. * * __string: This is a special kind of __dynamic_array. It expects to - * have a nul terminated character array passed to it (it allows + * have a null terminated character array passed to it (it allows * for NULL too, which would be converted into "(null)"). __string - * takes two paramenter (name, src), where name is the name of + * takes two parameter (name, src), where name is the name of * the string saved, and src is the string to copy into the * ring buffer. * @@ -444,7 +445,7 @@ DECLARE_EVENT_CLASS(foo_template, /* * Here's a better way for the previous samples (except, the first - * exmaple had more fields and could not be used here). + * example had more fields and could not be used here). */ DEFINE_EVENT(foo_template, foo_with_template_simple, TP_PROTO(const char *foo, int bar), diff --git a/samples/uhid/uhid-example.c b/samples/uhid/uhid-example.c index 7d58a4b8d324..b72d645ce828 100644 --- a/samples/uhid/uhid-example.c +++ b/samples/uhid/uhid-example.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * UHID Example * diff --git a/samples/v4l/v4l2-pci-skeleton.c b/samples/v4l/v4l2-pci-skeleton.c index 93b76c3220fd..483e9bca9444 100644 --- a/samples/v4l/v4l2-pci-skeleton.c +++ b/samples/v4l/v4l2-pci-skeleton.c @@ -282,7 +282,7 @@ static void stop_streaming(struct vb2_queue *vq) * vb2_ops_wait_prepare/finish helper functions. If q->lock would be NULL, * then this driver would have to provide these ops. */ -static struct vb2_ops skel_qops = { +static const struct vb2_ops skel_qops = { .queue_setup = queue_setup, .buf_prepare = buffer_prepare, .buf_queue = buffer_queue, diff --git a/samples/vfio-mdev/mtty.c b/samples/vfio-mdev/mtty.c index ca495686b9c3..09f255bdf3ac 100644 --- a/samples/vfio-mdev/mtty.c +++ b/samples/vfio-mdev/mtty.c @@ -1413,7 +1413,7 @@ struct attribute_group *mdev_type_groups[] = { NULL, }; -struct mdev_parent_ops mdev_fops = { +static const struct mdev_parent_ops mdev_fops = { .owner = THIS_MODULE, .dev_attr_groups = mtty_dev_groups, .mdev_attr_groups = mdev_dev_groups, diff --git a/samples/watchdog/Makefile b/samples/watchdog/Makefile index 9b53d89b1ccf..a9430fa60253 100644 --- a/samples/watchdog/Makefile +++ b/samples/watchdog/Makefile @@ -1,3 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0 CC := $(CROSS_COMPILE)gcc PROGS := watchdog-simple diff --git a/samples/watchdog/watchdog-simple.c b/samples/watchdog/watchdog-simple.c index ba45803a2216..9ce66d2ca2a9 100644 --- a/samples/watchdog/watchdog-simple.c +++ b/samples/watchdog/watchdog-simple.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 #include <stdio.h> #include <stdlib.h> #include <unistd.h> |