diff options
151 files changed, 8321 insertions, 1402 deletions
diff --git a/Documentation/admin-guide/sysctl/net.rst b/Documentation/admin-guide/sysctl/net.rst index 555681ef6195..6394f5dc2303 100644 --- a/Documentation/admin-guide/sysctl/net.rst +++ b/Documentation/admin-guide/sysctl/net.rst @@ -102,6 +102,9 @@ Values: - 1 - enable JIT hardening for unprivileged users only - 2 - enable JIT hardening for all users +where "privileged user" in this context means a process having +CAP_BPF or CAP_SYS_ADMIN in the root user name space. + bpf_jit_kallsyms ---------------- diff --git a/Documentation/bpf/clang-notes.rst b/Documentation/bpf/clang-notes.rst new file mode 100644 index 000000000000..528feddf2db9 --- /dev/null +++ b/Documentation/bpf/clang-notes.rst @@ -0,0 +1,30 @@ +.. contents:: +.. sectnum:: + +========================== +Clang implementation notes +========================== + +This document provides more details specific to the Clang/LLVM implementation of the eBPF instruction set. + +Versions +======== + +Clang defined "CPU" versions, where a CPU version of 3 corresponds to the current eBPF ISA. + +Clang can select the eBPF ISA version using ``-mcpu=v3`` for example to select version 3. + +Arithmetic instructions +======================= + +For CPU versions prior to 3, Clang v7.0 and later can enable ``BPF_ALU`` support with +``-Xclang -target-feature -Xclang +alu32``. In CPU version 3, support is automatically included. + +Atomic operations +================= + +Clang can generate atomic instructions by default when ``-mcpu=v3`` is +enabled. If a lower version for ``-mcpu`` is set, the only atomic instruction +Clang can generate is ``BPF_ADD`` *without* ``BPF_FETCH``. If you need to enable +the atomics features, while keeping a lower ``-mcpu`` version, you can use +``-Xclang -target-feature -Xclang +alu32``. 
diff --git a/Documentation/bpf/index.rst b/Documentation/bpf/index.rst index 1bc2c5c58bdb..1b50de1983ee 100644 --- a/Documentation/bpf/index.rst +++ b/Documentation/bpf/index.rst @@ -26,6 +26,8 @@ that goes into great technical depth about the BPF Architecture. classic_vs_extended.rst bpf_licensing test_debug + clang-notes + linux-notes other .. only:: subproject and html diff --git a/Documentation/bpf/instruction-set.rst b/Documentation/bpf/instruction-set.rst index 1b0e6711dec9..4997d2088fef 100644 --- a/Documentation/bpf/instruction-set.rst +++ b/Documentation/bpf/instruction-set.rst @@ -1,7 +1,12 @@ +.. contents:: +.. sectnum:: + +======================================== +eBPF Instruction Set Specification, v1.0 +======================================== + +This document specifies version 1.0 of the eBPF instruction set. -==================== -eBPF Instruction Set -==================== Registers and calling convention ================================ @@ -11,10 +16,10 @@ all of which are 64-bits wide. The eBPF calling convention is defined as: - * R0: return value from function calls, and exit value for eBPF programs - * R1 - R5: arguments for function calls - * R6 - R9: callee saved registers that function calls will preserve - * R10: read-only frame pointer to access stack +* R0: return value from function calls, and exit value for eBPF programs +* R1 - R5: arguments for function calls +* R6 - R9: callee saved registers that function calls will preserve +* R10: read-only frame pointer to access stack R0 - R5 are scratch registers and eBPF programs needs to spill/fill them if necessary across calls. @@ -24,17 +29,17 @@ Instruction encoding eBPF has two instruction encodings: - * the basic instruction encoding, which uses 64 bits to encode an instruction - * the wide instruction encoding, which appends a second 64-bit immediate value - (imm64) after the basic instruction for a total of 128 bits. 
+* the basic instruction encoding, which uses 64 bits to encode an instruction +* the wide instruction encoding, which appends a second 64-bit immediate value + (imm64) after the basic instruction for a total of 128 bits. The basic instruction encoding looks as follows: - ============= ======= =============== ==================== ============ - 32 bits (MSB) 16 bits 4 bits 4 bits 8 bits (LSB) - ============= ======= =============== ==================== ============ - immediate offset source register destination register opcode - ============= ======= =============== ==================== ============ +============= ======= =============== ==================== ============ +32 bits (MSB) 16 bits 4 bits 4 bits 8 bits (LSB) +============= ======= =============== ==================== ============ +immediate offset source register destination register opcode +============= ======= =============== ==================== ============ Note that most instructions do not use all of the fields. Unused fields shall be cleared to zero. 
@@ -44,30 +49,30 @@ Instruction classes The three LSB bits of the 'opcode' field store the instruction class: - ========= ===== =============================== - class value description - ========= ===== =============================== - BPF_LD 0x00 non-standard load operations - BPF_LDX 0x01 load into register operations - BPF_ST 0x02 store from immediate operations - BPF_STX 0x03 store from register operations - BPF_ALU 0x04 32-bit arithmetic operations - BPF_JMP 0x05 64-bit jump operations - BPF_JMP32 0x06 32-bit jump operations - BPF_ALU64 0x07 64-bit arithmetic operations - ========= ===== =============================== +========= ===== =============================== =================================== +class value description reference +========= ===== =============================== =================================== +BPF_LD 0x00 non-standard load operations `Load and store instructions`_ +BPF_LDX 0x01 load into register operations `Load and store instructions`_ +BPF_ST 0x02 store from immediate operations `Load and store instructions`_ +BPF_STX 0x03 store from register operations `Load and store instructions`_ +BPF_ALU 0x04 32-bit arithmetic operations `Arithmetic and jump instructions`_ +BPF_JMP 0x05 64-bit jump operations `Arithmetic and jump instructions`_ +BPF_JMP32 0x06 32-bit jump operations `Arithmetic and jump instructions`_ +BPF_ALU64 0x07 64-bit arithmetic operations `Arithmetic and jump instructions`_ +========= ===== =============================== =================================== Arithmetic and jump instructions ================================ -For arithmetic and jump instructions (BPF_ALU, BPF_ALU64, BPF_JMP and -BPF_JMP32), the 8-bit 'opcode' field is divided into three parts: +For arithmetic and jump instructions (``BPF_ALU``, ``BPF_ALU64``, ``BPF_JMP`` and +``BPF_JMP32``), the 8-bit 'opcode' field is divided into three parts: - ============== ====== ================= - 4 bits (MSB) 1 bit 3 bits (LSB) - ============== ====== 
================= - operation code source instruction class - ============== ====== ================= +============== ====== ================= +4 bits (MSB) 1 bit 3 bits (LSB) +============== ====== ================= +operation code source instruction class +============== ====== ================= The 4th bit encodes the source operand: @@ -84,51 +89,51 @@ The four MSB bits store the operation code. Arithmetic instructions ----------------------- -BPF_ALU uses 32-bit wide operands while BPF_ALU64 uses 64-bit wide operands for +``BPF_ALU`` uses 32-bit wide operands while ``BPF_ALU64`` uses 64-bit wide operands for otherwise identical operations. -The code field encodes the operation as below: - - ======== ===== ================================================= - code value description - ======== ===== ================================================= - BPF_ADD 0x00 dst += src - BPF_SUB 0x10 dst -= src - BPF_MUL 0x20 dst \*= src - BPF_DIV 0x30 dst /= src - BPF_OR 0x40 dst \|= src - BPF_AND 0x50 dst &= src - BPF_LSH 0x60 dst <<= src - BPF_RSH 0x70 dst >>= src - BPF_NEG 0x80 dst = ~src - BPF_MOD 0x90 dst %= src - BPF_XOR 0xa0 dst ^= src - BPF_MOV 0xb0 dst = src - BPF_ARSH 0xc0 sign extending shift right - BPF_END 0xd0 byte swap operations (see separate section below) - ======== ===== ================================================= - -BPF_ADD | BPF_X | BPF_ALU means:: +The 'code' field encodes the operation as below: + +======== ===== ========================================================== +code value description +======== ===== ========================================================== +BPF_ADD 0x00 dst += src +BPF_SUB 0x10 dst -= src +BPF_MUL 0x20 dst \*= src +BPF_DIV 0x30 dst /= src +BPF_OR 0x40 dst \|= src +BPF_AND 0x50 dst &= src +BPF_LSH 0x60 dst <<= src +BPF_RSH 0x70 dst >>= src +BPF_NEG 0x80 dst = ~src +BPF_MOD 0x90 dst %= src +BPF_XOR 0xa0 dst ^= src +BPF_MOV 0xb0 dst = src +BPF_ARSH 0xc0 sign extending shift right +BPF_END 0xd0 byte swap operations (see 
`Byte swap instructions`_ below) +======== ===== ========================================================== + +``BPF_ADD | BPF_X | BPF_ALU`` means:: dst_reg = (u32) dst_reg + (u32) src_reg; -BPF_ADD | BPF_X | BPF_ALU64 means:: +``BPF_ADD | BPF_X | BPF_ALU64`` means:: dst_reg = dst_reg + src_reg -BPF_XOR | BPF_K | BPF_ALU means:: +``BPF_XOR | BPF_K | BPF_ALU`` means:: src_reg = (u32) src_reg ^ (u32) imm32 -BPF_XOR | BPF_K | BPF_ALU64 means:: +``BPF_XOR | BPF_K | BPF_ALU64`` means:: src_reg = src_reg ^ imm32 Byte swap instructions ----------------------- +~~~~~~~~~~~~~~~~~~~~~~ The byte swap instructions use an instruction class of ``BPF_ALU`` and a 4-bit -code field of ``BPF_END``. +'code' field of ``BPF_END``. The byte swap instructions operate on the destination register only and do not use a separate source register or immediate value. @@ -136,14 +141,14 @@ only and do not use a separate source register or immediate value. The 1-bit source operand field in the opcode is used to to select what byte order the operation convert from or to: - ========= ===== ================================================= - source value description - ========= ===== ================================================= - BPF_TO_LE 0x00 convert between host byte order and little endian - BPF_TO_BE 0x08 convert between host byte order and big endian - ========= ===== ================================================= +========= ===== ================================================= +source value description +========= ===== ================================================= +BPF_TO_LE 0x00 convert between host byte order and little endian +BPF_TO_BE 0x08 convert between host byte order and big endian +========= ===== ================================================= -The imm field encodes the width of the swap operations. The following widths +The 'imm' field encodes the width of the swap operations. The following widths are supported: 16, 32 and 64. 
Examples: @@ -156,35 +161,31 @@ Examples: dst_reg = htobe64(dst_reg) -``BPF_FROM_LE`` and ``BPF_FROM_BE`` exist as aliases for ``BPF_TO_LE`` and -``BPF_TO_BE`` respectively. - - Jump instructions ----------------- -BPF_JMP32 uses 32-bit wide operands while BPF_JMP uses 64-bit wide operands for +``BPF_JMP32`` uses 32-bit wide operands while ``BPF_JMP`` uses 64-bit wide operands for otherwise identical operations. -The code field encodes the operation as below: - - ======== ===== ========================= ============ - code value description notes - ======== ===== ========================= ============ - BPF_JA 0x00 PC += off BPF_JMP only - BPF_JEQ 0x10 PC += off if dst == src - BPF_JGT 0x20 PC += off if dst > src unsigned - BPF_JGE 0x30 PC += off if dst >= src unsigned - BPF_JSET 0x40 PC += off if dst & src - BPF_JNE 0x50 PC += off if dst != src - BPF_JSGT 0x60 PC += off if dst > src signed - BPF_JSGE 0x70 PC += off if dst >= src signed - BPF_CALL 0x80 function call - BPF_EXIT 0x90 function / program return BPF_JMP only - BPF_JLT 0xa0 PC += off if dst < src unsigned - BPF_JLE 0xb0 PC += off if dst <= src unsigned - BPF_JSLT 0xc0 PC += off if dst < src signed - BPF_JSLE 0xd0 PC += off if dst <= src signed - ======== ===== ========================= ============ +The 'code' field encodes the operation as below: + +======== ===== ========================= ============ +code value description notes +======== ===== ========================= ============ +BPF_JA 0x00 PC += off BPF_JMP only +BPF_JEQ 0x10 PC += off if dst == src +BPF_JGT 0x20 PC += off if dst > src unsigned +BPF_JGE 0x30 PC += off if dst >= src unsigned +BPF_JSET 0x40 PC += off if dst & src +BPF_JNE 0x50 PC += off if dst != src +BPF_JSGT 0x60 PC += off if dst > src signed +BPF_JSGE 0x70 PC += off if dst >= src signed +BPF_CALL 0x80 function call +BPF_EXIT 0x90 function / program return BPF_JMP only +BPF_JLT 0xa0 PC += off if dst < src unsigned +BPF_JLE 0xb0 PC += off if dst <= src unsigned +BPF_JSLT 0xc0 PC 
+= off if dst < src signed +BPF_JSLE 0xd0 PC += off if dst <= src signed +======== ===== ========================= ============ The eBPF program needs to store the return value into register R0 before doing a BPF_EXIT. @@ -193,14 +194,26 @@ BPF_EXIT. Load and store instructions =========================== -For load and store instructions (BPF_LD, BPF_LDX, BPF_ST and BPF_STX), the +For load and store instructions (``BPF_LD``, ``BPF_LDX``, ``BPF_ST``, and ``BPF_STX``), the 8-bit 'opcode' field is divided as: - ============ ====== ================= - 3 bits (MSB) 2 bits 3 bits (LSB) - ============ ====== ================= - mode size instruction class - ============ ====== ================= +============ ====== ================= +3 bits (MSB) 2 bits 3 bits (LSB) +============ ====== ================= +mode size instruction class +============ ====== ================= + +The mode modifier is one of: + + ============= ===== ==================================== ============= + mode modifier value description reference + ============= ===== ==================================== ============= + BPF_IMM 0x00 64-bit immediate instructions `64-bit immediate instructions`_ + BPF_ABS 0x20 legacy BPF packet access (absolute) `Legacy BPF Packet access instructions`_ + BPF_IND 0x40 legacy BPF packet access (indirect) `Legacy BPF Packet access instructions`_ + BPF_MEM 0x60 regular load and store operations `Regular load and store operations`_ + BPF_ATOMIC 0xc0 atomic operations `Atomic operations`_ + ============= ===== ==================================== ============= The size modifier is one of: @@ -213,19 +226,6 @@ The size modifier is one of: BPF_DW 0x18 double word (8 bytes) ============= ===== ===================== -The mode modifier is one of: - - ============= ===== ==================================== - mode modifier value description - ============= ===== ==================================== - BPF_IMM 0x00 64-bit immediate instructions - BPF_ABS 0x20 legacy BPF packet 
access (absolute) - BPF_IND 0x40 legacy BPF packet access (indirect) - BPF_MEM 0x60 regular load and store operations - BPF_ATOMIC 0xc0 atomic operations - ============= ===== ==================================== - - Regular load and store operations --------------------------------- @@ -256,44 +256,42 @@ by other eBPF programs or means outside of this specification. All atomic operations supported by eBPF are encoded as store operations that use the ``BPF_ATOMIC`` mode modifier as follows: - * ``BPF_ATOMIC | BPF_W | BPF_STX`` for 32-bit operations - * ``BPF_ATOMIC | BPF_DW | BPF_STX`` for 64-bit operations - * 8-bit and 16-bit wide atomic operations are not supported. +* ``BPF_ATOMIC | BPF_W | BPF_STX`` for 32-bit operations +* ``BPF_ATOMIC | BPF_DW | BPF_STX`` for 64-bit operations +* 8-bit and 16-bit wide atomic operations are not supported. -The imm field is used to encode the actual atomic operation. +The 'imm' field is used to encode the actual atomic operation. Simple atomic operation use a subset of the values defined to encode -arithmetic operations in the imm field to encode the atomic operation: +arithmetic operations in the 'imm' field to encode the atomic operation: - ======== ===== =========== - imm value description - ======== ===== =========== - BPF_ADD 0x00 atomic add - BPF_OR 0x40 atomic or - BPF_AND 0x50 atomic and - BPF_XOR 0xa0 atomic xor - ======== ===== =========== +======== ===== =========== +imm value description +======== ===== =========== +BPF_ADD 0x00 atomic add +BPF_OR 0x40 atomic or +BPF_AND 0x50 atomic and +BPF_XOR 0xa0 atomic xor +======== ===== =========== -``BPF_ATOMIC | BPF_W | BPF_STX`` with imm = BPF_ADD means:: +``BPF_ATOMIC | BPF_W | BPF_STX`` with 'imm' = BPF_ADD means:: *(u32 *)(dst_reg + off16) += src_reg -``BPF_ATOMIC | BPF_DW | BPF_STX`` with imm = BPF ADD means:: +``BPF_ATOMIC | BPF_DW | BPF_STX`` with 'imm' = BPF_ADD means:: *(u64 *)(dst_reg + off16) += src_reg -``BPF_XADD`` is a deprecated name for ``BPF_ATOMIC | 
BPF_ADD``. - In addition to the simple atomic operations, there also is a modifier and two complex atomic operations: - =========== ================ =========================== - imm value description - =========== ================ =========================== - BPF_FETCH 0x01 modifier: return old value - BPF_XCHG 0xe0 | BPF_FETCH atomic exchange - BPF_CMPXCHG 0xf0 | BPF_FETCH atomic compare and exchange - =========== ================ =========================== +=========== ================ =========================== +imm value description +=========== ================ =========================== +BPF_FETCH 0x01 modifier: return old value +BPF_XCHG 0xe0 | BPF_FETCH atomic exchange +BPF_CMPXCHG 0xf0 | BPF_FETCH atomic compare and exchange +=========== ================ =========================== The ``BPF_FETCH`` modifier is optional for simple atomic operations, and always set for the complex atomic operations. If the ``BPF_FETCH`` flag @@ -309,16 +307,10 @@ The ``BPF_CMPXCHG`` operation atomically compares the value addressed by value that was at ``dst_reg + off`` before the operation is zero-extended and loaded back to ``R0``. -Clang can generate atomic instructions by default when ``-mcpu=v3`` is -enabled. If a lower version for ``-mcpu`` is set, the only atomic instruction -Clang can generate is ``BPF_ADD`` *without* ``BPF_FETCH``. If you need to enable -the atomics features, while keeping a lower ``-mcpu`` version, you can use -``-Xclang -target-feature -Xclang +alu32``. - 64-bit immediate instructions ----------------------------- -Instructions with the ``BPF_IMM`` mode modifier use the wide instruction +Instructions with the ``BPF_IMM`` 'mode' modifier use the wide instruction encoding for an extra imm64 value. There is currently only one such instruction. @@ -331,36 +323,6 @@ There is currently only one such instruction. 
Legacy BPF Packet access instructions ------------------------------------- -eBPF has special instructions for access to packet data that have been -carried over from classic BPF to retain the performance of legacy socket -filters running in the eBPF interpreter. - -The instructions come in two forms: ``BPF_ABS | <size> | BPF_LD`` and -``BPF_IND | <size> | BPF_LD``. - -These instructions are used to access packet data and can only be used when -the program context is a pointer to networking packet. ``BPF_ABS`` -accesses packet data at an absolute offset specified by the immediate data -and ``BPF_IND`` access packet data at an offset that includes the value of -a register in addition to the immediate data. - -These instructions have seven implicit operands: - - * Register R6 is an implicit input that must contain pointer to a - struct sk_buff. - * Register R0 is an implicit output which contains the data fetched from - the packet. - * Registers R1-R5 are scratch registers that are clobbered after a call to - ``BPF_ABS | BPF_LD`` or ``BPF_IND | BPF_LD`` instructions. - -These instructions have an implicit program exit condition as well. When an -eBPF program is trying to access the data beyond the packet boundary, the -program execution will be aborted. - -``BPF_ABS | BPF_W | BPF_LD`` means:: - - R0 = ntohl(*(u32 *) (((struct sk_buff *) R6)->data + imm32)) - -``BPF_IND | BPF_W | BPF_LD`` means:: - - R0 = ntohl(*(u32 *) (((struct sk_buff *) R6)->data + src_reg + imm32)) +eBPF previously introduced special instructions for access to packet data that were +carried over from classic BPF. However, these instructions are +deprecated and should no longer be used. diff --git a/Documentation/bpf/kfuncs.rst b/Documentation/bpf/kfuncs.rst index 781731749e55..0f858156371d 100644 --- a/Documentation/bpf/kfuncs.rst +++ b/Documentation/bpf/kfuncs.rst @@ -137,14 +137,22 @@ KF_ACQUIRE and KF_RET_NULL flags. 
-------------------------- The KF_TRUSTED_ARGS flag is used for kfuncs taking pointer arguments. It -indicates that the all pointer arguments will always be refcounted, and have -their offset set to 0. It can be used to enforce that a pointer to a refcounted -object acquired from a kfunc or BPF helper is passed as an argument to this -kfunc without any modifications (e.g. pointer arithmetic) such that it is -trusted and points to the original object. This flag is often used for kfuncs -that operate (change some property, perform some operation) on an object that -was obtained using an acquire kfunc. Such kfuncs need an unchanged pointer to -ensure the integrity of the operation being performed on the expected object. +indicates that all pointer arguments will always have a guaranteed lifetime, +and pointers to kernel objects are always passed to helpers in their unmodified +form (as obtained from acquire kfuncs). + +It can be used to enforce that a pointer to a refcounted object acquired from a +kfunc or BPF helper is passed as an argument to this kfunc without any +modifications (e.g. pointer arithmetic) such that it is trusted and points to +the original object. + +Meanwhile, it is also allowed to pass pointers to normal memory to such kfuncs, +but those can have a non-zero offset. + +This flag is often used for kfuncs that operate (change some property, perform +some operation) on an object that was obtained using an acquire kfunc. Such +kfuncs need an unchanged pointer to ensure the integrity of the operation being +performed on the expected object. 2.4.6 KF_SLEEPABLE flag ----------------------- diff --git a/Documentation/bpf/linux-notes.rst b/Documentation/bpf/linux-notes.rst new file mode 100644 index 000000000000..956b0c86699d --- /dev/null +++ b/Documentation/bpf/linux-notes.rst @@ -0,0 +1,53 @@ +.. contents:: +.. 
sectnum:: + +========================== +Linux implementation notes +========================== + +This document provides more details specific to the Linux kernel implementation of the eBPF instruction set. + +Byte swap instructions +====================== + +``BPF_FROM_LE`` and ``BPF_FROM_BE`` exist as aliases for ``BPF_TO_LE`` and ``BPF_TO_BE`` respectively. + +Legacy BPF Packet access instructions +===================================== + +As mentioned in the `ISA standard documentation <instruction-set.rst#legacy-bpf-packet-access-instructions>`_, +Linux has special eBPF instructions for access to packet data that have been +carried over from classic BPF to retain the performance of legacy socket +filters running in the eBPF interpreter. + +The instructions come in two forms: ``BPF_ABS | <size> | BPF_LD`` and +``BPF_IND | <size> | BPF_LD``. + +These instructions are used to access packet data and can only be used when +the program context is a pointer to a networking packet. ``BPF_ABS`` +accesses packet data at an absolute offset specified by the immediate data +and ``BPF_IND`` accesses packet data at an offset that includes the value of +a register in addition to the immediate data. + +These instructions have seven implicit operands: + +* Register R6 is an implicit input that must contain a pointer to a + struct sk_buff. +* Register R0 is an implicit output which contains the data fetched from + the packet. +* Registers R1-R5 are scratch registers that are clobbered by the + instruction. + +These instructions have an implicit program exit condition as well. If an +eBPF program attempts to access data beyond the packet boundary, the +program execution will be aborted. + +``BPF_ABS | BPF_W | BPF_LD`` (0x20) means:: + + R0 = ntohl(*(u32 *) ((struct sk_buff *) R6->data + imm)) + +where ``ntohl()`` converts a 32-bit value from network byte order to host byte order. 
+ +``BPF_IND | BPF_W | BPF_LD`` (0x40) means:: + + R0 = ntohl(*(u32 *) ((struct sk_buff *) R6->data + src + imm)) diff --git a/arch/arm64/net/bpf_jit_comp.c b/arch/arm64/net/bpf_jit_comp.c index 389623ae5a91..30f76178608b 100644 --- a/arch/arm64/net/bpf_jit_comp.c +++ b/arch/arm64/net/bpf_jit_comp.c @@ -1970,7 +1970,7 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, u32 flags, struct bpf_tramp_links *tlinks, void *orig_call) { - int ret; + int i, ret; int nargs = m->nr_args; int max_insns = ((long)image_end - (long)image) / AARCH64_INSN_SIZE; struct jit_ctx ctx = { @@ -1982,6 +1982,12 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, if (nargs > 8) return -ENOTSUPP; + /* don't support struct argument */ + for (i = 0; i < MAX_BPF_FUNC_ARGS; i++) { + if (m->arg_flags[i] & BTF_FMODEL_STRUCT_ARG) + return -ENOTSUPP; + } + ret = prepare_trampoline(&ctx, im, tlinks, orig_call, nargs, flags); if (ret < 0) return ret; diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index f9920f1341c8..089c20cefd2b 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -284,6 +284,7 @@ config X86 select PROC_PID_ARCH_STATUS if PROC_FS select HAVE_ARCH_NODE_DEV_GROUP if X86_SGX imply IMA_SECURE_AND_OR_TRUSTED_BOOT if EFI + select HAVE_DYNAMIC_FTRACE_NO_PATCHABLE config INSTRUCTION_DECODER def_bool y diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c index c1f6c1c51d99..5b6230779cf3 100644 --- a/arch/x86/net/bpf_jit_comp.c +++ b/arch/x86/net/bpf_jit_comp.c @@ -662,7 +662,7 @@ static void emit_mov_imm64(u8 **pprog, u32 dst_reg, */ emit_mov_imm32(&prog, false, dst_reg, imm32_lo); } else { - /* movabsq %rax, imm64 */ + /* movabsq rax, imm64 */ EMIT2(add_1mod(0x48, dst_reg), add_1reg(0xB8, dst_reg)); EMIT(imm32_lo, 4); EMIT(imm32_hi, 4); @@ -1751,34 +1751,60 @@ emit_jmp: static void save_regs(const struct btf_func_model *m, u8 **prog, int nr_args, int stack_size) { - int i; + int i, j, arg_size, nr_regs; /* Store function 
arguments to stack. * For a function that accepts two pointers the sequence will be: * mov QWORD PTR [rbp-0x10],rdi * mov QWORD PTR [rbp-0x8],rsi */ - for (i = 0; i < min(nr_args, 6); i++) - emit_stx(prog, bytes_to_bpf_size(m->arg_size[i]), - BPF_REG_FP, - i == 5 ? X86_REG_R9 : BPF_REG_1 + i, - -(stack_size - i * 8)); + for (i = 0, j = 0; i < min(nr_args, 6); i++) { + if (m->arg_flags[i] & BTF_FMODEL_STRUCT_ARG) { + nr_regs = (m->arg_size[i] + 7) / 8; + arg_size = 8; + } else { + nr_regs = 1; + arg_size = m->arg_size[i]; + } + + while (nr_regs) { + emit_stx(prog, bytes_to_bpf_size(arg_size), + BPF_REG_FP, + j == 5 ? X86_REG_R9 : BPF_REG_1 + j, + -(stack_size - j * 8)); + nr_regs--; + j++; + } + } } static void restore_regs(const struct btf_func_model *m, u8 **prog, int nr_args, int stack_size) { - int i; + int i, j, arg_size, nr_regs; /* Restore function arguments from stack. * For a function that accepts two pointers the sequence will be: * EMIT4(0x48, 0x8B, 0x7D, 0xF0); mov rdi,QWORD PTR [rbp-0x10] * EMIT4(0x48, 0x8B, 0x75, 0xF8); mov rsi,QWORD PTR [rbp-0x8] */ - for (i = 0; i < min(nr_args, 6); i++) - emit_ldx(prog, bytes_to_bpf_size(m->arg_size[i]), - i == 5 ? X86_REG_R9 : BPF_REG_1 + i, - BPF_REG_FP, - -(stack_size - i * 8)); + for (i = 0, j = 0; i < min(nr_args, 6); i++) { + if (m->arg_flags[i] & BTF_FMODEL_STRUCT_ARG) { + nr_regs = (m->arg_size[i] + 7) / 8; + arg_size = 8; + } else { + nr_regs = 1; + arg_size = m->arg_size[i]; + } + + while (nr_regs) { + emit_ldx(prog, bytes_to_bpf_size(arg_size), + j == 5 ? 
X86_REG_R9 : BPF_REG_1 + j, + BPF_REG_FP, + -(stack_size - j * 8)); + nr_regs--; + j++; + } + } } static int invoke_bpf_prog(const struct btf_func_model *m, u8 **pprog, @@ -1810,6 +1836,9 @@ static int invoke_bpf_prog(const struct btf_func_model *m, u8 **pprog, if (p->aux->sleepable) { enter = __bpf_prog_enter_sleepable; exit = __bpf_prog_exit_sleepable; + } else if (p->type == BPF_PROG_TYPE_STRUCT_OPS) { + enter = __bpf_prog_enter_struct_ops; + exit = __bpf_prog_exit_struct_ops; } else if (p->expected_attach_type == BPF_LSM_CGROUP) { enter = __bpf_prog_enter_lsm_cgroup; exit = __bpf_prog_exit_lsm_cgroup; @@ -2013,13 +2042,14 @@ static int invoke_bpf_mod_ret(const struct btf_func_model *m, u8 **pprog, int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *image_end, const struct btf_func_model *m, u32 flags, struct bpf_tramp_links *tlinks, - void *orig_call) + void *func_addr) { - int ret, i, nr_args = m->nr_args; + int ret, i, nr_args = m->nr_args, extra_nregs = 0; int regs_off, ip_off, args_off, stack_size = nr_args * 8, run_ctx_off; struct bpf_tramp_links *fentry = &tlinks[BPF_TRAMP_FENTRY]; struct bpf_tramp_links *fexit = &tlinks[BPF_TRAMP_FEXIT]; struct bpf_tramp_links *fmod_ret = &tlinks[BPF_TRAMP_MODIFY_RETURN]; + void *orig_call = func_addr; u8 **branches = NULL; u8 *prog; bool save_ret; @@ -2028,6 +2058,14 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *i if (nr_args > 6) return -ENOTSUPP; + for (i = 0; i < MAX_BPF_FUNC_ARGS; i++) { + if (m->arg_flags[i] & BTF_FMODEL_STRUCT_ARG) + extra_nregs += (m->arg_size[i] + 7) / 8 - 1; + } + if (nr_args + extra_nregs > 6) + return -ENOTSUPP; + stack_size += extra_nregs * 8; + /* Generated trampoline stack layout: * * RBP + 8 [ return address ] @@ -2040,7 +2078,7 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *i * [ ... 
] * RBP - regs_off [ reg_arg1 ] program's ctx pointer * - * RBP - args_off [ args count ] always + * RBP - args_off [ arg regs count ] always * * RBP - ip_off [ traced function ] BPF_TRAMP_F_IP_ARG flag * @@ -2083,21 +2121,19 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *i EMIT4(0x48, 0x83, 0xEC, stack_size); /* sub rsp, stack_size */ EMIT1(0x53); /* push rbx */ - /* Store number of arguments of the traced function: - * mov rax, nr_args + /* Store number of argument registers of the traced function: + * mov rax, nr_args + extra_nregs * mov QWORD PTR [rbp - args_off], rax */ - emit_mov_imm64(&prog, BPF_REG_0, 0, (u32) nr_args); + emit_mov_imm64(&prog, BPF_REG_0, 0, (u32) nr_args + extra_nregs); emit_stx(&prog, BPF_DW, BPF_REG_FP, BPF_REG_0, -args_off); if (flags & BPF_TRAMP_F_IP_ARG) { /* Store IP address of the traced function: - * mov rax, QWORD PTR [rbp + 8] - * sub rax, X86_PATCH_SIZE + * movabsq rax, func_addr * mov QWORD PTR [rbp - ip_off], rax */ - emit_ldx(&prog, BPF_DW, BPF_REG_0, BPF_REG_FP, 8); - EMIT4(0x48, 0x83, 0xe8, X86_PATCH_SIZE); + emit_mov_imm64(&prog, BPF_REG_0, (long) func_addr >> 32, (u32) (long) func_addr); emit_stx(&prog, BPF_DW, BPF_REG_FP, BPF_REG_0, -ip_off); } @@ -2209,7 +2245,7 @@ cleanup: return ret; } -static int emit_bpf_dispatcher(u8 **pprog, int a, int b, s64 *progs) +static int emit_bpf_dispatcher(u8 **pprog, int a, int b, s64 *progs, u8 *image, u8 *buf) { u8 *jg_reloc, *prog = *pprog; int pivot, err, jg_bytes = 1; @@ -2225,12 +2261,12 @@ static int emit_bpf_dispatcher(u8 **pprog, int a, int b, s64 *progs) EMIT2_off32(0x81, add_1reg(0xF8, BPF_REG_3), progs[a]); err = emit_cond_near_jump(&prog, /* je func */ - (void *)progs[a], prog, + (void *)progs[a], image + (prog - buf), X86_JE); if (err) return err; - emit_indirect_jump(&prog, 2 /* rdx */, prog); + emit_indirect_jump(&prog, 2 /* rdx */, image + (prog - buf)); *pprog = prog; return 0; @@ -2255,7 +2291,7 @@ static int emit_bpf_dispatcher(u8 
**pprog, int a, int b, s64 *progs) jg_reloc = prog; err = emit_bpf_dispatcher(&prog, a, a + pivot, /* emit lower_part */ - progs); + progs, image, buf); if (err) return err; @@ -2269,7 +2305,7 @@ static int emit_bpf_dispatcher(u8 **pprog, int a, int b, s64 *progs) emit_code(jg_reloc - jg_bytes, jg_offset, jg_bytes); err = emit_bpf_dispatcher(&prog, a + pivot + 1, /* emit upper_part */ - b, progs); + b, progs, image, buf); if (err) return err; @@ -2289,12 +2325,12 @@ static int cmp_ips(const void *a, const void *b) return 0; } -int arch_prepare_bpf_dispatcher(void *image, s64 *funcs, int num_funcs) +int arch_prepare_bpf_dispatcher(void *image, void *buf, s64 *funcs, int num_funcs) { - u8 *prog = image; + u8 *prog = buf; sort(funcs, num_funcs, sizeof(funcs[0]), cmp_ips, NULL); - return emit_bpf_dispatcher(&prog, 0, num_funcs - 1, funcs); + return emit_bpf_dispatcher(&prog, 0, num_funcs - 1, funcs, image, buf); } struct x64_jit_data { diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h index 7c90b1ab3e00..2a1a7c10cbd9 100644 --- a/include/asm-generic/vmlinux.lds.h +++ b/include/asm-generic/vmlinux.lds.h @@ -154,6 +154,14 @@ #define MEM_DISCARD(sec) *(.mem##sec) #endif +#ifndef CONFIG_HAVE_DYNAMIC_FTRACE_NO_PATCHABLE +#define KEEP_PATCHABLE KEEP(*(__patchable_function_entries)) +#define PATCHABLE_DISCARDS +#else +#define KEEP_PATCHABLE +#define PATCHABLE_DISCARDS *(__patchable_function_entries) +#endif + #ifdef CONFIG_FTRACE_MCOUNT_RECORD /* * The ftrace call sites are logged to a section whose name depends on the @@ -172,7 +180,7 @@ #define MCOUNT_REC() . 
= ALIGN(8); \ __start_mcount_loc = .; \ KEEP(*(__mcount_loc)) \ - KEEP(*(__patchable_function_entries)) \ + KEEP_PATCHABLE \ __stop_mcount_loc = .; \ ftrace_stub_graph = ftrace_stub; \ ftrace_ops_list_func = arch_ftrace_ops_list_func; @@ -1023,6 +1031,7 @@ #define COMMON_DISCARDS \ SANITIZER_DISCARDS \ + PATCHABLE_DISCARDS \ *(.discard) \ *(.discard.*) \ *(.modinfo) \ diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 9c1674973e03..9e7d46d16032 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -280,14 +280,33 @@ static inline void check_and_init_map_value(struct bpf_map *map, void *dst) } } -/* copy everything but bpf_spin_lock and bpf_timer. There could be one of each. */ -static inline void copy_map_value(struct bpf_map *map, void *dst, void *src) +/* memcpy that is used with 8-byte aligned pointers, multiple-of-8 size and + * forced to use 'long' read/writes to try to atomically copy long counters. + * Best-effort only. No barriers here, since it _will_ race with concurrent + * updates from BPF programs. Called from bpf syscall and mostly used with + * size 8 or 16 bytes, so ask compiler to inline it. + */ +static inline void bpf_long_memcpy(void *dst, const void *src, u32 size) +{ + const long *lsrc = src; + long *ldst = dst; + + size /= sizeof(long); + while (size--) + *ldst++ = *lsrc++; +} + +/* copy everything but bpf_spin_lock, bpf_timer, and kptrs. There could be one of each. 
*/ +static inline void __copy_map_value(struct bpf_map *map, void *dst, void *src, bool long_memcpy) { u32 curr_off = 0; int i; if (likely(!map->off_arr)) { - memcpy(dst, src, map->value_size); + if (long_memcpy) + bpf_long_memcpy(dst, src, round_up(map->value_size, 8)); + else + memcpy(dst, src, map->value_size); return; } @@ -299,6 +318,36 @@ static inline void copy_map_value(struct bpf_map *map, void *dst, void *src) } memcpy(dst + curr_off, src + curr_off, map->value_size - curr_off); } + +static inline void copy_map_value(struct bpf_map *map, void *dst, void *src) +{ + __copy_map_value(map, dst, src, false); +} + +static inline void copy_map_value_long(struct bpf_map *map, void *dst, void *src) +{ + __copy_map_value(map, dst, src, true); +} + +static inline void zero_map_value(struct bpf_map *map, void *dst) +{ + u32 curr_off = 0; + int i; + + if (likely(!map->off_arr)) { + memset(dst, 0, map->value_size); + return; + } + + for (i = 0; i < map->off_arr->cnt; i++) { + u32 next_off = map->off_arr->field_off[i]; + + memset(dst + curr_off, 0, next_off - curr_off); + curr_off += map->off_arr->field_sz[i]; + } + memset(dst + curr_off, 0, map->value_size - curr_off); +} + void copy_map_value_locked(struct bpf_map *map, void *dst, void *src, bool lock_src); void bpf_timer_cancel_and_free(void *timer); @@ -402,7 +451,7 @@ enum bpf_type_flag { /* DYNPTR points to memory local to the bpf program. */ DYNPTR_TYPE_LOCAL = BIT(8 + BPF_BASE_TYPE_BITS), - /* DYNPTR points to a ringbuf record. */ + /* DYNPTR points to a kernel-produced ringbuf record. */ DYNPTR_TYPE_RINGBUF = BIT(9 + BPF_BASE_TYPE_BITS), /* Size is known at compile time. */ @@ -607,6 +656,7 @@ enum bpf_reg_type { PTR_TO_MEM, /* reg points to valid memory region */ PTR_TO_BUF, /* reg points to a read/write buffer */ PTR_TO_FUNC, /* reg points to a bpf program function */ + PTR_TO_DYNPTR, /* reg points to a dynptr */ __BPF_REG_TYPE_MAX, /* Extended reg_types. 
*/ @@ -727,10 +777,14 @@ enum bpf_cgroup_storage_type { */ #define MAX_BPF_FUNC_REG_ARGS 5 +/* The argument is a structure. */ +#define BTF_FMODEL_STRUCT_ARG BIT(0) + struct btf_func_model { u8 ret_size; u8 nr_args; u8 arg_size[MAX_BPF_FUNC_ARGS]; + u8 arg_flags[MAX_BPF_FUNC_ARGS]; }; /* Restore arguments before returning from trampoline to let original function @@ -810,6 +864,10 @@ u64 notrace __bpf_prog_enter_lsm_cgroup(struct bpf_prog *prog, struct bpf_tramp_run_ctx *run_ctx); void notrace __bpf_prog_exit_lsm_cgroup(struct bpf_prog *prog, u64 start, struct bpf_tramp_run_ctx *run_ctx); +u64 notrace __bpf_prog_enter_struct_ops(struct bpf_prog *prog, + struct bpf_tramp_run_ctx *run_ctx); +void notrace __bpf_prog_exit_struct_ops(struct bpf_prog *prog, u64 start, + struct bpf_tramp_run_ctx *run_ctx); void notrace __bpf_tramp_enter(struct bpf_tramp_image *tr); void notrace __bpf_tramp_exit(struct bpf_tramp_image *tr); @@ -892,6 +950,7 @@ struct bpf_dispatcher { struct bpf_dispatcher_prog progs[BPF_DISPATCHER_MAX]; int num_progs; void *image; + void *rw_image; u32 image_off; struct bpf_ksym ksym; }; @@ -910,7 +969,7 @@ int bpf_trampoline_unlink_prog(struct bpf_tramp_link *link, struct bpf_trampolin struct bpf_trampoline *bpf_trampoline_get(u64 key, struct bpf_attach_target_info *tgt_info); void bpf_trampoline_put(struct bpf_trampoline *tr); -int arch_prepare_bpf_dispatcher(void *image, s64 *funcs, int num_funcs); +int arch_prepare_bpf_dispatcher(void *image, void *buf, s64 *funcs, int num_funcs); #define BPF_DISPATCHER_INIT(_name) { \ .mutex = __MUTEX_INITIALIZER(_name.mutex), \ .func = &_name##_func, \ @@ -924,7 +983,14 @@ int arch_prepare_bpf_dispatcher(void *image, s64 *funcs, int num_funcs); }, \ } +#ifdef CONFIG_X86_64 +#define BPF_DISPATCHER_ATTRIBUTES __attribute__((patchable_function_entry(5))) +#else +#define BPF_DISPATCHER_ATTRIBUTES +#endif + #define DEFINE_BPF_DISPATCHER(name) \ + notrace BPF_DISPATCHER_ATTRIBUTES \ noinline __nocfi unsigned int 
bpf_dispatcher_##name##_func( \ const void *ctx, \ const struct bpf_insn *insnsi, \ @@ -946,7 +1012,6 @@ int arch_prepare_bpf_dispatcher(void *image, s64 *funcs, int num_funcs); void bpf_dispatcher_change_prog(struct bpf_dispatcher *d, struct bpf_prog *from, struct bpf_prog *to); /* Called only from JIT-enabled code, so there's no need for stubs. */ -void *bpf_jit_alloc_exec_page(void); void bpf_image_ksym_add(void *data, struct bpf_ksym *ksym); void bpf_image_ksym_del(struct bpf_ksym *ksym); void bpf_ksym_add(struct bpf_ksym *ksym); @@ -1334,6 +1399,11 @@ struct bpf_array { #define BPF_MAP_CAN_READ BIT(0) #define BPF_MAP_CAN_WRITE BIT(1) +/* Maximum number of user-producer ring buffer samples that can be drained in + * a call to bpf_user_ringbuf_drain(). + */ +#define BPF_MAX_USER_RINGBUF_SAMPLES (128 * 1024) + static inline u32 bpf_map_flags_to_cap(struct bpf_map *map) { u32 access_flags = map->map_flags & (BPF_F_RDONLY_PROG | BPF_F_WRONLY_PROG); @@ -1730,6 +1800,27 @@ int bpf_obj_get_user(const char __user *pathname, int flags); extern int bpf_iter_ ## target(args); \ int __init bpf_iter_ ## target(args) { return 0; } +/* + * The task type of iterators. + * + * For BPF task iterators, they can be parameterized with various + * parameters to visit only some of tasks. + * + * BPF_TASK_ITER_ALL (default) + * Iterate over resources of every task. + * + * BPF_TASK_ITER_TID + * Iterate over resources of a task/tid. + * + * BPF_TASK_ITER_TGID + * Iterate over resources of every task of a process / task group. 
+ */ +enum bpf_iter_task_type { + BPF_TASK_ITER_ALL = 0, + BPF_TASK_ITER_TID, + BPF_TASK_ITER_TGID, +}; + struct bpf_iter_aux_info { /* for map_elem iter */ struct bpf_map *map; @@ -1739,6 +1830,10 @@ struct bpf_iter_aux_info { struct cgroup *start; /* starting cgroup */ enum bpf_cgroup_iter_order order; } cgroup; + struct { + enum bpf_iter_task_type type; + u32 pid; + } task; }; typedef int (*bpf_iter_attach_target_t)(struct bpf_prog *prog, @@ -1823,22 +1918,6 @@ int bpf_get_file_flag(int flags); int bpf_check_uarg_tail_zero(bpfptr_t uaddr, size_t expected_size, size_t actual_size); -/* memcpy that is used with 8-byte aligned pointers, power-of-8 size and - * forced to use 'long' read/writes to try to atomically copy long counters. - * Best-effort only. No barriers here, since it _will_ race with concurrent - * updates from BPF programs. Called from bpf syscall and mostly used with - * size 8 or 16 bytes, so ask compiler to inline it. - */ -static inline void bpf_long_memcpy(void *dst, const void *src, u32 size) -{ - const long *lsrc = src; - long *ldst = dst; - - size /= sizeof(long); - while (size--) - *ldst++ = *lsrc++; -} - /* verify correctness of eBPF program */ int bpf_check(struct bpf_prog **fp, union bpf_attr *attr, bpfptr_t uattr); @@ -1940,13 +2019,22 @@ int btf_distill_func_proto(struct bpf_verifier_log *log, const char *func_name, struct btf_func_model *m); +struct bpf_kfunc_arg_meta { + u64 r0_size; + bool r0_rdonly; + int ref_obj_id; + u32 flags; +}; + struct bpf_reg_state; int btf_check_subprog_arg_match(struct bpf_verifier_env *env, int subprog, struct bpf_reg_state *regs); +int btf_check_subprog_call(struct bpf_verifier_env *env, int subprog, + struct bpf_reg_state *regs); int btf_check_kfunc_arg_match(struct bpf_verifier_env *env, const struct btf *btf, u32 func_id, struct bpf_reg_state *regs, - u32 kfunc_flags); + struct bpf_kfunc_arg_meta *meta); int btf_prepare_func_args(struct bpf_verifier_env *env, int subprog, struct bpf_reg_state *reg); 
int btf_check_type_match(struct bpf_verifier_log *log, const struct bpf_prog *prog, @@ -1983,6 +2071,8 @@ static inline bool has_current_bpf_ctx(void) { return !!current->bpf_ctx; } + +void notrace bpf_prog_inc_misses_counter(struct bpf_prog *prog); #else /* !CONFIG_BPF_SYSCALL */ static inline struct bpf_prog *bpf_prog_get(u32 ufd) { @@ -2165,6 +2255,15 @@ static inline struct bpf_prog *bpf_prog_by_id(u32 id) return ERR_PTR(-ENOTSUPP); } +static inline int btf_struct_access(struct bpf_verifier_log *log, + const struct btf *btf, + const struct btf_type *t, int off, int size, + enum bpf_access_type atype, + u32 *next_btf_id, enum bpf_type_flag *flag) +{ + return -EACCES; +} + static inline const struct bpf_func_proto * bpf_base_func_proto(enum bpf_func_id func_id) { @@ -2196,6 +2295,10 @@ static inline bool has_current_bpf_ctx(void) { return false; } + +static inline void bpf_prog_inc_misses_counter(struct bpf_prog *prog) +{ +} #endif /* CONFIG_BPF_SYSCALL */ void __bpf_free_used_btfs(struct bpf_prog_aux *aux, @@ -2433,6 +2536,7 @@ extern const struct bpf_func_proto bpf_loop_proto; extern const struct bpf_func_proto bpf_copy_from_user_task_proto; extern const struct bpf_func_proto bpf_set_retval_proto; extern const struct bpf_func_proto bpf_get_retval_proto; +extern const struct bpf_func_proto bpf_user_ringbuf_drain_proto; const struct bpf_func_proto *tracing_prog_func_proto( enum bpf_func_id func_id, const struct bpf_prog *prog); @@ -2577,7 +2681,7 @@ enum bpf_dynptr_type { BPF_DYNPTR_TYPE_INVALID, /* Points to memory that is local to the bpf program */ BPF_DYNPTR_TYPE_LOCAL, - /* Underlying data is a ringbuf record */ + /* Underlying data is a kernel-produced ringbuf record */ BPF_DYNPTR_TYPE_RINGBUF, }; @@ -2585,6 +2689,7 @@ void bpf_dynptr_init(struct bpf_dynptr_kern *ptr, void *data, enum bpf_dynptr_type type, u32 offset, u32 size); void bpf_dynptr_set_null(struct bpf_dynptr_kern *ptr); int bpf_dynptr_check_size(u32 size); +u32 bpf_dynptr_get_size(struct 
bpf_dynptr_kern *ptr); #ifdef CONFIG_BPF_LSM void bpf_cgroup_atype_get(u32 attach_btf_id, int cgroup_atype); @@ -2594,4 +2699,12 @@ static inline void bpf_cgroup_atype_get(u32 attach_btf_id, int cgroup_atype) {} static inline void bpf_cgroup_atype_put(int cgroup_atype) {} #endif /* CONFIG_BPF_LSM */ +struct key; + +#ifdef CONFIG_KEYS +struct bpf_key { + struct key *key; + bool has_ref; +}; +#endif /* CONFIG_KEYS */ #endif /* _LINUX_BPF_H */ diff --git a/include/linux/bpf_types.h b/include/linux/bpf_types.h index 2b9112b80171..2c6a4f2562a7 100644 --- a/include/linux/bpf_types.h +++ b/include/linux/bpf_types.h @@ -126,6 +126,7 @@ BPF_MAP_TYPE(BPF_MAP_TYPE_STRUCT_OPS, bpf_struct_ops_map_ops) #endif BPF_MAP_TYPE(BPF_MAP_TYPE_RINGBUF, ringbuf_map_ops) BPF_MAP_TYPE(BPF_MAP_TYPE_BLOOM_FILTER, bloom_filter_map_ops) +BPF_MAP_TYPE(BPF_MAP_TYPE_USER_RINGBUF, user_ringbuf_map_ops) BPF_LINK_TYPE(BPF_LINK_TYPE_RAW_TRACEPOINT, raw_tracepoint) BPF_LINK_TYPE(BPF_LINK_TYPE_TRACING, tracing) diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index 1fdddbf3546b..9e1e6965f407 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -248,6 +248,7 @@ struct bpf_func_state { */ u32 async_entry_cnt; bool in_callback_fn; + struct tnum callback_ret_range; bool in_async_callback_fn; /* The following fields should be last. 
See copy_func_state() */ @@ -348,6 +349,27 @@ struct bpf_verifier_state { iter < frame->allocated_stack / BPF_REG_SIZE; \ iter++, reg = bpf_get_spilled_reg(iter, frame)) +/* Invoke __expr over registers in __vst, setting __state and __reg */ +#define bpf_for_each_reg_in_vstate(__vst, __state, __reg, __expr) \ + ({ \ + struct bpf_verifier_state *___vstate = __vst; \ + int ___i, ___j; \ + for (___i = 0; ___i <= ___vstate->curframe; ___i++) { \ + struct bpf_reg_state *___regs; \ + __state = ___vstate->frame[___i]; \ + ___regs = __state->regs; \ + for (___j = 0; ___j < MAX_BPF_REG; ___j++) { \ + __reg = &___regs[___j]; \ + (void)(__expr); \ + } \ + bpf_for_each_spilled_reg(___j, __state, __reg) { \ + if (!__reg) \ + continue; \ + (void)(__expr); \ + } \ + } \ + }) + /* linked list of verifier states used to prune search */ struct bpf_verifier_state_list { struct bpf_verifier_state state; @@ -571,6 +593,11 @@ int check_kfunc_mem_size_reg(struct bpf_verifier_env *env, struct bpf_reg_state u32 regno); int check_mem_reg(struct bpf_verifier_env *env, struct bpf_reg_state *reg, u32 regno, u32 mem_size); +bool is_dynptr_reg_valid_init(struct bpf_verifier_env *env, + struct bpf_reg_state *reg); +bool is_dynptr_type_expected(struct bpf_verifier_env *env, + struct bpf_reg_state *reg, + enum bpf_arg_type arg_type); /* this lives here instead of in bpf.h because it needs to dereference tgt_prog */ static inline u64 bpf_trampoline_compute_key(const struct bpf_prog *tgt_prog, @@ -598,6 +625,8 @@ int bpf_check_attach_target(struct bpf_verifier_log *log, struct bpf_attach_target_info *tgt_info); void bpf_free_kfunc_btf_tab(struct bpf_kfunc_btf_tab *tab); +int mark_chain_precision(struct bpf_verifier_env *env, int regno); + #define BPF_BASE_TYPE_MASK GENMASK(BPF_BASE_TYPE_BITS - 1, 0) /* extract base type from bpf_{arg, return, reg}_type. 
*/ diff --git a/include/linux/btf.h b/include/linux/btf.h index ad93c2d9cc1c..f9aababc5d78 100644 --- a/include/linux/btf.h +++ b/include/linux/btf.h @@ -52,6 +52,15 @@ #define KF_SLEEPABLE (1 << 5) /* kfunc may sleep */ #define KF_DESTRUCTIVE (1 << 6) /* kfunc performs destructive actions */ +/* + * Return the name of the passed struct, if exists, or halt the build if for + * example the structure gets renamed. In this way, developers have to revisit + * the code using that structure name, and update it accordingly. + */ +#define stringify_struct(x) \ + ({ BUILD_BUG_ON(sizeof(struct x) < 0); \ + __stringify(x); }) + struct btf; struct btf_member; struct btf_type; @@ -441,4 +450,14 @@ static inline int register_btf_id_dtor_kfuncs(const struct btf_id_dtor_kfunc *dt } #endif +static inline bool btf_type_is_struct_ptr(struct btf *btf, const struct btf_type *t) +{ + if (!btf_type_is_ptr(t)) + return false; + + t = btf_type_skip_modifiers(btf, t->type, NULL); + + return btf_type_is_struct(t); +} + #endif diff --git a/include/linux/filter.h b/include/linux/filter.h index 527ae1d64e27..efc42a6e3aed 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -567,6 +567,12 @@ struct sk_filter { DECLARE_STATIC_KEY_FALSE(bpf_stats_enabled_key); +extern struct mutex nf_conn_btf_access_lock; +extern int (*nfct_btf_struct_access)(struct bpf_verifier_log *log, const struct btf *btf, + const struct btf_type *t, int off, int size, + enum bpf_access_type atype, u32 *next_btf_id, + enum bpf_type_flag *flag); + typedef unsigned int (*bpf_dispatcher_fn)(const void *ctx, const struct bpf_insn *insnsi, unsigned int (*bpf_func)(const void *, @@ -1017,6 +1023,8 @@ extern long bpf_jit_limit_max; typedef void (*bpf_jit_fill_hole_t)(void *area, unsigned int size); +void bpf_jit_fill_hole_with_zero(void *area, unsigned int size); + struct bpf_binary_header * bpf_jit_binary_alloc(unsigned int proglen, u8 **image_ptr, unsigned int alignment, @@ -1029,6 +1037,9 @@ void 
bpf_jit_free(struct bpf_prog *fp); struct bpf_binary_header * bpf_jit_binary_pack_hdr(const struct bpf_prog *fp); +void *bpf_prog_pack_alloc(u32 size, bpf_jit_fill_hole_t bpf_fill_ill_insns); +void bpf_prog_pack_free(struct bpf_binary_header *hdr); + static inline bool bpf_prog_kallsyms_verify_off(const struct bpf_prog *fp) { return list_empty(&fp->aux->ksym.lnode) || @@ -1099,7 +1110,7 @@ static inline bool bpf_jit_blinding_enabled(struct bpf_prog *prog) return false; if (!bpf_jit_harden) return false; - if (bpf_jit_harden == 1 && capable(CAP_SYS_ADMIN)) + if (bpf_jit_harden == 1 && bpf_capable()) return false; return true; diff --git a/include/linux/key.h b/include/linux/key.h index 7febc4881363..d27477faf00d 100644 --- a/include/linux/key.h +++ b/include/linux/key.h @@ -88,6 +88,12 @@ enum key_need_perm { KEY_DEFER_PERM_CHECK, /* Special: permission check is deferred */ }; +enum key_lookup_flag { + KEY_LOOKUP_CREATE = 0x01, + KEY_LOOKUP_PARTIAL = 0x02, + KEY_LOOKUP_ALL = (KEY_LOOKUP_CREATE | KEY_LOOKUP_PARTIAL), +}; + struct seq_file; struct user_struct; struct signal_struct; diff --git a/include/linux/kprobes.h b/include/linux/kprobes.h index 55041d2f884d..a0b92be98984 100644 --- a/include/linux/kprobes.h +++ b/include/linux/kprobes.h @@ -103,6 +103,7 @@ struct kprobe { * this flag is only for optimized_kprobe. */ #define KPROBE_FLAG_FTRACE 8 /* probe is using ftrace */ +#define KPROBE_FLAG_ON_FUNC_ENTRY 16 /* probe is on the function entry */ /* Has this kprobe gone ? 
*/ static inline bool kprobe_gone(struct kprobe *p) diff --git a/include/linux/poison.h b/include/linux/poison.h index d62ef5a6b4e9..2d3249eb0e62 100644 --- a/include/linux/poison.h +++ b/include/linux/poison.h @@ -81,4 +81,7 @@ /********** net/core/page_pool.c **********/ #define PP_SIGNATURE (0x40 + POISON_POINTER_DELTA) +/********** kernel/bpf/ **********/ +#define BPF_PTR_POISON ((void *)(0xeB9FUL + POISON_POINTER_DELTA)) + #endif diff --git a/include/linux/tcp.h b/include/linux/tcp.h index a9fbe22732c3..3bdf687e2fb3 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -388,6 +388,12 @@ struct tcp_sock { u8 bpf_sock_ops_cb_flags; /* Control calling BPF programs * values defined in uapi/linux/tcp.h */ + u8 bpf_chg_cc_inprogress:1; /* In the middle of + * bpf_setsockopt(TCP_CONGESTION), + * it is to avoid the bpf_tcp_cc->init() + * to recur itself by calling + * bpf_setsockopt(TCP_CONGESTION, "itself"). + */ #define BPF_SOCK_OPS_TEST_FLAG(TP, ARG) (TP->bpf_sock_ops_cb_flags & ARG) #else #define BPF_SOCK_OPS_TEST_FLAG(TP, ARG) 0 diff --git a/include/linux/verification.h b/include/linux/verification.h index a655923335ae..f34e50ebcf60 100644 --- a/include/linux/verification.h +++ b/include/linux/verification.h @@ -17,6 +17,14 @@ #define VERIFY_USE_SECONDARY_KEYRING ((struct key *)1UL) #define VERIFY_USE_PLATFORM_KEYRING ((struct key *)2UL) +static inline int system_keyring_id_check(u64 id) +{ + if (id > (unsigned long)VERIFY_USE_PLATFORM_KEYRING) + return -EINVAL; + + return 0; +} + /* * The use to which an asymmetric key is being put. 
*/ diff --git a/include/net/netfilter/nf_conntrack_bpf.h b/include/net/netfilter/nf_conntrack_bpf.h index a473b56842c5..2d0da478c8e0 100644 --- a/include/net/netfilter/nf_conntrack_bpf.h +++ b/include/net/netfilter/nf_conntrack_bpf.h @@ -3,13 +3,18 @@ #ifndef _NF_CONNTRACK_BPF_H #define _NF_CONNTRACK_BPF_H -#include <linux/btf.h> #include <linux/kconfig.h> +#include <net/netfilter/nf_conntrack.h> + +struct nf_conn___init { + struct nf_conn ct; +}; #if (IS_BUILTIN(CONFIG_NF_CONNTRACK) && IS_ENABLED(CONFIG_DEBUG_INFO_BTF)) || \ (IS_MODULE(CONFIG_NF_CONNTRACK) && IS_ENABLED(CONFIG_DEBUG_INFO_BTF_MODULES)) extern int register_nf_conntrack_bpf(void); +extern void cleanup_nf_conntrack_bpf(void); #else @@ -18,6 +23,24 @@ static inline int register_nf_conntrack_bpf(void) return 0; } +static inline void cleanup_nf_conntrack_bpf(void) +{ +} + +#endif + +#if (IS_BUILTIN(CONFIG_NF_NAT) && IS_ENABLED(CONFIG_DEBUG_INFO_BTF)) || \ + (IS_MODULE(CONFIG_NF_NAT) && IS_ENABLED(CONFIG_DEBUG_INFO_BTF_MODULES)) + +extern int register_nf_nat_bpf(void); + +#else + +static inline int register_nf_nat_bpf(void) +{ + return 0; +} + #endif #endif /* _NF_CONNTRACK_BPF_H */ diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 793103b10eab..3075018a4ef8 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -110,6 +110,12 @@ union bpf_iter_link_info { __u32 cgroup_fd; __u64 cgroup_id; } cgroup; + /* Parameters of task iterators. */ + struct { + __u32 tid; + __u32 pid; + __u32 pid_fd; + } task; }; /* BPF syscall commands, see bpf(2) man-page for more details. */ @@ -928,6 +934,7 @@ enum bpf_map_type { BPF_MAP_TYPE_INODE_STORAGE, BPF_MAP_TYPE_TASK_STORAGE, BPF_MAP_TYPE_BLOOM_FILTER, + BPF_MAP_TYPE_USER_RINGBUF, }; /* Note that tracing related programs such as @@ -4950,6 +4957,7 @@ union bpf_attr { * Get address of the traced function (for tracing and kprobe programs). * Return * Address of the traced function. 
+ * 0 for kprobes placed within the function (not at the entry). * * u64 bpf_get_attach_cookie(void *ctx) * Description @@ -5079,12 +5087,12 @@ union bpf_attr { * * long bpf_get_func_arg(void *ctx, u32 n, u64 *value) * Description - * Get **n**-th argument (zero based) of the traced function (for tracing programs) + * Get **n**-th argument register (zero based) of the traced function (for tracing programs) * returned in **value**. * * Return * 0 on success. - * **-EINVAL** if n >= arguments count of traced function. + * **-EINVAL** if n >= argument register count of traced function. * * long bpf_get_func_ret(void *ctx, u64 *value) * Description @@ -5097,10 +5105,11 @@ union bpf_attr { * * long bpf_get_func_arg_cnt(void *ctx) * Description - * Get number of arguments of the traced function (for tracing programs). + * Get number of registers of the traced function (for tracing programs) where + * function arguments are stored in these registers. * * Return - * The number of arguments of the traced function. + * The number of argument registers of the traced function. * * int bpf_get_retval(void) * Description @@ -5386,6 +5395,43 @@ union bpf_attr { * Return * Current *ktime*. * + * long bpf_user_ringbuf_drain(struct bpf_map *map, void *callback_fn, void *ctx, u64 flags) + * Description + * Drain samples from the specified user ring buffer, and invoke + * the provided callback for each such sample: + * + * long (\*callback_fn)(struct bpf_dynptr \*dynptr, void \*ctx); + * + * If **callback_fn** returns 0, the helper will continue to try + * and drain the next sample, up to a maximum of + * BPF_MAX_USER_RINGBUF_SAMPLES samples. If the return value is 1, + * the helper will skip the rest of the samples and return. Other + * return values are not used now, and will be rejected by the + * verifier. + * Return + * The number of drained samples if no error was encountered while + * draining samples, or 0 if no samples were present in the ring + * buffer. 
If a user-space producer was epoll-waiting on this map, + * and at least one sample was drained, they will receive an event + * notification notifying them of available space in the ring + * buffer. If the BPF_RB_NO_WAKEUP flag is passed to this + * function, no wakeup notification will be sent. If the + * BPF_RB_FORCE_WAKEUP flag is passed, a wakeup notification will + * be sent even if no sample was drained. + * + * On failure, the returned value is one of the following: + * + * **-EBUSY** if the ring buffer is contended, and another calling + * context was concurrently draining the ring buffer. + * + * **-EINVAL** if user-space is not properly tracking the ring + * buffer due to the producer position not being aligned to 8 + * bytes, a sample not being aligned to 8 bytes, or the producer + * position not matching the advertised length of a sample. + * + * **-E2BIG** if user-space has tried to publish a sample which is + * larger than the size of the ring buffer, or which cannot fit + * within a struct bpf_dynptr. 
*/ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -5597,6 +5643,7 @@ union bpf_attr { FN(tcp_raw_check_syncookie_ipv4), \ FN(tcp_raw_check_syncookie_ipv6), \ FN(ktime_get_tai_ns), \ + FN(user_ringbuf_drain), \ /* */ /* integer value in 'imm' field of BPF_CALL instruction selects which helper @@ -6218,6 +6265,10 @@ struct bpf_link_info { __u64 cgroup_id; __u32 order; } cgroup; + struct { + __u32 tid; + __u32 pid; + } task; }; } iter; struct { diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c index 624527401d4d..832b2659e96e 100644 --- a/kernel/bpf/arraymap.c +++ b/kernel/bpf/arraymap.c @@ -279,7 +279,8 @@ int bpf_percpu_array_copy(struct bpf_map *map, void *key, void *value) rcu_read_lock(); pptr = array->pptrs[index & array->index_mask]; for_each_possible_cpu(cpu) { - bpf_long_memcpy(value + off, per_cpu_ptr(pptr, cpu), size); + copy_map_value_long(map, value + off, per_cpu_ptr(pptr, cpu)); + check_and_init_map_value(map, value + off); off += size; } rcu_read_unlock(); @@ -338,8 +339,9 @@ static int array_map_update_elem(struct bpf_map *map, void *key, void *value, return -EINVAL; if (array->map.map_type == BPF_MAP_TYPE_PERCPU_ARRAY) { - memcpy(this_cpu_ptr(array->pptrs[index & array->index_mask]), - value, map->value_size); + val = this_cpu_ptr(array->pptrs[index & array->index_mask]); + copy_map_value(map, val, value); + check_and_free_fields(array, val); } else { val = array->value + (u64)array->elem_size * (index & array->index_mask); @@ -383,7 +385,8 @@ int bpf_percpu_array_update(struct bpf_map *map, void *key, void *value, rcu_read_lock(); pptr = array->pptrs[index & array->index_mask]; for_each_possible_cpu(cpu) { - bpf_long_memcpy(per_cpu_ptr(pptr, cpu), value + off, size); + copy_map_value_long(map, per_cpu_ptr(pptr, cpu), value + off); + check_and_free_fields(array, per_cpu_ptr(pptr, cpu)); off += size; } rcu_read_unlock(); @@ -421,8 +424,20 @@ static void array_map_free(struct bpf_map *map) int i; if (map_value_has_kptrs(map)) { - for (i = 0; 
i < array->map.max_entries; i++) - bpf_map_free_kptrs(map, array_map_elem_ptr(array, i)); + if (array->map.map_type == BPF_MAP_TYPE_PERCPU_ARRAY) { + for (i = 0; i < array->map.max_entries; i++) { + void __percpu *pptr = array->pptrs[i & array->index_mask]; + int cpu; + + for_each_possible_cpu(cpu) { + bpf_map_free_kptrs(map, per_cpu_ptr(pptr, cpu)); + cond_resched(); + } + } + } else { + for (i = 0; i < array->map.max_entries; i++) + bpf_map_free_kptrs(map, array_map_elem_ptr(array, i)); + } bpf_map_free_kptr_off_tab(map); } @@ -608,9 +623,9 @@ static int __bpf_array_map_seq_show(struct seq_file *seq, void *v) pptr = v; size = array->elem_size; for_each_possible_cpu(cpu) { - bpf_long_memcpy(info->percpu_value_buf + off, - per_cpu_ptr(pptr, cpu), - size); + copy_map_value_long(map, info->percpu_value_buf + off, + per_cpu_ptr(pptr, cpu)); + check_and_init_map_value(map, info->percpu_value_buf + off); off += size; } ctx.value = info->percpu_value_buf; diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c index 903719b89238..a44ad4b347ff 100644 --- a/kernel/bpf/btf.c +++ b/kernel/bpf/btf.c @@ -208,7 +208,7 @@ enum btf_kfunc_hook { }; enum { - BTF_KFUNC_SET_MAX_CNT = 32, + BTF_KFUNC_SET_MAX_CNT = 256, BTF_DTOR_KFUNC_MAX_CNT = 256, }; @@ -818,6 +818,7 @@ const struct btf_type *btf_type_by_id(const struct btf *btf, u32 type_id) return NULL; return btf->types[type_id]; } +EXPORT_SYMBOL_GPL(btf_type_by_id); /* * Regular int is not a bit field and it must be either @@ -1396,7 +1397,6 @@ __printf(4, 5) static void __btf_verifier_log_type(struct btf_verifier_env *env, const char *fmt, ...) { struct bpf_verifier_log *log = &env->log; - u8 kind = BTF_INFO_KIND(t->info); struct btf *btf = env->btf; va_list args; @@ -1412,7 +1412,7 @@ __printf(4, 5) static void __btf_verifier_log_type(struct btf_verifier_env *env, __btf_verifier_log(log, "[%u] %s %s%s", env->log_type_id, - btf_kind_str[kind], + btf_type_str(t), __btf_name_by_offset(btf, t->name_off), log_details ? 
" " : ""); @@ -4854,7 +4854,6 @@ static int btf_parse_hdr(struct btf_verifier_env *env) u32 hdr_len, hdr_copy, btf_data_size; const struct btf_header *hdr; struct btf *btf; - int err; btf = env->btf; btf_data_size = btf->data_size; @@ -4911,11 +4910,7 @@ static int btf_parse_hdr(struct btf_verifier_env *env) return -EINVAL; } - err = btf_check_sec_info(env, btf_data_size); - if (err) - return err; - - return 0; + return btf_check_sec_info(env, btf_data_size); } static int btf_check_type_tags(struct btf_verifier_env *env, @@ -5328,6 +5323,34 @@ static bool is_int_ptr(struct btf *btf, const struct btf_type *t) return btf_type_is_int(t); } +static u32 get_ctx_arg_idx(struct btf *btf, const struct btf_type *func_proto, + int off) +{ + const struct btf_param *args; + const struct btf_type *t; + u32 offset = 0, nr_args; + int i; + + if (!func_proto) + return off / 8; + + nr_args = btf_type_vlen(func_proto); + args = (const struct btf_param *)(func_proto + 1); + for (i = 0; i < nr_args; i++) { + t = btf_type_skip_modifiers(btf, args[i].type, NULL); + offset += btf_type_is_ptr(t) ? 8 : roundup(t->size, 8); + if (off < offset) + return i; + } + + t = btf_type_skip_modifiers(btf, func_proto->type, NULL); + offset += btf_type_is_ptr(t) ? 8 : roundup(t->size, 8); + if (off < offset) + return nr_args; + + return nr_args + 1; +} + bool btf_ctx_access(int off, int size, enum bpf_access_type type, const struct bpf_prog *prog, struct bpf_insn_access_aux *info) @@ -5347,7 +5370,7 @@ bool btf_ctx_access(int off, int size, enum bpf_access_type type, tname, off); return false; } - arg = off / 8; + arg = get_ctx_arg_idx(btf, t, off); args = (const struct btf_param *)(t + 1); /* if (t == NULL) Fall back to default BPF prog with * MAX_BPF_FUNC_REG_ARGS u64 arguments. 
@@ -5398,7 +5421,7 @@ bool btf_ctx_access(int off, int size, enum bpf_access_type type, if (!btf_type_is_small_int(t)) { bpf_log(log, "ret type %s not allowed for fmod_ret\n", - btf_kind_str[BTF_INFO_KIND(t->info)]); + btf_type_str(t)); return false; } break; @@ -5417,7 +5440,7 @@ bool btf_ctx_access(int off, int size, enum bpf_access_type type, /* skip modifiers */ while (btf_type_is_modifier(t)) t = btf_type_by_id(btf, t->type); - if (btf_type_is_small_int(t) || btf_is_any_enum(t)) + if (btf_type_is_small_int(t) || btf_is_any_enum(t) || __btf_type_is_struct(t)) /* accessing a scalar */ return true; if (!btf_type_is_ptr(t)) { @@ -5425,7 +5448,7 @@ bool btf_ctx_access(int off, int size, enum bpf_access_type type, "func '%s' arg%d '%s' has type %s. Only pointer access is allowed\n", tname, arg, __btf_name_by_offset(btf, t->name_off), - btf_kind_str[BTF_INFO_KIND(t->info)]); + btf_type_str(t)); return false; } @@ -5509,11 +5532,11 @@ bool btf_ctx_access(int off, int size, enum bpf_access_type type, if (!btf_type_is_struct(t)) { bpf_log(log, "func '%s' arg%d type %s is not a struct\n", - tname, arg, btf_kind_str[BTF_INFO_KIND(t->info)]); + tname, arg, btf_type_str(t)); return false; } bpf_log(log, "func '%s' arg%d has btf_id %d type %s '%s'\n", - tname, arg, info->btf_id, btf_kind_str[BTF_INFO_KIND(t->info)], + tname, arg, info->btf_id, btf_type_str(t), __btf_name_by_offset(btf, t->name_off)); return true; } @@ -5881,7 +5904,7 @@ static int __get_type_size(struct btf *btf, u32 btf_id, if (btf_type_is_ptr(t)) /* kernel size of pointer. Not BPF's size of pointer*/ return sizeof(void *); - if (btf_type_is_int(t) || btf_is_any_enum(t)) + if (btf_type_is_int(t) || btf_is_any_enum(t) || __btf_type_is_struct(t)) return t->size; return -EINVAL; } @@ -5901,8 +5924,10 @@ int btf_distill_func_proto(struct bpf_verifier_log *log, /* BTF function prototype doesn't match the verifier types. * Fall back to MAX_BPF_FUNC_REG_ARGS u64 args. 
*/ - for (i = 0; i < MAX_BPF_FUNC_REG_ARGS; i++) + for (i = 0; i < MAX_BPF_FUNC_REG_ARGS; i++) { m->arg_size[i] = 8; + m->arg_flags[i] = 0; + } m->ret_size = 8; m->nr_args = MAX_BPF_FUNC_REG_ARGS; return 0; @@ -5916,10 +5941,10 @@ int btf_distill_func_proto(struct bpf_verifier_log *log, return -EINVAL; } ret = __get_type_size(btf, func->type, &t); - if (ret < 0) { + if (ret < 0 || __btf_type_is_struct(t)) { bpf_log(log, "The function %s return type %s is unsupported.\n", - tname, btf_kind_str[BTF_INFO_KIND(t->info)]); + tname, btf_type_str(t)); return -EINVAL; } m->ret_size = ret; @@ -5932,10 +5957,12 @@ int btf_distill_func_proto(struct bpf_verifier_log *log, return -EINVAL; } ret = __get_type_size(btf, args[i].type, &t); - if (ret < 0) { + + /* No support of struct argument size greater than 16 bytes */ + if (ret < 0 || ret > 16) { bpf_log(log, "The function %s arg%d type %s is unsupported.\n", - tname, i, btf_kind_str[BTF_INFO_KIND(t->info)]); + tname, i, btf_type_str(t)); return -EINVAL; } if (ret == 0) { @@ -5945,6 +5972,7 @@ int btf_distill_func_proto(struct bpf_verifier_log *log, return -EINVAL; } m->arg_size[i] = ret; + m->arg_flags[i] = __btf_type_is_struct(t) ? 
BTF_FMODEL_STRUCT_ARG : 0; } m->nr_args = nargs; return 0; @@ -6166,14 +6194,40 @@ static bool is_kfunc_arg_mem_size(const struct btf *btf, return true; } +static bool btf_is_kfunc_arg_mem_size(const struct btf *btf, + const struct btf_param *arg, + const struct bpf_reg_state *reg, + const char *name) +{ + int len, target_len = strlen(name); + const struct btf_type *t; + const char *param_name; + + t = btf_type_skip_modifiers(btf, arg->type, NULL); + if (!btf_type_is_scalar(t) || reg->type != SCALAR_VALUE) + return false; + + param_name = btf_name_by_offset(btf, arg->name_off); + if (str_is_empty(param_name)) + return false; + len = strlen(param_name); + if (len != target_len) + return false; + if (strcmp(param_name, name)) + return false; + + return true; +} + static int btf_check_func_arg_match(struct bpf_verifier_env *env, const struct btf *btf, u32 func_id, struct bpf_reg_state *regs, bool ptr_to_mem_ok, - u32 kfunc_flags) + struct bpf_kfunc_arg_meta *kfunc_meta, + bool processing_call) { enum bpf_prog_type prog_type = resolve_prog_type(env->prog); - bool rel = false, kptr_get = false, trusted_arg = false; + bool rel = false, kptr_get = false, trusted_args = false; bool sleepable = false; struct bpf_verifier_log *log = &env->log; u32 i, nargs, ref_id, ref_obj_id = 0; @@ -6207,12 +6261,12 @@ static int btf_check_func_arg_match(struct bpf_verifier_env *env, return -EINVAL; } - if (is_kfunc) { + if (is_kfunc && kfunc_meta) { /* Only kfunc can be release func */ - rel = kfunc_flags & KF_RELEASE; - kptr_get = kfunc_flags & KF_KPTR_GET; - trusted_arg = kfunc_flags & KF_TRUSTED_ARGS; - sleepable = kfunc_flags & KF_SLEEPABLE; + rel = kfunc_meta->flags & KF_RELEASE; + kptr_get = kfunc_meta->flags & KF_KPTR_GET; + trusted_args = kfunc_meta->flags & KF_TRUSTED_ARGS; + sleepable = kfunc_meta->flags & KF_SLEEPABLE; } /* check that BTF function arguments match actual types that the @@ -6222,9 +6276,42 @@ static int btf_check_func_arg_match(struct bpf_verifier_env *env, enum 
bpf_arg_type arg_type = ARG_DONTCARE; u32 regno = i + 1; struct bpf_reg_state *reg = &regs[regno]; + bool obj_ptr = false; t = btf_type_skip_modifiers(btf, args[i].type, NULL); if (btf_type_is_scalar(t)) { + if (is_kfunc && kfunc_meta) { + bool is_buf_size = false; + + /* check for any const scalar parameter of name "rdonly_buf_size" + * or "rdwr_buf_size" + */ + if (btf_is_kfunc_arg_mem_size(btf, &args[i], reg, + "rdonly_buf_size")) { + kfunc_meta->r0_rdonly = true; + is_buf_size = true; + } else if (btf_is_kfunc_arg_mem_size(btf, &args[i], reg, + "rdwr_buf_size")) + is_buf_size = true; + + if (is_buf_size) { + if (kfunc_meta->r0_size) { + bpf_log(log, "2 or more rdonly/rdwr_buf_size parameters for kfunc"); + return -EINVAL; + } + + if (!tnum_is_const(reg->var_off)) { + bpf_log(log, "R%d is not a const\n", regno); + return -EINVAL; + } + + kfunc_meta->r0_size = reg->var_off.value; + ret = mark_chain_precision(env, regno); + if (ret) + return ret; + } + } + if (reg->type == SCALAR_VALUE) continue; bpf_log(log, "R%d is not a scalar\n", regno); @@ -6237,10 +6324,17 @@ static int btf_check_func_arg_match(struct bpf_verifier_env *env, return -EINVAL; } + /* These register types have special constraints wrt ref_obj_id + * and offset checks. The rest of trusted args don't. + */ + obj_ptr = reg->type == PTR_TO_CTX || reg->type == PTR_TO_BTF_ID || + reg2btf_ids[base_type(reg->type)]; + /* Check if argument must be a referenced pointer, args + i has * been verified to be a pointer (after skipping modifiers). + * PTR_TO_CTX is ok without having non-zero ref_obj_id.
*/ - if (is_kfunc && trusted_arg && !reg->ref_obj_id) { + if (is_kfunc && trusted_args && (obj_ptr && reg->type != PTR_TO_CTX) && !reg->ref_obj_id) { bpf_log(log, "R%d must be referenced\n", regno); return -EINVAL; } @@ -6249,12 +6343,23 @@ static int btf_check_func_arg_match(struct bpf_verifier_env *env, ref_tname = btf_name_by_offset(btf, ref_t->name_off); /* Trusted args have the same offset checks as release arguments */ - if (trusted_arg || (rel && reg->ref_obj_id)) + if ((trusted_args && obj_ptr) || (rel && reg->ref_obj_id)) arg_type |= OBJ_RELEASE; ret = check_func_arg_reg_off(env, reg, regno, arg_type); if (ret < 0) return ret; + if (is_kfunc && reg->ref_obj_id) { + /* Ensure only one argument is referenced PTR_TO_BTF_ID */ + if (ref_obj_id) { + bpf_log(log, "verifier internal error: more than one arg with ref_obj_id R%d %u %u\n", + regno, reg->ref_obj_id, ref_obj_id); + return -EFAULT; + } + ref_regno = regno; + ref_obj_id = reg->ref_obj_id; + } + /* kptr_get is only true for kfunc */ if (i == 0 && kptr_get) { struct bpf_map_value_off_desc *off_desc; @@ -6327,16 +6432,6 @@ static int btf_check_func_arg_match(struct bpf_verifier_env *env, if (reg->type == PTR_TO_BTF_ID) { reg_btf = reg->btf; reg_ref_id = reg->btf_id; - /* Ensure only one argument is referenced PTR_TO_BTF_ID */ - if (reg->ref_obj_id) { - if (ref_obj_id) { - bpf_log(log, "verifier internal error: more than one arg with ref_obj_id R%d %u %u\n", - regno, reg->ref_obj_id, ref_obj_id); - return -EFAULT; - } - ref_regno = regno; - ref_obj_id = reg->ref_obj_id; - } } else { reg_btf = btf_vmlinux; reg_ref_id = *reg2btf_ids[base_type(reg->type)]; @@ -6348,7 +6443,7 @@ static int btf_check_func_arg_match(struct bpf_verifier_env *env, reg_ref_t->name_off); if (!btf_struct_ids_match(log, reg_btf, reg_ref_id, reg->off, btf, ref_id, - trusted_arg || (rel && reg->ref_obj_id))) { + trusted_args || (rel && reg->ref_obj_id))) { bpf_log(log, "kernel function %s args#%d expected pointer to %s %s but R%d has a 
pointer to %s %s\n", func_name, i, btf_type_str(ref_t), ref_tname, @@ -6356,21 +6451,26 @@ static int btf_check_func_arg_match(struct bpf_verifier_env *env, reg_ref_tname); return -EINVAL; } - } else if (ptr_to_mem_ok) { + } else if (ptr_to_mem_ok && processing_call) { const struct btf_type *resolve_ret; u32 type_size; if (is_kfunc) { bool arg_mem_size = i + 1 < nargs && is_kfunc_arg_mem_size(btf, &args[i + 1], &regs[regno + 1]); + bool arg_dynptr = btf_type_is_struct(ref_t) && + !strcmp(ref_tname, + stringify_struct(bpf_dynptr_kern)); /* Permit pointer to mem, but only when argument * type is pointer to scalar, or struct composed * (recursively) of scalars. * When arg_mem_size is true, the pointer can be * void *. + * Also permit initialized local dynamic pointers. */ if (!btf_type_is_scalar(ref_t) && !__btf_type_is_scalar_struct(log, btf, ref_t, 0) && + !arg_dynptr && (arg_mem_size ? !btf_type_is_void(ref_t) : 1)) { bpf_log(log, "arg#%d pointer type %s %s must point to %sscalar, or struct with scalar\n", @@ -6378,6 +6478,34 @@ static int btf_check_func_arg_match(struct bpf_verifier_env *env, return -EINVAL; } + if (arg_dynptr) { + if (reg->type != PTR_TO_STACK) { + bpf_log(log, "arg#%d pointer type %s %s not to stack\n", + i, btf_type_str(ref_t), + ref_tname); + return -EINVAL; + } + + if (!is_dynptr_reg_valid_init(env, reg)) { + bpf_log(log, + "arg#%d pointer type %s %s must be valid and initialized\n", + i, btf_type_str(ref_t), + ref_tname); + return -EINVAL; + } + + if (!is_dynptr_type_expected(env, reg, + ARG_PTR_TO_DYNPTR | DYNPTR_TYPE_LOCAL)) { + bpf_log(log, + "arg#%d pointer type %s %s points to unsupported dynamic pointer type\n", + i, btf_type_str(ref_t), + ref_tname); + return -EINVAL; + } + + continue; + } + /* Check for mem, len pair */ if (arg_mem_size) { if (check_kfunc_mem_size_reg(env, &regs[regno + 1], regno + 1)) { @@ -6427,11 +6555,14 @@ static int btf_check_func_arg_match(struct bpf_verifier_env *env, return -EINVAL; } + if (kfunc_meta &&
ref_obj_id) + kfunc_meta->ref_obj_id = ref_obj_id; + /* returns argument register number > 0 in case of reference release kfunc */ return rel ? ref_regno : 0; } -/* Compare BTF of a function with given bpf_reg_state. +/* Compare BTF of a function declaration with given bpf_reg_state. * Returns: * EFAULT - there is a verifier bug. Abort verification. * EINVAL - there is a type mismatch or BTF is not available. @@ -6458,7 +6589,50 @@ int btf_check_subprog_arg_match(struct bpf_verifier_env *env, int subprog, return -EINVAL; is_global = prog->aux->func_info_aux[subprog].linkage == BTF_FUNC_GLOBAL; - err = btf_check_func_arg_match(env, btf, btf_id, regs, is_global, 0); + err = btf_check_func_arg_match(env, btf, btf_id, regs, is_global, NULL, false); + + /* Compiler optimizations can remove arguments from static functions + * or mismatched type can be passed into a global function. + * In such cases mark the function as unreliable from BTF point of view. + */ + if (err) + prog->aux->func_info_aux[subprog].unreliable = true; + return err; +} + +/* Compare BTF of a function call with given bpf_reg_state. + * Returns: + * EFAULT - there is a verifier bug. Abort verification. + * EINVAL - there is a type mismatch or BTF is not available. + * 0 - BTF matches with what bpf_reg_state expects. + * Only PTR_TO_CTX and SCALAR_VALUE states are recognized. + * + * NOTE: the code is duplicated from btf_check_subprog_arg_match() + * because btf_check_func_arg_match() is still doing both. Once that + * function is split in 2, we can call from here btf_check_subprog_arg_match() + * first, and then treat the calling part in a new code path. 
+ */ +int btf_check_subprog_call(struct bpf_verifier_env *env, int subprog, + struct bpf_reg_state *regs) +{ + struct bpf_prog *prog = env->prog; + struct btf *btf = prog->aux->btf; + bool is_global; + u32 btf_id; + int err; + + if (!prog->aux->func_info) + return -EINVAL; + + btf_id = prog->aux->func_info[subprog].type_id; + if (!btf_id) + return -EFAULT; + + if (prog->aux->func_info_aux[subprog].unreliable) + return -EINVAL; + + is_global = prog->aux->func_info_aux[subprog].linkage == BTF_FUNC_GLOBAL; + err = btf_check_func_arg_match(env, btf, btf_id, regs, is_global, NULL, true); /* Compiler optimizations can remove arguments from static functions * or mismatched type can be passed into a global function. @@ -6472,9 +6646,9 @@ int btf_check_subprog_arg_match(struct bpf_verifier_env *env, int subprog, int btf_check_kfunc_arg_match(struct bpf_verifier_env *env, const struct btf *btf, u32 func_id, struct bpf_reg_state *regs, - u32 kfunc_flags) + struct bpf_kfunc_arg_meta *meta) { - return btf_check_func_arg_match(env, btf, func_id, regs, true, kfunc_flags); + return btf_check_func_arg_match(env, btf, func_id, regs, true, meta, true); } /* Convert BTF of a function into bpf_reg_state if possible @@ -6588,7 +6762,7 @@ int btf_prepare_func_args(struct bpf_verifier_env *env, int subprog, continue; } bpf_log(log, "Arg#%d type %s in %s() is not supported yet.\n", - i, btf_kind_str[BTF_INFO_KIND(t->info)], tname); + i, btf_type_str(t), tname); return -EINVAL; } return 0; @@ -7243,6 +7417,7 @@ static int bpf_prog_type_to_kfunc_hook(enum bpf_prog_type prog_type) case BPF_PROG_TYPE_STRUCT_OPS: return BTF_KFUNC_HOOK_STRUCT_OPS; case BPF_PROG_TYPE_TRACING: + case BPF_PROG_TYPE_LSM: return BTF_KFUNC_HOOK_TRACING; case BPF_PROG_TYPE_SYSCALL: return BTF_KFUNC_HOOK_SYSCALL; diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index d1be78c28619..711fd293b6de 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -825,6 +825,11 @@ struct bpf_prog_pack { unsigned long bitmap[]; }; 
+void bpf_jit_fill_hole_with_zero(void *area, unsigned int size) +{ + memset(area, 0, size); +} + #define BPF_PROG_SIZE_TO_NBITS(size) (round_up(size, BPF_PROG_CHUNK_SIZE) / BPF_PROG_CHUNK_SIZE) static DEFINE_MUTEX(pack_mutex); @@ -864,7 +869,7 @@ static struct bpf_prog_pack *alloc_new_pack(bpf_jit_fill_hole_t bpf_fill_ill_ins return pack; } -static void *bpf_prog_pack_alloc(u32 size, bpf_jit_fill_hole_t bpf_fill_ill_insns) +void *bpf_prog_pack_alloc(u32 size, bpf_jit_fill_hole_t bpf_fill_ill_insns) { unsigned int nbits = BPF_PROG_SIZE_TO_NBITS(size); struct bpf_prog_pack *pack; @@ -905,7 +910,7 @@ out: return ptr; } -static void bpf_prog_pack_free(struct bpf_binary_header *hdr) +void bpf_prog_pack_free(struct bpf_binary_header *hdr) { struct bpf_prog_pack *pack = NULL, *tmp; unsigned int nbits; diff --git a/kernel/bpf/dispatcher.c b/kernel/bpf/dispatcher.c index 2444bd15cc2d..fa64b80b8bca 100644 --- a/kernel/bpf/dispatcher.c +++ b/kernel/bpf/dispatcher.c @@ -85,12 +85,12 @@ static bool bpf_dispatcher_remove_prog(struct bpf_dispatcher *d, return false; } -int __weak arch_prepare_bpf_dispatcher(void *image, s64 *funcs, int num_funcs) +int __weak arch_prepare_bpf_dispatcher(void *image, void *buf, s64 *funcs, int num_funcs) { return -ENOTSUPP; } -static int bpf_dispatcher_prepare(struct bpf_dispatcher *d, void *image) +static int bpf_dispatcher_prepare(struct bpf_dispatcher *d, void *image, void *buf) { s64 ips[BPF_DISPATCHER_MAX] = {}, *ipsp = &ips[0]; int i; @@ -99,12 +99,12 @@ static int bpf_dispatcher_prepare(struct bpf_dispatcher *d, void *image) if (d->progs[i].prog) *ipsp++ = (s64)(uintptr_t)d->progs[i].prog->bpf_func; } - return arch_prepare_bpf_dispatcher(image, &ips[0], d->num_progs); + return arch_prepare_bpf_dispatcher(image, buf, &ips[0], d->num_progs); } static void bpf_dispatcher_update(struct bpf_dispatcher *d, int prev_num_progs) { - void *old, *new; + void *old, *new, *tmp; u32 noff; int err; @@ -117,8 +117,14 @@ static void 
bpf_dispatcher_update(struct bpf_dispatcher *d, int prev_num_progs) } new = d->num_progs ? d->image + noff : NULL; + tmp = d->num_progs ? d->rw_image + noff : NULL; if (new) { - if (bpf_dispatcher_prepare(d, new)) + /* Prepare the dispatcher in d->rw_image. Then use + * bpf_arch_text_copy to update d->image, which is RO+X. + */ + if (bpf_dispatcher_prepare(d, new, tmp)) + return; + if (IS_ERR(bpf_arch_text_copy(new, tmp, PAGE_SIZE / 2))) return; } @@ -140,9 +146,18 @@ void bpf_dispatcher_change_prog(struct bpf_dispatcher *d, struct bpf_prog *from, mutex_lock(&d->mutex); if (!d->image) { - d->image = bpf_jit_alloc_exec_page(); + d->image = bpf_prog_pack_alloc(PAGE_SIZE, bpf_jit_fill_hole_with_zero); if (!d->image) goto out; + d->rw_image = bpf_jit_alloc_exec(PAGE_SIZE); + if (!d->rw_image) { + u32 size = PAGE_SIZE; + + bpf_arch_text_copy(d->image, &size, sizeof(size)); + bpf_prog_pack_free((struct bpf_binary_header *)d->image); + d->image = NULL; + goto out; + } bpf_image_ksym_add(d->image, &d->ksym); } diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c index 0fe3f136cbbe..ed3f8a53603b 100644 --- a/kernel/bpf/hashtab.c +++ b/kernel/bpf/hashtab.c @@ -68,24 +68,16 @@ * In theory the BPF locks could be converted to regular spinlocks as well, * but the bucket locks and percpu_freelist locks can be taken from * arbitrary contexts (perf, kprobes, tracepoints) which are required to be - * atomic contexts even on RT. These mechanisms require preallocated maps, - * so there is no need to invoke memory allocations within the lock held - * sections. - * - * BPF maps which need dynamic allocation are only used from (forced) - * thread context on RT and can therefore use regular spinlocks which in - * turn allows to invoke memory allocations from the lock held section. - * - * On a non RT kernel this distinction is neither possible nor required. - * spinlock maps to raw_spinlock and the extra code is optimized out by the - * compiler. + * atomic contexts even on RT. 
Before the introduction of bpf_mem_alloc, + * it is only safe to use raw spinlock for preallocated hash map on a RT kernel, + * because there is no memory allocation within the lock held sections. However + * after hash map was fully converted to use bpf_mem_alloc, there will be + * non-synchronous memory allocation for non-preallocated hash map, so it is + * safe to always use raw spinlock for bucket lock. */ struct bucket { struct hlist_nulls_head head; - union { - raw_spinlock_t raw_lock; - spinlock_t lock; - }; + raw_spinlock_t raw_lock; }; #define HASHTAB_MAP_LOCK_COUNT 8 @@ -141,26 +133,15 @@ static inline bool htab_is_prealloc(const struct bpf_htab *htab) return !(htab->map.map_flags & BPF_F_NO_PREALLOC); } -static inline bool htab_use_raw_lock(const struct bpf_htab *htab) -{ - return (!IS_ENABLED(CONFIG_PREEMPT_RT) || htab_is_prealloc(htab)); -} - static void htab_init_buckets(struct bpf_htab *htab) { unsigned int i; for (i = 0; i < htab->n_buckets; i++) { INIT_HLIST_NULLS_HEAD(&htab->buckets[i].head, i); - if (htab_use_raw_lock(htab)) { - raw_spin_lock_init(&htab->buckets[i].raw_lock); - lockdep_set_class(&htab->buckets[i].raw_lock, - &htab->lockdep_key); - } else { - spin_lock_init(&htab->buckets[i].lock); - lockdep_set_class(&htab->buckets[i].lock, + raw_spin_lock_init(&htab->buckets[i].raw_lock); + lockdep_set_class(&htab->buckets[i].raw_lock, &htab->lockdep_key); - } cond_resched(); } } @@ -170,28 +151,17 @@ static inline int htab_lock_bucket(const struct bpf_htab *htab, unsigned long *pflags) { unsigned long flags; - bool use_raw_lock; hash = hash & HASHTAB_MAP_LOCK_MASK; - use_raw_lock = htab_use_raw_lock(htab); - if (use_raw_lock) - preempt_disable(); - else - migrate_disable(); + preempt_disable(); if (unlikely(__this_cpu_inc_return(*(htab->map_locked[hash])) != 1)) { __this_cpu_dec(*(htab->map_locked[hash])); - if (use_raw_lock) - preempt_enable(); - else - migrate_enable(); + preempt_enable(); return -EBUSY; } - if (use_raw_lock) - 
raw_spin_lock_irqsave(&b->raw_lock, flags); - else - spin_lock_irqsave(&b->lock, flags); + raw_spin_lock_irqsave(&b->raw_lock, flags); *pflags = flags; return 0; @@ -201,18 +171,10 @@ static inline void htab_unlock_bucket(const struct bpf_htab *htab, struct bucket *b, u32 hash, unsigned long flags) { - bool use_raw_lock = htab_use_raw_lock(htab); - hash = hash & HASHTAB_MAP_LOCK_MASK; - if (use_raw_lock) - raw_spin_unlock_irqrestore(&b->raw_lock, flags); - else - spin_unlock_irqrestore(&b->lock, flags); + raw_spin_unlock_irqrestore(&b->raw_lock, flags); __this_cpu_dec(*(htab->map_locked[hash])); - if (use_raw_lock) - preempt_enable(); - else - migrate_enable(); + preempt_enable(); } static bool htab_lru_map_delete_node(void *arg, struct bpf_lru_node *node); @@ -622,6 +584,8 @@ static struct bpf_map *htab_map_alloc(union bpf_attr *attr) free_prealloc: prealloc_destroy(htab); free_map_locked: + if (htab->use_percpu_counter) + percpu_counter_destroy(&htab->pcount); for (i = 0; i < HASHTAB_MAP_LOCK_COUNT; i++) free_percpu(htab->map_locked[i]); bpf_map_area_free(htab->buckets); diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c index fc08035f14ed..b069517a3da0 100644 --- a/kernel/bpf/helpers.c +++ b/kernel/bpf/helpers.c @@ -15,6 +15,7 @@ #include <linux/ctype.h> #include <linux/jiffies.h> #include <linux/pid_namespace.h> +#include <linux/poison.h> #include <linux/proc_ns.h> #include <linux/security.h> #include <linux/btf_ids.h> @@ -1376,10 +1377,9 @@ BPF_CALL_2(bpf_kptr_xchg, void *, map_value, void *, ptr) } /* Unlike other PTR_TO_BTF_ID helpers the btf_id in bpf_kptr_xchg() - * helper is determined dynamically by the verifier. + * helper is determined dynamically by the verifier. Use BPF_PTR_POISON to + * denote type that verifier will determine. 
*/ -#define BPF_PTR_POISON ((void *)((0xeB9FUL << 2) + POISON_POINTER_DELTA)) - static const struct bpf_func_proto bpf_kptr_xchg_proto = { .func = bpf_kptr_xchg, .gpl_only = false, @@ -1408,7 +1408,7 @@ static void bpf_dynptr_set_type(struct bpf_dynptr_kern *ptr, enum bpf_dynptr_typ ptr->size |= type << DYNPTR_TYPE_SHIFT; } -static u32 bpf_dynptr_get_size(struct bpf_dynptr_kern *ptr) +u32 bpf_dynptr_get_size(struct bpf_dynptr_kern *ptr) { return ptr->size & DYNPTR_SIZE_MASK; } @@ -1446,6 +1446,8 @@ BPF_CALL_4(bpf_dynptr_from_mem, void *, data, u32, size, u64, flags, struct bpf_ { int err; + BTF_TYPE_EMIT(struct bpf_dynptr); + err = bpf_dynptr_check_size(size); if (err) goto error; @@ -1659,6 +1661,8 @@ bpf_base_func_proto(enum bpf_func_id func_id) return &bpf_for_each_map_elem_proto; case BPF_FUNC_loop: return &bpf_loop_proto; + case BPF_FUNC_user_ringbuf_drain: + return &bpf_user_ringbuf_drain_proto; default: break; } diff --git a/kernel/bpf/memalloc.c b/kernel/bpf/memalloc.c index 5cc952da7d41..5f83be1d2018 100644 --- a/kernel/bpf/memalloc.c +++ b/kernel/bpf/memalloc.c @@ -277,7 +277,8 @@ static void free_bulk(struct bpf_mem_cache *c) local_dec(&c->active); if (IS_ENABLED(CONFIG_PREEMPT_RT)) local_irq_restore(flags); - enque_to_free(c, llnode); + if (llnode) + enque_to_free(c, llnode); } while (cnt > (c->high_watermark + c->low_watermark) / 2); /* and drain free_llist_extra */ @@ -610,7 +611,7 @@ void notrace bpf_mem_free(struct bpf_mem_alloc *ma, void *ptr) if (!ptr) return; - idx = bpf_mem_cache_idx(__ksize(ptr - LLIST_NODE_SZ)); + idx = bpf_mem_cache_idx(ksize(ptr - LLIST_NODE_SZ)); if (idx < 0) return; diff --git a/kernel/bpf/percpu_freelist.c b/kernel/bpf/percpu_freelist.c index 00b874c8e889..b6e7f5c5b9ab 100644 --- a/kernel/bpf/percpu_freelist.c +++ b/kernel/bpf/percpu_freelist.c @@ -58,23 +58,21 @@ static inline void ___pcpu_freelist_push_nmi(struct pcpu_freelist *s, { int cpu, orig_cpu; - orig_cpu = cpu = raw_smp_processor_id(); + orig_cpu = 
raw_smp_processor_id(); while (1) { - struct pcpu_freelist_head *head; + for_each_cpu_wrap(cpu, cpu_possible_mask, orig_cpu) { + struct pcpu_freelist_head *head; - head = per_cpu_ptr(s->freelist, cpu); - if (raw_spin_trylock(&head->lock)) { - pcpu_freelist_push_node(head, node); - raw_spin_unlock(&head->lock); - return; + head = per_cpu_ptr(s->freelist, cpu); + if (raw_spin_trylock(&head->lock)) { + pcpu_freelist_push_node(head, node); + raw_spin_unlock(&head->lock); + return; + } } - cpu = cpumask_next(cpu, cpu_possible_mask); - if (cpu >= nr_cpu_ids) - cpu = 0; /* cannot lock any per cpu lock, try extralist */ - if (cpu == orig_cpu && - pcpu_freelist_try_push_extra(s, node)) + if (pcpu_freelist_try_push_extra(s, node)) return; } } @@ -125,13 +123,12 @@ static struct pcpu_freelist_node *___pcpu_freelist_pop(struct pcpu_freelist *s) { struct pcpu_freelist_head *head; struct pcpu_freelist_node *node; - int orig_cpu, cpu; + int cpu; - orig_cpu = cpu = raw_smp_processor_id(); - while (1) { + for_each_cpu_wrap(cpu, cpu_possible_mask, raw_smp_processor_id()) { head = per_cpu_ptr(s->freelist, cpu); if (!READ_ONCE(head->first)) - goto next_cpu; + continue; raw_spin_lock(&head->lock); node = head->first; if (node) { @@ -140,12 +137,6 @@ static struct pcpu_freelist_node *___pcpu_freelist_pop(struct pcpu_freelist *s) return node; } raw_spin_unlock(&head->lock); -next_cpu: - cpu = cpumask_next(cpu, cpu_possible_mask); - if (cpu >= nr_cpu_ids) - cpu = 0; - if (cpu == orig_cpu) - break; } /* per cpu lists are all empty, try extralist */ @@ -164,13 +155,12 @@ ___pcpu_freelist_pop_nmi(struct pcpu_freelist *s) { struct pcpu_freelist_head *head; struct pcpu_freelist_node *node; - int orig_cpu, cpu; + int cpu; - orig_cpu = cpu = raw_smp_processor_id(); - while (1) { + for_each_cpu_wrap(cpu, cpu_possible_mask, raw_smp_processor_id()) { head = per_cpu_ptr(s->freelist, cpu); if (!READ_ONCE(head->first)) - goto next_cpu; + continue; if (raw_spin_trylock(&head->lock)) { node = 
head->first; if (node) { @@ -180,12 +170,6 @@ ___pcpu_freelist_pop_nmi(struct pcpu_freelist *s) } raw_spin_unlock(&head->lock); } -next_cpu: - cpu = cpumask_next(cpu, cpu_possible_mask); - if (cpu >= nr_cpu_ids) - cpu = 0; - if (cpu == orig_cpu) - break; } /* cannot pop from per cpu lists, try extralist */ diff --git a/kernel/bpf/ringbuf.c b/kernel/bpf/ringbuf.c index b483aea35f41..9e832acf4692 100644 --- a/kernel/bpf/ringbuf.c +++ b/kernel/bpf/ringbuf.c @@ -38,10 +38,43 @@ struct bpf_ringbuf { struct page **pages; int nr_pages; spinlock_t spinlock ____cacheline_aligned_in_smp; - /* Consumer and producer counters are put into separate pages to allow - * mapping consumer page as r/w, but restrict producer page to r/o. - * This protects producer position from being modified by user-space - * application and ruining in-kernel position tracking. + /* For user-space producer ring buffers, an atomic_t busy bit is used + * to synchronize access to the ring buffers in the kernel, rather than + * the spinlock that is used for kernel-producer ring buffers. This is + * done because the ring buffer must hold a lock across a BPF program's + * callback: + * + * __bpf_user_ringbuf_peek() // lock acquired + * -> program callback_fn() + * -> __bpf_user_ringbuf_sample_release() // lock released + * + * It is unsafe and incorrect to hold an IRQ spinlock across what could + * be a long execution window, so we instead simply disallow concurrent + * access to the ring buffer by kernel consumers, and return -EBUSY from + * __bpf_user_ringbuf_peek() if the busy bit is held by another task. + */ + atomic_t busy ____cacheline_aligned_in_smp; + /* Consumer and producer counters are put into separate pages to + * allow each position to be mapped with different permissions. + * This prevents a user-space application from modifying the + * position and ruining in-kernel tracking. The permissions of the + * pages depend on who is producing samples: user-space or the + * kernel. 
+ * + * Kernel-producer + * --------------- + * The producer position and data pages are mapped as r/o in + * userspace. For this approach, bits in the header of samples are + * used to signal to user-space, and to other producers, whether a + * sample is currently being written. + * + * User-space producer + * ------------------- + * Only the page containing the consumer position is mapped r/o in + * user-space. User-space producers also use bits of the header to + * communicate to the kernel, but the kernel must carefully check and + * validate each sample to ensure that they're correctly formatted, and + * fully contained within the ring buffer. */ unsigned long consumer_pos __aligned(PAGE_SIZE); unsigned long producer_pos __aligned(PAGE_SIZE); @@ -136,6 +169,7 @@ static struct bpf_ringbuf *bpf_ringbuf_alloc(size_t data_sz, int numa_node) return NULL; spin_lock_init(&rb->spinlock); + atomic_set(&rb->busy, 0); init_waitqueue_head(&rb->waitq); init_irq_work(&rb->work, bpf_ringbuf_notify); @@ -224,7 +258,7 @@ static int ringbuf_map_get_next_key(struct bpf_map *map, void *key, return -ENOTSUPP; } -static int ringbuf_map_mmap(struct bpf_map *map, struct vm_area_struct *vma) +static int ringbuf_map_mmap_kern(struct bpf_map *map, struct vm_area_struct *vma) { struct bpf_ringbuf_map *rb_map; @@ -242,6 +276,26 @@ static int ringbuf_map_mmap(struct bpf_map *map, struct vm_area_struct *vma) vma->vm_pgoff + RINGBUF_PGOFF); } +static int ringbuf_map_mmap_user(struct bpf_map *map, struct vm_area_struct *vma) +{ + struct bpf_ringbuf_map *rb_map; + + rb_map = container_of(map, struct bpf_ringbuf_map, map); + + if (vma->vm_flags & VM_WRITE) { + if (vma->vm_pgoff == 0) + /* Disallow writable mappings to the consumer pointer, + * and allow writable mappings to both the producer + * position, and the ring buffer data itself. 
+ */ + return -EPERM; + } else { + vma->vm_flags &= ~VM_MAYWRITE; + } + /* remap_vmalloc_range() checks size and offset constraints */ + return remap_vmalloc_range(vma, rb_map->rb, vma->vm_pgoff + RINGBUF_PGOFF); +} + static unsigned long ringbuf_avail_data_sz(struct bpf_ringbuf *rb) { unsigned long cons_pos, prod_pos; @@ -251,8 +305,13 @@ static unsigned long ringbuf_avail_data_sz(struct bpf_ringbuf *rb) return prod_pos - cons_pos; } -static __poll_t ringbuf_map_poll(struct bpf_map *map, struct file *filp, - struct poll_table_struct *pts) +static u32 ringbuf_total_data_sz(const struct bpf_ringbuf *rb) +{ + return rb->mask + 1; +} + +static __poll_t ringbuf_map_poll_kern(struct bpf_map *map, struct file *filp, + struct poll_table_struct *pts) { struct bpf_ringbuf_map *rb_map; @@ -264,13 +323,26 @@ static __poll_t ringbuf_map_poll(struct bpf_map *map, struct file *filp, return 0; } +static __poll_t ringbuf_map_poll_user(struct bpf_map *map, struct file *filp, + struct poll_table_struct *pts) +{ + struct bpf_ringbuf_map *rb_map; + + rb_map = container_of(map, struct bpf_ringbuf_map, map); + poll_wait(filp, &rb_map->rb->waitq, pts); + + if (ringbuf_avail_data_sz(rb_map->rb) < ringbuf_total_data_sz(rb_map->rb)) + return EPOLLOUT | EPOLLWRNORM; + return 0; +} + BTF_ID_LIST_SINGLE(ringbuf_map_btf_ids, struct, bpf_ringbuf_map) const struct bpf_map_ops ringbuf_map_ops = { .map_meta_equal = bpf_map_meta_equal, .map_alloc = ringbuf_map_alloc, .map_free = ringbuf_map_free, - .map_mmap = ringbuf_map_mmap, - .map_poll = ringbuf_map_poll, + .map_mmap = ringbuf_map_mmap_kern, + .map_poll = ringbuf_map_poll_kern, .map_lookup_elem = ringbuf_map_lookup_elem, .map_update_elem = ringbuf_map_update_elem, .map_delete_elem = ringbuf_map_delete_elem, @@ -278,6 +350,20 @@ const struct bpf_map_ops ringbuf_map_ops = { .map_btf_id = &ringbuf_map_btf_ids[0], }; +BTF_ID_LIST_SINGLE(user_ringbuf_map_btf_ids, struct, bpf_ringbuf_map) +const struct bpf_map_ops user_ringbuf_map_ops = { + 
.map_meta_equal = bpf_map_meta_equal, + .map_alloc = ringbuf_map_alloc, + .map_free = ringbuf_map_free, + .map_mmap = ringbuf_map_mmap_user, + .map_poll = ringbuf_map_poll_user, + .map_lookup_elem = ringbuf_map_lookup_elem, + .map_update_elem = ringbuf_map_update_elem, + .map_delete_elem = ringbuf_map_delete_elem, + .map_get_next_key = ringbuf_map_get_next_key, + .map_btf_id = &user_ringbuf_map_btf_ids[0], +}; + /* Given pointer to ring buffer record metadata and struct bpf_ringbuf itself, * calculate offset from record metadata to ring buffer in pages, rounded * down. This page offset is stored as part of record metadata and allows to @@ -312,7 +398,7 @@ static void *__bpf_ringbuf_reserve(struct bpf_ringbuf *rb, u64 size) return NULL; len = round_up(size + BPF_RINGBUF_HDR_SZ, 8); - if (len > rb->mask + 1) + if (len > ringbuf_total_data_sz(rb)) return NULL; cons_pos = smp_load_acquire(&rb->consumer_pos); @@ -459,7 +545,7 @@ BPF_CALL_2(bpf_ringbuf_query, struct bpf_map *, map, u64, flags) case BPF_RB_AVAIL_DATA: return ringbuf_avail_data_sz(rb); case BPF_RB_RING_SIZE: - return rb->mask + 1; + return ringbuf_total_data_sz(rb); case BPF_RB_CONS_POS: return smp_load_acquire(&rb->consumer_pos); case BPF_RB_PROD_POS: @@ -553,3 +639,138 @@ const struct bpf_func_proto bpf_ringbuf_discard_dynptr_proto = { .arg1_type = ARG_PTR_TO_DYNPTR | DYNPTR_TYPE_RINGBUF | OBJ_RELEASE, .arg2_type = ARG_ANYTHING, }; + +static int __bpf_user_ringbuf_peek(struct bpf_ringbuf *rb, void **sample, u32 *size) +{ + int err; + u32 hdr_len, sample_len, total_len, flags, *hdr; + u64 cons_pos, prod_pos; + + /* Synchronizes with smp_store_release() in user-space producer. 
*/ + prod_pos = smp_load_acquire(&rb->producer_pos); + if (prod_pos % 8) + return -EINVAL; + + /* Synchronizes with smp_store_release() in __bpf_user_ringbuf_sample_release() */ + cons_pos = smp_load_acquire(&rb->consumer_pos); + if (cons_pos >= prod_pos) + return -ENODATA; + + hdr = (u32 *)((uintptr_t)rb->data + (uintptr_t)(cons_pos & rb->mask)); + /* Synchronizes with smp_store_release() in user-space producer. */ + hdr_len = smp_load_acquire(hdr); + flags = hdr_len & (BPF_RINGBUF_BUSY_BIT | BPF_RINGBUF_DISCARD_BIT); + sample_len = hdr_len & ~flags; + total_len = round_up(sample_len + BPF_RINGBUF_HDR_SZ, 8); + + /* The sample must fit within the region advertised by the producer position. */ + if (total_len > prod_pos - cons_pos) + return -EINVAL; + + /* The sample must fit within the data region of the ring buffer. */ + if (total_len > ringbuf_total_data_sz(rb)) + return -E2BIG; + + /* The sample must fit into a struct bpf_dynptr. */ + err = bpf_dynptr_check_size(sample_len); + if (err) + return -E2BIG; + + if (flags & BPF_RINGBUF_DISCARD_BIT) { + /* If the discard bit is set, the sample should be skipped. + * + * Update the consumer pos, and return -EAGAIN so the caller + * knows to skip this sample and try to read the next one. + */ + smp_store_release(&rb->consumer_pos, cons_pos + total_len); + return -EAGAIN; + } + + if (flags & BPF_RINGBUF_BUSY_BIT) + return -ENODATA; + + *sample = (void *)((uintptr_t)rb->data + + (uintptr_t)((cons_pos + BPF_RINGBUF_HDR_SZ) & rb->mask)); + *size = sample_len; + return 0; +} + +static void __bpf_user_ringbuf_sample_release(struct bpf_ringbuf *rb, size_t size, u64 flags) +{ + u64 consumer_pos; + u32 rounded_size = round_up(size + BPF_RINGBUF_HDR_SZ, 8); + + /* Using smp_load_acquire() is unnecessary here, as the busy-bit + * prevents another task from writing to consumer_pos after it was read + * by this task with smp_load_acquire() in __bpf_user_ringbuf_peek(). 
+ */ + consumer_pos = rb->consumer_pos; + /* Synchronizes with smp_load_acquire() in user-space producer. */ + smp_store_release(&rb->consumer_pos, consumer_pos + rounded_size); +} + +BPF_CALL_4(bpf_user_ringbuf_drain, struct bpf_map *, map, + void *, callback_fn, void *, callback_ctx, u64, flags) +{ + struct bpf_ringbuf *rb; + long samples, discarded_samples = 0, ret = 0; + bpf_callback_t callback = (bpf_callback_t)callback_fn; + u64 wakeup_flags = BPF_RB_NO_WAKEUP | BPF_RB_FORCE_WAKEUP; + int busy = 0; + + if (unlikely(flags & ~wakeup_flags)) + return -EINVAL; + + rb = container_of(map, struct bpf_ringbuf_map, map)->rb; + + /* If another consumer is already consuming a sample, wait for them to finish. */ + if (!atomic_try_cmpxchg(&rb->busy, &busy, 1)) + return -EBUSY; + + for (samples = 0; samples < BPF_MAX_USER_RINGBUF_SAMPLES && ret == 0; samples++) { + int err; + u32 size; + void *sample; + struct bpf_dynptr_kern dynptr; + + err = __bpf_user_ringbuf_peek(rb, &sample, &size); + if (err) { + if (err == -ENODATA) { + break; + } else if (err == -EAGAIN) { + discarded_samples++; + continue; + } else { + ret = err; + goto schedule_work_return; + } + } + + bpf_dynptr_init(&dynptr, sample, BPF_DYNPTR_TYPE_LOCAL, 0, size); + ret = callback((uintptr_t)&dynptr, (uintptr_t)callback_ctx, 0, 0, 0); + __bpf_user_ringbuf_sample_release(rb, size, flags); + } + ret = samples - discarded_samples; + +schedule_work_return: + /* Prevent the clearing of the busy-bit from being reordered before the + * storing of any rb consumer or producer positions. 
+ */ + smp_mb__before_atomic(); + atomic_set(&rb->busy, 0); + + if (flags & BPF_RB_FORCE_WAKEUP) + irq_work_queue(&rb->work); + else if (!(flags & BPF_RB_NO_WAKEUP) && samples > 0) + irq_work_queue(&rb->work); + return ret; +} + +const struct bpf_func_proto bpf_user_ringbuf_drain_proto = { + .func = bpf_user_ringbuf_drain, + .ret_type = RET_INTEGER, + .arg1_type = ARG_CONST_MAP_PTR, + .arg2_type = ARG_PTR_TO_FUNC, + .arg3_type = ARG_PTR_TO_STACK_OR_NULL, + .arg4_type = ARG_ANYTHING, +}; diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 4fb08c43420d..372fad5ef3d3 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -598,7 +598,7 @@ void bpf_map_free_kptrs(struct bpf_map *map, void *map_value) if (off_desc->type == BPF_KPTR_UNREF) { u64 *p = (u64 *)btf_id_ptr; - WRITE_ONCE(p, 0); + WRITE_ONCE(*p, 0); continue; } old_ptr = xchg(btf_id_ptr, 0); @@ -1049,7 +1049,8 @@ static int map_check_btf(struct bpf_map *map, const struct btf *btf, } if (map->map_type != BPF_MAP_TYPE_HASH && map->map_type != BPF_MAP_TYPE_LRU_HASH && - map->map_type != BPF_MAP_TYPE_ARRAY) { + map->map_type != BPF_MAP_TYPE_ARRAY && + map->map_type != BPF_MAP_TYPE_PERCPU_ARRAY) { ret = -EOPNOTSUPP; goto free_map_tab; } @@ -1416,19 +1417,14 @@ static int map_update_elem(union bpf_attr *attr, bpfptr_t uattr) } value_size = bpf_map_value_size(map); - - err = -ENOMEM; - value = kvmalloc(value_size, GFP_USER | __GFP_NOWARN); - if (!value) + value = kvmemdup_bpfptr(uvalue, value_size); + if (IS_ERR(value)) { + err = PTR_ERR(value); goto free_key; - - err = -EFAULT; - if (copy_from_bpfptr(value, uvalue, value_size) != 0) - goto free_value; + } err = bpf_map_update_value(map, f, key, value, attr->flags); -free_value: kvfree(value); free_key: kvfree(key); @@ -2097,6 +2093,17 @@ struct bpf_prog_kstats { u64 misses; }; +void notrace bpf_prog_inc_misses_counter(struct bpf_prog *prog) +{ + struct bpf_prog_stats *stats; + unsigned int flags; + + stats = this_cpu_ptr(prog->stats); + flags = 
u64_stats_update_begin_irqsave(&stats->syncp); + u64_stats_inc(&stats->misses); + u64_stats_update_end_irqrestore(&stats->syncp, flags); +} + static void bpf_prog_get_stats(const struct bpf_prog *prog, struct bpf_prog_kstats *stats) { diff --git a/kernel/bpf/task_iter.c b/kernel/bpf/task_iter.c index 8c921799def4..67e03e1833ba 100644 --- a/kernel/bpf/task_iter.c +++ b/kernel/bpf/task_iter.c @@ -10,8 +10,17 @@ #include <linux/btf_ids.h> #include "mmap_unlock_work.h" +static const char * const iter_task_type_names[] = { + "ALL", + "TID", + "PID", +}; + struct bpf_iter_seq_task_common { struct pid_namespace *ns; + enum bpf_iter_task_type type; + u32 pid; + u32 pid_visiting; }; struct bpf_iter_seq_task_info { @@ -22,18 +31,115 @@ struct bpf_iter_seq_task_info { u32 tid; }; -static struct task_struct *task_seq_get_next(struct pid_namespace *ns, +static struct task_struct *task_group_seq_get_next(struct bpf_iter_seq_task_common *common, + u32 *tid, + bool skip_if_dup_files) +{ + struct task_struct *task, *next_task; + struct pid *pid; + u32 saved_tid; + + if (!*tid) { + /* The first time, the iterator calls this function. */ + pid = find_pid_ns(common->pid, common->ns); + if (!pid) + return NULL; + + task = get_pid_task(pid, PIDTYPE_TGID); + if (!task) + return NULL; + + *tid = common->pid; + common->pid_visiting = common->pid; + + return task; + } + + /* If the control returns to user space and comes back to the + * kernel again, *tid and common->pid_visiting should be the + * same for task_seq_start() to pick up the correct task. 
+ */ + if (*tid == common->pid_visiting) { + pid = find_pid_ns(common->pid_visiting, common->ns); + task = get_pid_task(pid, PIDTYPE_PID); + + return task; + } + + pid = find_pid_ns(common->pid_visiting, common->ns); + if (!pid) + return NULL; + + task = get_pid_task(pid, PIDTYPE_PID); + if (!task) + return NULL; + +retry: + if (!pid_alive(task)) { + put_task_struct(task); + return NULL; + } + + next_task = next_thread(task); + put_task_struct(task); + if (!next_task) + return NULL; + + saved_tid = *tid; + *tid = __task_pid_nr_ns(next_task, PIDTYPE_PID, common->ns); + if (!*tid || *tid == common->pid) { + /* Run out of tasks of a process. The tasks of a + * thread_group are linked as circular linked list. + */ + *tid = saved_tid; + return NULL; + } + + get_task_struct(next_task); + common->pid_visiting = *tid; + + if (skip_if_dup_files && task->files == task->group_leader->files) { + task = next_task; + goto retry; + } + + return next_task; +} + +static struct task_struct *task_seq_get_next(struct bpf_iter_seq_task_common *common, u32 *tid, bool skip_if_dup_files) { struct task_struct *task = NULL; struct pid *pid; + if (common->type == BPF_TASK_ITER_TID) { + if (*tid && *tid != common->pid) + return NULL; + rcu_read_lock(); + pid = find_pid_ns(common->pid, common->ns); + if (pid) { + task = get_pid_task(pid, PIDTYPE_TGID); + *tid = common->pid; + } + rcu_read_unlock(); + + return task; + } + + if (common->type == BPF_TASK_ITER_TGID) { + rcu_read_lock(); + task = task_group_seq_get_next(common, tid, skip_if_dup_files); + rcu_read_unlock(); + + return task; + } + rcu_read_lock(); retry: - pid = find_ge_pid(*tid, ns); + pid = find_ge_pid(*tid, common->ns); if (pid) { - *tid = pid_nr_ns(pid, ns); + *tid = pid_nr_ns(pid, common->ns); task = get_pid_task(pid, PIDTYPE_PID); if (!task) { ++*tid; @@ -56,7 +162,7 @@ static void *task_seq_start(struct seq_file *seq, loff_t *pos) struct bpf_iter_seq_task_info *info = seq->private; struct task_struct *task; - task = 
task_seq_get_next(info->common.ns, &info->tid, false); + task = task_seq_get_next(&info->common, &info->tid, false); if (!task) return NULL; @@ -73,7 +179,7 @@ static void *task_seq_next(struct seq_file *seq, void *v, loff_t *pos) ++*pos; ++info->tid; put_task_struct((struct task_struct *)v); - task = task_seq_get_next(info->common.ns, &info->tid, false); + task = task_seq_get_next(&info->common, &info->tid, false); if (!task) return NULL; @@ -117,6 +223,41 @@ static void task_seq_stop(struct seq_file *seq, void *v) put_task_struct((struct task_struct *)v); } +static int bpf_iter_attach_task(struct bpf_prog *prog, + union bpf_iter_link_info *linfo, + struct bpf_iter_aux_info *aux) +{ + unsigned int flags; + struct pid *pid; + pid_t tgid; + + if ((!!linfo->task.tid + !!linfo->task.pid + !!linfo->task.pid_fd) > 1) + return -EINVAL; + + aux->task.type = BPF_TASK_ITER_ALL; + if (linfo->task.tid != 0) { + aux->task.type = BPF_TASK_ITER_TID; + aux->task.pid = linfo->task.tid; + } + if (linfo->task.pid != 0) { + aux->task.type = BPF_TASK_ITER_TGID; + aux->task.pid = linfo->task.pid; + } + if (linfo->task.pid_fd != 0) { + aux->task.type = BPF_TASK_ITER_TGID; + + pid = pidfd_get_pid(linfo->task.pid_fd, &flags); + if (IS_ERR(pid)) + return PTR_ERR(pid); + + tgid = pid_nr_ns(pid, task_active_pid_ns(current)); + aux->task.pid = tgid; + put_pid(pid); + } + + return 0; +} + static const struct seq_operations task_seq_ops = { .start = task_seq_start, .next = task_seq_next, @@ -137,8 +278,7 @@ struct bpf_iter_seq_task_file_info { static struct file * task_file_seq_get_next(struct bpf_iter_seq_task_file_info *info) { - struct pid_namespace *ns = info->common.ns; - u32 curr_tid = info->tid; + u32 saved_tid = info->tid; struct task_struct *curr_task; unsigned int curr_fd = info->fd; @@ -151,21 +291,18 @@ again: curr_task = info->task; curr_fd = info->fd; } else { - curr_task = task_seq_get_next(ns, &curr_tid, true); + curr_task = task_seq_get_next(&info->common, &info->tid, true); if 
(!curr_task) { info->task = NULL; - info->tid = curr_tid; return NULL; } - /* set info->task and info->tid */ + /* set info->task */ info->task = curr_task; - if (curr_tid == info->tid) { + if (saved_tid == info->tid) curr_fd = info->fd; - } else { - info->tid = curr_tid; + else curr_fd = 0; - } } rcu_read_lock(); @@ -186,9 +323,15 @@ again: /* the current task is done, go to the next task */ rcu_read_unlock(); put_task_struct(curr_task); + + if (info->common.type == BPF_TASK_ITER_TID) { + info->task = NULL; + return NULL; + } + info->task = NULL; info->fd = 0; - curr_tid = ++(info->tid); + saved_tid = ++(info->tid); goto again; } @@ -269,6 +412,9 @@ static int init_seq_pidns(void *priv_data, struct bpf_iter_aux_info *aux) struct bpf_iter_seq_task_common *common = priv_data; common->ns = get_pid_ns(task_active_pid_ns(current)); + common->type = aux->task.type; + common->pid = aux->task.pid; + return 0; } @@ -307,11 +453,10 @@ enum bpf_task_vma_iter_find_op { static struct vm_area_struct * task_vma_seq_get_next(struct bpf_iter_seq_task_vma_info *info) { - struct pid_namespace *ns = info->common.ns; enum bpf_task_vma_iter_find_op op; struct vm_area_struct *curr_vma; struct task_struct *curr_task; - u32 curr_tid = info->tid; + u32 saved_tid = info->tid; /* If this function returns a non-NULL vma, it holds a reference to * the task_struct, and holds read lock on vma->mm->mmap_lock. 
@@ -371,14 +516,13 @@ task_vma_seq_get_next(struct bpf_iter_seq_task_vma_info *info) } } else { again: - curr_task = task_seq_get_next(ns, &curr_tid, true); + curr_task = task_seq_get_next(&info->common, &info->tid, true); if (!curr_task) { - info->tid = curr_tid + 1; + info->tid++; goto finish; } - if (curr_tid != info->tid) { - info->tid = curr_tid; + if (saved_tid != info->tid) { /* new task, process the first vma */ op = task_vma_iter_first_vma; } else { @@ -430,9 +574,12 @@ again: return curr_vma; next_task: + if (info->common.type == BPF_TASK_ITER_TID) + goto finish; + put_task_struct(curr_task); info->task = NULL; - curr_tid++; + info->tid++; goto again; finish: @@ -531,8 +678,33 @@ static const struct bpf_iter_seq_info task_seq_info = { .seq_priv_size = sizeof(struct bpf_iter_seq_task_info), }; +static int bpf_iter_fill_link_info(const struct bpf_iter_aux_info *aux, struct bpf_link_info *info) +{ + switch (aux->task.type) { + case BPF_TASK_ITER_TID: + info->iter.task.tid = aux->task.pid; + break; + case BPF_TASK_ITER_TGID: + info->iter.task.pid = aux->task.pid; + break; + default: + break; + } + return 0; +} + +static void bpf_iter_task_show_fdinfo(const struct bpf_iter_aux_info *aux, struct seq_file *seq) +{ + seq_printf(seq, "task_type:\t%s\n", iter_task_type_names[aux->task.type]); + if (aux->task.type == BPF_TASK_ITER_TID) + seq_printf(seq, "tid:\t%u\n", aux->task.pid); + else if (aux->task.type == BPF_TASK_ITER_TGID) + seq_printf(seq, "pid:\t%u\n", aux->task.pid); +} + static struct bpf_iter_reg task_reg_info = { .target = "task", + .attach_target = bpf_iter_attach_task, .feature = BPF_ITER_RESCHED, .ctx_arg_info_size = 1, .ctx_arg_info = { @@ -540,6 +712,8 @@ static struct bpf_iter_reg task_reg_info = { PTR_TO_BTF_ID_OR_NULL }, }, .seq_info = &task_seq_info, + .fill_link_info = bpf_iter_fill_link_info, + .show_fdinfo = bpf_iter_task_show_fdinfo, }; static const struct bpf_iter_seq_info task_file_seq_info = { @@ -551,6 +725,7 @@ static const struct 
bpf_iter_seq_info task_file_seq_info = { static struct bpf_iter_reg task_file_reg_info = { .target = "task_file", + .attach_target = bpf_iter_attach_task, .feature = BPF_ITER_RESCHED, .ctx_arg_info_size = 2, .ctx_arg_info = { @@ -560,6 +735,8 @@ static struct bpf_iter_reg task_file_reg_info = { PTR_TO_BTF_ID_OR_NULL }, }, .seq_info = &task_file_seq_info, + .fill_link_info = bpf_iter_fill_link_info, + .show_fdinfo = bpf_iter_task_show_fdinfo, }; static const struct bpf_iter_seq_info task_vma_seq_info = { @@ -571,6 +748,7 @@ static const struct bpf_iter_seq_info task_vma_seq_info = { static struct bpf_iter_reg task_vma_reg_info = { .target = "task_vma", + .attach_target = bpf_iter_attach_task, .feature = BPF_ITER_RESCHED, .ctx_arg_info_size = 2, .ctx_arg_info = { @@ -580,6 +758,8 @@ static struct bpf_iter_reg task_vma_reg_info = { PTR_TO_BTF_ID_OR_NULL }, }, .seq_info = &task_vma_seq_info, + .fill_link_info = bpf_iter_fill_link_info, + .show_fdinfo = bpf_iter_task_show_fdinfo, }; BPF_CALL_5(bpf_find_vma, struct task_struct *, task, u64, start, diff --git a/kernel/bpf/trampoline.c b/kernel/bpf/trampoline.c index ad76940b02cc..bf0906e1e2b9 100644 --- a/kernel/bpf/trampoline.c +++ b/kernel/bpf/trampoline.c @@ -116,22 +116,6 @@ bool bpf_prog_has_trampoline(const struct bpf_prog *prog) (ptype == BPF_PROG_TYPE_LSM && eatype == BPF_LSM_MAC); } -void *bpf_jit_alloc_exec_page(void) -{ - void *image; - - image = bpf_jit_alloc_exec(PAGE_SIZE); - if (!image) - return NULL; - - set_vm_flush_reset_perms(image); - /* Keep image as writeable. The alternative is to keep flipping ro/rw - * every time new program is attached or detached. 
- */ - set_memory_x((long)image, 1); - return image; -} - void bpf_image_ksym_add(void *data, struct bpf_ksym *ksym) { ksym->start = (unsigned long) data; @@ -404,9 +388,10 @@ static struct bpf_tramp_image *bpf_tramp_image_alloc(u64 key, u32 idx) goto out_free_im; err = -ENOMEM; - im->image = image = bpf_jit_alloc_exec_page(); + im->image = image = bpf_jit_alloc_exec(PAGE_SIZE); if (!image) goto out_uncharge; + set_vm_flush_reset_perms(image); err = percpu_ref_init(&im->pcref, __bpf_tramp_image_release, 0, GFP_KERNEL); if (err) @@ -483,6 +468,9 @@ again: if (err < 0) goto out; + set_memory_ro((long)im->image, 1); + set_memory_x((long)im->image, 1); + WARN_ON(tr->cur_image && tr->selector == 0); WARN_ON(!tr->cur_image && tr->selector); if (tr->cur_image) @@ -863,17 +851,6 @@ static __always_inline u64 notrace bpf_prog_start_time(void) return start; } -static void notrace inc_misses_counter(struct bpf_prog *prog) -{ - struct bpf_prog_stats *stats; - unsigned int flags; - - stats = this_cpu_ptr(prog->stats); - flags = u64_stats_update_begin_irqsave(&stats->syncp); - u64_stats_inc(&stats->misses); - u64_stats_update_end_irqrestore(&stats->syncp, flags); -} - /* The logic is similar to bpf_prog_run(), but with an explicit * rcu_read_lock() and migrate_disable() which are required * for the trampoline. 
The macro is split into @@ -896,7 +873,7 @@ u64 notrace __bpf_prog_enter(struct bpf_prog *prog, struct bpf_tramp_run_ctx *ru run_ctx->saved_run_ctx = bpf_set_run_ctx(&run_ctx->run_ctx); if (unlikely(this_cpu_inc_return(*(prog->active)) != 1)) { - inc_misses_counter(prog); + bpf_prog_inc_misses_counter(prog); return 0; } return bpf_prog_start_time(); @@ -967,7 +944,7 @@ u64 notrace __bpf_prog_enter_sleepable(struct bpf_prog *prog, struct bpf_tramp_r might_fault(); if (unlikely(this_cpu_inc_return(*(prog->active)) != 1)) { - inc_misses_counter(prog); + bpf_prog_inc_misses_counter(prog); return 0; } @@ -987,6 +964,29 @@ void notrace __bpf_prog_exit_sleepable(struct bpf_prog *prog, u64 start, rcu_read_unlock_trace(); } +u64 notrace __bpf_prog_enter_struct_ops(struct bpf_prog *prog, + struct bpf_tramp_run_ctx *run_ctx) + __acquires(RCU) +{ + rcu_read_lock(); + migrate_disable(); + + run_ctx->saved_run_ctx = bpf_set_run_ctx(&run_ctx->run_ctx); + + return bpf_prog_start_time(); +} + +void notrace __bpf_prog_exit_struct_ops(struct bpf_prog *prog, u64 start, + struct bpf_tramp_run_ctx *run_ctx) + __releases(RCU) +{ + bpf_reset_run_ctx(run_ctx->saved_run_ctx); + + update_prog_stats(prog, start); + migrate_enable(); + rcu_read_unlock(); +} + void notrace __bpf_tramp_enter(struct bpf_tramp_image *tr) { percpu_ref_get(&tr->pcref); diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 003f7ba19558..6f6d2d511c06 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -23,6 +23,7 @@ #include <linux/error-injection.h> #include <linux/bpf_lsm.h> #include <linux/btf_ids.h> +#include <linux/poison.h> #include "disasm.h" @@ -370,6 +371,7 @@ __printf(2, 3) void bpf_log(struct bpf_verifier_log *log, bpf_verifier_vlog(log, fmt, args); va_end(args); } +EXPORT_SYMBOL_GPL(bpf_log); static const char *ltrim(const char *s) { @@ -561,6 +563,7 @@ static const char *reg_type_str(struct bpf_verifier_env *env, [PTR_TO_BUF] = "buf", [PTR_TO_FUNC] = "func", [PTR_TO_MAP_KEY] = 
"map_key", + [PTR_TO_DYNPTR] = "dynptr_ptr", }; if (type & PTR_MAYBE_NULL) { @@ -779,8 +782,8 @@ static bool is_dynptr_reg_valid_uninit(struct bpf_verifier_env *env, struct bpf_ return true; } -static bool is_dynptr_reg_valid_init(struct bpf_verifier_env *env, struct bpf_reg_state *reg, - enum bpf_arg_type arg_type) +bool is_dynptr_reg_valid_init(struct bpf_verifier_env *env, + struct bpf_reg_state *reg) { struct bpf_func_state *state = func(env, reg); int spi = get_spi(reg->off); @@ -796,11 +799,24 @@ static bool is_dynptr_reg_valid_init(struct bpf_verifier_env *env, struct bpf_re return false; } + return true; +} + +bool is_dynptr_type_expected(struct bpf_verifier_env *env, + struct bpf_reg_state *reg, + enum bpf_arg_type arg_type) +{ + struct bpf_func_state *state = func(env, reg); + enum bpf_dynptr_type dynptr_type; + int spi = get_spi(reg->off); + /* ARG_PTR_TO_DYNPTR takes any type of dynptr */ if (arg_type == ARG_PTR_TO_DYNPTR) return true; - return state->stack[spi].spilled_ptr.dynptr.type == arg_to_dynptr_type(arg_type); + dynptr_type = arg_to_dynptr_type(arg_type); + + return state->stack[spi].spilled_ptr.dynptr.type == dynptr_type; } /* The reg state of a pointer or a bounded scalar was saved when @@ -1749,6 +1765,7 @@ static void init_func_state(struct bpf_verifier_env *env, state->callsite = callsite; state->frameno = frameno; state->subprogno = subprogno; + state->callback_ret_range = tnum_range(0, 0); init_reg_state(env, state); mark_verifier_state_scratched(env); } @@ -2908,7 +2925,7 @@ static int __mark_chain_precision(struct bpf_verifier_env *env, int regno, return 0; } -static int mark_chain_precision(struct bpf_verifier_env *env, int regno) +int mark_chain_precision(struct bpf_verifier_env *env, int regno) { return __mark_chain_precision(env, regno, -1); } @@ -5233,6 +5250,25 @@ static int check_helper_mem_access(struct bpf_verifier_env *env, int regno, env, regno, reg->off, access_size, zero_size_allowed, ACCESS_HELPER, meta); + case 
PTR_TO_CTX: + /* in case the function doesn't know how to access the context, + * (because we are in a program of type SYSCALL for example), we + * can not statically check its size. + * Dynamically check it now. + */ + if (!env->ops->convert_ctx_access) { + enum bpf_access_type atype = meta && meta->raw_mode ? BPF_WRITE : BPF_READ; + int offset = access_size - 1; + + /* Allow zero-byte read from PTR_TO_CTX */ + if (access_size == 0) + return zero_size_allowed ? 0 : -EACCES; + + return check_mem_access(env, env->insn_idx, regno, offset, BPF_B, + atype, -1, false); + } + + fallthrough; default: /* scalar_value or invalid ptr */ /* Allow zero-byte read from NULL, regardless of pointer type */ if (zero_size_allowed && access_size == 0 && @@ -5666,6 +5702,12 @@ static const struct bpf_reg_types stack_ptr_types = { .types = { PTR_TO_STACK } static const struct bpf_reg_types const_str_ptr_types = { .types = { PTR_TO_MAP_VALUE } }; static const struct bpf_reg_types timer_types = { .types = { PTR_TO_MAP_VALUE } }; static const struct bpf_reg_types kptr_types = { .types = { PTR_TO_MAP_VALUE } }; +static const struct bpf_reg_types dynptr_types = { + .types = { + PTR_TO_STACK, + PTR_TO_DYNPTR | DYNPTR_TYPE_LOCAL, + } +}; static const struct bpf_reg_types *compatible_reg_types[__BPF_ARG_TYPE_MAX] = { [ARG_PTR_TO_MAP_KEY] = &map_key_value_types, @@ -5692,7 +5734,7 @@ static const struct bpf_reg_types *compatible_reg_types[__BPF_ARG_TYPE_MAX] = { [ARG_PTR_TO_CONST_STR] = &const_str_ptr_types, [ARG_PTR_TO_TIMER] = &timer_types, [ARG_PTR_TO_KPTR] = &kptr_types, - [ARG_PTR_TO_DYNPTR] = &stack_ptr_types, + [ARG_PTR_TO_DYNPTR] = &dynptr_types, }; static int check_reg_type(struct bpf_verifier_env *env, u32 regno, @@ -5761,13 +5803,22 @@ found: if (meta->func_id == BPF_FUNC_kptr_xchg) { if (map_kptr_match_type(env, meta->kptr_off_desc, reg, regno)) return -EACCES; - } else if (!btf_struct_ids_match(&env->log, reg->btf, reg->btf_id, reg->off, - btf_vmlinux, *arg_btf_id, - 
strict_type_match)) { - verbose(env, "R%d is of type %s but %s is expected\n", - regno, kernel_type_name(reg->btf, reg->btf_id), - kernel_type_name(btf_vmlinux, *arg_btf_id)); - return -EACCES; + } else { + if (arg_btf_id == BPF_PTR_POISON) { + verbose(env, "verifier internal error:"); + verbose(env, "R%d has non-overwritten BPF_PTR_POISON type\n", + regno); + return -EACCES; + } + + if (!btf_struct_ids_match(&env->log, reg->btf, reg->btf_id, reg->off, + btf_vmlinux, *arg_btf_id, + strict_type_match)) { + verbose(env, "R%d is of type %s but %s is expected\n", + regno, kernel_type_name(reg->btf, reg->btf_id), + kernel_type_name(btf_vmlinux, *arg_btf_id)); + return -EACCES; + } } } @@ -6035,6 +6086,13 @@ skip_type_check: err = check_mem_size_reg(env, reg, regno, true, meta); break; case ARG_PTR_TO_DYNPTR: + /* We only need to check for initialized / uninitialized helper + * dynptr args if the dynptr is not PTR_TO_DYNPTR, as the + * assumption is that if it is, that a helper function + * initialized the dynptr on behalf of the BPF program. 
+ */ + if (base_type(reg->type) == PTR_TO_DYNPTR) + break; if (arg_type & MEM_UNINIT) { if (!is_dynptr_reg_valid_uninit(env, reg)) { verbose(env, "Dynptr has to be an uninitialized dynptr\n"); @@ -6050,21 +6108,27 @@ skip_type_check: } meta->uninit_dynptr_regno = regno; - } else if (!is_dynptr_reg_valid_init(env, reg, arg_type)) { + } else if (!is_dynptr_reg_valid_init(env, reg)) { + verbose(env, + "Expected an initialized dynptr as arg #%d\n", + arg + 1); + return -EINVAL; + } else if (!is_dynptr_type_expected(env, reg, arg_type)) { const char *err_extra = ""; switch (arg_type & DYNPTR_TYPE_FLAG_MASK) { case DYNPTR_TYPE_LOCAL: - err_extra = "local "; + err_extra = "local"; break; case DYNPTR_TYPE_RINGBUF: - err_extra = "ringbuf "; + err_extra = "ringbuf"; break; default: + err_extra = "<unknown>"; break; } - - verbose(env, "Expected an initialized %sdynptr as arg #%d\n", + verbose(env, + "Expected a dynptr of type %s as arg #%d\n", err_extra, arg + 1); return -EINVAL; } @@ -6209,6 +6273,10 @@ static int check_map_func_compatibility(struct bpf_verifier_env *env, func_id != BPF_FUNC_ringbuf_discard_dynptr) goto error; break; + case BPF_MAP_TYPE_USER_RINGBUF: + if (func_id != BPF_FUNC_user_ringbuf_drain) + goto error; + break; case BPF_MAP_TYPE_STACK_TRACE: if (func_id != BPF_FUNC_get_stackid) goto error; @@ -6328,6 +6396,10 @@ static int check_map_func_compatibility(struct bpf_verifier_env *env, if (map->map_type != BPF_MAP_TYPE_RINGBUF) goto error; break; + case BPF_FUNC_user_ringbuf_drain: + if (map->map_type != BPF_MAP_TYPE_USER_RINGBUF) + goto error; + break; case BPF_FUNC_get_stackid: if (map->map_type != BPF_MAP_TYPE_STACK_TRACE) goto error; @@ -6494,31 +6566,15 @@ static int check_func_proto(const struct bpf_func_proto *fn, int func_id) /* Packet data might have moved, any old PTR_TO_PACKET[_META,_END] * are now invalid, so turn them into unknown SCALAR_VALUE. 
*/ -static void __clear_all_pkt_pointers(struct bpf_verifier_env *env, - struct bpf_func_state *state) +static void clear_all_pkt_pointers(struct bpf_verifier_env *env) { - struct bpf_reg_state *regs = state->regs, *reg; - int i; - - for (i = 0; i < MAX_BPF_REG; i++) - if (reg_is_pkt_pointer_any(&regs[i])) - mark_reg_unknown(env, regs, i); + struct bpf_func_state *state; + struct bpf_reg_state *reg; - bpf_for_each_spilled_reg(i, state, reg) { - if (!reg) - continue; + bpf_for_each_reg_in_vstate(env->cur_state, state, reg, ({ if (reg_is_pkt_pointer_any(reg)) __mark_reg_unknown(env, reg); - } -} - -static void clear_all_pkt_pointers(struct bpf_verifier_env *env) -{ - struct bpf_verifier_state *vstate = env->cur_state; - int i; - - for (i = 0; i <= vstate->curframe; i++) - __clear_all_pkt_pointers(env, vstate->frame[i]); + })); } enum { @@ -6547,41 +6603,24 @@ static void mark_pkt_end(struct bpf_verifier_state *vstate, int regn, bool range reg->range = AT_PKT_END; } -static void release_reg_references(struct bpf_verifier_env *env, - struct bpf_func_state *state, - int ref_obj_id) -{ - struct bpf_reg_state *regs = state->regs, *reg; - int i; - - for (i = 0; i < MAX_BPF_REG; i++) - if (regs[i].ref_obj_id == ref_obj_id) - mark_reg_unknown(env, regs, i); - - bpf_for_each_spilled_reg(i, state, reg) { - if (!reg) - continue; - if (reg->ref_obj_id == ref_obj_id) - __mark_reg_unknown(env, reg); - } -} - /* The pointer with the specified id has released its reference to kernel * resources. Identify all copies of the same pointer and clear the reference.
*/ static int release_reference(struct bpf_verifier_env *env, int ref_obj_id) { - struct bpf_verifier_state *vstate = env->cur_state; + struct bpf_func_state *state; + struct bpf_reg_state *reg; int err; - int i; err = release_reference_state(cur_func(env), ref_obj_id); if (err) return err; - for (i = 0; i <= vstate->curframe; i++) - release_reg_references(env, vstate->frame[i], ref_obj_id); + bpf_for_each_reg_in_vstate(env->cur_state, state, reg, ({ + if (reg->ref_obj_id == ref_obj_id) + __mark_reg_unknown(env, reg); + })); return 0; } @@ -6629,7 +6668,7 @@ static int __check_func_call(struct bpf_verifier_env *env, struct bpf_insn *insn func_info_aux = env->prog->aux->func_info_aux; if (func_info_aux) is_global = func_info_aux[subprog].linkage == BTF_FUNC_GLOBAL; - err = btf_check_subprog_arg_match(env, subprog, caller->regs); + err = btf_check_subprog_call(env, subprog, caller->regs); if (err == -EFAULT) return err; if (is_global) { @@ -6803,6 +6842,7 @@ static int set_map_elem_callback_state(struct bpf_verifier_env *env, return err; callee->in_callback_fn = true; + callee->callback_ret_range = tnum_range(0, 1); return 0; } @@ -6824,6 +6864,7 @@ static int set_loop_callback_state(struct bpf_verifier_env *env, __mark_reg_not_init(env, &callee->regs[BPF_REG_5]); callee->in_callback_fn = true; + callee->callback_ret_range = tnum_range(0, 1); return 0; } @@ -6853,6 +6894,7 @@ static int set_timer_callback_state(struct bpf_verifier_env *env, __mark_reg_not_init(env, &callee->regs[BPF_REG_4]); __mark_reg_not_init(env, &callee->regs[BPF_REG_5]); callee->in_async_callback_fn = true; + callee->callback_ret_range = tnum_range(0, 1); return 0; } @@ -6880,6 +6922,30 @@ static int set_find_vma_callback_state(struct bpf_verifier_env *env, __mark_reg_not_init(env, &callee->regs[BPF_REG_4]); __mark_reg_not_init(env, &callee->regs[BPF_REG_5]); callee->in_callback_fn = true; + callee->callback_ret_range = tnum_range(0, 1); + return 0; +} + +static int 
set_user_ringbuf_callback_state(struct bpf_verifier_env *env, + struct bpf_func_state *caller, + struct bpf_func_state *callee, + int insn_idx) +{ + /* bpf_user_ringbuf_drain(struct bpf_map *map, void *callback_fn, void + * callback_ctx, u64 flags); + * callback_fn(struct bpf_dynptr_t* dynptr, void *callback_ctx); + */ + __mark_reg_not_init(env, &callee->regs[BPF_REG_0]); + callee->regs[BPF_REG_1].type = PTR_TO_DYNPTR | DYNPTR_TYPE_LOCAL; + __mark_reg_known_zero(&callee->regs[BPF_REG_1]); + callee->regs[BPF_REG_2] = caller->regs[BPF_REG_3]; + + /* unused */ + __mark_reg_not_init(env, &callee->regs[BPF_REG_3]); + __mark_reg_not_init(env, &callee->regs[BPF_REG_4]); + __mark_reg_not_init(env, &callee->regs[BPF_REG_5]); + + callee->in_callback_fn = true; return 0; } @@ -6907,7 +6973,7 @@ static int prepare_func_exit(struct bpf_verifier_env *env, int *insn_idx) caller = state->frame[state->curframe]; if (callee->in_callback_fn) { /* enforce R0 return value range [0, 1]. */ - struct tnum range = tnum_range(0, 1); + struct tnum range = callee->callback_ret_range; if (r0->type != SCALAR_VALUE) { verbose(env, "R0 not a scalar value\n"); @@ -7342,12 +7408,18 @@ static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn case BPF_FUNC_dynptr_data: for (i = 0; i < MAX_BPF_FUNC_REG_ARGS; i++) { if (arg_type_is_dynptr(fn->arg_type[i])) { + struct bpf_reg_state *reg = &regs[BPF_REG_1 + i]; + if (meta.ref_obj_id) { verbose(env, "verifier internal error: meta.ref_obj_id already set\n"); return -EFAULT; } - /* Find the id of the dynptr we're tracking the reference of */ - meta.ref_obj_id = stack_slot_get_id(env, &regs[BPF_REG_1 + i]); + + if (base_type(reg->type) != PTR_TO_DYNPTR) + /* Find the id of the dynptr we're + * tracking the reference of + */ + meta.ref_obj_id = stack_slot_get_id(env, reg); break; } } @@ -7356,6 +7428,10 @@ static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn return -EFAULT; } break; + case
BPF_FUNC_user_ringbuf_drain: + err = __check_func_call(env, insn, insn_idx_p, meta.subprogno, + set_user_ringbuf_callback_state); + break; } if (err) @@ -7465,6 +7541,12 @@ static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn ret_btf = meta.kptr_off_desc->kptr.btf; ret_btf_id = meta.kptr_off_desc->kptr.btf_id; } else { + if (fn->ret_btf_id == BPF_PTR_POISON) { + verbose(env, "verifier internal error:"); + verbose(env, "func %s has non-overwritten BPF_PTR_POISON return type\n", + func_id_name(func_id)); + return -EINVAL; + } ret_btf = btf_vmlinux; ret_btf_id = *fn->ret_btf_id; } @@ -7576,6 +7658,7 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn, { const struct btf_type *t, *func, *func_proto, *ptr_type; struct bpf_reg_state *regs = cur_regs(env); + struct bpf_kfunc_arg_meta meta = { 0 }; const char *func_name, *ptr_type_name; u32 i, nargs, func_id, ptr_type_id; int err, insn_idx = *insn_idx_p; @@ -7610,8 +7693,10 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn, acq = *kfunc_flags & KF_ACQUIRE; + meta.flags = *kfunc_flags; + /* Check the arguments */ - err = btf_check_kfunc_arg_match(env, desc_btf, func_id, regs, *kfunc_flags); + err = btf_check_kfunc_arg_match(env, desc_btf, func_id, regs, &meta); if (err < 0) return err; /* In case of release function, we get register number of refcounted @@ -7632,7 +7717,7 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn, /* Check return type */ t = btf_type_skip_modifiers(desc_btf, func_proto->type, NULL); - if (acq && !btf_type_is_ptr(t)) { + if (acq && !btf_type_is_struct_ptr(desc_btf, t)) { verbose(env, "acquire kernel function does not return PTR_TO_BTF_ID\n"); return -EINVAL; } @@ -7644,17 +7729,33 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn, ptr_type = btf_type_skip_modifiers(desc_btf, t->type, &ptr_type_id); if (!btf_type_is_struct(ptr_type)) { - 
ptr_type_name = btf_name_by_offset(desc_btf, - ptr_type->name_off); - verbose(env, "kernel function %s returns pointer type %s %s is not supported\n", - func_name, btf_type_str(ptr_type), - ptr_type_name); - return -EINVAL; + if (!meta.r0_size) { + ptr_type_name = btf_name_by_offset(desc_btf, + ptr_type->name_off); + verbose(env, + "kernel function %s returns pointer type %s %s is not supported\n", + func_name, + btf_type_str(ptr_type), + ptr_type_name); + return -EINVAL; + } + + mark_reg_known_zero(env, regs, BPF_REG_0); + regs[BPF_REG_0].type = PTR_TO_MEM; + regs[BPF_REG_0].mem_size = meta.r0_size; + + if (meta.r0_rdonly) + regs[BPF_REG_0].type |= MEM_RDONLY; + + /* Ensures we don't access the memory after a release_reference() */ + if (meta.ref_obj_id) + regs[BPF_REG_0].ref_obj_id = meta.ref_obj_id; + } else { + mark_reg_known_zero(env, regs, BPF_REG_0); + regs[BPF_REG_0].btf = desc_btf; + regs[BPF_REG_0].type = PTR_TO_BTF_ID; + regs[BPF_REG_0].btf_id = ptr_type_id; } - mark_reg_known_zero(env, regs, BPF_REG_0); - regs[BPF_REG_0].btf = desc_btf; - regs[BPF_REG_0].type = PTR_TO_BTF_ID; - regs[BPF_REG_0].btf_id = ptr_type_id; if (*kfunc_flags & KF_RET_NULL) { regs[BPF_REG_0].type |= PTR_MAYBE_NULL; /* For mark_ptr_or_null_reg, see 93c230e3f5bd6 */ @@ -9297,34 +9398,14 @@ static int check_alu_op(struct bpf_verifier_env *env, struct bpf_insn *insn) return 0; } -static void __find_good_pkt_pointers(struct bpf_func_state *state, - struct bpf_reg_state *dst_reg, - enum bpf_reg_type type, int new_range) -{ - struct bpf_reg_state *reg; - int i; - - for (i = 0; i < MAX_BPF_REG; i++) { - reg = &state->regs[i]; - if (reg->type == type && reg->id == dst_reg->id) - /* keep the maximum range already checked */ - reg->range = max(reg->range, new_range); - } - - bpf_for_each_spilled_reg(i, state, reg) { - if (!reg) - continue; - if (reg->type == type && reg->id == dst_reg->id) - reg->range = max(reg->range, new_range); - } -} - static void find_good_pkt_pointers(struct 
bpf_verifier_state *vstate, struct bpf_reg_state *dst_reg, enum bpf_reg_type type, bool range_right_open) { - int new_range, i; + struct bpf_func_state *state; + struct bpf_reg_state *reg; + int new_range; if (dst_reg->off < 0 || (dst_reg->off == 0 && range_right_open)) @@ -9389,9 +9470,11 @@ static void find_good_pkt_pointers(struct bpf_verifier_state *vstate, * the range won't allow anything. * dst_reg->off is known < MAX_PACKET_OFF, therefore it fits in a u16. */ - for (i = 0; i <= vstate->curframe; i++) - __find_good_pkt_pointers(vstate->frame[i], dst_reg, type, - new_range); + bpf_for_each_reg_in_vstate(vstate, state, reg, ({ + if (reg->type == type && reg->id == dst_reg->id) + /* keep the maximum range already checked */ + reg->range = max(reg->range, new_range); + })); } static int is_branch32_taken(struct bpf_reg_state *reg, u32 val, u8 opcode) @@ -9880,7 +9963,7 @@ static void mark_ptr_or_null_reg(struct bpf_func_state *state, if (!reg_may_point_to_spin_lock(reg)) { /* For not-NULL ptr, reg->ref_obj_id will be reset - * in release_reg_references(). + * in release_reference(). * * reg->id is still used by spin_lock ptr. Other * than spin_lock ptr type, reg->id can be reset. @@ -9890,22 +9973,6 @@ static void mark_ptr_or_null_reg(struct bpf_func_state *state, } } -static void __mark_ptr_or_null_regs(struct bpf_func_state *state, u32 id, - bool is_null) -{ - struct bpf_reg_state *reg; - int i; - - for (i = 0; i < MAX_BPF_REG; i++) - mark_ptr_or_null_reg(state, &state->regs[i], id, is_null); - - bpf_for_each_spilled_reg(i, state, reg) { - if (!reg) - continue; - mark_ptr_or_null_reg(state, reg, id, is_null); - } -} - /* The logic is similar to find_good_pkt_pointers(), both could eventually * be folded together at some point. 
*/ @@ -9913,10 +9980,9 @@ static void mark_ptr_or_null_regs(struct bpf_verifier_state *vstate, u32 regno, bool is_null) { struct bpf_func_state *state = vstate->frame[vstate->curframe]; - struct bpf_reg_state *regs = state->regs; + struct bpf_reg_state *regs = state->regs, *reg; u32 ref_obj_id = regs[regno].ref_obj_id; u32 id = regs[regno].id; - int i; if (ref_obj_id && ref_obj_id == id && is_null) /* regs[regno] is in the " == NULL" branch. @@ -9925,8 +9991,9 @@ static void mark_ptr_or_null_regs(struct bpf_verifier_state *vstate, u32 regno, */ WARN_ON_ONCE(release_reference_state(state, id)); - for (i = 0; i <= vstate->curframe; i++) - __mark_ptr_or_null_regs(vstate->frame[i], id, is_null); + bpf_for_each_reg_in_vstate(vstate, state, reg, ({ + mark_ptr_or_null_reg(state, reg, id, is_null); + })); } static bool try_match_pkt_pointers(const struct bpf_insn *insn, @@ -10039,23 +10106,11 @@ static void find_equal_scalars(struct bpf_verifier_state *vstate, { struct bpf_func_state *state; struct bpf_reg_state *reg; - int i, j; - - for (i = 0; i <= vstate->curframe; i++) { - state = vstate->frame[i]; - for (j = 0; j < MAX_BPF_REG; j++) { - reg = &state->regs[j]; - if (reg->type == SCALAR_VALUE && reg->id == known_reg->id) - *reg = *known_reg; - } - bpf_for_each_spilled_reg(j, state, reg) { - if (!reg) - continue; - if (reg->type == SCALAR_VALUE && reg->id == known_reg->id) - *reg = *known_reg; - } - } + bpf_for_each_reg_in_vstate(vstate, state, reg, ({ + if (reg->type == SCALAR_VALUE && reg->id == known_reg->id) + *reg = *known_reg; + })); } static int check_cond_jmp_op(struct bpf_verifier_env *env, @@ -12654,6 +12709,7 @@ static int check_map_prog_compatibility(struct bpf_verifier_env *env, case BPF_MAP_TYPE_ARRAY_OF_MAPS: case BPF_MAP_TYPE_HASH_OF_MAPS: case BPF_MAP_TYPE_RINGBUF: + case BPF_MAP_TYPE_USER_RINGBUF: case BPF_MAP_TYPE_INODE_STORAGE: case BPF_MAP_TYPE_SK_STORAGE: case BPF_MAP_TYPE_TASK_STORAGE: @@ -13447,9 +13503,6 @@ static int convert_ctx_accesses(struct 
bpf_verifier_env *env) insn->code = BPF_LDX | BPF_PROBE_MEM | BPF_SIZE((insn)->code); env->prog->aux->num_exentries++; - } else if (resolve_prog_type(env->prog) != BPF_PROG_TYPE_STRUCT_OPS) { - verbose(env, "Writes through BTF pointers are not allowed\n"); - return -EINVAL; } continue; default: diff --git a/kernel/kprobes.c b/kernel/kprobes.c index ca9d834d0b84..3220b0a2fb4a 100644 --- a/kernel/kprobes.c +++ b/kernel/kprobes.c @@ -1607,9 +1607,10 @@ int register_kprobe(struct kprobe *p) struct kprobe *old_p; struct module *probed_mod; kprobe_opcode_t *addr; + bool on_func_entry; /* Adjust probe address from symbol */ - addr = kprobe_addr(p); + addr = _kprobe_addr(p->addr, p->symbol_name, p->offset, &on_func_entry); if (IS_ERR(addr)) return PTR_ERR(addr); p->addr = addr; @@ -1629,6 +1630,9 @@ int register_kprobe(struct kprobe *p) mutex_lock(&kprobe_mutex); + if (on_func_entry) + p->flags |= KPROBE_FLAG_ON_FUNC_ENTRY; + old_p = get_kprobe(p->addr); if (old_p) { /* Since this may unoptimize 'old_p', locking 'text_mutex'. */ diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig index 1052126bdca2..e9e95c790b8e 100644 --- a/kernel/trace/Kconfig +++ b/kernel/trace/Kconfig @@ -51,6 +51,12 @@ config HAVE_DYNAMIC_FTRACE_WITH_ARGS This allows for use of regs_get_kernel_argument() and kernel_stack_pointer(). +config HAVE_DYNAMIC_FTRACE_NO_PATCHABLE + bool + help + If the architecture generates __patchable_function_entries sections + but does not want them included in the ftrace locations. 
+ config HAVE_FTRACE_MCOUNT_RECORD bool help diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index 68e5cdd24cef..688552df95ca 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -20,6 +20,8 @@ #include <linux/fprobe.h> #include <linux/bsearch.h> #include <linux/sort.h> +#include <linux/key.h> +#include <linux/verification.h> #include <net/bpf_sk_storage.h> @@ -1026,11 +1028,30 @@ static const struct bpf_func_proto bpf_get_func_ip_proto_tracing = { .arg1_type = ARG_PTR_TO_CTX, }; +#ifdef CONFIG_X86_KERNEL_IBT +static unsigned long get_entry_ip(unsigned long fentry_ip) +{ + u32 instr; + + /* Being extra safe in here in case entry ip is on the page-edge. */ + if (get_kernel_nofault(instr, (u32 *) fentry_ip - 1)) + return fentry_ip; + if (is_endbr(instr)) + fentry_ip -= ENDBR_INSN_SIZE; + return fentry_ip; +} +#else +#define get_entry_ip(fentry_ip) fentry_ip +#endif + BPF_CALL_1(bpf_get_func_ip_kprobe, struct pt_regs *, regs) { struct kprobe *kp = kprobe_running(); - return kp ? (uintptr_t)kp->addr : 0; + if (!kp || !(kp->flags & KPROBE_FLAG_ON_FUNC_ENTRY)) + return 0; + + return get_entry_ip((uintptr_t)kp->addr); } static const struct bpf_func_proto bpf_get_func_ip_proto_kprobe = { @@ -1181,6 +1202,184 @@ static const struct bpf_func_proto bpf_get_func_arg_cnt_proto = { .arg1_type = ARG_PTR_TO_CTX, }; +#ifdef CONFIG_KEYS +__diag_push(); +__diag_ignore_all("-Wmissing-prototypes", + "kfuncs which will be used in BPF programs"); + +/** + * bpf_lookup_user_key - lookup a key by its serial + * @serial: key handle serial number + * @flags: lookup-specific flags + * + * Search a key with a given *serial* and the provided *flags*. + * If found, increment the reference count of the key by one, and + * return it in the bpf_key structure. + * + * The bpf_key structure must be passed to bpf_key_put() when done + * with it, so that the key reference count is decremented and the + * bpf_key structure is freed. 
+ * + * Permission checks are deferred to the time the key is used by + * one of the available key-specific kfuncs. + * + * Set *flags* with KEY_LOOKUP_CREATE, to attempt creating a requested + * special keyring (e.g. session keyring), if it doesn't yet exist. + * Set *flags* with KEY_LOOKUP_PARTIAL, to lookup a key without waiting + * for the key construction, and to retrieve uninstantiated keys (keys + * without data attached to them). + * + * Return: a bpf_key pointer with a valid key pointer if the key is found, a + * NULL pointer otherwise. + */ +struct bpf_key *bpf_lookup_user_key(u32 serial, u64 flags) +{ + key_ref_t key_ref; + struct bpf_key *bkey; + + if (flags & ~KEY_LOOKUP_ALL) + return NULL; + + /* + * Permission check is deferred until the key is used, as the + * intent of the caller is unknown here. + */ + key_ref = lookup_user_key(serial, flags, KEY_DEFER_PERM_CHECK); + if (IS_ERR(key_ref)) + return NULL; + + bkey = kmalloc(sizeof(*bkey), GFP_KERNEL); + if (!bkey) { + key_put(key_ref_to_ptr(key_ref)); + return NULL; + } + + bkey->key = key_ref_to_ptr(key_ref); + bkey->has_ref = true; + + return bkey; +} + +/** + * bpf_lookup_system_key - lookup a key by a system-defined ID + * @id: key ID + * + * Obtain a bpf_key structure with a key pointer set to the passed key ID. + * The key pointer is marked as invalid, to prevent bpf_key_put() from + * attempting to decrement the key reference count on that pointer. The key + * pointer set in such way is currently understood only by + * verify_pkcs7_signature(). 
+ * + * Set *id* to one of the values defined in include/linux/verification.h: + * 0 for the primary keyring (immutable keyring of system keys); + * VERIFY_USE_SECONDARY_KEYRING for both the primary and secondary keyring + * (where keys can be added only if they are vouched for by existing keys + * in those keyrings); VERIFY_USE_PLATFORM_KEYRING for the platform + * keyring (primarily used by the integrity subsystem to verify a kexec'ed + * kerned image and, possibly, the initramfs signature). + * + * Return: a bpf_key pointer with an invalid key pointer set from the + * pre-determined ID on success, a NULL pointer otherwise + */ +struct bpf_key *bpf_lookup_system_key(u64 id) +{ + struct bpf_key *bkey; + + if (system_keyring_id_check(id) < 0) + return NULL; + + bkey = kmalloc(sizeof(*bkey), GFP_ATOMIC); + if (!bkey) + return NULL; + + bkey->key = (struct key *)(unsigned long)id; + bkey->has_ref = false; + + return bkey; +} + +/** + * bpf_key_put - decrement key reference count if key is valid and free bpf_key + * @bkey: bpf_key structure + * + * Decrement the reference count of the key inside *bkey*, if the pointer + * is valid, and free *bkey*. + */ +void bpf_key_put(struct bpf_key *bkey) +{ + if (bkey->has_ref) + key_put(bkey->key); + + kfree(bkey); +} + +#ifdef CONFIG_SYSTEM_DATA_VERIFICATION +/** + * bpf_verify_pkcs7_signature - verify a PKCS#7 signature + * @data_ptr: data to verify + * @sig_ptr: signature of the data + * @trusted_keyring: keyring with keys trusted for signature verification + * + * Verify the PKCS#7 signature *sig_ptr* against the supplied *data_ptr* + * with keys in a keyring referenced by *trusted_keyring*. + * + * Return: 0 on success, a negative value on error. + */ +int bpf_verify_pkcs7_signature(struct bpf_dynptr_kern *data_ptr, + struct bpf_dynptr_kern *sig_ptr, + struct bpf_key *trusted_keyring) +{ + int ret; + + if (trusted_keyring->has_ref) { + /* + * Do the permission check deferred in bpf_lookup_user_key(). 
+ * See bpf_lookup_user_key() for more details. + * + * A call to key_task_permission() here would be redundant, as + * it is already done by keyring_search() called by + * find_asymmetric_key(). + */ + ret = key_validate(trusted_keyring->key); + if (ret < 0) + return ret; + } + + return verify_pkcs7_signature(data_ptr->data, + bpf_dynptr_get_size(data_ptr), + sig_ptr->data, + bpf_dynptr_get_size(sig_ptr), + trusted_keyring->key, + VERIFYING_UNSPECIFIED_SIGNATURE, NULL, + NULL); +} +#endif /* CONFIG_SYSTEM_DATA_VERIFICATION */ + +__diag_pop(); + +BTF_SET8_START(key_sig_kfunc_set) +BTF_ID_FLAGS(func, bpf_lookup_user_key, KF_ACQUIRE | KF_RET_NULL | KF_SLEEPABLE) +BTF_ID_FLAGS(func, bpf_lookup_system_key, KF_ACQUIRE | KF_RET_NULL) +BTF_ID_FLAGS(func, bpf_key_put, KF_RELEASE) +#ifdef CONFIG_SYSTEM_DATA_VERIFICATION +BTF_ID_FLAGS(func, bpf_verify_pkcs7_signature, KF_SLEEPABLE) +#endif +BTF_SET8_END(key_sig_kfunc_set) + +static const struct btf_kfunc_id_set bpf_key_sig_kfunc_set = { + .owner = THIS_MODULE, + .set = &key_sig_kfunc_set, +}; + +static int __init bpf_key_sig_kfuncs_init(void) +{ + return register_btf_kfunc_id_set(BPF_PROG_TYPE_TRACING, + &bpf_key_sig_kfunc_set); +} + +late_initcall(bpf_key_sig_kfuncs_init); +#endif /* CONFIG_KEYS */ + static const struct bpf_func_proto * bpf_tracing_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) { @@ -2042,9 +2241,15 @@ static __always_inline void __bpf_trace_run(struct bpf_prog *prog, u64 *args) { cant_sleep(); + if (unlikely(this_cpu_inc_return(*(prog->active)) != 1)) { + bpf_prog_inc_misses_counter(prog); + goto out; + } rcu_read_lock(); (void) bpf_prog_run(prog, args); rcu_read_unlock(); +out: + this_cpu_dec(*(prog->active)); } #define UNPACK(...) 
__VA_ARGS__ @@ -2414,13 +2619,13 @@ kprobe_multi_link_prog_run(struct bpf_kprobe_multi_link *link, } static void -kprobe_multi_link_handler(struct fprobe *fp, unsigned long entry_ip, +kprobe_multi_link_handler(struct fprobe *fp, unsigned long fentry_ip, struct pt_regs *regs) { struct bpf_kprobe_multi_link *link; link = container_of(fp, struct bpf_kprobe_multi_link, fp); - kprobe_multi_link_prog_run(link, entry_ip, regs); + kprobe_multi_link_prog_run(link, get_entry_ip(fentry_ip), regs); } static int symbols_cmp_r(const void *a, const void *b, const void *priv) diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 439e2ab6905e..447d2e2a8549 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -8265,8 +8265,7 @@ static int kallsyms_callback(void *data, const char *name, if (args->addrs[idx]) return 0; - addr = ftrace_location(addr); - if (!addr) + if (!ftrace_location(addr)) return 0; args->addrs[idx] = addr; diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c index 25d8ecf105aa..13d578ce2a09 100644 --- a/net/bpf/test_run.c +++ b/net/bpf/test_run.c @@ -606,6 +606,38 @@ noinline void bpf_kfunc_call_memb1_release(struct prog_test_member1 *p) WARN_ON_ONCE(1); } +static int *__bpf_kfunc_call_test_get_mem(struct prog_test_ref_kfunc *p, const int size) +{ + if (size > 2 * sizeof(int)) + return NULL; + + return (int *)p; +} + +noinline int *bpf_kfunc_call_test_get_rdwr_mem(struct prog_test_ref_kfunc *p, const int rdwr_buf_size) +{ + return __bpf_kfunc_call_test_get_mem(p, rdwr_buf_size); +} + +noinline int *bpf_kfunc_call_test_get_rdonly_mem(struct prog_test_ref_kfunc *p, const int rdonly_buf_size) +{ + return __bpf_kfunc_call_test_get_mem(p, rdonly_buf_size); +} + +/* the next 2 ones can't be really used for testing expect to ensure + * that the verifier rejects the call. + * Acquire functions must return struct pointers, so these ones are + * failing. 
+ */ +noinline int *bpf_kfunc_call_test_acq_rdonly_mem(struct prog_test_ref_kfunc *p, const int rdonly_buf_size) +{ + return __bpf_kfunc_call_test_get_mem(p, rdonly_buf_size); +} + +noinline void bpf_kfunc_call_int_mem_release(int *p) +{ +} + noinline struct prog_test_ref_kfunc * bpf_kfunc_call_test_kptr_get(struct prog_test_ref_kfunc **pp, int a, int b) { @@ -712,6 +744,10 @@ BTF_ID_FLAGS(func, bpf_kfunc_call_memb_acquire, KF_ACQUIRE | KF_RET_NULL) BTF_ID_FLAGS(func, bpf_kfunc_call_test_release, KF_RELEASE) BTF_ID_FLAGS(func, bpf_kfunc_call_memb_release, KF_RELEASE) BTF_ID_FLAGS(func, bpf_kfunc_call_memb1_release, KF_RELEASE) +BTF_ID_FLAGS(func, bpf_kfunc_call_test_get_rdwr_mem, KF_RET_NULL) +BTF_ID_FLAGS(func, bpf_kfunc_call_test_get_rdonly_mem, KF_RET_NULL) +BTF_ID_FLAGS(func, bpf_kfunc_call_test_acq_rdonly_mem, KF_ACQUIRE | KF_RET_NULL) +BTF_ID_FLAGS(func, bpf_kfunc_call_int_mem_release, KF_RELEASE) BTF_ID_FLAGS(func, bpf_kfunc_call_test_kptr_get, KF_ACQUIRE | KF_RET_NULL | KF_KPTR_GET) BTF_ID_FLAGS(func, bpf_kfunc_call_test_pass_ctx) BTF_ID_FLAGS(func, bpf_kfunc_call_test_pass1) @@ -1634,6 +1670,7 @@ static int __init bpf_prog_test_run_init(void) ret = register_btf_kfunc_id_set(BPF_PROG_TYPE_SCHED_CLS, &bpf_prog_test_kfunc_set); ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_TRACING, &bpf_prog_test_kfunc_set); + ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_SYSCALL, &bpf_prog_test_kfunc_set); return ret ?: register_btf_id_dtor_kfuncs(bpf_prog_test_dtor_kfunc, ARRAY_SIZE(bpf_prog_test_dtor_kfunc), THIS_MODULE); diff --git a/net/core/filter.c b/net/core/filter.c index 31608801078e..bb0136e7a8e4 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -18,6 +18,7 @@ */ #include <linux/atomic.h> +#include <linux/bpf_verifier.h> #include <linux/module.h> #include <linux/types.h> #include <linux/mm.h> @@ -5101,6 +5102,59 @@ static int bpf_sol_tcp_setsockopt(struct sock *sk, int optname, return 0; } +static int sol_tcp_sockopt_congestion(struct sock 
*sk, char *optval, + int *optlen, bool getopt) +{ + struct tcp_sock *tp; + int ret; + + if (*optlen < 2) + return -EINVAL; + + if (getopt) { + if (!inet_csk(sk)->icsk_ca_ops) + return -EINVAL; + /* BPF expects NULL-terminated tcp-cc string */ + optval[--(*optlen)] = '\0'; + return do_tcp_getsockopt(sk, SOL_TCP, TCP_CONGESTION, + KERNEL_SOCKPTR(optval), + KERNEL_SOCKPTR(optlen)); + } + + /* "cdg" is the only cc that alloc a ptr + * in inet_csk_ca area. The bpf-tcp-cc may + * overwrite this ptr after switching to cdg. + */ + if (*optlen >= sizeof("cdg") - 1 && !strncmp("cdg", optval, *optlen)) + return -ENOTSUPP; + + /* It stops this looping + * + * .init => bpf_setsockopt(tcp_cc) => .init => + * bpf_setsockopt(tcp_cc)" => .init => .... + * + * The second bpf_setsockopt(tcp_cc) is not allowed + * in order to break the loop when both .init + * are the same bpf prog. + * + * This applies even the second bpf_setsockopt(tcp_cc) + * does not cause a loop. This limits only the first + * '.init' can call bpf_setsockopt(TCP_CONGESTION) to + * pick a fallback cc (eg. peer does not support ECN) + * and the second '.init' cannot fallback to + * another. 
+ */ + tp = tcp_sk(sk); + if (tp->bpf_chg_cc_inprogress) + return -EBUSY; + + tp->bpf_chg_cc_inprogress = 1; + ret = do_tcp_setsockopt(sk, SOL_TCP, TCP_CONGESTION, + KERNEL_SOCKPTR(optval), *optlen); + tp->bpf_chg_cc_inprogress = 0; + return ret; +} + static int sol_tcp_sockopt(struct sock *sk, int optname, char *optval, int *optlen, bool getopt) @@ -5124,9 +5178,7 @@ static int sol_tcp_sockopt(struct sock *sk, int optname, return -EINVAL; break; case TCP_CONGESTION: - if (*optlen < 2) - return -EINVAL; - break; + return sol_tcp_sockopt_congestion(sk, optval, optlen, getopt); case TCP_SAVED_SYN: if (*optlen < 1) return -EINVAL; @@ -5151,13 +5203,6 @@ static int sol_tcp_sockopt(struct sock *sk, int optname, return 0; } - if (optname == TCP_CONGESTION) { - if (!inet_csk(sk)->icsk_ca_ops) - return -EINVAL; - /* BPF expects NULL-terminated tcp-cc string */ - optval[--(*optlen)] = '\0'; - } - return do_tcp_getsockopt(sk, SOL_TCP, optname, KERNEL_SOCKPTR(optval), KERNEL_SOCKPTR(optlen)); @@ -5284,12 +5329,6 @@ static int _bpf_getsockopt(struct sock *sk, int level, int optname, BPF_CALL_5(bpf_sk_setsockopt, struct sock *, sk, int, level, int, optname, char *, optval, int, optlen) { - if (level == SOL_TCP && optname == TCP_CONGESTION) { - if (optlen >= sizeof("cdg") - 1 && - !strncmp("cdg", optval, optlen)) - return -ENOTSUPP; - } - return _bpf_setsockopt(sk, level, optname, optval, optlen); } @@ -8605,6 +8644,36 @@ static bool tc_cls_act_is_valid_access(int off, int size, return bpf_skb_is_valid_access(off, size, type, prog, info); } +DEFINE_MUTEX(nf_conn_btf_access_lock); +EXPORT_SYMBOL_GPL(nf_conn_btf_access_lock); + +int (*nfct_btf_struct_access)(struct bpf_verifier_log *log, const struct btf *btf, + const struct btf_type *t, int off, int size, + enum bpf_access_type atype, u32 *next_btf_id, + enum bpf_type_flag *flag); +EXPORT_SYMBOL_GPL(nfct_btf_struct_access); + +static int tc_cls_act_btf_struct_access(struct bpf_verifier_log *log, + const struct btf *btf, + const 
struct btf_type *t, int off, + int size, enum bpf_access_type atype, + u32 *next_btf_id, + enum bpf_type_flag *flag) +{ + int ret = -EACCES; + + if (atype == BPF_READ) + return btf_struct_access(log, btf, t, off, size, atype, next_btf_id, + flag); + + mutex_lock(&nf_conn_btf_access_lock); + if (nfct_btf_struct_access) + ret = nfct_btf_struct_access(log, btf, t, off, size, atype, next_btf_id, flag); + mutex_unlock(&nf_conn_btf_access_lock); + + return ret; +} + static bool __is_valid_xdp_access(int off, int size) { if (off < 0 || off >= sizeof(struct xdp_md)) @@ -8664,6 +8733,27 @@ void bpf_warn_invalid_xdp_action(struct net_device *dev, struct bpf_prog *prog, } EXPORT_SYMBOL_GPL(bpf_warn_invalid_xdp_action); +static int xdp_btf_struct_access(struct bpf_verifier_log *log, + const struct btf *btf, + const struct btf_type *t, int off, + int size, enum bpf_access_type atype, + u32 *next_btf_id, + enum bpf_type_flag *flag) +{ + int ret = -EACCES; + + if (atype == BPF_READ) + return btf_struct_access(log, btf, t, off, size, atype, next_btf_id, + flag); + + mutex_lock(&nf_conn_btf_access_lock); + if (nfct_btf_struct_access) + ret = nfct_btf_struct_access(log, btf, t, off, size, atype, next_btf_id, flag); + mutex_unlock(&nf_conn_btf_access_lock); + + return ret; +} + static bool sock_addr_is_valid_access(int off, int size, enum bpf_access_type type, const struct bpf_prog *prog, @@ -10558,6 +10648,7 @@ const struct bpf_verifier_ops tc_cls_act_verifier_ops = { .convert_ctx_access = tc_cls_act_convert_ctx_access, .gen_prologue = tc_cls_act_prologue, .gen_ld_abs = bpf_gen_ld_abs, + .btf_struct_access = tc_cls_act_btf_struct_access, }; const struct bpf_prog_ops tc_cls_act_prog_ops = { @@ -10569,6 +10660,7 @@ const struct bpf_verifier_ops xdp_verifier_ops = { .is_valid_access = xdp_is_valid_access, .convert_ctx_access = xdp_convert_ctx_access, .gen_prologue = bpf_noop_prologue, + .btf_struct_access = xdp_btf_struct_access, }; const struct bpf_prog_ops xdp_prog_ops = { diff --git 
a/net/core/skmsg.c b/net/core/skmsg.c index 188f8558d27d..ca70525621c7 100644 --- a/net/core/skmsg.c +++ b/net/core/skmsg.c @@ -434,8 +434,10 @@ int sk_msg_recvmsg(struct sock *sk, struct sk_psock *psock, struct msghdr *msg, if (copied + copy > len) copy = len - copied; copy = copy_page_to_iter(page, sge->offset, copy, iter); - if (!copy) - return copied ? copied : -EFAULT; + if (!copy) { + copied = copied ? copied : -EFAULT; + goto out; + } copied += copy; if (likely(!peek)) { @@ -455,7 +457,7 @@ int sk_msg_recvmsg(struct sock *sk, struct sk_psock *psock, struct msghdr *msg, * didn't copy the entire length lets just break. */ if (copy != sge->length) - return copied; + goto out; sk_msg_iter_var_next(i); } @@ -477,7 +479,9 @@ int sk_msg_recvmsg(struct sock *sk, struct sk_psock *psock, struct msghdr *msg, } msg_rx = sk_psock_peek_msg(psock); } - +out: + if (psock->work_state.skb && copied > 0) + schedule_work(&psock->work); return copied; } EXPORT_SYMBOL_GPL(sk_msg_recvmsg); diff --git a/net/core/stream.c b/net/core/stream.c index ccc083cdef23..1105057ce00a 100644 --- a/net/core/stream.c +++ b/net/core/stream.c @@ -159,7 +159,8 @@ int sk_stream_wait_memory(struct sock *sk, long *timeo_p) *timeo_p = current_timeo; } out: - remove_wait_queue(sk_sleep(sk), &wait); + if (!sock_flag(sk, SOCK_DEAD)) + remove_wait_queue(sk_sleep(sk), &wait); return err; do_error: diff --git a/net/ipv4/bpf_tcp_ca.c b/net/ipv4/bpf_tcp_ca.c index 85a9e500c42d..6da16ae6a962 100644 --- a/net/ipv4/bpf_tcp_ca.c +++ b/net/ipv4/bpf_tcp_ca.c @@ -124,7 +124,7 @@ static int bpf_tcp_ca_btf_struct_access(struct bpf_verifier_log *log, return -EACCES; } - return NOT_INIT; + return 0; } BPF_CALL_2(bpf_tcp_send_ack, struct tcp_sock *, tp, u32, rcv_nxt) diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c index b83c2bd9d722..517042caf6dc 100644 --- a/net/ipv4/ping.c +++ b/net/ipv4/ping.c @@ -33,6 +33,7 @@ #include <linux/skbuff.h> #include <linux/proc_fs.h> #include <linux/export.h> +#include <linux/bpf-cgroup.h> 
#include <net/sock.h> #include <net/ping.h> #include <net/udp.h> @@ -295,6 +296,19 @@ void ping_close(struct sock *sk, long timeout) } EXPORT_SYMBOL_GPL(ping_close); +static int ping_pre_connect(struct sock *sk, struct sockaddr *uaddr, + int addr_len) +{ + /* This check is replicated from __ip4_datagram_connect() and + * intended to prevent BPF program called below from accessing bytes + * that are out of the bound specified by user in addr_len. + */ + if (addr_len < sizeof(struct sockaddr_in)) + return -EINVAL; + + return BPF_CGROUP_RUN_PROG_INET4_CONNECT_LOCK(sk, uaddr); +} + /* Checks the bind address and possibly modifies sk->sk_bound_dev_if. */ static int ping_check_bind_addr(struct sock *sk, struct inet_sock *isk, struct sockaddr *uaddr, int addr_len) @@ -1009,6 +1023,7 @@ struct proto ping_prot = { .owner = THIS_MODULE, .init = ping_init_sock, .close = ping_close, + .pre_connect = ping_pre_connect, .connect = ip4_datagram_connect, .disconnect = __udp_disconnect, .setsockopt = ip_setsockopt, diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index 442838ab0253..79f30f026d89 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -561,6 +561,7 @@ struct sock *tcp_create_openreq_child(const struct sock *sk, newtp->fastopen_req = NULL; RCU_INIT_POINTER(newtp->fastopen_rsk, NULL); + newtp->bpf_chg_cc_inprogress = 0; tcp_bpf_clone(sk, newsk); __TCP_INC_STATS(sock_net(sk), TCP_MIB_PASSIVEOPENS); diff --git a/net/ipv6/ping.c b/net/ipv6/ping.c index 91b840514656..5f2ef8493714 100644 --- a/net/ipv6/ping.c +++ b/net/ipv6/ping.c @@ -20,6 +20,7 @@ #include <net/udp.h> #include <net/transp_v6.h> #include <linux/proc_fs.h> +#include <linux/bpf-cgroup.h> #include <net/ping.h> static void ping_v6_destroy(struct sock *sk) @@ -49,6 +50,20 @@ static int dummy_ipv6_chk_addr(struct net *net, const struct in6_addr *addr, return 0; } +static int ping_v6_pre_connect(struct sock *sk, struct sockaddr *uaddr, + int addr_len) +{ + /* This check is 
replicated from __ip6_datagram_connect() and + * intended to prevent BPF program called below from accessing + * bytes that are out of the bound specified by user in addr_len. + */ + + if (addr_len < SIN6_LEN_RFC2133) + return -EINVAL; + + return BPF_CGROUP_RUN_PROG_INET6_CONNECT_LOCK(sk, uaddr); +} + static int ping_v6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) { struct inet_sock *inet = inet_sk(sk); @@ -191,6 +206,7 @@ struct proto pingv6_prot = { .init = ping_init_sock, .close = ping_close, .destroy = ping_v6_destroy, + .pre_connect = ping_v6_pre_connect, .connect = ip6_datagram_connect_v6_only, .disconnect = __udp_disconnect, .setsockopt = ipv6_setsockopt, diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile index 06df49ea6329..0f060d100880 100644 --- a/net/netfilter/Makefile +++ b/net/netfilter/Makefile @@ -60,6 +60,12 @@ obj-$(CONFIG_NF_NAT) += nf_nat.o nf_nat-$(CONFIG_NF_NAT_REDIRECT) += nf_nat_redirect.o nf_nat-$(CONFIG_NF_NAT_MASQUERADE) += nf_nat_masquerade.o +ifeq ($(CONFIG_NF_NAT),m) +nf_nat-$(CONFIG_DEBUG_INFO_BTF_MODULES) += nf_nat_bpf.o +else ifeq ($(CONFIG_NF_NAT),y) +nf_nat-$(CONFIG_DEBUG_INFO_BTF) += nf_nat_bpf.o +endif + # NAT helpers obj-$(CONFIG_NF_NAT_AMANDA) += nf_nat_amanda.o obj-$(CONFIG_NF_NAT_FTP) += nf_nat_ftp.o diff --git a/net/netfilter/nf_conntrack_bpf.c b/net/netfilter/nf_conntrack_bpf.c index 1cd87b28c9b0..8639e7efd0e2 100644 --- a/net/netfilter/nf_conntrack_bpf.c +++ b/net/netfilter/nf_conntrack_bpf.c @@ -6,12 +6,14 @@ * are exposed through to BPF programs is explicitly unstable. 
*/ +#include <linux/bpf_verifier.h> #include <linux/bpf.h> #include <linux/btf.h> +#include <linux/filter.h> +#include <linux/mutex.h> #include <linux/types.h> #include <linux/btf_ids.h> #include <linux/net_namespace.h> -#include <net/netfilter/nf_conntrack.h> #include <net/netfilter/nf_conntrack_bpf.h> #include <net/netfilter/nf_conntrack_core.h> @@ -134,7 +136,6 @@ __bpf_nf_ct_alloc_entry(struct net *net, struct bpf_sock_tuple *bpf_tuple, memset(&ct->proto, 0, sizeof(ct->proto)); __nf_ct_set_timeout(ct, timeout * HZ); - ct->status |= IPS_CONFIRMED; out: if (opts->netns_id >= 0) @@ -184,14 +185,58 @@ static struct nf_conn *__bpf_nf_ct_lookup(struct net *net, return ct; } +BTF_ID_LIST(btf_nf_conn_ids) +BTF_ID(struct, nf_conn) +BTF_ID(struct, nf_conn___init) + +/* Check writes into `struct nf_conn` */ +static int _nf_conntrack_btf_struct_access(struct bpf_verifier_log *log, + const struct btf *btf, + const struct btf_type *t, int off, + int size, enum bpf_access_type atype, + u32 *next_btf_id, + enum bpf_type_flag *flag) +{ + const struct btf_type *ncit; + const struct btf_type *nct; + size_t end; + + ncit = btf_type_by_id(btf, btf_nf_conn_ids[1]); + nct = btf_type_by_id(btf, btf_nf_conn_ids[0]); + + if (t != nct && t != ncit) { + bpf_log(log, "only read is supported\n"); + return -EACCES; + } + + /* `struct nf_conn` and `struct nf_conn___init` have the same layout + * so we are safe to simply merge offset checks here + */ + switch (off) { +#if defined(CONFIG_NF_CONNTRACK_MARK) + case offsetof(struct nf_conn, mark): + end = offsetofend(struct nf_conn, mark); + break; +#endif + default: + bpf_log(log, "no write support to nf_conn at off %d\n", off); + return -EACCES; + } + + if (off + size > end) { + bpf_log(log, + "write access at off %d with size %d beyond the member of nf_conn ended at %zu\n", + off, size, end); + return -EACCES; + } + + return 0; +} + __diag_push(); __diag_ignore_all("-Wmissing-prototypes", "Global functions as their definitions will be in 
nf_conntrack BTF"); -struct nf_conn___init { - struct nf_conn ct; -}; - /* bpf_xdp_ct_alloc - Allocate a new CT entry * * Parameters: @@ -339,6 +384,7 @@ struct nf_conn *bpf_ct_insert_entry(struct nf_conn___init *nfct_i) struct nf_conn *nfct = (struct nf_conn *)nfct_i; int err; + nfct->status |= IPS_CONFIRMED; err = nf_conntrack_hash_check_insert(nfct); if (err < 0) { nf_conntrack_free(nfct); @@ -449,5 +495,19 @@ int register_nf_conntrack_bpf(void) int ret; ret = register_btf_kfunc_id_set(BPF_PROG_TYPE_XDP, &nf_conntrack_kfunc_set); - return ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_SCHED_CLS, &nf_conntrack_kfunc_set); + ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_SCHED_CLS, &nf_conntrack_kfunc_set); + if (!ret) { + mutex_lock(&nf_conn_btf_access_lock); + nfct_btf_struct_access = _nf_conntrack_btf_struct_access; + mutex_unlock(&nf_conn_btf_access_lock); + } + + return ret; +} + +void cleanup_nf_conntrack_bpf(void) +{ + mutex_lock(&nf_conn_btf_access_lock); + nfct_btf_struct_access = NULL; + mutex_unlock(&nf_conn_btf_access_lock); } diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index 8208a28ea342..f97bda06d2a9 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -2516,6 +2516,7 @@ static int kill_all(struct nf_conn *i, void *data) void nf_conntrack_cleanup_start(void) { + cleanup_nf_conntrack_bpf(); conntrack_gc_work.exiting = true; } diff --git a/net/netfilter/nf_nat_bpf.c b/net/netfilter/nf_nat_bpf.c new file mode 100644 index 000000000000..0fa5a0bbb0ff --- /dev/null +++ b/net/netfilter/nf_nat_bpf.c @@ -0,0 +1,79 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* Unstable NAT Helpers for XDP and TC-BPF hook + * + * These are called from the XDP and SCHED_CLS BPF programs. Note that it is + * allowed to break compatibility for these functions since the interface they + * are exposed through to BPF programs is explicitly unstable. 
+ */ + +#include <linux/bpf.h> +#include <linux/btf_ids.h> +#include <net/netfilter/nf_conntrack_bpf.h> +#include <net/netfilter/nf_conntrack_core.h> +#include <net/netfilter/nf_nat.h> + +__diag_push(); +__diag_ignore_all("-Wmissing-prototypes", + "Global functions as their definitions will be in nf_nat BTF"); + +/* bpf_ct_set_nat_info - Set source or destination nat address + * + * Set source or destination nat address of the newly allocated + * nf_conn before insertion. This must be invoked for referenced + * PTR_TO_BTF_ID to nf_conn___init. + * + * Parameters: + * @nfct - Pointer to referenced nf_conn object, obtained using + * bpf_xdp_ct_alloc or bpf_skb_ct_alloc. + * @addr - Nat source/destination address + * @port - Nat source/destination port. Non-positive values are + * interpreted as select a random port. + * @manip - NF_NAT_MANIP_SRC or NF_NAT_MANIP_DST + */ +int bpf_ct_set_nat_info(struct nf_conn___init *nfct, + union nf_inet_addr *addr, int port, + enum nf_nat_manip_type manip) +{ + struct nf_conn *ct = (struct nf_conn *)nfct; + u16 proto = nf_ct_l3num(ct); + struct nf_nat_range2 range; + + if (proto != NFPROTO_IPV4 && proto != NFPROTO_IPV6) + return -EINVAL; + + memset(&range, 0, sizeof(struct nf_nat_range2)); + range.flags = NF_NAT_RANGE_MAP_IPS; + range.min_addr = *addr; + range.max_addr = range.min_addr; + if (port > 0) { + range.flags |= NF_NAT_RANGE_PROTO_SPECIFIED; + range.min_proto.all = cpu_to_be16(port); + range.max_proto.all = range.min_proto.all; + } + + return nf_nat_setup_info(ct, &range, manip) == NF_DROP ? 
-ENOMEM : 0; +} + +__diag_pop() + +BTF_SET8_START(nf_nat_kfunc_set) +BTF_ID_FLAGS(func, bpf_ct_set_nat_info, KF_TRUSTED_ARGS) +BTF_SET8_END(nf_nat_kfunc_set) + +static const struct btf_kfunc_id_set nf_bpf_nat_kfunc_set = { + .owner = THIS_MODULE, + .set = &nf_nat_kfunc_set, +}; + +int register_nf_nat_bpf(void) +{ + int ret; + + ret = register_btf_kfunc_id_set(BPF_PROG_TYPE_XDP, + &nf_bpf_nat_kfunc_set); + if (ret) + return ret; + + return register_btf_kfunc_id_set(BPF_PROG_TYPE_SCHED_CLS, + &nf_bpf_nat_kfunc_set); +} diff --git a/net/netfilter/nf_nat_core.c b/net/netfilter/nf_nat_core.c index 7981be526f26..d8e6380f6337 100644 --- a/net/netfilter/nf_nat_core.c +++ b/net/netfilter/nf_nat_core.c @@ -16,7 +16,7 @@ #include <linux/siphash.h> #include <linux/rtnetlink.h> -#include <net/netfilter/nf_conntrack.h> +#include <net/netfilter/nf_conntrack_bpf.h> #include <net/netfilter/nf_conntrack_core.h> #include <net/netfilter/nf_conntrack_helper.h> #include <net/netfilter/nf_conntrack_seqadj.h> @@ -1152,7 +1152,7 @@ static int __init nf_nat_init(void) WARN_ON(nf_nat_hook != NULL); RCU_INIT_POINTER(nf_nat_hook, &nat_hook); - return 0; + return register_nf_nat_bpf(); } static void __exit nf_nat_cleanup(void) diff --git a/samples/bpf/task_fd_query_kern.c b/samples/bpf/task_fd_query_kern.c index c821294e1774..186ac0a79c0a 100644 --- a/samples/bpf/task_fd_query_kern.c +++ b/samples/bpf/task_fd_query_kern.c @@ -10,7 +10,7 @@ int bpf_prog1(struct pt_regs *ctx) return 0; } -SEC("kretprobe/blk_account_io_done") +SEC("kretprobe/__blk_account_io_done") int bpf_prog2(struct pt_regs *ctx) { return 0; diff --git a/samples/bpf/task_fd_query_user.c b/samples/bpf/task_fd_query_user.c index 424718c0872c..a33d74bd3a4b 100644 --- a/samples/bpf/task_fd_query_user.c +++ b/samples/bpf/task_fd_query_user.c @@ -348,7 +348,7 @@ int main(int argc, char **argv) /* test two functions in the corresponding *_kern.c file */ CHECK_AND_RET(test_debug_fs_kprobe(0, "blk_mq_start_request", 
BPF_FD_TYPE_KPROBE)); - CHECK_AND_RET(test_debug_fs_kprobe(1, "blk_account_io_done", + CHECK_AND_RET(test_debug_fs_kprobe(1, "__blk_account_io_done", BPF_FD_TYPE_KRETPROBE)); /* test nondebug fs kprobe */ diff --git a/samples/bpf/tracex3_kern.c b/samples/bpf/tracex3_kern.c index 710a4410b2fb..bde6591cb20c 100644 --- a/samples/bpf/tracex3_kern.c +++ b/samples/bpf/tracex3_kern.c @@ -49,7 +49,7 @@ struct { __uint(max_entries, SLOTS); } lat_map SEC(".maps"); -SEC("kprobe/blk_account_io_done") +SEC("kprobe/__blk_account_io_done") int bpf_prog2(struct pt_regs *ctx) { long rq = PT_REGS_PARM1(ctx); diff --git a/samples/bpf/xdp_router_ipv4_user.c b/samples/bpf/xdp_router_ipv4_user.c index 294fc15ad1cb..683913bbf279 100644 --- a/samples/bpf/xdp_router_ipv4_user.c +++ b/samples/bpf/xdp_router_ipv4_user.c @@ -209,7 +209,7 @@ static void read_route(struct nlmsghdr *nh, int nll) /* Rereading the route table to check if * there is an entry with the same * prefix but a different metric as the - * deleted enty. + * deleted entry. 
*/ get_route_table(AF_INET); } else if (prefix_key->data[0] == diff --git a/security/keys/internal.h b/security/keys/internal.h index 9b9cf3b6fcbb..3c1e7122076b 100644 --- a/security/keys/internal.h +++ b/security/keys/internal.h @@ -165,8 +165,6 @@ extern struct key *request_key_and_link(struct key_type *type, extern bool lookup_user_key_possessed(const struct key *key, const struct key_match_data *match_data); -#define KEY_LOOKUP_CREATE 0x01 -#define KEY_LOOKUP_PARTIAL 0x02 extern long join_session_keyring(const char *name); extern void key_change_session_keyring(struct callback_head *twork); diff --git a/tools/bpf/bpftool/Documentation/bpftool-map.rst b/tools/bpf/bpftool/Documentation/bpftool-map.rst index 7c188a598444..7f3b67a8b48f 100644 --- a/tools/bpf/bpftool/Documentation/bpftool-map.rst +++ b/tools/bpf/bpftool/Documentation/bpftool-map.rst @@ -55,7 +55,7 @@ MAP COMMANDS | | **devmap** | **devmap_hash** | **sockmap** | **cpumap** | **xskmap** | **sockhash** | | **cgroup_storage** | **reuseport_sockarray** | **percpu_cgroup_storage** | | **queue** | **stack** | **sk_storage** | **struct_ops** | **ringbuf** | **inode_storage** -| | **task_storage** | **bloom_filter** } +| | **task_storage** | **bloom_filter** | **user_ringbuf** } DESCRIPTION =========== diff --git a/tools/bpf/bpftool/btf.c b/tools/bpf/bpftool/btf.c index 0744bd1150be..68a70ac03c80 100644 --- a/tools/bpf/bpftool/btf.c +++ b/tools/bpf/bpftool/btf.c @@ -43,11 +43,6 @@ static const char * const btf_kind_str[NR_BTF_KINDS] = { [BTF_KIND_ENUM64] = "ENUM64", }; -struct btf_attach_point { - __u32 obj_id; - __u32 btf_id; -}; - static const char *btf_int_enc_str(__u8 encoding) { switch (encoding) { @@ -640,10 +635,9 @@ static int do_dump(int argc, char **argv) btf = btf__parse_split(*argv, base ?: base_btf); err = libbpf_get_error(btf); - if (err) { - btf = NULL; + if (!btf) { p_err("failed to load BTF from %s: %s", - *argv, strerror(err)); + *argv, strerror(errno)); goto done; } NEXT_ARG(); @@ -688,8 
+682,8 @@ static int do_dump(int argc, char **argv) btf = btf__load_from_kernel_by_id_split(btf_id, base_btf); err = libbpf_get_error(btf); - if (err) { - p_err("get btf by id (%u): %s", btf_id, strerror(err)); + if (!btf) { + p_err("get btf by id (%u): %s", btf_id, strerror(errno)); goto done; } } @@ -825,7 +819,7 @@ build_btf_type_table(struct hashmap *tab, enum bpf_obj_type type, u32_as_hash_field(id)); if (err) { p_err("failed to append entry to hashmap for BTF ID %u, object ID %u: %s", - btf_id, id, strerror(errno)); + btf_id, id, strerror(-err)); goto err_free; } } diff --git a/tools/bpf/bpftool/gen.c b/tools/bpf/bpftool/gen.c index 7070dcffa822..cf8b4e525c88 100644 --- a/tools/bpf/bpftool/gen.c +++ b/tools/bpf/bpftool/gen.c @@ -1594,14 +1594,14 @@ static int do_object(int argc, char **argv) err = bpf_linker__add_file(linker, file, NULL); if (err) { - p_err("failed to link '%s': %s (%d)", file, strerror(err), err); + p_err("failed to link '%s': %s (%d)", file, strerror(errno), errno); goto out; } } err = bpf_linker__finalize(linker); if (err) { - p_err("failed to finalize ELF file: %s (%d)", strerror(err), err); + p_err("failed to finalize ELF file: %s (%d)", strerror(errno), errno); goto out; } diff --git a/tools/bpf/bpftool/link.c b/tools/bpf/bpftool/link.c index ef0dc2f8d5a2..2863639706dd 100644 --- a/tools/bpf/bpftool/link.c +++ b/tools/bpf/bpftool/link.c @@ -106,6 +106,13 @@ static const char *cgroup_order_string(__u32 order) } } +static bool is_iter_task_target(const char *target_name) +{ + return strcmp(target_name, "task") == 0 || + strcmp(target_name, "task_file") == 0 || + strcmp(target_name, "task_vma") == 0; +} + static void show_iter_json(struct bpf_link_info *info, json_writer_t *wtr) { const char *target_name = u64_to_ptr(info->iter.target_name); @@ -114,6 +121,12 @@ static void show_iter_json(struct bpf_link_info *info, json_writer_t *wtr) if (is_iter_map_target(target_name)) jsonw_uint_field(wtr, "map_id", info->iter.map.map_id); + else if 
(is_iter_task_target(target_name)) { + if (info->iter.task.tid) + jsonw_uint_field(wtr, "tid", info->iter.task.tid); + else if (info->iter.task.pid) + jsonw_uint_field(wtr, "pid", info->iter.task.pid); + } if (is_iter_cgroup_target(target_name)) { jsonw_lluint_field(wtr, "cgroup_id", info->iter.cgroup.cgroup_id); @@ -237,6 +250,12 @@ static void show_iter_plain(struct bpf_link_info *info) if (is_iter_map_target(target_name)) printf("map_id %u ", info->iter.map.map_id); + else if (is_iter_task_target(target_name)) { + if (info->iter.task.tid) + printf("tid %u ", info->iter.task.tid); + else if (info->iter.task.pid) + printf("pid %u ", info->iter.task.pid); + } if (is_iter_cgroup_target(target_name)) { printf("cgroup_id %llu ", info->iter.cgroup.cgroup_id); diff --git a/tools/bpf/bpftool/map.c b/tools/bpf/bpftool/map.c index 38b6bc9c26c3..9a6ca9f31133 100644 --- a/tools/bpf/bpftool/map.c +++ b/tools/bpf/bpftool/map.c @@ -1459,7 +1459,7 @@ static int do_help(int argc, char **argv) " devmap | devmap_hash | sockmap | cpumap | xskmap | sockhash |\n" " cgroup_storage | reuseport_sockarray | percpu_cgroup_storage |\n" " queue | stack | sk_storage | struct_ops | ringbuf | inode_storage |\n" - " task_storage | bloom_filter }\n" + " task_storage | bloom_filter | user_ringbuf }\n" " " HELP_SPEC_OPTIONS " |\n" " {-f|--bpffs} | {-n|--nomount} }\n" "", diff --git a/tools/bpf/bpftool/map_perf_ring.c b/tools/bpf/bpftool/map_perf_ring.c index 6b0c410152de..21d7d447e1f3 100644 --- a/tools/bpf/bpftool/map_perf_ring.c +++ b/tools/bpf/bpftool/map_perf_ring.c @@ -29,13 +29,6 @@ static volatile bool stop; -struct event_ring_info { - int fd; - int key; - unsigned int cpu; - void *mem; -}; - struct perf_event_sample { struct perf_event_header header; __u64 time; @@ -195,10 +188,9 @@ int do_event_pipe(int argc, char **argv) opts.map_keys = &ctx.idx; pb = perf_buffer__new_raw(map_fd, MMAP_PAGE_CNT, &perf_attr, print_bpf_output, &ctx, &opts); - err = libbpf_get_error(pb); - if (err) { + if 
(!pb) { p_err("failed to create perf buffer: %s (%d)", - strerror(err), err); + strerror(errno), errno); goto err_close_map; } @@ -213,7 +205,7 @@ int do_event_pipe(int argc, char **argv) err = perf_buffer__poll(pb, 200); if (err < 0 && err != -EINTR) { p_err("perf buffer polling failed: %s (%d)", - strerror(err), err); + strerror(errno), errno); goto err_close_pb; } } diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 793103b10eab..3075018a4ef8 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -110,6 +110,12 @@ union bpf_iter_link_info { __u32 cgroup_fd; __u64 cgroup_id; } cgroup; + /* Parameters of task iterators. */ + struct { + __u32 tid; + __u32 pid; + __u32 pid_fd; + } task; }; /* BPF syscall commands, see bpf(2) man-page for more details. */ @@ -928,6 +934,7 @@ enum bpf_map_type { BPF_MAP_TYPE_INODE_STORAGE, BPF_MAP_TYPE_TASK_STORAGE, BPF_MAP_TYPE_BLOOM_FILTER, + BPF_MAP_TYPE_USER_RINGBUF, }; /* Note that tracing related programs such as @@ -4950,6 +4957,7 @@ union bpf_attr { * Get address of the traced function (for tracing and kprobe programs). * Return * Address of the traced function. + * 0 for kprobes placed within the function (not at the entry). * * u64 bpf_get_attach_cookie(void *ctx) * Description @@ -5079,12 +5087,12 @@ union bpf_attr { * * long bpf_get_func_arg(void *ctx, u32 n, u64 *value) * Description - * Get **n**-th argument (zero based) of the traced function (for tracing programs) + * Get **n**-th argument register (zero based) of the traced function (for tracing programs) * returned in **value**. * * Return * 0 on success. - * **-EINVAL** if n >= arguments count of traced function. + * **-EINVAL** if n >= argument register count of traced function. 
* * long bpf_get_func_ret(void *ctx, u64 *value) * Description @@ -5097,10 +5105,11 @@ union bpf_attr { * * long bpf_get_func_arg_cnt(void *ctx) * Description - * Get number of arguments of the traced function (for tracing programs). + * Get number of registers of the traced function (for tracing programs) where + * function arguments are stored in these registers. * * Return - * The number of arguments of the traced function. + * The number of argument registers of the traced function. * * int bpf_get_retval(void) * Description @@ -5386,6 +5395,43 @@ union bpf_attr { * Return * Current *ktime*. * + * long bpf_user_ringbuf_drain(struct bpf_map *map, void *callback_fn, void *ctx, u64 flags) + * Description + * Drain samples from the specified user ring buffer, and invoke + * the provided callback for each such sample: + * + * long (\*callback_fn)(struct bpf_dynptr \*dynptr, void \*ctx); + * + * If **callback_fn** returns 0, the helper will continue to try + * and drain the next sample, up to a maximum of + * BPF_MAX_USER_RINGBUF_SAMPLES samples. If the return value is 1, + * the helper will skip the rest of the samples and return. Other + * return values are not used now, and will be rejected by the + * verifier. + * Return + * The number of drained samples if no error was encountered while + * draining samples, or 0 if no samples were present in the ring + * buffer. If a user-space producer was epoll-waiting on this map, + * and at least one sample was drained, they will receive an event + * notification notifying them of available space in the ring + * buffer. If the BPF_RB_NO_WAKEUP flag is passed to this + * function, no wakeup notification will be sent. If the + * BPF_RB_FORCE_WAKEUP flag is passed, a wakeup notification will + * be sent even if no sample was drained. + * + * On failure, the returned value is one of the following: + * + * **-EBUSY** if the ring buffer is contended, and another calling + * context was concurrently draining the ring buffer. 
+ * + * **-EINVAL** if user-space is not properly tracking the ring + * buffer due to the producer position not being aligned to 8 + * bytes, a sample not being aligned to 8 bytes, or the producer + * position not matching the advertised length of a sample. + * + * **-E2BIG** if user-space has tried to publish a sample which is + * larger than the size of the ring buffer, or which cannot fit + * within a struct bpf_dynptr. */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -5597,6 +5643,7 @@ union bpf_attr { FN(tcp_raw_check_syncookie_ipv4), \ FN(tcp_raw_check_syncookie_ipv6), \ FN(ktime_get_tai_ns), \ + FN(user_ringbuf_drain), \ /* */ /* integer value in 'imm' field of BPF_CALL instruction selects which helper @@ -6218,6 +6265,10 @@ struct bpf_link_info { __u64 cgroup_id; __u32 order; } cgroup; + struct { + __u32 tid; + __u32 pid; + } task; }; } iter; struct { diff --git a/tools/lib/bpf/bpf_helpers.h b/tools/lib/bpf/bpf_helpers.h index 867b734839dd..d37c4fe2849d 100644 --- a/tools/lib/bpf/bpf_helpers.h +++ b/tools/lib/bpf/bpf_helpers.h @@ -131,7 +131,7 @@ /* * Helper function to perform a tail call with a constant/immediate map slot. */ -#if (!defined(__clang__) || __clang_major__ >= 8) && defined(__bpf__) +#if __clang_major__ >= 8 && defined(__bpf__) static __always_inline void bpf_tail_call_static(void *ctx, const void *map, const __u32 slot) { @@ -139,8 +139,8 @@ bpf_tail_call_static(void *ctx, const void *map, const __u32 slot) __bpf_unreachable(); /* - * Provide a hard guarantee that the compiler won't optimize setting r2 - * (map pointer) and r3 (constant map index) from _different paths_ ending + * Provide a hard guarantee that LLVM won't optimize setting r2 (map + * pointer) and r3 (constant map index) from _different paths_ ending * up at the _same_ call insn as otherwise we won't be able to use the * jmpq/nopl retpoline-free patching by the x86-64 JIT in the kernel * given they mismatch. 
See also d2e4c1e6c294 ("bpf: Constant map key @@ -148,37 +148,18 @@ bpf_tail_call_static(void *ctx, const void *map, const __u32 slot) * * Note on clobber list: we need to stay in-line with BPF calling * convention, so even if we don't end up using r0, r4, r5, we need - * to mark them as clobber so that the compiler doesn't end up using - * them before / after the call. + * to mark them as clobber so that LLVM doesn't end up using them + * before / after the call. */ - asm volatile( -#ifdef __clang__ - "r1 = %[ctx]\n\t" + asm volatile("r1 = %[ctx]\n\t" "r2 = %[map]\n\t" "r3 = %[slot]\n\t" -#else - "mov %%r1,%[ctx]\n\t" - "mov %%r2,%[map]\n\t" - "mov %%r3,%[slot]\n\t" -#endif "call 12" :: [ctx]"r"(ctx), [map]"r"(map), [slot]"i"(slot) : "r0", "r1", "r2", "r3", "r4", "r5"); } #endif -/* - * Helper structure used by eBPF C program - * to describe BPF map attributes to libbpf loader - */ -struct bpf_map_def { - unsigned int type; - unsigned int key_size; - unsigned int value_size; - unsigned int max_entries; - unsigned int map_flags; -} __attribute__((deprecated("use BTF-defined maps in .maps section"))); - enum libbpf_pin_type { LIBBPF_PIN_NONE, /* PIN_BY_NAME: pin maps by name (in /sys/fs/bpf by default) */ diff --git a/tools/lib/bpf/bpf_tracing.h b/tools/lib/bpf/bpf_tracing.h index 5fdb93da423b..2972dc25ff72 100644 --- a/tools/lib/bpf/bpf_tracing.h +++ b/tools/lib/bpf/bpf_tracing.h @@ -438,6 +438,113 @@ typeof(name(0)) name(unsigned long long *ctx) \ static __always_inline typeof(name(0)) \ ____##name(unsigned long long *ctx, ##args) +#ifndef ___bpf_nth2 +#define ___bpf_nth2(_, _1, _2, _3, _4, _5, _6, _7, _8, _9, _10, _11, _12, _13, \ + _14, _15, _16, _17, _18, _19, _20, _21, _22, _23, _24, N, ...) N +#endif +#ifndef ___bpf_narg2 +#define ___bpf_narg2(...) 
\ + ___bpf_nth2(_, ##__VA_ARGS__, 12, 12, 11, 11, 10, 10, 9, 9, 8, 8, 7, 7, \ + 6, 6, 5, 5, 4, 4, 3, 3, 2, 2, 1, 1, 0) +#endif + +#define ___bpf_treg_cnt(t) \ + __builtin_choose_expr(sizeof(t) == 1, 1, \ + __builtin_choose_expr(sizeof(t) == 2, 1, \ + __builtin_choose_expr(sizeof(t) == 4, 1, \ + __builtin_choose_expr(sizeof(t) == 8, 1, \ + __builtin_choose_expr(sizeof(t) == 16, 2, \ + (void)0))))) + +#define ___bpf_reg_cnt0() (0) +#define ___bpf_reg_cnt1(t, x) (___bpf_reg_cnt0() + ___bpf_treg_cnt(t)) +#define ___bpf_reg_cnt2(t, x, args...) (___bpf_reg_cnt1(args) + ___bpf_treg_cnt(t)) +#define ___bpf_reg_cnt3(t, x, args...) (___bpf_reg_cnt2(args) + ___bpf_treg_cnt(t)) +#define ___bpf_reg_cnt4(t, x, args...) (___bpf_reg_cnt3(args) + ___bpf_treg_cnt(t)) +#define ___bpf_reg_cnt5(t, x, args...) (___bpf_reg_cnt4(args) + ___bpf_treg_cnt(t)) +#define ___bpf_reg_cnt6(t, x, args...) (___bpf_reg_cnt5(args) + ___bpf_treg_cnt(t)) +#define ___bpf_reg_cnt7(t, x, args...) (___bpf_reg_cnt6(args) + ___bpf_treg_cnt(t)) +#define ___bpf_reg_cnt8(t, x, args...) (___bpf_reg_cnt7(args) + ___bpf_treg_cnt(t)) +#define ___bpf_reg_cnt9(t, x, args...) (___bpf_reg_cnt8(args) + ___bpf_treg_cnt(t)) +#define ___bpf_reg_cnt10(t, x, args...) (___bpf_reg_cnt9(args) + ___bpf_treg_cnt(t)) +#define ___bpf_reg_cnt11(t, x, args...) (___bpf_reg_cnt10(args) + ___bpf_treg_cnt(t)) +#define ___bpf_reg_cnt12(t, x, args...) (___bpf_reg_cnt11(args) + ___bpf_treg_cnt(t)) +#define ___bpf_reg_cnt(args...) 
___bpf_apply(___bpf_reg_cnt, ___bpf_narg2(args))(args) + +#define ___bpf_union_arg(t, x, n) \ + __builtin_choose_expr(sizeof(t) == 1, ({ union { __u8 z[1]; t x; } ___t = { .z = {ctx[n]}}; ___t.x; }), \ + __builtin_choose_expr(sizeof(t) == 2, ({ union { __u16 z[1]; t x; } ___t = { .z = {ctx[n]} }; ___t.x; }), \ + __builtin_choose_expr(sizeof(t) == 4, ({ union { __u32 z[1]; t x; } ___t = { .z = {ctx[n]} }; ___t.x; }), \ + __builtin_choose_expr(sizeof(t) == 8, ({ union { __u64 z[1]; t x; } ___t = {.z = {ctx[n]} }; ___t.x; }), \ + __builtin_choose_expr(sizeof(t) == 16, ({ union { __u64 z[2]; t x; } ___t = {.z = {ctx[n], ctx[n + 1]} }; ___t.x; }), \ + (void)0))))) + +#define ___bpf_ctx_arg0(n, args...) +#define ___bpf_ctx_arg1(n, t, x) , ___bpf_union_arg(t, x, n - ___bpf_reg_cnt1(t, x)) +#define ___bpf_ctx_arg2(n, t, x, args...) , ___bpf_union_arg(t, x, n - ___bpf_reg_cnt2(t, x, args)) ___bpf_ctx_arg1(n, args) +#define ___bpf_ctx_arg3(n, t, x, args...) , ___bpf_union_arg(t, x, n - ___bpf_reg_cnt3(t, x, args)) ___bpf_ctx_arg2(n, args) +#define ___bpf_ctx_arg4(n, t, x, args...) , ___bpf_union_arg(t, x, n - ___bpf_reg_cnt4(t, x, args)) ___bpf_ctx_arg3(n, args) +#define ___bpf_ctx_arg5(n, t, x, args...) , ___bpf_union_arg(t, x, n - ___bpf_reg_cnt5(t, x, args)) ___bpf_ctx_arg4(n, args) +#define ___bpf_ctx_arg6(n, t, x, args...) , ___bpf_union_arg(t, x, n - ___bpf_reg_cnt6(t, x, args)) ___bpf_ctx_arg5(n, args) +#define ___bpf_ctx_arg7(n, t, x, args...) , ___bpf_union_arg(t, x, n - ___bpf_reg_cnt7(t, x, args)) ___bpf_ctx_arg6(n, args) +#define ___bpf_ctx_arg8(n, t, x, args...) , ___bpf_union_arg(t, x, n - ___bpf_reg_cnt8(t, x, args)) ___bpf_ctx_arg7(n, args) +#define ___bpf_ctx_arg9(n, t, x, args...) , ___bpf_union_arg(t, x, n - ___bpf_reg_cnt9(t, x, args)) ___bpf_ctx_arg8(n, args) +#define ___bpf_ctx_arg10(n, t, x, args...) , ___bpf_union_arg(t, x, n - ___bpf_reg_cnt10(t, x, args)) ___bpf_ctx_arg9(n, args) +#define ___bpf_ctx_arg11(n, t, x, args...) 
, ___bpf_union_arg(t, x, n - ___bpf_reg_cnt11(t, x, args)) ___bpf_ctx_arg10(n, args) +#define ___bpf_ctx_arg12(n, t, x, args...) , ___bpf_union_arg(t, x, n - ___bpf_reg_cnt12(t, x, args)) ___bpf_ctx_arg11(n, args) +#define ___bpf_ctx_arg(args...) ___bpf_apply(___bpf_ctx_arg, ___bpf_narg2(args))(___bpf_reg_cnt(args), args) + +#define ___bpf_ctx_decl0() +#define ___bpf_ctx_decl1(t, x) , t x +#define ___bpf_ctx_decl2(t, x, args...) , t x ___bpf_ctx_decl1(args) +#define ___bpf_ctx_decl3(t, x, args...) , t x ___bpf_ctx_decl2(args) +#define ___bpf_ctx_decl4(t, x, args...) , t x ___bpf_ctx_decl3(args) +#define ___bpf_ctx_decl5(t, x, args...) , t x ___bpf_ctx_decl4(args) +#define ___bpf_ctx_decl6(t, x, args...) , t x ___bpf_ctx_decl5(args) +#define ___bpf_ctx_decl7(t, x, args...) , t x ___bpf_ctx_decl6(args) +#define ___bpf_ctx_decl8(t, x, args...) , t x ___bpf_ctx_decl7(args) +#define ___bpf_ctx_decl9(t, x, args...) , t x ___bpf_ctx_decl8(args) +#define ___bpf_ctx_decl10(t, x, args...) , t x ___bpf_ctx_decl9(args) +#define ___bpf_ctx_decl11(t, x, args...) , t x ___bpf_ctx_decl10(args) +#define ___bpf_ctx_decl12(t, x, args...) , t x ___bpf_ctx_decl11(args) +#define ___bpf_ctx_decl(args...) ___bpf_apply(___bpf_ctx_decl, ___bpf_narg2(args))(args) + +/* + * BPF_PROG2 is an enhanced version of BPF_PROG in order to handle struct + * arguments. Since each struct argument might take one or two u64 values + * in the trampoline stack, argument type size is needed to place proper number + * of u64 values for each argument. Therefore, BPF_PROG2 has different + * syntax from BPF_PROG. For example, for the following BPF_PROG syntax: + * + * int BPF_PROG(test2, int a, int b) { ... } + * + * the corresponding BPF_PROG2 syntax is: + * + * int BPF_PROG2(test2, int, a, int, b) { ... } + * + * where type and the corresponding argument name are separated by comma. 
+ * + * Use BPF_PROG2 macro if one of the arguments might be a struct/union larger + * than 8 bytes: + * + * int BPF_PROG2(test_struct_arg, struct bpf_testmod_struct_arg_1, a, int, b, + * int, c, int, d, struct bpf_testmod_struct_arg_2, e, int, ret) + * { + * // access a, b, c, d, e, and ret directly + * ... + * } + */ +#define BPF_PROG2(name, args...) \ +name(unsigned long long *ctx); \ +static __always_inline typeof(name(0)) \ +____##name(unsigned long long *ctx ___bpf_ctx_decl(args)); \ +typeof(name(0)) name(unsigned long long *ctx) \ +{ \ + return ____##name(ctx ___bpf_ctx_arg(args)); \ +} \ +static __always_inline typeof(name(0)) \ +____##name(unsigned long long *ctx ___bpf_ctx_decl(args)) + struct pt_regs; #define ___bpf_kprobe_args0() ctx diff --git a/tools/lib/bpf/btf.c b/tools/lib/bpf/btf.c index 361131518d63..d88647da2c7f 100644 --- a/tools/lib/bpf/btf.c +++ b/tools/lib/bpf/btf.c @@ -4642,20 +4642,17 @@ static int btf_dedup_remap_types(struct btf_dedup *d) */ struct btf *btf__load_vmlinux_btf(void) { - struct { - const char *path_fmt; - bool raw_btf; - } locations[] = { + const char *locations[] = { /* try canonical vmlinux BTF through sysfs first */ - { "/sys/kernel/btf/vmlinux", true /* raw BTF */ }, - /* fall back to trying to find vmlinux ELF on disk otherwise */ - { "/boot/vmlinux-%1$s" }, - { "/lib/modules/%1$s/vmlinux-%1$s" }, - { "/lib/modules/%1$s/build/vmlinux" }, - { "/usr/lib/modules/%1$s/kernel/vmlinux" }, - { "/usr/lib/debug/boot/vmlinux-%1$s" }, - { "/usr/lib/debug/boot/vmlinux-%1$s.debug" }, - { "/usr/lib/debug/lib/modules/%1$s/vmlinux" }, + "/sys/kernel/btf/vmlinux", + /* fall back to trying to find vmlinux on disk otherwise */ + "/boot/vmlinux-%1$s", + "/lib/modules/%1$s/vmlinux-%1$s", + "/lib/modules/%1$s/build/vmlinux", + "/usr/lib/modules/%1$s/kernel/vmlinux", + "/usr/lib/debug/boot/vmlinux-%1$s", + "/usr/lib/debug/boot/vmlinux-%1$s.debug", + "/usr/lib/debug/lib/modules/%1$s/vmlinux", }; char path[PATH_MAX + 1]; struct utsname buf; @@ 
-4665,15 +4662,12 @@ struct btf *btf__load_vmlinux_btf(void) uname(&buf); for (i = 0; i < ARRAY_SIZE(locations); i++) { - snprintf(path, PATH_MAX, locations[i].path_fmt, buf.release); + snprintf(path, PATH_MAX, locations[i], buf.release); - if (access(path, R_OK)) + if (faccessat(AT_FDCWD, path, R_OK, AT_EACCESS)) continue; - if (locations[i].raw_btf) - btf = btf__parse_raw(path); - else - btf = btf__parse_elf(path, NULL); + btf = btf__parse(path, NULL); err = libbpf_get_error(btf); pr_debug("loading kernel BTF '%s': %d\n", path, err); if (err) diff --git a/tools/lib/bpf/btf.h b/tools/lib/bpf/btf.h index ae543144ee30..8e6880d91c84 100644 --- a/tools/lib/bpf/btf.h +++ b/tools/lib/bpf/btf.h @@ -486,6 +486,8 @@ static inline struct btf_enum *btf_enum(const struct btf_type *t) return (struct btf_enum *)(t + 1); } +struct btf_enum64; + static inline struct btf_enum64 *btf_enum64(const struct btf_type *t) { return (struct btf_enum64 *)(t + 1); @@ -493,7 +495,28 @@ static inline struct btf_enum64 *btf_enum64(const struct btf_type *t) static inline __u64 btf_enum64_value(const struct btf_enum64 *e) { - return ((__u64)e->val_hi32 << 32) | e->val_lo32; + /* struct btf_enum64 is introduced in Linux 6.0, which is very + * bleeding-edge. Here we are avoiding relying on struct btf_enum64 + * definition coming from kernel UAPI headers to support wider range + * of system-wide kernel headers. + * + * Given this header can be also included from C++ applications, that + * further restricts C tricks we can use (like using compatible + * anonymous struct). So just treat struct btf_enum64 as + * a three-element array of u32 and access second (lo32) and third + * (hi32) elements directly. 
+ * + * For reference, here is a struct btf_enum64 definition: + * + * const struct btf_enum64 { + * __u32 name_off; + * __u32 val_lo32; + * __u32 val_hi32; + * }; + */ + const __u32 *e64 = (const __u32 *)e; + + return ((__u64)e64[2] << 32) | e64[1]; } static inline struct btf_member *btf_members(const struct btf_type *t) diff --git a/tools/lib/bpf/btf_dump.c b/tools/lib/bpf/btf_dump.c index 627edb5bb6de..4221f73a74d0 100644 --- a/tools/lib/bpf/btf_dump.c +++ b/tools/lib/bpf/btf_dump.c @@ -2385,7 +2385,7 @@ int btf_dump__dump_type_data(struct btf_dump *d, __u32 id, d->typed_dump->indent_lvl = OPTS_GET(opts, indent_level, 0); /* default indent string is a tab */ - if (!opts->indent_str) + if (!OPTS_GET(opts, indent_str, NULL)) d->typed_dump->indent_str[0] = '\t'; else libbpf_strlcpy(d->typed_dump->indent_str, opts->indent_str, diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 3ad139285fad..184ce1684dcd 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -163,6 +163,7 @@ static const char * const map_type_name[] = { [BPF_MAP_TYPE_INODE_STORAGE] = "inode_storage", [BPF_MAP_TYPE_TASK_STORAGE] = "task_storage", [BPF_MAP_TYPE_BLOOM_FILTER] = "bloom_filter", + [BPF_MAP_TYPE_USER_RINGBUF] = "user_ringbuf", }; static const char * const prog_type_name[] = { @@ -883,7 +884,7 @@ __u32 get_kernel_version(void) __u32 major, minor, patch; struct utsname info; - if (access(ubuntu_kver_file, R_OK) == 0) { + if (faccessat(AT_FDCWD, ubuntu_kver_file, R_OK, AT_EACCESS) == 0) { FILE *f; f = fopen(ubuntu_kver_file, "r"); @@ -2096,19 +2097,30 @@ static bool get_map_field_int(const char *map_name, const struct btf *btf, return true; } +static int pathname_concat(char *buf, size_t buf_sz, const char *path, const char *name) +{ + int len; + + len = snprintf(buf, buf_sz, "%s/%s", path, name); + if (len < 0) + return -EINVAL; + if (len >= buf_sz) + return -ENAMETOOLONG; + + return 0; +} + static int build_map_pin_path(struct bpf_map *map, const char *path) { 
char buf[PATH_MAX]; - int len; + int err; if (!path) path = "/sys/fs/bpf"; - len = snprintf(buf, PATH_MAX, "%s/%s", path, bpf_map__name(map)); - if (len < 0) - return -EINVAL; - else if (len >= PATH_MAX) - return -ENAMETOOLONG; + err = pathname_concat(buf, sizeof(buf), path, bpf_map__name(map)); + if (err) + return err; return bpf_map__set_pin_path(map, buf); } @@ -2372,6 +2384,12 @@ static size_t adjust_ringbuf_sz(size_t sz) return sz; } +static bool map_is_ringbuf(const struct bpf_map *map) +{ + return map->def.type == BPF_MAP_TYPE_RINGBUF || + map->def.type == BPF_MAP_TYPE_USER_RINGBUF; +} + static void fill_map_from_def(struct bpf_map *map, const struct btf_map_def *def) { map->def.type = def->map_type; @@ -2386,7 +2404,7 @@ static void fill_map_from_def(struct bpf_map *map, const struct btf_map_def *def map->btf_value_type_id = def->value_type_id; /* auto-adjust BPF ringbuf map max_entries to be a multiple of page size */ - if (map->def.type == BPF_MAP_TYPE_RINGBUF) + if (map_is_ringbuf(map)) map->def.max_entries = adjust_ringbuf_sz(map->def.max_entries); if (def->parts & MAP_DEF_MAP_TYPE) @@ -4369,7 +4387,7 @@ int bpf_map__set_max_entries(struct bpf_map *map, __u32 max_entries) map->def.max_entries = max_entries; /* auto-adjust BPF ringbuf map max_entries to be a multiple of page size */ - if (map->def.type == BPF_MAP_TYPE_RINGBUF) + if (map_is_ringbuf(map)) map->def.max_entries = adjust_ringbuf_sz(map->def.max_entries); return 0; @@ -7961,17 +7979,9 @@ int bpf_object__pin_maps(struct bpf_object *obj, const char *path) continue; if (path) { - int len; - - len = snprintf(buf, PATH_MAX, "%s/%s", path, - bpf_map__name(map)); - if (len < 0) { - err = -EINVAL; - goto err_unpin_maps; - } else if (len >= PATH_MAX) { - err = -ENAMETOOLONG; + err = pathname_concat(buf, sizeof(buf), path, bpf_map__name(map)); + if (err) goto err_unpin_maps; - } sanitize_pin_path(buf); pin_path = buf; } else if (!map->pin_path) { @@ -8009,14 +8019,9 @@ int bpf_object__unpin_maps(struct 
bpf_object *obj, const char *path) char buf[PATH_MAX]; if (path) { - int len; - - len = snprintf(buf, PATH_MAX, "%s/%s", path, - bpf_map__name(map)); - if (len < 0) - return libbpf_err(-EINVAL); - else if (len >= PATH_MAX) - return libbpf_err(-ENAMETOOLONG); + err = pathname_concat(buf, sizeof(buf), path, bpf_map__name(map)); + if (err) + return libbpf_err(err); sanitize_pin_path(buf); pin_path = buf; } else if (!map->pin_path) { @@ -8034,6 +8039,7 @@ int bpf_object__unpin_maps(struct bpf_object *obj, const char *path) int bpf_object__pin_programs(struct bpf_object *obj, const char *path) { struct bpf_program *prog; + char buf[PATH_MAX]; int err; if (!obj) @@ -8045,17 +8051,9 @@ int bpf_object__pin_programs(struct bpf_object *obj, const char *path) } bpf_object__for_each_program(prog, obj) { - char buf[PATH_MAX]; - int len; - - len = snprintf(buf, PATH_MAX, "%s/%s", path, prog->name); - if (len < 0) { - err = -EINVAL; - goto err_unpin_programs; - } else if (len >= PATH_MAX) { - err = -ENAMETOOLONG; + err = pathname_concat(buf, sizeof(buf), path, prog->name); + if (err) goto err_unpin_programs; - } err = bpf_program__pin(prog, buf); if (err) @@ -8066,13 +8064,7 @@ int bpf_object__pin_programs(struct bpf_object *obj, const char *path) err_unpin_programs: while ((prog = bpf_object__prev_program(obj, prog))) { - char buf[PATH_MAX]; - int len; - - len = snprintf(buf, PATH_MAX, "%s/%s", path, prog->name); - if (len < 0) - continue; - else if (len >= PATH_MAX) + if (pathname_concat(buf, sizeof(buf), path, prog->name)) continue; bpf_program__unpin(prog, buf); @@ -8091,13 +8083,10 @@ int bpf_object__unpin_programs(struct bpf_object *obj, const char *path) bpf_object__for_each_program(prog, obj) { char buf[PATH_MAX]; - int len; - len = snprintf(buf, PATH_MAX, "%s/%s", path, prog->name); - if (len < 0) - return libbpf_err(-EINVAL); - else if (len >= PATH_MAX) - return libbpf_err(-ENAMETOOLONG); + err = pathname_concat(buf, sizeof(buf), path, prog->name); + if (err) + return 
libbpf_err(err); err = bpf_program__unpin(prog, buf); if (err) @@ -9084,11 +9073,15 @@ static int libbpf_find_attach_btf_id(struct bpf_program *prog, const char *attac int err = 0; /* BPF program's BTF ID */ - if (attach_prog_fd) { + if (prog->type == BPF_PROG_TYPE_EXT || attach_prog_fd) { + if (!attach_prog_fd) { + pr_warn("prog '%s': attach program FD is not set\n", prog->name); + return -EINVAL; + } err = libbpf_find_prog_btf_id(attach_name, attach_prog_fd); if (err < 0) { - pr_warn("failed to find BPF program (FD %d) BTF ID for '%s': %d\n", - attach_prog_fd, attach_name, err); + pr_warn("prog '%s': failed to find BPF program (FD %d) BTF ID for '%s': %d\n", + prog->name, attach_prog_fd, attach_name, err); return err; } *btf_obj_fd = 0; @@ -9105,7 +9098,8 @@ static int libbpf_find_attach_btf_id(struct bpf_program *prog, const char *attac err = find_kernel_btf_id(prog->obj, attach_name, attach_type, btf_obj_fd, btf_type_id); } if (err) { - pr_warn("failed to find kernel BTF type ID of '%s': %d\n", attach_name, err); + pr_warn("prog '%s': failed to find kernel BTF type ID of '%s': %d\n", + prog->name, attach_name, err); return err; } return 0; @@ -9910,7 +9904,7 @@ static bool use_debugfs(void) static int has_debugfs = -1; if (has_debugfs < 0) - has_debugfs = access(DEBUGFS, F_OK) == 0; + has_debugfs = faccessat(AT_FDCWD, DEBUGFS, F_OK, AT_EACCESS) == 0; return has_debugfs == 1; } @@ -10727,7 +10721,7 @@ static int resolve_full_path(const char *file, char *result, size_t result_sz) continue; snprintf(result, result_sz, "%.*s/%s", seg_len, s, file); /* ensure it has required permissions */ - if (access(result, perm) < 0) + if (faccessat(AT_FDCWD, result, perm, AT_EACCESS) < 0) continue; pr_debug("resolved '%s' to '%s'\n", file, result); return 0; diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h index 88a1ac34b12a..eee883f007f9 100644 --- a/tools/lib/bpf/libbpf.h +++ b/tools/lib/bpf/libbpf.h @@ -118,7 +118,9 @@ struct bpf_object_open_opts { * auto-pinned 
to that path on load; defaults to "/sys/fs/bpf". */ const char *pin_root_path; - long :0; + + __u32 :32; /* stub out now removed attach_prog_fd */ + /* Additional kernel config content that augments and overrides * system Kconfig for CONFIG_xxx externs. */ @@ -1011,6 +1013,7 @@ LIBBPF_API int bpf_tc_query(const struct bpf_tc_hook *hook, /* Ring buffer APIs */ struct ring_buffer; +struct user_ring_buffer; typedef int (*ring_buffer_sample_fn)(void *ctx, void *data, size_t size); @@ -1030,6 +1033,112 @@ LIBBPF_API int ring_buffer__poll(struct ring_buffer *rb, int timeout_ms); LIBBPF_API int ring_buffer__consume(struct ring_buffer *rb); LIBBPF_API int ring_buffer__epoll_fd(const struct ring_buffer *rb); +struct user_ring_buffer_opts { + size_t sz; /* size of this struct, for forward/backward compatibility */ +}; + +#define user_ring_buffer_opts__last_field sz + +/* @brief **user_ring_buffer__new()** creates a new instance of a user ring + * buffer. + * + * @param map_fd A file descriptor to a BPF_MAP_TYPE_USER_RINGBUF map. + * @param opts Options for how the ring buffer should be created. + * @return A user ring buffer on success; NULL and errno being set on a + * failure. + */ +LIBBPF_API struct user_ring_buffer * +user_ring_buffer__new(int map_fd, const struct user_ring_buffer_opts *opts); + +/* @brief **user_ring_buffer__reserve()** reserves a pointer to a sample in the + * user ring buffer. + * @param rb A pointer to a user ring buffer. + * @param size The size of the sample, in bytes. + * @return A pointer to an 8-byte aligned reserved region of the user ring + * buffer; NULL, and errno being set if a sample could not be reserved. + * + * This function is *not* thread safe, and callers must synchronize accessing + * this function if there are multiple producers. If a size is requested that + * is larger than the size of the entire ring buffer, errno will be set to + * E2BIG and NULL is returned. 
If the ring buffer could accommodate the size, + * but currently does not have enough space, errno is set to ENOSPC and NULL is + * returned. + * + * After initializing the sample, callers must invoke + * **user_ring_buffer__submit()** to post the sample to the kernel. Otherwise, + * the sample must be freed with **user_ring_buffer__discard()**. + */ +LIBBPF_API void *user_ring_buffer__reserve(struct user_ring_buffer *rb, __u32 size); + +/* @brief **user_ring_buffer__reserve_blocking()** reserves a record in the + * ring buffer, possibly blocking for up to @timeout_ms until a sample becomes + * available. + * @param rb The user ring buffer. + * @param size The size of the sample, in bytes. + * @param timeout_ms The amount of time, in milliseconds, for which the caller + * should block when waiting for a sample. -1 causes the caller to block + * indefinitely. + * @return A pointer to an 8-byte aligned reserved region of the user ring + * buffer; NULL, and errno being set if a sample could not be reserved. + * + * This function is *not* thread safe, and callers must synchronize + * accessing this function if there are multiple producers + * + * If **timeout_ms** is -1, the function will block indefinitely until a sample + * becomes available. Otherwise, **timeout_ms** must be non-negative, or errno + * is set to EINVAL, and NULL is returned. If **timeout_ms** is 0, no blocking + * will occur and the function will return immediately after attempting to + * reserve a sample. + * + * If **size** is larger than the size of the entire ring buffer, errno is set + * to E2BIG and NULL is returned. If the ring buffer could accommodate + * **size**, but currently does not have enough space, the caller will block + * until at most **timeout_ms** has elapsed. If insufficient space is available + * at that time, errno is set to ENOSPC, and NULL is returned. 
+ * + * The kernel guarantees that it will wake up this thread to check if + * sufficient space is available in the ring buffer at least once per + * invocation of the **bpf_ringbuf_drain()** helper function, provided that at + * least one sample is consumed, and the BPF program did not invoke the + * function with BPF_RB_NO_WAKEUP. A wakeup may occur sooner than that, but the + * kernel does not guarantee this. If the helper function is invoked with + * BPF_RB_FORCE_WAKEUP, a wakeup event will be sent even if no sample is + * consumed. + * + * When a sample of size **size** is found within **timeout_ms**, a pointer to + * the sample is returned. After initializing the sample, callers must invoke + * **user_ring_buffer__submit()** to post the sample to the ring buffer. + * Otherwise, the sample must be freed with **user_ring_buffer__discard()**. + */ +LIBBPF_API void *user_ring_buffer__reserve_blocking(struct user_ring_buffer *rb, + __u32 size, + int timeout_ms); + +/* @brief **user_ring_buffer__submit()** submits a previously reserved sample + * into the ring buffer. + * @param rb The user ring buffer. + * @param sample A reserved sample. + * + * It is not necessary to synchronize amongst multiple producers when invoking + * this function. + */ +LIBBPF_API void user_ring_buffer__submit(struct user_ring_buffer *rb, void *sample); + +/* @brief **user_ring_buffer__discard()** discards a previously reserved sample. + * @param rb The user ring buffer. + * @param sample A reserved sample. + * + * It is not necessary to synchronize amongst multiple producers when invoking + * this function. + */ +LIBBPF_API void user_ring_buffer__discard(struct user_ring_buffer *rb, void *sample); + +/* @brief **user_ring_buffer__free()** frees a ring buffer that was previously + * created with **user_ring_buffer__new()**. + * @param rb The user ring buffer being freed. 
+ */ +LIBBPF_API void user_ring_buffer__free(struct user_ring_buffer *rb); + /* Perf buffer APIs */ struct perf_buffer; diff --git a/tools/lib/bpf/libbpf.map b/tools/lib/bpf/libbpf.map index 2b928dc21af0..c1d6aa7c82b6 100644 --- a/tools/lib/bpf/libbpf.map +++ b/tools/lib/bpf/libbpf.map @@ -368,3 +368,13 @@ LIBBPF_1.0.0 { libbpf_bpf_prog_type_str; perf_buffer__buffer; }; + +LIBBPF_1.1.0 { + global: + user_ring_buffer__discard; + user_ring_buffer__free; + user_ring_buffer__new; + user_ring_buffer__reserve; + user_ring_buffer__reserve_blocking; + user_ring_buffer__submit; +} LIBBPF_1.0.0; diff --git a/tools/lib/bpf/libbpf_probes.c b/tools/lib/bpf/libbpf_probes.c index 6d495656f554..f3a8e8e74eb8 100644 --- a/tools/lib/bpf/libbpf_probes.c +++ b/tools/lib/bpf/libbpf_probes.c @@ -231,6 +231,7 @@ static int probe_map_create(enum bpf_map_type map_type) return btf_fd; break; case BPF_MAP_TYPE_RINGBUF: + case BPF_MAP_TYPE_USER_RINGBUF: key_size = 0; value_size = 0; max_entries = 4096; diff --git a/tools/lib/bpf/libbpf_version.h b/tools/lib/bpf/libbpf_version.h index 2fb2f4290080..e944f5bce728 100644 --- a/tools/lib/bpf/libbpf_version.h +++ b/tools/lib/bpf/libbpf_version.h @@ -4,6 +4,6 @@ #define __LIBBPF_VERSION_H #define LIBBPF_MAJOR_VERSION 1 -#define LIBBPF_MINOR_VERSION 0 +#define LIBBPF_MINOR_VERSION 1 #endif /* __LIBBPF_VERSION_H */ diff --git a/tools/lib/bpf/nlattr.c b/tools/lib/bpf/nlattr.c index f57e77a6e40f..3900d052ed19 100644 --- a/tools/lib/bpf/nlattr.c +++ b/tools/lib/bpf/nlattr.c @@ -32,7 +32,7 @@ static struct nlattr *nla_next(const struct nlattr *nla, int *remaining) static int nla_ok(const struct nlattr *nla, int remaining) { - return remaining >= sizeof(*nla) && + return remaining >= (int)sizeof(*nla) && nla->nla_len >= sizeof(*nla) && nla->nla_len <= remaining; } diff --git a/tools/lib/bpf/ringbuf.c b/tools/lib/bpf/ringbuf.c index 8bc117bcc7bc..d285171d4b69 100644 --- a/tools/lib/bpf/ringbuf.c +++ b/tools/lib/bpf/ringbuf.c @@ -16,6 +16,7 @@ #include 
<asm/barrier.h> #include <sys/mman.h> #include <sys/epoll.h> +#include <time.h> #include "libbpf.h" #include "libbpf_internal.h" @@ -39,6 +40,23 @@ struct ring_buffer { int ring_cnt; }; +struct user_ring_buffer { + struct epoll_event event; + unsigned long *consumer_pos; + unsigned long *producer_pos; + void *data; + unsigned long mask; + size_t page_size; + int map_fd; + int epoll_fd; +}; + +/* 8-byte ring buffer header structure */ +struct ringbuf_hdr { + __u32 len; + __u32 pad; +}; + static void ringbuf_unmap_ring(struct ring_buffer *rb, struct ring *r) { if (r->consumer_pos) { @@ -300,3 +318,256 @@ int ring_buffer__epoll_fd(const struct ring_buffer *rb) { return rb->epoll_fd; } + +static void user_ringbuf_unmap_ring(struct user_ring_buffer *rb) +{ + if (rb->consumer_pos) { + munmap(rb->consumer_pos, rb->page_size); + rb->consumer_pos = NULL; + } + if (rb->producer_pos) { + munmap(rb->producer_pos, rb->page_size + 2 * (rb->mask + 1)); + rb->producer_pos = NULL; + } +} + +void user_ring_buffer__free(struct user_ring_buffer *rb) +{ + if (!rb) + return; + + user_ringbuf_unmap_ring(rb); + + if (rb->epoll_fd >= 0) + close(rb->epoll_fd); + + free(rb); +} + +static int user_ringbuf_map(struct user_ring_buffer *rb, int map_fd) +{ + struct bpf_map_info info; + __u32 len = sizeof(info); + void *tmp; + struct epoll_event *rb_epoll; + int err; + + memset(&info, 0, sizeof(info)); + + err = bpf_obj_get_info_by_fd(map_fd, &info, &len); + if (err) { + err = -errno; + pr_warn("user ringbuf: failed to get map info for fd=%d: %d\n", map_fd, err); + return err; + } + + if (info.type != BPF_MAP_TYPE_USER_RINGBUF) { + pr_warn("user ringbuf: map fd=%d is not BPF_MAP_TYPE_USER_RINGBUF\n", map_fd); + return -EINVAL; + } + + rb->map_fd = map_fd; + rb->mask = info.max_entries - 1; + + /* Map read-only consumer page */ + tmp = mmap(NULL, rb->page_size, PROT_READ, MAP_SHARED, map_fd, 0); + if (tmp == MAP_FAILED) { + err = -errno; + pr_warn("user ringbuf: failed to mmap consumer page for map 
fd=%d: %d\n", + map_fd, err); + return err; + } + rb->consumer_pos = tmp; + + /* Map read-write the producer page and data pages. We map the data + * region as twice the total size of the ring buffer to allow the + * simple reading and writing of samples that wrap around the end of + * the buffer. See the kernel implementation for details. + */ + tmp = mmap(NULL, rb->page_size + 2 * info.max_entries, + PROT_READ | PROT_WRITE, MAP_SHARED, map_fd, rb->page_size); + if (tmp == MAP_FAILED) { + err = -errno; + pr_warn("user ringbuf: failed to mmap data pages for map fd=%d: %d\n", + map_fd, err); + return err; + } + + rb->producer_pos = tmp; + rb->data = tmp + rb->page_size; + + rb_epoll = &rb->event; + rb_epoll->events = EPOLLOUT; + if (epoll_ctl(rb->epoll_fd, EPOLL_CTL_ADD, map_fd, rb_epoll) < 0) { + err = -errno; + pr_warn("user ringbuf: failed to epoll add map fd=%d: %d\n", map_fd, err); + return err; + } + + return 0; +} + +struct user_ring_buffer * +user_ring_buffer__new(int map_fd, const struct user_ring_buffer_opts *opts) +{ + struct user_ring_buffer *rb; + int err; + + if (!OPTS_VALID(opts, user_ring_buffer_opts)) + return errno = EINVAL, NULL; + + rb = calloc(1, sizeof(*rb)); + if (!rb) + return errno = ENOMEM, NULL; + + rb->page_size = getpagesize(); + + rb->epoll_fd = epoll_create1(EPOLL_CLOEXEC); + if (rb->epoll_fd < 0) { + err = -errno; + pr_warn("user ringbuf: failed to create epoll instance: %d\n", err); + goto err_out; + } + + err = user_ringbuf_map(rb, map_fd); + if (err) + goto err_out; + + return rb; + +err_out: + user_ring_buffer__free(rb); + return errno = -err, NULL; +} + +static void user_ringbuf_commit(struct user_ring_buffer *rb, void *sample, bool discard) +{ + __u32 new_len; + struct ringbuf_hdr *hdr; + uintptr_t hdr_offset; + + hdr_offset = rb->mask + 1 + (sample - rb->data) - BPF_RINGBUF_HDR_SZ; + hdr = rb->data + (hdr_offset & rb->mask); + + new_len = hdr->len & ~BPF_RINGBUF_BUSY_BIT; + if (discard) + new_len |= BPF_RINGBUF_DISCARD_BIT; + + 
/* Synchronizes with smp_load_acquire() in __bpf_user_ringbuf_peek() in + * the kernel. + */ + __atomic_exchange_n(&hdr->len, new_len, __ATOMIC_ACQ_REL); +} + +void user_ring_buffer__discard(struct user_ring_buffer *rb, void *sample) +{ + user_ringbuf_commit(rb, sample, true); +} + +void user_ring_buffer__submit(struct user_ring_buffer *rb, void *sample) +{ + user_ringbuf_commit(rb, sample, false); +} + +void *user_ring_buffer__reserve(struct user_ring_buffer *rb, __u32 size) +{ + __u32 avail_size, total_size, max_size; + /* 64-bit to avoid overflow in case of extreme application behavior */ + __u64 cons_pos, prod_pos; + struct ringbuf_hdr *hdr; + + /* Synchronizes with smp_store_release() in __bpf_user_ringbuf_peek() in + * the kernel. + */ + cons_pos = smp_load_acquire(rb->consumer_pos); + /* Synchronizes with smp_store_release() in user_ringbuf_commit() */ + prod_pos = smp_load_acquire(rb->producer_pos); + + max_size = rb->mask + 1; + avail_size = max_size - (prod_pos - cons_pos); + /* Round up total size to a multiple of 8. */ + total_size = (size + BPF_RINGBUF_HDR_SZ + 7) / 8 * 8; + + if (total_size > max_size) + return errno = E2BIG, NULL; + + if (avail_size < total_size) + return errno = ENOSPC, NULL; + + hdr = rb->data + (prod_pos & rb->mask); + hdr->len = size | BPF_RINGBUF_BUSY_BIT; + hdr->pad = 0; + + /* Synchronizes with smp_load_acquire() in __bpf_user_ringbuf_peek() in + * the kernel. 
+ */ + smp_store_release(rb->producer_pos, prod_pos + total_size); + + return (void *)rb->data + ((prod_pos + BPF_RINGBUF_HDR_SZ) & rb->mask); +} + +static __u64 ns_elapsed_timespec(const struct timespec *start, const struct timespec *end) +{ + __u64 start_ns, end_ns, ns_per_s = 1000000000; + + start_ns = (__u64)start->tv_sec * ns_per_s + start->tv_nsec; + end_ns = (__u64)end->tv_sec * ns_per_s + end->tv_nsec; + + return end_ns - start_ns; +} + +void *user_ring_buffer__reserve_blocking(struct user_ring_buffer *rb, __u32 size, int timeout_ms) +{ + void *sample; + int err, ms_remaining = timeout_ms; + struct timespec start; + + if (timeout_ms < 0 && timeout_ms != -1) + return errno = EINVAL, NULL; + + if (timeout_ms != -1) { + err = clock_gettime(CLOCK_MONOTONIC, &start); + if (err) + return NULL; + } + + do { + int cnt, ms_elapsed; + struct timespec curr; + __u64 ns_per_ms = 1000000; + + sample = user_ring_buffer__reserve(rb, size); + if (sample) + return sample; + else if (errno != ENOSPC) + return NULL; + + /* The kernel guarantees at least one event notification + * delivery whenever at least one sample is drained from the + * ring buffer in an invocation to bpf_ringbuf_drain(). Other + * additional events may be delivered at any time, but only one + * event is guaranteed per bpf_ringbuf_drain() invocation, + * provided that a sample is drained, and the BPF program did + * not pass BPF_RB_NO_WAKEUP to bpf_ringbuf_drain(). If + * BPF_RB_FORCE_WAKEUP is passed to bpf_ringbuf_drain(), a + * wakeup event will be delivered even if no samples are + * drained. 
+ */ + cnt = epoll_wait(rb->epoll_fd, &rb->event, 1, ms_remaining); + if (cnt < 0) + return NULL; + + if (timeout_ms == -1) + continue; + + err = clock_gettime(CLOCK_MONOTONIC, &curr); + if (err) + return NULL; + + ms_elapsed = ns_elapsed_timespec(&start, &curr) / ns_per_ms; + ms_remaining = timeout_ms - ms_elapsed; + } while (ms_remaining > 0); + + /* Try one more time to reserve a sample after the specified timeout has elapsed. */ + return user_ring_buffer__reserve(rb, size); +} diff --git a/tools/lib/bpf/usdt.c b/tools/lib/bpf/usdt.c index d18e37982344..e83b497c2245 100644 --- a/tools/lib/bpf/usdt.c +++ b/tools/lib/bpf/usdt.c @@ -282,7 +282,7 @@ struct usdt_manager *usdt_manager_new(struct bpf_object *obj) * If this is not supported, USDTs with semaphores will not be supported. * Added in: a6ca88b241d5 ("trace_uprobe: support reference counter in fd-based uprobe") */ - man->has_sema_refcnt = access(ref_ctr_sysfs_path, F_OK) == 0; + man->has_sema_refcnt = faccessat(AT_FDCWD, ref_ctr_sysfs_path, F_OK, AT_EACCESS) == 0; return man; } diff --git a/tools/objtool/check.c b/tools/objtool/check.c index e55fdf952a3a..9216060c3408 100644 --- a/tools/objtool/check.c +++ b/tools/objtool/check.c @@ -4113,7 +4113,8 @@ static int validate_ibt(struct objtool_file *file) !strcmp(sec->name, "__bug_table") || !strcmp(sec->name, "__ex_table") || !strcmp(sec->name, "__jump_table") || - !strcmp(sec->name, "__mcount_loc")) + !strcmp(sec->name, "__mcount_loc") || + strstr(sec->name, "__patchable_function_entries")) continue; list_for_each_entry(reloc, &sec->reloc->reloc_list, list) diff --git a/tools/testing/selftests/bpf/.gitignore b/tools/testing/selftests/bpf/.gitignore index 3a8cb2404ea6..07d2d0a8c5cb 100644 --- a/tools/testing/selftests/bpf/.gitignore +++ b/tools/testing/selftests/bpf/.gitignore @@ -39,6 +39,8 @@ test_cpp /tools /runqslower /bench +/veristat +/sign-file *.ko *.tmp xskxceiver diff --git a/tools/testing/selftests/bpf/DENYLIST.s390x 
b/tools/testing/selftests/bpf/DENYLIST.s390x index 18fbb6eab1e2..17e074eb42b8 100644 --- a/tools/testing/selftests/bpf/DENYLIST.s390x +++ b/tools/testing/selftests/bpf/DENYLIST.s390x @@ -70,3 +70,8 @@ setget_sockopt # attach unexpected error: -524 cb_refs # expected error message unexpected error: -524 (trampoline) cgroup_hierarchical_stats # JIT does not support calling kernel function (kfunc) htab_update # failed to attach: ERROR: strerror_r(-524)=22 (trampoline) +tracing_struct # failed to auto-attach: -524 (trampoline) +user_ringbuf # failed to find kernel BTF type ID of '__s390x_sys_prctl': -3 (?) +lookup_key # JIT does not support calling kernel function (kfunc) +verify_pkcs7_sig # JIT does not support calling kernel function (kfunc) +kfunc_dynptr_param # JIT does not support calling kernel function (kfunc) diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile index c10adecb5a73..e6cf21fad69f 100644 --- a/tools/testing/selftests/bpf/Makefile +++ b/tools/testing/selftests/bpf/Makefile @@ -14,6 +14,7 @@ BPFTOOLDIR := $(TOOLSDIR)/bpf/bpftool APIDIR := $(TOOLSINCDIR)/uapi GENDIR := $(abspath ../../../../include/generated) GENHDR := $(GENDIR)/autoconf.h +HOSTPKG_CONFIG := pkg-config ifneq ($(wildcard $(GENHDR)),) GENFLAGS := -DHAVE_GENHDR @@ -75,16 +76,17 @@ TEST_PROGS := test_kmod.sh \ test_xsk.sh TEST_PROGS_EXTENDED := with_addr.sh \ - with_tunnels.sh ima_setup.sh \ + with_tunnels.sh ima_setup.sh verify_sig_setup.sh \ test_xdp_vlan.sh test_bpftool.py # Compile but not part of 'make run_tests' TEST_GEN_PROGS_EXTENDED = test_sock_addr test_skb_cgroup_id_user \ flow_dissector_load test_flow_dissector test_tcp_check_syncookie_user \ test_lirc_mode2_user xdping test_cpp runqslower bench bpf_testmod.ko \ - xskxceiver xdp_redirect_multi xdp_synproxy + xskxceiver xdp_redirect_multi xdp_synproxy veristat -TEST_CUSTOM_PROGS = $(OUTPUT)/urandom_read +TEST_CUSTOM_PROGS = $(OUTPUT)/urandom_read $(OUTPUT)/sign-file +TEST_GEN_FILES += 
liburandom_read.so # Emit succinct information message describing current building step # $1 - generic step name (e.g., CC, LINK, etc); @@ -189,6 +191,12 @@ $(OUTPUT)/urandom_read: urandom_read.c urandom_read_aux.c $(OUTPUT)/liburandom_r -fuse-ld=$(LLD) -Wl,-znoseparate-code \ -Wl,-rpath=. -Wl,--build-id=sha1 -o $@ +$(OUTPUT)/sign-file: ../../../../scripts/sign-file.c + $(call msg,SIGN-FILE,,$@) + $(Q)$(CC) $(shell $(HOSTPKG_CONFIG) --cflags libcrypto 2> /dev/null) \ + $< -o $@ \ + $(shell $(HOSTPKG_CONFIG) --libs libcrypto 2> /dev/null || echo -lcrypto) + $(OUTPUT)/bpf_testmod.ko: $(VMLINUX_BTF) $(wildcard bpf_testmod/Makefile bpf_testmod/*.[ch]) $(call msg,MOD,,$@) $(Q)$(RM) bpf_testmod/bpf_testmod.ko # force re-compilation @@ -351,11 +359,12 @@ LINKED_SKELS := test_static_linked.skel.h linked_funcs.skel.h \ test_subskeleton.skel.h test_subskeleton_lib.skel.h \ test_usdt.skel.h -LSKELS := kfunc_call_test.c fentry_test.c fexit_test.c fexit_sleep.c \ +LSKELS := fentry_test.c fexit_test.c fexit_sleep.c \ test_ringbuf.c atomics.c trace_printk.c trace_vprintk.c \ map_ptr_kern.c core_kern.c core_kern_overflow.c # Generate both light skeleton and libbpf skeleton for these -LSKELS_EXTRA := test_ksyms_module.c test_ksyms_weak.c kfunc_call_test_subprog.c +LSKELS_EXTRA := test_ksyms_module.c test_ksyms_weak.c kfunc_call_test.c \ + kfunc_call_test_subprog.c SKEL_BLACKLIST += $$(LSKELS) test_static_linked.skel.h-deps := test_static_linked1.bpf.o test_static_linked2.bpf.o @@ -515,7 +524,8 @@ TRUNNER_EXTRA_SOURCES := test_progs.c cgroup_helpers.c trace_helpers.c \ TRUNNER_EXTRA_FILES := $(OUTPUT)/urandom_read $(OUTPUT)/bpf_testmod.ko \ $(OUTPUT)/liburandom_read.so \ $(OUTPUT)/xdp_synproxy \ - ima_setup.sh \ + $(OUTPUT)/sign-file \ + ima_setup.sh verify_sig_setup.sh \ $(wildcard progs/btf_dump_test_case_*.c) TRUNNER_BPF_BUILD_RULE := CLANG_BPF_BUILD_RULE TRUNNER_BPF_CFLAGS := $(BPF_CFLAGS) $(CLANG_CFLAGS) -DENABLE_ATOMICS_TESTS @@ -594,6 +604,11 @@ $(OUTPUT)/bench: 
$(OUTPUT)/bench.o \ $(call msg,BINARY,,$@) $(Q)$(CC) $(CFLAGS) $(LDFLAGS) $(filter %.a %.o,$^) $(LDLIBS) -o $@ +$(OUTPUT)/veristat.o: $(BPFOBJ) +$(OUTPUT)/veristat: $(OUTPUT)/veristat.o + $(call msg,BINARY,,$@) + $(Q)$(CC) $(CFLAGS) $(LDFLAGS) $(filter %.a %.o,$^) $(LDLIBS) -o $@ + EXTRA_CLEAN := $(TEST_CUSTOM_PROGS) $(SCRATCH_DIR) $(HOST_SCRATCH_DIR) \ prog_tests/tests.h map_tests/tests.h verifier/tests.h \ feature bpftool \ diff --git a/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c b/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c index 792cb15bac40..a6021d6117b5 100644 --- a/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c +++ b/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c @@ -18,6 +18,46 @@ typedef int (*func_proto_typedef_nested1)(func_proto_typedef); typedef int (*func_proto_typedef_nested2)(func_proto_typedef_nested1); DEFINE_PER_CPU(int, bpf_testmod_ksym_percpu) = 123; +long bpf_testmod_test_struct_arg_result; + +struct bpf_testmod_struct_arg_1 { + int a; +}; +struct bpf_testmod_struct_arg_2 { + long a; + long b; +}; + +noinline int +bpf_testmod_test_struct_arg_1(struct bpf_testmod_struct_arg_2 a, int b, int c) { + bpf_testmod_test_struct_arg_result = a.a + a.b + b + c; + return bpf_testmod_test_struct_arg_result; +} + +noinline int +bpf_testmod_test_struct_arg_2(int a, struct bpf_testmod_struct_arg_2 b, int c) { + bpf_testmod_test_struct_arg_result = a + b.a + b.b + c; + return bpf_testmod_test_struct_arg_result; +} + +noinline int +bpf_testmod_test_struct_arg_3(int a, int b, struct bpf_testmod_struct_arg_2 c) { + bpf_testmod_test_struct_arg_result = a + b + c.a + c.b; + return bpf_testmod_test_struct_arg_result; +} + +noinline int +bpf_testmod_test_struct_arg_4(struct bpf_testmod_struct_arg_1 a, int b, + int c, int d, struct bpf_testmod_struct_arg_2 e) { + bpf_testmod_test_struct_arg_result = a.a + b + c + d + e.a + e.b; + return bpf_testmod_test_struct_arg_result; +} + +noinline int +bpf_testmod_test_struct_arg_5(void) { + 
bpf_testmod_test_struct_arg_result = 1; + return bpf_testmod_test_struct_arg_result; +} noinline void bpf_testmod_test_mod_kfunc(int i) @@ -98,11 +138,19 @@ bpf_testmod_test_read(struct file *file, struct kobject *kobj, .off = off, .len = len, }; + struct bpf_testmod_struct_arg_1 struct_arg1 = {10}; + struct bpf_testmod_struct_arg_2 struct_arg2 = {2, 3}; int i = 1; while (bpf_testmod_return_ptr(i)) i++; + (void)bpf_testmod_test_struct_arg_1(struct_arg2, 1, 4); + (void)bpf_testmod_test_struct_arg_2(1, struct_arg2, 4); + (void)bpf_testmod_test_struct_arg_3(1, 4, struct_arg2); + (void)bpf_testmod_test_struct_arg_4(struct_arg1, 1, 2, 3, struct_arg2); + (void)bpf_testmod_test_struct_arg_5(); + /* This is always true. Use the check to make sure the compiler * doesn't remove bpf_testmod_loop_test. */ diff --git a/tools/testing/selftests/bpf/config b/tools/testing/selftests/bpf/config index 3fc46f9cfb22..9213565c0311 100644 --- a/tools/testing/selftests/bpf/config +++ b/tools/testing/selftests/bpf/config @@ -7,9 +7,9 @@ CONFIG_BPF_LSM=y CONFIG_BPF_STREAM_PARSER=y CONFIG_BPF_SYSCALL=y CONFIG_CGROUP_BPF=y -CONFIG_CRYPTO_HMAC=m -CONFIG_CRYPTO_SHA256=m -CONFIG_CRYPTO_USER_API_HASH=m +CONFIG_CRYPTO_HMAC=y +CONFIG_CRYPTO_SHA256=y +CONFIG_CRYPTO_USER_API_HASH=y CONFIG_DYNAMIC_FTRACE=y CONFIG_FPROBE=y CONFIG_FTRACE_SYSCALLS=y @@ -24,30 +24,36 @@ CONFIG_IP_NF_FILTER=y CONFIG_IP_NF_RAW=y CONFIG_IP_NF_TARGET_SYNPROXY=y CONFIG_IPV6=y -CONFIG_IPV6_FOU=m -CONFIG_IPV6_FOU_TUNNEL=m +CONFIG_IPV6_FOU=y +CONFIG_IPV6_FOU_TUNNEL=y CONFIG_IPV6_GRE=y CONFIG_IPV6_SEG6_BPF=y -CONFIG_IPV6_SIT=m +CONFIG_IPV6_SIT=y CONFIG_IPV6_TUNNEL=y +CONFIG_KEYS=y CONFIG_LIRC=y CONFIG_LWTUNNEL=y +CONFIG_MODULE_SIG=y +CONFIG_MODULE_SRCVERSION_ALL=y +CONFIG_MODULE_UNLOAD=y +CONFIG_MODULES=y +CONFIG_MODVERSIONS=y CONFIG_MPLS=y -CONFIG_MPLS_IPTUNNEL=m -CONFIG_MPLS_ROUTING=m +CONFIG_MPLS_IPTUNNEL=y +CONFIG_MPLS_ROUTING=y CONFIG_MPTCP=y CONFIG_NET_CLS_ACT=y CONFIG_NET_CLS_BPF=y -CONFIG_NET_CLS_FLOWER=m -CONFIG_NET_FOU=m 
+CONFIG_NET_CLS_FLOWER=y +CONFIG_NET_FOU=y CONFIG_NET_FOU_IP_TUNNELS=y CONFIG_NET_IPGRE=y CONFIG_NET_IPGRE_DEMUX=y CONFIG_NET_IPIP=y -CONFIG_NET_MPLS_GSO=m +CONFIG_NET_MPLS_GSO=y CONFIG_NET_SCH_INGRESS=y CONFIG_NET_SCHED=y -CONFIG_NETDEVSIM=m +CONFIG_NETDEVSIM=y CONFIG_NETFILTER=y CONFIG_NETFILTER_SYNPROXY=y CONFIG_NETFILTER_XT_CONNMARK=y @@ -57,10 +63,11 @@ CONFIG_NF_CONNTRACK=y CONFIG_NF_CONNTRACK_MARK=y CONFIG_NF_DEFRAG_IPV4=y CONFIG_NF_DEFRAG_IPV6=y +CONFIG_NF_NAT=y CONFIG_RC_CORE=y CONFIG_SECURITY=y CONFIG_SECURITYFS=y -CONFIG_TEST_BPF=m +CONFIG_TEST_BPF=y CONFIG_USERFAULTFD=y CONFIG_VXLAN=y CONFIG_XDP_SOCKETS=y diff --git a/tools/testing/selftests/bpf/config.x86_64 b/tools/testing/selftests/bpf/config.x86_64 index f0859a1d37ab..21ce5ea4304e 100644 --- a/tools/testing/selftests/bpf/config.x86_64 +++ b/tools/testing/selftests/bpf/config.x86_64 @@ -47,7 +47,7 @@ CONFIG_CPU_IDLE_GOV_LADDER=y CONFIG_CPUSETS=y CONFIG_CRC_T10DIF=y CONFIG_CRYPTO_BLAKE2B=y -CONFIG_CRYPTO_DEV_VIRTIO=m +CONFIG_CRYPTO_DEV_VIRTIO=y CONFIG_CRYPTO_SEQIV=y CONFIG_CRYPTO_XXHASH=y CONFIG_DCB=y @@ -145,11 +145,6 @@ CONFIG_MCORE2=y CONFIG_MEMCG=y CONFIG_MEMORY_FAILURE=y CONFIG_MINIX_SUBPARTITION=y -CONFIG_MODULE_SIG=y -CONFIG_MODULE_SRCVERSION_ALL=y -CONFIG_MODULE_UNLOAD=y -CONFIG_MODULES=y -CONFIG_MODVERSIONS=y CONFIG_NAMESPACES=y CONFIG_NET=y CONFIG_NET_9P=y diff --git a/tools/testing/selftests/bpf/map_tests/array_map_batch_ops.c b/tools/testing/selftests/bpf/map_tests/array_map_batch_ops.c index 78c76496b14a..b595556315bc 100644 --- a/tools/testing/selftests/bpf/map_tests/array_map_batch_ops.c +++ b/tools/testing/selftests/bpf/map_tests/array_map_batch_ops.c @@ -3,6 +3,7 @@ #include <stdio.h> #include <errno.h> #include <string.h> +#include <unistd.h> #include <bpf/bpf.h> #include <bpf/libbpf.h> @@ -137,6 +138,7 @@ static void __test_map_lookup_and_update_batch(bool is_pcpu) free(keys); free(values); free(visited); + close(map_fd); } static void array_map_batch_ops(void) diff --git 
a/tools/testing/selftests/bpf/map_tests/htab_map_batch_ops.c b/tools/testing/selftests/bpf/map_tests/htab_map_batch_ops.c index f807d53fd8dd..1230ccf90128 100644 --- a/tools/testing/selftests/bpf/map_tests/htab_map_batch_ops.c +++ b/tools/testing/selftests/bpf/map_tests/htab_map_batch_ops.c @@ -3,6 +3,7 @@ #include <stdio.h> #include <errno.h> #include <string.h> +#include <unistd.h> #include <bpf/bpf.h> #include <bpf/libbpf.h> @@ -255,6 +256,7 @@ void __test_map_lookup_and_delete_batch(bool is_pcpu) free(visited); if (!is_pcpu) free(values); + close(map_fd); } void htab_map_batch_ops(void) diff --git a/tools/testing/selftests/bpf/map_tests/lpm_trie_map_batch_ops.c b/tools/testing/selftests/bpf/map_tests/lpm_trie_map_batch_ops.c index 87d07b596e17..b66d56ddb7ef 100644 --- a/tools/testing/selftests/bpf/map_tests/lpm_trie_map_batch_ops.c +++ b/tools/testing/selftests/bpf/map_tests/lpm_trie_map_batch_ops.c @@ -7,6 +7,7 @@ #include <errno.h> #include <string.h> #include <stdlib.h> +#include <unistd.h> #include <bpf/bpf.h> #include <bpf/libbpf.h> @@ -150,4 +151,5 @@ void test_lpm_trie_map_batch_ops(void) free(keys); free(values); free(visited); + close(map_fd); } diff --git a/tools/testing/selftests/bpf/map_tests/task_storage_map.c b/tools/testing/selftests/bpf/map_tests/task_storage_map.c index 1adc9c292eb2..7d050364efca 100644 --- a/tools/testing/selftests/bpf/map_tests/task_storage_map.c +++ b/tools/testing/selftests/bpf/map_tests/task_storage_map.c @@ -77,8 +77,12 @@ void test_task_storage_map_stress_lookup(void) CHECK(err, "open_and_load", "error %d\n", err); /* Only for a fully preemptible kernel */ - if (!skel->kconfig->CONFIG_PREEMPT) + if (!skel->kconfig->CONFIG_PREEMPT) { + printf("%s SKIP (no CONFIG_PREEMPT)\n", __func__); + read_bpf_task_storage_busy__destroy(skel); + skips++; return; + } /* Save the old affinity setting */ sched_getaffinity(getpid(), sizeof(old), &old); @@ -119,4 +123,5 @@ out: read_bpf_task_storage_busy__destroy(skel); /* Restore affinity 
setting */ sched_setaffinity(getpid(), sizeof(old), &old); + printf("%s:PASS\n", __func__); } diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_iter.c b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c index e89685bd587c..3369c5ec3a17 100644 --- a/tools/testing/selftests/bpf/prog_tests/bpf_iter.c +++ b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c @@ -1,6 +1,8 @@ // SPDX-License-Identifier: GPL-2.0 /* Copyright (c) 2020 Facebook */ #include <test_progs.h> +#include <unistd.h> +#include <sys/syscall.h> #include "bpf_iter_ipv6_route.skel.h" #include "bpf_iter_netlink.skel.h" #include "bpf_iter_bpf_map.skel.h" @@ -14,6 +16,7 @@ #include "bpf_iter_udp4.skel.h" #include "bpf_iter_udp6.skel.h" #include "bpf_iter_unix.skel.h" +#include "bpf_iter_vma_offset.skel.h" #include "bpf_iter_test_kern1.skel.h" #include "bpf_iter_test_kern2.skel.h" #include "bpf_iter_test_kern3.skel.h" @@ -43,13 +46,13 @@ static void test_btf_id_or_null(void) } } -static void do_dummy_read(struct bpf_program *prog) +static void do_dummy_read_opts(struct bpf_program *prog, struct bpf_iter_attach_opts *opts) { struct bpf_link *link; char buf[16] = {}; int iter_fd, len; - link = bpf_program__attach_iter(prog, NULL); + link = bpf_program__attach_iter(prog, opts); if (!ASSERT_OK_PTR(link, "attach_iter")) return; @@ -68,6 +71,11 @@ free_link: bpf_link__destroy(link); } +static void do_dummy_read(struct bpf_program *prog) +{ + do_dummy_read_opts(prog, NULL); +} + static void do_read_map_iter_fd(struct bpf_object_skeleton **skel, struct bpf_program *prog, struct bpf_map *map) { @@ -167,19 +175,140 @@ static void test_bpf_map(void) bpf_iter_bpf_map__destroy(skel); } -static void test_task(void) +static int pidfd_open(pid_t pid, unsigned int flags) +{ + return syscall(SYS_pidfd_open, pid, flags); +} + +static void check_bpf_link_info(const struct bpf_program *prog) +{ + LIBBPF_OPTS(bpf_iter_attach_opts, opts); + union bpf_iter_link_info linfo; + struct bpf_link_info info = {}; + struct bpf_link 
*link; + __u32 info_len; + int err; + + memset(&linfo, 0, sizeof(linfo)); + linfo.task.tid = getpid(); + opts.link_info = &linfo; + opts.link_info_len = sizeof(linfo); + + link = bpf_program__attach_iter(prog, &opts); + if (!ASSERT_OK_PTR(link, "attach_iter")) + return; + + info_len = sizeof(info); + err = bpf_obj_get_info_by_fd(bpf_link__fd(link), &info, &info_len); + ASSERT_OK(err, "bpf_obj_get_info_by_fd"); + ASSERT_EQ(info.iter.task.tid, getpid(), "check_task_tid"); + + bpf_link__destroy(link); +} + +static pthread_mutex_t do_nothing_mutex; + +static void *do_nothing_wait(void *arg) +{ + pthread_mutex_lock(&do_nothing_mutex); + pthread_mutex_unlock(&do_nothing_mutex); + + pthread_exit(arg); +} + +static void test_task_common_nocheck(struct bpf_iter_attach_opts *opts, + int *num_unknown, int *num_known) { struct bpf_iter_task *skel; + pthread_t thread_id; + void *ret; skel = bpf_iter_task__open_and_load(); if (!ASSERT_OK_PTR(skel, "bpf_iter_task__open_and_load")) return; - do_dummy_read(skel->progs.dump_task); + ASSERT_OK(pthread_mutex_lock(&do_nothing_mutex), "pthread_mutex_lock"); + + ASSERT_OK(pthread_create(&thread_id, NULL, &do_nothing_wait, NULL), + "pthread_create"); + + skel->bss->tid = getpid(); + + do_dummy_read_opts(skel->progs.dump_task, opts); + + *num_unknown = skel->bss->num_unknown_tid; + *num_known = skel->bss->num_known_tid; + + ASSERT_OK(pthread_mutex_unlock(&do_nothing_mutex), "pthread_mutex_unlock"); + ASSERT_FALSE(pthread_join(thread_id, &ret) || ret != NULL, + "pthread_join"); bpf_iter_task__destroy(skel); } +static void test_task_common(struct bpf_iter_attach_opts *opts, int num_unknown, int num_known) +{ + int num_unknown_tid, num_known_tid; + + test_task_common_nocheck(opts, &num_unknown_tid, &num_known_tid); + ASSERT_EQ(num_unknown_tid, num_unknown, "check_num_unknown_tid"); + ASSERT_EQ(num_known_tid, num_known, "check_num_known_tid"); +} + +static void test_task_tid(void) +{ + LIBBPF_OPTS(bpf_iter_attach_opts, opts); + union 
bpf_iter_link_info linfo; + int num_unknown_tid, num_known_tid; + + memset(&linfo, 0, sizeof(linfo)); + linfo.task.tid = getpid(); + opts.link_info = &linfo; + opts.link_info_len = sizeof(linfo); + test_task_common(&opts, 0, 1); + + linfo.task.tid = 0; + linfo.task.pid = getpid(); + test_task_common(&opts, 1, 1); + + test_task_common_nocheck(NULL, &num_unknown_tid, &num_known_tid); + ASSERT_GT(num_unknown_tid, 1, "check_num_unknown_tid"); + ASSERT_EQ(num_known_tid, 1, "check_num_known_tid"); +} + +static void test_task_pid(void) +{ + LIBBPF_OPTS(bpf_iter_attach_opts, opts); + union bpf_iter_link_info linfo; + + memset(&linfo, 0, sizeof(linfo)); + linfo.task.pid = getpid(); + opts.link_info = &linfo; + opts.link_info_len = sizeof(linfo); + + test_task_common(&opts, 1, 1); +} + +static void test_task_pidfd(void) +{ + LIBBPF_OPTS(bpf_iter_attach_opts, opts); + union bpf_iter_link_info linfo; + int pidfd; + + pidfd = pidfd_open(getpid(), 0); + if (!ASSERT_GT(pidfd, 0, "pidfd_open")) + return; + + memset(&linfo, 0, sizeof(linfo)); + linfo.task.pid_fd = pidfd; + opts.link_info = &linfo; + opts.link_info_len = sizeof(linfo); + + test_task_common(&opts, 1, 1); + + close(pidfd); +} + static void test_task_sleepable(void) { struct bpf_iter_task *skel; @@ -212,14 +341,11 @@ static void test_task_stack(void) bpf_iter_task_stack__destroy(skel); } -static void *do_nothing(void *arg) -{ - pthread_exit(arg); -} - static void test_task_file(void) { + LIBBPF_OPTS(bpf_iter_attach_opts, opts); struct bpf_iter_task_file *skel; + union bpf_iter_link_info linfo; pthread_t thread_id; void *ret; @@ -229,19 +355,36 @@ static void test_task_file(void) skel->bss->tgid = getpid(); - if (!ASSERT_OK(pthread_create(&thread_id, NULL, &do_nothing, NULL), - "pthread_create")) - goto done; + ASSERT_OK(pthread_mutex_lock(&do_nothing_mutex), "pthread_mutex_lock"); - do_dummy_read(skel->progs.dump_task_file); + ASSERT_OK(pthread_create(&thread_id, NULL, &do_nothing_wait, NULL), + "pthread_create"); + + 
memset(&linfo, 0, sizeof(linfo)); + linfo.task.tid = getpid(); + opts.link_info = &linfo; + opts.link_info_len = sizeof(linfo); - if (!ASSERT_FALSE(pthread_join(thread_id, &ret) || ret != NULL, - "pthread_join")) - goto done; + do_dummy_read_opts(skel->progs.dump_task_file, &opts); ASSERT_EQ(skel->bss->count, 0, "check_count"); + ASSERT_EQ(skel->bss->unique_tgid_count, 1, "check_unique_tgid_count"); + + skel->bss->last_tgid = 0; + skel->bss->count = 0; + skel->bss->unique_tgid_count = 0; + + do_dummy_read(skel->progs.dump_task_file); + + ASSERT_EQ(skel->bss->count, 0, "check_count"); + ASSERT_GT(skel->bss->unique_tgid_count, 1, "check_unique_tgid_count"); + + check_bpf_link_info(skel->progs.dump_task_file); + + ASSERT_OK(pthread_mutex_unlock(&do_nothing_mutex), "pthread_mutex_unlock"); + ASSERT_OK(pthread_join(thread_id, &ret), "pthread_join"); + ASSERT_NULL(ret, "pthread_join"); -done: bpf_iter_task_file__destroy(skel); } @@ -1249,7 +1392,7 @@ static void str_strip_first_line(char *str) *dst = '\0'; } -static void test_task_vma(void) +static void test_task_vma_common(struct bpf_iter_attach_opts *opts) { int err, iter_fd = -1, proc_maps_fd = -1; struct bpf_iter_task_vma *skel; @@ -1261,13 +1404,14 @@ static void test_task_vma(void) return; skel->bss->pid = getpid(); + skel->bss->one_task = opts ? 
1 : 0; err = bpf_iter_task_vma__load(skel); if (!ASSERT_OK(err, "bpf_iter_task_vma__load")) goto out; skel->links.proc_maps = bpf_program__attach_iter( - skel->progs.proc_maps, NULL); + skel->progs.proc_maps, opts); if (!ASSERT_OK_PTR(skel->links.proc_maps, "bpf_program__attach_iter")) { skel->links.proc_maps = NULL; @@ -1291,6 +1435,8 @@ static void test_task_vma(void) goto out; len += err; } + if (opts) + ASSERT_EQ(skel->bss->one_task_error, 0, "unexpected task"); /* read CMP_BUFFER_SIZE (1kB) from /proc/pid/maps */ snprintf(maps_path, 64, "/proc/%u/maps", skel->bss->pid); @@ -1306,6 +1452,9 @@ static void test_task_vma(void) str_strip_first_line(proc_maps_output); ASSERT_STREQ(task_vma_output, proc_maps_output, "compare_output"); + + check_bpf_link_info(skel->progs.proc_maps); + out: close(proc_maps_fd); close(iter_fd); @@ -1325,8 +1474,93 @@ void test_bpf_sockmap_map_iter_fd(void) bpf_iter_sockmap__destroy(skel); } +static void test_task_vma(void) +{ + LIBBPF_OPTS(bpf_iter_attach_opts, opts); + union bpf_iter_link_info linfo; + + memset(&linfo, 0, sizeof(linfo)); + linfo.task.tid = getpid(); + opts.link_info = &linfo; + opts.link_info_len = sizeof(linfo); + + test_task_vma_common(&opts); + test_task_vma_common(NULL); +} + +/* uprobe attach point */ +static noinline int trigger_func(int arg) +{ + asm volatile (""); + return arg + 1; +} + +static void test_task_vma_offset_common(struct bpf_iter_attach_opts *opts, bool one_proc) +{ + struct bpf_iter_vma_offset *skel; + struct bpf_link *link; + char buf[16] = {}; + int iter_fd, len; + int pgsz, shift; + + skel = bpf_iter_vma_offset__open_and_load(); + if (!ASSERT_OK_PTR(skel, "bpf_iter_vma_offset__open_and_load")) + return; + + skel->bss->pid = getpid(); + skel->bss->address = (uintptr_t)trigger_func; + for (pgsz = getpagesize(), shift = 0; pgsz > 1; pgsz >>= 1, shift++) + ; + skel->bss->page_shift = shift; + + link = bpf_program__attach_iter(skel->progs.get_vma_offset, opts); + if (!ASSERT_OK_PTR(link, 
"attach_iter")) + return; + + iter_fd = bpf_iter_create(bpf_link__fd(link)); + if (!ASSERT_GT(iter_fd, 0, "create_iter")) + goto exit; + + while ((len = read(iter_fd, buf, sizeof(buf))) > 0) + ; + buf[15] = 0; + ASSERT_EQ(strcmp(buf, "OK\n"), 0, "strcmp"); + + ASSERT_EQ(skel->bss->offset, get_uprobe_offset(trigger_func), "offset"); + if (one_proc) + ASSERT_EQ(skel->bss->unique_tgid_cnt, 1, "unique_tgid_count"); + else + ASSERT_GT(skel->bss->unique_tgid_cnt, 1, "unique_tgid_count"); + + close(iter_fd); + +exit: + bpf_link__destroy(link); +} + +static void test_task_vma_offset(void) +{ + LIBBPF_OPTS(bpf_iter_attach_opts, opts); + union bpf_iter_link_info linfo; + + memset(&linfo, 0, sizeof(linfo)); + linfo.task.pid = getpid(); + opts.link_info = &linfo; + opts.link_info_len = sizeof(linfo); + + test_task_vma_offset_common(&opts, true); + + linfo.task.pid = 0; + linfo.task.tid = getpid(); + test_task_vma_offset_common(&opts, true); + + test_task_vma_offset_common(NULL, false); +} + void test_bpf_iter(void) { + ASSERT_OK(pthread_mutex_init(&do_nothing_mutex, NULL), "pthread_mutex_init"); + if (test__start_subtest("btf_id_or_null")) test_btf_id_or_null(); if (test__start_subtest("ipv6_route")) @@ -1335,8 +1569,12 @@ void test_bpf_iter(void) test_netlink(); if (test__start_subtest("bpf_map")) test_bpf_map(); - if (test__start_subtest("task")) - test_task(); + if (test__start_subtest("task_tid")) + test_task_tid(); + if (test__start_subtest("task_pid")) + test_task_pid(); + if (test__start_subtest("task_pidfd")) + test_task_pidfd(); if (test__start_subtest("task_sleepable")) test_task_sleepable(); if (test__start_subtest("task_stack")) @@ -1397,4 +1635,6 @@ void test_bpf_iter(void) test_ksym_iter(); if (test__start_subtest("bpf_sockmap_map_iter_fd")) test_bpf_sockmap_map_iter_fd(); + if (test__start_subtest("vma_offset")) + test_task_vma_offset(); } diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_nf.c b/tools/testing/selftests/bpf/prog_tests/bpf_nf.c index 
544bf90ac2a7..8a838ea8bdf3 100644 --- a/tools/testing/selftests/bpf/prog_tests/bpf_nf.c +++ b/tools/testing/selftests/bpf/prog_tests/bpf_nf.c @@ -1,6 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 #include <test_progs.h> #include <network_helpers.h> +#include <linux/netfilter/nf_conntrack_common.h> #include "test_bpf_nf.skel.h" #include "test_bpf_nf_fail.skel.h" @@ -17,6 +18,7 @@ struct { { "set_status_after_insert", "kernel function bpf_ct_set_status args#0 expected pointer to STRUCT nf_conn___init but" }, { "change_timeout_after_alloc", "kernel function bpf_ct_change_timeout args#0 expected pointer to STRUCT nf_conn but" }, { "change_status_after_alloc", "kernel function bpf_ct_change_status args#0 expected pointer to STRUCT nf_conn but" }, + { "write_not_allowlisted_field", "no write support to nf_conn at off" }, }; enum { @@ -24,7 +26,10 @@ enum { TEST_TC_BPF, }; -#define TIMEOUT_MS 3000 +#define TIMEOUT_MS 3000 +#define IPS_STATUS_MASK (IPS_CONFIRMED | IPS_SEEN_REPLY | \ + IPS_SRC_NAT_DONE | IPS_DST_NAT_DONE | \ + IPS_SRC_NAT | IPS_DST_NAT) static int connect_to_server(int srv_fd) { @@ -111,10 +116,12 @@ static void test_bpf_nf_ct(int mode) /* allow some tolerance for test_delta_timeout value to avoid races. 
*/ ASSERT_GT(skel->bss->test_delta_timeout, 8, "Test for min ct timeout update"); ASSERT_LE(skel->bss->test_delta_timeout, 10, "Test for max ct timeout update"); - /* expected status is IPS_SEEN_REPLY */ - ASSERT_EQ(skel->bss->test_status, 2, "Test for ct status update "); + ASSERT_EQ(skel->bss->test_insert_lookup_mark, 77, "Test for insert and lookup mark value"); + ASSERT_EQ(skel->bss->test_status, IPS_STATUS_MASK, "Test for ct status update "); ASSERT_EQ(skel->data->test_exist_lookup, 0, "Test existing connection lookup"); ASSERT_EQ(skel->bss->test_exist_lookup_mark, 43, "Test existing connection lookup ctmark"); + ASSERT_EQ(skel->data->test_snat_addr, 0, "Test for source natting"); + ASSERT_EQ(skel->data->test_dnat_addr, 0, "Test for destination natting"); end: if (srv_client_fd != -1) close(srv_client_fd); diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c b/tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c index 2959a52ced06..e980188d4124 100644 --- a/tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c +++ b/tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c @@ -290,6 +290,10 @@ static void test_dctcp_fallback(void) goto done; ASSERT_STREQ(dctcp_skel->bss->cc_res, "cubic", "cc_res"); ASSERT_EQ(dctcp_skel->bss->tcp_cdg_res, -ENOTSUPP, "tcp_cdg_res"); + /* All setsockopt(TCP_CONGESTION) in the recurred + * bpf_dctcp->init() should fail with -EBUSY. 
+ */ + ASSERT_EQ(dctcp_skel->bss->ebusy_cnt, 3, "ebusy_cnt"); err = getsockopt(srv_fd, SOL_TCP, TCP_CONGESTION, srv_cc, &cc_len); if (!ASSERT_OK(err, "getsockopt(srv_fd, TCP_CONGESTION)")) diff --git a/tools/testing/selftests/bpf/prog_tests/btf_dump.c b/tools/testing/selftests/bpf/prog_tests/btf_dump.c index b1ca954ed1e5..24da335482d4 100644 --- a/tools/testing/selftests/bpf/prog_tests/btf_dump.c +++ b/tools/testing/selftests/bpf/prog_tests/btf_dump.c @@ -764,7 +764,7 @@ static void test_btf_dump_struct_data(struct btf *btf, struct btf_dump *d, /* union with nested struct */ TEST_BTF_DUMP_DATA(btf, d, "union", str, union bpf_iter_link_info, BTF_F_COMPACT, - "(union bpf_iter_link_info){.map = (struct){.map_fd = (__u32)1,},.cgroup = (struct){.order = (enum bpf_cgroup_iter_order)BPF_CGROUP_ITER_SELF_ONLY,.cgroup_fd = (__u32)1,},}", + "(union bpf_iter_link_info){.map = (struct){.map_fd = (__u32)1,},.cgroup = (struct){.order = (enum bpf_cgroup_iter_order)BPF_CGROUP_ITER_SELF_ONLY,.cgroup_fd = (__u32)1,},.task = (struct){.tid = (__u32)1,.pid = (__u32)1,},}", { .cgroup = { .order = 1, .cgroup_fd = 1, }}); /* struct skb with nested structs/unions; because type output is so diff --git a/tools/testing/selftests/bpf/prog_tests/btf_skc_cls_ingress.c b/tools/testing/selftests/bpf/prog_tests/btf_skc_cls_ingress.c index 664ffc0364f4..7a277035c275 100644 --- a/tools/testing/selftests/bpf/prog_tests/btf_skc_cls_ingress.c +++ b/tools/testing/selftests/bpf/prog_tests/btf_skc_cls_ingress.c @@ -22,26 +22,6 @@ static __u32 duration; #define PROG_PIN_FILE "/sys/fs/bpf/btf_skc_cls_ingress" -static int write_sysctl(const char *sysctl, const char *value) -{ - int fd, err, len; - - fd = open(sysctl, O_WRONLY); - if (CHECK(fd == -1, "open sysctl", "open(%s): %s (%d)\n", - sysctl, strerror(errno), errno)) - return -1; - - len = strlen(value); - err = write(fd, value, len); - close(fd); - if (CHECK(err != len, "write sysctl", - "write(%s, %s, %d): err:%d %s (%d)\n", - sysctl, value, len, err, 
strerror(errno), errno)) - return -1; - - return 0; -} - static int prepare_netns(void) { if (CHECK(unshare(CLONE_NEWNET), "create netns", diff --git a/tools/testing/selftests/bpf/prog_tests/cgroup_hierarchical_stats.c b/tools/testing/selftests/bpf/prog_tests/cgroup_hierarchical_stats.c index bed1661596f7..3bd27d2ea668 100644 --- a/tools/testing/selftests/bpf/prog_tests/cgroup_hierarchical_stats.c +++ b/tools/testing/selftests/bpf/prog_tests/cgroup_hierarchical_stats.c @@ -1,6 +1,22 @@ // SPDX-License-Identifier: GPL-2.0-only /* - * Functions to manage eBPF programs attached to cgroup subsystems + * This test makes sure BPF stats collection using rstat works correctly. + * The test uses 3 BPF progs: + * (a) counter: This BPF prog is invoked every time we attach a process to a + * cgroup and locklessly increments a percpu counter. + * The program then calls cgroup_rstat_updated() to inform rstat + * of an update on the (cpu, cgroup) pair. + * + * (b) flusher: This BPF prog is invoked when an rstat flush is ongoing, it + * aggregates all percpu counters to a total counter, and also + * propagates the changes to the ancestor cgroups. + * + * (c) dumper: This BPF prog is a cgroup_iter. It is used to output the total + * counter of a cgroup through reading a file in userspace. + * + * The test sets up a cgroup hierarchy, and the above programs. It spawns a few + * processes in the leaf cgroups and makes sure all the counters are aggregated + * correctly. * * Copyright 2022 Google LLC. 
*/ @@ -21,8 +37,10 @@ #define PAGE_SIZE 4096 #define MB(x) (x << 20) +#define PROCESSES_PER_CGROUP 3 + #define BPFFS_ROOT "/sys/fs/bpf/" -#define BPFFS_VMSCAN BPFFS_ROOT"vmscan/" +#define BPFFS_ATTACH_COUNTERS BPFFS_ROOT "attach_counters/" #define CG_ROOT_NAME "root" #define CG_ROOT_ID 1 @@ -79,7 +97,7 @@ static int setup_bpffs(void) return err; /* Create a directory to contain stat files in bpffs */ - err = mkdir(BPFFS_VMSCAN, 0755); + err = mkdir(BPFFS_ATTACH_COUNTERS, 0755); if (!ASSERT_OK(err, "mkdir")) return err; @@ -89,7 +107,7 @@ static int setup_bpffs(void) static void cleanup_bpffs(void) { /* Remove created directory in bpffs */ - ASSERT_OK(rmdir(BPFFS_VMSCAN), "rmdir "BPFFS_VMSCAN); + ASSERT_OK(rmdir(BPFFS_ATTACH_COUNTERS), "rmdir "BPFFS_ATTACH_COUNTERS); /* Unmount bpffs, if it wasn't already mounted when we started */ if (mounted_bpffs) @@ -118,18 +136,6 @@ static int setup_cgroups(void) cgroups[i].fd = fd; cgroups[i].id = get_cgroup_id(cgroups[i].path); - - /* - * Enable memcg controller for the entire hierarchy. - * Note that stats are collected for all cgroups in a hierarchy - * with memcg enabled anyway, but are only exposed for cgroups - * that have memcg enabled. 
- */ - if (i < N_NON_LEAF_CGROUPS) { - err = enable_controllers(cgroups[i].path, "memory"); - if (!ASSERT_OK(err, "enable_controllers")) - return err; - } } return 0; } @@ -154,109 +160,85 @@ static void destroy_hierarchy(void) cleanup_bpffs(); } -static int reclaimer(const char *cgroup_path, size_t size) -{ - static char size_buf[128]; - char *buf, *ptr; - int err; - - /* Join cgroup in the parent process workdir */ - if (join_parent_cgroup(cgroup_path)) - return EACCES; - - /* Allocate memory */ - buf = malloc(size); - if (!buf) - return ENOMEM; - - /* Write to memory to make sure it's actually allocated */ - for (ptr = buf; ptr < buf + size; ptr += PAGE_SIZE) - *ptr = 1; - - /* Try to reclaim memory */ - snprintf(size_buf, 128, "%lu", size); - err = write_cgroup_file_parent(cgroup_path, "memory.reclaim", size_buf); - - free(buf); - /* memory.reclaim returns EAGAIN if the amount is not fully reclaimed */ - if (err && errno != EAGAIN) - return errno; - - return 0; -} - -static int induce_vmscan(void) +static int attach_processes(void) { - int i, status; + int i, j, status; - /* - * In every leaf cgroup, run a child process that allocates some memory - * and attempts to reclaim some of it. 
- */ + /* In every leaf cgroup, attach 3 processes */ for (i = N_NON_LEAF_CGROUPS; i < N_CGROUPS; i++) { - pid_t pid; - - /* Create reclaimer child */ - pid = fork(); - if (pid == 0) { - status = reclaimer(cgroups[i].path, MB(5)); - exit(status); + for (j = 0; j < PROCESSES_PER_CGROUP; j++) { + pid_t pid; + + /* Create child and attach to cgroup */ + pid = fork(); + if (pid == 0) { + if (join_parent_cgroup(cgroups[i].path)) + exit(EACCES); + exit(0); + } + + /* Cleanup child */ + waitpid(pid, &status, 0); + if (!ASSERT_TRUE(WIFEXITED(status), "child process exited")) + return 1; + if (!ASSERT_EQ(WEXITSTATUS(status), 0, + "child process exit code")) + return 1; } - - /* Cleanup reclaimer child */ - waitpid(pid, &status, 0); - ASSERT_TRUE(WIFEXITED(status), "reclaimer exited"); - ASSERT_EQ(WEXITSTATUS(status), 0, "reclaim exit code"); } return 0; } static unsigned long long -get_cgroup_vmscan_delay(unsigned long long cgroup_id, const char *file_name) +get_attach_counter(unsigned long long cgroup_id, const char *file_name) { - unsigned long long vmscan = 0, id = 0; + unsigned long long attach_counter = 0, id = 0; static char buf[128], path[128]; /* For every cgroup, read the file generated by cgroup_iter */ - snprintf(path, 128, "%s%s", BPFFS_VMSCAN, file_name); + snprintf(path, 128, "%s%s", BPFFS_ATTACH_COUNTERS, file_name); if (!ASSERT_OK(read_from_file(path, buf, 128), "read cgroup_iter")) return 0; /* Check the output file formatting */ - ASSERT_EQ(sscanf(buf, "cg_id: %llu, total_vmscan_delay: %llu\n", - &id, &vmscan), 2, "output format"); + ASSERT_EQ(sscanf(buf, "cg_id: %llu, attach_counter: %llu\n", + &id, &attach_counter), 2, "output format"); /* Check that the cgroup_id is displayed correctly */ ASSERT_EQ(id, cgroup_id, "cgroup_id"); - /* Check that the vmscan reading is non-zero */ - ASSERT_GT(vmscan, 0, "vmscan_reading"); - return vmscan; + /* Check that the counter is non-zero */ + ASSERT_GT(attach_counter, 0, "attach counter non-zero"); + return 
attach_counter; } -static void check_vmscan_stats(void) +static void check_attach_counters(void) { - unsigned long long vmscan_readings[N_CGROUPS], vmscan_root; + unsigned long long attach_counters[N_CGROUPS], root_attach_counter; int i; - for (i = 0; i < N_CGROUPS; i++) { - vmscan_readings[i] = get_cgroup_vmscan_delay(cgroups[i].id, - cgroups[i].name); - } + for (i = 0; i < N_CGROUPS; i++) + attach_counters[i] = get_attach_counter(cgroups[i].id, + cgroups[i].name); /* Read stats for root too */ - vmscan_root = get_cgroup_vmscan_delay(CG_ROOT_ID, CG_ROOT_NAME); + root_attach_counter = get_attach_counter(CG_ROOT_ID, CG_ROOT_NAME); + + /* Check that all leafs cgroups have an attach counter of 3 */ + for (i = N_NON_LEAF_CGROUPS; i < N_CGROUPS; i++) + ASSERT_EQ(attach_counters[i], PROCESSES_PER_CGROUP, + "leaf cgroup attach counter"); /* Check that child1 == child1_1 + child1_2 */ - ASSERT_EQ(vmscan_readings[1], vmscan_readings[3] + vmscan_readings[4], - "child1_vmscan"); + ASSERT_EQ(attach_counters[1], attach_counters[3] + attach_counters[4], + "child1_counter"); /* Check that child2 == child2_1 + child2_2 */ - ASSERT_EQ(vmscan_readings[2], vmscan_readings[5] + vmscan_readings[6], - "child2_vmscan"); + ASSERT_EQ(attach_counters[2], attach_counters[5] + attach_counters[6], + "child2_counter"); /* Check that test == child1 + child2 */ - ASSERT_EQ(vmscan_readings[0], vmscan_readings[1] + vmscan_readings[2], - "test_vmscan"); + ASSERT_EQ(attach_counters[0], attach_counters[1] + attach_counters[2], + "test_counter"); /* Check that root >= test */ - ASSERT_GE(vmscan_root, vmscan_readings[1], "root_vmscan"); + ASSERT_GE(root_attach_counter, attach_counters[1], "root_counter"); } /* Creates iter link and pins in bpffs, returns 0 on success, -errno on failure. 
@@ -278,12 +260,12 @@ static int setup_cgroup_iter(struct cgroup_hierarchical_stats *obj, linfo.cgroup.order = BPF_CGROUP_ITER_SELF_ONLY; opts.link_info = &linfo; opts.link_info_len = sizeof(linfo); - link = bpf_program__attach_iter(obj->progs.dump_vmscan, &opts); + link = bpf_program__attach_iter(obj->progs.dumper, &opts); if (!ASSERT_OK_PTR(link, "attach_iter")) return -EFAULT; /* Pin the link to a bpffs file */ - snprintf(path, 128, "%s%s", BPFFS_VMSCAN, file_name); + snprintf(path, 128, "%s%s", BPFFS_ATTACH_COUNTERS, file_name); err = bpf_link__pin(link, path); ASSERT_OK(err, "pin cgroup_iter"); @@ -313,7 +295,7 @@ static int setup_progs(struct cgroup_hierarchical_stats **skel) if (!ASSERT_OK(err, "setup_cgroup_iter")) return err; - bpf_program__set_autoattach((*skel)->progs.dump_vmscan, false); + bpf_program__set_autoattach((*skel)->progs.dumper, false); err = cgroup_hierarchical_stats__attach(*skel); if (!ASSERT_OK(err, "attach")) return err; @@ -328,13 +310,13 @@ static void destroy_progs(struct cgroup_hierarchical_stats *skel) for (i = 0; i < N_CGROUPS; i++) { /* Delete files in bpffs that cgroup_iters are pinned in */ - snprintf(path, 128, "%s%s", BPFFS_VMSCAN, + snprintf(path, 128, "%s%s", BPFFS_ATTACH_COUNTERS, cgroups[i].name); ASSERT_OK(remove(path), "remove cgroup_iter pin"); } /* Delete root file in bpffs */ - snprintf(path, 128, "%s%s", BPFFS_VMSCAN, CG_ROOT_NAME); + snprintf(path, 128, "%s%s", BPFFS_ATTACH_COUNTERS, CG_ROOT_NAME); ASSERT_OK(remove(path), "remove cgroup_iter root pin"); cgroup_hierarchical_stats__destroy(skel); } @@ -347,9 +329,9 @@ void test_cgroup_hierarchical_stats(void) goto hierarchy_cleanup; if (setup_progs(&skel)) goto cleanup; - if (induce_vmscan()) + if (attach_processes()) goto cleanup; - check_vmscan_stats(); + check_attach_counters(); cleanup: destroy_progs(skel); hierarchy_cleanup: diff --git a/tools/testing/selftests/bpf/prog_tests/connect_ping.c b/tools/testing/selftests/bpf/prog_tests/connect_ping.c new file mode 
100644 index 000000000000..289218c2216c --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/connect_ping.c @@ -0,0 +1,178 @@ +// SPDX-License-Identifier: GPL-2.0-only + +/* + * Copyright 2022 Google LLC. + */ + +#define _GNU_SOURCE +#include <sys/mount.h> + +#include "test_progs.h" +#include "cgroup_helpers.h" +#include "network_helpers.h" + +#include "connect_ping.skel.h" + +/* 2001:db8::1 */ +#define BINDADDR_V6 { { { 0x20,0x01,0x0d,0xb8,0,0,0,0,0,0,0,0,0,0,0,1 } } } +static const struct in6_addr bindaddr_v6 = BINDADDR_V6; + +static void subtest(int cgroup_fd, struct connect_ping *skel, + int family, int do_bind) +{ + struct sockaddr_in sa4 = { + .sin_family = AF_INET, + .sin_addr.s_addr = htonl(INADDR_LOOPBACK), + }; + struct sockaddr_in6 sa6 = { + .sin6_family = AF_INET6, + .sin6_addr = IN6ADDR_LOOPBACK_INIT, + }; + struct sockaddr *sa; + socklen_t sa_len; + int protocol; + int sock_fd; + + switch (family) { + case AF_INET: + sa = (struct sockaddr *)&sa4; + sa_len = sizeof(sa4); + protocol = IPPROTO_ICMP; + break; + case AF_INET6: + sa = (struct sockaddr *)&sa6; + sa_len = sizeof(sa6); + protocol = IPPROTO_ICMPV6; + break; + } + + memset(skel->bss, 0, sizeof(*skel->bss)); + skel->bss->do_bind = do_bind; + + sock_fd = socket(family, SOCK_DGRAM, protocol); + if (!ASSERT_GE(sock_fd, 0, "sock-create")) + return; + + if (!ASSERT_OK(connect(sock_fd, sa, sa_len), "connect")) + goto close_sock; + + if (!ASSERT_EQ(skel->bss->invocations_v4, family == AF_INET ? 1 : 0, + "invocations_v4")) + goto close_sock; + if (!ASSERT_EQ(skel->bss->invocations_v6, family == AF_INET6 ? 1 : 0, + "invocations_v6")) + goto close_sock; + if (!ASSERT_EQ(skel->bss->has_error, 0, "has_error")) + goto close_sock; + + if (!ASSERT_OK(getsockname(sock_fd, sa, &sa_len), + "getsockname")) + goto close_sock; + + switch (family) { + case AF_INET: + if (!ASSERT_EQ(sa4.sin_family, family, "sin_family")) + goto close_sock; + if (!ASSERT_EQ(sa4.sin_addr.s_addr, + htonl(do_bind ? 
0x01010101 : INADDR_LOOPBACK), + "sin_addr")) + goto close_sock; + break; + case AF_INET6: + if (!ASSERT_EQ(sa6.sin6_family, AF_INET6, "sin6_family")) + goto close_sock; + if (!ASSERT_EQ(memcmp(&sa6.sin6_addr, + do_bind ? &bindaddr_v6 : &in6addr_loopback, + sizeof(sa6.sin6_addr)), + 0, "sin6_addr")) + goto close_sock; + break; + } + +close_sock: + close(sock_fd); +} + +void test_connect_ping(void) +{ + struct connect_ping *skel; + int cgroup_fd; + + if (!ASSERT_OK(unshare(CLONE_NEWNET | CLONE_NEWNS), "unshare")) + return; + + /* overmount sysfs, and making original sysfs private so overmount + * does not propagate to other mntns. + */ + if (!ASSERT_OK(mount("none", "/sys", NULL, MS_PRIVATE, NULL), + "remount-private-sys")) + return; + if (!ASSERT_OK(mount("sysfs", "/sys", "sysfs", 0, NULL), + "mount-sys")) + return; + if (!ASSERT_OK(mount("bpffs", "/sys/fs/bpf", "bpf", 0, NULL), + "mount-bpf")) + goto clean_mount; + + if (!ASSERT_OK(system("ip link set dev lo up"), "lo-up")) + goto clean_mount; + if (!ASSERT_OK(system("ip addr add 1.1.1.1 dev lo"), "lo-addr-v4")) + goto clean_mount; + if (!ASSERT_OK(system("ip -6 addr add 2001:db8::1 dev lo"), "lo-addr-v6")) + goto clean_mount; + if (write_sysctl("/proc/sys/net/ipv4/ping_group_range", "0 0")) + goto clean_mount; + + cgroup_fd = test__join_cgroup("/connect_ping"); + if (!ASSERT_GE(cgroup_fd, 0, "cg-create")) + goto clean_mount; + + skel = connect_ping__open_and_load(); + if (!ASSERT_OK_PTR(skel, "skel-load")) + goto close_cgroup; + skel->links.connect_v4_prog = + bpf_program__attach_cgroup(skel->progs.connect_v4_prog, cgroup_fd); + if (!ASSERT_OK_PTR(skel->links.connect_v4_prog, "cg-attach-v4")) + goto skel_destroy; + skel->links.connect_v6_prog = + bpf_program__attach_cgroup(skel->progs.connect_v6_prog, cgroup_fd); + if (!ASSERT_OK_PTR(skel->links.connect_v6_prog, "cg-attach-v6")) + goto skel_destroy; + + /* Connect a v4 ping socket to localhost, assert that only v4 is called, + * and called exactly once, and that 
the socket's bound address is + * original loopback address. + */ + if (test__start_subtest("ipv4")) + subtest(cgroup_fd, skel, AF_INET, 0); + + /* Connect a v4 ping socket to localhost, assert that only v4 is called, + * and called exactly once, and that the socket's bound address is + * address we explicitly bound. + */ + if (test__start_subtest("ipv4-bind")) + subtest(cgroup_fd, skel, AF_INET, 1); + + /* Connect a v6 ping socket to localhost, assert that only v6 is called, + * and called exactly once, and that the socket's bound address is + * original loopback address. + */ + if (test__start_subtest("ipv6")) + subtest(cgroup_fd, skel, AF_INET6, 0); + + /* Connect a v6 ping socket to localhost, assert that only v6 is called, + * and called exactly once, and that the socket's bound address is + * address we explicitly bound. + */ + if (test__start_subtest("ipv6-bind")) + subtest(cgroup_fd, skel, AF_INET6, 1); + +skel_destroy: + connect_ping__destroy(skel); + +close_cgroup: + close(cgroup_fd); + +clean_mount: + umount2("/sys", MNT_DETACH); +} diff --git a/tools/testing/selftests/bpf/prog_tests/dynptr.c b/tools/testing/selftests/bpf/prog_tests/dynptr.c index bcf80b9f7c27..8fc4e6c02bfd 100644 --- a/tools/testing/selftests/bpf/prog_tests/dynptr.c +++ b/tools/testing/selftests/bpf/prog_tests/dynptr.c @@ -30,7 +30,7 @@ static struct { {"invalid_helper2", "Expected an initialized dynptr as arg #3"}, {"invalid_write1", "Expected an initialized dynptr as arg #1"}, {"invalid_write2", "Expected an initialized dynptr as arg #3"}, - {"invalid_write3", "Expected an initialized ringbuf dynptr as arg #1"}, + {"invalid_write3", "Expected an initialized dynptr as arg #1"}, {"invalid_write4", "arg 1 is an unacquired reference"}, {"invalid_read1", "invalid read from stack"}, {"invalid_read2", "cannot pass in dynptr at an offset"}, diff --git a/tools/testing/selftests/bpf/prog_tests/get_func_ip_test.c b/tools/testing/selftests/bpf/prog_tests/get_func_ip_test.c index 
938dbd4d7c2f..fede8ef58b5b 100644 --- a/tools/testing/selftests/bpf/prog_tests/get_func_ip_test.c +++ b/tools/testing/selftests/bpf/prog_tests/get_func_ip_test.c @@ -2,7 +2,7 @@ #include <test_progs.h> #include "get_func_ip_test.skel.h" -void test_get_func_ip_test(void) +static void test_function_entry(void) { struct get_func_ip_test *skel = NULL; int err, prog_fd; @@ -12,14 +12,6 @@ void test_get_func_ip_test(void) if (!ASSERT_OK_PTR(skel, "get_func_ip_test__open")) return; - /* test6 is x86_64 specifc because of the instruction - * offset, disabling it for all other archs - */ -#ifndef __x86_64__ - bpf_program__set_autoload(skel->progs.test6, false); - bpf_program__set_autoload(skel->progs.test7, false); -#endif - err = get_func_ip_test__load(skel); if (!ASSERT_OK(err, "get_func_ip_test__load")) goto cleanup; @@ -43,11 +35,56 @@ void test_get_func_ip_test(void) ASSERT_EQ(skel->bss->test3_result, 1, "test3_result"); ASSERT_EQ(skel->bss->test4_result, 1, "test4_result"); ASSERT_EQ(skel->bss->test5_result, 1, "test5_result"); + +cleanup: + get_func_ip_test__destroy(skel); +} + +/* test6 is x86_64 specific because of the instruction + * offset, disabling it for all other archs + */ #ifdef __x86_64__ +static void test_function_body(void) +{ + struct get_func_ip_test *skel = NULL; + LIBBPF_OPTS(bpf_test_run_opts, topts); + LIBBPF_OPTS(bpf_kprobe_opts, kopts); + struct bpf_link *link6 = NULL; + int err, prog_fd; + + skel = get_func_ip_test__open(); + if (!ASSERT_OK_PTR(skel, "get_func_ip_test__open")) + return; + + bpf_program__set_autoload(skel->progs.test6, true); + + err = get_func_ip_test__load(skel); + if (!ASSERT_OK(err, "get_func_ip_test__load")) + goto cleanup; + + kopts.offset = skel->kconfig->CONFIG_X86_KERNEL_IBT ? 
9 : 5; + + link6 = bpf_program__attach_kprobe_opts(skel->progs.test6, "bpf_fentry_test6", &kopts); + if (!ASSERT_OK_PTR(link6, "link6")) + goto cleanup; + + prog_fd = bpf_program__fd(skel->progs.test1); + err = bpf_prog_test_run_opts(prog_fd, &topts); + ASSERT_OK(err, "test_run"); + ASSERT_EQ(topts.retval, 0, "test_run"); + ASSERT_EQ(skel->bss->test6_result, 1, "test6_result"); - ASSERT_EQ(skel->bss->test7_result, 1, "test7_result"); -#endif cleanup: + bpf_link__destroy(link6); get_func_ip_test__destroy(skel); } +#else +#define test_function_body() +#endif + +void test_get_func_ip_test(void) +{ + test_function_entry(); + test_function_body(); +} diff --git a/tools/testing/selftests/bpf/prog_tests/kfunc_call.c b/tools/testing/selftests/bpf/prog_tests/kfunc_call.c index eede7c304f86..5af1ee8f0e6e 100644 --- a/tools/testing/selftests/bpf/prog_tests/kfunc_call.c +++ b/tools/testing/selftests/bpf/prog_tests/kfunc_call.c @@ -2,6 +2,8 @@ /* Copyright (c) 2021 Facebook */ #include <test_progs.h> #include <network_helpers.h> +#include "kfunc_call_fail.skel.h" +#include "kfunc_call_test.skel.h" #include "kfunc_call_test.lskel.h" #include "kfunc_call_test_subprog.skel.h" #include "kfunc_call_test_subprog.lskel.h" @@ -9,36 +11,220 @@ #include "cap_helpers.h" -static void test_main(void) +static size_t log_buf_sz = 1048576; /* 1 MB */ +static char obj_log_buf[1048576]; + +enum kfunc_test_type { + tc_test = 0, + syscall_test, + syscall_null_ctx_test, +}; + +struct kfunc_test_params { + const char *prog_name; + unsigned long lskel_prog_desc_offset; + int retval; + enum kfunc_test_type test_type; + const char *expected_err_msg; +}; + +#define __BPF_TEST_SUCCESS(name, __retval, type) \ + { \ + .prog_name = #name, \ + .lskel_prog_desc_offset = offsetof(struct kfunc_call_test_lskel, progs.name), \ + .retval = __retval, \ + .test_type = type, \ + .expected_err_msg = NULL, \ + } + +#define __BPF_TEST_FAIL(name, __retval, type, error_msg) \ + { \ + .prog_name = #name, \ + 
.lskel_prog_desc_offset = 0 /* unused when test is failing */, \ + .retval = __retval, \ + .test_type = type, \ + .expected_err_msg = error_msg, \ + } + +#define TC_TEST(name, retval) __BPF_TEST_SUCCESS(name, retval, tc_test) +#define SYSCALL_TEST(name, retval) __BPF_TEST_SUCCESS(name, retval, syscall_test) +#define SYSCALL_NULL_CTX_TEST(name, retval) __BPF_TEST_SUCCESS(name, retval, syscall_null_ctx_test) + +#define TC_FAIL(name, retval, error_msg) __BPF_TEST_FAIL(name, retval, tc_test, error_msg) +#define SYSCALL_NULL_CTX_FAIL(name, retval, error_msg) \ + __BPF_TEST_FAIL(name, retval, syscall_null_ctx_test, error_msg) + +static struct kfunc_test_params kfunc_tests[] = { + /* failure cases: + * if retval is 0 -> the program will fail to load and the error message is an error + * if retval is not 0 -> the program can be loaded but running it will gives the + * provided return value. The error message is thus the one + * from a successful load + */ + SYSCALL_NULL_CTX_FAIL(kfunc_syscall_test_fail, -EINVAL, "processed 4 insns"), + SYSCALL_NULL_CTX_FAIL(kfunc_syscall_test_null_fail, -EINVAL, "processed 4 insns"), + TC_FAIL(kfunc_call_test_get_mem_fail_rdonly, 0, "R0 cannot write into rdonly_mem"), + TC_FAIL(kfunc_call_test_get_mem_fail_use_after_free, 0, "invalid mem access 'scalar'"), + TC_FAIL(kfunc_call_test_get_mem_fail_oob, 0, "min value is outside of the allowed memory range"), + TC_FAIL(kfunc_call_test_get_mem_fail_not_const, 0, "is not a const"), + TC_FAIL(kfunc_call_test_mem_acquire_fail, 0, "acquire kernel function does not return PTR_TO_BTF_ID"), + + /* success cases */ + TC_TEST(kfunc_call_test1, 12), + TC_TEST(kfunc_call_test2, 3), + TC_TEST(kfunc_call_test_ref_btf_id, 0), + TC_TEST(kfunc_call_test_get_mem, 42), + SYSCALL_TEST(kfunc_syscall_test, 0), + SYSCALL_NULL_CTX_TEST(kfunc_syscall_test_null, 0), +}; + +struct syscall_test_args { + __u8 data[16]; + size_t size; +}; + +static void verify_success(struct kfunc_test_params *param) { - struct 
kfunc_call_test_lskel *skel; + struct kfunc_call_test_lskel *lskel = NULL; + LIBBPF_OPTS(bpf_test_run_opts, topts); + struct bpf_prog_desc *lskel_prog; + struct kfunc_call_test *skel; + struct bpf_program *prog; int prog_fd, err; - LIBBPF_OPTS(bpf_test_run_opts, topts, - .data_in = &pkt_v4, - .data_size_in = sizeof(pkt_v4), - .repeat = 1, - ); + struct syscall_test_args args = { + .size = 10, + }; + + switch (param->test_type) { + case syscall_test: + topts.ctx_in = &args; + topts.ctx_size_in = sizeof(args); + /* fallthrough */ + case syscall_null_ctx_test: + break; + case tc_test: + topts.data_in = &pkt_v4; + topts.data_size_in = sizeof(pkt_v4); + topts.repeat = 1; + break; + } - skel = kfunc_call_test_lskel__open_and_load(); + /* first test with normal libbpf */ + skel = kfunc_call_test__open_and_load(); if (!ASSERT_OK_PTR(skel, "skel")) return; - prog_fd = skel->progs.kfunc_call_test1.prog_fd; - err = bpf_prog_test_run_opts(prog_fd, &topts); - ASSERT_OK(err, "bpf_prog_test_run(test1)"); - ASSERT_EQ(topts.retval, 12, "test1-retval"); + prog = bpf_object__find_program_by_name(skel->obj, param->prog_name); + if (!ASSERT_OK_PTR(prog, "bpf_object__find_program_by_name")) + goto cleanup; - prog_fd = skel->progs.kfunc_call_test2.prog_fd; + prog_fd = bpf_program__fd(prog); err = bpf_prog_test_run_opts(prog_fd, &topts); - ASSERT_OK(err, "bpf_prog_test_run(test2)"); - ASSERT_EQ(topts.retval, 3, "test2-retval"); + if (!ASSERT_OK(err, param->prog_name)) + goto cleanup; + + if (!ASSERT_EQ(topts.retval, param->retval, "retval")) + goto cleanup; + + /* second test with light skeletons */ + lskel = kfunc_call_test_lskel__open_and_load(); + if (!ASSERT_OK_PTR(lskel, "lskel")) + goto cleanup; - prog_fd = skel->progs.kfunc_call_test_ref_btf_id.prog_fd; + lskel_prog = (struct bpf_prog_desc *)((char *)lskel + param->lskel_prog_desc_offset); + + prog_fd = lskel_prog->prog_fd; err = bpf_prog_test_run_opts(prog_fd, &topts); - ASSERT_OK(err, "bpf_prog_test_run(test_ref_btf_id)"); - 
ASSERT_EQ(topts.retval, 0, "test_ref_btf_id-retval"); + if (!ASSERT_OK(err, param->prog_name)) + goto cleanup; + + ASSERT_EQ(topts.retval, param->retval, "retval"); + +cleanup: + kfunc_call_test__destroy(skel); + if (lskel) + kfunc_call_test_lskel__destroy(lskel); +} + +static void verify_fail(struct kfunc_test_params *param) +{ + LIBBPF_OPTS(bpf_object_open_opts, opts); + LIBBPF_OPTS(bpf_test_run_opts, topts); + struct bpf_program *prog; + struct kfunc_call_fail *skel; + int prog_fd, err; + struct syscall_test_args args = { + .size = 10, + }; + + opts.kernel_log_buf = obj_log_buf; + opts.kernel_log_size = log_buf_sz; + opts.kernel_log_level = 1; + + switch (param->test_type) { + case syscall_test: + topts.ctx_in = &args; + topts.ctx_size_in = sizeof(args); + /* fallthrough */ + case syscall_null_ctx_test: + break; + case tc_test: + topts.data_in = &pkt_v4; + topts.data_size_in = sizeof(pkt_v4); + topts.repeat = 1; + break; + } + + skel = kfunc_call_fail__open_opts(&opts); + if (!ASSERT_OK_PTR(skel, "kfunc_call_fail__open_opts")) + goto cleanup; + + prog = bpf_object__find_program_by_name(skel->obj, param->prog_name); + if (!ASSERT_OK_PTR(prog, "bpf_object__find_program_by_name")) + goto cleanup; + + bpf_program__set_autoload(prog, true); + + err = kfunc_call_fail__load(skel); + if (!param->retval) { + /* the verifier is supposed to complain and refuses to load */ + if (!ASSERT_ERR(err, "unexpected load success")) + goto out_err; + + } else { + /* the program is loaded but must dynamically fail */ + if (!ASSERT_OK(err, "unexpected load error")) + goto out_err; + + prog_fd = bpf_program__fd(prog); + err = bpf_prog_test_run_opts(prog_fd, &topts); + if (!ASSERT_EQ(err, param->retval, param->prog_name)) + goto out_err; + } + +out_err: + if (!ASSERT_OK_PTR(strstr(obj_log_buf, param->expected_err_msg), "expected_err_msg")) { + fprintf(stderr, "Expected err_msg: %s\n", param->expected_err_msg); + fprintf(stderr, "Verifier output: %s\n", obj_log_buf); + } + +cleanup: +
kfunc_call_fail__destroy(skel); +} + +static void test_main(void) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(kfunc_tests); i++) { + if (!test__start_subtest(kfunc_tests[i].prog_name)) + continue; - kfunc_call_test_lskel__destroy(skel); + if (!kfunc_tests[i].expected_err_msg) + verify_success(&kfunc_tests[i]); + else + verify_fail(&kfunc_tests[i]); + } } static void test_subprog(void) @@ -121,8 +307,7 @@ static void test_destructive(void) void test_kfunc_call(void) { - if (test__start_subtest("main")) - test_main(); + test_main(); if (test__start_subtest("subprog")) test_subprog(); diff --git a/tools/testing/selftests/bpf/prog_tests/kfunc_dynptr_param.c b/tools/testing/selftests/bpf/prog_tests/kfunc_dynptr_param.c new file mode 100644 index 000000000000..c210657d4d0a --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/kfunc_dynptr_param.c @@ -0,0 +1,164 @@ +// SPDX-License-Identifier: GPL-2.0 + +/* + * Copyright (c) 2022 Facebook + * Copyright (C) 2022 Huawei Technologies Duesseldorf GmbH + * + * Author: Roberto Sassu <roberto.sassu@huawei.com> + */ + +#include <test_progs.h> +#include "test_kfunc_dynptr_param.skel.h" + +static size_t log_buf_sz = 1048576; /* 1 MB */ +static char obj_log_buf[1048576]; + +static struct { + const char *prog_name; + const char *expected_verifier_err_msg; + int expected_runtime_err; +} kfunc_dynptr_tests[] = { + {"dynptr_type_not_supp", + "arg#0 pointer type STRUCT bpf_dynptr_kern points to unsupported dynamic pointer type", 0}, + {"not_valid_dynptr", + "arg#0 pointer type STRUCT bpf_dynptr_kern must be valid and initialized", 0}, + {"not_ptr_to_stack", "arg#0 pointer type STRUCT bpf_dynptr_kern not to stack", 0}, + {"dynptr_data_null", NULL, -EBADMSG}, +}; + +static bool kfunc_not_supported; + +static int libbpf_print_cb(enum libbpf_print_level level, const char *fmt, + va_list args) +{ + if (strcmp(fmt, "libbpf: extern (func ksym) '%s': not found in kernel or module BTFs\n")) + return 0; + + if (strcmp(va_arg(args, char *), 
"bpf_verify_pkcs7_signature")) + return 0; + + kfunc_not_supported = true; + return 0; +} + +static void verify_fail(const char *prog_name, const char *expected_err_msg) +{ + struct test_kfunc_dynptr_param *skel; + LIBBPF_OPTS(bpf_object_open_opts, opts); + libbpf_print_fn_t old_print_cb; + struct bpf_program *prog; + int err; + + opts.kernel_log_buf = obj_log_buf; + opts.kernel_log_size = log_buf_sz; + opts.kernel_log_level = 1; + + skel = test_kfunc_dynptr_param__open_opts(&opts); + if (!ASSERT_OK_PTR(skel, "test_kfunc_dynptr_param__open_opts")) + goto cleanup; + + prog = bpf_object__find_program_by_name(skel->obj, prog_name); + if (!ASSERT_OK_PTR(prog, "bpf_object__find_program_by_name")) + goto cleanup; + + bpf_program__set_autoload(prog, true); + + bpf_map__set_max_entries(skel->maps.ringbuf, getpagesize()); + + kfunc_not_supported = false; + + old_print_cb = libbpf_set_print(libbpf_print_cb); + err = test_kfunc_dynptr_param__load(skel); + libbpf_set_print(old_print_cb); + + if (err < 0 && kfunc_not_supported) { + fprintf(stderr, + "%s:SKIP:bpf_verify_pkcs7_signature() kfunc not supported\n", + __func__); + test__skip(); + goto cleanup; + } + + if (!ASSERT_ERR(err, "unexpected load success")) + goto cleanup; + + if (!ASSERT_OK_PTR(strstr(obj_log_buf, expected_err_msg), "expected_err_msg")) { + fprintf(stderr, "Expected err_msg: %s\n", expected_err_msg); + fprintf(stderr, "Verifier output: %s\n", obj_log_buf); + } + +cleanup: + test_kfunc_dynptr_param__destroy(skel); +} + +static void verify_success(const char *prog_name, int expected_runtime_err) +{ + struct test_kfunc_dynptr_param *skel; + libbpf_print_fn_t old_print_cb; + struct bpf_program *prog; + struct bpf_link *link; + __u32 next_id; + int err; + + skel = test_kfunc_dynptr_param__open(); + if (!ASSERT_OK_PTR(skel, "test_kfunc_dynptr_param__open")) + return; + + skel->bss->pid = getpid(); + + bpf_map__set_max_entries(skel->maps.ringbuf, getpagesize()); + + kfunc_not_supported = false; + + old_print_cb = 
libbpf_set_print(libbpf_print_cb); + err = test_kfunc_dynptr_param__load(skel); + libbpf_set_print(old_print_cb); + + if (err < 0 && kfunc_not_supported) { + fprintf(stderr, + "%s:SKIP:bpf_verify_pkcs7_signature() kfunc not supported\n", + __func__); + test__skip(); + goto cleanup; + } + + if (!ASSERT_OK(err, "test_kfunc_dynptr_param__load")) + goto cleanup; + + prog = bpf_object__find_program_by_name(skel->obj, prog_name); + if (!ASSERT_OK_PTR(prog, "bpf_object__find_program_by_name")) + goto cleanup; + + link = bpf_program__attach(prog); + if (!ASSERT_OK_PTR(link, "bpf_program__attach")) + goto cleanup; + + err = bpf_prog_get_next_id(0, &next_id); + + bpf_link__destroy(link); + + if (!ASSERT_OK(err, "bpf_prog_get_next_id")) + goto cleanup; + + ASSERT_EQ(skel->bss->err, expected_runtime_err, "err"); + +cleanup: + test_kfunc_dynptr_param__destroy(skel); +} + +void test_kfunc_dynptr_param(void) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(kfunc_dynptr_tests); i++) { + if (!test__start_subtest(kfunc_dynptr_tests[i].prog_name)) + continue; + + if (kfunc_dynptr_tests[i].expected_verifier_err_msg) + verify_fail(kfunc_dynptr_tests[i].prog_name, + kfunc_dynptr_tests[i].expected_verifier_err_msg); + else + verify_success(kfunc_dynptr_tests[i].prog_name, + kfunc_dynptr_tests[i].expected_runtime_err); + } +} diff --git a/tools/testing/selftests/bpf/prog_tests/lookup_key.c b/tools/testing/selftests/bpf/prog_tests/lookup_key.c new file mode 100644 index 000000000000..68025e88f352 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/lookup_key.c @@ -0,0 +1,112 @@ +// SPDX-License-Identifier: GPL-2.0 + +/* + * Copyright (C) 2022 Huawei Technologies Duesseldorf GmbH + * + * Author: Roberto Sassu <roberto.sassu@huawei.com> + */ + +#include <linux/keyctl.h> +#include <test_progs.h> + +#include "test_lookup_key.skel.h" + +#define KEY_LOOKUP_CREATE 0x01 +#define KEY_LOOKUP_PARTIAL 0x02 + +static bool kfunc_not_supported; + +static int libbpf_print_cb(enum libbpf_print_level 
level, const char *fmt, + va_list args) +{ + char *func; + + if (strcmp(fmt, "libbpf: extern (func ksym) '%s': not found in kernel or module BTFs\n")) + return 0; + + func = va_arg(args, char *); + + if (strcmp(func, "bpf_lookup_user_key") && strcmp(func, "bpf_key_put") && + strcmp(func, "bpf_lookup_system_key")) + return 0; + + kfunc_not_supported = true; + return 0; +} + +void test_lookup_key(void) +{ + libbpf_print_fn_t old_print_cb; + struct test_lookup_key *skel; + __u32 next_id; + int ret; + + skel = test_lookup_key__open(); + if (!ASSERT_OK_PTR(skel, "test_lookup_key__open")) + return; + + old_print_cb = libbpf_set_print(libbpf_print_cb); + ret = test_lookup_key__load(skel); + libbpf_set_print(old_print_cb); + + if (ret < 0 && kfunc_not_supported) { + printf("%s:SKIP:bpf_lookup_*_key(), bpf_key_put() kfuncs not supported\n", + __func__); + test__skip(); + goto close_prog; + } + + if (!ASSERT_OK(ret, "test_lookup_key__load")) + goto close_prog; + + ret = test_lookup_key__attach(skel); + if (!ASSERT_OK(ret, "test_lookup_key__attach")) + goto close_prog; + + skel->bss->monitored_pid = getpid(); + skel->bss->key_serial = KEY_SPEC_THREAD_KEYRING; + + /* The thread-specific keyring does not exist, this test fails. */ + skel->bss->flags = 0; + + ret = bpf_prog_get_next_id(0, &next_id); + if (!ASSERT_LT(ret, 0, "bpf_prog_get_next_id")) + goto close_prog; + + /* Force creation of the thread-specific keyring, this test succeeds. */ + skel->bss->flags = KEY_LOOKUP_CREATE; + + ret = bpf_prog_get_next_id(0, &next_id); + if (!ASSERT_OK(ret, "bpf_prog_get_next_id")) + goto close_prog; + + /* Pass both lookup flags for parameter validation. */ + skel->bss->flags = KEY_LOOKUP_CREATE | KEY_LOOKUP_PARTIAL; + + ret = bpf_prog_get_next_id(0, &next_id); + if (!ASSERT_OK(ret, "bpf_prog_get_next_id")) + goto close_prog; + + /* Pass invalid flags. 
*/ + skel->bss->flags = UINT64_MAX; + + ret = bpf_prog_get_next_id(0, &next_id); + if (!ASSERT_LT(ret, 0, "bpf_prog_get_next_id")) + goto close_prog; + + skel->bss->key_serial = 0; + skel->bss->key_id = 1; + + ret = bpf_prog_get_next_id(0, &next_id); + if (!ASSERT_OK(ret, "bpf_prog_get_next_id")) + goto close_prog; + + skel->bss->key_id = UINT32_MAX; + + ret = bpf_prog_get_next_id(0, &next_id); + ASSERT_LT(ret, 0, "bpf_prog_get_next_id"); + +close_prog: + skel->bss->monitored_pid = 0; + test_lookup_key__destroy(skel); +} diff --git a/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c b/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c index cec5c0882372..0aa088900699 100644 --- a/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c +++ b/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c @@ -27,21 +27,21 @@ static int connected_socket_v4(void) int s, repair, err; s = socket(AF_INET, SOCK_STREAM, 0); - if (CHECK_FAIL(s == -1)) + if (!ASSERT_GE(s, 0, "socket")) goto error; repair = TCP_REPAIR_ON; err = setsockopt(s, SOL_TCP, TCP_REPAIR, &repair, sizeof(repair)); - if (CHECK_FAIL(err)) + if (!ASSERT_OK(err, "setsockopt(TCP_REPAIR)")) goto error; err = connect(s, (struct sockaddr *)&addr, len); - if (CHECK_FAIL(err)) + if (!ASSERT_OK(err, "connect")) goto error; repair = TCP_REPAIR_OFF_NO_WP; err = setsockopt(s, SOL_TCP, TCP_REPAIR, &repair, sizeof(repair)); - if (CHECK_FAIL(err)) + if (!ASSERT_OK(err, "setsockopt(TCP_REPAIR)")) goto error; return s; @@ -54,7 +54,7 @@ error: static void compare_cookies(struct bpf_map *src, struct bpf_map *dst) { __u32 i, max_entries = bpf_map__max_entries(src); - int err, duration = 0, src_fd, dst_fd; + int err, src_fd, dst_fd; src_fd = bpf_map__fd(src); dst_fd = bpf_map__fd(dst); @@ -65,20 +65,18 @@ static void compare_cookies(struct bpf_map *src, struct bpf_map *dst) err = bpf_map_lookup_elem(src_fd, &i, &src_cookie); if (err && errno == ENOENT) { err = bpf_map_lookup_elem(dst_fd, &i, &dst_cookie); - CHECK(!err, 
"map_lookup_elem(dst)", "element %u not deleted\n", i); - CHECK(err && errno != ENOENT, "map_lookup_elem(dst)", "%s\n", - strerror(errno)); + ASSERT_ERR(err, "map_lookup_elem(dst)"); + ASSERT_EQ(errno, ENOENT, "map_lookup_elem(dst)"); continue; } - if (CHECK(err, "lookup_elem(src)", "%s\n", strerror(errno))) + if (!ASSERT_OK(err, "lookup_elem(src)")) continue; err = bpf_map_lookup_elem(dst_fd, &i, &dst_cookie); - if (CHECK(err, "lookup_elem(dst)", "%s\n", strerror(errno))) + if (!ASSERT_OK(err, "lookup_elem(dst)")) continue; - CHECK(dst_cookie != src_cookie, "cookie mismatch", - "%llu != %llu (pos %u)\n", dst_cookie, src_cookie, i); + ASSERT_EQ(dst_cookie, src_cookie, "cookie mismatch"); } } @@ -89,20 +87,16 @@ static void test_sockmap_create_update_free(enum bpf_map_type map_type) int s, map, err; s = connected_socket_v4(); - if (CHECK_FAIL(s < 0)) + if (!ASSERT_GE(s, 0, "connected_socket_v4")) return; map = bpf_map_create(map_type, NULL, sizeof(int), sizeof(int), 1, NULL); - if (CHECK_FAIL(map < 0)) { - perror("bpf_cmap_create"); + if (!ASSERT_GE(map, 0, "bpf_map_create")) goto out; - } err = bpf_map_update_elem(map, &zero, &s, BPF_NOEXIST); - if (CHECK_FAIL(err)) { - perror("bpf_map_update"); + if (!ASSERT_OK(err, "bpf_map_update")) goto out; - } out: close(map); @@ -115,32 +109,26 @@ static void test_skmsg_helpers(enum bpf_map_type map_type) int err, map, verdict; skel = test_skmsg_load_helpers__open_and_load(); - if (CHECK_FAIL(!skel)) { - perror("test_skmsg_load_helpers__open_and_load"); + if (!ASSERT_OK_PTR(skel, "test_skmsg_load_helpers__open_and_load")) return; - } verdict = bpf_program__fd(skel->progs.prog_msg_verdict); map = bpf_map__fd(skel->maps.sock_map); err = bpf_prog_attach(verdict, map, BPF_SK_MSG_VERDICT, 0); - if (CHECK_FAIL(err)) { - perror("bpf_prog_attach"); + if (!ASSERT_OK(err, "bpf_prog_attach")) goto out; - } err = bpf_prog_detach2(verdict, map, BPF_SK_MSG_VERDICT); - if (CHECK_FAIL(err)) { - perror("bpf_prog_detach2"); + if 
(!ASSERT_OK(err, "bpf_prog_detach2")) goto out; - } out: test_skmsg_load_helpers__destroy(skel); } static void test_sockmap_update(enum bpf_map_type map_type) { - int err, prog, src, duration = 0; + int err, prog, src; struct test_sockmap_update *skel; struct bpf_map *dst_map; const __u32 zero = 0; @@ -153,11 +141,11 @@ static void test_sockmap_update(enum bpf_map_type map_type) __s64 sk; sk = connected_socket_v4(); - if (CHECK(sk == -1, "connected_socket_v4", "cannot connect\n")) + if (!ASSERT_NEQ(sk, -1, "connected_socket_v4")) return; skel = test_sockmap_update__open_and_load(); - if (CHECK(!skel, "open_and_load", "cannot load skeleton\n")) + if (!ASSERT_OK_PTR(skel, "open_and_load")) goto close_sk; prog = bpf_program__fd(skel->progs.copy_sock_map); @@ -168,7 +156,7 @@ static void test_sockmap_update(enum bpf_map_type map_type) dst_map = skel->maps.dst_sock_hash; err = bpf_map_update_elem(src, &zero, &sk, BPF_NOEXIST); - if (CHECK(err, "update_elem(src)", "errno=%u\n", errno)) + if (!ASSERT_OK(err, "update_elem(src)")) goto out; err = bpf_prog_test_run_opts(prog, &topts); @@ -188,17 +176,16 @@ close_sk: static void test_sockmap_invalid_update(void) { struct test_sockmap_invalid_update *skel; - int duration = 0; skel = test_sockmap_invalid_update__open_and_load(); - if (CHECK(skel, "open_and_load", "verifier accepted map_update\n")) + if (!ASSERT_NULL(skel, "open_and_load")) test_sockmap_invalid_update__destroy(skel); } static void test_sockmap_copy(enum bpf_map_type map_type) { DECLARE_LIBBPF_OPTS(bpf_iter_attach_opts, opts); - int err, len, src_fd, iter_fd, duration = 0; + int err, len, src_fd, iter_fd; union bpf_iter_link_info linfo = {}; __u32 i, num_sockets, num_elems; struct bpf_iter_sockmap *skel; @@ -208,7 +195,7 @@ static void test_sockmap_copy(enum bpf_map_type map_type) char buf[64]; skel = bpf_iter_sockmap__open_and_load(); - if (CHECK(!skel, "bpf_iter_sockmap__open_and_load", "skeleton open_and_load failed\n")) + if (!ASSERT_OK_PTR(skel, 
"bpf_iter_sockmap__open_and_load")) return; if (map_type == BPF_MAP_TYPE_SOCKMAP) { @@ -222,7 +209,7 @@ static void test_sockmap_copy(enum bpf_map_type map_type) } sock_fd = calloc(num_sockets, sizeof(*sock_fd)); - if (CHECK(!sock_fd, "calloc(sock_fd)", "failed to allocate\n")) + if (!ASSERT_OK_PTR(sock_fd, "calloc(sock_fd)")) goto out; for (i = 0; i < num_sockets; i++) @@ -232,11 +219,11 @@ static void test_sockmap_copy(enum bpf_map_type map_type) for (i = 0; i < num_sockets; i++) { sock_fd[i] = connected_socket_v4(); - if (CHECK(sock_fd[i] == -1, "connected_socket_v4", "cannot connect\n")) + if (!ASSERT_NEQ(sock_fd[i], -1, "connected_socket_v4")) goto out; err = bpf_map_update_elem(src_fd, &i, &sock_fd[i], BPF_NOEXIST); - if (CHECK(err, "map_update", "failed: %s\n", strerror(errno))) + if (!ASSERT_OK(err, "map_update")) goto out; } @@ -248,22 +235,20 @@ static void test_sockmap_copy(enum bpf_map_type map_type) goto out; iter_fd = bpf_iter_create(bpf_link__fd(link)); - if (CHECK(iter_fd < 0, "create_iter", "create_iter failed\n")) + if (!ASSERT_GE(iter_fd, 0, "create_iter")) goto free_link; /* do some tests */ while ((len = read(iter_fd, buf, sizeof(buf))) > 0) ; - if (CHECK(len < 0, "read", "failed: %s\n", strerror(errno))) + if (!ASSERT_GE(len, 0, "read")) goto close_iter; /* test results */ - if (CHECK(skel->bss->elems != num_elems, "elems", "got %u expected %u\n", - skel->bss->elems, num_elems)) + if (!ASSERT_EQ(skel->bss->elems, num_elems, "elems")) goto close_iter; - if (CHECK(skel->bss->socks != num_sockets, "socks", "got %u expected %u\n", - skel->bss->socks, num_sockets)) + if (!ASSERT_EQ(skel->bss->socks, num_sockets, "socks")) goto close_iter; compare_cookies(src, skel->maps.dst); @@ -288,28 +273,22 @@ static void test_sockmap_skb_verdict_attach(enum bpf_attach_type first, int err, map, verdict; skel = test_sockmap_skb_verdict_attach__open_and_load(); - if (CHECK_FAIL(!skel)) { - perror("test_sockmap_skb_verdict_attach__open_and_load"); + if 
(!ASSERT_OK_PTR(skel, "open_and_load")) return; - } verdict = bpf_program__fd(skel->progs.prog_skb_verdict); map = bpf_map__fd(skel->maps.sock_map); err = bpf_prog_attach(verdict, map, first, 0); - if (CHECK_FAIL(err)) { - perror("bpf_prog_attach"); + if (!ASSERT_OK(err, "bpf_prog_attach")) goto out; - } err = bpf_prog_attach(verdict, map, second, 0); ASSERT_EQ(err, -EBUSY, "prog_attach_fail"); err = bpf_prog_detach2(verdict, map, first); - if (CHECK_FAIL(err)) { - perror("bpf_prog_detach2"); + if (!ASSERT_OK(err, "bpf_prog_detach2")) goto out; - } out: test_sockmap_skb_verdict_attach__destroy(skel); } diff --git a/tools/testing/selftests/bpf/prog_tests/sockmap_ktls.c b/tools/testing/selftests/bpf/prog_tests/sockmap_ktls.c index e172d89e92e1..2d0796314862 100644 --- a/tools/testing/selftests/bpf/prog_tests/sockmap_ktls.c +++ b/tools/testing/selftests/bpf/prog_tests/sockmap_ktls.c @@ -15,16 +15,12 @@ static int tcp_server(int family) int err, s; s = socket(family, SOCK_STREAM, 0); - if (CHECK_FAIL(s == -1)) { - perror("socket"); + if (!ASSERT_GE(s, 0, "socket")) return -1; - } err = listen(s, SOMAXCONN); - if (CHECK_FAIL(err)) { - perror("listen"); + if (!ASSERT_OK(err, "listen")) return -1; - } return s; } @@ -48,44 +44,31 @@ static void test_sockmap_ktls_disconnect_after_delete(int family, int map) return; err = getsockname(srv, (struct sockaddr *)&addr, &len); - if (CHECK_FAIL(err)) { - perror("getsockopt"); + if (!ASSERT_OK(err, "getsockopt")) goto close_srv; - } cli = socket(family, SOCK_STREAM, 0); - if (CHECK_FAIL(cli == -1)) { - perror("socket"); + if (!ASSERT_GE(cli, 0, "socket")) goto close_srv; - } err = connect(cli, (struct sockaddr *)&addr, len); - if (CHECK_FAIL(err)) { - perror("connect"); + if (!ASSERT_OK(err, "connect")) goto close_cli; - } err = bpf_map_update_elem(map, &zero, &cli, 0); - if (CHECK_FAIL(err)) { - perror("bpf_map_update_elem"); + if (!ASSERT_OK(err, "bpf_map_update_elem")) goto close_cli; - } err = setsockopt(cli, IPPROTO_TCP, 
TCP_ULP, "tls", strlen("tls")); - if (CHECK_FAIL(err)) { - perror("setsockopt(TCP_ULP)"); + if (!ASSERT_OK(err, "setsockopt(TCP_ULP)")) goto close_cli; - } err = bpf_map_delete_elem(map, &zero); - if (CHECK_FAIL(err)) { - perror("bpf_map_delete_elem"); + if (!ASSERT_OK(err, "bpf_map_delete_elem")) goto close_cli; - } err = disconnect(cli); - if (CHECK_FAIL(err)) - perror("disconnect"); + ASSERT_OK(err, "disconnect"); close_cli: close(cli); @@ -168,10 +151,8 @@ static void run_tests(int family, enum bpf_map_type map_type) int map; map = bpf_map_create(map_type, NULL, sizeof(int), sizeof(int), 1, NULL); - if (CHECK_FAIL(map < 0)) { - perror("bpf_map_create"); + if (!ASSERT_GE(map, 0, "bpf_map_create")) return; - } if (test__start_subtest(fmt_test_name("disconnect_after_delete", family, map_type))) test_sockmap_ktls_disconnect_after_delete(family, map); diff --git a/tools/testing/selftests/bpf/prog_tests/sockopt.c b/tools/testing/selftests/bpf/prog_tests/sockopt.c index cd09f4c7dd92..aa4debf62fc6 100644 --- a/tools/testing/selftests/bpf/prog_tests/sockopt.c +++ b/tools/testing/selftests/bpf/prog_tests/sockopt.c @@ -972,12 +972,12 @@ void test_sockopt(void) int cgroup_fd, i; cgroup_fd = test__join_cgroup("/sockopt"); - if (CHECK_FAIL(cgroup_fd < 0)) + if (!ASSERT_GE(cgroup_fd, 0, "join_cgroup")) return; for (i = 0; i < ARRAY_SIZE(tests); i++) { test__start_subtest(tests[i].descr); - CHECK_FAIL(run_test(cgroup_fd, &tests[i])); + ASSERT_OK(run_test(cgroup_fd, &tests[i]), tests[i].descr); } close(cgroup_fd); diff --git a/tools/testing/selftests/bpf/prog_tests/sockopt_inherit.c b/tools/testing/selftests/bpf/prog_tests/sockopt_inherit.c index c5cb6e8374b6..60c17a8e2789 100644 --- a/tools/testing/selftests/bpf/prog_tests/sockopt_inherit.c +++ b/tools/testing/selftests/bpf/prog_tests/sockopt_inherit.c @@ -76,20 +76,16 @@ static void *server_thread(void *arg) pthread_cond_signal(&server_started); pthread_mutex_unlock(&server_started_mtx); - if (CHECK_FAIL(err < 0)) { - 
perror("Failed to listed on socket"); + if (!ASSERT_GE(err, 0, "listed on socket")) return NULL; - } err += verify_sockopt(fd, CUSTOM_INHERIT1, "listen", 1); err += verify_sockopt(fd, CUSTOM_INHERIT2, "listen", 1); err += verify_sockopt(fd, CUSTOM_LISTENER, "listen", 1); client_fd = accept(fd, (struct sockaddr *)&addr, &len); - if (CHECK_FAIL(client_fd < 0)) { - perror("Failed to accept client"); + if (!ASSERT_GE(client_fd, 0, "accept client")) return NULL; - } err += verify_sockopt(client_fd, CUSTOM_INHERIT1, "accept", 1); err += verify_sockopt(client_fd, CUSTOM_INHERIT2, "accept", 1); @@ -183,20 +179,20 @@ static void run_test(int cgroup_fd) goto close_bpf_object; err = prog_attach(obj, cgroup_fd, "cgroup/getsockopt", "_getsockopt"); - if (CHECK_FAIL(err)) + if (!ASSERT_OK(err, "prog_attach _getsockopt")) goto close_bpf_object; err = prog_attach(obj, cgroup_fd, "cgroup/setsockopt", "_setsockopt"); - if (CHECK_FAIL(err)) + if (!ASSERT_OK(err, "prog_attach _setsockopt")) goto close_bpf_object; server_fd = start_server(); - if (CHECK_FAIL(server_fd < 0)) + if (!ASSERT_GE(server_fd, 0, "start_server")) goto close_bpf_object; pthread_mutex_lock(&server_started_mtx); - if (CHECK_FAIL(pthread_create(&tid, NULL, server_thread, - (void *)&server_fd))) { + if (!ASSERT_OK(pthread_create(&tid, NULL, server_thread, + (void *)&server_fd), "pthread_create")) { pthread_mutex_unlock(&server_started_mtx); goto close_server_fd; } @@ -204,17 +200,17 @@ static void run_test(int cgroup_fd) pthread_mutex_unlock(&server_started_mtx); client_fd = connect_to_server(server_fd); - if (CHECK_FAIL(client_fd < 0)) + if (!ASSERT_GE(client_fd, 0, "connect_to_server")) goto close_server_fd; - CHECK_FAIL(verify_sockopt(client_fd, CUSTOM_INHERIT1, "connect", 0)); - CHECK_FAIL(verify_sockopt(client_fd, CUSTOM_INHERIT2, "connect", 0)); - CHECK_FAIL(verify_sockopt(client_fd, CUSTOM_LISTENER, "connect", 0)); + ASSERT_OK(verify_sockopt(client_fd, CUSTOM_INHERIT1, "connect", 0), "verify_sockopt1"); + 
ASSERT_OK(verify_sockopt(client_fd, CUSTOM_INHERIT2, "connect", 0), "verify_sockopt2"); + ASSERT_OK(verify_sockopt(client_fd, CUSTOM_LISTENER, "connect", 0), "verify_sockopt ener"); pthread_join(tid, &server_err); err = (int)(long)server_err; - CHECK_FAIL(err); + ASSERT_OK(err, "pthread_join retval"); close(client_fd); @@ -229,7 +225,7 @@ void test_sockopt_inherit(void) int cgroup_fd; cgroup_fd = test__join_cgroup("/sockopt_inherit"); - if (CHECK_FAIL(cgroup_fd < 0)) + if (!ASSERT_GE(cgroup_fd, 0, "join_cgroup")) return; run_test(cgroup_fd); diff --git a/tools/testing/selftests/bpf/prog_tests/sockopt_multi.c b/tools/testing/selftests/bpf/prog_tests/sockopt_multi.c index 28d592dc54a7..7f5659349011 100644 --- a/tools/testing/selftests/bpf/prog_tests/sockopt_multi.c +++ b/tools/testing/selftests/bpf/prog_tests/sockopt_multi.c @@ -303,11 +303,11 @@ void test_sockopt_multi(void) int err = -1; cg_parent = test__join_cgroup("/parent"); - if (CHECK_FAIL(cg_parent < 0)) + if (!ASSERT_GE(cg_parent, 0, "join_cgroup /parent")) goto out; cg_child = test__join_cgroup("/parent/child"); - if (CHECK_FAIL(cg_child < 0)) + if (!ASSERT_GE(cg_child, 0, "join_cgroup /parent/child")) goto out; obj = bpf_object__open_file("sockopt_multi.bpf.o", NULL); @@ -319,11 +319,11 @@ void test_sockopt_multi(void) goto out; sock_fd = socket(AF_INET, SOCK_STREAM, 0); - if (CHECK_FAIL(sock_fd < 0)) + if (!ASSERT_GE(sock_fd, 0, "socket")) goto out; - CHECK_FAIL(run_getsockopt_test(obj, cg_parent, cg_child, sock_fd)); - CHECK_FAIL(run_setsockopt_test(obj, cg_parent, cg_child, sock_fd)); + ASSERT_OK(run_getsockopt_test(obj, cg_parent, cg_child, sock_fd), "getsockopt_test"); + ASSERT_OK(run_setsockopt_test(obj, cg_parent, cg_child, sock_fd), "setsockopt_test"); out: close(sock_fd); diff --git a/tools/testing/selftests/bpf/prog_tests/sockopt_sk.c b/tools/testing/selftests/bpf/prog_tests/sockopt_sk.c index 30a99d2ed5c6..60d952719d27 100644 --- a/tools/testing/selftests/bpf/prog_tests/sockopt_sk.c +++ 
b/tools/testing/selftests/bpf/prog_tests/sockopt_sk.c @@ -223,7 +223,7 @@ void test_sockopt_sk(void) int cgroup_fd; cgroup_fd = test__join_cgroup("/sockopt_sk"); - if (CHECK_FAIL(cgroup_fd < 0)) + if (!ASSERT_GE(cgroup_fd, 0, "join_cgroup /sockopt_sk")) return; run_test(cgroup_fd); diff --git a/tools/testing/selftests/bpf/prog_tests/tcp_estats.c b/tools/testing/selftests/bpf/prog_tests/tcp_estats.c index 032dbfb26256..e070bca2b764 100644 --- a/tools/testing/selftests/bpf/prog_tests/tcp_estats.c +++ b/tools/testing/selftests/bpf/prog_tests/tcp_estats.c @@ -6,11 +6,9 @@ void test_tcp_estats(void) const char *file = "./test_tcp_estats.bpf.o"; int err, prog_fd; struct bpf_object *obj; - __u32 duration = 0; err = bpf_prog_test_load(file, BPF_PROG_TYPE_TRACEPOINT, &obj, &prog_fd); - CHECK(err, "", "err %d errno %d\n", err, errno); - if (err) + if (!ASSERT_OK(err, "")) return; bpf_object__close(obj); diff --git a/tools/testing/selftests/bpf/prog_tests/tcp_hdr_options.c b/tools/testing/selftests/bpf/prog_tests/tcp_hdr_options.c index 1fa772079967..617bbce6ef8f 100644 --- a/tools/testing/selftests/bpf/prog_tests/tcp_hdr_options.c +++ b/tools/testing/selftests/bpf/prog_tests/tcp_hdr_options.c @@ -42,33 +42,10 @@ struct sk_fds { static int create_netns(void) { - if (CHECK(unshare(CLONE_NEWNET), "create netns", - "unshare(CLONE_NEWNET): %s (%d)", - strerror(errno), errno)) + if (!ASSERT_OK(unshare(CLONE_NEWNET), "create netns")) return -1; - if (CHECK(system("ip link set dev lo up"), "run ip cmd", - "failed to bring lo link up\n")) - return -1; - - return 0; -} - -static int write_sysctl(const char *sysctl, const char *value) -{ - int fd, err, len; - - fd = open(sysctl, O_WRONLY); - if (CHECK(fd == -1, "open sysctl", "open(%s): %s (%d)\n", - sysctl, strerror(errno), errno)) - return -1; - - len = strlen(value); - err = write(fd, value, len); - close(fd); - if (CHECK(err != len, "write sysctl", - "write(%s, %s): err:%d %s (%d)\n", - sysctl, value, err, strerror(errno), errno)) 
+ if (!ASSERT_OK(system("ip link set dev lo up"), "run ip cmd")) return -1; return 0; @@ -100,16 +77,12 @@ static int sk_fds_shutdown(struct sk_fds *sk_fds) shutdown(sk_fds->active_fd, SHUT_WR); ret = read(sk_fds->passive_fd, &abyte, sizeof(abyte)); - if (CHECK(ret != 0, "read-after-shutdown(passive_fd):", - "ret:%d %s (%d)\n", - ret, strerror(errno), errno)) + if (!ASSERT_EQ(ret, 0, "read-after-shutdown(passive_fd):")) return -1; shutdown(sk_fds->passive_fd, SHUT_WR); ret = read(sk_fds->active_fd, &abyte, sizeof(abyte)); - if (CHECK(ret != 0, "read-after-shutdown(active_fd):", - "ret:%d %s (%d)\n", - ret, strerror(errno), errno)) + if (!ASSERT_EQ(ret, 0, "read-after-shutdown(active_fd):")) return -1; return 0; @@ -122,8 +95,7 @@ static int sk_fds_connect(struct sk_fds *sk_fds, bool fast_open) socklen_t len; sk_fds->srv_fd = start_server(AF_INET6, SOCK_STREAM, LO_ADDR6, 0, 0); - if (CHECK(sk_fds->srv_fd == -1, "start_server", "%s (%d)\n", - strerror(errno), errno)) + if (!ASSERT_NEQ(sk_fds->srv_fd, -1, "start_server")) goto error; if (fast_open) @@ -132,28 +104,25 @@ static int sk_fds_connect(struct sk_fds *sk_fds, bool fast_open) else sk_fds->active_fd = connect_to_fd(sk_fds->srv_fd, 0); - if (CHECK_FAIL(sk_fds->active_fd == -1)) { + if (!ASSERT_NEQ(sk_fds->active_fd, -1, "")) { close(sk_fds->srv_fd); goto error; } len = sizeof(addr6); - if (CHECK(getsockname(sk_fds->srv_fd, (struct sockaddr *)&addr6, - &len), "getsockname(srv_fd)", "%s (%d)\n", - strerror(errno), errno)) + if (!ASSERT_OK(getsockname(sk_fds->srv_fd, (struct sockaddr *)&addr6, + &len), "getsockname(srv_fd)")) goto error_close; sk_fds->passive_lport = ntohs(addr6.sin6_port); len = sizeof(addr6); - if (CHECK(getsockname(sk_fds->active_fd, (struct sockaddr *)&addr6, - &len), "getsockname(active_fd)", "%s (%d)\n", - strerror(errno), errno)) + if (!ASSERT_OK(getsockname(sk_fds->active_fd, (struct sockaddr *)&addr6, + &len), "getsockname(active_fd)")) goto error_close; sk_fds->active_lport = 
ntohs(addr6.sin6_port); sk_fds->passive_fd = accept(sk_fds->srv_fd, NULL, 0); - if (CHECK(sk_fds->passive_fd == -1, "accept(srv_fd)", "%s (%d)\n", - strerror(errno), errno)) + if (!ASSERT_NEQ(sk_fds->passive_fd, -1, "accept(srv_fd)")) goto error_close; if (fast_open) { @@ -161,8 +130,7 @@ static int sk_fds_connect(struct sk_fds *sk_fds, bool fast_open) int ret; ret = read(sk_fds->passive_fd, bytes_in, sizeof(bytes_in)); - if (CHECK(ret != sizeof(fast), "read fastopen syn data", - "expected=%lu actual=%d\n", sizeof(fast), ret)) { + if (!ASSERT_EQ(ret, sizeof(fast), "read fastopen syn data")) { close(sk_fds->passive_fd); goto error_close; } @@ -183,8 +151,7 @@ static int check_hdr_opt(const struct bpf_test_option *exp, const struct bpf_test_option *act, const char *hdr_desc) { - if (CHECK(memcmp(exp, act, sizeof(*exp)), - "expected-vs-actual", "unexpected %s\n", hdr_desc)) { + if (!ASSERT_OK(memcmp(exp, act, sizeof(*exp)), hdr_desc)) { print_option(exp, "expected: "); print_option(act, " actual: "); return -1; @@ -198,13 +165,11 @@ static int check_hdr_stg(const struct hdr_stg *exp, int fd, { struct hdr_stg act; - if (CHECK(bpf_map_lookup_elem(hdr_stg_map_fd, &fd, &act), - "map_lookup(hdr_stg_map_fd)", "%s %s (%d)\n", - stg_desc, strerror(errno), errno)) + if (!ASSERT_OK(bpf_map_lookup_elem(hdr_stg_map_fd, &fd, &act), + "map_lookup(hdr_stg_map_fd)")) return -1; - if (CHECK(memcmp(exp, &act, sizeof(*exp)), - "expected-vs-actual", "unexpected %s\n", stg_desc)) { + if (!ASSERT_OK(memcmp(exp, &act, sizeof(*exp)), stg_desc)) { print_hdr_stg(exp, "expected: "); print_hdr_stg(&act, " actual: "); return -1; @@ -248,9 +213,8 @@ static void check_hdr_and_close_fds(struct sk_fds *sk_fds) if (sk_fds_shutdown(sk_fds)) goto check_linum; - if (CHECK(expected_inherit_cb_flags != skel->bss->inherit_cb_flags, - "Unexpected inherit_cb_flags", "0x%x != 0x%x\n", - skel->bss->inherit_cb_flags, expected_inherit_cb_flags)) + if (!ASSERT_EQ(expected_inherit_cb_flags, 
skel->bss->inherit_cb_flags, + "inherit_cb_flags")) goto check_linum; if (check_hdr_stg(&exp_passive_hdr_stg, sk_fds->passive_fd, @@ -277,7 +241,7 @@ static void check_hdr_and_close_fds(struct sk_fds *sk_fds) "active_fin_in"); check_linum: - CHECK_FAIL(check_error_linum(sk_fds)); + ASSERT_FALSE(check_error_linum(sk_fds), "check_error_linum"); sk_fds_close(sk_fds); } @@ -517,26 +481,20 @@ static void misc(void) /* MSG_EOR to ensure skb will not be combined */ ret = send(sk_fds.active_fd, send_msg, sizeof(send_msg), MSG_EOR); - if (CHECK(ret != sizeof(send_msg), "send(msg)", "ret:%d\n", - ret)) + if (!ASSERT_EQ(ret, sizeof(send_msg), "send(msg)")) goto check_linum; ret = read(sk_fds.passive_fd, recv_msg, sizeof(recv_msg)); - if (CHECK(ret != sizeof(send_msg), "read(msg)", "ret:%d\n", - ret)) + if (!ASSERT_EQ(ret, sizeof(send_msg), "read(msg)")) goto check_linum; } if (sk_fds_shutdown(&sk_fds)) goto check_linum; - CHECK(misc_skel->bss->nr_syn != 1, "unexpected nr_syn", - "expected (1) != actual (%u)\n", - misc_skel->bss->nr_syn); + ASSERT_EQ(misc_skel->bss->nr_syn, 1, "unexpected nr_syn"); - CHECK(misc_skel->bss->nr_data != nr_data, "unexpected nr_data", - "expected (%u) != actual (%u)\n", - nr_data, misc_skel->bss->nr_data); + ASSERT_EQ(misc_skel->bss->nr_data, nr_data, "unexpected nr_data"); /* The last ACK may have been delayed, so it is either 1 or 2. 
*/ CHECK(misc_skel->bss->nr_pure_ack != 1 && @@ -545,12 +503,10 @@ static void misc(void) "expected (1 or 2) != actual (%u)\n", misc_skel->bss->nr_pure_ack); - CHECK(misc_skel->bss->nr_fin != 1, "unexpected nr_fin", - "expected (1) != actual (%u)\n", - misc_skel->bss->nr_fin); + ASSERT_EQ(misc_skel->bss->nr_fin, 1, "unexpected nr_fin"); check_linum: - CHECK_FAIL(check_error_linum(&sk_fds)); + ASSERT_FALSE(check_error_linum(&sk_fds), "check_error_linum"); sk_fds_close(&sk_fds); bpf_link__destroy(link); } @@ -575,15 +531,15 @@ void test_tcp_hdr_options(void) int i; skel = test_tcp_hdr_options__open_and_load(); - if (CHECK(!skel, "open and load skel", "failed")) + if (!ASSERT_OK_PTR(skel, "open and load skel")) return; misc_skel = test_misc_tcp_hdr_options__open_and_load(); - if (CHECK(!misc_skel, "open and load misc test skel", "failed")) + if (!ASSERT_OK_PTR(misc_skel, "open and load misc test skel")) goto skel_destroy; cg_fd = test__join_cgroup(CG_NAME); - if (CHECK_FAIL(cg_fd < 0)) + if (!ASSERT_GE(cg_fd, 0, "join_cgroup")) goto skel_destroy; for (i = 0; i < ARRAY_SIZE(tests); i++) { diff --git a/tools/testing/selftests/bpf/prog_tests/tcp_rtt.c b/tools/testing/selftests/bpf/prog_tests/tcp_rtt.c index 96ff2c20af81..8fe84da1b9b4 100644 --- a/tools/testing/selftests/bpf/prog_tests/tcp_rtt.c +++ b/tools/testing/selftests/bpf/prog_tests/tcp_rtt.c @@ -16,8 +16,7 @@ static void send_byte(int fd) { char b = 0x55; - if (CHECK_FAIL(write(fd, &b, sizeof(b)) != 1)) - perror("Failed to send single byte"); + ASSERT_EQ(write(fd, &b, sizeof(b)), 1, "send single byte"); } static int wait_for_ack(int fd, int retries) @@ -51,10 +50,8 @@ static int verify_sk(int map_fd, int client_fd, const char *msg, __u32 invoked, int err = 0; struct tcp_rtt_storage val; - if (CHECK_FAIL(bpf_map_lookup_elem(map_fd, &client_fd, &val) < 0)) { - perror("Failed to read socket storage"); + if (!ASSERT_GE(bpf_map_lookup_elem(map_fd, &client_fd, &val), 0, "read socket storage")) return -1; - } if 
(val.invoked != invoked) { log_err("%s: unexpected bpf_tcp_sock.invoked %d != %d", @@ -151,14 +148,14 @@ void test_tcp_rtt(void) int server_fd, cgroup_fd; cgroup_fd = test__join_cgroup("/tcp_rtt"); - if (CHECK_FAIL(cgroup_fd < 0)) + if (!ASSERT_GE(cgroup_fd, 0, "join_cgroup /tcp_rtt")) return; server_fd = start_server(AF_INET, SOCK_STREAM, NULL, 0, 0); - if (CHECK_FAIL(server_fd < 0)) + if (!ASSERT_GE(server_fd, 0, "start_server")) goto close_cgroup_fd; - CHECK_FAIL(run_test(cgroup_fd, server_fd)); + ASSERT_OK(run_test(cgroup_fd, server_fd), "run_test"); close(server_fd); diff --git a/tools/testing/selftests/bpf/prog_tests/tcpbpf_user.c b/tools/testing/selftests/bpf/prog_tests/tcpbpf_user.c index 87923d2865b7..7e8fe1bad03f 100644 --- a/tools/testing/selftests/bpf/prog_tests/tcpbpf_user.c +++ b/tools/testing/selftests/bpf/prog_tests/tcpbpf_user.c @@ -8,8 +8,6 @@ #define LO_ADDR6 "::1" #define CG_NAME "/tcpbpf-user-test" -static __u32 duration; - static void verify_result(struct tcpbpf_globals *result) { __u32 expected_events = ((1 << BPF_SOCK_OPS_TIMEOUT_INIT) | @@ -22,9 +20,7 @@ static void verify_result(struct tcpbpf_globals *result) (1 << BPF_SOCK_OPS_TCP_LISTEN_CB)); /* check global map */ - CHECK(expected_events != result->event_map, "event_map", - "unexpected event_map: actual 0x%08x != expected 0x%08x\n", - result->event_map, expected_events); + ASSERT_EQ(expected_events, result->event_map, "event_map"); ASSERT_EQ(result->bytes_received, 501, "bytes_received"); ASSERT_EQ(result->bytes_acked, 1002, "bytes_acked"); @@ -56,18 +52,15 @@ static void run_test(struct tcpbpf_globals *result) int i, rv; listen_fd = start_server(AF_INET6, SOCK_STREAM, LO_ADDR6, 0, 0); - if (CHECK(listen_fd == -1, "start_server", "listen_fd:%d errno:%d\n", - listen_fd, errno)) + if (!ASSERT_NEQ(listen_fd, -1, "start_server")) goto done; cli_fd = connect_to_fd(listen_fd, 0); - if (CHECK(cli_fd == -1, "connect_to_fd(listen_fd)", - "cli_fd:%d errno:%d\n", cli_fd, errno)) + if 
(!ASSERT_NEQ(cli_fd, -1, "connect_to_fd(listen_fd)")) goto done; accept_fd = accept(listen_fd, NULL, NULL); - if (CHECK(accept_fd == -1, "accept(listen_fd)", - "accept_fd:%d errno:%d\n", accept_fd, errno)) + if (!ASSERT_NEQ(accept_fd, -1, "accept(listen_fd)")) goto done; /* Send 1000B of '+'s from cli_fd -> accept_fd */ @@ -75,11 +68,11 @@ static void run_test(struct tcpbpf_globals *result) buf[i] = '+'; rv = send(cli_fd, buf, 1000, 0); - if (CHECK(rv != 1000, "send(cli_fd)", "rv:%d errno:%d\n", rv, errno)) + if (!ASSERT_EQ(rv, 1000, "send(cli_fd)")) goto done; rv = recv(accept_fd, buf, 1000, 0); - if (CHECK(rv != 1000, "recv(accept_fd)", "rv:%d errno:%d\n", rv, errno)) + if (!ASSERT_EQ(rv, 1000, "recv(accept_fd)")) goto done; /* Send 500B of '.'s from accept_fd ->cli_fd */ @@ -87,11 +80,11 @@ static void run_test(struct tcpbpf_globals *result) buf[i] = '.'; rv = send(accept_fd, buf, 500, 0); - if (CHECK(rv != 500, "send(accept_fd)", "rv:%d errno:%d\n", rv, errno)) + if (!ASSERT_EQ(rv, 500, "send(accept_fd)")) goto done; rv = recv(cli_fd, buf, 500, 0); - if (CHECK(rv != 500, "recv(cli_fd)", "rv:%d errno:%d\n", rv, errno)) + if (!ASSERT_EQ(rv, 500, "recv(cli_fd)")) goto done; /* @@ -100,12 +93,12 @@ static void run_test(struct tcpbpf_globals *result) */ shutdown(accept_fd, SHUT_WR); err = recv(cli_fd, buf, 1, 0); - if (CHECK(err, "recv(cli_fd) for fin", "err:%d errno:%d\n", err, errno)) + if (!ASSERT_OK(err, "recv(cli_fd) for fin")) goto done; shutdown(cli_fd, SHUT_WR); err = recv(accept_fd, buf, 1, 0); - CHECK(err, "recv(accept_fd) for fin", "err:%d errno:%d\n", err, errno); + ASSERT_OK(err, "recv(accept_fd) for fin"); done: if (accept_fd != -1) close(accept_fd); @@ -124,12 +117,11 @@ void test_tcpbpf_user(void) int cg_fd = -1; skel = test_tcpbpf_kern__open_and_load(); - if (CHECK(!skel, "open and load skel", "failed")) + if (!ASSERT_OK_PTR(skel, "open and load skel")) return; cg_fd = test__join_cgroup(CG_NAME); - if (CHECK(cg_fd < 0, "test__join_cgroup(" CG_NAME 
")", - "cg_fd:%d errno:%d", cg_fd, errno)) + if (!ASSERT_GE(cg_fd, 0, "test__join_cgroup(" CG_NAME ")")) goto err; skel->links.bpf_testcb = bpf_program__attach_cgroup(skel->progs.bpf_testcb, cg_fd); diff --git a/tools/testing/selftests/bpf/prog_tests/tracing_struct.c b/tools/testing/selftests/bpf/prog_tests/tracing_struct.c new file mode 100644 index 000000000000..d5022b91d1e4 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/tracing_struct.c @@ -0,0 +1,63 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. */ + +#include <test_progs.h> +#include "tracing_struct.skel.h" + +static void test_fentry(void) +{ + struct tracing_struct *skel; + int err; + + skel = tracing_struct__open_and_load(); + if (!ASSERT_OK_PTR(skel, "tracing_struct__open_and_load")) + return; + + err = tracing_struct__attach(skel); + if (!ASSERT_OK(err, "tracing_struct__attach")) + return; + + ASSERT_OK(trigger_module_test_read(256), "trigger_read"); + + ASSERT_EQ(skel->bss->t1_a_a, 2, "t1:a.a"); + ASSERT_EQ(skel->bss->t1_a_b, 3, "t1:a.b"); + ASSERT_EQ(skel->bss->t1_b, 1, "t1:b"); + ASSERT_EQ(skel->bss->t1_c, 4, "t1:c"); + + ASSERT_EQ(skel->bss->t1_nregs, 4, "t1 nregs"); + ASSERT_EQ(skel->bss->t1_reg0, 2, "t1 reg0"); + ASSERT_EQ(skel->bss->t1_reg1, 3, "t1 reg1"); + ASSERT_EQ(skel->bss->t1_reg2, 1, "t1 reg2"); + ASSERT_EQ(skel->bss->t1_reg3, 4, "t1 reg3"); + ASSERT_EQ(skel->bss->t1_ret, 10, "t1 ret"); + + ASSERT_EQ(skel->bss->t2_a, 1, "t2:a"); + ASSERT_EQ(skel->bss->t2_b_a, 2, "t2:b.a"); + ASSERT_EQ(skel->bss->t2_b_b, 3, "t2:b.b"); + ASSERT_EQ(skel->bss->t2_c, 4, "t2:c"); + ASSERT_EQ(skel->bss->t2_ret, 10, "t2 ret"); + + ASSERT_EQ(skel->bss->t3_a, 1, "t3:a"); + ASSERT_EQ(skel->bss->t3_b, 4, "t3:b"); + ASSERT_EQ(skel->bss->t3_c_a, 2, "t3:c.a"); + ASSERT_EQ(skel->bss->t3_c_b, 3, "t3:c.b"); + ASSERT_EQ(skel->bss->t3_ret, 10, "t3 ret"); + + ASSERT_EQ(skel->bss->t4_a_a, 10, "t4:a.a"); + ASSERT_EQ(skel->bss->t4_b, 1, "t4:b"); + 
ASSERT_EQ(skel->bss->t4_c, 2, "t4:c"); + ASSERT_EQ(skel->bss->t4_d, 3, "t4:d"); + ASSERT_EQ(skel->bss->t4_e_a, 2, "t4:e.a"); + ASSERT_EQ(skel->bss->t4_e_b, 3, "t4:e.b"); + ASSERT_EQ(skel->bss->t4_ret, 21, "t4 ret"); + + ASSERT_EQ(skel->bss->t5_ret, 1, "t5 ret"); + + tracing_struct__detach(skel); + tracing_struct__destroy(skel); +} + +void test_tracing_struct(void) +{ + test_fentry(); +} diff --git a/tools/testing/selftests/bpf/prog_tests/udp_limit.c b/tools/testing/selftests/bpf/prog_tests/udp_limit.c index 56c9d6bd38a3..2643d896ddae 100644 --- a/tools/testing/selftests/bpf/prog_tests/udp_limit.c +++ b/tools/testing/selftests/bpf/prog_tests/udp_limit.c @@ -5,8 +5,6 @@ #include <sys/types.h> #include <sys/socket.h> -static int duration; - void test_udp_limit(void) { struct udp_limit *skel; @@ -14,11 +12,11 @@ void test_udp_limit(void) int cgroup_fd; cgroup_fd = test__join_cgroup("/udp_limit"); - if (CHECK(cgroup_fd < 0, "cg-join", "errno %d", errno)) + if (!ASSERT_GE(cgroup_fd, 0, "cg-join")) return; skel = udp_limit__open_and_load(); - if (CHECK(!skel, "skel-load", "errno %d", errno)) + if (!ASSERT_OK_PTR(skel, "skel-load")) goto close_cgroup_fd; skel->links.sock = bpf_program__attach_cgroup(skel->progs.sock, cgroup_fd); @@ -32,11 +30,11 @@ void test_udp_limit(void) * verify that. */ fd1 = socket(AF_INET, SOCK_DGRAM, 0); - if (CHECK(fd1 < 0, "fd1", "errno %d", errno)) + if (!ASSERT_GE(fd1, 0, "socket(fd1)")) goto close_skeleton; fd2 = socket(AF_INET, SOCK_DGRAM, 0); - if (CHECK(fd2 >= 0, "fd2", "errno %d", errno)) + if (!ASSERT_LT(fd2, 0, "socket(fd2)")) goto close_skeleton; /* We can reopen again after close. 
*/ @@ -44,7 +42,7 @@ void test_udp_limit(void) fd1 = -1; fd1 = socket(AF_INET, SOCK_DGRAM, 0); - if (CHECK(fd1 < 0, "fd1-again", "errno %d", errno)) + if (!ASSERT_GE(fd1, 0, "socket(fd1-again)")) goto close_skeleton; /* Make sure the program was invoked the expected @@ -54,13 +52,11 @@ void test_udp_limit(void) * - close fd1 - BPF_CGROUP_INET_SOCK_RELEASE * - open fd1 again - BPF_CGROUP_INET_SOCK_CREATE */ - if (CHECK(skel->bss->invocations != 4, "bss-invocations", - "invocations=%d", skel->bss->invocations)) + if (!ASSERT_EQ(skel->bss->invocations, 4, "bss-invocations")) goto close_skeleton; /* We should still have a single socket in use */ - if (CHECK(skel->bss->in_use != 1, "bss-in_use", - "in_use=%d", skel->bss->in_use)) + if (!ASSERT_EQ(skel->bss->in_use, 1, "bss-in_use")) goto close_skeleton; close_skeleton: diff --git a/tools/testing/selftests/bpf/prog_tests/user_ringbuf.c b/tools/testing/selftests/bpf/prog_tests/user_ringbuf.c new file mode 100644 index 000000000000..02b18d018b36 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/user_ringbuf.c @@ -0,0 +1,754 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. 
*/ + +#define _GNU_SOURCE +#include <linux/compiler.h> +#include <linux/ring_buffer.h> +#include <pthread.h> +#include <stdio.h> +#include <stdlib.h> +#include <sys/mman.h> +#include <sys/syscall.h> +#include <sys/sysinfo.h> +#include <test_progs.h> +#include <uapi/linux/bpf.h> +#include <unistd.h> + +#include "user_ringbuf_fail.skel.h" +#include "user_ringbuf_success.skel.h" + +#include "../progs/test_user_ringbuf.h" + +static size_t log_buf_sz = 1 << 20; /* 1 MB */ +static char obj_log_buf[1048576]; +static const long c_sample_size = sizeof(struct sample) + BPF_RINGBUF_HDR_SZ; +static const long c_ringbuf_size = 1 << 12; /* 1 small page */ +static const long c_max_entries = c_ringbuf_size / c_sample_size; + +static void drain_current_samples(void) +{ + syscall(__NR_getpgid); +} + +static int write_samples(struct user_ring_buffer *ringbuf, uint32_t num_samples) +{ + int i, err = 0; + + /* Write some number of samples to the ring buffer. */ + for (i = 0; i < num_samples; i++) { + struct sample *entry; + int read; + + entry = user_ring_buffer__reserve(ringbuf, sizeof(*entry)); + if (!entry) { + err = -errno; + goto done; + } + + entry->pid = getpid(); + entry->seq = i; + entry->value = i * i; + + read = snprintf(entry->comm, sizeof(entry->comm), "%u", i); + if (read <= 0) { + /* Assert on the error path to avoid spamming logs with + * mostly success messages. 
+ */ + ASSERT_GT(read, 0, "snprintf_comm"); + err = read; + user_ring_buffer__discard(ringbuf, entry); + goto done; + } + + user_ring_buffer__submit(ringbuf, entry); + } + +done: + drain_current_samples(); + + return err; +} + +static struct user_ringbuf_success *open_load_ringbuf_skel(void) +{ + struct user_ringbuf_success *skel; + int err; + + skel = user_ringbuf_success__open(); + if (!ASSERT_OK_PTR(skel, "skel_open")) + return NULL; + + err = bpf_map__set_max_entries(skel->maps.user_ringbuf, c_ringbuf_size); + if (!ASSERT_OK(err, "set_max_entries")) + goto cleanup; + + err = bpf_map__set_max_entries(skel->maps.kernel_ringbuf, c_ringbuf_size); + if (!ASSERT_OK(err, "set_max_entries")) + goto cleanup; + + err = user_ringbuf_success__load(skel); + if (!ASSERT_OK(err, "skel_load")) + goto cleanup; + + return skel; + +cleanup: + user_ringbuf_success__destroy(skel); + return NULL; +} + +static void test_user_ringbuf_mappings(void) +{ + int err, rb_fd; + int page_size = getpagesize(); + void *mmap_ptr; + struct user_ringbuf_success *skel; + + skel = open_load_ringbuf_skel(); + if (!skel) + return; + + rb_fd = bpf_map__fd(skel->maps.user_ringbuf); + /* cons_pos can be mapped R/O, can't add +X with mprotect. */ + mmap_ptr = mmap(NULL, page_size, PROT_READ, MAP_SHARED, rb_fd, 0); + ASSERT_OK_PTR(mmap_ptr, "ro_cons_pos"); + ASSERT_ERR(mprotect(mmap_ptr, page_size, PROT_WRITE), "write_cons_pos_protect"); + ASSERT_ERR(mprotect(mmap_ptr, page_size, PROT_EXEC), "exec_cons_pos_protect"); + ASSERT_ERR_PTR(mremap(mmap_ptr, 0, 4 * page_size, MREMAP_MAYMOVE), "wr_prod_pos"); + err = -errno; + ASSERT_ERR(err, "wr_prod_pos_err"); + ASSERT_OK(munmap(mmap_ptr, page_size), "unmap_ro_cons"); + + /* prod_pos can be mapped RW, can't add +X with mprotect. 
*/ + mmap_ptr = mmap(NULL, page_size, PROT_READ | PROT_WRITE, MAP_SHARED, + rb_fd, page_size); + ASSERT_OK_PTR(mmap_ptr, "rw_prod_pos"); + ASSERT_ERR(mprotect(mmap_ptr, page_size, PROT_EXEC), "exec_prod_pos_protect"); + err = -errno; + ASSERT_ERR(err, "wr_prod_pos_err"); + ASSERT_OK(munmap(mmap_ptr, page_size), "unmap_rw_prod"); + + /* data pages can be mapped RW, can't add +X with mprotect. */ + mmap_ptr = mmap(NULL, page_size, PROT_WRITE, MAP_SHARED, rb_fd, + 2 * page_size); + ASSERT_OK_PTR(mmap_ptr, "rw_data"); + ASSERT_ERR(mprotect(mmap_ptr, page_size, PROT_EXEC), "exec_data_protect"); + err = -errno; + ASSERT_ERR(err, "exec_data_err"); + ASSERT_OK(munmap(mmap_ptr, page_size), "unmap_rw_data"); + + user_ringbuf_success__destroy(skel); +} + +static int load_skel_create_ringbufs(struct user_ringbuf_success **skel_out, + struct ring_buffer **kern_ringbuf_out, + ring_buffer_sample_fn callback, + struct user_ring_buffer **user_ringbuf_out) +{ + struct user_ringbuf_success *skel; + struct ring_buffer *kern_ringbuf = NULL; + struct user_ring_buffer *user_ringbuf = NULL; + int err = -ENOMEM, rb_fd; + + skel = open_load_ringbuf_skel(); + if (!skel) + return err; + + /* only trigger BPF program for current process */ + skel->bss->pid = getpid(); + + if (kern_ringbuf_out) { + rb_fd = bpf_map__fd(skel->maps.kernel_ringbuf); + kern_ringbuf = ring_buffer__new(rb_fd, callback, skel, NULL); + if (!ASSERT_OK_PTR(kern_ringbuf, "kern_ringbuf_create")) + goto cleanup; + + *kern_ringbuf_out = kern_ringbuf; + } + + if (user_ringbuf_out) { + rb_fd = bpf_map__fd(skel->maps.user_ringbuf); + user_ringbuf = user_ring_buffer__new(rb_fd, NULL); + if (!ASSERT_OK_PTR(user_ringbuf, "user_ringbuf_create")) + goto cleanup; + + *user_ringbuf_out = user_ringbuf; + ASSERT_EQ(skel->bss->read, 0, "no_reads_after_load"); + } + + err = user_ringbuf_success__attach(skel); + if (!ASSERT_OK(err, "skel_attach")) + goto cleanup; + + *skel_out = skel; + return 0; + +cleanup: + if (kern_ringbuf_out) + 
*kern_ringbuf_out = NULL; + if (user_ringbuf_out) + *user_ringbuf_out = NULL; + ring_buffer__free(kern_ringbuf); + user_ring_buffer__free(user_ringbuf); + user_ringbuf_success__destroy(skel); + return err; +} + +static int load_skel_create_user_ringbuf(struct user_ringbuf_success **skel_out, + struct user_ring_buffer **ringbuf_out) +{ + return load_skel_create_ringbufs(skel_out, NULL, NULL, ringbuf_out); +} + +static void manually_write_test_invalid_sample(struct user_ringbuf_success *skel, + __u32 size, __u64 producer_pos, int err) +{ + void *data_ptr; + __u64 *producer_pos_ptr; + int rb_fd, page_size = getpagesize(); + + rb_fd = bpf_map__fd(skel->maps.user_ringbuf); + + ASSERT_EQ(skel->bss->read, 0, "num_samples_before_bad_sample"); + + /* Map the producer_pos as RW. */ + producer_pos_ptr = mmap(NULL, page_size, PROT_READ | PROT_WRITE, + MAP_SHARED, rb_fd, page_size); + ASSERT_OK_PTR(producer_pos_ptr, "producer_pos_ptr"); + + /* Map the data pages as RW. */ + data_ptr = mmap(NULL, page_size, PROT_WRITE, MAP_SHARED, rb_fd, 2 * page_size); + ASSERT_OK_PTR(data_ptr, "rw_data"); + + memset(data_ptr, 0, BPF_RINGBUF_HDR_SZ); + *(__u32 *)data_ptr = size; + + /* Synchronizes with smp_load_acquire() in __bpf_user_ringbuf_peek() in the kernel. 
*/ + smp_store_release(producer_pos_ptr, producer_pos + BPF_RINGBUF_HDR_SZ); + + drain_current_samples(); + ASSERT_EQ(skel->bss->read, 0, "num_samples_after_bad_sample"); + ASSERT_EQ(skel->bss->err, err, "err_after_bad_sample"); + + ASSERT_OK(munmap(producer_pos_ptr, page_size), "unmap_producer_pos"); + ASSERT_OK(munmap(data_ptr, page_size), "unmap_data_ptr"); +} + +static void test_user_ringbuf_post_misaligned(void) +{ + struct user_ringbuf_success *skel; + struct user_ring_buffer *ringbuf; + int err; + __u32 size = (1 << 5) + 7; + + err = load_skel_create_user_ringbuf(&skel, &ringbuf); + if (!ASSERT_OK(err, "misaligned_skel")) + return; + + manually_write_test_invalid_sample(skel, size, size, -EINVAL); + user_ring_buffer__free(ringbuf); + user_ringbuf_success__destroy(skel); +} + +static void test_user_ringbuf_post_producer_wrong_offset(void) +{ + struct user_ringbuf_success *skel; + struct user_ring_buffer *ringbuf; + int err; + __u32 size = (1 << 5); + + err = load_skel_create_user_ringbuf(&skel, &ringbuf); + if (!ASSERT_OK(err, "wrong_offset_skel")) + return; + + manually_write_test_invalid_sample(skel, size, size - 8, -EINVAL); + user_ring_buffer__free(ringbuf); + user_ringbuf_success__destroy(skel); +} + +static void test_user_ringbuf_post_larger_than_ringbuf_sz(void) +{ + struct user_ringbuf_success *skel; + struct user_ring_buffer *ringbuf; + int err; + __u32 size = c_ringbuf_size; + + err = load_skel_create_user_ringbuf(&skel, &ringbuf); + if (!ASSERT_OK(err, "huge_sample_skel")) + return; + + manually_write_test_invalid_sample(skel, size, size, -E2BIG); + user_ring_buffer__free(ringbuf); + user_ringbuf_success__destroy(skel); +} + +static void test_user_ringbuf_basic(void) +{ + struct user_ringbuf_success *skel; + struct user_ring_buffer *ringbuf; + int err; + + err = load_skel_create_user_ringbuf(&skel, &ringbuf); + if (!ASSERT_OK(err, "ringbuf_basic_skel")) + return; + + ASSERT_EQ(skel->bss->read, 0, "num_samples_read_before"); + + err = 
write_samples(ringbuf, 2); + if (!ASSERT_OK(err, "write_samples")) + goto cleanup; + + ASSERT_EQ(skel->bss->read, 2, "num_samples_read_after"); + +cleanup: + user_ring_buffer__free(ringbuf); + user_ringbuf_success__destroy(skel); +} + +static void test_user_ringbuf_sample_full_ring_buffer(void) +{ + struct user_ringbuf_success *skel; + struct user_ring_buffer *ringbuf; + int err; + void *sample; + + err = load_skel_create_user_ringbuf(&skel, &ringbuf); + if (!ASSERT_OK(err, "ringbuf_full_sample_skel")) + return; + + sample = user_ring_buffer__reserve(ringbuf, c_ringbuf_size - BPF_RINGBUF_HDR_SZ); + if (!ASSERT_OK_PTR(sample, "full_sample")) + goto cleanup; + + user_ring_buffer__submit(ringbuf, sample); + ASSERT_EQ(skel->bss->read, 0, "num_samples_read_before"); + drain_current_samples(); + ASSERT_EQ(skel->bss->read, 1, "num_samples_read_after"); + +cleanup: + user_ring_buffer__free(ringbuf); + user_ringbuf_success__destroy(skel); +} + +static void test_user_ringbuf_post_alignment_autoadjust(void) +{ + struct user_ringbuf_success *skel; + struct user_ring_buffer *ringbuf; + struct sample *sample; + int err; + + err = load_skel_create_user_ringbuf(&skel, &ringbuf); + if (!ASSERT_OK(err, "ringbuf_align_autoadjust_skel")) + return; + + /* libbpf should automatically round any sample up to an 8-byte alignment. 
*/ + sample = user_ring_buffer__reserve(ringbuf, sizeof(*sample) + 1); + ASSERT_OK_PTR(sample, "reserve_autoaligned"); + user_ring_buffer__submit(ringbuf, sample); + + ASSERT_EQ(skel->bss->read, 0, "num_samples_read_before"); + drain_current_samples(); + ASSERT_EQ(skel->bss->read, 1, "num_samples_read_after"); + + user_ring_buffer__free(ringbuf); + user_ringbuf_success__destroy(skel); +} + +static void test_user_ringbuf_overfill(void) +{ + struct user_ringbuf_success *skel; + struct user_ring_buffer *ringbuf; + int err; + + err = load_skel_create_user_ringbuf(&skel, &ringbuf); + if (err) + return; + + err = write_samples(ringbuf, c_max_entries * 5); + ASSERT_ERR(err, "write_samples"); + ASSERT_EQ(skel->bss->read, c_max_entries, "max_entries"); + + user_ring_buffer__free(ringbuf); + user_ringbuf_success__destroy(skel); +} + +static void test_user_ringbuf_discards_properly_ignored(void) +{ + struct user_ringbuf_success *skel; + struct user_ring_buffer *ringbuf; + int err, num_discarded = 0; + __u64 *token; + + err = load_skel_create_user_ringbuf(&skel, &ringbuf); + if (err) + return; + + ASSERT_EQ(skel->bss->read, 0, "num_samples_read_before"); + + while (1) { + /* Write samples until the buffer is full. */ + token = user_ring_buffer__reserve(ringbuf, sizeof(*token)); + if (!token) + break; + + user_ring_buffer__discard(ringbuf, token); + num_discarded++; + } + + if (!ASSERT_GE(num_discarded, 0, "num_discarded")) + goto cleanup; + + /* Should not read any samples, as they are all discarded. */ + ASSERT_EQ(skel->bss->read, 0, "num_pre_kick"); + drain_current_samples(); + ASSERT_EQ(skel->bss->read, 0, "num_post_kick"); + + /* Now that the ring buffer has been drained, we should be able to + * reserve another token. 
+ */ + token = user_ring_buffer__reserve(ringbuf, sizeof(*token)); + + if (!ASSERT_OK_PTR(token, "new_token")) + goto cleanup; + + user_ring_buffer__discard(ringbuf, token); +cleanup: + user_ring_buffer__free(ringbuf); + user_ringbuf_success__destroy(skel); +} + +static void test_user_ringbuf_loop(void) +{ + struct user_ringbuf_success *skel; + struct user_ring_buffer *ringbuf; + uint32_t total_samples = 8192; + uint32_t remaining_samples = total_samples; + int err; + + BUILD_BUG_ON(total_samples <= c_max_entries); + err = load_skel_create_user_ringbuf(&skel, &ringbuf); + if (err) + return; + + do { + uint32_t curr_samples; + + curr_samples = remaining_samples > c_max_entries + ? c_max_entries : remaining_samples; + err = write_samples(ringbuf, curr_samples); + if (err != 0) { + /* Assert inside of if statement to avoid flooding logs + * on the success path. + */ + ASSERT_OK(err, "write_samples"); + goto cleanup; + } + + remaining_samples -= curr_samples; + ASSERT_EQ(skel->bss->read, total_samples - remaining_samples, + "current_batched_entries"); + } while (remaining_samples > 0); + ASSERT_EQ(skel->bss->read, total_samples, "total_batched_entries"); + +cleanup: + user_ring_buffer__free(ringbuf); + user_ringbuf_success__destroy(skel); +} + +static int send_test_message(struct user_ring_buffer *ringbuf, + enum test_msg_op op, s64 operand_64, + s32 operand_32) +{ + struct test_msg *msg; + + msg = user_ring_buffer__reserve(ringbuf, sizeof(*msg)); + if (!msg) { + /* Assert on the error path to avoid spamming logs with mostly + * success messages. 
+ */ + ASSERT_OK_PTR(msg, "reserve_msg"); + return -ENOMEM; + } + + msg->msg_op = op; + + switch (op) { + case TEST_MSG_OP_INC64: + case TEST_MSG_OP_MUL64: + msg->operand_64 = operand_64; + break; + case TEST_MSG_OP_INC32: + case TEST_MSG_OP_MUL32: + msg->operand_32 = operand_32; + break; + default: + PRINT_FAIL("Invalid operand %d\n", op); + user_ring_buffer__discard(ringbuf, msg); + return -EINVAL; + } + + user_ring_buffer__submit(ringbuf, msg); + + return 0; +} + +static void kick_kernel_read_messages(void) +{ + syscall(__NR_prctl); +} + +static int handle_kernel_msg(void *ctx, void *data, size_t len) +{ + struct user_ringbuf_success *skel = ctx; + struct test_msg *msg = data; + + switch (msg->msg_op) { + case TEST_MSG_OP_INC64: + skel->bss->user_mutated += msg->operand_64; + return 0; + case TEST_MSG_OP_INC32: + skel->bss->user_mutated += msg->operand_32; + return 0; + case TEST_MSG_OP_MUL64: + skel->bss->user_mutated *= msg->operand_64; + return 0; + case TEST_MSG_OP_MUL32: + skel->bss->user_mutated *= msg->operand_32; + return 0; + default: + fprintf(stderr, "Invalid operand %d\n", msg->msg_op); + return -EINVAL; + } +} + +static void drain_kernel_messages_buffer(struct ring_buffer *kern_ringbuf, + struct user_ringbuf_success *skel) +{ + int cnt; + + cnt = ring_buffer__consume(kern_ringbuf); + ASSERT_EQ(cnt, 8, "consume_kern_ringbuf"); + ASSERT_OK(skel->bss->err, "consume_kern_ringbuf_err"); +} + +static void test_user_ringbuf_msg_protocol(void) +{ + struct user_ringbuf_success *skel; + struct user_ring_buffer *user_ringbuf; + struct ring_buffer *kern_ringbuf; + int err, i; + __u64 expected_kern = 0; + + err = load_skel_create_ringbufs(&skel, &kern_ringbuf, handle_kernel_msg, &user_ringbuf); + if (!ASSERT_OK(err, "create_ringbufs")) + return; + + for (i = 0; i < 64; i++) { + enum test_msg_op op = i % TEST_MSG_OP_NUM_OPS; + __u64 operand_64 = TEST_OP_64; + __u32 operand_32 = TEST_OP_32; + + err = send_test_message(user_ringbuf, op, operand_64, operand_32); + 
if (err) { + /* Only assert on a failure to avoid spamming success logs. */ + ASSERT_OK(err, "send_test_message"); + goto cleanup; + } + + switch (op) { + case TEST_MSG_OP_INC64: + expected_kern += operand_64; + break; + case TEST_MSG_OP_INC32: + expected_kern += operand_32; + break; + case TEST_MSG_OP_MUL64: + expected_kern *= operand_64; + break; + case TEST_MSG_OP_MUL32: + expected_kern *= operand_32; + break; + default: + PRINT_FAIL("Unexpected op %d\n", op); + goto cleanup; + } + + if (i % 8 == 0) { + kick_kernel_read_messages(); + ASSERT_EQ(skel->bss->kern_mutated, expected_kern, "expected_kern"); + ASSERT_EQ(skel->bss->err, 0, "bpf_prog_err"); + drain_kernel_messages_buffer(kern_ringbuf, skel); + } + } + +cleanup: + ring_buffer__free(kern_ringbuf); + user_ring_buffer__free(user_ringbuf); + user_ringbuf_success__destroy(skel); +} + +static void *kick_kernel_cb(void *arg) +{ + /* Kick the kernel, causing it to drain the ring buffer and then wake + * up the test thread waiting on epoll. + */ + syscall(__NR_getrlimit); + + return NULL; +} + +static int spawn_kick_thread_for_poll(void) +{ + pthread_t thread; + + return pthread_create(&thread, NULL, kick_kernel_cb, NULL); +} + +static void test_user_ringbuf_blocking_reserve(void) +{ + struct user_ringbuf_success *skel; + struct user_ring_buffer *ringbuf; + int err, num_written = 0; + __u64 *token; + + err = load_skel_create_user_ringbuf(&skel, &ringbuf); + if (err) + return; + + ASSERT_EQ(skel->bss->read, 0, "num_samples_read_before"); + + while (1) { + /* Write samples until the buffer is full. */ + token = user_ring_buffer__reserve(ringbuf, sizeof(*token)); + if (!token) + break; + + *token = 0xdeadbeef; + + user_ring_buffer__submit(ringbuf, token); + num_written++; + } + + if (!ASSERT_GE(num_written, 0, "num_written")) + goto cleanup; + + /* Should not have read any samples until the kernel is kicked. 
*/ + ASSERT_EQ(skel->bss->read, 0, "num_pre_kick"); + + /* We correctly time out after 1 second, without a sample. */ + token = user_ring_buffer__reserve_blocking(ringbuf, sizeof(*token), 1000); + if (!ASSERT_EQ(token, NULL, "pre_kick_timeout_token")) + goto cleanup; + + err = spawn_kick_thread_for_poll(); + if (!ASSERT_EQ(err, 0, "deferred_kick_thread\n")) + goto cleanup; + + /* After spawning another thread that asynchronously kicks the kernel to + * drain the messages, we're able to block and successfully get a + * sample once we receive an event notification. + */ + token = user_ring_buffer__reserve_blocking(ringbuf, sizeof(*token), 10000); + + if (!ASSERT_OK_PTR(token, "block_token")) + goto cleanup; + + ASSERT_GT(skel->bss->read, 0, "num_post_kill"); + ASSERT_LE(skel->bss->read, num_written, "num_post_kill"); + ASSERT_EQ(skel->bss->err, 0, "err_post_poll"); + user_ring_buffer__discard(ringbuf, token); + +cleanup: + user_ring_buffer__free(ringbuf); + user_ringbuf_success__destroy(skel); +} + +static struct { + const char *prog_name; + const char *expected_err_msg; +} failure_tests[] = { + /* failure cases */ + {"user_ringbuf_callback_bad_access1", "negative offset dynptr_ptr ptr"}, + {"user_ringbuf_callback_bad_access2", "dereference of modified dynptr_ptr ptr"}, + {"user_ringbuf_callback_write_forbidden", "invalid mem access 'dynptr_ptr'"}, + {"user_ringbuf_callback_null_context_write", "invalid mem access 'scalar'"}, + {"user_ringbuf_callback_null_context_read", "invalid mem access 'scalar'"}, + {"user_ringbuf_callback_discard_dynptr", "arg 1 is an unacquired reference"}, + {"user_ringbuf_callback_submit_dynptr", "arg 1 is an unacquired reference"}, + {"user_ringbuf_callback_invalid_return", "At callback return the register R0 has value"}, +}; + +#define SUCCESS_TEST(_func) { _func, #_func } + +static struct { + void (*test_callback)(void); + const char *test_name; +} success_tests[] = { + SUCCESS_TEST(test_user_ringbuf_mappings), + 
SUCCESS_TEST(test_user_ringbuf_post_misaligned), + SUCCESS_TEST(test_user_ringbuf_post_producer_wrong_offset), + SUCCESS_TEST(test_user_ringbuf_post_larger_than_ringbuf_sz), + SUCCESS_TEST(test_user_ringbuf_basic), + SUCCESS_TEST(test_user_ringbuf_sample_full_ring_buffer), + SUCCESS_TEST(test_user_ringbuf_post_alignment_autoadjust), + SUCCESS_TEST(test_user_ringbuf_overfill), + SUCCESS_TEST(test_user_ringbuf_discards_properly_ignored), + SUCCESS_TEST(test_user_ringbuf_loop), + SUCCESS_TEST(test_user_ringbuf_msg_protocol), + SUCCESS_TEST(test_user_ringbuf_blocking_reserve), +}; + +static void verify_fail(const char *prog_name, const char *expected_err_msg) +{ + LIBBPF_OPTS(bpf_object_open_opts, opts); + struct bpf_program *prog; + struct user_ringbuf_fail *skel; + int err; + + opts.kernel_log_buf = obj_log_buf; + opts.kernel_log_size = log_buf_sz; + opts.kernel_log_level = 1; + + skel = user_ringbuf_fail__open_opts(&opts); + if (!ASSERT_OK_PTR(skel, "dynptr_fail__open_opts")) + goto cleanup; + + prog = bpf_object__find_program_by_name(skel->obj, prog_name); + if (!ASSERT_OK_PTR(prog, "bpf_object__find_program_by_name")) + goto cleanup; + + bpf_program__set_autoload(prog, true); + + bpf_map__set_max_entries(skel->maps.user_ringbuf, getpagesize()); + + err = user_ringbuf_fail__load(skel); + if (!ASSERT_ERR(err, "unexpected load success")) + goto cleanup; + + if (!ASSERT_OK_PTR(strstr(obj_log_buf, expected_err_msg), "expected_err_msg")) { + fprintf(stderr, "Expected err_msg: %s\n", expected_err_msg); + fprintf(stderr, "Verifier output: %s\n", obj_log_buf); + } + +cleanup: + user_ringbuf_fail__destroy(skel); +} + +void test_user_ringbuf(void) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(success_tests); i++) { + if (!test__start_subtest(success_tests[i].test_name)) + continue; + + success_tests[i].test_callback(); + } + + for (i = 0; i < ARRAY_SIZE(failure_tests); i++) { + if (!test__start_subtest(failure_tests[i].prog_name)) + continue; + + 
verify_fail(failure_tests[i].prog_name, failure_tests[i].expected_err_msg); + } +} diff --git a/tools/testing/selftests/bpf/prog_tests/verify_pkcs7_sig.c b/tools/testing/selftests/bpf/prog_tests/verify_pkcs7_sig.c new file mode 100644 index 000000000000..579d6ee83ce0 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/verify_pkcs7_sig.c @@ -0,0 +1,399 @@ +// SPDX-License-Identifier: GPL-2.0 + +/* + * Copyright (C) 2022 Huawei Technologies Duesseldorf GmbH + * + * Author: Roberto Sassu <roberto.sassu@huawei.com> + */ + +#include <stdio.h> +#include <errno.h> +#include <stdlib.h> +#include <unistd.h> +#include <endian.h> +#include <limits.h> +#include <sys/stat.h> +#include <sys/wait.h> +#include <sys/mman.h> +#include <linux/keyctl.h> +#include <test_progs.h> + +#include "test_verify_pkcs7_sig.skel.h" + +#define MAX_DATA_SIZE (1024 * 1024) +#define MAX_SIG_SIZE 1024 + +#define VERIFY_USE_SECONDARY_KEYRING (1UL) +#define VERIFY_USE_PLATFORM_KEYRING (2UL) + +/* In stripped ARM and x86-64 modules, ~ is surprisingly rare. */ +#define MODULE_SIG_STRING "~Module signature appended~\n" + +/* + * Module signature information block. 
+ * + * The constituents of the signature section are, in order: + * + * - Signer's name + * - Key identifier + * - Signature data + * - Information block + */ +struct module_signature { + __u8 algo; /* Public-key crypto algorithm [0] */ + __u8 hash; /* Digest algorithm [0] */ + __u8 id_type; /* Key identifier type [PKEY_ID_PKCS7] */ + __u8 signer_len; /* Length of signer's name [0] */ + __u8 key_id_len; /* Length of key identifier [0] */ + __u8 __pad[3]; + __be32 sig_len; /* Length of signature data */ +}; + +struct data { + __u8 data[MAX_DATA_SIZE]; + __u32 data_len; + __u8 sig[MAX_SIG_SIZE]; + __u32 sig_len; +}; + +static bool kfunc_not_supported; + +static int libbpf_print_cb(enum libbpf_print_level level, const char *fmt, + va_list args) +{ + if (strcmp(fmt, "libbpf: extern (func ksym) '%s': not found in kernel or module BTFs\n")) + return 0; + + if (strcmp(va_arg(args, char *), "bpf_verify_pkcs7_signature")) + return 0; + + kfunc_not_supported = true; + return 0; +} + +static int _run_setup_process(const char *setup_dir, const char *cmd) +{ + int child_pid, child_status; + + child_pid = fork(); + if (child_pid == 0) { + execlp("./verify_sig_setup.sh", "./verify_sig_setup.sh", cmd, + setup_dir, NULL); + exit(errno); + + } else if (child_pid > 0) { + waitpid(child_pid, &child_status, 0); + return WEXITSTATUS(child_status); + } + + return -EINVAL; +} + +static int populate_data_item_str(const char *tmp_dir, struct data *data_item) +{ + struct stat st; + char data_template[] = "/tmp/dataXXXXXX"; + char path[PATH_MAX]; + int ret, fd, child_status, child_pid; + + data_item->data_len = 4; + memcpy(data_item->data, "test", data_item->data_len); + + fd = mkstemp(data_template); + if (fd == -1) + return -errno; + + ret = write(fd, data_item->data, data_item->data_len); + + close(fd); + + if (ret != data_item->data_len) { + ret = -EIO; + goto out; + } + + child_pid = fork(); + + if (child_pid == -1) { + ret = -errno; + goto out; + } + + if (child_pid == 0) { + 
snprintf(path, sizeof(path), "%s/signing_key.pem", tmp_dir); + + return execlp("./sign-file", "./sign-file", "-d", "sha256", + path, path, data_template, NULL); + } + + waitpid(child_pid, &child_status, 0); + + ret = WEXITSTATUS(child_status); + if (ret) + goto out; + + snprintf(path, sizeof(path), "%s.p7s", data_template); + + ret = stat(path, &st); + if (ret == -1) { + ret = -errno; + goto out; + } + + if (st.st_size > sizeof(data_item->sig)) { + ret = -EINVAL; + goto out_sig; + } + + data_item->sig_len = st.st_size; + + fd = open(path, O_RDONLY); + if (fd == -1) { + ret = -errno; + goto out_sig; + } + + ret = read(fd, data_item->sig, data_item->sig_len); + + close(fd); + + if (ret != data_item->sig_len) { + ret = -EIO; + goto out_sig; + } + + ret = 0; +out_sig: + unlink(path); +out: + unlink(data_template); + return ret; +} + +static int populate_data_item_mod(struct data *data_item) +{ + char mod_path[PATH_MAX], *mod_path_ptr; + struct stat st; + void *mod; + FILE *fp; + struct module_signature ms; + int ret, fd, modlen, marker_len, sig_len; + + data_item->data_len = 0; + + if (stat("/lib/modules", &st) == -1) + return 0; + + /* Requires CONFIG_TCP_CONG_BIC=m. 
*/ + fp = popen("find /lib/modules/$(uname -r) -name tcp_bic.ko", "r"); + if (!fp) + return 0; + + mod_path_ptr = fgets(mod_path, sizeof(mod_path), fp); + pclose(fp); + + if (!mod_path_ptr) + return 0; + + mod_path_ptr = strchr(mod_path, '\n'); + if (!mod_path_ptr) + return 0; + + *mod_path_ptr = '\0'; + + if (stat(mod_path, &st) == -1) + return 0; + + modlen = st.st_size; + marker_len = sizeof(MODULE_SIG_STRING) - 1; + + fd = open(mod_path, O_RDONLY); + if (fd == -1) + return -errno; + + mod = mmap(NULL, st.st_size, PROT_READ, MAP_PRIVATE, fd, 0); + + close(fd); + + if (mod == MAP_FAILED) + return -errno; + + if (strncmp(mod + modlen - marker_len, MODULE_SIG_STRING, marker_len)) { + ret = -EINVAL; + goto out; + } + + modlen -= marker_len; + + memcpy(&ms, mod + (modlen - sizeof(ms)), sizeof(ms)); + + sig_len = __be32_to_cpu(ms.sig_len); + modlen -= sig_len + sizeof(ms); + + if (modlen > sizeof(data_item->data)) { + ret = -E2BIG; + goto out; + } + + memcpy(data_item->data, mod, modlen); + data_item->data_len = modlen; + + if (sig_len > sizeof(data_item->sig)) { + ret = -E2BIG; + goto out; + } + + memcpy(data_item->sig, mod + modlen, sig_len); + data_item->sig_len = sig_len; + ret = 0; +out: + munmap(mod, st.st_size); + return ret; +} + +void test_verify_pkcs7_sig(void) +{ + libbpf_print_fn_t old_print_cb; + char tmp_dir_template[] = "/tmp/verify_sigXXXXXX"; + char *tmp_dir; + struct test_verify_pkcs7_sig *skel = NULL; + struct bpf_map *map; + struct data data; + int ret, zero = 0; + + /* Trigger creation of session keyring. 
*/ + syscall(__NR_request_key, "keyring", "_uid.0", NULL, + KEY_SPEC_SESSION_KEYRING); + + tmp_dir = mkdtemp(tmp_dir_template); + if (!ASSERT_OK_PTR(tmp_dir, "mkdtemp")) + return; + + ret = _run_setup_process(tmp_dir, "setup"); + if (!ASSERT_OK(ret, "_run_setup_process")) + goto close_prog; + + skel = test_verify_pkcs7_sig__open(); + if (!ASSERT_OK_PTR(skel, "test_verify_pkcs7_sig__open")) + goto close_prog; + + old_print_cb = libbpf_set_print(libbpf_print_cb); + ret = test_verify_pkcs7_sig__load(skel); + libbpf_set_print(old_print_cb); + + if (ret < 0 && kfunc_not_supported) { + printf( + "%s:SKIP:bpf_verify_pkcs7_signature() kfunc not supported\n", + __func__); + test__skip(); + goto close_prog; + } + + if (!ASSERT_OK(ret, "test_verify_pkcs7_sig__load")) + goto close_prog; + + ret = test_verify_pkcs7_sig__attach(skel); + if (!ASSERT_OK(ret, "test_verify_pkcs7_sig__attach")) + goto close_prog; + + map = bpf_object__find_map_by_name(skel->obj, "data_input"); + if (!ASSERT_OK_PTR(map, "data_input not found")) + goto close_prog; + + skel->bss->monitored_pid = getpid(); + + /* Test without data and signature. */ + skel->bss->user_keyring_serial = KEY_SPEC_SESSION_KEYRING; + + ret = bpf_map_update_elem(bpf_map__fd(map), &zero, &data, BPF_ANY); + if (!ASSERT_LT(ret, 0, "bpf_map_update_elem data_input")) + goto close_prog; + + /* Test successful signature verification with session keyring. */ + ret = populate_data_item_str(tmp_dir, &data); + if (!ASSERT_OK(ret, "populate_data_item_str")) + goto close_prog; + + ret = bpf_map_update_elem(bpf_map__fd(map), &zero, &data, BPF_ANY); + if (!ASSERT_OK(ret, "bpf_map_update_elem data_input")) + goto close_prog; + + /* Test successful signature verification with testing keyring. 
*/ + skel->bss->user_keyring_serial = syscall(__NR_request_key, "keyring", + "ebpf_testing_keyring", NULL, + KEY_SPEC_SESSION_KEYRING); + + ret = bpf_map_update_elem(bpf_map__fd(map), &zero, &data, BPF_ANY); + if (!ASSERT_OK(ret, "bpf_map_update_elem data_input")) + goto close_prog; + + /* + * Ensure key_task_permission() is called and rejects the keyring + * (no Search permission). + */ + syscall(__NR_keyctl, KEYCTL_SETPERM, skel->bss->user_keyring_serial, + 0x37373737); + + ret = bpf_map_update_elem(bpf_map__fd(map), &zero, &data, BPF_ANY); + if (!ASSERT_LT(ret, 0, "bpf_map_update_elem data_input")) + goto close_prog; + + syscall(__NR_keyctl, KEYCTL_SETPERM, skel->bss->user_keyring_serial, + 0x3f3f3f3f); + + /* + * Ensure key_validate() is called and rejects the keyring (key expired) + */ + syscall(__NR_keyctl, KEYCTL_SET_TIMEOUT, + skel->bss->user_keyring_serial, 1); + sleep(1); + + ret = bpf_map_update_elem(bpf_map__fd(map), &zero, &data, BPF_ANY); + if (!ASSERT_LT(ret, 0, "bpf_map_update_elem data_input")) + goto close_prog; + + skel->bss->user_keyring_serial = KEY_SPEC_SESSION_KEYRING; + + /* Test with corrupted data (signature verification should fail). */ + data.data[0] = 'a'; + ret = bpf_map_update_elem(bpf_map__fd(map), &zero, &data, BPF_ANY); + if (!ASSERT_LT(ret, 0, "bpf_map_update_elem data_input")) + goto close_prog; + + ret = populate_data_item_mod(&data); + if (!ASSERT_OK(ret, "populate_data_item_mod")) + goto close_prog; + + /* Test signature verification with system keyrings. 
*/ + if (data.data_len) { + skel->bss->user_keyring_serial = 0; + skel->bss->system_keyring_id = 0; + + ret = bpf_map_update_elem(bpf_map__fd(map), &zero, &data, + BPF_ANY); + if (!ASSERT_OK(ret, "bpf_map_update_elem data_input")) + goto close_prog; + + skel->bss->system_keyring_id = VERIFY_USE_SECONDARY_KEYRING; + + ret = bpf_map_update_elem(bpf_map__fd(map), &zero, &data, + BPF_ANY); + if (!ASSERT_OK(ret, "bpf_map_update_elem data_input")) + goto close_prog; + + skel->bss->system_keyring_id = VERIFY_USE_PLATFORM_KEYRING; + + ret = bpf_map_update_elem(bpf_map__fd(map), &zero, &data, + BPF_ANY); + ASSERT_LT(ret, 0, "bpf_map_update_elem data_input"); + } + +close_prog: + _run_setup_process(tmp_dir, "cleanup"); + + if (!skel) + return; + + skel->bss->monitored_pid = 0; + test_verify_pkcs7_sig__destroy(skel); +} diff --git a/tools/testing/selftests/bpf/progs/bpf_dctcp.c b/tools/testing/selftests/bpf/progs/bpf_dctcp.c index 9573be6122be..460682759aed 100644 --- a/tools/testing/selftests/bpf/progs/bpf_dctcp.c +++ b/tools/testing/selftests/bpf/progs/bpf_dctcp.c @@ -11,6 +11,7 @@ #include <linux/types.h> #include <linux/stddef.h> #include <linux/tcp.h> +#include <errno.h> #include <bpf/bpf_helpers.h> #include <bpf/bpf_tracing.h> #include "bpf_tcp_helpers.h" @@ -23,6 +24,7 @@ const char tcp_cdg[] = "cdg"; char cc_res[TCP_CA_NAME_MAX]; int tcp_cdg_res = 0; int stg_result = 0; +int ebusy_cnt = 0; struct { __uint(type, BPF_MAP_TYPE_SK_STORAGE); @@ -64,16 +66,23 @@ void BPF_PROG(dctcp_init, struct sock *sk) if (!(tp->ecn_flags & TCP_ECN_OK) && fallback[0]) { /* Switch to fallback */ - bpf_setsockopt(sk, SOL_TCP, TCP_CONGESTION, - (void *)fallback, sizeof(fallback)); - /* Switch back to myself which the bpf trampoline - * stopped calling dctcp_init recursively. 
+ if (bpf_setsockopt(sk, SOL_TCP, TCP_CONGESTION, + (void *)fallback, sizeof(fallback)) == -EBUSY) + ebusy_cnt++; + + /* Switch back to myself and the recurred dctcp_init() + * will get -EBUSY for all bpf_setsockopt(TCP_CONGESTION), + * except the last "cdg" one. */ - bpf_setsockopt(sk, SOL_TCP, TCP_CONGESTION, - (void *)bpf_dctcp, sizeof(bpf_dctcp)); + if (bpf_setsockopt(sk, SOL_TCP, TCP_CONGESTION, + (void *)bpf_dctcp, sizeof(bpf_dctcp)) == -EBUSY) + ebusy_cnt++; + /* Switch back to fallback */ - bpf_setsockopt(sk, SOL_TCP, TCP_CONGESTION, - (void *)fallback, sizeof(fallback)); + if (bpf_setsockopt(sk, SOL_TCP, TCP_CONGESTION, + (void *)fallback, sizeof(fallback)) == -EBUSY) + ebusy_cnt++; + /* Expecting -ENOTSUPP for tcp_cdg_res */ tcp_cdg_res = bpf_setsockopt(sk, SOL_TCP, TCP_CONGESTION, (void *)tcp_cdg, sizeof(tcp_cdg)); diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_task.c b/tools/testing/selftests/bpf/progs/bpf_iter_task.c index d22741272692..96131b9a1caa 100644 --- a/tools/testing/selftests/bpf/progs/bpf_iter_task.c +++ b/tools/testing/selftests/bpf/progs/bpf_iter_task.c @@ -6,6 +6,10 @@ char _license[] SEC("license") = "GPL"; +uint32_t tid = 0; +int num_unknown_tid = 0; +int num_known_tid = 0; + SEC("iter/task") int dump_task(struct bpf_iter__task *ctx) { @@ -18,6 +22,11 @@ int dump_task(struct bpf_iter__task *ctx) return 0; } + if (task->pid != tid) + num_unknown_tid++; + else + num_known_tid++; + if (ctx->meta->seq_num == 0) BPF_SEQ_PRINTF(seq, " tgid gid\n"); diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_task_file.c b/tools/testing/selftests/bpf/progs/bpf_iter_task_file.c index 6e7b400888fe..b0255080662d 100644 --- a/tools/testing/selftests/bpf/progs/bpf_iter_task_file.c +++ b/tools/testing/selftests/bpf/progs/bpf_iter_task_file.c @@ -7,14 +7,16 @@ char _license[] SEC("license") = "GPL"; int count = 0; int tgid = 0; +int last_tgid = 0; +int unique_tgid_count = 0; SEC("iter/task_file") int dump_task_file(struct bpf_iter__task_file 
*ctx) { struct seq_file *seq = ctx->meta->seq; struct task_struct *task = ctx->task; - __u32 fd = ctx->fd; struct file *file = ctx->file; + __u32 fd = ctx->fd; if (task == (void *)0 || file == (void *)0) return 0; @@ -27,6 +29,11 @@ int dump_task_file(struct bpf_iter__task_file *ctx) if (tgid == task->tgid && task->tgid != task->pid) count++; + if (last_tgid != task->tgid) { + last_tgid = task->tgid; + unique_tgid_count++; + } + BPF_SEQ_PRINTF(seq, "%8d %8d %8d %lx\n", task->tgid, task->pid, fd, (long)file->f_op); return 0; diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_task_vma.c b/tools/testing/selftests/bpf/progs/bpf_iter_task_vma.c index 4ea6a37d1345..dd923dc637d5 100644 --- a/tools/testing/selftests/bpf/progs/bpf_iter_task_vma.c +++ b/tools/testing/selftests/bpf/progs/bpf_iter_task_vma.c @@ -20,6 +20,8 @@ char _license[] SEC("license") = "GPL"; #define D_PATH_BUF_SIZE 1024 char d_path_buf[D_PATH_BUF_SIZE] = {}; __u32 pid = 0; +__u32 one_task = 0; +__u32 one_task_error = 0; SEC("iter/task_vma") int proc_maps(struct bpf_iter__task_vma *ctx) { @@ -33,8 +35,11 @@ SEC("iter/task_vma") int proc_maps(struct bpf_iter__task_vma *ctx) return 0; file = vma->vm_file; - if (task->tgid != pid) + if (task->tgid != pid) { + if (one_task) + one_task_error = 1; return 0; + } perm_str[0] = (vma->vm_flags & VM_READ) ? 'r' : '-'; perm_str[1] = (vma->vm_flags & VM_WRITE) ? 'w' : '-'; perm_str[2] = (vma->vm_flags & VM_EXEC) ? 'x' : '-'; diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_vma_offset.c b/tools/testing/selftests/bpf/progs/bpf_iter_vma_offset.c new file mode 100644 index 000000000000..ee7455d2623a --- /dev/null +++ b/tools/testing/selftests/bpf/progs/bpf_iter_vma_offset.c @@ -0,0 +1,37 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. 
*/ +#include "bpf_iter.h" +#include <bpf/bpf_helpers.h> + +char _license[] SEC("license") = "GPL"; + +__u32 unique_tgid_cnt = 0; +uintptr_t address = 0; +uintptr_t offset = 0; +__u32 last_tgid = 0; +__u32 pid = 0; +__u32 page_shift = 0; + +SEC("iter/task_vma") +int get_vma_offset(struct bpf_iter__task_vma *ctx) +{ + struct vm_area_struct *vma = ctx->vma; + struct seq_file *seq = ctx->meta->seq; + struct task_struct *task = ctx->task; + + if (task == NULL || vma == NULL) + return 0; + + if (last_tgid != task->tgid) + unique_tgid_cnt++; + last_tgid = task->tgid; + + if (task->tgid != pid) + return 0; + + if (vma->vm_start <= address && vma->vm_end > address) { + offset = address - vma->vm_start + (vma->vm_pgoff << page_shift); + BPF_SEQ_PRINTF(seq, "OK\n"); + } + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/cgroup_hierarchical_stats.c b/tools/testing/selftests/bpf/progs/cgroup_hierarchical_stats.c index 8ab4253a1592..c74362854948 100644 --- a/tools/testing/selftests/bpf/progs/cgroup_hierarchical_stats.c +++ b/tools/testing/selftests/bpf/progs/cgroup_hierarchical_stats.c @@ -1,7 +1,5 @@ // SPDX-License-Identifier: GPL-2.0-only /* - * Functions to manage eBPF programs attached to cgroup subsystems - * * Copyright 2022 Google LLC. */ #include "vmlinux.h" @@ -11,25 +9,14 @@ char _license[] SEC("license") = "GPL"; -/* - * Start times are stored per-task, not per-cgroup, as multiple tasks in one - * cgroup can perform reclaim concurrently. 
- */ -struct { - __uint(type, BPF_MAP_TYPE_TASK_STORAGE); - __uint(map_flags, BPF_F_NO_PREALLOC); - __type(key, int); - __type(value, __u64); -} vmscan_start_time SEC(".maps"); - -struct vmscan_percpu { +struct percpu_attach_counter { /* Previous percpu state, to figure out if we have new updates */ __u64 prev; /* Current percpu state */ __u64 state; }; -struct vmscan { +struct attach_counter { /* State propagated through children, pending aggregation */ __u64 pending; /* Total state, including all cpus and all children */ @@ -38,147 +25,94 @@ struct vmscan { struct { __uint(type, BPF_MAP_TYPE_PERCPU_HASH); - __uint(max_entries, 100); + __uint(max_entries, 1024); __type(key, __u64); - __type(value, struct vmscan_percpu); -} pcpu_cgroup_vmscan_elapsed SEC(".maps"); + __type(value, struct percpu_attach_counter); +} percpu_attach_counters SEC(".maps"); struct { __uint(type, BPF_MAP_TYPE_HASH); - __uint(max_entries, 100); + __uint(max_entries, 1024); __type(key, __u64); - __type(value, struct vmscan); -} cgroup_vmscan_elapsed SEC(".maps"); + __type(value, struct attach_counter); +} attach_counters SEC(".maps"); extern void cgroup_rstat_updated(struct cgroup *cgrp, int cpu) __ksym; extern void cgroup_rstat_flush(struct cgroup *cgrp) __ksym; -static struct cgroup *task_memcg(struct task_struct *task) -{ - int cgrp_id; - -#if __has_builtin(__builtin_preserve_enum_value) - cgrp_id = bpf_core_enum_value(enum cgroup_subsys_id, memory_cgrp_id); -#else - cgrp_id = memory_cgrp_id; -#endif - return task->cgroups->subsys[cgrp_id]->cgroup; -} - static uint64_t cgroup_id(struct cgroup *cgrp) { return cgrp->kn->id; } -static int create_vmscan_percpu_elem(__u64 cg_id, __u64 state) +static int create_percpu_attach_counter(__u64 cg_id, __u64 state) { - struct vmscan_percpu pcpu_init = {.state = state, .prev = 0}; + struct percpu_attach_counter pcpu_init = {.state = state, .prev = 0}; - return bpf_map_update_elem(&pcpu_cgroup_vmscan_elapsed, &cg_id, + return 
bpf_map_update_elem(&percpu_attach_counters, &cg_id, &pcpu_init, BPF_NOEXIST); } -static int create_vmscan_elem(__u64 cg_id, __u64 state, __u64 pending) +static int create_attach_counter(__u64 cg_id, __u64 state, __u64 pending) { - struct vmscan init = {.state = state, .pending = pending}; + struct attach_counter init = {.state = state, .pending = pending}; - return bpf_map_update_elem(&cgroup_vmscan_elapsed, &cg_id, + return bpf_map_update_elem(&attach_counters, &cg_id, &init, BPF_NOEXIST); } -SEC("tp_btf/mm_vmscan_memcg_reclaim_begin") -int BPF_PROG(vmscan_start, int order, gfp_t gfp_flags) +SEC("fentry/cgroup_attach_task") +int BPF_PROG(counter, struct cgroup *dst_cgrp, struct task_struct *leader, + bool threadgroup) { - struct task_struct *task = bpf_get_current_task_btf(); - __u64 *start_time_ptr; - - start_time_ptr = bpf_task_storage_get(&vmscan_start_time, task, 0, - BPF_LOCAL_STORAGE_GET_F_CREATE); - if (start_time_ptr) - *start_time_ptr = bpf_ktime_get_ns(); - return 0; -} - -SEC("tp_btf/mm_vmscan_memcg_reclaim_end") -int BPF_PROG(vmscan_end, unsigned long nr_reclaimed) -{ - struct vmscan_percpu *pcpu_stat; - struct task_struct *current = bpf_get_current_task_btf(); - struct cgroup *cgrp; - __u64 *start_time_ptr; - __u64 current_elapsed, cg_id; - __u64 end_time = bpf_ktime_get_ns(); - - /* - * cgrp is the first parent cgroup of current that has memcg enabled in - * its subtree_control, or NULL if memcg is disabled in the entire tree. - * In a cgroup hierarchy like this: - * a - * / \ - * b c - * If "a" has memcg enabled, while "b" doesn't, then processes in "b" - * will accumulate their stats directly to "a". This makes sure that no - * stats are lost from processes in leaf cgroups that don't have memcg - * enabled, but only exposes stats for cgroups that have memcg enabled. 
- */ - cgrp = task_memcg(current); - if (!cgrp) + __u64 cg_id = cgroup_id(dst_cgrp); + struct percpu_attach_counter *pcpu_counter = bpf_map_lookup_elem( + &percpu_attach_counters, + &cg_id); + + if (pcpu_counter) + pcpu_counter->state += 1; + else if (create_percpu_attach_counter(cg_id, 1)) return 0; - cg_id = cgroup_id(cgrp); - start_time_ptr = bpf_task_storage_get(&vmscan_start_time, current, 0, - BPF_LOCAL_STORAGE_GET_F_CREATE); - if (!start_time_ptr) - return 0; - - current_elapsed = end_time - *start_time_ptr; - pcpu_stat = bpf_map_lookup_elem(&pcpu_cgroup_vmscan_elapsed, - &cg_id); - if (pcpu_stat) - pcpu_stat->state += current_elapsed; - else if (create_vmscan_percpu_elem(cg_id, current_elapsed)) - return 0; - - cgroup_rstat_updated(cgrp, bpf_get_smp_processor_id()); + cgroup_rstat_updated(dst_cgrp, bpf_get_smp_processor_id()); return 0; } SEC("fentry/bpf_rstat_flush") -int BPF_PROG(vmscan_flush, struct cgroup *cgrp, struct cgroup *parent, int cpu) +int BPF_PROG(flusher, struct cgroup *cgrp, struct cgroup *parent, int cpu) { - struct vmscan_percpu *pcpu_stat; - struct vmscan *total_stat, *parent_stat; + struct percpu_attach_counter *pcpu_counter; + struct attach_counter *total_counter, *parent_counter; __u64 cg_id = cgroup_id(cgrp); __u64 parent_cg_id = parent ? 
cgroup_id(parent) : 0; - __u64 *pcpu_vmscan; __u64 state; __u64 delta = 0; /* Add CPU changes on this level since the last flush */ - pcpu_stat = bpf_map_lookup_percpu_elem(&pcpu_cgroup_vmscan_elapsed, - &cg_id, cpu); - if (pcpu_stat) { - state = pcpu_stat->state; - delta += state - pcpu_stat->prev; - pcpu_stat->prev = state; + pcpu_counter = bpf_map_lookup_percpu_elem(&percpu_attach_counters, + &cg_id, cpu); + if (pcpu_counter) { + state = pcpu_counter->state; + delta += state - pcpu_counter->prev; + pcpu_counter->prev = state; } - total_stat = bpf_map_lookup_elem(&cgroup_vmscan_elapsed, &cg_id); - if (!total_stat) { - if (create_vmscan_elem(cg_id, delta, 0)) + total_counter = bpf_map_lookup_elem(&attach_counters, &cg_id); + if (!total_counter) { + if (create_attach_counter(cg_id, delta, 0)) return 0; - goto update_parent; } /* Collect pending stats from subtree */ - if (total_stat->pending) { - delta += total_stat->pending; - total_stat->pending = 0; + if (total_counter->pending) { + delta += total_counter->pending; + total_counter->pending = 0; } /* Propagate changes to this cgroup's total */ - total_stat->state += delta; + total_counter->state += delta; update_parent: /* Skip if there are no changes to propagate, or no parent */ @@ -186,20 +120,20 @@ update_parent: return 0; /* Propagate changes to cgroup's parent */ - parent_stat = bpf_map_lookup_elem(&cgroup_vmscan_elapsed, - &parent_cg_id); - if (parent_stat) - parent_stat->pending += delta; + parent_counter = bpf_map_lookup_elem(&attach_counters, + &parent_cg_id); + if (parent_counter) + parent_counter->pending += delta; else - create_vmscan_elem(parent_cg_id, 0, delta); + create_attach_counter(parent_cg_id, 0, delta); return 0; } SEC("iter.s/cgroup") -int BPF_PROG(dump_vmscan, struct bpf_iter_meta *meta, struct cgroup *cgrp) +int BPF_PROG(dumper, struct bpf_iter_meta *meta, struct cgroup *cgrp) { struct seq_file *seq = meta->seq; - struct vmscan *total_stat; + struct attach_counter *total_counter; __u64 
cg_id = cgrp ? cgroup_id(cgrp) : 0; /* Do nothing for the terminal call */ @@ -209,18 +143,13 @@ int BPF_PROG(dump_vmscan, struct bpf_iter_meta *meta, struct cgroup *cgrp) /* Flush the stats to make sure we get the most updated numbers */ cgroup_rstat_flush(cgrp); - total_stat = bpf_map_lookup_elem(&cgroup_vmscan_elapsed, &cg_id); - if (!total_stat) { - BPF_SEQ_PRINTF(seq, "cg_id: %llu, total_vmscan_delay: 0\n", + total_counter = bpf_map_lookup_elem(&attach_counters, &cg_id); + if (!total_counter) { + BPF_SEQ_PRINTF(seq, "cg_id: %llu, attach_counter: 0\n", cg_id); } else { - BPF_SEQ_PRINTF(seq, "cg_id: %llu, total_vmscan_delay: %llu\n", - cg_id, total_stat->state); + BPF_SEQ_PRINTF(seq, "cg_id: %llu, attach_counter: %llu\n", + cg_id, total_counter->state); } - - /* - * We only dump stats for one cgroup here, so return 1 to stop - * iteration after the first cgroup. - */ - return 1; + return 0; } diff --git a/tools/testing/selftests/bpf/progs/connect_ping.c b/tools/testing/selftests/bpf/progs/connect_ping.c new file mode 100644 index 000000000000..60178192b672 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/connect_ping.c @@ -0,0 +1,53 @@ +// SPDX-License-Identifier: GPL-2.0-only + +/* + * Copyright 2022 Google LLC. 
+ */ + +#include <linux/bpf.h> +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_endian.h> +#include <netinet/in.h> +#include <sys/socket.h> + +/* 2001:db8::1 */ +#define BINDADDR_V6 { { { 0x20,0x01,0x0d,0xb8,0,0,0,0,0,0,0,0,0,0,0,1 } } } + +__u32 do_bind = 0; +__u32 has_error = 0; +__u32 invocations_v4 = 0; +__u32 invocations_v6 = 0; + +SEC("cgroup/connect4") +int connect_v4_prog(struct bpf_sock_addr *ctx) +{ + struct sockaddr_in sa = { + .sin_family = AF_INET, + .sin_addr.s_addr = bpf_htonl(0x01010101), + }; + + __sync_fetch_and_add(&invocations_v4, 1); + + if (do_bind && bpf_bind(ctx, (struct sockaddr *)&sa, sizeof(sa))) + has_error = 1; + + return 1; +} + +SEC("cgroup/connect6") +int connect_v6_prog(struct bpf_sock_addr *ctx) +{ + struct sockaddr_in6 sa = { + .sin6_family = AF_INET6, + .sin6_addr = BINDADDR_V6, + }; + + __sync_fetch_and_add(&invocations_v6, 1); + + if (do_bind && bpf_bind(ctx, (struct sockaddr *)&sa, sizeof(sa))) + has_error = 1; + + return 1; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/get_func_ip_test.c b/tools/testing/selftests/bpf/progs/get_func_ip_test.c index a587aeca5ae0..8559e698b40d 100644 --- a/tools/testing/selftests/bpf/progs/get_func_ip_test.c +++ b/tools/testing/selftests/bpf/progs/get_func_ip_test.c @@ -2,6 +2,7 @@ #include <linux/bpf.h> #include <bpf/bpf_helpers.h> #include <bpf/bpf_tracing.h> +#include <stdbool.h> char _license[] SEC("license") = "GPL"; @@ -13,6 +14,16 @@ extern const void bpf_modify_return_test __ksym; extern const void bpf_fentry_test6 __ksym; extern const void bpf_fentry_test7 __ksym; +extern bool CONFIG_X86_KERNEL_IBT __kconfig __weak; + +/* This function is here to have CONFIG_X86_KERNEL_IBT + * used and added to object BTF. + */ +int unused(void) +{ + return CONFIG_X86_KERNEL_IBT ? 
0 : 1; +} + __u64 test1_result = 0; SEC("fentry/bpf_fentry_test1") int BPF_PROG(test1, int a) @@ -64,21 +75,11 @@ int BPF_PROG(test5, int a, int *b, int ret) } __u64 test6_result = 0; -SEC("kprobe/bpf_fentry_test6+0x5") +SEC("?kprobe") int test6(struct pt_regs *ctx) { __u64 addr = bpf_get_func_ip(ctx); - test6_result = (const void *) addr == &bpf_fentry_test6 + 5; - return 0; -} - -__u64 test7_result = 0; -SEC("kprobe/bpf_fentry_test7+5") -int test7(struct pt_regs *ctx) -{ - __u64 addr = bpf_get_func_ip(ctx); - - test7_result = (const void *) addr == &bpf_fentry_test7 + 5; + test6_result = (const void *) addr == 0; return 0; } diff --git a/tools/testing/selftests/bpf/progs/kfunc_call_fail.c b/tools/testing/selftests/bpf/progs/kfunc_call_fail.c new file mode 100644 index 000000000000..b98313d391c6 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/kfunc_call_fail.c @@ -0,0 +1,160 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2021 Facebook */ +#include <vmlinux.h> +#include <bpf/bpf_helpers.h> + +extern struct prog_test_ref_kfunc *bpf_kfunc_call_test_acquire(unsigned long *sp) __ksym; +extern void bpf_kfunc_call_test_release(struct prog_test_ref_kfunc *p) __ksym; +extern void bpf_kfunc_call_test_mem_len_pass1(void *mem, int len) __ksym; +extern int *bpf_kfunc_call_test_get_rdwr_mem(struct prog_test_ref_kfunc *p, const int rdwr_buf_size) __ksym; +extern int *bpf_kfunc_call_test_get_rdonly_mem(struct prog_test_ref_kfunc *p, const int rdonly_buf_size) __ksym; +extern int *bpf_kfunc_call_test_acq_rdonly_mem(struct prog_test_ref_kfunc *p, const int rdonly_buf_size) __ksym; +extern void bpf_kfunc_call_int_mem_release(int *p) __ksym; + +struct syscall_test_args { + __u8 data[16]; + size_t size; +}; + +SEC("?syscall") +int kfunc_syscall_test_fail(struct syscall_test_args *args) +{ + bpf_kfunc_call_test_mem_len_pass1(&args->data, sizeof(*args) + 1); + + return 0; +} + +SEC("?syscall") +int kfunc_syscall_test_null_fail(struct syscall_test_args *args) +{ + /* 
Must be called with args as a NULL pointer + * we do not check for it to have the verifier consider that + * the pointer might not be null, and so we can load it. + * + * So the following can not be added: + * + * if (args) + * return -22; + */ + + bpf_kfunc_call_test_mem_len_pass1(args, sizeof(*args)); + + return 0; +} + +SEC("?tc") +int kfunc_call_test_get_mem_fail_rdonly(struct __sk_buff *skb) +{ + struct prog_test_ref_kfunc *pt; + unsigned long s = 0; + int *p = NULL; + int ret = 0; + + pt = bpf_kfunc_call_test_acquire(&s); + if (pt) { + p = bpf_kfunc_call_test_get_rdonly_mem(pt, 2 * sizeof(int)); + if (p) + p[0] = 42; /* this is a read-only buffer, so -EACCES */ + else + ret = -1; + + bpf_kfunc_call_test_release(pt); + } + return ret; +} + +SEC("?tc") +int kfunc_call_test_get_mem_fail_use_after_free(struct __sk_buff *skb) +{ + struct prog_test_ref_kfunc *pt; + unsigned long s = 0; + int *p = NULL; + int ret = 0; + + pt = bpf_kfunc_call_test_acquire(&s); + if (pt) { + p = bpf_kfunc_call_test_get_rdwr_mem(pt, 2 * sizeof(int)); + if (p) { + p[0] = 42; + ret = p[1]; /* 108 */ + } else { + ret = -1; + } + + bpf_kfunc_call_test_release(pt); + } + if (p) + ret = p[0]; /* p is not valid anymore */ + + return ret; +} + +SEC("?tc") +int kfunc_call_test_get_mem_fail_oob(struct __sk_buff *skb) +{ + struct prog_test_ref_kfunc *pt; + unsigned long s = 0; + int *p = NULL; + int ret = 0; + + pt = bpf_kfunc_call_test_acquire(&s); + if (pt) { + p = bpf_kfunc_call_test_get_rdonly_mem(pt, 2 * sizeof(int)); + if (p) + ret = p[2 * sizeof(int)]; /* oob access, so -EACCES */ + else + ret = -1; + + bpf_kfunc_call_test_release(pt); + } + return ret; +} + +int not_const_size = 2 * sizeof(int); + +SEC("?tc") +int kfunc_call_test_get_mem_fail_not_const(struct __sk_buff *skb) +{ + struct prog_test_ref_kfunc *pt; + unsigned long s = 0; + int *p = NULL; + int ret = 0; + + pt = bpf_kfunc_call_test_acquire(&s); + if (pt) { + p = bpf_kfunc_call_test_get_rdonly_mem(pt, not_const_size); /* non 
const size, -EINVAL */ + if (p) + ret = p[0]; + else + ret = -1; + + bpf_kfunc_call_test_release(pt); + } + return ret; +} + +SEC("?tc") +int kfunc_call_test_mem_acquire_fail(struct __sk_buff *skb) +{ + struct prog_test_ref_kfunc *pt; + unsigned long s = 0; + int *p = NULL; + int ret = 0; + + pt = bpf_kfunc_call_test_acquire(&s); + if (pt) { + /* we are failing on this one, because we are not acquiring a PTR_TO_BTF_ID (a struct ptr) */ + p = bpf_kfunc_call_test_acq_rdonly_mem(pt, 2 * sizeof(int)); + if (p) + ret = p[0]; + else + ret = -1; + + bpf_kfunc_call_int_mem_release(p); + + bpf_kfunc_call_test_release(pt); + } + return ret; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/kfunc_call_test.c b/tools/testing/selftests/bpf/progs/kfunc_call_test.c index 5aecbb9fdc68..f636e50be259 100644 --- a/tools/testing/selftests/bpf/progs/kfunc_call_test.c +++ b/tools/testing/selftests/bpf/progs/kfunc_call_test.c @@ -14,6 +14,8 @@ extern void bpf_kfunc_call_test_pass1(struct prog_test_pass1 *p) __ksym; extern void bpf_kfunc_call_test_pass2(struct prog_test_pass2 *p) __ksym; extern void bpf_kfunc_call_test_mem_len_pass1(void *mem, int len) __ksym; extern void bpf_kfunc_call_test_mem_len_fail2(__u64 *mem, int len) __ksym; +extern int *bpf_kfunc_call_test_get_rdwr_mem(struct prog_test_ref_kfunc *p, const int rdwr_buf_size) __ksym; +extern int *bpf_kfunc_call_test_get_rdonly_mem(struct prog_test_ref_kfunc *p, const int rdonly_buf_size) __ksym; SEC("tc") int kfunc_call_test2(struct __sk_buff *skb) @@ -92,4 +94,73 @@ int kfunc_call_test_pass(struct __sk_buff *skb) return 0; } +struct syscall_test_args { + __u8 data[16]; + size_t size; +}; + +SEC("syscall") +int kfunc_syscall_test(struct syscall_test_args *args) +{ + const long size = args->size; + + if (size > sizeof(args->data)) + return -7; /* -E2BIG */ + + bpf_kfunc_call_test_mem_len_pass1(&args->data, sizeof(args->data)); + bpf_kfunc_call_test_mem_len_pass1(&args->data, 
sizeof(*args)); + bpf_kfunc_call_test_mem_len_pass1(&args->data, size); + + return 0; +} + +SEC("syscall") +int kfunc_syscall_test_null(struct syscall_test_args *args) +{ + /* Must be called with args as a NULL pointer + * we do not check for it to have the verifier consider that + * the pointer might not be null, and so we can load it. + * + * So the following can not be added: + * + * if (args) + * return -22; + */ + + bpf_kfunc_call_test_mem_len_pass1(args, 0); + + return 0; +} + +SEC("tc") +int kfunc_call_test_get_mem(struct __sk_buff *skb) +{ + struct prog_test_ref_kfunc *pt; + unsigned long s = 0; + int *p = NULL; + int ret = 0; + + pt = bpf_kfunc_call_test_acquire(&s); + if (pt) { + p = bpf_kfunc_call_test_get_rdwr_mem(pt, 2 * sizeof(int)); + if (p) { + p[0] = 42; + ret = p[1]; /* 108 */ + } else { + ret = -1; + } + + if (ret >= 0) { + p = bpf_kfunc_call_test_get_rdonly_mem(pt, 2 * sizeof(int)); + if (p) + ret = p[0]; /* 42 */ + else + ret = -1; + } + + bpf_kfunc_call_test_release(pt); + } + return ret; +} + char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/kprobe_multi.c b/tools/testing/selftests/bpf/progs/kprobe_multi.c index 08f95a8155d1..98c3399e15c0 100644 --- a/tools/testing/selftests/bpf/progs/kprobe_multi.c +++ b/tools/testing/selftests/bpf/progs/kprobe_multi.c @@ -36,15 +36,13 @@ __u64 kretprobe_test6_result = 0; __u64 kretprobe_test7_result = 0; __u64 kretprobe_test8_result = 0; -extern bool CONFIG_X86_KERNEL_IBT __kconfig __weak; - static void kprobe_multi_check(void *ctx, bool is_return) { if (bpf_get_current_pid_tgid() >> 32 != pid) return; __u64 cookie = test_cookie ? bpf_get_attach_cookie(ctx) : 0; - __u64 addr = bpf_get_func_ip(ctx) - (CONFIG_X86_KERNEL_IBT ? 
4 : 0); + __u64 addr = bpf_get_func_ip(ctx); #define SET(__var, __addr, __cookie) ({ \ if (((const void *) addr == __addr) && \ diff --git a/tools/testing/selftests/bpf/progs/test_bpf_nf.c b/tools/testing/selftests/bpf/progs/test_bpf_nf.c index 2722441850cc..227e85e85dda 100644 --- a/tools/testing/selftests/bpf/progs/test_bpf_nf.c +++ b/tools/testing/selftests/bpf/progs/test_bpf_nf.c @@ -1,6 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 #include <vmlinux.h> #include <bpf/bpf_helpers.h> +#include <bpf/bpf_endian.h> #define EAFNOSUPPORT 97 #define EPROTO 71 @@ -23,6 +24,9 @@ int test_insert_entry = -EAFNOSUPPORT; int test_succ_lookup = -ENOENT; u32 test_delta_timeout = 0; u32 test_status = 0; +u32 test_insert_lookup_mark = 0; +int test_snat_addr = -EINVAL; +int test_dnat_addr = -EINVAL; __be32 saddr = 0; __be16 sport = 0; __be32 daddr = 0; @@ -53,6 +57,8 @@ void bpf_ct_set_timeout(struct nf_conn *, u32) __ksym; int bpf_ct_change_timeout(struct nf_conn *, u32) __ksym; int bpf_ct_set_status(struct nf_conn *, u32) __ksym; int bpf_ct_change_status(struct nf_conn *, u32) __ksym; +int bpf_ct_set_nat_info(struct nf_conn *, union nf_inet_addr *, + int port, enum nf_nat_manip_type) __ksym; static __always_inline void nf_ct_test(struct nf_conn *(*lookup_fn)(void *, struct bpf_sock_tuple *, u32, @@ -140,10 +146,21 @@ nf_ct_test(struct nf_conn *(*lookup_fn)(void *, struct bpf_sock_tuple *, u32, ct = alloc_fn(ctx, &bpf_tuple, sizeof(bpf_tuple.ipv4), &opts_def, sizeof(opts_def)); if (ct) { + __u16 sport = bpf_get_prandom_u32(); + __u16 dport = bpf_get_prandom_u32(); + union nf_inet_addr saddr = {}; + union nf_inet_addr daddr = {}; struct nf_conn *ct_ins; bpf_ct_set_timeout(ct, 10000); - bpf_ct_set_status(ct, IPS_CONFIRMED); + ct->mark = 77; + + /* snat */ + saddr.ip = bpf_get_prandom_u32(); + bpf_ct_set_nat_info(ct, &saddr, sport, NF_NAT_MANIP_SRC); + /* dnat */ + daddr.ip = bpf_get_prandom_u32(); + bpf_ct_set_nat_info(ct, &daddr, dport, NF_NAT_MANIP_DST); ct_ins = 
bpf_ct_insert_entry(ct); if (ct_ins) { @@ -152,12 +169,26 @@ nf_ct_test(struct nf_conn *(*lookup_fn)(void *, struct bpf_sock_tuple *, u32, ct_lk = lookup_fn(ctx, &bpf_tuple, sizeof(bpf_tuple.ipv4), &opts_def, sizeof(opts_def)); if (ct_lk) { + struct nf_conntrack_tuple *tuple; + + /* check snat and dnat addresses */ + tuple = &ct_lk->tuplehash[IP_CT_DIR_REPLY].tuple; + if (tuple->dst.u3.ip == saddr.ip && + tuple->dst.u.all == bpf_htons(sport)) + test_snat_addr = 0; + if (tuple->src.u3.ip == daddr.ip && + tuple->src.u.all == bpf_htons(dport)) + test_dnat_addr = 0; + /* update ct entry timeout */ bpf_ct_change_timeout(ct_lk, 10000); test_delta_timeout = ct_lk->timeout - bpf_jiffies64(); test_delta_timeout /= CONFIG_HZ; - test_status = IPS_SEEN_REPLY; - bpf_ct_change_status(ct_lk, IPS_SEEN_REPLY); + test_insert_lookup_mark = ct_lk->mark; + bpf_ct_change_status(ct_lk, + IPS_CONFIRMED | IPS_SEEN_REPLY); + test_status = ct_lk->status; + bpf_ct_release(ct_lk); test_succ_lookup = 0; } @@ -175,8 +206,10 @@ nf_ct_test(struct nf_conn *(*lookup_fn)(void *, struct bpf_sock_tuple *, u32, sizeof(opts_def)); if (ct) { test_exist_lookup = 0; - if (ct->mark == 42) - test_exist_lookup_mark = 43; + if (ct->mark == 42) { + ct->mark++; + test_exist_lookup_mark = ct->mark; + } bpf_ct_release(ct); } else { test_exist_lookup = opts_def.error; diff --git a/tools/testing/selftests/bpf/progs/test_bpf_nf_fail.c b/tools/testing/selftests/bpf/progs/test_bpf_nf_fail.c index bf79af15c808..0e4759ab38ff 100644 --- a/tools/testing/selftests/bpf/progs/test_bpf_nf_fail.c +++ b/tools/testing/selftests/bpf/progs/test_bpf_nf_fail.c @@ -70,6 +70,20 @@ int lookup_insert(struct __sk_buff *ctx) } SEC("?tc") +int write_not_allowlisted_field(struct __sk_buff *ctx) +{ + struct bpf_ct_opts___local opts = {}; + struct bpf_sock_tuple tup = {}; + struct nf_conn *ct; + + ct = bpf_skb_ct_lookup(ctx, &tup, sizeof(tup.ipv4), &opts, sizeof(opts)); + if (!ct) + return 0; + ct->status = 0xF00; + return 0; +} + +SEC("?tc") 
int set_timeout_after_insert(struct __sk_buff *ctx) { struct bpf_ct_opts___local opts = {}; diff --git a/tools/testing/selftests/bpf/progs/test_kfunc_dynptr_param.c b/tools/testing/selftests/bpf/progs/test_kfunc_dynptr_param.c new file mode 100644 index 000000000000..ce39d096bba3 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_kfunc_dynptr_param.c @@ -0,0 +1,94 @@ +// SPDX-License-Identifier: GPL-2.0 + +/* + * Copyright (C) 2022 Huawei Technologies Duesseldorf GmbH + * + * Author: Roberto Sassu <roberto.sassu@huawei.com> + */ + +#include "vmlinux.h" +#include <errno.h> +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_tracing.h> + +extern struct bpf_key *bpf_lookup_system_key(__u64 id) __ksym; +extern void bpf_key_put(struct bpf_key *key) __ksym; +extern int bpf_verify_pkcs7_signature(struct bpf_dynptr *data_ptr, + struct bpf_dynptr *sig_ptr, + struct bpf_key *trusted_keyring) __ksym; + +struct { + __uint(type, BPF_MAP_TYPE_RINGBUF); +} ringbuf SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_ARRAY); + __uint(max_entries, 1); + __type(key, __u32); + __type(value, __u32); +} array_map SEC(".maps"); + +int err, pid; + +char _license[] SEC("license") = "GPL"; + +SEC("?lsm.s/bpf") +int BPF_PROG(dynptr_type_not_supp, int cmd, union bpf_attr *attr, + unsigned int size) +{ + char write_data[64] = "hello there, world!!"; + struct bpf_dynptr ptr; + + bpf_ringbuf_reserve_dynptr(&ringbuf, sizeof(write_data), 0, &ptr); + + return bpf_verify_pkcs7_signature(&ptr, &ptr, NULL); +} + +SEC("?lsm.s/bpf") +int BPF_PROG(not_valid_dynptr, int cmd, union bpf_attr *attr, unsigned int size) +{ + unsigned long val; + + return bpf_verify_pkcs7_signature((struct bpf_dynptr *)&val, + (struct bpf_dynptr *)&val, NULL); +} + +SEC("?lsm.s/bpf") +int BPF_PROG(not_ptr_to_stack, int cmd, union bpf_attr *attr, unsigned int size) +{ + unsigned long val; + + return bpf_verify_pkcs7_signature((struct bpf_dynptr *)val, + (struct bpf_dynptr *)val, NULL); +} + +SEC("lsm.s/bpf") +int 
BPF_PROG(dynptr_data_null, int cmd, union bpf_attr *attr, unsigned int size) +{ + struct bpf_key *trusted_keyring; + struct bpf_dynptr ptr; + __u32 *value; + int ret, zero = 0; + + if (bpf_get_current_pid_tgid() >> 32 != pid) + return 0; + + value = bpf_map_lookup_elem(&array_map, &zero); + if (!value) + return 0; + + /* Pass invalid flags. */ + ret = bpf_dynptr_from_mem(value, sizeof(*value), ((__u64)~0ULL), &ptr); + if (ret != -EINVAL) + return 0; + + trusted_keyring = bpf_lookup_system_key(0); + if (!trusted_keyring) + return 0; + + err = bpf_verify_pkcs7_signature(&ptr, &ptr, trusted_keyring); + + bpf_key_put(trusted_keyring); + + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/test_lookup_key.c b/tools/testing/selftests/bpf/progs/test_lookup_key.c new file mode 100644 index 000000000000..c73776990ae3 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_lookup_key.c @@ -0,0 +1,46 @@ +// SPDX-License-Identifier: GPL-2.0 + +/* + * Copyright (C) 2022 Huawei Technologies Duesseldorf GmbH + * + * Author: Roberto Sassu <roberto.sassu@huawei.com> + */ + +#include "vmlinux.h" +#include <errno.h> +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_tracing.h> + +char _license[] SEC("license") = "GPL"; + +__u32 monitored_pid; +__u32 key_serial; +__u32 key_id; +__u64 flags; + +extern struct bpf_key *bpf_lookup_user_key(__u32 serial, __u64 flags) __ksym; +extern struct bpf_key *bpf_lookup_system_key(__u64 id) __ksym; +extern void bpf_key_put(struct bpf_key *key) __ksym; + +SEC("lsm.s/bpf") +int BPF_PROG(bpf, int cmd, union bpf_attr *attr, unsigned int size) +{ + struct bpf_key *bkey; + __u32 pid; + + pid = bpf_get_current_pid_tgid() >> 32; + if (pid != monitored_pid) + return 0; + + if (key_serial) + bkey = bpf_lookup_user_key(key_serial, flags); + else + bkey = bpf_lookup_system_key(key_id); + + if (!bkey) + return -ENOENT; + + bpf_key_put(bkey); + + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/test_user_ringbuf.h 
b/tools/testing/selftests/bpf/progs/test_user_ringbuf.h new file mode 100644 index 000000000000..1643b4d59ba7 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_user_ringbuf.h @@ -0,0 +1,35 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. */ + +#ifndef _TEST_USER_RINGBUF_H +#define _TEST_USER_RINGBUF_H + +#define TEST_OP_64 4 +#define TEST_OP_32 2 + +enum test_msg_op { + TEST_MSG_OP_INC64, + TEST_MSG_OP_INC32, + TEST_MSG_OP_MUL64, + TEST_MSG_OP_MUL32, + + // Must come last. + TEST_MSG_OP_NUM_OPS, +}; + +struct test_msg { + enum test_msg_op msg_op; + union { + __s64 operand_64; + __s32 operand_32; + }; +}; + +struct sample { + int pid; + int seq; + long value; + char comm[16]; +}; + +#endif /* _TEST_USER_RINGBUF_H */ diff --git a/tools/testing/selftests/bpf/progs/test_verif_scale1.c b/tools/testing/selftests/bpf/progs/test_verif_scale1.c index d38153dab3dd..ac6135d9374c 100644 --- a/tools/testing/selftests/bpf/progs/test_verif_scale1.c +++ b/tools/testing/selftests/bpf/progs/test_verif_scale1.c @@ -5,7 +5,7 @@ #define ATTR __attribute__((noinline)) #include "test_jhash.h" -SEC("scale90_noinline") +SEC("tc") int balancer_ingress(struct __sk_buff *ctx) { void *data_end = (void *)(long)ctx->data_end; diff --git a/tools/testing/selftests/bpf/progs/test_verif_scale3.c b/tools/testing/selftests/bpf/progs/test_verif_scale3.c index 9beb5bf80373..ca33a9b711c4 100644 --- a/tools/testing/selftests/bpf/progs/test_verif_scale3.c +++ b/tools/testing/selftests/bpf/progs/test_verif_scale3.c @@ -5,7 +5,7 @@ #define ATTR __attribute__((noinline)) #include "test_jhash.h" -SEC("scale90_noinline32") +SEC("tc") int balancer_ingress(struct __sk_buff *ctx) { void *data_end = (void *)(long)ctx->data_end; diff --git a/tools/testing/selftests/bpf/progs/test_verify_pkcs7_sig.c b/tools/testing/selftests/bpf/progs/test_verify_pkcs7_sig.c new file mode 100644 index 000000000000..ce419304ff1f --- /dev/null +++ 
b/tools/testing/selftests/bpf/progs/test_verify_pkcs7_sig.c @@ -0,0 +1,90 @@ +// SPDX-License-Identifier: GPL-2.0 + +/* + * Copyright (C) 2022 Huawei Technologies Duesseldorf GmbH + * + * Author: Roberto Sassu <roberto.sassu@huawei.com> + */ + +#include "vmlinux.h" +#include <errno.h> +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_tracing.h> + +#define MAX_DATA_SIZE (1024 * 1024) +#define MAX_SIG_SIZE 1024 + +extern struct bpf_key *bpf_lookup_user_key(__u32 serial, __u64 flags) __ksym; +extern struct bpf_key *bpf_lookup_system_key(__u64 id) __ksym; +extern void bpf_key_put(struct bpf_key *key) __ksym; +extern int bpf_verify_pkcs7_signature(struct bpf_dynptr *data_ptr, + struct bpf_dynptr *sig_ptr, + struct bpf_key *trusted_keyring) __ksym; + +__u32 monitored_pid; +__u32 user_keyring_serial; +__u64 system_keyring_id; + +struct data { + __u8 data[MAX_DATA_SIZE]; + __u32 data_len; + __u8 sig[MAX_SIG_SIZE]; + __u32 sig_len; +}; + +struct { + __uint(type, BPF_MAP_TYPE_ARRAY); + __uint(max_entries, 1); + __type(key, __u32); + __type(value, struct data); +} data_input SEC(".maps"); + +char _license[] SEC("license") = "GPL"; + +SEC("lsm.s/bpf") +int BPF_PROG(bpf, int cmd, union bpf_attr *attr, unsigned int size) +{ + struct bpf_dynptr data_ptr, sig_ptr; + struct data *data_val; + struct bpf_key *trusted_keyring; + __u32 pid; + __u64 value; + int ret, zero = 0; + + pid = bpf_get_current_pid_tgid() >> 32; + if (pid != monitored_pid) + return 0; + + data_val = bpf_map_lookup_elem(&data_input, &zero); + if (!data_val) + return 0; + + bpf_probe_read(&value, sizeof(value), &attr->value); + + bpf_copy_from_user(data_val, sizeof(struct data), + (void *)(unsigned long)value); + + if (data_val->data_len > sizeof(data_val->data)) + return -EINVAL; + + bpf_dynptr_from_mem(data_val->data, data_val->data_len, 0, &data_ptr); + + if (data_val->sig_len > sizeof(data_val->sig)) + return -EINVAL; + + bpf_dynptr_from_mem(data_val->sig, data_val->sig_len, 0, &sig_ptr); + + if 
(user_keyring_serial) + trusted_keyring = bpf_lookup_user_key(user_keyring_serial, 0); + else + trusted_keyring = bpf_lookup_system_key(system_keyring_id); + + if (!trusted_keyring) + return -ENOENT; + + ret = bpf_verify_pkcs7_signature(&data_ptr, &sig_ptr, trusted_keyring); + + bpf_key_put(trusted_keyring); + + return ret; +} diff --git a/tools/testing/selftests/bpf/progs/timer.c b/tools/testing/selftests/bpf/progs/timer.c index 0053c5402173..acda5c9cea93 100644 --- a/tools/testing/selftests/bpf/progs/timer.c +++ b/tools/testing/selftests/bpf/progs/timer.c @@ -120,7 +120,7 @@ static int timer_cb1(void *map, int *key, struct bpf_timer *timer) } SEC("fentry/bpf_fentry_test1") -int BPF_PROG(test1, int a) +int BPF_PROG2(test1, int, a) { struct bpf_timer *arr_timer, *lru_timer; struct elem init = {}; @@ -236,7 +236,7 @@ int bpf_timer_test(void) } SEC("fentry/bpf_fentry_test2") -int BPF_PROG(test2, int a, int b) +int BPF_PROG2(test2, int, a, int, b) { struct hmap_elem init = {}, *val; int key = HTAB, key_malloc = HTAB_MALLOC; diff --git a/tools/testing/selftests/bpf/progs/tracing_struct.c b/tools/testing/selftests/bpf/progs/tracing_struct.c new file mode 100644 index 000000000000..e718f0ebee7d --- /dev/null +++ b/tools/testing/selftests/bpf/progs/tracing_struct.c @@ -0,0 +1,120 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. 
*/ + +#include <vmlinux.h> +#include <bpf/bpf_tracing.h> +#include <bpf/bpf_helpers.h> + +struct bpf_testmod_struct_arg_1 { + int a; +}; +struct bpf_testmod_struct_arg_2 { + long a; + long b; +}; + +long t1_a_a, t1_a_b, t1_b, t1_c, t1_ret, t1_nregs; +__u64 t1_reg0, t1_reg1, t1_reg2, t1_reg3; +long t2_a, t2_b_a, t2_b_b, t2_c, t2_ret; +long t3_a, t3_b, t3_c_a, t3_c_b, t3_ret; +long t4_a_a, t4_b, t4_c, t4_d, t4_e_a, t4_e_b, t4_ret; +long t5_ret; + +SEC("fentry/bpf_testmod_test_struct_arg_1") +int BPF_PROG2(test_struct_arg_1, struct bpf_testmod_struct_arg_2, a, int, b, int, c) +{ + t1_a_a = a.a; + t1_a_b = a.b; + t1_b = b; + t1_c = c; + return 0; +} + +SEC("fexit/bpf_testmod_test_struct_arg_1") +int BPF_PROG2(test_struct_arg_2, struct bpf_testmod_struct_arg_2, a, int, b, int, c, int, ret) +{ + t1_nregs = bpf_get_func_arg_cnt(ctx); + /* a.a */ + bpf_get_func_arg(ctx, 0, &t1_reg0); + /* a.b */ + bpf_get_func_arg(ctx, 1, &t1_reg1); + /* b */ + bpf_get_func_arg(ctx, 2, &t1_reg2); + t1_reg2 = (int)t1_reg2; + /* c */ + bpf_get_func_arg(ctx, 3, &t1_reg3); + t1_reg3 = (int)t1_reg3; + + t1_ret = ret; + return 0; +} + +SEC("fentry/bpf_testmod_test_struct_arg_2") +int BPF_PROG2(test_struct_arg_3, int, a, struct bpf_testmod_struct_arg_2, b, int, c) +{ + t2_a = a; + t2_b_a = b.a; + t2_b_b = b.b; + t2_c = c; + return 0; +} + +SEC("fexit/bpf_testmod_test_struct_arg_2") +int BPF_PROG2(test_struct_arg_4, int, a, struct bpf_testmod_struct_arg_2, b, int, c, int, ret) +{ + t2_ret = ret; + return 0; +} + +SEC("fentry/bpf_testmod_test_struct_arg_3") +int BPF_PROG2(test_struct_arg_5, int, a, int, b, struct bpf_testmod_struct_arg_2, c) +{ + t3_a = a; + t3_b = b; + t3_c_a = c.a; + t3_c_b = c.b; + return 0; +} + +SEC("fexit/bpf_testmod_test_struct_arg_3") +int BPF_PROG2(test_struct_arg_6, int, a, int, b, struct bpf_testmod_struct_arg_2, c, int, ret) +{ + t3_ret = ret; + return 0; +} + +SEC("fentry/bpf_testmod_test_struct_arg_4") +int BPF_PROG2(test_struct_arg_7, struct bpf_testmod_struct_arg_1, 
a, int, b, + int, c, int, d, struct bpf_testmod_struct_arg_2, e) +{ + t4_a_a = a.a; + t4_b = b; + t4_c = c; + t4_d = d; + t4_e_a = e.a; + t4_e_b = e.b; + return 0; +} + +SEC("fexit/bpf_testmod_test_struct_arg_4") +int BPF_PROG2(test_struct_arg_8, struct bpf_testmod_struct_arg_1, a, int, b, + int, c, int, d, struct bpf_testmod_struct_arg_2, e, int, ret) +{ + t4_ret = ret; + return 0; +} + +SEC("fentry/bpf_testmod_test_struct_arg_5") +int BPF_PROG2(test_struct_arg_9) +{ + return 0; +} + +SEC("fexit/bpf_testmod_test_struct_arg_5") +int BPF_PROG2(test_struct_arg_10, int, ret) +{ + t5_ret = ret; + return 0; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/user_ringbuf_fail.c b/tools/testing/selftests/bpf/progs/user_ringbuf_fail.c new file mode 100644 index 000000000000..82aba4529aa9 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/user_ringbuf_fail.c @@ -0,0 +1,177 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. */ + +#include <linux/bpf.h> +#include <bpf/bpf_helpers.h> +#include "bpf_misc.h" + +char _license[] SEC("license") = "GPL"; + +struct sample { + int pid; + int seq; + long value; + char comm[16]; +}; + +struct { + __uint(type, BPF_MAP_TYPE_USER_RINGBUF); +} user_ringbuf SEC(".maps"); + +static long +bad_access1(struct bpf_dynptr *dynptr, void *context) +{ + const struct sample *sample; + + sample = bpf_dynptr_data(dynptr - 1, 0, sizeof(*sample)); + bpf_printk("Was able to pass bad pointer %lx\n", (__u64)dynptr - 1); + + return 0; +} + +/* A callback that accesses a dynptr in a bpf_user_ringbuf_drain callback should + * not be able to read before the pointer. 
+ */ +SEC("?raw_tp/sys_nanosleep") +int user_ringbuf_callback_bad_access1(void *ctx) +{ + bpf_user_ringbuf_drain(&user_ringbuf, bad_access1, NULL, 0); + + return 0; +} + +static long +bad_access2(struct bpf_dynptr *dynptr, void *context) +{ + const struct sample *sample; + + sample = bpf_dynptr_data(dynptr + 1, 0, sizeof(*sample)); + bpf_printk("Was able to pass bad pointer %lx\n", (__u64)dynptr + 1); + + return 0; +} + +/* A callback that accesses a dynptr in a bpf_user_ringbuf_drain callback should + * not be able to read past the end of the pointer. + */ +SEC("?raw_tp/sys_nanosleep") +int user_ringbuf_callback_bad_access2(void *ctx) +{ + bpf_user_ringbuf_drain(&user_ringbuf, bad_access2, NULL, 0); + + return 0; +} + +static long +write_forbidden(struct bpf_dynptr *dynptr, void *context) +{ + *((long *)dynptr) = 0; + + return 0; +} + +/* A callback that accesses a dynptr in a bpf_user_ringbuf_drain callback should + * not be able to write to that pointer. + */ +SEC("?raw_tp/sys_nanosleep") +int user_ringbuf_callback_write_forbidden(void *ctx) +{ + bpf_user_ringbuf_drain(&user_ringbuf, write_forbidden, NULL, 0); + + return 0; +} + +static long +null_context_write(struct bpf_dynptr *dynptr, void *context) +{ + *((__u64 *)context) = 0; + + return 0; +} + +/* A bpf_user_ringbuf_drain callback that is passed a NULL context should not + * be able to write through that context pointer. + */ +SEC("?raw_tp/sys_nanosleep") +int user_ringbuf_callback_null_context_write(void *ctx) +{ + bpf_user_ringbuf_drain(&user_ringbuf, null_context_write, NULL, 0); + + return 0; +} + +static long +null_context_read(struct bpf_dynptr *dynptr, void *context) +{ + __u64 id = *((__u64 *)context); + + bpf_printk("Read id %lu\n", id); + + return 0; +} + +/* A bpf_user_ringbuf_drain callback that is passed a NULL context should not + * be able to read through that context pointer.
+ */ +SEC("?raw_tp/sys_nanosleep") +int user_ringbuf_callback_null_context_read(void *ctx) +{ + bpf_user_ringbuf_drain(&user_ringbuf, null_context_read, NULL, 0); + + return 0; +} + +static long +try_discard_dynptr(struct bpf_dynptr *dynptr, void *context) +{ + bpf_ringbuf_discard_dynptr(dynptr, 0); + + return 0; +} + +/* A callback that is handed a dynptr by bpf_user_ringbuf_drain should not be + * able to discard that dynptr with bpf_ringbuf_discard_dynptr. + */ +SEC("?raw_tp/sys_nanosleep") +int user_ringbuf_callback_discard_dynptr(void *ctx) +{ + bpf_user_ringbuf_drain(&user_ringbuf, try_discard_dynptr, NULL, 0); + + return 0; +} + +static long +try_submit_dynptr(struct bpf_dynptr *dynptr, void *context) +{ + bpf_ringbuf_submit_dynptr(dynptr, 0); + + return 0; +} + +/* A callback that is handed a dynptr by bpf_user_ringbuf_drain should not be + * able to submit that dynptr with bpf_ringbuf_submit_dynptr. + */ +SEC("?raw_tp/sys_nanosleep") +int user_ringbuf_callback_submit_dynptr(void *ctx) +{ + bpf_user_ringbuf_drain(&user_ringbuf, try_submit_dynptr, NULL, 0); + + return 0; +} + +static long +invalid_drain_callback_return(struct bpf_dynptr *dynptr, void *context) +{ + return 2; +} + +/* A callback passed to bpf_user_ringbuf_drain should not be able to return an + * unsupported value (this one returns 2). + */ +SEC("?raw_tp/sys_nanosleep") +int user_ringbuf_callback_invalid_return(void *ctx) +{ + bpf_user_ringbuf_drain(&user_ringbuf, invalid_drain_callback_return, NULL, 0); + + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/user_ringbuf_success.c b/tools/testing/selftests/bpf/progs/user_ringbuf_success.c new file mode 100644 index 000000000000..099c23d9aa21 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/user_ringbuf_success.c @@ -0,0 +1,218 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2022 Meta Platforms, Inc. and affiliates.
*/ + +#include <linux/bpf.h> +#include <bpf/bpf_helpers.h> +#include "bpf_misc.h" +#include "test_user_ringbuf.h" + +char _license[] SEC("license") = "GPL"; + +struct { + __uint(type, BPF_MAP_TYPE_USER_RINGBUF); +} user_ringbuf SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_RINGBUF); +} kernel_ringbuf SEC(".maps"); + +/* inputs */ +int pid, err, val; + +int read = 0; + +/* Counter used for end-to-end protocol test */ +__u64 kern_mutated = 0; +__u64 user_mutated = 0; +__u64 expected_user_mutated = 0; + +static int +is_test_process(void) +{ + int cur_pid = bpf_get_current_pid_tgid() >> 32; + + return cur_pid == pid; +} + +static long +record_sample(struct bpf_dynptr *dynptr, void *context) +{ + const struct sample *sample = NULL; + struct sample stack_sample; + int status; + static int num_calls; + + if (num_calls++ % 2 == 0) { + status = bpf_dynptr_read(&stack_sample, sizeof(stack_sample), dynptr, 0, 0); + if (status) { + bpf_printk("bpf_dynptr_read() failed: %d\n", status); + err = 1; + return 0; + } + } else { + sample = bpf_dynptr_data(dynptr, 0, sizeof(*sample)); + if (!sample) { + bpf_printk("Unexpectedly failed to get sample\n"); + err = 2; + return 0; + } + stack_sample = *sample; + } + + __sync_fetch_and_add(&read, 1); + return 0; +} + +static void +handle_sample_msg(const struct test_msg *msg) +{ + switch (msg->msg_op) { + case TEST_MSG_OP_INC64: + kern_mutated += msg->operand_64; + break; + case TEST_MSG_OP_INC32: + kern_mutated += msg->operand_32; + break; + case TEST_MSG_OP_MUL64: + kern_mutated *= msg->operand_64; + break; + case TEST_MSG_OP_MUL32: + kern_mutated *= msg->operand_32; + break; + default: + bpf_printk("Unrecognized op %d\n", msg->msg_op); + err = 2; + } +} + +static long +read_protocol_msg(struct bpf_dynptr *dynptr, void *context) +{ + const struct test_msg *msg = NULL; + + msg = bpf_dynptr_data(dynptr, 0, sizeof(*msg)); + if (!msg) { + err = 1; + bpf_printk("Unexpectedly failed to get msg\n"); + return 0; + } + + 
handle_sample_msg(msg); + + return 0; +} + +static int publish_next_kern_msg(__u32 index, void *context) +{ + struct test_msg *msg = NULL; + int operand_64 = TEST_OP_64; + int operand_32 = TEST_OP_32; + + msg = bpf_ringbuf_reserve(&kernel_ringbuf, sizeof(*msg), 0); + if (!msg) { + err = 4; + return 1; + } + + switch (index % TEST_MSG_OP_NUM_OPS) { + case TEST_MSG_OP_INC64: + msg->operand_64 = operand_64; + msg->msg_op = TEST_MSG_OP_INC64; + expected_user_mutated += operand_64; + break; + case TEST_MSG_OP_INC32: + msg->operand_32 = operand_32; + msg->msg_op = TEST_MSG_OP_INC32; + expected_user_mutated += operand_32; + break; + case TEST_MSG_OP_MUL64: + msg->operand_64 = operand_64; + msg->msg_op = TEST_MSG_OP_MUL64; + expected_user_mutated *= operand_64; + break; + case TEST_MSG_OP_MUL32: + msg->operand_32 = operand_32; + msg->msg_op = TEST_MSG_OP_MUL32; + expected_user_mutated *= operand_32; + break; + default: + bpf_ringbuf_discard(msg, 0); + err = 5; + return 1; + } + + bpf_ringbuf_submit(msg, 0); + + return 0; +} + +static void +publish_kern_messages(void) +{ + if (expected_user_mutated != user_mutated) { + bpf_printk("%lu != %lu\n", expected_user_mutated, user_mutated); + err = 3; + return; + } + + bpf_loop(8, publish_next_kern_msg, NULL, 0); +} + +SEC("fentry/" SYS_PREFIX "sys_prctl") +int test_user_ringbuf_protocol(void *ctx) +{ + long status = 0; + struct sample *sample = NULL; + struct bpf_dynptr ptr; + + if (!is_test_process()) + return 0; + + status = bpf_user_ringbuf_drain(&user_ringbuf, read_protocol_msg, NULL, 0); + if (status < 0) { + bpf_printk("Drain returned: %ld\n", status); + err = 1; + return 0; + } + + publish_kern_messages(); + + return 0; +} + +SEC("fentry/" SYS_PREFIX "sys_getpgid") +int test_user_ringbuf(void *ctx) +{ + int status = 0; + struct sample *sample = NULL; + struct bpf_dynptr ptr; + + if (!is_test_process()) + return 0; + + err = bpf_user_ringbuf_drain(&user_ringbuf, record_sample, NULL, 0); + + return 0; +} + +static long 
+do_nothing_cb(struct bpf_dynptr *dynptr, void *context) +{ + __sync_fetch_and_add(&read, 1); + return 0; +} + +SEC("fentry/" SYS_PREFIX "sys_getrlimit") +int test_user_ringbuf_epoll(void *ctx) +{ + long num_samples; + + if (!is_test_process()) + return 0; + + num_samples = bpf_user_ringbuf_drain(&user_ringbuf, do_nothing_cb, NULL, 0); + if (num_samples <= 0) + err = 1; + + return 0; +} diff --git a/tools/testing/selftests/bpf/test_kmod.sh b/tools/testing/selftests/bpf/test_kmod.sh index 4f6444bcd53f..50dca53ac536 100755 --- a/tools/testing/selftests/bpf/test_kmod.sh +++ b/tools/testing/selftests/bpf/test_kmod.sh @@ -1,6 +1,11 @@ #!/bin/sh # SPDX-License-Identifier: GPL-2.0 +# Usage: +# ./test_kmod.sh [module_param]... +# Ex.: ./test_kmod.sh test_range=1,3 +# All the parameters are passed to the kernel module. + # Kselftest framework requirement - SKIP code is 4. ksft_skip=4 @@ -24,17 +29,18 @@ test_run() sysctl -w net.core.bpf_jit_harden=$2 2>&1 > /dev/null echo "[ JIT enabled:$1 hardened:$2 ]" + shift 2 dmesg -C if [ -f ${OUTPUT}/lib/test_bpf.ko ]; then - insmod ${OUTPUT}/lib/test_bpf.ko 2> /dev/null + insmod ${OUTPUT}/lib/test_bpf.ko "$@" 2> /dev/null if [ $? -ne 0 ]; then rc=1 fi else # Use modprobe dry run to check for missing test_bpf module - if ! /sbin/modprobe -q -n test_bpf; then + if ! 
/sbin/modprobe -q -n test_bpf "$@"; then echo "test_bpf: [SKIP]" - elif /sbin/modprobe -q test_bpf; then + elif /sbin/modprobe -q test_bpf "$@"; then echo "test_bpf: ok" else echo "test_bpf: [FAIL]" @@ -59,9 +65,9 @@ test_restore() rc=0 test_save -test_run 0 0 -test_run 1 0 -test_run 1 1 -test_run 1 2 +test_run 0 0 "$@" +test_run 1 0 "$@" +test_run 1 1 "$@" +test_run 1 2 "$@" test_restore exit $rc diff --git a/tools/testing/selftests/bpf/test_maps.c b/tools/testing/selftests/bpf/test_maps.c index 00b9cc305e58..b73152822aa2 100644 --- a/tools/testing/selftests/bpf/test_maps.c +++ b/tools/testing/selftests/bpf/test_maps.c @@ -30,7 +30,7 @@ #define ENOTSUPP 524 #endif -static int skips; +int skips; static struct bpf_map_create_opts map_opts = { .sz = sizeof(map_opts) }; @@ -659,13 +659,13 @@ static void test_sockmap(unsigned int tasks, void *data) { struct bpf_map *bpf_map_rx, *bpf_map_tx, *bpf_map_msg, *bpf_map_break; int map_fd_msg = 0, map_fd_rx = 0, map_fd_tx = 0, map_fd_break; + struct bpf_object *parse_obj, *verdict_obj, *msg_obj; int ports[] = {50200, 50201, 50202, 50204}; int err, i, fd, udp, sfd[6] = {0xdeadbeef}; u8 buf[20] = {0x0, 0x5, 0x3, 0x2, 0x1, 0x0}; int parse_prog, verdict_prog, msg_prog; struct sockaddr_in addr; int one = 1, s, sc, rc; - struct bpf_object *obj; struct timeval to; __u32 key, value; pid_t pid[tasks]; @@ -761,6 +761,7 @@ static void test_sockmap(unsigned int tasks, void *data) i, udp); goto out_sockmap; } + close(udp); /* Test update without programs */ for (i = 0; i < 6; i++) { @@ -823,27 +824,27 @@ static void test_sockmap(unsigned int tasks, void *data) /* Load SK_SKB program and Attach */ err = bpf_prog_test_load(SOCKMAP_PARSE_PROG, - BPF_PROG_TYPE_SK_SKB, &obj, &parse_prog); + BPF_PROG_TYPE_SK_SKB, &parse_obj, &parse_prog); if (err) { printf("Failed to load SK_SKB parse prog\n"); goto out_sockmap; } err = bpf_prog_test_load(SOCKMAP_TCP_MSG_PROG, - BPF_PROG_TYPE_SK_MSG, &obj, &msg_prog); + BPF_PROG_TYPE_SK_MSG, &msg_obj, 
&msg_prog); if (err) { printf("Failed to load SK_SKB msg prog\n"); goto out_sockmap; } err = bpf_prog_test_load(SOCKMAP_VERDICT_PROG, - BPF_PROG_TYPE_SK_SKB, &obj, &verdict_prog); + BPF_PROG_TYPE_SK_SKB, &verdict_obj, &verdict_prog); if (err) { printf("Failed to load SK_SKB verdict prog\n"); goto out_sockmap; } - bpf_map_rx = bpf_object__find_map_by_name(obj, "sock_map_rx"); + bpf_map_rx = bpf_object__find_map_by_name(verdict_obj, "sock_map_rx"); if (!bpf_map_rx) { printf("Failed to load map rx from verdict prog\n"); goto out_sockmap; @@ -855,7 +856,7 @@ static void test_sockmap(unsigned int tasks, void *data) goto out_sockmap; } - bpf_map_tx = bpf_object__find_map_by_name(obj, "sock_map_tx"); + bpf_map_tx = bpf_object__find_map_by_name(verdict_obj, "sock_map_tx"); if (!bpf_map_tx) { printf("Failed to load map tx from verdict prog\n"); goto out_sockmap; @@ -867,7 +868,7 @@ static void test_sockmap(unsigned int tasks, void *data) goto out_sockmap; } - bpf_map_msg = bpf_object__find_map_by_name(obj, "sock_map_msg"); + bpf_map_msg = bpf_object__find_map_by_name(verdict_obj, "sock_map_msg"); if (!bpf_map_msg) { printf("Failed to load map msg from msg_verdict prog\n"); goto out_sockmap; @@ -879,7 +880,7 @@ static void test_sockmap(unsigned int tasks, void *data) goto out_sockmap; } - bpf_map_break = bpf_object__find_map_by_name(obj, "sock_map_break"); + bpf_map_break = bpf_object__find_map_by_name(verdict_obj, "sock_map_break"); if (!bpf_map_break) { printf("Failed to load map tx from verdict prog\n"); goto out_sockmap; @@ -1125,7 +1126,9 @@ static void test_sockmap(unsigned int tasks, void *data) } close(fd); close(map_fd_rx); - bpf_object__close(obj); + bpf_object__close(parse_obj); + bpf_object__close(msg_obj); + bpf_object__close(verdict_obj); return; out: for (i = 0; i < 6; i++) @@ -1283,8 +1286,11 @@ static void test_map_in_map(void) printf("Inner map mim.inner was not destroyed\n"); goto out_map_in_map; } + + close(fd); } + bpf_object__close(obj); return; 
out_map_in_map: diff --git a/tools/testing/selftests/bpf/test_maps.h b/tools/testing/selftests/bpf/test_maps.h index 77d8587ac4ed..f6fbca761732 100644 --- a/tools/testing/selftests/bpf/test_maps.h +++ b/tools/testing/selftests/bpf/test_maps.h @@ -14,4 +14,6 @@ } \ }) +extern int skips; + #endif diff --git a/tools/testing/selftests/bpf/test_progs.c b/tools/testing/selftests/bpf/test_progs.c index 3561c97701f2..0e9a47f97890 100644 --- a/tools/testing/selftests/bpf/test_progs.c +++ b/tools/testing/selftests/bpf/test_progs.c @@ -943,6 +943,23 @@ int trigger_module_test_write(int write_sz) return 0; } +int write_sysctl(const char *sysctl, const char *value) +{ + int fd, err, len; + + fd = open(sysctl, O_WRONLY); + if (!ASSERT_NEQ(fd, -1, "open sysctl")) + return -1; + + len = strlen(value); + err = write(fd, value, len); + close(fd); + if (!ASSERT_EQ(err, len, "write sysctl")) + return -1; + + return 0; +} + #define MAX_BACKTRACE_SZ 128 void crash_handler(int signum) { diff --git a/tools/testing/selftests/bpf/test_progs.h b/tools/testing/selftests/bpf/test_progs.h index 5fe1365c2bb1..b090996daee5 100644 --- a/tools/testing/selftests/bpf/test_progs.h +++ b/tools/testing/selftests/bpf/test_progs.h @@ -384,6 +384,7 @@ int extract_build_id(char *build_id, size_t size); int kern_sync_rcu(void); int trigger_module_test_read(int read_sz); int trigger_module_test_write(int write_sz); +int write_sysctl(const char *sysctl, const char *value); #ifdef __x86_64__ #define SYS_NANOSLEEP_KPROBE_NAME "__x64_sys_nanosleep" diff --git a/tools/testing/selftests/bpf/test_sockmap.c b/tools/testing/selftests/bpf/test_sockmap.c index dcb038e342d8..e768181a1bd7 100644 --- a/tools/testing/selftests/bpf/test_sockmap.c +++ b/tools/testing/selftests/bpf/test_sockmap.c @@ -138,6 +138,7 @@ struct sockmap_options { bool data_test; bool drop_expected; bool check_recved_len; + bool tx_wait_mem; int iov_count; int iov_length; int rate; @@ -578,6 +579,10 @@ static int msg_loop(int fd, int iov_count, int 
iov_length, int cnt, sent = sendmsg(fd, &msg, flags); if (!drop && sent < 0) { + if (opt->tx_wait_mem && errno == EACCES) { + errno = 0; + goto out_errno; + } perror("sendmsg loop error"); goto out_errno; } else if (drop && sent >= 0) { @@ -644,6 +649,15 @@ static int msg_loop(int fd, int iov_count, int iov_length, int cnt, goto out_errno; } + if (opt->tx_wait_mem) { + FD_ZERO(&w); + FD_SET(fd, &w); + slct = select(max_fd + 1, NULL, NULL, &w, &timeout); + errno = 0; + close(fd); + goto out_errno; + } + errno = 0; if (peek_flag) { flags |= MSG_PEEK; @@ -752,6 +766,22 @@ static int sendmsg_test(struct sockmap_options *opt) return err; } + if (opt->tx_wait_mem) { + struct timeval timeout; + int rxtx_buf_len = 1024; + + timeout.tv_sec = 3; + timeout.tv_usec = 0; + + err = setsockopt(c2, SOL_SOCKET, SO_SNDTIMEO, &timeout, sizeof(struct timeval)); + err |= setsockopt(c2, SOL_SOCKET, SO_SNDBUFFORCE, &rxtx_buf_len, sizeof(int)); + err |= setsockopt(p2, SOL_SOCKET, SO_RCVBUFFORCE, &rxtx_buf_len, sizeof(int)); + if (err) { + perror("setsockopt failed()"); + return errno; + } + } + rxpid = fork(); if (rxpid == 0) { if (txmsg_pop || txmsg_start_pop) @@ -788,6 +818,9 @@ static int sendmsg_test(struct sockmap_options *opt) return errno; } + if (opt->tx_wait_mem) + close(c2); + txpid = fork(); if (txpid == 0) { if (opt->sendpage) @@ -1452,6 +1485,14 @@ static void test_txmsg_redir(int cgrp, struct sockmap_options *opt) test_send(opt, cgrp); } +static void test_txmsg_redir_wait_sndmem(int cgrp, struct sockmap_options *opt) +{ + txmsg_redir = 1; + opt->tx_wait_mem = true; + test_send_large(opt, cgrp); + opt->tx_wait_mem = false; +} + static void test_txmsg_drop(int cgrp, struct sockmap_options *opt) { txmsg_drop = 1; @@ -1800,6 +1841,7 @@ static int populate_progs(char *bpf_file) struct _test test[] = { {"txmsg test passthrough", test_txmsg_pass}, {"txmsg test redirect", test_txmsg_redir}, + {"txmsg test redirect wait send mem", test_txmsg_redir_wait_sndmem}, {"txmsg test drop", 
test_txmsg_drop}, {"txmsg test ingress redirect", test_txmsg_ingress_redir}, {"txmsg test skb", test_txmsg_skb}, diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c index f9d553fbf68a..2dbcbf363c18 100644 --- a/tools/testing/selftests/bpf/test_verifier.c +++ b/tools/testing/selftests/bpf/test_verifier.c @@ -1498,7 +1498,8 @@ static void do_test_single(struct bpf_test *test, bool unpriv, opts.log_level = DEFAULT_LIBBPF_LOG_LEVEL; opts.prog_flags = pflags; - if (prog_type == BPF_PROG_TYPE_TRACING && test->kfunc) { + if ((prog_type == BPF_PROG_TYPE_TRACING || + prog_type == BPF_PROG_TYPE_LSM) && test->kfunc) { int attach_btf_id; attach_btf_id = libbpf_find_vmlinux_btf_id(test->kfunc, diff --git a/tools/testing/selftests/bpf/verifier/calls.c b/tools/testing/selftests/bpf/verifier/calls.c index 3fb4f69b1962..e1a937277b54 100644 --- a/tools/testing/selftests/bpf/verifier/calls.c +++ b/tools/testing/selftests/bpf/verifier/calls.c @@ -284,7 +284,7 @@ .result = ACCEPT, }, { - "calls: not on unpriviledged", + "calls: not on unprivileged", .insns = { BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 2), BPF_MOV64_IMM(BPF_REG_0, 1), diff --git a/tools/testing/selftests/bpf/verifier/ref_tracking.c b/tools/testing/selftests/bpf/verifier/ref_tracking.c index 57a83d763ec1..f18ce867271f 100644 --- a/tools/testing/selftests/bpf/verifier/ref_tracking.c +++ b/tools/testing/selftests/bpf/verifier/ref_tracking.c @@ -85,6 +85,145 @@ .result = REJECT, }, { + "reference tracking: acquire/release user key reference", + .insns = { + BPF_MOV64_IMM(BPF_REG_1, -3), + BPF_MOV64_IMM(BPF_REG_2, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, BPF_PSEUDO_KFUNC_CALL, 0, 0), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, BPF_PSEUDO_KFUNC_CALL, 0, 0), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_LSM, + .kfunc = "bpf", + .expected_attach_type = BPF_LSM_MAC, + 
.flags = BPF_F_SLEEPABLE, + .fixup_kfunc_btf_id = { + { "bpf_lookup_user_key", 2 }, + { "bpf_key_put", 5 }, + }, + .result = ACCEPT, +}, +{ + "reference tracking: acquire/release system key reference", + .insns = { + BPF_MOV64_IMM(BPF_REG_1, 1), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, BPF_PSEUDO_KFUNC_CALL, 0, 0), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, BPF_PSEUDO_KFUNC_CALL, 0, 0), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_LSM, + .kfunc = "bpf", + .expected_attach_type = BPF_LSM_MAC, + .flags = BPF_F_SLEEPABLE, + .fixup_kfunc_btf_id = { + { "bpf_lookup_system_key", 1 }, + { "bpf_key_put", 4 }, + }, + .result = ACCEPT, +}, +{ + "reference tracking: release user key reference without check", + .insns = { + BPF_MOV64_IMM(BPF_REG_1, -3), + BPF_MOV64_IMM(BPF_REG_2, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, BPF_PSEUDO_KFUNC_CALL, 0, 0), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, BPF_PSEUDO_KFUNC_CALL, 0, 0), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_LSM, + .kfunc = "bpf", + .expected_attach_type = BPF_LSM_MAC, + .flags = BPF_F_SLEEPABLE, + .errstr = "arg#0 pointer type STRUCT bpf_key must point to scalar, or struct with scalar", + .fixup_kfunc_btf_id = { + { "bpf_lookup_user_key", 2 }, + { "bpf_key_put", 4 }, + }, + .result = REJECT, +}, +{ + "reference tracking: release system key reference without check", + .insns = { + BPF_MOV64_IMM(BPF_REG_1, 1), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, BPF_PSEUDO_KFUNC_CALL, 0, 0), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, BPF_PSEUDO_KFUNC_CALL, 0, 0), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_LSM, + .kfunc = "bpf", + .expected_attach_type = BPF_LSM_MAC, + .flags = BPF_F_SLEEPABLE, + .errstr = "arg#0 pointer type STRUCT bpf_key must point to scalar, or struct with 
scalar", + .fixup_kfunc_btf_id = { + { "bpf_lookup_system_key", 1 }, + { "bpf_key_put", 3 }, + }, + .result = REJECT, +}, +{ + "reference tracking: release with NULL key pointer", + .insns = { + BPF_MOV64_IMM(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, BPF_PSEUDO_KFUNC_CALL, 0, 0), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_LSM, + .kfunc = "bpf", + .expected_attach_type = BPF_LSM_MAC, + .flags = BPF_F_SLEEPABLE, + .errstr = "arg#0 pointer type STRUCT bpf_key must point to scalar, or struct with scalar", + .fixup_kfunc_btf_id = { + { "bpf_key_put", 1 }, + }, + .result = REJECT, +}, +{ + "reference tracking: leak potential reference to user key", + .insns = { + BPF_MOV64_IMM(BPF_REG_1, -3), + BPF_MOV64_IMM(BPF_REG_2, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, BPF_PSEUDO_KFUNC_CALL, 0, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_LSM, + .kfunc = "bpf", + .expected_attach_type = BPF_LSM_MAC, + .flags = BPF_F_SLEEPABLE, + .errstr = "Unreleased reference", + .fixup_kfunc_btf_id = { + { "bpf_lookup_user_key", 2 }, + }, + .result = REJECT, +}, +{ + "reference tracking: leak potential reference to system key", + .insns = { + BPF_MOV64_IMM(BPF_REG_1, 1), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, BPF_PSEUDO_KFUNC_CALL, 0, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_LSM, + .kfunc = "bpf", + .expected_attach_type = BPF_LSM_MAC, + .flags = BPF_F_SLEEPABLE, + .errstr = "Unreleased reference", + .fixup_kfunc_btf_id = { + { "bpf_lookup_system_key", 1 }, + }, + .result = REJECT, +}, +{ "reference tracking: release reference without check", .insns = { BPF_SK_LOOKUP(sk_lookup_tcp), diff --git a/tools/testing/selftests/bpf/verifier/var_off.c b/tools/testing/selftests/bpf/verifier/var_off.c index 187c6f6e32bc..d37f512fad16 100644 --- a/tools/testing/selftests/bpf/verifier/var_off.c +++ b/tools/testing/selftests/bpf/verifier/var_off.c @@ -121,7 +121,7 @@ BPF_EXIT_INSN(), }, .fixup_map_hash_8b = { 1 }, - /* The 
unpriviledged case is not too interesting; variable + /* The unprivileged case is not too interesting; variable * stack access is rejected. */ .errstr_unpriv = "R2 variable stack access prohibited for !root", diff --git a/tools/testing/selftests/bpf/verify_sig_setup.sh b/tools/testing/selftests/bpf/verify_sig_setup.sh new file mode 100755 index 000000000000..ba08922b4a27 --- /dev/null +++ b/tools/testing/selftests/bpf/verify_sig_setup.sh @@ -0,0 +1,104 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +set -e +set -u +set -o pipefail + +VERBOSE="${SELFTESTS_VERBOSE:=0}" +LOG_FILE="$(mktemp /tmp/verify_sig_setup.log.XXXXXX)" + +x509_genkey_content="\ +[ req ] +default_bits = 2048 +distinguished_name = req_distinguished_name +prompt = no +string_mask = utf8only +x509_extensions = myexts + +[ req_distinguished_name ] +CN = eBPF Signature Verification Testing Key + +[ myexts ] +basicConstraints=critical,CA:FALSE +keyUsage=digitalSignature +subjectKeyIdentifier=hash +authorityKeyIdentifier=keyid +" + +usage() +{ + echo "Usage: $0 <setup|cleanup <existing_tmp_dir>" + exit 1 +} + +setup() +{ + local tmp_dir="$1" + + echo "${x509_genkey_content}" > ${tmp_dir}/x509.genkey + + openssl req -new -nodes -utf8 -sha256 -days 36500 \ + -batch -x509 -config ${tmp_dir}/x509.genkey \ + -outform PEM -out ${tmp_dir}/signing_key.pem \ + -keyout ${tmp_dir}/signing_key.pem 2>&1 + + openssl x509 -in ${tmp_dir}/signing_key.pem -out \ + ${tmp_dir}/signing_key.der -outform der + + key_id=$(cat ${tmp_dir}/signing_key.der | keyctl padd asymmetric ebpf_testing_key @s) + + keyring_id=$(keyctl newring ebpf_testing_keyring @s) + keyctl link $key_id $keyring_id +} + +cleanup() { + local tmp_dir="$1" + + keyctl unlink $(keyctl search @s asymmetric ebpf_testing_key) @s + keyctl unlink $(keyctl search @s keyring ebpf_testing_keyring) @s + rm -rf ${tmp_dir} +} + +catch() +{ + local exit_code="$1" + local log_file="$2" + + if [[ "${exit_code}" -ne 0 ]]; then + cat "${log_file}" >&3 + fi + + rm -f 
"${log_file}" + exit ${exit_code} +} + +main() +{ + [[ $# -ne 2 ]] && usage + + local action="$1" + local tmp_dir="$2" + + [[ ! -d "${tmp_dir}" ]] && echo "Directory ${tmp_dir} doesn't exist" && exit 1 + + if [[ "${action}" == "setup" ]]; then + setup "${tmp_dir}" + elif [[ "${action}" == "cleanup" ]]; then + cleanup "${tmp_dir}" + else + echo "Unknown action: ${action}" + exit 1 + fi +} + +trap 'catch "$?" "${LOG_FILE}"' EXIT + +if [[ "${VERBOSE}" -eq 0 ]]; then + # Save the stderr to 3 so that we can output back to + # it incase of an error. + exec 3>&2 1>"${LOG_FILE}" 2>&1 +fi + +main "$@" +rm -f "${LOG_FILE}" diff --git a/tools/testing/selftests/bpf/veristat.c b/tools/testing/selftests/bpf/veristat.c new file mode 100644 index 000000000000..b0d83a28e348 --- /dev/null +++ b/tools/testing/selftests/bpf/veristat.c @@ -0,0 +1,1322 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. */ +#define _GNU_SOURCE +#include <argp.h> +#include <string.h> +#include <stdlib.h> +#include <linux/compiler.h> +#include <sched.h> +#include <pthread.h> +#include <dirent.h> +#include <signal.h> +#include <fcntl.h> +#include <unistd.h> +#include <sys/time.h> +#include <sys/sysinfo.h> +#include <sys/stat.h> +#include <bpf/libbpf.h> +#include <libelf.h> +#include <gelf.h> + +enum stat_id { + VERDICT, + DURATION, + TOTAL_INSNS, + TOTAL_STATES, + PEAK_STATES, + MAX_STATES_PER_INSN, + MARK_READ_MAX_LEN, + + FILE_NAME, + PROG_NAME, + + ALL_STATS_CNT, + NUM_STATS_CNT = FILE_NAME - VERDICT, +}; + +struct verif_stats { + char *file_name; + char *prog_name; + + long stats[NUM_STATS_CNT]; +}; + +struct stat_specs { + int spec_cnt; + enum stat_id ids[ALL_STATS_CNT]; + bool asc[ALL_STATS_CNT]; + int lens[ALL_STATS_CNT * 3]; /* 3x for comparison mode */ +}; + +enum resfmt { + RESFMT_TABLE, + RESFMT_TABLE_CALCLEN, /* fake format to pre-calculate table's column widths */ + RESFMT_CSV, +}; + +struct filter { + char *file_glob; + char *prog_glob; +}; + 
+static struct env { + char **filenames; + int filename_cnt; + bool verbose; + bool quiet; + int log_level; + enum resfmt out_fmt; + bool comparison_mode; + + struct verif_stats *prog_stats; + int prog_stat_cnt; + + /* baseline_stats is allocated and used only in comparison mode */ + struct verif_stats *baseline_stats; + int baseline_stat_cnt; + + struct stat_specs output_spec; + struct stat_specs sort_spec; + + struct filter *allow_filters; + struct filter *deny_filters; + int allow_filter_cnt; + int deny_filter_cnt; + + int files_processed; + int files_skipped; + int progs_processed; + int progs_skipped; +} env; + +static int libbpf_print_fn(enum libbpf_print_level level, const char *format, va_list args) +{ + if (!env.verbose) + return 0; + if (level == LIBBPF_DEBUG /* && !env.verbose */) + return 0; + return vfprintf(stderr, format, args); +} + +const char *argp_program_version = "veristat"; +const char *argp_program_bug_address = "<bpf@vger.kernel.org>"; +const char argp_program_doc[] = +"veristat BPF verifier stats collection and comparison tool.\n" +"\n" +"USAGE: veristat <obj-file> [<obj-file>...]\n" +" OR: veristat -C <baseline.csv> <comparison.csv>\n"; + +static const struct argp_option opts[] = { + { NULL, 'h', NULL, OPTION_HIDDEN, "Show the full help" }, + { "verbose", 'v', NULL, 0, "Verbose mode" }, + { "log-level", 'l', "LEVEL", 0, "Verifier log level (default 0 for normal mode, 1 for verbose mode)" }, + { "quiet", 'q', NULL, 0, "Quiet mode" }, + { "emit", 'e', "SPEC", 0, "Specify stats to be emitted" }, + { "sort", 's', "SPEC", 0, "Specify sort order" }, + { "output-format", 'o', "FMT", 0, "Result output format (table, csv), default is table." }, + { "compare", 'C', NULL, 0, "Comparison mode" }, + { "filter", 'f', "FILTER", 0, "Filter expressions (or @filename for file with expressions)."
}, + {}, +}; + +static int parse_stats(const char *stats_str, struct stat_specs *specs); +static int append_filter(struct filter **filters, int *cnt, const char *str); +static int append_filter_file(const char *path); + +static error_t parse_arg(int key, char *arg, struct argp_state *state) +{ + void *tmp; + int err; + + switch (key) { + case 'h': + argp_state_help(state, stderr, ARGP_HELP_STD_HELP); + break; + case 'v': + env.verbose = true; + break; + case 'q': + env.quiet = true; + break; + case 'e': + err = parse_stats(arg, &env.output_spec); + if (err) + return err; + break; + case 's': + err = parse_stats(arg, &env.sort_spec); + if (err) + return err; + break; + case 'o': + if (strcmp(arg, "table") == 0) { + env.out_fmt = RESFMT_TABLE; + } else if (strcmp(arg, "csv") == 0) { + env.out_fmt = RESFMT_CSV; + } else { + fprintf(stderr, "Unrecognized output format '%s'\n", arg); + return -EINVAL; + } + break; + case 'l': + errno = 0; + env.log_level = strtol(arg, NULL, 10); + if (errno) { + fprintf(stderr, "invalid log level: %s\n", arg); + argp_usage(state); + } + break; + case 'C': + env.comparison_mode = true; + break; + case 'f': + if (arg[0] == '@') + err = append_filter_file(arg + 1); + else if (arg[0] == '!') + err = append_filter(&env.deny_filters, &env.deny_filter_cnt, arg + 1); + else + err = append_filter(&env.allow_filters, &env.allow_filter_cnt, arg); + if (err) { + fprintf(stderr, "Failed to collect program filter expressions: %d\n", err); + return err; + } + break; + case ARGP_KEY_ARG: + tmp = realloc(env.filenames, (env.filename_cnt + 1) * sizeof(*env.filenames)); + if (!tmp) + return -ENOMEM; + env.filenames = tmp; + env.filenames[env.filename_cnt] = strdup(arg); + if (!env.filenames[env.filename_cnt]) + return -ENOMEM; + env.filename_cnt++; + break; + default: + return ARGP_ERR_UNKNOWN; + } + return 0; +} + +static const struct argp argp = { + .options = opts, + .parser = parse_arg, + .doc = argp_program_doc, +}; + + +/* Adapted from 
perf/util/string.c */ +static bool glob_matches(const char *str, const char *pat) +{ + while (*str && *pat && *pat != '*') { + if (*str != *pat) + return false; + str++; + pat++; + } + /* Check wild card */ + if (*pat == '*') { + while (*pat == '*') + pat++; + if (!*pat) /* Tail wild card matches all */ + return true; + while (*str) + if (glob_matches(str++, pat)) + return true; + } + return !*str && !*pat; +} + +static bool should_process_file(const char *filename) +{ + int i; + + if (env.deny_filter_cnt > 0) { + for (i = 0; i < env.deny_filter_cnt; i++) { + if (glob_matches(filename, env.deny_filters[i].file_glob)) + return false; + } + } + + if (env.allow_filter_cnt == 0) + return true; + + for (i = 0; i < env.allow_filter_cnt; i++) { + if (glob_matches(filename, env.allow_filters[i].file_glob)) + return true; + } + + return false; +} + +static bool is_bpf_obj_file(const char *path) { + Elf64_Ehdr *ehdr; + int fd, err = -EINVAL; + Elf *elf = NULL; + + fd = open(path, O_RDONLY | O_CLOEXEC); + if (fd < 0) + return true; /* we'll fail later and propagate error */ + + /* ensure libelf is initialized */ + (void)elf_version(EV_CURRENT); + + elf = elf_begin(fd, ELF_C_READ, NULL); + if (!elf) + goto cleanup; + + if (elf_kind(elf) != ELF_K_ELF || gelf_getclass(elf) != ELFCLASS64) + goto cleanup; + + ehdr = elf64_getehdr(elf); + /* Old LLVM set e_machine to EM_NONE */ + if (!ehdr || ehdr->e_type != ET_REL || (ehdr->e_machine && ehdr->e_machine != EM_BPF)) + goto cleanup; + + err = 0; +cleanup: + if (elf) + elf_end(elf); + close(fd); + return err == 0; +} + +static bool should_process_prog(const char *path, const char *prog_name) +{ + const char *filename = basename(path); + int i; + + if (env.deny_filter_cnt > 0) { + for (i = 0; i < env.deny_filter_cnt; i++) { + if (glob_matches(filename, env.deny_filters[i].file_glob)) + return false; + if (!env.deny_filters[i].prog_glob) + continue; + if (glob_matches(prog_name, env.deny_filters[i].prog_glob)) + return false; + } + } + 
+ if (env.allow_filter_cnt == 0) + return true; + + for (i = 0; i < env.allow_filter_cnt; i++) { + if (!glob_matches(filename, env.allow_filters[i].file_glob)) + continue; + /* if filter specifies only filename glob part, it implicitly + * allows all progs within that file + */ + if (!env.allow_filters[i].prog_glob) + return true; + if (glob_matches(prog_name, env.allow_filters[i].prog_glob)) + return true; + } + + return false; +} + +static int append_filter(struct filter **filters, int *cnt, const char *str) +{ + struct filter *f; + void *tmp; + const char *p; + + tmp = realloc(*filters, (*cnt + 1) * sizeof(**filters)); + if (!tmp) + return -ENOMEM; + *filters = tmp; + + f = &(*filters)[*cnt]; + f->file_glob = f->prog_glob = NULL; + + /* filter can be specified either as "<obj-glob>" or "<obj-glob>/<prog-glob>" */ + p = strchr(str, '/'); + if (!p) { + f->file_glob = strdup(str); + if (!f->file_glob) + return -ENOMEM; + } else { + f->file_glob = strndup(str, p - str); + f->prog_glob = strdup(p + 1); + if (!f->file_glob || !f->prog_glob) { + free(f->file_glob); + free(f->prog_glob); + f->file_glob = f->prog_glob = NULL; + return -ENOMEM; + } + } + + *cnt = *cnt + 1; + return 0; +} + +static int append_filter_file(const char *path) +{ + char buf[1024]; + FILE *f; + int err = 0; + + f = fopen(path, "r"); + if (!f) { + err = -errno; + fprintf(stderr, "Failed to open filters in '%s': %d\n", path, err); + return err; + } + + while (fscanf(f, " %1023[^\n]\n", buf) == 1) { + /* lines starting with # are comments, skip them */ + if (buf[0] == '\0' || buf[0] == '#') + continue; + /* lines starting with ! 
are negative match filters */ + if (buf[0] == '!') + err = append_filter(&env.deny_filters, &env.deny_filter_cnt, buf + 1); + else + err = append_filter(&env.allow_filters, &env.allow_filter_cnt, buf); + if (err) + goto cleanup; + } + +cleanup: + fclose(f); + return err; +} + +static const struct stat_specs default_output_spec = { + .spec_cnt = 7, + .ids = { + FILE_NAME, PROG_NAME, VERDICT, DURATION, + TOTAL_INSNS, TOTAL_STATES, PEAK_STATES, + }, +}; + +static const struct stat_specs default_sort_spec = { + .spec_cnt = 2, + .ids = { + FILE_NAME, PROG_NAME, + }, + .asc = { true, true, }, +}; + +static struct stat_def { + const char *header; + const char *names[4]; + bool asc_by_default; +} stat_defs[] = { + [FILE_NAME] = { "File", {"file_name", "filename", "file"}, true /* asc */ }, + [PROG_NAME] = { "Program", {"prog_name", "progname", "prog"}, true /* asc */ }, + [VERDICT] = { "Verdict", {"verdict"}, true /* asc: failure, success */ }, + [DURATION] = { "Duration (us)", {"duration", "dur"}, }, + [TOTAL_INSNS] = { "Total insns", {"total_insns", "insns"}, }, + [TOTAL_STATES] = { "Total states", {"total_states", "states"}, }, + [PEAK_STATES] = { "Peak states", {"peak_states"}, }, + [MAX_STATES_PER_INSN] = { "Max states per insn", {"max_states_per_insn"}, }, + [MARK_READ_MAX_LEN] = { "Max mark read length", {"max_mark_read_len", "mark_read"}, }, +}; + +static int parse_stat(const char *stat_name, struct stat_specs *specs) +{ + int id, i; + + if (specs->spec_cnt >= ARRAY_SIZE(specs->ids)) { + fprintf(stderr, "Can't specify more than %zd stats\n", ARRAY_SIZE(specs->ids)); + return -E2BIG; + } + + for (id = 0; id < ARRAY_SIZE(stat_defs); id++) { + struct stat_def *def = &stat_defs[id]; + + for (i = 0; i < ARRAY_SIZE(stat_defs[id].names); i++) { + if (!def->names[i] || strcmp(def->names[i], stat_name) != 0) + continue; + + specs->ids[specs->spec_cnt] = id; + specs->asc[specs->spec_cnt] = def->asc_by_default; + specs->spec_cnt++; + + return 0; + } + } + + fprintf(stderr,
"Unrecognized stat name '%s'\n", stat_name); + return -ESRCH; +} + +/* Parse a comma-separated list of stat names and append each one to specs; returns 0 or a negative error code */ +static int parse_stats(const char *stats_str, struct stat_specs *specs) +{ + char *input, *state = NULL, *next; + int err = 0; + + /* strtok_r() modifies its input, so tokenize a private copy */ + input = strdup(stats_str); + if (!input) + return -ENOMEM; + + while ((next = strtok_r(state ? NULL : input, ",", &state))) { + err = parse_stat(next, specs); + if (err) + break; + } + + /* parse_stat() stores only ids/flags, never the token pointers, so the copy can be released */ + free(input); + return err; +} + +static void free_verif_stats(struct verif_stats *stats, size_t stat_cnt) +{ + int i; + + if (!stats) + return; + + for (i = 0; i < stat_cnt; i++) { + free(stats[i].file_name); + free(stats[i].prog_name); + } + free(stats); +} + +static char verif_log_buf[64 * 1024]; + +#define MAX_PARSED_LOG_LINES 100 + +static int parse_verif_log(char * const buf, size_t buf_sz, struct verif_stats *s) +{ + const char *cur; + int pos, lines; + + buf[buf_sz - 1] = '\0'; + + for (pos = strlen(buf) - 1, lines = 0; pos >= 0 && lines < MAX_PARSED_LOG_LINES; lines++) { + /* find previous endline or otherwise take the start of log buf */ + for (cur = &buf[pos]; cur > buf && cur[0] != '\n'; cur--, pos--) { + } + /* next time start from end of previous line (or pos goes to <0) */ + pos--; + /* if we found endline, point right after endline symbol; + * otherwise, stay at the beginning of log buf + */ + if (cur[0] == '\n') + cur++; + + if (1 == sscanf(cur, "verification time %ld usec\n", &s->stats[DURATION])) + continue; + /* "%*d" is assignment-suppressed and not counted, so a full match stores 5 values */ + if (5 == sscanf(cur, "processed %ld insns (limit %*d) max_states_per_insn %ld total_states %ld peak_states %ld mark_read %ld", + &s->stats[TOTAL_INSNS], + &s->stats[MAX_STATES_PER_INSN], + &s->stats[TOTAL_STATES], + &s->stats[PEAK_STATES], + &s->stats[MARK_READ_MAX_LEN])) + continue; + } + + return 0; +} + +static int process_prog(const char *filename, struct bpf_object *obj, struct bpf_program *prog) +{ + const char *prog_name = bpf_program__name(prog); + size_t buf_sz = sizeof(verif_log_buf); + char *buf = verif_log_buf; + struct verif_stats *stats; + int err = 0; + void *tmp;
+ + if (!should_process_prog(filename, bpf_program__name(prog))) { + env.progs_skipped++; + return 0; + } + + tmp = realloc(env.prog_stats, (env.prog_stat_cnt + 1) * sizeof(*env.prog_stats)); + if (!tmp) + return -ENOMEM; + env.prog_stats = tmp; + stats = &env.prog_stats[env.prog_stat_cnt++]; + memset(stats, 0, sizeof(*stats)); + + if (env.verbose) { + buf_sz = 16 * 1024 * 1024; + buf = malloc(buf_sz); + if (!buf) + return -ENOMEM; + bpf_program__set_log_buf(prog, buf, buf_sz); + bpf_program__set_log_level(prog, env.log_level | 4); /* stats + log */ + } else { + bpf_program__set_log_buf(prog, buf, buf_sz); + bpf_program__set_log_level(prog, 4); /* only verifier stats */ + } + verif_log_buf[0] = '\0'; + + err = bpf_object__load(obj); + env.progs_processed++; + + stats->file_name = strdup(basename(filename)); + stats->prog_name = strdup(bpf_program__name(prog)); + stats->stats[VERDICT] = err == 0; /* 1 - success, 0 - failure */ + parse_verif_log(buf, buf_sz, stats); + + if (env.verbose) { + printf("PROCESSING %s/%s, DURATION US: %ld, VERDICT: %s, VERIFIER LOG:\n%s\n", + filename, prog_name, stats->stats[DURATION], + err ?
"failure" : "success", buf); + } + + if (verif_log_buf != buf) + free(buf); + + return 0; +}; + +static int process_obj(const char *filename) +{ + struct bpf_object *obj = NULL, *tobj; + struct bpf_program *prog, *tprog, *lprog; + libbpf_print_fn_t old_libbpf_print_fn; + LIBBPF_OPTS(bpf_object_open_opts, opts); + int err = 0, prog_cnt = 0; + + if (!should_process_file(basename(filename))) { + if (env.verbose) + printf("Skipping '%s' due to filters...\n", filename); + env.files_skipped++; + return 0; + } + if (!is_bpf_obj_file(filename)) { + if (env.verbose) + printf("Skipping '%s' as it's not a BPF object file...\n", filename); + env.files_skipped++; + return 0; + } + + if (!env.quiet && env.out_fmt == RESFMT_TABLE) + printf("Processing '%s'...\n", basename(filename)); + + old_libbpf_print_fn = libbpf_set_print(libbpf_print_fn); + obj = bpf_object__open_file(filename, &opts); + if (!obj) { + /* if libbpf can't open BPF object file, it could be because + * that BPF object file is incomplete and has to be statically + * linked into a final BPF object file; instead of bailing + * out, report it into stderr, mark it as skipped, and + * proceed + */ + fprintf(stderr, "Failed to open '%s': %d\n", filename, -errno); + env.files_skipped++; + err = 0; + goto cleanup; + } + + env.files_processed++; + + bpf_object__for_each_program(prog, obj) { + prog_cnt++; + } + + if (prog_cnt == 1) { + prog = bpf_object__next_program(obj, NULL); + bpf_program__set_autoload(prog, true); + process_prog(filename, obj, prog); + goto cleanup; + } + + bpf_object__for_each_program(prog, obj) { + const char *prog_name = bpf_program__name(prog); + + tobj = bpf_object__open_file(filename, &opts); + if (!tobj) { + err = -errno; + fprintf(stderr, "Failed to open '%s': %d\n", filename, err); + goto cleanup; + } + + bpf_object__for_each_program(tprog, tobj) { + const char *tprog_name = bpf_program__name(tprog); + + if (strcmp(prog_name, tprog_name) == 0) { + bpf_program__set_autoload(tprog, true); +
lprog = tprog; + } else { + bpf_program__set_autoload(tprog, false); + } + } + + process_prog(filename, tobj, lprog); + bpf_object__close(tobj); + } + +cleanup: + bpf_object__close(obj); + libbpf_set_print(old_libbpf_print_fn); + return err; +} + +static int cmp_stat(const struct verif_stats *s1, const struct verif_stats *s2, + enum stat_id id, bool asc) +{ + int cmp = 0; + + switch (id) { + case FILE_NAME: + cmp = strcmp(s1->file_name, s2->file_name); + break; + case PROG_NAME: + cmp = strcmp(s1->prog_name, s2->prog_name); + break; + case VERDICT: + case DURATION: + case TOTAL_INSNS: + case TOTAL_STATES: + case PEAK_STATES: + case MAX_STATES_PER_INSN: + case MARK_READ_MAX_LEN: { + long v1 = s1->stats[id]; + long v2 = s2->stats[id]; + + if (v1 != v2) + cmp = v1 < v2 ? -1 : 1; + break; + } + default: + fprintf(stderr, "Unrecognized stat #%d\n", id); + exit(1); + } + + return asc ? cmp : -cmp; +} + +static int cmp_prog_stats(const void *v1, const void *v2) +{ + const struct verif_stats *s1 = v1, *s2 = v2; + int i, cmp; + + for (i = 0; i < env.sort_spec.spec_cnt; i++) { + cmp = cmp_stat(s1, s2, env.sort_spec.ids[i], env.sort_spec.asc[i]); + if (cmp != 0) + return cmp; + } + + return 0; +} + +#define HEADER_CHAR '-' +#define COLUMN_SEP " " + +static void output_header_underlines(void) +{ + int i, j, len; + + for (i = 0; i < env.output_spec.spec_cnt; i++) { + len = env.output_spec.lens[i]; + + printf("%s", i == 0 ? "" : COLUMN_SEP); + for (j = 0; j < len; j++) + printf("%c", HEADER_CHAR); + } + printf("\n"); +} + +static void output_headers(enum resfmt fmt) +{ + int i, len; + + for (i = 0; i < env.output_spec.spec_cnt; i++) { + int id = env.output_spec.ids[i]; + int *max_len = &env.output_spec.lens[i]; + + switch (fmt) { + case RESFMT_TABLE_CALCLEN: + len = snprintf(NULL, 0, "%s", stat_defs[id].header); + if (len > *max_len) + *max_len = len; + break; + case RESFMT_TABLE: + printf("%s%-*s", i == 0 ? 
"" : COLUMN_SEP, *max_len, stat_defs[id].header); + if (i == env.output_spec.spec_cnt - 1) + printf("\n"); + break; + case RESFMT_CSV: + printf("%s%s", i == 0 ? "" : ",", stat_defs[id].names[0]); + if (i == env.output_spec.spec_cnt - 1) + printf("\n"); + break; + } + } + + if (fmt == RESFMT_TABLE) + output_header_underlines(); +} + +static void prepare_value(const struct verif_stats *s, enum stat_id id, + const char **str, long *val) +{ + switch (id) { + case FILE_NAME: + *str = s->file_name; + break; + case PROG_NAME: + *str = s->prog_name; + break; + case VERDICT: + *str = s->stats[VERDICT] ? "success" : "failure"; + break; + case DURATION: + case TOTAL_INSNS: + case TOTAL_STATES: + case PEAK_STATES: + case MAX_STATES_PER_INSN: + case MARK_READ_MAX_LEN: + *val = s->stats[id]; + break; + default: + fprintf(stderr, "Unrecognized stat #%d\n", id); + exit(1); + } +} + +static void output_stats(const struct verif_stats *s, enum resfmt fmt, bool last) +{ + int i; + + for (i = 0; i < env.output_spec.spec_cnt; i++) { + int id = env.output_spec.ids[i]; + int *max_len = &env.output_spec.lens[i], len; + const char *str = NULL; + long val = 0; + + prepare_value(s, id, &str, &val); + + switch (fmt) { + case RESFMT_TABLE_CALCLEN: + if (str) + len = snprintf(NULL, 0, "%s", str); + else + len = snprintf(NULL, 0, "%ld", val); + if (len > *max_len) + *max_len = len; + break; + case RESFMT_TABLE: + if (str) + printf("%s%-*s", i == 0 ? "" : COLUMN_SEP, *max_len, str); + else + printf("%s%*ld", i == 0 ? "" : COLUMN_SEP, *max_len, val); + if (i == env.output_spec.spec_cnt - 1) + printf("\n"); + break; + case RESFMT_CSV: + if (str) + printf("%s%s", i == 0 ? "" : ",", str); + else + printf("%s%ld", i == 0 ? "" : ",", val); + if (i == env.output_spec.spec_cnt - 1) + printf("\n"); + break; + } + } + + if (last && fmt == RESFMT_TABLE) { + output_header_underlines(); + printf("Done. Processed %d files, %d programs. 
Skipped %d files, %d programs.\n", + env.files_processed, env.files_skipped, env.progs_processed, env.progs_skipped); + } +} + +static int handle_verif_mode(void) +{ + int i, err; + + if (env.filename_cnt == 0) { + fprintf(stderr, "Please provide path to BPF object file!\n"); + argp_help(&argp, stderr, ARGP_HELP_USAGE, "veristat"); + return -EINVAL; + } + + for (i = 0; i < env.filename_cnt; i++) { + err = process_obj(env.filenames[i]); + if (err) { + fprintf(stderr, "Failed to process '%s': %d\n", env.filenames[i], err); + return err; + } + } + + qsort(env.prog_stats, env.prog_stat_cnt, sizeof(*env.prog_stats), cmp_prog_stats); + + if (env.out_fmt == RESFMT_TABLE) { + /* calculate column widths */ + output_headers(RESFMT_TABLE_CALCLEN); + for (i = 0; i < env.prog_stat_cnt; i++) + output_stats(&env.prog_stats[i], RESFMT_TABLE_CALCLEN, false); + } + + /* actually output the table */ + output_headers(env.out_fmt); + for (i = 0; i < env.prog_stat_cnt; i++) { + output_stats(&env.prog_stats[i], env.out_fmt, i == env.prog_stat_cnt - 1); + } + + return 0; +} + +static int parse_stat_value(const char *str, enum stat_id id, struct verif_stats *st) +{ + switch (id) { + case FILE_NAME: + st->file_name = strdup(str); + if (!st->file_name) + return -ENOMEM; + break; + case PROG_NAME: + st->prog_name = strdup(str); + if (!st->prog_name) + return -ENOMEM; + break; + case VERDICT: + if (strcmp(str, "success") == 0) { + st->stats[VERDICT] = true; + } else if (strcmp(str, "failure") == 0) { + st->stats[VERDICT] = false; + } else { + fprintf(stderr, "Unrecognized verification verdict '%s'\n", str); + return -EINVAL; + } + break; + case DURATION: + case TOTAL_INSNS: + case TOTAL_STATES: + case PEAK_STATES: + case MAX_STATES_PER_INSN: + case MARK_READ_MAX_LEN: { + long val; + int err, n; + + if (sscanf(str, "%ld %n", &val, &n) != 1 || n != strlen(str)) { + err = -errno; + fprintf(stderr, "Failed to parse '%s' as integer\n", str); + return err; + } + + st->stats[id] = val; + break; + } + 
default: + fprintf(stderr, "Unrecognized stat #%d\n", id); + return -EINVAL; + } + return 0; +} + +static int parse_stats_csv(const char *filename, struct stat_specs *specs, + struct verif_stats **statsp, int *stat_cntp) +{ + char line[4096]; + FILE *f; + int err = 0; + bool header = true; + + f = fopen(filename, "r"); + if (!f) { + err = -errno; + fprintf(stderr, "Failed to open '%s': %d\n", filename, err); + return err; + } + + *stat_cntp = 0; + + while (fgets(line, sizeof(line), f)) { + char *input = line, *state = NULL, *next; + struct verif_stats *st = NULL; + int col = 0; + + if (!header) { + void *tmp; + + tmp = realloc(*statsp, (*stat_cntp + 1) * sizeof(**statsp)); + if (!tmp) { + err = -ENOMEM; + goto cleanup; + } + *statsp = tmp; + + st = &(*statsp)[*stat_cntp]; + memset(st, 0, sizeof(*st)); + + *stat_cntp += 1; + } + + while ((next = strtok_r(state ? NULL : input, ",\n", &state))) { + if (header) { + /* for the first line, set up spec stats */ + err = parse_stat(next, specs); + if (err) + goto cleanup; + continue; + } + + /* for all other lines, parse values based on spec */ + if (col >= specs->spec_cnt) { + fprintf(stderr, "Found extraneous column #%d in row #%d of '%s'\n", + col, *stat_cntp, filename); + err = -EINVAL; + goto cleanup; + } + err = parse_stat_value(next, specs->ids[col], st); + if (err) + goto cleanup; + col++; + } + + if (header) { + header = false; + continue; + } + + if (col < specs->spec_cnt) { + fprintf(stderr, "Not enough columns in row #%d in '%s'\n", + *stat_cntp, filename); + err = -EINVAL; + goto cleanup; + } + + if (!st->file_name || !st->prog_name) { + fprintf(stderr, "Row #%d in '%s' is missing file and/or program name\n", + *stat_cntp, filename); + err = -EINVAL; + goto cleanup; + } + + /* in comparison mode we can only check filters after we + * parsed entire line; if row should be ignored we pretend we + * never parsed it + */ + if (!should_process_prog(st->file_name, st->prog_name)) { + free(st->file_name); + 
free(st->prog_name); + *stat_cntp -= 1; + } + } + + if (!feof(f)) { + err = -errno; + fprintf(stderr, "Failed I/O for '%s': %d\n", filename, err); + } + +cleanup: + fclose(f); + return err; +} + +/* empty/zero stats for mismatched rows */ +static const struct verif_stats fallback_stats = { .file_name = "", .prog_name = "" }; + +static bool is_key_stat(enum stat_id id) +{ + return id == FILE_NAME || id == PROG_NAME; +} + +static void output_comp_header_underlines(void) +{ + int i, j, k; + + for (i = 0; i < env.output_spec.spec_cnt; i++) { + int id = env.output_spec.ids[i]; + int max_j = is_key_stat(id) ? 1 : 3; + + for (j = 0; j < max_j; j++) { + int len = env.output_spec.lens[3 * i + j]; + + printf("%s", i + j == 0 ? "" : COLUMN_SEP); + + for (k = 0; k < len; k++) + printf("%c", HEADER_CHAR); + } + } + printf("\n"); +} + +static void output_comp_headers(enum resfmt fmt) +{ + static const char *table_sfxs[3] = {" (A)", " (B)", " (DIFF)"}; + static const char *name_sfxs[3] = {"_base", "_comp", "_diff"}; + int i, j, len; + + for (i = 0; i < env.output_spec.spec_cnt; i++) { + int id = env.output_spec.ids[i]; + /* key stats don't have A/B/DIFF columns, they are common for both data sets */ + int max_j = is_key_stat(id) ? 1 : 3; + + for (j = 0; j < max_j; j++) { + int *max_len = &env.output_spec.lens[3 * i + j]; + bool last = (i == env.output_spec.spec_cnt - 1) && (j == max_j - 1); + const char *sfx; + + switch (fmt) { + case RESFMT_TABLE_CALCLEN: + sfx = is_key_stat(id) ? "" : table_sfxs[j]; + len = snprintf(NULL, 0, "%s%s", stat_defs[id].header, sfx); + if (len > *max_len) + *max_len = len; + break; + case RESFMT_TABLE: + sfx = is_key_stat(id) ? "" : table_sfxs[j]; + printf("%s%-*s%s", i + j == 0 ? "" : COLUMN_SEP, + *max_len - (int)strlen(sfx), stat_defs[id].header, sfx); + if (last) + printf("\n"); + break; + case RESFMT_CSV: + sfx = is_key_stat(id) ? "" : name_sfxs[j]; + printf("%s%s%s", i + j == 0 ? 
"" : ",", stat_defs[id].names[0], sfx); + if (last) + printf("\n"); + break; + } + } + } + + if (fmt == RESFMT_TABLE) + output_comp_header_underlines(); +} + +static void output_comp_stats(const struct verif_stats *base, const struct verif_stats *comp, + enum resfmt fmt, bool last) +{ + char base_buf[1024] = {}, comp_buf[1024] = {}, diff_buf[1024] = {}; + int i; + + for (i = 0; i < env.output_spec.spec_cnt; i++) { + int id = env.output_spec.ids[i], len; + int *max_len_base = &env.output_spec.lens[3 * i + 0]; + int *max_len_comp = &env.output_spec.lens[3 * i + 1]; + int *max_len_diff = &env.output_spec.lens[3 * i + 2]; + const char *base_str = NULL, *comp_str = NULL; + long base_val = 0, comp_val = 0, diff_val = 0; + + prepare_value(base, id, &base_str, &base_val); + prepare_value(comp, id, &comp_str, &comp_val); + + /* normalize all the outputs to be in string buffers for simplicity */ + if (is_key_stat(id)) { + /* key stats (file and program name) are always strings */ + if (base != &fallback_stats) + snprintf(base_buf, sizeof(base_buf), "%s", base_str); + else + snprintf(base_buf, sizeof(base_buf), "%s", comp_str); + } else if (base_str) { + snprintf(base_buf, sizeof(base_buf), "%s", base_str); + snprintf(comp_buf, sizeof(comp_buf), "%s", comp_str); + if (strcmp(base_str, comp_str) == 0) + snprintf(diff_buf, sizeof(diff_buf), "%s", "MATCH"); + else + snprintf(diff_buf, sizeof(diff_buf), "%s", "MISMATCH"); + } else { + snprintf(base_buf, sizeof(base_buf), "%ld", base_val); + snprintf(comp_buf, sizeof(comp_buf), "%ld", comp_val); + + diff_val = comp_val - base_val; + if (base == &fallback_stats || comp == &fallback_stats || base_val == 0) { + snprintf(diff_buf, sizeof(diff_buf), "%+ld (%+.2lf%%)", + diff_val, comp_val < base_val ? 
-100.0 : 100.0); + } else { + snprintf(diff_buf, sizeof(diff_buf), "%+ld (%+.2lf%%)", + diff_val, diff_val * 100.0 / base_val); + } + } + + switch (fmt) { + case RESFMT_TABLE_CALCLEN: + len = strlen(base_buf); + if (len > *max_len_base) + *max_len_base = len; + if (!is_key_stat(id)) { + len = strlen(comp_buf); + if (len > *max_len_comp) + *max_len_comp = len; + len = strlen(diff_buf); + if (len > *max_len_diff) + *max_len_diff = len; + } + break; + case RESFMT_TABLE: { + /* string outputs are left-aligned, number outputs are right-aligned */ + const char *fmt = base_str ? "%s%-*s" : "%s%*s"; + + printf(fmt, i == 0 ? "" : COLUMN_SEP, *max_len_base, base_buf); + if (!is_key_stat(id)) { + printf(fmt, COLUMN_SEP, *max_len_comp, comp_buf); + printf(fmt, COLUMN_SEP, *max_len_diff, diff_buf); + } + if (i == env.output_spec.spec_cnt - 1) + printf("\n"); + break; + } + case RESFMT_CSV: + printf("%s%s", i == 0 ? "" : ",", base_buf); + if (!is_key_stat(id)) { + printf("%s%s", i == 0 ? "" : ",", comp_buf); + printf("%s%s", i == 0 ? 
"" : ",", diff_buf); + } + if (i == env.output_spec.spec_cnt - 1) + printf("\n"); + break; + } + } + + if (last && fmt == RESFMT_TABLE) + output_comp_header_underlines(); +} + +static int cmp_stats_key(const struct verif_stats *base, const struct verif_stats *comp) +{ + int r; + + r = strcmp(base->file_name, comp->file_name); + if (r != 0) + return r; + return strcmp(base->prog_name, comp->prog_name); +} + +static int handle_comparison_mode(void) +{ + struct stat_specs base_specs = {}, comp_specs = {}; + enum resfmt cur_fmt; + int err, i, j; + + if (env.filename_cnt != 2) { + fprintf(stderr, "Comparison mode expects exactly two input CSV files!\n"); + argp_help(&argp, stderr, ARGP_HELP_USAGE, "veristat"); + return -EINVAL; + } + + err = parse_stats_csv(env.filenames[0], &base_specs, + &env.baseline_stats, &env.baseline_stat_cnt); + if (err) { + fprintf(stderr, "Failed to parse stats from '%s': %d\n", env.filenames[0], err); + return err; + } + err = parse_stats_csv(env.filenames[1], &comp_specs, + &env.prog_stats, &env.prog_stat_cnt); + if (err) { + fprintf(stderr, "Failed to parse stats from '%s': %d\n", env.filenames[1], err); + return err; + } + + /* To keep it simple we validate that the set and order of stats in + * both CSVs are exactly the same. This can be lifted with a bit more + * pre-processing later. 
+ */ + if (base_specs.spec_cnt != comp_specs.spec_cnt) { + fprintf(stderr, "Number of stats in '%s' and '%s' differs (%d != %d)!\n", + env.filenames[0], env.filenames[1], + base_specs.spec_cnt, comp_specs.spec_cnt); + return -EINVAL; + } + for (i = 0; i < base_specs.spec_cnt; i++) { + if (base_specs.ids[i] != comp_specs.ids[i]) { + fprintf(stderr, "Stats composition differs between '%s' and '%s' (%s != %s)!\n", + env.filenames[0], env.filenames[1], + stat_defs[base_specs.ids[i]].names[0], + stat_defs[comp_specs.ids[i]].names[0]); + return -EINVAL; + } + } + + qsort(env.prog_stats, env.prog_stat_cnt, sizeof(*env.prog_stats), cmp_prog_stats); + qsort(env.baseline_stats, env.baseline_stat_cnt, sizeof(*env.baseline_stats), cmp_prog_stats); + + /* for human-readable table output we need to do extra pass to + * calculate column widths, so we substitute current output format + * with RESFMT_TABLE_CALCLEN and later revert it back to RESFMT_TABLE + * and do everything again. + */ + if (env.out_fmt == RESFMT_TABLE) + cur_fmt = RESFMT_TABLE_CALCLEN; + else + cur_fmt = env.out_fmt; + +one_more_time: + output_comp_headers(cur_fmt); + + /* If baseline and comparison datasets have different subset of rows + * (we match by 'object + prog' as a unique key) then assume + * empty/missing/zero value for rows that are missing in the opposite + * data set + */ + i = j = 0; + while (i < env.baseline_stat_cnt || j < env.prog_stat_cnt) { + bool last = (i == env.baseline_stat_cnt - 1) || (j == env.prog_stat_cnt - 1); + const struct verif_stats *base, *comp; + int r; + + base = i < env.baseline_stat_cnt ? &env.baseline_stats[i] : &fallback_stats; + comp = j < env.prog_stat_cnt ? 
&env.prog_stats[j] : &fallback_stats; + + if (!base->file_name || !base->prog_name) { + fprintf(stderr, "Entry #%d in '%s' doesn't have file and/or program name specified!\n", + i, env.filenames[0]); + return -EINVAL; + } + if (!comp->file_name || !comp->prog_name) { + fprintf(stderr, "Entry #%d in '%s' doesn't have file and/or program name specified!\n", + j, env.filenames[1]); + return -EINVAL; + } + + r = cmp_stats_key(base, comp); + if (r == 0) { + output_comp_stats(base, comp, cur_fmt, last); + i++; + j++; + } else if (comp == &fallback_stats || r < 0) { + output_comp_stats(base, &fallback_stats, cur_fmt, last); + i++; + } else { + output_comp_stats(&fallback_stats, comp, cur_fmt, last); + j++; + } + } + + if (cur_fmt == RESFMT_TABLE_CALCLEN) { + cur_fmt = RESFMT_TABLE; + goto one_more_time; /* ... this time with feeling */ + } + + return 0; +} + +int main(int argc, char **argv) +{ + int err = 0, i; + + if (argp_parse(&argp, argc, argv, 0, NULL, NULL)) + return 1; + + if (env.verbose && env.quiet) { + fprintf(stderr, "Verbose and quiet modes are incompatible, please specify just one or neither!\n"); + argp_help(&argp, stderr, ARGP_HELP_USAGE, "veristat"); + return 1; + } + if (env.verbose && env.log_level == 0) + env.log_level = 1; + + if (env.output_spec.spec_cnt == 0) + env.output_spec = default_output_spec; + if (env.sort_spec.spec_cnt == 0) + env.sort_spec = default_sort_spec; + + if (env.comparison_mode) + err = handle_comparison_mode(); + else + err = handle_verif_mode(); + + free_verif_stats(env.prog_stats, env.prog_stat_cnt); + free_verif_stats(env.baseline_stats, env.baseline_stat_cnt); + for (i = 0; i < env.filename_cnt; i++) + free(env.filenames[i]); + free(env.filenames); + for (i = 0; i < env.allow_filter_cnt; i++) { + free(env.allow_filters[i].file_glob); + free(env.allow_filters[i].prog_glob); + } + free(env.allow_filters); + for (i = 0; i < env.deny_filter_cnt; i++) { + free(env.deny_filters[i].file_glob); + free(env.deny_filters[i].prog_glob); 
+ } + free(env.deny_filters); + return -err; +} diff --git a/tools/testing/selftests/bpf/veristat.cfg b/tools/testing/selftests/bpf/veristat.cfg new file mode 100644 index 000000000000..1a385061618d --- /dev/null +++ b/tools/testing/selftests/bpf/veristat.cfg @@ -0,0 +1,17 @@ +# pre-canned list of rather complex selftests/bpf BPF object files to monitor +# BPF verifier's performance on +bpf_flow* +bpf_loop_bench* +loop* +netif_receive_skb* +profiler* +pyperf* +strobemeta* +test_cls_redirect* +test_l4lb +test_sysctl* +test_tcp_hdr_* +test_usdt* +test_verif_scale* +test_xdp_noinline* +xdp_synproxy* diff --git a/tools/testing/selftests/bpf/xskxceiver.c b/tools/testing/selftests/bpf/xskxceiver.c index ef33309bbe49..d1a5f3218c34 100644 --- a/tools/testing/selftests/bpf/xskxceiver.c +++ b/tools/testing/selftests/bpf/xskxceiver.c @@ -1953,9 +1953,6 @@ int main(int argc, char **argv) pkt_stream_delete(tx_pkt_stream_default); pkt_stream_delete(rx_pkt_stream_default); - free(ifobj_rx->umem); - if (!ifobj_tx->shared_umem) - free(ifobj_tx->umem); ifobject_delete(ifobj_tx); ifobject_delete(ifobj_rx); |