From d54dba41999498b38a40940e1123019d50b26496 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 11 Feb 2021 13:03:28 +0100 Subject: objtool: Allow UNWIND_HINT to suppress dodgy stack modifications rewind_stack_do_exit() UNWIND_HINT_FUNC /* Prevent any naive code from trying to unwind to our caller. */ xorl %ebp, %ebp movq PER_CPU_VAR(cpu_current_top_of_stack), %rax leaq -PTREGS_SIZE(%rax), %rsp UNWIND_HINT_REGS call do_exit Does unspeakable things to the stack, which objtool currently fails to detect due to a limitation in instruction decoding. This will be rectified after which the above will result in: arch/x86/entry/entry_64.o: warning: objtool: .text+0xab: unsupported stack register modification Allow the UNWIND_HINT on the next instruction to suppress this, it will overwrite the state anyway. Suggested-by: Josh Poimboeuf Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Ingo Molnar Acked-by: Josh Poimboeuf Tested-by: Nick Desaulniers Link: https://lkml.kernel.org/r/20210211173626.918498579@infradead.org --- tools/objtool/check.c | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/tools/objtool/check.c b/tools/objtool/check.c index 068cdb41f76f..12b8f0f01176 100644 --- a/tools/objtool/check.c +++ b/tools/objtool/check.c @@ -1959,8 +1959,9 @@ static void restore_reg(struct cfi_state *cfi, unsigned char reg) * 41 5d pop %r13 * c3 retq */ -static int update_cfi_state(struct instruction *insn, struct cfi_state *cfi, - struct stack_op *op) +static int update_cfi_state(struct instruction *insn, + struct instruction *next_insn, + struct cfi_state *cfi, struct stack_op *op) { struct cfi_reg *cfa = &cfi->cfa; struct cfi_reg *regs = cfi->regs; @@ -2161,7 +2162,7 @@ static int update_cfi_state(struct instruction *insn, struct cfi_state *cfi, break; } - if (op->dest.reg == cfi->cfa.base) { + if (op->dest.reg == cfi->cfa.base && !(next_insn && next_insn->hint)) { WARN_FUNC("unsupported stack register modification", insn->sec, 
insn->offset); return -1; @@ -2433,13 +2434,15 @@ static int propagate_alt_cfi(struct objtool_file *file, struct instruction *insn return 0; } -static int handle_insn_ops(struct instruction *insn, struct insn_state *state) +static int handle_insn_ops(struct instruction *insn, + struct instruction *next_insn, + struct insn_state *state) { struct stack_op *op; list_for_each_entry(op, &insn->stack_ops, list) { - if (update_cfi_state(insn, &state->cfi, op)) + if (update_cfi_state(insn, next_insn, &state->cfi, op)) return 1; if (op->dest.type == OP_DEST_PUSHF) { @@ -2719,7 +2722,7 @@ static int validate_branch(struct objtool_file *file, struct symbol *func, return 0; } - if (handle_insn_ops(insn, &state)) + if (handle_insn_ops(insn, next_insn, &state)) return 1; switch (insn->type) { -- cgit v1.2.3 From d473b18b2ef62563fb874f9cae6e123f99129e3f Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 9 Feb 2021 20:18:21 +0100 Subject: objtool,x86: Renumber CFI_reg Make them match the instruction encoding numbering. 
Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Ingo Molnar Acked-by: Josh Poimboeuf Tested-by: Nick Desaulniers Link: https://lkml.kernel.org/r/20210211173627.033720313@infradead.org --- tools/objtool/arch/x86/include/arch/cfi_regs.h | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/tools/objtool/arch/x86/include/arch/cfi_regs.h b/tools/objtool/arch/x86/include/arch/cfi_regs.h index 79bc517efba8..0579d22c433c 100644 --- a/tools/objtool/arch/x86/include/arch/cfi_regs.h +++ b/tools/objtool/arch/x86/include/arch/cfi_regs.h @@ -4,13 +4,13 @@ #define _OBJTOOL_CFI_REGS_H #define CFI_AX 0 -#define CFI_DX 1 -#define CFI_CX 2 +#define CFI_CX 1 +#define CFI_DX 2 #define CFI_BX 3 -#define CFI_SI 4 -#define CFI_DI 5 -#define CFI_BP 6 -#define CFI_SP 7 +#define CFI_SP 4 +#define CFI_BP 5 +#define CFI_SI 6 +#define CFI_DI 7 #define CFI_R8 8 #define CFI_R9 9 #define CFI_R10 10 -- cgit v1.2.3 From 2ee0c363492f1acc1082125218e6a80c0d7d502b Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 9 Feb 2021 21:29:16 +0100 Subject: objtool,x86: Rewrite LEA decode Current LEA decoding is a bunch of special cases, properly decode the instruction, with exception of full SIB and RIP-relative modes. 
Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Ingo Molnar Acked-by: Josh Poimboeuf Tested-by: Nick Desaulniers Link: https://lkml.kernel.org/r/20210211173627.143250641@infradead.org --- tools/objtool/arch/x86/decode.c | 86 ++++++++++++++--------------------------- 1 file changed, 28 insertions(+), 58 deletions(-) diff --git a/tools/objtool/arch/x86/decode.c b/tools/objtool/arch/x86/decode.c index 549813cff8ab..d8f01387d671 100644 --- a/tools/objtool/arch/x86/decode.c +++ b/tools/objtool/arch/x86/decode.c @@ -91,9 +91,10 @@ int arch_decode_instruction(const struct elf *elf, const struct section *sec, { struct insn insn; int x86_64, sign; - unsigned char op1, op2, rex = 0, rex_b = 0, rex_r = 0, rex_w = 0, - rex_x = 0, modrm = 0, modrm_mod = 0, modrm_rm = 0, - modrm_reg = 0, sib = 0; + unsigned char op1, op2, + rex = 0, rex_b = 0, rex_r = 0, rex_w = 0, rex_x = 0, + modrm = 0, modrm_mod = 0, modrm_rm = 0, modrm_reg = 0, + sib = 0; struct stack_op *op = NULL; struct symbol *sym; @@ -328,68 +329,37 @@ int arch_decode_instruction(const struct elf *elf, const struct section *sec, break; case 0x8d: - if (sib == 0x24 && rex_w && !rex_b && !rex_x) { - - ADD_OP(op) { - if (!insn.displacement.value) { - /* lea (%rsp), reg */ - op->src.type = OP_SRC_REG; - } else { - /* lea disp(%rsp), reg */ - op->src.type = OP_SRC_ADD; - op->src.offset = insn.displacement.value; - } - op->src.reg = CFI_SP; - op->dest.type = OP_DEST_REG; - op->dest.reg = op_to_cfi_reg[modrm_reg][rex_r]; - } - - } else if (rex == 0x48 && modrm == 0x65) { - - /* lea disp(%rbp), %rsp */ - ADD_OP(op) { - op->src.type = OP_SRC_ADD; - op->src.reg = CFI_BP; - op->src.offset = insn.displacement.value; - op->dest.type = OP_DEST_REG; - op->dest.reg = CFI_SP; - } + if (modrm_mod == 3) { + WARN("invalid LEA encoding at %s:0x%lx", sec->name, offset); + break; + } - } else if (rex == 0x49 && modrm == 0x62 && - insn.displacement.value == -8) { + /* skip non 64bit ops */ + if (!rex_w) + break; - /* - * lea -0x8(%r10), 
%rsp - * - * Restoring rsp back to its original value after a - * stack realignment. - */ - ADD_OP(op) { - op->src.type = OP_SRC_ADD; - op->src.reg = CFI_R10; - op->src.offset = -8; - op->dest.type = OP_DEST_REG; - op->dest.reg = CFI_SP; - } + /* skip nontrivial SIB */ + if (modrm_rm == 4 && !(sib == 0x24 && rex_b == rex_x)) + break; - } else if (rex == 0x49 && modrm == 0x65 && - insn.displacement.value == -16) { + /* skip RIP relative displacement */ + if (modrm_rm == 5 && modrm_mod == 0) + break; - /* - * lea -0x10(%r13), %rsp - * - * Restoring rsp back to its original value after a - * stack realignment. - */ - ADD_OP(op) { + /* lea disp(%src), %dst */ + ADD_OP(op) { + op->src.offset = insn.displacement.value; + if (!op->src.offset) { + /* lea (%src), %dst */ + op->src.type = OP_SRC_REG; + } else { + /* lea disp(%src), %dst */ op->src.type = OP_SRC_ADD; - op->src.reg = CFI_R13; - op->src.offset = -16; - op->dest.type = OP_DEST_REG; - op->dest.reg = CFI_SP; } + op->src.reg = op_to_cfi_reg[modrm_rm][rex_b]; + op->dest.type = OP_DEST_REG; + op->dest.reg = op_to_cfi_reg[modrm_reg][rex_r]; } - break; case 0x8f: -- cgit v1.2.3 From ffc7e74f36a2c7424da262a32a0bbe59669677ef Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 9 Feb 2021 21:41:13 +0100 Subject: objtool,x86: Rewrite LEAVE Since we can now have multiple stack-ops per instruction, we don't need to special case LEAVE and can simply emit the composite operations. 
Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Ingo Molnar Acked-by: Josh Poimboeuf Tested-by: Nick Desaulniers Link: https://lkml.kernel.org/r/20210211173627.253273977@infradead.org --- tools/objtool/arch/x86/decode.c | 14 +++++++++++--- tools/objtool/check.c | 24 ++---------------------- tools/objtool/include/objtool/arch.h | 1 - 3 files changed, 13 insertions(+), 26 deletions(-) diff --git a/tools/objtool/arch/x86/decode.c b/tools/objtool/arch/x86/decode.c index d8f01387d671..47b9acfc6a4c 100644 --- a/tools/objtool/arch/x86/decode.c +++ b/tools/objtool/arch/x86/decode.c @@ -446,9 +446,17 @@ int arch_decode_instruction(const struct elf *elf, const struct section *sec, * mov bp, sp * pop bp */ - ADD_OP(op) - op->dest.type = OP_DEST_LEAVE; - + ADD_OP(op) { + op->src.type = OP_SRC_REG; + op->src.reg = CFI_BP; + op->dest.type = OP_DEST_REG; + op->dest.reg = CFI_SP; + } + ADD_OP(op) { + op->src.type = OP_SRC_POP; + op->dest.type = OP_DEST_REG; + op->dest.reg = CFI_BP; + } break; case 0xe3: diff --git a/tools/objtool/check.c b/tools/objtool/check.c index 12b8f0f01176..a0f762a15ad5 100644 --- a/tools/objtool/check.c +++ b/tools/objtool/check.c @@ -2020,7 +2020,7 @@ static int update_cfi_state(struct instruction *insn, } else if (op->src.reg == CFI_BP && op->dest.reg == CFI_SP && - cfa->base == CFI_BP) { + (cfa->base == CFI_BP || cfa->base == cfi->drap_reg)) { /* * mov %rbp, %rsp @@ -2217,7 +2217,7 @@ static int update_cfi_state(struct instruction *insn, cfa->offset = 0; cfi->drap_offset = -1; - } else if (regs[op->dest.reg].offset == -cfi->stack_size) { + } else if (cfi->stack_size == -regs[op->dest.reg].offset) { /* pop %reg */ restore_reg(cfi, op->dest.reg); @@ -2358,26 +2358,6 @@ static int update_cfi_state(struct instruction *insn, break; - case OP_DEST_LEAVE: - if ((!cfi->drap && cfa->base != CFI_BP) || - (cfi->drap && cfa->base != cfi->drap_reg)) { - WARN_FUNC("leave instruction with modified stack frame", - insn->sec, insn->offset); - return -1; - } - - /* 
leave (mov %rbp, %rsp; pop %rbp) */ - - cfi->stack_size = -cfi->regs[CFI_BP].offset - 8; - restore_reg(cfi, CFI_BP); - - if (!cfi->drap) { - cfa->base = CFI_SP; - cfa->offset -= 8; - } - - break; - case OP_DEST_MEM: if (op->src.type != OP_SRC_POP && op->src.type != OP_SRC_POPF) { WARN_FUNC("unknown stack-related memory operation", diff --git a/tools/objtool/include/objtool/arch.h b/tools/objtool/include/objtool/arch.h index 6ff0685f5cc5..ff21f387712d 100644 --- a/tools/objtool/include/objtool/arch.h +++ b/tools/objtool/include/objtool/arch.h @@ -35,7 +35,6 @@ enum op_dest_type { OP_DEST_MEM, OP_DEST_PUSH, OP_DEST_PUSHF, - OP_DEST_LEAVE, }; struct op_dest { -- cgit v1.2.3 From 16ef7f159c503c7befec7018ee0e82fdc311721e Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 9 Feb 2021 19:59:43 +0100 Subject: objtool,x86: Simplify register decode Since the CFI_reg number now matches the instruction encoding order do away with the op_to_cfi_reg[] and use direct assignment. Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Ingo Molnar Acked-by: Josh Poimboeuf Tested-by: Nick Desaulniers Link: https://lkml.kernel.org/r/20210211173627.362004522@infradead.org --- tools/objtool/arch/x86/decode.c | 79 ++++++++++++++++++++--------------------- 1 file changed, 39 insertions(+), 40 deletions(-) diff --git a/tools/objtool/arch/x86/decode.c b/tools/objtool/arch/x86/decode.c index 47b9acfc6a4c..5ce7dc4d8a0a 100644 --- a/tools/objtool/arch/x86/decode.c +++ b/tools/objtool/arch/x86/decode.c @@ -17,17 +17,6 @@ #include #include -static unsigned char op_to_cfi_reg[][2] = { - {CFI_AX, CFI_R8}, - {CFI_CX, CFI_R9}, - {CFI_DX, CFI_R10}, - {CFI_BX, CFI_R11}, - {CFI_SP, CFI_R12}, - {CFI_BP, CFI_R13}, - {CFI_SI, CFI_R14}, - {CFI_DI, CFI_R15}, -}; - static int is_x86_64(const struct elf *elf) { switch (elf->ehdr.e_machine) { @@ -94,7 +83,7 @@ int arch_decode_instruction(const struct elf *elf, const struct section *sec, unsigned char op1, op2, rex = 0, rex_b = 0, rex_r = 0, rex_w = 0, rex_x 
= 0, modrm = 0, modrm_mod = 0, modrm_rm = 0, modrm_reg = 0, - sib = 0; + sib = 0 /* , sib_scale = 0, sib_index = 0, sib_base = 0 */; struct stack_op *op = NULL; struct symbol *sym; @@ -130,23 +119,29 @@ int arch_decode_instruction(const struct elf *elf, const struct section *sec, if (insn.modrm.nbytes) { modrm = insn.modrm.bytes[0]; modrm_mod = X86_MODRM_MOD(modrm); - modrm_reg = X86_MODRM_REG(modrm); - modrm_rm = X86_MODRM_RM(modrm); + modrm_reg = X86_MODRM_REG(modrm) + 8*rex_r; + modrm_rm = X86_MODRM_RM(modrm) + 8*rex_b; } - if (insn.sib.nbytes) + if (insn.sib.nbytes) { sib = insn.sib.bytes[0]; + /* + sib_scale = X86_SIB_SCALE(sib); + sib_index = X86_SIB_INDEX(sib) + 8*rex_x; + sib_base = X86_SIB_BASE(sib) + 8*rex_b; + */ + } switch (op1) { case 0x1: case 0x29: - if (rex_w && !rex_b && modrm_mod == 3 && modrm_rm == 4) { + if (rex_w && modrm_mod == 3 && modrm_rm == CFI_SP) { /* add/sub reg, %rsp */ ADD_OP(op) { op->src.type = OP_SRC_ADD; - op->src.reg = op_to_cfi_reg[modrm_reg][rex_r]; + op->src.reg = modrm_reg; op->dest.type = OP_DEST_REG; op->dest.reg = CFI_SP; } @@ -158,7 +153,7 @@ int arch_decode_instruction(const struct elf *elf, const struct section *sec, /* push reg */ ADD_OP(op) { op->src.type = OP_SRC_REG; - op->src.reg = op_to_cfi_reg[op1 & 0x7][rex_b]; + op->src.reg = (op1 & 0x7) + 8*rex_b; op->dest.type = OP_DEST_PUSH; } @@ -170,7 +165,7 @@ int arch_decode_instruction(const struct elf *elf, const struct section *sec, ADD_OP(op) { op->src.type = OP_SRC_POP; op->dest.type = OP_DEST_REG; - op->dest.reg = op_to_cfi_reg[op1 & 0x7][rex_b]; + op->dest.reg = (op1 & 0x7) + 8*rex_b; } break; @@ -223,7 +218,7 @@ int arch_decode_instruction(const struct elf *elf, const struct section *sec, break; case 0x89: - if (rex_w && !rex_r && modrm_reg == 4) { + if (rex_w && modrm_reg == CFI_SP) { if (modrm_mod == 3) { /* mov %rsp, reg */ @@ -231,17 +226,17 @@ int arch_decode_instruction(const struct elf *elf, const struct section *sec, op->src.type = OP_SRC_REG; op->src.reg 
= CFI_SP; op->dest.type = OP_DEST_REG; - op->dest.reg = op_to_cfi_reg[modrm_rm][rex_b]; + op->dest.reg = modrm_rm; } break; } else { /* skip nontrivial SIB */ - if (modrm_rm == 4 && !(sib == 0x24 && rex_b == rex_x)) + if ((modrm_rm & 7) == 4 && !(sib == 0x24 && rex_b == rex_x)) break; /* skip RIP relative displacement */ - if (modrm_rm == 5 && modrm_mod == 0) + if ((modrm_rm & 7) == 5 && modrm_mod == 0) break; /* mov %rsp, disp(%reg) */ @@ -249,7 +244,7 @@ int arch_decode_instruction(const struct elf *elf, const struct section *sec, op->src.type = OP_SRC_REG; op->src.reg = CFI_SP; op->dest.type = OP_DEST_REG_INDIRECT; - op->dest.reg = op_to_cfi_reg[modrm_rm][rex_b]; + op->dest.reg = modrm_rm; op->dest.offset = insn.displacement.value; } break; @@ -258,12 +253,12 @@ int arch_decode_instruction(const struct elf *elf, const struct section *sec, break; } - if (rex_w && !rex_b && modrm_mod == 3 && modrm_rm == 4) { + if (rex_w && modrm_mod == 3 && modrm_rm == CFI_SP) { /* mov reg, %rsp */ ADD_OP(op) { op->src.type = OP_SRC_REG; - op->src.reg = op_to_cfi_reg[modrm_reg][rex_r]; + op->src.reg = modrm_reg; op->dest.type = OP_DEST_REG; op->dest.reg = CFI_SP; } @@ -272,13 +267,12 @@ int arch_decode_instruction(const struct elf *elf, const struct section *sec, /* fallthrough */ case 0x88: - if (!rex_b && - (modrm_mod == 1 || modrm_mod == 2) && modrm_rm == 5) { + if ((modrm_mod == 1 || modrm_mod == 2) && modrm_rm == CFI_BP) { /* mov reg, disp(%rbp) */ ADD_OP(op) { op->src.type = OP_SRC_REG; - op->src.reg = op_to_cfi_reg[modrm_reg][rex_r]; + op->src.reg = modrm_reg; op->dest.type = OP_DEST_REG_INDIRECT; op->dest.reg = CFI_BP; op->dest.offset = insn.displacement.value; @@ -286,12 +280,12 @@ int arch_decode_instruction(const struct elf *elf, const struct section *sec, break; } - if (rex_w && !rex_b && modrm_rm == 4 && sib == 0x24) { + if (rex_w && modrm_rm == CFI_SP && sib == 0x24) { /* mov reg, disp(%rsp) */ ADD_OP(op) { op->src.type = OP_SRC_REG; - op->src.reg = 
op_to_cfi_reg[modrm_reg][rex_r]; + op->src.reg = modrm_reg; op->dest.type = OP_DEST_REG_INDIRECT; op->dest.reg = CFI_SP; op->dest.offset = insn.displacement.value; @@ -302,7 +296,10 @@ int arch_decode_instruction(const struct elf *elf, const struct section *sec, break; case 0x8b: - if (rex_w && !rex_b && modrm_mod == 1 && modrm_rm == 5) { + if (!rex_w) + break; + + if (modrm_mod == 1 && modrm_rm == CFI_BP) { /* mov disp(%rbp), reg */ ADD_OP(op) { @@ -310,11 +307,12 @@ int arch_decode_instruction(const struct elf *elf, const struct section *sec, op->src.reg = CFI_BP; op->src.offset = insn.displacement.value; op->dest.type = OP_DEST_REG; - op->dest.reg = op_to_cfi_reg[modrm_reg][rex_r]; + op->dest.reg = modrm_reg; } + break; + } - } else if (rex_w && !rex_b && sib == 0x24 && - modrm_mod != 3 && modrm_rm == 4) { + if (modrm_mod != 3 && modrm_rm == CFI_SP && sib == 0x24) { /* mov disp(%rsp), reg */ ADD_OP(op) { @@ -322,8 +320,9 @@ int arch_decode_instruction(const struct elf *elf, const struct section *sec, op->src.reg = CFI_SP; op->src.offset = insn.displacement.value; op->dest.type = OP_DEST_REG; - op->dest.reg = op_to_cfi_reg[modrm_reg][rex_r]; + op->dest.reg = modrm_reg; } + break; } break; @@ -339,11 +338,11 @@ int arch_decode_instruction(const struct elf *elf, const struct section *sec, break; /* skip nontrivial SIB */ - if (modrm_rm == 4 && !(sib == 0x24 && rex_b == rex_x)) + if ((modrm_rm & 7) == 4 && !(sib == 0x24 && rex_b == rex_x)) break; /* skip RIP relative displacement */ - if (modrm_rm == 5 && modrm_mod == 0) + if ((modrm_rm & 7) == 5 && modrm_mod == 0) break; /* lea disp(%src), %dst */ @@ -356,9 +355,9 @@ int arch_decode_instruction(const struct elf *elf, const struct section *sec, /* lea disp(%src), %dst */ op->src.type = OP_SRC_ADD; } - op->src.reg = op_to_cfi_reg[modrm_rm][rex_b]; + op->src.reg = modrm_rm; op->dest.type = OP_DEST_REG; - op->dest.reg = op_to_cfi_reg[modrm_reg][rex_r]; + op->dest.reg = modrm_reg; } break; -- cgit v1.2.3 From 
78df6245c3c82484200b9f8e306dc86fb19e9c02 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 10 Feb 2021 11:47:35 +0100 Subject: objtool,x86: Support %riz encodings When there's a SIB byte, the register otherwise denoted by r/m will then be denoted by SIB.base; REX.b will now extend this. SIB.index == SP is magic and denotes an index value of zero. This means that there's a bunch of alternative (longer) encodings for the same thing. E.g. 'ModRM.mod != 3, ModRM.r/m = AX' can be encoded as 'ModRM.mod != 3, ModRM.r/m = SP, SIB.base = AX, SIB.index = SP' which is actually 4 different encodings because the value of SIB.scale is irrelevant, giving rise to 5 different but equal encodings. Support these encodings and clean up the SIB handling in general. Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Ingo Molnar Acked-by: Josh Poimboeuf Tested-by: Nick Desaulniers Link: https://lkml.kernel.org/r/20210211173627.472967498@infradead.org --- tools/objtool/arch/x86/decode.c | 67 +++++++++++++++++++++++++++++------------ 1 file changed, 48 insertions(+), 19 deletions(-) diff --git a/tools/objtool/arch/x86/decode.c b/tools/objtool/arch/x86/decode.c index 5ce7dc4d8a0a..78ae5be2fb70 100644 --- a/tools/objtool/arch/x86/decode.c +++ b/tools/objtool/arch/x86/decode.c @@ -72,6 +72,25 @@ unsigned long arch_jump_destination(struct instruction *insn) return -1; \ else for (list_add_tail(&op->list, ops_list); op; op = NULL) +/* + * Helpers to decode ModRM/SIB: + * + * r/m| AX CX DX BX | SP | BP | SI DI | + * | R8 R9 R10 R11 | R12 | R13 | R14 R15 | + * Mod+----------------+-----+-----+---------+ + * 00 | [r/m] |[SIB]|[IP+]| [r/m] | + * 01 | [r/m + d8] |[S+d]| [r/m + d8] | + * 10 | [r/m + d32] |[S+D]| [r/m + d32] | + * 11 | r/ m | + * + */ +#define is_RIP() ((modrm_rm & 7) == CFI_BP && modrm_mod == 0) +#define have_SIB() ((modrm_rm & 7) == CFI_SP && modrm_mod != 3) + +#define rm_is(reg) (have_SIB() ? 
\ + sib_base == (reg) && sib_index == CFI_SP : \ + modrm_rm == (reg)) + int arch_decode_instruction(const struct elf *elf, const struct section *sec, unsigned long offset, unsigned int maxlen, unsigned int *len, enum insn_type *type, @@ -83,7 +102,7 @@ int arch_decode_instruction(const struct elf *elf, const struct section *sec, unsigned char op1, op2, rex = 0, rex_b = 0, rex_r = 0, rex_w = 0, rex_x = 0, modrm = 0, modrm_mod = 0, modrm_rm = 0, modrm_reg = 0, - sib = 0 /* , sib_scale = 0, sib_index = 0, sib_base = 0 */; + sib = 0, /* sib_scale = 0, */ sib_index = 0, sib_base = 0; struct stack_op *op = NULL; struct symbol *sym; @@ -125,11 +144,9 @@ int arch_decode_instruction(const struct elf *elf, const struct section *sec, if (insn.sib.nbytes) { sib = insn.sib.bytes[0]; - /* - sib_scale = X86_SIB_SCALE(sib); + /* sib_scale = X86_SIB_SCALE(sib); */ sib_index = X86_SIB_INDEX(sib) + 8*rex_x; sib_base = X86_SIB_BASE(sib) + 8*rex_b; - */ } switch (op1) { @@ -218,7 +235,10 @@ int arch_decode_instruction(const struct elf *elf, const struct section *sec, break; case 0x89: - if (rex_w && modrm_reg == CFI_SP) { + if (!rex_w) + break; + + if (modrm_reg == CFI_SP) { if (modrm_mod == 3) { /* mov %rsp, reg */ @@ -231,14 +251,17 @@ int arch_decode_instruction(const struct elf *elf, const struct section *sec, break; } else { - /* skip nontrivial SIB */ - if ((modrm_rm & 7) == 4 && !(sib == 0x24 && rex_b == rex_x)) - break; - /* skip RIP relative displacement */ - if ((modrm_rm & 7) == 5 && modrm_mod == 0) + if (is_RIP()) break; + /* skip nontrivial SIB */ + if (have_SIB()) { + modrm_rm = sib_base; + if (sib_index != CFI_SP) + break; + } + /* mov %rsp, disp(%reg) */ ADD_OP(op) { op->src.type = OP_SRC_REG; @@ -253,7 +276,7 @@ int arch_decode_instruction(const struct elf *elf, const struct section *sec, break; } - if (rex_w && modrm_mod == 3 && modrm_rm == CFI_SP) { + if (modrm_mod == 3 && modrm_rm == CFI_SP) { /* mov reg, %rsp */ ADD_OP(op) { @@ -267,6 +290,9 @@ int 
arch_decode_instruction(const struct elf *elf, const struct section *sec, /* fallthrough */ case 0x88: + if (!rex_w) + break; + if ((modrm_mod == 1 || modrm_mod == 2) && modrm_rm == CFI_BP) { /* mov reg, disp(%rbp) */ @@ -280,7 +306,7 @@ int arch_decode_instruction(const struct elf *elf, const struct section *sec, break; } - if (rex_w && modrm_rm == CFI_SP && sib == 0x24) { + if (modrm_mod != 3 && rm_is(CFI_SP)) { /* mov reg, disp(%rsp) */ ADD_OP(op) { @@ -299,7 +325,7 @@ int arch_decode_instruction(const struct elf *elf, const struct section *sec, if (!rex_w) break; - if (modrm_mod == 1 && modrm_rm == CFI_BP) { + if ((modrm_mod == 1 || modrm_mod == 2) && modrm_rm == CFI_BP) { /* mov disp(%rbp), reg */ ADD_OP(op) { @@ -312,7 +338,7 @@ int arch_decode_instruction(const struct elf *elf, const struct section *sec, break; } - if (modrm_mod != 3 && modrm_rm == CFI_SP && sib == 0x24) { + if (modrm_mod != 3 && rm_is(CFI_SP)) { /* mov disp(%rsp), reg */ ADD_OP(op) { @@ -337,14 +363,17 @@ int arch_decode_instruction(const struct elf *elf, const struct section *sec, if (!rex_w) break; - /* skip nontrivial SIB */ - if ((modrm_rm & 7) == 4 && !(sib == 0x24 && rex_b == rex_x)) - break; - /* skip RIP relative displacement */ - if ((modrm_rm & 7) == 5 && modrm_mod == 0) + if (is_RIP()) break; + /* skip nontrivial SIB */ + if (have_SIB()) { + modrm_rm = sib_base; + if (sib_index != CFI_SP) + break; + } + /* lea disp(%src), %dst */ ADD_OP(op) { op->src.offset = insn.displacement.value; -- cgit v1.2.3 From 961d83b9073b1ce5834af50d3c69e5e2461c6fd3 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 10 Feb 2021 14:11:30 +0100 Subject: objtool,x86: Rewrite ADD/SUB/AND Support sign extending and imm8 forms. 
Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Ingo Molnar Acked-by: Josh Poimboeuf Tested-by: Nick Desaulniers Link: https://lkml.kernel.org/r/20210211173627.588366777@infradead.org --- tools/objtool/arch/x86/decode.c | 70 ++++++++++++++++++++++++++++++----------- 1 file changed, 51 insertions(+), 19 deletions(-) diff --git a/tools/objtool/arch/x86/decode.c b/tools/objtool/arch/x86/decode.c index 78ae5be2fb70..b42e5ec083a0 100644 --- a/tools/objtool/arch/x86/decode.c +++ b/tools/objtool/arch/x86/decode.c @@ -98,13 +98,14 @@ int arch_decode_instruction(const struct elf *elf, const struct section *sec, struct list_head *ops_list) { struct insn insn; - int x86_64, sign; + int x86_64; unsigned char op1, op2, rex = 0, rex_b = 0, rex_r = 0, rex_w = 0, rex_x = 0, modrm = 0, modrm_mod = 0, modrm_rm = 0, modrm_reg = 0, sib = 0, /* sib_scale = 0, */ sib_index = 0, sib_base = 0; struct stack_op *op = NULL; struct symbol *sym; + u64 imm; x86_64 = is_x86_64(elf); if (x86_64 == -1) @@ -200,12 +201,54 @@ int arch_decode_instruction(const struct elf *elf, const struct section *sec, *type = INSN_JUMP_CONDITIONAL; break; - case 0x81: - case 0x83: - if (rex != 0x48) + case 0x80 ... 
0x83: + /* + * 1000 00sw : mod OP r/m : immediate + * + * s - sign extend immediate + * w - imm8 / imm32 + * + * OP: 000 ADD 100 AND + * 001 OR 101 SUB + * 010 ADC 110 XOR + * 011 SBB 111 CMP + */ + + /* 64bit only */ + if (!rex_w) break; - if (modrm == 0xe4) { + /* %rsp target only */ + if (!(modrm_mod == 3 && modrm_rm == CFI_SP)) + break; + + imm = insn.immediate.value; + if (op1 & 2) { /* sign extend */ + if (op1 & 1) { /* imm32 */ + imm <<= 32; + imm = (s64)imm >> 32; + } else { /* imm8 */ + imm <<= 56; + imm = (s64)imm >> 56; + } + } + + switch (modrm_reg & 7) { + case 5: + imm = -imm; + /* fallthrough */ + case 0: + /* add/sub imm, %rsp */ + ADD_OP(op) { + op->src.type = OP_SRC_ADD; + op->src.reg = CFI_SP; + op->src.offset = imm; + op->dest.type = OP_DEST_REG; + op->dest.reg = CFI_SP; + } + break; + + case 4: /* and imm, %rsp */ ADD_OP(op) { op->src.type = OP_SRC_AND; @@ -215,23 +258,12 @@ int arch_decode_instruction(const struct elf *elf, const struct section *sec, op->dest.reg = CFI_SP; } break; - } - if (modrm == 0xc4) - sign = 1; - else if (modrm == 0xec) - sign = -1; - else + default: + /* WARN ? */ break; - - /* add/sub imm, %rsp */ - ADD_OP(op) { - op->src.type = OP_SRC_ADD; - op->src.reg = CFI_SP; - op->src.offset = insn.immediate.value * sign; - op->dest.type = OP_DEST_REG; - op->dest.reg = CFI_SP; } + break; case 0x89: -- cgit v1.2.3 From 36d92e43d01cbeeec99abdf405362243051d6b3f Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 12 Feb 2021 09:13:00 +0100 Subject: objtool,x86: More ModRM sugar Better helpers to decode ModRM. 
Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Ingo Molnar Acked-by: Josh Poimboeuf Link: https://lkml.kernel.org/r/YCZB/ljatFXqQbm8@hirez.programming.kicks-ass.net --- tools/objtool/arch/x86/decode.c | 28 +++++++++++++++++----------- 1 file changed, 17 insertions(+), 11 deletions(-) diff --git a/tools/objtool/arch/x86/decode.c b/tools/objtool/arch/x86/decode.c index b42e5ec083a0..431bafb881d4 100644 --- a/tools/objtool/arch/x86/decode.c +++ b/tools/objtool/arch/x86/decode.c @@ -82,15 +82,21 @@ unsigned long arch_jump_destination(struct instruction *insn) * 01 | [r/m + d8] |[S+d]| [r/m + d8] | * 10 | [r/m + d32] |[S+D]| [r/m + d32] | * 11 | r/ m | - * */ + +#define mod_is_mem() (modrm_mod != 3) +#define mod_is_reg() (modrm_mod == 3) + #define is_RIP() ((modrm_rm & 7) == CFI_BP && modrm_mod == 0) -#define have_SIB() ((modrm_rm & 7) == CFI_SP && modrm_mod != 3) +#define have_SIB() ((modrm_rm & 7) == CFI_SP && mod_is_mem()) #define rm_is(reg) (have_SIB() ? \ sib_base == (reg) && sib_index == CFI_SP : \ modrm_rm == (reg)) +#define rm_is_mem(reg) (mod_is_mem() && !is_RIP() && rm_is(reg)) +#define rm_is_reg(reg) (mod_is_reg() && modrm_rm == (reg)) + int arch_decode_instruction(const struct elf *elf, const struct section *sec, unsigned long offset, unsigned int maxlen, unsigned int *len, enum insn_type *type, @@ -154,7 +160,7 @@ int arch_decode_instruction(const struct elf *elf, const struct section *sec, case 0x1: case 0x29: - if (rex_w && modrm_mod == 3 && modrm_rm == CFI_SP) { + if (rex_w && rm_is_reg(CFI_SP)) { /* add/sub reg, %rsp */ ADD_OP(op) { @@ -219,7 +225,7 @@ int arch_decode_instruction(const struct elf *elf, const struct section *sec, break; /* %rsp target only */ - if (!(modrm_mod == 3 && modrm_rm == CFI_SP)) + if (!rm_is_reg(CFI_SP)) break; imm = insn.immediate.value; @@ -272,7 +278,7 @@ int arch_decode_instruction(const struct elf *elf, const struct section *sec, if (modrm_reg == CFI_SP) { - if (modrm_mod == 3) { + if (mod_is_reg()) { /* mov %rsp, 
reg */ ADD_OP(op) { op->src.type = OP_SRC_REG; @@ -308,7 +314,7 @@ int arch_decode_instruction(const struct elf *elf, const struct section *sec, break; } - if (modrm_mod == 3 && modrm_rm == CFI_SP) { + if (rm_is_reg(CFI_SP)) { /* mov reg, %rsp */ ADD_OP(op) { @@ -325,7 +331,7 @@ int arch_decode_instruction(const struct elf *elf, const struct section *sec, if (!rex_w) break; - if ((modrm_mod == 1 || modrm_mod == 2) && modrm_rm == CFI_BP) { + if (rm_is_mem(CFI_BP)) { /* mov reg, disp(%rbp) */ ADD_OP(op) { @@ -338,7 +344,7 @@ int arch_decode_instruction(const struct elf *elf, const struct section *sec, break; } - if (modrm_mod != 3 && rm_is(CFI_SP)) { + if (rm_is_mem(CFI_SP)) { /* mov reg, disp(%rsp) */ ADD_OP(op) { @@ -357,7 +363,7 @@ int arch_decode_instruction(const struct elf *elf, const struct section *sec, if (!rex_w) break; - if ((modrm_mod == 1 || modrm_mod == 2) && modrm_rm == CFI_BP) { + if (rm_is_mem(CFI_BP)) { /* mov disp(%rbp), reg */ ADD_OP(op) { @@ -370,7 +376,7 @@ int arch_decode_instruction(const struct elf *elf, const struct section *sec, break; } - if (modrm_mod != 3 && rm_is(CFI_SP)) { + if (rm_is_mem(CFI_SP)) { /* mov disp(%rsp), reg */ ADD_OP(op) { @@ -386,7 +392,7 @@ int arch_decode_instruction(const struct elf *elf, const struct section *sec, break; case 0x8d: - if (modrm_mod == 3) { + if (mod_is_reg()) { WARN("invalid LEA encoding at %s:0x%lx", sec->name, offset); break; } -- cgit v1.2.3 From 8ad15c6900840e8a2163012f4581c52127622e02 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 26 Feb 2021 10:59:59 +0100 Subject: objtool: Add --backup Teach objtool to write backup files, such that it becomes easier to see what objtool did to the object file. Backup files will be ${name}.orig. 
Suggested-by: Borislav Petkov Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Ingo Molnar Acked-by: Borislav Petkov Acked-by: Josh Poimboeuf Link: https://lkml.kernel.org/r/YD4obT3aoXPWl7Ax@hirez.programming.kicks-ass.net --- tools/objtool/builtin-check.c | 4 ++- tools/objtool/include/objtool/builtin.h | 3 +- tools/objtool/objtool.c | 64 +++++++++++++++++++++++++++++++++ 3 files changed, 69 insertions(+), 2 deletions(-) diff --git a/tools/objtool/builtin-check.c b/tools/objtool/builtin-check.c index c3a85d8f6c5c..97f063d55b9f 100644 --- a/tools/objtool/builtin-check.c +++ b/tools/objtool/builtin-check.c @@ -18,7 +18,8 @@ #include #include -bool no_fp, no_unreachable, retpoline, module, backtrace, uaccess, stats, validate_dup, vmlinux, mcount, noinstr; +bool no_fp, no_unreachable, retpoline, module, backtrace, uaccess, stats, + validate_dup, vmlinux, mcount, noinstr, backup; static const char * const check_usage[] = { "objtool check [] file.o", @@ -37,6 +38,7 @@ const struct option check_options[] = { OPT_BOOLEAN('n', "noinstr", &noinstr, "noinstr validation for vmlinux.o"), OPT_BOOLEAN('l', "vmlinux", &vmlinux, "vmlinux.o validation"), OPT_BOOLEAN('M', "mcount", &mcount, "generate __mcount_loc"), + OPT_BOOLEAN('B', "backup", &backup, "create .orig files before modification"), OPT_END(), }; diff --git a/tools/objtool/include/objtool/builtin.h b/tools/objtool/include/objtool/builtin.h index 2502bb27de17..d019210d74b1 100644 --- a/tools/objtool/include/objtool/builtin.h +++ b/tools/objtool/include/objtool/builtin.h @@ -8,7 +8,8 @@ #include extern const struct option check_options[]; -extern bool no_fp, no_unreachable, retpoline, module, backtrace, uaccess, stats, validate_dup, vmlinux, mcount, noinstr; +extern bool no_fp, no_unreachable, retpoline, module, backtrace, uaccess, stats, + validate_dup, vmlinux, mcount, noinstr, backup; extern int cmd_check(int argc, const char **argv); extern int cmd_orc(int argc, const char **argv); diff --git 
a/tools/objtool/objtool.c b/tools/objtool/objtool.c index 7b97ce499405..43c1836a06b4 100644 --- a/tools/objtool/objtool.c +++ b/tools/objtool/objtool.c @@ -17,6 +17,7 @@ #include #include #include +#include #include #include #include @@ -44,6 +45,64 @@ bool help; const char *objname; static struct objtool_file file; +static bool objtool_create_backup(const char *_objname) +{ + int len = strlen(_objname); + char *buf, *base, *name = malloc(len+6); + int s, d, l, t; + + if (!name) { + perror("failed backup name malloc"); + return false; + } + + strcpy(name, _objname); + strcpy(name + len, ".orig"); + + d = open(name, O_CREAT|O_WRONLY|O_TRUNC, 0644); + if (d < 0) { + perror("failed to create backup file"); + return false; + } + + s = open(_objname, O_RDONLY); + if (s < 0) { + perror("failed to open orig file"); + return false; + } + + buf = malloc(4096); + if (!buf) { + perror("failed backup data malloc"); + return false; + } + + while ((l = read(s, buf, 4096)) > 0) { + base = buf; + do { + t = write(d, base, l); + if (t < 0) { + perror("failed backup write"); + return false; + } + base += t; + l -= t; + } while (l); + } + + if (l < 0) { + perror("failed backup read"); + return false; + } + + free(name); + free(buf); + close(d); + close(s); + + return true; +} + struct objtool_file *objtool_open_read(const char *_objname) { if (objname) { @@ -59,6 +118,11 @@ struct objtool_file *objtool_open_read(const char *_objname) if (!file.elf) return NULL; + if (backup && !objtool_create_backup(objname)) { + WARN("can't create backup file"); + return NULL; + } + INIT_LIST_HEAD(&file.insn_list); hash_init(file.insn_hash); INIT_LIST_HEAD(&file.static_call_list); -- cgit v1.2.3 From a2f605f9ff57397d05a8e2f282b78a69f574d305 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 26 Feb 2021 11:18:24 +0100 Subject: objtool: Collate parse_options() users Ensure there's a single place that parses check_options, in preparation for extending where to get options from. 
Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Ingo Molnar Acked-by: Josh Poimboeuf Link: https://lkml.kernel.org/r/20210226110004.193108106@infradead.org --- tools/objtool/builtin-check.c | 14 +++++++++----- tools/objtool/builtin-orc.c | 5 +---- tools/objtool/include/objtool/builtin.h | 2 ++ 3 files changed, 12 insertions(+), 9 deletions(-) diff --git a/tools/objtool/builtin-check.c b/tools/objtool/builtin-check.c index 97f063d55b9f..03997524b93f 100644 --- a/tools/objtool/builtin-check.c +++ b/tools/objtool/builtin-check.c @@ -42,17 +42,21 @@ const struct option check_options[] = { OPT_END(), }; +int cmd_parse_options(int argc, const char **argv, const char * const usage[]) +{ + argc = parse_options(argc, argv, check_options, usage, 0); + if (argc != 1) + usage_with_options(usage, check_options); + return argc; +} + int cmd_check(int argc, const char **argv) { const char *objname; struct objtool_file *file; int ret; - argc = parse_options(argc, argv, check_options, check_usage, 0); - - if (argc != 1) - usage_with_options(check_usage, check_options); - + argc = cmd_parse_options(argc, argv, check_usage); objname = argv[0]; file = objtool_open_read(objname); diff --git a/tools/objtool/builtin-orc.c b/tools/objtool/builtin-orc.c index 8273bbf7cebb..17f8b9307738 100644 --- a/tools/objtool/builtin-orc.c +++ b/tools/objtool/builtin-orc.c @@ -34,10 +34,7 @@ int cmd_orc(int argc, const char **argv) struct objtool_file *file; int ret; - argc = parse_options(argc, argv, check_options, orc_usage, 0); - if (argc != 1) - usage_with_options(orc_usage, check_options); - + argc = cmd_parse_options(argc, argv, orc_usage); objname = argv[0]; file = objtool_open_read(objname); diff --git a/tools/objtool/include/objtool/builtin.h b/tools/objtool/include/objtool/builtin.h index d019210d74b1..15ac0b7d3d6a 100644 --- a/tools/objtool/include/objtool/builtin.h +++ b/tools/objtool/include/objtool/builtin.h @@ -11,6 +11,8 @@ extern const struct option check_options[]; extern bool 
no_fp, no_unreachable, retpoline, module, backtrace, uaccess, stats, validate_dup, vmlinux, mcount, noinstr, backup; +extern int cmd_parse_options(int argc, const char **argv, const char * const usage[]); + extern int cmd_check(int argc, const char **argv); extern int cmd_orc(int argc, const char **argv); -- cgit v1.2.3 From 900b4df347bbac4874149a226143a556909faba8 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 26 Feb 2021 11:32:30 +0100 Subject: objtool: Parse options from OBJTOOL_ARGS Teach objtool to parse options from the OBJTOOL_ARGS environment variable. This enables things like: $ OBJTOOL_ARGS="--backup" make O=defconfig-build/ kernel/ponies.o to obtain both defconfig-build/kernel/ponies.o{,.orig} and easily inspect what objtool actually did. Suggested-by: Borislav Petkov Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Ingo Molnar Acked-by: Josh Poimboeuf Link: https://lkml.kernel.org/r/20210226110004.252553847@infradead.org --- tools/objtool/builtin-check.c | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/tools/objtool/builtin-check.c b/tools/objtool/builtin-check.c index 03997524b93f..8b38b5d6fec7 100644 --- a/tools/objtool/builtin-check.c +++ b/tools/objtool/builtin-check.c @@ -15,6 +15,7 @@ #include <subcmd/parse-options.h> #include <string.h> +#include <stdlib.h> #include <objtool/builtin.h> #include <objtool/objtool.h> @@ -26,6 +27,11 @@ static const char * const check_usage[] = { NULL, }; +static const char * const env_usage[] = { + "OBJTOOL_ARGS=\"<options>\"", + NULL, +}; + const struct option check_options[] = { OPT_BOOLEAN('f', "no-fp", &no_fp, "Skip frame pointer validation"), OPT_BOOLEAN('u', "no-unreachable", &no_unreachable, "Skip 'unreachable instruction' warnings"), @@ -44,6 +50,25 @@ const struct option check_options[] = { int cmd_parse_options(int argc, const char **argv, const char * const usage[]) { + const char *envv[16] = { }; + char *env; + int envc; + + env = getenv("OBJTOOL_ARGS"); + if (env) { + envv[0] = "OBJTOOL_ARGS"; + for (envc = 1; envc < ARRAY_SIZE(envv); ) { + envv[envc++] =
env; + env = strchr(env, ' '); + if (!env) + break; + *env = '\0'; + env++; + } + + parse_options(envc, envv, check_options, env_usage, 0); + } + argc = parse_options(argc, argv, check_options, usage, 0); if (argc != 1) usage_with_options(usage, check_options); -- cgit v1.2.3 From 99033461e685b48549ec77608b4bda75ddf772ce Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Wed, 24 Feb 2021 10:29:14 -0600 Subject: objtool: Support asm jump tables Objtool detection of asm jump tables would normally just work, except for the fact that asm retpolines use alternatives. Objtool thinks the alternative code path (a jump to the retpoline) is a sibling call. Don't treat alternative indirect branches as sibling calls when the original instruction has a jump table. Signed-off-by: Josh Poimboeuf Tested-by: Ard Biesheuvel Acked-by: Ard Biesheuvel Tested-by: Sami Tolvanen Acked-by: Peter Zijlstra (Intel) Acked-by: Herbert Xu Link: https://lore.kernel.org/r/460cf4dc675d64e1124146562cabd2c05aa322e8.1614182415.git.jpoimboe@redhat.com --- tools/objtool/check.c | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/tools/objtool/check.c b/tools/objtool/check.c index a0f762a15ad5..46621e8a80c1 100644 --- a/tools/objtool/check.c +++ b/tools/objtool/check.c @@ -108,6 +108,18 @@ static struct instruction *prev_insn_same_sym(struct objtool_file *file, for (insn = next_insn_same_sec(file, insn); insn; \ insn = next_insn_same_sec(file, insn)) +static bool is_jump_table_jump(struct instruction *insn) +{ + struct alt_group *alt_group = insn->alt_group; + + if (insn->jump_table) + return true; + + /* Retpoline alternative for a jump table? */ + return alt_group && alt_group->orig_group && + alt_group->orig_group->first_insn->jump_table; +} + static bool is_sibling_call(struct instruction *insn) { /* @@ -120,7 +132,7 @@ static bool is_sibling_call(struct instruction *insn) /* An indirect jump is either a sibling call or a jump to a table. 
*/ if (insn->type == INSN_JUMP_DYNAMIC) - return list_empty(&insn->alts); + return !is_jump_table_jump(insn); /* add_jump_destinations() sets insn->call_dest for sibling calls. */ return (is_static_jump(insn) && insn->call_dest); -- cgit v1.2.3 From 4f08300916e882a0c34a2f325ff3fea2be2e57b3 Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Wed, 24 Feb 2021 10:29:15 -0600 Subject: x86/crypto/aesni-intel_avx: Remove unused macros These macros are no longer used; remove them. Signed-off-by: Josh Poimboeuf Tested-by: Ard Biesheuvel Acked-by: Ard Biesheuvel Tested-by: Sami Tolvanen Acked-by: Peter Zijlstra (Intel) Acked-by: Herbert Xu Link: https://lore.kernel.org/r/53f7136ea93ebdbca399959e6d2991ecb46e733e.1614182415.git.jpoimboe@redhat.com --- arch/x86/crypto/aesni-intel_avx-x86_64.S | 8 -------- 1 file changed, 8 deletions(-) diff --git a/arch/x86/crypto/aesni-intel_avx-x86_64.S b/arch/x86/crypto/aesni-intel_avx-x86_64.S index 2cf8e94d986a..4fdf38e92d51 100644 --- a/arch/x86/crypto/aesni-intel_avx-x86_64.S +++ b/arch/x86/crypto/aesni-intel_avx-x86_64.S @@ -212,10 +212,6 @@ HashKey_8_k = 16*21 # store XOR of HashKey^8 <<1 mod poly here (for Karatsu #define arg4 %rcx #define arg5 %r8 #define arg6 %r9 -#define arg7 STACK_OFFSET+8*1(%r14) -#define arg8 STACK_OFFSET+8*2(%r14) -#define arg9 STACK_OFFSET+8*3(%r14) -#define arg10 STACK_OFFSET+8*4(%r14) #define keysize 2*15*16(arg1) i = 0 @@ -237,9 +233,6 @@ define_reg j %j .noaltmacro .endm -# need to push 4 registers into stack to maintain -STACK_OFFSET = 8*4 - TMP1 = 16*0 # Temporary storage for AAD TMP2 = 16*1 # Temporary storage for AES State 2 (State 1 is stored in an XMM register) TMP3 = 16*2 # Temporary storage for AES State 3 @@ -256,7 +249,6 @@ VARIABLE_OFFSET = 16*8 ################################ .macro FUNC_SAVE - #the number of pushes must equal STACK_OFFSET push %r12 push %r13 push %r14 -- cgit v1.2.3 From ff5796b6dbea4763fdca002101e32b60aa17f8e8 Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Wed, 24 Feb 
2021 10:29:16 -0600 Subject: x86/crypto/aesni-intel_avx: Fix register usage comments Fix register usage comments to match reality. Signed-off-by: Josh Poimboeuf Tested-by: Ard Biesheuvel Acked-by: Ard Biesheuvel Tested-by: Sami Tolvanen Acked-by: Peter Zijlstra (Intel) Acked-by: Herbert Xu Link: https://lore.kernel.org/r/8655d4513a0ed1eddec609165064153973010aa2.1614182415.git.jpoimboe@redhat.com --- arch/x86/crypto/aesni-intel_avx-x86_64.S | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/arch/x86/crypto/aesni-intel_avx-x86_64.S b/arch/x86/crypto/aesni-intel_avx-x86_64.S index 4fdf38e92d51..188f1848a730 100644 --- a/arch/x86/crypto/aesni-intel_avx-x86_64.S +++ b/arch/x86/crypto/aesni-intel_avx-x86_64.S @@ -286,7 +286,7 @@ VARIABLE_OFFSET = 16*8 # combined for GCM encrypt and decrypt functions # clobbering all xmm registers -# clobbering r10, r11, r12, r13, r14, r15 +# clobbering r10, r11, r12, r13, r15, rax .macro GCM_ENC_DEC INITIAL_BLOCKS GHASH_8_ENCRYPT_8_PARALLEL GHASH_LAST_8 GHASH_MUL ENC_DEC REP vmovdqu AadHash(arg2), %xmm8 vmovdqu HashKey(arg2), %xmm13 # xmm13 = HashKey @@ -988,7 +988,7 @@ _partial_block_done_\@: ## num_initial_blocks = b mod 4# ## encrypt the initial num_initial_blocks blocks and apply ghash on the ciphertext ## r10, r11, r12, rax are clobbered -## arg1, arg3, arg4, r14 are used as a pointer only, not modified +## arg1, arg2, arg3, arg4 are used as pointers only, not modified .macro INITIAL_BLOCKS_AVX REP num_initial_blocks T1 T2 T3 T4 T5 CTR XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 T6 T_key ENC_DEC i = (8-\num_initial_blocks) @@ -1223,7 +1223,7 @@ _initial_blocks_done\@: # encrypt 8 blocks at a time # ghash the 8 previously encrypted ciphertext blocks -# arg1, arg3, arg4 are used as pointers only, not modified +# arg1, arg2, arg3, arg4 are used as pointers only, not modified # r11 is the data offset value .macro GHASH_8_ENCRYPT_8_PARALLEL_AVX REP T1 T2 T3 T4 T5 T6 CTR XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 T7 
loop_idx ENC_DEC @@ -1936,7 +1936,7 @@ SYM_FUNC_END(aesni_gcm_finalize_avx_gen2) ## num_initial_blocks = b mod 4# ## encrypt the initial num_initial_blocks blocks and apply ghash on the ciphertext ## r10, r11, r12, rax are clobbered -## arg1, arg3, arg4, r14 are used as a pointer only, not modified +## arg1, arg2, arg3, arg4 are used as pointers only, not modified .macro INITIAL_BLOCKS_AVX2 REP num_initial_blocks T1 T2 T3 T4 T5 CTR XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 T6 T_key ENC_DEC VER i = (8-\num_initial_blocks) @@ -2178,7 +2178,7 @@ _initial_blocks_done\@: # encrypt 8 blocks at a time # ghash the 8 previously encrypted ciphertext blocks -# arg1, arg3, arg4 are used as pointers only, not modified +# arg1, arg2, arg3, arg4 are used as pointers only, not modified # r11 is the data offset value .macro GHASH_8_ENCRYPT_8_PARALLEL_AVX2 REP T1 T2 T3 T4 T5 T6 CTR XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 T7 loop_idx ENC_DEC -- cgit v1.2.3 From e163be86fff3deec70f63330fc43fedf892c9aee Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Wed, 24 Feb 2021 10:29:17 -0600 Subject: x86/crypto/aesni-intel_avx: Standardize stack alignment prologue Use RBP instead of R14 for saving the old stack pointer before realignment. This resembles what compilers normally do. This enables ORC unwinding by allowing objtool to understand the stack realignment. 
Signed-off-by: Josh Poimboeuf Tested-by: Ard Biesheuvel Acked-by: Ard Biesheuvel Tested-by: Sami Tolvanen Acked-by: Peter Zijlstra (Intel) Acked-by: Herbert Xu Link: https://lore.kernel.org/r/02d00a0903a0959f4787e186e2a07d271e1f63d4.1614182415.git.jpoimboe@redhat.com --- arch/x86/crypto/aesni-intel_avx-x86_64.S | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/arch/x86/crypto/aesni-intel_avx-x86_64.S b/arch/x86/crypto/aesni-intel_avx-x86_64.S index 188f1848a730..98e3552b6e03 100644 --- a/arch/x86/crypto/aesni-intel_avx-x86_64.S +++ b/arch/x86/crypto/aesni-intel_avx-x86_64.S @@ -251,22 +251,20 @@ VARIABLE_OFFSET = 16*8 .macro FUNC_SAVE push %r12 push %r13 - push %r14 push %r15 - mov %rsp, %r14 - - + push %rbp + mov %rsp, %rbp sub $VARIABLE_OFFSET, %rsp and $~63, %rsp # align rsp to 64 bytes .endm .macro FUNC_RESTORE - mov %r14, %rsp + mov %rbp, %rsp + pop %rbp pop %r15 - pop %r14 pop %r13 pop %r12 .endm -- cgit v1.2.3 From dabe5167a3cbb4bf16b20c0e5b6497513e2e3a08 Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Wed, 24 Feb 2021 10:29:18 -0600 Subject: x86/crypto/camellia-aesni-avx2: Unconditionally allocate stack buffer A conditional stack allocation violates traditional unwinding requirements when a single instruction can have differing stack layouts. There's no benefit in allocating the stack buffer conditionally. Just do it unconditionally. 
Signed-off-by: Josh Poimboeuf Tested-by: Ard Biesheuvel Acked-by: Ard Biesheuvel Tested-by: Sami Tolvanen Acked-by: Peter Zijlstra (Intel) Acked-by: Herbert Xu Link: https://lore.kernel.org/r/85ac96613ee5784b6239c18d3f68b1f3c509caa3.1614182415.git.jpoimboe@redhat.com --- arch/x86/crypto/camellia-aesni-avx2-asm_64.S | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/arch/x86/crypto/camellia-aesni-avx2-asm_64.S b/arch/x86/crypto/camellia-aesni-avx2-asm_64.S index 782e9712a1ec..706f70829a07 100644 --- a/arch/x86/crypto/camellia-aesni-avx2-asm_64.S +++ b/arch/x86/crypto/camellia-aesni-avx2-asm_64.S @@ -990,6 +990,7 @@ SYM_FUNC_START(camellia_cbc_dec_32way) * %rdx: src (32 blocks) */ FRAME_BEGIN + subq $(16 * 32), %rsp; vzeroupper; @@ -1002,7 +1003,6 @@ SYM_FUNC_START(camellia_cbc_dec_32way) %ymm8, %ymm9, %ymm10, %ymm11, %ymm12, %ymm13, %ymm14, %ymm15, %rdx, (key_table)(CTX, %r8, 8)); - movq %rsp, %r10; cmpq %rsi, %rdx; je .Lcbc_dec_use_stack; @@ -1015,7 +1015,6 @@ SYM_FUNC_START(camellia_cbc_dec_32way) * dst still in-use (because dst == src), so use stack for temporary * storage. */ - subq $(16 * 32), %rsp; movq %rsp, %rax; .Lcbc_dec_continue: @@ -1025,7 +1024,6 @@ SYM_FUNC_START(camellia_cbc_dec_32way) vpxor %ymm7, %ymm7, %ymm7; vinserti128 $1, (%rdx), %ymm7, %ymm7; vpxor (%rax), %ymm7, %ymm7; - movq %r10, %rsp; vpxor (0 * 32 + 16)(%rdx), %ymm6, %ymm6; vpxor (1 * 32 + 16)(%rdx), %ymm5, %ymm5; vpxor (2 * 32 + 16)(%rdx), %ymm4, %ymm4; @@ -1047,6 +1045,7 @@ SYM_FUNC_START(camellia_cbc_dec_32way) vzeroupper; + addq $(16 * 32), %rsp; FRAME_END ret; SYM_FUNC_END(camellia_cbc_dec_32way) -- cgit v1.2.3 From 2b02ed55482a1c5c310a7f53707292fcf1601e7a Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Wed, 24 Feb 2021 10:29:19 -0600 Subject: x86/crypto/crc32c-pcl-intel: Standardize jump table Simplify the jump table code so that it resembles a compiler-generated table. This enables ORC unwinding by allowing objtool to follow all the potential code paths. 
Signed-off-by: Josh Poimboeuf Tested-by: Ard Biesheuvel Acked-by: Ard Biesheuvel Tested-by: Sami Tolvanen Acked-by: Peter Zijlstra (Intel) Acked-by: Herbert Xu Link: https://lore.kernel.org/r/5357a039def90b8ef6b5874ef12cda008ecf18ba.1614182415.git.jpoimboe@redhat.com --- arch/x86/crypto/crc32c-pcl-intel-asm_64.S | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/arch/x86/crypto/crc32c-pcl-intel-asm_64.S b/arch/x86/crypto/crc32c-pcl-intel-asm_64.S index 884dc767b051..ac1f303eed0f 100644 --- a/arch/x86/crypto/crc32c-pcl-intel-asm_64.S +++ b/arch/x86/crypto/crc32c-pcl-intel-asm_64.S @@ -53,7 +53,7 @@ .endm .macro JMPTBL_ENTRY i -.word crc_\i - crc_array +.quad crc_\i .endm .macro JNC_LESS_THAN j @@ -168,10 +168,7 @@ continue_block: xor crc2, crc2 ## branch into array - lea jump_table(%rip), %bufp - movzwq (%bufp, %rax, 2), len - lea crc_array(%rip), %bufp - lea (%bufp, len, 1), %bufp + mov jump_table(,%rax,8), %bufp JMP_NOSPEC bufp ################################################################ -- cgit v1.2.3 From 35a0067d2c02a7c35466db5f207b7b9265de84d9 Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Wed, 24 Feb 2021 10:29:20 -0600 Subject: x86/crypto/sha_ni: Standardize stack alignment prologue Use a more standard prologue for saving the stack pointer before realigning the stack. This enables ORC unwinding by allowing objtool to understand the stack realignment. 
Signed-off-by: Josh Poimboeuf Tested-by: Ard Biesheuvel Acked-by: Ard Biesheuvel Tested-by: Sami Tolvanen Acked-by: Peter Zijlstra (Intel) Acked-by: Herbert Xu Link: https://lore.kernel.org/r/5033e1a79867dff1b18e1b4d0783c38897d3f223.1614182415.git.jpoimboe@redhat.com --- arch/x86/crypto/sha1_ni_asm.S | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/x86/crypto/sha1_ni_asm.S b/arch/x86/crypto/sha1_ni_asm.S index 11efe3a45a1f..5d8415f482bd 100644 --- a/arch/x86/crypto/sha1_ni_asm.S +++ b/arch/x86/crypto/sha1_ni_asm.S @@ -59,8 +59,6 @@ #define DATA_PTR %rsi /* 2nd arg */ #define NUM_BLKS %rdx /* 3rd arg */ -#define RSPSAVE %rax - /* gcc conversion */ #define FRAME_SIZE 32 /* space for 2x16 bytes */ @@ -96,7 +94,8 @@ .text .align 32 SYM_FUNC_START(sha1_ni_transform) - mov %rsp, RSPSAVE + push %rbp + mov %rsp, %rbp sub $FRAME_SIZE, %rsp and $~0xF, %rsp @@ -288,7 +287,8 @@ SYM_FUNC_START(sha1_ni_transform) pextrd $3, E0, 1*16(DIGEST_PTR) .Ldone_hash: - mov RSPSAVE, %rsp + mov %rbp, %rsp + pop %rbp ret SYM_FUNC_END(sha1_ni_transform) -- cgit v1.2.3 From 20114c899cafa8313534a841cab0ab1f7ab09672 Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Wed, 24 Feb 2021 10:29:21 -0600 Subject: x86/crypto/sha1_avx2: Standardize stack alignment prologue Use a more standard prologue for saving the stack pointer before realigning the stack. This enables ORC unwinding by allowing objtool to understand the stack realignment. 
Signed-off-by: Josh Poimboeuf Tested-by: Ard Biesheuvel Acked-by: Ard Biesheuvel Tested-by: Sami Tolvanen Acked-by: Peter Zijlstra (Intel) Acked-by: Herbert Xu Link: https://lore.kernel.org/r/fdaaf8670ed1f52f55ba9a6bbac98c1afddc1af6.1614182415.git.jpoimboe@redhat.com --- arch/x86/crypto/sha1_avx2_x86_64_asm.S | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/x86/crypto/sha1_avx2_x86_64_asm.S b/arch/x86/crypto/sha1_avx2_x86_64_asm.S index 1e594d60afa5..5eed620f4676 100644 --- a/arch/x86/crypto/sha1_avx2_x86_64_asm.S +++ b/arch/x86/crypto/sha1_avx2_x86_64_asm.S @@ -645,9 +645,9 @@ _loop3: RESERVE_STACK = (W_SIZE*4 + 8+24) /* Align stack */ - mov %rsp, %rbx + push %rbp + mov %rsp, %rbp and $~(0x20-1), %rsp - push %rbx sub $RESERVE_STACK, %rsp avx2_zeroupper @@ -665,8 +665,8 @@ _loop3: avx2_zeroupper - add $RESERVE_STACK, %rsp - pop %rsp + mov %rbp, %rsp + pop %rbp pop %r15 pop %r14 -- cgit v1.2.3 From ce5846668076aa76a17ab559f0296374e3611fec Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Wed, 24 Feb 2021 10:29:22 -0600 Subject: x86/crypto/sha256-avx2: Standardize stack alignment prologue Use a more standard prologue for saving the stack pointer before realigning the stack. This enables ORC unwinding by allowing objtool to understand the stack realignment. 
Signed-off-by: Josh Poimboeuf Tested-by: Ard Biesheuvel Acked-by: Ard Biesheuvel Tested-by: Sami Tolvanen Acked-by: Peter Zijlstra (Intel) Acked-by: Herbert Xu Link: https://lore.kernel.org/r/8048e7444c49a8137f05265262b83dc50f8fb7f3.1614182415.git.jpoimboe@redhat.com --- arch/x86/crypto/sha256-avx2-asm.S | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/arch/x86/crypto/sha256-avx2-asm.S b/arch/x86/crypto/sha256-avx2-asm.S index 11ff60c29c8b..4087f7432a7e 100644 --- a/arch/x86/crypto/sha256-avx2-asm.S +++ b/arch/x86/crypto/sha256-avx2-asm.S @@ -117,15 +117,13 @@ _XMM_SAVE_SIZE = 0 _INP_END_SIZE = 8 _INP_SIZE = 8 _CTX_SIZE = 8 -_RSP_SIZE = 8 _XFER = 0 _XMM_SAVE = _XFER + _XFER_SIZE _INP_END = _XMM_SAVE + _XMM_SAVE_SIZE _INP = _INP_END + _INP_END_SIZE _CTX = _INP + _INP_SIZE -_RSP = _CTX + _CTX_SIZE -STACK_SIZE = _RSP + _RSP_SIZE +STACK_SIZE = _CTX + _CTX_SIZE # rotate_Xs # Rotate values of symbols X0...X3 @@ -533,11 +531,11 @@ SYM_FUNC_START(sha256_transform_rorx) pushq %r14 pushq %r15 - mov %rsp, %rax + push %rbp + mov %rsp, %rbp + subq $STACK_SIZE, %rsp and $-32, %rsp # align rsp to 32 byte boundary - mov %rax, _RSP(%rsp) - shl $6, NUM_BLKS # convert to bytes jz done_hash @@ -704,7 +702,8 @@ only_one_block: done_hash: - mov _RSP(%rsp), %rsp + mov %rbp, %rsp + pop %rbp popq %r15 popq %r14 -- cgit v1.2.3 From d61684b56edf369f0a6d388088d7c9d59f1618d4 Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Wed, 24 Feb 2021 10:29:23 -0600 Subject: x86/crypto/sha512-avx: Standardize stack alignment prologue Use a more standard prologue for saving the stack pointer before realigning the stack. This enables ORC unwinding by allowing objtool to understand the stack realignment. 
Signed-off-by: Josh Poimboeuf Tested-by: Ard Biesheuvel Acked-by: Ard Biesheuvel Tested-by: Sami Tolvanen Acked-by: Peter Zijlstra (Intel) Acked-by: Herbert Xu Link: https://lore.kernel.org/r/d36e9ea1c819d87fa89b3df3fa83e2a1ede18146.1614182415.git.jpoimboe@redhat.com --- arch/x86/crypto/sha512-avx-asm.S | 41 +++++++++++++++++++--------------------- 1 file changed, 19 insertions(+), 22 deletions(-) diff --git a/arch/x86/crypto/sha512-avx-asm.S b/arch/x86/crypto/sha512-avx-asm.S index 684d58c8bc4f..3d8f0fd4eea8 100644 --- a/arch/x86/crypto/sha512-avx-asm.S +++ b/arch/x86/crypto/sha512-avx-asm.S @@ -76,14 +76,10 @@ tmp0 = %rax W_SIZE = 80*8 # W[t] + K[t] | W[t+1] + K[t+1] WK_SIZE = 2*8 -RSPSAVE_SIZE = 1*8 -GPRSAVE_SIZE = 5*8 frame_W = 0 frame_WK = frame_W + W_SIZE -frame_RSPSAVE = frame_WK + WK_SIZE -frame_GPRSAVE = frame_RSPSAVE + RSPSAVE_SIZE -frame_size = frame_GPRSAVE + GPRSAVE_SIZE +frame_size = frame_WK + WK_SIZE # Useful QWORD "arrays" for simpler memory references # MSG, DIGEST, K_t, W_t are arrays @@ -281,18 +277,18 @@ SYM_FUNC_START(sha512_transform_avx) test msglen, msglen je nowork + # Save GPRs + push %rbx + push %r12 + push %r13 + push %r14 + push %r15 + # Allocate Stack Space - mov %rsp, %rax + push %rbp + mov %rsp, %rbp sub $frame_size, %rsp and $~(0x20 - 1), %rsp - mov %rax, frame_RSPSAVE(%rsp) - - # Save GPRs - mov %rbx, frame_GPRSAVE(%rsp) - mov %r12, frame_GPRSAVE +8*1(%rsp) - mov %r13, frame_GPRSAVE +8*2(%rsp) - mov %r14, frame_GPRSAVE +8*3(%rsp) - mov %r15, frame_GPRSAVE +8*4(%rsp) updateblock: @@ -353,15 +349,16 @@ updateblock: dec msglen jnz updateblock - # Restore GPRs - mov frame_GPRSAVE(%rsp), %rbx - mov frame_GPRSAVE +8*1(%rsp), %r12 - mov frame_GPRSAVE +8*2(%rsp), %r13 - mov frame_GPRSAVE +8*3(%rsp), %r14 - mov frame_GPRSAVE +8*4(%rsp), %r15 - # Restore Stack Pointer - mov frame_RSPSAVE(%rsp), %rsp + mov %rbp, %rsp + pop %rbp + + # Restore GPRs + pop %r15 + pop %r14 + pop %r13 + pop %r12 + pop %rbx nowork: ret -- cgit v1.2.3 From 
ec063e090bd6487097d459bb4272508b78448270 Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Wed, 24 Feb 2021 10:29:24 -0600 Subject: x86/crypto/sha512-avx2: Standardize stack alignment prologue Use a more standard prologue for saving the stack pointer before realigning the stack. This enables ORC unwinding by allowing objtool to understand the stack realignment. Signed-off-by: Josh Poimboeuf Tested-by: Ard Biesheuvel Acked-by: Ard Biesheuvel Tested-by: Sami Tolvanen Acked-by: Peter Zijlstra (Intel) Acked-by: Herbert Xu Link: https://lore.kernel.org/r/b1a7b29fcfc65d60a3b6e77ef75f4762a5b8488d.1614182415.git.jpoimboe@redhat.com --- arch/x86/crypto/sha512-avx2-asm.S | 42 +++++++++++++++++++-------------------- 1 file changed, 20 insertions(+), 22 deletions(-) diff --git a/arch/x86/crypto/sha512-avx2-asm.S b/arch/x86/crypto/sha512-avx2-asm.S index 3a44bdcfd583..072cb0f0deae 100644 --- a/arch/x86/crypto/sha512-avx2-asm.S +++ b/arch/x86/crypto/sha512-avx2-asm.S @@ -102,17 +102,13 @@ SRND_SIZE = 1*8 INP_SIZE = 1*8 INPEND_SIZE = 1*8 CTX_SIZE = 1*8 -RSPSAVE_SIZE = 1*8 -GPRSAVE_SIZE = 5*8 frame_XFER = 0 frame_SRND = frame_XFER + XFER_SIZE frame_INP = frame_SRND + SRND_SIZE frame_INPEND = frame_INP + INP_SIZE frame_CTX = frame_INPEND + INPEND_SIZE -frame_RSPSAVE = frame_CTX + CTX_SIZE -frame_GPRSAVE = frame_RSPSAVE + RSPSAVE_SIZE -frame_size = frame_GPRSAVE + GPRSAVE_SIZE +frame_size = frame_CTX + CTX_SIZE ## assume buffers not aligned #define VMOVDQ vmovdqu @@ -570,18 +566,18 @@ frame_size = frame_GPRSAVE + GPRSAVE_SIZE # "blocks" is the message length in SHA512 blocks ######################################################################## SYM_FUNC_START(sha512_transform_rorx) + # Save GPRs + push %rbx + push %r12 + push %r13 + push %r14 + push %r15 + # Allocate Stack Space - mov %rsp, %rax + push %rbp + mov %rsp, %rbp sub $frame_size, %rsp and $~(0x20 - 1), %rsp - mov %rax, frame_RSPSAVE(%rsp) - - # Save GPRs - mov %rbx, 8*0+frame_GPRSAVE(%rsp) - mov %r12, 
8*1+frame_GPRSAVE(%rsp) - mov %r13, 8*2+frame_GPRSAVE(%rsp) - mov %r14, 8*3+frame_GPRSAVE(%rsp) - mov %r15, 8*4+frame_GPRSAVE(%rsp) shl $7, NUM_BLKS # convert to bytes jz done_hash @@ -672,15 +668,17 @@ loop2: done_hash: -# Restore GPRs - mov 8*0+frame_GPRSAVE(%rsp), %rbx - mov 8*1+frame_GPRSAVE(%rsp), %r12 - mov 8*2+frame_GPRSAVE(%rsp), %r13 - mov 8*3+frame_GPRSAVE(%rsp), %r14 - mov 8*4+frame_GPRSAVE(%rsp), %r15 - # Restore Stack Pointer - mov frame_RSPSAVE(%rsp), %rsp + mov %rbp, %rsp + pop %rbp + + # Restore GPRs + pop %r15 + pop %r14 + pop %r13 + pop %r12 + pop %rbx + ret SYM_FUNC_END(sha512_transform_rorx) -- cgit v1.2.3 From 27d26793f2105281d9374928448142777cef6f74 Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Wed, 24 Feb 2021 10:29:25 -0600 Subject: x86/crypto/sha512-ssse3: Standardize stack alignment prologue Use a more standard prologue for saving the stack pointer before realigning the stack. This enables ORC unwinding by allowing objtool to understand the stack realignment. 
Signed-off-by: Josh Poimboeuf Tested-by: Ard Biesheuvel Acked-by: Ard Biesheuvel Tested-by: Sami Tolvanen Acked-by: Peter Zijlstra (Intel) Acked-by: Herbert Xu Link: https://lore.kernel.org/r/6ecaaac9f3828fbb903513bf90c34a08380a8e35.1614182415.git.jpoimboe@redhat.com --- arch/x86/crypto/sha512-ssse3-asm.S | 41 ++++++++++++++++++-------------------- 1 file changed, 19 insertions(+), 22 deletions(-) diff --git a/arch/x86/crypto/sha512-ssse3-asm.S b/arch/x86/crypto/sha512-ssse3-asm.S index 50812af0b083..bd51c9070bed 100644 --- a/arch/x86/crypto/sha512-ssse3-asm.S +++ b/arch/x86/crypto/sha512-ssse3-asm.S @@ -74,14 +74,10 @@ tmp0 = %rax W_SIZE = 80*8 WK_SIZE = 2*8 -RSPSAVE_SIZE = 1*8 -GPRSAVE_SIZE = 5*8 frame_W = 0 frame_WK = frame_W + W_SIZE -frame_RSPSAVE = frame_WK + WK_SIZE -frame_GPRSAVE = frame_RSPSAVE + RSPSAVE_SIZE -frame_size = frame_GPRSAVE + GPRSAVE_SIZE +frame_size = frame_WK + WK_SIZE # Useful QWORD "arrays" for simpler memory references # MSG, DIGEST, K_t, W_t are arrays @@ -283,18 +279,18 @@ SYM_FUNC_START(sha512_transform_ssse3) test msglen, msglen je nowork + # Save GPRs + push %rbx + push %r12 + push %r13 + push %r14 + push %r15 + # Allocate Stack Space - mov %rsp, %rax + push %rbp + mov %rsp, %rbp sub $frame_size, %rsp and $~(0x20 - 1), %rsp - mov %rax, frame_RSPSAVE(%rsp) - - # Save GPRs - mov %rbx, frame_GPRSAVE(%rsp) - mov %r12, frame_GPRSAVE +8*1(%rsp) - mov %r13, frame_GPRSAVE +8*2(%rsp) - mov %r14, frame_GPRSAVE +8*3(%rsp) - mov %r15, frame_GPRSAVE +8*4(%rsp) updateblock: @@ -355,15 +351,16 @@ updateblock: dec msglen jnz updateblock - # Restore GPRs - mov frame_GPRSAVE(%rsp), %rbx - mov frame_GPRSAVE +8*1(%rsp), %r12 - mov frame_GPRSAVE +8*2(%rsp), %r13 - mov frame_GPRSAVE +8*3(%rsp), %r14 - mov frame_GPRSAVE +8*4(%rsp), %r15 - # Restore Stack Pointer - mov frame_RSPSAVE(%rsp), %rsp + mov %rbp, %rsp + pop %rbp + + # Restore GPRs + pop %r15 + pop %r14 + pop %r13 + pop %r12 + pop %rbx nowork: ret -- cgit v1.2.3 From 
7d3d10e0e85fb7c23a86a70f795b1eabd2bc030b Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Wed, 24 Feb 2021 10:29:26 -0600 Subject: x86/crypto: Enable objtool in crypto code Now that all the stack alignment prologues have been cleaned up in the crypto code, enable objtool. Among other benefits, this will allow ORC unwinding to work. Signed-off-by: Josh Poimboeuf Tested-by: Ard Biesheuvel Acked-by: Ard Biesheuvel Tested-by: Sami Tolvanen Acked-by: Peter Zijlstra (Intel) Acked-by: Herbert Xu Link: https://lore.kernel.org/r/fc2a1918c50e33e46ef0e9a5de02743f2f6e3639.1614182415.git.jpoimboe@redhat.com --- arch/x86/crypto/Makefile | 2 -- 1 file changed, 2 deletions(-) diff --git a/arch/x86/crypto/Makefile b/arch/x86/crypto/Makefile index b28e36b7c96b..d0959e7b809f 100644 --- a/arch/x86/crypto/Makefile +++ b/arch/x86/crypto/Makefile @@ -2,8 +2,6 @@ # # x86 crypto algorithms -OBJECT_FILES_NON_STANDARD := y - obj-$(CONFIG_CRYPTO_TWOFISH_586) += twofish-i586.o twofish-i586-y := twofish-i586-asm_32.o twofish_glue.o obj-$(CONFIG_CRYPTO_TWOFISH_X86_64) += twofish-x86_64.o -- cgit v1.2.3