From 26fa1263b0ba35f01a25f4f7e70ba7c44455d1e3 Mon Sep 17 00:00:00 2001
From: Lai Jiangshan
Date: Sun, 19 Apr 2020 14:40:49 +0000
Subject: x86/entry/64: Remove an unused label

The label .Lcommon_\sym was introduced by commit 39e9543344fa
("x86-64: Reduce amount of redundant code generated for
invalidate_interruptNN"), and all remaining users of it were removed by
commit 52aec3308db8 ("x86/tlb: replace INVALIDATE_TLB_VECTOR by
CALL_FUNCTION_VECTOR").

Signed-off-by: Lai Jiangshan
Signed-off-by: Thomas Gleixner
Link: https://lkml.kernel.org/r/20200419144049.1906-4-laijs@linux.alibaba.com
---
 arch/x86/entry/entry_64.S | 1 -
 1 file changed, 1 deletion(-)

(limited to 'arch')

diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index eead1e2bebd5..609c71e3dba1 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -798,7 +798,6 @@ _ASM_NOKPROBE(common_interrupt)
 SYM_CODE_START(\sym)
	UNWIND_HINT_IRET_REGS
	pushq	$~(\num)
-.Lcommon_\sym:
	call	interrupt_entry
	UNWIND_HINT_REGS indirect=1
	call	\do_sym	/* rdi points to pt_regs */
--
cgit v1.2.3


From c75890700455113c366f795f3d22ee03623835e8 Mon Sep 17 00:00:00 2001
From: Lai Jiangshan
Date: Sun, 19 Apr 2020 14:40:47 +0000
Subject: x86/entry/64: Remove unneeded kernel CR3 switching

When native_load_gs_index() fails on .Lgs_change, the active CR3 is
already the kernel CR3, so there is no need to switch it.

Signed-off-by: Lai Jiangshan
Signed-off-by: Thomas Gleixner
Link: https://lkml.kernel.org/r/20200419144049.1906-2-laijs@linux.alibaba.com
---
 arch/x86/entry/entry_64.S | 1 -
 1 file changed, 1 deletion(-)

(limited to 'arch')

diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index 609c71e3dba1..87ffa792bc99 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -1334,7 +1334,6 @@ SYM_CODE_START_LOCAL(error_entry)
	 */
	SWAPGS
	FENCE_SWAPGS_USER_ENTRY
-	SWITCH_TO_KERNEL_CR3 scratch_reg=%rax
	jmp	.Lerror_entry_done

.Lbstep_iret:
--
cgit v1.2.3


From fbaed278a3cc72a46aadae667b8c6754b78640a6 Mon Sep 17 00:00:00 2001
From: Lai Jiangshan
Date: Sun, 19 Apr 2020 14:40:48 +0000
Subject: x86/idt: Remove address operator on function machine_check()

machine_check is a function address; applying the address-of operator
to it is a no-op for the compiler. Make it consistent with the other
function addresses in the same file.
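For illustration only, a stand-alone user-space sketch (handler() is a
hypothetical stand-in for machine_check(), not kernel code): a function
designator already decays to a pointer to the function, so the & adds
nothing:

	#include <stdio.h>

	static void handler(void) { }	/* stand-in for machine_check() */

	int main(void)
	{
		/* Both expressions yield the same function pointer. */
		printf("%d\n", &handler == handler);	/* prints 1 */
		return 0;
	}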

Signed-off-by: Lai Jiangshan
Signed-off-by: Thomas Gleixner
Link: https://lkml.kernel.org/r/20200419144049.1906-3-laijs@linux.alibaba.com
---
 arch/x86/kernel/idt.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/kernel/idt.c b/arch/x86/kernel/idt.c
index 87ef69a72c52..98bcb502f967 100644
--- a/arch/x86/kernel/idt.c
+++ b/arch/x86/kernel/idt.c
@@ -93,7 +93,7 @@ static const __initconst struct idt_data def_idts[] = {
	INTG(X86_TRAP_DB,	debug),

 #ifdef CONFIG_X86_MCE
-	INTG(X86_TRAP_MC,	&machine_check),
+	INTG(X86_TRAP_MC,	machine_check),
 #endif

	SYSG(X86_TRAP_OF,	overflow),
@@ -186,7 +186,7 @@ static const __initconst struct idt_data ist_idts[] = {
	ISTG(X86_TRAP_NMI,	nmi,		IST_INDEX_NMI),
	ISTG(X86_TRAP_DF,	double_fault,	IST_INDEX_DF),
 #ifdef CONFIG_X86_MCE
-	ISTG(X86_TRAP_MC,	&machine_check,	IST_INDEX_MCE),
+	ISTG(X86_TRAP_MC,	machine_check,	IST_INDEX_MCE),
 #endif
 };
--
cgit v1.2.3


From a0bb51f2638e0810c347024679239fd10a8f7990 Mon Sep 17 00:00:00 2001
From: Vitaly Kuznetsov
Date: Tue, 28 Apr 2020 11:38:22 +0200
Subject: x86/xen: Split HVM vector callback setup and interrupt gate allocation

As a preparatory change for making alloc_intr_gate() __init, split
xen_callback_vector() into callback vector setup via hypercall
(xen_setup_callback_vector()) and interrupt gate allocation
(xen_alloc_callback_vector()).

xen_setup_callback_vector() is being called twice: on init and upon
system resume from xen_hvm_post_suspend(). alloc_intr_gate() only needs
to be called once.

Suggested-by: Thomas Gleixner
Signed-off-by: Vitaly Kuznetsov
Signed-off-by: Thomas Gleixner
Link: https://lkml.kernel.org/r/20200428093824.1451532-2-vkuznets@redhat.com
---
 arch/x86/xen/suspend_hvm.c       |  2 +-
 arch/x86/xen/xen-ops.h           |  2 +-
 drivers/xen/events/events_base.c | 28 +++++++++++++++++-----------
 3 files changed, 19 insertions(+), 13 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/xen/suspend_hvm.c b/arch/x86/xen/suspend_hvm.c
index e666b614cf6d..5152afe16876 100644
--- a/arch/x86/xen/suspend_hvm.c
+++ b/arch/x86/xen/suspend_hvm.c
@@ -13,6 +13,6 @@ void xen_hvm_post_suspend(int suspend_cancelled)
		xen_hvm_init_shared_info();
		xen_vcpu_restore();
	}
-	xen_callback_vector();
+	xen_setup_callback_vector();
	xen_unplug_emulated_devices();
 }
diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h
index 45a441c33d6d..1cc1568bfe04 100644
--- a/arch/x86/xen/xen-ops.h
+++ b/arch/x86/xen/xen-ops.h
@@ -55,7 +55,7 @@ void xen_enable_sysenter(void);
 void xen_enable_syscall(void);
 void xen_vcpu_restore(void);

-void xen_callback_vector(void);
+void xen_setup_callback_vector(void);
 void xen_hvm_init_shared_info(void);
 void xen_unplug_emulated_devices(void);

diff --git a/drivers/xen/events/events_base.c b/drivers/xen/events/events_base.c
index 3a791c8485d0..eb35c3cda9a6 100644
--- a/drivers/xen/events/events_base.c
+++ b/drivers/xen/events/events_base.c
@@ -1639,26 +1639,30 @@ EXPORT_SYMBOL_GPL(xen_set_callback_via);
 /* Vector callbacks are better than PCI interrupts to receive event
  * channel notifications because we can receive vector callbacks on any
  * vcpu and we don't need PCI support or APIC interactions. */
-void xen_callback_vector(void)
+void xen_setup_callback_vector(void)
 {
-	int rc;
	uint64_t callback_via;

	if (xen_have_vector_callback) {
		callback_via = HVM_CALLBACK_VECTOR(HYPERVISOR_CALLBACK_VECTOR);
-		rc = xen_set_callback_via(callback_via);
-		if (rc) {
+		if (xen_set_callback_via(callback_via)) {
			pr_err("Request for Xen HVM callback vector failed\n");
			xen_have_vector_callback = 0;
-			return;
		}
-		pr_info_once("Xen HVM callback vector for event delivery is enabled\n");
-		alloc_intr_gate(HYPERVISOR_CALLBACK_VECTOR,
-				xen_hvm_callback_vector);
	}
 }
+
+static __init void xen_alloc_callback_vector(void)
+{
+	if (!xen_have_vector_callback)
+		return;
+
+	pr_info("Xen HVM callback vector for event delivery is enabled\n");
+	alloc_intr_gate(HYPERVISOR_CALLBACK_VECTOR, xen_hvm_callback_vector);
+}
 #else
-void xen_callback_vector(void) {}
+void xen_setup_callback_vector(void) {}
+static inline void xen_alloc_callback_vector(void) {}
 #endif

 #undef MODULE_PARAM_PREFIX
@@ -1692,8 +1696,10 @@ void __init xen_init_IRQ(void)
		if (xen_initial_domain())
			pci_xen_initial_domain();
	}
-	if (xen_feature(XENFEAT_hvm_callback_vector))
-		xen_callback_vector();
+	if (xen_feature(XENFEAT_hvm_callback_vector)) {
+		xen_setup_callback_vector();
+		xen_alloc_callback_vector();
+	}

	if (xen_hvm_domain()) {
		native_init_IRQ();
--
cgit v1.2.3


From 06184325a1cce27a02a688d98740f90fe06e0133 Mon Sep 17 00:00:00 2001
From: Vitaly Kuznetsov
Date: Tue, 28 Apr 2020 11:38:23 +0200
Subject: x86/idt: Annotate alloc_intr_gate() with __init

There seems to be no reason to allocate interrupt gates after init.
Mark alloc_intr_gate() as __init and add WARN_ON() checks making sure
it is only used before idt_setup_apic_and_irq_gates() finalizes IDT
setup and maps all un-allocated entries to spurious entries.

Suggested-by: Thomas Gleixner
Signed-off-by: Vitaly Kuznetsov
Signed-off-by: Thomas Gleixner
Link: https://lkml.kernel.org/r/20200428093824.1451532-3-vkuznets@redhat.com
---
 arch/x86/kernel/idt.c | 16 +++++++++++++---
 1 file changed, 13 insertions(+), 3 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/kernel/idt.c b/arch/x86/kernel/idt.c
index 98bcb502f967..0e9205137de8 100644
--- a/arch/x86/kernel/idt.c
+++ b/arch/x86/kernel/idt.c
@@ -51,6 +51,9 @@ struct idt_data {
 #define TSKG(_vector, _gdt)				\
	G(_vector, NULL, DEFAULT_STACK, GATE_TASK, DPL0, _gdt << 3)

+
+static bool idt_setup_done __initdata;
+
 /*
 * Early traps running on the DEFAULT_STACK because the other interrupt
 * stacks work only after cpu_init().
@@ -323,6 +326,7 @@ void __init idt_setup_apic_and_irq_gates(void)
		set_intr_gate(i, entry);
	}
 #endif
+	idt_setup_done = true;
 }

 /**
@@ -352,6 +356,7 @@ void idt_invalidate(void *addr)
	load_idt(&idt);
 }

+/* This goes away once ASYNC_PF is sanitized */
 void __init update_intr_gate(unsigned int n, const void *addr)
 {
	if (WARN_ON_ONCE(!test_bit(n, system_vectors)))
@@ -359,9 +364,14 @@ void __init update_intr_gate(unsigned int n, const void *addr)
	set_intr_gate(n, addr);
 }

-void alloc_intr_gate(unsigned int n, const void *addr)
+void __init alloc_intr_gate(unsigned int n, const void *addr)
 {
-	BUG_ON(n < FIRST_SYSTEM_VECTOR);
-	if (!test_and_set_bit(n, system_vectors))
+	if (WARN_ON(n < FIRST_SYSTEM_VECTOR))
+		return;
+
+	if (WARN_ON(idt_setup_done))
+		return;
+
+	if (!WARN_ON(test_and_set_bit(n, system_vectors)))
		set_intr_gate(n, addr);
 }
--
cgit v1.2.3


From 1f1fbc70c10e81f70e9fbe2102d439c883269811 Mon Sep 17 00:00:00 2001
From: Vitaly Kuznetsov
Date: Tue, 28 Apr 2020 11:38:24 +0200
Subject: x86/idt: Keep spurious entries unset in system_vectors

With commit dc20b2d52653 ("x86/idt: Move interrupt gate initialization
to IDT code") non-assigned system vectors are also marked as used in
the 'used_vectors' (now 'system_vectors') bitmap. This makes the checks
in arch_show_interrupts() of whether a particular system vector is
allocated always pass, so e.g. the 'Hyper-V reenlightenment interrupts'
entry always shows up in /proc/interrupts.

Another side effect of having all unassigned system vectors marked as
used is that irq_matrix_debug_show() will wrongly count them among
'System' vectors.

As it is now ensured that alloc_intr_gate() is not called after init,
it is possible to leave unused entries in 'system_vectors' unset to
fix these issues.

Signed-off-by: Vitaly Kuznetsov
Signed-off-by: Thomas Gleixner
Link: https://lkml.kernel.org/r/20200428093824.1451532-4-vkuznets@redhat.com
---
 arch/x86/kernel/idt.c | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/x86/kernel/idt.c b/arch/x86/kernel/idt.c
index 0e9205137de8..36fef90a38e7 100644
--- a/arch/x86/kernel/idt.c
+++ b/arch/x86/kernel/idt.c
@@ -321,7 +321,11 @@ void __init idt_setup_apic_and_irq_gates(void)

 #ifdef CONFIG_X86_LOCAL_APIC
	for_each_clear_bit_from(i, system_vectors, NR_VECTORS) {
-		set_bit(i, system_vectors);
+		/*
+		 * Don't set the non assigned system vectors in the
+		 * system_vectors bitmap. Otherwise they show up in
+		 * /proc/interrupts.
+		 */
		entry = spurious_entries_start + 8 * (i - FIRST_SYSTEM_VECTOR);
		set_intr_gate(i, entry);
	}
--
cgit v1.2.3


From 24ae0c91cbc57c2deb0401bd653453a508acdcee Mon Sep 17 00:00:00 2001
From: Andy Lutomirski
Date: Mon, 24 Feb 2020 13:24:58 +0100
Subject: x86/hw_breakpoint: Prevent data breakpoints on cpu_entry_area

A data breakpoint near the top of an IST stack will cause unrecoverable
recursion. A data breakpoint on the GDT, IDT, or TSS is terrifying.
Prevent either of these from happening.
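The guard added below is the classic closed-interval overlap test. A
minimal sketch of the idea (range_overlaps() and its parameters are
illustrative names, standing in for within_cpu_entry_area() with
CPU_ENTRY_AREA_BASE and CPU_ENTRY_AREA_TOTAL_SIZE):

	/* true if [addr, end] intersects [base, base + size - 1] */
	static bool range_overlaps(unsigned long addr, unsigned long end,
				   unsigned long base, unsigned long size)
	{
		return end >= base && addr < base + size;
	}

The caller additionally rejects ranges whose end computation
(bp_addr + bp_len - 1) wraps around, so a bogus end < addr can never
slip past the test.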

Co-developed-by: Peter Zijlstra
Signed-off-by: Andy Lutomirski
Signed-off-by: Peter Zijlstra (Intel)
Signed-off-by: Thomas Gleixner
Reviewed-by: Borislav Petkov
Reviewed-by: Lai Jiangshan
Reviewed-by: Alexandre Chartre
Link: https://lkml.kernel.org/r/20200505134058.272448010@linutronix.de
---
 arch/x86/kernel/hw_breakpoint.c | 25 +++++++++++++++++++++++++
 1 file changed, 25 insertions(+)

(limited to 'arch')

diff --git a/arch/x86/kernel/hw_breakpoint.c b/arch/x86/kernel/hw_breakpoint.c
index 4d8d53ed02c9..d42fc0eaf193 100644
--- a/arch/x86/kernel/hw_breakpoint.c
+++ b/arch/x86/kernel/hw_breakpoint.c
@@ -227,10 +227,35 @@ int arch_check_bp_in_kernelspace(struct arch_hw_breakpoint *hw)
	return (va >= TASK_SIZE_MAX) || ((va + len - 1) >= TASK_SIZE_MAX);
 }

+/*
+ * Checks whether the range from addr to end, inclusive, overlaps the CPU
+ * entry area range.
+ */
+static inline bool within_cpu_entry_area(unsigned long addr, unsigned long end)
+{
+	return end >= CPU_ENTRY_AREA_BASE &&
+	       addr < (CPU_ENTRY_AREA_BASE + CPU_ENTRY_AREA_TOTAL_SIZE);
+}
+
 static int arch_build_bp_info(struct perf_event *bp,
			      const struct perf_event_attr *attr,
			      struct arch_hw_breakpoint *hw)
 {
+	unsigned long bp_end;
+
+	bp_end = attr->bp_addr + attr->bp_len - 1;
+	if (bp_end < attr->bp_addr)
+		return -EINVAL;
+
+	/*
+	 * Prevent any breakpoint of any type that overlaps the
+	 * cpu_entry_area. This protects the IST stacks and also
+	 * reduces the chance that we ever find out what happens if
+	 * there's a data breakpoint on the GDT, IDT, or TSS.
+	 */
+	if (within_cpu_entry_area(attr->bp_addr, bp_end))
+		return -EINVAL;
+
	hw->address = attr->bp_addr;
	hw->mask = 0;
--
cgit v1.2.3


From e9660391d0ebd174b169af3d6de680c2f836027c Mon Sep 17 00:00:00 2001
From: Peter Zijlstra
Date: Thu, 20 Feb 2020 13:17:27 +0100
Subject: x86/doublefault: Remove memmove() call

Use of memmove() in #DF is problematic, considering tracing and other
instrumentation.

Remove the memmove() call and simply write out what needs doing; this
even clarifies the code, win-win! The code copies from the espfix64
stack to the normal task stack; there is no possible way for that to
overlap.

Survives selftests/x86, specifically sigreturn_64.

Suggested-by: Borislav Petkov
Signed-off-by: Peter Zijlstra (Intel)
Signed-off-by: Thomas Gleixner
Reviewed-by: Alexandre Chartre
Acked-by: Andy Lutomirski
Link: https://lkml.kernel.org/r/20200505134058.863038566@linutronix.de
---
 arch/x86/kernel/traps.c | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index 4cc541051994..48468f61202c 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -299,6 +299,7 @@ dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code, unsign
	    regs->ip == (unsigned long)native_irq_return_iret) {
		struct pt_regs *gpregs = (struct pt_regs *)this_cpu_read(cpu_tss_rw.x86_tss.sp0) - 1;
+		unsigned long *p = (unsigned long *)regs->sp;

		/*
		 * regs->sp points to the failing IRET frame on the
		 * ESPFIX64 stack.  Copy it to the entry stack.  This fills
		 * in gpregs->ss through gpregs->ip.
		 */
-		memmove(&gpregs->ip, (void *)regs->sp, 5*8);
+		gpregs->ip	= p[0];
+		gpregs->cs	= p[1];
+		gpregs->flags	= p[2];
+		gpregs->sp	= p[3];
+		gpregs->ss	= p[4];
		gpregs->orig_ax = 0;  /* Missing (lost) #GP error code */

		/*
--
cgit v1.2.3


From 725005897ec4ba07d6227a1ac3121048153eb3ce Mon Sep 17 00:00:00 2001
From: Thomas Gleixner
Date: Tue, 25 Feb 2020 23:16:08 +0100
Subject: x86/entry/64: Avoid pointless code when CONTEXT_TRACKING=n

GAS cannot optimize out the test and conditional jump when context
tracking is disabled and CALL_enter_from_user_mode is an empty macro.

Wrap it in #ifdeffery. Will go away once all this is moved to C.

Signed-off-by: Thomas Gleixner
Reviewed-by: Alexandre Chartre
Acked-by: Frederic Weisbecker
Acked-by: Andy Lutomirski
Acked-by: Peter Zijlstra
Link: https://lkml.kernel.org/r/20200505134058.955968069@linutronix.de
---
 arch/x86/entry/entry_64.S | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'arch')

diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index 87ffa792bc99..a15b70ac87b5 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -889,12 +889,14 @@ apicinterrupt IRQ_WORK_VECTOR			irq_work_interrupt		smp_irq_work_interrupt
	TRACE_IRQS_OFF
	.endif

+#ifdef CONFIG_CONTEXT_TRACKING
	.if \paranoid == 0
	testb	$3, CS(%rsp)
	jz	.Lfrom_kernel_no_context_tracking_\@
	CALL_enter_from_user_mode
.Lfrom_kernel_no_context_tracking_\@:
	.endif
+#endif

	movq	%rsp, %rdi			/* pt_regs pointer */
--
cgit v1.2.3


From 44d7e4fbc08eca153ea4b436a1440639dff2c771 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner
Date: Thu, 5 Mar 2020 11:16:49 +0100
Subject: x86/entry: Remove the unused LOCKDEP_SYSEXIT cruft

No users left for two years, since commit 21d375b6b34f ("x86/entry/64:
Remove the SYSCALL64 fast path").

Signed-off-by: Thomas Gleixner
Reviewed-by: Alexandre Chartre
Acked-by: Peter Zijlstra
Link: https://lkml.kernel.org/r/20200505134059.061301403@linutronix.de
---
 arch/x86/entry/thunk_64.S       |  5 -----
 arch/x86/include/asm/irqflags.h | 24 ------------------------
 2 files changed, 29 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/entry/thunk_64.S b/arch/x86/entry/thunk_64.S
index dbe4493b534e..34f980c9b766 100644
--- a/arch/x86/entry/thunk_64.S
+++ b/arch/x86/entry/thunk_64.S
@@ -42,10 +42,6 @@ SYM_FUNC_END(\name)
	THUNK trace_hardirqs_off_thunk,trace_hardirqs_off_caller,1
 #endif

-#ifdef CONFIG_DEBUG_LOCK_ALLOC
-	THUNK lockdep_sys_exit_thunk,lockdep_sys_exit
-#endif
-
 #ifdef CONFIG_PREEMPTION
	THUNK preempt_schedule_thunk, preempt_schedule
	THUNK preempt_schedule_notrace_thunk, preempt_schedule_notrace
@@ -54,7 +50,6 @@ SYM_FUNC_END(\name)
 #endif

 #if defined(CONFIG_TRACE_IRQFLAGS) \
- || defined(CONFIG_DEBUG_LOCK_ALLOC) \
 || defined(CONFIG_PREEMPTION)
SYM_CODE_START_LOCAL_NOALIGN(.L_restore)
	popq %r11
diff --git a/arch/x86/include/asm/irqflags.h b/arch/x86/include/asm/irqflags.h
index 8a0e56e1dcc9..e00f064b009e 100644
--- a/arch/x86/include/asm/irqflags.h
+++ b/arch/x86/include/asm/irqflags.h
@@ -180,30 +180,6 @@ static inline int arch_irqs_disabled(void)
 #  define TRACE_IRQS_ON
 #  define TRACE_IRQS_OFF
 #endif
-#ifdef CONFIG_DEBUG_LOCK_ALLOC
-#  ifdef CONFIG_X86_64
-#   define LOCKDEP_SYS_EXIT	call lockdep_sys_exit_thunk
-#   define LOCKDEP_SYS_EXIT_IRQ \
-	TRACE_IRQS_ON; \
-	sti; \
-	call lockdep_sys_exit_thunk; \
-	cli; \
-	TRACE_IRQS_OFF;
-#  else
-#   define LOCKDEP_SYS_EXIT \
-	pushl %eax; \
-	pushl %ecx; \
-	pushl %edx; \
-	call lockdep_sys_exit; \
-	popl %edx; \
-	popl %ecx; \
-	popl %eax;
-#   define LOCKDEP_SYS_EXIT_IRQ
-#  endif
-#else
-#  define LOCKDEP_SYS_EXIT
-#  define LOCKDEP_SYS_EXIT_IRQ
-#endif

 #endif /* __ASSEMBLY__ */
 #endif
--
cgit v1.2.3


From 20355e5f73a75e58cee4c80d4cd92ce0d1628023 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra
Date: Thu, 5 Mar 2020 14:01:25 +0100
Subject: x86/entry: Exclude low level entry code from sanitizing

The sanitizers are not really applicable to the fragile low level entry
code. Entry code needs to carefully set up a normal 'runtime'
environment.

Signed-off-by: Peter Zijlstra (Intel)
Signed-off-by: Thomas Gleixner
Reviewed-by: Alexandre Chartre
Link: https://lkml.kernel.org/r/20200505134059.970057117@linutronix.de
---
 arch/x86/entry/Makefile | 8 ++++++++
 1 file changed, 8 insertions(+)

(limited to 'arch')

diff --git a/arch/x86/entry/Makefile b/arch/x86/entry/Makefile
index 85eb381259c2..cdf45ff92dc8 100644
--- a/arch/x86/entry/Makefile
+++ b/arch/x86/entry/Makefile
@@ -3,6 +3,14 @@
 # Makefile for the x86 low level entry code
 #

+KASAN_SANITIZE := n
+UBSAN_SANITIZE := n
+KCOV_INSTRUMENT := n
+
+CFLAGS_REMOVE_common.o = $(CC_FLAGS_FTRACE) -fstack-protector -fstack-protector-strong
+CFLAGS_REMOVE_syscall_32.o = $(CC_FLAGS_FTRACE) -fstack-protector -fstack-protector-strong
+CFLAGS_REMOVE_syscall_64.o = $(CC_FLAGS_FTRACE) -fstack-protector -fstack-protector-strong
+
 OBJECT_FILES_NON_STANDARD_entry_64_compat.o := y

 CFLAGS_syscall_64.o += $(call cc-option,-Wno-override-init,)
--
cgit v1.2.3


From 5916d5f9b3347344a3d96ba6b54cf8e290eba96a Mon Sep 17 00:00:00 2001
From: Thomas Gleixner
Date: Fri, 13 Mar 2020 13:49:51 +0100
Subject: bug: Annotate WARN/BUG/stackfail as noinstr safe

Warnings, bugs and stack protection failures from noinstr sections,
e.g. low level and early entry code, are likely to be fatal.

Mark them as "safe" to be invoked from noinstr protected code to avoid
annotating all usage sites. Getting the information out is important.

Signed-off-by: Thomas Gleixner
Reviewed-by: Alexandre Chartre
Acked-by: Peter Zijlstra
Link: https://lkml.kernel.org/r/20200505134100.376598577@linutronix.de
---
 arch/x86/include/asm/bug.h | 3 +++
 include/asm-generic/bug.h  | 9 +++++++--
 kernel/panic.c             | 4 +++-
 3 files changed, 13 insertions(+), 3 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/include/asm/bug.h b/arch/x86/include/asm/bug.h
index facba9bc30ca..fb34ff641e0a 100644
--- a/arch/x86/include/asm/bug.h
+++ b/arch/x86/include/asm/bug.h
@@ -70,14 +70,17 @@ do {								\
 #define HAVE_ARCH_BUG
 #define BUG()							\
 do {								\
+	instrumentation_begin();				\
	_BUG_FLAGS(ASM_UD2, 0);					\
	unreachable();						\
 } while (0)

 #define __WARN_FLAGS(flags)					\
 do {								\
+	instrumentation_begin();				\
	_BUG_FLAGS(ASM_UD2, BUGFLAG_WARNING|(flags));		\
	annotate_reachable();					\
+	instrumentation_end();					\
 } while (0)

 #include <asm-generic/bug.h>
diff --git a/include/asm-generic/bug.h b/include/asm-generic/bug.h
index 384b5c835ced..c94e33ae3e7b 100644
--- a/include/asm-generic/bug.h
+++ b/include/asm-generic/bug.h
@@ -83,14 +83,19 @@ extern __printf(4, 5)
 void warn_slowpath_fmt(const char *file, const int line, unsigned taint,
		       const char *fmt, ...);
 #define __WARN()		__WARN_printf(TAINT_WARN, NULL)
-#define __WARN_printf(taint, arg...)				\
-	warn_slowpath_fmt(__FILE__, __LINE__, taint, arg)
+#define __WARN_printf(taint, arg...) do {			\
+		instrumentation_begin();			\
+		warn_slowpath_fmt(__FILE__, __LINE__, taint, arg);\
+		instrumentation_end();				\
+	} while (0)
 #else
 extern __printf(1, 2) void __warn_printk(const char *fmt, ...);
 #define __WARN() __WARN_FLAGS(BUGFLAG_TAINT(TAINT_WARN))
 #define __WARN_printf(taint, arg...)
do {	\
+		instrumentation_begin();			\
		__warn_printk(arg);				\
+		__WARN_FLAGS(BUGFLAG_NO_CUT_HERE | BUGFLAG_TAINT(taint));\
+		instrumentation_end();				\
	} while (0)
 #define WARN_ON_ONCE(condition) ({				\
	int __ret_warn_on = !!(condition);			\
diff --git a/kernel/panic.c b/kernel/panic.c
index 85568bbfb12b..e2157ca387c8 100644
--- a/kernel/panic.c
+++ b/kernel/panic.c
@@ -680,10 +680,12 @@ device_initcall(register_warn_debugfs);
 * Called when gcc's -fstack-protector feature is used, and
 * gcc detects corruption of the on-stack canary value
 */
-__visible void __stack_chk_fail(void)
+__visible noinstr void __stack_chk_fail(void)
 {
+	instrumentation_begin();
	panic("stack-protector: Kernel stack is corrupted in: %pB",
		__builtin_return_address(0));
+	instrumentation_end();
 }
 EXPORT_SYMBOL(__stack_chk_fail);
--
cgit v1.2.3


From fba8dbeaf30e2c8db2c2cfeb38f6dbffcbf86bba Mon Sep 17 00:00:00 2001
From: Thomas Gleixner
Date: Fri, 15 May 2020 17:39:05 +0200
Subject: x86/idt: Remove update_intr_gate()

No more users.

Signed-off-by: Thomas Gleixner
---
 arch/x86/include/asm/desc.h | 1 -
 arch/x86/kernel/idt.c       | 8 --------
 2 files changed, 9 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/include/asm/desc.h b/arch/x86/include/asm/desc.h
index 68a99d2a5f33..085a2dd312b4 100644
--- a/arch/x86/include/asm/desc.h
+++ b/arch/x86/include/asm/desc.h
@@ -386,7 +386,6 @@ static inline void set_desc_limit(struct desc_struct *desc, unsigned long limit)
	desc->limit1 = (limit >> 16) & 0xf;
 }

-void update_intr_gate(unsigned int n, const void *addr);
 void alloc_intr_gate(unsigned int n, const void *addr);

 extern unsigned long system_vectors[];
diff --git a/arch/x86/kernel/idt.c b/arch/x86/kernel/idt.c
index 36fef90a38e7..95609ee4c8b3 100644
--- a/arch/x86/kernel/idt.c
+++ b/arch/x86/kernel/idt.c
@@ -360,14 +360,6 @@ void idt_invalidate(void *addr)
	load_idt(&idt);
 }

-/* This goes away once ASYNC_PF is sanitized */
-void __init update_intr_gate(unsigned int n, const void *addr)
-{
-	if (WARN_ON_ONCE(!test_bit(n, system_vectors)))
-		return;
-	set_intr_gate(n, addr);
-}
-
 void __init alloc_intr_gate(unsigned int n, const void *addr)
 {
	if (WARN_ON(n < FIRST_SYSTEM_VECTOR))
--
cgit v1.2.3


From b9f6976bfb949121bb6e1e6f4fd9909735729148 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner
Date: Wed, 25 Mar 2020 19:45:26 +0100
Subject: x86/entry/64: Move non entry code into .text section

All ASM code which is not part of the entry functionality can move out
into the .text section. No reason to keep it in the non-instrumentable
entry section.

Signed-off-by: Thomas Gleixner
Reviewed-by: Steven Rostedt (VMware)
Reviewed-by: Alexandre Chartre
Acked-by: Peter Zijlstra
Link: https://lkml.kernel.org/r/20200505134340.227579223@linutronix.de
---
 arch/x86/entry/entry_64.S   | 18 ++++++++++++++----
 arch/x86/kernel/ftrace_64.S |  2 +-
 2 files changed, 15 insertions(+), 5 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index a15b70ac87b5..b199f43cff28 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -279,6 +279,7 @@ SYM_CODE_END(entry_SYSCALL_64)
 * %rdi: prev task
 * %rsi: next task
 */
+.pushsection .text, "ax"
 SYM_FUNC_START(__switch_to_asm)
	/*
	 * Save callee-saved registers
@@ -321,6 +322,7 @@ SYM_FUNC_START(__switch_to_asm)
	jmp	__switch_to
 SYM_FUNC_END(__switch_to_asm)
+.popsection

 /*
 * A newly forked process directly context switches into this address.
@@ -329,6 +331,7 @@ SYM_FUNC_END(__switch_to_asm)
 * rbx: kernel thread func (NULL for user thread)
 * r12: kernel thread arg
 */
+.pushsection .text, "ax"
 SYM_CODE_START(ret_from_fork)
	UNWIND_HINT_EMPTY
	movq	%rax, %rdi
@@ -357,6 +360,7 @@ SYM_CODE_START(ret_from_fork)
	movq	$0, RAX(%rsp)
	jmp	2b
 SYM_CODE_END(ret_from_fork)
+.popsection

 /*
 * Build the entry stubs with some assembler magic.
@@ -1037,10 +1041,12 @@ idtentry alignment_check		do_alignment_check		has_error_code=1
 idtentry simd_coprocessor_error		do_simd_coprocessor_error	has_error_code=0

-	/*
-	 * Reload gs selector with exception handling
-	 * edi: new selector
-	 */
+/*
+ * Reload gs selector with exception handling
+ * edi: new selector
+ *
+ * Is in entry.text as it shouldn't be instrumented.
+ */
 SYM_FUNC_START(native_load_gs_index)
	FRAME_BEGIN
	pushfq
@@ -1076,6 +1082,7 @@ SYM_CODE_END(.Lbad_gs)
	.previous

 /* Call softirq on interrupt stack. Interrupts are off. */
+.pushsection .text, "ax"
 SYM_FUNC_START(do_softirq_own_stack)
	pushq	%rbp
	mov	%rsp, %rbp
@@ -1085,6 +1092,7 @@ SYM_FUNC_START(do_softirq_own_stack)
	leaveq
	ret
 SYM_FUNC_END(do_softirq_own_stack)
+.popsection

 #ifdef CONFIG_XEN_PV
 idtentry hypervisor_callback xen_do_hypervisor_callback has_error_code=0
@@ -1728,6 +1736,7 @@ SYM_CODE_START(ignore_sysret)
 SYM_CODE_END(ignore_sysret)
 #endif

+.pushsection .text, "ax"
 SYM_CODE_START(rewind_stack_do_exit)
	UNWIND_HINT_FUNC
	/* Prevent any naive code from trying to unwind to our caller. */
@@ -1739,3 +1748,4 @@ SYM_CODE_START(rewind_stack_do_exit)

	call	do_exit
 SYM_CODE_END(rewind_stack_do_exit)
+.popsection
diff --git a/arch/x86/kernel/ftrace_64.S b/arch/x86/kernel/ftrace_64.S
index aa5d28aeb31e..083a3da7bb73 100644
--- a/arch/x86/kernel/ftrace_64.S
+++ b/arch/x86/kernel/ftrace_64.S
@@ -12,7 +12,7 @@
 #include

	.code64
-	.section .entry.text, "ax"
+	.section .text, "ax"

 #ifdef CONFIG_FRAME_POINTER
	/* Save parent and function stack frames (rip and rbp) */
--
cgit v1.2.3


From 8c0fa8a036cd9c000bcf761413b565b429f629fc Mon Sep 17 00:00:00 2001
From: Thomas Gleixner
Date: Wed, 25 Mar 2020 19:47:40 +0100
Subject: x86/entry/32: Move non entry code into .text section

All ASM code which is not part of the entry functionality can move out
into the .text section. No reason to keep it in the non-instrumentable
entry section.

Signed-off-by: Thomas Gleixner
Reviewed-by: Alexandre Chartre
Acked-by: Peter Zijlstra
Link: https://lkml.kernel.org/r/20200505134340.320164650@linutronix.de
---
 arch/x86/entry/entry_32.S | 9 +++++++++
 1 file changed, 9 insertions(+)

(limited to 'arch')

diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S
index a5eed844e948..bf0082bb07a1 100644
--- a/arch/x86/entry/entry_32.S
+++ b/arch/x86/entry/entry_32.S
@@ -730,6 +730,7 @@
 * %eax: prev task
 * %edx: next task
 */
+.pushsection .text, "ax"
 SYM_CODE_START(__switch_to_asm)
	/*
	 * Save callee-saved registers
@@ -776,6 +777,7 @@ SYM_CODE_START(__switch_to_asm)
	jmp	__switch_to
 SYM_CODE_END(__switch_to_asm)
+.popsection

 /*
 * The unwinder expects the last frame on the stack to always be at the same
@@ -784,6 +786,7 @@ SYM_CODE_END(__switch_to_asm)
 * asmlinkage function so its argument has to be pushed on the stack. This
 * wrapper creates a proper "end of stack" frame header before the call.
 */
+.pushsection .text, "ax"
 SYM_FUNC_START(schedule_tail_wrapper)
	FRAME_BEGIN

@@ -794,6 +797,8 @@ SYM_FUNC_START(schedule_tail_wrapper)

	FRAME_END
	ret
 SYM_FUNC_END(schedule_tail_wrapper)
+.popsection
+
 /*
 * A newly forked process directly context switches into this address.
 *
@@ -801,6 +806,7 @@ SYM_FUNC_END(schedule_tail_wrapper)
 * ebx: kernel thread func (NULL for user thread)
 * edi: kernel thread arg
 */
+.pushsection .text, "ax"
 SYM_CODE_START(ret_from_fork)
	call	schedule_tail_wrapper
@@ -825,6 +831,7 @@ SYM_CODE_START(ret_from_fork)
	movl	$0, PT_EAX(%esp)
	jmp	2b
 SYM_CODE_END(ret_from_fork)
+.popsection

 /*
 * Return to user mode is not as complex as all this looks,
@@ -1691,6 +1698,7 @@ SYM_CODE_START(general_protection)
	jmp	common_exception
 SYM_CODE_END(general_protection)

+.pushsection .text, "ax"
 SYM_CODE_START(rewind_stack_do_exit)
	/* Prevent any naive code from trying to unwind to our caller. */
	xorl	%ebp, %ebp
@@ -1701,3 +1709,4 @@ SYM_CODE_START(rewind_stack_do_exit)
	call	do_exit
1:	jmp 1b
 SYM_CODE_END(rewind_stack_do_exit)
+.popsection
--
cgit v1.2.3


From 1723be30e46fbda0c5971d3a19a37a7c2499bc90 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner
Date: Sat, 29 Feb 2020 15:12:33 +0100
Subject: x86/entry: Mark enter_from_user_mode() noinstr

Both the callers in the low level ASM code and __context_tracking_exit(),
which is invoked from enter_from_user_mode() via user_exit_irqoff(), are
marked NOKPROBE. Allowing enter_from_user_mode() to be probed is
inconsistent at best.

Aside from that, while function tracing per se is safe, the function
trace entry/exit points can also be used via BPF, which is not safe to
use before context tracking has reached CONTEXT_KERNEL and adjusted RCU.

Mark it noinstr which moves it into the instrumentation protected text
section and includes notrace.

Note, this needs further fixups in context tracking to ensure that the
full call chain is protected. Will be addressed in follow up changes.

Signed-off-by: Thomas Gleixner
Reviewed-by: Masami Hiramatsu
Reviewed-by: Alexandre Chartre
Acked-by: Peter Zijlstra
Link: https://lkml.kernel.org/r/20200505134340.429059405@linutronix.de
---
 arch/x86/entry/common.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c
index 76735ec813e6..d862add7918b 100644
--- a/arch/x86/entry/common.c
+++ b/arch/x86/entry/common.c
@@ -41,7 +41,7 @@

 #ifdef CONFIG_CONTEXT_TRACKING
 /* Called on entry from user mode with IRQs off. */
-__visible inline void enter_from_user_mode(void)
+__visible inline noinstr void enter_from_user_mode(void)
 {
	CT_WARN_ON(ct_state() != CONTEXT_USER);
	user_exit_irqoff();
--
cgit v1.2.3


From 8f159f1dfa1ea29d70a84335fe6a8bd501a9eecd Mon Sep 17 00:00:00 2001
From: Thomas Gleixner
Date: Tue, 10 Mar 2020 14:46:27 +0100
Subject: x86/entry/common: Protect against instrumentation

Mark the various syscall entries with noinstr to protect them against
instrumentation and add the instrumentation_begin()/end() annotations
to mark the parts of the functions which are safe to call out into
instrumentable code.

Signed-off-by: Thomas Gleixner
Reviewed-by: Alexandre Chartre
Acked-by: Peter Zijlstra
Link: https://lkml.kernel.org/r/20200505134340.520277507@linutronix.de
---
 arch/x86/entry/common.c | 133 ++++++++++++++++++++++++++++++++----------------
 1 file changed, 89 insertions(+), 44 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c
index d862add7918b..9892fb7c9d44 100644
--- a/arch/x86/entry/common.c
+++ b/arch/x86/entry/common.c
@@ -41,15 +41,26 @@

 #ifdef CONFIG_CONTEXT_TRACKING
 /* Called on entry from user mode with IRQs off. */
-__visible inline noinstr void enter_from_user_mode(void)
+__visible noinstr void enter_from_user_mode(void)
 {
-	CT_WARN_ON(ct_state() != CONTEXT_USER);
+	enum ctx_state state = ct_state();
+
	user_exit_irqoff();
+
+	instrumentation_begin();
+	CT_WARN_ON(state != CONTEXT_USER);
+	instrumentation_end();
 }
 #else
 static inline void enter_from_user_mode(void) {}
 #endif

+static noinstr void exit_to_user_mode(void)
+{
+	user_enter_irqoff();
+	mds_user_clear_cpu_buffers();
+}
+
 static void do_audit_syscall_entry(struct pt_regs *regs, u32 arch)
 {
 #ifdef CONFIG_X86_64
@@ -179,8 +190,7 @@ static void exit_to_usermode_loop(struct pt_regs *regs, u32 cached_flags)
	}
 }

-/* Called with IRQs disabled. */
-__visible inline void prepare_exit_to_usermode(struct pt_regs *regs)
+static void __prepare_exit_to_usermode(struct pt_regs *regs)
 {
	struct thread_info *ti = current_thread_info();
	u32 cached_flags;
@@ -219,10 +229,14 @@ __visible inline void prepare_exit_to_usermode(struct pt_regs *regs)
	 */
	ti->status &= ~(TS_COMPAT|TS_I386_REGS_POKED);
 #endif
+}

-	user_enter_irqoff();
-
-	mds_user_clear_cpu_buffers();
+__visible noinstr void prepare_exit_to_usermode(struct pt_regs *regs)
+{
+	instrumentation_begin();
+	__prepare_exit_to_usermode(regs);
+	instrumentation_end();
+	exit_to_user_mode();
 }

 #define SYSCALL_EXIT_WORK_FLAGS				\
@@ -251,11 +265,7 @@ static void syscall_slow_exit_work(struct pt_regs *regs, u32 cached_flags)
		tracehook_report_syscall_exit(regs, step);
 }

-/*
- * Called with IRQs on and fully valid regs.  Returns with IRQs off in a
- * state such that we can immediately switch to user mode.
- */
-__visible inline void syscall_return_slowpath(struct pt_regs *regs)
+static void __syscall_return_slowpath(struct pt_regs *regs)
 {
	struct thread_info *ti = current_thread_info();
	u32 cached_flags = READ_ONCE(ti->flags);
@@ -276,15 +286,29 @@ __visible inline void syscall_return_slowpath(struct pt_regs *regs)
		syscall_slow_exit_work(regs, cached_flags);

	local_irq_disable();
-	prepare_exit_to_usermode(regs);
+	__prepare_exit_to_usermode(regs);
+}
+
+/*
+ * Called with IRQs on and fully valid regs.  Returns with IRQs off in a
+ * state such that we can immediately switch to user mode.
+ */
+__visible noinstr void syscall_return_slowpath(struct pt_regs *regs)
+{
+	instrumentation_begin();
+	__syscall_return_slowpath(regs);
+	instrumentation_end();
+	exit_to_user_mode();
 }

 #ifdef CONFIG_X86_64
-__visible void do_syscall_64(unsigned long nr, struct pt_regs *regs)
+__visible noinstr void do_syscall_64(unsigned long nr, struct pt_regs *regs)
 {
	struct thread_info *ti;

	enter_from_user_mode();
+	instrumentation_begin();
+
	local_irq_enable();
	ti = current_thread_info();
	if (READ_ONCE(ti->flags) & _TIF_WORK_SYSCALL_ENTRY)
@@ -301,8 +325,10 @@ __visible void do_syscall_64(unsigned long nr, struct pt_regs *regs)
		regs->ax = x32_sys_call_table[nr](regs);
 #endif
	}
+	__syscall_return_slowpath(regs);

-	syscall_return_slowpath(regs);
+	instrumentation_end();
+	exit_to_user_mode();
 }
 #endif

@@ -313,7 +339,7 @@ __visible void do_syscall_64(unsigned long nr, struct pt_regs *regs)
 * extremely hot in workloads that use it, and it's usually called from
 * do_fast_syscall_32, so forcibly inline it to improve performance.
 */
-static __always_inline void do_syscall_32_irqs_on(struct pt_regs *regs)
+static void do_syscall_32_irqs_on(struct pt_regs *regs)
 {
	struct thread_info *ti = current_thread_info();
	unsigned int nr = (unsigned int)regs->orig_ax;
@@ -337,27 +363,62 @@ static __always_inline void do_syscall_32_irqs_on(struct pt_regs *regs)
		regs->ax = ia32_sys_call_table[nr](regs);
	}

-	syscall_return_slowpath(regs);
+	__syscall_return_slowpath(regs);
 }

 /* Handles int $0x80 */
-__visible void do_int80_syscall_32(struct pt_regs *regs)
+__visible noinstr void do_int80_syscall_32(struct pt_regs *regs)
 {
	enter_from_user_mode();
+	instrumentation_begin();
+
	local_irq_enable();
	do_syscall_32_irqs_on(regs);
+
+	instrumentation_end();
+	exit_to_user_mode();
+}
+
+static bool __do_fast_syscall_32(struct pt_regs *regs)
+{
+	int res;
+
+	/* Fetch EBP from where the vDSO stashed it. */
+	if (IS_ENABLED(CONFIG_X86_64)) {
+		/*
+		 * Micro-optimization: the pointer we're following is
+		 * explicitly 32 bits, so it can't be out of range.
+		 */
+		res = __get_user(*(u32 *)&regs->bp,
+			 (u32 __user __force *)(unsigned long)(u32)regs->sp);
+	} else {
+		res = get_user(*(u32 *)&regs->bp,
+		       (u32 __user __force *)(unsigned long)(u32)regs->sp);
+	}
+
+	if (res) {
+		/* User code screwed up. */
+		regs->ax = -EFAULT;
+		local_irq_disable();
+		__prepare_exit_to_usermode(regs);
+		return false;
+	}
+
+	/* Now this is just like a normal syscall. */
+	do_syscall_32_irqs_on(regs);
+	return true;
 }

 /* Returns 0 to return using IRET or 1 to return using SYSEXIT/SYSRETL. */
-__visible long do_fast_syscall_32(struct pt_regs *regs)
+__visible noinstr long do_fast_syscall_32(struct pt_regs *regs)
 {
	/*
	 * Called using the internal vDSO SYSENTER/SYSCALL32 calling
	 * convention.  Adjust regs so it looks like we entered using int80.
	 */
	unsigned long landing_pad = (unsigned long)current->mm->context.vdso +
-		vdso_image_32.sym_int80_landing_pad;
+			vdso_image_32.sym_int80_landing_pad;
+	bool success;

	/*
	 * SYSENTER loses EIP, and even SYSCALL32 needs us to skip forward
@@ -367,33 +428,17 @@ __visible long do_fast_syscall_32(struct pt_regs *regs)
	regs->ip = landing_pad;

	enter_from_user_mode();
+	instrumentation_begin();

	local_irq_enable();
+	success = __do_fast_syscall_32(regs);

-	/* Fetch EBP from where the vDSO stashed it. */
-	if (
-#ifdef CONFIG_X86_64
-	    /*
-	     * Micro-optimization: the pointer we're following is explicitly
-	     * 32 bits, so it can't be out of range.
-	     */
-	    __get_user(*(u32 *)&regs->bp,
-		       (u32 __user __force *)(unsigned long)(u32)regs->sp)
-#else
-	    get_user(*(u32 *)&regs->bp,
-		     (u32 __user __force *)(unsigned long)(u32)regs->sp)
-#endif
-	   ) {
-
-		/* User code screwed up. */
-		local_irq_disable();
-		regs->ax = -EFAULT;
-		prepare_exit_to_usermode(regs);
-		return 0;	/* Keep it simple: use IRET. */
-	}
+	instrumentation_end();
+	exit_to_user_mode();

-	/* Now this is just like a normal syscall. */
-	do_syscall_32_irqs_on(regs);
+	/* If it failed, keep it simple: use IRET. */
+	if (!success)
+		return 0;

 #ifdef CONFIG_X86_64
	/*
--
cgit v1.2.3


From dd8e2d9ae64fa4348530df3e45e9f874d807a1c2 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner
Date: Tue, 25 Feb 2020 23:08:05 +0100
Subject: x86/entry: Move irq tracing on syscall entry to C-code

Now that the C entry points are safe, move the irq flags tracing code
into the entry helper:

 - Invoke lockdep before calling into context tracking

 - Use the safe trace_hardirqs_off_prepare() trace function after
   context tracking established state and RCU is watching.
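For reference, the resulting ordering in the helper boils down to this
condensed sketch (simplified from the diff below; not the complete
function):

	__visible noinstr void enter_from_user_mode(void)
	{
		enum ctx_state state = ct_state();

		lockdep_hardirqs_off(CALLER_ADDR0);	/* 1) lockdep first */
		user_exit_irqoff();			/* 2) context tracking / RCU */

		instrumentation_begin();
		CT_WARN_ON(state != CONTEXT_USER);
		trace_hardirqs_off_prepare();		/* 3) tracing, RCU watching */
		instrumentation_end();
	}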

enter_from_user_mode() is also still invoked from the exception/interrupt
entry code which still contains the ASM irq flags tracing. So this is just
a redundant and harmless invocation of tracing / lockdep until these are
removed as well.

Signed-off-by: Thomas Gleixner
Reviewed-by: Alexandre Chartre
Acked-by: Peter Zijlstra
Link: https://lkml.kernel.org/r/20200505134340.611961721@linutronix.de
---
 arch/x86/entry/common.c          | 21 +++++++++++++++++++--
 arch/x86/entry/entry_32.S        | 12 ------------
 arch/x86/entry/entry_64.S        |  2 --
 arch/x86/entry/entry_64_compat.S | 18 ------------------
 4 files changed, 19 insertions(+), 34 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c
index 9892fb7c9d44..7473c1297a84 100644
--- a/arch/x86/entry/common.c
+++ b/arch/x86/entry/common.c
@@ -40,19 +40,36 @@
 #include <trace/events/syscalls.h>

 #ifdef CONFIG_CONTEXT_TRACKING
-/* Called on entry from user mode with IRQs off. */
+/**
+ * enter_from_user_mode - Establish state when coming from user mode
+ *
+ * Syscall entry disables interrupts, but user mode is traced as interrupts
+ * enabled. Also with NO_HZ_FULL RCU might be idle.
+ *
+ * 1) Tell lockdep that interrupts are disabled
+ * 2) Invoke context tracking if enabled to reactivate RCU
+ * 3) Trace interrupts off state
+ */
 __visible noinstr void enter_from_user_mode(void)
 {
	enum ctx_state state = ct_state();

+	lockdep_hardirqs_off(CALLER_ADDR0);
	user_exit_irqoff();

	instrumentation_begin();
	CT_WARN_ON(state != CONTEXT_USER);
+	trace_hardirqs_off_prepare();
	instrumentation_end();
 }
 #else
-static inline void enter_from_user_mode(void) {}
+static __always_inline void enter_from_user_mode(void)
+{
+	lockdep_hardirqs_off(CALLER_ADDR0);
+	instrumentation_begin();
+	trace_hardirqs_off_prepare();
+	instrumentation_end();
+}
 #endif

 static noinstr void exit_to_user_mode(void)
diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S
index bf0082bb07a1..65704e02e1e2 100644
--- a/arch/x86/entry/entry_32.S
+++ b/arch/x86/entry/entry_32.S
@@ -967,12 +967,6 @@ SYM_FUNC_START(entry_SYSENTER_32)
	jnz	.Lsysenter_fix_flags
.Lsysenter_flags_fixed:

-	/*
-	 * User mode is traced as though IRQs are on, and SYSENTER
-	 * turned them off.
-	 */
-	TRACE_IRQS_OFF
-
	movl	%esp, %eax
	call	do_fast_syscall_32
	/* XEN PV guests always use IRET path */
@@ -1082,12 +1076,6 @@ SYM_FUNC_START(entry_INT80_32)

	SAVE_ALL pt_regs_ax=$-ENOSYS switch_stacks=1	/* save rest */

-	/*
-	 * User mode is traced as though IRQs are on, and the interrupt gate
-	 * turned them off.
-	 */
-	TRACE_IRQS_OFF
-
	movl	%esp, %eax
	call	do_int80_syscall_32
.Lsyscall_32_done:
diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index b199f43cff28..9e34fe849a4e 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -167,8 +167,6 @@ SYM_INNER_LABEL(entry_SYSCALL_64_after_hwframe, SYM_L_GLOBAL)

	PUSH_AND_CLEAR_REGS rax=$-ENOSYS

-	TRACE_IRQS_OFF
-
	/* IRQs are off. */
	movq	%rax, %rdi
	movq	%rsp, %rsi
diff --git a/arch/x86/entry/entry_64_compat.S b/arch/x86/entry/entry_64_compat.S
index f1d3ccae5dd5..e2e8bd77dc27 100644
--- a/arch/x86/entry/entry_64_compat.S
+++ b/arch/x86/entry/entry_64_compat.S
@@ -129,12 +129,6 @@ SYM_FUNC_START(entry_SYSENTER_compat)
	jnz	.Lsysenter_fix_flags
.Lsysenter_flags_fixed:

-	/*
-	 * User mode is traced as though IRQs are on, and SYSENTER
-	 * turned them off.
-	 */
-	TRACE_IRQS_OFF
-
	movq	%rsp, %rdi
	call	do_fast_syscall_32
	/* XEN PV guests always use IRET path */
@@ -247,12 +241,6 @@ SYM_INNER_LABEL(entry_SYSCALL_compat_after_hwframe, SYM_L_GLOBAL)
	pushq	$0			/* pt_regs->r15 = 0 */
	xorl	%r15d, %r15d		/* nospec   r15 */

-	/*
-	 * User mode is traced as though IRQs are on, and SYSENTER
-	 * turned them off.
-	 */
-	TRACE_IRQS_OFF
-
	movq	%rsp, %rdi
	call	do_fast_syscall_32
	/* XEN PV guests always use IRET path */
@@ -403,12 +391,6 @@ SYM_CODE_START(entry_INT80_compat)
	xorl	%r15d, %r15d		/* nospec   r15 */
	cld

-	/*
-	 * User mode is traced as though IRQs are on, and the interrupt
-	 * gate turned them off.
-	 */
-	TRACE_IRQS_OFF
-
	movq	%rsp, %rdi
	call	do_int80_syscall_32
.Lsyscall_32_done:
--
cgit v1.2.3


From 4983e5d74c821780d518232eea4acdc4a8f0b44d Mon Sep 17 00:00:00 2001
From: Thomas Gleixner
Date: Wed, 4 Mar 2020 12:51:59 +0100
Subject: x86/entry: Move irq flags tracing to prepare_exit_to_usermode()

This is another step towards more C-code and less convoluted ASM.

Similar to the entry path, invoke the tracer before context tracking,
which might turn off RCU, and invoke lockdep as the last step before
going back to user space. Annotate the code sections in
exit_to_user_mode() accordingly so objtool won't complain about the
tracer invocation.

Signed-off-by: Thomas Gleixner
Reviewed-by: Alexandre Chartre
Acked-by: Peter Zijlstra
Acked-by: Andy Lutomirski
Link: https://lkml.kernel.org/r/20200505134340.703783926@linutronix.de
---
 arch/x86/entry/common.c          | 19 ++++++++++++++++++-
 arch/x86/entry/entry_32.S        | 12 ++++--------
 arch/x86/entry/entry_64.S        |  4 ----
 arch/x86/entry/entry_64_compat.S | 14 +++++---------
 4 files changed, 27 insertions(+), 22 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c
index 7473c1297a84..e4f9f5f2c21b 100644
--- a/arch/x86/entry/common.c
+++ b/arch/x86/entry/common.c
@@ -72,10 +72,27 @@ static __always_inline void enter_from_user_mode(void)
 }
 #endif

-static noinstr void exit_to_user_mode(void)
+/**
+ * exit_to_user_mode - Fixup state when exiting to user mode
+ *
+ * Syscall exit enables interrupts, but the kernel state is interrupts
+ * disabled when this is invoked. Also tell RCU about it.
+ *
+ * 1) Trace interrupts on state
+ * 2) Invoke context tracking if enabled to adjust RCU state
+ * 3) Clear CPU buffers if CPU is affected by MDS and the mitigation is on.
+ * 4) Tell lockdep that interrupts are enabled
+ */
+static __always_inline void exit_to_user_mode(void)
 {
+	instrumentation_begin();
+	trace_hardirqs_on_prepare();
+	lockdep_hardirqs_on_prepare(CALLER_ADDR0);
+	instrumentation_end();
+
	user_enter_irqoff();
	mds_user_clear_cpu_buffers();
+	lockdep_hardirqs_on(CALLER_ADDR0);
 }

 static void do_audit_syscall_entry(struct pt_regs *regs, u32 arch)
diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S
index 65704e02e1e2..d9da0b7f38ff 100644
--- a/arch/x86/entry/entry_32.S
+++ b/arch/x86/entry/entry_32.S
@@ -817,8 +817,7 @@ SYM_CODE_START(ret_from_fork)
	/* When we fork, we trace the syscall return in the child, too. */
	movl	%esp, %eax
	call	syscall_return_slowpath
-	STACKLEAK_ERASE
-	jmp	restore_all
+	jmp	.Lsyscall_32_done

	/* kernel thread */
1:	movl	%edi, %eax
@@ -862,7 +861,7 @@ ret_from_intr:
	TRACE_IRQS_OFF
	movl	%esp, %eax
	call	prepare_exit_to_usermode
-	jmp	restore_all
+	jmp	restore_all_switch_stack
 SYM_CODE_END(ret_from_exception)

 SYM_ENTRY(__begin_SYSENTER_singlestep_region, SYM_L_GLOBAL, SYM_A_NONE)
@@ -975,8 +974,7 @@ SYM_FUNC_START(entry_SYSENTER_32)

	STACKLEAK_ERASE

-/* Opportunistic SYSEXIT */
-	TRACE_IRQS_ON			/* User mode traces as IRQs on. */
+	/* Opportunistic SYSEXIT */

	/*
	 * Setup entry stack - we keep the pointer in %eax and do the
@@ -1079,11 +1077,9 @@ SYM_FUNC_START(entry_INT80_32)
	movl	%esp, %eax
	call	do_int80_syscall_32
.Lsyscall_32_done:
-
	STACKLEAK_ERASE

-restore_all:
-	TRACE_IRQS_ON
+restore_all_switch_stack:
	SWITCH_TO_ENTRY_STACK
	CHECK_AND_APPLY_ESPFIX
diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index 9e34fe849a4e..9866b54d9acc 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -172,8 +172,6 @@ SYM_INNER_LABEL(entry_SYSCALL_64_after_hwframe, SYM_L_GLOBAL)
	movq	%rsp, %rsi
	call	do_syscall_64		/* returns with IRQs disabled */

-	TRACE_IRQS_ON			/* return enables interrupts */
-
	/*
	 * Try to use SYSRET instead of IRET if we're returning to
	 * a completely clean 64-bit userspace context.  If we're not,
@@ -342,7 +340,6 @@ SYM_CODE_START(ret_from_fork)
	UNWIND_HINT_REGS
	movq	%rsp, %rdi
	call	syscall_return_slowpath	/* returns with IRQs disabled */
-	TRACE_IRQS_ON			/* user mode is traced as IRQS on */
	jmp	swapgs_restore_regs_and_return_to_usermode

1:
@@ -620,7 +617,6 @@ ret_from_intr:
.Lretint_user:
	mov	%rsp,%rdi
	call	prepare_exit_to_usermode
-	TRACE_IRQS_ON

SYM_INNER_LABEL(swapgs_restore_regs_and_return_to_usermode, SYM_L_GLOBAL)
 #ifdef CONFIG_DEBUG_ENTRY
diff --git a/arch/x86/entry/entry_64_compat.S b/arch/x86/entry/entry_64_compat.S
index e2e8bd77dc27..7c29ed898033 100644
--- a/arch/x86/entry/entry_64_compat.S
+++ b/arch/x86/entry/entry_64_compat.S
@@ -132,8 +132,8 @@ SYM_FUNC_START(entry_SYSENTER_compat)
	movq	%rsp, %rdi
	call	do_fast_syscall_32
	/* XEN PV guests always use IRET path */
-	ALTERNATIVE "testl %eax, %eax; jz .Lsyscall_32_done", \
-		    "jmp .Lsyscall_32_done", X86_FEATURE_XENPV
+	ALTERNATIVE "testl %eax, %eax; jz swapgs_restore_regs_and_return_to_usermode", \
+		    "jmp swapgs_restore_regs_and_return_to_usermode", X86_FEATURE_XENPV
	jmp	sysret32_from_system_call

.Lsysenter_fix_flags:
@@ -244,8 +244,8 @@ SYM_INNER_LABEL(entry_SYSCALL_compat_after_hwframe, SYM_L_GLOBAL)
	movq	%rsp, %rdi
	call	do_fast_syscall_32
	/* XEN PV guests always use IRET path */
-	ALTERNATIVE "testl %eax, %eax; jz .Lsyscall_32_done", \
-		    "jmp .Lsyscall_32_done", X86_FEATURE_XENPV
+	ALTERNATIVE "testl %eax, %eax; jz swapgs_restore_regs_and_return_to_usermode", \
+		    "jmp swapgs_restore_regs_and_return_to_usermode", X86_FEATURE_XENPV

	/* Opportunistic SYSRET */
sysret32_from_system_call:
	/*
	 * We are not going to return to userspace from the trampoline
	 * stack. So let's erase the thread stack right now.
	 */
	STACKLEAK_ERASE
-	TRACE_IRQS_ON			/* User mode traces as IRQs on. */
+
	movq	RBX(%rsp), %rbx		/* pt_regs->rbx */
	movq	RBP(%rsp), %rbp		/* pt_regs->rbp */
	movq	EFLAGS(%rsp), %r11	/* pt_regs->flags (in r11) */
@@ -393,9 +393,5 @@ SYM_CODE_START(entry_INT80_compat)
	movq	%rsp, %rdi
	call	do_int80_syscall_32
-.Lsyscall_32_done:
-
-	/* Go back to user mode.
-	 */
-	TRACE_IRQS_ON
	jmp	swapgs_restore_regs_and_return_to_usermode
 SYM_CODE_END(entry_INT80_compat)
--
cgit v1.2.3


From a7ef9ba986b5fae9d80f8a7b31db0423687efe4e Mon Sep 17 00:00:00 2001
From: Thomas Gleixner
Date: Wed, 4 Mar 2020 12:49:18 +0100
Subject: x86/speculation/mds: Mark mds_user_clear_cpu_buffers() __always_inline

Prevent the compiler from uninlining and creating traceable/probeable
functions, as this is invoked _after_ context tracking switched to
CONTEXT_USER and RCU to idle.

Signed-off-by: Thomas Gleixner
Reviewed-by: Alexandre Chartre
Acked-by: Peter Zijlstra
Link: https://lkml.kernel.org/r/20200505134340.902709267@linutronix.de
---
 arch/x86/include/asm/nospec-branch.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h
index d52d1aacdd97..e7752b4038ff 100644
--- a/arch/x86/include/asm/nospec-branch.h
+++ b/arch/x86/include/asm/nospec-branch.h
@@ -262,7 +262,7 @@ DECLARE_STATIC_KEY_FALSE(mds_idle_clear);
 * combination with microcode which triggers a CPU buffer flush when the
 * instruction is executed.
 */
-static inline void mds_clear_cpu_buffers(void)
+static __always_inline void mds_clear_cpu_buffers(void)
 {
	static const u16 ds = __KERNEL_DS;

@@ -283,7 +283,7 @@ static inline void mds_clear_cpu_buffers(void)
 *
 * Clear CPU buffers if the corresponding static key is enabled
 */
-static inline void mds_user_clear_cpu_buffers(void)
+static __always_inline void mds_user_clear_cpu_buffers(void)
 {
	if (static_branch_likely(&mds_user_clear))
		mds_clear_cpu_buffers();
--
cgit v1.2.3


From 1c3e5d3f60e26415d4227aa1193cf9e2db4df834 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra
Date: Tue, 12 May 2020 18:17:12 +0200
Subject: x86/entry: Make entry_64_compat.S objtool clean

Currently entry_64_compat is exempt from objtool, but with vmlinux mode
there is no hiding it. Make the following changes to make it pass:

 - change entry_SYSENTER_compat to STT_NOTYPE; it's not a function and
   doesn't have function type stack setup.

 - mark all STT_NOTYPE symbols with UNWIND_HINT_EMPTY, so we do validate
   them and don't treat them as unreachable.

 - don't abuse RSP as a temp register; this confuses objtool mightily,
   as it (rightfully) thinks we're doing unspeakable things to the stack.

Signed-off-by: Peter Zijlstra (Intel)
Signed-off-by: Thomas Gleixner
Reviewed-by: Alexandre Chartre
Acked-by: Andy Lutomirski
Link: https://lkml.kernel.org/r/20200505134341.272248024@linutronix.de
---
 arch/x86/entry/Makefile          |  2 --
 arch/x86/entry/entry_64_compat.S | 25 ++++++++++++++++++++-----
 2 files changed, 20 insertions(+), 7 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/entry/Makefile b/arch/x86/entry/Makefile
index cdf45ff92dc8..b7a5790d8d63 100644
--- a/arch/x86/entry/Makefile
+++ b/arch/x86/entry/Makefile
@@ -11,8 +11,6 @@ CFLAGS_REMOVE_common.o = $(CC_FLAGS_FTRACE) -fstack-protector -fstack-protector-
 CFLAGS_REMOVE_syscall_32.o = $(CC_FLAGS_FTRACE) -fstack-protector -fstack-protector-strong
 CFLAGS_REMOVE_syscall_64.o = $(CC_FLAGS_FTRACE) -fstack-protector -fstack-protector-strong

-OBJECT_FILES_NON_STANDARD_entry_64_compat.o := y
-
 CFLAGS_syscall_64.o += $(call cc-option,-Wno-override-init,)
 CFLAGS_syscall_32.o += $(call cc-option,-Wno-override-init,)
 obj-y := entry_$(BITS).o thunk_$(BITS).o syscall_$(BITS).o
diff --git a/arch/x86/entry/entry_64_compat.S b/arch/x86/entry/entry_64_compat.S
index 7c29ed898033..0f974ae01e62 100644
--- a/arch/x86/entry/entry_64_compat.S
+++ b/arch/x86/entry/entry_64_compat.S
@@ -46,12 +46,14 @@
 * ebp  user stack
 * 0(%ebp) arg6
 */
-SYM_FUNC_START(entry_SYSENTER_compat)
+SYM_CODE_START(entry_SYSENTER_compat)
+	UNWIND_HINT_EMPTY
	/* Interrupts are off on entry. */
	SWAPGS

-	/* We are about to clobber %rsp anyway, clobbering here is OK */
-	SWITCH_TO_KERNEL_CR3 scratch_reg=%rsp
+	pushq	%rax
+	SWITCH_TO_KERNEL_CR3 scratch_reg=%rax
+	popq	%rax

	movq	PER_CPU_VAR(cpu_current_top_of_stack), %rsp

@@ -104,6 +106,9 @@ SYM_FUNC_START(entry_SYSENTER_compat)
	xorl	%r14d, %r14d		/* nospec   r14 */
	pushq	$0			/* pt_regs->r15 = 0 */
	xorl	%r15d, %r15d		/* nospec   r15 */
+
+	UNWIND_HINT_REGS
+
	cld

	/*
@@ -141,7 +146,7 @@ SYM_FUNC_START(entry_SYSENTER_compat)
	popfq
	jmp	.Lsysenter_flags_fixed
SYM_INNER_LABEL(__end_entry_SYSENTER_compat, SYM_L_GLOBAL)
-SYM_FUNC_END(entry_SYSENTER_compat)
+SYM_CODE_END(entry_SYSENTER_compat)

 /*
 * 32-bit SYSCALL entry.
@@ -191,6 +196,7 @@ SYM_FUNC_END(entry_SYSENTER_compat)
 * 0(%esp) arg6
 */
 SYM_CODE_START(entry_SYSCALL_compat)
+	UNWIND_HINT_EMPTY
	/* Interrupts are off on entry. */
	swapgs

@@ -241,6 +247,8 @@ SYM_INNER_LABEL(entry_SYSCALL_compat_after_hwframe, SYM_L_GLOBAL)
	pushq	$0			/* pt_regs->r15 = 0 */
	xorl	%r15d, %r15d		/* nospec   r15 */

+	UNWIND_HINT_REGS
+
	movq	%rsp, %rdi
	call	do_fast_syscall_32
	/* XEN PV guests always use IRET path */
@@ -328,6 +336,7 @@ SYM_CODE_END(entry_SYSCALL_compat)
 * ebp  arg6
 */
 SYM_CODE_START(entry_INT80_compat)
+	UNWIND_HINT_EMPTY
	/*
	 * Interrupts are off on entry.
	 */
@@ -349,8 +358,11 @@ SYM_CODE_START(entry_INT80_compat)

	/* Need to switch before accessing the thread stack. */
	SWITCH_TO_KERNEL_CR3 scratch_reg=%rdi
+
	/* In the Xen PV case we already run on the thread stack. */
-	ALTERNATIVE "movq %rsp, %rdi", "jmp .Lint80_keep_stack", X86_FEATURE_XENPV
+	ALTERNATIVE "", "jmp .Lint80_keep_stack", X86_FEATURE_XENPV
+
+	movq	%rsp, %rdi
	movq	PER_CPU_VAR(cpu_current_top_of_stack), %rsp
	pushq	6*8(%rdi)		/* regs->ss */
@@ -389,6 +401,9 @@ SYM_CODE_START(entry_INT80_compat)
	xorl	%r14d, %r14d		/* nospec   r14 */
	pushq	%r15			/* pt_regs->r15 */
	xorl	%r15d, %r15d		/* nospec   r15 */
+
+	UNWIND_HINT_REGS
+
	cld

	movq	%rsp, %rdi
--
cgit v1.2.3


From d73a332936a6d33be3aa3fa4bee959efab09e431 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner
Date: Wed, 25 Mar 2020 19:53:38 +0100
Subject: x86/traps: Mark fixup_bad_iret() noinstr

This is called from deep entry ASM in a situation where instrumentation
will cause more harm than provide useful information.

Switch from memmove() to memcpy() because memmove() can't be called
from noinstr code.

Signed-off-by: Thomas Gleixner
Reviewed-by: Alexandre Chartre
Reviewed-by: Masami Hiramatsu
Acked-by: Peter Zijlstra
Acked-by: Andy Lutomirski
Link: https://lkml.kernel.org/r/20200505134903.346741553@linutronix.de
---
 arch/x86/kernel/traps.c | 16 +++++++++-------
 1 file changed, 9 insertions(+), 7 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index 48468f61202c..b2b36561a569 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -578,7 +578,7 @@ struct bad_iret_stack {
	struct pt_regs regs;
 };

-asmlinkage __visible notrace
+asmlinkage __visible noinstr
 struct bad_iret_stack *fixup_bad_iret(struct bad_iret_stack *s)
 {
	/*
@@ -589,19 +589,21 @@ struct bad_iret_stack *fixup_bad_iret(struct bad_iret_stack *s)
	 * just below the IRET frame) and we want to pretend that the
	 * exception came from the IRET target.
	 */
-	struct bad_iret_stack *new_stack =
-		(struct bad_iret_stack *)this_cpu_read(cpu_tss_rw.x86_tss.sp0) - 1;
+	struct bad_iret_stack tmp, *new_stack =
+		(struct bad_iret_stack *)__this_cpu_read(cpu_tss_rw.x86_tss.sp0) - 1;

-	/* Copy the IRET target to the new stack. */
-	memmove(&new_stack->regs.ip, (void *)s->regs.sp, 5*8);
+	/* Copy the IRET target to the temporary storage. */
+	memcpy(&tmp.regs.ip, (void *)s->regs.sp, 5*8);

	/* Copy the remainder of the stack from the current stack. */
-	memmove(new_stack, s, offsetof(struct bad_iret_stack, regs.ip));
+	memcpy(&tmp, s, offsetof(struct bad_iret_stack, regs.ip));
+
+	/* Update the entry stack */
+	memcpy(new_stack, &tmp, sizeof(tmp));

	BUG_ON(!user_mode(&new_stack->regs));
	return new_stack;
 }
-NOKPROBE_SYMBOL(fixup_bad_iret);
 #endif

 static bool is_sysenter_singlestep(struct pt_regs *regs)
--
cgit v1.2.3


From daf7a69787b587d454adea73377a904e09fd54a9 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner
Date: Wed, 25 Mar 2020 23:47:51 +0100
Subject: x86/traps: Mark sync_regs() noinstr

Replace the notrace and NOKPROBE annotations with noinstr.

Signed-off-by: Thomas Gleixner
Reviewed-by: Alexandre Chartre
Reviewed-by: Masami Hiramatsu
Acked-by: Peter Zijlstra
Acked-by: Andy Lutomirski
Link: https://lkml.kernel.org/r/20200505134903.439765290@linutronix.de
---
 arch/x86/kernel/traps.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index b2b36561a569..adcc62380ece 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -564,14 +564,13 @@ NOKPROBE_SYMBOL(do_int3);
 * to switch to the normal thread stack if the interrupted code was in
 * user mode.
 * The actual stack switch is done in entry_64.S
 */
-asmlinkage __visible notrace struct pt_regs *sync_regs(struct pt_regs *eregs)
+asmlinkage __visible noinstr struct pt_regs *sync_regs(struct pt_regs *eregs)
 {
	struct pt_regs *regs = (struct pt_regs *)this_cpu_read(cpu_current_top_of_stack) - 1;
	if (regs != eregs)
		*regs = *eregs;
	return regs;
 }
-NOKPROBE_SYMBOL(sync_regs);

 struct bad_iret_stack {
	void *error_entry_ret;
--
cgit v1.2.3


From 410367e321b5cbd4a616161142a7d162cf55885e Mon Sep 17 00:00:00 2001
From: Thomas Gleixner
Date: Wed, 4 Mar 2020 23:32:15 +0100
Subject: x86/entry: Disable interrupts for native_load_gs_index() in C code

There is absolutely no point in doing this in ASM code. Move it to C.

Signed-off-by: Thomas Gleixner
Reviewed-by: Alexandre Chartre
Acked-by: Peter Zijlstra
Acked-by: Andy Lutomirski
Link: https://lkml.kernel.org/r/20200505134903.531534675@linutronix.de
---
 arch/x86/entry/entry_64.S            | 11 +++--------
 arch/x86/include/asm/special_insns.h | 14 ++++++++++++--
 2 files changed, 15 insertions(+), 10 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index 9866b54d9acc..be8ed3a84caf 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -1041,22 +1041,17 @@ idtentry simd_coprocessor_error		do_simd_coprocessor_error	has_error_code=0
 *
 * Is in entry.text as it shouldn't be instrumented.
 */
-SYM_FUNC_START(native_load_gs_index)
+SYM_FUNC_START(asm_load_gs_index)
	FRAME_BEGIN
-	pushfq
-	DISABLE_INTERRUPTS(CLBR_ANY & ~CLBR_RDI)
-	TRACE_IRQS_OFF
	SWAPGS
.Lgs_change:
	movl	%edi, %gs
2:	ALTERNATIVE "", "mfence", X86_BUG_SWAPGS_FENCE
	SWAPGS
-	TRACE_IRQS_FLAGS (%rsp)
-	popfq
	FRAME_END
	ret
-SYM_FUNC_END(native_load_gs_index)
-EXPORT_SYMBOL(native_load_gs_index)
+SYM_FUNC_END(asm_load_gs_index)
+EXPORT_SYMBOL(asm_load_gs_index)

	_ASM_EXTABLE(.Lgs_change, .Lbad_gs)
	.section .fixup, "ax"
diff --git a/arch/x86/include/asm/special_insns.h b/arch/x86/include/asm/special_insns.h
index 6d37b8fcfc77..82436cb04ccf 100644
--- a/arch/x86/include/asm/special_insns.h
+++ b/arch/x86/include/asm/special_insns.h
@@ -7,6 +7,7 @@

 #include <asm/nops.h>
 #include <asm/processor-flags.h>
+#include <linux/irqflags.h>
 #include <linux/jump_label.h>

 /*
@@ -129,7 +130,16 @@ static inline void native_wbinvd(void)
	asm volatile("wbinvd": : :"memory");
 }

-extern asmlinkage void native_load_gs_index(unsigned);
+extern asmlinkage void asm_load_gs_index(unsigned int selector);
+
+static inline void native_load_gs_index(unsigned int selector)
+{
+	unsigned long flags;
+
+	local_irq_save(flags);
+	asm_load_gs_index(selector);
+	local_irq_restore(flags);
+}

 static inline unsigned long __read_cr4(void)
 {
@@ -186,7 +196,7 @@ static inline void wbinvd(void)

 #ifdef CONFIG_X86_64

-static inline void load_gs_index(unsigned selector)
+static inline void load_gs_index(unsigned int selector)
 {
	native_load_gs_index(selector);
 }
--
cgit v1.2.3


From c9317202af70ee03d44fdd68abebdb640b8ab411 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner
Date: Tue, 12 May 2020 14:54:14 +0200
Subject: x86/entry/64: Use native swapgs in asm_load_gs_index()

When PARAVIRT_XXL is in use, load_gs_index() uses xen_load_gs_index()
and asm_load_gs_index() is unused. It's therefore pointless to use the
paravirtualized SWAPGS implementation in asm_load_gs_index(). Switch it
to a plain swapgs.
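For context, a simplified sketch of why the stub is native-only (the
pv_ops line abbreviates the real paravirt patching machinery; see
paravirt.h for the actual plumbing):

	static inline void load_gs_index(unsigned int selector)
	{
	#ifdef CONFIG_PARAVIRT_XXL
		pv_ops.cpu.load_gs_index(selector);	/* e.g. xen_load_gs_index() */
	#else
		native_load_gs_index(selector);		/* wraps asm_load_gs_index() */
	#endif
	}

With PARAVIRT_XXL the first branch is taken, so asm_load_gs_index() only
ever runs on bare metal, where the paravirtualized SWAPGS indirection
buys nothing.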
Signed-off-by: Thomas Gleixner Reviewed-by: Steven Rostedt (VMware) Acked-by: Juergen Gross Acked-by: Peter Zijlstra Link: https://lkml.kernel.org/r/20200512213809.583980272@linutronix.de --- arch/x86/entry/entry_64.S | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index be8ed3a84caf..9747b42fedd5 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -1043,11 +1043,11 @@ idtentry simd_coprocessor_error do_simd_coprocessor_error has_error_code=0 */ SYM_FUNC_START(asm_load_gs_index) FRAME_BEGIN - SWAPGS + swapgs .Lgs_change: movl %edi, %gs 2: ALTERNATIVE "", "mfence", X86_BUG_SWAPGS_FENCE - SWAPGS + swapgs FRAME_END ret SYM_FUNC_END(asm_load_gs_index) @@ -1057,7 +1057,7 @@ EXPORT_SYMBOL(asm_load_gs_index) .section .fixup, "ax" /* running with kernelgs */ SYM_CODE_START_LOCAL_NOALIGN(.Lbad_gs) - SWAPGS /* switch back to user gs */ + swapgs /* switch back to user gs */ .macro ZAP_GS /* This can't be a string because the preprocessor needs to see it. */ movl $__USER_DS, %eax -- cgit v1.2.3 From ca4c6a9858c23b4f330113f391f2eadc983e780f Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 23 Oct 2019 14:27:10 +0200 Subject: x86/traps: Make interrupt enable/disable symmetric in C code Traps enable interrupts conditionally but rely on the ASM return code to disable them again. That results in redundant interrupt disable and trace calls. Make the trap handlers disable interrupts before returning to avoid that, which allows simplification of the ASM entry code in follow up changes. Originally-by: Peter Zijlstra Signed-off-by: Thomas Gleixner Reviewed-by: Alexandre Chartre Acked-by: Peter Zijlstra Acked-by: Andy Lutomirski Link: https://lkml.kernel.org/r/20200505134903.622702796@linutronix.de --- arch/x86/kernel/traps.c | 28 +++++++++++++++++++--------- arch/x86/mm/fault.c | 15 +++++++++++++-- 2 files changed, 32 insertions(+), 11 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index adcc62380ece..f5f4a76fb516 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -201,6 +201,7 @@ static void do_error_trap(struct pt_regs *regs, long error_code, char *str, NOTIFY_STOP) { cond_local_irq_enable(regs); do_trap(trapnr, signr, str, regs, error_code, sicode, addr); + cond_local_irq_disable(regs); } } @@ -397,6 +398,8 @@ dotraplinkage void do_bounds(struct pt_regs *regs, long error_code) die("bounds", regs, error_code); do_trap(X86_TRAP_BR, SIGSEGV, "bounds", regs, error_code, 0, NULL); + + cond_local_irq_disable(regs); } enum kernel_gp_hint { @@ -456,12 +459,13 @@ dotraplinkage void do_general_protection(struct pt_regs *regs, long error_code) if (static_cpu_has(X86_FEATURE_UMIP)) { if (user_mode(regs) && fixup_umip_exception(regs)) - return; + goto exit; } if (v8086_mode(regs)) { local_irq_enable(); handle_vm86_fault((struct kernel_vm86_regs *) regs, error_code); + local_irq_disable(); return; } @@ -473,12 +477,11 @@ dotraplinkage void do_general_protection(struct pt_regs *regs, long error_code) show_signal(tsk, SIGSEGV, "", desc, regs, error_code); force_sig(SIGSEGV); - - return; + goto exit; } if (fixup_exception(regs, X86_TRAP_GP, error_code, 0)) - return; + goto exit; tsk->thread.error_code = error_code; tsk->thread.trap_nr = X86_TRAP_GP; @@ -490,11 +493,11 @@ dotraplinkage void do_general_protection(struct pt_regs *regs, long error_code) if (!preemptible() && kprobe_running() && kprobe_fault_handler(regs, X86_TRAP_GP)) - 
return; + goto exit; ret = notify_die(DIE_GPF, desc, regs, error_code, X86_TRAP_GP, SIGSEGV); if (ret == NOTIFY_STOP) - return; + goto exit; if (error_code) snprintf(desc, sizeof(desc), "segment-related " GPFSTR); @@ -516,6 +519,8 @@ dotraplinkage void do_general_protection(struct pt_regs *regs, long error_code) die_addr(desc, regs, error_code, gp_addr); +exit: + cond_local_irq_disable(regs); } NOKPROBE_SYMBOL(do_general_protection); @@ -773,7 +778,7 @@ static void math_error(struct pt_regs *regs, int error_code, int trapnr) if (!user_mode(regs)) { if (fixup_exception(regs, trapnr, error_code, 0)) - return; + goto exit; task->thread.error_code = error_code; task->thread.trap_nr = trapnr; @@ -781,7 +786,7 @@ static void math_error(struct pt_regs *regs, int error_code, int trapnr) if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, SIGFPE) != NOTIFY_STOP) die(str, regs, error_code); - return; + goto exit; } /* @@ -795,10 +800,12 @@ static void math_error(struct pt_regs *regs, int error_code, int trapnr) si_code = fpu__exception_code(fpu, trapnr); /* Retry when we get spurious exceptions: */ if (!si_code) - return; + goto exit; force_sig_fault(SIGFPE, si_code, (void __user *)uprobe_get_trap_addr(regs)); +exit: + cond_local_irq_disable(regs); } dotraplinkage void do_coprocessor_error(struct pt_regs *regs, long error_code) @@ -853,6 +860,8 @@ do_device_not_available(struct pt_regs *regs, long error_code) info.regs = regs; math_emulate(&info); + + cond_local_irq_disable(regs); return; } #endif @@ -883,6 +892,7 @@ dotraplinkage void do_iret_error(struct pt_regs *regs, long error_code) do_trap(X86_TRAP_IRET, SIGILL, "iret exception", regs, error_code, ILL_BADSTK, (void __user *)NULL); } + local_irq_disable(); } #endif diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index 0b03ae8c39cd..53db18615f31 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c @@ -786,6 +786,8 @@ __bad_area_nosemaphore(struct pt_regs *regs, unsigned long error_code, force_sig_fault(SIGSEGV, si_code, (void __user *)address); + local_irq_disable(); + return; } @@ -1384,9 +1386,18 @@ do_page_fault(struct pt_regs *regs, unsigned long hw_error_code, return; /* Was the fault on kernel-controlled part of the address space? */ - if (unlikely(fault_in_kernel_space(address))) + if (unlikely(fault_in_kernel_space(address))) { do_kern_addr_fault(regs, hw_error_code, address); - else + } else { do_user_addr_fault(regs, hw_error_code, address); + /* + * User address page fault handling might have reenabled + * interrupts. Fixing up all potential exit points of + * do_user_addr_fault() and its leaf functions is just not + * doable w/o creating an unholy mess or turning the code + * upside down. + */ + local_irq_disable(); + } } NOKPROBE_SYMBOL(do_page_fault); -- cgit v1.2.3 From 877f183f83cc33b1b09313b9c18ab7ee5abda44f Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 25 Feb 2020 23:16:07 +0100 Subject: x86/traps: Split trap numbers out in a separate header So they can be used in ASM code. For this it is also necessary to convert them to defines. Will be used for the rework of the entry code. 
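Illustrative fragment of what the split enables (hypothetical macro body, modeled on the idtentry rework later in this series): the preprocessor also runs over .S files, so a define from trapnr.h is visible to the assembler, whereas the old enum was C-only:

	#include <asm/trapnr.h>		/* now fine in entry_64.S */

	.if \vector == X86_TRAP_PF
		GET_CR2_INTO(%r12)	/* page fault: read CR2 early */
	.endif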
Signed-off-by: Thomas Gleixner Reviewed-by: Andy Lutomirski Reviewed-by: Alexandre Chartre Acked-by: Peter Zijlstra Link: https://lkml.kernel.org/r/20200505134903.731004084@linutronix.de --- arch/x86/include/asm/trapnr.h | 31 +++++++++++++++++++++++++++++++ arch/x86/include/asm/traps.h | 26 +------------------------- 2 files changed, 32 insertions(+), 25 deletions(-) create mode 100644 arch/x86/include/asm/trapnr.h (limited to 'arch') diff --git a/arch/x86/include/asm/trapnr.h b/arch/x86/include/asm/trapnr.h new file mode 100644 index 000000000000..082f45631fa9 --- /dev/null +++ b/arch/x86/include/asm/trapnr.h @@ -0,0 +1,31 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_X86_TRAPNR_H +#define _ASM_X86_TRAPNR_H + +/* Interrupts/Exceptions */ + +#define X86_TRAP_DE 0 /* Divide-by-zero */ +#define X86_TRAP_DB 1 /* Debug */ +#define X86_TRAP_NMI 2 /* Non-maskable Interrupt */ +#define X86_TRAP_BP 3 /* Breakpoint */ +#define X86_TRAP_OF 4 /* Overflow */ +#define X86_TRAP_BR 5 /* Bound Range Exceeded */ +#define X86_TRAP_UD 6 /* Invalid Opcode */ +#define X86_TRAP_NM 7 /* Device Not Available */ +#define X86_TRAP_DF 8 /* Double Fault */ +#define X86_TRAP_OLD_MF 9 /* Coprocessor Segment Overrun */ +#define X86_TRAP_TS 10 /* Invalid TSS */ +#define X86_TRAP_NP 11 /* Segment Not Present */ +#define X86_TRAP_SS 12 /* Stack Segment Fault */ +#define X86_TRAP_GP 13 /* General Protection Fault */ +#define X86_TRAP_PF 14 /* Page Fault */ +#define X86_TRAP_SPURIOUS 15 /* Spurious Interrupt */ +#define X86_TRAP_MF 16 /* x87 Floating-Point Exception */ +#define X86_TRAP_AC 17 /* Alignment Check */ +#define X86_TRAP_MC 18 /* Machine Check */ +#define X86_TRAP_XF 19 /* SIMD Floating-Point Exception */ +#define X86_TRAP_VE 20 /* Virtualization Exception */ +#define X86_TRAP_CP 21 /* Control Protection Exception */ +#define X86_TRAP_IRET 32 /* IRET Exception */ + +#endif diff --git a/arch/x86/include/asm/traps.h b/arch/x86/include/asm/traps.h index 2ae904bf25e4..2376620dc66f 100644 --- a/arch/x86/include/asm/traps.h +++ b/arch/x86/include/asm/traps.h @@ -7,6 +7,7 @@ #include #include /* TRAP_TRACE, ... 
*/ +#include #define dotraplinkage __visible @@ -122,31 +123,6 @@ void __noreturn handle_stack_overflow(const char *message, unsigned long fault_address); #endif -/* Interrupts/Exceptions */ -enum { - X86_TRAP_DE = 0, /* 0, Divide-by-zero */ - X86_TRAP_DB, /* 1, Debug */ - X86_TRAP_NMI, /* 2, Non-maskable Interrupt */ - X86_TRAP_BP, /* 3, Breakpoint */ - X86_TRAP_OF, /* 4, Overflow */ - X86_TRAP_BR, /* 5, Bound Range Exceeded */ - X86_TRAP_UD, /* 6, Invalid Opcode */ - X86_TRAP_NM, /* 7, Device Not Available */ - X86_TRAP_DF, /* 8, Double Fault */ - X86_TRAP_OLD_MF, /* 9, Coprocessor Segment Overrun */ - X86_TRAP_TS, /* 10, Invalid TSS */ - X86_TRAP_NP, /* 11, Segment Not Present */ - X86_TRAP_SS, /* 12, Stack Segment Fault */ - X86_TRAP_GP, /* 13, General Protection Fault */ - X86_TRAP_PF, /* 14, Page Fault */ - X86_TRAP_SPURIOUS, /* 15, Spurious Interrupt */ - X86_TRAP_MF, /* 16, x87 Floating-Point Exception */ - X86_TRAP_AC, /* 17, Alignment Check */ - X86_TRAP_MC, /* 18, Machine Check */ - X86_TRAP_XF, /* 19, SIMD Floating-Point Exception */ - X86_TRAP_IRET = 32, /* 32, IRET Exception */ -}; - /* * Page fault error code bits: * -- cgit v1.2.3 From 67f1386616dc0c70f30f214245521c582666edac Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 25 Feb 2020 23:16:09 +0100 Subject: x86/entry/64: Reorder idtentries Move them all together so verifying the cleanup patches for binary equivalence will be easier. Signed-off-by: Thomas Gleixner Reviewed-by: Alexandre Chartre Acked-by: Andy Lutomirski Acked-by: Peter Zijlstra Link: https://lkml.kernel.org/r/20200505134903.841853522@linutronix.de --- arch/x86/entry/entry_64.S | 36 +++++++++++++++++------------------- 1 file changed, 17 insertions(+), 19 deletions(-) (limited to 'arch') diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index 9747b42fedd5..e62061e02b21 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -1020,20 +1020,36 @@ _ASM_NOKPROBE(\sym) SYM_CODE_END(\sym) .endm + idtentry divide_error do_divide_error has_error_code=0 idtentry overflow do_overflow has_error_code=0 +idtentry int3 do_int3 has_error_code=0 create_gap=1 idtentry bounds do_bounds has_error_code=0 idtentry invalid_op do_invalid_op has_error_code=0 idtentry device_not_available do_device_not_available has_error_code=0 -idtentry double_fault do_double_fault has_error_code=1 paranoid=2 read_cr2=1 idtentry coprocessor_segment_overrun do_coprocessor_segment_overrun has_error_code=0 idtentry invalid_TSS do_invalid_TSS has_error_code=1 idtentry segment_not_present do_segment_not_present has_error_code=1 +idtentry stack_segment do_stack_segment has_error_code=1 +idtentry general_protection do_general_protection has_error_code=1 idtentry spurious_interrupt_bug do_spurious_interrupt_bug has_error_code=0 idtentry coprocessor_error do_coprocessor_error has_error_code=0 idtentry alignment_check do_alignment_check has_error_code=1 idtentry simd_coprocessor_error do_simd_coprocessor_error has_error_code=0 +idtentry page_fault do_page_fault has_error_code=1 read_cr2=1 + +#ifdef CONFIG_X86_MCE +idtentry machine_check do_mce has_error_code=0 paranoid=1 +#endif +idtentry debug do_debug has_error_code=0 paranoid=1 shift_ist=IST_INDEX_DB ist_offset=DB_STACK_OFFSET +idtentry double_fault do_double_fault has_error_code=1 paranoid=2 read_cr2=1 + +#ifdef CONFIG_XEN_PV +idtentry hypervisor_callback xen_do_hypervisor_callback has_error_code=0 +idtentry xennmi do_nmi has_error_code=0 +idtentry xendebug do_debug has_error_code=0 +#endif /* * Reload gs 
selector with exception handling @@ -1084,8 +1100,6 @@ SYM_FUNC_END(do_softirq_own_stack) .popsection #ifdef CONFIG_XEN_PV -idtentry hypervisor_callback xen_do_hypervisor_callback has_error_code=0 - /* * A note on the "critical region" in our callback handler. * We want to avoid stacking callback handlers due to events occurring @@ -1188,22 +1202,6 @@ apicinterrupt3 HYPERVISOR_CALLBACK_VECTOR \ acrn_hv_callback_vector acrn_hv_vector_handler #endif -idtentry debug do_debug has_error_code=0 paranoid=1 shift_ist=IST_INDEX_DB ist_offset=DB_STACK_OFFSET -idtentry int3 do_int3 has_error_code=0 create_gap=1 -idtentry stack_segment do_stack_segment has_error_code=1 - -#ifdef CONFIG_XEN_PV -idtentry xennmi do_nmi has_error_code=0 -idtentry xendebug do_debug has_error_code=0 -#endif - -idtentry general_protection do_general_protection has_error_code=1 -idtentry page_fault do_page_fault has_error_code=1 read_cr2=1 - -#ifdef CONFIG_X86_MCE -idtentry machine_check do_mce has_error_code=0 paranoid=1 -#endif - /* * Save all registers in pt_regs, and switch gs if needed. * Use slow, but surefire "are we in kernel?" check. -- cgit v1.2.3 From cfa82a00533f7074011a3a49fbb6ed1b1f6fa010 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 25 Feb 2020 23:16:10 +0100 Subject: x86/entry: Distangle idtentry idtentry is a completely unreadable maze. Split it into distinct idtentry variants which only contain the minimal code: - idtentry for regular exceptions - idtentry_mce_debug for #MCE and #DB - idtentry_df for #DF The generated binary code is equivalent. Signed-off-by: Thomas Gleixner Reviewed-by: Alexandre Chartre Acked-by: Peter Zijlstra Acked-by: Andy Lutomirski Link: https://lkml.kernel.org/r/20200505134903.949227617@linutronix.de --- arch/x86/entry/entry_64.S | 403 +++++++++++++++++++++++++--------------------- 1 file changed, 220 insertions(+), 183 deletions(-) (limited to 'arch') diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index e62061e02b21..01bfe7f1bea5 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -37,6 +37,7 @@ #include #include #include +#include #include #include @@ -493,6 +494,202 @@ SYM_CODE_END(spurious_entries_start) decl PER_CPU_VAR(irq_count) .endm +/** + * idtentry_body - Macro to emit code calling the C function + * @vector: Vector number + * @cfunc: C function to be called + * @has_error_code: Hardware pushed error code on stack + */ +.macro idtentry_body vector cfunc has_error_code:req + + call error_entry + UNWIND_HINT_REGS + + .if \vector == X86_TRAP_PF + /* + * Store CR2 early so subsequent faults cannot clobber it. Use R12 as + * intermediate storage as RDX can be clobbered in enter_from_user_mode(). + * GET_CR2_INTO can clobber RAX. 
+ */ + GET_CR2_INTO(%r12); + .endif + + TRACE_IRQS_OFF + +#ifdef CONFIG_CONTEXT_TRACKING + testb $3, CS(%rsp) + jz .Lfrom_kernel_no_ctxt_tracking_\@ + CALL_enter_from_user_mode +.Lfrom_kernel_no_ctxt_tracking_\@: +#endif + + movq %rsp, %rdi /* pt_regs pointer into 1st argument*/ + + .if \has_error_code == 1 + movq ORIG_RAX(%rsp), %rsi /* get error code into 2nd argument*/ + movq $-1, ORIG_RAX(%rsp) /* no syscall to restart */ + .else + xorl %esi, %esi /* Clear the error code */ + .endif + + .if \vector == X86_TRAP_PF + movq %r12, %rdx /* Move CR2 into 3rd argument */ + .endif + + call \cfunc + + jmp error_exit +.endm + +/** + * idtentry - Macro to generate entry stubs for simple IDT entries + * @vector: Vector number + * @asmsym: ASM symbol for the entry point + * @cfunc: C function to be called + * @has_error_code: Hardware pushed error code on stack + * + * The macro emits code to set up the kernel context for straight forward + * and simple IDT entries. No IST stack, no paranoid entry checks. + */ +.macro idtentry vector asmsym cfunc has_error_code:req +SYM_CODE_START(\asmsym) + UNWIND_HINT_IRET_REGS offset=\has_error_code*8 + ASM_CLAC + + .if \has_error_code == 0 + pushq $-1 /* ORIG_RAX: no syscall to restart */ + .endif + + .if \vector == X86_TRAP_BP + /* + * If coming from kernel space, create a 6-word gap to allow the + * int3 handler to emulate a call instruction. + */ + testb $3, CS-ORIG_RAX(%rsp) + jnz .Lfrom_usermode_no_gap_\@ + .rept 6 + pushq 5*8(%rsp) + .endr + UNWIND_HINT_IRET_REGS offset=8 +.Lfrom_usermode_no_gap_\@: + .endif + + idtentry_body \vector \cfunc \has_error_code + +_ASM_NOKPROBE(\asmsym) +SYM_CODE_END(\asmsym) +.endm + +/* + * MCE and DB exceptions + */ +#define CPU_TSS_IST(x) PER_CPU_VAR(cpu_tss_rw) + (TSS_ist + (x) * 8) + +/** + * idtentry_mce_db - Macro to generate entry stubs for #MC and #DB + * @vector: Vector number + * @asmsym: ASM symbol for the entry point + * @cfunc: C function to be called + * + * The macro emits code to set up the kernel context for #MC and #DB + * + * If the entry comes from user space it uses the normal entry path + * including the return to user space work and preemption checks on + * exit. + * + * If hits in kernel mode then it needs to go through the paranoid + * entry as the exception can hit any random state. No preemption + * check on exit to keep the paranoid path simple. + * + * If the trap is #DB then the interrupt stack entry in the IST is + * moved to the second stack, so a potential recursion will have a + * fresh IST. + */ +.macro idtentry_mce_db vector asmsym cfunc +SYM_CODE_START(\asmsym) + UNWIND_HINT_IRET_REGS + ASM_CLAC + + pushq $-1 /* ORIG_RAX: no syscall to restart */ + + /* + * If the entry is from userspace, switch stacks and treat it as + * a normal entry. + */ + testb $3, CS-ORIG_RAX(%rsp) + jnz .Lfrom_usermode_switch_stack_\@ + + /* + * paranoid_entry returns SWAPGS flag for paranoid_exit in EBX. 
+ * EBX == 0 -> SWAPGS, EBX == 1 -> no SWAPGS + */ + call paranoid_entry + + UNWIND_HINT_REGS + + .if \vector == X86_TRAP_DB + TRACE_IRQS_OFF_DEBUG + .else + TRACE_IRQS_OFF + .endif + + movq %rsp, %rdi /* pt_regs pointer */ + xorl %esi, %esi /* Clear the error code */ + + .if \vector == X86_TRAP_DB + subq $DB_STACK_OFFSET, CPU_TSS_IST(IST_INDEX_DB) + .endif + + call \cfunc + + .if \vector == X86_TRAP_DB + addq $DB_STACK_OFFSET, CPU_TSS_IST(IST_INDEX_DB) + .endif + + jmp paranoid_exit + + /* Switch to the regular task stack and use the noist entry point */ +.Lfrom_usermode_switch_stack_\@: + idtentry_body vector \cfunc, has_error_code=0 + +_ASM_NOKPROBE(\asmsym) +SYM_CODE_END(\asmsym) +.endm + +/* + * Double fault entry. Straight paranoid. No checks from which context + * this comes because for the espfix induced #DF this would do the wrong + * thing. + */ +.macro idtentry_df vector asmsym cfunc +SYM_CODE_START(\asmsym) + UNWIND_HINT_IRET_REGS offset=8 + ASM_CLAC + + /* + * paranoid_entry returns SWAPGS flag for paranoid_exit in EBX. + * EBX == 0 -> SWAPGS, EBX == 1 -> no SWAPGS + */ + call paranoid_entry + UNWIND_HINT_REGS + + /* Read CR2 early */ + GET_CR2_INTO(%r12); + + TRACE_IRQS_OFF + + movq %rsp, %rdi /* pt_regs pointer into first argument */ + movq ORIG_RAX(%rsp), %rsi /* get error code into 2nd argument*/ + movq $-1, ORIG_RAX(%rsp) /* no syscall to restart */ + movq %r12, %rdx /* Move CR2 into 3rd argument */ + call \cfunc + + jmp paranoid_exit + +_ASM_NOKPROBE(\asmsym) +SYM_CODE_END(\asmsym) +.endm + /* * Interrupt entry helper function. * @@ -860,195 +1057,35 @@ apicinterrupt IRQ_WORK_VECTOR irq_work_interrupt smp_irq_work_interrupt /* * Exception entry points. */ -#define CPU_TSS_IST(x) PER_CPU_VAR(cpu_tss_rw) + (TSS_ist + (x) * 8) - -.macro idtentry_part do_sym, has_error_code:req, read_cr2:req, paranoid:req, shift_ist=-1, ist_offset=0 - - .if \paranoid - call paranoid_entry - /* returned flag: ebx=0: need swapgs on exit, ebx=1: don't need it */ - .else - call error_entry - .endif - UNWIND_HINT_REGS - - .if \read_cr2 - /* - * Store CR2 early so subsequent faults cannot clobber it. Use R12 as - * intermediate storage as RDX can be clobbered in enter_from_user_mode(). - * GET_CR2_INTO can clobber RAX. - */ - GET_CR2_INTO(%r12); - .endif - - .if \shift_ist != -1 - TRACE_IRQS_OFF_DEBUG /* reload IDT in case of recursion */ - .else - TRACE_IRQS_OFF - .endif - -#ifdef CONFIG_CONTEXT_TRACKING - .if \paranoid == 0 - testb $3, CS(%rsp) - jz .Lfrom_kernel_no_context_tracking_\@ - CALL_enter_from_user_mode -.Lfrom_kernel_no_context_tracking_\@: - .endif -#endif - - movq %rsp, %rdi /* pt_regs pointer */ - - .if \has_error_code - movq ORIG_RAX(%rsp), %rsi /* get error code */ - movq $-1, ORIG_RAX(%rsp) /* no syscall to restart */ - .else - xorl %esi, %esi /* no error code */ - .endif - - .if \shift_ist != -1 - subq $\ist_offset, CPU_TSS_IST(\shift_ist) - .endif - - .if \read_cr2 - movq %r12, %rdx /* Move CR2 into 3rd argument */ - .endif - - call \do_sym - - .if \shift_ist != -1 - addq $\ist_offset, CPU_TSS_IST(\shift_ist) - .endif - - .if \paranoid - /* this procedure expect "no swapgs" flag in ebx */ - jmp paranoid_exit - .else - jmp error_exit - .endif - -.endm - -/** - * idtentry - Generate an IDT entry stub - * @sym: Name of the generated entry point - * @do_sym: C function to be called - * @has_error_code: True if this IDT vector has an error code on the stack - * @paranoid: non-zero means that this vector may be invoked from - * kernel mode with user GSBASE and/or user CR3. 
- * 2 is special -- see below. - * @shift_ist: Set to an IST index if entries from kernel mode should - * decrement the IST stack so that nested entries get a - * fresh stack. (This is for #DB, which has a nasty habit - * of recursing.) - * @create_gap: create a 6-word stack gap when coming from kernel mode. - * @read_cr2: load CR2 into the 3rd argument; done before calling any C code - * - * idtentry generates an IDT stub that sets up a usable kernel context, - * creates struct pt_regs, and calls @do_sym. The stub has the following - * special behaviors: - * - * On an entry from user mode, the stub switches from the trampoline or - * IST stack to the normal thread stack. On an exit to user mode, the - * normal exit-to-usermode path is invoked. - * - * On an exit to kernel mode, if @paranoid == 0, we check for preemption, - * whereas we omit the preemption check if @paranoid != 0. This is purely - * because the implementation is simpler this way. The kernel only needs - * to check for asynchronous kernel preemption when IRQ handlers return. - * - * If @paranoid == 0, then the stub will handle IRET faults by pretending - * that the fault came from user mode. It will handle gs_change faults by - * pretending that the fault happened with kernel GSBASE. Since this handling - * is omitted for @paranoid != 0, the #GP, #SS, and #NP stubs must have - * @paranoid == 0. This special handling will do the wrong thing for - * espfix-induced #DF on IRET, so #DF must not use @paranoid == 0. - * - * @paranoid == 2 is special: the stub will never switch stacks. This is for - * #DF: if the thread stack is somehow unusable, we'll still get a useful OOPS. - */ -.macro idtentry sym do_sym has_error_code:req paranoid=0 shift_ist=-1 ist_offset=0 create_gap=0 read_cr2=0 -SYM_CODE_START(\sym) - UNWIND_HINT_IRET_REGS offset=\has_error_code*8 - - /* Sanity check */ - .if \shift_ist != -1 && \paranoid != 1 - .error "using shift_ist requires paranoid=1" - .endif - - .if \create_gap && \paranoid - .error "using create_gap requires paranoid=0" - .endif - - ASM_CLAC - - .if \has_error_code == 0 - pushq $-1 /* ORIG_RAX: no syscall to restart */ - .endif - - .if \paranoid == 1 - testb $3, CS-ORIG_RAX(%rsp) /* If coming from userspace, switch stacks */ - jnz .Lfrom_usermode_switch_stack_\@ - .endif - - .if \create_gap == 1 - /* - * If coming from kernel space, create a 6-word gap to allow the - * int3 handler to emulate a call instruction. - */ - testb $3, CS-ORIG_RAX(%rsp) - jnz .Lfrom_usermode_no_gap_\@ - .rept 6 - pushq 5*8(%rsp) - .endr - UNWIND_HINT_IRET_REGS offset=8 -.Lfrom_usermode_no_gap_\@: - .endif - - idtentry_part \do_sym, \has_error_code, \read_cr2, \paranoid, \shift_ist, \ist_offset - - .if \paranoid == 1 - /* - * Entry from userspace. Switch stacks and treat it - * as a normal entry. This means that paranoid handlers - * run in real process context if user_mode(regs). 
- */ -.Lfrom_usermode_switch_stack_\@: - idtentry_part \do_sym, \has_error_code, \read_cr2, paranoid=0 - .endif - -_ASM_NOKPROBE(\sym) -SYM_CODE_END(\sym) -.endm - -idtentry divide_error do_divide_error has_error_code=0 -idtentry overflow do_overflow has_error_code=0 -idtentry int3 do_int3 has_error_code=0 create_gap=1 -idtentry bounds do_bounds has_error_code=0 -idtentry invalid_op do_invalid_op has_error_code=0 -idtentry device_not_available do_device_not_available has_error_code=0 -idtentry coprocessor_segment_overrun do_coprocessor_segment_overrun has_error_code=0 -idtentry invalid_TSS do_invalid_TSS has_error_code=1 -idtentry segment_not_present do_segment_not_present has_error_code=1 -idtentry stack_segment do_stack_segment has_error_code=1 -idtentry general_protection do_general_protection has_error_code=1 -idtentry spurious_interrupt_bug do_spurious_interrupt_bug has_error_code=0 -idtentry coprocessor_error do_coprocessor_error has_error_code=0 -idtentry alignment_check do_alignment_check has_error_code=1 -idtentry simd_coprocessor_error do_simd_coprocessor_error has_error_code=0 - -idtentry page_fault do_page_fault has_error_code=1 read_cr2=1 +idtentry X86_TRAP_DE divide_error do_divide_error has_error_code=0 +idtentry X86_TRAP_OF overflow do_overflow has_error_code=0 +idtentry X86_TRAP_BP int3 do_int3 has_error_code=0 +idtentry X86_TRAP_BR bounds do_bounds has_error_code=0 +idtentry X86_TRAP_UD invalid_op do_invalid_op has_error_code=0 +idtentry X86_TRAP_NM device_not_available do_device_not_available has_error_code=0 +idtentry X86_TRAP_OLD_MF coprocessor_segment_overrun do_coprocessor_segment_overrun has_error_code=0 +idtentry X86_TRAP_TS invalid_TSS do_invalid_TSS has_error_code=1 +idtentry X86_TRAP_NP segment_not_present do_segment_not_present has_error_code=1 +idtentry X86_TRAP_SS stack_segment do_stack_segment has_error_code=1 +idtentry X86_TRAP_GP general_protection do_general_protection has_error_code=1 +idtentry X86_TRAP_SPURIOUS spurious_interrupt_bug do_spurious_interrupt_bug has_error_code=0 +idtentry X86_TRAP_MF coprocessor_error do_coprocessor_error has_error_code=0 +idtentry X86_TRAP_AC alignment_check do_alignment_check has_error_code=1 +idtentry X86_TRAP_XF simd_coprocessor_error do_simd_coprocessor_error has_error_code=0 + +idtentry X86_TRAP_PF page_fault do_page_fault has_error_code=1 #ifdef CONFIG_X86_MCE -idtentry machine_check do_mce has_error_code=0 paranoid=1 +idtentry_mce_db X86_TRAP_MCE machine_check do_mce #endif -idtentry debug do_debug has_error_code=0 paranoid=1 shift_ist=IST_INDEX_DB ist_offset=DB_STACK_OFFSET -idtentry double_fault do_double_fault has_error_code=1 paranoid=2 read_cr2=1 +idtentry_mce_db X86_TRAP_DB debug do_debug +idtentry_df X86_TRAP_DF double_fault do_double_fault #ifdef CONFIG_XEN_PV -idtentry hypervisor_callback xen_do_hypervisor_callback has_error_code=0 -idtentry xennmi do_nmi has_error_code=0 -idtentry xendebug do_debug has_error_code=0 +idtentry 512 /* dummy */ hypervisor_callback xen_do_hypervisor_callback has_error_code=0 +idtentry X86_TRAP_NMI xennmi do_nmi has_error_code=0 +idtentry X86_TRAP_DB xendebug do_debug has_error_code=0 #endif /* -- cgit v1.2.3 From 424c7d0a9a396ba93815b8861033e62791622cc3 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 26 Mar 2020 16:56:20 +0100 Subject: x86/entry/64: Provide sane error entry/exit For gradual conversion provide a macro parameter and the required code which allows to handle instrumentation and interrupt flags tracking in C. 
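Illustrative invocations (the converted form is exactly what the DECLARE_IDTENTRY ASM variant emits later in this series):

/* Unconverted: irq tracing and context tracking stay in ASM: */
idtentry X86_TRAP_DE divide_error do_divide_error has_error_code=0

/* Converted: the C handler does the tracking, exit via error_return: */
idtentry X86_TRAP_DE asm_exc_divide_error exc_divide_error has_error_code=0 sane=1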
Signed-off-by: Thomas Gleixner Reviewed-by: Alexandre Chartre Acked-by: Peter Zijlstra Acked-by: Andy Lutomirski Link: https://lkml.kernel.org/r/20200505134904.058904490@linutronix.de --- arch/x86/entry/entry_64.S | 22 +++++++++++++++++++--- 1 file changed, 19 insertions(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index 01bfe7f1bea5..96ad26f1bcf3 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -499,8 +499,9 @@ SYM_CODE_END(spurious_entries_start) * @vector: Vector number * @cfunc: C function to be called * @has_error_code: Hardware pushed error code on stack + * @sane: Sane variant which handles irq tracing, context tracking in C */ -.macro idtentry_body vector cfunc has_error_code:req +.macro idtentry_body vector cfunc has_error_code:req sane=0 call error_entry UNWIND_HINT_REGS @@ -514,6 +515,7 @@ SYM_CODE_END(spurious_entries_start) GET_CR2_INTO(%r12); .endif + .if \sane == 0 TRACE_IRQS_OFF #ifdef CONFIG_CONTEXT_TRACKING @@ -522,6 +524,7 @@ SYM_CODE_END(spurious_entries_start) CALL_enter_from_user_mode .Lfrom_kernel_no_ctxt_tracking_\@: #endif + .endif movq %rsp, %rdi /* pt_regs pointer into 1st argument*/ @@ -538,7 +541,11 @@ SYM_CODE_END(spurious_entries_start) call \cfunc + .if \sane == 0 jmp error_exit + .else + jmp error_return + .endif .endm /** @@ -547,11 +554,12 @@ SYM_CODE_END(spurious_entries_start) * @asmsym: ASM symbol for the entry point * @cfunc: C function to be called * @has_error_code: Hardware pushed error code on stack + * @sane: Sane variant which handles irq tracing, context tracking in C * * The macro emits code to set up the kernel context for straight forward * and simple IDT entries. No IST stack, no paranoid entry checks. */ -.macro idtentry vector asmsym cfunc has_error_code:req +.macro idtentry vector asmsym cfunc has_error_code:req sane=0 SYM_CODE_START(\asmsym) UNWIND_HINT_IRET_REGS offset=\has_error_code*8 ASM_CLAC @@ -574,7 +582,7 @@ SYM_CODE_START(\asmsym) .Lfrom_usermode_no_gap_\@: .endif - idtentry_body \vector \cfunc \has_error_code + idtentry_body \vector \cfunc \has_error_code \sane _ASM_NOKPROBE(\asmsym) SYM_CODE_END(\asmsym) @@ -1403,6 +1411,14 @@ SYM_CODE_START_LOCAL(error_exit) jmp .Lretint_user SYM_CODE_END(error_exit) +SYM_CODE_START_LOCAL(error_return) + UNWIND_HINT_REGS + DEBUG_ENTRY_ASSERT_IRQS_OFF + testb $3, CS(%rsp) + jz restore_regs_and_return_to_kernel + jmp swapgs_restore_regs_and_return_to_usermode +SYM_CODE_END(error_return) + /* * Runs on exception stack. Xen PV does not go through this path at all, * so we can use real assembly here. -- cgit v1.2.3 From 60400677e1280dae7d903e5997fb1cfabb22d4bd Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 25 Feb 2020 23:16:11 +0100 Subject: x86/entry/32: Provide macro to emit IDT entry stubs 32 and 64 bit have unnecessary different ways to populate the exception entry code. Provide a idtentry macro which allows to consolidate all of that. 
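For a vector without a hardware error code the emitted 32-bit stub is tiny; an illustrative expansion (exc_foo is a made-up handler name):

SYM_CODE_START(asm_exc_foo)
	ASM_CLAC
	cld
	pushl	$0			/* fake the hardware error code */
	pushl	$exc_foo		/* C function, parked in the GS slot */
	jmp	handle_exception
SYM_CODE_END(asm_exc_foo)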
Signed-off-by: Thomas Gleixner Reviewed-by: Alexandre Chartre Acked-by: Andy Lutomirski Acked-by: Peter Zijlstra Link: https://lkml.kernel.org/r/20200505134904.166735365@linutronix.de --- arch/x86/entry/entry_32.S | 68 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 68 insertions(+) (limited to 'arch') diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S index d9da0b7f38ff..eb64e78052e1 100644 --- a/arch/x86/entry/entry_32.S +++ b/arch/x86/entry/entry_32.S @@ -44,6 +44,7 @@ #include #include #include +#include #include #include "calling.h" @@ -726,6 +727,31 @@ .Lend_\@: .endm + +/** + * idtentry - Macro to generate entry stubs for simple IDT entries + * @vector: Vector number + * @asmsym: ASM symbol for the entry point + * @cfunc: C function to be called + * @has_error_code: Hardware pushed error code on stack + * @sane: Compatibility flag with 64bit + */ +.macro idtentry vector asmsym cfunc has_error_code:req sane=0 +SYM_CODE_START(\asmsym) + ASM_CLAC + cld + + .if \has_error_code == 0 + pushl $0 /* Clear the error code */ + .endif + + /* Push the C-function address into the GS slot */ + pushl $\cfunc + /* Invoke the common exception entry */ + jmp handle_exception +SYM_CODE_END(\asmsym) +.endm + /* * %eax: prev task * %edx: next task @@ -1517,6 +1543,48 @@ SYM_CODE_START_LOCAL_NOALIGN(common_exception) jmp ret_from_exception SYM_CODE_END(common_exception) +SYM_CODE_START_LOCAL_NOALIGN(handle_exception) + /* the function address is in %gs's slot on the stack */ + SAVE_ALL switch_stacks=1 skip_gs=1 unwind_espfix=1 + ENCODE_FRAME_POINTER + + /* fixup %gs */ + GS_TO_REG %ecx + movl PT_GS(%esp), %edi # get the function address + REG_TO_PTGS %ecx + SET_KERNEL_GS %ecx + + /* fixup orig %eax */ + movl PT_ORIG_EAX(%esp), %edx # get the error code + movl $-1, PT_ORIG_EAX(%esp) # no syscall to restart + + movl %esp, %eax # pt_regs pointer + CALL_NOSPEC edi + +#ifdef CONFIG_VM86 + movl PT_EFLAGS(%esp), %eax # mix EFLAGS and CS + movb PT_CS(%esp), %al + andl $(X86_EFLAGS_VM | SEGMENT_RPL_MASK), %eax +#else + /* + * We can be coming here from child spawned by kernel_thread(). + */ + movl PT_CS(%esp), %eax + andl $SEGMENT_RPL_MASK, %eax +#endif + cmpl $USER_RPL, %eax # returning to v8086 or userspace ? + jnb ret_to_user + + PARANOID_EXIT_TO_KERNEL_MODE + BUG_IF_WRONG_CR3 + RESTORE_REGS 4 + jmp .Lirq_return + +ret_to_user: + movl %esp, %eax + jmp restore_all_switch_stack +SYM_CODE_END(handle_exception) + SYM_CODE_START(debug) /* * Entry from sysenter is now handled in common_exception -- cgit v1.2.3 From 53aaf262c66ee237e4163f1af347939ebd9c51c2 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 25 Feb 2020 23:16:12 +0100 Subject: x86/idtentry: Provide macros to define/declare IDT entry points Provide DECLARE/DEFINE_IDTENTRY() macros. DEFINE_IDTENTRY() provides a wrapper which acts as the function definition. The exception handler body is just appended to it with curly brackets. The entry point is marked noinstr so that irq tracing and the enter_from_user_mode() can be moved into the C-entry point. As all C-entries use the same macro (or a later variant) the necessary entry handling can be implemented at one central place. DECLARE_IDTENTRY() provides the function prototypes: - The C entry point cfunc - The ASM entry point asm_cfunc - The XEN/PV entry point xen_asm_cfunc They all follow the same naming convention. When included from ASM code DECLARE_IDTENTRY() is a macro which emits the low level entry point in assembly by instantiating idtentry. 
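For example, a single declaration for a hypothetical vector/handler pair X86_TRAP_FOO/exc_foo expands two ways depending on the includer:

DECLARE_IDTENTRY(X86_TRAP_FOO, exc_foo);

/* seen from C code: */
asmlinkage void asm_exc_foo(void);
asmlinkage void xen_asm_exc_foo(void);
__visible void exc_foo(struct pt_regs *regs);

/* seen from entry_32/64.S: */
idtentry X86_TRAP_FOO asm_exc_foo exc_foo has_error_code=0 sane=1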
IDTENTRY is the simplest variant which just has a pt_regs argument. It's going to be used for all exceptions which have no error code. Signed-off-by: Thomas Gleixner Reviewed-by: Alexandre Chartre Acked-by: Andy Lutomirski Acked-by: Peter Zijlstra Link: https://lkml.kernel.org/r/20200505134904.273363275@linutronix.de --- arch/x86/entry/entry_32.S | 6 ++++ arch/x86/entry/entry_64.S | 6 ++++ arch/x86/include/asm/idtentry.h | 67 +++++++++++++++++++++++++++++++++++++++++ arch/x86/include/asm/traps.h | 2 +- 4 files changed, 80 insertions(+), 1 deletion(-) create mode 100644 arch/x86/include/asm/idtentry.h (limited to 'arch') diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S index eb64e78052e1..8c0e07e2860f 100644 --- a/arch/x86/entry/entry_32.S +++ b/arch/x86/entry/entry_32.S @@ -752,6 +752,12 @@ SYM_CODE_START(\asmsym) SYM_CODE_END(\asmsym) .endm +/* + * Include the defines which emit the idt entries which are shared + * shared between 32 and 64 bit. + */ +#include + /* * %eax: prev task * %edx: next task diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index 96ad26f1bcf3..ee0716290557 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -698,6 +698,12 @@ _ASM_NOKPROBE(\asmsym) SYM_CODE_END(\asmsym) .endm +/* + * Include the defines which emit the idt entries which are shared + * shared between 32 and 64 bit. + */ +#include + /* * Interrupt entry helper function. * diff --git a/arch/x86/include/asm/idtentry.h b/arch/x86/include/asm/idtentry.h new file mode 100644 index 000000000000..bbd81e200b32 --- /dev/null +++ b/arch/x86/include/asm/idtentry.h @@ -0,0 +1,67 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_X86_IDTENTRY_H +#define _ASM_X86_IDTENTRY_H + +/* Interrupts/Exceptions */ +#include + +#ifndef __ASSEMBLY__ + +/** + * DECLARE_IDTENTRY - Declare functions for simple IDT entry points + * No error code pushed by hardware + * @vector: Vector number (ignored for C) + * @func: Function name of the entry point + * + * Declares three functions: + * - The ASM entry point: asm_##func + * - The XEN PV trap entry point: xen_##func (maybe unused) + * - The C handler called from the ASM entry point + * + * Note: This is the C variant of DECLARE_IDTENTRY(). As the name says it + * declares the entry points for usage in C code. There is an ASM variant + * as well which is used to emit the entry stubs in entry_32/64.S. + */ +#define DECLARE_IDTENTRY(vector, func) \ + asmlinkage void asm_##func(void); \ + asmlinkage void xen_asm_##func(void); \ + __visible void func(struct pt_regs *regs) + +/** + * DEFINE_IDTENTRY - Emit code for simple IDT entry points + * @func: Function name of the entry point + * + * @func is called from ASM entry code with interrupts disabled. + * + * The macro is written so it acts as function definition. Append the + * body with a pair of curly brackets. + * + * idtentry_enter() contains common code which has to be invoked before + * arbitrary code in the body. idtentry_exit() contains common code + * which has to run before returning to the low level assembly code. 
+ */ +#define DEFINE_IDTENTRY(func) \ +static __always_inline void __##func(struct pt_regs *regs); \ + \ +__visible noinstr void func(struct pt_regs *regs) \ +{ \ + idtentry_enter(regs); \ + instrumentation_begin(); \ + __##func (regs); \ + instrumentation_end(); \ + idtentry_exit(regs); \ +} \ + \ +static __always_inline void __##func(struct pt_regs *regs) + +#else /* !__ASSEMBLY__ */ + +/* + * The ASM variants for DECLARE_IDTENTRY*() which emit the ASM entry stubs. + */ +#define DECLARE_IDTENTRY(vector, func) \ + idtentry vector asm_##func func has_error_code=0 sane=1 + +#endif /* __ASSEMBLY__ */ + +#endif diff --git a/arch/x86/include/asm/traps.h b/arch/x86/include/asm/traps.h index 2376620dc66f..814273f8b166 100644 --- a/arch/x86/include/asm/traps.h +++ b/arch/x86/include/asm/traps.h @@ -6,8 +6,8 @@ #include #include +#include #include /* TRAP_TRACE, ... */ -#include #define dotraplinkage __visible -- cgit v1.2.3 From 0ba50e861ae9f91ffb6589e3a8ecbd47859b0275 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 26 Mar 2020 16:28:52 +0100 Subject: x86/entry/common: Provide idtentry_enter/exit() Provide functions which handle the low level entry and exit similar to enter/exit from user mode. Signed-off-by: Thomas Gleixner Reviewed-by: Alexandre Chartre Acked-by: Peter Zijlstra Acked-by: Andy Lutomirski Link: https://lkml.kernel.org/r/20200505134904.457578656@linutronix.de --- arch/x86/entry/common.c | 99 +++++++++++++++++++++++++++++++++++++++++ arch/x86/include/asm/idtentry.h | 3 ++ 2 files changed, 102 insertions(+) (limited to 'arch') diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c index e4f9f5f2c21b..9ebe33485428 100644 --- a/arch/x86/entry/common.c +++ b/arch/x86/entry/common.c @@ -510,3 +510,102 @@ SYSCALL_DEFINE0(ni_syscall) { return -ENOSYS; } + +/** + * idtentry_enter - Handle state tracking on idtentry + * @regs: Pointer to pt_regs of interrupted context + * + * Invokes: + * - lockdep irqflag state tracking as low level ASM entry disabled + * interrupts. + * + * - Context tracking if the exception hit user mode. + * + * - RCU notification if the exception hit kernel mode. + * + * - The hardirq tracer to keep the state consistent as low level ASM + * entry disabled interrupts. + */ +void noinstr idtentry_enter(struct pt_regs *regs) +{ + if (user_mode(regs)) { + enter_from_user_mode(); + } else { + lockdep_hardirqs_off(CALLER_ADDR0); + rcu_irq_enter(); + instrumentation_begin(); + trace_hardirqs_off_prepare(); + instrumentation_end(); + } +} + +/** + * idtentry_exit - Common code to handle return from exceptions + * @regs: Pointer to pt_regs (exception entry regs) + * + * Depending on the return target (kernel/user) this runs the necessary + * preemption and work checks if possible and required and returns to + * the caller with interrupts disabled and no further work pending. + * + * This is the last action before returning to the low level ASM code which + * just needs to return to the appropriate context. + * + * Invoked by all exception/interrupt IDTENTRY handlers which are not + * returning through the paranoid exit path (all except NMI, #DF and the IST + * variants of #MC and #DB) and are therefore on the thread stack. 
+ */ +void noinstr idtentry_exit(struct pt_regs *regs) +{ + lockdep_assert_irqs_disabled(); + + /* Check whether this returns to user mode */ + if (user_mode(regs)) { + prepare_exit_to_usermode(regs); + } else if (regs->flags & X86_EFLAGS_IF) { + /* Check kernel preemption, if enabled */ + if (IS_ENABLED(CONFIG_PREEMPTION)) { + /* + * This needs to be done very carefully. + * idtentry_enter() invoked rcu_irq_enter(). This + * needs to be undone before scheduling. + * + * Preemption is disabled inside of RCU idle + * sections. When the task returns from + * preempt_schedule_irq(), RCU is still watching. + * + * rcu_irq_exit_preempt() has additional state + * checking if CONFIG_PROVE_RCU=y + */ + if (!preempt_count()) { + if (IS_ENABLED(CONFIG_DEBUG_ENTRY)) + WARN_ON_ONCE(!on_thread_stack()); + instrumentation_begin(); + rcu_irq_exit_preempt(); + if (need_resched()) + preempt_schedule_irq(); + /* Covers both tracing and lockdep */ + trace_hardirqs_on(); + instrumentation_end(); + return; + } + } + /* + * If preemption is disabled then this needs to be done + * carefully with respect to RCU. The exception might come + * from a RCU idle section in the idle task due to the fact + * that safe_halt() enables interrupts. So this needs the + * same ordering of lockdep/tracing and RCU as the return + * to user mode path. + */ + instrumentation_begin(); + /* Tell the tracer that IRET will enable interrupts */ + trace_hardirqs_on_prepare(); + lockdep_hardirqs_on_prepare(CALLER_ADDR0); + instrumentation_end(); + rcu_irq_exit(); + lockdep_hardirqs_on(CALLER_ADDR0); + } else { + /* IRQ flags state is correct already. Just tell RCU */ + rcu_irq_exit(); + } +} diff --git a/arch/x86/include/asm/idtentry.h b/arch/x86/include/asm/idtentry.h index bbd81e200b32..2adfd80ea2f3 100644 --- a/arch/x86/include/asm/idtentry.h +++ b/arch/x86/include/asm/idtentry.h @@ -7,6 +7,9 @@ #ifndef __ASSEMBLY__ +void idtentry_enter(struct pt_regs *regs); +void idtentry_exit(struct pt_regs *regs); + /** * DECLARE_IDTENTRY - Declare functions for simple IDT entry points * No error code pushed by hardware -- cgit v1.2.3 From 218e31b6e7a33c9b5e5d608aa79d51665bb84e62 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 25 Feb 2020 23:16:13 +0100 Subject: x86/traps: Prepare for using DEFINE_IDTENTRY Prepare for using IDTENTRY to define the C exception/trap entry points. It would be possible to glue this into the existing macro maze, but it's simpler and better to read at the end to just make them distinct. Provide a trivial inline helper to read the trap address and add a comment explaining the logic behind it. The existing macros will be removed once all instances are converted. Signed-off-by: Thomas Gleixner Reviewed-by: Alexandre Chartre Acked-by: Peter Zijlstra Link: https://lkml.kernel.org/r/20200505134904.556327833@linutronix.de --- arch/x86/kernel/traps.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'arch') diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index f5f4a76fb516..3857c0fd3306 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -205,6 +205,21 @@ static void do_error_trap(struct pt_regs *regs, long error_code, char *str, } } +/* + * Posix requires to provide the address of the faulting instruction for + * SIGILL (#UD) and SIGFPE (#DE) in the si_addr member of siginfo_t. 
+ * + * This address is usually regs->ip, but when an uprobe moved the code out + * of line then regs->ip points to the XOL code which would confuse + * anything which analyzes the fault address vs. the unmodified binary. If + * a trap happened in XOL code then uprobe maps regs->ip back to the + * original instruction address. + */ +static __always_inline void __user *error_get_trap_addr(struct pt_regs *regs) +{ + return (void __user *)uprobe_get_trap_addr(regs); +} + #define IP ((void __user *)uprobe_get_trap_addr(regs)) #define DO_ERROR(trapnr, signr, sicode, addr, str, name) \ dotraplinkage void do_##name(struct pt_regs *regs, long error_code) \ -- cgit v1.2.3 From 9d06c4027f21fcfa60221bd7203eda3c82568467 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 25 Feb 2020 23:16:14 +0100 Subject: x86/entry: Convert Divide Error to IDTENTRY Convert #DE to IDTENTRY: - Implement the C entry point with DEFINE_IDTENTRY - Emit the ASM stub with DECLARE_IDTENTRY - Remove the ASM idtentry in 64bit - Remove the open coded ASM entry code in 32bit - Fixup the XEN/PV code No functional change. Signed-off-by: Thomas Gleixner Reviewed-by: Alexandre Chartre Acked-by: Peter Zijlstra Acked-by: Andy Lutomirski Link: https://lkml.kernel.org/r/20200505134904.663914713@linutronix.de --- arch/x86/entry/entry_32.S | 7 ------- arch/x86/entry/entry_64.S | 1 - arch/x86/include/asm/idtentry.h | 12 ++++++++++++ arch/x86/include/asm/traps.h | 3 --- arch/x86/kernel/idt.c | 2 +- arch/x86/kernel/traps.c | 7 ++++++- arch/x86/xen/enlighten_pv.c | 7 ++++++- arch/x86/xen/xen-asm_64.S | 2 +- 8 files changed, 26 insertions(+), 15 deletions(-) (limited to 'arch') diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S index 8c0e07e2860f..398fd3c59228 100644 --- a/arch/x86/entry/entry_32.S +++ b/arch/x86/entry/entry_32.S @@ -1377,13 +1377,6 @@ SYM_CODE_START(alignment_check) jmp common_exception SYM_CODE_END(alignment_check) -SYM_CODE_START(divide_error) - ASM_CLAC - pushl $0 # no error code - pushl $do_divide_error - jmp common_exception -SYM_CODE_END(divide_error) - #ifdef CONFIG_X86_MCE SYM_CODE_START(machine_check) ASM_CLAC diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index ee0716290557..e18594d2dffd 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -1072,7 +1072,6 @@ apicinterrupt IRQ_WORK_VECTOR irq_work_interrupt smp_irq_work_interrupt * Exception entry points. */ -idtentry X86_TRAP_DE divide_error do_divide_error has_error_code=0 idtentry X86_TRAP_OF overflow do_overflow has_error_code=0 idtentry X86_TRAP_BP int3 do_int3 has_error_code=0 idtentry X86_TRAP_BR bounds do_bounds has_error_code=0 diff --git a/arch/x86/include/asm/idtentry.h b/arch/x86/include/asm/idtentry.h index 2adfd80ea2f3..e6f6e296bfec 100644 --- a/arch/x86/include/asm/idtentry.h +++ b/arch/x86/include/asm/idtentry.h @@ -67,4 +67,16 @@ static __always_inline void __##func(struct pt_regs *regs) #endif /* __ASSEMBLY__ */ +/* + * The actual entry points. Note that DECLARE_IDTENTRY*() serves two + * purposes: + * - provide the function declarations when included from C-Code + * - emit the ASM stubs when included from entry_32/64.S + * + * This avoids duplicate defines and ensures that everything is consistent. + */ + +/* Simple exception entry points. 
No hardware error code */ +DECLARE_IDTENTRY(X86_TRAP_DE, exc_divide_error); + #endif diff --git a/arch/x86/include/asm/traps.h b/arch/x86/include/asm/traps.h index 814273f8b166..e68cbf87fe4a 100644 --- a/arch/x86/include/asm/traps.h +++ b/arch/x86/include/asm/traps.h @@ -11,7 +11,6 @@ #define dotraplinkage __visible -asmlinkage void divide_error(void); asmlinkage void debug(void); asmlinkage void nmi(void); asmlinkage void int3(void); @@ -38,7 +37,6 @@ asmlinkage void machine_check(void); asmlinkage void simd_coprocessor_error(void); #if defined(CONFIG_X86_64) && defined(CONFIG_XEN_PV) -asmlinkage void xen_divide_error(void); asmlinkage void xen_xennmi(void); asmlinkage void xen_xendebug(void); asmlinkage void xen_int3(void); @@ -62,7 +60,6 @@ asmlinkage void xen_machine_check(void); asmlinkage void xen_simd_coprocessor_error(void); #endif -dotraplinkage void do_divide_error(struct pt_regs *regs, long error_code); dotraplinkage void do_debug(struct pt_regs *regs, long error_code); dotraplinkage void do_nmi(struct pt_regs *regs, long error_code); dotraplinkage void do_int3(struct pt_regs *regs, long error_code); diff --git a/arch/x86/kernel/idt.c b/arch/x86/kernel/idt.c index 95609ee4c8b3..f2a751b10a01 100644 --- a/arch/x86/kernel/idt.c +++ b/arch/x86/kernel/idt.c @@ -73,7 +73,7 @@ static const __initconst struct idt_data early_idts[] = { * set up TSS. */ static const __initconst struct idt_data def_idts[] = { - INTG(X86_TRAP_DE, divide_error), + INTG(X86_TRAP_DE, asm_exc_divide_error), INTG(X86_TRAP_NMI, nmi), INTG(X86_TRAP_BR, bounds), INTG(X86_TRAP_UD, invalid_op), diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index 3857c0fd3306..37092f74ec42 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -220,6 +220,12 @@ static __always_inline void __user *error_get_trap_addr(struct pt_regs *regs) return (void __user *)uprobe_get_trap_addr(regs); } +DEFINE_IDTENTRY(exc_divide_error) +{ + do_error_trap(regs, 0, "divide_error", X86_TRAP_DE, SIGFPE, + FPE_INTDIV, error_get_trap_addr(regs)); +} + #define IP ((void __user *)uprobe_get_trap_addr(regs)) #define DO_ERROR(trapnr, signr, sicode, addr, str, name) \ dotraplinkage void do_##name(struct pt_regs *regs, long error_code) \ @@ -227,7 +233,6 @@ dotraplinkage void do_##name(struct pt_regs *regs, long error_code) \ do_error_trap(regs, error_code, str, trapnr, signr, sicode, addr); \ } -DO_ERROR(X86_TRAP_DE, SIGFPE, FPE_INTDIV, IP, "divide error", divide_error) DO_ERROR(X86_TRAP_OF, SIGSEGV, 0, NULL, "overflow", overflow) DO_ERROR(X86_TRAP_UD, SIGILL, ILL_ILLOPN, IP, "invalid opcode", invalid_op) DO_ERROR(X86_TRAP_OLD_MF, SIGFPE, 0, NULL, "coprocessor segment overrun", coprocessor_segment_overrun) diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c index c2c97faaf004..06c53c94c813 100644 --- a/arch/x86/xen/enlighten_pv.c +++ b/arch/x86/xen/enlighten_pv.c @@ -604,6 +604,11 @@ struct trap_array_entry { bool ist_okay; }; +#define TRAP_ENTRY(func, ist_ok) { \ + .orig = asm_##func, \ + .xen = xen_asm_##func, \ + .ist_okay = ist_ok } + static struct trap_array_entry trap_array[] = { { debug, xen_xendebug, true }, { double_fault, xen_double_fault, true }, @@ -617,7 +622,7 @@ static struct trap_array_entry trap_array[] = { { entry_INT80_compat, xen_entry_INT80_compat, false }, #endif { page_fault, xen_page_fault, false }, - { divide_error, xen_divide_error, false }, + TRAP_ENTRY(exc_divide_error, false ), { bounds, xen_bounds, false }, { invalid_op, xen_invalid_op, false }, { device_not_available, 
xen_device_not_available, false }, diff --git a/arch/x86/xen/xen-asm_64.S b/arch/x86/xen/xen-asm_64.S index 0a0fd168683a..48ac67ef094a 100644 --- a/arch/x86/xen/xen-asm_64.S +++ b/arch/x86/xen/xen-asm_64.S @@ -28,7 +28,7 @@ SYM_CODE_END(xen_\name) _ASM_NOKPROBE(xen_\name) .endm -xen_pv_trap divide_error +xen_pv_trap asm_exc_divide_error xen_pv_trap debug xen_pv_trap xendebug xen_pv_trap int3 -- cgit v1.2.3 From 4b6b9111c0b9aa4c3b319f1c5a3b1d5850792167 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 25 Feb 2020 23:16:15 +0100 Subject: x86/entry: Convert Overflow exception to IDTENTRY Convert #OF to IDTENTRY: - Implement the C entry point with DEFINE_IDTENTRY - Emit the ASM stub with DECLARE_IDTENTRY - Remove the ASM idtentry in 64bit - Remove the open coded ASM entry code in 32bit - Fixup the XEN/PV code - Remove the old prototypes No functional change. Signed-off-by: Thomas Gleixner Reviewed-by: Alexandre Chartre Acked-by: Andy Lutomirski Acked-by: Peter Zijlstra Link: https://lkml.kernel.org/r/20200505134904.771457898@linutronix.de --- arch/x86/entry/entry_32.S | 7 ------- arch/x86/entry/entry_64.S | 1 - arch/x86/include/asm/idtentry.h | 1 + arch/x86/include/asm/traps.h | 3 --- arch/x86/kernel/idt.c | 2 +- arch/x86/kernel/traps.c | 6 +++++- arch/x86/xen/enlighten_pv.c | 2 +- arch/x86/xen/xen-asm_64.S | 2 +- 8 files changed, 9 insertions(+), 15 deletions(-) (limited to 'arch') diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S index 398fd3c59228..e5d57f6754ed 100644 --- a/arch/x86/entry/entry_32.S +++ b/arch/x86/entry/entry_32.S @@ -1325,13 +1325,6 @@ SYM_CODE_START(native_iret) SYM_CODE_END(native_iret) #endif -SYM_CODE_START(overflow) - ASM_CLAC - pushl $0 - pushl $do_overflow - jmp common_exception -SYM_CODE_END(overflow) - SYM_CODE_START(bounds) ASM_CLAC pushl $0 diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index e18594d2dffd..eb503b0b0cff 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -1072,7 +1072,6 @@ apicinterrupt IRQ_WORK_VECTOR irq_work_interrupt smp_irq_work_interrupt * Exception entry points. */ -idtentry X86_TRAP_OF overflow do_overflow has_error_code=0 idtentry X86_TRAP_BP int3 do_int3 has_error_code=0 idtentry X86_TRAP_BR bounds do_bounds has_error_code=0 idtentry X86_TRAP_UD invalid_op do_invalid_op has_error_code=0 diff --git a/arch/x86/include/asm/idtentry.h b/arch/x86/include/asm/idtentry.h index e6f6e296bfec..54c41b3a2925 100644 --- a/arch/x86/include/asm/idtentry.h +++ b/arch/x86/include/asm/idtentry.h @@ -78,5 +78,6 @@ static __always_inline void __##func(struct pt_regs *regs) /* Simple exception entry points. 
No hardware error code */ DECLARE_IDTENTRY(X86_TRAP_DE, exc_divide_error); +DECLARE_IDTENTRY(X86_TRAP_OF, exc_overflow); #endif diff --git a/arch/x86/include/asm/traps.h b/arch/x86/include/asm/traps.h index e68cbf87fe4a..1b0452c47044 100644 --- a/arch/x86/include/asm/traps.h +++ b/arch/x86/include/asm/traps.h @@ -14,7 +14,6 @@ asmlinkage void debug(void); asmlinkage void nmi(void); asmlinkage void int3(void); -asmlinkage void overflow(void); asmlinkage void bounds(void); asmlinkage void invalid_op(void); asmlinkage void device_not_available(void); @@ -40,7 +39,6 @@ asmlinkage void simd_coprocessor_error(void); asmlinkage void xen_xennmi(void); asmlinkage void xen_xendebug(void); asmlinkage void xen_int3(void); -asmlinkage void xen_overflow(void); asmlinkage void xen_bounds(void); asmlinkage void xen_invalid_op(void); asmlinkage void xen_device_not_available(void); @@ -63,7 +61,6 @@ asmlinkage void xen_simd_coprocessor_error(void); dotraplinkage void do_debug(struct pt_regs *regs, long error_code); dotraplinkage void do_nmi(struct pt_regs *regs, long error_code); dotraplinkage void do_int3(struct pt_regs *regs, long error_code); -dotraplinkage void do_overflow(struct pt_regs *regs, long error_code); dotraplinkage void do_bounds(struct pt_regs *regs, long error_code); dotraplinkage void do_invalid_op(struct pt_regs *regs, long error_code); dotraplinkage void do_device_not_available(struct pt_regs *regs, long error_code); diff --git a/arch/x86/kernel/idt.c b/arch/x86/kernel/idt.c index f2a751b10a01..f8d79629535e 100644 --- a/arch/x86/kernel/idt.c +++ b/arch/x86/kernel/idt.c @@ -99,7 +99,7 @@ static const __initconst struct idt_data def_idts[] = { INTG(X86_TRAP_MC, machine_check), #endif - SYSG(X86_TRAP_OF, overflow), + SYSG(X86_TRAP_OF, asm_exc_overflow), #if defined(CONFIG_IA32_EMULATION) SYSG(IA32_SYSCALL_VECTOR, entry_INT80_compat), #elif defined(CONFIG_X86_32) diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index 37092f74ec42..b522e2aa9e9a 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -226,6 +226,11 @@ DEFINE_IDTENTRY(exc_divide_error) FPE_INTDIV, error_get_trap_addr(regs)); } +DEFINE_IDTENTRY(exc_overflow) +{ + do_error_trap(regs, 0, "overflow", X86_TRAP_OF, SIGSEGV, 0, NULL); +} + #define IP ((void __user *)uprobe_get_trap_addr(regs)) #define DO_ERROR(trapnr, signr, sicode, addr, str, name) \ dotraplinkage void do_##name(struct pt_regs *regs, long error_code) \ @@ -233,7 +238,6 @@ dotraplinkage void do_##name(struct pt_regs *regs, long error_code) \ do_error_trap(regs, error_code, str, trapnr, signr, sicode, addr); \ } -DO_ERROR(X86_TRAP_OF, SIGSEGV, 0, NULL, "overflow", overflow) DO_ERROR(X86_TRAP_UD, SIGILL, ILL_ILLOPN, IP, "invalid opcode", invalid_op) DO_ERROR(X86_TRAP_OLD_MF, SIGFPE, 0, NULL, "coprocessor segment overrun", coprocessor_segment_overrun) DO_ERROR(X86_TRAP_TS, SIGSEGV, 0, NULL, "invalid TSS", invalid_TSS) diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c index 06c53c94c813..25cf701f916b 100644 --- a/arch/x86/xen/enlighten_pv.c +++ b/arch/x86/xen/enlighten_pv.c @@ -617,7 +617,7 @@ static struct trap_array_entry trap_array[] = { #endif { nmi, xen_xennmi, true }, { int3, xen_int3, false }, - { overflow, xen_overflow, false }, + TRAP_ENTRY(exc_overflow, false ), #ifdef CONFIG_IA32_EMULATION { entry_INT80_compat, xen_entry_INT80_compat, false }, #endif diff --git a/arch/x86/xen/xen-asm_64.S b/arch/x86/xen/xen-asm_64.S index 48ac67ef094a..c9c86a06b25d 100644 --- a/arch/x86/xen/xen-asm_64.S +++ 
b/arch/x86/xen/xen-asm_64.S @@ -33,7 +33,7 @@ xen_pv_trap debug xen_pv_trap xendebug xen_pv_trap int3 xen_pv_trap xennmi -xen_pv_trap overflow +xen_pv_trap asm_exc_overflow xen_pv_trap bounds xen_pv_trap invalid_op xen_pv_trap device_not_available -- cgit v1.2.3 From 58d9c81facf55dbd1836d114ce360a048e3a0582 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 25 Feb 2020 23:16:17 +0100 Subject: x86/entry: Convert Bounds exception to IDTENTRY Convert #BR to IDTENTRY: - Implement the C entry point with DEFINE_IDTENTRY - Emit the ASM stub with DECLARE_IDTENTRY - Remove the ASM idtentry in 64bit - Remove the open coded ASM entry code in 32bit - Fixup the XEN/PV code - Remove the old prototypes - Remove the RCU warning as the new entry macro ensures correctness No functional change. Signed-off-by: Thomas Gleixner Reviewed-by: Alexandre Chartre Acked-by: Andy Lutomirski Acked-by: Peter Zijlstra Link: https://lkml.kernel.org/r/20200505134904.863001309@linutronix.de --- arch/x86/entry/entry_32.S | 7 ------- arch/x86/entry/entry_64.S | 1 - arch/x86/include/asm/idtentry.h | 1 + arch/x86/include/asm/traps.h | 3 --- arch/x86/kernel/idt.c | 2 +- arch/x86/kernel/traps.c | 9 ++++----- arch/x86/xen/enlighten_pv.c | 2 +- arch/x86/xen/xen-asm_64.S | 2 +- 8 files changed, 8 insertions(+), 19 deletions(-) (limited to 'arch') diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S index e5d57f6754ed..31a64df56e3d 100644 --- a/arch/x86/entry/entry_32.S +++ b/arch/x86/entry/entry_32.S @@ -1325,13 +1325,6 @@ SYM_CODE_START(native_iret) SYM_CODE_END(native_iret) #endif -SYM_CODE_START(bounds) - ASM_CLAC - pushl $0 - pushl $do_bounds - jmp common_exception -SYM_CODE_END(bounds) - SYM_CODE_START(invalid_op) ASM_CLAC pushl $0 diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index eb503b0b0cff..a3e484da55d1 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -1073,7 +1073,6 @@ apicinterrupt IRQ_WORK_VECTOR irq_work_interrupt smp_irq_work_interrupt */ idtentry X86_TRAP_BP int3 do_int3 has_error_code=0 -idtentry X86_TRAP_BR bounds do_bounds has_error_code=0 idtentry X86_TRAP_UD invalid_op do_invalid_op has_error_code=0 idtentry X86_TRAP_NM device_not_available do_device_not_available has_error_code=0 idtentry X86_TRAP_OLD_MF coprocessor_segment_overrun do_coprocessor_segment_overrun has_error_code=0 diff --git a/arch/x86/include/asm/idtentry.h b/arch/x86/include/asm/idtentry.h index 54c41b3a2925..d7c160b24981 100644 --- a/arch/x86/include/asm/idtentry.h +++ b/arch/x86/include/asm/idtentry.h @@ -79,5 +79,6 @@ static __always_inline void __##func(struct pt_regs *regs) /* Simple exception entry points. 
No hardware error code */ DECLARE_IDTENTRY(X86_TRAP_DE, exc_divide_error); DECLARE_IDTENTRY(X86_TRAP_OF, exc_overflow); +DECLARE_IDTENTRY(X86_TRAP_BR, exc_bounds); #endif diff --git a/arch/x86/include/asm/traps.h b/arch/x86/include/asm/traps.h index 1b0452c47044..73fe4eb3157c 100644 --- a/arch/x86/include/asm/traps.h +++ b/arch/x86/include/asm/traps.h @@ -14,7 +14,6 @@ asmlinkage void debug(void); asmlinkage void nmi(void); asmlinkage void int3(void); -asmlinkage void bounds(void); asmlinkage void invalid_op(void); asmlinkage void device_not_available(void); #ifdef CONFIG_X86_64 @@ -39,7 +38,6 @@ asmlinkage void simd_coprocessor_error(void); asmlinkage void xen_xennmi(void); asmlinkage void xen_xendebug(void); asmlinkage void xen_int3(void); -asmlinkage void xen_bounds(void); asmlinkage void xen_invalid_op(void); asmlinkage void xen_device_not_available(void); asmlinkage void xen_double_fault(void); @@ -61,7 +59,6 @@ asmlinkage void xen_simd_coprocessor_error(void); dotraplinkage void do_debug(struct pt_regs *regs, long error_code); dotraplinkage void do_nmi(struct pt_regs *regs, long error_code); dotraplinkage void do_int3(struct pt_regs *regs, long error_code); -dotraplinkage void do_bounds(struct pt_regs *regs, long error_code); dotraplinkage void do_invalid_op(struct pt_regs *regs, long error_code); dotraplinkage void do_device_not_available(struct pt_regs *regs, long error_code); dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code, unsigned long cr2); diff --git a/arch/x86/kernel/idt.c b/arch/x86/kernel/idt.c index f8d79629535e..87583b69cbc2 100644 --- a/arch/x86/kernel/idt.c +++ b/arch/x86/kernel/idt.c @@ -75,7 +75,7 @@ static const __initconst struct idt_data early_idts[] = { static const __initconst struct idt_data def_idts[] = { INTG(X86_TRAP_DE, asm_exc_divide_error), INTG(X86_TRAP_NMI, nmi), - INTG(X86_TRAP_BR, bounds), + INTG(X86_TRAP_BR, asm_exc_bounds), INTG(X86_TRAP_UD, invalid_op), INTG(X86_TRAP_NM, device_not_available), INTG(X86_TRAP_OLD_MF, coprocessor_segment_overrun), diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index b522e2aa9e9a..7a9fb8b9e1a8 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -410,18 +410,17 @@ dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code, unsign panic("Machine halted."); } -dotraplinkage void do_bounds(struct pt_regs *regs, long error_code) +DEFINE_IDTENTRY(exc_bounds) { - RCU_LOCKDEP_WARN(!rcu_is_watching(), "entry code didn't wake RCU"); - if (notify_die(DIE_TRAP, "bounds", regs, error_code, + if (notify_die(DIE_TRAP, "bounds", regs, 0, X86_TRAP_BR, SIGSEGV) == NOTIFY_STOP) return; cond_local_irq_enable(regs); if (!user_mode(regs)) - die("bounds", regs, error_code); + die("bounds", regs, 0); - do_trap(X86_TRAP_BR, SIGSEGV, "bounds", regs, error_code, 0, NULL); + do_trap(X86_TRAP_BR, SIGSEGV, "bounds", regs, 0, 0, NULL); cond_local_irq_disable(regs); } diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c index 25cf701f916b..29a52233b81d 100644 --- a/arch/x86/xen/enlighten_pv.c +++ b/arch/x86/xen/enlighten_pv.c @@ -623,7 +623,7 @@ static struct trap_array_entry trap_array[] = { #endif { page_fault, xen_page_fault, false }, TRAP_ENTRY(exc_divide_error, false ), - { bounds, xen_bounds, false }, + TRAP_ENTRY(exc_bounds, false ), { invalid_op, xen_invalid_op, false }, { device_not_available, xen_device_not_available, false }, { coprocessor_segment_overrun, xen_coprocessor_segment_overrun, false }, diff --git a/arch/x86/xen/xen-asm_64.S 
b/arch/x86/xen/xen-asm_64.S index c9c86a06b25d..2cdbf6a37e16 100644 --- a/arch/x86/xen/xen-asm_64.S +++ b/arch/x86/xen/xen-asm_64.S @@ -34,7 +34,7 @@ xen_pv_trap xendebug xen_pv_trap int3 xen_pv_trap xennmi xen_pv_trap asm_exc_overflow -xen_pv_trap bounds +xen_pv_trap asm_exc_bounds xen_pv_trap invalid_op xen_pv_trap device_not_available xen_pv_trap double_fault -- cgit v1.2.3 From 49893c5cb281f8691dcbe53e6f85a963f47a4b9b Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 25 Feb 2020 23:16:18 +0100 Subject: x86/entry: Convert Invalid Opcode exception to IDTENTRY Convert #UD to IDTENTRY: - Implement the C entry point with DEFINE_IDTENTRY - Emit the ASM stub with DECLARE_IDTENTRY - Remove the ASM idtentry in 64bit - Remove the open coded ASM entry code in 32bit - Fixup the XEN/PV code - Fixup the FOOF bug call in fault.c - Remove the old prototypes No functional change. Signed-off-by: Thomas Gleixner Reviewed-by: Alexandre Chartre Acked-by: Andy Lutomirski Acked-by: Peter Zijlstra Link: https://lkml.kernel.org/r/20200505134904.955511913@linutronix.de --- arch/x86/entry/entry_32.S | 7 ------- arch/x86/entry/entry_64.S | 1 - arch/x86/include/asm/idtentry.h | 1 + arch/x86/include/asm/traps.h | 8 +++++--- arch/x86/kernel/idt.c | 2 +- arch/x86/kernel/traps.c | 16 +++++++++++++++- arch/x86/mm/fault.c | 2 +- arch/x86/xen/enlighten_pv.c | 2 +- arch/x86/xen/xen-asm_64.S | 2 +- 9 files changed, 25 insertions(+), 16 deletions(-) (limited to 'arch') diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S index 31a64df56e3d..95a96020f9e4 100644 --- a/arch/x86/entry/entry_32.S +++ b/arch/x86/entry/entry_32.S @@ -1325,13 +1325,6 @@ SYM_CODE_START(native_iret) SYM_CODE_END(native_iret) #endif -SYM_CODE_START(invalid_op) - ASM_CLAC - pushl $0 - pushl $do_invalid_op - jmp common_exception -SYM_CODE_END(invalid_op) - SYM_CODE_START(coprocessor_segment_overrun) ASM_CLAC pushl $0 diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index a3e484da55d1..ebd5f9fa4f5c 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -1073,7 +1073,6 @@ apicinterrupt IRQ_WORK_VECTOR irq_work_interrupt smp_irq_work_interrupt */ idtentry X86_TRAP_BP int3 do_int3 has_error_code=0 -idtentry X86_TRAP_UD invalid_op do_invalid_op has_error_code=0 idtentry X86_TRAP_NM device_not_available do_device_not_available has_error_code=0 idtentry X86_TRAP_OLD_MF coprocessor_segment_overrun do_coprocessor_segment_overrun has_error_code=0 idtentry X86_TRAP_TS invalid_TSS do_invalid_TSS has_error_code=1 diff --git a/arch/x86/include/asm/idtentry.h b/arch/x86/include/asm/idtentry.h index d7c160b24981..f34630f09864 100644 --- a/arch/x86/include/asm/idtentry.h +++ b/arch/x86/include/asm/idtentry.h @@ -80,5 +80,6 @@ static __always_inline void __##func(struct pt_regs *regs) DECLARE_IDTENTRY(X86_TRAP_DE, exc_divide_error); DECLARE_IDTENTRY(X86_TRAP_OF, exc_overflow); DECLARE_IDTENTRY(X86_TRAP_BR, exc_bounds); +DECLARE_IDTENTRY(X86_TRAP_UD, exc_invalid_op); #endif diff --git a/arch/x86/include/asm/traps.h b/arch/x86/include/asm/traps.h index 73fe4eb3157c..71a4a7e6ba89 100644 --- a/arch/x86/include/asm/traps.h +++ b/arch/x86/include/asm/traps.h @@ -14,7 +14,6 @@ asmlinkage void debug(void); asmlinkage void nmi(void); asmlinkage void int3(void); -asmlinkage void invalid_op(void); asmlinkage void device_not_available(void); #ifdef CONFIG_X86_64 asmlinkage void double_fault(void); @@ -38,7 +37,6 @@ asmlinkage void simd_coprocessor_error(void); asmlinkage void xen_xennmi(void); asmlinkage void 
xen_xendebug(void); asmlinkage void xen_int3(void); -asmlinkage void xen_invalid_op(void); asmlinkage void xen_device_not_available(void); asmlinkage void xen_double_fault(void); asmlinkage void xen_coprocessor_segment_overrun(void); @@ -59,7 +57,6 @@ asmlinkage void xen_simd_coprocessor_error(void); dotraplinkage void do_debug(struct pt_regs *regs, long error_code); dotraplinkage void do_nmi(struct pt_regs *regs, long error_code); dotraplinkage void do_int3(struct pt_regs *regs, long error_code); -dotraplinkage void do_invalid_op(struct pt_regs *regs, long error_code); dotraplinkage void do_device_not_available(struct pt_regs *regs, long error_code); dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code, unsigned long cr2); dotraplinkage void do_coprocessor_segment_overrun(struct pt_regs *regs, long error_code); @@ -84,6 +81,11 @@ struct bad_iret_stack *fixup_bad_iret(struct bad_iret_stack *s); void __init trap_init(void); #endif +#ifdef CONFIG_X86_F00F_BUG +/* For handling the FOOF bug */ +void handle_invalid_op(struct pt_regs *regs); +#endif + static inline int get_si_code(unsigned long condition) { if (condition & DR_STEP) diff --git a/arch/x86/kernel/idt.c b/arch/x86/kernel/idt.c index 87583b69cbc2..8b48f54aeff6 100644 --- a/arch/x86/kernel/idt.c +++ b/arch/x86/kernel/idt.c @@ -76,7 +76,7 @@ static const __initconst struct idt_data def_idts[] = { INTG(X86_TRAP_DE, asm_exc_divide_error), INTG(X86_TRAP_NMI, nmi), INTG(X86_TRAP_BR, asm_exc_bounds), - INTG(X86_TRAP_UD, invalid_op), + INTG(X86_TRAP_UD, asm_exc_invalid_op), INTG(X86_TRAP_NM, device_not_available), INTG(X86_TRAP_OLD_MF, coprocessor_segment_overrun), INTG(X86_TRAP_TS, invalid_TSS), diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index 7a9fb8b9e1a8..71ac43dc036a 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -231,6 +231,21 @@ DEFINE_IDTENTRY(exc_overflow) do_error_trap(regs, 0, "overflow", X86_TRAP_OF, SIGSEGV, 0, NULL); } +#ifdef CONFIG_X86_F00F_BUG +void handle_invalid_op(struct pt_regs *regs) +#else +static inline void handle_invalid_op(struct pt_regs *regs) +#endif +{ + do_error_trap(regs, 0, "invalid opcode", X86_TRAP_UD, SIGILL, + ILL_ILLOPN, error_get_trap_addr(regs)); +} + +DEFINE_IDTENTRY(exc_invalid_op) +{ + handle_invalid_op(regs); +} + #define IP ((void __user *)uprobe_get_trap_addr(regs)) #define DO_ERROR(trapnr, signr, sicode, addr, str, name) \ dotraplinkage void do_##name(struct pt_regs *regs, long error_code) \ @@ -238,7 +253,6 @@ dotraplinkage void do_##name(struct pt_regs *regs, long error_code) \ do_error_trap(regs, error_code, str, trapnr, signr, sicode, addr); \ } -DO_ERROR(X86_TRAP_UD, SIGILL, ILL_ILLOPN, IP, "invalid opcode", invalid_op) DO_ERROR(X86_TRAP_OLD_MF, SIGFPE, 0, NULL, "coprocessor segment overrun", coprocessor_segment_overrun) DO_ERROR(X86_TRAP_TS, SIGSEGV, 0, NULL, "invalid TSS", invalid_TSS) DO_ERROR(X86_TRAP_NP, SIGBUS, 0, NULL, "segment not present", segment_not_present) diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index 53db18615f31..d7b52a2a1bce 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c @@ -426,7 +426,7 @@ static int is_f00f_bug(struct pt_regs *regs, unsigned long address) nr = (address - idt_descr.address) >> 3; if (nr == 6) { - do_invalid_op(regs, 0); + handle_invalid_op(regs); return 1; } } diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c index 29a52233b81d..5a0e60986e19 100644 --- a/arch/x86/xen/enlighten_pv.c +++ b/arch/x86/xen/enlighten_pv.c @@ -624,7 +624,7 @@ static struct 
trap_array_entry trap_array[] = { { page_fault, xen_page_fault, false }, TRAP_ENTRY(exc_divide_error, false ), TRAP_ENTRY(exc_bounds, false ), - { invalid_op, xen_invalid_op, false }, + TRAP_ENTRY(exc_invalid_op, false ), { device_not_available, xen_device_not_available, false }, { coprocessor_segment_overrun, xen_coprocessor_segment_overrun, false }, { invalid_TSS, xen_invalid_TSS, false }, diff --git a/arch/x86/xen/xen-asm_64.S b/arch/x86/xen/xen-asm_64.S index 2cdbf6a37e16..999f09e8bb09 100644 --- a/arch/x86/xen/xen-asm_64.S +++ b/arch/x86/xen/xen-asm_64.S @@ -35,7 +35,7 @@ xen_pv_trap int3 xen_pv_trap xennmi xen_pv_trap asm_exc_overflow xen_pv_trap asm_exc_bounds -xen_pv_trap invalid_op +xen_pv_trap asm_exc_invalid_op xen_pv_trap device_not_available xen_pv_trap double_fault xen_pv_trap coprocessor_segment_overrun -- cgit v1.2.3 From 866ae2ccee4ac092fea14f18d537205e14c5a904 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 25 Feb 2020 23:16:19 +0100 Subject: x86/entry: Convert Device not available exception to IDTENTRY Convert #NM to IDTENTRY: - Implement the C entry point with DEFINE_IDTENTRY - Emit the ASM stub with DECLARE_IDTENTRY - Remove the ASM idtentry in 64bit - Remove the open coded ASM entry code in 32bit - Fixup the XEN/PV code - Remove the old prototypes - Remove the RCU warning as the new entry macro ensures correctness No functional change. Signed-off-by: Thomas Gleixner Reviewed-by: Alexandre Chartre Acked-by: Andy Lutomirski Acked-by: Peter Zijlstra Link: https://lkml.kernel.org/r/20200505134905.056243863@linutronix.de --- arch/x86/entry/entry_32.S | 7 ------- arch/x86/entry/entry_64.S | 1 - arch/x86/include/asm/idtentry.h | 1 + arch/x86/include/asm/traps.h | 3 --- arch/x86/kernel/idt.c | 2 +- arch/x86/kernel/traps.c | 8 ++------ arch/x86/xen/enlighten_pv.c | 2 +- arch/x86/xen/xen-asm_64.S | 2 +- 8 files changed, 6 insertions(+), 20 deletions(-) (limited to 'arch') diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S index 95a96020f9e4..7d7f2836c071 100644 --- a/arch/x86/entry/entry_32.S +++ b/arch/x86/entry/entry_32.S @@ -1311,13 +1311,6 @@ SYM_CODE_START(simd_coprocessor_error) jmp common_exception SYM_CODE_END(simd_coprocessor_error) -SYM_CODE_START(device_not_available) - ASM_CLAC - pushl $0 - pushl $do_device_not_available - jmp common_exception -SYM_CODE_END(device_not_available) - #ifdef CONFIG_PARAVIRT SYM_CODE_START(native_iret) iret diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index ebd5f9fa4f5c..d7cf00026d33 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -1073,7 +1073,6 @@ apicinterrupt IRQ_WORK_VECTOR irq_work_interrupt smp_irq_work_interrupt */ idtentry X86_TRAP_BP int3 do_int3 has_error_code=0 -idtentry X86_TRAP_NM device_not_available do_device_not_available has_error_code=0 idtentry X86_TRAP_OLD_MF coprocessor_segment_overrun do_coprocessor_segment_overrun has_error_code=0 idtentry X86_TRAP_TS invalid_TSS do_invalid_TSS has_error_code=1 idtentry X86_TRAP_NP segment_not_present do_segment_not_present has_error_code=1 diff --git a/arch/x86/include/asm/idtentry.h b/arch/x86/include/asm/idtentry.h index f34630f09864..fd6f996ce584 100644 --- a/arch/x86/include/asm/idtentry.h +++ b/arch/x86/include/asm/idtentry.h @@ -81,5 +81,6 @@ DECLARE_IDTENTRY(X86_TRAP_DE, exc_divide_error); DECLARE_IDTENTRY(X86_TRAP_OF, exc_overflow); DECLARE_IDTENTRY(X86_TRAP_BR, exc_bounds); DECLARE_IDTENTRY(X86_TRAP_UD, exc_invalid_op); +DECLARE_IDTENTRY(X86_TRAP_NM, exc_device_not_available); #endif diff --git 
a/arch/x86/include/asm/traps.h b/arch/x86/include/asm/traps.h index 71a4a7e6ba89..e5f2c90c0e8b 100644 --- a/arch/x86/include/asm/traps.h +++ b/arch/x86/include/asm/traps.h @@ -14,7 +14,6 @@ asmlinkage void debug(void); asmlinkage void nmi(void); asmlinkage void int3(void); -asmlinkage void device_not_available(void); #ifdef CONFIG_X86_64 asmlinkage void double_fault(void); #endif @@ -37,7 +36,6 @@ asmlinkage void simd_coprocessor_error(void); asmlinkage void xen_xennmi(void); asmlinkage void xen_xendebug(void); asmlinkage void xen_int3(void); -asmlinkage void xen_device_not_available(void); asmlinkage void xen_double_fault(void); asmlinkage void xen_coprocessor_segment_overrun(void); asmlinkage void xen_invalid_TSS(void); @@ -57,7 +55,6 @@ asmlinkage void xen_simd_coprocessor_error(void); dotraplinkage void do_debug(struct pt_regs *regs, long error_code); dotraplinkage void do_nmi(struct pt_regs *regs, long error_code); dotraplinkage void do_int3(struct pt_regs *regs, long error_code); -dotraplinkage void do_device_not_available(struct pt_regs *regs, long error_code); dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code, unsigned long cr2); dotraplinkage void do_coprocessor_segment_overrun(struct pt_regs *regs, long error_code); dotraplinkage void do_invalid_TSS(struct pt_regs *regs, long error_code); diff --git a/arch/x86/kernel/idt.c b/arch/x86/kernel/idt.c index 8b48f54aeff6..cdc2d8bbd338 100644 --- a/arch/x86/kernel/idt.c +++ b/arch/x86/kernel/idt.c @@ -77,7 +77,7 @@ static const __initconst struct idt_data def_idts[] = { INTG(X86_TRAP_NMI, nmi), INTG(X86_TRAP_BR, asm_exc_bounds), INTG(X86_TRAP_UD, asm_exc_invalid_op), - INTG(X86_TRAP_NM, device_not_available), + INTG(X86_TRAP_NM, asm_exc_device_not_available), INTG(X86_TRAP_OLD_MF, coprocessor_segment_overrun), INTG(X86_TRAP_TS, invalid_TSS), INTG(X86_TRAP_NP, segment_not_present), diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index 71ac43dc036a..b8af5eb6a929 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -882,13 +882,10 @@ do_spurious_interrupt_bug(struct pt_regs *regs, long error_code) */ } -dotraplinkage void -do_device_not_available(struct pt_regs *regs, long error_code) +DEFINE_IDTENTRY(exc_device_not_available) { unsigned long cr0 = read_cr0(); - RCU_LOCKDEP_WARN(!rcu_is_watching(), "entry code didn't wake RCU"); - #ifdef CONFIG_MATH_EMULATION if (!boot_cpu_has(X86_FEATURE_FPU) && (cr0 & X86_CR0_EM)) { struct math_emu_info info = { }; @@ -913,10 +910,9 @@ do_device_not_available(struct pt_regs *regs, long error_code) * to kill the task than getting stuck in a never-ending * loop of #NM faults. 
*/ - die("unexpected #NM exception", regs, error_code); + die("unexpected #NM exception", regs, 0); } } -NOKPROBE_SYMBOL(do_device_not_available); #ifdef CONFIG_X86_32 dotraplinkage void do_iret_error(struct pt_regs *regs, long error_code) diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c index 5a0e60986e19..94e2f918403e 100644 --- a/arch/x86/xen/enlighten_pv.c +++ b/arch/x86/xen/enlighten_pv.c @@ -625,7 +625,7 @@ static struct trap_array_entry trap_array[] = { TRAP_ENTRY(exc_divide_error, false ), TRAP_ENTRY(exc_bounds, false ), TRAP_ENTRY(exc_invalid_op, false ), - { device_not_available, xen_device_not_available, false }, + TRAP_ENTRY(exc_device_not_available, false ), { coprocessor_segment_overrun, xen_coprocessor_segment_overrun, false }, { invalid_TSS, xen_invalid_TSS, false }, { segment_not_present, xen_segment_not_present, false }, diff --git a/arch/x86/xen/xen-asm_64.S b/arch/x86/xen/xen-asm_64.S index 999f09e8bb09..5215e5724cfb 100644 --- a/arch/x86/xen/xen-asm_64.S +++ b/arch/x86/xen/xen-asm_64.S @@ -36,7 +36,7 @@ xen_pv_trap xennmi xen_pv_trap asm_exc_overflow xen_pv_trap asm_exc_bounds xen_pv_trap asm_exc_invalid_op -xen_pv_trap device_not_available +xen_pv_trap asm_exc_device_not_available xen_pv_trap double_fault xen_pv_trap coprocessor_segment_overrun xen_pv_trap invalid_TSS -- cgit v1.2.3 From f95658fdb575233f79e3e7ed7ecf990389d31319 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 25 Feb 2020 23:16:20 +0100 Subject: x86/entry: Convert Coprocessor segment overrun exception to IDTENTRY Convert #OLD_MF to IDTENTRY: - Implement the C entry point with DEFINE_IDTENTRY - Emit the ASM stub with DECLARE_IDTENTRY - Remove the ASM idtentry in 64bit - Remove the open coded ASM entry code in 32bit - Fixup the XEN/PV code - Remove the old prototypes No functional change. 
Signed-off-by: Thomas Gleixner Reviewed-by: Alexandre Chartre Acked-by: Andy Lutomirski Acked-by: Peter Zijlstra Link: https://lkml.kernel.org/r/20200505134905.838823510@linutronix.de --- arch/x86/entry/entry_32.S | 7 ------- arch/x86/entry/entry_64.S | 1 - arch/x86/include/asm/idtentry.h | 1 + arch/x86/include/asm/traps.h | 3 --- arch/x86/kernel/idt.c | 2 +- arch/x86/kernel/traps.c | 7 ++++++- arch/x86/xen/enlighten_pv.c | 2 +- arch/x86/xen/xen-asm_64.S | 2 +- 8 files changed, 10 insertions(+), 15 deletions(-) (limited to 'arch') diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S index 7d7f2836c071..916ce827f606 100644 --- a/arch/x86/entry/entry_32.S +++ b/arch/x86/entry/entry_32.S @@ -1318,13 +1318,6 @@ SYM_CODE_START(native_iret) SYM_CODE_END(native_iret) #endif -SYM_CODE_START(coprocessor_segment_overrun) - ASM_CLAC - pushl $0 - pushl $do_coprocessor_segment_overrun - jmp common_exception -SYM_CODE_END(coprocessor_segment_overrun) - SYM_CODE_START(invalid_TSS) ASM_CLAC pushl $do_invalid_TSS diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index d7cf00026d33..423c3a9783af 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -1073,7 +1073,6 @@ apicinterrupt IRQ_WORK_VECTOR irq_work_interrupt smp_irq_work_interrupt */ idtentry X86_TRAP_BP int3 do_int3 has_error_code=0 -idtentry X86_TRAP_OLD_MF coprocessor_segment_overrun do_coprocessor_segment_overrun has_error_code=0 idtentry X86_TRAP_TS invalid_TSS do_invalid_TSS has_error_code=1 idtentry X86_TRAP_NP segment_not_present do_segment_not_present has_error_code=1 idtentry X86_TRAP_SS stack_segment do_stack_segment has_error_code=1 diff --git a/arch/x86/include/asm/idtentry.h b/arch/x86/include/asm/idtentry.h index fd6f996ce584..0c3374012a50 100644 --- a/arch/x86/include/asm/idtentry.h +++ b/arch/x86/include/asm/idtentry.h @@ -82,5 +82,6 @@ DECLARE_IDTENTRY(X86_TRAP_OF, exc_overflow); DECLARE_IDTENTRY(X86_TRAP_BR, exc_bounds); DECLARE_IDTENTRY(X86_TRAP_UD, exc_invalid_op); DECLARE_IDTENTRY(X86_TRAP_NM, exc_device_not_available); +DECLARE_IDTENTRY(X86_TRAP_OLD_MF, exc_coproc_segment_overrun); #endif diff --git a/arch/x86/include/asm/traps.h b/arch/x86/include/asm/traps.h index e5f2c90c0e8b..5e9d402e6d6c 100644 --- a/arch/x86/include/asm/traps.h +++ b/arch/x86/include/asm/traps.h @@ -17,7 +17,6 @@ asmlinkage void int3(void); #ifdef CONFIG_X86_64 asmlinkage void double_fault(void); #endif -asmlinkage void coprocessor_segment_overrun(void); asmlinkage void invalid_TSS(void); asmlinkage void segment_not_present(void); asmlinkage void stack_segment(void); @@ -37,7 +36,6 @@ asmlinkage void xen_xennmi(void); asmlinkage void xen_xendebug(void); asmlinkage void xen_int3(void); asmlinkage void xen_double_fault(void); -asmlinkage void xen_coprocessor_segment_overrun(void); asmlinkage void xen_invalid_TSS(void); asmlinkage void xen_segment_not_present(void); asmlinkage void xen_stack_segment(void); @@ -56,7 +54,6 @@ dotraplinkage void do_debug(struct pt_regs *regs, long error_code); dotraplinkage void do_nmi(struct pt_regs *regs, long error_code); dotraplinkage void do_int3(struct pt_regs *regs, long error_code); dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code, unsigned long cr2); -dotraplinkage void do_coprocessor_segment_overrun(struct pt_regs *regs, long error_code); dotraplinkage void do_invalid_TSS(struct pt_regs *regs, long error_code); dotraplinkage void do_segment_not_present(struct pt_regs *regs, long error_code); dotraplinkage void do_stack_segment(struct pt_regs 
*regs, long error_code); diff --git a/arch/x86/kernel/idt.c b/arch/x86/kernel/idt.c index cdc2d8bbd338..758d325103e8 100644 --- a/arch/x86/kernel/idt.c +++ b/arch/x86/kernel/idt.c @@ -78,7 +78,7 @@ static const __initconst struct idt_data def_idts[] = { INTG(X86_TRAP_BR, asm_exc_bounds), INTG(X86_TRAP_UD, asm_exc_invalid_op), INTG(X86_TRAP_NM, asm_exc_device_not_available), - INTG(X86_TRAP_OLD_MF, coprocessor_segment_overrun), + INTG(X86_TRAP_OLD_MF, asm_exc_coproc_segment_overrun), INTG(X86_TRAP_TS, invalid_TSS), INTG(X86_TRAP_NP, segment_not_present), INTG(X86_TRAP_SS, stack_segment), diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index b8af5eb6a929..3ce1f667d078 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -246,6 +246,12 @@ DEFINE_IDTENTRY(exc_invalid_op) handle_invalid_op(regs); } +DEFINE_IDTENTRY(exc_coproc_segment_overrun) +{ + do_error_trap(regs, 0, "coprocessor segment overrun", + X86_TRAP_OLD_MF, SIGFPE, 0, NULL); +} + #define IP ((void __user *)uprobe_get_trap_addr(regs)) #define DO_ERROR(trapnr, signr, sicode, addr, str, name) \ dotraplinkage void do_##name(struct pt_regs *regs, long error_code) \ @@ -253,7 +259,6 @@ dotraplinkage void do_##name(struct pt_regs *regs, long error_code) \ do_error_trap(regs, error_code, str, trapnr, signr, sicode, addr); \ } -DO_ERROR(X86_TRAP_OLD_MF, SIGFPE, 0, NULL, "coprocessor segment overrun", coprocessor_segment_overrun) DO_ERROR(X86_TRAP_TS, SIGSEGV, 0, NULL, "invalid TSS", invalid_TSS) DO_ERROR(X86_TRAP_NP, SIGBUS, 0, NULL, "segment not present", segment_not_present) DO_ERROR(X86_TRAP_SS, SIGBUS, 0, NULL, "stack segment", stack_segment) diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c index 94e2f918403e..027cc3d9ad52 100644 --- a/arch/x86/xen/enlighten_pv.c +++ b/arch/x86/xen/enlighten_pv.c @@ -626,7 +626,7 @@ static struct trap_array_entry trap_array[] = { TRAP_ENTRY(exc_bounds, false ), TRAP_ENTRY(exc_invalid_op, false ), TRAP_ENTRY(exc_device_not_available, false ), - { coprocessor_segment_overrun, xen_coprocessor_segment_overrun, false }, + TRAP_ENTRY(exc_coproc_segment_overrun, false ), { invalid_TSS, xen_invalid_TSS, false }, { segment_not_present, xen_segment_not_present, false }, { stack_segment, xen_stack_segment, false }, diff --git a/arch/x86/xen/xen-asm_64.S b/arch/x86/xen/xen-asm_64.S index 5215e5724cfb..7ac9c269b994 100644 --- a/arch/x86/xen/xen-asm_64.S +++ b/arch/x86/xen/xen-asm_64.S @@ -38,7 +38,7 @@ xen_pv_trap asm_exc_bounds xen_pv_trap asm_exc_invalid_op xen_pv_trap asm_exc_device_not_available xen_pv_trap double_fault -xen_pv_trap coprocessor_segment_overrun +xen_pv_trap asm_exc_coproc_segment_overrun xen_pv_trap invalid_TSS xen_pv_trap segment_not_present xen_pv_trap stack_segment -- cgit v1.2.3 From aabfe5383ec7b480ca222ac05d49eb3c83dc022a Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 25 Feb 2020 23:16:21 +0100 Subject: x86/idtentry: Provide IDTENTRY_ERRORCODE Same as IDTENTRY but the C entry point has an error code argument. 
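Expanded for a concrete user (the #TS handler converted by the following patch), DEFINE_IDTENTRY_ERRORCODE() produces roughly this preprocessor output; idtentry_enter()/idtentry_exit() are the same state-management helpers the plain DEFINE_IDTENTRY variant already relies on:

/* DEFINE_IDTENTRY_ERRORCODE(exc_invalid_tss) { body } expands to: */
static __always_inline void __exc_invalid_tss(struct pt_regs *regs,
					      unsigned long error_code);

__visible noinstr void exc_invalid_tss(struct pt_regs *regs,
				       unsigned long error_code)
{
	idtentry_enter(regs);
	instrumentation_begin();
	__exc_invalid_tss(regs, error_code);
	instrumentation_end();
	idtentry_exit(regs);
}

static __always_inline void __exc_invalid_tss(struct pt_regs *regs,
					      unsigned long error_code)
{ /* body */ }

The noinstr function is the real entry point; the handler body is an always-inline helper so it runs strictly between instrumentation_begin() and instrumentation_end(). On the ASM side, the #else branch of the hunk maps the declaration onto the existing idtentry macro with has_error_code=1 sane=1.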
Signed-off-by: Thomas Gleixner Reviewed-by: Alexandre Chartre Acked-by: Andy Lutomirski Acked-by: Peter Zijlstra Link: https://lkml.kernel.org/r/20200505134905.258989060@linutronix.de --- arch/x86/include/asm/idtentry.h | 46 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 46 insertions(+) (limited to 'arch') diff --git a/arch/x86/include/asm/idtentry.h b/arch/x86/include/asm/idtentry.h index 0c3374012a50..f35d5c97ddfb 100644 --- a/arch/x86/include/asm/idtentry.h +++ b/arch/x86/include/asm/idtentry.h @@ -57,6 +57,49 @@ __visible noinstr void func(struct pt_regs *regs) \ \ static __always_inline void __##func(struct pt_regs *regs) +/** + * DECLARE_IDTENTRY_ERRORCODE - Declare functions for simple IDT entry points + * Error code pushed by hardware + * @vector: Vector number (ignored for C) + * @func: Function name of the entry point + * + * Declares three functions: + * - The ASM entry point: asm_##func + * - The XEN PV trap entry point: xen_##func (maybe unused) + * - The C handler called from the ASM entry point + * + * Same as DECLARE_IDTENTRY, but has an extra error_code argument for the + * C-handler. + */ +#define DECLARE_IDTENTRY_ERRORCODE(vector, func) \ + asmlinkage void asm_##func(void); \ + asmlinkage void xen_asm_##func(void); \ + __visible void func(struct pt_regs *regs, unsigned long error_code) + +/** + * DEFINE_IDTENTRY_ERRORCODE - Emit code for simple IDT entry points + * Error code pushed by hardware + * @func: Function name of the entry point + * + * Same as DEFINE_IDTENTRY, but has an extra error_code argument + */ +#define DEFINE_IDTENTRY_ERRORCODE(func) \ +static __always_inline void __##func(struct pt_regs *regs, \ + unsigned long error_code); \ + \ +__visible noinstr void func(struct pt_regs *regs, \ + unsigned long error_code) \ +{ \ + idtentry_enter(regs); \ + instrumentation_begin(); \ + __##func (regs, error_code); \ + instrumentation_end(); \ + idtentry_exit(regs); \ +} \ + \ +static __always_inline void __##func(struct pt_regs *regs, \ + unsigned long error_code) + #else /* !__ASSEMBLY__ */ /* @@ -65,6 +108,9 @@ static __always_inline void __##func(struct pt_regs *regs) #define DECLARE_IDTENTRY(vector, func) \ idtentry vector asm_##func func has_error_code=0 sane=1 +#define DECLARE_IDTENTRY_ERRORCODE(vector, func) \ + idtentry vector asm_##func func has_error_code=1 sane=1 + #endif /* __ASSEMBLY__ */ /* -- cgit v1.2.3 From 97b3d290b865cf9115f7d37d40b7482efba4d46d Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 25 Feb 2020 23:16:22 +0100 Subject: x86/entry: Convert Invalid TSS exception to IDTENTRY Convert #TS to IDTENTRY_ERRORCODE: - Implement the C entry point with DEFINE_IDTENTRY - Emit the ASM stub with DECLARE_IDTENTRY - Remove the ASM idtentry in 64bit - Remove the open coded ASM entry code in 32bit - Fixup the XEN/PV code - Remove the old prototypes No functional change. 
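Each of these _ERRORCODE conversions touches the same four C-side places; collecting this patch's pieces from the hunks below into one view:

/* arch/x86/include/asm/idtentry.h: declaration, error code from hardware */
DECLARE_IDTENTRY_ERRORCODE(X86_TRAP_TS, exc_invalid_tss);

/* arch/x86/kernel/traps.c: the C entry point */
DEFINE_IDTENTRY_ERRORCODE(exc_invalid_tss)
{
	do_error_trap(regs, error_code, "invalid TSS", X86_TRAP_TS, SIGSEGV,
		      0, NULL);
}

/* arch/x86/kernel/idt.c: the IDT gate now points at the generated ASM stub */
INTG(X86_TRAP_TS, asm_exc_invalid_tss),

/* arch/x86/xen/enlighten_pv.c: XEN PV trap table entry */
TRAP_ENTRY(exc_invalid_tss, false ),

The fifth touchpoint is xen-asm_64.S, where the xen_pv_trap asm_exc_invalid_tss line emits the PV stub.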
Signed-off-by: Thomas Gleixner Reviewed-by: Alexandre Chartre Acked-by: Andy Lutomirski Acked-by: Peter Zijlstra Link: https://lkml.kernel.org/r/20200505134905.350676449@linutronix.de --- arch/x86/entry/entry_32.S | 6 ------ arch/x86/entry/entry_64.S | 1 - arch/x86/include/asm/idtentry.h | 3 +++ arch/x86/include/asm/traps.h | 3 --- arch/x86/kernel/idt.c | 2 +- arch/x86/kernel/traps.c | 7 ++++++- arch/x86/xen/enlighten_pv.c | 2 +- arch/x86/xen/xen-asm_64.S | 2 +- 8 files changed, 12 insertions(+), 14 deletions(-) (limited to 'arch') diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S index 916ce827f606..2143a6208afc 100644 --- a/arch/x86/entry/entry_32.S +++ b/arch/x86/entry/entry_32.S @@ -1318,12 +1318,6 @@ SYM_CODE_START(native_iret) SYM_CODE_END(native_iret) #endif -SYM_CODE_START(invalid_TSS) - ASM_CLAC - pushl $do_invalid_TSS - jmp common_exception -SYM_CODE_END(invalid_TSS) - SYM_CODE_START(segment_not_present) ASM_CLAC pushl $do_segment_not_present diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index 423c3a9783af..07307cf08c2d 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -1073,7 +1073,6 @@ apicinterrupt IRQ_WORK_VECTOR irq_work_interrupt smp_irq_work_interrupt */ idtentry X86_TRAP_BP int3 do_int3 has_error_code=0 -idtentry X86_TRAP_TS invalid_TSS do_invalid_TSS has_error_code=1 idtentry X86_TRAP_NP segment_not_present do_segment_not_present has_error_code=1 idtentry X86_TRAP_SS stack_segment do_stack_segment has_error_code=1 idtentry X86_TRAP_GP general_protection do_general_protection has_error_code=1 diff --git a/arch/x86/include/asm/idtentry.h b/arch/x86/include/asm/idtentry.h index f35d5c97ddfb..aa0d4656684f 100644 --- a/arch/x86/include/asm/idtentry.h +++ b/arch/x86/include/asm/idtentry.h @@ -130,4 +130,7 @@ DECLARE_IDTENTRY(X86_TRAP_UD, exc_invalid_op); DECLARE_IDTENTRY(X86_TRAP_NM, exc_device_not_available); DECLARE_IDTENTRY(X86_TRAP_OLD_MF, exc_coproc_segment_overrun); +/* Simple exception entries with error code pushed by hardware */ +DECLARE_IDTENTRY_ERRORCODE(X86_TRAP_TS, exc_invalid_tss); + #endif diff --git a/arch/x86/include/asm/traps.h b/arch/x86/include/asm/traps.h index 5e9d402e6d6c..30bc589e9f5a 100644 --- a/arch/x86/include/asm/traps.h +++ b/arch/x86/include/asm/traps.h @@ -17,7 +17,6 @@ asmlinkage void int3(void); #ifdef CONFIG_X86_64 asmlinkage void double_fault(void); #endif -asmlinkage void invalid_TSS(void); asmlinkage void segment_not_present(void); asmlinkage void stack_segment(void); asmlinkage void general_protection(void); @@ -36,7 +35,6 @@ asmlinkage void xen_xennmi(void); asmlinkage void xen_xendebug(void); asmlinkage void xen_int3(void); asmlinkage void xen_double_fault(void); -asmlinkage void xen_invalid_TSS(void); asmlinkage void xen_segment_not_present(void); asmlinkage void xen_stack_segment(void); asmlinkage void xen_general_protection(void); @@ -54,7 +52,6 @@ dotraplinkage void do_debug(struct pt_regs *regs, long error_code); dotraplinkage void do_nmi(struct pt_regs *regs, long error_code); dotraplinkage void do_int3(struct pt_regs *regs, long error_code); dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code, unsigned long cr2); -dotraplinkage void do_invalid_TSS(struct pt_regs *regs, long error_code); dotraplinkage void do_segment_not_present(struct pt_regs *regs, long error_code); dotraplinkage void do_stack_segment(struct pt_regs *regs, long error_code); dotraplinkage void do_general_protection(struct pt_regs *regs, long error_code); diff --git 
a/arch/x86/kernel/idt.c b/arch/x86/kernel/idt.c index 758d325103e8..caa740df1404 100644 --- a/arch/x86/kernel/idt.c +++ b/arch/x86/kernel/idt.c @@ -79,7 +79,7 @@ static const __initconst struct idt_data def_idts[] = { INTG(X86_TRAP_UD, asm_exc_invalid_op), INTG(X86_TRAP_NM, asm_exc_device_not_available), INTG(X86_TRAP_OLD_MF, asm_exc_coproc_segment_overrun), - INTG(X86_TRAP_TS, invalid_TSS), + INTG(X86_TRAP_TS, asm_exc_invalid_tss), INTG(X86_TRAP_NP, segment_not_present), INTG(X86_TRAP_SS, stack_segment), INTG(X86_TRAP_GP, general_protection), diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index 3ce1f667d078..10ab0837668f 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -252,6 +252,12 @@ DEFINE_IDTENTRY(exc_coproc_segment_overrun) X86_TRAP_OLD_MF, SIGFPE, 0, NULL); } +DEFINE_IDTENTRY_ERRORCODE(exc_invalid_tss) +{ + do_error_trap(regs, error_code, "invalid TSS", X86_TRAP_TS, SIGSEGV, + 0, NULL); +} + #define IP ((void __user *)uprobe_get_trap_addr(regs)) #define DO_ERROR(trapnr, signr, sicode, addr, str, name) \ dotraplinkage void do_##name(struct pt_regs *regs, long error_code) \ @@ -259,7 +265,6 @@ dotraplinkage void do_##name(struct pt_regs *regs, long error_code) \ do_error_trap(regs, error_code, str, trapnr, signr, sicode, addr); \ } -DO_ERROR(X86_TRAP_TS, SIGSEGV, 0, NULL, "invalid TSS", invalid_TSS) DO_ERROR(X86_TRAP_NP, SIGBUS, 0, NULL, "segment not present", segment_not_present) DO_ERROR(X86_TRAP_SS, SIGBUS, 0, NULL, "stack segment", stack_segment) #undef IP diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c index 027cc3d9ad52..650ce235585c 100644 --- a/arch/x86/xen/enlighten_pv.c +++ b/arch/x86/xen/enlighten_pv.c @@ -627,7 +627,7 @@ static struct trap_array_entry trap_array[] = { TRAP_ENTRY(exc_invalid_op, false ), TRAP_ENTRY(exc_device_not_available, false ), TRAP_ENTRY(exc_coproc_segment_overrun, false ), - { invalid_TSS, xen_invalid_TSS, false }, + TRAP_ENTRY(exc_invalid_tss, false ), { segment_not_present, xen_segment_not_present, false }, { stack_segment, xen_stack_segment, false }, { general_protection, xen_general_protection, false }, diff --git a/arch/x86/xen/xen-asm_64.S b/arch/x86/xen/xen-asm_64.S index 7ac9c269b994..f7a890c95db7 100644 --- a/arch/x86/xen/xen-asm_64.S +++ b/arch/x86/xen/xen-asm_64.S @@ -39,7 +39,7 @@ xen_pv_trap asm_exc_invalid_op xen_pv_trap asm_exc_device_not_available xen_pv_trap double_fault xen_pv_trap asm_exc_coproc_segment_overrun -xen_pv_trap invalid_TSS +xen_pv_trap asm_exc_invalid_tss xen_pv_trap segment_not_present xen_pv_trap stack_segment xen_pv_trap general_protection -- cgit v1.2.3 From 99a3fb8d01af1085f16a417a748e0a462dc92d29 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 25 Feb 2020 23:16:23 +0100 Subject: x86/entry: Convert Segment not present exception to IDTENTRY Convert #NP to IDTENTRY_ERRORCODE: - Implement the C entry point with DEFINE_IDTENTRY - Emit the ASM stub with DECLARE_IDTENTRY - Remove the ASM idtentry in 64bit - Remove the open coded ASM entry code in 32bit - Fixup the XEN/PV code - Remove the old prototypes No functional change. 
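On the XEN side, these patches keep replacing positional trap_array_entry initializers with TRAP_ENTRY(). The macro body is not visible in any hunk of this series; judging from the open-coded entries it replaces, it presumably expands along these lines:

/*
 * Presumed shape, inferred from the positional initializers being
 * replaced ({ native_handler, xen_handler, ist_okay }); not taken
 * from this series.
 */
#define TRAP_ENTRY(func, ist_ok) \
	{ asm_##func, xen_asm_##func, ist_ok }

so that a single argument names both the native asm_exc_* stub and the xen_asm_exc_* PV stub that DECLARE_IDTENTRY*() provides.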
Signed-off-by: Thomas Gleixner Reviewed-by: Alexandre Chartre Acked-by: Andy Lutomirski Acked-by: Peter Zijlstra Link: https://lkml.kernel.org/r/20200505134905.443591450@linutronix.de --- arch/x86/entry/entry_32.S | 6 ------ arch/x86/entry/entry_64.S | 1 - arch/x86/include/asm/idtentry.h | 1 + arch/x86/include/asm/traps.h | 3 --- arch/x86/kernel/idt.c | 2 +- arch/x86/kernel/traps.c | 7 ++++++- arch/x86/xen/enlighten_pv.c | 2 +- arch/x86/xen/xen-asm_64.S | 2 +- 8 files changed, 10 insertions(+), 14 deletions(-) (limited to 'arch') diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S index 2143a6208afc..b01dbb3f616b 100644 --- a/arch/x86/entry/entry_32.S +++ b/arch/x86/entry/entry_32.S @@ -1318,12 +1318,6 @@ SYM_CODE_START(native_iret) SYM_CODE_END(native_iret) #endif -SYM_CODE_START(segment_not_present) - ASM_CLAC - pushl $do_segment_not_present - jmp common_exception -SYM_CODE_END(segment_not_present) - SYM_CODE_START(stack_segment) ASM_CLAC pushl $do_stack_segment diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index 07307cf08c2d..367a207603a6 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -1073,7 +1073,6 @@ apicinterrupt IRQ_WORK_VECTOR irq_work_interrupt smp_irq_work_interrupt */ idtentry X86_TRAP_BP int3 do_int3 has_error_code=0 -idtentry X86_TRAP_NP segment_not_present do_segment_not_present has_error_code=1 idtentry X86_TRAP_SS stack_segment do_stack_segment has_error_code=1 idtentry X86_TRAP_GP general_protection do_general_protection has_error_code=1 idtentry X86_TRAP_SPURIOUS spurious_interrupt_bug do_spurious_interrupt_bug has_error_code=0 diff --git a/arch/x86/include/asm/idtentry.h b/arch/x86/include/asm/idtentry.h index aa0d4656684f..d517c09e8d0b 100644 --- a/arch/x86/include/asm/idtentry.h +++ b/arch/x86/include/asm/idtentry.h @@ -132,5 +132,6 @@ DECLARE_IDTENTRY(X86_TRAP_OLD_MF, exc_coproc_segment_overrun); /* Simple exception entries with error code pushed by hardware */ DECLARE_IDTENTRY_ERRORCODE(X86_TRAP_TS, exc_invalid_tss); +DECLARE_IDTENTRY_ERRORCODE(X86_TRAP_NP, exc_segment_not_present); #endif diff --git a/arch/x86/include/asm/traps.h b/arch/x86/include/asm/traps.h index 30bc589e9f5a..970c88894fc4 100644 --- a/arch/x86/include/asm/traps.h +++ b/arch/x86/include/asm/traps.h @@ -17,7 +17,6 @@ asmlinkage void int3(void); #ifdef CONFIG_X86_64 asmlinkage void double_fault(void); #endif -asmlinkage void segment_not_present(void); asmlinkage void stack_segment(void); asmlinkage void general_protection(void); asmlinkage void page_fault(void); @@ -35,7 +34,6 @@ asmlinkage void xen_xennmi(void); asmlinkage void xen_xendebug(void); asmlinkage void xen_int3(void); asmlinkage void xen_double_fault(void); -asmlinkage void xen_segment_not_present(void); asmlinkage void xen_stack_segment(void); asmlinkage void xen_general_protection(void); asmlinkage void xen_page_fault(void); @@ -52,7 +50,6 @@ dotraplinkage void do_debug(struct pt_regs *regs, long error_code); dotraplinkage void do_nmi(struct pt_regs *regs, long error_code); dotraplinkage void do_int3(struct pt_regs *regs, long error_code); dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code, unsigned long cr2); -dotraplinkage void do_segment_not_present(struct pt_regs *regs, long error_code); dotraplinkage void do_stack_segment(struct pt_regs *regs, long error_code); dotraplinkage void do_general_protection(struct pt_regs *regs, long error_code); dotraplinkage void do_page_fault(struct pt_regs *regs, unsigned long error_code, unsigned long address); 
diff --git a/arch/x86/kernel/idt.c b/arch/x86/kernel/idt.c index caa740df1404..b9acc7f5684a 100644 --- a/arch/x86/kernel/idt.c +++ b/arch/x86/kernel/idt.c @@ -80,7 +80,7 @@ static const __initconst struct idt_data def_idts[] = { INTG(X86_TRAP_NM, asm_exc_device_not_available), INTG(X86_TRAP_OLD_MF, asm_exc_coproc_segment_overrun), INTG(X86_TRAP_TS, asm_exc_invalid_tss), - INTG(X86_TRAP_NP, segment_not_present), + INTG(X86_TRAP_NP, asm_exc_segment_not_present), INTG(X86_TRAP_SS, stack_segment), INTG(X86_TRAP_GP, general_protection), INTG(X86_TRAP_SPURIOUS, spurious_interrupt_bug), diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index 10ab0837668f..88ba5f0400fd 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -258,6 +258,12 @@ DEFINE_IDTENTRY_ERRORCODE(exc_invalid_tss) 0, NULL); } +DEFINE_IDTENTRY_ERRORCODE(exc_segment_not_present) +{ + do_error_trap(regs, error_code, "segment not present", X86_TRAP_NP, + SIGBUS, 0, NULL); +} + #define IP ((void __user *)uprobe_get_trap_addr(regs)) #define DO_ERROR(trapnr, signr, sicode, addr, str, name) \ dotraplinkage void do_##name(struct pt_regs *regs, long error_code) \ @@ -265,7 +271,6 @@ dotraplinkage void do_##name(struct pt_regs *regs, long error_code) \ do_error_trap(regs, error_code, str, trapnr, signr, sicode, addr); \ } -DO_ERROR(X86_TRAP_NP, SIGBUS, 0, NULL, "segment not present", segment_not_present) DO_ERROR(X86_TRAP_SS, SIGBUS, 0, NULL, "stack segment", stack_segment) #undef IP diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c index 650ce235585c..4e2a41ebc369 100644 --- a/arch/x86/xen/enlighten_pv.c +++ b/arch/x86/xen/enlighten_pv.c @@ -628,7 +628,7 @@ static struct trap_array_entry trap_array[] = { TRAP_ENTRY(exc_device_not_available, false ), TRAP_ENTRY(exc_coproc_segment_overrun, false ), TRAP_ENTRY(exc_invalid_tss, false ), - { segment_not_present, xen_segment_not_present, false }, + TRAP_ENTRY(exc_segment_not_present, false ), { stack_segment, xen_stack_segment, false }, { general_protection, xen_general_protection, false }, { spurious_interrupt_bug, xen_spurious_interrupt_bug, false }, diff --git a/arch/x86/xen/xen-asm_64.S b/arch/x86/xen/xen-asm_64.S index f7a890c95db7..c8ce7ad71202 100644 --- a/arch/x86/xen/xen-asm_64.S +++ b/arch/x86/xen/xen-asm_64.S @@ -40,7 +40,7 @@ xen_pv_trap asm_exc_device_not_available xen_pv_trap double_fault xen_pv_trap asm_exc_coproc_segment_overrun xen_pv_trap asm_exc_invalid_tss -xen_pv_trap segment_not_present +xen_pv_trap asm_exc_segment_not_present xen_pv_trap stack_segment xen_pv_trap general_protection xen_pv_trap page_fault -- cgit v1.2.3 From fd9689bf91131c4bea5ea54f828af5267f5ed6a0 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 25 Feb 2020 23:16:24 +0100 Subject: x86/entry: Convert Stack segment exception to IDTENTRY Convert #SS to IDTENTRY_ERRORCODE: - Implement the C entry point with DEFINE_IDTENTRY - Emit the ASM stub with DECLARE_IDTENTRY - Remove the ASM idtentry in 64bit - Remove the open coded ASM entry code in 32bit - Fixup the XEN/PV code - Remove the old prototypes No functional change. 
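This conversion removes the last DO_ERROR() user, so the macro and its IP address helper go with it (see the traps.c hunk below). Every converted handler now calls do_error_trap() directly; the helper's definition is not shown in these patches, but from the call sites its signature is presumably:

/* Presumed signature, inferred from the call sites in this series */
static void do_error_trap(struct pt_regs *regs, long error_code, char *str,
			  unsigned long trapnr, int signr, int sicode,
			  void __user *addr);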
Signed-off-by: Thomas Gleixner Reviewed-by: Alexandre Chartre Acked-by: Peter Zijlstra Acked-by: Andy Lutomirski Link: https://lkml.kernel.org/r/20200505134905.539867572@linutronix.de --- arch/x86/entry/entry_32.S | 6 ------ arch/x86/entry/entry_64.S | 1 - arch/x86/include/asm/idtentry.h | 1 + arch/x86/include/asm/traps.h | 3 --- arch/x86/kernel/idt.c | 2 +- arch/x86/kernel/traps.c | 12 ++++-------- arch/x86/xen/enlighten_pv.c | 2 +- arch/x86/xen/xen-asm_64.S | 2 +- 8 files changed, 8 insertions(+), 21 deletions(-) (limited to 'arch') diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S index b01dbb3f616b..ffe43d2f8fe9 100644 --- a/arch/x86/entry/entry_32.S +++ b/arch/x86/entry/entry_32.S @@ -1318,12 +1318,6 @@ SYM_CODE_START(native_iret) SYM_CODE_END(native_iret) #endif -SYM_CODE_START(stack_segment) - ASM_CLAC - pushl $do_stack_segment - jmp common_exception -SYM_CODE_END(stack_segment) - SYM_CODE_START(alignment_check) ASM_CLAC pushl $do_alignment_check diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index 367a207603a6..c92592de3370 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -1073,7 +1073,6 @@ apicinterrupt IRQ_WORK_VECTOR irq_work_interrupt smp_irq_work_interrupt */ idtentry X86_TRAP_BP int3 do_int3 has_error_code=0 -idtentry X86_TRAP_SS stack_segment do_stack_segment has_error_code=1 idtentry X86_TRAP_GP general_protection do_general_protection has_error_code=1 idtentry X86_TRAP_SPURIOUS spurious_interrupt_bug do_spurious_interrupt_bug has_error_code=0 idtentry X86_TRAP_MF coprocessor_error do_coprocessor_error has_error_code=0 diff --git a/arch/x86/include/asm/idtentry.h b/arch/x86/include/asm/idtentry.h index d517c09e8d0b..4c0abd31db5b 100644 --- a/arch/x86/include/asm/idtentry.h +++ b/arch/x86/include/asm/idtentry.h @@ -133,5 +133,6 @@ DECLARE_IDTENTRY(X86_TRAP_OLD_MF, exc_coproc_segment_overrun); /* Simple exception entries with error code pushed by hardware */ DECLARE_IDTENTRY_ERRORCODE(X86_TRAP_TS, exc_invalid_tss); DECLARE_IDTENTRY_ERRORCODE(X86_TRAP_NP, exc_segment_not_present); +DECLARE_IDTENTRY_ERRORCODE(X86_TRAP_SS, exc_stack_segment); #endif diff --git a/arch/x86/include/asm/traps.h b/arch/x86/include/asm/traps.h index 970c88894fc4..5e580ff1dcf9 100644 --- a/arch/x86/include/asm/traps.h +++ b/arch/x86/include/asm/traps.h @@ -17,7 +17,6 @@ asmlinkage void int3(void); #ifdef CONFIG_X86_64 asmlinkage void double_fault(void); #endif -asmlinkage void stack_segment(void); asmlinkage void general_protection(void); asmlinkage void page_fault(void); asmlinkage void async_page_fault(void); @@ -34,7 +33,6 @@ asmlinkage void xen_xennmi(void); asmlinkage void xen_xendebug(void); asmlinkage void xen_int3(void); asmlinkage void xen_double_fault(void); -asmlinkage void xen_stack_segment(void); asmlinkage void xen_general_protection(void); asmlinkage void xen_page_fault(void); asmlinkage void xen_spurious_interrupt_bug(void); @@ -50,7 +48,6 @@ dotraplinkage void do_debug(struct pt_regs *regs, long error_code); dotraplinkage void do_nmi(struct pt_regs *regs, long error_code); dotraplinkage void do_int3(struct pt_regs *regs, long error_code); dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code, unsigned long cr2); -dotraplinkage void do_stack_segment(struct pt_regs *regs, long error_code); dotraplinkage void do_general_protection(struct pt_regs *regs, long error_code); dotraplinkage void do_page_fault(struct pt_regs *regs, unsigned long error_code, unsigned long address); dotraplinkage void 
do_spurious_interrupt_bug(struct pt_regs *regs, long error_code); diff --git a/arch/x86/kernel/idt.c b/arch/x86/kernel/idt.c index b9acc7f5684a..8d95cbf56624 100644 --- a/arch/x86/kernel/idt.c +++ b/arch/x86/kernel/idt.c @@ -81,7 +81,7 @@ static const __initconst struct idt_data def_idts[] = { INTG(X86_TRAP_OLD_MF, asm_exc_coproc_segment_overrun), INTG(X86_TRAP_TS, asm_exc_invalid_tss), INTG(X86_TRAP_NP, asm_exc_segment_not_present), - INTG(X86_TRAP_SS, stack_segment), + INTG(X86_TRAP_SS, asm_exc_stack_segment), INTG(X86_TRAP_GP, general_protection), INTG(X86_TRAP_SPURIOUS, spurious_interrupt_bug), INTG(X86_TRAP_MF, coprocessor_error), diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index 88ba5f0400fd..3dfdc4d3de87 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -264,16 +264,12 @@ DEFINE_IDTENTRY_ERRORCODE(exc_segment_not_present) SIGBUS, 0, NULL); } -#define IP ((void __user *)uprobe_get_trap_addr(regs)) -#define DO_ERROR(trapnr, signr, sicode, addr, str, name) \ -dotraplinkage void do_##name(struct pt_regs *regs, long error_code) \ -{ \ - do_error_trap(regs, error_code, str, trapnr, signr, sicode, addr); \ +DEFINE_IDTENTRY_ERRORCODE(exc_stack_segment) +{ + do_error_trap(regs, error_code, "stack segment", X86_TRAP_SS, SIGBUS, + 0, NULL); } -DO_ERROR(X86_TRAP_SS, SIGBUS, 0, NULL, "stack segment", stack_segment) -#undef IP - dotraplinkage void do_alignment_check(struct pt_regs *regs, long error_code) { char *str = "alignment check"; diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c index 4e2a41ebc369..8290f39d2839 100644 --- a/arch/x86/xen/enlighten_pv.c +++ b/arch/x86/xen/enlighten_pv.c @@ -629,7 +629,7 @@ static struct trap_array_entry trap_array[] = { TRAP_ENTRY(exc_coproc_segment_overrun, false ), TRAP_ENTRY(exc_invalid_tss, false ), TRAP_ENTRY(exc_segment_not_present, false ), - { stack_segment, xen_stack_segment, false }, + TRAP_ENTRY(exc_stack_segment, false ), { general_protection, xen_general_protection, false }, { spurious_interrupt_bug, xen_spurious_interrupt_bug, false }, { coprocessor_error, xen_coprocessor_error, false }, diff --git a/arch/x86/xen/xen-asm_64.S b/arch/x86/xen/xen-asm_64.S index c8ce7ad71202..0ecc0559f657 100644 --- a/arch/x86/xen/xen-asm_64.S +++ b/arch/x86/xen/xen-asm_64.S @@ -41,7 +41,7 @@ xen_pv_trap double_fault xen_pv_trap asm_exc_coproc_segment_overrun xen_pv_trap asm_exc_invalid_tss xen_pv_trap asm_exc_segment_not_present -xen_pv_trap stack_segment +xen_pv_trap asm_exc_stack_segment xen_pv_trap general_protection xen_pv_trap page_fault xen_pv_trap spurious_interrupt_bug -- cgit v1.2.3 From be4c11afbb6d5317274e61fda0edf744080fb72b Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 25 Feb 2020 23:16:25 +0100 Subject: x86/entry: Convert General protection exception to IDTENTRY Convert #GP to IDTENTRY_ERRORCODE: - Implement the C entry point with DEFINE_IDTENTRY - Emit the ASM stub with DECLARE_IDTENTRY - Remove the ASM idtentry in 64bit - Remove the open coded ASM entry code in 32bit - Fixup the XEN/PV code - Remove the old prototypes - Remove the RCU warning as the new entry macro ensures correctness No functional change. 
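The RCU warning can be dropped because the noinstr wrapper emitted by DEFINE_IDTENTRY_ERRORCODE() runs idtentry_enter() before the handler body, which covers the state that RCU_LOCKDEP_WARN() checked by hand. The retargeting in this patch also shows why the asm_exc_* name matters: per the DECLARE macro introduced earlier in this series, the declaration provides three symbols, and the asm_* one is what the IDT, and any code emulating an IDT-style dispatch such as the #DF fixup or xen_failsafe_callback, must jump to:

/* DECLARE_IDTENTRY_ERRORCODE(X86_TRAP_GP, exc_general_protection) yields: */
asmlinkage void asm_exc_general_protection(void);	/* IDT/ASM stub */
asmlinkage void xen_asm_exc_general_protection(void);	/* XEN PV stub  */
__visible void exc_general_protection(struct pt_regs *regs,
				      unsigned long error_code); /* C handler */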
Signed-off-by: Thomas Gleixner Reviewed-by: Alexandre Chartre Acked-by: Peter Zijlstra Acked-by: Andy Lutomirski Link: https://lkml.kernel.org/r/20200505134905.637269946@linutronix.de --- arch/x86/entry/entry_32.S | 8 +------- arch/x86/entry/entry_64.S | 3 +-- arch/x86/include/asm/idtentry.h | 1 + arch/x86/include/asm/traps.h | 3 --- arch/x86/kernel/idt.c | 2 +- arch/x86/kernel/traps.c | 8 +++----- arch/x86/xen/enlighten_pv.c | 2 +- arch/x86/xen/xen-asm_64.S | 2 +- 8 files changed, 9 insertions(+), 20 deletions(-) (limited to 'arch') diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S index ffe43d2f8fe9..9d94a036bf35 100644 --- a/arch/x86/entry/entry_32.S +++ b/arch/x86/entry/entry_32.S @@ -1302,7 +1302,7 @@ SYM_CODE_START(simd_coprocessor_error) pushl $0 #ifdef CONFIG_X86_INVD_BUG /* AMD 486 bug: invd from userspace calls exception 19 instead of #GP */ - ALTERNATIVE "pushl $do_general_protection", \ + ALTERNATIVE "pushl $exc_general_protection", \ "pushl $do_simd_coprocessor_error", \ X86_FEATURE_XMM #else @@ -1690,12 +1690,6 @@ SYM_CODE_START(int3) jmp common_exception SYM_CODE_END(int3) -SYM_CODE_START(general_protection) - ASM_CLAC - pushl $do_general_protection - jmp common_exception -SYM_CODE_END(general_protection) - .pushsection .text, "ax" SYM_CODE_START(rewind_stack_do_exit) /* Prevent any naive code from trying to unwind to our caller. */ diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index c92592de3370..5cecdd18f548 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -1073,7 +1073,6 @@ apicinterrupt IRQ_WORK_VECTOR irq_work_interrupt smp_irq_work_interrupt */ idtentry X86_TRAP_BP int3 do_int3 has_error_code=0 -idtentry X86_TRAP_GP general_protection do_general_protection has_error_code=1 idtentry X86_TRAP_SPURIOUS spurious_interrupt_bug do_spurious_interrupt_bug has_error_code=0 idtentry X86_TRAP_MF coprocessor_error do_coprocessor_error has_error_code=0 idtentry X86_TRAP_AC alignment_check do_alignment_check has_error_code=1 @@ -1209,7 +1208,7 @@ SYM_CODE_START(xen_failsafe_callback) addq $0x30, %rsp pushq $0 /* RIP */ UNWIND_HINT_IRET_REGS offset=8 - jmp general_protection + jmp asm_exc_general_protection 1: /* Segment mismatch => Category 1 (Bad segment). Retry the IRET. 
*/ movq (%rsp), %rcx movq 8(%rsp), %r11 diff --git a/arch/x86/include/asm/idtentry.h b/arch/x86/include/asm/idtentry.h index 4c0abd31db5b..986fc655d2ab 100644 --- a/arch/x86/include/asm/idtentry.h +++ b/arch/x86/include/asm/idtentry.h @@ -134,5 +134,6 @@ DECLARE_IDTENTRY(X86_TRAP_OLD_MF, exc_coproc_segment_overrun); DECLARE_IDTENTRY_ERRORCODE(X86_TRAP_TS, exc_invalid_tss); DECLARE_IDTENTRY_ERRORCODE(X86_TRAP_NP, exc_segment_not_present); DECLARE_IDTENTRY_ERRORCODE(X86_TRAP_SS, exc_stack_segment); +DECLARE_IDTENTRY_ERRORCODE(X86_TRAP_GP, exc_general_protection); #endif diff --git a/arch/x86/include/asm/traps.h b/arch/x86/include/asm/traps.h index 5e580ff1dcf9..3a096a49b343 100644 --- a/arch/x86/include/asm/traps.h +++ b/arch/x86/include/asm/traps.h @@ -17,7 +17,6 @@ asmlinkage void int3(void); #ifdef CONFIG_X86_64 asmlinkage void double_fault(void); #endif -asmlinkage void general_protection(void); asmlinkage void page_fault(void); asmlinkage void async_page_fault(void); asmlinkage void spurious_interrupt_bug(void); @@ -33,7 +32,6 @@ asmlinkage void xen_xennmi(void); asmlinkage void xen_xendebug(void); asmlinkage void xen_int3(void); asmlinkage void xen_double_fault(void); -asmlinkage void xen_general_protection(void); asmlinkage void xen_page_fault(void); asmlinkage void xen_spurious_interrupt_bug(void); asmlinkage void xen_coprocessor_error(void); @@ -48,7 +46,6 @@ dotraplinkage void do_debug(struct pt_regs *regs, long error_code); dotraplinkage void do_nmi(struct pt_regs *regs, long error_code); dotraplinkage void do_int3(struct pt_regs *regs, long error_code); dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code, unsigned long cr2); -dotraplinkage void do_general_protection(struct pt_regs *regs, long error_code); dotraplinkage void do_page_fault(struct pt_regs *regs, unsigned long error_code, unsigned long address); dotraplinkage void do_spurious_interrupt_bug(struct pt_regs *regs, long error_code); dotraplinkage void do_coprocessor_error(struct pt_regs *regs, long error_code); diff --git a/arch/x86/kernel/idt.c b/arch/x86/kernel/idt.c index 8d95cbf56624..6f0af12f08c8 100644 --- a/arch/x86/kernel/idt.c +++ b/arch/x86/kernel/idt.c @@ -82,7 +82,7 @@ static const __initconst struct idt_data def_idts[] = { INTG(X86_TRAP_TS, asm_exc_invalid_tss), INTG(X86_TRAP_NP, asm_exc_segment_not_present), INTG(X86_TRAP_SS, asm_exc_stack_segment), - INTG(X86_TRAP_GP, general_protection), + INTG(X86_TRAP_GP, asm_exc_general_protection), INTG(X86_TRAP_SPURIOUS, spurious_interrupt_bug), INTG(X86_TRAP_MF, coprocessor_error), INTG(X86_TRAP_AC, alignment_check), diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index 3dfdc4d3de87..e65c7612ecf3 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -145,7 +145,7 @@ do_trap_no_signal(struct task_struct *tsk, int trapnr, const char *str, * process no chance to handle the signal and notice the * kernel fault information, so that won't result in polluting * the information about previously queued, but not yet - * delivered, faults. See also do_general_protection below. + * delivered, faults. See also exc_general_protection below. */ tsk->thread.error_code = error_code; tsk->thread.trap_nr = trapnr; @@ -375,7 +375,7 @@ dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code, unsign * which is what the stub expects, given that the faulting * RIP will be the IRET instruction. 
*/ - regs->ip = (unsigned long)general_protection; + regs->ip = (unsigned long)asm_exc_general_protection; regs->sp = (unsigned long)&gpregs->orig_ax; return; @@ -494,7 +494,7 @@ static enum kernel_gp_hint get_kernel_gp_address(struct pt_regs *regs, #define GPFSTR "general protection fault" -dotraplinkage void do_general_protection(struct pt_regs *regs, long error_code) +DEFINE_IDTENTRY_ERRORCODE(exc_general_protection) { char desc[sizeof(GPFSTR) + 50 + 2*sizeof(unsigned long) + 1] = GPFSTR; enum kernel_gp_hint hint = GP_NO_HINT; @@ -502,7 +502,6 @@ dotraplinkage void do_general_protection(struct pt_regs *regs, long error_code) unsigned long gp_addr; int ret; - RCU_LOCKDEP_WARN(!rcu_is_watching(), "entry code didn't wake RCU"); cond_local_irq_enable(regs); if (static_cpu_has(X86_FEATURE_UMIP)) { @@ -570,7 +569,6 @@ dotraplinkage void do_general_protection(struct pt_regs *regs, long error_code) exit: cond_local_irq_disable(regs); } -NOKPROBE_SYMBOL(do_general_protection); dotraplinkage void notrace do_int3(struct pt_regs *regs, long error_code) { diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c index 8290f39d2839..ca441c7c3222 100644 --- a/arch/x86/xen/enlighten_pv.c +++ b/arch/x86/xen/enlighten_pv.c @@ -630,7 +630,7 @@ static struct trap_array_entry trap_array[] = { TRAP_ENTRY(exc_invalid_tss, false ), TRAP_ENTRY(exc_segment_not_present, false ), TRAP_ENTRY(exc_stack_segment, false ), - { general_protection, xen_general_protection, false }, + TRAP_ENTRY(exc_general_protection, false ), { spurious_interrupt_bug, xen_spurious_interrupt_bug, false }, { coprocessor_error, xen_coprocessor_error, false }, { alignment_check, xen_alignment_check, false }, diff --git a/arch/x86/xen/xen-asm_64.S b/arch/x86/xen/xen-asm_64.S index 0ecc0559f657..802ec00b2f9c 100644 --- a/arch/x86/xen/xen-asm_64.S +++ b/arch/x86/xen/xen-asm_64.S @@ -42,7 +42,7 @@ xen_pv_trap asm_exc_coproc_segment_overrun xen_pv_trap asm_exc_invalid_tss xen_pv_trap asm_exc_segment_not_present xen_pv_trap asm_exc_stack_segment -xen_pv_trap general_protection +xen_pv_trap asm_exc_general_protection xen_pv_trap page_fault xen_pv_trap spurious_interrupt_bug xen_pv_trap coprocessor_error -- cgit v1.2.3 From dad7106f8194df1b096666c5499ef732497ddb15 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 25 Feb 2020 23:16:26 +0100 Subject: x86/entry: Convert Spurious interrupt bug exception to IDTENTRY Convert #SPURIOUS to IDTENTRY_ERRORCODE: - Implement the C entry point with DEFINE_IDTENTRY - Emit the ASM stub with DECLARE_IDTENTRY - Remove the ASM idtentry in 64bit - Remove the open coded ASM entry code in 32bit - Fixup the XEN/PV code - Remove the old prototypes No functional change. 
Signed-off-by: Thomas Gleixner Reviewed-by: Alexandre Chartre Acked-by: Peter Zijlstra Acked-by: Andy Lutomirski Link: https://lkml.kernel.org/r/20200505134905.728077036@linutronix.de --- arch/x86/entry/entry_32.S | 7 ------- arch/x86/entry/entry_64.S | 1 - arch/x86/include/asm/idtentry.h | 1 + arch/x86/include/asm/traps.h | 3 --- arch/x86/kernel/idt.c | 2 +- arch/x86/kernel/traps.c | 3 +-- arch/x86/xen/enlighten_pv.c | 2 +- arch/x86/xen/xen-asm_64.S | 2 +- 8 files changed, 5 insertions(+), 16 deletions(-) (limited to 'arch') diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S index 9d94a036bf35..f7610e1608d3 100644 --- a/arch/x86/entry/entry_32.S +++ b/arch/x86/entry/entry_32.S @@ -1333,13 +1333,6 @@ SYM_CODE_START(machine_check) SYM_CODE_END(machine_check) #endif -SYM_CODE_START(spurious_interrupt_bug) - ASM_CLAC - pushl $0 - pushl $do_spurious_interrupt_bug - jmp common_exception -SYM_CODE_END(spurious_interrupt_bug) - #ifdef CONFIG_XEN_PV SYM_FUNC_START(xen_hypervisor_callback) /* diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index 5cecdd18f548..1a677eec4d6d 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -1073,7 +1073,6 @@ apicinterrupt IRQ_WORK_VECTOR irq_work_interrupt smp_irq_work_interrupt */ idtentry X86_TRAP_BP int3 do_int3 has_error_code=0 -idtentry X86_TRAP_SPURIOUS spurious_interrupt_bug do_spurious_interrupt_bug has_error_code=0 idtentry X86_TRAP_MF coprocessor_error do_coprocessor_error has_error_code=0 idtentry X86_TRAP_AC alignment_check do_alignment_check has_error_code=1 idtentry X86_TRAP_XF simd_coprocessor_error do_simd_coprocessor_error has_error_code=0 diff --git a/arch/x86/include/asm/idtentry.h b/arch/x86/include/asm/idtentry.h index 986fc655d2ab..d309b060422b 100644 --- a/arch/x86/include/asm/idtentry.h +++ b/arch/x86/include/asm/idtentry.h @@ -129,6 +129,7 @@ DECLARE_IDTENTRY(X86_TRAP_BR, exc_bounds); DECLARE_IDTENTRY(X86_TRAP_UD, exc_invalid_op); DECLARE_IDTENTRY(X86_TRAP_NM, exc_device_not_available); DECLARE_IDTENTRY(X86_TRAP_OLD_MF, exc_coproc_segment_overrun); +DECLARE_IDTENTRY(X86_TRAP_SPURIOUS, exc_spurious_interrupt_bug); /* Simple exception entries with error code pushed by hardware */ DECLARE_IDTENTRY_ERRORCODE(X86_TRAP_TS, exc_invalid_tss); diff --git a/arch/x86/include/asm/traps.h b/arch/x86/include/asm/traps.h index 3a096a49b343..4450f3b43f97 100644 --- a/arch/x86/include/asm/traps.h +++ b/arch/x86/include/asm/traps.h @@ -19,7 +19,6 @@ asmlinkage void double_fault(void); #endif asmlinkage void page_fault(void); asmlinkage void async_page_fault(void); -asmlinkage void spurious_interrupt_bug(void); asmlinkage void coprocessor_error(void); asmlinkage void alignment_check(void); #ifdef CONFIG_X86_MCE @@ -33,7 +32,6 @@ asmlinkage void xen_xendebug(void); asmlinkage void xen_int3(void); asmlinkage void xen_double_fault(void); asmlinkage void xen_page_fault(void); -asmlinkage void xen_spurious_interrupt_bug(void); asmlinkage void xen_coprocessor_error(void); asmlinkage void xen_alignment_check(void); #ifdef CONFIG_X86_MCE @@ -47,7 +45,6 @@ dotraplinkage void do_nmi(struct pt_regs *regs, long error_code); dotraplinkage void do_int3(struct pt_regs *regs, long error_code); dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code, unsigned long cr2); dotraplinkage void do_page_fault(struct pt_regs *regs, unsigned long error_code, unsigned long address); -dotraplinkage void do_spurious_interrupt_bug(struct pt_regs *regs, long error_code); dotraplinkage void do_coprocessor_error(struct 
pt_regs *regs, long error_code); dotraplinkage void do_alignment_check(struct pt_regs *regs, long error_code); dotraplinkage void do_simd_coprocessor_error(struct pt_regs *regs, long error_code); diff --git a/arch/x86/kernel/idt.c b/arch/x86/kernel/idt.c index 6f0af12f08c8..8e8936dcf6e4 100644 --- a/arch/x86/kernel/idt.c +++ b/arch/x86/kernel/idt.c @@ -83,7 +83,7 @@ static const __initconst struct idt_data def_idts[] = { INTG(X86_TRAP_NP, asm_exc_segment_not_present), INTG(X86_TRAP_SS, asm_exc_stack_segment), INTG(X86_TRAP_GP, asm_exc_general_protection), - INTG(X86_TRAP_SPURIOUS, spurious_interrupt_bug), + INTG(X86_TRAP_SPURIOUS, asm_exc_spurious_interrupt_bug), INTG(X86_TRAP_MF, coprocessor_error), INTG(X86_TRAP_AC, alignment_check), INTG(X86_TRAP_XF, simd_coprocessor_error), diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index e65c7612ecf3..2c638b9bc827 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -867,8 +867,7 @@ do_simd_coprocessor_error(struct pt_regs *regs, long error_code) math_error(regs, error_code, X86_TRAP_XF); } -dotraplinkage void -do_spurious_interrupt_bug(struct pt_regs *regs, long error_code) +DEFINE_IDTENTRY(exc_spurious_interrupt_bug) { /* * This addresses a Pentium Pro Erratum: diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c index ca441c7c3222..5b2dfb027fe2 100644 --- a/arch/x86/xen/enlighten_pv.c +++ b/arch/x86/xen/enlighten_pv.c @@ -631,7 +631,7 @@ static struct trap_array_entry trap_array[] = { TRAP_ENTRY(exc_segment_not_present, false ), TRAP_ENTRY(exc_stack_segment, false ), TRAP_ENTRY(exc_general_protection, false ), - { spurious_interrupt_bug, xen_spurious_interrupt_bug, false }, + TRAP_ENTRY(exc_spurious_interrupt_bug, false ), { coprocessor_error, xen_coprocessor_error, false }, { alignment_check, xen_alignment_check, false }, { simd_coprocessor_error, xen_simd_coprocessor_error, false }, }; diff --git a/arch/x86/xen/xen-asm_64.S b/arch/x86/xen/xen-asm_64.S index 802ec00b2f9c..698a9c50d877 100644 --- a/arch/x86/xen/xen-asm_64.S +++ b/arch/x86/xen/xen-asm_64.S @@ -44,7 +44,7 @@ xen_pv_trap asm_exc_segment_not_present xen_pv_trap asm_exc_stack_segment xen_pv_trap asm_exc_general_protection xen_pv_trap page_fault -xen_pv_trap spurious_interrupt_bug +xen_pv_trap asm_exc_spurious_interrupt_bug xen_pv_trap coprocessor_error xen_pv_trap alignment_check -- cgit v1.2.3 From 14a8bd2aa7c355b3a8879618a4f70f9c2b0004f7 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 25 Feb 2020 23:16:27 +0100 Subject: x86/entry: Convert Coprocessor error exception to IDTENTRY Convert #MF to IDTENTRY: - Implement the C entry point with DEFINE_IDTENTRY - Emit the ASM stub with DECLARE_IDTENTRY - Remove the ASM idtentry in 64bit - Remove the open coded ASM entry code in 32bit - Fixup the XEN/PV code - Remove the old prototypes - Remove the RCU warning as the new entry macro ensures correctness No functional change.
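The XEN/PV fixup replaces another open-coded trap_array[] initializer with TRAP_ENTRY(). A hypothetical reconstruction of that helper, shown only to make the renames readable (assumption: the xen_asm_ prefix matches the stubs which the xen_pv_trap macro emits):

	#define TRAP_ENTRY(func, ist_ok) {		\
		.orig		= asm_##func,		\
		.xen		= xen_asm_##func,	\
		.ist_okay	= ist_ok }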
Signed-off-by: Thomas Gleixner Reviewed-by: Alexandre Chartre Acked-by: Peter Zijlstra Acked-by: Andy Lutomirski Link: https://lkml.kernel.org/r/20200505134905.838823510@linutronix.de --- arch/x86/entry/entry_32.S | 7 ------- arch/x86/entry/entry_64.S | 1 - arch/x86/include/asm/idtentry.h | 1 + arch/x86/include/asm/traps.h | 3 --- arch/x86/kernel/idt.c | 2 +- arch/x86/kernel/traps.c | 5 ++--- arch/x86/xen/enlighten_pv.c | 2 +- arch/x86/xen/xen-asm_64.S | 2 +- 8 files changed, 6 insertions(+), 17 deletions(-) (limited to 'arch') diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S index f7610e1608d3..66d4683026f0 100644 --- a/arch/x86/entry/entry_32.S +++ b/arch/x86/entry/entry_32.S @@ -1290,13 +1290,6 @@ SYM_FUNC_END(name) /* The include is where all of the SMP etc. interrupts come from */ #include -SYM_CODE_START(coprocessor_error) - ASM_CLAC - pushl $0 - pushl $do_coprocessor_error - jmp common_exception -SYM_CODE_END(coprocessor_error) - SYM_CODE_START(simd_coprocessor_error) ASM_CLAC pushl $0 diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index 1a677eec4d6d..f1f126bb4945 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -1073,7 +1073,6 @@ apicinterrupt IRQ_WORK_VECTOR irq_work_interrupt smp_irq_work_interrupt */ idtentry X86_TRAP_BP int3 do_int3 has_error_code=0 -idtentry X86_TRAP_MF coprocessor_error do_coprocessor_error has_error_code=0 idtentry X86_TRAP_AC alignment_check do_alignment_check has_error_code=1 idtentry X86_TRAP_XF simd_coprocessor_error do_simd_coprocessor_error has_error_code=0 diff --git a/arch/x86/include/asm/idtentry.h b/arch/x86/include/asm/idtentry.h index d309b060422b..ed44ba6210f2 100644 --- a/arch/x86/include/asm/idtentry.h +++ b/arch/x86/include/asm/idtentry.h @@ -130,6 +130,7 @@ DECLARE_IDTENTRY(X86_TRAP_UD, exc_invalid_op); DECLARE_IDTENTRY(X86_TRAP_NM, exc_device_not_available); DECLARE_IDTENTRY(X86_TRAP_OLD_MF, exc_coproc_segment_overrun); DECLARE_IDTENTRY(X86_TRAP_SPURIOUS, exc_spurious_interrupt_bug); +DECLARE_IDTENTRY(X86_TRAP_MF, exc_coprocessor_error); /* Simple exception entries with error code pushed by hardware */ DECLARE_IDTENTRY_ERRORCODE(X86_TRAP_TS, exc_invalid_tss); diff --git a/arch/x86/include/asm/traps.h b/arch/x86/include/asm/traps.h index 4450f3b43f97..e84677b685c5 100644 --- a/arch/x86/include/asm/traps.h +++ b/arch/x86/include/asm/traps.h @@ -19,7 +19,6 @@ asmlinkage void double_fault(void); #endif asmlinkage void page_fault(void); asmlinkage void async_page_fault(void); -asmlinkage void coprocessor_error(void); asmlinkage void alignment_check(void); #ifdef CONFIG_X86_MCE asmlinkage void machine_check(void); @@ -32,7 +31,6 @@ asmlinkage void xen_xendebug(void); asmlinkage void xen_int3(void); asmlinkage void xen_double_fault(void); asmlinkage void xen_page_fault(void); -asmlinkage void xen_coprocessor_error(void); asmlinkage void xen_alignment_check(void); #ifdef CONFIG_X86_MCE asmlinkage void xen_machine_check(void); @@ -45,7 +43,6 @@ dotraplinkage void do_nmi(struct pt_regs *regs, long error_code); dotraplinkage void do_int3(struct pt_regs *regs, long error_code); dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code, unsigned long cr2); dotraplinkage void do_page_fault(struct pt_regs *regs, unsigned long error_code, unsigned long address); -dotraplinkage void do_coprocessor_error(struct pt_regs *regs, long error_code); dotraplinkage void do_alignment_check(struct pt_regs *regs, long error_code); dotraplinkage void do_simd_coprocessor_error(struct pt_regs 
*regs, long error_code); #ifdef CONFIG_X86_32 diff --git a/arch/x86/kernel/idt.c b/arch/x86/kernel/idt.c index 8e8936dcf6e4..2bde50d4cfa1 100644 --- a/arch/x86/kernel/idt.c +++ b/arch/x86/kernel/idt.c @@ -84,7 +84,7 @@ static const __initconst struct idt_data def_idts[] = { INTG(X86_TRAP_SS, asm_exc_stack_segment), INTG(X86_TRAP_GP, asm_exc_general_protection), INTG(X86_TRAP_SPURIOUS, asm_exc_spurious_interrupt_bug), - INTG(X86_TRAP_MF, coprocessor_error), + INTG(X86_TRAP_MF, asm_exc_coprocessor_error), INTG(X86_TRAP_AC, alignment_check), INTG(X86_TRAP_XF, simd_coprocessor_error), diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index 2c638b9bc827..ba26bebfed72 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -854,10 +854,9 @@ exit: cond_local_irq_disable(regs); } -dotraplinkage void do_coprocessor_error(struct pt_regs *regs, long error_code) +DEFINE_IDTENTRY(exc_coprocessor_error) { - RCU_LOCKDEP_WARN(!rcu_is_watching(), "entry code didn't wake RCU"); - math_error(regs, error_code, X86_TRAP_MF); + math_error(regs, 0, X86_TRAP_MF); } dotraplinkage void diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c index 5b2dfb027fe2..678c50888db2 100644 --- a/arch/x86/xen/enlighten_pv.c +++ b/arch/x86/xen/enlighten_pv.c @@ -632,7 +632,7 @@ static struct trap_array_entry trap_array[] = { TRAP_ENTRY(exc_stack_segment, false ), TRAP_ENTRY(exc_general_protection, false ), TRAP_ENTRY(exc_spurious_interrupt_bug, false ), - { coprocessor_error, xen_coprocessor_error, false }, + TRAP_ENTRY(exc_coprocessor_error, false ), { alignment_check, xen_alignment_check, false }, { simd_coprocessor_error, xen_simd_coprocessor_error, false }, }; diff --git a/arch/x86/xen/xen-asm_64.S b/arch/x86/xen/xen-asm_64.S index 698a9c50d877..589de186d8cd 100644 --- a/arch/x86/xen/xen-asm_64.S +++ b/arch/x86/xen/xen-asm_64.S @@ -45,7 +45,7 @@ xen_pv_trap asm_exc_stack_segment xen_pv_trap asm_exc_general_protection xen_pv_trap page_fault xen_pv_trap asm_exc_spurious_interrupt_bug -xen_pv_trap coprocessor_error +xen_pv_trap asm_exc_coprocessor_error xen_pv_trap alignment_check #ifdef CONFIG_X86_MCE xen_pv_trap machine_check -- cgit v1.2.3 From 436608bb00a59f5457cee26f416067860ca88d9d Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 25 Feb 2020 23:16:28 +0100 Subject: x86/entry: Convert Alignment check exception to IDTENTRY Convert #AC to IDTENTRY_ERRORCODE: - Implement the C entry point with DEFINE_IDTENTRY_ERRORCODE - Emit the ASM stub with DECLARE_IDTENTRY_ERRORCODE - Remove the ASM idtentry in 64bit - Remove the open coded ASM entry code in 32bit - Fixup the XEN/PV code - Remove the old prototypes - Remove the RCU warning as the new entry macro ensures correctness No functional change.
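Since #AC pushes a hardware error code, the _ERRORCODE variant hands that code through to the handler body. Roughly, as a sketch rather than the verbatim macro expansion:

	__visible noinstr void exc_alignment_check(struct pt_regs *regs,
						   unsigned long error_code)
	{
		idtentry_enter(regs);
		instrumentation_begin();
		__exc_alignment_check(regs, error_code);
		instrumentation_end();
		idtentry_exit(regs);
	}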
Signed-off-by: Thomas Gleixner Reviewed-by: Alexandre Chartre Acked-by: Peter Zijlstra Acked-by: Andy Lutomirski Link: https://lkml.kernel.org/r/20200505134905.928967113@linutronix.de --- arch/x86/entry/entry_32.S | 6 ------ arch/x86/entry/entry_64.S | 1 - arch/x86/include/asm/idtentry.h | 1 + arch/x86/include/asm/traps.h | 3 --- arch/x86/kernel/idt.c | 2 +- arch/x86/kernel/traps.c | 4 +--- arch/x86/xen/enlighten_pv.c | 2 +- arch/x86/xen/xen-asm_64.S | 2 +- 8 files changed, 5 insertions(+), 16 deletions(-) (limited to 'arch') diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S index 66d4683026f0..740289017179 100644 --- a/arch/x86/entry/entry_32.S +++ b/arch/x86/entry/entry_32.S @@ -1311,12 +1311,6 @@ SYM_CODE_START(native_iret) SYM_CODE_END(native_iret) #endif -SYM_CODE_START(alignment_check) - ASM_CLAC - pushl $do_alignment_check - jmp common_exception -SYM_CODE_END(alignment_check) - #ifdef CONFIG_X86_MCE SYM_CODE_START(machine_check) ASM_CLAC diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index f1f126bb4945..3c95a6307d72 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -1073,7 +1073,6 @@ apicinterrupt IRQ_WORK_VECTOR irq_work_interrupt smp_irq_work_interrupt */ idtentry X86_TRAP_BP int3 do_int3 has_error_code=0 -idtentry X86_TRAP_AC alignment_check do_alignment_check has_error_code=1 idtentry X86_TRAP_XF simd_coprocessor_error do_simd_coprocessor_error has_error_code=0 idtentry X86_TRAP_PF page_fault do_page_fault has_error_code=1 diff --git a/arch/x86/include/asm/idtentry.h b/arch/x86/include/asm/idtentry.h index ed44ba6210f2..531dbc0ee365 100644 --- a/arch/x86/include/asm/idtentry.h +++ b/arch/x86/include/asm/idtentry.h @@ -137,5 +137,6 @@ DECLARE_IDTENTRY_ERRORCODE(X86_TRAP_TS, exc_invalid_tss); DECLARE_IDTENTRY_ERRORCODE(X86_TRAP_NP, exc_segment_not_present); DECLARE_IDTENTRY_ERRORCODE(X86_TRAP_SS, exc_stack_segment); DECLARE_IDTENTRY_ERRORCODE(X86_TRAP_GP, exc_general_protection); +DECLARE_IDTENTRY_ERRORCODE(X86_TRAP_AC, exc_alignment_check); #endif diff --git a/arch/x86/include/asm/traps.h b/arch/x86/include/asm/traps.h index e84677b685c5..0f755e156f05 100644 --- a/arch/x86/include/asm/traps.h +++ b/arch/x86/include/asm/traps.h @@ -19,7 +19,6 @@ asmlinkage void double_fault(void); #endif asmlinkage void page_fault(void); asmlinkage void async_page_fault(void); -asmlinkage void alignment_check(void); #ifdef CONFIG_X86_MCE asmlinkage void machine_check(void); #endif /* CONFIG_X86_MCE */ @@ -31,7 +30,6 @@ asmlinkage void xen_xendebug(void); asmlinkage void xen_int3(void); asmlinkage void xen_double_fault(void); asmlinkage void xen_page_fault(void); -asmlinkage void xen_alignment_check(void); #ifdef CONFIG_X86_MCE asmlinkage void xen_machine_check(void); #endif /* CONFIG_X86_MCE */ @@ -43,7 +41,6 @@ dotraplinkage void do_nmi(struct pt_regs *regs, long error_code); dotraplinkage void do_int3(struct pt_regs *regs, long error_code); dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code, unsigned long cr2); dotraplinkage void do_page_fault(struct pt_regs *regs, unsigned long error_code, unsigned long address); -dotraplinkage void do_alignment_check(struct pt_regs *regs, long error_code); dotraplinkage void do_simd_coprocessor_error(struct pt_regs *regs, long error_code); #ifdef CONFIG_X86_32 dotraplinkage void do_iret_error(struct pt_regs *regs, long error_code); diff --git a/arch/x86/kernel/idt.c b/arch/x86/kernel/idt.c index 2bde50d4cfa1..af4819610783 100644 --- a/arch/x86/kernel/idt.c +++ 
b/arch/x86/kernel/idt.c @@ -85,7 +85,7 @@ static const __initconst struct idt_data def_idts[] = { INTG(X86_TRAP_GP, asm_exc_general_protection), INTG(X86_TRAP_SPURIOUS, asm_exc_spurious_interrupt_bug), INTG(X86_TRAP_MF, asm_exc_coprocessor_error), - INTG(X86_TRAP_AC, alignment_check), + INTG(X86_TRAP_AC, asm_exc_alignment_check), INTG(X86_TRAP_XF, simd_coprocessor_error), #ifdef CONFIG_X86_32 diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index ba26bebfed72..9f156c84195d 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -270,12 +270,10 @@ DEFINE_IDTENTRY_ERRORCODE(exc_stack_segment) 0, NULL); } -dotraplinkage void do_alignment_check(struct pt_regs *regs, long error_code) +DEFINE_IDTENTRY_ERRORCODE(exc_alignment_check) { char *str = "alignment check"; - RCU_LOCKDEP_WARN(!rcu_is_watching(), "entry code didn't wake RCU"); - if (notify_die(DIE_TRAP, str, regs, error_code, X86_TRAP_AC, SIGBUS) == NOTIFY_STOP) return; diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c index 678c50888db2..1097e35d6afb 100644 --- a/arch/x86/xen/enlighten_pv.c +++ b/arch/x86/xen/enlighten_pv.c @@ -633,7 +633,7 @@ static struct trap_array_entry trap_array[] = { TRAP_ENTRY(exc_general_protection, false ), TRAP_ENTRY(exc_spurious_interrupt_bug, false ), TRAP_ENTRY(exc_coprocessor_error, false ), - { alignment_check, xen_alignment_check, false }, + TRAP_ENTRY(exc_alignment_check, false ), { simd_coprocessor_error, xen_simd_coprocessor_error, false }, }; diff --git a/arch/x86/xen/xen-asm_64.S b/arch/x86/xen/xen-asm_64.S index 589de186d8cd..a591becaee2b 100644 --- a/arch/x86/xen/xen-asm_64.S +++ b/arch/x86/xen/xen-asm_64.S @@ -46,7 +46,7 @@ xen_pv_trap asm_exc_general_protection xen_pv_trap page_fault xen_pv_trap asm_exc_spurious_interrupt_bug xen_pv_trap asm_exc_coprocessor_error -xen_pv_trap alignment_check +xen_pv_trap asm_exc_alignment_check #ifdef CONFIG_X86_MCE xen_pv_trap machine_check #endif /* CONFIG_X86_MCE */ -- cgit v1.2.3 From 48227e21f7430e31042f63e078a45cd230e9fdfc Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 25 Feb 2020 23:16:29 +0100 Subject: x86/entry: Convert SIMD coprocessor error exception to IDTENTRY Convert #XF to IDTENTRY: - Implement the C entry point with DEFINE_IDTENTRY - Emit the ASM stub with DECLARE_IDTENTRY - Handle INVD_BUG in C - Remove the ASM idtentry in 64bit - Remove the open coded ASM entry code in 32bit - Fixup the XEN/PV code - Remove the old prototypes - Remove the RCU warning as the new entry macro ensures correctness No functional change. Signed-off-by: Thomas Gleixner Reviewed-by: Alexandre Chartre Acked-by: Peter Zijlstra Acked-by: Andy Lutomirski Link: https://lkml.kernel.org/r/20200505134906.021552202@linutronix.de --- arch/x86/entry/entry_32.S | 14 -------------- arch/x86/entry/entry_64.S | 1 - arch/x86/include/asm/idtentry.h | 1 + arch/x86/include/asm/traps.h | 3 --- arch/x86/kernel/idt.c | 2 +- arch/x86/kernel/traps.c | 29 +++++++++++++++++------------ arch/x86/xen/enlighten_pv.c | 2 +- arch/x86/xen/xen-asm_64.S | 2 +- 8 files changed, 21 insertions(+), 33 deletions(-) (limited to 'arch') diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S index 740289017179..c93fb73af039 100644 --- a/arch/x86/entry/entry_32.S +++ b/arch/x86/entry/entry_32.S @@ -1290,20 +1290,6 @@ SYM_FUNC_END(name) /* The include is where all of the SMP etc.
interrupts come from */ #include -SYM_CODE_START(simd_coprocessor_error) - ASM_CLAC - pushl $0 -#ifdef CONFIG_X86_INVD_BUG - /* AMD 486 bug: invd from userspace calls exception 19 instead of #GP */ - ALTERNATIVE "pushl $exc_general_protection", \ - "pushl $do_simd_coprocessor_error", \ - X86_FEATURE_XMM -#else - pushl $do_simd_coprocessor_error -#endif - jmp common_exception -SYM_CODE_END(simd_coprocessor_error) - #ifdef CONFIG_PARAVIRT SYM_CODE_START(native_iret) iret diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index 3c95a6307d72..1bada7b26210 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -1073,7 +1073,6 @@ apicinterrupt IRQ_WORK_VECTOR irq_work_interrupt smp_irq_work_interrupt */ idtentry X86_TRAP_BP int3 do_int3 has_error_code=0 -idtentry X86_TRAP_XF simd_coprocessor_error do_simd_coprocessor_error has_error_code=0 idtentry X86_TRAP_PF page_fault do_page_fault has_error_code=1 diff --git a/arch/x86/include/asm/idtentry.h b/arch/x86/include/asm/idtentry.h index 531dbc0ee365..99d4759bd914 100644 --- a/arch/x86/include/asm/idtentry.h +++ b/arch/x86/include/asm/idtentry.h @@ -131,6 +131,7 @@ DECLARE_IDTENTRY(X86_TRAP_NM, exc_device_not_available); DECLARE_IDTENTRY(X86_TRAP_OLD_MF, exc_coproc_segment_overrun); DECLARE_IDTENTRY(X86_TRAP_SPURIOUS, exc_spurious_interrupt_bug); DECLARE_IDTENTRY(X86_TRAP_MF, exc_coprocessor_error); +DECLARE_IDTENTRY(X86_TRAP_XF, exc_simd_coprocessor_error); /* Simple exception entries with error code pushed by hardware */ DECLARE_IDTENTRY_ERRORCODE(X86_TRAP_TS, exc_invalid_tss); diff --git a/arch/x86/include/asm/traps.h b/arch/x86/include/asm/traps.h index 0f755e156f05..e7eb7532233d 100644 --- a/arch/x86/include/asm/traps.h +++ b/arch/x86/include/asm/traps.h @@ -22,7 +22,6 @@ asmlinkage void async_page_fault(void); #ifdef CONFIG_X86_MCE asmlinkage void machine_check(void); #endif /* CONFIG_X86_MCE */ -asmlinkage void simd_coprocessor_error(void); #if defined(CONFIG_X86_64) && defined(CONFIG_XEN_PV) asmlinkage void xen_xennmi(void); @@ -33,7 +32,6 @@ asmlinkage void xen_page_fault(void); #ifdef CONFIG_X86_MCE asmlinkage void xen_machine_check(void); #endif /* CONFIG_X86_MCE */ -asmlinkage void xen_simd_coprocessor_error(void); #endif dotraplinkage void do_debug(struct pt_regs *regs, long error_code); @@ -41,7 +39,6 @@ dotraplinkage void do_nmi(struct pt_regs *regs, long error_code); dotraplinkage void do_int3(struct pt_regs *regs, long error_code); dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code, unsigned long cr2); dotraplinkage void do_page_fault(struct pt_regs *regs, unsigned long error_code, unsigned long address); -dotraplinkage void do_simd_coprocessor_error(struct pt_regs *regs, long error_code); #ifdef CONFIG_X86_32 dotraplinkage void do_iret_error(struct pt_regs *regs, long error_code); #endif diff --git a/arch/x86/kernel/idt.c b/arch/x86/kernel/idt.c index af4819610783..38b565b7e5b8 100644 --- a/arch/x86/kernel/idt.c +++ b/arch/x86/kernel/idt.c @@ -86,7 +86,7 @@ static const __initconst struct idt_data def_idts[] = { INTG(X86_TRAP_SPURIOUS, asm_exc_spurious_interrupt_bug), INTG(X86_TRAP_MF, asm_exc_coprocessor_error), INTG(X86_TRAP_AC, asm_exc_alignment_check), - INTG(X86_TRAP_XF, simd_coprocessor_error), + INTG(X86_TRAP_XF, asm_exc_simd_coprocessor_error), #ifdef CONFIG_X86_32 TSKG(X86_TRAP_DF, GDT_ENTRY_DOUBLEFAULT_TSS), diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index 9f156c84195d..1702922ebd9c 100644 --- a/arch/x86/kernel/traps.c +++ 
b/arch/x86/kernel/traps.c @@ -810,7 +810,7 @@ NOKPROBE_SYMBOL(do_debug); * the correct behaviour even in the presence of the asynchronous * IRQ13 behaviour */ -static void math_error(struct pt_regs *regs, int error_code, int trapnr) +static void math_error(struct pt_regs *regs, int trapnr) { struct task_struct *task = current; struct fpu *fpu = &task->thread.fpu; @@ -821,15 +821,15 @@ static void math_error(struct pt_regs *regs, int error_code, int trapnr) cond_local_irq_enable(regs); if (!user_mode(regs)) { - if (fixup_exception(regs, trapnr, error_code, 0)) + if (fixup_exception(regs, trapnr, 0, 0)) goto exit; - task->thread.error_code = error_code; + task->thread.error_code = 0; task->thread.trap_nr = trapnr; - if (notify_die(DIE_TRAP, str, regs, error_code, - trapnr, SIGFPE) != NOTIFY_STOP) - die(str, regs, error_code); + if (notify_die(DIE_TRAP, str, regs, 0, trapnr, + SIGFPE) != NOTIFY_STOP) + die(str, regs, 0); goto exit; } @@ -839,7 +839,7 @@ static void math_error(struct pt_regs *regs, int error_code, int trapnr) fpu__save(fpu); task->thread.trap_nr = trapnr; - task->thread.error_code = error_code; + task->thread.error_code = 0; si_code = fpu__exception_code(fpu, trapnr); /* Retry when we get spurious exceptions: */ @@ -854,14 +854,19 @@ exit: DEFINE_IDTENTRY(exc_coprocessor_error) { - math_error(regs, 0, X86_TRAP_MF); + math_error(regs, X86_TRAP_MF); } -dotraplinkage void -do_simd_coprocessor_error(struct pt_regs *regs, long error_code) +DEFINE_IDTENTRY(exc_simd_coprocessor_error) { - RCU_LOCKDEP_WARN(!rcu_is_watching(), "entry code didn't wake RCU"); - math_error(regs, error_code, X86_TRAP_XF); + if (IS_ENABLED(CONFIG_X86_INVD_BUG)) { + /* AMD 486 bug: INVD in CPL 0 raises #XF instead of #GP */ + if (!static_cpu_has(X86_FEATURE_XMM)) { + __exc_general_protection(regs, 0); + return; + } + } + math_error(regs, X86_TRAP_XF); } DEFINE_IDTENTRY(exc_spurious_interrupt_bug) diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c index 1097e35d6afb..0a30fc0fe0fa 100644 --- a/arch/x86/xen/enlighten_pv.c +++ b/arch/x86/xen/enlighten_pv.c @@ -634,7 +634,7 @@ static struct trap_array_entry trap_array[] = { TRAP_ENTRY(exc_spurious_interrupt_bug, false ), TRAP_ENTRY(exc_coprocessor_error, false ), TRAP_ENTRY(exc_alignment_check, false ), - { simd_coprocessor_error, xen_simd_coprocessor_error, false }, + TRAP_ENTRY(exc_simd_coprocessor_error, false ), }; static bool __ref get_trap_addr(void **addr, unsigned int ist) diff --git a/arch/x86/xen/xen-asm_64.S b/arch/x86/xen/xen-asm_64.S index a591becaee2b..6a91157d5b5c 100644 --- a/arch/x86/xen/xen-asm_64.S +++ b/arch/x86/xen/xen-asm_64.S @@ -50,7 +50,7 @@ xen_pv_trap asm_exc_alignment_check #ifdef CONFIG_X86_MCE xen_pv_trap machine_check #endif /* CONFIG_X86_MCE */ -xen_pv_trap simd_coprocessor_error +xen_pv_trap asm_exc_simd_coprocessor_error #ifdef CONFIG_IA32_EMULATION xen_pv_trap entry_INT80_compat #endif -- cgit v1.2.3 From d77290507ab2ac691d50389e255ebd11a6cbc35a Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 25 Feb 2020 23:16:30 +0100 Subject: x86/entry/32: Convert IRET exception to IDTENTRY_SW Convert the IRET exception handler to IDTENTRY_SW. This is slightly different than the conversions of hardware exceptions as the IRET exception is invoked via an exception table when IRET faults. So it just uses the IDTENTRY_SW mechanism for consistency. It does not emit ASM code as it does not fit the other idtentry exceptions. 
- Implement the C entry point with DEFINE_IDTENTRY_SW() which maps to DEFINE_IDTENTRY() - Fixup the XEN/PV code - Remove the old prototypes - Remove the RCU warning as the new entry macro ensures correctness No functional change. Signed-off-by: Thomas Gleixner Reviewed-by: Alexandre Chartre Acked-by: Peter Zijlstra Acked-by: Andy Lutomirski Link: https://lkml.kernel.org/r/20200505134906.128769226@linutronix.de --- arch/x86/entry/entry_32.S | 14 +++++++------- arch/x86/include/asm/idtentry.h | 10 ++++++++++ arch/x86/include/asm/traps.h | 3 --- arch/x86/kernel/traps.c | 8 +++----- arch/x86/xen/xen-asm_32.S | 2 +- 5 files changed, 21 insertions(+), 16 deletions(-) (limited to 'arch') diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S index c93fb73af039..f7a5f1cda058 100644 --- a/arch/x86/entry/entry_32.S +++ b/arch/x86/entry/entry_32.S @@ -1147,9 +1147,9 @@ restore_all_kernel: jmp .Lirq_return .section .fixup, "ax" -SYM_CODE_START(iret_exc) +SYM_CODE_START(asm_iret_error) pushl $0 # no error code - pushl $do_iret_error + pushl $iret_error #ifdef CONFIG_DEBUG_ENTRY /* @@ -1163,10 +1163,10 @@ SYM_CODE_START(iret_exc) popl %eax #endif - jmp common_exception -SYM_CODE_END(iret_exc) + jmp handle_exception +SYM_CODE_END(asm_iret_error) .previous - _ASM_EXTABLE(.Lirq_return, iret_exc) + _ASM_EXTABLE(.Lirq_return, asm_iret_error) SYM_FUNC_END(entry_INT80_32) .macro FIXUP_ESPFIX_STACK @@ -1293,7 +1293,7 @@ SYM_FUNC_END(name) #ifdef CONFIG_PARAVIRT SYM_CODE_START(native_iret) iret - _ASM_EXTABLE(native_iret, iret_exc) + _ASM_EXTABLE(native_iret, asm_iret_error) SYM_CODE_END(native_iret) #endif @@ -1358,7 +1358,7 @@ SYM_FUNC_START(xen_failsafe_callback) popl %eax lea 16(%esp), %esp jz 5f - jmp iret_exc + jmp asm_iret_error 5: pushl $-1 /* orig_ax = -1 => not a system call */ SAVE_ALL ENCODE_FRAME_POINTER diff --git a/arch/x86/include/asm/idtentry.h b/arch/x86/include/asm/idtentry.h index 99d4759bd914..ee6ebfef7e57 100644 --- a/arch/x86/include/asm/idtentry.h +++ b/arch/x86/include/asm/idtentry.h @@ -57,6 +57,10 @@ __visible noinstr void func(struct pt_regs *regs) \ \ static __always_inline void __##func(struct pt_regs *regs) +/* Special case for 32bit IRET 'trap' */ +#define DECLARE_IDTENTRY_SW DECLARE_IDTENTRY +#define DEFINE_IDTENTRY_SW DEFINE_IDTENTRY + /** * DECLARE_IDTENTRY_ERRORCODE - Declare functions for simple IDT entry points * Error code pushed by hardware @@ -111,6 +115,9 @@ static __always_inline void __##func(struct pt_regs *regs, \ #define DECLARE_IDTENTRY_ERRORCODE(vector, func) \ idtentry vector asm_##func func has_error_code=1 sane=1 +/* Special case for 32bit IRET 'trap'. Do not emit ASM code */ +#define DECLARE_IDTENTRY_SW(vector, func) + #endif /* __ASSEMBLY__ */ /* @@ -133,6 +140,9 @@ DECLARE_IDTENTRY(X86_TRAP_SPURIOUS, exc_spurious_interrupt_bug); DECLARE_IDTENTRY(X86_TRAP_MF, exc_coprocessor_error); DECLARE_IDTENTRY(X86_TRAP_XF, exc_simd_coprocessor_error); +/* 32bit software IRET trap. 
Do not emit ASM code */ +DECLARE_IDTENTRY_SW(X86_TRAP_IRET, iret_error); + /* Simple exception entries with error code pushed by hardware */ DECLARE_IDTENTRY_ERRORCODE(X86_TRAP_TS, exc_invalid_tss); DECLARE_IDTENTRY_ERRORCODE(X86_TRAP_NP, exc_segment_not_present); diff --git a/arch/x86/include/asm/traps.h b/arch/x86/include/asm/traps.h index e7eb7532233d..5774d0b6cf77 100644 --- a/arch/x86/include/asm/traps.h +++ b/arch/x86/include/asm/traps.h @@ -39,9 +39,6 @@ dotraplinkage void do_nmi(struct pt_regs *regs, long error_code); dotraplinkage void do_int3(struct pt_regs *regs, long error_code); dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code, unsigned long cr2); dotraplinkage void do_page_fault(struct pt_regs *regs, unsigned long error_code, unsigned long address); -#ifdef CONFIG_X86_32 -dotraplinkage void do_iret_error(struct pt_regs *regs, long error_code); -#endif dotraplinkage void do_mce(struct pt_regs *regs, long error_code); #ifdef CONFIG_X86_64 diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index 1702922ebd9c..b28a64d7691f 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -925,14 +925,12 @@ DEFINE_IDTENTRY(exc_device_not_available) } #ifdef CONFIG_X86_32 -dotraplinkage void do_iret_error(struct pt_regs *regs, long error_code) +DEFINE_IDTENTRY_SW(iret_error) { - RCU_LOCKDEP_WARN(!rcu_is_watching(), "entry code didn't wake RCU"); local_irq_enable(); - - if (notify_die(DIE_TRAP, "iret exception", regs, error_code, + if (notify_die(DIE_TRAP, "iret exception", regs, 0, X86_TRAP_IRET, SIGILL) != NOTIFY_STOP) { - do_trap(X86_TRAP_IRET, SIGILL, "iret exception", regs, error_code, + do_trap(X86_TRAP_IRET, SIGILL, "iret exception", regs, 0, ILL_BADSTK, (void __user *)NULL); } local_irq_disable(); diff --git a/arch/x86/xen/xen-asm_32.S b/arch/x86/xen/xen-asm_32.S index 2712e9155306..812ff01e4e34 100644 --- a/arch/x86/xen/xen-asm_32.S +++ b/arch/x86/xen/xen-asm_32.S @@ -117,7 +117,7 @@ iret_restore_end: 1: iret xen_iret_end_crit: - _ASM_EXTABLE(1b, iret_exc) + _ASM_EXTABLE(1b, asm_iret_error) hyper_iret: /* put this out of line since its very rarely used */ -- cgit v1.2.3 From 4979fb53ab0ed35eddd20a73c25a5597bc22a57f Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 21 Jan 2020 15:53:09 +0100 Subject: x86/int3: Ensure that poke_int3_handler() is not traced In order to ensure poke_int3_handler() is completely self contained -- this is called while modifying other text, imagine the fun of hitting another INT3 -- ensure that everything it uses is not traced. The primary means here is to force inlining; bsearch() is notrace because all of lib/ is. Signed-off-by: Thomas Gleixner Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Thomas Gleixner Reviewed-by: Alexandre Chartre Acked-by: Andy Lutomirski Link: https://lkml.kernel.org/r/20200505135313.410702173@linutronix.de --- arch/x86/include/asm/ptrace.h | 2 +- arch/x86/include/asm/text-patching.h | 11 +++++++---- arch/x86/kernel/alternative.c | 13 ++++++------- 3 files changed, 14 insertions(+), 12 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/ptrace.h b/arch/x86/include/asm/ptrace.h index 6d6475fdd327..ebedeab48704 100644 --- a/arch/x86/include/asm/ptrace.h +++ b/arch/x86/include/asm/ptrace.h @@ -123,7 +123,7 @@ static inline void regs_set_return_value(struct pt_regs *regs, unsigned long rc) * On x86_64, vm86 mode is mercifully nonexistent, and we don't need * the extra check. 
*/ -static inline int user_mode(struct pt_regs *regs) +static __always_inline int user_mode(struct pt_regs *regs) { #ifdef CONFIG_X86_32 return ((regs->cs & SEGMENT_RPL_MASK) | (regs->flags & X86_VM_MASK)) >= USER_RPL; diff --git a/arch/x86/include/asm/text-patching.h b/arch/x86/include/asm/text-patching.h index 67315fa3956a..6593b42cb379 100644 --- a/arch/x86/include/asm/text-patching.h +++ b/arch/x86/include/asm/text-patching.h @@ -64,7 +64,7 @@ extern void text_poke_finish(void); #define DISP32_SIZE 4 -static inline int text_opcode_size(u8 opcode) +static __always_inline int text_opcode_size(u8 opcode) { int size = 0; @@ -118,12 +118,14 @@ extern __ro_after_init struct mm_struct *poking_mm; extern __ro_after_init unsigned long poking_addr; #ifndef CONFIG_UML_X86 -static inline void int3_emulate_jmp(struct pt_regs *regs, unsigned long ip) +static __always_inline +void int3_emulate_jmp(struct pt_regs *regs, unsigned long ip) { regs->ip = ip; } -static inline void int3_emulate_push(struct pt_regs *regs, unsigned long val) +static __always_inline +void int3_emulate_push(struct pt_regs *regs, unsigned long val) { /* * The int3 handler in entry_64.S adds a gap between the @@ -138,7 +140,8 @@ static inline void int3_emulate_push(struct pt_regs *regs, unsigned long val) *(unsigned long *)regs->sp = val; } -static inline void int3_emulate_call(struct pt_regs *regs, unsigned long func) +static __always_inline +void int3_emulate_call(struct pt_regs *regs, unsigned long func) { int3_emulate_push(regs, regs->ip - INT3_INSN_SIZE + CALL_INSN_SIZE); int3_emulate_jmp(regs, func); diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c index a9195ce8265d..dd81ed5beeca 100644 --- a/arch/x86/kernel/alternative.c +++ b/arch/x86/kernel/alternative.c @@ -1011,7 +1011,8 @@ struct bp_patching_desc { static struct bp_patching_desc *bp_desc; -static inline struct bp_patching_desc *try_get_desc(struct bp_patching_desc **descp) +static __always_inline +struct bp_patching_desc *try_get_desc(struct bp_patching_desc **descp) { struct bp_patching_desc *desc = READ_ONCE(*descp); /* rcu_dereference */ @@ -1021,18 +1022,18 @@ static inline struct bp_patching_desc *try_get_desc(struct bp_patching_desc **de return desc; } -static inline void put_desc(struct bp_patching_desc *desc) +static __always_inline void put_desc(struct bp_patching_desc *desc) { smp_mb__before_atomic(); atomic_dec(&desc->refs); } -static inline void *text_poke_addr(struct text_poke_loc *tp) +static __always_inline void *text_poke_addr(struct text_poke_loc *tp) { return _stext + tp->rel_addr; } -static int notrace patch_cmp(const void *key, const void *elt) +static int noinstr patch_cmp(const void *key, const void *elt) { struct text_poke_loc *tp = (struct text_poke_loc *) elt; @@ -1042,9 +1043,8 @@ static int notrace patch_cmp(const void *key, const void *elt) return 1; return 0; } -NOKPROBE_SYMBOL(patch_cmp); -int notrace poke_int3_handler(struct pt_regs *regs) +int noinstr poke_int3_handler(struct pt_regs *regs) { struct bp_patching_desc *desc; struct text_poke_loc *tp; @@ -1118,7 +1118,6 @@ out_put: put_desc(desc); return ret; } -NOKPROBE_SYMBOL(poke_int3_handler); #define TP_VEC_MAX (PAGE_SIZE / sizeof(struct text_poke_loc)) static struct text_poke_loc tp_vec[TP_VEC_MAX]; -- cgit v1.2.3 From ef882bfef933408360e4d9d0c2c83a1e2fc996f3 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 24 Jan 2020 22:08:45 +0100 Subject: x86/int3: Avoid atomic instrumentation Use arch_atomic_*() and __READ_ONCE() to ensure nothing untoward 
creeps in and ruins things. That is: this is the INT3 text poke handler; strictly limit the code that runs in it, lest it inadvertently hit yet another INT3. Reported-by: Thomas Gleixner Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Thomas Gleixner Reviewed-by: Masami Hiramatsu Reviewed-by: Alexandre Chartre Acked-by: Andy Lutomirski Link: https://lkml.kernel.org/r/20200505135313.517429268@linutronix.de --- arch/x86/kernel/alternative.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c index dd81ed5beeca..50a8d24a417e 100644 --- a/arch/x86/kernel/alternative.c +++ b/arch/x86/kernel/alternative.c @@ -1014,9 +1014,9 @@ static struct bp_patching_desc *bp_desc; static __always_inline struct bp_patching_desc *try_get_desc(struct bp_patching_desc **descp) { - struct bp_patching_desc *desc = READ_ONCE(*descp); /* rcu_dereference */ + struct bp_patching_desc *desc = __READ_ONCE(*descp); /* rcu_dereference */ - if (!desc || !atomic_inc_not_zero(&desc->refs)) + if (!desc || !arch_atomic_inc_not_zero(&desc->refs)) return NULL; return desc; @@ -1025,7 +1025,7 @@ struct bp_patching_desc *try_get_desc(struct bp_patching_desc **descp) static __always_inline void put_desc(struct bp_patching_desc *desc) { smp_mb__before_atomic(); - atomic_dec(&desc->refs); + arch_atomic_dec(&desc->refs); } static __always_inline void *text_poke_addr(struct text_poke_loc *tp) -- cgit v1.2.3 From f64366efd8c60b93138b813d071d2cd201fd0f6e Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 20 Feb 2020 13:28:06 +0100 Subject: x86/int3: Inline bsearch() Avoid calling out to bsearch() by inlining it, for normal kernel configs this was the last external call and poke_int3_handler() is now fully self-sufficient -- no calls to external code. Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Thomas Gleixner Reviewed-by: Alexandre Chartre Acked-by: Andy Lutomirski Link: https://lkml.kernel.org/r/20200505135313.731774429@linutronix.de --- arch/x86/kernel/alternative.c | 8 ++++---- arch/x86/kernel/traps.c | 5 +++++ 2 files changed, 9 insertions(+), 4 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c index 50a8d24a417e..8fd39ff74a49 100644 --- a/arch/x86/kernel/alternative.c +++ b/arch/x86/kernel/alternative.c @@ -1033,7 +1033,7 @@ static __always_inline void *text_poke_addr(struct text_poke_loc *tp) return _stext + tp->rel_addr; } -static int noinstr patch_cmp(const void *key, const void *elt) +static __always_inline int patch_cmp(const void *key, const void *elt) { struct text_poke_loc *tp = (struct text_poke_loc *) elt; @@ -1077,9 +1077,9 @@ int noinstr poke_int3_handler(struct pt_regs *regs) * Skip the binary search if there is a single member in the vector. */ if (unlikely(desc->nr_entries > 1)) { - tp = bsearch(ip, desc->vec, desc->nr_entries, - sizeof(struct text_poke_loc), - patch_cmp); + tp = __inline_bsearch(ip, desc->vec, desc->nr_entries, + sizeof(struct text_poke_loc), + patch_cmp); if (!tp) goto out_put; } else { diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index b28a64d7691f..280c290f414f 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -570,6 +570,11 @@ exit: dotraplinkage void notrace do_int3(struct pt_regs *regs, long error_code) { + /* + * poke_int3_handler() is completely self contained code; it does (and + * must) *NOT* call out to anything, lest it hits upon yet another + * INT3.
+ */ if (poke_int3_handler(regs)) return; -- cgit v1.2.3 From 0dc6cdc21b94eed8cdacf34eabb4175cebd13775 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 4 Mar 2020 15:22:09 +0100 Subject: x86/idtentry: Provide IDTENTRY_RAW Some exception handlers need to do extra work before any of the entry helpers are invoked. Provide IDTENTRY_RAW for this. Signed-off-by: Thomas Gleixner Reviewed-by: Alexandre Chartre Acked-by: Peter Zijlstra Acked-by: Andy Lutomirski Link: https://lkml.kernel.org/r/20200505135313.830540017@linutronix.de --- arch/x86/include/asm/idtentry.h | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) (limited to 'arch') diff --git a/arch/x86/include/asm/idtentry.h b/arch/x86/include/asm/idtentry.h index ee6ebfef7e57..2f31d03f3e57 100644 --- a/arch/x86/include/asm/idtentry.h +++ b/arch/x86/include/asm/idtentry.h @@ -104,6 +104,34 @@ __visible noinstr void func(struct pt_regs *regs, \ static __always_inline void __##func(struct pt_regs *regs, \ unsigned long error_code) +/** + * DECLARE_IDTENTRY_RAW - Declare functions for raw IDT entry points + * No error code pushed by hardware + * @vector: Vector number (ignored for C) + * @func: Function name of the entry point + * + * Maps to DECLARE_IDTENTRY(). + */ +#define DECLARE_IDTENTRY_RAW(vector, func) \ + DECLARE_IDTENTRY(vector, func) + +/** + * DEFINE_IDTENTRY_RAW - Emit code for raw IDT entry points + * @func: Function name of the entry point + * + * @func is called from ASM entry code with interrupts disabled. + * + * The macro is written so it acts as function definition. Append the + * body with a pair of curly brackets. + * + * Contrary to DEFINE_IDTENTRY() this does not invoke the + * idtentry_enter/exit() helpers before and after the body invocation. This + * needs to be done in the body itself if applicable. Use if extra work + * is required before the enter/exit() helpers are invoked. + */ +#define DEFINE_IDTENTRY_RAW(func) \ +__visible noinstr void func(struct pt_regs *regs) + #else /* !__ASSEMBLY__ */ /* @@ -118,6 +146,9 @@ static __always_inline void __##func(struct pt_regs *regs, \ /* Special case for 32bit IRET 'trap'. Do not emit ASM code */ #define DECLARE_IDTENTRY_SW(vector, func) +#define DECLARE_IDTENTRY_RAW(vector, func) \ + DECLARE_IDTENTRY(vector, func) + #endif /* __ASSEMBLY__ */ /* -- cgit v1.2.3 From 8edd7e37aed8b9df938a63f0b0259c70569ce3d2 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 25 Feb 2020 23:16:16 +0100 Subject: x86/entry: Convert INT3 exception to IDTENTRY_RAW Convert #BP to IDTENTRY_RAW: - Implement the C entry point with DEFINE_IDTENTRY_RAW - Invoke idtentry_enter/exit() from the function body - Emit the ASM stub with DECLARE_IDTENTRY_RAW - Remove the ASM idtentry in 64bit - Remove the open coded ASM entry code in 32bit - Fixup the XEN/PV code - Remove the old prototypes No functional change. This could be a plain IDTENTRY, but as Peter pointed out INT3 is broken vs. the static key in the context tracking code as this static key might be in the state of being patched and has an int3 which would recurse forever. IDTENTRY_RAW is therefore chosen to allow addressing this issue without lots of code churn. 
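Because the RAW variant omits the implicit helpers, a handler built on it has to do the state transitions itself. A minimal sketch with a hypothetical handler name (the real exc_int3 below chooses between idtentry_enter/exit() and nmi_enter/exit() depending on the interrupted context):

	DEFINE_IDTENTRY_RAW(exc_example)	/* hypothetical name, for illustration */
	{
		/* Nothing instrumentable may run before the state is established */
		idtentry_enter(regs);
		instrumentation_begin();
		/* ... handler work ... */
		instrumentation_end();
		idtentry_exit(regs);
	}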
Signed-off-by: Thomas Gleixner Reviewed-by: Alexandre Chartre Acked-by: Peter Zijlstra Acked-by: Andy Lutomirski Link: https://lkml.kernel.org/r/20200505135313.938474960@linutronix.de --- arch/x86/entry/entry_32.S | 7 ------- arch/x86/entry/entry_64.S | 2 -- arch/x86/include/asm/idtentry.h | 3 +++ arch/x86/include/asm/traps.h | 3 --- arch/x86/kernel/idt.c | 2 +- arch/x86/kernel/traps.c | 28 +++++++++++++++++----------- arch/x86/xen/enlighten_pv.c | 2 +- arch/x86/xen/xen-asm_64.S | 2 +- 8 files changed, 23 insertions(+), 26 deletions(-) (limited to 'arch') diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S index f7a5f1cda058..b9b0ddb53a08 100644 --- a/arch/x86/entry/entry_32.S +++ b/arch/x86/entry/entry_32.S @@ -1649,13 +1649,6 @@ SYM_CODE_START(nmi) #endif SYM_CODE_END(nmi) -SYM_CODE_START(int3) - ASM_CLAC - pushl $0 - pushl $do_int3 - jmp common_exception -SYM_CODE_END(int3) - .pushsection .text, "ax" SYM_CODE_START(rewind_stack_do_exit) /* Prevent any naive code from trying to unwind to our caller. */ diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index 1bada7b26210..69ddd052aef2 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -1072,8 +1072,6 @@ apicinterrupt IRQ_WORK_VECTOR irq_work_interrupt smp_irq_work_interrupt * Exception entry points. */ -idtentry X86_TRAP_BP int3 do_int3 has_error_code=0 - idtentry X86_TRAP_PF page_fault do_page_fault has_error_code=1 #ifdef CONFIG_X86_MCE diff --git a/arch/x86/include/asm/idtentry.h b/arch/x86/include/asm/idtentry.h index 2f31d03f3e57..3dc4d5b246d3 100644 --- a/arch/x86/include/asm/idtentry.h +++ b/arch/x86/include/asm/idtentry.h @@ -181,4 +181,7 @@ DECLARE_IDTENTRY_ERRORCODE(X86_TRAP_SS, exc_stack_segment); DECLARE_IDTENTRY_ERRORCODE(X86_TRAP_GP, exc_general_protection); DECLARE_IDTENTRY_ERRORCODE(X86_TRAP_AC, exc_alignment_check); +/* Raw exception entries which need extra work */ +DECLARE_IDTENTRY_RAW(X86_TRAP_BP, exc_int3); + #endif diff --git a/arch/x86/include/asm/traps.h b/arch/x86/include/asm/traps.h index 5774d0b6cf77..698285a2b660 100644 --- a/arch/x86/include/asm/traps.h +++ b/arch/x86/include/asm/traps.h @@ -13,7 +13,6 @@ asmlinkage void debug(void); asmlinkage void nmi(void); -asmlinkage void int3(void); #ifdef CONFIG_X86_64 asmlinkage void double_fault(void); #endif @@ -26,7 +25,6 @@ asmlinkage void machine_check(void); #if defined(CONFIG_X86_64) && defined(CONFIG_XEN_PV) asmlinkage void xen_xennmi(void); asmlinkage void xen_xendebug(void); -asmlinkage void xen_int3(void); asmlinkage void xen_double_fault(void); asmlinkage void xen_page_fault(void); #ifdef CONFIG_X86_MCE @@ -36,7 +34,6 @@ asmlinkage void xen_machine_check(void); dotraplinkage void do_debug(struct pt_regs *regs, long error_code); dotraplinkage void do_nmi(struct pt_regs *regs, long error_code); -dotraplinkage void do_int3(struct pt_regs *regs, long error_code); dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code, unsigned long cr2); dotraplinkage void do_page_fault(struct pt_regs *regs, unsigned long error_code, unsigned long address); dotraplinkage void do_mce(struct pt_regs *regs, long error_code); diff --git a/arch/x86/kernel/idt.c b/arch/x86/kernel/idt.c index 38b565b7e5b8..9ca8af65a212 100644 --- a/arch/x86/kernel/idt.c +++ b/arch/x86/kernel/idt.c @@ -60,7 +60,7 @@ static bool idt_setup_done __initdata; */ static const __initconst struct idt_data early_idts[] = { INTG(X86_TRAP_DB, debug), - SYSG(X86_TRAP_BP, int3), + SYSG(X86_TRAP_BP, asm_exc_int3), #ifdef CONFIG_X86_32 
INTG(X86_TRAP_PF, page_fault), #endif diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index 280c290f414f..0ad12dffde22 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -568,7 +568,7 @@ exit: cond_local_irq_disable(regs); } -dotraplinkage void notrace do_int3(struct pt_regs *regs, long error_code) +DEFINE_IDTENTRY_RAW(exc_int3) { /* * poke_int3_handler() is completely self contained code; it does (and @@ -579,16 +579,20 @@ dotraplinkage void notrace do_int3(struct pt_regs *regs, long error_code) return; /* - * Unlike any other non-IST entry, we can be called from pretty much - * any location in the kernel through kprobes -- text_poke() will most - * likely be handled by poke_int3_handler() above. This means this - * handler is effectively NMI-like. + * idtentry_enter() uses static_branch_{,un}likely() and therefore + * can trigger INT3, hence poke_int3_handler() must be done + * before. If the entry came from kernel mode, then use nmi_enter() + * because the INT3 could have been hit in any context including + * NMI. */ - if (!user_mode(regs)) + if (user_mode(regs)) + idtentry_enter(regs); + else nmi_enter(); + instrumentation_begin(); #ifdef CONFIG_KGDB_LOW_LEVEL_TRAP - if (kgdb_ll_trap(DIE_INT3, "int3", regs, error_code, X86_TRAP_BP, + if (kgdb_ll_trap(DIE_INT3, "int3", regs, 0, X86_TRAP_BP, SIGTRAP) == NOTIFY_STOP) goto exit; #endif /* CONFIG_KGDB_LOW_LEVEL_TRAP */ @@ -598,19 +602,21 @@ dotraplinkage void notrace do_int3(struct pt_regs *regs, long error_code) goto exit; #endif - if (notify_die(DIE_INT3, "int3", regs, error_code, X86_TRAP_BP, + if (notify_die(DIE_INT3, "int3", regs, 0, X86_TRAP_BP, SIGTRAP) == NOTIFY_STOP) goto exit; cond_local_irq_enable(regs); - do_trap(X86_TRAP_BP, SIGTRAP, "int3", regs, error_code, 0, NULL); + do_trap(X86_TRAP_BP, SIGTRAP, "int3", regs, 0, 0, NULL); cond_local_irq_disable(regs); exit: - if (!user_mode(regs)) + instrumentation_end(); + if (user_mode(regs)) + idtentry_exit(regs); + else nmi_exit(); } -NOKPROBE_SYMBOL(do_int3); #ifdef CONFIG_X86_64 /* diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c index 0a30fc0fe0fa..5bcd86c4dff0 100644 --- a/arch/x86/xen/enlighten_pv.c +++ b/arch/x86/xen/enlighten_pv.c @@ -616,7 +616,7 @@ static struct trap_array_entry trap_array[] = { { machine_check, xen_machine_check, true }, #endif { nmi, xen_xennmi, true }, - { int3, xen_int3, false }, + TRAP_ENTRY(exc_int3, false ), TRAP_ENTRY(exc_overflow, false ), #ifdef CONFIG_IA32_EMULATION { entry_INT80_compat, xen_entry_INT80_compat, false }, diff --git a/arch/x86/xen/xen-asm_64.S b/arch/x86/xen/xen-asm_64.S index 6a91157d5b5c..44f55569a37f 100644 --- a/arch/x86/xen/xen-asm_64.S +++ b/arch/x86/xen/xen-asm_64.S @@ -31,7 +31,7 @@ _ASM_NOKPROBE(xen_\name) xen_pv_trap asm_exc_divide_error xen_pv_trap debug xen_pv_trap xendebug -xen_pv_trap int3 +xen_pv_trap asm_exc_int3 xen_pv_trap xennmi xen_pv_trap asm_exc_overflow xen_pv_trap asm_exc_bounds -- cgit v1.2.3 From 21e28290b31708b72763641604e239eb369c230d Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 5 Mar 2020 16:09:52 +0100 Subject: x86/traps: Split int3 handler up For code simplicity split up the int3 handler into a kernel and user part which makes the code flow simpler to understand. 
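Condensed sketch of the resulting control flow (the full diff follows below):

	DEFINE_IDTENTRY_RAW(exc_int3)
	{
		if (poke_int3_handler(regs))
			return;
		if (user_mode(regs)) {
			/* idtentry_enter() path, calls do_int3_user() */
		} else {
			/* nmi_enter() path, calls do_int3(), die() if unhandled */
		}
	}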
Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Thomas Gleixner Reviewed-by: Alexandre Chartre Link: https://lkml.kernel.org/r/20200505135314.045220765@linutronix.de --- arch/x86/kernel/traps.c | 68 +++++++++++++++++++++++++++++-------------------- 1 file changed, 40 insertions(+), 28 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index 0ad12dffde22..21c8cfce24d3 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -568,6 +568,35 @@ exit: cond_local_irq_disable(regs); } +static bool do_int3(struct pt_regs *regs) +{ + int res; + +#ifdef CONFIG_KGDB_LOW_LEVEL_TRAP + if (kgdb_ll_trap(DIE_INT3, "int3", regs, 0, X86_TRAP_BP, + SIGTRAP) == NOTIFY_STOP) + return true; +#endif /* CONFIG_KGDB_LOW_LEVEL_TRAP */ + +#ifdef CONFIG_KPROBES + if (kprobe_int3_handler(regs)) + return true; +#endif + res = notify_die(DIE_INT3, "int3", regs, 0, X86_TRAP_BP, SIGTRAP); + + return res == NOTIFY_STOP; +} + +static void do_int3_user(struct pt_regs *regs) +{ + if (do_int3(regs)) + return; + + cond_local_irq_enable(regs); + do_trap(X86_TRAP_BP, SIGTRAP, "int3", regs, 0, 0, NULL); + cond_local_irq_disable(regs); +} + DEFINE_IDTENTRY_RAW(exc_int3) { /* @@ -585,37 +614,20 @@ DEFINE_IDTENTRY_RAW(exc_int3) * because the INT3 could have been hit in any context including * NMI. */ - if (user_mode(regs)) + if (user_mode(regs)) { idtentry_enter(regs); - else - nmi_enter(); - - instrumentation_begin(); -#ifdef CONFIG_KGDB_LOW_LEVEL_TRAP - if (kgdb_ll_trap(DIE_INT3, "int3", regs, 0, X86_TRAP_BP, - SIGTRAP) == NOTIFY_STOP) - goto exit; -#endif /* CONFIG_KGDB_LOW_LEVEL_TRAP */ - -#ifdef CONFIG_KPROBES - if (kprobe_int3_handler(regs)) - goto exit; -#endif - - if (notify_die(DIE_INT3, "int3", regs, 0, X86_TRAP_BP, - SIGTRAP) == NOTIFY_STOP) - goto exit; - - cond_local_irq_enable(regs); - do_trap(X86_TRAP_BP, SIGTRAP, "int3", regs, 0, 0, NULL); - cond_local_irq_disable(regs); - -exit: - instrumentation_end(); - if (user_mode(regs)) + instrumentation_begin(); + do_int3_user(regs); + instrumentation_end(); idtentry_exit(regs); - else + } else { + nmi_enter(); + instrumentation_begin(); + if (!do_int3(regs)) + die("int3", regs, 0); + instrumentation_end(); + nmi_exit(); + } } #ifdef CONFIG_X86_64 -- cgit v1.2.3 From 2c058b03cc06ba485169778a271f87e5ac57dd83 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 25 Feb 2020 23:33:22 +0100 Subject: x86/idtentry: Provide IDTENTRY_IST Same as IDTENTRY but for exceptions which run on Interrupt Stacks (IST) on 64bit. For 32bit this maps to IDTENTRY. There are 3 variants which will be used: IDTENTRY_MCE IDTENTRY_DB IDTENTRY_NMI These map to IDTENTRY_IST, but only the MCE and DB variants are emitting ASM code as the NMI entry needs hand-crafted ASM still. The function defines do not contain any idtentry_enter/exit() calls as these exceptions need special treatment.
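A consumer then pairs the declaration and the definition; a sketch of the #MC usage which a later patch in this series instantiates:

	/* asm/idtentry.h: emits the asm_exc_machine_check stub on 64bit */
	DECLARE_IDTENTRY_MCE(X86_TRAP_MC, exc_machine_check);

	/* Handler C file: no implicit idtentry_enter/exit(); the special
	 * treatment (e.g. nmi_enter/exit()) belongs in the body itself. */
	DEFINE_IDTENTRY_MCE(exc_machine_check)
	{
		/* ... */
	}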
Signed-off-by: Thomas Gleixner Reviewed-by: Alexandre Chartre Acked-by: Peter Zijlstra Acked-by: Andy Lutomirski Link: https://lkml.kernel.org/r/20200505135314.137125609@linutronix.de --- arch/x86/include/asm/idtentry.h | 54 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 54 insertions(+) (limited to 'arch') diff --git a/arch/x86/include/asm/idtentry.h b/arch/x86/include/asm/idtentry.h index 3dc4d5b246d3..3edd6d011ecd 100644 --- a/arch/x86/include/asm/idtentry.h +++ b/arch/x86/include/asm/idtentry.h @@ -132,6 +132,42 @@ static __always_inline void __##func(struct pt_regs *regs, \ #define DEFINE_IDTENTRY_RAW(func) \ __visible noinstr void func(struct pt_regs *regs) +#ifdef CONFIG_X86_64 +/** + * DECLARE_IDTENTRY_IST - Declare functions for IST handling IDT entry points + * @vector: Vector number (ignored for C) + * @func: Function name of the entry point + * + * Maps to DECLARE_IDTENTRY_RAW + */ +#define DECLARE_IDTENTRY_IST(vector, func) \ + DECLARE_IDTENTRY_RAW(vector, func) + +/** + * DEFINE_IDTENTRY_IST - Emit code for IST entry points + * @func: Function name of the entry point + * + * Maps to DEFINE_IDTENTRY_RAW + */ +#define DEFINE_IDTENTRY_IST(func) \ + DEFINE_IDTENTRY_RAW(func) + +#else /* CONFIG_X86_64 */ +/* Maps to a regular IDTENTRY on 32bit for now */ +# define DECLARE_IDTENTRY_IST DECLARE_IDTENTRY +# define DEFINE_IDTENTRY_IST DEFINE_IDTENTRY +#endif /* !CONFIG_X86_64 */ + +/* C-Code mapping */ +#define DECLARE_IDTENTRY_MCE DECLARE_IDTENTRY_IST +#define DEFINE_IDTENTRY_MCE DEFINE_IDTENTRY_IST + +#define DECLARE_IDTENTRY_NMI DECLARE_IDTENTRY_IST +#define DEFINE_IDTENTRY_NMI DEFINE_IDTENTRY_IST + +#define DECLARE_IDTENTRY_DEBUG DECLARE_IDTENTRY_IST +#define DEFINE_IDTENTRY_DEBUG DEFINE_IDTENTRY_IST + #else /* !__ASSEMBLY__ */ /* @@ -149,6 +185,24 @@ __visible noinstr void func(struct pt_regs *regs) #define DECLARE_IDTENTRY_RAW(vector, func) \ DECLARE_IDTENTRY(vector, func) +#ifdef CONFIG_X86_64 +# define DECLARE_IDTENTRY_MCE(vector, func) \ + idtentry_mce_db vector asm_##func func + +# define DECLARE_IDTENTRY_DEBUG(vector, func) \ + idtentry_mce_db vector asm_##func func + +#else +# define DECLARE_IDTENTRY_MCE(vector, func) \ + DECLARE_IDTENTRY(vector, func) + +# define DECLARE_IDTENTRY_DEBUG(vector, func) \ + DECLARE_IDTENTRY(vector, func) +#endif + +/* No ASM code emitted for NMI */ +#define DECLARE_IDTENTRY_NMI(vector, func) + #endif /* __ASSEMBLY__ */ /* -- cgit v1.2.3 From 94a46d316f2b54e3de8a4fa884cb16383db7fcd8 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 3 Apr 2020 22:37:31 +0200 Subject: x86/mce: Move nmi_enter/exit() into the entry point There is no reason to have nmi_enter/exit() in the actual MCE handlers. Move it to the entry point. This also covers the until now uncovered initial handler which only prints. 
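The ordering is deliberate: the crashing-CPU check runs before nmi_enter() so that an offline or crashing CPU bails out with as little entry machinery as possible. Condensed from the diff below:

	dotraplinkage noinstr void do_mce(struct pt_regs *regs, long error_code)
	{
		if (machine_check_vector == do_machine_check &&
		    mce_check_crashing_cpu())
			return;		/* no nmi_enter() on a crashing CPU */

		nmi_enter();
		machine_check_vector(regs, error_code);
		nmi_exit();
	}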
Signed-off-by: Thomas Gleixner Reviewed-by: Alexandre Chartre Acked-by: Peter Zijlstra Acked-by: Andy Lutomirski Link: https://lkml.kernel.org/r/20200505135314.243936614@linutronix.de --- arch/x86/kernel/cpu/mce/core.c | 26 +++++++++++++------------- arch/x86/kernel/cpu/mce/p5.c | 4 ---- arch/x86/kernel/cpu/mce/winchip.c | 4 ---- 3 files changed, 13 insertions(+), 21 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/cpu/mce/core.c b/arch/x86/kernel/cpu/mce/core.c index e9265e2f28c9..f5993ed6e16b 100644 --- a/arch/x86/kernel/cpu/mce/core.c +++ b/arch/x86/kernel/cpu/mce/core.c @@ -1100,8 +1100,10 @@ static void mce_clear_state(unsigned long *toclear) * kdump kernel establishing a new #MC handler where a broadcasted MCE * might not get handled properly. */ -static bool __mc_check_crashing_cpu(int cpu) +static noinstr bool mce_check_crashing_cpu(void) { + unsigned int cpu = smp_processor_id(); + if (cpu_is_offline(cpu) || (crashing_cpu != -1 && crashing_cpu != cpu)) { u64 mcgstatus; @@ -1235,7 +1237,6 @@ void noinstr do_machine_check(struct pt_regs *regs, long error_code) DECLARE_BITMAP(valid_banks, MAX_NR_BANKS); DECLARE_BITMAP(toclear, MAX_NR_BANKS); struct mca_config *cfg = &mca_cfg; - int cpu = smp_processor_id(); struct mce m, *final; char *msg = NULL; int worst = 0; @@ -1264,11 +1265,6 @@ void noinstr do_machine_check(struct pt_regs *regs, long error_code) */ int lmce = 1; - if (__mc_check_crashing_cpu(cpu)) - return; - - nmi_enter(); - this_cpu_inc(mce_exception_count); mce_gather_info(&m, regs); @@ -1356,7 +1352,7 @@ void noinstr do_machine_check(struct pt_regs *regs, long error_code) sync_core(); if (worst != MCE_AR_SEVERITY && !kill_it) - goto out_ist; + return; /* Fault was in user mode and we need to take some action */ if ((m.cs & 3) == 3) { @@ -1373,9 +1369,6 @@ void noinstr do_machine_check(struct pt_regs *regs, long error_code) if (!fixup_exception(regs, X86_TRAP_MC, error_code, 0)) mce_panic("Failed kernel mode recovery", &m, msg); } - -out_ist: - nmi_exit(); } EXPORT_SYMBOL_GPL(do_machine_check); @@ -1912,11 +1905,18 @@ static void unexpected_machine_check(struct pt_regs *regs, long error_code) void (*machine_check_vector)(struct pt_regs *, long error_code) = unexpected_machine_check; -dotraplinkage notrace void do_mce(struct pt_regs *regs, long error_code) +dotraplinkage noinstr void do_mce(struct pt_regs *regs, long error_code) { + if (machine_check_vector == do_machine_check && + mce_check_crashing_cpu()) + return; + + nmi_enter(); + machine_check_vector(regs, error_code); + + nmi_exit(); } -NOKPROBE_SYMBOL(do_mce); /* * Called for each booted CPU to set up machine checks. 
diff --git a/arch/x86/kernel/cpu/mce/p5.c b/arch/x86/kernel/cpu/mce/p5.c index 5ee94aa1b766..dc29f0f7b3ed 100644 --- a/arch/x86/kernel/cpu/mce/p5.c +++ b/arch/x86/kernel/cpu/mce/p5.c @@ -25,8 +25,6 @@ static void pentium_machine_check(struct pt_regs *regs, long error_code) { u32 loaddr, hi, lotype; - nmi_enter(); - rdmsr(MSR_IA32_P5_MC_ADDR, loaddr, hi); rdmsr(MSR_IA32_P5_MC_TYPE, lotype, hi); @@ -39,8 +37,6 @@ static void pentium_machine_check(struct pt_regs *regs, long error_code) } add_taint(TAINT_MACHINE_CHECK, LOCKDEP_NOW_UNRELIABLE); - - nmi_exit(); } /* Set up machine check reporting for processors with Intel style MCE: */ diff --git a/arch/x86/kernel/cpu/mce/winchip.c b/arch/x86/kernel/cpu/mce/winchip.c index b3938c195365..3f8f84ba0f51 100644 --- a/arch/x86/kernel/cpu/mce/winchip.c +++ b/arch/x86/kernel/cpu/mce/winchip.c @@ -19,12 +19,8 @@ /* Machine check handler for WinChip C6: */ static void winchip_machine_check(struct pt_regs *regs, long error_code) { - nmi_enter(); - pr_emerg("CPU0: Machine Check Exception.\n"); add_taint(TAINT_MACHINE_CHECK, LOCKDEP_NOW_UNRELIABLE); - - nmi_exit(); } /* Set up machine check reporting on the Winchip C6 series */ -- cgit v1.2.3 From 8cd501c1facc159dff6db63775151c9200a3ea1e Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 25 Feb 2020 23:33:23 +0100 Subject: x86/entry: Convert Machine Check to IDTENTRY_IST Convert #MC to IDTENTRY_MCE: - Implement the C entry points with DEFINE_IDTENTRY_MCE - Emit the ASM stub with DECLARE_IDTENTRY_MCE - Remove the ASM idtentry in 64bit - Remove the open coded ASM entry code in 32bit - Fixup the XEN/PV code - Remove the old prototypes - Remove the error code from *machine_check_vector() as it is always 0 and not used by any of the functions it can point to. Fixup all the functions as well. No functional change. 
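Schematically, the open coded stubs are replaced by the macro generated pair; a sketch of the effective expansion (not literal preprocessor output):

    /* C side, emitted by DEFINE_IDTENTRY_MCE(exc_machine_check) */
    __visible noinstr void exc_machine_check(struct pt_regs *regs)
    {
            ...
    }

    /* ASM side, emitted by DECLARE_IDTENTRY_MCE(X86_TRAP_MC, exc_machine_check) */
    idtentry_mce_db X86_TRAP_MC asm_exc_machine_check exc_machine_check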
Signed-off-by: Thomas Gleixner Reviewed-by: Alexandre Chartre Acked-by: Peter Zijlstra Acked-by: Andy Lutomirski Link: https://lkml.kernel.org/r/20200505135314.334980426@linutronix.de --- arch/x86/entry/entry_32.S | 9 --------- arch/x86/entry/entry_64.S | 3 --- arch/x86/include/asm/idtentry.h | 4 ++++ arch/x86/include/asm/mce.h | 2 +- arch/x86/include/asm/traps.h | 7 ------- arch/x86/kernel/cpu/mce/core.c | 23 ++++++++++++++--------- arch/x86/kernel/cpu/mce/inject.c | 4 ++-- arch/x86/kernel/cpu/mce/internal.h | 2 +- arch/x86/kernel/cpu/mce/p5.c | 2 +- arch/x86/kernel/cpu/mce/winchip.c | 2 +- arch/x86/kernel/idt.c | 10 +++++----- arch/x86/kvm/svm/svm.c | 2 +- arch/x86/kvm/vmx/vmx.c | 2 +- arch/x86/xen/enlighten_pv.c | 2 +- arch/x86/xen/xen-asm_64.S | 2 +- 15 files changed, 33 insertions(+), 43 deletions(-) (limited to 'arch') diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S index b9b0ddb53a08..4dd3d706d9fc 100644 --- a/arch/x86/entry/entry_32.S +++ b/arch/x86/entry/entry_32.S @@ -1297,15 +1297,6 @@ SYM_CODE_START(native_iret) SYM_CODE_END(native_iret) #endif -#ifdef CONFIG_X86_MCE -SYM_CODE_START(machine_check) - ASM_CLAC - pushl $0 - pushl $do_mce - jmp common_exception -SYM_CODE_END(machine_check) -#endif - #ifdef CONFIG_XEN_PV SYM_FUNC_START(xen_hypervisor_callback) /* diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index 69ddd052aef2..5007b975ca7c 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -1074,9 +1074,6 @@ apicinterrupt IRQ_WORK_VECTOR irq_work_interrupt smp_irq_work_interrupt idtentry X86_TRAP_PF page_fault do_page_fault has_error_code=1 -#ifdef CONFIG_X86_MCE -idtentry_mce_db X86_TRAP_MCE machine_check do_mce -#endif idtentry_mce_db X86_TRAP_DB debug do_debug idtentry_df X86_TRAP_DF double_fault do_double_fault diff --git a/arch/x86/include/asm/idtentry.h b/arch/x86/include/asm/idtentry.h index 3edd6d011ecd..36fe964c0d53 100644 --- a/arch/x86/include/asm/idtentry.h +++ b/arch/x86/include/asm/idtentry.h @@ -238,4 +238,8 @@ DECLARE_IDTENTRY_ERRORCODE(X86_TRAP_AC, exc_alignment_check); /* Raw exception entries which need extra work */ DECLARE_IDTENTRY_RAW(X86_TRAP_BP, exc_int3); +#ifdef CONFIG_X86_MCE +DECLARE_IDTENTRY_MCE(X86_TRAP_MC, exc_machine_check); +#endif + #endif diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h index f9cea081c05b..a00130112b02 100644 --- a/arch/x86/include/asm/mce.h +++ b/arch/x86/include/asm/mce.h @@ -238,7 +238,7 @@ extern void mce_disable_bank(int bank); /* * Exception handler */ -void do_machine_check(struct pt_regs *, long); +void do_machine_check(struct pt_regs *pt_regs); /* * Threshold handler diff --git a/arch/x86/include/asm/traps.h b/arch/x86/include/asm/traps.h index 698285a2b660..6096db912625 100644 --- a/arch/x86/include/asm/traps.h +++ b/arch/x86/include/asm/traps.h @@ -18,25 +18,18 @@ asmlinkage void double_fault(void); #endif asmlinkage void page_fault(void); asmlinkage void async_page_fault(void); -#ifdef CONFIG_X86_MCE -asmlinkage void machine_check(void); -#endif /* CONFIG_X86_MCE */ #if defined(CONFIG_X86_64) && defined(CONFIG_XEN_PV) asmlinkage void xen_xennmi(void); asmlinkage void xen_xendebug(void); asmlinkage void xen_double_fault(void); asmlinkage void xen_page_fault(void); -#ifdef CONFIG_X86_MCE -asmlinkage void xen_machine_check(void); -#endif /* CONFIG_X86_MCE */ #endif dotraplinkage void do_debug(struct pt_regs *regs, long error_code); dotraplinkage void do_nmi(struct pt_regs *regs, long error_code); dotraplinkage void do_double_fault(struct 
pt_regs *regs, long error_code, unsigned long cr2); dotraplinkage void do_page_fault(struct pt_regs *regs, unsigned long error_code, unsigned long address); -dotraplinkage void do_mce(struct pt_regs *regs, long error_code); #ifdef CONFIG_X86_64 asmlinkage __visible notrace struct pt_regs *sync_regs(struct pt_regs *eregs); diff --git a/arch/x86/kernel/cpu/mce/core.c b/arch/x86/kernel/cpu/mce/core.c index f5993ed6e16b..842dd03c3918 100644 --- a/arch/x86/kernel/cpu/mce/core.c +++ b/arch/x86/kernel/cpu/mce/core.c @@ -1232,7 +1232,7 @@ static void kill_me_maybe(struct callback_head *cb) * backing the user stack, tracing that reads the user stack will cause * potentially infinite recursion. */ -void noinstr do_machine_check(struct pt_regs *regs, long error_code) +void noinstr do_machine_check(struct pt_regs *regs) { DECLARE_BITMAP(valid_banks, MAX_NR_BANKS); DECLARE_BITMAP(toclear, MAX_NR_BANKS); @@ -1366,7 +1366,7 @@ void noinstr do_machine_check(struct pt_regs *regs, long error_code) current->mce_kill_me.func = kill_me_now; task_work_add(current, ¤t->mce_kill_me, true); } else { - if (!fixup_exception(regs, X86_TRAP_MC, error_code, 0)) + if (!fixup_exception(regs, X86_TRAP_MC, 0, 0)) mce_panic("Failed kernel mode recovery", &m, msg); } } @@ -1895,27 +1895,32 @@ bool filter_mce(struct mce *m) } /* Handle unconfigured int18 (should never happen) */ -static void unexpected_machine_check(struct pt_regs *regs, long error_code) +static void unexpected_machine_check(struct pt_regs *regs) { pr_err("CPU#%d: Unexpected int18 (Machine Check)\n", smp_processor_id()); } /* Call the installed machine check handler for this CPU setup. */ -void (*machine_check_vector)(struct pt_regs *, long error_code) = - unexpected_machine_check; +void (*machine_check_vector)(struct pt_regs *) = unexpected_machine_check; -dotraplinkage noinstr void do_mce(struct pt_regs *regs, long error_code) +DEFINE_IDTENTRY_MCE(exc_machine_check) { if (machine_check_vector == do_machine_check && mce_check_crashing_cpu()) return; - nmi_enter(); + if (user_mode(regs)) + idtentry_enter(regs); + else + nmi_enter(); - machine_check_vector(regs, error_code); + machine_check_vector(regs); - nmi_exit(); + if (user_mode(regs)) + idtentry_exit(regs); + else + nmi_exit(); } /* diff --git a/arch/x86/kernel/cpu/mce/inject.c b/arch/x86/kernel/cpu/mce/inject.c index 3413b41b8d55..0593b192eb8f 100644 --- a/arch/x86/kernel/cpu/mce/inject.c +++ b/arch/x86/kernel/cpu/mce/inject.c @@ -146,9 +146,9 @@ static void raise_exception(struct mce *m, struct pt_regs *pregs) regs.cs = m->cs; pregs = ®s; } - /* in mcheck exeception handler, irq will be disabled */ + /* do_machine_check() expects interrupts disabled -- at least */ local_irq_save(flags); - do_machine_check(pregs, 0); + do_machine_check(pregs); local_irq_restore(flags); m->finished = 0; } diff --git a/arch/x86/kernel/cpu/mce/internal.h b/arch/x86/kernel/cpu/mce/internal.h index 3b008172ad73..b74ca4a28c66 100644 --- a/arch/x86/kernel/cpu/mce/internal.h +++ b/arch/x86/kernel/cpu/mce/internal.h @@ -9,7 +9,7 @@ #include /* Pointer to the installed machine check handler for this CPU setup. 
*/ -extern void (*machine_check_vector)(struct pt_regs *, long error_code); +extern void (*machine_check_vector)(struct pt_regs *); enum severity_level { MCE_NO_SEVERITY, diff --git a/arch/x86/kernel/cpu/mce/p5.c b/arch/x86/kernel/cpu/mce/p5.c index dc29f0f7b3ed..eaebc4ce7398 100644 --- a/arch/x86/kernel/cpu/mce/p5.c +++ b/arch/x86/kernel/cpu/mce/p5.c @@ -21,7 +21,7 @@ int mce_p5_enabled __read_mostly; /* Machine check handler for Pentium class Intel CPUs: */ -static void pentium_machine_check(struct pt_regs *regs, long error_code) +static void pentium_machine_check(struct pt_regs *regs) { u32 loaddr, hi, lotype; diff --git a/arch/x86/kernel/cpu/mce/winchip.c b/arch/x86/kernel/cpu/mce/winchip.c index 3f8f84ba0f51..90e3d60c645e 100644 --- a/arch/x86/kernel/cpu/mce/winchip.c +++ b/arch/x86/kernel/cpu/mce/winchip.c @@ -17,7 +17,7 @@ #include "internal.h" /* Machine check handler for WinChip C6: */ -static void winchip_machine_check(struct pt_regs *regs, long error_code) +static void winchip_machine_check(struct pt_regs *regs) { pr_emerg("CPU0: Machine Check Exception.\n"); add_taint(TAINT_MACHINE_CHECK, LOCKDEP_NOW_UNRELIABLE); diff --git a/arch/x86/kernel/idt.c b/arch/x86/kernel/idt.c index 9ca8af65a212..6b93840784d5 100644 --- a/arch/x86/kernel/idt.c +++ b/arch/x86/kernel/idt.c @@ -96,7 +96,7 @@ static const __initconst struct idt_data def_idts[] = { INTG(X86_TRAP_DB, debug), #ifdef CONFIG_X86_MCE - INTG(X86_TRAP_MC, machine_check), + INTG(X86_TRAP_MC, asm_exc_machine_check), #endif SYSG(X86_TRAP_OF, asm_exc_overflow), @@ -185,11 +185,11 @@ gate_desc debug_idt_table[IDT_ENTRIES] __page_aligned_bss; * cpu_init() when the TSS has been initialized. */ static const __initconst struct idt_data ist_idts[] = { - ISTG(X86_TRAP_DB, debug, IST_INDEX_DB), - ISTG(X86_TRAP_NMI, nmi, IST_INDEX_NMI), - ISTG(X86_TRAP_DF, double_fault, IST_INDEX_DF), + ISTG(X86_TRAP_DB, debug, IST_INDEX_DB), + ISTG(X86_TRAP_NMI, nmi, IST_INDEX_NMI), + ISTG(X86_TRAP_DF, double_fault, IST_INDEX_DF), #ifdef CONFIG_X86_MCE - ISTG(X86_TRAP_MC, machine_check, IST_INDEX_MCE), + ISTG(X86_TRAP_MC, asm_exc_machine_check, IST_INDEX_MCE), #endif }; diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index 9e333b91ff78..7502cd65528f 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -1837,7 +1837,7 @@ static void kvm_machine_check(void) .flags = X86_EFLAGS_IF, }; - do_machine_check(®s, 0); + do_machine_check(®s); #endif } diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index 170cc76a581f..2b5ba6063a2d 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -4683,7 +4683,7 @@ static void kvm_machine_check(void) .flags = X86_EFLAGS_IF, }; - do_machine_check(®s, 0); + do_machine_check(®s); #endif } diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c index 5bcd86c4dff0..267583f9b207 100644 --- a/arch/x86/xen/enlighten_pv.c +++ b/arch/x86/xen/enlighten_pv.c @@ -613,7 +613,7 @@ static struct trap_array_entry trap_array[] = { { debug, xen_xendebug, true }, { double_fault, xen_double_fault, true }, #ifdef CONFIG_X86_MCE - { machine_check, xen_machine_check, true }, + TRAP_ENTRY(exc_machine_check, true ), #endif { nmi, xen_xennmi, true }, TRAP_ENTRY(exc_int3, false ), diff --git a/arch/x86/xen/xen-asm_64.S b/arch/x86/xen/xen-asm_64.S index 44f55569a37f..617ef3f98160 100644 --- a/arch/x86/xen/xen-asm_64.S +++ b/arch/x86/xen/xen-asm_64.S @@ -48,7 +48,7 @@ xen_pv_trap asm_exc_spurious_interrupt_bug xen_pv_trap asm_exc_coprocessor_error xen_pv_trap asm_exc_alignment_check #ifdef 
CONFIG_X86_MCE -xen_pv_trap machine_check +xen_pv_trap asm_exc_machine_check #endif /* CONFIG_X86_MCE */ xen_pv_trap asm_exc_simd_coprocessor_error #ifdef CONFIG_IA32_EMULATION -- cgit v1.2.3 From aedbdeab00dcfcc6d751f9fb1b4896b01911d494 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sat, 4 Apr 2020 15:39:13 +0200 Subject: x86/mce: Use untraced rd/wrmsr in the MCE offline/crash check mce_check_crashing_cpu() is called right at the entry of the MCE handler. It uses mce_rdmsrl() and mce_wrmsrl() which are wrappers around rdmsrl() and wrmsrl() to handle the MCE error injection mechanism, which is pointless in this context, i.e. when the MCE hits an offline CPU or the system is already marked crashing. The MSR access can also be traced, so use the untraceable variants. This is also safe vs. XEN paravirt as these MSRs are not affected by XEN PV modifications. Signed-off-by: Thomas Gleixner Reviewed-by: Alexandre Chartre Acked-by: Peter Zijlstra Acked-by: Andy Lutomirski Link: https://lkml.kernel.org/r/20200505135314.426347351@linutronix.de --- arch/x86/kernel/cpu/mce/core.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/cpu/mce/core.c b/arch/x86/kernel/cpu/mce/core.c index 842dd03c3918..317765245190 100644 --- a/arch/x86/kernel/cpu/mce/core.c +++ b/arch/x86/kernel/cpu/mce/core.c @@ -1108,7 +1108,7 @@ static noinstr bool mce_check_crashing_cpu(void) (crashing_cpu != -1 && crashing_cpu != cpu)) { u64 mcgstatus; - mcgstatus = mce_rdmsrl(MSR_IA32_MCG_STATUS); + mcgstatus = __rdmsr(MSR_IA32_MCG_STATUS); if (boot_cpu_data.x86_vendor == X86_VENDOR_ZHAOXIN) { if (mcgstatus & MCG_STATUS_LMCES) @@ -1116,7 +1116,7 @@ static noinstr bool mce_check_crashing_cpu(void) if (mcgstatus & MCG_STATUS_RIPV) { - mce_wrmsrl(MSR_IA32_MCG_STATUS, 0); + __wrmsr(MSR_IA32_MCG_STATUS, 0, 0); return true; } } -- cgit v1.2.3 From 9cce81cff748ef0e79b41c6e37d7137267f1212f Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 25 Feb 2020 23:33:24 +0100 Subject: x86/idtentry: Provide IDTENTRY_XEN for XEN/PV XEN/PV has special wrappers for NMI and DB exceptions. They redirect these exceptions through regular IDTENTRY points. Provide the necessary IDTENTRY macros to make this work. Signed-off-by: Thomas Gleixner Reviewed-by: Alexandre Chartre Acked-by: Peter Zijlstra Acked-by: Andy Lutomirski Link: https://lkml.kernel.org/r/20200505135314.518622698@linutronix.de --- arch/x86/include/asm/idtentry.h | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) (limited to 'arch') diff --git a/arch/x86/include/asm/idtentry.h b/arch/x86/include/asm/idtentry.h index 36fe964c0d53..2315eecf04fd 100644 --- a/arch/x86/include/asm/idtentry.h +++ b/arch/x86/include/asm/idtentry.h @@ -168,6 +168,18 @@ __visible noinstr void func(struct pt_regs *regs) #define DECLARE_IDTENTRY_DEBUG DECLARE_IDTENTRY_IST #define DEFINE_IDTENTRY_DEBUG DEFINE_IDTENTRY_IST +/** + * DECLARE_IDTENTRY_XEN - Declare functions for XEN redirect IDT entry points + * @vector: Vector number (ignored for C) + * @func: Function name of the entry point + * + * Used for xennmi and xendebug redirections. No DEFINE as this is all ASM + * indirection magic.
+ */ +#define DECLARE_IDTENTRY_XEN(vector, func) \ + asmlinkage void xen_asm_exc_xen##func(void); \ + asmlinkage void asm_exc_xen##func(void) + #else /* !__ASSEMBLY__ */ /* @@ -203,6 +215,10 @@ __visible noinstr void func(struct pt_regs *regs) /* No ASM code emitted for NMI */ #define DECLARE_IDTENTRY_NMI(vector, func) +/* XEN NMI and DB wrapper */ +#define DECLARE_IDTENTRY_XEN(vector, func) \ + idtentry vector asm_exc_xen##func exc_##func has_error_code=0 sane=1 + #endif /* __ASSEMBLY__ */ /* -- cgit v1.2.3 From 6271fef00b3489690e52ce95edbc378357513547 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 25 Feb 2020 23:33:25 +0100 Subject: x86/entry: Convert NMI to IDTENTRY_NMI Convert #NMI to IDTENTRY_NMI: - Implement the C entry point with DEFINE_IDTENTRY_NMI - Fixup the XEN/PV code - Remove the old prototypes No functional change. Signed-off-by: Thomas Gleixner Reviewed-by: Alexandre Chartre Acked-by: Peter Zijlstra Acked-by: Andy Lutomirski Link: https://lkml.kernel.org/r/20200505135314.609932306@linutronix.de --- arch/x86/entry/entry_32.S | 8 ++++---- arch/x86/entry/entry_64.S | 15 +++++++-------- arch/x86/include/asm/idtentry.h | 4 ++++ arch/x86/include/asm/traps.h | 3 --- arch/x86/kernel/idt.c | 4 ++-- arch/x86/kernel/nmi.c | 4 +--- arch/x86/xen/enlighten_pv.c | 7 ++++++- arch/x86/xen/xen-asm_64.S | 2 +- 8 files changed, 25 insertions(+), 22 deletions(-) (limited to 'arch') diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S index 4dd3d706d9fc..d4961cac73f6 100644 --- a/arch/x86/entry/entry_32.S +++ b/arch/x86/entry/entry_32.S @@ -1545,7 +1545,7 @@ SYM_CODE_END(double_fault) * switched stacks. We handle both conditions by simply checking whether we * interrupted kernel code running on the SYSENTER stack. */ -SYM_CODE_START(nmi) +SYM_CODE_START(asm_exc_nmi) ASM_CLAC #ifdef CONFIG_X86_ESPFIX32 @@ -1574,7 +1574,7 @@ SYM_CODE_START(nmi) jb .Lnmi_from_sysenter_stack /* Not on SYSENTER stack. */ - call do_nmi + call exc_nmi jmp .Lnmi_return .Lnmi_from_sysenter_stack: @@ -1584,7 +1584,7 @@ SYM_CODE_START(nmi) */ movl %esp, %ebx movl PER_CPU_VAR(cpu_current_top_of_stack), %esp - call do_nmi + call exc_nmi movl %ebx, %esp .Lnmi_return: @@ -1638,7 +1638,7 @@ SYM_CODE_START(nmi) lss (1+5+6)*4(%esp), %esp # back to espfix stack jmp .Lirq_return #endif -SYM_CODE_END(nmi) +SYM_CODE_END(asm_exc_nmi) .pushsection .text, "ax" SYM_CODE_START(rewind_stack_do_exit) diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index 5007b975ca7c..3d7f2cc29be3 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -1079,7 +1079,6 @@ idtentry_df X86_TRAP_DF double_fault do_double_fault #ifdef CONFIG_XEN_PV idtentry 512 /* dummy */ hypervisor_callback xen_do_hypervisor_callback has_error_code=0 -idtentry X86_TRAP_NMI xennmi do_nmi has_error_code=0 idtentry X86_TRAP_DB xendebug do_debug has_error_code=0 #endif @@ -1414,7 +1413,7 @@ SYM_CODE_END(error_return) * %r14: Used to save/restore the CR3 of the interrupted context * when PAGE_TABLE_ISOLATION is in use. Do not clobber. */ -SYM_CODE_START(nmi) +SYM_CODE_START(asm_exc_nmi) UNWIND_HINT_IRET_REGS /* @@ -1499,7 +1498,7 @@ SYM_CODE_START(nmi) movq %rsp, %rdi movq $-1, %rsi - call do_nmi + call exc_nmi /* * Return back to user mode. We must *not* do the normal exit @@ -1556,7 +1555,7 @@ SYM_CODE_START(nmi) * end_repeat_nmi, then we are a nested NMI. We must not * modify the "iret" frame because it's being written by * the outer NMI. 
That's okay; the outer NMI handler is - * about to about to call do_nmi anyway, so we can just + * about to about to call exc_nmi() anyway, so we can just * resume the outer NMI. */ @@ -1675,7 +1674,7 @@ repeat_nmi: * RSP is pointing to "outermost RIP". gsbase is unknown, but, if * we're repeating an NMI, gsbase has the same value that it had on * the first iteration. paranoid_entry will load the kernel - * gsbase if needed before we call do_nmi. "NMI executing" + * gsbase if needed before we call exc_nmi(). "NMI executing" * is zero. */ movq $1, 10*8(%rsp) /* Set "NMI executing". */ @@ -1709,10 +1708,10 @@ end_repeat_nmi: call paranoid_entry UNWIND_HINT_REGS - /* paranoidentry do_nmi, 0; without TRACE_IRQS_OFF */ + /* paranoidentry exc_nmi(), 0; without TRACE_IRQS_OFF */ movq %rsp, %rdi movq $-1, %rsi - call do_nmi + call exc_nmi /* Always restore stashed CR3 value (see paranoid_entry) */ RESTORE_CR3 scratch_reg=%r15 save_reg=%r14 @@ -1749,7 +1748,7 @@ nmi_restore: * about espfix64 on the way back to kernel mode. */ iretq -SYM_CODE_END(nmi) +SYM_CODE_END(asm_exc_nmi) #ifndef CONFIG_IA32_EMULATION /* diff --git a/arch/x86/include/asm/idtentry.h b/arch/x86/include/asm/idtentry.h index 2315eecf04fd..1f067e6c4051 100644 --- a/arch/x86/include/asm/idtentry.h +++ b/arch/x86/include/asm/idtentry.h @@ -258,4 +258,8 @@ DECLARE_IDTENTRY_RAW(X86_TRAP_BP, exc_int3); DECLARE_IDTENTRY_MCE(X86_TRAP_MC, exc_machine_check); #endif +/* NMI */ +DECLARE_IDTENTRY_NMI(X86_TRAP_NMI, exc_nmi); +DECLARE_IDTENTRY_XEN(X86_TRAP_NMI, nmi); + #endif diff --git a/arch/x86/include/asm/traps.h b/arch/x86/include/asm/traps.h index 6096db912625..57b83ae19c15 100644 --- a/arch/x86/include/asm/traps.h +++ b/arch/x86/include/asm/traps.h @@ -12,7 +12,6 @@ #define dotraplinkage __visible asmlinkage void debug(void); -asmlinkage void nmi(void); #ifdef CONFIG_X86_64 asmlinkage void double_fault(void); #endif @@ -20,14 +19,12 @@ asmlinkage void page_fault(void); asmlinkage void async_page_fault(void); #if defined(CONFIG_X86_64) && defined(CONFIG_XEN_PV) -asmlinkage void xen_xennmi(void); asmlinkage void xen_xendebug(void); asmlinkage void xen_double_fault(void); asmlinkage void xen_page_fault(void); #endif dotraplinkage void do_debug(struct pt_regs *regs, long error_code); -dotraplinkage void do_nmi(struct pt_regs *regs, long error_code); dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code, unsigned long cr2); dotraplinkage void do_page_fault(struct pt_regs *regs, unsigned long error_code, unsigned long address); diff --git a/arch/x86/kernel/idt.c b/arch/x86/kernel/idt.c index 6b93840784d5..d3fecd88677c 100644 --- a/arch/x86/kernel/idt.c +++ b/arch/x86/kernel/idt.c @@ -74,7 +74,7 @@ static const __initconst struct idt_data early_idts[] = { */ static const __initconst struct idt_data def_idts[] = { INTG(X86_TRAP_DE, asm_exc_divide_error), - INTG(X86_TRAP_NMI, nmi), + INTG(X86_TRAP_NMI, asm_exc_nmi), INTG(X86_TRAP_BR, asm_exc_bounds), INTG(X86_TRAP_UD, asm_exc_invalid_op), INTG(X86_TRAP_NM, asm_exc_device_not_available), @@ -186,7 +186,7 @@ gate_desc debug_idt_table[IDT_ENTRIES] __page_aligned_bss; */ static const __initconst struct idt_data ist_idts[] = { ISTG(X86_TRAP_DB, debug, IST_INDEX_DB), - ISTG(X86_TRAP_NMI, nmi, IST_INDEX_NMI), + ISTG(X86_TRAP_NMI, asm_exc_nmi, IST_INDEX_NMI), ISTG(X86_TRAP_DF, double_fault, IST_INDEX_DF), #ifdef CONFIG_X86_MCE ISTG(X86_TRAP_MC, asm_exc_machine_check, IST_INDEX_MCE), diff --git a/arch/x86/kernel/nmi.c b/arch/x86/kernel/nmi.c index bdcc5146de96..3b05cc802abb 100644 --- 
a/arch/x86/kernel/nmi.c +++ b/arch/x86/kernel/nmi.c @@ -503,8 +503,7 @@ static bool notrace is_debug_stack(unsigned long addr) NOKPROBE_SYMBOL(is_debug_stack); #endif -dotraplinkage notrace void -do_nmi(struct pt_regs *regs, long error_code) +DEFINE_IDTENTRY_NMI(exc_nmi) { if (IS_ENABLED(CONFIG_SMP) && cpu_is_offline(smp_processor_id())) return; @@ -554,7 +553,6 @@ nmi_restart: if (user_mode(regs)) mds_user_clear_cpu_buffers(); } -NOKPROBE_SYMBOL(do_nmi); void stop_nmi(void) { diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c index 267583f9b207..0d6c2789e676 100644 --- a/arch/x86/xen/enlighten_pv.c +++ b/arch/x86/xen/enlighten_pv.c @@ -609,13 +609,18 @@ struct trap_array_entry { .xen = xen_asm_##func, \ .ist_okay = ist_ok } +#define TRAP_ENTRY_REDIR(func, xenfunc, ist_ok) { \ + .orig = asm_##func, \ + .xen = xen_asm_##xenfunc, \ + .ist_okay = ist_ok } + static struct trap_array_entry trap_array[] = { { debug, xen_xendebug, true }, { double_fault, xen_double_fault, true }, #ifdef CONFIG_X86_MCE TRAP_ENTRY(exc_machine_check, true ), #endif - { nmi, xen_xennmi, true }, + TRAP_ENTRY_REDIR(exc_nmi, exc_xennmi, true ), TRAP_ENTRY(exc_int3, false ), TRAP_ENTRY(exc_overflow, false ), #ifdef CONFIG_IA32_EMULATION diff --git a/arch/x86/xen/xen-asm_64.S b/arch/x86/xen/xen-asm_64.S index 617ef3f98160..04fa01b096ee 100644 --- a/arch/x86/xen/xen-asm_64.S +++ b/arch/x86/xen/xen-asm_64.S @@ -32,7 +32,7 @@ xen_pv_trap asm_exc_divide_error xen_pv_trap debug xen_pv_trap xendebug xen_pv_trap asm_exc_int3 -xen_pv_trap xennmi +xen_pv_trap asm_exc_xennmi xen_pv_trap asm_exc_overflow xen_pv_trap asm_exc_bounds xen_pv_trap asm_exc_invalid_op -- cgit v1.2.3 From f051f697955049c7cf10a635ab8149aa619243b2 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 6 Apr 2020 15:55:06 +0200 Subject: x86/nmi: Protect NMI entry against instrumentation Mark all functions in the fragile code parts noinstr or force inlining so they can't be instrumented. Also make the hardware latency tracer invocation explicit outside of the non-instrumentable section.
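The resulting pattern, sketched from the nmi.c hunk below: the noinstr entry performs the fragile state checks first, and everything which may legitimately be instrumented runs inside an explicit instrumentation section:

    static noinstr void default_do_nmi(struct pt_regs *regs)
    {
            /* fragile, non-instrumentable checks first */
            ...
            instrumentation_begin();
            /* handler invocations etc. may be traced/probed */
            handled = nmi_handle(NMI_LOCAL, regs);
            ...
    out:
            instrumentation_end();
    }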
Signed-off-by: Thomas Gleixner Reviewed-by: Alexandre Chartre Acked-by: Peter Zijlstra Acked-by: Andy Lutomirski Link: https://lkml.kernel.org/r/20200505135314.716186134@linutronix.de --- arch/x86/include/asm/desc.h | 8 ++++---- arch/x86/kernel/cpu/common.c | 6 ++---- arch/x86/kernel/nmi.c | 15 +++++++++------ 3 files changed, 15 insertions(+), 14 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/desc.h b/arch/x86/include/asm/desc.h index 085a2dd312b4..d6c3d346c63a 100644 --- a/arch/x86/include/asm/desc.h +++ b/arch/x86/include/asm/desc.h @@ -214,7 +214,7 @@ static inline void native_load_gdt(const struct desc_ptr *dtr) asm volatile("lgdt %0"::"m" (*dtr)); } -static inline void native_load_idt(const struct desc_ptr *dtr) +static __always_inline void native_load_idt(const struct desc_ptr *dtr) { asm volatile("lidt %0"::"m" (*dtr)); } @@ -392,7 +392,7 @@ extern unsigned long system_vectors[]; #ifdef CONFIG_X86_64 DECLARE_PER_CPU(u32, debug_idt_ctr); -static inline bool is_debug_idt_enabled(void) +static __always_inline bool is_debug_idt_enabled(void) { if (this_cpu_read(debug_idt_ctr)) return true; @@ -400,7 +400,7 @@ static inline bool is_debug_idt_enabled(void) return false; } -static inline void load_debug_idt(void) +static __always_inline void load_debug_idt(void) { load_idt((const struct desc_ptr *)&debug_idt_descr); } @@ -422,7 +422,7 @@ static inline void load_debug_idt(void) * that doesn't need to disable interrupts, as nothing should be * bothering the CPU then. */ -static inline void load_current_idt(void) +static __always_inline void load_current_idt(void) { if (is_debug_idt_enabled()) load_debug_idt(); diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 8be042df12c3..f4645f9ff9cb 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -1709,21 +1709,19 @@ void syscall_init(void) DEFINE_PER_CPU(int, debug_stack_usage); DEFINE_PER_CPU(u32, debug_idt_ctr); -void debug_stack_set_zero(void) +noinstr void debug_stack_set_zero(void) { this_cpu_inc(debug_idt_ctr); load_current_idt(); } -NOKPROBE_SYMBOL(debug_stack_set_zero); -void debug_stack_reset(void) +noinstr void debug_stack_reset(void) { if (WARN_ON(!this_cpu_read(debug_idt_ctr))) return; if (this_cpu_dec_return(debug_idt_ctr) == 0) load_current_idt(); } -NOKPROBE_SYMBOL(debug_stack_reset); #else /* CONFIG_X86_64 */ diff --git a/arch/x86/kernel/nmi.c b/arch/x86/kernel/nmi.c index 3b05cc802abb..3052c78f03aa 100644 --- a/arch/x86/kernel/nmi.c +++ b/arch/x86/kernel/nmi.c @@ -303,7 +303,7 @@ NOKPROBE_SYMBOL(unknown_nmi_error); static DEFINE_PER_CPU(bool, swallow_nmi); static DEFINE_PER_CPU(unsigned long, last_nmi_rip); -static void default_do_nmi(struct pt_regs *regs) +static noinstr void default_do_nmi(struct pt_regs *regs) { unsigned char reason = 0; int handled; @@ -329,6 +329,8 @@ static void default_do_nmi(struct pt_regs *regs) __this_cpu_write(last_nmi_rip, regs->ip); + instrumentation_begin(); + handled = nmi_handle(NMI_LOCAL, regs); __this_cpu_add(nmi_stats.normal, handled); if (handled) { @@ -342,7 +344,7 @@ static void default_do_nmi(struct pt_regs *regs) */ if (handled > 1) __this_cpu_write(swallow_nmi, true); - return; + goto out; } /* @@ -374,7 +376,7 @@ static void default_do_nmi(struct pt_regs *regs) #endif __this_cpu_add(nmi_stats.external, 1); raw_spin_unlock(&nmi_reason_lock); - return; + goto out; } raw_spin_unlock(&nmi_reason_lock); @@ -412,8 +414,10 @@ static void default_do_nmi(struct pt_regs *regs) __this_cpu_add(nmi_stats.swallow, 1); else 
unknown_nmi_error(reason, regs); + +out: + instrumentation_end(); } -NOKPROBE_SYMBOL(default_do_nmi); /* * NMIs can page fault or hit breakpoints which will cause it to lose @@ -485,7 +489,7 @@ static DEFINE_PER_CPU(unsigned long, nmi_cr2); */ static DEFINE_PER_CPU(int, update_debug_stack); -static bool notrace is_debug_stack(unsigned long addr) +static noinstr bool is_debug_stack(unsigned long addr) { struct cea_exception_stacks *cs = __this_cpu_read(cea_exception_stacks); unsigned long top = CEA_ESTACK_TOP(cs, DB); @@ -500,7 +504,6 @@ static bool notrace is_debug_stack(unsigned long addr) */ return addr >= bot && addr < top; } -NOKPROBE_SYMBOL(is_debug_stack); #endif DEFINE_IDTENTRY_NMI(exc_nmi) -- cgit v1.2.3 From 9f58fdde95c9509a4ded27a6d0035e79294002b4 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 6 Apr 2020 21:02:56 +0200 Subject: x86/db: Split out dr6/7 handling DR6/7 should be handled before nmi_enter() is invoked and restored after nmi_exit() to minimize the exposure. Split it out into helper inlines and bring it into the correct order. Signed-off-by: Peter Zijlstra Signed-off-by: Thomas Gleixner Reviewed-by: Alexandre Chartre Acked-by: Andy Lutomirski Link: https://lkml.kernel.org/r/20200505135314.808628211@linutronix.de --- arch/x86/kernel/hw_breakpoint.c | 6 +--- arch/x86/kernel/traps.c | 75 ++++++++++++++++++++++++++++++----------- 2 files changed, 57 insertions(+), 24 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/hw_breakpoint.c b/arch/x86/kernel/hw_breakpoint.c index d42fc0eaf193..9ddf441ccaa8 100644 --- a/arch/x86/kernel/hw_breakpoint.c +++ b/arch/x86/kernel/hw_breakpoint.c @@ -464,7 +464,7 @@ static int hw_breakpoint_handler(struct die_args *args) { int i, cpu, rc = NOTIFY_STOP; struct perf_event *bp; - unsigned long dr7, dr6; + unsigned long dr6; unsigned long *dr6_p; /* The DR6 value is pointed by args->err */ @@ -479,9 +479,6 @@ static int hw_breakpoint_handler(struct die_args *args) if ((dr6 & DR_TRAP_BITS) == 0) return NOTIFY_DONE; - get_debugreg(dr7, 7); - /* Disable breakpoints during exception handling */ - set_debugreg(0UL, 7); /* * Assert that local interrupts are disabled * Reset the DRn bits in the virtualized register value. @@ -538,7 +535,6 @@ static int hw_breakpoint_handler(struct die_args *args) (dr6 & (~DR_TRAP_BITS))) rc = NOTIFY_DONE; - set_debugreg(dr7, 7); put_cpu(); return rc; diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index 21c8cfce24d3..de5120e2fbe1 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -700,6 +700,57 @@ static bool is_sysenter_singlestep(struct pt_regs *regs) #endif } +static __always_inline void debug_enter(unsigned long *dr6, unsigned long *dr7) +{ + /* + * Disable breakpoints during exception handling; recursive exceptions + * are exceedingly 'fun'. + * + * Since this function is NOKPROBE, and that also applies to + * HW_BREAKPOINT_X, we can't hit a breakpoint before this (XXX except a + * HW_BREAKPOINT_W on our stack) + * + * Entry text is excluded for HW_BP_X and cpu_entry_area, which + * includes the entry stack is excluded for everything. + */ + get_debugreg(*dr7, 7); + set_debugreg(0, 7); + + /* + * Ensure the compiler doesn't lower the above statements into + * the critical section; disabling breakpoints late would not + * be good. + */ + barrier(); + + /* + * The Intel SDM says: + * + * Certain debug exceptions may clear bits 0-3. The remaining + * contents of the DR6 register are never cleared by the + * processor.
To avoid confusion in identifying debug + * exceptions, debug handlers should clear the register before + * returning to the interrupted task. + * + * Keep it simple: clear DR6 immediately. + */ + get_debugreg(*dr6, 6); + set_debugreg(0, 6); + /* Filter out all the reserved bits which are preset to 1 */ + *dr6 &= ~DR6_RESERVED; +} + +static __always_inline void debug_exit(unsigned long dr7) +{ + /* + * Ensure the compiler doesn't raise this statement into + * the critical section; enabling breakpoints early would + * not be good. + */ + barrier(); + set_debugreg(dr7, 7); +} + /* * Our handling of the processor debug registers is non-trivial. * We do not clear them on entry and exit from the kernel. Therefore @@ -727,28 +778,13 @@ static bool is_sysenter_singlestep(struct pt_regs *regs) dotraplinkage void do_debug(struct pt_regs *regs, long error_code) { struct task_struct *tsk = current; + unsigned long dr6, dr7; int user_icebp = 0; - unsigned long dr6; int si_code; - nmi_enter(); - - get_debugreg(dr6, 6); - /* - * The Intel SDM says: - * - * Certain debug exceptions may clear bits 0-3. The remaining - * contents of the DR6 register are never cleared by the - * processor. To avoid confusion in identifying debug - * exceptions, debug handlers should clear the register before - * returning to the interrupted task. - * - * Keep it simple: clear DR6 immediately. - */ - set_debugreg(0, 6); + debug_enter(&dr6, &dr7); - /* Filter out all the reserved bits which are preset to 1 */ - dr6 &= ~DR6_RESERVED; + nmi_enter(); /* * The SDM says "The processor clears the BTF flag when it @@ -786,7 +822,7 @@ dotraplinkage void do_debug(struct pt_regs *regs, long error_code) #endif if (notify_die(DIE_DEBUG, "debug", regs, (long)&dr6, error_code, - SIGTRAP) == NOTIFY_STOP) + SIGTRAP) == NOTIFY_STOP) goto exit; /* @@ -825,6 +861,7 @@ dotraplinkage void do_debug(struct pt_regs *regs, long error_code) exit: nmi_exit(); + debug_exit(dr7); } NOKPROBE_SYMBOL(do_debug); -- cgit v1.2.3 From 2bbc68f8373c0631ebf137f376fbea00e8086be7 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 25 Feb 2020 23:33:26 +0100 Subject: x86/entry: Convert Debug exception to IDTENTRY_DB Convert #DB to IDTENTRY_DB: - Implement the C entry point with DEFINE_IDTENTRY_DEBUG - Emit the ASM stub with DECLARE_IDTENTRY_DEBUG - Remove the ASM idtentry in 64bit - Remove the open coded ASM entry code in 32bit - Fixup the XEN/PV code - Remove the old prototypes No functional change.
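The converted C entry point then selects the matching context tracking itself; condensed from the traps.c hunk below:

    DEFINE_IDTENTRY_DEBUG(exc_debug)
    {
            unsigned long dr6, dr7;

            debug_enter(&dr6, &dr7);

            if (user_mode(regs))
                    idtentry_enter(regs);
            else
                    nmi_enter();
            ...
    }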
Signed-off-by: Thomas Gleixner Reviewed-by: Alexandre Chartre Acked-by: Peter Zijlstra Acked-by: Andy Lutomirski Link: https://lkml.kernel.org/r/20200505135314.900297476@linutronix.de --- arch/x86/entry/entry_32.S | 10 ---------- arch/x86/entry/entry_64.S | 2 -- arch/x86/include/asm/idtentry.h | 4 ++++ arch/x86/include/asm/traps.h | 3 --- arch/x86/kernel/idt.c | 8 ++++---- arch/x86/kernel/traps.c | 21 +++++++++++++-------- arch/x86/xen/enlighten_pv.c | 2 +- arch/x86/xen/xen-asm_64.S | 4 ++-- 8 files changed, 24 insertions(+), 30 deletions(-) (limited to 'arch') diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S index d4961cac73f6..30c6ed3d7c52 100644 --- a/arch/x86/entry/entry_32.S +++ b/arch/x86/entry/entry_32.S @@ -1488,16 +1488,6 @@ ret_to_user: jmp restore_all_switch_stack SYM_CODE_END(handle_exception) -SYM_CODE_START(debug) - /* - * Entry from sysenter is now handled in common_exception - */ - ASM_CLAC - pushl $0 - pushl $do_debug - jmp common_exception -SYM_CODE_END(debug) - SYM_CODE_START(double_fault) 1: /* diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index 3d7f2cc29be3..f47629a7f8e6 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -1074,12 +1074,10 @@ apicinterrupt IRQ_WORK_VECTOR irq_work_interrupt smp_irq_work_interrupt idtentry X86_TRAP_PF page_fault do_page_fault has_error_code=1 -idtentry_mce_db X86_TRAP_DB debug do_debug idtentry_df X86_TRAP_DF double_fault do_double_fault #ifdef CONFIG_XEN_PV idtentry 512 /* dummy */ hypervisor_callback xen_do_hypervisor_callback has_error_code=0 -idtentry X86_TRAP_DB xendebug do_debug has_error_code=0 #endif /* diff --git a/arch/x86/include/asm/idtentry.h b/arch/x86/include/asm/idtentry.h index 1f067e6c4051..fcd4230a979b 100644 --- a/arch/x86/include/asm/idtentry.h +++ b/arch/x86/include/asm/idtentry.h @@ -262,4 +262,8 @@ DECLARE_IDTENTRY_MCE(X86_TRAP_MC, exc_machine_check); DECLARE_IDTENTRY_NMI(X86_TRAP_NMI, exc_nmi); DECLARE_IDTENTRY_XEN(X86_TRAP_NMI, nmi); +/* #DB */ +DECLARE_IDTENTRY_DEBUG(X86_TRAP_DB, exc_debug); +DECLARE_IDTENTRY_XEN(X86_TRAP_DB, debug); + #endif diff --git a/arch/x86/include/asm/traps.h b/arch/x86/include/asm/traps.h index 57b83ae19c15..9bd602d0130d 100644 --- a/arch/x86/include/asm/traps.h +++ b/arch/x86/include/asm/traps.h @@ -11,7 +11,6 @@ #define dotraplinkage __visible -asmlinkage void debug(void); #ifdef CONFIG_X86_64 asmlinkage void double_fault(void); #endif @@ -19,12 +18,10 @@ asmlinkage void page_fault(void); asmlinkage void async_page_fault(void); #if defined(CONFIG_X86_64) && defined(CONFIG_XEN_PV) -asmlinkage void xen_xendebug(void); asmlinkage void xen_double_fault(void); asmlinkage void xen_page_fault(void); #endif -dotraplinkage void do_debug(struct pt_regs *regs, long error_code); dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code, unsigned long cr2); dotraplinkage void do_page_fault(struct pt_regs *regs, unsigned long error_code, unsigned long address); diff --git a/arch/x86/kernel/idt.c b/arch/x86/kernel/idt.c index d3fecd88677c..ddf3f3db3235 100644 --- a/arch/x86/kernel/idt.c +++ b/arch/x86/kernel/idt.c @@ -59,7 +59,7 @@ static bool idt_setup_done __initdata; * stacks work only after cpu_init(). 
*/ static const __initconst struct idt_data early_idts[] = { - INTG(X86_TRAP_DB, debug), + INTG(X86_TRAP_DB, asm_exc_debug), SYSG(X86_TRAP_BP, asm_exc_int3), #ifdef CONFIG_X86_32 INTG(X86_TRAP_PF, page_fault), @@ -93,7 +93,7 @@ static const __initconst struct idt_data def_idts[] = { #else INTG(X86_TRAP_DF, double_fault), #endif - INTG(X86_TRAP_DB, debug), + INTG(X86_TRAP_DB, asm_exc_debug), #ifdef CONFIG_X86_MCE INTG(X86_TRAP_MC, asm_exc_machine_check), @@ -164,7 +164,7 @@ static const __initconst struct idt_data early_pf_idts[] = { * stack set to DEFAULT_STACK (0). Required for NMI trap handling. */ static const __initconst struct idt_data dbg_idts[] = { - INTG(X86_TRAP_DB, debug), + INTG(X86_TRAP_DB, asm_exc_debug), }; #endif @@ -185,7 +185,7 @@ gate_desc debug_idt_table[IDT_ENTRIES] __page_aligned_bss; * cpu_init() when the TSS has been initialized. */ static const __initconst struct idt_data ist_idts[] = { - ISTG(X86_TRAP_DB, debug, IST_INDEX_DB), + ISTG(X86_TRAP_DB, asm_exc_debug, IST_INDEX_DB), ISTG(X86_TRAP_NMI, asm_exc_nmi, IST_INDEX_NMI), ISTG(X86_TRAP_DF, double_fault, IST_INDEX_DF), #ifdef CONFIG_X86_MCE diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index de5120e2fbe1..569408a681b6 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -775,7 +775,7 @@ static __always_inline void debug_exit(unsigned long dr7) * * May run on IST stack. */ -dotraplinkage void do_debug(struct pt_regs *regs, long error_code) +DEFINE_IDTENTRY_DEBUG(exc_debug) { struct task_struct *tsk = current; unsigned long dr6, dr7; @@ -784,7 +784,10 @@ dotraplinkage void do_debug(struct pt_regs *regs, long error_code) debug_enter(&dr6, &dr7); - nmi_enter(); + if (user_mode(regs)) + idtentry_enter(regs); + else + nmi_enter(); /* * The SDM says "The processor clears the BTF flag when it @@ -821,7 +824,7 @@ dotraplinkage void do_debug(struct pt_regs *regs, long error_code) goto exit; #endif - if (notify_die(DIE_DEBUG, "debug", regs, (long)&dr6, error_code, + if (notify_die(DIE_DEBUG, "debug", regs, (long)&dr6, 0, SIGTRAP) == NOTIFY_STOP) goto exit; @@ -835,8 +838,8 @@ dotraplinkage void do_debug(struct pt_regs *regs, long error_code) cond_local_irq_enable(regs); if (v8086_mode(regs)) { - handle_vm86_trap((struct kernel_vm86_regs *) regs, error_code, - X86_TRAP_DB); + handle_vm86_trap((struct kernel_vm86_regs *) regs, 0, + X86_TRAP_DB); cond_local_irq_disable(regs); debug_stack_usage_dec(); goto exit; @@ -855,15 +858,17 @@ dotraplinkage void do_debug(struct pt_regs *regs, long error_code) } si_code = get_si_code(tsk->thread.debugreg6); if (tsk->thread.debugreg6 & (DR_STEP | DR_TRAP_BITS) || user_icebp) - send_sigtrap(regs, error_code, si_code); + send_sigtrap(regs, 0, si_code); cond_local_irq_disable(regs); debug_stack_usage_dec(); exit: - nmi_exit(); + if (user_mode(regs)) + idtentry_exit(regs); + else + nmi_exit(); debug_exit(dr7); } -NOKPROBE_SYMBOL(do_debug); /* * Note that we play around with the 'TS' bit in an attempt to get diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c index 0d6c2789e676..376851d1039a 100644 --- a/arch/x86/xen/enlighten_pv.c +++ b/arch/x86/xen/enlighten_pv.c @@ -615,7 +615,7 @@ struct trap_array_entry { .ist_okay = ist_ok } static struct trap_array_entry trap_array[] = { - { debug, xen_xendebug, true }, + TRAP_ENTRY_REDIR(exc_debug, exc_xendebug, true ), { double_fault, xen_double_fault, true }, #ifdef CONFIG_X86_MCE TRAP_ENTRY(exc_machine_check, true ), diff --git a/arch/x86/xen/xen-asm_64.S b/arch/x86/xen/xen-asm_64.S index 
04fa01b096ee..9999ea377476 100644 --- a/arch/x86/xen/xen-asm_64.S +++ b/arch/x86/xen/xen-asm_64.S @@ -29,8 +29,8 @@ _ASM_NOKPROBE(xen_\name) .endm xen_pv_trap asm_exc_divide_error -xen_pv_trap debug -xen_pv_trap xendebug +xen_pv_trap asm_exc_debug +xen_pv_trap asm_exc_xendebug xen_pv_trap asm_exc_int3 xen_pv_trap asm_exc_xennmi xen_pv_trap asm_exc_overflow -- cgit v1.2.3 From df7ccaffd2035ebd4bfbb2d980e5817c31f4a891 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 25 Feb 2020 23:33:27 +0100 Subject: x86/entry/64: Remove error code clearing from #DB and #MCE ASM stub The C entry points do not expect an error code. Signed-off-by: Thomas Gleixner Reviewed-by: Alexandre Chartre Acked-by: Peter Zijlstra Acked-by: Andy Lutomirski Link: https://lkml.kernel.org/r/20200505135314.992621707@linutronix.de --- arch/x86/entry/entry_64.S | 1 - 1 file changed, 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index f47629a7f8e6..eeb428582d3d 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -642,7 +642,6 @@ SYM_CODE_START(\asmsym) .endif movq %rsp, %rdi /* pt_regs pointer */ - xorl %esi, %esi /* Clear the error code */ .if \vector == X86_TRAP_DB subq $DB_STACK_OFFSET, CPU_TSS_IST(IST_INDEX_DB) -- cgit v1.2.3 From f08e32ec3cfcf9e6d3640007de590c225ab2e201 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 25 Feb 2020 23:33:28 +0100 Subject: x86/idtentry: Provide IDTENTRY_NOIST variants for #DB and #MC Provide NOIST entry point macros which allow implementing NOIST variants of the C entry points. These are invoked when #DB or #MC enter from user space. This allows explicit handling of the difference between user mode and kernel mode entry later. Signed-off-by: Thomas Gleixner Reviewed-by: Alexandre Chartre Acked-by: Peter Zijlstra Acked-by: Andy Lutomirski Link: https://lkml.kernel.org/r/20200505135315.084882104@linutronix.de --- arch/x86/include/asm/idtentry.h | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/idtentry.h b/arch/x86/include/asm/idtentry.h index fcd4230a979b..060f9e358b1c 100644 --- a/arch/x86/include/asm/idtentry.h +++ b/arch/x86/include/asm/idtentry.h @@ -138,10 +138,12 @@ __visible noinstr void func(struct pt_regs *regs) * @vector: Vector number (ignored for C) * @func: Function name of the entry point * - * Maps to DECLARE_IDTENTRY_RAW + * Maps to DECLARE_IDTENTRY_RAW, but declares also the NOIST C handler + * which is called from the ASM entry point on user mode entry */ #define DECLARE_IDTENTRY_IST(vector, func) \ - DECLARE_IDTENTRY_RAW(vector, func) + DECLARE_IDTENTRY_RAW(vector, func); \ + __visible void noist_##func(struct pt_regs *regs) /** * DEFINE_IDTENTRY_IST - Emit code for IST entry points @@ -152,6 +154,17 @@ __visible noinstr void func(struct pt_regs *regs) #define DEFINE_IDTENTRY_IST(func) \ DEFINE_IDTENTRY_RAW(func) +/** + * DEFINE_IDTENTRY_NOIST - Emit code for NOIST entry points which + * belong to a IST entry point (MCE, DB) + * @func: Function name of the entry point. Must be the same as + * the function name of the corresponding IST variant + * + * Maps to DEFINE_IDTENTRY_RAW().
+ */ +#define DEFINE_IDTENTRY_NOIST(func) \ + DEFINE_IDTENTRY_RAW(noist_##func) + #else /* CONFIG_X86_64 */ /* Maps to a regular IDTENTRY on 32bit for now */ # define DECLARE_IDTENTRY_IST DECLARE_IDTENTRY @@ -161,12 +174,14 @@ __visible noinstr void func(struct pt_regs *regs) /* C-Code mapping */ #define DECLARE_IDTENTRY_MCE DECLARE_IDTENTRY_IST #define DEFINE_IDTENTRY_MCE DEFINE_IDTENTRY_IST +#define DEFINE_IDTENTRY_MCE_USER DEFINE_IDTENTRY_NOIST #define DECLARE_IDTENTRY_NMI DECLARE_IDTENTRY_IST #define DEFINE_IDTENTRY_NMI DEFINE_IDTENTRY_IST #define DECLARE_IDTENTRY_DEBUG DECLARE_IDTENTRY_IST #define DEFINE_IDTENTRY_DEBUG DEFINE_IDTENTRY_IST +#define DEFINE_IDTENTRY_DEBUG_USER DEFINE_IDTENTRY_NOIST /** * DECLARE_IDTENTRY_XEN - Declare functions for XEN redirect IDT entry points -- cgit v1.2.3 From 4c0dcd8350a03cb65f645a039f2772be880ee74a Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 25 Feb 2020 23:33:29 +0100 Subject: x86/entry: Implement user mode C entry points for #DB and #MCE The MCE entry point uses the same mechanism as the IST entry point for now. For #DB split the inner workings and just keep the nmi_enter/exit() magic in the IST variant. Fixup the ASM code to emit the proper noist_##cfunc call. Signed-off-by: Thomas Gleixner Reviewed-by: Alexandre Chartre Acked-by: Peter Zijlstra Acked-by: Andy Lutomirski Link: https://lkml.kernel.org/r/20200505135315.177564104@linutronix.de --- arch/x86/entry/entry_64.S | 2 +- arch/x86/kernel/cpu/mce/core.c | 40 +++++++++++++++++++----- arch/x86/kernel/traps.c | 70 ++++++++++++++++++++++++++++++++---------- 3 files changed, 88 insertions(+), 24 deletions(-) (limited to 'arch') diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index eeb428582d3d..d302839b9b3c 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -657,7 +657,7 @@ SYM_CODE_START(\asmsym) /* Switch to the regular task stack and use the noist entry point */ .Lfrom_usermode_switch_stack_\@: - idtentry_body vector \cfunc, has_error_code=0 + idtentry_body vector noist_\cfunc, has_error_code=0 sane=1 _ASM_NOKPROBE(\asmsym) SYM_CODE_END(\asmsym) diff --git a/arch/x86/kernel/cpu/mce/core.c b/arch/x86/kernel/cpu/mce/core.c index 317765245190..a72c0135a5ec 100644 --- a/arch/x86/kernel/cpu/mce/core.c +++ b/arch/x86/kernel/cpu/mce/core.c @@ -1904,24 +1904,50 @@ static void unexpected_machine_check(struct pt_regs *regs) /* Call the installed machine check handler for this CPU setup. */ void (*machine_check_vector)(struct pt_regs *) = unexpected_machine_check; -DEFINE_IDTENTRY_MCE(exc_machine_check) +static __always_inline void exc_machine_check_kernel(struct pt_regs *regs) { + /* + * Only required when from kernel mode. See + * mce_check_crashing_cpu() for details. + */ if (machine_check_vector == do_machine_check && mce_check_crashing_cpu()) return; - if (user_mode(regs)) - idtentry_enter(regs); - else - nmi_enter(); + nmi_enter(); + machine_check_vector(regs); + nmi_exit(); +} +static __always_inline void exc_machine_check_user(struct pt_regs *regs) +{ + idtentry_enter(regs); machine_check_vector(regs); + idtentry_exit(regs); +} +#ifdef CONFIG_X86_64 +/* MCE hit kernel mode */ +DEFINE_IDTENTRY_MCE(exc_machine_check) +{ + exc_machine_check_kernel(regs); +} + +/* The user mode variant. 
*/ +DEFINE_IDTENTRY_MCE_USER(exc_machine_check) +{ + exc_machine_check_user(regs); +} +#else +/* 32bit unified entry point */ +DEFINE_IDTENTRY_MCE(exc_machine_check) +{ if (user_mode(regs)) - idtentry_exit(regs); + exc_machine_check_user(regs); else - nmi_exit(); + exc_machine_check_kernel(regs); } +#endif /* * Called for each booted CPU to set up machine checks. diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index 569408a681b6..4f248c5d5cab 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -775,20 +775,12 @@ static __always_inline void debug_exit(unsigned long dr7) * * May run on IST stack. */ -DEFINE_IDTENTRY_DEBUG(exc_debug) +static noinstr void handle_debug(struct pt_regs *regs, unsigned long dr6) { struct task_struct *tsk = current; - unsigned long dr6, dr7; int user_icebp = 0; int si_code; - debug_enter(&dr6, &dr7); - - if (user_mode(regs)) - idtentry_enter(regs); - else - nmi_enter(); - /* * The SDM says "The processor clears the BTF flag when it @@ -800,7 +792,7 @@ DEFINE_IDTENTRY_DEBUG(exc_debug) is_sysenter_singlestep(regs))) { dr6 &= ~DR_STEP; if (!dr6) - goto exit; + return; /* * else we might have gotten a single-step trap and hit a * watchpoint at the same time, in which case we should fall @@ -821,12 +813,12 @@ DEFINE_IDTENTRY_DEBUG(exc_debug) #ifdef CONFIG_KPROBES if (kprobe_debug_handler(regs)) - goto exit; + return; #endif if (notify_die(DIE_DEBUG, "debug", regs, (long)&dr6, 0, SIGTRAP) == NOTIFY_STOP) - goto exit; + return; /* * Let others (NMI) know that the debug stack is in use @@ -842,7 +834,7 @@ DEFINE_IDTENTRY_DEBUG(exc_debug) X86_TRAP_DB); cond_local_irq_disable(regs); debug_stack_usage_dec(); - goto exit; + return; } if (WARN_ON_ONCE((dr6 & DR_STEP) && !user_mode(regs))) { @@ -861,14 +853,60 @@ DEFINE_IDTENTRY_DEBUG(exc_debug) send_sigtrap(regs, 0, si_code); cond_local_irq_disable(regs); debug_stack_usage_dec(); +} + +static __always_inline void exc_debug_kernel(struct pt_regs *regs, + unsigned long dr6) +{ + nmi_enter(); + handle_debug(regs, dr6); + nmi_exit(); +} + +static __always_inline void exc_debug_user(struct pt_regs *regs, + unsigned long dr6) +{ + idtentry_enter(regs); + handle_debug(regs, dr6); + idtentry_exit(regs); +} + +#ifdef CONFIG_X86_64 +/* IST stack entry */ +DEFINE_IDTENTRY_DEBUG(exc_debug) +{ + unsigned long dr6, dr7; + + debug_enter(&dr6, &dr7); + exc_debug_kernel(regs, dr6); + debug_exit(dr7); +} + +/* User entry, runs on regular task stack */ +DEFINE_IDTENTRY_DEBUG_USER(exc_debug) +{ + unsigned long dr6, dr7; + + debug_enter(&dr6, &dr7); + exc_debug_user(regs, dr6); + debug_exit(dr7); +} +#else +/* 32 bit does not have separate entry points. */ +DEFINE_IDTENTRY_DEBUG(exc_debug) +{ + unsigned long dr6, dr7; + + debug_enter(&dr6, &dr7); -exit: if (user_mode(regs)) - idtentry_exit(regs); + exc_debug_user(regs, dr6); else - nmi_exit(); + exc_debug_kernel(regs, dr6); + debug_exit(dr7); } +#endif /* * Note that we play around with the 'TS' bit in an attempt to get -- cgit v1.2.3 From 9347f41352181bf4a7e663f7b5f4a4bb32244d73 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 4 May 2020 19:56:26 +0200 Subject: x86/traps: Restructure #DB handling Now that there are separate entry points, move the kernel/user_mode specific checks into the entry functions so the common handling code does not need the extra mode checks. Make the code more readable while at it.
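Condensed from the diff below, the wrappers now carry the mode specific checks and hand a pre-filtered dr6 to the common code:

    static __always_inline void exc_debug_kernel(struct pt_regs *regs,
                                                 unsigned long dr6)
    {
            nmi_enter();
            clear_thread_flag(TIF_BLOCKSTEP);
            /* Catch SYSENTER with TF set and clear DR_STEP */
            if ((dr6 & DR_STEP) && is_sysenter_singlestep(regs))
                    dr6 &= ~DR_STEP;
            /* The kernel does not use INT1, so dr6 == 0 means nothing to do */
            if (dr6)
                    handle_debug(regs, dr6, false);
            nmi_exit();
    }

    static __always_inline void exc_debug_user(struct pt_regs *regs,
                                               unsigned long dr6)
    {
            idtentry_enter(regs);
            clear_thread_flag(TIF_BLOCKSTEP);
            /* dr6 == 0 is very likely an icebp/int01 trap, which wants a SIGTRAP */
            handle_debug(regs, dr6, !dr6);
            idtentry_exit(regs);
    }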
Signed-off-by: Thomas Gleixner Reviewed-by: Alexandre Chartre Acked-by: Peter Zijlstra Acked-by: Andy Lutomirski Link: https://lkml.kernel.org/r/20200505135315.283276272@linutronix.de --- arch/x86/kernel/traps.c | 69 +++++++++++++++++++++++++------------------------ 1 file changed, 35 insertions(+), 34 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index 4f248c5d5cab..b62e962871f2 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -775,39 +775,12 @@ static __always_inline void debug_exit(unsigned long dr7) * * May run on IST stack. */ -static noinstr void handle_debug(struct pt_regs *regs, unsigned long dr6) +static void noinstr handle_debug(struct pt_regs *regs, unsigned long dr6, + bool user_icebp) { struct task_struct *tsk = current; - int user_icebp = 0; int si_code; - /* - * The SDM says "The processor clears the BTF flag when it - * generates a debug exception." Clear TIF_BLOCKSTEP to keep - * TIF_BLOCKSTEP in sync with the hardware BTF flag. - */ - clear_tsk_thread_flag(tsk, TIF_BLOCKSTEP); - - if (unlikely(!user_mode(regs) && (dr6 & DR_STEP) && - is_sysenter_singlestep(regs))) { - dr6 &= ~DR_STEP; - if (!dr6) - return; - /* - * else we might have gotten a single-step trap and hit a - * watchpoint at the same time, in which case we should fall - * through and handle the watchpoint. - */ - } - - /* - * If dr6 has no reason to give us about the origin of this trap, - * then it's very likely the result of an icebp/int01 trap. - * User wants a sigtrap for that. - */ - if (!dr6 && user_mode(regs)) - user_icebp = 1; - /* Store the virtualized DR6 value */ tsk->thread.debugreg6 = dr6; @@ -832,9 +805,7 @@ static noinstr void handle_debug(struct pt_regs *regs, unsigned long dr6) if (v8086_mode(regs)) { handle_vm86_trap((struct kernel_vm86_regs *) regs, 0, X86_TRAP_DB); - cond_local_irq_disable(regs); - debug_stack_usage_dec(); - return; + goto out; } if (WARN_ON_ONCE((dr6 & DR_STEP) && !user_mode(regs))) { @@ -848,9 +819,12 @@ static noinstr void handle_debug(struct pt_regs *regs, unsigned long dr6) set_tsk_thread_flag(tsk, TIF_SINGLESTEP); regs->flags &= ~X86_EFLAGS_TF; } + si_code = get_si_code(tsk->thread.debugreg6); if (tsk->thread.debugreg6 & (DR_STEP | DR_TRAP_BITS) || user_icebp) send_sigtrap(regs, 0, si_code); + +out: cond_local_irq_disable(regs); debug_stack_usage_dec(); } @@ -859,7 +833,27 @@ static __always_inline void exc_debug_kernel(struct pt_regs *regs, unsigned long dr6) { nmi_enter(); - handle_debug(regs, dr6); + /* + * The SDM says "The processor clears the BTF flag when it + * generates a debug exception." Clear TIF_BLOCKSTEP to keep + * TIF_BLOCKSTEP in sync with the hardware BTF flag. + */ + clear_thread_flag(TIF_BLOCKSTEP); + + /* + * Catch SYSENTER with TF set and clear DR_STEP. If this hit a + * watchpoint at the same time then that will still be handled. + */ + if ((dr6 & DR_STEP) && is_sysenter_singlestep(regs)) + dr6 &= ~DR_STEP; + + /* + * If DR6 is zero, no point in trying to handle it. The kernel is + * not using INT1. + */ + if (dr6) + handle_debug(regs, dr6, false); + nmi_exit(); } @@ -867,7 +861,14 @@ static __always_inline void exc_debug_user(struct pt_regs *regs, unsigned long dr6) { idtentry_enter(regs); - handle_debug(regs, dr6); + clear_thread_flag(TIF_BLOCKSTEP); + + /* + * If dr6 has no reason to give us about the origin of this trap, + * then it's very likely the result of an icebp/int01 trap. + * User wants a sigtrap for that. 
+ */ + handle_debug(regs, dr6, !dr6); + idtentry_exit(regs); } -- cgit v1.2.3 From 75347bb2535a6d5549cc3e436467b7c40d7bb874 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 30 Apr 2020 11:07:20 +0200 Subject: x86/traps: Address objtool noinstr complaints in #DB The functions invoked from handle_debug() can be instrumented. Tell objtool about it. Signed-off-by: Thomas Gleixner Reviewed-by: Alexandre Chartre Acked-by: Peter Zijlstra Acked-by: Andy Lutomirski Link: https://lkml.kernel.org/r/20200505135315.380927730@linutronix.de --- arch/x86/kernel/traps.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index b62e962871f2..41bb0cb9df84 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -784,14 +784,19 @@ static void noinstr handle_debug(struct pt_regs *regs, unsigned long dr6, /* Store the virtualized DR6 value */ tsk->thread.debugreg6 = dr6; + instrumentation_begin(); #ifdef CONFIG_KPROBES - if (kprobe_debug_handler(regs)) + if (kprobe_debug_handler(regs)) { + instrumentation_end(); return; + } #endif if (notify_die(DIE_DEBUG, "debug", regs, (long)&dr6, 0, - SIGTRAP) == NOTIFY_STOP) + SIGTRAP) == NOTIFY_STOP) { + instrumentation_end(); return; + } /* * Let others (NMI) know that the debug stack is in use @@ -827,6 +832,7 @@ static void noinstr handle_debug(struct pt_regs *regs, unsigned long dr6, out: cond_local_irq_disable(regs); debug_stack_usage_dec(); + instrumentation_end(); } static __always_inline void exc_debug_kernel(struct pt_regs *regs, -- cgit v1.2.3 From 865d3a9afe7eddf320e7f61a442864d6efe27505 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 21 Apr 2020 21:22:36 +0200 Subject: x86/mce: Address objtool noinstr complaints Mark the relevant functions noinstr and use the plain non-instrumented MSR accessors. The only odd part is the instrumentation_begin()/end() pair around the indirect machine_check_vector() call as objtool can't figure that out. The possibly invoked functions are annotated correctly. Also use the notrace variant of nmi_enter/exit(). If MCEs happen then hardware latency tracing is the least of the worries.
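The kernel mode path then looks like this (sketch; the user mode variant uses idtentry_enter/exit() with the same annotation):

    static __always_inline void exc_machine_check_kernel(struct pt_regs *regs)
    {
            ...
            nmi_enter();
            /* Indirect call with noinstr targets; annotate it for objtool */
            instrumentation_begin();
            machine_check_vector(regs);
            instrumentation_end();
            nmi_exit();
    }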
Signed-off-by: Thomas Gleixner Reviewed-by: Alexandre Chartre Acked-by: Peter Zijlstra Acked-by: Andy Lutomirski Link: https://lkml.kernel.org/r/20200505135315.476734898@linutronix.de --- arch/x86/kernel/cpu/mce/core.c | 20 +++++++++++++++----- arch/x86/kernel/cpu/mce/p5.c | 4 +++- arch/x86/kernel/cpu/mce/winchip.c | 4 +++- kernel/time/timekeeping.c | 2 +- 4 files changed, 22 insertions(+), 8 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/cpu/mce/core.c b/arch/x86/kernel/cpu/mce/core.c index a72c0135a5ec..a32a7e236bb1 100644 --- a/arch/x86/kernel/cpu/mce/core.c +++ b/arch/x86/kernel/cpu/mce/core.c @@ -130,7 +130,7 @@ static void (*quirk_no_way_out)(int bank, struct mce *m, struct pt_regs *regs); BLOCKING_NOTIFIER_HEAD(x86_mce_decoder_chain); /* Do initial initialization of a struct mce */ -void mce_setup(struct mce *m) +noinstr void mce_setup(struct mce *m) { memset(m, 0, sizeof(struct mce)); m->cpu = m->extcpu = smp_processor_id(); @@ -140,12 +140,12 @@ void mce_setup(struct mce *m) m->cpuid = cpuid_eax(1); m->socketid = cpu_data(m->extcpu).phys_proc_id; m->apicid = cpu_data(m->extcpu).initial_apicid; - rdmsrl(MSR_IA32_MCG_CAP, m->mcgcap); + m->mcgcap = __rdmsr(MSR_IA32_MCG_CAP); if (this_cpu_has(X86_FEATURE_INTEL_PPIN)) - rdmsrl(MSR_PPIN, m->ppin); + m->ppin = __rdmsr(MSR_PPIN); else if (this_cpu_has(X86_FEATURE_AMD_PPIN)) - rdmsrl(MSR_AMD_PPIN, m->ppin); + m->ppin = __rdmsr(MSR_AMD_PPIN); m->microcode = boot_cpu_data.microcode; } @@ -1895,10 +1895,12 @@ bool filter_mce(struct mce *m) } /* Handle unconfigured int18 (should never happen) */ -static void unexpected_machine_check(struct pt_regs *regs) +static noinstr void unexpected_machine_check(struct pt_regs *regs) { + instrumentation_begin(); pr_err("CPU#%d: Unexpected int18 (Machine Check)\n", smp_processor_id()); + instrumentation_end(); } /* Call the installed machine check handler for this CPU setup. */ @@ -1915,14 +1917,22 @@ static __always_inline void exc_machine_check_kernel(struct pt_regs *regs) return; nmi_enter(); + /* + * The call targets are marked noinstr, but objtool can't figure + * that out because it's an indirect call. Annotate it. 
+ */ + instrumentation_begin(); machine_check_vector(regs); + instrumentation_end(); nmi_exit(); } static __always_inline void exc_machine_check_user(struct pt_regs *regs) { idtentry_enter(regs); + instrumentation_begin(); machine_check_vector(regs); + instrumentation_end(); idtentry_exit(regs); } diff --git a/arch/x86/kernel/cpu/mce/p5.c b/arch/x86/kernel/cpu/mce/p5.c index eaebc4ce7398..19e90cae8e97 100644 --- a/arch/x86/kernel/cpu/mce/p5.c +++ b/arch/x86/kernel/cpu/mce/p5.c @@ -21,10 +21,11 @@ int mce_p5_enabled __read_mostly; /* Machine check handler for Pentium class Intel CPUs: */ -static void pentium_machine_check(struct pt_regs *regs) +static noinstr void pentium_machine_check(struct pt_regs *regs) { u32 loaddr, hi, lotype; + instrumentation_begin(); rdmsr(MSR_IA32_P5_MC_ADDR, loaddr, hi); rdmsr(MSR_IA32_P5_MC_TYPE, lotype, hi); @@ -37,6 +38,7 @@ static void pentium_machine_check(struct pt_regs *regs) } add_taint(TAINT_MACHINE_CHECK, LOCKDEP_NOW_UNRELIABLE); + instrumentation_end(); } /* Set up machine check reporting for processors with Intel style MCE: */ diff --git a/arch/x86/kernel/cpu/mce/winchip.c b/arch/x86/kernel/cpu/mce/winchip.c index 90e3d60c645e..9c9f0abd2d7f 100644 --- a/arch/x86/kernel/cpu/mce/winchip.c +++ b/arch/x86/kernel/cpu/mce/winchip.c @@ -17,10 +17,12 @@ #include "internal.h" /* Machine check handler for WinChip C6: */ -static void winchip_machine_check(struct pt_regs *regs) +static noinstr void winchip_machine_check(struct pt_regs *regs) { + instrumentation_begin(); pr_emerg("CPU0: Machine Check Exception.\n"); add_taint(TAINT_MACHINE_CHECK, LOCKDEP_NOW_UNRELIABLE); + instrumentation_end(); } /* Set up machine check reporting on the Winchip C6 series */ diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index 9ebaab13339d..d20d489841c8 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -953,7 +953,7 @@ EXPORT_SYMBOL_GPL(ktime_get_real_seconds); * but without the sequence counter protect. This internal function * is called just when timekeeping lock is already held. */ -time64_t __ktime_get_real_seconds(void) +noinstr time64_t __ktime_get_real_seconds(void) { struct timekeeper *tk = &tk_core.timekeeper; -- cgit v1.2.3 From 6a8dfa8e4053adfcf02ee4d96287943064166beb Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 25 Feb 2020 23:33:30 +0100 Subject: x86/idtentry: Provide IDTENTRY_DF Provide a separate macro for #DF as this needs to emit paranoid only code and has also a special ASM stub in 32bit. 
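Intended usage, as done by the conversion patch later in this series (sketch only; the real handler body is omitted):

  DEFINE_IDTENTRY_DF(exc_double_fault)
  {
          /*
           * Paranoid-only entry/exit is handled in the ASM stub. On
           * 32bit the doublefault shim additionally hands in CR2 via
           * the address argument.
           */
  }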
Signed-off-by: Thomas Gleixner Reviewed-by: Alexandre Chartre Acked-by: Peter Zijlstra Acked-by: Andy Lutomirski Link: https://lkml.kernel.org/r/20200505135315.583415264@linutronix.de --- arch/x86/include/asm/idtentry.h | 87 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 87 insertions(+) (limited to 'arch') diff --git a/arch/x86/include/asm/idtentry.h b/arch/x86/include/asm/idtentry.h index 060f9e358b1c..9521f329bbbe 100644 --- a/arch/x86/include/asm/idtentry.h +++ b/arch/x86/include/asm/idtentry.h @@ -132,6 +132,35 @@ static __always_inline void __##func(struct pt_regs *regs, \ #define DEFINE_IDTENTRY_RAW(func) \ __visible noinstr void func(struct pt_regs *regs) +/** + * DECLARE_IDTENTRY_RAW_ERRORCODE - Declare functions for raw IDT entry points + * Error code pushed by hardware + * @vector: Vector number (ignored for C) + * @func: Function name of the entry point + * + * Maps to DECLARE_IDTENTRY_ERRORCODE() + */ +#define DECLARE_IDTENTRY_RAW_ERRORCODE(vector, func) \ + DECLARE_IDTENTRY_ERRORCODE(vector, func) + +/** + * DEFINE_IDTENTRY_RAW_ERRORCODE - Emit code for raw IDT entry points + * @func: Function name of the entry point + * + * @func is called from ASM entry code with interrupts disabled. + * + * The macro is written so it acts as function definition. Append the + * body with a pair of curly brackets. + * + * Contrary to DEFINE_IDTENTRY_ERRORCODE() this does not invoke the + * idtentry_enter/exit() helpers before and after the body invocation. This + * needs to be done in the body itself if applicable. Use if extra work + * is required before the enter/exit() helpers are invoked. + */ +#define DEFINE_IDTENTRY_RAW_ERRORCODE(func) \ +__visible noinstr void func(struct pt_regs *regs, unsigned long error_code) + + #ifdef CONFIG_X86_64 /** * DECLARE_IDTENTRY_IST - Declare functions for IST handling IDT entry points @@ -165,10 +194,58 @@ __visible noinstr void func(struct pt_regs *regs) #define DEFINE_IDTENTRY_NOIST(func) \ DEFINE_IDTENTRY_RAW(noist_##func) +/** + * DECLARE_IDTENTRY_DF - Declare functions for double fault + * @vector: Vector number (ignored for C) + * @func: Function name of the entry point + * + * Maps to DECLARE_IDTENTRY_RAW_ERRORCODE + */ +#define DECLARE_IDTENTRY_DF(vector, func) \ + DECLARE_IDTENTRY_RAW_ERRORCODE(vector, func) + +/** + * DEFINE_IDTENTRY_DF - Emit code for double fault + * @func: Function name of the entry point + * + * Maps to DEFINE_IDTENTRY_RAW_ERRORCODE + */ +#define DEFINE_IDTENTRY_DF(func) \ + DEFINE_IDTENTRY_RAW_ERRORCODE(func) + #else /* CONFIG_X86_64 */ + /* Maps to a regular IDTENTRY on 32bit for now */ # define DECLARE_IDTENTRY_IST DECLARE_IDTENTRY # define DEFINE_IDTENTRY_IST DEFINE_IDTENTRY + +/** + * DECLARE_IDTENTRY_DF - Declare functions for double fault 32bit variant + * @vector: Vector number (ignored for C) + * @func: Function name of the entry point + * + * Declares two functions: + * - The ASM entry point: asm_##func + * - The C handler called from the C shim + */ +#define DECLARE_IDTENTRY_DF(vector, func) \ + asmlinkage void asm_##func(void); \ + __visible void func(struct pt_regs *regs, \ + unsigned long error_code, \ + unsigned long address) + +/** + * DEFINE_IDTENTRY_DF - Emit code for double fault on 32bit + * @func: Function name of the entry point + * + * This is called through the doublefault shim which already provides + * cr2 in the address argument. 
+ */ +#define DEFINE_IDTENTRY_DF(func) \ +__visible noinstr void func(struct pt_regs *regs, \ + unsigned long error_code, \ + unsigned long address) + #endif /* !CONFIG_X86_64 */ /* C-Code mapping */ @@ -212,6 +289,9 @@ __visible noinstr void func(struct pt_regs *regs) #define DECLARE_IDTENTRY_RAW(vector, func) \ DECLARE_IDTENTRY(vector, func) +#define DECLARE_IDTENTRY_RAW_ERRORCODE(vector, func) \ + DECLARE_IDTENTRY_ERRORCODE(vector, func) + #ifdef CONFIG_X86_64 # define DECLARE_IDTENTRY_MCE(vector, func) \ idtentry_mce_db vector asm_##func func @@ -219,12 +299,19 @@ __visible noinstr void func(struct pt_regs *regs) # define DECLARE_IDTENTRY_DEBUG(vector, func) \ idtentry_mce_db vector asm_##func func +# define DECLARE_IDTENTRY_DF(vector, func) \ + idtentry_df vector asm_##func func + #else # define DECLARE_IDTENTRY_MCE(vector, func) \ DECLARE_IDTENTRY(vector, func) # define DECLARE_IDTENTRY_DEBUG(vector, func) \ DECLARE_IDTENTRY(vector, func) + +/* No ASM emitted for DF as this goes through a C shim */ +# define DECLARE_IDTENTRY_DF(vector, func) + #endif /* No ASM code emitted for NMI */ -- cgit v1.2.3 From c29c775a554f7060b6fb31b68f88a3c9087cf1c5 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 25 Feb 2020 23:33:31 +0100 Subject: x86/entry: Convert double fault exception to IDTENTRY_DF Convert #DF to IDTENTRY_DF - Implement the C entry point with DEFINE_IDTENTRY_DF - Emit the ASM stub with DECLARE_IDTENTRY_DF on 64bit - Remove the ASM idtentry in 64bit - Adjust the 32bit shim code - Fixup the XEN/PV code - Remove the old prototypes No functional change. Signed-off-by: Thomas Gleixner Reviewed-by: Alexandre Chartre Acked-by: Peter Zijlstra Acked-by: Andy Lutomirski Link: https://lkml.kernel.org/r/20200505135315.583415264@linutronix.de --- arch/x86/entry/entry_32.S | 4 ++-- arch/x86/entry/entry_64.S | 10 +--------- arch/x86/include/asm/idtentry.h | 3 +++ arch/x86/include/asm/traps.h | 5 ----- arch/x86/kernel/doublefault_32.c | 10 ++++------ arch/x86/kernel/idt.c | 4 ++-- arch/x86/kernel/traps.c | 17 ++++++++++++++--- arch/x86/xen/enlighten_pv.c | 4 ++-- arch/x86/xen/xen-asm_64.S | 2 +- 9 files changed, 29 insertions(+), 30 deletions(-) (limited to 'arch') diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S index 30c6ed3d7c52..28d13f07b84d 100644 --- a/arch/x86/entry/entry_32.S +++ b/arch/x86/entry/entry_32.S @@ -1488,7 +1488,7 @@ ret_to_user: jmp restore_all_switch_stack SYM_CODE_END(handle_exception) -SYM_CODE_START(double_fault) +SYM_CODE_START(asm_exc_double_fault) 1: /* * This is a task gate handler, not an interrupt gate handler. @@ -1526,7 +1526,7 @@ SYM_CODE_START(double_fault) 1: hlt jmp 1b -SYM_CODE_END(double_fault) +SYM_CODE_END(asm_exc_double_fault) /* * NMI is doubly nasty. It can happen on the first instruction of diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index d302839b9b3c..d983a0d4bc73 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -680,15 +680,9 @@ SYM_CODE_START(\asmsym) call paranoid_entry UNWIND_HINT_REGS - /* Read CR2 early */ - GET_CR2_INTO(%r12); - - TRACE_IRQS_OFF - movq %rsp, %rdi /* pt_regs pointer into first argument */ movq ORIG_RAX(%rsp), %rsi /* get error code into 2nd argument*/ movq $-1, ORIG_RAX(%rsp) /* no syscall to restart */ - movq %r12, %rdx /* Move CR2 into 3rd argument */ call \cfunc jmp paranoid_exit @@ -918,7 +912,7 @@ SYM_INNER_LABEL(native_irq_return_iret, SYM_L_GLOBAL) /* * This may fault. Non-paranoid faults on return to userspace are * handled by fixup_bad_iret. 
These include #SS, #GP, and #NP. - * Double-faults due to espfix64 are handled in do_double_fault. + * Double-faults due to espfix64 are handled in exc_double_fault. * Other faults here are fatal. */ iretq @@ -1073,8 +1067,6 @@ apicinterrupt IRQ_WORK_VECTOR irq_work_interrupt smp_irq_work_interrupt idtentry X86_TRAP_PF page_fault do_page_fault has_error_code=1 -idtentry_df X86_TRAP_DF double_fault do_double_fault - #ifdef CONFIG_XEN_PV idtentry 512 /* dummy */ hypervisor_callback xen_do_hypervisor_callback has_error_code=0 #endif diff --git a/arch/x86/include/asm/idtentry.h b/arch/x86/include/asm/idtentry.h index 9521f329bbbe..ce97478ffc40 100644 --- a/arch/x86/include/asm/idtentry.h +++ b/arch/x86/include/asm/idtentry.h @@ -368,4 +368,7 @@ DECLARE_IDTENTRY_XEN(X86_TRAP_NMI, nmi); DECLARE_IDTENTRY_DEBUG(X86_TRAP_DB, exc_debug); DECLARE_IDTENTRY_XEN(X86_TRAP_DB, debug); +/* #DF */ +DECLARE_IDTENTRY_DF(X86_TRAP_DF, exc_double_fault); + #endif diff --git a/arch/x86/include/asm/traps.h b/arch/x86/include/asm/traps.h index 9bd602d0130d..f5a2e438a878 100644 --- a/arch/x86/include/asm/traps.h +++ b/arch/x86/include/asm/traps.h @@ -11,18 +11,13 @@ #define dotraplinkage __visible -#ifdef CONFIG_X86_64 -asmlinkage void double_fault(void); -#endif asmlinkage void page_fault(void); asmlinkage void async_page_fault(void); #if defined(CONFIG_X86_64) && defined(CONFIG_XEN_PV) -asmlinkage void xen_double_fault(void); asmlinkage void xen_page_fault(void); #endif -dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code, unsigned long cr2); dotraplinkage void do_page_fault(struct pt_regs *regs, unsigned long error_code, unsigned long address); #ifdef CONFIG_X86_64 diff --git a/arch/x86/kernel/doublefault_32.c b/arch/x86/kernel/doublefault_32.c index 2ccc57f152a4..759d392cbe9f 100644 --- a/arch/x86/kernel/doublefault_32.c +++ b/arch/x86/kernel/doublefault_32.c @@ -10,7 +10,6 @@ #include #include -extern void double_fault(void); #define ptr_ok(x) ((x) > PAGE_OFFSET && (x) < PAGE_OFFSET + MAXMEM) #define TSS(x) this_cpu_read(cpu_tss_rw.x86_tss.x) @@ -21,7 +20,7 @@ static void set_df_gdt_entry(unsigned int cpu); * Called by double_fault with CR0.TS and EFLAGS.NT cleared. The CPU thinks * we're running the doublefault task. Cannot return. */ -asmlinkage notrace void __noreturn doublefault_shim(void) +asmlinkage noinstr void __noreturn doublefault_shim(void) { unsigned long cr2; struct pt_regs regs; @@ -40,7 +39,7 @@ asmlinkage notrace void __noreturn doublefault_shim(void) * Fill in pt_regs. A downside of doing this in C is that the unwinder * won't see it (no ENCODE_FRAME_POINTER), so a nested stack dump * won't successfully unwind to the source of the double fault. - * The main dump from do_double_fault() is fine, though, since it + * The main dump from exc_double_fault() is fine, though, since it * uses these regs directly. * * If anyone ever cares, this could be moved to asm. @@ -70,7 +69,7 @@ asmlinkage notrace void __noreturn doublefault_shim(void) regs.cx = TSS(cx); regs.bx = TSS(bx); - do_double_fault(®s, 0, cr2); + exc_double_fault(®s, 0, cr2); /* * x86_32 does not save the original CR3 anywhere on a task switch. 
@@ -84,7 +83,6 @@ asmlinkage notrace void __noreturn doublefault_shim(void) */ panic("cannot return from double fault\n"); } -NOKPROBE_SYMBOL(doublefault_shim); DEFINE_PER_CPU_PAGE_ALIGNED(struct doublefault_stack, doublefault_stack) = { .tss = { @@ -95,7 +93,7 @@ DEFINE_PER_CPU_PAGE_ALIGNED(struct doublefault_stack, doublefault_stack) = { .ldt = 0, .io_bitmap_base = IO_BITMAP_OFFSET_INVALID, - .ip = (unsigned long) double_fault, + .ip = (unsigned long) asm_exc_double_fault, .flags = X86_EFLAGS_FIXED, .es = __USER_DS, .cs = __KERNEL_CS, diff --git a/arch/x86/kernel/idt.c b/arch/x86/kernel/idt.c index ddf3f3db3235..ec55479e1dd1 100644 --- a/arch/x86/kernel/idt.c +++ b/arch/x86/kernel/idt.c @@ -91,7 +91,7 @@ static const __initconst struct idt_data def_idts[] = { #ifdef CONFIG_X86_32 TSKG(X86_TRAP_DF, GDT_ENTRY_DOUBLEFAULT_TSS), #else - INTG(X86_TRAP_DF, double_fault), + INTG(X86_TRAP_DF, asm_exc_double_fault), #endif INTG(X86_TRAP_DB, asm_exc_debug), @@ -187,7 +187,7 @@ gate_desc debug_idt_table[IDT_ENTRIES] __page_aligned_bss; static const __initconst struct idt_data ist_idts[] = { ISTG(X86_TRAP_DB, asm_exc_debug, IST_INDEX_DB), ISTG(X86_TRAP_NMI, asm_exc_nmi, IST_INDEX_NMI), - ISTG(X86_TRAP_DF, double_fault, IST_INDEX_DF), + ISTG(X86_TRAP_DF, asm_exc_double_fault, IST_INDEX_DF), #ifdef CONFIG_X86_MCE ISTG(X86_TRAP_MC, asm_exc_machine_check, IST_INDEX_MCE), #endif diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index 41bb0cb9df84..35298c1df32f 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -319,12 +319,19 @@ __visible void __noreturn handle_stack_overflow(const char *message, * from the TSS. Returning is, in principle, okay, but changes to regs will * be lost. If, for some reason, we need to return to a context with modified * regs, the shim code could be adjusted to synchronize the registers. + * + * The 32bit #DF shim provides CR2 already as an argument. On 64bit it needs + * to be read before doing anything else. */ -dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code, unsigned long cr2) +DEFINE_IDTENTRY_DF(exc_double_fault) { static const char str[] = "double fault"; struct task_struct *tsk = current; +#ifdef CONFIG_X86_64 + unsigned long address = read_cr2(); +#endif + #ifdef CONFIG_X86_ESPFIX64 extern unsigned char native_irq_return_iret[]; @@ -381,6 +388,7 @@ dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code, unsign #endif nmi_enter(); + instrumentation_begin(); notify_die(DIE_TRAP, str, regs, error_code, X86_TRAP_DF, SIGSEGV); tsk->thread.error_code = error_code; @@ -424,13 +432,16 @@ dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code, unsign * stack even if the actual trigger for the double fault was * something else. 
*/ - if ((unsigned long)task_stack_page(tsk) - 1 - cr2 < PAGE_SIZE) - handle_stack_overflow("kernel stack overflow (double-fault)", regs, cr2); + if ((unsigned long)task_stack_page(tsk) - 1 - address < PAGE_SIZE) { + handle_stack_overflow("kernel stack overflow (double-fault)", + regs, address); + } #endif pr_emerg("PANIC: double fault, error_code: 0x%lx\n", error_code); die("double fault", regs, error_code); panic("Machine halted."); + instrumentation_end(); } DEFINE_IDTENTRY(exc_bounds) diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c index 376851d1039a..008291121cb4 100644 --- a/arch/x86/xen/enlighten_pv.c +++ b/arch/x86/xen/enlighten_pv.c @@ -616,7 +616,7 @@ struct trap_array_entry { static struct trap_array_entry trap_array[] = { TRAP_ENTRY_REDIR(exc_debug, exc_xendebug, true ), - { double_fault, xen_double_fault, true }, + TRAP_ENTRY(exc_double_fault, true ), #ifdef CONFIG_X86_MCE TRAP_ENTRY(exc_machine_check, true ), #endif @@ -651,7 +651,7 @@ static bool __ref get_trap_addr(void **addr, unsigned int ist) * Replace trap handler addresses by Xen specific ones. * Check for known traps using IST and whitelist them. * The debugger ones are the only ones we care about. - * Xen will handle faults like double_fault, * so we should never see + * Xen will handle faults like double_fault, so we should never see * them. Warn if there's an unexpected IST-using fault handler. */ for (nr = 0; nr < ARRAY_SIZE(trap_array); nr++) { diff --git a/arch/x86/xen/xen-asm_64.S b/arch/x86/xen/xen-asm_64.S index 9999ea377476..e46d863bcaa4 100644 --- a/arch/x86/xen/xen-asm_64.S +++ b/arch/x86/xen/xen-asm_64.S @@ -37,7 +37,7 @@ xen_pv_trap asm_exc_overflow xen_pv_trap asm_exc_bounds xen_pv_trap asm_exc_invalid_op xen_pv_trap asm_exc_device_not_available -xen_pv_trap double_fault +xen_pv_trap asm_exc_double_fault xen_pv_trap asm_exc_coproc_segment_overrun xen_pv_trap asm_exc_invalid_tss xen_pv_trap asm_exc_segment_not_present -- cgit v1.2.3 From 7102cb07132624cdc09aa8e40c03ae34b4cbb74a Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 25 May 2020 09:42:41 +0200 Subject: x86/entry: Fix allnoconfig build warning The following commit: 095b7a3e7745 ("x86/entry: Convert double fault exception to IDTENTRY_DF") introduced a new build warning on 64-bit allnoconfig kernels, that have CONFIG_VMAP_STACK disabled: arch/x86/kernel/traps.c:332:16: warning: unused variable ‘address’ [-Wunused-variable] This variable is only used if CONFIG_VMAP_STACK is defined, so make it dependent on that, not CONFIG_X86_64. Signed-off-by: Ingo Molnar Cc: Thomas Gleixner Cc: Alexandre Chartre Cc: Peter Zijlstra Cc: Andy Lutomirski Cc: Borislav Petkov --- arch/x86/kernel/traps.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index 35298c1df32f..9e5d81cb94ba 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -328,7 +328,7 @@ DEFINE_IDTENTRY_DF(exc_double_fault) static const char str[] = "double fault"; struct task_struct *tsk = current; -#ifdef CONFIG_X86_64 +#ifdef CONFIG_VMAP_STACK unsigned long address = read_cr2(); #endif -- cgit v1.2.3 From 3eeec385848855c8109eb72b8b309078d5507968 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 21 May 2020 22:05:17 +0200 Subject: x86/entry: Provide idtentry_entry/exit_cond_rcu() After a lengthy discussion [1] it turned out that RCU does not need a full rcu_irq_enter/exit() when RCU is already watching. 
All it needs if NOHZ_FULL is active is to check whether the tick needs to be restarted. This allows to avoid a separate variant for the pagefault handler which cannot invoke rcu_irq_enter() on a kernel pagefault which might sleep. The cond_rcu argument is only temporary and will be removed once the existing users of idtentry_enter/exit() have been cleaned up. After that the code can be significantly simplified. [ mingo: Simplified the control flow ] Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar Acked-by: "Paul E. McKenney" Acked-by: Andy Lutomirski Link: [1] https://lkml.kernel.org/r/20200515235125.628629605@linutronix.de Link: https://lore.kernel.org/r/20200521202117.181397835@linutronix.de --- arch/x86/entry/common.c | 79 +++++++++++++++++++++++++++++++++-------- arch/x86/include/asm/idtentry.h | 14 ++++++-- 2 files changed, 76 insertions(+), 17 deletions(-) (limited to 'arch') diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c index 9ebe33485428..a7f5846a4102 100644 --- a/arch/x86/entry/common.c +++ b/arch/x86/entry/common.c @@ -512,8 +512,10 @@ SYSCALL_DEFINE0(ni_syscall) } /** - * idtentry_enter - Handle state tracking on idtentry + * idtentry_enter_cond_rcu - Handle state tracking on idtentry with conditional + * RCU handling * @regs: Pointer to pt_regs of interrupted context + * @cond_rcu: Invoke rcu_irq_enter() only if RCU is not watching * * Invokes: * - lockdep irqflag state tracking as low level ASM entry disabled @@ -521,40 +523,84 @@ SYSCALL_DEFINE0(ni_syscall) * * - Context tracking if the exception hit user mode. * - * - RCU notification if the exception hit kernel mode. - * * - The hardirq tracer to keep the state consistent as low level ASM * entry disabled interrupts. + * + * For kernel mode entries RCU handling is done conditional. If RCU is + * watching then the only RCU requirement is to check whether the tick has + * to be restarted. If RCU is not watching then rcu_irq_enter() has to be + * invoked on entry and rcu_irq_exit() on exit. + * + * Avoiding the rcu_irq_enter/exit() calls is an optimization but also + * solves the problem of kernel mode pagefaults which can schedule, which + * is not possible after invoking rcu_irq_enter() without undoing it. + * + * For user mode entries enter_from_user_mode() must be invoked to + * establish the proper context for NOHZ_FULL. Otherwise scheduling on exit + * would not be possible. + * + * Returns: True if RCU has been adjusted on a kernel entry + * False otherwise + * + * The return value must be fed into the rcu_exit argument of + * idtentry_exit_cond_rcu(). */ -void noinstr idtentry_enter(struct pt_regs *regs) +bool noinstr idtentry_enter_cond_rcu(struct pt_regs *regs, bool cond_rcu) { if (user_mode(regs)) { enter_from_user_mode(); - } else { + return false; + } + + if (!cond_rcu || !__rcu_is_watching()) { + /* + * If RCU is not watching then the same careful + * sequence vs. lockdep and tracing is required + * as in enter_from_user_mode(). + * + * This only happens for IRQs that hit the idle + * loop, i.e. if idle is not using MWAIT. + */ lockdep_hardirqs_off(CALLER_ADDR0); rcu_irq_enter(); instrumentation_begin(); trace_hardirqs_off_prepare(); instrumentation_end(); + + return true; } + + /* + * If RCU is watching then RCU only wants to check + * whether it needs to restart the tick in NOHZ + * mode. 
+ */ + instrumentation_begin(); + rcu_irq_enter_check_tick(); + /* Use the combo lockdep/tracing function */ + trace_hardirqs_off(); + instrumentation_end(); + + return false; } /** - * idtentry_exit - Common code to handle return from exceptions + * idtentry_exit_cond_rcu - Handle return from exception with conditional RCU + * handling * @regs: Pointer to pt_regs (exception entry regs) + * @rcu_exit: Invoke rcu_irq_exit() if true * * Depending on the return target (kernel/user) this runs the necessary - * preemption and work checks if possible and required and returns to + * preemption and work checks if possible and reguired and returns to * the caller with interrupts disabled and no further work pending. * * This is the last action before returning to the low level ASM code which * just needs to return to the appropriate context. * - * Invoked by all exception/interrupt IDTENTRY handlers which are not - * returning through the paranoid exit path (all except NMI, #DF and the IST - * variants of #MC and #DB) and are therefore on the thread stack. + * Counterpart to idtentry_enter_cond_rcu(). The return value of the entry + * function must be fed into the @rcu_exit argument. */ -void noinstr idtentry_exit(struct pt_regs *regs) +void noinstr idtentry_exit_cond_rcu(struct pt_regs *regs, bool rcu_exit) { lockdep_assert_irqs_disabled(); @@ -580,7 +626,8 @@ void noinstr idtentry_exit(struct pt_regs *regs) if (IS_ENABLED(CONFIG_DEBUG_ENTRY)) WARN_ON_ONCE(!on_thread_stack()); instrumentation_begin(); - rcu_irq_exit_preempt(); + if (rcu_exit) + rcu_irq_exit_preempt(); if (need_resched()) preempt_schedule_irq(); /* Covers both tracing and lockdep */ @@ -602,10 +649,12 @@ void noinstr idtentry_exit(struct pt_regs *regs) trace_hardirqs_on_prepare(); lockdep_hardirqs_on_prepare(CALLER_ADDR0); instrumentation_end(); - rcu_irq_exit(); + if (rcu_exit) + rcu_irq_exit(); lockdep_hardirqs_on(CALLER_ADDR0); } else { - /* IRQ flags state is correct already. Just tell RCU */ - rcu_irq_exit(); + /* IRQ flags state is correct already. Just tell RCU. */ + if (rcu_exit) + rcu_irq_exit(); } } diff --git a/arch/x86/include/asm/idtentry.h b/arch/x86/include/asm/idtentry.h index ce97478ffc40..a116b80662d4 100644 --- a/arch/x86/include/asm/idtentry.h +++ b/arch/x86/include/asm/idtentry.h @@ -7,8 +7,18 @@ #ifndef __ASSEMBLY__ -void idtentry_enter(struct pt_regs *regs); -void idtentry_exit(struct pt_regs *regs); +bool idtentry_enter_cond_rcu(struct pt_regs *regs, bool cond_rcu); +void idtentry_exit_cond_rcu(struct pt_regs *regs, bool rcu_exit); + +static __always_inline void idtentry_enter(struct pt_regs *regs) +{ + idtentry_enter_cond_rcu(regs, false); +} + +static __always_inline void idtentry_exit(struct pt_regs *regs) +{ + idtentry_exit_cond_rcu(regs, true); +} /** * DECLARE_IDTENTRY - Declare functions for simple IDT entry points -- cgit v1.2.3 From 9f9781b60dfa68d5094a41982f1efa75215a62b1 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 21 May 2020 22:05:18 +0200 Subject: x86/entry: Provide idtentry_enter/exit_user() As there are exceptions which already handle entry from user mode and from kernel mode separately, providing explicit user entry/exit handling callbacks makes sense and makes the code easier to understand. 
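A sketch of the resulting call pattern (the actual conversions follow in the next patch; exc_example_user is a placeholder name):

  static __always_inline void exc_example_user(struct pt_regs *regs)
  {
          idtentry_enter_user(regs);
          instrumentation_begin();
          /* ... handle the user mode exception ... */
          instrumentation_end();
          idtentry_exit_user(regs);
  }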
Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar Acked-by: Andy Lutomirski Link: https://lore.kernel.org/r/20200521202117.289548561@linutronix.de --- arch/x86/entry/common.c | 31 +++++++++++++++++++++++++++++++ arch/x86/include/asm/idtentry.h | 3 +++ 2 files changed, 34 insertions(+) (limited to 'arch') diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c index a7f5846a4102..b7fcb1355adf 100644 --- a/arch/x86/entry/common.c +++ b/arch/x86/entry/common.c @@ -658,3 +658,34 @@ void noinstr idtentry_exit_cond_rcu(struct pt_regs *regs, bool rcu_exit) rcu_irq_exit(); } } + +/** + * idtentry_enter_user - Handle state tracking on idtentry from user mode + * @regs: Pointer to pt_regs of interrupted context + * + * Invokes enter_from_user_mode() to establish the proper context for + * NOHZ_FULL. Otherwise scheduling on exit would not be possible. + */ +void noinstr idtentry_enter_user(struct pt_regs *regs) +{ + enter_from_user_mode(); +} + +/** + * idtentry_exit_user - Handle return from exception to user mode + * @regs: Pointer to pt_regs (exception entry regs) + * + * Runs the necessary preemption and work checks and returns to the caller + * with interrupts disabled and no further work pending. + * + * This is the last action before returning to the low level ASM code which + * just needs to return to the appropriate context. + * + * Counterpart to idtentry_enter_user(). + */ +void noinstr idtentry_exit_user(struct pt_regs *regs) +{ + lockdep_assert_irqs_disabled(); + + prepare_exit_to_usermode(regs); +} diff --git a/arch/x86/include/asm/idtentry.h b/arch/x86/include/asm/idtentry.h index a116b80662d4..b3aca728f2fb 100644 --- a/arch/x86/include/asm/idtentry.h +++ b/arch/x86/include/asm/idtentry.h @@ -7,6 +7,9 @@ #ifndef __ASSEMBLY__ +void idtentry_enter_user(struct pt_regs *regs); +void idtentry_exit_user(struct pt_regs *regs); + bool idtentry_enter_cond_rcu(struct pt_regs *regs, bool cond_rcu); void idtentry_exit_cond_rcu(struct pt_regs *regs, bool rcu_exit); -- cgit v1.2.3 From fa95d7dc1abceb288db2959badb9aaf558eb0530 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 21 May 2020 22:05:19 +0200 Subject: x86/idtentry: Switch to conditional RCU handling Switch all idtentry_enter/exit() users over to the new conditional RCU handling scheme and make the user mode entries in #DB, #INT3 and #MCE use the user mode idtentry functions. 
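After this change a plain DEFINE_IDTENTRY(exc_foo) handler expands to roughly the following (simplified sketch; exc_foo is a placeholder):

  __visible noinstr void exc_foo(struct pt_regs *regs)
  {
          bool rcu_exit = idtentry_enter_cond_rcu(regs);

          instrumentation_begin();
          __exc_foo(regs);
          instrumentation_end();
          idtentry_exit_cond_rcu(regs, rcu_exit);
  }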
Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar Acked-by: Andy Lutomirski Link: https://lore.kernel.org/r/20200521202117.382387286@linutronix.de --- arch/x86/include/asm/idtentry.h | 10 ++++++---- arch/x86/kernel/cpu/mce/core.c | 4 ++-- arch/x86/kernel/traps.c | 10 +++++----- 3 files changed, 13 insertions(+), 11 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/idtentry.h b/arch/x86/include/asm/idtentry.h index b3aca728f2fb..0f974e52e13b 100644 --- a/arch/x86/include/asm/idtentry.h +++ b/arch/x86/include/asm/idtentry.h @@ -61,11 +61,12 @@ static __always_inline void __##func(struct pt_regs *regs); \ \ __visible noinstr void func(struct pt_regs *regs) \ { \ - idtentry_enter(regs); \ + bool rcu_exit = idtentry_enter_cond_rcu(regs); \ + \ instrumentation_begin(); \ __##func (regs); \ instrumentation_end(); \ - idtentry_exit(regs); \ + idtentry_exit_cond_rcu(regs, rcu_exit); \ } \ \ static __always_inline void __##func(struct pt_regs *regs) @@ -107,11 +108,12 @@ static __always_inline void __##func(struct pt_regs *regs, \ __visible noinstr void func(struct pt_regs *regs, \ unsigned long error_code) \ { \ - idtentry_enter(regs); \ + bool rcu_exit = idtentry_enter_cond_rcu(regs); \ + \ instrumentation_begin(); \ __##func (regs, error_code); \ instrumentation_end(); \ - idtentry_exit(regs); \ + idtentry_exit_cond_rcu(regs, rcu_exit); \ } \ \ static __always_inline void __##func(struct pt_regs *regs, \ diff --git a/arch/x86/kernel/cpu/mce/core.c b/arch/x86/kernel/cpu/mce/core.c index a32a7e236bb1..c47f004f6231 100644 --- a/arch/x86/kernel/cpu/mce/core.c +++ b/arch/x86/kernel/cpu/mce/core.c @@ -1929,11 +1929,11 @@ static __always_inline void exc_machine_check_kernel(struct pt_regs *regs) static __always_inline void exc_machine_check_user(struct pt_regs *regs) { - idtentry_enter(regs); + idtentry_enter_user(regs); instrumentation_begin(); machine_check_vector(regs); instrumentation_end(); - idtentry_exit(regs); + idtentry_exit_user(regs); } #ifdef CONFIG_X86_64 diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index 9e5d81cb94ba..f28be3e51cca 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -619,18 +619,18 @@ DEFINE_IDTENTRY_RAW(exc_int3) return; /* - * idtentry_enter() uses static_branch_{,un}likely() and therefore + * idtentry_enter_user() uses static_branch_{,un}likely() and therefore * can trigger INT3, hence poke_int3_handler() must be done * before. If the entry came from kernel mode, then use nmi_enter() * because the INT3 could have been hit in any context including * NMI. */ if (user_mode(regs)) { - idtentry_enter(regs); + idtentry_enter_user(regs); instrumentation_begin(); do_int3_user(regs); instrumentation_end(); - idtentry_exit(regs); + idtentry_exit_user(regs); } else { nmi_enter(); instrumentation_begin(); @@ -877,7 +877,7 @@ static __always_inline void exc_debug_kernel(struct pt_regs *regs, static __always_inline void exc_debug_user(struct pt_regs *regs, unsigned long dr6) { - idtentry_enter(regs); + idtentry_enter_user(regs); clear_thread_flag(TIF_BLOCKSTEP); /* @@ -886,7 +886,7 @@ static __always_inline void exc_debug_user(struct pt_regs *regs, * User wants a sigtrap for that. 
*/ handle_debug(regs, dr6, !dr6); - idtentry_exit(regs); + idtentry_exit_user(regs); } #ifdef CONFIG_X86_64 -- cgit v1.2.3 From 9ee01e0f69a925b6ff7d5f39441413c55132b167 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 21 May 2020 22:05:20 +0200 Subject: x86/entry: Clean up idtentry_enter/exit() leftovers Now that everything is converted to conditional RCU handling remove idtentry_enter/exit() and tidy up the conditional functions. This does not remove rcu_irq_exit_preempt(), to avoid conflicts with the RCU tree. Will be removed once all of this hits Linus's tree. Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar Acked-by: Andy Lutomirski Link: https://lore.kernel.org/r/20200521202117.473597954@linutronix.de --- arch/x86/entry/common.c | 67 ++++++++++++++++++----------------------- arch/x86/include/asm/idtentry.h | 12 +------- 2 files changed, 30 insertions(+), 49 deletions(-) (limited to 'arch') diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c index b7fcb1355adf..2a80e4e1b4c1 100644 --- a/arch/x86/entry/common.c +++ b/arch/x86/entry/common.c @@ -515,7 +515,6 @@ SYSCALL_DEFINE0(ni_syscall) * idtentry_enter_cond_rcu - Handle state tracking on idtentry with conditional * RCU handling * @regs: Pointer to pt_regs of interrupted context - * @cond_rcu: Invoke rcu_irq_enter() only if RCU is not watching * * Invokes: * - lockdep irqflag state tracking as low level ASM entry disabled @@ -545,14 +544,14 @@ SYSCALL_DEFINE0(ni_syscall) * The return value must be fed into the rcu_exit argument of * idtentry_exit_cond_rcu(). */ -bool noinstr idtentry_enter_cond_rcu(struct pt_regs *regs, bool cond_rcu) +bool noinstr idtentry_enter_cond_rcu(struct pt_regs *regs) { if (user_mode(regs)) { enter_from_user_mode(); return false; } - if (!cond_rcu || !__rcu_is_watching()) { + if (!__rcu_is_watching()) { /* * If RCU is not watching then the same careful * sequence vs. lockdep and tracing is required @@ -608,52 +607,44 @@ void noinstr idtentry_exit_cond_rcu(struct pt_regs *regs, bool rcu_exit) if (user_mode(regs)) { prepare_exit_to_usermode(regs); } else if (regs->flags & X86_EFLAGS_IF) { + /* + * If RCU was not watching on entry this needs to be done + * carefully and needs the same ordering of lockdep/tracing + * and RCU as the return to user mode path. + */ + if (rcu_exit) { + instrumentation_begin(); + /* Tell the tracer that IRET will enable interrupts */ + trace_hardirqs_on_prepare(); + lockdep_hardirqs_on_prepare(CALLER_ADDR0); + instrumentation_end(); + rcu_irq_exit(); + lockdep_hardirqs_on(CALLER_ADDR0); + return; + } + + instrumentation_begin(); + /* Check kernel preemption, if enabled */ if (IS_ENABLED(CONFIG_PREEMPTION)) { - /* - * This needs to be done very carefully. - * idtentry_enter() invoked rcu_irq_enter(). This - * needs to be undone before scheduling. - * - * Preemption is disabled inside of RCU idle - * sections. When the task returns from - * preempt_schedule_irq(), RCU is still watching. - * - * rcu_irq_exit_preempt() has additional state - * checking if CONFIG_PROVE_RCU=y - */ if (!preempt_count()) { + /* Sanity check RCU and thread stack */ + rcu_irq_exit_check_preempt(); if (IS_ENABLED(CONFIG_DEBUG_ENTRY)) WARN_ON_ONCE(!on_thread_stack()); - instrumentation_begin(); - if (rcu_exit) - rcu_irq_exit_preempt(); if (need_resched()) preempt_schedule_irq(); - /* Covers both tracing and lockdep */ - trace_hardirqs_on(); - instrumentation_end(); - return; } } - /* - * If preemption is disabled then this needs to be done - * carefully with respect to RCU. 
The exception might come - * from a RCU idle section in the idle task due to the fact - * that safe_halt() enables interrupts. So this needs the - * same ordering of lockdep/tracing and RCU as the return - * to user mode path. - */ - instrumentation_begin(); - /* Tell the tracer that IRET will enable interrupts */ - trace_hardirqs_on_prepare(); - lockdep_hardirqs_on_prepare(CALLER_ADDR0); + /* Covers both tracing and lockdep */ + trace_hardirqs_on(); + instrumentation_end(); - if (rcu_exit) - rcu_irq_exit(); - lockdep_hardirqs_on(CALLER_ADDR0); } else { - /* IRQ flags state is correct already. Just tell RCU. */ + /* + * IRQ flags state is correct already. Just tell RCU if it + * was not watching on entry. + */ if (rcu_exit) rcu_irq_exit(); } diff --git a/arch/x86/include/asm/idtentry.h b/arch/x86/include/asm/idtentry.h index 0f974e52e13b..b05688973b92 100644 --- a/arch/x86/include/asm/idtentry.h +++ b/arch/x86/include/asm/idtentry.h @@ -10,19 +10,9 @@ void idtentry_enter_user(struct pt_regs *regs); void idtentry_exit_user(struct pt_regs *regs); -bool idtentry_enter_cond_rcu(struct pt_regs *regs, bool cond_rcu); +bool idtentry_enter_cond_rcu(struct pt_regs *regs); void idtentry_exit_cond_rcu(struct pt_regs *regs, bool rcu_exit); -static __always_inline void idtentry_enter(struct pt_regs *regs) -{ - idtentry_enter_cond_rcu(regs, false); -} - -static __always_inline void idtentry_exit(struct pt_regs *regs) -{ - idtentry_exit_cond_rcu(regs, true); -} - /** * DECLARE_IDTENTRY - Declare functions for simple IDT entry points * No error code pushed by hardware -- cgit v1.2.3 From 931b94145981e411bd2c934657649347ba8a9083 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 21 May 2020 22:05:23 +0200 Subject: x86/entry: Provide helpers for executing on the irqstack Device interrupt handlers and system vector handlers are executed on the interrupt stack. The stack switch happens in the low level assembly entry code. This conflicts with the efforts to consolidate the exit code in C to ensure correctness vs. RCU and tracing. As there is no way to move #DB away from IST due to the MOV SS issue, the requirements vs. #DB and NMI for switching to the interrupt stack do not exist anymore. The only requirement is that interrupts are disabled. That allows the moving of the stack switching to C code, which simplifies the entry/exit handling further, because it allows the switching of stacks after handling the entry and on exit before handling RCU, returning to usermode and kernel preemption in the same way as for regular exceptions. The initial attempt of having the stack switching in inline ASM caused too much headache vs. objtool and the unwinder. After analysing the use cases it was agreed on that having the stack switch in ASM for the price of an indirect call is acceptable, as the main users are indirect call heavy anyway and the few system vectors which are empty shells (scheduler IPI and KVM posted interrupt vectors) can run from the regular stack. Provide helper functions to check whether the interrupt stack is already active and whether stack switching is required. 64-bit only for now, as 32-bit has a variant of that already. Once this is cleaned up, the two implementations might be consolidated as an additional cleanup on top. 
Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar Acked-by: Andy Lutomirski Link: https://lore.kernel.org/r/20200521202117.763775313@linutronix.de --- arch/x86/entry/entry_64.S | 39 +++++++++++++++++++++++++++++ arch/x86/include/asm/irq_stack.h | 53 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 92 insertions(+) create mode 100644 arch/x86/include/asm/irq_stack.h (limited to 'arch') diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index d983a0d4bc73..159737062611 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -1106,6 +1106,45 @@ SYM_CODE_START_LOCAL_NOALIGN(.Lbad_gs) SYM_CODE_END(.Lbad_gs) .previous +/* + * rdi: New stack pointer points to the top word of the stack + * rsi: Function pointer + * rdx: Function argument (can be NULL if none) + */ +SYM_FUNC_START(asm_call_on_stack) + /* + * Save the frame pointer unconditionally. This allows the ORC + * unwinder to handle the stack switch. + */ + pushq %rbp + mov %rsp, %rbp + + /* + * The unwinder relies on the word at the top of the new stack + * page linking back to the previous RSP. + */ + mov %rsp, (%rdi) + mov %rdi, %rsp + /* Move the argument to the right place */ + mov %rdx, %rdi + +1: + .pushsection .discard.instr_begin + .long 1b - . + .popsection + + CALL_NOSPEC rsi + +2: + .pushsection .discard.instr_end + .long 2b - . + .popsection + + /* Restore the previous stack pointer from RBP. */ + leaveq + ret +SYM_FUNC_END(asm_call_on_stack) + /* Call softirq on interrupt stack. Interrupts are off. */ .pushsection .text, "ax" SYM_FUNC_START(do_softirq_own_stack) diff --git a/arch/x86/include/asm/irq_stack.h b/arch/x86/include/asm/irq_stack.h new file mode 100644 index 000000000000..4ae66f097101 --- /dev/null +++ b/arch/x86/include/asm/irq_stack.h @@ -0,0 +1,53 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_X86_IRQ_STACK_H +#define _ASM_X86_IRQ_STACK_H + +#include + +#include + +#ifdef CONFIG_X86_64 +static __always_inline bool irqstack_active(void) +{ + return __this_cpu_read(irq_count) != -1; +} + +void asm_call_on_stack(void *sp, void *func, void *arg); + +static __always_inline void __run_on_irqstack(void *func, void *arg) +{ + void *tos = __this_cpu_read(hardirq_stack_ptr); + + __this_cpu_add(irq_count, 1); + asm_call_on_stack(tos - 8, func, arg); + __this_cpu_sub(irq_count, 1); +} + +#else /* CONFIG_X86_64 */ +static inline bool irqstack_active(void) { return false; } +static inline void __run_on_irqstack(void *func, void *arg) { } +#endif /* !CONFIG_X86_64 */ + +static __always_inline bool irq_needs_irq_stack(struct pt_regs *regs) +{ + if (IS_ENABLED(CONFIG_X86_32)) + return false; + if (!regs) + return !irqstack_active(); + return !user_mode(regs) && !irqstack_active(); +} + +static __always_inline void run_on_irqstack_cond(void *func, void *arg, + struct pt_regs *regs) +{ + void (*__func)(void *arg) = func; + + lockdep_assert_irqs_disabled(); + + if (irq_needs_irq_stack(regs)) + __run_on_irqstack(__func, arg); + else + __func(arg); +} + +#endif -- cgit v1.2.3 From eb6555c83933ce8e094d5429d57970aaa9f0591e Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 21 May 2020 22:05:24 +0200 Subject: x86/entry/64: Move do_softirq_own_stack() to C The first step to get rid of the ENTER/LEAVE_IRQ_STACK ASM macro maze. Use the new C code helpers to move do_softirq_own_stack() out of ASM code. 
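As the hunk below passes a NULL pt_regs pointer, irq_needs_irq_stack() reduces to a pure nesting check. The effective logic is roughly:

  /* regs == NULL: decide purely on irq stack nesting */
  if (irqstack_active())
          __do_softirq();                         /* already on the irq stack */
  else
          __run_on_irqstack(__do_softirq, NULL);  /* switch stacks first */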
Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar Acked-by: Andy Lutomirski Link: https://lore.kernel.org/r/20200521202117.870911120@linutronix.de --- arch/x86/entry/entry_64.S | 13 ------------- arch/x86/kernel/irq_64.c | 6 ++++++ 2 files changed, 6 insertions(+), 13 deletions(-) (limited to 'arch') diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index 159737062611..6b518be4da0a 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -1145,19 +1145,6 @@ SYM_FUNC_START(asm_call_on_stack) ret SYM_FUNC_END(asm_call_on_stack) -/* Call softirq on interrupt stack. Interrupts are off. */ -.pushsection .text, "ax" -SYM_FUNC_START(do_softirq_own_stack) - pushq %rbp - mov %rsp, %rbp - ENTER_IRQ_STACK regs=0 old_rsp=%r11 - call __do_softirq - LEAVE_IRQ_STACK regs=0 - leaveq - ret -SYM_FUNC_END(do_softirq_own_stack) -.popsection - #ifdef CONFIG_XEN_PV /* * A note on the "critical region" in our callback handler. diff --git a/arch/x86/kernel/irq_64.c b/arch/x86/kernel/irq_64.c index 6b32ab009c19..1b4fe93a86c5 100644 --- a/arch/x86/kernel/irq_64.c +++ b/arch/x86/kernel/irq_64.c @@ -20,6 +20,7 @@ #include #include +#include #include #include @@ -70,3 +71,8 @@ int irq_init_percpu_irqstack(unsigned int cpu) return 0; return map_irq_stack(cpu); } + +void do_softirq_own_stack(void) +{ + run_on_irqstack_cond(__do_softirq, NULL, NULL); +} -- cgit v1.2.3 From 1de16e0c17155d138282f3a9f65914a9a5da757e Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 21 May 2020 22:05:25 +0200 Subject: x86/entry: Split out idtentry_exit_cond_resched() The XEN PV hypercall requires the ability of conditional rescheduling when preemption is disabled because some hypercalls take ages. Split out the rescheduling code from idtentry_exit_cond_rcu() so it can be reused for that. 
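Sketch of the intended reuse in the XEN/PV upcall exit path (implemented by the next patch in this series):

  inhcall = get_and_clear_inhcall();
  if (inhcall && !WARN_ON_ONCE(rcu_exit)) {
          instrumentation_begin();
          idtentry_exit_cond_resched(regs, true);
          instrumentation_end();
          restore_inhcall(inhcall);
  } else {
          idtentry_exit_cond_rcu(regs, rcu_exit);
  }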
Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar Acked-by: Andy Lutomirski Link: https://lore.kernel.org/r/20200521202117.962199649@linutronix.de --- arch/x86/entry/common.c | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) (limited to 'arch') diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c index 2a80e4e1b4c1..066215a243b4 100644 --- a/arch/x86/entry/common.c +++ b/arch/x86/entry/common.c @@ -583,6 +583,20 @@ bool noinstr idtentry_enter_cond_rcu(struct pt_regs *regs) return false; } +static void idtentry_exit_cond_resched(struct pt_regs *regs, bool may_sched) +{ + if (may_sched && !preempt_count()) { + /* Sanity check RCU and thread stack */ + rcu_irq_exit_check_preempt(); + if (IS_ENABLED(CONFIG_DEBUG_ENTRY)) + WARN_ON_ONCE(!on_thread_stack()); + if (need_resched()) + preempt_schedule_irq(); + } + /* Covers both tracing and lockdep */ + trace_hardirqs_on(); +} + /** * idtentry_exit_cond_rcu - Handle return from exception with conditional RCU * handling @@ -624,21 +638,7 @@ void noinstr idtentry_exit_cond_rcu(struct pt_regs *regs, bool rcu_exit) } instrumentation_begin(); - - /* Check kernel preemption, if enabled */ - if (IS_ENABLED(CONFIG_PREEMPTION)) { - if (!preempt_count()) { - /* Sanity check RCU and thread stack */ - rcu_irq_exit_check_preempt(); - if (IS_ENABLED(CONFIG_DEBUG_ENTRY)) - WARN_ON_ONCE(!on_thread_stack()); - if (need_resched()) - preempt_schedule_irq(); - } - } - /* Covers both tracing and lockdep */ - trace_hardirqs_on(); - + idtentry_exit_cond_resched(regs, IS_ENABLED(CONFIG_PREEMPTION)); instrumentation_end(); } else { /* -- cgit v1.2.3 From 2f6474e4636bcc68af6c44abb2703f12d7f083da Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 21 May 2020 22:05:26 +0200 Subject: x86/entry: Switch XEN/PV hypercall entry to IDTENTRY Convert the XEN/PV hypercall to IDTENTRY: - Emit the ASM stub with DECLARE_IDTENTRY - Remove the ASM idtentry in 64-bit - Remove the open coded ASM entry code in 32-bit - Remove the old prototypes The handler stubs need to stay in ASM code as they need corner case handling and adjustment of the stack pointer. Provide a new C function which invokes the entry/exit handling and calls into the XEN handler on the interrupt stack if required. The exit code is slightly different from the regular idtentry_exit() on non-preemptible kernels. If the hypercall is preemptible and need_resched() is set then XEN provides a preempt hypercall scheduling function. Move this functionality into the entry code so it can use the existing idtentry functionality. [ mingo: Build fixes. 
] Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar Acked-by: Andy Lutomirski Acked-by: Juergen Gross Tested-by: Juergen Gross Link: https://lore.kernel.org/r/20200521202118.055270078@linutronix.de --- arch/x86/entry/common.c | 78 +++++++++++++++++++++++++++++++++++++++++ arch/x86/entry/entry_32.S | 20 ++++++----- arch/x86/entry/entry_64.S | 20 ++++------- arch/x86/include/asm/idtentry.h | 34 ++++++++++++++++++ arch/x86/xen/setup.c | 4 ++- arch/x86/xen/smp_pv.c | 3 +- arch/x86/xen/xen-asm_32.S | 12 ++++--- arch/x86/xen/xen-asm_64.S | 2 +- arch/x86/xen/xen-ops.h | 1 - drivers/xen/Makefile | 2 +- drivers/xen/preempt.c | 42 ---------------------- include/xen/xen-ops.h | 19 ++++------ 12 files changed, 151 insertions(+), 86 deletions(-) delete mode 100644 drivers/xen/preempt.c (limited to 'arch') diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c index 066215a243b4..a0f8c3cb130a 100644 --- a/arch/x86/entry/common.c +++ b/arch/x86/entry/common.c @@ -27,6 +27,11 @@ #include #include +#ifdef CONFIG_XEN_PV +#include +#include +#endif + #include #include #include @@ -35,6 +40,7 @@ #include #include #include +#include #define CREATE_TRACE_POINTS #include @@ -680,3 +686,75 @@ void noinstr idtentry_exit_user(struct pt_regs *regs) prepare_exit_to_usermode(regs); } + +#ifdef CONFIG_XEN_PV +#ifndef CONFIG_PREEMPTION +/* + * Some hypercalls issued by the toolstack can take many 10s of + * seconds. Allow tasks running hypercalls via the privcmd driver to + * be voluntarily preempted even if full kernel preemption is + * disabled. + * + * Such preemptible hypercalls are bracketed by + * xen_preemptible_hcall_begin() and xen_preemptible_hcall_end() + * calls. + */ +DEFINE_PER_CPU(bool, xen_in_preemptible_hcall); +EXPORT_SYMBOL_GPL(xen_in_preemptible_hcall); + +/* + * In case of scheduling the flag must be cleared and restored after + * returning from schedule as the task might move to a different CPU. 
+ */ +static __always_inline bool get_and_clear_inhcall(void) +{ + bool inhcall = __this_cpu_read(xen_in_preemptible_hcall); + + __this_cpu_write(xen_in_preemptible_hcall, false); + return inhcall; +} + +static __always_inline void restore_inhcall(bool inhcall) +{ + __this_cpu_write(xen_in_preemptible_hcall, inhcall); +} +#else +static __always_inline bool get_and_clear_inhcall(void) { return false; } +static __always_inline void restore_inhcall(bool inhcall) { } +#endif + +static void __xen_pv_evtchn_do_upcall(void) +{ + irq_enter_rcu(); + inc_irq_stat(irq_hv_callback_count); + + xen_hvm_evtchn_do_upcall(); + + irq_exit_rcu(); +} + +__visible noinstr void xen_pv_evtchn_do_upcall(struct pt_regs *regs) +{ + struct pt_regs *old_regs; + bool inhcall, rcu_exit; + + rcu_exit = idtentry_enter_cond_rcu(regs); + old_regs = set_irq_regs(regs); + + instrumentation_begin(); + run_on_irqstack_cond(__xen_pv_evtchn_do_upcall, NULL, regs); + instrumentation_begin(); + + set_irq_regs(old_regs); + + inhcall = get_and_clear_inhcall(); + if (inhcall && !WARN_ON_ONCE(rcu_exit)) { + instrumentation_begin(); + idtentry_exit_cond_resched(regs, true); + instrumentation_end(); + restore_inhcall(inhcall); + } else { + idtentry_exit_cond_rcu(regs, rcu_exit); + } +} +#endif /* CONFIG_XEN_PV */ diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S index 28d13f07b84d..3ab04dca9aab 100644 --- a/arch/x86/entry/entry_32.S +++ b/arch/x86/entry/entry_32.S @@ -1298,7 +1298,13 @@ SYM_CODE_END(native_iret) #endif #ifdef CONFIG_XEN_PV -SYM_FUNC_START(xen_hypervisor_callback) +/* + * See comment in entry_64.S for further explanation + * + * Note: This is not an actual IDT entry point. It's a XEN specific entry + * point and therefore named to match the 64-bit trampoline counterpart. + */ +SYM_FUNC_START(xen_asm_exc_xen_hypervisor_callback) /* * Check to see if we got the event in the critical * region in xen_iret_direct, after we've reenabled @@ -1315,14 +1321,11 @@ SYM_FUNC_START(xen_hypervisor_callback) pushl $-1 /* orig_ax = -1 => not a system call */ SAVE_ALL ENCODE_FRAME_POINTER - TRACE_IRQS_OFF + mov %esp, %eax - call xen_evtchn_do_upcall -#ifndef CONFIG_PREEMPTION - call xen_maybe_preempt_hcall -#endif - jmp ret_from_intr -SYM_FUNC_END(xen_hypervisor_callback) + call xen_pv_evtchn_do_upcall + jmp handle_exception_return +SYM_FUNC_END(xen_asm_exc_xen_hypervisor_callback) /* * Hypervisor uses this for application faults while it executes. @@ -1464,6 +1467,7 @@ SYM_CODE_START_LOCAL_NOALIGN(handle_exception) movl %esp, %eax # pt_regs pointer CALL_NOSPEC edi +handle_exception_return: #ifdef CONFIG_VM86 movl PT_EFLAGS(%esp), %eax # mix EFLAGS and CS movb PT_CS(%esp), %al diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index 6b518be4da0a..dadb37d07266 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -1067,10 +1067,6 @@ apicinterrupt IRQ_WORK_VECTOR irq_work_interrupt smp_irq_work_interrupt idtentry X86_TRAP_PF page_fault do_page_fault has_error_code=1 -#ifdef CONFIG_XEN_PV -idtentry 512 /* dummy */ hypervisor_callback xen_do_hypervisor_callback has_error_code=0 -#endif - /* * Reload gs selector with exception handling * edi: new selector @@ -1158,9 +1154,10 @@ SYM_FUNC_END(asm_call_on_stack) * So, on entry to the handler we detect whether we interrupted an * existing activation in its critical region -- if so, we pop the current * activation and restart the handler using the previous one. 
+ * + * C calling convention: exc_xen_hypervisor_callback(struct *pt_regs) */ -/* do_hypervisor_callback(struct *pt_regs) */ -SYM_CODE_START_LOCAL(xen_do_hypervisor_callback) +SYM_CODE_START_LOCAL(exc_xen_hypervisor_callback) /* * Since we don't modify %rdi, evtchn_do_upall(struct *pt_regs) will @@ -1170,15 +1167,10 @@ SYM_CODE_START_LOCAL(xen_do_hypervisor_callback) movq %rdi, %rsp /* we don't return, adjust the stack frame */ UNWIND_HINT_REGS - ENTER_IRQ_STACK old_rsp=%r10 - call xen_evtchn_do_upcall - LEAVE_IRQ_STACK + call xen_pv_evtchn_do_upcall -#ifndef CONFIG_PREEMPTION - call xen_maybe_preempt_hcall -#endif - jmp error_exit -SYM_CODE_END(xen_do_hypervisor_callback) + jmp error_return +SYM_CODE_END(exc_xen_hypervisor_callback) /* * Hypervisor uses this for application faults while it executes. diff --git a/arch/x86/include/asm/idtentry.h b/arch/x86/include/asm/idtentry.h index b05688973b92..b2a5fe02dcf0 100644 --- a/arch/x86/include/asm/idtentry.h +++ b/arch/x86/include/asm/idtentry.h @@ -165,6 +165,21 @@ __visible noinstr void func(struct pt_regs *regs) #define DEFINE_IDTENTRY_RAW_ERRORCODE(func) \ __visible noinstr void func(struct pt_regs *regs, unsigned long error_code) +/** + * DECLARE_IDTENTRY_XENCB - Declare functions for XEN HV callback entry point + * @vector: Vector number (ignored for C) + * @func: Function name of the entry point + * + * Declares three functions: + * - The ASM entry point: asm_##func + * - The XEN PV trap entry point: xen_##func (maybe unused) + * - The C handler called from the ASM entry point + * + * Maps to DECLARE_IDTENTRY(). Distinct entry point to handle the 32/64-bit + * difference + */ +#define DECLARE_IDTENTRY_XENCB(vector, func) \ + DECLARE_IDTENTRY(vector, func) #ifdef CONFIG_X86_64 /** @@ -307,6 +322,9 @@ __visible noinstr void func(struct pt_regs *regs, \ # define DECLARE_IDTENTRY_DF(vector, func) \ idtentry_df vector asm_##func func +# define DECLARE_IDTENTRY_XENCB(vector, func) \ + DECLARE_IDTENTRY(vector, func) + #else # define DECLARE_IDTENTRY_MCE(vector, func) \ DECLARE_IDTENTRY(vector, func) @@ -317,6 +335,9 @@ __visible noinstr void func(struct pt_regs *regs, \ /* No ASM emitted for DF as this goes through a C shim */ # define DECLARE_IDTENTRY_DF(vector, func) +/* No ASM emitted for XEN hypervisor callback */ +# define DECLARE_IDTENTRY_XENCB(vector, func) + #endif /* No ASM code emitted for NMI */ @@ -337,6 +358,13 @@ __visible noinstr void func(struct pt_regs *regs, \ * This avoids duplicate defines and ensures that everything is consistent. */ +/* + * Dummy trap number so the low level ASM macro vector number checks do not + * match which results in emitting plain IDTENTRY stubs without bells and + * whistels. + */ +#define X86_TRAP_OTHER 0xFFFF + /* Simple exception entry points. 
No hardware error code */ DECLARE_IDTENTRY(X86_TRAP_DE, exc_divide_error); DECLARE_IDTENTRY(X86_TRAP_OF, exc_overflow); @@ -376,4 +404,10 @@ DECLARE_IDTENTRY_XEN(X86_TRAP_DB, debug); /* #DF */ DECLARE_IDTENTRY_DF(X86_TRAP_DF, exc_double_fault); +#ifdef CONFIG_XEN_PV +DECLARE_IDTENTRY_XENCB(X86_TRAP_OTHER, exc_xen_hypervisor_callback); +#endif + +#undef X86_TRAP_OTHER + #endif diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c index 1a2d8a50dac4..3566e37241d7 100644 --- a/arch/x86/xen/setup.c +++ b/arch/x86/xen/setup.c @@ -20,6 +20,7 @@ #include #include #include +#include #include #include @@ -993,7 +994,8 @@ static void __init xen_pvmmu_arch_setup(void) HYPERVISOR_vm_assist(VMASST_CMD_enable, VMASST_TYPE_pae_extended_cr3); - if (register_callback(CALLBACKTYPE_event, xen_hypervisor_callback) || + if (register_callback(CALLBACKTYPE_event, + xen_asm_exc_xen_hypervisor_callback) || register_callback(CALLBACKTYPE_failsafe, xen_failsafe_callback)) BUG(); diff --git a/arch/x86/xen/smp_pv.c b/arch/x86/xen/smp_pv.c index 8fa01c545460..171aff1b11f2 100644 --- a/arch/x86/xen/smp_pv.c +++ b/arch/x86/xen/smp_pv.c @@ -26,6 +26,7 @@ #include #include +#include #include #include @@ -348,7 +349,7 @@ cpu_initialize_context(unsigned int cpu, struct task_struct *idle) ctxt->gs_base_kernel = per_cpu_offset(cpu); #endif ctxt->event_callback_eip = - (unsigned long)xen_hypervisor_callback; + (unsigned long)xen_asm_exc_xen_hypervisor_callback; ctxt->failsafe_callback_eip = (unsigned long)xen_failsafe_callback; per_cpu(xen_cr3, cpu) = __pa(swapper_pg_dir); diff --git a/arch/x86/xen/xen-asm_32.S b/arch/x86/xen/xen-asm_32.S index 812ff01e4e34..4757cec33abe 100644 --- a/arch/x86/xen/xen-asm_32.S +++ b/arch/x86/xen/xen-asm_32.S @@ -93,7 +93,7 @@ xen_iret_start_crit: /* * If there's something pending, mask events again so we can - * jump back into xen_hypervisor_callback. Otherwise do not + * jump back into exc_xen_hypervisor_callback. Otherwise do not * touch XEN_vcpu_info_mask. */ jne 1f @@ -113,7 +113,7 @@ iret_restore_end: * Events are masked, so jumping out of the critical region is * OK. */ - je xen_hypervisor_callback + je xen_asm_exc_xen_hypervisor_callback 1: iret xen_iret_end_crit: @@ -127,7 +127,7 @@ SYM_CODE_END(xen_iret) .globl xen_iret_start_crit, xen_iret_end_crit /* - * This is called by xen_hypervisor_callback in entry_32.S when it sees + * This is called by xen_asm_exc_xen_hypervisor_callback in entry_32.S when it sees * that the EIP at the time of interrupt was between * xen_iret_start_crit and xen_iret_end_crit. * @@ -144,7 +144,7 @@ SYM_CODE_END(xen_iret) * eflags } * cs } nested exception info * eip } - * return address : (into xen_hypervisor_callback) + * return address : (into xen_asm_exc_xen_hypervisor_callback) * * In order to deliver the nested exception properly, we need to discard the * nested exception frame such that when we handle the exception, we do it @@ -152,7 +152,8 @@ SYM_CODE_END(xen_iret) * * The only caveat is that if the outer eax hasn't been restored yet (i.e. * it's still on stack), we need to restore its value here. - */ +*/ +.pushsection .noinstr.text, "ax" SYM_CODE_START(xen_iret_crit_fixup) /* * Paranoia: Make sure we're really coming from kernel space. 
@@ -181,3 +182,4 @@ SYM_CODE_START(xen_iret_crit_fixup) 2: ret SYM_CODE_END(xen_iret_crit_fixup) +.popsection diff --git a/arch/x86/xen/xen-asm_64.S b/arch/x86/xen/xen-asm_64.S index e46d863bcaa4..19fbbdbcbde9 100644 --- a/arch/x86/xen/xen-asm_64.S +++ b/arch/x86/xen/xen-asm_64.S @@ -54,7 +54,7 @@ xen_pv_trap asm_exc_simd_coprocessor_error #ifdef CONFIG_IA32_EMULATION xen_pv_trap entry_INT80_compat #endif -xen_pv_trap hypervisor_callback +xen_pv_trap asm_exc_xen_hypervisor_callback __INIT SYM_CODE_START(xen_early_idt_handler_array) diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h index 1cc1568bfe04..ad05d0589381 100644 --- a/arch/x86/xen/xen-ops.h +++ b/arch/x86/xen/xen-ops.h @@ -8,7 +8,6 @@ #include /* These are code, but not functions. Defined in entry.S */ -extern const char xen_hypervisor_callback[]; extern const char xen_failsafe_callback[]; void xen_sysenter_target(void); diff --git a/drivers/xen/Makefile b/drivers/xen/Makefile index 0c4efa6fe450..0d322f3d90cd 100644 --- a/drivers/xen/Makefile +++ b/drivers/xen/Makefile @@ -1,6 +1,6 @@ # SPDX-License-Identifier: GPL-2.0 obj-$(CONFIG_HOTPLUG_CPU) += cpu_hotplug.o -obj-y += grant-table.o features.o balloon.o manage.o preempt.o time.o +obj-y += grant-table.o features.o balloon.o manage.o time.o obj-y += mem-reservation.o obj-y += events/ obj-y += xenbus/ diff --git a/drivers/xen/preempt.c b/drivers/xen/preempt.c deleted file mode 100644 index 17240c5325a3..000000000000 --- a/drivers/xen/preempt.c +++ /dev/null @@ -1,42 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * Preemptible hypercalls - * - * Copyright (C) 2014 Citrix Systems R&D ltd. - */ - -#include -#include - -#ifndef CONFIG_PREEMPTION - -/* - * Some hypercalls issued by the toolstack can take many 10s of - * seconds. Allow tasks running hypercalls via the privcmd driver to - * be voluntarily preempted even if full kernel preemption is - * disabled. - * - * Such preemptible hypercalls are bracketed by - * xen_preemptible_hcall_begin() and xen_preemptible_hcall_end() - * calls. - */ - -DEFINE_PER_CPU(bool, xen_in_preemptible_hcall); -EXPORT_SYMBOL_GPL(xen_in_preemptible_hcall); - -asmlinkage __visible void xen_maybe_preempt_hcall(void) -{ - if (unlikely(__this_cpu_read(xen_in_preemptible_hcall) - && need_resched())) { - /* - * Clear flag as we may be rescheduled on a different - * cpu. 
- */ - __this_cpu_write(xen_in_preemptible_hcall, false); - local_irq_enable(); - cond_resched(); - local_irq_disable(); - __this_cpu_write(xen_in_preemptible_hcall, true); - } -} -#endif /* CONFIG_PREEMPTION */ diff --git a/include/xen/xen-ops.h b/include/xen/xen-ops.h index 095be1d66f31..39a5580f8feb 100644 --- a/include/xen/xen-ops.h +++ b/include/xen/xen-ops.h @@ -215,17 +215,7 @@ bool xen_running_on_version_or_later(unsigned int major, unsigned int minor); void xen_efi_runtime_setup(void); -#ifdef CONFIG_PREEMPTION - -static inline void xen_preemptible_hcall_begin(void) -{ -} - -static inline void xen_preemptible_hcall_end(void) -{ -} - -#else +#if defined(CONFIG_XEN_PV) && !defined(CONFIG_PREEMPTION) DECLARE_PER_CPU(bool, xen_in_preemptible_hcall); @@ -239,6 +229,11 @@ static inline void xen_preemptible_hcall_end(void) __this_cpu_write(xen_in_preemptible_hcall, false); } -#endif /* CONFIG_PREEMPTION */ +#else + +static inline void xen_preemptible_hcall_begin(void) { } +static inline void xen_preemptible_hcall_end(void) { } + +#endif /* CONFIG_XEN_PV && !CONFIG_PREEMPTION */ #endif /* INCLUDE_XEN_OPS_H */ -- cgit v1.2.3 From 00cf8baf9c2af3c17f9d77bb9d07d44d330d0df2 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 21 May 2020 22:05:27 +0200 Subject: x86/entry/64: Simplify idtentry_body All C functions which do not have an error code have been converted to the new IDTENTRY interface which does not expect an error code in the arguments. Spare the XORL. Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar Acked-by: Andy Lutomirski Link: https://lore.kernel.org/r/20200521202118.145811853@linutronix.de --- arch/x86/entry/entry_64.S | 2 -- 1 file changed, 2 deletions(-) (limited to 'arch') diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index dadb37d07266..b70c7788ef08 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -531,8 +531,6 @@ SYM_CODE_END(spurious_entries_start) .if \has_error_code == 1 movq ORIG_RAX(%rsp), %rsi /* get error code into 2nd argument*/ movq $-1, ORIG_RAX(%rsp) /* no syscall to restart */ - .else - xorl %esi, %esi /* Clear the error code */ .endif .if \vector == X86_TRAP_PF -- cgit v1.2.3 From 91eeafea1e4b7c95cc4f38af186d7d48fceef89a Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 21 May 2020 22:05:28 +0200 Subject: x86/entry: Switch page fault exception to IDTENTRY_RAW Convert page fault exceptions to IDTENTRY_RAW: - Implement the C entry point with DEFINE_IDTENTRY_RAW - Add the CR2 read into the exception handler - Add the idtentry_enter/exit_cond_rcu() invocations in the regular page fault handler and in the async PF part. - Emit the ASM stub with DECLARE_IDTENTRY_RAW - Remove the ASM idtentry in 64-bit - Remove the CR2 read from 64-bit - Remove the open coded ASM entry code in 32-bit - Fix up the XEN/PV code - Remove the old prototypes No functional change.
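For orientation, a condensed sketch of the conversion pattern, distilled from the fault.c hunk below (elided bodies marked with comments; a sketch, not a complete listing):

	/* Before: the ASM stub read CR2 and passed it as the third argument */
	dotraplinkage void do_page_fault(struct pt_regs *regs,
			unsigned long hw_error_code, unsigned long address);

	/* After: the RAW idtentry reads CR2 itself, first thing, so a
	 * subsequent fault cannot clobber it before it has been saved */
	DEFINE_IDTENTRY_RAW_ERRORCODE(exc_page_fault)
	{
		unsigned long address = read_cr2();
		/* ... idtentry_enter_cond_rcu(), handle_page_fault() ... */
	}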
Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar Acked-by: Andy Lutomirski Link: https://lore.kernel.org/r/20200521202118.238455120@linutronix.de --- arch/x86/entry/entry_32.S | 30 ------------------ arch/x86/entry/entry_64.S | 19 ------------ arch/x86/include/asm/idtentry.h | 3 +- arch/x86/include/asm/traps.h | 11 ------- arch/x86/kernel/idt.c | 4 +-- arch/x86/kernel/kvm.c | 15 +++++---- arch/x86/mm/fault.c | 69 ++++++++++++++++++++++++++++------------- arch/x86/xen/enlighten_pv.c | 2 +- arch/x86/xen/xen-asm_64.S | 2 +- 9 files changed, 63 insertions(+), 92 deletions(-) (limited to 'arch') diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S index 3ab04dca9aab..660ed3ed37dc 100644 --- a/arch/x86/entry/entry_32.S +++ b/arch/x86/entry/entry_32.S @@ -1398,36 +1398,6 @@ BUILD_INTERRUPT3(hv_stimer0_callback_vector, HYPERV_STIMER0_VECTOR, #endif /* CONFIG_HYPERV */ -SYM_CODE_START(page_fault) - ASM_CLAC - pushl $do_page_fault - jmp common_exception_read_cr2 -SYM_CODE_END(page_fault) - -SYM_CODE_START_LOCAL_NOALIGN(common_exception_read_cr2) - /* the function address is in %gs's slot on the stack */ - SAVE_ALL switch_stacks=1 skip_gs=1 unwind_espfix=1 - - ENCODE_FRAME_POINTER - - /* fixup %gs */ - GS_TO_REG %ecx - movl PT_GS(%esp), %edi - REG_TO_PTGS %ecx - SET_KERNEL_GS %ecx - - GET_CR2_INTO(%ecx) # might clobber %eax - - /* fixup orig %eax */ - movl PT_ORIG_EAX(%esp), %edx # get the error code - movl $-1, PT_ORIG_EAX(%esp) # no syscall to restart - - TRACE_IRQS_OFF - movl %esp, %eax # pt_regs pointer - CALL_NOSPEC edi - jmp ret_from_exception -SYM_CODE_END(common_exception_read_cr2) - SYM_CODE_START_LOCAL_NOALIGN(common_exception) /* the function address is in %gs's slot on the stack */ SAVE_ALL switch_stacks=1 skip_gs=1 unwind_espfix=1 diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index b70c7788ef08..5789f76932b6 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -506,15 +506,6 @@ SYM_CODE_END(spurious_entries_start) call error_entry UNWIND_HINT_REGS - .if \vector == X86_TRAP_PF - /* - * Store CR2 early so subsequent faults cannot clobber it. Use R12 as - * intermediate storage as RDX can be clobbered in enter_from_user_mode(). - * GET_CR2_INTO can clobber RAX. - */ - GET_CR2_INTO(%r12); - .endif - .if \sane == 0 TRACE_IRQS_OFF @@ -533,10 +524,6 @@ SYM_CODE_END(spurious_entries_start) movq $-1, ORIG_RAX(%rsp) /* no syscall to restart */ .endif - .if \vector == X86_TRAP_PF - movq %r12, %rdx /* Move CR2 into 3rd argument */ - .endif - call \cfunc .if \sane == 0 @@ -1059,12 +1046,6 @@ apicinterrupt SPURIOUS_APIC_VECTOR spurious_interrupt smp_spurious_interrupt apicinterrupt IRQ_WORK_VECTOR irq_work_interrupt smp_irq_work_interrupt #endif -/* - * Exception entry points. 
- */ - -idtentry X86_TRAP_PF page_fault do_page_fault has_error_code=1 - /* * Reload gs selector with exception handling * edi: new selector diff --git a/arch/x86/include/asm/idtentry.h b/arch/x86/include/asm/idtentry.h index b2a5fe02dcf0..9ec5466e4c05 100644 --- a/arch/x86/include/asm/idtentry.h +++ b/arch/x86/include/asm/idtentry.h @@ -387,7 +387,8 @@ DECLARE_IDTENTRY_ERRORCODE(X86_TRAP_GP, exc_general_protection); DECLARE_IDTENTRY_ERRORCODE(X86_TRAP_AC, exc_alignment_check); /* Raw exception entries which need extra work */ -DECLARE_IDTENTRY_RAW(X86_TRAP_BP, exc_int3); +DECLARE_IDTENTRY_RAW(X86_TRAP_BP, exc_int3); +DECLARE_IDTENTRY_RAW_ERRORCODE(X86_TRAP_PF, exc_page_fault); #ifdef CONFIG_X86_MCE DECLARE_IDTENTRY_MCE(X86_TRAP_MC, exc_machine_check); diff --git a/arch/x86/include/asm/traps.h b/arch/x86/include/asm/traps.h index f5a2e438a878..d7de360eec74 100644 --- a/arch/x86/include/asm/traps.h +++ b/arch/x86/include/asm/traps.h @@ -9,17 +9,6 @@ #include #include /* TRAP_TRACE, ... */ -#define dotraplinkage __visible - -asmlinkage void page_fault(void); -asmlinkage void async_page_fault(void); - -#if defined(CONFIG_X86_64) && defined(CONFIG_XEN_PV) -asmlinkage void xen_page_fault(void); -#endif - -dotraplinkage void do_page_fault(struct pt_regs *regs, unsigned long error_code, unsigned long address); - #ifdef CONFIG_X86_64 asmlinkage __visible notrace struct pt_regs *sync_regs(struct pt_regs *eregs); asmlinkage __visible notrace diff --git a/arch/x86/kernel/idt.c b/arch/x86/kernel/idt.c index ec55479e1dd1..ddb11154aeee 100644 --- a/arch/x86/kernel/idt.c +++ b/arch/x86/kernel/idt.c @@ -62,7 +62,7 @@ static const __initconst struct idt_data early_idts[] = { INTG(X86_TRAP_DB, asm_exc_debug), SYSG(X86_TRAP_BP, asm_exc_int3), #ifdef CONFIG_X86_32 - INTG(X86_TRAP_PF, page_fault), + INTG(X86_TRAP_PF, asm_exc_page_fault), #endif }; @@ -156,7 +156,7 @@ static const __initconst struct idt_data apic_idts[] = { * stacks work only after cpu_init(). */ static const __initconst struct idt_data early_pf_idts[] = { - INTG(X86_TRAP_PF, page_fault), + INTG(X86_TRAP_PF, asm_exc_page_fault), }; /* diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c index d6f22a3a1f7d..d00f7c430e65 100644 --- a/arch/x86/kernel/kvm.c +++ b/arch/x86/kernel/kvm.c @@ -218,7 +218,7 @@ again: } EXPORT_SYMBOL_GPL(kvm_async_pf_task_wake); -u32 kvm_read_and_reset_apf_flags(void) +noinstr u32 kvm_read_and_reset_apf_flags(void) { u32 flags = 0; @@ -230,11 +230,11 @@ u32 kvm_read_and_reset_apf_flags(void) return flags; } EXPORT_SYMBOL_GPL(kvm_read_and_reset_apf_flags); -NOKPROBE_SYMBOL(kvm_read_and_reset_apf_flags); -bool __kvm_handle_async_pf(struct pt_regs *regs, u32 token) +noinstr bool __kvm_handle_async_pf(struct pt_regs *regs, u32 token) { u32 reason = kvm_read_and_reset_apf_flags(); + bool rcu_exit; switch (reason) { case KVM_PV_REASON_PAGE_NOT_PRESENT: @@ -244,6 +244,9 @@ bool __kvm_handle_async_pf(struct pt_regs *regs, u32 token) return false; } + rcu_exit = idtentry_enter_cond_rcu(regs); + instrumentation_begin(); + /* * If the host managed to inject an async #PF into an interrupt * disabled region, then die hard as this is not going to end well @@ -258,13 +261,13 @@ bool __kvm_handle_async_pf(struct pt_regs *regs, u32 token) /* Page is swapped out by the host. 
*/ kvm_async_pf_task_wait_schedule(token); } else { - rcu_irq_enter(); kvm_async_pf_task_wake(token); - rcu_irq_exit(); } + + instrumentation_end(); + idtentry_exit_cond_rcu(regs, rcu_exit); return true; } -NOKPROBE_SYMBOL(__kvm_handle_async_pf); static void __init paravirt_ops_setup(void) { diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index d7b52a2a1bce..eef29bb53cd0 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c @@ -1357,11 +1357,38 @@ trace_page_fault_entries(struct pt_regs *regs, unsigned long error_code, trace_page_fault_kernel(address, regs, error_code); } -dotraplinkage void -do_page_fault(struct pt_regs *regs, unsigned long hw_error_code, - unsigned long address) +static __always_inline void +handle_page_fault(struct pt_regs *regs, unsigned long error_code, + unsigned long address) +{ + trace_page_fault_entries(regs, error_code, address); + + if (unlikely(kmmio_fault(regs, address))) + return; + + /* Was the fault on kernel-controlled part of the address space? */ + if (unlikely(fault_in_kernel_space(address))) { + do_kern_addr_fault(regs, error_code, address); + } else { + do_user_addr_fault(regs, error_code, address); + /* + * User address page fault handling might have reenabled + * interrupts. Fixing up all potential exit points of + * do_user_addr_fault() and its leaf functions is just not + * doable w/o creating an unholy mess or turning the code + * upside down. + */ + local_irq_disable(); + } +} + +DEFINE_IDTENTRY_RAW_ERRORCODE(exc_page_fault) { + unsigned long address = read_cr2(); + bool rcu_exit; + prefetchw(&current->mm->mmap_lock); + /* * KVM has two types of events that are, logically, interrupts, but * are unfortunately delivered using the #PF vector. These events are @@ -1376,28 +1403,28 @@ do_page_fault(struct pt_regs *regs, unsigned long hw_error_code, * getting values from real and async page faults mixed up. * * Fingers crossed. + * + * The async #PF handling code takes care of idtentry handling + * itself. */ if (kvm_handle_async_pf(regs, (u32)address)) return; - trace_page_fault_entries(regs, hw_error_code, address); + /* + * Entry handling for valid #PF from kernel mode is slightly + * different: RCU is already watching and rcu_irq_enter() must not + * be invoked because a kernel fault on a user space address might + * sleep. + * + * In case the fault hit a RCU idle region the conditional entry + * code reenabled RCU to avoid subsequent wreckage which helps + * debugability. + */ + rcu_exit = idtentry_enter_cond_rcu(regs); - if (unlikely(kmmio_fault(regs, address))) - return; + instrumentation_begin(); + handle_page_fault(regs, error_code, address); + instrumentation_end(); - /* Was the fault on kernel-controlled part of the address space? */ - if (unlikely(fault_in_kernel_space(address))) { - do_kern_addr_fault(regs, hw_error_code, address); - } else { - do_user_addr_fault(regs, hw_error_code, address); - /* - * User address page fault handling might have reenabled - * interrupts. Fixing up all potential exit points of - * do_user_addr_fault() and its leaf functions is just not - * doable w/o creating an unholy mess or turning the code - * upside down.
- */ - local_irq_disable(); - } + idtentry_exit_cond_rcu(regs, rcu_exit); } -NOKPROBE_SYMBOL(do_page_fault); diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c index 008291121cb4..33b309d65955 100644 --- a/arch/x86/xen/enlighten_pv.c +++ b/arch/x86/xen/enlighten_pv.c @@ -626,7 +626,7 @@ static struct trap_array_entry trap_array[] = { #ifdef CONFIG_IA32_EMULATION { entry_INT80_compat, xen_entry_INT80_compat, false }, #endif - { page_fault, xen_page_fault, false }, + TRAP_ENTRY(exc_page_fault, false ), TRAP_ENTRY(exc_divide_error, false ), TRAP_ENTRY(exc_bounds, false ), TRAP_ENTRY(exc_invalid_op, false ), diff --git a/arch/x86/xen/xen-asm_64.S b/arch/x86/xen/xen-asm_64.S index 19fbbdbcbde9..5d252aaeade8 100644 --- a/arch/x86/xen/xen-asm_64.S +++ b/arch/x86/xen/xen-asm_64.S @@ -43,7 +43,7 @@ xen_pv_trap asm_exc_invalid_tss xen_pv_trap asm_exc_segment_not_present xen_pv_trap asm_exc_stack_segment xen_pv_trap asm_exc_general_protection -xen_pv_trap page_fault +xen_pv_trap asm_exc_page_fault xen_pv_trap asm_exc_spurious_interrupt_bug xen_pv_trap asm_exc_coprocessor_error xen_pv_trap asm_exc_alignment_check -- cgit v1.2.3 From e2dcb5f1390715244aec12dbd6f294863ca37b88 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 21 May 2020 22:05:29 +0200 Subject: x86/entry: Remove the transition leftovers Now that all exceptions are converted over, the sane flag is no longer needed. Also the vector argument of idtentry_body on 64-bit is pointless now. Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar Acked-by: Andy Lutomirski Link: https://lore.kernel.org/r/20200521202118.331115895@linutronix.de --- arch/x86/entry/entry_32.S | 3 +-- arch/x86/entry/entry_64.S | 26 ++++----------------------- arch/x86/include/asm/idtentry.h | 6 +++--- 3 files changed, 8 insertions(+), 27 deletions(-) (limited to 'arch') diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S index 660ed3ed37dc..6c6ae3a8c1fc 100644 --- a/arch/x86/entry/entry_32.S +++ b/arch/x86/entry/entry_32.S @@ -734,9 +734,8 @@ * @asmsym: ASM symbol for the entry point * @cfunc: C function to be called * @has_error_code: Hardware pushed error code on stack - * @sane: Compatibility flag with 64bit */ -.macro idtentry vector asmsym cfunc has_error_code:req sane=0 +.macro idtentry vector asmsym cfunc has_error_code:req SYM_CODE_START(\asmsym) ASM_CLAC cld diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index 5789f76932b6..2e476f488ace 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -496,27 +496,14 @@ SYM_CODE_END(spurious_entries_start) /** * idtentry_body - Macro to emit code calling the C function - * @vector: Vector number * @cfunc: C function to be called * @has_error_code: Hardware pushed error code on stack - * @sane: Sane variant which handles irq tracing, context tracking in C */ -.macro idtentry_body vector cfunc has_error_code:req sane=0 +.macro idtentry_body cfunc has_error_code:req call error_entry UNWIND_HINT_REGS - .if \sane == 0 - TRACE_IRQS_OFF - -#ifdef CONFIG_CONTEXT_TRACKING - testb $3, CS(%rsp) - jz .Lfrom_kernel_no_ctxt_tracking_\@ - CALL_enter_from_user_mode -.Lfrom_kernel_no_ctxt_tracking_\@: -#endif - .endif - movq %rsp, %rdi /* pt_regs pointer into 1st argument*/ .if \has_error_code == 1 @@ -526,11 +513,7 @@ SYM_CODE_END(spurious_entries_start) call \cfunc - .if \sane == 0 - jmp error_exit - .else jmp error_return - .endif .endm /** @@ -539,12 +522,11 @@ SYM_CODE_END(spurious_entries_start) * @asmsym: ASM symbol for the entry point * @cfunc: C
function to be called * @has_error_code: Hardware pushed error code on stack - * @sane: Sane variant which handles irq tracing, context tracking in C * * The macro emits code to set up the kernel context for straight forward * and simple IDT entries. No IST stack, no paranoid entry checks. */ -.macro idtentry vector asmsym cfunc has_error_code:req sane=0 +.macro idtentry vector asmsym cfunc has_error_code:req SYM_CODE_START(\asmsym) UNWIND_HINT_IRET_REGS offset=\has_error_code*8 ASM_CLAC @@ -567,7 +549,7 @@ SYM_CODE_START(\asmsym) .Lfrom_usermode_no_gap_\@: .endif - idtentry_body \vector \cfunc \has_error_code \sane + idtentry_body \cfunc \has_error_code _ASM_NOKPROBE(\asmsym) SYM_CODE_END(\asmsym) @@ -642,7 +624,7 @@ SYM_CODE_START(\asmsym) /* Switch to the regular task stack and use the noist entry point */ .Lfrom_usermode_switch_stack_\@: - idtentry_body vector noist_\cfunc, has_error_code=0 sane=1 + idtentry_body noist_\cfunc, has_error_code=0 _ASM_NOKPROBE(\asmsym) SYM_CODE_END(\asmsym) diff --git a/arch/x86/include/asm/idtentry.h b/arch/x86/include/asm/idtentry.h index 9ec5466e4c05..36e5b929389b 100644 --- a/arch/x86/include/asm/idtentry.h +++ b/arch/x86/include/asm/idtentry.h @@ -298,10 +298,10 @@ __visible noinstr void func(struct pt_regs *regs, \ * The ASM variants for DECLARE_IDTENTRY*() which emit the ASM entry stubs. */ #define DECLARE_IDTENTRY(vector, func) \ - idtentry vector asm_##func func has_error_code=0 sane=1 + idtentry vector asm_##func func has_error_code=0 #define DECLARE_IDTENTRY_ERRORCODE(vector, func) \ - idtentry vector asm_##func func has_error_code=1 sane=1 + idtentry vector asm_##func func has_error_code=1 /* Special case for 32bit IRET 'trap'. Do not emit ASM code */ #define DECLARE_IDTENTRY_SW(vector, func) @@ -345,7 +345,7 @@ __visible noinstr void func(struct pt_regs *regs, \ /* XEN NMI and DB wrapper */ #define DECLARE_IDTENTRY_XEN(vector, func) \ - idtentry vector asm_exc_xen##func exc_##func has_error_code=0 sane=1 + idtentry vector asm_exc_xen##func exc_##func has_error_code=0 #endif /* __ASSEMBLY__ */ -- cgit v1.2.3 From e88d974136dbb5d6962eeb63075900603e737a1e Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 21 May 2020 22:05:30 +0200 Subject: x86/entry: Change exit path of xen_failsafe_callback xen_failsafe_callback() is invoked from XEN for two cases: 1. Fault while reloading DS, ES, FS or GS 2. Fault while executing IRET #1 retries the IRET after XEN has fixed up the segments. #2 injects a #GP which kills the task. For #1 there is no reason to go through the full exception return path because the task's TIF state is still the same. So just going straight to the IRET path is good enough.
Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar Acked-by: Andy Lutomirski Link: https://lore.kernel.org/r/20200521202118.423224507@linutronix.de --- arch/x86/entry/entry_32.S | 2 +- arch/x86/entry/entry_64.S | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S index 6c6ae3a8c1fc..6fcdee9feba0 100644 --- a/arch/x86/entry/entry_32.S +++ b/arch/x86/entry/entry_32.S @@ -1355,7 +1355,7 @@ SYM_FUNC_START(xen_failsafe_callback) 5: pushl $-1 /* orig_ax = -1 => not a system call */ SAVE_ALL ENCODE_FRAME_POINTER - jmp ret_from_exception + jmp handle_exception_return .section .fixup, "ax" 6: xorl %eax, %eax diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index 2e476f488ace..a526fb57b65d 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -1175,7 +1175,7 @@ SYM_CODE_START(xen_failsafe_callback) pushq $-1 /* orig_ax = -1 => not a system call */ PUSH_AND_CLEAR_REGS ENCODE_FRAME_POINTER - jmp error_exit + jmp error_return SYM_CODE_END(xen_failsafe_callback) #endif /* CONFIG_XEN_PV */ -- cgit v1.2.3 From 23d73f2ad4e7e7ce8bebdd1e138c8c79439dc301 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 21 May 2020 22:05:31 +0200 Subject: x86/entry/64: Remove error_exit() No more users. Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar Acked-by: Andy Lutomirski Link: https://lore.kernel.org/r/20200521202118.516757524@linutronix.de --- arch/x86/entry/entry_64.S | 9 --------- 1 file changed, 9 deletions(-) (limited to 'arch') diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index a526fb57b65d..76993591fdf6 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -1356,15 +1356,6 @@ SYM_CODE_START_LOCAL(error_entry) jmp .Lerror_entry_from_usermode_after_swapgs SYM_CODE_END(error_entry) -SYM_CODE_START_LOCAL(error_exit) - UNWIND_HINT_REGS - DISABLE_INTERRUPTS(CLBR_ANY) - TRACE_IRQS_OFF - testb $3, CS(%rsp) - jz retint_kernel - jmp .Lretint_user -SYM_CODE_END(error_exit) - SYM_CODE_START_LOCAL(error_return) UNWIND_HINT_REGS DEBUG_ENTRY_ASSERT_IRQS_OFF -- cgit v1.2.3 From 74ebed3193aa4964b6cb9d146c9c01c7759ef4f2 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 21 May 2020 22:05:32 +0200 Subject: x86/entry/32: Remove common_exception() No more users. 
Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar Acked-by: Andy Lutomirski Link: https://lore.kernel.org/r/20200521202118.611906966@linutronix.de --- arch/x86/entry/entry_32.S | 21 --------------------- 1 file changed, 21 deletions(-) (limited to 'arch') diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S index 6fcdee9feba0..158a5250ebc5 100644 --- a/arch/x86/entry/entry_32.S +++ b/arch/x86/entry/entry_32.S @@ -1397,27 +1397,6 @@ BUILD_INTERRUPT3(hv_stimer0_callback_vector, HYPERV_STIMER0_VECTOR, #endif /* CONFIG_HYPERV */ -SYM_CODE_START_LOCAL_NOALIGN(common_exception) - /* the function address is in %gs's slot on the stack */ - SAVE_ALL switch_stacks=1 skip_gs=1 unwind_espfix=1 - ENCODE_FRAME_POINTER - - /* fixup %gs */ - GS_TO_REG %ecx - movl PT_GS(%esp), %edi # get the function address - REG_TO_PTGS %ecx - SET_KERNEL_GS %ecx - - /* fixup orig %eax */ - movl PT_ORIG_EAX(%esp), %edx # get the error code - movl $-1, PT_ORIG_EAX(%esp) # no syscall to restart - - TRACE_IRQS_OFF - movl %esp, %eax # pt_regs pointer - CALL_NOSPEC edi - jmp ret_from_exception -SYM_CODE_END(common_exception) - SYM_CODE_START_LOCAL_NOALIGN(handle_exception) /* the function address is in %gs's slot on the stack */ SAVE_ALL switch_stacks=1 skip_gs=1 unwind_espfix=1 -- cgit v1.2.3 From 79b9c183021ef3f5ca2d5168cd3fd442580eca09 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 21 May 2020 22:05:33 +0200 Subject: x86/irq: Use generic irq_regs implementation The only difference is the name of the per-CPU variable: irq_regs vs. __irq_regs, but the accessor functions are identical. Remove the pointless copy and use the generic variant. Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar Acked-by: Andy Lutomirski Link: https://lore.kernel.org/r/20200521202118.704169051@linutronix.de --- arch/x86/include/asm/irq_regs.h | 32 -------------------------------- arch/x86/kernel/irq.c | 3 --- 2 files changed, 35 deletions(-) delete mode 100644 arch/x86/include/asm/irq_regs.h (limited to 'arch') diff --git a/arch/x86/include/asm/irq_regs.h b/arch/x86/include/asm/irq_regs.h deleted file mode 100644 index 187ce59aea28..000000000000 --- a/arch/x86/include/asm/irq_regs.h +++ /dev/null @@ -1,32 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * Per-cpu current frame pointer - the location of the last exception frame on - * the stack, stored in the per-cpu area. 
- * - * Jeremy Fitzhardinge -*/ -#ifndef _ASM_X86_IRQ_REGS_H -#define _ASM_X86_IRQ_REGS_H - -#include - -#define ARCH_HAS_OWN_IRQ_REGS - -DECLARE_PER_CPU(struct pt_regs *, irq_regs); - -static inline struct pt_regs *get_irq_regs(void) -{ - return __this_cpu_read(irq_regs); -} - -static inline struct pt_regs *set_irq_regs(struct pt_regs *new_regs) -{ - struct pt_regs *old_regs; - - old_regs = get_irq_regs(); - __this_cpu_write(irq_regs, new_regs); - - return old_regs; -} - -#endif /* _ASM_X86_IRQ_REGS_32_H */ diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c index c7965ff429c5..252065d32ab5 100644 --- a/arch/x86/kernel/irq.c +++ b/arch/x86/kernel/irq.c @@ -26,9 +26,6 @@ DEFINE_PER_CPU_SHARED_ALIGNED(irq_cpustat_t, irq_stat); EXPORT_PER_CPU_SYMBOL(irq_stat); -DEFINE_PER_CPU(struct pt_regs *, irq_regs); -EXPORT_PER_CPU_SYMBOL(irq_regs); - atomic_t irq_err_count; /* -- cgit v1.2.3 From 633260fa143bbed05e65dc557a492667dfdc45bb Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 21 May 2020 22:05:34 +0200 Subject: x86/irq: Convey vector as argument and not in ptregs Device interrupts which go through do_IRQ() or the spurious interrupt handler have their separate entry code on 64 bit for no good reason. Both 32 and 64 bit transport the vector number through ORIG_[RE]AX in pt_regs. Further the vector number is forced to fit into a u8 and is complemented and offset by 0x80 so it's in the signed character range. Otherwise GAS would expand the pushq to a 5 byte instruction for any vector > 0x7F. Treat the vector number like an error code and hand it to the C function as an argument. This allows getting rid of the extra entry code in a later step. Simplify the error code push magic by implementing the pushq imm8 via a '.byte 0x6a, vector' sequence so GAS is not able to screw it up. As the pushq imm8 is sign-extending, the resulting error code needs to be truncated to 8 bits in C code. Originally-by: Andy Lutomirski Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar Acked-by: Andy Lutomirski Link: https://lore.kernel.org/r/20200521202118.796915981@linutronix.de --- arch/x86/entry/calling.h | 5 +++- arch/x86/entry/entry_32.S | 33 ++++----------------------- arch/x86/entry/entry_64.S | 40 ++++++-------------------------- arch/x86/include/asm/entry_arch.h | 2 +- arch/x86/include/asm/hw_irq.h | 1 + arch/x86/include/asm/idtentry.h | 48 +++++++++++++++++++++++++++++++++++++++ arch/x86/include/asm/irq.h | 2 +- arch/x86/include/asm/traps.h | 3 ++- arch/x86/kernel/apic/apic.c | 31 ++++++++++++++++++++----- arch/x86/kernel/idt.c | 2 +- arch/x86/kernel/irq.c | 14 ++++++++---- 11 files changed, 103 insertions(+), 78 deletions(-) (limited to 'arch') diff --git a/arch/x86/entry/calling.h b/arch/x86/entry/calling.h index 1c7f13bb6728..98da0d3c0b1a 100644 --- a/arch/x86/entry/calling.h +++ b/arch/x86/entry/calling.h @@ -341,7 +341,10 @@ For 32-bit we have the following conventions - kernel is built with #endif .endm -#endif /* CONFIG_X86_64 */ +#else /* CONFIG_X86_64 */ +# undef UNWIND_HINT_IRET_REGS +# define UNWIND_HINT_IRET_REGS +#endif /* !CONFIG_X86_64 */ .macro STACKLEAK_ERASE #ifdef CONFIG_GCC_PLUGIN_STACKLEAK diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S index 158a5250ebc5..40092c81dcb8 100644 --- a/arch/x86/entry/entry_32.S +++ b/arch/x86/entry/entry_32.S @@ -1215,40 +1215,15 @@ SYM_FUNC_END(entry_INT80_32) #endif .endm -/* - * Build the entry stubs with some assembler magic. - * We pack 1 stub into every 8-byte block.
- */ - .align 8 -SYM_CODE_START(irq_entries_start) - vector=FIRST_EXTERNAL_VECTOR - .rept (FIRST_SYSTEM_VECTOR - FIRST_EXTERNAL_VECTOR) - pushl $(~vector+0x80) /* Note: always in signed byte range */ - vector=vector+1 - jmp common_interrupt - .align 8 - .endr -SYM_CODE_END(irq_entries_start) - #ifdef CONFIG_X86_LOCAL_APIC - .align 8 -SYM_CODE_START(spurious_entries_start) - vector=FIRST_SYSTEM_VECTOR - .rept (NR_VECTORS - FIRST_SYSTEM_VECTOR) - pushl $(~vector+0x80) /* Note: always in signed byte range */ - vector=vector+1 - jmp common_spurious - .align 8 - .endr -SYM_CODE_END(spurious_entries_start) - SYM_CODE_START_LOCAL(common_spurious) ASM_CLAC - addl $-0x80, (%esp) /* Adjust vector into the [-256, -1] range */ SAVE_ALL switch_stacks=1 ENCODE_FRAME_POINTER TRACE_IRQS_OFF movl %esp, %eax + movl PT_ORIG_EAX(%esp), %edx /* get the vector from stack */ + movl $-1, PT_ORIG_EAX(%esp) /* no syscall to restart */ call smp_spurious_interrupt jmp ret_from_intr SYM_CODE_END(common_spurious) @@ -1261,12 +1236,12 @@ SYM_CODE_END(common_spurious) .p2align CONFIG_X86_L1_CACHE_SHIFT SYM_CODE_START_LOCAL(common_interrupt) ASM_CLAC - addl $-0x80, (%esp) /* Adjust vector into the [-256, -1] range */ - SAVE_ALL switch_stacks=1 ENCODE_FRAME_POINTER TRACE_IRQS_OFF movl %esp, %eax + movl PT_ORIG_EAX(%esp), %edx /* get the vector from stack */ + movl $-1, PT_ORIG_EAX(%esp) /* no syscall to restart */ call do_IRQ jmp ret_from_intr SYM_CODE_END(common_interrupt) diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index 76993591fdf6..e7434cda9a38 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -358,34 +358,6 @@ SYM_CODE_START(ret_from_fork) SYM_CODE_END(ret_from_fork) .popsection -/* - * Build the entry stubs with some assembler magic. - * We pack 1 stub into every 8-byte block. - */ - .align 8 -SYM_CODE_START(irq_entries_start) - vector=FIRST_EXTERNAL_VECTOR - .rept (FIRST_SYSTEM_VECTOR - FIRST_EXTERNAL_VECTOR) - UNWIND_HINT_IRET_REGS - pushq $(~vector+0x80) /* Note: always in signed byte range */ - jmp common_interrupt - .align 8 - vector=vector+1 - .endr -SYM_CODE_END(irq_entries_start) - - .align 8 -SYM_CODE_START(spurious_entries_start) - vector=FIRST_SYSTEM_VECTOR - .rept (NR_VECTORS - FIRST_SYSTEM_VECTOR) - UNWIND_HINT_IRET_REGS - pushq $(~vector+0x80) /* Note: always in signed byte range */ - jmp common_spurious - .align 8 - vector=vector+1 - .endr -SYM_CODE_END(spurious_entries_start) - .macro DEBUG_ENTRY_ASSERT_IRQS_OFF #ifdef CONFIG_DEBUG_ENTRY pushq %rax @@ -755,13 +727,14 @@ _ASM_NOKPROBE(interrupt_entry) /* Interrupt entry/exit. */ /* - * The interrupt stubs push (~vector+0x80) onto the stack and + * The interrupt stubs push vector onto the stack and * then jump to common_spurious/interrupt. */ SYM_CODE_START_LOCAL(common_spurious) - addq $-0x80, (%rsp) /* Adjust vector to [-256, -1] range */ call interrupt_entry UNWIND_HINT_REGS indirect=1 + movq ORIG_RAX(%rdi), %rsi /* get vector from stack */ + movq $-1, ORIG_RAX(%rdi) /* no syscall to restart */ call smp_spurious_interrupt /* rdi points to pt_regs */ jmp ret_from_intr SYM_CODE_END(common_spurious) @@ -770,10 +743,11 @@ _ASM_NOKPROBE(common_spurious) /* common_interrupt is a hotpath. 
Align it */ .p2align CONFIG_X86_L1_CACHE_SHIFT SYM_CODE_START_LOCAL(common_interrupt) - addq $-0x80, (%rsp) /* Adjust vector to [-256, -1] range */ call interrupt_entry UNWIND_HINT_REGS indirect=1 - call do_IRQ /* rdi points to pt_regs */ + movq ORIG_RAX(%rdi), %rsi /* get vector from stack */ + movq $-1, ORIG_RAX(%rdi) /* no syscall to restart */ + call do_IRQ /* rdi points to pt_regs */ /* 0(%rsp): old RSP */ ret_from_intr: DISABLE_INTERRUPTS(CLBR_ANY) @@ -1022,7 +996,7 @@ apicinterrupt RESCHEDULE_VECTOR reschedule_interrupt smp_reschedule_interrupt #endif apicinterrupt ERROR_APIC_VECTOR error_interrupt smp_error_interrupt -apicinterrupt SPURIOUS_APIC_VECTOR spurious_interrupt smp_spurious_interrupt +apicinterrupt SPURIOUS_APIC_VECTOR spurious_apic_interrupt smp_spurious_apic_interrupt #ifdef CONFIG_IRQ_WORK apicinterrupt IRQ_WORK_VECTOR irq_work_interrupt smp_irq_work_interrupt diff --git a/arch/x86/include/asm/entry_arch.h b/arch/x86/include/asm/entry_arch.h index 416422762845..cd57ce6134c9 100644 --- a/arch/x86/include/asm/entry_arch.h +++ b/arch/x86/include/asm/entry_arch.h @@ -35,7 +35,7 @@ BUILD_INTERRUPT(kvm_posted_intr_nested_ipi, POSTED_INTR_NESTED_VECTOR) BUILD_INTERRUPT(apic_timer_interrupt,LOCAL_TIMER_VECTOR) BUILD_INTERRUPT(error_interrupt,ERROR_APIC_VECTOR) -BUILD_INTERRUPT(spurious_interrupt,SPURIOUS_APIC_VECTOR) +BUILD_INTERRUPT(spurious_apic_interrupt,SPURIOUS_APIC_VECTOR) BUILD_INTERRUPT(x86_platform_ipi, X86_PLATFORM_IPI_VECTOR) #ifdef CONFIG_IRQ_WORK diff --git a/arch/x86/include/asm/hw_irq.h b/arch/x86/include/asm/hw_irq.h index 4154bc5f6a4e..0ffe80792b2d 100644 --- a/arch/x86/include/asm/hw_irq.h +++ b/arch/x86/include/asm/hw_irq.h @@ -39,6 +39,7 @@ extern asmlinkage void irq_work_interrupt(void); extern asmlinkage void uv_bau_message_intr1(void); extern asmlinkage void spurious_interrupt(void); +extern asmlinkage void spurious_apic_interrupt(void); extern asmlinkage void thermal_interrupt(void); extern asmlinkage void reschedule_interrupt(void); diff --git a/arch/x86/include/asm/idtentry.h b/arch/x86/include/asm/idtentry.h index 36e5b929389b..2fc0dc8af2a4 100644 --- a/arch/x86/include/asm/idtentry.h +++ b/arch/x86/include/asm/idtentry.h @@ -347,6 +347,54 @@ __visible noinstr void func(struct pt_regs *regs, \ #define DECLARE_IDTENTRY_XEN(vector, func) \ idtentry vector asm_exc_xen##func exc_##func has_error_code=0 +/* + * ASM code to emit the common vector entry stubs where each stub is + * packed into 8 bytes. + * + * Note, that the 'pushq imm8' is emitted via '.byte 0x6a, vector' because + * GCC treats the local vector variable as unsigned int and would expand + * all vectors above 0x7F to a 5 byte push. The original code did an + * adjustment of the vector number to be in the signed byte range to avoid + * this. While clever it's mindboggling counterintuitive and requires the + * odd conversion back to a real vector number in the C entry points. Using + * .byte achieves the same thing and the only fixup needed in the C entry + * point is to mask off the bits above bit 7 because the push is sign + * extending. + */ + .align 8 +SYM_CODE_START(irq_entries_start) + vector=FIRST_EXTERNAL_VECTOR + pos = . + .rept (FIRST_SYSTEM_VECTOR - FIRST_EXTERNAL_VECTOR) + UNWIND_HINT_IRET_REGS + .byte 0x6a, vector + jmp common_interrupt + nop + /* Ensure that the above is 8 bytes max */ + . 
= pos + 8 + pos=pos+8 + vector=vector+1 + .endr +SYM_CODE_END(irq_entries_start) + +#ifdef CONFIG_X86_LOCAL_APIC + .align 8 +SYM_CODE_START(spurious_entries_start) + vector=FIRST_SYSTEM_VECTOR + pos = . + .rept (NR_VECTORS - FIRST_SYSTEM_VECTOR) + UNWIND_HINT_IRET_REGS + .byte 0x6a, vector + jmp common_spurious + nop + /* Ensure that the above is 8 bytes max */ + . = pos + 8 + pos=pos+8 + vector=vector+1 + .endr +SYM_CODE_END(spurious_entries_start) +#endif + #endif /* __ASSEMBLY__ */ /* diff --git a/arch/x86/include/asm/irq.h b/arch/x86/include/asm/irq.h index 72fba0eeeb30..74690a373c58 100644 --- a/arch/x86/include/asm/irq.h +++ b/arch/x86/include/asm/irq.h @@ -36,7 +36,7 @@ extern void native_init_IRQ(void); extern void handle_irq(struct irq_desc *desc, struct pt_regs *regs); -extern __visible void do_IRQ(struct pt_regs *regs); +extern __visible void do_IRQ(struct pt_regs *regs, unsigned long vector); extern void init_ISA_irqs(void); diff --git a/arch/x86/include/asm/traps.h b/arch/x86/include/asm/traps.h index d7de360eec74..32b2becf7806 100644 --- a/arch/x86/include/asm/traps.h +++ b/arch/x86/include/asm/traps.h @@ -41,8 +41,9 @@ asmlinkage void smp_deferred_error_interrupt(struct pt_regs *regs); #endif void smp_apic_timer_interrupt(struct pt_regs *regs); -void smp_spurious_interrupt(struct pt_regs *regs); void smp_error_interrupt(struct pt_regs *regs); +void smp_spurious_apic_interrupt(struct pt_regs *regs); +void smp_spurious_interrupt(struct pt_regs *regs, unsigned long vector); asmlinkage void smp_irq_move_cleanup_interrupt(void); #ifdef CONFIG_VMAP_STACK diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index 4b1d31be50b4..6c2b807a7eae 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c @@ -2120,15 +2120,29 @@ void __init register_lapic_address(unsigned long address) * Local APIC interrupts */ -/* - * This interrupt should _never_ happen with our APIC/SMP architecture +/** + * smp_spurious_interrupt - Catch all for interrupts raised on unused vectors + * @regs: Pointer to pt_regs on stack + * @error_code: The vector number is in the lower 8 bits + * + * This is invoked from ASM entry code to catch all interrupts which + * trigger on an entry which is routed to the common_spurious idtentry + * point. + * + * Also called from smp_spurious_apic_interrupt(). */ -__visible void __irq_entry smp_spurious_interrupt(struct pt_regs *regs) +__visible void __irq_entry smp_spurious_interrupt(struct pt_regs *regs, + unsigned long vector) { - u8 vector = ~regs->orig_ax; u32 v; entering_irq(); + /* + * The push in the entry ASM code which stores the vector number on + * the stack in the error code slot is sign expanding. Just use the + * lower 8 bits. + */ + vector &= 0xFF; trace_spurious_apic_entry(vector); inc_irq_stat(irq_spurious_count); @@ -2149,11 +2163,11 @@ __visible void __irq_entry smp_spurious_interrupt(struct pt_regs *regs) */ v = apic_read(APIC_ISR + ((vector & ~0x1f) >> 1)); if (v & (1 << (vector & 0x1f))) { - pr_info("Spurious interrupt (vector 0x%02x) on CPU#%d. Acked\n", + pr_info("Spurious interrupt (vector 0x%02lx) on CPU#%d. Acked\n", vector, smp_processor_id()); ack_APIC_irq(); } else { - pr_info("Spurious interrupt (vector 0x%02x) on CPU#%d. Not pending!\n", + pr_info("Spurious interrupt (vector 0x%02lx) on CPU#%d. 
Not pending!\n", vector, smp_processor_id()); } out: @@ -2161,6 +2175,11 @@ out: exiting_irq(); } +__visible void smp_spurious_apic_interrupt(struct pt_regs *regs) +{ + smp_spurious_interrupt(regs, SPURIOUS_APIC_VECTOR); +} + /* * This interrupt should never happen with our APIC/SMP architecture */ diff --git a/arch/x86/kernel/idt.c b/arch/x86/kernel/idt.c index ddb11154aeee..20408e31c18d 100644 --- a/arch/x86/kernel/idt.c +++ b/arch/x86/kernel/idt.c @@ -145,7 +145,7 @@ static const __initconst struct idt_data apic_idts[] = { #ifdef CONFIG_X86_UV INTG(UV_BAU_MESSAGE, uv_bau_message_intr1), #endif - INTG(SPURIOUS_APIC_VECTOR, spurious_interrupt), + INTG(SPURIOUS_APIC_VECTOR, spurious_apic_interrupt), INTG(ERROR_APIC_VECTOR, error_interrupt), #endif }; diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c index 252065d32ab5..c7669363251a 100644 --- a/arch/x86/kernel/irq.c +++ b/arch/x86/kernel/irq.c @@ -227,14 +227,18 @@ u64 arch_irq_stat(void) * SMP cross-CPU interrupts have their own specific * handlers). */ -__visible void __irq_entry do_IRQ(struct pt_regs *regs) +__visible void __irq_entry do_IRQ(struct pt_regs *regs, unsigned long vector) { struct pt_regs *old_regs = set_irq_regs(regs); - struct irq_desc * desc; - /* high bit used in ret_from_ code */ - unsigned vector = ~regs->orig_ax; + struct irq_desc *desc; entering_irq(); + /* + * The push in the entry ASM code which stores the vector number on + * the stack in the error code slot is sign expanding. Just use the + * lower 8 bits. + */ + vector &= 0xFF; /* entering_irq() tells RCU that we're not quiescent. Check it. */ RCU_LOCKDEP_WARN(!rcu_is_watching(), "IRQ failed to wake up RCU"); @@ -249,7 +253,7 @@ __visible void __irq_entry do_IRQ(struct pt_regs *regs) ack_APIC_irq(); if (desc == VECTOR_UNUSED) { - pr_emerg_ratelimited("%s: %d.%d No irq handler for vector\n", + pr_emerg_ratelimited("%s: %d.%lu No irq handler for vector\n", __func__, smp_processor_id(), vector); } else { -- cgit v1.2.3 From 7c2a57364cae0f2e070a27d728f1df6844ffff56 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 21 May 2020 22:05:35 +0200 Subject: x86/irq: Rework handle_irq() for 64-bit To consolidate the interrupt entry/exit code vs. the other exceptions, make handle_irq() an inline function and handle both 64-bit and 32-bit mode. Preparatory change to move irq stack switching for 64-bit to C which allows consolidating the entry/exit handling by reusing the idtentry machinery both in ASM and C.
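The dispatch helper introduced by the irq.c hunk below leans on run_on_irqstack_cond(); a rough sketch of its assumed semantics follows (paraphrased, with irqstack_active() and run_on_irqstack() taken as assumed helper names for the sketch rather than quoted from the actual irq_stack.h implementation):

	static __always_inline void handle_irq_sketch(struct irq_desc *desc,
						      struct pt_regs *regs)
	{
		/* Entry from user mode runs on a practically empty task
		 * stack, and a nested entry is already on the IRQ stack,
		 * so neither case needs a stack switch. */
		if (user_mode(regs) || irqstack_active())
			desc->handle_irq(desc);
		else
			/* Kernel mode on the task stack: move RSP to the
			 * per-CPU IRQ stack, run the handler, switch back. */
			run_on_irqstack(desc->handle_irq, desc);
	}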
Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar Acked-by: Andy Lutomirski Link: https://lore.kernel.org/r/20200521202118.889972748@linutronix.de --- arch/x86/include/asm/irq.h | 2 +- arch/x86/kernel/irq.c | 11 ++++++++++- arch/x86/kernel/irq_32.c | 2 +- 3 files changed, 12 insertions(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/irq.h b/arch/x86/include/asm/irq.h index 74690a373c58..67aa1e2a5b4a 100644 --- a/arch/x86/include/asm/irq.h +++ b/arch/x86/include/asm/irq.h @@ -34,7 +34,7 @@ extern __visible void smp_kvm_posted_intr_nested_ipi(struct pt_regs *regs); extern void (*x86_platform_ipi_callback)(void); extern void native_init_IRQ(void); -extern void handle_irq(struct irq_desc *desc, struct pt_regs *regs); +extern void __handle_irq(struct irq_desc *desc, struct pt_regs *regs); extern __visible void do_IRQ(struct pt_regs *regs, unsigned long vector); diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c index c7669363251a..5495ea4debba 100644 --- a/arch/x86/kernel/irq.c +++ b/arch/x86/kernel/irq.c @@ -13,6 +13,7 @@ #include #include +#include #include #include #include @@ -221,6 +222,14 @@ u64 arch_irq_stat(void) return sum; } +static __always_inline void handle_irq(struct irq_desc *desc, + struct pt_regs *regs) +{ + if (IS_ENABLED(CONFIG_X86_64)) + run_on_irqstack_cond(desc->handle_irq, desc, regs); + else + __handle_irq(desc, regs); +} /* * do_IRQ handles all normal device IRQ's (the special @@ -246,7 +255,7 @@ __visible void __irq_entry do_IRQ(struct pt_regs *regs, unsigned long vector) desc = __this_cpu_read(vector_irq[vector]); if (likely(!IS_ERR_OR_NULL(desc))) { if (IS_ENABLED(CONFIG_X86_32)) - handle_irq(desc, regs); + __handle_irq(desc, regs); else generic_handle_irq_desc(desc); } else { diff --git a/arch/x86/kernel/irq_32.c b/arch/x86/kernel/irq_32.c index a759ca97cd01..0b79efc87be5 100644 --- a/arch/x86/kernel/irq_32.c +++ b/arch/x86/kernel/irq_32.c @@ -148,7 +148,7 @@ void do_softirq_own_stack(void) call_on_stack(__do_softirq, isp); } -void handle_irq(struct irq_desc *desc, struct pt_regs *regs) +void __handle_irq(struct irq_desc *desc, struct pt_regs *regs) { int overflow = check_stack_overflow(); -- cgit v1.2.3 From 0bf7c314ff68622468945a24ea2f7ebc1edf0a6b Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 21 May 2020 22:05:36 +0200 Subject: x86/entry: Add IDTENTRY_IRQ macro Provide a separate IDTENTRY macro for device interrupts. Similar to IDTENTRY_ERRORCODE with the addition of invoking irq_enter/exit_rcu() and providing the errorcode as a 'u8' argument to the C function, which truncates the sign-extended vector number.
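Why the u8 truncation is sufficient can be seen in isolation; a standalone userspace demonstration of the sign extension (illustrative only, not kernel code):

	#include <stdint.h>
	#include <stdio.h>

	int main(void)
	{
		/* 'pushq $imm8' sign-extends: vector 0xec lands in the
		 * error code slot as 0xffffffffffffffec */
		unsigned long error_code = (unsigned long)(int64_t)(int8_t)0xec;
		/* the (u8) cast in DEFINE_IDTENTRY_IRQ masks off everything
		 * above bit 7 and recovers the vector */
		uint8_t vector = (uint8_t)error_code;

		printf("error_code=%#lx vector=%#x\n", error_code, vector);
		return 0;	/* prints error_code=0xffffffffffffffec vector=0xec */
	}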
Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar Acked-by: Andy Lutomirski Link: https://lore.kernel.org/r/20200521202118.984573165@linutronix.de --- arch/x86/entry/entry_32.S | 14 ++++++++++++ arch/x86/entry/entry_64.S | 14 ++++++++++++ arch/x86/include/asm/idtentry.h | 49 +++++++++++++++++++++++++++++++++++++++++ 3 files changed, 77 insertions(+) (limited to 'arch') diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S index 40092c81dcb8..ba2a70d7118a 100644 --- a/arch/x86/entry/entry_32.S +++ b/arch/x86/entry/entry_32.S @@ -751,6 +751,20 @@ SYM_CODE_START(\asmsym) SYM_CODE_END(\asmsym) .endm +.macro idtentry_irq vector cfunc + .p2align CONFIG_X86_L1_CACHE_SHIFT +SYM_CODE_START_LOCAL(asm_\cfunc) + ASM_CLAC + SAVE_ALL switch_stacks=1 + ENCODE_FRAME_POINTER + movl %esp, %eax + movl PT_ORIG_EAX(%esp), %edx /* get the vector from stack */ + movl $-1, PT_ORIG_EAX(%esp) /* no syscall to restart */ + call \cfunc + jmp handle_exception_return +SYM_CODE_END(asm_\cfunc) +.endm + /* * Include the defines which emit the idt entries which are shared * shared between 32 and 64 bit. diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index e7434cda9a38..9162a073e524 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -527,6 +527,20 @@ _ASM_NOKPROBE(\asmsym) SYM_CODE_END(\asmsym) .endm +/* + * Interrupt entry/exit. + * + + The interrupt stubs push (vector) onto the stack, which is the error_code + * position of idtentry exceptions, and jump to one of the two idtentry points + * (common/spurious). + * + * common_interrupt is a hotpath, align it to a cache line + */ +.macro idtentry_irq vector cfunc + .p2align CONFIG_X86_L1_CACHE_SHIFT + idtentry \vector asm_\cfunc \cfunc has_error_code=1 +.endm + /* * MCE and DB exceptions */ diff --git a/arch/x86/include/asm/idtentry.h b/arch/x86/include/asm/idtentry.h index 2fc0dc8af2a4..eaee48bd6a19 100644 --- a/arch/x86/include/asm/idtentry.h +++ b/arch/x86/include/asm/idtentry.h @@ -165,6 +165,51 @@ __visible noinstr void func(struct pt_regs *regs) #define DEFINE_IDTENTRY_RAW_ERRORCODE(func) \ __visible noinstr void func(struct pt_regs *regs, unsigned long error_code) +/** + * DECLARE_IDTENTRY_IRQ - Declare functions for device interrupt IDT entry + * points (common/spurious) + * @vector: Vector number (ignored for C) + * @func: Function name of the entry point + * + * Maps to DECLARE_IDTENTRY_ERRORCODE() + */ +#define DECLARE_IDTENTRY_IRQ(vector, func) \ + DECLARE_IDTENTRY_ERRORCODE(vector, func) + +/** + * DEFINE_IDTENTRY_IRQ - Emit code for device interrupt IDT entry points + * @func: Function name of the entry point + * + * The vector number is pushed by the low level entry stub and handed + * to the function as error_code argument which needs to be truncated + * to an u8 because the push is sign extending. + * + * On 64-bit idtentry_enter/exit() are invoked in the ASM entry code before + * and after switching to the interrupt stack. On 32-bit this happens in C. + * + * irq_enter/exit_rcu() are invoked before the function body and the + * KVM L1D flush request is set. 
+ */ +#define DEFINE_IDTENTRY_IRQ(func) \ +static __always_inline void __##func(struct pt_regs *regs, u8 vector); \ + \ +__visible noinstr void func(struct pt_regs *regs, \ + unsigned long error_code) \ +{ \ + bool rcu_exit = idtentry_enter_cond_rcu(regs); \ + \ + instrumentation_begin(); \ + irq_enter_rcu(); \ + kvm_set_cpu_l1tf_flush_l1d(); \ + __##func (regs, (u8)error_code); \ + irq_exit_rcu(); \ + lockdep_hardirq_exit(); \ + instrumentation_end(); \ + idtentry_exit_cond_rcu(regs, rcu_exit); \ +} \ + \ +static __always_inline void __##func(struct pt_regs *regs, u8 vector) /** * DECLARE_IDTENTRY_XENCB - Declare functions for XEN HV callback entry point * @vector: Vector number (ignored for C) @@ -312,6 +357,10 @@ __visible noinstr void func(struct pt_regs *regs, \ #define DECLARE_IDTENTRY_RAW_ERRORCODE(vector, func) \ DECLARE_IDTENTRY_ERRORCODE(vector, func) +/* Entries for common/spurious (device) interrupts */ +#define DECLARE_IDTENTRY_IRQ(vector, func) \ + idtentry_irq vector func + #ifdef CONFIG_X86_64 # define DECLARE_IDTENTRY_MCE(vector, func) \ idtentry_mce_db vector asm_##func func -- cgit v1.2.3 From fa5e5c409213265da8a188b4a5e4e641b1382eb4 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 21 May 2020 22:05:37 +0200 Subject: x86/entry: Use idtentry for interrupts Replace the extra interrupt handling code and reuse the existing idtentry machinery. This moves the irq stack switching on 64-bit from ASM to C code; 32-bit already does the stack switching in C. This requires removing HAVE_IRQ_EXIT_ON_IRQ_STACK as the stack switch is no longer in the low level entry code. Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar Acked-by: Andy Lutomirski Link: https://lore.kernel.org/r/20200521202119.078690991@linutronix.de --- arch/x86/Kconfig | 1 - arch/x86/entry/entry_32.S | 31 ------------------------------- arch/x86/entry/entry_64.S | 31 +++----------------------------- arch/x86/include/asm/hw_irq.h | 1 - arch/x86/include/asm/idtentry.h | 10 ++++++++-- arch/x86/include/asm/traps.h | 1 - arch/x86/kernel/apic/apic.c | 23 ++++++++--------------- arch/x86/kernel/apic/msi.c | 3 ++- arch/x86/kernel/irq.c | 27 +++++++-------------------- 9 files changed, 28 insertions(+), 100 deletions(-) (limited to 'arch') diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 10dae8b96ed5..a16c45460f1b 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -181,7 +181,6 @@ config X86 select HAVE_HW_BREAKPOINT select HAVE_IDE select HAVE_IOREMAP_PROT - select HAVE_IRQ_EXIT_ON_IRQ_STACK if X86_64 select HAVE_IRQ_TIME_ACCOUNTING select HAVE_KERNEL_BZIP2 select HAVE_KERNEL_GZIP diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S index ba2a70d7118a..b47b7b223811 100644 --- a/arch/x86/entry/entry_32.S +++ b/arch/x86/entry/entry_32.S @@ -1229,37 +1229,6 @@ SYM_FUNC_END(entry_INT80_32) #endif .endm -#ifdef CONFIG_X86_LOCAL_APIC -SYM_CODE_START_LOCAL(common_spurious) - ASM_CLAC - SAVE_ALL switch_stacks=1 - ENCODE_FRAME_POINTER - TRACE_IRQS_OFF - movl %esp, %eax - movl PT_ORIG_EAX(%esp), %edx /* get the vector from stack */ - movl $-1, PT_ORIG_EAX(%esp) /* no syscall to restart */ - call smp_spurious_interrupt - jmp ret_from_intr -SYM_CODE_END(common_spurious) -#endif - -/* - * the CPU automatically disables interrupts when executing an IRQ vector, - * so IRQ-flags tracing has to follow that: - */ - .p2align CONFIG_X86_L1_CACHE_SHIFT -SYM_CODE_START_LOCAL(common_interrupt) - ASM_CLAC - SAVE_ALL switch_stacks=1 - ENCODE_FRAME_POINTER - TRACE_IRQS_OFF - movl %esp, %eax - movl
PT_ORIG_EAX(%esp), %edx /* get the vector from stack */ - movl $-1, PT_ORIG_EAX(%esp) /* no syscall to restart */ - call do_IRQ - jmp ret_from_intr -SYM_CODE_END(common_interrupt) - #define BUILD_INTERRUPT3(name, nr, fn) \ SYM_FUNC_START(name) \ ASM_CLAC; \ diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index 9162a073e524..e54bcd3244f8 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -737,32 +737,7 @@ SYM_CODE_START(interrupt_entry) SYM_CODE_END(interrupt_entry) _ASM_NOKPROBE(interrupt_entry) - -/* Interrupt entry/exit. */ - -/* - * The interrupt stubs push vector onto the stack and - * then jump to common_spurious/interrupt. - */ -SYM_CODE_START_LOCAL(common_spurious) - call interrupt_entry - UNWIND_HINT_REGS indirect=1 - movq ORIG_RAX(%rdi), %rsi /* get vector from stack */ - movq $-1, ORIG_RAX(%rdi) /* no syscall to restart */ - call smp_spurious_interrupt /* rdi points to pt_regs */ - jmp ret_from_intr -SYM_CODE_END(common_spurious) -_ASM_NOKPROBE(common_spurious) - -/* common_interrupt is a hotpath. Align it */ - .p2align CONFIG_X86_L1_CACHE_SHIFT -SYM_CODE_START_LOCAL(common_interrupt) - call interrupt_entry - UNWIND_HINT_REGS indirect=1 - movq ORIG_RAX(%rdi), %rsi /* get vector from stack */ - movq $-1, ORIG_RAX(%rdi) /* no syscall to restart */ - call do_IRQ /* rdi points to pt_regs */ - /* 0(%rsp): old RSP */ +SYM_CODE_START_LOCAL(common_interrupt_return) ret_from_intr: DISABLE_INTERRUPTS(CLBR_ANY) TRACE_IRQS_OFF @@ -945,8 +920,8 @@ native_irq_return_ldt: */ jmp native_irq_return_iret #endif -SYM_CODE_END(common_interrupt) -_ASM_NOKPROBE(common_interrupt) +SYM_CODE_END(common_interrupt_return) +_ASM_NOKPROBE(common_interrupt_return) /* * APIC interrupts. diff --git a/arch/x86/include/asm/hw_irq.h b/arch/x86/include/asm/hw_irq.h index 0ffe80792b2d..3213d36b92d3 100644 --- a/arch/x86/include/asm/hw_irq.h +++ b/arch/x86/include/asm/hw_irq.h @@ -38,7 +38,6 @@ extern asmlinkage void error_interrupt(void); extern asmlinkage void irq_work_interrupt(void); extern asmlinkage void uv_bau_message_intr1(void); -extern asmlinkage void spurious_interrupt(void); extern asmlinkage void spurious_apic_interrupt(void); extern asmlinkage void thermal_interrupt(void); extern asmlinkage void reschedule_interrupt(void); diff --git a/arch/x86/include/asm/idtentry.h b/arch/x86/include/asm/idtentry.h index eaee48bd6a19..341888184d1c 100644 --- a/arch/x86/include/asm/idtentry.h +++ b/arch/x86/include/asm/idtentry.h @@ -417,7 +417,7 @@ SYM_CODE_START(irq_entries_start) .rept (FIRST_SYSTEM_VECTOR - FIRST_EXTERNAL_VECTOR) UNWIND_HINT_IRET_REGS .byte 0x6a, vector - jmp common_interrupt + jmp asm_common_interrupt nop /* Ensure that the above is 8 bytes max */ . = pos + 8 @@ -434,7 +434,7 @@ SYM_CODE_START(spurious_entries_start) .rept (NR_VECTORS - FIRST_SYSTEM_VECTOR) UNWIND_HINT_IRET_REGS .byte 0x6a, vector - jmp common_spurious + jmp asm_spurious_interrupt nop /* Ensure that the above is 8 bytes max */ . 
= pos + 8 @@ -506,6 +506,12 @@ DECLARE_IDTENTRY_DF(X86_TRAP_DF, exc_double_fault); DECLARE_IDTENTRY_XENCB(X86_TRAP_OTHER, exc_xen_hypervisor_callback); #endif +/* Device interrupts common/spurious */ +DECLARE_IDTENTRY_IRQ(X86_TRAP_OTHER, common_interrupt); +#ifdef CONFIG_X86_LOCAL_APIC +DECLARE_IDTENTRY_IRQ(X86_TRAP_OTHER, spurious_interrupt); +#endif + #undef X86_TRAP_OTHER #endif diff --git a/arch/x86/include/asm/traps.h b/arch/x86/include/asm/traps.h index 32b2becf7806..97e6945bfce8 100644 --- a/arch/x86/include/asm/traps.h +++ b/arch/x86/include/asm/traps.h @@ -43,7 +43,6 @@ asmlinkage void smp_deferred_error_interrupt(struct pt_regs *regs); void smp_apic_timer_interrupt(struct pt_regs *regs); void smp_error_interrupt(struct pt_regs *regs); void smp_spurious_apic_interrupt(struct pt_regs *regs); -void smp_spurious_interrupt(struct pt_regs *regs, unsigned long vector); asmlinkage void smp_irq_move_cleanup_interrupt(void); #ifdef CONFIG_VMAP_STACK diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index 6c2b807a7eae..b7bfd3a1abb7 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c @@ -2121,9 +2121,9 @@ void __init register_lapic_address(unsigned long address) */ /** - * smp_spurious_interrupt - Catch all for interrupts raised on unused vectors + * spurious_interrupt - Catch all for interrupts raised on unused vectors * @regs: Pointer to pt_regs on stack - * @error_code: The vector number is in the lower 8 bits + * @vector: The vector number * * This is invoked from ASM entry code to catch all interrupts which * trigger on an entry which is routed to the common_spurious idtentry @@ -2131,18 +2131,10 @@ void __init register_lapic_address(unsigned long address) * * Also called from smp_spurious_apic_interrupt(). */ -__visible void __irq_entry smp_spurious_interrupt(struct pt_regs *regs, - unsigned long vector) +DEFINE_IDTENTRY_IRQ(spurious_interrupt) { u32 v; - entering_irq(); - /* - * The push in the entry ASM code which stores the vector number on - * the stack in the error code slot is sign expanding. Just use the - * lower 8 bits. - */ - vector &= 0xFF; trace_spurious_apic_entry(vector); inc_irq_stat(irq_spurious_count); @@ -2163,21 +2155,22 @@ __visible void __irq_entry smp_spurious_interrupt(struct pt_regs *regs, */ v = apic_read(APIC_ISR + ((vector & ~0x1f) >> 1)); if (v & (1 << (vector & 0x1f))) { - pr_info("Spurious interrupt (vector 0x%02lx) on CPU#%d. Acked\n", + pr_info("Spurious interrupt (vector 0x%02x) on CPU#%d. Acked\n", vector, smp_processor_id()); ack_APIC_irq(); } else { - pr_info("Spurious interrupt (vector 0x%02lx) on CPU#%d. Not pending!\n", + pr_info("Spurious interrupt (vector 0x%02x) on CPU#%d. Not pending!\n", vector, smp_processor_id()); } out: trace_spurious_apic_exit(vector); - exiting_irq(); } __visible void smp_spurious_apic_interrupt(struct pt_regs *regs) { - smp_spurious_interrupt(regs, SPURIOUS_APIC_VECTOR); + entering_irq(); + __spurious_interrupt(regs, SPURIOUS_APIC_VECTOR); + exiting_irq(); } /* diff --git a/arch/x86/kernel/apic/msi.c b/arch/x86/kernel/apic/msi.c index 159bd0cb8548..5cbaca58af95 100644 --- a/arch/x86/kernel/apic/msi.c +++ b/arch/x86/kernel/apic/msi.c @@ -115,7 +115,8 @@ msi_set_affinity(struct irq_data *irqd, const struct cpumask *mask, bool force) * denote it as spurious which is no harm as this is a rare event * and interrupt handlers have to cope with spurious interrupts * anyway. 
If the vector is unused, then it is marked so it won't - * trigger the 'No irq handler for vector' warning in do_IRQ(). + * trigger the 'No irq handler for vector' warning in + * common_interrupt(). * * This requires to hold vector lock to prevent concurrent updates to * the affected vector. diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c index 5495ea4debba..c449b8434036 100644 --- a/arch/x86/kernel/irq.c +++ b/arch/x86/kernel/irq.c @@ -20,6 +20,7 @@ #include #include #include +#include #define CREATE_TRACE_POINTS #include @@ -232,37 +233,25 @@ static __always_inline void handle_irq(struct irq_desc *desc, } /* - * do_IRQ handles all normal device IRQ's (the special - * SMP cross-CPU interrupts have their own specific - * handlers). + * common_interrupt() handles all normal device IRQ's (the special SMP + * cross-CPU interrupts have their own entry points). */ -__visible void __irq_entry do_IRQ(struct pt_regs *regs, unsigned long vector) +DEFINE_IDTENTRY_IRQ(common_interrupt) { struct pt_regs *old_regs = set_irq_regs(regs); struct irq_desc *desc; - entering_irq(); - /* - * The push in the entry ASM code which stores the vector number on - * the stack in the error code slot is sign expanding. Just use the - * lower 8 bits. - */ - vector &= 0xFF; - - /* entering_irq() tells RCU that we're not quiescent. Check it. */ + /* entry code tells RCU that we're not quiescent. Check it. */ RCU_LOCKDEP_WARN(!rcu_is_watching(), "IRQ failed to wake up RCU"); desc = __this_cpu_read(vector_irq[vector]); if (likely(!IS_ERR_OR_NULL(desc))) { - if (IS_ENABLED(CONFIG_X86_32)) - __handle_irq(desc, regs); - else - generic_handle_irq_desc(desc); + handle_irq(desc, regs); } else { ack_APIC_irq(); if (desc == VECTOR_UNUSED) { - pr_emerg_ratelimited("%s: %d.%lu No irq handler for vector\n", + pr_emerg_ratelimited("%s: %d.%u No irq handler for vector\n", __func__, smp_processor_id(), vector); } else { @@ -270,8 +259,6 @@ __visible void __irq_entry do_IRQ(struct pt_regs *regs, unsigned long vector) } } - exiting_irq(); - set_irq_regs(old_regs); } -- cgit v1.2.3 From 6368558c37107bed35950cfbd994f49de07236dc Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 21 May 2020 22:05:38 +0200 Subject: x86/entry: Provide IDTENTRY_SYSVEC Provide IDTENTRY variants for system vectors to consolidate the different mechanisms to emit the ASM stubs for 32- and 64-bit. On 64-bit this also moves the stack switching from ASM to C code. 32-bit will execute the system vectors without stack switching as before. The simple variant is meant for "empty" system vectors like scheduler IPI and KVM posted interrupt vectors. These do not need the full glory of irq enter/exit handling with softirq processing and more. Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar Acked-by: Andy Lutomirski Link: https://lore.kernel.org/r/20200521202119.185317067@linutronix.de --- arch/x86/entry/entry_32.S | 4 +++ arch/x86/entry/entry_64.S | 8 +++++ arch/x86/include/asm/idtentry.h | 79 +++++++++++++++++++++++++++++++++++++++++ 3 files changed, 91 insertions(+) (limited to 'arch') diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S index b47b7b223811..a8803aa3a07b 100644 --- a/arch/x86/entry/entry_32.S +++ b/arch/x86/entry/entry_32.S @@ -765,6 +765,10 @@ SYM_CODE_START_LOCAL(asm_\cfunc) SYM_CODE_END(asm_\cfunc) .endm +.macro idtentry_sysvec vector cfunc + idtentry \vector asm_\cfunc \cfunc has_error_code=0 .endm + /* * Include the defines which emit the idt entries which are * shared between 32 and 64 bit.
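A point worth making explicit: idtentry.h is included from both C and ASM, so a single DECLARE_IDTENTRY_SYSVEC() line fans out into both worlds. A minimal sketch with an invented vector (MY_VECTOR and sysvec_my_handler are hypothetical names, not part of this series; only the macro and helper names come from the code above):

/* Shared header, seen by C and ASM alike: */
DECLARE_IDTENTRY_SYSVEC(MY_VECTOR, sysvec_my_handler);

/*
 * Compiled as C this expands, roughly, to the prototypes
 *
 *	asmlinkage void asm_sysvec_my_handler(void);
 *	__visible void sysvec_my_handler(struct pt_regs *regs);
 *
 * while in the ASM pass it becomes "idtentry_sysvec MY_VECTOR
 * sysvec_my_handler", which emits the asm_sysvec_my_handler stub with
 * has_error_code=0 via the macros added above.
 */
DEFINE_IDTENTRY_SYSVEC(sysvec_my_handler)
{
	ack_APIC_irq();
	/* vector specific handling goes here */
}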
diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index e54bcd3244f8..9b7183dac202 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -541,6 +541,14 @@ SYM_CODE_END(\asmsym) idtentry \vector asm_\cfunc \cfunc has_error_code=1 .endm +/* + * System vectors which invoke their handlers directly and are not + * going through the regular common device interrupt handling code. + */ +.macro idtentry_sysvec vector cfunc + idtentry \vector asm_\cfunc \cfunc has_error_code=0 +.endm + /* * MCE and DB exceptions */ diff --git a/arch/x86/include/asm/idtentry.h b/arch/x86/include/asm/idtentry.h index 341888184d1c..63f7b99703cf 100644 --- a/arch/x86/include/asm/idtentry.h +++ b/arch/x86/include/asm/idtentry.h @@ -6,6 +6,9 @@ #include #ifndef __ASSEMBLY__ +#include + +#include void idtentry_enter_user(struct pt_regs *regs); void idtentry_exit_user(struct pt_regs *regs); @@ -210,6 +213,78 @@ __visible noinstr void func(struct pt_regs *regs, \ \ static __always_inline void __##func(struct pt_regs *regs, u8 vector) +/** + * DECLARE_IDTENTRY_SYSVEC - Declare functions for system vector entry points + * @vector: Vector number (ignored for C) + * @func: Function name of the entry point + * + * Declares three functions: + * - The ASM entry point: asm_##func + * - The XEN PV trap entry point: xen_##func (maybe unused) + * - The C handler called from the ASM entry point + * + * Maps to DECLARE_IDTENTRY(). + */ +#define DECLARE_IDTENTRY_SYSVEC(vector, func) \ + DECLARE_IDTENTRY(vector, func) + +/** + * DEFINE_IDTENTRY_SYSVEC - Emit code for system vector IDT entry points + * @func: Function name of the entry point + * + * idtentry_enter/exit() and irq_enter/exit_rcu() are invoked before the + * function body. KVM L1D flush request is set. + * + * Runs the function on the interrupt stack if the entry hit kernel mode + */ +#define DEFINE_IDTENTRY_SYSVEC(func) \ +static void __##func(struct pt_regs *regs); \ + \ +__visible noinstr void func(struct pt_regs *regs) \ +{ \ + bool rcu_exit = idtentry_enter_cond_rcu(regs); \ + \ + instrumentation_begin(); \ + irq_enter_rcu(); \ + kvm_set_cpu_l1tf_flush_l1d(); \ + run_on_irqstack_cond(__##func, regs, regs); \ + irq_exit_rcu(); \ + lockdep_hardirq_exit(); \ + instrumentation_end(); \ + idtentry_exit_cond_rcu(regs, rcu_exit); \ +} \ + \ +static noinline void __##func(struct pt_regs *regs) + +/** + * DEFINE_IDTENTRY_SYSVEC_SIMPLE - Emit code for simple system vector IDT + * entry points + * @func: Function name of the entry point + * + * Runs the function on the interrupted stack. No switch to IRQ stack and + * only the minimal __irq_enter/exit() handling. + * + * Only use for 'empty' vectors like reschedule IPI and KVM posted + * interrupt vectors. 
+ */ +#define DEFINE_IDTENTRY_SYSVEC_SIMPLE(func) \ +static __always_inline void __##func(struct pt_regs *regs); \ + \ +__visible noinstr void func(struct pt_regs *regs) \ +{ \ + bool rcu_exit = idtentry_enter_cond_rcu(regs); \ + \ + instrumentation_begin(); \ + __irq_enter_raw(); \ + kvm_set_cpu_l1tf_flush_l1d(); \ + __##func (regs); \ + __irq_exit_raw(); \ + instrumentation_end(); \ + idtentry_exit_cond_rcu(regs, rcu_exit); \ +} \ + \ +static __always_inline void __##func(struct pt_regs *regs) + /** * DECLARE_IDTENTRY_XENCB - Declare functions for XEN HV callback entry point * @vector: Vector number (ignored for C) @@ -361,6 +436,10 @@ __visible noinstr void func(struct pt_regs *regs, \ #define DECLARE_IDTENTRY_IRQ(vector, func) \ idtentry_irq vector func +/* System vector entries */ +#define DECLARE_IDTENTRY_SYSVEC(vector, func) \ + idtentry_sysvec vector func + #ifdef CONFIG_X86_64 # define DECLARE_IDTENTRY_MCE(vector, func) \ idtentry_mce_db vector asm_##func func -- cgit v1.2.3 From db0338eec5836eea3bd1b274212234d04bac2034 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 21 May 2020 22:05:39 +0200 Subject: x86/entry: Convert APIC interrupts to IDTENTRY_SYSVEC Convert APIC interrupts to IDTENTRY_SYSVEC: - Implement the C entry point with DEFINE_IDTENTRY_SYSVEC - Emit the ASM stub with DECLARE_IDTENTRY_SYSVEC - Remove the ASM idtentries in 64-bit - Remove the BUILD_INTERRUPT entries in 32-bit - Remove the old prototypes No functional change. Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar Acked-by: Andy Lutomirski Link: https://lore.kernel.org/r/20200521202119.280728850@linutronix.de --- arch/x86/entry/entry_64.S | 6 ------ arch/x86/include/asm/entry_arch.h | 5 ----- arch/x86/include/asm/hw_irq.h | 4 ---- arch/x86/include/asm/idtentry.h | 8 ++++++++ arch/x86/include/asm/irq.h | 1 - arch/x86/include/asm/traps.h | 3 --- arch/x86/kernel/apic/apic.c | 23 +++++------------------ arch/x86/kernel/idt.c | 8 ++++---- arch/x86/kernel/irq.c | 5 ++--- 9 files changed, 19 insertions(+), 44 deletions(-) (limited to 'arch') diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index 9b7183dac202..25f71a0c9d3e 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -965,9 +965,6 @@ apicinterrupt3 REBOOT_VECTOR reboot_interrupt smp_reboot_interrupt apicinterrupt3 UV_BAU_MESSAGE uv_bau_message_intr1 uv_bau_message_interrupt #endif -apicinterrupt LOCAL_TIMER_VECTOR apic_timer_interrupt smp_apic_timer_interrupt -apicinterrupt X86_PLATFORM_IPI_VECTOR x86_platform_ipi smp_x86_platform_ipi - #ifdef CONFIG_HAVE_KVM apicinterrupt3 POSTED_INTR_VECTOR kvm_posted_intr_ipi smp_kvm_posted_intr_ipi apicinterrupt3 POSTED_INTR_WAKEUP_VECTOR kvm_posted_intr_wakeup_ipi smp_kvm_posted_intr_wakeup_ipi @@ -992,9 +989,6 @@ apicinterrupt CALL_FUNCTION_VECTOR call_function_interrupt smp_call_function_i apicinterrupt RESCHEDULE_VECTOR reschedule_interrupt smp_reschedule_interrupt #endif -apicinterrupt ERROR_APIC_VECTOR error_interrupt smp_error_interrupt -apicinterrupt SPURIOUS_APIC_VECTOR spurious_apic_interrupt smp_spurious_apic_interrupt - #ifdef CONFIG_IRQ_WORK apicinterrupt IRQ_WORK_VECTOR irq_work_interrupt smp_irq_work_interrupt #endif diff --git a/arch/x86/include/asm/entry_arch.h b/arch/x86/include/asm/entry_arch.h index cd57ce6134c9..d10d6d807e73 100644 --- a/arch/x86/include/asm/entry_arch.h +++ b/arch/x86/include/asm/entry_arch.h @@ -33,11 +33,6 @@ BUILD_INTERRUPT(kvm_posted_intr_nested_ipi, POSTED_INTR_NESTED_VECTOR) */ #ifdef CONFIG_X86_LOCAL_APIC 
-BUILD_INTERRUPT(apic_timer_interrupt,LOCAL_TIMER_VECTOR) -BUILD_INTERRUPT(error_interrupt,ERROR_APIC_VECTOR) -BUILD_INTERRUPT(spurious_apic_interrupt,SPURIOUS_APIC_VECTOR) -BUILD_INTERRUPT(x86_platform_ipi, X86_PLATFORM_IPI_VECTOR) - #ifdef CONFIG_IRQ_WORK BUILD_INTERRUPT(irq_work_interrupt, IRQ_WORK_VECTOR) #endif diff --git a/arch/x86/include/asm/hw_irq.h b/arch/x86/include/asm/hw_irq.h index 3213d36b92d3..1765993360e7 100644 --- a/arch/x86/include/asm/hw_irq.h +++ b/arch/x86/include/asm/hw_irq.h @@ -29,16 +29,12 @@ #include /* Interrupt handlers registered during init_IRQ */ -extern asmlinkage void apic_timer_interrupt(void); -extern asmlinkage void x86_platform_ipi(void); extern asmlinkage void kvm_posted_intr_ipi(void); extern asmlinkage void kvm_posted_intr_wakeup_ipi(void); extern asmlinkage void kvm_posted_intr_nested_ipi(void); -extern asmlinkage void error_interrupt(void); extern asmlinkage void irq_work_interrupt(void); extern asmlinkage void uv_bau_message_intr1(void); -extern asmlinkage void spurious_apic_interrupt(void); extern asmlinkage void thermal_interrupt(void); extern asmlinkage void reschedule_interrupt(void); diff --git a/arch/x86/include/asm/idtentry.h b/arch/x86/include/asm/idtentry.h index 63f7b99703cf..b95f36276c6c 100644 --- a/arch/x86/include/asm/idtentry.h +++ b/arch/x86/include/asm/idtentry.h @@ -591,6 +591,14 @@ DECLARE_IDTENTRY_IRQ(X86_TRAP_OTHER, common_interrupt); DECLARE_IDTENTRY_IRQ(X86_TRAP_OTHER, spurious_interrupt); #endif +/* System vector entry points */ +#ifdef CONFIG_X86_LOCAL_APIC +DECLARE_IDTENTRY_SYSVEC(ERROR_APIC_VECTOR, sysvec_error_interrupt); +DECLARE_IDTENTRY_SYSVEC(SPURIOUS_APIC_VECTOR, sysvec_spurious_apic_interrupt); +DECLARE_IDTENTRY_SYSVEC(LOCAL_TIMER_VECTOR, sysvec_apic_timer_interrupt); +DECLARE_IDTENTRY_SYSVEC(X86_PLATFORM_IPI_VECTOR, sysvec_x86_platform_ipi); +#endif + #undef X86_TRAP_OTHER #endif diff --git a/arch/x86/include/asm/irq.h b/arch/x86/include/asm/irq.h index 67aa1e2a5b4a..c7c43e86805a 100644 --- a/arch/x86/include/asm/irq.h +++ b/arch/x86/include/asm/irq.h @@ -46,7 +46,6 @@ extern void __init init_IRQ(void); void arch_trigger_cpumask_backtrace(const struct cpumask *mask, bool exclude_self); -extern __visible void smp_x86_platform_ipi(struct pt_regs *regs); #define arch_trigger_cpumask_backtrace arch_trigger_cpumask_backtrace #endif diff --git a/arch/x86/include/asm/traps.h b/arch/x86/include/asm/traps.h index 97e6945bfce8..933934c3e173 100644 --- a/arch/x86/include/asm/traps.h +++ b/arch/x86/include/asm/traps.h @@ -40,9 +40,6 @@ asmlinkage void smp_threshold_interrupt(struct pt_regs *regs); asmlinkage void smp_deferred_error_interrupt(struct pt_regs *regs); #endif -void smp_apic_timer_interrupt(struct pt_regs *regs); -void smp_error_interrupt(struct pt_regs *regs); -void smp_spurious_apic_interrupt(struct pt_regs *regs); asmlinkage void smp_irq_move_cleanup_interrupt(void); #ifdef CONFIG_VMAP_STACK diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index b7bfd3a1abb7..9244377ed454 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c @@ -1088,23 +1088,14 @@ static void local_apic_timer_interrupt(void) * [ if a single-CPU system runs an SMP kernel then we call the local * interrupt as well. Thus we cannot inline the local irq ... ] */ -__visible void __irq_entry smp_apic_timer_interrupt(struct pt_regs *regs) +DEFINE_IDTENTRY_SYSVEC(sysvec_apic_timer_interrupt) { struct pt_regs *old_regs = set_irq_regs(regs); - /* - * NOTE! 
We'd better ACK the irq immediately, - * because timer handling can be slow. - * - * update_process_times() expects us to have done irq_enter(). - * Besides, if we don't timer interrupts ignore the global - * interrupt lock, which is the WrongThing (tm) to do. - */ - entering_ack_irq(); + ack_APIC_irq(); trace_local_timer_entry(LOCAL_TIMER_VECTOR); local_apic_timer_interrupt(); trace_local_timer_exit(LOCAL_TIMER_VECTOR); - exiting_irq(); set_irq_regs(old_regs); } @@ -2129,7 +2120,7 @@ void __init register_lapic_address(unsigned long address) * trigger on an entry which is routed to the common_spurious idtentry * point. * - * Also called from smp_spurious_apic_interrupt(). + * Also called from sysvec_spurious_apic_interrupt(). */ DEFINE_IDTENTRY_IRQ(spurious_interrupt) { @@ -2166,17 +2157,15 @@ out: trace_spurious_apic_exit(vector); } -__visible void smp_spurious_apic_interrupt(struct pt_regs *regs) +DEFINE_IDTENTRY_SYSVEC(sysvec_spurious_apic_interrupt) { - entering_irq(); __spurious_interrupt(regs, SPURIOUS_APIC_VECTOR); - exiting_irq(); } /* * This interrupt should never happen with our APIC/SMP architecture */ -__visible void __irq_entry smp_error_interrupt(struct pt_regs *regs) +DEFINE_IDTENTRY_SYSVEC(sysvec_error_interrupt) { static const char * const error_interrupt_reason[] = { "Send CS error", /* APIC Error Bit 0 */ @@ -2190,7 +2179,6 @@ __visible void __irq_entry smp_error_interrupt(struct pt_regs *regs) }; u32 v, i = 0; - entering_irq(); trace_error_apic_entry(ERROR_APIC_VECTOR); /* First tickle the hardware, only then report what went on. -- REW */ @@ -2214,7 +2202,6 @@ __visible void __irq_entry smp_error_interrupt(struct pt_regs *regs) apic_printk(APIC_DEBUG, KERN_CONT "\n"); trace_error_apic_exit(ERROR_APIC_VECTOR); - exiting_irq(); } /** diff --git a/arch/x86/kernel/idt.c b/arch/x86/kernel/idt.c index 20408e31c18d..93c1b27f40f4 100644 --- a/arch/x86/kernel/idt.c +++ b/arch/x86/kernel/idt.c @@ -132,8 +132,8 @@ static const __initconst struct idt_data apic_idts[] = { #endif #ifdef CONFIG_X86_LOCAL_APIC - INTG(LOCAL_TIMER_VECTOR, apic_timer_interrupt), - INTG(X86_PLATFORM_IPI_VECTOR, x86_platform_ipi), + INTG(LOCAL_TIMER_VECTOR, asm_sysvec_apic_timer_interrupt), + INTG(X86_PLATFORM_IPI_VECTOR, asm_sysvec_x86_platform_ipi), # ifdef CONFIG_HAVE_KVM INTG(POSTED_INTR_VECTOR, kvm_posted_intr_ipi), INTG(POSTED_INTR_WAKEUP_VECTOR, kvm_posted_intr_wakeup_ipi), @@ -145,8 +145,8 @@ static const __initconst struct idt_data apic_idts[] = { #ifdef CONFIG_X86_UV INTG(UV_BAU_MESSAGE, uv_bau_message_intr1), #endif - INTG(SPURIOUS_APIC_VECTOR, spurious_apic_interrupt), - INTG(ERROR_APIC_VECTOR, error_interrupt), + INTG(SPURIOUS_APIC_VECTOR, asm_sysvec_spurious_apic_interrupt), + INTG(ERROR_APIC_VECTOR, asm_sysvec_error_interrupt), #endif }; diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c index c449b8434036..7e3005274f83 100644 --- a/arch/x86/kernel/irq.c +++ b/arch/x86/kernel/irq.c @@ -268,17 +268,16 @@ void (*x86_platform_ipi_callback)(void) = NULL; /* * Handler for X86_PLATFORM_IPI_VECTOR. 
*/ -__visible void __irq_entry smp_x86_platform_ipi(struct pt_regs *regs) +DEFINE_IDTENTRY_SYSVEC(sysvec_x86_platform_ipi) { struct pt_regs *old_regs = set_irq_regs(regs); - entering_ack_irq(); + ack_APIC_irq(); trace_x86_platform_ipi_entry(X86_PLATFORM_IPI_VECTOR); inc_irq_stat(x86_platform_ipis); if (x86_platform_ipi_callback) x86_platform_ipi_callback(); trace_x86_platform_ipi_exit(X86_PLATFORM_IPI_VECTOR); - exiting_irq(); set_irq_regs(old_regs); } #endif -- cgit v1.2.3 From 582f9191231b994582ad5349a7b06b3255c926fb Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 21 May 2020 22:05:40 +0200 Subject: x86/entry: Convert SMP system vectors to IDTENTRY_SYSVEC Convert SMP system vectors to IDTENTRY_SYSVEC: - Implement the C entry point with DEFINE_IDTENTRY_SYSVEC - Emit the ASM stub with DECLARE_IDTENTRY_SYSVEC - Remove the ASM idtentries in 64-bit - Remove the BUILD_INTERRUPT entries in 32-bit - Remove the old prototypes No functional change. Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar Acked-by: Andy Lutomirski Link: https://lore.kernel.org/r/20200521202119.372234635@linutronix.de --- arch/x86/entry/entry_64.S | 7 ------- arch/x86/include/asm/entry_arch.h | 4 ---- arch/x86/include/asm/hw_irq.h | 5 ----- arch/x86/include/asm/idtentry.h | 7 +++++++ arch/x86/include/asm/traps.h | 2 -- arch/x86/kernel/apic/vector.c | 5 ++--- arch/x86/kernel/idt.c | 10 +++++----- arch/x86/kernel/smp.c | 18 +++++++----------- 8 files changed, 21 insertions(+), 37 deletions(-) (limited to 'arch') diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index 25f71a0c9d3e..f3ccb27c028e 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -956,11 +956,6 @@ apicinterrupt3 \num \sym \do_sym POP_SECTION_IRQENTRY .endm -#ifdef CONFIG_SMP -apicinterrupt3 IRQ_MOVE_CLEANUP_VECTOR irq_move_cleanup_interrupt smp_irq_move_cleanup_interrupt -apicinterrupt3 REBOOT_VECTOR reboot_interrupt smp_reboot_interrupt -#endif - #ifdef CONFIG_X86_UV apicinterrupt3 UV_BAU_MESSAGE uv_bau_message_intr1 uv_bau_message_interrupt #endif @@ -984,8 +979,6 @@ apicinterrupt THERMAL_APIC_VECTOR thermal_interrupt smp_thermal_interrupt #endif #ifdef CONFIG_SMP -apicinterrupt CALL_FUNCTION_SINGLE_VECTOR call_function_single_interrupt smp_call_function_single_interrupt -apicinterrupt CALL_FUNCTION_VECTOR call_function_interrupt smp_call_function_interrupt apicinterrupt RESCHEDULE_VECTOR reschedule_interrupt smp_reschedule_interrupt #endif diff --git a/arch/x86/include/asm/entry_arch.h b/arch/x86/include/asm/entry_arch.h index d10d6d807e73..2e2055bcfeb2 100644 --- a/arch/x86/include/asm/entry_arch.h +++ b/arch/x86/include/asm/entry_arch.h @@ -12,10 +12,6 @@ */ #ifdef CONFIG_SMP BUILD_INTERRUPT(reschedule_interrupt,RESCHEDULE_VECTOR) -BUILD_INTERRUPT(call_function_interrupt,CALL_FUNCTION_VECTOR) -BUILD_INTERRUPT(call_function_single_interrupt,CALL_FUNCTION_SINGLE_VECTOR) -BUILD_INTERRUPT(irq_move_cleanup_interrupt, IRQ_MOVE_CLEANUP_VECTOR) -BUILD_INTERRUPT(reboot_interrupt, REBOOT_VECTOR) #endif #ifdef CONFIG_HAVE_KVM diff --git a/arch/x86/include/asm/hw_irq.h b/arch/x86/include/asm/hw_irq.h index 1765993360e7..36a38695f27f 100644 --- a/arch/x86/include/asm/hw_irq.h +++ b/arch/x86/include/asm/hw_irq.h @@ -38,14 +38,9 @@ extern asmlinkage void uv_bau_message_intr1(void); extern asmlinkage void thermal_interrupt(void); extern asmlinkage void reschedule_interrupt(void); -extern asmlinkage void irq_move_cleanup_interrupt(void); -extern asmlinkage void reboot_interrupt(void); extern asmlinkage void 
threshold_interrupt(void); extern asmlinkage void deferred_error_interrupt(void); -extern asmlinkage void call_function_interrupt(void); -extern asmlinkage void call_function_single_interrupt(void); - #ifdef CONFIG_X86_LOCAL_APIC struct irq_data; struct pci_dev; diff --git a/arch/x86/include/asm/idtentry.h b/arch/x86/include/asm/idtentry.h index b95f36276c6c..b44f4ac22af6 100644 --- a/arch/x86/include/asm/idtentry.h +++ b/arch/x86/include/asm/idtentry.h @@ -599,6 +599,13 @@ DECLARE_IDTENTRY_SYSVEC(LOCAL_TIMER_VECTOR, sysvec_apic_timer_interrupt); DECLARE_IDTENTRY_SYSVEC(X86_PLATFORM_IPI_VECTOR, sysvec_x86_platform_ipi); #endif +#ifdef CONFIG_SMP +DECLARE_IDTENTRY_SYSVEC(IRQ_MOVE_CLEANUP_VECTOR, sysvec_irq_move_cleanup); +DECLARE_IDTENTRY_SYSVEC(REBOOT_VECTOR, sysvec_reboot); +DECLARE_IDTENTRY_SYSVEC(CALL_FUNCTION_SINGLE_VECTOR, sysvec_call_function_single); +DECLARE_IDTENTRY_SYSVEC(CALL_FUNCTION_VECTOR, sysvec_call_function); +#endif + #undef X86_TRAP_OTHER #endif diff --git a/arch/x86/include/asm/traps.h b/arch/x86/include/asm/traps.h index 933934c3e173..0c40f37f8cb7 100644 --- a/arch/x86/include/asm/traps.h +++ b/arch/x86/include/asm/traps.h @@ -40,8 +40,6 @@ asmlinkage void smp_threshold_interrupt(struct pt_regs *regs); asmlinkage void smp_deferred_error_interrupt(struct pt_regs *regs); #endif -asmlinkage void smp_irq_move_cleanup_interrupt(void); - #ifdef CONFIG_VMAP_STACK void __noreturn handle_stack_overflow(const char *message, struct pt_regs *regs, diff --git a/arch/x86/kernel/apic/vector.c b/arch/x86/kernel/apic/vector.c index 67768e54438b..c48be6e1f676 100644 --- a/arch/x86/kernel/apic/vector.c +++ b/arch/x86/kernel/apic/vector.c @@ -861,13 +861,13 @@ static void free_moved_vector(struct apic_chip_data *apicd) apicd->move_in_progress = 0; } -asmlinkage __visible void __irq_entry smp_irq_move_cleanup_interrupt(void) +DEFINE_IDTENTRY_SYSVEC(sysvec_irq_move_cleanup) { struct hlist_head *clhead = this_cpu_ptr(&cleanup_list); struct apic_chip_data *apicd; struct hlist_node *tmp; - entering_ack_irq(); + ack_APIC_irq(); /* Prevent vectors vanishing under us */ raw_spin_lock(&vector_lock); @@ -892,7 +892,6 @@ asmlinkage __visible void __irq_entry smp_irq_move_cleanup_interrupt(void) } raw_spin_unlock(&vector_lock); - exiting_irq(); } static void __send_cleanup_vector(struct apic_chip_data *apicd) diff --git a/arch/x86/kernel/idt.c b/arch/x86/kernel/idt.c index 93c1b27f40f4..018a5424b574 100644 --- a/arch/x86/kernel/idt.c +++ b/arch/x86/kernel/idt.c @@ -112,11 +112,11 @@ static const __initconst struct idt_data def_idts[] = { */ static const __initconst struct idt_data apic_idts[] = { #ifdef CONFIG_SMP - INTG(RESCHEDULE_VECTOR, reschedule_interrupt), - INTG(CALL_FUNCTION_VECTOR, call_function_interrupt), - INTG(CALL_FUNCTION_SINGLE_VECTOR, call_function_single_interrupt), - INTG(IRQ_MOVE_CLEANUP_VECTOR, irq_move_cleanup_interrupt), - INTG(REBOOT_VECTOR, reboot_interrupt), + INTG(RESCHEDULE_VECTOR, reschedule_interrupt), + INTG(CALL_FUNCTION_VECTOR, asm_sysvec_call_function), + INTG(CALL_FUNCTION_SINGLE_VECTOR, asm_sysvec_call_function_single), + INTG(IRQ_MOVE_CLEANUP_VECTOR, asm_sysvec_irq_move_cleanup), + INTG(REBOOT_VECTOR, asm_sysvec_reboot), #endif #ifdef CONFIG_X86_THERMAL_VECTOR diff --git a/arch/x86/kernel/smp.c b/arch/x86/kernel/smp.c index b8d4e9c3c070..e5647daa7e96 100644 --- a/arch/x86/kernel/smp.c +++ b/arch/x86/kernel/smp.c @@ -27,6 +27,7 @@ #include #include #include +#include #include #include #include @@ -130,13 +131,11 @@ static int smp_stop_nmi_callback(unsigned int val, 
struct pt_regs *regs) /* * this function calls the 'stop' function on all other CPUs in the system. */ - -asmlinkage __visible void smp_reboot_interrupt(void) +DEFINE_IDTENTRY_SYSVEC(sysvec_reboot) { - ipi_entering_ack_irq(); + ack_APIC_irq(); cpu_emergency_vmxoff(); stop_this_cpu(NULL); - irq_exit(); } static int register_stop_handler(void) @@ -227,7 +226,6 @@ __visible void __irq_entry smp_reschedule_interrupt(struct pt_regs *regs) { ack_APIC_irq(); inc_irq_stat(irq_resched_count); - kvm_set_cpu_l1tf_flush_l1d(); if (trace_resched_ipi_enabled()) { /* @@ -244,24 +242,22 @@ __visible void __irq_entry smp_reschedule_interrupt(struct pt_regs *regs) scheduler_ipi(); } -__visible void __irq_entry smp_call_function_interrupt(struct pt_regs *regs) +DEFINE_IDTENTRY_SYSVEC(sysvec_call_function) { - ipi_entering_ack_irq(); + ack_APIC_irq(); trace_call_function_entry(CALL_FUNCTION_VECTOR); inc_irq_stat(irq_call_count); generic_smp_call_function_interrupt(); trace_call_function_exit(CALL_FUNCTION_VECTOR); - exiting_irq(); } -__visible void __irq_entry smp_call_function_single_interrupt(struct pt_regs *r) +DEFINE_IDTENTRY_SYSVEC(sysvec_call_function_single) { - ipi_entering_ack_irq(); + ack_APIC_irq(); trace_call_function_single_entry(CALL_FUNCTION_SINGLE_VECTOR); inc_irq_stat(irq_call_count); generic_smp_call_function_single_interrupt(); trace_call_function_single_exit(CALL_FUNCTION_SINGLE_VECTOR); - exiting_irq(); } static int __init nonmi_ipi_setup(char *str) -- cgit v1.2.3 From 720909a7abd351535bfb485a0ecce03c2e4467e2 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 21 May 2020 22:05:41 +0200 Subject: x86/entry: Convert various system vectors Convert various system vectors to IDTENTRY_SYSVEC: - Implement the C entry point with DEFINE_IDTENTRY_SYSVEC - Emit the ASM stub with DECLARE_IDTENTRY_SYSVEC - Remove the ASM idtentries in 64-bit - Remove the BUILD_INTERRUPT entries in 32-bit - Remove the old prototypes No functional change. 
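The per-handler delta is mechanical; as a sketch with an invented handler (smp_example_interrupt, sysvec_example and do_example_work are hypothetical stand-ins for the real vectors touched here):

/* Before: each handler open-codes the irq entry/exit bookkeeping */
asmlinkage __visible void __irq_entry smp_example_interrupt(struct pt_regs *regs)
{
	entering_irq();
	do_example_work();
	exiting_ack_irq();
}

/*
 * After: the DEFINE_IDTENTRY_SYSVEC wrapper supplies entry/exit and the
 * interrupt stack switch; only the APIC ack and the actual work remain.
 */
DEFINE_IDTENTRY_SYSVEC(sysvec_example)
{
	do_example_work();
	ack_APIC_irq();
}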
Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar Acked-by: Andy Lutomirski Link: https://lore.kernel.org/r/20200521202119.464812973@linutronix.de --- arch/x86/entry/entry_64.S | 19 ------------------- arch/x86/include/asm/apic.h | 13 ------------- arch/x86/include/asm/entry_arch.h | 25 ------------------------- arch/x86/include/asm/hw_irq.h | 6 ------ arch/x86/include/asm/idtentry.h | 22 ++++++++++++++++++++++ arch/x86/include/asm/irq_work.h | 1 - arch/x86/include/asm/traps.h | 5 ----- arch/x86/include/asm/uv/uv_bau.h | 8 ++------ arch/x86/kernel/cpu/mce/amd.c | 5 ++--- arch/x86/kernel/cpu/mce/therm_throt.c | 5 ++--- arch/x86/kernel/cpu/mce/threshold.c | 5 ++--- arch/x86/kernel/idt.c | 28 ++++++++++++++-------------- arch/x86/kernel/irq_work.c | 6 +++--- arch/x86/platform/uv/tlb_uv.c | 2 +- 14 files changed, 48 insertions(+), 102 deletions(-) (limited to 'arch') diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index f3ccb27c028e..2301f62d08e6 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -956,9 +956,6 @@ apicinterrupt3 \num \sym \do_sym POP_SECTION_IRQENTRY .endm -#ifdef CONFIG_X86_UV -apicinterrupt3 UV_BAU_MESSAGE uv_bau_message_intr1 uv_bau_message_interrupt -#endif #ifdef CONFIG_HAVE_KVM apicinterrupt3 POSTED_INTR_VECTOR kvm_posted_intr_ipi smp_kvm_posted_intr_ipi @@ -966,26 +963,10 @@ apicinterrupt3 POSTED_INTR_WAKEUP_VECTOR kvm_posted_intr_wakeup_ipi smp_kvm_post apicinterrupt3 POSTED_INTR_NESTED_VECTOR kvm_posted_intr_nested_ipi smp_kvm_posted_intr_nested_ipi #endif -#ifdef CONFIG_X86_MCE_THRESHOLD -apicinterrupt THRESHOLD_APIC_VECTOR threshold_interrupt smp_threshold_interrupt -#endif - -#ifdef CONFIG_X86_MCE_AMD -apicinterrupt DEFERRED_ERROR_VECTOR deferred_error_interrupt smp_deferred_error_interrupt -#endif - -#ifdef CONFIG_X86_THERMAL_VECTOR -apicinterrupt THERMAL_APIC_VECTOR thermal_interrupt smp_thermal_interrupt -#endif - #ifdef CONFIG_SMP apicinterrupt RESCHEDULE_VECTOR reschedule_interrupt smp_reschedule_interrupt #endif -#ifdef CONFIG_IRQ_WORK -apicinterrupt IRQ_WORK_VECTOR irq_work_interrupt smp_irq_work_interrupt -#endif - /* * Reload gs selector with exception handling * edi: new selector diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h index 19e94af9cc5d..a5416865b6fa 100644 --- a/arch/x86/include/asm/apic.h +++ b/arch/x86/include/asm/apic.h @@ -534,24 +534,11 @@ static inline void entering_ack_irq(void) ack_APIC_irq(); } -static inline void ipi_entering_ack_irq(void) -{ - irq_enter(); - ack_APIC_irq(); - kvm_set_cpu_l1tf_flush_l1d(); -} - static inline void exiting_irq(void) { irq_exit(); } -static inline void exiting_ack_irq(void) -{ - ack_APIC_irq(); - irq_exit(); -} - extern void ioapic_zap_locks(void); #endif /* _ASM_X86_APIC_H */ diff --git a/arch/x86/include/asm/entry_arch.h b/arch/x86/include/asm/entry_arch.h index 2e2055bcfeb2..69a5320a4673 100644 --- a/arch/x86/include/asm/entry_arch.h +++ b/arch/x86/include/asm/entry_arch.h @@ -20,28 +20,3 @@ BUILD_INTERRUPT(kvm_posted_intr_wakeup_ipi, POSTED_INTR_WAKEUP_VECTOR) BUILD_INTERRUPT(kvm_posted_intr_nested_ipi, POSTED_INTR_NESTED_VECTOR) #endif -/* - * every pentium local APIC has two 'local interrupts', with a - * soft-definable vector attached to both interrupts, one of - * which is a timer interrupt, the other one is error counter - * overflow. 
Linux uses the local APIC timer interrupt to get - * a much simpler SMP time architecture: - */ -#ifdef CONFIG_X86_LOCAL_APIC - -#ifdef CONFIG_IRQ_WORK -BUILD_INTERRUPT(irq_work_interrupt, IRQ_WORK_VECTOR) -#endif - -#ifdef CONFIG_X86_THERMAL_VECTOR -BUILD_INTERRUPT(thermal_interrupt,THERMAL_APIC_VECTOR) -#endif - -#ifdef CONFIG_X86_MCE_THRESHOLD -BUILD_INTERRUPT(threshold_interrupt,THRESHOLD_APIC_VECTOR) -#endif - -#ifdef CONFIG_X86_MCE_AMD -BUILD_INTERRUPT(deferred_error_interrupt, DEFERRED_ERROR_VECTOR) -#endif -#endif diff --git a/arch/x86/include/asm/hw_irq.h b/arch/x86/include/asm/hw_irq.h index 36a38695f27f..7281c7e3a0f6 100644 --- a/arch/x86/include/asm/hw_irq.h +++ b/arch/x86/include/asm/hw_irq.h @@ -32,15 +32,9 @@ extern asmlinkage void kvm_posted_intr_ipi(void); extern asmlinkage void kvm_posted_intr_wakeup_ipi(void); extern asmlinkage void kvm_posted_intr_nested_ipi(void); -extern asmlinkage void irq_work_interrupt(void); -extern asmlinkage void uv_bau_message_intr1(void); -extern asmlinkage void thermal_interrupt(void); extern asmlinkage void reschedule_interrupt(void); -extern asmlinkage void threshold_interrupt(void); -extern asmlinkage void deferred_error_interrupt(void); - #ifdef CONFIG_X86_LOCAL_APIC struct irq_data; struct pci_dev; diff --git a/arch/x86/include/asm/idtentry.h b/arch/x86/include/asm/idtentry.h index b44f4ac22af6..cd752e6cf5af 100644 --- a/arch/x86/include/asm/idtentry.h +++ b/arch/x86/include/asm/idtentry.h @@ -606,6 +606,28 @@ DECLARE_IDTENTRY_SYSVEC(CALL_FUNCTION_SINGLE_VECTOR, sysvec_call_function_single DECLARE_IDTENTRY_SYSVEC(CALL_FUNCTION_VECTOR, sysvec_call_function); #endif +#ifdef CONFIG_X86_LOCAL_APIC +# ifdef CONFIG_X86_UV +DECLARE_IDTENTRY_SYSVEC(UV_BAU_MESSAGE, sysvec_uv_bau_message); +# endif + +# ifdef CONFIG_X86_MCE_THRESHOLD +DECLARE_IDTENTRY_SYSVEC(THRESHOLD_APIC_VECTOR, sysvec_threshold); +# endif + +# ifdef CONFIG_X86_MCE_AMD +DECLARE_IDTENTRY_SYSVEC(DEFERRED_ERROR_VECTOR, sysvec_deferred_error); +# endif + +# ifdef CONFIG_X86_THERMAL_VECTOR +DECLARE_IDTENTRY_SYSVEC(THERMAL_APIC_VECTOR, sysvec_thermal); +# endif + +# ifdef CONFIG_IRQ_WORK +DECLARE_IDTENTRY_SYSVEC(IRQ_WORK_VECTOR, sysvec_irq_work); +# endif +#endif + #undef X86_TRAP_OTHER #endif diff --git a/arch/x86/include/asm/irq_work.h b/arch/x86/include/asm/irq_work.h index 80b35e3adf03..800ffce0db29 100644 --- a/arch/x86/include/asm/irq_work.h +++ b/arch/x86/include/asm/irq_work.h @@ -10,7 +10,6 @@ static inline bool arch_irq_work_has_interrupt(void) return boot_cpu_has(X86_FEATURE_APIC); } extern void arch_irq_work_raise(void); -extern __visible void smp_irq_work_interrupt(struct pt_regs *regs); #else static inline bool arch_irq_work_has_interrupt(void) { diff --git a/arch/x86/include/asm/traps.h b/arch/x86/include/asm/traps.h index 0c40f37f8cb7..714b1a30e7b0 100644 --- a/arch/x86/include/asm/traps.h +++ b/arch/x86/include/asm/traps.h @@ -34,11 +34,6 @@ static inline int get_si_code(unsigned long condition) extern int panic_on_unrecovered_nmi; void math_emulate(struct math_emu_info *); -#ifndef CONFIG_X86_32 -asmlinkage void smp_thermal_interrupt(struct pt_regs *regs); -asmlinkage void smp_threshold_interrupt(struct pt_regs *regs); -asmlinkage void smp_deferred_error_interrupt(struct pt_regs *regs); -#endif #ifdef CONFIG_VMAP_STACK void __noreturn handle_stack_overflow(const char *message, diff --git a/arch/x86/include/asm/uv/uv_bau.h b/arch/x86/include/asm/uv/uv_bau.h index 13687bf0e0a9..f1188bd47658 100644 --- a/arch/x86/include/asm/uv/uv_bau.h +++ 
b/arch/x86/include/asm/uv/uv_bau.h @@ -12,6 +12,8 @@ #define _ASM_X86_UV_UV_BAU_H #include +#include + #define BITSPERBYTE 8 /* @@ -799,12 +801,6 @@ static inline void bau_cpubits_clear(struct bau_local_cpumask *dstp, int nbits) bitmap_zero(&dstp->bits, nbits); } -extern void uv_bau_message_intr1(void); -#ifdef CONFIG_TRACING -#define trace_uv_bau_message_intr1 uv_bau_message_intr1 -#endif -extern void uv_bau_timeout_intr1(void); - struct atomic_short { short counter; }; diff --git a/arch/x86/kernel/cpu/mce/amd.c b/arch/x86/kernel/cpu/mce/amd.c index 52de616a8065..a906d68a18a2 100644 --- a/arch/x86/kernel/cpu/mce/amd.c +++ b/arch/x86/kernel/cpu/mce/amd.c @@ -907,14 +907,13 @@ static void __log_error(unsigned int bank, u64 status, u64 addr, u64 misc) mce_log(&m); } -asmlinkage __visible void __irq_entry smp_deferred_error_interrupt(struct pt_regs *regs) +DEFINE_IDTENTRY_SYSVEC(sysvec_deferred_error) { - entering_irq(); trace_deferred_error_apic_entry(DEFERRED_ERROR_VECTOR); inc_irq_stat(irq_deferred_error_count); deferred_error_int_vector(); trace_deferred_error_apic_exit(DEFERRED_ERROR_VECTOR); - exiting_ack_irq(); + ack_APIC_irq(); } /* diff --git a/arch/x86/kernel/cpu/mce/therm_throt.c b/arch/x86/kernel/cpu/mce/therm_throt.c index f36dc0742085..a7cd2d203ced 100644 --- a/arch/x86/kernel/cpu/mce/therm_throt.c +++ b/arch/x86/kernel/cpu/mce/therm_throt.c @@ -614,14 +614,13 @@ static void unexpected_thermal_interrupt(void) static void (*smp_thermal_vector)(void) = unexpected_thermal_interrupt; -asmlinkage __visible void __irq_entry smp_thermal_interrupt(struct pt_regs *regs) +DEFINE_IDTENTRY_SYSVEC(sysvec_thermal) { - entering_irq(); trace_thermal_apic_entry(THERMAL_APIC_VECTOR); inc_irq_stat(irq_thermal_count); smp_thermal_vector(); trace_thermal_apic_exit(THERMAL_APIC_VECTOR); - exiting_ack_irq(); + ack_APIC_irq(); } /* Thermal monitoring depends on APIC, ACPI and clock modulation */ diff --git a/arch/x86/kernel/cpu/mce/threshold.c b/arch/x86/kernel/cpu/mce/threshold.c index 28812cc15300..6a059a035021 100644 --- a/arch/x86/kernel/cpu/mce/threshold.c +++ b/arch/x86/kernel/cpu/mce/threshold.c @@ -21,12 +21,11 @@ static void default_threshold_interrupt(void) void (*mce_threshold_vector)(void) = default_threshold_interrupt; -asmlinkage __visible void __irq_entry smp_threshold_interrupt(struct pt_regs *regs) +DEFINE_IDTENTRY_SYSVEC(sysvec_threshold) { - entering_irq(); trace_threshold_apic_entry(THRESHOLD_APIC_VECTOR); inc_irq_stat(irq_threshold_count); mce_threshold_vector(); trace_threshold_apic_exit(THRESHOLD_APIC_VECTOR); - exiting_ack_irq(); + ack_APIC_irq(); } diff --git a/arch/x86/kernel/idt.c b/arch/x86/kernel/idt.c index 018a5424b574..3d811d058f2e 100644 --- a/arch/x86/kernel/idt.c +++ b/arch/x86/kernel/idt.c @@ -120,33 +120,33 @@ static const __initconst struct idt_data apic_idts[] = { #endif #ifdef CONFIG_X86_THERMAL_VECTOR - INTG(THERMAL_APIC_VECTOR, thermal_interrupt), + INTG(THERMAL_APIC_VECTOR, asm_sysvec_thermal), #endif #ifdef CONFIG_X86_MCE_THRESHOLD - INTG(THRESHOLD_APIC_VECTOR, threshold_interrupt), + INTG(THRESHOLD_APIC_VECTOR, asm_sysvec_threshold), #endif #ifdef CONFIG_X86_MCE_AMD - INTG(DEFERRED_ERROR_VECTOR, deferred_error_interrupt), + INTG(DEFERRED_ERROR_VECTOR, asm_sysvec_deferred_error), #endif #ifdef CONFIG_X86_LOCAL_APIC - INTG(LOCAL_TIMER_VECTOR, asm_sysvec_apic_timer_interrupt), - INTG(X86_PLATFORM_IPI_VECTOR, asm_sysvec_x86_platform_ipi), + INTG(LOCAL_TIMER_VECTOR, asm_sysvec_apic_timer_interrupt), + INTG(X86_PLATFORM_IPI_VECTOR, asm_sysvec_x86_platform_ipi), # 
ifdef CONFIG_HAVE_KVM - INTG(POSTED_INTR_VECTOR, kvm_posted_intr_ipi), - INTG(POSTED_INTR_WAKEUP_VECTOR, kvm_posted_intr_wakeup_ipi), - INTG(POSTED_INTR_NESTED_VECTOR, kvm_posted_intr_nested_ipi), + INTG(POSTED_INTR_VECTOR, kvm_posted_intr_ipi), + INTG(POSTED_INTR_WAKEUP_VECTOR, kvm_posted_intr_wakeup_ipi), + INTG(POSTED_INTR_NESTED_VECTOR, kvm_posted_intr_nested_ipi), # endif # ifdef CONFIG_IRQ_WORK - INTG(IRQ_WORK_VECTOR, irq_work_interrupt), + INTG(IRQ_WORK_VECTOR, asm_sysvec_irq_work), # endif -#ifdef CONFIG_X86_UV - INTG(UV_BAU_MESSAGE, uv_bau_message_intr1), -#endif - INTG(SPURIOUS_APIC_VECTOR, asm_sysvec_spurious_apic_interrupt), - INTG(ERROR_APIC_VECTOR, asm_sysvec_error_interrupt), +# ifdef CONFIG_X86_UV + INTG(UV_BAU_MESSAGE, asm_sysvec_uv_bau_message), +# endif + INTG(SPURIOUS_APIC_VECTOR, asm_sysvec_spurious_apic_interrupt), + INTG(ERROR_APIC_VECTOR, asm_sysvec_error_interrupt), #endif }; diff --git a/arch/x86/kernel/irq_work.c b/arch/x86/kernel/irq_work.c index 80bee7695a20..890d4778cd35 100644 --- a/arch/x86/kernel/irq_work.c +++ b/arch/x86/kernel/irq_work.c @@ -9,18 +9,18 @@ #include #include #include +#include #include #include #ifdef CONFIG_X86_LOCAL_APIC -__visible void __irq_entry smp_irq_work_interrupt(struct pt_regs *regs) +DEFINE_IDTENTRY_SYSVEC(sysvec_irq_work) { - ipi_entering_ack_irq(); + ack_APIC_irq(); trace_irq_work_entry(IRQ_WORK_VECTOR); inc_irq_stat(apic_irq_work_irqs); irq_work_run(); trace_irq_work_exit(IRQ_WORK_VECTOR); - exiting_irq(); } void arch_irq_work_raise(void) diff --git a/arch/x86/platform/uv/tlb_uv.c b/arch/x86/platform/uv/tlb_uv.c index 4ea69690c3e4..0ac96ca304c7 100644 --- a/arch/x86/platform/uv/tlb_uv.c +++ b/arch/x86/platform/uv/tlb_uv.c @@ -1272,7 +1272,7 @@ static void process_uv2_message(struct msg_desc *mdp, struct bau_control *bcp) * (the resource will not be freed until noninterruptable cpus see this * interrupt; hardware may timeout the s/w ack and reply ERROR) */ -void uv_bau_message_interrupt(struct pt_regs *regs) +DEFINE_IDTENTRY_SYSVEC(sysvec_uv_bau_message) { int count = 0; cycles_t time_start; -- cgit v1.2.3 From 9c3b1f4975c46fc2932fd6d53e63c14f0ddf985f Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 21 May 2020 22:05:42 +0200 Subject: x86/entry: Convert KVM vectors to IDTENTRY_SYSVEC* Convert KVM-specific system vectors to IDTENTRY_SYSVEC*: The two empty stub handlers which only increment the stats counter do not need to run on the interrupt stack. Use IDTENTRY_SYSVEC_SIMPLE for them. The wakeup handler does more work and runs on the interrupt stack. None of these handlers need to save and restore the irq_regs pointer.
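Sketched, the rule of thumb for picking a variant (the handler names here are invented placeholders, not the KVM ones):

/*
 * IDTENTRY_SYSVEC_SIMPLE: the wrapper only does __irq_enter_raw() /
 * __irq_exit_raw() and runs the body on the interrupted stack. Right
 * for a stub which merely acks and counts.
 */
DEFINE_IDTENTRY_SYSVEC_SIMPLE(sysvec_stub_example)
{
	ack_APIC_irq();
	/* account the IPI; no callouts, no softirq processing needed */
}

/*
 * IDTENTRY_SYSVEC: the wrapper additionally goes through
 * irq_enter_rcu()/irq_exit_rcu() and runs the body on the interrupt
 * stack via run_on_irqstack_cond(). Required once the handler calls
 * out into real work.
 */
DEFINE_IDTENTRY_SYSVEC(sysvec_work_example)
{
	ack_APIC_irq();
	/* callouts which may do substantial work go here */
}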
Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar Acked-by: Paolo Bonzini Acked-by: Andy Lutomirski Link: https://lore.kernel.org/r/20200521202119.555715519@linutronix.de --- arch/x86/entry/entry_64.S | 7 ------- arch/x86/include/asm/entry_arch.h | 7 ------- arch/x86/include/asm/hw_irq.h | 4 ---- arch/x86/include/asm/idtentry.h | 6 ++++++ arch/x86/include/asm/irq.h | 3 --- arch/x86/kernel/idt.c | 6 +++--- arch/x86/kernel/irq.c | 24 ++++++------------------ 7 files changed, 15 insertions(+), 42 deletions(-) (limited to 'arch') diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index 2301f62d08e6..ea1f2930876c 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -956,13 +956,6 @@ apicinterrupt3 \num \sym \do_sym POP_SECTION_IRQENTRY .endm - -#ifdef CONFIG_HAVE_KVM -apicinterrupt3 POSTED_INTR_VECTOR kvm_posted_intr_ipi smp_kvm_posted_intr_ipi -apicinterrupt3 POSTED_INTR_WAKEUP_VECTOR kvm_posted_intr_wakeup_ipi smp_kvm_posted_intr_wakeup_ipi -apicinterrupt3 POSTED_INTR_NESTED_VECTOR kvm_posted_intr_nested_ipi smp_kvm_posted_intr_nested_ipi -#endif - #ifdef CONFIG_SMP apicinterrupt RESCHEDULE_VECTOR reschedule_interrupt smp_reschedule_interrupt #endif diff --git a/arch/x86/include/asm/entry_arch.h b/arch/x86/include/asm/entry_arch.h index 69a5320a4673..a01bb74244ac 100644 --- a/arch/x86/include/asm/entry_arch.h +++ b/arch/x86/include/asm/entry_arch.h @@ -13,10 +13,3 @@ #ifdef CONFIG_SMP BUILD_INTERRUPT(reschedule_interrupt,RESCHEDULE_VECTOR) #endif - -#ifdef CONFIG_HAVE_KVM -BUILD_INTERRUPT(kvm_posted_intr_ipi, POSTED_INTR_VECTOR) -BUILD_INTERRUPT(kvm_posted_intr_wakeup_ipi, POSTED_INTR_WAKEUP_VECTOR) -BUILD_INTERRUPT(kvm_posted_intr_nested_ipi, POSTED_INTR_NESTED_VECTOR) -#endif - diff --git a/arch/x86/include/asm/hw_irq.h b/arch/x86/include/asm/hw_irq.h index 7281c7e3a0f6..fd5e7c8825e1 100644 --- a/arch/x86/include/asm/hw_irq.h +++ b/arch/x86/include/asm/hw_irq.h @@ -29,10 +29,6 @@ #include /* Interrupt handlers registered during init_IRQ */ -extern asmlinkage void kvm_posted_intr_ipi(void); -extern asmlinkage void kvm_posted_intr_wakeup_ipi(void); -extern asmlinkage void kvm_posted_intr_nested_ipi(void); - extern asmlinkage void reschedule_interrupt(void); #ifdef CONFIG_X86_LOCAL_APIC diff --git a/arch/x86/include/asm/idtentry.h b/arch/x86/include/asm/idtentry.h index cd752e6cf5af..7f7c80bd272c 100644 --- a/arch/x86/include/asm/idtentry.h +++ b/arch/x86/include/asm/idtentry.h @@ -628,6 +628,12 @@ DECLARE_IDTENTRY_SYSVEC(IRQ_WORK_VECTOR, sysvec_irq_work); # endif #endif +#ifdef CONFIG_HAVE_KVM +DECLARE_IDTENTRY_SYSVEC(POSTED_INTR_VECTOR, sysvec_kvm_posted_intr_ipi); +DECLARE_IDTENTRY_SYSVEC(POSTED_INTR_WAKEUP_VECTOR, sysvec_kvm_posted_intr_wakeup_ipi); +DECLARE_IDTENTRY_SYSVEC(POSTED_INTR_NESTED_VECTOR, sysvec_kvm_posted_intr_nested_ipi); +#endif + #undef X86_TRAP_OTHER #endif diff --git a/arch/x86/include/asm/irq.h b/arch/x86/include/asm/irq.h index c7c43e86805a..f73dd3f8b043 100644 --- a/arch/x86/include/asm/irq.h +++ b/arch/x86/include/asm/irq.h @@ -26,9 +26,6 @@ extern void fixup_irqs(void); #ifdef CONFIG_HAVE_KVM extern void kvm_set_posted_intr_wakeup_handler(void (*handler)(void)); -extern __visible void smp_kvm_posted_intr_ipi(struct pt_regs *regs); -extern __visible void smp_kvm_posted_intr_wakeup_ipi(struct pt_regs *regs); -extern __visible void smp_kvm_posted_intr_nested_ipi(struct pt_regs *regs); #endif extern void (*x86_platform_ipi_callback)(void); diff --git a/arch/x86/kernel/idt.c b/arch/x86/kernel/idt.c index 
3d811d058f2e..faaadd430882 100644 --- a/arch/x86/kernel/idt.c +++ b/arch/x86/kernel/idt.c @@ -135,9 +135,9 @@ static const __initconst struct idt_data apic_idts[] = { INTG(LOCAL_TIMER_VECTOR, asm_sysvec_apic_timer_interrupt), INTG(X86_PLATFORM_IPI_VECTOR, asm_sysvec_x86_platform_ipi), # ifdef CONFIG_HAVE_KVM - INTG(POSTED_INTR_VECTOR, kvm_posted_intr_ipi), - INTG(POSTED_INTR_WAKEUP_VECTOR, kvm_posted_intr_wakeup_ipi), - INTG(POSTED_INTR_NESTED_VECTOR, kvm_posted_intr_nested_ipi), + INTG(POSTED_INTR_VECTOR, asm_sysvec_kvm_posted_intr_ipi), + INTG(POSTED_INTR_WAKEUP_VECTOR, asm_sysvec_kvm_posted_intr_wakeup_ipi), + INTG(POSTED_INTR_NESTED_VECTOR, asm_sysvec_kvm_posted_intr_nested_ipi), # endif # ifdef CONFIG_IRQ_WORK INTG(IRQ_WORK_VECTOR, asm_sysvec_irq_work), diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c index 7e3005274f83..181060247e3c 100644 --- a/arch/x86/kernel/irq.c +++ b/arch/x86/kernel/irq.c @@ -298,41 +298,29 @@ EXPORT_SYMBOL_GPL(kvm_set_posted_intr_wakeup_handler); /* * Handler for POSTED_INTERRUPT_VECTOR. */ -__visible void smp_kvm_posted_intr_ipi(struct pt_regs *regs) +DEFINE_IDTENTRY_SYSVEC_SIMPLE(sysvec_kvm_posted_intr_ipi) { - struct pt_regs *old_regs = set_irq_regs(regs); - - entering_ack_irq(); + ack_APIC_irq(); inc_irq_stat(kvm_posted_intr_ipis); - exiting_irq(); - set_irq_regs(old_regs); } /* * Handler for POSTED_INTERRUPT_WAKEUP_VECTOR. */ -__visible void smp_kvm_posted_intr_wakeup_ipi(struct pt_regs *regs) +DEFINE_IDTENTRY_SYSVEC(sysvec_kvm_posted_intr_wakeup_ipi) { - struct pt_regs *old_regs = set_irq_regs(regs); - - entering_ack_irq(); + ack_APIC_irq(); inc_irq_stat(kvm_posted_intr_wakeup_ipis); kvm_posted_intr_wakeup_handler(); - exiting_irq(); - set_irq_regs(old_regs); } /* * Handler for POSTED_INTERRUPT_NESTED_VECTOR. */ -__visible void smp_kvm_posted_intr_nested_ipi(struct pt_regs *regs) +DEFINE_IDTENTRY_SYSVEC_SIMPLE(sysvec_kvm_posted_intr_nested_ipi) { - struct pt_regs *old_regs = set_irq_regs(regs); - - entering_ack_irq(); + ack_APIC_irq(); inc_irq_stat(kvm_posted_intr_nested_ipis); - exiting_irq(); - set_irq_regs(old_regs); } #endif -- cgit v1.2.3 From a16be368dd3fb695077cc9bc59c988b548955eec Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 21 May 2020 22:05:43 +0200 Subject: x86/entry: Convert various hypervisor vectors to IDTENTRY_SYSVEC Convert various hypervisor vectors to IDTENTRY_SYSVEC: - Implement the C entry point with DEFINE_IDTENTRY_SYSVEC - Emit the ASM stub with DECLARE_IDTENTRY_SYSVEC - Remove the ASM idtentries in 64-bit - Remove the BUILD_INTERRUPT entries in 32-bit - Remove the old prototypes No functional change. 
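The registration side is uniform as well; a sketch with an invented guest (example_guest_init_platform, asm_sysvec_example_hv_callback and example_intr_handler are hypothetical; the pattern matches the acrn/hyperv code below):

DEFINE_IDTENTRY_SYSVEC(sysvec_example_hv_callback)
{
	struct pt_regs *old_regs = set_irq_regs(regs);

	ack_APIC_irq();
	inc_irq_stat(irq_hv_callback_count);
	if (example_intr_handler)
		example_intr_handler();

	set_irq_regs(old_regs);
}

static void __init example_guest_init_platform(void)
{
	/*
	 * HYPERVISOR_CALLBACK_VECTOR is shared; exactly one guest type
	 * detects itself at boot and installs its stub here.
	 */
	alloc_intr_gate(HYPERVISOR_CALLBACK_VECTOR,
			asm_sysvec_example_hv_callback);
}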
Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar Acked-by: Andy Lutomirski Reviewed-by: Wei Liu Link: https://lore.kernel.org/r/20200521202119.647997594@linutronix.de --- arch/x86/entry/entry_32.S | 14 -------------- arch/x86/entry/entry_64.S | 17 ----------------- arch/x86/hyperv/hv_init.c | 9 +++------ arch/x86/include/asm/acrn.h | 11 ----------- arch/x86/include/asm/apic.h | 20 -------------------- arch/x86/include/asm/idtentry.h | 10 ++++++++++ arch/x86/include/asm/mshyperv.h | 13 ------------- arch/x86/kernel/cpu/acrn.c | 9 ++++----- arch/x86/kernel/cpu/mshyperv.c | 22 ++++++++++------------ 9 files changed, 27 insertions(+), 98 deletions(-) delete mode 100644 arch/x86/include/asm/acrn.h (limited to 'arch') diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S index a8803aa3a07b..bb9ebd2df05e 100644 --- a/arch/x86/entry/entry_32.S +++ b/arch/x86/entry/entry_32.S @@ -1345,20 +1345,6 @@ BUILD_INTERRUPT3(xen_hvm_callback_vector, HYPERVISOR_CALLBACK_VECTOR, xen_evtchn_do_upcall) #endif - -#if IS_ENABLED(CONFIG_HYPERV) - -BUILD_INTERRUPT3(hyperv_callback_vector, HYPERVISOR_CALLBACK_VECTOR, - hyperv_vector_handler) - -BUILD_INTERRUPT3(hyperv_reenlightenment_vector, HYPERV_REENLIGHTENMENT_VECTOR, - hyperv_reenlightenment_intr) - -BUILD_INTERRUPT3(hv_stimer0_callback_vector, HYPERV_STIMER0_VECTOR, - hv_stimer0_vector_handler) - -#endif /* CONFIG_HYPERV */ - SYM_CODE_START_LOCAL_NOALIGN(handle_exception) /* the function address is in %gs's slot on the stack */ SAVE_ALL switch_stacks=1 skip_gs=1 unwind_espfix=1 diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index ea1f2930876c..b97fcda28019 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -1116,23 +1116,6 @@ apicinterrupt3 HYPERVISOR_CALLBACK_VECTOR \ xen_hvm_callback_vector xen_evtchn_do_upcall #endif - -#if IS_ENABLED(CONFIG_HYPERV) -apicinterrupt3 HYPERVISOR_CALLBACK_VECTOR \ - hyperv_callback_vector hyperv_vector_handler - -apicinterrupt3 HYPERV_REENLIGHTENMENT_VECTOR \ - hyperv_reenlightenment_vector hyperv_reenlightenment_intr - -apicinterrupt3 HYPERV_STIMER0_VECTOR \ - hv_stimer0_callback_vector hv_stimer0_vector_handler -#endif /* CONFIG_HYPERV */ - -#if IS_ENABLED(CONFIG_ACRN_GUEST) -apicinterrupt3 HYPERVISOR_CALLBACK_VECTOR \ - acrn_hv_callback_vector acrn_hv_vector_handler -#endif - /* * Save all registers in pt_regs, and switch gs if needed. * Use slow, but surefire "are we in kernel?" check. 
diff --git a/arch/x86/hyperv/hv_init.c b/arch/x86/hyperv/hv_init.c index e2137070386a..a54c6a401581 100644 --- a/arch/x86/hyperv/hv_init.c +++ b/arch/x86/hyperv/hv_init.c @@ -15,6 +15,7 @@ #include #include #include +#include #include #include #include @@ -152,15 +153,11 @@ static inline bool hv_reenlightenment_available(void) ms_hyperv.features & HV_X64_ACCESS_REENLIGHTENMENT; } -__visible void __irq_entry hyperv_reenlightenment_intr(struct pt_regs *regs) +DEFINE_IDTENTRY_SYSVEC(sysvec_hyperv_reenlightenment) { - entering_ack_irq(); - + ack_APIC_irq(); inc_irq_stat(irq_hv_reenlightenment_count); - schedule_delayed_work(&hv_reenlightenment_work, HZ/10); - - exiting_irq(); } void set_hv_tscchange_cb(void (*cb)(void)) diff --git a/arch/x86/include/asm/acrn.h b/arch/x86/include/asm/acrn.h deleted file mode 100644 index 4adb13f08af7..000000000000 --- a/arch/x86/include/asm/acrn.h +++ /dev/null @@ -1,11 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _ASM_X86_ACRN_H -#define _ASM_X86_ACRN_H - -extern void acrn_hv_callback_vector(void); -#ifdef CONFIG_TRACING -#define trace_acrn_hv_callback_vector acrn_hv_callback_vector -#endif - -extern void acrn_hv_vector_handler(struct pt_regs *regs); -#endif /* _ASM_X86_ACRN_H */ diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h index a5416865b6fa..2cc44e957c31 100644 --- a/arch/x86/include/asm/apic.h +++ b/arch/x86/include/asm/apic.h @@ -519,26 +519,6 @@ static inline bool apic_id_is_primary_thread(unsigned int id) { return false; } static inline void apic_smt_update(void) { } #endif -extern void irq_enter(void); -extern void irq_exit(void); - -static inline void entering_irq(void) -{ - irq_enter(); - kvm_set_cpu_l1tf_flush_l1d(); -} - -static inline void entering_ack_irq(void) -{ - entering_irq(); - ack_APIC_irq(); -} - -static inline void exiting_irq(void) -{ - irq_exit(); -} - extern void ioapic_zap_locks(void); #endif /* _ASM_X86_APIC_H */ diff --git a/arch/x86/include/asm/idtentry.h b/arch/x86/include/asm/idtentry.h index 7f7c80bd272c..1b6d3ea1fc96 100644 --- a/arch/x86/include/asm/idtentry.h +++ b/arch/x86/include/asm/idtentry.h @@ -634,6 +634,16 @@ DECLARE_IDTENTRY_SYSVEC(POSTED_INTR_WAKEUP_VECTOR, sysvec_kvm_posted_intr_wakeup DECLARE_IDTENTRY_SYSVEC(POSTED_INTR_NESTED_VECTOR, sysvec_kvm_posted_intr_nested_ipi); #endif +#if IS_ENABLED(CONFIG_HYPERV) +DECLARE_IDTENTRY_SYSVEC(HYPERVISOR_CALLBACK_VECTOR, sysvec_hyperv_callback); +DECLARE_IDTENTRY_SYSVEC(HYPERVISOR_REENLIGHTENMENT_VECTOR, sysvec_hyperv_reenlightenment); +DECLARE_IDTENTRY_SYSVEC(HYPERVISOR_STIMER0_VECTOR, sysvec_hyperv_stimer0); +#endif + +#if IS_ENABLED(CONFIG_ACRN_GUEST) +DECLARE_IDTENTRY_SYSVEC(HYPERVISOR_CALLBACK_VECTOR, sysvec_acrn_hv_callback); +#endif + #undef X86_TRAP_OTHER #endif diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h index d30805ed323e..60b944dd2df1 100644 --- a/arch/x86/include/asm/mshyperv.h +++ b/arch/x86/include/asm/mshyperv.h @@ -54,20 +54,8 @@ typedef int (*hyperv_fill_flush_list_func)( vclocks_set_used(VDSO_CLOCKMODE_HVCLOCK); #define hv_get_raw_timer() rdtsc_ordered() -void hyperv_callback_vector(void); -void hyperv_reenlightenment_vector(void); -#ifdef CONFIG_TRACING -#define trace_hyperv_callback_vector hyperv_callback_vector -#endif void hyperv_vector_handler(struct pt_regs *regs); -/* - * Routines for stimer0 Direct Mode handling. - * On x86/x64, there are no percpu actions to take. 
- */ -void hv_stimer0_vector_handler(struct pt_regs *regs); -void hv_stimer0_callback_vector(void); - static inline void hv_enable_stimer0_percpu_irq(int irq) {} static inline void hv_disable_stimer0_percpu_irq(int irq) {} @@ -226,7 +214,6 @@ void hyperv_setup_mmu_ops(void); void *hv_alloc_hyperv_page(void); void *hv_alloc_hyperv_zeroed_page(void); void hv_free_hyperv_page(unsigned long addr); -void hyperv_reenlightenment_intr(struct pt_regs *regs); void set_hv_tscchange_cb(void (*cb)(void)); void clear_hv_tscchange_cb(void); void hyperv_stop_tsc_emulation(void); diff --git a/arch/x86/kernel/cpu/acrn.c b/arch/x86/kernel/cpu/acrn.c index 676022e71791..1da9b1c9a2db 100644 --- a/arch/x86/kernel/cpu/acrn.c +++ b/arch/x86/kernel/cpu/acrn.c @@ -10,10 +10,10 @@ */ #include -#include #include #include #include +#include #include static uint32_t __init acrn_detect(void) @@ -24,7 +24,7 @@ static uint32_t __init acrn_detect(void) static void __init acrn_init_platform(void) { /* Setup the IDT for ACRN hypervisor callback */ - alloc_intr_gate(HYPERVISOR_CALLBACK_VECTOR, acrn_hv_callback_vector); + alloc_intr_gate(HYPERVISOR_CALLBACK_VECTOR, asm_sysvec_acrn_hv_callback); } static bool acrn_x2apic_available(void) @@ -39,7 +39,7 @@ static bool acrn_x2apic_available(void) static void (*acrn_intr_handler)(void); -__visible void __irq_entry acrn_hv_vector_handler(struct pt_regs *regs) +DEFINE_IDTENTRY_SYSVEC(sysvec_acrn_hv_callback) { struct pt_regs *old_regs = set_irq_regs(regs); @@ -50,13 +50,12 @@ __visible void __irq_entry acrn_hv_vector_handler(struct pt_regs *regs) * will block the interrupt whose vector is lower than * HYPERVISOR_CALLBACK_VECTOR. */ - entering_ack_irq(); + ack_APIC_irq(); inc_irq_stat(irq_hv_callback_count); if (acrn_intr_handler) acrn_intr_handler(); - exiting_irq(); set_irq_regs(old_regs); } diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c index ebf34c7bc8bc..af94f05a5c66 100644 --- a/arch/x86/kernel/cpu/mshyperv.c +++ b/arch/x86/kernel/cpu/mshyperv.c @@ -23,6 +23,7 @@ #include #include #include +#include #include #include #include @@ -40,11 +41,10 @@ static void (*hv_stimer0_handler)(void); static void (*hv_kexec_handler)(void); static void (*hv_crash_handler)(struct pt_regs *regs); -__visible void __irq_entry hyperv_vector_handler(struct pt_regs *regs) +DEFINE_IDTENTRY_SYSVEC(sysvec_hyperv_callback) { struct pt_regs *old_regs = set_irq_regs(regs); - entering_irq(); inc_irq_stat(irq_hv_callback_count); if (vmbus_handler) vmbus_handler(); @@ -52,7 +52,6 @@ __visible void __irq_entry hyperv_vector_handler(struct pt_regs *regs) if (ms_hyperv.hints & HV_DEPRECATING_AEOI_RECOMMENDED) ack_APIC_irq(); - exiting_irq(); set_irq_regs(old_regs); } @@ -73,19 +72,16 @@ EXPORT_SYMBOL_GPL(hv_remove_vmbus_irq); * Routines to do per-architecture handling of stimer0 * interrupts when in Direct Mode */ - -__visible void __irq_entry hv_stimer0_vector_handler(struct pt_regs *regs) +DEFINE_IDTENTRY_SYSVEC(sysvec_hyperv_stimer0) { struct pt_regs *old_regs = set_irq_regs(regs); - entering_irq(); inc_irq_stat(hyperv_stimer0_count); if (hv_stimer0_handler) hv_stimer0_handler(); add_interrupt_randomness(HYPERV_STIMER0_VECTOR, 0); ack_APIC_irq(); - exiting_irq(); set_irq_regs(old_regs); } @@ -331,17 +327,19 @@ static void __init ms_hyperv_init_platform(void) x86_platform.apic_post_init = hyperv_init; hyperv_setup_mmu_ops(); /* Setup the IDT for hypervisor callback */ - alloc_intr_gate(HYPERVISOR_CALLBACK_VECTOR, hyperv_callback_vector); + alloc_intr_gate(HYPERVISOR_CALLBACK_VECTOR, 
asm_sysvec_hyperv_callback); /* Setup the IDT for reenlightenment notifications */ - if (ms_hyperv.features & HV_X64_ACCESS_REENLIGHTENMENT) + if (ms_hyperv.features & HV_X64_ACCESS_REENLIGHTENMENT) { alloc_intr_gate(HYPERV_REENLIGHTENMENT_VECTOR, - hyperv_reenlightenment_vector); + asm_sysvec_hyperv_reenlightenment); + } /* Setup the IDT for stimer0 */ - if (ms_hyperv.misc_features & HV_STIMER_DIRECT_MODE_AVAILABLE) + if (ms_hyperv.misc_features & HV_STIMER_DIRECT_MODE_AVAILABLE) { alloc_intr_gate(HYPERV_STIMER0_VECTOR, - hv_stimer0_callback_vector); + asm_sysvec_hyperv_stimer0); + } # ifdef CONFIG_SMP smp_ops.smp_prepare_boot_cpu = hv_smp_prepare_boot_cpu; -- cgit v1.2.3 From cb09ea2924cbf1a42da59bd30a59cc1836240bcb Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 21 May 2020 22:05:44 +0200 Subject: x86/entry: Convert XEN hypercall vector to IDTENTRY_SYSVEC Convert the last oldstyle defined vector to IDTENTRY_SYSVEC: - Implement the C entry point with DEFINE_IDTENTRY_SYSVEC - Emit the ASM stub with DECLARE_IDTENTRY_SYSVEC - Remove the ASM idtentries in 64-bit - Remove the BUILD_INTERRUPT entries in 32-bit - Remove the old prototypes Fixup the related XEN code by providing the primary C entry point in x86 to avoid cluttering the generic code with X86'isms. No functional change. Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar Acked-by: Andy Lutomirski Link: https://lore.kernel.org/r/20200521202119.741950104@linutronix.de --- arch/x86/entry/entry_32.S | 5 ----- arch/x86/entry/entry_64.S | 5 ----- arch/x86/include/asm/idtentry.h | 4 ++++ arch/x86/xen/enlighten_hvm.c | 12 ++++++++++++ drivers/xen/events/events_base.c | 6 ++---- include/xen/events.h | 7 ------- 6 files changed, 18 insertions(+), 21 deletions(-) (limited to 'arch') diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S index bb9ebd2df05e..f8e8aeb10ba4 100644 --- a/arch/x86/entry/entry_32.S +++ b/arch/x86/entry/entry_32.S @@ -1340,11 +1340,6 @@ SYM_FUNC_START(xen_failsafe_callback) SYM_FUNC_END(xen_failsafe_callback) #endif /* CONFIG_XEN_PV */ -#ifdef CONFIG_XEN_PVHVM -BUILD_INTERRUPT3(xen_hvm_callback_vector, HYPERVISOR_CALLBACK_VECTOR, - xen_evtchn_do_upcall) -#endif - SYM_CODE_START_LOCAL_NOALIGN(handle_exception) /* the function address is in %gs's slot on the stack */ SAVE_ALL switch_stacks=1 skip_gs=1 unwind_espfix=1 diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index b97fcda28019..fd7efb8deded 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -1111,11 +1111,6 @@ SYM_CODE_START(xen_failsafe_callback) SYM_CODE_END(xen_failsafe_callback) #endif /* CONFIG_XEN_PV */ -#ifdef CONFIG_XEN_PVHVM -apicinterrupt3 HYPERVISOR_CALLBACK_VECTOR \ - xen_hvm_callback_vector xen_evtchn_do_upcall -#endif - /* * Save all registers in pt_regs, and switch gs if needed. * Use slow, but surefire "are we in kernel?" check. 
diff --git a/arch/x86/include/asm/idtentry.h b/arch/x86/include/asm/idtentry.h index 1b6d3ea1fc96..71cf82bf24ba 100644 --- a/arch/x86/include/asm/idtentry.h +++ b/arch/x86/include/asm/idtentry.h @@ -644,6 +644,10 @@ DECLARE_IDTENTRY_SYSVEC(HYPERVISOR_STIMER0_VECTOR, sysvec_hyperv_stimer0); DECLARE_IDTENTRY_SYSVEC(HYPERVISOR_CALLBACK_VECTOR, sysvec_acrn_hv_callback); #endif +#ifdef CONFIG_XEN_PVHVM +DECLARE_IDTENTRY_SYSVEC(HYPERVISOR_CALLBACK_VECTOR, sysvec_xen_hvm_callback); +#endif + #undef X86_TRAP_OTHER #endif diff --git a/arch/x86/xen/enlighten_hvm.c b/arch/x86/xen/enlighten_hvm.c index e138f7de52d2..3e89b0067ff0 100644 --- a/arch/x86/xen/enlighten_hvm.c +++ b/arch/x86/xen/enlighten_hvm.c @@ -13,6 +13,7 @@ #include #include #include +#include #include #include #include @@ -118,6 +119,17 @@ static void __init init_hvm_pv_info(void) this_cpu_write(xen_vcpu_id, smp_processor_id()); } +DEFINE_IDTENTRY_SYSVEC(sysvec_xen_hvm_callback) +{ + struct pt_regs *old_regs = set_irq_regs(regs); + + inc_irq_stat(irq_hv_callback_count); + + xen_hvm_evtchn_do_upcall(); + + set_irq_regs(old_regs); +} + #ifdef CONFIG_KEXEC_CORE static void xen_hvm_shutdown(void) { diff --git a/drivers/xen/events/events_base.c b/drivers/xen/events/events_base.c index eb35c3cda9a6..140c7bf33a98 100644 --- a/drivers/xen/events/events_base.c +++ b/drivers/xen/events/events_base.c @@ -37,6 +37,7 @@ #ifdef CONFIG_X86 #include #include +#include #include #include #include @@ -1236,9 +1237,6 @@ void xen_evtchn_do_upcall(struct pt_regs *regs) struct pt_regs *old_regs = set_irq_regs(regs); irq_enter(); -#ifdef CONFIG_X86 - inc_irq_stat(irq_hv_callback_count); -#endif __xen_evtchn_do_upcall(); @@ -1658,7 +1656,7 @@ static __init void xen_alloc_callback_vector(void) return; pr_info("Xen HVM callback vector for event delivery is enabled\n"); - alloc_intr_gate(HYPERVISOR_CALLBACK_VECTOR, xen_hvm_callback_vector); + alloc_intr_gate(HYPERVISOR_CALLBACK_VECTOR, asm_sysvec_xen_hvm_callback); } #else void xen_setup_callback_vector(void) {} diff --git a/include/xen/events.h b/include/xen/events.h index 12b0dcb6a120..df1e6391f63f 100644 --- a/include/xen/events.h +++ b/include/xen/events.h @@ -90,13 +90,6 @@ unsigned int irq_from_evtchn(evtchn_port_t evtchn); int irq_from_virq(unsigned int cpu, unsigned int virq); evtchn_port_t evtchn_from_irq(unsigned irq); -#ifdef CONFIG_XEN_PVHVM -/* Xen HVM evtchn vector callback */ -void xen_hvm_callback_vector(void); -#ifdef CONFIG_TRACING -#define trace_xen_hvm_callback_vector xen_hvm_callback_vector -#endif -#endif int xen_set_callback_via(uint64_t via); void xen_evtchn_do_upcall(struct pt_regs *regs); void xen_hvm_evtchn_do_upcall(void); -- cgit v1.2.3 From 13cad9851ef1d004640991d45227dd35c08f45fc Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 21 May 2020 22:05:45 +0200 Subject: x86/entry: Convert reschedule interrupt to IDTENTRY_SYSVEC_SIMPLE The scheduler IPI does not need the full interrupt entry handling logic when the entry is from kernel mode. Use IDTENTRY_SYSVEC_SIMPLE and spare all the overhead. 
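For comparison, roughly the shape of the two macro families, using a hypothetical sysvec_example handler (a sketch only, not part of this patch): the full SYSVEC variant gets the complete interrupt entry/exit bookkeeping from the macro-generated glue, while the _SIMPLE variant skips it and leaves the minimal ack/stat work to the handler body itself.

	/* Full system vector: the generated glue supplies irq entry/exit handling. */
	DEFINE_IDTENTRY_SYSVEC(sysvec_example)
	{
		ack_APIC_irq();
		inc_irq_stat(irq_example_count);	/* hypothetical stat counter */
		example_handler();			/* hypothetical work function */
	}

	/* Simple variant: no irq entry/exit overhead wrapped around the body. */
	DEFINE_IDTENTRY_SYSVEC_SIMPLE(sysvec_example_ipi)
	{
		ack_APIC_irq();
		inc_irq_stat(irq_example_count);
		example_handler();
	}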
Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar Acked-by: Andy Lutomirski Link: https://lore.kernel.org/r/20200521202119.835425642@linutronix.de --- arch/x86/entry/entry_64.S | 4 ---- arch/x86/include/asm/entry_arch.h | 3 --- arch/x86/include/asm/hw_irq.h | 3 --- arch/x86/include/asm/idtentry.h | 1 + arch/x86/include/asm/trace/common.h | 4 ---- arch/x86/include/asm/trace/irq_vectors.h | 17 +---------------- arch/x86/kernel/idt.c | 2 +- arch/x86/kernel/smp.c | 19 ++++--------------- arch/x86/kernel/tracepoint.c | 17 ----------------- 9 files changed, 7 insertions(+), 63 deletions(-) (limited to 'arch') diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index fd7efb8deded..9c0722e725ef 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -956,10 +956,6 @@ apicinterrupt3 \num \sym \do_sym POP_SECTION_IRQENTRY .endm -#ifdef CONFIG_SMP -apicinterrupt RESCHEDULE_VECTOR reschedule_interrupt smp_reschedule_interrupt -#endif - /* * Reload gs selector with exception handling * edi: new selector diff --git a/arch/x86/include/asm/entry_arch.h b/arch/x86/include/asm/entry_arch.h index a01bb74244ac..3e841ed5c17a 100644 --- a/arch/x86/include/asm/entry_arch.h +++ b/arch/x86/include/asm/entry_arch.h @@ -10,6 +10,3 @@ * is no hardware IRQ pin equivalent for them, they are triggered * through the ICC by us (IPIs) */ -#ifdef CONFIG_SMP -BUILD_INTERRUPT(reschedule_interrupt,RESCHEDULE_VECTOR) -#endif diff --git a/arch/x86/include/asm/hw_irq.h b/arch/x86/include/asm/hw_irq.h index fd5e7c8825e1..74c12437401e 100644 --- a/arch/x86/include/asm/hw_irq.h +++ b/arch/x86/include/asm/hw_irq.h @@ -28,9 +28,6 @@ #include #include -/* Interrupt handlers registered during init_IRQ */ -extern asmlinkage void reschedule_interrupt(void); - #ifdef CONFIG_X86_LOCAL_APIC struct irq_data; struct pci_dev; diff --git a/arch/x86/include/asm/idtentry.h b/arch/x86/include/asm/idtentry.h index 71cf82bf24ba..38b672ded40b 100644 --- a/arch/x86/include/asm/idtentry.h +++ b/arch/x86/include/asm/idtentry.h @@ -600,6 +600,7 @@ DECLARE_IDTENTRY_SYSVEC(X86_PLATFORM_IPI_VECTOR, sysvec_x86_platform_ipi); #endif #ifdef CONFIG_SMP +DECLARE_IDTENTRY(RESCHEDULE_VECTOR, sysvec_reschedule_ipi); DECLARE_IDTENTRY_SYSVEC(IRQ_MOVE_CLEANUP_VECTOR, sysvec_irq_move_cleanup); DECLARE_IDTENTRY_SYSVEC(REBOOT_VECTOR, sysvec_reboot); DECLARE_IDTENTRY_SYSVEC(CALL_FUNCTION_SINGLE_VECTOR, sysvec_call_function_single); diff --git a/arch/x86/include/asm/trace/common.h b/arch/x86/include/asm/trace/common.h index 57c8da027d99..f0f9bcdb74d9 100644 --- a/arch/x86/include/asm/trace/common.h +++ b/arch/x86/include/asm/trace/common.h @@ -5,12 +5,8 @@ DECLARE_STATIC_KEY_FALSE(trace_pagefault_key); #define trace_pagefault_enabled() \ static_branch_unlikely(&trace_pagefault_key) -DECLARE_STATIC_KEY_FALSE(trace_resched_ipi_key); -#define trace_resched_ipi_enabled() \ - static_branch_unlikely(&trace_resched_ipi_key) #else static inline bool trace_pagefault_enabled(void) { return false; } -static inline bool trace_resched_ipi_enabled(void) { return false; } #endif #endif diff --git a/arch/x86/include/asm/trace/irq_vectors.h b/arch/x86/include/asm/trace/irq_vectors.h index 33b9d0f0aafe..88e7f0f3bf62 100644 --- a/arch/x86/include/asm/trace/irq_vectors.h +++ b/arch/x86/include/asm/trace/irq_vectors.h @@ -10,9 +10,6 @@ #ifdef CONFIG_X86_LOCAL_APIC -extern int trace_resched_ipi_reg(void); -extern void trace_resched_ipi_unreg(void); - DECLARE_EVENT_CLASS(x86_irq_vector, TP_PROTO(int vector), @@ -37,18 +34,6 @@ 
DEFINE_EVENT_FN(x86_irq_vector, name##_exit, \ TP_PROTO(int vector), \ TP_ARGS(vector), NULL, NULL); -#define DEFINE_RESCHED_IPI_EVENT(name) \ -DEFINE_EVENT_FN(x86_irq_vector, name##_entry, \ - TP_PROTO(int vector), \ - TP_ARGS(vector), \ - trace_resched_ipi_reg, \ - trace_resched_ipi_unreg); \ -DEFINE_EVENT_FN(x86_irq_vector, name##_exit, \ - TP_PROTO(int vector), \ - TP_ARGS(vector), \ - trace_resched_ipi_reg, \ - trace_resched_ipi_unreg); - /* * local_timer - called when entering/exiting a local timer interrupt * vector handler @@ -99,7 +84,7 @@ TRACE_EVENT_PERF_PERM(irq_work_exit, is_sampling_event(p_event) ? -EPERM : 0); /* * reschedule - called when entering/exiting a reschedule vector handler */ -DEFINE_RESCHED_IPI_EVENT(reschedule); +DEFINE_IRQ_VECTOR_EVENT(reschedule); /* * call_function - called when entering/exiting a call function interrupt diff --git a/arch/x86/kernel/idt.c b/arch/x86/kernel/idt.c index faaadd430882..bc9b0d1d7bb8 100644 --- a/arch/x86/kernel/idt.c +++ b/arch/x86/kernel/idt.c @@ -112,7 +112,7 @@ static const __initconst struct idt_data def_idts[] = { */ static const __initconst struct idt_data apic_idts[] = { #ifdef CONFIG_SMP - INTG(RESCHEDULE_VECTOR, reschedule_interrupt), + INTG(RESCHEDULE_VECTOR, asm_sysvec_reschedule_ipi), INTG(CALL_FUNCTION_VECTOR, asm_sysvec_call_function), INTG(CALL_FUNCTION_SINGLE_VECTOR, asm_sysvec_call_function_single), INTG(IRQ_MOVE_CLEANUP_VECTOR, asm_sysvec_irq_move_cleanup), diff --git a/arch/x86/kernel/smp.c b/arch/x86/kernel/smp.c index e5647daa7e96..eff4ce3b10da 100644 --- a/arch/x86/kernel/smp.c +++ b/arch/x86/kernel/smp.c @@ -220,26 +220,15 @@ static void native_stop_other_cpus(int wait) /* * Reschedule call back. KVM uses this interrupt to force a cpu out of - * guest mode + * guest mode. */ -__visible void __irq_entry smp_reschedule_interrupt(struct pt_regs *regs) +DEFINE_IDTENTRY_SYSVEC_SIMPLE(sysvec_reschedule_ipi) { ack_APIC_irq(); + trace_reschedule_entry(RESCHEDULE_VECTOR); inc_irq_stat(irq_resched_count); - - if (trace_resched_ipi_enabled()) { - /* - * scheduler_ipi() might call irq_enter() as well, but - * nested calls are fine. - */ - irq_enter(); - trace_reschedule_entry(RESCHEDULE_VECTOR); - scheduler_ipi(); - trace_reschedule_exit(RESCHEDULE_VECTOR); - irq_exit(); - return; - } scheduler_ipi(); + trace_reschedule_exit(RESCHEDULE_VECTOR); } DEFINE_IDTENTRY_SYSVEC(sysvec_call_function) diff --git a/arch/x86/kernel/tracepoint.c b/arch/x86/kernel/tracepoint.c index 496748ed266a..fcfc077afe2d 100644 --- a/arch/x86/kernel/tracepoint.c +++ b/arch/x86/kernel/tracepoint.c @@ -25,20 +25,3 @@ void trace_pagefault_unreg(void) { static_branch_dec(&trace_pagefault_key); } - -#ifdef CONFIG_SMP - -DEFINE_STATIC_KEY_FALSE(trace_resched_ipi_key); - -int trace_resched_ipi_reg(void) -{ - static_branch_inc(&trace_resched_ipi_key); - return 0; -} - -void trace_resched_ipi_unreg(void) -{ - static_branch_dec(&trace_resched_ipi_key); -} - -#endif -- cgit v1.2.3 From 75da04f7f3cb416a68475e040175dc013da32de2 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 21 May 2020 22:05:46 +0200 Subject: x86/entry: Remove the apic/BUILD interrupt leftovers Remove all the code which was there to emit the system vector stubs. All users are gone. Move the now unused GET_CR2_INTO macro muck to head_64.S where the last user is. Fixup the eye hurting comment there while at it. 
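To see all the pieces together: under the IDTENTRY scheme a system vector consists of a declaration which emits the ASM stub, a C entry point, and an IDT registration. A sketch with a hypothetical vector and handler name, mirroring the acrn/hyperv/reschedule conversions above (not part of this patch):

	/* idtentry.h: declares sysvec_example and emits the asm_sysvec_example stub */
	DECLARE_IDTENTRY_SYSVEC(EXAMPLE_VECTOR, sysvec_example);

	/* C entry point; the entry/exit glue is generated by the macro */
	DEFINE_IDTENTRY_SYSVEC(sysvec_example)
	{
		ack_APIC_irq();
		example_handler();	/* hypothetical work function */
	}

	/* IDT registration: statically in idt.c ... */
	INTG(EXAMPLE_VECTOR, asm_sysvec_example),
	/* ... or dynamically at init time */
	alloc_intr_gate(EXAMPLE_VECTOR, asm_sysvec_example);

With no hand-written stubs left, the apicinterrupt/BUILD_INTERRUPT emitters have no remaining purpose.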
Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar Acked-by: Andy Lutomirski Link: https://lore.kernel.org/r/20200521202119.927433002@linutronix.de --- arch/x86/entry/calling.h | 20 ------ arch/x86/entry/entry_32.S | 18 ----- arch/x86/entry/entry_64.S | 143 -------------------------------------- arch/x86/include/asm/entry_arch.h | 12 ---- arch/x86/kernel/head_64.S | 7 +- 5 files changed, 4 insertions(+), 196 deletions(-) delete mode 100644 arch/x86/include/asm/entry_arch.h (limited to 'arch') diff --git a/arch/x86/entry/calling.h b/arch/x86/entry/calling.h index 98da0d3c0b1a..4208c1e3f601 100644 --- a/arch/x86/entry/calling.h +++ b/arch/x86/entry/calling.h @@ -351,23 +351,3 @@ For 32-bit we have the following conventions - kernel is built with call stackleak_erase #endif .endm - -/* - * This does 'call enter_from_user_mode' unless we can avoid it based on - * kernel config or using the static jump infrastructure. - */ -.macro CALL_enter_from_user_mode -#ifdef CONFIG_CONTEXT_TRACKING -#ifdef CONFIG_JUMP_LABEL - STATIC_JUMP_IF_FALSE .Lafter_call_\@, context_tracking_key, def=0 -#endif - call enter_from_user_mode -.Lafter_call_\@: -#endif -.endm - -#ifdef CONFIG_PARAVIRT_XXL -#define GET_CR2_INTO(reg) GET_CR2_INTO_AX ; _ASM_MOV %_ASM_AX, reg -#else -#define GET_CR2_INTO(reg) _ASM_MOV %cr2, reg -#endif diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S index f8e8aeb10ba4..c8f176c88a3c 100644 --- a/arch/x86/entry/entry_32.S +++ b/arch/x86/entry/entry_32.S @@ -1233,24 +1233,6 @@ SYM_FUNC_END(entry_INT80_32) #endif .endm -#define BUILD_INTERRUPT3(name, nr, fn) \ -SYM_FUNC_START(name) \ - ASM_CLAC; \ - pushl $~(nr); \ - SAVE_ALL switch_stacks=1; \ - ENCODE_FRAME_POINTER; \ - TRACE_IRQS_OFF \ - movl %esp, %eax; \ - call fn; \ - jmp ret_from_intr; \ -SYM_FUNC_END(name) - -#define BUILD_INTERRUPT(name, nr) \ - BUILD_INTERRUPT3(name, nr, smp_##name); \ - -/* The include is where all of the SMP etc. interrupts come from */ -#include - #ifdef CONFIG_PARAVIRT SYM_CODE_START(native_iret) iret diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index 9c0722e725ef..389f97faee45 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -658,108 +658,7 @@ SYM_CODE_END(\asmsym) */ #include -/* - * Interrupt entry helper function. - * - * Entry runs with interrupts off. Stack layout at entry: - * +----------------------------------------------------+ - * | regs->ss | - * | regs->rsp | - * | regs->eflags | - * | regs->cs | - * | regs->ip | - * +----------------------------------------------------+ - * | regs->orig_ax = ~(interrupt number) | - * +----------------------------------------------------+ - * | return address | - * +----------------------------------------------------+ - */ -SYM_CODE_START(interrupt_entry) - UNWIND_HINT_IRET_REGS offset=16 - ASM_CLAC - cld - - testb $3, CS-ORIG_RAX+8(%rsp) - jz 1f - SWAPGS - FENCE_SWAPGS_USER_ENTRY - /* - * Switch to the thread stack. The IRET frame and orig_ax are - * on the stack, as well as the return address. RDI..R12 are - * not (yet) on the stack and space has not (yet) been - * allocated for them. - */ - pushq %rdi - - /* Need to switch before accessing the thread stack. */ - SWITCH_TO_KERNEL_CR3 scratch_reg=%rdi - movq %rsp, %rdi - movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp - - /* - * We have RDI, return address, and orig_ax on the stack on - * top of the IRET frame. 
That means offset=24 - */ - UNWIND_HINT_IRET_REGS base=%rdi offset=24 - - pushq 7*8(%rdi) /* regs->ss */ - pushq 6*8(%rdi) /* regs->rsp */ - pushq 5*8(%rdi) /* regs->eflags */ - pushq 4*8(%rdi) /* regs->cs */ - pushq 3*8(%rdi) /* regs->ip */ - UNWIND_HINT_IRET_REGS - pushq 2*8(%rdi) /* regs->orig_ax */ - pushq 8(%rdi) /* return address */ - - movq (%rdi), %rdi - jmp 2f -1: - FENCE_SWAPGS_KERNEL_ENTRY -2: - PUSH_AND_CLEAR_REGS save_ret=1 - ENCODE_FRAME_POINTER 8 - - testb $3, CS+8(%rsp) - jz 1f - - /* - * IRQ from user mode. - * - * We need to tell lockdep that IRQs are off. We can't do this until - * we fix gsbase, and we should do it before enter_from_user_mode - * (which can take locks). Since TRACE_IRQS_OFF is idempotent, - * the simplest way to handle it is to just call it twice if - * we enter from user mode. There's no reason to optimize this since - * TRACE_IRQS_OFF is a no-op if lockdep is off. - */ - TRACE_IRQS_OFF - - CALL_enter_from_user_mode - -1: - ENTER_IRQ_STACK old_rsp=%rdi save_ret=1 - /* We entered an interrupt context - irqs are off: */ - TRACE_IRQS_OFF - - ret -SYM_CODE_END(interrupt_entry) -_ASM_NOKPROBE(interrupt_entry) - SYM_CODE_START_LOCAL(common_interrupt_return) -ret_from_intr: - DISABLE_INTERRUPTS(CLBR_ANY) - TRACE_IRQS_OFF - - LEAVE_IRQ_STACK - - testb $3, CS(%rsp) - jz retint_kernel - - /* Interrupt came from user space */ -.Lretint_user: - mov %rsp,%rdi - call prepare_exit_to_usermode - SYM_INNER_LABEL(swapgs_restore_regs_and_return_to_usermode, SYM_L_GLOBAL) #ifdef CONFIG_DEBUG_ENTRY /* Assert that pt_regs indicates user mode. */ @@ -802,23 +701,6 @@ SYM_INNER_LABEL(swapgs_restore_regs_and_return_to_usermode, SYM_L_GLOBAL) INTERRUPT_RETURN -/* Returning to kernel space */ -retint_kernel: -#ifdef CONFIG_PREEMPTION - /* Interrupts are off */ - /* Check if we need preemption */ - btl $9, EFLAGS(%rsp) /* were interrupts off? */ - jnc 1f - cmpl $0, PER_CPU_VAR(__preempt_count) - jnz 1f - call preempt_schedule_irq -1: -#endif - /* - * The iretq could re-enable interrupts: - */ - TRACE_IRQS_IRETQ - SYM_INNER_LABEL(restore_regs_and_return_to_kernel, SYM_L_GLOBAL) #ifdef CONFIG_DEBUG_ENTRY /* Assert that pt_regs indicates kernel mode. */ @@ -931,31 +813,6 @@ native_irq_return_ldt: SYM_CODE_END(common_interrupt_return) _ASM_NOKPROBE(common_interrupt_return) -/* - * APIC interrupts. - */ -.macro apicinterrupt3 num sym do_sym -SYM_CODE_START(\sym) - UNWIND_HINT_IRET_REGS - pushq $~(\num) - call interrupt_entry - UNWIND_HINT_REGS indirect=1 - call \do_sym /* rdi points to pt_regs */ - jmp ret_from_intr -SYM_CODE_END(\sym) -_ASM_NOKPROBE(\sym) -.endm - -/* Make sure APIC interrupt handlers end up in the irqentry section: */ -#define PUSH_SECTION_IRQENTRY .pushsection .irqentry.text, "ax" -#define POP_SECTION_IRQENTRY .popsection - -.macro apicinterrupt num sym do_sym -PUSH_SECTION_IRQENTRY -apicinterrupt3 \num \sym \do_sym -POP_SECTION_IRQENTRY -.endm - /* * Reload gs selector with exception handling * edi: new selector diff --git a/arch/x86/include/asm/entry_arch.h b/arch/x86/include/asm/entry_arch.h deleted file mode 100644 index 3e841ed5c17a..000000000000 --- a/arch/x86/include/asm/entry_arch.h +++ /dev/null @@ -1,12 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * This file is designed to contain the BUILD_INTERRUPT specifications for - * all of the extra named interrupt vectors used by the architecture. 
- * Usually this is the Inter Process Interrupts (IPIs) - */ - -/* - * The following vectors are part of the Linux architecture, there - * is no hardware IRQ pin equivalent for them, they are triggered - * through the ICC by us (IPIs) - */ diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S index 4fc33fdf0f16..16da4ac01597 100644 --- a/arch/x86/kernel/head_64.S +++ b/arch/x86/kernel/head_64.S @@ -29,15 +29,16 @@ #ifdef CONFIG_PARAVIRT_XXL #include #include +#define GET_CR2_INTO(reg) GET_CR2_INTO_AX ; _ASM_MOV %_ASM_AX, reg #else #define INTERRUPT_RETURN iretq +#define GET_CR2_INTO(reg) _ASM_MOV %cr2, reg #endif -/* we are not able to switch in one step to the final KERNEL ADDRESS SPACE +/* + * We are not able to switch in one step to the final KERNEL ADDRESS SPACE * because we need identity-mapped pages. - * */ - #define l4_index(x) (((x) >> 39) & 511) #define pud_index(x) (((x) >> PUD_SHIFT) & (PTRS_PER_PUD-1)) -- cgit v1.2.3 From e3e5c64ea1f5a81ace6984e7abbdd369ab631c93 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 21 May 2020 22:05:47 +0200 Subject: x86/entry/64: Remove IRQ stack switching ASM No more users. Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar Acked-by: Andy Lutomirski Link: https://lore.kernel.org/r/20200521202120.021462159@linutronix.de --- arch/x86/entry/entry_64.S | 96 ----------------------------------------------- 1 file changed, 96 deletions(-) (limited to 'arch') diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index 389f97faee45..29a8a83a6f5f 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -370,102 +370,6 @@ SYM_CODE_END(ret_from_fork) #endif .endm -/* - * Enters the IRQ stack if we're not already using it. NMI-safe. Clobbers - * flags and puts old RSP into old_rsp, and leaves all other GPRs alone. - * Requires kernel GSBASE. - * - * The invariant is that, if irq_count != -1, then the IRQ stack is in use. - */ -.macro ENTER_IRQ_STACK regs=1 old_rsp save_ret=0 - DEBUG_ENTRY_ASSERT_IRQS_OFF - - .if \save_ret - /* - * If save_ret is set, the original stack contains one additional - * entry -- the return address. Therefore, move the address one - * entry below %rsp to \old_rsp. - */ - leaq 8(%rsp), \old_rsp - .else - movq %rsp, \old_rsp - .endif - - .if \regs - UNWIND_HINT_REGS base=\old_rsp - .endif - - incl PER_CPU_VAR(irq_count) - jnz .Lirq_stack_push_old_rsp_\@ - - /* - * Right now, if we just incremented irq_count to zero, we've - * claimed the IRQ stack but we haven't switched to it yet. - * - * If anything is added that can interrupt us here without using IST, - * it must be *extremely* careful to limit its stack usage. This - * could include kprobes and a hypothetical future IST-less #DB - * handler. - * - * The OOPS unwinder relies on the word at the top of the IRQ - * stack linking back to the previous RSP for the entire time we're - * on the IRQ stack. For this to work reliably, we need to write - * it before we actually move ourselves to the IRQ stack. - */ - - movq \old_rsp, PER_CPU_VAR(irq_stack_backing_store + IRQ_STACK_SIZE - 8) - movq PER_CPU_VAR(hardirq_stack_ptr), %rsp - -#ifdef CONFIG_DEBUG_ENTRY - /* - * If the first movq above becomes wrong due to IRQ stack layout - * changes, the only way we'll notice is if we try to unwind right - * here. Assert that we set up the stack right to catch this type - * of bug quickly. 
- */ - cmpq -8(%rsp), \old_rsp - je .Lirq_stack_okay\@ - ud2 - .Lirq_stack_okay\@: -#endif - -.Lirq_stack_push_old_rsp_\@: - pushq \old_rsp - - .if \regs - UNWIND_HINT_REGS indirect=1 - .endif - - .if \save_ret - /* - * Push the return address to the stack. This return address can - * be found at the "real" original RSP, which was offset by 8 at - * the beginning of this macro. - */ - pushq -8(\old_rsp) - .endif -.endm - -/* - * Undoes ENTER_IRQ_STACK. - */ -.macro LEAVE_IRQ_STACK regs=1 - DEBUG_ENTRY_ASSERT_IRQS_OFF - /* We need to be off the IRQ stack before decrementing irq_count. */ - popq %rsp - - .if \regs - UNWIND_HINT_REGS - .endif - - /* - * As in ENTER_IRQ_STACK, irq_count == 0, we are still claiming - * the irq stack but we're not on it. - */ - - decl PER_CPU_VAR(irq_count) -.endm - /** * idtentry_body - Macro to emit code calling the C function * @cfunc: C function to be called -- cgit v1.2.3 From 3b6c9bf69ef34c5ca36c78aad4ff76b9d9afc92c Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 21 May 2020 22:05:48 +0200 Subject: x86/entry: Make enter_from_user_mode() static The ASM users are gone. All callers are local. Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar Acked-by: Andy Lutomirski Link: https://lore.kernel.org/r/20200521202120.129232680@linutronix.de --- arch/x86/entry/common.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c index a0f8c3cb130a..b0b1c3cf0e6e 100644 --- a/arch/x86/entry/common.c +++ b/arch/x86/entry/common.c @@ -56,7 +56,7 @@ * 2) Invoke context tracking if enabled to reactivate RCU * 3) Trace interrupts off state */ -__visible noinstr void enter_from_user_mode(void) +static noinstr void enter_from_user_mode(void) { enum ctx_state state = ct_state(); -- cgit v1.2.3 From fa95a0cb0423661eb615b6ac1e9882ce9a75719a Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 21 May 2020 22:05:49 +0200 Subject: x86/entry/32: Remove redundant irq disable code All exceptions/interrupts return with interrupts disabled now. No point in doing this in ASM again. Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar Acked-by: Andy Lutomirski Link: https://lore.kernel.org/r/20200521202120.221223450@linutronix.de --- arch/x86/entry/entry_32.S | 76 ----------------------------------------------- 1 file changed, 76 deletions(-) (limited to 'arch') diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S index c8f176c88a3c..2d29f77a3601 100644 --- a/arch/x86/entry/entry_32.S +++ b/arch/x86/entry/entry_32.S @@ -51,34 +51,6 @@ .section .entry.text, "ax" -/* - * We use macros for low-level operations which need to be overridden - * for paravirtualization. The following will never clobber any registers: - * INTERRUPT_RETURN (aka. "iret") - * GET_CR0_INTO_EAX (aka. "movl %cr0, %eax") - * ENABLE_INTERRUPTS_SYSEXIT (aka "sti; sysexit"). - * - * For DISABLE_INTERRUPTS/ENABLE_INTERRUPTS (aka "cli"/"sti"), you must - * specify what registers can be overwritten (CLBR_NONE, CLBR_EAX/EDX/ECX/ANY). - * Allowing a register to be clobbered can shrink the paravirt replacement - * enough to patch inline, increasing performance. - */ - -#ifdef CONFIG_PREEMPTION -# define preempt_stop(clobbers) DISABLE_INTERRUPTS(clobbers); TRACE_IRQS_OFF -#else -# define preempt_stop(clobbers) -#endif - -.macro TRACE_IRQS_IRET -#ifdef CONFIG_TRACE_IRQFLAGS - testl $X86_EFLAGS_IF, PT_EFLAGS(%esp) # interrupts off? 
- jz 1f - TRACE_IRQS_ON -1: -#endif -.endm - #define PTI_SWITCH_MASK (1 << PAGE_SHIFT) /* @@ -881,38 +853,6 @@ SYM_CODE_START(ret_from_fork) SYM_CODE_END(ret_from_fork) .popsection -/* - * Return to user mode is not as complex as all this looks, - * but we want the default path for a system call return to - * go as quickly as possible which is why some of this is - * less clear than it otherwise should be. - */ - - # userspace resumption stub bypassing syscall exit tracing -SYM_CODE_START_LOCAL(ret_from_exception) - preempt_stop(CLBR_ANY) -ret_from_intr: -#ifdef CONFIG_VM86 - movl PT_EFLAGS(%esp), %eax # mix EFLAGS and CS - movb PT_CS(%esp), %al - andl $(X86_EFLAGS_VM | SEGMENT_RPL_MASK), %eax -#else - /* - * We can be coming here from child spawned by kernel_thread(). - */ - movl PT_CS(%esp), %eax - andl $SEGMENT_RPL_MASK, %eax -#endif - cmpl $USER_RPL, %eax - jb restore_all_kernel # not returning to v8086 or userspace - - DISABLE_INTERRUPTS(CLBR_ANY) - TRACE_IRQS_OFF - movl %esp, %eax - call prepare_exit_to_usermode - jmp restore_all_switch_stack -SYM_CODE_END(ret_from_exception) - SYM_ENTRY(__begin_SYSENTER_singlestep_region, SYM_L_GLOBAL, SYM_A_NONE) /* * All code from here through __end_SYSENTER_singlestep_region is subject @@ -1147,22 +1087,6 @@ restore_all_switch_stack: */ INTERRUPT_RETURN -restore_all_kernel: -#ifdef CONFIG_PREEMPTION - DISABLE_INTERRUPTS(CLBR_ANY) - cmpl $0, PER_CPU_VAR(__preempt_count) - jnz .Lno_preempt - testl $X86_EFLAGS_IF, PT_EFLAGS(%esp) # interrupts off (exception path) ? - jz .Lno_preempt - call preempt_schedule_irq -.Lno_preempt: -#endif - TRACE_IRQS_IRET - PARANOID_EXIT_TO_KERNEL_MODE - BUG_IF_WRONG_CR3 - RESTORE_REGS 4 - jmp .Lirq_return - .section .fixup, "ax" SYM_CODE_START(asm_iret_error) pushl $0 # no error code -- cgit v1.2.3 From 9628f26baef262a49d877e3785e8b88d241bc064 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 21 May 2020 22:05:50 +0200 Subject: x86/entry/64: Remove TRACE_IRQS_*_DEBUG Since INT3/#BP no longer runs on an IST, this workaround is no longer required. Tested by running lockdep+ftrace as described in the initial commit: 5963e317b1e9 ("ftrace/x86: Do not change stacks in DEBUG when calling lockdep") Signed-off-by: Thomas Gleixner Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar Reviewed-by: Steven Rostedt (VMware) Acked-by: Andy Lutomirski Link: https://lore.kernel.org/r/20200521202120.319418546@linutronix.de --- arch/x86/entry/entry_64.S | 48 +++-------------------------------------------- 1 file changed, 3 insertions(+), 45 deletions(-) (limited to 'arch') diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index 29a8a83a6f5f..fb7f12628346 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -67,44 +67,6 @@ SYM_CODE_END(native_usergs_sysret64) TRACE_IRQS_FLAGS EFLAGS(%rsp) .endm -/* - * When dynamic function tracer is enabled it will add a breakpoint - * to all locations that it is about to modify, sync CPUs, update - * all the code, sync CPUs, then remove the breakpoints. In this time - * if lockdep is enabled, it might jump back into the debug handler - * outside the updating of the IST protection. (TRACE_IRQS_ON/OFF). - * - * We need to change the IDT table before calling TRACE_IRQS_ON/OFF to - * make sure the stack pointer does not get reset back to the top - * of the debug stack, and instead just reuses the current stack. 
- */ -#if defined(CONFIG_DYNAMIC_FTRACE) && defined(CONFIG_TRACE_IRQFLAGS) - -.macro TRACE_IRQS_OFF_DEBUG - call debug_stack_set_zero - TRACE_IRQS_OFF - call debug_stack_reset -.endm - -.macro TRACE_IRQS_ON_DEBUG - call debug_stack_set_zero - TRACE_IRQS_ON - call debug_stack_reset -.endm - -.macro TRACE_IRQS_IRETQ_DEBUG - btl $9, EFLAGS(%rsp) /* interrupts off? */ - jnc 1f - TRACE_IRQS_ON_DEBUG -1: -.endm - -#else -# define TRACE_IRQS_OFF_DEBUG TRACE_IRQS_OFF -# define TRACE_IRQS_ON_DEBUG TRACE_IRQS_ON -# define TRACE_IRQS_IRETQ_DEBUG TRACE_IRQS_IRETQ -#endif - /* * 64-bit SYSCALL instruction entry. Up to 6 arguments in registers. * @@ -500,11 +462,7 @@ SYM_CODE_START(\asmsym) UNWIND_HINT_REGS - .if \vector == X86_TRAP_DB - TRACE_IRQS_OFF_DEBUG - .else - TRACE_IRQS_OFF - .endif + TRACE_IRQS_OFF movq %rsp, %rdi /* pt_regs pointer */ @@ -924,7 +882,7 @@ SYM_CODE_END(paranoid_entry) SYM_CODE_START_LOCAL(paranoid_exit) UNWIND_HINT_REGS DISABLE_INTERRUPTS(CLBR_ANY) - TRACE_IRQS_OFF_DEBUG + TRACE_IRQS_OFF testl %ebx, %ebx /* swapgs needed? */ jnz .Lparanoid_exit_no_swapgs TRACE_IRQS_IRETQ @@ -933,7 +891,7 @@ SYM_CODE_START_LOCAL(paranoid_exit) SWAPGS_UNSAFE_STACK jmp restore_regs_and_return_to_kernel .Lparanoid_exit_no_swapgs: - TRACE_IRQS_IRETQ_DEBUG + TRACE_IRQS_IRETQ /* Always restore stashed CR3 value (see paranoid_entry) */ RESTORE_CR3 scratch_reg=%rbx save_reg=%r14 jmp restore_regs_and_return_to_kernel -- cgit v1.2.3 From 3ffdfdcec1bae39b68b990762350b3cd3127f23f Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 21 May 2020 22:05:51 +0200 Subject: x86/entry: Move paranoid irq tracing out of ASM code The last step to remove the irq tracing cruft from ASM. Ignore #DF as the machine is going to die anyway. Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar Acked-by: Andy Lutomirski Link: https://lore.kernel.org/r/20200521202120.414043330@linutronix.de --- arch/x86/entry/entry_64.S | 13 ------------- arch/x86/kernel/cpu/mce/core.c | 3 +++ arch/x86/kernel/nmi.c | 3 +++ arch/x86/kernel/traps.c | 11 +++++++++++ 4 files changed, 17 insertions(+), 13 deletions(-) (limited to 'arch') diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index fb7f12628346..2566554fe04e 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -16,7 +16,6 @@ * * Some macro usage: * - SYM_FUNC_START/END:Define functions in the symbol table. - * - TRACE_IRQ_*: Trace hardirq state for lock debugging. * - idtentry: Define exception entry points. */ #include @@ -107,11 +106,6 @@ SYM_CODE_END(native_usergs_sysret64) SYM_CODE_START(entry_SYSCALL_64) UNWIND_HINT_EMPTY - /* - * Interrupts are off on entry. - * We do not frame this tiny irq-off block with TRACE_IRQS_OFF/ON, - * it is too small to ever cause noticeable irq latency. - */ swapgs /* tss.sp2 is scratch space. */ @@ -462,8 +456,6 @@ SYM_CODE_START(\asmsym) UNWIND_HINT_REGS - TRACE_IRQS_OFF - movq %rsp, %rdi /* pt_regs pointer */ .if \vector == X86_TRAP_DB @@ -881,17 +873,13 @@ SYM_CODE_END(paranoid_entry) */ SYM_CODE_START_LOCAL(paranoid_exit) UNWIND_HINT_REGS - DISABLE_INTERRUPTS(CLBR_ANY) - TRACE_IRQS_OFF testl %ebx, %ebx /* swapgs needed?
*/ jnz .Lparanoid_exit_no_swapgs - TRACE_IRQS_IRETQ /* Always restore stashed CR3 value (see paranoid_entry) */ RESTORE_CR3 scratch_reg=%rbx save_reg=%r14 SWAPGS_UNSAFE_STACK jmp restore_regs_and_return_to_kernel .Lparanoid_exit_no_swapgs: - TRACE_IRQS_IRETQ /* Always restore stashed CR3 value (see paranoid_entry) */ RESTORE_CR3 scratch_reg=%rbx save_reg=%r14 jmp restore_regs_and_return_to_kernel @@ -1292,7 +1280,6 @@ end_repeat_nmi: call paranoid_entry UNWIND_HINT_REGS - /* paranoidentry exc_nmi(), 0; without TRACE_IRQS_OFF */ movq %rsp, %rdi movq $-1, %rsi call exc_nmi diff --git a/arch/x86/kernel/cpu/mce/core.c b/arch/x86/kernel/cpu/mce/core.c index c47f004f6231..068e6cab1286 100644 --- a/arch/x86/kernel/cpu/mce/core.c +++ b/arch/x86/kernel/cpu/mce/core.c @@ -1922,7 +1922,10 @@ static __always_inline void exc_machine_check_kernel(struct pt_regs *regs) * that out because it's an indirect call. Annotate it. */ instrumentation_begin(); + trace_hardirqs_off_prepare(); machine_check_vector(regs); + if (regs->flags & X86_EFLAGS_IF) + trace_hardirqs_on_prepare(); instrumentation_end(); nmi_exit(); } diff --git a/arch/x86/kernel/nmi.c b/arch/x86/kernel/nmi.c index 3052c78f03aa..5df4e7f58369 100644 --- a/arch/x86/kernel/nmi.c +++ b/arch/x86/kernel/nmi.c @@ -330,6 +330,7 @@ static noinstr void default_do_nmi(struct pt_regs *regs) __this_cpu_write(last_nmi_rip, regs->ip); instrumentation_begin(); + trace_hardirqs_off_prepare(); handled = nmi_handle(NMI_LOCAL, regs); __this_cpu_add(nmi_stats.normal, handled); @@ -416,6 +417,8 @@ static noinstr void default_do_nmi(struct pt_regs *regs) unknown_nmi_error(reason, regs); out: + if (regs->flags & X86_EFLAGS_IF) + trace_hardirqs_on_prepare(); instrumentation_end(); } diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index f28be3e51cca..50fb9cd5be97 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -634,8 +634,11 @@ DEFINE_IDTENTRY_RAW(exc_int3) } else { nmi_enter(); instrumentation_begin(); + trace_hardirqs_off_prepare(); if (!do_int3(regs)) die("int3", regs, 0); + if (regs->flags & X86_EFLAGS_IF) + trace_hardirqs_on_prepare(); instrumentation_end(); nmi_exit(); } @@ -850,6 +853,10 @@ static __always_inline void exc_debug_kernel(struct pt_regs *regs, unsigned long dr6) { nmi_enter(); + instrumentation_begin(); + trace_hardirqs_off_prepare(); + instrumentation_end(); + /* * The SDM says "The processor clears the BTF flag when it * generates a debug exception." Clear TIF_BLOCKSTEP to keep @@ -871,6 +878,10 @@ static __always_inline void exc_debug_kernel(struct pt_regs *regs, if (dr6) handle_debug(regs, dr6, false); + instrumentation_begin(); + if (regs->flags & X86_EFLAGS_IF) + trace_hardirqs_on_prepare(); + instrumentation_end(); nmi_exit(); } -- cgit v1.2.3 From 320100a5ffe5ec781ec3dc190a57ce5e32885855 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 21 May 2020 22:05:52 +0200 Subject: x86/entry: Remove the TRACE_IRQS cruft No more users. 
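The irq state tracing these macros used to provide now lives in the C handlers, as added by the previous patch; condensed from the nmi.c/traps.c hunks above, the pattern is roughly:

	nmi_enter();
	instrumentation_begin();
	trace_hardirqs_off_prepare();
	/* ... the actual handler work ... */
	if (regs->flags & X86_EFLAGS_IF)
		trace_hardirqs_on_prepare();
	instrumentation_end();
	nmi_exit();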
Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar Acked-by: Andy Lutomirski Link: https://lore.kernel.org/r/20200521202120.523289762@linutronix.de --- arch/x86/entry/entry_64.S | 13 ------------- arch/x86/entry/thunk_64.S | 9 +-------- arch/x86/include/asm/irqflags.h | 10 ---------- 3 files changed, 1 insertion(+), 31 deletions(-) (limited to 'arch') diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index 2566554fe04e..265ff97b3961 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -53,19 +53,6 @@ SYM_CODE_START(native_usergs_sysret64) SYM_CODE_END(native_usergs_sysret64) #endif /* CONFIG_PARAVIRT */ -.macro TRACE_IRQS_FLAGS flags:req -#ifdef CONFIG_TRACE_IRQFLAGS - btl $9, \flags /* interrupts off? */ - jnc 1f - TRACE_IRQS_ON -1: -#endif -.endm - -.macro TRACE_IRQS_IRETQ - TRACE_IRQS_FLAGS EFLAGS(%rsp) -.endm - /* * 64-bit SYSCALL instruction entry. Up to 6 arguments in registers. * diff --git a/arch/x86/entry/thunk_64.S b/arch/x86/entry/thunk_64.S index 34f980c9b766..ccd32877a3c4 100644 --- a/arch/x86/entry/thunk_64.S +++ b/arch/x86/entry/thunk_64.S @@ -3,7 +3,6 @@ * Save registers before calling assembly functions. This avoids * disturbance of register allocation in some inline assembly constructs. * Copyright 2001,2002 by Andi Kleen, SuSE Labs. - * Added trace_hardirqs callers - Copyright 2007 Steven Rostedt, Red Hat, Inc. */ #include #include "calling.h" @@ -37,11 +36,6 @@ SYM_FUNC_END(\name) _ASM_NOKPROBE(\name) .endm -#ifdef CONFIG_TRACE_IRQFLAGS - THUNK trace_hardirqs_on_thunk,trace_hardirqs_on_caller,1 - THUNK trace_hardirqs_off_thunk,trace_hardirqs_off_caller,1 -#endif - #ifdef CONFIG_PREEMPTION THUNK preempt_schedule_thunk, preempt_schedule THUNK preempt_schedule_notrace_thunk, preempt_schedule_notrace @@ -49,8 +43,7 @@ SYM_FUNC_END(\name) EXPORT_SYMBOL(preempt_schedule_notrace_thunk) #endif -#if defined(CONFIG_TRACE_IRQFLAGS) \ - || defined(CONFIG_PREEMPTION) +#ifdef CONFIG_PREEMPTION SYM_CODE_START_LOCAL_NOALIGN(.L_restore) popq %r11 popq %r10 diff --git a/arch/x86/include/asm/irqflags.h b/arch/x86/include/asm/irqflags.h index e00f064b009e..8ddff8dbaed5 100644 --- a/arch/x86/include/asm/irqflags.h +++ b/arch/x86/include/asm/irqflags.h @@ -172,14 +172,4 @@ static inline int arch_irqs_disabled(void) } #endif /* !__ASSEMBLY__ */ -#ifdef __ASSEMBLY__ -#ifdef CONFIG_TRACE_IRQFLAGS -# define TRACE_IRQS_ON call trace_hardirqs_on_thunk; -# define TRACE_IRQS_OFF call trace_hardirqs_off_thunk; -#else -# define TRACE_IRQS_ON -# define TRACE_IRQS_OFF -#endif -#endif /* __ASSEMBLY__ */ - #endif -- cgit v1.2.3 From 998c2034c6a36bd48284e8c8f945a6c6ffc0e3f0 Mon Sep 17 00:00:00 2001 From: Vitaly Kuznetsov Date: Wed, 20 May 2020 18:16:00 +0200 Subject: xen: Move xen_setup_callback_vector() definition to include/xen/hvm.h Kbuild test robot reports the following problem on ARM: warning: no previous prototype for 'xen_setup_callback_vector' [-Wmissing-prototypes] 1664 | void xen_setup_callback_vector(void) {} | ^~~~~~~~~~~~~~~~~~~~~~~~~ The problem is that xen_setup_callback_vector is an x86-only thing, its definition is present in arch/x86/xen/xen-ops.h but not on ARM. In events_base.c there is a stub for !CONFIG_XEN_PVHVM but it is not declared as 'static'. On x86 the situation is hardly better: drivers/xen/events/events_base.c doesn't include 'xen-ops.h' from arch/x86/xen/, it includes its namesake from include/xen/ which also results in a 'no previous prototype' warning.
Currently, xen_setup_callback_vector() has two call sites: one in drivers/xen/events_base.c and another in arch/x86/xen/suspend_hvm.c. The former is placed under #ifdef CONFIG_X86 and the latter is only compiled in when CONFIG_XEN_PVHVM. Resolve the issue by moving the xen_setup_callback_vector() declaration to the arch-neutral 'include/xen/hvm.h', as the implementation lives in the arch-neutral drivers/xen/events/events_base.c. Reported-by: kbuild test robot Signed-off-by: Vitaly Kuznetsov Signed-off-by: Thomas Gleixner Reviewed-by: Juergen Gross Link: https://lkml.kernel.org/r/20200520161600.361895-1-vkuznets@redhat.com --- arch/x86/xen/suspend_hvm.c | 1 + arch/x86/xen/xen-ops.h | 1 - include/xen/hvm.h | 2 ++ include/xen/interface/hvm/hvm_op.h | 2 ++ 4 files changed, 5 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/xen/suspend_hvm.c b/arch/x86/xen/suspend_hvm.c index 5152afe16876..9d548b0c772f 100644 --- a/arch/x86/xen/suspend_hvm.c +++ b/arch/x86/xen/suspend_hvm.c @@ -2,6 +2,7 @@ #include #include +#include #include #include diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h index ad05d0589381..53b224fd6177 100644 --- a/arch/x86/xen/xen-ops.h +++ b/arch/x86/xen/xen-ops.h @@ -54,7 +54,6 @@ void xen_enable_sysenter(void); void xen_enable_syscall(void); void xen_vcpu_restore(void); -void xen_setup_callback_vector(void); void xen_hvm_init_shared_info(void); void xen_unplug_emulated_devices(void); diff --git a/include/xen/hvm.h b/include/xen/hvm.h index 0b15f8cb17fc..b7fd7fc9ad41 100644 --- a/include/xen/hvm.h +++ b/include/xen/hvm.h @@ -58,4 +58,6 @@ static inline int hvm_get_parameter(int idx, uint64_t *value) #define HVM_CALLBACK_VECTOR(x) (((uint64_t)HVM_CALLBACK_VIA_TYPE_VECTOR)<<\ HVM_CALLBACK_VIA_TYPE_SHIFT | (x)) +void xen_setup_callback_vector(void); + #endif /* XEN_HVM_H__ */ diff --git a/include/xen/interface/hvm/hvm_op.h b/include/xen/interface/hvm/hvm_op.h index 956a04682865..25d945ef17de 100644 --- a/include/xen/interface/hvm/hvm_op.h +++ b/include/xen/interface/hvm/hvm_op.h @@ -21,6 +21,8 @@ #ifndef __XEN_PUBLIC_HVM_HVM_OP_H__ #define __XEN_PUBLIC_HVM_HVM_OP_H__ +#include + /* Get/set subcommands: the second argument of the hypercall is a * pointer to a xen_hvm_param struct. */ #define HVMOP_set_param 0 -- cgit v1.2.3 From d390e6de89d30402bd06056c40cea72328aec9b1 Mon Sep 17 00:00:00 2001 From: Lai Jiangshan Date: Fri, 29 May 2020 23:27:29 +0200 Subject: x86/hw_breakpoint: Add within_area() to check data breakpoints Add a within_area() helper to check whether the data breakpoints overlap with cpu_entry_area. It will be used to completely prevent data breakpoints on GDT, IDT, or TSS. Signed-off-by: Lai Jiangshan Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Thomas Gleixner Link: https://lkml.kernel.org/r/20200526014221.2119-2-laijs@linux.alibaba.com Link: https://lkml.kernel.org/r/20200529213320.784524504@infradead.org --- arch/x86/kernel/hw_breakpoint.c | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/hw_breakpoint.c b/arch/x86/kernel/hw_breakpoint.c index 9ddf441ccaa8..c149c7b29ac3 100644 --- a/arch/x86/kernel/hw_breakpoint.c +++ b/arch/x86/kernel/hw_breakpoint.c @@ -227,14 +227,23 @@ int arch_check_bp_in_kernelspace(struct arch_hw_breakpoint *hw) return (va >= TASK_SIZE_MAX) || ((va + len - 1) >= TASK_SIZE_MAX); } +/* + * Checks whether the range [addr, end] overlaps the area [base, base + size).
+ */ +static inline bool within_area(unsigned long addr, unsigned long end, + unsigned long base, unsigned long size) +{ + return end >= base && addr < (base + size); +} + /* * Checks whether the range from addr to end, inclusive, overlaps the CPU * entry area range. */ static inline bool within_cpu_entry_area(unsigned long addr, unsigned long end) { - return end >= CPU_ENTRY_AREA_BASE && - addr < (CPU_ENTRY_AREA_BASE + CPU_ENTRY_AREA_TOTAL_SIZE); + return within_area(addr, end, CPU_ENTRY_AREA_BASE, + CPU_ENTRY_AREA_TOTAL_SIZE); } static int arch_build_bp_info(struct perf_event *bp, -- cgit v1.2.3 From 97417cb9ad4ed052d7a4c5c0d75db1ff1b0981fb Mon Sep 17 00:00:00 2001 From: Lai Jiangshan Date: Fri, 29 May 2020 23:27:30 +0200 Subject: x86/hw_breakpoint: Prevent data breakpoints on direct GDT A data breakpoint on the GDT can be fatal and must be avoided. The GDT in the CPU entry area is already protected, but not the direct GDT. Add the necessary protection. Signed-off-by: Lai Jiangshan Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Thomas Gleixner Link: https://lkml.kernel.org/r/20200526014221.2119-3-laijs@linux.alibaba.com Link: https://lkml.kernel.org/r/20200529213320.840953950@infradead.org --- arch/x86/kernel/hw_breakpoint.c | 30 ++++++++++++++++++++++-------- 1 file changed, 22 insertions(+), 8 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/hw_breakpoint.c b/arch/x86/kernel/hw_breakpoint.c index c149c7b29ac3..f859095c1b6c 100644 --- a/arch/x86/kernel/hw_breakpoint.c +++ b/arch/x86/kernel/hw_breakpoint.c @@ -32,6 +32,7 @@ #include #include #include +#include /* Per cpu debug control register value */ DEFINE_PER_CPU(unsigned long, cpu_dr7); @@ -237,13 +238,26 @@ static inline bool within_area(unsigned long addr, unsigned long end, } /* - * Checks whether the range from addr to end, inclusive, overlaps the CPU - * entry area range. + * Checks whether the range from addr to end, inclusive, overlaps the fixed + * mapped CPU entry area range or other ranges used for CPU entry. */ -static inline bool within_cpu_entry_area(unsigned long addr, unsigned long end) +static inline bool within_cpu_entry(unsigned long addr, unsigned long end) { - return within_area(addr, end, CPU_ENTRY_AREA_BASE, - CPU_ENTRY_AREA_TOTAL_SIZE); + int cpu; + + /* CPU entry area is always used for CPU entry */ + if (within_area(addr, end, CPU_ENTRY_AREA_BASE, + CPU_ENTRY_AREA_TOTAL_SIZE)) + return true; + + for_each_possible_cpu(cpu) { + /* The original rw GDT is being used after load_direct_gdt() */ + if (within_area(addr, end, (unsigned long)get_cpu_gdt_rw(cpu), + GDT_SIZE)) + return true; + } + + return false; } static int arch_build_bp_info(struct perf_event *bp, @@ -257,12 +271,12 @@ static int arch_build_bp_info(struct perf_event *bp, return -EINVAL; /* - * Prevent any breakpoint of any type that overlaps the - * cpu_entry_area. This protects the IST stacks and also + * Prevent any breakpoint of any type that overlaps the CPU + * entry area and data. This protects the IST stacks and also * reduces the chance that we ever find out what happens if * there's a data breakpoint on the GDT, IDT, or TSS.
*/ - if (within_cpu_entry_area(attr->bp_addr, bp_end)) + if (within_cpu_entry(attr->bp_addr, bp_end)) return -EINVAL; hw->address = attr->bp_addr; -- cgit v1.2.3 From f9fe0b89f05441c6e4034e024c2c75a0d93024c1 Mon Sep 17 00:00:00 2001 From: Lai Jiangshan Date: Fri, 29 May 2020 23:27:31 +0200 Subject: x86/hw_breakpoint: Prevent data breakpoints on per_cpu cpu_tss_rw cpu_tss_rw is not directly referenced by hardware, but cpu_tss_rw is accessed in CPU entry code, especially when #DB shifts its stacks. If a data breakpoint were set on cpu_tss_rw.x86_tss.ist[IST_INDEX_DB], it would cause recursive #DB ending up in a double fault. Add it to the list of protected items. Signed-off-by: Lai Jiangshan Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Thomas Gleixner Link: https://lkml.kernel.org/r/20200526014221.2119-4-laijs@linux.alibaba.com Link: https://lkml.kernel.org/r/20200529213320.897976479@infradead.org --- arch/x86/kernel/hw_breakpoint.c | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'arch') diff --git a/arch/x86/kernel/hw_breakpoint.c b/arch/x86/kernel/hw_breakpoint.c index f859095c1b6c..f311bbfda1ba 100644 --- a/arch/x86/kernel/hw_breakpoint.c +++ b/arch/x86/kernel/hw_breakpoint.c @@ -255,6 +255,15 @@ static inline bool within_cpu_entry(unsigned long addr, unsigned long end) if (within_area(addr, end, (unsigned long)get_cpu_gdt_rw(cpu), GDT_SIZE)) return true; + + /* + * cpu_tss_rw is not directly referenced by hardware, but + * cpu_tss_rw is also used in CPU entry code. + */ + if (within_area(addr, end, + (unsigned long)&per_cpu(cpu_tss_rw, cpu), + sizeof(struct tss_struct))) + return true; } return false; -- cgit v1.2.3 From fdef24dfccb7be06e6ebe11d6c6c56987421870f Mon Sep 17 00:00:00 2001 From: Lai Jiangshan Date: Fri, 29 May 2020 23:27:32 +0200 Subject: x86/hw_breakpoint: Prevent data breakpoints on user_pcid_flush_mask The per-CPU user_pcid_flush_mask is used in the low level entry code. A data breakpoint can cause #DB recursion. Protect the full cpu_tlbstate structure for simplicity. Signed-off-by: Lai Jiangshan Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Thomas Gleixner Link: https://lkml.kernel.org/r/20200526014221.2119-5-laijs@linux.alibaba.com Link: https://lkml.kernel.org/r/20200529213320.955117574@infradead.org --- arch/x86/kernel/hw_breakpoint.c | 11 +++++++++ 1 file changed, 11 insertions(+) (limited to 'arch') diff --git a/arch/x86/kernel/hw_breakpoint.c b/arch/x86/kernel/hw_breakpoint.c index f311bbfda1ba..fc1743a2b0e9 100644 --- a/arch/x86/kernel/hw_breakpoint.c +++ b/arch/x86/kernel/hw_breakpoint.c @@ -33,6 +33,7 @@ #include #include #include +#include /* Per cpu debug control register value */ DEFINE_PER_CPU(unsigned long, cpu_dr7); @@ -264,6 +265,16 @@ static inline bool within_cpu_entry(unsigned long addr, unsigned long end) (unsigned long)&per_cpu(cpu_tss_rw, cpu), sizeof(struct tss_struct))) return true; + + /* + * cpu_tlbstate.user_pcid_flush_mask is used for CPU entry. + * If a data breakpoint is set on it, it will cause an unwanted #DB. + * Protect the full cpu_tlbstate structure to be sure. + */ + if (within_area(addr, end, + (unsigned long)&per_cpu(cpu_tlbstate, cpu), + sizeof(struct tlb_state))) + return true; } return false; -- cgit v1.2.3 From e1de11d4d1a64ac1b90b9833f1a3629dae18facb Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 29 May 2020 23:27:33 +0200 Subject: x86/entry: Introduce local_db_{save,restore}() In order to allow exceptions other than #DB to disable breakpoints, provide common helpers.
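The intended usage is to bracket a fragile code region so that no data breakpoint can fire inside it. A sketch of the pattern (the #MC conversion later in this series does exactly this):

	unsigned long dr7;

	dr7 = local_db_save();
	/* region in which #DB must not trigger */
	local_db_restore(dr7);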
Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Thomas Gleixner Link: https://lkml.kernel.org/r/20200529213321.012060983@infradead.org --- arch/x86/include/asm/debugreg.h | 30 ++++++++++++++++++++++++++++++ arch/x86/kernel/traps.c | 18 ++---------------- 2 files changed, 32 insertions(+), 16 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/debugreg.h b/arch/x86/include/asm/debugreg.h index 1a8609a15856..4ef8690be8a7 100644 --- a/arch/x86/include/asm/debugreg.h +++ b/arch/x86/include/asm/debugreg.h @@ -113,6 +113,36 @@ static inline void debug_stack_usage_inc(void) { } static inline void debug_stack_usage_dec(void) { } #endif /* X86_64 */ +static __always_inline unsigned long local_db_save(void) +{ + unsigned long dr7; + + get_debugreg(dr7, 7); + dr7 &= ~0x400; /* architecturally set bit */ + if (dr7) + set_debugreg(0, 7); + /* + * Ensure the compiler doesn't lower the above statements into + * the critical section; disabling breakpoints late would not + * be good. + */ + barrier(); + + return dr7; +} + +static __always_inline void local_db_restore(unsigned long dr7) +{ + /* + * Ensure the compiler doesn't raise this statement into + * the critical section; enabling breakpoints early would + * not be good. + */ + barrier(); + if (dr7) + set_debugreg(dr7, 7); +} + #ifdef CONFIG_CPU_SUP_AMD extern void set_dr_addr_mask(unsigned long mask, int dr); #else diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index 50fb9cd5be97..bcb9dd961c6d 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -727,15 +727,7 @@ static __always_inline void debug_enter(unsigned long *dr6, unsigned long *dr7) * Entry text is excluded for HW_BP_X and cpu_entry_area, which * includes the entry stack is excluded for everything. */ - get_debugreg(*dr7, 7); - set_debugreg(0, 7); - - /* - * Ensure the compiler doesn't lower the above statements into - * the critical section; disabling breakpoints late would not - * be good. - */ - barrier(); + *dr7 = local_db_save(); /* * The Intel SDM says: @@ -756,13 +748,7 @@ static __always_inline void debug_enter(unsigned long *dr6, unsigned long *dr7) static __always_inline void debug_exit(unsigned long dr7) { - /* - * Ensure the compiler doesn't raise this statement into - * the critical section; enabling breakpoints early would - * not be good. - */ - barrier(); - set_debugreg(dr7, 7); + local_db_restore(dr7); } /* -- cgit v1.2.3 From fd338e3564b0b8597a89f83941a0eda3e5092cc0 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 29 May 2020 23:27:34 +0200 Subject: x86/entry, nmi: Disable #DB Instead of playing stupid games with IST stacks, fully disallow #DB during NMIs. There is absolutely no reason to allow them, and killing this saves a heap of trouble. #DB is already forbidden on noinstr and CEA, so there can't be a #DB before this. Disabling it right after nmi_enter() ensures that the full NMI code is protected. 
Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Thomas Gleixner Link: https://lkml.kernel.org/r/20200529213321.069223695@infradead.org --- arch/x86/kernel/nmi.c | 55 +++------------------------------------------------ 1 file changed, 3 insertions(+), 52 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/nmi.c b/arch/x86/kernel/nmi.c index 5df4e7f58369..873a8c040b86 100644 --- a/arch/x86/kernel/nmi.c +++ b/arch/x86/kernel/nmi.c @@ -474,40 +474,7 @@ enum nmi_states { }; static DEFINE_PER_CPU(enum nmi_states, nmi_state); static DEFINE_PER_CPU(unsigned long, nmi_cr2); - -#ifdef CONFIG_X86_64 -/* - * In x86_64, we need to handle breakpoint -> NMI -> breakpoint. Without - * some care, the inner breakpoint will clobber the outer breakpoint's - * stack. - * - * If a breakpoint is being processed, and the debug stack is being - * used, if an NMI comes in and also hits a breakpoint, the stack - * pointer will be set to the same fixed address as the breakpoint that - * was interrupted, causing that stack to be corrupted. To handle this - * case, check if the stack that was interrupted is the debug stack, and - * if so, change the IDT so that new breakpoints will use the current - * stack and not switch to the fixed address. On return of the NMI, - * switch back to the original IDT. - */ -static DEFINE_PER_CPU(int, update_debug_stack); - -static noinstr bool is_debug_stack(unsigned long addr) -{ - struct cea_exception_stacks *cs = __this_cpu_read(cea_exception_stacks); - unsigned long top = CEA_ESTACK_TOP(cs, DB); - unsigned long bot = CEA_ESTACK_BOT(cs, DB1); - - if (__this_cpu_read(debug_stack_usage)) - return true; - /* - * Note, this covers the guard page between DB and DB1 as well to - * avoid two checks. But by all means @addr can never point into - * the guard page. - */ - return addr >= bot && addr < top; -} -#endif +static DEFINE_PER_CPU(unsigned long, nmi_dr7); DEFINE_IDTENTRY_NMI(exc_nmi) { @@ -522,18 +489,7 @@ DEFINE_IDTENTRY_NMI(exc_nmi) this_cpu_write(nmi_cr2, read_cr2()); nmi_restart: -#ifdef CONFIG_X86_64 - /* - * If we interrupted a breakpoint, it is possible that - * the nmi handler will have breakpoints too. We need to - * change the IDT such that breakpoints that happen here - * continue to use the NMI stack. - */ - if (unlikely(is_debug_stack(regs->sp))) { - debug_stack_set_zero(); - this_cpu_write(update_debug_stack, 1); - } -#endif + this_cpu_write(nmi_dr7, local_db_save()); nmi_enter(); @@ -544,12 +500,7 @@ nmi_restart: nmi_exit(); -#ifdef CONFIG_X86_64 - if (unlikely(this_cpu_read(update_debug_stack))) { - debug_stack_reset(); - this_cpu_write(update_debug_stack, 0); - } -#endif + local_db_restore(this_cpu_read(nmi_dr7)); if (unlikely(this_cpu_read(nmi_cr2) != read_cr2())) write_cr2(this_cpu_read(nmi_cr2)); -- cgit v1.2.3 From cd840e424f27fcc1ae8d14b7ec3ec4560ee6561a Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 29 May 2020 23:27:35 +0200 Subject: x86/entry, mce: Disallow #DB during #MC #MC is fragile as heck, don't tempt fate. 
Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Thomas Gleixner Link: https://lkml.kernel.org/r/20200529213321.131187767@infradead.org --- arch/x86/kernel/cpu/mce/core.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'arch') diff --git a/arch/x86/kernel/cpu/mce/core.c b/arch/x86/kernel/cpu/mce/core.c index 068e6cab1286..be499267bbb4 100644 --- a/arch/x86/kernel/cpu/mce/core.c +++ b/arch/x86/kernel/cpu/mce/core.c @@ -1943,22 +1943,34 @@ static __always_inline void exc_machine_check_user(struct pt_regs *regs) /* MCE hit kernel mode */ DEFINE_IDTENTRY_MCE(exc_machine_check) { + unsigned long dr7; + + dr7 = local_db_save(); exc_machine_check_kernel(regs); + local_db_restore(dr7); } /* The user mode variant. */ DEFINE_IDTENTRY_MCE_USER(exc_machine_check) { + unsigned long dr7; + + dr7 = local_db_save(); exc_machine_check_user(regs); + local_db_restore(dr7); } #else /* 32bit unified entry point */ DEFINE_IDTENTRY_MCE(exc_machine_check) { + unsigned long dr7; + + dr7 = local_db_save(); if (user_mode(regs)) exc_machine_check_user(regs); else exc_machine_check_kernel(regs); + local_db_restore(dr7); } #endif -- cgit v1.2.3 From 84b6a3491567a540f955e18d8e615493afa36df0 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 29 May 2020 23:27:36 +0200 Subject: x86/entry: Optimize local_db_save() for virt Because DRn access is 'difficult' with virt, while the DR7 read is cheaper than a cacheline miss on native, add a virt-specific fast path to local_db_save() which avoids touching DRn entirely when no breakpoints are in use. Suggested-by: Andy Lutomirski Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Thomas Gleixner Link: https://lkml.kernel.org/r/20200529213321.187833200@infradead.org --- arch/x86/include/asm/debugreg.h | 5 ++++- arch/x86/kernel/hw_breakpoint.c | 26 ++++++++++++++++++++++---- arch/x86/kvm/vmx/nested.c | 2 +- 3 files changed, 27 insertions(+), 6 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/debugreg.h b/arch/x86/include/asm/debugreg.h index 4ef8690be8a7..3e1c5021d0f8 100644 --- a/arch/x86/include/asm/debugreg.h +++ b/arch/x86/include/asm/debugreg.h @@ -85,7 +85,7 @@ static inline void hw_breakpoint_disable(void) set_debugreg(0UL, 3); } -static inline int hw_breakpoint_active(void) +static inline bool hw_breakpoint_active(void) { return __this_cpu_read(cpu_dr7) & DR_GLOBAL_ENABLE_MASK; } @@ -117,6 +117,9 @@ static __always_inline unsigned long local_db_save(void) { unsigned long dr7; + if (static_cpu_has(X86_FEATURE_HYPERVISOR) && !hw_breakpoint_active()) + return 0; + get_debugreg(dr7, 7); dr7 &= ~0x400; /* architecturally set bit */ if (dr7) diff --git a/arch/x86/kernel/hw_breakpoint.c b/arch/x86/kernel/hw_breakpoint.c index fc1743a2b0e9..8cdf29ffd95f 100644 --- a/arch/x86/kernel/hw_breakpoint.c +++ b/arch/x86/kernel/hw_breakpoint.c @@ -99,6 +99,8 @@ int arch_install_hw_breakpoint(struct perf_event *bp) unsigned long *dr7; int i; + lockdep_assert_irqs_disabled(); + for (i = 0; i < HBP_NUM; i++) { struct perf_event **slot = this_cpu_ptr(&bp_per_reg[i]); @@ -117,6 +119,12 @@ int arch_install_hw_breakpoint(struct perf_event *bp) dr7 = this_cpu_ptr(&cpu_dr7); *dr7 |= encode_dr7(i, info->len, info->type); + /* + * Ensure we first write cpu_dr7 before we set the DR7 register. + * This ensures an NMI never sees cpu_dr7 as 0 when DR7 is not.
+ */ + barrier(); + set_debugreg(*dr7, 7); if (info->mask) set_dr_addr_mask(info->mask, i); @@ -136,9 +144,11 @@ int arch_install_hw_breakpoint(struct perf_event *bp) void arch_uninstall_hw_breakpoint(struct perf_event *bp) { struct arch_hw_breakpoint *info = counter_arch_bp(bp); - unsigned long *dr7; + unsigned long dr7; int i; + lockdep_assert_irqs_disabled(); + for (i = 0; i < HBP_NUM; i++) { struct perf_event **slot = this_cpu_ptr(&bp_per_reg[i]); @@ -151,12 +161,20 @@ void arch_uninstall_hw_breakpoint(struct perf_event *bp) if (WARN_ONCE(i == HBP_NUM, "Can't find any breakpoint slot")) return; - dr7 = this_cpu_ptr(&cpu_dr7); - *dr7 &= ~__encode_dr7(i, info->len, info->type); + dr7 = this_cpu_read(cpu_dr7); + dr7 &= ~__encode_dr7(i, info->len, info->type); - set_debugreg(*dr7, 7); + set_debugreg(dr7, 7); if (info->mask) set_dr_addr_mask(0, i); + + /* + * Ensure the write to cpu_dr7 is after we've set the DR7 register. + * This ensures an NMI never sees cpu_dr7 as 0 when DR7 is not. + */ + barrier(); + + this_cpu_write(cpu_dr7, dr7); } static int arch_bp_generic_len(int x86_len) diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c index 9c74a732b08d..2e7238a57fc1 100644 --- a/arch/x86/kvm/vmx/nested.c +++ b/arch/x86/kvm/vmx/nested.c @@ -3087,9 +3087,9 @@ static int nested_vmx_check_vmentry_hw(struct kvm_vcpu *vcpu) /* * VMExit clears RFLAGS.IF and DR7, even on a consistency check. */ - local_irq_enable(); if (hw_breakpoint_active()) set_debugreg(__this_cpu_read(cpu_dr7), 7); + local_irq_enable(); preempt_enable(); /* -- cgit v1.2.3 From f9912ada82862df341b3e86864cbd532d0d24b84 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 29 May 2020 23:27:37 +0200 Subject: x86/entry: Remove debug IDT frobbing This is all unused now. Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Thomas Gleixner Link: https://lkml.kernel.org/r/20200529213321.245019500@infradead.org --- arch/x86/include/asm/debugreg.h | 19 ------------------- arch/x86/include/asm/desc.h | 34 +--------------------------------- arch/x86/kernel/cpu/common.c | 17 ----------------- arch/x86/kernel/idt.c | 30 ------------------------------ arch/x86/kernel/traps.c | 9 --------- 5 files changed, 1 insertion(+), 108 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/debugreg.h b/arch/x86/include/asm/debugreg.h index 3e1c5021d0f8..42fc35d86535 100644 --- a/arch/x86/include/asm/debugreg.h +++ b/arch/x86/include/asm/debugreg.h @@ -94,25 +94,6 @@ extern void aout_dump_debugregs(struct user *dump); extern void hw_breakpoint_restore(void); -#ifdef CONFIG_X86_64 -DECLARE_PER_CPU(int, debug_stack_usage); -static inline void debug_stack_usage_inc(void) -{ - __this_cpu_inc(debug_stack_usage); -} -static inline void debug_stack_usage_dec(void) -{ - __this_cpu_dec(debug_stack_usage); -} -void debug_stack_set_zero(void); -void debug_stack_reset(void); -#else /* !X86_64 */ -static inline void debug_stack_set_zero(void) { } -static inline void debug_stack_reset(void) { } -static inline void debug_stack_usage_inc(void) { } -static inline void debug_stack_usage_dec(void) { } -#endif /* X86_64 */ - static __always_inline unsigned long local_db_save(void) { unsigned long dr7; diff --git a/arch/x86/include/asm/desc.h b/arch/x86/include/asm/desc.h index d6c3d346c63a..07632f31147a 100644 --- a/arch/x86/include/asm/desc.h +++ b/arch/x86/include/asm/desc.h @@ -42,8 +42,6 @@ static inline void fill_ldt(struct desc_struct *desc, const struct user_desc *in extern struct desc_ptr idt_descr; extern gate_desc idt_table[]; -extern
const struct desc_ptr debug_idt_descr; -extern gate_desc debug_idt_table[]; struct gdt_page { struct desc_struct gdt[GDT_ENTRIES]; @@ -390,31 +388,6 @@ void alloc_intr_gate(unsigned int n, const void *addr); extern unsigned long system_vectors[]; -#ifdef CONFIG_X86_64 -DECLARE_PER_CPU(u32, debug_idt_ctr); -static __always_inline bool is_debug_idt_enabled(void) -{ - if (this_cpu_read(debug_idt_ctr)) - return true; - - return false; -} - -static __always_inline void load_debug_idt(void) -{ - load_idt((const struct desc_ptr *)&debug_idt_descr); -} -#else -static inline bool is_debug_idt_enabled(void) -{ - return false; -} - -static inline void load_debug_idt(void) -{ -} -#endif - /* * The load_current_idt() must be called with interrupts disabled * to avoid races. That way the IDT will always be set back to the expected @@ -424,10 +397,7 @@ static inline void load_debug_idt(void) */ static __always_inline void load_current_idt(void) { - if (is_debug_idt_enabled()) - load_debug_idt(); - else - load_idt((const struct desc_ptr *)&idt_descr); + load_idt((const struct desc_ptr *)&idt_descr); } extern void idt_setup_early_handler(void); @@ -438,11 +408,9 @@ extern void idt_setup_apic_and_irq_gates(void); #ifdef CONFIG_X86_64 extern void idt_setup_early_pf(void); extern void idt_setup_ist_traps(void); -extern void idt_setup_debugidt_traps(void); #else static inline void idt_setup_early_pf(void) { } static inline void idt_setup_ist_traps(void) { } -static inline void idt_setup_debugidt_traps(void) { } #endif extern void idt_invalidate(void *addr); diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index f4645f9ff9cb..043d93cdcaad 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -1706,23 +1706,6 @@ void syscall_init(void) X86_EFLAGS_IOPL|X86_EFLAGS_AC|X86_EFLAGS_NT); } -DEFINE_PER_CPU(int, debug_stack_usage); -DEFINE_PER_CPU(u32, debug_idt_ctr); - -noinstr void debug_stack_set_zero(void) -{ - this_cpu_inc(debug_idt_ctr); - load_current_idt(); -} - -noinstr void debug_stack_reset(void) -{ - if (WARN_ON(!this_cpu_read(debug_idt_ctr))) - return; - if (this_cpu_dec_return(debug_idt_ctr) == 0) - load_current_idt(); -} - #else /* CONFIG_X86_64 */ DEFINE_PER_CPU(struct task_struct *, current_task) = &init_task; diff --git a/arch/x86/kernel/idt.c b/arch/x86/kernel/idt.c index bc9b0d1d7bb8..226c99229886 100644 --- a/arch/x86/kernel/idt.c +++ b/arch/x86/kernel/idt.c @@ -158,14 +158,6 @@ static const __initconst struct idt_data apic_idts[] = { static const __initconst struct idt_data early_pf_idts[] = { INTG(X86_TRAP_PF, asm_exc_page_fault), }; - -/* - * Override for the debug_idt. Same as the default, but with interrupt - * stack set to DEFAULT_STACK (0). Required for NMI trap handling. - */ -static const __initconst struct idt_data dbg_idts[] = { - INTG(X86_TRAP_DB, asm_exc_debug), -}; #endif /* Must be page-aligned because the real IDT is used in a fixmap. */ @@ -177,9 +169,6 @@ struct desc_ptr idt_descr __ro_after_init = { }; #ifdef CONFIG_X86_64 -/* No need to be aligned, but done to keep all IDTs defined the same way. */ -gate_desc debug_idt_table[IDT_ENTRIES] __page_aligned_bss; - /* * The exceptions which use Interrupt stacks. They are setup after * cpu_init() when the TSS has been initialized. @@ -192,15 +181,6 @@ static const __initconst struct idt_data ist_idts[] = { ISTG(X86_TRAP_MC, asm_exc_machine_check, IST_INDEX_MCE), #endif }; - -/* - * Override for the debug_idt. Same as the default, but with interrupt - * stack set to DEFAULT_STACK (0). 
Required for NMI trap handling. - */ -const struct desc_ptr debug_idt_descr = { - .size = IDT_ENTRIES * 16 - 1, - .address = (unsigned long) debug_idt_table, -}; #endif static inline void idt_init_desc(gate_desc *gate, const struct idt_data *d) @@ -292,16 +272,6 @@ void __init idt_setup_ist_traps(void) { idt_setup_from_table(idt_table, ist_idts, ARRAY_SIZE(ist_idts), true); } - -/** - * idt_setup_debugidt_traps - Initialize the debug idt table with debug traps - */ -void __init idt_setup_debugidt_traps(void) -{ - memcpy(&debug_idt_table, &idt_table, IDT_ENTRIES * 16); - - idt_setup_from_table(debug_idt_table, dbg_idts, ARRAY_SIZE(dbg_idts), false); -} #endif /** diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index bcb9dd961c6d..6f887be1ac0c 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -798,12 +798,6 @@ static void noinstr handle_debug(struct pt_regs *regs, unsigned long dr6, return; } - /* - * Let others (NMI) know that the debug stack is in use - * as we may switch to the interrupt stack. - */ - debug_stack_usage_inc(); - /* It's safe to allow irq's after DR6 has been saved */ cond_local_irq_enable(regs); @@ -831,7 +825,6 @@ static void noinstr handle_debug(struct pt_regs *regs, unsigned long dr6, out: cond_local_irq_disable(regs); - debug_stack_usage_dec(); instrumentation_end(); } @@ -1077,6 +1070,4 @@ void __init trap_init(void) cpu_init(); idt_setup_ist_traps(); - - idt_setup_debugidt_traps(); } -- cgit v1.2.3 From fd501d4f0399700011acde486576c7c1eb8e7a61 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 29 May 2020 23:27:38 +0200 Subject: x86/entry: Remove DBn stacks Both #DB itself, as all other IST users (NMI, #MC) now clear DR7 on entry. Combined with not allowing breakpoints on entry/noinstr/NOKPROBE text and no single step (EFLAGS.TF) inside the #DB handler should guarantee no nested #DB. Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Thomas Gleixner Link: https://lkml.kernel.org/r/20200529213321.303027161@infradead.org --- arch/x86/entry/entry_64.S | 17 ----------------- arch/x86/include/asm/cpu_entry_area.h | 12 +++--------- arch/x86/kernel/asm-offsets_64.c | 3 --- arch/x86/kernel/dumpstack_64.c | 7 ++----- arch/x86/mm/cpu_entry_area.c | 1 - 5 files changed, 5 insertions(+), 35 deletions(-) (limited to 'arch') diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index 265ff97b3961..8ecaeee53653 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -396,11 +396,6 @@ SYM_CODE_END(\asmsym) idtentry \vector asm_\cfunc \cfunc has_error_code=0 .endm -/* - * MCE and DB exceptions - */ -#define CPU_TSS_IST(x) PER_CPU_VAR(cpu_tss_rw) + (TSS_ist + (x) * 8) - /** * idtentry_mce_db - Macro to generate entry stubs for #MC and #DB * @vector: Vector number @@ -416,10 +411,6 @@ SYM_CODE_END(\asmsym) * If hits in kernel mode then it needs to go through the paranoid * entry as the exception can hit any random state. No preemption * check on exit to keep the paranoid path simple. - * - * If the trap is #DB then the interrupt stack entry in the IST is - * moved to the second stack, so a potential recursion will have a - * fresh IST. 
*/ .macro idtentry_mce_db vector asmsym cfunc SYM_CODE_START(\asmsym) @@ -445,16 +436,8 @@ SYM_CODE_START(\asmsym) movq %rsp, %rdi /* pt_regs pointer */ - .if \vector == X86_TRAP_DB - subq $DB_STACK_OFFSET, CPU_TSS_IST(IST_INDEX_DB) - .endif - call \cfunc - .if \vector == X86_TRAP_DB - addq $DB_STACK_OFFSET, CPU_TSS_IST(IST_INDEX_DB) - .endif - jmp paranoid_exit /* Switch to the regular task stack and use the noist entry point */ diff --git a/arch/x86/include/asm/cpu_entry_area.h b/arch/x86/include/asm/cpu_entry_area.h index 02c0078d3787..8902fdb7de13 100644 --- a/arch/x86/include/asm/cpu_entry_area.h +++ b/arch/x86/include/asm/cpu_entry_area.h @@ -11,15 +11,11 @@ #ifdef CONFIG_X86_64 /* Macro to enforce the same ordering and stack sizes */ -#define ESTACKS_MEMBERS(guardsize, db2_holesize)\ +#define ESTACKS_MEMBERS(guardsize) \ char DF_stack_guard[guardsize]; \ char DF_stack[EXCEPTION_STKSZ]; \ char NMI_stack_guard[guardsize]; \ char NMI_stack[EXCEPTION_STKSZ]; \ - char DB2_stack_guard[guardsize]; \ - char DB2_stack[db2_holesize]; \ - char DB1_stack_guard[guardsize]; \ - char DB1_stack[EXCEPTION_STKSZ]; \ char DB_stack_guard[guardsize]; \ char DB_stack[EXCEPTION_STKSZ]; \ char MCE_stack_guard[guardsize]; \ @@ -28,12 +24,12 @@ /* The exception stacks' physical storage. No guard pages required */ struct exception_stacks { - ESTACKS_MEMBERS(0, 0) + ESTACKS_MEMBERS(0) }; /* The effective cpu entry area mapping with guard pages. */ struct cea_exception_stacks { - ESTACKS_MEMBERS(PAGE_SIZE, EXCEPTION_STKSZ) + ESTACKS_MEMBERS(PAGE_SIZE) }; /* @@ -42,8 +38,6 @@ struct cea_exception_stacks { enum exception_stack_ordering { ESTACK_DF, ESTACK_NMI, - ESTACK_DB2, - ESTACK_DB1, ESTACK_DB, ESTACK_MCE, N_EXCEPTION_STACKS diff --git a/arch/x86/kernel/asm-offsets_64.c b/arch/x86/kernel/asm-offsets_64.c index c2a47016f243..828be792231e 100644 --- a/arch/x86/kernel/asm-offsets_64.c +++ b/arch/x86/kernel/asm-offsets_64.c @@ -57,9 +57,6 @@ int main(void) BLANK(); #undef ENTRY - OFFSET(TSS_ist, tss_struct, x86_tss.ist); - DEFINE(DB_STACK_OFFSET, offsetof(struct cea_exception_stacks, DB_stack) - - offsetof(struct cea_exception_stacks, DB1_stack)); BLANK(); #ifdef CONFIG_STACKPROTECTOR diff --git a/arch/x86/kernel/dumpstack_64.c b/arch/x86/kernel/dumpstack_64.c index 460ae7f66818..4a94d38cd141 100644 --- a/arch/x86/kernel/dumpstack_64.c +++ b/arch/x86/kernel/dumpstack_64.c @@ -22,15 +22,13 @@ static const char * const exception_stack_names[] = { [ ESTACK_DF ] = "#DF", [ ESTACK_NMI ] = "NMI", - [ ESTACK_DB2 ] = "#DB2", - [ ESTACK_DB1 ] = "#DB1", [ ESTACK_DB ] = "#DB", [ ESTACK_MCE ] = "#MC", }; const char *stack_type_name(enum stack_type type) { - BUILD_BUG_ON(N_EXCEPTION_STACKS != 6); + BUILD_BUG_ON(N_EXCEPTION_STACKS != 4); if (type == STACK_TYPE_IRQ) return "IRQ"; @@ -79,7 +77,6 @@ static const struct estack_pages estack_pages[CEA_ESTACK_PAGES] ____cacheline_aligned = { EPAGERANGE(DF), EPAGERANGE(NMI), - EPAGERANGE(DB1), EPAGERANGE(DB), EPAGERANGE(MCE), }; @@ -91,7 +88,7 @@ static bool in_exception_stack(unsigned long *stack, struct stack_info *info) struct pt_regs *regs; unsigned int k; - BUILD_BUG_ON(N_EXCEPTION_STACKS != 6); + BUILD_BUG_ON(N_EXCEPTION_STACKS != 4); begin = (unsigned long)__this_cpu_read(cea_exception_stacks); /* diff --git a/arch/x86/mm/cpu_entry_area.c b/arch/x86/mm/cpu_entry_area.c index 6f8b48f545f4..770b613790b3 100644 --- a/arch/x86/mm/cpu_entry_area.c +++ b/arch/x86/mm/cpu_entry_area.c @@ -107,7 +107,6 @@ static void __init percpu_setup_exception_stacks(unsigned int cpu) */ 
cea_map_stack(DF); cea_map_stack(NMI); - cea_map_stack(DB1); cea_map_stack(DB); cea_map_stack(MCE); } -- cgit v1.2.3 From 59bc300b712998d10254ee20e24f2e7ec09c560a Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 29 May 2020 23:27:39 +0200 Subject: x86/entry: Clarify irq_{enter,exit}_rcu() Because: irq_enter_rcu() includes lockdep_hardirq_enter() irq_exit_rcu() does *NOT* include lockdep_hardirq_exit() Which resulted in two 'stray' lockdep_hardirq_exit() calls in idtentry.h, and me spending a long time trying to find the matching enter calls. Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Thomas Gleixner Link: https://lkml.kernel.org/r/20200529213321.359433429@infradead.org --- arch/x86/include/asm/idtentry.h | 2 -- kernel/softirq.c | 21 ++++++++++++++------- 2 files changed, 14 insertions(+), 9 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/idtentry.h b/arch/x86/include/asm/idtentry.h index 38b672ded40b..d203c541a65a 100644 --- a/arch/x86/include/asm/idtentry.h +++ b/arch/x86/include/asm/idtentry.h @@ -206,7 +206,6 @@ __visible noinstr void func(struct pt_regs *regs, \ kvm_set_cpu_l1tf_flush_l1d(); \ __##func (regs, (u8)error_code); \ irq_exit_rcu(); \ - lockdep_hardirq_exit(); \ instrumentation_end(); \ idtentry_exit_cond_rcu(regs, rcu_exit); \ } \ @@ -249,7 +248,6 @@ __visible noinstr void func(struct pt_regs *regs) \ kvm_set_cpu_l1tf_flush_l1d(); \ run_on_irqstack_cond(__##func, regs, regs); \ irq_exit_rcu(); \ - lockdep_hardirq_exit(); \ instrumentation_end(); \ idtentry_exit_cond_rcu(regs, rcu_exit); \ } \ diff --git a/kernel/softirq.c b/kernel/softirq.c index beb8e3a66c7c..c4201b7f42b1 100644 --- a/kernel/softirq.c +++ b/kernel/softirq.c @@ -404,12 +404,7 @@ static inline void tick_irq_exit(void) #endif } -/** - * irq_exit_rcu() - Exit an interrupt context without updating RCU - * - * Also processes softirqs if needed and possible. - */ -void irq_exit_rcu(void) +static inline void __irq_exit_rcu(void) { #ifndef __ARCH_IRQ_EXIT_IRQS_DISABLED local_irq_disable(); @@ -424,6 +419,18 @@ void irq_exit_rcu(void) tick_irq_exit(); } +/** + * irq_exit_rcu() - Exit an interrupt context without updating RCU + * + * Also processes softirqs if needed and possible. + */ +void irq_exit_rcu(void) +{ + __irq_exit_rcu(); + /* must be last! */ + lockdep_hardirq_exit(); +} + /** * irq_exit - Exit an interrupt context, update RCU and lockdep * @@ -431,7 +438,7 @@ void irq_exit_rcu(void) */ void irq_exit(void) { - irq_exit_rcu(); + __irq_exit_rcu(); rcu_irq_exit(); /* must be last! */ lockdep_hardirq_exit(); -- cgit v1.2.3 From bf2b3008440072068580c609d79a079656af0588 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 29 May 2020 23:27:40 +0200 Subject: x86/entry: Rename trace_hardirqs_off_prepare() The typical pattern for trace_hardirqs_off_prepare() is: ENTRY lockdep_hardirqs_off(); // because hardware ... do entry magic instrumentation_begin(); trace_hardirqs_off_prepare(); ... do actual work trace_hardirqs_on_prepare(); lockdep_hardirqs_on_prepare(); instrumentation_end(); ... do exit magic lockdep_hardirqs_on(); which shows that it's named wrong, rename it to trace_hardirqs_off_finish(), as it concludes the hardirq_off transition. Also, given that the above is the only correct order, make the traditional all-in-one trace_hardirqs_off() follow suit. 
Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Thomas Gleixner Link: https://lkml.kernel.org/r/20200529213321.415774872@infradead.org --- arch/x86/entry/common.c | 6 +++--- arch/x86/kernel/cpu/mce/core.c | 2 +- arch/x86/kernel/nmi.c | 2 +- arch/x86/kernel/traps.c | 4 ++-- include/linux/irqflags.h | 4 ++-- kernel/trace/trace_preemptirq.c | 10 +++++----- 6 files changed, 14 insertions(+), 14 deletions(-) (limited to 'arch') diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c index b0b1c3cf0e6e..f4d57782c14b 100644 --- a/arch/x86/entry/common.c +++ b/arch/x86/entry/common.c @@ -65,7 +65,7 @@ static noinstr void enter_from_user_mode(void) instrumentation_begin(); CT_WARN_ON(state != CONTEXT_USER); - trace_hardirqs_off_prepare(); + trace_hardirqs_off_finish(); instrumentation_end(); } #else @@ -73,7 +73,7 @@ static __always_inline void enter_from_user_mode(void) { lockdep_hardirqs_off(CALLER_ADDR0); instrumentation_begin(); - trace_hardirqs_off_prepare(); + trace_hardirqs_off_finish(); instrumentation_end(); } #endif @@ -569,7 +569,7 @@ bool noinstr idtentry_enter_cond_rcu(struct pt_regs *regs) lockdep_hardirqs_off(CALLER_ADDR0); rcu_irq_enter(); instrumentation_begin(); - trace_hardirqs_off_prepare(); + trace_hardirqs_off_finish(); instrumentation_end(); return true; diff --git a/arch/x86/kernel/cpu/mce/core.c b/arch/x86/kernel/cpu/mce/core.c index be499267bbb4..b9cb381b4019 100644 --- a/arch/x86/kernel/cpu/mce/core.c +++ b/arch/x86/kernel/cpu/mce/core.c @@ -1922,7 +1922,7 @@ static __always_inline void exc_machine_check_kernel(struct pt_regs *regs) * that out because it's an indirect call. Annotate it. */ instrumentation_begin(); - trace_hardirqs_off_prepare(); + trace_hardirqs_off_finish(); machine_check_vector(regs); if (regs->flags & X86_EFLAGS_IF) trace_hardirqs_on_prepare(); diff --git a/arch/x86/kernel/nmi.c b/arch/x86/kernel/nmi.c index 873a8c040b86..3a98ff36f411 100644 --- a/arch/x86/kernel/nmi.c +++ b/arch/x86/kernel/nmi.c @@ -330,7 +330,7 @@ static noinstr void default_do_nmi(struct pt_regs *regs) __this_cpu_write(last_nmi_rip, regs->ip); instrumentation_begin(); - trace_hardirqs_off_prepare(); + trace_hardirqs_off_finish(); handled = nmi_handle(NMI_LOCAL, regs); __this_cpu_add(nmi_stats.normal, handled); diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index 6f887be1ac0c..79af913e78a3 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -634,7 +634,7 @@ DEFINE_IDTENTRY_RAW(exc_int3) } else { nmi_enter(); instrumentation_begin(); - trace_hardirqs_off_prepare(); + trace_hardirqs_off_finish(); if (!do_int3(regs)) die("int3", regs, 0); if (regs->flags & X86_EFLAGS_IF) @@ -833,7 +833,7 @@ static __always_inline void exc_debug_kernel(struct pt_regs *regs, { nmi_enter(); instrumentation_begin(); - trace_hardirqs_off_prepare(); + trace_hardirqs_off_finish(); instrumentation_end(); /* diff --git a/include/linux/irqflags.h b/include/linux/irqflags.h index d7f7e436c3af..6384d2813ded 100644 --- a/include/linux/irqflags.h +++ b/include/linux/irqflags.h @@ -32,7 +32,7 @@ #ifdef CONFIG_TRACE_IRQFLAGS extern void trace_hardirqs_on_prepare(void); - extern void trace_hardirqs_off_prepare(void); + extern void trace_hardirqs_off_finish(void); extern void trace_hardirqs_on(void); extern void trace_hardirqs_off(void); # define lockdep_hardirq_context(p) ((p)->hardirq_context) @@ -101,7 +101,7 @@ do { \ #else # define trace_hardirqs_on_prepare() do { } while (0) -# define trace_hardirqs_off_prepare() do { } while (0) +# define trace_hardirqs_off_finish() do 
{ } while (0) # define trace_hardirqs_on() do { } while (0) # define trace_hardirqs_off() do { } while (0) # define lockdep_hardirq_context(p) 0 diff --git a/kernel/trace/trace_preemptirq.c b/kernel/trace/trace_preemptirq.c index fb0691b8a88d..f10073e62603 100644 --- a/kernel/trace/trace_preemptirq.c +++ b/kernel/trace/trace_preemptirq.c @@ -58,7 +58,7 @@ NOKPROBE_SYMBOL(trace_hardirqs_on); * and lockdep uses a staged approach which splits the lockdep hardirq * tracking into a RCU on and a RCU off section. */ -void trace_hardirqs_off_prepare(void) +void trace_hardirqs_off_finish(void) { if (!this_cpu_read(tracing_irq_cpu)) { this_cpu_write(tracing_irq_cpu, 1); @@ -68,19 +68,19 @@ void trace_hardirqs_off_prepare(void) } } -EXPORT_SYMBOL(trace_hardirqs_off_prepare); -NOKPROBE_SYMBOL(trace_hardirqs_off_prepare); +EXPORT_SYMBOL(trace_hardirqs_off_finish); +NOKPROBE_SYMBOL(trace_hardirqs_off_finish); void trace_hardirqs_off(void) { + lockdep_hardirqs_off(CALLER_ADDR0); + if (!this_cpu_read(tracing_irq_cpu)) { this_cpu_write(tracing_irq_cpu, 1); tracer_hardirqs_off(CALLER_ADDR0, CALLER_ADDR1); if (!in_nmi()) trace_irq_disable_rcuidle(CALLER_ADDR0, CALLER_ADDR1); } - - lockdep_hardirqs_off(CALLER_ADDR0); } EXPORT_SYMBOL(trace_hardirqs_off); NOKPROBE_SYMBOL(trace_hardirqs_off); -- cgit v1.2.3 From bdf5bde8aec7b53d0ea3a44d880a4e5106ff37f3 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 28 May 2020 16:53:16 +0200 Subject: x86/idt: Mark init only functions __init Since 8175cfbbbfcb ("x86/idt: Remove update_intr_gate()") set_intr_gate() and idt_setup_from_table() are only called from __init functions. Mark them as well. Signed-off-by: Thomas Gleixner Link: https://lkml.kernel.org/r/20200528145522.715816477@linutronix.de --- arch/x86/kernel/idt.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/idt.c b/arch/x86/kernel/idt.c index 226c99229886..4b99f7bec384 100644 --- a/arch/x86/kernel/idt.c +++ b/arch/x86/kernel/idt.c @@ -197,7 +197,7 @@ static inline void idt_init_desc(gate_desc *gate, const struct idt_data *d) #endif } -static void +static __init void idt_setup_from_table(gate_desc *idt, const struct idt_data *t, int size, bool sys) { gate_desc desc; @@ -210,7 +210,7 @@ idt_setup_from_table(gate_desc *idt, const struct idt_data *t, int size, bool sy } } -static void set_intr_gate(unsigned int n, const void *addr) +static __init void set_intr_gate(unsigned int n, const void *addr) { struct idt_data data; -- cgit v1.2.3 From 94438af40d06c110988fc9e30baf801f38b1491a Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 28 May 2020 16:53:17 +0200 Subject: x86/idt: Add comments about early #PF handling The difference between 32 and 64 bit vs. early #PF handling is not documented. Replace the FIXME at idt_setup_early_pf() with proper comments. Signed-off-by: Thomas Gleixner Link: https://lkml.kernel.org/r/20200528145522.807135882@linutronix.de --- arch/x86/kernel/idt.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/idt.c b/arch/x86/kernel/idt.c index 4b99f7bec384..5ef82fcf333d 100644 --- a/arch/x86/kernel/idt.c +++ b/arch/x86/kernel/idt.c @@ -61,7 +61,11 @@ static bool idt_setup_done __initdata; static const __initconst struct idt_data early_idts[] = { INTG(X86_TRAP_DB, asm_exc_debug), SYSG(X86_TRAP_BP, asm_exc_int3), + #ifdef CONFIG_X86_32 + /* + * Not possible on 64-bit. See idt_setup_early_pf() for details. 
+ */ INTG(X86_TRAP_PF, asm_exc_page_fault), #endif }; @@ -256,8 +260,10 @@ void __init idt_setup_traps(void) * cpu_init() is invoked and sets up TSS. The IST variant is installed * after that. * - * FIXME: Why is 32bit and 64bit installing the PF handler at different - * places in the early setup code? + * Note that X86_64 cannot install the real #PF handler in + * idt_setup_early_traps() because the memory initialization needs the #PF + * handler from the early_idt_handler_array to initialize the early page + * tables. */ void __init idt_setup_early_pf(void) { -- cgit v1.2.3 From 5a2bafca1b0675a126143eea3610143130347783 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 28 May 2020 16:53:18 +0200 Subject: x86/idt: Use proper constants for table size Use the actual struct size to calculate the IDT table size instead of hardcoded values. Signed-off-by: Thomas Gleixner Link: https://lkml.kernel.org/r/20200528145522.898591501@linutronix.de --- arch/x86/kernel/idt.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/kernel/idt.c b/arch/x86/kernel/idt.c index 5ef82fcf333d..b6e1a87f0822 100644 --- a/arch/x86/kernel/idt.c +++ b/arch/x86/kernel/idt.c @@ -51,6 +51,7 @@ struct idt_data { #define TSKG(_vector, _gdt) \ G(_vector, NULL, DEFAULT_STACK, GATE_TASK, DPL0, _gdt << 3) +#define IDT_TABLE_SIZE (IDT_ENTRIES * sizeof(gate_desc)) static bool idt_setup_done __initdata; @@ -168,7 +169,7 @@ static const __initconst struct idt_data early_pf_idts[] = { gate_desc idt_table[IDT_ENTRIES] __page_aligned_bss; struct desc_ptr idt_descr __ro_after_init = { - .size = (IDT_ENTRIES * 2 * sizeof(unsigned long)) - 1, + .size = IDT_TABLE_SIZE - 1, .address = (unsigned long) idt_table, }; -- cgit v1.2.3 From 00229a54300108502f68c8777faca2d13f805f1a Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 28 May 2020 16:53:19 +0200 Subject: x86/idt: Cleanup trap_init() No point in having all the IDT cruft in trap_init(). Move it into the IDT code and fixup the comments. Signed-off-by: Thomas Gleixner Link: https://lkml.kernel.org/r/20200528145522.992376498@linutronix.de --- arch/x86/kernel/idt.c | 18 ++++++++++++++++++ arch/x86/kernel/traps.c | 9 --------- 2 files changed, 18 insertions(+), 9 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/idt.c b/arch/x86/kernel/idt.c index b6e1a87f0822..902cdd006313 100644 --- a/arch/x86/kernel/idt.c +++ b/arch/x86/kernel/idt.c @@ -4,6 +4,7 @@ */ #include +#include #include #include #include @@ -281,6 +282,19 @@ void __init idt_setup_ist_traps(void) { idt_setup_from_table(idt_table, ist_idts, ARRAY_SIZE(ist_idts), true); } #endif +static void __init idt_map_in_cea(void) +{ + /* + * Set the IDT descriptor to a fixed read-only location in the cpu + * entry area, so that the "sidt" instruction will not leak the + * location of the kernel, and to defend the IDT against arbitrary + * memory write vulnerabilities. + */ + cea_set_pte(CPU_ENTRY_AREA_RO_IDT_VADDR, __pa_symbol(idt_table), + PAGE_KERNEL_RO); + idt_descr.address = CPU_ENTRY_AREA_RO_IDT; +} + /** * idt_setup_apic_and_irq_gates - Setup APIC/SMP and normal interrupt gates */ @@ -307,6 +321,10 @@ void __init idt_setup_apic_and_irq_gates(void) set_intr_gate(i, entry); } #endif + /* Map IDT into CPU entry area and reload it.
*/ + idt_map_in_cea(); + load_idt(&idt_descr); + idt_setup_done = true; } diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index 79af913e78a3..5566fe50ef98 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -1055,15 +1055,6 @@ void __init trap_init(void) idt_setup_traps(); - /* - * Set the IDT descriptor to a fixed read-only location, so that the - * "sidt" instruction will not leak the location of the kernel, and - * to defend the IDT against arbitrary memory write vulnerabilities. - * It will be reloaded in cpu_init() */ - cea_set_pte(CPU_ENTRY_AREA_RO_IDT_VADDR, __pa_symbol(idt_table), - PAGE_KERNEL_RO); - idt_descr.address = CPU_ENTRY_AREA_RO_IDT; - /* * Should be a barrier for any external CPU state: */ -- cgit v1.2.3 From 3e77abda65b1cec10ef6b18b1ccfee0beaf400f1 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 28 May 2020 16:53:20 +0200 Subject: x86/idt: Consolidate idt functionality - Move load_current_idt() out of line and replace the hideous comment with a lockdep assert. This allows to make idt_table and idt_descr static. - Mark idt_table read only after the IDT initialization is complete. - Shuffle code around to consolidate the #ifdef sections into one. - Adapt the F00F bug code. Signed-off-by: Thomas Gleixner Link: https://lkml.kernel.org/r/20200528145523.084915381@linutronix.de --- arch/x86/include/asm/desc.h | 17 ++---------- arch/x86/kernel/idt.c | 63 +++++++++++++++++++++++++++------------------ arch/x86/mm/fault.c | 16 +++--------- 3 files changed, 44 insertions(+), 52 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/desc.h b/arch/x86/include/asm/desc.h index 07632f31147a..1ced11d31932 100644 --- a/arch/x86/include/asm/desc.h +++ b/arch/x86/include/asm/desc.h @@ -40,9 +40,6 @@ static inline void fill_ldt(struct desc_struct *desc, const struct user_desc *in desc->l = 0; } -extern struct desc_ptr idt_descr; -extern gate_desc idt_table[]; - struct gdt_page { struct desc_struct gdt[GDT_ENTRIES]; } __attribute__((aligned(PAGE_SIZE))); @@ -388,22 +385,12 @@ void alloc_intr_gate(unsigned int n, const void *addr); extern unsigned long system_vectors[]; -/* - * The load_current_idt() must be called with interrupts disabled - * to avoid races. That way the IDT will always be set back to the expected - * descriptor. It's also called when a CPU is being initialized, and - * that doesn't need to disable interrupts, as nothing should be - * bothering the CPU then. - */ -static __always_inline void load_current_idt(void) -{ - load_idt((const struct desc_ptr *)&idt_descr); -} - +extern void load_current_idt(void); extern void idt_setup_early_handler(void); extern void idt_setup_early_traps(void); extern void idt_setup_traps(void); extern void idt_setup_apic_and_irq_gates(void); +extern bool idt_is_f00f_address(unsigned long address); #ifdef CONFIG_X86_64 extern void idt_setup_early_pf(void); diff --git a/arch/x86/kernel/idt.c b/arch/x86/kernel/idt.c index 902cdd006313..0db21206f2f3 100644 --- a/arch/x86/kernel/idt.c +++ b/arch/x86/kernel/idt.c @@ -5,6 +5,7 @@ #include #include +#include #include #include #include @@ -156,37 +157,25 @@ static const __initconst struct idt_data apic_idts[] = { #endif }; -#ifdef CONFIG_X86_64 -/* - * Early traps running on the DEFAULT_STACK because the other interrupt - * stacks work only after cpu_init(). - */ -static const __initconst struct idt_data early_pf_idts[] = { - INTG(X86_TRAP_PF, asm_exc_page_fault), -}; -#endif - -/* Must be page-aligned because the real IDT is used in a fixmap. 
*/ -gate_desc idt_table[IDT_ENTRIES] __page_aligned_bss; +/* Must be page-aligned because the real IDT is used in the cpu entry area */ +static gate_desc idt_table[IDT_ENTRIES] __page_aligned_bss; struct desc_ptr idt_descr __ro_after_init = { .size = IDT_TABLE_SIZE - 1, .address = (unsigned long) idt_table, }; -#ifdef CONFIG_X86_64 -/* - * The exceptions which use Interrupt stacks. They are setup after - * cpu_init() when the TSS has been initialized. - */ -static const __initconst struct idt_data ist_idts[] = { - ISTG(X86_TRAP_DB, asm_exc_debug, IST_INDEX_DB), - ISTG(X86_TRAP_NMI, asm_exc_nmi, IST_INDEX_NMI), - ISTG(X86_TRAP_DF, asm_exc_double_fault, IST_INDEX_DF), -#ifdef CONFIG_X86_MCE - ISTG(X86_TRAP_MC, asm_exc_machine_check, IST_INDEX_MCE), -#endif -}; +void load_current_idt(void) +{ + lockdep_assert_irqs_disabled(); + load_idt(&idt_descr); +} + +#ifdef CONFIG_X86_F00F_BUG +bool idt_is_f00f_address(unsigned long address) +{ + return ((address - idt_descr.address) >> 3) == 6; +} #endif static inline void idt_init_desc(gate_desc *gate, const struct idt_data *d) @@ -255,6 +244,27 @@ void __init idt_setup_traps(void) } #ifdef CONFIG_X86_64 +/* + * Early traps running on the DEFAULT_STACK because the other interrupt + * stacks work only after cpu_init(). + */ +static const __initconst struct idt_data early_pf_idts[] = { + INTG(X86_TRAP_PF, asm_exc_page_fault), +}; + +/* + * The exceptions which use Interrupt stacks. They are setup after + * cpu_init() when the TSS has been initialized. + */ +static const __initconst struct idt_data ist_idts[] = { + ISTG(X86_TRAP_DB, asm_exc_debug, IST_INDEX_DB), + ISTG(X86_TRAP_NMI, asm_exc_nmi, IST_INDEX_NMI), + ISTG(X86_TRAP_DF, asm_exc_double_fault, IST_INDEX_DF), +#ifdef CONFIG_X86_MCE + ISTG(X86_TRAP_MC, asm_exc_machine_check, IST_INDEX_MCE), +#endif +}; + /** * idt_setup_early_pf - Initialize the idt table with early pagefault handler * @@ -325,6 +335,9 @@ void __init idt_setup_apic_and_irq_gates(void) idt_map_in_cea(); load_idt(&idt_descr); + /* Make the IDT table read only */ + set_memory_ro((unsigned long)&idt_table, 1); + idt_setup_done = true; } diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index eef29bb53cd0..66be9bd60307 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c @@ -414,21 +414,13 @@ static int is_errata100(struct pt_regs *regs, unsigned long address) return 0; } +/* Pentium F0 0F C7 C8 bug workaround: */ static int is_f00f_bug(struct pt_regs *regs, unsigned long address) { #ifdef CONFIG_X86_F00F_BUG - unsigned long nr; - - /* - * Pentium F0 0F C7 C8 bug workaround: - */ - if (boot_cpu_has_bug(X86_BUG_F00F)) { - nr = (address - idt_descr.address) >> 3; - - if (nr == 6) { - handle_invalid_op(regs); - return 1; - } + if (boot_cpu_has_bug(X86_BUG_F00F) && idt_is_f00f_address(address)) { + handle_invalid_op(regs); + return 1; } #endif return 0; -- cgit v1.2.3 From 28eaf87121abfb574fabfb08e21322b4dc377b10 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 3 Jun 2020 13:40:17 +0200 Subject: x86/entry: __always_inline debugreg for noinstr vmlinux.o: warning: objtool: exc_debug()+0x21: call to native_get_debugreg() leaves .noinstr.text section Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Thomas Gleixner Link: https://lkml.kernel.org/r/20200603114051.954401211@infradead.org --- arch/x86/include/asm/debugreg.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/debugreg.h b/arch/x86/include/asm/debugreg.h index 42fc35d86535..e89558a3fe4a 100644 --- 
a/arch/x86/include/asm/debugreg.h +++ b/arch/x86/include/asm/debugreg.h @@ -18,7 +18,7 @@ DECLARE_PER_CPU(unsigned long, cpu_dr7); native_set_debugreg(register, value) #endif -static inline unsigned long native_get_debugreg(int regno) +static __always_inline unsigned long native_get_debugreg(int regno) { unsigned long val = 0; /* Damn you, gcc! */ @@ -47,7 +47,7 @@ static inline unsigned long native_get_debugreg(int regno) return val; } -static inline void native_set_debugreg(int regno, unsigned long value) +static __always_inline void native_set_debugreg(int regno, unsigned long value) { switch (regno) { case 0: @@ -85,7 +85,7 @@ static inline void hw_breakpoint_disable(void) set_debugreg(0UL, 3); } -static inline bool hw_breakpoint_active(void) +static __always_inline bool hw_breakpoint_active(void) { return __this_cpu_read(cpu_dr7) & DR_GLOBAL_ENABLE_MASK; } -- cgit v1.2.3 From 7a745be1cc902d7376fdc29d6b5533eb46532be1 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 3 Jun 2020 13:40:18 +0200 Subject: x86/entry: __always_inline irqflags for noinstr vmlinux.o: warning: objtool: lockdep_hardirqs_on()+0x65: call to arch_local_save_flags() leaves .noinstr.text section vmlinux.o: warning: objtool: lockdep_hardirqs_off()+0x5d: call to arch_local_save_flags() leaves .noinstr.text section vmlinux.o: warning: objtool: lock_is_held_type()+0x35: call to arch_local_irq_save() leaves .noinstr.text section vmlinux.o: warning: objtool: check_preemption_disabled()+0x31: call to arch_local_save_flags() leaves .noinstr.text section vmlinux.o: warning: objtool: check_preemption_disabled()+0x33: call to arch_irqs_disabled_flags() leaves .noinstr.text section vmlinux.o: warning: objtool: lock_is_held_type()+0x2f: call to native_irq_disable() leaves .noinstr.text section Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Thomas Gleixner Link: https://lkml.kernel.org/r/20200603114052.012171668@infradead.org --- arch/x86/include/asm/irqflags.h | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/irqflags.h b/arch/x86/include/asm/irqflags.h index 8ddff8dbaed5..02a0cf547d7b 100644 --- a/arch/x86/include/asm/irqflags.h +++ b/arch/x86/include/asm/irqflags.h @@ -17,7 +17,7 @@ /* Declaration required for gcc < 4.9 to prevent -Werror=missing-prototypes */ extern inline unsigned long native_save_fl(void); -extern inline unsigned long native_save_fl(void) +extern __always_inline unsigned long native_save_fl(void) { unsigned long flags; @@ -44,12 +44,12 @@ extern inline void native_restore_fl(unsigned long flags) :"memory", "cc"); } -static inline void native_irq_disable(void) +static __always_inline void native_irq_disable(void) { asm volatile("cli": : :"memory"); } -static inline void native_irq_enable(void) +static __always_inline void native_irq_enable(void) { asm volatile("sti": : :"memory"); } @@ -74,22 +74,22 @@ static inline __cpuidle void native_halt(void) #ifndef __ASSEMBLY__ #include -static inline notrace unsigned long arch_local_save_flags(void) +static __always_inline unsigned long arch_local_save_flags(void) { return native_save_fl(); } -static inline notrace void arch_local_irq_restore(unsigned long flags) +static __always_inline void arch_local_irq_restore(unsigned long flags) { native_restore_fl(flags); } -static inline notrace void arch_local_irq_disable(void) +static __always_inline void arch_local_irq_disable(void) { native_irq_disable(); } -static inline notrace void arch_local_irq_enable(void) +static 
__always_inline void arch_local_irq_enable(void) { native_irq_enable(); } @@ -115,7 +115,7 @@ static inline __cpuidle void halt(void) /* * For spinlocks, etc: */ -static inline notrace unsigned long arch_local_irq_save(void) +static __always_inline unsigned long arch_local_irq_save(void) { unsigned long flags = arch_local_save_flags(); arch_local_irq_disable(); @@ -159,12 +159,12 @@ static inline notrace unsigned long arch_local_irq_save(void) #endif /* CONFIG_PARAVIRT_XXL */ #ifndef __ASSEMBLY__ -static inline int arch_irqs_disabled_flags(unsigned long flags) +static __always_inline int arch_irqs_disabled_flags(unsigned long flags) { return !(flags & X86_EFLAGS_IF); } -static inline int arch_irqs_disabled(void) +static __always_inline int arch_irqs_disabled(void) { unsigned long flags = arch_local_save_flags(); -- cgit v1.2.3 From 4b281e541bba74bf9574335289484c577f41eaf7 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 3 Jun 2020 13:40:19 +0200 Subject: x86/entry: __always_inline arch_atomic_* for noinstr vmlinux.o: warning: objtool: rcu_dynticks_eqs_exit()+0x33: call to arch_atomic_and.constprop.0() leaves .noinstr.text section Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Thomas Gleixner Link: https://lkml.kernel.org/r/20200603114052.070166551@infradead.org --- arch/x86/include/asm/atomic.h | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/atomic.h b/arch/x86/include/asm/atomic.h index a9ae58826074..bf35e476a776 100644 --- a/arch/x86/include/asm/atomic.h +++ b/arch/x86/include/asm/atomic.h @@ -205,13 +205,13 @@ static __always_inline bool arch_atomic_try_cmpxchg(atomic_t *v, int *old, int n } #define arch_atomic_try_cmpxchg arch_atomic_try_cmpxchg -static inline int arch_atomic_xchg(atomic_t *v, int new) +static __always_inline int arch_atomic_xchg(atomic_t *v, int new) { return arch_xchg(&v->counter, new); } #define arch_atomic_xchg arch_atomic_xchg -static inline void arch_atomic_and(int i, atomic_t *v) +static __always_inline void arch_atomic_and(int i, atomic_t *v) { asm volatile(LOCK_PREFIX "andl %1,%0" : "+m" (v->counter) @@ -219,7 +219,7 @@ static inline void arch_atomic_and(int i, atomic_t *v) : "memory"); } -static inline int arch_atomic_fetch_and(int i, atomic_t *v) +static __always_inline int arch_atomic_fetch_and(int i, atomic_t *v) { int val = arch_atomic_read(v); @@ -229,7 +229,7 @@ static inline int arch_atomic_fetch_and(int i, atomic_t *v) } #define arch_atomic_fetch_and arch_atomic_fetch_and -static inline void arch_atomic_or(int i, atomic_t *v) +static __always_inline void arch_atomic_or(int i, atomic_t *v) { asm volatile(LOCK_PREFIX "orl %1,%0" : "+m" (v->counter) @@ -237,7 +237,7 @@ static inline void arch_atomic_or(int i, atomic_t *v) : "memory"); } -static inline int arch_atomic_fetch_or(int i, atomic_t *v) +static __always_inline int arch_atomic_fetch_or(int i, atomic_t *v) { int val = arch_atomic_read(v); @@ -247,7 +247,7 @@ static inline int arch_atomic_fetch_or(int i, atomic_t *v) } #define arch_atomic_fetch_or arch_atomic_fetch_or -static inline void arch_atomic_xor(int i, atomic_t *v) +static __always_inline void arch_atomic_xor(int i, atomic_t *v) { asm volatile(LOCK_PREFIX "xorl %1,%0" : "+m" (v->counter) @@ -255,7 +255,7 @@ static inline void arch_atomic_xor(int i, atomic_t *v) : "memory"); } -static inline int arch_atomic_fetch_xor(int i, atomic_t *v) +static __always_inline int arch_atomic_fetch_xor(int i, atomic_t *v) { int val = arch_atomic_read(v); -- cgit v1.2.3 
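The annotation fixes above all enforce one rule: anything a noinstr (.noinstr.text) function calls must be forced inline, because a plain 'static inline' is only a hint and the compiler may emit an out-of-line copy in regular, instrumentable .text, at which point the call escapes the protected section and objtool prints exactly the warnings quoted in these changelogs. Below is a minimal compilable sketch of the mechanism; the two attribute macros are simplified stand-ins invented for this example, not the kernel's real noinstr/__always_inline definitions:

#define noinstr_sketch __attribute__((__section__(".noinstr.text"), __no_instrument_function__))
#define forced_inline inline __attribute__((__always_inline__))

/*
 * Forced inline: the FLAGS read is expanded into every caller, so no
 * call instruction can leave .noinstr.text. (x86-64, GCC/Clang.)
 */
static forced_inline unsigned long save_flags_sketch(void)
{
	unsigned long flags;

	asm volatile("pushf; pop %0" : "=rm" (flags) : : "memory");
	return flags;
}

noinstr_sketch unsigned long entry_sketch(void)
{
	/*
	 * Were save_flags_sketch() a plain 'static inline', the compiler
	 * could materialize it out of line in .text and call it from
	 * here, which is the "leaves .noinstr.text section" warning.
	 */
	return save_flags_sketch();
}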
From 5ef227933117085f1320b3421ef43a26bf624b4c Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 3 Jun 2020 13:40:20 +0200 Subject: x86/entry: Re-order #DB handler to avoid *SAN instrumentation vmlinux.o: warning: objtool: exc_debug()+0xbb: call to clear_ti_thread_flag.constprop.0() leaves .noinstr.text section vmlinux.o: warning: objtool: noist_exc_debug()+0x55: call to clear_ti_thread_flag.constprop.0() leaves .noinstr.text section Rework things so that handle_debug() loses the noinstr annotation and move the clear_thread_flag() into it. Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Thomas Gleixner Link: https://lkml.kernel.org/r/20200603114052.127756554@infradead.org --- arch/x86/kernel/traps.c | 55 ++++++++++++++++++++++++------------------------- 1 file changed, 27 insertions(+), 28 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index 5566fe50ef98..7febae381b91 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -775,26 +775,44 @@ static __always_inline void debug_exit(unsigned long dr7) * * May run on IST stack. */ -static void noinstr handle_debug(struct pt_regs *regs, unsigned long dr6, - bool user_icebp) +static void handle_debug(struct pt_regs *regs, unsigned long dr6, bool user) { struct task_struct *tsk = current; + bool user_icebp; int si_code; + /* + * The SDM says "The processor clears the BTF flag when it + * generates a debug exception." Clear TIF_BLOCKSTEP to keep + * TIF_BLOCKSTEP in sync with the hardware BTF flag. + */ + clear_thread_flag(TIF_BLOCKSTEP); + + /* + * If DR6 is zero, no point in trying to handle it. The kernel is + * not using INT1. + */ + if (!user && !dr6) + return; + + /* + * If dr6 gives us no information about the origin of this trap, + * then it's very likely the result of an icebp/int01 trap. + * User wants a sigtrap for that. + */ + user_icebp = user && !dr6; + /* Store the virtualized DR6 value */ tsk->thread.debugreg6 = dr6; - instrumentation_begin(); #ifdef CONFIG_KPROBES if (kprobe_debug_handler(regs)) { - instrumentation_end(); return; } #endif if (notify_die(DIE_DEBUG, "debug", regs, (long)&dr6, 0, SIGTRAP) == NOTIFY_STOP) { - instrumentation_end(); return; } @@ -825,7 +843,6 @@ static void handle_debug(struct pt_regs *regs, unsigned long dr6, bool user) out: cond_local_irq_disable(regs); - instrumentation_end(); } static __always_inline void exc_debug_kernel(struct pt_regs *regs, @@ -834,14 +851,6 @@ static __always_inline void exc_debug_kernel(struct pt_regs *regs, nmi_enter(); instrumentation_begin(); trace_hardirqs_off_finish(); - instrumentation_end(); - - /* - * The SDM says "The processor clears the BTF flag when it - * generates a debug exception." Clear TIF_BLOCKSTEP to keep - * TIF_BLOCKSTEP in sync with the hardware BTF flag. - */ - clear_thread_flag(TIF_BLOCKSTEP); /* * Catch SYSENTER with TF set and clear DR_STEP. If this hit a * watchpoint at the same time then that will still be handled. */ if ((dr6 & DR_STEP) && is_sysenter_singlestep(regs)) dr6 &= ~DR_STEP; - /* - * If DR6 is zero, no point in trying to handle it. The kernel is - * not using INT1.
- */ - if (dr6) - handle_debug(regs, dr6, false); + handle_debug(regs, dr6, false); - instrumentation_begin(); if (regs->flags & X86_EFLAGS_IF) trace_hardirqs_on_prepare(); instrumentation_end(); @@ -868,14 +871,10 @@ static __always_inline void exc_debug_user(struct pt_regs *regs, unsigned long dr6) { idtentry_enter_user(regs); - clear_thread_flag(TIF_BLOCKSTEP); + instrumentation_begin(); - /* - * If dr6 has no reason to give us about the origin of this trap, - * then it's very likely the result of an icebp/int01 trap. - * User wants a sigtrap for that. - */ - handle_debug(regs, dr6, !dr6); + handle_debug(regs, dr6, true); + instrumentation_end(); idtentry_exit_user(regs); } -- cgit v1.2.3 From 2823e83a3dc0f54d23db67ca07d74b9c8bb1fdda Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 3 Jun 2020 13:40:22 +0200 Subject: x86/entry: __always_inline CR2 for noinstr vmlinux.o: warning: objtool: exc_page_fault()+0x9: call to read_cr2() leaves .noinstr.text section vmlinux.o: warning: objtool: exc_page_fault()+0x24: call to prefetchw() leaves .noinstr.text section vmlinux.o: warning: objtool: exc_page_fault()+0x21: call to kvm_handle_async_pf.isra.0() leaves .noinstr.text section vmlinux.o: warning: objtool: exc_nmi()+0x1cc: call to write_cr2() leaves .noinstr.text section Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Thomas Gleixner Link: https://lkml.kernel.org/r/20200603114052.243227806@infradead.org --- arch/x86/include/asm/kvm_para.h | 2 +- arch/x86/include/asm/processor.h | 2 +- arch/x86/include/asm/special_insns.h | 8 ++++---- 3 files changed, 6 insertions(+), 6 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/kvm_para.h b/arch/x86/include/asm/kvm_para.h index 57fd1966c4ea..49d3a9edb06f 100644 --- a/arch/x86/include/asm/kvm_para.h +++ b/arch/x86/include/asm/kvm_para.h @@ -141,7 +141,7 @@ static inline void kvm_disable_steal_time(void) return; } -static inline bool kvm_handle_async_pf(struct pt_regs *regs, u32 token) +static __always_inline bool kvm_handle_async_pf(struct pt_regs *regs, u32 token) { return false; } diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index 29ee0c088009..42cd333616c4 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -823,7 +823,7 @@ static inline void prefetch(const void *x) * Useful for spinlocks to avoid one state transition in the * cache coherency protocol: */ -static inline void prefetchw(const void *x) +static __always_inline void prefetchw(const void *x) { alternative_input(BASE_PREFETCH, "prefetchw %P1", X86_FEATURE_3DNOWPREFETCH, diff --git a/arch/x86/include/asm/special_insns.h b/arch/x86/include/asm/special_insns.h index 82436cb04ccf..eb8e781c4353 100644 --- a/arch/x86/include/asm/special_insns.h +++ b/arch/x86/include/asm/special_insns.h @@ -28,14 +28,14 @@ static inline unsigned long native_read_cr0(void) return val; } -static inline unsigned long native_read_cr2(void) +static __always_inline unsigned long native_read_cr2(void) { unsigned long val; asm volatile("mov %%cr2,%0\n\t" : "=r" (val), "=m" (__force_order)); return val; } -static inline void native_write_cr2(unsigned long val) +static __always_inline void native_write_cr2(unsigned long val) { asm volatile("mov %0,%%cr2": : "r" (val), "m" (__force_order)); } @@ -160,12 +160,12 @@ static inline void write_cr0(unsigned long x) native_write_cr0(x); } -static inline unsigned long read_cr2(void) +static __always_inline unsigned long read_cr2(void) { return native_read_cr2(); } -static inline void 
write_cr2(unsigned long x) +static __always_inline void write_cr2(unsigned long x) { native_write_cr2(x); } -- cgit v1.2.3 From f0178fc01fe46bab6a95415f5647d1a74efcad1b Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 10 Jun 2020 08:37:01 +0200 Subject: x86/entry: Unbreak __irqentry_text_start/end magic The entry rework moved interrupt entry code from the irqentry to the noinstr section which made the irqentry section empty. This breaks boundary checks which rely on the __irqentry_text_start/end markers to find out whether a function in a stack trace is interrupt/exception entry code. This affects the function graph tracer and filter_irq_stacks(). As the IDT entry points are all sequentially emitted, this is rather simple to unbreak by injecting __irqentry_text_start/end as global labels. To make this work correctly: - Remove the IRQENTRY_TEXT section from the x86 linker script - Define __irqentry so it breaks the build if it's used - Adjust the entry mirroring in PTI - Remove the redundant kprobes and unwinder bound checks Reported-by: Qian Cai Signed-off-by: Thomas Gleixner --- arch/x86/entry/entry_32.S | 11 ++++++++++- arch/x86/entry/entry_64.S | 11 ++++++++++- arch/x86/include/asm/irq.h | 7 +++++++ arch/x86/kernel/kprobes/core.c | 7 ------- arch/x86/kernel/kprobes/opt.c | 4 +--- arch/x86/kernel/unwind_frame.c | 8 +------- arch/x86/kernel/vmlinux.lds.S | 1 - arch/x86/mm/pti.c | 4 ++-- include/linux/interrupt.h | 8 +++++--- 9 files changed, 36 insertions(+), 25 deletions(-) (limited to 'arch') diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S index 2d29f77a3601..024d7d276cd4 100644 --- a/arch/x86/entry/entry_32.S +++ b/arch/x86/entry/entry_32.S @@ -743,10 +743,19 @@ SYM_CODE_END(asm_\cfunc) /* * Include the defines which emit the idt entries which are shared - * shared between 32 and 64 bit. + * shared between 32 and 64 bit and emit the __irqentry_text_* markers + * so the stacktrace boundary checks work. */ + .align 16 + .globl __irqentry_text_start +__irqentry_text_start: + #include + .align 16 + .globl __irqentry_text_end +__irqentry_text_end: + /* * %eax: prev task * %edx: next task diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index 8ecaeee53653..d2a00c97e53f 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -478,10 +478,19 @@ SYM_CODE_END(\asmsym) /* * Include the defines which emit the idt entries which are shared - * shared between 32 and 64 bit. + * shared between 32 and 64 bit and emit the __irqentry_text_* markers + * so the stacktrace boundary checks work. */ + .align 16 + .globl __irqentry_text_start +__irqentry_text_start: + #include + .align 16 + .globl __irqentry_text_end +__irqentry_text_end: + SYM_CODE_START_LOCAL(common_interrupt_return) SYM_INNER_LABEL(swapgs_restore_regs_and_return_to_usermode, SYM_L_GLOBAL) #ifdef CONFIG_DEBUG_ENTRY diff --git a/arch/x86/include/asm/irq.h b/arch/x86/include/asm/irq.h index f73dd3f8b043..528c8a71fe7f 100644 --- a/arch/x86/include/asm/irq.h +++ b/arch/x86/include/asm/irq.h @@ -11,6 +11,13 @@ #include #include +/* + * The irq entry code is in the noinstr section and the start/end of + * __irqentry_text is emitted via labels. Make the build fail if + * something moves a C function into the __irq_entry section. + */ +#define __irq_entry __invalid_section + static inline int irq_canonicalize(int irq) { return ((irq == 2) ?
9 : irq); diff --git a/arch/x86/kernel/kprobes/core.c b/arch/x86/kernel/kprobes/core.c index 85de8fa69b24..3bafe1bd4dc7 100644 --- a/arch/x86/kernel/kprobes/core.c +++ b/arch/x86/kernel/kprobes/core.c @@ -1073,13 +1073,6 @@ NOKPROBE_SYMBOL(kprobe_fault_handler); int __init arch_populate_kprobe_blacklist(void) { - int ret; - - ret = kprobe_add_area_blacklist((unsigned long)__irqentry_text_start, - (unsigned long)__irqentry_text_end); - if (ret) - return ret; - return kprobe_add_area_blacklist((unsigned long)__entry_text_start, (unsigned long)__entry_text_end); } diff --git a/arch/x86/kernel/kprobes/opt.c b/arch/x86/kernel/kprobes/opt.c index 234f58e0fe8c..321c19950285 100644 --- a/arch/x86/kernel/kprobes/opt.c +++ b/arch/x86/kernel/kprobes/opt.c @@ -286,9 +286,7 @@ static int can_optimize(unsigned long paddr) * stack handling and registers setup. */ if (((paddr >= (unsigned long)__entry_text_start) && - (paddr < (unsigned long)__entry_text_end)) || - ((paddr >= (unsigned long)__irqentry_text_start) && - (paddr < (unsigned long)__irqentry_text_end))) + (paddr < (unsigned long)__entry_text_end))) return 0; /* Check there is enough space for a relative jump. */ diff --git a/arch/x86/kernel/unwind_frame.c b/arch/x86/kernel/unwind_frame.c index 54226110bc7f..722a85f3b2dd 100644 --- a/arch/x86/kernel/unwind_frame.c +++ b/arch/x86/kernel/unwind_frame.c @@ -74,13 +74,7 @@ static bool in_entry_code(unsigned long ip) { char *addr = (char *)ip; - if (addr >= __entry_text_start && addr < __entry_text_end) - return true; - - if (addr >= __irqentry_text_start && addr < __irqentry_text_end) - return true; - - return false; + return addr >= __entry_text_start && addr < __entry_text_end; } static inline unsigned long *last_frame(struct unwind_state *state) diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S index 1bf7e312361f..b4c6b6f35548 100644 --- a/arch/x86/kernel/vmlinux.lds.S +++ b/arch/x86/kernel/vmlinux.lds.S @@ -134,7 +134,6 @@ SECTIONS KPROBES_TEXT ALIGN_ENTRY_TEXT_BEGIN ENTRY_TEXT - IRQENTRY_TEXT ALIGN_ENTRY_TEXT_END SOFTIRQENTRY_TEXT *(.fixup) diff --git a/arch/x86/mm/pti.c b/arch/x86/mm/pti.c index a3c6757a65c7..a8a924b3c335 100644 --- a/arch/x86/mm/pti.c +++ b/arch/x86/mm/pti.c @@ -492,12 +492,12 @@ static void __init pti_setup_espfix64(void) } /* - * Clone the populated PMDs of the entry and irqentry text and force it RO. + * Clone the populated PMDs of the entry text and force it RO. */ static void pti_clone_entry_text(void) { pti_clone_pgtable((unsigned long) __entry_text_start, - (unsigned long) __irqentry_text_end, + (unsigned long) __entry_text_end, PTI_CLONE_PMD); } diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h index 80f637c3a6f3..5db970b6615a 100644 --- a/include/linux/interrupt.h +++ b/include/linux/interrupt.h @@ -760,8 +760,10 @@ extern int arch_early_irq_init(void); /* * We want to know which function is an entrypoint of a hardirq or a softirq. 
*/ -#define __irq_entry __attribute__((__section__(".irqentry.text"))) -#define __softirq_entry \ - __attribute__((__section__(".softirqentry.text"))) +#ifndef __irq_entry +# define __irq_entry __attribute__((__section__(".irqentry.text"))) +#endif + +#define __softirq_entry __attribute__((__section__(".softirqentry.text"))) #endif -- cgit v1.2.3 From 15a416e8aaa758b5534f64a3972dae05275bc225 Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Thu, 11 Jun 2020 20:26:38 -0700 Subject: x86/entry: Treat BUG/WARN as NMI-like entries BUG/WARN are cleverly optimized using UD2 to handle the BUG/WARN out of line in an exception fixup. But if BUG or WARN is issued in a funny RCU context, then the idtentry_enter...() path might helpfully WARN that the RCU context is invalid, which results in infinite recursion. Split the BUG/WARN handling into an nmi_enter()/nmi_exit() path in exc_invalid_op() to increase the chance to survive the experience. [ tglx: Make the declaration match the implementation ] Signed-off-by: Andy Lutomirski Signed-off-by: Thomas Gleixner Link: https://lkml.kernel.org/r/f8fe40e0088749734b4435b554f73eee53dcf7a8.1591932307.git.luto@kernel.org --- arch/x86/include/asm/idtentry.h | 2 +- arch/x86/kernel/traps.c | 64 ++++++++++++++++++++++++----------------- arch/x86/mm/extable.c | 15 ++++++++-- 3 files changed, 52 insertions(+), 29 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/idtentry.h b/arch/x86/include/asm/idtentry.h index d203c541a65a..2fc6b0c58341 100644 --- a/arch/x86/include/asm/idtentry.h +++ b/arch/x86/include/asm/idtentry.h @@ -543,7 +543,6 @@ SYM_CODE_END(spurious_entries_start) DECLARE_IDTENTRY(X86_TRAP_DE, exc_divide_error); DECLARE_IDTENTRY(X86_TRAP_OF, exc_overflow); DECLARE_IDTENTRY(X86_TRAP_BR, exc_bounds); -DECLARE_IDTENTRY(X86_TRAP_UD, exc_invalid_op); DECLARE_IDTENTRY(X86_TRAP_NM, exc_device_not_available); DECLARE_IDTENTRY(X86_TRAP_OLD_MF, exc_coproc_segment_overrun); DECLARE_IDTENTRY(X86_TRAP_SPURIOUS, exc_spurious_interrupt_bug); @@ -561,6 +560,7 @@ DECLARE_IDTENTRY_ERRORCODE(X86_TRAP_GP, exc_general_protection); DECLARE_IDTENTRY_ERRORCODE(X86_TRAP_AC, exc_alignment_check); /* Raw exception entries which need extra work */ +DECLARE_IDTENTRY_RAW(X86_TRAP_UD, exc_invalid_op); DECLARE_IDTENTRY_RAW(X86_TRAP_BP, exc_int3); DECLARE_IDTENTRY_RAW_ERRORCODE(X86_TRAP_PF, exc_page_fault); diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index 7febae381b91..af75109485c2 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -97,24 +97,6 @@ int is_valid_bugaddr(unsigned long addr) return ud == INSN_UD0 || ud == INSN_UD2; } -int fixup_bug(struct pt_regs *regs, int trapnr) -{ - if (trapnr != X86_TRAP_UD) - return 0; - - switch (report_bug(regs->ip, regs)) { - case BUG_TRAP_TYPE_NONE: - case BUG_TRAP_TYPE_BUG: - break; - - case BUG_TRAP_TYPE_WARN: - regs->ip += LEN_UD2; - return 1; - } - - return 0; -} - static nokprobe_inline int do_trap_no_signal(struct task_struct *tsk, int trapnr, const char *str, struct pt_regs *regs, long error_code) @@ -190,13 +172,6 @@ static void do_error_trap(struct pt_regs *regs, long error_code, char *str, { RCU_LOCKDEP_WARN(!rcu_is_watching(), "entry code didn't wake RCU"); - /* - * WARN*()s end up here; fix them up before we call the - * notifier chain. 
- */ - if (!user_mode(regs) && fixup_bug(regs, trapnr)) - return; - if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) != NOTIFY_STOP) { cond_local_irq_enable(regs); @@ -241,9 +216,46 @@ static inline void handle_invalid_op(struct pt_regs *regs) ILL_ILLOPN, error_get_trap_addr(regs)); } -DEFINE_IDTENTRY(exc_invalid_op) +DEFINE_IDTENTRY_RAW(exc_invalid_op) { + bool rcu_exit; + + /* + * Handle BUG/WARN like NMIs instead of like normal idtentries: + * if we bugged/warned in a bad RCU context, for example, the last + * thing we want is to BUG/WARN again in the idtentry code, ad + * infinitum. + */ + if (!user_mode(regs) && is_valid_bugaddr(regs->ip)) { + enum bug_trap_type type; + + nmi_enter(); + instrumentation_begin(); + trace_hardirqs_off_finish(); + type = report_bug(regs->ip, regs); + if (regs->flags & X86_EFLAGS_IF) + trace_hardirqs_on_prepare(); + instrumentation_end(); + nmi_exit(); + + if (type == BUG_TRAP_TYPE_WARN) { + /* Skip the ud2. */ + regs->ip += LEN_UD2; + return; + } + + /* + * Else, if this was a BUG and report_bug returns or if this + * was just a normal #UD, we want to continue onward and + * crash. + */ + } + + rcu_exit = idtentry_enter_cond_rcu(regs); + instrumentation_begin(); handle_invalid_op(regs); + instrumentation_end(); + idtentry_exit_cond_rcu(regs, rcu_exit); } DEFINE_IDTENTRY(exc_coproc_segment_overrun) diff --git a/arch/x86/mm/extable.c b/arch/x86/mm/extable.c index b991aa4bdfae..1d6cb07f4f86 100644 --- a/arch/x86/mm/extable.c +++ b/arch/x86/mm/extable.c @@ -204,8 +204,19 @@ void __init early_fixup_exception(struct pt_regs *regs, int trapnr) if (fixup_exception(regs, trapnr, regs->orig_ax, 0)) return; - if (fixup_bug(regs, trapnr)) - return; + if (trapnr == X86_TRAP_UD) { + if (report_bug(regs->ip, regs) == BUG_TRAP_TYPE_WARN) { + /* Skip the ud2. */ + regs->ip += LEN_UD2; + return; + } + + /* + * If this was a BUG and report_bug returns or if this + * was just a normal #UD, we want to continue onward and + * crash. + */ + } fail: early_printk("PANIC: early exception 0x%02x IP %lx:%lx error %lx cr2 0x%lx\n", -- cgit v1.2.3 From 71ed49d8fb33023f242419a77ecb1141c029cac4 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 12 Jun 2020 14:02:27 +0200 Subject: x86/entry: Make NMI use IDTENTRY_RAW For no reason other than beginning brainmelt, IDTENTRY_NMI was mapped to IDTENTRY_IST. This is not a problem on 64bit because the IST default entry point maps to IDTENTRY_RAW which does not do any entry handling. The surplus function declaration for the noist C entry point is unused, and as there is no ASM code emitted for NMI this went unnoticed. On 32bit IDTENTRY_IST maps to a regular IDTENTRY which does the normal entry handling. That is clearly the wrong thing to do for NMI. Map it to IDTENTRY_RAW to unbreak it. The IDTENTRY_NMI mapping needs to stay to avoid emitting ASM code.
Fixes: 6271fef00b34 ("x86/entry: Convert NMI to IDTENTRY_NMI") Reported-by: Naresh Kamboju Debugged-by: Andy Lutomirski Signed-off-by: Thomas Gleixner Link: https://lkml.kernel.org/r/CA+G9fYvF3cyrY+-iw_SZtpN-i2qA2BruHg4M=QYECU2-dNdsMw@mail.gmail.com --- arch/x86/include/asm/idtentry.h | 4 ++-- arch/x86/kernel/nmi.c | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/idtentry.h b/arch/x86/include/asm/idtentry.h index 2fc6b0c58341..cf51c50eb356 100644 --- a/arch/x86/include/asm/idtentry.h +++ b/arch/x86/include/asm/idtentry.h @@ -391,8 +391,8 @@ __visible noinstr void func(struct pt_regs *regs, \ #define DEFINE_IDTENTRY_MCE DEFINE_IDTENTRY_IST #define DEFINE_IDTENTRY_MCE_USER DEFINE_IDTENTRY_NOIST -#define DECLARE_IDTENTRY_NMI DECLARE_IDTENTRY_IST -#define DEFINE_IDTENTRY_NMI DEFINE_IDTENTRY_IST +#define DECLARE_IDTENTRY_NMI DECLARE_IDTENTRY_RAW +#define DEFINE_IDTENTRY_NMI DEFINE_IDTENTRY_RAW #define DECLARE_IDTENTRY_DEBUG DECLARE_IDTENTRY_IST #define DEFINE_IDTENTRY_DEBUG DEFINE_IDTENTRY_IST diff --git a/arch/x86/kernel/nmi.c b/arch/x86/kernel/nmi.c index 3a98ff36f411..2de365f15684 100644 --- a/arch/x86/kernel/nmi.c +++ b/arch/x86/kernel/nmi.c @@ -476,7 +476,7 @@ static DEFINE_PER_CPU(enum nmi_states, nmi_state); static DEFINE_PER_CPU(unsigned long, nmi_cr2); static DEFINE_PER_CPU(unsigned long, nmi_dr7); -DEFINE_IDTENTRY_NMI(exc_nmi) +DEFINE_IDTENTRY_RAW(exc_nmi) { if (IS_ENABLED(CONFIG_SMP) && cpu_is_offline(smp_processor_id())) return; -- cgit v1.2.3 From 0bf3924bfabd13ba21aa702344fc00b3b3263e5a Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 12 Jun 2020 15:55:00 +0200 Subject: x86/entry: Force rcu_irq_enter() when in idle task The idea of conditionally calling into rcu_irq_enter() only when RCU is not watching turned out to be not completely thought through. Paul noticed occasional premature end of grace periods in RCU torture testing. Bisection led to the commit which made the invocation of rcu_irq_enter() conditional on !rcu_is_watching(). It turned out that this conditional breaks RCU assumptions about the idle task when the scheduler tick happens to be a nested interrupt. Nested interrupts can happen when the first interrupt invokes softirq processing on return which enables interrupts. If that nested tick interrupt does not invoke rcu_irq_enter() then RCU's irq-nesting checks will believe that this interrupt came directly from idle, which will cause RCU to report a quiescent state. Because this interrupt instead came from a softirq handler which might have been executing an RCU read-side critical section, this can cause the grace period to end prematurely. Change the condition from !rcu_is_watching() to is_idle_task(current) which enforces that interrupts in the idle task unconditionally invoke rcu_irq_enter() independent of the RCU state. This is also correct vs. user mode entries in NOHZ full scenarios because user mode entries bring RCU out of EQS and force the RCU irq nesting state accounting to nested. As only the first interrupt can enter from user mode, a nested tick interrupt will enter from kernel mode, and as the nesting state accounting is forced to nested it will not do anything stupid even if rcu_irq_enter() has not been invoked. Fixes: 3eeec3858488 ("x86/entry: Provide idtentry_entry/exit_cond_rcu()") Reported-by: "Paul E. McKenney" Signed-off-by: Thomas Gleixner Tested-by: "Paul E. McKenney" Reviewed-by: "Paul E.
McKenney" Acked-by: Andy Lutomirski Acked-by: Frederic Weisbecker Link: https://lkml.kernel.org/r/87wo4cxubv.fsf@nanos.tec.linutronix.de --- arch/x86/entry/common.c | 35 ++++++++++++++++++++++++++++------- 1 file changed, 28 insertions(+), 7 deletions(-) (limited to 'arch') diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c index f4d57782c14b..bd3f14175193 100644 --- a/arch/x86/entry/common.c +++ b/arch/x86/entry/common.c @@ -557,14 +557,34 @@ bool noinstr idtentry_enter_cond_rcu(struct pt_regs *regs) return false; } - if (!__rcu_is_watching()) { + /* + * If this entry hit the idle task, invoke rcu_irq_enter() whether + * RCU is watching or not. + * + * Interrupts can nest when the first interrupt invokes softirq + * processing on return which enables interrupts. + * + * Scheduler ticks in the idle task can mark quiescent state and + * terminate a grace period, if and only if the timer interrupt is + * not nested into another interrupt. + * + * Checking for __rcu_is_watching() here would prevent the nesting + * interrupt from invoking rcu_irq_enter(). If that nested interrupt is + * the tick then rcu_flavor_sched_clock_irq() would wrongfully + * assume that it is the first interrupt and eventually claim + * quiescent state and end grace periods prematurely. + * + * Unconditionally invoke rcu_irq_enter() so RCU state stays + * consistent. + * + * TINY_RCU does not support EQS, so let the compiler eliminate + * this part when enabled. + */ + if (!IS_ENABLED(CONFIG_TINY_RCU) && is_idle_task(current)) { /* * If RCU is not watching then the same careful * sequence vs. lockdep and tracing is required * as in enter_from_user_mode(). - * - * This only happens for IRQs that hit the idle - * loop, i.e. if idle is not using MWAIT. */ lockdep_hardirqs_off(CALLER_ADDR0); rcu_irq_enter(); @@ -576,9 +596,10 @@ bool noinstr idtentry_enter_cond_rcu(struct pt_regs *regs) } /* - * If RCU is watching then RCU only wants to check - * whether it needs to restart the tick in NOHZ - * mode. + * If RCU is watching then RCU only wants to check whether it needs + * to restart the tick in NOHZ mode. rcu_irq_enter_check_tick() + * already contains a warning when RCU is not watching, so no point + * in having another one here. */ instrumentation_begin(); rcu_irq_enter_check_tick(); -- cgit v1.2.3
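Taken together, the patched entry logic reads roughly as the condensed sketch below. This is abridged from arch/x86/entry/common.c after this change; lockdep and tracing details are trimmed and the error paths of the real function are omitted:

bool noinstr idtentry_enter_cond_rcu(struct pt_regs *regs)
{
	if (user_mode(regs)) {
		enter_from_user_mode();
		return false;
	}

	/*
	 * Idle task: invoke rcu_irq_enter() unconditionally so a tick
	 * nested into a softirq-enabled interrupt cannot be mistaken
	 * for a first-level interrupt from idle.
	 */
	if (!IS_ENABLED(CONFIG_TINY_RCU) && is_idle_task(current)) {
		lockdep_hardirqs_off(CALLER_ADDR0);
		rcu_irq_enter();
		instrumentation_begin();
		trace_hardirqs_off_finish();
		instrumentation_end();
		return true;	/* exit path must call rcu_irq_exit() */
	}

	/*
	 * RCU is watching; only let it check whether the NOHZ tick
	 * needs to be restarted.
	 */
	lockdep_hardirqs_off(CALLER_ADDR0);
	instrumentation_begin();
	rcu_irq_enter_check_tick();
	trace_hardirqs_off_finish();
	instrumentation_end();
	return false;
}

The boolean return value tells idtentry_exit_cond_rcu() whether a matching rcu_irq_exit() is required on the way out.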