From f960181d5d88ab6c84be8fb5e7f75080c78dfdd1 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Thu, 3 Aug 2017 17:23:19 +0100 Subject: arm64: neon: replace generic definition of may_use_simd() In preparation of modifying the logic that decides whether kernel mode NEON is allowable, which is required for SVE support, introduce an implementation of may_use_simd() that reflects the current reality, i.e., that SIMD is allowed in any context. Signed-off-by: Ard Biesheuvel Reviewed-by: Dave Martin Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/Kbuild | 1 - arch/arm64/include/asm/simd.h | 23 +++++++++++++++++++++++ 2 files changed, 23 insertions(+), 1 deletion(-) create mode 100644 arch/arm64/include/asm/simd.h (limited to 'arch/arm64') diff --git a/arch/arm64/include/asm/Kbuild b/arch/arm64/include/asm/Kbuild index f81c7b685fc6..2326e39d5892 100644 --- a/arch/arm64/include/asm/Kbuild +++ b/arch/arm64/include/asm/Kbuild @@ -20,7 +20,6 @@ generic-y += rwsem.h generic-y += segment.h generic-y += serial.h generic-y += set_memory.h -generic-y += simd.h generic-y += sizes.h generic-y += switch_to.h generic-y += trace_clock.h diff --git a/arch/arm64/include/asm/simd.h b/arch/arm64/include/asm/simd.h new file mode 100644 index 000000000000..96959b52afae --- /dev/null +++ b/arch/arm64/include/asm/simd.h @@ -0,0 +1,23 @@ +/* + * Copyright (C) 2017 Linaro Ltd. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation. + */ + +#ifndef __ASM_SIMD_H +#define __ASM_SIMD_H + +#include + +/* + * may_use_simd - whether it is allowable at this time to issue SIMD + * instructions or access the SIMD register file + */ +static __must_check inline bool may_use_simd(void) +{ + return true; +} + +#endif -- cgit v1.2.3 From 0fc9179ad0bf2f97790c0568442299679ca346cf Mon Sep 17 00:00:00 2001 From: Dave Martin Date: Thu, 3 Aug 2017 17:23:20 +0100 Subject: arm64: neon: Add missing header guard in asm/neon.h doesn't have a header inclusion guard, but it should have one for consistency with other headers. This patch adds a suitable guard. Signed-off-by: Dave Martin Reviewed-by: Ard Biesheuvel Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/neon.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'arch/arm64') diff --git a/arch/arm64/include/asm/neon.h b/arch/arm64/include/asm/neon.h index ad4cdc966c0f..5368bd04fe7b 100644 --- a/arch/arm64/include/asm/neon.h +++ b/arch/arm64/include/asm/neon.h @@ -8,6 +8,9 @@ * published by the Free Software Foundation. */ +#ifndef __ASM_NEON_H +#define __ASM_NEON_H + #include #include @@ -17,3 +20,5 @@ void kernel_neon_begin_partial(u32 num_regs); void kernel_neon_end(void); + +#endif /* ! __ASM_NEON_H */ -- cgit v1.2.3 From 504641859e5c616210c0894149e09fb6928e398f Mon Sep 17 00:00:00 2001 From: Dave Martin Date: Thu, 3 Aug 2017 17:23:21 +0100 Subject: arm64: fpsimd: Consistently use __this_cpu_ ops where appropriate __this_cpu_ ops are not used consistently with regard to this_cpu_ ops in a couple of places in fpsimd.c. Since preemption is explicitly disabled in fpsimd_restore_current_state() and fpsimd_update_current_state(), this patch converts this_cpu_ ops in those functions to __this_cpu_ ops. This doesn't save cost on arm64, but benefits from additional assertions in the core code. 
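As a rough illustration of the pattern being relied on (placeholder names, not code from the patch): the caller has already disabled preemption, so the raw per-CPU op is safe, and with CONFIG_DEBUG_PREEMPT it also asserts that this is the case.

  #include <linux/percpu.h>
  #include <linux/preempt.h>

  static DEFINE_PER_CPU(void *, example_last_state);	/* placeholder */

  static void example_update(void *st)
  {
  	preempt_disable();
  	/*
  	 * Preemption is off, so we cannot migrate while writing; with
  	 * CONFIG_DEBUG_PREEMPT the raw __this_cpu_write() also warns if
  	 * that assumption is ever broken, which this_cpu_write() would
  	 * not.
  	 */
  	__this_cpu_write(example_last_state, st);
  	preempt_enable();
  }
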
Signed-off-by: Dave Martin Reviewed-by: Ard Biesheuvel Signed-off-by: Catalin Marinas --- arch/arm64/kernel/fpsimd.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/arm64') diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c index 06da8ea16bbe..d7e5f8a2d4f5 100644 --- a/arch/arm64/kernel/fpsimd.c +++ b/arch/arm64/kernel/fpsimd.c @@ -194,7 +194,7 @@ void fpsimd_restore_current_state(void) struct fpsimd_state *st = ¤t->thread.fpsimd_state; fpsimd_load_state(st); - this_cpu_write(fpsimd_last_state, st); + __this_cpu_write(fpsimd_last_state, st); st->cpu = smp_processor_id(); } preempt_enable(); @@ -214,7 +214,7 @@ void fpsimd_update_current_state(struct fpsimd_state *state) if (test_and_clear_thread_flag(TIF_FOREIGN_FPSTATE)) { struct fpsimd_state *st = ¤t->thread.fpsimd_state; - this_cpu_write(fpsimd_last_state, st); + __this_cpu_write(fpsimd_last_state, st); st->cpu = smp_processor_id(); } preempt_enable(); -- cgit v1.2.3 From 4328825d4fdc185d365d8e858cace8b324198a70 Mon Sep 17 00:00:00 2001 From: Dave Martin Date: Thu, 3 Aug 2017 17:23:22 +0100 Subject: arm64: neon: Allow EFI runtime services to use FPSIMD in irq context In order to be able to cope with kernel-mode NEON being unavailable in hardirq/nmi context and non-nestable, we need special handling for EFI runtime service calls that may be made during an interrupt that interrupted a kernel_neon_begin()..._end() block. This will occur if the kernel tries to write diagnostic data to EFI persistent storage during a panic triggered by an NMI for example. EFI runtime services specify an ABI that clobbers the FPSIMD state, rather than being able to use it optionally as an accelerator. This means that EFI is really a special case and can be handled specially. To enable EFI calls from interrupts, this patch creates dedicated __efi_fpsimd_{begin,end}() helpers solely for this purpose, which save/restore to a separate percpu buffer if called in a context where kernel_neon_begin() is not usable. Signed-off-by: Dave Martin Reviewed-by: Ard Biesheuvel Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/efi.h | 5 ++-- arch/arm64/include/asm/fpsimd.h | 4 ++++ arch/arm64/kernel/fpsimd.c | 52 +++++++++++++++++++++++++++++++++++++++++ 3 files changed, 59 insertions(+), 2 deletions(-) (limited to 'arch/arm64') diff --git a/arch/arm64/include/asm/efi.h b/arch/arm64/include/asm/efi.h index 8f3043aba873..835822242a1a 100644 --- a/arch/arm64/include/asm/efi.h +++ b/arch/arm64/include/asm/efi.h @@ -3,6 +3,7 @@ #include #include +#include #include #include #include @@ -20,8 +21,8 @@ int efi_set_mapping_permissions(struct mm_struct *mm, efi_memory_desc_t *md); #define arch_efi_call_virt_setup() \ ({ \ - kernel_neon_begin(); \ efi_virtmap_load(); \ + __efi_fpsimd_begin(); \ }) #define arch_efi_call_virt(p, f, args...) 
\ @@ -33,8 +34,8 @@ int efi_set_mapping_permissions(struct mm_struct *mm, efi_memory_desc_t *md); #define arch_efi_call_virt_teardown() \ ({ \ + __efi_fpsimd_end(); \ efi_virtmap_unload(); \ - kernel_neon_end(); \ }) #define ARCH_EFI_IRQ_FLAGS_MASK (PSR_D_BIT | PSR_A_BIT | PSR_I_BIT | PSR_F_BIT) diff --git a/arch/arm64/include/asm/fpsimd.h b/arch/arm64/include/asm/fpsimd.h index 50f559f574fe..5155f21e15e3 100644 --- a/arch/arm64/include/asm/fpsimd.h +++ b/arch/arm64/include/asm/fpsimd.h @@ -81,6 +81,10 @@ extern void fpsimd_save_partial_state(struct fpsimd_partial_state *state, u32 num_regs); extern void fpsimd_load_partial_state(struct fpsimd_partial_state *state); +/* For use by EFI runtime services calls only */ +extern void __efi_fpsimd_begin(void); +extern void __efi_fpsimd_end(void); + #endif #endif diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c index d7e5f8a2d4f5..bcde88e2d981 100644 --- a/arch/arm64/kernel/fpsimd.c +++ b/arch/arm64/kernel/fpsimd.c @@ -21,12 +21,15 @@ #include #include #include +#include #include #include #include #include #include +#include +#include #define FPEXC_IOF (1 << 0) #define FPEXC_DZF (1 << 1) @@ -276,6 +279,55 @@ void kernel_neon_end(void) } EXPORT_SYMBOL(kernel_neon_end); +DEFINE_PER_CPU(struct fpsimd_state, efi_fpsimd_state); +DEFINE_PER_CPU(bool, efi_fpsimd_state_used); + +/* + * EFI runtime services support functions + * + * The ABI for EFI runtime services allows EFI to use FPSIMD during the call. + * This means that for EFI (and only for EFI), we have to assume that FPSIMD + * is always used rather than being an optional accelerator. + * + * These functions provide the necessary support for ensuring FPSIMD + * save/restore in the contexts from which EFI is used. + * + * Do not use them for any other purpose -- if tempted to do so, you are + * either doing something wrong or you need to propose some refactoring. + */ + +/* + * __efi_fpsimd_begin(): prepare FPSIMD for making an EFI runtime services call + */ +void __efi_fpsimd_begin(void) +{ + if (!system_supports_fpsimd()) + return; + + WARN_ON(preemptible()); + + if (may_use_simd()) + kernel_neon_begin(); + else { + fpsimd_save_state(this_cpu_ptr(&efi_fpsimd_state)); + __this_cpu_write(efi_fpsimd_state_used, true); + } +} + +/* + * __efi_fpsimd_end(): clean up FPSIMD after an EFI runtime services call + */ +void __efi_fpsimd_end(void) +{ + if (!system_supports_fpsimd()) + return; + + if (__this_cpu_xchg(efi_fpsimd_state_used, false)) + fpsimd_load_state(this_cpu_ptr(&efi_fpsimd_state)); + else + kernel_neon_end(); +} + #endif /* CONFIG_KERNEL_MODE_NEON */ #ifdef CONFIG_CPU_PM -- cgit v1.2.3 From cb84d11e1625aa3a081d898ca2640bf3a9ca0e96 Mon Sep 17 00:00:00 2001 From: Dave Martin Date: Thu, 3 Aug 2017 17:23:23 +0100 Subject: arm64: neon: Remove support for nested or hardirq kernel-mode NEON Support for kernel-mode NEON to be nested and/or used in hardirq context adds significant complexity, and the benefits may be marginal. In practice, kernel-mode NEON is not used in hardirq context, and is rarely used in softirq context (by certain mac80211 drivers). This patch implements an arm64 may_use_simd() function to allow clients to check whether kernel-mode NEON is usable in the current context, and simplifies kernel_neon_{begin,end}() to handle only saving of the task FPSIMD state (if any). Without nesting, there is no other state to save. The partial fpsimd save/restore functions become redundant as a result of these changes, so they are removed too. 
The save/restore model is changed to operate directly on task_struct without additional percpu storage. This simplifies the code and saves a bit of memory, but means that softirqs must now be disabled when manipulating the task fpsimd state from task context: correspondingly, preempt_{en,dis}sable() calls are upgraded to local_bh_{en,dis}able() as appropriate. fpsimd_thread_switch() already runs with hardirqs disabled and so is already protected from softirqs. These changes should make it easier to support kernel-mode NEON in the presence of the Scalable Vector extension in the future. Signed-off-by: Dave Martin Reviewed-by: Ard Biesheuvel Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/fpsimd.h | 14 ----- arch/arm64/include/asm/fpsimdmacros.h | 56 ----------------- arch/arm64/include/asm/neon.h | 4 +- arch/arm64/include/asm/simd.h | 33 +++++++++- arch/arm64/kernel/entry-fpsimd.S | 24 ------- arch/arm64/kernel/fpsimd.c | 115 +++++++++++++++++++++++----------- 6 files changed, 111 insertions(+), 135 deletions(-) (limited to 'arch/arm64') diff --git a/arch/arm64/include/asm/fpsimd.h b/arch/arm64/include/asm/fpsimd.h index 5155f21e15e3..410c48163c6a 100644 --- a/arch/arm64/include/asm/fpsimd.h +++ b/arch/arm64/include/asm/fpsimd.h @@ -41,16 +41,6 @@ struct fpsimd_state { unsigned int cpu; }; -/* - * Struct for stacking the bottom 'n' FP/SIMD registers. - */ -struct fpsimd_partial_state { - u32 fpsr; - u32 fpcr; - u32 num_regs; - __uint128_t vregs[32]; -}; - #if defined(__KERNEL__) && defined(CONFIG_COMPAT) /* Masks for extracting the FPSR and FPCR from the FPSCR */ @@ -77,10 +67,6 @@ extern void fpsimd_update_current_state(struct fpsimd_state *state); extern void fpsimd_flush_task_state(struct task_struct *target); -extern void fpsimd_save_partial_state(struct fpsimd_partial_state *state, - u32 num_regs); -extern void fpsimd_load_partial_state(struct fpsimd_partial_state *state); - /* For use by EFI runtime services calls only */ extern void __efi_fpsimd_begin(void); extern void __efi_fpsimd_end(void); diff --git a/arch/arm64/include/asm/fpsimdmacros.h b/arch/arm64/include/asm/fpsimdmacros.h index a2daf1293028..0f5fdd388b0d 100644 --- a/arch/arm64/include/asm/fpsimdmacros.h +++ b/arch/arm64/include/asm/fpsimdmacros.h @@ -75,59 +75,3 @@ ldr w\tmpnr, [\state, #16 * 2 + 4] fpsimd_restore_fpcr x\tmpnr, \state .endm - -.macro fpsimd_save_partial state, numnr, tmpnr1, tmpnr2 - mrs x\tmpnr1, fpsr - str w\numnr, [\state, #8] - mrs x\tmpnr2, fpcr - stp w\tmpnr1, w\tmpnr2, [\state] - adr x\tmpnr1, 0f - add \state, \state, x\numnr, lsl #4 - sub x\tmpnr1, x\tmpnr1, x\numnr, lsl #1 - br x\tmpnr1 - stp q30, q31, [\state, #-16 * 30 - 16] - stp q28, q29, [\state, #-16 * 28 - 16] - stp q26, q27, [\state, #-16 * 26 - 16] - stp q24, q25, [\state, #-16 * 24 - 16] - stp q22, q23, [\state, #-16 * 22 - 16] - stp q20, q21, [\state, #-16 * 20 - 16] - stp q18, q19, [\state, #-16 * 18 - 16] - stp q16, q17, [\state, #-16 * 16 - 16] - stp q14, q15, [\state, #-16 * 14 - 16] - stp q12, q13, [\state, #-16 * 12 - 16] - stp q10, q11, [\state, #-16 * 10 - 16] - stp q8, q9, [\state, #-16 * 8 - 16] - stp q6, q7, [\state, #-16 * 6 - 16] - stp q4, q5, [\state, #-16 * 4 - 16] - stp q2, q3, [\state, #-16 * 2 - 16] - stp q0, q1, [\state, #-16 * 0 - 16] -0: -.endm - -.macro fpsimd_restore_partial state, tmpnr1, tmpnr2 - ldp w\tmpnr1, w\tmpnr2, [\state] - msr fpsr, x\tmpnr1 - fpsimd_restore_fpcr x\tmpnr2, x\tmpnr1 - adr x\tmpnr1, 0f - ldr w\tmpnr2, [\state, #8] - add \state, \state, x\tmpnr2, lsl #4 - sub x\tmpnr1, x\tmpnr1, 
x\tmpnr2, lsl #1 - br x\tmpnr1 - ldp q30, q31, [\state, #-16 * 30 - 16] - ldp q28, q29, [\state, #-16 * 28 - 16] - ldp q26, q27, [\state, #-16 * 26 - 16] - ldp q24, q25, [\state, #-16 * 24 - 16] - ldp q22, q23, [\state, #-16 * 22 - 16] - ldp q20, q21, [\state, #-16 * 20 - 16] - ldp q18, q19, [\state, #-16 * 18 - 16] - ldp q16, q17, [\state, #-16 * 16 - 16] - ldp q14, q15, [\state, #-16 * 14 - 16] - ldp q12, q13, [\state, #-16 * 12 - 16] - ldp q10, q11, [\state, #-16 * 10 - 16] - ldp q8, q9, [\state, #-16 * 8 - 16] - ldp q6, q7, [\state, #-16 * 6 - 16] - ldp q4, q5, [\state, #-16 * 4 - 16] - ldp q2, q3, [\state, #-16 * 2 - 16] - ldp q0, q1, [\state, #-16 * 0 - 16] -0: -.endm diff --git a/arch/arm64/include/asm/neon.h b/arch/arm64/include/asm/neon.h index 5368bd04fe7b..fb9d137256a6 100644 --- a/arch/arm64/include/asm/neon.h +++ b/arch/arm64/include/asm/neon.h @@ -16,9 +16,7 @@ #define cpu_has_neon() system_supports_fpsimd() -#define kernel_neon_begin() kernel_neon_begin_partial(32) - -void kernel_neon_begin_partial(u32 num_regs); +void kernel_neon_begin(void); void kernel_neon_end(void); #endif /* ! __ASM_NEON_H */ diff --git a/arch/arm64/include/asm/simd.h b/arch/arm64/include/asm/simd.h index 96959b52afae..5a1a927b74a2 100644 --- a/arch/arm64/include/asm/simd.h +++ b/arch/arm64/include/asm/simd.h @@ -9,15 +9,46 @@ #ifndef __ASM_SIMD_H #define __ASM_SIMD_H +#include +#include +#include #include +#ifdef CONFIG_KERNEL_MODE_NEON + +DECLARE_PER_CPU(bool, kernel_neon_busy); + /* * may_use_simd - whether it is allowable at this time to issue SIMD * instructions or access the SIMD register file + * + * Callers must not assume that the result remains true beyond the next + * preempt_enable() or return from softirq context. */ static __must_check inline bool may_use_simd(void) { - return true; + /* + * The raw_cpu_read() is racy if called with preemption enabled. + * This is not a bug: kernel_neon_busy is only set when + * preemption is disabled, so we cannot migrate to another CPU + * while it is set, nor can we migrate to a CPU where it is set. + * So, if we find it clear on some CPU then we're guaranteed to + * find it clear on any CPU we could migrate to. + * + * If we are in between kernel_neon_begin()...kernel_neon_end(), + * the flag will be set, but preemption is also disabled, so we + * can't migrate to another CPU and spuriously see it become + * false. + */ + return !in_irq() && !in_nmi() && !raw_cpu_read(kernel_neon_busy); } +#else /* ! CONFIG_KERNEL_MODE_NEON */ + +static __must_check inline bool may_use_simd(void) { + return false; +} + +#endif /* ! CONFIG_KERNEL_MODE_NEON */ + #endif diff --git a/arch/arm64/kernel/entry-fpsimd.S b/arch/arm64/kernel/entry-fpsimd.S index c44a82f146b1..6a27cd6dbfa6 100644 --- a/arch/arm64/kernel/entry-fpsimd.S +++ b/arch/arm64/kernel/entry-fpsimd.S @@ -41,27 +41,3 @@ ENTRY(fpsimd_load_state) fpsimd_restore x0, 8 ret ENDPROC(fpsimd_load_state) - -#ifdef CONFIG_KERNEL_MODE_NEON - -/* - * Save the bottom n FP registers. - * - * x0 - pointer to struct fpsimd_partial_state - */ -ENTRY(fpsimd_save_partial_state) - fpsimd_save_partial x0, 1, 8, 9 - ret -ENDPROC(fpsimd_save_partial_state) - -/* - * Load the bottom n FP registers. 
- * - * x0 - pointer to struct fpsimd_partial_state - */ -ENTRY(fpsimd_load_partial_state) - fpsimd_restore_partial x0, 8, 9 - ret -ENDPROC(fpsimd_load_partial_state) - -#endif diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c index bcde88e2d981..138fcfaeadc1 100644 --- a/arch/arm64/kernel/fpsimd.c +++ b/arch/arm64/kernel/fpsimd.c @@ -17,18 +17,18 @@ * along with this program. If not, see . */ +#include #include #include #include #include +#include #include #include #include -#include #include #include -#include #include #define FPEXC_IOF (1 << 0) @@ -65,6 +65,13 @@ * CPU currently contain the most recent userland FPSIMD state of the current * task. * + * In order to allow softirq handlers to use FPSIMD, kernel_neon_begin() may + * save the task's FPSIMD context back to task_struct from softirq context. + * To prevent this from racing with the manipulation of the task's FPSIMD state + * from task context and thereby corrupting the state, it is necessary to + * protect any manipulation of a task's fpsimd_state or TIF_FOREIGN_FPSTATE + * flag with local_bh_disable() unless softirqs are already masked. + * * For a certain task, the sequence may look something like this: * - the task gets scheduled in; if both the task's fpsimd_state.cpu field * contains the id of the current CPU, and the CPU's fpsimd_last_state per-cpu @@ -164,9 +171,14 @@ void fpsimd_flush_thread(void) { if (!system_supports_fpsimd()) return; + + local_bh_disable(); + memset(¤t->thread.fpsimd_state, 0, sizeof(struct fpsimd_state)); fpsimd_flush_task_state(current); set_thread_flag(TIF_FOREIGN_FPSTATE); + + local_bh_enable(); } /* @@ -177,10 +189,13 @@ void fpsimd_preserve_current_state(void) { if (!system_supports_fpsimd()) return; - preempt_disable(); + + local_bh_disable(); + if (!test_thread_flag(TIF_FOREIGN_FPSTATE)) fpsimd_save_state(¤t->thread.fpsimd_state); - preempt_enable(); + + local_bh_enable(); } /* @@ -192,7 +207,9 @@ void fpsimd_restore_current_state(void) { if (!system_supports_fpsimd()) return; - preempt_disable(); + + local_bh_disable(); + if (test_and_clear_thread_flag(TIF_FOREIGN_FPSTATE)) { struct fpsimd_state *st = ¤t->thread.fpsimd_state; @@ -200,7 +217,8 @@ void fpsimd_restore_current_state(void) __this_cpu_write(fpsimd_last_state, st); st->cpu = smp_processor_id(); } - preempt_enable(); + + local_bh_enable(); } /* @@ -212,7 +230,9 @@ void fpsimd_update_current_state(struct fpsimd_state *state) { if (!system_supports_fpsimd()) return; - preempt_disable(); + + local_bh_disable(); + fpsimd_load_state(state); if (test_and_clear_thread_flag(TIF_FOREIGN_FPSTATE)) { struct fpsimd_state *st = ¤t->thread.fpsimd_state; @@ -220,7 +240,8 @@ void fpsimd_update_current_state(struct fpsimd_state *state) __this_cpu_write(fpsimd_last_state, st); st->cpu = smp_processor_id(); } - preempt_enable(); + + local_bh_enable(); } /* @@ -233,49 +254,69 @@ void fpsimd_flush_task_state(struct task_struct *t) #ifdef CONFIG_KERNEL_MODE_NEON -static DEFINE_PER_CPU(struct fpsimd_partial_state, hardirq_fpsimdstate); -static DEFINE_PER_CPU(struct fpsimd_partial_state, softirq_fpsimdstate); +DEFINE_PER_CPU(bool, kernel_neon_busy); /* * Kernel-side NEON support functions */ -void kernel_neon_begin_partial(u32 num_regs) + +/* + * kernel_neon_begin(): obtain the CPU FPSIMD registers for use by the calling + * context + * + * Must not be called unless may_use_simd() returns true. + * Task context in the FPSIMD registers is saved back to memory as necessary. 
+ * + * A matching call to kernel_neon_end() must be made before returning from the + * calling context. + * + * The caller may freely use the FPSIMD registers until kernel_neon_end() is + * called. + */ +void kernel_neon_begin(void) { if (WARN_ON(!system_supports_fpsimd())) return; - if (in_interrupt()) { - struct fpsimd_partial_state *s = this_cpu_ptr( - in_irq() ? &hardirq_fpsimdstate : &softirq_fpsimdstate); - BUG_ON(num_regs > 32); - fpsimd_save_partial_state(s, roundup(num_regs, 2)); - } else { - /* - * Save the userland FPSIMD state if we have one and if we - * haven't done so already. Clear fpsimd_last_state to indicate - * that there is no longer userland FPSIMD state in the - * registers. - */ - preempt_disable(); - if (current->mm && - !test_and_set_thread_flag(TIF_FOREIGN_FPSTATE)) - fpsimd_save_state(¤t->thread.fpsimd_state); - this_cpu_write(fpsimd_last_state, NULL); - } + BUG_ON(!may_use_simd()); + + local_bh_disable(); + + __this_cpu_write(kernel_neon_busy, true); + + /* Save unsaved task fpsimd state, if any: */ + if (current->mm && !test_and_set_thread_flag(TIF_FOREIGN_FPSTATE)) + fpsimd_save_state(¤t->thread.fpsimd_state); + + /* Invalidate any task state remaining in the fpsimd regs: */ + __this_cpu_write(fpsimd_last_state, NULL); + + preempt_disable(); + + local_bh_enable(); } -EXPORT_SYMBOL(kernel_neon_begin_partial); +EXPORT_SYMBOL(kernel_neon_begin); +/* + * kernel_neon_end(): give the CPU FPSIMD registers back to the current task + * + * Must be called from a context in which kernel_neon_begin() was previously + * called, with no call to kernel_neon_end() in the meantime. + * + * The caller must not use the FPSIMD registers after this function is called, + * unless kernel_neon_begin() is called again in the meantime. + */ void kernel_neon_end(void) { + bool busy; + if (!system_supports_fpsimd()) return; - if (in_interrupt()) { - struct fpsimd_partial_state *s = this_cpu_ptr( - in_irq() ? &hardirq_fpsimdstate : &softirq_fpsimdstate); - fpsimd_load_partial_state(s); - } else { - preempt_enable(); - } + + busy = __this_cpu_xchg(kernel_neon_busy, false); + WARN_ON(!busy); /* No matching kernel_neon_begin()? */ + + preempt_enable(); } EXPORT_SYMBOL(kernel_neon_end); -- cgit v1.2.3 From 174dfb12860eac361f3ced9fefb51393fec5bd32 Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Fri, 4 Aug 2017 15:10:12 +0100 Subject: arm64: neon: Temporarily add a kernel_mode_begin_partial() definition The crypto code currently relies on kernel_mode_begin_partial() being available. Until the corresponding crypto patches are merged, define this macro temporarily, though with different semantics as it cannot be called in interrupt context. Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/neon.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'arch/arm64') diff --git a/arch/arm64/include/asm/neon.h b/arch/arm64/include/asm/neon.h index fb9d137256a6..f922eaf780f9 100644 --- a/arch/arm64/include/asm/neon.h +++ b/arch/arm64/include/asm/neon.h @@ -19,4 +19,11 @@ void kernel_neon_begin(void); void kernel_neon_end(void); +/* + * Temporary macro to allow the crypto code to compile. Note that the + * semantics of kernel_neon_begin_partial() are now different from the + * original as it does not allow being called in an interrupt context. + */ +#define kernel_neon_begin_partial(num_regs) kernel_neon_begin() + #endif /* ! 
__ASM_NEON_H */ -- cgit v1.2.3 From 35d0e6fb4d219d64ab3b7cffef7a11a0662140f5 Mon Sep 17 00:00:00 2001 From: Dave Martin Date: Tue, 1 Aug 2017 15:35:53 +0100 Subject: arm64: syscallno is secretly an int, make it official The upper 32 bits of the syscallno field in thread_struct are handled inconsistently, being sometimes zero extended and sometimes sign-extended. In fact, only the lower 32 bits seem to have any real significance for the behaviour of the code: it's been OK to handle the upper bits inconsistently because they don't matter. Currently, the only place I can find where those bits are significant is in calling trace_sys_enter(), which may be unintentional: for example, if a compat tracer attempts to cancel a syscall by passing -1 to (COMPAT_)PTRACE_SET_SYSCALL at the syscall-enter-stop, it will be traced as syscall 4294967295 rather than -1 as might be expected (and as occurs for a native tracer doing the same thing). Elsewhere, reads of syscallno cast it to an int or truncate it. There's also a conspicuous amount of code and casting to bodge around the fact that although semantically an int, syscallno is stored as a u64. Let's not pretend any more. In order to preserve the stp x instruction that stores the syscall number in entry.S, this patch special-cases the layout of struct pt_regs for big endian so that the newly 32-bit syscallno field maps onto the low bits of the stored value. This is not beautiful, but benchmarking of the getpid syscall on Juno suggests indicates a minor slowdown if the stp is split into an stp x and stp w. Signed-off-by: Dave Martin Acked-by: Will Deacon Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/processor.h | 2 +- arch/arm64/include/asm/ptrace.h | 9 ++++++++- arch/arm64/kernel/entry.S | 34 +++++++++++++++++----------------- arch/arm64/kernel/ptrace.c | 2 +- arch/arm64/kernel/signal.c | 6 +++--- arch/arm64/kernel/signal32.c | 2 +- arch/arm64/kernel/traps.c | 2 +- 7 files changed, 32 insertions(+), 25 deletions(-) (limited to 'arch/arm64') diff --git a/arch/arm64/include/asm/processor.h b/arch/arm64/include/asm/processor.h index 64c9e78f9882..379def1d6b67 100644 --- a/arch/arm64/include/asm/processor.h +++ b/arch/arm64/include/asm/processor.h @@ -112,7 +112,7 @@ void tls_preserve_current_state(void); static inline void start_thread_common(struct pt_regs *regs, unsigned long pc) { memset(regs, 0, sizeof(*regs)); - regs->syscallno = ~0UL; + regs->syscallno = ~0; regs->pc = pc; } diff --git a/arch/arm64/include/asm/ptrace.h b/arch/arm64/include/asm/ptrace.h index 11403fdd0a50..21c87dc240e1 100644 --- a/arch/arm64/include/asm/ptrace.h +++ b/arch/arm64/include/asm/ptrace.h @@ -116,7 +116,14 @@ struct pt_regs { }; }; u64 orig_x0; - u64 syscallno; +#ifdef __AARCH64EB__ + u32 unused2; + s32 syscallno; +#else + s32 syscallno; + u32 unused2; +#endif + u64 orig_addr_limit; u64 unused; // maintain 16 byte alignment }; diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S index b738880350f9..3bf0bd7a2f29 100644 --- a/arch/arm64/kernel/entry.S +++ b/arch/arm64/kernel/entry.S @@ -142,8 +142,8 @@ alternative_else_nop_endif * Set syscallno to -1 by default (overridden later if real syscall). */ .if \el == 0 - mvn x21, xzr - str x21, [sp, #S_SYSCALLNO] + mvn w21, wzr + str w21, [sp, #S_SYSCALLNO] .endif /* @@ -290,8 +290,9 @@ alternative_else_nop_endif * * x7 is reserved for the system call number in 32-bit mode. 
*/ -sc_nr .req x25 // number of system calls -scno .req x26 // syscall number +wsc_nr .req w25 // number of system calls +wscno .req w26 // syscall number +xscno .req x26 // syscall number (zero-extended) stbl .req x27 // syscall table pointer tsk .req x28 // current thread_info @@ -577,8 +578,8 @@ el0_svc_compat: * AArch32 syscall handling */ adrp stbl, compat_sys_call_table // load compat syscall table pointer - uxtw scno, w7 // syscall number in w7 (r7) - mov sc_nr, #__NR_compat_syscalls + mov wscno, w7 // syscall number in w7 (r7) + mov wsc_nr, #__NR_compat_syscalls b el0_svc_naked .align 6 @@ -798,19 +799,19 @@ ENDPROC(ret_from_fork) .align 6 el0_svc: adrp stbl, sys_call_table // load syscall table pointer - uxtw scno, w8 // syscall number in w8 - mov sc_nr, #__NR_syscalls + mov wscno, w8 // syscall number in w8 + mov wsc_nr, #__NR_syscalls el0_svc_naked: // compat entry point - stp x0, scno, [sp, #S_ORIG_X0] // save the original x0 and syscall number + stp x0, xscno, [sp, #S_ORIG_X0] // save the original x0 and syscall number enable_dbg_and_irq ct_user_exit 1 ldr x16, [tsk, #TSK_TI_FLAGS] // check for syscall hooks tst x16, #_TIF_SYSCALL_WORK b.ne __sys_trace - cmp scno, sc_nr // check upper syscall limit + cmp wscno, wsc_nr // check upper syscall limit b.hs ni_sys - ldr x16, [stbl, scno, lsl #3] // address in the syscall table + ldr x16, [stbl, xscno, lsl #3] // address in the syscall table blr x16 // call sys_* routine b ret_fast_syscall ni_sys: @@ -824,24 +825,23 @@ ENDPROC(el0_svc) * switches, and waiting for our parent to respond. */ __sys_trace: - mov w0, #-1 // set default errno for - cmp scno, x0 // user-issued syscall(-1) + cmp wscno, #-1 // user-issued syscall(-1)? b.ne 1f - mov x0, #-ENOSYS + mov x0, #-ENOSYS // set default errno if so str x0, [sp, #S_X0] 1: mov x0, sp bl syscall_trace_enter cmp w0, #-1 // skip the syscall? b.eq __sys_trace_return_skipped - uxtw scno, w0 // syscall number (possibly new) + mov wscno, w0 // syscall number (possibly new) mov x1, sp // pointer to regs - cmp scno, sc_nr // check upper syscall limit + cmp wscno, wsc_nr // check upper syscall limit b.hs __ni_sys_trace ldp x0, x1, [sp] // restore the syscall args ldp x2, x3, [sp, #S_X2] ldp x4, x5, [sp, #S_X4] ldp x6, x7, [sp, #S_X6] - ldr x16, [stbl, scno, lsl #3] // address in the syscall table + ldr x16, [stbl, xscno, lsl #3] // address in the syscall table blr x16 // call sys_* routine __sys_trace_return: diff --git a/arch/arm64/kernel/ptrace.c b/arch/arm64/kernel/ptrace.c index 1b38c0150aec..de774805f672 100644 --- a/arch/arm64/kernel/ptrace.c +++ b/arch/arm64/kernel/ptrace.c @@ -1363,7 +1363,7 @@ static void tracehook_report_syscall(struct pt_regs *regs, if (dir == PTRACE_SYSCALL_EXIT) tracehook_report_syscall_exit(regs, 0); else if (tracehook_report_syscall_entry(regs)) - regs->syscallno = ~0UL; + regs->syscallno = ~0; regs->regs[regno] = saved_reg; } diff --git a/arch/arm64/kernel/signal.c b/arch/arm64/kernel/signal.c index 089c3747995d..4d04b891c00d 100644 --- a/arch/arm64/kernel/signal.c +++ b/arch/arm64/kernel/signal.c @@ -387,7 +387,7 @@ static int restore_sigframe(struct pt_regs *regs, /* * Avoid sys_rt_sigreturn() restarting. 
*/ - regs->syscallno = ~0UL; + regs->syscallno = ~0; err |= !valid_user_regs(®s->user_regs, current); if (err == 0) @@ -673,7 +673,7 @@ static void do_signal(struct pt_regs *regs) { unsigned long continue_addr = 0, restart_addr = 0; int retval = 0; - int syscall = (int)regs->syscallno; + int syscall = regs->syscallno; struct ksignal ksig; /* @@ -687,7 +687,7 @@ static void do_signal(struct pt_regs *regs) /* * Avoid additional syscall restarting via ret_to_user. */ - regs->syscallno = ~0UL; + regs->syscallno = ~0; /* * Prepare for system call restart. We do this here so that a diff --git a/arch/arm64/kernel/signal32.c b/arch/arm64/kernel/signal32.c index c747a0fc5d7d..d98ca76cbd39 100644 --- a/arch/arm64/kernel/signal32.c +++ b/arch/arm64/kernel/signal32.c @@ -354,7 +354,7 @@ static int compat_restore_sigframe(struct pt_regs *regs, /* * Avoid compat_sys_sigreturn() restarting. */ - regs->syscallno = ~0UL; + regs->syscallno = ~0; err |= !valid_user_regs(®s->user_regs, current); diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c index 8a62648848e5..0f047e916cee 100644 --- a/arch/arm64/kernel/traps.c +++ b/arch/arm64/kernel/traps.c @@ -593,7 +593,7 @@ asmlinkage long do_ni_syscall(struct pt_regs *regs) if (show_unhandled_signals_ratelimited()) { pr_info("%s[%d]: syscall %d\n", current->comm, - task_pid_nr(current), (int)regs->syscallno); + task_pid_nr(current), regs->syscallno); dump_instr("", regs); if (user_mode(regs)) __show_regs(regs); -- cgit v1.2.3 From 17c28958600928109049a3bcc814b0d5bfb1ff3a Mon Sep 17 00:00:00 2001 From: Dave Martin Date: Tue, 1 Aug 2017 15:35:54 +0100 Subject: arm64: Abstract syscallno manipulation The -1 "no syscall" value is written in various ways, shared with the user ABI in some places, and generally obscure. This patch attempts to make things a little more consistent and readable by replacing all these uses with a single #define. A couple of symbolic helpers are provided to clarify the intent further. Because the in-syscall check in do_signal() is changed from >= 0 to != NO_SYSCALL by this patch, different behaviour may be observable if syscallno is set to values less than -1 by a tracer. However, this is not different from the behaviour that is already observable if a tracer sets syscallno to a value >= __NR_(compat_)syscalls. It appears that this can cause spurious syscall restarting, but that is not a new behaviour either, and does not appear harmful. 
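For illustration only (a hypothetical tracer-side snippet, not part of the patch): since the "no syscall" value of -1 is shared with the user ABI, a native tracer stopped at syscall-entry can cancel the syscall by writing -1 through the NT_ARM_SYSTEM_CALL regset, the same value that forget_syscall() stores on the kernel side.

  #include <elf.h>
  #include <stdint.h>
  #include <sys/ptrace.h>
  #include <sys/types.h>
  #include <sys/uio.h>

  /* Hypothetical helper: skip the syscall the tracee is stopped at. */
  static void cancel_syscall(pid_t pid)
  {
  	int32_t scno = -1;	/* i.e. NO_SYSCALL */
  	struct iovec iov = {
  		.iov_base = &scno,
  		.iov_len  = sizeof(scno),
  	};

  	ptrace(PTRACE_SETREGSET, pid, NT_ARM_SYSTEM_CALL, &iov);
  }
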
Signed-off-by: Dave Martin Acked-by: Will Deacon Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/processor.h | 2 +- arch/arm64/include/asm/ptrace.h | 21 +++++++++++++++++++++ arch/arm64/kernel/entry.S | 10 ++++------ arch/arm64/kernel/ptrace.c | 2 +- arch/arm64/kernel/signal.c | 10 +++++----- arch/arm64/kernel/signal32.c | 2 +- 6 files changed, 33 insertions(+), 14 deletions(-) (limited to 'arch/arm64') diff --git a/arch/arm64/include/asm/processor.h b/arch/arm64/include/asm/processor.h index 379def1d6b67..b7334f11c4cf 100644 --- a/arch/arm64/include/asm/processor.h +++ b/arch/arm64/include/asm/processor.h @@ -112,7 +112,7 @@ void tls_preserve_current_state(void); static inline void start_thread_common(struct pt_regs *regs, unsigned long pc) { memset(regs, 0, sizeof(*regs)); - regs->syscallno = ~0; + forget_syscall(regs); regs->pc = pc; } diff --git a/arch/arm64/include/asm/ptrace.h b/arch/arm64/include/asm/ptrace.h index 21c87dc240e1..4f64373b84fd 100644 --- a/arch/arm64/include/asm/ptrace.h +++ b/arch/arm64/include/asm/ptrace.h @@ -72,8 +72,19 @@ #define COMPAT_PT_TEXT_ADDR 0x10000 #define COMPAT_PT_DATA_ADDR 0x10004 #define COMPAT_PT_TEXT_END_ADDR 0x10008 + +/* + * If pt_regs.syscallno == NO_SYSCALL, then the thread is not executing + * a syscall -- i.e., its most recent entry into the kernel from + * userspace was not via SVC, or otherwise a tracer cancelled the syscall. + * + * This must have the value -1, for ABI compatibility with ptrace etc. + */ +#define NO_SYSCALL (-1) + #ifndef __ASSEMBLY__ #include +#include /* sizeof(struct user) for AArch32 */ #define COMPAT_USER_SZ 296 @@ -128,6 +139,16 @@ struct pt_regs { u64 unused; // maintain 16 byte alignment }; +static inline bool in_syscall(struct pt_regs const *regs) +{ + return regs->syscallno != NO_SYSCALL; +} + +static inline void forget_syscall(struct pt_regs *regs) +{ + regs->syscallno = NO_SYSCALL; +} + #define MAX_REG_OFFSET offsetof(struct pt_regs, pstate) #define arch_has_single_step() (1) diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S index 3bf0bd7a2f29..cace76d17535 100644 --- a/arch/arm64/kernel/entry.S +++ b/arch/arm64/kernel/entry.S @@ -138,11 +138,9 @@ alternative_else_nop_endif stp x22, x23, [sp, #S_PC] - /* - * Set syscallno to -1 by default (overridden later if real syscall). - */ + /* Not in a syscall by default (el0_svc overwrites for real syscall) */ .if \el == 0 - mvn w21, wzr + mov w21, #NO_SYSCALL str w21, [sp, #S_SYSCALLNO] .endif @@ -825,13 +823,13 @@ ENDPROC(el0_svc) * switches, and waiting for our parent to respond. */ __sys_trace: - cmp wscno, #-1 // user-issued syscall(-1)? + cmp wscno, #NO_SYSCALL // user-issued syscall(-1)? b.ne 1f mov x0, #-ENOSYS // set default errno if so str x0, [sp, #S_X0] 1: mov x0, sp bl syscall_trace_enter - cmp w0, #-1 // skip the syscall? + cmp w0, #NO_SYSCALL // skip the syscall? 
b.eq __sys_trace_return_skipped mov wscno, w0 // syscall number (possibly new) mov x1, sp // pointer to regs diff --git a/arch/arm64/kernel/ptrace.c b/arch/arm64/kernel/ptrace.c index de774805f672..28619b5b6746 100644 --- a/arch/arm64/kernel/ptrace.c +++ b/arch/arm64/kernel/ptrace.c @@ -1363,7 +1363,7 @@ static void tracehook_report_syscall(struct pt_regs *regs, if (dir == PTRACE_SYSCALL_EXIT) tracehook_report_syscall_exit(regs, 0); else if (tracehook_report_syscall_entry(regs)) - regs->syscallno = ~0; + forget_syscall(regs); regs->regs[regno] = saved_reg; } diff --git a/arch/arm64/kernel/signal.c b/arch/arm64/kernel/signal.c index 4d04b891c00d..4991e87f80cc 100644 --- a/arch/arm64/kernel/signal.c +++ b/arch/arm64/kernel/signal.c @@ -36,6 +36,7 @@ #include #include #include +#include #include #include @@ -387,7 +388,7 @@ static int restore_sigframe(struct pt_regs *regs, /* * Avoid sys_rt_sigreturn() restarting. */ - regs->syscallno = ~0; + forget_syscall(regs); err |= !valid_user_regs(®s->user_regs, current); if (err == 0) @@ -673,13 +674,12 @@ static void do_signal(struct pt_regs *regs) { unsigned long continue_addr = 0, restart_addr = 0; int retval = 0; - int syscall = regs->syscallno; struct ksignal ksig; /* * If we were from a system call, check for system call restarting... */ - if (syscall >= 0) { + if (in_syscall(regs)) { continue_addr = regs->pc; restart_addr = continue_addr - (compat_thumb_mode(regs) ? 2 : 4); retval = regs->regs[0]; @@ -687,7 +687,7 @@ static void do_signal(struct pt_regs *regs) /* * Avoid additional syscall restarting via ret_to_user. */ - regs->syscallno = ~0; + forget_syscall(regs); /* * Prepare for system call restart. We do this here so that a @@ -731,7 +731,7 @@ static void do_signal(struct pt_regs *regs) * Handle restarting a different system call. As above, if a debugger * has chosen to restart at a different PC, ignore the restart. */ - if (syscall >= 0 && regs->pc == restart_addr) { + if (in_syscall(regs) && regs->pc == restart_addr) { if (retval == -ERESTART_RESTARTBLOCK) setup_restart_syscall(regs); user_rewind_single_step(current); diff --git a/arch/arm64/kernel/signal32.c b/arch/arm64/kernel/signal32.c index d98ca76cbd39..4e5a664be04b 100644 --- a/arch/arm64/kernel/signal32.c +++ b/arch/arm64/kernel/signal32.c @@ -354,7 +354,7 @@ static int compat_restore_sigframe(struct pt_regs *regs, /* * Avoid compat_sys_sigreturn() restarting. */ - regs->syscallno = ~0; + forget_syscall(regs); err |= !valid_user_regs(®s->user_regs, current); -- cgit v1.2.3 From 1f9b8936f36f4a8e1d9923f5d03295d668cdf098 Mon Sep 17 00:00:00 2001 From: Julien Thierry Date: Fri, 4 Aug 2017 09:31:42 +0100 Subject: arm64: Decode information from ESR upon mem faults When receiving unhandled faults from the CPU, description is very sparse. Adding information about faults decoded from ESR. Added defines to esr.h corresponding ESR fields. Values are based on ARM Archtecture Reference Manual (DDI 0487B.a), section D7.2.28 ESR_ELx, Exception Syndrome Register (ELx) (pages D7-2275 to D7-2280). 
New output is of the form: [ 77.818059] Mem abort info: [ 77.820826] Exception class = DABT (current EL), IL = 32 bits [ 77.826706] SET = 0, FnV = 0 [ 77.829742] EA = 0, S1PTW = 0 [ 77.832849] Data abort info: [ 77.835713] ISV = 0, ISS = 0x00000070 [ 77.839522] CM = 0, WnR = 1 Signed-off-by: Julien Thierry Cc: Will Deacon Cc: Mark Rutland [catalin.marinas@arm.com: fix "%lu" in a pr_alert() call] Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/esr.h | 39 ++++++++++++++++++++++++++---------- arch/arm64/mm/fault.c | 47 ++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 76 insertions(+), 10 deletions(-) (limited to 'arch/arm64') diff --git a/arch/arm64/include/asm/esr.h b/arch/arm64/include/asm/esr.h index 8cabd57b6348..130b5343ba6d 100644 --- a/arch/arm64/include/asm/esr.h +++ b/arch/arm64/include/asm/esr.h @@ -77,16 +77,23 @@ #define ESR_ELx_EC_MASK (UL(0x3F) << ESR_ELx_EC_SHIFT) #define ESR_ELx_EC(esr) (((esr) & ESR_ELx_EC_MASK) >> ESR_ELx_EC_SHIFT) -#define ESR_ELx_IL (UL(1) << 25) +#define ESR_ELx_IL_SHIFT (25) +#define ESR_ELx_IL (UL(1) << ESR_ELx_IL_SHIFT) #define ESR_ELx_ISS_MASK (ESR_ELx_IL - 1) /* ISS field definitions shared by different classes */ -#define ESR_ELx_WNR (UL(1) << 6) +#define ESR_ELx_WNR_SHIFT (6) +#define ESR_ELx_WNR (UL(1) << ESR_ELx_WNR_SHIFT) /* Shared ISS field definitions for Data/Instruction aborts */ -#define ESR_ELx_FnV (UL(1) << 10) -#define ESR_ELx_EA (UL(1) << 9) -#define ESR_ELx_S1PTW (UL(1) << 7) +#define ESR_ELx_SET_SHIFT (11) +#define ESR_ELx_SET_MASK (UL(3) << ESR_ELx_SET_SHIFT) +#define ESR_ELx_FnV_SHIFT (10) +#define ESR_ELx_FnV (UL(1) << ESR_ELx_FnV_SHIFT) +#define ESR_ELx_EA_SHIFT (9) +#define ESR_ELx_EA (UL(1) << ESR_ELx_EA_SHIFT) +#define ESR_ELx_S1PTW_SHIFT (7) +#define ESR_ELx_S1PTW (UL(1) << ESR_ELx_S1PTW_SHIFT) /* Shared ISS fault status code(IFSC/DFSC) for Data/Instruction aborts */ #define ESR_ELx_FSC (0x3F) @@ -97,15 +104,20 @@ #define ESR_ELx_FSC_PERM (0x0C) /* ISS field definitions for Data Aborts */ -#define ESR_ELx_ISV (UL(1) << 24) +#define ESR_ELx_ISV_SHIFT (24) +#define ESR_ELx_ISV (UL(1) << ESR_ELx_ISV_SHIFT) #define ESR_ELx_SAS_SHIFT (22) #define ESR_ELx_SAS (UL(3) << ESR_ELx_SAS_SHIFT) -#define ESR_ELx_SSE (UL(1) << 21) +#define ESR_ELx_SSE_SHIFT (21) +#define ESR_ELx_SSE (UL(1) << ESR_ELx_SSE_SHIFT) #define ESR_ELx_SRT_SHIFT (16) #define ESR_ELx_SRT_MASK (UL(0x1F) << ESR_ELx_SRT_SHIFT) -#define ESR_ELx_SF (UL(1) << 15) -#define ESR_ELx_AR (UL(1) << 14) -#define ESR_ELx_CM (UL(1) << 8) +#define ESR_ELx_SF_SHIFT (15) +#define ESR_ELx_SF (UL(1) << ESR_ELx_SF_SHIFT) +#define ESR_ELx_AR_SHIFT (14) +#define ESR_ELx_AR (UL(1) << ESR_ELx_AR_SHIFT) +#define ESR_ELx_CM_SHIFT (8) +#define ESR_ELx_CM (UL(1) << ESR_ELx_CM_SHIFT) /* ISS field definitions for exceptions taken in to Hyp */ #define ESR_ELx_CV (UL(1) << 24) @@ -209,6 +221,13 @@ #ifndef __ASSEMBLY__ #include +static inline bool esr_is_data_abort(u32 esr) +{ + const u32 ec = ESR_ELx_EC(esr); + + return ec == ESR_ELx_EC_DABT_LOW || ec == ESR_ELx_EC_DABT_CUR; +} + const char *esr_get_class_string(u32 esr); #endif /* __ASSEMBLY */ diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c index 2509e4fe6992..52ee273afeec 100644 --- a/arch/arm64/mm/fault.c +++ b/arch/arm64/mm/fault.c @@ -82,6 +82,49 @@ static inline int notify_page_fault(struct pt_regs *regs, unsigned int esr) } #endif +static void data_abort_decode(unsigned int esr) +{ + pr_alert("Data abort info:\n"); + + if (esr & ESR_ELx_ISV) { + pr_alert(" Access size = %u byte(s)\n", + 1U << ((esr & 
ESR_ELx_SAS) >> ESR_ELx_SAS_SHIFT)); + pr_alert(" SSE = %lu, SRT = %lu\n", + (esr & ESR_ELx_SSE) >> ESR_ELx_SSE_SHIFT, + (esr & ESR_ELx_SRT_MASK) >> ESR_ELx_SRT_SHIFT); + pr_alert(" SF = %lu, AR = %lu\n", + (esr & ESR_ELx_SF) >> ESR_ELx_SF_SHIFT, + (esr & ESR_ELx_AR) >> ESR_ELx_AR_SHIFT); + } else { + pr_alert(" ISV = 0, ISS = 0x%08lu\n", esr & ESR_ELx_ISS_MASK); + } + + pr_alert(" CM = %lu, WnR = %lu\n", + (esr & ESR_ELx_CM) >> ESR_ELx_CM_SHIFT, + (esr & ESR_ELx_WNR) >> ESR_ELx_WNR_SHIFT); +} + +/* + * Decode mem abort information + */ +static void mem_abort_decode(unsigned int esr) +{ + pr_alert("Mem abort info:\n"); + + pr_alert(" Exception class = %s, IL = %u bits\n", + esr_get_class_string(esr), + (esr & ESR_ELx_IL) ? 32 : 16); + pr_alert(" SET = %lu, FnV = %lu\n", + (esr & ESR_ELx_SET_MASK) >> ESR_ELx_SET_SHIFT, + (esr & ESR_ELx_FnV) >> ESR_ELx_FnV_SHIFT); + pr_alert(" EA = %lu, S1PTW = %lu\n", + (esr & ESR_ELx_EA) >> ESR_ELx_EA_SHIFT, + (esr & ESR_ELx_S1PTW) >> ESR_ELx_S1PTW_SHIFT); + + if (esr_is_data_abort(esr)) + data_abort_decode(esr); +} + /* * Dump out the page tables associated with 'addr' in the currently active mm. */ @@ -248,6 +291,8 @@ static void __do_kernel_fault(unsigned long addr, unsigned int esr, pr_alert("Unable to handle kernel %s at virtual address %08lx\n", msg, addr); + mem_abort_decode(esr); + show_pte(addr); die("Oops", regs, esr); bust_spinlocks(0); @@ -702,6 +747,8 @@ asmlinkage void __exception do_mem_abort(unsigned long addr, unsigned int esr, pr_alert("Unhandled fault: %s (0x%08x) at 0x%016lx\n", inf->name, esr, addr); + mem_abort_decode(esr); + info.si_signo = inf->sig; info.si_errno = 0; info.si_code = inf->code; -- cgit v1.2.3 From 11cefd5ac25f242349994140a3bce3a20db0c751 Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Mon, 7 Aug 2017 12:36:35 +0100 Subject: arm64: neon: Export kernel_neon_busy to loadable modules may_use_simd() can be invoked from loadable modules and it accesses kernel_neon_busy. Make sure that the latter is exported. Fixes: cb84d11e1625 ("arm64: neon: Remove support for nested or hardirq kernel-mode NEON") Signed-off-by: Catalin Marinas --- arch/arm64/kernel/fpsimd.c | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/arm64') diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c index 138fcfaeadc1..9da4e636b328 100644 --- a/arch/arm64/kernel/fpsimd.c +++ b/arch/arm64/kernel/fpsimd.c @@ -255,6 +255,7 @@ void fpsimd_flush_task_state(struct task_struct *t) #ifdef CONFIG_KERNEL_MODE_NEON DEFINE_PER_CPU(bool, kernel_neon_busy); +EXPORT_PER_CPU_SYMBOL(kernel_neon_busy); /* * Kernel-side NEON support functions -- cgit v1.2.3 From 1031a1592908ccd3240f4a5731c96c382c932310 Mon Sep 17 00:00:00 2001 From: Pratyush Anand Date: Sat, 1 Jul 2017 12:03:35 +0530 Subject: arm64: perf: Allow more than one cycle counter to be used Currently: $ perf stat -e cycles:u -e cycles:k true Performance counter stats for 'true': 2,24,699 cycles:u cycles:k (0.00%) 0.000788087 seconds time elapsed We can not count more than one cycle counter in one instance,because we allow to map cycle counter into PMCCNTR_EL0 only. However, if I did not miss anything then specification do not prohibit to use PMEVCNTR_EL0 for cycle count as well. Modify the code so that it still prefers to use PMCCNTR_EL0 for cycle counter, however allow to use PMEVCNTR_EL0 if PMCCNTR_EL0 is already in use. 
After this patch: $ perf stat -e cycles:u -e cycles:k true Performance counter stats for 'true': 2,17,310 cycles:u 7,40,009 cycles:k 0.000764149 seconds time elapsed Signed-off-by: Pratyush Anand Signed-off-by: Will Deacon --- arch/arm64/kernel/perf_event.c | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) (limited to 'arch/arm64') diff --git a/arch/arm64/kernel/perf_event.c b/arch/arm64/kernel/perf_event.c index b5798ba21189..372317667773 100644 --- a/arch/arm64/kernel/perf_event.c +++ b/arch/arm64/kernel/perf_event.c @@ -846,17 +846,14 @@ static int armv8pmu_get_event_idx(struct pmu_hw_events *cpuc, struct hw_perf_event *hwc = &event->hw; unsigned long evtype = hwc->config_base & ARMV8_PMU_EVTYPE_EVENT; - /* Always place a cycle counter into the cycle counter. */ + /* Always prefer to place a cycle counter into the cycle counter. */ if (evtype == ARMV8_PMUV3_PERFCTR_CPU_CYCLES) { - if (test_and_set_bit(ARMV8_IDX_CYCLE_COUNTER, cpuc->used_mask)) - return -EAGAIN; - - return ARMV8_IDX_CYCLE_COUNTER; + if (!test_and_set_bit(ARMV8_IDX_CYCLE_COUNTER, cpuc->used_mask)) + return ARMV8_IDX_CYCLE_COUNTER; } /* - * For anything other than a cycle counter, try and use - * the events counters + * Otherwise use events counters */ for (idx = ARMV8_IDX_COUNTER0; idx < cpu_pmu->num_events; ++idx) { if (!test_and_set_bit(idx, cpuc->used_mask)) -- cgit v1.2.3 From db44e9c5ecf1b60336ccfc176b07ba7d81d855e0 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Wed, 26 Jul 2017 14:41:40 +0100 Subject: arm64: Add ASM_BUG() Currently. we can only use BUG() from C code, though there are situations where we would like an equivalent mechanism in assembly code. This patch refactors our BUG() definition such that it can be used in either C or assembly, in the form of a new ASM_BUG(). The refactoring requires the removal of escape sequences, such as '\n' and '\t', but these aren't strictly necessary as we can use ';' to terminate assembler statements. The low-level assembly is factored out into , with retained as the C wrapper. Signed-off-by: Mark Rutland Cc: Ard Biesheuvel Cc: Catalin Marinas Cc: Dave Martin Cc: James Morse Cc: Will Deacon --- arch/arm64/include/asm/asm-bug.h | 54 ++++++++++++++++++++++++++++++++++++++++ arch/arm64/include/asm/bug.h | 35 +++----------------------- 2 files changed, 57 insertions(+), 32 deletions(-) create mode 100644 arch/arm64/include/asm/asm-bug.h (limited to 'arch/arm64') diff --git a/arch/arm64/include/asm/asm-bug.h b/arch/arm64/include/asm/asm-bug.h new file mode 100644 index 000000000000..636e755bcdca --- /dev/null +++ b/arch/arm64/include/asm/asm-bug.h @@ -0,0 +1,54 @@ +#ifndef __ASM_ASM_BUG_H +/* + * Copyright (C) 2017 ARM Limited + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . 
+ */ +#define __ASM_ASM_BUG_H + +#include + +#ifdef CONFIG_DEBUG_BUGVERBOSE +#define _BUGVERBOSE_LOCATION(file, line) __BUGVERBOSE_LOCATION(file, line) +#define __BUGVERBOSE_LOCATION(file, line) \ + .pushsection .rodata.str,"aMS",@progbits,1; \ + 2: .string file; \ + .popsection; \ + \ + .long 2b - 0b; \ + .short line; +#else +#define _BUGVERBOSE_LOCATION(file, line) +#endif + +#ifdef CONFIG_GENERIC_BUG + +#define __BUG_ENTRY(flags) \ + .pushsection __bug_table,"aw"; \ + .align 2; \ + 0: .long 1f - 0b; \ +_BUGVERBOSE_LOCATION(__FILE__, __LINE__) \ + .short flags; \ + .popsection; \ + 1: +#else +#define __BUG_ENTRY(flags) +#endif + +#define ASM_BUG_FLAGS(flags) \ + __BUG_ENTRY(flags) \ + brk BUG_BRK_IMM + +#define ASM_BUG() ASM_BUG_FLAGS(0) + +#endif /* __ASM_ASM_BUG_H */ diff --git a/arch/arm64/include/asm/bug.h b/arch/arm64/include/asm/bug.h index a02a57186f56..d7dc43752705 100644 --- a/arch/arm64/include/asm/bug.h +++ b/arch/arm64/include/asm/bug.h @@ -18,41 +18,12 @@ #ifndef _ARCH_ARM64_ASM_BUG_H #define _ARCH_ARM64_ASM_BUG_H -#include +#include -#ifdef CONFIG_DEBUG_BUGVERBOSE -#define _BUGVERBOSE_LOCATION(file, line) __BUGVERBOSE_LOCATION(file, line) -#define __BUGVERBOSE_LOCATION(file, line) \ - ".pushsection .rodata.str,\"aMS\",@progbits,1\n" \ - "2: .string \"" file "\"\n\t" \ - ".popsection\n\t" \ - \ - ".long 2b - 0b\n\t" \ - ".short " #line "\n\t" -#else -#define _BUGVERBOSE_LOCATION(file, line) -#endif - -#ifdef CONFIG_GENERIC_BUG - -#define __BUG_ENTRY(flags) \ - ".pushsection __bug_table,\"aw\"\n\t" \ - ".align 2\n\t" \ - "0: .long 1f - 0b\n\t" \ -_BUGVERBOSE_LOCATION(__FILE__, __LINE__) \ - ".short " #flags "\n\t" \ - ".popsection\n" \ - "1: " -#else -#define __BUG_ENTRY(flags) "" -#endif +#include #define __BUG_FLAGS(flags) \ - asm volatile ( \ - __BUG_ENTRY(flags) \ - "brk %[imm]" :: [imm] "i" (BUG_BRK_IMM) \ - ); - + asm volatile (__stringify(ASM_BUG_FLAGS(flags))); #define BUG() do { \ __BUG_FLAGS(0); \ -- cgit v1.2.3 From 2d0e751a4789fc5ab4a5c9de5d6407b41fdfbbf0 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Wed, 26 Jul 2017 11:14:53 +0100 Subject: arm64: consistently use bl for C exception entry In most cases, our exception entry assembly branches to C handlers with a BL instruction, but in cases where we do not expect to return, we use B instead. While this is correct today, it means that backtraces for fatal exceptions miss the entry assembly (as the LR is stale at the point we call C code), while non-fatal exceptions have the entry assembly in the LR. In subsequent patches, we will need the LR to be set in these cases in order to backtrace reliably. This patch updates these sites to use a BL, ensuring consistency, and preparing for backtrace rework. An ASM_BUG() is added after each of these new BLs, which both catches unexpected returns, and ensures that the LR value doesn't point to another function label. 
Signed-off-by: Mark Rutland Cc: Ard Biesheuvel Cc: Catalin Marinas Cc: James Morse Cc: Will Deacon --- arch/arm64/kernel/entry.S | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) (limited to 'arch/arm64') diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S index b738880350f9..660612a07ec5 100644 --- a/arch/arm64/kernel/entry.S +++ b/arch/arm64/kernel/entry.S @@ -351,7 +351,8 @@ END(vectors) mov x0, sp mov x1, #\reason mrs x2, esr_el1 - b bad_mode + bl bad_mode + ASM_BUG() .endm el0_sync_invalid: @@ -448,14 +449,16 @@ el1_sp_pc: mrs x0, far_el1 enable_dbg mov x2, sp - b do_sp_pc_abort + bl do_sp_pc_abort + ASM_BUG() el1_undef: /* * Undefined instruction */ enable_dbg mov x0, sp - b do_undefinstr + bl do_undefinstr + ASM_BUG() el1_dbg: /* * Debug exception handling @@ -473,7 +476,8 @@ el1_inv: mov x0, sp mov x2, x1 mov x1, #BAD_SYNC - b bad_mode + bl bad_mode + ASM_BUG() ENDPROC(el1_sync) .align 6 -- cgit v1.2.3 From ed84b4e9582bdfeffc617589fe17dddfc5fe6672 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Wed, 26 Jul 2017 16:05:20 +0100 Subject: arm64: move non-entry code out of .entry.text Currently, cpu_switch_to and ret_from_fork both live in .entry.text, though neither form the critical path for an exception entry. In subsequent patches, we will require that code in .entry.text is part of the critical path for exception entry, for which we can assume certain properties (e.g. the presence of exception regs on the stack). Neither cpu_switch_to nor ret_from_fork will meet these requirements, so we must move them out of .entry.text. To ensure that neither are kprobed after being moved out of .entry.text, we must explicitly blacklist them, requiring a new NOKPROBE() asm helper. Signed-off-by: Mark Rutland Cc: Ard Biesheuvel Cc: Catalin Marinas Cc: James Morse Cc: Will Deacon --- arch/arm64/include/asm/assembler.h | 11 +++++ arch/arm64/kernel/entry.S | 90 +++++++++++++++++++------------------- 2 files changed, 57 insertions(+), 44 deletions(-) (limited to 'arch/arm64') diff --git a/arch/arm64/include/asm/assembler.h b/arch/arm64/include/asm/assembler.h index 1b67c3782d00..610a42018241 100644 --- a/arch/arm64/include/asm/assembler.h +++ b/arch/arm64/include/asm/assembler.h @@ -403,6 +403,17 @@ alternative_endif .size __pi_##x, . - x; \ ENDPROC(x) +/* + * Annotate a function as being unsuitable for kprobes. + */ +#ifdef CONFIG_KPROBES +#define NOKPROBE(x) \ + .pushsection "_kprobe_blacklist", "aw"; \ + .quad x; \ + .popsection; +#else +#define NOKPROBE(x) +#endif /* * Emit a 64-bit absolute little endian symbol reference in a way that * ensures that it will be resolved at build time, even when building a diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S index 660612a07ec5..9e126d3d8b53 100644 --- a/arch/arm64/kernel/entry.S +++ b/arch/arm64/kernel/entry.S @@ -710,38 +710,6 @@ el0_irq_naked: b ret_to_user ENDPROC(el0_irq) -/* - * Register switch for AArch64. The callee-saved registers need to be saved - * and restored. On entry: - * x0 = previous task_struct (must be preserved across the switch) - * x1 = next task_struct - * Previous and next are guaranteed not to be the same. 
- * - */ -ENTRY(cpu_switch_to) - mov x10, #THREAD_CPU_CONTEXT - add x8, x0, x10 - mov x9, sp - stp x19, x20, [x8], #16 // store callee-saved registers - stp x21, x22, [x8], #16 - stp x23, x24, [x8], #16 - stp x25, x26, [x8], #16 - stp x27, x28, [x8], #16 - stp x29, x9, [x8], #16 - str lr, [x8] - add x8, x1, x10 - ldp x19, x20, [x8], #16 // restore callee-saved registers - ldp x21, x22, [x8], #16 - ldp x23, x24, [x8], #16 - ldp x25, x26, [x8], #16 - ldp x27, x28, [x8], #16 - ldp x29, x9, [x8], #16 - ldr lr, [x8] - mov sp, x9 - msr sp_el0, x1 - ret -ENDPROC(cpu_switch_to) - /* * This is the fast syscall return path. We do as little as possible here, * and this includes saving x0 back into the kernel stack. @@ -784,18 +752,6 @@ finish_ret_to_user: kernel_exit 0 ENDPROC(ret_to_user) -/* - * This is how we return from a fork. - */ -ENTRY(ret_from_fork) - bl schedule_tail - cbz x19, 1f // not a kernel thread - mov x0, x20 - blr x19 -1: get_thread_info tsk - b ret_to_user -ENDPROC(ret_from_fork) - /* * SVC handler. */ @@ -869,3 +825,49 @@ ENTRY(sys_rt_sigreturn_wrapper) mov x0, sp b sys_rt_sigreturn ENDPROC(sys_rt_sigreturn_wrapper) + +/* + * Register switch for AArch64. The callee-saved registers need to be saved + * and restored. On entry: + * x0 = previous task_struct (must be preserved across the switch) + * x1 = next task_struct + * Previous and next are guaranteed not to be the same. + * + */ +ENTRY(cpu_switch_to) + mov x10, #THREAD_CPU_CONTEXT + add x8, x0, x10 + mov x9, sp + stp x19, x20, [x8], #16 // store callee-saved registers + stp x21, x22, [x8], #16 + stp x23, x24, [x8], #16 + stp x25, x26, [x8], #16 + stp x27, x28, [x8], #16 + stp x29, x9, [x8], #16 + str lr, [x8] + add x8, x1, x10 + ldp x19, x20, [x8], #16 // restore callee-saved registers + ldp x21, x22, [x8], #16 + ldp x23, x24, [x8], #16 + ldp x25, x26, [x8], #16 + ldp x27, x28, [x8], #16 + ldp x29, x9, [x8], #16 + ldr lr, [x8] + mov sp, x9 + msr sp_el0, x1 + ret +ENDPROC(cpu_switch_to) +NOKPROBE(cpu_switch_to) + +/* + * This is how we return from a fork. + */ +ENTRY(ret_from_fork) + bl schedule_tail + cbz x19, 1f // not a kernel thread + mov x0, x20 + blr x19 +1: get_thread_info tsk + b ret_to_user +ENDPROC(ret_from_fork) +NOKPROBE(ret_from_fork) -- cgit v1.2.3 From 096683724cb2eb95fea759a2580996df1039fdd0 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Thu, 20 Jul 2017 14:01:01 +0100 Subject: arm64: unwind: avoid percpu indirection for irq stack Our IRQ_STACK_PTR() and on_irq_stack() helpers both take a cpu argument, used to generate a percpu address. In all cases, they are passed {raw_,}smp_processor_id(), so this parameter is redundant. Since {raw_,}smp_processor_id() use a percpu variable internally, this approach means we generate a percpu offset to find the current cpu, then use this to index an array of percpu offsets, which we then use to find the current CPU's IRQ stack pointer. Thus, most of the work is redundant. Instead, we can consistently use raw_cpu_ptr() to generate the CPU's irq_stack pointer by simply adding the percpu offset to the irq_stack address, which is simpler in both respects. 
Signed-off-by: Mark Rutland Signed-off-by: Ard Biesheuvel Cc: Catalin Marinas Cc: James Morse Cc: Will Deacon --- arch/arm64/include/asm/irq.h | 6 +++--- arch/arm64/kernel/ptrace.c | 2 +- arch/arm64/kernel/stacktrace.c | 4 ++-- arch/arm64/kernel/traps.c | 2 +- 4 files changed, 7 insertions(+), 7 deletions(-) (limited to 'arch/arm64') diff --git a/arch/arm64/include/asm/irq.h b/arch/arm64/include/asm/irq.h index b77197d941fc..6d6f85e4923e 100644 --- a/arch/arm64/include/asm/irq.h +++ b/arch/arm64/include/asm/irq.h @@ -32,7 +32,7 @@ DECLARE_PER_CPU(unsigned long [IRQ_STACK_SIZE/sizeof(long)], irq_stack); * from kernel_entry can be found. * */ -#define IRQ_STACK_PTR(cpu) ((unsigned long)per_cpu(irq_stack, cpu) + IRQ_STACK_START_SP) +#define IRQ_STACK_PTR() ((unsigned long)raw_cpu_ptr(irq_stack) + IRQ_STACK_START_SP) /* * The offset from irq_stack_ptr where entry.S will store the original @@ -47,10 +47,10 @@ static inline int nr_legacy_irqs(void) return 0; } -static inline bool on_irq_stack(unsigned long sp, int cpu) +static inline bool on_irq_stack(unsigned long sp) { /* variable names the same as kernel/stacktrace.c */ - unsigned long low = (unsigned long)per_cpu(irq_stack, cpu); + unsigned long low = (unsigned long)raw_cpu_ptr(irq_stack); unsigned long high = low + IRQ_STACK_START_SP; return (low <= sp && sp <= high); diff --git a/arch/arm64/kernel/ptrace.c b/arch/arm64/kernel/ptrace.c index 1b38c0150aec..baf0838205c7 100644 --- a/arch/arm64/kernel/ptrace.c +++ b/arch/arm64/kernel/ptrace.c @@ -127,7 +127,7 @@ static bool regs_within_kernel_stack(struct pt_regs *regs, unsigned long addr) { return ((addr & ~(THREAD_SIZE - 1)) == (kernel_stack_pointer(regs) & ~(THREAD_SIZE - 1))) || - on_irq_stack(addr, raw_smp_processor_id()); + on_irq_stack(addr); } /** diff --git a/arch/arm64/kernel/stacktrace.c b/arch/arm64/kernel/stacktrace.c index 09d37d66b630..6ffb965be641 100644 --- a/arch/arm64/kernel/stacktrace.c +++ b/arch/arm64/kernel/stacktrace.c @@ -54,13 +54,13 @@ int notrace unwind_frame(struct task_struct *tsk, struct stackframe *frame) * non-preemptible context. */ if (tsk == current && !preemptible()) - irq_stack_ptr = IRQ_STACK_PTR(smp_processor_id()); + irq_stack_ptr = IRQ_STACK_PTR(); else irq_stack_ptr = 0; low = frame->sp; /* irq stacks are not THREAD_SIZE aligned */ - if (on_irq_stack(frame->sp, raw_smp_processor_id())) + if (on_irq_stack(frame->sp)) high = irq_stack_ptr; else high = ALIGN(low, THREAD_SIZE) - 0x20; diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c index d48f47080213..5797f5037ec9 100644 --- a/arch/arm64/kernel/traps.c +++ b/arch/arm64/kernel/traps.c @@ -159,7 +159,7 @@ void dump_backtrace(struct pt_regs *regs, struct task_struct *tsk) * non-preemptible context. */ if (tsk == current && !preemptible()) - irq_stack_ptr = IRQ_STACK_PTR(smp_processor_id()); + irq_stack_ptr = IRQ_STACK_PTR(); else irq_stack_ptr = 0; -- cgit v1.2.3 From c7365330753c55a061db0a1837a27fd5e44b1408 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Sat, 22 Jul 2017 12:48:34 +0100 Subject: arm64: unwind: disregard frame.sp when validating frame pointer Currently, when unwinding the call stack, we validate the frame pointer of each frame against frame.sp, whose value is not clearly defined, and which makes it more difficult to link stack frames together across different stacks. It is far better to simply check whether the frame pointer itself points into a valid stack. 
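Condensed into one place (a sketch only, not a literal hunk from the patch; the helper name is invented), the validation this change moves to looks like:

/*
 * Sketch of the new frame pointer check: a frame pointer is acceptable
 * only if it is 16-byte aligned and lies on a stack we know about.
 */
#include <linux/preempt.h>
#include <linux/sched.h>
#include <asm/irq.h>

static bool fp_is_valid(struct task_struct *tsk, unsigned long fp)   /* hypothetical */
{
        if (fp & 0xf)
                return false;

        /* the IRQ stack only counts when unwinding current without preemption */
        return (tsk == current && !preemptible() && on_irq_stack(fp)) ||
               on_task_stack(tsk, fp);
}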
Signed-off-by: Ard Biesheuvel Signed-off-by: Mark Rutland Cc: Catalin Marinas Cc: James Morse Cc: Will Deacon --- arch/arm64/include/asm/irq.h | 10 +++++++++- arch/arm64/kernel/stacktrace.c | 24 +++++++----------------- 2 files changed, 16 insertions(+), 18 deletions(-) (limited to 'arch/arm64') diff --git a/arch/arm64/include/asm/irq.h b/arch/arm64/include/asm/irq.h index 6d6f85e4923e..8155e486ce48 100644 --- a/arch/arm64/include/asm/irq.h +++ b/arch/arm64/include/asm/irq.h @@ -7,6 +7,7 @@ #ifndef __ASSEMBLER__ #include +#include #include #include @@ -49,12 +50,19 @@ static inline int nr_legacy_irqs(void) static inline bool on_irq_stack(unsigned long sp) { - /* variable names the same as kernel/stacktrace.c */ unsigned long low = (unsigned long)raw_cpu_ptr(irq_stack); unsigned long high = low + IRQ_STACK_START_SP; return (low <= sp && sp <= high); } +static inline bool on_task_stack(struct task_struct *tsk, unsigned long sp) +{ + unsigned long low = (unsigned long)task_stack_page(tsk); + unsigned long high = low + THREAD_SIZE; + + return (low <= sp && sp < high); +} + #endif /* !__ASSEMBLER__ */ #endif diff --git a/arch/arm64/kernel/stacktrace.c b/arch/arm64/kernel/stacktrace.c index 6ffb965be641..beaf51fb3088 100644 --- a/arch/arm64/kernel/stacktrace.c +++ b/arch/arm64/kernel/stacktrace.c @@ -42,9 +42,10 @@ */ int notrace unwind_frame(struct task_struct *tsk, struct stackframe *frame) { - unsigned long high, low; unsigned long fp = frame->fp; - unsigned long irq_stack_ptr; + + if (fp & 0xf) + return -EINVAL; if (!tsk) tsk = current; @@ -53,19 +54,8 @@ int notrace unwind_frame(struct task_struct *tsk, struct stackframe *frame) * Switching between stacks is valid when tracing current and in * non-preemptible context. */ - if (tsk == current && !preemptible()) - irq_stack_ptr = IRQ_STACK_PTR(); - else - irq_stack_ptr = 0; - - low = frame->sp; - /* irq stacks are not THREAD_SIZE aligned */ - if (on_irq_stack(frame->sp)) - high = irq_stack_ptr; - else - high = ALIGN(low, THREAD_SIZE) - 0x20; - - if (fp < low || fp > high || fp & 0xf) + if (!(tsk == current && !preemptible() && on_irq_stack(fp)) && + !on_task_stack(tsk, fp)) return -EINVAL; frame->sp = fp + 0x10; @@ -94,9 +84,9 @@ int notrace unwind_frame(struct task_struct *tsk, struct stackframe *frame) * Check the frame->fp we read from the bottom of the irq_stack, * and the original task stack pointer are both in current->stack. */ - if (frame->sp == irq_stack_ptr) { + if (frame->sp == IRQ_STACK_PTR()) { struct pt_regs *irq_args; - unsigned long orig_sp = IRQ_STACK_TO_TASK_STACK(irq_stack_ptr); + unsigned long orig_sp = IRQ_STACK_TO_TASK_STACK(frame->sp); if (object_is_on_stack((void *)orig_sp) && object_is_on_stack((void *)frame->fp)) { -- cgit v1.2.3 From 6c833bb9247ed51028279ef7b82ebbbe60d789e3 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Tue, 8 Aug 2017 16:58:33 +0100 Subject: arm64: perf: Allow standard PMUv3 events to be extended by the CPU type Rather than continue adding CPU-specific event maps, instead look up by default in the PMUv3 event map and only fallback to the CPU-specific maps if either the event isn't described by PMUv3, or it is described but the PMCEID registers say that it is unsupported by the current CPU. 
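The intended lookup order can be summarised by the sketch below; lookup_pmuv3(), lookup_cpu_map() and pmceid_bitmap are made-up stand-ins for the armpmu_map_event() calls and the per-PMU bitmap used in the actual patch:

/* Sketch of the lookup order only; helper names are hypothetical. */
static int map_event_sketch(struct perf_event *event)
{
        int id = lookup_pmuv3(event);           /* architected PMUv3 tables first */

        /* keep the architected encoding only if PMCEID says it is implemented */
        if (id > 0 && id < ARMV8_PMUV3_MAX_COMMON_EVENTS &&
            test_bit(id, pmceid_bitmap))
                return id;

        /* otherwise fall back to the CPU-specific (IMPDEF) tables, if any */
        return lookup_cpu_map(event);           /* -ENOENT when there are none */
}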
Signed-off-by: Will Deacon --- arch/arm64/kernel/perf_event.c | 46 ++++++++++++++++++++++++------------------ drivers/perf/arm_pmu.c | 6 ++++++ 2 files changed, 32 insertions(+), 20 deletions(-) (limited to 'arch/arm64') diff --git a/arch/arm64/kernel/perf_event.c b/arch/arm64/kernel/perf_event.c index 372317667773..b83f986e7fbf 100644 --- a/arch/arm64/kernel/perf_event.c +++ b/arch/arm64/kernel/perf_event.c @@ -921,7 +921,13 @@ static void armv8pmu_reset(void *info) ARMV8_PMU_PMCR_LC); } -static int armv8_pmuv3_map_event(struct perf_event *event) +static int __armv8_pmuv3_map_event(struct perf_event *event, + const unsigned (*extra_event_map) + [PERF_COUNT_HW_MAX], + const unsigned (*extra_cache_map) + [PERF_COUNT_HW_CACHE_MAX] + [PERF_COUNT_HW_CACHE_OP_MAX] + [PERF_COUNT_HW_CACHE_RESULT_MAX]) { int hw_event_id; struct arm_pmu *armpmu = to_arm_pmu(event->pmu); @@ -929,44 +935,44 @@ static int armv8_pmuv3_map_event(struct perf_event *event) hw_event_id = armpmu_map_event(event, &armv8_pmuv3_perf_map, &armv8_pmuv3_perf_cache_map, ARMV8_PMU_EVTYPE_EVENT); - if (hw_event_id < 0) - return hw_event_id; - /* disable micro/arch events not supported by this PMU */ - if ((hw_event_id < ARMV8_PMUV3_MAX_COMMON_EVENTS) && - !test_bit(hw_event_id, armpmu->pmceid_bitmap)) { - return -EOPNOTSUPP; + /* Only expose micro/arch events supported by this PMU */ + if ((hw_event_id > 0) && (hw_event_id < ARMV8_PMUV3_MAX_COMMON_EVENTS) + && test_bit(hw_event_id, armpmu->pmceid_bitmap)) { + return hw_event_id; } - return hw_event_id; + return armpmu_map_event(event, extra_event_map, extra_cache_map, + ARMV8_PMU_EVTYPE_EVENT); +} + +static int armv8_pmuv3_map_event(struct perf_event *event) +{ + return __armv8_pmuv3_map_event(event, NULL, NULL); } static int armv8_a53_map_event(struct perf_event *event) { - return armpmu_map_event(event, &armv8_a53_perf_map, - &armv8_a53_perf_cache_map, - ARMV8_PMU_EVTYPE_EVENT); + return __armv8_pmuv3_map_event(event, &armv8_a53_perf_map, + &armv8_a53_perf_cache_map); } static int armv8_a57_map_event(struct perf_event *event) { - return armpmu_map_event(event, &armv8_a57_perf_map, - &armv8_a57_perf_cache_map, - ARMV8_PMU_EVTYPE_EVENT); + return __armv8_pmuv3_map_event(event, &armv8_a57_perf_map, + &armv8_a57_perf_cache_map); } static int armv8_thunder_map_event(struct perf_event *event) { - return armpmu_map_event(event, &armv8_thunder_perf_map, - &armv8_thunder_perf_cache_map, - ARMV8_PMU_EVTYPE_EVENT); + return __armv8_pmuv3_map_event(event, &armv8_thunder_perf_map, + &armv8_thunder_perf_cache_map); } static int armv8_vulcan_map_event(struct perf_event *event) { - return armpmu_map_event(event, &armv8_vulcan_perf_map, - &armv8_vulcan_perf_cache_map, - ARMV8_PMU_EVTYPE_EVENT); + return __armv8_pmuv3_map_event(event, &armv8_vulcan_perf_map, + &armv8_vulcan_perf_cache_map); } struct armv8pmu_probe_info { diff --git a/drivers/perf/arm_pmu.c b/drivers/perf/arm_pmu.c index 1c5e0f333779..d14fc2e67f93 100644 --- a/drivers/perf/arm_pmu.c +++ b/drivers/perf/arm_pmu.c @@ -47,6 +47,9 @@ armpmu_map_cache_event(const unsigned (*cache_map) if (cache_result >= PERF_COUNT_HW_CACHE_RESULT_MAX) return -EINVAL; + if (!cache_map) + return -ENOENT; + ret = (int)(*cache_map)[cache_type][cache_op][cache_result]; if (ret == CACHE_OP_UNSUPPORTED) @@ -63,6 +66,9 @@ armpmu_map_hw_event(const unsigned (*event_map)[PERF_COUNT_HW_MAX], u64 config) if (config >= PERF_COUNT_HW_MAX) return -EINVAL; + if (!event_map) + return -ENOENT; + mapping = (*event_map)[config]; return mapping == HW_OP_UNSUPPORTED ?
-ENOENT : mapping; } -- cgit v1.2.3 From 09c2a7dc4ca2755892b74858fe3bf62b652ff9d0 Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Tue, 25 Jul 2017 11:55:38 +0100 Subject: arm64: mm: Fix set_memory_valid() declaration Clearly, set_memory_valid() has never been seen in the same room as its declaration... Whilst the type mismatch is such that kexec probably wasn't broken in practice, fix it to match the definition as it should. Fixes: 9b0aa14e3155 ("arm64: mm: add set_memory_valid()") Reviewed-by: Will Deacon Signed-off-by: Robin Murphy Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/cacheflush.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/arm64') diff --git a/arch/arm64/include/asm/cacheflush.h b/arch/arm64/include/asm/cacheflush.h index d74a284abdc2..4d4f650c290e 100644 --- a/arch/arm64/include/asm/cacheflush.h +++ b/arch/arm64/include/asm/cacheflush.h @@ -150,6 +150,6 @@ static inline void flush_cache_vunmap(unsigned long start, unsigned long end) { } -int set_memory_valid(unsigned long addr, unsigned long size, int enable); +int set_memory_valid(unsigned long addr, int numpages, int enable); #endif -- cgit v1.2.3 From d46befef4c03fb61a62b3319ff5265aaac7bc465 Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Tue, 25 Jul 2017 11:55:39 +0100 Subject: arm64: Convert __inval_cache_range() to area-based __inval_cache_range() is already the odd one out among our data cache maintenance routines as the only remaining range-based one; as we're going to want an invalidation routine to call from C code for the pmem API, let's tweak the prototype and name to bring it in line with the clean operations, and to make its relationship with __dma_inv_area() neatly mirror that of __clean_dcache_area_poc() and __dma_clean_area(). The loop clearing the early page tables gets mildly massaged in the process for the sake of consistency. Reviewed-by: Will Deacon Signed-off-by: Robin Murphy Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/cacheflush.h | 1 + arch/arm64/kernel/head.S | 18 +++++++++--------- arch/arm64/mm/cache.S | 23 ++++++++++++++--------- 3 files changed, 24 insertions(+), 18 deletions(-) (limited to 'arch/arm64') diff --git a/arch/arm64/include/asm/cacheflush.h b/arch/arm64/include/asm/cacheflush.h index 4d4f650c290e..b4b43a94dffd 100644 --- a/arch/arm64/include/asm/cacheflush.h +++ b/arch/arm64/include/asm/cacheflush.h @@ -67,6 +67,7 @@ */ extern void flush_icache_range(unsigned long start, unsigned long end); extern void __flush_dcache_area(void *addr, size_t len); +extern void __inval_dcache_area(void *addr, size_t len); extern void __clean_dcache_area_poc(void *addr, size_t len); extern void __clean_dcache_area_pou(void *addr, size_t len); extern long __flush_cache_user_range(unsigned long start, unsigned long end); diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S index 973df7de7bf8..73a0531e0187 100644 --- a/arch/arm64/kernel/head.S +++ b/arch/arm64/kernel/head.S @@ -143,8 +143,8 @@ preserve_boot_args: dmb sy // needed before dc ivac with // MMU off - add x1, x0, #0x20 // 4 x 8 bytes - b __inval_cache_range // tail call + mov x1, #0x20 // 4 x 8 bytes + b __inval_dcache_area // tail call ENDPROC(preserve_boot_args) /* @@ -221,20 +221,20 @@ __create_page_tables: * dirty cache lines being evicted. 
*/ adrp x0, idmap_pg_dir - adrp x1, swapper_pg_dir + SWAPPER_DIR_SIZE + RESERVED_TTBR0_SIZE - bl __inval_cache_range + ldr x1, =(IDMAP_DIR_SIZE + SWAPPER_DIR_SIZE + RESERVED_TTBR0_SIZE) + bl __inval_dcache_area /* * Clear the idmap and swapper page tables. */ adrp x0, idmap_pg_dir - adrp x6, swapper_pg_dir + SWAPPER_DIR_SIZE + RESERVED_TTBR0_SIZE + ldr x1, =(IDMAP_DIR_SIZE + SWAPPER_DIR_SIZE + RESERVED_TTBR0_SIZE) 1: stp xzr, xzr, [x0], #16 stp xzr, xzr, [x0], #16 stp xzr, xzr, [x0], #16 stp xzr, xzr, [x0], #16 - cmp x0, x6 - b.lo 1b + subs x1, x1, #64 + b.ne 1b mov x7, SWAPPER_MM_MMUFLAGS @@ -307,9 +307,9 @@ __create_page_tables: * tables again to remove any speculatively loaded cache lines. */ adrp x0, idmap_pg_dir - adrp x1, swapper_pg_dir + SWAPPER_DIR_SIZE + RESERVED_TTBR0_SIZE + ldr x1, =(IDMAP_DIR_SIZE + SWAPPER_DIR_SIZE + RESERVED_TTBR0_SIZE) dmb sy - bl __inval_cache_range + bl __inval_dcache_area ret x28 ENDPROC(__create_page_tables) diff --git a/arch/arm64/mm/cache.S b/arch/arm64/mm/cache.S index 83c27b6e6dca..ed47fbbb4b05 100644 --- a/arch/arm64/mm/cache.S +++ b/arch/arm64/mm/cache.S @@ -109,20 +109,25 @@ ENTRY(__clean_dcache_area_pou) ENDPROC(__clean_dcache_area_pou) /* - * __dma_inv_area(start, size) - * - start - virtual start address of region + * __inval_dcache_area(kaddr, size) + * + * Ensure that any D-cache lines for the interval [kaddr, kaddr+size) + * are invalidated. Any partial lines at the ends of the interval are + * also cleaned to PoC to prevent data loss. + * + * - kaddr - kernel address * - size - size in question */ -__dma_inv_area: - add x1, x1, x0 +ENTRY(__inval_dcache_area) /* FALLTHROUGH */ /* - * __inval_cache_range(start, end) - * - start - start address of region - * - end - end address of region + * __dma_inv_area(start, size) + * - start - virtual start address of region + * - size - size in question */ -ENTRY(__inval_cache_range) +__dma_inv_area: + add x1, x1, x0 dcache_line_size x2, x3 sub x3, x2, #1 tst x1, x3 // end cache line aligned? @@ -140,7 +145,7 @@ ENTRY(__inval_cache_range) b.lo 2b dsb sy ret -ENDPIPROC(__inval_cache_range) +ENDPIPROC(__inval_dcache_area) ENDPROC(__dma_inv_area) /* -- cgit v1.2.3 From 7aac405ebb3224037efd56b73d82d181111cdac3 Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Tue, 25 Jul 2017 11:55:40 +0100 Subject: arm64: Expose DC CVAP to userspace The ARMv8.2-DCPoP feature introduces persistent memory support to the architecture, by defining a point of persistence in the memory hierarchy, and a corresponding cache maintenance operation, DC CVAP. Expose the support via HWCAP and MRS emulation. 
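For reference, once this lands userspace can probe for the feature in the usual way through the auxiliary vector (a minimal sketch, assuming the updated uapi header below is installed; the "dcpop" string is also reported in /proc/cpuinfo):

/* Userspace sketch: detect ARMv8.2-DCPoP support through HWCAP_DCPOP. */
#include <stdio.h>
#include <sys/auxv.h>
#include <asm/hwcap.h>

int main(void)
{
        unsigned long hwcaps = getauxval(AT_HWCAP);

        if (hwcaps & HWCAP_DCPOP)
                printf("DC CVAP (ARMv8.2-DCPoP) is available\n");
        else
                printf("DC CVAP not reported; only DC CVAC semantics available\n");

        return 0;
}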
Reviewed-by: Will Deacon Signed-off-by: Robin Murphy Signed-off-by: Catalin Marinas --- Documentation/arm64/cpu-feature-registers.txt | 2 ++ arch/arm64/include/asm/sysreg.h | 1 + arch/arm64/include/uapi/asm/hwcap.h | 1 + arch/arm64/kernel/cpufeature.c | 2 ++ arch/arm64/kernel/cpuinfo.c | 1 + 5 files changed, 7 insertions(+) (limited to 'arch/arm64') diff --git a/Documentation/arm64/cpu-feature-registers.txt b/Documentation/arm64/cpu-feature-registers.txt index d1c97f9f51cc..dad411d635d8 100644 --- a/Documentation/arm64/cpu-feature-registers.txt +++ b/Documentation/arm64/cpu-feature-registers.txt @@ -179,6 +179,8 @@ infrastructure: | FCMA | [19-16] | y | |--------------------------------------------------| | JSCVT | [15-12] | y | + |--------------------------------------------------| + | DPB | [3-0] | y | x--------------------------------------------------x Appendix I: Example diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index 248339e4aaf5..f707fed5886f 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -329,6 +329,7 @@ #define ID_AA64ISAR1_LRCPC_SHIFT 20 #define ID_AA64ISAR1_FCMA_SHIFT 16 #define ID_AA64ISAR1_JSCVT_SHIFT 12 +#define ID_AA64ISAR1_DPB_SHIFT 0 /* id_aa64pfr0 */ #define ID_AA64PFR0_GIC_SHIFT 24 diff --git a/arch/arm64/include/uapi/asm/hwcap.h b/arch/arm64/include/uapi/asm/hwcap.h index 4e187ce2a811..4b9344cba83a 100644 --- a/arch/arm64/include/uapi/asm/hwcap.h +++ b/arch/arm64/include/uapi/asm/hwcap.h @@ -35,5 +35,6 @@ #define HWCAP_JSCVT (1 << 13) #define HWCAP_FCMA (1 << 14) #define HWCAP_LRCPC (1 << 15) +#define HWCAP_DCPOP (1 << 16) #endif /* _UAPI__ASM_HWCAP_H */ diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index 9f9e0064c8c1..a2542ef3ff25 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -120,6 +120,7 @@ static const struct arm64_ftr_bits ftr_id_aa64isar1[] = { ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_EXACT, ID_AA64ISAR1_LRCPC_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_EXACT, ID_AA64ISAR1_FCMA_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_EXACT, ID_AA64ISAR1_JSCVT_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_EXACT, ID_AA64ISAR1_DPB_SHIFT, 4, 0), ARM64_FTR_END, }; @@ -916,6 +917,7 @@ static const struct arm64_cpu_capabilities arm64_elf_hwcaps[] = { HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_FP_SHIFT, FTR_SIGNED, 1, CAP_HWCAP, HWCAP_FPHP), HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_ASIMD_SHIFT, FTR_SIGNED, 0, CAP_HWCAP, HWCAP_ASIMD), HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_ASIMD_SHIFT, FTR_SIGNED, 1, CAP_HWCAP, HWCAP_ASIMDHP), + HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_DPB_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, HWCAP_DCPOP), HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_JSCVT_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, HWCAP_JSCVT), HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_FCMA_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, HWCAP_FCMA), HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_LRCPC_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, HWCAP_LRCPC), diff --git a/arch/arm64/kernel/cpuinfo.c b/arch/arm64/kernel/cpuinfo.c index f495ee5049fd..311885962830 100644 --- a/arch/arm64/kernel/cpuinfo.c +++ b/arch/arm64/kernel/cpuinfo.c @@ -68,6 +68,7 @@ static const char *const hwcap_str[] = { "jscvt", "fcma", "lrcpc", + "dcpop", NULL }; -- cgit v1.2.3 From e1bc5d1b8e0547c258e65dd97a03560f4d69e635 Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Tue, 25 Jul 2017 11:55:41 +0100 Subject: arm64: Handle trapped DC CVAP Cache clean to PoP 
is subject to the same access controls as to PoC, so if we are trapping userspace cache maintenance with SCTLR_EL1.UCI, we need to be prepared to handle it. To avoid getting into complicated fights with binutils about ARMv8.2 options, we'll just cheat and use the raw SYS instruction rather than the 'proper' DC alias. Reviewed-by: Will Deacon Signed-off-by: Robin Murphy Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/esr.h | 3 ++- arch/arm64/kernel/traps.c | 3 +++ 2 files changed, 5 insertions(+), 1 deletion(-) (limited to 'arch/arm64') diff --git a/arch/arm64/include/asm/esr.h b/arch/arm64/include/asm/esr.h index 130b5343ba6d..66ed8b6b9976 100644 --- a/arch/arm64/include/asm/esr.h +++ b/arch/arm64/include/asm/esr.h @@ -169,9 +169,10 @@ /* * User space cache operations have the following sysreg encoding * in System instructions. - * op0=1, op1=3, op2=1, crn=7, crm={ 5, 10, 11, 14 }, WRITE (L=0) + * op0=1, op1=3, op2=1, crn=7, crm={ 5, 10, 11, 12, 14 }, WRITE (L=0) */ #define ESR_ELx_SYS64_ISS_CRM_DC_CIVAC 14 +#define ESR_ELx_SYS64_ISS_CRM_DC_CVAP 12 #define ESR_ELx_SYS64_ISS_CRM_DC_CVAU 11 #define ESR_ELx_SYS64_ISS_CRM_DC_CVAC 10 #define ESR_ELx_SYS64_ISS_CRM_IC_IVAU 5 diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c index 0f047e916cee..ccb9727d67b2 100644 --- a/arch/arm64/kernel/traps.c +++ b/arch/arm64/kernel/traps.c @@ -484,6 +484,9 @@ static void user_cache_maint_handler(unsigned int esr, struct pt_regs *regs) case ESR_ELx_SYS64_ISS_CRM_DC_CVAC: /* DC CVAC, gets promoted */ __user_cache_maint("dc civac", address, ret); break; + case ESR_ELx_SYS64_ISS_CRM_DC_CVAP: /* DC CVAP */ + __user_cache_maint("sys 3, c7, c12, 1", address, ret); + break; case ESR_ELx_SYS64_ISS_CRM_DC_CIVAC: /* DC CIVAC */ __user_cache_maint("dc civac", address, ret); break; -- cgit v1.2.3 From d50e071fdaa33c1b399c764c44fa1ce879881185 Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Tue, 25 Jul 2017 11:55:42 +0100 Subject: arm64: Implement pmem API support Add a clean-to-point-of-persistence cache maintenance helper, and wire up the basic architectural support for the pmem driver based on it. Reviewed-by: Will Deacon Signed-off-by: Robin Murphy [catalin.marinas@arm.com: move arch_*_pmem() functions to arch/arm64/mm/flush.c] [catalin.marinas@arm.com: change dmb(sy) to dmb(osh)] Signed-off-by: Catalin Marinas --- arch/arm64/Kconfig | 11 +++++++++++ arch/arm64/include/asm/assembler.h | 6 ++++++ arch/arm64/include/asm/cacheflush.h | 1 + arch/arm64/include/asm/cpucaps.h | 3 ++- arch/arm64/kernel/cpufeature.c | 11 +++++++++++ arch/arm64/mm/cache.S | 14 ++++++++++++++ arch/arm64/mm/flush.c | 16 ++++++++++++++++ 7 files changed, 61 insertions(+), 1 deletion(-) (limited to 'arch/arm64') diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index dfd908630631..0b0576a54724 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -960,6 +960,17 @@ config ARM64_UAO regular load/store instructions if the cpu does not implement the feature. +config ARM64_PMEM + bool "Enable support for persistent memory" + select ARCH_HAS_PMEM_API + help + Say Y to enable support for the persistent memory API based on the + ARMv8.2 DCPoP feature. + + The feature is detected at runtime, and the kernel will use DC CVAC + operations if DC CVAP is not supported (following the behaviour of + DC CVAP itself if the system does not define a point of persistence). 
+ endmenu config ARM64_MODULE_CMODEL_LARGE diff --git a/arch/arm64/include/asm/assembler.h b/arch/arm64/include/asm/assembler.h index 1b67c3782d00..5d8903c45031 100644 --- a/arch/arm64/include/asm/assembler.h +++ b/arch/arm64/include/asm/assembler.h @@ -352,6 +352,12 @@ alternative_if_not ARM64_WORKAROUND_CLEAN_CACHE dc \op, \kaddr alternative_else dc civac, \kaddr +alternative_endif + .elseif (\op == cvap) +alternative_if ARM64_HAS_DCPOP + sys 3, c7, c12, 1, \kaddr // dc cvap +alternative_else + dc cvac, \kaddr alternative_endif .else dc \op, \kaddr diff --git a/arch/arm64/include/asm/cacheflush.h b/arch/arm64/include/asm/cacheflush.h index b4b43a94dffd..76d1cc85d5b1 100644 --- a/arch/arm64/include/asm/cacheflush.h +++ b/arch/arm64/include/asm/cacheflush.h @@ -69,6 +69,7 @@ extern void flush_icache_range(unsigned long start, unsigned long end); extern void __flush_dcache_area(void *addr, size_t len); extern void __inval_dcache_area(void *addr, size_t len); extern void __clean_dcache_area_poc(void *addr, size_t len); +extern void __clean_dcache_area_pop(void *addr, size_t len); extern void __clean_dcache_area_pou(void *addr, size_t len); extern long __flush_cache_user_range(unsigned long start, unsigned long end); extern void sync_icache_aliases(void *kaddr, unsigned long len); diff --git a/arch/arm64/include/asm/cpucaps.h b/arch/arm64/include/asm/cpucaps.h index 8d2272c6822c..8da621627d7c 100644 --- a/arch/arm64/include/asm/cpucaps.h +++ b/arch/arm64/include/asm/cpucaps.h @@ -39,7 +39,8 @@ #define ARM64_WORKAROUND_QCOM_FALKOR_E1003 18 #define ARM64_WORKAROUND_858921 19 #define ARM64_WORKAROUND_CAVIUM_30115 20 +#define ARM64_HAS_DCPOP 21 -#define ARM64_NCAPS 21 +#define ARM64_NCAPS 22 #endif /* __ASM_CPUCAPS_H */ diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index a2542ef3ff25..cd52d365d1f0 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -889,6 +889,17 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .min_field_value = 0, .matches = has_no_fpsimd, }, +#ifdef CONFIG_ARM64_PMEM + { + .desc = "Data cache clean to Point of Persistence", + .capability = ARM64_HAS_DCPOP, + .def_scope = SCOPE_SYSTEM, + .matches = has_cpuid_feature, + .sys_reg = SYS_ID_AA64ISAR1_EL1, + .field_pos = ID_AA64ISAR1_DPB_SHIFT, + .min_field_value = 1, + }, +#endif {}, }; diff --git a/arch/arm64/mm/cache.S b/arch/arm64/mm/cache.S index ed47fbbb4b05..7f1dbe962cf5 100644 --- a/arch/arm64/mm/cache.S +++ b/arch/arm64/mm/cache.S @@ -171,6 +171,20 @@ __dma_clean_area: ENDPIPROC(__clean_dcache_area_poc) ENDPROC(__dma_clean_area) +/* + * __clean_dcache_area_pop(kaddr, size) + * + * Ensure that any D-cache lines for the interval [kaddr, kaddr+size) + * are cleaned to the PoP. + * + * - kaddr - kernel address + * - size - size in question + */ +ENTRY(__clean_dcache_area_pop) + dcache_by_line_op cvap, sy, x0, x1, x2, x3 + ret +ENDPIPROC(__clean_dcache_area_pop) + /* * __dma_flush_area(start, size) * diff --git a/arch/arm64/mm/flush.c b/arch/arm64/mm/flush.c index 21a8d828cbf4..280f90ff33a2 100644 --- a/arch/arm64/mm/flush.c +++ b/arch/arm64/mm/flush.c @@ -83,3 +83,19 @@ EXPORT_SYMBOL(flush_dcache_page); * Additional functions defined in assembly. 
*/ EXPORT_SYMBOL(flush_icache_range); + +#ifdef CONFIG_ARCH_HAS_PMEM_API +static inline void arch_wb_cache_pmem(void *addr, size_t size) +{ + /* Ensure order against any prior non-cacheable writes */ + dmb(osh); + __clean_dcache_area_pop(addr, size); +} +EXPORT_SYMBOL_GPL(arch_wb_cache_pmem); + +static inline void arch_invalidate_pmem(void *addr, size_t size) +{ + __inval_dcache_area(addr, size); +} +EXPORT_SYMBOL_GPL(arch_invalidate_pmem); +#endif -- cgit v1.2.3 From 5d7bdeb1eeb250222304cb7b8126892cc47980a8 Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Tue, 25 Jul 2017 11:55:43 +0100 Subject: arm64: uaccess: Implement *_flushcache variants Implement the set of copy functions with guarantees of a clean cache upon completion necessary to support the pmem driver. Reviewed-by: Will Deacon Signed-off-by: Robin Murphy Signed-off-by: Catalin Marinas --- arch/arm64/Kconfig | 1 + arch/arm64/include/asm/string.h | 4 ++++ arch/arm64/include/asm/uaccess.h | 12 ++++++++++++ arch/arm64/lib/Makefile | 2 ++ 4 files changed, 19 insertions(+) (limited to 'arch/arm64') diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 0b0576a54724..e43a63b3d14b 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -963,6 +963,7 @@ config ARM64_UAO config ARM64_PMEM bool "Enable support for persistent memory" select ARCH_HAS_PMEM_API + select ARCH_HAS_UACCESS_FLUSHCACHE help Say Y to enable support for the persistent memory API based on the ARMv8.2 DCPoP feature. diff --git a/arch/arm64/include/asm/string.h b/arch/arm64/include/asm/string.h index d0aa42907569..dd95d33a5bd5 100644 --- a/arch/arm64/include/asm/string.h +++ b/arch/arm64/include/asm/string.h @@ -52,6 +52,10 @@ extern void *__memset(void *, int, __kernel_size_t); #define __HAVE_ARCH_MEMCMP extern int memcmp(const void *, const void *, size_t); +#ifdef CONFIG_ARCH_HAS_UACCESS_FLUSHCACHE +#define __HAVE_ARCH_MEMCPY_FLUSHCACHE +void memcpy_flushcache(void *dst, const void *src, size_t cnt); +#endif #if defined(CONFIG_KASAN) && !defined(__SANITIZE_ADDRESS__) diff --git a/arch/arm64/include/asm/uaccess.h b/arch/arm64/include/asm/uaccess.h index fab46a0ea223..bf8435deb8a1 100644 --- a/arch/arm64/include/asm/uaccess.h +++ b/arch/arm64/include/asm/uaccess.h @@ -347,4 +347,16 @@ extern long strncpy_from_user(char *dest, const char __user *src, long count); extern __must_check long strnlen_user(const char __user *str, long n); +#ifdef CONFIG_ARCH_HAS_UACCESS_FLUSHCACHE +struct page; +void memcpy_page_flushcache(char *to, struct page *page, size_t offset, size_t len); +extern unsigned long __must_check __copy_user_flushcache(void *to, const void __user *from, unsigned long n); + +static inline int __copy_from_user_flushcache(void *dst, const void __user *src, unsigned size) +{ + kasan_check_write(dst, size); + return __copy_user_flushcache(dst, src, size); +} +#endif + #endif /* __ASM_UACCESS_H */ diff --git a/arch/arm64/lib/Makefile b/arch/arm64/lib/Makefile index c86b7909ef31..a0abc142c92b 100644 --- a/arch/arm64/lib/Makefile +++ b/arch/arm64/lib/Makefile @@ -17,3 +17,5 @@ CFLAGS_atomic_ll_sc.o := -fcall-used-x0 -ffixed-x1 -ffixed-x2 \ -fcall-saved-x10 -fcall-saved-x11 -fcall-saved-x12 \ -fcall-saved-x13 -fcall-saved-x14 -fcall-saved-x15 \ -fcall-saved-x18 + +lib-$(CONFIG_ARCH_HAS_UACCESS_FLUSHCACHE) += uaccess_flushcache.o -- cgit v1.2.3 From 739586951b8abe381a98797a5e27a0a9336333d6 Mon Sep 17 00:00:00 2001 From: Dmitry Safonov Date: Wed, 26 Jul 2017 20:07:37 +0300 Subject: arm64/vdso: Support mremap() for vDSO vDSO VMA address is saved in mm_context 
for the purpose of using restorer from vDSO page to return to userspace after signal handling. In Checkpoint Restore in Userspace (CRIU) project we place vDSO VMA on restore back to the place where it was on the dump. With the exception for x86 (where there is API to map vDSO with arch_prctl()), we move vDSO inherited from CRIU task to restoree position by mremap(). CRIU does support arm64 architecture, but kernel doesn't update context.vdso pointer after mremap(). Which results in translation fault after signal handling on restored application: https://github.com/xemul/criu/issues/288 Make vDSO code track the VMA address by supplying .mremap() fops the same way it's done for x86 and arm32 by: commit b059a453b1cf ("x86/vdso: Add mremap hook to vm_special_mapping") commit 280e87e98c09 ("ARM: 8683/1: ARM32: Support mremap() for sigpage/vDSO"). Cc: Russell King Cc: linux-arm-kernel@lists.infradead.org Cc: Cyrill Gorcunov Cc: Pavel Emelyanov Cc: Christopher Covington Reviewed-by: Will Deacon Signed-off-by: Dmitry Safonov Signed-off-by: Catalin Marinas --- arch/arm64/kernel/vdso.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'arch/arm64') diff --git a/arch/arm64/kernel/vdso.c b/arch/arm64/kernel/vdso.c index e8f759f764f2..2d419006ad43 100644 --- a/arch/arm64/kernel/vdso.c +++ b/arch/arm64/kernel/vdso.c @@ -110,12 +110,27 @@ int aarch32_setup_vectors_page(struct linux_binprm *bprm, int uses_interp) } #endif /* CONFIG_COMPAT */ +static int vdso_mremap(const struct vm_special_mapping *sm, + struct vm_area_struct *new_vma) +{ + unsigned long new_size = new_vma->vm_end - new_vma->vm_start; + unsigned long vdso_size = vdso_end - vdso_start; + + if (vdso_size != new_size) + return -EINVAL; + + current->mm->context.vdso = (void *)new_vma->vm_start; + + return 0; +} + static struct vm_special_mapping vdso_spec[2] __ro_after_init = { { .name = "[vvar]", }, { .name = "[vdso]", + .mremap = vdso_mremap, }, }; -- cgit v1.2.3 From 7326749801396105aef0ed9229df746ac9e24300 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Sat, 22 Jul 2017 18:45:33 +0100 Subject: arm64: unwind: reference pt_regs via embedded stack frame As it turns out, the unwind code is slightly broken, and probably has been for a while. The problem is in the dumping of the exception stack, which is intended to dump the contents of the pt_regs struct at each level in the call stack where an exception was taken and routed to a routine marked as __exception (which means its stack frame is right below the pt_regs struct on the stack). 'Right below the pt_regs struct' is ill defined, though: the unwind code assigns 'frame pointer + 0x10' to the .sp member of the stackframe struct at each level, and dump_backtrace() happily dereferences that as the pt_regs pointer when encountering an __exception routine. However, the actual size of the stack frame created by this routine (which could be one of many __exception routines we have in the kernel) is not known, and so frame.sp is pretty useless to figure out where struct pt_regs really is. So it seems the only way to ensure that we can find our struct pt_regs when walking the stack frames is to put it at a known fixed offset of the stack frame pointer that is passed to such __exception routines. The simplest way to do that is to put it inside pt_regs itself, which is the main change implemented by this patch. 
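To illustrate the layout this gives us (a sketch, not code from the patch; the helper name is invented), any frame pointer created by the entry code now lets the unwinder recover the saved registers with plain pointer arithmetic:

/* Sketch: the frame record is embedded at the end of pt_regs. */
#include <linux/stddef.h>
#include <asm/ptrace.h>

static struct pt_regs *regs_from_entry_fp(unsigned long fp)    /* hypothetical */
{
        /* fp points at the {x29, elr} pair stored in pt_regs->stackframe */
        return (struct pt_regs *)(fp - offsetof(struct pt_regs, stackframe));
}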
As a bonus, doing this allows us to get rid of a fair amount of cruft related to walking from one stack to the other, which is especially nice since we intend to introduce yet another stack for overflow handling once we add support for vmapped stacks. It also fixes an inconsistency where we only add a stack frame pointing to ELR_EL1 if we are executing from the IRQ stack but not when we are executing from the task stack. To consistently identify exception regs even in the presence of exceptions taken from entry code, we must check whether the next frame was created by entry text, rather than whether the current frame was created by exception text. To avoid backtracing using PCs that fall in the idmap, or are controlled by userspace, we must explicitly zero the FP and LR in startup paths, and must ensure that the frame embedded in pt_regs is zeroed upon entry from EL0. To avoid these NULL entries showing in the backtrace, unwind_frame() is updated to avoid them. Signed-off-by: Ard Biesheuvel [Mark: compare current frame against .entry.text, avoid bogus PCs] Signed-off-by: Mark Rutland Cc: Catalin Marinas Cc: James Morse Cc: Will Deacon --- arch/arm64/include/asm/irq.h | 25 ------------------------- arch/arm64/include/asm/ptrace.h | 1 + arch/arm64/include/asm/traps.h | 5 +++++ arch/arm64/kernel/asm-offsets.c | 1 + arch/arm64/kernel/entry.S | 20 ++++++++++++-------- arch/arm64/kernel/head.S | 4 ++++ arch/arm64/kernel/stacktrace.c | 33 ++++++--------------------------- arch/arm64/kernel/traps.c | 32 +++++++------------------------- 8 files changed, 36 insertions(+), 85 deletions(-) (limited to 'arch/arm64') diff --git a/arch/arm64/include/asm/irq.h b/arch/arm64/include/asm/irq.h index 8155e486ce48..8ba89c4ca183 100644 --- a/arch/arm64/include/asm/irq.h +++ b/arch/arm64/include/asm/irq.h @@ -16,31 +16,6 @@ struct pt_regs; DECLARE_PER_CPU(unsigned long [IRQ_STACK_SIZE/sizeof(long)], irq_stack); -/* - * The highest address on the stack, and the first to be used. Used to - * find the dummy-stack frame put down by el?_irq() in entry.S, which - * is structured as follows: - * - * ------------ - * | | <- irq_stack_ptr - * top ------------ - * | x19 | <- irq_stack_ptr - 0x08 - * ------------ - * | x29 | <- irq_stack_ptr - 0x10 - * ------------ - * - * where x19 holds a copy of the task stack pointer where the struct pt_regs - * from kernel_entry can be found. - * - */ -#define IRQ_STACK_PTR() ((unsigned long)raw_cpu_ptr(irq_stack) + IRQ_STACK_START_SP) - -/* - * The offset from irq_stack_ptr where entry.S will store the original - * stack pointer. Used by unwind_frame() and dump_backtrace(). - */ -#define IRQ_STACK_TO_TASK_STACK(ptr) (*((unsigned long *)((ptr) - 0x08))) - extern void set_handle_irq(void (*handle_irq)(struct pt_regs *)); static inline int nr_legacy_irqs(void) diff --git a/arch/arm64/include/asm/ptrace.h b/arch/arm64/include/asm/ptrace.h index 11403fdd0a50..ee72aa979078 100644 --- a/arch/arm64/include/asm/ptrace.h +++ b/arch/arm64/include/asm/ptrace.h @@ -119,6 +119,7 @@ struct pt_regs { u64 syscallno; u64 orig_addr_limit; u64 unused; // maintain 16 byte alignment + u64 stackframe[2]; }; #define MAX_REG_OFFSET offsetof(struct pt_regs, pstate) diff --git a/arch/arm64/include/asm/traps.h b/arch/arm64/include/asm/traps.h index 02e9035b0685..41361684580d 100644 --- a/arch/arm64/include/asm/traps.h +++ b/arch/arm64/include/asm/traps.h @@ -60,4 +60,9 @@ static inline int in_exception_text(unsigned long ptr) return in ?
: __in_irqentry_text(ptr); } +static inline int in_entry_text(unsigned long ptr) +{ + return ptr >= (unsigned long)&__entry_text_start && + ptr < (unsigned long)&__entry_text_end; +} #endif diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c index b3bb7ef97bc8..71bf088f1e4b 100644 --- a/arch/arm64/kernel/asm-offsets.c +++ b/arch/arm64/kernel/asm-offsets.c @@ -75,6 +75,7 @@ int main(void) DEFINE(S_ORIG_X0, offsetof(struct pt_regs, orig_x0)); DEFINE(S_SYSCALLNO, offsetof(struct pt_regs, syscallno)); DEFINE(S_ORIG_ADDR_LIMIT, offsetof(struct pt_regs, orig_addr_limit)); + DEFINE(S_STACKFRAME, offsetof(struct pt_regs, stackframe)); DEFINE(S_FRAME_SIZE, sizeof(struct pt_regs)); BLANK(); DEFINE(MM_CONTEXT_ID, offsetof(struct mm_struct, context.id.counter)); diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S index 9e126d3d8b53..612a077ba109 100644 --- a/arch/arm64/kernel/entry.S +++ b/arch/arm64/kernel/entry.S @@ -111,6 +111,18 @@ mrs x23, spsr_el1 stp lr, x21, [sp, #S_LR] + /* + * In order to be able to dump the contents of struct pt_regs at the + * time the exception was taken (in case we attempt to walk the call + * stack later), chain it together with the stack frames. + */ + .if \el == 0 + stp xzr, xzr, [sp, #S_STACKFRAME] + .else + stp x29, x22, [sp, #S_STACKFRAME] + .endif + add x29, sp, #S_STACKFRAME + #ifdef CONFIG_ARM64_SW_TTBR0_PAN /* * Set the TTBR0 PAN bit in SPSR. When the exception is taken from @@ -265,14 +277,6 @@ alternative_else_nop_endif /* switch to the irq stack */ mov sp, x26 - - /* - * Add a dummy stack frame, this non-standard format is fixed up - * by unwind_frame() - */ - stp x29, x19, [sp, #-16]! - mov x29, sp - 9998: .endm diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S index 973df7de7bf8..f9e4aacf4f42 100644 --- a/arch/arm64/kernel/head.S +++ b/arch/arm64/kernel/head.S @@ -362,6 +362,9 @@ __primary_switched: ret // to __primary_switch() 0: #endif + add sp, sp, #16 + mov x29, #0 + mov x30, #0 b start_kernel ENDPROC(__primary_switched) @@ -617,6 +620,7 @@ __secondary_switched: ldr x2, [x0, #CPU_BOOT_TASK] msr sp_el0, x2 mov x29, #0 + mov x30, #0 b secondary_start_kernel ENDPROC(__secondary_switched) diff --git a/arch/arm64/kernel/stacktrace.c b/arch/arm64/kernel/stacktrace.c index beaf51fb3088..81d9262acaf0 100644 --- a/arch/arm64/kernel/stacktrace.c +++ b/arch/arm64/kernel/stacktrace.c @@ -76,34 +76,13 @@ int notrace unwind_frame(struct task_struct *tsk, struct stackframe *frame) #endif /* CONFIG_FUNCTION_GRAPH_TRACER */ /* - * Check whether we are going to walk through from interrupt stack - * to task stack. - * If we reach the end of the stack - and its an interrupt stack, - * unpack the dummy frame to find the original elr. - * - * Check the frame->fp we read from the bottom of the irq_stack, - * and the original task stack pointer are both in current->stack. + * Frames created upon entry from EL0 have NULL FP and PC values, so + * don't bother reporting these. Frames created by __noreturn functions + * might have a valid FP even if PC is bogus, so only terminate where + * both are NULL. 
*/ - if (frame->sp == IRQ_STACK_PTR()) { - struct pt_regs *irq_args; - unsigned long orig_sp = IRQ_STACK_TO_TASK_STACK(frame->sp); - - if (object_is_on_stack((void *)orig_sp) && - object_is_on_stack((void *)frame->fp)) { - frame->sp = orig_sp; - - /* orig_sp is the saved pt_regs, find the elr */ - irq_args = (struct pt_regs *)orig_sp; - frame->pc = irq_args->pc; - } else { - /* - * This frame has a non-standard format, and we - * didn't fix it, because the data looked wrong. - * Refuse to output this frame. - */ - return -EINVAL; - } - } + if (!frame->fp && !frame->pc) + return -EINVAL; return 0; } diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c index 5797f5037ec9..075c29a24345 100644 --- a/arch/arm64/kernel/traps.c +++ b/arch/arm64/kernel/traps.c @@ -143,7 +143,6 @@ static void dump_instr(const char *lvl, struct pt_regs *regs) void dump_backtrace(struct pt_regs *regs, struct task_struct *tsk) { struct stackframe frame; - unsigned long irq_stack_ptr; int skip; pr_debug("%s(regs = %p tsk = %p)\n", __func__, regs, tsk); @@ -154,15 +153,6 @@ void dump_backtrace(struct pt_regs *regs, struct task_struct *tsk) if (!try_get_task_stack(tsk)) return; - /* - * Switching between stacks is valid when tracing current and in - * non-preemptible context. - */ - if (tsk == current && !preemptible()) - irq_stack_ptr = IRQ_STACK_PTR(); - else - irq_stack_ptr = 0; - if (tsk == current) { frame.fp = (unsigned long)__builtin_frame_address(0); frame.sp = current_stack_pointer; @@ -182,13 +172,12 @@ void dump_backtrace(struct pt_regs *regs, struct task_struct *tsk) skip = !!regs; printk("Call trace:\n"); while (1) { - unsigned long where = frame.pc; unsigned long stack; int ret; /* skip until specified stack frame */ if (!skip) { - dump_backtrace_entry(where); + dump_backtrace_entry(frame.pc); } else if (frame.fp == regs->regs[29]) { skip = 0; /* @@ -203,20 +192,13 @@ void dump_backtrace(struct pt_regs *regs, struct task_struct *tsk) ret = unwind_frame(tsk, &frame); if (ret < 0) break; - stack = frame.sp; - if (in_exception_text(where)) { - /* - * If we switched to the irq_stack before calling this - * exception handler, then the pt_regs will be on the - * task stack. The easiest way to tell is if the large - * pt_regs would overlap with the end of the irq_stack. - */ - if (stack < irq_stack_ptr && - (stack + sizeof(struct pt_regs)) > irq_stack_ptr) - stack = IRQ_STACK_TO_TASK_STACK(irq_stack_ptr); + if (in_entry_text(frame.pc)) { + stack = frame.fp - offsetof(struct pt_regs, stackframe); - dump_mem("", "Exception stack", stack, - stack + sizeof(struct pt_regs)); + if (on_task_stack(tsk, stack) || + (tsk == current && !preemptible() && on_irq_stack(stack))) + dump_mem("", "Exception stack", stack, + stack + sizeof(struct pt_regs)); } } -- cgit v1.2.3 From 31e43ad3b74a5d7b282023b72f25fc677c14c727 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Sun, 23 Jul 2017 09:05:38 +0100 Subject: arm64: unwind: remove sp from struct stackframe The unwind code sets the sp member of struct stackframe to 'frame pointer + 0x10' unconditionally, without regard for whether doing so produces a legal value. So let's simply remove it now that we have stopped using it anyway. 
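After this change a stack walk only ever tracks the {fp, pc} pair; a minimal sketch of the resulting usage pattern is below (the function name is invented, printing with %pS is just an example, and the optional graph field used by the function-graph tracer is omitted):

/* Sketch: walking the current task's stack with the reduced stackframe. */
static void walk_example(void)                  /* hypothetical helper */
{
        struct stackframe frame = {
                .fp = (unsigned long)__builtin_frame_address(0),
                .pc = (unsigned long)walk_example,      /* seed with our own pc */
        };

        do {
                printk("%pS\n", (void *)frame.pc);
        } while (!unwind_frame(current, &frame));
}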
Signed-off-by: Ard Biesheuvel Signed-off-by: Mark Rutland Cc: Catalin Marinas Cc: James Morse Cc: Will Deacon --- arch/arm64/include/asm/stacktrace.h | 1 - arch/arm64/kernel/perf_callchain.c | 1 - arch/arm64/kernel/process.c | 5 +---- arch/arm64/kernel/return_address.c | 1 - arch/arm64/kernel/stacktrace.c | 4 ---- arch/arm64/kernel/time.c | 1 - arch/arm64/kernel/traps.c | 2 -- 7 files changed, 1 insertion(+), 14 deletions(-) (limited to 'arch/arm64') diff --git a/arch/arm64/include/asm/stacktrace.h b/arch/arm64/include/asm/stacktrace.h index 5b6eafccc5d8..3bebab378c72 100644 --- a/arch/arm64/include/asm/stacktrace.h +++ b/arch/arm64/include/asm/stacktrace.h @@ -20,7 +20,6 @@ struct task_struct; struct stackframe { unsigned long fp; - unsigned long sp; unsigned long pc; #ifdef CONFIG_FUNCTION_GRAPH_TRACER unsigned int graph; diff --git a/arch/arm64/kernel/perf_callchain.c b/arch/arm64/kernel/perf_callchain.c index 713ca824f266..bcafd7dcfe8b 100644 --- a/arch/arm64/kernel/perf_callchain.c +++ b/arch/arm64/kernel/perf_callchain.c @@ -162,7 +162,6 @@ void perf_callchain_kernel(struct perf_callchain_entry_ctx *entry, } frame.fp = regs->regs[29]; - frame.sp = regs->sp; frame.pc = regs->pc; #ifdef CONFIG_FUNCTION_GRAPH_TRACER frame.graph = current->curr_ret_stack; diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c index 659ae8094ed5..85b953dd023a 100644 --- a/arch/arm64/kernel/process.c +++ b/arch/arm64/kernel/process.c @@ -382,15 +382,12 @@ unsigned long get_wchan(struct task_struct *p) return 0; frame.fp = thread_saved_fp(p); - frame.sp = thread_saved_sp(p); frame.pc = thread_saved_pc(p); #ifdef CONFIG_FUNCTION_GRAPH_TRACER frame.graph = p->curr_ret_stack; #endif do { - if (frame.sp < stack_page || - frame.sp >= stack_page + THREAD_SIZE || - unwind_frame(p, &frame)) + if (unwind_frame(p, &frame)) goto out; if (!in_sched_functions(frame.pc)) { ret = frame.pc; diff --git a/arch/arm64/kernel/return_address.c b/arch/arm64/kernel/return_address.c index 12a87f2600f2..933adbc0f654 100644 --- a/arch/arm64/kernel/return_address.c +++ b/arch/arm64/kernel/return_address.c @@ -42,7 +42,6 @@ void *return_address(unsigned int level) data.addr = NULL; frame.fp = (unsigned long)__builtin_frame_address(0); - frame.sp = current_stack_pointer; frame.pc = (unsigned long)return_address; /* dummy */ #ifdef CONFIG_FUNCTION_GRAPH_TRACER frame.graph = current->curr_ret_stack; diff --git a/arch/arm64/kernel/stacktrace.c b/arch/arm64/kernel/stacktrace.c index 81d9262acaf0..35588caad9d0 100644 --- a/arch/arm64/kernel/stacktrace.c +++ b/arch/arm64/kernel/stacktrace.c @@ -58,7 +58,6 @@ int notrace unwind_frame(struct task_struct *tsk, struct stackframe *frame) !on_task_stack(tsk, fp)) return -EINVAL; - frame->sp = fp + 0x10; frame->fp = READ_ONCE_NOCHECK(*(unsigned long *)(fp)); frame->pc = READ_ONCE_NOCHECK(*(unsigned long *)(fp + 8)); @@ -136,7 +135,6 @@ void save_stack_trace_regs(struct pt_regs *regs, struct stack_trace *trace) data.no_sched_functions = 0; frame.fp = regs->regs[29]; - frame.sp = regs->sp; frame.pc = regs->pc; #ifdef CONFIG_FUNCTION_GRAPH_TRACER frame.graph = current->curr_ret_stack; @@ -161,12 +159,10 @@ void save_stack_trace_tsk(struct task_struct *tsk, struct stack_trace *trace) if (tsk != current) { data.no_sched_functions = 1; frame.fp = thread_saved_fp(tsk); - frame.sp = thread_saved_sp(tsk); frame.pc = thread_saved_pc(tsk); } else { data.no_sched_functions = 0; frame.fp = (unsigned long)__builtin_frame_address(0); - frame.sp = current_stack_pointer; frame.pc = (unsigned 
long)save_stack_trace_tsk; } #ifdef CONFIG_FUNCTION_GRAPH_TRACER diff --git a/arch/arm64/kernel/time.c b/arch/arm64/kernel/time.c index da33c90248e9..a4391280fba9 100644 --- a/arch/arm64/kernel/time.c +++ b/arch/arm64/kernel/time.c @@ -50,7 +50,6 @@ unsigned long profile_pc(struct pt_regs *regs) return regs->pc; frame.fp = regs->regs[29]; - frame.sp = regs->sp; frame.pc = regs->pc; #ifdef CONFIG_FUNCTION_GRAPH_TRACER frame.graph = -1; /* no task info */ diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c index 075c29a24345..c2a81bf8827e 100644 --- a/arch/arm64/kernel/traps.c +++ b/arch/arm64/kernel/traps.c @@ -155,14 +155,12 @@ void dump_backtrace(struct pt_regs *regs, struct task_struct *tsk) if (tsk == current) { frame.fp = (unsigned long)__builtin_frame_address(0); - frame.sp = current_stack_pointer; frame.pc = (unsigned long)dump_backtrace; } else { /* * task blocked in __switch_to */ frame.fp = thread_saved_fp(tsk); - frame.sp = thread_saved_sp(tsk); frame.pc = thread_saved_pc(tsk); } #ifdef CONFIG_FUNCTION_GRAPH_TRACER -- cgit v1.2.3 From 66c3ec5a712005625437474cf5a04148d7890350 Mon Sep 17 00:00:00 2001 From: Dave Martin Date: Wed, 9 Aug 2017 11:43:28 +0100 Subject: arm64: neon: Forbid when irqs are disabled Currently, may_use_simd() can return true if IRQs are disabled. If the caller goes ahead and calls kernel_neon_begin(), this can result in use of local_bh_enable() in an unsafe context. In particular, __efi_fpsimd_begin() may do this when calling EFI as part of system shutdown. This patch ensures that callers don't think they can use kernel_neon_begin() in such a context. Acked-by: Ard Biesheuvel Signed-off-by: Dave Martin Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/simd.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'arch/arm64') diff --git a/arch/arm64/include/asm/simd.h b/arch/arm64/include/asm/simd.h index 5a1a927b74a2..fa8b3fe932e6 100644 --- a/arch/arm64/include/asm/simd.h +++ b/arch/arm64/include/asm/simd.h @@ -10,6 +10,7 @@ #define __ASM_SIMD_H #include +#include #include #include #include @@ -40,7 +41,8 @@ static __must_check inline bool may_use_simd(void) * can't migrate to another CPU and spuriously see it become * false. */ - return !in_irq() && !in_nmi() && !raw_cpu_read(kernel_neon_busy); + return !in_irq() && !irqs_disabled() && !in_nmi() && + !raw_cpu_read(kernel_neon_busy); } #else /* ! CONFIG_KERNEL_MODE_NEON */ -- cgit v1.2.3 From 21cfa0e96d1d521bec4e2f22a19437080e1357e7 Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Thu, 10 Aug 2017 10:49:21 +0100 Subject: arm64: uaccess: Add the uaccess_flushcache.c file The uaccess_flushcache.c file was inadvertently dropped by the maintainer in a previous commit. Add it back. Fixes: 5d7bdeb1eeb2 ("arm64: uaccess: Implement *_flushcache variants") Signed-off-by: Robin Murphy Signed-off-by: Catalin Marinas --- arch/arm64/lib/uaccess_flushcache.c | 47 +++++++++++++++++++++++++++++++++++++ 1 file changed, 47 insertions(+) create mode 100644 arch/arm64/lib/uaccess_flushcache.c (limited to 'arch/arm64') diff --git a/arch/arm64/lib/uaccess_flushcache.c b/arch/arm64/lib/uaccess_flushcache.c new file mode 100644 index 000000000000..b6ceafdb8b72 --- /dev/null +++ b/arch/arm64/lib/uaccess_flushcache.c @@ -0,0 +1,47 @@ +/* + * Copyright (C) 2017 ARM Ltd. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#include +#include +#include + +void memcpy_flushcache(void *dst, const void *src, size_t cnt) +{ + /* + * We assume this should not be called with @dst pointing to + * non-cacheable memory, such that we don't need an explicit + * barrier to order the cache maintenance against the memcpy. + */ + memcpy(dst, src, cnt); + __clean_dcache_area_pop(dst, cnt); +} +EXPORT_SYMBOL_GPL(memcpy_flushcache); + +void memcpy_page_flushcache(char *to, struct page *page, size_t offset, + size_t len) +{ + memcpy_flushcache(to, page_address(page) + offset, len); +} + +unsigned long __copy_user_flushcache(void *to, const void __user *from, + unsigned long n) +{ + unsigned long rc = __arch_copy_from_user(to, from, n); + + /* See above */ + __clean_dcache_area_pop(to, n - rc); + return rc; +} -- cgit v1.2.3 From 5cf7fb26ea841e31488723a32b1613e6b5b876fe Mon Sep 17 00:00:00 2001 From: Julien Thierry Date: Tue, 25 Jul 2017 17:27:36 +0100 Subject: arm64: perf: Connect additional events to pmu counters Last level caches and node events were almost never connected in current supported cores. We connect last level caches to the actual last level within the core and node events are connected to bus accesses. Signed-off-by: Julien Thierry Cc: Will Deacon Cc: Mark Rutland Cc: Catalin Marinas Signed-off-by: Will Deacon --- arch/arm64/kernel/perf_event.c | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'arch/arm64') diff --git a/arch/arm64/kernel/perf_event.c b/arch/arm64/kernel/perf_event.c index b83f986e7fbf..f336753d5baa 100644 --- a/arch/arm64/kernel/perf_event.c +++ b/arch/arm64/kernel/perf_event.c @@ -302,6 +302,9 @@ static const unsigned armv8_a53_perf_cache_map[PERF_COUNT_HW_CACHE_MAX] [C(BPU)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_BR_MIS_PRED, [C(BPU)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV8_PMUV3_PERFCTR_BR_PRED, [C(BPU)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_BR_MIS_PRED, + + [C(NODE)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_RD, + [C(NODE)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_WR, }; static const unsigned armv8_a57_perf_cache_map[PERF_COUNT_HW_CACHE_MAX] @@ -317,6 +320,11 @@ static const unsigned armv8_a57_perf_cache_map[PERF_COUNT_HW_CACHE_MAX] [C(L1I)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_PMUV3_PERFCTR_L1I_CACHE, [C(L1I)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_L1I_CACHE_REFILL, + [C(LL)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_PMUV3_PERFCTR_L2D_CACHE, + [C(LL)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_L2D_CACHE_REFILL, + [C(LL)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV8_PMUV3_PERFCTR_L2D_CACHE, + [C(LL)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_L2D_CACHE_REFILL, + [C(DTLB)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_IMPDEF_PERFCTR_L1D_TLB_REFILL_RD, [C(DTLB)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV8_IMPDEF_PERFCTR_L1D_TLB_REFILL_WR, @@ -326,6 +334,9 @@ static const unsigned armv8_a57_perf_cache_map[PERF_COUNT_HW_CACHE_MAX] [C(BPU)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_BR_MIS_PRED, [C(BPU)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV8_PMUV3_PERFCTR_BR_PRED, [C(BPU)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_BR_MIS_PRED, + + 
[C(NODE)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_RD, + [C(NODE)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_WR, }; static const unsigned armv8_thunder_perf_cache_map[PERF_COUNT_HW_CACHE_MAX] -- cgit v1.2.3 From d0d09d4d99e08767050bc30f2b19d6146abe01e2 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Tue, 8 Aug 2017 17:11:27 +0100 Subject: arm64: perf: Remove redundant entries from CPU-specific event maps Now that the event mapping code always looks into the PMUv3 events before any extended mappings, the extended mappings can be reduced to only those events that are not discoverable through the PMCEID registers. Signed-off-by: Will Deacon --- arch/arm64/kernel/perf_event.c | 114 ++--------------------------------------- 1 file changed, 4 insertions(+), 110 deletions(-) (limited to 'arch/arm64') diff --git a/arch/arm64/kernel/perf_event.c b/arch/arm64/kernel/perf_event.c index f336753d5baa..f7737f6dcc36 100644 --- a/arch/arm64/kernel/perf_event.c +++ b/arch/arm64/kernel/perf_event.c @@ -202,55 +202,6 @@ static const unsigned armv8_pmuv3_perf_map[PERF_COUNT_HW_MAX] = { [PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = ARMV8_PMUV3_PERFCTR_STALL_BACKEND, }; -/* ARM Cortex-A53 HW events mapping. */ -static const unsigned armv8_a53_perf_map[PERF_COUNT_HW_MAX] = { - PERF_MAP_ALL_UNSUPPORTED, - [PERF_COUNT_HW_CPU_CYCLES] = ARMV8_PMUV3_PERFCTR_CPU_CYCLES, - [PERF_COUNT_HW_INSTRUCTIONS] = ARMV8_PMUV3_PERFCTR_INST_RETIRED, - [PERF_COUNT_HW_CACHE_REFERENCES] = ARMV8_PMUV3_PERFCTR_L1D_CACHE, - [PERF_COUNT_HW_CACHE_MISSES] = ARMV8_PMUV3_PERFCTR_L1D_CACHE_REFILL, - [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = ARMV8_PMUV3_PERFCTR_PC_WRITE_RETIRED, - [PERF_COUNT_HW_BRANCH_MISSES] = ARMV8_PMUV3_PERFCTR_BR_MIS_PRED, - [PERF_COUNT_HW_BUS_CYCLES] = ARMV8_PMUV3_PERFCTR_BUS_CYCLES, -}; - -/* ARM Cortex-A57 and Cortex-A72 events mapping. 
*/ -static const unsigned armv8_a57_perf_map[PERF_COUNT_HW_MAX] = { - PERF_MAP_ALL_UNSUPPORTED, - [PERF_COUNT_HW_CPU_CYCLES] = ARMV8_PMUV3_PERFCTR_CPU_CYCLES, - [PERF_COUNT_HW_INSTRUCTIONS] = ARMV8_PMUV3_PERFCTR_INST_RETIRED, - [PERF_COUNT_HW_CACHE_REFERENCES] = ARMV8_PMUV3_PERFCTR_L1D_CACHE, - [PERF_COUNT_HW_CACHE_MISSES] = ARMV8_PMUV3_PERFCTR_L1D_CACHE_REFILL, - [PERF_COUNT_HW_BRANCH_MISSES] = ARMV8_PMUV3_PERFCTR_BR_MIS_PRED, - [PERF_COUNT_HW_BUS_CYCLES] = ARMV8_PMUV3_PERFCTR_BUS_CYCLES, -}; - -static const unsigned armv8_thunder_perf_map[PERF_COUNT_HW_MAX] = { - PERF_MAP_ALL_UNSUPPORTED, - [PERF_COUNT_HW_CPU_CYCLES] = ARMV8_PMUV3_PERFCTR_CPU_CYCLES, - [PERF_COUNT_HW_INSTRUCTIONS] = ARMV8_PMUV3_PERFCTR_INST_RETIRED, - [PERF_COUNT_HW_CACHE_REFERENCES] = ARMV8_PMUV3_PERFCTR_L1D_CACHE, - [PERF_COUNT_HW_CACHE_MISSES] = ARMV8_PMUV3_PERFCTR_L1D_CACHE_REFILL, - [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = ARMV8_PMUV3_PERFCTR_PC_WRITE_RETIRED, - [PERF_COUNT_HW_BRANCH_MISSES] = ARMV8_PMUV3_PERFCTR_BR_MIS_PRED, - [PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = ARMV8_PMUV3_PERFCTR_STALL_FRONTEND, - [PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = ARMV8_PMUV3_PERFCTR_STALL_BACKEND, -}; - -/* Broadcom Vulcan events mapping */ -static const unsigned armv8_vulcan_perf_map[PERF_COUNT_HW_MAX] = { - PERF_MAP_ALL_UNSUPPORTED, - [PERF_COUNT_HW_CPU_CYCLES] = ARMV8_PMUV3_PERFCTR_CPU_CYCLES, - [PERF_COUNT_HW_INSTRUCTIONS] = ARMV8_PMUV3_PERFCTR_INST_RETIRED, - [PERF_COUNT_HW_CACHE_REFERENCES] = ARMV8_PMUV3_PERFCTR_L1D_CACHE, - [PERF_COUNT_HW_CACHE_MISSES] = ARMV8_PMUV3_PERFCTR_L1D_CACHE_REFILL, - [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = ARMV8_PMUV3_PERFCTR_BR_RETIRED, - [PERF_COUNT_HW_BRANCH_MISSES] = ARMV8_PMUV3_PERFCTR_BR_MIS_PRED, - [PERF_COUNT_HW_BUS_CYCLES] = ARMV8_PMUV3_PERFCTR_BUS_CYCLES, - [PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = ARMV8_PMUV3_PERFCTR_STALL_FRONTEND, - [PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = ARMV8_PMUV3_PERFCTR_STALL_BACKEND, -}; - static const unsigned armv8_pmuv3_perf_cache_map[PERF_COUNT_HW_CACHE_MAX] [PERF_COUNT_HW_CACHE_OP_MAX] [PERF_COUNT_HW_CACHE_RESULT_MAX] = { @@ -281,28 +232,8 @@ static const unsigned armv8_a53_perf_cache_map[PERF_COUNT_HW_CACHE_MAX] [PERF_COUNT_HW_CACHE_RESULT_MAX] = { PERF_CACHE_MAP_ALL_UNSUPPORTED, - [C(L1D)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_PMUV3_PERFCTR_L1D_CACHE, - [C(L1D)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_L1D_CACHE_REFILL, - [C(L1D)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV8_PMUV3_PERFCTR_L1D_CACHE, - [C(L1D)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_L1D_CACHE_REFILL, [C(L1D)][C(OP_PREFETCH)][C(RESULT_MISS)] = ARMV8_A53_PERFCTR_PREF_LINEFILL, - [C(L1I)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_PMUV3_PERFCTR_L1I_CACHE, - [C(L1I)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_L1I_CACHE_REFILL, - - [C(LL)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_PMUV3_PERFCTR_L2D_CACHE, - [C(LL)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_L2D_CACHE_REFILL, - [C(LL)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV8_PMUV3_PERFCTR_L2D_CACHE, - [C(LL)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_L2D_CACHE_REFILL, - - [C(DTLB)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_L1D_TLB_REFILL, - [C(ITLB)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_L1I_TLB_REFILL, - - [C(BPU)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_PMUV3_PERFCTR_BR_PRED, - [C(BPU)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_BR_MIS_PRED, - [C(BPU)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV8_PMUV3_PERFCTR_BR_PRED, - [C(BPU)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_BR_MIS_PRED, - 
[C(NODE)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_RD, [C(NODE)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_WR, }; @@ -317,24 +248,9 @@ static const unsigned armv8_a57_perf_cache_map[PERF_COUNT_HW_CACHE_MAX] [C(L1D)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV8_IMPDEF_PERFCTR_L1D_CACHE_WR, [C(L1D)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV8_IMPDEF_PERFCTR_L1D_CACHE_REFILL_WR, - [C(L1I)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_PMUV3_PERFCTR_L1I_CACHE, - [C(L1I)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_L1I_CACHE_REFILL, - - [C(LL)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_PMUV3_PERFCTR_L2D_CACHE, - [C(LL)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_L2D_CACHE_REFILL, - [C(LL)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV8_PMUV3_PERFCTR_L2D_CACHE, - [C(LL)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_L2D_CACHE_REFILL, - [C(DTLB)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_IMPDEF_PERFCTR_L1D_TLB_REFILL_RD, [C(DTLB)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV8_IMPDEF_PERFCTR_L1D_TLB_REFILL_WR, - [C(ITLB)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_L1I_TLB_REFILL, - - [C(BPU)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_PMUV3_PERFCTR_BR_PRED, - [C(BPU)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_BR_MIS_PRED, - [C(BPU)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV8_PMUV3_PERFCTR_BR_PRED, - [C(BPU)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_BR_MIS_PRED, - [C(NODE)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_RD, [C(NODE)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_WR, }; @@ -351,8 +267,6 @@ static const unsigned armv8_thunder_perf_cache_map[PERF_COUNT_HW_CACHE_MAX] [C(L1D)][C(OP_PREFETCH)][C(RESULT_ACCESS)] = ARMV8_THUNDER_PERFCTR_L1D_CACHE_PREF_ACCESS, [C(L1D)][C(OP_PREFETCH)][C(RESULT_MISS)] = ARMV8_THUNDER_PERFCTR_L1D_CACHE_PREF_MISS, - [C(L1I)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_PMUV3_PERFCTR_L1I_CACHE, - [C(L1I)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_L1I_CACHE_REFILL, [C(L1I)][C(OP_PREFETCH)][C(RESULT_ACCESS)] = ARMV8_THUNDER_PERFCTR_L1I_CACHE_PREF_ACCESS, [C(L1I)][C(OP_PREFETCH)][C(RESULT_MISS)] = ARMV8_THUNDER_PERFCTR_L1I_CACHE_PREF_MISS, @@ -360,13 +274,6 @@ static const unsigned armv8_thunder_perf_cache_map[PERF_COUNT_HW_CACHE_MAX] [C(DTLB)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_IMPDEF_PERFCTR_L1D_TLB_REFILL_RD, [C(DTLB)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV8_IMPDEF_PERFCTR_L1D_TLB_WR, [C(DTLB)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV8_IMPDEF_PERFCTR_L1D_TLB_REFILL_WR, - - [C(ITLB)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_L1I_TLB_REFILL, - - [C(BPU)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_PMUV3_PERFCTR_BR_PRED, - [C(BPU)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_BR_MIS_PRED, - [C(BPU)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV8_PMUV3_PERFCTR_BR_PRED, - [C(BPU)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_BR_MIS_PRED, }; static const unsigned armv8_vulcan_perf_cache_map[PERF_COUNT_HW_CACHE_MAX] @@ -379,22 +286,11 @@ static const unsigned armv8_vulcan_perf_cache_map[PERF_COUNT_HW_CACHE_MAX] [C(L1D)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV8_IMPDEF_PERFCTR_L1D_CACHE_WR, [C(L1D)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV8_IMPDEF_PERFCTR_L1D_CACHE_REFILL_WR, - [C(L1I)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_PMUV3_PERFCTR_L1I_CACHE, - [C(L1I)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_L1I_CACHE_REFILL, - - [C(ITLB)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_L1I_TLB_REFILL, - [C(ITLB)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_PMUV3_PERFCTR_L1I_TLB, - 
[C(DTLB)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_IMPDEF_PERFCTR_L1D_TLB_RD, [C(DTLB)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV8_IMPDEF_PERFCTR_L1D_TLB_WR, [C(DTLB)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_IMPDEF_PERFCTR_L1D_TLB_REFILL_RD, [C(DTLB)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV8_IMPDEF_PERFCTR_L1D_TLB_REFILL_WR, - [C(BPU)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_PMUV3_PERFCTR_BR_PRED, - [C(BPU)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_BR_MIS_PRED, - [C(BPU)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV8_PMUV3_PERFCTR_BR_PRED, - [C(BPU)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_BR_MIS_PRED, - [C(NODE)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_RD, [C(NODE)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_WR, }; @@ -964,25 +860,23 @@ static int armv8_pmuv3_map_event(struct perf_event *event) static int armv8_a53_map_event(struct perf_event *event) { - return __armv8_pmuv3_map_event(event, &armv8_a53_perf_map, - &armv8_a53_perf_cache_map); + return __armv8_pmuv3_map_event(event, NULL, &armv8_a53_perf_cache_map); } static int armv8_a57_map_event(struct perf_event *event) { - return __armv8_pmuv3_map_event(event, &armv8_a57_perf_map, - &armv8_a57_perf_cache_map); + return __armv8_pmuv3_map_event(event, NULL, &armv8_a57_perf_cache_map); } static int armv8_thunder_map_event(struct perf_event *event) { - return __armv8_pmuv3_map_event(event, &armv8_thunder_perf_map, + return __armv8_pmuv3_map_event(event, NULL, &armv8_thunder_perf_cache_map); } static int armv8_vulcan_map_event(struct perf_event *event) { - return __armv8_pmuv3_map_event(event, &armv8_vulcan_perf_map, + return __armv8_pmuv3_map_event(event, NULL, &armv8_vulcan_perf_cache_map); } -- cgit v1.2.3 From 5561b6c5e9813df16d7453f6ce1a0546221fca97 Mon Sep 17 00:00:00 2001 From: Julien Thierry Date: Wed, 9 Aug 2017 17:46:38 +0100 Subject: arm64: perf: add support for Cortex-A73 The Cortex-A73 uses some implementation defined perf events. This patch sets up the necessary mapping for Cortex-A73. Mappings are based on Cortex-A73 TRM r0p2, section 11.9 Events (pages 11-457 to 11-460). 
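For context on how a CPU-specific cache map like the one added below is exercised: the core code only consults it for events of type PERF_TYPE_HW_CACHE, whose config value packs the cache id, the op and the result as id | (op << 8) | (result << 16). The following is a minimal userspace sketch of such a request (ordinary perf_event_open() usage, not part of this patch; the workload in the middle is a placeholder):

        #include <stdio.h>
        #include <string.h>
        #include <unistd.h>
        #include <sys/ioctl.h>
        #include <sys/syscall.h>
        #include <sys/types.h>
        #include <linux/perf_event.h>

        static long perf_event_open(struct perf_event_attr *attr, pid_t pid,
                                    int cpu, int group_fd, unsigned long flags)
        {
                return syscall(__NR_perf_event_open, attr, pid, cpu, group_fd, flags);
        }

        int main(void)
        {
                struct perf_event_attr attr;
                long long count;
                int fd;

                memset(&attr, 0, sizeof(attr));
                attr.size = sizeof(attr);
                attr.type = PERF_TYPE_HW_CACHE;
                /* L1D | (OP_READ << 8) | (RESULT_ACCESS << 16) */
                attr.config = PERF_COUNT_HW_CACHE_L1D |
                              (PERF_COUNT_HW_CACHE_OP_READ << 8) |
                              (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16);
                attr.disabled = 1;
                attr.exclude_kernel = 1;

                fd = perf_event_open(&attr, 0, -1, -1, 0);
                if (fd < 0) {
                        perror("perf_event_open");
                        return 1;
                }

                ioctl(fd, PERF_EVENT_IOC_RESET, 0);
                ioctl(fd, PERF_EVENT_IOC_ENABLE, 0);
                /* ... run the workload being measured here ... */
                ioctl(fd, PERF_EVENT_IOC_DISABLE, 0);

                if (read(fd, &count, sizeof(count)) == sizeof(count))
                        printf("L1D read accesses: %lld\n", count);
                close(fd);
                return 0;
        }

On a Cortex-A73, a request like this should resolve through the [C(L1D)][C(OP_READ)][C(RESULT_ACCESS)] entry in the map added below, i.e. the implementation defined ARMV8_IMPDEF_PERFCTR_L1D_CACHE_RD counter.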
Signed-off-by: Julien Thierry Cc: Will Deacon Cc: Mark Rutland Cc: Catalin Marinas Signed-off-by: Will Deacon --- Documentation/devicetree/bindings/arm/pmu.txt | 1 + arch/arm64/kernel/perf_event.c | 37 +++++++++++++++++++++++++++ 2 files changed, 38 insertions(+) (limited to 'arch/arm64') diff --git a/Documentation/devicetree/bindings/arm/pmu.txt b/Documentation/devicetree/bindings/arm/pmu.txt index 61c8b4620415..54c9727c70d8 100644 --- a/Documentation/devicetree/bindings/arm/pmu.txt +++ b/Documentation/devicetree/bindings/arm/pmu.txt @@ -9,6 +9,7 @@ Required properties: - compatible : should be one of "apm,potenza-pmu" "arm,armv8-pmuv3" + "arm,cortex-a73-pmu" "arm,cortex-a72-pmu" "arm,cortex-a57-pmu" "arm,cortex-a53-pmu" diff --git a/arch/arm64/kernel/perf_event.c b/arch/arm64/kernel/perf_event.c index f7737f6dcc36..3fc00f61f729 100644 --- a/arch/arm64/kernel/perf_event.c +++ b/arch/arm64/kernel/perf_event.c @@ -255,6 +255,21 @@ static const unsigned armv8_a57_perf_cache_map[PERF_COUNT_HW_CACHE_MAX] [C(NODE)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_WR, }; +static const unsigned armv8_a73_perf_cache_map[PERF_COUNT_HW_CACHE_MAX] + [PERF_COUNT_HW_CACHE_OP_MAX] + [PERF_COUNT_HW_CACHE_RESULT_MAX] = { + PERF_CACHE_MAP_ALL_UNSUPPORTED, + + [C(L1D)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_IMPDEF_PERFCTR_L1D_CACHE_RD, + [C(L1D)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV8_IMPDEF_PERFCTR_L1D_CACHE_WR, + + [C(NODE)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_RD, + [C(NODE)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_WR, + + [C(NODE)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_RD, + [C(NODE)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_WR, +}; + static const unsigned armv8_thunder_perf_cache_map[PERF_COUNT_HW_CACHE_MAX] [PERF_COUNT_HW_CACHE_OP_MAX] [PERF_COUNT_HW_CACHE_RESULT_MAX] = { @@ -868,6 +883,11 @@ static int armv8_a57_map_event(struct perf_event *event) return __armv8_pmuv3_map_event(event, NULL, &armv8_a57_perf_cache_map); } +static int armv8_a73_map_event(struct perf_event *event) +{ + return __armv8_pmuv3_map_event(event, NULL, &armv8_a73_perf_cache_map); +} + static int armv8_thunder_map_event(struct perf_event *event) { return __armv8_pmuv3_map_event(event, NULL, @@ -1018,6 +1038,22 @@ static int armv8_a72_pmu_init(struct arm_pmu *cpu_pmu) return 0; } +static int armv8_a73_pmu_init(struct arm_pmu *cpu_pmu) +{ + int ret = armv8_pmu_init(cpu_pmu); + if (ret) + return ret; + + cpu_pmu->name = "armv8_cortex_a73"; + cpu_pmu->map_event = armv8_a73_map_event; + cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_EVENTS] = + &armv8_pmuv3_events_attr_group; + cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_FORMATS] = + &armv8_pmuv3_format_attr_group; + + return 0; +} + static int armv8_thunder_pmu_init(struct arm_pmu *cpu_pmu) { int ret = armv8_pmu_init(cpu_pmu); @@ -1055,6 +1091,7 @@ static const struct of_device_id armv8_pmu_of_device_ids[] = { {.compatible = "arm,cortex-a53-pmu", .data = armv8_a53_pmu_init}, {.compatible = "arm,cortex-a57-pmu", .data = armv8_a57_pmu_init}, {.compatible = "arm,cortex-a72-pmu", .data = armv8_a72_pmu_init}, + {.compatible = "arm,cortex-a73-pmu", .data = armv8_a73_pmu_init}, {.compatible = "cavium,thunder-pmu", .data = armv8_thunder_pmu_init}, {.compatible = "brcm,vulcan-pmu", .data = armv8_vulcan_pmu_init}, {}, -- cgit v1.2.3 From e884f80cf2a76a86547e2316982e1f200f556ddf Mon Sep 17 00:00:00 2001 From: Julien Thierry Date: Wed, 9 Aug 2017 17:46:39 +0100 Subject: arm64: perf: add 
support for Cortex-A35 The Cortex-A35 uses some implementation defined perf events. The Cortex-A35 derives from the Cortex-A53 core and uses the same event mappings, based on Cortex-A35 TRM r0p2, section C2.3 - Performance monitoring events (pages C2-562 to C2-565). Signed-off-by: Julien Thierry Cc: Will Deacon Cc: Mark Rutland Cc: Catalin Marinas Signed-off-by: Will Deacon --- Documentation/devicetree/bindings/arm/pmu.txt | 1 + arch/arm64/kernel/perf_event.c | 17 +++++++++++++++++ 2 files changed, 18 insertions(+) (limited to 'arch/arm64') diff --git a/Documentation/devicetree/bindings/arm/pmu.txt b/Documentation/devicetree/bindings/arm/pmu.txt index 54c9727c70d8..13611a8199bb 100644 --- a/Documentation/devicetree/bindings/arm/pmu.txt +++ b/Documentation/devicetree/bindings/arm/pmu.txt @@ -13,6 +13,7 @@ Required properties: "arm,cortex-a72-pmu" "arm,cortex-a57-pmu" "arm,cortex-a53-pmu" + "arm,cortex-a35-pmu" "arm,cortex-a17-pmu" "arm,cortex-a15-pmu" "arm,cortex-a12-pmu" diff --git a/arch/arm64/kernel/perf_event.c b/arch/arm64/kernel/perf_event.c index 3fc00f61f729..9eaef51f83ff 100644 --- a/arch/arm64/kernel/perf_event.c +++ b/arch/arm64/kernel/perf_event.c @@ -990,6 +990,22 @@ static int armv8_pmuv3_init(struct arm_pmu *cpu_pmu) return 0; } +static int armv8_a35_pmu_init(struct arm_pmu *cpu_pmu) +{ + int ret = armv8_pmu_init(cpu_pmu); + if (ret) + return ret; + + cpu_pmu->name = "armv8_cortex_a35"; + cpu_pmu->map_event = armv8_a53_map_event; + cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_EVENTS] = + &armv8_pmuv3_events_attr_group; + cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_FORMATS] = + &armv8_pmuv3_format_attr_group; + + return 0; +} + static int armv8_a53_pmu_init(struct arm_pmu *cpu_pmu) { int ret = armv8_pmu_init(cpu_pmu); @@ -1088,6 +1104,7 @@ static int armv8_vulcan_pmu_init(struct arm_pmu *cpu_pmu) static const struct of_device_id armv8_pmu_of_device_ids[] = { {.compatible = "arm,armv8-pmuv3", .data = armv8_pmuv3_init}, + {.compatible = "arm,cortex-a35-pmu", .data = armv8_a35_pmu_init}, {.compatible = "arm,cortex-a53-pmu", .data = armv8_a53_pmu_init}, {.compatible = "arm,cortex-a57-pmu", .data = armv8_a57_pmu_init}, {.compatible = "arm,cortex-a72-pmu", .data = armv8_a72_pmu_init}, -- cgit v1.2.3 From caf5ef7d15c511bbef691d0931adad56c2967435 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 10 Aug 2017 16:52:31 +0200 Subject: arm64: fix pmem interface definition Defining the two functions as 'static inline' and exporting them leads to the interesting case where we can use the interface from loadable modules, but not from built-in drivers, as shown in this link failure: drivers/nvdimm/claim.o: In function `nsio_rw_bytes': claim.c:(.text+0x1b8): undefined reference to `arch_invalidate_pmem' drivers/nvdimm/pmem.o: In function `pmem_dax_flush': pmem.c:(.text+0x11c): undefined reference to `arch_wb_cache_pmem' drivers/nvdimm/pmem.o: In function `pmem_make_request': pmem.c:(.text+0x5a4): undefined reference to `arch_invalidate_pmem' pmem.c:(.text+0x650): undefined reference to `arch_invalidate_pmem' pmem.c:(.text+0x6d4): undefined reference to `arch_invalidate_pmem' This removes the bogus 'static inline'.
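As background on the failure mode, a sketch of the general pattern with made-up names (not the actual arm64 declarations): an exported symbol needs external linkage, so the prototype belongs in a header and the definition in exactly one C file, without 'static inline'.

        /* my_cache.h - hypothetical header, for illustration only */
        #ifndef MY_CACHE_H
        #define MY_CACHE_H

        #include <linux/types.h>

        void my_wb_cache_range(void *addr, size_t size);

        #endif

        /* my_cache.c - the definition is an ordinary external symbol, so
         * both built-in and modular callers can resolve it. */
        #include <linux/export.h>
        #include "my_cache.h"

        void my_wb_cache_range(void *addr, size_t size)
        {
                /* clean the range, e.g. cache line by cache line */
        }
        EXPORT_SYMBOL_GPL(my_wb_cache_range);

With 'static inline' on the definition, the function has internal linkage: the export table can still record the address of a local copy (which is presumably why modular users happened to work), but there is no global symbol for built-in callers, giving the undefined references quoted above.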
Fixes: d50e071fdaa3 ("arm64: Implement pmem API support") Acked-by: Robin Murphy Signed-off-by: Arnd Bergmann Signed-off-by: Catalin Marinas --- arch/arm64/mm/flush.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/arm64') diff --git a/arch/arm64/mm/flush.c b/arch/arm64/mm/flush.c index 280f90ff33a2..e36ed5087b5c 100644 --- a/arch/arm64/mm/flush.c +++ b/arch/arm64/mm/flush.c @@ -85,7 +85,7 @@ EXPORT_SYMBOL(flush_dcache_page); EXPORT_SYMBOL(flush_icache_range); #ifdef CONFIG_ARCH_HAS_PMEM_API -static inline void arch_wb_cache_pmem(void *addr, size_t size) +void arch_wb_cache_pmem(void *addr, size_t size) { /* Ensure order against any prior non-cacheable writes */ dmb(osh); @@ -93,7 +93,7 @@ static inline void arch_wb_cache_pmem(void *addr, size_t size) } EXPORT_SYMBOL_GPL(arch_wb_cache_pmem); -static inline void arch_invalidate_pmem(void *addr, size_t size) +void arch_invalidate_pmem(void *addr, size_t size) { __inval_dcache_area(addr, size); } -- cgit v1.2.3 From 82d24d114f249d919b918ff8eefde4117db8f088 Mon Sep 17 00:00:00 2001 From: Kevin Brodsky Date: Fri, 4 Aug 2017 10:17:00 -0700 Subject: arm64: compat: Remove leftover variable declaration Commit a1d5ebaf8ccd ("arm64: big-endian: don't treat code as data when copying sigret code") moved the 32-bit sigreturn trampoline code from the aarch32_sigret_code array to kuser32.S. The commit removed the array definition from signal32.c, but not its declaration in signal32.h. Remove the leftover declaration. Signed-off-by: Kevin Brodsky Signed-off-by: Mark Salyzyn Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/signal32.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'arch/arm64') diff --git a/arch/arm64/include/asm/signal32.h b/arch/arm64/include/asm/signal32.h index eeaa97559bab..81abea0b7650 100644 --- a/arch/arm64/include/asm/signal32.h +++ b/arch/arm64/include/asm/signal32.h @@ -22,8 +22,6 @@ #define AARCH32_KERN_SIGRET_CODE_OFFSET 0x500 -extern const compat_ulong_t aarch32_sigret_code[6]; - int compat_setup_frame(int usig, struct ksignal *ksig, sigset_t *set, struct pt_regs *regs); int compat_setup_rt_frame(int usig, struct ksignal *ksig, sigset_t *set, -- cgit v1.2.3 From 969ff73e72fe903a2354c51e01c1a1f937c544ca Mon Sep 17 00:00:00 2001 From: Dou Liyang Date: Wed, 26 Jul 2017 21:34:26 +0800 Subject: arm64: numa: Remove the unused parent_node() macro Commit a7be6e5a7f8d ("mm: drop useless local parameters of __register_one_node()") removes the last user of parent_node(). The parent_node() macro in ARM64 platform is unnecessary. Remove it for cleanup. 
Reported-by: Michael Ellerman Acked-by: Will Deacon Signed-off-by: Dou Liyang Cc: Michael Ellerman Cc: linux-arm-kernel@lists.infradead.org Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/numa.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'arch/arm64') diff --git a/arch/arm64/include/asm/numa.h b/arch/arm64/include/asm/numa.h index bf466d1876e3..ef7b23863a7c 100644 --- a/arch/arm64/include/asm/numa.h +++ b/arch/arm64/include/asm/numa.h @@ -7,9 +7,6 @@ #define NR_NODE_MEMBLKS (MAX_NUMNODES * 2) -/* currently, arm64 implements flat NUMA topology */ -#define parent_node(node) (node) - int __node_distance(int from, int to); #define node_distance(a, b) __node_distance(a, b) -- cgit v1.2.3 From c5bc503cbeee8586395aa541d2b53c69c3dd6930 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Mon, 7 Aug 2017 12:10:51 +0100 Subject: arm64: remove __die()'s stack dump Our __die() implementation tries to dump the stack memory, in addition to a backtrace, which is problematic. For contemporary 16K stacks, this can be a lot of data, which can take a long time to dump, and can push other useful context out of the kernel's printk ringbuffer (and/or a user's scrollback buffer on an attached console). Additionally, the code implicitly assumes that the SP is on the task's stack, and tries to dump everything between the SP and the highest task stack address. When the SP points at an IRQ stack (or is corrupted), this makes the kernel attempt to dump vast amounts of VA space. With vmap'd stacks, this may result in erroneous accesses to peripherals. This patch removes the memory dump, leaving us to rely on the backtrace, and other means of dumping stack memory such as kdump. Signed-off-by: Mark Rutland Reviewed-by: Will Deacon Tested-by: Laura Abbott Cc: Ard Biesheuvel Cc: Catalin Marinas Cc: James Morse --- arch/arm64/kernel/traps.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'arch/arm64') diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c index c2a81bf8827e..9633773ca42c 100644 --- a/arch/arm64/kernel/traps.c +++ b/arch/arm64/kernel/traps.c @@ -237,8 +237,6 @@ static int __die(const char *str, int err, struct pt_regs *regs) end_of_stack(tsk)); if (!user_mode(regs)) { - dump_mem(KERN_EMERG, "Stack: ", regs->sp, - THREAD_SIZE + (unsigned long)task_stack_page(tsk)); dump_backtrace(regs, tsk); dump_instr(KERN_EMERG, regs); } -- cgit v1.2.3 From 34be98f4944f99076f049a6806fc5f5207a755d3 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Thu, 20 Jul 2017 17:15:45 +0100 Subject: arm64: kernel: remove {THREAD,IRQ_STACK}_START_SP For historical reasons, we leave the top 16 bytes of our task and IRQ stacks unused, a practice used to ensure that the SP can always be masked to find the base of the current stack (historically, where thread_info could be found). However, this is not necessary, as: * When an exception is taken from a task stack, we decrement the SP by S_FRAME_SIZE and stash the exception registers before we compare the SP against the task stack. In such cases, the SP must be at least S_FRAME_SIZE below the limit, and can be safely masked to determine whether the task stack is in use. * When transitioning to an IRQ stack, we'll place a dummy frame onto the IRQ stack before enabling asynchronous exceptions, or executing code we expect to trigger faults. Thus, if an exception is taken from the IRQ stack, the SP must be at least 16 bytes below the limit. * We no longer mask the SP to find the thread_info, which is now found via sp_el0. 
Note that historically, the offset was critical to ensure that cpu_switch_to() found the correct stack for new threads that hadn't yet executed ret_from_fork(). Given that, this initial offset serves no purpose, and can be removed. This brings us in-line with other architectures (e.g. x86) which do not rely on this masking. Signed-off-by: Ard Biesheuvel [Mark: rebase, kill THREAD_START_SP, commit msg additions] Signed-off-by: Mark Rutland Reviewed-by: Will Deacon Tested-by: Laura Abbott Cc: Catalin Marinas Cc: James Morse --- arch/arm64/include/asm/irq.h | 5 ++--- arch/arm64/include/asm/processor.h | 2 +- arch/arm64/include/asm/thread_info.h | 1 - arch/arm64/kernel/entry.S | 2 +- arch/arm64/kernel/smp.c | 2 +- 5 files changed, 5 insertions(+), 7 deletions(-) (limited to 'arch/arm64') diff --git a/arch/arm64/include/asm/irq.h b/arch/arm64/include/asm/irq.h index 8ba89c4ca183..1ebe202b1a24 100644 --- a/arch/arm64/include/asm/irq.h +++ b/arch/arm64/include/asm/irq.h @@ -2,7 +2,6 @@ #define __ASM_IRQ_H #define IRQ_STACK_SIZE THREAD_SIZE -#define IRQ_STACK_START_SP THREAD_START_SP #ifndef __ASSEMBLER__ @@ -26,9 +25,9 @@ static inline int nr_legacy_irqs(void) static inline bool on_irq_stack(unsigned long sp) { unsigned long low = (unsigned long)raw_cpu_ptr(irq_stack); - unsigned long high = low + IRQ_STACK_START_SP; + unsigned long high = low + IRQ_STACK_SIZE; - return (low <= sp && sp <= high); + return (low <= sp && sp < high); } static inline bool on_task_stack(struct task_struct *tsk, unsigned long sp) diff --git a/arch/arm64/include/asm/processor.h b/arch/arm64/include/asm/processor.h index 64c9e78f9882..6687dd29f7e0 100644 --- a/arch/arm64/include/asm/processor.h +++ b/arch/arm64/include/asm/processor.h @@ -159,7 +159,7 @@ extern struct task_struct *cpu_switch_to(struct task_struct *prev, struct task_struct *next); #define task_pt_regs(p) \ - ((struct pt_regs *)(THREAD_START_SP + task_stack_page(p)) - 1) + ((struct pt_regs *)(THREAD_SIZE + task_stack_page(p)) - 1) #define KSTK_EIP(tsk) ((unsigned long)task_pt_regs(tsk)->pc) #define KSTK_ESP(tsk) user_stack_pointer(task_pt_regs(tsk)) diff --git a/arch/arm64/include/asm/thread_info.h b/arch/arm64/include/asm/thread_info.h index 46c3b93cf865..b29ab0e12e60 100644 --- a/arch/arm64/include/asm/thread_info.h +++ b/arch/arm64/include/asm/thread_info.h @@ -30,7 +30,6 @@ #endif #define THREAD_SIZE 16384 -#define THREAD_START_SP (THREAD_SIZE - 16) #ifndef __ASSEMBLY__ diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S index 612a077ba109..f31c7b26a686 100644 --- a/arch/arm64/kernel/entry.S +++ b/arch/arm64/kernel/entry.S @@ -272,7 +272,7 @@ alternative_else_nop_endif cbnz x25, 9998f adr_this_cpu x25, irq_stack, x26 - mov x26, #IRQ_STACK_START_SP + mov x26, #IRQ_STACK_SIZE add x26, x25, x26 /* switch to the irq stack */ diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c index dc66e6ec3a99..f13ddb2404f9 100644 --- a/arch/arm64/kernel/smp.c +++ b/arch/arm64/kernel/smp.c @@ -154,7 +154,7 @@ int __cpu_up(unsigned int cpu, struct task_struct *idle) * page tables. 
*/ secondary_data.task = idle; - secondary_data.stack = task_stack_page(idle) + THREAD_START_SP; + secondary_data.stack = task_stack_page(idle) + THREAD_SIZE; update_cpu_boot_status(CPU_MMU_OFF); __flush_dcache_area(&secondary_data, sizeof(secondary_data)); -- cgit v1.2.3 From b6531456ba279bb7cea5dd2e7b3ec6a3ed0f4668 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Fri, 14 Jul 2017 19:43:56 +0100 Subject: arm64: factor out PAGE_* and CONT_* definitions Some headers rely on PAGE_* definitions from , but cannot include this due to potential circular includes. For example, a number of definitions in rely on PAGE_SHIFT, and includes . This requires users of these definitions to include both headers, which is fragile and error-prone. This patch ameliorates matters by moving the basic definitions out to a new header, . Both and are updated to include this, avoiding this fragility, and avoiding the possibility of circular include dependencies. Signed-off-by: Mark Rutland Reviewed-by: Will Deacon Tested-by: Laura Abbott Cc: Ard Biesheuvel Cc: Catalin Marinas Cc: James Morse --- arch/arm64/include/asm/memory.h | 1 + arch/arm64/include/asm/page-def.h | 34 ++++++++++++++++++++++++++++++++++ arch/arm64/include/asm/page.h | 12 +----------- 3 files changed, 36 insertions(+), 11 deletions(-) create mode 100644 arch/arm64/include/asm/page-def.h (limited to 'arch/arm64') diff --git a/arch/arm64/include/asm/memory.h b/arch/arm64/include/asm/memory.h index 32f82723338a..77d55dcfb86c 100644 --- a/arch/arm64/include/asm/memory.h +++ b/arch/arm64/include/asm/memory.h @@ -25,6 +25,7 @@ #include #include #include +#include #include /* diff --git a/arch/arm64/include/asm/page-def.h b/arch/arm64/include/asm/page-def.h new file mode 100644 index 000000000000..01591a29dc2e --- /dev/null +++ b/arch/arm64/include/asm/page-def.h @@ -0,0 +1,34 @@ +/* + * Based on arch/arm/include/asm/page.h + * + * Copyright (C) 1995-2003 Russell King + * Copyright (C) 2017 ARM Ltd. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . 
+ */ +#ifndef __ASM_PAGE_DEF_H +#define __ASM_PAGE_DEF_H + +#include + +/* PAGE_SHIFT determines the page size */ +/* CONT_SHIFT determines the number of pages which can be tracked together */ +#define PAGE_SHIFT CONFIG_ARM64_PAGE_SHIFT +#define CONT_SHIFT CONFIG_ARM64_CONT_SHIFT +#define PAGE_SIZE (_AC(1, UL) << PAGE_SHIFT) +#define PAGE_MASK (~(PAGE_SIZE-1)) + +#define CONT_SIZE (_AC(1, UL) << (CONT_SHIFT + PAGE_SHIFT)) +#define CONT_MASK (~(CONT_SIZE-1)) + +#endif /* __ASM_PAGE_DEF_H */ diff --git a/arch/arm64/include/asm/page.h b/arch/arm64/include/asm/page.h index 8472c6def5ef..60d02c81a3a2 100644 --- a/arch/arm64/include/asm/page.h +++ b/arch/arm64/include/asm/page.h @@ -19,17 +19,7 @@ #ifndef __ASM_PAGE_H #define __ASM_PAGE_H -#include - -/* PAGE_SHIFT determines the page size */ -/* CONT_SHIFT determines the number of pages which can be tracked together */ -#define PAGE_SHIFT CONFIG_ARM64_PAGE_SHIFT -#define CONT_SHIFT CONFIG_ARM64_CONT_SHIFT -#define PAGE_SIZE (_AC(1, UL) << PAGE_SHIFT) -#define PAGE_MASK (~(PAGE_SIZE-1)) - -#define CONT_SIZE (_AC(1, UL) << (CONT_SHIFT + PAGE_SHIFT)) -#define CONT_MASK (~(CONT_SIZE-1)) +#include #ifndef __ASSEMBLY__ -- cgit v1.2.3 From dbc9344a68e506f19f80a9affc8fe7023a9cdc4c Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Fri, 14 Jul 2017 16:39:21 +0100 Subject: arm64: clean up THREAD_* definitions Currently we define THREAD_SIZE and THREAD_SIZE_ORDER separately, with the latter dependent on particular CONFIG_ARM64_*K_PAGES definitions. This is somewhat opaque, and will get in the way of future modifications to THREAD_SIZE. This patch cleans this up, defining both in terms of a common THREAD_SHIFT, and using PAGE_SHIFT to calculate THREAD_SIZE_ORDER, rather than using a number of definitions dependent on config symbols. Subsequent patches will make use of this to alter the stack size used in some configurations. At the same time, these are moved into , which will avoid circular include issues in subsequent patches. To ensure that existing code isn't adversely affected, is updated to transitively include these definitions. Signed-off-by: Mark Rutland Reviewed-by: Will Deacon Tested-by: Laura Abbott Cc: Ard Biesheuvel Cc: Catalin Marinas Cc: James Morse --- arch/arm64/include/asm/memory.h | 8 ++++++++ arch/arm64/include/asm/thread_info.h | 9 +-------- 2 files changed, 9 insertions(+), 8 deletions(-) (limited to 'arch/arm64') diff --git a/arch/arm64/include/asm/memory.h b/arch/arm64/include/asm/memory.h index 77d55dcfb86c..8ab4774e2616 100644 --- a/arch/arm64/include/asm/memory.h +++ b/arch/arm64/include/asm/memory.h @@ -102,6 +102,14 @@ #define KASAN_SHADOW_SIZE (0) #endif +#define THREAD_SHIFT 14 + +#if THREAD_SHIFT >= PAGE_SHIFT +#define THREAD_SIZE_ORDER (THREAD_SHIFT - PAGE_SHIFT) +#endif + +#define THREAD_SIZE (UL(1) << THREAD_SHIFT) + /* * Memory types available. 
*/ diff --git a/arch/arm64/include/asm/thread_info.h b/arch/arm64/include/asm/thread_info.h index b29ab0e12e60..aa04b733b349 100644 --- a/arch/arm64/include/asm/thread_info.h +++ b/arch/arm64/include/asm/thread_info.h @@ -23,18 +23,11 @@ #include -#ifdef CONFIG_ARM64_4K_PAGES -#define THREAD_SIZE_ORDER 2 -#elif defined(CONFIG_ARM64_16K_PAGES) -#define THREAD_SIZE_ORDER 0 -#endif - -#define THREAD_SIZE 16384 - #ifndef __ASSEMBLY__ struct task_struct; +#include #include #include -- cgit v1.2.3 From f60ad4edcf07238a3d2646d65d8d217032452550 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Thu, 20 Jul 2017 12:26:48 +0100 Subject: arm64: clean up irq stack definitions Before we add yet another stack to the kernel, it would be nice to ensure that we consistently organise stack definitions and related helper functions. This patch moves the basic IRQ stack defintions to to live with their task stack counterparts. Helpers used for unwinding are moved into , where subsequent patches will add helpers for other stacks. Includes are fixed up accordingly. This patch is a pure refactoring -- there should be no functional changes as a result of this patch. Signed-off-by: Mark Rutland Reviewed-by: Will Deacon Tested-by: Laura Abbott Cc: Ard Biesheuvel Cc: Catalin Marinas Cc: James Morse --- arch/arm64/include/asm/irq.h | 24 ------------------------ arch/arm64/include/asm/memory.h | 2 ++ arch/arm64/include/asm/stacktrace.h | 25 ++++++++++++++++++++++++- arch/arm64/kernel/ptrace.c | 1 + 4 files changed, 27 insertions(+), 25 deletions(-) (limited to 'arch/arm64') diff --git a/arch/arm64/include/asm/irq.h b/arch/arm64/include/asm/irq.h index 1ebe202b1a24..5e6f77239064 100644 --- a/arch/arm64/include/asm/irq.h +++ b/arch/arm64/include/asm/irq.h @@ -1,20 +1,12 @@ #ifndef __ASM_IRQ_H #define __ASM_IRQ_H -#define IRQ_STACK_SIZE THREAD_SIZE - #ifndef __ASSEMBLER__ -#include -#include - #include -#include struct pt_regs; -DECLARE_PER_CPU(unsigned long [IRQ_STACK_SIZE/sizeof(long)], irq_stack); - extern void set_handle_irq(void (*handle_irq)(struct pt_regs *)); static inline int nr_legacy_irqs(void) @@ -22,21 +14,5 @@ static inline int nr_legacy_irqs(void) return 0; } -static inline bool on_irq_stack(unsigned long sp) -{ - unsigned long low = (unsigned long)raw_cpu_ptr(irq_stack); - unsigned long high = low + IRQ_STACK_SIZE; - - return (low <= sp && sp < high); -} - -static inline bool on_task_stack(struct task_struct *tsk, unsigned long sp) -{ - unsigned long low = (unsigned long)task_stack_page(tsk); - unsigned long high = low + THREAD_SIZE; - - return (low <= sp && sp < high); -} - #endif /* !__ASSEMBLER__ */ #endif diff --git a/arch/arm64/include/asm/memory.h b/arch/arm64/include/asm/memory.h index 8ab4774e2616..1fc24532987e 100644 --- a/arch/arm64/include/asm/memory.h +++ b/arch/arm64/include/asm/memory.h @@ -110,6 +110,8 @@ #define THREAD_SIZE (UL(1) << THREAD_SHIFT) +#define IRQ_STACK_SIZE THREAD_SIZE + /* * Memory types available. 
*/ diff --git a/arch/arm64/include/asm/stacktrace.h b/arch/arm64/include/asm/stacktrace.h index 3bebab378c72..000e24182a5c 100644 --- a/arch/arm64/include/asm/stacktrace.h +++ b/arch/arm64/include/asm/stacktrace.h @@ -16,7 +16,12 @@ #ifndef __ASM_STACKTRACE_H #define __ASM_STACKTRACE_H -struct task_struct; +#include +#include +#include + +#include +#include struct stackframe { unsigned long fp; @@ -31,4 +36,22 @@ extern void walk_stackframe(struct task_struct *tsk, struct stackframe *frame, int (*fn)(struct stackframe *, void *), void *data); extern void dump_backtrace(struct pt_regs *regs, struct task_struct *tsk); +DECLARE_PER_CPU(unsigned long [IRQ_STACK_SIZE/sizeof(long)], irq_stack); + +static inline bool on_irq_stack(unsigned long sp) +{ + unsigned long low = (unsigned long)raw_cpu_ptr(irq_stack); + unsigned long high = low + IRQ_STACK_SIZE; + + return (low <= sp && sp < high); +} + +static inline bool on_task_stack(struct task_struct *tsk, unsigned long sp) +{ + unsigned long low = (unsigned long)task_stack_page(tsk); + unsigned long high = low + THREAD_SIZE; + + return (low <= sp && sp < high); +} + #endif /* __ASM_STACKTRACE_H */ diff --git a/arch/arm64/kernel/ptrace.c b/arch/arm64/kernel/ptrace.c index baf0838205c7..a9f87157c371 100644 --- a/arch/arm64/kernel/ptrace.c +++ b/arch/arm64/kernel/ptrace.c @@ -42,6 +42,7 @@ #include #include #include +#include #include #include #include -- cgit v1.2.3 From 8018ba4edfd3a8b46f876c65988bd0d8e35c32a6 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Fri, 14 Jul 2017 15:38:43 +0100 Subject: arm64: move SEGMENT_ALIGN to Currently we define SEGMENT_ALIGN directly in our vmlinux.lds.S. This is unfortunate, as the EFI stub currently open-codes the same number, and in future we'll want to fiddle with this. This patch moves the definition to our , where it can be used by both vmlinux.lds.S and the EFI stub code. Signed-off-by: Mark Rutland Reviewed-by: Will Deacon Tested-by: Laura Abbott Cc: Ard Biesheuvel Cc: Catalin Marinas Cc: James Morse --- arch/arm64/include/asm/memory.h | 19 +++++++++++++++++++ arch/arm64/kernel/vmlinux.lds.S | 16 ---------------- 2 files changed, 19 insertions(+), 16 deletions(-) (limited to 'arch/arm64') diff --git a/arch/arm64/include/asm/memory.h b/arch/arm64/include/asm/memory.h index 1fc24532987e..7fa6ad48d574 100644 --- a/arch/arm64/include/asm/memory.h +++ b/arch/arm64/include/asm/memory.h @@ -112,6 +112,25 @@ #define IRQ_STACK_SIZE THREAD_SIZE +/* + * Alignment of kernel segments (e.g. .text, .data). + */ +#if defined(CONFIG_DEBUG_ALIGN_RODATA) +/* + * 4 KB granule: 1 level 2 entry + * 16 KB granule: 128 level 3 entries, with contiguous bit + * 64 KB granule: 32 level 3 entries, with contiguous bit + */ +#define SEGMENT_ALIGN SZ_2M +#else +/* + * 4 KB granule: 16 level 3 entries, with contiguous bit + * 16 KB granule: 4 level 3 entries, without contiguous bit + * 64 KB granule: 1 level 3 entry + */ +#define SEGMENT_ALIGN SZ_64K +#endif + /* * Memory types available. 
*/ diff --git a/arch/arm64/kernel/vmlinux.lds.S b/arch/arm64/kernel/vmlinux.lds.S index 987a00ee446c..71565386d063 100644 --- a/arch/arm64/kernel/vmlinux.lds.S +++ b/arch/arm64/kernel/vmlinux.lds.S @@ -72,22 +72,6 @@ PECOFF_FILE_ALIGNMENT = 0x200; #define PECOFF_EDATA_PADDING #endif -#if defined(CONFIG_DEBUG_ALIGN_RODATA) -/* - * 4 KB granule: 1 level 2 entry - * 16 KB granule: 128 level 3 entries, with contiguous bit - * 64 KB granule: 32 level 3 entries, with contiguous bit - */ -#define SEGMENT_ALIGN SZ_2M -#else -/* - * 4 KB granule: 16 level 3 entries, with contiguous bit - * 16 KB granule: 4 level 3 entries, without contiguous bit - * 64 KB granule: 1 level 3 entry - */ -#define SEGMENT_ALIGN SZ_64K -#endif - SECTIONS { /* -- cgit v1.2.3 From 170976bcab073870af059b5e848c80689bd5e931 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Fri, 14 Jul 2017 15:54:36 +0100 Subject: efi/arm64: add EFI_KIMG_ALIGN The EFI stub is intimately coupled with the kernel, and takes advantage of this by relocating the kernel at a weaker alignment than the documented boot protocol mandates. However, it does so by assuming it can align the kernel to the segment alignment, and assumes that this is 64K. In subsequent patches, we'll have to consider other details to determine this de-facto alignment constraint. This patch adds a new EFI_KIMG_ALIGN definition that will track the kernel's de-facto alignment requirements. Subsequent patches will modify this as required. Signed-off-by: Mark Rutland Reviewed-by: Will Deacon Tested-by: Laura Abbott Cc: Ard Biesheuvel Cc: Catalin Marinas Cc: James Morse Cc: Matt Fleming --- arch/arm64/include/asm/efi.h | 3 +++ drivers/firmware/efi/libstub/arm64-stub.c | 6 ++++-- 2 files changed, 7 insertions(+), 2 deletions(-) (limited to 'arch/arm64') diff --git a/arch/arm64/include/asm/efi.h b/arch/arm64/include/asm/efi.h index 8f3043aba873..0e8cc3b85bb8 100644 --- a/arch/arm64/include/asm/efi.h +++ b/arch/arm64/include/asm/efi.h @@ -4,6 +4,7 @@ #include #include #include +#include #include #include #include @@ -48,6 +49,8 @@ int efi_set_mapping_permissions(struct mm_struct *mm, efi_memory_desc_t *md); */ #define EFI_FDT_ALIGN SZ_2M /* used by allocate_new_fdt_and_exit_boot() */ +#define EFI_KIMG_ALIGN SEGMENT_ALIGN + /* on arm64, the FDT may be located anywhere in system RAM */ static inline unsigned long efi_get_max_fdt_addr(unsigned long dram_base) { diff --git a/drivers/firmware/efi/libstub/arm64-stub.c b/drivers/firmware/efi/libstub/arm64-stub.c index b4c2589d7c91..af6ae95a5e34 100644 --- a/drivers/firmware/efi/libstub/arm64-stub.c +++ b/drivers/firmware/efi/libstub/arm64-stub.c @@ -11,6 +11,7 @@ */ #include #include +#include #include #include @@ -81,9 +82,10 @@ efi_status_t handle_kernel_image(efi_system_table_t *sys_table_arg, /* * If CONFIG_DEBUG_ALIGN_RODATA is not set, produce a * displacement in the interval [0, MIN_KIMG_ALIGN) that - * is a multiple of the minimal segment alignment (SZ_64K) + * doesn't violate this kernel's de-facto alignment + * constraints. */ - u32 mask = (MIN_KIMG_ALIGN - 1) & ~(SZ_64K - 1); + u32 mask = (MIN_KIMG_ALIGN - 1) & ~(EFI_KIMG_ALIGN - 1); u32 offset = !IS_ENABLED(CONFIG_DEBUG_ALIGN_RODATA) ? 
(phys_seed >> 32) & mask : TEXT_OFFSET; -- cgit v1.2.3 From b11e5759bfac0c474d95ec4780b1566350e64cad Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Wed, 19 Jul 2017 17:24:49 +0100 Subject: arm64: factor out entry stack manipulation In subsequent patches, we will detect stack overflow in our exception entry code, by verifying the SP after it has been decremented to make space for the exception regs. This verification code is small, and we can minimize its impact by placing it directly in the vectors. To avoid redundant modification of the SP, we also need to move the initial decrement of the SP into the vectors. As a preparatory step, this patch introduces kernel_ventry, which performs this decrement, and updates the entry code accordingly. Subsequent patches will fold SP verification into kernel_ventry. There should be no functional change as a result of this patch. Signed-off-by: Ard Biesheuvel [Mark: turn into prep patch, expand commit msg] Signed-off-by: Mark Rutland Reviewed-by: Will Deacon Tested-by: Laura Abbott Cc: Catalin Marinas Cc: James Morse --- arch/arm64/kernel/entry.S | 47 ++++++++++++++++++++++++++--------------------- 1 file changed, 26 insertions(+), 21 deletions(-) (limited to 'arch/arm64') diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S index f31c7b26a686..58eba94279c5 100644 --- a/arch/arm64/kernel/entry.S +++ b/arch/arm64/kernel/entry.S @@ -69,8 +69,13 @@ #define BAD_FIQ 2 #define BAD_ERROR 3 - .macro kernel_entry, el, regsize = 64 + .macro kernel_ventry label + .align 7 sub sp, sp, #S_FRAME_SIZE + b \label + .endm + + .macro kernel_entry, el, regsize = 64 .if \regsize == 32 mov w0, w0 // zero upper 32 bits of x0 .endif @@ -319,31 +324,31 @@ tsk .req x28 // current thread_info .align 11 ENTRY(vectors) - ventry el1_sync_invalid // Synchronous EL1t - ventry el1_irq_invalid // IRQ EL1t - ventry el1_fiq_invalid // FIQ EL1t - ventry el1_error_invalid // Error EL1t + kernel_ventry el1_sync_invalid // Synchronous EL1t + kernel_ventry el1_irq_invalid // IRQ EL1t + kernel_ventry el1_fiq_invalid // FIQ EL1t + kernel_ventry el1_error_invalid // Error EL1t - ventry el1_sync // Synchronous EL1h - ventry el1_irq // IRQ EL1h - ventry el1_fiq_invalid // FIQ EL1h - ventry el1_error_invalid // Error EL1h + kernel_ventry el1_sync // Synchronous EL1h + kernel_ventry el1_irq // IRQ EL1h + kernel_ventry el1_fiq_invalid // FIQ EL1h + kernel_ventry el1_error_invalid // Error EL1h - ventry el0_sync // Synchronous 64-bit EL0 - ventry el0_irq // IRQ 64-bit EL0 - ventry el0_fiq_invalid // FIQ 64-bit EL0 - ventry el0_error_invalid // Error 64-bit EL0 + kernel_ventry el0_sync // Synchronous 64-bit EL0 + kernel_ventry el0_irq // IRQ 64-bit EL0 + kernel_ventry el0_fiq_invalid // FIQ 64-bit EL0 + kernel_ventry el0_error_invalid // Error 64-bit EL0 #ifdef CONFIG_COMPAT - ventry el0_sync_compat // Synchronous 32-bit EL0 - ventry el0_irq_compat // IRQ 32-bit EL0 - ventry el0_fiq_invalid_compat // FIQ 32-bit EL0 - ventry el0_error_invalid_compat // Error 32-bit EL0 + kernel_ventry el0_sync_compat // Synchronous 32-bit EL0 + kernel_ventry el0_irq_compat // IRQ 32-bit EL0 + kernel_ventry el0_fiq_invalid_compat // FIQ 32-bit EL0 + kernel_ventry el0_error_invalid_compat // Error 32-bit EL0 #else - ventry el0_sync_invalid // Synchronous 32-bit EL0 - ventry el0_irq_invalid // IRQ 32-bit EL0 - ventry el0_fiq_invalid // FIQ 32-bit EL0 - ventry el0_error_invalid // Error 32-bit EL0 + kernel_ventry el0_sync_invalid // Synchronous 32-bit EL0 + kernel_ventry el0_irq_invalid // IRQ 32-bit EL0 + 
kernel_ventry el0_fiq_invalid // FIQ 32-bit EL0 + kernel_ventry el0_error_invalid // Error 32-bit EL0 #endif END(vectors) -- cgit v1.2.3 From 8ea41b11ef746e1ac97f8c90911e5c61f8bd5cc0 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Sat, 15 Jul 2017 17:23:13 +0100 Subject: arm64: assembler: allow adr_this_cpu to use the stack pointer Given that adr_this_cpu already requires a temp register in addition to the destination register, tweak the instruction sequence so that sp may be used as well. This will simplify switching to per-cpu stacks in subsequent patches. While this limits the range of adr_this_cpu, to +/-4GiB, we don't currently use adr_this_cpu in modules, and this is not problematic for the main kernel image. Signed-off-by: Ard Biesheuvel [Mark: add more commit text] Signed-off-by: Mark Rutland Reviewed-by: Will Deacon Tested-by: Laura Abbott Cc: Catalin Marinas Cc: James Morse --- arch/arm64/include/asm/assembler.h | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'arch/arm64') diff --git a/arch/arm64/include/asm/assembler.h b/arch/arm64/include/asm/assembler.h index 610a42018241..2f2bd5192b5e 100644 --- a/arch/arm64/include/asm/assembler.h +++ b/arch/arm64/include/asm/assembler.h @@ -230,12 +230,18 @@ lr .req x30 // link register .endm /* - * @dst: Result of per_cpu(sym, smp_processor_id()) + * @dst: Result of per_cpu(sym, smp_processor_id()), can be SP for + * non-module code * @sym: The name of the per-cpu variable * @tmp: scratch register */ .macro adr_this_cpu, dst, sym, tmp +#ifndef MODULE + adrp \tmp, \sym + add \dst, \tmp, #:lo12:\sym +#else adr_l \dst, \sym +#endif mrs \tmp, tpidr_el1 add \dst, \dst, \tmp .endm -- cgit v1.2.3 From f60fe78f133243e6de0f05fdefc3ed2f3c5085ca Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Mon, 31 Jul 2017 21:17:03 +0100 Subject: arm64: use an irq stack pointer We allocate our IRQ stacks using a percpu array. This allows us to generate our IRQ stack pointers with adr_this_cpu, but bloats the kernel Image with the boot CPU's IRQ stack. Additionally, these are packed with other percpu variables, and aren't guaranteed to have guard pages. When we enable VMAP_STACK we'll want to vmap our IRQ stacks also, in order to provide guard pages and to permit more stringent alignment requirements. Doing so will require that we use a percpu pointer to each IRQ stack, rather than allocating a percpu IRQ stack in the kernel image. This patch updates our IRQ stack code to use a percpu pointer to the base of each IRQ stack. This will allow us to change the way the stack is allocated with minimal changes elsewhere. In some cases we may try to backtrace before the IRQ stack pointers are initialised, so on_irq_stack() is updated to account for this. In testing with cyclictest, there was no measureable difference between using adr_this_cpu (for irq_stack) and ldr_this_cpu (for irq_stack_ptr) in the IRQ entry path. 
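In plain C, the shape of this change is roughly the following (schematic only, not compilable on its own; alloc_irq_stack() stands in for however the stack memory is obtained):

        #include <linux/percpu.h>

        /* Before: the stack storage itself is per-cpu, so the boot CPU's IRQ
         * stack is carried in the kernel image and its location is fixed at
         * link time. */
        DEFINE_PER_CPU(unsigned long [IRQ_STACK_SIZE/sizeof(long)], irq_stack) __aligned(16);

        /* After: only a pointer is per-cpu; each CPU's stack can then be
         * placed anywhere, e.g. in vmalloc space with guard pages once
         * VMAP_STACK arrives. */
        DEFINE_PER_CPU(unsigned long *, irq_stack_ptr);

        static void init_irq_stacks(void)
        {
                int cpu;

                for_each_possible_cpu(cpu)
                        per_cpu(irq_stack_ptr, cpu) = alloc_irq_stack(cpu); /* placeholder */
        }

The unwind code then reads the pointer via raw_cpu_read(irq_stack_ptr) and has to tolerate it still being NULL very early in boot, which is what the on_irq_stack() change below handles.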
Signed-off-by: Mark Rutland Reviewed-by: Will Deacon Tested-by: Laura Abbott Cc: Ard Biesheuvel Cc: Catalin Marinas Cc: James Morse --- arch/arm64/include/asm/stacktrace.h | 7 +++++-- arch/arm64/kernel/entry.S | 2 +- arch/arm64/kernel/irq.c | 10 ++++++++++ 3 files changed, 16 insertions(+), 3 deletions(-) (limited to 'arch/arm64') diff --git a/arch/arm64/include/asm/stacktrace.h b/arch/arm64/include/asm/stacktrace.h index 000e24182a5c..4c68d8a81988 100644 --- a/arch/arm64/include/asm/stacktrace.h +++ b/arch/arm64/include/asm/stacktrace.h @@ -36,13 +36,16 @@ extern void walk_stackframe(struct task_struct *tsk, struct stackframe *frame, int (*fn)(struct stackframe *, void *), void *data); extern void dump_backtrace(struct pt_regs *regs, struct task_struct *tsk); -DECLARE_PER_CPU(unsigned long [IRQ_STACK_SIZE/sizeof(long)], irq_stack); +DECLARE_PER_CPU(unsigned long *, irq_stack_ptr); static inline bool on_irq_stack(unsigned long sp) { - unsigned long low = (unsigned long)raw_cpu_ptr(irq_stack); + unsigned long low = (unsigned long)raw_cpu_read(irq_stack_ptr); unsigned long high = low + IRQ_STACK_SIZE; + if (!low) + return false; + return (low <= sp && sp < high); } diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S index 58eba94279c5..52348869f82f 100644 --- a/arch/arm64/kernel/entry.S +++ b/arch/arm64/kernel/entry.S @@ -276,7 +276,7 @@ alternative_else_nop_endif and x25, x25, #~(THREAD_SIZE - 1) cbnz x25, 9998f - adr_this_cpu x25, irq_stack, x26 + ldr_this_cpu x25, irq_stack_ptr, x26 mov x26, #IRQ_STACK_SIZE add x26, x25, x26 diff --git a/arch/arm64/kernel/irq.c b/arch/arm64/kernel/irq.c index 2386b26c0712..5141282e47d5 100644 --- a/arch/arm64/kernel/irq.c +++ b/arch/arm64/kernel/irq.c @@ -32,6 +32,7 @@ unsigned long irq_err_count; /* irq stack only needs to be 16 byte aligned - not IRQ_STACK_SIZE aligned. */ DEFINE_PER_CPU(unsigned long [IRQ_STACK_SIZE/sizeof(long)], irq_stack) __aligned(16); +DEFINE_PER_CPU(unsigned long *, irq_stack_ptr); int arch_show_interrupts(struct seq_file *p, int prec) { @@ -50,8 +51,17 @@ void __init set_handle_irq(void (*handle_irq)(struct pt_regs *)) handle_arch_irq = handle_irq; } +static void init_irq_stacks(void) +{ + int cpu; + + for_each_possible_cpu(cpu) + per_cpu(irq_stack_ptr, cpu) = per_cpu(irq_stack, cpu); +} + void __init init_IRQ(void) { + init_irq_stacks(); irqchip_init(); if (!handle_arch_irq) panic("No interrupt controller found."); -- cgit v1.2.3 From e3067861ba6650a566a6273738c23c956ad55c02 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Fri, 21 Jul 2017 14:25:33 +0100 Subject: arm64: add basic VMAP_STACK support This patch enables arm64 to be built with vmap'd task and IRQ stacks. As vmap'd stacks are mapped at page granularity, stacks must be a multiple of PAGE_SIZE. This means that a 64K page kernel must use stacks of at least 64K in size. To minimize the increase in Image size, IRQ stacks are dynamically allocated at boot time, rather than embedding the boot CPU's IRQ stack in the kernel image. This patch was co-authored by Ard Biesheuvel and Mark Rutland. 
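As a standalone illustration of why these stacks end up aligned to twice their size (assumed values only: THREAD_SHIFT is taken as 14 here, and in the real entry code the test is applied after the SP has already been decremented for the register frame):

        #include <stdio.h>
        #include <stdint.h>

        #define THREAD_SHIFT    14
        #define THREAD_SIZE     (1UL << THREAD_SHIFT)   /* 16K stack */
        #define THREAD_ALIGN    (2 * THREAD_SIZE)       /* 32K alignment */

        /* With the stack occupying [base, base + THREAD_SIZE) and base
         * aligned to THREAD_ALIGN, every in-range SP has bit THREAD_SHIFT
         * clear; running off either end of the stack sets it, so a single
         * bit test detects overflow (or underflow). */
        static int sp_out_of_range(uintptr_t sp)
        {
                return (sp & THREAD_SIZE) != 0;
        }

        int main(void)
        {
                uintptr_t base = 0x40000000;            /* THREAD_ALIGN aligned */

                printf("%d\n", sp_out_of_range(base + THREAD_SIZE - 0x200)); /* 0 */
                printf("%d\n", sp_out_of_range(base + 0x10));                /* 0 */
                printf("%d\n", sp_out_of_range(base - 0x10));                /* 1: overflow */
                return 0;
        }

The overflow-detection patch later in this series performs essentially this test in assembly (tbnz on bit THREAD_SHIFT of the decremented SP) before anything is written to the stack.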
Signed-off-by: Ard Biesheuvel Signed-off-by: Mark Rutland Reviewed-by: Will Deacon Tested-by: Laura Abbott Cc: Catalin Marinas Cc: James Morse --- arch/arm64/Kconfig | 1 + arch/arm64/include/asm/efi.h | 7 ++++++- arch/arm64/include/asm/memory.h | 23 ++++++++++++++++++++++- arch/arm64/kernel/irq.c | 30 ++++++++++++++++++++++++++++-- arch/arm64/kernel/vmlinux.lds.S | 2 +- 5 files changed, 58 insertions(+), 5 deletions(-) (limited to 'arch/arm64') diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index dfd908630631..d66f9db3e6db 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -75,6 +75,7 @@ config ARM64 select HAVE_ARCH_SECCOMP_FILTER select HAVE_ARCH_TRACEHOOK select HAVE_ARCH_TRANSPARENT_HUGEPAGE + select HAVE_ARCH_VMAP_STACK select HAVE_ARM_SMCCC select HAVE_EBPF_JIT select HAVE_C_RECORDMCOUNT diff --git a/arch/arm64/include/asm/efi.h b/arch/arm64/include/asm/efi.h index 0e8cc3b85bb8..2b1e5def2e49 100644 --- a/arch/arm64/include/asm/efi.h +++ b/arch/arm64/include/asm/efi.h @@ -49,7 +49,12 @@ int efi_set_mapping_permissions(struct mm_struct *mm, efi_memory_desc_t *md); */ #define EFI_FDT_ALIGN SZ_2M /* used by allocate_new_fdt_and_exit_boot() */ -#define EFI_KIMG_ALIGN SEGMENT_ALIGN +/* + * In some configurations (e.g. VMAP_STACK && 64K pages), stacks built into the + * kernel need greater alignment than we require the segments to be padded to. + */ +#define EFI_KIMG_ALIGN \ + (SEGMENT_ALIGN > THREAD_ALIGN ? SEGMENT_ALIGN : THREAD_ALIGN) /* on arm64, the FDT may be located anywhere in system RAM */ static inline unsigned long efi_get_max_fdt_addr(unsigned long dram_base) diff --git a/arch/arm64/include/asm/memory.h b/arch/arm64/include/asm/memory.h index 7fa6ad48d574..c5cd2c599b24 100644 --- a/arch/arm64/include/asm/memory.h +++ b/arch/arm64/include/asm/memory.h @@ -102,7 +102,17 @@ #define KASAN_SHADOW_SIZE (0) #endif -#define THREAD_SHIFT 14 +#define MIN_THREAD_SHIFT 14 + +/* + * VMAP'd stacks are allocated at page granularity, so we must ensure that such + * stacks are a multiple of page size. + */ +#if defined(CONFIG_VMAP_STACK) && (MIN_THREAD_SHIFT < PAGE_SHIFT) +#define THREAD_SHIFT PAGE_SHIFT +#else +#define THREAD_SHIFT MIN_THREAD_SHIFT +#endif #if THREAD_SHIFT >= PAGE_SHIFT #define THREAD_SIZE_ORDER (THREAD_SHIFT - PAGE_SHIFT) @@ -110,6 +120,17 @@ #define THREAD_SIZE (UL(1) << THREAD_SHIFT) +/* + * By aligning VMAP'd stacks to 2 * THREAD_SIZE, we can detect overflow by + * checking sp & (1 << THREAD_SHIFT), which we can do cheaply in the entry + * assembly. + */ +#ifdef CONFIG_VMAP_STACK +#define THREAD_ALIGN (2 * THREAD_SIZE) +#else +#define THREAD_ALIGN THREAD_SIZE +#endif + #define IRQ_STACK_SIZE THREAD_SIZE /* diff --git a/arch/arm64/kernel/irq.c b/arch/arm64/kernel/irq.c index 5141282e47d5..713561e5bcab 100644 --- a/arch/arm64/kernel/irq.c +++ b/arch/arm64/kernel/irq.c @@ -23,15 +23,15 @@ #include #include +#include #include #include #include #include +#include unsigned long irq_err_count; -/* irq stack only needs to be 16 byte aligned - not IRQ_STACK_SIZE aligned. 
*/ -DEFINE_PER_CPU(unsigned long [IRQ_STACK_SIZE/sizeof(long)], irq_stack) __aligned(16); DEFINE_PER_CPU(unsigned long *, irq_stack_ptr); int arch_show_interrupts(struct seq_file *p, int prec) @@ -51,6 +51,31 @@ void __init set_handle_irq(void (*handle_irq)(struct pt_regs *)) handle_arch_irq = handle_irq; } +#ifdef CONFIG_VMAP_STACK +static void init_irq_stacks(void) +{ + int cpu; + unsigned long *p; + + for_each_possible_cpu(cpu) { + /* + * To ensure that VMAP'd stack overflow detection works + * correctly, the IRQ stacks need to have the same + * alignment as other stacks. + */ + p = __vmalloc_node_range(IRQ_STACK_SIZE, THREAD_ALIGN, + VMALLOC_START, VMALLOC_END, + THREADINFO_GFP, PAGE_KERNEL, + 0, cpu_to_node(cpu), + __builtin_return_address(0)); + + per_cpu(irq_stack_ptr, cpu) = p; + } +} +#else +/* irq stack only needs to be 16 byte aligned - not IRQ_STACK_SIZE aligned. */ +DEFINE_PER_CPU_ALIGNED(unsigned long [IRQ_STACK_SIZE/sizeof(long)], irq_stack); + static void init_irq_stacks(void) { int cpu; @@ -58,6 +83,7 @@ static void init_irq_stacks(void) for_each_possible_cpu(cpu) per_cpu(irq_stack_ptr, cpu) = per_cpu(irq_stack, cpu); } +#endif void __init init_IRQ(void) { diff --git a/arch/arm64/kernel/vmlinux.lds.S b/arch/arm64/kernel/vmlinux.lds.S index 71565386d063..fe56c268a7d9 100644 --- a/arch/arm64/kernel/vmlinux.lds.S +++ b/arch/arm64/kernel/vmlinux.lds.S @@ -176,7 +176,7 @@ SECTIONS _data = .; _sdata = .; - RW_DATA_SECTION(L1_CACHE_BYTES, PAGE_SIZE, THREAD_SIZE) + RW_DATA_SECTION(L1_CACHE_BYTES, PAGE_SIZE, THREAD_ALIGN) /* * Data written with the MMU off but read with the MMU on requires -- cgit v1.2.3 From 12964443e8d1914010f9269f9f9abc4e122bc6ca Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Tue, 1 Aug 2017 18:51:15 +0100 Subject: arm64: add on_accessible_stack() Both unwind_frame() and dump_backtrace() try to check whether a stack address is sane to access, with very similar logic. Both will need updating in order to handle overflow stacks. Factor out this logic into a helper, so that we can avoid further duplication when we add overflow stacks. Signed-off-by: Mark Rutland Reviewed-by: Will Deacon Tested-by: Laura Abbott Cc: Ard Biesheuvel Cc: Catalin Marinas Cc: James Morse --- arch/arm64/include/asm/stacktrace.h | 16 ++++++++++++++++ arch/arm64/kernel/stacktrace.c | 7 +------ arch/arm64/kernel/traps.c | 3 +-- 3 files changed, 18 insertions(+), 8 deletions(-) (limited to 'arch/arm64') diff --git a/arch/arm64/include/asm/stacktrace.h b/arch/arm64/include/asm/stacktrace.h index 4c68d8a81988..92ddb6d25cf3 100644 --- a/arch/arm64/include/asm/stacktrace.h +++ b/arch/arm64/include/asm/stacktrace.h @@ -57,4 +57,20 @@ static inline bool on_task_stack(struct task_struct *tsk, unsigned long sp) return (low <= sp && sp < high); } +/* + * We can only safely access per-cpu stacks from current in a non-preemptible + * context. 
+ */ +static inline bool on_accessible_stack(struct task_struct *tsk, unsigned long sp) +{ + if (on_task_stack(tsk, sp)) + return true; + if (tsk != current || preemptible()) + return false; + if (on_irq_stack(sp)) + return true; + + return false; +} + #endif /* __ASM_STACKTRACE_H */ diff --git a/arch/arm64/kernel/stacktrace.c b/arch/arm64/kernel/stacktrace.c index 35588caad9d0..3144584617e7 100644 --- a/arch/arm64/kernel/stacktrace.c +++ b/arch/arm64/kernel/stacktrace.c @@ -50,12 +50,7 @@ int notrace unwind_frame(struct task_struct *tsk, struct stackframe *frame) if (!tsk) tsk = current; - /* - * Switching between stacks is valid when tracing current and in - * non-preemptible context. - */ - if (!(tsk == current && !preemptible() && on_irq_stack(fp)) && - !on_task_stack(tsk, fp)) + if (!on_accessible_stack(tsk, fp)) return -EINVAL; frame->fp = READ_ONCE_NOCHECK(*(unsigned long *)(fp)); diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c index 9633773ca42c..d01c5988354b 100644 --- a/arch/arm64/kernel/traps.c +++ b/arch/arm64/kernel/traps.c @@ -193,8 +193,7 @@ void dump_backtrace(struct pt_regs *regs, struct task_struct *tsk) if (in_entry_text(frame.pc)) { stack = frame.fp - offsetof(struct pt_regs, stackframe); - if (on_task_stack(tsk, stack) || - (tsk == current && !preemptible() && on_irq_stack(stack))) + if (on_accessible_stack(tsk, stack)) dump_mem("", "Exception stack", stack, stack + sizeof(struct pt_regs)); } -- cgit v1.2.3 From 872d8327ce8982883b8237b2c320c8666f14e561 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Fri, 14 Jul 2017 20:30:35 +0100 Subject: arm64: add VMAP_STACK overflow detection This patch adds stack overflow detection to arm64, usable when vmap'd stacks are in use. Overflow is detected in a small preamble executed for each exception entry, which checks whether there is enough space on the current stack for the general purpose registers to be saved. If there is not enough space, the overflow handler is invoked on a per-cpu overflow stack. This approach preserves the original exception information in ESR_EL1 (and where appropriate, FAR_EL1). Task and IRQ stacks are aligned to double their size, enabling overflow to be detected with a single bit test. For example, a 16K stack is aligned to 32K, ensuring that bit 14 of the SP must be zero. On an overflow (or underflow), this bit is flipped. Thus, overflow (of less than the size of the stack) can be detected by testing whether this bit is set. The overflow check is performed before any attempt is made to access the stack, avoiding recursive faults (and the loss of exception information these would entail). As logical operations cannot be performed on the SP directly, the SP is temporarily swapped with a general purpose register using arithmetic operations to enable the test to be performed. This gives us a useful error message on stack overflow, as can be trigger with the LKDTM overflow test: [ 305.388749] lkdtm: Performing direct entry OVERFLOW [ 305.395444] Insufficient stack space to handle exception! 
[ 305.395482] ESR: 0x96000047 -- DABT (current EL) [ 305.399890] FAR: 0xffff00000a5e7f30 [ 305.401315] Task stack: [0xffff00000a5e8000..0xffff00000a5ec000] [ 305.403815] IRQ stack: [0xffff000008000000..0xffff000008004000] [ 305.407035] Overflow stack: [0xffff80003efce4e0..0xffff80003efcf4e0] [ 305.409622] CPU: 0 PID: 1219 Comm: sh Not tainted 4.13.0-rc3-00021-g9636aea #5 [ 305.412785] Hardware name: linux,dummy-virt (DT) [ 305.415756] task: ffff80003d051c00 task.stack: ffff00000a5e8000 [ 305.419221] PC is at recursive_loop+0x10/0x48 [ 305.421637] LR is at recursive_loop+0x38/0x48 [ 305.423768] pc : [] lr : [] pstate: 40000145 [ 305.428020] sp : ffff00000a5e7f50 [ 305.430469] x29: ffff00000a5e8350 x28: ffff80003d051c00 [ 305.433191] x27: ffff000008981000 x26: ffff000008f80400 [ 305.439012] x25: ffff00000a5ebeb8 x24: ffff00000a5ebeb8 [ 305.440369] x23: ffff000008f80138 x22: 0000000000000009 [ 305.442241] x21: ffff80003ce65000 x20: ffff000008f80188 [ 305.444552] x19: 0000000000000013 x18: 0000000000000006 [ 305.446032] x17: 0000ffffa2601280 x16: ffff0000081fe0b8 [ 305.448252] x15: ffff000008ff546d x14: 000000000047a4c8 [ 305.450246] x13: ffff000008ff7872 x12: 0000000005f5e0ff [ 305.452953] x11: ffff000008ed2548 x10: 000000000005ee8d [ 305.454824] x9 : ffff000008545380 x8 : ffff00000a5e8770 [ 305.457105] x7 : 1313131313131313 x6 : 00000000000000e1 [ 305.459285] x5 : 0000000000000000 x4 : 0000000000000000 [ 305.461781] x3 : 0000000000000000 x2 : 0000000000000400 [ 305.465119] x1 : 0000000000000013 x0 : 0000000000000012 [ 305.467724] Kernel panic - not syncing: kernel stack overflow [ 305.470561] CPU: 0 PID: 1219 Comm: sh Not tainted 4.13.0-rc3-00021-g9636aea #5 [ 305.473325] Hardware name: linux,dummy-virt (DT) [ 305.475070] Call trace: [ 305.476116] [] dump_backtrace+0x0/0x378 [ 305.478991] [] show_stack+0x14/0x20 [ 305.481237] [] dump_stack+0x98/0xb8 [ 305.483294] [] panic+0x118/0x280 [ 305.485673] [] nmi_panic+0x6c/0x70 [ 305.486216] [] handle_bad_stack+0x118/0x128 [ 305.486612] Exception stack(0xffff80003efcf3a0 to 0xffff80003efcf4e0) [ 305.487334] f3a0: 0000000000000012 0000000000000013 0000000000000400 0000000000000000 [ 305.488025] f3c0: 0000000000000000 0000000000000000 00000000000000e1 1313131313131313 [ 305.488908] f3e0: ffff00000a5e8770 ffff000008545380 000000000005ee8d ffff000008ed2548 [ 305.489403] f400: 0000000005f5e0ff ffff000008ff7872 000000000047a4c8 ffff000008ff546d [ 305.489759] f420: ffff0000081fe0b8 0000ffffa2601280 0000000000000006 0000000000000013 [ 305.490256] f440: ffff000008f80188 ffff80003ce65000 0000000000000009 ffff000008f80138 [ 305.490683] f460: ffff00000a5ebeb8 ffff00000a5ebeb8 ffff000008f80400 ffff000008981000 [ 305.491051] f480: ffff80003d051c00 ffff00000a5e8350 ffff00000859f358 ffff00000a5e7f50 [ 305.491444] f4a0: ffff00000859f330 0000000040000145 0000000000000000 0000000000000000 [ 305.492008] f4c0: 0001000000000000 0000000000000000 ffff00000a5e8350 ffff00000859f330 [ 305.493063] [] __bad_stack+0x88/0x8c [ 305.493396] [] recursive_loop+0x10/0x48 [ 305.493731] [] recursive_loop+0x38/0x48 [ 305.494088] [] recursive_loop+0x38/0x48 [ 305.494425] [] recursive_loop+0x38/0x48 [ 305.494649] [] recursive_loop+0x38/0x48 [ 305.494898] [] recursive_loop+0x38/0x48 [ 305.495205] [] recursive_loop+0x38/0x48 [ 305.495453] [] recursive_loop+0x38/0x48 [ 305.495708] [] recursive_loop+0x38/0x48 [ 305.496000] [] recursive_loop+0x38/0x48 [ 305.496302] [] recursive_loop+0x38/0x48 [ 305.496644] [] recursive_loop+0x38/0x48 [ 305.496894] [] recursive_loop+0x38/0x48 [ 305.497138] [] 
recursive_loop+0x38/0x48 [ 305.497325] [] lkdtm_OVERFLOW+0x14/0x20 [ 305.497506] [] lkdtm_do_action+0x1c/0x28 [ 305.497786] [] direct_entry+0xe0/0x170 [ 305.498095] [] full_proxy_write+0x60/0xa8 [ 305.498387] [] __vfs_write+0x1c/0x128 [ 305.498679] [] vfs_write+0xa0/0x1b0 [ 305.498926] [] SyS_write+0x44/0xa0 [ 305.499182] Exception stack(0xffff00000a5ebec0 to 0xffff00000a5ec000) [ 305.499429] bec0: 0000000000000001 000000001c4cf5e0 0000000000000009 000000001c4cf5e0 [ 305.499674] bee0: 574f4c465245564f 0000000000000000 0000000000000000 8000000080808080 [ 305.499904] bf00: 0000000000000040 0000000000000038 fefefeff1b4bc2ff 7f7f7f7f7f7fff7f [ 305.500189] bf20: 0101010101010101 0000000000000000 000000000047a4c8 0000000000000038 [ 305.500712] bf40: 0000000000000000 0000ffffa2601280 0000ffffc63f6068 00000000004b5000 [ 305.501241] bf60: 0000000000000001 000000001c4cf5e0 0000000000000009 000000001c4cf5e0 [ 305.501791] bf80: 0000000000000020 0000000000000000 00000000004b5000 000000001c4cc458 [ 305.502314] bfa0: 0000000000000000 0000ffffc63f7950 000000000040a3c4 0000ffffc63f70e0 [ 305.502762] bfc0: 0000ffffa2601268 0000000080000000 0000000000000001 0000000000000040 [ 305.503207] bfe0: 0000000000000000 0000000000000000 0000000000000000 0000000000000000 [ 305.503680] [] el0_svc_naked+0x24/0x28 [ 305.504720] Kernel Offset: disabled [ 305.505189] CPU features: 0x002082 [ 305.505473] Memory Limit: none [ 305.506181] ---[ end Kernel panic - not syncing: kernel stack overflow This patch was co-authored by Ard Biesheuvel and Mark Rutland. Signed-off-by: Ard Biesheuvel Signed-off-by: Mark Rutland Reviewed-by: Will Deacon Tested-by: Laura Abbott Cc: Catalin Marinas Cc: James Morse --- arch/arm64/include/asm/memory.h | 2 ++ arch/arm64/include/asm/stacktrace.h | 16 +++++++++ arch/arm64/kernel/entry.S | 70 +++++++++++++++++++++++++++++++++++++ arch/arm64/kernel/traps.c | 39 +++++++++++++++++++++ 4 files changed, 127 insertions(+) (limited to 'arch/arm64') diff --git a/arch/arm64/include/asm/memory.h b/arch/arm64/include/asm/memory.h index c5cd2c599b24..1a025b744107 100644 --- a/arch/arm64/include/asm/memory.h +++ b/arch/arm64/include/asm/memory.h @@ -133,6 +133,8 @@ #define IRQ_STACK_SIZE THREAD_SIZE +#define OVERFLOW_STACK_SIZE SZ_4K + /* * Alignment of kernel segments (e.g. .text, .data). */ diff --git a/arch/arm64/include/asm/stacktrace.h b/arch/arm64/include/asm/stacktrace.h index 92ddb6d25cf3..6ad30776e984 100644 --- a/arch/arm64/include/asm/stacktrace.h +++ b/arch/arm64/include/asm/stacktrace.h @@ -57,6 +57,20 @@ static inline bool on_task_stack(struct task_struct *tsk, unsigned long sp) return (low <= sp && sp < high); } +#ifdef CONFIG_VMAP_STACK +DECLARE_PER_CPU(unsigned long [OVERFLOW_STACK_SIZE/sizeof(long)], overflow_stack); + +static inline bool on_overflow_stack(unsigned long sp) +{ + unsigned long low = (unsigned long)raw_cpu_ptr(overflow_stack); + unsigned long high = low + OVERFLOW_STACK_SIZE; + + return (low <= sp && sp < high); +} +#else +static inline bool on_overflow_stack(unsigned long sp) { return false; } +#endif + /* * We can only safely access per-cpu stacks from current in a non-preemptible * context. 
@@ -69,6 +83,8 @@ static inline bool on_accessible_stack(struct task_struct *tsk, unsigned long sp return false; if (on_irq_stack(sp)) return true; + if (on_overflow_stack(sp)) + return true; return false; } diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S index 52348869f82f..3ef6e2297fb4 100644 --- a/arch/arm64/kernel/entry.S +++ b/arch/arm64/kernel/entry.S @@ -72,6 +72,48 @@ .macro kernel_ventry label .align 7 sub sp, sp, #S_FRAME_SIZE +#ifdef CONFIG_VMAP_STACK + /* + * Test whether the SP has overflowed, without corrupting a GPR. + * Task and IRQ stacks are aligned to (1 << THREAD_SHIFT). + */ + add sp, sp, x0 // sp' = sp + x0 + sub x0, sp, x0 // x0' = sp' - x0 = (sp + x0) - x0 = sp + tbnz x0, #THREAD_SHIFT, 0f + sub x0, sp, x0 // x0'' = sp' - x0' = (sp + x0) - sp = x0 + sub sp, sp, x0 // sp'' = sp' - x0 = (sp + x0) - x0 = sp + b \label + +0: + /* + * Either we've just detected an overflow, or we've taken an exception + * while on the overflow stack. Either way, we won't return to + * userspace, and can clobber EL0 registers to free up GPRs. + */ + + /* Stash the original SP (minus S_FRAME_SIZE) in tpidr_el0. */ + msr tpidr_el0, x0 + + /* Recover the original x0 value and stash it in tpidrro_el0 */ + sub x0, sp, x0 + msr tpidrro_el0, x0 + + /* Switch to the overflow stack */ + adr_this_cpu sp, overflow_stack + OVERFLOW_STACK_SIZE, x0 + + /* + * Check whether we were already on the overflow stack. This may happen + * after panic() re-enables interrupts. + */ + mrs x0, tpidr_el0 // sp of interrupted context + sub x0, sp, x0 // delta with top of overflow stack + tst x0, #~(OVERFLOW_STACK_SIZE - 1) // within range? + b.ne __bad_stack // no? -> bad stack pointer + + /* We were already on the overflow stack. Restore sp/x0 and carry on. */ + sub sp, sp, x0 + mrs x0, tpidrro_el0 +#endif b \label .endm @@ -352,6 +394,34 @@ ENTRY(vectors) #endif END(vectors) +#ifdef CONFIG_VMAP_STACK + /* + * We detected an overflow in kernel_ventry, which switched to the + * overflow stack. Stash the exception regs, and head to our overflow + * handler. + */ +__bad_stack: + /* Restore the original x0 value */ + mrs x0, tpidrro_el0 + + /* + * Store the original GPRs to the new stack. The orginal SP (minus + * S_FRAME_SIZE) was stashed in tpidr_el0 by kernel_ventry. 
+ */ + sub sp, sp, #S_FRAME_SIZE + kernel_entry 1 + mrs x0, tpidr_el0 + add x0, x0, #S_FRAME_SIZE + str x0, [sp, #S_SP] + + /* Stash the regs for handle_bad_stack */ + mov x0, sp + + /* Time to die */ + bl handle_bad_stack + ASM_BUG() +#endif /* CONFIG_VMAP_STACK */ + /* * Invalid mode handlers */ diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c index d01c5988354b..2d591804e46f 100644 --- a/arch/arm64/kernel/traps.c +++ b/arch/arm64/kernel/traps.c @@ -32,6 +32,7 @@ #include #include #include +#include #include #include @@ -41,6 +42,7 @@ #include #include #include +#include #include #include #include @@ -666,6 +668,43 @@ asmlinkage void bad_el0_sync(struct pt_regs *regs, int reason, unsigned int esr) force_sig_info(info.si_signo, &info, current); } +#ifdef CONFIG_VMAP_STACK + +DEFINE_PER_CPU(unsigned long [OVERFLOW_STACK_SIZE/sizeof(long)], overflow_stack) + __aligned(16); + +asmlinkage void handle_bad_stack(struct pt_regs *regs) +{ + unsigned long tsk_stk = (unsigned long)current->stack; + unsigned long irq_stk = (unsigned long)this_cpu_read(irq_stack_ptr); + unsigned long ovf_stk = (unsigned long)this_cpu_ptr(overflow_stack); + unsigned int esr = read_sysreg(esr_el1); + unsigned long far = read_sysreg(far_el1); + + console_verbose(); + pr_emerg("Insufficient stack space to handle exception!"); + + pr_emerg("ESR: 0x%08x -- %s\n", esr, esr_get_class_string(esr)); + pr_emerg("FAR: 0x%016lx\n", far); + + pr_emerg("Task stack: [0x%016lx..0x%016lx]\n", + tsk_stk, tsk_stk + THREAD_SIZE); + pr_emerg("IRQ stack: [0x%016lx..0x%016lx]\n", + irq_stk, irq_stk + THREAD_SIZE); + pr_emerg("Overflow stack: [0x%016lx..0x%016lx]\n", + ovf_stk, ovf_stk + OVERFLOW_STACK_SIZE); + + __show_regs(regs); + + /* + * We use nmi_panic to limit the potential for recusive overflows, and + * to get a better stack trace. + */ + nmi_panic(NULL, "kernel stack overflow"); + cpu_park_loop(); +} +#endif + void __pte_error(const char *file, int line, unsigned long val) { pr_err("%s:%d: bad pte %016lx.\n", file, line, val); -- cgit v1.2.3 From 3b66023d574fee8a481f8e4e1b5bd15583a3b5bf Mon Sep 17 00:00:00 2001 From: Dave Martin Date: Fri, 18 Aug 2017 14:53:47 +0100 Subject: arm64: neon/efi: Make EFI fpsimd save/restore variables static The percpu variables efi_fpsimd_state and efi_fpsimd_state_used, used by the FPSIMD save/restore routines for EFI calls, are unintentionally global. There's no reason for anything outside fpsimd.c to touch these, so this patch makes them static (as they should have been in the first place). 
Signed-off-by: Dave Martin Signed-off-by: Catalin Marinas --- arch/arm64/kernel/fpsimd.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/arm64') diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c index 9da4e636b328..3a68cf38a6b3 100644 --- a/arch/arm64/kernel/fpsimd.c +++ b/arch/arm64/kernel/fpsimd.c @@ -321,8 +321,8 @@ void kernel_neon_end(void) } EXPORT_SYMBOL(kernel_neon_end); -DEFINE_PER_CPU(struct fpsimd_state, efi_fpsimd_state); -DEFINE_PER_CPU(bool, efi_fpsimd_state_used); +static DEFINE_PER_CPU(struct fpsimd_state, efi_fpsimd_state); +static DEFINE_PER_CPU(bool, efi_fpsimd_state_used); /* * EFI runtime services support functions -- cgit v1.2.3 From 3bbf7157ac66a88d94b291d4d5e2b2a9319a0f90 Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Mon, 26 Jun 2017 14:27:36 +0100 Subject: arm64: Convert pte handling from inline asm to using (cmp)xchg With the support for hardware updates of the access and dirty states, the following pte handling functions had to be implemented using exclusives: __ptep_test_and_clear_young(), ptep_get_and_clear(), ptep_set_wrprotect() and ptep_set_access_flags(). To take advantage of the LSE atomic instructions and also make the code cleaner, convert these pte functions to use the more generic cmpxchg()/xchg(). Reviewed-by: Will Deacon Acked-by: Mark Rutland Acked-by: Steve Capper Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/pgtable.h | 71 +++++++++++++++++++--------------------- arch/arm64/mm/fault.c | 24 +++++++------- 2 files changed, 44 insertions(+), 51 deletions(-) (limited to 'arch/arm64') diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index 6eae342ced6b..9127688ae775 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -39,6 +39,7 @@ #ifndef __ASSEMBLY__ +#include #include #include @@ -173,6 +174,11 @@ static inline pte_t pte_clear_rdonly(pte_t pte) return clear_pte_bit(pte, __pgprot(PTE_RDONLY)); } +static inline pte_t pte_set_rdonly(pte_t pte) +{ + return set_pte_bit(pte, __pgprot(PTE_RDONLY)); +} + static inline pte_t pte_mkpresent(pte_t pte) { return set_pte_bit(pte, __pgprot(PTE_VALID)); @@ -593,20 +599,17 @@ static inline int pmdp_set_access_flags(struct vm_area_struct *vma, #define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG static inline int __ptep_test_and_clear_young(pte_t *ptep) { - pteval_t pteval; - unsigned int tmp, res; + pte_t old_pte, pte; - asm volatile("// __ptep_test_and_clear_young\n" - " prfm pstl1strm, %2\n" - "1: ldxr %0, %2\n" - " ubfx %w3, %w0, %5, #1 // extract PTE_AF (young)\n" - " and %0, %0, %4 // clear PTE_AF\n" - " stxr %w1, %0, %2\n" - " cbnz %w1, 1b\n" - : "=&r" (pteval), "=&r" (tmp), "+Q" (pte_val(*ptep)), "=&r" (res) - : "L" (~PTE_AF), "I" (ilog2(PTE_AF))); + pte = READ_ONCE(*ptep); + do { + old_pte = pte; + pte = pte_mkold(pte); + pte_val(pte) = cmpxchg_relaxed(&pte_val(*ptep), + pte_val(old_pte), pte_val(pte)); + } while (pte_val(pte) != pte_val(old_pte)); - return res; + return pte_young(pte); } static inline int ptep_test_and_clear_young(struct vm_area_struct *vma, @@ -630,17 +633,7 @@ static inline int pmdp_test_and_clear_young(struct vm_area_struct *vma, static inline pte_t ptep_get_and_clear(struct mm_struct *mm, unsigned long address, pte_t *ptep) { - pteval_t old_pteval; - unsigned int tmp; - - asm volatile("// ptep_get_and_clear\n" - " prfm pstl1strm, %2\n" - "1: ldxr %0, %2\n" - " stxr %w1, xzr, %2\n" - " cbnz %w1, 1b\n" - : "=&r" (old_pteval), "=&r" (tmp), "+Q" (pte_val(*ptep))); - - 
return __pte(old_pteval); + return __pte(xchg_relaxed(&pte_val(*ptep), 0)); } #ifdef CONFIG_TRANSPARENT_HUGEPAGE @@ -659,21 +652,23 @@ static inline pmd_t pmdp_huge_get_and_clear(struct mm_struct *mm, #define __HAVE_ARCH_PTEP_SET_WRPROTECT static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long address, pte_t *ptep) { - pteval_t pteval; - unsigned long tmp; - - asm volatile("// ptep_set_wrprotect\n" - " prfm pstl1strm, %2\n" - "1: ldxr %0, %2\n" - " tst %0, %4 // check for hw dirty (!PTE_RDONLY)\n" - " csel %1, %3, xzr, eq // set PTE_DIRTY|PTE_RDONLY if dirty\n" - " orr %0, %0, %1 // if !dirty, PTE_RDONLY is already set\n" - " and %0, %0, %5 // clear PTE_WRITE/PTE_DBM\n" - " stxr %w1, %0, %2\n" - " cbnz %w1, 1b\n" - : "=&r" (pteval), "=&r" (tmp), "+Q" (pte_val(*ptep)) - : "r" (PTE_DIRTY|PTE_RDONLY), "L" (PTE_RDONLY), "L" (~PTE_WRITE) - : "cc"); + pte_t old_pte, pte; + + pte = READ_ONCE(*ptep); + do { + old_pte = pte; + /* + * If hardware-dirty (PTE_WRITE/DBM bit set and PTE_RDONLY + * clear), set the PTE_DIRTY and PTE_RDONLY bits. + */ + if (pte_hw_dirty(pte)) { + pte = pte_mkdirty(pte); + pte = pte_set_rdonly(pte); + } + pte = pte_wrprotect(pte); + pte_val(pte) = cmpxchg_relaxed(&pte_val(*ptep), + pte_val(old_pte), pte_val(pte)); + } while (pte_val(pte) != pte_val(old_pte)); } #ifdef CONFIG_TRANSPARENT_HUGEPAGE diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c index 52ee273afeec..430eaf82da49 100644 --- a/arch/arm64/mm/fault.c +++ b/arch/arm64/mm/fault.c @@ -34,6 +34,7 @@ #include #include +#include #include #include #include @@ -197,8 +198,7 @@ int ptep_set_access_flags(struct vm_area_struct *vma, unsigned long address, pte_t *ptep, pte_t entry, int dirty) { - pteval_t old_pteval; - unsigned int tmp; + pteval_t old_pteval, pteval; if (pte_same(*ptep, entry)) return 0; @@ -208,7 +208,7 @@ int ptep_set_access_flags(struct vm_area_struct *vma, /* set PTE_RDONLY if actual read-only or clean PTE */ if (!pte_write(entry) || !pte_sw_dirty(entry)) - pte_val(entry) |= PTE_RDONLY; + entry = pte_set_rdonly(entry); /* * Setting the flags must be done atomically to avoid racing with the @@ -217,16 +217,14 @@ int ptep_set_access_flags(struct vm_area_struct *vma, * (calculated as: a & b == ~(~a | ~b)). */ pte_val(entry) ^= PTE_RDONLY; - asm volatile("// ptep_set_access_flags\n" - " prfm pstl1strm, %2\n" - "1: ldxr %0, %2\n" - " eor %0, %0, %3 // negate PTE_RDONLY in *ptep\n" - " orr %0, %0, %4 // set flags\n" - " eor %0, %0, %3 // negate final PTE_RDONLY\n" - " stxr %w1, %0, %2\n" - " cbnz %w1, 1b\n" - : "=&r" (old_pteval), "=&r" (tmp), "+Q" (pte_val(*ptep)) - : "L" (PTE_RDONLY), "r" (pte_val(entry))); + pteval = READ_ONCE(pte_val(*ptep)); + do { + old_pteval = pteval; + pteval ^= PTE_RDONLY; + pteval |= pte_val(entry); + pteval ^= PTE_RDONLY; + pteval = cmpxchg_relaxed(&pte_val(*ptep), old_pteval, pteval); + } while (pteval != old_pteval); flush_tlb_fix_spurious_fault(vma, address); return 1; -- cgit v1.2.3 From 0966253d7ccddc42a5211b3488bb4f202c04de1b Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Thu, 6 Jul 2017 11:46:39 +0100 Subject: kvm: arm64: Convert kvm_set_s2pte_readonly() from inline asm to cmpxchg() To take advantage of the LSE atomic instructions and also make the code cleaner, convert the kvm_set_s2pte_readonly() function to use the more generic cmpxchg(). 
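The conversions in this series all follow the same read-modify-write retry loop. A minimal sketch of that pattern, written with C11 atomics so it can be built and run outside the kernel (the bit values are stand-ins, not the real stage-2 definitions, and atomic_compare_exchange_weak_explicit() stands in for cmpxchg_relaxed()):

#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>

#define S2_RDWR   (3ULL << 6)	/* stand-in for PTE_S2_RDWR */
#define S2_RDONLY (1ULL << 6)	/* stand-in for PTE_S2_RDONLY */

static _Atomic uint64_t pte = S2_RDWR | 0x1000;

static void set_readonly(void)
{
	uint64_t old = atomic_load_explicit(&pte, memory_order_relaxed);
	uint64_t new;

	do {
		/* Recompute the new value from the latest observed one. */
		new = (old & ~S2_RDWR) | S2_RDONLY;
		/* On failure 'old' is refreshed with the current contents and
		 * we go round again, just as the kernel loop retries when
		 * cmpxchg_relaxed() returns something other than old_pteval. */
	} while (!atomic_compare_exchange_weak_explicit(&pte, &old, new,
							memory_order_relaxed,
							memory_order_relaxed));
}

int main(void)
{
	set_readonly();
	printf("pte = 0x%llx\n", (unsigned long long)atomic_load(&pte));
	return 0;
}

The loop only repeats if another agent (another CPU, or a hardware access/dirty update) changed the entry between the load and the store, which is the same guarantee the ldxr/stxr sequences provided.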
Cc: Marc Zyngier Reviewed-by: Will Deacon Reviewed-by: Christoffer Dall Acked-by: Mark Rutland Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/kvm_mmu.h | 21 +++++++++------------ 1 file changed, 9 insertions(+), 12 deletions(-) (limited to 'arch/arm64')
diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h index a89cc22abadc..672c8684d5c2 100644 --- a/arch/arm64/include/asm/kvm_mmu.h +++ b/arch/arm64/include/asm/kvm_mmu.h @@ -175,18 +175,15 @@ static inline pmd_t kvm_s2pmd_mkwrite(pmd_t pmd) static inline void kvm_set_s2pte_readonly(pte_t *pte) { - pteval_t pteval; - unsigned long tmp; - - asm volatile("// kvm_set_s2pte_readonly\n" - " prfm pstl1strm, %2\n" - "1: ldxr %0, %2\n" - " and %0, %0, %3 // clear PTE_S2_RDWR\n" - " orr %0, %0, %4 // set PTE_S2_RDONLY\n" - " stxr %w1, %0, %2\n" - " cbnz %w1, 1b\n" - : "=&r" (pteval), "=&r" (tmp), "+Q" (pte_val(*pte)) - : "L" (~PTE_S2_RDWR), "L" (PTE_S2_RDONLY)); + pteval_t old_pteval, pteval; + + pteval = READ_ONCE(pte_val(*pte)); + do { + old_pteval = pteval; + pteval &= ~PTE_S2_RDWR; + pteval |= PTE_S2_RDONLY; + pteval = cmpxchg_relaxed(&pte_val(*pte), old_pteval, pteval); + } while (pteval != old_pteval); } static inline bool kvm_s2pte_readonly(pte_t *pte)
-- cgit v1.2.3
From 73e86cb03cf2ec0aa3789dc8621c6d53619cac5e Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Tue, 4 Jul 2017 19:04:18 +0100 Subject: arm64: Move PTE_RDONLY bit handling out of set_pte_at()
Currently PTE_RDONLY is treated as a hardware-only bit and not handled by the pte_mkwrite(), pte_wrprotect() or the user PAGE_* definitions. The set_pte_at() function is responsible for setting this bit based on the write permission or dirty state. This patch moves the PTE_RDONLY handling out of set_pte_at() into the pte_mkwrite()/pte_wrprotect() functions. The PAGE_* definitions need to be updated to explicitly include PTE_RDONLY when !PTE_WRITE. The patch also removes the redundant PAGE_COPY(_EXEC) definitions as they are identical to the corresponding PAGE_READONLY(_EXEC).
Reviewed-by: Will Deacon Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/pgtable-prot.h | 18 ++++++++---------- arch/arm64/include/asm/pgtable.h | 34 ++++++++++------------------------ arch/arm64/kernel/hibernate.c | 4 ++-- arch/arm64/mm/fault.c | 6 +----- 4 files changed, 21 insertions(+), 41 deletions(-) (limited to 'arch/arm64') diff --git a/arch/arm64/include/asm/pgtable-prot.h b/arch/arm64/include/asm/pgtable-prot.h index 2142c7726e76..0a5635fb0ef9 100644 --- a/arch/arm64/include/asm/pgtable-prot.h +++ b/arch/arm64/include/asm/pgtable-prot.h @@ -63,23 +63,21 @@ #define PAGE_S2 __pgprot(PROT_DEFAULT | PTE_S2_MEMATTR(MT_S2_NORMAL) | PTE_S2_RDONLY) #define PAGE_S2_DEVICE __pgprot(PROT_DEFAULT | PTE_S2_MEMATTR(MT_S2_DEVICE_nGnRE) | PTE_S2_RDONLY | PTE_UXN) -#define PAGE_NONE __pgprot(((_PAGE_DEFAULT) & ~PTE_VALID) | PTE_PROT_NONE | PTE_PXN | PTE_UXN) +#define PAGE_NONE __pgprot(((_PAGE_DEFAULT) & ~PTE_VALID) | PTE_PROT_NONE | PTE_RDONLY | PTE_PXN | PTE_UXN) #define PAGE_SHARED __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_NG | PTE_PXN | PTE_UXN | PTE_WRITE) #define PAGE_SHARED_EXEC __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_NG | PTE_PXN | PTE_WRITE) -#define PAGE_COPY __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_NG | PTE_PXN | PTE_UXN) -#define PAGE_COPY_EXEC __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_NG | PTE_PXN) -#define PAGE_READONLY __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_NG | PTE_PXN | PTE_UXN) -#define PAGE_READONLY_EXEC __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_NG | PTE_PXN) -#define PAGE_EXECONLY __pgprot(_PAGE_DEFAULT | PTE_NG | PTE_PXN) +#define PAGE_READONLY __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_RDONLY | PTE_NG | PTE_PXN | PTE_UXN) +#define PAGE_READONLY_EXEC __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_RDONLY | PTE_NG | PTE_PXN) +#define PAGE_EXECONLY __pgprot(_PAGE_DEFAULT | PTE_RDONLY | PTE_NG | PTE_PXN) #define __P000 PAGE_NONE #define __P001 PAGE_READONLY -#define __P010 PAGE_COPY -#define __P011 PAGE_COPY +#define __P010 PAGE_READONLY +#define __P011 PAGE_READONLY #define __P100 PAGE_EXECONLY #define __P101 PAGE_READONLY_EXEC -#define __P110 PAGE_COPY_EXEC -#define __P111 PAGE_COPY_EXEC +#define __P110 PAGE_READONLY_EXEC +#define __P111 PAGE_READONLY_EXEC #define __S000 PAGE_NONE #define __S001 PAGE_READONLY diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index 9127688ae775..a04bfb869a80 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -125,12 +125,16 @@ static inline pte_t set_pte_bit(pte_t pte, pgprot_t prot) static inline pte_t pte_wrprotect(pte_t pte) { - return clear_pte_bit(pte, __pgprot(PTE_WRITE)); + pte = clear_pte_bit(pte, __pgprot(PTE_WRITE)); + pte = set_pte_bit(pte, __pgprot(PTE_RDONLY)); + return pte; } static inline pte_t pte_mkwrite(pte_t pte) { - return set_pte_bit(pte, __pgprot(PTE_WRITE)); + pte = set_pte_bit(pte, __pgprot(PTE_WRITE)); + pte = clear_pte_bit(pte, __pgprot(PTE_RDONLY)); + return pte; } static inline pte_t pte_mkclean(pte_t pte) @@ -169,16 +173,6 @@ static inline pte_t pte_mknoncont(pte_t pte) return clear_pte_bit(pte, __pgprot(PTE_CONT)); } -static inline pte_t pte_clear_rdonly(pte_t pte) -{ - return clear_pte_bit(pte, __pgprot(PTE_RDONLY)); -} - -static inline pte_t pte_set_rdonly(pte_t pte) -{ - return set_pte_bit(pte, __pgprot(PTE_RDONLY)); -} - static inline pte_t pte_mkpresent(pte_t pte) { return set_pte_bit(pte, __pgprot(PTE_VALID)); @@ -226,14 +220,8 @@ extern void __sync_icache_dcache(pte_t pteval, unsigned long addr); static inline void set_pte_at(struct mm_struct *mm, 
unsigned long addr, pte_t *ptep, pte_t pte) { - if (pte_present(pte)) { - if (pte_sw_dirty(pte) && pte_write(pte)) - pte_val(pte) &= ~PTE_RDONLY; - else - pte_val(pte) |= PTE_RDONLY; - if (pte_user_exec(pte) && !pte_special(pte)) - __sync_icache_dcache(pte, addr); - } + if (pte_present(pte) && pte_user_exec(pte) && !pte_special(pte)) + __sync_icache_dcache(pte, addr); /* * If the existing pte is valid, check for potential race with @@ -659,12 +647,10 @@ static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long addres old_pte = pte; /* * If hardware-dirty (PTE_WRITE/DBM bit set and PTE_RDONLY - * clear), set the PTE_DIRTY and PTE_RDONLY bits. + * clear), set the PTE_DIRTY bit. */ - if (pte_hw_dirty(pte)) { + if (pte_hw_dirty(pte)) pte = pte_mkdirty(pte); - pte = pte_set_rdonly(pte); - } pte = pte_wrprotect(pte); pte_val(pte) = cmpxchg_relaxed(&pte_val(*ptep), pte_val(old_pte), pte_val(pte)); diff --git a/arch/arm64/kernel/hibernate.c b/arch/arm64/kernel/hibernate.c index a44e13942d30..095d3c170f5d 100644 --- a/arch/arm64/kernel/hibernate.c +++ b/arch/arm64/kernel/hibernate.c @@ -330,7 +330,7 @@ static void _copy_pte(pte_t *dst_pte, pte_t *src_pte, unsigned long addr) * read only (code, rodata). Clear the RDONLY bit from * the temporary mappings we use during restore. */ - set_pte(dst_pte, pte_clear_rdonly(pte)); + set_pte(dst_pte, pte_mkwrite(pte)); } else if (debug_pagealloc_enabled() && !pte_none(pte)) { /* * debug_pagealloc will removed the PTE_VALID bit if @@ -343,7 +343,7 @@ static void _copy_pte(pte_t *dst_pte, pte_t *src_pte, unsigned long addr) */ BUG_ON(!pfn_valid(pte_pfn(pte))); - set_pte(dst_pte, pte_mkpresent(pte_clear_rdonly(pte))); + set_pte(dst_pte, pte_mkpresent(pte_mkwrite(pte))); } } diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c index 430eaf82da49..f75ed5c4b994 100644 --- a/arch/arm64/mm/fault.c +++ b/arch/arm64/mm/fault.c @@ -204,11 +204,7 @@ int ptep_set_access_flags(struct vm_area_struct *vma, return 0; /* only preserve the access flags and write permission */ - pte_val(entry) &= PTE_AF | PTE_WRITE | PTE_DIRTY; - - /* set PTE_RDONLY if actual read-only or clean PTE */ - if (!pte_write(entry) || !pte_sw_dirty(entry)) - entry = pte_set_rdonly(entry); + pte_val(entry) &= PTE_RDONLY | PTE_AF | PTE_WRITE | PTE_DIRTY; /* * Setting the flags must be done atomically to avoid racing with the -- cgit v1.2.3 From 64c26841b34957ef8f33f7a9e8663aeee59c3ded Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Wed, 5 Jul 2017 10:59:42 +0100 Subject: arm64: Ignore hardware dirty bit updates in ptep_set_wrprotect() ptep_set_wrprotect() is only called on CoW mappings which are private (!VM_SHARED) with the pte either read-only (!PTE_WRITE && PTE_RDONLY) or writable and software-dirty (PTE_WRITE && !PTE_RDONLY && PTE_DIRTY). There is no race with the hardware update of the dirty state: clearing of PTE_RDONLY when PTE_WRITE (a.k.a. PTE_DBM) is set. This patch removes the code setting the software PTE_DIRTY bit in ptep_set_wrprotect() as superfluous. A VM_WARN_ONCE is introduced in case the above logic is wrong or the core mm code changes its use of ptep_set_wrprotect(). 
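A tiny standalone model of that warning condition, with invented stand-ins for the real pte accessors (only the three bits involved are modelled):

#include <stdbool.h>
#include <stdio.h>

struct pte { bool write, rdonly, sw_dirty; };

/* Hardware-dirty means PTE_WRITE (DBM) set and PTE_RDONLY clear. */
static bool hw_dirty(struct pte p) { return p.write && !p.rdonly; }
static bool dirty(struct pte p)    { return p.sw_dirty || hw_dirty(p); }
/* The VM_WARN_ONCE condition: writable but clean. */
static bool warns(struct pte p)    { return p.write && !dirty(p); }

int main(void)
{
	/* The two states a private CoW pte is expected to be in. */
	struct pte ro_clean = { .write = false, .rdonly = true,  .sw_dirty = false };
	struct pte wr_dirty = { .write = true,  .rdonly = false, .sw_dirty = true  };
	/* Writable but clean: the case the warning is meant to catch. */
	struct pte wr_clean = { .write = true,  .rdonly = true,  .sw_dirty = false };

	printf("read-only, clean : warns = %d\n", warns(ro_clean));
	printf("writable, dirty  : warns = %d\n", warns(wr_dirty));
	printf("writable, clean  : warns = %d\n", warns(wr_clean));
	return 0;
}

Only the writable-but-clean state trips the warning, which is precisely the state a private CoW pte should never be in.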
Reviewed-by: Will Deacon Acked-by: Steve Capper Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/pgtable.h | 21 +++++++++++++-------- 1 file changed, 13 insertions(+), 8 deletions(-) (limited to 'arch/arm64') diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index a04bfb869a80..0117cbcd62d4 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -634,23 +634,28 @@ static inline pmd_t pmdp_huge_get_and_clear(struct mm_struct *mm, #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ /* - * ptep_set_wrprotect - mark read-only while trasferring potential hardware - * dirty status (PTE_DBM && !PTE_RDONLY) to the software PTE_DIRTY bit. + * ptep_set_wrprotect - mark read-only while preserving the hardware update of + * the Access Flag. */ #define __HAVE_ARCH_PTEP_SET_WRPROTECT static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long address, pte_t *ptep) { pte_t old_pte, pte; + /* + * ptep_set_wrprotect() is only called on CoW mappings which are + * private (!VM_SHARED) with the pte either read-only (!PTE_WRITE && + * PTE_RDONLY) or writable and software-dirty (PTE_WRITE && + * !PTE_RDONLY && PTE_DIRTY); see is_cow_mapping() and + * protection_map[]. There is no race with the hardware update of the + * dirty state: clearing of PTE_RDONLY when PTE_WRITE (a.k.a. PTE_DBM) + * is set. + */ + VM_WARN_ONCE(pte_write(*ptep) && !pte_dirty(*ptep), + "%s: potential race with hardware DBM", __func__); pte = READ_ONCE(*ptep); do { old_pte = pte; - /* - * If hardware-dirty (PTE_WRITE/DBM bit set and PTE_RDONLY - * clear), set the PTE_DIRTY bit. - */ - if (pte_hw_dirty(pte)) - pte = pte_mkdirty(pte); pte = pte_wrprotect(pte); pte_val(pte) = cmpxchg_relaxed(&pte_val(*ptep), pte_val(old_pte), pte_val(pte)); -- cgit v1.2.3 From af29678fe785ad79e7386e97b57093482f0dd7c4 Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Thu, 6 Jul 2017 11:53:08 +0100 Subject: arm64: Remove the !CONFIG_ARM64_HW_AFDBM alternative code paths Since the pte handling for hardware AF/DBM works even when the hardware feature is not present, make the pte accessors implementation permanent and remove the corresponding #ifdefs. The Kconfig option is kept as it can still be used to disable the feature at the hardware level. Reviewed-by: Will Deacon Cc: Marc Zyngier Cc: Christoffer Dall Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/pgtable.h | 9 +-------- arch/arm64/kvm/hyp/s2-setup.c | 2 +- arch/arm64/mm/fault.c | 2 -- 3 files changed, 2 insertions(+), 11 deletions(-) (limited to 'arch/arm64') diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index 0117cbcd62d4..bc4e92337d16 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -85,11 +85,7 @@ extern unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)]; (__boundary - 1 < (end) - 1) ? __boundary : (end); \ }) -#ifdef CONFIG_ARM64_HW_AFDBM #define pte_hw_dirty(pte) (pte_write(pte) && !(pte_val(pte) & PTE_RDONLY)) -#else -#define pte_hw_dirty(pte) (0) -#endif #define pte_sw_dirty(pte) (!!(pte_val(pte) & PTE_DIRTY)) #define pte_dirty(pte) (pte_sw_dirty(pte) || pte_hw_dirty(pte)) @@ -228,8 +224,7 @@ static inline void set_pte_at(struct mm_struct *mm, unsigned long addr, * hardware updates of the pte (ptep_set_access_flags safely changes * valid ptes without going through an invalid entry). 
*/ - if (IS_ENABLED(CONFIG_ARM64_HW_AFDBM) && - pte_valid(*ptep) && pte_valid(pte)) { + if (pte_valid(*ptep) && pte_valid(pte)) { VM_WARN_ONCE(!pte_young(pte), "%s: racy access flag clearing: 0x%016llx -> 0x%016llx", __func__, pte_val(*ptep), pte_val(pte)); @@ -565,7 +560,6 @@ static inline pmd_t pmd_modify(pmd_t pmd, pgprot_t newprot) return pte_pmd(pte_modify(pmd_pte(pmd), newprot)); } -#ifdef CONFIG_ARM64_HW_AFDBM #define __HAVE_ARCH_PTEP_SET_ACCESS_FLAGS extern int ptep_set_access_flags(struct vm_area_struct *vma, unsigned long address, pte_t *ptep, @@ -670,7 +664,6 @@ static inline void pmdp_set_wrprotect(struct mm_struct *mm, ptep_set_wrprotect(mm, address, (pte_t *)pmdp); } #endif -#endif /* CONFIG_ARM64_HW_AFDBM */ extern pgd_t swapper_pg_dir[PTRS_PER_PGD]; extern pgd_t idmap_pg_dir[PTRS_PER_PGD]; diff --git a/arch/arm64/kvm/hyp/s2-setup.c b/arch/arm64/kvm/hyp/s2-setup.c index b81f4091c909..a81f5e10fc8c 100644 --- a/arch/arm64/kvm/hyp/s2-setup.c +++ b/arch/arm64/kvm/hyp/s2-setup.c @@ -70,7 +70,7 @@ u32 __hyp_text __init_stage2_translation(void) * Management in ID_AA64MMFR1_EL1 and enable the feature in VTCR_EL2. */ tmp = (read_sysreg(id_aa64mmfr1_el1) >> ID_AA64MMFR1_HADBS_SHIFT) & 0xf; - if (IS_ENABLED(CONFIG_ARM64_HW_AFDBM) && tmp) + if (tmp) val |= VTCR_EL2_HA; /* diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c index f75ed5c4b994..778d0bb89551 100644 --- a/arch/arm64/mm/fault.c +++ b/arch/arm64/mm/fault.c @@ -183,7 +183,6 @@ void show_pte(unsigned long addr) pr_cont("\n"); } -#ifdef CONFIG_ARM64_HW_AFDBM /* * This function sets the access flags (dirty, accessed), as well as write * permission, and only to a more permissive setting. @@ -225,7 +224,6 @@ int ptep_set_access_flags(struct vm_area_struct *vma, flush_tlb_fix_spurious_fault(vma, address); return 1; } -#endif static bool is_el1_instruction_abort(unsigned int esr) { -- cgit v1.2.3 From 2fa59ec8ccf38bbc2193ae61aed9afa0687974e0 Mon Sep 17 00:00:00 2001 From: Vladimir Murzin Date: Mon, 14 Aug 2017 09:55:46 +0100 Subject: arm64: dma-mapping: Do not pass data to gen_pool_set_algo() gen_pool_first_fit_order_align() does not make use of additional data, so pass plain NULL there. 
Signed-off-by: Vladimir Murzin Signed-off-by: Catalin Marinas --- arch/arm64/mm/dma-mapping.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/arm64')
diff --git a/arch/arm64/mm/dma-mapping.c b/arch/arm64/mm/dma-mapping.c index f27d4dd04384..7038e1a61397 100644 --- a/arch/arm64/mm/dma-mapping.c +++ b/arch/arm64/mm/dma-mapping.c @@ -425,7 +425,7 @@ static int __init atomic_pool_init(void) gen_pool_set_algo(atomic_pool, gen_pool_first_fit_order_align, - (void *)PAGE_SHIFT); + NULL); pr_info("DMA: preallocated %zu KiB pool for atomic allocations\n", atomic_pool_size / 1024);
-- cgit v1.2.3
From 8165f70648da0a4a51e5871693781b2cc29b29d6 Mon Sep 17 00:00:00 2001 From: Vladimir Murzin Date: Mon, 14 Aug 2017 09:55:47 +0100 Subject: arm64: dma-mapping: Mark atomic_pool as __ro_after_init
atomic_pool is set up once during the init stage and never changed after that, so it is a good candidate for __ro_after_init.
Signed-off-by: Vladimir Murzin Signed-off-by: Catalin Marinas --- arch/arm64/mm/dma-mapping.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/arm64')
diff --git a/arch/arm64/mm/dma-mapping.c b/arch/arm64/mm/dma-mapping.c index 7038e1a61397..614af886b7ef 100644 --- a/arch/arm64/mm/dma-mapping.c +++ b/arch/arm64/mm/dma-mapping.c @@ -42,7 +42,7 @@ static pgprot_t __get_dma_pgprot(unsigned long attrs, pgprot_t prot, return prot; } -static struct gen_pool *atomic_pool; +static struct gen_pool *atomic_pool __ro_after_init; #define DEFAULT_DMA_COHERENT_POOL_SIZE SZ_256K static size_t atomic_pool_size __initdata = DEFAULT_DMA_COHERENT_POOL_SIZE;
-- cgit v1.2.3
From a88ce63b642cf8cd82cbc278429ccd9de4455a07 Mon Sep 17 00:00:00 2001 From: Hoeun Ryu Date: Thu, 17 Aug 2017 11:24:27 +0900 Subject: arm64: kexec: have own crash_smp_send_stop() for crash dump for nonpanic cores
Commit 0ee5941 (x86/panic: replace smp_send_stop() with kdump friendly version in panic path) introduced crash_smp_send_stop(), a weak function that can be overridden by architecture code to fix the side effect caused by commit f06e515 (kernel/panic.c: add "crash_kexec_post_notifiers" option).
The arm64 architecture uses the weak version, and the problem is that it simply calls smp_send_stop(), which takes the other CPUs offline and removes any chance of saving crash information for the nonpanic CPUs in machine_crash_shutdown() when the crash_kexec_post_notifiers kernel option is enabled. Calling smp_send_crash_stop() in machine_crash_shutdown() is useless in that case because the nonpanic CPUs have already been taken offline by smp_send_stop(), and smp_send_crash_stop() only works against online CPUs. The result is that the secondary CPUs' registers are not saved by crash_save_cpu() and the vmcore file misreports these CPUs as being offline.
crash_smp_send_stop() is implemented to fix this problem by replacing the existing smp_send_crash_stop() and adding a guard so that it only does its work once, even if called twice on the panic path. The function (strong symbol version) saves crash information for the nonpanic CPUs, and machine_crash_shutdown() tries to save crash information for the nonpanic CPUs only when the crash_kexec_post_notifiers kernel option is disabled.

* crash_kexec_post_notifiers : false

  panic()
    __crash_kexec()
      machine_crash_shutdown()
        crash_smp_send_stop()    <= save crash dump for nonpanic cores

* crash_kexec_post_notifiers : true

  panic()
    crash_smp_send_stop()        <= save crash dump for nonpanic cores
    __crash_kexec()
      machine_crash_shutdown()
        crash_smp_send_stop()    <= just return.
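A minimal standalone sketch of the once-only guard this adds (the names and printout are invented for the demo; the real function additionally sends IPIs and saves per-CPU crash state):

#include <stdio.h>

static int cpus_stopped;	/* set on the first call, checked on re-entry */

static void demo_crash_stop(void)
{
	/* May be reached twice on the panic path; only the first call
	 * does any work, the second simply returns. */
	if (cpus_stopped)
		return;
	cpus_stopped = 1;

	printf("stopping secondary CPUs and saving their crash state\n");
}

int main(void)
{
	demo_crash_stop();	/* e.g. from panic() with post notifiers enabled */
	demo_crash_stop();	/* later from machine_crash_shutdown(): a no-op */
	return 0;
}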
Signed-off-by: Hoeun Ryu Reviewed-by: James Morse Tested-by: James Morse Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/smp.h | 2 +- arch/arm64/kernel/machine_kexec.c | 2 +- arch/arm64/kernel/smp.c | 12 +++++++++++- 3 files changed, 13 insertions(+), 3 deletions(-) (limited to 'arch/arm64') diff --git a/arch/arm64/include/asm/smp.h b/arch/arm64/include/asm/smp.h index 55f08c5acfad..f82b447bd34f 100644 --- a/arch/arm64/include/asm/smp.h +++ b/arch/arm64/include/asm/smp.h @@ -148,7 +148,7 @@ static inline void cpu_panic_kernel(void) */ bool cpus_are_stuck_in_kernel(void); -extern void smp_send_crash_stop(void); +extern void crash_smp_send_stop(void); extern bool smp_crash_stop_failed(void); #endif /* ifndef __ASSEMBLY__ */ diff --git a/arch/arm64/kernel/machine_kexec.c b/arch/arm64/kernel/machine_kexec.c index 481f54a866c5..11121f608eb5 100644 --- a/arch/arm64/kernel/machine_kexec.c +++ b/arch/arm64/kernel/machine_kexec.c @@ -252,7 +252,7 @@ void machine_crash_shutdown(struct pt_regs *regs) local_irq_disable(); /* shutdown non-crashing cpus */ - smp_send_crash_stop(); + crash_smp_send_stop(); /* for crashing cpu */ crash_save_cpu(regs, smp_processor_id()); diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c index f13ddb2404f9..ffe089942ac4 100644 --- a/arch/arm64/kernel/smp.c +++ b/arch/arm64/kernel/smp.c @@ -977,11 +977,21 @@ void smp_send_stop(void) } #ifdef CONFIG_KEXEC_CORE -void smp_send_crash_stop(void) +void crash_smp_send_stop(void) { + static int cpus_stopped; cpumask_t mask; unsigned long timeout; + /* + * This function can be called twice in panic path, but obviously + * we execute this only once. + */ + if (cpus_stopped) + return; + + cpus_stopped = 1; + if (num_online_cpus() == 1) return; -- cgit v1.2.3 From d3ea79527757ba65b3ee08e10c59a3c84f34e4bf Mon Sep 17 00:00:00 2001 From: Steve Capper Date: Tue, 22 Aug 2017 11:42:41 +0100 Subject: arm64: hugetlb: set_huge_pte_at Add WARN_ON on !pte_present This patch adds a WARN_ON to set_huge_pte_at as the accessor assumes that entries to be written down are all present. (There are separate accessors to clear huge ptes). We will need to handle the !pte_present case where memory offlining is used on hugetlb pages. swap and migration entries will be supplied to set_huge_pte_at in this case. Cc: David Woods Signed-off-by: Steve Capper Signed-off-by: Punit Agrawal Signed-off-by: Catalin Marinas --- arch/arm64/mm/hugetlbpage.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'arch/arm64') diff --git a/arch/arm64/mm/hugetlbpage.c b/arch/arm64/mm/hugetlbpage.c index 656e0ece2289..7b61e4833432 100644 --- a/arch/arm64/mm/hugetlbpage.c +++ b/arch/arm64/mm/hugetlbpage.c @@ -67,6 +67,12 @@ void set_huge_pte_at(struct mm_struct *mm, unsigned long addr, unsigned long pfn; pgprot_t hugeprot; + /* + * Code needs to be expanded to handle huge swap and migration + * entries. Needed for HUGETLB and MEMORY_FAILURE. + */ + WARN_ON(!pte_present(pte)); + if (!pte_cont(pte)) { set_pte_at(mm, addr, ptep, pte); return; -- cgit v1.2.3 From b5b0be86d7181ed82cee9d8ac5073a48f038a305 Mon Sep 17 00:00:00 2001 From: Steve Capper Date: Tue, 22 Aug 2017 11:42:42 +0100 Subject: arm64: hugetlb: Introduce pte_pgprot helper Rather than xor pte bits in various places, use this helper function. 
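The helper relies on pfn_pte(pfn, __pgprot(0)) regenerating exactly the pfn bits of the original pte, so XOR-ing it with the full pte value cancels those bits and leaves only the attributes. A self-contained sketch of the same arithmetic, using a simplified layout (pfn in bits [47:12], as with 4K pages) rather than the full arm64 descriptor format:

#include <stdint.h>
#include <stdio.h>

#define PFN_SHIFT 12
#define PFN_MASK  (0xFFFFFFFFFULL << PFN_SHIFT)	/* bits [47:12] */

static uint64_t pfn_pte(uint64_t pfn, uint64_t prot)
{
	return (pfn << PFN_SHIFT) | prot;
}

int main(void)
{
	uint64_t prot = 0x0060000000000F53ULL & ~PFN_MASK;	/* some attribute bits */
	uint64_t pte  = pfn_pte(0x12345, prot);
	uint64_t pfn  = (pte & PFN_MASK) >> PFN_SHIFT;

	/* pfn_pte(pfn, 0) has only the pfn bits set, so the XOR cancels them. */
	uint64_t hugeprot = pfn_pte(pfn, 0) ^ pte;

	printf("recovered prot = 0x%016llx (matches: %d)\n",
	       (unsigned long long)hugeprot, hugeprot == prot);
	return 0;
}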
Cc: David Woods Signed-off-by: Steve Capper Signed-off-by: Punit Agrawal Reviewed-by: Mark Rutland Signed-off-by: Catalin Marinas --- arch/arm64/mm/hugetlbpage.c | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) (limited to 'arch/arm64')
diff --git a/arch/arm64/mm/hugetlbpage.c b/arch/arm64/mm/hugetlbpage.c index 7b61e4833432..cb84ca33bc6b 100644 --- a/arch/arm64/mm/hugetlbpage.c +++ b/arch/arm64/mm/hugetlbpage.c @@ -41,6 +41,16 @@ int pud_huge(pud_t pud) #endif } +/* + * Select all bits except the pfn + */ +static inline pgprot_t pte_pgprot(pte_t pte) +{ + unsigned long pfn = pte_pfn(pte); + + return __pgprot(pte_val(pfn_pte(pfn, __pgprot(0))) ^ pte_val(pte)); +} + static int find_num_contig(struct mm_struct *mm, unsigned long addr, pte_t *ptep, size_t *pgsize) { @@ -80,7 +90,7 @@ void set_huge_pte_at(struct mm_struct *mm, unsigned long addr, ncontig = find_num_contig(mm, addr, ptep, &pgsize); pfn = pte_pfn(pte); - hugeprot = __pgprot(pte_val(pfn_pte(pfn, __pgprot(0))) ^ pte_val(pte)); + hugeprot = pte_pgprot(pte); for (i = 0; i < ncontig; i++) { pr_debug("%s: set pte %p to 0x%llx\n", __func__, ptep, pte_val(pfn_pte(pfn, hugeprot))); @@ -223,9 +233,7 @@ int huge_ptep_set_access_flags(struct vm_area_struct *vma, size_t pgsize = 0; unsigned long pfn = pte_pfn(pte); /* Select all bits except the pfn */ - pgprot_t hugeprot = - __pgprot(pte_val(pfn_pte(pfn, __pgprot(0))) ^ - pte_val(pte)); + pgprot_t hugeprot = pte_pgprot(pte); pfn = pte_pfn(pte); ncontig = find_num_contig(vma->vm_mm, addr, ptep,
-- cgit v1.2.3
From 29a7287dceb76729960a15095fbbfcffa2179b07 Mon Sep 17 00:00:00 2001 From: Steve Capper Date: Tue, 22 Aug 2017 11:42:43 +0100 Subject: arm64: hugetlb: Spring clean huge pte accessors
This patch aims to re-structure the huge pte accessors without affecting their functionality. Control flow is changed to reduce indentation, and greater use is made of the for loops' post-iteration expressions to advance the loop variables. It is then much easier to add break-before-make semantics in a subsequent patch.
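A shape-only sketch of that restructuring, with placeholder names rather than the real accessors:

#include <stdio.h>

static void visit(int slot) { printf("slot %d\n", slot); }

/* Before: the contiguous case is nested, and the loop variables are
 * advanced by hand inside the body. */
static void old_shape(int contig, int ncontig)
{
	if (contig) {
		int i, slot = 0;
		for (i = 0; i < ncontig; ++i) {
			visit(slot);
			slot++;
		}
	} else {
		visit(0);
	}
}

/* After: early return for the simple case, and the per-iteration updates
 * folded into the for loop's post-iteration expressions. */
static void new_shape(int contig, int ncontig)
{
	int i, slot;

	if (!contig) {
		visit(0);
		return;
	}

	for (i = 0, slot = 0; i < ncontig; i++, slot++)
		visit(slot);
}

int main(void)
{
	old_shape(1, 3);
	new_shape(1, 3);
	return 0;
}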
Cc: David Woods Signed-off-by: Steve Capper Signed-off-by: Punit Agrawal Reviewed-by: Mark Rutland Signed-off-by: Catalin Marinas --- arch/arm64/mm/hugetlbpage.c | 119 ++++++++++++++++++++------------------------ 1 file changed, 54 insertions(+), 65 deletions(-) (limited to 'arch/arm64') diff --git a/arch/arm64/mm/hugetlbpage.c b/arch/arm64/mm/hugetlbpage.c index cb84ca33bc6b..08deed7c71f0 100644 --- a/arch/arm64/mm/hugetlbpage.c +++ b/arch/arm64/mm/hugetlbpage.c @@ -74,7 +74,7 @@ void set_huge_pte_at(struct mm_struct *mm, unsigned long addr, size_t pgsize; int i; int ncontig; - unsigned long pfn; + unsigned long pfn, dpfn; pgprot_t hugeprot; /* @@ -90,14 +90,13 @@ void set_huge_pte_at(struct mm_struct *mm, unsigned long addr, ncontig = find_num_contig(mm, addr, ptep, &pgsize); pfn = pte_pfn(pte); + dpfn = pgsize >> PAGE_SHIFT; hugeprot = pte_pgprot(pte); - for (i = 0; i < ncontig; i++) { + + for (i = 0; i < ncontig; i++, ptep++, addr += pgsize, pfn += dpfn) { pr_debug("%s: set pte %p to 0x%llx\n", __func__, ptep, pte_val(pfn_pte(pfn, hugeprot))); set_pte_at(mm, addr, ptep, pfn_pte(pfn, hugeprot)); - ptep++; - pfn += pgsize >> PAGE_SHIFT; - addr += pgsize; } } @@ -195,91 +194,81 @@ pte_t arch_make_huge_pte(pte_t entry, struct vm_area_struct *vma, pte_t huge_ptep_get_and_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep) { - pte_t pte; - - if (pte_cont(*ptep)) { - int ncontig, i; - size_t pgsize; - bool is_dirty = false; - - ncontig = find_num_contig(mm, addr, ptep, &pgsize); - /* save the 1st pte to return */ - pte = ptep_get_and_clear(mm, addr, ptep); - for (i = 1, addr += pgsize; i < ncontig; ++i, addr += pgsize) { - /* - * If HW_AFDBM is enabled, then the HW could - * turn on the dirty bit for any of the page - * in the set, so check them all. - */ - ++ptep; - if (pte_dirty(ptep_get_and_clear(mm, addr, ptep))) - is_dirty = true; - } - if (is_dirty) - return pte_mkdirty(pte); - else - return pte; - } else { + int ncontig, i; + size_t pgsize; + pte_t orig_pte = huge_ptep_get(ptep); + + if (!pte_cont(orig_pte)) return ptep_get_and_clear(mm, addr, ptep); + + ncontig = find_num_contig(mm, addr, ptep, &pgsize); + for (i = 0; i < ncontig; i++, addr += pgsize, ptep++) { + /* + * If HW_AFDBM is enabled, then the HW could + * turn on the dirty bit for any of the page + * in the set, so check them all. 
+ */ + if (pte_dirty(ptep_get_and_clear(mm, addr, ptep))) + orig_pte = pte_mkdirty(orig_pte); } + + return orig_pte; } int huge_ptep_set_access_flags(struct vm_area_struct *vma, unsigned long addr, pte_t *ptep, pte_t pte, int dirty) { - if (pte_cont(pte)) { - int ncontig, i, changed = 0; - size_t pgsize = 0; - unsigned long pfn = pte_pfn(pte); - /* Select all bits except the pfn */ - pgprot_t hugeprot = pte_pgprot(pte); - - pfn = pte_pfn(pte); - ncontig = find_num_contig(vma->vm_mm, addr, ptep, - &pgsize); - for (i = 0; i < ncontig; ++i, ++ptep, addr += pgsize) { - changed |= ptep_set_access_flags(vma, addr, ptep, - pfn_pte(pfn, - hugeprot), - dirty); - pfn += pgsize >> PAGE_SHIFT; - } - return changed; - } else { + int ncontig, i, changed = 0; + size_t pgsize = 0; + unsigned long pfn = pte_pfn(pte), dpfn; + pgprot_t hugeprot; + + if (!pte_cont(pte)) return ptep_set_access_flags(vma, addr, ptep, pte, dirty); + + ncontig = find_num_contig(vma->vm_mm, addr, ptep, &pgsize); + dpfn = pgsize >> PAGE_SHIFT; + hugeprot = pte_pgprot(pte); + + for (i = 0; i < ncontig; i++, ptep++, addr += pgsize, pfn += dpfn) { + changed |= ptep_set_access_flags(vma, addr, ptep, + pfn_pte(pfn, hugeprot), dirty); } + + return changed; } void huge_ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr, pte_t *ptep) { - if (pte_cont(*ptep)) { - int ncontig, i; - size_t pgsize = 0; + int ncontig, i; + size_t pgsize; - ncontig = find_num_contig(mm, addr, ptep, &pgsize); - for (i = 0; i < ncontig; ++i, ++ptep, addr += pgsize) - ptep_set_wrprotect(mm, addr, ptep); - } else { + if (!pte_cont(*ptep)) { ptep_set_wrprotect(mm, addr, ptep); + return; } + + ncontig = find_num_contig(mm, addr, ptep, &pgsize); + for (i = 0; i < ncontig; i++, ptep++, addr += pgsize) + ptep_set_wrprotect(mm, addr, ptep); } void huge_ptep_clear_flush(struct vm_area_struct *vma, unsigned long addr, pte_t *ptep) { - if (pte_cont(*ptep)) { - int ncontig, i; - size_t pgsize = 0; - - ncontig = find_num_contig(vma->vm_mm, addr, ptep, - &pgsize); - for (i = 0; i < ncontig; ++i, ++ptep, addr += pgsize) - ptep_clear_flush(vma, addr, ptep); - } else { + int ncontig, i; + size_t pgsize; + + if (!pte_cont(*ptep)) { ptep_clear_flush(vma, addr, ptep); + return; } + + ncontig = find_num_contig(vma->vm_mm, addr, ptep, &pgsize); + for (i = 0; i < ncontig; i++, ptep++, addr += pgsize) + ptep_clear_flush(vma, addr, ptep); } static __init int setup_hugepagesz(char *opt) -- cgit v1.2.3 From d8bdcff2876424d44d08a4d16a54fee518f9d5b8 Mon Sep 17 00:00:00 2001 From: Steve Capper Date: Tue, 22 Aug 2017 11:42:44 +0100 Subject: arm64: hugetlb: Add break-before-make logic for contiguous entries It has become apparent that one has to take special care when modifying attributes of memory mappings that employ the contiguous bit. Both the requirement and the architecturally correct "Break-Before-Make" technique of updating contiguous entries can be found described in: ARM DDI 0487A.k_iss10775, "Misprogramming of the Contiguous bit", page D4-1762. The huge pte accessors currently replace the attributes of contiguous pte entries in place thus can, on certain platforms, lead to TLB conflict aborts or even erroneous results returned from TLB lookups. This patch adds two helper functions - * get_clear_flush(.) - clears a contiguous entry and returns the head pte (whilst taking care to retain dirty bit information that could have been modified by DBM). * clear_flush(.) 
that clears a contiguous entry A tlb invalidate is performed to then ensure that there is no possibility of multiple tlb entries being present for the same region. Cc: David Woods Signed-off-by: Steve Capper (Added helper clear_flush(), updated commit log, and some cleanup) Signed-off-by: Punit Agrawal [catalin.marinas@arm.com: remove CONFIG_ARM64_HW_AFDBM check] Signed-off-by: Catalin Marinas --- arch/arm64/mm/hugetlbpage.c | 112 +++++++++++++++++++++++++++++++++++--------- 1 file changed, 91 insertions(+), 21 deletions(-) (limited to 'arch/arm64') diff --git a/arch/arm64/mm/hugetlbpage.c b/arch/arm64/mm/hugetlbpage.c index 08deed7c71f0..b82df85fe920 100644 --- a/arch/arm64/mm/hugetlbpage.c +++ b/arch/arm64/mm/hugetlbpage.c @@ -68,6 +68,66 @@ static int find_num_contig(struct mm_struct *mm, unsigned long addr, return CONT_PTES; } +/* + * Changing some bits of contiguous entries requires us to follow a + * Break-Before-Make approach, breaking the whole contiguous set + * before we can change any entries. See ARM DDI 0487A.k_iss10775, + * "Misprogramming of the Contiguous bit", page D4-1762. + * + * This helper performs the break step. + */ +static pte_t get_clear_flush(struct mm_struct *mm, + unsigned long addr, + pte_t *ptep, + unsigned long pgsize, + unsigned long ncontig) +{ + struct vm_area_struct vma = { .vm_mm = mm }; + pte_t orig_pte = huge_ptep_get(ptep); + bool valid = pte_valid(orig_pte); + unsigned long i, saddr = addr; + + for (i = 0; i < ncontig; i++, addr += pgsize, ptep++) { + pte_t pte = ptep_get_and_clear(mm, addr, ptep); + + /* + * If HW_AFDBM is enabled, then the HW could turn on + * the dirty bit for any page in the set, so check + * them all. All hugetlb entries are already young. + */ + if (pte_dirty(pte)) + orig_pte = pte_mkdirty(orig_pte); + } + + if (valid) + flush_tlb_range(&vma, saddr, addr); + return orig_pte; +} + +/* + * Changing some bits of contiguous entries requires us to follow a + * Break-Before-Make approach, breaking the whole contiguous set + * before we can change any entries. See ARM DDI 0487A.k_iss10775, + * "Misprogramming of the Contiguous bit", page D4-1762. + * + * This helper performs the break step for use cases where the + * original pte is not needed. 
+ */ +static void clear_flush(struct mm_struct *mm, + unsigned long addr, + pte_t *ptep, + unsigned long pgsize, + unsigned long ncontig) +{ + struct vm_area_struct vma = { .vm_mm = mm }; + unsigned long i, saddr = addr; + + for (i = 0; i < ncontig; i++, addr += pgsize, ptep++) + pte_clear(mm, addr, ptep); + + flush_tlb_range(&vma, saddr, addr); +} + void set_huge_pte_at(struct mm_struct *mm, unsigned long addr, pte_t *ptep, pte_t pte) { @@ -93,6 +153,8 @@ void set_huge_pte_at(struct mm_struct *mm, unsigned long addr, dpfn = pgsize >> PAGE_SHIFT; hugeprot = pte_pgprot(pte); + clear_flush(mm, addr, ptep, pgsize, ncontig); + for (i = 0; i < ncontig; i++, ptep++, addr += pgsize, pfn += dpfn) { pr_debug("%s: set pte %p to 0x%llx\n", __func__, ptep, pte_val(pfn_pte(pfn, hugeprot))); @@ -194,7 +256,7 @@ pte_t arch_make_huge_pte(pte_t entry, struct vm_area_struct *vma, pte_t huge_ptep_get_and_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep) { - int ncontig, i; + int ncontig; size_t pgsize; pte_t orig_pte = huge_ptep_get(ptep); @@ -202,17 +264,8 @@ pte_t huge_ptep_get_and_clear(struct mm_struct *mm, return ptep_get_and_clear(mm, addr, ptep); ncontig = find_num_contig(mm, addr, ptep, &pgsize); - for (i = 0; i < ncontig; i++, addr += pgsize, ptep++) { - /* - * If HW_AFDBM is enabled, then the HW could - * turn on the dirty bit for any of the page - * in the set, so check them all. - */ - if (pte_dirty(ptep_get_and_clear(mm, addr, ptep))) - orig_pte = pte_mkdirty(orig_pte); - } - return orig_pte; + return get_clear_flush(mm, addr, ptep, pgsize, ncontig); } int huge_ptep_set_access_flags(struct vm_area_struct *vma, @@ -223,18 +276,25 @@ int huge_ptep_set_access_flags(struct vm_area_struct *vma, size_t pgsize = 0; unsigned long pfn = pte_pfn(pte), dpfn; pgprot_t hugeprot; + pte_t orig_pte; if (!pte_cont(pte)) return ptep_set_access_flags(vma, addr, ptep, pte, dirty); ncontig = find_num_contig(vma->vm_mm, addr, ptep, &pgsize); dpfn = pgsize >> PAGE_SHIFT; - hugeprot = pte_pgprot(pte); - for (i = 0; i < ncontig; i++, ptep++, addr += pgsize, pfn += dpfn) { - changed |= ptep_set_access_flags(vma, addr, ptep, - pfn_pte(pfn, hugeprot), dirty); - } + orig_pte = get_clear_flush(vma->vm_mm, addr, ptep, pgsize, ncontig); + if (!pte_same(orig_pte, pte)) + changed = 1; + + /* Make sure we don't lose the dirty state */ + if (pte_dirty(orig_pte)) + pte = pte_mkdirty(pte); + + hugeprot = pte_pgprot(pte); + for (i = 0; i < ncontig; i++, ptep++, addr += pgsize, pfn += dpfn) + set_pte_at(vma->vm_mm, addr, ptep, pfn_pte(pfn, hugeprot)); return changed; } @@ -242,8 +302,11 @@ int huge_ptep_set_access_flags(struct vm_area_struct *vma, void huge_ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr, pte_t *ptep) { + unsigned long pfn, dpfn; + pgprot_t hugeprot; int ncontig, i; size_t pgsize; + pte_t pte; if (!pte_cont(*ptep)) { ptep_set_wrprotect(mm, addr, ptep); @@ -251,15 +314,23 @@ void huge_ptep_set_wrprotect(struct mm_struct *mm, } ncontig = find_num_contig(mm, addr, ptep, &pgsize); - for (i = 0; i < ncontig; i++, ptep++, addr += pgsize) - ptep_set_wrprotect(mm, addr, ptep); + dpfn = pgsize >> PAGE_SHIFT; + + pte = get_clear_flush(mm, addr, ptep, pgsize, ncontig); + pte = pte_wrprotect(pte); + + hugeprot = pte_pgprot(pte); + pfn = pte_pfn(pte); + + for (i = 0; i < ncontig; i++, ptep++, addr += pgsize, pfn += dpfn) + set_pte_at(mm, addr, ptep, pfn_pte(pfn, hugeprot)); } void huge_ptep_clear_flush(struct vm_area_struct *vma, unsigned long addr, pte_t *ptep) { - int ncontig, i; size_t pgsize; + int 
ncontig; if (!pte_cont(*ptep)) { ptep_clear_flush(vma, addr, ptep); @@ -267,8 +338,7 @@ void huge_ptep_clear_flush(struct vm_area_struct *vma, } ncontig = find_num_contig(vma->vm_mm, addr, ptep, &pgsize); - for (i = 0; i < ncontig; i++, ptep++, addr += pgsize) - ptep_clear_flush(vma, addr, ptep); + clear_flush(vma->vm_mm, addr, ptep, pgsize, ncontig); } static __init int setup_hugepagesz(char *opt) -- cgit v1.2.3 From 30f3ac00ad2f822937839c95cbb22ce483190c4c Mon Sep 17 00:00:00 2001 From: Punit Agrawal Date: Tue, 22 Aug 2017 11:42:45 +0100 Subject: arm64: hugetlb: Handle swap entries in huge_pte_offset() for contiguous hugepages huge_pte_offset() was updated to correctly handle swap entries for hugepages. With the addition of the size parameter, it is now possible to disambiguate whether the request is for a regular hugepage or a contiguous hugepage. Fix huge_pte_offset() for contiguous hugepages by using the size to find the correct page table entry. Signed-off-by: Punit Agrawal Cc: David Woods Signed-off-by: Catalin Marinas --- arch/arm64/mm/hugetlbpage.c | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) (limited to 'arch/arm64') diff --git a/arch/arm64/mm/hugetlbpage.c b/arch/arm64/mm/hugetlbpage.c index b82df85fe920..b91ec151e62c 100644 --- a/arch/arm64/mm/hugetlbpage.c +++ b/arch/arm64/mm/hugetlbpage.c @@ -221,19 +221,28 @@ pte_t *huge_pte_offset(struct mm_struct *mm, return NULL; pud = pud_offset(pgd, addr); - if (pud_none(*pud)) + if (sz != PUD_SIZE && pud_none(*pud)) return NULL; - /* swap or huge page */ - if (!pud_present(*pud) || pud_huge(*pud)) + /* hugepage or swap? */ + if (pud_huge(*pud) || !pud_present(*pud)) return (pte_t *)pud; /* table; check the next level */ + if (sz == CONT_PMD_SIZE) + addr &= CONT_PMD_MASK; + pmd = pmd_offset(pud, addr); - if (pmd_none(*pmd)) + if (!(sz == PMD_SIZE || sz == CONT_PMD_SIZE) && + pmd_none(*pmd)) return NULL; - if (!pmd_present(*pmd) || pmd_huge(*pmd)) + if (pmd_huge(*pmd) || !pmd_present(*pmd)) return (pte_t *)pmd; + if (sz == CONT_PTE_SIZE) { + pte_t *pte = pte_offset_kernel(pmd, (addr & CONT_PTE_MASK)); + return pte; + } + return NULL; } -- cgit v1.2.3 From c3e4ed5c3d5d79af940eb24c810dddcec6d2b536 Mon Sep 17 00:00:00 2001 From: Punit Agrawal Date: Tue, 22 Aug 2017 11:42:46 +0100 Subject: arm64: hugetlb: Override huge_pte_clear() to support contiguous hugepages The default huge_pte_clear() implementation does not clear contiguous page table entries when it encounters contiguous hugepages that are supported on arm64. Fix this by overriding the default implementation to clear all the entries associated with contiguous hugepages. 
Signed-off-by: Punit Agrawal Cc: David Woods Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/hugetlb.h | 6 +++++- arch/arm64/mm/hugetlbpage.c | 38 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 43 insertions(+), 1 deletion(-) (limited to 'arch/arm64') diff --git a/arch/arm64/include/asm/hugetlb.h b/arch/arm64/include/asm/hugetlb.h index 793bd73b0d07..df8c0aea0917 100644 --- a/arch/arm64/include/asm/hugetlb.h +++ b/arch/arm64/include/asm/hugetlb.h @@ -18,7 +18,6 @@ #ifndef __ASM_HUGETLB_H #define __ASM_HUGETLB_H -#include #include static inline pte_t huge_ptep_get(pte_t *ptep) @@ -82,6 +81,11 @@ extern void huge_ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr, pte_t *ptep); extern void huge_ptep_clear_flush(struct vm_area_struct *vma, unsigned long addr, pte_t *ptep); +extern void huge_pte_clear(struct mm_struct *mm, unsigned long addr, + pte_t *ptep, unsigned long sz); +#define huge_pte_clear huge_pte_clear + +#include #ifdef CONFIG_ARCH_HAS_GIGANTIC_PAGE static inline bool gigantic_page_supported(void) { return true; } diff --git a/arch/arm64/mm/hugetlbpage.c b/arch/arm64/mm/hugetlbpage.c index b91ec151e62c..035c121c675b 100644 --- a/arch/arm64/mm/hugetlbpage.c +++ b/arch/arm64/mm/hugetlbpage.c @@ -68,6 +68,32 @@ static int find_num_contig(struct mm_struct *mm, unsigned long addr, return CONT_PTES; } +static inline int num_contig_ptes(unsigned long size, size_t *pgsize) +{ + int contig_ptes = 0; + + *pgsize = size; + + switch (size) { +#ifdef CONFIG_ARM64_4K_PAGES + case PUD_SIZE: +#endif + case PMD_SIZE: + contig_ptes = 1; + break; + case CONT_PMD_SIZE: + *pgsize = PMD_SIZE; + contig_ptes = CONT_PMDS; + break; + case CONT_PTE_SIZE: + *pgsize = PAGE_SIZE; + contig_ptes = CONT_PTES; + break; + } + + return contig_ptes; +} + /* * Changing some bits of contiguous entries requires us to follow a * Break-Before-Make approach, breaking the whole contiguous set @@ -262,6 +288,18 @@ pte_t arch_make_huge_pte(pte_t entry, struct vm_area_struct *vma, return entry; } +void huge_pte_clear(struct mm_struct *mm, unsigned long addr, + pte_t *ptep, unsigned long sz) +{ + int i, ncontig; + size_t pgsize; + + ncontig = num_contig_ptes(sz, &pgsize); + + for (i = 0; i < ncontig; i++, addr += pgsize, ptep++) + pte_clear(mm, addr, ptep); +} + pte_t huge_ptep_get_and_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep) { -- cgit v1.2.3 From a8d623eefd780288c0299f517da0845da687fbfc Mon Sep 17 00:00:00 2001 From: Punit Agrawal Date: Tue, 22 Aug 2017 11:42:47 +0100 Subject: arm64: hugetlb: Override set_huge_swap_pte_at() to support contiguous hugepages The default implementation of set_huge_swap_pte_at() does not support hugepages consisting of contiguous ptes. Override it to add support for contiguous hugepages. 
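For a concrete sense of the numbers, assuming a 4K base page size (so PMD_SIZE = 2M and CONT_PTES = CONT_PMDS = 16; other granules differ), num_contig_ptes() resolves as follows, and set_huge_swap_pte_at() then writes the same swap entry into each of the ncontig page table slots:

  sz = PMD_SIZE      (2M)  ->  pgsize = 2M,  ncontig = 1
  sz = PUD_SIZE      (1G)  ->  pgsize = 1G,  ncontig = 1
  sz = CONT_PTE_SIZE (64K) ->  pgsize = 4K,  ncontig = 16
  sz = CONT_PMD_SIZE (32M) ->  pgsize = 2M,  ncontig = 16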
Signed-off-by: Punit Agrawal Cc: David Woods Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/hugetlb.h | 3 +++ arch/arm64/mm/hugetlbpage.c | 12 ++++++++++++ 2 files changed, 15 insertions(+) (limited to 'arch/arm64') diff --git a/arch/arm64/include/asm/hugetlb.h b/arch/arm64/include/asm/hugetlb.h index df8c0aea0917..1dca41bea16a 100644 --- a/arch/arm64/include/asm/hugetlb.h +++ b/arch/arm64/include/asm/hugetlb.h @@ -84,6 +84,9 @@ extern void huge_ptep_clear_flush(struct vm_area_struct *vma, extern void huge_pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep, unsigned long sz); #define huge_pte_clear huge_pte_clear +extern void set_huge_swap_pte_at(struct mm_struct *mm, unsigned long addr, + pte_t *ptep, pte_t pte, unsigned long sz); +#define set_huge_swap_pte_at set_huge_swap_pte_at #include diff --git a/arch/arm64/mm/hugetlbpage.c b/arch/arm64/mm/hugetlbpage.c index 035c121c675b..76915b736b17 100644 --- a/arch/arm64/mm/hugetlbpage.c +++ b/arch/arm64/mm/hugetlbpage.c @@ -188,6 +188,18 @@ void set_huge_pte_at(struct mm_struct *mm, unsigned long addr, } } +void set_huge_swap_pte_at(struct mm_struct *mm, unsigned long addr, + pte_t *ptep, pte_t pte, unsigned long sz) +{ + int i, ncontig; + size_t pgsize; + + ncontig = num_contig_ptes(sz, &pgsize); + + for (i = 0; i < ncontig; i++, ptep++) + set_pte(ptep, pte); +} + pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr, unsigned long sz) { -- cgit v1.2.3 From 5cd028b9d90403bf24c8bf7915ed61c7a9bfce6c Mon Sep 17 00:00:00 2001 From: Punit Agrawal Date: Tue, 22 Aug 2017 11:42:48 +0100 Subject: arm64: Re-enable support for contiguous hugepages also known as - Revert "Revert "Revert "commit 66b3923a1a0f ("arm64: hugetlb: add support for PTE contiguous bit")""" Now that our hugetlb implementation is compliant with the break-before-make requirements of the architecture and we have addressed some of the issues in core code required for properly dealing with hardware poisoning of contiguous hugepages let's re-enable support for contiguous hugepages. This reverts commit 6ae979ab39a368c18ceb0424bf824d172d6ab56f. 
Signed-off-by: Punit Agrawal Signed-off-by: Catalin Marinas --- arch/arm64/mm/hugetlbpage.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'arch/arm64') diff --git a/arch/arm64/mm/hugetlbpage.c b/arch/arm64/mm/hugetlbpage.c index 76915b736b17..3ceb4f275a1a 100644 --- a/arch/arm64/mm/hugetlbpage.c +++ b/arch/arm64/mm/hugetlbpage.c @@ -408,6 +408,10 @@ static __init int setup_hugepagesz(char *opt) hugetlb_add_hstate(PMD_SHIFT - PAGE_SHIFT); } else if (ps == PUD_SIZE) { hugetlb_add_hstate(PUD_SHIFT - PAGE_SHIFT); + } else if (ps == (PAGE_SIZE * CONT_PTES)) { + hugetlb_add_hstate(CONT_PTE_SHIFT); + } else if (ps == (PMD_SIZE * CONT_PMDS)) { + hugetlb_add_hstate((PMD_SHIFT + CONT_PMD_SHIFT) - PAGE_SHIFT); } else { hugetlb_bad_size(); pr_err("hugepagesz: Unsupported page size %lu K\n", ps >> 10); @@ -416,3 +420,13 @@ static __init int setup_hugepagesz(char *opt) return 1; } __setup("hugepagesz=", setup_hugepagesz); + +#ifdef CONFIG_ARM64_64K_PAGES +static __init int add_default_hugepagesz(void) +{ + if (size_to_hstate(CONT_PTES * PAGE_SIZE) == NULL) + hugetlb_add_hstate(CONT_PTE_SHIFT); + return 0; +} +arch_initcall(add_default_hugepagesz); +#endif -- cgit v1.2.3 From 828f193dd62a40ade5ea8b24cb8b0a22c30df673 Mon Sep 17 00:00:00 2001 From: Steve Capper Date: Tue, 22 Aug 2017 11:42:49 +0100 Subject: arm64: hugetlb: Cleanup setup_hugepagesz Replace a lot of if statements with switch and case labels to make it much clearer which huge page sizes are supported. Also, we prevent PUD_SIZE from being used on systems not running with 4KB PAGE_SIZE. Previously, if one supplied PUD_SIZE in these circumstances, unusable huge page sizes would be in use. Fixes: 084bd29810a5 ("ARM64: mm: HugeTLB support.") Cc: David Woods Signed-off-by: Steve Capper Signed-off-by: Punit Agrawal Reviewed-by: Mark Rutland Signed-off-by: Catalin Marinas --- arch/arm64/mm/hugetlbpage.c | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) (limited to 'arch/arm64') diff --git a/arch/arm64/mm/hugetlbpage.c b/arch/arm64/mm/hugetlbpage.c index 3ceb4f275a1a..6cb0fa92a651 100644 --- a/arch/arm64/mm/hugetlbpage.c +++ b/arch/arm64/mm/hugetlbpage.c @@ -404,20 +404,20 @@ static __init int setup_hugepagesz(char *opt) { unsigned long ps = memparse(opt, &opt); - if (ps == PMD_SIZE) { - hugetlb_add_hstate(PMD_SHIFT - PAGE_SHIFT); - } else if (ps == PUD_SIZE) { - hugetlb_add_hstate(PUD_SHIFT - PAGE_SHIFT); - } else if (ps == (PAGE_SIZE * CONT_PTES)) { - hugetlb_add_hstate(CONT_PTE_SHIFT); - } else if (ps == (PMD_SIZE * CONT_PMDS)) { - hugetlb_add_hstate((PMD_SHIFT + CONT_PMD_SHIFT) - PAGE_SHIFT); - } else { - hugetlb_bad_size(); - pr_err("hugepagesz: Unsupported page size %lu K\n", ps >> 10); - return 0; + switch (ps) { +#ifdef CONFIG_ARM64_4K_PAGES + case PUD_SIZE: +#endif + case PMD_SIZE * CONT_PMDS: + case PMD_SIZE: + case PAGE_SIZE * CONT_PTES: + hugetlb_add_hstate(ilog2(ps) - PAGE_SHIFT); + return 1; } - return 1; + + hugetlb_bad_size(); + pr_err("hugepagesz: Unsupported page size %lu K\n", ps >> 10); + return 0; } __setup("hugepagesz=", setup_hugepagesz); -- cgit v1.2.3 From 5ce93ab624cee4ed68086c946bd6d18b9b3f64aa Mon Sep 17 00:00:00 2001 From: Yury Norov Date: Sun, 20 Aug 2017 13:20:47 +0300 Subject: arm64: introduce separated bits for mm_context_t flags Currently the mm->context.flags field uses thread_info flags, which is not the best idea for many reasons. For example, mm_context_t doesn't need most of thread_info flags. 
And it would be difficult to add a new mm-related flag if needed because it may easily interfere with TIF ones. To deal with it, the new MMCF_AARCH32 flag is introduced for mm_context_t->flags, where the MMCF prefix stands for mm_context_t flags. Also, the mm_context_t flag doesn't require atomicity or ordering of access, so the set/clear_bit() calls are replaced with simple masks. Signed-off-by: Yury Norov Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/elf.h | 4 ++-- arch/arm64/include/asm/mmu.h | 2 ++ arch/arm64/kernel/probes/uprobes.c | 2 +- 3 files changed, 5 insertions(+), 3 deletions(-) (limited to 'arch/arm64') diff --git a/arch/arm64/include/asm/elf.h b/arch/arm64/include/asm/elf.h index acae781f7359..f4e33f8356ca 100644 --- a/arch/arm64/include/asm/elf.h +++ b/arch/arm64/include/asm/elf.h @@ -139,7 +139,7 @@ typedef struct user_fpsimd_state elf_fpregset_t; #define SET_PERSONALITY(ex) \ ({ \ - clear_bit(TIF_32BIT, &current->mm->context.flags); \ + current->mm->context.flags = 0; \ clear_thread_flag(TIF_32BIT); \ current->personality &= ~READ_IMPLIES_EXEC; \ }) @@ -195,7 +195,7 @@ typedef compat_elf_greg_t compat_elf_gregset_t[COMPAT_ELF_NGREG]; */ #define COMPAT_SET_PERSONALITY(ex) \ ({ \ - set_bit(TIF_32BIT, &current->mm->context.flags); \ + current->mm->context.flags = MMCF_AARCH32; \ set_thread_flag(TIF_32BIT); \ }) #define COMPAT_ARCH_DLINFO diff --git a/arch/arm64/include/asm/mmu.h b/arch/arm64/include/asm/mmu.h index 5468c834b072..0d34bf0a89c7 100644 --- a/arch/arm64/include/asm/mmu.h +++ b/arch/arm64/include/asm/mmu.h @@ -16,6 +16,8 @@ #ifndef __ASM_MMU_H #define __ASM_MMU_H +#define MMCF_AARCH32 0x1 /* mm context flag for AArch32 executables */ + typedef struct { atomic64_t id; void *vdso; diff --git a/arch/arm64/kernel/probes/uprobes.c b/arch/arm64/kernel/probes/uprobes.c index 26c998534dca..636ca0119c0e 100644 --- a/arch/arm64/kernel/probes/uprobes.c +++ b/arch/arm64/kernel/probes/uprobes.c @@ -40,7 +40,7 @@ int arch_uprobe_analyze_insn(struct arch_uprobe *auprobe, struct mm_struct *mm, probe_opcode_t insn; /* TODO: Currently we do not support AARCH32 instruction probing */ - if (test_bit(TIF_32BIT, &mm->context.flags)) + if (mm->context.flags & MMCF_AARCH32) return -ENOTSUPP; else if (!IS_ALIGNED(addr, AARCH64_INSN_SIZE)) return -EINVAL; -- cgit v1.2.3 From d1be5c99a0341249bf6f74eb1cbc3d5fc4ef2be7 Mon Sep 17 00:00:00 2001 From: Yury Norov Date: Sun, 20 Aug 2017 13:20:48 +0300 Subject: arm64: cleanup {COMPAT_,}SET_PERSONALITY() macro There is some work that should be done after setting the personality. Currently it's done in the macro, which is not the best idea. 
In this patch a new arch_setup_new_exec() routine is introduced, and all setup code is moved there, as suggested by Catalin: https://lkml.org/lkml/2017/8/4/494 Cc: Pratyush Anand Signed-off-by: Yury Norov [catalin.marinas@arm.com: comments changed or removed] Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/elf.h | 2 -- arch/arm64/include/asm/thread_info.h | 3 +++ arch/arm64/kernel/process.c | 8 ++++++++ 3 files changed, 11 insertions(+), 2 deletions(-) (limited to 'arch/arm64') diff --git a/arch/arm64/include/asm/elf.h b/arch/arm64/include/asm/elf.h index f4e33f8356ca..c55043709c69 100644 --- a/arch/arm64/include/asm/elf.h +++ b/arch/arm64/include/asm/elf.h @@ -139,7 +139,6 @@ typedef struct user_fpsimd_state elf_fpregset_t; #define SET_PERSONALITY(ex) \ ({ \ - current->mm->context.flags = 0; \ clear_thread_flag(TIF_32BIT); \ current->personality &= ~READ_IMPLIES_EXEC; \ }) @@ -195,7 +194,6 @@ typedef compat_elf_greg_t compat_elf_gregset_t[COMPAT_ELF_NGREG]; */ #define COMPAT_SET_PERSONALITY(ex) \ ({ \ - current->mm->context.flags = MMCF_AARCH32; \ set_thread_flag(TIF_32BIT); \ }) #define COMPAT_ARCH_DLINFO diff --git a/arch/arm64/include/asm/thread_info.h b/arch/arm64/include/asm/thread_info.h index aa04b733b349..2eca178bc943 100644 --- a/arch/arm64/include/asm/thread_info.h +++ b/arch/arm64/include/asm/thread_info.h @@ -60,6 +60,9 @@ struct thread_info { #define thread_saved_fp(tsk) \ ((unsigned long)(tsk->thread.cpu_context.fp)) +void arch_setup_new_exec(void); +#define arch_setup_new_exec arch_setup_new_exec + #endif /* diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c index 85b953dd023a..e6bf19c1dddb 100644 --- a/arch/arm64/kernel/process.c +++ b/arch/arm64/kernel/process.c @@ -414,3 +414,11 @@ unsigned long arch_randomize_brk(struct mm_struct *mm) else return randomize_page(mm->brk, SZ_1G); } + +/* + * Called from setup_new_exec() after (COMPAT_)SET_PERSONALITY. + */ +void arch_setup_new_exec(void) +{ + current->mm->context.flags = is_compat_task() ? MMCF_AARCH32 : 0; +} -- cgit v1.2.3
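
Editor's note: the contiguous-hugepage patches above all hinge on decomposing a hugepage size into a number of page-table entries plus the stride between them, which is what num_contig_ptes() computes. As a worked illustration (not part of the series), the stand-alone C sketch below mirrors that helper in user space. The constants are assumptions taken from an arm64 configuration with 4K pages (PAGE_SIZE = 4K, PMD_SIZE = 2M, PUD_SIZE = 1G, and 16 contiguous entries at both PTE and PMD level), so the resulting sizes (64K, 2M, 32M, 1G) will differ on 16K or 64K page kernels.

/*
 * Stand-alone sketch of the num_contig_ptes() logic used by the
 * contiguous-hugepage patches above.  The values below assume an arm64
 * kernel built with 4K pages; they are hard-coded only so the example
 * compiles and runs in user space.
 */
#include <stdio.h>
#include <stddef.h>

#define PAGE_SIZE      (4UL * 1024)               /* assumed 4K granule        */
#define PMD_SIZE       (2UL * 1024 * 1024)        /* 2M block                  */
#define PUD_SIZE       (1UL * 1024 * 1024 * 1024) /* 1G block, 4K pages only   */
#define CONT_PTES      16                         /* assumed contiguous PTEs   */
#define CONT_PMDS      16                         /* assumed contiguous PMDs   */
#define CONT_PTE_SIZE  (CONT_PTES * PAGE_SIZE)    /* 64K                       */
#define CONT_PMD_SIZE  (CONT_PMDS * PMD_SIZE)     /* 32M                       */

/* How many entries cover one hugepage of 'size', and at what stride. */
static int num_contig_ptes(unsigned long size, size_t *pgsize)
{
	int contig_ptes = 0;

	*pgsize = size;

	switch (size) {
	case PUD_SIZE:                   /* only offered with 4K pages */
	case PMD_SIZE:
		contig_ptes = 1;         /* a single block entry */
		break;
	case CONT_PMD_SIZE:
		*pgsize = PMD_SIZE;      /* run of contiguous PMD entries */
		contig_ptes = CONT_PMDS;
		break;
	case CONT_PTE_SIZE:
		*pgsize = PAGE_SIZE;     /* run of contiguous PTE entries */
		contig_ptes = CONT_PTES;
		break;
	}

	return contig_ptes;
}

int main(void)
{
	unsigned long sizes[] = { CONT_PTE_SIZE, PMD_SIZE, CONT_PMD_SIZE, PUD_SIZE };
	size_t pgsize;
	int i;

	for (i = 0; i < (int)(sizeof(sizes) / sizeof(sizes[0])); i++) {
		int n = num_contig_ptes(sizes[i], &pgsize);

		/* huge_pte_clear() would call pte_clear() n times, advancing
		 * the address by pgsize on each iteration. */
		printf("%9luK hugepage -> %2d entr%s of %luK\n",
		       sizes[i] >> 10, n, n == 1 ? "y" : "ies",
		       (unsigned long)pgsize >> 10);
	}
	return 0;
}

Under those assumptions a 32M hugepage resolves to 16 entries of 2M each, which is why huge_pte_clear() above loops pte_clear() over 16 slots while stepping the address by 2M, and why set_huge_swap_pte_at() writes the same entry into all 16 slots.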
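
The setup_hugepagesz() cleanup above collapses the per-size branches into a single hugetlb_add_hstate(ilog2(ps) - PAGE_SHIFT) call, which is only correct because every accepted size is a power of two whose log recovers the old explicit shift arithmetic. A quick user-space check of that equivalence, again hard-coding shift values assumed for a 4K-granule configuration of this era (PAGE_SHIFT = 12, PMD_SHIFT = 21, PUD_SHIFT = 30, CONT_PTE_SHIFT = CONT_PMD_SHIFT = 4), might look like this:

/*
 * Check that hugetlb_add_hstate(ilog2(ps) - PAGE_SHIFT) in the reworked
 * setup_hugepagesz() yields the same hstate orders as the explicit shift
 * arithmetic it replaces.  All shift values are assumptions for a
 * 4K-granule arm64 configuration, not taken from the patches themselves.
 */
#include <assert.h>
#include <stdio.h>

#define PAGE_SHIFT      12
#define PMD_SHIFT       21
#define PUD_SHIFT       30
#define CONT_PTE_SHIFT  4    /* assumed: 16 contiguous PTEs */
#define CONT_PMD_SHIFT  4    /* assumed: 16 contiguous PMDs */

#define PAGE_SIZE  (1UL << PAGE_SHIFT)
#define PMD_SIZE   (1UL << PMD_SHIFT)
#define PUD_SIZE   (1UL << PUD_SHIFT)
#define CONT_PTES  (1UL << CONT_PTE_SHIFT)
#define CONT_PMDS  (1UL << CONT_PMD_SHIFT)

/* Minimal stand-in for the kernel's ilog2() on a nonzero power of two. */
static int ilog2(unsigned long v)
{
	int r = 0;

	while (v >>= 1)
		r++;
	return r;
}

int main(void)
{
	/* Orders the old if/else chain passed to hugetlb_add_hstate(). */
	assert(ilog2(PMD_SIZE) - PAGE_SHIFT == PMD_SHIFT - PAGE_SHIFT);      /* 2M  -> order 9  */
	assert(ilog2(PUD_SIZE) - PAGE_SHIFT == PUD_SHIFT - PAGE_SHIFT);      /* 1G  -> order 18 */
	assert(ilog2(PAGE_SIZE * CONT_PTES) - PAGE_SHIFT == CONT_PTE_SHIFT); /* 64K -> order 4  */
	assert(ilog2(PMD_SIZE * CONT_PMDS) - PAGE_SHIFT ==
	       (PMD_SHIFT + CONT_PMD_SHIFT) - PAGE_SHIFT);                   /* 32M -> order 13 */

	printf("ilog2-based orders match the explicit shift arithmetic\n");
	return 0;
}

Since the orders agree for all four sizes, the switch statement can share one hugetlb_add_hstate() call, with the 1G case compiled in only when CONFIG_ARM64_4K_PAGES is set.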