From 2a2848e7c2fde1c26ff46998ac10f7bf9ca2de04 Mon Sep 17 00:00:00 2001 From: Anshuman Khandual Date: Fri, 8 Apr 2022 09:40:09 +0530 Subject: arm64/mm: Compute PTRS_PER_[PMD|PUD] independently of PTRS_PER_PTE Possible page table entries (or pointers) on non-zero page table levels are dependent on a single page size i.e PAGE_SIZE and size required for each individual page table entry i.e 8 bytes. PTRS_PER_[PMD|PUD] as such are not related to PTRS_PER_PTE in any manner, as being implied currently. So lets just make this very explicit and compute these macros independently. Cc: Will Deacon Cc: linux-arm-kernel@lists.infradead.org Cc: linux-kernel@vger.kernel.org Signed-off-by: Anshuman Khandual Link: https://lore.kernel.org/r/20220408041009.1259701-1-anshuman.khandual@arm.com Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/pgtable-hwdef.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/arm64/include/asm/pgtable-hwdef.h b/arch/arm64/include/asm/pgtable-hwdef.h index 66671ff05183..dd3d12bce07b 100644 --- a/arch/arm64/include/asm/pgtable-hwdef.h +++ b/arch/arm64/include/asm/pgtable-hwdef.h @@ -49,7 +49,7 @@ #define PMD_SHIFT ARM64_HW_PGTABLE_LEVEL_SHIFT(2) #define PMD_SIZE (_AC(1, UL) << PMD_SHIFT) #define PMD_MASK (~(PMD_SIZE-1)) -#define PTRS_PER_PMD PTRS_PER_PTE +#define PTRS_PER_PMD (1 << (PAGE_SHIFT - 3)) #endif /* @@ -59,7 +59,7 @@ #define PUD_SHIFT ARM64_HW_PGTABLE_LEVEL_SHIFT(1) #define PUD_SIZE (_AC(1, UL) << PUD_SHIFT) #define PUD_MASK (~(PUD_SIZE-1)) -#define PTRS_PER_PUD PTRS_PER_PTE +#define PTRS_PER_PUD (1 << (PAGE_SHIFT - 3)) #endif /* -- cgit v1.2.3 From 4f6277e8ac397e5932d9af5c6b3dadefe81d53dc Mon Sep 17 00:00:00 2001 From: Madhavan T. Venkataraman Date: Wed, 13 Apr 2022 15:59:05 +0100 Subject: arm64: stacktrace: remove NULL task check from unwind_frame() Currently, there is a check for a NULL task in unwind_frame(). It is not needed since all current callers pass a non-NULL task. There should be no functional change as a result of this patch. Signed-off-by: Madhavan T. Venkataraman Reviewed-by: Mark Brown Signed-off-by: Mark Rutland Reviewed-by: Kalesh Singh for the series. Link: https://lore.kernel.org/r/20220413145910.3060139-2-mark.rutland@arm.com Signed-off-by: Catalin Marinas --- arch/arm64/kernel/stacktrace.c | 3 --- 1 file changed, 3 deletions(-) (limited to 'arch') diff --git a/arch/arm64/kernel/stacktrace.c b/arch/arm64/kernel/stacktrace.c index e4103e085681..94932ade5c79 100644 --- a/arch/arm64/kernel/stacktrace.c +++ b/arch/arm64/kernel/stacktrace.c @@ -70,9 +70,6 @@ static int notrace unwind_frame(struct task_struct *tsk, unsigned long fp = frame->fp; struct stack_info info; - if (!tsk) - tsk = current; - /* Final frame; nothing to unwind */ if (fp == (unsigned long)task_pt_regs(tsk)->stackframe) return -ENOENT; -- cgit v1.2.3 From cb86a41b35c8f5da93ee7370c11634b57509d22b Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Wed, 13 Apr 2022 15:59:06 +0100 Subject: arm64: stacktrace: delete PCS comment The comment at the top of stacktrace.c isn't all that helpful, as it's not associated with the code which inspects the frame record, and the code example isn't representative of common code generation today. Delete it. There should be no functional change as a result of this patch. Signed-off-by: Mark Rutland Cc: Madhavan T. Venkataraman Cc: Mark Brown Cc: Will Deacon Reviewed-by: Madhavan T. Venkataraman Reviewed-by: Mark Brown Reviewed-by: Kalesh Singh for the series. Link: https://lore.kernel.org/r/20220413145910.3060139-3-mark.rutland@arm.com Signed-off-by: Catalin Marinas --- arch/arm64/kernel/stacktrace.c | 15 --------------- 1 file changed, 15 deletions(-) (limited to 'arch') diff --git a/arch/arm64/kernel/stacktrace.c b/arch/arm64/kernel/stacktrace.c index 94932ade5c79..08af9ca9a845 100644 --- a/arch/arm64/kernel/stacktrace.c +++ b/arch/arm64/kernel/stacktrace.c @@ -18,21 +18,6 @@ #include #include -/* - * AArch64 PCS assigns the frame pointer to x29. - * - * A simple function prologue looks like this: - * sub sp, sp, #0x10 - * stp x29, x30, [sp] - * mov x29, sp - * - * A simple function epilogue looks like this: - * mov sp, x29 - * ldp x29, x30, [sp] - * add sp, sp, #0x10 - */ - - static notrace void start_backtrace(struct stackframe *frame, unsigned long fp, unsigned long pc) { -- cgit v1.2.3 From 96bb1530c4f9039996bf95d28dcc957861558696 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Wed, 13 Apr 2022 15:59:07 +0100 Subject: arm64: stacktrace: make struct stackframe private to stacktrace.c Now that arm64 uses arch_stack_walk() consistently, struct stackframe is only used within stacktrace.c. To make it easier to read and maintain this code, it would be nicer if the definition were there too. Move the definition into stacktrace.c. There should be no functional change as a result of this patch. Signed-off-by: Mark Rutland Cc: Madhavan T. Venkataraman Cc: Mark Brown Cc: Will Deacon Reviewed-by: Madhavan T. Venkataraman Reviwed-by: Mark Brown Reviewed-by: Kalesh Singh for the series. Link: https://lore.kernel.org/r/20220413145910.3060139-4-mark.rutland@arm.com Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/stacktrace.h | 32 -------------------------------- arch/arm64/kernel/stacktrace.c | 32 ++++++++++++++++++++++++++++++++ 2 files changed, 32 insertions(+), 32 deletions(-) (limited to 'arch') diff --git a/arch/arm64/include/asm/stacktrace.h b/arch/arm64/include/asm/stacktrace.h index e77cdef9ca29..aec9315bf156 100644 --- a/arch/arm64/include/asm/stacktrace.h +++ b/arch/arm64/include/asm/stacktrace.h @@ -31,38 +31,6 @@ struct stack_info { enum stack_type type; }; -/* - * A snapshot of a frame record or fp/lr register values, along with some - * accounting information necessary for robust unwinding. - * - * @fp: The fp value in the frame record (or the real fp) - * @pc: The lr value in the frame record (or the real lr) - * - * @stacks_done: Stacks which have been entirely unwound, for which it is no - * longer valid to unwind to. - * - * @prev_fp: The fp that pointed to this frame record, or a synthetic value - * of 0. This is used to ensure that within a stack, each - * subsequent frame record is at an increasing address. - * @prev_type: The type of stack this frame record was on, or a synthetic - * value of STACK_TYPE_UNKNOWN. This is used to detect a - * transition from one stack to another. - * - * @kr_cur: When KRETPROBES is selected, holds the kretprobe instance - * associated with the most recently encountered replacement lr - * value. - */ -struct stackframe { - unsigned long fp; - unsigned long pc; - DECLARE_BITMAP(stacks_done, __NR_STACK_TYPES); - unsigned long prev_fp; - enum stack_type prev_type; -#ifdef CONFIG_KRETPROBES - struct llist_node *kr_cur; -#endif -}; - extern void dump_backtrace(struct pt_regs *regs, struct task_struct *tsk, const char *loglvl); diff --git a/arch/arm64/kernel/stacktrace.c b/arch/arm64/kernel/stacktrace.c index 08af9ca9a845..073d0941a5b6 100644 --- a/arch/arm64/kernel/stacktrace.c +++ b/arch/arm64/kernel/stacktrace.c @@ -18,6 +18,38 @@ #include #include +/* + * A snapshot of a frame record or fp/lr register values, along with some + * accounting information necessary for robust unwinding. + * + * @fp: The fp value in the frame record (or the real fp) + * @pc: The lr value in the frame record (or the real lr) + * + * @stacks_done: Stacks which have been entirely unwound, for which it is no + * longer valid to unwind to. + * + * @prev_fp: The fp that pointed to this frame record, or a synthetic value + * of 0. This is used to ensure that within a stack, each + * subsequent frame record is at an increasing address. + * @prev_type: The type of stack this frame record was on, or a synthetic + * value of STACK_TYPE_UNKNOWN. This is used to detect a + * transition from one stack to another. + * + * @kr_cur: When KRETPROBES is selected, holds the kretprobe instance + * associated with the most recently encountered replacement lr + * value. + */ +struct stackframe { + unsigned long fp; + unsigned long pc; + DECLARE_BITMAP(stacks_done, __NR_STACK_TYPES); + unsigned long prev_fp; + enum stack_type prev_type; +#ifdef CONFIG_KRETPROBES + struct llist_node *kr_cur; +#endif +}; + static notrace void start_backtrace(struct stackframe *frame, unsigned long fp, unsigned long pc) { -- cgit v1.2.3 From c797bd45480b41d1d988b0a74d8d5a7c615bd674 Mon Sep 17 00:00:00 2001 From: Madhavan T. Venkataraman Date: Wed, 13 Apr 2022 15:59:08 +0100 Subject: arm64: stacktrace: rename unwinder functions Rename unwinder functions for consistency and better naming. - Rename start_backtrace() to unwind_init(). - Rename unwind_frame() to unwind_next(). - Rename walk_stackframe() to unwind(). There should be no functional change as a result of this patch. Signed-off-by: Madhavan T. Venkataraman Reviewed-by: Mark Brown Signed-off-by: Mark Rutland Reviewed-by: Kalesh Singh for the series. Link: https://lore.kernel.org/r/20220413145910.3060139-5-mark.rutland@arm.com Signed-off-by: Catalin Marinas --- arch/arm64/kernel/stacktrace.c | 34 +++++++++++++++++----------------- 1 file changed, 17 insertions(+), 17 deletions(-) (limited to 'arch') diff --git a/arch/arm64/kernel/stacktrace.c b/arch/arm64/kernel/stacktrace.c index 073d0941a5b6..d65fde99b74a 100644 --- a/arch/arm64/kernel/stacktrace.c +++ b/arch/arm64/kernel/stacktrace.c @@ -50,8 +50,8 @@ struct stackframe { #endif }; -static notrace void start_backtrace(struct stackframe *frame, unsigned long fp, - unsigned long pc) +static notrace void unwind_init(struct stackframe *frame, unsigned long fp, + unsigned long pc) { frame->fp = fp; frame->pc = pc; @@ -62,7 +62,7 @@ static notrace void start_backtrace(struct stackframe *frame, unsigned long fp, /* * Prime the first unwind. * - * In unwind_frame() we'll check that the FP points to a valid stack, + * In unwind_next() we'll check that the FP points to a valid stack, * which can't be STACK_TYPE_UNKNOWN, and the first unwind will be * treated as a transition to whichever stack that happens to be. The * prev_fp value won't be used, but we set it to 0 such that it is @@ -72,7 +72,7 @@ static notrace void start_backtrace(struct stackframe *frame, unsigned long fp, frame->prev_fp = 0; frame->prev_type = STACK_TYPE_UNKNOWN; } -NOKPROBE_SYMBOL(start_backtrace); +NOKPROBE_SYMBOL(unwind_init); /* * Unwind from one frame record (A) to the next frame record (B). @@ -81,8 +81,8 @@ NOKPROBE_SYMBOL(start_backtrace); * records (e.g. a cycle), determined based on the location and fp value of A * and the location (but not the fp value) of B. */ -static int notrace unwind_frame(struct task_struct *tsk, - struct stackframe *frame) +static int notrace unwind_next(struct task_struct *tsk, + struct stackframe *frame) { unsigned long fp = frame->fp; struct stack_info info; @@ -122,7 +122,7 @@ static int notrace unwind_frame(struct task_struct *tsk, /* * Record this frame record's values and location. The prev_fp and - * prev_type are only meaningful to the next unwind_frame() invocation. + * prev_type are only meaningful to the next unwind_next() invocation. */ frame->fp = READ_ONCE_NOCHECK(*(unsigned long *)(fp)); frame->pc = READ_ONCE_NOCHECK(*(unsigned long *)(fp + 8)); @@ -155,23 +155,23 @@ static int notrace unwind_frame(struct task_struct *tsk, return 0; } -NOKPROBE_SYMBOL(unwind_frame); +NOKPROBE_SYMBOL(unwind_next); -static void notrace walk_stackframe(struct task_struct *tsk, - struct stackframe *frame, - bool (*fn)(void *, unsigned long), void *data) +static void notrace unwind(struct task_struct *tsk, + struct stackframe *frame, + bool (*fn)(void *, unsigned long), void *data) { while (1) { int ret; if (!fn(data, frame->pc)) break; - ret = unwind_frame(tsk, frame); + ret = unwind_next(tsk, frame); if (ret < 0) break; } } -NOKPROBE_SYMBOL(walk_stackframe); +NOKPROBE_SYMBOL(unwind); static bool dump_backtrace_entry(void *arg, unsigned long where) { @@ -213,14 +213,14 @@ noinline notrace void arch_stack_walk(stack_trace_consume_fn consume_entry, struct stackframe frame; if (regs) - start_backtrace(&frame, regs->regs[29], regs->pc); + unwind_init(&frame, regs->regs[29], regs->pc); else if (task == current) - start_backtrace(&frame, + unwind_init(&frame, (unsigned long)__builtin_frame_address(1), (unsigned long)__builtin_return_address(0)); else - start_backtrace(&frame, thread_saved_fp(task), + unwind_init(&frame, thread_saved_fp(task), thread_saved_pc(task)); - walk_stackframe(task, &frame, consume_entry, cookie); + unwind(task, &frame, consume_entry, cookie); } -- cgit v1.2.3 From e9d75a0ba87851187fe52493f1527229a7e101b3 Mon Sep 17 00:00:00 2001 From: Madhavan T. Venkataraman Date: Wed, 13 Apr 2022 15:59:09 +0100 Subject: arm64: stacktrace: rename stackframe to unwind_state Rename "struct stackframe" to "struct unwind_state" for consistency and better naming. Accordingly, rename variable/argument "frame" to "state". There should be no functional change as a result of this patch. Signed-off-by: Madhavan T. Venkataraman Reviewed-by: Mark Brown Signed-off-by: Mark Rutland Reviewed-by: Kalesh Singh for the series. Link: https://lore.kernel.org/r/20220413145910.3060139-6-mark.rutland@arm.com Signed-off-by: Catalin Marinas --- arch/arm64/kernel/stacktrace.c | 68 +++++++++++++++++++++--------------------- 1 file changed, 34 insertions(+), 34 deletions(-) (limited to 'arch') diff --git a/arch/arm64/kernel/stacktrace.c b/arch/arm64/kernel/stacktrace.c index d65fde99b74a..d5a195748aff 100644 --- a/arch/arm64/kernel/stacktrace.c +++ b/arch/arm64/kernel/stacktrace.c @@ -39,7 +39,7 @@ * associated with the most recently encountered replacement lr * value. */ -struct stackframe { +struct unwind_state { unsigned long fp; unsigned long pc; DECLARE_BITMAP(stacks_done, __NR_STACK_TYPES); @@ -50,13 +50,13 @@ struct stackframe { #endif }; -static notrace void unwind_init(struct stackframe *frame, unsigned long fp, +static notrace void unwind_init(struct unwind_state *state, unsigned long fp, unsigned long pc) { - frame->fp = fp; - frame->pc = pc; + state->fp = fp; + state->pc = pc; #ifdef CONFIG_KRETPROBES - frame->kr_cur = NULL; + state->kr_cur = NULL; #endif /* @@ -68,9 +68,9 @@ static notrace void unwind_init(struct stackframe *frame, unsigned long fp, * prev_fp value won't be used, but we set it to 0 such that it is * definitely not an accessible stack address. */ - bitmap_zero(frame->stacks_done, __NR_STACK_TYPES); - frame->prev_fp = 0; - frame->prev_type = STACK_TYPE_UNKNOWN; + bitmap_zero(state->stacks_done, __NR_STACK_TYPES); + state->prev_fp = 0; + state->prev_type = STACK_TYPE_UNKNOWN; } NOKPROBE_SYMBOL(unwind_init); @@ -82,9 +82,9 @@ NOKPROBE_SYMBOL(unwind_init); * and the location (but not the fp value) of B. */ static int notrace unwind_next(struct task_struct *tsk, - struct stackframe *frame) + struct unwind_state *state) { - unsigned long fp = frame->fp; + unsigned long fp = state->fp; struct stack_info info; /* Final frame; nothing to unwind */ @@ -97,7 +97,7 @@ static int notrace unwind_next(struct task_struct *tsk, if (!on_accessible_stack(tsk, fp, 16, &info)) return -EINVAL; - if (test_bit(info.type, frame->stacks_done)) + if (test_bit(info.type, state->stacks_done)) return -EINVAL; /* @@ -113,27 +113,27 @@ static int notrace unwind_next(struct task_struct *tsk, * stack to another, it's never valid to unwind back to that first * stack. */ - if (info.type == frame->prev_type) { - if (fp <= frame->prev_fp) + if (info.type == state->prev_type) { + if (fp <= state->prev_fp) return -EINVAL; } else { - set_bit(frame->prev_type, frame->stacks_done); + set_bit(state->prev_type, state->stacks_done); } /* * Record this frame record's values and location. The prev_fp and * prev_type are only meaningful to the next unwind_next() invocation. */ - frame->fp = READ_ONCE_NOCHECK(*(unsigned long *)(fp)); - frame->pc = READ_ONCE_NOCHECK(*(unsigned long *)(fp + 8)); - frame->prev_fp = fp; - frame->prev_type = info.type; + state->fp = READ_ONCE_NOCHECK(*(unsigned long *)(fp)); + state->pc = READ_ONCE_NOCHECK(*(unsigned long *)(fp + 8)); + state->prev_fp = fp; + state->prev_type = info.type; - frame->pc = ptrauth_strip_insn_pac(frame->pc); + state->pc = ptrauth_strip_insn_pac(state->pc); #ifdef CONFIG_FUNCTION_GRAPH_TRACER if (tsk->ret_stack && - (frame->pc == (unsigned long)return_to_handler)) { + (state->pc == (unsigned long)return_to_handler)) { unsigned long orig_pc; /* * This is a case where function graph tracer has @@ -141,16 +141,16 @@ static int notrace unwind_next(struct task_struct *tsk, * to hook a function return. * So replace it to an original value. */ - orig_pc = ftrace_graph_ret_addr(tsk, NULL, frame->pc, - (void *)frame->fp); - if (WARN_ON_ONCE(frame->pc == orig_pc)) + orig_pc = ftrace_graph_ret_addr(tsk, NULL, state->pc, + (void *)state->fp); + if (WARN_ON_ONCE(state->pc == orig_pc)) return -EINVAL; - frame->pc = orig_pc; + state->pc = orig_pc; } #endif /* CONFIG_FUNCTION_GRAPH_TRACER */ #ifdef CONFIG_KRETPROBES - if (is_kretprobe_trampoline(frame->pc)) - frame->pc = kretprobe_find_ret_addr(tsk, (void *)frame->fp, &frame->kr_cur); + if (is_kretprobe_trampoline(state->pc)) + state->pc = kretprobe_find_ret_addr(tsk, (void *)state->fp, &state->kr_cur); #endif return 0; @@ -158,15 +158,15 @@ static int notrace unwind_next(struct task_struct *tsk, NOKPROBE_SYMBOL(unwind_next); static void notrace unwind(struct task_struct *tsk, - struct stackframe *frame, + struct unwind_state *state, bool (*fn)(void *, unsigned long), void *data) { while (1) { int ret; - if (!fn(data, frame->pc)) + if (!fn(data, state->pc)) break; - ret = unwind_next(tsk, frame); + ret = unwind_next(tsk, state); if (ret < 0) break; } @@ -210,17 +210,17 @@ noinline notrace void arch_stack_walk(stack_trace_consume_fn consume_entry, void *cookie, struct task_struct *task, struct pt_regs *regs) { - struct stackframe frame; + struct unwind_state state; if (regs) - unwind_init(&frame, regs->regs[29], regs->pc); + unwind_init(&state, regs->regs[29], regs->pc); else if (task == current) - unwind_init(&frame, + unwind_init(&state, (unsigned long)__builtin_frame_address(1), (unsigned long)__builtin_return_address(0)); else - unwind_init(&frame, thread_saved_fp(task), + unwind_init(&state, thread_saved_fp(task), thread_saved_pc(task)); - unwind(task, &frame, consume_entry, cookie); + unwind(task, &state, consume_entry, cookie); } -- cgit v1.2.3 From bd5552bc4807a87a6597c629204712c7df7284f4 Mon Sep 17 00:00:00 2001 From: Madhavan T. Venkataraman Date: Wed, 13 Apr 2022 15:59:10 +0100 Subject: arm64: stacktrace: align with common naming For historical reasons, the naming of parameters and their types in the arm64 stacktrace code differs from that used in generic code and other architectures, even though the types are equivalent. For consistency and clarity, use the generic names. There should be no functional change as a result of this patch. Signed-off-by: Madhavan T. Venkataraman Signed-off-by: Mark Rutland Reviewed-by: Mark Brown Reviewed-by: Kalesh Singh for the series. Link: https://lore.kernel.org/r/20220413145910.3060139-7-mark.rutland@arm.com Signed-off-by: Catalin Marinas --- arch/arm64/kernel/stacktrace.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/arm64/kernel/stacktrace.c b/arch/arm64/kernel/stacktrace.c index d5a195748aff..0467cb79f080 100644 --- a/arch/arm64/kernel/stacktrace.c +++ b/arch/arm64/kernel/stacktrace.c @@ -159,12 +159,12 @@ NOKPROBE_SYMBOL(unwind_next); static void notrace unwind(struct task_struct *tsk, struct unwind_state *state, - bool (*fn)(void *, unsigned long), void *data) + stack_trace_consume_fn consume_entry, void *cookie) { while (1) { int ret; - if (!fn(data, state->pc)) + if (!consume_entry(cookie, state->pc)) break; ret = unwind_next(tsk, state); if (ret < 0) -- cgit v1.2.3 From b4adc83b07706042ad6e6a767f6c04636db69bcc Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Tue, 19 Apr 2022 12:22:13 +0100 Subject: arm64/sme: System register and exception syndrome definitions The arm64 Scalable Matrix Extension (SME) adds some new system registers, fields in existing system registers and exception syndromes. This patch adds definitions for these for use in future patches implementing support for this extension. Since SME will be the first user of FEAT_HCX in the kernel also include the definitions for enumerating it and the HCRX system register it adds. Signed-off-by: Mark Brown Acked-by: Catalin Marinas Link: https://lore.kernel.org/r/20220419112247.711548-6-broonie@kernel.org Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/esr.h | 12 ++++++- arch/arm64/include/asm/kvm_arm.h | 1 + arch/arm64/include/asm/sysreg.h | 67 ++++++++++++++++++++++++++++++++++++++++ arch/arm64/kernel/traps.c | 1 + 4 files changed, 80 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm64/include/asm/esr.h b/arch/arm64/include/asm/esr.h index d52a0b269ee8..43872e0cfd1e 100644 --- a/arch/arm64/include/asm/esr.h +++ b/arch/arm64/include/asm/esr.h @@ -37,7 +37,8 @@ #define ESR_ELx_EC_ERET (0x1a) /* EL2 only */ /* Unallocated EC: 0x1B */ #define ESR_ELx_EC_FPAC (0x1C) /* EL1 and above */ -/* Unallocated EC: 0x1D - 0x1E */ +#define ESR_ELx_EC_SME (0x1D) +/* Unallocated EC: 0x1E */ #define ESR_ELx_EC_IMP_DEF (0x1f) /* EL3 only */ #define ESR_ELx_EC_IABT_LOW (0x20) #define ESR_ELx_EC_IABT_CUR (0x21) @@ -327,6 +328,15 @@ #define ESR_ELx_CP15_32_ISS_SYS_CNTFRQ (ESR_ELx_CP15_32_ISS_SYS_VAL(0, 0, 14, 0) |\ ESR_ELx_CP15_32_ISS_DIR_READ) +/* + * ISS values for SME traps + */ + +#define ESR_ELx_SME_ISS_SME_DISABLED 0 +#define ESR_ELx_SME_ISS_ILL 1 +#define ESR_ELx_SME_ISS_SM_DISABLED 2 +#define ESR_ELx_SME_ISS_ZA_DISABLED 3 + #ifndef __ASSEMBLY__ #include diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h index 1767ded83888..13ae232ec4a1 100644 --- a/arch/arm64/include/asm/kvm_arm.h +++ b/arch/arm64/include/asm/kvm_arm.h @@ -279,6 +279,7 @@ #define CPTR_EL2_TCPAC (1U << 31) #define CPTR_EL2_TAM (1 << 30) #define CPTR_EL2_TTA (1 << 20) +#define CPTR_EL2_TSM (1 << 12) #define CPTR_EL2_TFP (1 << CPTR_EL2_TFP_SHIFT) #define CPTR_EL2_TZ (1 << 8) #define CPTR_NVHE_EL2_RES1 0x000032ff /* known RES1 bits in CPTR_EL2 (nVHE) */ diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index fbf5f8bb9055..bebfdd27296a 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -118,6 +118,10 @@ * System registers, organised loosely by encoding but grouped together * where the architected name contains an index. e.g. ID_MMFR_EL1. */ +#define SYS_SVCR_SMSTOP_SM_EL0 sys_reg(0, 3, 4, 2, 3) +#define SYS_SVCR_SMSTART_SM_EL0 sys_reg(0, 3, 4, 3, 3) +#define SYS_SVCR_SMSTOP_SMZA_EL0 sys_reg(0, 3, 4, 6, 3) + #define SYS_OSDTRRX_EL1 sys_reg(2, 0, 0, 0, 2) #define SYS_MDCCINT_EL1 sys_reg(2, 0, 0, 2, 0) #define SYS_MDSCR_EL1 sys_reg(2, 0, 0, 2, 2) @@ -181,6 +185,7 @@ #define SYS_ID_AA64PFR0_EL1 sys_reg(3, 0, 0, 4, 0) #define SYS_ID_AA64PFR1_EL1 sys_reg(3, 0, 0, 4, 1) #define SYS_ID_AA64ZFR0_EL1 sys_reg(3, 0, 0, 4, 4) +#define SYS_ID_AA64SMFR0_EL1 sys_reg(3, 0, 0, 4, 5) #define SYS_ID_AA64DFR0_EL1 sys_reg(3, 0, 0, 5, 0) #define SYS_ID_AA64DFR1_EL1 sys_reg(3, 0, 0, 5, 1) @@ -204,6 +209,8 @@ #define SYS_ZCR_EL1 sys_reg(3, 0, 1, 2, 0) #define SYS_TRFCR_EL1 sys_reg(3, 0, 1, 2, 1) +#define SYS_SMPRI_EL1 sys_reg(3, 0, 1, 2, 4) +#define SYS_SMCR_EL1 sys_reg(3, 0, 1, 2, 6) #define SYS_TTBR0_EL1 sys_reg(3, 0, 2, 0, 0) #define SYS_TTBR1_EL1 sys_reg(3, 0, 2, 0, 1) @@ -396,6 +403,8 @@ #define TRBIDR_ALIGN_MASK GENMASK(3, 0) #define TRBIDR_ALIGN_SHIFT 0 +#define SMPRI_EL1_PRIORITY_MASK 0xf + #define SYS_PMINTENSET_EL1 sys_reg(3, 0, 9, 14, 1) #define SYS_PMINTENCLR_EL1 sys_reg(3, 0, 9, 14, 2) @@ -451,8 +460,13 @@ #define SYS_CCSIDR_EL1 sys_reg(3, 1, 0, 0, 0) #define SYS_CLIDR_EL1 sys_reg(3, 1, 0, 0, 1) #define SYS_GMID_EL1 sys_reg(3, 1, 0, 0, 4) +#define SYS_SMIDR_EL1 sys_reg(3, 1, 0, 0, 6) #define SYS_AIDR_EL1 sys_reg(3, 1, 0, 0, 7) +#define SYS_SMIDR_EL1_IMPLEMENTER_SHIFT 24 +#define SYS_SMIDR_EL1_SMPS_SHIFT 15 +#define SYS_SMIDR_EL1_AFFINITY_SHIFT 0 + #define SYS_CSSELR_EL1 sys_reg(3, 2, 0, 0, 0) #define SYS_CTR_EL0 sys_reg(3, 3, 0, 0, 1) @@ -461,6 +475,10 @@ #define SYS_RNDR_EL0 sys_reg(3, 3, 2, 4, 0) #define SYS_RNDRRS_EL0 sys_reg(3, 3, 2, 4, 1) +#define SYS_SVCR_EL0 sys_reg(3, 3, 4, 2, 2) +#define SYS_SVCR_EL0_ZA_MASK 2 +#define SYS_SVCR_EL0_SM_MASK 1 + #define SYS_PMCR_EL0 sys_reg(3, 3, 9, 12, 0) #define SYS_PMCNTENSET_EL0 sys_reg(3, 3, 9, 12, 1) #define SYS_PMCNTENCLR_EL0 sys_reg(3, 3, 9, 12, 2) @@ -477,6 +495,7 @@ #define SYS_TPIDR_EL0 sys_reg(3, 3, 13, 0, 2) #define SYS_TPIDRRO_EL0 sys_reg(3, 3, 13, 0, 3) +#define SYS_TPIDR2_EL0 sys_reg(3, 3, 13, 0, 5) #define SYS_SCXTNUM_EL0 sys_reg(3, 3, 13, 0, 7) @@ -546,6 +565,9 @@ #define SYS_HFGITR_EL2 sys_reg(3, 4, 1, 1, 6) #define SYS_ZCR_EL2 sys_reg(3, 4, 1, 2, 0) #define SYS_TRFCR_EL2 sys_reg(3, 4, 1, 2, 1) +#define SYS_HCRX_EL2 sys_reg(3, 4, 1, 2, 2) +#define SYS_SMPRIMAP_EL2 sys_reg(3, 4, 1, 2, 5) +#define SYS_SMCR_EL2 sys_reg(3, 4, 1, 2, 6) #define SYS_DACR32_EL2 sys_reg(3, 4, 3, 0, 0) #define SYS_HDFGRTR_EL2 sys_reg(3, 4, 3, 1, 4) #define SYS_HDFGWTR_EL2 sys_reg(3, 4, 3, 1, 5) @@ -605,6 +627,7 @@ #define SYS_SCTLR_EL12 sys_reg(3, 5, 1, 0, 0) #define SYS_CPACR_EL12 sys_reg(3, 5, 1, 0, 2) #define SYS_ZCR_EL12 sys_reg(3, 5, 1, 2, 0) +#define SYS_SMCR_EL12 sys_reg(3, 5, 1, 2, 6) #define SYS_TTBR0_EL12 sys_reg(3, 5, 2, 0, 0) #define SYS_TTBR1_EL12 sys_reg(3, 5, 2, 0, 1) #define SYS_TCR_EL12 sys_reg(3, 5, 2, 0, 2) @@ -628,6 +651,7 @@ #define SYS_CNTV_CVAL_EL02 sys_reg(3, 5, 14, 3, 2) /* Common SCTLR_ELx flags. */ +#define SCTLR_ELx_ENTP2 (BIT(60)) #define SCTLR_ELx_DSSBS (BIT(44)) #define SCTLR_ELx_ATA (BIT(43)) @@ -836,6 +860,7 @@ #define ID_AA64PFR0_ELx_32BIT_64BIT 0x2 /* id_aa64pfr1 */ +#define ID_AA64PFR1_SME_SHIFT 24 #define ID_AA64PFR1_MPAMFRAC_SHIFT 16 #define ID_AA64PFR1_RASFRAC_SHIFT 12 #define ID_AA64PFR1_MTE_SHIFT 8 @@ -846,6 +871,7 @@ #define ID_AA64PFR1_SSBS_PSTATE_ONLY 1 #define ID_AA64PFR1_SSBS_PSTATE_INSNS 2 #define ID_AA64PFR1_BT_BTI 0x1 +#define ID_AA64PFR1_SME 1 #define ID_AA64PFR1_MTE_NI 0x0 #define ID_AA64PFR1_MTE_EL0 0x1 @@ -874,6 +900,23 @@ #define ID_AA64ZFR0_AES_PMULL 0x2 #define ID_AA64ZFR0_SVEVER_SVE2 0x1 +/* id_aa64smfr0 */ +#define ID_AA64SMFR0_FA64_SHIFT 63 +#define ID_AA64SMFR0_I16I64_SHIFT 52 +#define ID_AA64SMFR0_F64F64_SHIFT 48 +#define ID_AA64SMFR0_I8I32_SHIFT 36 +#define ID_AA64SMFR0_F16F32_SHIFT 35 +#define ID_AA64SMFR0_B16F32_SHIFT 34 +#define ID_AA64SMFR0_F32F32_SHIFT 32 + +#define ID_AA64SMFR0_FA64 0x1 +#define ID_AA64SMFR0_I16I64 0x4 +#define ID_AA64SMFR0_F64F64 0x1 +#define ID_AA64SMFR0_I8I32 0x4 +#define ID_AA64SMFR0_F16F32 0x1 +#define ID_AA64SMFR0_B16F32 0x1 +#define ID_AA64SMFR0_F32F32 0x1 + /* id_aa64mmfr0 */ #define ID_AA64MMFR0_ECV_SHIFT 60 #define ID_AA64MMFR0_FGT_SHIFT 56 @@ -926,6 +969,7 @@ /* id_aa64mmfr1 */ #define ID_AA64MMFR1_ECBHB_SHIFT 60 +#define ID_AA64MMFR1_HCX_SHIFT 40 #define ID_AA64MMFR1_AFP_SHIFT 44 #define ID_AA64MMFR1_ETS_SHIFT 36 #define ID_AA64MMFR1_TWED_SHIFT 32 @@ -1119,9 +1163,24 @@ #define ZCR_ELx_LEN_SIZE 9 #define ZCR_ELx_LEN_MASK 0x1ff +#define SMCR_ELx_FA64_SHIFT 31 +#define SMCR_ELx_FA64_MASK (1 << SMCR_ELx_FA64_SHIFT) + +/* + * The SMCR_ELx_LEN_* definitions intentionally include bits [8:4] which + * are reserved by the SME architecture for future expansion of the LEN + * field, with compatible semantics. + */ +#define SMCR_ELx_LEN_SHIFT 0 +#define SMCR_ELx_LEN_SIZE 9 +#define SMCR_ELx_LEN_MASK 0x1ff + #define CPACR_EL1_FPEN_EL1EN (BIT(20)) /* enable EL1 access */ #define CPACR_EL1_FPEN_EL0EN (BIT(21)) /* enable EL0 access, if EL1EN set */ +#define CPACR_EL1_SMEN_EL1EN (BIT(24)) /* enable EL1 access */ +#define CPACR_EL1_SMEN_EL0EN (BIT(25)) /* enable EL0 access, if EL1EN set */ + #define CPACR_EL1_ZEN_EL1EN (BIT(16)) /* enable EL1 access */ #define CPACR_EL1_ZEN_EL0EN (BIT(17)) /* enable EL0 access, if EL1EN set */ @@ -1170,6 +1229,8 @@ #define TRFCR_ELx_ExTRE BIT(1) #define TRFCR_ELx_E0TRE BIT(0) +/* HCRX_EL2 definitions */ +#define HCRX_EL2_SMPME_MASK (1 << 5) /* GIC Hypervisor interface registers */ /* ICH_MISR_EL2 bit definitions */ @@ -1233,6 +1294,12 @@ #define ICH_VTR_TDS_SHIFT 19 #define ICH_VTR_TDS_MASK (1 << ICH_VTR_TDS_SHIFT) +/* HFG[WR]TR_EL2 bit definitions */ +#define HFGxTR_EL2_nTPIDR2_EL0_SHIFT 55 +#define HFGxTR_EL2_nTPIDR2_EL0_MASK BIT_MASK(HFGxTR_EL2_nTPIDR2_EL0_SHIFT) +#define HFGxTR_EL2_nSMPRI_EL1_SHIFT 54 +#define HFGxTR_EL2_nSMPRI_EL1_MASK BIT_MASK(HFGxTR_EL2_nSMPRI_EL1_SHIFT) + #define ARM64_FEATURE_FIELD_BITS 4 /* Create a mask for the feature bits of the specified feature. */ diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c index 0529fd57567e..6751621e5bea 100644 --- a/arch/arm64/kernel/traps.c +++ b/arch/arm64/kernel/traps.c @@ -821,6 +821,7 @@ static const char *esr_class_str[] = { [ESR_ELx_EC_SVE] = "SVE", [ESR_ELx_EC_ERET] = "ERET/ERETAA/ERETAB", [ESR_ELx_EC_FPAC] = "FPAC", + [ESR_ELx_EC_SME] = "SME", [ESR_ELx_EC_IMP_DEF] = "EL3 IMP DEF", [ESR_ELx_EC_IABT_LOW] = "IABT (lower EL)", [ESR_ELx_EC_IABT_CUR] = "IABT (current EL)", -- cgit v1.2.3 From ca8a4ebcff4465f0272637433c789a5e4a272626 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Tue, 19 Apr 2022 12:22:14 +0100 Subject: arm64/sme: Manually encode SME instructions As with SVE rather than impose ambitious toolchain requirements for SME we manually encode the few instructions which we require in order to perform the work the kernel needs to do. The instructions used to save and restore context are provided as assembler macros while those for entering and leaving streaming mode are done in asm volatile blocks since they are expected to be used from C. We could do the SMSTART and SMSTOP operations with read/modify/write cycles on SVCR but using the aliases provided for individual field accesses should be slightly faster. These instructions are aliases for MSR but since our minimum toolchain requirements are old enough to mean that we can't use the sX_X_cX_cX_X form and they always use xzr rather than taking a value like write_sysreg_s() wants we just use .inst. Signed-off-by: Mark Brown Reviewed-by: Catalin Marinas Link: https://lore.kernel.org/r/20220419112247.711548-7-broonie@kernel.org Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/fpsimd.h | 25 ++++++++++++++++ arch/arm64/include/asm/fpsimdmacros.h | 54 +++++++++++++++++++++++++++++++++++ 2 files changed, 79 insertions(+) (limited to 'arch') diff --git a/arch/arm64/include/asm/fpsimd.h b/arch/arm64/include/asm/fpsimd.h index cb24385e3632..6e2dc9dcbf49 100644 --- a/arch/arm64/include/asm/fpsimd.h +++ b/arch/arm64/include/asm/fpsimd.h @@ -249,6 +249,31 @@ static inline void sve_setup(void) { } #endif /* ! CONFIG_ARM64_SVE */ +#ifdef CONFIG_ARM64_SME + +static inline void sme_smstart_sm(void) +{ + asm volatile(__msr_s(SYS_SVCR_SMSTART_SM_EL0, "xzr")); +} + +static inline void sme_smstop_sm(void) +{ + asm volatile(__msr_s(SYS_SVCR_SMSTOP_SM_EL0, "xzr")); +} + +static inline void sme_smstop(void) +{ + asm volatile(__msr_s(SYS_SVCR_SMSTOP_SMZA_EL0, "xzr")); +} + +#else + +static inline void sme_smstart_sm(void) { } +static inline void sme_smstop_sm(void) { } +static inline void sme_smstop(void) { } + +#endif /* ! CONFIG_ARM64_SME */ + /* For use by EFI runtime services calls only */ extern void __efi_fpsimd_begin(void); extern void __efi_fpsimd_end(void); diff --git a/arch/arm64/include/asm/fpsimdmacros.h b/arch/arm64/include/asm/fpsimdmacros.h index 2509d7dde55a..2e9a33155081 100644 --- a/arch/arm64/include/asm/fpsimdmacros.h +++ b/arch/arm64/include/asm/fpsimdmacros.h @@ -93,6 +93,12 @@ .endif .endm +.macro _sme_check_wv v + .if (\v) < 12 || (\v) > 15 + .error "Bad vector select register \v." + .endif +.endm + /* SVE instruction encodings for non-SVE-capable assemblers */ /* (pre binutils 2.28, all kernel capable clang versions support SVE) */ @@ -174,6 +180,54 @@ | (\np) .endm +/* SME instruction encodings for non-SME-capable assemblers */ +/* (pre binutils 2.38/LLVM 13) */ + +/* RDSVL X\nx, #\imm */ +.macro _sme_rdsvl nx, imm + _check_general_reg \nx + _check_num (\imm), -0x20, 0x1f + .inst 0x04bf5800 \ + | (\nx) \ + | (((\imm) & 0x3f) << 5) +.endm + +/* + * STR (vector from ZA array): + * STR ZA[\nw, #\offset], [X\nxbase, #\offset, MUL VL] + */ +.macro _sme_str_zav nw, nxbase, offset=0 + _sme_check_wv \nw + _check_general_reg \nxbase + _check_num (\offset), -0x100, 0xff + .inst 0xe1200000 \ + | (((\nw) & 3) << 13) \ + | ((\nxbase) << 5) \ + | ((\offset) & 7) +.endm + +/* + * LDR (vector to ZA array): + * LDR ZA[\nw, #\offset], [X\nxbase, #\offset, MUL VL] + */ +.macro _sme_ldr_zav nw, nxbase, offset=0 + _sme_check_wv \nw + _check_general_reg \nxbase + _check_num (\offset), -0x100, 0xff + .inst 0xe1000000 \ + | (((\nw) & 3) << 13) \ + | ((\nxbase) << 5) \ + | ((\offset) & 7) +.endm + +/* + * Zero the entire ZA array + * ZERO ZA + */ +.macro zero_za + .inst 0xc00800ff +.endm + .macro __for from:req, to:req .if (\from) == (\to) _for__body %\from -- cgit v1.2.3 From b2cf6a23289b3268cc7915a09c0c8372147b2727 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Tue, 19 Apr 2022 12:22:15 +0100 Subject: arm64/sme: Early CPU setup for SME SME requires similar setup to that for SVE: disable traps to EL2 and make sure that the maximum vector length is available to EL1, for SME we have two traps - one for SME itself and one for TPIDR2. In addition since we currently make no active use of priority control for SCMUs we map all SME priorities lower ELs may configure to 0, the architecture specified minimum priority, to ensure that nothing we manage is able to configure itself to consume excessive resources. This will need to be revisited should there be a need to manage SME priorities at runtime. Signed-off-by: Mark Brown Reviewed-by: Catalin Marinas Link: https://lore.kernel.org/r/20220419112247.711548-8-broonie@kernel.org Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/el2_setup.h | 64 +++++++++++++++++++++++++++++++++++--- 1 file changed, 60 insertions(+), 4 deletions(-) (limited to 'arch') diff --git a/arch/arm64/include/asm/el2_setup.h b/arch/arm64/include/asm/el2_setup.h index c31be7eda9df..fabdbde0fe02 100644 --- a/arch/arm64/include/asm/el2_setup.h +++ b/arch/arm64/include/asm/el2_setup.h @@ -143,6 +143,50 @@ .Lskip_sve_\@: .endm +/* SME register access and priority mapping */ +.macro __init_el2_nvhe_sme + mrs x1, id_aa64pfr1_el1 + ubfx x1, x1, #ID_AA64PFR1_SME_SHIFT, #4 + cbz x1, .Lskip_sme_\@ + + bic x0, x0, #CPTR_EL2_TSM // Also disable SME traps + msr cptr_el2, x0 // Disable copro. traps to EL2 + isb + + mrs x1, sctlr_el2 + orr x1, x1, #SCTLR_ELx_ENTP2 // Disable TPIDR2 traps + msr sctlr_el2, x1 + isb + + mov x1, #0 // SMCR controls + + mrs_s x2, SYS_ID_AA64SMFR0_EL1 + ubfx x2, x2, #ID_AA64SMFR0_FA64_SHIFT, #1 // Full FP in SM? + cbz x2, .Lskip_sme_fa64_\@ + + orr x1, x1, SMCR_ELx_FA64_MASK +.Lskip_sme_fa64_\@: + + orr x1, x1, #SMCR_ELx_LEN_MASK // Enable full SME vector + msr_s SYS_SMCR_EL2, x1 // length for EL1. + + mrs_s x1, SYS_SMIDR_EL1 // Priority mapping supported? + ubfx x1, x1, #SYS_SMIDR_EL1_SMPS_SHIFT, #1 + cbz x1, .Lskip_sme_\@ + + msr_s SYS_SMPRIMAP_EL2, xzr // Make all priorities equal + + mrs x1, id_aa64mmfr1_el1 // HCRX_EL2 present? + ubfx x1, x1, #ID_AA64MMFR1_HCX_SHIFT, #4 + cbz x1, .Lskip_sme_\@ + + mrs_s x1, SYS_HCRX_EL2 + orr x1, x1, #HCRX_EL2_SMPME_MASK // Enable priority mapping + msr_s SYS_HCRX_EL2, x1 + +.Lskip_sme_\@: +.endm + /* Disable any fine grained traps */ .macro __init_el2_fgt mrs x1, id_aa64mmfr0_el1 @@ -153,15 +197,26 @@ mrs x1, id_aa64dfr0_el1 ubfx x1, x1, #ID_AA64DFR0_PMSVER_SHIFT, #4 cmp x1, #3 - b.lt .Lset_fgt_\@ + b.lt .Lset_debug_fgt_\@ /* Disable PMSNEVFR_EL1 read and write traps */ orr x0, x0, #(1 << 62) -.Lset_fgt_\@: +.Lset_debug_fgt_\@: msr_s SYS_HDFGRTR_EL2, x0 msr_s SYS_HDFGWTR_EL2, x0 - msr_s SYS_HFGRTR_EL2, xzr - msr_s SYS_HFGWTR_EL2, xzr + + mov x0, xzr + mrs x1, id_aa64pfr1_el1 + ubfx x1, x1, #ID_AA64PFR1_SME_SHIFT, #4 + cbz x1, .Lset_fgt_\@ + + /* Disable nVHE traps of TPIDR2 and SMPRI */ + orr x0, x0, #HFGxTR_EL2_nSMPRI_EL1_MASK + orr x0, x0, #HFGxTR_EL2_nTPIDR2_EL0_MASK + +.Lset_fgt_\@: + msr_s SYS_HFGRTR_EL2, x0 + msr_s SYS_HFGWTR_EL2, x0 msr_s SYS_HFGITR_EL2, xzr mrs x1, id_aa64pfr0_el1 // AMU traps UNDEF without AMU @@ -196,6 +251,7 @@ __init_el2_nvhe_idregs __init_el2_nvhe_cptr __init_el2_nvhe_sve + __init_el2_nvhe_sme __init_el2_fgt __init_el2_nvhe_prepare_eret .endm -- cgit v1.2.3 From 5e64b862c4823ab53aac028042abd918c2f27041 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Tue, 19 Apr 2022 12:22:16 +0100 Subject: arm64/sme: Basic enumeration support This patch introduces basic cpufeature support for discovering the presence of the Scalable Matrix Extension. Signed-off-by: Mark Brown Reviewed-by: Catalin Marinas Link: https://lore.kernel.org/r/20220419112247.711548-9-broonie@kernel.org Signed-off-by: Catalin Marinas --- Documentation/arm64/elf_hwcaps.rst | 33 +++++++++++++++++++++ arch/arm64/include/asm/cpu.h | 1 + arch/arm64/include/asm/cpufeature.h | 12 ++++++++ arch/arm64/include/asm/fpsimd.h | 2 ++ arch/arm64/include/asm/hwcap.h | 8 +++++ arch/arm64/include/uapi/asm/hwcap.h | 8 +++++ arch/arm64/kernel/cpufeature.c | 59 +++++++++++++++++++++++++++++++++++++ arch/arm64/kernel/cpuinfo.c | 9 ++++++ arch/arm64/kernel/fpsimd.c | 30 +++++++++++++++++++ arch/arm64/tools/cpucaps | 2 ++ 10 files changed, 164 insertions(+) (limited to 'arch') diff --git a/Documentation/arm64/elf_hwcaps.rst b/Documentation/arm64/elf_hwcaps.rst index a8f30963e550..f8d818eaaff5 100644 --- a/Documentation/arm64/elf_hwcaps.rst +++ b/Documentation/arm64/elf_hwcaps.rst @@ -264,6 +264,39 @@ HWCAP2_MTE3 Functionality implied by ID_AA64PFR1_EL1.MTE == 0b0011, as described by Documentation/arm64/memory-tagging-extension.rst. +HWCAP2_SME + + Functionality implied by ID_AA64PFR1_EL1.SME == 0b0001, as described + by Documentation/arm64/sme.rst. + +HWCAP2_SME_I16I64 + + Functionality implied by ID_AA64SMFR0_EL1.I16I64 == 0b1111. + +HWCAP2_SME_F64F64 + + Functionality implied by ID_AA64SMFR0_EL1.F64F64 == 0b1. + +HWCAP2_SME_I8I32 + + Functionality implied by ID_AA64SMFR0_EL1.I8I32 == 0b1111. + +HWCAP2_SME_F16F32 + + Functionality implied by ID_AA64SMFR0_EL1.F16F32 == 0b1. + +HWCAP2_SME_B16F32 + + Functionality implied by ID_AA64SMFR0_EL1.B16F32 == 0b1. + +HWCAP2_SME_F32F32 + + Functionality implied by ID_AA64SMFR0_EL1.F32F32 == 0b1. + +HWCAP2_SME_FA64 + + Functionality implied by ID_AA64SMFR0_EL1.FA64 == 0b1. + 4. Unused AT_HWCAP bits ----------------------- diff --git a/arch/arm64/include/asm/cpu.h b/arch/arm64/include/asm/cpu.h index a58e366f0b07..d08062bcb9c1 100644 --- a/arch/arm64/include/asm/cpu.h +++ b/arch/arm64/include/asm/cpu.h @@ -58,6 +58,7 @@ struct cpuinfo_arm64 { u64 reg_id_aa64pfr0; u64 reg_id_aa64pfr1; u64 reg_id_aa64zfr0; + u64 reg_id_aa64smfr0; struct cpuinfo_32bit aarch32; diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h index c62e7e5e2f0c..8ac12e4094aa 100644 --- a/arch/arm64/include/asm/cpufeature.h +++ b/arch/arm64/include/asm/cpufeature.h @@ -759,6 +759,18 @@ static __always_inline bool system_supports_sve(void) cpus_have_const_cap(ARM64_SVE); } +static __always_inline bool system_supports_sme(void) +{ + return IS_ENABLED(CONFIG_ARM64_SME) && + cpus_have_const_cap(ARM64_SME); +} + +static __always_inline bool system_supports_fa64(void) +{ + return IS_ENABLED(CONFIG_ARM64_SME) && + cpus_have_const_cap(ARM64_SME_FA64); +} + static __always_inline bool system_supports_cnp(void) { return IS_ENABLED(CONFIG_ARM64_CNP) && diff --git a/arch/arm64/include/asm/fpsimd.h b/arch/arm64/include/asm/fpsimd.h index 6e2dc9dcbf49..2e8ef00e7520 100644 --- a/arch/arm64/include/asm/fpsimd.h +++ b/arch/arm64/include/asm/fpsimd.h @@ -74,6 +74,8 @@ extern void sve_set_vq(unsigned long vq_minus_1); struct arm64_cpu_capabilities; extern void sve_kernel_enable(const struct arm64_cpu_capabilities *__unused); +extern void sme_kernel_enable(const struct arm64_cpu_capabilities *__unused); +extern void fa64_kernel_enable(const struct arm64_cpu_capabilities *__unused); extern u64 read_zcr_features(void); diff --git a/arch/arm64/include/asm/hwcap.h b/arch/arm64/include/asm/hwcap.h index 8db5ec0089db..9f0ce004fdbc 100644 --- a/arch/arm64/include/asm/hwcap.h +++ b/arch/arm64/include/asm/hwcap.h @@ -109,6 +109,14 @@ #define KERNEL_HWCAP_AFP __khwcap2_feature(AFP) #define KERNEL_HWCAP_RPRES __khwcap2_feature(RPRES) #define KERNEL_HWCAP_MTE3 __khwcap2_feature(MTE3) +#define KERNEL_HWCAP_SME __khwcap2_feature(SME) +#define KERNEL_HWCAP_SME_I16I64 __khwcap2_feature(SME_I16I64) +#define KERNEL_HWCAP_SME_F64F64 __khwcap2_feature(SME_F64F64) +#define KERNEL_HWCAP_SME_I8I32 __khwcap2_feature(SME_I8I32) +#define KERNEL_HWCAP_SME_F16F32 __khwcap2_feature(SME_F16F32) +#define KERNEL_HWCAP_SME_B16F32 __khwcap2_feature(SME_B16F32) +#define KERNEL_HWCAP_SME_F32F32 __khwcap2_feature(SME_F32F32) +#define KERNEL_HWCAP_SME_FA64 __khwcap2_feature(SME_FA64) /* * This yields a mask that user programs can use to figure out what diff --git a/arch/arm64/include/uapi/asm/hwcap.h b/arch/arm64/include/uapi/asm/hwcap.h index 99cb5d383048..b0256cec63b5 100644 --- a/arch/arm64/include/uapi/asm/hwcap.h +++ b/arch/arm64/include/uapi/asm/hwcap.h @@ -79,5 +79,13 @@ #define HWCAP2_AFP (1 << 20) #define HWCAP2_RPRES (1 << 21) #define HWCAP2_MTE3 (1 << 22) +#define HWCAP2_SME (1 << 23) +#define HWCAP2_SME_I16I64 (1 << 24) +#define HWCAP2_SME_F64F64 (1 << 25) +#define HWCAP2_SME_I8I32 (1 << 26) +#define HWCAP2_SME_F16F32 (1 << 27) +#define HWCAP2_SME_B16F32 (1 << 28) +#define HWCAP2_SME_F32F32 (1 << 29) +#define HWCAP2_SME_FA64 (1 << 30) #endif /* _UAPI__ASM_HWCAP_H */ diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index d72c4b4d389c..0f2d7ddd69ae 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -261,6 +261,8 @@ static const struct arm64_ftr_bits ftr_id_aa64pfr0[] = { }; static const struct arm64_ftr_bits ftr_id_aa64pfr1[] = { + ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SME), + FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR1_SME_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR1_MPAMFRAC_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR1_RASFRAC_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_MTE), @@ -293,6 +295,24 @@ static const struct arm64_ftr_bits ftr_id_aa64zfr0[] = { ARM64_FTR_END, }; +static const struct arm64_ftr_bits ftr_id_aa64smfr0[] = { + ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SME), + FTR_STRICT, FTR_EXACT, ID_AA64SMFR0_FA64_SHIFT, 1, 0), + ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SME), + FTR_STRICT, FTR_EXACT, ID_AA64SMFR0_I16I64_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SME), + FTR_STRICT, FTR_EXACT, ID_AA64SMFR0_F64F64_SHIFT, 1, 0), + ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SME), + FTR_STRICT, FTR_EXACT, ID_AA64SMFR0_I8I32_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SME), + FTR_STRICT, FTR_EXACT, ID_AA64SMFR0_F16F32_SHIFT, 1, 0), + ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SME), + FTR_STRICT, FTR_EXACT, ID_AA64SMFR0_B16F32_SHIFT, 1, 0), + ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SME), + FTR_STRICT, FTR_EXACT, ID_AA64SMFR0_F32F32_SHIFT, 1, 0), + ARM64_FTR_END, +}; + static const struct arm64_ftr_bits ftr_id_aa64mmfr0[] = { ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR0_ECV_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR0_FGT_SHIFT, 4, 0), @@ -645,6 +665,7 @@ static const struct __ftr_reg_entry { ARM64_FTR_REG_OVERRIDE(SYS_ID_AA64PFR1_EL1, ftr_id_aa64pfr1, &id_aa64pfr1_override), ARM64_FTR_REG(SYS_ID_AA64ZFR0_EL1, ftr_id_aa64zfr0), + ARM64_FTR_REG(SYS_ID_AA64SMFR0_EL1, ftr_id_aa64smfr0), /* Op1 = 0, CRn = 0, CRm = 5 */ ARM64_FTR_REG(SYS_ID_AA64DFR0_EL1, ftr_id_aa64dfr0), @@ -960,6 +981,7 @@ void __init init_cpu_features(struct cpuinfo_arm64 *info) init_cpu_ftr_reg(SYS_ID_AA64PFR0_EL1, info->reg_id_aa64pfr0); init_cpu_ftr_reg(SYS_ID_AA64PFR1_EL1, info->reg_id_aa64pfr1); init_cpu_ftr_reg(SYS_ID_AA64ZFR0_EL1, info->reg_id_aa64zfr0); + init_cpu_ftr_reg(SYS_ID_AA64SMFR0_EL1, info->reg_id_aa64smfr0); if (id_aa64pfr0_32bit_el0(info->reg_id_aa64pfr0)) init_32bit_cpu_features(&info->aarch32); @@ -2442,6 +2464,33 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .matches = has_cpuid_feature, .min_field_value = 1, }, +#ifdef CONFIG_ARM64_SME + { + .desc = "Scalable Matrix Extension", + .type = ARM64_CPUCAP_SYSTEM_FEATURE, + .capability = ARM64_SME, + .sys_reg = SYS_ID_AA64PFR1_EL1, + .sign = FTR_UNSIGNED, + .field_pos = ID_AA64PFR1_SME_SHIFT, + .field_width = 4, + .min_field_value = ID_AA64PFR1_SME, + .matches = has_cpuid_feature, + .cpu_enable = sme_kernel_enable, + }, + /* FA64 should be sorted after the base SME capability */ + { + .desc = "FA64", + .type = ARM64_CPUCAP_SYSTEM_FEATURE, + .capability = ARM64_SME_FA64, + .sys_reg = SYS_ID_AA64SMFR0_EL1, + .sign = FTR_UNSIGNED, + .field_pos = ID_AA64SMFR0_FA64_SHIFT, + .field_width = 1, + .min_field_value = ID_AA64SMFR0_FA64, + .matches = has_cpuid_feature, + .cpu_enable = fa64_kernel_enable, + }, +#endif /* CONFIG_ARM64_SME */ {}, }; @@ -2575,6 +2624,16 @@ static const struct arm64_cpu_capabilities arm64_elf_hwcaps[] = { HWCAP_CAP(SYS_ID_AA64MMFR0_EL1, ID_AA64MMFR0_ECV_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_ECV), HWCAP_CAP(SYS_ID_AA64MMFR1_EL1, ID_AA64MMFR1_AFP_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_AFP), HWCAP_CAP(SYS_ID_AA64ISAR2_EL1, ID_AA64ISAR2_RPRES_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_RPRES), +#ifdef CONFIG_ARM64_SME + HWCAP_CAP(SYS_ID_AA64PFR1_EL1, ID_AA64PFR1_SME_SHIFT, 4, FTR_UNSIGNED, ID_AA64PFR1_SME, CAP_HWCAP, KERNEL_HWCAP_SME), + HWCAP_CAP(SYS_ID_AA64SMFR0_EL1, ID_AA64SMFR0_FA64_SHIFT, 1, FTR_UNSIGNED, ID_AA64SMFR0_FA64, CAP_HWCAP, KERNEL_HWCAP_SME_FA64), + HWCAP_CAP(SYS_ID_AA64SMFR0_EL1, ID_AA64SMFR0_I16I64_SHIFT, 4, FTR_UNSIGNED, ID_AA64SMFR0_I16I64, CAP_HWCAP, KERNEL_HWCAP_SME_I16I64), + HWCAP_CAP(SYS_ID_AA64SMFR0_EL1, ID_AA64SMFR0_F64F64_SHIFT, 1, FTR_UNSIGNED, ID_AA64SMFR0_F64F64, CAP_HWCAP, KERNEL_HWCAP_SME_F64F64), + HWCAP_CAP(SYS_ID_AA64SMFR0_EL1, ID_AA64SMFR0_I8I32_SHIFT, 4, FTR_UNSIGNED, ID_AA64SMFR0_I8I32, CAP_HWCAP, KERNEL_HWCAP_SME_I8I32), + HWCAP_CAP(SYS_ID_AA64SMFR0_EL1, ID_AA64SMFR0_F16F32_SHIFT, 1, FTR_UNSIGNED, ID_AA64SMFR0_F16F32, CAP_HWCAP, KERNEL_HWCAP_SME_F16F32), + HWCAP_CAP(SYS_ID_AA64SMFR0_EL1, ID_AA64SMFR0_B16F32_SHIFT, 1, FTR_UNSIGNED, ID_AA64SMFR0_B16F32, CAP_HWCAP, KERNEL_HWCAP_SME_B16F32), + HWCAP_CAP(SYS_ID_AA64SMFR0_EL1, ID_AA64SMFR0_F32F32_SHIFT, 1, FTR_UNSIGNED, ID_AA64SMFR0_F32F32, CAP_HWCAP, KERNEL_HWCAP_SME_F32F32), +#endif /* CONFIG_ARM64_SME */ {}, }; diff --git a/arch/arm64/kernel/cpuinfo.c b/arch/arm64/kernel/cpuinfo.c index 330b92ea863a..a73fe2888b7e 100644 --- a/arch/arm64/kernel/cpuinfo.c +++ b/arch/arm64/kernel/cpuinfo.c @@ -98,6 +98,14 @@ static const char *const hwcap_str[] = { [KERNEL_HWCAP_AFP] = "afp", [KERNEL_HWCAP_RPRES] = "rpres", [KERNEL_HWCAP_MTE3] = "mte3", + [KERNEL_HWCAP_SME] = "sme", + [KERNEL_HWCAP_SME_I16I64] = "smei16i64", + [KERNEL_HWCAP_SME_F64F64] = "smef64f64", + [KERNEL_HWCAP_SME_I8I32] = "smei8i32", + [KERNEL_HWCAP_SME_F16F32] = "smef16f32", + [KERNEL_HWCAP_SME_B16F32] = "smeb16f32", + [KERNEL_HWCAP_SME_F32F32] = "smef32f32", + [KERNEL_HWCAP_SME_FA64] = "smefa64", }; #ifdef CONFIG_COMPAT @@ -401,6 +409,7 @@ static void __cpuinfo_store_cpu(struct cpuinfo_arm64 *info) info->reg_id_aa64pfr0 = read_cpuid(ID_AA64PFR0_EL1); info->reg_id_aa64pfr1 = read_cpuid(ID_AA64PFR1_EL1); info->reg_id_aa64zfr0 = read_cpuid(ID_AA64ZFR0_EL1); + info->reg_id_aa64smfr0 = read_cpuid(ID_AA64SMFR0_EL1); if (id_aa64pfr1_mte(info->reg_id_aa64pfr1)) info->reg_gmid = read_cpuid(GMID_EL1); diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c index 47af76e53221..e4fba0bfb55e 100644 --- a/arch/arm64/kernel/fpsimd.c +++ b/arch/arm64/kernel/fpsimd.c @@ -993,6 +993,32 @@ void fpsimd_release_task(struct task_struct *dead_task) #endif /* CONFIG_ARM64_SVE */ +#ifdef CONFIG_ARM64_SME + +void sme_kernel_enable(const struct arm64_cpu_capabilities *__always_unused p) +{ + /* Set priority for all PEs to architecturally defined minimum */ + write_sysreg_s(read_sysreg_s(SYS_SMPRI_EL1) & ~SMPRI_EL1_PRIORITY_MASK, + SYS_SMPRI_EL1); + + /* Allow SME in kernel */ + write_sysreg(read_sysreg(CPACR_EL1) | CPACR_EL1_SMEN_EL1EN, CPACR_EL1); + isb(); +} + +/* + * This must be called after sme_kernel_enable(), we rely on the + * feature table being sorted to ensure this. + */ +void fa64_kernel_enable(const struct arm64_cpu_capabilities *__always_unused p) +{ + /* Allow use of FA64 */ + write_sysreg_s(read_sysreg_s(SYS_SMCR_EL1) | SMCR_ELx_FA64_MASK, + SYS_SMCR_EL1); +} + +#endif /* CONFIG_ARM64_SVE */ + /* * Trapped SVE access * @@ -1538,6 +1564,10 @@ static int __init fpsimd_init(void) if (!cpu_have_named_feature(ASIMD)) pr_notice("Advanced SIMD is not implemented\n"); + + if (cpu_have_named_feature(SME) && !cpu_have_named_feature(SVE)) + pr_notice("SME is implemented but not SVE\n"); + return sve_sysctl_init(); } core_initcall(fpsimd_init); diff --git a/arch/arm64/tools/cpucaps b/arch/arm64/tools/cpucaps index 3ed418f70e3b..e52b289a27c2 100644 --- a/arch/arm64/tools/cpucaps +++ b/arch/arm64/tools/cpucaps @@ -43,6 +43,8 @@ KVM_PROTECTED_MODE MISMATCHED_CACHE_TYPE MTE MTE_ASYMM +SME +SME_FA64 SPECTRE_V2 SPECTRE_V3A SPECTRE_V4 -- cgit v1.2.3 From b42990d3bf77cc29d7c33e21518c1f806dae6b21 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Tue, 19 Apr 2022 12:22:17 +0100 Subject: arm64/sme: Identify supported SME vector lengths at boot The vector lengths used for SME are controlled through a similar set of registers to those for SVE and enumerated using a similar algorithm with some slight differences due to the fact that unlike SVE there are no restrictions on which combinations of vector lengths can be supported nor any mandatory vector lengths which must be implemented. Add a new vector type and implement support for enumerating it. One slightly awkward feature is that we need to read the current vector length using a different instruction (or enter streaming mode which would have the same issue and be higher cost). Rather than add an ops structure we add special cases directly in the otherwise generic vec_probe_vqs() function, this is a bit inelegant but it's the only place where this is an issue. Signed-off-by: Mark Brown Reviewed-by: Catalin Marinas Link: https://lore.kernel.org/r/20220419112247.711548-10-broonie@kernel.org Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/cpu.h | 3 + arch/arm64/include/asm/cpufeature.h | 7 ++ arch/arm64/include/asm/fpsimd.h | 26 ++++++++ arch/arm64/include/asm/processor.h | 1 + arch/arm64/kernel/cpufeature.c | 47 ++++++++++++++ arch/arm64/kernel/cpuinfo.c | 4 ++ arch/arm64/kernel/entry-fpsimd.S | 9 +++ arch/arm64/kernel/fpsimd.c | 123 +++++++++++++++++++++++++++++++++++- 8 files changed, 218 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/arm64/include/asm/cpu.h b/arch/arm64/include/asm/cpu.h index d08062bcb9c1..115cdec1ae87 100644 --- a/arch/arm64/include/asm/cpu.h +++ b/arch/arm64/include/asm/cpu.h @@ -64,6 +64,9 @@ struct cpuinfo_arm64 { /* pseudo-ZCR for recording maximum ZCR_EL1 LEN value: */ u64 reg_zcr; + + /* pseudo-SMCR for recording maximum SMCR_EL1 LEN value: */ + u64 reg_smcr; }; DECLARE_PER_CPU(struct cpuinfo_arm64, cpu_data); diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h index 8ac12e4094aa..5ddfae233ea5 100644 --- a/arch/arm64/include/asm/cpufeature.h +++ b/arch/arm64/include/asm/cpufeature.h @@ -622,6 +622,13 @@ static inline bool id_aa64pfr0_sve(u64 pfr0) return val > 0; } +static inline bool id_aa64pfr1_sme(u64 pfr1) +{ + u32 val = cpuid_feature_extract_unsigned_field(pfr1, ID_AA64PFR1_SME_SHIFT); + + return val > 0; +} + static inline bool id_aa64pfr1_mte(u64 pfr1) { u32 val = cpuid_feature_extract_unsigned_field(pfr1, ID_AA64PFR1_MTE_SHIFT); diff --git a/arch/arm64/include/asm/fpsimd.h b/arch/arm64/include/asm/fpsimd.h index 2e8ef00e7520..32cd682258d9 100644 --- a/arch/arm64/include/asm/fpsimd.h +++ b/arch/arm64/include/asm/fpsimd.h @@ -78,6 +78,7 @@ extern void sme_kernel_enable(const struct arm64_cpu_capabilities *__unused); extern void fa64_kernel_enable(const struct arm64_cpu_capabilities *__unused); extern u64 read_zcr_features(void); +extern u64 read_smcr_features(void); /* * Helpers to translate bit indices in sve_vq_map to VQ values (and @@ -172,6 +173,12 @@ static inline void write_vl(enum vec_type type, u64 val) tmp = read_sysreg_s(SYS_ZCR_EL1) & ~ZCR_ELx_LEN_MASK; write_sysreg_s(tmp | val, SYS_ZCR_EL1); break; +#endif +#ifdef CONFIG_ARM64_SME + case ARM64_VEC_SME: + tmp = read_sysreg_s(SYS_SMCR_EL1) & ~SMCR_ELx_LEN_MASK; + write_sysreg_s(tmp | val, SYS_SMCR_EL1); + break; #endif default: WARN_ON_ONCE(1); @@ -268,12 +275,31 @@ static inline void sme_smstop(void) asm volatile(__msr_s(SYS_SVCR_SMSTOP_SMZA_EL0, "xzr")); } +extern void __init sme_setup(void); + +static inline int sme_max_vl(void) +{ + return vec_max_vl(ARM64_VEC_SME); +} + +static inline int sme_max_virtualisable_vl(void) +{ + return vec_max_virtualisable_vl(ARM64_VEC_SME); +} + +extern unsigned int sme_get_vl(void); + #else static inline void sme_smstart_sm(void) { } static inline void sme_smstop_sm(void) { } static inline void sme_smstop(void) { } +static inline void sme_setup(void) { } +static inline unsigned int sme_get_vl(void) { return 0; } +static inline int sme_max_vl(void) { return 0; } +static inline int sme_max_virtualisable_vl(void) { return 0; } + #endif /* ! CONFIG_ARM64_SME */ /* For use by EFI runtime services calls only */ diff --git a/arch/arm64/include/asm/processor.h b/arch/arm64/include/asm/processor.h index 73e38d9a540c..abf34a9c2eab 100644 --- a/arch/arm64/include/asm/processor.h +++ b/arch/arm64/include/asm/processor.h @@ -118,6 +118,7 @@ struct debug_info { enum vec_type { ARM64_VEC_SVE = 0, + ARM64_VEC_SME, ARM64_VEC_MAX, }; diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index 0f2d7ddd69ae..082b3f48cbfd 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -581,6 +581,12 @@ static const struct arm64_ftr_bits ftr_zcr[] = { ARM64_FTR_END, }; +static const struct arm64_ftr_bits ftr_smcr[] = { + ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, + SMCR_ELx_LEN_SHIFT, SMCR_ELx_LEN_SIZE, 0), /* LEN */ + ARM64_FTR_END, +}; + /* * Common ftr bits for a 32bit register with all hidden, strict * attributes, with 4bit feature fields and a default safe value of @@ -687,6 +693,7 @@ static const struct __ftr_reg_entry { /* Op1 = 0, CRn = 1, CRm = 2 */ ARM64_FTR_REG(SYS_ZCR_EL1, ftr_zcr), + ARM64_FTR_REG(SYS_SMCR_EL1, ftr_smcr), /* Op1 = 1, CRn = 0, CRm = 0 */ ARM64_FTR_REG(SYS_GMID_EL1, ftr_gmid), @@ -991,6 +998,12 @@ void __init init_cpu_features(struct cpuinfo_arm64 *info) vec_init_vq_map(ARM64_VEC_SVE); } + if (id_aa64pfr1_sme(info->reg_id_aa64pfr1)) { + init_cpu_ftr_reg(SYS_SMCR_EL1, info->reg_smcr); + if (IS_ENABLED(CONFIG_ARM64_SME)) + vec_init_vq_map(ARM64_VEC_SME); + } + if (id_aa64pfr1_mte(info->reg_id_aa64pfr1)) init_cpu_ftr_reg(SYS_GMID_EL1, info->reg_gmid); @@ -1217,6 +1230,9 @@ void update_cpu_features(int cpu, taint |= check_update_ftr_reg(SYS_ID_AA64ZFR0_EL1, cpu, info->reg_id_aa64zfr0, boot->reg_id_aa64zfr0); + taint |= check_update_ftr_reg(SYS_ID_AA64SMFR0_EL1, cpu, + info->reg_id_aa64smfr0, boot->reg_id_aa64smfr0); + if (id_aa64pfr0_sve(info->reg_id_aa64pfr0)) { taint |= check_update_ftr_reg(SYS_ZCR_EL1, cpu, info->reg_zcr, boot->reg_zcr); @@ -1227,6 +1243,16 @@ void update_cpu_features(int cpu, vec_update_vq_map(ARM64_VEC_SVE); } + if (id_aa64pfr1_sme(info->reg_id_aa64pfr1)) { + taint |= check_update_ftr_reg(SYS_SMCR_EL1, cpu, + info->reg_smcr, boot->reg_smcr); + + /* Probe vector lengths, unless we already gave up on SME */ + if (id_aa64pfr1_sme(read_sanitised_ftr_reg(SYS_ID_AA64PFR1_EL1)) && + !system_capabilities_finalized()) + vec_update_vq_map(ARM64_VEC_SME); + } + /* * The kernel uses the LDGM/STGM instructions and the number of tags * they read/write depends on the GMID_EL1.BS field. Check that the @@ -2931,6 +2957,23 @@ static void verify_sve_features(void) /* Add checks on other ZCR bits here if necessary */ } +static void verify_sme_features(void) +{ + u64 safe_smcr = read_sanitised_ftr_reg(SYS_SMCR_EL1); + u64 smcr = read_smcr_features(); + + unsigned int safe_len = safe_smcr & SMCR_ELx_LEN_MASK; + unsigned int len = smcr & SMCR_ELx_LEN_MASK; + + if (len < safe_len || vec_verify_vq_map(ARM64_VEC_SME)) { + pr_crit("CPU%d: SME: vector length support mismatch\n", + smp_processor_id()); + cpu_die_early(); + } + + /* Add checks on other SMCR bits here if necessary */ +} + static void verify_hyp_capabilities(void) { u64 safe_mmfr1, mmfr0, mmfr1; @@ -2983,6 +3026,9 @@ static void verify_local_cpu_capabilities(void) if (system_supports_sve()) verify_sve_features(); + if (system_supports_sme()) + verify_sme_features(); + if (is_hyp_mode_available()) verify_hyp_capabilities(); } @@ -3100,6 +3146,7 @@ void __init setup_cpu_features(void) pr_info("emulated: Privileged Access Never (PAN) using TTBR0_EL1 switching\n"); sve_setup(); + sme_setup(); minsigstksz_setup(); /* Advertise that we have computed the system capabilities */ diff --git a/arch/arm64/kernel/cpuinfo.c b/arch/arm64/kernel/cpuinfo.c index a73fe2888b7e..8a8136a096ac 100644 --- a/arch/arm64/kernel/cpuinfo.c +++ b/arch/arm64/kernel/cpuinfo.c @@ -421,6 +421,10 @@ static void __cpuinfo_store_cpu(struct cpuinfo_arm64 *info) id_aa64pfr0_sve(info->reg_id_aa64pfr0)) info->reg_zcr = read_zcr_features(); + if (IS_ENABLED(CONFIG_ARM64_SME) && + id_aa64pfr1_sme(info->reg_id_aa64pfr1)) + info->reg_smcr = read_smcr_features(); + cpuinfo_detect_icache_policy(info); } diff --git a/arch/arm64/kernel/entry-fpsimd.S b/arch/arm64/kernel/entry-fpsimd.S index dc242e269f9a..deee5f01462e 100644 --- a/arch/arm64/kernel/entry-fpsimd.S +++ b/arch/arm64/kernel/entry-fpsimd.S @@ -86,3 +86,12 @@ SYM_FUNC_START(sve_flush_live) SYM_FUNC_END(sve_flush_live) #endif /* CONFIG_ARM64_SVE */ + +#ifdef CONFIG_ARM64_SME + +SYM_FUNC_START(sme_get_vl) + _sme_rdsvl 0, 1 + ret +SYM_FUNC_END(sme_get_vl) + +#endif /* CONFIG_ARM64_SME */ diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c index e4fba0bfb55e..5e5fbd9cba75 100644 --- a/arch/arm64/kernel/fpsimd.c +++ b/arch/arm64/kernel/fpsimd.c @@ -136,6 +136,12 @@ __ro_after_init struct vl_info vl_info[ARM64_VEC_MAX] = { .max_virtualisable_vl = SVE_VL_MIN, }, #endif +#ifdef CONFIG_ARM64_SME + [ARM64_VEC_SME] = { + .type = ARM64_VEC_SME, + .name = "SME", + }, +#endif }; static unsigned int vec_vl_inherit_flag(enum vec_type type) @@ -186,6 +192,20 @@ extern void __percpu *efi_sve_state; #endif /* ! CONFIG_ARM64_SVE */ +#ifdef CONFIG_ARM64_SME + +static int get_sme_default_vl(void) +{ + return get_default_vl(ARM64_VEC_SME); +} + +static void set_sme_default_vl(int val) +{ + set_default_vl(ARM64_VEC_SME, val); +} + +#endif + DEFINE_PER_CPU(bool, fpsimd_context_busy); EXPORT_PER_CPU_SYMBOL(fpsimd_context_busy); @@ -409,6 +429,8 @@ static unsigned int find_supported_vector_length(enum vec_type type, if (vl > max_vl) vl = max_vl; + if (vl < info->min_vl) + vl = info->min_vl; bit = find_next_bit(info->vq_map, SVE_VQ_MAX, __vq_to_bit(sve_vq_from_vl(vl))); @@ -770,7 +792,23 @@ static void vec_probe_vqs(struct vl_info *info, for (vq = SVE_VQ_MAX; vq >= SVE_VQ_MIN; --vq) { write_vl(info->type, vq - 1); /* self-syncing */ - vl = sve_get_vl(); + + switch (info->type) { + case ARM64_VEC_SVE: + vl = sve_get_vl(); + break; + case ARM64_VEC_SME: + vl = sme_get_vl(); + break; + default: + vl = 0; + break; + } + + /* Minimum VL identified? */ + if (sve_vq_from_vl(vl) > vq) + break; + vq = sve_vq_from_vl(vl); /* skip intervening lengths */ set_bit(__vq_to_bit(vq), map); } @@ -1017,7 +1055,88 @@ void fa64_kernel_enable(const struct arm64_cpu_capabilities *__always_unused p) SYS_SMCR_EL1); } -#endif /* CONFIG_ARM64_SVE */ +/* + * Read the pseudo-SMCR used by cpufeatures to identify the supported + * vector length. + * + * Use only if SME is present. + * This function clobbers the SME vector length. + */ +u64 read_smcr_features(void) +{ + u64 smcr; + unsigned int vq_max; + + sme_kernel_enable(NULL); + sme_smstart_sm(); + + /* + * Set the maximum possible VL. + */ + write_sysreg_s(read_sysreg_s(SYS_SMCR_EL1) | SMCR_ELx_LEN_MASK, + SYS_SMCR_EL1); + + smcr = read_sysreg_s(SYS_SMCR_EL1); + smcr &= ~(u64)SMCR_ELx_LEN_MASK; /* Only the LEN field */ + vq_max = sve_vq_from_vl(sve_get_vl()); + smcr |= vq_max - 1; /* set LEN field to maximum effective value */ + + sme_smstop_sm(); + + return smcr; +} + +void __init sme_setup(void) +{ + struct vl_info *info = &vl_info[ARM64_VEC_SME]; + u64 smcr; + int min_bit; + + if (!system_supports_sme()) + return; + + /* + * SME doesn't require any particular vector length be + * supported but it does require at least one. We should have + * disabled the feature entirely while bringing up CPUs but + * let's double check here. + */ + WARN_ON(bitmap_empty(info->vq_map, SVE_VQ_MAX)); + + min_bit = find_last_bit(info->vq_map, SVE_VQ_MAX); + info->min_vl = sve_vl_from_vq(__bit_to_vq(min_bit)); + + smcr = read_sanitised_ftr_reg(SYS_SMCR_EL1); + info->max_vl = sve_vl_from_vq((smcr & SMCR_ELx_LEN_MASK) + 1); + + /* + * Sanity-check that the max VL we determined through CPU features + * corresponds properly to sme_vq_map. If not, do our best: + */ + if (WARN_ON(info->max_vl != find_supported_vector_length(ARM64_VEC_SME, + info->max_vl))) + info->max_vl = find_supported_vector_length(ARM64_VEC_SME, + info->max_vl); + + WARN_ON(info->min_vl > info->max_vl); + + /* + * For the default VL, pick the maximum supported value <= 32 + * (256 bits) if there is one since this is guaranteed not to + * grow the signal frame when in streaming mode, otherwise the + * minimum available VL will be used. + */ + set_sme_default_vl(find_supported_vector_length(ARM64_VEC_SME, 32)); + + pr_info("SME: minimum available vector length %u bytes per vector\n", + info->min_vl); + pr_info("SME: maximum available vector length %u bytes per vector\n", + info->max_vl); + pr_info("SME: default vector length %u bytes per vector\n", + get_sme_default_vl()); +} + +#endif /* CONFIG_ARM64_SME */ /* * Trapped SVE access -- cgit v1.2.3 From 12f1bacfc5d9e55bedbfc7a25bf42ff6d19d1dab Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Tue, 19 Apr 2022 12:22:18 +0100 Subject: arm64/sme: Implement sysctl to set the default vector length As for SVE provide a sysctl which allows the default SME vector length to be configured. Signed-off-by: Mark Brown Reviewed-by: Catalin Marinas Link: https://lore.kernel.org/r/20220419112247.711548-11-broonie@kernel.org Signed-off-by: Catalin Marinas --- arch/arm64/kernel/fpsimd.c | 29 ++++++++++++++++++++++++++++- 1 file changed, 28 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c index 5e5fbd9cba75..754a96563f6f 100644 --- a/arch/arm64/kernel/fpsimd.c +++ b/arch/arm64/kernel/fpsimd.c @@ -489,6 +489,30 @@ static int __init sve_sysctl_init(void) static int __init sve_sysctl_init(void) { return 0; } #endif /* ! (CONFIG_ARM64_SVE && CONFIG_SYSCTL) */ +#if defined(CONFIG_ARM64_SME) && defined(CONFIG_SYSCTL) +static struct ctl_table sme_default_vl_table[] = { + { + .procname = "sme_default_vector_length", + .mode = 0644, + .proc_handler = vec_proc_do_default_vl, + .extra1 = &vl_info[ARM64_VEC_SME], + }, + { } +}; + +static int __init sme_sysctl_init(void) +{ + if (system_supports_sme()) + if (!register_sysctl("abi", sme_default_vl_table)) + return -EINVAL; + + return 0; +} + +#else /* ! (CONFIG_ARM64_SME && CONFIG_SYSCTL) */ +static int __init sme_sysctl_init(void) { return 0; } +#endif /* ! (CONFIG_ARM64_SME && CONFIG_SYSCTL) */ + #define ZREG(sve_state, vq, n) ((char *)(sve_state) + \ (SVE_SIG_ZREG_OFFSET(vq, n) - SVE_SIG_REGS_OFFSET)) @@ -1687,6 +1711,9 @@ static int __init fpsimd_init(void) if (cpu_have_named_feature(SME) && !cpu_have_named_feature(SVE)) pr_notice("SME is implemented but not SVE\n"); - return sve_sysctl_init(); + sve_sysctl_init(); + sme_sysctl_init(); + + return 0; } core_initcall(fpsimd_init); -- cgit v1.2.3 From 9e4ab6c89109472082616f8d2f6ada7deaffe161 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Tue, 19 Apr 2022 12:22:19 +0100 Subject: arm64/sme: Implement vector length configuration prctl()s As for SVE provide a prctl() interface which allows processes to configure their SME vector length. Signed-off-by: Mark Brown Reviewed-by: Catalin Marinas Link: https://lore.kernel.org/r/20220419112247.711548-12-broonie@kernel.org Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/fpsimd.h | 4 ++++ arch/arm64/include/asm/processor.h | 4 +++- arch/arm64/include/asm/thread_info.h | 1 + arch/arm64/kernel/fpsimd.c | 32 ++++++++++++++++++++++++++++++++ include/uapi/linux/prctl.h | 9 +++++++++ kernel/sys.c | 12 ++++++++++++ 6 files changed, 61 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm64/include/asm/fpsimd.h b/arch/arm64/include/asm/fpsimd.h index 32cd682258d9..38fd6aab7feb 100644 --- a/arch/arm64/include/asm/fpsimd.h +++ b/arch/arm64/include/asm/fpsimd.h @@ -288,6 +288,8 @@ static inline int sme_max_virtualisable_vl(void) } extern unsigned int sme_get_vl(void); +extern int sme_set_current_vl(unsigned long arg); +extern int sme_get_current_vl(void); #else @@ -299,6 +301,8 @@ static inline void sme_setup(void) { } static inline unsigned int sme_get_vl(void) { return 0; } static inline int sme_max_vl(void) { return 0; } static inline int sme_max_virtualisable_vl(void) { return 0; } +static inline int sme_set_current_vl(unsigned long arg) { return -EINVAL; } +static inline int sme_get_current_vl(void) { return -EINVAL; } #endif /* ! CONFIG_ARM64_SME */ diff --git a/arch/arm64/include/asm/processor.h b/arch/arm64/include/asm/processor.h index abf34a9c2eab..7a57cbff8a03 100644 --- a/arch/arm64/include/asm/processor.h +++ b/arch/arm64/include/asm/processor.h @@ -355,9 +355,11 @@ extern void __init minsigstksz_setup(void); */ #include -/* Userspace interface for PR_SVE_{SET,GET}_VL prctl()s: */ +/* Userspace interface for PR_S[MV]E_{SET,GET}_VL prctl()s: */ #define SVE_SET_VL(arg) sve_set_current_vl(arg) #define SVE_GET_VL() sve_get_current_vl() +#define SME_SET_VL(arg) sme_set_current_vl(arg) +#define SME_GET_VL() sme_get_current_vl() /* PR_PAC_RESET_KEYS prctl */ #define PAC_RESET_KEYS(tsk, arg) ptrauth_prctl_reset_keys(tsk, arg) diff --git a/arch/arm64/include/asm/thread_info.h b/arch/arm64/include/asm/thread_info.h index e1317b7c4525..4e6b58dcd6f9 100644 --- a/arch/arm64/include/asm/thread_info.h +++ b/arch/arm64/include/asm/thread_info.h @@ -82,6 +82,7 @@ int arch_dup_task_struct(struct task_struct *dst, #define TIF_SVE_VL_INHERIT 24 /* Inherit SVE vl_onexec across exec */ #define TIF_SSBD 25 /* Wants SSB mitigation */ #define TIF_TAGGED_ADDR 26 /* Allow tagged user addresses */ +#define TIF_SME_VL_INHERIT 28 /* Inherit SME vl_onexec across exec */ #define _TIF_SIGPENDING (1 << TIF_SIGPENDING) #define _TIF_NEED_RESCHED (1 << TIF_NEED_RESCHED) diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c index 754a96563f6f..39f44fcb9b99 100644 --- a/arch/arm64/kernel/fpsimd.c +++ b/arch/arm64/kernel/fpsimd.c @@ -149,6 +149,8 @@ static unsigned int vec_vl_inherit_flag(enum vec_type type) switch (type) { case ARM64_VEC_SVE: return TIF_SVE_VL_INHERIT; + case ARM64_VEC_SME: + return TIF_SME_VL_INHERIT; default: WARN_ON_ONCE(1); return 0; @@ -807,6 +809,36 @@ int sve_get_current_vl(void) return vec_prctl_status(ARM64_VEC_SVE, 0); } +#ifdef CONFIG_ARM64_SME +/* PR_SME_SET_VL */ +int sme_set_current_vl(unsigned long arg) +{ + unsigned long vl, flags; + int ret; + + vl = arg & PR_SME_VL_LEN_MASK; + flags = arg & ~vl; + + if (!system_supports_sme() || is_compat_task()) + return -EINVAL; + + ret = vec_set_vector_length(current, ARM64_VEC_SME, vl, flags); + if (ret) + return ret; + + return vec_prctl_status(ARM64_VEC_SME, flags); +} + +/* PR_SME_GET_VL */ +int sme_get_current_vl(void) +{ + if (!system_supports_sme() || is_compat_task()) + return -EINVAL; + + return vec_prctl_status(ARM64_VEC_SME, 0); +} +#endif /* CONFIG_ARM64_SME */ + static void vec_probe_vqs(struct vl_info *info, DECLARE_BITMAP(map, SVE_VQ_MAX)) { diff --git a/include/uapi/linux/prctl.h b/include/uapi/linux/prctl.h index e998764f0262..a5e06dcbba13 100644 --- a/include/uapi/linux/prctl.h +++ b/include/uapi/linux/prctl.h @@ -272,6 +272,15 @@ struct prctl_mm_map { # define PR_SCHED_CORE_SCOPE_THREAD_GROUP 1 # define PR_SCHED_CORE_SCOPE_PROCESS_GROUP 2 +/* arm64 Scalable Matrix Extension controls */ +/* Flag values must be in sync with SVE versions */ +#define PR_SME_SET_VL 63 /* set task vector length */ +# define PR_SME_SET_VL_ONEXEC (1 << 18) /* defer effect until exec */ +#define PR_SME_GET_VL 64 /* get task vector length */ +/* Bits common to PR_SME_SET_VL and PR_SME_GET_VL */ +# define PR_SME_VL_LEN_MASK 0xffff +# define PR_SME_VL_INHERIT (1 << 17) /* inherit across exec */ + #define PR_SET_VMA 0x53564d41 # define PR_SET_VMA_ANON_NAME 0 diff --git a/kernel/sys.c b/kernel/sys.c index 374f83e95239..b911fa6d81ab 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -117,6 +117,12 @@ #ifndef SVE_GET_VL # define SVE_GET_VL() (-EINVAL) #endif +#ifndef SME_SET_VL +# define SME_SET_VL(a) (-EINVAL) +#endif +#ifndef SME_GET_VL +# define SME_GET_VL() (-EINVAL) +#endif #ifndef PAC_RESET_KEYS # define PAC_RESET_KEYS(a, b) (-EINVAL) #endif @@ -2541,6 +2547,12 @@ SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3, case PR_SVE_GET_VL: error = SVE_GET_VL(); break; + case PR_SME_SET_VL: + error = SME_SET_VL(arg2); + break; + case PR_SME_GET_VL: + error = SME_GET_VL(); + break; case PR_GET_SPECULATION_CTRL: if (arg3 || arg4 || arg5) return -EINVAL; -- cgit v1.2.3 From a9d69158595017d260ab37bf88b8f125e5e8144c Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Tue, 19 Apr 2022 12:22:20 +0100 Subject: arm64/sme: Implement support for TPIDR2 The Scalable Matrix Extension introduces support for a new thread specific data register TPIDR2 intended for use by libc. The kernel must save the value of TPIDR2 on context switch and should ensure that all new threads start off with a default value of 0. Add a field to the thread_struct to store TPIDR2 and context switch it with the other thread specific data. In case there are future extensions which also use TPIDR2 we introduce system_supports_tpidr2() and use that rather than system_supports_sme() for TPIDR2 handling. Signed-off-by: Mark Brown Reviewed-by: Catalin Marinas Link: https://lore.kernel.org/r/20220419112247.711548-13-broonie@kernel.org Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/cpufeature.h | 5 +++++ arch/arm64/include/asm/processor.h | 1 + arch/arm64/kernel/fpsimd.c | 4 ++++ arch/arm64/kernel/process.c | 14 ++++++++++++-- 4 files changed, 22 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h index 5ddfae233ea5..14a8f3d93add 100644 --- a/arch/arm64/include/asm/cpufeature.h +++ b/arch/arm64/include/asm/cpufeature.h @@ -778,6 +778,11 @@ static __always_inline bool system_supports_fa64(void) cpus_have_const_cap(ARM64_SME_FA64); } +static __always_inline bool system_supports_tpidr2(void) +{ + return system_supports_sme(); +} + static __always_inline bool system_supports_cnp(void) { return IS_ENABLED(CONFIG_ARM64_CNP) && diff --git a/arch/arm64/include/asm/processor.h b/arch/arm64/include/asm/processor.h index 7a57cbff8a03..849e97d418a8 100644 --- a/arch/arm64/include/asm/processor.h +++ b/arch/arm64/include/asm/processor.h @@ -169,6 +169,7 @@ struct thread_struct { u64 mte_ctrl; #endif u64 sctlr_user; + u64 tpidr2_el0; }; static inline unsigned int thread_get_vl(struct thread_struct *thread, diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c index 39f44fcb9b99..231f2d85b65e 100644 --- a/arch/arm64/kernel/fpsimd.c +++ b/arch/arm64/kernel/fpsimd.c @@ -1098,6 +1098,10 @@ void sme_kernel_enable(const struct arm64_cpu_capabilities *__always_unused p) /* Allow SME in kernel */ write_sysreg(read_sysreg(CPACR_EL1) | CPACR_EL1_SMEN_EL1EN, CPACR_EL1); isb(); + + /* Allow EL0 to access TPIDR2 */ + write_sysreg(read_sysreg(SCTLR_EL1) | SCTLR_ELx_ENTP2, SCTLR_EL1); + isb(); } /* diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c index 7fa97df55e3a..e20571f19718 100644 --- a/arch/arm64/kernel/process.c +++ b/arch/arm64/kernel/process.c @@ -250,6 +250,8 @@ void show_regs(struct pt_regs *regs) static void tls_thread_flush(void) { write_sysreg(0, tpidr_el0); + if (system_supports_tpidr2()) + write_sysreg_s(0, SYS_TPIDR2_EL0); if (is_compat_task()) { current->thread.uw.tp_value = 0; @@ -343,6 +345,8 @@ int copy_thread(unsigned long clone_flags, unsigned long stack_start, * out-of-sync with the saved value. */ *task_user_tls(p) = read_sysreg(tpidr_el0); + if (system_supports_tpidr2()) + p->thread.tpidr2_el0 = read_sysreg_s(SYS_TPIDR2_EL0); if (stack_start) { if (is_compat_thread(task_thread_info(p))) @@ -353,10 +357,12 @@ int copy_thread(unsigned long clone_flags, unsigned long stack_start, /* * If a TLS pointer was passed to clone, use it for the new - * thread. + * thread. We also reset TPIDR2 if it's in use. */ - if (clone_flags & CLONE_SETTLS) + if (clone_flags & CLONE_SETTLS) { p->thread.uw.tp_value = tls; + p->thread.tpidr2_el0 = 0; + } } else { /* * A kthread has no context to ERET to, so ensure any buggy @@ -387,6 +393,8 @@ int copy_thread(unsigned long clone_flags, unsigned long stack_start, void tls_preserve_current_state(void) { *task_user_tls(current) = read_sysreg(tpidr_el0); + if (system_supports_tpidr2() && !is_compat_task()) + current->thread.tpidr2_el0 = read_sysreg_s(SYS_TPIDR2_EL0); } static void tls_thread_switch(struct task_struct *next) @@ -399,6 +407,8 @@ static void tls_thread_switch(struct task_struct *next) write_sysreg(0, tpidrro_el0); write_sysreg(*task_user_tls(next), tpidr_el0); + if (system_supports_tpidr2()) + write_sysreg_s(next->thread.tpidr2_el0, SYS_TPIDR2_EL0); } /* -- cgit v1.2.3 From b40c559b45bec736f588c57dd5be967fe573058b Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Tue, 19 Apr 2022 12:22:21 +0100 Subject: arm64/sme: Implement SVCR context switching In SME the use of both streaming SVE mode and ZA are tracked through PSTATE.SM and PSTATE.ZA, visible through the system register SVCR. In order to context switch the floating point state for SME we need to context switch the contents of this register as part of context switching the floating point state. Since changing the vector length exits streaming SVE mode and disables ZA we also make sure we update SVCR appropriately when setting vector length, and similarly ensure that new threads have streaming SVE mode and ZA disabled. Signed-off-by: Mark Brown Reviewed-by: Catalin Marinas Link: https://lore.kernel.org/r/20220419112247.711548-14-broonie@kernel.org Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/fpsimd.h | 3 ++- arch/arm64/include/asm/processor.h | 1 + arch/arm64/include/asm/thread_info.h | 1 + arch/arm64/kernel/fpsimd.c | 18 +++++++++++++++++- arch/arm64/kernel/process.c | 2 ++ arch/arm64/kvm/fpsimd.c | 7 ++++++- 6 files changed, 29 insertions(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/arm64/include/asm/fpsimd.h b/arch/arm64/include/asm/fpsimd.h index 38fd6aab7feb..821d270980da 100644 --- a/arch/arm64/include/asm/fpsimd.h +++ b/arch/arm64/include/asm/fpsimd.h @@ -46,7 +46,8 @@ extern void fpsimd_restore_current_state(void); extern void fpsimd_update_current_state(struct user_fpsimd_state const *state); extern void fpsimd_bind_state_to_cpu(struct user_fpsimd_state *state, - void *sve_state, unsigned int sve_vl); + void *sve_state, unsigned int sve_vl, + u64 *svcr); extern void fpsimd_flush_task_state(struct task_struct *target); extern void fpsimd_save_and_flush_cpu_state(void); diff --git a/arch/arm64/include/asm/processor.h b/arch/arm64/include/asm/processor.h index 849e97d418a8..22cd11e86854 100644 --- a/arch/arm64/include/asm/processor.h +++ b/arch/arm64/include/asm/processor.h @@ -169,6 +169,7 @@ struct thread_struct { u64 mte_ctrl; #endif u64 sctlr_user; + u64 svcr; u64 tpidr2_el0; }; diff --git a/arch/arm64/include/asm/thread_info.h b/arch/arm64/include/asm/thread_info.h index 4e6b58dcd6f9..848739c15de8 100644 --- a/arch/arm64/include/asm/thread_info.h +++ b/arch/arm64/include/asm/thread_info.h @@ -82,6 +82,7 @@ int arch_dup_task_struct(struct task_struct *dst, #define TIF_SVE_VL_INHERIT 24 /* Inherit SVE vl_onexec across exec */ #define TIF_SSBD 25 /* Wants SSB mitigation */ #define TIF_TAGGED_ADDR 26 /* Allow tagged user addresses */ +#define TIF_SME 27 /* SME in use */ #define TIF_SME_VL_INHERIT 28 /* Inherit SME vl_onexec across exec */ #define _TIF_SIGPENDING (1 << TIF_SIGPENDING) diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c index 231f2d85b65e..1c113349f6cc 100644 --- a/arch/arm64/kernel/fpsimd.c +++ b/arch/arm64/kernel/fpsimd.c @@ -121,6 +121,7 @@ struct fpsimd_last_state_struct { struct user_fpsimd_state *st; void *sve_state; + u64 *svcr; unsigned int sve_vl; }; @@ -359,6 +360,9 @@ static void task_fpsimd_load(void) WARN_ON(!system_supports_fpsimd()); WARN_ON(!have_cpu_fpsimd_context()); + if (IS_ENABLED(CONFIG_ARM64_SME) && test_thread_flag(TIF_SME)) + write_sysreg_s(current->thread.svcr, SYS_SVCR_EL0); + if (IS_ENABLED(CONFIG_ARM64_SVE) && test_thread_flag(TIF_SVE)) { sve_set_vq(sve_vq_from_vl(task_get_sve_vl(current)) - 1); sve_load_state(sve_pffr(¤t->thread), @@ -390,6 +394,12 @@ static void fpsimd_save(void) if (test_thread_flag(TIF_FOREIGN_FPSTATE)) return; + if (IS_ENABLED(CONFIG_ARM64_SME) && + test_thread_flag(TIF_SME)) { + u64 *svcr = last->svcr; + *svcr = read_sysreg_s(SYS_SVCR_EL0); + } + if (IS_ENABLED(CONFIG_ARM64_SVE) && test_thread_flag(TIF_SVE)) { if (WARN_ON(sve_get_vl() != last->sve_vl)) { @@ -741,6 +751,10 @@ int vec_set_vector_length(struct task_struct *task, enum vec_type type, if (test_and_clear_tsk_thread_flag(task, TIF_SVE)) sve_to_fpsimd(task); + if (system_supports_sme() && type == ARM64_VEC_SME) + task->thread.svcr &= ~(SYS_SVCR_EL0_SM_MASK | + SYS_SVCR_EL0_ZA_MASK); + if (task == current) put_cpu_fpsimd_context(); @@ -1404,6 +1418,7 @@ static void fpsimd_bind_task_to_cpu(void) last->st = ¤t->thread.uw.fpsimd_state; last->sve_state = current->thread.sve_state; last->sve_vl = task_get_sve_vl(current); + last->svcr = ¤t->thread.svcr; current->thread.fpsimd_cpu = smp_processor_id(); if (system_supports_sve()) { @@ -1418,7 +1433,7 @@ static void fpsimd_bind_task_to_cpu(void) } void fpsimd_bind_state_to_cpu(struct user_fpsimd_state *st, void *sve_state, - unsigned int sve_vl) + unsigned int sve_vl, u64 *svcr) { struct fpsimd_last_state_struct *last = this_cpu_ptr(&fpsimd_last_state); @@ -1427,6 +1442,7 @@ void fpsimd_bind_state_to_cpu(struct user_fpsimd_state *st, void *sve_state, WARN_ON(!in_softirq() && !irqs_disabled()); last->st = st; + last->svcr = svcr; last->sve_state = sve_state; last->sve_vl = sve_vl; } diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c index e20571f19718..07f235b46cf5 100644 --- a/arch/arm64/kernel/process.c +++ b/arch/arm64/kernel/process.c @@ -310,6 +310,8 @@ int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src) dst->thread.sve_state = NULL; clear_tsk_thread_flag(dst, TIF_SVE); + dst->thread.svcr = 0; + /* clear any pending asynchronous tag fault raised by the parent */ clear_tsk_thread_flag(dst, TIF_MTE_ASYNC_FAULT); diff --git a/arch/arm64/kvm/fpsimd.c b/arch/arm64/kvm/fpsimd.c index 397fdac75cb1..ac09f1f682ff 100644 --- a/arch/arm64/kvm/fpsimd.c +++ b/arch/arm64/kvm/fpsimd.c @@ -109,9 +109,14 @@ void kvm_arch_vcpu_ctxsync_fp(struct kvm_vcpu *vcpu) WARN_ON_ONCE(!irqs_disabled()); if (vcpu->arch.flags & KVM_ARM64_FP_ENABLED) { + /* + * Currently we do not support SME guests so SVCR is + * always 0 and we just need a variable to point to. + */ fpsimd_bind_state_to_cpu(&vcpu->arch.ctxt.fp_regs, vcpu->arch.sve_state, - vcpu->arch.sve_max_vl); + vcpu->arch.sve_max_vl, + NULL); clear_thread_flag(TIF_FOREIGN_FPSTATE); update_thread_flag(TIF_SVE, vcpu_has_sve(vcpu)); -- cgit v1.2.3 From af7167d6d2675f3343eff3ad6c9b4a8e30122e2c Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Tue, 19 Apr 2022 12:22:22 +0100 Subject: arm64/sme: Implement streaming SVE context switching When in streaming mode we need to save and restore the streaming mode SVE register state rather than the regular SVE register state. This uses the streaming mode vector length and omits FFR but is otherwise identical, if TIF_SVE is enabled when we are in streaming mode then streaming mode takes precedence. This does not handle use of streaming SVE state with KVM, ptrace or signals. This will be updated in further patches. Signed-off-by: Mark Brown Reviewed-by: Catalin Marinas Link: https://lore.kernel.org/r/20220419112247.711548-15-broonie@kernel.org Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/fpsimd.h | 22 ++++++- arch/arm64/include/asm/fpsimdmacros.h | 11 ++++ arch/arm64/include/asm/processor.h | 10 ++++ arch/arm64/kernel/entry-fpsimd.S | 5 ++ arch/arm64/kernel/fpsimd.c | 109 +++++++++++++++++++++++++++------- arch/arm64/kvm/fpsimd.c | 2 +- 6 files changed, 136 insertions(+), 23 deletions(-) (limited to 'arch') diff --git a/arch/arm64/include/asm/fpsimd.h b/arch/arm64/include/asm/fpsimd.h index 821d270980da..cd94f5c5b516 100644 --- a/arch/arm64/include/asm/fpsimd.h +++ b/arch/arm64/include/asm/fpsimd.h @@ -47,11 +47,21 @@ extern void fpsimd_update_current_state(struct user_fpsimd_state const *state); extern void fpsimd_bind_state_to_cpu(struct user_fpsimd_state *state, void *sve_state, unsigned int sve_vl, - u64 *svcr); + unsigned int sme_vl, u64 *svcr); extern void fpsimd_flush_task_state(struct task_struct *target); extern void fpsimd_save_and_flush_cpu_state(void); +static inline bool thread_sm_enabled(struct thread_struct *thread) +{ + return system_supports_sme() && (thread->svcr & SYS_SVCR_EL0_SM_MASK); +} + +static inline bool thread_za_enabled(struct thread_struct *thread) +{ + return system_supports_sme() && (thread->svcr & SYS_SVCR_EL0_ZA_MASK); +} + /* Maximum VL that SVE/SME VL-agnostic software can transparently support */ #define VL_ARCH_MAX 0x100 @@ -63,7 +73,14 @@ static inline size_t sve_ffr_offset(int vl) static inline void *sve_pffr(struct thread_struct *thread) { - return (char *)thread->sve_state + sve_ffr_offset(thread_get_sve_vl(thread)); + unsigned int vl; + + if (system_supports_sme() && thread_sm_enabled(thread)) + vl = thread_get_sme_vl(thread); + else + vl = thread_get_sve_vl(thread); + + return (char *)thread->sve_state + sve_ffr_offset(vl); } extern void sve_save_state(void *state, u32 *pfpsr, int save_ffr); @@ -72,6 +89,7 @@ extern void sve_load_state(void const *state, u32 const *pfpsr, extern void sve_flush_live(bool flush_ffr, unsigned long vq_minus_1); extern unsigned int sve_get_vl(void); extern void sve_set_vq(unsigned long vq_minus_1); +extern void sme_set_vq(unsigned long vq_minus_1); struct arm64_cpu_capabilities; extern void sve_kernel_enable(const struct arm64_cpu_capabilities *__unused); diff --git a/arch/arm64/include/asm/fpsimdmacros.h b/arch/arm64/include/asm/fpsimdmacros.h index 2e9a33155081..f6ab36e0cd8d 100644 --- a/arch/arm64/include/asm/fpsimdmacros.h +++ b/arch/arm64/include/asm/fpsimdmacros.h @@ -262,6 +262,17 @@ 921: .endm +/* Update SMCR_EL1.LEN with the new VQ */ +.macro sme_load_vq xvqminus1, xtmp, xtmp2 + mrs_s \xtmp, SYS_SMCR_EL1 + bic \xtmp2, \xtmp, SMCR_ELx_LEN_MASK + orr \xtmp2, \xtmp2, \xvqminus1 + cmp \xtmp2, \xtmp + b.eq 921f + msr_s SYS_SMCR_EL1, \xtmp2 //self-synchronising +921: +.endm + /* Preserve the first 128-bits of Znz and zero the rest. */ .macro _sve_flush_z nz _sve_check_zreg \nz diff --git a/arch/arm64/include/asm/processor.h b/arch/arm64/include/asm/processor.h index 22cd11e86854..7542310b4e6b 100644 --- a/arch/arm64/include/asm/processor.h +++ b/arch/arm64/include/asm/processor.h @@ -184,6 +184,11 @@ static inline unsigned int thread_get_sve_vl(struct thread_struct *thread) return thread_get_vl(thread, ARM64_VEC_SVE); } +static inline unsigned int thread_get_sme_vl(struct thread_struct *thread) +{ + return thread_get_vl(thread, ARM64_VEC_SME); +} + unsigned int task_get_vl(const struct task_struct *task, enum vec_type type); void task_set_vl(struct task_struct *task, enum vec_type type, unsigned long vl); @@ -197,6 +202,11 @@ static inline unsigned int task_get_sve_vl(const struct task_struct *task) return task_get_vl(task, ARM64_VEC_SVE); } +static inline unsigned int task_get_sme_vl(const struct task_struct *task) +{ + return task_get_vl(task, ARM64_VEC_SME); +} + static inline void task_set_sve_vl(struct task_struct *task, unsigned long vl) { task_set_vl(task, ARM64_VEC_SVE, vl); diff --git a/arch/arm64/kernel/entry-fpsimd.S b/arch/arm64/kernel/entry-fpsimd.S index deee5f01462e..6f88c0f86d50 100644 --- a/arch/arm64/kernel/entry-fpsimd.S +++ b/arch/arm64/kernel/entry-fpsimd.S @@ -94,4 +94,9 @@ SYM_FUNC_START(sme_get_vl) ret SYM_FUNC_END(sme_get_vl) +SYM_FUNC_START(sme_set_vq) + sme_load_vq x0, x1, x2 + ret +SYM_FUNC_END(sme_set_vq) + #endif /* CONFIG_ARM64_SME */ diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c index 1c113349f6cc..f8506a875eb2 100644 --- a/arch/arm64/kernel/fpsimd.c +++ b/arch/arm64/kernel/fpsimd.c @@ -123,6 +123,7 @@ struct fpsimd_last_state_struct { void *sve_state; u64 *svcr; unsigned int sve_vl; + unsigned int sme_vl; }; static DEFINE_PER_CPU(struct fpsimd_last_state_struct, fpsimd_last_state); @@ -301,17 +302,28 @@ void task_set_vl_onexec(struct task_struct *task, enum vec_type type, task->thread.vl_onexec[type] = vl; } +/* + * TIF_SME controls whether a task can use SME without trapping while + * in userspace, when TIF_SME is set then we must have storage + * alocated in sve_state and za_state to store the contents of both ZA + * and the SVE registers for both streaming and non-streaming modes. + * + * If both SVCR.ZA and SVCR.SM are disabled then at any point we + * may disable TIF_SME and reenable traps. + */ + + /* * TIF_SVE controls whether a task can use SVE without trapping while - * in userspace, and also the way a task's FPSIMD/SVE state is stored - * in thread_struct. + * in userspace, and also (together with TIF_SME) the way a task's + * FPSIMD/SVE state is stored in thread_struct. * * The kernel uses this flag to track whether a user task is actively * using SVE, and therefore whether full SVE register state needs to * be tracked. If not, the cheaper FPSIMD context handling code can * be used instead of the more costly SVE equivalents. * - * * TIF_SVE set: + * * TIF_SVE or SVCR.SM set: * * The task can execute SVE instructions while in userspace without * trapping to the kernel. @@ -319,7 +331,8 @@ void task_set_vl_onexec(struct task_struct *task, enum vec_type type, * When stored, Z0-Z31 (incorporating Vn in bits[127:0] or the * corresponding Zn), P0-P15 and FFR are encoded in in * task->thread.sve_state, formatted appropriately for vector - * length task->thread.sve_vl. + * length task->thread.sve_vl or, if SVCR.SM is set, + * task->thread.sme_vl. * * task->thread.sve_state must point to a valid buffer at least * sve_state_size(task) bytes in size. @@ -357,19 +370,40 @@ void task_set_vl_onexec(struct task_struct *task, enum vec_type type, */ static void task_fpsimd_load(void) { + bool restore_sve_regs = false; + bool restore_ffr; + WARN_ON(!system_supports_fpsimd()); WARN_ON(!have_cpu_fpsimd_context()); - if (IS_ENABLED(CONFIG_ARM64_SME) && test_thread_flag(TIF_SME)) - write_sysreg_s(current->thread.svcr, SYS_SVCR_EL0); - + /* Check if we should restore SVE first */ if (IS_ENABLED(CONFIG_ARM64_SVE) && test_thread_flag(TIF_SVE)) { sve_set_vq(sve_vq_from_vl(task_get_sve_vl(current)) - 1); + restore_sve_regs = true; + restore_ffr = true; + } + + /* Restore SME, override SVE register configuration if needed */ + if (system_supports_sme()) { + unsigned long sme_vl = task_get_sme_vl(current); + + if (test_thread_flag(TIF_SME)) + sme_set_vq(sve_vq_from_vl(sme_vl) - 1); + + write_sysreg_s(current->thread.svcr, SYS_SVCR_EL0); + + if (thread_sm_enabled(¤t->thread)) { + restore_sve_regs = true; + restore_ffr = system_supports_fa64(); + } + } + + if (restore_sve_regs) sve_load_state(sve_pffr(¤t->thread), - ¤t->thread.uw.fpsimd_state.fpsr, true); - } else { + ¤t->thread.uw.fpsimd_state.fpsr, + restore_ffr); + else fpsimd_load_state(¤t->thread.uw.fpsimd_state); - } } /* @@ -387,6 +421,9 @@ static void fpsimd_save(void) struct fpsimd_last_state_struct const *last = this_cpu_ptr(&fpsimd_last_state); /* set by fpsimd_bind_task_to_cpu() or fpsimd_bind_state_to_cpu() */ + bool save_sve_regs = false; + bool save_ffr; + unsigned int vl; WARN_ON(!system_supports_fpsimd()); WARN_ON(!have_cpu_fpsimd_context()); @@ -394,15 +431,33 @@ static void fpsimd_save(void) if (test_thread_flag(TIF_FOREIGN_FPSTATE)) return; - if (IS_ENABLED(CONFIG_ARM64_SME) && - test_thread_flag(TIF_SME)) { + if (test_thread_flag(TIF_SVE)) { + save_sve_regs = true; + save_ffr = true; + vl = last->sve_vl; + } + + if (system_supports_sme()) { u64 *svcr = last->svcr; *svcr = read_sysreg_s(SYS_SVCR_EL0); + + if (thread_za_enabled(¤t->thread)) { + /* ZA state managment is not implemented yet */ + force_signal_inject(SIGKILL, SI_KERNEL, 0, 0); + return; + } + + /* If we are in streaming mode override regular SVE. */ + if (*svcr & SYS_SVCR_EL0_SM_MASK) { + save_sve_regs = true; + save_ffr = system_supports_fa64(); + vl = last->sme_vl; + } } - if (IS_ENABLED(CONFIG_ARM64_SVE) && - test_thread_flag(TIF_SVE)) { - if (WARN_ON(sve_get_vl() != last->sve_vl)) { + if (IS_ENABLED(CONFIG_ARM64_SVE) && save_sve_regs) { + /* Get the configured VL from RDVL, will account for SM */ + if (WARN_ON(sve_get_vl() != vl)) { /* * Can't save the user regs, so current would * re-enter user with corrupt state. @@ -413,8 +468,8 @@ static void fpsimd_save(void) } sve_save_state((char *)last->sve_state + - sve_ffr_offset(last->sve_vl), - &last->st->fpsr, true); + sve_ffr_offset(vl), + &last->st->fpsr, save_ffr); } else { fpsimd_save_state(last->st); } @@ -619,7 +674,14 @@ static void sve_to_fpsimd(struct task_struct *task) */ static size_t sve_state_size(struct task_struct const *task) { - return SVE_SIG_REGS_SIZE(sve_vq_from_vl(task_get_sve_vl(task))); + unsigned int vl = 0; + + if (system_supports_sve()) + vl = task_get_sve_vl(task); + if (system_supports_sme()) + vl = max(vl, task_get_sme_vl(task)); + + return SVE_SIG_REGS_SIZE(sve_vq_from_vl(vl)); } /* @@ -748,7 +810,8 @@ int vec_set_vector_length(struct task_struct *task, enum vec_type type, } fpsimd_flush_task_state(task); - if (test_and_clear_tsk_thread_flag(task, TIF_SVE)) + if (test_and_clear_tsk_thread_flag(task, TIF_SVE) || + thread_sm_enabled(&task->thread)) sve_to_fpsimd(task); if (system_supports_sme() && type == ARM64_VEC_SME) @@ -1375,6 +1438,9 @@ void fpsimd_flush_thread(void) fpsimd_flush_thread_vl(ARM64_VEC_SVE); } + if (system_supports_sme()) + fpsimd_flush_thread_vl(ARM64_VEC_SME); + put_cpu_fpsimd_context(); } @@ -1418,6 +1484,7 @@ static void fpsimd_bind_task_to_cpu(void) last->st = ¤t->thread.uw.fpsimd_state; last->sve_state = current->thread.sve_state; last->sve_vl = task_get_sve_vl(current); + last->sme_vl = task_get_sme_vl(current); last->svcr = ¤t->thread.svcr; current->thread.fpsimd_cpu = smp_processor_id(); @@ -1433,7 +1500,8 @@ static void fpsimd_bind_task_to_cpu(void) } void fpsimd_bind_state_to_cpu(struct user_fpsimd_state *st, void *sve_state, - unsigned int sve_vl, u64 *svcr) + unsigned int sve_vl, unsigned int sme_vl, + u64 *svcr) { struct fpsimd_last_state_struct *last = this_cpu_ptr(&fpsimd_last_state); @@ -1445,6 +1513,7 @@ void fpsimd_bind_state_to_cpu(struct user_fpsimd_state *st, void *sve_state, last->svcr = svcr; last->sve_state = sve_state; last->sve_vl = sve_vl; + last->sme_vl = sme_vl; } /* diff --git a/arch/arm64/kvm/fpsimd.c b/arch/arm64/kvm/fpsimd.c index ac09f1f682ff..394e583bb73e 100644 --- a/arch/arm64/kvm/fpsimd.c +++ b/arch/arm64/kvm/fpsimd.c @@ -116,7 +116,7 @@ void kvm_arch_vcpu_ctxsync_fp(struct kvm_vcpu *vcpu) fpsimd_bind_state_to_cpu(&vcpu->arch.ctxt.fp_regs, vcpu->arch.sve_state, vcpu->arch.sve_max_vl, - NULL); + 0, NULL); clear_thread_flag(TIF_FOREIGN_FPSTATE); update_thread_flag(TIF_SVE, vcpu_has_sve(vcpu)); -- cgit v1.2.3 From 0033cd9339642f9b7bef23f96aa2e7277ab51cce Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Tue, 19 Apr 2022 12:22:23 +0100 Subject: arm64/sme: Implement ZA context switching Allocate space for storing ZA on first access to SME and use that to save and restore ZA state when context switching. We do this by using the vector form of the LDR and STR ZA instructions, these do not require streaming mode and have implementation recommendations that they avoid contention issues in shared SMCU implementations. Since ZA is architecturally guaranteed to be zeroed when enabled we do not need to explicitly zero ZA, either we will be restoring from a saved copy or trapping on first use of SME so we know that ZA must be disabled. Signed-off-by: Mark Brown Reviewed-by: Catalin Marinas Link: https://lore.kernel.org/r/20220419112247.711548-16-broonie@kernel.org Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/fpsimd.h | 5 ++++- arch/arm64/include/asm/fpsimdmacros.h | 22 ++++++++++++++++++++++ arch/arm64/include/asm/kvm_host.h | 3 +++ arch/arm64/include/asm/processor.h | 1 + arch/arm64/kernel/entry-fpsimd.S | 22 ++++++++++++++++++++++ arch/arm64/kernel/fpsimd.c | 20 +++++++++++++------- arch/arm64/kvm/fpsimd.c | 2 +- 7 files changed, 66 insertions(+), 9 deletions(-) (limited to 'arch') diff --git a/arch/arm64/include/asm/fpsimd.h b/arch/arm64/include/asm/fpsimd.h index cd94f5c5b516..1a709c03bb6c 100644 --- a/arch/arm64/include/asm/fpsimd.h +++ b/arch/arm64/include/asm/fpsimd.h @@ -47,7 +47,8 @@ extern void fpsimd_update_current_state(struct user_fpsimd_state const *state); extern void fpsimd_bind_state_to_cpu(struct user_fpsimd_state *state, void *sve_state, unsigned int sve_vl, - unsigned int sme_vl, u64 *svcr); + void *za_state, unsigned int sme_vl, + u64 *svcr); extern void fpsimd_flush_task_state(struct task_struct *target); extern void fpsimd_save_and_flush_cpu_state(void); @@ -90,6 +91,8 @@ extern void sve_flush_live(bool flush_ffr, unsigned long vq_minus_1); extern unsigned int sve_get_vl(void); extern void sve_set_vq(unsigned long vq_minus_1); extern void sme_set_vq(unsigned long vq_minus_1); +extern void za_save_state(void *state); +extern void za_load_state(void const *state); struct arm64_cpu_capabilities; extern void sve_kernel_enable(const struct arm64_cpu_capabilities *__unused); diff --git a/arch/arm64/include/asm/fpsimdmacros.h b/arch/arm64/include/asm/fpsimdmacros.h index f6ab36e0cd8d..5e0910cf4832 100644 --- a/arch/arm64/include/asm/fpsimdmacros.h +++ b/arch/arm64/include/asm/fpsimdmacros.h @@ -319,3 +319,25 @@ ldr w\nxtmp, [\xpfpsr, #4] msr fpcr, x\nxtmp .endm + +.macro sme_save_za nxbase, xvl, nw + mov w\nw, #0 + +423: + _sme_str_zav \nw, \nxbase + add x\nxbase, x\nxbase, \xvl + add x\nw, x\nw, #1 + cmp \xvl, x\nw + bne 423b +.endm + +.macro sme_load_za nxbase, xvl, nw + mov w\nw, #0 + +423: + _sme_ldr_zav \nw, \nxbase + add x\nxbase, x\nxbase, \xvl + add x\nw, x\nw, #1 + cmp \xvl, x\nw + bne 423b +.endm diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 94a27a7520f4..8a7c442d5b57 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -295,8 +295,11 @@ struct vcpu_reset_state { struct kvm_vcpu_arch { struct kvm_cpu_context ctxt; + + /* Guest floating point state */ void *sve_state; unsigned int sve_max_vl; + u64 svcr; /* Stage 2 paging state used by the hardware on next switch */ struct kvm_s2_mmu *hw_mmu; diff --git a/arch/arm64/include/asm/processor.h b/arch/arm64/include/asm/processor.h index 7542310b4e6b..6a3a6c3dec90 100644 --- a/arch/arm64/include/asm/processor.h +++ b/arch/arm64/include/asm/processor.h @@ -154,6 +154,7 @@ struct thread_struct { unsigned int fpsimd_cpu; void *sve_state; /* SVE registers, if any */ + void *za_state; /* ZA register, if any */ unsigned int vl[ARM64_VEC_MAX]; /* vector length */ unsigned int vl_onexec[ARM64_VEC_MAX]; /* vl after next exec */ unsigned long fault_address; /* fault info */ diff --git a/arch/arm64/kernel/entry-fpsimd.S b/arch/arm64/kernel/entry-fpsimd.S index 6f88c0f86d50..229436f33df5 100644 --- a/arch/arm64/kernel/entry-fpsimd.S +++ b/arch/arm64/kernel/entry-fpsimd.S @@ -99,4 +99,26 @@ SYM_FUNC_START(sme_set_vq) ret SYM_FUNC_END(sme_set_vq) +/* + * Save the SME state + * + * x0 - pointer to buffer for state + */ +SYM_FUNC_START(za_save_state) + _sme_rdsvl 1, 1 // x1 = VL/8 + sme_save_za 0, x1, 12 + ret +SYM_FUNC_END(za_save_state) + +/* + * Load the SME state + * + * x0 - pointer to buffer for state + */ +SYM_FUNC_START(za_load_state) + _sme_rdsvl 1, 1 // x1 = VL/8 + sme_load_za 0, x1, 12 + ret +SYM_FUNC_END(za_load_state) + #endif /* CONFIG_ARM64_SME */ diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c index f8506a875eb2..dc38f3f2a28a 100644 --- a/arch/arm64/kernel/fpsimd.c +++ b/arch/arm64/kernel/fpsimd.c @@ -121,6 +121,7 @@ struct fpsimd_last_state_struct { struct user_fpsimd_state *st; void *sve_state; + void *za_state; u64 *svcr; unsigned int sve_vl; unsigned int sme_vl; @@ -387,11 +388,15 @@ static void task_fpsimd_load(void) if (system_supports_sme()) { unsigned long sme_vl = task_get_sme_vl(current); + /* Ensure VL is set up for restoring data */ if (test_thread_flag(TIF_SME)) sme_set_vq(sve_vq_from_vl(sme_vl) - 1); write_sysreg_s(current->thread.svcr, SYS_SVCR_EL0); + if (thread_za_enabled(¤t->thread)) + za_load_state(current->thread.za_state); + if (thread_sm_enabled(¤t->thread)) { restore_sve_regs = true; restore_ffr = system_supports_fa64(); @@ -441,11 +446,10 @@ static void fpsimd_save(void) u64 *svcr = last->svcr; *svcr = read_sysreg_s(SYS_SVCR_EL0); - if (thread_za_enabled(¤t->thread)) { - /* ZA state managment is not implemented yet */ - force_signal_inject(SIGKILL, SI_KERNEL, 0, 0); - return; - } + *svcr = read_sysreg_s(SYS_SVCR_EL0); + + if (*svcr & SYS_SVCR_EL0_ZA_MASK) + za_save_state(last->za_state); /* If we are in streaming mode override regular SVE. */ if (*svcr & SYS_SVCR_EL0_SM_MASK) { @@ -1483,6 +1487,7 @@ static void fpsimd_bind_task_to_cpu(void) WARN_ON(!system_supports_fpsimd()); last->st = ¤t->thread.uw.fpsimd_state; last->sve_state = current->thread.sve_state; + last->za_state = current->thread.za_state; last->sve_vl = task_get_sve_vl(current); last->sme_vl = task_get_sme_vl(current); last->svcr = ¤t->thread.svcr; @@ -1500,8 +1505,8 @@ static void fpsimd_bind_task_to_cpu(void) } void fpsimd_bind_state_to_cpu(struct user_fpsimd_state *st, void *sve_state, - unsigned int sve_vl, unsigned int sme_vl, - u64 *svcr) + unsigned int sve_vl, void *za_state, + unsigned int sme_vl, u64 *svcr) { struct fpsimd_last_state_struct *last = this_cpu_ptr(&fpsimd_last_state); @@ -1512,6 +1517,7 @@ void fpsimd_bind_state_to_cpu(struct user_fpsimd_state *st, void *sve_state, last->st = st; last->svcr = svcr; last->sve_state = sve_state; + last->za_state = za_state; last->sve_vl = sve_vl; last->sme_vl = sme_vl; } diff --git a/arch/arm64/kvm/fpsimd.c b/arch/arm64/kvm/fpsimd.c index 394e583bb73e..57d7ac3cfa0c 100644 --- a/arch/arm64/kvm/fpsimd.c +++ b/arch/arm64/kvm/fpsimd.c @@ -116,7 +116,7 @@ void kvm_arch_vcpu_ctxsync_fp(struct kvm_vcpu *vcpu) fpsimd_bind_state_to_cpu(&vcpu->arch.ctxt.fp_regs, vcpu->arch.sve_state, vcpu->arch.sve_max_vl, - 0, NULL); + NULL, 0, &vcpu->arch.svcr); clear_thread_flag(TIF_FOREIGN_FPSTATE); update_thread_flag(TIF_SVE, vcpu_has_sve(vcpu)); -- cgit v1.2.3 From 8bd7f91c03d886f41d35f6108078d20be5a4a1bd Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Tue, 19 Apr 2022 12:22:24 +0100 Subject: arm64/sme: Implement traps and syscall handling for SME By default all SME operations in userspace will trap. When this happens we allocate storage space for the SME register state, set up the SVE registers and disable traps. We do not need to initialize ZA since the architecture guarantees that it will be zeroed when enabled and when we trap ZA is disabled. On syscall we exit streaming mode if we were previously in it and ensure that all but the lower 128 bits of the registers are zeroed while preserving the state of ZA. This follows the aarch64 PCS for SME, ZA state is preserved over a function call and streaming mode is exited. Since the traps for SME do not distinguish between streaming mode SVE and ZA usage if ZA is in use rather than reenabling traps we instead zero the parts of the SVE registers not shared with FPSIMD and leave SME enabled, this simplifies handling SME traps. If ZA is not in use then we reenable SME traps and fall through to normal handling of SVE. Signed-off-by: Mark Brown Reviewed-by: Catalin Marinas Link: https://lore.kernel.org/r/20220419112247.711548-17-broonie@kernel.org Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/esr.h | 1 + arch/arm64/include/asm/exception.h | 1 + arch/arm64/include/asm/fpsimd.h | 39 +++++++++ arch/arm64/kernel/entry-common.c | 11 +++ arch/arm64/kernel/fpsimd.c | 167 +++++++++++++++++++++++++++++++++---- arch/arm64/kernel/process.c | 30 ++++++- arch/arm64/kernel/syscall.c | 29 ++++++- 7 files changed, 255 insertions(+), 23 deletions(-) (limited to 'arch') diff --git a/arch/arm64/include/asm/esr.h b/arch/arm64/include/asm/esr.h index 43872e0cfd1e..0467837fd66b 100644 --- a/arch/arm64/include/asm/esr.h +++ b/arch/arm64/include/asm/esr.h @@ -76,6 +76,7 @@ #define ESR_ELx_IL_SHIFT (25) #define ESR_ELx_IL (UL(1) << ESR_ELx_IL_SHIFT) #define ESR_ELx_ISS_MASK (ESR_ELx_IL - 1) +#define ESR_ELx_ISS(esr) ((esr) & ESR_ELx_ISS_MASK) /* ISS field definitions shared by different classes */ #define ESR_ELx_WNR_SHIFT (6) diff --git a/arch/arm64/include/asm/exception.h b/arch/arm64/include/asm/exception.h index 339477dca551..2add7f33b7c2 100644 --- a/arch/arm64/include/asm/exception.h +++ b/arch/arm64/include/asm/exception.h @@ -64,6 +64,7 @@ void do_debug_exception(unsigned long addr_if_watchpoint, unsigned int esr, struct pt_regs *regs); void do_fpsimd_acc(unsigned int esr, struct pt_regs *regs); void do_sve_acc(unsigned int esr, struct pt_regs *regs); +void do_sme_acc(unsigned int esr, struct pt_regs *regs); void do_fpsimd_exc(unsigned int esr, struct pt_regs *regs); void do_sysinstr(unsigned int esr, struct pt_regs *regs); void do_sp_pc_abort(unsigned long addr, unsigned int esr, struct pt_regs *regs); diff --git a/arch/arm64/include/asm/fpsimd.h b/arch/arm64/include/asm/fpsimd.h index 1a709c03bb6c..6c33bc832ed4 100644 --- a/arch/arm64/include/asm/fpsimd.h +++ b/arch/arm64/include/asm/fpsimd.h @@ -239,6 +239,8 @@ static inline bool sve_vq_available(unsigned int vq) return vq_available(ARM64_VEC_SVE, vq); } +size_t sve_state_size(struct task_struct const *task); + #else /* ! CONFIG_ARM64_SVE */ static inline void sve_alloc(struct task_struct *task) { } @@ -278,10 +280,25 @@ static inline void vec_update_vq_map(enum vec_type t) { } static inline int vec_verify_vq_map(enum vec_type t) { return 0; } static inline void sve_setup(void) { } +static inline size_t sve_state_size(struct task_struct const *task) +{ + return 0; +} + #endif /* ! CONFIG_ARM64_SVE */ #ifdef CONFIG_ARM64_SME +static inline void sme_user_disable(void) +{ + sysreg_clear_set(cpacr_el1, CPACR_EL1_SMEN_EL0EN, 0); +} + +static inline void sme_user_enable(void) +{ + sysreg_clear_set(cpacr_el1, 0, CPACR_EL1_SMEN_EL0EN); +} + static inline void sme_smstart_sm(void) { asm volatile(__msr_s(SYS_SVCR_SMSTART_SM_EL0, "xzr")); @@ -309,16 +326,33 @@ static inline int sme_max_virtualisable_vl(void) return vec_max_virtualisable_vl(ARM64_VEC_SME); } +extern void sme_alloc(struct task_struct *task); extern unsigned int sme_get_vl(void); extern int sme_set_current_vl(unsigned long arg); extern int sme_get_current_vl(void); +/* + * Return how many bytes of memory are required to store the full SME + * specific state (currently just ZA) for task, given task's currently + * configured vector length. + */ +static inline size_t za_state_size(struct task_struct const *task) +{ + unsigned int vl = task_get_sme_vl(task); + + return ZA_SIG_REGS_SIZE(sve_vq_from_vl(vl)); +} + #else +static inline void sme_user_disable(void) { BUILD_BUG(); } +static inline void sme_user_enable(void) { BUILD_BUG(); } + static inline void sme_smstart_sm(void) { } static inline void sme_smstop_sm(void) { } static inline void sme_smstop(void) { } +static inline void sme_alloc(struct task_struct *task) { } static inline void sme_setup(void) { } static inline unsigned int sme_get_vl(void) { return 0; } static inline int sme_max_vl(void) { return 0; } @@ -326,6 +360,11 @@ static inline int sme_max_virtualisable_vl(void) { return 0; } static inline int sme_set_current_vl(unsigned long arg) { return -EINVAL; } static inline int sme_get_current_vl(void) { return -EINVAL; } +static inline size_t za_state_size(struct task_struct const *task) +{ + return 0; +} + #endif /* ! CONFIG_ARM64_SME */ /* For use by EFI runtime services calls only */ diff --git a/arch/arm64/kernel/entry-common.c b/arch/arm64/kernel/entry-common.c index 878c65aa7206..29139e9a1517 100644 --- a/arch/arm64/kernel/entry-common.c +++ b/arch/arm64/kernel/entry-common.c @@ -537,6 +537,14 @@ static void noinstr el0_sve_acc(struct pt_regs *regs, unsigned long esr) exit_to_user_mode(regs); } +static void noinstr el0_sme_acc(struct pt_regs *regs, unsigned long esr) +{ + enter_from_user_mode(regs); + local_daif_restore(DAIF_PROCCTX); + do_sme_acc(esr, regs); + exit_to_user_mode(regs); +} + static void noinstr el0_fpsimd_exc(struct pt_regs *regs, unsigned long esr) { enter_from_user_mode(regs); @@ -645,6 +653,9 @@ asmlinkage void noinstr el0t_64_sync_handler(struct pt_regs *regs) case ESR_ELx_EC_SVE: el0_sve_acc(regs, esr); break; + case ESR_ELx_EC_SME: + el0_sme_acc(regs, esr); + break; case ESR_ELx_EC_FP_EXC64: el0_fpsimd_exc(regs, esr); break; diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c index dc38f3f2a28a..00a0cbd01ce5 100644 --- a/arch/arm64/kernel/fpsimd.c +++ b/arch/arm64/kernel/fpsimd.c @@ -209,6 +209,12 @@ static void set_sme_default_vl(int val) set_default_vl(ARM64_VEC_SME, val); } +static void sme_free(struct task_struct *); + +#else + +static inline void sme_free(struct task_struct *t) { } + #endif DEFINE_PER_CPU(bool, fpsimd_context_busy); @@ -676,7 +682,7 @@ static void sve_to_fpsimd(struct task_struct *task) * Return how many bytes of memory are required to store the full SVE * state for task, given task's currently configured vector length. */ -static size_t sve_state_size(struct task_struct const *task) +size_t sve_state_size(struct task_struct const *task) { unsigned int vl = 0; @@ -818,18 +824,22 @@ int vec_set_vector_length(struct task_struct *task, enum vec_type type, thread_sm_enabled(&task->thread)) sve_to_fpsimd(task); - if (system_supports_sme() && type == ARM64_VEC_SME) + if (system_supports_sme() && type == ARM64_VEC_SME) { task->thread.svcr &= ~(SYS_SVCR_EL0_SM_MASK | SYS_SVCR_EL0_ZA_MASK); + clear_thread_flag(TIF_SME); + } if (task == current) put_cpu_fpsimd_context(); /* - * Force reallocation of task SVE state to the correct size - * on next use: + * Force reallocation of task SVE and SME state to the correct + * size on next use: */ sve_free(task); + if (system_supports_sme() && type == ARM64_VEC_SME) + sme_free(task); task_set_vl(task, type, vl); @@ -1164,12 +1174,43 @@ void __init sve_setup(void) void fpsimd_release_task(struct task_struct *dead_task) { __sve_free(dead_task); + sme_free(dead_task); } #endif /* CONFIG_ARM64_SVE */ #ifdef CONFIG_ARM64_SME +/* This will move to uapi/asm/sigcontext.h when signals are implemented */ +#define ZA_SIG_REGS_SIZE(vq) ((vq * __SVE_VQ_BYTES) * (vq * __SVE_VQ_BYTES)) + +/* + * Ensure that task->thread.za_state is allocated and sufficiently large. + * + * This function should be used only in preparation for replacing + * task->thread.za_state with new data. The memory is always zeroed + * here to prevent stale data from showing through: this is done in + * the interest of testability and predictability, the architecture + * guarantees that when ZA is enabled it will be zeroed. + */ +void sme_alloc(struct task_struct *task) +{ + if (task->thread.za_state) { + memset(task->thread.za_state, 0, za_state_size(task)); + return; + } + + /* This could potentially be up to 64K. */ + task->thread.za_state = + kzalloc(za_state_size(task), GFP_KERNEL); +} + +static void sme_free(struct task_struct *task) +{ + kfree(task->thread.za_state); + task->thread.za_state = NULL; +} + void sme_kernel_enable(const struct arm64_cpu_capabilities *__always_unused p) { /* Set priority for all PEs to architecturally defined minimum */ @@ -1279,6 +1320,29 @@ void __init sme_setup(void) #endif /* CONFIG_ARM64_SME */ +static void sve_init_regs(void) +{ + /* + * Convert the FPSIMD state to SVE, zeroing all the state that + * is not shared with FPSIMD. If (as is likely) the current + * state is live in the registers then do this there and + * update our metadata for the current task including + * disabling the trap, otherwise update our in-memory copy. + * We are guaranteed to not be in streaming mode, we can only + * take a SVE trap when not in streaming mode and we can't be + * in streaming mode when taking a SME trap. + */ + if (!test_thread_flag(TIF_FOREIGN_FPSTATE)) { + unsigned long vq_minus_one = + sve_vq_from_vl(task_get_sve_vl(current)) - 1; + sve_set_vq(vq_minus_one); + sve_flush_live(true, vq_minus_one); + fpsimd_bind_task_to_cpu(); + } else { + fpsimd_to_sve(current); + } +} + /* * Trapped SVE access * @@ -1310,22 +1374,77 @@ void do_sve_acc(unsigned int esr, struct pt_regs *regs) WARN_ON(1); /* SVE access shouldn't have trapped */ /* - * Convert the FPSIMD state to SVE, zeroing all the state that - * is not shared with FPSIMD. If (as is likely) the current - * state is live in the registers then do this there and - * update our metadata for the current task including - * disabling the trap, otherwise update our in-memory copy. + * Even if the task can have used streaming mode we can only + * generate SVE access traps in normal SVE mode and + * transitioning out of streaming mode may discard any + * streaming mode state. Always clear the high bits to avoid + * any potential errors tracking what is properly initialised. + */ + sve_init_regs(); + + put_cpu_fpsimd_context(); +} + +/* + * Trapped SME access + * + * Storage is allocated for the full SVE and SME state, the current + * FPSIMD register contents are migrated to SVE if SVE is not already + * active, and the access trap is disabled. + * + * TIF_SME should be clear on entry: otherwise, fpsimd_restore_current_state() + * would have disabled the SME access trap for userspace during + * ret_to_user, making an SVE access trap impossible in that case. + */ +void do_sme_acc(unsigned int esr, struct pt_regs *regs) +{ + /* Even if we chose not to use SME, the hardware could still trap: */ + if (unlikely(!system_supports_sme()) || WARN_ON(is_compat_task())) { + force_signal_inject(SIGILL, ILL_ILLOPC, regs->pc, 0); + return; + } + + /* + * If this not a trap due to SME being disabled then something + * is being used in the wrong mode, report as SIGILL. */ + if (ESR_ELx_ISS(esr) != ESR_ELx_SME_ISS_SME_DISABLED) { + force_signal_inject(SIGILL, ILL_ILLOPC, regs->pc, 0); + return; + } + + sve_alloc(current); + sme_alloc(current); + if (!current->thread.sve_state || !current->thread.za_state) { + force_sig(SIGKILL); + return; + } + + get_cpu_fpsimd_context(); + + /* With TIF_SME userspace shouldn't generate any traps */ + if (test_and_set_thread_flag(TIF_SME)) + WARN_ON(1); + if (!test_thread_flag(TIF_FOREIGN_FPSTATE)) { unsigned long vq_minus_one = - sve_vq_from_vl(task_get_sve_vl(current)) - 1; - sve_set_vq(vq_minus_one); - sve_flush_live(true, vq_minus_one); + sve_vq_from_vl(task_get_sme_vl(current)) - 1; + sme_set_vq(vq_minus_one); + fpsimd_bind_task_to_cpu(); - } else { - fpsimd_to_sve(current); } + /* + * If SVE was not already active initialise the SVE registers, + * any non-shared state between the streaming and regular SVE + * registers is architecturally guaranteed to be zeroed when + * we enter streaming mode. We do not need to initialize ZA + * since ZA must be disabled at this point and enabling ZA is + * architecturally defined to zero ZA. + */ + if (system_supports_sve() && !test_thread_flag(TIF_SVE)) + sve_init_regs(); + put_cpu_fpsimd_context(); } @@ -1442,8 +1561,12 @@ void fpsimd_flush_thread(void) fpsimd_flush_thread_vl(ARM64_VEC_SVE); } - if (system_supports_sme()) + if (system_supports_sme()) { + clear_thread_flag(TIF_SME); + sme_free(current); fpsimd_flush_thread_vl(ARM64_VEC_SME); + current->thread.svcr = 0; + } put_cpu_fpsimd_context(); } @@ -1493,14 +1616,22 @@ static void fpsimd_bind_task_to_cpu(void) last->svcr = ¤t->thread.svcr; current->thread.fpsimd_cpu = smp_processor_id(); + /* + * Toggle SVE and SME trapping for userspace if needed, these + * are serialsied by ret_to_user(). + */ + if (system_supports_sme()) { + if (test_thread_flag(TIF_SME)) + sme_user_enable(); + else + sme_user_disable(); + } + if (system_supports_sve()) { - /* Toggle SVE trapping for userspace if needed */ if (test_thread_flag(TIF_SVE)) sve_user_enable(); else sve_user_disable(); - - /* Serialised by exception return to user */ } } diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c index 07f235b46cf5..99c293513817 100644 --- a/arch/arm64/kernel/process.c +++ b/arch/arm64/kernel/process.c @@ -300,17 +300,41 @@ int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src) /* * Detach src's sve_state (if any) from dst so that it does not - * get erroneously used or freed prematurely. dst's sve_state + * get erroneously used or freed prematurely. dst's copies * will be allocated on demand later on if dst uses SVE. * For consistency, also clear TIF_SVE here: this could be done * later in copy_process(), but to avoid tripping up future - * maintainers it is best not to leave TIF_SVE and sve_state in + * maintainers it is best not to leave TIF flags and buffers in * an inconsistent state, even temporarily. */ dst->thread.sve_state = NULL; clear_tsk_thread_flag(dst, TIF_SVE); - dst->thread.svcr = 0; + /* + * In the unlikely event that we create a new thread with ZA + * enabled we should retain the ZA state so duplicate it here. + * This may be shortly freed if we exec() or if CLONE_SETTLS + * but it's simpler to do it here. To avoid confusing the rest + * of the code ensure that we have a sve_state allocated + * whenever za_state is allocated. + */ + if (thread_za_enabled(&src->thread)) { + dst->thread.sve_state = kzalloc(sve_state_size(src), + GFP_KERNEL); + if (!dst->thread.za_state) + return -ENOMEM; + dst->thread.za_state = kmemdup(src->thread.za_state, + za_state_size(src), + GFP_KERNEL); + if (!dst->thread.za_state) { + kfree(dst->thread.sve_state); + dst->thread.sve_state = NULL; + return -ENOMEM; + } + } else { + dst->thread.za_state = NULL; + clear_tsk_thread_flag(dst, TIF_SME); + } /* clear any pending asynchronous tag fault raised by the parent */ clear_tsk_thread_flag(dst, TIF_MTE_ASYNC_FAULT); diff --git a/arch/arm64/kernel/syscall.c b/arch/arm64/kernel/syscall.c index c938603b3ba0..92c69e5ac269 100644 --- a/arch/arm64/kernel/syscall.c +++ b/arch/arm64/kernel/syscall.c @@ -158,11 +158,36 @@ trace_exit: syscall_trace_exit(regs); } -static inline void sve_user_discard(void) +/* + * As per the ABI exit SME streaming mode and clear the SVE state not + * shared with FPSIMD on syscall entry. + */ +static inline void fp_user_discard(void) { + /* + * If SME is active then exit streaming mode. If ZA is active + * then flush the SVE registers but leave userspace access to + * both SVE and SME enabled, otherwise disable SME for the + * task and fall through to disabling SVE too. This means + * that after a syscall we never have any streaming mode + * register state to track, if this changes the KVM code will + * need updating. + */ + if (system_supports_sme() && test_thread_flag(TIF_SME)) { + u64 svcr = read_sysreg_s(SYS_SVCR_EL0); + + if (svcr & SYS_SVCR_EL0_SM_MASK) + sme_smstop_sm(); + } + if (!system_supports_sve()) return; + /* + * If SME is not active then disable SVE, the registers will + * be cleared when userspace next attempts to access them and + * we do not need to track the SVE register state until then. + */ clear_thread_flag(TIF_SVE); /* @@ -177,7 +202,7 @@ static inline void sve_user_discard(void) void do_el0_svc(struct pt_regs *regs) { - sve_user_discard(); + fp_user_discard(); el0_svc_common(regs, regs->regs[8], __NR_syscalls, sys_call_table); } -- cgit v1.2.3 From 40a8e87bb32855b39839d35b5b5b125494b3a604 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Tue, 19 Apr 2022 12:22:25 +0100 Subject: arm64/sme: Disable ZA and streaming mode when handling signals The ABI requires that streaming mode and ZA are disabled when invoking signal handlers, do this in setup_return() when we prepare the task state for the signal handler. Signed-off-by: Mark Brown Reviewed-by: Catalin Marinas Link: https://lore.kernel.org/r/20220419112247.711548-18-broonie@kernel.org Signed-off-by: Catalin Marinas --- arch/arm64/kernel/signal.c | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'arch') diff --git a/arch/arm64/kernel/signal.c b/arch/arm64/kernel/signal.c index 4a4122ef6f39..42efa464e46e 100644 --- a/arch/arm64/kernel/signal.c +++ b/arch/arm64/kernel/signal.c @@ -759,6 +759,13 @@ static void setup_return(struct pt_regs *regs, struct k_sigaction *ka, /* TCO (Tag Check Override) always cleared for signal handlers */ regs->pstate &= ~PSR_TCO_BIT; + /* Signal handlers are invoked with ZA and streaming mode disabled */ + if (system_supports_sme()) { + current->thread.svcr &= ~(SYS_SVCR_EL0_ZA_MASK | + SYS_SVCR_EL0_SM_MASK); + sme_smstop(); + } + if (ka->sa.sa_flags & SA_RESTORER) sigtramp = ka->sa.sa_restorer; else -- cgit v1.2.3 From 85ed24dad2904f7c141911d91b7807ab02694b5e Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Tue, 19 Apr 2022 12:22:26 +0100 Subject: arm64/sme: Implement streaming SVE signal handling When in streaming mode we have the same set of SVE registers as we do in regular SVE mode with the exception of FFR and the use of the SME vector length. Provide signal handling for these registers by taking one of the reserved words in the SVE signal context as a flags field and defining a flag which is set for streaming mode. When the flag is set the vector length is set to the streaming mode vector length and we save and restore streaming mode data. We support entering or leaving streaming mode based on the value of the flag but do not support changing the vector length, this is not currently supported SVE signal handling. We could instead allocate a separate record in the signal frame for the streaming mode SVE context but this inflates the size of the maximal signal frame required and adds complication when validating signal frames from userspace, especially given the current structure of the code. Any implementation of support for streaming mode vectors in signals will have some potential for causing issues for applications that attempt to handle SVE vectors in signals, use streaming mode but do not understand streaming mode in their signal handling code, it is hard to identify a case that is clearly better than any other - they all have cases where they could cause unexpected register corruption or faults. Signed-off-by: Mark Brown Reviewed-by: Catalin Marinas Link: https://lore.kernel.org/r/20220419112247.711548-19-broonie@kernel.org Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/processor.h | 8 ++++++ arch/arm64/include/uapi/asm/sigcontext.h | 16 +++++++++--- arch/arm64/kernel/signal.c | 42 ++++++++++++++++++++++++-------- 3 files changed, 53 insertions(+), 13 deletions(-) (limited to 'arch') diff --git a/arch/arm64/include/asm/processor.h b/arch/arm64/include/asm/processor.h index 6a3a6c3dec90..1d2ca4870b84 100644 --- a/arch/arm64/include/asm/processor.h +++ b/arch/arm64/include/asm/processor.h @@ -190,6 +190,14 @@ static inline unsigned int thread_get_sme_vl(struct thread_struct *thread) return thread_get_vl(thread, ARM64_VEC_SME); } +static inline unsigned int thread_get_cur_vl(struct thread_struct *thread) +{ + if (system_supports_sme() && (thread->svcr & SYS_SVCR_EL0_SM_MASK)) + return thread_get_sme_vl(thread); + else + return thread_get_sve_vl(thread); +} + unsigned int task_get_vl(const struct task_struct *task, enum vec_type type); void task_set_vl(struct task_struct *task, enum vec_type type, unsigned long vl); diff --git a/arch/arm64/include/uapi/asm/sigcontext.h b/arch/arm64/include/uapi/asm/sigcontext.h index 0c796c795dbe..57e9f8c3ee9e 100644 --- a/arch/arm64/include/uapi/asm/sigcontext.h +++ b/arch/arm64/include/uapi/asm/sigcontext.h @@ -134,9 +134,12 @@ struct extra_context { struct sve_context { struct _aarch64_ctx head; __u16 vl; - __u16 __reserved[3]; + __u16 flags; + __u16 __reserved[2]; }; +#define SVE_SIG_FLAG_SM 0x1 /* Context describes streaming mode */ + #endif /* !__ASSEMBLY__ */ #include @@ -186,9 +189,16 @@ struct sve_context { * sve_context.vl must equal the thread's current vector length when * doing a sigreturn. * + * On systems with support for SME the SVE register state may reflect either + * streaming or non-streaming mode. In streaming mode the streaming mode + * vector length will be used and the flag SVE_SIG_FLAG_SM will be set in + * the flags field. It is permitted to enter or leave streaming mode in + * a signal return, applications should take care to ensure that any difference + * in vector length between the two modes is handled, including any resizing + * and movement of context blocks. * - * Note: for all these macros, the "vq" argument denotes the SVE - * vector length in quadwords (i.e., units of 128 bits). + * Note: for all these macros, the "vq" argument denotes the vector length + * in quadwords (i.e., units of 128 bits). * * The correct way to obtain vq is to use sve_vq_from_vl(vl). The * result is valid if and only if sve_vl_valid(vl) is true. This is diff --git a/arch/arm64/kernel/signal.c b/arch/arm64/kernel/signal.c index 42efa464e46e..0ddce6afd2a3 100644 --- a/arch/arm64/kernel/signal.c +++ b/arch/arm64/kernel/signal.c @@ -226,11 +226,17 @@ static int preserve_sve_context(struct sve_context __user *ctx) { int err = 0; u16 reserved[ARRAY_SIZE(ctx->__reserved)]; + u16 flags = 0; unsigned int vl = task_get_sve_vl(current); unsigned int vq = 0; - if (test_thread_flag(TIF_SVE)) + if (thread_sm_enabled(¤t->thread)) { + vl = task_get_sme_vl(current); vq = sve_vq_from_vl(vl); + flags |= SVE_SIG_FLAG_SM; + } else if (test_thread_flag(TIF_SVE)) { + vq = sve_vq_from_vl(vl); + } memset(reserved, 0, sizeof(reserved)); @@ -238,6 +244,7 @@ static int preserve_sve_context(struct sve_context __user *ctx) __put_user_error(round_up(SVE_SIG_CONTEXT_SIZE(vq), 16), &ctx->head.size, err); __put_user_error(vl, &ctx->vl, err); + __put_user_error(flags, &ctx->flags, err); BUILD_BUG_ON(sizeof(ctx->__reserved) != sizeof(reserved)); err |= __copy_to_user(&ctx->__reserved, reserved, sizeof(reserved)); @@ -258,18 +265,28 @@ static int preserve_sve_context(struct sve_context __user *ctx) static int restore_sve_fpsimd_context(struct user_ctxs *user) { int err; - unsigned int vq; + unsigned int vl, vq; struct user_fpsimd_state fpsimd; struct sve_context sve; if (__copy_from_user(&sve, user->sve, sizeof(sve))) return -EFAULT; - if (sve.vl != task_get_sve_vl(current)) + if (sve.flags & SVE_SIG_FLAG_SM) { + if (!system_supports_sme()) + return -EINVAL; + + vl = task_get_sme_vl(current); + } else { + vl = task_get_sve_vl(current); + } + + if (sve.vl != vl) return -EINVAL; if (sve.head.size <= sizeof(*user->sve)) { clear_thread_flag(TIF_SVE); + current->thread.svcr &= ~SYS_SVCR_EL0_SM_MASK; goto fpsimd_only; } @@ -301,7 +318,10 @@ static int restore_sve_fpsimd_context(struct user_ctxs *user) if (err) return -EFAULT; - set_thread_flag(TIF_SVE); + if (sve.flags & SVE_SIG_FLAG_SM) + current->thread.svcr |= SYS_SVCR_EL0_SM_MASK; + else + set_thread_flag(TIF_SVE); fpsimd_only: /* copy the FP and status/control registers */ @@ -393,7 +413,7 @@ static int parse_user_sigframe(struct user_ctxs *user, break; case SVE_MAGIC: - if (!system_supports_sve()) + if (!system_supports_sve() && !system_supports_sme()) goto invalid; if (user->sve) @@ -594,11 +614,12 @@ static int setup_sigframe_layout(struct rt_sigframe_user_layout *user, if (system_supports_sve()) { unsigned int vq = 0; - if (add_all || test_thread_flag(TIF_SVE)) { - int vl = sve_max_vl(); + if (add_all || test_thread_flag(TIF_SVE) || + thread_sm_enabled(¤t->thread)) { + int vl = max(sve_max_vl(), sme_max_vl()); if (!add_all) - vl = task_get_sve_vl(current); + vl = thread_get_cur_vl(¤t->thread); vq = sve_vq_from_vl(vl); } @@ -649,8 +670,9 @@ static int setup_sigframe(struct rt_sigframe_user_layout *user, __put_user_error(current->thread.fault_code, &esr_ctx->esr, err); } - /* Scalable Vector Extension state, if present */ - if (system_supports_sve() && err == 0 && user->sve_offset) { + /* Scalable Vector Extension state (including streaming), if present */ + if ((system_supports_sve() || system_supports_sme()) && + err == 0 && user->sve_offset) { struct sve_context __user *sve_ctx = apply_user_offset(user, user->sve_offset); err |= preserve_sve_context(sve_ctx); -- cgit v1.2.3 From 39782210eb7e87634d96cacb6ece370bc59d74ba Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Tue, 19 Apr 2022 12:22:27 +0100 Subject: arm64/sme: Implement ZA signal handling Implement support for ZA in signal handling in a very similar way to how we implement support for SVE registers, using a signal context structure with optional register state after it. Where present this register state stores the ZA matrix as a series of horizontal vectors numbered from 0 to VL/8 in the endinanness independent format used for vectors. As with SVE we do not allow changes in the vector length during signal return but we do allow ZA to be enabled or disabled. Signed-off-by: Mark Brown Reviewed-by: Catalin Marinas Link: https://lore.kernel.org/r/20220419112247.711548-20-broonie@kernel.org Signed-off-by: Catalin Marinas --- arch/arm64/include/uapi/asm/sigcontext.h | 41 +++++++++ arch/arm64/kernel/fpsimd.c | 3 - arch/arm64/kernel/signal.c | 139 +++++++++++++++++++++++++++++++ 3 files changed, 180 insertions(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/arm64/include/uapi/asm/sigcontext.h b/arch/arm64/include/uapi/asm/sigcontext.h index 57e9f8c3ee9e..4aaf31e3bf16 100644 --- a/arch/arm64/include/uapi/asm/sigcontext.h +++ b/arch/arm64/include/uapi/asm/sigcontext.h @@ -140,6 +140,14 @@ struct sve_context { #define SVE_SIG_FLAG_SM 0x1 /* Context describes streaming mode */ +#define ZA_MAGIC 0x54366345 + +struct za_context { + struct _aarch64_ctx head; + __u16 vl; + __u16 __reserved[3]; +}; + #endif /* !__ASSEMBLY__ */ #include @@ -259,4 +267,37 @@ struct sve_context { #define SVE_SIG_CONTEXT_SIZE(vq) \ (SVE_SIG_REGS_OFFSET + SVE_SIG_REGS_SIZE(vq)) +/* + * If the ZA register is enabled for the thread at signal delivery then, + * za_context.head.size >= ZA_SIG_CONTEXT_SIZE(sve_vq_from_vl(za_context.vl)) + * and the register data may be accessed using the ZA_SIG_*() macros. + * + * If za_context.head.size < ZA_SIG_CONTEXT_SIZE(sve_vq_from_vl(za_context.vl)) + * then ZA was not enabled and no register data was included in which case + * ZA register was not enabled for the thread and no register data + * the ZA_SIG_*() macros should not be used except for this check. + * + * The same convention applies when returning from a signal: a caller + * will need to remove or resize the za_context block if it wants to + * enable the ZA register when it was previously non-live or vice-versa. + * This may require the caller to allocate fresh memory and/or move other + * context blocks in the signal frame. + * + * Changing the vector length during signal return is not permitted: + * za_context.vl must equal the thread's current SME vector length when + * doing a sigreturn. + */ + +#define ZA_SIG_REGS_OFFSET \ + ((sizeof(struct za_context) + (__SVE_VQ_BYTES - 1)) \ + / __SVE_VQ_BYTES * __SVE_VQ_BYTES) + +#define ZA_SIG_REGS_SIZE(vq) ((vq * __SVE_VQ_BYTES) * (vq * __SVE_VQ_BYTES)) + +#define ZA_SIG_ZAV_OFFSET(vq, n) (ZA_SIG_REGS_OFFSET + \ + (SVE_SIG_ZREG_SIZE(vq) * n)) + +#define ZA_SIG_CONTEXT_SIZE(vq) \ + (ZA_SIG_REGS_OFFSET + ZA_SIG_REGS_SIZE(vq)) + #endif /* _UAPI__ASM_SIGCONTEXT_H */ diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c index 00a0cbd01ce5..80f7ca12f855 100644 --- a/arch/arm64/kernel/fpsimd.c +++ b/arch/arm64/kernel/fpsimd.c @@ -1181,9 +1181,6 @@ void fpsimd_release_task(struct task_struct *dead_task) #ifdef CONFIG_ARM64_SME -/* This will move to uapi/asm/sigcontext.h when signals are implemented */ -#define ZA_SIG_REGS_SIZE(vq) ((vq * __SVE_VQ_BYTES) * (vq * __SVE_VQ_BYTES)) - /* * Ensure that task->thread.za_state is allocated and sufficiently large. * diff --git a/arch/arm64/kernel/signal.c b/arch/arm64/kernel/signal.c index 0ddce6afd2a3..2295948d97fd 100644 --- a/arch/arm64/kernel/signal.c +++ b/arch/arm64/kernel/signal.c @@ -56,6 +56,7 @@ struct rt_sigframe_user_layout { unsigned long fpsimd_offset; unsigned long esr_offset; unsigned long sve_offset; + unsigned long za_offset; unsigned long extra_offset; unsigned long end_offset; }; @@ -218,6 +219,7 @@ static int restore_fpsimd_context(struct fpsimd_context __user *ctx) struct user_ctxs { struct fpsimd_context __user *fpsimd; struct sve_context __user *sve; + struct za_context __user *za; }; #ifdef CONFIG_ARM64_SVE @@ -346,6 +348,101 @@ extern int restore_sve_fpsimd_context(struct user_ctxs *user); #endif /* ! CONFIG_ARM64_SVE */ +#ifdef CONFIG_ARM64_SME + +static int preserve_za_context(struct za_context __user *ctx) +{ + int err = 0; + u16 reserved[ARRAY_SIZE(ctx->__reserved)]; + unsigned int vl = task_get_sme_vl(current); + unsigned int vq; + + if (thread_za_enabled(¤t->thread)) + vq = sve_vq_from_vl(vl); + else + vq = 0; + + memset(reserved, 0, sizeof(reserved)); + + __put_user_error(ZA_MAGIC, &ctx->head.magic, err); + __put_user_error(round_up(ZA_SIG_CONTEXT_SIZE(vq), 16), + &ctx->head.size, err); + __put_user_error(vl, &ctx->vl, err); + BUILD_BUG_ON(sizeof(ctx->__reserved) != sizeof(reserved)); + err |= __copy_to_user(&ctx->__reserved, reserved, sizeof(reserved)); + + if (vq) { + /* + * This assumes that the ZA state has already been saved to + * the task struct by calling the function + * fpsimd_signal_preserve_current_state(). + */ + err |= __copy_to_user((char __user *)ctx + ZA_SIG_REGS_OFFSET, + current->thread.za_state, + ZA_SIG_REGS_SIZE(vq)); + } + + return err ? -EFAULT : 0; +} + +static int restore_za_context(struct user_ctxs __user *user) +{ + int err; + unsigned int vq; + struct za_context za; + + if (__copy_from_user(&za, user->za, sizeof(za))) + return -EFAULT; + + if (za.vl != task_get_sme_vl(current)) + return -EINVAL; + + if (za.head.size <= sizeof(*user->za)) { + current->thread.svcr &= ~SYS_SVCR_EL0_ZA_MASK; + return 0; + } + + vq = sve_vq_from_vl(za.vl); + + if (za.head.size < ZA_SIG_CONTEXT_SIZE(vq)) + return -EINVAL; + + /* + * Careful: we are about __copy_from_user() directly into + * thread.za_state with preemption enabled, so protection is + * needed to prevent a racing context switch from writing stale + * registers back over the new data. + */ + + fpsimd_flush_task_state(current); + /* From now, fpsimd_thread_switch() won't touch thread.sve_state */ + + sme_alloc(current); + if (!current->thread.za_state) { + current->thread.svcr &= ~SYS_SVCR_EL0_ZA_MASK; + clear_thread_flag(TIF_SME); + return -ENOMEM; + } + + err = __copy_from_user(current->thread.za_state, + (char __user const *)user->za + + ZA_SIG_REGS_OFFSET, + ZA_SIG_REGS_SIZE(vq)); + if (err) + return -EFAULT; + + set_thread_flag(TIF_SME); + current->thread.svcr |= SYS_SVCR_EL0_ZA_MASK; + + return 0; +} +#else /* ! CONFIG_ARM64_SME */ + +/* Turn any non-optimised out attempts to use these into a link error: */ +extern int preserve_za_context(void __user *ctx); +extern int restore_za_context(struct user_ctxs *user); + +#endif /* ! CONFIG_ARM64_SME */ static int parse_user_sigframe(struct user_ctxs *user, struct rt_sigframe __user *sf) @@ -360,6 +457,7 @@ static int parse_user_sigframe(struct user_ctxs *user, user->fpsimd = NULL; user->sve = NULL; + user->za = NULL; if (!IS_ALIGNED((unsigned long)base, 16)) goto invalid; @@ -425,6 +523,19 @@ static int parse_user_sigframe(struct user_ctxs *user, user->sve = (struct sve_context __user *)head; break; + case ZA_MAGIC: + if (!system_supports_sme()) + goto invalid; + + if (user->za) + goto invalid; + + if (size < sizeof(*user->za)) + goto invalid; + + user->za = (struct za_context __user *)head; + break; + case EXTRA_MAGIC: if (have_extra_context) goto invalid; @@ -548,6 +659,9 @@ static int restore_sigframe(struct pt_regs *regs, } } + if (err == 0 && system_supports_sme() && user.za) + err = restore_za_context(&user); + return err; } @@ -630,6 +744,24 @@ static int setup_sigframe_layout(struct rt_sigframe_user_layout *user, return err; } + if (system_supports_sme()) { + unsigned int vl; + unsigned int vq = 0; + + if (add_all) + vl = sme_max_vl(); + else + vl = task_get_sme_vl(current); + + if (thread_za_enabled(¤t->thread)) + vq = sve_vq_from_vl(vl); + + err = sigframe_alloc(user, &user->za_offset, + ZA_SIG_CONTEXT_SIZE(vq)); + if (err) + return err; + } + return sigframe_alloc_end(user); } @@ -678,6 +810,13 @@ static int setup_sigframe(struct rt_sigframe_user_layout *user, err |= preserve_sve_context(sve_ctx); } + /* ZA state if present */ + if (system_supports_sme() && err == 0 && user->za_offset) { + struct za_context __user *za_ctx = + apply_user_offset(user, user->za_offset); + err |= preserve_za_context(za_ctx); + } + if (err == 0 && user->extra_offset) { char __user *sfp = (char __user *)user->sigframe; char __user *userp = -- cgit v1.2.3 From e12310a0d30f260b26297bc8d7c95769489af038 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Tue, 19 Apr 2022 12:22:28 +0100 Subject: arm64/sme: Implement ptrace support for streaming mode SVE registers The streaming mode SVE registers are represented using the same data structures as for SVE but since the vector lengths supported and in use may not be the same as SVE we represent them with a new type NT_ARM_SSVE. Unfortunately we only have a single 16 bit reserved field available in the header so there is no space to fit the current and maximum vector length for both standard and streaming SVE mode without redefining the structure in a way the creates a complicatd and fragile ABI. Since FFR is not present in streaming mode it is read and written as zero. Setting NT_ARM_SSVE registers will put the task into streaming mode, similarly setting NT_ARM_SVE registers will exit it. Reads that do not correspond to the current mode of the task will return the header with no register data. For compatibility reasons on write setting no flag for the register type will be interpreted as setting SVE registers, though users can provide no register data as an alternative mechanism for doing so. Signed-off-by: Mark Brown Reviewed-by: Catalin Marinas Link: https://lore.kernel.org/r/20220419112247.711548-21-broonie@kernel.org Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/fpsimd.h | 1 + arch/arm64/include/uapi/asm/ptrace.h | 13 ++- arch/arm64/kernel/fpsimd.c | 31 +++-- arch/arm64/kernel/ptrace.c | 214 +++++++++++++++++++++++++++-------- include/uapi/linux/elf.h | 1 + 5 files changed, 201 insertions(+), 59 deletions(-) (limited to 'arch') diff --git a/arch/arm64/include/asm/fpsimd.h b/arch/arm64/include/asm/fpsimd.h index 6c33bc832ed4..5afcd0709aae 100644 --- a/arch/arm64/include/asm/fpsimd.h +++ b/arch/arm64/include/asm/fpsimd.h @@ -144,6 +144,7 @@ struct vl_info { extern void sve_alloc(struct task_struct *task); extern void fpsimd_release_task(struct task_struct *task); extern void fpsimd_sync_to_sve(struct task_struct *task); +extern void fpsimd_force_sync_to_sve(struct task_struct *task); extern void sve_sync_to_fpsimd(struct task_struct *task); extern void sve_sync_from_fpsimd_zeropad(struct task_struct *task); diff --git a/arch/arm64/include/uapi/asm/ptrace.h b/arch/arm64/include/uapi/asm/ptrace.h index 758ae984ff97..522b925a78c1 100644 --- a/arch/arm64/include/uapi/asm/ptrace.h +++ b/arch/arm64/include/uapi/asm/ptrace.h @@ -109,7 +109,7 @@ struct user_hwdebug_state { } dbg_regs[16]; }; -/* SVE/FP/SIMD state (NT_ARM_SVE) */ +/* SVE/FP/SIMD state (NT_ARM_SVE & NT_ARM_SSVE) */ struct user_sve_header { __u32 size; /* total meaningful regset content in bytes */ @@ -220,6 +220,7 @@ struct user_sve_header { (SVE_PT_SVE_PREG_OFFSET(vq, __SVE_NUM_PREGS) - \ SVE_PT_SVE_PREGS_OFFSET(vq)) +/* For streaming mode SVE (SSVE) FFR must be read and written as zero */ #define SVE_PT_SVE_FFR_OFFSET(vq) \ (SVE_PT_REGS_OFFSET + __SVE_FFR_OFFSET(vq)) @@ -240,10 +241,12 @@ struct user_sve_header { - SVE_PT_SVE_OFFSET + (__SVE_VQ_BYTES - 1)) \ / __SVE_VQ_BYTES * __SVE_VQ_BYTES) -#define SVE_PT_SIZE(vq, flags) \ - (((flags) & SVE_PT_REGS_MASK) == SVE_PT_REGS_SVE ? \ - SVE_PT_SVE_OFFSET + SVE_PT_SVE_SIZE(vq, flags) \ - : SVE_PT_FPSIMD_OFFSET + SVE_PT_FPSIMD_SIZE(vq, flags)) +#define SVE_PT_SIZE(vq, flags) \ + (((flags) & SVE_PT_REGS_MASK) == SVE_PT_REGS_SVE ? \ + SVE_PT_SVE_OFFSET + SVE_PT_SVE_SIZE(vq, flags) \ + : ((((flags) & SVE_PT_REGS_MASK) == SVE_PT_REGS_FPSIMD ? \ + SVE_PT_FPSIMD_OFFSET + SVE_PT_FPSIMD_SIZE(vq, flags) \ + : SVE_PT_REGS_OFFSET))) /* pointer authentication masks (NT_ARM_PAC_MASK) */ diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c index 80f7ca12f855..94f06e9d37cf 100644 --- a/arch/arm64/kernel/fpsimd.c +++ b/arch/arm64/kernel/fpsimd.c @@ -643,7 +643,7 @@ static void fpsimd_to_sve(struct task_struct *task) if (!system_supports_sve()) return; - vq = sve_vq_from_vl(task_get_sve_vl(task)); + vq = sve_vq_from_vl(thread_get_cur_vl(&task->thread)); __fpsimd_to_sve(sst, fst, vq); } @@ -660,7 +660,7 @@ static void fpsimd_to_sve(struct task_struct *task) */ static void sve_to_fpsimd(struct task_struct *task) { - unsigned int vq; + unsigned int vq, vl; void const *sst = task->thread.sve_state; struct user_fpsimd_state *fst = &task->thread.uw.fpsimd_state; unsigned int i; @@ -669,7 +669,8 @@ static void sve_to_fpsimd(struct task_struct *task) if (!system_supports_sve()) return; - vq = sve_vq_from_vl(task_get_sve_vl(task)); + vl = thread_get_cur_vl(&task->thread); + vq = sve_vq_from_vl(vl); for (i = 0; i < SVE_NUM_ZREGS; ++i) { p = (__uint128_t const *)ZREG(sst, vq, i); fst->vregs[i] = arm64_le128_to_cpu(*p); @@ -717,6 +718,19 @@ void sve_alloc(struct task_struct *task) } +/* + * Force the FPSIMD state shared with SVE to be updated in the SVE state + * even if the SVE state is the current active state. + * + * This should only be called by ptrace. task must be non-runnable. + * task->thread.sve_state must point to at least sve_state_size(task) + * bytes of allocated kernel memory. + */ +void fpsimd_force_sync_to_sve(struct task_struct *task) +{ + fpsimd_to_sve(task); +} + /* * Ensure that task->thread.sve_state is up to date with respect to * the user task, irrespective of when SVE is in use or not. @@ -727,7 +741,8 @@ void sve_alloc(struct task_struct *task) */ void fpsimd_sync_to_sve(struct task_struct *task) { - if (!test_tsk_thread_flag(task, TIF_SVE)) + if (!test_tsk_thread_flag(task, TIF_SVE) && + !thread_sm_enabled(&task->thread)) fpsimd_to_sve(task); } @@ -741,7 +756,8 @@ void fpsimd_sync_to_sve(struct task_struct *task) */ void sve_sync_to_fpsimd(struct task_struct *task) { - if (test_tsk_thread_flag(task, TIF_SVE)) + if (test_tsk_thread_flag(task, TIF_SVE) || + thread_sm_enabled(&task->thread)) sve_to_fpsimd(task); } @@ -766,7 +782,7 @@ void sve_sync_from_fpsimd_zeropad(struct task_struct *task) if (!test_tsk_thread_flag(task, TIF_SVE)) return; - vq = sve_vq_from_vl(task_get_sve_vl(task)); + vq = sve_vq_from_vl(thread_get_cur_vl(&task->thread)); memset(sst, 0, SVE_SIG_REGS_SIZE(vq)); __fpsimd_to_sve(sst, fst, vq); @@ -810,8 +826,7 @@ int vec_set_vector_length(struct task_struct *task, enum vec_type type, /* * To ensure the FPSIMD bits of the SVE vector registers are preserved, * write any live register state back to task_struct, and convert to a - * regular FPSIMD thread. Since the vector length can only be changed - * with a syscall we can't be in streaming mode while reconfiguring. + * regular FPSIMD thread. */ if (task == current) { get_cpu_fpsimd_context(); diff --git a/arch/arm64/kernel/ptrace.c b/arch/arm64/kernel/ptrace.c index 230a47b9189e..60185c27b394 100644 --- a/arch/arm64/kernel/ptrace.c +++ b/arch/arm64/kernel/ptrace.c @@ -713,21 +713,51 @@ static int system_call_set(struct task_struct *target, #ifdef CONFIG_ARM64_SVE static void sve_init_header_from_task(struct user_sve_header *header, - struct task_struct *target) + struct task_struct *target, + enum vec_type type) { unsigned int vq; + bool active; + bool fpsimd_only; + enum vec_type task_type; memset(header, 0, sizeof(*header)); - header->flags = test_tsk_thread_flag(target, TIF_SVE) ? - SVE_PT_REGS_SVE : SVE_PT_REGS_FPSIMD; - if (test_tsk_thread_flag(target, TIF_SVE_VL_INHERIT)) - header->flags |= SVE_PT_VL_INHERIT; + /* Check if the requested registers are active for the task */ + if (thread_sm_enabled(&target->thread)) + task_type = ARM64_VEC_SME; + else + task_type = ARM64_VEC_SVE; + active = (task_type == type); + + switch (type) { + case ARM64_VEC_SVE: + if (test_tsk_thread_flag(target, TIF_SVE_VL_INHERIT)) + header->flags |= SVE_PT_VL_INHERIT; + fpsimd_only = !test_tsk_thread_flag(target, TIF_SVE); + break; + case ARM64_VEC_SME: + if (test_tsk_thread_flag(target, TIF_SME_VL_INHERIT)) + header->flags |= SVE_PT_VL_INHERIT; + fpsimd_only = false; + break; + default: + WARN_ON_ONCE(1); + return; + } - header->vl = task_get_sve_vl(target); + if (active) { + if (fpsimd_only) { + header->flags |= SVE_PT_REGS_FPSIMD; + } else { + header->flags |= SVE_PT_REGS_SVE; + } + } + + header->vl = task_get_vl(target, type); vq = sve_vq_from_vl(header->vl); - header->max_vl = sve_max_vl(); + header->max_vl = vec_max_vl(type); header->size = SVE_PT_SIZE(vq, header->flags); header->max_size = SVE_PT_SIZE(sve_vq_from_vl(header->max_vl), SVE_PT_REGS_SVE); @@ -738,19 +768,17 @@ static unsigned int sve_size_from_header(struct user_sve_header const *header) return ALIGN(header->size, SVE_VQ_BYTES); } -static int sve_get(struct task_struct *target, - const struct user_regset *regset, - struct membuf to) +static int sve_get_common(struct task_struct *target, + const struct user_regset *regset, + struct membuf to, + enum vec_type type) { struct user_sve_header header; unsigned int vq; unsigned long start, end; - if (!system_supports_sve()) - return -EINVAL; - /* Header */ - sve_init_header_from_task(&header, target); + sve_init_header_from_task(&header, target, type); vq = sve_vq_from_vl(header.vl); membuf_write(&to, &header, sizeof(header)); @@ -758,49 +786,61 @@ static int sve_get(struct task_struct *target, if (target == current) fpsimd_preserve_current_state(); - /* Registers: FPSIMD-only case */ - BUILD_BUG_ON(SVE_PT_FPSIMD_OFFSET != sizeof(header)); - if ((header.flags & SVE_PT_REGS_MASK) == SVE_PT_REGS_FPSIMD) + BUILD_BUG_ON(SVE_PT_SVE_OFFSET != sizeof(header)); + + switch ((header.flags & SVE_PT_REGS_MASK)) { + case SVE_PT_REGS_FPSIMD: return __fpr_get(target, regset, to); - /* Otherwise: full SVE case */ + case SVE_PT_REGS_SVE: + start = SVE_PT_SVE_OFFSET; + end = SVE_PT_SVE_FFR_OFFSET(vq) + SVE_PT_SVE_FFR_SIZE(vq); + membuf_write(&to, target->thread.sve_state, end - start); - BUILD_BUG_ON(SVE_PT_SVE_OFFSET != sizeof(header)); - start = SVE_PT_SVE_OFFSET; - end = SVE_PT_SVE_FFR_OFFSET(vq) + SVE_PT_SVE_FFR_SIZE(vq); - membuf_write(&to, target->thread.sve_state, end - start); + start = end; + end = SVE_PT_SVE_FPSR_OFFSET(vq); + membuf_zero(&to, end - start); - start = end; - end = SVE_PT_SVE_FPSR_OFFSET(vq); - membuf_zero(&to, end - start); + /* + * Copy fpsr, and fpcr which must follow contiguously in + * struct fpsimd_state: + */ + start = end; + end = SVE_PT_SVE_FPCR_OFFSET(vq) + SVE_PT_SVE_FPCR_SIZE; + membuf_write(&to, &target->thread.uw.fpsimd_state.fpsr, + end - start); - /* - * Copy fpsr, and fpcr which must follow contiguously in - * struct fpsimd_state: - */ - start = end; - end = SVE_PT_SVE_FPCR_OFFSET(vq) + SVE_PT_SVE_FPCR_SIZE; - membuf_write(&to, &target->thread.uw.fpsimd_state.fpsr, end - start); + start = end; + end = sve_size_from_header(&header); + return membuf_zero(&to, end - start); - start = end; - end = sve_size_from_header(&header); - return membuf_zero(&to, end - start); + default: + return 0; + } } -static int sve_set(struct task_struct *target, +static int sve_get(struct task_struct *target, const struct user_regset *regset, - unsigned int pos, unsigned int count, - const void *kbuf, const void __user *ubuf) + struct membuf to) +{ + if (!system_supports_sve()) + return -EINVAL; + + return sve_get_common(target, regset, to, ARM64_VEC_SVE); +} + +static int sve_set_common(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + const void *kbuf, const void __user *ubuf, + enum vec_type type) { int ret; struct user_sve_header header; unsigned int vq; unsigned long start, end; - if (!system_supports_sve()) - return -EINVAL; - /* Header */ if (count < sizeof(header)) return -EINVAL; @@ -813,13 +853,37 @@ static int sve_set(struct task_struct *target, * Apart from SVE_PT_REGS_MASK, all SVE_PT_* flags are consumed by * vec_set_vector_length(), which will also validate them for us: */ - ret = vec_set_vector_length(target, ARM64_VEC_SVE, header.vl, + ret = vec_set_vector_length(target, type, header.vl, ((unsigned long)header.flags & ~SVE_PT_REGS_MASK) << 16); if (ret) goto out; /* Actual VL set may be less than the user asked for: */ - vq = sve_vq_from_vl(task_get_sve_vl(target)); + vq = sve_vq_from_vl(task_get_vl(target, type)); + + /* Enter/exit streaming mode */ + if (system_supports_sme()) { + u64 old_svcr = target->thread.svcr; + + switch (type) { + case ARM64_VEC_SVE: + target->thread.svcr &= ~SYS_SVCR_EL0_SM_MASK; + break; + case ARM64_VEC_SME: + target->thread.svcr |= SYS_SVCR_EL0_SM_MASK; + break; + default: + WARN_ON_ONCE(1); + return -EINVAL; + } + + /* + * If we switched then invalidate any existing SVE + * state and ensure there's storage. + */ + if (target->thread.svcr != old_svcr) + sve_alloc(target); + } /* Registers: FPSIMD-only case */ @@ -828,10 +892,15 @@ static int sve_set(struct task_struct *target, ret = __fpr_set(target, regset, pos, count, kbuf, ubuf, SVE_PT_FPSIMD_OFFSET); clear_tsk_thread_flag(target, TIF_SVE); + if (type == ARM64_VEC_SME) + fpsimd_force_sync_to_sve(target); goto out; } - /* Otherwise: full SVE case */ + /* + * Otherwise: no registers or full SVE case. For backwards + * compatibility reasons we treat empty flags as SVE registers. + */ /* * If setting a different VL from the requested VL and there is @@ -852,8 +921,9 @@ static int sve_set(struct task_struct *target, /* * Ensure target->thread.sve_state is up to date with target's - * FPSIMD regs, so that a short copyin leaves trailing registers - * unmodified. + * FPSIMD regs, so that a short copyin leaves trailing + * registers unmodified. Always enable SVE even if going into + * streaming mode. */ fpsimd_sync_to_sve(target); set_tsk_thread_flag(target, TIF_SVE); @@ -889,8 +959,46 @@ out: return ret; } +static int sve_set(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + const void *kbuf, const void __user *ubuf) +{ + if (!system_supports_sve()) + return -EINVAL; + + return sve_set_common(target, regset, pos, count, kbuf, ubuf, + ARM64_VEC_SVE); +} + #endif /* CONFIG_ARM64_SVE */ +#ifdef CONFIG_ARM64_SME + +static int ssve_get(struct task_struct *target, + const struct user_regset *regset, + struct membuf to) +{ + if (!system_supports_sme()) + return -EINVAL; + + return sve_get_common(target, regset, to, ARM64_VEC_SME); +} + +static int ssve_set(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + const void *kbuf, const void __user *ubuf) +{ + if (!system_supports_sme()) + return -EINVAL; + + return sve_set_common(target, regset, pos, count, kbuf, ubuf, + ARM64_VEC_SME); +} + +#endif /* CONFIG_ARM64_SME */ + #ifdef CONFIG_ARM64_PTR_AUTH static int pac_mask_get(struct task_struct *target, const struct user_regset *regset, @@ -1108,6 +1216,9 @@ enum aarch64_regset { #ifdef CONFIG_ARM64_SVE REGSET_SVE, #endif +#ifdef CONFIG_ARM64_SVE + REGSET_SSVE, +#endif #ifdef CONFIG_ARM64_PTR_AUTH REGSET_PAC_MASK, REGSET_PAC_ENABLED_KEYS, @@ -1188,6 +1299,17 @@ static const struct user_regset aarch64_regsets[] = { .set = sve_set, }, #endif +#ifdef CONFIG_ARM64_SME + [REGSET_SSVE] = { /* Streaming mode SVE */ + .core_note_type = NT_ARM_SSVE, + .n = DIV_ROUND_UP(SVE_PT_SIZE(SVE_VQ_MAX, SVE_PT_REGS_SVE), + SVE_VQ_BYTES), + .size = SVE_VQ_BYTES, + .align = SVE_VQ_BYTES, + .regset_get = ssve_get, + .set = ssve_set, + }, +#endif #ifdef CONFIG_ARM64_PTR_AUTH [REGSET_PAC_MASK] = { .core_note_type = NT_ARM_PAC_MASK, diff --git a/include/uapi/linux/elf.h b/include/uapi/linux/elf.h index 787c657bfae8..a8dc688e1826 100644 --- a/include/uapi/linux/elf.h +++ b/include/uapi/linux/elf.h @@ -431,6 +431,7 @@ typedef struct elf64_shdr { #define NT_ARM_PACG_KEYS 0x408 /* ARM pointer authentication generic key */ #define NT_ARM_TAGGED_ADDR_CTRL 0x409 /* arm64 tagged address control (prctl()) */ #define NT_ARM_PAC_ENABLED_KEYS 0x40a /* arm64 ptr auth enabled keys (prctl()) */ +#define NT_ARM_SSVE 0x40b /* ARM Streaming SVE registers */ #define NT_ARC_V2 0x600 /* ARCv2 accumulator/extra registers */ #define NT_VMCOREDD 0x700 /* Vmcore Device Dump Note */ #define NT_MIPS_DSP 0x800 /* MIPS DSP ASE registers */ -- cgit v1.2.3 From 776b4a1cf36411e96972455ca72906b722b80ea1 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Tue, 19 Apr 2022 12:22:29 +0100 Subject: arm64/sme: Add ptrace support for ZA The ZA array can be read and written with the NT_ARM_ZA. Similarly to our interface for the SVE vector registers the regset consists of a header with information on the current vector length followed by an optional register data payload, represented as for signals as a series of horizontal vectors from 0 to VL/8 in the endianness independent format used for vectors. On get if ZA is enabled then register data will be provided, otherwise it will be omitted. On set if register data is provided then ZA is enabled and initialized using the provided data, otherwise it is disabled. Signed-off-by: Mark Brown Reviewed-by: Catalin Marinas Link: https://lore.kernel.org/r/20220419112247.711548-22-broonie@kernel.org Signed-off-by: Catalin Marinas --- arch/arm64/include/uapi/asm/ptrace.h | 56 ++++++++++++++ arch/arm64/kernel/ptrace.c | 144 +++++++++++++++++++++++++++++++++++ include/uapi/linux/elf.h | 1 + 3 files changed, 201 insertions(+) (limited to 'arch') diff --git a/arch/arm64/include/uapi/asm/ptrace.h b/arch/arm64/include/uapi/asm/ptrace.h index 522b925a78c1..7fa2f7036aa7 100644 --- a/arch/arm64/include/uapi/asm/ptrace.h +++ b/arch/arm64/include/uapi/asm/ptrace.h @@ -268,6 +268,62 @@ struct user_pac_generic_keys { __uint128_t apgakey; }; +/* ZA state (NT_ARM_ZA) */ + +struct user_za_header { + __u32 size; /* total meaningful regset content in bytes */ + __u32 max_size; /* maxmium possible size for this thread */ + __u16 vl; /* current vector length */ + __u16 max_vl; /* maximum possible vector length */ + __u16 flags; + __u16 __reserved; +}; + +/* + * Common ZA_PT_* flags: + * These must be kept in sync with prctl interface in + */ +#define ZA_PT_VL_INHERIT ((1 << 17) /* PR_SME_VL_INHERIT */ >> 16) +#define ZA_PT_VL_ONEXEC ((1 << 18) /* PR_SME_SET_VL_ONEXEC */ >> 16) + + +/* + * The remainder of the ZA state follows struct user_za_header. The + * total size of the ZA state (including header) depends on the + * metadata in the header: ZA_PT_SIZE(vq, flags) gives the total size + * of the state in bytes, including the header. + * + * Refer to for details of how to pass the correct + * "vq" argument to these macros. + */ + +/* Offset from the start of struct user_za_header to the register data */ +#define ZA_PT_ZA_OFFSET \ + ((sizeof(struct user_za_header) + (__SVE_VQ_BYTES - 1)) \ + / __SVE_VQ_BYTES * __SVE_VQ_BYTES) + +/* + * The payload starts at offset ZA_PT_ZA_OFFSET, and is of size + * ZA_PT_ZA_SIZE(vq, flags). + * + * The ZA array is stored as a sequence of horizontal vectors ZAV of SVL/8 + * bytes each, starting from vector 0. + * + * Additional data might be appended in the future. + * + * The ZA matrix is represented in memory in an endianness-invariant layout + * which differs from the layout used for the FPSIMD V-registers on big-endian + * systems: see sigcontext.h for more explanation. + */ + +#define ZA_PT_ZAV_OFFSET(vq, n) \ + (ZA_PT_ZA_OFFSET + ((vq * __SVE_VQ_BYTES) * n)) + +#define ZA_PT_ZA_SIZE(vq) ((vq * __SVE_VQ_BYTES) * (vq * __SVE_VQ_BYTES)) + +#define ZA_PT_SIZE(vq) \ + (ZA_PT_ZA_OFFSET + ZA_PT_ZA_SIZE(vq)) + #endif /* __ASSEMBLY__ */ #endif /* _UAPI__ASM_PTRACE_H */ diff --git a/arch/arm64/kernel/ptrace.c b/arch/arm64/kernel/ptrace.c index 60185c27b394..47d8a7472171 100644 --- a/arch/arm64/kernel/ptrace.c +++ b/arch/arm64/kernel/ptrace.c @@ -997,6 +997,141 @@ static int ssve_set(struct task_struct *target, ARM64_VEC_SME); } +static int za_get(struct task_struct *target, + const struct user_regset *regset, + struct membuf to) +{ + struct user_za_header header; + unsigned int vq; + unsigned long start, end; + + if (!system_supports_sme()) + return -EINVAL; + + /* Header */ + memset(&header, 0, sizeof(header)); + + if (test_tsk_thread_flag(target, TIF_SME_VL_INHERIT)) + header.flags |= ZA_PT_VL_INHERIT; + + header.vl = task_get_sme_vl(target); + vq = sve_vq_from_vl(header.vl); + header.max_vl = sme_max_vl(); + header.max_size = ZA_PT_SIZE(vq); + + /* If ZA is not active there is only the header */ + if (thread_za_enabled(&target->thread)) + header.size = ZA_PT_SIZE(vq); + else + header.size = ZA_PT_ZA_OFFSET; + + membuf_write(&to, &header, sizeof(header)); + + BUILD_BUG_ON(ZA_PT_ZA_OFFSET != sizeof(header)); + end = ZA_PT_ZA_OFFSET; + + if (target == current) + fpsimd_preserve_current_state(); + + /* Any register data to include? */ + if (thread_za_enabled(&target->thread)) { + start = end; + end = ZA_PT_SIZE(vq); + membuf_write(&to, target->thread.za_state, end - start); + } + + /* Zero any trailing padding */ + start = end; + end = ALIGN(header.size, SVE_VQ_BYTES); + return membuf_zero(&to, end - start); +} + +static int za_set(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + const void *kbuf, const void __user *ubuf) +{ + int ret; + struct user_za_header header; + unsigned int vq; + unsigned long start, end; + + if (!system_supports_sme()) + return -EINVAL; + + /* Header */ + if (count < sizeof(header)) + return -EINVAL; + ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &header, + 0, sizeof(header)); + if (ret) + goto out; + + /* + * All current ZA_PT_* flags are consumed by + * vec_set_vector_length(), which will also validate them for + * us: + */ + ret = vec_set_vector_length(target, ARM64_VEC_SME, header.vl, + ((unsigned long)header.flags) << 16); + if (ret) + goto out; + + /* Actual VL set may be less than the user asked for: */ + vq = sve_vq_from_vl(task_get_sme_vl(target)); + + /* Ensure there is some SVE storage for streaming mode */ + if (!target->thread.sve_state) { + sve_alloc(target); + if (!target->thread.sve_state) { + clear_thread_flag(TIF_SME); + ret = -ENOMEM; + goto out; + } + } + + /* Allocate/reinit ZA storage */ + sme_alloc(target); + if (!target->thread.za_state) { + ret = -ENOMEM; + clear_tsk_thread_flag(target, TIF_SME); + goto out; + } + + /* If there is no data then disable ZA */ + if (!count) { + target->thread.svcr &= ~SYS_SVCR_EL0_ZA_MASK; + goto out; + } + + /* + * If setting a different VL from the requested VL and there is + * register data, the data layout will be wrong: don't even + * try to set the registers in this case. + */ + if (vq != sve_vq_from_vl(header.vl)) { + ret = -EIO; + goto out; + } + + BUILD_BUG_ON(ZA_PT_ZA_OFFSET != sizeof(header)); + start = ZA_PT_ZA_OFFSET; + end = ZA_PT_SIZE(vq); + ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, + target->thread.za_state, + start, end); + if (ret) + goto out; + + /* Mark ZA as active and let userspace use it */ + set_tsk_thread_flag(target, TIF_SME); + target->thread.svcr |= SYS_SVCR_EL0_ZA_MASK; + +out: + fpsimd_flush_task_state(target); + return ret; +} + #endif /* CONFIG_ARM64_SME */ #ifdef CONFIG_ARM64_PTR_AUTH @@ -1218,6 +1353,7 @@ enum aarch64_regset { #endif #ifdef CONFIG_ARM64_SVE REGSET_SSVE, + REGSET_ZA, #endif #ifdef CONFIG_ARM64_PTR_AUTH REGSET_PAC_MASK, @@ -1309,6 +1445,14 @@ static const struct user_regset aarch64_regsets[] = { .regset_get = ssve_get, .set = ssve_set, }, + [REGSET_ZA] = { /* SME ZA */ + .core_note_type = NT_ARM_ZA, + .n = DIV_ROUND_UP(ZA_PT_ZA_SIZE(SVE_VQ_MAX), SVE_VQ_BYTES), + .size = SVE_VQ_BYTES, + .align = SVE_VQ_BYTES, + .regset_get = za_get, + .set = za_set, + }, #endif #ifdef CONFIG_ARM64_PTR_AUTH [REGSET_PAC_MASK] = { diff --git a/include/uapi/linux/elf.h b/include/uapi/linux/elf.h index a8dc688e1826..97808f958903 100644 --- a/include/uapi/linux/elf.h +++ b/include/uapi/linux/elf.h @@ -432,6 +432,7 @@ typedef struct elf64_shdr { #define NT_ARM_TAGGED_ADDR_CTRL 0x409 /* arm64 tagged address control (prctl()) */ #define NT_ARM_PAC_ENABLED_KEYS 0x40a /* arm64 ptr auth enabled keys (prctl()) */ #define NT_ARM_SSVE 0x40b /* ARM Streaming SVE registers */ +#define NT_ARM_ZA 0x40c /* ARM SME ZA registers */ #define NT_ARC_V2 0x600 /* ARCv2 accumulator/extra registers */ #define NT_VMCOREDD 0x700 /* Vmcore Device Dump Note */ #define NT_MIPS_DSP 0x800 /* MIPS DSP ASE registers */ -- cgit v1.2.3 From d45d7ff7047f7f6c3221b0f028fade640812f931 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Tue, 19 Apr 2022 12:22:30 +0100 Subject: arm64/sme: Disable streaming mode and ZA when flushing CPU state Both streaming mode and ZA may increase power consumption when they are enabled and streaming mode makes many FPSIMD and SVE instructions undefined which will cause problems for any kernel mode floating point so disable both when we flush the CPU state. This covers both kernel_neon_begin() and idle and after flushing the state a reload is always required anyway. Signed-off-by: Mark Brown Reviewed-by: Catalin Marinas Link: https://lore.kernel.org/r/20220419112247.711548-23-broonie@kernel.org Signed-off-by: Catalin Marinas --- arch/arm64/kernel/fpsimd.c | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'arch') diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c index 94f06e9d37cf..9592cdd7d635 100644 --- a/arch/arm64/kernel/fpsimd.c +++ b/arch/arm64/kernel/fpsimd.c @@ -1759,6 +1759,15 @@ static void fpsimd_flush_cpu_state(void) { WARN_ON(!system_supports_fpsimd()); __this_cpu_write(fpsimd_last_state.st, NULL); + + /* + * Leaving streaming mode enabled will cause issues for any kernel + * NEON and leaving streaming mode or ZA enabled may increase power + * consumption. + */ + if (system_supports_sme()) + sme_smstop(); + set_thread_flag(TIF_FOREIGN_FPSTATE); } -- cgit v1.2.3 From e0838f6373e5cb72516fc4c26bba309097e2a80a Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Tue, 19 Apr 2022 12:22:31 +0100 Subject: arm64/sme: Save and restore streaming mode over EFI runtime calls When saving and restoring the floating point state over an EFI runtime call ensure that we handle streaming mode, only handling FFR if we are not in streaming mode and ensuring that we are in normal mode over the call into runtime services. We currently assume that ZA will not be modified by runtime services, the specification is not yet finalised so this may need updating if that changes. Signed-off-by: Mark Brown Reviewed-by: Catalin Marinas Link: https://lore.kernel.org/r/20220419112247.711548-24-broonie@kernel.org Signed-off-by: Catalin Marinas --- arch/arm64/kernel/fpsimd.c | 48 ++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 42 insertions(+), 6 deletions(-) (limited to 'arch') diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c index 9592cdd7d635..64431bc62472 100644 --- a/arch/arm64/kernel/fpsimd.c +++ b/arch/arm64/kernel/fpsimd.c @@ -1056,21 +1056,25 @@ int vec_verify_vq_map(enum vec_type type) static void __init sve_efi_setup(void) { - struct vl_info *info = &vl_info[ARM64_VEC_SVE]; + int max_vl = 0; + int i; if (!IS_ENABLED(CONFIG_EFI)) return; + for (i = 0; i < ARRAY_SIZE(vl_info); i++) + max_vl = max(vl_info[i].max_vl, max_vl); + /* * alloc_percpu() warns and prints a backtrace if this goes wrong. * This is evidence of a crippled system and we are returning void, * so no attempt is made to handle this situation here. */ - if (!sve_vl_valid(info->max_vl)) + if (!sve_vl_valid(max_vl)) goto fail; efi_sve_state = __alloc_percpu( - SVE_SIG_REGS_SIZE(sve_vq_from_vl(info->max_vl)), SVE_VQ_BYTES); + SVE_SIG_REGS_SIZE(sve_vq_from_vl(max_vl)), SVE_VQ_BYTES); if (!efi_sve_state) goto fail; @@ -1845,6 +1849,7 @@ EXPORT_SYMBOL(kernel_neon_end); static DEFINE_PER_CPU(struct user_fpsimd_state, efi_fpsimd_state); static DEFINE_PER_CPU(bool, efi_fpsimd_state_used); static DEFINE_PER_CPU(bool, efi_sve_state_used); +static DEFINE_PER_CPU(bool, efi_sm_state); /* * EFI runtime services support functions @@ -1879,12 +1884,28 @@ void __efi_fpsimd_begin(void) */ if (system_supports_sve() && likely(efi_sve_state)) { char *sve_state = this_cpu_ptr(efi_sve_state); + bool ffr = true; + u64 svcr; __this_cpu_write(efi_sve_state_used, true); + if (system_supports_sme()) { + svcr = read_sysreg_s(SYS_SVCR_EL0); + + if (!system_supports_fa64()) + ffr = svcr & SYS_SVCR_EL0_SM_MASK; + + __this_cpu_write(efi_sm_state, ffr); + } + sve_save_state(sve_state + sve_ffr_offset(sve_max_vl()), &this_cpu_ptr(&efi_fpsimd_state)->fpsr, - true); + ffr); + + if (system_supports_sme()) + sysreg_clear_set_s(SYS_SVCR_EL0, + SYS_SVCR_EL0_SM_MASK, 0); + } else { fpsimd_save_state(this_cpu_ptr(&efi_fpsimd_state)); } @@ -1907,11 +1928,26 @@ void __efi_fpsimd_end(void) if (system_supports_sve() && likely(__this_cpu_read(efi_sve_state_used))) { char const *sve_state = this_cpu_ptr(efi_sve_state); + bool ffr = true; + + /* + * Restore streaming mode; EFI calls are + * normal function calls so should not return in + * streaming mode. + */ + if (system_supports_sme()) { + if (__this_cpu_read(efi_sm_state)) { + sysreg_clear_set_s(SYS_SVCR_EL0, + 0, + SYS_SVCR_EL0_SM_MASK); + if (!system_supports_fa64()) + ffr = efi_sm_state; + } + } - sve_set_vq(sve_vq_from_vl(sve_get_vl()) - 1); sve_load_state(sve_state + sve_ffr_offset(sve_max_vl()), &this_cpu_ptr(&efi_fpsimd_state)->fpsr, - true); + ffr); __this_cpu_write(efi_sve_state_used, false); } else { -- cgit v1.2.3 From 90807748ca3ac4874853b2148928529bf1f13e5e Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Tue, 19 Apr 2022 12:22:32 +0100 Subject: KVM: arm64: Hide SME system registers from guests For the time being we do not support use of SME by KVM guests, support for this will be enabled in future. In order to prevent any side effects or side channels via the new system registers, including the EL0 read/write register TPIDR2, explicitly undefine all the system registers added by SME and mask out the SME bitfield in SYS_ID_AA64PFR1. Signed-off-by: Mark Brown Reviewed-by: Catalin Marinas Reviewed-by: Marc Zyngier Link: https://lore.kernel.org/r/20220419112247.711548-25-broonie@kernel.org Signed-off-by: Catalin Marinas --- arch/arm64/kvm/sys_regs.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index 7b45c040cc27..689e53dd4cb1 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -1132,6 +1132,8 @@ static u64 read_id_reg(const struct kvm_vcpu *vcpu, case SYS_ID_AA64PFR1_EL1: if (!kvm_has_mte(vcpu->kvm)) val &= ~ARM64_FEATURE_MASK(ID_AA64PFR1_MTE); + + val &= ~ARM64_FEATURE_MASK(ID_AA64PFR1_SME); break; case SYS_ID_AA64ISAR1_EL1: if (!vcpu_has_ptrauth(vcpu)) @@ -1553,7 +1555,7 @@ static const struct sys_reg_desc sys_reg_descs[] = { ID_UNALLOCATED(4,2), ID_UNALLOCATED(4,3), ID_SANITISED(ID_AA64ZFR0_EL1), - ID_UNALLOCATED(4,5), + ID_HIDDEN(ID_AA64SMFR0_EL1), ID_UNALLOCATED(4,6), ID_UNALLOCATED(4,7), @@ -1596,6 +1598,8 @@ static const struct sys_reg_desc sys_reg_descs[] = { { SYS_DESC(SYS_ZCR_EL1), NULL, reset_val, ZCR_EL1, 0, .visibility = sve_visibility }, { SYS_DESC(SYS_TRFCR_EL1), undef_access }, + { SYS_DESC(SYS_SMPRI_EL1), undef_access }, + { SYS_DESC(SYS_SMCR_EL1), undef_access }, { SYS_DESC(SYS_TTBR0_EL1), access_vm_reg, reset_unknown, TTBR0_EL1 }, { SYS_DESC(SYS_TTBR1_EL1), access_vm_reg, reset_unknown, TTBR1_EL1 }, { SYS_DESC(SYS_TCR_EL1), access_vm_reg, reset_val, TCR_EL1, 0 }, @@ -1678,8 +1682,10 @@ static const struct sys_reg_desc sys_reg_descs[] = { { SYS_DESC(SYS_CCSIDR_EL1), access_ccsidr }, { SYS_DESC(SYS_CLIDR_EL1), access_clidr }, + { SYS_DESC(SYS_SMIDR_EL1), undef_access }, { SYS_DESC(SYS_CSSELR_EL1), access_csselr, reset_unknown, CSSELR_EL1 }, { SYS_DESC(SYS_CTR_EL0), access_ctr }, + { SYS_DESC(SYS_SVCR_EL0), undef_access }, { PMU_SYS_REG(SYS_PMCR_EL0), .access = access_pmcr, .reset = reset_pmcr, .reg = PMCR_EL0 }, @@ -1719,6 +1725,7 @@ static const struct sys_reg_desc sys_reg_descs[] = { { SYS_DESC(SYS_TPIDR_EL0), NULL, reset_unknown, TPIDR_EL0 }, { SYS_DESC(SYS_TPIDRRO_EL0), NULL, reset_unknown, TPIDRRO_EL0 }, + { SYS_DESC(SYS_TPIDR2_EL0), undef_access }, { SYS_DESC(SYS_SCXTNUM_EL0), undef_access }, -- cgit v1.2.3 From 51729fb1d0683df5e9e4d5dbe2ec46188f011da9 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Tue, 19 Apr 2022 12:22:33 +0100 Subject: KVM: arm64: Trap SME usage in guest SME defines two new traps which need to be enabled for guests to ensure that they can't use SME, one for the main SME operations which mirrors the traps for SVE and another for access to TPIDR2 in SCTLR_EL2. For VHE manage SMEN along with ZEN in activate_traps() and the FP state management callbacks, along with SCTLR_EL2.EnTPIDR2. There is no existing dynamic management of SCTLR_EL2. For nVHE manage TSM in activate_traps() along with the fine grained traps for TPIDR2 and SMPRI. There is no existing dynamic management of fine grained traps. Signed-off-by: Mark Brown Reviewed-by: Catalin Marinas Reviewed-by: Marc Zyngier Link: https://lore.kernel.org/r/20220419112247.711548-26-broonie@kernel.org Signed-off-by: Catalin Marinas --- arch/arm64/kvm/hyp/nvhe/switch.c | 30 ++++++++++++++++++++++++++++++ arch/arm64/kvm/hyp/vhe/switch.c | 11 ++++++++++- 2 files changed, 40 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm64/kvm/hyp/nvhe/switch.c b/arch/arm64/kvm/hyp/nvhe/switch.c index 6410d21d8695..caace61ea459 100644 --- a/arch/arm64/kvm/hyp/nvhe/switch.c +++ b/arch/arm64/kvm/hyp/nvhe/switch.c @@ -47,10 +47,24 @@ static void __activate_traps(struct kvm_vcpu *vcpu) val |= CPTR_EL2_TFP | CPTR_EL2_TZ; __activate_traps_fpsimd32(vcpu); } + if (cpus_have_final_cap(ARM64_SME)) + val |= CPTR_EL2_TSM; write_sysreg(val, cptr_el2); write_sysreg(__this_cpu_read(kvm_hyp_vector), vbar_el2); + if (cpus_have_final_cap(ARM64_SME)) { + val = read_sysreg_s(SYS_HFGRTR_EL2); + val &= ~(HFGxTR_EL2_nTPIDR2_EL0_MASK | + HFGxTR_EL2_nSMPRI_EL1_MASK); + write_sysreg_s(val, SYS_HFGRTR_EL2); + + val = read_sysreg_s(SYS_HFGWTR_EL2); + val &= ~(HFGxTR_EL2_nTPIDR2_EL0_MASK | + HFGxTR_EL2_nSMPRI_EL1_MASK); + write_sysreg_s(val, SYS_HFGWTR_EL2); + } + if (cpus_have_final_cap(ARM64_WORKAROUND_SPECULATIVE_AT)) { struct kvm_cpu_context *ctxt = &vcpu->arch.ctxt; @@ -94,9 +108,25 @@ static void __deactivate_traps(struct kvm_vcpu *vcpu) write_sysreg(this_cpu_ptr(&kvm_init_params)->hcr_el2, hcr_el2); + if (cpus_have_final_cap(ARM64_SME)) { + u64 val; + + val = read_sysreg_s(SYS_HFGRTR_EL2); + val |= HFGxTR_EL2_nTPIDR2_EL0_MASK | + HFGxTR_EL2_nSMPRI_EL1_MASK; + write_sysreg_s(val, SYS_HFGRTR_EL2); + + val = read_sysreg_s(SYS_HFGWTR_EL2); + val |= HFGxTR_EL2_nTPIDR2_EL0_MASK | + HFGxTR_EL2_nSMPRI_EL1_MASK; + write_sysreg_s(val, SYS_HFGWTR_EL2); + } + cptr = CPTR_EL2_DEFAULT; if (vcpu_has_sve(vcpu) && (vcpu->arch.flags & KVM_ARM64_FP_ENABLED)) cptr |= CPTR_EL2_TZ; + if (cpus_have_final_cap(ARM64_SME)) + cptr &= ~CPTR_EL2_TSM; write_sysreg(cptr, cptr_el2); write_sysreg(__kvm_hyp_host_vector, vbar_el2); diff --git a/arch/arm64/kvm/hyp/vhe/switch.c b/arch/arm64/kvm/hyp/vhe/switch.c index 262dfe03134d..969f20daf97a 100644 --- a/arch/arm64/kvm/hyp/vhe/switch.c +++ b/arch/arm64/kvm/hyp/vhe/switch.c @@ -41,7 +41,8 @@ static void __activate_traps(struct kvm_vcpu *vcpu) val = read_sysreg(cpacr_el1); val |= CPACR_EL1_TTA; - val &= ~(CPACR_EL1_ZEN_EL0EN | CPACR_EL1_ZEN_EL1EN); + val &= ~(CPACR_EL1_ZEN_EL0EN | CPACR_EL1_ZEN_EL1EN | + CPACR_EL1_SMEN_EL0EN | CPACR_EL1_SMEN_EL1EN); /* * With VHE (HCR.E2H == 1), accesses to CPACR_EL1 are routed to @@ -62,6 +63,10 @@ static void __activate_traps(struct kvm_vcpu *vcpu) __activate_traps_fpsimd32(vcpu); } + if (cpus_have_final_cap(ARM64_SME)) + write_sysreg(read_sysreg(sctlr_el2) & ~SCTLR_ELx_ENTP2, + sctlr_el2); + write_sysreg(val, cpacr_el1); write_sysreg(__this_cpu_read(kvm_hyp_vector), vbar_el1); @@ -83,6 +88,10 @@ static void __deactivate_traps(struct kvm_vcpu *vcpu) */ asm(ALTERNATIVE("nop", "isb", ARM64_WORKAROUND_SPECULATIVE_AT)); + if (cpus_have_final_cap(ARM64_SME)) + write_sysreg(read_sysreg(sctlr_el2) | SCTLR_ELx_ENTP2, + sctlr_el2); + write_sysreg(CPACR_EL1_DEFAULT, cpacr_el1); if (!arm64_kernel_unmapped_at_el0()) -- cgit v1.2.3 From 861262ab862702061ae3355b811a07b15d1b2fc0 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Tue, 19 Apr 2022 12:22:34 +0100 Subject: KVM: arm64: Handle SME host state when running guests While we don't currently support SME in guests we do currently support it for the host system so we need to take care of SME's impact, including the floating point register state, when running guests. Simiarly to SVE we need to manage the traps in CPACR_RL1, what is new is the handling of streaming mode and ZA. Normally we defer any handling of the floating point register state until the guest first uses it however if the system is in streaming mode FPSIMD and SVE operations may generate SME traps which we would need to distinguish from actual attempts by the guest to use SME. Rather than do this for the time being if we are in streaming mode when entering the guest we force the floating point state to be saved immediately and exit streaming mode, meaning that the guest won't generate SME traps for supported operations. We could handle ZA in the access trap similarly to the FPSIMD/SVE state without the disruption caused by streaming mode but for simplicity handle it the same way as streaming mode for now. This will be revisited when we support SME for guests (hopefully before SME hardware becomes available), for now it will only incur additional cost on systems with SME and even there only if streaming mode or ZA are enabled. Signed-off-by: Mark Brown Reviewed-by: Catalin Marinas Reviewed-by: Marc Zyngier Link: https://lore.kernel.org/r/20220419112247.711548-27-broonie@kernel.org Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/kvm_host.h | 1 + arch/arm64/kvm/fpsimd.c | 36 ++++++++++++++++++++++++++++++++++++ 2 files changed, 37 insertions(+) (limited to 'arch') diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 8a7c442d5b57..f8f0d30dd1a2 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -454,6 +454,7 @@ struct kvm_vcpu_arch { #define KVM_ARM64_DEBUG_STATE_SAVE_TRBE (1 << 13) /* Save TRBE context if active */ #define KVM_ARM64_FP_FOREIGN_FPSTATE (1 << 14) #define KVM_ARM64_ON_UNSUPPORTED_CPU (1 << 15) /* Physical CPU not in supported_cpus */ +#define KVM_ARM64_HOST_SME_ENABLED (1 << 16) /* SME enabled for EL0 */ #define KVM_GUESTDBG_VALID_MASK (KVM_GUESTDBG_ENABLE | \ KVM_GUESTDBG_USE_SW_BP | \ diff --git a/arch/arm64/kvm/fpsimd.c b/arch/arm64/kvm/fpsimd.c index 57d7ac3cfa0c..441edb9c398c 100644 --- a/arch/arm64/kvm/fpsimd.c +++ b/arch/arm64/kvm/fpsimd.c @@ -82,6 +82,26 @@ void kvm_arch_vcpu_load_fp(struct kvm_vcpu *vcpu) if (read_sysreg(cpacr_el1) & CPACR_EL1_ZEN_EL0EN) vcpu->arch.flags |= KVM_ARM64_HOST_SVE_ENABLED; + + /* + * We don't currently support SME guests but if we leave + * things in streaming mode then when the guest starts running + * FPSIMD or SVE code it may generate SME traps so as a + * special case if we are in streaming mode we force the host + * state to be saved now and exit streaming mode so that we + * don't have to handle any SME traps for valid guest + * operations. Do this for ZA as well for now for simplicity. + */ + if (system_supports_sme()) { + if (read_sysreg(cpacr_el1) & CPACR_EL1_SMEN_EL0EN) + vcpu->arch.flags |= KVM_ARM64_HOST_SME_ENABLED; + + if (read_sysreg_s(SYS_SVCR_EL0) & + (SYS_SVCR_EL0_SM_MASK | SYS_SVCR_EL0_ZA_MASK)) { + vcpu->arch.flags &= ~KVM_ARM64_FP_HOST; + fpsimd_save_and_flush_cpu_state(); + } + } } /* @@ -135,6 +155,22 @@ void kvm_arch_vcpu_put_fp(struct kvm_vcpu *vcpu) local_irq_save(flags); + /* + * If we have VHE then the Hyp code will reset CPACR_EL1 to + * CPACR_EL1_DEFAULT and we need to reenable SME. + */ + if (has_vhe() && system_supports_sme()) { + /* Also restore EL0 state seen on entry */ + if (vcpu->arch.flags & KVM_ARM64_HOST_SME_ENABLED) + sysreg_clear_set(CPACR_EL1, 0, + CPACR_EL1_SMEN_EL0EN | + CPACR_EL1_SMEN_EL1EN); + else + sysreg_clear_set(CPACR_EL1, + CPACR_EL1_SMEN_EL0EN, + CPACR_EL1_SMEN_EL1EN); + } + if (vcpu->arch.flags & KVM_ARM64_FP_ENABLED) { if (vcpu_has_sve(vcpu)) { __vcpu_sys_reg(vcpu, ZCR_EL1) = read_sysreg_el1(SYS_ZCR); -- cgit v1.2.3 From a1f4ccd25cc256255813f584f10e5527369d4a02 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Tue, 19 Apr 2022 12:22:35 +0100 Subject: arm64/sme: Provide Kconfig for SME Now that basline support for the Scalable Matrix Extension (SME) is present introduce the Kconfig option allowing it to be built. While the feature registers don't impose a strong requirement for a system with SME to support SVE at runtime the support for streaming mode SVE is mostly shared with normal SVE so depend on SVE. Signed-off-by: Mark Brown Reviewed-by: Catalin Marinas Link: https://lore.kernel.org/r/20220419112247.711548-28-broonie@kernel.org Signed-off-by: Catalin Marinas --- arch/arm64/Kconfig | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'arch') diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 57c4c995965f..0897984918e8 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -1939,6 +1939,17 @@ config ARM64_SVE booting the kernel. If unsure and you are not observing these symptoms, you should assume that it is safe to say Y. +config ARM64_SME + bool "ARM Scalable Matrix Extension support" + default y + depends on ARM64_SVE + help + The Scalable Matrix Extension (SME) is an extension to the AArch64 + execution state which utilises a substantial subset of the SVE + instruction set, together with the addition of new architectural + register state capable of holding two dimensional matrix tiles to + enable various matrix operations. + config ARM64_MODULE_PLTS bool "Use PLTs to allow module memory to spill over into vmalloc area" depends on MODULES -- cgit v1.2.3 From da32b5817253697671af961715517bfbb308a592 Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Sat, 23 Apr 2022 11:07:49 +0100 Subject: mm: Add fault_in_subpage_writeable() to probe at sub-page granularity On hardware with features like arm64 MTE or SPARC ADI, an access fault can be triggered at sub-page granularity. Depending on how the fault_in_writeable() function is used, the caller can get into a live-lock by continuously retrying the fault-in on an address different from the one where the uaccess failed. In the majority of cases progress is ensured by the following conditions: 1. copy_to_user_nofault() guarantees at least one byte access if the user address is not faulting. 2. The fault_in_writeable() loop is resumed from the first address that could not be accessed by copy_to_user_nofault(). If the loop iteration is restarted from an earlier (initial) point, the loop is repeated with the same conditions and it would live-lock. Introduce an arch-specific probe_subpage_writeable() and call it from the newly added fault_in_subpage_writeable() function. The arch code with sub-page faults will have to implement the specific probing functionality. Note that no other fault_in_subpage_*() functions are added since they have no callers currently susceptible to a live-lock. Signed-off-by: Catalin Marinas Cc: Andrew Morton Link: https://lore.kernel.org/r/20220423100751.1870771-2-catalin.marinas@arm.com Signed-off-by: Catalin Marinas --- arch/Kconfig | 7 +++++++ include/linux/pagemap.h | 1 + include/linux/uaccess.h | 22 ++++++++++++++++++++++ mm/gup.c | 29 +++++++++++++++++++++++++++++ 4 files changed, 59 insertions(+) (limited to 'arch') diff --git a/arch/Kconfig b/arch/Kconfig index 29b0167c088b..b34032279926 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -24,6 +24,13 @@ config KEXEC_ELF config HAVE_IMA_KEXEC bool +config ARCH_HAS_SUBPAGE_FAULTS + bool + help + Select if the architecture can check permissions at sub-page + granularity (e.g. arm64 MTE). The probe_user_*() functions + must be implemented. + config HOTPLUG_SMT bool diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index 993994cd943a..6165283bdb6f 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -1046,6 +1046,7 @@ void folio_add_wait_queue(struct folio *folio, wait_queue_entry_t *waiter); * Fault in userspace address range. */ size_t fault_in_writeable(char __user *uaddr, size_t size); +size_t fault_in_subpage_writeable(char __user *uaddr, size_t size); size_t fault_in_safe_writeable(const char __user *uaddr, size_t size); size_t fault_in_readable(const char __user *uaddr, size_t size); diff --git a/include/linux/uaccess.h b/include/linux/uaccess.h index 546179418ffa..5a328cf02b75 100644 --- a/include/linux/uaccess.h +++ b/include/linux/uaccess.h @@ -231,6 +231,28 @@ static inline bool pagefault_disabled(void) */ #define faulthandler_disabled() (pagefault_disabled() || in_atomic()) +#ifndef CONFIG_ARCH_HAS_SUBPAGE_FAULTS + +/** + * probe_subpage_writeable: probe the user range for write faults at sub-page + * granularity (e.g. arm64 MTE) + * @uaddr: start of address range + * @size: size of address range + * + * Returns 0 on success, the number of bytes not probed on fault. + * + * It is expected that the caller checked for the write permission of each + * page in the range either by put_user() or GUP. The architecture port can + * implement a more efficient get_user() probing if the same sub-page faults + * are triggered by either a read or a write. + */ +static inline size_t probe_subpage_writeable(char __user *uaddr, size_t size) +{ + return 0; +} + +#endif /* CONFIG_ARCH_HAS_SUBPAGE_FAULTS */ + #ifndef ARCH_HAS_NOCACHE_UACCESS static inline __must_check unsigned long diff --git a/mm/gup.c b/mm/gup.c index f598a037eb04..501bc150792c 100644 --- a/mm/gup.c +++ b/mm/gup.c @@ -1648,6 +1648,35 @@ out: } EXPORT_SYMBOL(fault_in_writeable); +/** + * fault_in_subpage_writeable - fault in an address range for writing + * @uaddr: start of address range + * @size: size of address range + * + * Fault in a user address range for writing while checking for permissions at + * sub-page granularity (e.g. arm64 MTE). This function should be used when + * the caller cannot guarantee forward progress of a copy_to_user() loop. + * + * Returns the number of bytes not faulted in (like copy_to_user() and + * copy_from_user()). + */ +size_t fault_in_subpage_writeable(char __user *uaddr, size_t size) +{ + size_t faulted_in; + + /* + * Attempt faulting in at page granularity first for page table + * permission checking. The arch-specific probe_subpage_writeable() + * functions may not check for this. + */ + faulted_in = size - fault_in_writeable(uaddr, size); + if (faulted_in) + faulted_in -= probe_subpage_writeable(uaddr, faulted_in); + + return size - faulted_in; +} +EXPORT_SYMBOL(fault_in_subpage_writeable); + /* * fault_in_safe_writeable - fault in an address range for writing * @uaddr: start of address range -- cgit v1.2.3 From f3ba50a7a100e91b0b13ca43190a66c1bfdb9993 Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Sat, 23 Apr 2022 11:07:50 +0100 Subject: arm64: Add support for user sub-page fault probing With MTE, even if the pte allows an access, a mismatched tag somewhere within a page can still cause a fault. Select ARCH_HAS_SUBPAGE_FAULTS if MTE is enabled and implement the probe_subpage_writeable() function. Note that get_user() is sufficient for the writeable MTE check since the same tag mismatch fault would be triggered by a read. The caller of probe_subpage_writeable() will need to check the pte permissions (put_user, GUP). Signed-off-by: Catalin Marinas Cc: Will Deacon Link: https://lore.kernel.org/r/20220423100751.1870771-3-catalin.marinas@arm.com Signed-off-by: Catalin Marinas --- arch/arm64/Kconfig | 1 + arch/arm64/include/asm/mte.h | 1 + arch/arm64/include/asm/uaccess.h | 15 +++++++++++++++ arch/arm64/kernel/mte.c | 30 ++++++++++++++++++++++++++++++ 4 files changed, 47 insertions(+) (limited to 'arch') diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 57c4c995965f..290b88238103 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -1871,6 +1871,7 @@ config ARM64_MTE depends on AS_HAS_LSE_ATOMICS # Required for tag checking in the uaccess routines depends on ARM64_PAN + select ARCH_HAS_SUBPAGE_FAULTS select ARCH_USES_HIGH_VMA_FLAGS help Memory Tagging (part of the ARMv8.5 Extensions) provides diff --git a/arch/arm64/include/asm/mte.h b/arch/arm64/include/asm/mte.h index adcb937342f1..aa523591a44e 100644 --- a/arch/arm64/include/asm/mte.h +++ b/arch/arm64/include/asm/mte.h @@ -47,6 +47,7 @@ long set_mte_ctrl(struct task_struct *task, unsigned long arg); long get_mte_ctrl(struct task_struct *task); int mte_ptrace_copy_tags(struct task_struct *child, long request, unsigned long addr, unsigned long data); +size_t mte_probe_user_range(const char __user *uaddr, size_t size); #else /* CONFIG_ARM64_MTE */ diff --git a/arch/arm64/include/asm/uaccess.h b/arch/arm64/include/asm/uaccess.h index e8dce0cc5eaa..63f9c828f1a7 100644 --- a/arch/arm64/include/asm/uaccess.h +++ b/arch/arm64/include/asm/uaccess.h @@ -460,4 +460,19 @@ static inline int __copy_from_user_flushcache(void *dst, const void __user *src, } #endif +#ifdef CONFIG_ARCH_HAS_SUBPAGE_FAULTS + +/* + * Return 0 on success, the number of bytes not probed otherwise. + */ +static inline size_t probe_subpage_writeable(const char __user *uaddr, + size_t size) +{ + if (!system_supports_mte()) + return 0; + return mte_probe_user_range(uaddr, size); +} + +#endif /* CONFIG_ARCH_HAS_SUBPAGE_FAULTS */ + #endif /* __ASM_UACCESS_H */ diff --git a/arch/arm64/kernel/mte.c b/arch/arm64/kernel/mte.c index 78b3e0f8e997..35697a09926f 100644 --- a/arch/arm64/kernel/mte.c +++ b/arch/arm64/kernel/mte.c @@ -15,6 +15,7 @@ #include #include #include +#include #include #include @@ -543,3 +544,32 @@ static int register_mte_tcf_preferred_sysctl(void) return 0; } subsys_initcall(register_mte_tcf_preferred_sysctl); + +/* + * Return 0 on success, the number of bytes not probed otherwise. + */ +size_t mte_probe_user_range(const char __user *uaddr, size_t size) +{ + const char __user *end = uaddr + size; + int err = 0; + char val; + + __raw_get_user(val, uaddr, err); + if (err) + return size; + + uaddr = PTR_ALIGN(uaddr, MTE_GRANULE_SIZE); + while (uaddr < end) { + /* + * A read is sufficient for mte, the caller should have probed + * for the pte write permission if required. + */ + __raw_get_user(val, uaddr, err); + if (err) + return end - uaddr; + uaddr += MTE_GRANULE_SIZE; + } + (void)val; + + return 0; +} -- cgit v1.2.3 From 8a58bcd00e2e8d46afce468adc09fcd7968f514c Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Wed, 27 Apr 2022 14:08:28 +0100 Subject: arm64/sme: Add ID_AA64SMFR0_EL1 to __read_sysreg_by_encoding() We need to explicitly enumerate all the ID registers which we rely on for CPU capabilities in __read_sysreg_by_encoding(), ID_AA64SMFR0_EL1 was missed from this list so we trip a BUG() in paths which rely on that function such as CPU hotplug. Add the register. Reported-by: Marek Szyprowski Signed-off-by: Mark Brown Tested-by: Marek Szyprowski Link: https://lore.kernel.org/r/20220427130828.162615-1-broonie@kernel.org Signed-off-by: Catalin Marinas --- arch/arm64/kernel/cpufeature.c | 1 + 1 file changed, 1 insertion(+) (limited to 'arch') diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index 082b3f48cbfd..619324b8bcef 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -1336,6 +1336,7 @@ u64 __read_sysreg_by_encoding(u32 sys_id) read_sysreg_case(SYS_ID_AA64PFR0_EL1); read_sysreg_case(SYS_ID_AA64PFR1_EL1); read_sysreg_case(SYS_ID_AA64ZFR0_EL1); + read_sysreg_case(SYS_ID_AA64SMFR0_EL1); read_sysreg_case(SYS_ID_AA64DFR0_EL1); read_sysreg_case(SYS_ID_AA64DFR1_EL1); read_sysreg_case(SYS_ID_AA64MMFR0_EL1); -- cgit v1.2.3 From 2e29b9971ac54dec88baa58856a230ec2f2a2dff Mon Sep 17 00:00:00 2001 From: Wan Jiabing Date: Tue, 26 Apr 2022 19:30:53 +0800 Subject: arm64/sme: Fix NULL check after kzalloc Fix following coccicheck error: ./arch/arm64/kernel/process.c:322:2-23: alloc with no test, possible model on line 326 Here should be dst->thread.sve_state. Fixes: 8bd7f91c03d8 ("arm64/sme: Implement traps and syscall handling for SME") Signed-off-by: Wan Jiabing Reviwed-by: Mark Brown Link: https://lore.kernel.org/r/20220426113054.630983-1-wanjiabing@vivo.com Signed-off-by: Catalin Marinas --- arch/arm64/kernel/process.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c index 99c293513817..9734c9fb1a32 100644 --- a/arch/arm64/kernel/process.c +++ b/arch/arm64/kernel/process.c @@ -321,7 +321,7 @@ int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src) if (thread_za_enabled(&src->thread)) { dst->thread.sve_state = kzalloc(sve_state_size(src), GFP_KERNEL); - if (!dst->thread.za_state) + if (!dst->thread.sve_state) return -ENOMEM; dst->thread.za_state = kmemdup(src->thread.za_state, za_state_size(src), -- cgit v1.2.3 From e999995c84c3abb6dcae83f8c35942a8d4ee0451 Mon Sep 17 00:00:00 2001 From: Chengming Zhou Date: Thu, 21 Apr 2022 00:00:05 +0800 Subject: ftrace: cleanup ftrace_graph_caller enable and disable The ftrace_[enable,disable]_ftrace_graph_caller() are used to do special hooks for graph tracer, which are not needed on some ARCHs that use graph_ops:func function to install return_hooker. So introduce the weak version in ftrace core code to cleanup in x86. Signed-off-by: Chengming Zhou Acked-by: Steven Rostedt (Google) Acked-by: Mark Rutland Link: https://lore.kernel.org/r/20220420160006.17880-1-zhouchengming@bytedance.com Signed-off-by: Catalin Marinas --- arch/x86/kernel/ftrace.c | 17 ++--------------- kernel/trace/fgraph.c | 18 ++++++++++++++++++ 2 files changed, 20 insertions(+), 15 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c index 1e31c7d21597..b09d73c2ba89 100644 --- a/arch/x86/kernel/ftrace.c +++ b/arch/x86/kernel/ftrace.c @@ -579,9 +579,7 @@ void arch_ftrace_trampoline_free(struct ftrace_ops *ops) #ifdef CONFIG_FUNCTION_GRAPH_TRACER -#ifdef CONFIG_DYNAMIC_FTRACE - -#ifndef CONFIG_HAVE_DYNAMIC_FTRACE_WITH_ARGS +#if defined(CONFIG_DYNAMIC_FTRACE) && !defined(CONFIG_HAVE_DYNAMIC_FTRACE_WITH_ARGS) extern void ftrace_graph_call(void); static const char *ftrace_jmp_replace(unsigned long ip, unsigned long addr) { @@ -610,18 +608,7 @@ int ftrace_disable_ftrace_graph_caller(void) return ftrace_mod_jmp(ip, &ftrace_stub); } -#else /* !CONFIG_HAVE_DYNAMIC_FTRACE_WITH_ARGS */ -int ftrace_enable_ftrace_graph_caller(void) -{ - return 0; -} - -int ftrace_disable_ftrace_graph_caller(void) -{ - return 0; -} -#endif /* CONFIG_HAVE_DYNAMIC_FTRACE_WITH_ARGS */ -#endif /* !CONFIG_DYNAMIC_FTRACE */ +#endif /* CONFIG_DYNAMIC_FTRACE && !CONFIG_HAVE_DYNAMIC_FTRACE_WITH_ARGS */ /* * Hook the return address and push it in the stack of return addrs diff --git a/kernel/trace/fgraph.c b/kernel/trace/fgraph.c index 8f4fb328133a..289311680c29 100644 --- a/kernel/trace/fgraph.c +++ b/kernel/trace/fgraph.c @@ -30,6 +30,24 @@ int ftrace_graph_active; /* Both enabled by default (can be cleared by function_graph tracer flags */ static bool fgraph_sleep_time = true; +/* + * archs can override this function if they must do something + * to enable hook for graph tracer. + */ +int __weak ftrace_enable_ftrace_graph_caller(void) +{ + return 0; +} + +/* + * archs can override this function if they must do something + * to disable hook for graph tracer. + */ +int __weak ftrace_disable_ftrace_graph_caller(void) +{ + return 0; +} + /** * ftrace_graph_stop - set to permanently disable function graph tracing * -- cgit v1.2.3 From c4a0ebf87cebbfa28d56e7d93b2536e2311e30c9 Mon Sep 17 00:00:00 2001 From: Chengming Zhou Date: Thu, 21 Apr 2022 00:00:06 +0800 Subject: arm64/ftrace: Make function graph use ftrace directly As we do in commit 0c0593b45c9b ("x86/ftrace: Make function graph use ftrace directly"), we don't need special hook for graph tracer, but instead we use graph_ops:func function to install return_hooker. Since commit 3b23e4991fb6 ("arm64: implement ftrace with regs") add implementation for FTRACE_WITH_REGS on arm64, we can easily adopt the same cleanup on arm64. And this cleanup only changes the FTRACE_WITH_REGS implementation, so the mcount-based implementation is unaffected. While in theory it would be possible to make a similar cleanup for !FTRACE_WITH_REGS, this will require rework of the core code, and so for now we only change the FTRACE_WITH_REGS implementation. Tested-by: Mark Rutland Reviewed-by: Mark Rutland Signed-off-by: Chengming Zhou Link: https://lore.kernel.org/r/20220420160006.17880-2-zhouchengming@bytedance.com Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/ftrace.h | 7 +++++++ arch/arm64/kernel/entry-ftrace.S | 17 ----------------- arch/arm64/kernel/ftrace.c | 17 +++++++++++++++++ 3 files changed, 24 insertions(+), 17 deletions(-) (limited to 'arch') diff --git a/arch/arm64/include/asm/ftrace.h b/arch/arm64/include/asm/ftrace.h index 1494cfa8639b..dbc45a4157fa 100644 --- a/arch/arm64/include/asm/ftrace.h +++ b/arch/arm64/include/asm/ftrace.h @@ -80,8 +80,15 @@ static inline unsigned long ftrace_call_adjust(unsigned long addr) #ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS struct dyn_ftrace; +struct ftrace_ops; +struct ftrace_regs; + int ftrace_init_nop(struct module *mod, struct dyn_ftrace *rec); #define ftrace_init_nop ftrace_init_nop + +void ftrace_graph_func(unsigned long ip, unsigned long parent_ip, + struct ftrace_ops *op, struct ftrace_regs *fregs); +#define ftrace_graph_func ftrace_graph_func #endif #define ftrace_return_address(n) return_address(n) diff --git a/arch/arm64/kernel/entry-ftrace.S b/arch/arm64/kernel/entry-ftrace.S index e535480a4069..d42a205ef625 100644 --- a/arch/arm64/kernel/entry-ftrace.S +++ b/arch/arm64/kernel/entry-ftrace.S @@ -97,12 +97,6 @@ SYM_CODE_START(ftrace_common) SYM_INNER_LABEL(ftrace_call, SYM_L_GLOBAL) bl ftrace_stub -#ifdef CONFIG_FUNCTION_GRAPH_TRACER -SYM_INNER_LABEL(ftrace_graph_call, SYM_L_GLOBAL) // ftrace_graph_caller(); - nop // If enabled, this will be replaced - // "b ftrace_graph_caller" -#endif - /* * At the callsite x0-x8 and x19-x30 were live. Any C code will have preserved * x19-x29 per the AAPCS, and we created frame records upon entry, so we need @@ -127,17 +121,6 @@ ftrace_common_return: ret x9 SYM_CODE_END(ftrace_common) -#ifdef CONFIG_FUNCTION_GRAPH_TRACER -SYM_CODE_START(ftrace_graph_caller) - ldr x0, [sp, #S_PC] - sub x0, x0, #AARCH64_INSN_SIZE // ip (callsite's BL insn) - add x1, sp, #S_LR // parent_ip (callsite's LR) - ldr x2, [sp, #PT_REGS_SIZE] // parent fp (callsite's FP) - bl prepare_ftrace_return - b ftrace_common_return -SYM_CODE_END(ftrace_graph_caller) -#endif - #else /* CONFIG_DYNAMIC_FTRACE_WITH_REGS */ /* diff --git a/arch/arm64/kernel/ftrace.c b/arch/arm64/kernel/ftrace.c index 4506c4a90ac1..f447c4a36f69 100644 --- a/arch/arm64/kernel/ftrace.c +++ b/arch/arm64/kernel/ftrace.c @@ -268,6 +268,22 @@ void prepare_ftrace_return(unsigned long self_addr, unsigned long *parent, } #ifdef CONFIG_DYNAMIC_FTRACE + +#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS +void ftrace_graph_func(unsigned long ip, unsigned long parent_ip, + struct ftrace_ops *op, struct ftrace_regs *fregs) +{ + /* + * When DYNAMIC_FTRACE_WITH_REGS is selected, `fregs` can never be NULL + * and arch_ftrace_get_regs(fregs) will always give a non-NULL pt_regs + * in which we can safely modify the LR. + */ + struct pt_regs *regs = arch_ftrace_get_regs(fregs); + unsigned long *parent = (unsigned long *)&procedure_link_pointer(regs); + + prepare_ftrace_return(ip, parent, frame_pointer(regs)); +} +#else /* * Turn on/off the call to ftrace_graph_caller() in ftrace_caller() * depending on @enable. @@ -297,5 +313,6 @@ int ftrace_disable_ftrace_graph_caller(void) { return ftrace_modify_graph_caller(false); } +#endif /* CONFIG_DYNAMIC_FTRACE_WITH_REGS */ #endif /* CONFIG_DYNAMIC_FTRACE */ #endif /* CONFIG_FUNCTION_GRAPH_TRACER */ -- cgit v1.2.3 From a99ef9cb4b79e79d1574804eaf39608f8187e174 Mon Sep 17 00:00:00 2001 From: Alexandru Elisei Date: Mon, 25 Apr 2022 12:44:40 +0100 Subject: arm64: Make ESR_ELx_xVC_IMM_MASK compatible with assembly ESR_ELx_xVC_IMM_MASK is used as a mask for the immediate value for the HVC/SMC instructions. The header file is included by assembly files (like entry.S) and ESR_ELx_xVC_IMM_MASK is not conditioned on __ASSEMBLY__ being undefined. Use the UL() macro for defining the constant's size, as that is compatible with both C code and assembly, whereas the UL suffix only works for C code. Signed-off-by: Alexandru Elisei Reviewed-by: Marc Zyngier Link: https://lore.kernel.org/r/20220425114444.368693-2-alexandru.elisei@arm.com Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/esr.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm64/include/asm/esr.h b/arch/arm64/include/asm/esr.h index d52a0b269ee8..7356e2f05755 100644 --- a/arch/arm64/include/asm/esr.h +++ b/arch/arm64/include/asm/esr.h @@ -136,7 +136,7 @@ #define ESR_ELx_WFx_ISS_TI (UL(1) << 0) #define ESR_ELx_WFx_ISS_WFI (UL(0) << 0) #define ESR_ELx_WFx_ISS_WFE (UL(1) << 0) -#define ESR_ELx_xVC_IMM_MASK ((1UL << 16) - 1) +#define ESR_ELx_xVC_IMM_MASK ((UL(1) << 16) - 1) #define DISR_EL1_IDS (UL(1) << 24) /* -- cgit v1.2.3 From 3fed9e551417b84038b15117732ea4505eee386b Mon Sep 17 00:00:00 2001 From: Alexandru Elisei Date: Mon, 25 Apr 2022 12:44:41 +0100 Subject: arm64: compat: Do not treat syscall number as ESR_ELx for a bad syscall If a compat process tries to execute an unknown system call above the __ARM_NR_COMPAT_END number, the kernel sends a SIGILL signal to the offending process. Information about the error is printed to dmesg in compat_arm_syscall() -> arm64_notify_die() -> arm64_force_sig_fault() -> arm64_show_signal(). arm64_show_signal() interprets a non-zero value for current->thread.fault_code as an exception syndrome and displays the message associated with the ESR_ELx.EC field (bits 31:26). current->thread.fault_code is set in compat_arm_syscall() -> arm64_notify_die() with the bad syscall number instead of a valid ESR_ELx value. This means that the ESR_ELx.EC field has the value that the user set for the syscall number and the kernel can end up printing bogus exception messages*. For example, for the syscall number 0x68000000, which evaluates to ESR_ELx.EC value of 0x1A (ESR_ELx_EC_FPAC) the kernel prints this error: [ 18.349161] syscall[300]: unhandled exception: ERET/ERETAA/ERETAB, ESR 0x68000000, Oops - bad compat syscall(2) in syscall[10000+50000] [ 18.350639] CPU: 2 PID: 300 Comm: syscall Not tainted 5.18.0-rc1 #79 [ 18.351249] Hardware name: Pine64 RockPro64 v2.0 (DT) [..] which is misleading, as the bad compat syscall has nothing to do with pointer authentication. Stop arm64_show_signal() from printing exception syndrome information by having compat_arm_syscall() set the ESR_ELx value to 0, as it has no meaning for an invalid system call number. The example above now becomes: [ 19.935275] syscall[301]: unhandled exception: Oops - bad compat syscall(2) in syscall[10000+50000] [ 19.936124] CPU: 1 PID: 301 Comm: syscall Not tainted 5.18.0-rc1-00005-g7e08006d4102 #80 [ 19.936894] Hardware name: Pine64 RockPro64 v2.0 (DT) [..] which although shows less information because the syscall number, wrongfully advertised as the ESR value, is missing, it is better than showing plainly wrong information. The syscall number can be easily obtained with strace. *A 32-bit value above or equal to 0x8000_0000 is interpreted as a negative integer in compat_arm_syscal() and the condition scno < __ARM_NR_COMPAT_END evaluates to true; the syscall will exit to userspace in this case with the ENOSYS error code instead of arm64_notify_die() being called. Signed-off-by: Alexandru Elisei Reviewed-by: Marc Zyngier Link: https://lore.kernel.org/r/20220425114444.368693-3-alexandru.elisei@arm.com Signed-off-by: Catalin Marinas --- arch/arm64/kernel/sys_compat.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm64/kernel/sys_compat.c b/arch/arm64/kernel/sys_compat.c index 12c6864e51e1..df14336c3a29 100644 --- a/arch/arm64/kernel/sys_compat.c +++ b/arch/arm64/kernel/sys_compat.c @@ -113,6 +113,6 @@ long compat_arm_syscall(struct pt_regs *regs, int scno) addr = instruction_pointer(regs) - (compat_thumb_mode(regs) ? 2 : 4); arm64_notify_die("Oops - bad compat syscall(2)", regs, - SIGILL, ILL_ILLTRP, addr, scno); + SIGILL, ILL_ILLTRP, addr, 0); return 0; } -- cgit v1.2.3 From 8d56e5c5a99ce1d17d39ce5a8260e42c2a2d7682 Mon Sep 17 00:00:00 2001 From: Alexandru Elisei Date: Mon, 25 Apr 2022 12:44:42 +0100 Subject: arm64: Treat ESR_ELx as a 64-bit register In the initial release of the ARM Architecture Reference Manual for ARMv8-A, the ESR_ELx registers were defined as 32-bit registers. This changed in 2018 with version D.a (ARM DDI 0487D.a) of the architecture, when they became 64-bit registers, with bits [63:32] defined as RES0. In version G.a, a new field was added to ESR_ELx, ISS2, which covers bits [36:32]. This field is used when the Armv8.7 extension FEAT_LS64 is implemented. As a result of the evolution of the register width, Linux stores it as both a 64-bit value and a 32-bit value, which hasn't affected correctness so far as Linux only uses the lower 32 bits of the register. Make the register type consistent and always treat it as 64-bit wide. The register is redefined as an "unsigned long", which is an unsigned double-word (64-bit quantity) for the LP64 machine (aapcs64 [1], Table 1, page 14). The type was chosen because "unsigned int" is the most frequent type for ESR_ELx and because FAR_ELx, which is used together with ESR_ELx in exception handling, is also declared as "unsigned long". The 64-bit type also makes adding support for architectural features that use fields above bit 31 easier in the future. The KVM hypervisor will receive a similar update in a subsequent patch. [1] https://github.com/ARM-software/abi-aa/releases/download/2021Q3/aapcs64.pdf Signed-off-by: Alexandru Elisei Reviewed-by: Marc Zyngier Link: https://lore.kernel.org/r/20220425114444.368693-4-alexandru.elisei@arm.com Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/debug-monitors.h | 4 +- arch/arm64/include/asm/esr.h | 6 +-- arch/arm64/include/asm/exception.h | 28 ++++++------- arch/arm64/include/asm/system_misc.h | 4 +- arch/arm64/include/asm/traps.h | 12 +++--- arch/arm64/kernel/debug-monitors.c | 12 +++--- arch/arm64/kernel/entry-common.c | 6 +-- arch/arm64/kernel/fpsimd.c | 6 +-- arch/arm64/kernel/hw_breakpoint.c | 4 +- arch/arm64/kernel/kgdb.c | 6 +-- arch/arm64/kernel/probes/kprobes.c | 4 +- arch/arm64/kernel/probes/uprobes.c | 4 +- arch/arm64/kernel/traps.c | 66 +++++++++++++++---------------- arch/arm64/mm/fault.c | 70 ++++++++++++++++----------------- 14 files changed, 116 insertions(+), 116 deletions(-) (limited to 'arch') diff --git a/arch/arm64/include/asm/debug-monitors.h b/arch/arm64/include/asm/debug-monitors.h index 00c291067e57..7b7e05c02691 100644 --- a/arch/arm64/include/asm/debug-monitors.h +++ b/arch/arm64/include/asm/debug-monitors.h @@ -64,7 +64,7 @@ struct task_struct; struct step_hook { struct list_head node; - int (*fn)(struct pt_regs *regs, unsigned int esr); + int (*fn)(struct pt_regs *regs, unsigned long esr); }; void register_user_step_hook(struct step_hook *hook); @@ -75,7 +75,7 @@ void unregister_kernel_step_hook(struct step_hook *hook); struct break_hook { struct list_head node; - int (*fn)(struct pt_regs *regs, unsigned int esr); + int (*fn)(struct pt_regs *regs, unsigned long esr); u16 imm; u16 mask; /* These bits are ignored when comparing with imm */ }; diff --git a/arch/arm64/include/asm/esr.h b/arch/arm64/include/asm/esr.h index 7356e2f05755..9d18f82c57d5 100644 --- a/arch/arm64/include/asm/esr.h +++ b/arch/arm64/include/asm/esr.h @@ -330,14 +330,14 @@ #ifndef __ASSEMBLY__ #include -static inline bool esr_is_data_abort(u32 esr) +static inline bool esr_is_data_abort(unsigned long esr) { - const u32 ec = ESR_ELx_EC(esr); + const unsigned long ec = ESR_ELx_EC(esr); return ec == ESR_ELx_EC_DABT_LOW || ec == ESR_ELx_EC_DABT_CUR; } -const char *esr_get_class_string(u32 esr); +const char *esr_get_class_string(unsigned long esr); #endif /* __ASSEMBLY */ #endif /* __ASM_ESR_H */ diff --git a/arch/arm64/include/asm/exception.h b/arch/arm64/include/asm/exception.h index 339477dca551..0e6535aa78c2 100644 --- a/arch/arm64/include/asm/exception.h +++ b/arch/arm64/include/asm/exception.h @@ -19,9 +19,9 @@ #define __exception_irq_entry __kprobes #endif -static inline u32 disr_to_esr(u64 disr) +static inline unsigned long disr_to_esr(u64 disr) { - unsigned int esr = ESR_ELx_EC_SERROR << ESR_ELx_EC_SHIFT; + unsigned long esr = ESR_ELx_EC_SERROR << ESR_ELx_EC_SHIFT; if ((disr & DISR_EL1_IDS) == 0) esr |= (disr & DISR_EL1_ESR_MASK); @@ -57,23 +57,23 @@ asmlinkage void call_on_irq_stack(struct pt_regs *regs, void (*func)(struct pt_regs *)); asmlinkage void asm_exit_to_user_mode(struct pt_regs *regs); -void do_mem_abort(unsigned long far, unsigned int esr, struct pt_regs *regs); +void do_mem_abort(unsigned long far, unsigned long esr, struct pt_regs *regs); void do_undefinstr(struct pt_regs *regs); void do_bti(struct pt_regs *regs); -void do_debug_exception(unsigned long addr_if_watchpoint, unsigned int esr, +void do_debug_exception(unsigned long addr_if_watchpoint, unsigned long esr, struct pt_regs *regs); -void do_fpsimd_acc(unsigned int esr, struct pt_regs *regs); -void do_sve_acc(unsigned int esr, struct pt_regs *regs); -void do_fpsimd_exc(unsigned int esr, struct pt_regs *regs); -void do_sysinstr(unsigned int esr, struct pt_regs *regs); -void do_sp_pc_abort(unsigned long addr, unsigned int esr, struct pt_regs *regs); -void bad_el0_sync(struct pt_regs *regs, int reason, unsigned int esr); -void do_cp15instr(unsigned int esr, struct pt_regs *regs); +void do_fpsimd_acc(unsigned long esr, struct pt_regs *regs); +void do_sve_acc(unsigned long esr, struct pt_regs *regs); +void do_fpsimd_exc(unsigned long esr, struct pt_regs *regs); +void do_sysinstr(unsigned long esr, struct pt_regs *regs); +void do_sp_pc_abort(unsigned long addr, unsigned long esr, struct pt_regs *regs); +void bad_el0_sync(struct pt_regs *regs, int reason, unsigned long esr); +void do_cp15instr(unsigned long esr, struct pt_regs *regs); void do_el0_svc(struct pt_regs *regs); void do_el0_svc_compat(struct pt_regs *regs); -void do_ptrauth_fault(struct pt_regs *regs, unsigned int esr); -void do_serror(struct pt_regs *regs, unsigned int esr); +void do_ptrauth_fault(struct pt_regs *regs, unsigned long esr); +void do_serror(struct pt_regs *regs, unsigned long esr); void do_notify_resume(struct pt_regs *regs, unsigned long thread_flags); -void panic_bad_stack(struct pt_regs *regs, unsigned int esr, unsigned long far); +void panic_bad_stack(struct pt_regs *regs, unsigned long esr, unsigned long far); #endif /* __ASM_EXCEPTION_H */ diff --git a/arch/arm64/include/asm/system_misc.h b/arch/arm64/include/asm/system_misc.h index 305a7157c6a6..0eb7709422e2 100644 --- a/arch/arm64/include/asm/system_misc.h +++ b/arch/arm64/include/asm/system_misc.h @@ -23,9 +23,9 @@ void die(const char *msg, struct pt_regs *regs, int err); struct siginfo; void arm64_notify_die(const char *str, struct pt_regs *regs, int signo, int sicode, unsigned long far, - int err); + unsigned long err); -void hook_debug_fault_code(int nr, int (*fn)(unsigned long, unsigned int, +void hook_debug_fault_code(int nr, int (*fn)(unsigned long, unsigned long, struct pt_regs *), int sig, int code, const char *name); diff --git a/arch/arm64/include/asm/traps.h b/arch/arm64/include/asm/traps.h index 54f32a0675df..6e5826470bea 100644 --- a/arch/arm64/include/asm/traps.h +++ b/arch/arm64/include/asm/traps.h @@ -24,7 +24,7 @@ struct undef_hook { void register_undef_hook(struct undef_hook *hook); void unregister_undef_hook(struct undef_hook *hook); -void force_signal_inject(int signal, int code, unsigned long address, unsigned int err); +void force_signal_inject(int signal, int code, unsigned long address, unsigned long err); void arm64_notify_segfault(unsigned long addr); void arm64_force_sig_fault(int signo, int code, unsigned long far, const char *str); void arm64_force_sig_mceerr(int code, unsigned long far, short lsb, const char *str); @@ -57,7 +57,7 @@ static inline int in_entry_text(unsigned long ptr) * errors share the same encoding as an all-zeros encoding from a CPU that * doesn't support RAS. */ -static inline bool arm64_is_ras_serror(u32 esr) +static inline bool arm64_is_ras_serror(unsigned long esr) { WARN_ON(preemptible()); @@ -77,9 +77,9 @@ static inline bool arm64_is_ras_serror(u32 esr) * We treat them as Uncontainable. * Non-RAS SError's are reported as Uncontained/Uncategorized. */ -static inline u32 arm64_ras_serror_get_severity(u32 esr) +static inline unsigned long arm64_ras_serror_get_severity(unsigned long esr) { - u32 aet = esr & ESR_ELx_AET; + unsigned long aet = esr & ESR_ELx_AET; if (!arm64_is_ras_serror(esr)) { /* Not a RAS error, we can't interpret the ESR. */ @@ -98,6 +98,6 @@ static inline u32 arm64_ras_serror_get_severity(u32 esr) return aet; } -bool arm64_is_fatal_ras_serror(struct pt_regs *regs, unsigned int esr); -void __noreturn arm64_serror_panic(struct pt_regs *regs, u32 esr); +bool arm64_is_fatal_ras_serror(struct pt_regs *regs, unsigned long esr); +void __noreturn arm64_serror_panic(struct pt_regs *regs, unsigned long esr); #endif diff --git a/arch/arm64/kernel/debug-monitors.c b/arch/arm64/kernel/debug-monitors.c index 4f3661eeb7ec..bf9fe71589bc 100644 --- a/arch/arm64/kernel/debug-monitors.c +++ b/arch/arm64/kernel/debug-monitors.c @@ -202,7 +202,7 @@ void unregister_kernel_step_hook(struct step_hook *hook) * So we call all the registered handlers, until the right handler is * found which returns zero. */ -static int call_step_hook(struct pt_regs *regs, unsigned int esr) +static int call_step_hook(struct pt_regs *regs, unsigned long esr) { struct step_hook *hook; struct list_head *list; @@ -238,7 +238,7 @@ static void send_user_sigtrap(int si_code) "User debug trap"); } -static int single_step_handler(unsigned long unused, unsigned int esr, +static int single_step_handler(unsigned long unused, unsigned long esr, struct pt_regs *regs) { bool handler_found = false; @@ -299,11 +299,11 @@ void unregister_kernel_break_hook(struct break_hook *hook) unregister_debug_hook(&hook->node); } -static int call_break_hook(struct pt_regs *regs, unsigned int esr) +static int call_break_hook(struct pt_regs *regs, unsigned long esr) { struct break_hook *hook; struct list_head *list; - int (*fn)(struct pt_regs *regs, unsigned int esr) = NULL; + int (*fn)(struct pt_regs *regs, unsigned long esr) = NULL; list = user_mode(regs) ? &user_break_hook : &kernel_break_hook; @@ -312,7 +312,7 @@ static int call_break_hook(struct pt_regs *regs, unsigned int esr) * entirely not preemptible, and we can use rcu list safely here. */ list_for_each_entry_rcu(hook, list, node) { - unsigned int comment = esr & ESR_ELx_BRK64_ISS_COMMENT_MASK; + unsigned long comment = esr & ESR_ELx_BRK64_ISS_COMMENT_MASK; if ((comment & ~hook->mask) == hook->imm) fn = hook->fn; @@ -322,7 +322,7 @@ static int call_break_hook(struct pt_regs *regs, unsigned int esr) } NOKPROBE_SYMBOL(call_break_hook); -static int brk_handler(unsigned long unused, unsigned int esr, +static int brk_handler(unsigned long unused, unsigned long esr, struct pt_regs *regs) { if (call_break_hook(regs, esr) == DBG_HOOK_HANDLED) diff --git a/arch/arm64/kernel/entry-common.c b/arch/arm64/kernel/entry-common.c index 878c65aa7206..6ba10edfb49c 100644 --- a/arch/arm64/kernel/entry-common.c +++ b/arch/arm64/kernel/entry-common.c @@ -282,13 +282,13 @@ extern void (*handle_arch_irq)(struct pt_regs *); extern void (*handle_arch_fiq)(struct pt_regs *); static void noinstr __panic_unhandled(struct pt_regs *regs, const char *vector, - unsigned int esr) + unsigned long esr) { arm64_enter_nmi(regs); console_verbose(); - pr_crit("Unhandled %s exception on CPU%d, ESR 0x%08x -- %s\n", + pr_crit("Unhandled %s exception on CPU%d, ESR 0x%016lx -- %s\n", vector, smp_processor_id(), esr, esr_get_class_string(esr)); @@ -818,7 +818,7 @@ UNHANDLED(el0t, 32, error) #ifdef CONFIG_VMAP_STACK asmlinkage void noinstr handle_bad_stack(struct pt_regs *regs) { - unsigned int esr = read_sysreg(esr_el1); + unsigned long esr = read_sysreg(esr_el1); unsigned long far = read_sysreg(far_el1); arm64_enter_nmi(regs); diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c index 47af76e53221..22bf0cfe236b 100644 --- a/arch/arm64/kernel/fpsimd.c +++ b/arch/arm64/kernel/fpsimd.c @@ -1004,7 +1004,7 @@ void fpsimd_release_task(struct task_struct *dead_task) * would have disabled the SVE access trap for userspace during * ret_to_user, making an SVE access trap impossible in that case. */ -void do_sve_acc(unsigned int esr, struct pt_regs *regs) +void do_sve_acc(unsigned long esr, struct pt_regs *regs) { /* Even if we chose not to use SVE, the hardware could still trap: */ if (unlikely(!system_supports_sve()) || WARN_ON(is_compat_task())) { @@ -1046,7 +1046,7 @@ void do_sve_acc(unsigned int esr, struct pt_regs *regs) /* * Trapped FP/ASIMD access. */ -void do_fpsimd_acc(unsigned int esr, struct pt_regs *regs) +void do_fpsimd_acc(unsigned long esr, struct pt_regs *regs) { /* TODO: implement lazy context saving/restoring */ WARN_ON(1); @@ -1055,7 +1055,7 @@ void do_fpsimd_acc(unsigned int esr, struct pt_regs *regs) /* * Raise a SIGFPE for the current process. */ -void do_fpsimd_exc(unsigned int esr, struct pt_regs *regs) +void do_fpsimd_exc(unsigned long esr, struct pt_regs *regs) { unsigned int si_code = FPE_FLTUNK; diff --git a/arch/arm64/kernel/hw_breakpoint.c b/arch/arm64/kernel/hw_breakpoint.c index cd868084e724..b29a311bb055 100644 --- a/arch/arm64/kernel/hw_breakpoint.c +++ b/arch/arm64/kernel/hw_breakpoint.c @@ -617,7 +617,7 @@ NOKPROBE_SYMBOL(toggle_bp_registers); /* * Debug exception handlers. */ -static int breakpoint_handler(unsigned long unused, unsigned int esr, +static int breakpoint_handler(unsigned long unused, unsigned long esr, struct pt_regs *regs) { int i, step = 0, *kernel_step; @@ -751,7 +751,7 @@ static int watchpoint_report(struct perf_event *wp, unsigned long addr, return step; } -static int watchpoint_handler(unsigned long addr, unsigned int esr, +static int watchpoint_handler(unsigned long addr, unsigned long esr, struct pt_regs *regs) { int i, step = 0, *kernel_step, access, closest_match = 0; diff --git a/arch/arm64/kernel/kgdb.c b/arch/arm64/kernel/kgdb.c index 2aede780fb80..cda9c1e9864f 100644 --- a/arch/arm64/kernel/kgdb.c +++ b/arch/arm64/kernel/kgdb.c @@ -232,14 +232,14 @@ int kgdb_arch_handle_exception(int exception_vector, int signo, return err; } -static int kgdb_brk_fn(struct pt_regs *regs, unsigned int esr) +static int kgdb_brk_fn(struct pt_regs *regs, unsigned long esr) { kgdb_handle_exception(1, SIGTRAP, 0, regs); return DBG_HOOK_HANDLED; } NOKPROBE_SYMBOL(kgdb_brk_fn) -static int kgdb_compiled_brk_fn(struct pt_regs *regs, unsigned int esr) +static int kgdb_compiled_brk_fn(struct pt_regs *regs, unsigned long esr) { compiled_break = 1; kgdb_handle_exception(1, SIGTRAP, 0, regs); @@ -248,7 +248,7 @@ static int kgdb_compiled_brk_fn(struct pt_regs *regs, unsigned int esr) } NOKPROBE_SYMBOL(kgdb_compiled_brk_fn); -static int kgdb_step_brk_fn(struct pt_regs *regs, unsigned int esr) +static int kgdb_step_brk_fn(struct pt_regs *regs, unsigned long esr) { if (!kgdb_single_step) return DBG_HOOK_ERROR; diff --git a/arch/arm64/kernel/probes/kprobes.c b/arch/arm64/kernel/probes/kprobes.c index d9dfa82c1f18..d1d182320245 100644 --- a/arch/arm64/kernel/probes/kprobes.c +++ b/arch/arm64/kernel/probes/kprobes.c @@ -335,7 +335,7 @@ static void __kprobes kprobe_handler(struct pt_regs *regs) } static int __kprobes -kprobe_breakpoint_ss_handler(struct pt_regs *regs, unsigned int esr) +kprobe_breakpoint_ss_handler(struct pt_regs *regs, unsigned long esr) { struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); unsigned long addr = instruction_pointer(regs); @@ -359,7 +359,7 @@ static struct break_hook kprobes_break_ss_hook = { }; static int __kprobes -kprobe_breakpoint_handler(struct pt_regs *regs, unsigned int esr) +kprobe_breakpoint_handler(struct pt_regs *regs, unsigned long esr) { kprobe_handler(regs); return DBG_HOOK_HANDLED; diff --git a/arch/arm64/kernel/probes/uprobes.c b/arch/arm64/kernel/probes/uprobes.c index 9be668f3f034..d49aef2657cd 100644 --- a/arch/arm64/kernel/probes/uprobes.c +++ b/arch/arm64/kernel/probes/uprobes.c @@ -166,7 +166,7 @@ int arch_uprobe_exception_notify(struct notifier_block *self, } static int uprobe_breakpoint_handler(struct pt_regs *regs, - unsigned int esr) + unsigned long esr) { if (uprobe_pre_sstep_notifier(regs)) return DBG_HOOK_HANDLED; @@ -175,7 +175,7 @@ static int uprobe_breakpoint_handler(struct pt_regs *regs, } static int uprobe_single_step_handler(struct pt_regs *regs, - unsigned int esr) + unsigned long esr) { struct uprobe_task *utask = current->utask; diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c index 0529fd57567e..da24a4c4f58b 100644 --- a/arch/arm64/kernel/traps.c +++ b/arch/arm64/kernel/traps.c @@ -242,7 +242,7 @@ static void arm64_show_signal(int signo, const char *str) static DEFINE_RATELIMIT_STATE(rs, DEFAULT_RATELIMIT_INTERVAL, DEFAULT_RATELIMIT_BURST); struct task_struct *tsk = current; - unsigned int esr = tsk->thread.fault_code; + unsigned long esr = tsk->thread.fault_code; struct pt_regs *regs = task_pt_regs(tsk); /* Leave if the signal won't be shown */ @@ -253,7 +253,7 @@ static void arm64_show_signal(int signo, const char *str) pr_info("%s[%d]: unhandled exception: ", tsk->comm, task_pid_nr(tsk)); if (esr) - pr_cont("%s, ESR 0x%08x, ", esr_get_class_string(esr), esr); + pr_cont("%s, ESR 0x%016lx, ", esr_get_class_string(esr), esr); pr_cont("%s", str); print_vma_addr(KERN_CONT " in ", regs->pc); @@ -287,7 +287,7 @@ void arm64_force_sig_ptrace_errno_trap(int errno, unsigned long far, void arm64_notify_die(const char *str, struct pt_regs *regs, int signo, int sicode, unsigned long far, - int err) + unsigned long err) { if (user_mode(regs)) { WARN_ON(regs != current_pt_regs()); @@ -439,7 +439,7 @@ exit: return fn ? fn(regs, instr) : 1; } -void force_signal_inject(int signal, int code, unsigned long address, unsigned int err) +void force_signal_inject(int signal, int code, unsigned long address, unsigned long err) { const char *desc; struct pt_regs *regs = current_pt_regs(); @@ -506,7 +506,7 @@ void do_bti(struct pt_regs *regs) } NOKPROBE_SYMBOL(do_bti); -void do_ptrauth_fault(struct pt_regs *regs, unsigned int esr) +void do_ptrauth_fault(struct pt_regs *regs, unsigned long esr) { /* * Unexpected FPAC exception or pointer authentication failure in @@ -532,7 +532,7 @@ NOKPROBE_SYMBOL(do_ptrauth_fault); uaccess_ttbr0_disable(); \ } -static void user_cache_maint_handler(unsigned int esr, struct pt_regs *regs) +static void user_cache_maint_handler(unsigned long esr, struct pt_regs *regs) { unsigned long tagged_address, address; int rt = ESR_ELx_SYS64_ISS_RT(esr); @@ -572,7 +572,7 @@ static void user_cache_maint_handler(unsigned int esr, struct pt_regs *regs) arm64_skip_faulting_instruction(regs, AARCH64_INSN_SIZE); } -static void ctr_read_handler(unsigned int esr, struct pt_regs *regs) +static void ctr_read_handler(unsigned long esr, struct pt_regs *regs) { int rt = ESR_ELx_SYS64_ISS_RT(esr); unsigned long val = arm64_ftr_reg_user_value(&arm64_ftr_reg_ctrel0); @@ -591,7 +591,7 @@ static void ctr_read_handler(unsigned int esr, struct pt_regs *regs) arm64_skip_faulting_instruction(regs, AARCH64_INSN_SIZE); } -static void cntvct_read_handler(unsigned int esr, struct pt_regs *regs) +static void cntvct_read_handler(unsigned long esr, struct pt_regs *regs) { int rt = ESR_ELx_SYS64_ISS_RT(esr); @@ -599,7 +599,7 @@ static void cntvct_read_handler(unsigned int esr, struct pt_regs *regs) arm64_skip_faulting_instruction(regs, AARCH64_INSN_SIZE); } -static void cntfrq_read_handler(unsigned int esr, struct pt_regs *regs) +static void cntfrq_read_handler(unsigned long esr, struct pt_regs *regs) { int rt = ESR_ELx_SYS64_ISS_RT(esr); @@ -607,7 +607,7 @@ static void cntfrq_read_handler(unsigned int esr, struct pt_regs *regs) arm64_skip_faulting_instruction(regs, AARCH64_INSN_SIZE); } -static void mrs_handler(unsigned int esr, struct pt_regs *regs) +static void mrs_handler(unsigned long esr, struct pt_regs *regs) { u32 sysreg, rt; @@ -618,15 +618,15 @@ static void mrs_handler(unsigned int esr, struct pt_regs *regs) force_signal_inject(SIGILL, ILL_ILLOPC, regs->pc, 0); } -static void wfi_handler(unsigned int esr, struct pt_regs *regs) +static void wfi_handler(unsigned long esr, struct pt_regs *regs) { arm64_skip_faulting_instruction(regs, AARCH64_INSN_SIZE); } struct sys64_hook { - unsigned int esr_mask; - unsigned int esr_val; - void (*handler)(unsigned int esr, struct pt_regs *regs); + unsigned long esr_mask; + unsigned long esr_val; + void (*handler)(unsigned long esr, struct pt_regs *regs); }; static const struct sys64_hook sys64_hooks[] = { @@ -675,7 +675,7 @@ static const struct sys64_hook sys64_hooks[] = { }; #ifdef CONFIG_COMPAT -static bool cp15_cond_valid(unsigned int esr, struct pt_regs *regs) +static bool cp15_cond_valid(unsigned long esr, struct pt_regs *regs) { int cond; @@ -695,7 +695,7 @@ static bool cp15_cond_valid(unsigned int esr, struct pt_regs *regs) return aarch32_opcode_cond_checks[cond](regs->pstate); } -static void compat_cntfrq_read_handler(unsigned int esr, struct pt_regs *regs) +static void compat_cntfrq_read_handler(unsigned long esr, struct pt_regs *regs) { int reg = (esr & ESR_ELx_CP15_32_ISS_RT_MASK) >> ESR_ELx_CP15_32_ISS_RT_SHIFT; @@ -712,7 +712,7 @@ static const struct sys64_hook cp15_32_hooks[] = { {}, }; -static void compat_cntvct_read_handler(unsigned int esr, struct pt_regs *regs) +static void compat_cntvct_read_handler(unsigned long esr, struct pt_regs *regs) { int rt = (esr & ESR_ELx_CP15_64_ISS_RT_MASK) >> ESR_ELx_CP15_64_ISS_RT_SHIFT; int rt2 = (esr & ESR_ELx_CP15_64_ISS_RT2_MASK) >> ESR_ELx_CP15_64_ISS_RT2_SHIFT; @@ -737,7 +737,7 @@ static const struct sys64_hook cp15_64_hooks[] = { {}, }; -void do_cp15instr(unsigned int esr, struct pt_regs *regs) +void do_cp15instr(unsigned long esr, struct pt_regs *regs) { const struct sys64_hook *hook, *hook_base; @@ -778,7 +778,7 @@ void do_cp15instr(unsigned int esr, struct pt_regs *regs) NOKPROBE_SYMBOL(do_cp15instr); #endif -void do_sysinstr(unsigned int esr, struct pt_regs *regs) +void do_sysinstr(unsigned long esr, struct pt_regs *regs) { const struct sys64_hook *hook; @@ -842,7 +842,7 @@ static const char *esr_class_str[] = { [ESR_ELx_EC_BRK64] = "BRK (AArch64)", }; -const char *esr_get_class_string(u32 esr) +const char *esr_get_class_string(unsigned long esr) { return esr_class_str[ESR_ELx_EC(esr)]; } @@ -851,7 +851,7 @@ const char *esr_get_class_string(u32 esr) * bad_el0_sync handles unexpected, but potentially recoverable synchronous * exceptions taken from EL0. */ -void bad_el0_sync(struct pt_regs *regs, int reason, unsigned int esr) +void bad_el0_sync(struct pt_regs *regs, int reason, unsigned long esr) { unsigned long pc = instruction_pointer(regs); @@ -867,7 +867,7 @@ void bad_el0_sync(struct pt_regs *regs, int reason, unsigned int esr) DEFINE_PER_CPU(unsigned long [OVERFLOW_STACK_SIZE/sizeof(long)], overflow_stack) __aligned(16); -void panic_bad_stack(struct pt_regs *regs, unsigned int esr, unsigned long far) +void panic_bad_stack(struct pt_regs *regs, unsigned long esr, unsigned long far) { unsigned long tsk_stk = (unsigned long)current->stack; unsigned long irq_stk = (unsigned long)this_cpu_read(irq_stack_ptr); @@ -876,7 +876,7 @@ void panic_bad_stack(struct pt_regs *regs, unsigned int esr, unsigned long far) console_verbose(); pr_emerg("Insufficient stack space to handle exception!"); - pr_emerg("ESR: 0x%08x -- %s\n", esr, esr_get_class_string(esr)); + pr_emerg("ESR: 0x%016lx -- %s\n", esr, esr_get_class_string(esr)); pr_emerg("FAR: 0x%016lx\n", far); pr_emerg("Task stack: [0x%016lx..0x%016lx]\n", @@ -897,11 +897,11 @@ void panic_bad_stack(struct pt_regs *regs, unsigned int esr, unsigned long far) } #endif -void __noreturn arm64_serror_panic(struct pt_regs *regs, u32 esr) +void __noreturn arm64_serror_panic(struct pt_regs *regs, unsigned long esr) { console_verbose(); - pr_crit("SError Interrupt on CPU%d, code 0x%08x -- %s\n", + pr_crit("SError Interrupt on CPU%d, code 0x%016lx -- %s\n", smp_processor_id(), esr, esr_get_class_string(esr)); if (regs) __show_regs(regs); @@ -912,9 +912,9 @@ void __noreturn arm64_serror_panic(struct pt_regs *regs, u32 esr) unreachable(); } -bool arm64_is_fatal_ras_serror(struct pt_regs *regs, unsigned int esr) +bool arm64_is_fatal_ras_serror(struct pt_regs *regs, unsigned long esr) { - u32 aet = arm64_ras_serror_get_severity(esr); + unsigned long aet = arm64_ras_serror_get_severity(esr); switch (aet) { case ESR_ELx_AET_CE: /* corrected error */ @@ -944,7 +944,7 @@ bool arm64_is_fatal_ras_serror(struct pt_regs *regs, unsigned int esr) } } -void do_serror(struct pt_regs *regs, unsigned int esr) +void do_serror(struct pt_regs *regs, unsigned long esr) { /* non-RAS errors are not containable */ if (!arm64_is_ras_serror(esr) || arm64_is_fatal_ras_serror(regs, esr)) @@ -965,7 +965,7 @@ int is_valid_bugaddr(unsigned long addr) return 1; } -static int bug_handler(struct pt_regs *regs, unsigned int esr) +static int bug_handler(struct pt_regs *regs, unsigned long esr) { switch (report_bug(regs->pc, regs)) { case BUG_TRAP_TYPE_BUG: @@ -990,7 +990,7 @@ static struct break_hook bug_break_hook = { .imm = BUG_BRK_IMM, }; -static int reserved_fault_handler(struct pt_regs *regs, unsigned int esr) +static int reserved_fault_handler(struct pt_regs *regs, unsigned long esr) { pr_err("%s generated an invalid instruction at %pS!\n", "Kernel text patching", @@ -1012,7 +1012,7 @@ static struct break_hook fault_break_hook = { #define KASAN_ESR_SIZE_MASK 0x0f #define KASAN_ESR_SIZE(esr) (1 << ((esr) & KASAN_ESR_SIZE_MASK)) -static int kasan_handler(struct pt_regs *regs, unsigned int esr) +static int kasan_handler(struct pt_regs *regs, unsigned long esr) { bool recover = esr & KASAN_ESR_RECOVER; bool write = esr & KASAN_ESR_WRITE; @@ -1055,11 +1055,11 @@ static struct break_hook kasan_break_hook = { * Initial handler for AArch64 BRK exceptions * This handler only used until debug_traps_init(). */ -int __init early_brk64(unsigned long addr, unsigned int esr, +int __init early_brk64(unsigned long addr, unsigned long esr, struct pt_regs *regs) { #ifdef CONFIG_KASAN_SW_TAGS - unsigned int comment = esr & ESR_ELx_BRK64_ISS_COMMENT_MASK; + unsigned long comment = esr & ESR_ELx_BRK64_ISS_COMMENT_MASK; if ((comment & ~KASAN_BRK_MASK) == KASAN_BRK_IMM) return kasan_handler(regs, esr) != DBG_HOOK_HANDLED; diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c index 77341b160aca..24f9b43bc18e 100644 --- a/arch/arm64/mm/fault.c +++ b/arch/arm64/mm/fault.c @@ -43,7 +43,7 @@ #include struct fault_info { - int (*fn)(unsigned long far, unsigned int esr, + int (*fn)(unsigned long far, unsigned long esr, struct pt_regs *regs); int sig; int code; @@ -53,17 +53,17 @@ struct fault_info { static const struct fault_info fault_info[]; static struct fault_info debug_fault_info[]; -static inline const struct fault_info *esr_to_fault_info(unsigned int esr) +static inline const struct fault_info *esr_to_fault_info(unsigned long esr) { return fault_info + (esr & ESR_ELx_FSC); } -static inline const struct fault_info *esr_to_debug_fault_info(unsigned int esr) +static inline const struct fault_info *esr_to_debug_fault_info(unsigned long esr) { return debug_fault_info + DBG_ESR_EVT(esr); } -static void data_abort_decode(unsigned int esr) +static void data_abort_decode(unsigned long esr) { pr_alert("Data abort info:\n"); @@ -85,11 +85,11 @@ static void data_abort_decode(unsigned int esr) (esr & ESR_ELx_WNR) >> ESR_ELx_WNR_SHIFT); } -static void mem_abort_decode(unsigned int esr) +static void mem_abort_decode(unsigned long esr) { pr_alert("Mem abort info:\n"); - pr_alert(" ESR = 0x%08x\n", esr); + pr_alert(" ESR = 0x%016lx\n", esr); pr_alert(" EC = 0x%02lx: %s, IL = %u bits\n", ESR_ELx_EC(esr), esr_get_class_string(esr), (esr & ESR_ELx_IL) ? 32 : 16); @@ -99,7 +99,7 @@ static void mem_abort_decode(unsigned int esr) pr_alert(" EA = %lu, S1PTW = %lu\n", (esr & ESR_ELx_EA) >> ESR_ELx_EA_SHIFT, (esr & ESR_ELx_S1PTW) >> ESR_ELx_S1PTW_SHIFT); - pr_alert(" FSC = 0x%02x: %s\n", (esr & ESR_ELx_FSC), + pr_alert(" FSC = 0x%02lx: %s\n", (esr & ESR_ELx_FSC), esr_to_fault_info(esr)->name); if (esr_is_data_abort(esr)) @@ -229,20 +229,20 @@ int ptep_set_access_flags(struct vm_area_struct *vma, return 1; } -static bool is_el1_instruction_abort(unsigned int esr) +static bool is_el1_instruction_abort(unsigned long esr) { return ESR_ELx_EC(esr) == ESR_ELx_EC_IABT_CUR; } -static bool is_el1_data_abort(unsigned int esr) +static bool is_el1_data_abort(unsigned long esr) { return ESR_ELx_EC(esr) == ESR_ELx_EC_DABT_CUR; } -static inline bool is_el1_permission_fault(unsigned long addr, unsigned int esr, +static inline bool is_el1_permission_fault(unsigned long addr, unsigned long esr, struct pt_regs *regs) { - unsigned int fsc_type = esr & ESR_ELx_FSC_TYPE; + unsigned long fsc_type = esr & ESR_ELx_FSC_TYPE; if (!is_el1_data_abort(esr) && !is_el1_instruction_abort(esr)) return false; @@ -258,7 +258,7 @@ static inline bool is_el1_permission_fault(unsigned long addr, unsigned int esr, } static bool __kprobes is_spurious_el1_translation_fault(unsigned long addr, - unsigned int esr, + unsigned long esr, struct pt_regs *regs) { unsigned long flags; @@ -290,7 +290,7 @@ static bool __kprobes is_spurious_el1_translation_fault(unsigned long addr, } static void die_kernel_fault(const char *msg, unsigned long addr, - unsigned int esr, struct pt_regs *regs) + unsigned long esr, struct pt_regs *regs) { bust_spinlocks(1); @@ -308,7 +308,7 @@ static void die_kernel_fault(const char *msg, unsigned long addr, } #ifdef CONFIG_KASAN_HW_TAGS -static void report_tag_fault(unsigned long addr, unsigned int esr, +static void report_tag_fault(unsigned long addr, unsigned long esr, struct pt_regs *regs) { /* @@ -320,11 +320,11 @@ static void report_tag_fault(unsigned long addr, unsigned int esr, } #else /* Tag faults aren't enabled without CONFIG_KASAN_HW_TAGS. */ -static inline void report_tag_fault(unsigned long addr, unsigned int esr, +static inline void report_tag_fault(unsigned long addr, unsigned long esr, struct pt_regs *regs) { } #endif -static void do_tag_recovery(unsigned long addr, unsigned int esr, +static void do_tag_recovery(unsigned long addr, unsigned long esr, struct pt_regs *regs) { @@ -339,9 +339,9 @@ static void do_tag_recovery(unsigned long addr, unsigned int esr, isb(); } -static bool is_el1_mte_sync_tag_check_fault(unsigned int esr) +static bool is_el1_mte_sync_tag_check_fault(unsigned long esr) { - unsigned int fsc = esr & ESR_ELx_FSC; + unsigned long fsc = esr & ESR_ELx_FSC; if (!is_el1_data_abort(esr)) return false; @@ -352,7 +352,7 @@ static bool is_el1_mte_sync_tag_check_fault(unsigned int esr) return false; } -static void __do_kernel_fault(unsigned long addr, unsigned int esr, +static void __do_kernel_fault(unsigned long addr, unsigned long esr, struct pt_regs *regs) { const char *msg; @@ -393,7 +393,7 @@ static void __do_kernel_fault(unsigned long addr, unsigned int esr, die_kernel_fault(msg, addr, esr, regs); } -static void set_thread_esr(unsigned long address, unsigned int esr) +static void set_thread_esr(unsigned long address, unsigned long esr) { current->thread.fault_address = address; @@ -441,7 +441,7 @@ static void set_thread_esr(unsigned long address, unsigned int esr) * exception level). Fail safe by not providing an ESR * context record at all. */ - WARN(1, "ESR 0x%x is not DABT or IABT from EL0\n", esr); + WARN(1, "ESR 0x%lx is not DABT or IABT from EL0\n", esr); esr = 0; break; } @@ -450,7 +450,7 @@ static void set_thread_esr(unsigned long address, unsigned int esr) current->thread.fault_code = esr; } -static void do_bad_area(unsigned long far, unsigned int esr, +static void do_bad_area(unsigned long far, unsigned long esr, struct pt_regs *regs) { unsigned long addr = untagged_addr(far); @@ -501,7 +501,7 @@ static vm_fault_t __do_page_fault(struct mm_struct *mm, unsigned long addr, return handle_mm_fault(vma, addr, mm_flags, regs); } -static bool is_el0_instruction_abort(unsigned int esr) +static bool is_el0_instruction_abort(unsigned long esr) { return ESR_ELx_EC(esr) == ESR_ELx_EC_IABT_LOW; } @@ -510,12 +510,12 @@ static bool is_el0_instruction_abort(unsigned int esr) * Note: not valid for EL1 DC IVAC, but we never use that such that it * should fault. EL0 cannot issue DC IVAC (undef). */ -static bool is_write_abort(unsigned int esr) +static bool is_write_abort(unsigned long esr) { return (esr & ESR_ELx_WNR) && !(esr & ESR_ELx_CM); } -static int __kprobes do_page_fault(unsigned long far, unsigned int esr, +static int __kprobes do_page_fault(unsigned long far, unsigned long esr, struct pt_regs *regs) { const struct fault_info *inf; @@ -671,7 +671,7 @@ no_context: } static int __kprobes do_translation_fault(unsigned long far, - unsigned int esr, + unsigned long esr, struct pt_regs *regs) { unsigned long addr = untagged_addr(far); @@ -683,19 +683,19 @@ static int __kprobes do_translation_fault(unsigned long far, return 0; } -static int do_alignment_fault(unsigned long far, unsigned int esr, +static int do_alignment_fault(unsigned long far, unsigned long esr, struct pt_regs *regs) { do_bad_area(far, esr, regs); return 0; } -static int do_bad(unsigned long far, unsigned int esr, struct pt_regs *regs) +static int do_bad(unsigned long far, unsigned long esr, struct pt_regs *regs) { return 1; /* "fault" */ } -static int do_sea(unsigned long far, unsigned int esr, struct pt_regs *regs) +static int do_sea(unsigned long far, unsigned long esr, struct pt_regs *regs) { const struct fault_info *inf; unsigned long siaddr; @@ -725,7 +725,7 @@ static int do_sea(unsigned long far, unsigned int esr, struct pt_regs *regs) return 0; } -static int do_tag_check_fault(unsigned long far, unsigned int esr, +static int do_tag_check_fault(unsigned long far, unsigned long esr, struct pt_regs *regs) { /* @@ -805,7 +805,7 @@ static const struct fault_info fault_info[] = { { do_bad, SIGKILL, SI_KERNEL, "unknown 63" }, }; -void do_mem_abort(unsigned long far, unsigned int esr, struct pt_regs *regs) +void do_mem_abort(unsigned long far, unsigned long esr, struct pt_regs *regs) { const struct fault_info *inf = esr_to_fault_info(esr); unsigned long addr = untagged_addr(far); @@ -825,14 +825,14 @@ void do_mem_abort(unsigned long far, unsigned int esr, struct pt_regs *regs) } NOKPROBE_SYMBOL(do_mem_abort); -void do_sp_pc_abort(unsigned long addr, unsigned int esr, struct pt_regs *regs) +void do_sp_pc_abort(unsigned long addr, unsigned long esr, struct pt_regs *regs) { arm64_notify_die("SP/PC alignment exception", regs, SIGBUS, BUS_ADRALN, addr, esr); } NOKPROBE_SYMBOL(do_sp_pc_abort); -int __init early_brk64(unsigned long addr, unsigned int esr, +int __init early_brk64(unsigned long addr, unsigned long esr, struct pt_regs *regs); /* @@ -852,7 +852,7 @@ static struct fault_info __refdata debug_fault_info[] = { }; void __init hook_debug_fault_code(int nr, - int (*fn)(unsigned long, unsigned int, struct pt_regs *), + int (*fn)(unsigned long, unsigned long, struct pt_regs *), int sig, int code, const char *name) { BUG_ON(nr < 0 || nr >= ARRAY_SIZE(debug_fault_info)); @@ -885,7 +885,7 @@ static void debug_exception_exit(struct pt_regs *regs) } NOKPROBE_SYMBOL(debug_exception_exit); -void do_debug_exception(unsigned long addr_if_watchpoint, unsigned int esr, +void do_debug_exception(unsigned long addr_if_watchpoint, unsigned long esr, struct pt_regs *regs) { const struct fault_info *inf = esr_to_debug_fault_info(esr); -- cgit v1.2.3 From 0b12620fddb8a8087091df1a9c7b1da1dec7a4a0 Mon Sep 17 00:00:00 2001 From: Alexandru Elisei Date: Mon, 25 Apr 2022 12:44:43 +0100 Subject: KVM: arm64: Treat ESR_EL2 as a 64-bit register ESR_EL2 was defined as a 32-bit register in the initial release of the ARM Architecture Manual for Armv8-A, and was later extended to 64 bits, with bits [63:32] RES0. ARMv8.7 introduced FEAT_LS64, which makes use of bits [36:32]. KVM treats ESR_EL1 as a 64-bit register when saving and restoring the guest context, but ESR_EL2 is handled as a 32-bit register. Start treating ESR_EL2 as a 64-bit register to allow KVM to make use of the most significant 32 bits in the future. The type chosen to represent ESR_EL2 is u64, as that is consistent with the notation KVM overwhelmingly uses today (u32), and how the rest of the registers are declared. Signed-off-by: Alexandru Elisei Reviewed-by: Marc Zyngier Link: https://lore.kernel.org/r/20220425114444.368693-5-alexandru.elisei@arm.com Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/kvm_emulate.h | 6 +++--- arch/arm64/include/asm/kvm_host.h | 2 +- arch/arm64/include/asm/kvm_ras.h | 2 +- arch/arm64/kvm/handle_exit.c | 14 +++++++------- arch/arm64/kvm/hyp/include/hyp/switch.h | 2 +- arch/arm64/kvm/hyp/nvhe/sys_regs.c | 2 +- arch/arm64/kvm/hyp/vgic-v3-sr.c | 4 ++-- arch/arm64/kvm/inject_fault.c | 4 ++-- arch/arm64/kvm/sys_regs.c | 4 ++-- 9 files changed, 20 insertions(+), 20 deletions(-) (limited to 'arch') diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h index 7496deab025a..ab19a7317e12 100644 --- a/arch/arm64/include/asm/kvm_emulate.h +++ b/arch/arm64/include/asm/kvm_emulate.h @@ -235,14 +235,14 @@ static inline bool vcpu_mode_priv(const struct kvm_vcpu *vcpu) return mode != PSR_MODE_EL0t; } -static __always_inline u32 kvm_vcpu_get_esr(const struct kvm_vcpu *vcpu) +static __always_inline u64 kvm_vcpu_get_esr(const struct kvm_vcpu *vcpu) { return vcpu->arch.fault.esr_el2; } static __always_inline int kvm_vcpu_get_condition(const struct kvm_vcpu *vcpu) { - u32 esr = kvm_vcpu_get_esr(vcpu); + u64 esr = kvm_vcpu_get_esr(vcpu); if (esr & ESR_ELx_CV) return (esr & ESR_ELx_COND_MASK) >> ESR_ELx_COND_SHIFT; @@ -373,7 +373,7 @@ static __always_inline bool kvm_vcpu_abt_issea(const struct kvm_vcpu *vcpu) static __always_inline int kvm_vcpu_sys_get_rt(struct kvm_vcpu *vcpu) { - u32 esr = kvm_vcpu_get_esr(vcpu); + u64 esr = kvm_vcpu_get_esr(vcpu); return ESR_ELx_SYS64_ISS_RT(esr); } diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 94a27a7520f4..850430d15cd0 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -153,7 +153,7 @@ struct kvm_arch { }; struct kvm_vcpu_fault_info { - u32 esr_el2; /* Hyp Syndrom Register */ + u64 esr_el2; /* Hyp Syndrom Register */ u64 far_el2; /* Hyp Fault Address Register */ u64 hpfar_el2; /* Hyp IPA Fault Address Register */ u64 disr_el1; /* Deferred [SError] Status Register */ diff --git a/arch/arm64/include/asm/kvm_ras.h b/arch/arm64/include/asm/kvm_ras.h index 8ac6ee77437c..87e10d9a635b 100644 --- a/arch/arm64/include/asm/kvm_ras.h +++ b/arch/arm64/include/asm/kvm_ras.h @@ -14,7 +14,7 @@ * Was this synchronous external abort a RAS notification? * Returns '0' for errors handled by some RAS subsystem, or -ENOENT. */ -static inline int kvm_handle_guest_sea(phys_addr_t addr, unsigned int esr) +static inline int kvm_handle_guest_sea(phys_addr_t addr, u64 esr) { /* apei_claim_sea(NULL) expects to mask interrupts itself */ lockdep_assert_irqs_enabled(); diff --git a/arch/arm64/kvm/handle_exit.c b/arch/arm64/kvm/handle_exit.c index 97fe14aab1a3..93d92130d36c 100644 --- a/arch/arm64/kvm/handle_exit.c +++ b/arch/arm64/kvm/handle_exit.c @@ -26,7 +26,7 @@ typedef int (*exit_handle_fn)(struct kvm_vcpu *); -static void kvm_handle_guest_serror(struct kvm_vcpu *vcpu, u32 esr) +static void kvm_handle_guest_serror(struct kvm_vcpu *vcpu, u64 esr) { if (!arm64_is_ras_serror(esr) || arm64_is_fatal_ras_serror(NULL, esr)) kvm_inject_vabt(vcpu); @@ -117,10 +117,10 @@ static int kvm_handle_wfx(struct kvm_vcpu *vcpu) static int kvm_handle_guest_debug(struct kvm_vcpu *vcpu) { struct kvm_run *run = vcpu->run; - u32 esr = kvm_vcpu_get_esr(vcpu); + u64 esr = kvm_vcpu_get_esr(vcpu); run->exit_reason = KVM_EXIT_DEBUG; - run->debug.arch.hsr = esr; + run->debug.arch.hsr = lower_32_bits(esr); if (ESR_ELx_EC(esr) == ESR_ELx_EC_WATCHPT_LOW) run->debug.arch.far = vcpu->arch.fault.far_el2; @@ -130,9 +130,9 @@ static int kvm_handle_guest_debug(struct kvm_vcpu *vcpu) static int kvm_handle_unknown_ec(struct kvm_vcpu *vcpu) { - u32 esr = kvm_vcpu_get_esr(vcpu); + u64 esr = kvm_vcpu_get_esr(vcpu); - kvm_pr_unimpl("Unknown exception class: esr: %#08x -- %s\n", + kvm_pr_unimpl("Unknown exception class: esr: %#016llx -- %s\n", esr, esr_get_class_string(esr)); kvm_inject_undefined(vcpu); @@ -187,7 +187,7 @@ static exit_handle_fn arm_exit_handlers[] = { static exit_handle_fn kvm_get_exit_handler(struct kvm_vcpu *vcpu) { - u32 esr = kvm_vcpu_get_esr(vcpu); + u64 esr = kvm_vcpu_get_esr(vcpu); u8 esr_ec = ESR_ELx_EC(esr); return arm_exit_handlers[esr_ec]; @@ -334,6 +334,6 @@ void __noreturn __cold nvhe_hyp_panic_handler(u64 esr, u64 spsr, */ kvm_err("Hyp Offset: 0x%llx\n", hyp_offset); - panic("HYP panic:\nPS:%08llx PC:%016llx ESR:%08llx\nFAR:%016llx HPFAR:%016llx PAR:%016llx\nVCPU:%016lx\n", + panic("HYP panic:\nPS:%08llx PC:%016llx ESR:%016llx\nFAR:%016llx HPFAR:%016llx PAR:%016llx\nVCPU:%016lx\n", spsr, elr_virt, esr, far, hpfar, par, vcpu); } diff --git a/arch/arm64/kvm/hyp/include/hyp/switch.h b/arch/arm64/kvm/hyp/include/hyp/switch.h index 5d31f6c64c8c..37d9f211c200 100644 --- a/arch/arm64/kvm/hyp/include/hyp/switch.h +++ b/arch/arm64/kvm/hyp/include/hyp/switch.h @@ -266,7 +266,7 @@ static inline bool handle_tx2_tvm(struct kvm_vcpu *vcpu) return true; } -static inline bool esr_is_ptrauth_trap(u32 esr) +static inline bool esr_is_ptrauth_trap(u64 esr) { switch (esr_sys64_to_sysreg(esr)) { case SYS_APIAKEYLO_EL1: diff --git a/arch/arm64/kvm/hyp/nvhe/sys_regs.c b/arch/arm64/kvm/hyp/nvhe/sys_regs.c index 33f5181af330..619f94fc95fa 100644 --- a/arch/arm64/kvm/hyp/nvhe/sys_regs.c +++ b/arch/arm64/kvm/hyp/nvhe/sys_regs.c @@ -33,7 +33,7 @@ u64 id_aa64mmfr2_el1_sys_val; */ static void inject_undef64(struct kvm_vcpu *vcpu) { - u32 esr = (ESR_ELx_EC_UNKNOWN << ESR_ELx_EC_SHIFT); + u64 esr = (ESR_ELx_EC_UNKNOWN << ESR_ELx_EC_SHIFT); *vcpu_pc(vcpu) = read_sysreg_el2(SYS_ELR); *vcpu_cpsr(vcpu) = read_sysreg_el2(SYS_SPSR); diff --git a/arch/arm64/kvm/hyp/vgic-v3-sr.c b/arch/arm64/kvm/hyp/vgic-v3-sr.c index 4fb419f7b8b6..6cb638b184b1 100644 --- a/arch/arm64/kvm/hyp/vgic-v3-sr.c +++ b/arch/arm64/kvm/hyp/vgic-v3-sr.c @@ -473,7 +473,7 @@ static int __vgic_v3_bpr_min(void) static int __vgic_v3_get_group(struct kvm_vcpu *vcpu) { - u32 esr = kvm_vcpu_get_esr(vcpu); + u64 esr = kvm_vcpu_get_esr(vcpu); u8 crm = (esr & ESR_ELx_SYS64_ISS_CRM_MASK) >> ESR_ELx_SYS64_ISS_CRM_SHIFT; return crm != 8; @@ -1016,7 +1016,7 @@ static void __vgic_v3_write_ctlr(struct kvm_vcpu *vcpu, u32 vmcr, int rt) int __vgic_v3_perform_cpuif_access(struct kvm_vcpu *vcpu) { int rt; - u32 esr; + u64 esr; u32 vmcr; void (*fn)(struct kvm_vcpu *, u32, int); bool is_read; diff --git a/arch/arm64/kvm/inject_fault.c b/arch/arm64/kvm/inject_fault.c index b47df73e98d7..3664e30f5694 100644 --- a/arch/arm64/kvm/inject_fault.c +++ b/arch/arm64/kvm/inject_fault.c @@ -18,7 +18,7 @@ static void inject_abt64(struct kvm_vcpu *vcpu, bool is_iabt, unsigned long addr { unsigned long cpsr = *vcpu_cpsr(vcpu); bool is_aarch32 = vcpu_mode_is_32bit(vcpu); - u32 esr = 0; + u64 esr = 0; vcpu->arch.flags |= (KVM_ARM64_EXCEPT_AA64_EL1 | KVM_ARM64_EXCEPT_AA64_ELx_SYNC | @@ -50,7 +50,7 @@ static void inject_abt64(struct kvm_vcpu *vcpu, bool is_iabt, unsigned long addr static void inject_undef64(struct kvm_vcpu *vcpu) { - u32 esr = (ESR_ELx_EC_UNKNOWN << ESR_ELx_EC_SHIFT); + u64 esr = (ESR_ELx_EC_UNKNOWN << ESR_ELx_EC_SHIFT); vcpu->arch.flags |= (KVM_ARM64_EXCEPT_AA64_EL1 | KVM_ARM64_EXCEPT_AA64_ELx_SYNC | diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index 7b45c040cc27..2bde95662bbf 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -2304,7 +2304,7 @@ static int kvm_handle_cp_64(struct kvm_vcpu *vcpu, size_t nr_global) { struct sys_reg_params params; - u32 esr = kvm_vcpu_get_esr(vcpu); + u64 esr = kvm_vcpu_get_esr(vcpu); int Rt = kvm_vcpu_sys_get_rt(vcpu); int Rt2 = (esr >> 10) & 0x1f; @@ -2354,7 +2354,7 @@ static int kvm_handle_cp_32(struct kvm_vcpu *vcpu, size_t nr_global) { struct sys_reg_params params; - u32 esr = kvm_vcpu_get_esr(vcpu); + u64 esr = kvm_vcpu_get_esr(vcpu); int Rt = kvm_vcpu_sys_get_rt(vcpu); params.CRm = (esr >> 1) & 0xf; -- cgit v1.2.3 From 18f3976fdb5da2ba9572845e6f7dfb58652871ea Mon Sep 17 00:00:00 2001 From: Alexandru Elisei Date: Mon, 25 Apr 2022 12:44:44 +0100 Subject: KVM: arm64: uapi: Add kvm_debug_exit_arch.hsr_high When userspace is debugging a VM, the kvm_debug_exit_arch part of the kvm_run struct contains arm64 specific debug information: the ESR_EL2 value, encoded in the field "hsr", and the address of the instruction that caused the exception, encoded in the field "far". Linux has moved to treating ESR_EL2 as a 64-bit register, but unfortunately kvm_debug_exit_arch.hsr cannot be changed because that would change the memory layout of the struct on big endian machines: Current layout: | Layout with "hsr" extended to 64 bits: | offset 0: ESR_EL2[31:0] (hsr) | offset 0: ESR_EL2[61:32] (hsr[61:32]) offset 4: padding | offset 4: ESR_EL2[31:0] (hsr[31:0]) offset 8: FAR_EL2[61:0] (far) | offset 8: FAR_EL2[61:0] (far) which breaks existing code. The padding is inserted by the compiler because the "far" field must be aligned to 8 bytes (each field must be naturally aligned - aapcs64 [1], page 18), and the struct itself must be aligned to 8 bytes (the struct must be aligned to the maximum alignment of its fields - aapcs64, page 18), which means that "hsr" must be aligned to 8 bytes as it is the first field in the struct. To avoid changing the struct size and layout for the existing fields, add a new field, "hsr_high", which replaces the existing padding. "hsr_high" will be used to hold the ESR_EL2[61:32] bits of the register. The memory layout, both on big and little endian machine, becomes: offset 0: ESR_EL2[31:0] (hsr) offset 4: ESR_EL2[61:32] (hsr_high) offset 8: FAR_EL2[61:0] (far) The padding that the compiler inserts for the current struct layout is unitialized. To prevent an updated userspace running on an old kernel mistaking the padding for a valid "hsr_high" value, add a new flag, KVM_DEBUG_ARCH_HSR_HIGH_VALID, to kvm_run->flags to let userspace know that "hsr_high" holds a valid ESR_EL2[61:32] value. [1] https://github.com/ARM-software/abi-aa/releases/download/2021Q3/aapcs64.pdf Signed-off-by: Alexandru Elisei Reviewed-by: Marc Zyngier Link: https://lore.kernel.org/r/20220425114444.368693-6-alexandru.elisei@arm.com Signed-off-by: Catalin Marinas --- Documentation/virt/kvm/api.rst | 2 ++ arch/arm64/include/uapi/asm/kvm.h | 2 ++ arch/arm64/kvm/arm.c | 1 + arch/arm64/kvm/handle_exit.c | 2 ++ 4 files changed, 7 insertions(+) (limited to 'arch') diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst index 85c7abc51af5..ecd70d99f3e0 100644 --- a/Documentation/virt/kvm/api.rst +++ b/Documentation/virt/kvm/api.rst @@ -5713,6 +5713,8 @@ affect the device's behavior. Current defined flags:: #define KVM_RUN_X86_SMM (1 << 0) /* x86, set if bus lock detected in VM */ #define KVM_RUN_BUS_LOCK (1 << 1) + /* arm64, set for KVM_EXIT_DEBUG */ + #define KVM_DEBUG_ARCH_HSR_HIGH_VALID (1 << 0) :: diff --git a/arch/arm64/include/uapi/asm/kvm.h b/arch/arm64/include/uapi/asm/kvm.h index c1b6ddc02d2f..ab585359242d 100644 --- a/arch/arm64/include/uapi/asm/kvm.h +++ b/arch/arm64/include/uapi/asm/kvm.h @@ -139,8 +139,10 @@ struct kvm_guest_debug_arch { __u64 dbg_wvr[KVM_ARM_MAX_DBG_REGS]; }; +#define KVM_DEBUG_ARCH_HSR_HIGH_VALID (1 << 0) struct kvm_debug_exit_arch { __u32 hsr; + __u32 hsr_high; /* ESR_EL2[61:32] */ __u64 far; /* used for watchpoints */ }; diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index 523bc934fe2f..7ef4fd2fe20a 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -783,6 +783,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu) ret = 1; run->exit_reason = KVM_EXIT_UNKNOWN; + run->flags = 0; while (ret > 0) { /* * Check conditions before entering the guest diff --git a/arch/arm64/kvm/handle_exit.c b/arch/arm64/kvm/handle_exit.c index 93d92130d36c..0b829292dc54 100644 --- a/arch/arm64/kvm/handle_exit.c +++ b/arch/arm64/kvm/handle_exit.c @@ -121,6 +121,8 @@ static int kvm_handle_guest_debug(struct kvm_vcpu *vcpu) run->exit_reason = KVM_EXIT_DEBUG; run->debug.arch.hsr = lower_32_bits(esr); + run->debug.arch.hsr_high = upper_32_bits(esr); + run->flags = KVM_DEBUG_ARCH_HSR_HIGH_VALID; if (ESR_ELx_EC(esr) == ESR_ELx_EC_WATCHPT_LOW) run->debug.arch.far = vcpu->arch.fault.far_el2; -- cgit v1.2.3 From 48e6f22e25a44e43952db5fbb767dea0c9319cb2 Mon Sep 17 00:00:00 2001 From: Michal Orzel Date: Tue, 26 Apr 2022 09:06:03 +0200 Subject: arm64: cputype: Avoid overflow using MIDR_IMPLEMENTOR_MASK Value of macro MIDR_IMPLEMENTOR_MASK exceeds the range of integer and can lead to overflow. Currently there is no issue as it is used in expressions implicitly casting it to u32. To avoid possible problems, fix the macro. Signed-off-by: Michal Orzel Link: https://lore.kernel.org/r/20220426070603.56031-1-michal.orzel@arm.com Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/cputype.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm64/include/asm/cputype.h b/arch/arm64/include/asm/cputype.h index ff8f4511df71..92331c07c2d1 100644 --- a/arch/arm64/include/asm/cputype.h +++ b/arch/arm64/include/asm/cputype.h @@ -36,7 +36,7 @@ #define MIDR_VARIANT(midr) \ (((midr) & MIDR_VARIANT_MASK) >> MIDR_VARIANT_SHIFT) #define MIDR_IMPLEMENTOR_SHIFT 24 -#define MIDR_IMPLEMENTOR_MASK (0xff << MIDR_IMPLEMENTOR_SHIFT) +#define MIDR_IMPLEMENTOR_MASK (0xffU << MIDR_IMPLEMENTOR_SHIFT) #define MIDR_IMPLEMENTOR(midr) \ (((midr) & MIDR_IMPLEMENTOR_MASK) >> MIDR_IMPLEMENTOR_SHIFT) -- cgit v1.2.3 From e6a6b34f97efe3ded077b31f4370b4c1206c9e56 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Tue, 3 May 2022 18:02:22 +0100 Subject: arm64/sysreg: Introduce helpers for access to sysreg fields The macros we define for the bitfields within sysregs have very regular names, especially once we switch to automatic generation of those macros. Take advantage of this to define wrappers around FIELD_PREP() allowing us to simplify setting values in fields either numerically SYS_FIELD_PREP(SCTLR_EL1, TCF0, 0x0) or using the values of enumerations within the fields SYS_FIELD_PREP_ENUM(SCTLR_EL1, TCF0, ASYMM) Suggested-by: Mark Rutland Signed-off-by: Mark Brown Link: https://lore.kernel.org/r/20220503170233.507788-2-broonie@kernel.org Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/sysreg.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'arch') diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index fbf5f8bb9055..8543a315c5ca 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -1345,4 +1345,10 @@ #endif +#define SYS_FIELD_PREP(reg, field, val) \ + FIELD_PREP(reg##_##field##_MASK, val) + +#define SYS_FIELD_PREP_ENUM(reg, field, val) \ + FIELD_PREP(reg##_##field##_MASK, reg##_##field##_##val) + #endif /* __ASM_SYSREG_H */ -- cgit v1.2.3 From 96f101a9eab479dbfbdf7713ba966f9031c9c045 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Tue, 3 May 2022 18:02:23 +0100 Subject: arm64/mte: Make TCF0 naming and field values more standard In preparation for automatic generation of SCTLR_EL1 register definitions make the macros used to define SCTLR_EL1.TCF0 and the enumeration values it has more standard so they can be used with FIELD_PREP() via the newly defined SYS_FIELD_PREP_ helpers. Since the field also exists in SCTLR_EL2 with the same values also rename the macros to SCTLR_ELx rather than SCTLR_EL1. There should be no functional change as a result of this patch. Signed-off-by: Mark Brown Acked-by: Mark Rutland --- arch/arm64/include/asm/sysreg.h | 8 ++++---- arch/arm64/kernel/mte.c | 6 +++--- 2 files changed, 7 insertions(+), 7 deletions(-) (limited to 'arch') diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index 8543a315c5ca..6dc840be0268 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -678,10 +678,10 @@ #define SCTLR_EL1_ATA0 (BIT(42)) #define SCTLR_EL1_TCF0_SHIFT 38 -#define SCTLR_EL1_TCF0_NONE (UL(0x0) << SCTLR_EL1_TCF0_SHIFT) -#define SCTLR_EL1_TCF0_SYNC (UL(0x1) << SCTLR_EL1_TCF0_SHIFT) -#define SCTLR_EL1_TCF0_ASYNC (UL(0x2) << SCTLR_EL1_TCF0_SHIFT) -#define SCTLR_EL1_TCF0_ASYMM (UL(0x3) << SCTLR_EL1_TCF0_SHIFT) +#define SCTLR_EL1_TCF0_NONE (UL(0x0)) +#define SCTLR_EL1_TCF0_SYNC (UL(0x1)) +#define SCTLR_EL1_TCF0_ASYNC (UL(0x2)) +#define SCTLR_EL1_TCF0_ASYMM (UL(0x3)) #define SCTLR_EL1_TCF0_MASK (UL(0x3) << SCTLR_EL1_TCF0_SHIFT) #define SCTLR_EL1_BT1 (BIT(36)) diff --git a/arch/arm64/kernel/mte.c b/arch/arm64/kernel/mte.c index 78b3e0f8e997..41469b69a48e 100644 --- a/arch/arm64/kernel/mte.c +++ b/arch/arm64/kernel/mte.c @@ -216,11 +216,11 @@ static void mte_update_sctlr_user(struct task_struct *task) * default order. */ if (resolved_mte_tcf & MTE_CTRL_TCF_ASYMM) - sctlr |= SCTLR_EL1_TCF0_ASYMM; + sctlr |= SYS_FIELD_PREP_ENUM(SCTLR_EL1, TCF0, ASYMM); else if (resolved_mte_tcf & MTE_CTRL_TCF_ASYNC) - sctlr |= SCTLR_EL1_TCF0_ASYNC; + sctlr |= SYS_FIELD_PREP_ENUM(SCTLR_EL1, TCF0, ASYNC); else if (resolved_mte_tcf & MTE_CTRL_TCF_SYNC) - sctlr |= SCTLR_EL1_TCF0_SYNC; + sctlr |= SYS_FIELD_PREP_ENUM(SCTLR_EL1, TCF0, SYNC); task->thread.sctlr_user = sctlr; } -- cgit v1.2.3 From bc249e37b9334826de29111c5350c3e7a08a3969 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Tue, 3 May 2022 18:02:24 +0100 Subject: arm64/mte: Make TCF field values and naming more standard In preparation for automatic generation of the defines for system registers make the values used for the enumeration in SCTLR_ELx.TCF suitable for use with the newly defined SYS_FIELD_PREP_ENUM helper, removing the shift from the define and using the helper to generate it on use instead. Since we only ever interact with this field in EL1 and in preparation for generation of the defines also rename from SCTLR_ELx to SCTLR_EL1. SCTLR_EL2 is not quite the same as SCTLR_EL1 so the conversion does not share the field definitions. There should be no functional change from this patch. Signed-off-by: Mark Brown Acked-by: Mark Rutland Link: https://lore.kernel.org/r/20220503170233.507788-4-broonie@kernel.org Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/sysreg.h | 14 +++++++------- arch/arm64/kernel/mte.c | 9 +++++---- arch/arm64/mm/fault.c | 3 ++- 3 files changed, 14 insertions(+), 12 deletions(-) (limited to 'arch') diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index 6dc840be0268..732d84111d9f 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -631,13 +631,6 @@ #define SCTLR_ELx_DSSBS (BIT(44)) #define SCTLR_ELx_ATA (BIT(43)) -#define SCTLR_ELx_TCF_SHIFT 40 -#define SCTLR_ELx_TCF_NONE (UL(0x0) << SCTLR_ELx_TCF_SHIFT) -#define SCTLR_ELx_TCF_SYNC (UL(0x1) << SCTLR_ELx_TCF_SHIFT) -#define SCTLR_ELx_TCF_ASYNC (UL(0x2) << SCTLR_ELx_TCF_SHIFT) -#define SCTLR_ELx_TCF_ASYMM (UL(0x3) << SCTLR_ELx_TCF_SHIFT) -#define SCTLR_ELx_TCF_MASK (UL(0x3) << SCTLR_ELx_TCF_SHIFT) - #define SCTLR_ELx_ENIA_SHIFT 31 #define SCTLR_ELx_ITFSB (BIT(37)) @@ -677,6 +670,13 @@ #define SCTLR_EL1_EPAN (BIT(57)) #define SCTLR_EL1_ATA0 (BIT(42)) +#define SCTLR_EL1_TCF_SHIFT 40 +#define SCTLR_EL1_TCF_NONE (UL(0x0)) +#define SCTLR_EL1_TCF_SYNC (UL(0x1)) +#define SCTLR_EL1_TCF_ASYNC (UL(0x2)) +#define SCTLR_EL1_TCF_ASYMM (UL(0x3)) +#define SCTLR_EL1_TCF_MASK (UL(0x3) << SCTLR_EL1_TCF_SHIFT) + #define SCTLR_EL1_TCF0_SHIFT 38 #define SCTLR_EL1_TCF0_NONE (UL(0x0)) #define SCTLR_EL1_TCF0_SYNC (UL(0x1)) diff --git a/arch/arm64/kernel/mte.c b/arch/arm64/kernel/mte.c index 41469b69a48e..98f5e1e13c36 100644 --- a/arch/arm64/kernel/mte.c +++ b/arch/arm64/kernel/mte.c @@ -106,7 +106,8 @@ int memcmp_pages(struct page *page1, struct page *page2) static inline void __mte_enable_kernel(const char *mode, unsigned long tcf) { /* Enable MTE Sync Mode for EL1. */ - sysreg_clear_set(sctlr_el1, SCTLR_ELx_TCF_MASK, tcf); + sysreg_clear_set(sctlr_el1, SCTLR_EL1_TCF_MASK, + SYS_FIELD_PREP(SCTLR_EL1, TCF, tcf)); isb(); pr_info_once("MTE: enabled in %s mode at EL1\n", mode); @@ -122,12 +123,12 @@ void mte_enable_kernel_sync(void) WARN_ONCE(system_uses_mte_async_or_asymm_mode(), "MTE async mode enabled system wide!"); - __mte_enable_kernel("synchronous", SCTLR_ELx_TCF_SYNC); + __mte_enable_kernel("synchronous", SCTLR_EL1_TCF_SYNC); } void mte_enable_kernel_async(void) { - __mte_enable_kernel("asynchronous", SCTLR_ELx_TCF_ASYNC); + __mte_enable_kernel("asynchronous", SCTLR_EL1_TCF_ASYNC); /* * MTE async mode is set system wide by the first PE that @@ -144,7 +145,7 @@ void mte_enable_kernel_async(void) void mte_enable_kernel_asymm(void) { if (cpus_have_cap(ARM64_MTE_ASYMM)) { - __mte_enable_kernel("asymmetric", SCTLR_ELx_TCF_ASYMM); + __mte_enable_kernel("asymmetric", SCTLR_EL1_TCF_ASYMM); /* * MTE asymm mode behaves as async mode for store diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c index 77341b160aca..5e280cc566ca 100644 --- a/arch/arm64/mm/fault.c +++ b/arch/arm64/mm/fault.c @@ -335,7 +335,8 @@ static void do_tag_recovery(unsigned long addr, unsigned int esr, * It will be done lazily on the other CPUs when they will hit a * tag fault. */ - sysreg_clear_set(sctlr_el1, SCTLR_ELx_TCF_MASK, SCTLR_ELx_TCF_NONE); + sysreg_clear_set(sctlr_el1, SCTLR_EL1_TCF_MASK, + SYS_FIELD_PREP_ENUM(SCTLR_EL1, TCF, NONE)); isb(); } -- cgit v1.2.3 From e4e6a9d5593c18b637668ac70ea1bfa868b5c210 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Tue, 3 May 2022 18:02:25 +0100 Subject: arm64/sysreg: Rename SCTLR_EL1_NTWE/TWI to SCTLR_EL1_nTWE/TWI We already use lower case in some defines in sysreg.h, for consistency with the architecture definition do so for SCTLR_EL1.nTWE and SCTLR_EL1.nTWI. No functional change. Signed-off-by: Mark Brown Acked-by: Mark Rutland Link: https://lore.kernel.org/r/20220503170233.507788-5-broonie@kernel.org Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/sysreg.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index 732d84111d9f..7e9de3c87cd9 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -689,8 +689,8 @@ #define SCTLR_EL1_UCI (BIT(26)) #define SCTLR_EL1_E0E (BIT(24)) #define SCTLR_EL1_SPAN (BIT(23)) -#define SCTLR_EL1_NTWE (BIT(18)) -#define SCTLR_EL1_NTWI (BIT(16)) +#define SCTLR_EL1_nTWE (BIT(18)) +#define SCTLR_EL1_nTWI (BIT(16)) #define SCTLR_EL1_UCT (BIT(15)) #define SCTLR_EL1_DZE (BIT(14)) #define SCTLR_EL1_UMA (BIT(9)) @@ -714,7 +714,7 @@ #define INIT_SCTLR_EL1_MMU_ON \ (SCTLR_ELx_M | SCTLR_ELx_C | SCTLR_ELx_SA | SCTLR_EL1_SA0 | \ SCTLR_EL1_SED | SCTLR_ELx_I | SCTLR_EL1_DZE | SCTLR_EL1_UCT | \ - SCTLR_EL1_NTWE | SCTLR_ELx_IESB | SCTLR_EL1_SPAN | SCTLR_ELx_ITFSB | \ + SCTLR_EL1_nTWE | SCTLR_ELx_IESB | SCTLR_EL1_SPAN | SCTLR_ELx_ITFSB | \ ENDIAN_SET_EL1 | SCTLR_EL1_UCI | SCTLR_EL1_EPAN | SCTLR_EL1_RES1) /* MAIR_ELx memory attributes (used by Linux) */ -- cgit v1.2.3 From 56eb621b8ab6c1d36e9eb6e75367ec49ba482f1a Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Tue, 3 May 2022 18:02:26 +0100 Subject: arm64/sysreg: Define bits for previously RES1 fields in SCTLR_EL1 In older revisions of the architecture SCTLR_EL1 contained several RES1 fields but in DDI0487H.a these now all have assigned functions. In preparation for automatically generating sysreg.h provide explicit definitions for all these bits and use them in the INIT_SCTLR_EL1_ macros where _RES1 was previously used. There should be no functional change. Signed-off-by: Mark Brown Reviewed-by: Mark Rutland Link: https://lore.kernel.org/r/20220503170233.507788-6-broonie@kernel.org Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/sysreg.h | 53 +++++++++++++++++++++++++---------------- 1 file changed, 32 insertions(+), 21 deletions(-) (limited to 'arch') diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index 7e9de3c87cd9..331e2521a81a 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -633,19 +633,24 @@ #define SCTLR_ELx_ENIA_SHIFT 31 -#define SCTLR_ELx_ITFSB (BIT(37)) -#define SCTLR_ELx_ENIA (BIT(SCTLR_ELx_ENIA_SHIFT)) -#define SCTLR_ELx_ENIB (BIT(30)) -#define SCTLR_ELx_ENDA (BIT(27)) -#define SCTLR_ELx_EE (BIT(25)) -#define SCTLR_ELx_IESB (BIT(21)) -#define SCTLR_ELx_WXN (BIT(19)) -#define SCTLR_ELx_ENDB (BIT(13)) -#define SCTLR_ELx_I (BIT(12)) -#define SCTLR_ELx_SA (BIT(3)) -#define SCTLR_ELx_C (BIT(2)) -#define SCTLR_ELx_A (BIT(1)) -#define SCTLR_ELx_M (BIT(0)) +#define SCTLR_ELx_ITFSB (BIT(37)) +#define SCTLR_ELx_ENIA (BIT(SCTLR_ELx_ENIA_SHIFT)) +#define SCTLR_ELx_ENIB (BIT(30)) +#define SCTLR_ELx_LSMAOE (BIT(29)) +#define SCTLR_ELx_nTLSMD (BIT(28)) +#define SCTLR_ELx_ENDA (BIT(27)) +#define SCTLR_ELx_EE (BIT(25)) +#define SCTLR_ELx_EIS (BIT(22)) +#define SCTLR_ELx_IESB (BIT(21)) +#define SCTLR_ELx_TSCXT (BIT(20)) +#define SCTLR_ELx_WXN (BIT(19)) +#define SCTLR_ELx_ENDB (BIT(13)) +#define SCTLR_ELx_I (BIT(12)) +#define SCTLR_ELx_EOS (BIT(11)) +#define SCTLR_ELx_SA (BIT(3)) +#define SCTLR_ELx_C (BIT(2)) +#define SCTLR_ELx_A (BIT(1)) +#define SCTLR_ELx_M (BIT(0)) /* SCTLR_EL2 specific flags. */ #define SCTLR_EL2_RES1 ((BIT(4)) | (BIT(5)) | (BIT(11)) | (BIT(16)) | \ @@ -686,22 +691,24 @@ #define SCTLR_EL1_BT1 (BIT(36)) #define SCTLR_EL1_BT0 (BIT(35)) +#define SCTLR_EL1_LSMAOE (BIT(29)) +#define SCTLR_EL1_nTLSMD (BIT(28)) #define SCTLR_EL1_UCI (BIT(26)) #define SCTLR_EL1_E0E (BIT(24)) #define SCTLR_EL1_SPAN (BIT(23)) +#define SCTLR_EL1_EIS (BIT(22)) +#define SCTLR_EL1_TSCXT (BIT(20)) #define SCTLR_EL1_nTWE (BIT(18)) #define SCTLR_EL1_nTWI (BIT(16)) #define SCTLR_EL1_UCT (BIT(15)) #define SCTLR_EL1_DZE (BIT(14)) +#define SCTLR_EL1_EOS (BIT(11)) #define SCTLR_EL1_UMA (BIT(9)) #define SCTLR_EL1_SED (BIT(8)) #define SCTLR_EL1_ITD (BIT(7)) #define SCTLR_EL1_CP15BEN (BIT(5)) #define SCTLR_EL1_SA0 (BIT(4)) -#define SCTLR_EL1_RES1 ((BIT(11)) | (BIT(20)) | (BIT(22)) | (BIT(28)) | \ - (BIT(29))) - #ifdef CONFIG_CPU_BIG_ENDIAN #define ENDIAN_SET_EL1 (SCTLR_EL1_E0E | SCTLR_ELx_EE) #else @@ -709,13 +716,17 @@ #endif #define INIT_SCTLR_EL1_MMU_OFF \ - (ENDIAN_SET_EL1 | SCTLR_EL1_RES1) + (ENDIAN_SET_EL1 | SCTLR_EL1_LSMAOE | SCTLR_EL1_nTLSMD | \ + SCTLR_EL1_EIS | SCTLR_EL1_TSCXT | SCTLR_EL1_EOS) #define INIT_SCTLR_EL1_MMU_ON \ - (SCTLR_ELx_M | SCTLR_ELx_C | SCTLR_ELx_SA | SCTLR_EL1_SA0 | \ - SCTLR_EL1_SED | SCTLR_ELx_I | SCTLR_EL1_DZE | SCTLR_EL1_UCT | \ - SCTLR_EL1_nTWE | SCTLR_ELx_IESB | SCTLR_EL1_SPAN | SCTLR_ELx_ITFSB | \ - ENDIAN_SET_EL1 | SCTLR_EL1_UCI | SCTLR_EL1_EPAN | SCTLR_EL1_RES1) + (SCTLR_ELx_M | SCTLR_ELx_C | SCTLR_ELx_SA | \ + SCTLR_EL1_SA0 | SCTLR_EL1_SED | SCTLR_ELx_I | \ + SCTLR_EL1_DZE | SCTLR_EL1_UCT | SCTLR_EL1_nTWE | \ + SCTLR_ELx_IESB | SCTLR_EL1_SPAN | SCTLR_ELx_ITFSB | \ + ENDIAN_SET_EL1 | SCTLR_EL1_UCI | SCTLR_EL1_EPAN | \ + SCTLR_EL1_LSMAOE | SCTLR_EL1_nTLSMD | SCTLR_EL1_EIS | \ + SCTLR_EL1_TSCXT | SCTLR_EL1_EOS) /* MAIR_ELx memory attributes (used by Linux) */ #define MAIR_ATTR_DEVICE_nGnRnE UL(0x00) -- cgit v1.2.3 From 6329eb543d991a120fba5de1362fa3df7b6b62e1 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Tue, 3 May 2022 18:02:27 +0100 Subject: arm64: Update name of ID_AA64ISAR0_EL1_ATOMIC to reflect ARM The architecture reference manual refers to the field in bits 23:20 of ID_AA64ISAR0_EL1 with the name "atomic" but the kernel defines for this bitfield use the name "atomics". Bring the two into sync to make it easier to cross reference with the specification. Signed-off-by: Mark Brown Acked-by: Mark Rutland Link: https://lore.kernel.org/r/20220503170233.507788-7-broonie@kernel.org Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/sysreg.h | 2 +- arch/arm64/kernel/cpufeature.c | 6 +++--- arch/arm64/kvm/hyp/include/nvhe/fixed_config.h | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) (limited to 'arch') diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index 331e2521a81a..0bb259ec6ee8 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -749,7 +749,7 @@ #define ID_AA64ISAR0_SM3_SHIFT 36 #define ID_AA64ISAR0_SHA3_SHIFT 32 #define ID_AA64ISAR0_RDM_SHIFT 28 -#define ID_AA64ISAR0_ATOMICS_SHIFT 20 +#define ID_AA64ISAR0_ATOMIC_SHIFT 20 #define ID_AA64ISAR0_CRC32_SHIFT 16 #define ID_AA64ISAR0_SHA2_SHIFT 12 #define ID_AA64ISAR0_SHA1_SHIFT 8 diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index d72c4b4d389c..18833fe6d148 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -200,7 +200,7 @@ static const struct arm64_ftr_bits ftr_id_aa64isar0[] = { ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR0_SM3_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR0_SHA3_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR0_RDM_SHIFT, 4, 0), - ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR0_ATOMICS_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR0_ATOMIC_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR0_CRC32_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR0_SHA2_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR0_SHA1_SHIFT, 4, 0), @@ -2013,7 +2013,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .type = ARM64_CPUCAP_SYSTEM_FEATURE, .matches = has_cpuid_feature, .sys_reg = SYS_ID_AA64ISAR0_EL1, - .field_pos = ID_AA64ISAR0_ATOMICS_SHIFT, + .field_pos = ID_AA64ISAR0_ATOMIC_SHIFT, .field_width = 4, .sign = FTR_UNSIGNED, .min_field_value = 2, @@ -2520,7 +2520,7 @@ static const struct arm64_cpu_capabilities arm64_elf_hwcaps[] = { HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_SHA2_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_SHA2), HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_SHA2_SHIFT, 4, FTR_UNSIGNED, 2, CAP_HWCAP, KERNEL_HWCAP_SHA512), HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_CRC32_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_CRC32), - HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_ATOMICS_SHIFT, 4, FTR_UNSIGNED, 2, CAP_HWCAP, KERNEL_HWCAP_ATOMICS), + HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_ATOMIC_SHIFT, 4, FTR_UNSIGNED, 2, CAP_HWCAP, KERNEL_HWCAP_ATOMICS), HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_RDM_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_ASIMDRDM), HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_SHA3_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_SHA3), HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_SM3_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_SM3), diff --git a/arch/arm64/kvm/hyp/include/nvhe/fixed_config.h b/arch/arm64/kvm/hyp/include/nvhe/fixed_config.h index 5ad626527d41..63a114b9b2ed 100644 --- a/arch/arm64/kvm/hyp/include/nvhe/fixed_config.h +++ b/arch/arm64/kvm/hyp/include/nvhe/fixed_config.h @@ -163,7 +163,7 @@ ARM64_FEATURE_MASK(ID_AA64ISAR0_SHA1) | \ ARM64_FEATURE_MASK(ID_AA64ISAR0_SHA2) | \ ARM64_FEATURE_MASK(ID_AA64ISAR0_CRC32) | \ - ARM64_FEATURE_MASK(ID_AA64ISAR0_ATOMICS) | \ + ARM64_FEATURE_MASK(ID_AA64ISAR0_ATOMIC) | \ ARM64_FEATURE_MASK(ID_AA64ISAR0_RDM) | \ ARM64_FEATURE_MASK(ID_AA64ISAR0_SHA3) | \ ARM64_FEATURE_MASK(ID_AA64ISAR0_SM3) | \ -- cgit v1.2.3 From 0eda2ec48907f0ec8c283306c98f28d13e43dafd Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Tue, 3 May 2022 18:02:28 +0100 Subject: arm64/sysreg: Standardise ID_AA64ISAR0_EL1 macro names The macros for accessing fields in ID_AA64ISAR0_EL1 omit the _EL1 from the name of the register. In preparation for converting this register to be automatically generated update the names to include an _EL1, there should be no functional change. Signed-off-by: Mark Brown Acked-by: Mark Rutland Link: https://lore.kernel.org/r/20220503170233.507788-8-broonie@kernel.org Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/archrandom.h | 2 +- arch/arm64/include/asm/sysreg.h | 34 ++++++------- arch/arm64/kernel/cpufeature.c | 70 +++++++++++++------------- arch/arm64/kvm/hyp/include/nvhe/fixed_config.h | 28 +++++------ 4 files changed, 67 insertions(+), 67 deletions(-) (limited to 'arch') diff --git a/arch/arm64/include/asm/archrandom.h b/arch/arm64/include/asm/archrandom.h index d1bb5e71df25..3a6b6d38c5b8 100644 --- a/arch/arm64/include/asm/archrandom.h +++ b/arch/arm64/include/asm/archrandom.h @@ -142,7 +142,7 @@ static inline bool __init __early_cpu_has_rndr(void) { /* Open code as we run prior to the first call to cpufeature. */ unsigned long ftr = read_sysreg_s(SYS_ID_AA64ISAR0_EL1); - return (ftr >> ID_AA64ISAR0_RNDR_SHIFT) & 0xf; + return (ftr >> ID_AA64ISAR0_EL1_RNDR_SHIFT) & 0xf; } static inline bool __init __must_check diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index 0bb259ec6ee8..ae440b1ffb8e 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -740,23 +740,23 @@ #define MAIR_ATTRIDX(attr, idx) ((attr) << ((idx) * 8)) /* id_aa64isar0 */ -#define ID_AA64ISAR0_RNDR_SHIFT 60 -#define ID_AA64ISAR0_TLB_SHIFT 56 -#define ID_AA64ISAR0_TS_SHIFT 52 -#define ID_AA64ISAR0_FHM_SHIFT 48 -#define ID_AA64ISAR0_DP_SHIFT 44 -#define ID_AA64ISAR0_SM4_SHIFT 40 -#define ID_AA64ISAR0_SM3_SHIFT 36 -#define ID_AA64ISAR0_SHA3_SHIFT 32 -#define ID_AA64ISAR0_RDM_SHIFT 28 -#define ID_AA64ISAR0_ATOMIC_SHIFT 20 -#define ID_AA64ISAR0_CRC32_SHIFT 16 -#define ID_AA64ISAR0_SHA2_SHIFT 12 -#define ID_AA64ISAR0_SHA1_SHIFT 8 -#define ID_AA64ISAR0_AES_SHIFT 4 - -#define ID_AA64ISAR0_TLB_RANGE_NI 0x0 -#define ID_AA64ISAR0_TLB_RANGE 0x2 +#define ID_AA64ISAR0_EL1_RNDR_SHIFT 60 +#define ID_AA64ISAR0_EL1_TLB_SHIFT 56 +#define ID_AA64ISAR0_EL1_TS_SHIFT 52 +#define ID_AA64ISAR0_EL1_FHM_SHIFT 48 +#define ID_AA64ISAR0_EL1_DP_SHIFT 44 +#define ID_AA64ISAR0_EL1_SM4_SHIFT 40 +#define ID_AA64ISAR0_EL1_SM3_SHIFT 36 +#define ID_AA64ISAR0_EL1_SHA3_SHIFT 32 +#define ID_AA64ISAR0_EL1_RDM_SHIFT 28 +#define ID_AA64ISAR0_EL1_ATOMIC_SHIFT 20 +#define ID_AA64ISAR0_EL1_CRC32_SHIFT 16 +#define ID_AA64ISAR0_EL1_SHA2_SHIFT 12 +#define ID_AA64ISAR0_EL1_SHA1_SHIFT 8 +#define ID_AA64ISAR0_EL1_AES_SHIFT 4 + +#define ID_AA64ISAR0_EL1_TLB_RANGE_NI 0x0 +#define ID_AA64ISAR0_EL1_TLB_RANGE 0x2 /* id_aa64isar1 */ #define ID_AA64ISAR1_I8MM_SHIFT 52 diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index 18833fe6d148..01e7ae167f9f 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -191,20 +191,20 @@ static bool __system_matches_cap(unsigned int n); * sync with the documentation of the CPU feature register ABI. */ static const struct arm64_ftr_bits ftr_id_aa64isar0[] = { - ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR0_RNDR_SHIFT, 4, 0), - ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR0_TLB_SHIFT, 4, 0), - ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR0_TS_SHIFT, 4, 0), - ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR0_FHM_SHIFT, 4, 0), - ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR0_DP_SHIFT, 4, 0), - ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR0_SM4_SHIFT, 4, 0), - ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR0_SM3_SHIFT, 4, 0), - ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR0_SHA3_SHIFT, 4, 0), - ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR0_RDM_SHIFT, 4, 0), - ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR0_ATOMIC_SHIFT, 4, 0), - ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR0_CRC32_SHIFT, 4, 0), - ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR0_SHA2_SHIFT, 4, 0), - ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR0_SHA1_SHIFT, 4, 0), - ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR0_AES_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR0_EL1_RNDR_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR0_EL1_TLB_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR0_EL1_TS_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR0_EL1_FHM_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR0_EL1_DP_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR0_EL1_SM4_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR0_EL1_SM3_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR0_EL1_SHA3_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR0_EL1_RDM_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR0_EL1_ATOMIC_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR0_EL1_CRC32_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR0_EL1_SHA2_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR0_EL1_SHA1_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR0_EL1_AES_SHIFT, 4, 0), ARM64_FTR_END, }; @@ -2013,7 +2013,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .type = ARM64_CPUCAP_SYSTEM_FEATURE, .matches = has_cpuid_feature, .sys_reg = SYS_ID_AA64ISAR0_EL1, - .field_pos = ID_AA64ISAR0_ATOMIC_SHIFT, + .field_pos = ID_AA64ISAR0_EL1_ATOMIC_SHIFT, .field_width = 4, .sign = FTR_UNSIGNED, .min_field_value = 2, @@ -2195,10 +2195,10 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .type = ARM64_CPUCAP_SYSTEM_FEATURE, .matches = has_cpuid_feature, .sys_reg = SYS_ID_AA64ISAR0_EL1, - .field_pos = ID_AA64ISAR0_TLB_SHIFT, + .field_pos = ID_AA64ISAR0_EL1_TLB_SHIFT, .field_width = 4, .sign = FTR_UNSIGNED, - .min_field_value = ID_AA64ISAR0_TLB_RANGE, + .min_field_value = ID_AA64ISAR0_EL1_TLB_RANGE, }, #ifdef CONFIG_ARM64_HW_AFDBM { @@ -2227,7 +2227,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .type = ARM64_CPUCAP_SYSTEM_FEATURE, .matches = has_cpuid_feature, .sys_reg = SYS_ID_AA64ISAR0_EL1, - .field_pos = ID_AA64ISAR0_CRC32_SHIFT, + .field_pos = ID_AA64ISAR0_EL1_CRC32_SHIFT, .field_width = 4, .min_field_value = 1, }, @@ -2382,7 +2382,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .type = ARM64_CPUCAP_SYSTEM_FEATURE, .matches = has_cpuid_feature, .sys_reg = SYS_ID_AA64ISAR0_EL1, - .field_pos = ID_AA64ISAR0_RNDR_SHIFT, + .field_pos = ID_AA64ISAR0_EL1_RNDR_SHIFT, .field_width = 4, .sign = FTR_UNSIGNED, .min_field_value = 1, @@ -2514,22 +2514,22 @@ static const struct arm64_cpu_capabilities ptr_auth_hwcap_gen_matches[] = { #endif static const struct arm64_cpu_capabilities arm64_elf_hwcaps[] = { - HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_AES_SHIFT, 4, FTR_UNSIGNED, 2, CAP_HWCAP, KERNEL_HWCAP_PMULL), - HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_AES_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_AES), - HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_SHA1_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_SHA1), - HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_SHA2_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_SHA2), - HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_SHA2_SHIFT, 4, FTR_UNSIGNED, 2, CAP_HWCAP, KERNEL_HWCAP_SHA512), - HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_CRC32_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_CRC32), - HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_ATOMIC_SHIFT, 4, FTR_UNSIGNED, 2, CAP_HWCAP, KERNEL_HWCAP_ATOMICS), - HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_RDM_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_ASIMDRDM), - HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_SHA3_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_SHA3), - HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_SM3_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_SM3), - HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_SM4_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_SM4), - HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_DP_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_ASIMDDP), - HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_FHM_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_ASIMDFHM), - HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_TS_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_FLAGM), - HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_TS_SHIFT, 4, FTR_UNSIGNED, 2, CAP_HWCAP, KERNEL_HWCAP_FLAGM2), - HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_RNDR_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_RNG), + HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_EL1_AES_SHIFT, 4, FTR_UNSIGNED, 2, CAP_HWCAP, KERNEL_HWCAP_PMULL), + HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_EL1_AES_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_AES), + HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_EL1_SHA1_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_SHA1), + HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_EL1_SHA2_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_SHA2), + HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_EL1_SHA2_SHIFT, 4, FTR_UNSIGNED, 2, CAP_HWCAP, KERNEL_HWCAP_SHA512), + HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_EL1_CRC32_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_CRC32), + HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_EL1_ATOMIC_SHIFT, 4, FTR_UNSIGNED, 2, CAP_HWCAP, KERNEL_HWCAP_ATOMICS), + HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_EL1_RDM_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_ASIMDRDM), + HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_EL1_SHA3_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_SHA3), + HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_EL1_SM3_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_SM3), + HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_EL1_SM4_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_SM4), + HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_EL1_DP_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_ASIMDDP), + HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_EL1_FHM_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_ASIMDFHM), + HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_EL1_TS_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_FLAGM), + HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_EL1_TS_SHIFT, 4, FTR_UNSIGNED, 2, CAP_HWCAP, KERNEL_HWCAP_FLAGM2), + HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_EL1_RNDR_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_RNG), HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_FP_SHIFT, 4, FTR_SIGNED, 0, CAP_HWCAP, KERNEL_HWCAP_FP), HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_FP_SHIFT, 4, FTR_SIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_FPHP), HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_ASIMD_SHIFT, 4, FTR_SIGNED, 0, CAP_HWCAP, KERNEL_HWCAP_ASIMD), diff --git a/arch/arm64/kvm/hyp/include/nvhe/fixed_config.h b/arch/arm64/kvm/hyp/include/nvhe/fixed_config.h index 63a114b9b2ed..fd55014b3497 100644 --- a/arch/arm64/kvm/hyp/include/nvhe/fixed_config.h +++ b/arch/arm64/kvm/hyp/include/nvhe/fixed_config.h @@ -159,20 +159,20 @@ * No restrictions on instructions implemented in AArch64. */ #define PVM_ID_AA64ISAR0_ALLOW (\ - ARM64_FEATURE_MASK(ID_AA64ISAR0_AES) | \ - ARM64_FEATURE_MASK(ID_AA64ISAR0_SHA1) | \ - ARM64_FEATURE_MASK(ID_AA64ISAR0_SHA2) | \ - ARM64_FEATURE_MASK(ID_AA64ISAR0_CRC32) | \ - ARM64_FEATURE_MASK(ID_AA64ISAR0_ATOMIC) | \ - ARM64_FEATURE_MASK(ID_AA64ISAR0_RDM) | \ - ARM64_FEATURE_MASK(ID_AA64ISAR0_SHA3) | \ - ARM64_FEATURE_MASK(ID_AA64ISAR0_SM3) | \ - ARM64_FEATURE_MASK(ID_AA64ISAR0_SM4) | \ - ARM64_FEATURE_MASK(ID_AA64ISAR0_DP) | \ - ARM64_FEATURE_MASK(ID_AA64ISAR0_FHM) | \ - ARM64_FEATURE_MASK(ID_AA64ISAR0_TS) | \ - ARM64_FEATURE_MASK(ID_AA64ISAR0_TLB) | \ - ARM64_FEATURE_MASK(ID_AA64ISAR0_RNDR) \ + ARM64_FEATURE_MASK(ID_AA64ISAR0_EL1_AES) | \ + ARM64_FEATURE_MASK(ID_AA64ISAR0_EL1_SHA1) | \ + ARM64_FEATURE_MASK(ID_AA64ISAR0_EL1_SHA2) | \ + ARM64_FEATURE_MASK(ID_AA64ISAR0_EL1_CRC32) | \ + ARM64_FEATURE_MASK(ID_AA64ISAR0_EL1_ATOMIC) | \ + ARM64_FEATURE_MASK(ID_AA64ISAR0_EL1_RDM) | \ + ARM64_FEATURE_MASK(ID_AA64ISAR0_EL1_SHA3) | \ + ARM64_FEATURE_MASK(ID_AA64ISAR0_EL1_SM3) | \ + ARM64_FEATURE_MASK(ID_AA64ISAR0_EL1_SM4) | \ + ARM64_FEATURE_MASK(ID_AA64ISAR0_EL1_DP) | \ + ARM64_FEATURE_MASK(ID_AA64ISAR0_EL1_FHM) | \ + ARM64_FEATURE_MASK(ID_AA64ISAR0_EL1_TS) | \ + ARM64_FEATURE_MASK(ID_AA64ISAR0_EL1_TLB) | \ + ARM64_FEATURE_MASK(ID_AA64ISAR0_EL1_RNDR) \ ) #define PVM_ID_AA64ISAR1_ALLOW (\ -- cgit v1.2.3 From 66847e0618d7dcaf34d9626e2b1f699fc05a2fef Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Tue, 3 May 2022 18:02:29 +0100 Subject: arm64: Add sysreg header generation scripting The arm64 kernel requires some metadata for each system register it may need to access. Currently we have: * A SYS_ definition which sorresponds to a sys_reg() macro. This is used both to look up a sysreg by encoding (e.g. in KVM), and also to generate code to access a sysreg where the assembler is unaware of the specific sysreg encoding. Where assemblers support the S3__C_C_ syntax for system registers, we could use this rather than manually assembling the instructions. However, we don't have consistent definitions for these and we currently still need to handle toolchains that lack this feature. * A set of __SHIFT and __MASK definitions, which can be used to extract fields from the register, or to construct a register from a set of fields. These do not follow the convention used by , and the masks are not shifted into place, preventing their use in FIELD_PREP() and FIELD_GET(). We require the SHIFT definitions for inline assembly (and WIDTH definitions would be helpful for UBFX/SBFX), so we cannot only define a shifted MASK. Defining a SHIFT, WIDTH, shifted MASK and unshifted MASK is tedious and error-prone and life is much easier when they can be relied up to exist when writing code. * A set of __ definitions for each enumerated value a field may hold. These are used when identifying the presence of features. Atop of this, other code has to build up metadata at runtime (e.g. the sets of RES0/RES1 bits in a register). This patch adds scripting so that we can have an easier-to-manage canonical representation of this metadata, from which we can generate all the definitions necessary for various use-cases, e.g. | #define REG_ID_AA64ISAR0_EL1 S3_0_C0_C6_0 | #define SYS_ID_AA64ISAR0_EL1 sys_reg(3, 0, 0, 6, 0) | #define SYS_ID_AA64ISAR0_EL1_Op0 3 | #define SYS_ID_AA64ISAR0_EL1_Op1 0 | #define SYS_ID_AA64ISAR0_EL1_CRn 0 | #define SYS_ID_AA64ISAR0_EL1_CRm 6 | #define SYS_ID_AA64ISAR0_EL1_Op2 0 | #define ID_AA64ISAR0_EL1_RNDR GENMASK(63, 60) | #define ID_AA64ISAR0_EL1_RNDR_MASK GENMASK(63, 60) | #define ID_AA64ISAR0_EL1_RNDR_SHIFT 60 | #define ID_AA64ISAR0_EL1_RNDR_WIDTH 4 | #define ID_AA64ISAR0_EL1_RNDR_NI UL(0b0000) | #define ID_AA64ISAR0_EL1_RNDR_IMP UL(0b0001) The script requires that all bits in the register be specified and that there be no overlapping fields. This helps the script spot errors in the input but means that the few registers which change layout at runtime depending on things like virtualisation settings will need some manual handling. No actual register conversions are done here but a header for the register data with some documention of the format is provided. For cases where multiple registers share a layout (eg, when identical controls are provided at multiple ELs) the register fields can be defined once and referenced from the actual registers, currently we do not generate actual defines for the individual registers. At the moment this is only intended to express metadata from the architecture, and does not handle policy imposed by the kernel, such as values exposed to userspace or VMs. In future this could be extended to express such information. This script was mostly written by Mark Rutland but has been extended by Mark Brown to improve validation of input and better integrate with the kernel. Signed-off-by: Mark Rutland Co-Developed-by: Mark Brown Signed-off-by: Mark Brown Link: https://lore.kernel.org/r/20220503170233.507788-9-broonie@kernel.org Signed-off-by: Catalin Marinas --- arch/arm64/tools/gen-sysreg.awk | 261 ++++++++++++++++++++++++++++++++++++++++ arch/arm64/tools/sysreg | 48 ++++++++ 2 files changed, 309 insertions(+) create mode 100755 arch/arm64/tools/gen-sysreg.awk create mode 100644 arch/arm64/tools/sysreg (limited to 'arch') diff --git a/arch/arm64/tools/gen-sysreg.awk b/arch/arm64/tools/gen-sysreg.awk new file mode 100755 index 000000000000..3ffd77cbb499 --- /dev/null +++ b/arch/arm64/tools/gen-sysreg.awk @@ -0,0 +1,261 @@ +#!/bin/awk -f +# SPDX-License-Identifier: GPL-2.0 +# gen-sysreg.awk: arm64 sysreg header generator +# +# Usage: awk -f gen-sysreg.awk sysregs.txt + +# Log an error and terminate +function fatal(msg) { + print "Error at " NR ": " msg > "/dev/stderr" + exit 1 +} + +# Sanity check that the start or end of a block makes sense at this point in +# the file. If not, produce an error and terminate. +# +# @this - the $Block or $EndBlock +# @prev - the only valid block to already be in (value of @block) +# @new - the new value of @block +function change_block(this, prev, new) { + if (block != prev) + fatal("unexpected " this " (inside " block ")") + + block = new +} + +# Sanity check the number of records for a field makes sense. If not, produce +# an error and terminate. +function expect_fields(nf) { + if (NF != nf) + fatal(NF " fields found where " nf " expected") +} + +# Print a CPP macro definition, padded with spaces so that the macro bodies +# line up in a column +function define(name, val) { + printf "%-48s%s\n", "#define " name, val +} + +# Print standard BITMASK/SHIFT/WIDTH CPP definitions for a field +function define_field(reg, field, msb, lsb) { + define(reg "_" field, "GENMASK(" msb ", " lsb ")") + define(reg "_" field "_MASK", "GENMASK(" msb ", " lsb ")") + define(reg "_" field "_SHIFT", lsb) + define(reg "_" field "_WIDTH", msb - lsb + 1) +} + +# Parse a "[:]" string into the global variables @msb and @lsb +function parse_bitdef(reg, field, bitdef, _bits) +{ + if (bitdef ~ /^[0-9]+$/) { + msb = bitdef + lsb = bitdef + } else if (split(bitdef, _bits, ":") == 2) { + msb = _bits[1] + lsb = _bits[2] + } else { + fatal("invalid bit-range definition '" bitdef "'") + } + + + if (msb != next_bit) + fatal(reg "." field " starts at " msb " not " next_bit) + if (63 < msb || msb < 0) + fatal(reg "." field " invalid high bit in '" bitdef "'") + if (63 < lsb || lsb < 0) + fatal(reg "." field " invalid low bit in '" bitdef "'") + if (msb < lsb) + fatal(reg "." field " invalid bit-range '" bitdef "'") + if (low > high) + fatal(reg "." field " has invalid range " high "-" low) + + next_bit = lsb - 1 +} + +BEGIN { + print "#ifndef __ASM_SYSREG_DEFS_H" + print "#define __ASM_SYSREG_DEFS_H" + print "" + print "/* Generated file - do not edit */" + + block = "None" +} + +END { + print "#endif /* __ASM_SYSREG_DEFS_H */" +} + +# skip blank lines and comment lines +/^$/ { next } +/^#/ { next } + +/^SysregFields/ { + change_block("SysregFields", "None", "SysregFields") + expect_fields(2) + + reg = $2 + + res0 = "UL(0)" + res1 = "UL(0)" + + print "" + + next_bit = 63 + + next +} + +/^EndSysregFields/ { + if (next_bit > 0) + fatal("Unspecified bits in " reg) + + change_block("EndSysregFields", "SysregFields", "None") + + define(reg "_RES0", "(" res0 ")") + define(reg "_RES1", "(" res1 ")") + print "" + + reg = null + res0 = null + res1 = null + + next +} + +/^Sysreg/ { + change_block("Sysreg", "None", "Sysreg") + expect_fields(7) + + reg = $2 + op0 = $3 + op1 = $4 + crn = $5 + crm = $6 + op2 = $7 + + res0 = "UL(0)" + res1 = "UL(0)" + + define("REG_" reg, "S" op0 "_" op1 "_C" crn "_C" crm "_" op2) + define("SYS_" reg, "sys_reg(" op0 ", " op1 ", " crn ", " crm ", " op2 ")") + + define("SYS_" reg "_Op0", op0) + define("SYS_" reg "_Op1", op1) + define("SYS_" reg "_CRn", crn) + define("SYS_" reg "_CRm", crm) + define("SYS_" reg "_Op2", op2) + + print "" + + next_bit = 63 + + next +} + +/^EndSysreg/ { + if (next_bit > 0) + fatal("Unspecified bits in " reg) + + change_block("EndSysreg", "Sysreg", "None") + + if (res0 != null) + define(reg "_RES0", "(" res0 ")") + if (res1 != null) + define(reg "_RES1", "(" res1 ")") + print "" + + reg = null + op0 = null + op1 = null + crn = null + crm = null + op2 = null + res0 = null + res1 = null + + next +} + +# Currently this is effectivey a comment, in future we may want to emit +# defines for the fields. +/^Fields/ && (block == "Sysreg") { + expect_fields(2) + + if (next_bit != 63) + fatal("Some fields already defined for " reg) + + print "/* See " $2 " */" + print "" + + next_bit = 0 + res0 = null + res1 = null + + next +} + + +/^Res0/ && (block == "Sysreg" || block == "SysregFields") { + expect_fields(2) + parse_bitdef(reg, "RES0", $2) + field = "RES0_" msb "_" lsb + + res0 = res0 " | GENMASK_ULL(" msb ", " lsb ")" + + next +} + +/^Res1/ && (block == "Sysreg" || block == "SysregFields") { + expect_fields(2) + parse_bitdef(reg, "RES1", $2) + field = "RES1_" msb "_" lsb + + res1 = res1 " | GENMASK_ULL(" msb ", " lsb ")" + + next +} + +/^Field/ && (block == "Sysreg" || block == "SysregFields") { + expect_fields(3) + field = $3 + parse_bitdef(reg, field, $2) + + define_field(reg, field, msb, lsb) + print "" + + next +} + +/^Enum/ { + change_block("Enum", "Sysreg", "Enum") + expect_fields(3) + field = $3 + parse_bitdef(reg, field, $2) + + define_field(reg, field, msb, lsb) + + next +} + +/^EndEnum/ { + change_block("EndEnum", "Enum", "Sysreg") + field = null + msb = null + lsb = null + print "" + next +} + +/0b[01]+/ && block = "Enum" { + expect_fields(2) + val = $1 + name = $2 + + define(reg "_" field "_" name, "UL(" val ")") + next +} + +# Any lines not handled by previous rules are unexpected +{ + fatal("unhandled statement") +} diff --git a/arch/arm64/tools/sysreg b/arch/arm64/tools/sysreg new file mode 100644 index 000000000000..8e39c718c1b8 --- /dev/null +++ b/arch/arm64/tools/sysreg @@ -0,0 +1,48 @@ +# SPDX-License-Identifier: GPL-2.0-only +# +# System register metadata + +# Each System register is described by a Sysreg block: + +# Sysreg +# +# ... +# EndSysreg + +# Within a Sysreg block, each field can be described as one of: + +# Res0 [:] + +# Res1 [:] + +# Field [:] + +# Enum [:] +# +# ... +# EndEnum + +# Alternatively if multiple registers share the same layout then +# a SysregFields block can be used to describe the shared layout + +# SysregFields +# +# ... +# EndSysregFields + +# and referenced from within the Sysreg: + +# Sysreg +# Fields +# EndSysreg + +# For ID registers we adopt a few conventions for translating the +# language in the ARM into defines: +# +# NI - Not implemented +# IMP - Implemented +# +# In general it is recommended that new enumeration items be named for the +# feature that introduces them (eg, FEAT_LS64_ACCDATA introduces enumeration +# item ACCDATA) though it may be more taseful to do something else. + -- cgit v1.2.3 From c07d8017bceb82cbe5fedd129d072c59a53f5513 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Tue, 3 May 2022 18:02:30 +0100 Subject: arm64/sysreg: Enable automatic generation of system register definitions Now that we have a script for generating system registers hook it up to the build system similarly to cpucaps. Since we don't currently have any actual register information in the input file this should produce no change in the built kernel. For ease of review the register information will be converted in separate patches. Signed-off-by: Mark Brown Acked-by: Mark Rutland Link: https://lore.kernel.org/r/20220503170233.507788-10-broonie@kernel.org Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/Kbuild | 1 + arch/arm64/include/asm/sysreg.h | 8 ++++++++ arch/arm64/tools/Makefile | 8 +++++++- 3 files changed, 16 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm64/include/asm/Kbuild b/arch/arm64/include/asm/Kbuild index 345fe98605ba..5c8ee5a541d2 100644 --- a/arch/arm64/include/asm/Kbuild +++ b/arch/arm64/include/asm/Kbuild @@ -7,3 +7,4 @@ generic-y += parport.h generic-y += user.h generated-y += cpucaps.h +generated-y += sysreg-defs.h diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index ae440b1ffb8e..db07a01776d8 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -114,6 +114,14 @@ #define SYS_DC_CSW sys_insn(1, 0, 7, 10, 2) #define SYS_DC_CISW sys_insn(1, 0, 7, 14, 2) +/* + * Automatically generated definitions for system registers, the + * manual encodings below are in the process of being converted to + * come from here. The header relies on the definition of sys_reg() + * earlier in this file. + */ +#include "asm/sysreg-defs.h" + /* * System registers, organised loosely by encoding but grouped together * where the architected name contains an index. e.g. ID_MMFR_EL1. diff --git a/arch/arm64/tools/Makefile b/arch/arm64/tools/Makefile index cf1307188150..07a93ab21a62 100644 --- a/arch/arm64/tools/Makefile +++ b/arch/arm64/tools/Makefile @@ -3,7 +3,7 @@ gen := arch/$(ARCH)/include/generated kapi := $(gen)/asm -kapi-hdrs-y := $(kapi)/cpucaps.h +kapi-hdrs-y := $(kapi)/cpucaps.h $(kapi)/sysreg-defs.h targets += $(addprefix ../../../, $(kapi-hdrs-y)) @@ -14,5 +14,11 @@ kapi: $(kapi-hdrs-y) quiet_cmd_gen_cpucaps = GEN $@ cmd_gen_cpucaps = mkdir -p $(dir $@); $(AWK) -f $(real-prereqs) > $@ +quiet_cmd_gen_sysreg = GEN $@ + cmd_gen_sysreg = mkdir -p $(dir $@); $(AWK) -f $(real-prereqs) > $@ + $(kapi)/cpucaps.h: $(src)/gen-cpucaps.awk $(src)/cpucaps FORCE $(call if_changed,gen_cpucaps) + +$(kapi)/sysreg-defs.h: $(src)/gen-sysreg.awk $(src)/sysreg FORCE + $(call if_changed,gen_sysreg) -- cgit v1.2.3 From e33bb6461cd6ca0e0dad8392f0e3ee0179871e7a Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Tue, 3 May 2022 18:02:31 +0100 Subject: arm64/sysreg: Generate definitions for ID_AA64ISAR0_EL1 Remove the manual definitions for ID_AA64ISAR0_EL1 in favour of automatic generation. There should be no functional change. The only notable change is that 27:24 TME is defined rather than RES0 reflecting DDI0487H.a. Signed-off-by: Mark Brown Acked-by: Mark Rutland Link: https://lore.kernel.org/r/20220503170233.507788-11-broonie@kernel.org Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/sysreg.h | 20 ------------ arch/arm64/tools/sysreg | 67 +++++++++++++++++++++++++++++++++++++++++ 2 files changed, 67 insertions(+), 20 deletions(-) (limited to 'arch') diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index db07a01776d8..f5e02f27a5c9 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -196,7 +196,6 @@ #define SYS_ID_AA64AFR0_EL1 sys_reg(3, 0, 0, 5, 4) #define SYS_ID_AA64AFR1_EL1 sys_reg(3, 0, 0, 5, 5) -#define SYS_ID_AA64ISAR0_EL1 sys_reg(3, 0, 0, 6, 0) #define SYS_ID_AA64ISAR1_EL1 sys_reg(3, 0, 0, 6, 1) #define SYS_ID_AA64ISAR2_EL1 sys_reg(3, 0, 0, 6, 2) @@ -747,25 +746,6 @@ /* Position the attr at the correct index */ #define MAIR_ATTRIDX(attr, idx) ((attr) << ((idx) * 8)) -/* id_aa64isar0 */ -#define ID_AA64ISAR0_EL1_RNDR_SHIFT 60 -#define ID_AA64ISAR0_EL1_TLB_SHIFT 56 -#define ID_AA64ISAR0_EL1_TS_SHIFT 52 -#define ID_AA64ISAR0_EL1_FHM_SHIFT 48 -#define ID_AA64ISAR0_EL1_DP_SHIFT 44 -#define ID_AA64ISAR0_EL1_SM4_SHIFT 40 -#define ID_AA64ISAR0_EL1_SM3_SHIFT 36 -#define ID_AA64ISAR0_EL1_SHA3_SHIFT 32 -#define ID_AA64ISAR0_EL1_RDM_SHIFT 28 -#define ID_AA64ISAR0_EL1_ATOMIC_SHIFT 20 -#define ID_AA64ISAR0_EL1_CRC32_SHIFT 16 -#define ID_AA64ISAR0_EL1_SHA2_SHIFT 12 -#define ID_AA64ISAR0_EL1_SHA1_SHIFT 8 -#define ID_AA64ISAR0_EL1_AES_SHIFT 4 - -#define ID_AA64ISAR0_EL1_TLB_RANGE_NI 0x0 -#define ID_AA64ISAR0_EL1_TLB_RANGE 0x2 - /* id_aa64isar1 */ #define ID_AA64ISAR1_I8MM_SHIFT 52 #define ID_AA64ISAR1_DGH_SHIFT 48 diff --git a/arch/arm64/tools/sysreg b/arch/arm64/tools/sysreg index 8e39c718c1b8..4d8991574437 100644 --- a/arch/arm64/tools/sysreg +++ b/arch/arm64/tools/sysreg @@ -46,3 +46,70 @@ # feature that introduces them (eg, FEAT_LS64_ACCDATA introduces enumeration # item ACCDATA) though it may be more taseful to do something else. +Sysreg ID_AA64ISAR0_EL1 3 0 0 6 0 +Enum 63:60 RNDR + 0b0000 NI + 0b0001 IMP +EndEnum +Enum 59:56 TLB + 0b0000 NI + 0b0001 OS + 0b0010 RANGE +EndEnum +Enum 55:52 TS + 0b0000 NI + 0b0001 FLAGM + 0b0010 FLAGM2 +EndEnum +Enum 51:48 FHM + 0b0000 NI + 0b0001 IMP +EndEnum +Enum 47:44 DP + 0b0000 NI + 0b0001 IMP +EndEnum +Enum 43:40 SM4 + 0b0000 NI + 0b0001 IMP +EndEnum +Enum 39:36 SM3 + 0b0000 NI + 0b0001 IMP +EndEnum +Enum 35:32 SHA3 + 0b0000 NI + 0b0001 IMP +EndEnum +Enum 31:28 RDM + 0b0000 NI + 0b0001 IMP +EndEnum +Enum 27:24 TME + 0b0000 NI + 0b0001 IMP +EndEnum +Enum 23:20 ATOMIC + 0b0000 NI + 0b0010 IMP +EndEnum +Enum 19:16 CRC32 + 0b0000 NI + 0b0001 IMP +EndEnum +Enum 15:12 SHA2 + 0b0000 NI + 0b0001 SHA256 + 0b0010 SHA512 +EndEnum +Enum 11:8 SHA1 + 0b0000 NI + 0b0001 IMP +EndEnum +Enum 7:4 AES + 0b0000 NI + 0b0001 AES + 0b0010 PMULL +EndEnum +Res0 3:0 +EndSysreg -- cgit v1.2.3 From 41fde735062d8dbf7ebf27b278ac567eaf8d9255 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Tue, 3 May 2022 18:02:32 +0100 Subject: arm64/sysreg: Generate definitions for TTBRn_EL1 Automatically generate definitions for accessing the TTBRn_EL1 registers, no functional change. Signed-off-by: Mark Brown Acked-by: Mark Rutland Link: https://lore.kernel.org/r/20220503170233.507788-12-broonie@kernel.org Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/sysreg.h | 2 -- arch/arm64/tools/sysreg | 14 ++++++++++++++ 2 files changed, 14 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index f5e02f27a5c9..c61bda1db2c8 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -212,8 +212,6 @@ #define SYS_ZCR_EL1 sys_reg(3, 0, 1, 2, 0) #define SYS_TRFCR_EL1 sys_reg(3, 0, 1, 2, 1) -#define SYS_TTBR0_EL1 sys_reg(3, 0, 2, 0, 0) -#define SYS_TTBR1_EL1 sys_reg(3, 0, 2, 0, 1) #define SYS_TCR_EL1 sys_reg(3, 0, 2, 0, 2) #define SYS_APIAKEYLO_EL1 sys_reg(3, 0, 2, 1, 0) diff --git a/arch/arm64/tools/sysreg b/arch/arm64/tools/sysreg index 4d8991574437..e77354847a64 100644 --- a/arch/arm64/tools/sysreg +++ b/arch/arm64/tools/sysreg @@ -113,3 +113,17 @@ Enum 7:4 AES EndEnum Res0 3:0 EndSysreg + +SysregFields TTBRx_EL1 +Field 63:48 ASID +Field 47:1 BADDR +Field 0 CnP +EndSysregFields + +Sysreg TTBR0_EL1 3 0 2 0 0 +Fields TTBRx_EL1 +EndSysreg + +Sysreg TTBR1_EL1 3 0 2 0 1 +Fields TTBRx_EL1 +EndSysreg -- cgit v1.2.3 From 5028fbad2d57910e8c776ba1c868da0e4f64978f Mon Sep 17 00:00:00 2001 From: Hector Martin Date: Mon, 2 May 2022 18:14:27 +0900 Subject: arm64: Set ARCH_NR_GPIO to 2048 for ARCH_APPLE We're already running into the 512 GPIO limit on t600[01] depending on how many SMC GPIOs we allocate, and a 2-die version could double that. Let's make it 2K to be safe for now. Signed-off-by: Hector Martin Acked-by: Marc Zyngier Link: https://lore.kernel.org/r/20220502091427.28416-1-marcan@marcan.st Signed-off-by: Catalin Marinas --- arch/arm64/Kconfig | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'arch') diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 57c4c995965f..764433588fdd 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -2041,6 +2041,18 @@ config STACKPROTECTOR_PER_TASK def_bool y depends on STACKPROTECTOR && CC_HAVE_STACKPROTECTOR_SYSREG +# The GPIO number here must be sorted by descending number. In case of +# a multiplatform kernel, we just want the highest value required by the +# selected platforms. +config ARCH_NR_GPIO + int + default 2048 if ARCH_APPLE + default 0 + help + Maximum number of GPIOs in the system. + + If unsure, leave the default value. + endmenu menu "Boot options" -- cgit v1.2.3 From 7a41a97b65ea7c4e0458b11e7a2c71c6dd3be0c4 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Tue, 3 May 2022 18:02:33 +0100 Subject: arm64/sysreg: Generate definitions for SCTLR_EL1 Automatically generate register definitions for SCTLR_EL1. No functional change. Signed-off-by: Mark Brown Link: https://lore.kernel.org/r/20220503170233.507788-13-broonie@kernel.org [catalin.marinas@arm.com: fix the SCTLR_EL1 encoding] Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/sysreg.h | 38 ---------------------- arch/arm64/tools/sysreg | 71 +++++++++++++++++++++++++++++++++++++++++ 2 files changed, 71 insertions(+), 38 deletions(-) (limited to 'arch') diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index c61bda1db2c8..a6b8b0deadfb 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -203,7 +203,6 @@ #define SYS_ID_AA64MMFR1_EL1 sys_reg(3, 0, 0, 7, 1) #define SYS_ID_AA64MMFR2_EL1 sys_reg(3, 0, 0, 7, 2) -#define SYS_SCTLR_EL1 sys_reg(3, 0, 1, 0, 0) #define SYS_ACTLR_EL1 sys_reg(3, 0, 1, 0, 1) #define SYS_CPACR_EL1 sys_reg(3, 0, 1, 0, 2) #define SYS_RGSR_EL1 sys_reg(3, 0, 1, 0, 5) @@ -677,43 +676,6 @@ (SCTLR_EL2_RES1 | ENDIAN_SET_EL2) /* SCTLR_EL1 specific flags. */ -#define SCTLR_EL1_EPAN (BIT(57)) -#define SCTLR_EL1_ATA0 (BIT(42)) - -#define SCTLR_EL1_TCF_SHIFT 40 -#define SCTLR_EL1_TCF_NONE (UL(0x0)) -#define SCTLR_EL1_TCF_SYNC (UL(0x1)) -#define SCTLR_EL1_TCF_ASYNC (UL(0x2)) -#define SCTLR_EL1_TCF_ASYMM (UL(0x3)) -#define SCTLR_EL1_TCF_MASK (UL(0x3) << SCTLR_EL1_TCF_SHIFT) - -#define SCTLR_EL1_TCF0_SHIFT 38 -#define SCTLR_EL1_TCF0_NONE (UL(0x0)) -#define SCTLR_EL1_TCF0_SYNC (UL(0x1)) -#define SCTLR_EL1_TCF0_ASYNC (UL(0x2)) -#define SCTLR_EL1_TCF0_ASYMM (UL(0x3)) -#define SCTLR_EL1_TCF0_MASK (UL(0x3) << SCTLR_EL1_TCF0_SHIFT) - -#define SCTLR_EL1_BT1 (BIT(36)) -#define SCTLR_EL1_BT0 (BIT(35)) -#define SCTLR_EL1_LSMAOE (BIT(29)) -#define SCTLR_EL1_nTLSMD (BIT(28)) -#define SCTLR_EL1_UCI (BIT(26)) -#define SCTLR_EL1_E0E (BIT(24)) -#define SCTLR_EL1_SPAN (BIT(23)) -#define SCTLR_EL1_EIS (BIT(22)) -#define SCTLR_EL1_TSCXT (BIT(20)) -#define SCTLR_EL1_nTWE (BIT(18)) -#define SCTLR_EL1_nTWI (BIT(16)) -#define SCTLR_EL1_UCT (BIT(15)) -#define SCTLR_EL1_DZE (BIT(14)) -#define SCTLR_EL1_EOS (BIT(11)) -#define SCTLR_EL1_UMA (BIT(9)) -#define SCTLR_EL1_SED (BIT(8)) -#define SCTLR_EL1_ITD (BIT(7)) -#define SCTLR_EL1_CP15BEN (BIT(5)) -#define SCTLR_EL1_SA0 (BIT(4)) - #ifdef CONFIG_CPU_BIG_ENDIAN #define ENDIAN_SET_EL1 (SCTLR_EL1_E0E | SCTLR_ELx_EE) #else diff --git a/arch/arm64/tools/sysreg b/arch/arm64/tools/sysreg index e77354847a64..c5619629bf9c 100644 --- a/arch/arm64/tools/sysreg +++ b/arch/arm64/tools/sysreg @@ -114,6 +114,77 @@ EndEnum Res0 3:0 EndSysreg +Sysreg SCTLR_EL1 3 0 1 0 0 +Field 63 TIDCP +Field 62 SPINMASK +Field 61 NMI +Field 60 EnTP2 +Res0 59:58 +Field 57 EPAN +Field 56 EnALS +Field 55 EnAS0 +Field 54 EnASR +Field 53 TME +Field 52 TME0 +Field 51 TMT +Field 50 TMT0 +Field 49:46 TWEDEL +Field 45 TWEDEn +Field 44 DSSBS +Field 43 ATA +Field 42 ATA0 +Enum 41:40 TCF + 0b00 NONE + 0b01 SYNC + 0b10 ASYNC + 0b11 ASYMM +EndEnum +Enum 39:38 TCF0 + 0b00 NONE + 0b01 SYNC + 0b10 ASYNC + 0b11 ASYMM +EndEnum +Field 37 ITFSB +Field 36 BT1 +Field 35 BT0 +Res0 34 +Field 33 MSCEn +Field 32 CMOW +Field 31 EnIA +Field 30 EnIB +Field 29 LSMAOE +Field 28 nTLSMD +Field 27 EnDA +Field 26 UCI +Field 25 EE +Field 24 E0E +Field 23 SPAN +Field 22 EIS +Field 21 IESB +Field 20 TSCXT +Field 19 WXN +Field 18 nTWE +Res0 17 +Field 16 nTWI +Field 15 UCT +Field 14 DZE +Field 13 EnDB +Field 12 I +Field 11 EOS +Field 10 EnRCTX +Field 9 UMA +Field 8 SED +Field 7 ITD +Field 6 nAA +Field 5 CP15BEN +Field 4 SA0 +Field 3 SA +Field 2 C +Field 1 A +Field 0 M +EndSysreg + SysregFields TTBRx_EL1 Field 63:48 ASID Field 47:1 BADDR -- cgit v1.2.3 From 921d161f15d6b090599f6a8c23f131969edbd1fa Mon Sep 17 00:00:00 2001 From: Tong Tiangen Date: Wed, 20 Apr 2022 03:04:13 +0000 Subject: arm64: fix types in copy_highpage() In copy_highpage() the `kto` and `kfrom` local variables are pointers to struct page, but these are used to hold arbitrary pointers to kernel memory . Each call to page_address() returns a void pointer to memory associated with the relevant page, and copy_page() expects void pointers to this memory. This inconsistency was introduced in commit 2563776b41c3 ("arm64: mte: Tags-aware copy_{user_,}highpage() implementations") and while this doesn't appear to be harmful in practice it is clearly wrong. Correct this by making `kto` and `kfrom` void pointers. Fixes: 2563776b41c3 ("arm64: mte: Tags-aware copy_{user_,}highpage() implementations") Signed-off-by: Tong Tiangen Acked-by: Mark Rutland Reviewed-by: Kefeng Wang Link: https://lore.kernel.org/r/20220420030418.3189040-3-tongtiangen@huawei.com Signed-off-by: Catalin Marinas --- arch/arm64/mm/copypage.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/arm64/mm/copypage.c b/arch/arm64/mm/copypage.c index b5447e53cd73..0dea80bf6de4 100644 --- a/arch/arm64/mm/copypage.c +++ b/arch/arm64/mm/copypage.c @@ -16,8 +16,8 @@ void copy_highpage(struct page *to, struct page *from) { - struct page *kto = page_address(to); - struct page *kfrom = page_address(from); + void *kto = page_address(to); + void *kfrom = page_address(from); copy_page(kto, kfrom); -- cgit v1.2.3 From f41ef4c2ee99d255c82d8ac6f720f28116340869 Mon Sep 17 00:00:00 2001 From: Kefeng Wang Date: Mon, 11 Apr 2022 17:24:55 +0800 Subject: arm64: mm: Cleanup useless parameters in zone_sizes_init() Directly use max_pfn for max and no one use min, kill them. Reviewed-by: Vijay Balakrishna Signed-off-by: Kefeng Wang Link: https://lore.kernel.org/r/20220411092455.1461-4-wangkefeng.wang@huawei.com Signed-off-by: Catalin Marinas --- arch/arm64/mm/init.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c index 1e7b1550e2fc..fb07e94242bb 100644 --- a/arch/arm64/mm/init.c +++ b/arch/arm64/mm/init.c @@ -157,7 +157,7 @@ static phys_addr_t __init max_zone_phys(unsigned int zone_bits) return min(zone_mask, memblock_end_of_DRAM() - 1) + 1; } -static void __init zone_sizes_init(unsigned long min, unsigned long max) +static void __init zone_sizes_init(void) { unsigned long max_zone_pfns[MAX_NR_ZONES] = {0}; unsigned int __maybe_unused acpi_zone_dma_bits; @@ -176,7 +176,7 @@ static void __init zone_sizes_init(unsigned long min, unsigned long max) if (!arm64_dma_phys_limit) arm64_dma_phys_limit = dma32_phys_limit; #endif - max_zone_pfns[ZONE_NORMAL] = max; + max_zone_pfns[ZONE_NORMAL] = max_pfn; free_area_init(max_zone_pfns); } @@ -374,7 +374,7 @@ void __init bootmem_init(void) * done after the fixed reservations */ sparse_init(); - zone_sizes_init(min, max); + zone_sizes_init(); /* * Reserve the CMA area after arm64_dma_phys_limit was initialised. -- cgit v1.2.3 From d158a0608eb85f29f98357b97d01b80250636613 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Thu, 5 May 2022 23:15:17 +0100 Subject: arm64/sme: More sensibly define the size for the ZA register set Since the vector length configuration mechanism is identical between SVE and SME we share large elements of the code including the definition for the maximum vector length. Unfortunately when we were defining the ABI for SVE we included not only the actual maximum vector length of 2048 bits but also the value possible if all the bits reserved in the architecture for expansion of the LEN field were used, 16384 bits. This starts creating problems if we try to allocate anything for the ZA matrix based on the maximum possible vector length, as we do for the regset used with ptrace during the process of generating a core dump. While the maximum potential size for ZA with the current architecture is a reasonably managable 64K with the higher reserved limit ZA would be 64M which leads to entirely reasonable complaints from the memory management code when we try to allocate a buffer of that size. Avoid these issues by defining the actual maximum vector length for the architecture and using it for the SME regsets. Also use the full ZA_PT_SIZE() with the header rather than just the actual register payload when specifying the size, fixing support for the largest vector lengths now that we have this new, lower define. With the SVE maximum this did not cause problems due to the extra headroom we had. While we're at it add a comment clarifying why even though ZA is a single register we tell the regset code that it is a multi-register regset. Reported-by: Qian Cai Signed-off-by: Mark Brown Tested-by: Naresh Kamboju Link: https://lore.kernel.org/r/20220505221517.1642014-1-broonie@kernel.org Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/fpsimd.h | 12 ++++++++++++ arch/arm64/kernel/ptrace.c | 12 ++++++++++-- 2 files changed, 22 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/arm64/include/asm/fpsimd.h b/arch/arm64/include/asm/fpsimd.h index 5afcd0709aae..75caa2098d5b 100644 --- a/arch/arm64/include/asm/fpsimd.h +++ b/arch/arm64/include/asm/fpsimd.h @@ -32,6 +32,18 @@ #define VFP_STATE_SIZE ((32 * 8) + 4) #endif +/* + * When we defined the maximum SVE vector length we defined the ABI so + * that the maximum vector length included all the reserved for future + * expansion bits in ZCR rather than those just currently defined by + * the architecture. While SME follows a similar pattern the fact that + * it includes a square matrix means that any allocations that attempt + * to cover the maximum potential vector length (such as happen with + * the regset used for ptrace) end up being extremely large. Define + * the much lower actual limit for use in such situations. + */ +#define SME_VQ_MAX 16 + struct task_struct; extern void fpsimd_save_state(struct user_fpsimd_state *state); diff --git a/arch/arm64/kernel/ptrace.c b/arch/arm64/kernel/ptrace.c index 47d8a7472171..60ebc3060cf1 100644 --- a/arch/arm64/kernel/ptrace.c +++ b/arch/arm64/kernel/ptrace.c @@ -1438,7 +1438,7 @@ static const struct user_regset aarch64_regsets[] = { #ifdef CONFIG_ARM64_SME [REGSET_SSVE] = { /* Streaming mode SVE */ .core_note_type = NT_ARM_SSVE, - .n = DIV_ROUND_UP(SVE_PT_SIZE(SVE_VQ_MAX, SVE_PT_REGS_SVE), + .n = DIV_ROUND_UP(SVE_PT_SIZE(SME_VQ_MAX, SVE_PT_REGS_SVE), SVE_VQ_BYTES), .size = SVE_VQ_BYTES, .align = SVE_VQ_BYTES, @@ -1447,7 +1447,15 @@ static const struct user_regset aarch64_regsets[] = { }, [REGSET_ZA] = { /* SME ZA */ .core_note_type = NT_ARM_ZA, - .n = DIV_ROUND_UP(ZA_PT_ZA_SIZE(SVE_VQ_MAX), SVE_VQ_BYTES), + /* + * ZA is a single register but it's variably sized and + * the ptrace core requires that the size of any data + * be an exact multiple of the configured register + * size so report as though we had SVE_VQ_BYTES + * registers. These values aren't exposed to + * userspace. + */ + .n = DIV_ROUND_UP(ZA_PT_SIZE(SME_VQ_MAX), SVE_VQ_BYTES), .size = SVE_VQ_BYTES, .align = SVE_VQ_BYTES, .regset_get = za_get, -- cgit v1.2.3 From e6b394425c615d1596ce7d9de23a3a34ee2e612b Mon Sep 17 00:00:00 2001 From: Zhen Lei Date: Fri, 6 May 2022 19:43:58 +0800 Subject: arm64: Use insert_resource() to simplify code insert_resource() traverses the subtree layer by layer from the root node until a proper location is found. Compared with request_resource(), the parent node does not need to be determined in advance. In addition, move the insertion of node 'crashk_res' into function reserve_crashkernel() to make the associated code close together. Signed-off-by: Zhen Lei Acked-by: John Donnelly Acked-by: Baoquan He Link: https://lore.kernel.org/r/20220506114402.365-3-thunder.leizhen@huawei.com Signed-off-by: Catalin Marinas --- arch/arm64/kernel/setup.c | 17 +++-------------- arch/arm64/mm/init.c | 1 + 2 files changed, 4 insertions(+), 14 deletions(-) (limited to 'arch') diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c index 3505789cf4bd..fea3223704b6 100644 --- a/arch/arm64/kernel/setup.c +++ b/arch/arm64/kernel/setup.c @@ -225,6 +225,8 @@ static void __init request_standard_resources(void) kernel_code.end = __pa_symbol(__init_begin - 1); kernel_data.start = __pa_symbol(_sdata); kernel_data.end = __pa_symbol(_end - 1); + insert_resource(&iomem_resource, &kernel_code); + insert_resource(&iomem_resource, &kernel_data); num_standard_resources = memblock.memory.cnt; res_size = num_standard_resources * sizeof(*standard_resources); @@ -246,20 +248,7 @@ static void __init request_standard_resources(void) res->end = __pfn_to_phys(memblock_region_memory_end_pfn(region)) - 1; } - request_resource(&iomem_resource, res); - - if (kernel_code.start >= res->start && - kernel_code.end <= res->end) - request_resource(res, &kernel_code); - if (kernel_data.start >= res->start && - kernel_data.end <= res->end) - request_resource(res, &kernel_data); -#ifdef CONFIG_KEXEC_CORE - /* Userspace will find "Crash kernel" region in /proc/iomem. */ - if (crashk_res.end && crashk_res.start >= res->start && - crashk_res.end <= res->end) - request_resource(res, &crashk_res); -#endif + insert_resource(&iomem_resource, res); } } diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c index 1e7b1550e2fc..51863f1448c6 100644 --- a/arch/arm64/mm/init.c +++ b/arch/arm64/mm/init.c @@ -137,6 +137,7 @@ static void __init reserve_crashkernel(void) kmemleak_ignore_phys(crash_base); crashk_res.start = crash_base; crashk_res.end = crash_base + crash_size - 1; + insert_resource(&iomem_resource, &crashk_res); } /* -- cgit v1.2.3 From 944a45abfabc171fd121315ff0d5e62b11cb5d6f Mon Sep 17 00:00:00 2001 From: Chen Zhou Date: Fri, 6 May 2022 19:43:59 +0800 Subject: arm64: kdump: Reimplement crashkernel=X There are following issues in arm64 kdump: 1. We use crashkernel=X to reserve crashkernel in DMA zone, which will fail when there is not enough low memory. 2. If reserving crashkernel above DMA zone, in this case, crash dump kernel will fail to boot because there is no low memory available for allocation. To solve these issues, introduce crashkernel=X,[high,low]. The "crashkernel=X,high" is used to select a region above DMA zone, and the "crashkernel=Y,low" is used to allocate specified size low memory. Signed-off-by: Chen Zhou Co-developed-by: Zhen Lei Signed-off-by: Zhen Lei Link: https://lore.kernel.org/r/20220506114402.365-4-thunder.leizhen@huawei.com Signed-off-by: Catalin Marinas --- arch/arm64/kernel/machine_kexec.c | 9 +++-- arch/arm64/kernel/machine_kexec_file.c | 12 +++++-- arch/arm64/mm/init.c | 63 ++++++++++++++++++++++++++++++---- 3 files changed, 74 insertions(+), 10 deletions(-) (limited to 'arch') diff --git a/arch/arm64/kernel/machine_kexec.c b/arch/arm64/kernel/machine_kexec.c index e16b248699d5..19c2d487cb08 100644 --- a/arch/arm64/kernel/machine_kexec.c +++ b/arch/arm64/kernel/machine_kexec.c @@ -329,8 +329,13 @@ bool crash_is_nosave(unsigned long pfn) /* in reserved memory? */ addr = __pfn_to_phys(pfn); - if ((addr < crashk_res.start) || (crashk_res.end < addr)) - return false; + if ((addr < crashk_res.start) || (crashk_res.end < addr)) { + if (!crashk_low_res.end) + return false; + + if ((addr < crashk_low_res.start) || (crashk_low_res.end < addr)) + return false; + } if (!kexec_crash_image) return true; diff --git a/arch/arm64/kernel/machine_kexec_file.c b/arch/arm64/kernel/machine_kexec_file.c index 59c648d51848..889951291cc0 100644 --- a/arch/arm64/kernel/machine_kexec_file.c +++ b/arch/arm64/kernel/machine_kexec_file.c @@ -65,10 +65,18 @@ static int prepare_elf_headers(void **addr, unsigned long *sz) /* Exclude crashkernel region */ ret = crash_exclude_mem_range(cmem, crashk_res.start, crashk_res.end); + if (ret) + goto out; + + if (crashk_low_res.end) { + ret = crash_exclude_mem_range(cmem, crashk_low_res.start, crashk_low_res.end); + if (ret) + goto out; + } - if (!ret) - ret = crash_prepare_elf64_headers(cmem, true, addr, sz); + ret = crash_prepare_elf64_headers(cmem, true, addr, sz); +out: kfree(cmem); return ret; } diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c index 51863f1448c6..18ba66c90991 100644 --- a/arch/arm64/mm/init.c +++ b/arch/arm64/mm/init.c @@ -90,6 +90,32 @@ phys_addr_t __ro_after_init arm64_dma_phys_limit; phys_addr_t __ro_after_init arm64_dma_phys_limit = PHYS_MASK + 1; #endif +/* Current arm64 boot protocol requires 2MB alignment */ +#define CRASH_ALIGN SZ_2M + +#define CRASH_ADDR_LOW_MAX arm64_dma_phys_limit +#define CRASH_ADDR_HIGH_MAX (PHYS_MASK + 1) + +static int __init reserve_crashkernel_low(unsigned long long low_size) +{ + unsigned long long low_base; + + low_base = memblock_phys_alloc_range(low_size, CRASH_ALIGN, 0, CRASH_ADDR_LOW_MAX); + if (!low_base) { + pr_err("cannot allocate crashkernel low memory (size:0x%llx).\n", low_size); + return -ENOMEM; + } + + pr_info("crashkernel low memory reserved: 0x%08llx - 0x%08llx (%lld MB)\n", + low_base, low_base + low_size, low_size >> 20); + + crashk_low_res.start = low_base; + crashk_low_res.end = low_base + low_size - 1; + insert_resource(&iomem_resource, &crashk_low_res); + + return 0; +} + /* * reserve_crashkernel() - reserves memory for crash kernel * @@ -100,17 +126,35 @@ phys_addr_t __ro_after_init arm64_dma_phys_limit = PHYS_MASK + 1; static void __init reserve_crashkernel(void) { unsigned long long crash_base, crash_size; - unsigned long long crash_max = arm64_dma_phys_limit; + unsigned long long crash_low_size = 0; + unsigned long long crash_max = CRASH_ADDR_LOW_MAX; + char *cmdline = boot_command_line; int ret; if (!IS_ENABLED(CONFIG_KEXEC_CORE)) return; - ret = parse_crashkernel(boot_command_line, memblock_phys_mem_size(), + /* crashkernel=X[@offset] */ + ret = parse_crashkernel(cmdline, memblock_phys_mem_size(), &crash_size, &crash_base); - /* no crashkernel= or invalid value specified */ - if (ret || !crash_size) + if (ret == -ENOENT) { + ret = parse_crashkernel_high(cmdline, 0, &crash_size, &crash_base); + if (ret || !crash_size) + return; + + /* + * crashkernel=Y,low can be specified or not, but invalid value + * is not allowed. + */ + ret = parse_crashkernel_low(cmdline, 0, &crash_low_size, &crash_base); + if (ret && (ret != -ENOENT)) + return; + + crash_max = CRASH_ADDR_HIGH_MAX; + } else if (ret || !crash_size) { + /* The specified value is invalid */ return; + } crash_size = PAGE_ALIGN(crash_size); @@ -118,8 +162,7 @@ static void __init reserve_crashkernel(void) if (crash_base) crash_max = crash_base + crash_size; - /* Current arm64 boot protocol requires 2MB alignment */ - crash_base = memblock_phys_alloc_range(crash_size, SZ_2M, + crash_base = memblock_phys_alloc_range(crash_size, CRASH_ALIGN, crash_base, crash_max); if (!crash_base) { pr_warn("cannot allocate crashkernel (size:0x%llx)\n", @@ -127,6 +170,11 @@ static void __init reserve_crashkernel(void) return; } + if (crash_low_size && reserve_crashkernel_low(crash_low_size)) { + memblock_phys_free(crash_base, crash_size); + return; + } + pr_info("crashkernel reserved: 0x%016llx - 0x%016llx (%lld MB)\n", crash_base, crash_base + crash_size, crash_size >> 20); @@ -135,6 +183,9 @@ static void __init reserve_crashkernel(void) * map. Inform kmemleak so that it won't try to access it. */ kmemleak_ignore_phys(crash_base); + if (crashk_low_res.end) + kmemleak_ignore_phys(crashk_low_res.start); + crashk_res.start = crash_base; crashk_res.end = crash_base + crash_size - 1; insert_resource(&iomem_resource, &crashk_res); -- cgit v1.2.3 From 710c8d6c026c0bbbd5d9036ef210e263edeb07d3 Mon Sep 17 00:00:00 2001 From: Linu Cherian Date: Mon, 9 May 2022 10:02:21 +0530 Subject: arm64: Declare non global symbols as static Fix below sparse warnings introduced while adding errata. arch/arm64/kernel/cpu_errata.c:218:25: sparse: warning: symbol 'cavium_erratum_23154_cpus' was not declared. Should it be static? Reported-by: kernel test robot Signed-off-by: Linu Cherian Acked-by: Will Deacon Link: https://lore.kernel.org/r/20220509043221.16361-1-lcherian@marvell.com Signed-off-by: Catalin Marinas --- arch/arm64/kernel/cpu_errata.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c index 4c9b5b4b7a0b..49f4863c6c56 100644 --- a/arch/arm64/kernel/cpu_errata.c +++ b/arch/arm64/kernel/cpu_errata.c @@ -215,7 +215,7 @@ static const struct arm64_cpu_capabilities arm64_repeat_tlbi_list[] = { #endif #ifdef CONFIG_CAVIUM_ERRATUM_23154 -const struct midr_range cavium_erratum_23154_cpus[] = { +static const struct midr_range cavium_erratum_23154_cpus[] = { MIDR_ALL_VERSIONS(MIDR_THUNDERX), MIDR_ALL_VERSIONS(MIDR_THUNDERX_81XX), MIDR_ALL_VERSIONS(MIDR_THUNDERX_83XX), -- cgit v1.2.3 From fb396bb459c1fa3920dd8a9d84680398c65fed75 Mon Sep 17 00:00:00 2001 From: Anshuman Khandual Date: Tue, 10 May 2022 10:09:30 +0530 Subject: arm64/hugetlb: Drop TLB flush from get_clear_flush() This drops now redundant TLB flush in get_clear_flush() which is no longer required after recent commit 697a1d44af8b ("tlb: hugetlb: Add more sizes to tlb_remove_huge_tlb_entry"). It also renames this function i.e dropping off '_flush' and replacing it with '__contig' as appropriate. Cc: Will Deacon Cc: Mike Kravetz Cc: linux-arm-kernel@lists.infradead.org Cc: linux-kernel@vger.kernel.org Signed-off-by: Anshuman Khandual Link: https://lore.kernel.org/r/20220510043930.2410985-1-anshuman.khandual@arm.com Signed-off-by: Catalin Marinas --- arch/arm64/mm/hugetlbpage.c | 16 +++++----------- 1 file changed, 5 insertions(+), 11 deletions(-) (limited to 'arch') diff --git a/arch/arm64/mm/hugetlbpage.c b/arch/arm64/mm/hugetlbpage.c index cbace1c9e137..749435b01a89 100644 --- a/arch/arm64/mm/hugetlbpage.c +++ b/arch/arm64/mm/hugetlbpage.c @@ -166,15 +166,14 @@ static inline int num_contig_ptes(unsigned long size, size_t *pgsize) * * This helper performs the break step. */ -static pte_t get_clear_flush(struct mm_struct *mm, +static pte_t get_clear_contig(struct mm_struct *mm, unsigned long addr, pte_t *ptep, unsigned long pgsize, unsigned long ncontig) { pte_t orig_pte = huge_ptep_get(ptep); - bool valid = pte_valid(orig_pte); - unsigned long i, saddr = addr; + unsigned long i; for (i = 0; i < ncontig; i++, addr += pgsize, ptep++) { pte_t pte = ptep_get_and_clear(mm, addr, ptep); @@ -190,11 +189,6 @@ static pte_t get_clear_flush(struct mm_struct *mm, if (pte_young(pte)) orig_pte = pte_mkyoung(orig_pte); } - - if (valid) { - struct vm_area_struct vma = TLB_FLUSH_VMA(mm, 0); - flush_tlb_range(&vma, saddr, addr); - } return orig_pte; } @@ -392,7 +386,7 @@ pte_t huge_ptep_get_and_clear(struct mm_struct *mm, ncontig = find_num_contig(mm, addr, ptep, &pgsize); - return get_clear_flush(mm, addr, ptep, pgsize, ncontig); + return get_clear_contig(mm, addr, ptep, pgsize, ncontig); } /* @@ -443,7 +437,7 @@ int huge_ptep_set_access_flags(struct vm_area_struct *vma, if (!__cont_access_flags_changed(ptep, pte, ncontig)) return 0; - orig_pte = get_clear_flush(vma->vm_mm, addr, ptep, pgsize, ncontig); + orig_pte = get_clear_contig(vma->vm_mm, addr, ptep, pgsize, ncontig); /* Make sure we don't lose the dirty or young state */ if (pte_dirty(orig_pte)) @@ -476,7 +470,7 @@ void huge_ptep_set_wrprotect(struct mm_struct *mm, ncontig = find_num_contig(mm, addr, ptep, &pgsize); dpfn = pgsize >> PAGE_SHIFT; - pte = get_clear_flush(mm, addr, ptep, pgsize, ncontig); + pte = get_clear_contig(mm, addr, ptep, pgsize, ncontig); pte = pte_wrprotect(pte); hugeprot = pte_pgprot(pte); -- cgit v1.2.3 From 82bf59002e0f84e51c16589080c2feba6e6ec78a Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Fri, 13 May 2022 18:41:17 +0100 Subject: arm64/sysreg: improve comment for regs without fields Currently for registers without fields we create a comment pointing at the common definitions, e.g. | #define REG_TTBR0_EL1 S3_0_C2_C0_0 | #define SYS_TTBR0_EL1 sys_reg(3, 0, 2, 0, 0) | #define SYS_TTBR0_EL1_Op0 3 | #define SYS_TTBR0_EL1_Op1 0 | #define SYS_TTBR0_EL1_CRn 2 | #define SYS_TTBR0_EL1_CRm 0 | #define SYS_TTBR0_EL1_Op2 0 | | /* See TTBRx_EL1 */ It would be slightly nicer if the comment said what we should be looking for, e.g. | #define REG_TTBR0_EL1 S3_0_C2_C0_0 | #define SYS_TTBR0_EL1 sys_reg(3, 0, 2, 0, 0) | #define SYS_TTBR0_EL1_Op0 3 | #define SYS_TTBR0_EL1_Op1 0 | #define SYS_TTBR0_EL1_CRn 2 | #define SYS_TTBR0_EL1_CRm 0 | #define SYS_TTBR0_EL1_Op2 0 | | /* For TTBR0_EL1 fields see TTBRx_EL1 */ Update the comment generation accordingly. Signed-off-by: Mark Rutland Cc: Mark Brown Cc: Will Deacon Link: https://lore.kernel.org/r/20220513174118.266966-2-mark.rutland@arm.com Signed-off-by: Catalin Marinas --- arch/arm64/tools/gen-sysreg.awk | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm64/tools/gen-sysreg.awk b/arch/arm64/tools/gen-sysreg.awk index 3ffd77cbb499..f41feb87d0ca 100755 --- a/arch/arm64/tools/gen-sysreg.awk +++ b/arch/arm64/tools/gen-sysreg.awk @@ -184,7 +184,7 @@ END { if (next_bit != 63) fatal("Some fields already defined for " reg) - print "/* See " $2 " */" + print "/* For " reg " fields see " $2 " */" print "" next_bit = 0 -- cgit v1.2.3 From 5005d1dbbb3828078f32dff24b77866502e45e93 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Fri, 13 May 2022 18:41:18 +0100 Subject: arm64/sysreg: fix odd line spacing Between the header and the definitions, there's no line gap, and in a couple of places a double line gap for no semantic reason, which makes the output look a little odd. Fix this so blocks are consistently separated with a single line gap: * Add a newline after the "Generated file" comment line, so this is clearly split from whatever the first definition in the file is. * At the start of a SysregFields block there's no need for a newline as we haven't output any sysreg encoding details prior to this. * At the end of a Sysreg block there's no need for a newline if we have no RES0 or RES1 fields, as there will be a line gap after the previous element (e.g. a Fields line). There should be no functional change as a result of this patch. Signed-off-by: Mark Rutland Cc: Mark Brown Cc: Will Deacon Link: https://lore.kernel.org/r/20220513174118.266966-3-mark.rutland@arm.com Signed-off-by: Catalin Marinas --- arch/arm64/tools/gen-sysreg.awk | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/arm64/tools/gen-sysreg.awk b/arch/arm64/tools/gen-sysreg.awk index f41feb87d0ca..4aa7ff8ce707 100755 --- a/arch/arm64/tools/gen-sysreg.awk +++ b/arch/arm64/tools/gen-sysreg.awk @@ -77,6 +77,7 @@ BEGIN { print "#define __ASM_SYSREG_DEFS_H" print "" print "/* Generated file - do not edit */" + print "" block = "None" } @@ -98,8 +99,6 @@ END { res0 = "UL(0)" res1 = "UL(0)" - print "" - next_bit = 63 next @@ -162,7 +161,8 @@ END { define(reg "_RES0", "(" res0 ")") if (res1 != null) define(reg "_RES1", "(" res1 ")") - print "" + if (res0 != null || res1 != null) + print "" reg = null op0 = null -- cgit v1.2.3 From b4d6bb38f9dc7cd8ccb117c247a41ec12578929c Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Wed, 20 Apr 2022 03:04:16 +0000 Subject: arm64: mte: Clean up user tag accessors Invoking user_ldst to explicitly add a post-increment of 0 is silly. Just use a normal USER() annotation and save the redundant instruction. Signed-off-by: Robin Murphy Reviewed-by: Tong Tiangen Acked-by: Mark Rutland Link: https://lore.kernel.org/r/20220420030418.3189040-6-tongtiangen@huawei.com Signed-off-by: Catalin Marinas --- arch/arm64/lib/mte.S | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/arm64/lib/mte.S b/arch/arm64/lib/mte.S index 8590af3c98c0..eeb9e45bcce8 100644 --- a/arch/arm64/lib/mte.S +++ b/arch/arm64/lib/mte.S @@ -93,7 +93,7 @@ SYM_FUNC_START(mte_copy_tags_from_user) mov x3, x1 cbz x2, 2f 1: - user_ldst 2f, ldtrb, w4, x1, 0 +USER(2f, ldtrb w4, [x1]) lsl x4, x4, #MTE_TAG_SHIFT stg x4, [x0], #MTE_GRANULE_SIZE add x1, x1, #1 @@ -120,7 +120,7 @@ SYM_FUNC_START(mte_copy_tags_to_user) 1: ldg x4, [x1] ubfx x4, x4, #MTE_TAG_SHIFT, #MTE_TAG_SIZE - user_ldst 2f, sttrb, w4, x0, 0 +USER(2f, sttrb w4, [x0]) add x0, x0, #1 add x1, x1, #MTE_GRANULE_SIZE subs x2, x2, #1 -- cgit v1.2.3 From c733812dd77350ba0da18cd6e474e5a2e6461b49 Mon Sep 17 00:00:00 2001 From: Valentin Schneider Date: Thu, 5 May 2022 18:32:07 +0200 Subject: arm64: mm: Make arch_faults_on_old_pte() check for migratability arch_faults_on_old_pte() relies on the calling context being non-preemptible. CONFIG_PREEMPT_RT turns the PTE lock into a sleepable spinlock, which doesn't disable preemption once acquired, triggering the warning in arch_faults_on_old_pte(). It does however disable migration, ensuring the task remains on the same CPU during the entirety of the critical section, making the read of cpu_has_hw_af() safe and stable. Make arch_faults_on_old_pte() check cant_migrate() instead of preemptible(). Cc: Valentin Schneider Suggested-by: Sebastian Andrzej Siewior Signed-off-by: Valentin Schneider Link: https://lore.kernel.org/r/20220127192437.1192957-1-valentin.schneider@arm.com Acked-by: Catalin Marinas Link: https://lore.kernel.org/r/20220505163207.85751-4-bigeasy@linutronix.de Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/pgtable.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index 94e147e5456c..9c0a9bfd6b07 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -1001,7 +1001,8 @@ static inline void update_mmu_cache(struct vm_area_struct *vma, */ static inline bool arch_faults_on_old_pte(void) { - WARN_ON(preemptible()); + /* The register read below requires a stable CPU to make any sense */ + cant_migrate(); return !cpu_has_hw_af(); } -- cgit v1.2.3 From a1259dd807192917eb98603f8bc7b43f70cea5b9 Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Thu, 5 May 2022 18:32:05 +0200 Subject: arm64/sve: Delay freeing memory in fpsimd_flush_thread() fpsimd_flush_thread() invokes kfree() via sve_free()+sme_free() within a preempt disabled section which is not working on -RT. Delay freeing of memory until preemption is enabled again. Signed-off-by: Sebastian Andrzej Siewior Reviewed-by: Mark Brown Link: https://lore.kernel.org/r/20220505163207.85751-2-bigeasy@linutronix.de Signed-off-by: Catalin Marinas --- arch/arm64/kernel/fpsimd.c | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c index 64431bc62472..6e53badff8a6 100644 --- a/arch/arm64/kernel/fpsimd.c +++ b/arch/arm64/kernel/fpsimd.c @@ -1562,6 +1562,9 @@ static void fpsimd_flush_thread_vl(enum vec_type type) void fpsimd_flush_thread(void) { + void *sve_state = NULL; + void *za_state = NULL; + if (!system_supports_fpsimd()) return; @@ -1573,18 +1576,28 @@ void fpsimd_flush_thread(void) if (system_supports_sve()) { clear_thread_flag(TIF_SVE); - sve_free(current); + + /* Defer kfree() while in atomic context */ + sve_state = current->thread.sve_state; + current->thread.sve_state = NULL; + fpsimd_flush_thread_vl(ARM64_VEC_SVE); } if (system_supports_sme()) { clear_thread_flag(TIF_SME); - sme_free(current); + + /* Defer kfree() while in atomic context */ + za_state = current->thread.za_state; + current->thread.za_state = NULL; + fpsimd_flush_thread_vl(ARM64_VEC_SME); current->thread.svcr = 0; } put_cpu_fpsimd_context(); + kfree(sve_state); + kfree(za_state); } /* -- cgit v1.2.3 From 696207d4258b2ab66dbd1655a7cfb3e978889085 Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Thu, 5 May 2022 18:32:06 +0200 Subject: arm64/sve: Make kernel FPU protection RT friendly Non RT kernels need to protect FPU against preemption and bottom half processing. This is achieved by disabling bottom halves via local_bh_disable() which implictly disables preemption. On RT kernels this protection mechanism is not sufficient because local_bh_disable() does not disable preemption. It serializes bottom half related processing via a CPU local lock. As bottom halves are running always in thread context on RT kernels disabling preemption is the proper choice as it implicitly prevents bottom half processing. Signed-off-by: Sebastian Andrzej Siewior Acked-by: Mark Brown Link: https://lore.kernel.org/r/20220505163207.85751-3-bigeasy@linutronix.de Signed-off-by: Catalin Marinas --- arch/arm64/kernel/fpsimd.c | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c index 6e53badff8a6..a568735b7c2e 100644 --- a/arch/arm64/kernel/fpsimd.c +++ b/arch/arm64/kernel/fpsimd.c @@ -237,10 +237,19 @@ static void __get_cpu_fpsimd_context(void) * * The double-underscore version must only be called if you know the task * can't be preempted. + * + * On RT kernels local_bh_disable() is not sufficient because it only + * serializes soft interrupt related sections via a local lock, but stays + * preemptible. Disabling preemption is the right choice here as bottom + * half processing is always in thread context on RT kernels so it + * implicitly prevents bottom half processing as well. */ static void get_cpu_fpsimd_context(void) { - local_bh_disable(); + if (!IS_ENABLED(CONFIG_PREEMPT_RT)) + local_bh_disable(); + else + preempt_disable(); __get_cpu_fpsimd_context(); } @@ -261,7 +270,10 @@ static void __put_cpu_fpsimd_context(void) static void put_cpu_fpsimd_context(void) { __put_cpu_fpsimd_context(); - local_bh_enable(); + if (!IS_ENABLED(CONFIG_PREEMPT_RT)) + local_bh_enable(); + else + preempt_enable(); } static bool have_cpu_fpsimd_context(void) -- cgit v1.2.3 From f171f9e4097d29db88a99ea96bb6c08e819a52a4 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Tue, 10 May 2022 17:11:57 +0100 Subject: arm64/fp: Make SVE and SME length register definition match architecture Currently (as of DDI0487H.a) the architecture defines the vector length control field in ZCR and SMCR as being 4 bits wide with an additional 5 bits reserved above it marked as RAZ/WI for future expansion. The kernel currently attempts to anticipate such expansion by treating these extra bits as part of the LEN field but this will be inconvenient when we start generating the defines and would cause problems in the event that the architecture goes a different direction with these fields. Let's instead change the defines to reflect the currently defined architecture, we can update in future as needed. No change in behaviour should be seen in any system, even emulated systems using the maximum allowed vector length for the current architecture. Signed-off-by: Mark Brown Link: https://lore.kernel.org/r/20220510161208.631259-2-broonie@kernel.org Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/sysreg.h | 18 ++++-------------- 1 file changed, 4 insertions(+), 14 deletions(-) (limited to 'arch') diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index 422741ca5631..4d78b6aeebb4 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -1113,26 +1113,16 @@ #define DCZID_DZP_SHIFT 4 #define DCZID_BS_SHIFT 0 -/* - * The ZCR_ELx_LEN_* definitions intentionally include bits [8:4] which - * are reserved by the SVE architecture for future expansion of the LEN - * field, with compatible semantics. - */ #define ZCR_ELx_LEN_SHIFT 0 -#define ZCR_ELx_LEN_SIZE 9 -#define ZCR_ELx_LEN_MASK 0x1ff +#define ZCR_ELx_LEN_SIZE 4 +#define ZCR_ELx_LEN_MASK 0xf #define SMCR_ELx_FA64_SHIFT 31 #define SMCR_ELx_FA64_MASK (1 << SMCR_ELx_FA64_SHIFT) -/* - * The SMCR_ELx_LEN_* definitions intentionally include bits [8:4] which - * are reserved by the SME architecture for future expansion of the LEN - * field, with compatible semantics. - */ #define SMCR_ELx_LEN_SHIFT 0 -#define SMCR_ELx_LEN_SIZE 9 -#define SMCR_ELx_LEN_MASK 0x1ff +#define SMCR_ELx_LEN_SIZE 4 +#define SMCR_ELx_LEN_MASK 0xf #define CPACR_EL1_FPEN_EL1EN (BIT(20)) /* enable EL1 access */ #define CPACR_EL1_FPEN_EL0EN (BIT(21)) /* enable EL0 access, if EL1EN set */ -- cgit v1.2.3 From 5b06dcfd9e0a5dd63ecadf9169ee92a80b063322 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Tue, 10 May 2022 17:11:58 +0100 Subject: arm64/fp: Rename SVE and SME LEN field name to _WIDTH The SVE and SVE length configuration field LEN have constants specifying their width called _SIZE rather than the more normal _WIDTH, in preparation for automatic generation rename to _WIDTH. No functional change. Signed-off-by: Mark Brown Link: https://lore.kernel.org/r/20220510161208.631259-3-broonie@kernel.org Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/sysreg.h | 4 ++-- arch/arm64/kernel/cpufeature.c | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'arch') diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index 4d78b6aeebb4..b83808ebc58f 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -1114,14 +1114,14 @@ #define DCZID_BS_SHIFT 0 #define ZCR_ELx_LEN_SHIFT 0 -#define ZCR_ELx_LEN_SIZE 4 +#define ZCR_ELx_LEN_WIDTH 4 #define ZCR_ELx_LEN_MASK 0xf #define SMCR_ELx_FA64_SHIFT 31 #define SMCR_ELx_FA64_MASK (1 << SMCR_ELx_FA64_SHIFT) #define SMCR_ELx_LEN_SHIFT 0 -#define SMCR_ELx_LEN_SIZE 4 +#define SMCR_ELx_LEN_WIDTH 4 #define SMCR_ELx_LEN_MASK 0xf #define CPACR_EL1_FPEN_EL1EN (BIT(20)) /* enable EL1 access */ diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index 08689362cd89..665ad380c07f 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -577,13 +577,13 @@ static const struct arm64_ftr_bits ftr_id_dfr1[] = { static const struct arm64_ftr_bits ftr_zcr[] = { ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, - ZCR_ELx_LEN_SHIFT, ZCR_ELx_LEN_SIZE, 0), /* LEN */ + ZCR_ELx_LEN_SHIFT, ZCR_ELx_LEN_WIDTH, 0), /* LEN */ ARM64_FTR_END, }; static const struct arm64_ftr_bits ftr_smcr[] = { ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, - SMCR_ELx_LEN_SHIFT, SMCR_ELx_LEN_SIZE, 0), /* LEN */ + SMCR_ELx_LEN_SHIFT, SMCR_ELx_LEN_WIDTH, 0), /* LEN */ ARM64_FTR_END, }; -- cgit v1.2.3 From a6dab6cc0f4cd0b341f002ce7d0683701612f527 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Tue, 10 May 2022 17:11:59 +0100 Subject: arm64/sme: Drop SYS_ from SMIDR_EL1 defines We currently have a non-standard SYS_ prefix in the constants generated for SMIDR_EL1 bitfields. Drop this in preparation for automatic register definition generation, no functional change. Signed-off-by: Mark Brown Link: https://lore.kernel.org/r/20220510161208.631259-4-broonie@kernel.org Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/el2_setup.h | 2 +- arch/arm64/include/asm/sysreg.h | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'arch') diff --git a/arch/arm64/include/asm/el2_setup.h b/arch/arm64/include/asm/el2_setup.h index fabdbde0fe02..34ceff08cac4 100644 --- a/arch/arm64/include/asm/el2_setup.h +++ b/arch/arm64/include/asm/el2_setup.h @@ -171,7 +171,7 @@ msr_s SYS_SMCR_EL2, x1 // length for EL1. mrs_s x1, SYS_SMIDR_EL1 // Priority mapping supported? - ubfx x1, x1, #SYS_SMIDR_EL1_SMPS_SHIFT, #1 + ubfx x1, x1, #SMIDR_EL1_SMPS_SHIFT, #1 cbz x1, .Lskip_sme_\@ msr_s SYS_SMPRIMAP_EL2, xzr // Make all priorities equal diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index b83808ebc58f..ab2d7cbc63fc 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -467,9 +467,9 @@ #define SYS_SMIDR_EL1 sys_reg(3, 1, 0, 0, 6) #define SYS_AIDR_EL1 sys_reg(3, 1, 0, 0, 7) -#define SYS_SMIDR_EL1_IMPLEMENTER_SHIFT 24 -#define SYS_SMIDR_EL1_SMPS_SHIFT 15 -#define SYS_SMIDR_EL1_AFFINITY_SHIFT 0 +#define SMIDR_EL1_IMPLEMENTER_SHIFT 24 +#define SMIDR_EL1_SMPS_SHIFT 15 +#define SMIDR_EL1_AFFINITY_SHIFT 0 #define SYS_CSSELR_EL1 sys_reg(3, 2, 0, 0, 0) -- cgit v1.2.3 From e65fc01bf271cefa6269b7ab1badcf7cddae5d40 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Tue, 10 May 2022 17:12:00 +0100 Subject: arm64/sme: Standardise bitfield names for SVCR The bitfield definitions for SVCR have a SYS_ added to the names of the constant which will be a problem for automatic generation. Remove the prefixes, no functional change. Signed-off-by: Mark Brown Link: https://lore.kernel.org/r/20220510161208.631259-5-broonie@kernel.org Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/fpsimd.h | 4 ++-- arch/arm64/include/asm/processor.h | 2 +- arch/arm64/include/asm/sysreg.h | 4 ++-- arch/arm64/kernel/fpsimd.c | 6 +++--- 4 files changed, 8 insertions(+), 8 deletions(-) (limited to 'arch') diff --git a/arch/arm64/include/asm/fpsimd.h b/arch/arm64/include/asm/fpsimd.h index 75caa2098d5b..aa11dbec0d70 100644 --- a/arch/arm64/include/asm/fpsimd.h +++ b/arch/arm64/include/asm/fpsimd.h @@ -67,12 +67,12 @@ extern void fpsimd_save_and_flush_cpu_state(void); static inline bool thread_sm_enabled(struct thread_struct *thread) { - return system_supports_sme() && (thread->svcr & SYS_SVCR_EL0_SM_MASK); + return system_supports_sme() && (thread->svcr & SVCR_EL0_SM_MASK); } static inline bool thread_za_enabled(struct thread_struct *thread) { - return system_supports_sme() && (thread->svcr & SYS_SVCR_EL0_ZA_MASK); + return system_supports_sme() && (thread->svcr & SVCR_EL0_ZA_MASK); } /* Maximum VL that SVE/SME VL-agnostic software can transparently support */ diff --git a/arch/arm64/include/asm/processor.h b/arch/arm64/include/asm/processor.h index 1d2ca4870b84..69ce163d2fb2 100644 --- a/arch/arm64/include/asm/processor.h +++ b/arch/arm64/include/asm/processor.h @@ -192,7 +192,7 @@ static inline unsigned int thread_get_sme_vl(struct thread_struct *thread) static inline unsigned int thread_get_cur_vl(struct thread_struct *thread) { - if (system_supports_sme() && (thread->svcr & SYS_SVCR_EL0_SM_MASK)) + if (system_supports_sme() && (thread->svcr & SVCR_EL0_SM_MASK)) return thread_get_sme_vl(thread); else return thread_get_sve_vl(thread); diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index ab2d7cbc63fc..4459cd4a37f5 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -480,8 +480,8 @@ #define SYS_RNDRRS_EL0 sys_reg(3, 3, 2, 4, 1) #define SYS_SVCR_EL0 sys_reg(3, 3, 4, 2, 2) -#define SYS_SVCR_EL0_ZA_MASK 2 -#define SYS_SVCR_EL0_SM_MASK 1 +#define SVCR_EL0_ZA_MASK 2 +#define SVCR_EL0_SM_MASK 1 #define SYS_PMCR_EL0 sys_reg(3, 3, 9, 12, 0) #define SYS_PMCNTENSET_EL0 sys_reg(3, 3, 9, 12, 1) diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c index a568735b7c2e..a5f6d6d9f372 100644 --- a/arch/arm64/kernel/fpsimd.c +++ b/arch/arm64/kernel/fpsimd.c @@ -1918,7 +1918,7 @@ void __efi_fpsimd_begin(void) svcr = read_sysreg_s(SYS_SVCR_EL0); if (!system_supports_fa64()) - ffr = svcr & SYS_SVCR_EL0_SM_MASK; + ffr = svcr & SVCR_EL0_SM_MASK; __this_cpu_write(efi_sm_state, ffr); } @@ -1929,7 +1929,7 @@ void __efi_fpsimd_begin(void) if (system_supports_sme()) sysreg_clear_set_s(SYS_SVCR_EL0, - SYS_SVCR_EL0_SM_MASK, 0); + SVCR_EL0_SM_MASK, 0); } else { fpsimd_save_state(this_cpu_ptr(&efi_fpsimd_state)); @@ -1964,7 +1964,7 @@ void __efi_fpsimd_end(void) if (__this_cpu_read(efi_sm_state)) { sysreg_clear_set_s(SYS_SVCR_EL0, 0, - SYS_SVCR_EL0_SM_MASK); + SVCR_EL0_SM_MASK); if (!system_supports_fa64()) ffr = efi_sm_state; } -- cgit v1.2.3 From ec0067a63e5a37de74025d46095cfe7a7af3114a Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Tue, 10 May 2022 17:12:01 +0100 Subject: arm64/sme: Remove _EL0 from name of SVCR - FIXME sysreg.h The defines for SVCR call it SVCR_EL0 however the architecture calls the register SVCR with no _EL0 suffix. In preparation for generating the sysreg definitions rename to match the architecture, no functional change. Signed-off-by: Mark Brown Link: https://lore.kernel.org/r/20220510161208.631259-6-broonie@kernel.org Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/fpsimd.h | 4 ++-- arch/arm64/include/asm/processor.h | 2 +- arch/arm64/include/asm/sysreg.h | 6 +++--- arch/arm64/kernel/fpsimd.c | 26 +++++++++++++------------- arch/arm64/kernel/ptrace.c | 8 ++++---- arch/arm64/kernel/signal.c | 14 +++++++------- arch/arm64/kernel/syscall.c | 4 ++-- arch/arm64/kvm/fpsimd.c | 4 ++-- arch/arm64/kvm/sys_regs.c | 2 +- 9 files changed, 35 insertions(+), 35 deletions(-) (limited to 'arch') diff --git a/arch/arm64/include/asm/fpsimd.h b/arch/arm64/include/asm/fpsimd.h index aa11dbec0d70..9bb1873f5295 100644 --- a/arch/arm64/include/asm/fpsimd.h +++ b/arch/arm64/include/asm/fpsimd.h @@ -67,12 +67,12 @@ extern void fpsimd_save_and_flush_cpu_state(void); static inline bool thread_sm_enabled(struct thread_struct *thread) { - return system_supports_sme() && (thread->svcr & SVCR_EL0_SM_MASK); + return system_supports_sme() && (thread->svcr & SVCR_SM_MASK); } static inline bool thread_za_enabled(struct thread_struct *thread) { - return system_supports_sme() && (thread->svcr & SVCR_EL0_ZA_MASK); + return system_supports_sme() && (thread->svcr & SVCR_ZA_MASK); } /* Maximum VL that SVE/SME VL-agnostic software can transparently support */ diff --git a/arch/arm64/include/asm/processor.h b/arch/arm64/include/asm/processor.h index 69ce163d2fb2..8de5a4fc06e3 100644 --- a/arch/arm64/include/asm/processor.h +++ b/arch/arm64/include/asm/processor.h @@ -192,7 +192,7 @@ static inline unsigned int thread_get_sme_vl(struct thread_struct *thread) static inline unsigned int thread_get_cur_vl(struct thread_struct *thread) { - if (system_supports_sme() && (thread->svcr & SVCR_EL0_SM_MASK)) + if (system_supports_sme() && (thread->svcr & SVCR_SM_MASK)) return thread_get_sme_vl(thread); else return thread_get_sve_vl(thread); diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index 4459cd4a37f5..a2f0759f65b2 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -479,9 +479,9 @@ #define SYS_RNDR_EL0 sys_reg(3, 3, 2, 4, 0) #define SYS_RNDRRS_EL0 sys_reg(3, 3, 2, 4, 1) -#define SYS_SVCR_EL0 sys_reg(3, 3, 4, 2, 2) -#define SVCR_EL0_ZA_MASK 2 -#define SVCR_EL0_SM_MASK 1 +#define SYS_SVCR sys_reg(3, 3, 4, 2, 2) +#define SVCR_ZA_MASK 2 +#define SVCR_SM_MASK 1 #define SYS_PMCR_EL0 sys_reg(3, 3, 9, 12, 0) #define SYS_PMCNTENSET_EL0 sys_reg(3, 3, 9, 12, 1) diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c index a5f6d6d9f372..759d40cac1fe 100644 --- a/arch/arm64/kernel/fpsimd.c +++ b/arch/arm64/kernel/fpsimd.c @@ -410,7 +410,7 @@ static void task_fpsimd_load(void) if (test_thread_flag(TIF_SME)) sme_set_vq(sve_vq_from_vl(sme_vl) - 1); - write_sysreg_s(current->thread.svcr, SYS_SVCR_EL0); + write_sysreg_s(current->thread.svcr, SYS_SVCR); if (thread_za_enabled(¤t->thread)) za_load_state(current->thread.za_state); @@ -462,15 +462,15 @@ static void fpsimd_save(void) if (system_supports_sme()) { u64 *svcr = last->svcr; - *svcr = read_sysreg_s(SYS_SVCR_EL0); + *svcr = read_sysreg_s(SYS_SVCR); - *svcr = read_sysreg_s(SYS_SVCR_EL0); + *svcr = read_sysreg_s(SYS_SVCR); - if (*svcr & SYS_SVCR_EL0_ZA_MASK) + if (*svcr & SVCR_ZA_MASK) za_save_state(last->za_state); /* If we are in streaming mode override regular SVE. */ - if (*svcr & SYS_SVCR_EL0_SM_MASK) { + if (*svcr & SVCR_SM_MASK) { save_sve_regs = true; save_ffr = system_supports_fa64(); vl = last->sme_vl; @@ -852,8 +852,8 @@ int vec_set_vector_length(struct task_struct *task, enum vec_type type, sve_to_fpsimd(task); if (system_supports_sme() && type == ARM64_VEC_SME) { - task->thread.svcr &= ~(SYS_SVCR_EL0_SM_MASK | - SYS_SVCR_EL0_ZA_MASK); + task->thread.svcr &= ~(SVCR_SM_MASK | + SVCR_ZA_MASK); clear_thread_flag(TIF_SME); } @@ -1915,10 +1915,10 @@ void __efi_fpsimd_begin(void) __this_cpu_write(efi_sve_state_used, true); if (system_supports_sme()) { - svcr = read_sysreg_s(SYS_SVCR_EL0); + svcr = read_sysreg_s(SYS_SVCR); if (!system_supports_fa64()) - ffr = svcr & SVCR_EL0_SM_MASK; + ffr = svcr & SVCR_SM_MASK; __this_cpu_write(efi_sm_state, ffr); } @@ -1928,8 +1928,8 @@ void __efi_fpsimd_begin(void) ffr); if (system_supports_sme()) - sysreg_clear_set_s(SYS_SVCR_EL0, - SVCR_EL0_SM_MASK, 0); + sysreg_clear_set_s(SYS_SVCR, + SVCR_SM_MASK, 0); } else { fpsimd_save_state(this_cpu_ptr(&efi_fpsimd_state)); @@ -1962,9 +1962,9 @@ void __efi_fpsimd_end(void) */ if (system_supports_sme()) { if (__this_cpu_read(efi_sm_state)) { - sysreg_clear_set_s(SYS_SVCR_EL0, + sysreg_clear_set_s(SYS_SVCR, 0, - SVCR_EL0_SM_MASK); + SVCR_SM_MASK); if (!system_supports_fa64()) ffr = efi_sm_state; } diff --git a/arch/arm64/kernel/ptrace.c b/arch/arm64/kernel/ptrace.c index 60ebc3060cf1..21da83187a60 100644 --- a/arch/arm64/kernel/ptrace.c +++ b/arch/arm64/kernel/ptrace.c @@ -867,10 +867,10 @@ static int sve_set_common(struct task_struct *target, switch (type) { case ARM64_VEC_SVE: - target->thread.svcr &= ~SYS_SVCR_EL0_SM_MASK; + target->thread.svcr &= ~SVCR_SM_MASK; break; case ARM64_VEC_SME: - target->thread.svcr |= SYS_SVCR_EL0_SM_MASK; + target->thread.svcr |= SVCR_SM_MASK; break; default: WARN_ON_ONCE(1); @@ -1100,7 +1100,7 @@ static int za_set(struct task_struct *target, /* If there is no data then disable ZA */ if (!count) { - target->thread.svcr &= ~SYS_SVCR_EL0_ZA_MASK; + target->thread.svcr &= ~SVCR_ZA_MASK; goto out; } @@ -1125,7 +1125,7 @@ static int za_set(struct task_struct *target, /* Mark ZA as active and let userspace use it */ set_tsk_thread_flag(target, TIF_SME); - target->thread.svcr |= SYS_SVCR_EL0_ZA_MASK; + target->thread.svcr |= SVCR_ZA_MASK; out: fpsimd_flush_task_state(target); diff --git a/arch/arm64/kernel/signal.c b/arch/arm64/kernel/signal.c index 2295948d97fd..18bf590dc1c7 100644 --- a/arch/arm64/kernel/signal.c +++ b/arch/arm64/kernel/signal.c @@ -288,7 +288,7 @@ static int restore_sve_fpsimd_context(struct user_ctxs *user) if (sve.head.size <= sizeof(*user->sve)) { clear_thread_flag(TIF_SVE); - current->thread.svcr &= ~SYS_SVCR_EL0_SM_MASK; + current->thread.svcr &= ~SVCR_SM_MASK; goto fpsimd_only; } @@ -321,7 +321,7 @@ static int restore_sve_fpsimd_context(struct user_ctxs *user) return -EFAULT; if (sve.flags & SVE_SIG_FLAG_SM) - current->thread.svcr |= SYS_SVCR_EL0_SM_MASK; + current->thread.svcr |= SVCR_SM_MASK; else set_thread_flag(TIF_SVE); @@ -398,7 +398,7 @@ static int restore_za_context(struct user_ctxs __user *user) return -EINVAL; if (za.head.size <= sizeof(*user->za)) { - current->thread.svcr &= ~SYS_SVCR_EL0_ZA_MASK; + current->thread.svcr &= ~SVCR_ZA_MASK; return 0; } @@ -419,7 +419,7 @@ static int restore_za_context(struct user_ctxs __user *user) sme_alloc(current); if (!current->thread.za_state) { - current->thread.svcr &= ~SYS_SVCR_EL0_ZA_MASK; + current->thread.svcr &= ~SVCR_ZA_MASK; clear_thread_flag(TIF_SME); return -ENOMEM; } @@ -432,7 +432,7 @@ static int restore_za_context(struct user_ctxs __user *user) return -EFAULT; set_thread_flag(TIF_SME); - current->thread.svcr |= SYS_SVCR_EL0_ZA_MASK; + current->thread.svcr |= SVCR_ZA_MASK; return 0; } @@ -922,8 +922,8 @@ static void setup_return(struct pt_regs *regs, struct k_sigaction *ka, /* Signal handlers are invoked with ZA and streaming mode disabled */ if (system_supports_sme()) { - current->thread.svcr &= ~(SYS_SVCR_EL0_ZA_MASK | - SYS_SVCR_EL0_SM_MASK); + current->thread.svcr &= ~(SVCR_ZA_MASK | + SVCR_SM_MASK); sme_smstop(); } diff --git a/arch/arm64/kernel/syscall.c b/arch/arm64/kernel/syscall.c index 92c69e5ac269..733451fe7e41 100644 --- a/arch/arm64/kernel/syscall.c +++ b/arch/arm64/kernel/syscall.c @@ -174,9 +174,9 @@ static inline void fp_user_discard(void) * need updating. */ if (system_supports_sme() && test_thread_flag(TIF_SME)) { - u64 svcr = read_sysreg_s(SYS_SVCR_EL0); + u64 svcr = read_sysreg_s(SYS_SVCR); - if (svcr & SYS_SVCR_EL0_SM_MASK) + if (svcr & SVCR_SM_MASK) sme_smstop_sm(); } diff --git a/arch/arm64/kvm/fpsimd.c b/arch/arm64/kvm/fpsimd.c index 441edb9c398c..3d251a4d2cf7 100644 --- a/arch/arm64/kvm/fpsimd.c +++ b/arch/arm64/kvm/fpsimd.c @@ -96,8 +96,8 @@ void kvm_arch_vcpu_load_fp(struct kvm_vcpu *vcpu) if (read_sysreg(cpacr_el1) & CPACR_EL1_SMEN_EL0EN) vcpu->arch.flags |= KVM_ARM64_HOST_SME_ENABLED; - if (read_sysreg_s(SYS_SVCR_EL0) & - (SYS_SVCR_EL0_SM_MASK | SYS_SVCR_EL0_ZA_MASK)) { + if (read_sysreg_s(SYS_SVCR) & + (SVCR_SM_MASK | SVCR_ZA_MASK)) { vcpu->arch.flags &= ~KVM_ARM64_FP_HOST; fpsimd_save_and_flush_cpu_state(); } diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index 689e53dd4cb1..1cf01c022b30 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -1685,7 +1685,7 @@ static const struct sys_reg_desc sys_reg_descs[] = { { SYS_DESC(SYS_SMIDR_EL1), undef_access }, { SYS_DESC(SYS_CSSELR_EL1), access_csselr, reset_unknown, CSSELR_EL1 }, { SYS_DESC(SYS_CTR_EL0), access_ctr }, - { SYS_DESC(SYS_SVCR_EL0), undef_access }, + { SYS_DESC(SYS_SVCR), undef_access }, { PMU_SYS_REG(SYS_PMCR_EL0), .access = access_pmcr, .reset = reset_pmcr, .reg = PMCR_EL0 }, -- cgit v1.2.3 From 9e2c0819ac853d94c927d5d2f59e2ca2b48500b4 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Tue, 10 May 2022 17:12:02 +0100 Subject: arm64/sysreg: Support generation of RAZ fields Add a statement for RAZ bitfields to the automatic register generation script. Nothing is emitted to the header for these fields. Signed-off-by: Mark Brown Acked-by: Mark Rutland Link: https://lore.kernel.org/r/20220510161208.631259-7-broonie@kernel.org Signed-off-by: Catalin Marinas --- arch/arm64/tools/gen-sysreg.awk | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'arch') diff --git a/arch/arm64/tools/gen-sysreg.awk b/arch/arm64/tools/gen-sysreg.awk index 4aa7ff8ce707..89bfb74e28de 100755 --- a/arch/arm64/tools/gen-sysreg.awk +++ b/arch/arm64/tools/gen-sysreg.awk @@ -226,6 +226,13 @@ END { next } +/^Raz/ && (block == "Sysreg" || block == "SysregFields") { + expect_fields(2) + parse_bitdef(reg, field, $2) + + next +} + /^Enum/ { change_block("Enum", "Sysreg", "Enum") expect_fields(3) -- cgit v1.2.3 From 0d1322e7ea755b9de4819aa246ebab924b4cefec Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Tue, 10 May 2022 17:12:03 +0100 Subject: arm64/sme: Automatically generate defines for SMCR Convert SMCR to use the register definition code, no functional change. Signed-off-by: Mark Brown Reviewed-by: Mark Rutland Link: https://lore.kernel.org/r/20220510161208.631259-8-broonie@kernel.org Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/sysreg.h | 10 ---------- arch/arm64/tools/sysreg | 20 ++++++++++++++++++++ 2 files changed, 20 insertions(+), 10 deletions(-) (limited to 'arch') diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index a2f0759f65b2..cbf03a1f316e 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -216,7 +216,6 @@ #define SYS_ZCR_EL1 sys_reg(3, 0, 1, 2, 0) #define SYS_TRFCR_EL1 sys_reg(3, 0, 1, 2, 1) #define SYS_SMPRI_EL1 sys_reg(3, 0, 1, 2, 4) -#define SYS_SMCR_EL1 sys_reg(3, 0, 1, 2, 6) #define SYS_TCR_EL1 sys_reg(3, 0, 2, 0, 2) @@ -571,7 +570,6 @@ #define SYS_TRFCR_EL2 sys_reg(3, 4, 1, 2, 1) #define SYS_HCRX_EL2 sys_reg(3, 4, 1, 2, 2) #define SYS_SMPRIMAP_EL2 sys_reg(3, 4, 1, 2, 5) -#define SYS_SMCR_EL2 sys_reg(3, 4, 1, 2, 6) #define SYS_DACR32_EL2 sys_reg(3, 4, 3, 0, 0) #define SYS_HDFGRTR_EL2 sys_reg(3, 4, 3, 1, 4) #define SYS_HDFGWTR_EL2 sys_reg(3, 4, 3, 1, 5) @@ -631,7 +629,6 @@ #define SYS_SCTLR_EL12 sys_reg(3, 5, 1, 0, 0) #define SYS_CPACR_EL12 sys_reg(3, 5, 1, 0, 2) #define SYS_ZCR_EL12 sys_reg(3, 5, 1, 2, 0) -#define SYS_SMCR_EL12 sys_reg(3, 5, 1, 2, 6) #define SYS_TTBR0_EL12 sys_reg(3, 5, 2, 0, 0) #define SYS_TTBR1_EL12 sys_reg(3, 5, 2, 0, 1) #define SYS_TCR_EL12 sys_reg(3, 5, 2, 0, 2) @@ -1117,13 +1114,6 @@ #define ZCR_ELx_LEN_WIDTH 4 #define ZCR_ELx_LEN_MASK 0xf -#define SMCR_ELx_FA64_SHIFT 31 -#define SMCR_ELx_FA64_MASK (1 << SMCR_ELx_FA64_SHIFT) - -#define SMCR_ELx_LEN_SHIFT 0 -#define SMCR_ELx_LEN_WIDTH 4 -#define SMCR_ELx_LEN_MASK 0xf - #define CPACR_EL1_FPEN_EL1EN (BIT(20)) /* enable EL1 access */ #define CPACR_EL1_FPEN_EL0EN (BIT(21)) /* enable EL0 access, if EL1EN set */ diff --git a/arch/arm64/tools/sysreg b/arch/arm64/tools/sysreg index c5619629bf9c..d0ac57648000 100644 --- a/arch/arm64/tools/sysreg +++ b/arch/arm64/tools/sysreg @@ -185,6 +185,26 @@ Field 1 A Field 0 M EndSysreg +SysregFields SMCR_ELx +Res0 63:32 +Field 31 FA64 +Res0 30:9 +Raz 8:4 +Field 3:0 LEN +EndSysregFields + +Sysreg SMCR_EL1 3 0 1 2 6 +Fields SMCR_ELx +EndSysreg + +Sysreg SMCR_EL2 3 4 1 2 6 +Fields SMCR_ELx +EndSysreg + +Sysreg SMCR_EL12 3 5 1 2 6 +Fields SMCR_ELx +EndSysreg + SysregFields TTBRx_EL1 Field 63:48 ASID Field 47:1 BADDR -- cgit v1.2.3 From c37b8700b7234c91e38c3a6c8dcddb6bffdfb218 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Tue, 10 May 2022 17:12:04 +0100 Subject: arm64/sme: Automatically generate SMIDR_EL1 defines Automatically generate the defines for SMIDR_EL1, no functional change. Signed-off-by: Mark Brown Reviewed-by: Mark Rutland Link: https://lore.kernel.org/r/20220510161208.631259-9-broonie@kernel.org Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/sysreg.h | 1 - arch/arm64/tools/sysreg | 9 +++++++++ 2 files changed, 9 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index cbf03a1f316e..ce08a42637bc 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -463,7 +463,6 @@ #define SYS_CCSIDR_EL1 sys_reg(3, 1, 0, 0, 0) #define SYS_CLIDR_EL1 sys_reg(3, 1, 0, 0, 1) #define SYS_GMID_EL1 sys_reg(3, 1, 0, 0, 4) -#define SYS_SMIDR_EL1 sys_reg(3, 1, 0, 0, 6) #define SYS_AIDR_EL1 sys_reg(3, 1, 0, 0, 7) #define SMIDR_EL1_IMPLEMENTER_SHIFT 24 diff --git a/arch/arm64/tools/sysreg b/arch/arm64/tools/sysreg index d0ac57648000..1bf88ca3da5b 100644 --- a/arch/arm64/tools/sysreg +++ b/arch/arm64/tools/sysreg @@ -197,6 +197,15 @@ Sysreg SMCR_EL1 3 0 1 2 6 Fields SMCR_ELx EndSysreg +Sysreg SMIDR_EL1 3 1 0 0 6 +Res0 63:32 +Field 31:24 IMPLEMENTER +Field 23:16 REVISION +Field 15 SMPS +Res0 14:12 +Field 11:0 AFFINITY +EndSysreg + Sysreg SMCR_EL2 3 4 1 2 6 Fields SMCR_ELx EndSysreg -- cgit v1.2.3 From 8e053810e6ce90bd45f97370708b7803f6957651 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Tue, 10 May 2022 17:12:05 +0100 Subject: arm64/sme: Automatically generate SMPRIMAP_EL2 definitions No functional change should be seen from converting SMPRIMAP_EL2 to be generated. Signed-off-by: Mark Brown Reviewed-by: Mark Rutland Link: https://lore.kernel.org/r/20220510161208.631259-10-broonie@kernel.org Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/sysreg.h | 1 - arch/arm64/tools/sysreg | 19 +++++++++++++++++++ 2 files changed, 19 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index ce08a42637bc..2a9468d449fa 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -568,7 +568,6 @@ #define SYS_ZCR_EL2 sys_reg(3, 4, 1, 2, 0) #define SYS_TRFCR_EL2 sys_reg(3, 4, 1, 2, 1) #define SYS_HCRX_EL2 sys_reg(3, 4, 1, 2, 2) -#define SYS_SMPRIMAP_EL2 sys_reg(3, 4, 1, 2, 5) #define SYS_DACR32_EL2 sys_reg(3, 4, 3, 0, 0) #define SYS_HDFGRTR_EL2 sys_reg(3, 4, 3, 1, 4) #define SYS_HDFGWTR_EL2 sys_reg(3, 4, 3, 1, 5) diff --git a/arch/arm64/tools/sysreg b/arch/arm64/tools/sysreg index 1bf88ca3da5b..2cdcdac0465e 100644 --- a/arch/arm64/tools/sysreg +++ b/arch/arm64/tools/sysreg @@ -206,6 +206,25 @@ Res0 14:12 Field 11:0 AFFINITY EndSysreg +Sysreg SMPRIMAP_EL2 3 4 1 2 5 +Field 63:60 P15 +Field 59:56 P14 +Field 55:52 P13 +Field 51:48 P12 +Field 47:44 P11 +Field 43:40 P10 +Field 39:36 F9 +Field 35:32 P8 +Field 31:28 P7 +Field 27:24 P6 +Field 23:20 P5 +Field 19:16 P4 +Field 15:12 P3 +Field 11:8 P2 +Field 7:4 P1 +Field 3:0 P0 +EndSysreg + Sysreg SMCR_EL2 3 4 1 2 6 Fields SMCR_ELx EndSysreg -- cgit v1.2.3 From 9321f0492b89c8d8286d8c5e06f45c984a8221a4 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Tue, 10 May 2022 17:12:06 +0100 Subject: arm64/sme: Generate SMPRI_EL1 definitions Convert SMPRI_EL1 to be generated. No functional change. Signed-off-by: Mark Brown Reviewed-by: Mark Rutland Link: https://lore.kernel.org/r/20220510161208.631259-11-broonie@kernel.org Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/sysreg.h | 3 --- arch/arm64/tools/sysreg | 5 +++++ 2 files changed, 5 insertions(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index 2a9468d449fa..b4affc3fd569 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -215,7 +215,6 @@ #define SYS_ZCR_EL1 sys_reg(3, 0, 1, 2, 0) #define SYS_TRFCR_EL1 sys_reg(3, 0, 1, 2, 1) -#define SYS_SMPRI_EL1 sys_reg(3, 0, 1, 2, 4) #define SYS_TCR_EL1 sys_reg(3, 0, 2, 0, 2) @@ -406,8 +405,6 @@ #define TRBIDR_ALIGN_MASK GENMASK(3, 0) #define TRBIDR_ALIGN_SHIFT 0 -#define SMPRI_EL1_PRIORITY_MASK 0xf - #define SYS_PMINTENSET_EL1 sys_reg(3, 0, 9, 14, 1) #define SYS_PMINTENCLR_EL1 sys_reg(3, 0, 9, 14, 2) diff --git a/arch/arm64/tools/sysreg b/arch/arm64/tools/sysreg index 2cdcdac0465e..d29bc429f504 100644 --- a/arch/arm64/tools/sysreg +++ b/arch/arm64/tools/sysreg @@ -185,6 +185,11 @@ Field 1 A Field 0 M EndSysreg +Sysreg SMPRI_EL1 3 0 1 2 4 +Res0 63:4 +Field 3:0 PRIORITY +EndSysreg + SysregFields SMCR_ELx Res0 63:32 Field 31 FA64 -- cgit v1.2.3 From 11e12a91c118780b76ecae3610efd49b7ff7d39e Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Tue, 10 May 2022 17:12:07 +0100 Subject: arm64/sme: Generate defintions for SVCR Convert SVCR to automatic generation, no functional change. Signed-off-by: Mark Brown Reviewed-by: Mark Rutland Link: https://lore.kernel.org/r/20220510161208.631259-12-broonie@kernel.org Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/sysreg.h | 4 ---- arch/arm64/tools/sysreg | 6 ++++++ 2 files changed, 6 insertions(+), 4 deletions(-) (limited to 'arch') diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index b4affc3fd569..804b5326c393 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -474,10 +474,6 @@ #define SYS_RNDR_EL0 sys_reg(3, 3, 2, 4, 0) #define SYS_RNDRRS_EL0 sys_reg(3, 3, 2, 4, 1) -#define SYS_SVCR sys_reg(3, 3, 4, 2, 2) -#define SVCR_ZA_MASK 2 -#define SVCR_SM_MASK 1 - #define SYS_PMCR_EL0 sys_reg(3, 3, 9, 12, 0) #define SYS_PMCNTENSET_EL0 sys_reg(3, 3, 9, 12, 1) #define SYS_PMCNTENCLR_EL0 sys_reg(3, 3, 9, 12, 2) diff --git a/arch/arm64/tools/sysreg b/arch/arm64/tools/sysreg index d29bc429f504..7888603db50a 100644 --- a/arch/arm64/tools/sysreg +++ b/arch/arm64/tools/sysreg @@ -211,6 +211,12 @@ Res0 14:12 Field 11:0 AFFINITY EndSysreg +Sysreg SVCR 3 3 4 2 2 +Res0 63:2 +Field 1 ZA +Field 0 SM +EndSysreg + Sysreg SMPRIMAP_EL2 3 4 1 2 5 Field 63:60 P15 Field 59:56 P14 -- cgit v1.2.3 From 89e9fb327421081166c1d1682b6601ac93dd610c Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Tue, 10 May 2022 17:12:08 +0100 Subject: arm64/sve: Generate ZCR definitions Convert the various ZCR instances to automatic generation, no functional changes expected. Signed-off-by: Mark Brown Reviewed-by: Mark Rutland Link: https://lore.kernel.org/r/20220510161208.631259-13-broonie@kernel.org Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/sysreg.h | 7 ------- arch/arm64/tools/sysreg | 18 ++++++++++++++++++ 2 files changed, 18 insertions(+), 7 deletions(-) (limited to 'arch') diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index 804b5326c393..91e4f8601393 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -213,7 +213,6 @@ #define SYS_RGSR_EL1 sys_reg(3, 0, 1, 0, 5) #define SYS_GCR_EL1 sys_reg(3, 0, 1, 0, 6) -#define SYS_ZCR_EL1 sys_reg(3, 0, 1, 2, 0) #define SYS_TRFCR_EL1 sys_reg(3, 0, 1, 2, 1) #define SYS_TCR_EL1 sys_reg(3, 0, 2, 0, 2) @@ -558,7 +557,6 @@ #define SYS_HFGRTR_EL2 sys_reg(3, 4, 1, 1, 4) #define SYS_HFGWTR_EL2 sys_reg(3, 4, 1, 1, 5) #define SYS_HFGITR_EL2 sys_reg(3, 4, 1, 1, 6) -#define SYS_ZCR_EL2 sys_reg(3, 4, 1, 2, 0) #define SYS_TRFCR_EL2 sys_reg(3, 4, 1, 2, 1) #define SYS_HCRX_EL2 sys_reg(3, 4, 1, 2, 2) #define SYS_DACR32_EL2 sys_reg(3, 4, 3, 0, 0) @@ -619,7 +617,6 @@ /* VHE encodings for architectural EL0/1 system registers */ #define SYS_SCTLR_EL12 sys_reg(3, 5, 1, 0, 0) #define SYS_CPACR_EL12 sys_reg(3, 5, 1, 0, 2) -#define SYS_ZCR_EL12 sys_reg(3, 5, 1, 2, 0) #define SYS_TTBR0_EL12 sys_reg(3, 5, 2, 0, 0) #define SYS_TTBR1_EL12 sys_reg(3, 5, 2, 0, 1) #define SYS_TCR_EL12 sys_reg(3, 5, 2, 0, 2) @@ -1101,10 +1098,6 @@ #define DCZID_DZP_SHIFT 4 #define DCZID_BS_SHIFT 0 -#define ZCR_ELx_LEN_SHIFT 0 -#define ZCR_ELx_LEN_WIDTH 4 -#define ZCR_ELx_LEN_MASK 0xf - #define CPACR_EL1_FPEN_EL1EN (BIT(20)) /* enable EL1 access */ #define CPACR_EL1_FPEN_EL0EN (BIT(21)) /* enable EL0 access, if EL1EN set */ diff --git a/arch/arm64/tools/sysreg b/arch/arm64/tools/sysreg index 7888603db50a..a236d7a821b4 100644 --- a/arch/arm64/tools/sysreg +++ b/arch/arm64/tools/sysreg @@ -190,6 +190,16 @@ Res0 63:4 Field 3:0 PRIORITY EndSysreg +SysregFields ZCR_ELx +Res0 63:9 +Raz 8:4 +Field 3:0 LEN +EndSysregFields + +Sysreg ZCR_EL1 3 0 1 2 0 +Fields ZCR_ELx +EndSysreg + SysregFields SMCR_ELx Res0 63:32 Field 31 FA64 @@ -217,6 +227,10 @@ Field 1 ZA Field 0 SM EndSysreg +Sysreg ZCR_EL2 3 4 1 2 0 +Fields ZCR_ELx +EndSysreg + Sysreg SMPRIMAP_EL2 3 4 1 2 5 Field 63:60 P15 Field 59:56 P14 @@ -240,6 +254,10 @@ Sysreg SMCR_EL2 3 4 1 2 6 Fields SMCR_ELx EndSysreg +Sysreg ZCR_EL12 3 5 1 2 0 +Fields ZCR_ELx +EndSysreg + Sysreg SMCR_EL12 3 5 1 2 6 Fields SMCR_ELx EndSysreg -- cgit v1.2.3 From 8f0f104e2ab6eed4cad3b111dc206f843bda43ea Mon Sep 17 00:00:00 2001 From: Zhen Lei Date: Wed, 11 May 2022 11:20:32 +0800 Subject: arm64: kdump: Do not allocate crash low memory if not needed When "crashkernel=X,high" is specified, the specified "crashkernel=Y,low" memory is not required in the following corner cases: 1. If both CONFIG_ZONE_DMA and CONFIG_ZONE_DMA32 are disabled, it means that the devices can access any memory. 2. If the system memory is small, the crash high memory may be allocated from the DMA zones. If that happens, there's no need to allocate another crash low memory because there's already one. Add condition '(crash_base >= CRASH_ADDR_LOW_MAX)' to determine whether the 'high' memory is allocated above DMA zones. Note: when both CONFIG_ZONE_DMA and CONFIG_ZONE_DMA32 are disabled, the entire physical memory is DMA accessible, CRASH_ADDR_LOW_MAX equals 'PHYS_MASK + 1'. Signed-off-by: Zhen Lei Acked-by: Baoquan He Link: https://lore.kernel.org/r/20220511032033.426-1-thunder.leizhen@huawei.com Signed-off-by: Catalin Marinas --- Documentation/admin-guide/kernel-parameters.txt | 5 +++-- arch/arm64/mm/init.c | 3 ++- 2 files changed, 5 insertions(+), 3 deletions(-) (limited to 'arch') diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index f6ff55840751..1b543c3109f4 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -823,7 +823,7 @@ low memory is needed to make sure DMA buffers for 32-bit devices won't run out. Kernel would try to allocate at least 256M below 4G automatically. - This one let user to specify own low range under 4G + This one lets the user specify own low range under 4G for second kernel instead. 0: to disable low allocation. It will be ignored when crashkernel=X,high is not used @@ -832,7 +832,8 @@ [KNL, ARM64] range in low memory. This one lets the user specify a low range in the DMA zone for the crash dump kernel. - It will be ignored when crashkernel=X,high is not used. + It will be ignored when crashkernel=X,high is not used + or memory reserved is located in the DMA zones. cryptomgr.notests [KNL] Disable crypto self-tests diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c index 18ba66c90991..ac510fb6a2c0 100644 --- a/arch/arm64/mm/init.c +++ b/arch/arm64/mm/init.c @@ -170,7 +170,8 @@ static void __init reserve_crashkernel(void) return; } - if (crash_low_size && reserve_crashkernel_low(crash_low_size)) { + if ((crash_base >= CRASH_ADDR_LOW_MAX) && + crash_low_size && reserve_crashkernel_low(crash_low_size)) { memblock_phys_free(crash_base, crash_size); return; } -- cgit v1.2.3 From f0d9d79ec793ec66271e80ff2f9bf7a10458a584 Mon Sep 17 00:00:00 2001 From: Baolin Wang Date: Mon, 16 May 2022 08:55:57 +0800 Subject: arm64/hugetlb: Use ptep_get() to get the pte value of a huge page The original huge_ptep_get() on ARM64 is just a wrapper of ptep_get(), which will not take into account any contig-PTEs dirty and access bits. Meanwhile we will implement a new ARM64-specific huge_ptep_get() interface in following patch, which will take into account any contig-PTEs dirty and access bits. To keep the same efficient logic to get the pte value, change to use ptep_get() as a preparation. Signed-off-by: Baolin Wang Reviewed-by: Muchun Song Reviewed-by: Anshuman Khandual Link: https://lore.kernel.org/r/5113ed6e103f995e1d0f0c9fda0373b761bbcad2.1652496622.git.baolin.wang@linux.alibaba.com Signed-off-by: Catalin Marinas --- arch/arm64/mm/hugetlbpage.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'arch') diff --git a/arch/arm64/mm/hugetlbpage.c b/arch/arm64/mm/hugetlbpage.c index 749435b01a89..2aa1b0e176a2 100644 --- a/arch/arm64/mm/hugetlbpage.c +++ b/arch/arm64/mm/hugetlbpage.c @@ -172,7 +172,7 @@ static pte_t get_clear_contig(struct mm_struct *mm, unsigned long pgsize, unsigned long ncontig) { - pte_t orig_pte = huge_ptep_get(ptep); + pte_t orig_pte = ptep_get(ptep); unsigned long i; for (i = 0; i < ncontig; i++, addr += pgsize, ptep++) { @@ -379,7 +379,7 @@ pte_t huge_ptep_get_and_clear(struct mm_struct *mm, { int ncontig; size_t pgsize; - pte_t orig_pte = huge_ptep_get(ptep); + pte_t orig_pte = ptep_get(ptep); if (!pte_cont(orig_pte)) return ptep_get_and_clear(mm, addr, ptep); @@ -402,11 +402,11 @@ static int __cont_access_flags_changed(pte_t *ptep, pte_t pte, int ncontig) { int i; - if (pte_write(pte) != pte_write(huge_ptep_get(ptep))) + if (pte_write(pte) != pte_write(ptep_get(ptep))) return 1; for (i = 0; i < ncontig; i++) { - pte_t orig_pte = huge_ptep_get(ptep + i); + pte_t orig_pte = ptep_get(ptep + i); if (pte_dirty(pte) != pte_dirty(orig_pte)) return 1; -- cgit v1.2.3 From bc5dfb4fd7bd471c77ea48143159eb5e1308d636 Mon Sep 17 00:00:00 2001 From: Baolin Wang Date: Mon, 16 May 2022 08:55:58 +0800 Subject: arm64/hugetlb: Implement arm64 specific huge_ptep_get() Now we use huge_ptep_get() to get the pte value of a hugetlb page, however it will only return one specific pte value for the CONT-PTE or CONT-PMD size hugetlb on ARM64 system, which can contain several continuous pte or pmd entries with same page table attributes. And it will not take into account the subpages' dirty or young bits of a CONT-PTE/PMD size hugetlb page. So the huge_ptep_get() is inconsistent with huge_ptep_get_and_clear(), which already takes account the dirty or young bits for any subpages in this CONT-PTE/PMD size hugetlb [1]. Meanwhile we can miss dirty or young flags statistics for hugetlb pages with current huge_ptep_get(), such as the gather_hugetlb_stats() function, and CONT-PTE/PMD hugetlb monitoring with DAMON. Thus define an ARM64 specific huge_ptep_get() implementation as well as enabling __HAVE_ARCH_HUGE_PTEP_GET, that will take into account any subpages' dirty or young bits for CONT-PTE/PMD size hugetlb page, for those functions that want to check the dirty and young flags of a hugetlb page. [1] https://lore.kernel.org/linux-mm/85bd80b4-b4fd-0d3f-a2e5-149559f2f387@oracle.com/ Suggested-by: Muchun Song Signed-off-by: Baolin Wang Reviewed-by: Muchun Song Reviewed-by: Anshuman Khandual Link: https://lore.kernel.org/r/624109a80ac4bbdf1e462dfa0b49e9f7c31a7c0d.1652496622.git.baolin.wang@linux.alibaba.com Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/hugetlb.h | 2 ++ arch/arm64/mm/hugetlbpage.c | 22 ++++++++++++++++++++++ 2 files changed, 24 insertions(+) (limited to 'arch') diff --git a/arch/arm64/include/asm/hugetlb.h b/arch/arm64/include/asm/hugetlb.h index 1242f71937f8..d656822b13f1 100644 --- a/arch/arm64/include/asm/hugetlb.h +++ b/arch/arm64/include/asm/hugetlb.h @@ -44,6 +44,8 @@ extern void huge_ptep_clear_flush(struct vm_area_struct *vma, #define __HAVE_ARCH_HUGE_PTE_CLEAR extern void huge_pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep, unsigned long sz); +#define __HAVE_ARCH_HUGE_PTEP_GET +extern pte_t huge_ptep_get(pte_t *ptep); extern void set_huge_swap_pte_at(struct mm_struct *mm, unsigned long addr, pte_t *ptep, pte_t pte, unsigned long sz); #define set_huge_swap_pte_at set_huge_swap_pte_at diff --git a/arch/arm64/mm/hugetlbpage.c b/arch/arm64/mm/hugetlbpage.c index 2aa1b0e176a2..64bb078e2e7b 100644 --- a/arch/arm64/mm/hugetlbpage.c +++ b/arch/arm64/mm/hugetlbpage.c @@ -158,6 +158,28 @@ static inline int num_contig_ptes(unsigned long size, size_t *pgsize) return contig_ptes; } +pte_t huge_ptep_get(pte_t *ptep) +{ + int ncontig, i; + size_t pgsize; + pte_t orig_pte = ptep_get(ptep); + + if (!pte_present(orig_pte) || !pte_cont(orig_pte)) + return orig_pte; + + ncontig = num_contig_ptes(page_size(pte_page(orig_pte)), &pgsize); + for (i = 0; i < ncontig; i++, ptep++) { + pte_t pte = ptep_get(ptep); + + if (pte_dirty(pte)) + orig_pte = pte_mkdirty(orig_pte); + + if (pte_young(pte)) + orig_pte = pte_mkyoung(orig_pte); + } + return orig_pte; +} + /* * Changing some bits of contiguous entries requires us to follow a * Break-Before-Make approach, breaking the whole contiguous set -- cgit v1.2.3 From 6ee3cf6a209fc76d8ae51fba357a62841ec6124c Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Fri, 29 Apr 2022 15:13:46 +0200 Subject: arm64: lds: move special code sections out of kernel exec segment There are a few code sections that are emitted into the kernel's executable .text segment simply because they contain code, but are actually never executed via this mapping, so they can happily live in a region that gets mapped without executable permissions, reducing the risk of being gadgetized. Note that the kexec and hibernate region contents are always copied into a fresh page, and so there is no need to align them as long as the overall size of each is below 4 KiB. Signed-off-by: Ard Biesheuvel Acked-by: Mark Rutland Link: https://lore.kernel.org/r/20220429131347.3621090-2-ardb@kernel.org Signed-off-by: Catalin Marinas --- arch/arm64/kernel/vmlinux.lds.S | 21 ++++++++++++--------- 1 file changed, 12 insertions(+), 9 deletions(-) (limited to 'arch') diff --git a/arch/arm64/kernel/vmlinux.lds.S b/arch/arm64/kernel/vmlinux.lds.S index edaf0faf766f..2d4a8f995175 100644 --- a/arch/arm64/kernel/vmlinux.lds.S +++ b/arch/arm64/kernel/vmlinux.lds.S @@ -93,7 +93,6 @@ jiffies = jiffies_64; #ifdef CONFIG_HIBERNATION #define HIBERNATE_TEXT \ - . = ALIGN(SZ_4K); \ __hibernate_exit_text_start = .; \ *(.hibernate_exit.text) \ __hibernate_exit_text_end = .; @@ -103,7 +102,6 @@ jiffies = jiffies_64; #ifdef CONFIG_KEXEC_CORE #define KEXEC_TEXT \ - . = ALIGN(SZ_4K); \ __relocate_new_kernel_start = .; \ *(.kexec_relocate.text) \ __relocate_new_kernel_end = .; @@ -170,9 +168,6 @@ SECTIONS KPROBES_TEXT HYPERVISOR_TEXT IDMAP_TEXT - HIBERNATE_TEXT - KEXEC_TEXT - TRAMP_TEXT *(.gnu.warning) . = ALIGN(16); *(.got) /* Global offset table */ @@ -194,6 +189,14 @@ SECTIONS HYPERVISOR_DATA_SECTIONS + /* code sections that are never executed via the kernel mapping */ + .rodata.text : { + TRAMP_TEXT + HIBERNATE_TEXT + KEXEC_TEXT + . = ALIGN(PAGE_SIZE); + } + idmap_pg_dir = .; . += IDMAP_DIR_SIZE; idmap_pg_end = .; @@ -337,8 +340,8 @@ ASSERT(__hyp_idmap_text_end - __hyp_idmap_text_start <= PAGE_SIZE, ASSERT(__idmap_text_end - (__idmap_text_start & ~(SZ_4K - 1)) <= SZ_4K, "ID map text too big or misaligned") #ifdef CONFIG_HIBERNATION -ASSERT(__hibernate_exit_text_end - (__hibernate_exit_text_start & ~(SZ_4K - 1)) - <= SZ_4K, "Hibernate exit text too big or misaligned") +ASSERT(__hibernate_exit_text_end - __hibernate_exit_text_start <= SZ_4K, + "Hibernate exit text is bigger than 4 KiB") #endif #ifdef CONFIG_UNMAP_KERNEL_AT_EL0 ASSERT((__entry_tramp_text_end - __entry_tramp_text_start) <= 3*PAGE_SIZE, @@ -362,7 +365,7 @@ ASSERT(swapper_pg_dir - tramp_pg_dir == TRAMP_SWAPPER_OFFSET, #ifdef CONFIG_KEXEC_CORE /* kexec relocation code should fit into one KEXEC_CONTROL_PAGE_SIZE */ -ASSERT(__relocate_new_kernel_end - (__relocate_new_kernel_start & ~(SZ_4K - 1)) - <= SZ_4K, "kexec relocation code is too big or misaligned") +ASSERT(__relocate_new_kernel_end - __relocate_new_kernel_start <= SZ_4K, + "kexec relocation code is bigger than 4 KiB") ASSERT(KEXEC_CONTROL_PAGE_SIZE >= SZ_4K, "KEXEC_CONTROL_PAGE_SIZE is broken") #endif -- cgit v1.2.3 From 01142791b0d11f20becccd0b30ed5e8fbb3822b6 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Fri, 29 Apr 2022 15:13:47 +0200 Subject: arm64: mm: avoid writable executable mappings in kexec/hibernate code The temporary mappings of the low-level kexec and hibernate helpers are created with both writable and executable attributes, which is not necessary here, and generally best avoided. So use read-only, executable attributes instead. Signed-off-by: Ard Biesheuvel Acked-by: Mark Rutland Link: https://lore.kernel.org/r/20220429131347.3621090-3-ardb@kernel.org Signed-off-by: Catalin Marinas --- arch/arm64/mm/trans_pgd.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm64/mm/trans_pgd.c b/arch/arm64/mm/trans_pgd.c index d7da8ca40d2e..4ea2eefbc053 100644 --- a/arch/arm64/mm/trans_pgd.c +++ b/arch/arm64/mm/trans_pgd.c @@ -238,7 +238,7 @@ int trans_pgd_idmap_page(struct trans_pgd_info *info, phys_addr_t *trans_ttbr0, int this_level, index, level_lsb, level_msb; dst_addr &= PAGE_MASK; - prev_level_entry = pte_val(pfn_pte(pfn, PAGE_KERNEL_EXEC)); + prev_level_entry = pte_val(pfn_pte(pfn, PAGE_KERNEL_ROX)); for (this_level = 3; this_level >= 0; this_level--) { levels[this_level] = trans_alloc(info); -- cgit v1.2.3 From 3cb7e662a9309e1d54d3d3aba530616a20ea9a10 Mon Sep 17 00:00:00 2001 From: Juerg Haefliger Date: Tue, 17 May 2022 16:16:47 +0200 Subject: arm64: Kconfig: Fix indentation and add comments The convention for indentation seems to be a single tab. Help text is further indented by an additional two whitespaces. Fix the lines that violate these rules. While add it, add trailing comments to endif and endmenu statements for better readability. Signed-off-by: Juerg Haefliger Link: https://lore.kernel.org/r/20220517141648.331976-2-juergh@canonical.com Signed-off-by: Catalin Marinas --- arch/arm64/Kconfig | 95 +++++++++++++++++++++++++++--------------------------- 1 file changed, 47 insertions(+), 48 deletions(-) (limited to 'arch') diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 764433588fdd..2741e98d27fd 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -253,31 +253,31 @@ config ARM64_CONT_PMD_SHIFT default 4 config ARCH_MMAP_RND_BITS_MIN - default 14 if ARM64_64K_PAGES - default 16 if ARM64_16K_PAGES - default 18 + default 14 if ARM64_64K_PAGES + default 16 if ARM64_16K_PAGES + default 18 # max bits determined by the following formula: # VA_BITS - PAGE_SHIFT - 3 config ARCH_MMAP_RND_BITS_MAX - default 19 if ARM64_VA_BITS=36 - default 24 if ARM64_VA_BITS=39 - default 27 if ARM64_VA_BITS=42 - default 30 if ARM64_VA_BITS=47 - default 29 if ARM64_VA_BITS=48 && ARM64_64K_PAGES - default 31 if ARM64_VA_BITS=48 && ARM64_16K_PAGES - default 33 if ARM64_VA_BITS=48 - default 14 if ARM64_64K_PAGES - default 16 if ARM64_16K_PAGES - default 18 + default 19 if ARM64_VA_BITS=36 + default 24 if ARM64_VA_BITS=39 + default 27 if ARM64_VA_BITS=42 + default 30 if ARM64_VA_BITS=47 + default 29 if ARM64_VA_BITS=48 && ARM64_64K_PAGES + default 31 if ARM64_VA_BITS=48 && ARM64_16K_PAGES + default 33 if ARM64_VA_BITS=48 + default 14 if ARM64_64K_PAGES + default 16 if ARM64_16K_PAGES + default 18 config ARCH_MMAP_RND_COMPAT_BITS_MIN - default 7 if ARM64_64K_PAGES - default 9 if ARM64_16K_PAGES - default 11 + default 7 if ARM64_64K_PAGES + default 9 if ARM64_16K_PAGES + default 11 config ARCH_MMAP_RND_COMPAT_BITS_MAX - default 16 + default 16 config NO_IOPORT_MAP def_bool y if !PCI @@ -304,7 +304,7 @@ config GENERIC_HWEIGHT def_bool y config GENERIC_CSUM - def_bool y + def_bool y config GENERIC_CALIBRATE_DELAY def_bool y @@ -1037,8 +1037,7 @@ config SOCIONEXT_SYNQUACER_PREITS If unsure, say Y. -endmenu - +endmenu # "ARM errata workarounds via the alternatives framework" choice prompt "Page size" @@ -1566,9 +1565,9 @@ config SETEND_EMULATION be unexpected results in the applications. If unsure, say Y -endif +endif # ARMV8_DEPRECATED -endif +endif # COMPAT menu "ARMv8.1 architectural features" @@ -1593,15 +1592,15 @@ config ARM64_PAN bool "Enable support for Privileged Access Never (PAN)" default y help - Privileged Access Never (PAN; part of the ARMv8.1 Extensions) - prevents the kernel or hypervisor from accessing user-space (EL0) - memory directly. + Privileged Access Never (PAN; part of the ARMv8.1 Extensions) + prevents the kernel or hypervisor from accessing user-space (EL0) + memory directly. - Choosing this option will cause any unprotected (not using - copy_to_user et al) memory access to fail with a permission fault. + Choosing this option will cause any unprotected (not using + copy_to_user et al) memory access to fail with a permission fault. - The feature is detected at runtime, and will remain as a 'nop' - instruction if the cpu does not implement the feature. + The feature is detected at runtime, and will remain as a 'nop' + instruction if the cpu does not implement the feature. config AS_HAS_LDAPR def_bool $(as-instr,.arch_extension rcpc) @@ -1629,15 +1628,15 @@ config ARM64_USE_LSE_ATOMICS built with binutils >= 2.25 in order for the new instructions to be used. -endmenu +endmenu # "ARMv8.1 architectural features" menu "ARMv8.2 architectural features" config AS_HAS_ARMV8_2 - def_bool $(cc-option,-Wa$(comma)-march=armv8.2-a) + def_bool $(cc-option,-Wa$(comma)-march=armv8.2-a) config AS_HAS_SHA3 - def_bool $(as-instr,.arch armv8.2-a+sha3) + def_bool $(as-instr,.arch armv8.2-a+sha3) config ARM64_PMEM bool "Enable support for persistent memory" @@ -1681,7 +1680,7 @@ config ARM64_CNP at runtime, and does not affect PEs that do not implement this feature. -endmenu +endmenu # "ARMv8.2 architectural features" menu "ARMv8.3 architectural features" @@ -1744,7 +1743,7 @@ config AS_HAS_PAC config AS_HAS_CFI_NEGATE_RA_STATE def_bool $(as-instr,.cfi_startproc\n.cfi_negate_ra_state\n.cfi_endproc\n) -endmenu +endmenu # "ARMv8.3 architectural features" menu "ARMv8.4 architectural features" @@ -1785,7 +1784,7 @@ config ARM64_TLB_RANGE The feature introduces new assembly instructions, and they were support when binutils >= 2.30. -endmenu +endmenu # "ARMv8.4 architectural features" menu "ARMv8.5 architectural features" @@ -1892,7 +1891,7 @@ config ARM64_MTE Documentation/arm64/memory-tagging-extension.rst. -endmenu +endmenu # "ARMv8.5 architectural features" menu "ARMv8.7 architectural features" @@ -1901,12 +1900,12 @@ config ARM64_EPAN default y depends on ARM64_PAN help - Enhanced Privileged Access Never (EPAN) allows Privileged - Access Never to be used with Execute-only mappings. + Enhanced Privileged Access Never (EPAN) allows Privileged + Access Never to be used with Execute-only mappings. - The feature is detected at runtime, and will remain disabled - if the cpu does not implement the feature. -endmenu + The feature is detected at runtime, and will remain disabled + if the cpu does not implement the feature. +endmenu # "ARMv8.7 architectural features" config ARM64_SVE bool "ARM Scalable Vector Extension support" @@ -1982,7 +1981,7 @@ config ARM64_DEBUG_PRIORITY_MASKING the validity of ICC_PMR_EL1 when calling concerned functions. If unsure, say N -endif +endif # ARM64_PSEUDO_NMI config RELOCATABLE bool "Build a relocatable kernel image" if EXPERT @@ -2053,7 +2052,7 @@ config ARCH_NR_GPIO If unsure, leave the default value. -endmenu +endmenu # "Kernel Features" menu "Boot options" @@ -2117,7 +2116,7 @@ config EFI help This option provides support for runtime services provided by UEFI firmware (such as non-volatile variables, realtime - clock, and platform reset). A UEFI stub is also provided to + clock, and platform reset). A UEFI stub is also provided to allow the kernel to be booted as an EFI application. This is only useful on systems that have UEFI firmware. @@ -2132,7 +2131,7 @@ config DMI However, even with this option, the resultant kernel should continue to boot on existing non-UEFI platforms. -endmenu +endmenu # "Boot options" config SYSVIPC_COMPAT def_bool y @@ -2153,7 +2152,7 @@ config ARCH_HIBERNATION_HEADER config ARCH_SUSPEND_POSSIBLE def_bool y -endmenu +endmenu # "Power management options" menu "CPU Power Management" @@ -2161,7 +2160,7 @@ source "drivers/cpuidle/Kconfig" source "drivers/cpufreq/Kconfig" -endmenu +endmenu # "CPU Power Management" source "drivers/acpi/Kconfig" @@ -2169,4 +2168,4 @@ source "arch/arm64/kvm/Kconfig" if CRYPTO source "arch/arm64/crypto/Kconfig" -endif +endif # CRYPTO -- cgit v1.2.3 From aea3cb356c9643b0936cb7c898e23edcd7c8f6c9 Mon Sep 17 00:00:00 2001 From: Juerg Haefliger Date: Tue, 17 May 2022 16:16:48 +0200 Subject: arm64: Kconfig.platforms: Add comments Add trailing comments to endmenu statements for better readability. Signed-off-by: Juerg Haefliger Link: https://lore.kernel.org/r/20220517141648.331976-3-juergh@canonical.com Signed-off-by: Catalin Marinas --- arch/arm64/Kconfig.platforms | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm64/Kconfig.platforms b/arch/arm64/Kconfig.platforms index 30b123cde02c..de9a18d3026f 100644 --- a/arch/arm64/Kconfig.platforms +++ b/arch/arm64/Kconfig.platforms @@ -325,4 +325,4 @@ config ARCH_ZYNQMP help This enables support for Xilinx ZynqMP Family -endmenu +endmenu # "Platform selection" -- cgit v1.2.3 From 8e1f78a92101e327740ea0dac903bff9ad37a59a Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Tue, 17 May 2022 16:52:03 +0200 Subject: arm64/sve: Move sve_free() into SVE code section If CONFIG_ARM64_SVE is not set: arch/arm64/kernel/fpsimd.c:294:13: warning: ‘sve_free’ defined but not used [-Wunused-function] Fix this by moving sve_free() and __sve_free() into the existing section protected by "#ifdef CONFIG_ARM64_SVE", now the last user outside that section has been removed. Fixes: a1259dd80719 ("arm64/sve: Delay freeing memory in fpsimd_flush_thread()") Signed-off-by: Geert Uytterhoeven Reviewed-by: Mark Brown Link: https://lore.kernel.org/r/cd633284683c24cb9469f8ff429915aedf67f868.1652798894.git.geert+renesas@glider.be Signed-off-by: Catalin Marinas --- arch/arm64/kernel/fpsimd.c | 33 ++++++++++++++++----------------- 1 file changed, 16 insertions(+), 17 deletions(-) (limited to 'arch') diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c index a568735b7c2e..a6eee3fa3448 100644 --- a/arch/arm64/kernel/fpsimd.c +++ b/arch/arm64/kernel/fpsimd.c @@ -281,23 +281,6 @@ static bool have_cpu_fpsimd_context(void) return !preemptible() && __this_cpu_read(fpsimd_context_busy); } -/* - * Call __sve_free() directly only if you know task can't be scheduled - * or preempted. - */ -static void __sve_free(struct task_struct *task) -{ - kfree(task->thread.sve_state); - task->thread.sve_state = NULL; -} - -static void sve_free(struct task_struct *task) -{ - WARN_ON(test_tsk_thread_flag(task, TIF_SVE)); - - __sve_free(task); -} - unsigned int task_get_vl(const struct task_struct *task, enum vec_type type) { return task->thread.vl[type]; @@ -690,6 +673,22 @@ static void sve_to_fpsimd(struct task_struct *task) } #ifdef CONFIG_ARM64_SVE +/* + * Call __sve_free() directly only if you know task can't be scheduled + * or preempted. + */ +static void __sve_free(struct task_struct *task) +{ + kfree(task->thread.sve_state); + task->thread.sve_state = NULL; +} + +static void sve_free(struct task_struct *task) +{ + WARN_ON(test_tsk_thread_flag(task, TIF_SVE)); + + __sve_free(task); +} /* * Return how many bytes of memory are required to store the full SVE -- cgit v1.2.3 From af65ea977bb8056b4e12b7057ababf19ab086f67 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Fri, 20 May 2022 17:16:33 +0100 Subject: arm64/sysreg: Generate definitions for CLIDR_EL1 Convert CLIDR_EL1 to be automatically generated with definition as per DDI0487H.a. No functional change. Signed-off-by: Mark Brown Reviewed-by: Mark Rutland Link: https://lore.kernel.org/r/20220520161639.324236-2-broonie@kernel.org Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/sysreg.h | 1 - arch/arm64/tools/sysreg | 16 ++++++++++++++++ 2 files changed, 16 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index 91e4f8601393..0cea8bdb792f 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -457,7 +457,6 @@ #define SYS_CNTKCTL_EL1 sys_reg(3, 0, 14, 1, 0) #define SYS_CCSIDR_EL1 sys_reg(3, 1, 0, 0, 0) -#define SYS_CLIDR_EL1 sys_reg(3, 1, 0, 0, 1) #define SYS_GMID_EL1 sys_reg(3, 1, 0, 0, 4) #define SYS_AIDR_EL1 sys_reg(3, 1, 0, 0, 7) diff --git a/arch/arm64/tools/sysreg b/arch/arm64/tools/sysreg index a236d7a821b4..0067d07f9125 100644 --- a/arch/arm64/tools/sysreg +++ b/arch/arm64/tools/sysreg @@ -212,6 +212,22 @@ Sysreg SMCR_EL1 3 0 1 2 6 Fields SMCR_ELx EndSysreg +Sysreg CLIDR_EL1 3 1 0 0 1 +Res0 63:47 +Field 46:33 Ttypen +Field 32:30 ICB +Field 29:27 LoUU +Field 26:24 LoC +Field 23:21 LoUIS +Field 20:18 Ctype7 +Field 17:15 Ctype6 +Field 14:12 Ctype5 +Field 11:9 Ctype4 +Field 8:6 Ctype3 +Field 5:3 Ctype2 +Field 2:0 Ctype1 +EndSysreg + Sysreg SMIDR_EL1 3 1 0 0 6 Res0 63:32 Field 31:24 IMPLEMENTER -- cgit v1.2.3 From 8c12e22c9f88142630bfadc3685b640aea94a9d5 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Fri, 20 May 2022 17:16:34 +0100 Subject: arm64/sysreg: Generate definitions for CONTEXTIDR_ELx Convert the various CONTEXTIDR_ELx register definitions to be automatically generated following the definitions in DDI0487H.a. No functional change. Signed-off-by: Mark Brown Reviewed-by: Mark Rutland Link: https://lore.kernel.org/r/20220520161639.324236-3-broonie@kernel.org Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/sysreg.h | 2 -- arch/arm64/tools/sysreg | 17 +++++++++++++++++ 2 files changed, 17 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index 0cea8bdb792f..4fd64e6ec407 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -449,7 +449,6 @@ #define SYS_ICC_IGRPEN0_EL1 sys_reg(3, 0, 12, 12, 6) #define SYS_ICC_IGRPEN1_EL1 sys_reg(3, 0, 12, 12, 7) -#define SYS_CONTEXTIDR_EL1 sys_reg(3, 0, 13, 0, 1) #define SYS_TPIDR_EL1 sys_reg(3, 0, 13, 0, 4) #define SYS_SCXTNUM_EL1 sys_reg(3, 0, 13, 0, 7) @@ -629,7 +628,6 @@ #define SYS_MAIR_EL12 sys_reg(3, 5, 10, 2, 0) #define SYS_AMAIR_EL12 sys_reg(3, 5, 10, 3, 0) #define SYS_VBAR_EL12 sys_reg(3, 5, 12, 0, 0) -#define SYS_CONTEXTIDR_EL12 sys_reg(3, 5, 13, 0, 1) #define SYS_CNTKCTL_EL12 sys_reg(3, 5, 14, 1, 0) #define SYS_CNTP_TVAL_EL02 sys_reg(3, 5, 14, 2, 0) #define SYS_CNTP_CTL_EL02 sys_reg(3, 5, 14, 2, 1) diff --git a/arch/arm64/tools/sysreg b/arch/arm64/tools/sysreg index 0067d07f9125..b2e01e108b8e 100644 --- a/arch/arm64/tools/sysreg +++ b/arch/arm64/tools/sysreg @@ -212,6 +212,15 @@ Sysreg SMCR_EL1 3 0 1 2 6 Fields SMCR_ELx EndSysreg +SysregFields CONTEXTIDR_ELx +Res0 63:32 +Field 31:0 PROCID +EndSysregFields + +Sysreg CONTEXTIDR_EL1 3 0 13 0 1 +Fields CONTEXTIDR_ELx +EndSysreg + Sysreg CLIDR_EL1 3 1 0 0 1 Res0 63:47 Field 46:33 Ttypen @@ -270,6 +279,10 @@ Sysreg SMCR_EL2 3 4 1 2 6 Fields SMCR_ELx EndSysreg +Sysreg CONTEXTIDR_EL2 3 4 13 0 1 +Fields CONTEXTIDR_ELx +EndSysreg + Sysreg ZCR_EL12 3 5 1 2 0 Fields ZCR_ELx EndSysreg @@ -278,6 +291,10 @@ Sysreg SMCR_EL12 3 5 1 2 6 Fields SMCR_ELx EndSysreg +Sysreg CONTEXTIDR_EL12 3 5 13 0 1 +Fields CONTEXTIDR_ELx +EndSysreg + SysregFields TTBRx_EL1 Field 63:48 ASID Field 47:1 BADDR -- cgit v1.2.3 From b5c0f1051dc3ba4a4c53fccb6604ecc56b0a2982 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Fri, 20 May 2022 17:16:35 +0100 Subject: arm64/sysreg: Generate definitions for CPACR_ELx Convert the CPACR system register definitions to be automatically generated using the definitions in DDI0487H.a. The kernel does have some additional definitions for subfields of SMEN, FPEN and ZEN which are not identified as distinct subfields in the architecture so the definitions are not updated as part of this patch. No functional change. Signed-off-by: Mark Brown Reviewed-by: Mark Rutland Link: https://lore.kernel.org/r/20220520161639.324236-4-broonie@kernel.org Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/sysreg.h | 2 -- arch/arm64/tools/sysreg | 20 ++++++++++++++++++++ 2 files changed, 20 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index 4fd64e6ec407..7603c3344697 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -209,7 +209,6 @@ #define SYS_ID_AA64MMFR2_EL1 sys_reg(3, 0, 0, 7, 2) #define SYS_ACTLR_EL1 sys_reg(3, 0, 1, 0, 1) -#define SYS_CPACR_EL1 sys_reg(3, 0, 1, 0, 2) #define SYS_RGSR_EL1 sys_reg(3, 0, 1, 0, 5) #define SYS_GCR_EL1 sys_reg(3, 0, 1, 0, 6) @@ -614,7 +613,6 @@ /* VHE encodings for architectural EL0/1 system registers */ #define SYS_SCTLR_EL12 sys_reg(3, 5, 1, 0, 0) -#define SYS_CPACR_EL12 sys_reg(3, 5, 1, 0, 2) #define SYS_TTBR0_EL12 sys_reg(3, 5, 2, 0, 0) #define SYS_TTBR1_EL12 sys_reg(3, 5, 2, 0, 1) #define SYS_TCR_EL12 sys_reg(3, 5, 2, 0, 2) diff --git a/arch/arm64/tools/sysreg b/arch/arm64/tools/sysreg index b2e01e108b8e..4bf413770b65 100644 --- a/arch/arm64/tools/sysreg +++ b/arch/arm64/tools/sysreg @@ -185,6 +185,22 @@ Field 1 A Field 0 M EndSysreg +SysregFields CPACR_ELx +Res0 63:29 +Field 28 TTA +Res0 27:26 +Field 25:24 SMEN +Res0 23:22 +Field 21:20 FPEN +Res0 19:18 +Field 17:16 ZEN +Res0 15:0 +EndSysregFields + +Sysreg CPACR_EL1 3 0 1 0 2 +Fields CPACR_ELx +EndSysreg + Sysreg SMPRI_EL1 3 0 1 2 4 Res0 63:4 Field 3:0 PRIORITY @@ -283,6 +299,10 @@ Sysreg CONTEXTIDR_EL2 3 4 13 0 1 Fields CONTEXTIDR_ELx EndSysreg +Sysreg CPACR_EL12 3 5 1 0 2 +Fields CPACR_ELx +EndSysreg + Sysreg ZCR_EL12 3 5 1 2 0 Fields ZCR_ELx EndSysreg -- cgit v1.2.3 From 8bd354b30533632396627291c4a3792f9c2947b2 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Fri, 20 May 2022 17:16:36 +0100 Subject: arm64/sysreg: Generate definitions for CSSELR_EL1 Convert CSSELR_EL1 to automatic generation as per DDI0487H.a, no functional change. Signed-off-by: Mark Brown Reviewed-by: Mark Rutland Link: https://lore.kernel.org/r/20220520161639.324236-5-broonie@kernel.org Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/sysreg.h | 2 -- arch/arm64/tools/sysreg | 7 +++++++ 2 files changed, 7 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index 7603c3344697..d1ca7f11e110 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -462,8 +462,6 @@ #define SMIDR_EL1_SMPS_SHIFT 15 #define SMIDR_EL1_AFFINITY_SHIFT 0 -#define SYS_CSSELR_EL1 sys_reg(3, 2, 0, 0, 0) - #define SYS_CTR_EL0 sys_reg(3, 3, 0, 0, 1) #define SYS_DCZID_EL0 sys_reg(3, 3, 0, 0, 7) diff --git a/arch/arm64/tools/sysreg b/arch/arm64/tools/sysreg index 4bf413770b65..759075747dcc 100644 --- a/arch/arm64/tools/sysreg +++ b/arch/arm64/tools/sysreg @@ -262,6 +262,13 @@ Res0 14:12 Field 11:0 AFFINITY EndSysreg +Sysreg CSSELR_EL1 3 2 0 0 0 +Res0 63:5 +Field 4 TnD +Field 3:1 Level +Field 0 InD +EndSysreg + Sysreg SVCR 3 3 4 2 2 Res0 63:2 Field 1 ZA -- cgit v1.2.3 From 01baa57ad6865bf60d5fcd77b31b2bd8bb155176 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Fri, 20 May 2022 17:16:37 +0100 Subject: arm64/sysreg: Generate definitions for DACR32_EL2 Convert DACR32_EL2 to automatic register generation as per DDI0487H.a, no functional change. Signed-off-by: Mark Brown Reviewed-by: Mark Rutland Link: https://lore.kernel.org/r/20220520161639.324236-6-broonie@kernel.org Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/sysreg.h | 1 - arch/arm64/tools/sysreg | 20 ++++++++++++++++++++ 2 files changed, 20 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index d1ca7f11e110..c9d2d2a3dd68 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -554,7 +554,6 @@ #define SYS_HFGITR_EL2 sys_reg(3, 4, 1, 1, 6) #define SYS_TRFCR_EL2 sys_reg(3, 4, 1, 2, 1) #define SYS_HCRX_EL2 sys_reg(3, 4, 1, 2, 2) -#define SYS_DACR32_EL2 sys_reg(3, 4, 3, 0, 0) #define SYS_HDFGRTR_EL2 sys_reg(3, 4, 3, 1, 4) #define SYS_HDFGWTR_EL2 sys_reg(3, 4, 3, 1, 5) #define SYS_HAFGRTR_EL2 sys_reg(3, 4, 3, 1, 6) diff --git a/arch/arm64/tools/sysreg b/arch/arm64/tools/sysreg index 759075747dcc..af21acbb542d 100644 --- a/arch/arm64/tools/sysreg +++ b/arch/arm64/tools/sysreg @@ -302,6 +302,26 @@ Sysreg SMCR_EL2 3 4 1 2 6 Fields SMCR_ELx EndSysreg +Sysreg DACR32_EL2 3 4 3 0 0 +Res0 63:32 +Field 31:30 D15 +Field 29:28 D14 +Field 27:26 D13 +Field 25:24 D12 +Field 23:22 D11 +Field 21:20 D10 +Field 19:18 D9 +Field 17:16 D8 +Field 15:14 D7 +Field 13:12 D6 +Field 11:10 D5 +Field 9:8 D4 +Field 7:6 D3 +Field 5:4 D2 +Field 3:2 D1 +Field 1:0 D0 +EndSysreg + Sysreg CONTEXTIDR_EL2 3 4 13 0 1 Fields CONTEXTIDR_ELx EndSysreg -- cgit v1.2.3 From dffdeade18432d257e0c1845dc4e694f414a9721 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Fri, 20 May 2022 17:16:38 +0100 Subject: arm64/sysreg: Generate definitions for FAR_ELx Convert FAR_ELx to automatic register generation as per DDI0487H.a. In the architecture these registers have a single field "named" as "Faulting Virtual Address for synchronous exceptions taken to ELx" occupying the entire register, in order to fit in with the requirement to describe the contents of the register I have created a single field named ADDR. No functional change. Signed-off-by: Mark Brown Reviewed-by: Mark Rutland Link: https://lore.kernel.org/r/20220520161639.324236-7-broonie@kernel.org Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/sysreg.h | 3 --- arch/arm64/tools/sysreg | 12 ++++++++++++ 2 files changed, 12 insertions(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index c9d2d2a3dd68..55f998c3dc28 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -249,7 +249,6 @@ #define SYS_TFSR_EL1 sys_reg(3, 0, 5, 6, 0) #define SYS_TFSRE0_EL1 sys_reg(3, 0, 5, 6, 1) -#define SYS_FAR_EL1 sys_reg(3, 0, 6, 0, 0) #define SYS_PAR_EL1 sys_reg(3, 0, 7, 4, 0) #define SYS_PAR_EL1_F BIT(0) @@ -564,7 +563,6 @@ #define SYS_VSESR_EL2 sys_reg(3, 4, 5, 2, 3) #define SYS_FPEXC32_EL2 sys_reg(3, 4, 5, 3, 0) #define SYS_TFSR_EL2 sys_reg(3, 4, 5, 6, 0) -#define SYS_FAR_EL2 sys_reg(3, 4, 6, 0, 0) #define SYS_VDISR_EL2 sys_reg(3, 4, 12, 1, 1) #define __SYS__AP0Rx_EL2(x) sys_reg(3, 4, 12, 8, x) @@ -619,7 +617,6 @@ #define SYS_AFSR1_EL12 sys_reg(3, 5, 5, 1, 1) #define SYS_ESR_EL12 sys_reg(3, 5, 5, 2, 0) #define SYS_TFSR_EL12 sys_reg(3, 5, 5, 6, 0) -#define SYS_FAR_EL12 sys_reg(3, 5, 6, 0, 0) #define SYS_MAIR_EL12 sys_reg(3, 5, 10, 2, 0) #define SYS_AMAIR_EL12 sys_reg(3, 5, 10, 3, 0) #define SYS_VBAR_EL12 sys_reg(3, 5, 12, 0, 0) diff --git a/arch/arm64/tools/sysreg b/arch/arm64/tools/sysreg index af21acbb542d..ff5e552f7420 100644 --- a/arch/arm64/tools/sysreg +++ b/arch/arm64/tools/sysreg @@ -228,6 +228,10 @@ Sysreg SMCR_EL1 3 0 1 2 6 Fields SMCR_ELx EndSysreg +Sysreg FAR_EL1 3 0 6 0 0 +Field 63:0 ADDR +EndSysreg + SysregFields CONTEXTIDR_ELx Res0 63:32 Field 31:0 PROCID @@ -322,6 +326,10 @@ Field 3:2 D1 Field 1:0 D0 EndSysreg +Sysreg FAR_EL2 3 4 6 0 0 +Field 63:0 ADDR +EndSysreg + Sysreg CONTEXTIDR_EL2 3 4 13 0 1 Fields CONTEXTIDR_ELx EndSysreg @@ -338,6 +346,10 @@ Sysreg SMCR_EL12 3 5 1 2 6 Fields SMCR_ELx EndSysreg +Sysreg FAR_EL12 3 5 6 0 0 +Field 63:0 ADDR +EndSysreg + Sysreg CONTEXTIDR_EL12 3 5 13 0 1 Fields CONTEXTIDR_ELx EndSysreg -- cgit v1.2.3