From e51ea5442996261d4bc3a5b934d27cc0ce6a991c Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 14 Jul 2020 15:10:46 -0400 Subject: sparc32: don't bother with lookup_fault() in __bzero() Signed-off-by: Al Viro --- arch/sparc/lib/memset.S | 15 +++------------ 1 file changed, 3 insertions(+), 12 deletions(-) (limited to 'arch') diff --git a/arch/sparc/lib/memset.S b/arch/sparc/lib/memset.S index b89d42b29e34..725041c5224a 100644 --- a/arch/sparc/lib/memset.S +++ b/arch/sparc/lib/memset.S @@ -19,7 +19,7 @@ 98: x,y; \ .section .fixup,ALLOC,EXECINSTR; \ .align 4; \ -99: ba 30f; \ +99: retl; \ a, b, %o0; \ .section __ex_table,ALLOC; \ .align 4; \ @@ -194,24 +194,15 @@ __memset_end: 1: sll %g2, 3, %g2 add %o3, %o1, %o0 - b 30f + retl sub %o0, %g2, %o0 21: mov 8, %o0 and %o1, 7, %o1 sub %o0, %g2, %o0 sll %o0, 3, %o0 - b 30f + retl add %o0, %o1, %o0 -30: -/* %o4 is faulting address, %o5 is %pc where fault occurred */ - save %sp, -104, %sp - mov %i5, %o0 - mov %i7, %o1 - call lookup_fault - mov %i4, %o2 - ret - restore .globl __bzero_end __bzero_end: -- cgit v1.2.3 From 5f99d33810b00666825784342868240e5790c704 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 14 Jul 2020 15:12:34 -0400 Subject: sparc32: kill lookup_fault() No callers left. As the result we can kill * lookup_fault() itself * the kludge in do_sparc_fault() for passing the arguments for eventual lookup_fault() into exception handler and labels used by it * the last of magical exception table entries (in __clear_user()) Signed-off-by: Al Viro --- arch/sparc/include/asm/uaccess_32.h | 5 ---- arch/sparc/lib/memset.S | 3 -- arch/sparc/mm/fault_32.c | 56 ------------------------------------- arch/sparc/mm/mm_32.h | 2 -- 4 files changed, 66 deletions(-) (limited to 'arch') diff --git a/arch/sparc/include/asm/uaccess_32.h b/arch/sparc/include/asm/uaccess_32.h index 0a2d3ebc4bb8..54a3ba742647 100644 --- a/arch/sparc/include/asm/uaccess_32.h +++ b/arch/sparc/include/asm/uaccess_32.h @@ -252,12 +252,7 @@ static inline unsigned long __clear_user(void __user *addr, unsigned long size) unsigned long ret; __asm__ __volatile__ ( - ".section __ex_table,#alloc\n\t" - ".align 4\n\t" - ".word 1f,3\n\t" - ".previous\n\t" "mov %2, %%o1\n" - "1:\n\t" "call __bzero\n\t" " mov %1, %%o0\n\t" "mov %%o0, %0\n" diff --git a/arch/sparc/lib/memset.S b/arch/sparc/lib/memset.S index 725041c5224a..d79550e3e63f 100644 --- a/arch/sparc/lib/memset.S +++ b/arch/sparc/lib/memset.S @@ -68,8 +68,6 @@ __bzero_begin: .globl memset EXPORT_SYMBOL(__bzero) EXPORT_SYMBOL(memset) - .globl __memset_start, __memset_end -__memset_start: memset: mov %o0, %g1 mov 1, %g4 @@ -181,7 +179,6 @@ __bzero: 5: retl clr %o0 -__memset_end: .section .fixup,#alloc,#execinstr .align 4 diff --git a/arch/sparc/mm/fault_32.c b/arch/sparc/mm/fault_32.c index 40ce087dfecf..290869fd6b6a 100644 --- a/arch/sparc/mm/fault_32.c +++ b/arch/sparc/mm/fault_32.c @@ -54,54 +54,6 @@ static void __noreturn unhandled_fault(unsigned long address, die_if_kernel("Oops", regs); } -asmlinkage int lookup_fault(unsigned long pc, unsigned long ret_pc, - unsigned long address) -{ - struct pt_regs regs; - unsigned long g2; - unsigned int insn; - int i; - - i = search_extables_range(ret_pc, &g2); - switch (i) { - case 3: - /* load & store will be handled by fixup */ - return 3; - - case 1: - /* store will be handled by fixup, load will bump out */ - /* for _to_ macros */ - insn = *((unsigned int *) pc); - if ((insn >> 21) & 1) - return 1; - break; - - case 2: - /* load will be handled by fixup, store will bump out */ - /* for 
_from_ macros */ - insn = *((unsigned int *) pc); - if (!((insn >> 21) & 1) || ((insn>>19)&0x3f) == 15) - return 2; - break; - - default: - break; - } - - memset(®s, 0, sizeof(regs)); - regs.pc = pc; - regs.npc = pc + 4; - __asm__ __volatile__( - "rd %%psr, %0\n\t" - "nop\n\t" - "nop\n\t" - "nop\n" : "=r" (regs.psr)); - unhandled_fault(address, current, ®s); - - /* Not reached */ - return 0; -} - static inline void show_signal_msg(struct pt_regs *regs, int sig, int code, unsigned long address, struct task_struct *tsk) @@ -286,20 +238,12 @@ no_context: fixup = search_extables_range(regs->pc, &g2); /* Values below 10 are reserved for other things */ if (fixup > 10) { - extern const unsigned int __memset_start[]; - extern const unsigned int __memset_end[]; - #ifdef DEBUG_EXCEPTIONS printk("Exception: PC<%08lx> faddr<%08lx>\n", regs->pc, address); printk("EX_TABLE: insn<%08lx> fixup<%08x> g2<%08lx>\n", regs->pc, fixup, g2); #endif - if ((regs->pc >= (unsigned long)__memset_start && - regs->pc < (unsigned long)__memset_end)) { - regs->u_regs[UREG_I4] = address; - regs->u_regs[UREG_I5] = regs->pc; - } regs->u_regs[UREG_G2] = g2; regs->pc = fixup; regs->npc = regs->pc + 4; diff --git a/arch/sparc/mm/mm_32.h b/arch/sparc/mm/mm_32.h index ce750a99eea9..ee55f1080634 100644 --- a/arch/sparc/mm/mm_32.h +++ b/arch/sparc/mm/mm_32.h @@ -1,7 +1,5 @@ /* SPDX-License-Identifier: GPL-2.0 */ /* fault_32.c - visible as they are called from assembler */ -asmlinkage int lookup_fault(unsigned long pc, unsigned long ret_pc, - unsigned long address); asmlinkage void do_sparc_fault(struct pt_regs *regs, int text_fault, int write, unsigned long address); -- cgit v1.2.3 From df06c27ebd86af2b4c43f698c3d38b781dbc722d Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 14 Jul 2020 17:27:56 -0400 Subject: sparc32: switch __bzero() away from range exception table entries Signed-off-by: Al Viro --- arch/sparc/lib/memset.S | 72 ++++++++++++++++++++++--------------------------- 1 file changed, 32 insertions(+), 40 deletions(-) (limited to 'arch') diff --git a/arch/sparc/lib/memset.S b/arch/sparc/lib/memset.S index d79550e3e63f..eaff68213fdf 100644 --- a/arch/sparc/lib/memset.S +++ b/arch/sparc/lib/memset.S @@ -27,35 +27,44 @@ .text; \ .align 4 -#define EXT(start,end,handler) \ +#define STORE(source, base, offset, n) \ +98: std source, [base + offset + n]; \ + .section .fixup,ALLOC,EXECINSTR; \ + .align 4; \ +99: ba 30f; \ + sub %o3, n - offset, %o3; \ .section __ex_table,ALLOC; \ .align 4; \ - .word start, 0, end, handler; \ + .word 98b, 99b; \ .text; \ - .align 4 + .align 4; + +#define STORE_LAST(source, base, offset, n) \ + EX(std source, [base - offset - n], \ + add %o1, offset + n); /* Please don't change these macros, unless you change the logic * in the .fixup section below as well. * Store 64 bytes at (BASE + OFFSET) using value SOURCE. 
*/ -#define ZERO_BIG_BLOCK(base, offset, source) \ - std source, [base + offset + 0x00]; \ - std source, [base + offset + 0x08]; \ - std source, [base + offset + 0x10]; \ - std source, [base + offset + 0x18]; \ - std source, [base + offset + 0x20]; \ - std source, [base + offset + 0x28]; \ - std source, [base + offset + 0x30]; \ - std source, [base + offset + 0x38]; +#define ZERO_BIG_BLOCK(base, offset, source) \ + STORE(source, base, offset, 0x00); \ + STORE(source, base, offset, 0x08); \ + STORE(source, base, offset, 0x10); \ + STORE(source, base, offset, 0x18); \ + STORE(source, base, offset, 0x20); \ + STORE(source, base, offset, 0x28); \ + STORE(source, base, offset, 0x30); \ + STORE(source, base, offset, 0x38); #define ZERO_LAST_BLOCKS(base, offset, source) \ - std source, [base - offset - 0x38]; \ - std source, [base - offset - 0x30]; \ - std source, [base - offset - 0x28]; \ - std source, [base - offset - 0x20]; \ - std source, [base - offset - 0x18]; \ - std source, [base - offset - 0x10]; \ - std source, [base - offset - 0x08]; \ - std source, [base - offset - 0x00]; + STORE_LAST(source, base, offset, 0x38); \ + STORE_LAST(source, base, offset, 0x30); \ + STORE_LAST(source, base, offset, 0x28); \ + STORE_LAST(source, base, offset, 0x20); \ + STORE_LAST(source, base, offset, 0x18); \ + STORE_LAST(source, base, offset, 0x10); \ + STORE_LAST(source, base, offset, 0x08); \ + STORE_LAST(source, base, offset, 0x00); .text .align 4 @@ -120,8 +129,6 @@ __bzero: ZERO_BIG_BLOCK(%o0, 0x00, %g2) subcc %o3, 128, %o3 ZERO_BIG_BLOCK(%o0, 0x40, %g2) -11: - EXT(10b, 11b, 20f) bne 10b add %o0, 128, %o0 @@ -136,7 +143,6 @@ __bzero: jmp %o4 add %o0, %o2, %o0 -12: ZERO_LAST_BLOCKS(%o0, 0x48, %g2) ZERO_LAST_BLOCKS(%o0, 0x08, %g2) 13: @@ -182,24 +188,10 @@ __bzero: .section .fixup,#alloc,#execinstr .align 4 -20: - cmp %g2, 8 - bleu 1f - and %o1, 0x7f, %o1 - sub %g2, 9, %g2 - add %o3, 64, %o3 -1: - sll %g2, 3, %g2 - add %o3, %o1, %o0 - retl - sub %o0, %g2, %o0 -21: - mov 8, %o0 - and %o1, 7, %o1 - sub %o0, %g2, %o0 - sll %o0, 3, %o0 +30: + and %o1, 0x7f, %o1 retl - add %o0, %o1, %o0 + add %o3, %o1, %o0 .globl __bzero_end __bzero_end: -- cgit v1.2.3 From cfd5fa7021a54b8ed9bad16246b6b34851fd48d1 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 14 Jul 2020 17:33:55 -0400 Subject: sparc32: get rid of range exception table entries in checksum_32.S trivial - we don't even look at instruction offsets in the handler Signed-off-by: Al Viro --- arch/sparc/lib/checksum_32.S | 64 +++++++++++++++++++------------------------- 1 file changed, 27 insertions(+), 37 deletions(-) (limited to 'arch') diff --git a/arch/sparc/lib/checksum_32.S b/arch/sparc/lib/checksum_32.S index 7488d130faf7..781e39b3c009 100644 --- a/arch/sparc/lib/checksum_32.S +++ b/arch/sparc/lib/checksum_32.S @@ -155,13 +155,6 @@ cpout: retl ! get outta here .text; \ .align 4 -#define EXT(start,end) \ - .section __ex_table,ALLOC; \ - .align 4; \ - .word start, 0, end, cc_fault; \ - .text; \ - .align 4 - /* This aligned version executes typically in 8.5 superscalar cycles, this * is the best I can do. I say 8.5 because the final add will pair with * the next ldd in the main unrolled loop. Thus the pipe is always full. @@ -169,20 +162,20 @@ cpout: retl ! get outta here * please check the fixup code below as well. 
*/ #define CSUMCOPY_BIGCHUNK_ALIGNED(src, dst, sum, off, t0, t1, t2, t3, t4, t5, t6, t7) \ - ldd [src + off + 0x00], t0; \ - ldd [src + off + 0x08], t2; \ + EX(ldd [src + off + 0x00], t0); \ + EX(ldd [src + off + 0x08], t2); \ addxcc t0, sum, sum; \ - ldd [src + off + 0x10], t4; \ + EX(ldd [src + off + 0x10], t4); \ addxcc t1, sum, sum; \ - ldd [src + off + 0x18], t6; \ + EX(ldd [src + off + 0x18], t6); \ addxcc t2, sum, sum; \ - std t0, [dst + off + 0x00]; \ + EX(std t0, [dst + off + 0x00]); \ addxcc t3, sum, sum; \ - std t2, [dst + off + 0x08]; \ + EX(std t2, [dst + off + 0x08]); \ addxcc t4, sum, sum; \ - std t4, [dst + off + 0x10]; \ + EX(std t4, [dst + off + 0x10]); \ addxcc t5, sum, sum; \ - std t6, [dst + off + 0x18]; \ + EX(std t6, [dst + off + 0x18]); \ addxcc t6, sum, sum; \ addxcc t7, sum, sum; @@ -191,39 +184,39 @@ cpout: retl ! get outta here * Viking MXCC into streaming mode. Ho hum... */ #define CSUMCOPY_BIGCHUNK(src, dst, sum, off, t0, t1, t2, t3, t4, t5, t6, t7) \ - ldd [src + off + 0x00], t0; \ - ldd [src + off + 0x08], t2; \ - ldd [src + off + 0x10], t4; \ - ldd [src + off + 0x18], t6; \ - st t0, [dst + off + 0x00]; \ + EX(ldd [src + off + 0x00], t0); \ + EX(ldd [src + off + 0x08], t2); \ + EX(ldd [src + off + 0x10], t4); \ + EX(ldd [src + off + 0x18], t6); \ + EX(st t0, [dst + off + 0x00]); \ addxcc t0, sum, sum; \ - st t1, [dst + off + 0x04]; \ + EX(st t1, [dst + off + 0x04]); \ addxcc t1, sum, sum; \ - st t2, [dst + off + 0x08]; \ + EX(st t2, [dst + off + 0x08]); \ addxcc t2, sum, sum; \ - st t3, [dst + off + 0x0c]; \ + EX(st t3, [dst + off + 0x0c]); \ addxcc t3, sum, sum; \ - st t4, [dst + off + 0x10]; \ + EX(st t4, [dst + off + 0x10]); \ addxcc t4, sum, sum; \ - st t5, [dst + off + 0x14]; \ + EX(st t5, [dst + off + 0x14]); \ addxcc t5, sum, sum; \ - st t6, [dst + off + 0x18]; \ + EX(st t6, [dst + off + 0x18]); \ addxcc t6, sum, sum; \ - st t7, [dst + off + 0x1c]; \ + EX(st t7, [dst + off + 0x1c]); \ addxcc t7, sum, sum; /* Yuck, 6 superscalar cycles... */ #define CSUMCOPY_LASTCHUNK(src, dst, sum, off, t0, t1, t2, t3) \ - ldd [src - off - 0x08], t0; \ - ldd [src - off - 0x00], t2; \ + EX(ldd [src - off - 0x08], t0); \ + EX(ldd [src - off - 0x00], t2); \ addxcc t0, sum, sum; \ - st t0, [dst - off - 0x08]; \ + EX(st t0, [dst - off - 0x08]); \ addxcc t1, sum, sum; \ - st t1, [dst - off - 0x04]; \ + EX(st t1, [dst - off - 0x04]); \ addxcc t2, sum, sum; \ - st t2, [dst - off - 0x00]; \ + EX(st t2, [dst - off - 0x00]); \ addxcc t3, sum, sum; \ - st t3, [dst - off + 0x04]; + EX(st t3, [dst - off + 0x04]); /* Handle the end cruft code out of band for better cache patterns. */ cc_end_cruft: @@ -331,7 +324,6 @@ __csum_partial_copy_sparc_generic: CSUMCOPY_BIGCHUNK(%o0,%o1,%g7,0x20,%o4,%o5,%g2,%g3,%g4,%g5,%o2,%o3) CSUMCOPY_BIGCHUNK(%o0,%o1,%g7,0x40,%o4,%o5,%g2,%g3,%g4,%g5,%o2,%o3) CSUMCOPY_BIGCHUNK(%o0,%o1,%g7,0x60,%o4,%o5,%g2,%g3,%g4,%g5,%o2,%o3) -10: EXT(5b, 10b) ! note for exception handling sub %g1, 128, %g1 ! detract from length addx %g0, %g7, %g7 ! add in last carry bit andcc %g1, 0xffffff80, %g0 ! more to csum? @@ -356,8 +348,7 @@ cctbl: CSUMCOPY_LASTCHUNK(%o0,%o1,%g7,0x68,%g2,%g3,%g4,%g5) CSUMCOPY_LASTCHUNK(%o0,%o1,%g7,0x28,%g2,%g3,%g4,%g5) CSUMCOPY_LASTCHUNK(%o0,%o1,%g7,0x18,%g2,%g3,%g4,%g5) CSUMCOPY_LASTCHUNK(%o0,%o1,%g7,0x08,%g2,%g3,%g4,%g5) -12: EXT(cctbl, 12b) ! note for exception table handling - addx %g0, %g7, %g7 +12: addx %g0, %g7, %g7 andcc %o3, 0xf, %g0 ! check for low bits set ccte: bne cc_end_cruft ! something left, handle it out of band andcc %o3, 8, %g0 ! 
begin checks for that code @@ -367,7 +358,6 @@ ccdbl: CSUMCOPY_BIGCHUNK_ALIGNED(%o0,%o1,%g7,0x00,%o4,%o5,%g2,%g3,%g4,%g5,%o2,%o CSUMCOPY_BIGCHUNK_ALIGNED(%o0,%o1,%g7,0x20,%o4,%o5,%g2,%g3,%g4,%g5,%o2,%o3) CSUMCOPY_BIGCHUNK_ALIGNED(%o0,%o1,%g7,0x40,%o4,%o5,%g2,%g3,%g4,%g5,%o2,%o3) CSUMCOPY_BIGCHUNK_ALIGNED(%o0,%o1,%g7,0x60,%o4,%o5,%g2,%g3,%g4,%g5,%o2,%o3) -11: EXT(ccdbl, 11b) ! note for exception table handling sub %g1, 128, %g1 ! detract from length addx %g0, %g7, %g7 ! add in last carry bit andcc %g1, 0xffffff80, %g0 ! more to csum? -- cgit v1.2.3 From c4da8e0dc6f7cec80f32af080cadf47c1753a2ab Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 16 Jul 2020 10:19:37 -0400 Subject: sparc32: switch copy_user.S away from range exception table entries Those were the last range exception table entries, which will allow to get rid of a lot of weirdness. Emits the same code into .text. Signed-off-by: Al Viro --- arch/sparc/lib/copy_user.S | 315 ++++++++++++++++----------------------------- 1 file changed, 112 insertions(+), 203 deletions(-) (limited to 'arch') diff --git a/arch/sparc/lib/copy_user.S b/arch/sparc/lib/copy_user.S index dc72f2b970b7..954572c78539 100644 --- a/arch/sparc/lib/copy_user.S +++ b/arch/sparc/lib/copy_user.S @@ -21,98 +21,134 @@ /* Work around cpp -rob */ #define ALLOC #alloc #define EXECINSTR #execinstr + +#define EX_ENTRY(l1, l2) \ + .section __ex_table,ALLOC; \ + .align 4; \ + .word l1, l2; \ + .text; + #define EX(x,y,a,b) \ 98: x,y; \ .section .fixup,ALLOC,EXECINSTR; \ .align 4; \ -99: ba fixupretl; \ - a, b, %g3; \ - .section __ex_table,ALLOC; \ - .align 4; \ - .word 98b, 99b; \ - .text; \ - .align 4 +99: retl; \ + a, b, %o0; \ + EX_ENTRY(98b, 99b) #define EX2(x,y,c,d,e,a,b) \ 98: x,y; \ .section .fixup,ALLOC,EXECINSTR; \ .align 4; \ 99: c, d, e; \ - ba fixupretl; \ - a, b, %g3; \ - .section __ex_table,ALLOC; \ - .align 4; \ - .word 98b, 99b; \ - .text; \ - .align 4 + retl; \ + a, b, %o0; \ + EX_ENTRY(98b, 99b) #define EXO2(x,y) \ 98: x, y; \ - .section __ex_table,ALLOC; \ - .align 4; \ - .word 98b, 97f; \ - .text; \ - .align 4 + EX_ENTRY(98b, 97f) -#define EXT(start,end,handler) \ - .section __ex_table,ALLOC; \ - .align 4; \ - .word start, 0, end, handler; \ - .text; \ - .align 4 +#define LD(insn, src, offset, reg, label) \ +98: insn [%src + (offset)], %reg; \ + .section .fixup,ALLOC,EXECINSTR; \ +99: ba label; \ + mov offset, %g5; \ + EX_ENTRY(98b, 99b) -/* Please do not change following macros unless you change logic used - * in .fixup at the end of this file as well - */ +#define ST(insn, dst, offset, reg, label) \ +98: insn %reg, [%dst + (offset)]; \ + .section .fixup,ALLOC,EXECINSTR; \ +99: ba label; \ + mov offset, %g5; \ + EX_ENTRY(98b, 99b) /* Both these macros have to start with exactly the same insn */ +/* left: g7 + (g1 % 128) - offset */ #define MOVE_BIGCHUNK(src, dst, offset, t0, t1, t2, t3, t4, t5, t6, t7) \ - ldd [%src + (offset) + 0x00], %t0; \ - ldd [%src + (offset) + 0x08], %t2; \ - ldd [%src + (offset) + 0x10], %t4; \ - ldd [%src + (offset) + 0x18], %t6; \ - st %t0, [%dst + (offset) + 0x00]; \ - st %t1, [%dst + (offset) + 0x04]; \ - st %t2, [%dst + (offset) + 0x08]; \ - st %t3, [%dst + (offset) + 0x0c]; \ - st %t4, [%dst + (offset) + 0x10]; \ - st %t5, [%dst + (offset) + 0x14]; \ - st %t6, [%dst + (offset) + 0x18]; \ - st %t7, [%dst + (offset) + 0x1c]; - + LD(ldd, src, offset + 0x00, t0, bigchunk_fault) \ + LD(ldd, src, offset + 0x08, t2, bigchunk_fault) \ + LD(ldd, src, offset + 0x10, t4, bigchunk_fault) \ + LD(ldd, src, offset + 0x18, t6, 
bigchunk_fault) \ + ST(st, dst, offset + 0x00, t0, bigchunk_fault) \ + ST(st, dst, offset + 0x04, t1, bigchunk_fault) \ + ST(st, dst, offset + 0x08, t2, bigchunk_fault) \ + ST(st, dst, offset + 0x0c, t3, bigchunk_fault) \ + ST(st, dst, offset + 0x10, t4, bigchunk_fault) \ + ST(st, dst, offset + 0x14, t5, bigchunk_fault) \ + ST(st, dst, offset + 0x18, t6, bigchunk_fault) \ + ST(st, dst, offset + 0x1c, t7, bigchunk_fault) + +/* left: g7 + (g1 % 128) - offset */ #define MOVE_BIGALIGNCHUNK(src, dst, offset, t0, t1, t2, t3, t4, t5, t6, t7) \ - ldd [%src + (offset) + 0x00], %t0; \ - ldd [%src + (offset) + 0x08], %t2; \ - ldd [%src + (offset) + 0x10], %t4; \ - ldd [%src + (offset) + 0x18], %t6; \ - std %t0, [%dst + (offset) + 0x00]; \ - std %t2, [%dst + (offset) + 0x08]; \ - std %t4, [%dst + (offset) + 0x10]; \ - std %t6, [%dst + (offset) + 0x18]; + LD(ldd, src, offset + 0x00, t0, bigchunk_fault) \ + LD(ldd, src, offset + 0x08, t2, bigchunk_fault) \ + LD(ldd, src, offset + 0x10, t4, bigchunk_fault) \ + LD(ldd, src, offset + 0x18, t6, bigchunk_fault) \ + ST(std, dst, offset + 0x00, t0, bigchunk_fault) \ + ST(std, dst, offset + 0x08, t2, bigchunk_fault) \ + ST(std, dst, offset + 0x10, t4, bigchunk_fault) \ + ST(std, dst, offset + 0x18, t6, bigchunk_fault) + .section .fixup,#alloc,#execinstr +bigchunk_fault: + sub %g7, %g5, %o0 + and %g1, 127, %g1 + retl + add %o0, %g1, %o0 + +/* left: offset + 16 + (g1 % 16) */ #define MOVE_LASTCHUNK(src, dst, offset, t0, t1, t2, t3) \ - ldd [%src - (offset) - 0x10], %t0; \ - ldd [%src - (offset) - 0x08], %t2; \ - st %t0, [%dst - (offset) - 0x10]; \ - st %t1, [%dst - (offset) - 0x0c]; \ - st %t2, [%dst - (offset) - 0x08]; \ - st %t3, [%dst - (offset) - 0x04]; + LD(ldd, src, -(offset + 0x10), t0, lastchunk_fault) \ + LD(ldd, src, -(offset + 0x08), t2, lastchunk_fault) \ + ST(st, dst, -(offset + 0x10), t0, lastchunk_fault) \ + ST(st, dst, -(offset + 0x0c), t1, lastchunk_fault) \ + ST(st, dst, -(offset + 0x08), t2, lastchunk_fault) \ + ST(st, dst, -(offset + 0x04), t3, lastchunk_fault) -#define MOVE_HALFCHUNK(src, dst, offset, t0, t1, t2, t3) \ - lduh [%src + (offset) + 0x00], %t0; \ - lduh [%src + (offset) + 0x02], %t1; \ - lduh [%src + (offset) + 0x04], %t2; \ - lduh [%src + (offset) + 0x06], %t3; \ - sth %t0, [%dst + (offset) + 0x00]; \ - sth %t1, [%dst + (offset) + 0x02]; \ - sth %t2, [%dst + (offset) + 0x04]; \ - sth %t3, [%dst + (offset) + 0x06]; + .section .fixup,#alloc,#execinstr +lastchunk_fault: + and %g1, 15, %g1 + retl + sub %g1, %g5, %o0 +/* left: o3 + (o2 % 16) - offset */ +#define MOVE_HALFCHUNK(src, dst, offset, t0, t1, t2, t3) \ + LD(lduh, src, offset + 0x00, t0, halfchunk_fault) \ + LD(lduh, src, offset + 0x02, t1, halfchunk_fault) \ + LD(lduh, src, offset + 0x04, t2, halfchunk_fault) \ + LD(lduh, src, offset + 0x06, t3, halfchunk_fault) \ + ST(sth, dst, offset + 0x00, t0, halfchunk_fault) \ + ST(sth, dst, offset + 0x02, t1, halfchunk_fault) \ + ST(sth, dst, offset + 0x04, t2, halfchunk_fault) \ + ST(sth, dst, offset + 0x06, t3, halfchunk_fault) + +/* left: o3 + (o2 % 16) + offset + 2 */ #define MOVE_SHORTCHUNK(src, dst, offset, t0, t1) \ - ldub [%src - (offset) - 0x02], %t0; \ - ldub [%src - (offset) - 0x01], %t1; \ - stb %t0, [%dst - (offset) - 0x02]; \ - stb %t1, [%dst - (offset) - 0x01]; + LD(ldub, src, -(offset + 0x02), t0, halfchunk_fault) \ + LD(ldub, src, -(offset + 0x01), t1, halfchunk_fault) \ + ST(stb, dst, -(offset + 0x02), t0, halfchunk_fault) \ + ST(stb, dst, -(offset + 0x01), t1, halfchunk_fault) + + .section .fixup,#alloc,#execinstr 
+halfchunk_fault: + and %o2, 15, %o2 + sub %o3, %g5, %o3 + retl + add %o2, %o3, %o0 + +/* left: offset + 2 + (o2 % 2) */ +#define MOVE_LAST_SHORTCHUNK(src, dst, offset, t0, t1) \ + LD(ldub, src, -(offset + 0x02), t0, last_shortchunk_fault) \ + LD(ldub, src, -(offset + 0x01), t1, last_shortchunk_fault) \ + ST(stb, dst, -(offset + 0x02), t0, last_shortchunk_fault) \ + ST(stb, dst, -(offset + 0x01), t1, last_shortchunk_fault) + + .section .fixup,#alloc,#execinstr +last_shortchunk_fault: + and %o2, 1, %o2 + retl + sub %o2, %g5, %o0 .text .align 4 @@ -182,8 +218,6 @@ __copy_user: /* %o0=dst %o1=src %o2=len */ MOVE_BIGCHUNK(o1, o0, 0x20, o2, o3, o4, o5, g2, g3, g4, g5) MOVE_BIGCHUNK(o1, o0, 0x40, o2, o3, o4, o5, g2, g3, g4, g5) MOVE_BIGCHUNK(o1, o0, 0x60, o2, o3, o4, o5, g2, g3, g4, g5) -80: - EXT(5b, 80b, 50f) subcc %g7, 128, %g7 add %o1, 128, %o1 bne 5b @@ -201,7 +235,6 @@ __copy_user: /* %o0=dst %o1=src %o2=len */ jmpl %o5 + %lo(copy_user_table_end), %g0 add %o0, %g7, %o0 -copy_user_table: MOVE_LASTCHUNK(o1, o0, 0x60, g2, g3, g4, g5) MOVE_LASTCHUNK(o1, o0, 0x50, g2, g3, g4, g5) MOVE_LASTCHUNK(o1, o0, 0x40, g2, g3, g4, g5) @@ -210,7 +243,6 @@ copy_user_table: MOVE_LASTCHUNK(o1, o0, 0x10, g2, g3, g4, g5) MOVE_LASTCHUNK(o1, o0, 0x00, g2, g3, g4, g5) copy_user_table_end: - EXT(copy_user_table, copy_user_table_end, 51f) be copy_user_last7 andcc %g1, 4, %g0 @@ -250,8 +282,6 @@ ldd_std: MOVE_BIGALIGNCHUNK(o1, o0, 0x20, o2, o3, o4, o5, g2, g3, g4, g5) MOVE_BIGALIGNCHUNK(o1, o0, 0x40, o2, o3, o4, o5, g2, g3, g4, g5) MOVE_BIGALIGNCHUNK(o1, o0, 0x60, o2, o3, o4, o5, g2, g3, g4, g5) -81: - EXT(ldd_std, 81b, 52f) subcc %g7, 128, %g7 add %o1, 128, %o1 bne ldd_std @@ -290,8 +320,6 @@ cannot_optimize: 10: MOVE_HALFCHUNK(o1, o0, 0x00, g2, g3, g4, g5) MOVE_HALFCHUNK(o1, o0, 0x08, g2, g3, g4, g5) -82: - EXT(10b, 82b, 53f) subcc %o3, 0x10, %o3 add %o1, 0x10, %o1 bne 10b @@ -308,8 +336,6 @@ byte_chunk: MOVE_SHORTCHUNK(o1, o0, -0x0c, g2, g3) MOVE_SHORTCHUNK(o1, o0, -0x0e, g2, g3) MOVE_SHORTCHUNK(o1, o0, -0x10, g2, g3) -83: - EXT(byte_chunk, 83b, 54f) subcc %o3, 0x10, %o3 add %o1, 0x10, %o1 bne byte_chunk @@ -325,16 +351,14 @@ short_end: add %o1, %o3, %o1 jmpl %o5 + %lo(short_table_end), %g0 andcc %o2, 1, %g0 -84: - MOVE_SHORTCHUNK(o1, o0, 0x0c, g2, g3) - MOVE_SHORTCHUNK(o1, o0, 0x0a, g2, g3) - MOVE_SHORTCHUNK(o1, o0, 0x08, g2, g3) - MOVE_SHORTCHUNK(o1, o0, 0x06, g2, g3) - MOVE_SHORTCHUNK(o1, o0, 0x04, g2, g3) - MOVE_SHORTCHUNK(o1, o0, 0x02, g2, g3) - MOVE_SHORTCHUNK(o1, o0, 0x00, g2, g3) + MOVE_LAST_SHORTCHUNK(o1, o0, 0x0c, g2, g3) + MOVE_LAST_SHORTCHUNK(o1, o0, 0x0a, g2, g3) + MOVE_LAST_SHORTCHUNK(o1, o0, 0x08, g2, g3) + MOVE_LAST_SHORTCHUNK(o1, o0, 0x06, g2, g3) + MOVE_LAST_SHORTCHUNK(o1, o0, 0x04, g2, g3) + MOVE_LAST_SHORTCHUNK(o1, o0, 0x02, g2, g3) + MOVE_LAST_SHORTCHUNK(o1, o0, 0x00, g2, g3) short_table_end: - EXT(84b, short_table_end, 55f) be 1f nop EX(ldub [%o1], %g2, add %g0, 1) @@ -363,123 +387,8 @@ short_aligned_end: .section .fixup,#alloc,#execinstr .align 4 97: - mov %o2, %g3 -fixupretl: retl - mov %g3, %o0 - -/* exception routine sets %g2 to (broken_insn - first_insn)>>2 */ -50: -/* This magic counts how many bytes are left when crash in MOVE_BIGCHUNK - * happens. This is derived from the amount ldd reads, st stores, etc. - * x = g2 % 12; - * g3 = g1 + g7 - ((g2 / 12) * 32 + (x < 4) ? 
0 : (x - 4) * 4); - * o0 += (g2 / 12) * 32; - */ - cmp %g2, 12 - add %o0, %g7, %o0 - bcs 1f - cmp %g2, 24 - bcs 2f - cmp %g2, 36 - bcs 3f - nop - sub %g2, 12, %g2 - sub %g7, 32, %g7 -3: sub %g2, 12, %g2 - sub %g7, 32, %g7 -2: sub %g2, 12, %g2 - sub %g7, 32, %g7 -1: cmp %g2, 4 - bcs,a 60f - clr %g2 - sub %g2, 4, %g2 - sll %g2, 2, %g2 -60: and %g1, 0x7f, %g3 - sub %o0, %g7, %o0 - add %g3, %g7, %g3 - ba fixupretl - sub %g3, %g2, %g3 -51: -/* i = 41 - g2; j = i % 6; - * g3 = (g1 & 15) + (i / 6) * 16 + (j < 4) ? (j + 1) * 4 : 16; - * o0 -= (i / 6) * 16 + 16; - */ - neg %g2 - and %g1, 0xf, %g1 - add %g2, 41, %g2 - add %o0, %g1, %o0 -1: cmp %g2, 6 - bcs,a 2f - cmp %g2, 4 - add %g1, 16, %g1 - b 1b - sub %g2, 6, %g2 -2: bcc,a 2f - mov 16, %g2 - inc %g2 - sll %g2, 2, %g2 -2: add %g1, %g2, %g3 - ba fixupretl - sub %o0, %g3, %o0 -52: -/* g3 = g1 + g7 - (g2 / 8) * 32 + (g2 & 4) ? (g2 & 3) * 8 : 0; - o0 += (g2 / 8) * 32 */ - andn %g2, 7, %g4 - add %o0, %g7, %o0 - andcc %g2, 4, %g0 - and %g2, 3, %g2 - sll %g4, 2, %g4 - sll %g2, 3, %g2 - bne 60b - sub %g7, %g4, %g7 - ba 60b - clr %g2 -53: -/* g3 = o3 + (o2 & 15) - (g2 & 8) - (g2 & 4) ? (g2 & 3) * 2 : 0; - o0 += (g2 & 8) */ - and %g2, 3, %g4 - andcc %g2, 4, %g0 - and %g2, 8, %g2 - sll %g4, 1, %g4 - be 1f - add %o0, %g2, %o0 - add %g2, %g4, %g2 -1: and %o2, 0xf, %g3 - add %g3, %o3, %g3 - ba fixupretl - sub %g3, %g2, %g3 -54: -/* g3 = o3 + (o2 & 15) - (g2 / 4) * 2 - (g2 & 2) ? (g2 & 1) : 0; - o0 += (g2 / 4) * 2 */ - srl %g2, 2, %o4 - and %g2, 1, %o5 - srl %g2, 1, %g2 - add %o4, %o4, %o4 - and %o5, %g2, %o5 - and %o2, 0xf, %o2 - add %o0, %o4, %o0 - sub %o3, %o5, %o3 - sub %o2, %o4, %o2 - ba fixupretl - add %o2, %o3, %g3 -55: -/* i = 27 - g2; - g3 = (o2 & 1) + i / 4 * 2 + !(i & 3); - o0 -= i / 4 * 2 + 1 */ - neg %g2 - and %o2, 1, %o2 - add %g2, 27, %g2 - srl %g2, 2, %o5 - andcc %g2, 3, %g0 - mov 1, %g2 - add %o5, %o5, %o5 - be,a 1f - clr %g2 -1: add %g2, %o5, %g3 - sub %o0, %g3, %o0 - ba fixupretl - add %g3, %o2, %g3 + mov %o2, %o0 .globl __copy_user_end __copy_user_end: -- cgit v1.2.3 From b4edf06c8aaae30ef926bd6853df6e59a7579ee9 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 16 Jul 2020 14:05:36 -0400 Subject: sparc32: switch to generic extables Signed-off-by: Al Viro --- arch/sparc/include/asm/elf_64.h | 1 - arch/sparc/include/asm/extable.h | 21 +++++++ arch/sparc/include/asm/extable_64.h | 21 ------- arch/sparc/include/asm/uaccess.h | 3 + arch/sparc/include/asm/uaccess_32.h | 33 ----------- arch/sparc/include/asm/uaccess_64.h | 1 - arch/sparc/kernel/unaligned_32.c | 10 ++-- arch/sparc/mm/Makefile | 2 +- arch/sparc/mm/extable.c | 107 ------------------------------------ arch/sparc/mm/fault_32.c | 26 ++++----- lib/extable.c | 5 -- 11 files changed, 41 insertions(+), 189 deletions(-) create mode 100644 arch/sparc/include/asm/extable.h delete mode 100644 arch/sparc/include/asm/extable_64.h delete mode 100644 arch/sparc/mm/extable.c (limited to 'arch') diff --git a/arch/sparc/include/asm/elf_64.h b/arch/sparc/include/asm/elf_64.h index 7e078bc73ef5..8fb09eec8c3e 100644 --- a/arch/sparc/include/asm/elf_64.h +++ b/arch/sparc/include/asm/elf_64.h @@ -8,7 +8,6 @@ #include #include -#include #include #include diff --git a/arch/sparc/include/asm/extable.h b/arch/sparc/include/asm/extable.h new file mode 100644 index 000000000000..554a9dc376fc --- /dev/null +++ b/arch/sparc/include/asm/extable.h @@ -0,0 +1,21 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __ASM_EXTABLE_H +#define __ASM_EXTABLE_H +/* + * The exception table consists of pairs of addresses: the 
first is the + * address of an instruction that is allowed to fault, and the second is + * the address at which the program should continue. No registers are + * modified, so it is entirely up to the continuation code to figure out + * what to do. + * + * All the routines below use bits of fixup code that are out of line + * with the main instruction path. This means when everything is well, + * we don't even have to jump over them. Further, they do not intrude + * on our cache or tlb entries. + */ + +struct exception_table_entry { + unsigned int insn, fixup; +}; + +#endif diff --git a/arch/sparc/include/asm/extable_64.h b/arch/sparc/include/asm/extable_64.h deleted file mode 100644 index 5a0171907b7e..000000000000 --- a/arch/sparc/include/asm/extable_64.h +++ /dev/null @@ -1,21 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef __ASM_EXTABLE64_H -#define __ASM_EXTABLE64_H -/* - * The exception table consists of pairs of addresses: the first is the - * address of an instruction that is allowed to fault, and the second is - * the address at which the program should continue. No registers are - * modified, so it is entirely up to the continuation code to figure out - * what to do. - * - * All the routines below use bits of fixup code that are out of line - * with the main instruction path. This means when everything is well, - * we don't even have to jump over them. Further, they do not intrude - * on our cache or tlb entries. - */ - -struct exception_table_entry { - unsigned int insn, fixup; -}; - -#endif diff --git a/arch/sparc/include/asm/uaccess.h b/arch/sparc/include/asm/uaccess.h index dd85bc2c2cad..390094200fc4 100644 --- a/arch/sparc/include/asm/uaccess.h +++ b/arch/sparc/include/asm/uaccess.h @@ -1,6 +1,9 @@ /* SPDX-License-Identifier: GPL-2.0 */ #ifndef ___ASM_SPARC_UACCESS_H #define ___ASM_SPARC_UACCESS_H + +#include + #if defined(__sparc__) && defined(__arch64__) #include #else diff --git a/arch/sparc/include/asm/uaccess_32.h b/arch/sparc/include/asm/uaccess_32.h index 54a3ba742647..4a12346bb69c 100644 --- a/arch/sparc/include/asm/uaccess_32.h +++ b/arch/sparc/include/asm/uaccess_32.h @@ -13,9 +13,6 @@ #include -#define ARCH_HAS_SORT_EXTABLE -#define ARCH_HAS_SEARCH_EXTABLE - /* Sparc is not segmented, however we need to be able to fool access_ok() * when doing system calls from kernel mode legitimately. * @@ -40,36 +37,6 @@ #define __access_ok(addr, size) (__user_ok((addr) & get_fs().seg, (size))) #define access_ok(addr, size) __access_ok((unsigned long)(addr), size) -/* - * The exception table consists of pairs of addresses: the first is the - * address of an instruction that is allowed to fault, and the second is - * the address at which the program should continue. No registers are - * modified, so it is entirely up to the continuation code to figure out - * what to do. - * - * All the routines below use bits of fixup code that are out of line - * with the main instruction path. This means when everything is well, - * we don't even have to jump over them. Further, they do not intrude - * on our cache or tlb entries. - * - * There is a special way how to put a range of potentially faulting - * insns (like twenty ldd/std's with now intervening other instructions) - * You specify address of first in insn and 0 in fixup and in the next - * exception_table_entry you specify last potentially faulting insn + 1 - * and in fixup the routine which should handle the fault. - * That fixup code will get - * (faulting_insn_address - first_insn_in_the_range_address)/4 - * in %g2 (ie. 
index of the faulting instruction in the range). - */ - -struct exception_table_entry -{ - unsigned long insn, fixup; -}; - -/* Returns 0 if exception not found and fixup otherwise. */ -unsigned long search_extables_range(unsigned long addr, unsigned long *g2); - /* Uh, these should become the main single-value transfer routines.. * They automatically use the right size if we just have the right * pointer type.. diff --git a/arch/sparc/include/asm/uaccess_64.h b/arch/sparc/include/asm/uaccess_64.h index 698cf69f74e9..30eb4c6414d1 100644 --- a/arch/sparc/include/asm/uaccess_64.h +++ b/arch/sparc/include/asm/uaccess_64.h @@ -10,7 +10,6 @@ #include #include #include -#include #include diff --git a/arch/sparc/kernel/unaligned_32.c b/arch/sparc/kernel/unaligned_32.c index 83db94c0b431..1d7c3eaabdd4 100644 --- a/arch/sparc/kernel/unaligned_32.c +++ b/arch/sparc/kernel/unaligned_32.c @@ -16,6 +16,7 @@ #include #include #include +#include #include @@ -213,10 +214,10 @@ static inline int ok_for_kernel(unsigned int insn) static void kernel_mna_trap_fault(struct pt_regs *regs, unsigned int insn) { - unsigned long g2 = regs->u_regs [UREG_G2]; - unsigned long fixup = search_extables_range(regs->pc, &g2); + const struct exception_table_entry *entry; - if (!fixup) { + entry = search_exception_tables(regs->pc); + if (!entry) { unsigned long address = compute_effective_address(regs, insn); if(address < PAGE_SIZE) { printk(KERN_ALERT "Unable to handle kernel NULL pointer dereference in mna handler"); @@ -232,9 +233,8 @@ static void kernel_mna_trap_fault(struct pt_regs *regs, unsigned int insn) die_if_kernel("Oops", regs); /* Not reached */ } - regs->pc = fixup; + regs->pc = entry->fixup; regs->npc = regs->pc + 4; - regs->u_regs [UREG_G2] = g2; } asmlinkage void kernel_unaligned_trap(struct pt_regs *regs, unsigned int insn) diff --git a/arch/sparc/mm/Makefile b/arch/sparc/mm/Makefile index 68db1f859b02..871354aa3c00 100644 --- a/arch/sparc/mm/Makefile +++ b/arch/sparc/mm/Makefile @@ -8,7 +8,7 @@ ccflags-y := -Werror obj-$(CONFIG_SPARC64) += ultra.o tlb.o tsb.o obj-y += fault_$(BITS).o obj-y += init_$(BITS).o -obj-$(CONFIG_SPARC32) += extable.o srmmu.o iommu.o io-unit.o +obj-$(CONFIG_SPARC32) += srmmu.o iommu.o io-unit.o obj-$(CONFIG_SPARC32) += srmmu_access.o obj-$(CONFIG_SPARC32) += hypersparc.o viking.o tsunami.o swift.o obj-$(CONFIG_SPARC32) += leon_mm.o diff --git a/arch/sparc/mm/extable.c b/arch/sparc/mm/extable.c deleted file mode 100644 index 241b40641873..000000000000 --- a/arch/sparc/mm/extable.c +++ /dev/null @@ -1,107 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * linux/arch/sparc/mm/extable.c - */ - -#include -#include -#include - -void sort_extable(struct exception_table_entry *start, - struct exception_table_entry *finish) -{ -} - -/* Caller knows they are in a range if ret->fixup == 0 */ -const struct exception_table_entry * -search_extable(const struct exception_table_entry *base, - const size_t num, - unsigned long value) -{ - int i; - - /* Single insn entries are encoded as: - * word 1: insn address - * word 2: fixup code address - * - * Range entries are encoded as: - * word 1: first insn address - * word 2: 0 - * word 3: last insn address + 4 bytes - * word 4: fixup code address - * - * Deleted entries are encoded as: - * word 1: unused - * word 2: -1 - * - * See asm/uaccess.h for more details. - */ - - /* 1. Try to find an exact match. */ - for (i = 0; i < num; i++) { - if (base[i].fixup == 0) { - /* A range entry, skip both parts. 
*/ - i++; - continue; - } - - /* A deleted entry; see trim_init_extable */ - if (base[i].fixup == -1) - continue; - - if (base[i].insn == value) - return &base[i]; - } - - /* 2. Try to find a range match. */ - for (i = 0; i < (num - 1); i++) { - if (base[i].fixup) - continue; - - if (base[i].insn <= value && base[i + 1].insn > value) - return &base[i]; - - i++; - } - - return NULL; -} - -#ifdef CONFIG_MODULES -/* We could memmove them around; easier to mark the trimmed ones. */ -void trim_init_extable(struct module *m) -{ - unsigned int i; - bool range; - - for (i = 0; i < m->num_exentries; i += range ? 2 : 1) { - range = m->extable[i].fixup == 0; - - if (within_module_init(m->extable[i].insn, m)) { - m->extable[i].fixup = -1; - if (range) - m->extable[i+1].fixup = -1; - } - if (range) - i++; - } -} -#endif /* CONFIG_MODULES */ - -/* Special extable search, which handles ranges. Returns fixup */ -unsigned long search_extables_range(unsigned long addr, unsigned long *g2) -{ - const struct exception_table_entry *entry; - - entry = search_exception_tables(addr); - if (!entry) - return 0; - - /* Inside range? Fix g2 and return correct fixup */ - if (!entry->fixup) { - *g2 = (addr - entry->insn) / 4; - return (entry + 1)->fixup; - } - - return entry->fixup; -} diff --git a/arch/sparc/mm/fault_32.c b/arch/sparc/mm/fault_32.c index 290869fd6b6a..de2031c2b2d7 100644 --- a/arch/sparc/mm/fault_32.c +++ b/arch/sparc/mm/fault_32.c @@ -23,6 +23,7 @@ #include #include #include +#include #include #include @@ -114,8 +115,6 @@ asmlinkage void do_sparc_fault(struct pt_regs *regs, int text_fault, int write, struct vm_area_struct *vma; struct task_struct *tsk = current; struct mm_struct *mm = tsk->mm; - unsigned int fixup; - unsigned long g2; int from_user = !(regs->psr & PSR_PS); int code; vm_fault_t fault; @@ -233,22 +232,19 @@ bad_area_nosemaphore: /* Is this in ex_table? 
*/ no_context: - g2 = regs->u_regs[UREG_G2]; if (!from_user) { - fixup = search_extables_range(regs->pc, &g2); - /* Values below 10 are reserved for other things */ - if (fixup > 10) { + const struct exception_table_entry *entry; + + entry = search_exception_tables(regs->pc); #ifdef DEBUG_EXCEPTIONS - printk("Exception: PC<%08lx> faddr<%08lx>\n", - regs->pc, address); - printk("EX_TABLE: insn<%08lx> fixup<%08x> g2<%08lx>\n", - regs->pc, fixup, g2); + printk("Exception: PC<%08lx> faddr<%08lx>\n", + regs->pc, address); + printk("EX_TABLE: insn<%08lx> fixup<%08x>\n", + regs->pc, entry->fixup); #endif - regs->u_regs[UREG_G2] = g2; - regs->pc = fixup; - regs->npc = regs->pc + 4; - return; - } + regs->pc = entry->fixup; + regs->npc = regs->pc + 4; + return; } unhandled_fault(address, tsk, regs); diff --git a/lib/extable.c b/lib/extable.c index c3e59caf7ffa..9c9f40bd2b3d 100644 --- a/lib/extable.c +++ b/lib/extable.c @@ -21,7 +21,6 @@ static inline unsigned long ex_to_insn(const struct exception_table_entry *x) } #endif -#ifndef ARCH_HAS_SORT_EXTABLE #ifndef ARCH_HAS_RELATIVE_EXTABLE #define swap_ex NULL #else @@ -88,9 +87,6 @@ void trim_init_extable(struct module *m) m->num_exentries--; } #endif /* CONFIG_MODULES */ -#endif /* !ARCH_HAS_SORT_EXTABLE */ - -#ifndef ARCH_HAS_SEARCH_EXTABLE static int cmp_ex_search(const void *key, const void *elt) { @@ -120,4 +116,3 @@ search_extable(const struct exception_table_entry *base, return bsearch(&value, base, num, sizeof(struct exception_table_entry), cmp_ex_search); } -#endif -- cgit v1.2.3 From d17b9ec777d86c590a77a404565be5d6005f2fe2 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 19 Aug 2020 17:50:40 -0400 Subject: sparc64: get rid of fake_swapper_regs no reason to have ->kregs of initial thread set up in a special way - we can keep them on stack, same as for every other thread. 
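(Illustrative note, restating the effect of the hunks below rather than adding anything new: the initial thread's pt_regs are now carved out of the top of its own stack the same way every other task's are, e.g.

	/* kregs sit at the very top of init_stack, just below THREAD_SIZE */
	.kregs = (struct pt_regs *)(init_stack + THREAD_SIZE) - 1

with the boot-time stack pointer in head_64.S lowered by TRACEREG_SZ to leave room for them.)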
Signed-off-by: Al Viro --- arch/sparc/include/asm/thread_info_64.h | 1 + arch/sparc/kernel/head_64.S | 2 +- arch/sparc/kernel/setup_64.c | 4 ---- 3 files changed, 2 insertions(+), 5 deletions(-) (limited to 'arch') diff --git a/arch/sparc/include/asm/thread_info_64.h b/arch/sparc/include/asm/thread_info_64.h index 42cd4cd3892e..8047a9caab2f 100644 --- a/arch/sparc/include/asm/thread_info_64.h +++ b/arch/sparc/include/asm/thread_info_64.h @@ -118,6 +118,7 @@ struct thread_info { .task = &tsk, \ .current_ds = ASI_P, \ .preempt_count = INIT_PREEMPT_COUNT, \ + .kregs = (struct pt_regs *)(init_stack+THREAD_SIZE)-1 \ } /* how to get the thread information struct from C */ diff --git a/arch/sparc/kernel/head_64.S b/arch/sparc/kernel/head_64.S index c5ff2472b3d9..72a5bdc833ea 100644 --- a/arch/sparc/kernel/head_64.S +++ b/arch/sparc/kernel/head_64.S @@ -706,7 +706,7 @@ tlb_fixup_done: wr %g0, ASI_P, %asi mov 1, %g1 sllx %g1, THREAD_SHIFT, %g1 - sub %g1, (STACKFRAME_SZ + STACK_BIAS), %g1 + sub %g1, (STACKFRAME_SZ + STACK_BIAS + TRACEREG_SZ), %g1 add %g6, %g1, %sp /* Set per-cpu pointer initially to zero, this makes diff --git a/arch/sparc/kernel/setup_64.c b/arch/sparc/kernel/setup_64.c index d87244197d5c..48abee4eee29 100644 --- a/arch/sparc/kernel/setup_64.c +++ b/arch/sparc/kernel/setup_64.c @@ -165,8 +165,6 @@ extern int root_mountflags; char reboot_command[COMMAND_LINE_SIZE]; -static struct pt_regs fake_swapper_regs = { { 0, }, 0, 0, 0, 0 }; - static void __init per_cpu_patch(void) { struct cpuid_patch_entry *p; @@ -661,8 +659,6 @@ void __init setup_arch(char **cmdline_p) rd_image_start = ram_flags & RAMDISK_IMAGE_START_MASK; #endif - task_thread_info(&init_task)->kregs = &fake_swapper_regs; - #ifdef CONFIG_IP_PNP if (!ic_set_manually) { phandle chosen = prom_finddevice("/chosen"); -- cgit v1.2.3 From af7652500b4c43643a8531b82974e97b1248a03a Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 19 Aug 2020 18:05:02 -0400 Subject: sparc32: get rid of fake_swapper_regs no reason to have ->kregs of initial thread set up in a special way - we can keep them on stack, same as for every other thread. Signed-off-by: Al Viro --- arch/sparc/include/asm/processor_32.h | 1 + arch/sparc/kernel/head_32.S | 2 +- arch/sparc/kernel/process_32.c | 9 +-------- arch/sparc/kernel/setup_32.c | 3 --- 4 files changed, 3 insertions(+), 12 deletions(-) (limited to 'arch') diff --git a/arch/sparc/include/asm/processor_32.h b/arch/sparc/include/asm/processor_32.h index 3c4bc2189092..d7b71d7bed1f 100644 --- a/arch/sparc/include/asm/processor_32.h +++ b/arch/sparc/include/asm/processor_32.h @@ -60,6 +60,7 @@ struct thread_struct { #define INIT_THREAD { \ .flags = SPARC_FLAG_KTHREAD, \ .current_ds = KERNEL_DS, \ + .kregs = (struct pt_regs *)(init_stack+THREAD_SIZE)-1 \ } /* Do necessary setup to start up a newly executed thread. */ diff --git a/arch/sparc/kernel/head_32.S b/arch/sparc/kernel/head_32.S index be30c8d4cc73..6044b82b9767 100644 --- a/arch/sparc/kernel/head_32.S +++ b/arch/sparc/kernel/head_32.S @@ -515,7 +515,7 @@ continue_boot: /* I want a kernel stack NOW! */ set init_thread_union, %g1 - set (THREAD_SIZE - STACKFRAME_SZ), %g2 + set (THREAD_SIZE - STACKFRAME_SZ - TRACEREG_SZ), %g2 add %g1, %g2, %sp mov 0, %fp /* And for good luck */ diff --git a/arch/sparc/kernel/process_32.c b/arch/sparc/kernel/process_32.c index a02363735915..97549e01f540 100644 --- a/arch/sparc/kernel/process_32.c +++ b/arch/sparc/kernel/process_32.c @@ -218,14 +218,7 @@ void flush_thread(void) } /* This task is no longer a kernel thread. 
*/ - if (current->thread.flags & SPARC_FLAG_KTHREAD) { - current->thread.flags &= ~SPARC_FLAG_KTHREAD; - - /* We must fixup kregs as well. */ - /* XXX This was not fixed for ti for a while, worked. Unused? */ - current->thread.kregs = (struct pt_regs *) - (task_stack_page(current) + (THREAD_SIZE - TRACEREG_SZ)); - } + current->thread.flags &= ~SPARC_FLAG_KTHREAD; } static inline struct sparc_stackf __user * diff --git a/arch/sparc/kernel/setup_32.c b/arch/sparc/kernel/setup_32.c index eea43a1aef1b..c8e0dd99f370 100644 --- a/arch/sparc/kernel/setup_32.c +++ b/arch/sparc/kernel/setup_32.c @@ -266,7 +266,6 @@ static __init void leon_patch(void) } struct tt_entry *sparc_ttable; -static struct pt_regs fake_swapper_regs; /* Called from head_32.S - before we have setup anything * in the kernel. Be very careful with what you do here. @@ -363,8 +362,6 @@ void __init setup_arch(char **cmdline_p) (*(linux_dbvec->teach_debugger))(); } - init_task.thread.kregs = &fake_swapper_regs; - /* Run-time patch instructions to match the cpu model */ per_cpu_patch(); -- cgit v1.2.3 From 415ddc3b105616d6a4fec279ed7d87841cbfa3fb Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 20 Aug 2020 10:55:25 -0400 Subject: sparc32: take ->thread.flags out it was used for two things - dealing with unusual ->kregs for kernel threads and "emulate unaligned userland accesses for this thread"; the former is killed off by the previous commit, the latter never had been set in the first place. Signed-off-by: Al Viro --- arch/sparc/include/asm/processor_32.h | 5 -- arch/sparc/kernel/process_32.c | 5 -- arch/sparc/kernel/unaligned_32.c | 96 +---------------------------------- 3 files changed, 1 insertion(+), 105 deletions(-) (limited to 'arch') diff --git a/arch/sparc/include/asm/processor_32.h b/arch/sparc/include/asm/processor_32.h index d7b71d7bed1f..b6242f7771e9 100644 --- a/arch/sparc/include/asm/processor_32.h +++ b/arch/sparc/include/asm/processor_32.h @@ -50,15 +50,10 @@ struct thread_struct { unsigned long fsr; unsigned long fpqdepth; struct fpq fpqueue[16]; - unsigned long flags; mm_segment_t current_ds; }; -#define SPARC_FLAG_KTHREAD 0x1 /* task is a kernel thread */ -#define SPARC_FLAG_UNALIGNED 0x2 /* is allowed to do unaligned accesses */ - #define INIT_THREAD { \ - .flags = SPARC_FLAG_KTHREAD, \ .current_ds = KERNEL_DS, \ .kregs = (struct pt_regs *)(init_stack+THREAD_SIZE)-1 \ } diff --git a/arch/sparc/kernel/process_32.c b/arch/sparc/kernel/process_32.c index 97549e01f540..f75caecff115 100644 --- a/arch/sparc/kernel/process_32.c +++ b/arch/sparc/kernel/process_32.c @@ -216,9 +216,6 @@ void flush_thread(void) clear_thread_flag(TIF_USEDFPU); #endif } - - /* This task is no longer a kernel thread. 
*/ - current->thread.flags &= ~SPARC_FLAG_KTHREAD; } static inline struct sparc_stackf __user * @@ -306,7 +303,6 @@ int copy_thread(unsigned long clone_flags, unsigned long sp, unsigned long arg, extern int nwindows; unsigned long psr; memset(new_stack, 0, STACKFRAME_SZ + TRACEREG_SZ); - p->thread.flags |= SPARC_FLAG_KTHREAD; p->thread.current_ds = KERNEL_DS; ti->kpc = (((unsigned long) ret_from_kernel_thread) - 0x8); childregs->u_regs[UREG_G1] = sp; /* function */ @@ -318,7 +314,6 @@ int copy_thread(unsigned long clone_flags, unsigned long sp, unsigned long arg, } memcpy(new_stack, (char *)regs - STACKFRAME_SZ, STACKFRAME_SZ + TRACEREG_SZ); childregs->u_regs[UREG_FP] = sp; - p->thread.flags &= ~SPARC_FLAG_KTHREAD; p->thread.current_ds = USER_DS; ti->kpc = (((unsigned long) ret_from_fork) - 0x8); ti->kpsr = current->thread.fork_kpsr | PSR_PIL; diff --git a/arch/sparc/kernel/unaligned_32.c b/arch/sparc/kernel/unaligned_32.c index 83db94c0b431..82b60d09ea3c 100644 --- a/arch/sparc/kernel/unaligned_32.c +++ b/arch/sparc/kernel/unaligned_32.c @@ -274,103 +274,9 @@ asmlinkage void kernel_unaligned_trap(struct pt_regs *regs, unsigned int insn) } } -static inline int ok_for_user(struct pt_regs *regs, unsigned int insn, - enum direction dir) -{ - unsigned int reg; - int size = ((insn >> 19) & 3) == 3 ? 8 : 4; - - if ((regs->pc | regs->npc) & 3) - return 0; - - /* Must access_ok() in all the necessary places. */ -#define WINREG_ADDR(regnum) \ - ((void __user *)(((unsigned long *)regs->u_regs[UREG_FP])+(regnum))) - - reg = (insn >> 25) & 0x1f; - if (reg >= 16) { - if (!access_ok(WINREG_ADDR(reg - 16), size)) - return -EFAULT; - } - reg = (insn >> 14) & 0x1f; - if (reg >= 16) { - if (!access_ok(WINREG_ADDR(reg - 16), size)) - return -EFAULT; - } - if (!(insn & 0x2000)) { - reg = (insn & 0x1f); - if (reg >= 16) { - if (!access_ok(WINREG_ADDR(reg - 16), size)) - return -EFAULT; - } - } -#undef WINREG_ADDR - return 0; -} - -static void user_mna_trap_fault(struct pt_regs *regs, unsigned int insn) +asmlinkage void user_unaligned_trap(struct pt_regs *regs, unsigned int insn) { send_sig_fault(SIGBUS, BUS_ADRALN, (void __user *)safe_compute_effective_address(regs, insn), 0, current); } - -asmlinkage void user_unaligned_trap(struct pt_regs *regs, unsigned int insn) -{ - enum direction dir; - - if(!(current->thread.flags & SPARC_FLAG_UNALIGNED) || - (((insn >> 30) & 3) != 3)) - goto kill_user; - dir = decode_direction(insn); - if(!ok_for_user(regs, insn, dir)) { - goto kill_user; - } else { - int err, size = decode_access_size(insn); - unsigned long addr; - - if(floating_point_load_or_store_p(insn)) { - printk("User FPU load/store unaligned unsupported.\n"); - goto kill_user; - } - - addr = compute_effective_address(regs, insn); - perf_sw_event(PERF_COUNT_SW_ALIGNMENT_FAULTS, 1, regs, addr); - switch(dir) { - case load: - err = do_int_load(fetch_reg_addr(((insn>>25)&0x1f), - regs), - size, (unsigned long *) addr, - decode_signedness(insn)); - break; - - case store: - err = do_int_store(((insn>>25)&0x1f), size, - (unsigned long *) addr, regs); - break; - - case both: - /* - * This was supported in 2.4. However, we question - * the value of SWAP instruction across word boundaries. 
- */ - printk("Unaligned SWAP unsupported.\n"); - err = -EFAULT; - break; - - default: - unaligned_panic("Impossible user unaligned trap."); - goto out; - } - if (err) - goto kill_user; - else - advance(regs); - goto out; - } - -kill_user: - user_mna_trap_fault(regs, insn); -out: - ; -} -- cgit v1.2.3 From b29dd96b905f3dd543f4ca729447286adf934dd6 Mon Sep 17 00:00:00 2001 From: Brendan Jackman Date: Tue, 16 Feb 2021 12:53:07 +0000 Subject: bpf, x86: Fix BPF_FETCH atomic and/or/xor with r0 as src This code generates a CMPXCHG loop in order to implement atomic_fetch bitwise operations. Because CMPXCHG is hard-coded to use rax (which holds the BPF r0 value), it saves the _real_ r0 value into the internal "ax" temporary register and restores it once the loop is complete. In the middle of the loop, the actual bitwise operation is performed using src_reg. The bug occurs when src_reg is r0: as described above, r0 has been clobbered and the real r0 value is in the ax register. Therefore, perform this operation on the ax register instead, when src_reg is r0. Fixes: 981f94c3e921 ("bpf: Add bitwise atomic instructions") Signed-off-by: Brendan Jackman Signed-off-by: Daniel Borkmann Acked-by: KP Singh Link: https://lore.kernel.org/bpf/20210216125307.1406237-1-jackmanb@google.com --- arch/x86/net/bpf_jit_comp.c | 10 +++++++--- tools/testing/selftests/bpf/verifier/atomic_and.c | 23 +++++++++++++++++++++++ 2 files changed, 30 insertions(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c index 79e7a0ec1da5..6926d0ca6c71 100644 --- a/arch/x86/net/bpf_jit_comp.c +++ b/arch/x86/net/bpf_jit_comp.c @@ -1349,6 +1349,7 @@ st: if (is_imm8(insn->off)) insn->imm == (BPF_XOR | BPF_FETCH)) { u8 *branch_target; bool is64 = BPF_SIZE(insn->code) == BPF_DW; + u32 real_src_reg = src_reg; /* * Can't be implemented with a single x86 insn. @@ -1357,6 +1358,9 @@ st: if (is_imm8(insn->off)) /* Will need RAX as a CMPXCHG operand so save R0 */ emit_mov_reg(&prog, true, BPF_REG_AX, BPF_REG_0); + if (src_reg == BPF_REG_0) + real_src_reg = BPF_REG_AX; + branch_target = prog; /* Load old value */ emit_ldx(&prog, BPF_SIZE(insn->code), @@ -1366,9 +1370,9 @@ st: if (is_imm8(insn->off)) * put the result in the AUX_REG. 
*/ emit_mov_reg(&prog, is64, AUX_REG, BPF_REG_0); - maybe_emit_mod(&prog, AUX_REG, src_reg, is64); + maybe_emit_mod(&prog, AUX_REG, real_src_reg, is64); EMIT2(simple_alu_opcodes[BPF_OP(insn->imm)], - add_2reg(0xC0, AUX_REG, src_reg)); + add_2reg(0xC0, AUX_REG, real_src_reg)); /* Attempt to swap in new value */ err = emit_atomic(&prog, BPF_CMPXCHG, dst_reg, AUX_REG, insn->off, @@ -1381,7 +1385,7 @@ st: if (is_imm8(insn->off)) */ EMIT2(X86_JNE, -(prog - branch_target) - 2); /* Return the pre-modification value */ - emit_mov_reg(&prog, is64, src_reg, BPF_REG_0); + emit_mov_reg(&prog, is64, real_src_reg, BPF_REG_0); /* Restore R0 after clobbering RAX */ emit_mov_reg(&prog, true, BPF_REG_0, BPF_REG_AX); break; diff --git a/tools/testing/selftests/bpf/verifier/atomic_and.c b/tools/testing/selftests/bpf/verifier/atomic_and.c index 1bdc8e6684f7..fe4bb70eb9c5 100644 --- a/tools/testing/selftests/bpf/verifier/atomic_and.c +++ b/tools/testing/selftests/bpf/verifier/atomic_and.c @@ -75,3 +75,26 @@ }, .result = ACCEPT, }, +{ + "BPF_ATOMIC_AND with fetch - r0 as source reg", + .insns = { + /* val = 0x110; */ + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0x110), + /* old = atomic_fetch_and(&val, 0x011); */ + BPF_MOV64_IMM(BPF_REG_0, 0x011), + BPF_ATOMIC_OP(BPF_DW, BPF_AND | BPF_FETCH, BPF_REG_10, BPF_REG_0, -8), + /* if (old != 0x110) exit(3); */ + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0x110, 2), + BPF_MOV64_IMM(BPF_REG_0, 3), + BPF_EXIT_INSN(), + /* if (val != 0x010) exit(2); */ + BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_10, -8), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0x010, 2), + BPF_MOV64_IMM(BPF_REG_1, 2), + BPF_EXIT_INSN(), + /* exit(0); */ + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, +}, -- cgit v1.2.3 From eead089311f4d935ab5d1d8fbb0c42ad44699ada Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Thu, 18 Feb 2021 23:30:58 +1100 Subject: powerpc/4xx: Fix build errors from mfdcr() lkp reported a build error in fsp2.o: CC arch/powerpc/platforms/44x/fsp2.o {standard input}:577: Error: unsupported relocation against base Which comes from: pr_err("GESR0: 0x%08x\n", mfdcr(base + PLB4OPB_GESR0)); Where our mfdcr() macro is stringifying "base + PLB4OPB_GESR0", and passing that to the assembler, which obviously doesn't work. The mfdcr() macro already checks that the argument is constant using __builtin_constant_p(), and if not calls the out-of-line version of mfdcr(). But in this case GCC is smart enough to notice that "base + PLB4OPB_GESR0" will be constant, even though it's not something we can immediately stringify into a register number. Segher pointed out that passing the register number to the inline asm as a constant would be better, and in fact it fixes the build error, presumably because it gives GCC a chance to resolve the value. While we're at it, change mtdcr() similarly. 
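A minimal sketch of the failure mode, condensed from the hunk below (not a standalone compilable example): the old macro pasted the DCR number into the mnemonic as text, so a constant-foldable expression such as "base + PLB4OPB_GESR0" reached the assembler unevaluated, while an "n" input operand makes GCC resolve it to an immediate first.

	/* old: the assembler sees "mfdcr %0,base + PLB4OPB_GESR0" and rejects it */
	asm volatile("mfdcr %0," __stringify(rn) : "=r" (rval));

	/* new: the "n" constraint forces rn to a compile-time constant operand */
	asm volatile("mfdcr %0, %1" : "=r" (rval) : "n" (rn));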
Reported-by: kernel test robot Suggested-by: Segher Boessenkool Signed-off-by: Michael Ellerman Acked-by: Feng Tang Link: https://lore.kernel.org/r/20210218123058.748882-1-mpe@ellerman.id.au --- arch/powerpc/include/asm/dcr-native.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/include/asm/dcr-native.h b/arch/powerpc/include/asm/dcr-native.h index 7141ccea8c94..a92059964579 100644 --- a/arch/powerpc/include/asm/dcr-native.h +++ b/arch/powerpc/include/asm/dcr-native.h @@ -53,8 +53,8 @@ static inline void mtdcrx(unsigned int reg, unsigned int val) #define mfdcr(rn) \ ({unsigned int rval; \ if (__builtin_constant_p(rn) && rn < 1024) \ - asm volatile("mfdcr %0," __stringify(rn) \ - : "=r" (rval)); \ + asm volatile("mfdcr %0, %1" : "=r" (rval) \ + : "n" (rn)); \ else if (likely(cpu_has_feature(CPU_FTR_INDEXED_DCR))) \ rval = mfdcrx(rn); \ else \ @@ -64,8 +64,8 @@ static inline void mtdcrx(unsigned int reg, unsigned int val) #define mtdcr(rn, v) \ do { \ if (__builtin_constant_p(rn) && rn < 1024) \ - asm volatile("mtdcr " __stringify(rn) ",%0" \ - : : "r" (v)); \ + asm volatile("mtdcr %0, %1" \ + : : "n" (rn), "r" (v)); \ else if (likely(cpu_has_feature(CPU_FTR_INDEXED_DCR))) \ mtdcrx(rn, v); \ else \ -- cgit v1.2.3 From f9619d5e5174867536b7e558683bc4408eab833f Mon Sep 17 00:00:00 2001 From: Greg Kurz Date: Mon, 15 Feb 2021 10:45:06 +0100 Subject: powerpc/pseries: Don't enforce MSI affinity with kdump Depending on the number of online CPUs in the original kernel, it is likely for CPU #0 to be offline in a kdump kernel. The associated IRQs in the affinity mappings provided by irq_create_affinity_masks() are thus not started by irq_startup(), as per-design with managed IRQs. This can be a problem with multi-queue block devices driven by blk-mq : such a non-started IRQ is very likely paired with the single queue enforced by blk-mq during kdump (see blk_mq_alloc_tag_set()). This causes the device to remain silent and likely hangs the guest at some point. This is a regression caused by commit 9ea69a55b3b9 ("powerpc/pseries: Pass MSI affinity to irq_create_mapping()"). Note that this only happens with the XIVE interrupt controller because XICS has a workaround to bypass affinity, which is activated during kdump with the "noirqdistrib" kernel parameter. The issue comes from a combination of factors: - discrepancy between the number of queues detected by the multi-queue block driver, that was used to create the MSI vectors, and the single queue mode enforced later on by blk-mq because of kdump (i.e. keeping all queues fixes the issue) - CPU#0 offline (i.e. kdump always succeed with CPU#0) Given that I couldn't reproduce on x86, which seems to always have CPU#0 online even during kdump, I'm not sure where this should be fixed. Hence going for another approach : fine-grained affinity is for performance and we don't really care about that during kdump. Simply revert to the previous working behavior of ignoring affinity masks in this case only. 
Fixes: 9ea69a55b3b9 ("powerpc/pseries: Pass MSI affinity to irq_create_mapping()") Cc: stable@vger.kernel.org # v5.10+ Signed-off-by: Greg Kurz Reviewed-by: Laurent Vivier Reviewed-by: Cédric Le Goater Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210215094506.1196119-1-groug@kaod.org --- arch/powerpc/platforms/pseries/msi.c | 25 +++++++++++++++++++++++-- 1 file changed, 23 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/platforms/pseries/msi.c b/arch/powerpc/platforms/pseries/msi.c index b3ac2455faad..637300330507 100644 --- a/arch/powerpc/platforms/pseries/msi.c +++ b/arch/powerpc/platforms/pseries/msi.c @@ -4,6 +4,7 @@ * Copyright 2006-2007 Michael Ellerman, IBM Corp. */ +#include #include #include #include @@ -458,8 +459,28 @@ again: return hwirq; } - virq = irq_create_mapping_affinity(NULL, hwirq, - entry->affinity); + /* + * Depending on the number of online CPUs in the original + * kernel, it is likely for CPU #0 to be offline in a kdump + * kernel. The associated IRQs in the affinity mappings + * provided by irq_create_affinity_masks() are thus not + * started by irq_startup(), as per-design for managed IRQs. + * This can be a problem with multi-queue block devices driven + * by blk-mq : such a non-started IRQ is very likely paired + * with the single queue enforced by blk-mq during kdump (see + * blk_mq_alloc_tag_set()). This causes the device to remain + * silent and likely hangs the guest at some point. + * + * We don't really care for fine-grained affinity when doing + * kdump actually : simply ignore the pre-computed affinity + * masks in this case and let the default mask with all CPUs + * be used when creating the IRQ mappings. + */ + if (is_kdump_kernel()) + virq = irq_create_mapping(NULL, hwirq); + else + virq = irq_create_mapping_affinity(NULL, hwirq, + entry->affinity); if (!virq) { pr_debug("rtas_msi: Failed mapping hwirq %d\n", hwirq); -- cgit v1.2.3 From c119565a15a628efdfa51352f9f6c5186e506a1c Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Mon, 1 Feb 2021 06:29:50 +0000 Subject: powerpc/603: Fix protection of user pages mapped with PROT_NONE On book3s/32, page protection is defined by the PP bits in the PTE which provide the following protection depending on the access keys defined in the matching segment register: - PP 00 means RW with key 0 and N/A with key 1. - PP 01 means RW with key 0 and RO with key 1. - PP 10 means RW with both key 0 and key 1. - PP 11 means RO with both key 0 and key 1. Since the implementation of kernel userspace access protection, PP bits have been set as follows: - PP00 for pages without _PAGE_USER - PP01 for pages with _PAGE_USER and _PAGE_RW - PP11 for pages with _PAGE_USER and without _PAGE_RW For kernelspace segments, kernel accesses are performed with key 0 and user accesses are performed with key 1. As PP00 is used for non _PAGE_USER pages, user can't access kernel pages not flagged _PAGE_USER while kernel can. For userspace segments, both kernel and user accesses are performed with key 0, therefore pages not flagged _PAGE_USER are still accessible to the user. This shouldn't be an issue, because userspace is expected to be accessible to the user. But unlike most other architectures, powerpc implements PROT_NONE protection by removing _PAGE_USER flag instead of flagging the page as not valid. This means that pages in userspace that are not flagged _PAGE_USER shall remain inaccessible. 
To get the expected behaviour, just mimic other architectures in the TLB miss handler by checking _PAGE_USER permission on userspace accesses as if it was the _PAGE_PRESENT bit. Note that this problem only is only for 603 cores. The 604+ have an hash table, and hash_page() function already implement the verification of _PAGE_USER permission on userspace pages. Fixes: f342adca3afc ("powerpc/32s: Prepare Kernel Userspace Access Protection") Cc: stable@vger.kernel.org # v5.2+ Reported-by: Christoph Plattner Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/4a0c6e3bb8f0c162457bf54d9bc6fd8d7b55129f.1612160907.git.christophe.leroy@csgroup.eu --- arch/powerpc/kernel/head_book3s_32.S | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/kernel/head_book3s_32.S b/arch/powerpc/kernel/head_book3s_32.S index 727fdab557c9..565e84e20a72 100644 --- a/arch/powerpc/kernel/head_book3s_32.S +++ b/arch/powerpc/kernel/head_book3s_32.S @@ -457,11 +457,12 @@ InstructionTLBMiss: cmplw 0,r1,r3 #endif mfspr r2, SPRN_SDR1 - li r1,_PAGE_PRESENT | _PAGE_ACCESSED | _PAGE_EXEC + li r1,_PAGE_PRESENT | _PAGE_ACCESSED | _PAGE_EXEC | _PAGE_USER rlwinm r2, r2, 28, 0xfffff000 #ifdef CONFIG_MODULES bgt- 112f lis r2, (swapper_pg_dir - PAGE_OFFSET)@ha /* if kernel address, use */ + li r1,_PAGE_PRESENT | _PAGE_ACCESSED | _PAGE_EXEC addi r2, r2, (swapper_pg_dir - PAGE_OFFSET)@l /* kernel page table */ #endif 112: rlwimi r2,r3,12,20,29 /* insert top 10 bits of address */ @@ -520,10 +521,11 @@ DataLoadTLBMiss: lis r1, TASK_SIZE@h /* check if kernel address */ cmplw 0,r1,r3 mfspr r2, SPRN_SDR1 - li r1, _PAGE_PRESENT | _PAGE_ACCESSED + li r1, _PAGE_PRESENT | _PAGE_ACCESSED | _PAGE_USER rlwinm r2, r2, 28, 0xfffff000 bgt- 112f lis r2, (swapper_pg_dir - PAGE_OFFSET)@ha /* if kernel address, use */ + li r1, _PAGE_PRESENT | _PAGE_ACCESSED addi r2, r2, (swapper_pg_dir - PAGE_OFFSET)@l /* kernel page table */ 112: rlwimi r2,r3,12,20,29 /* insert top 10 bits of address */ lwz r2,0(r2) /* get pmd entry */ @@ -597,10 +599,11 @@ DataStoreTLBMiss: lis r1, TASK_SIZE@h /* check if kernel address */ cmplw 0,r1,r3 mfspr r2, SPRN_SDR1 - li r1, _PAGE_RW | _PAGE_DIRTY | _PAGE_PRESENT | _PAGE_ACCESSED + li r1, _PAGE_RW | _PAGE_DIRTY | _PAGE_PRESENT | _PAGE_ACCESSED | _PAGE_USER rlwinm r2, r2, 28, 0xfffff000 bgt- 112f lis r2, (swapper_pg_dir - PAGE_OFFSET)@ha /* if kernel address, use */ + li r1, _PAGE_RW | _PAGE_DIRTY | _PAGE_PRESENT | _PAGE_ACCESSED addi r2, r2, (swapper_pg_dir - PAGE_OFFSET)@l /* kernel page table */ 112: rlwimi r2,r3,12,20,29 /* insert top 10 bits of address */ lwz r2,0(r2) /* get pmd entry */ -- cgit v1.2.3 From 91b6c5dbe9e072dbdb181eed89c5c824e92ac0f5 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Wed, 24 Feb 2021 06:34:22 +0000 Subject: powerpc/syscall: Force inlining of __prep_irq_for_enabled_exit() As reported by kernel test robot, a randconfig with high amount of debuging options can lead to build failure for undefined reference to replay_soft_interrupts() on ppc32. This is due to gcc not seeing that __prep_irq_for_enabled_exit() always returns true on ppc32 because it doesn't inline it for some reason. Force inlining of __prep_irq_for_enabled_exit() to fix the build. 
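The effect of the missing inlining can be reproduced with a small user-space analogue. This is an illustration only, not the kernel code; replay_soft_interrupts() here is just a deliberately undefined symbol standing in for the real one:

	/* gcc -O2 demo.c links and runs; gcc -O0 demo.c fails to link with
	 * "undefined reference to `replay_soft_interrupts'" because the
	 * helper is neither inlined nor constant-folded. */
	#include <stdbool.h>
	#include <stdio.h>

	void replay_soft_interrupts(void);	/* deliberately never defined */

	static inline bool prep_irq_for_enabled_exit(void)
	{
		return true;			/* "always true on ppc32" stand-in */
	}

	int main(void)
	{
		if (!prep_irq_for_enabled_exit())
			replay_soft_interrupts();	/* dead once the helper is
							 * inlined and folded away */
		puts("branch eliminated, nothing left to link against");
		return 0;
	}

Marking the kernel helper __always_inline removes the compiler's discretion, which is what the patch below does.
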
Fixes: 344bb20b159d ("powerpc/syscall: Make interrupt.c buildable on PPC32") Reported-by: kernel test robot Signed-off-by: Christophe Leroy Acked-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/53f3a1f719441761000c41154602bf097d4350b5.1614148356.git.christophe.leroy@csgroup.eu --- arch/powerpc/kernel/interrupt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/powerpc/kernel/interrupt.c b/arch/powerpc/kernel/interrupt.c index 398cd86b6ada..2ef3c4051bb9 100644 --- a/arch/powerpc/kernel/interrupt.c +++ b/arch/powerpc/kernel/interrupt.c @@ -149,7 +149,7 @@ notrace long system_call_exception(long r3, long r4, long r5, * enabled when the interrupt handler returns (indicating a process-context / * synchronous interrupt) then irqs_enabled should be true. */ -static notrace inline bool __prep_irq_for_enabled_exit(bool clear_ri) +static notrace __always_inline bool __prep_irq_for_enabled_exit(bool clear_ri) { /* This must be done with RI=1 because tracing may touch vmaps */ trace_hardirqs_on(); -- cgit v1.2.3 From 386a966f5ce71a0364b158c5d0a6971f4e418ea8 Mon Sep 17 00:00:00 2001 From: Uwe Kleine-König Date: Thu, 25 Feb 2021 23:18:34 +0100 Subject: vio: make remove callback return void The driver core ignores the return value of struct bus_type::remove() because there is only little that can be done. To simplify the quest to make this function return void, let struct vio_driver::remove() return void, too. All users already unconditionally return 0, this commit makes it obvious that returning an error code is a bad idea. Note there are two nominally different implementations for a vio bus: one in arch/sparc/kernel/vio.c and the other in arch/powerpc/platforms/pseries/vio.c. This patch only adapts the powerpc one. Before this patch for a device that was bound to a driver without a remove callback vio_cmo_bus_remove(viodev) wasn't called. As the device core still considers the device unbound after vio_bus_remove() returns calling this unconditionally is the consistent behaviour which is implemented here. 
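For context, the driver-core side referred to above looks roughly like this at the time of the patch (a hedged sketch; only the remove() member is relevant here):

	struct bus_type {
		const char		*name;
		/* ... */
		int (*remove)(struct device *dev);	/* return value ignored
							 * by the driver core */
		/* ... */
	};

Since nothing acts on that return code, letting vio_driver::remove() return void costs nothing and makes the contract explicit.
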
Signed-off-by: Uwe Kleine-König Reviewed-by: Tyrel Datwyler Acked-by: Lijun Pan Acked-by: Greg Kroah-Hartman [mpe: Drop unneeded hvcs_remove() forward declaration, squash in change from sfr to drop ibmvnic_remove() forward declaration] Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210225221834.160083-1-uwe@kleine-koenig.org --- arch/powerpc/include/asm/vio.h | 2 +- arch/powerpc/platforms/pseries/vio.c | 7 +++---- drivers/char/hw_random/pseries-rng.c | 3 +-- drivers/char/tpm/tpm_ibmvtpm.c | 4 +--- drivers/crypto/nx/nx-842-pseries.c | 4 +--- drivers/crypto/nx/nx.c | 4 +--- drivers/misc/ibmvmc.c | 4 +--- drivers/net/ethernet/ibm/ibmveth.c | 4 +--- drivers/net/ethernet/ibm/ibmvnic.c | 5 +---- drivers/scsi/ibmvscsi/ibmvfc.c | 3 +-- drivers/scsi/ibmvscsi/ibmvscsi.c | 4 +--- drivers/scsi/ibmvscsi_tgt/ibmvscsi_tgt.c | 4 +--- drivers/tty/hvc/hvcs.c | 4 +--- 13 files changed, 15 insertions(+), 37 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/include/asm/vio.h b/arch/powerpc/include/asm/vio.h index 0cf52746531b..721c0d6715ac 100644 --- a/arch/powerpc/include/asm/vio.h +++ b/arch/powerpc/include/asm/vio.h @@ -113,7 +113,7 @@ struct vio_driver { const char *name; const struct vio_device_id *id_table; int (*probe)(struct vio_dev *dev, const struct vio_device_id *id); - int (*remove)(struct vio_dev *dev); + void (*remove)(struct vio_dev *dev); /* A driver must have a get_desired_dma() function to * be loaded in a CMO environment if it uses DMA. */ diff --git a/arch/powerpc/platforms/pseries/vio.c b/arch/powerpc/platforms/pseries/vio.c index b2797cfe4e2b..9cb4fc839fd5 100644 --- a/arch/powerpc/platforms/pseries/vio.c +++ b/arch/powerpc/platforms/pseries/vio.c @@ -1261,7 +1261,6 @@ static int vio_bus_remove(struct device *dev) struct vio_dev *viodev = to_vio_dev(dev); struct vio_driver *viodrv = to_vio_driver(dev->driver); struct device *devptr; - int ret = 1; /* * Hold a reference to the device after the remove function is called @@ -1270,13 +1269,13 @@ static int vio_bus_remove(struct device *dev) devptr = get_device(dev); if (viodrv->remove) - ret = viodrv->remove(viodev); + viodrv->remove(viodev); - if (!ret && firmware_has_feature(FW_FEATURE_CMO)) + if (firmware_has_feature(FW_FEATURE_CMO)) vio_cmo_bus_remove(viodev); put_device(devptr); - return ret; + return 0; } /** diff --git a/drivers/char/hw_random/pseries-rng.c b/drivers/char/hw_random/pseries-rng.c index 8038a8a9fb58..f4949b689bd5 100644 --- a/drivers/char/hw_random/pseries-rng.c +++ b/drivers/char/hw_random/pseries-rng.c @@ -54,10 +54,9 @@ static int pseries_rng_probe(struct vio_dev *dev, return hwrng_register(&pseries_rng); } -static int pseries_rng_remove(struct vio_dev *dev) +static void pseries_rng_remove(struct vio_dev *dev) { hwrng_unregister(&pseries_rng); - return 0; } static const struct vio_device_id pseries_rng_driver_ids[] = { diff --git a/drivers/char/tpm/tpm_ibmvtpm.c b/drivers/char/tpm/tpm_ibmvtpm.c index 994385bf37c0..903604769de9 100644 --- a/drivers/char/tpm/tpm_ibmvtpm.c +++ b/drivers/char/tpm/tpm_ibmvtpm.c @@ -343,7 +343,7 @@ static int ibmvtpm_crq_send_init_complete(struct ibmvtpm_dev *ibmvtpm) * * Return: Always 0. 
*/ -static int tpm_ibmvtpm_remove(struct vio_dev *vdev) +static void tpm_ibmvtpm_remove(struct vio_dev *vdev) { struct tpm_chip *chip = dev_get_drvdata(&vdev->dev); struct ibmvtpm_dev *ibmvtpm = dev_get_drvdata(&chip->dev); @@ -372,8 +372,6 @@ static int tpm_ibmvtpm_remove(struct vio_dev *vdev) kfree(ibmvtpm); /* For tpm_ibmvtpm_get_desired_dma */ dev_set_drvdata(&vdev->dev, NULL); - - return 0; } /** diff --git a/drivers/crypto/nx/nx-842-pseries.c b/drivers/crypto/nx/nx-842-pseries.c index 2de5e3672e42..cc8dd3072b8b 100644 --- a/drivers/crypto/nx/nx-842-pseries.c +++ b/drivers/crypto/nx/nx-842-pseries.c @@ -1042,7 +1042,7 @@ error: return ret; } -static int nx842_remove(struct vio_dev *viodev) +static void nx842_remove(struct vio_dev *viodev) { struct nx842_devdata *old_devdata; unsigned long flags; @@ -1063,8 +1063,6 @@ static int nx842_remove(struct vio_dev *viodev) if (old_devdata) kfree(old_devdata->counters); kfree(old_devdata); - - return 0; } static const struct vio_device_id nx842_vio_driver_ids[] = { diff --git a/drivers/crypto/nx/nx.c b/drivers/crypto/nx/nx.c index 0d2dc5be7f19..1d0e8a1ba160 100644 --- a/drivers/crypto/nx/nx.c +++ b/drivers/crypto/nx/nx.c @@ -783,7 +783,7 @@ static int nx_probe(struct vio_dev *viodev, const struct vio_device_id *id) return nx_register_algs(); } -static int nx_remove(struct vio_dev *viodev) +static void nx_remove(struct vio_dev *viodev) { dev_dbg(&viodev->dev, "entering nx_remove for UA 0x%x\n", viodev->unit_address); @@ -811,8 +811,6 @@ static int nx_remove(struct vio_dev *viodev) nx_unregister_skcipher(&nx_ecb_aes_alg, NX_FC_AES, NX_MODE_AES_ECB); } - - return 0; } diff --git a/drivers/misc/ibmvmc.c b/drivers/misc/ibmvmc.c index 2d778d0f011e..c0fe3295c330 100644 --- a/drivers/misc/ibmvmc.c +++ b/drivers/misc/ibmvmc.c @@ -2288,15 +2288,13 @@ crq_failed: return -EPERM; } -static int ibmvmc_remove(struct vio_dev *vdev) +static void ibmvmc_remove(struct vio_dev *vdev) { struct crq_server_adapter *adapter = dev_get_drvdata(&vdev->dev); dev_info(adapter->dev, "Entering remove for UA 0x%x\n", vdev->unit_address); ibmvmc_release_crq_queue(adapter); - - return 0; } static struct vio_device_id ibmvmc_device_table[] = { diff --git a/drivers/net/ethernet/ibm/ibmveth.c b/drivers/net/ethernet/ibm/ibmveth.c index c3ec9ceed833..7fea9ae60f13 100644 --- a/drivers/net/ethernet/ibm/ibmveth.c +++ b/drivers/net/ethernet/ibm/ibmveth.c @@ -1758,7 +1758,7 @@ static int ibmveth_probe(struct vio_dev *dev, const struct vio_device_id *id) return 0; } -static int ibmveth_remove(struct vio_dev *dev) +static void ibmveth_remove(struct vio_dev *dev) { struct net_device *netdev = dev_get_drvdata(&dev->dev); struct ibmveth_adapter *adapter = netdev_priv(netdev); @@ -1771,8 +1771,6 @@ static int ibmveth_remove(struct vio_dev *dev) free_netdev(netdev); dev_set_drvdata(&dev->dev, NULL); - - return 0; } static struct attribute veth_active_attr; diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c index 118a4bd3f877..fe3201ba2034 100644 --- a/drivers/net/ethernet/ibm/ibmvnic.c +++ b/drivers/net/ethernet/ibm/ibmvnic.c @@ -78,7 +78,6 @@ MODULE_LICENSE("GPL"); MODULE_VERSION(IBMVNIC_DRIVER_VERSION); static int ibmvnic_version = IBMVNIC_INITIAL_VERSION; -static int ibmvnic_remove(struct vio_dev *); static void release_sub_crqs(struct ibmvnic_adapter *, bool); static int ibmvnic_reset_crq(struct ibmvnic_adapter *); static int ibmvnic_send_crq_init(struct ibmvnic_adapter *); @@ -5396,7 +5395,7 @@ ibmvnic_init_fail: return rc; } -static int 
ibmvnic_remove(struct vio_dev *dev) +static void ibmvnic_remove(struct vio_dev *dev) { struct net_device *netdev = dev_get_drvdata(&dev->dev); struct ibmvnic_adapter *adapter = netdev_priv(netdev); @@ -5437,8 +5436,6 @@ static int ibmvnic_remove(struct vio_dev *dev) device_remove_file(&dev->dev, &dev_attr_failover); free_netdev(netdev); dev_set_drvdata(&dev->dev, NULL); - - return 0; } static ssize_t failover_store(struct device *dev, struct device_attribute *attr, diff --git a/drivers/scsi/ibmvscsi/ibmvfc.c b/drivers/scsi/ibmvscsi/ibmvfc.c index 755313b766b9..e663085a8944 100644 --- a/drivers/scsi/ibmvscsi/ibmvfc.c +++ b/drivers/scsi/ibmvscsi/ibmvfc.c @@ -6038,7 +6038,7 @@ out: * Return value: * 0 **/ -static int ibmvfc_remove(struct vio_dev *vdev) +static void ibmvfc_remove(struct vio_dev *vdev) { struct ibmvfc_host *vhost = dev_get_drvdata(&vdev->dev); LIST_HEAD(purge); @@ -6070,7 +6070,6 @@ static int ibmvfc_remove(struct vio_dev *vdev) spin_unlock(&ibmvfc_driver_lock); scsi_host_put(vhost->host); LEAVE; - return 0; } /** diff --git a/drivers/scsi/ibmvscsi/ibmvscsi.c b/drivers/scsi/ibmvscsi/ibmvscsi.c index 29fcc44be2d5..77fafb1bc173 100644 --- a/drivers/scsi/ibmvscsi/ibmvscsi.c +++ b/drivers/scsi/ibmvscsi/ibmvscsi.c @@ -2335,7 +2335,7 @@ static int ibmvscsi_probe(struct vio_dev *vdev, const struct vio_device_id *id) return -1; } -static int ibmvscsi_remove(struct vio_dev *vdev) +static void ibmvscsi_remove(struct vio_dev *vdev) { struct ibmvscsi_host_data *hostdata = dev_get_drvdata(&vdev->dev); @@ -2356,8 +2356,6 @@ static int ibmvscsi_remove(struct vio_dev *vdev) spin_unlock(&ibmvscsi_driver_lock); scsi_host_put(hostdata->host); - - return 0; } /** diff --git a/drivers/scsi/ibmvscsi_tgt/ibmvscsi_tgt.c b/drivers/scsi/ibmvscsi_tgt/ibmvscsi_tgt.c index cc3908c2d2f9..9abd9e253af6 100644 --- a/drivers/scsi/ibmvscsi_tgt/ibmvscsi_tgt.c +++ b/drivers/scsi/ibmvscsi_tgt/ibmvscsi_tgt.c @@ -3595,7 +3595,7 @@ free_adapter: return rc; } -static int ibmvscsis_remove(struct vio_dev *vdev) +static void ibmvscsis_remove(struct vio_dev *vdev) { struct scsi_info *vscsi = dev_get_drvdata(&vdev->dev); @@ -3622,8 +3622,6 @@ static int ibmvscsis_remove(struct vio_dev *vdev) list_del(&vscsi->list); spin_unlock_bh(&ibmvscsis_dev_lock); kfree(vscsi); - - return 0; } static ssize_t system_id_show(struct device *dev, diff --git a/drivers/tty/hvc/hvcs.c b/drivers/tty/hvc/hvcs.c index c90848919644..9afa1dcef2c2 100644 --- a/drivers/tty/hvc/hvcs.c +++ b/drivers/tty/hvc/hvcs.c @@ -317,7 +317,6 @@ static void hvcs_hangup(struct tty_struct * tty); static int hvcs_probe(struct vio_dev *dev, const struct vio_device_id *id); -static int hvcs_remove(struct vio_dev *dev); static int __init hvcs_module_init(void); static void __exit hvcs_module_exit(void); static int hvcs_initialize(void); @@ -819,7 +818,7 @@ static int hvcs_probe( return 0; } -static int hvcs_remove(struct vio_dev *dev) +static void hvcs_remove(struct vio_dev *dev) { struct hvcs_struct *hvcsd = dev_get_drvdata(&dev->dev); unsigned long flags; @@ -849,7 +848,6 @@ static int hvcs_remove(struct vio_dev *dev) printk(KERN_INFO "HVCS: vty-server@%X removed from the" " vio bus.\n", dev->unit_address); - return 0; }; static struct vio_driver hvcs_vio_driver = { -- cgit v1.2.3 From acdad8fb4a1574323db88f98a38b630691574e16 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Sat, 27 Feb 2021 16:30:48 +0000 Subject: powerpc: Force inlining of mmu_has_feature to fix build failure The test robot has managed to generate a random config leading to following build 
failure: LD .tmp_vmlinux.kallsyms1 powerpc64-linux-ld: arch/powerpc/mm/pgtable.o: in function `ptep_set_access_flags': pgtable.c:(.text.ptep_set_access_flags+0xf0): undefined reference to `hash__flush_tlb_page' powerpc64-linux-ld: arch/powerpc/mm/book3s32/mmu.o: in function `MMU_init_hw_patch': mmu.c:(.init.text+0x452): undefined reference to `patch__hash_page_A0' powerpc64-linux-ld: mmu.c:(.init.text+0x45e): undefined reference to `patch__hash_page_A0' powerpc64-linux-ld: mmu.c:(.init.text+0x46a): undefined reference to `patch__hash_page_A1' powerpc64-linux-ld: mmu.c:(.init.text+0x476): undefined reference to `patch__hash_page_A1' powerpc64-linux-ld: mmu.c:(.init.text+0x482): undefined reference to `patch__hash_page_A2' powerpc64-linux-ld: mmu.c:(.init.text+0x48e): undefined reference to `patch__hash_page_A2' powerpc64-linux-ld: mmu.c:(.init.text+0x49e): undefined reference to `patch__hash_page_B' powerpc64-linux-ld: mmu.c:(.init.text+0x4aa): undefined reference to `patch__hash_page_B' powerpc64-linux-ld: mmu.c:(.init.text+0x4b6): undefined reference to `patch__hash_page_C' powerpc64-linux-ld: mmu.c:(.init.text+0x4c2): undefined reference to `patch__hash_page_C' powerpc64-linux-ld: mmu.c:(.init.text+0x4ce): undefined reference to `patch__flush_hash_A0' powerpc64-linux-ld: mmu.c:(.init.text+0x4da): undefined reference to `patch__flush_hash_A0' powerpc64-linux-ld: mmu.c:(.init.text+0x4e6): undefined reference to `patch__flush_hash_A1' powerpc64-linux-ld: mmu.c:(.init.text+0x4f2): undefined reference to `patch__flush_hash_A1' powerpc64-linux-ld: mmu.c:(.init.text+0x4fe): undefined reference to `patch__flush_hash_A2' powerpc64-linux-ld: mmu.c:(.init.text+0x50a): undefined reference to `patch__flush_hash_A2' powerpc64-linux-ld: mmu.c:(.init.text+0x522): undefined reference to `patch__flush_hash_B' powerpc64-linux-ld: mmu.c:(.init.text+0x532): undefined reference to `patch__flush_hash_B' powerpc64-linux-ld: arch/powerpc/mm/book3s32/mmu.o: in function `update_mmu_cache': mmu.c:(.text.update_mmu_cache+0xa0): undefined reference to `add_hash_page' powerpc64-linux-ld: mm/memory.o: in function `zap_pte_range': memory.c:(.text.zap_pte_range+0x160): undefined reference to `flush_hash_pages' powerpc64-linux-ld: mm/memory.o: in function `handle_pte_fault': memory.c:(.text.handle_pte_fault+0x180): undefined reference to `hash__flush_tlb_page' This is due to mmu_has_feature() not being inlined. See extract of build of mmu.c with -Winline: In file included from ./include/linux/mm_types.h:19, from ./include/linux/mmzone.h:21, from ./include/linux/gfp.h:6, from ./include/linux/mm.h:10, from arch/powerpc/mm/book3s32/mmu.c:21: ./arch/powerpc/include/asm/mmu.h: In function 'find_free_bat': ./arch/powerpc/include/asm/mmu.h:231:20: warning: inlining failed in call to 'early_mmu_has_feature': call is unlikely and code size would grow [-Winline] 231 | static inline bool early_mmu_has_feature(unsigned long feature) | ^~~~~~~~~~~~~~~~~~~~~ ./arch/powerpc/include/asm/mmu.h:291:9: note: called from here 291 | return early_mmu_has_feature(feature); | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ The code relies on constant folding of MMU_FTRS_POSSIBLE at buildtime and elimination of non possible parts of code at compile time. For this to work, mmu_has_feature() and early_mmu_has_feature() must be inlined. 
Fixes: 259149cf7c3c ("powerpc/32s: Only build hash code when CONFIG_PPC_BOOK3S_604 is selected") Reported-by: kernel test robot Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/cf61345912c078c96f171afd0fcc48ef27cbdc3f.1614443418.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/mmu.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/include/asm/mmu.h b/arch/powerpc/include/asm/mmu.h index 80b27f5d9648..607168b1aef4 100644 --- a/arch/powerpc/include/asm/mmu.h +++ b/arch/powerpc/include/asm/mmu.h @@ -228,7 +228,7 @@ enum { #define MMU_FTRS_ALWAYS 0 #endif -static inline bool early_mmu_has_feature(unsigned long feature) +static __always_inline bool early_mmu_has_feature(unsigned long feature) { if (MMU_FTRS_ALWAYS & feature) return true; @@ -286,7 +286,7 @@ static inline void mmu_feature_keys_init(void) } -static inline bool mmu_has_feature(unsigned long feature) +static __always_inline bool mmu_has_feature(unsigned long feature) { return early_mmu_has_feature(feature); } -- cgit v1.2.3 From 5ae5fbd2107959b68ac69a8b75412208663aea88 Mon Sep 17 00:00:00 2001 From: Athira Rajeev Date: Thu, 25 Feb 2021 05:10:39 -0500 Subject: powerpc/perf: Fix handling of privilege level checks in perf interrupt context Running "perf mem record" in powerpc platforms with selinux enabled resulted in soft lockup's. Below call-trace was seen in the logs: CPU: 58 PID: 3751 Comm: sssd_nss Not tainted 5.11.0-rc7+ #2 NIP: c000000000dff3d4 LR: c000000000dff3d0 CTR: 0000000000000000 REGS: c000007fffab7d60 TRAP: 0100 Not tainted (5.11.0-rc7+) ... NIP _raw_spin_lock_irqsave+0x94/0x120 LR _raw_spin_lock_irqsave+0x90/0x120 Call Trace: 0xc00000000fd47260 (unreliable) skb_queue_tail+0x3c/0x90 audit_log_end+0x6c/0x180 common_lsm_audit+0xb0/0xe0 slow_avc_audit+0xa4/0x110 avc_has_perm+0x1c4/0x260 selinux_perf_event_open+0x74/0xd0 security_perf_event_open+0x68/0xc0 record_and_restart+0x6e8/0x7f0 perf_event_interrupt+0x22c/0x560 performance_monitor_exception0x4c/0x60 performance_monitor_common_virt+0x1c8/0x1d0 interrupt: f00 at _raw_spin_lock_irqsave+0x38/0x120 NIP: c000000000dff378 LR: c000000000b5fbbc CTR: c0000000007d47f0 REGS: c00000000fd47860 TRAP: 0f00 Not tainted (5.11.0-rc7+) ... NIP _raw_spin_lock_irqsave+0x38/0x120 LR skb_queue_tail+0x3c/0x90 interrupt: f00 0x38 (unreliable) 0xc00000000aae6200 audit_log_end+0x6c/0x180 audit_log_exit+0x344/0xf80 __audit_syscall_exit+0x2c0/0x320 do_syscall_trace_leave+0x148/0x200 syscall_exit_prepare+0x324/0x390 system_call_common+0xfc/0x27c The above trace shows that while the CPU was handling a performance monitor exception, there was a call to security_perf_event_open() function. In powerpc core-book3s, this function is called from perf_allow_kernel() check during recording of data address in the sample via perf_get_data_addr(). Commit da97e18458fb ("perf_event: Add support for LSM and SELinux checks") introduced security enhancements to perf. As part of this commit, the new security hook for perf_event_open() was added in all places where perf paranoid check was previously used. In powerpc core-book3s code, originally had paranoid checks in perf_get_data_addr() and power_pmu_bhrb_read(). So perf_paranoid_kernel() checks were replaced with perf_allow_kernel() in these PMU helper functions as well. The intention of paranoid checks in core-book3s was to verify privilege access before capturing some of the sample data. 
Along with paranoid checks, perf_allow_kernel() also does a security_perf_event_open(). Since these functions are accessed while recording a sample, we end up calling selinux_perf_event_open() in PMI context. Some of the security functions use spinlock like sidtab_sid2str_put(). If a perf interrupt hits under a spin lock and if we end up in calling selinux hook functions in PMI handler, this could cause a dead lock. Since the purpose of this security hook is to control access to perf_event_open(), it is not right to call this in interrupt context. The paranoid checks in powerpc core-book3s were done at interrupt time which is also not correct. Reference commits: Commit cd1231d7035f ("powerpc/perf: Prevent kernel address leak via perf_get_data_addr()") Commit bb19af816025 ("powerpc/perf: Prevent kernel address leak to userspace via BHRB buffer") We only allow creation of events that have already passed the privilege checks in perf_event_open(). So these paranoid checks are not needed at event time. As a fix, patch uses 'event->attr.exclude_kernel' check to prevent exposing kernel address for userspace only sampling. Fixes: cd1231d7035f ("powerpc/perf: Prevent kernel address leak via perf_get_data_addr()") Cc: stable@vger.kernel.org # v4.17+ Suggested-by: Michael Ellerman Signed-off-by: Athira Rajeev Acked-by: Peter Zijlstra (Intel) Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/1614247839-1428-1-git-send-email-atrajeev@linux.vnet.ibm.com --- arch/powerpc/perf/core-book3s.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c index 6817331e22ff..766f064f00fb 100644 --- a/arch/powerpc/perf/core-book3s.c +++ b/arch/powerpc/perf/core-book3s.c @@ -222,7 +222,7 @@ static inline void perf_get_data_addr(struct perf_event *event, struct pt_regs * if (!(mmcra & MMCRA_SAMPLE_ENABLE) || sdar_valid) *addrp = mfspr(SPRN_SDAR); - if (is_kernel_addr(mfspr(SPRN_SDAR)) && perf_allow_kernel(&event->attr) != 0) + if (is_kernel_addr(mfspr(SPRN_SDAR)) && event->attr.exclude_kernel) *addrp = 0; } @@ -507,7 +507,7 @@ static void power_pmu_bhrb_read(struct perf_event *event, struct cpu_hw_events * * addresses, hence include a check before filtering code */ if (!(ppmu->flags & PPMU_ARCH_31) && - is_kernel_addr(addr) && perf_allow_kernel(&event->attr) != 0) + is_kernel_addr(addr) && event->attr.exclude_kernel) continue; /* Branches are read most recent first (ie. mfbhrb 0 is -- cgit v1.2.3 From 5c88a17e15795226b56d83f579cbb9b7a4864f79 Mon Sep 17 00:00:00 2001 From: Jordan Niethe Date: Thu, 25 Feb 2021 14:19:46 +1100 Subject: powerpc/sstep: Fix VSX instruction emulation Commit af99da74333b ("powerpc/sstep: Support VSX vector paired storage access instructions") added loading and storing 32 word long data into adjacent VSRs. However the calculation used to determine if two VSRs needed to be loaded/stored inadvertently prevented the load/storing taking place for instructions with a data length less than 16 words. 
This causes the emulation to not function correctly, which can be seen by the alignment_handler selftest: $ ./alignment_handler [snip] test: test_alignment_handler_vsx_207 tags: git_version:powerpc-5.12-1-0-g82d2c16b350f VSX: 2.07B Doing lxsspx: PASSED Doing lxsiwax: FAILED: Wrong Data Doing lxsiwzx: PASSED Doing stxsspx: PASSED Doing stxsiwx: PASSED failure: test_alignment_handler_vsx_207 test: test_alignment_handler_vsx_300 tags: git_version:powerpc-5.12-1-0-g82d2c16b350f VSX: 3.00B Doing lxsd: PASSED Doing lxsibzx: PASSED Doing lxsihzx: PASSED Doing lxssp: FAILED: Wrong Data Doing lxv: PASSED Doing lxvb16x: PASSED Doing lxvh8x: PASSED Doing lxvx: PASSED Doing lxvwsx: FAILED: Wrong Data Doing lxvl: PASSED Doing lxvll: PASSED Doing stxsd: PASSED Doing stxsibx: PASSED Doing stxsihx: PASSED Doing stxssp: PASSED Doing stxv: PASSED Doing stxvb16x: PASSED Doing stxvh8x: PASSED Doing stxvx: PASSED Doing stxvl: PASSED Doing stxvll: PASSED failure: test_alignment_handler_vsx_300 [snip] Fix this by making sure all VSX instruction emulation correctly load/store from the VSRs. Fixes: af99da74333b ("powerpc/sstep: Support VSX vector paired storage access instructions") Signed-off-by: Jordan Niethe Reviewed-by: Ravi Bangoria Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210225031946.1458206-1-jniethe5@gmail.com --- arch/powerpc/lib/sstep.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/lib/sstep.c b/arch/powerpc/lib/sstep.c index bb5c20d4ca91..c6aebc149d14 100644 --- a/arch/powerpc/lib/sstep.c +++ b/arch/powerpc/lib/sstep.c @@ -904,7 +904,7 @@ static nokprobe_inline int do_vsx_load(struct instruction_op *op, if (!address_ok(regs, ea, size) || copy_mem_in(mem, ea, size, regs)) return -EFAULT; - nr_vsx_regs = size / sizeof(__vector128); + nr_vsx_regs = max(1ul, size / sizeof(__vector128)); emulate_vsx_load(op, buf, mem, cross_endian); preempt_disable(); if (reg < 32) { @@ -951,7 +951,7 @@ static nokprobe_inline int do_vsx_store(struct instruction_op *op, if (!address_ok(regs, ea, size)) return -EFAULT; - nr_vsx_regs = size / sizeof(__vector128); + nr_vsx_regs = max(1ul, size / sizeof(__vector128)); preempt_disable(); if (reg < 32) { /* FP regs + extensions */ -- cgit v1.2.3 From 778e45d7720d663811352943dd515b41f6849637 Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Tue, 2 Mar 2021 21:07:07 +0100 Subject: parisc: Enable -mlong-calls gcc option with CONFIG_COMPILE_TEST The kernel test robot reported multiple linkage problems like this: hppa64-linux-ld: init/main.o(.init.text+0x56c): cannot reach printk init/main.o: in function `unknown_bootoption': (.init.text+0x56c): relocation truncated to fit: R_PARISC_PCREL22F against symbol `printk' defined in .text.unlikely section in kernel/printk/printk.o There are two ways to solve it: a) Enable the -mlong-call compiler option (CONFIG_MLONGCALLS), b) Add long branch stub support in 64-bit linker. While b) is the long-term solution, this patch works around the issue by automatically enabling the CONFIG_MLONGCALLS option when CONFIG_COMPILE_TEST is set, which indicates that a non-production kernel (e.g. 0-day kernel) is built. 
Signed-off-by: Helge Deller Reported-by: kernel test robot Fixes: 00e35f2b0e8a ("parisc: Enable -mlong-calls gcc option by default when !CONFIG_MODULES") Cc: stable@vger.kernel.org # v5.6+ --- arch/parisc/Kconfig | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/parisc/Kconfig b/arch/parisc/Kconfig index 4e53ac46e857..afc3b8d03572 100644 --- a/arch/parisc/Kconfig +++ b/arch/parisc/Kconfig @@ -203,9 +203,12 @@ config PREFETCH def_bool y depends on PA8X00 || PA7200 +config PARISC_HUGE_KERNEL + def_bool y if !MODULES || UBSAN || FTRACE || COMPILE_TEST + config MLONGCALLS - def_bool y if !MODULES || UBSAN || FTRACE - bool "Enable the -mlong-calls compiler option for big kernels" if MODULES && !UBSAN && !FTRACE + def_bool y if PARISC_HUGE_KERNEL + bool "Enable the -mlong-calls compiler option for big kernels" if !PARISC_HUGE_KERNEL depends on PA8X00 help If you configure the kernel to include many drivers built-in instead -- cgit v1.2.3 From 460c9f1c944b4bf04f2934478fd3f865b730b771 Mon Sep 17 00:00:00 2001 From: Zhang Yunkai Date: Wed, 3 Mar 2021 18:24:10 -0800 Subject: arch/parisc/kernel: remove duplicate include in ptrace 'linux/compat.h' included in 'arch/parisc/kernel/ptrace.c' is duplicated. It is also included in the 24th line. Signed-off-by: Zhang Yunkai Signed-off-by: Helge Deller --- arch/parisc/kernel/ptrace.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'arch') diff --git a/arch/parisc/kernel/ptrace.c b/arch/parisc/kernel/ptrace.c index 2127974982df..65de6c4c9354 100644 --- a/arch/parisc/kernel/ptrace.c +++ b/arch/parisc/kernel/ptrace.c @@ -567,8 +567,6 @@ static const struct user_regset_view user_parisc_native_view = { }; #ifdef CONFIG_64BIT -#include - static int gpr32_get(struct task_struct *target, const struct user_regset *regset, struct membuf to) -- cgit v1.2.3 From beda430177f56656e7980dcce93456ffaa35676b Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Thu, 4 Mar 2021 18:18:08 -0800 Subject: KVM: x86: Ensure deadline timer has truly expired before posting its IRQ When posting a deadline timer interrupt, open code the checks guarding __kvm_wait_lapic_expire() in order to skip the lapic_timer_int_injected() check in kvm_wait_lapic_expire(). The injection check will always fail since the interrupt has not yet be injected. Moving the call after injection would also be wrong as that wouldn't actually delay delivery of the IRQ if it is indeed sent via posted interrupt. Fixes: 010fd37fddf6 ("KVM: LAPIC: Reduce world switch latency caused by timer_advance_ns") Cc: stable@vger.kernel.org Signed-off-by: Sean Christopherson Message-Id: <20210305021808.3769732-1-seanjc@google.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/lapic.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index 45d40bfacb7c..cb8ebfaccfb6 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -1642,7 +1642,16 @@ static void apic_timer_expired(struct kvm_lapic *apic, bool from_timer_fn) } if (kvm_use_posted_timer_interrupt(apic->vcpu)) { - kvm_wait_lapic_expire(vcpu); + /* + * Ensure the guest's timer has truly expired before posting an + * interrupt. Open code the relevant checks to avoid querying + * lapic_timer_int_injected(), which will be false since the + * interrupt isn't yet injected. Waiting until after injecting + * is not an option since that won't help a posted interrupt. 
+ */ + if (vcpu->arch.apic->lapic_timer.expired_tscdeadline && + vcpu->arch.apic->lapic_timer.timer_advance_ns) + __kvm_wait_lapic_expire(vcpu); kvm_apic_inject_pending_timer_irqs(apic); return; } -- cgit v1.2.3 From 99840a75454b66d69d2a450ab04e6438d75eba48 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Thu, 4 Mar 2021 18:16:37 -0800 Subject: KVM: SVM: Connect 'npt' module param to KVM's internal 'npt_enabled' Directly connect the 'npt' param to the 'npt_enabled' variable so that runtime adjustments to npt_enabled are reflected in sysfs. Move the !PAE restriction to a runtime check to ensure NPT is forced off if the host is using 2-level paging, and add a comment explicitly stating why NPT requires a 64-bit kernel or a kernel with PAE enabled. Opportunistically switch the param to octal permissions. Signed-off-by: Sean Christopherson Message-Id: <20210305021637.3768573-1-seanjc@google.com> Reviewed-by: Vitaly Kuznetsov Signed-off-by: Paolo Bonzini --- arch/x86/kvm/svm/svm.c | 25 +++++++++++++------------ 1 file changed, 13 insertions(+), 12 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index baee91c1e936..58a45bb139f8 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -115,13 +115,6 @@ static const struct svm_direct_access_msrs { { .index = MSR_INVALID, .always = false }, }; -/* enable NPT for AMD64 and X86 with PAE */ -#if defined(CONFIG_X86_64) || defined(CONFIG_X86_PAE) -bool npt_enabled = true; -#else -bool npt_enabled; -#endif - /* * These 2 parameters are used to config the controls for Pause-Loop Exiting: * pause_filter_count: On processors that support Pause filtering(indicated @@ -170,9 +163,12 @@ module_param(pause_filter_count_shrink, ushort, 0444); static unsigned short pause_filter_count_max = KVM_SVM_DEFAULT_PLE_WINDOW_MAX; module_param(pause_filter_count_max, ushort, 0444); -/* allow nested paging (virtualized MMU) for all guests */ -static int npt = true; -module_param(npt, int, S_IRUGO); +/* + * Use nested page tables by default. Note, NPT may get forced off by + * svm_hardware_setup() if it's unsupported by hardware or the host kernel. + */ +bool npt_enabled = true; +module_param_named(npt, npt_enabled, bool, 0444); /* allow nested virtualization in KVM/SVM */ static int nested = true; @@ -988,10 +984,15 @@ static __init int svm_hardware_setup(void) goto err; } - if (!boot_cpu_has(X86_FEATURE_NPT)) + /* + * KVM's MMU doesn't support using 2-level paging for itself, and thus + * NPT isn't supported if the host is using 2-level paging since host + * CR4 is unchanged on VMRUN. 
+ */ + if (!IS_ENABLED(CONFIG_X86_64) && !IS_ENABLED(CONFIG_X86_PAE)) npt_enabled = false; - if (npt_enabled && !npt) + if (!boot_cpu_has(X86_FEATURE_NPT)) npt_enabled = false; kvm_configure_mmu(npt_enabled, get_max_npt_level(), PG_LEVEL_1G); -- cgit v1.2.3 From 4691453406c3a799fdebac83a689919c2c877f04 Mon Sep 17 00:00:00 2001 From: Muhammad Usama Anjum Date: Fri, 5 Mar 2021 23:08:16 +0500 Subject: kvm: x86: use NULL instead of using plain integer as pointer Sparse warnings removed: warning: Using plain integer as NULL pointer Signed-off-by: Muhammad Usama Anjum Message-Id: <20210305180816.GA488770@LEGION> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/x86.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 868213ca4f98..46b0e52671bb 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -10600,7 +10600,7 @@ void __user * __x86_set_memory_region(struct kvm *kvm, int id, gpa_t gpa, return (void __user *)hva; } else { if (!slot || !slot->npages) - return 0; + return NULL; old_npages = slot->npages; hva = slot->userspace_addr; -- cgit v1.2.3 From b96b0c5de685df82019e16826a282d53d86d112c Mon Sep 17 00:00:00 2001 From: Suzuki K Poulose Date: Fri, 5 Mar 2021 18:52:47 +0000 Subject: KVM: arm64: nvhe: Save the SPE context early The nVHE KVM hyp drains and disables the SPE buffer, before entering the guest, as the EL1&0 translation regime is going to be loaded with that of the guest. But this operation is performed way too late, because : - The owning translation regime of the SPE buffer is transferred to EL2. (MDCR_EL2_E2PB == 0) - The guest Stage1 is loaded. Thus the flush could use the host EL1 virtual address, but use the EL2 translations instead of host EL1, for writing out any cached data. Fix this by moving the SPE buffer handling early enough. The restore path is doing the right thing. 
Fixes: 014c4c77aad7 ("KVM: arm64: Improve debug register save/restore flow") Cc: stable@vger.kernel.org Cc: Christoffer Dall Cc: Marc Zyngier Cc: Will Deacon Cc: Catalin Marinas Cc: Mark Rutland Cc: Alexandru Elisei Reviewed-by: Alexandru Elisei Signed-off-by: Suzuki K Poulose Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20210302120345.3102874-1-suzuki.poulose@arm.com Message-Id: <20210305185254.3730990-2-maz@kernel.org> Signed-off-by: Paolo Bonzini --- arch/arm64/include/asm/kvm_hyp.h | 5 +++++ arch/arm64/kvm/hyp/nvhe/debug-sr.c | 12 ++++++++++-- arch/arm64/kvm/hyp/nvhe/switch.c | 11 ++++++++++- 3 files changed, 25 insertions(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/arm64/include/asm/kvm_hyp.h b/arch/arm64/include/asm/kvm_hyp.h index c0450828378b..385bd7dd3d39 100644 --- a/arch/arm64/include/asm/kvm_hyp.h +++ b/arch/arm64/include/asm/kvm_hyp.h @@ -83,6 +83,11 @@ void sysreg_restore_guest_state_vhe(struct kvm_cpu_context *ctxt); void __debug_switch_to_guest(struct kvm_vcpu *vcpu); void __debug_switch_to_host(struct kvm_vcpu *vcpu); +#ifdef __KVM_NVHE_HYPERVISOR__ +void __debug_save_host_buffers_nvhe(struct kvm_vcpu *vcpu); +void __debug_restore_host_buffers_nvhe(struct kvm_vcpu *vcpu); +#endif + void __fpsimd_save_state(struct user_fpsimd_state *fp_regs); void __fpsimd_restore_state(struct user_fpsimd_state *fp_regs); diff --git a/arch/arm64/kvm/hyp/nvhe/debug-sr.c b/arch/arm64/kvm/hyp/nvhe/debug-sr.c index 91a711aa8382..f401724f12ef 100644 --- a/arch/arm64/kvm/hyp/nvhe/debug-sr.c +++ b/arch/arm64/kvm/hyp/nvhe/debug-sr.c @@ -58,16 +58,24 @@ static void __debug_restore_spe(u64 pmscr_el1) write_sysreg_s(pmscr_el1, SYS_PMSCR_EL1); } -void __debug_switch_to_guest(struct kvm_vcpu *vcpu) +void __debug_save_host_buffers_nvhe(struct kvm_vcpu *vcpu) { /* Disable and flush SPE data generation */ __debug_save_spe(&vcpu->arch.host_debug_state.pmscr_el1); +} + +void __debug_switch_to_guest(struct kvm_vcpu *vcpu) +{ __debug_switch_to_guest_common(vcpu); } -void __debug_switch_to_host(struct kvm_vcpu *vcpu) +void __debug_restore_host_buffers_nvhe(struct kvm_vcpu *vcpu) { __debug_restore_spe(vcpu->arch.host_debug_state.pmscr_el1); +} + +void __debug_switch_to_host(struct kvm_vcpu *vcpu) +{ __debug_switch_to_host_common(vcpu); } diff --git a/arch/arm64/kvm/hyp/nvhe/switch.c b/arch/arm64/kvm/hyp/nvhe/switch.c index f3d0e9eca56c..59aa1045fdaf 100644 --- a/arch/arm64/kvm/hyp/nvhe/switch.c +++ b/arch/arm64/kvm/hyp/nvhe/switch.c @@ -192,6 +192,14 @@ int __kvm_vcpu_run(struct kvm_vcpu *vcpu) pmu_switch_needed = __pmu_switch_to_guest(host_ctxt); __sysreg_save_state_nvhe(host_ctxt); + /* + * We must flush and disable the SPE buffer for nVHE, as + * the translation regime(EL1&0) is going to be loaded with + * that of the guest. And we must do this before we change the + * translation regime to EL2 (via MDCR_EL2_E2PB == 0) and + * before we load guest Stage1. + */ + __debug_save_host_buffers_nvhe(vcpu); __adjust_pc(vcpu); @@ -234,11 +242,12 @@ int __kvm_vcpu_run(struct kvm_vcpu *vcpu) if (vcpu->arch.flags & KVM_ARM64_FP_ENABLED) __fpsimd_save_fpexc32(vcpu); + __debug_switch_to_host(vcpu); /* * This must come after restoring the host sysregs, since a non-VHE * system may enable SPE here and make use of the TTBRs. 
*/ - __debug_switch_to_host(vcpu); + __debug_restore_host_buffers_nvhe(vcpu); if (pmu_switch_needed) __pmu_switch_to_host(host_ctxt); -- cgit v1.2.3 From 31948332d5fa392ad933f4a6a10026850649ed76 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Fri, 5 Mar 2021 18:52:48 +0000 Subject: KVM: arm64: Avoid corrupting vCPU context register in guest exit Commit 7db21530479f ("KVM: arm64: Restore hyp when panicking in guest context") tracks the currently running vCPU, clearing the pointer to NULL on exit from a guest. Unfortunately, the use of 'set_loaded_vcpu' clobbers x1 to point at the kvm_hyp_ctxt instead of the vCPU context, causing the subsequent RAS code to go off into the weeds when it saves the DISR assuming that the CPU context is embedded in a struct vCPU. Leave x1 alone and use x3 as a temporary register instead when clearing the vCPU on the guest exit path. Cc: Marc Zyngier Cc: Andrew Scull Cc: Fixes: 7db21530479f ("KVM: arm64: Restore hyp when panicking in guest context") Suggested-by: Quentin Perret Signed-off-by: Will Deacon Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20210226181211.14542-1-will@kernel.org Message-Id: <20210305185254.3730990-3-maz@kernel.org> Signed-off-by: Paolo Bonzini --- arch/arm64/kvm/hyp/entry.S | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm64/kvm/hyp/entry.S b/arch/arm64/kvm/hyp/entry.S index b0afad7a99c6..0c66a1d408fd 100644 --- a/arch/arm64/kvm/hyp/entry.S +++ b/arch/arm64/kvm/hyp/entry.S @@ -146,7 +146,7 @@ SYM_INNER_LABEL(__guest_exit, SYM_L_GLOBAL) // Now restore the hyp regs restore_callee_saved_regs x2 - set_loaded_vcpu xzr, x1, x2 + set_loaded_vcpu xzr, x2, x3 alternative_if ARM64_HAS_RAS_EXTN // If we have the RAS extensions we can consume a pending error -- cgit v1.2.3 From c4b000c3928d4f20acef79dccf3a65ae3795e0b0 Mon Sep 17 00:00:00 2001 From: Andrew Scull Date: Fri, 5 Mar 2021 18:52:49 +0000 Subject: KVM: arm64: Fix nVHE hyp panic host context restore When panicking from the nVHE hyp and restoring the host context, x29 is expected to hold a pointer to the host context. This wasn't being done so fix it to make sure there's a valid pointer the host context being used. Rather than passing a boolean indicating whether or not the host context should be restored, instead pass the pointer to the host context. NULL is passed to indicate that no context should be restored. 
Fixes: a2e102e20fd6 ("KVM: arm64: nVHE: Handle hyp panics") Cc: stable@vger.kernel.org Signed-off-by: Andrew Scull [maz: partial rewrite to fit 5.12-rc1] Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20210219122406.1337626-1-ascull@google.com Message-Id: <20210305185254.3730990-4-maz@kernel.org> Signed-off-by: Paolo Bonzini --- arch/arm64/include/asm/kvm_hyp.h | 3 ++- arch/arm64/kvm/hyp/nvhe/host.S | 15 ++++++++------- arch/arm64/kvm/hyp/nvhe/switch.c | 3 +-- 3 files changed, 11 insertions(+), 10 deletions(-) (limited to 'arch') diff --git a/arch/arm64/include/asm/kvm_hyp.h b/arch/arm64/include/asm/kvm_hyp.h index 385bd7dd3d39..32ae676236b6 100644 --- a/arch/arm64/include/asm/kvm_hyp.h +++ b/arch/arm64/include/asm/kvm_hyp.h @@ -102,7 +102,8 @@ bool kvm_host_psci_handler(struct kvm_cpu_context *host_ctxt); void __noreturn hyp_panic(void); #ifdef __KVM_NVHE_HYPERVISOR__ -void __noreturn __hyp_do_panic(bool restore_host, u64 spsr, u64 elr, u64 par); +void __noreturn __hyp_do_panic(struct kvm_cpu_context *host_ctxt, u64 spsr, + u64 elr, u64 par); #endif #endif /* __ARM64_KVM_HYP_H__ */ diff --git a/arch/arm64/kvm/hyp/nvhe/host.S b/arch/arm64/kvm/hyp/nvhe/host.S index 6585a7cbbc56..5d94584840cc 100644 --- a/arch/arm64/kvm/hyp/nvhe/host.S +++ b/arch/arm64/kvm/hyp/nvhe/host.S @@ -71,7 +71,8 @@ SYM_FUNC_START(__host_enter) SYM_FUNC_END(__host_enter) /* - * void __noreturn __hyp_do_panic(bool restore_host, u64 spsr, u64 elr, u64 par); + * void __noreturn __hyp_do_panic(struct kvm_cpu_context *host_ctxt, u64 spsr, + * u64 elr, u64 par); */ SYM_FUNC_START(__hyp_do_panic) /* Prepare and exit to the host's panic funciton. */ @@ -82,9 +83,11 @@ SYM_FUNC_START(__hyp_do_panic) hyp_kimg_va lr, x6 msr elr_el2, lr - /* Set the panic format string. Use the, now free, LR as scratch. */ - ldr lr, =__hyp_panic_string - hyp_kimg_va lr, x6 + mov x29, x0 + + /* Load the format string into x0 and arguments into x1-7 */ + ldr x0, =__hyp_panic_string + hyp_kimg_va x0, x6 /* Load the format arguments into x1-7. */ mov x6, x3 @@ -94,9 +97,7 @@ SYM_FUNC_START(__hyp_do_panic) mrs x5, hpfar_el2 /* Enter the host, conditionally restoring the host context. */ - cmp x0, xzr - mov x0, lr - b.eq __host_enter_without_restoring + cbz x29, __host_enter_without_restoring b __host_enter_for_panic SYM_FUNC_END(__hyp_do_panic) diff --git a/arch/arm64/kvm/hyp/nvhe/switch.c b/arch/arm64/kvm/hyp/nvhe/switch.c index 59aa1045fdaf..68ab6b4d5141 100644 --- a/arch/arm64/kvm/hyp/nvhe/switch.c +++ b/arch/arm64/kvm/hyp/nvhe/switch.c @@ -266,7 +266,6 @@ void __noreturn hyp_panic(void) u64 spsr = read_sysreg_el2(SYS_SPSR); u64 elr = read_sysreg_el2(SYS_ELR); u64 par = read_sysreg_par(); - bool restore_host = true; struct kvm_cpu_context *host_ctxt; struct kvm_vcpu *vcpu; @@ -280,7 +279,7 @@ void __noreturn hyp_panic(void) __sysreg_restore_state_nvhe(host_ctxt); } - __hyp_do_panic(restore_host, spsr, elr, par); + __hyp_do_panic(host_ctxt, spsr, elr, par); unreachable(); } -- cgit v1.2.3 From 6b5b368fccd7109b052e45af8ba1464c8d140a49 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Fri, 5 Mar 2021 18:52:50 +0000 Subject: KVM: arm64: Turn kvm_arm_support_pmu_v3() into a static key We currently find out about the presence of a HW PMU (or the handling of that PMU by perf, which amounts to the same thing) in a fairly roundabout way, by checking the number of counters available to perf. That's good enough for now, but we will soon need to find about about that on paths where perf is out of reach (in the world switch). 
Instead, let's turn kvm_arm_support_pmu_v3() into a static key. Signed-off-by: Marc Zyngier Reviewed-by: Alexandru Elisei Link: https://lore.kernel.org/r/20210209114844.3278746-2-maz@kernel.org Message-Id: <20210305185254.3730990-5-maz@kernel.org> Signed-off-by: Paolo Bonzini --- arch/arm64/kvm/perf.c | 10 ++++++++++ arch/arm64/kvm/pmu-emul.c | 10 ---------- include/kvm/arm_pmu.h | 9 +++++++-- 3 files changed, 17 insertions(+), 12 deletions(-) (limited to 'arch') diff --git a/arch/arm64/kvm/perf.c b/arch/arm64/kvm/perf.c index d45b8b9a4415..739164324afe 100644 --- a/arch/arm64/kvm/perf.c +++ b/arch/arm64/kvm/perf.c @@ -11,6 +11,8 @@ #include +DEFINE_STATIC_KEY_FALSE(kvm_arm_pmu_available); + static int kvm_is_in_guest(void) { return kvm_get_running_vcpu() != NULL; @@ -48,6 +50,14 @@ static struct perf_guest_info_callbacks kvm_guest_cbs = { int kvm_perf_init(void) { + /* + * Check if HW_PERF_EVENTS are supported by checking the number of + * hardware performance counters. This could ensure the presence of + * a physical PMU and CONFIG_PERF_EVENT is selected. + */ + if (IS_ENABLED(CONFIG_ARM_PMU) && perf_num_counters() > 0) + static_branch_enable(&kvm_arm_pmu_available); + return perf_register_guest_info_callbacks(&kvm_guest_cbs); } diff --git a/arch/arm64/kvm/pmu-emul.c b/arch/arm64/kvm/pmu-emul.c index e9ec08b0b070..e32c6e139a09 100644 --- a/arch/arm64/kvm/pmu-emul.c +++ b/arch/arm64/kvm/pmu-emul.c @@ -823,16 +823,6 @@ u64 kvm_pmu_get_pmceid(struct kvm_vcpu *vcpu, bool pmceid1) return val & mask; } -bool kvm_arm_support_pmu_v3(void) -{ - /* - * Check if HW_PERF_EVENTS are supported by checking the number of - * hardware performance counters. This could ensure the presence of - * a physical PMU and CONFIG_PERF_EVENT is selected. - */ - return (perf_num_counters() > 0); -} - int kvm_arm_pmu_v3_enable(struct kvm_vcpu *vcpu) { if (!kvm_vcpu_has_pmu(vcpu)) diff --git a/include/kvm/arm_pmu.h b/include/kvm/arm_pmu.h index 8dcb3e1477bc..6fd3cda608e4 100644 --- a/include/kvm/arm_pmu.h +++ b/include/kvm/arm_pmu.h @@ -13,6 +13,13 @@ #define ARMV8_PMU_CYCLE_IDX (ARMV8_PMU_MAX_COUNTERS - 1) #define ARMV8_PMU_MAX_COUNTER_PAIRS ((ARMV8_PMU_MAX_COUNTERS + 1) >> 1) +DECLARE_STATIC_KEY_FALSE(kvm_arm_pmu_available); + +static __always_inline bool kvm_arm_support_pmu_v3(void) +{ + return static_branch_likely(&kvm_arm_pmu_available); +} + #ifdef CONFIG_HW_PERF_EVENTS struct kvm_pmc { @@ -47,7 +54,6 @@ void kvm_pmu_software_increment(struct kvm_vcpu *vcpu, u64 val); void kvm_pmu_handle_pmcr(struct kvm_vcpu *vcpu, u64 val); void kvm_pmu_set_counter_event_type(struct kvm_vcpu *vcpu, u64 data, u64 select_idx); -bool kvm_arm_support_pmu_v3(void); int kvm_arm_pmu_v3_set_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr); int kvm_arm_pmu_v3_get_attr(struct kvm_vcpu *vcpu, @@ -87,7 +93,6 @@ static inline void kvm_pmu_software_increment(struct kvm_vcpu *vcpu, u64 val) {} static inline void kvm_pmu_handle_pmcr(struct kvm_vcpu *vcpu, u64 val) {} static inline void kvm_pmu_set_counter_event_type(struct kvm_vcpu *vcpu, u64 data, u64 select_idx) {} -static inline bool kvm_arm_support_pmu_v3(void) { return false; } static inline int kvm_arm_pmu_v3_set_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr) { -- cgit v1.2.3 From f27647b588c13647a60074b5a8dd39a86d919a1d Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Fri, 5 Mar 2021 18:52:51 +0000 Subject: KVM: arm64: Don't access PMSELR_EL0/PMUSERENR_EL0 when no PMU is available When running under a nesting hypervisor, it isn't guaranteed that the virtual HW will 
include a PMU. In which case, let's not try to access the PMU registers in the world switch, as that'd be deadly. Reported-by: Andre Przywara Signed-off-by: Marc Zyngier Reviewed-by: Alexandru Elisei Link: https://lore.kernel.org/r/20210209114844.3278746-3-maz@kernel.org Message-Id: <20210305185254.3730990-6-maz@kernel.org> Signed-off-by: Paolo Bonzini --- arch/arm64/kernel/image-vars.h | 3 +++ arch/arm64/kvm/hyp/include/hyp/switch.h | 9 ++++++--- 2 files changed, 9 insertions(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/arm64/kernel/image-vars.h b/arch/arm64/kernel/image-vars.h index 23f1a557bd9f..5aa9ed1e9ec6 100644 --- a/arch/arm64/kernel/image-vars.h +++ b/arch/arm64/kernel/image-vars.h @@ -101,6 +101,9 @@ KVM_NVHE_ALIAS(__stop___kvm_ex_table); /* Array containing bases of nVHE per-CPU memory regions. */ KVM_NVHE_ALIAS(kvm_arm_hyp_percpu_base); +/* PMU available static key */ +KVM_NVHE_ALIAS(kvm_arm_pmu_available); + #endif /* CONFIG_KVM */ #endif /* __ARM64_KERNEL_IMAGE_VARS_H */ diff --git a/arch/arm64/kvm/hyp/include/hyp/switch.h b/arch/arm64/kvm/hyp/include/hyp/switch.h index 54f4860cd87c..6c1f51f25eb3 100644 --- a/arch/arm64/kvm/hyp/include/hyp/switch.h +++ b/arch/arm64/kvm/hyp/include/hyp/switch.h @@ -90,15 +90,18 @@ static inline void __activate_traps_common(struct kvm_vcpu *vcpu) * counter, which could make a PMXEVCNTR_EL0 access UNDEF at * EL1 instead of being trapped to EL2. */ - write_sysreg(0, pmselr_el0); - write_sysreg(ARMV8_PMU_USERENR_MASK, pmuserenr_el0); + if (kvm_arm_support_pmu_v3()) { + write_sysreg(0, pmselr_el0); + write_sysreg(ARMV8_PMU_USERENR_MASK, pmuserenr_el0); + } write_sysreg(vcpu->arch.mdcr_el2, mdcr_el2); } static inline void __deactivate_traps_common(void) { write_sysreg(0, hstr_el2); - write_sysreg(0, pmuserenr_el0); + if (kvm_arm_support_pmu_v3()) + write_sysreg(0, pmuserenr_el0); } static inline void ___activate_traps(struct kvm_vcpu *vcpu) -- cgit v1.2.3 From b9d699e2694d032aa8ecc15141f698ccb050dc95 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Fri, 5 Mar 2021 18:52:52 +0000 Subject: KVM: arm64: Rename __vgic_v3_get_ich_vtr_el2() to __vgic_v3_get_gic_config() As we are about to report a bit more information to the rest of the kernel, rename __vgic_v3_get_ich_vtr_el2() to the more explicit __vgic_v3_get_gic_config(). No functional change. 
Tested-by: Shameer Kolothum Signed-off-by: Marc Zyngier Message-Id: <20210305185254.3730990-7-maz@kernel.org> Signed-off-by: Paolo Bonzini --- arch/arm64/include/asm/kvm_asm.h | 4 ++-- arch/arm64/kvm/hyp/nvhe/hyp-main.c | 6 +++--- arch/arm64/kvm/hyp/vgic-v3-sr.c | 7 ++++++- arch/arm64/kvm/vgic/vgic-v3.c | 4 +++- 4 files changed, 14 insertions(+), 7 deletions(-) (limited to 'arch') diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h index 22d933e9b59e..9c0e396dd03f 100644 --- a/arch/arm64/include/asm/kvm_asm.h +++ b/arch/arm64/include/asm/kvm_asm.h @@ -50,7 +50,7 @@ #define __KVM_HOST_SMCCC_FUNC___kvm_tlb_flush_local_vmid 5 #define __KVM_HOST_SMCCC_FUNC___kvm_timer_set_cntvoff 6 #define __KVM_HOST_SMCCC_FUNC___kvm_enable_ssbs 7 -#define __KVM_HOST_SMCCC_FUNC___vgic_v3_get_ich_vtr_el2 8 +#define __KVM_HOST_SMCCC_FUNC___vgic_v3_get_gic_config 8 #define __KVM_HOST_SMCCC_FUNC___vgic_v3_read_vmcr 9 #define __KVM_HOST_SMCCC_FUNC___vgic_v3_write_vmcr 10 #define __KVM_HOST_SMCCC_FUNC___vgic_v3_init_lrs 11 @@ -192,7 +192,7 @@ extern void __kvm_timer_set_cntvoff(u64 cntvoff); extern int __kvm_vcpu_run(struct kvm_vcpu *vcpu); -extern u64 __vgic_v3_get_ich_vtr_el2(void); +extern u64 __vgic_v3_get_gic_config(void); extern u64 __vgic_v3_read_vmcr(void); extern void __vgic_v3_write_vmcr(u32 vmcr); extern void __vgic_v3_init_lrs(void); diff --git a/arch/arm64/kvm/hyp/nvhe/hyp-main.c b/arch/arm64/kvm/hyp/nvhe/hyp-main.c index f012f8665ecc..8f129968204e 100644 --- a/arch/arm64/kvm/hyp/nvhe/hyp-main.c +++ b/arch/arm64/kvm/hyp/nvhe/hyp-main.c @@ -67,9 +67,9 @@ static void handle___kvm_enable_ssbs(struct kvm_cpu_context *host_ctxt) write_sysreg_el2(tmp, SYS_SCTLR); } -static void handle___vgic_v3_get_ich_vtr_el2(struct kvm_cpu_context *host_ctxt) +static void handle___vgic_v3_get_gic_config(struct kvm_cpu_context *host_ctxt) { - cpu_reg(host_ctxt, 1) = __vgic_v3_get_ich_vtr_el2(); + cpu_reg(host_ctxt, 1) = __vgic_v3_get_gic_config(); } static void handle___vgic_v3_read_vmcr(struct kvm_cpu_context *host_ctxt) @@ -118,7 +118,7 @@ static const hcall_t host_hcall[] = { HANDLE_FUNC(__kvm_tlb_flush_local_vmid), HANDLE_FUNC(__kvm_timer_set_cntvoff), HANDLE_FUNC(__kvm_enable_ssbs), - HANDLE_FUNC(__vgic_v3_get_ich_vtr_el2), + HANDLE_FUNC(__vgic_v3_get_gic_config), HANDLE_FUNC(__vgic_v3_read_vmcr), HANDLE_FUNC(__vgic_v3_write_vmcr), HANDLE_FUNC(__vgic_v3_init_lrs), diff --git a/arch/arm64/kvm/hyp/vgic-v3-sr.c b/arch/arm64/kvm/hyp/vgic-v3-sr.c index 80406f463c28..005daa0c9dd7 100644 --- a/arch/arm64/kvm/hyp/vgic-v3-sr.c +++ b/arch/arm64/kvm/hyp/vgic-v3-sr.c @@ -405,7 +405,12 @@ void __vgic_v3_init_lrs(void) __gic_v3_set_lr(0, i); } -u64 __vgic_v3_get_ich_vtr_el2(void) +/* + * Return the GIC CPU configuration: + * - [31:0] ICH_VTR_EL2 + * - [63:32] RES0 + */ +u64 __vgic_v3_get_gic_config(void) { return read_gicreg(ICH_VTR_EL2); } diff --git a/arch/arm64/kvm/vgic/vgic-v3.c b/arch/arm64/kvm/vgic/vgic-v3.c index 52915b342351..c3e6c3fd333b 100644 --- a/arch/arm64/kvm/vgic/vgic-v3.c +++ b/arch/arm64/kvm/vgic/vgic-v3.c @@ -574,9 +574,11 @@ early_param("kvm-arm.vgic_v4_enable", early_gicv4_enable); */ int vgic_v3_probe(const struct gic_kvm_info *info) { - u32 ich_vtr_el2 = kvm_call_hyp_ret(__vgic_v3_get_ich_vtr_el2); + u64 ich_vtr_el2 = kvm_call_hyp_ret(__vgic_v3_get_gic_config); int ret; + ich_vtr_el2 = (u32)ich_vtr_el2; + /* * The ListRegs field is 5 bits, but there is an architectural * maximum of 16 list registers. Just ignore bit 4... 
-- cgit v1.2.3 From 9739f6ef053f104a997165701c6e15582c4307ee Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Fri, 5 Mar 2021 18:52:53 +0000 Subject: KVM: arm64: Workaround firmware wrongly advertising GICv2-on-v3 compatibility It looks like we have broken firmware out there that wrongly advertises a GICv2 compatibility interface, despite the CPUs not being able to deal with it. To work around this, check that the CPU initialising KVM is actually able to switch to MMIO instead of system registers, and use that as a precondition to enable GICv2 compatibility in KVM. Note that the detection happens on a single CPU. If the firmware is lying *and* that the CPUs are asymetric, all hope is lost anyway. Reported-by: Shameerali Kolothum Thodi Tested-by: Shameer Kolothum Signed-off-by: Marc Zyngier Message-Id: <20210305185254.3730990-8-maz@kernel.org> Signed-off-by: Paolo Bonzini --- arch/arm64/kvm/hyp/vgic-v3-sr.c | 35 +++++++++++++++++++++++++++++++++-- arch/arm64/kvm/vgic/vgic-v3.c | 8 ++++++-- 2 files changed, 39 insertions(+), 4 deletions(-) (limited to 'arch') diff --git a/arch/arm64/kvm/hyp/vgic-v3-sr.c b/arch/arm64/kvm/hyp/vgic-v3-sr.c index 005daa0c9dd7..ee3682b9873c 100644 --- a/arch/arm64/kvm/hyp/vgic-v3-sr.c +++ b/arch/arm64/kvm/hyp/vgic-v3-sr.c @@ -408,11 +408,42 @@ void __vgic_v3_init_lrs(void) /* * Return the GIC CPU configuration: * - [31:0] ICH_VTR_EL2 - * - [63:32] RES0 + * - [62:32] RES0 + * - [63] MMIO (GICv2) capable */ u64 __vgic_v3_get_gic_config(void) { - return read_gicreg(ICH_VTR_EL2); + u64 val, sre = read_gicreg(ICC_SRE_EL1); + unsigned long flags = 0; + + /* + * To check whether we have a MMIO-based (GICv2 compatible) + * CPU interface, we need to disable the system register + * view. To do that safely, we have to prevent any interrupt + * from firing (which would be deadly). + * + * Note that this only makes sense on VHE, as interrupts are + * already masked for nVHE as part of the exception entry to + * EL2. + */ + if (has_vhe()) + flags = local_daif_save(); + + write_gicreg(0, ICC_SRE_EL1); + isb(); + + val = read_gicreg(ICC_SRE_EL1); + + write_gicreg(sre, ICC_SRE_EL1); + isb(); + + if (has_vhe()) + local_daif_restore(flags); + + val = (val & ICC_SRE_EL1_SRE) ? 0 : (1ULL << 63); + val |= read_gicreg(ICH_VTR_EL2); + + return val; } u64 __vgic_v3_read_vmcr(void) diff --git a/arch/arm64/kvm/vgic/vgic-v3.c b/arch/arm64/kvm/vgic/vgic-v3.c index c3e6c3fd333b..6f530925a231 100644 --- a/arch/arm64/kvm/vgic/vgic-v3.c +++ b/arch/arm64/kvm/vgic/vgic-v3.c @@ -575,8 +575,10 @@ early_param("kvm-arm.vgic_v4_enable", early_gicv4_enable); int vgic_v3_probe(const struct gic_kvm_info *info) { u64 ich_vtr_el2 = kvm_call_hyp_ret(__vgic_v3_get_gic_config); + bool has_v2; int ret; + has_v2 = ich_vtr_el2 >> 63; ich_vtr_el2 = (u32)ich_vtr_el2; /* @@ -596,13 +598,15 @@ int vgic_v3_probe(const struct gic_kvm_info *info) gicv4_enable ? 
"en" : "dis"); } + kvm_vgic_global_state.vcpu_base = 0; + if (!info->vcpu.start) { kvm_info("GICv3: no GICV resource entry\n"); - kvm_vgic_global_state.vcpu_base = 0; + } else if (!has_v2) { + pr_warn(FW_BUG "CPU interface incapable of MMIO access\n"); } else if (!PAGE_ALIGNED(info->vcpu.start)) { pr_warn("GICV physical address 0x%llx not page aligned\n", (unsigned long long)info->vcpu.start); - kvm_vgic_global_state.vcpu_base = 0; } else { kvm_vgic_global_state.vcpu_base = info->vcpu.start; kvm_vgic_global_state.can_emulate_gicv2 = true; -- cgit v1.2.3 From 357ad203d45c0f9d76a8feadbd5a1c5d460c638b Mon Sep 17 00:00:00 2001 From: Jia He Date: Fri, 5 Mar 2021 18:52:54 +0000 Subject: KVM: arm64: Fix range alignment when walking page tables When walking the page tables at a given level, and if the start address for the range isn't aligned for that level, we propagate the misalignment on each iteration at that level. This results in the walker ignoring a number of entries (depending on the original misalignment) on each subsequent iteration. Properly aligning the address before the next iteration addresses this issue. Cc: stable@vger.kernel.org Reported-by: Howard Zhang Acked-by: Will Deacon Signed-off-by: Jia He Fixes: b1e57de62cfb ("KVM: arm64: Add stand-alone page-table walker infrastructure") [maz: rewrite commit message] Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20210303024225.2591-1-justin.he@arm.com Message-Id: <20210305185254.3730990-9-maz@kernel.org> Signed-off-by: Paolo Bonzini --- arch/arm64/kvm/hyp/pgtable.c | 1 + 1 file changed, 1 insertion(+) (limited to 'arch') diff --git a/arch/arm64/kvm/hyp/pgtable.c b/arch/arm64/kvm/hyp/pgtable.c index 4d177ce1d536..926fc07074f5 100644 --- a/arch/arm64/kvm/hyp/pgtable.c +++ b/arch/arm64/kvm/hyp/pgtable.c @@ -223,6 +223,7 @@ static inline int __kvm_pgtable_visit(struct kvm_pgtable_walk_data *data, goto out; if (!table) { + data->addr = ALIGN_DOWN(data->addr, kvm_granule_size(level)); data->addr += kvm_granule_size(level); goto out; } -- cgit v1.2.3 From afbef30149587ad46f4780b1e0cc5e219745ce90 Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Mon, 30 Nov 2020 11:38:41 -0800 Subject: perf/x86/intel: Set PERF_ATTACH_SCHED_CB for large PEBS and LBR To supply a PID/TID for large PEBS, it requires flushing the PEBS buffer in a context switch. For normal LBRs, a context switch can flip the address space and LBR entries are not tagged with an identifier, we need to wipe the LBR, even for per-cpu events. For LBR callstack, save/restore the stack is required during a context switch. Set PERF_ATTACH_SCHED_CB for the event with large PEBS & LBR. 
Fixes: 9c964efa4330 ("perf/x86/intel: Drain the PEBS buffer during context switches") Signed-off-by: Kan Liang Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Ingo Molnar Link: https://lkml.kernel.org/r/20201130193842.10569-2-kan.liang@linux.intel.com --- arch/x86/events/intel/core.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c index 5bac48d5c18e..7bbb5bb98d8c 100644 --- a/arch/x86/events/intel/core.c +++ b/arch/x86/events/intel/core.c @@ -3662,8 +3662,10 @@ static int intel_pmu_hw_config(struct perf_event *event) if (!(event->attr.freq || (event->attr.wakeup_events && !event->attr.watermark))) { event->hw.flags |= PERF_X86_EVENT_AUTO_RELOAD; if (!(event->attr.sample_type & - ~intel_pmu_large_pebs_flags(event))) + ~intel_pmu_large_pebs_flags(event))) { event->hw.flags |= PERF_X86_EVENT_LARGE_PEBS; + event->attach_state |= PERF_ATTACH_SCHED_CB; + } } if (x86_pmu.pebs_aliases) x86_pmu.pebs_aliases(event); @@ -3676,6 +3678,7 @@ static int intel_pmu_hw_config(struct perf_event *event) ret = intel_pmu_setup_lbr_filter(event); if (ret) return ret; + event->attach_state |= PERF_ATTACH_SCHED_CB; /* * BTS is set up earlier in this path, so don't account twice -- cgit v1.2.3 From e504e74cc3a2c092b05577ce3e8e013fae7d94e6 Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Fri, 5 Feb 2021 08:24:02 -0600 Subject: x86/unwind/orc: Disable KASAN checking in the ORC unwinder, part 2 KASAN reserves "redzone" areas between stack frames in order to detect stack overruns. A read or write to such an area triggers a KASAN "stack-out-of-bounds" BUG. Normally, the ORC unwinder stays in-bounds and doesn't access the redzone. But sometimes it can't find ORC metadata for a given instruction. This can happen for code which is missing ORC metadata, or for generated code. In such cases, the unwinder attempts to fall back to frame pointers, as a best-effort type thing. This fallback often works, but when it doesn't, the unwinder can get confused and go off into the weeds into the KASAN redzone, triggering the aforementioned KASAN BUG. But in this case, the unwinder's confusion is actually harmless and working as designed. It already has checks in place to prevent off-stack accesses, but those checks get short-circuited by the KASAN BUG. And a BUG is a lot more disruptive than a harmless unwinder warning. Disable the KASAN checks by using READ_ONCE_NOCHECK() for all stack accesses. This finishes the job started by commit 881125bfe65b ("x86/unwind: Disable KASAN checking in the ORC unwinder"), which only partially fixed the issue. 
Fixes: ee9f8fce9964 ("x86/unwind: Add the ORC unwinder") Reported-by: Ivan Babrou Signed-off-by: Josh Poimboeuf Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Borislav Petkov Reviewed-by: Steven Rostedt (VMware) Tested-by: Ivan Babrou Cc: stable@kernel.org Link: https://lkml.kernel.org/r/9583327904ebbbeda399eca9c56d6c7085ac20fe.1612534649.git.jpoimboe@redhat.com --- arch/x86/kernel/unwind_orc.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/unwind_orc.c b/arch/x86/kernel/unwind_orc.c index 2a1d47f47eee..1bcc14c870ab 100644 --- a/arch/x86/kernel/unwind_orc.c +++ b/arch/x86/kernel/unwind_orc.c @@ -367,8 +367,8 @@ static bool deref_stack_regs(struct unwind_state *state, unsigned long addr, if (!stack_access_ok(state, addr, sizeof(struct pt_regs))) return false; - *ip = regs->ip; - *sp = regs->sp; + *ip = READ_ONCE_NOCHECK(regs->ip); + *sp = READ_ONCE_NOCHECK(regs->sp); return true; } @@ -380,8 +380,8 @@ static bool deref_stack_iret_regs(struct unwind_state *state, unsigned long addr if (!stack_access_ok(state, addr, IRET_FRAME_SIZE)) return false; - *ip = regs->ip; - *sp = regs->sp; + *ip = READ_ONCE_NOCHECK(regs->ip); + *sp = READ_ONCE_NOCHECK(regs->sp); return true; } @@ -402,12 +402,12 @@ static bool get_reg(struct unwind_state *state, unsigned int reg_off, return false; if (state->full_regs) { - *val = ((unsigned long *)state->regs)[reg]; + *val = READ_ONCE_NOCHECK(((unsigned long *)state->regs)[reg]); return true; } if (state->prev_regs) { - *val = ((unsigned long *)state->prev_regs)[reg]; + *val = READ_ONCE_NOCHECK(((unsigned long *)state->prev_regs)[reg]); return true; } -- cgit v1.2.3 From b59cc97674c947861783ca92b9a6e7d043adba96 Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Fri, 5 Feb 2021 08:24:03 -0600 Subject: x86/unwind/orc: Silence warnings caused by missing ORC data The ORC unwinder attempts to fall back to frame pointers when ORC data is missing for a given instruction. It sets state->error, but then tries to keep going as a best-effort type of thing. That may result in further warnings if the unwinder gets lost. Until we have some way to register generated code with the unwinder, missing ORC will be expected, and occasionally going off the rails will also be expected. So don't warn about it. Signed-off-by: Josh Poimboeuf Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Borislav Petkov Tested-by: Ivan Babrou Link: https://lkml.kernel.org/r/06d02c4bbb220bd31668db579278b0352538efbb.1612534649.git.jpoimboe@redhat.com --- arch/x86/kernel/unwind_orc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/kernel/unwind_orc.c b/arch/x86/kernel/unwind_orc.c index 1bcc14c870ab..a1202536fc57 100644 --- a/arch/x86/kernel/unwind_orc.c +++ b/arch/x86/kernel/unwind_orc.c @@ -13,7 +13,7 @@ #define orc_warn_current(args...) \ ({ \ - if (state->task == current) \ + if (state->task == current && !state->error) \ orc_warn(args); \ }) -- cgit v1.2.3 From 5d5675df792ff67e74a500c4c94db0f99e6a10ef Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Thu, 4 Mar 2021 11:05:54 -0800 Subject: x86/entry: Fix entry/exit mismatch on failed fast 32-bit syscalls On a 32-bit fast syscall that fails to read its arguments from user memory, the kernel currently does syscall exit work but not syscall entry work. This confuses audit and ptrace. For example: $ ./tools/testing/selftests/x86/syscall_arg_fault_32 ... strace: pid 264258: entering, ptrace_syscall_info.op == 2 ... 
This is a minimal fix intended for ease of backporting. A more complete cleanup is coming. Fixes: 0b085e68f407 ("x86/entry: Consolidate 32/64 bit syscall entry") Signed-off-by: Andy Lutomirski Signed-off-by: Thomas Gleixner Signed-off-by: Borislav Petkov Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/8c82296ddf803b91f8d1e5eac89e5803ba54ab0e.1614884673.git.luto@kernel.org --- arch/x86/entry/common.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c index a2433ae8a65e..4efd39aacb9f 100644 --- a/arch/x86/entry/common.c +++ b/arch/x86/entry/common.c @@ -128,7 +128,8 @@ static noinstr bool __do_fast_syscall_32(struct pt_regs *regs) regs->ax = -EFAULT; instrumentation_end(); - syscall_exit_to_user_mode(regs); + local_irq_disable(); + irqentry_exit_to_user_mode(regs); return false; } -- cgit v1.2.3 From a65a802aadba072ca7514fc0c301fd7fdc6fc6cb Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Fri, 5 Mar 2021 09:41:22 +0100 Subject: m68k: Fix virt_addr_valid() W=1 compiler warnings If CONFIG_DEBUG_SG=y, and CONFIG_MMU=y: include/linux/scatterlist.h: In function ‘sg_set_buf’: arch/m68k/include/asm/page_mm.h:174:49: warning: ordered comparison of pointer with null pointer [-Wextra] 174 | #define virt_addr_valid(kaddr) ((void *)(kaddr) >= (void *)PAGE_OFFSET && (void *)(kaddr) < high_memory) | ^~ or CONFIG_MMU=n: include/linux/scatterlist.h: In function ‘sg_set_buf’: arch/m68k/include/asm/page_no.h:33:50: warning: ordered comparison of pointer with null pointer [-Wextra] 33 | #define virt_addr_valid(kaddr) (((void *)(kaddr) >= (void *)PAGE_OFFSET) && \ | ^~ Fix this by doing the comparison in the "unsigned long" instead of the "void *" domain. Note that for now this is only seen when compiling btrfs, due to commit e9aa7c285d20a69c ("btrfs: enable W=1 checks for btrfs"), but as people are doing more W=1 compile testing, it will start to show up elsewhere, too. 
Signed-off-by: Geert Uytterhoeven Acked-by: Greg Ungerer Link: https://lore.kernel.org/r/20210305084122.4118826-1-geert@linux-m68k.org --- arch/m68k/include/asm/page_mm.h | 2 +- arch/m68k/include/asm/page_no.h | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/m68k/include/asm/page_mm.h b/arch/m68k/include/asm/page_mm.h index 7f5912af2a52..9e8f0cc30a2c 100644 --- a/arch/m68k/include/asm/page_mm.h +++ b/arch/m68k/include/asm/page_mm.h @@ -171,7 +171,7 @@ static inline __attribute_const__ int __virt_to_node_shift(void) #include #endif -#define virt_addr_valid(kaddr) ((void *)(kaddr) >= (void *)PAGE_OFFSET && (void *)(kaddr) < high_memory) +#define virt_addr_valid(kaddr) ((unsigned long)(kaddr) >= PAGE_OFFSET && (unsigned long)(kaddr) < (unsigned long)high_memory) #define pfn_valid(pfn) virt_addr_valid(pfn_to_virt(pfn)) #endif /* __ASSEMBLY__ */ diff --git a/arch/m68k/include/asm/page_no.h b/arch/m68k/include/asm/page_no.h index 6bbe52025de3..8d0f862ee9d7 100644 --- a/arch/m68k/include/asm/page_no.h +++ b/arch/m68k/include/asm/page_no.h @@ -30,8 +30,8 @@ extern unsigned long memory_end; #define page_to_pfn(page) virt_to_pfn(page_to_virt(page)) #define pfn_valid(pfn) ((pfn) < max_mapnr) -#define virt_addr_valid(kaddr) (((void *)(kaddr) >= (void *)PAGE_OFFSET) && \ - ((void *)(kaddr) < (void *)memory_end)) +#define virt_addr_valid(kaddr) (((unsigned long)(kaddr) >= PAGE_OFFSET) && \ + ((unsigned long)(kaddr) < memory_end)) #endif /* __ASSEMBLY__ */ -- cgit v1.2.3 From eba8e1af5a61e61e5d77e1dfe1e8e20735ebc9c6 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Tue, 23 Feb 2021 19:52:20 +0100 Subject: s390/time,idle: get rid of unsigned long long Get rid of unsigned long long, and use unsigned long instead everywhere. The usage of unsigned long long is a leftover from 31 bit kernel support. 
Signed-off-by: Heiko Carstens --- arch/s390/include/asm/idle.h | 12 ++++++------ arch/s390/include/asm/timex.h | 36 ++++++++++++++++++------------------ arch/s390/kernel/idle.c | 12 ++++++------ arch/s390/kernel/time.c | 28 ++++++++++++++-------------- arch/s390/kvm/interrupt.c | 2 +- drivers/s390/cio/device_fsm.c | 2 +- 6 files changed, 46 insertions(+), 46 deletions(-) (limited to 'arch') diff --git a/arch/s390/include/asm/idle.h b/arch/s390/include/asm/idle.h index b04f6a794cdf..5cea629c548e 100644 --- a/arch/s390/include/asm/idle.h +++ b/arch/s390/include/asm/idle.h @@ -14,12 +14,12 @@ struct s390_idle_data { seqcount_t seqcount; - unsigned long long idle_count; - unsigned long long idle_time; - unsigned long long clock_idle_enter; - unsigned long long clock_idle_exit; - unsigned long long timer_idle_enter; - unsigned long long timer_idle_exit; + unsigned long idle_count; + unsigned long idle_time; + unsigned long clock_idle_enter; + unsigned long clock_idle_exit; + unsigned long timer_idle_enter; + unsigned long timer_idle_exit; unsigned long mt_cycles_enter[8]; }; diff --git a/arch/s390/include/asm/timex.h b/arch/s390/include/asm/timex.h index c4e23e925665..f6326c6d2abe 100644 --- a/arch/s390/include/asm/timex.h +++ b/arch/s390/include/asm/timex.h @@ -98,10 +98,10 @@ extern unsigned char ptff_function_mask[16]; /* Query TOD offset result */ struct ptff_qto { - unsigned long long physical_clock; - unsigned long long tod_offset; - unsigned long long logical_tod_offset; - unsigned long long tod_epoch_difference; + unsigned long physical_clock; + unsigned long tod_offset; + unsigned long logical_tod_offset; + unsigned long tod_epoch_difference; } __packed; static inline int ptff_query(unsigned int nr) @@ -151,9 +151,9 @@ struct ptff_qui { rc; \ }) -static inline unsigned long long local_tick_disable(void) +static inline unsigned long local_tick_disable(void) { - unsigned long long old; + unsigned long old; old = S390_lowcore.clock_comparator; S390_lowcore.clock_comparator = clock_comparator_max; @@ -161,7 +161,7 @@ static inline unsigned long long local_tick_disable(void) return old; } -static inline void local_tick_enable(unsigned long long comp) +static inline void local_tick_enable(unsigned long comp) { S390_lowcore.clock_comparator = comp; set_clock_comparator(S390_lowcore.clock_comparator); @@ -169,9 +169,9 @@ static inline void local_tick_enable(unsigned long long comp) #define CLOCK_TICK_RATE 1193180 /* Underlying HZ */ -typedef unsigned long long cycles_t; +typedef unsigned long cycles_t; -static inline unsigned long long get_tod_clock(void) +static inline unsigned long get_tod_clock(void) { union tod_clock clk; @@ -179,10 +179,10 @@ static inline unsigned long long get_tod_clock(void) return clk.tod; } -static inline unsigned long long get_tod_clock_fast(void) +static inline unsigned long get_tod_clock_fast(void) { #ifdef CONFIG_HAVE_MARCH_Z9_109_FEATURES - unsigned long long clk; + unsigned long clk; asm volatile("stckf %0" : "=Q" (clk) : : "cc"); return clk; @@ -208,9 +208,9 @@ extern union tod_clock tod_clock_base; * Therefore preemption must be disabled, otherwise the returned * value is not guaranteed to be monotonic. 
*/ -static inline unsigned long long get_tod_clock_monotonic(void) +static inline unsigned long get_tod_clock_monotonic(void) { - unsigned long long tod; + unsigned long tod; preempt_disable_notrace(); tod = get_tod_clock() - tod_clock_base.tod; @@ -237,7 +237,7 @@ static inline unsigned long long get_tod_clock_monotonic(void) * -> ns = (th * 125) + ((tl * 125) >> 9); * */ -static inline unsigned long long tod_to_ns(unsigned long long todval) +static inline unsigned long tod_to_ns(unsigned long todval) { return ((todval >> 9) * 125) + (((todval & 0x1ff) * 125) >> 9); } @@ -249,10 +249,10 @@ static inline unsigned long long tod_to_ns(unsigned long long todval) * * Returns: true if a is later than b */ -static inline int tod_after(unsigned long long a, unsigned long long b) +static inline int tod_after(unsigned long a, unsigned long b) { if (MACHINE_HAS_SCC) - return (long long) a > (long long) b; + return (long) a > (long) b; return a > b; } @@ -263,10 +263,10 @@ static inline int tod_after(unsigned long long a, unsigned long long b) * * Returns: true if a is later than b */ -static inline int tod_after_eq(unsigned long long a, unsigned long long b) +static inline int tod_after_eq(unsigned long a, unsigned long b) { if (MACHINE_HAS_SCC) - return (long long) a >= (long long) b; + return (long) a >= (long) b; return a >= b; } diff --git a/arch/s390/kernel/idle.c b/arch/s390/kernel/idle.c index 812073ea073e..4bf1ee293f2b 100644 --- a/arch/s390/kernel/idle.c +++ b/arch/s390/kernel/idle.c @@ -47,7 +47,7 @@ void account_idle_time_irq(void) void arch_cpu_idle(void) { struct s390_idle_data *idle = this_cpu_ptr(&s390_idle); - unsigned long long idle_time; + unsigned long idle_time; unsigned long psw_mask; /* Wait for external, I/O or machine check interrupt. 
*/ @@ -73,7 +73,7 @@ static ssize_t show_idle_count(struct device *dev, struct device_attribute *attr, char *buf) { struct s390_idle_data *idle = &per_cpu(s390_idle, dev->id); - unsigned long long idle_count; + unsigned long idle_count; unsigned int seq; do { @@ -82,14 +82,14 @@ static ssize_t show_idle_count(struct device *dev, if (READ_ONCE(idle->clock_idle_enter)) idle_count++; } while (read_seqcount_retry(&idle->seqcount, seq)); - return sprintf(buf, "%llu\n", idle_count); + return sprintf(buf, "%lu\n", idle_count); } DEVICE_ATTR(idle_count, 0444, show_idle_count, NULL); static ssize_t show_idle_time(struct device *dev, struct device_attribute *attr, char *buf) { - unsigned long long now, idle_time, idle_enter, idle_exit, in_idle; + unsigned long now, idle_time, idle_enter, idle_exit, in_idle; struct s390_idle_data *idle = &per_cpu(s390_idle, dev->id); unsigned int seq; @@ -109,14 +109,14 @@ static ssize_t show_idle_time(struct device *dev, } } idle_time += in_idle; - return sprintf(buf, "%llu\n", idle_time >> 12); + return sprintf(buf, "%lu\n", idle_time >> 12); } DEVICE_ATTR(idle_time_us, 0444, show_idle_time, NULL); u64 arch_cpu_idle_time(int cpu) { struct s390_idle_data *idle = &per_cpu(s390_idle, cpu); - unsigned long long now, idle_enter, idle_exit, in_idle; + unsigned long now, idle_enter, idle_exit, in_idle; unsigned int seq; do { diff --git a/arch/s390/kernel/time.c b/arch/s390/kernel/time.c index 06bcfa636638..165da961f901 100644 --- a/arch/s390/kernel/time.c +++ b/arch/s390/kernel/time.c @@ -68,10 +68,10 @@ EXPORT_SYMBOL(s390_epoch_delta_notifier); unsigned char ptff_function_mask[16]; -static unsigned long long lpar_offset; -static unsigned long long initial_leap_seconds; -static unsigned long long tod_steering_end; -static long long tod_steering_delta; +static unsigned long lpar_offset; +static unsigned long initial_leap_seconds; +static unsigned long tod_steering_end; +static long tod_steering_delta; /* * Get time offsets with PTFF @@ -96,7 +96,7 @@ void __init time_early_init(void) /* get initial leap seconds */ if (ptff_query(PTFF_QUI) && ptff(&qui, sizeof(qui), PTFF_QUI) == 0) - initial_leap_seconds = (unsigned long long) + initial_leap_seconds = (unsigned long) ((long) qui.old_leap * 4096000000L); } @@ -222,7 +222,7 @@ void __init read_persistent_wall_and_boot_offset(struct timespec64 *wall_time, static u64 read_tod_clock(struct clocksource *cs) { - unsigned long long now, adj; + unsigned long now, adj; preempt_disable(); /* protect from changes to steering parameters */ now = get_tod_clock(); @@ -362,7 +362,7 @@ static inline int check_sync_clock(void) * Apply clock delta to the global data structures. * This is called once on the CPU that performed the clock sync. */ -static void clock_sync_global(unsigned long long delta) +static void clock_sync_global(unsigned long delta) { unsigned long now, adj; struct ptff_qto qto; @@ -378,7 +378,7 @@ static void clock_sync_global(unsigned long long delta) -(adj >> 15) : (adj >> 15); tod_steering_delta += delta; if ((abs(tod_steering_delta) >> 48) != 0) - panic("TOD clock sync offset %lli is too large to drift\n", + panic("TOD clock sync offset %li is too large to drift\n", tod_steering_delta); tod_steering_end = now + (abs(tod_steering_delta) << 15); vdso_data->arch_data.tod_steering_end = tod_steering_end; @@ -394,7 +394,7 @@ static void clock_sync_global(unsigned long long delta) * Apply clock delta to the per-CPU data structures of this CPU. * This is called for each online CPU after the call to clock_sync_global. 
*/ -static void clock_sync_local(unsigned long long delta) +static void clock_sync_local(unsigned long delta) { /* Add the delta to the clock comparator. */ if (S390_lowcore.clock_comparator != clock_comparator_max) { @@ -418,7 +418,7 @@ static void __init time_init_wq(void) struct clock_sync_data { atomic_t cpus; int in_sync; - unsigned long long clock_delta; + unsigned long clock_delta; }; /* @@ -538,7 +538,7 @@ static int stpinfo_valid(void) static int stp_sync_clock(void *data) { struct clock_sync_data *sync = data; - unsigned long long clock_delta, flags; + u64 clock_delta, flags; static int first; int rc; @@ -720,8 +720,8 @@ static ssize_t ctn_id_show(struct device *dev, mutex_lock(&stp_mutex); if (stpinfo_valid()) - ret = sprintf(buf, "%016llx\n", - *(unsigned long long *) stp_info.ctnid); + ret = sprintf(buf, "%016lx\n", + *(unsigned long *) stp_info.ctnid); mutex_unlock(&stp_mutex); return ret; } @@ -794,7 +794,7 @@ static ssize_t leap_seconds_scheduled_show(struct device *dev, if (!stzi.lsoib.p) return sprintf(buf, "0,0\n"); - return sprintf(buf, "%llu,%d\n", + return sprintf(buf, "%lu,%d\n", tod_to_ns(stzi.lsoib.nlsout - TOD_UNIX_EPOCH) / NSEC_PER_SEC, stzi.lsoib.nlso - stzi.lsoib.also); } diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c index e3183bd05910..d548d60caed2 100644 --- a/arch/s390/kvm/interrupt.c +++ b/arch/s390/kvm/interrupt.c @@ -1287,7 +1287,7 @@ static u64 __calculate_sltime(struct kvm_vcpu *vcpu) /* already expired? */ if (cputm >> 63) return 0; - return min(sltime, tod_to_ns(cputm)); + return min_t(u64, sltime, tod_to_ns(cputm)); } } else if (cpu_timer_interrupts_enabled(vcpu)) { sltime = kvm_s390_get_cpu_timer(vcpu); diff --git a/drivers/s390/cio/device_fsm.c b/drivers/s390/cio/device_fsm.c index 6420b197bb05..05e136cfb8be 100644 --- a/drivers/s390/cio/device_fsm.c +++ b/drivers/s390/cio/device_fsm.c @@ -47,7 +47,7 @@ static void ccw_timeout_log(struct ccw_device *cdev) orb = &private->orb; cc = stsch(sch->schid, &schib); - printk(KERN_WARNING "cio: ccw device timeout occurred at %llx, " + printk(KERN_WARNING "cio: ccw device timeout occurred at %lx, " "device information:\n", get_tod_clock()); printk(KERN_WARNING "cio: orb:\n"); print_hex_dump(KERN_WARNING, "cio: ", DUMP_PREFIX_NONE, 16, 1, -- cgit v1.2.3 From f9d8cbf33e9fceee671a49760cdcfa4be6a55102 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Wed, 17 Feb 2021 12:47:41 +0100 Subject: s390/topology: remove always false if check The cpumask being checked in cpu_group_map() must have at least one cpu set; therefore remove the check. Reviewed-by: Alexander Gordeev Signed-off-by: Heiko Carstens --- arch/s390/kernel/topology.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'arch') diff --git a/arch/s390/kernel/topology.c b/arch/s390/kernel/topology.c index e7ce447651b9..bfcc327acc6b 100644 --- a/arch/s390/kernel/topology.c +++ b/arch/s390/kernel/topology.c @@ -76,8 +76,6 @@ static void cpu_group_map(cpumask_t *dst, struct mask_info *info, unsigned int c } info = info->next; } - if (cpumask_empty(&mask)) - cpumask_copy(&mask, cpumask_of(cpu)); break; case TOPOLOGY_MODE_PACKAGE: cpumask_copy(&mask, cpu_present_mask); -- cgit v1.2.3 From c41b20de1a7c9a41ceab293a6f08927312ada679 Mon Sep 17 00:00:00 2001 From: Thomas Richter Date: Thu, 25 Feb 2021 14:28:52 +0100 Subject: s390/cpumf: remove 60 seconds read limit Remove the 60 seconds read interval limit. Do not impose any limit at all and allow read of complete counter sets. 
Signed-off-by: Thomas Richter Acked-by: Heiko Carstens Signed-off-by: Heiko Carstens --- arch/s390/kernel/perf_cpum_cf_diag.c | 18 ++---------------- 1 file changed, 2 insertions(+), 16 deletions(-) (limited to 'arch') diff --git a/arch/s390/kernel/perf_cpum_cf_diag.c b/arch/s390/kernel/perf_cpum_cf_diag.c index db4877bbb9aa..5eebc912df5a 100644 --- a/arch/s390/kernel/perf_cpum_cf_diag.c +++ b/arch/s390/kernel/perf_cpum_cf_diag.c @@ -29,9 +29,7 @@ #include #define CF_DIAG_CTRSET_DEF 0xfeef /* Counter set header mark */ -#define CF_DIAG_MIN_INTERVAL 60 /* Minimum counter set read */ /* interval in seconds */ -static unsigned long cf_diag_interval = CF_DIAG_MIN_INTERVAL; static unsigned int cf_diag_cpu_speed; static debug_info_t *cf_diag_dbg; @@ -729,7 +727,6 @@ static DEFINE_MUTEX(cf_diag_ctrset_mutex); static struct cf_diag_ctrset { unsigned long ctrset; /* Bit mask of counter set to read */ cpumask_t mask; /* CPU mask to read from */ - time64_t lastread; /* Epoch counter set last read */ } cf_diag_ctrset; static void cf_diag_ctrset_clear(void) @@ -866,27 +863,16 @@ static int cf_diag_all_read(unsigned long arg) { struct cf_diag_call_on_cpu_parm p; cpumask_var_t mask; - time64_t now; - int rc = 0; + int rc; debug_sprintf_event(cf_diag_dbg, 5, "%s\n", __func__); if (!alloc_cpumask_var(&mask, GFP_KERNEL)) return -ENOMEM; - now = ktime_get_seconds(); - if (cf_diag_ctrset.lastread + cf_diag_interval > now) { - debug_sprintf_event(cf_diag_dbg, 5, "%s now %lld " - " lastread %lld\n", __func__, now, - cf_diag_ctrset.lastread); - rc = -EAGAIN; - goto out; - } else { - cf_diag_ctrset.lastread = now; - } + p.sets = cf_diag_ctrset.ctrset; cpumask_and(mask, &cf_diag_ctrset.mask, cpu_online_mask); on_each_cpu_mask(mask, cf_diag_cpu_read, &p, 1); rc = cf_diag_all_copy(arg, mask); -out: free_cpumask_var(mask); debug_sprintf_event(cf_diag_dbg, 5, "%s rc %d\n", __func__, rc); return rc; -- cgit v1.2.3 From 46b635b6abcf2ee106d36f2e84e942f56030c8f5 Mon Sep 17 00:00:00 2001 From: Thomas Richter Date: Fri, 26 Feb 2021 15:00:56 +0100 Subject: s390/cpumf: rename header file to hwctrset.h Signed-off-by: Thomas Richter Suggested-by: Hendrick Brueckner Acked-by: Heiko Carstens Signed-off-by: Heiko Carstens --- arch/s390/include/uapi/asm/hwctrset.h | 51 ++++++++++++++++++++++++++ arch/s390/include/uapi/asm/perf_cpum_cf_diag.h | 51 -------------------------- arch/s390/kernel/perf_cpum_cf_diag.c | 2 +- 3 files changed, 52 insertions(+), 52 deletions(-) create mode 100644 arch/s390/include/uapi/asm/hwctrset.h delete mode 100644 arch/s390/include/uapi/asm/perf_cpum_cf_diag.h (limited to 'arch') diff --git a/arch/s390/include/uapi/asm/hwctrset.h b/arch/s390/include/uapi/asm/hwctrset.h new file mode 100644 index 000000000000..3d8284b95f87 --- /dev/null +++ b/arch/s390/include/uapi/asm/hwctrset.h @@ -0,0 +1,51 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * Copyright IBM Corp. 2021 + * Interface implementation for communication with the CPU Measurement + * counter facility device driver. + * + * Author(s): Thomas Richter + * + * Define for ioctl() commands to communicate with the CPU Measurement + * counter facility device driver. 
+ */ + +#ifndef _PERF_CPUM_CF_DIAG_H +#define _PERF_CPUM_CF_DIAG_H + +#include +#include + +#define S390_HWCTR_DEVICE "hwctr" +#define S390_HWCTR_START_VERSION 1 + +struct s390_ctrset_start { /* Set CPUs to operate on */ + __u64 version; /* Version of interface */ + __u64 data_bytes; /* # of bytes required */ + __u64 cpumask_len; /* Length of CPU mask in bytes */ + __u64 *cpumask; /* Pointer to CPU mask */ + __u64 counter_sets; /* Bit mask of counter sets to get */ +}; + +struct s390_ctrset_setdata { /* Counter set data */ + __u32 set; /* Counter set number */ + __u32 no_cnts; /* # of counters stored in cv[] */ + __u64 cv[0]; /* Counter values (variable length) */ +}; + +struct s390_ctrset_cpudata { /* Counter set data per CPU */ + __u32 cpu_nr; /* CPU number */ + __u32 no_sets; /* # of counters sets in data[] */ + struct s390_ctrset_setdata data[0]; +}; + +struct s390_ctrset_read { /* Structure to get all ctr sets */ + __u64 no_cpus; /* Total # of CPUs data taken from */ + struct s390_ctrset_cpudata data[0]; +}; + +#define S390_HWCTR_MAGIC 'C' /* Random magic # for ioctls */ +#define S390_HWCTR_START _IOWR(S390_HWCTR_MAGIC, 1, struct s390_ctrset_start) +#define S390_HWCTR_STOP _IO(S390_HWCTR_MAGIC, 2) +#define S390_HWCTR_READ _IOWR(S390_HWCTR_MAGIC, 3, struct s390_ctrset_read) +#endif diff --git a/arch/s390/include/uapi/asm/perf_cpum_cf_diag.h b/arch/s390/include/uapi/asm/perf_cpum_cf_diag.h deleted file mode 100644 index 3d8284b95f87..000000000000 --- a/arch/s390/include/uapi/asm/perf_cpum_cf_diag.h +++ /dev/null @@ -1,51 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ -/* - * Copyright IBM Corp. 2021 - * Interface implementation for communication with the CPU Measurement - * counter facility device driver. - * - * Author(s): Thomas Richter - * - * Define for ioctl() commands to communicate with the CPU Measurement - * counter facility device driver. 
- */ - -#ifndef _PERF_CPUM_CF_DIAG_H -#define _PERF_CPUM_CF_DIAG_H - -#include -#include - -#define S390_HWCTR_DEVICE "hwctr" -#define S390_HWCTR_START_VERSION 1 - -struct s390_ctrset_start { /* Set CPUs to operate on */ - __u64 version; /* Version of interface */ - __u64 data_bytes; /* # of bytes required */ - __u64 cpumask_len; /* Length of CPU mask in bytes */ - __u64 *cpumask; /* Pointer to CPU mask */ - __u64 counter_sets; /* Bit mask of counter sets to get */ -}; - -struct s390_ctrset_setdata { /* Counter set data */ - __u32 set; /* Counter set number */ - __u32 no_cnts; /* # of counters stored in cv[] */ - __u64 cv[0]; /* Counter values (variable length) */ -}; - -struct s390_ctrset_cpudata { /* Counter set data per CPU */ - __u32 cpu_nr; /* CPU number */ - __u32 no_sets; /* # of counters sets in data[] */ - struct s390_ctrset_setdata data[0]; -}; - -struct s390_ctrset_read { /* Structure to get all ctr sets */ - __u64 no_cpus; /* Total # of CPUs data taken from */ - struct s390_ctrset_cpudata data[0]; -}; - -#define S390_HWCTR_MAGIC 'C' /* Random magic # for ioctls */ -#define S390_HWCTR_START _IOWR(S390_HWCTR_MAGIC, 1, struct s390_ctrset_start) -#define S390_HWCTR_STOP _IO(S390_HWCTR_MAGIC, 2) -#define S390_HWCTR_READ _IOWR(S390_HWCTR_MAGIC, 3, struct s390_ctrset_read) -#endif diff --git a/arch/s390/kernel/perf_cpum_cf_diag.c b/arch/s390/kernel/perf_cpum_cf_diag.c index 5eebc912df5a..bc302b86ce28 100644 --- a/arch/s390/kernel/perf_cpum_cf_diag.c +++ b/arch/s390/kernel/perf_cpum_cf_diag.c @@ -26,7 +26,7 @@ #include #include -#include +#include #define CF_DIAG_CTRSET_DEF 0xfeef /* Counter set header mark */ /* interval in seconds */ -- cgit v1.2.3 From 1c0a9c7997325ef7a8f71fca2e1e6091e757c94b Mon Sep 17 00:00:00 2001 From: Jiapeng Chong Date: Thu, 25 Feb 2021 14:15:36 +0800 Subject: s390/cpumf: remove unneeded semicolon Fix the following coccicheck warnings: ./arch/s390/kernel/perf_cpum_cf.c:272:2-3: Unneeded semicolon. 
Reported-by: Abaci Robot Signed-off-by: Jiapeng Chong Signed-off-by: Heiko Carstens Link: https://lore.kernel.org/r/1614233736-87331-1-git-send-email-jiapeng.chong@linux.alibaba.com Signed-off-by: Heiko Carstens --- arch/s390/kernel/perf_cpum_cf.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/s390/kernel/perf_cpum_cf.c b/arch/s390/kernel/perf_cpum_cf.c index 0eb1d1cc53a8..b3beef64d3d4 100644 --- a/arch/s390/kernel/perf_cpum_cf.c +++ b/arch/s390/kernel/perf_cpum_cf.c @@ -269,7 +269,7 @@ static int __hw_perf_event_init(struct perf_event *event, unsigned int type) case CPUMF_CTR_SET_MAX: /* The counter could not be associated to a counter set */ return -EINVAL; - }; + } /* Initialize for using the CPU-measurement counter facility */ if (!atomic_inc_not_zero(&num_events)) { -- cgit v1.2.3 From d50aa69d36be43fa8927fd8ef305c4af88b6b450 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Mon, 1 Mar 2021 21:48:16 +0100 Subject: s390: update defconfigs Signed-off-by: Heiko Carstens --- arch/s390/configs/debug_defconfig | 16 +++------------- arch/s390/configs/defconfig | 11 ++--------- arch/s390/configs/zfcpdump_defconfig | 1 - 3 files changed, 5 insertions(+), 23 deletions(-) (limited to 'arch') diff --git a/arch/s390/configs/debug_defconfig b/arch/s390/configs/debug_defconfig index 02056b024091..dc0b69058ac4 100644 --- a/arch/s390/configs/debug_defconfig +++ b/arch/s390/configs/debug_defconfig @@ -275,9 +275,9 @@ CONFIG_IP_VS_DH=m CONFIG_IP_VS_SH=m CONFIG_IP_VS_SED=m CONFIG_IP_VS_NQ=m +CONFIG_IP_VS_TWOS=m CONFIG_IP_VS_FTP=m CONFIG_IP_VS_PE_SIP=m -CONFIG_NF_TABLES_IPV4=y CONFIG_NFT_FIB_IPV4=m CONFIG_NF_TABLES_ARP=y CONFIG_IP_NF_IPTABLES=m @@ -298,7 +298,6 @@ CONFIG_IP_NF_SECURITY=m CONFIG_IP_NF_ARPTABLES=m CONFIG_IP_NF_ARPFILTER=m CONFIG_IP_NF_ARP_MANGLE=m -CONFIG_NF_TABLES_IPV6=y CONFIG_NFT_FIB_IPV6=m CONFIG_IP6_NF_IPTABLES=m CONFIG_IP6_NF_MATCH_AH=m @@ -481,7 +480,6 @@ CONFIG_NLMON=m # CONFIG_NET_VENDOR_AQUANTIA is not set # CONFIG_NET_VENDOR_ARC is not set # CONFIG_NET_VENDOR_ATHEROS is not set -# CONFIG_NET_VENDOR_AURORA is not set # CONFIG_NET_VENDOR_BROADCOM is not set # CONFIG_NET_VENDOR_BROCADE is not set # CONFIG_NET_VENDOR_CADENCE is not set @@ -581,7 +579,6 @@ CONFIG_VIRTIO_BALLOON=m CONFIG_VIRTIO_INPUT=y CONFIG_VHOST_NET=m CONFIG_VHOST_VSOCK=m -# CONFIG_SURFACE_PLATFORMS is not set CONFIG_S390_CCW_IOMMU=y CONFIG_S390_AP_IOMMU=y CONFIG_EXT4_FS=y @@ -635,6 +632,7 @@ CONFIG_NTFS_RW=y CONFIG_PROC_KCORE=y CONFIG_TMPFS=y CONFIG_TMPFS_POSIX_ACL=y +CONFIG_TMPFS_INODE64=y CONFIG_HUGETLBFS=y CONFIG_CONFIGFS_FS=m CONFIG_ECRYPT_FS=m @@ -714,12 +712,8 @@ CONFIG_CRYPTO_VMAC=m CONFIG_CRYPTO_CRC32=m CONFIG_CRYPTO_BLAKE2S=m CONFIG_CRYPTO_MICHAEL_MIC=m -CONFIG_CRYPTO_RMD128=m CONFIG_CRYPTO_RMD160=m -CONFIG_CRYPTO_RMD256=m -CONFIG_CRYPTO_RMD320=m CONFIG_CRYPTO_SHA3=m -CONFIG_CRYPTO_TGR192=m CONFIG_CRYPTO_WP512=m CONFIG_CRYPTO_AES_TI=m CONFIG_CRYPTO_ANUBIS=m @@ -731,7 +725,6 @@ CONFIG_CRYPTO_CAST6=m CONFIG_CRYPTO_DES=m CONFIG_CRYPTO_FCRYPT=m CONFIG_CRYPTO_KHAZAD=m -CONFIG_CRYPTO_SALSA20=m CONFIG_CRYPTO_SEED=m CONFIG_CRYPTO_SERPENT=m CONFIG_CRYPTO_SM4=m @@ -796,12 +789,9 @@ CONFIG_DEBUG_OBJECTS_RCU_HEAD=y CONFIG_DEBUG_OBJECTS_PERCPU_COUNTER=y CONFIG_SLUB_DEBUG_ON=y CONFIG_SLUB_STATS=y -CONFIG_DEBUG_KMEMLEAK=y -CONFIG_DEBUG_KMEMLEAK_DEFAULT_OFF=y CONFIG_DEBUG_STACK_USAGE=y CONFIG_DEBUG_VM=y CONFIG_DEBUG_VM_VMACACHE=y -CONFIG_DEBUG_VM_RB=y CONFIG_DEBUG_VM_PGFLAGS=y CONFIG_DEBUG_MEMORY_INIT=y CONFIG_MEMORY_NOTIFIER_ERROR_INJECT=m @@ -838,6 +828,7 @@ 
CONFIG_BPF_KPROBE_OVERRIDE=y CONFIG_HIST_TRIGGERS=y CONFIG_FTRACE_STARTUP_TEST=y # CONFIG_EVENT_TRACE_STARTUP_TEST is not set +CONFIG_DEBUG_ENTRY=y CONFIG_NOTIFIER_ERROR_INJECTION=m CONFIG_NETDEV_NOTIFIER_ERROR_INJECT=m CONFIG_FAULT_INJECTION=y @@ -861,4 +852,3 @@ CONFIG_PERCPU_TEST=m CONFIG_ATOMIC64_SELFTEST=y CONFIG_TEST_BITOPS=m CONFIG_TEST_BPF=m -CONFIG_DEBUG_ENTRY=y diff --git a/arch/s390/configs/defconfig b/arch/s390/configs/defconfig index bac721a501da..320379da96d9 100644 --- a/arch/s390/configs/defconfig +++ b/arch/s390/configs/defconfig @@ -266,9 +266,9 @@ CONFIG_IP_VS_DH=m CONFIG_IP_VS_SH=m CONFIG_IP_VS_SED=m CONFIG_IP_VS_NQ=m +CONFIG_IP_VS_TWOS=m CONFIG_IP_VS_FTP=m CONFIG_IP_VS_PE_SIP=m -CONFIG_NF_TABLES_IPV4=y CONFIG_NFT_FIB_IPV4=m CONFIG_NF_TABLES_ARP=y CONFIG_IP_NF_IPTABLES=m @@ -289,7 +289,6 @@ CONFIG_IP_NF_SECURITY=m CONFIG_IP_NF_ARPTABLES=m CONFIG_IP_NF_ARPFILTER=m CONFIG_IP_NF_ARP_MANGLE=m -CONFIG_NF_TABLES_IPV6=y CONFIG_NFT_FIB_IPV6=m CONFIG_IP6_NF_IPTABLES=m CONFIG_IP6_NF_MATCH_AH=m @@ -473,7 +472,6 @@ CONFIG_NLMON=m # CONFIG_NET_VENDOR_AQUANTIA is not set # CONFIG_NET_VENDOR_ARC is not set # CONFIG_NET_VENDOR_ATHEROS is not set -# CONFIG_NET_VENDOR_AURORA is not set # CONFIG_NET_VENDOR_BROADCOM is not set # CONFIG_NET_VENDOR_BROCADE is not set # CONFIG_NET_VENDOR_CADENCE is not set @@ -573,7 +571,6 @@ CONFIG_VIRTIO_BALLOON=m CONFIG_VIRTIO_INPUT=y CONFIG_VHOST_NET=m CONFIG_VHOST_VSOCK=m -# CONFIG_SURFACE_PLATFORMS is not set CONFIG_S390_CCW_IOMMU=y CONFIG_S390_AP_IOMMU=y CONFIG_EXT4_FS=y @@ -623,6 +620,7 @@ CONFIG_NTFS_RW=y CONFIG_PROC_KCORE=y CONFIG_TMPFS=y CONFIG_TMPFS_POSIX_ACL=y +CONFIG_TMPFS_INODE64=y CONFIG_HUGETLBFS=y CONFIG_CONFIGFS_FS=m CONFIG_ECRYPT_FS=m @@ -703,12 +701,8 @@ CONFIG_CRYPTO_VMAC=m CONFIG_CRYPTO_CRC32=m CONFIG_CRYPTO_BLAKE2S=m CONFIG_CRYPTO_MICHAEL_MIC=m -CONFIG_CRYPTO_RMD128=m CONFIG_CRYPTO_RMD160=m -CONFIG_CRYPTO_RMD256=m -CONFIG_CRYPTO_RMD320=m CONFIG_CRYPTO_SHA3=m -CONFIG_CRYPTO_TGR192=m CONFIG_CRYPTO_WP512=m CONFIG_CRYPTO_AES_TI=m CONFIG_CRYPTO_ANUBIS=m @@ -720,7 +714,6 @@ CONFIG_CRYPTO_CAST6=m CONFIG_CRYPTO_DES=m CONFIG_CRYPTO_FCRYPT=m CONFIG_CRYPTO_KHAZAD=m -CONFIG_CRYPTO_SALSA20=m CONFIG_CRYPTO_SEED=m CONFIG_CRYPTO_SERPENT=m CONFIG_CRYPTO_SM4=m diff --git a/arch/s390/configs/zfcpdump_defconfig b/arch/s390/configs/zfcpdump_defconfig index acf982a2ae4c..039fd499b4a9 100644 --- a/arch/s390/configs/zfcpdump_defconfig +++ b/arch/s390/configs/zfcpdump_defconfig @@ -61,7 +61,6 @@ CONFIG_RAW_DRIVER=y # CONFIG_HID is not set # CONFIG_VIRTIO_MENU is not set # CONFIG_VHOST_MENU is not set -# CONFIG_SURFACE_PLATFORMS is not set # CONFIG_IOMMU_SUPPORT is not set # CONFIG_DNOTIFY is not set # CONFIG_INOTIFY_USER is not set -- cgit v1.2.3 From 78c7cccaab9d5f9ead44579d79dd7d13a05aec7e Mon Sep 17 00:00:00 2001 From: Alexander Egorenkov Date: Fri, 26 Feb 2021 09:40:47 +0100 Subject: s390: remove IBM_PARTITION and CONFIGFS_FS from zfcpdump defconfig Remove by zfcpdump unused CONFIG_IBM_PARTITION and CONFIG_CONFIGFS_FS. 
Signed-off-by: Alexander Egorenkov Reviewed-by: Steffen Maier Signed-off-by: Heiko Carstens --- arch/s390/configs/zfcpdump_defconfig | 2 -- 1 file changed, 2 deletions(-) (limited to 'arch') diff --git a/arch/s390/configs/zfcpdump_defconfig b/arch/s390/configs/zfcpdump_defconfig index 039fd499b4a9..76123a4b26ab 100644 --- a/arch/s390/configs/zfcpdump_defconfig +++ b/arch/s390/configs/zfcpdump_defconfig @@ -26,7 +26,6 @@ CONFIG_CRASH_DUMP=y # CONFIG_SECCOMP is not set # CONFIG_GCC_PLUGINS is not set CONFIG_PARTITION_ADVANCED=y -CONFIG_IBM_PARTITION=y # CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set # CONFIG_COMPACTION is not set # CONFIG_MIGRATION is not set @@ -64,7 +63,6 @@ CONFIG_RAW_DRIVER=y # CONFIG_IOMMU_SUPPORT is not set # CONFIG_DNOTIFY is not set # CONFIG_INOTIFY_USER is not set -CONFIG_CONFIGFS_FS=y # CONFIG_MISC_FILESYSTEMS is not set # CONFIG_NETWORK_FILESYSTEMS is not set CONFIG_LSM="yama,loadpin,safesetid,integrity" -- cgit v1.2.3 From 7a05293af39fc716d0f51c0164cbb727302396a2 Mon Sep 17 00:00:00 2001 From: Paul Cercueil Date: Wed, 3 Mar 2021 19:33:05 +0000 Subject: MIPS: boot/compressed: Copy DTB to aligned address Since 5.12-rc1, the Device Tree blob must now be properly aligned. Therefore, the decompress routine must be careful to copy the blob at the next aligned address after the kernel image. This commit fixes the kernel sometimes not booting with a Device Tree blob appended to it. Fixes: 79edff12060f ("scripts/dtc: Update to upstream version v1.6.0-51-g183df9e9c2b9") Signed-off-by: Paul Cercueil Acked-by: Rob Herring Signed-off-by: Thomas Bogendoerfer --- arch/mips/boot/compressed/decompress.c | 8 ++++++++ arch/mips/kernel/vmlinux.lds.S | 2 ++ 2 files changed, 10 insertions(+) (limited to 'arch') diff --git a/arch/mips/boot/compressed/decompress.c b/arch/mips/boot/compressed/decompress.c index e3946b06e840..3d70d15ada28 100644 --- a/arch/mips/boot/compressed/decompress.c +++ b/arch/mips/boot/compressed/decompress.c @@ -14,6 +14,7 @@ #include #include +#include /* * These two variables specify the free mem region @@ -120,6 +121,13 @@ void decompress_kernel(unsigned long boot_heap_start) /* last four bytes is always image size in little endian */ image_size = get_unaligned_le32((void *)&__image_end - 4); + /* The device tree's address must be properly aligned */ + image_size = ALIGN(image_size, STRUCT_ALIGNMENT); + + puts("Copy device tree to address "); + puthex(VMLINUX_LOAD_ADDRESS_ULL + image_size); + puts("\n"); + /* copy dtb to where the booted kernel will expect it */ memcpy((void *)VMLINUX_LOAD_ADDRESS_ULL + image_size, __appended_dtb, dtb_size); diff --git a/arch/mips/kernel/vmlinux.lds.S b/arch/mips/kernel/vmlinux.lds.S index c1c345be04ff..4b4e39b7c79b 100644 --- a/arch/mips/kernel/vmlinux.lds.S +++ b/arch/mips/kernel/vmlinux.lds.S @@ -145,6 +145,7 @@ SECTIONS } #ifdef CONFIG_MIPS_ELF_APPENDED_DTB + STRUCT_ALIGN(); .appended_dtb : AT(ADDR(.appended_dtb) - LOAD_OFFSET) { *(.appended_dtb) KEEP(*(.appended_dtb)) @@ -172,6 +173,7 @@ SECTIONS #endif #ifdef CONFIG_MIPS_RAW_APPENDED_DTB + STRUCT_ALIGN(); __appended_dtb = .; /* leave space for appended DTB */ . += 0x100000; -- cgit v1.2.3 From 6c810cf20feef0d4338e9b424ab7f2644a8b353e Mon Sep 17 00:00:00 2001 From: Maciej W. Rozycki Date: Wed, 3 Mar 2021 02:16:04 +0100 Subject: crypto: mips/poly1305 - enable for all MIPS processors The MIPS Poly1305 implementation is generic MIPS code written such as to support down to the original MIPS I and MIPS III ISA for the 32-bit and 64-bit variant respectively. 
Lift the current limitation, which enabled the code for MIPSr1 ISA or newer processors only, and make it available for all MIPS processors. Signed-off-by: Maciej W. Rozycki Fixes: a11d055e7a64 ("crypto: mips/poly1305 - incorporate OpenSSL/CRYPTOGAMS optimized implementation") Cc: stable@vger.kernel.org # v5.5+ Acked-by: Jason A. Donenfeld Signed-off-by: Thomas Bogendoerfer --- arch/mips/crypto/Makefile | 4 ++-- crypto/Kconfig | 2 +- drivers/net/Kconfig | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) (limited to 'arch') diff --git a/arch/mips/crypto/Makefile b/arch/mips/crypto/Makefile index 8e1deaf00e0c..5e4105cccf9f 100644 --- a/arch/mips/crypto/Makefile +++ b/arch/mips/crypto/Makefile @@ -12,8 +12,8 @@ AFLAGS_chacha-core.o += -O2 # needed to fill branch delay slots obj-$(CONFIG_CRYPTO_POLY1305_MIPS) += poly1305-mips.o poly1305-mips-y := poly1305-core.o poly1305-glue.o -perlasm-flavour-$(CONFIG_CPU_MIPS32) := o32 -perlasm-flavour-$(CONFIG_CPU_MIPS64) := 64 +perlasm-flavour-$(CONFIG_32BIT) := o32 +perlasm-flavour-$(CONFIG_64BIT) := 64 quiet_cmd_perlasm = PERLASM $@ cmd_perlasm = $(PERL) $(<) $(perlasm-flavour-y) $(@) diff --git a/crypto/Kconfig b/crypto/Kconfig index 15c9c28d9f53..5809cc198fa7 100644 --- a/crypto/Kconfig +++ b/crypto/Kconfig @@ -767,7 +767,7 @@ config CRYPTO_POLY1305_X86_64 config CRYPTO_POLY1305_MIPS tristate "Poly1305 authenticator algorithm (MIPS optimized)" - depends on CPU_MIPS32 || (CPU_MIPS64 && 64BIT) + depends on MIPS select CRYPTO_ARCH_HAVE_LIB_POLY1305 config CRYPTO_MD4 diff --git a/drivers/net/Kconfig b/drivers/net/Kconfig index b09bed554f26..bcd31f458d1a 100644 --- a/drivers/net/Kconfig +++ b/drivers/net/Kconfig @@ -94,7 +94,7 @@ config WIREGUARD select CRYPTO_BLAKE2S_ARM if ARM select CRYPTO_CURVE25519_NEON if ARM && KERNEL_MODE_NEON select CRYPTO_CHACHA_MIPS if CPU_MIPS32_R2 - select CRYPTO_POLY1305_MIPS if CPU_MIPS32 || (CPU_MIPS64 && 64BIT) + select CRYPTO_POLY1305_MIPS if MIPS help WireGuard is a secure, fast, and easy to use replacement for IPSec that uses modern cryptography and clever networking tricks. It's -- cgit v1.2.3 From df304c2d0dfd63c40561a8107a217e84fc3515e8 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Wed, 3 Mar 2021 13:49:26 +0000 Subject: arm64: cpufeatures: Fix handling of CONFIG_CMDLINE for idreg overrides The built-in kernel commandline (CONFIG_CMDLINE) can be configured in three different ways: 1. CMDLINE_FORCE: Use CONFIG_CMDLINE instead of any bootloader args 2. CMDLINE_EXTEND: Append the bootloader args to CONFIG_CMDLINE 3. CMDLINE_FROM_BOOTLOADER: Only use CONFIG_CMDLINE if there aren't any bootloader args. The early cmdline parsing to detect idreg overrides gets (2) and (3) slightly wrong: in the case of (2) the bootloader args are parsed first and in the case of (3) the CMDLINE is always parsed. Fix these issues by moving the bootargs parsing out into a helper function and following the same logic as that used by the EFI stub.
Reviewed-by: Marc Zyngier Fixes: 33200303553d ("arm64: cpufeature: Add an early command-line cpufeature override facility") Link: https://lore.kernel.org/r/20210303134927.18975-2-will@kernel.org Signed-off-by: Will Deacon --- arch/arm64/kernel/idreg-override.c | 44 ++++++++++++++++++++++---------------- 1 file changed, 25 insertions(+), 19 deletions(-) (limited to 'arch') diff --git a/arch/arm64/kernel/idreg-override.c b/arch/arm64/kernel/idreg-override.c index dffb16682330..cc071712c6f9 100644 --- a/arch/arm64/kernel/idreg-override.c +++ b/arch/arm64/kernel/idreg-override.c @@ -163,33 +163,39 @@ static __init void __parse_cmdline(const char *cmdline, bool parse_aliases) } while (1); } -static __init void parse_cmdline(void) +static __init const u8 *get_bootargs_cmdline(void) { - if (!IS_ENABLED(CONFIG_CMDLINE_FORCE)) { - const u8 *prop; - void *fdt; - int node; + const u8 *prop; + void *fdt; + int node; - fdt = get_early_fdt_ptr(); - if (!fdt) - goto out; + fdt = get_early_fdt_ptr(); + if (!fdt) + return NULL; - node = fdt_path_offset(fdt, "/chosen"); - if (node < 0) - goto out; + node = fdt_path_offset(fdt, "/chosen"); + if (node < 0) + return NULL; - prop = fdt_getprop(fdt, node, "bootargs", NULL); - if (!prop) - goto out; + prop = fdt_getprop(fdt, node, "bootargs", NULL); + if (!prop) + return NULL; - __parse_cmdline(prop, true); + return strlen(prop) ? prop : NULL; +} - if (!IS_ENABLED(CONFIG_CMDLINE_EXTEND)) - return; +static __init void parse_cmdline(void) +{ + const u8 *prop = get_bootargs_cmdline(); + + if (IS_ENABLED(CONFIG_CMDLINE_EXTEND) || + IS_ENABLED(CONFIG_CMDLINE_FORCE) || + !prop) { + __parse_cmdline(CONFIG_CMDLINE, true); } -out: - __parse_cmdline(CONFIG_CMDLINE, true); + if (!IS_ENABLED(CONFIG_CMDLINE_FORCE) && prop) + __parse_cmdline(prop, true); } /* Keep checkers quiet */ -- cgit v1.2.3 From cae118b6acc309539b33339e846cbb19187c164c Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Wed, 3 Mar 2021 13:49:27 +0000 Subject: arm64: Drop support for CMDLINE_EXTEND The documented behaviour for CMDLINE_EXTEND is that the arguments from the bootloader are appended to the built-in kernel command line. This also matches the option parsing behaviour for the EFI stub and early ID register overrides. Bizarrely, the fdt behaviour is the other way around: appending the built-in command line to the bootloader arguments, resulting in a command-line that doesn't necessarily line-up with the parsing order and definitely doesn't line-up with the documented behaviour. As it turns out, there is a proposal [1] to replace CMDLINE_EXTEND with CMDLINE_PREPEND and CMDLINE_APPEND options which should hopefully make the intended behaviour much clearer. While we wait for those to land, drop CMDLINE_EXTEND for now as there appears to be little enthusiasm for changing the current FDT behaviour. 
[1] https://lore.kernel.org/lkml/20190319232448.45964-2-danielwa@cisco.com/ Cc: Max Uvarov Cc: Rob Herring Cc: Ard Biesheuvel Cc: Marc Zyngier Cc: Doug Anderson Cc: Tyler Hicks Cc: Frank Rowand Cc: Catalin Marinas Link: https://lore.kernel.org/r/CAL_JsqJX=TCCs7=gg486r9TN4NYscMTCLNfqJF9crskKPq-bTg@mail.gmail.com Link: https://lore.kernel.org/r/20210303134927.18975-3-will@kernel.org Signed-off-by: Will Deacon --- arch/arm64/Kconfig | 6 ------ arch/arm64/kernel/idreg-override.c | 5 +---- 2 files changed, 1 insertion(+), 10 deletions(-) (limited to 'arch') diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 1f212b47a48a..f15418332d16 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -1855,12 +1855,6 @@ config CMDLINE_FROM_BOOTLOADER the boot loader doesn't provide any, the default kernel command string provided in CMDLINE will be used. -config CMDLINE_EXTEND - bool "Extend bootloader kernel arguments" - help - The command-line arguments provided by the boot loader will be - appended to the default kernel command string. - config CMDLINE_FORCE bool "Always use the default kernel command string" help diff --git a/arch/arm64/kernel/idreg-override.c b/arch/arm64/kernel/idreg-override.c index cc071712c6f9..83f1c4b92095 100644 --- a/arch/arm64/kernel/idreg-override.c +++ b/arch/arm64/kernel/idreg-override.c @@ -188,11 +188,8 @@ static __init void parse_cmdline(void) { const u8 *prop = get_bootargs_cmdline(); - if (IS_ENABLED(CONFIG_CMDLINE_EXTEND) || - IS_ENABLED(CONFIG_CMDLINE_FORCE) || - !prop) { + if (IS_ENABLED(CONFIG_CMDLINE_FORCE) || !prop) __parse_cmdline(CONFIG_CMDLINE, true); - } if (!IS_ENABLED(CONFIG_CMDLINE_FORCE) && prop) __parse_cmdline(prop, true); -- cgit v1.2.3 From 07fb6dc327f108937881a096ec6e367a07a7395d Mon Sep 17 00:00:00 2001 From: Anshuman Khandual Date: Mon, 1 Mar 2021 10:36:32 +0530 Subject: arm64/mm: Drop redundant ARCH_WANT_HUGE_PMD_SHARE There is already an ARCH_WANT_HUGE_PMD_SHARE which is being selected for applicable configurations. Hence just drop the other redundant entry. Cc: Catalin Marinas Cc: Will Deacon Cc: linux-arm-kernel@lists.infradead.org Cc: linux-kernel@vger.kernel.org Signed-off-by: Anshuman Khandual Link: https://lore.kernel.org/r/1614575192-21307-1-git-send-email-anshuman.khandual@arm.com Signed-off-by: Will Deacon --- arch/arm64/Kconfig | 2 -- 1 file changed, 2 deletions(-) (limited to 'arch') diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index f15418332d16..6f36732dc11a 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -1055,8 +1055,6 @@ config HW_PERF_EVENTS config SYS_SUPPORTS_HUGETLBFS def_bool y -config ARCH_WANT_HUGE_PMD_SHARE - config ARCH_HAS_CACHE_LINE_SIZE def_bool y -- cgit v1.2.3 From 79cc2ed5a716544621b11a3f90550e5c7d314306 Mon Sep 17 00:00:00 2001 From: Anshuman Khandual Date: Mon, 1 Mar 2021 16:55:14 +0530 Subject: arm64/mm: Drop THP conditionality from FORCE_MAX_ZONEORDER Currently without THP being enabled, MAX_ORDER via FORCE_MAX_ZONEORDER gets reduced to 11, which falls below HUGETLB_PAGE_ORDER for certain 16K and 64K page size configurations. This is problematic which throws up the following warning during boot as pageblock_order via HUGETLB_PAGE_ORDER order exceeds MAX_ORDER. 
WARNING: CPU: 7 PID: 127 at mm/vmstat.c:1092 __fragmentation_index+0x58/0x70 Modules linked in: CPU: 7 PID: 127 Comm: kswapd0 Not tainted 5.12.0-rc1-00005-g0221e3101a1 #237 Hardware name: linux,dummy-virt (DT) pstate: 20400005 (nzCv daif +PAN -UAO -TCO BTYPE=--) pc : __fragmentation_index+0x58/0x70 lr : fragmentation_index+0x88/0xa8 sp : ffff800016ccfc00 x29: ffff800016ccfc00 x28: 0000000000000000 x27: ffff800011fd4000 x26: 0000000000000002 x25: ffff800016ccfda0 x24: 0000000000000002 x23: 0000000000000640 x22: ffff0005ffcb5b18 x21: 0000000000000002 x20: 000000000000000d x19: ffff0005ffcb3980 x18: 0000000000000004 x17: 0000000000000001 x16: 0000000000000019 x15: ffff800011ca7fb8 x14: 00000000000002b3 x13: 0000000000000000 x12: 00000000000005e0 x11: 0000000000000003 x10: 0000000000000080 x9 : ffff800011c93948 x8 : 0000000000000000 x7 : 0000000000000000 x6 : 0000000000007000 x5 : 0000000000007944 x4 : 0000000000000032 x3 : 000000000000001c x2 : 000000000000000b x1 : ffff800016ccfc10 x0 : 000000000000000d Call trace: __fragmentation_index+0x58/0x70 compaction_suitable+0x58/0x78 wakeup_kcompactd+0x8c/0xd8 balance_pgdat+0x570/0x5d0 kswapd+0x1e0/0x388 kthread+0x154/0x158 ret_from_fork+0x10/0x30 This solves the problem via keeping FORCE_MAX_ZONEORDER unchanged with or without THP on 16K and 64K page size configurations, making sure that the HUGETLB_PAGE_ORDER (and pageblock_order) would never exceed MAX_ORDER. Cc: Catalin Marinas Cc: Will Deacon Cc: linux-arm-kernel@lists.infradead.org Cc: linux-kernel@vger.kernel.org Signed-off-by: Anshuman Khandual Acked-by: Catalin Marinas Link: https://lore.kernel.org/r/1614597914-28565-1-git-send-email-anshuman.khandual@arm.com Signed-off-by: Will Deacon --- arch/arm64/Kconfig | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 6f36732dc11a..5656e7aacd69 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -1155,8 +1155,8 @@ config XEN config FORCE_MAX_ZONEORDER int - default "14" if (ARM64_64K_PAGES && TRANSPARENT_HUGEPAGE) - default "12" if (ARM64_16K_PAGES && TRANSPARENT_HUGEPAGE) + default "14" if ARM64_64K_PAGES + default "12" if ARM64_16K_PAGES default "11" help The kernel memory allocator divides physically contiguous memory -- cgit v1.2.3 From 78a81d88f60ba773cbe890205e1ee67f00502948 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Wed, 3 Mar 2021 15:17:12 +0100 Subject: x86/sev-es: Introduce ip_within_syscall_gap() helper Introduce a helper to check whether an exception came from the syscall gap and use it in the SEV-ES code. Extend the check to also cover the compatibility SYSCALL entry path. 
Fixes: 315562c9af3d5 ("x86/sev-es: Adjust #VC IST Stack on entering NMI handler") Signed-off-by: Joerg Roedel Signed-off-by: Borislav Petkov Cc: stable@vger.kernel.org # 5.10+ Link: https://lkml.kernel.org/r/20210303141716.29223-2-joro@8bytes.org --- arch/x86/entry/entry_64_compat.S | 2 ++ arch/x86/include/asm/proto.h | 1 + arch/x86/include/asm/ptrace.h | 15 +++++++++++++++ arch/x86/kernel/traps.c | 3 +-- 4 files changed, 19 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/x86/entry/entry_64_compat.S b/arch/x86/entry/entry_64_compat.S index 541fdaf64045..0051cf5c792d 100644 --- a/arch/x86/entry/entry_64_compat.S +++ b/arch/x86/entry/entry_64_compat.S @@ -210,6 +210,8 @@ SYM_CODE_START(entry_SYSCALL_compat) /* Switch to the kernel stack */ movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp +SYM_INNER_LABEL(entry_SYSCALL_compat_safe_stack, SYM_L_GLOBAL) + /* Construct struct pt_regs on stack */ pushq $__USER32_DS /* pt_regs->ss */ pushq %r8 /* pt_regs->sp */ diff --git a/arch/x86/include/asm/proto.h b/arch/x86/include/asm/proto.h index 2c35f1c01a2d..b6a9d51d1d79 100644 --- a/arch/x86/include/asm/proto.h +++ b/arch/x86/include/asm/proto.h @@ -25,6 +25,7 @@ void __end_SYSENTER_singlestep_region(void); void entry_SYSENTER_compat(void); void __end_entry_SYSENTER_compat(void); void entry_SYSCALL_compat(void); +void entry_SYSCALL_compat_safe_stack(void); void entry_INT80_compat(void); #ifdef CONFIG_XEN_PV void xen_entry_INT80_compat(void); diff --git a/arch/x86/include/asm/ptrace.h b/arch/x86/include/asm/ptrace.h index d8324a236696..409f661481e1 100644 --- a/arch/x86/include/asm/ptrace.h +++ b/arch/x86/include/asm/ptrace.h @@ -94,6 +94,8 @@ struct pt_regs { #include #endif +#include + struct cpuinfo_x86; struct task_struct; @@ -175,6 +177,19 @@ static inline bool any_64bit_mode(struct pt_regs *regs) #ifdef CONFIG_X86_64 #define current_user_stack_pointer() current_pt_regs()->sp #define compat_user_stack_pointer() current_pt_regs()->sp + +static inline bool ip_within_syscall_gap(struct pt_regs *regs) +{ + bool ret = (regs->ip >= (unsigned long)entry_SYSCALL_64 && + regs->ip < (unsigned long)entry_SYSCALL_64_safe_stack); + +#ifdef CONFIG_IA32_EMULATION + ret = ret || (regs->ip >= (unsigned long)entry_SYSCALL_compat && + regs->ip < (unsigned long)entry_SYSCALL_compat_safe_stack); +#endif + + return ret; +} #endif static inline unsigned long kernel_stack_pointer(struct pt_regs *regs) diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index 7f5aec758f0e..ac1874a2a70e 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -694,8 +694,7 @@ asmlinkage __visible noinstr struct pt_regs *vc_switch_off_ist(struct pt_regs *r * In the SYSCALL entry path the RSP value comes from user-space - don't * trust it and switch to the current kernel stack */ - if (regs->ip >= (unsigned long)entry_SYSCALL_64 && - regs->ip < (unsigned long)entry_SYSCALL_64_safe_stack) { + if (ip_within_syscall_gap(regs)) { sp = this_cpu_read(cpu_current_top_of_stack); goto sync; } -- cgit v1.2.3 From 6654111c893fec1516d83046d2b237e83e0d5967 Mon Sep 17 00:00:00 2001 From: Bjørn Mork Date: Sun, 7 Mar 2021 19:23:01 +0100 Subject: MIPS: vmlinux.lds.S: align raw appended dtb to 8 bytes The devicetree specification requires 8-byte alignment in memory. This is now enforced by libfdt since commit 79edff12060f ("scripts/dtc: Update to upstream version v1.6.0-51-g183df9e9c2b9") which included the upstream commit 5e735860c478 ("libfdt: Check for 8-byte address alignment in fdt_ro_probe_()"). 
This broke the MIPS raw appended DTBs which would be appended to the image immediately following the initramfs section. This ends with a 32bit size, resulting in a 4-byte alignment of the DTB. Fix by padding with zeroes to 8-bytes when MIPS_RAW_APPENDED_DTB is defined. Fixes: 79edff12060f ("scripts/dtc: Update to upstream version v1.6.0-51-g183df9e9c2b9") Cc: Rob Herring Cc: Frank Rowand Signed-off-by: Bjørn Mork Signed-off-by: Thomas Bogendoerfer --- arch/mips/kernel/vmlinux.lds.S | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/mips/kernel/vmlinux.lds.S b/arch/mips/kernel/vmlinux.lds.S index 4b4e39b7c79b..1234834cc4c4 100644 --- a/arch/mips/kernel/vmlinux.lds.S +++ b/arch/mips/kernel/vmlinux.lds.S @@ -173,7 +173,11 @@ SECTIONS #endif #ifdef CONFIG_MIPS_RAW_APPENDED_DTB - STRUCT_ALIGN(); + .fill : { + FILL(0); + BYTE(0); + . = ALIGN(8); + } __appended_dtb = .; /* leave space for appended DTB */ . += 0x100000; -- cgit v1.2.3 From eeb0753ba27b26f609e61f9950b14f1b934fe429 Mon Sep 17 00:00:00 2001 From: Anshuman Khandual Date: Fri, 5 Mar 2021 10:54:57 +0530 Subject: arm64/mm: Fix pfn_valid() for ZONE_DEVICE based memory pfn_valid() validates a pfn but basically it checks for a valid struct page backing for that pfn. It should always return positive for memory ranges backed with struct page mapping. But currently pfn_valid() fails for all ZONE_DEVICE based memory types even though they have struct page mapping. pfn_valid() asserts that there is a memblock entry for a given pfn without MEMBLOCK_NOMAP flag being set. The problem with ZONE_DEVICE based memory is that they do not have memblock entries. Hence memblock_is_map_memory() will invariably fail via memblock_search() for a ZONE_DEVICE based address. This eventually fails pfn_valid() which is wrong. memblock_is_map_memory() needs to be skipped for such memory ranges. As ZONE_DEVICE memory gets hotplugged into the system via memremap_pages() called from a driver, their respective memory sections will not have SECTION_IS_EARLY set. Normal hotplug memory will never have MEMBLOCK_NOMAP set in their memblock regions. Because the flag MEMBLOCK_NOMAP was specifically designed and set for firmware reserved memory regions. memblock_is_map_memory() can just be skipped as its always going to be positive and that will be an optimization for the normal hotplug memory. Like ZONE_DEVICE based memory, all normal hotplugged memory too will not have SECTION_IS_EARLY set for their sections Skipping memblock_is_map_memory() for all non early memory sections would fix pfn_valid() problem for ZONE_DEVICE based memory and also improve its performance for normal hotplug memory as well. Cc: Catalin Marinas Cc: Will Deacon Cc: Ard Biesheuvel Cc: Robin Murphy Cc: linux-arm-kernel@lists.infradead.org Cc: linux-kernel@vger.kernel.org Acked-by: David Hildenbrand Fixes: 73b20c84d42d ("arm64: mm: implement pte_devmap support") Signed-off-by: Anshuman Khandual Acked-by: Catalin Marinas Link: https://lore.kernel.org/r/1614921898-4099-2-git-send-email-anshuman.khandual@arm.com Signed-off-by: Will Deacon --- arch/arm64/mm/init.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'arch') diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c index 0ace5e68efba..5920c527845a 100644 --- a/arch/arm64/mm/init.c +++ b/arch/arm64/mm/init.c @@ -230,6 +230,18 @@ int pfn_valid(unsigned long pfn) if (!valid_section(__pfn_to_section(pfn))) return 0; + + /* + * ZONE_DEVICE memory does not have the memblock entries. 
+ * memblock_is_map_memory() check for ZONE_DEVICE based + * addresses will always fail. Even the normal hotplugged + * memory will never have MEMBLOCK_NOMAP flag set in their + * memblock entries. Skip memblock search for all non early + * memory sections covering all of hotplug memory including + * both normal and ZONE_DEVICE based. + */ + if (!early_section(__pfn_to_section(pfn))) + return pfn_section_valid(__pfn_to_section(pfn), pfn); #endif return memblock_is_map_memory(addr); } -- cgit v1.2.3 From 093bbe211ea566fa828536275e09ee9d75df1f25 Mon Sep 17 00:00:00 2001 From: Anshuman Khandual Date: Fri, 5 Mar 2021 10:54:58 +0530 Subject: arm64/mm: Reorganize pfn_valid() There are multiple instances of pfn_to_section_nr() and __pfn_to_section() when CONFIG_SPARSEMEM is enabled. This can be optimized if memory section is fetched earlier. This replaces the open coded PFN and ADDR conversion with PFN_PHYS() and PHYS_PFN() helpers. While there, also add a comment. This does not cause any functional change. Cc: Catalin Marinas Cc: Will Deacon Cc: Ard Biesheuvel Cc: linux-arm-kernel@lists.infradead.org Cc: linux-kernel@vger.kernel.org Reviewed-by: David Hildenbrand Signed-off-by: Anshuman Khandual Acked-by: Catalin Marinas Link: https://lore.kernel.org/r/1614921898-4099-3-git-send-email-anshuman.khandual@arm.com Signed-off-by: Will Deacon --- arch/arm64/mm/init.c | 21 ++++++++++++++++----- 1 file changed, 16 insertions(+), 5 deletions(-) (limited to 'arch') diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c index 5920c527845a..3685e12aba9b 100644 --- a/arch/arm64/mm/init.c +++ b/arch/arm64/mm/init.c @@ -219,16 +219,26 @@ static void __init zone_sizes_init(unsigned long min, unsigned long max) int pfn_valid(unsigned long pfn) { - phys_addr_t addr = pfn << PAGE_SHIFT; + phys_addr_t addr = PFN_PHYS(pfn); - if ((addr >> PAGE_SHIFT) != pfn) + /* + * Ensure the upper PAGE_SHIFT bits are clear in the + * pfn. Else it might lead to false positives when + * some of the upper bits are set, but the lower bits + * match a valid pfn. + */ + if (PHYS_PFN(addr) != pfn) return 0; #ifdef CONFIG_SPARSEMEM +{ + struct mem_section *ms; + if (pfn_to_section_nr(pfn) >= NR_MEM_SECTIONS) return 0; - if (!valid_section(__pfn_to_section(pfn))) + ms = __pfn_to_section(pfn); + if (!valid_section(ms)) return 0; /* @@ -240,8 +250,9 @@ int pfn_valid(unsigned long pfn) * memory sections covering all of hotplug memory including * both normal and ZONE_DEVICE based. */ - if (!early_section(__pfn_to_section(pfn))) - return pfn_section_valid(__pfn_to_section(pfn), pfn); + if (!early_section(ms)) + return pfn_section_valid(ms, pfn); +} #endif return memblock_is_map_memory(addr); } -- cgit v1.2.3 From 3e895f4cbd158c31f1295d097a73ea4fe50f88f4 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Wed, 17 Feb 2021 18:10:35 +0000 Subject: ARM: ep93xx: Select GENERIC_IRQ_MULTI_HANDLER directly ep93xx currently relies of CONFIG_ARM_VIC to select GENERIC_IRQ_MULTI_HANDLER. Given that this is logically a platform architecture property, add the selection of GENERIC_IRQ_MULTI_HANDLER at the platform level. Further patches will remove the selection from the irqchip side. 
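For context, selecting GENERIC_IRQ_MULTI_HANDLER means the root interrupt handler is installed at run time rather than hard-wired into the entry code; assuming the usual set_handle_irq()/handle_arch_irq interfaces, the VIC driver does something along these lines (sketch):

    /* drivers/irqchip/irq-vic.c, at init time (roughly) */
    set_handle_irq(vic_handle_irq);

    /* the common low-level entry code then dispatches via the pointer */
    handle_arch_irq(regs);

which is why the option is logically a property of the platform rather than of the irqchip driver alone.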
Reported-by: Marc Rutland Signed-off-by: Marc Zyngier Signed-off-by: Mark Rutland Tested-by: Hector Martin Cc: Catalin Marinas Cc: James Morse Cc: Thomas Gleixner Cc: Will Deacon Signed-off-by: Marc Zyngier --- arch/arm/Kconfig | 1 + 1 file changed, 1 insertion(+) (limited to 'arch') diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index 853aab5ab327..5da96f5df48f 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -348,6 +348,7 @@ config ARCH_EP93XX select ARM_AMBA imply ARM_PATCH_PHYS_VIRT select ARM_VIC + select GENERIC_IRQ_MULTI_HANDLER select AUTO_ZRELADDR select CLKDEV_LOOKUP select CLKSRC_MMIO -- cgit v1.2.3 From dbaee836d60a8e1b03e7d53a37893235662ba124 Mon Sep 17 00:00:00 2001 From: Sami Tolvanen Date: Fri, 5 Mar 2021 12:21:24 -0800 Subject: KVM: arm64: Don't use cbz/adr with external symbols allmodconfig + CONFIG_LTO_CLANG_THIN=y fails to build due to following linker errors: ld.lld: error: irqbypass.c:(function __guest_enter: .text+0x21CC): relocation R_AARCH64_CONDBR19 out of range: 2031220 is not in [-1048576, 1048575]; references hyp_panic >>> defined in vmlinux.o ld.lld: error: irqbypass.c:(function __guest_enter: .text+0x21E0): relocation R_AARCH64_ADR_PREL_LO21 out of range: 2031200 is not in [-1048576, 1048575]; references hyp_panic >>> defined in vmlinux.o This is because with LTO, the compiler ends up placing hyp_panic() more than 1MB away from __guest_enter(). Use an unconditional branch and adr_l instead to fix the issue. Link: https://github.com/ClangBuiltLinux/linux/issues/1317 Reported-by: Nathan Chancellor Suggested-by: Marc Zyngier Suggested-by: Ard Biesheuvel Signed-off-by: Sami Tolvanen Reviewed-by: Kees Cook Acked-by: Will Deacon Tested-by: Nathan Chancellor Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20210305202124.3768527-1-samitolvanen@google.com --- arch/arm64/kvm/hyp/entry.S | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/arm64/kvm/hyp/entry.S b/arch/arm64/kvm/hyp/entry.S index 0c66a1d408fd..e831d3dfd50d 100644 --- a/arch/arm64/kvm/hyp/entry.S +++ b/arch/arm64/kvm/hyp/entry.S @@ -85,8 +85,10 @@ SYM_INNER_LABEL(__guest_exit_panic, SYM_L_GLOBAL) // If the hyp context is loaded, go straight to hyp_panic get_loaded_vcpu x0, x1 - cbz x0, hyp_panic + cbnz x0, 1f + b hyp_panic +1: // The hyp context is saved so make sure it is restored to allow // hyp_panic to run at hyp and, subsequently, panic to run in the host. // This makes use of __guest_exit to avoid duplication but sets the @@ -94,7 +96,7 @@ SYM_INNER_LABEL(__guest_exit_panic, SYM_L_GLOBAL) // current state is saved to the guest context but it will only be // accurate if the guest had been completely restored. adr_this_cpu x0, kvm_hyp_ctxt, x1 - adr x1, hyp_panic + adr_l x1, hyp_panic str x1, [x0, #CPU_XREG_OFFSET(30)] get_vcpu_ptr x1, x0 -- cgit v1.2.3 From bd67b711bfaa02cf19e88aa2d9edae5c1c1d2739 Mon Sep 17 00:00:00 2001 From: Thomas Bogendoerfer Date: Mon, 8 Mar 2021 10:24:47 +0100 Subject: MIPS: kernel: Reserve exception base early to prevent corruption BMIPS is one of the few platforms that do change the exception base. After commit 2dcb39645441 ("memblock: do not start bottom-up allocations with kernel_end") we started seeing BMIPS boards fail to boot with the built-in FDT being corrupted. Before the cited commit, early allocations would be in the [kernel_end, RAM_END] range, but after commit they would be within [RAM_START + PAGE_SIZE, RAM_END]. 
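A simplified picture of the resulting collision on BMIPS5000 (addresses illustrative, sizes taken from the constants used in the diff below):

    /*
     *   RAM_START + PAGE_SIZE ...    <- early memblock allocations, including
     *                                   the FDT copy made by
     *                                   unflatten_and_copy_device_tree()
     *   0x1000 ... 0x1000 + 0x4000   <- relocated exception vector space
     *                                   (VECTORSPACING * 64 == 64 * 0x100
     *                                    == 16 KiB) written later by
     *                                   bmips_ebase_setup()
     *
     * Without an early memblock_reserve() of the vector space the two ranges
     * can overlap, and writing the vectors corrupts the FDT copy.
     */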
The custom exception base handler that is installed by bmips_ebase_setup() done for BMIPS5000 CPUs ends-up trampling on the memory region allocated by unflatten_and_copy_device_tree() thus corrupting the FDT used by the kernel. To fix this, we need to perform an early reservation of the custom exception space. Additional we reserve the first 4k (1k for R3k) for either normal exception vector space (legacy CPUs) or special vectors like cache exceptions. Huge thanks to Serge for analysing and proposing a solution to this issue. Fixes: 2dcb39645441 ("memblock: do not start bottom-up allocations with kernel_end") Reported-by: Kamal Dasu Debugged-by: Serge Semin Acked-by: Mike Rapoport Tested-by: Florian Fainelli Reviewed-by: Serge Semin Signed-off-by: Thomas Bogendoerfer --- arch/mips/include/asm/traps.h | 3 +++ arch/mips/kernel/cpu-probe.c | 6 ++++++ arch/mips/kernel/cpu-r3k-probe.c | 3 +++ arch/mips/kernel/traps.c | 10 +++++----- 4 files changed, 17 insertions(+), 5 deletions(-) (limited to 'arch') diff --git a/arch/mips/include/asm/traps.h b/arch/mips/include/asm/traps.h index 6aa8f126a43d..b710e76c9c65 100644 --- a/arch/mips/include/asm/traps.h +++ b/arch/mips/include/asm/traps.h @@ -24,8 +24,11 @@ extern void (*board_ebase_setup)(void); extern void (*board_cache_error_setup)(void); extern int register_nmi_notifier(struct notifier_block *nb); +extern void reserve_exception_space(phys_addr_t addr, unsigned long size); extern char except_vec_nmi[]; +#define VECTORSPACING 0x100 /* for EI/VI mode */ + #define nmi_notifier(fn, pri) \ ({ \ static struct notifier_block fn##_nb = { \ diff --git a/arch/mips/kernel/cpu-probe.c b/arch/mips/kernel/cpu-probe.c index 9a89637b4ecf..b71892064f27 100644 --- a/arch/mips/kernel/cpu-probe.c +++ b/arch/mips/kernel/cpu-probe.c @@ -26,6 +26,7 @@ #include #include #include +#include #include #include "fpu-probe.h" @@ -1628,6 +1629,7 @@ static inline void cpu_probe_broadcom(struct cpuinfo_mips *c, unsigned int cpu) c->cputype = CPU_BMIPS3300; __cpu_name[cpu] = "Broadcom BMIPS3300"; set_elf_platform(cpu, "bmips3300"); + reserve_exception_space(0x400, VECTORSPACING * 64); break; case PRID_IMP_BMIPS43XX: { int rev = c->processor_id & PRID_REV_MASK; @@ -1638,6 +1640,7 @@ static inline void cpu_probe_broadcom(struct cpuinfo_mips *c, unsigned int cpu) __cpu_name[cpu] = "Broadcom BMIPS4380"; set_elf_platform(cpu, "bmips4380"); c->options |= MIPS_CPU_RIXI; + reserve_exception_space(0x400, VECTORSPACING * 64); } else { c->cputype = CPU_BMIPS4350; __cpu_name[cpu] = "Broadcom BMIPS4350"; @@ -1654,6 +1657,7 @@ static inline void cpu_probe_broadcom(struct cpuinfo_mips *c, unsigned int cpu) __cpu_name[cpu] = "Broadcom BMIPS5000"; set_elf_platform(cpu, "bmips5000"); c->options |= MIPS_CPU_ULRI | MIPS_CPU_RIXI; + reserve_exception_space(0x1000, VECTORSPACING * 64); break; } } @@ -2133,6 +2137,8 @@ void cpu_probe(void) if (cpu == 0) __ua_limit = ~((1ull << cpu_vmbits) - 1); #endif + + reserve_exception_space(0, 0x1000); } void cpu_report(void) diff --git a/arch/mips/kernel/cpu-r3k-probe.c b/arch/mips/kernel/cpu-r3k-probe.c index abdbbe8c5a43..af654771918c 100644 --- a/arch/mips/kernel/cpu-r3k-probe.c +++ b/arch/mips/kernel/cpu-r3k-probe.c @@ -21,6 +21,7 @@ #include #include #include +#include #include "fpu-probe.h" @@ -158,6 +159,8 @@ void cpu_probe(void) cpu_set_fpu_opts(c); else cpu_set_nofpu_opts(c); + + reserve_exception_space(0, 0x400); } void cpu_report(void) diff --git a/arch/mips/kernel/traps.c b/arch/mips/kernel/traps.c index e0352958e2f7..808b8b61ded1 100644 --- 
a/arch/mips/kernel/traps.c +++ b/arch/mips/kernel/traps.c @@ -2009,13 +2009,16 @@ void __noreturn nmi_exception_handler(struct pt_regs *regs) nmi_exit(); } -#define VECTORSPACING 0x100 /* for EI/VI mode */ - unsigned long ebase; EXPORT_SYMBOL_GPL(ebase); unsigned long exception_handlers[32]; unsigned long vi_handlers[64]; +void reserve_exception_space(phys_addr_t addr, unsigned long size) +{ + memblock_reserve(addr, size); +} + void __init *set_except_vector(int n, void *addr) { unsigned long handler = (unsigned long) addr; @@ -2367,10 +2370,7 @@ void __init trap_init(void) if (!cpu_has_mips_r2_r6) { ebase = CAC_BASE; - ebase_pa = virt_to_phys((void *)ebase); vec_size = 0x400; - - memblock_reserve(ebase_pa, vec_size); } else { if (cpu_has_veic || cpu_has_vint) vec_size = 0x200 + VECTORSPACING*64; -- cgit v1.2.3 From cea15316ceee2d4a51dfdecd79e08a438135416c Mon Sep 17 00:00:00 2001 From: Naveen N. Rao Date: Thu, 4 Mar 2021 07:34:11 +0530 Subject: powerpc/64s: Fix instruction encoding for lis in ppc_function_entry() 'lis r2,N' is 'addis r2,0,N' and the instruction encoding in the macro LIS_R2 is incorrect (it currently maps to 'addis r0,r2,N'). Fix the same. Fixes: c71b7eff426f ("powerpc: Add ABIv2 support to ppc_function_entry") Cc: stable@vger.kernel.org # v3.16+ Reported-by: Jiri Olsa Signed-off-by: Naveen N. Rao Acked-by: Segher Boessenkool Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210304020411.16796-1-naveen.n.rao@linux.vnet.ibm.com --- arch/powerpc/include/asm/code-patching.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/powerpc/include/asm/code-patching.h b/arch/powerpc/include/asm/code-patching.h index eacc9102c251..d5b3c3bb95b4 100644 --- a/arch/powerpc/include/asm/code-patching.h +++ b/arch/powerpc/include/asm/code-patching.h @@ -73,7 +73,7 @@ void __patch_exception(int exc, unsigned long addr); #endif #define OP_RT_RA_MASK 0xffff0000UL -#define LIS_R2 0x3c020000UL +#define LIS_R2 0x3c400000UL #define ADDIS_R2_R12 0x3c4c0000UL #define ADDI_R2_R2 0x38420000UL -- cgit v1.2.3 From 545ac14c16b5dbd909d5a90ddf5b5a629a40fa94 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Wed, 3 Mar 2021 15:17:13 +0100 Subject: x86/sev-es: Check regs->sp is trusted before adjusting #VC IST stack The code in the NMI handler to adjust the #VC handler IST stack is needed in case an NMI hits when the #VC handler is still using its IST stack. But the check for this condition also needs to look if the regs->sp value is trusted, meaning it was not set by user-space. Extend the check to not use regs->sp when the NMI interrupted user-space code or the SYSCALL gap. 
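Concretely, the case being closed is roughly:

    /*
     * 1. user space executes SYSCALL with RSP deliberately pointing into
     *    the per-CPU #VC IST stack range;
     * 2. an NMI arrives inside the syscall gap, so regs->sp still carries
     *    that user-chosen value;
     * 3. the old on_vc_stack(regs->sp) check then believes the #VC handler
     *    is active and shifts the IST entry based on untrusted data.
     */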
Fixes: 315562c9af3d5 ("x86/sev-es: Adjust #VC IST Stack on entering NMI handler") Reported-by: Andy Lutomirski Signed-off-by: Joerg Roedel Signed-off-by: Borislav Petkov Cc: stable@vger.kernel.org # 5.10+ Link: https://lkml.kernel.org/r/20210303141716.29223-3-joro@8bytes.org --- arch/x86/kernel/sev-es.c | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/sev-es.c b/arch/x86/kernel/sev-es.c index 84c1821819af..301f20f6d4dd 100644 --- a/arch/x86/kernel/sev-es.c +++ b/arch/x86/kernel/sev-es.c @@ -121,8 +121,18 @@ static void __init setup_vc_stacks(int cpu) cea_set_pte((void *)vaddr, pa, PAGE_KERNEL); } -static __always_inline bool on_vc_stack(unsigned long sp) +static __always_inline bool on_vc_stack(struct pt_regs *regs) { + unsigned long sp = regs->sp; + + /* User-mode RSP is not trusted */ + if (user_mode(regs)) + return false; + + /* SYSCALL gap still has user-mode RSP */ + if (ip_within_syscall_gap(regs)) + return false; + return ((sp >= __this_cpu_ist_bottom_va(VC)) && (sp < __this_cpu_ist_top_va(VC))); } @@ -144,7 +154,7 @@ void noinstr __sev_es_ist_enter(struct pt_regs *regs) old_ist = __this_cpu_read(cpu_tss_rw.x86_tss.ist[IST_INDEX_VC]); /* Make room on the IST stack */ - if (on_vc_stack(regs->sp)) + if (on_vc_stack(regs)) new_ist = ALIGN_DOWN(regs->sp, 8) - sizeof(old_ist); else new_ist = old_ist - sizeof(old_ist); -- cgit v1.2.3 From 62441a1fb53263bda349b6e5997c3cc5c120d89e Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Wed, 3 Mar 2021 15:17:15 +0100 Subject: x86/sev-es: Correctly track IRQ states in runtime #VC handler Call irqentry_nmi_enter()/irqentry_nmi_exit() in the #VC handler to correctly track the IRQ state during its execution. Fixes: 0786138c78e79 ("x86/sev-es: Add a Runtime #VC Exception Handler") Reported-by: Andy Lutomirski Signed-off-by: Joerg Roedel Signed-off-by: Borislav Petkov Cc: stable@vger.kernel.org # v5.10+ Link: https://lkml.kernel.org/r/20210303141716.29223-5-joro@8bytes.org --- arch/x86/kernel/sev-es.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/sev-es.c b/arch/x86/kernel/sev-es.c index 301f20f6d4dd..c3fd8fa79838 100644 --- a/arch/x86/kernel/sev-es.c +++ b/arch/x86/kernel/sev-es.c @@ -1258,13 +1258,12 @@ static __always_inline bool on_vc_fallback_stack(struct pt_regs *regs) DEFINE_IDTENTRY_VC_SAFE_STACK(exc_vmm_communication) { struct sev_es_runtime_data *data = this_cpu_read(runtime_data); + irqentry_state_t irq_state; struct ghcb_state state; struct es_em_ctxt ctxt; enum es_result result; struct ghcb *ghcb; - lockdep_assert_irqs_disabled(); - /* * Handle #DB before calling into !noinstr code to avoid recursive #DB. */ @@ -1273,6 +1272,8 @@ DEFINE_IDTENTRY_VC_SAFE_STACK(exc_vmm_communication) return; } + irq_state = irqentry_nmi_enter(regs); + lockdep_assert_irqs_disabled(); instrumentation_begin(); /* @@ -1335,6 +1336,7 @@ DEFINE_IDTENTRY_VC_SAFE_STACK(exc_vmm_communication) out: instrumentation_end(); + irqentry_nmi_exit(regs, irq_state); return; -- cgit v1.2.3 From bffe30dd9f1f3b2608a87ac909a224d6be472485 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Wed, 3 Mar 2021 15:17:16 +0100 Subject: x86/sev-es: Use __copy_from_user_inatomic() The #VC handler must run in atomic context and cannot sleep. This is a problem when it tries to fetch instruction bytes from user-space via copy_from_user(). 
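For context, the difference between the two primitives, heavily simplified (uptr stands in for an arbitrary user pointer):

    unsigned char buf[MAX_INSN_SIZE];
    unsigned long not_copied;

    /* may sleep: carries a might_fault() annotation and can block while
     * the fault is resolved - not allowed in the #VC handler */
    not_copied = copy_from_user(buf, uptr, MAX_INSN_SIZE);

    /* no sleeping checks: the copy simply fails (returns bytes not copied)
     * if the access cannot be satisfied, which is fine in atomic context */
    not_copied = __copy_from_user_inatomic(buf, uptr, MAX_INSN_SIZE);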
Introduce a insn_fetch_from_user_inatomic() helper which uses __copy_from_user_inatomic() to safely copy the instruction bytes to kernel memory in the #VC handler. Fixes: 5e3427a7bc432 ("x86/sev-es: Handle instruction fetches from user-space") Signed-off-by: Joerg Roedel Signed-off-by: Borislav Petkov Cc: stable@vger.kernel.org # v5.10+ Link: https://lkml.kernel.org/r/20210303141716.29223-6-joro@8bytes.org --- arch/x86/include/asm/insn-eval.h | 2 ++ arch/x86/kernel/sev-es.c | 2 +- arch/x86/lib/insn-eval.c | 66 +++++++++++++++++++++++++++++++--------- 3 files changed, 55 insertions(+), 15 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/insn-eval.h b/arch/x86/include/asm/insn-eval.h index a0f839aa144d..98b4dae5e8bc 100644 --- a/arch/x86/include/asm/insn-eval.h +++ b/arch/x86/include/asm/insn-eval.h @@ -23,6 +23,8 @@ unsigned long insn_get_seg_base(struct pt_regs *regs, int seg_reg_idx); int insn_get_code_seg_params(struct pt_regs *regs); int insn_fetch_from_user(struct pt_regs *regs, unsigned char buf[MAX_INSN_SIZE]); +int insn_fetch_from_user_inatomic(struct pt_regs *regs, + unsigned char buf[MAX_INSN_SIZE]); bool insn_decode(struct insn *insn, struct pt_regs *regs, unsigned char buf[MAX_INSN_SIZE], int buf_size); diff --git a/arch/x86/kernel/sev-es.c b/arch/x86/kernel/sev-es.c index c3fd8fa79838..04a780abb512 100644 --- a/arch/x86/kernel/sev-es.c +++ b/arch/x86/kernel/sev-es.c @@ -258,7 +258,7 @@ static enum es_result vc_decode_insn(struct es_em_ctxt *ctxt) int res; if (user_mode(ctxt->regs)) { - res = insn_fetch_from_user(ctxt->regs, buffer); + res = insn_fetch_from_user_inatomic(ctxt->regs, buffer); if (!res) { ctxt->fi.vector = X86_TRAP_PF; ctxt->fi.error_code = X86_PF_INSTR | X86_PF_USER; diff --git a/arch/x86/lib/insn-eval.c b/arch/x86/lib/insn-eval.c index 4229950a5d78..bb0b3fe1e0a0 100644 --- a/arch/x86/lib/insn-eval.c +++ b/arch/x86/lib/insn-eval.c @@ -1415,6 +1415,25 @@ void __user *insn_get_addr_ref(struct insn *insn, struct pt_regs *regs) } } +static unsigned long insn_get_effective_ip(struct pt_regs *regs) +{ + unsigned long seg_base = 0; + + /* + * If not in user-space long mode, a custom code segment could be in + * use. This is true in protected mode (if the process defined a local + * descriptor table), or virtual-8086 mode. In most of the cases + * seg_base will be zero as in USER_CS. + */ + if (!user_64bit_mode(regs)) { + seg_base = insn_get_seg_base(regs, INAT_SEG_REG_CS); + if (seg_base == -1L) + return 0; + } + + return seg_base + regs->ip; +} + /** * insn_fetch_from_user() - Copy instruction bytes from user-space memory * @regs: Structure with register values as seen when entering kernel mode @@ -1431,24 +1450,43 @@ void __user *insn_get_addr_ref(struct insn *insn, struct pt_regs *regs) */ int insn_fetch_from_user(struct pt_regs *regs, unsigned char buf[MAX_INSN_SIZE]) { - unsigned long seg_base = 0; + unsigned long ip; int not_copied; - /* - * If not in user-space long mode, a custom code segment could be in - * use. This is true in protected mode (if the process defined a local - * descriptor table), or virtual-8086 mode. In most of the cases - * seg_base will be zero as in USER_CS. 
- */ - if (!user_64bit_mode(regs)) { - seg_base = insn_get_seg_base(regs, INAT_SEG_REG_CS); - if (seg_base == -1L) - return 0; - } + ip = insn_get_effective_ip(regs); + if (!ip) + return 0; + + not_copied = copy_from_user(buf, (void __user *)ip, MAX_INSN_SIZE); + return MAX_INSN_SIZE - not_copied; +} + +/** + * insn_fetch_from_user_inatomic() - Copy instruction bytes from user-space memory + * while in atomic code + * @regs: Structure with register values as seen when entering kernel mode + * @buf: Array to store the fetched instruction + * + * Gets the linear address of the instruction and copies the instruction bytes + * to the buf. This function must be used in atomic context. + * + * Returns: + * + * Number of instruction bytes copied. + * + * 0 if nothing was copied. + */ +int insn_fetch_from_user_inatomic(struct pt_regs *regs, unsigned char buf[MAX_INSN_SIZE]) +{ + unsigned long ip; + int not_copied; + + ip = insn_get_effective_ip(regs); + if (!ip) + return 0; - not_copied = copy_from_user(buf, (void __user *)(seg_base + regs->ip), - MAX_INSN_SIZE); + not_copied = __copy_from_user_inatomic(buf, (void __user *)ip, MAX_INSN_SIZE); return MAX_INSN_SIZE - not_copied; } -- cgit v1.2.3 From 86c83365ab76e4b43cedd3ce07a07d32a4dc79ba Mon Sep 17 00:00:00 2001 From: Andrey Konovalov Date: Mon, 8 Mar 2021 17:10:23 +0100 Subject: arm64: kasan: fix page_alloc tagging with DEBUG_VIRTUAL When CONFIG_DEBUG_VIRTUAL is enabled, the default page_to_virt() macro implementation from include/linux/mm.h is used. That definition doesn't account for KASAN tags, which leads to no tags on page_alloc allocations. Provide an arm64-specific definition for page_to_virt() when CONFIG_DEBUG_VIRTUAL is enabled that takes care of KASAN tags. Fixes: 2813b9c02962 ("kasan, mm, arm64: tag non slab memory allocated via pagealloc") Cc: Signed-off-by: Andrey Konovalov Reviewed-by: Catalin Marinas Link: https://lore.kernel.org/r/4b55b35202706223d3118230701c6a59749d9b72.1615219501.git.andreyknvl@google.com Signed-off-by: Will Deacon --- arch/arm64/include/asm/memory.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'arch') diff --git a/arch/arm64/include/asm/memory.h b/arch/arm64/include/asm/memory.h index c759faf7a1ff..0aabc3be9a75 100644 --- a/arch/arm64/include/asm/memory.h +++ b/arch/arm64/include/asm/memory.h @@ -328,6 +328,11 @@ static inline void *phys_to_virt(phys_addr_t x) #define ARCH_PFN_OFFSET ((unsigned long)PHYS_PFN_OFFSET) #if !defined(CONFIG_SPARSEMEM_VMEMMAP) || defined(CONFIG_DEBUG_VIRTUAL) +#define page_to_virt(x) ({ \ + __typeof__(x) __page = x; \ + void *__addr = __va(page_to_phys(__page)); \ + (void *)__tag_set((const void *)__addr, page_kasan_tag(__page));\ +}) #define virt_to_page(x) pfn_to_page(virt_to_pfn(x)) #else #define page_to_virt(x) ({ \ -- cgit v1.2.3 From 01dc9262ff5797b675c32c0c6bc682777d23de05 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Wed, 3 Mar 2021 16:45:05 +0000 Subject: KVM: arm64: Ensure I-cache isolation between vcpus of a same VM It recently became apparent that the ARMv8 architecture has interesting rules regarding attributes being used when fetching instructions if the MMU is off at Stage-1. In this situation, the CPU is allowed to fetch from the PoC and allocate into the I-cache (unless the memory is mapped with the XN attribute at Stage-2). 
If we transpose this to vcpus sharing a single physical CPU, it is possible for a vcpu running with its MMU off to influence another vcpu running with its MMU on, as the latter is expected to fetch from the PoU (and self-patching code doesn't flush below that level). In order to solve this, reuse the vcpu-private TLB invalidation code to apply the same policy to the I-cache, nuking it every time the vcpu runs on a physical CPU that ran another vcpu of the same VM in the past. This involve renaming __kvm_tlb_flush_local_vmid() to __kvm_flush_cpu_context(), and inserting a local i-cache invalidation there. Cc: stable@vger.kernel.org Signed-off-by: Marc Zyngier Acked-by: Will Deacon Acked-by: Catalin Marinas Link: https://lore.kernel.org/r/20210303164505.68492-1-maz@kernel.org --- arch/arm64/include/asm/kvm_asm.h | 4 ++-- arch/arm64/kvm/arm.c | 7 ++++++- arch/arm64/kvm/hyp/nvhe/hyp-main.c | 6 +++--- arch/arm64/kvm/hyp/nvhe/tlb.c | 3 ++- arch/arm64/kvm/hyp/vhe/tlb.c | 3 ++- 5 files changed, 15 insertions(+), 8 deletions(-) (limited to 'arch') diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h index 9c0e396dd03f..a7ab84f781f7 100644 --- a/arch/arm64/include/asm/kvm_asm.h +++ b/arch/arm64/include/asm/kvm_asm.h @@ -47,7 +47,7 @@ #define __KVM_HOST_SMCCC_FUNC___kvm_flush_vm_context 2 #define __KVM_HOST_SMCCC_FUNC___kvm_tlb_flush_vmid_ipa 3 #define __KVM_HOST_SMCCC_FUNC___kvm_tlb_flush_vmid 4 -#define __KVM_HOST_SMCCC_FUNC___kvm_tlb_flush_local_vmid 5 +#define __KVM_HOST_SMCCC_FUNC___kvm_flush_cpu_context 5 #define __KVM_HOST_SMCCC_FUNC___kvm_timer_set_cntvoff 6 #define __KVM_HOST_SMCCC_FUNC___kvm_enable_ssbs 7 #define __KVM_HOST_SMCCC_FUNC___vgic_v3_get_gic_config 8 @@ -183,10 +183,10 @@ DECLARE_KVM_HYP_SYM(__bp_harden_hyp_vecs); #define __bp_harden_hyp_vecs CHOOSE_HYP_SYM(__bp_harden_hyp_vecs) extern void __kvm_flush_vm_context(void); +extern void __kvm_flush_cpu_context(struct kvm_s2_mmu *mmu); extern void __kvm_tlb_flush_vmid_ipa(struct kvm_s2_mmu *mmu, phys_addr_t ipa, int level); extern void __kvm_tlb_flush_vmid(struct kvm_s2_mmu *mmu); -extern void __kvm_tlb_flush_local_vmid(struct kvm_s2_mmu *mmu); extern void __kvm_timer_set_cntvoff(u64 cntvoff); diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index bb85da1d5880..a391b984dd05 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -385,11 +385,16 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) last_ran = this_cpu_ptr(mmu->last_vcpu_ran); /* + * We guarantee that both TLBs and I-cache are private to each + * vcpu. If detecting that a vcpu from the same VM has + * previously run on the same physical CPU, call into the + * hypervisor code to nuke the relevant contexts. + * * We might get preempted before the vCPU actually runs, but * over-invalidation doesn't affect correctness. 
*/ if (*last_ran != vcpu->vcpu_id) { - kvm_call_hyp(__kvm_tlb_flush_local_vmid, mmu); + kvm_call_hyp(__kvm_flush_cpu_context, mmu); *last_ran = vcpu->vcpu_id; } diff --git a/arch/arm64/kvm/hyp/nvhe/hyp-main.c b/arch/arm64/kvm/hyp/nvhe/hyp-main.c index 8f129968204e..936328207bde 100644 --- a/arch/arm64/kvm/hyp/nvhe/hyp-main.c +++ b/arch/arm64/kvm/hyp/nvhe/hyp-main.c @@ -46,11 +46,11 @@ static void handle___kvm_tlb_flush_vmid(struct kvm_cpu_context *host_ctxt) __kvm_tlb_flush_vmid(kern_hyp_va(mmu)); } -static void handle___kvm_tlb_flush_local_vmid(struct kvm_cpu_context *host_ctxt) +static void handle___kvm_flush_cpu_context(struct kvm_cpu_context *host_ctxt) { DECLARE_REG(struct kvm_s2_mmu *, mmu, host_ctxt, 1); - __kvm_tlb_flush_local_vmid(kern_hyp_va(mmu)); + __kvm_flush_cpu_context(kern_hyp_va(mmu)); } static void handle___kvm_timer_set_cntvoff(struct kvm_cpu_context *host_ctxt) @@ -115,7 +115,7 @@ static const hcall_t host_hcall[] = { HANDLE_FUNC(__kvm_flush_vm_context), HANDLE_FUNC(__kvm_tlb_flush_vmid_ipa), HANDLE_FUNC(__kvm_tlb_flush_vmid), - HANDLE_FUNC(__kvm_tlb_flush_local_vmid), + HANDLE_FUNC(__kvm_flush_cpu_context), HANDLE_FUNC(__kvm_timer_set_cntvoff), HANDLE_FUNC(__kvm_enable_ssbs), HANDLE_FUNC(__vgic_v3_get_gic_config), diff --git a/arch/arm64/kvm/hyp/nvhe/tlb.c b/arch/arm64/kvm/hyp/nvhe/tlb.c index fbde89a2c6e8..229b06748c20 100644 --- a/arch/arm64/kvm/hyp/nvhe/tlb.c +++ b/arch/arm64/kvm/hyp/nvhe/tlb.c @@ -123,7 +123,7 @@ void __kvm_tlb_flush_vmid(struct kvm_s2_mmu *mmu) __tlb_switch_to_host(&cxt); } -void __kvm_tlb_flush_local_vmid(struct kvm_s2_mmu *mmu) +void __kvm_flush_cpu_context(struct kvm_s2_mmu *mmu) { struct tlb_inv_context cxt; @@ -131,6 +131,7 @@ void __kvm_tlb_flush_local_vmid(struct kvm_s2_mmu *mmu) __tlb_switch_to_guest(mmu, &cxt); __tlbi(vmalle1); + asm volatile("ic iallu"); dsb(nsh); isb(); diff --git a/arch/arm64/kvm/hyp/vhe/tlb.c b/arch/arm64/kvm/hyp/vhe/tlb.c index fd7895945bbc..66f17349f0c3 100644 --- a/arch/arm64/kvm/hyp/vhe/tlb.c +++ b/arch/arm64/kvm/hyp/vhe/tlb.c @@ -127,7 +127,7 @@ void __kvm_tlb_flush_vmid(struct kvm_s2_mmu *mmu) __tlb_switch_to_host(&cxt); } -void __kvm_tlb_flush_local_vmid(struct kvm_s2_mmu *mmu) +void __kvm_flush_cpu_context(struct kvm_s2_mmu *mmu) { struct tlb_inv_context cxt; @@ -135,6 +135,7 @@ void __kvm_tlb_flush_local_vmid(struct kvm_s2_mmu *mmu) __tlb_switch_to_guest(mmu, &cxt); __tlbi(vmalle1); + asm volatile("ic iallu"); dsb(nsh); isb(); -- cgit v1.2.3 From 7aed41cff35a9aaf3431b8c0c23daa7d8bb77cd3 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Tue, 9 Mar 2021 17:53:15 +1100 Subject: powerpc/64s: Use symbolic macros for function entry encoding In ppc_function_entry() we look for a specific set of instructions by masking the instructions and comparing with a known value. Currently those known values are just literal hex values, and we recently discovered one of them was wrong. Instead construct the values using the existing constants we have for defining various fields of instructions. Suggested-by: Christophe Leroy Signed-off-by: Michael Ellerman Acked-by: Naveen N. 
Rao Link: https://lore.kernel.org/r/20210309071544.515303-1-mpe@ellerman.id.au --- arch/powerpc/include/asm/code-patching.h | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/include/asm/code-patching.h b/arch/powerpc/include/asm/code-patching.h index d5b3c3bb95b4..f1d029bf906e 100644 --- a/arch/powerpc/include/asm/code-patching.h +++ b/arch/powerpc/include/asm/code-patching.h @@ -73,9 +73,10 @@ void __patch_exception(int exc, unsigned long addr); #endif #define OP_RT_RA_MASK 0xffff0000UL -#define LIS_R2 0x3c400000UL -#define ADDIS_R2_R12 0x3c4c0000UL -#define ADDI_R2_R2 0x38420000UL +#define LIS_R2 (PPC_INST_ADDIS | __PPC_RT(R2)) +#define ADDIS_R2_R12 (PPC_INST_ADDIS | __PPC_RT(R2) | __PPC_RA(R12)) +#define ADDI_R2_R2 (PPC_INST_ADDI | __PPC_RT(R2) | __PPC_RA(R2)) + static inline unsigned long ppc_function_entry(void *func) { -- cgit v1.2.3 From 73ac79881804eed2e9d76ecdd1018037f8510cb1 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Mon, 8 Mar 2021 18:55:30 +1000 Subject: powerpc: Fix inverted SET_FULL_REGS bitop This bit operation was inverted and set the low bit rather than cleared it, breaking the ability to ptrace non-volatile GPRs after exec. Fix. Only affects 64e and 32-bit. Fixes: feb9df3462e6 ("powerpc/64s: Always has full regs, so remove remnant checks") Cc: stable@vger.kernel.org # v5.8+ Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210308085530.3191843-1-npiggin@gmail.com --- arch/powerpc/include/asm/ptrace.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/include/asm/ptrace.h b/arch/powerpc/include/asm/ptrace.h index 975ba260006a..1499e928ea6a 100644 --- a/arch/powerpc/include/asm/ptrace.h +++ b/arch/powerpc/include/asm/ptrace.h @@ -195,7 +195,7 @@ static inline void regs_set_return_value(struct pt_regs *regs, unsigned long rc) #define TRAP_FLAGS_MASK 0x11 #define TRAP(regs) ((regs)->trap & ~TRAP_FLAGS_MASK) #define FULL_REGS(regs) (((regs)->trap & 1) == 0) -#define SET_FULL_REGS(regs) ((regs)->trap |= 1) +#define SET_FULL_REGS(regs) ((regs)->trap &= ~1) #endif #define CHECK_FULL_REGS(regs) BUG_ON(!FULL_REGS(regs)) #define NV_REG_POISON 0xdeadbeefdeadbeefUL @@ -210,7 +210,7 @@ static inline void regs_set_return_value(struct pt_regs *regs, unsigned long rc) #define TRAP_FLAGS_MASK 0x1F #define TRAP(regs) ((regs)->trap & ~TRAP_FLAGS_MASK) #define FULL_REGS(regs) (((regs)->trap & 1) == 0) -#define SET_FULL_REGS(regs) ((regs)->trap |= 1) +#define SET_FULL_REGS(regs) ((regs)->trap &= ~1) #define IS_CRITICAL_EXC(regs) (((regs)->trap & 2) != 0) #define IS_MCHECK_EXC(regs) (((regs)->trap & 4) != 0) #define IS_DEBUG_EXC(regs) (((regs)->trap & 8) != 0) -- cgit v1.2.3 From c080a173301ffc62cb6c76308c803c7fee05517a Mon Sep 17 00:00:00 2001 From: Daniel Axtens Date: Thu, 25 Feb 2021 14:09:59 +1100 Subject: powerpc/64s/exception: Clean up a missed SRR specifier Nick's patch cleaning up the SRR specifiers in exception-64s.S missed a single instance of EXC_HV_OR_STD. Clean that up. Caught by clang's integrated assembler. 
Fixes: 3f7fbd97d07d ("powerpc/64s/exception: Clean up SRR specifiers") Signed-off-by: Daniel Axtens Acked-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210225031006.1204774-2-dja@axtens.net --- arch/powerpc/kernel/exceptions-64s.S | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index 60d3051a8bc8..8082b690e874 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -466,7 +466,7 @@ DEFINE_FIXED_SYMBOL(\name\()_common_real) ld r10,PACAKMSR(r13) /* get MSR value for kernel */ /* MSR[RI] is clear iff using SRR regs */ - .if IHSRR == EXC_HV_OR_STD + .if IHSRR_IF_HVMODE BEGIN_FTR_SECTION xori r10,r10,MSR_RI END_FTR_SECTION_IFCLR(CPU_FTR_HVMODE) -- cgit v1.2.3 From bd73758803c2eedc037c2268b65a19542a832594 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Tue, 9 Mar 2021 08:39:39 +0000 Subject: powerpc: Fix missing declaration of [en/dis]able_kernel_vsx() Add stub instances of enable_kernel_vsx() and disable_kernel_vsx() when CONFIG_VSX is not set, to avoid following build failure. CC [M] drivers/gpu/drm/amd/amdgpu/../display/dc/calcs/dcn_calcs.o In file included from ./drivers/gpu/drm/amd/amdgpu/../display/dc/dm_services_types.h:29, from ./drivers/gpu/drm/amd/amdgpu/../display/dc/dm_services.h:37, from drivers/gpu/drm/amd/amdgpu/../display/dc/calcs/dcn_calcs.c:27: drivers/gpu/drm/amd/amdgpu/../display/dc/calcs/dcn_calcs.c: In function 'dcn_bw_apply_registry_override': ./drivers/gpu/drm/amd/amdgpu/../display/dc/os_types.h:64:3: error: implicit declaration of function 'enable_kernel_vsx'; did you mean 'enable_kernel_fp'? [-Werror=implicit-function-declaration] 64 | enable_kernel_vsx(); \ | ^~~~~~~~~~~~~~~~~ drivers/gpu/drm/amd/amdgpu/../display/dc/calcs/dcn_calcs.c:640:2: note: in expansion of macro 'DC_FP_START' 640 | DC_FP_START(); | ^~~~~~~~~~~ ./drivers/gpu/drm/amd/amdgpu/../display/dc/os_types.h:75:3: error: implicit declaration of function 'disable_kernel_vsx'; did you mean 'disable_kernel_fp'? [-Werror=implicit-function-declaration] 75 | disable_kernel_vsx(); \ | ^~~~~~~~~~~~~~~~~~ drivers/gpu/drm/amd/amdgpu/../display/dc/calcs/dcn_calcs.c:676:2: note: in expansion of macro 'DC_FP_END' 676 | DC_FP_END(); | ^~~~~~~~~ cc1: some warnings being treated as errors make[5]: *** [drivers/gpu/drm/amd/amdgpu/../display/dc/calcs/dcn_calcs.o] Error 1 This works because the caller is checking if VSX is available using cpu_has_feature(): #define DC_FP_START() { \ if (cpu_has_feature(CPU_FTR_VSX_COMP)) { \ preempt_disable(); \ enable_kernel_vsx(); \ } else if (cpu_has_feature(CPU_FTR_ALTIVEC_COMP)) { \ preempt_disable(); \ enable_kernel_altivec(); \ } else if (!cpu_has_feature(CPU_FTR_FPU_UNAVAILABLE)) { \ preempt_disable(); \ enable_kernel_fp(); \ } \ When CONFIG_VSX is not selected, cpu_has_feature(CPU_FTR_VSX_COMP) constant folds to 'false' so the call to enable_kernel_vsx() is discarded and the build succeeds. 
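This is the usual BUILD_BUG()-in-a-stub pattern: the build error only fires if the compiler actually has to emit the call. In miniature (sketch):

    if (0)
            BUILD_BUG();    /* fine: dead code, discarded before it can trip */
    if (1)
            BUILD_BUG();    /* build error: the reference survives optimisation */

so the stubs added below cost nothing as long as every caller is guarded by a condition that constant-folds to false when CONFIG_VSX is not set.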
Fixes: 16a9dea110a6 ("amdgpu: Enable initial DCN support on POWER") Cc: stable@vger.kernel.org # v5.6+ Reported-by: Geert Uytterhoeven Reported-by: kernel test robot Signed-off-by: Christophe Leroy [mpe: Incorporate some discussion comments into the change log] Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/8d7d285a027e9d21f5ff7f850fa71a2655b0c4af.1615279170.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/switch_to.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'arch') diff --git a/arch/powerpc/include/asm/switch_to.h b/arch/powerpc/include/asm/switch_to.h index fdab93428372..9d1fbd8be1c7 100644 --- a/arch/powerpc/include/asm/switch_to.h +++ b/arch/powerpc/include/asm/switch_to.h @@ -71,6 +71,16 @@ static inline void disable_kernel_vsx(void) { msr_check_and_clear(MSR_FP|MSR_VEC|MSR_VSX); } +#else +static inline void enable_kernel_vsx(void) +{ + BUILD_BUG(); +} + +static inline void disable_kernel_vsx(void) +{ + BUILD_BUG(); +} #endif #ifdef CONFIG_SPE -- cgit v1.2.3 From e5e8b80d352ec999d2bba3ea584f541c83f4ca3f Mon Sep 17 00:00:00 2001 From: Rob Gardner Date: Sun, 28 Feb 2021 22:48:16 -0700 Subject: sparc64: Fix opcode filtering in handling of no fault loads is_no_fault_exception() has two bugs which were discovered via random opcode testing with stress-ng. Both are caused by improper filtering of opcodes. The first bug can be triggered by a floating point store with a no-fault ASI, for instance "sta %f0, [%g0] #ASI_PNF", opcode C1A01040. The code first tests op3[5] (0x1000000), which denotes a floating point instruction, and then tests op3[2] (0x200000), which denotes a store instruction. But these bits are not mutually exclusive, and the above mentioned opcode has both bits set. The intent is to filter out stores, so the test for stores must be done first in order to have any effect. The second bug can be triggered by a floating point load with one of the invalid ASI values 0x8e or 0x8f, which pass this check in is_no_fault_exception(): if ((asi & 0xf2) == ASI_PNF) An example instruction is "ldqa [%l7 + %o7] #ASI 0x8f, %f38", opcode CF95D1EF. Asi values greater than 0x8b (ASI_SNFL) are fatal in handle_ldf_stq(), and is_no_fault_exception() must not allow these invalid asi values to make it that far. In both of these cases, handle_ldf_stq() reacts by calling sun4v_data_access_exception() or spitfire_data_access_exception(), which call is_no_fault_exception() and results in an infinite recursion. Signed-off-by: Rob Gardner Tested-by: Anatoly Pugachev Signed-off-by: David S. 
Miller --- arch/sparc/kernel/traps_64.c | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) (limited to 'arch') diff --git a/arch/sparc/kernel/traps_64.c b/arch/sparc/kernel/traps_64.c index d92e5eaa4c1d..a850dccd78ea 100644 --- a/arch/sparc/kernel/traps_64.c +++ b/arch/sparc/kernel/traps_64.c @@ -275,14 +275,13 @@ bool is_no_fault_exception(struct pt_regs *regs) asi = (regs->tstate >> 24); /* saved %asi */ else asi = (insn >> 5); /* immediate asi */ - if ((asi & 0xf2) == ASI_PNF) { - if (insn & 0x1000000) { /* op3[5:4]=3 */ - handle_ldf_stq(insn, regs); - return true; - } else if (insn & 0x200000) { /* op3[2], stores */ + if ((asi & 0xf6) == ASI_PNF) { + if (insn & 0x200000) /* op3[2], stores */ return false; - } - handle_ld_nf(insn, regs); + if (insn & 0x1000000) /* op3[5:4]=3 (fp) */ + handle_ldf_stq(insn, regs); + else + handle_ld_nf(insn, regs); return true; } } -- cgit v1.2.3 From 69264b4a43aff7307283e2bae29e9305ab6b7d47 Mon Sep 17 00:00:00 2001 From: Corentin Labbe Date: Mon, 8 Mar 2021 09:51:26 +0000 Subject: sparc: sparc64_defconfig: remove duplicate CONFIGs After my patch there is CONFIG_ATA defined twice. Remove the duplicate one. Same problem for CONFIG_HAPPYMEAL, except I added as builtin for boot test with NFS. Reported-by: Stephen Rothwell Fixes: a57cdeb369ef ("sparc: sparc64_defconfig: add necessary configs for qemu") Signed-off-by: Corentin Labbe Signed-off-by: David S. Miller --- arch/sparc/configs/sparc64_defconfig | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/sparc/configs/sparc64_defconfig b/arch/sparc/configs/sparc64_defconfig index 148f44b33890..12a4fb0bd52a 100644 --- a/arch/sparc/configs/sparc64_defconfig +++ b/arch/sparc/configs/sparc64_defconfig @@ -93,7 +93,7 @@ CONFIG_NETDEVICES=y CONFIG_NET_ETHERNET=y CONFIG_MII=m CONFIG_SUNLANCE=m -CONFIG_HAPPYMEAL=m +CONFIG_HAPPYMEAL=y CONFIG_SUNGEM=m CONFIG_SUNVNET=m CONFIG_LDMVSW=m @@ -234,9 +234,7 @@ CONFIG_CRYPTO_TWOFISH=m CONFIG_CRC16=m CONFIG_LIBCRC32C=m CONFIG_VCC=m -CONFIG_ATA=y CONFIG_PATA_CMD64X=y -CONFIG_HAPPYMEAL=y CONFIG_IP_PNP=y CONFIG_IP_PNP_DHCP=y CONFIG_DEVTMPFS=y -- cgit v1.2.3 From d15dfd31384ba3cb93150e5f87661a76fa419f74 Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Tue, 9 Mar 2021 12:26:01 +0000 Subject: arm64: mte: Map hotplugged memory as Normal Tagged In a system supporting MTE, the linear map must allow reading/writing allocation tags by setting the memory type as Normal Tagged. Currently, this is only handled for memory present at boot. Hotplugged memory uses Normal non-Tagged memory. Introduce pgprot_mhp() for hotplugged memory and use it in add_memory_resource(). The arm64 code maps pgprot_mhp() to pgprot_tagged(). Note that ZONE_DEVICE memory should not be mapped as Tagged and therefore setting the memory type in arch_add_memory() is not feasible. 
Signed-off-by: Catalin Marinas Fixes: 0178dc761368 ("arm64: mte: Use Normal Tagged attributes for the linear map") Reported-by: Patrick Daly Tested-by: Patrick Daly Link: https://lore.kernel.org/r/1614745263-27827-1-git-send-email-pdaly@codeaurora.org Cc: # 5.10.x Cc: Will Deacon Cc: Andrew Morton Cc: Vincenzo Frascino Cc: David Hildenbrand Reviewed-by: David Hildenbrand Reviewed-by: Vincenzo Frascino Reviewed-by: Anshuman Khandual Link: https://lore.kernel.org/r/20210309122601.5543-1-catalin.marinas@arm.com Signed-off-by: Will Deacon --- arch/arm64/include/asm/pgtable-prot.h | 1 - arch/arm64/include/asm/pgtable.h | 3 +++ arch/arm64/mm/mmu.c | 3 ++- include/linux/pgtable.h | 4 ++++ mm/memory_hotplug.c | 2 +- 5 files changed, 10 insertions(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/arm64/include/asm/pgtable-prot.h b/arch/arm64/include/asm/pgtable-prot.h index 046be789fbb4..9a65fb528110 100644 --- a/arch/arm64/include/asm/pgtable-prot.h +++ b/arch/arm64/include/asm/pgtable-prot.h @@ -66,7 +66,6 @@ extern bool arm64_use_ng_mappings; #define _PAGE_DEFAULT (_PROT_DEFAULT | PTE_ATTRINDX(MT_NORMAL)) #define PAGE_KERNEL __pgprot(PROT_NORMAL) -#define PAGE_KERNEL_TAGGED __pgprot(PROT_NORMAL_TAGGED) #define PAGE_KERNEL_RO __pgprot((PROT_NORMAL & ~PTE_WRITE) | PTE_RDONLY) #define PAGE_KERNEL_ROX __pgprot((PROT_NORMAL & ~(PTE_WRITE | PTE_PXN)) | PTE_RDONLY) #define PAGE_KERNEL_EXEC __pgprot(PROT_NORMAL & ~PTE_PXN) diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index e17b96d0e4b5..47027796c2f9 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -486,6 +486,9 @@ static inline pmd_t pmd_mkdevmap(pmd_t pmd) __pgprot_modify(prot, PTE_ATTRINDX_MASK, PTE_ATTRINDX(MT_NORMAL_NC) | PTE_PXN | PTE_UXN) #define pgprot_device(prot) \ __pgprot_modify(prot, PTE_ATTRINDX_MASK, PTE_ATTRINDX(MT_DEVICE_nGnRE) | PTE_PXN | PTE_UXN) +#define pgprot_tagged(prot) \ + __pgprot_modify(prot, PTE_ATTRINDX_MASK, PTE_ATTRINDX(MT_NORMAL_TAGGED)) +#define pgprot_mhp pgprot_tagged /* * DMA allocations for non-coherent devices use what the Arm architecture calls * "Normal non-cacheable" memory, which permits speculation, unaligned accesses diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index 3802cfbdd20d..9c8aa1b44cd5 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -512,7 +512,8 @@ static void __init map_mem(pgd_t *pgdp) * if MTE is present. Otherwise, it has the same attributes as * PAGE_KERNEL. 
*/ - __map_memblock(pgdp, start, end, PAGE_KERNEL_TAGGED, flags); + __map_memblock(pgdp, start, end, pgprot_tagged(PAGE_KERNEL), + flags); } /* diff --git a/include/linux/pgtable.h b/include/linux/pgtable.h index cdfc4e9f253e..5e772392a379 100644 --- a/include/linux/pgtable.h +++ b/include/linux/pgtable.h @@ -904,6 +904,10 @@ static inline void ptep_modify_prot_commit(struct vm_area_struct *vma, #define pgprot_device pgprot_noncached #endif +#ifndef pgprot_mhp +#define pgprot_mhp(prot) (prot) +#endif + #ifdef CONFIG_MMU #ifndef pgprot_modify #define pgprot_modify pgprot_modify diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index 5ba51a8bdaeb..0cdbbfbc5757 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c @@ -1072,7 +1072,7 @@ static int online_memory_block(struct memory_block *mem, void *arg) */ int __ref add_memory_resource(int nid, struct resource *res, mhp_t mhp_flags) { - struct mhp_params params = { .pgprot = PAGE_KERNEL }; + struct mhp_params params = { .pgprot = pgprot_mhp(PAGE_KERNEL) }; u64 start, size; bool new_node = false; int ret; -- cgit v1.2.3 From 26f55386f964cefa92ab7ccbed68f1a313074215 Mon Sep 17 00:00:00 2001 From: James Morse Date: Wed, 10 Mar 2021 11:23:10 +0530 Subject: arm64/mm: Fix __enable_mmu() for new TGRAN range values As per ARM ARM DDI 0487G.a, when FEAT_LPA2 is implemented, ID_AA64MMFR0_EL1 might contain a range of values to describe supported translation granules (4K and 16K pages sizes in particular) instead of just enabled or disabled values. This changes __enable_mmu() function to handle complete acceptable range of values (depending on whether the field is signed or unsigned) now represented with ID_AA64MMFR0_TGRAN_SUPPORTED_[MIN..MAX] pair. While here, also fix similar situations in EFI stub and KVM as well. 
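As a worked example for a 4K-page kernel (field encodings per ARM ARM DDI 0487G.a, simplified):

    /*
     * ID_AA64MMFR0_EL1.TGRAN4, zero-extended by ubfx before the compare:
     *
     *   0x0 -> 4KB granule supported             old '==' check: pass
     *   0x1 -> 4KB supported, 52-bit addresses   old check: wrongly rejected
     *                                            new check: 0x0 <= 0x1 <= 0x7, pass
     *   0xF -> 4KB granule not supported         new check: 0xF > 0x7, reject
     *
     * i.e. the [SUPPORTED_MIN .. SUPPORTED_MAX] window keeps rejecting the
     * "not implemented" encoding while accepting newer positive values.
     */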
Cc: Catalin Marinas Cc: Will Deacon Cc: Marc Zyngier Cc: James Morse Cc: Suzuki K Poulose Cc: Ard Biesheuvel Cc: Mark Rutland Cc: linux-arm-kernel@lists.infradead.org Cc: kvmarm@lists.cs.columbia.edu Cc: linux-efi@vger.kernel.org Cc: linux-kernel@vger.kernel.org Acked-by: Marc Zyngier Signed-off-by: James Morse Signed-off-by: Anshuman Khandual Link: https://lore.kernel.org/r/1615355590-21102-1-git-send-email-anshuman.khandual@arm.com Signed-off-by: Will Deacon --- arch/arm64/include/asm/sysreg.h | 20 ++++++++++++++------ arch/arm64/kernel/head.S | 6 ++++-- arch/arm64/kvm/reset.c | 10 ++++++---- drivers/firmware/efi/libstub/arm64-stub.c | 2 +- 4 files changed, 25 insertions(+), 13 deletions(-) (limited to 'arch') diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index dfd4edbfe360..d4a5fca984c3 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -796,6 +796,11 @@ #define ID_AA64MMFR0_PARANGE_48 0x5 #define ID_AA64MMFR0_PARANGE_52 0x6 +#define ID_AA64MMFR0_TGRAN_2_SUPPORTED_DEFAULT 0x0 +#define ID_AA64MMFR0_TGRAN_2_SUPPORTED_NONE 0x1 +#define ID_AA64MMFR0_TGRAN_2_SUPPORTED_MIN 0x2 +#define ID_AA64MMFR0_TGRAN_2_SUPPORTED_MAX 0x7 + #ifdef CONFIG_ARM64_PA_BITS_52 #define ID_AA64MMFR0_PARANGE_MAX ID_AA64MMFR0_PARANGE_52 #else @@ -961,14 +966,17 @@ #define ID_PFR1_PROGMOD_SHIFT 0 #if defined(CONFIG_ARM64_4K_PAGES) -#define ID_AA64MMFR0_TGRAN_SHIFT ID_AA64MMFR0_TGRAN4_SHIFT -#define ID_AA64MMFR0_TGRAN_SUPPORTED ID_AA64MMFR0_TGRAN4_SUPPORTED +#define ID_AA64MMFR0_TGRAN_SHIFT ID_AA64MMFR0_TGRAN4_SHIFT +#define ID_AA64MMFR0_TGRAN_SUPPORTED_MIN ID_AA64MMFR0_TGRAN4_SUPPORTED +#define ID_AA64MMFR0_TGRAN_SUPPORTED_MAX 0x7 #elif defined(CONFIG_ARM64_16K_PAGES) -#define ID_AA64MMFR0_TGRAN_SHIFT ID_AA64MMFR0_TGRAN16_SHIFT -#define ID_AA64MMFR0_TGRAN_SUPPORTED ID_AA64MMFR0_TGRAN16_SUPPORTED +#define ID_AA64MMFR0_TGRAN_SHIFT ID_AA64MMFR0_TGRAN16_SHIFT +#define ID_AA64MMFR0_TGRAN_SUPPORTED_MIN ID_AA64MMFR0_TGRAN16_SUPPORTED +#define ID_AA64MMFR0_TGRAN_SUPPORTED_MAX 0xF #elif defined(CONFIG_ARM64_64K_PAGES) -#define ID_AA64MMFR0_TGRAN_SHIFT ID_AA64MMFR0_TGRAN64_SHIFT -#define ID_AA64MMFR0_TGRAN_SUPPORTED ID_AA64MMFR0_TGRAN64_SUPPORTED +#define ID_AA64MMFR0_TGRAN_SHIFT ID_AA64MMFR0_TGRAN64_SHIFT +#define ID_AA64MMFR0_TGRAN_SUPPORTED_MIN ID_AA64MMFR0_TGRAN64_SUPPORTED +#define ID_AA64MMFR0_TGRAN_SUPPORTED_MAX 0x7 #endif #define MVFR2_FPMISC_SHIFT 4 diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S index 66b0e0b66e31..8b469f164091 100644 --- a/arch/arm64/kernel/head.S +++ b/arch/arm64/kernel/head.S @@ -655,8 +655,10 @@ SYM_FUNC_END(__secondary_too_slow) SYM_FUNC_START(__enable_mmu) mrs x2, ID_AA64MMFR0_EL1 ubfx x2, x2, #ID_AA64MMFR0_TGRAN_SHIFT, 4 - cmp x2, #ID_AA64MMFR0_TGRAN_SUPPORTED - b.ne __no_granule_support + cmp x2, #ID_AA64MMFR0_TGRAN_SUPPORTED_MIN + b.lt __no_granule_support + cmp x2, #ID_AA64MMFR0_TGRAN_SUPPORTED_MAX + b.gt __no_granule_support update_early_cpu_boot_status 0, x2, x3 adrp x2, idmap_pg_dir phys_to_ttbr x1, x1 diff --git a/arch/arm64/kvm/reset.c b/arch/arm64/kvm/reset.c index 47f3f035f3ea..e81c7ec9e102 100644 --- a/arch/arm64/kvm/reset.c +++ b/arch/arm64/kvm/reset.c @@ -311,16 +311,18 @@ int kvm_set_ipa_limit(void) } switch (cpuid_feature_extract_unsigned_field(mmfr0, tgran_2)) { - default: - case 1: + case ID_AA64MMFR0_TGRAN_2_SUPPORTED_NONE: kvm_err("PAGE_SIZE not supported at Stage-2, giving up\n"); return -EINVAL; - case 0: + case ID_AA64MMFR0_TGRAN_2_SUPPORTED_DEFAULT: kvm_debug("PAGE_SIZE supported at 
Stage-2 (default)\n"); break; - case 2: + case ID_AA64MMFR0_TGRAN_2_SUPPORTED_MIN ... ID_AA64MMFR0_TGRAN_2_SUPPORTED_MAX: kvm_debug("PAGE_SIZE supported at Stage-2 (advertised)\n"); break; + default: + kvm_err("Unsupported value for TGRAN_2, giving up\n"); + return -EINVAL; } kvm_ipa_limit = id_aa64mmfr0_parange_to_phys_shift(parange); diff --git a/drivers/firmware/efi/libstub/arm64-stub.c b/drivers/firmware/efi/libstub/arm64-stub.c index b69d63143e0d..7bf0a7acae5e 100644 --- a/drivers/firmware/efi/libstub/arm64-stub.c +++ b/drivers/firmware/efi/libstub/arm64-stub.c @@ -24,7 +24,7 @@ efi_status_t check_platform_features(void) return EFI_SUCCESS; tg = (read_cpuid(ID_AA64MMFR0_EL1) >> ID_AA64MMFR0_TGRAN_SHIFT) & 0xf; - if (tg != ID_AA64MMFR0_TGRAN_SUPPORTED) { + if (tg < ID_AA64MMFR0_TGRAN_SUPPORTED_MIN || tg > ID_AA64MMFR0_TGRAN_SUPPORTED_MAX) { if (IS_ENABLED(CONFIG_ARM64_64K_PAGES)) efi_err("This 64 KB granular kernel is not supported by your CPU\n"); else -- cgit v1.2.3 From 7bb8bc6eb550116c504fb25af8678b9d7ca2abc5 Mon Sep 17 00:00:00 2001 From: Rob Herring Date: Tue, 9 Mar 2021 17:44:12 -0700 Subject: arm64: perf: Fix 64-bit event counter read truncation Commit 0fdf1bb75953 ("arm64: perf: Avoid PMXEV* indirection") changed armv8pmu_read_evcntr() to return a u32 instead of u64. The result is silent truncation of the event counter when using 64-bit counters. Given the offending commit appears to have passed thru several folks, it seems likely this was a bad rebase after v8.5 PMU 64-bit counters landed. Cc: Alexandru Elisei Cc: Julien Thierry Cc: Mark Rutland Cc: Will Deacon Cc: Catalin Marinas Cc: Peter Zijlstra Cc: Ingo Molnar Cc: Arnaldo Carvalho de Melo Cc: Alexander Shishkin Cc: Jiri Olsa Cc: Namhyung Kim Cc: Fixes: 0fdf1bb75953 ("arm64: perf: Avoid PMXEV* indirection") Signed-off-by: Rob Herring Acked-by: Mark Rutland Reviewed-by: Alexandru Elisei Link: https://lore.kernel.org/r/20210310004412.1450128-1-robh@kernel.org Signed-off-by: Will Deacon --- arch/arm64/kernel/perf_event.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm64/kernel/perf_event.c b/arch/arm64/kernel/perf_event.c index 7d2318f80955..4658fcf88c2b 100644 --- a/arch/arm64/kernel/perf_event.c +++ b/arch/arm64/kernel/perf_event.c @@ -460,7 +460,7 @@ static inline int armv8pmu_counter_has_overflowed(u32 pmnc, int idx) return pmnc & BIT(ARMV8_IDX_TO_COUNTER(idx)); } -static inline u32 armv8pmu_read_evcntr(int idx) +static inline u64 armv8pmu_read_evcntr(int idx) { u32 counter = ARMV8_IDX_TO_COUNTER(idx); -- cgit v1.2.3 From c8e2fe13d1d1f3a02842b7b909d4e4846a4b6a2c Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Tue, 9 Mar 2021 09:10:19 -0800 Subject: x86/perf: Use RET0 as default for guest_get_msrs to handle "no PMU" case Initialize x86_pmu.guest_get_msrs to return 0/NULL to handle the "nop" case. Patching in perf_guest_get_msrs_nop() during setup does not work if there is no PMU, as setup bails before updating the static calls, leaving x86_pmu.guest_get_msrs NULL and thus a complete nop. Ultimately, this causes VMX abort on VM-Exit due to KVM putting random garbage from the stack into the MSR load list. Add a comment in KVM to note that nr_msrs is valid if and only if the return value is non-NULL. 
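For reference, the generic helper being plugged in is essentially just (simplified sketch of kernel/static_call.c):

    long __static_call_return0(void)
    {
            return 0;
    }

Cast to the guest_get_msrs signature it returns NULL and never writes *nr, which is exactly why the new comment in KVM notes that nr_msrs is only meaningful when the return value is non-NULL.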
Fixes: abd562df94d1 ("x86/perf: Use static_call for x86_pmu.guest_get_msrs") Reported-by: Dmitry Vyukov Reported-by: syzbot+cce9ef2dd25246f815ee@syzkaller.appspotmail.com Suggested-by: Peter Zijlstra Signed-off-by: Sean Christopherson Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20210309171019.1125243-1-seanjc@google.com --- arch/x86/events/core.c | 15 ++++++--------- arch/x86/kvm/vmx/vmx.c | 2 +- 2 files changed, 7 insertions(+), 10 deletions(-) (limited to 'arch') diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c index 6ddeed3cd2ac..18df17129695 100644 --- a/arch/x86/events/core.c +++ b/arch/x86/events/core.c @@ -81,7 +81,11 @@ DEFINE_STATIC_CALL_NULL(x86_pmu_swap_task_ctx, *x86_pmu.swap_task_ctx); DEFINE_STATIC_CALL_NULL(x86_pmu_drain_pebs, *x86_pmu.drain_pebs); DEFINE_STATIC_CALL_NULL(x86_pmu_pebs_aliases, *x86_pmu.pebs_aliases); -DEFINE_STATIC_CALL_NULL(x86_pmu_guest_get_msrs, *x86_pmu.guest_get_msrs); +/* + * This one is magic, it will get called even when PMU init fails (because + * there is no PMU), in which case it should simply return NULL. + */ +DEFINE_STATIC_CALL_RET0(x86_pmu_guest_get_msrs, *x86_pmu.guest_get_msrs); u64 __read_mostly hw_cache_event_ids [PERF_COUNT_HW_CACHE_MAX] @@ -1944,13 +1948,6 @@ static void _x86_pmu_read(struct perf_event *event) x86_perf_event_update(event); } -static inline struct perf_guest_switch_msr * -perf_guest_get_msrs_nop(int *nr) -{ - *nr = 0; - return NULL; -} - static int __init init_hw_perf_events(void) { struct x86_pmu_quirk *quirk; @@ -2025,7 +2022,7 @@ static int __init init_hw_perf_events(void) x86_pmu.read = _x86_pmu_read; if (!x86_pmu.guest_get_msrs) - x86_pmu.guest_get_msrs = perf_guest_get_msrs_nop; + x86_pmu.guest_get_msrs = (void *)&__static_call_return0; x86_pmu_static_call_update(); diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index 50810d471462..32cf8287d4a7 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -6580,8 +6580,8 @@ static void atomic_switch_perf_msrs(struct vcpu_vmx *vmx) int i, nr_msrs; struct perf_guest_switch_msr *msrs; + /* Note, nr_msrs may be garbage if perf_guest_get_msrs() returns NULL. */ msrs = perf_guest_get_msrs(&nr_msrs); - if (!msrs) return; -- cgit v1.2.3 From 0f9b05b9a01a4cf6b6bdf904faacf4796e2aa232 Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Tue, 9 Mar 2021 18:00:44 +0100 Subject: Xen: drop exports of {set,clear}_foreign_p2m_mapping() They're only used internally, and the layering violation they contain (x86) or imply (Arm) of calling HYPERVISOR_grant_table_op() strongly advise against any (uncontrolled) use from a module. The functions also never had users except the ones from drivers/xen/grant-table.c forever since their introduction in 3.15. 
Signed-off-by: Jan Beulich Reviewed-by: Stefano Stabellini Link: https://lore.kernel.org/r/746a5cd6-1446-eda4-8b23-03c1cac30b8d@suse.com Signed-off-by: Boris Ostrovsky --- arch/arm/xen/p2m.c | 2 -- arch/x86/xen/p2m.c | 2 -- 2 files changed, 4 deletions(-) (limited to 'arch') diff --git a/arch/arm/xen/p2m.c b/arch/arm/xen/p2m.c index acb464547a54..5316cf36cb57 100644 --- a/arch/arm/xen/p2m.c +++ b/arch/arm/xen/p2m.c @@ -130,7 +130,6 @@ int set_foreign_p2m_mapping(struct gnttab_map_grant_ref *map_ops, return 0; } -EXPORT_SYMBOL_GPL(set_foreign_p2m_mapping); int clear_foreign_p2m_mapping(struct gnttab_unmap_grant_ref *unmap_ops, struct gnttab_unmap_grant_ref *kunmap_ops, @@ -145,7 +144,6 @@ int clear_foreign_p2m_mapping(struct gnttab_unmap_grant_ref *unmap_ops, return 0; } -EXPORT_SYMBOL_GPL(clear_foreign_p2m_mapping); bool __set_phys_to_machine_multi(unsigned long pfn, unsigned long mfn, unsigned long nr_pages) diff --git a/arch/x86/xen/p2m.c b/arch/x86/xen/p2m.c index a3cc33091f46..e912992e0b77 100644 --- a/arch/x86/xen/p2m.c +++ b/arch/x86/xen/p2m.c @@ -776,7 +776,6 @@ int set_foreign_p2m_mapping(struct gnttab_map_grant_ref *map_ops, out: return ret; } -EXPORT_SYMBOL_GPL(set_foreign_p2m_mapping); int clear_foreign_p2m_mapping(struct gnttab_unmap_grant_ref *unmap_ops, struct gnttab_unmap_grant_ref *kunmap_ops, @@ -802,7 +801,6 @@ int clear_foreign_p2m_mapping(struct gnttab_unmap_grant_ref *unmap_ops, return ret; } -EXPORT_SYMBOL_GPL(clear_foreign_p2m_mapping); #ifdef CONFIG_XEN_DEBUG_FS #include -- cgit v1.2.3 From bce21a2b48ede7cbcab92db18bc956daf1d5c246 Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Wed, 10 Mar 2021 11:45:26 +0100 Subject: Xen/gnttab: introduce common INVALID_GRANT_{HANDLE,REF} It's not helpful if every driver has to cook its own. Generalize xenbus'es INVALID_GRANT_HANDLE and pcifront's INVALID_GRANT_REF (which shouldn't have expanded to zero to begin with). Use the constants in p2m.c and gntdev.c right away, and update field types where necessary so they would match with the constants' types (albeit without touching struct ioctl_gntdev_grant_ref's ref field, as that's part of the public interface of the kernel and would require introducing a dependency on Xen's grant_table.h public header). 
Signed-off-by: Jan Beulich Reviewed-by: Juergen Gross Link: https://lore.kernel.org/r/db7c38a5-0d75-d5d1-19de-e5fe9f0b9c48@suse.com Signed-off-by: Boris Ostrovsky --- arch/arm/xen/p2m.c | 3 ++- arch/x86/xen/p2m.c | 4 ++-- drivers/pci/xen-pcifront.c | 4 ++-- drivers/xen/gntdev.c | 24 +++++++++++++----------- include/xen/grant_table.h | 7 +++++++ include/xen/xenbus.h | 1 - 6 files changed, 26 insertions(+), 17 deletions(-) (limited to 'arch') diff --git a/arch/arm/xen/p2m.c b/arch/arm/xen/p2m.c index 5316cf36cb57..84a1cea1f43b 100644 --- a/arch/arm/xen/p2m.c +++ b/arch/arm/xen/p2m.c @@ -11,6 +11,7 @@ #include #include +#include #include #include @@ -109,7 +110,7 @@ int set_foreign_p2m_mapping(struct gnttab_map_grant_ref *map_ops, map_ops[i].status = GNTST_general_error; unmap.host_addr = map_ops[i].host_addr, unmap.handle = map_ops[i].handle; - map_ops[i].handle = ~0; + map_ops[i].handle = INVALID_GRANT_HANDLE; if (map_ops[i].flags & GNTMAP_device_map) unmap.dev_bus_addr = map_ops[i].dev_bus_addr; else diff --git a/arch/x86/xen/p2m.c b/arch/x86/xen/p2m.c index e912992e0b77..17d80f751fcb 100644 --- a/arch/x86/xen/p2m.c +++ b/arch/x86/xen/p2m.c @@ -741,7 +741,7 @@ int set_foreign_p2m_mapping(struct gnttab_map_grant_ref *map_ops, map_ops[i].status = GNTST_general_error; unmap[0].host_addr = map_ops[i].host_addr, unmap[0].handle = map_ops[i].handle; - map_ops[i].handle = ~0; + map_ops[i].handle = INVALID_GRANT_HANDLE; if (map_ops[i].flags & GNTMAP_device_map) unmap[0].dev_bus_addr = map_ops[i].dev_bus_addr; else @@ -751,7 +751,7 @@ int set_foreign_p2m_mapping(struct gnttab_map_grant_ref *map_ops, kmap_ops[i].status = GNTST_general_error; unmap[1].host_addr = kmap_ops[i].host_addr, unmap[1].handle = kmap_ops[i].handle; - kmap_ops[i].handle = ~0; + kmap_ops[i].handle = INVALID_GRANT_HANDLE; if (kmap_ops[i].flags & GNTMAP_device_map) unmap[1].dev_bus_addr = kmap_ops[i].dev_bus_addr; else diff --git a/drivers/pci/xen-pcifront.c b/drivers/pci/xen-pcifront.c index c6fe0cfec0f6..2d7502648219 100644 --- a/drivers/pci/xen-pcifront.c +++ b/drivers/pci/xen-pcifront.c @@ -26,7 +26,7 @@ #include #include -#define INVALID_GRANT_REF (0) + #define INVALID_EVTCHN (-1) struct pci_bus_entry { @@ -42,7 +42,7 @@ struct pcifront_device { struct list_head root_buses; int evtchn; - int gnt_ref; + grant_ref_t gnt_ref; int irq; diff --git a/drivers/xen/gntdev.c b/drivers/xen/gntdev.c index b60ebd8bdb26..ebfd3e7b20d4 100644 --- a/drivers/xen/gntdev.c +++ b/drivers/xen/gntdev.c @@ -186,11 +186,11 @@ struct gntdev_grant_map *gntdev_alloc_map(struct gntdev_priv *priv, int count, goto err; for (i = 0; i < count; i++) { - add->map_ops[i].handle = -1; - add->unmap_ops[i].handle = -1; + add->map_ops[i].handle = INVALID_GRANT_HANDLE; + add->unmap_ops[i].handle = INVALID_GRANT_HANDLE; if (use_ptemod) { - add->kmap_ops[i].handle = -1; - add->kunmap_ops[i].handle = -1; + add->kmap_ops[i].handle = INVALID_GRANT_HANDLE; + add->kunmap_ops[i].handle = INVALID_GRANT_HANDLE; } } @@ -279,7 +279,7 @@ static int find_grant_ptes(pte_t *pte, unsigned long addr, void *data) map->grants[pgnr].ref, map->grants[pgnr].domid); gnttab_set_unmap_op(&map->unmap_ops[pgnr], pte_maddr, flags, - -1 /* handle */); + INVALID_GRANT_HANDLE); return 0; } @@ -297,7 +297,7 @@ int gntdev_map_grant_pages(struct gntdev_grant_map *map) if (!use_ptemod) { /* Note: it could already be mapped */ - if (map->map_ops[0].handle != -1) + if (map->map_ops[0].handle != INVALID_GRANT_HANDLE) return 0; for (i = 0; i < map->count; i++) { unsigned long addr = (unsigned long) @@ 
-306,7 +306,7 @@ int gntdev_map_grant_pages(struct gntdev_grant_map *map) map->grants[i].ref, map->grants[i].domid); gnttab_set_unmap_op(&map->unmap_ops[i], addr, - map->flags, -1 /* handle */); + map->flags, INVALID_GRANT_HANDLE); } } else { /* @@ -332,7 +332,7 @@ int gntdev_map_grant_pages(struct gntdev_grant_map *map) map->grants[i].ref, map->grants[i].domid); gnttab_set_unmap_op(&map->kunmap_ops[i], address, - flags, -1); + flags, INVALID_GRANT_HANDLE); } } @@ -390,7 +390,7 @@ static int __unmap_grant_pages(struct gntdev_grant_map *map, int offset, pr_debug("unmap handle=%d st=%d\n", map->unmap_ops[offset+i].handle, map->unmap_ops[offset+i].status); - map->unmap_ops[offset+i].handle = -1; + map->unmap_ops[offset+i].handle = INVALID_GRANT_HANDLE; } return err; } @@ -406,13 +406,15 @@ static int unmap_grant_pages(struct gntdev_grant_map *map, int offset, * already unmapped some of the grants. Only unmap valid ranges. */ while (pages && !err) { - while (pages && map->unmap_ops[offset].handle == -1) { + while (pages && + map->unmap_ops[offset].handle == INVALID_GRANT_HANDLE) { offset++; pages--; } range = 0; while (range < pages) { - if (map->unmap_ops[offset+range].handle == -1) + if (map->unmap_ops[offset + range].handle == + INVALID_GRANT_HANDLE) break; range++; } diff --git a/include/xen/grant_table.h b/include/xen/grant_table.h index 0b1182a3cf41..cb854df031ce 100644 --- a/include/xen/grant_table.h +++ b/include/xen/grant_table.h @@ -50,6 +50,13 @@ #include #include +/* + * Technically there's no reliably invalid grant reference or grant handle, + * so pick the value that is the most unlikely one to be observed valid. + */ +#define INVALID_GRANT_REF ((grant_ref_t)-1) +#define INVALID_GRANT_HANDLE ((grant_handle_t)-1) + #define GNTTAB_RESERVED_XENSTORE 1 /* NR_GRANT_FRAMES must be less than or equal to that configured in Xen */ diff --git a/include/xen/xenbus.h b/include/xen/xenbus.h index 0b1386073d49..b94074c82772 100644 --- a/include/xen/xenbus.h +++ b/include/xen/xenbus.h @@ -51,7 +51,6 @@ #define XENBUS_MAX_RING_GRANT_ORDER 4 #define XENBUS_MAX_RING_GRANTS (1U << XENBUS_MAX_RING_GRANT_ORDER) -#define INVALID_GRANT_HANDLE (~0U) /* Register callback to watch this node. */ struct xenbus_watch -- cgit v1.2.3 From bf3c255150619b71badb328c4dab48401a7ed62d Mon Sep 17 00:00:00 2001 From: Sami Tolvanen Date: Mon, 8 Mar 2021 10:46:56 -0800 Subject: kbuild: Allow LTO to be selected with KASAN_HW_TAGS While LTO with KASAN is normally not useful, hardware tag-based KASAN can be used also in production kernels with ARM64_MTE. Therefore, allow KASAN_HW_TAGS to be selected together with HAS_LTO_CLANG. 
Reported-by: Alistair Delva Signed-off-by: Sami Tolvanen Reviewed-by: Kees Cook Signed-off-by: Masahiro Yamada --- arch/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/Kconfig b/arch/Kconfig index 2bb30673d8e6..2e7139b39e8f 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -638,7 +638,7 @@ config HAS_LTO_CLANG depends on $(success,$(AR) --help | head -n 1 | grep -qi llvm) depends on ARCH_SUPPORTS_LTO_CLANG depends on !FTRACE_MCOUNT_USE_RECORDMCOUNT - depends on !KASAN + depends on !KASAN || KASAN_HW_TAGS depends on !GCOV_KERNEL help The compiler and Kconfig options support building with Clang's -- cgit v1.2.3 From 4c273d23c44ad49c73353737b303e78585a4503f Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Wed, 10 Mar 2021 22:54:22 +0900 Subject: kbuild: remove LLVM=1 test from HAS_LTO_CLANG As Documentation/kbuild/llvm.rst notes, LLVM=1 switches the default of tools, but you can still override CC, LD, etc. individually. This LLVM=1 check is unneeded because each tool is already checked separately. "make CC=clang LD=ld.lld NM=llvm-nm AR=llvm-ar LLVM_IAS=1 menuconfig" should be able to enable Clang LTO. Signed-off-by: Masahiro Yamada Reviewed-by: Nathan Chancellor --- arch/Kconfig | 1 - 1 file changed, 1 deletion(-) (limited to 'arch') diff --git a/arch/Kconfig b/arch/Kconfig index 2e7139b39e8f..ecfd3520b676 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -632,7 +632,6 @@ config HAS_LTO_CLANG def_bool y # Clang >= 11: https://github.com/ClangBuiltLinux/linux/issues/510 depends on CC_IS_CLANG && CLANG_VERSION >= 110000 && LD_IS_LLD - depends on $(success,test $(LLVM) -eq 1) depends on $(success,test $(LLVM_IAS) -eq 1) depends on $(success,$(NM) --help | head -n 1 | grep -qi llvm) depends on $(success,$(AR) --help | head -n 1 | grep -qi llvm) -- cgit v1.2.3 From 7ba8f2b2d652cd8d8a2ab61f4be66973e70f9f88 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Wed, 10 Mar 2021 18:15:11 +0100 Subject: arm64: mm: use a 48-bit ID map when possible on 52-bit VA builds 52-bit VA kernels can run on hardware that is only 48-bit capable, but configure the ID map as 52-bit by default. This was not a problem until recently, because the special T0SZ value for a 52-bit VA space was never programmed into the TCR register anyway, and because a 52-bit ID map happens to use the same number of translation levels as a 48-bit one. This behavior was changed by commit 1401bef703a4 ("arm64: mm: Always update TCR_EL1 from __cpu_set_tcr_t0sz()"), which causes the unsupported T0SZ value for a 52-bit VA to be programmed into TCR_EL1. While some hardware simply ignores this, Mark reports that Amberwing systems choke on this, resulting in a broken boot. But even before that commit, the unsupported idmap_t0sz value was exposed to KVM and used to program TCR_EL2 incorrectly as well. Given that we already have to deal with address spaces being either 48-bit or 52-bit in size, the cleanest approach seems to be to simply default to a 48-bit VA ID map, and only switch to a 52-bit one if the placement of the kernel in DRAM requires it. This is guaranteed not to happen unless the system is actually 52-bit VA capable.
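For reference, T0SZ simply encodes 64 minus the VA size, so a 48-bit ID map uses T0SZ = 16 and a 52-bit one uses T0SZ = 12, and the boot code only drops below 16 when the kernel image's physical placement needs more than 48 bits. A rough C rendering of that decision (the real check is the assembly in head.S shown below; clz64() is a stand-in for the clz instruction, and comparing the raw count against TCR_T0SZ() assumes its field shift is zero):

        u64 pa = __pa_symbol(__idmap_text_end);
        u64 t0sz = clz64(pa);                   /* leading zero bits of the highest ID-map address */

        if (t0sz < TCR_T0SZ(VA_BITS_MIN))       /* image does not fit a 48-bit ID map */
                idmap_t0sz = t0sz;              /* extend; only reachable on 52-bit VA capable hardware */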
Fixes: 90ec95cda91a ("arm64: mm: Introduce VA_BITS_MIN") Reported-by: Mark Salter Link: http://lore.kernel.org/r/20210310003216.410037-1-msalter@redhat.com Signed-off-by: Ard Biesheuvel Link: https://lore.kernel.org/r/20210310171515.416643-2-ardb@kernel.org Signed-off-by: Will Deacon --- arch/arm64/include/asm/mmu_context.h | 5 +---- arch/arm64/kernel/head.S | 2 +- arch/arm64/mm/mmu.c | 2 +- 3 files changed, 3 insertions(+), 6 deletions(-) (limited to 'arch') diff --git a/arch/arm64/include/asm/mmu_context.h b/arch/arm64/include/asm/mmu_context.h index 70ce8c1d2b07..0f467d550f27 100644 --- a/arch/arm64/include/asm/mmu_context.h +++ b/arch/arm64/include/asm/mmu_context.h @@ -65,10 +65,7 @@ extern u64 idmap_ptrs_per_pgd; static inline bool __cpu_uses_extended_idmap(void) { - if (IS_ENABLED(CONFIG_ARM64_VA_BITS_52)) - return false; - - return unlikely(idmap_t0sz != TCR_T0SZ(VA_BITS)); + return unlikely(idmap_t0sz != TCR_T0SZ(vabits_actual)); } /* diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S index 8b469f164091..840bda1869e9 100644 --- a/arch/arm64/kernel/head.S +++ b/arch/arm64/kernel/head.S @@ -319,7 +319,7 @@ SYM_FUNC_START_LOCAL(__create_page_tables) */ adrp x5, __idmap_text_end clz x5, x5 - cmp x5, TCR_T0SZ(VA_BITS) // default T0SZ small enough? + cmp x5, TCR_T0SZ(VA_BITS_MIN) // default T0SZ small enough? b.ge 1f // .. then skip VA range extension adr_l x6, idmap_t0sz diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index 9c8aa1b44cd5..7484ea4f6ba0 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -40,7 +40,7 @@ #define NO_BLOCK_MAPPINGS BIT(0) #define NO_CONT_MAPPINGS BIT(1) -u64 idmap_t0sz = TCR_T0SZ(VA_BITS); +u64 idmap_t0sz = TCR_T0SZ(VA_BITS_MIN); u64 idmap_ptrs_per_pgd = PTRS_PER_PGD; u64 __section(".mmuoff.data.write") vabits_actual; -- cgit v1.2.3 From 30b2675761b8a1a2b6ef56b535ef51b789bb7150 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Wed, 10 Mar 2021 18:15:12 +0100 Subject: arm64: mm: remove unused __cpu_uses_extended_idmap[_level()] These routines lost all existing users during the latest merge window so we can remove them. This avoids the need to fix them in the context of fixing a regression related to the ID map on 52-bit VA kernels. Signed-off-by: Ard Biesheuvel Link: https://lore.kernel.org/r/20210310171515.416643-3-ardb@kernel.org Signed-off-by: Will Deacon --- arch/arm64/include/asm/mmu_context.h | 14 -------------- 1 file changed, 14 deletions(-) (limited to 'arch') diff --git a/arch/arm64/include/asm/mmu_context.h b/arch/arm64/include/asm/mmu_context.h index 0f467d550f27..bd02e99b1a4c 100644 --- a/arch/arm64/include/asm/mmu_context.h +++ b/arch/arm64/include/asm/mmu_context.h @@ -63,20 +63,6 @@ static inline void cpu_switch_mm(pgd_t *pgd, struct mm_struct *mm) extern u64 idmap_t0sz; extern u64 idmap_ptrs_per_pgd; -static inline bool __cpu_uses_extended_idmap(void) -{ - return unlikely(idmap_t0sz != TCR_T0SZ(vabits_actual)); -} - -/* - * True if the extended ID map requires an extra level of translation table - * to be configured. - */ -static inline bool __cpu_uses_extended_idmap_level(void) -{ - return ARM64_HW_PGTABLE_LEVELS(64 - idmap_t0sz) > CONFIG_PGTABLE_LEVELS; -} - /* * Ensure TCR.T0SZ is set to the provided value. 
*/ -- cgit v1.2.3 From 0b736881c8f1a6cd912f7a9162b9e097b28c1c30 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Tue, 9 Mar 2021 12:09:26 +0000 Subject: powerpc/traps: unrecoverable_exception() is not an interrupt handler unrecoverable_exception() is called from interrupt handlers or after an interrupt handler has failed. Make it a standard function to avoid doubling the actions performed on interrupt entry (e.g.: user time accounting). Fixes: 3a96570ffceb ("powerpc: convert interrupt handlers to use wrappers") Signed-off-by: Christophe Leroy Reviewed-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/ae96c59fa2cb7f24a8929c58cfa2c909cb8ff1f1.1615291471.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/interrupt.h | 3 ++- arch/powerpc/kernel/interrupt.c | 1 - arch/powerpc/kernel/traps.c | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/include/asm/interrupt.h b/arch/powerpc/include/asm/interrupt.h index aedfba29e43a..e8d09a841373 100644 --- a/arch/powerpc/include/asm/interrupt.h +++ b/arch/powerpc/include/asm/interrupt.h @@ -410,7 +410,6 @@ DECLARE_INTERRUPT_HANDLER(altivec_assist_exception); DECLARE_INTERRUPT_HANDLER(CacheLockingException); DECLARE_INTERRUPT_HANDLER(SPEFloatingPointException); DECLARE_INTERRUPT_HANDLER(SPEFloatingPointRoundException); -DECLARE_INTERRUPT_HANDLER(unrecoverable_exception); DECLARE_INTERRUPT_HANDLER(WatchdogException); DECLARE_INTERRUPT_HANDLER(kernel_bad_stack); @@ -437,6 +436,8 @@ DECLARE_INTERRUPT_HANDLER_NMI(hmi_exception_realmode); DECLARE_INTERRUPT_HANDLER_ASYNC(TAUException); +void unrecoverable_exception(struct pt_regs *regs); + void replay_system_reset(void); void replay_soft_interrupts(void); diff --git a/arch/powerpc/kernel/interrupt.c b/arch/powerpc/kernel/interrupt.c index 2ef3c4051bb9..c475a229a42a 100644 --- a/arch/powerpc/kernel/interrupt.c +++ b/arch/powerpc/kernel/interrupt.c @@ -436,7 +436,6 @@ again: return ret; } -void unrecoverable_exception(struct pt_regs *regs); void preempt_schedule_irq(void); notrace unsigned long interrupt_exit_kernel_prepare(struct pt_regs *regs, unsigned long msr) diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c index 1583fd1c6010..a44a30b0688c 100644 --- a/arch/powerpc/kernel/traps.c +++ b/arch/powerpc/kernel/traps.c @@ -2170,7 +2170,7 @@ DEFINE_INTERRUPT_HANDLER(SPEFloatingPointRoundException) * in the MSR is 0. This indicates that SRR0/1 are live, and that * we therefore lost state by taking this exception. */ -DEFINE_INTERRUPT_HANDLER(unrecoverable_exception) +void unrecoverable_exception(struct pt_regs *regs) { pr_emerg("Unrecoverable exception %lx at %lx (msr=%lx)\n", regs->trap, regs->nip, regs->msr); -- cgit v1.2.3 From ba08abca66d46381df60842f64f70099d5482b92 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 8 Mar 2021 15:46:04 +0100 Subject: objtool,x86: Fix uaccess PUSHF/POPF validation Commit ab234a260b1f ("x86/pv: Rework arch_local_irq_restore() to not use popf") replaced "push %reg; popf" with something like: "test $0x200, %reg; jz 1f; sti; 1:", which breaks the pushf/popf symmetry that commit ea24213d8088 ("objtool: Add UACCESS validation") relies on. 
The result is: drivers/gpu/drm/amd/amdgpu/si.o: warning: objtool: si_common_hw_init()+0xf36: PUSHF stack exhausted Meanwhile, commit c9c324dc22aa ("objtool: Support stack layout changes in alternatives") means that we can actually use stack-ops in alternatives, which means we can revert 1ff865e343c2 ("x86,smap: Fix smap_{save,restore}() alternatives"). That in turn means we can limit the PUSHF/POPF handling of ea24213d8088 to those instructions that are in alternatives. Fixes: ab234a260b1f ("x86/pv: Rework arch_local_irq_restore() to not use popf") Reported-by: Borislav Petkov Signed-off-by: Peter Zijlstra (Intel) Acked-by: Josh Poimboeuf Link: https://lkml.kernel.org/r/YEY4rIbQYa5fnnEp@hirez.programming.kicks-ass.net --- arch/x86/include/asm/smap.h | 10 ++++------ tools/objtool/check.c | 3 +++ 2 files changed, 7 insertions(+), 6 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/smap.h b/arch/x86/include/asm/smap.h index 8b58d6975d5d..0bc9b0895f33 100644 --- a/arch/x86/include/asm/smap.h +++ b/arch/x86/include/asm/smap.h @@ -58,9 +58,8 @@ static __always_inline unsigned long smap_save(void) unsigned long flags; asm volatile ("# smap_save\n\t" - ALTERNATIVE("jmp 1f", "", X86_FEATURE_SMAP) - "pushf; pop %0; " __ASM_CLAC "\n\t" - "1:" + ALTERNATIVE("", "pushf; pop %0; " __ASM_CLAC "\n\t", + X86_FEATURE_SMAP) : "=rm" (flags) : : "memory", "cc"); return flags; @@ -69,9 +68,8 @@ static __always_inline unsigned long smap_save(void) static __always_inline void smap_restore(unsigned long flags) { asm volatile ("# smap_restore\n\t" - ALTERNATIVE("jmp 1f", "", X86_FEATURE_SMAP) - "push %0; popf\n\t" - "1:" + ALTERNATIVE("", "push %0; popf\n\t", + X86_FEATURE_SMAP) : : "g" (flags) : "memory", "cc"); } diff --git a/tools/objtool/check.c b/tools/objtool/check.c index 068cdb41f76f..5e5388a38e2a 100644 --- a/tools/objtool/check.c +++ b/tools/objtool/check.c @@ -2442,6 +2442,9 @@ static int handle_insn_ops(struct instruction *insn, struct insn_state *state) if (update_cfi_state(insn, &state->cfi, op)) return 1; + if (!insn->alt_group) + continue; + if (op->dest.type == OP_DEST_PUSHF) { if (!state->uaccess_stack) { state->uaccess_stack = 1; -- cgit v1.2.3 From 7d717558dd5ef10d28866750d5c24ff892ea3778 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Thu, 11 Mar 2021 10:00:15 +0000 Subject: KVM: arm64: Reject VM creation when the default IPA size is unsupported KVM/arm64 has forever used a 40bit default IPA space, partially due to its 32bit heritage (where the only choice is 40bit). However, there are implementations in the wild that have a *cough* much smaller *cough* IPA space, which leads to a misprogramming of VTCR_EL2, and a guest that is stuck on its first memory access if userspace dares to ask for the default IPA setting (which most VMMs do). Instead, bluntly reject the creation of such a VM, as we can't satisfy the requirements from userspace (with a one-off warning). Also clarify the boot warning, and document that the VM creation will fail when an unsupported IPA size is provided. Although this is an ABI change, it doesn't really change much for userspace: - the guest couldn't run before this change, but no error was returned. At least userspace knows what is happening. - a memory slot that was accepted because it did fit the default IPA space now doesn't even get a chance to be registered. The other thing left to do is to convince userspace to actually use the IPA space setting instead of relying on the antiquated default.
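For a VMM, following that advice looks roughly like the sketch below (error handling elided; kvm_fd is assumed to be the /dev/kvm file descriptor): query the host limit with KVM_CAP_ARM_VM_IPA_SIZE and pass an explicit IPA size through the machine type instead of type 0:

        int ipa = ioctl(kvm_fd, KVM_CHECK_EXTENSION, KVM_CAP_ARM_VM_IPA_SIZE);

        if (ipa <= 0)
                ipa = 40;       /* older kernels: only the 40-bit default exists */
        vm_fd = ioctl(kvm_fd, KVM_CREATE_VM, KVM_VM_TYPE_ARM_IPA_SIZE(ipa));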
Fixes: 233a7cb23531 ("kvm: arm64: Allow tuning the physical address size for VM") Signed-off-by: Marc Zyngier Cc: stable@vger.kernel.org Reviewed-by: Andrew Jones Reviewed-by: Eric Auger Link: https://lore.kernel.org/r/20210311100016.3830038-2-maz@kernel.org --- Documentation/virt/kvm/api.rst | 3 +++ arch/arm64/kvm/reset.c | 12 ++++++++---- 2 files changed, 11 insertions(+), 4 deletions(-) (limited to 'arch') diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst index 1a2b5210cdbf..38e327d4b479 100644 --- a/Documentation/virt/kvm/api.rst +++ b/Documentation/virt/kvm/api.rst @@ -182,6 +182,9 @@ is dependent on the CPU capability and the kernel configuration. The limit can be retrieved using KVM_CAP_ARM_VM_IPA_SIZE of the KVM_CHECK_EXTENSION ioctl() at run-time. +Creation of the VM will fail if the requested IPA size (whether it is +implicit or explicit) is unsupported on the host. + Please note that configuring the IPA size does not affect the capability exposed by the guest CPUs in ID_AA64MMFR0_EL1[PARange]. It only affects size of the address translated by the stage2 level (guest physical to diff --git a/arch/arm64/kvm/reset.c b/arch/arm64/kvm/reset.c index 47f3f035f3ea..9d3d09a89894 100644 --- a/arch/arm64/kvm/reset.c +++ b/arch/arm64/kvm/reset.c @@ -324,10 +324,9 @@ int kvm_set_ipa_limit(void) } kvm_ipa_limit = id_aa64mmfr0_parange_to_phys_shift(parange); - WARN(kvm_ipa_limit < KVM_PHYS_SHIFT, - "KVM IPA Size Limit (%d bits) is smaller than default size\n", - kvm_ipa_limit); - kvm_info("IPA Size Limit: %d bits\n", kvm_ipa_limit); + kvm_info("IPA Size Limit: %d bits%s\n", kvm_ipa_limit, + ((kvm_ipa_limit < KVM_PHYS_SHIFT) ? + " (Reduced IPA size, limited VM/VMM compatibility)" : "")); return 0; } @@ -356,6 +355,11 @@ int kvm_arm_setup_stage2(struct kvm *kvm, unsigned long type) return -EINVAL; } else { phys_shift = KVM_PHYS_SHIFT; + if (phys_shift > kvm_ipa_limit) { + pr_warn_once("%s using unsupported default IPA limit, upgrade your VMM\n", + current->comm); + return -EINVAL; + } } mmfr0 = read_sanitised_ftr_reg(SYS_ID_AA64MMFR0_EL1); -- cgit v1.2.3 From 262b003d059c6671601a19057e9fe1a5e7f23722 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Thu, 11 Mar 2021 10:00:16 +0000 Subject: KVM: arm64: Fix exclusive limit for IPA size When registering a memslot, we check the size and location of that memslot against the IPA size to ensure that we can provide guest access to the whole of the memory. Unfortunately, this check rejects memslot that end-up at the exact limit of the addressing capability for a given IPA size. For example, it refuses the creation of a 2GB memslot at 0x8000000 with a 32bit IPA space. Fix it by relaxing the check to accept a memslot reaching the limit of the IPA space. Fixes: c3058d5da222 ("arm/arm64: KVM: Ensure memslots are within KVM_PHYS_SIZE") Reviewed-by: Eric Auger Signed-off-by: Marc Zyngier Cc: stable@vger.kernel.org Reviewed-by: Andrew Jones Link: https://lore.kernel.org/r/20210311100016.3830038-3-maz@kernel.org --- arch/arm64/kvm/mmu.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c index 77cb2d28f2a4..8711894db8c2 100644 --- a/arch/arm64/kvm/mmu.c +++ b/arch/arm64/kvm/mmu.c @@ -1312,8 +1312,7 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm, * Prevent userspace from creating a memory region outside of the IPA * space addressable by the KVM guest IPA space. 
*/ - if (memslot->base_gfn + memslot->npages >= - (kvm_phys_size(kvm) >> PAGE_SHIFT)) + if ((memslot->base_gfn + memslot->npages) > (kvm_phys_size(kvm) >> PAGE_SHIFT)) return -EFAULT; mmap_read_lock(current->mm); -- cgit v1.2.3 From 6fcd9cbc6a903f48eebaa14657aeccb003f69a3d Mon Sep 17 00:00:00 2001 From: Muhammad Usama Anjum Date: Sat, 6 Mar 2021 00:11:23 +0500 Subject: kvm: x86: annotate RCU pointers This patch adds the annotation to fix the following sparse errors: arch/x86/kvm//x86.c:8147:15: error: incompatible types in comparison expression (different address spaces): arch/x86/kvm//x86.c:8147:15: struct kvm_apic_map [noderef] __rcu * arch/x86/kvm//x86.c:8147:15: struct kvm_apic_map * arch/x86/kvm//x86.c:10628:16: error: incompatible types in comparison expression (different address spaces): arch/x86/kvm//x86.c:10628:16: struct kvm_apic_map [noderef] __rcu * arch/x86/kvm//x86.c:10628:16: struct kvm_apic_map * arch/x86/kvm//x86.c:10629:15: error: incompatible types in comparison expression (different address spaces): arch/x86/kvm//x86.c:10629:15: struct kvm_pmu_event_filter [noderef] __rcu * arch/x86/kvm//x86.c:10629:15: struct kvm_pmu_event_filter * arch/x86/kvm//lapic.c:267:15: error: incompatible types in comparison expression (different address spaces): arch/x86/kvm//lapic.c:267:15: struct kvm_apic_map [noderef] __rcu * arch/x86/kvm//lapic.c:267:15: struct kvm_apic_map * arch/x86/kvm//lapic.c:269:9: error: incompatible types in comparison expression (different address spaces): arch/x86/kvm//lapic.c:269:9: struct kvm_apic_map [noderef] __rcu * arch/x86/kvm//lapic.c:269:9: struct kvm_apic_map * arch/x86/kvm//lapic.c:637:15: error: incompatible types in comparison expression (different address spaces): arch/x86/kvm//lapic.c:637:15: struct kvm_apic_map [noderef] __rcu * arch/x86/kvm//lapic.c:637:15: struct kvm_apic_map * arch/x86/kvm//lapic.c:994:15: error: incompatible types in comparison expression (different address spaces): arch/x86/kvm//lapic.c:994:15: struct kvm_apic_map [noderef] __rcu * arch/x86/kvm//lapic.c:994:15: struct kvm_apic_map * arch/x86/kvm//lapic.c:1036:15: error: incompatible types in comparison expression (different address spaces): arch/x86/kvm//lapic.c:1036:15: struct kvm_apic_map [noderef] __rcu * arch/x86/kvm//lapic.c:1036:15: struct kvm_apic_map * arch/x86/kvm//lapic.c:1173:15: error: incompatible types in comparison expression (different address spaces): arch/x86/kvm//lapic.c:1173:15: struct kvm_apic_map [noderef] __rcu * arch/x86/kvm//lapic.c:1173:15: struct kvm_apic_map * arch/x86/kvm//pmu.c:190:18: error: incompatible types in comparison expression (different address spaces): arch/x86/kvm//pmu.c:190:18: struct kvm_pmu_event_filter [noderef] __rcu * arch/x86/kvm//pmu.c:190:18: struct kvm_pmu_event_filter * arch/x86/kvm//pmu.c:251:18: error: incompatible types in comparison expression (different address spaces): arch/x86/kvm//pmu.c:251:18: struct kvm_pmu_event_filter [noderef] __rcu * arch/x86/kvm//pmu.c:251:18: struct kvm_pmu_event_filter * arch/x86/kvm//pmu.c:522:18: error: incompatible types in comparison expression (different address spaces): arch/x86/kvm//pmu.c:522:18: struct kvm_pmu_event_filter [noderef] __rcu * arch/x86/kvm//pmu.c:522:18: struct kvm_pmu_event_filter * arch/x86/kvm//pmu.c:522:18: error: incompatible types in comparison expression (different address spaces): arch/x86/kvm//pmu.c:522:18: struct kvm_pmu_event_filter [noderef] __rcu * arch/x86/kvm//pmu.c:522:18: struct kvm_pmu_event_filter * Signed-off-by: Muhammad Usama Anjum Message-Id: 
<20210305191123.GA497469@LEGION> Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/kvm_host.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 877a4025d8da..9bc091ecaaeb 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -963,7 +963,7 @@ struct kvm_arch { struct kvm_pit *vpit; atomic_t vapics_in_nmi_mode; struct mutex apic_map_lock; - struct kvm_apic_map *apic_map; + struct kvm_apic_map __rcu *apic_map; atomic_t apic_map_dirty; bool apic_access_page_done; @@ -1036,7 +1036,7 @@ struct kvm_arch { bool bus_lock_detection_enabled; - struct kvm_pmu_event_filter *pmu_event_filter; + struct kvm_pmu_event_filter __rcu *pmu_event_filter; struct task_struct *nx_lpage_recovery_thread; #ifdef CONFIG_X86_64 -- cgit v1.2.3 From d7eb79c6290c7ae4561418544072e0a3266e7384 Mon Sep 17 00:00:00 2001 From: Wanpeng Li Date: Wed, 24 Feb 2021 09:37:29 +0800 Subject: KVM: kvmclock: Fix vCPUs > 64 can't be online/hotplugged # lscpu Architecture: x86_64 CPU op-mode(s): 32-bit, 64-bit Byte Order: Little Endian CPU(s): 88 On-line CPU(s) list: 0-63 Off-line CPU(s) list: 64-87 # cat /proc/cmdline BOOT_IMAGE=/vmlinuz-5.10.0-rc3-tlinux2-0050+ root=/dev/mapper/cl-root ro rd.lvm.lv=cl/root rhgb quiet console=ttyS0 LANG=en_US.UTF-8 no-kvmclock-vsyscall # echo 1 > /sys/devices/system/cpu/cpu76/online -bash: echo: write error: Cannot allocate memory The per-cpu vsyscall pvclock data pointer is assigned either an element of the static array hv_clock_boot (#vCPU <= 64) or dynamically allocated memory hvclock_mem (vCPU > 64). The dynamic memory will not be allocated if the kvmclock vsyscall is disabled, which can result in CPU hotplug failing in kvmclock_setup_percpu(), which returns -ENOMEM. It's broken for no-vsyscall and sometimes you end up with vsyscall disabled if the host does something strange. This patch fixes it by allocating this dynamic memory unconditionally, even if the vsyscall is disabled.
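The per-cpu assignment the message describes boils down to roughly this (a simplified sketch of kvmclock_setup_percpu(), with the early-exit checks omitted):

        if (cpu < HVC_BOOT_ARRAY_SIZE)
                p = &hv_clock_boot[cpu];                /* first 64 vCPUs: static, __bss_decrypted */
        else if (hvclock_mem)
                p = hvclock_mem + cpu - HVC_BOOT_ARRAY_SIZE;
        else
                return -ENOMEM;                         /* the hotplug failure the patch avoids */
        per_cpu(hv_clock_per_cpu, cpu) = p;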
Fixes: 6a1cac56f4 ("x86/kvm: Use __bss_decrypted attribute in shared variables") Reported-by: Zelin Deng Cc: Brijesh Singh Cc: stable@vger.kernel.org#v4.19-rc5+ Signed-off-by: Wanpeng Li Message-Id: <1614130683-24137-1-git-send-email-wanpengli@tencent.com> Signed-off-by: Paolo Bonzini --- arch/x86/kernel/kvmclock.c | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c index aa593743acf6..1fc0962c89c0 100644 --- a/arch/x86/kernel/kvmclock.c +++ b/arch/x86/kernel/kvmclock.c @@ -268,21 +268,20 @@ static void __init kvmclock_init_mem(void) static int __init kvm_setup_vsyscall_timeinfo(void) { -#ifdef CONFIG_X86_64 - u8 flags; + kvmclock_init_mem(); - if (!per_cpu(hv_clock_per_cpu, 0) || !kvmclock_vsyscall) - return 0; +#ifdef CONFIG_X86_64 + if (per_cpu(hv_clock_per_cpu, 0) && kvmclock_vsyscall) { + u8 flags; - flags = pvclock_read_flags(&hv_clock_boot[0].pvti); - if (!(flags & PVCLOCK_TSC_STABLE_BIT)) - return 0; + flags = pvclock_read_flags(&hv_clock_boot[0].pvti); + if (!(flags & PVCLOCK_TSC_STABLE_BIT)) + return 0; - kvm_clock.vdso_clock_mode = VDSO_CLOCKMODE_PVCLOCK; + kvm_clock.vdso_clock_mode = VDSO_CLOCKMODE_PVCLOCK; + } #endif - kvmclock_init_mem(); - return 0; } early_initcall(kvm_setup_vsyscall_timeinfo); -- cgit v1.2.3 From 8df9f1af2eced9720f71cf310275d81c1bf07a06 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Tue, 9 Mar 2021 16:30:29 -0800 Subject: KVM: x86/mmu: Skip !MMU-present SPTEs when removing SP in exclusive mode If mmu_lock is held for write, don't bother setting !PRESENT SPTEs to REMOVED_SPTE when recursively zapping SPTEs as part of shadow page removal. The concurrent write protections provided by REMOVED_SPTE are not needed, there are no backing page side effects to record, and MMIO SPTEs can be left as is since they are protected by the memslot generation, not by ensuring that the MMIO SPTE is unreachable (which is racy with respect to lockless walks regardless of zapping behavior). Skipping !PRESENT drastically reduces the number of updates needed to tear down sparsely populated MMUs, e.g. when tearing down a 6gb VM that didn't touch much memory, 6929/7168 (~96.6%) of SPTEs were '0' and could be skipped. Avoiding the write itself is likely close to a wash, but avoiding __handle_changed_spte() is a clear-cut win as that involves saving and restoring all non-volatile GPRs (it's a subtly big function), as well as several conditional branches before bailing out. Cc: Ben Gardon Signed-off-by: Sean Christopherson Message-Id: <20210310003029.1250571-1-seanjc@google.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/mmu/tdp_mmu.c | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'arch') diff --git a/arch/x86/kvm/mmu/tdp_mmu.c b/arch/x86/kvm/mmu/tdp_mmu.c index c926c6b899a1..d78915019b08 100644 --- a/arch/x86/kvm/mmu/tdp_mmu.c +++ b/arch/x86/kvm/mmu/tdp_mmu.c @@ -337,7 +337,18 @@ static void handle_removed_tdp_mmu_page(struct kvm *kvm, u64 *pt, cpu_relax(); } } else { + /* + * If the SPTE is not MMU-present, there is no backing + * page associated with the SPTE and so no side effects + * that need to be recorded, and exclusive ownership of + * mmu_lock ensures the SPTE can't be made present. + * Note, zapping MMIO SPTEs is also unnecessary as they + * are guarded by the memslots generation, not by being + * unreachable. 
+ */ old_child_spte = READ_ONCE(*sptep); + if (!is_shadow_present_pte(old_child_spte)) + continue; /* * Marking the SPTE as a removed SPTE is not -- cgit v1.2.3 From 35737d2db2f4567106c90060ad110b27cb354fa4 Mon Sep 17 00:00:00 2001 From: Wanpeng Li Date: Thu, 4 Mar 2021 08:35:18 +0800 Subject: KVM: LAPIC: Advancing the timer expiration on guest initiated write Advancing the timer expiration should only be necessary on guest initiated writes. When we cancel the timer and clear .pending during state restore, clear expired_tscdeadline as well. Reviewed-by: Sean Christopherson Signed-off-by: Wanpeng Li Message-Id: <1614818118-965-1-git-send-email-wanpengli@tencent.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/lapic.c | 1 + 1 file changed, 1 insertion(+) (limited to 'arch') diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index cb8ebfaccfb6..cc369b9ad8f1 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -2604,6 +2604,7 @@ int kvm_apic_set_state(struct kvm_vcpu *vcpu, struct kvm_lapic_state *s) apic_update_ppr(apic); hrtimer_cancel(&apic->lapic_timer.timer); + apic->lapic_timer.expired_tscdeadline = 0; apic_update_lvtt(apic); apic_manage_nmi_watchdog(apic, kvm_lapic_get_reg(apic, APIC_LVT0)); update_divide_count(apic); -- cgit v1.2.3 From 0ceb1ace4a2778e34a5414e5349712ae4dc41d85 Mon Sep 17 00:00:00 2001 From: Sergei Trofimovich Date: Fri, 12 Mar 2021 21:08:23 -0800 Subject: ia64: fix ia64_syscall_get_set_arguments() for break-based syscalls In https://bugs.gentoo.org/769614 Dmitry noticed that `ptrace(PTRACE_GET_SYSCALL_INFO)` does not work for syscalls called via glibc's syscall() wrapper. ia64 has two ways to call syscalls from userspace: via `break` and via `eps` instructions. The difference is in stack layout: 1. `eps` creates a simple stack frame: no locals, in{0..7} == out{0..8} 2. `break` uses the userspace stack frame: there may be locals (glibc provides one), in{0..7} == out{0..8}. Both work fine in the syscall handling code itself. But `ptrace(PTRACE_GET_SYSCALL_INFO)` uses the unwind mechanism to re-extract syscall arguments, and it does not account for locals. The change always skips the local registers. It should not change the `eps` path, as the kernel's handler already enforces locals=0, and it fixes `break`. Tested on v5.10 on rx3600 machine (ia64 9040 CPU). Link: https://lkml.kernel.org/r/20210221002554.333076-1-slyfox@gentoo.org Link: https://bugs.gentoo.org/769614 Signed-off-by: Sergei Trofimovich Reported-by: Dmitry V. Levin Cc: Oleg Nesterov Cc: John Paul Adrian Glaubitz Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/ia64/kernel/ptrace.c | 24 ++++++++++++++++++------ 1 file changed, 18 insertions(+), 6 deletions(-) (limited to 'arch') diff --git a/arch/ia64/kernel/ptrace.c b/arch/ia64/kernel/ptrace.c index c3490ee2daa5..e14f5653393a 100644 --- a/arch/ia64/kernel/ptrace.c +++ b/arch/ia64/kernel/ptrace.c @@ -2013,27 +2013,39 @@ static void syscall_get_set_args_cb(struct unw_frame_info *info, void *data) { struct syscall_get_set_args *args = data; struct pt_regs *pt = args->regs; - unsigned long *krbs, cfm, ndirty; + unsigned long *krbs, cfm, ndirty, nlocals, nouts; int i, count; if (unw_unwind_to_user(info) < 0) return; + /* + * We get here via a few paths: + * - break instruction: cfm is shared with caller. + * syscall args are in out= regs, locals are non-empty. + * - eps instruction: cfm is set by br.call + * locals don't exist. + * + * For both cases arguments are reachable in cfm.sof - cfm.sol. + * CFM: [ ...
| sor: 17..14 | sol : 13..7 | sof : 6..0 ] + */ cfm = pt->cr_ifs; + nlocals = (cfm >> 7) & 0x7f; /* aka sol */ + nouts = (cfm & 0x7f) - nlocals; /* aka sof - sol */ krbs = (unsigned long *)info->task + IA64_RBS_OFFSET/8; ndirty = ia64_rse_num_regs(krbs, krbs + (pt->loadrs >> 19)); count = 0; if (in_syscall(pt)) - count = min_t(int, args->n, cfm & 0x7f); + count = min_t(int, args->n, nouts); + /* Iterate over outs. */ for (i = 0; i < count; i++) { + int j = ndirty + nlocals + i + args->i; if (args->rw) - *ia64_rse_skip_regs(krbs, ndirty + i + args->i) = - args->args[i]; + *ia64_rse_skip_regs(krbs, j) = args->args[i]; else - args->args[i] = *ia64_rse_skip_regs(krbs, - ndirty + i + args->i); + args->args[i] = *ia64_rse_skip_regs(krbs, j); } if (!args->rw) { -- cgit v1.2.3 From 61bf318eac2c13356f7bd1c6a05421ef504ccc8a Mon Sep 17 00:00:00 2001 From: Sergei Trofimovich Date: Fri, 12 Mar 2021 21:08:27 -0800 Subject: ia64: fix ptrace(PTRACE_SYSCALL_INFO_EXIT) sign In https://bugs.gentoo.org/769614 Dmitry noticed that `ptrace(PTRACE_GET_SYSCALL_INFO)` does not return error sign properly. The bug is in mismatch between get/set errors: static inline long syscall_get_error(struct task_struct *task, struct pt_regs *regs) { return regs->r10 == -1 ? regs->r8:0; } static inline long syscall_get_return_value(struct task_struct *task, struct pt_regs *regs) { return regs->r8; } static inline void syscall_set_return_value(struct task_struct *task, struct pt_regs *regs, int error, long val) { if (error) { /* error < 0, but ia64 uses > 0 return value */ regs->r8 = -error; regs->r10 = -1; } else { regs->r8 = val; regs->r10 = 0; } } Tested on v5.10 on rx3600 machine (ia64 9040 CPU). Link: https://lkml.kernel.org/r/20210221002554.333076-2-slyfox@gentoo.org Link: https://bugs.gentoo.org/769614 Signed-off-by: Sergei Trofimovich Reported-by: Dmitry V. Levin Reviewed-by: Dmitry V. Levin Cc: John Paul Adrian Glaubitz Cc: Oleg Nesterov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/ia64/include/asm/syscall.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/ia64/include/asm/syscall.h b/arch/ia64/include/asm/syscall.h index 6c6f16e409a8..0d23c0049301 100644 --- a/arch/ia64/include/asm/syscall.h +++ b/arch/ia64/include/asm/syscall.h @@ -32,7 +32,7 @@ static inline void syscall_rollback(struct task_struct *task, static inline long syscall_get_error(struct task_struct *task, struct pt_regs *regs) { - return regs->r10 == -1 ? regs->r8:0; + return regs->r10 == -1 ? -regs->r8:0; } static inline long syscall_get_return_value(struct task_struct *task, -- cgit v1.2.3
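A concrete example of what the one-character change above fixes, based on the helpers quoted in the message: for a syscall failing with ENOENT, the ia64 entry code sets r8 = 2 (the positive errno) and r10 = -1. syscall_get_error() used to report +2, which PTRACE_GET_SYSCALL_INFO exposed as a bogus positive value; with the negation it reports -2, i.e. -ENOENT, mirroring what syscall_set_return_value() does in the opposite direction:

        /* sketch of the ia64 error convention, not kernel code */
        if (regs->r10 == -1)
                error = -regs->r8;      /* r8 holds the positive errno */
        else
                error = 0;              /* success: r8 is the plain return value */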