| author | Thomas Gleixner | 2020-08-26 01:36:07 +0200 |
|---|---|---|
| committer | Thomas Gleixner | 2020-08-26 01:36:07 +0200 |
| commit | ceb2465c51195967f11f6507538579816ac67cb8 | |
| tree | eab534b1062608e36a7b54ccc7be693625f235b8 /arch | |
| parent | f107cee94ba4d2c7357fde59a1d84346c73d4958 | |
| parent | 821fc9e261f3af235752f46e59084467cfd440c4 | |
Merge tag 'irqchip-fixes-5.9-2' of git://git.kernel.org/pub/scm/linux/kernel/git/maz/arm-platforms into irq/urgent
Pull irqchip fixes from Marc Zyngier:
- Revert the wholesale conversion to platform drivers of the pdc, sysirq
and cirq drivers, as it breaks a number of platforms even when the
driver is built-in (probe ordering bites you).
- Prevent interrupts from being lost with the STM32 exti driver
- Fix wake-up interrupts for the MIPS Ingenic driver
- Fix an embarrassing typo in the new module helpers, leading to the probe
failing most of the time
- The promised TI firmware rework that couldn't make it into the merge
window due to a very badly managed set of dependencies
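
The probe-ordering problem behind the first revert is easiest to see by contrasting the two initialization paths. The sketch below is illustrative only — the compatible string, function names and driver are hypothetical, not code from this merge: an irqchip registered with IRQCHIP_DECLARE() is initialized from irqchip_init() during early boot, whereas the same chip converted to a platform driver is probed only once the driver core binds the device, which can be too late for early consumers of its interrupts even when the driver is built in.

```c
/*
 * Illustrative sketch only -- hypothetical driver, not code from this merge.
 * Contrast between early irqchip initialisation and platform-driver probing.
 */
#include <linux/init.h>
#include <linux/irqchip.h>
#include <linux/of.h>
#include <linux/platform_device.h>

/* Early path: runs from irqchip_init(), before any device is probed. */
static int __init example_intc_of_init(struct device_node *node,
				       struct device_node *parent)
{
	return 0;
}
IRQCHIP_DECLARE(example_intc, "vendor,example-intc", example_intc_of_init);

/* Platform-driver path: runs only when the driver core probes the device. */
static int example_intc_probe(struct platform_device *pdev)
{
	return 0;
}

static struct platform_driver example_intc_driver = {
	.probe	= example_intc_probe,
	.driver	= {
		.name = "example-intc",
	},
};
builtin_platform_driver(example_intc_driver);
```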
Diffstat (limited to 'arch')
335 files changed, 4857 insertions, 5491 deletions
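
Much of the arch/ churn below follows one pattern: page-fault accounting moves out of the individual architecture fault handlers and into the core MM, with handle_mm_fault() gaining a struct pt_regs * argument, while each handler now raises the generic PERF_COUNT_SW_PAGE_FAULTS software event itself. A minimal sketch of the resulting handler shape (a simplified pseudo-handler, not any particular architecture's code):

```c
#include <linux/mm.h>
#include <linux/perf_event.h>
#include <linux/ptrace.h>

/*
 * Simplified sketch of the post-conversion arch fault-handler shape seen in
 * the alpha/arc/arm hunks below; error paths and the retry loop are elided.
 */
static void example_handle_fault(struct pt_regs *regs, unsigned long address,
				 struct vm_area_struct *vma, unsigned int flags)
{
	vm_fault_t fault;

	/* The generic fault count is now raised by the arch handler itself. */
	perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address);

	/*
	 * Passing regs lets the core MM do the major/minor accounting
	 * (maj_flt/min_flt and the MAJ/MIN perf events) that each
	 * architecture previously duplicated.
	 */
	fault = handle_mm_fault(vma, address, flags, regs);

	if (fault & VM_FAULT_RETRY)
		flags |= FAULT_FLAG_TRIED;	/* and retry, as before */
}
```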
diff --git a/arch/Kconfig b/arch/Kconfig index 3329fa143637..af14a567b493 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -972,6 +972,9 @@ config HAVE_SPARSE_SYSCALL_NR entries at 4000, 5000 and 6000 locations. This option turns on syscall related optimizations for a given architecture. +config ARCH_HAS_VDSO_DATA + bool + source "kernel/gcov/Kconfig" source "scripts/gcc-plugins/Kconfig" diff --git a/arch/alpha/include/asm/core_apecs.h b/arch/alpha/include/asm/core_apecs.h index 0a07055bc0fe..2d9726fc02ef 100644 --- a/arch/alpha/include/asm/core_apecs.h +++ b/arch/alpha/include/asm/core_apecs.h @@ -384,7 +384,7 @@ struct el_apecs_procdata } \ } while (0) -__EXTERN_INLINE unsigned int apecs_ioread8(void __iomem *xaddr) +__EXTERN_INLINE unsigned int apecs_ioread8(const void __iomem *xaddr) { unsigned long addr = (unsigned long) xaddr; unsigned long result, base_and_type; @@ -420,7 +420,7 @@ __EXTERN_INLINE void apecs_iowrite8(u8 b, void __iomem *xaddr) *(vuip) ((addr << 5) + base_and_type) = w; } -__EXTERN_INLINE unsigned int apecs_ioread16(void __iomem *xaddr) +__EXTERN_INLINE unsigned int apecs_ioread16(const void __iomem *xaddr) { unsigned long addr = (unsigned long) xaddr; unsigned long result, base_and_type; @@ -456,7 +456,7 @@ __EXTERN_INLINE void apecs_iowrite16(u16 b, void __iomem *xaddr) *(vuip) ((addr << 5) + base_and_type) = w; } -__EXTERN_INLINE unsigned int apecs_ioread32(void __iomem *xaddr) +__EXTERN_INLINE unsigned int apecs_ioread32(const void __iomem *xaddr) { unsigned long addr = (unsigned long) xaddr; if (addr < APECS_DENSE_MEM) diff --git a/arch/alpha/include/asm/core_cia.h b/arch/alpha/include/asm/core_cia.h index c706a7f2b061..cb22991f6761 100644 --- a/arch/alpha/include/asm/core_cia.h +++ b/arch/alpha/include/asm/core_cia.h @@ -342,7 +342,7 @@ struct el_CIA_sysdata_mcheck { #define vuip volatile unsigned int __force * #define vulp volatile unsigned long __force * -__EXTERN_INLINE unsigned int cia_ioread8(void __iomem *xaddr) +__EXTERN_INLINE unsigned int cia_ioread8(const void __iomem *xaddr) { unsigned long addr = (unsigned long) xaddr; unsigned long result, base_and_type; @@ -374,7 +374,7 @@ __EXTERN_INLINE void cia_iowrite8(u8 b, void __iomem *xaddr) *(vuip) ((addr << 5) + base_and_type) = w; } -__EXTERN_INLINE unsigned int cia_ioread16(void __iomem *xaddr) +__EXTERN_INLINE unsigned int cia_ioread16(const void __iomem *xaddr) { unsigned long addr = (unsigned long) xaddr; unsigned long result, base_and_type; @@ -404,7 +404,7 @@ __EXTERN_INLINE void cia_iowrite16(u16 b, void __iomem *xaddr) *(vuip) ((addr << 5) + base_and_type) = w; } -__EXTERN_INLINE unsigned int cia_ioread32(void __iomem *xaddr) +__EXTERN_INLINE unsigned int cia_ioread32(const void __iomem *xaddr) { unsigned long addr = (unsigned long) xaddr; if (addr < CIA_DENSE_MEM) diff --git a/arch/alpha/include/asm/core_lca.h b/arch/alpha/include/asm/core_lca.h index 84d5e5b84f4f..ec86314418cb 100644 --- a/arch/alpha/include/asm/core_lca.h +++ b/arch/alpha/include/asm/core_lca.h @@ -230,7 +230,7 @@ union el_lca { } while (0) -__EXTERN_INLINE unsigned int lca_ioread8(void __iomem *xaddr) +__EXTERN_INLINE unsigned int lca_ioread8(const void __iomem *xaddr) { unsigned long addr = (unsigned long) xaddr; unsigned long result, base_and_type; @@ -266,7 +266,7 @@ __EXTERN_INLINE void lca_iowrite8(u8 b, void __iomem *xaddr) *(vuip) ((addr << 5) + base_and_type) = w; } -__EXTERN_INLINE unsigned int lca_ioread16(void __iomem *xaddr) +__EXTERN_INLINE unsigned int lca_ioread16(const void __iomem *xaddr) { unsigned long addr 
= (unsigned long) xaddr; unsigned long result, base_and_type; @@ -302,7 +302,7 @@ __EXTERN_INLINE void lca_iowrite16(u16 b, void __iomem *xaddr) *(vuip) ((addr << 5) + base_and_type) = w; } -__EXTERN_INLINE unsigned int lca_ioread32(void __iomem *xaddr) +__EXTERN_INLINE unsigned int lca_ioread32(const void __iomem *xaddr) { unsigned long addr = (unsigned long) xaddr; if (addr < LCA_DENSE_MEM) diff --git a/arch/alpha/include/asm/core_marvel.h b/arch/alpha/include/asm/core_marvel.h index cc6fd92d5fa9..b266e02e284b 100644 --- a/arch/alpha/include/asm/core_marvel.h +++ b/arch/alpha/include/asm/core_marvel.h @@ -332,10 +332,10 @@ struct io7 { #define vucp volatile unsigned char __force * #define vusp volatile unsigned short __force * -extern unsigned int marvel_ioread8(void __iomem *); +extern unsigned int marvel_ioread8(const void __iomem *); extern void marvel_iowrite8(u8 b, void __iomem *); -__EXTERN_INLINE unsigned int marvel_ioread16(void __iomem *addr) +__EXTERN_INLINE unsigned int marvel_ioread16(const void __iomem *addr) { return __kernel_ldwu(*(vusp)addr); } diff --git a/arch/alpha/include/asm/core_mcpcia.h b/arch/alpha/include/asm/core_mcpcia.h index b30dc128210d..cb24d1bd6141 100644 --- a/arch/alpha/include/asm/core_mcpcia.h +++ b/arch/alpha/include/asm/core_mcpcia.h @@ -267,7 +267,7 @@ extern inline int __mcpcia_is_mmio(unsigned long addr) return (addr & 0x80000000UL) == 0; } -__EXTERN_INLINE unsigned int mcpcia_ioread8(void __iomem *xaddr) +__EXTERN_INLINE unsigned int mcpcia_ioread8(const void __iomem *xaddr) { unsigned long addr = (unsigned long)xaddr & MCPCIA_MEM_MASK; unsigned long hose = (unsigned long)xaddr & ~MCPCIA_MEM_MASK; @@ -291,7 +291,7 @@ __EXTERN_INLINE void mcpcia_iowrite8(u8 b, void __iomem *xaddr) *(vuip) ((addr << 5) + hose + 0x00) = w; } -__EXTERN_INLINE unsigned int mcpcia_ioread16(void __iomem *xaddr) +__EXTERN_INLINE unsigned int mcpcia_ioread16(const void __iomem *xaddr) { unsigned long addr = (unsigned long)xaddr & MCPCIA_MEM_MASK; unsigned long hose = (unsigned long)xaddr & ~MCPCIA_MEM_MASK; @@ -315,7 +315,7 @@ __EXTERN_INLINE void mcpcia_iowrite16(u16 b, void __iomem *xaddr) *(vuip) ((addr << 5) + hose + 0x08) = w; } -__EXTERN_INLINE unsigned int mcpcia_ioread32(void __iomem *xaddr) +__EXTERN_INLINE unsigned int mcpcia_ioread32(const void __iomem *xaddr) { unsigned long addr = (unsigned long)xaddr; diff --git a/arch/alpha/include/asm/core_t2.h b/arch/alpha/include/asm/core_t2.h index e0b33d09e93a..12bb7addc789 100644 --- a/arch/alpha/include/asm/core_t2.h +++ b/arch/alpha/include/asm/core_t2.h @@ -572,7 +572,7 @@ __EXTERN_INLINE int t2_is_mmio(const volatile void __iomem *addr) it doesn't make sense to merge the pio and mmio routines. 
*/ #define IOPORT(OS, NS) \ -__EXTERN_INLINE unsigned int t2_ioread##NS(void __iomem *xaddr) \ +__EXTERN_INLINE unsigned int t2_ioread##NS(const void __iomem *xaddr) \ { \ if (t2_is_mmio(xaddr)) \ return t2_read##OS(xaddr); \ diff --git a/arch/alpha/include/asm/io.h b/arch/alpha/include/asm/io.h index a4d0c19f1e79..1f6a909d1fa5 100644 --- a/arch/alpha/include/asm/io.h +++ b/arch/alpha/include/asm/io.h @@ -150,9 +150,9 @@ static inline void generic_##NAME(TYPE b, QUAL void __iomem *addr) \ alpha_mv.mv_##NAME(b, addr); \ } -REMAP1(unsigned int, ioread8, /**/) -REMAP1(unsigned int, ioread16, /**/) -REMAP1(unsigned int, ioread32, /**/) +REMAP1(unsigned int, ioread8, const) +REMAP1(unsigned int, ioread16, const) +REMAP1(unsigned int, ioread32, const) REMAP1(u8, readb, const volatile) REMAP1(u16, readw, const volatile) REMAP1(u32, readl, const volatile) @@ -307,7 +307,7 @@ static inline int __is_mmio(const volatile void __iomem *addr) */ #if IO_CONCAT(__IO_PREFIX,trivial_io_bw) -extern inline unsigned int ioread8(void __iomem *addr) +extern inline unsigned int ioread8(const void __iomem *addr) { unsigned int ret; mb(); @@ -316,7 +316,7 @@ extern inline unsigned int ioread8(void __iomem *addr) return ret; } -extern inline unsigned int ioread16(void __iomem *addr) +extern inline unsigned int ioread16(const void __iomem *addr) { unsigned int ret; mb(); @@ -359,7 +359,7 @@ extern inline void outw(u16 b, unsigned long port) #endif #if IO_CONCAT(__IO_PREFIX,trivial_io_lq) -extern inline unsigned int ioread32(void __iomem *addr) +extern inline unsigned int ioread32(const void __iomem *addr) { unsigned int ret; mb(); @@ -489,10 +489,10 @@ extern inline void writeq(u64 b, volatile void __iomem *addr) } #endif -#define ioread16be(p) be16_to_cpu(ioread16(p)) -#define ioread32be(p) be32_to_cpu(ioread32(p)) -#define iowrite16be(v,p) iowrite16(cpu_to_be16(v), (p)) -#define iowrite32be(v,p) iowrite32(cpu_to_be32(v), (p)) +#define ioread16be(p) swab16(ioread16(p)) +#define ioread32be(p) swab32(ioread32(p)) +#define iowrite16be(v,p) iowrite16(swab16(v), (p)) +#define iowrite32be(v,p) iowrite32(swab32(v), (p)) #define inb_p inb #define inw_p inw diff --git a/arch/alpha/include/asm/io_trivial.h b/arch/alpha/include/asm/io_trivial.h index ba3d8f0cfe0c..a1a29cbe02fa 100644 --- a/arch/alpha/include/asm/io_trivial.h +++ b/arch/alpha/include/asm/io_trivial.h @@ -7,15 +7,15 @@ #if IO_CONCAT(__IO_PREFIX,trivial_io_bw) __EXTERN_INLINE unsigned int -IO_CONCAT(__IO_PREFIX,ioread8)(void __iomem *a) +IO_CONCAT(__IO_PREFIX,ioread8)(const void __iomem *a) { - return __kernel_ldbu(*(volatile u8 __force *)a); + return __kernel_ldbu(*(const volatile u8 __force *)a); } __EXTERN_INLINE unsigned int -IO_CONCAT(__IO_PREFIX,ioread16)(void __iomem *a) +IO_CONCAT(__IO_PREFIX,ioread16)(const void __iomem *a) { - return __kernel_ldwu(*(volatile u16 __force *)a); + return __kernel_ldwu(*(const volatile u16 __force *)a); } __EXTERN_INLINE void @@ -33,9 +33,9 @@ IO_CONCAT(__IO_PREFIX,iowrite16)(u16 b, void __iomem *a) #if IO_CONCAT(__IO_PREFIX,trivial_io_lq) __EXTERN_INLINE unsigned int -IO_CONCAT(__IO_PREFIX,ioread32)(void __iomem *a) +IO_CONCAT(__IO_PREFIX,ioread32)(const void __iomem *a) { - return *(volatile u32 __force *)a; + return *(const volatile u32 __force *)a; } __EXTERN_INLINE void @@ -73,14 +73,14 @@ IO_CONCAT(__IO_PREFIX,writew)(u16 b, volatile void __iomem *a) __EXTERN_INLINE u8 IO_CONCAT(__IO_PREFIX,readb)(const volatile void __iomem *a) { - void __iomem *addr = (void __iomem *)a; + const void __iomem *addr = (const void 
__iomem *)a; return IO_CONCAT(__IO_PREFIX,ioread8)(addr); } __EXTERN_INLINE u16 IO_CONCAT(__IO_PREFIX,readw)(const volatile void __iomem *a) { - void __iomem *addr = (void __iomem *)a; + const void __iomem *addr = (const void __iomem *)a; return IO_CONCAT(__IO_PREFIX,ioread16)(addr); } diff --git a/arch/alpha/include/asm/jensen.h b/arch/alpha/include/asm/jensen.h index 436dc905b6ad..916895155a88 100644 --- a/arch/alpha/include/asm/jensen.h +++ b/arch/alpha/include/asm/jensen.h @@ -305,7 +305,7 @@ __EXTERN_INLINE int jensen_is_mmio(const volatile void __iomem *addr) that it doesn't make sense to merge them. */ #define IOPORT(OS, NS) \ -__EXTERN_INLINE unsigned int jensen_ioread##NS(void __iomem *xaddr) \ +__EXTERN_INLINE unsigned int jensen_ioread##NS(const void __iomem *xaddr) \ { \ if (jensen_is_mmio(xaddr)) \ return jensen_read##OS(xaddr - 0x100000000ul); \ diff --git a/arch/alpha/include/asm/machvec.h b/arch/alpha/include/asm/machvec.h index a6b73c6d10ee..a4e96e2bec74 100644 --- a/arch/alpha/include/asm/machvec.h +++ b/arch/alpha/include/asm/machvec.h @@ -46,9 +46,9 @@ struct alpha_machine_vector void (*mv_pci_tbi)(struct pci_controller *hose, dma_addr_t start, dma_addr_t end); - unsigned int (*mv_ioread8)(void __iomem *); - unsigned int (*mv_ioread16)(void __iomem *); - unsigned int (*mv_ioread32)(void __iomem *); + unsigned int (*mv_ioread8)(const void __iomem *); + unsigned int (*mv_ioread16)(const void __iomem *); + unsigned int (*mv_ioread32)(const void __iomem *); void (*mv_iowrite8)(u8, void __iomem *); void (*mv_iowrite16)(u16, void __iomem *); diff --git a/arch/alpha/include/asm/uaccess.h b/arch/alpha/include/asm/uaccess.h index 1fe2b56cb861..1b6f25efa247 100644 --- a/arch/alpha/include/asm/uaccess.h +++ b/arch/alpha/include/asm/uaccess.h @@ -20,7 +20,7 @@ #define get_fs() (current_thread_info()->addr_limit) #define set_fs(x) (current_thread_info()->addr_limit = (x)) -#define segment_eq(a, b) ((a).seg == (b).seg) +#define uaccess_kernel() (get_fs().seg == KERNEL_DS.seg) /* * Is a address valid? This does a straightforward calculation rather diff --git a/arch/alpha/kernel/core_marvel.c b/arch/alpha/kernel/core_marvel.c index 4c80d992a659..4485b77f8658 100644 --- a/arch/alpha/kernel/core_marvel.c +++ b/arch/alpha/kernel/core_marvel.c @@ -806,7 +806,7 @@ void __iomem *marvel_ioportmap (unsigned long addr) } unsigned int -marvel_ioread8(void __iomem *xaddr) +marvel_ioread8(const void __iomem *xaddr) { unsigned long addr = (unsigned long) xaddr; if (__marvel_is_port_kbd(addr)) diff --git a/arch/alpha/kernel/io.c b/arch/alpha/kernel/io.c index 938de13adfbf..838586abb1e0 100644 --- a/arch/alpha/kernel/io.c +++ b/arch/alpha/kernel/io.c @@ -14,7 +14,7 @@ "generic", which bumps through the machine vector. */ unsigned int -ioread8(void __iomem *addr) +ioread8(const void __iomem *addr) { unsigned int ret; mb(); @@ -23,7 +23,7 @@ ioread8(void __iomem *addr) return ret; } -unsigned int ioread16(void __iomem *addr) +unsigned int ioread16(const void __iomem *addr) { unsigned int ret; mb(); @@ -32,7 +32,7 @@ unsigned int ioread16(void __iomem *addr) return ret; } -unsigned int ioread32(void __iomem *addr) +unsigned int ioread32(const void __iomem *addr) { unsigned int ret; mb(); @@ -257,7 +257,7 @@ EXPORT_SYMBOL(readq_relaxed); /* * Read COUNT 8-bit bytes from port PORT into memory starting at SRC. 
*/ -void ioread8_rep(void __iomem *port, void *dst, unsigned long count) +void ioread8_rep(const void __iomem *port, void *dst, unsigned long count) { while ((unsigned long)dst & 0x3) { if (!count) @@ -300,7 +300,7 @@ EXPORT_SYMBOL(insb); * the interfaces seems to be slow: just using the inlined version * of the inw() breaks things. */ -void ioread16_rep(void __iomem *port, void *dst, unsigned long count) +void ioread16_rep(const void __iomem *port, void *dst, unsigned long count) { if (unlikely((unsigned long)dst & 0x3)) { if (!count) @@ -340,7 +340,7 @@ EXPORT_SYMBOL(insw); * but the interfaces seems to be slow: just using the inlined version * of the inl() breaks things. */ -void ioread32_rep(void __iomem *port, void *dst, unsigned long count) +void ioread32_rep(const void __iomem *port, void *dst, unsigned long count) { if (unlikely((unsigned long)dst & 0x3)) { while (count--) { diff --git a/arch/alpha/kernel/syscalls/syscall.tbl b/arch/alpha/kernel/syscalls/syscall.tbl index a28fb211881d..ec8bed9e7b75 100644 --- a/arch/alpha/kernel/syscalls/syscall.tbl +++ b/arch/alpha/kernel/syscalls/syscall.tbl @@ -249,7 +249,7 @@ 316 common mlockall sys_mlockall 317 common munlockall sys_munlockall 318 common sysinfo sys_sysinfo -319 common _sysctl sys_sysctl +319 common _sysctl sys_ni_syscall # 320 was sys_idle 321 common oldumount sys_oldumount 322 common swapon sys_swapon diff --git a/arch/alpha/mm/fault.c b/arch/alpha/mm/fault.c index c2303a8c2b9f..09172f017efc 100644 --- a/arch/alpha/mm/fault.c +++ b/arch/alpha/mm/fault.c @@ -25,6 +25,7 @@ #include <linux/interrupt.h> #include <linux/extable.h> #include <linux/uaccess.h> +#include <linux/perf_event.h> extern void die_if_kernel(char *,struct pt_regs *,long, unsigned long *); @@ -116,6 +117,7 @@ do_page_fault(unsigned long address, unsigned long mmcsr, #endif if (user_mode(regs)) flags |= FAULT_FLAG_USER; + perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address); retry: mmap_read_lock(mm); vma = find_vma(mm, address); @@ -148,7 +150,7 @@ retry: /* If for any reason at all we couldn't handle the fault, make sure we exit gracefully rather than endlessly redo the fault. 
*/ - fault = handle_mm_fault(vma, address, flags); + fault = handle_mm_fault(vma, address, flags, regs); if (fault_signal_pending(fault, regs)) return; @@ -164,10 +166,6 @@ retry: } if (flags & FAULT_FLAG_ALLOW_RETRY) { - if (fault & VM_FAULT_MAJOR) - current->maj_flt++; - else - current->min_flt++; if (fault & VM_FAULT_RETRY) { flags |= FAULT_FLAG_TRIED; diff --git a/arch/arc/include/asm/segment.h b/arch/arc/include/asm/segment.h index 6a2a5be5026d..871f8ab11bfd 100644 --- a/arch/arc/include/asm/segment.h +++ b/arch/arc/include/asm/segment.h @@ -14,8 +14,7 @@ typedef unsigned long mm_segment_t; #define KERNEL_DS MAKE_MM_SEG(0) #define USER_DS MAKE_MM_SEG(TASK_SIZE) - -#define segment_eq(a, b) ((a) == (b)) +#define uaccess_kernel() (get_fs() == KERNEL_DS) #endif /* __ASSEMBLY__ */ #endif /* __ASMARC_SEGMENT_H */ diff --git a/arch/arc/kernel/process.c b/arch/arc/kernel/process.c index e12c80d71b78..efeba1fe7252 100644 --- a/arch/arc/kernel/process.c +++ b/arch/arc/kernel/process.c @@ -91,7 +91,7 @@ fault: goto fail; mmap_read_lock(current->mm); - ret = fixup_user_fault(current, current->mm, (unsigned long) uaddr, + ret = fixup_user_fault(current->mm, (unsigned long) uaddr, FAULT_FLAG_WRITE, NULL); mmap_read_unlock(current->mm); diff --git a/arch/arc/mm/fault.c b/arch/arc/mm/fault.c index 7287c793d1c9..f5657cb68e4f 100644 --- a/arch/arc/mm/fault.c +++ b/arch/arc/mm/fault.c @@ -105,6 +105,7 @@ void do_page_fault(unsigned long address, struct pt_regs *regs) if (write) flags |= FAULT_FLAG_WRITE; + perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address); retry: mmap_read_lock(mm); @@ -130,7 +131,7 @@ retry: goto bad_area; } - fault = handle_mm_fault(vma, address, flags); + fault = handle_mm_fault(vma, address, flags, regs); /* Quick path to respond to signals */ if (fault_signal_pending(fault, regs)) { @@ -155,22 +156,9 @@ bad_area: * Major/minor page fault accounting * (in case of retry we only land here once) */ - perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address); - - if (likely(!(fault & VM_FAULT_ERROR))) { - if (fault & VM_FAULT_MAJOR) { - tsk->maj_flt++; - perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, - regs, address); - } else { - tsk->min_flt++; - perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, - regs, address); - } - + if (likely(!(fault & VM_FAULT_ERROR))) /* Normal return path: fault Handled Gracefully */ return; - } if (!user_mode(regs)) goto no_context; diff --git a/arch/arm/configs/am200epdkit_defconfig b/arch/arm/configs/am200epdkit_defconfig index f56ac394caf1..4e49d6cb2f62 100644 --- a/arch/arm/configs/am200epdkit_defconfig +++ b/arch/arm/configs/am200epdkit_defconfig @@ -3,7 +3,6 @@ CONFIG_LOCALVERSION="gum" CONFIG_SYSVIPC=y CONFIG_SYSFS_DEPRECATED_V2=y CONFIG_EXPERT=y -# CONFIG_SYSCTL_SYSCALL is not set # CONFIG_EPOLL is not set # CONFIG_SHMEM is not set # CONFIG_VM_EVENT_COUNTERS is not set diff --git a/arch/arm/include/asm/uaccess.h b/arch/arm/include/asm/uaccess.h index b5fdd30252f8..a13d90206472 100644 --- a/arch/arm/include/asm/uaccess.h +++ b/arch/arm/include/asm/uaccess.h @@ -76,7 +76,7 @@ static inline void set_fs(mm_segment_t fs) modify_domain(DOMAIN_KERNEL, fs ? DOMAIN_CLIENT : DOMAIN_MANAGER); } -#define segment_eq(a, b) ((a) == (b)) +#define uaccess_kernel() (get_fs() == KERNEL_DS) /* * We use 33-bit arithmetic here. 
Success returns zero, failure returns @@ -267,7 +267,7 @@ extern int __put_user_8(void *, unsigned long long); */ #define USER_DS KERNEL_DS -#define segment_eq(a, b) (1) +#define uaccess_kernel() (true) #define __addr_ok(addr) ((void)(addr), 1) #define __range_ok(addr, size) ((void)(addr), 0) #define get_fs() (KERNEL_DS) diff --git a/arch/arm/include/asm/vdso/gettimeofday.h b/arch/arm/include/asm/vdso/gettimeofday.h index 1b207cf07697..2134cbd5469f 100644 --- a/arch/arm/include/asm/vdso/gettimeofday.h +++ b/arch/arm/include/asm/vdso/gettimeofday.h @@ -113,7 +113,8 @@ static inline bool arm_vdso_hres_capable(void) } #define __arch_vdso_hres_capable arm_vdso_hres_capable -static __always_inline u64 __arch_get_hw_counter(int clock_mode) +static __always_inline u64 __arch_get_hw_counter(int clock_mode, + const struct vdso_data *vd) { #ifdef CONFIG_ARM_ARCH_TIMER u64 cycle_now; diff --git a/arch/arm/kernel/signal.c b/arch/arm/kernel/signal.c index ab2568996ddb..c9dc912b83f0 100644 --- a/arch/arm/kernel/signal.c +++ b/arch/arm/kernel/signal.c @@ -713,7 +713,9 @@ struct page *get_signal_page(void) /* Defer to generic check */ asmlinkage void addr_limit_check_failed(void) { +#ifdef CONFIG_MMU addr_limit_user_check(); +#endif } #ifdef CONFIG_DEBUG_RSEQ diff --git a/arch/arm/mm/fault.c b/arch/arm/mm/fault.c index c6550eddfce1..efa402025031 100644 --- a/arch/arm/mm/fault.c +++ b/arch/arm/mm/fault.c @@ -202,7 +202,8 @@ static inline bool access_error(unsigned int fsr, struct vm_area_struct *vma) static vm_fault_t __kprobes __do_page_fault(struct mm_struct *mm, unsigned long addr, unsigned int fsr, - unsigned int flags, struct task_struct *tsk) + unsigned int flags, struct task_struct *tsk, + struct pt_regs *regs) { struct vm_area_struct *vma; vm_fault_t fault; @@ -224,7 +225,7 @@ good_area: goto out; } - return handle_mm_fault(vma, addr & PAGE_MASK, flags); + return handle_mm_fault(vma, addr & PAGE_MASK, flags, regs); check_stack: /* Don't allow expansion below FIRST_USER_ADDRESS */ @@ -266,6 +267,8 @@ do_page_fault(unsigned long addr, unsigned int fsr, struct pt_regs *regs) if ((fsr & FSR_WRITE) && !(fsr & FSR_CM)) flags |= FAULT_FLAG_WRITE; + perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, addr); + /* * As per x86, we may deadlock here. However, since the kernel only * validly references user space from well defined areas of the code, @@ -290,7 +293,7 @@ retry: #endif } - fault = __do_page_fault(mm, addr, fsr, flags, tsk); + fault = __do_page_fault(mm, addr, fsr, flags, tsk, regs); /* If we need to retry but a fatal signal is pending, handle the * signal first. We do not need to release the mmap_lock because @@ -302,23 +305,7 @@ retry: return 0; } - /* - * Major/minor page fault accounting is only done on the - * initial attempt. If we go through a retry, it is extremely - * likely that the page will be found in page cache at that point. 
- */ - - perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, addr); if (!(fault & VM_FAULT_ERROR) && flags & FAULT_FLAG_ALLOW_RETRY) { - if (fault & VM_FAULT_MAJOR) { - tsk->maj_flt++; - perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, - regs, addr); - } else { - tsk->min_flt++; - perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, - regs, addr); - } if (fault & VM_FAULT_RETRY) { flags |= FAULT_FLAG_TRIED; goto retry; diff --git a/arch/arm/tools/syscall.tbl b/arch/arm/tools/syscall.tbl index 7e8ee4adf269..171077cbf419 100644 --- a/arch/arm/tools/syscall.tbl +++ b/arch/arm/tools/syscall.tbl @@ -162,7 +162,7 @@ 146 common writev sys_writev 147 common getsid sys_getsid 148 common fdatasync sys_fdatasync -149 common _sysctl sys_sysctl +149 common _sysctl sys_ni_syscall 150 common mlock sys_mlock 151 common munlock sys_munlock 152 common mlockall sys_mlockall diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index e11b4ea06127..6d232837cbee 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -1182,22 +1182,6 @@ config HARDEN_BRANCH_PREDICTOR If unsure, say Y. -config HARDEN_EL2_VECTORS - bool "Harden EL2 vector mapping against system register leak" if EXPERT - default y - help - Speculation attacks against some high-performance processors can - be used to leak privileged information such as the vector base - register, resulting in a potential defeat of the EL2 layout - randomization. - - This config option will map the vectors to a fixed location, - independent of the EL2 code mapping, so that revealing VBAR_EL2 - to an attacker does not give away any extra information. This - only gets enabled on affected CPUs. - - If unsure, say Y. - config ARM64_SSBD bool "Speculative Store Bypass Disable" if EXPERT default y @@ -1520,7 +1504,6 @@ menu "ARMv8.3 architectural features" config ARM64_PTR_AUTH bool "Enable support for pointer authentication" default y - depends on !KVM || ARM64_VHE depends on (CC_HAS_SIGN_RETURN_ADDRESS || CC_HAS_BRANCH_PROT_PAC_RET) && AS_HAS_PAC # Modern compilers insert a .note.gnu.property section note for PAC # which is only understood by binutils starting with version 2.33.1. @@ -1547,8 +1530,7 @@ config ARM64_PTR_AUTH The feature is detected at runtime. If the feature is not present in hardware it will not be advertised to userspace/KVM guest nor will it - be enabled. However, KVM guest also require VHE mode and hence - CONFIG_ARM64_VHE=y option to use this feature. + be enabled. If the feature is present on the boot CPU but not on a late CPU, then the late CPU will be parked. 
Also, if the boot CPU does not have diff --git a/arch/arm64/boot/dts/ti/k3-am65-main.dtsi b/arch/arm64/boot/dts/ti/k3-am65-main.dtsi index 9edfae5944f7..24ef18fe77df 100644 --- a/arch/arm64/boot/dts/ti/k3-am65-main.dtsi +++ b/arch/arm64/boot/dts/ti/k3-am65-main.dtsi @@ -417,10 +417,10 @@ ti,intr-trigger-type = <1>; interrupt-controller; interrupt-parent = <&gic500>; - #interrupt-cells = <2>; + #interrupt-cells = <1>; ti,sci = <&dmsc>; - ti,sci-dst-id = <56>; - ti,sci-rm-range-girq = <0x1>; + ti,sci-dev-id = <100>; + ti,interrupt-ranges = <0 392 32>; }; main_navss { @@ -438,10 +438,11 @@ ti,intr-trigger-type = <4>; interrupt-controller; interrupt-parent = <&gic500>; - #interrupt-cells = <2>; + #interrupt-cells = <1>; ti,sci = <&dmsc>; - ti,sci-dst-id = <56>; - ti,sci-rm-range-girq = <0x0>, <0x2>; + ti,sci-dev-id = <182>; + ti,interrupt-ranges = <0 64 64>, + <64 448 64>; }; inta_main_udmass: interrupt-controller@33d00000 { @@ -452,8 +453,7 @@ msi-controller; ti,sci = <&dmsc>; ti,sci-dev-id = <179>; - ti,sci-rm-range-vint = <0x0>; - ti,sci-rm-range-global-event = <0x1>; + ti,interrupt-ranges = <0 0 256>; }; secure_proxy_main: mailbox@32c00000 { @@ -589,7 +589,7 @@ <0x0 0x33000000 0x0 0x40000>; reg-names = "rt", "fifos", "proxy_gcfg", "proxy_target"; ti,num-rings = <818>; - ti,sci-rm-range-gp-rings = <0x2>; /* GP ring range */ + ti,sci-rm-range-gp-rings = <0x1>; /* GP ring range */ ti,dma-ring-reset-quirk; ti,sci = <&dmsc>; ti,sci-dev-id = <187>; @@ -609,11 +609,11 @@ ti,sci-dev-id = <188>; ti,ringacc = <&ringacc>; - ti,sci-rm-range-tchan = <0x1>, /* TX_HCHAN */ - <0x2>; /* TX_CHAN */ - ti,sci-rm-range-rchan = <0x4>, /* RX_HCHAN */ - <0x5>; /* RX_CHAN */ - ti,sci-rm-range-rflow = <0x6>; /* GP RFLOW */ + ti,sci-rm-range-tchan = <0xf>, /* TX_HCHAN */ + <0xd>; /* TX_CHAN */ + ti,sci-rm-range-rchan = <0xb>, /* RX_HCHAN */ + <0xa>; /* RX_CHAN */ + ti,sci-rm-range-rflow = <0x0>; /* GP RFLOW */ }; cpts@310d0000 { @@ -622,7 +622,7 @@ reg-names = "cpts"; clocks = <&main_cpts_mux>; clock-names = "cpts"; - interrupts-extended = <&intr_main_navss 163 0>; + interrupts-extended = <&intr_main_navss 391>; interrupt-names = "cpts"; ti,cpts-periodic-outputs = <6>; ti,cpts-ext-ts-inputs = <8>; @@ -645,8 +645,7 @@ gpio-controller; #gpio-cells = <2>; interrupt-parent = <&intr_main_gpio>; - interrupts = <57 256>, <57 257>, <57 258>, <57 259>, <57 260>, - <57 261>; + interrupts = <192>, <193>, <194>, <195>, <196>, <197>; interrupt-controller; #interrupt-cells = <2>; ti,ngpio = <96>; @@ -661,8 +660,7 @@ gpio-controller; #gpio-cells = <2>; interrupt-parent = <&intr_main_gpio>; - interrupts = <58 256>, <58 257>, <58 258>, <58 259>, <58 260>, - <58 261>; + interrupts = <200>, <201>, <202>, <203>, <204>, <205>; interrupt-controller; #interrupt-cells = <2>; ti,ngpio = <90>; diff --git a/arch/arm64/boot/dts/ti/k3-am65-mcu.dtsi b/arch/arm64/boot/dts/ti/k3-am65-mcu.dtsi index 8c1abcfe0860..51ca4b4d4c21 100644 --- a/arch/arm64/boot/dts/ti/k3-am65-mcu.dtsi +++ b/arch/arm64/boot/dts/ti/k3-am65-mcu.dtsi @@ -134,7 +134,7 @@ <0x0 0x2a500000 0x0 0x40000>; reg-names = "rt", "fifos", "proxy_gcfg", "proxy_target"; ti,num-rings = <286>; - ti,sci-rm-range-gp-rings = <0x2>; /* GP ring range */ + ti,sci-rm-range-gp-rings = <0x1>; /* GP ring range */ ti,dma-ring-reset-quirk; ti,sci = <&dmsc>; ti,sci-dev-id = <195>; @@ -154,11 +154,11 @@ ti,sci-dev-id = <194>; ti,ringacc = <&mcu_ringacc>; - ti,sci-rm-range-tchan = <0x1>, /* TX_HCHAN */ - <0x2>; /* TX_CHAN */ - ti,sci-rm-range-rchan = <0x3>, /* RX_HCHAN */ - <0x4>; /* RX_CHAN */ - 
ti,sci-rm-range-rflow = <0x5>; /* GP RFLOW */ + ti,sci-rm-range-tchan = <0xf>, /* TX_HCHAN */ + <0xd>; /* TX_CHAN */ + ti,sci-rm-range-rchan = <0xb>, /* RX_HCHAN */ + <0xa>; /* RX_CHAN */ + ti,sci-rm-range-rflow = <0x0>; /* GP RFLOW */ }; }; diff --git a/arch/arm64/boot/dts/ti/k3-am65-wakeup.dtsi b/arch/arm64/boot/dts/ti/k3-am65-wakeup.dtsi index 5f55b9e82cf1..a1ffe88d9664 100644 --- a/arch/arm64/boot/dts/ti/k3-am65-wakeup.dtsi +++ b/arch/arm64/boot/dts/ti/k3-am65-wakeup.dtsi @@ -74,10 +74,10 @@ ti,intr-trigger-type = <1>; interrupt-controller; interrupt-parent = <&gic500>; - #interrupt-cells = <2>; + #interrupt-cells = <1>; ti,sci = <&dmsc>; - ti,sci-dst-id = <56>; - ti,sci-rm-range-girq = <0x4>; + ti,sci-dev-id = <156>; + ti,interrupt-ranges = <0 712 16>; }; wkup_gpio0: wkup_gpio0@42110000 { @@ -86,7 +86,7 @@ gpio-controller; #gpio-cells = <2>; interrupt-parent = <&intr_wkup_gpio>; - interrupts = <59 128>, <59 129>, <59 130>, <59 131>; + interrupts = <60>, <61>, <62>, <63>; interrupt-controller; #interrupt-cells = <2>; ti,ngpio = <56>; diff --git a/arch/arm64/boot/dts/ti/k3-am654-base-board.dts b/arch/arm64/boot/dts/ti/k3-am654-base-board.dts index 611e66207010..b8a8a0fcb8af 100644 --- a/arch/arm64/boot/dts/ti/k3-am654-base-board.dts +++ b/arch/arm64/boot/dts/ti/k3-am654-base-board.dts @@ -384,7 +384,7 @@ }; &mailbox0_cluster0 { - interrupts = <164 0>; + interrupts = <436>; mbox_mcu_r5fss0_core0: mbox-mcu-r5fss0-core0 { ti,mbox-tx = <1 0 0>; @@ -393,7 +393,7 @@ }; &mailbox0_cluster1 { - interrupts = <165 0>; + interrupts = <432>; mbox_mcu_r5fss0_core1: mbox-mcu-r5fss0-core1 { ti,mbox-tx = <1 0 0>; diff --git a/arch/arm64/boot/dts/ti/k3-j721e-common-proc-board.dts b/arch/arm64/boot/dts/ti/k3-j721e-common-proc-board.dts index 8bc1e6ecc50e..e8fc01d97ada 100644 --- a/arch/arm64/boot/dts/ti/k3-j721e-common-proc-board.dts +++ b/arch/arm64/boot/dts/ti/k3-j721e-common-proc-board.dts @@ -287,7 +287,7 @@ }; &mailbox0_cluster0 { - interrupts = <214 0>; + interrupts = <436>; mbox_mcu_r5fss0_core0: mbox-mcu-r5fss0-core0 { ti,mbox-rx = <0 0 0>; @@ -301,7 +301,7 @@ }; &mailbox0_cluster1 { - interrupts = <215 0>; + interrupts = <432>; mbox_main_r5fss0_core0: mbox-main-r5fss0-core0 { ti,mbox-rx = <0 0 0>; @@ -315,7 +315,7 @@ }; &mailbox0_cluster2 { - interrupts = <216 0>; + interrupts = <428>; mbox_main_r5fss1_core0: mbox-main-r5fss1-core0 { ti,mbox-rx = <0 0 0>; @@ -329,7 +329,7 @@ }; &mailbox0_cluster3 { - interrupts = <217 0>; + interrupts = <424>; mbox_c66_0: mbox-c66-0 { ti,mbox-rx = <0 0 0>; @@ -343,7 +343,7 @@ }; &mailbox0_cluster4 { - interrupts = <218 0>; + interrupts = <420>; mbox_c71_0: mbox-c71-0 { ti,mbox-rx = <0 0 0>; diff --git a/arch/arm64/boot/dts/ti/k3-j721e-main.dtsi b/arch/arm64/boot/dts/ti/k3-j721e-main.dtsi index d14060207f00..12ceea9b3c9a 100644 --- a/arch/arm64/boot/dts/ti/k3-j721e-main.dtsi +++ b/arch/arm64/boot/dts/ti/k3-j721e-main.dtsi @@ -80,10 +80,10 @@ ti,intr-trigger-type = <1>; interrupt-controller; interrupt-parent = <&gic500>; - #interrupt-cells = <2>; + #interrupt-cells = <1>; ti,sci = <&dmsc>; - ti,sci-dst-id = <14>; - ti,sci-rm-range-girq = <0x1>; + ti,sci-dev-id = <131>; + ti,interrupt-ranges = <8 392 56>; }; main_navss { @@ -101,10 +101,12 @@ ti,intr-trigger-type = <4>; interrupt-controller; interrupt-parent = <&gic500>; - #interrupt-cells = <2>; + #interrupt-cells = <1>; ti,sci = <&dmsc>; - ti,sci-dst-id = <14>; - ti,sci-rm-range-girq = <0>, <2>; + ti,sci-dev-id = <213>; + ti,interrupt-ranges = <0 64 64>, + <64 448 64>, + <128 672 64>; }; main_udmass_inta: 
interrupt-controller@33d00000 { @@ -115,8 +117,7 @@ msi-controller; ti,sci = <&dmsc>; ti,sci-dev-id = <209>; - ti,sci-rm-range-vint = <0xa>; - ti,sci-rm-range-global-event = <0xd>; + ti,interrupt-ranges = <0 0 256>; }; secure_proxy_main: mailbox@32c00000 { @@ -296,7 +297,7 @@ reg-names = "cpts"; clocks = <&k3_clks 201 1>; clock-names = "cpts"; - interrupts-extended = <&main_navss_intr 201 0>; + interrupts-extended = <&main_navss_intr 391>; interrupt-names = "cpts"; ti,cpts-periodic-outputs = <6>; ti,cpts-ext-ts-inputs = <8>; @@ -688,8 +689,8 @@ gpio-controller; #gpio-cells = <2>; interrupt-parent = <&main_gpio_intr>; - interrupts = <105 0>, <105 1>, <105 2>, <105 3>, - <105 4>, <105 5>, <105 6>, <105 7>; + interrupts = <256>, <257>, <258>, <259>, + <260>, <261>, <262>, <263>; interrupt-controller; #interrupt-cells = <2>; ti,ngpio = <128>; @@ -705,7 +706,7 @@ gpio-controller; #gpio-cells = <2>; interrupt-parent = <&main_gpio_intr>; - interrupts = <106 0>, <106 1>, <106 2>; + interrupts = <288>, <289>, <290>; interrupt-controller; #interrupt-cells = <2>; ti,ngpio = <36>; @@ -721,8 +722,8 @@ gpio-controller; #gpio-cells = <2>; interrupt-parent = <&main_gpio_intr>; - interrupts = <107 0>, <107 1>, <107 2>, <107 3>, - <107 4>, <107 5>, <107 6>, <107 7>; + interrupts = <264>, <265>, <266>, <267>, + <268>, <269>, <270>, <271>; interrupt-controller; #interrupt-cells = <2>; ti,ngpio = <128>; @@ -738,7 +739,7 @@ gpio-controller; #gpio-cells = <2>; interrupt-parent = <&main_gpio_intr>; - interrupts = <108 0>, <108 1>, <108 2>; + interrupts = <292>, <293>, <294>; interrupt-controller; #interrupt-cells = <2>; ti,ngpio = <36>; @@ -754,8 +755,8 @@ gpio-controller; #gpio-cells = <2>; interrupt-parent = <&main_gpio_intr>; - interrupts = <109 0>, <109 1>, <109 2>, <109 3>, - <109 4>, <109 5>, <109 6>, <109 7>; + interrupts = <272>, <273>, <274>, <275>, + <276>, <277>, <278>, <279>; interrupt-controller; #interrupt-cells = <2>; ti,ngpio = <128>; @@ -771,7 +772,7 @@ gpio-controller; #gpio-cells = <2>; interrupt-parent = <&main_gpio_intr>; - interrupts = <110 0>, <110 1>, <110 2>; + interrupts = <296>, <297>, <298>; interrupt-controller; #interrupt-cells = <2>; ti,ngpio = <36>; @@ -787,8 +788,8 @@ gpio-controller; #gpio-cells = <2>; interrupt-parent = <&main_gpio_intr>; - interrupts = <111 0>, <111 1>, <111 2>, <111 3>, - <111 4>, <111 5>, <111 6>, <111 7>; + interrupts = <280>, <281>, <282>, <283>, + <284>, <285>, <286>, <287>; interrupt-controller; #interrupt-cells = <2>; ti,ngpio = <128>; @@ -804,7 +805,7 @@ gpio-controller; #gpio-cells = <2>; interrupt-parent = <&main_gpio_intr>; - interrupts = <112 0>, <112 1>, <112 2>; + interrupts = <300>, <301>, <302>; interrupt-controller; #interrupt-cells = <2>; ti,ngpio = <36>; diff --git a/arch/arm64/boot/dts/ti/k3-j721e-mcu-wakeup.dtsi b/arch/arm64/boot/dts/ti/k3-j721e-mcu-wakeup.dtsi index 30a735bcd0c8..c4a48e8d420a 100644 --- a/arch/arm64/boot/dts/ti/k3-j721e-mcu-wakeup.dtsi +++ b/arch/arm64/boot/dts/ti/k3-j721e-mcu-wakeup.dtsi @@ -101,10 +101,10 @@ ti,intr-trigger-type = <1>; interrupt-controller; interrupt-parent = <&gic500>; - #interrupt-cells = <2>; + #interrupt-cells = <1>; ti,sci = <&dmsc>; - ti,sci-dst-id = <14>; - ti,sci-rm-range-girq = <0x5>; + ti,sci-dev-id = <137>; + ti,interrupt-ranges = <16 960 16>; }; wkup_gpio0: gpio@42110000 { @@ -113,8 +113,7 @@ gpio-controller; #gpio-cells = <2>; interrupt-parent = <&wkup_gpio_intr>; - interrupts = <113 0>, <113 1>, <113 2>, - <113 3>, <113 4>, <113 5>; + interrupts = <103>, <104>, <105>, <106>, <107>, <108>; 
interrupt-controller; #interrupt-cells = <2>; ti,ngpio = <84>; @@ -130,8 +129,7 @@ gpio-controller; #gpio-cells = <2>; interrupt-parent = <&wkup_gpio_intr>; - interrupts = <114 0>, <114 1>, <114 2>, - <114 3>, <114 4>, <114 5>; + interrupts = <112>, <113>, <114>, <115>, <116>, <117>; interrupt-controller; #interrupt-cells = <2>; ti,ngpio = <84>; diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h index 352aaebf4198..fb1a922b31ba 100644 --- a/arch/arm64/include/asm/kvm_asm.h +++ b/arch/arm64/include/asm/kvm_asm.h @@ -42,33 +42,81 @@ #include <linux/mm.h> -/* Translate a kernel address of @sym into its equivalent linear mapping */ -#define kvm_ksym_ref(sym) \ +/* + * Translate name of a symbol defined in nVHE hyp to the name seen + * by kernel proper. All nVHE symbols are prefixed by the build system + * to avoid clashes with the VHE variants. + */ +#define kvm_nvhe_sym(sym) __kvm_nvhe_##sym + +#define DECLARE_KVM_VHE_SYM(sym) extern char sym[] +#define DECLARE_KVM_NVHE_SYM(sym) extern char kvm_nvhe_sym(sym)[] + +/* + * Define a pair of symbols sharing the same name but one defined in + * VHE and the other in nVHE hyp implementations. + */ +#define DECLARE_KVM_HYP_SYM(sym) \ + DECLARE_KVM_VHE_SYM(sym); \ + DECLARE_KVM_NVHE_SYM(sym) + +#define CHOOSE_VHE_SYM(sym) sym +#define CHOOSE_NVHE_SYM(sym) kvm_nvhe_sym(sym) + +#ifndef __KVM_NVHE_HYPERVISOR__ +/* + * BIG FAT WARNINGS: + * + * - Don't be tempted to change the following is_kernel_in_hyp_mode() + * to has_vhe(). has_vhe() is implemented as a *final* capability, + * while this is used early at boot time, when the capabilities are + * not final yet.... + * + * - Don't let the nVHE hypervisor have access to this, as it will + * pick the *wrong* symbol (yes, it runs at EL2...). + */ +#define CHOOSE_HYP_SYM(sym) (is_kernel_in_hyp_mode() ? 
CHOOSE_VHE_SYM(sym) \ + : CHOOSE_NVHE_SYM(sym)) +#else +/* The nVHE hypervisor shouldn't even try to access anything */ +extern void *__nvhe_undefined_symbol; +#define CHOOSE_HYP_SYM(sym) __nvhe_undefined_symbol +#endif + +/* Translate a kernel address @ptr into its equivalent linear mapping */ +#define kvm_ksym_ref(ptr) \ ({ \ - void *val = &sym; \ + void *val = (ptr); \ if (!is_kernel_in_hyp_mode()) \ - val = lm_alias(&sym); \ + val = lm_alias((ptr)); \ val; \ }) +#define kvm_ksym_ref_nvhe(sym) kvm_ksym_ref(kvm_nvhe_sym(sym)) struct kvm; struct kvm_vcpu; +struct kvm_s2_mmu; -extern char __kvm_hyp_init[]; -extern char __kvm_hyp_init_end[]; +DECLARE_KVM_NVHE_SYM(__kvm_hyp_init); +DECLARE_KVM_HYP_SYM(__kvm_hyp_vector); +#define __kvm_hyp_init CHOOSE_NVHE_SYM(__kvm_hyp_init) +#define __kvm_hyp_vector CHOOSE_HYP_SYM(__kvm_hyp_vector) -extern char __kvm_hyp_vector[]; +#ifdef CONFIG_KVM_INDIRECT_VECTORS +extern atomic_t arm64_el2_vector_last_slot; +DECLARE_KVM_HYP_SYM(__bp_harden_hyp_vecs); +#define __bp_harden_hyp_vecs CHOOSE_HYP_SYM(__bp_harden_hyp_vecs) +#endif extern void __kvm_flush_vm_context(void); -extern void __kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa); -extern void __kvm_tlb_flush_vmid(struct kvm *kvm); -extern void __kvm_tlb_flush_local_vmid(struct kvm_vcpu *vcpu); +extern void __kvm_tlb_flush_vmid_ipa(struct kvm_s2_mmu *mmu, phys_addr_t ipa, + int level); +extern void __kvm_tlb_flush_vmid(struct kvm_s2_mmu *mmu); +extern void __kvm_tlb_flush_local_vmid(struct kvm_s2_mmu *mmu); extern void __kvm_timer_set_cntvoff(u64 cntvoff); -extern int kvm_vcpu_run_vhe(struct kvm_vcpu *vcpu); - -extern int __kvm_vcpu_run_nvhe(struct kvm_vcpu *vcpu); +extern int __kvm_vcpu_run(struct kvm_vcpu *vcpu); extern void __kvm_enable_ssbs(void); @@ -143,7 +191,6 @@ extern char __smccc_workaround_1_smc[__SMCCC_WORKAROUND_1_SMC_SZ]; .macro get_vcpu_ptr vcpu, ctxt get_host_ctxt \ctxt, \vcpu ldr \vcpu, [\ctxt, #HOST_CONTEXT_VCPU] - kern_hyp_va \vcpu .endm #endif diff --git a/arch/arm64/include/asm/kvm_coproc.h b/arch/arm64/include/asm/kvm_coproc.h index 454373704b8a..d6bb40122fdb 100644 --- a/arch/arm64/include/asm/kvm_coproc.h +++ b/arch/arm64/include/asm/kvm_coproc.h @@ -19,14 +19,6 @@ struct kvm_sys_reg_table { size_t num; }; -struct kvm_sys_reg_target_table { - struct kvm_sys_reg_table table64; - struct kvm_sys_reg_table table32; -}; - -void kvm_register_target_sys_reg_table(unsigned int target, - struct kvm_sys_reg_target_table *table); - int kvm_handle_cp14_load_store(struct kvm_vcpu *vcpu); int kvm_handle_cp14_32(struct kvm_vcpu *vcpu); int kvm_handle_cp14_64(struct kvm_vcpu *vcpu); diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h index 4d0f8ea600ba..49a55be2b9a2 100644 --- a/arch/arm64/include/asm/kvm_emulate.h +++ b/arch/arm64/include/asm/kvm_emulate.h @@ -124,33 +124,12 @@ static inline void vcpu_set_vsesr(struct kvm_vcpu *vcpu, u64 vsesr) static __always_inline unsigned long *vcpu_pc(const struct kvm_vcpu *vcpu) { - return (unsigned long *)&vcpu_gp_regs(vcpu)->regs.pc; -} - -static inline unsigned long *__vcpu_elr_el1(const struct kvm_vcpu *vcpu) -{ - return (unsigned long *)&vcpu_gp_regs(vcpu)->elr_el1; -} - -static inline unsigned long vcpu_read_elr_el1(const struct kvm_vcpu *vcpu) -{ - if (vcpu->arch.sysregs_loaded_on_cpu) - return read_sysreg_el1(SYS_ELR); - else - return *__vcpu_elr_el1(vcpu); -} - -static inline void vcpu_write_elr_el1(const struct kvm_vcpu *vcpu, unsigned long v) -{ - if (vcpu->arch.sysregs_loaded_on_cpu) - 
write_sysreg_el1(v, SYS_ELR); - else - *__vcpu_elr_el1(vcpu) = v; + return (unsigned long *)&vcpu_gp_regs(vcpu)->pc; } static __always_inline unsigned long *vcpu_cpsr(const struct kvm_vcpu *vcpu) { - return (unsigned long *)&vcpu_gp_regs(vcpu)->regs.pstate; + return (unsigned long *)&vcpu_gp_regs(vcpu)->pstate; } static __always_inline bool vcpu_mode_is_32bit(const struct kvm_vcpu *vcpu) @@ -179,14 +158,14 @@ static inline void vcpu_set_thumb(struct kvm_vcpu *vcpu) static __always_inline unsigned long vcpu_get_reg(const struct kvm_vcpu *vcpu, u8 reg_num) { - return (reg_num == 31) ? 0 : vcpu_gp_regs(vcpu)->regs.regs[reg_num]; + return (reg_num == 31) ? 0 : vcpu_gp_regs(vcpu)->regs[reg_num]; } static __always_inline void vcpu_set_reg(struct kvm_vcpu *vcpu, u8 reg_num, unsigned long val) { if (reg_num != 31) - vcpu_gp_regs(vcpu)->regs.regs[reg_num] = val; + vcpu_gp_regs(vcpu)->regs[reg_num] = val; } static inline unsigned long vcpu_read_spsr(const struct kvm_vcpu *vcpu) @@ -197,7 +176,7 @@ static inline unsigned long vcpu_read_spsr(const struct kvm_vcpu *vcpu) if (vcpu->arch.sysregs_loaded_on_cpu) return read_sysreg_el1(SYS_SPSR); else - return vcpu_gp_regs(vcpu)->spsr[KVM_SPSR_EL1]; + return __vcpu_sys_reg(vcpu, SPSR_EL1); } static inline void vcpu_write_spsr(struct kvm_vcpu *vcpu, unsigned long v) @@ -210,7 +189,7 @@ static inline void vcpu_write_spsr(struct kvm_vcpu *vcpu, unsigned long v) if (vcpu->arch.sysregs_loaded_on_cpu) write_sysreg_el1(v, SYS_SPSR); else - vcpu_gp_regs(vcpu)->spsr[KVM_SPSR_EL1] = v; + __vcpu_sys_reg(vcpu, SPSR_EL1) = v; } /* @@ -259,14 +238,14 @@ static inline bool vcpu_mode_priv(const struct kvm_vcpu *vcpu) return mode != PSR_MODE_EL0t; } -static __always_inline u32 kvm_vcpu_get_hsr(const struct kvm_vcpu *vcpu) +static __always_inline u32 kvm_vcpu_get_esr(const struct kvm_vcpu *vcpu) { return vcpu->arch.fault.esr_el2; } static __always_inline int kvm_vcpu_get_condition(const struct kvm_vcpu *vcpu) { - u32 esr = kvm_vcpu_get_hsr(vcpu); + u32 esr = kvm_vcpu_get_esr(vcpu); if (esr & ESR_ELx_CV) return (esr & ESR_ELx_COND_MASK) >> ESR_ELx_COND_SHIFT; @@ -291,64 +270,64 @@ static inline u64 kvm_vcpu_get_disr(const struct kvm_vcpu *vcpu) static inline u32 kvm_vcpu_hvc_get_imm(const struct kvm_vcpu *vcpu) { - return kvm_vcpu_get_hsr(vcpu) & ESR_ELx_xVC_IMM_MASK; + return kvm_vcpu_get_esr(vcpu) & ESR_ELx_xVC_IMM_MASK; } static __always_inline bool kvm_vcpu_dabt_isvalid(const struct kvm_vcpu *vcpu) { - return !!(kvm_vcpu_get_hsr(vcpu) & ESR_ELx_ISV); + return !!(kvm_vcpu_get_esr(vcpu) & ESR_ELx_ISV); } static inline unsigned long kvm_vcpu_dabt_iss_nisv_sanitized(const struct kvm_vcpu *vcpu) { - return kvm_vcpu_get_hsr(vcpu) & (ESR_ELx_CM | ESR_ELx_WNR | ESR_ELx_FSC); + return kvm_vcpu_get_esr(vcpu) & (ESR_ELx_CM | ESR_ELx_WNR | ESR_ELx_FSC); } static inline bool kvm_vcpu_dabt_issext(const struct kvm_vcpu *vcpu) { - return !!(kvm_vcpu_get_hsr(vcpu) & ESR_ELx_SSE); + return !!(kvm_vcpu_get_esr(vcpu) & ESR_ELx_SSE); } static inline bool kvm_vcpu_dabt_issf(const struct kvm_vcpu *vcpu) { - return !!(kvm_vcpu_get_hsr(vcpu) & ESR_ELx_SF); + return !!(kvm_vcpu_get_esr(vcpu) & ESR_ELx_SF); } static __always_inline int kvm_vcpu_dabt_get_rd(const struct kvm_vcpu *vcpu) { - return (kvm_vcpu_get_hsr(vcpu) & ESR_ELx_SRT_MASK) >> ESR_ELx_SRT_SHIFT; + return (kvm_vcpu_get_esr(vcpu) & ESR_ELx_SRT_MASK) >> ESR_ELx_SRT_SHIFT; } static __always_inline bool kvm_vcpu_dabt_iss1tw(const struct kvm_vcpu *vcpu) { - return !!(kvm_vcpu_get_hsr(vcpu) & ESR_ELx_S1PTW); + return 
!!(kvm_vcpu_get_esr(vcpu) & ESR_ELx_S1PTW); } static __always_inline bool kvm_vcpu_dabt_iswrite(const struct kvm_vcpu *vcpu) { - return !!(kvm_vcpu_get_hsr(vcpu) & ESR_ELx_WNR) || + return !!(kvm_vcpu_get_esr(vcpu) & ESR_ELx_WNR) || kvm_vcpu_dabt_iss1tw(vcpu); /* AF/DBM update */ } static inline bool kvm_vcpu_dabt_is_cm(const struct kvm_vcpu *vcpu) { - return !!(kvm_vcpu_get_hsr(vcpu) & ESR_ELx_CM); + return !!(kvm_vcpu_get_esr(vcpu) & ESR_ELx_CM); } static __always_inline unsigned int kvm_vcpu_dabt_get_as(const struct kvm_vcpu *vcpu) { - return 1 << ((kvm_vcpu_get_hsr(vcpu) & ESR_ELx_SAS) >> ESR_ELx_SAS_SHIFT); + return 1 << ((kvm_vcpu_get_esr(vcpu) & ESR_ELx_SAS) >> ESR_ELx_SAS_SHIFT); } /* This one is not specific to Data Abort */ static __always_inline bool kvm_vcpu_trap_il_is32bit(const struct kvm_vcpu *vcpu) { - return !!(kvm_vcpu_get_hsr(vcpu) & ESR_ELx_IL); + return !!(kvm_vcpu_get_esr(vcpu) & ESR_ELx_IL); } static __always_inline u8 kvm_vcpu_trap_get_class(const struct kvm_vcpu *vcpu) { - return ESR_ELx_EC(kvm_vcpu_get_hsr(vcpu)); + return ESR_ELx_EC(kvm_vcpu_get_esr(vcpu)); } static inline bool kvm_vcpu_trap_is_iabt(const struct kvm_vcpu *vcpu) @@ -358,15 +337,15 @@ static inline bool kvm_vcpu_trap_is_iabt(const struct kvm_vcpu *vcpu) static __always_inline u8 kvm_vcpu_trap_get_fault(const struct kvm_vcpu *vcpu) { - return kvm_vcpu_get_hsr(vcpu) & ESR_ELx_FSC; + return kvm_vcpu_get_esr(vcpu) & ESR_ELx_FSC; } static __always_inline u8 kvm_vcpu_trap_get_fault_type(const struct kvm_vcpu *vcpu) { - return kvm_vcpu_get_hsr(vcpu) & ESR_ELx_FSC_TYPE; + return kvm_vcpu_get_esr(vcpu) & ESR_ELx_FSC_TYPE; } -static __always_inline bool kvm_vcpu_dabt_isextabt(const struct kvm_vcpu *vcpu) +static __always_inline bool kvm_vcpu_abt_issea(const struct kvm_vcpu *vcpu) { switch (kvm_vcpu_trap_get_fault(vcpu)) { case FSC_SEA: @@ -387,7 +366,7 @@ static __always_inline bool kvm_vcpu_dabt_isextabt(const struct kvm_vcpu *vcpu) static __always_inline int kvm_vcpu_sys_get_rt(struct kvm_vcpu *vcpu) { - u32 esr = kvm_vcpu_get_hsr(vcpu); + u32 esr = kvm_vcpu_get_esr(vcpu); return ESR_ELx_SYS64_ISS_RT(esr); } @@ -516,14 +495,14 @@ static __always_inline void kvm_skip_instr(struct kvm_vcpu *vcpu, bool is_wide_i * Skip an instruction which has been emulated at hyp while most guest sysregs * are live. */ -static __always_inline void __hyp_text __kvm_skip_instr(struct kvm_vcpu *vcpu) +static __always_inline void __kvm_skip_instr(struct kvm_vcpu *vcpu) { *vcpu_pc(vcpu) = read_sysreg_el2(SYS_ELR); - vcpu->arch.ctxt.gp_regs.regs.pstate = read_sysreg_el2(SYS_SPSR); + vcpu_gp_regs(vcpu)->pstate = read_sysreg_el2(SYS_SPSR); kvm_skip_instr(vcpu, kvm_vcpu_trap_il_is32bit(vcpu)); - write_sysreg_el2(vcpu->arch.ctxt.gp_regs.regs.pstate, SYS_SPSR); + write_sysreg_el2(vcpu_gp_regs(vcpu)->pstate, SYS_SPSR); write_sysreg_el2(*vcpu_pc(vcpu), SYS_ELR); } diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index f81151ad3d3c..65568b23868a 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -66,19 +66,34 @@ struct kvm_vmid { u32 vmid; }; -struct kvm_arch { +struct kvm_s2_mmu { struct kvm_vmid vmid; - /* stage2 entry level table */ - pgd_t *pgd; - phys_addr_t pgd_phys; - - /* VTCR_EL2 value for this VM */ - u64 vtcr; + /* + * stage2 entry level table + * + * Two kvm_s2_mmu structures in the same VM can point to the same + * pgd here. 
This happens when running a guest using a + * translation regime that isn't affected by its own stage-2 + * translation, such as a non-VHE hypervisor running at vEL2, or + * for vEL1/EL0 with vHCR_EL2.VM == 0. In that case, we use the + * canonical stage-2 page tables. + */ + pgd_t *pgd; + phys_addr_t pgd_phys; /* The last vcpu id that ran on each physical CPU */ int __percpu *last_vcpu_ran; + struct kvm *kvm; +}; + +struct kvm_arch { + struct kvm_s2_mmu mmu; + + /* VTCR_EL2 value for this VM */ + u64 vtcr; + /* The maximum number of vCPUs depends on the used GIC model */ int max_vcpus; @@ -159,6 +174,16 @@ enum vcpu_sysreg { APGAKEYLO_EL1, APGAKEYHI_EL1, + ELR_EL1, + SP_EL1, + SPSR_EL1, + + CNTVOFF_EL2, + CNTV_CVAL_EL0, + CNTV_CTL_EL0, + CNTP_CVAL_EL0, + CNTP_CTL_EL0, + /* 32bit specific registers. Keep them at the end of the range */ DACR32_EL2, /* Domain Access Control Register */ IFSR32_EL2, /* Instruction Fault Status Register */ @@ -210,7 +235,15 @@ enum vcpu_sysreg { #define NR_COPRO_REGS (NR_SYS_REGS * 2) struct kvm_cpu_context { - struct kvm_regs gp_regs; + struct user_pt_regs regs; /* sp = sp_el0 */ + + u64 spsr_abt; + u64 spsr_und; + u64 spsr_irq; + u64 spsr_fiq; + + struct user_fpsimd_state fp_regs; + union { u64 sys_regs[NR_SYS_REGS]; u32 copro[NR_COPRO_REGS]; @@ -243,6 +276,9 @@ struct kvm_vcpu_arch { void *sve_state; unsigned int sve_max_vl; + /* Stage 2 paging state used by the hardware on next switch */ + struct kvm_s2_mmu *hw_mmu; + /* HYP configuration */ u64 hcr_el2; u32 mdcr_el2; @@ -327,7 +363,7 @@ struct kvm_vcpu_arch { struct vcpu_reset_state reset_state; /* True when deferrable sysregs are loaded on the physical CPU, - * see kvm_vcpu_load_sysregs and kvm_vcpu_put_sysregs. */ + * see kvm_vcpu_load_sysregs_vhe and kvm_vcpu_put_sysregs_vhe. */ bool sysregs_loaded_on_cpu; /* Guest PV state */ @@ -378,15 +414,20 @@ struct kvm_vcpu_arch { #define vcpu_has_ptrauth(vcpu) false #endif -#define vcpu_gp_regs(v) (&(v)->arch.ctxt.gp_regs) +#define vcpu_gp_regs(v) (&(v)->arch.ctxt.regs) /* - * Only use __vcpu_sys_reg if you know you want the memory backed version of a - * register, and not the one most recently accessed by a running VCPU. For - * example, for userspace access or for system registers that are never context - * switched, but only emulated. + * Only use __vcpu_sys_reg/ctxt_sys_reg if you know you want the + * memory backed version of a register, and not the one most recently + * accessed by a running VCPU. For example, for userspace access or + * for system registers that are never context switched, but only + * emulated. */ -#define __vcpu_sys_reg(v,r) ((v)->arch.ctxt.sys_regs[(r)]) +#define __ctxt_sys_reg(c,r) (&(c)->sys_regs[(r)]) + +#define ctxt_sys_reg(c,r) (*__ctxt_sys_reg(c,r)) + +#define __vcpu_sys_reg(v,r) (ctxt_sys_reg(&(v)->arch.ctxt, (r))) u64 vcpu_read_sys_reg(const struct kvm_vcpu *vcpu, int reg); void vcpu_write_sys_reg(struct kvm_vcpu *vcpu, u64 val, int reg); @@ -442,6 +483,18 @@ void kvm_arm_resume_guest(struct kvm *kvm); u64 __kvm_call_hyp(void *hypfn, ...); +#define kvm_call_hyp_nvhe(f, ...) \ + do { \ + DECLARE_KVM_NVHE_SYM(f); \ + __kvm_call_hyp(kvm_ksym_ref_nvhe(f), ##__VA_ARGS__); \ + } while(0) + +#define kvm_call_hyp_nvhe_ret(f, ...) 
\ + ({ \ + DECLARE_KVM_NVHE_SYM(f); \ + __kvm_call_hyp(kvm_ksym_ref_nvhe(f), ##__VA_ARGS__); \ + }) + /* * The couple of isb() below are there to guarantee the same behaviour * on VHE as on !VHE, where the eret to EL1 acts as a context @@ -453,7 +506,7 @@ u64 __kvm_call_hyp(void *hypfn, ...); f(__VA_ARGS__); \ isb(); \ } else { \ - __kvm_call_hyp(kvm_ksym_ref(f), ##__VA_ARGS__); \ + kvm_call_hyp_nvhe(f, ##__VA_ARGS__); \ } \ } while(0) @@ -465,8 +518,7 @@ u64 __kvm_call_hyp(void *hypfn, ...); ret = f(__VA_ARGS__); \ isb(); \ } else { \ - ret = __kvm_call_hyp(kvm_ksym_ref(f), \ - ##__VA_ARGS__); \ + ret = kvm_call_hyp_nvhe_ret(f, ##__VA_ARGS__); \ } \ \ ret; \ @@ -518,7 +570,7 @@ DECLARE_PER_CPU(kvm_host_data_t, kvm_host_data); static inline void kvm_init_host_cpu_context(struct kvm_cpu_context *cpu_ctxt) { /* The host's MPIDR is immutable, so let's set it up at boot time */ - cpu_ctxt->sys_regs[MPIDR_EL1] = read_cpuid_mpidr(); + ctxt_sys_reg(cpu_ctxt, MPIDR_EL1) = read_cpuid_mpidr(); } static inline bool kvm_arch_requires_vhe(void) @@ -619,8 +671,8 @@ static inline int kvm_arm_have_ssbd(void) } } -void kvm_vcpu_load_sysregs(struct kvm_vcpu *vcpu); -void kvm_vcpu_put_sysregs(struct kvm_vcpu *vcpu); +void kvm_vcpu_load_sysregs_vhe(struct kvm_vcpu *vcpu); +void kvm_vcpu_put_sysregs_vhe(struct kvm_vcpu *vcpu); int kvm_set_ipa_limit(void); diff --git a/arch/arm64/include/asm/kvm_hyp.h b/arch/arm64/include/asm/kvm_hyp.h index ce3080834bfa..46689e7db46c 100644 --- a/arch/arm64/include/asm/kvm_hyp.h +++ b/arch/arm64/include/asm/kvm_hyp.h @@ -12,8 +12,6 @@ #include <asm/alternative.h> #include <asm/sysreg.h> -#define __hyp_text __section(.hyp.text) notrace __noscs - #define read_sysreg_elx(r,nvh,vh) \ ({ \ u64 reg; \ @@ -63,17 +61,20 @@ void __vgic_v3_save_aprs(struct vgic_v3_cpu_if *cpu_if); void __vgic_v3_restore_aprs(struct vgic_v3_cpu_if *cpu_if); int __vgic_v3_perform_cpuif_access(struct kvm_vcpu *vcpu); +#ifdef __KVM_NVHE_HYPERVISOR__ void __timer_enable_traps(struct kvm_vcpu *vcpu); void __timer_disable_traps(struct kvm_vcpu *vcpu); +#endif +#ifdef __KVM_NVHE_HYPERVISOR__ void __sysreg_save_state_nvhe(struct kvm_cpu_context *ctxt); void __sysreg_restore_state_nvhe(struct kvm_cpu_context *ctxt); +#else void sysreg_save_host_state_vhe(struct kvm_cpu_context *ctxt); void sysreg_restore_host_state_vhe(struct kvm_cpu_context *ctxt); void sysreg_save_guest_state_vhe(struct kvm_cpu_context *ctxt); void sysreg_restore_guest_state_vhe(struct kvm_cpu_context *ctxt); -void __sysreg32_save_state(struct kvm_vcpu *vcpu); -void __sysreg32_restore_state(struct kvm_vcpu *vcpu); +#endif void __debug_switch_to_guest(struct kvm_vcpu *vcpu); void __debug_switch_to_host(struct kvm_vcpu *vcpu); @@ -81,11 +82,17 @@ void __debug_switch_to_host(struct kvm_vcpu *vcpu); void __fpsimd_save_state(struct user_fpsimd_state *fp_regs); void __fpsimd_restore_state(struct user_fpsimd_state *fp_regs); +#ifndef __KVM_NVHE_HYPERVISOR__ void activate_traps_vhe_load(struct kvm_vcpu *vcpu); void deactivate_traps_vhe_put(void); +#endif u64 __guest_enter(struct kvm_vcpu *vcpu, struct kvm_cpu_context *host_ctxt); + +void __noreturn hyp_panic(struct kvm_cpu_context *host_ctxt); +#ifdef __KVM_NVHE_HYPERVISOR__ void __noreturn __hyp_do_panic(unsigned long, ...); +#endif #endif /* __ARM64_KVM_HYP_H__ */ diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h index 40be8f6c7351..189839c3706a 100644 --- a/arch/arm64/include/asm/kvm_mmu.h +++ b/arch/arm64/include/asm/kvm_mmu.h @@ -134,8 +134,8 @@ int 
create_hyp_exec_mappings(phys_addr_t phys_addr, size_t size, void free_hyp_pgds(void); void stage2_unmap_vm(struct kvm *kvm); -int kvm_alloc_stage2_pgd(struct kvm *kvm); -void kvm_free_stage2_pgd(struct kvm *kvm); +int kvm_init_stage2_mmu(struct kvm *kvm, struct kvm_s2_mmu *mmu); +void kvm_free_stage2_pgd(struct kvm_s2_mmu *mmu); int kvm_phys_addr_ioremap(struct kvm *kvm, phys_addr_t guest_ipa, phys_addr_t pa, unsigned long size, bool writable); @@ -577,13 +577,13 @@ static inline u64 kvm_vttbr_baddr_mask(struct kvm *kvm) return vttbr_baddr_mask(kvm_phys_shift(kvm), kvm_stage2_levels(kvm)); } -static __always_inline u64 kvm_get_vttbr(struct kvm *kvm) +static __always_inline u64 kvm_get_vttbr(struct kvm_s2_mmu *mmu) { - struct kvm_vmid *vmid = &kvm->arch.vmid; + struct kvm_vmid *vmid = &mmu->vmid; u64 vmid_field, baddr; u64 cnp = system_supports_cnp() ? VTTBR_CNP_BIT : 0; - baddr = kvm->arch.pgd_phys; + baddr = mmu->pgd_phys; vmid_field = (u64)vmid->vmid << VTTBR_VMID_SHIFT; return kvm_phys_to_vttbr(baddr) | vmid_field | cnp; } @@ -592,10 +592,10 @@ static __always_inline u64 kvm_get_vttbr(struct kvm *kvm) * Must be called from hyp code running at EL2 with an updated VTTBR * and interrupts disabled. */ -static __always_inline void __load_guest_stage2(struct kvm *kvm) +static __always_inline void __load_guest_stage2(struct kvm_s2_mmu *mmu) { - write_sysreg(kvm->arch.vtcr, vtcr_el2); - write_sysreg(kvm_get_vttbr(kvm), vttbr_el2); + write_sysreg(kern_hyp_va(mmu->kvm)->arch.vtcr, vtcr_el2); + write_sysreg(kvm_get_vttbr(mmu), vttbr_el2); /* * ARM errata 1165522 and 1530923 require the actual execution of the diff --git a/arch/arm64/include/asm/kvm_ptrauth.h b/arch/arm64/include/asm/kvm_ptrauth.h index 6301813dcace..0ddf98c3ba9f 100644 --- a/arch/arm64/include/asm/kvm_ptrauth.h +++ b/arch/arm64/include/asm/kvm_ptrauth.h @@ -61,44 +61,36 @@ /* * Both ptrauth_switch_to_guest and ptrauth_switch_to_host macros will - * check for the presence of one of the cpufeature flag - * ARM64_HAS_ADDRESS_AUTH_ARCH or ARM64_HAS_ADDRESS_AUTH_IMP_DEF and + * check for the presence ARM64_HAS_ADDRESS_AUTH, which is defined as + * (ARM64_HAS_ADDRESS_AUTH_ARCH || ARM64_HAS_ADDRESS_AUTH_IMP_DEF) and * then proceed ahead with the save/restore of Pointer Authentication - * key registers. + * key registers if enabled for the guest. 
*/ .macro ptrauth_switch_to_guest g_ctxt, reg1, reg2, reg3 -alternative_if ARM64_HAS_ADDRESS_AUTH_ARCH - b 1000f +alternative_if_not ARM64_HAS_ADDRESS_AUTH + b .L__skip_switch\@ alternative_else_nop_endif -alternative_if_not ARM64_HAS_ADDRESS_AUTH_IMP_DEF - b 1001f -alternative_else_nop_endif -1000: - ldr \reg1, [\g_ctxt, #(VCPU_HCR_EL2 - VCPU_CONTEXT)] + mrs \reg1, hcr_el2 and \reg1, \reg1, #(HCR_API | HCR_APK) - cbz \reg1, 1001f + cbz \reg1, .L__skip_switch\@ add \reg1, \g_ctxt, #CPU_APIAKEYLO_EL1 ptrauth_restore_state \reg1, \reg2, \reg3 -1001: +.L__skip_switch\@: .endm .macro ptrauth_switch_to_host g_ctxt, h_ctxt, reg1, reg2, reg3 -alternative_if ARM64_HAS_ADDRESS_AUTH_ARCH - b 2000f -alternative_else_nop_endif -alternative_if_not ARM64_HAS_ADDRESS_AUTH_IMP_DEF - b 2001f +alternative_if_not ARM64_HAS_ADDRESS_AUTH + b .L__skip_switch\@ alternative_else_nop_endif -2000: - ldr \reg1, [\g_ctxt, #(VCPU_HCR_EL2 - VCPU_CONTEXT)] + mrs \reg1, hcr_el2 and \reg1, \reg1, #(HCR_API | HCR_APK) - cbz \reg1, 2001f + cbz \reg1, .L__skip_switch\@ add \reg1, \g_ctxt, #CPU_APIAKEYLO_EL1 ptrauth_save_state \reg1, \reg2, \reg3 add \reg1, \h_ctxt, #CPU_APIAKEYLO_EL1 ptrauth_restore_state \reg1, \reg2, \reg3 isb -2001: +.L__skip_switch\@: .endm #else /* !CONFIG_ARM64_PTR_AUTH */ diff --git a/arch/arm64/include/asm/mmu.h b/arch/arm64/include/asm/mmu.h index 8444df000181..a7a5ecaa2e83 100644 --- a/arch/arm64/include/asm/mmu.h +++ b/arch/arm64/include/asm/mmu.h @@ -45,13 +45,6 @@ struct bp_hardening_data { bp_hardening_cb_t fn; }; -#if (defined(CONFIG_HARDEN_BRANCH_PREDICTOR) || \ - defined(CONFIG_HARDEN_EL2_VECTORS)) - -extern char __bp_harden_hyp_vecs[]; -extern atomic_t arm64_el2_vector_last_slot; -#endif /* CONFIG_HARDEN_BRANCH_PREDICTOR || CONFIG_HARDEN_EL2_VECTORS */ - #ifdef CONFIG_HARDEN_BRANCH_PREDICTOR DECLARE_PER_CPU_READ_MOSTLY(struct bp_hardening_data, bp_hardening_data); diff --git a/arch/arm64/include/asm/uaccess.h b/arch/arm64/include/asm/uaccess.h index 8d7c466f809b..991dd5f031e4 100644 --- a/arch/arm64/include/asm/uaccess.h +++ b/arch/arm64/include/asm/uaccess.h @@ -50,7 +50,7 @@ static inline void set_fs(mm_segment_t fs) CONFIG_ARM64_UAO)); } -#define segment_eq(a, b) ((a) == (b)) +#define uaccess_kernel() (get_fs() == KERNEL_DS) /* * Test whether a block of memory is a valid user space address. 
diff --git a/arch/arm64/include/asm/unistd32.h b/arch/arm64/include/asm/unistd32.h index 17e81bd9a2d3..734860ac7cf9 100644 --- a/arch/arm64/include/asm/unistd32.h +++ b/arch/arm64/include/asm/unistd32.h @@ -308,8 +308,8 @@ __SYSCALL(__NR_writev, compat_sys_writev) __SYSCALL(__NR_getsid, sys_getsid) #define __NR_fdatasync 148 __SYSCALL(__NR_fdatasync, sys_fdatasync) -#define __NR__sysctl 149 -__SYSCALL(__NR__sysctl, compat_sys_sysctl) + /* 149 was sys_sysctl */ +__SYSCALL(149, sys_ni_syscall) #define __NR_mlock 150 __SYSCALL(__NR_mlock, sys_mlock) #define __NR_munlock 151 diff --git a/arch/arm64/include/asm/vdso/compat_gettimeofday.h b/arch/arm64/include/asm/vdso/compat_gettimeofday.h index 75cbae60455b..7508b0ac1d21 100644 --- a/arch/arm64/include/asm/vdso/compat_gettimeofday.h +++ b/arch/arm64/include/asm/vdso/compat_gettimeofday.h @@ -103,7 +103,8 @@ int clock_getres32_fallback(clockid_t _clkid, struct old_timespec32 *_ts) return ret; } -static __always_inline u64 __arch_get_hw_counter(s32 clock_mode) +static __always_inline u64 __arch_get_hw_counter(s32 clock_mode, + const struct vdso_data *vd) { u64 res; diff --git a/arch/arm64/include/asm/vdso/gettimeofday.h b/arch/arm64/include/asm/vdso/gettimeofday.h index 9c29ad3049f8..631ab1281633 100644 --- a/arch/arm64/include/asm/vdso/gettimeofday.h +++ b/arch/arm64/include/asm/vdso/gettimeofday.h @@ -64,7 +64,8 @@ int clock_getres_fallback(clockid_t _clkid, struct __kernel_timespec *_ts) return ret; } -static __always_inline u64 __arch_get_hw_counter(s32 clock_mode) +static __always_inline u64 __arch_get_hw_counter(s32 clock_mode, + const struct vdso_data *vd) { u64 res; diff --git a/arch/arm64/include/asm/virt.h b/arch/arm64/include/asm/virt.h index 5051b388c654..09977acc007d 100644 --- a/arch/arm64/include/asm/virt.h +++ b/arch/arm64/include/asm/virt.h @@ -85,10 +85,17 @@ static inline bool is_kernel_in_hyp_mode(void) static __always_inline bool has_vhe(void) { - if (cpus_have_final_cap(ARM64_HAS_VIRT_HOST_EXTN)) + /* + * The following macros are defined for code specific to VHE/nVHE. + * If has_vhe() is inlined into those compilation units, it can + * be determined statically. Otherwise fall back to caps.
+ */ + if (__is_defined(__KVM_VHE_HYPERVISOR__)) return true; - - return false; + else if (__is_defined(__KVM_NVHE_HYPERVISOR__)) + return false; + else + return cpus_have_final_cap(ARM64_HAS_VIRT_HOST_EXTN); } #endif /* __ASSEMBLY__ */ diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c index 0577e2142284..7d32fc959b1a 100644 --- a/arch/arm64/kernel/asm-offsets.c +++ b/arch/arm64/kernel/asm-offsets.c @@ -102,13 +102,12 @@ int main(void) DEFINE(VCPU_FAULT_DISR, offsetof(struct kvm_vcpu, arch.fault.disr_el1)); DEFINE(VCPU_WORKAROUND_FLAGS, offsetof(struct kvm_vcpu, arch.workaround_flags)); DEFINE(VCPU_HCR_EL2, offsetof(struct kvm_vcpu, arch.hcr_el2)); - DEFINE(CPU_GP_REGS, offsetof(struct kvm_cpu_context, gp_regs)); + DEFINE(CPU_USER_PT_REGS, offsetof(struct kvm_cpu_context, regs)); DEFINE(CPU_APIAKEYLO_EL1, offsetof(struct kvm_cpu_context, sys_regs[APIAKEYLO_EL1])); DEFINE(CPU_APIBKEYLO_EL1, offsetof(struct kvm_cpu_context, sys_regs[APIBKEYLO_EL1])); DEFINE(CPU_APDAKEYLO_EL1, offsetof(struct kvm_cpu_context, sys_regs[APDAKEYLO_EL1])); DEFINE(CPU_APDBKEYLO_EL1, offsetof(struct kvm_cpu_context, sys_regs[APDBKEYLO_EL1])); DEFINE(CPU_APGAKEYLO_EL1, offsetof(struct kvm_cpu_context, sys_regs[APGAKEYLO_EL1])); - DEFINE(CPU_USER_PT_REGS, offsetof(struct kvm_regs, regs)); DEFINE(HOST_CONTEXT_VCPU, offsetof(struct kvm_cpu_context, __hyp_running_vcpu)); DEFINE(HOST_DATA_CONTEXT, offsetof(struct kvm_host_data, host_ctxt)); #endif diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c index 79728bfb5351..6bd1d3ad037a 100644 --- a/arch/arm64/kernel/cpu_errata.c +++ b/arch/arm64/kernel/cpu_errata.c @@ -632,7 +632,7 @@ has_neoverse_n1_erratum_1542419(const struct arm64_cpu_capabilities *entry, return is_midr_in_range(midr, &range) && has_dic; } -#if defined(CONFIG_HARDEN_EL2_VECTORS) +#ifdef CONFIG_RANDOMIZE_BASE static const struct midr_range ca57_a72[] = { MIDR_ALL_VERSIONS(MIDR_CORTEX_A57), @@ -891,7 +891,7 @@ const struct arm64_cpu_capabilities arm64_errata[] = { .type = ARM64_CPUCAP_LOCAL_CPU_ERRATUM, .matches = check_branch_predictor, }, -#ifdef CONFIG_HARDEN_EL2_VECTORS +#ifdef CONFIG_RANDOMIZE_BASE { .desc = "EL2 vector hardening", .capability = ARM64_HARDEN_EL2_VECTORS, diff --git a/arch/arm64/kernel/image-vars.h b/arch/arm64/kernel/image-vars.h index be0a63ffed23..9e897c500237 100644 --- a/arch/arm64/kernel/image-vars.h +++ b/arch/arm64/kernel/image-vars.h @@ -51,4 +51,58 @@ __efistub__ctype = _ctype; #endif +#ifdef CONFIG_KVM + +/* + * KVM nVHE code has its own symbol namespace prefixed with __kvm_nvhe_, to + * separate it from the kernel proper. The following symbols are legally + * accessed by it, therefore provide aliases to make them linkable. + * Do not include symbols which may not be safely accessed under hypervisor + * memory mappings. + */ + +#define KVM_NVHE_ALIAS(sym) __kvm_nvhe_##sym = sym; + +/* Alternative callbacks for init-time patching of nVHE hyp code. */ +KVM_NVHE_ALIAS(arm64_enable_wa2_handling); +KVM_NVHE_ALIAS(kvm_patch_vector_branch); +KVM_NVHE_ALIAS(kvm_update_va_mask); + +/* Global kernel state accessed by nVHE hyp code. */ +KVM_NVHE_ALIAS(arm64_ssbd_callback_required); +KVM_NVHE_ALIAS(kvm_host_data); +KVM_NVHE_ALIAS(kvm_vgic_global_state); + +/* Kernel constant needed to compute idmap addresses. */ +KVM_NVHE_ALIAS(kimage_voffset); + +/* Kernel symbols used to call panic() from nVHE hyp code (via ERET). */ +KVM_NVHE_ALIAS(__hyp_panic_string); +KVM_NVHE_ALIAS(panic); + +/* Vectors installed by hyp-init on reset HVC. 
*/ +KVM_NVHE_ALIAS(__hyp_stub_vectors); + +/* IDMAP TCR_EL1.T0SZ as computed by the EL1 init code */ +KVM_NVHE_ALIAS(idmap_t0sz); + +/* Kernel symbol used by icache_is_vpipt(). */ +KVM_NVHE_ALIAS(__icache_flags); + +/* Kernel symbols needed for cpus_have_final/const_caps checks. */ +KVM_NVHE_ALIAS(arm64_const_caps_ready); +KVM_NVHE_ALIAS(cpu_hwcap_keys); +KVM_NVHE_ALIAS(cpu_hwcaps); + +/* Static keys which are set if a vGIC trap should be handled in hyp. */ +KVM_NVHE_ALIAS(vgic_v2_cpuif_trap); +KVM_NVHE_ALIAS(vgic_v3_cpuif_trap); + +/* Static key checked in pmr_sync(). */ +#ifdef CONFIG_ARM64_PSEUDO_NMI +KVM_NVHE_ALIAS(gic_pmr_sync); +#endif + +#endif /* CONFIG_KVM */ + #endif /* __ARM64_KERNEL_IMAGE_VARS_H */ diff --git a/arch/arm64/kernel/sdei.c b/arch/arm64/kernel/sdei.c index dab88260b137..7689f2031c0c 100644 --- a/arch/arm64/kernel/sdei.c +++ b/arch/arm64/kernel/sdei.c @@ -180,7 +180,7 @@ static __kprobes unsigned long _sdei_handler(struct pt_regs *regs, /* * We didn't take an exception to get here, set PAN. UAO will be cleared - * by sdei_event_handler()s set_fs(USER_DS) call. + * by sdei_event_handler()s force_uaccess_begin() call. */ __uaccess_enable_hw_pan(); diff --git a/arch/arm64/kvm/Kconfig b/arch/arm64/kvm/Kconfig index 13489aff4440..318c8f2df245 100644 --- a/arch/arm64/kvm/Kconfig +++ b/arch/arm64/kvm/Kconfig @@ -58,7 +58,7 @@ config KVM_ARM_PMU virtual machines. config KVM_INDIRECT_VECTORS - def_bool HARDEN_BRANCH_PREDICTOR || HARDEN_EL2_VECTORS + def_bool HARDEN_BRANCH_PREDICTOR || RANDOMIZE_BASE endif # KVM diff --git a/arch/arm64/kvm/Makefile b/arch/arm64/kvm/Makefile index 8d3d9513cbfe..99977c1972cc 100644 --- a/arch/arm64/kvm/Makefile +++ b/arch/arm64/kvm/Makefile @@ -13,8 +13,8 @@ obj-$(CONFIG_KVM) += hyp/ kvm-y := $(KVM)/kvm_main.o $(KVM)/coalesced_mmio.o $(KVM)/eventfd.o \ $(KVM)/vfio.o $(KVM)/irqchip.o \ arm.o mmu.o mmio.o psci.o perf.o hypercalls.o pvtime.o \ - inject_fault.o regmap.o va_layout.o hyp.o hyp-init.o handle_exit.o \ - guest.o debug.o reset.o sys_regs.o sys_regs_generic_v8.o \ + inject_fault.o regmap.o va_layout.o hyp.o handle_exit.o \ + guest.o debug.o reset.o sys_regs.o \ vgic-sys-reg-v3.o fpsimd.o pmu.o \ aarch32.o arch_timer.o \ vgic/vgic.o vgic/vgic-init.o \ diff --git a/arch/arm64/kvm/arch_timer.c b/arch/arm64/kvm/arch_timer.c index a1fe0ea3254e..32ba6fbc3814 100644 --- a/arch/arm64/kvm/arch_timer.c +++ b/arch/arm64/kvm/arch_timer.c @@ -51,6 +51,93 @@ static u64 kvm_arm_timer_read(struct kvm_vcpu *vcpu, struct arch_timer_context *timer, enum kvm_arch_timer_regs treg); +u32 timer_get_ctl(struct arch_timer_context *ctxt) +{ + struct kvm_vcpu *vcpu = ctxt->vcpu; + + switch(arch_timer_ctx_index(ctxt)) { + case TIMER_VTIMER: + return __vcpu_sys_reg(vcpu, CNTV_CTL_EL0); + case TIMER_PTIMER: + return __vcpu_sys_reg(vcpu, CNTP_CTL_EL0); + default: + WARN_ON(1); + return 0; + } +} + +u64 timer_get_cval(struct arch_timer_context *ctxt) +{ + struct kvm_vcpu *vcpu = ctxt->vcpu; + + switch(arch_timer_ctx_index(ctxt)) { + case TIMER_VTIMER: + return __vcpu_sys_reg(vcpu, CNTV_CVAL_EL0); + case TIMER_PTIMER: + return __vcpu_sys_reg(vcpu, CNTP_CVAL_EL0); + default: + WARN_ON(1); + return 0; + } +} + +static u64 timer_get_offset(struct arch_timer_context *ctxt) +{ + struct kvm_vcpu *vcpu = ctxt->vcpu; + + switch(arch_timer_ctx_index(ctxt)) { + case TIMER_VTIMER: + return __vcpu_sys_reg(vcpu, CNTVOFF_EL2); + default: + return 0; + } +} + +static void timer_set_ctl(struct arch_timer_context *ctxt, u32 ctl) +{ + struct kvm_vcpu *vcpu = ctxt->vcpu; + + 
switch(arch_timer_ctx_index(ctxt)) { + case TIMER_VTIMER: + __vcpu_sys_reg(vcpu, CNTV_CTL_EL0) = ctl; + break; + case TIMER_PTIMER: + __vcpu_sys_reg(vcpu, CNTP_CTL_EL0) = ctl; + break; + default: + WARN_ON(1); + } +} + +static void timer_set_cval(struct arch_timer_context *ctxt, u64 cval) +{ + struct kvm_vcpu *vcpu = ctxt->vcpu; + + switch(arch_timer_ctx_index(ctxt)) { + case TIMER_VTIMER: + __vcpu_sys_reg(vcpu, CNTV_CVAL_EL0) = cval; + break; + case TIMER_PTIMER: + __vcpu_sys_reg(vcpu, CNTP_CVAL_EL0) = cval; + break; + default: + WARN_ON(1); + } +} + +static void timer_set_offset(struct arch_timer_context *ctxt, u64 offset) +{ + struct kvm_vcpu *vcpu = ctxt->vcpu; + + switch(arch_timer_ctx_index(ctxt)) { + case TIMER_VTIMER: + __vcpu_sys_reg(vcpu, CNTVOFF_EL2) = offset; + break; + default: + WARN(offset, "timer %ld\n", arch_timer_ctx_index(ctxt)); + } +} + u64 kvm_phys_timer_read(void) { return timecounter->cc->read(timecounter->cc); @@ -124,8 +211,8 @@ static u64 kvm_timer_compute_delta(struct arch_timer_context *timer_ctx) { u64 cval, now; - cval = timer_ctx->cnt_cval; - now = kvm_phys_timer_read() - timer_ctx->cntvoff; + cval = timer_get_cval(timer_ctx); + now = kvm_phys_timer_read() - timer_get_offset(timer_ctx); if (now < cval) { u64 ns; @@ -144,8 +231,8 @@ static bool kvm_timer_irq_can_fire(struct arch_timer_context *timer_ctx) { WARN_ON(timer_ctx && timer_ctx->loaded); return timer_ctx && - !(timer_ctx->cnt_ctl & ARCH_TIMER_CTRL_IT_MASK) && - (timer_ctx->cnt_ctl & ARCH_TIMER_CTRL_ENABLE); + ((timer_get_ctl(timer_ctx) & + (ARCH_TIMER_CTRL_IT_MASK | ARCH_TIMER_CTRL_ENABLE)) == ARCH_TIMER_CTRL_ENABLE); } /* @@ -256,8 +343,8 @@ static bool kvm_timer_should_fire(struct arch_timer_context *timer_ctx) if (!kvm_timer_irq_can_fire(timer_ctx)) return false; - cval = timer_ctx->cnt_cval; - now = kvm_phys_timer_read() - timer_ctx->cntvoff; + cval = timer_get_cval(timer_ctx); + now = kvm_phys_timer_read() - timer_get_offset(timer_ctx); return cval <= now; } @@ -350,8 +437,8 @@ static void timer_save_state(struct arch_timer_context *ctx) switch (index) { case TIMER_VTIMER: - ctx->cnt_ctl = read_sysreg_el0(SYS_CNTV_CTL); - ctx->cnt_cval = read_sysreg_el0(SYS_CNTV_CVAL); + timer_set_ctl(ctx, read_sysreg_el0(SYS_CNTV_CTL)); + timer_set_cval(ctx, read_sysreg_el0(SYS_CNTV_CVAL)); /* Disable the timer */ write_sysreg_el0(0, SYS_CNTV_CTL); @@ -359,8 +446,8 @@ static void timer_save_state(struct arch_timer_context *ctx) break; case TIMER_PTIMER: - ctx->cnt_ctl = read_sysreg_el0(SYS_CNTP_CTL); - ctx->cnt_cval = read_sysreg_el0(SYS_CNTP_CVAL); + timer_set_ctl(ctx, read_sysreg_el0(SYS_CNTP_CTL)); + timer_set_cval(ctx, read_sysreg_el0(SYS_CNTP_CVAL)); /* Disable the timer */ write_sysreg_el0(0, SYS_CNTP_CTL); @@ -429,14 +516,14 @@ static void timer_restore_state(struct arch_timer_context *ctx) switch (index) { case TIMER_VTIMER: - write_sysreg_el0(ctx->cnt_cval, SYS_CNTV_CVAL); + write_sysreg_el0(timer_get_cval(ctx), SYS_CNTV_CVAL); isb(); - write_sysreg_el0(ctx->cnt_ctl, SYS_CNTV_CTL); + write_sysreg_el0(timer_get_ctl(ctx), SYS_CNTV_CTL); break; case TIMER_PTIMER: - write_sysreg_el0(ctx->cnt_cval, SYS_CNTP_CVAL); + write_sysreg_el0(timer_get_cval(ctx), SYS_CNTP_CVAL); isb(); - write_sysreg_el0(ctx->cnt_ctl, SYS_CNTP_CTL); + write_sysreg_el0(timer_get_ctl(ctx), SYS_CNTP_CTL); break; case NR_KVM_TIMERS: BUG(); @@ -528,7 +615,7 @@ void kvm_timer_vcpu_load(struct kvm_vcpu *vcpu) kvm_timer_vcpu_load_nogic(vcpu); } - set_cntvoff(map.direct_vtimer->cntvoff); + set_cntvoff(timer_get_offset(map.direct_vtimer)); 
kvm_timer_unblocking(vcpu); @@ -615,7 +702,7 @@ static void unmask_vtimer_irq_user(struct kvm_vcpu *vcpu) } } -void kvm_timer_sync_hwstate(struct kvm_vcpu *vcpu) +void kvm_timer_sync_user(struct kvm_vcpu *vcpu) { struct arch_timer_cpu *timer = vcpu_timer(vcpu); @@ -639,8 +726,8 @@ int kvm_timer_vcpu_reset(struct kvm_vcpu *vcpu) * resets the timer to be disabled and unmasked and is compliant with * the ARMv7 architecture. */ - vcpu_vtimer(vcpu)->cnt_ctl = 0; - vcpu_ptimer(vcpu)->cnt_ctl = 0; + timer_set_ctl(vcpu_vtimer(vcpu), 0); + timer_set_ctl(vcpu_ptimer(vcpu), 0); if (timer->enabled) { kvm_timer_update_irq(vcpu, false, vcpu_vtimer(vcpu)); @@ -668,13 +755,13 @@ static void update_vtimer_cntvoff(struct kvm_vcpu *vcpu, u64 cntvoff) mutex_lock(&kvm->lock); kvm_for_each_vcpu(i, tmp, kvm) - vcpu_vtimer(tmp)->cntvoff = cntvoff; + timer_set_offset(vcpu_vtimer(tmp), cntvoff); /* * When called from the vcpu create path, the CPU being created is not * included in the loop above, so we just set it here as well. */ - vcpu_vtimer(vcpu)->cntvoff = cntvoff; + timer_set_offset(vcpu_vtimer(vcpu), cntvoff); mutex_unlock(&kvm->lock); } @@ -684,9 +771,12 @@ void kvm_timer_vcpu_init(struct kvm_vcpu *vcpu) struct arch_timer_context *vtimer = vcpu_vtimer(vcpu); struct arch_timer_context *ptimer = vcpu_ptimer(vcpu); + vtimer->vcpu = vcpu; + ptimer->vcpu = vcpu; + /* Synchronize cntvoff across all vtimers of a VM. */ update_vtimer_cntvoff(vcpu, kvm_phys_timer_read()); - ptimer->cntvoff = 0; + timer_set_offset(ptimer, 0); hrtimer_init(&timer->bg_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_HARD); timer->bg_timer.function = kvm_bg_timer_expire; @@ -704,9 +794,6 @@ void kvm_timer_vcpu_init(struct kvm_vcpu *vcpu) vtimer->host_timer_irq_flags = host_vtimer_irq_flags; ptimer->host_timer_irq_flags = host_ptimer_irq_flags; - - vtimer->vcpu = vcpu; - ptimer->vcpu = vcpu; } static void kvm_timer_init_interrupt(void *info) @@ -756,10 +843,12 @@ static u64 read_timer_ctl(struct arch_timer_context *timer) * UNKNOWN when ENABLE bit is 0, so we chose to set ISTATUS bit * regardless of ENABLE bit for our implementation convenience. 
*/ + u32 ctl = timer_get_ctl(timer); + if (!kvm_timer_compute_delta(timer)) - return timer->cnt_ctl | ARCH_TIMER_CTRL_IT_STAT; - else - return timer->cnt_ctl; + ctl |= ARCH_TIMER_CTRL_IT_STAT; + + return ctl; } u64 kvm_arm_timer_get_reg(struct kvm_vcpu *vcpu, u64 regid) @@ -795,8 +884,8 @@ static u64 kvm_arm_timer_read(struct kvm_vcpu *vcpu, switch (treg) { case TIMER_REG_TVAL: - val = timer->cnt_cval - kvm_phys_timer_read() + timer->cntvoff; - val &= lower_32_bits(val); + val = timer_get_cval(timer) - kvm_phys_timer_read() + timer_get_offset(timer); + val = lower_32_bits(val); break; case TIMER_REG_CTL: @@ -804,11 +893,11 @@ static u64 kvm_arm_timer_read(struct kvm_vcpu *vcpu, break; case TIMER_REG_CVAL: - val = timer->cnt_cval; + val = timer_get_cval(timer); break; case TIMER_REG_CNT: - val = kvm_phys_timer_read() - timer->cntvoff; + val = kvm_phys_timer_read() - timer_get_offset(timer); break; default: @@ -842,15 +931,15 @@ static void kvm_arm_timer_write(struct kvm_vcpu *vcpu, { switch (treg) { case TIMER_REG_TVAL: - timer->cnt_cval = kvm_phys_timer_read() - timer->cntvoff + (s32)val; + timer_set_cval(timer, kvm_phys_timer_read() - timer_get_offset(timer) + (s32)val); break; case TIMER_REG_CTL: - timer->cnt_ctl = val & ~ARCH_TIMER_CTRL_IT_STAT; + timer_set_ctl(timer, val & ~ARCH_TIMER_CTRL_IT_STAT); break; case TIMER_REG_CVAL: - timer->cnt_cval = val; + timer_set_cval(timer, val); break; default: diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index 73e12869afe3..691d21e4c717 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -106,22 +106,15 @@ static int kvm_arm_default_max_vcpus(void) */ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) { - int ret, cpu; + int ret; ret = kvm_arm_setup_stage2(kvm, type); if (ret) return ret; - kvm->arch.last_vcpu_ran = alloc_percpu(typeof(*kvm->arch.last_vcpu_ran)); - if (!kvm->arch.last_vcpu_ran) - return -ENOMEM; - - for_each_possible_cpu(cpu) - *per_cpu_ptr(kvm->arch.last_vcpu_ran, cpu) = -1; - - ret = kvm_alloc_stage2_pgd(kvm); + ret = kvm_init_stage2_mmu(kvm, &kvm->arch.mmu); if (ret) - goto out_fail_alloc; + return ret; ret = create_hyp_mappings(kvm, kvm + 1, PAGE_HYP); if (ret) @@ -129,18 +122,12 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) kvm_vgic_early_init(kvm); - /* Mark the initial VMID generation invalid */ - kvm->arch.vmid.vmid_gen = 0; - /* The maximum number of VCPUs is limited by the host's GIC model */ kvm->arch.max_vcpus = kvm_arm_default_max_vcpus(); return ret; out_free_stage2_pgd: - kvm_free_stage2_pgd(kvm); -out_fail_alloc: - free_percpu(kvm->arch.last_vcpu_ran); - kvm->arch.last_vcpu_ran = NULL; + kvm_free_stage2_pgd(&kvm->arch.mmu); return ret; } @@ -160,9 +147,6 @@ void kvm_arch_destroy_vm(struct kvm *kvm) kvm_vgic_destroy(kvm); - free_percpu(kvm->arch.last_vcpu_ran); - kvm->arch.last_vcpu_ran = NULL; - for (i = 0; i < KVM_MAX_VCPUS; ++i) { if (kvm->vcpus[i]) { kvm_vcpu_destroy(kvm->vcpus[i]); @@ -281,6 +265,8 @@ int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu) kvm_arm_pvtime_vcpu_init(&vcpu->arch); + vcpu->arch.hw_mmu = &vcpu->kvm->arch.mmu; + err = kvm_vgic_vcpu_init(vcpu); if (err) return err; @@ -336,16 +322,18 @@ void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu) void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) { + struct kvm_s2_mmu *mmu; int *last_ran; - last_ran = this_cpu_ptr(vcpu->kvm->arch.last_vcpu_ran); + mmu = vcpu->arch.hw_mmu; + last_ran = this_cpu_ptr(mmu->last_vcpu_ran); /* * We might get preempted before the vCPU actually runs, but * over-invalidation 
doesn't affect correctness. */ if (*last_ran != vcpu->vcpu_id) { - kvm_call_hyp(__kvm_tlb_flush_local_vmid, vcpu); + kvm_call_hyp(__kvm_tlb_flush_local_vmid, mmu); *last_ran = vcpu->vcpu_id; } @@ -353,7 +341,8 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) kvm_vgic_load(vcpu); kvm_timer_vcpu_load(vcpu); - kvm_vcpu_load_sysregs(vcpu); + if (has_vhe()) + kvm_vcpu_load_sysregs_vhe(vcpu); kvm_arch_vcpu_load_fp(vcpu); kvm_vcpu_pmu_restore_guest(vcpu); if (kvm_arm_is_pvtime_enabled(&vcpu->arch)) @@ -371,7 +360,8 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) { kvm_arch_vcpu_put_fp(vcpu); - kvm_vcpu_put_sysregs(vcpu); + if (has_vhe()) + kvm_vcpu_put_sysregs_vhe(vcpu); kvm_timer_vcpu_put(vcpu); kvm_vgic_put(vcpu); kvm_vcpu_pmu_restore_host(vcpu); @@ -468,7 +458,6 @@ static bool need_new_vmid_gen(struct kvm_vmid *vmid) /** * update_vmid - Update the vmid with a valid VMID for the current generation - * @kvm: The guest that struct vmid belongs to * @vmid: The stage-2 VMID information struct */ static void update_vmid(struct kvm_vmid *vmid) @@ -680,7 +669,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu) */ cond_resched(); - update_vmid(&vcpu->kvm->arch.vmid); + update_vmid(&vcpu->arch.hw_mmu->vmid); check_vcpu_requests(vcpu); @@ -729,13 +718,13 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu) */ smp_store_mb(vcpu->mode, IN_GUEST_MODE); - if (ret <= 0 || need_new_vmid_gen(&vcpu->kvm->arch.vmid) || + if (ret <= 0 || need_new_vmid_gen(&vcpu->arch.hw_mmu->vmid) || kvm_request_pending(vcpu)) { vcpu->mode = OUTSIDE_GUEST_MODE; isb(); /* Ensure work in x_flush_hwstate is committed */ kvm_pmu_sync_hwstate(vcpu); if (static_branch_unlikely(&userspace_irqchip_in_use)) - kvm_timer_sync_hwstate(vcpu); + kvm_timer_sync_user(vcpu); kvm_vgic_sync_hwstate(vcpu); local_irq_enable(); preempt_enable(); @@ -750,11 +739,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu) trace_kvm_entry(*vcpu_pc(vcpu)); guest_enter_irqoff(); - if (has_vhe()) { - ret = kvm_vcpu_run_vhe(vcpu); - } else { - ret = kvm_call_hyp_ret(__kvm_vcpu_run_nvhe, vcpu); - } + ret = kvm_call_hyp_ret(__kvm_vcpu_run, vcpu); vcpu->mode = OUTSIDE_GUEST_MODE; vcpu->stat.exits++; @@ -784,7 +769,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu) * timer virtual interrupt state. */ if (static_branch_unlikely(&userspace_irqchip_in_use)) - kvm_timer_sync_hwstate(vcpu); + kvm_timer_sync_user(vcpu); kvm_arch_vcpu_ctxsync_fp(vcpu); @@ -1287,7 +1272,7 @@ static void cpu_init_hyp_mode(void) * so that we can use adr_l to access per-cpu variables in EL2. 
*/ tpidr_el2 = ((unsigned long)this_cpu_ptr(&kvm_host_data) - - (unsigned long)kvm_ksym_ref(kvm_host_data)); + (unsigned long)kvm_ksym_ref(&kvm_host_data)); pgd_ptr = kvm_mmu_get_httbr(); hyp_stack_ptr = __this_cpu_read(kvm_arm_hyp_stack_page) + PAGE_SIZE; @@ -1308,7 +1293,7 @@ static void cpu_init_hyp_mode(void) */ if (this_cpu_has_cap(ARM64_SSBS) && arm64_get_ssbd_state() == ARM64_SSBD_FORCE_DISABLE) { - kvm_call_hyp(__kvm_enable_ssbs); + kvm_call_hyp_nvhe(__kvm_enable_ssbs); } } diff --git a/arch/arm64/kvm/fpsimd.c b/arch/arm64/kvm/fpsimd.c index e329a36b2bee..3e081d556e81 100644 --- a/arch/arm64/kvm/fpsimd.c +++ b/arch/arm64/kvm/fpsimd.c @@ -85,7 +85,7 @@ void kvm_arch_vcpu_ctxsync_fp(struct kvm_vcpu *vcpu) WARN_ON_ONCE(!irqs_disabled()); if (vcpu->arch.flags & KVM_ARM64_FP_ENABLED) { - fpsimd_bind_state_to_cpu(&vcpu->arch.ctxt.gp_regs.fp_regs, + fpsimd_bind_state_to_cpu(&vcpu->arch.ctxt.fp_regs, vcpu->arch.sve_state, vcpu->arch.sve_max_vl); @@ -109,12 +109,10 @@ void kvm_arch_vcpu_put_fp(struct kvm_vcpu *vcpu) local_irq_save(flags); if (vcpu->arch.flags & KVM_ARM64_FP_ENABLED) { - u64 *guest_zcr = &vcpu->arch.ctxt.sys_regs[ZCR_EL1]; - fpsimd_save_and_flush_cpu_state(); if (guest_has_sve) - *guest_zcr = read_sysreg_s(SYS_ZCR_EL12); + __vcpu_sys_reg(vcpu, ZCR_EL1) = read_sysreg_s(SYS_ZCR_EL12); } else if (host_has_sve) { /* * The FPSIMD/SVE state in the CPU has not been touched, and we diff --git a/arch/arm64/kvm/guest.c b/arch/arm64/kvm/guest.c index aea43ec60f37..dfb5218137ca 100644 --- a/arch/arm64/kvm/guest.c +++ b/arch/arm64/kvm/guest.c @@ -101,19 +101,69 @@ static int core_reg_size_from_offset(const struct kvm_vcpu *vcpu, u64 off) return size; } -static int validate_core_offset(const struct kvm_vcpu *vcpu, - const struct kvm_one_reg *reg) +static void *core_reg_addr(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) { u64 off = core_reg_offset_from_id(reg->id); int size = core_reg_size_from_offset(vcpu, off); if (size < 0) - return -EINVAL; + return NULL; if (KVM_REG_SIZE(reg->id) != size) - return -EINVAL; + return NULL; - return 0; + switch (off) { + case KVM_REG_ARM_CORE_REG(regs.regs[0]) ... + KVM_REG_ARM_CORE_REG(regs.regs[30]): + off -= KVM_REG_ARM_CORE_REG(regs.regs[0]); + off /= 2; + return &vcpu->arch.ctxt.regs.regs[off]; + + case KVM_REG_ARM_CORE_REG(regs.sp): + return &vcpu->arch.ctxt.regs.sp; + + case KVM_REG_ARM_CORE_REG(regs.pc): + return &vcpu->arch.ctxt.regs.pc; + + case KVM_REG_ARM_CORE_REG(regs.pstate): + return &vcpu->arch.ctxt.regs.pstate; + + case KVM_REG_ARM_CORE_REG(sp_el1): + return __ctxt_sys_reg(&vcpu->arch.ctxt, SP_EL1); + + case KVM_REG_ARM_CORE_REG(elr_el1): + return __ctxt_sys_reg(&vcpu->arch.ctxt, ELR_EL1); + + case KVM_REG_ARM_CORE_REG(spsr[KVM_SPSR_EL1]): + return __ctxt_sys_reg(&vcpu->arch.ctxt, SPSR_EL1); + + case KVM_REG_ARM_CORE_REG(spsr[KVM_SPSR_ABT]): + return &vcpu->arch.ctxt.spsr_abt; + + case KVM_REG_ARM_CORE_REG(spsr[KVM_SPSR_UND]): + return &vcpu->arch.ctxt.spsr_und; + + case KVM_REG_ARM_CORE_REG(spsr[KVM_SPSR_IRQ]): + return &vcpu->arch.ctxt.spsr_irq; + + case KVM_REG_ARM_CORE_REG(spsr[KVM_SPSR_FIQ]): + return &vcpu->arch.ctxt.spsr_fiq; + + case KVM_REG_ARM_CORE_REG(fp_regs.vregs[0]) ... 
+ KVM_REG_ARM_CORE_REG(fp_regs.vregs[31]): + off -= KVM_REG_ARM_CORE_REG(fp_regs.vregs[0]); + off /= 4; + return &vcpu->arch.ctxt.fp_regs.vregs[off]; + + case KVM_REG_ARM_CORE_REG(fp_regs.fpsr): + return &vcpu->arch.ctxt.fp_regs.fpsr; + + case KVM_REG_ARM_CORE_REG(fp_regs.fpcr): + return &vcpu->arch.ctxt.fp_regs.fpcr; + + default: + return NULL; + } } static int get_core_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) @@ -125,8 +175,8 @@ static int get_core_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) * off the index in the "array". */ __u32 __user *uaddr = (__u32 __user *)(unsigned long)reg->addr; - struct kvm_regs *regs = vcpu_gp_regs(vcpu); - int nr_regs = sizeof(*regs) / sizeof(__u32); + int nr_regs = sizeof(struct kvm_regs) / sizeof(__u32); + void *addr; u32 off; /* Our ID is an index into the kvm_regs struct. */ @@ -135,10 +185,11 @@ static int get_core_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) (off + (KVM_REG_SIZE(reg->id) / sizeof(__u32))) >= nr_regs) return -ENOENT; - if (validate_core_offset(vcpu, reg)) + addr = core_reg_addr(vcpu, reg); + if (!addr) return -EINVAL; - if (copy_to_user(uaddr, ((u32 *)regs) + off, KVM_REG_SIZE(reg->id))) + if (copy_to_user(uaddr, addr, KVM_REG_SIZE(reg->id))) return -EFAULT; return 0; @@ -147,10 +198,9 @@ static int get_core_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) static int set_core_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) { __u32 __user *uaddr = (__u32 __user *)(unsigned long)reg->addr; - struct kvm_regs *regs = vcpu_gp_regs(vcpu); - int nr_regs = sizeof(*regs) / sizeof(__u32); + int nr_regs = sizeof(struct kvm_regs) / sizeof(__u32); __uint128_t tmp; - void *valp = &tmp; + void *valp = &tmp, *addr; u64 off; int err = 0; @@ -160,7 +210,8 @@ static int set_core_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) (off + (KVM_REG_SIZE(reg->id) / sizeof(__u32))) >= nr_regs) return -ENOENT; - if (validate_core_offset(vcpu, reg)) + addr = core_reg_addr(vcpu, reg); + if (!addr) return -EINVAL; if (KVM_REG_SIZE(reg->id) > sizeof(tmp)) @@ -198,7 +249,7 @@ static int set_core_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) } } - memcpy((u32 *)regs + off, valp, KVM_REG_SIZE(reg->id)); + memcpy(addr, valp, KVM_REG_SIZE(reg->id)); if (*vcpu_cpsr(vcpu) & PSR_MODE32_BIT) { int i; diff --git a/arch/arm64/kvm/handle_exit.c b/arch/arm64/kvm/handle_exit.c index 1df3beafd73f..fe6c7d79309d 100644 --- a/arch/arm64/kvm/handle_exit.c +++ b/arch/arm64/kvm/handle_exit.c @@ -89,7 +89,7 @@ static int handle_no_fpsimd(struct kvm_vcpu *vcpu) */ static int kvm_handle_wfx(struct kvm_vcpu *vcpu) { - if (kvm_vcpu_get_hsr(vcpu) & ESR_ELx_WFx_ISS_WFE) { + if (kvm_vcpu_get_esr(vcpu) & ESR_ELx_WFx_ISS_WFE) { trace_kvm_wfx_arm64(*vcpu_pc(vcpu), true); vcpu->stat.wfe_exit_stat++; kvm_vcpu_on_spin(vcpu, vcpu_mode_priv(vcpu)); @@ -119,13 +119,13 @@ static int kvm_handle_wfx(struct kvm_vcpu *vcpu) static int kvm_handle_guest_debug(struct kvm_vcpu *vcpu) { struct kvm_run *run = vcpu->run; - u32 hsr = kvm_vcpu_get_hsr(vcpu); + u32 esr = kvm_vcpu_get_esr(vcpu); int ret = 0; run->exit_reason = KVM_EXIT_DEBUG; - run->debug.arch.hsr = hsr; + run->debug.arch.hsr = esr; - switch (ESR_ELx_EC(hsr)) { + switch (ESR_ELx_EC(esr)) { case ESR_ELx_EC_WATCHPT_LOW: run->debug.arch.far = vcpu->arch.fault.far_el2; /* fall through */ @@ -135,8 +135,8 @@ static int kvm_handle_guest_debug(struct kvm_vcpu *vcpu) case ESR_ELx_EC_BRK64: break; default: - kvm_err("%s: un-handled case hsr: %#08x\n", - __func__, (unsigned int) 
hsr); + kvm_err("%s: un-handled case esr: %#08x\n", + __func__, (unsigned int) esr); ret = -1; break; } @@ -146,10 +146,10 @@ static int kvm_handle_guest_debug(struct kvm_vcpu *vcpu) static int kvm_handle_unknown_ec(struct kvm_vcpu *vcpu) { - u32 hsr = kvm_vcpu_get_hsr(vcpu); + u32 esr = kvm_vcpu_get_esr(vcpu); - kvm_pr_unimpl("Unknown exception class: hsr: %#08x -- %s\n", - hsr, esr_get_class_string(hsr)); + kvm_pr_unimpl("Unknown exception class: esr: %#08x -- %s\n", + esr, esr_get_class_string(esr)); kvm_inject_undefined(vcpu); return 1; @@ -200,10 +200,10 @@ static exit_handle_fn arm_exit_handlers[] = { static exit_handle_fn kvm_get_exit_handler(struct kvm_vcpu *vcpu) { - u32 hsr = kvm_vcpu_get_hsr(vcpu); - u8 hsr_ec = ESR_ELx_EC(hsr); + u32 esr = kvm_vcpu_get_esr(vcpu); + u8 esr_ec = ESR_ELx_EC(esr); - return arm_exit_handlers[hsr_ec]; + return arm_exit_handlers[esr_ec]; } /* @@ -242,15 +242,15 @@ int handle_exit(struct kvm_vcpu *vcpu, int exception_index) struct kvm_run *run = vcpu->run; if (ARM_SERROR_PENDING(exception_index)) { - u8 hsr_ec = ESR_ELx_EC(kvm_vcpu_get_hsr(vcpu)); + u8 esr_ec = ESR_ELx_EC(kvm_vcpu_get_esr(vcpu)); /* * HVC/SMC already have an adjusted PC, which we need * to correct in order to return to after having * injected the SError. */ - if (hsr_ec == ESR_ELx_EC_HVC32 || hsr_ec == ESR_ELx_EC_HVC64 || - hsr_ec == ESR_ELx_EC_SMC32 || hsr_ec == ESR_ELx_EC_SMC64) { + if (esr_ec == ESR_ELx_EC_HVC32 || esr_ec == ESR_ELx_EC_HVC64 || + esr_ec == ESR_ELx_EC_SMC32 || esr_ec == ESR_ELx_EC_SMC64) { u32 adj = kvm_vcpu_trap_il_is32bit(vcpu) ? 4 : 2; *vcpu_pc(vcpu) -= adj; } @@ -307,5 +307,5 @@ void handle_exit_early(struct kvm_vcpu *vcpu, int exception_index) exception_index = ARM_EXCEPTION_CODE(exception_index); if (exception_index == ARM_EXCEPTION_EL1_SERROR) - kvm_handle_guest_serror(vcpu, kvm_vcpu_get_hsr(vcpu)); + kvm_handle_guest_serror(vcpu, kvm_vcpu_get_esr(vcpu)); } diff --git a/arch/arm64/kvm/hyp/Makefile b/arch/arm64/kvm/hyp/Makefile index 8c9880783839..f54f0e89a71c 100644 --- a/arch/arm64/kvm/hyp/Makefile +++ b/arch/arm64/kvm/hyp/Makefile @@ -3,18 +3,12 @@ # Makefile for Kernel-based Virtual Machine module, HYP part # -ccflags-y += -fno-stack-protector -DDISABLE_BRANCH_PROFILING \ - $(DISABLE_STACKLEAK_PLUGIN) - -obj-$(CONFIG_KVM) += hyp.o - -hyp-y := vgic-v3-sr.o timer-sr.o aarch32.o vgic-v2-cpuif-proxy.o sysreg-sr.o \ - debug-sr.o entry.o switch.o fpsimd.o tlb.o hyp-entry.o - -# KVM code is run at a different exception code with a different map, so -# compiler instrumentation that inserts callbacks or checks into the code may -# cause crashes. Just disable it. -GCOV_PROFILE := n -KASAN_SANITIZE := n -UBSAN_SANITIZE := n -KCOV_INSTRUMENT := n +incdir := $(srctree)/$(src)/include +subdir-asflags-y := -I$(incdir) +subdir-ccflags-y := -I$(incdir) \ + -fno-stack-protector \ + -DDISABLE_BRANCH_PROFILING \ + $(DISABLE_STACKLEAK_PLUGIN) + +obj-$(CONFIG_KVM) += vhe/ nvhe/ +obj-$(CONFIG_KVM_INDIRECT_VECTORS) += smccc_wa.o diff --git a/arch/arm64/kvm/hyp/aarch32.c b/arch/arm64/kvm/hyp/aarch32.c index 25c0e47d57cb..ae56d8a4b382 100644 --- a/arch/arm64/kvm/hyp/aarch32.c +++ b/arch/arm64/kvm/hyp/aarch32.c @@ -44,14 +44,14 @@ static const unsigned short cc_map[16] = { /* * Check if a trapped instruction should have been executed or not. */ -bool __hyp_text kvm_condition_valid32(const struct kvm_vcpu *vcpu) +bool kvm_condition_valid32(const struct kvm_vcpu *vcpu) { unsigned long cpsr; u32 cpsr_cond; int cond; /* Top two bits non-zero? Unconditional. 
*/ - if (kvm_vcpu_get_hsr(vcpu) >> 30) + if (kvm_vcpu_get_esr(vcpu) >> 30) return true; /* Is condition field valid? */ @@ -93,7 +93,7 @@ bool __hyp_text kvm_condition_valid32(const struct kvm_vcpu *vcpu) * * IT[7:0] -> CPSR[26:25],CPSR[15:10] */ -static void __hyp_text kvm_adjust_itstate(struct kvm_vcpu *vcpu) +static void kvm_adjust_itstate(struct kvm_vcpu *vcpu) { unsigned long itbits, cond; unsigned long cpsr = *vcpu_cpsr(vcpu); @@ -123,7 +123,7 @@ static void __hyp_text kvm_adjust_itstate(struct kvm_vcpu *vcpu) * kvm_skip_instr - skip a trapped instruction and proceed to the next * @vcpu: The vcpu pointer */ -void __hyp_text kvm_skip_instr32(struct kvm_vcpu *vcpu, bool is_wide_instr) +void kvm_skip_instr32(struct kvm_vcpu *vcpu, bool is_wide_instr) { u32 pc = *vcpu_pc(vcpu); bool is_thumb; diff --git a/arch/arm64/kvm/hyp/entry.S b/arch/arm64/kvm/hyp/entry.S index 90186cf6473e..ee32a7743389 100644 --- a/arch/arm64/kvm/hyp/entry.S +++ b/arch/arm64/kvm/hyp/entry.S @@ -16,12 +16,10 @@ #include <asm/kvm_mmu.h> #include <asm/kvm_ptrauth.h> -#define CPU_GP_REG_OFFSET(x) (CPU_GP_REGS + x) -#define CPU_XREG_OFFSET(x) CPU_GP_REG_OFFSET(CPU_USER_PT_REGS + 8*x) +#define CPU_XREG_OFFSET(x) (CPU_USER_PT_REGS + 8*x) #define CPU_SP_EL0_OFFSET (CPU_XREG_OFFSET(30) + 8) .text - .pushsection .hyp.text, "ax" /* * We treat x18 as callee-saved as the host may use it as a platform diff --git a/arch/arm64/kvm/hyp/fpsimd.S b/arch/arm64/kvm/hyp/fpsimd.S index 5b8ff517ff10..01f114aa47b0 100644 --- a/arch/arm64/kvm/hyp/fpsimd.S +++ b/arch/arm64/kvm/hyp/fpsimd.S @@ -9,7 +9,6 @@ #include <asm/fpsimdmacros.h> .text - .pushsection .hyp.text, "ax" SYM_FUNC_START(__fpsimd_save_state) fpsimd_save x0, 1 diff --git a/arch/arm64/kvm/hyp/hyp-entry.S b/arch/arm64/kvm/hyp/hyp-entry.S index 9c5cfb04170e..689fccbc9de7 100644 --- a/arch/arm64/kvm/hyp/hyp-entry.S +++ b/arch/arm64/kvm/hyp/hyp-entry.S @@ -16,7 +16,6 @@ #include <asm/mmu.h> .text - .pushsection .hyp.text, "ax" .macro do_el2_call /* @@ -40,6 +39,7 @@ el1_sync: // Guest trapped into EL2 ccmp x0, #ESR_ELx_EC_HVC32, #4, ne b.ne el1_trap +#ifdef __KVM_NVHE_HYPERVISOR__ mrs x1, vttbr_el2 // If vttbr is valid, the guest cbnz x1, el1_hvc_guest // called HVC @@ -74,6 +74,7 @@ el1_sync: // Guest trapped into EL2 eret sb +#endif /* __KVM_NVHE_HYPERVISOR__ */ el1_hvc_guest: /* @@ -180,6 +181,7 @@ el2_error: eret sb +#ifdef __KVM_NVHE_HYPERVISOR__ SYM_FUNC_START(__hyp_do_panic) mov lr, #(PSR_F_BIT | PSR_I_BIT | PSR_A_BIT | PSR_D_BIT |\ PSR_MODE_EL1h) @@ -189,6 +191,7 @@ SYM_FUNC_START(__hyp_do_panic) eret sb SYM_FUNC_END(__hyp_do_panic) +#endif SYM_CODE_START(__hyp_panic) get_host_ctxt x0, x1 @@ -318,20 +321,4 @@ SYM_CODE_START(__bp_harden_hyp_vecs) 1: .org __bp_harden_hyp_vecs + __BP_HARDEN_HYP_VECS_SZ .org 1b SYM_CODE_END(__bp_harden_hyp_vecs) - - .popsection - -SYM_CODE_START(__smccc_workaround_1_smc) - esb - sub sp, sp, #(8 * 4) - stp x2, x3, [sp, #(8 * 0)] - stp x0, x1, [sp, #(8 * 2)] - mov w0, #ARM_SMCCC_ARCH_WORKAROUND_1 - smc #0 - ldp x2, x3, [sp, #(8 * 0)] - ldp x0, x1, [sp, #(8 * 2)] - add sp, sp, #(8 * 4) -1: .org __smccc_workaround_1_smc + __SMCCC_WORKAROUND_1_SMC_SZ - .org 1b -SYM_CODE_END(__smccc_workaround_1_smc) #endif diff --git a/arch/arm64/kvm/hyp/debug-sr.c b/arch/arm64/kvm/hyp/include/hyp/debug-sr.h index e95af204fec7..0297dc63988c 100644 --- a/arch/arm64/kvm/hyp/debug-sr.c +++ b/arch/arm64/kvm/hyp/include/hyp/debug-sr.h @@ -4,6 +4,9 @@ * Author: Marc Zyngier <marc.zyngier@arm.com> */ +#ifndef __ARM64_KVM_HYP_DEBUG_SR_H__ +#define 
__ARM64_KVM_HYP_DEBUG_SR_H__ + #include <linux/compiler.h> #include <linux/kvm_host.h> @@ -85,53 +88,8 @@ default: write_debug(ptr[0], reg, 0); \ } -static void __hyp_text __debug_save_spe_nvhe(u64 *pmscr_el1) -{ - u64 reg; - - /* Clear pmscr in case of early return */ - *pmscr_el1 = 0; - - /* SPE present on this CPU? */ - if (!cpuid_feature_extract_unsigned_field(read_sysreg(id_aa64dfr0_el1), - ID_AA64DFR0_PMSVER_SHIFT)) - return; - - /* Yes; is it owned by EL3? */ - reg = read_sysreg_s(SYS_PMBIDR_EL1); - if (reg & BIT(SYS_PMBIDR_EL1_P_SHIFT)) - return; - - /* No; is the host actually using the thing? */ - reg = read_sysreg_s(SYS_PMBLIMITR_EL1); - if (!(reg & BIT(SYS_PMBLIMITR_EL1_E_SHIFT))) - return; - - /* Yes; save the control register and disable data generation */ - *pmscr_el1 = read_sysreg_s(SYS_PMSCR_EL1); - write_sysreg_s(0, SYS_PMSCR_EL1); - isb(); - - /* Now drain all buffered data to memory */ - psb_csync(); - dsb(nsh); -} - -static void __hyp_text __debug_restore_spe_nvhe(u64 pmscr_el1) -{ - if (!pmscr_el1) - return; - - /* The host page table is installed, but not yet synchronised */ - isb(); - - /* Re-enable data generation */ - write_sysreg_s(pmscr_el1, SYS_PMSCR_EL1); -} - -static void __hyp_text __debug_save_state(struct kvm_vcpu *vcpu, - struct kvm_guest_debug_arch *dbg, - struct kvm_cpu_context *ctxt) +static void __debug_save_state(struct kvm_guest_debug_arch *dbg, + struct kvm_cpu_context *ctxt) { u64 aa64dfr0; int brps, wrps; @@ -145,12 +103,11 @@ static void __hyp_text __debug_save_state(struct kvm_vcpu *vcpu, save_debug(dbg->dbg_wcr, dbgwcr, wrps); save_debug(dbg->dbg_wvr, dbgwvr, wrps); - ctxt->sys_regs[MDCCINT_EL1] = read_sysreg(mdccint_el1); + ctxt_sys_reg(ctxt, MDCCINT_EL1) = read_sysreg(mdccint_el1); } -static void __hyp_text __debug_restore_state(struct kvm_vcpu *vcpu, - struct kvm_guest_debug_arch *dbg, - struct kvm_cpu_context *ctxt) +static void __debug_restore_state(struct kvm_guest_debug_arch *dbg, + struct kvm_cpu_context *ctxt) { u64 aa64dfr0; int brps, wrps; @@ -165,23 +122,16 @@ static void __hyp_text __debug_restore_state(struct kvm_vcpu *vcpu, restore_debug(dbg->dbg_wcr, dbgwcr, wrps); restore_debug(dbg->dbg_wvr, dbgwvr, wrps); - write_sysreg(ctxt->sys_regs[MDCCINT_EL1], mdccint_el1); + write_sysreg(ctxt_sys_reg(ctxt, MDCCINT_EL1), mdccint_el1); } -void __hyp_text __debug_switch_to_guest(struct kvm_vcpu *vcpu) +static inline void __debug_switch_to_guest_common(struct kvm_vcpu *vcpu) { struct kvm_cpu_context *host_ctxt; struct kvm_cpu_context *guest_ctxt; struct kvm_guest_debug_arch *host_dbg; struct kvm_guest_debug_arch *guest_dbg; - /* - * Non-VHE: Disable and flush SPE data generation - * VHE: The vcpu can run, but it can't hide. 
- */ - if (!has_vhe()) - __debug_save_spe_nvhe(&vcpu->arch.host_debug_state.pmscr_el1); - if (!(vcpu->arch.flags & KVM_ARM64_DEBUG_DIRTY)) return; @@ -190,20 +140,17 @@ void __hyp_text __debug_switch_to_guest(struct kvm_vcpu *vcpu) host_dbg = &vcpu->arch.host_debug_state.regs; guest_dbg = kern_hyp_va(vcpu->arch.debug_ptr); - __debug_save_state(vcpu, host_dbg, host_ctxt); - __debug_restore_state(vcpu, guest_dbg, guest_ctxt); + __debug_save_state(host_dbg, host_ctxt); + __debug_restore_state(guest_dbg, guest_ctxt); } -void __hyp_text __debug_switch_to_host(struct kvm_vcpu *vcpu) +static inline void __debug_switch_to_host_common(struct kvm_vcpu *vcpu) { struct kvm_cpu_context *host_ctxt; struct kvm_cpu_context *guest_ctxt; struct kvm_guest_debug_arch *host_dbg; struct kvm_guest_debug_arch *guest_dbg; - if (!has_vhe()) - __debug_restore_spe_nvhe(vcpu->arch.host_debug_state.pmscr_el1); - if (!(vcpu->arch.flags & KVM_ARM64_DEBUG_DIRTY)) return; @@ -212,13 +159,10 @@ void __hyp_text __debug_switch_to_host(struct kvm_vcpu *vcpu) host_dbg = &vcpu->arch.host_debug_state.regs; guest_dbg = kern_hyp_va(vcpu->arch.debug_ptr); - __debug_save_state(vcpu, guest_dbg, guest_ctxt); - __debug_restore_state(vcpu, host_dbg, host_ctxt); + __debug_save_state(guest_dbg, guest_ctxt); + __debug_restore_state(host_dbg, host_ctxt); vcpu->arch.flags &= ~KVM_ARM64_DEBUG_DIRTY; } -u32 __hyp_text __kvm_get_mdcr_el2(void) -{ - return read_sysreg(mdcr_el2); -} +#endif /* __ARM64_KVM_HYP_DEBUG_SR_H__ */ diff --git a/arch/arm64/kvm/hyp/include/hyp/switch.h b/arch/arm64/kvm/hyp/include/hyp/switch.h new file mode 100644 index 000000000000..426ef65601dd --- /dev/null +++ b/arch/arm64/kvm/hyp/include/hyp/switch.h @@ -0,0 +1,511 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2015 - ARM Ltd + * Author: Marc Zyngier <marc.zyngier@arm.com> + */ + +#ifndef __ARM64_KVM_HYP_SWITCH_H__ +#define __ARM64_KVM_HYP_SWITCH_H__ + +#include <linux/arm-smccc.h> +#include <linux/kvm_host.h> +#include <linux/types.h> +#include <linux/jump_label.h> +#include <uapi/linux/psci.h> + +#include <kvm/arm_psci.h> + +#include <asm/barrier.h> +#include <asm/cpufeature.h> +#include <asm/kprobes.h> +#include <asm/kvm_asm.h> +#include <asm/kvm_emulate.h> +#include <asm/kvm_hyp.h> +#include <asm/kvm_mmu.h> +#include <asm/fpsimd.h> +#include <asm/debug-monitors.h> +#include <asm/processor.h> +#include <asm/thread_info.h> + +extern const char __hyp_panic_string[]; + +/* Check whether the FP regs were dirtied while in the host-side run loop: */ +static inline bool update_fp_enabled(struct kvm_vcpu *vcpu) +{ + /* + * When the system doesn't support FP/SIMD, we cannot rely on + * the _TIF_FOREIGN_FPSTATE flag. However, we always inject an + * abort on the very first access to FP and thus we should never + * see KVM_ARM64_FP_ENABLED. For added safety, make sure we always + * trap the accesses. 
+ */ + if (!system_supports_fpsimd() || + vcpu->arch.host_thread_info->flags & _TIF_FOREIGN_FPSTATE) + vcpu->arch.flags &= ~(KVM_ARM64_FP_ENABLED | + KVM_ARM64_FP_HOST); + + return !!(vcpu->arch.flags & KVM_ARM64_FP_ENABLED); +} + +/* Save the 32-bit only FPSIMD system register state */ +static inline void __fpsimd_save_fpexc32(struct kvm_vcpu *vcpu) +{ + if (!vcpu_el1_is_32bit(vcpu)) + return; + + __vcpu_sys_reg(vcpu, FPEXC32_EL2) = read_sysreg(fpexc32_el2); +} + +static inline void __activate_traps_fpsimd32(struct kvm_vcpu *vcpu) +{ + /* + * We are about to set CPTR_EL2.TFP to trap all floating point + * register accesses to EL2, however, the ARM ARM clearly states that + * traps are only taken to EL2 if the operation would not otherwise + * trap to EL1. Therefore, always make sure that for 32-bit guests, + * we set FPEXC.EN to prevent traps to EL1, when setting the TFP bit. + * If FP/ASIMD is not implemented, FPEXC is UNDEFINED and any access to + * it will cause an exception. + */ + if (vcpu_el1_is_32bit(vcpu) && system_supports_fpsimd()) { + write_sysreg(1 << 30, fpexc32_el2); + isb(); + } +} + +static inline void __activate_traps_common(struct kvm_vcpu *vcpu) +{ + /* Trap on AArch32 cp15 c15 (impdef sysregs) accesses (EL1 or EL0) */ + write_sysreg(1 << 15, hstr_el2); + + /* + * Make sure we trap PMU access from EL0 to EL2. Also sanitize + * PMSELR_EL0 to make sure it never contains the cycle + * counter, which could make a PMXEVCNTR_EL0 access UNDEF at + * EL1 instead of being trapped to EL2. + */ + write_sysreg(0, pmselr_el0); + write_sysreg(ARMV8_PMU_USERENR_MASK, pmuserenr_el0); + write_sysreg(vcpu->arch.mdcr_el2, mdcr_el2); +} + +static inline void __deactivate_traps_common(void) +{ + write_sysreg(0, hstr_el2); + write_sysreg(0, pmuserenr_el0); +} + +static inline void ___activate_traps(struct kvm_vcpu *vcpu) +{ + u64 hcr = vcpu->arch.hcr_el2; + + if (cpus_have_final_cap(ARM64_WORKAROUND_CAVIUM_TX2_219_TVM)) + hcr |= HCR_TVM; + + write_sysreg(hcr, hcr_el2); + + if (cpus_have_final_cap(ARM64_HAS_RAS_EXTN) && (hcr & HCR_VSE)) + write_sysreg_s(vcpu->arch.vsesr_el2, SYS_VSESR_EL2); +} + +static inline void ___deactivate_traps(struct kvm_vcpu *vcpu) +{ + /* + * If we pended a virtual abort, preserve it until it gets + * cleared. See D1.14.3 (Virtual Interrupts) for details, but + * the crucial bit is "On taking a vSError interrupt, + * HCR_EL2.VSE is cleared to 0." + */ + if (vcpu->arch.hcr_el2 & HCR_VSE) { + vcpu->arch.hcr_el2 &= ~HCR_VSE; + vcpu->arch.hcr_el2 |= read_sysreg(hcr_el2) & HCR_VSE; + } +} + +static inline void __activate_vm(struct kvm_s2_mmu *mmu) +{ + __load_guest_stage2(mmu); +} + +static inline bool __translate_far_to_hpfar(u64 far, u64 *hpfar) +{ + u64 par, tmp; + + /* + * Resolve the IPA the hard way using the guest VA. + * + * Stage-1 translation already validated the memory access + * rights. As such, we can use the EL1 translation regime, and + * don't have to distinguish between EL0 and EL1 access. + * + * We do need to save/restore PAR_EL1 though, as we haven't + * saved the guest context yet, and we may return early... 
+ */ + par = read_sysreg(par_el1); + asm volatile("at s1e1r, %0" : : "r" (far)); + isb(); + + tmp = read_sysreg(par_el1); + write_sysreg(par, par_el1); + + if (unlikely(tmp & SYS_PAR_EL1_F)) + return false; /* Translation failed, back to guest */ + + /* Convert PAR to HPFAR format */ + *hpfar = PAR_TO_HPFAR(tmp); + return true; +} + +static inline bool __populate_fault_info(struct kvm_vcpu *vcpu) +{ + u8 ec; + u64 esr; + u64 hpfar, far; + + esr = vcpu->arch.fault.esr_el2; + ec = ESR_ELx_EC(esr); + + if (ec != ESR_ELx_EC_DABT_LOW && ec != ESR_ELx_EC_IABT_LOW) + return true; + + far = read_sysreg_el2(SYS_FAR); + + /* + * The HPFAR can be invalid if the stage 2 fault did not + * happen during a stage 1 page table walk (the ESR_EL2.S1PTW + * bit is clear) and one of the two following cases are true: + * 1. The fault was due to a permission fault + * 2. The processor carries errata 834220 + * + * Therefore, for all non S1PTW faults where we either have a + * permission fault or the errata workaround is enabled, we + * resolve the IPA using the AT instruction. + */ + if (!(esr & ESR_ELx_S1PTW) && + (cpus_have_final_cap(ARM64_WORKAROUND_834220) || + (esr & ESR_ELx_FSC_TYPE) == FSC_PERM)) { + if (!__translate_far_to_hpfar(far, &hpfar)) + return false; + } else { + hpfar = read_sysreg(hpfar_el2); + } + + vcpu->arch.fault.far_el2 = far; + vcpu->arch.fault.hpfar_el2 = hpfar; + return true; +} + +/* Check for an FPSIMD/SVE trap and handle as appropriate */ +static inline bool __hyp_handle_fpsimd(struct kvm_vcpu *vcpu) +{ + bool vhe, sve_guest, sve_host; + u8 esr_ec; + + if (!system_supports_fpsimd()) + return false; + + /* + * Currently system_supports_sve() currently implies has_vhe(), + * so the check is redundant. However, has_vhe() can be determined + * statically and helps the compiler remove dead code. + */ + if (has_vhe() && system_supports_sve()) { + sve_guest = vcpu_has_sve(vcpu); + sve_host = vcpu->arch.flags & KVM_ARM64_HOST_SVE_IN_USE; + vhe = true; + } else { + sve_guest = false; + sve_host = false; + vhe = has_vhe(); + } + + esr_ec = kvm_vcpu_trap_get_class(vcpu); + if (esr_ec != ESR_ELx_EC_FP_ASIMD && + esr_ec != ESR_ELx_EC_SVE) + return false; + + /* Don't handle SVE traps for non-SVE vcpus here: */ + if (!sve_guest) + if (esr_ec != ESR_ELx_EC_FP_ASIMD) + return false; + + /* Valid trap. Switch the context: */ + + if (vhe) { + u64 reg = read_sysreg(cpacr_el1) | CPACR_EL1_FPEN; + + if (sve_guest) + reg |= CPACR_EL1_ZEN; + + write_sysreg(reg, cpacr_el1); + } else { + write_sysreg(read_sysreg(cptr_el2) & ~(u64)CPTR_EL2_TFP, + cptr_el2); + } + + isb(); + + if (vcpu->arch.flags & KVM_ARM64_FP_HOST) { + /* + * In the SVE case, VHE is assumed: it is enforced by + * Kconfig and kvm_arch_init(). 
+ */ + if (sve_host) { + struct thread_struct *thread = container_of( + vcpu->arch.host_fpsimd_state, + struct thread_struct, uw.fpsimd_state); + + sve_save_state(sve_pffr(thread), + &vcpu->arch.host_fpsimd_state->fpsr); + } else { + __fpsimd_save_state(vcpu->arch.host_fpsimd_state); + } + + vcpu->arch.flags &= ~KVM_ARM64_FP_HOST; + } + + if (sve_guest) { + sve_load_state(vcpu_sve_pffr(vcpu), + &vcpu->arch.ctxt.fp_regs.fpsr, + sve_vq_from_vl(vcpu->arch.sve_max_vl) - 1); + write_sysreg_s(__vcpu_sys_reg(vcpu, ZCR_EL1), SYS_ZCR_EL12); + } else { + __fpsimd_restore_state(&vcpu->arch.ctxt.fp_regs); + } + + /* Skip restoring fpexc32 for AArch64 guests */ + if (!(read_sysreg(hcr_el2) & HCR_RW)) + write_sysreg(__vcpu_sys_reg(vcpu, FPEXC32_EL2), fpexc32_el2); + + vcpu->arch.flags |= KVM_ARM64_FP_ENABLED; + + return true; +} + +static inline bool handle_tx2_tvm(struct kvm_vcpu *vcpu) +{ + u32 sysreg = esr_sys64_to_sysreg(kvm_vcpu_get_esr(vcpu)); + int rt = kvm_vcpu_sys_get_rt(vcpu); + u64 val = vcpu_get_reg(vcpu, rt); + + /* + * The normal sysreg handling code expects to see the traps, + * let's not do anything here. + */ + if (vcpu->arch.hcr_el2 & HCR_TVM) + return false; + + switch (sysreg) { + case SYS_SCTLR_EL1: + write_sysreg_el1(val, SYS_SCTLR); + break; + case SYS_TTBR0_EL1: + write_sysreg_el1(val, SYS_TTBR0); + break; + case SYS_TTBR1_EL1: + write_sysreg_el1(val, SYS_TTBR1); + break; + case SYS_TCR_EL1: + write_sysreg_el1(val, SYS_TCR); + break; + case SYS_ESR_EL1: + write_sysreg_el1(val, SYS_ESR); + break; + case SYS_FAR_EL1: + write_sysreg_el1(val, SYS_FAR); + break; + case SYS_AFSR0_EL1: + write_sysreg_el1(val, SYS_AFSR0); + break; + case SYS_AFSR1_EL1: + write_sysreg_el1(val, SYS_AFSR1); + break; + case SYS_MAIR_EL1: + write_sysreg_el1(val, SYS_MAIR); + break; + case SYS_AMAIR_EL1: + write_sysreg_el1(val, SYS_AMAIR); + break; + case SYS_CONTEXTIDR_EL1: + write_sysreg_el1(val, SYS_CONTEXTIDR); + break; + default: + return false; + } + + __kvm_skip_instr(vcpu); + return true; +} + +static inline bool esr_is_ptrauth_trap(u32 esr) +{ + u32 ec = ESR_ELx_EC(esr); + + if (ec == ESR_ELx_EC_PAC) + return true; + + if (ec != ESR_ELx_EC_SYS64) + return false; + + switch (esr_sys64_to_sysreg(esr)) { + case SYS_APIAKEYLO_EL1: + case SYS_APIAKEYHI_EL1: + case SYS_APIBKEYLO_EL1: + case SYS_APIBKEYHI_EL1: + case SYS_APDAKEYLO_EL1: + case SYS_APDAKEYHI_EL1: + case SYS_APDBKEYLO_EL1: + case SYS_APDBKEYHI_EL1: + case SYS_APGAKEYLO_EL1: + case SYS_APGAKEYHI_EL1: + return true; + } + + return false; +} + +#define __ptrauth_save_key(ctxt, key) \ + do { \ + u64 __val; \ + __val = read_sysreg_s(SYS_ ## key ## KEYLO_EL1); \ + ctxt_sys_reg(ctxt, key ## KEYLO_EL1) = __val; \ + __val = read_sysreg_s(SYS_ ## key ## KEYHI_EL1); \ + ctxt_sys_reg(ctxt, key ## KEYHI_EL1) = __val; \ +} while(0) + +static inline bool __hyp_handle_ptrauth(struct kvm_vcpu *vcpu) +{ + struct kvm_cpu_context *ctxt; + u64 val; + + if (!vcpu_has_ptrauth(vcpu) || + !esr_is_ptrauth_trap(kvm_vcpu_get_esr(vcpu))) + return false; + + ctxt = &__hyp_this_cpu_ptr(kvm_host_data)->host_ctxt; + __ptrauth_save_key(ctxt, APIA); + __ptrauth_save_key(ctxt, APIB); + __ptrauth_save_key(ctxt, APDA); + __ptrauth_save_key(ctxt, APDB); + __ptrauth_save_key(ctxt, APGA); + + vcpu_ptrauth_enable(vcpu); + + val = read_sysreg(hcr_el2); + val |= (HCR_API | HCR_APK); + write_sysreg(val, hcr_el2); + + return true; +} + +/* + * Return true when we were able to fixup the guest exit and should return to + * the guest, false when we should restore the host state and return to 
the + * main run loop. + */ +static inline bool fixup_guest_exit(struct kvm_vcpu *vcpu, u64 *exit_code) +{ + if (ARM_EXCEPTION_CODE(*exit_code) != ARM_EXCEPTION_IRQ) + vcpu->arch.fault.esr_el2 = read_sysreg_el2(SYS_ESR); + + /* + * We're using the raw exception code in order to only process + * the trap if no SError is pending. We will come back to the + * same PC once the SError has been injected, and replay the + * trapping instruction. + */ + if (*exit_code != ARM_EXCEPTION_TRAP) + goto exit; + + if (cpus_have_final_cap(ARM64_WORKAROUND_CAVIUM_TX2_219_TVM) && + kvm_vcpu_trap_get_class(vcpu) == ESR_ELx_EC_SYS64 && + handle_tx2_tvm(vcpu)) + return true; + + /* + * We trap the first access to the FP/SIMD to save the host context + * and restore the guest context lazily. + * If FP/SIMD is not implemented, handle the trap and inject an + * undefined instruction exception to the guest. + * Similarly for trapped SVE accesses. + */ + if (__hyp_handle_fpsimd(vcpu)) + return true; + + if (__hyp_handle_ptrauth(vcpu)) + return true; + + if (!__populate_fault_info(vcpu)) + return true; + + if (static_branch_unlikely(&vgic_v2_cpuif_trap)) { + bool valid; + + valid = kvm_vcpu_trap_get_class(vcpu) == ESR_ELx_EC_DABT_LOW && + kvm_vcpu_trap_get_fault_type(vcpu) == FSC_FAULT && + kvm_vcpu_dabt_isvalid(vcpu) && + !kvm_vcpu_abt_issea(vcpu) && + !kvm_vcpu_dabt_iss1tw(vcpu); + + if (valid) { + int ret = __vgic_v2_perform_cpuif_access(vcpu); + + if (ret == 1) + return true; + + /* Promote an illegal access to an SError.*/ + if (ret == -1) + *exit_code = ARM_EXCEPTION_EL1_SERROR; + + goto exit; + } + } + + if (static_branch_unlikely(&vgic_v3_cpuif_trap) && + (kvm_vcpu_trap_get_class(vcpu) == ESR_ELx_EC_SYS64 || + kvm_vcpu_trap_get_class(vcpu) == ESR_ELx_EC_CP15_32)) { + int ret = __vgic_v3_perform_cpuif_access(vcpu); + + if (ret == 1) + return true; + } + +exit: + /* Return to the host kernel and handle the exit */ + return false; +} + +static inline bool __needs_ssbd_off(struct kvm_vcpu *vcpu) +{ + if (!cpus_have_final_cap(ARM64_SSBD)) + return false; + + return !(vcpu->arch.workaround_flags & VCPU_WORKAROUND_2_FLAG); +} + +static inline void __set_guest_arch_workaround_state(struct kvm_vcpu *vcpu) +{ +#ifdef CONFIG_ARM64_SSBD + /* + * The host runs with the workaround always present. If the + * guest wants it disabled, so be it... + */ + if (__needs_ssbd_off(vcpu) && + __hyp_this_cpu_read(arm64_ssbd_callback_required)) + arm_smccc_1_1_smc(ARM_SMCCC_ARCH_WORKAROUND_2, 0, NULL); +#endif +} + +static inline void __set_host_arch_workaround_state(struct kvm_vcpu *vcpu) +{ +#ifdef CONFIG_ARM64_SSBD + /* + * If the guest has disabled the workaround, bring it back on. 
+ */ + if (__needs_ssbd_off(vcpu) && + __hyp_this_cpu_read(arm64_ssbd_callback_required)) + arm_smccc_1_1_smc(ARM_SMCCC_ARCH_WORKAROUND_2, 1, NULL); +#endif +} + +#endif /* __ARM64_KVM_HYP_SWITCH_H__ */ diff --git a/arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h b/arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h new file mode 100644 index 000000000000..7a986030145f --- /dev/null +++ b/arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h @@ -0,0 +1,193 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2012-2015 - ARM Ltd + * Author: Marc Zyngier <marc.zyngier@arm.com> + */ + +#ifndef __ARM64_KVM_HYP_SYSREG_SR_H__ +#define __ARM64_KVM_HYP_SYSREG_SR_H__ + +#include <linux/compiler.h> +#include <linux/kvm_host.h> + +#include <asm/kprobes.h> +#include <asm/kvm_asm.h> +#include <asm/kvm_emulate.h> +#include <asm/kvm_hyp.h> + +static inline void __sysreg_save_common_state(struct kvm_cpu_context *ctxt) +{ + ctxt_sys_reg(ctxt, MDSCR_EL1) = read_sysreg(mdscr_el1); +} + +static inline void __sysreg_save_user_state(struct kvm_cpu_context *ctxt) +{ + ctxt_sys_reg(ctxt, TPIDR_EL0) = read_sysreg(tpidr_el0); + ctxt_sys_reg(ctxt, TPIDRRO_EL0) = read_sysreg(tpidrro_el0); +} + +static inline void __sysreg_save_el1_state(struct kvm_cpu_context *ctxt) +{ + ctxt_sys_reg(ctxt, CSSELR_EL1) = read_sysreg(csselr_el1); + ctxt_sys_reg(ctxt, SCTLR_EL1) = read_sysreg_el1(SYS_SCTLR); + ctxt_sys_reg(ctxt, CPACR_EL1) = read_sysreg_el1(SYS_CPACR); + ctxt_sys_reg(ctxt, TTBR0_EL1) = read_sysreg_el1(SYS_TTBR0); + ctxt_sys_reg(ctxt, TTBR1_EL1) = read_sysreg_el1(SYS_TTBR1); + ctxt_sys_reg(ctxt, TCR_EL1) = read_sysreg_el1(SYS_TCR); + ctxt_sys_reg(ctxt, ESR_EL1) = read_sysreg_el1(SYS_ESR); + ctxt_sys_reg(ctxt, AFSR0_EL1) = read_sysreg_el1(SYS_AFSR0); + ctxt_sys_reg(ctxt, AFSR1_EL1) = read_sysreg_el1(SYS_AFSR1); + ctxt_sys_reg(ctxt, FAR_EL1) = read_sysreg_el1(SYS_FAR); + ctxt_sys_reg(ctxt, MAIR_EL1) = read_sysreg_el1(SYS_MAIR); + ctxt_sys_reg(ctxt, VBAR_EL1) = read_sysreg_el1(SYS_VBAR); + ctxt_sys_reg(ctxt, CONTEXTIDR_EL1) = read_sysreg_el1(SYS_CONTEXTIDR); + ctxt_sys_reg(ctxt, AMAIR_EL1) = read_sysreg_el1(SYS_AMAIR); + ctxt_sys_reg(ctxt, CNTKCTL_EL1) = read_sysreg_el1(SYS_CNTKCTL); + ctxt_sys_reg(ctxt, PAR_EL1) = read_sysreg(par_el1); + ctxt_sys_reg(ctxt, TPIDR_EL1) = read_sysreg(tpidr_el1); + + ctxt_sys_reg(ctxt, SP_EL1) = read_sysreg(sp_el1); + ctxt_sys_reg(ctxt, ELR_EL1) = read_sysreg_el1(SYS_ELR); + ctxt_sys_reg(ctxt, SPSR_EL1) = read_sysreg_el1(SYS_SPSR); +} + +static inline void __sysreg_save_el2_return_state(struct kvm_cpu_context *ctxt) +{ + ctxt->regs.pc = read_sysreg_el2(SYS_ELR); + ctxt->regs.pstate = read_sysreg_el2(SYS_SPSR); + + if (cpus_have_final_cap(ARM64_HAS_RAS_EXTN)) + ctxt_sys_reg(ctxt, DISR_EL1) = read_sysreg_s(SYS_VDISR_EL2); +} + +static inline void __sysreg_restore_common_state(struct kvm_cpu_context *ctxt) +{ + write_sysreg(ctxt_sys_reg(ctxt, MDSCR_EL1), mdscr_el1); +} + +static inline void __sysreg_restore_user_state(struct kvm_cpu_context *ctxt) +{ + write_sysreg(ctxt_sys_reg(ctxt, TPIDR_EL0), tpidr_el0); + write_sysreg(ctxt_sys_reg(ctxt, TPIDRRO_EL0), tpidrro_el0); +} + +static inline void __sysreg_restore_el1_state(struct kvm_cpu_context *ctxt) +{ + write_sysreg(ctxt_sys_reg(ctxt, MPIDR_EL1), vmpidr_el2); + write_sysreg(ctxt_sys_reg(ctxt, CSSELR_EL1), csselr_el1); + + if (has_vhe() || + !cpus_have_final_cap(ARM64_WORKAROUND_SPECULATIVE_AT)) { + write_sysreg_el1(ctxt_sys_reg(ctxt, SCTLR_EL1), SYS_SCTLR); + write_sysreg_el1(ctxt_sys_reg(ctxt, TCR_EL1), SYS_TCR); + } else if (!ctxt->__hyp_running_vcpu) { + 
/* + * Must only be done for guest registers, hence the context + * test. We're coming from the host, so SCTLR.M is already + * set. Pairs with nVHE's __activate_traps(). + */ + write_sysreg_el1((ctxt_sys_reg(ctxt, TCR_EL1) | + TCR_EPD1_MASK | TCR_EPD0_MASK), + SYS_TCR); + isb(); + } + + write_sysreg_el1(ctxt_sys_reg(ctxt, CPACR_EL1), SYS_CPACR); + write_sysreg_el1(ctxt_sys_reg(ctxt, TTBR0_EL1), SYS_TTBR0); + write_sysreg_el1(ctxt_sys_reg(ctxt, TTBR1_EL1), SYS_TTBR1); + write_sysreg_el1(ctxt_sys_reg(ctxt, ESR_EL1), SYS_ESR); + write_sysreg_el1(ctxt_sys_reg(ctxt, AFSR0_EL1), SYS_AFSR0); + write_sysreg_el1(ctxt_sys_reg(ctxt, AFSR1_EL1), SYS_AFSR1); + write_sysreg_el1(ctxt_sys_reg(ctxt, FAR_EL1), SYS_FAR); + write_sysreg_el1(ctxt_sys_reg(ctxt, MAIR_EL1), SYS_MAIR); + write_sysreg_el1(ctxt_sys_reg(ctxt, VBAR_EL1), SYS_VBAR); + write_sysreg_el1(ctxt_sys_reg(ctxt, CONTEXTIDR_EL1), SYS_CONTEXTIDR); + write_sysreg_el1(ctxt_sys_reg(ctxt, AMAIR_EL1), SYS_AMAIR); + write_sysreg_el1(ctxt_sys_reg(ctxt, CNTKCTL_EL1), SYS_CNTKCTL); + write_sysreg(ctxt_sys_reg(ctxt, PAR_EL1), par_el1); + write_sysreg(ctxt_sys_reg(ctxt, TPIDR_EL1), tpidr_el1); + + if (!has_vhe() && + cpus_have_final_cap(ARM64_WORKAROUND_SPECULATIVE_AT) && + ctxt->__hyp_running_vcpu) { + /* + * Must only be done for host registers, hence the context + * test. Pairs with nVHE's __deactivate_traps(). + */ + isb(); + /* + * At this stage, and thanks to the above isb(), S2 is + * deconfigured and disabled. We can now restore the host's + * S1 configuration: SCTLR, and only then TCR. + */ + write_sysreg_el1(ctxt_sys_reg(ctxt, SCTLR_EL1), SYS_SCTLR); + isb(); + write_sysreg_el1(ctxt_sys_reg(ctxt, TCR_EL1), SYS_TCR); + } + + write_sysreg(ctxt_sys_reg(ctxt, SP_EL1), sp_el1); + write_sysreg_el1(ctxt_sys_reg(ctxt, ELR_EL1), SYS_ELR); + write_sysreg_el1(ctxt_sys_reg(ctxt, SPSR_EL1), SYS_SPSR); +} + +static inline void __sysreg_restore_el2_return_state(struct kvm_cpu_context *ctxt) +{ + u64 pstate = ctxt->regs.pstate; + u64 mode = pstate & PSR_AA32_MODE_MASK; + + /* + * Safety check to ensure we're setting the CPU up to enter the guest + * in a less privileged mode. + * + * If we are attempting a return to EL2 or higher in AArch64 state, + * program SPSR_EL2 with M=EL2h and the IL bit set which ensures that + * we'll take an illegal exception state exception immediately after + * the ERET to the guest. Attempts to return to AArch32 Hyp will + * result in an illegal exception return because EL2's execution state + * is determined by SCR_EL3.RW. 
+ */ + if (!(mode & PSR_MODE32_BIT) && mode >= PSR_MODE_EL2t) + pstate = PSR_MODE_EL2h | PSR_IL_BIT; + + write_sysreg_el2(ctxt->regs.pc, SYS_ELR); + write_sysreg_el2(pstate, SYS_SPSR); + + if (cpus_have_final_cap(ARM64_HAS_RAS_EXTN)) + write_sysreg_s(ctxt_sys_reg(ctxt, DISR_EL1), SYS_VDISR_EL2); +} + +static inline void __sysreg32_save_state(struct kvm_vcpu *vcpu) +{ + if (!vcpu_el1_is_32bit(vcpu)) + return; + + vcpu->arch.ctxt.spsr_abt = read_sysreg(spsr_abt); + vcpu->arch.ctxt.spsr_und = read_sysreg(spsr_und); + vcpu->arch.ctxt.spsr_irq = read_sysreg(spsr_irq); + vcpu->arch.ctxt.spsr_fiq = read_sysreg(spsr_fiq); + + __vcpu_sys_reg(vcpu, DACR32_EL2) = read_sysreg(dacr32_el2); + __vcpu_sys_reg(vcpu, IFSR32_EL2) = read_sysreg(ifsr32_el2); + + if (has_vhe() || vcpu->arch.flags & KVM_ARM64_DEBUG_DIRTY) + __vcpu_sys_reg(vcpu, DBGVCR32_EL2) = read_sysreg(dbgvcr32_el2); +} + +static inline void __sysreg32_restore_state(struct kvm_vcpu *vcpu) +{ + if (!vcpu_el1_is_32bit(vcpu)) + return; + + write_sysreg(vcpu->arch.ctxt.spsr_abt, spsr_abt); + write_sysreg(vcpu->arch.ctxt.spsr_und, spsr_und); + write_sysreg(vcpu->arch.ctxt.spsr_irq, spsr_irq); + write_sysreg(vcpu->arch.ctxt.spsr_fiq, spsr_fiq); + + write_sysreg(__vcpu_sys_reg(vcpu, DACR32_EL2), dacr32_el2); + write_sysreg(__vcpu_sys_reg(vcpu, IFSR32_EL2), ifsr32_el2); + + if (has_vhe() || vcpu->arch.flags & KVM_ARM64_DEBUG_DIRTY) + write_sysreg(__vcpu_sys_reg(vcpu, DBGVCR32_EL2), dbgvcr32_el2); +} + +#endif /* __ARM64_KVM_HYP_SYSREG_SR_H__ */ diff --git a/arch/arm64/kvm/hyp/nvhe/Makefile b/arch/arm64/kvm/hyp/nvhe/Makefile new file mode 100644 index 000000000000..aef76487edc2 --- /dev/null +++ b/arch/arm64/kvm/hyp/nvhe/Makefile @@ -0,0 +1,62 @@ +# SPDX-License-Identifier: GPL-2.0 +# +# Makefile for Kernel-based Virtual Machine module, HYP/nVHE part +# + +asflags-y := -D__KVM_NVHE_HYPERVISOR__ +ccflags-y := -D__KVM_NVHE_HYPERVISOR__ + +obj-y := timer-sr.o sysreg-sr.o debug-sr.o switch.o tlb.o hyp-init.o +obj-y += ../vgic-v3-sr.o ../aarch32.o ../vgic-v2-cpuif-proxy.o ../entry.o \ + ../fpsimd.o ../hyp-entry.o + +obj-y := $(patsubst %.o,%.hyp.o,$(obj-y)) +extra-y := $(patsubst %.hyp.o,%.hyp.tmp.o,$(obj-y)) + +$(obj)/%.hyp.tmp.o: $(src)/%.c FORCE + $(call if_changed_rule,cc_o_c) +$(obj)/%.hyp.tmp.o: $(src)/%.S FORCE + $(call if_changed_rule,as_o_S) +$(obj)/%.hyp.o: $(obj)/%.hyp.tmp.o FORCE + $(call if_changed,hypcopy) + +# Disable reordering functions by GCC (enabled at -O2). +# This pass puts functions into '.text.*' sections to aid the linker +# in optimizing ELF layout. See HYPCOPY comment below for more info. +ccflags-y += $(call cc-option,-fno-reorder-functions) + +# The HYPCOPY command uses `objcopy` to prefix all ELF symbol names +# and relevant ELF section names to avoid clashes with VHE code/data. +# +# Hyp code is assumed to be in the '.text' section of the input object +# files (with the exception of specialized sections such as +# '.hyp.idmap.text'). This assumption may be broken by a compiler that +# divides code into sections like '.text.unlikely' so as to optimize +# ELF layout. HYPCOPY checks that no such sections exist in the input +# using `objdump`, otherwise they would be linked together with other +# kernel code and not memory-mapped correctly at runtime. 
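For readers tracking the new build split: the practical consequence of HYPCOPY's symbol prefixing is that kernel-proper C code must name nVHE hypervisor symbols with the __kvm_nvhe_ prefix. Below is a minimal sketch of the token-pasting idiom that keeps such references readable; the macro name and the example symbol are assumptions for illustration only, not taken from this hunk.

/* Illustrative only: referring to an objcopy-prefixed nVHE symbol from
 * ordinary kernel C. "kvm_nvhe_sym" and "example_hyp_symbol" are assumed
 * names for this sketch. */
#define kvm_nvhe_sym(sym)	__kvm_nvhe_##sym

/* Declares __kvm_nvhe_example_hyp_symbol, i.e. a symbol defined in one of
 * the *.hyp.o objects produced by the HYPCOPY rule defined next. */
extern char kvm_nvhe_sym(example_hyp_symbol)[];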
+quiet_cmd_hypcopy = HYPCOPY $@ + cmd_hypcopy = \ + if $(OBJDUMP) -h $< | grep -F '.text.'; then \ + echo "$@: function reordering not supported in nVHE hyp code" >&2; \ + /bin/false; \ + fi; \ + $(OBJCOPY) --prefix-symbols=__kvm_nvhe_ \ + --rename-section=.text=.hyp.text \ + $< $@ + +# Remove ftrace and Shadow Call Stack CFLAGS. +# This is equivalent to the 'notrace' and '__noscs' annotations. +KBUILD_CFLAGS := $(filter-out $(CC_FLAGS_FTRACE) $(CC_FLAGS_SCS), $(KBUILD_CFLAGS)) + +# KVM nVHE code is run at a different exception code with a different map, so +# compiler instrumentation that inserts callbacks or checks into the code may +# cause crashes. Just disable it. +GCOV_PROFILE := n +KASAN_SANITIZE := n +UBSAN_SANITIZE := n +KCOV_INSTRUMENT := n + +# Skip objtool checking for this directory because nVHE code is compiled with +# non-standard build rules. +OBJECT_FILES_NON_STANDARD := y diff --git a/arch/arm64/kvm/hyp/nvhe/debug-sr.c b/arch/arm64/kvm/hyp/nvhe/debug-sr.c new file mode 100644 index 000000000000..91a711aa8382 --- /dev/null +++ b/arch/arm64/kvm/hyp/nvhe/debug-sr.c @@ -0,0 +1,77 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2015 - ARM Ltd + * Author: Marc Zyngier <marc.zyngier@arm.com> + */ + +#include <hyp/debug-sr.h> + +#include <linux/compiler.h> +#include <linux/kvm_host.h> + +#include <asm/debug-monitors.h> +#include <asm/kvm_asm.h> +#include <asm/kvm_hyp.h> +#include <asm/kvm_mmu.h> + +static void __debug_save_spe(u64 *pmscr_el1) +{ + u64 reg; + + /* Clear pmscr in case of early return */ + *pmscr_el1 = 0; + + /* SPE present on this CPU? */ + if (!cpuid_feature_extract_unsigned_field(read_sysreg(id_aa64dfr0_el1), + ID_AA64DFR0_PMSVER_SHIFT)) + return; + + /* Yes; is it owned by EL3? */ + reg = read_sysreg_s(SYS_PMBIDR_EL1); + if (reg & BIT(SYS_PMBIDR_EL1_P_SHIFT)) + return; + + /* No; is the host actually using the thing? 
*/ + reg = read_sysreg_s(SYS_PMBLIMITR_EL1); + if (!(reg & BIT(SYS_PMBLIMITR_EL1_E_SHIFT))) + return; + + /* Yes; save the control register and disable data generation */ + *pmscr_el1 = read_sysreg_s(SYS_PMSCR_EL1); + write_sysreg_s(0, SYS_PMSCR_EL1); + isb(); + + /* Now drain all buffered data to memory */ + psb_csync(); + dsb(nsh); +} + +static void __debug_restore_spe(u64 pmscr_el1) +{ + if (!pmscr_el1) + return; + + /* The host page table is installed, but not yet synchronised */ + isb(); + + /* Re-enable data generation */ + write_sysreg_s(pmscr_el1, SYS_PMSCR_EL1); +} + +void __debug_switch_to_guest(struct kvm_vcpu *vcpu) +{ + /* Disable and flush SPE data generation */ + __debug_save_spe(&vcpu->arch.host_debug_state.pmscr_el1); + __debug_switch_to_guest_common(vcpu); +} + +void __debug_switch_to_host(struct kvm_vcpu *vcpu) +{ + __debug_restore_spe(vcpu->arch.host_debug_state.pmscr_el1); + __debug_switch_to_host_common(vcpu); +} + +u32 __kvm_get_mdcr_el2(void) +{ + return read_sysreg(mdcr_el2); +} diff --git a/arch/arm64/kvm/hyp-init.S b/arch/arm64/kvm/hyp/nvhe/hyp-init.S index 86971fe26f3d..d9434e90c06d 100644 --- a/arch/arm64/kvm/hyp-init.S +++ b/arch/arm64/kvm/hyp/nvhe/hyp-init.S @@ -105,6 +105,11 @@ alternative_else_nop_endif */ mov_q x4, (SCTLR_EL2_RES1 | (SCTLR_ELx_FLAGS & ~SCTLR_ELx_A)) CPU_BE( orr x4, x4, #SCTLR_ELx_EE) +alternative_if ARM64_HAS_ADDRESS_AUTH + mov_q x5, (SCTLR_ELx_ENIA | SCTLR_ELx_ENIB | \ + SCTLR_ELx_ENDA | SCTLR_ELx_ENDB) + orr x4, x4, x5 +alternative_else_nop_endif msr sctlr_el2, x4 isb diff --git a/arch/arm64/kvm/hyp/nvhe/switch.c b/arch/arm64/kvm/hyp/nvhe/switch.c new file mode 100644 index 000000000000..341be2f2f312 --- /dev/null +++ b/arch/arm64/kvm/hyp/nvhe/switch.c @@ -0,0 +1,272 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2015 - ARM Ltd + * Author: Marc Zyngier <marc.zyngier@arm.com> + */ + +#include <hyp/switch.h> +#include <hyp/sysreg-sr.h> + +#include <linux/arm-smccc.h> +#include <linux/kvm_host.h> +#include <linux/types.h> +#include <linux/jump_label.h> +#include <uapi/linux/psci.h> + +#include <kvm/arm_psci.h> + +#include <asm/barrier.h> +#include <asm/cpufeature.h> +#include <asm/kprobes.h> +#include <asm/kvm_asm.h> +#include <asm/kvm_emulate.h> +#include <asm/kvm_hyp.h> +#include <asm/kvm_mmu.h> +#include <asm/fpsimd.h> +#include <asm/debug-monitors.h> +#include <asm/processor.h> +#include <asm/thread_info.h> + +static void __activate_traps(struct kvm_vcpu *vcpu) +{ + u64 val; + + ___activate_traps(vcpu); + __activate_traps_common(vcpu); + + val = CPTR_EL2_DEFAULT; + val |= CPTR_EL2_TTA | CPTR_EL2_TZ | CPTR_EL2_TAM; + if (!update_fp_enabled(vcpu)) { + val |= CPTR_EL2_TFP; + __activate_traps_fpsimd32(vcpu); + } + + write_sysreg(val, cptr_el2); + + if (cpus_have_final_cap(ARM64_WORKAROUND_SPECULATIVE_AT)) { + struct kvm_cpu_context *ctxt = &vcpu->arch.ctxt; + + isb(); + /* + * At this stage, and thanks to the above isb(), S2 is + * configured and enabled. We can now restore the guest's S1 + * configuration: SCTLR, and only then TCR. 
+ */ + write_sysreg_el1(ctxt_sys_reg(ctxt, SCTLR_EL1), SYS_SCTLR); + isb(); + write_sysreg_el1(ctxt_sys_reg(ctxt, TCR_EL1), SYS_TCR); + } +} + +static void __deactivate_traps(struct kvm_vcpu *vcpu) +{ + u64 mdcr_el2; + + ___deactivate_traps(vcpu); + + mdcr_el2 = read_sysreg(mdcr_el2); + + if (cpus_have_final_cap(ARM64_WORKAROUND_SPECULATIVE_AT)) { + u64 val; + + /* + * Set the TCR and SCTLR registers in the exact opposite + * sequence as __activate_traps (first prevent walks, + * then force the MMU on). A generous sprinkling of isb() + * ensure that things happen in this exact order. + */ + val = read_sysreg_el1(SYS_TCR); + write_sysreg_el1(val | TCR_EPD1_MASK | TCR_EPD0_MASK, SYS_TCR); + isb(); + val = read_sysreg_el1(SYS_SCTLR); + write_sysreg_el1(val | SCTLR_ELx_M, SYS_SCTLR); + isb(); + } + + __deactivate_traps_common(); + + mdcr_el2 &= MDCR_EL2_HPMN_MASK; + mdcr_el2 |= MDCR_EL2_E2PB_MASK << MDCR_EL2_E2PB_SHIFT; + + write_sysreg(mdcr_el2, mdcr_el2); + write_sysreg(HCR_HOST_NVHE_FLAGS, hcr_el2); + write_sysreg(CPTR_EL2_DEFAULT, cptr_el2); +} + +static void __deactivate_vm(struct kvm_vcpu *vcpu) +{ + write_sysreg(0, vttbr_el2); +} + +/* Save VGICv3 state on non-VHE systems */ +static void __hyp_vgic_save_state(struct kvm_vcpu *vcpu) +{ + if (static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif)) { + __vgic_v3_save_state(&vcpu->arch.vgic_cpu.vgic_v3); + __vgic_v3_deactivate_traps(&vcpu->arch.vgic_cpu.vgic_v3); + } +} + +/* Restore VGICv3 state on non_VEH systems */ +static void __hyp_vgic_restore_state(struct kvm_vcpu *vcpu) +{ + if (static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif)) { + __vgic_v3_activate_traps(&vcpu->arch.vgic_cpu.vgic_v3); + __vgic_v3_restore_state(&vcpu->arch.vgic_cpu.vgic_v3); + } +} + +/** + * Disable host events, enable guest events + */ +static bool __pmu_switch_to_guest(struct kvm_cpu_context *host_ctxt) +{ + struct kvm_host_data *host; + struct kvm_pmu_events *pmu; + + host = container_of(host_ctxt, struct kvm_host_data, host_ctxt); + pmu = &host->pmu_events; + + if (pmu->events_host) + write_sysreg(pmu->events_host, pmcntenclr_el0); + + if (pmu->events_guest) + write_sysreg(pmu->events_guest, pmcntenset_el0); + + return (pmu->events_host || pmu->events_guest); +} + +/** + * Disable guest events, enable host events + */ +static void __pmu_switch_to_host(struct kvm_cpu_context *host_ctxt) +{ + struct kvm_host_data *host; + struct kvm_pmu_events *pmu; + + host = container_of(host_ctxt, struct kvm_host_data, host_ctxt); + pmu = &host->pmu_events; + + if (pmu->events_guest) + write_sysreg(pmu->events_guest, pmcntenclr_el0); + + if (pmu->events_host) + write_sysreg(pmu->events_host, pmcntenset_el0); +} + +/* Switch to the guest for legacy non-VHE systems */ +int __kvm_vcpu_run(struct kvm_vcpu *vcpu) +{ + struct kvm_cpu_context *host_ctxt; + struct kvm_cpu_context *guest_ctxt; + bool pmu_switch_needed; + u64 exit_code; + + /* + * Having IRQs masked via PMR when entering the guest means the GIC + * will not signal the CPU of interrupts of lower priority, and the + * only way to get out will be via guest exceptions. + * Naturally, we want to avoid this. 
+ */ + if (system_uses_irq_prio_masking()) { + gic_write_pmr(GIC_PRIO_IRQON | GIC_PRIO_PSR_I_SET); + pmr_sync(); + } + + vcpu = kern_hyp_va(vcpu); + + host_ctxt = &__hyp_this_cpu_ptr(kvm_host_data)->host_ctxt; + host_ctxt->__hyp_running_vcpu = vcpu; + guest_ctxt = &vcpu->arch.ctxt; + + pmu_switch_needed = __pmu_switch_to_guest(host_ctxt); + + __sysreg_save_state_nvhe(host_ctxt); + + /* + * We must restore the 32-bit state before the sysregs, thanks + * to erratum #852523 (Cortex-A57) or #853709 (Cortex-A72). + * + * Also, and in order to be able to deal with erratum #1319537 (A57) + * and #1319367 (A72), we must ensure that all VM-related sysreg are + * restored before we enable S2 translation. + */ + __sysreg32_restore_state(vcpu); + __sysreg_restore_state_nvhe(guest_ctxt); + + __activate_vm(kern_hyp_va(vcpu->arch.hw_mmu)); + __activate_traps(vcpu); + + __hyp_vgic_restore_state(vcpu); + __timer_enable_traps(vcpu); + + __debug_switch_to_guest(vcpu); + + __set_guest_arch_workaround_state(vcpu); + + do { + /* Jump in the fire! */ + exit_code = __guest_enter(vcpu, host_ctxt); + + /* And we're baaack! */ + } while (fixup_guest_exit(vcpu, &exit_code)); + + __set_host_arch_workaround_state(vcpu); + + __sysreg_save_state_nvhe(guest_ctxt); + __sysreg32_save_state(vcpu); + __timer_disable_traps(vcpu); + __hyp_vgic_save_state(vcpu); + + __deactivate_traps(vcpu); + __deactivate_vm(vcpu); + + __sysreg_restore_state_nvhe(host_ctxt); + + if (vcpu->arch.flags & KVM_ARM64_FP_ENABLED) + __fpsimd_save_fpexc32(vcpu); + + /* + * This must come after restoring the host sysregs, since a non-VHE + * system may enable SPE here and make use of the TTBRs. + */ + __debug_switch_to_host(vcpu); + + if (pmu_switch_needed) + __pmu_switch_to_host(host_ctxt); + + /* Returning to host will clear PSR.I, remask PMR if needed */ + if (system_uses_irq_prio_masking()) + gic_write_pmr(GIC_PRIO_IRQOFF); + + return exit_code; +} + +void __noreturn hyp_panic(struct kvm_cpu_context *host_ctxt) +{ + u64 spsr = read_sysreg_el2(SYS_SPSR); + u64 elr = read_sysreg_el2(SYS_ELR); + u64 par = read_sysreg(par_el1); + struct kvm_vcpu *vcpu = host_ctxt->__hyp_running_vcpu; + unsigned long str_va; + + if (read_sysreg(vttbr_el2)) { + __timer_disable_traps(vcpu); + __deactivate_traps(vcpu); + __deactivate_vm(vcpu); + __sysreg_restore_state_nvhe(host_ctxt); + } + + /* + * Force the panic string to be loaded from the literal pool, + * making sure it is a kernel address and not a PC-relative + * reference. + */ + asm volatile("ldr %0, =%1" : "=r" (str_va) : "S" (__hyp_panic_string)); + + __hyp_do_panic(str_va, + spsr, elr, + read_sysreg(esr_el2), read_sysreg_el2(SYS_FAR), + read_sysreg(hpfar_el2), par, vcpu); + unreachable(); +} diff --git a/arch/arm64/kvm/hyp/nvhe/sysreg-sr.c b/arch/arm64/kvm/hyp/nvhe/sysreg-sr.c new file mode 100644 index 000000000000..88a25fc8fcd3 --- /dev/null +++ b/arch/arm64/kvm/hyp/nvhe/sysreg-sr.c @@ -0,0 +1,46 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2012-2015 - ARM Ltd + * Author: Marc Zyngier <marc.zyngier@arm.com> + */ + +#include <hyp/sysreg-sr.h> + +#include <linux/compiler.h> +#include <linux/kvm_host.h> + +#include <asm/kprobes.h> +#include <asm/kvm_asm.h> +#include <asm/kvm_emulate.h> +#include <asm/kvm_hyp.h> + +/* + * Non-VHE: Both host and guest must save everything. 
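Usage note for the nVHE run loop above: the host does not call __kvm_vcpu_run() directly; it issues a hyp call that traps to EL2, where the function runs. A hedged sketch of such a call site follows; the wrapper function is illustrative, and the actual caller lives in the generic KVM/arm64 run loop outside this hunk.

#include <linux/kvm_host.h>
#include <asm/kvm_asm.h>

/* Illustrative wrapper, not the kernel's actual call site. */
static int run_vcpu_at_el2(struct kvm_vcpu *vcpu)
{
	/* HVC into the hypervisor; __kvm_vcpu_run() then executes at EL2. */
	return kvm_call_hyp_ret(__kvm_vcpu_run, vcpu);
}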
+ */ + +void __sysreg_save_state_nvhe(struct kvm_cpu_context *ctxt) +{ + __sysreg_save_el1_state(ctxt); + __sysreg_save_common_state(ctxt); + __sysreg_save_user_state(ctxt); + __sysreg_save_el2_return_state(ctxt); +} + +void __sysreg_restore_state_nvhe(struct kvm_cpu_context *ctxt) +{ + __sysreg_restore_el1_state(ctxt); + __sysreg_restore_common_state(ctxt); + __sysreg_restore_user_state(ctxt); + __sysreg_restore_el2_return_state(ctxt); +} + +void __kvm_enable_ssbs(void) +{ + u64 tmp; + + asm volatile( + "mrs %0, sctlr_el2\n" + "orr %0, %0, %1\n" + "msr sctlr_el2, %0" + : "=&r" (tmp) : "L" (SCTLR_ELx_DSSBS)); +} diff --git a/arch/arm64/kvm/hyp/timer-sr.c b/arch/arm64/kvm/hyp/nvhe/timer-sr.c index fb5c0be33223..9072e71693ba 100644 --- a/arch/arm64/kvm/hyp/timer-sr.c +++ b/arch/arm64/kvm/hyp/nvhe/timer-sr.c @@ -10,7 +10,7 @@ #include <asm/kvm_hyp.h> -void __hyp_text __kvm_timer_set_cntvoff(u64 cntvoff) +void __kvm_timer_set_cntvoff(u64 cntvoff) { write_sysreg(cntvoff, cntvoff_el2); } @@ -19,7 +19,7 @@ void __hyp_text __kvm_timer_set_cntvoff(u64 cntvoff) * Should only be called on non-VHE systems. * VHE systems use EL2 timers and configure EL1 timers in kvm_timer_init_vhe(). */ -void __hyp_text __timer_disable_traps(struct kvm_vcpu *vcpu) +void __timer_disable_traps(struct kvm_vcpu *vcpu) { u64 val; @@ -33,7 +33,7 @@ void __hyp_text __timer_disable_traps(struct kvm_vcpu *vcpu) * Should only be called on non-VHE systems. * VHE systems use EL2 timers and configure EL1 timers in kvm_timer_init_vhe(). */ -void __hyp_text __timer_enable_traps(struct kvm_vcpu *vcpu) +void __timer_enable_traps(struct kvm_vcpu *vcpu) { u64 val; diff --git a/arch/arm64/kvm/hyp/nvhe/tlb.c b/arch/arm64/kvm/hyp/nvhe/tlb.c new file mode 100644 index 000000000000..69eae608d670 --- /dev/null +++ b/arch/arm64/kvm/hyp/nvhe/tlb.c @@ -0,0 +1,154 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2015 - ARM Ltd + * Author: Marc Zyngier <marc.zyngier@arm.com> + */ + +#include <asm/kvm_hyp.h> +#include <asm/kvm_mmu.h> +#include <asm/tlbflush.h> + +struct tlb_inv_context { + u64 tcr; +}; + +static void __tlb_switch_to_guest(struct kvm_s2_mmu *mmu, + struct tlb_inv_context *cxt) +{ + if (cpus_have_final_cap(ARM64_WORKAROUND_SPECULATIVE_AT)) { + u64 val; + + /* + * For CPUs that are affected by ARM 1319367, we need to + * avoid a host Stage-1 walk while we have the guest's + * VMID set in the VTTBR in order to invalidate TLBs. + * We're guaranteed that the S1 MMU is enabled, so we can + * simply set the EPD bits to avoid any further TLB fill. + */ + val = cxt->tcr = read_sysreg_el1(SYS_TCR); + val |= TCR_EPD1_MASK | TCR_EPD0_MASK; + write_sysreg_el1(val, SYS_TCR); + isb(); + } + + __load_guest_stage2(mmu); +} + +static void __tlb_switch_to_host(struct tlb_inv_context *cxt) +{ + write_sysreg(0, vttbr_el2); + + if (cpus_have_final_cap(ARM64_WORKAROUND_SPECULATIVE_AT)) { + /* Ensure write of the host VMID */ + isb(); + /* Restore the host's TCR_EL1 */ + write_sysreg_el1(cxt->tcr, SYS_TCR); + } +} + +void __kvm_tlb_flush_vmid_ipa(struct kvm_s2_mmu *mmu, + phys_addr_t ipa, int level) +{ + struct tlb_inv_context cxt; + + dsb(ishst); + + /* Switch to requested VMID */ + mmu = kern_hyp_va(mmu); + __tlb_switch_to_guest(mmu, &cxt); + + /* + * We could do so much better if we had the VA as well. + * Instead, we invalidate Stage-2 for this IPA, and the + * whole of Stage-1. Weep... 
+ */ + ipa >>= 12; + __tlbi_level(ipas2e1is, ipa, level); + + /* + * We have to ensure completion of the invalidation at Stage-2, + * since a table walk on another CPU could refill a TLB with a + * complete (S1 + S2) walk based on the old Stage-2 mapping if + * the Stage-1 invalidation happened first. + */ + dsb(ish); + __tlbi(vmalle1is); + dsb(ish); + isb(); + + /* + * If the host is running at EL1 and we have a VPIPT I-cache, + * then we must perform I-cache maintenance at EL2 in order for + * it to have an effect on the guest. Since the guest cannot hit + * I-cache lines allocated with a different VMID, we don't need + * to worry about junk out of guest reset (we nuke the I-cache on + * VMID rollover), but we do need to be careful when remapping + * executable pages for the same guest. This can happen when KSM + * takes a CoW fault on an executable page, copies the page into + * a page that was previously mapped in the guest and then needs + * to invalidate the guest view of the I-cache for that page + * from EL1. To solve this, we invalidate the entire I-cache when + * unmapping a page from a guest if we have a VPIPT I-cache but + * the host is running at EL1. As above, we could do better if + * we had the VA. + * + * The moral of this story is: if you have a VPIPT I-cache, then + * you should be running with VHE enabled. + */ + if (icache_is_vpipt()) + __flush_icache_all(); + + __tlb_switch_to_host(&cxt); +} + +void __kvm_tlb_flush_vmid(struct kvm_s2_mmu *mmu) +{ + struct tlb_inv_context cxt; + + dsb(ishst); + + /* Switch to requested VMID */ + mmu = kern_hyp_va(mmu); + __tlb_switch_to_guest(mmu, &cxt); + + __tlbi(vmalls12e1is); + dsb(ish); + isb(); + + __tlb_switch_to_host(&cxt); +} + +void __kvm_tlb_flush_local_vmid(struct kvm_s2_mmu *mmu) +{ + struct tlb_inv_context cxt; + + /* Switch to requested VMID */ + mmu = kern_hyp_va(mmu); + __tlb_switch_to_guest(mmu, &cxt); + + __tlbi(vmalle1); + dsb(nsh); + isb(); + + __tlb_switch_to_host(&cxt); +} + +void __kvm_flush_vm_context(void) +{ + dsb(ishst); + __tlbi(alle1is); + + /* + * VIPT and PIPT caches are not affected by VMID, so no maintenance + * is necessary across a VMID rollover. + * + * VPIPT caches constrain lookup and maintenance to the active VMID, + * so we need to invalidate lines with a stale VMID to avoid an ABA + * race after multiple rollovers. + * + */ + if (icache_is_vpipt()) + asm volatile("ic ialluis"); + + dsb(ish); +} diff --git a/arch/arm64/kvm/hyp/smccc_wa.S b/arch/arm64/kvm/hyp/smccc_wa.S new file mode 100644 index 000000000000..b0441dbdf68b --- /dev/null +++ b/arch/arm64/kvm/hyp/smccc_wa.S @@ -0,0 +1,32 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2015-2018 - ARM Ltd + * Author: Marc Zyngier <marc.zyngier@arm.com> + */ + +#include <linux/arm-smccc.h> +#include <linux/linkage.h> + +#include <asm/kvm_asm.h> +#include <asm/kvm_mmu.h> + + /* + * This is not executed directly and is instead copied into the vectors + * by install_bp_hardening_cb(). 
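install_bp_hardening_cb() itself is outside this hunk, so as a reading aid: being copied into the vectors means the __SMCCC_WORKAROUND_1_SMC_SZ bytes of this blob are memcpy'd into a spare 2K vector slot and the instruction cache is then brought up to date. A minimal sketch, with the function and parameter names assumed for illustration rather than taken from the kernel's actual helper:

#include <linux/sizes.h>
#include <linux/string.h>
#include <asm/cacheflush.h>

/* Illustrative only: roughly how a hardening blob such as
 * __smccc_workaround_1_smc is patched into a spare vector slot. */
static void copy_hardening_blob(void *vector_base, int slot,
				const char *blob_start, const char *blob_end)
{
	void *dst = vector_base + slot * SZ_2K;

	memcpy(dst, blob_start, blob_end - blob_start);
	flush_icache_range((unsigned long)dst, (unsigned long)dst + SZ_2K);
}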
+ */ + .data + .pushsection .rodata + .global __smccc_workaround_1_smc +SYM_DATA_START(__smccc_workaround_1_smc) + esb + sub sp, sp, #(8 * 4) + stp x2, x3, [sp, #(8 * 0)] + stp x0, x1, [sp, #(8 * 2)] + mov w0, #ARM_SMCCC_ARCH_WORKAROUND_1 + smc #0 + ldp x2, x3, [sp, #(8 * 0)] + ldp x0, x1, [sp, #(8 * 2)] + add sp, sp, #(8 * 4) +1: .org __smccc_workaround_1_smc + __SMCCC_WORKAROUND_1_SMC_SZ + .org 1b +SYM_DATA_END(__smccc_workaround_1_smc) diff --git a/arch/arm64/kvm/hyp/switch.c b/arch/arm64/kvm/hyp/switch.c deleted file mode 100644 index db1c4487d95d..000000000000 --- a/arch/arm64/kvm/hyp/switch.c +++ /dev/null @@ -1,936 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * Copyright (C) 2015 - ARM Ltd - * Author: Marc Zyngier <marc.zyngier@arm.com> - */ - -#include <linux/arm-smccc.h> -#include <linux/kvm_host.h> -#include <linux/types.h> -#include <linux/jump_label.h> -#include <uapi/linux/psci.h> - -#include <kvm/arm_psci.h> - -#include <asm/barrier.h> -#include <asm/cpufeature.h> -#include <asm/kprobes.h> -#include <asm/kvm_asm.h> -#include <asm/kvm_emulate.h> -#include <asm/kvm_hyp.h> -#include <asm/kvm_mmu.h> -#include <asm/fpsimd.h> -#include <asm/debug-monitors.h> -#include <asm/processor.h> -#include <asm/thread_info.h> - -/* Check whether the FP regs were dirtied while in the host-side run loop: */ -static bool __hyp_text update_fp_enabled(struct kvm_vcpu *vcpu) -{ - /* - * When the system doesn't support FP/SIMD, we cannot rely on - * the _TIF_FOREIGN_FPSTATE flag. However, we always inject an - * abort on the very first access to FP and thus we should never - * see KVM_ARM64_FP_ENABLED. For added safety, make sure we always - * trap the accesses. - */ - if (!system_supports_fpsimd() || - vcpu->arch.host_thread_info->flags & _TIF_FOREIGN_FPSTATE) - vcpu->arch.flags &= ~(KVM_ARM64_FP_ENABLED | - KVM_ARM64_FP_HOST); - - return !!(vcpu->arch.flags & KVM_ARM64_FP_ENABLED); -} - -/* Save the 32-bit only FPSIMD system register state */ -static void __hyp_text __fpsimd_save_fpexc32(struct kvm_vcpu *vcpu) -{ - if (!vcpu_el1_is_32bit(vcpu)) - return; - - vcpu->arch.ctxt.sys_regs[FPEXC32_EL2] = read_sysreg(fpexc32_el2); -} - -static void __hyp_text __activate_traps_fpsimd32(struct kvm_vcpu *vcpu) -{ - /* - * We are about to set CPTR_EL2.TFP to trap all floating point - * register accesses to EL2, however, the ARM ARM clearly states that - * traps are only taken to EL2 if the operation would not otherwise - * trap to EL1. Therefore, always make sure that for 32-bit guests, - * we set FPEXC.EN to prevent traps to EL1, when setting the TFP bit. - * If FP/ASIMD is not implemented, FPEXC is UNDEFINED and any access to - * it will cause an exception. - */ - if (vcpu_el1_is_32bit(vcpu) && system_supports_fpsimd()) { - write_sysreg(1 << 30, fpexc32_el2); - isb(); - } -} - -static void __hyp_text __activate_traps_common(struct kvm_vcpu *vcpu) -{ - /* Trap on AArch32 cp15 c15 (impdef sysregs) accesses (EL1 or EL0) */ - write_sysreg(1 << 15, hstr_el2); - - /* - * Make sure we trap PMU access from EL0 to EL2. Also sanitize - * PMSELR_EL0 to make sure it never contains the cycle - * counter, which could make a PMXEVCNTR_EL0 access UNDEF at - * EL1 instead of being trapped to EL2. 
- */ - write_sysreg(0, pmselr_el0); - write_sysreg(ARMV8_PMU_USERENR_MASK, pmuserenr_el0); - write_sysreg(vcpu->arch.mdcr_el2, mdcr_el2); -} - -static void __hyp_text __deactivate_traps_common(void) -{ - write_sysreg(0, hstr_el2); - write_sysreg(0, pmuserenr_el0); -} - -static void activate_traps_vhe(struct kvm_vcpu *vcpu) -{ - u64 val; - - val = read_sysreg(cpacr_el1); - val |= CPACR_EL1_TTA; - val &= ~CPACR_EL1_ZEN; - - /* - * With VHE (HCR.E2H == 1), accesses to CPACR_EL1 are routed to - * CPTR_EL2. In general, CPACR_EL1 has the same layout as CPTR_EL2, - * except for some missing controls, such as TAM. - * In this case, CPTR_EL2.TAM has the same position with or without - * VHE (HCR.E2H == 1) which allows us to use here the CPTR_EL2.TAM - * shift value for trapping the AMU accesses. - */ - - val |= CPTR_EL2_TAM; - - if (update_fp_enabled(vcpu)) { - if (vcpu_has_sve(vcpu)) - val |= CPACR_EL1_ZEN; - } else { - val &= ~CPACR_EL1_FPEN; - __activate_traps_fpsimd32(vcpu); - } - - write_sysreg(val, cpacr_el1); - - write_sysreg(kvm_get_hyp_vector(), vbar_el1); -} -NOKPROBE_SYMBOL(activate_traps_vhe); - -static void __hyp_text __activate_traps_nvhe(struct kvm_vcpu *vcpu) -{ - u64 val; - - __activate_traps_common(vcpu); - - val = CPTR_EL2_DEFAULT; - val |= CPTR_EL2_TTA | CPTR_EL2_TZ | CPTR_EL2_TAM; - if (!update_fp_enabled(vcpu)) { - val |= CPTR_EL2_TFP; - __activate_traps_fpsimd32(vcpu); - } - - write_sysreg(val, cptr_el2); - - if (cpus_have_final_cap(ARM64_WORKAROUND_SPECULATIVE_AT)) { - struct kvm_cpu_context *ctxt = &vcpu->arch.ctxt; - - isb(); - /* - * At this stage, and thanks to the above isb(), S2 is - * configured and enabled. We can now restore the guest's S1 - * configuration: SCTLR, and only then TCR. - */ - write_sysreg_el1(ctxt->sys_regs[SCTLR_EL1], SYS_SCTLR); - isb(); - write_sysreg_el1(ctxt->sys_regs[TCR_EL1], SYS_TCR); - } -} - -static void __hyp_text __activate_traps(struct kvm_vcpu *vcpu) -{ - u64 hcr = vcpu->arch.hcr_el2; - - if (cpus_have_final_cap(ARM64_WORKAROUND_CAVIUM_TX2_219_TVM)) - hcr |= HCR_TVM; - - write_sysreg(hcr, hcr_el2); - - if (cpus_have_final_cap(ARM64_HAS_RAS_EXTN) && (hcr & HCR_VSE)) - write_sysreg_s(vcpu->arch.vsesr_el2, SYS_VSESR_EL2); - - if (has_vhe()) - activate_traps_vhe(vcpu); - else - __activate_traps_nvhe(vcpu); -} - -static void deactivate_traps_vhe(void) -{ - extern char vectors[]; /* kernel exception vectors */ - write_sysreg(HCR_HOST_VHE_FLAGS, hcr_el2); - - /* - * ARM errata 1165522 and 1530923 require the actual execution of the - * above before we can switch to the EL2/EL0 translation regime used by - * the host. - */ - asm(ALTERNATIVE("nop", "isb", ARM64_WORKAROUND_SPECULATIVE_AT)); - - write_sysreg(CPACR_EL1_DEFAULT, cpacr_el1); - write_sysreg(vectors, vbar_el1); -} -NOKPROBE_SYMBOL(deactivate_traps_vhe); - -static void __hyp_text __deactivate_traps_nvhe(void) -{ - u64 mdcr_el2 = read_sysreg(mdcr_el2); - - if (cpus_have_final_cap(ARM64_WORKAROUND_SPECULATIVE_AT)) { - u64 val; - - /* - * Set the TCR and SCTLR registers in the exact opposite - * sequence as __activate_traps_nvhe (first prevent walks, - * then force the MMU on). A generous sprinkling of isb() - * ensure that things happen in this exact order. 
- */ - val = read_sysreg_el1(SYS_TCR); - write_sysreg_el1(val | TCR_EPD1_MASK | TCR_EPD0_MASK, SYS_TCR); - isb(); - val = read_sysreg_el1(SYS_SCTLR); - write_sysreg_el1(val | SCTLR_ELx_M, SYS_SCTLR); - isb(); - } - - __deactivate_traps_common(); - - mdcr_el2 &= MDCR_EL2_HPMN_MASK; - mdcr_el2 |= MDCR_EL2_E2PB_MASK << MDCR_EL2_E2PB_SHIFT; - - write_sysreg(mdcr_el2, mdcr_el2); - write_sysreg(HCR_HOST_NVHE_FLAGS, hcr_el2); - write_sysreg(CPTR_EL2_DEFAULT, cptr_el2); -} - -static void __hyp_text __deactivate_traps(struct kvm_vcpu *vcpu) -{ - /* - * If we pended a virtual abort, preserve it until it gets - * cleared. See D1.14.3 (Virtual Interrupts) for details, but - * the crucial bit is "On taking a vSError interrupt, - * HCR_EL2.VSE is cleared to 0." - */ - if (vcpu->arch.hcr_el2 & HCR_VSE) { - vcpu->arch.hcr_el2 &= ~HCR_VSE; - vcpu->arch.hcr_el2 |= read_sysreg(hcr_el2) & HCR_VSE; - } - - if (has_vhe()) - deactivate_traps_vhe(); - else - __deactivate_traps_nvhe(); -} - -void activate_traps_vhe_load(struct kvm_vcpu *vcpu) -{ - __activate_traps_common(vcpu); -} - -void deactivate_traps_vhe_put(void) -{ - u64 mdcr_el2 = read_sysreg(mdcr_el2); - - mdcr_el2 &= MDCR_EL2_HPMN_MASK | - MDCR_EL2_E2PB_MASK << MDCR_EL2_E2PB_SHIFT | - MDCR_EL2_TPMS; - - write_sysreg(mdcr_el2, mdcr_el2); - - __deactivate_traps_common(); -} - -static void __hyp_text __activate_vm(struct kvm *kvm) -{ - __load_guest_stage2(kvm); -} - -static void __hyp_text __deactivate_vm(struct kvm_vcpu *vcpu) -{ - write_sysreg(0, vttbr_el2); -} - -/* Save VGICv3 state on non-VHE systems */ -static void __hyp_text __hyp_vgic_save_state(struct kvm_vcpu *vcpu) -{ - if (static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif)) { - __vgic_v3_save_state(&vcpu->arch.vgic_cpu.vgic_v3); - __vgic_v3_deactivate_traps(&vcpu->arch.vgic_cpu.vgic_v3); - } -} - -/* Restore VGICv3 state on non_VEH systems */ -static void __hyp_text __hyp_vgic_restore_state(struct kvm_vcpu *vcpu) -{ - if (static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif)) { - __vgic_v3_activate_traps(&vcpu->arch.vgic_cpu.vgic_v3); - __vgic_v3_restore_state(&vcpu->arch.vgic_cpu.vgic_v3); - } -} - -static bool __hyp_text __translate_far_to_hpfar(u64 far, u64 *hpfar) -{ - u64 par, tmp; - - /* - * Resolve the IPA the hard way using the guest VA. - * - * Stage-1 translation already validated the memory access - * rights. As such, we can use the EL1 translation regime, and - * don't have to distinguish between EL0 and EL1 access. - * - * We do need to save/restore PAR_EL1 though, as we haven't - * saved the guest context yet, and we may return early... - */ - par = read_sysreg(par_el1); - asm volatile("at s1e1r, %0" : : "r" (far)); - isb(); - - tmp = read_sysreg(par_el1); - write_sysreg(par, par_el1); - - if (unlikely(tmp & SYS_PAR_EL1_F)) - return false; /* Translation failed, back to guest */ - - /* Convert PAR to HPFAR format */ - *hpfar = PAR_TO_HPFAR(tmp); - return true; -} - -static bool __hyp_text __populate_fault_info(struct kvm_vcpu *vcpu) -{ - u8 ec; - u64 esr; - u64 hpfar, far; - - esr = vcpu->arch.fault.esr_el2; - ec = ESR_ELx_EC(esr); - - if (ec != ESR_ELx_EC_DABT_LOW && ec != ESR_ELx_EC_IABT_LOW) - return true; - - far = read_sysreg_el2(SYS_FAR); - - /* - * The HPFAR can be invalid if the stage 2 fault did not - * happen during a stage 1 page table walk (the ESR_EL2.S1PTW - * bit is clear) and one of the two following cases are true: - * 1. The fault was due to a permission fault - * 2. 
The processor carries errata 834220 - * - * Therefore, for all non S1PTW faults where we either have a - * permission fault or the errata workaround is enabled, we - * resolve the IPA using the AT instruction. - */ - if (!(esr & ESR_ELx_S1PTW) && - (cpus_have_final_cap(ARM64_WORKAROUND_834220) || - (esr & ESR_ELx_FSC_TYPE) == FSC_PERM)) { - if (!__translate_far_to_hpfar(far, &hpfar)) - return false; - } else { - hpfar = read_sysreg(hpfar_el2); - } - - vcpu->arch.fault.far_el2 = far; - vcpu->arch.fault.hpfar_el2 = hpfar; - return true; -} - -/* Check for an FPSIMD/SVE trap and handle as appropriate */ -static bool __hyp_text __hyp_handle_fpsimd(struct kvm_vcpu *vcpu) -{ - bool vhe, sve_guest, sve_host; - u8 hsr_ec; - - if (!system_supports_fpsimd()) - return false; - - if (system_supports_sve()) { - sve_guest = vcpu_has_sve(vcpu); - sve_host = vcpu->arch.flags & KVM_ARM64_HOST_SVE_IN_USE; - vhe = true; - } else { - sve_guest = false; - sve_host = false; - vhe = has_vhe(); - } - - hsr_ec = kvm_vcpu_trap_get_class(vcpu); - if (hsr_ec != ESR_ELx_EC_FP_ASIMD && - hsr_ec != ESR_ELx_EC_SVE) - return false; - - /* Don't handle SVE traps for non-SVE vcpus here: */ - if (!sve_guest) - if (hsr_ec != ESR_ELx_EC_FP_ASIMD) - return false; - - /* Valid trap. Switch the context: */ - - if (vhe) { - u64 reg = read_sysreg(cpacr_el1) | CPACR_EL1_FPEN; - - if (sve_guest) - reg |= CPACR_EL1_ZEN; - - write_sysreg(reg, cpacr_el1); - } else { - write_sysreg(read_sysreg(cptr_el2) & ~(u64)CPTR_EL2_TFP, - cptr_el2); - } - - isb(); - - if (vcpu->arch.flags & KVM_ARM64_FP_HOST) { - /* - * In the SVE case, VHE is assumed: it is enforced by - * Kconfig and kvm_arch_init(). - */ - if (sve_host) { - struct thread_struct *thread = container_of( - vcpu->arch.host_fpsimd_state, - struct thread_struct, uw.fpsimd_state); - - sve_save_state(sve_pffr(thread), - &vcpu->arch.host_fpsimd_state->fpsr); - } else { - __fpsimd_save_state(vcpu->arch.host_fpsimd_state); - } - - vcpu->arch.flags &= ~KVM_ARM64_FP_HOST; - } - - if (sve_guest) { - sve_load_state(vcpu_sve_pffr(vcpu), - &vcpu->arch.ctxt.gp_regs.fp_regs.fpsr, - sve_vq_from_vl(vcpu->arch.sve_max_vl) - 1); - write_sysreg_s(vcpu->arch.ctxt.sys_regs[ZCR_EL1], SYS_ZCR_EL12); - } else { - __fpsimd_restore_state(&vcpu->arch.ctxt.gp_regs.fp_regs); - } - - /* Skip restoring fpexc32 for AArch64 guests */ - if (!(read_sysreg(hcr_el2) & HCR_RW)) - write_sysreg(vcpu->arch.ctxt.sys_regs[FPEXC32_EL2], - fpexc32_el2); - - vcpu->arch.flags |= KVM_ARM64_FP_ENABLED; - - return true; -} - -static bool __hyp_text handle_tx2_tvm(struct kvm_vcpu *vcpu) -{ - u32 sysreg = esr_sys64_to_sysreg(kvm_vcpu_get_hsr(vcpu)); - int rt = kvm_vcpu_sys_get_rt(vcpu); - u64 val = vcpu_get_reg(vcpu, rt); - - /* - * The normal sysreg handling code expects to see the traps, - * let's not do anything here. 
- */ - if (vcpu->arch.hcr_el2 & HCR_TVM) - return false; - - switch (sysreg) { - case SYS_SCTLR_EL1: - write_sysreg_el1(val, SYS_SCTLR); - break; - case SYS_TTBR0_EL1: - write_sysreg_el1(val, SYS_TTBR0); - break; - case SYS_TTBR1_EL1: - write_sysreg_el1(val, SYS_TTBR1); - break; - case SYS_TCR_EL1: - write_sysreg_el1(val, SYS_TCR); - break; - case SYS_ESR_EL1: - write_sysreg_el1(val, SYS_ESR); - break; - case SYS_FAR_EL1: - write_sysreg_el1(val, SYS_FAR); - break; - case SYS_AFSR0_EL1: - write_sysreg_el1(val, SYS_AFSR0); - break; - case SYS_AFSR1_EL1: - write_sysreg_el1(val, SYS_AFSR1); - break; - case SYS_MAIR_EL1: - write_sysreg_el1(val, SYS_MAIR); - break; - case SYS_AMAIR_EL1: - write_sysreg_el1(val, SYS_AMAIR); - break; - case SYS_CONTEXTIDR_EL1: - write_sysreg_el1(val, SYS_CONTEXTIDR); - break; - default: - return false; - } - - __kvm_skip_instr(vcpu); - return true; -} - -static bool __hyp_text esr_is_ptrauth_trap(u32 esr) -{ - u32 ec = ESR_ELx_EC(esr); - - if (ec == ESR_ELx_EC_PAC) - return true; - - if (ec != ESR_ELx_EC_SYS64) - return false; - - switch (esr_sys64_to_sysreg(esr)) { - case SYS_APIAKEYLO_EL1: - case SYS_APIAKEYHI_EL1: - case SYS_APIBKEYLO_EL1: - case SYS_APIBKEYHI_EL1: - case SYS_APDAKEYLO_EL1: - case SYS_APDAKEYHI_EL1: - case SYS_APDBKEYLO_EL1: - case SYS_APDBKEYHI_EL1: - case SYS_APGAKEYLO_EL1: - case SYS_APGAKEYHI_EL1: - return true; - } - - return false; -} - -#define __ptrauth_save_key(regs, key) \ -({ \ - regs[key ## KEYLO_EL1] = read_sysreg_s(SYS_ ## key ## KEYLO_EL1); \ - regs[key ## KEYHI_EL1] = read_sysreg_s(SYS_ ## key ## KEYHI_EL1); \ -}) - -static bool __hyp_text __hyp_handle_ptrauth(struct kvm_vcpu *vcpu) -{ - struct kvm_cpu_context *ctxt; - u64 val; - - if (!vcpu_has_ptrauth(vcpu) || - !esr_is_ptrauth_trap(kvm_vcpu_get_hsr(vcpu))) - return false; - - ctxt = &__hyp_this_cpu_ptr(kvm_host_data)->host_ctxt; - __ptrauth_save_key(ctxt->sys_regs, APIA); - __ptrauth_save_key(ctxt->sys_regs, APIB); - __ptrauth_save_key(ctxt->sys_regs, APDA); - __ptrauth_save_key(ctxt->sys_regs, APDB); - __ptrauth_save_key(ctxt->sys_regs, APGA); - - vcpu_ptrauth_enable(vcpu); - - val = read_sysreg(hcr_el2); - val |= (HCR_API | HCR_APK); - write_sysreg(val, hcr_el2); - - return true; -} - -/* - * Return true when we were able to fixup the guest exit and should return to - * the guest, false when we should restore the host state and return to the - * main run loop. - */ -static bool __hyp_text fixup_guest_exit(struct kvm_vcpu *vcpu, u64 *exit_code) -{ - if (ARM_EXCEPTION_CODE(*exit_code) != ARM_EXCEPTION_IRQ) - vcpu->arch.fault.esr_el2 = read_sysreg_el2(SYS_ESR); - - /* - * We're using the raw exception code in order to only process - * the trap if no SError is pending. We will come back to the - * same PC once the SError has been injected, and replay the - * trapping instruction. - */ - if (*exit_code != ARM_EXCEPTION_TRAP) - goto exit; - - if (cpus_have_final_cap(ARM64_WORKAROUND_CAVIUM_TX2_219_TVM) && - kvm_vcpu_trap_get_class(vcpu) == ESR_ELx_EC_SYS64 && - handle_tx2_tvm(vcpu)) - return true; - - /* - * We trap the first access to the FP/SIMD to save the host context - * and restore the guest context lazily. - * If FP/SIMD is not implemented, handle the trap and inject an - * undefined instruction exception to the guest. - * Similarly for trapped SVE accesses. 
- */ - if (__hyp_handle_fpsimd(vcpu)) - return true; - - if (__hyp_handle_ptrauth(vcpu)) - return true; - - if (!__populate_fault_info(vcpu)) - return true; - - if (static_branch_unlikely(&vgic_v2_cpuif_trap)) { - bool valid; - - valid = kvm_vcpu_trap_get_class(vcpu) == ESR_ELx_EC_DABT_LOW && - kvm_vcpu_trap_get_fault_type(vcpu) == FSC_FAULT && - kvm_vcpu_dabt_isvalid(vcpu) && - !kvm_vcpu_dabt_isextabt(vcpu) && - !kvm_vcpu_dabt_iss1tw(vcpu); - - if (valid) { - int ret = __vgic_v2_perform_cpuif_access(vcpu); - - if (ret == 1) - return true; - - /* Promote an illegal access to an SError.*/ - if (ret == -1) - *exit_code = ARM_EXCEPTION_EL1_SERROR; - - goto exit; - } - } - - if (static_branch_unlikely(&vgic_v3_cpuif_trap) && - (kvm_vcpu_trap_get_class(vcpu) == ESR_ELx_EC_SYS64 || - kvm_vcpu_trap_get_class(vcpu) == ESR_ELx_EC_CP15_32)) { - int ret = __vgic_v3_perform_cpuif_access(vcpu); - - if (ret == 1) - return true; - } - -exit: - /* Return to the host kernel and handle the exit */ - return false; -} - -static inline bool __hyp_text __needs_ssbd_off(struct kvm_vcpu *vcpu) -{ - if (!cpus_have_final_cap(ARM64_SSBD)) - return false; - - return !(vcpu->arch.workaround_flags & VCPU_WORKAROUND_2_FLAG); -} - -static void __hyp_text __set_guest_arch_workaround_state(struct kvm_vcpu *vcpu) -{ -#ifdef CONFIG_ARM64_SSBD - /* - * The host runs with the workaround always present. If the - * guest wants it disabled, so be it... - */ - if (__needs_ssbd_off(vcpu) && - __hyp_this_cpu_read(arm64_ssbd_callback_required)) - arm_smccc_1_1_smc(ARM_SMCCC_ARCH_WORKAROUND_2, 0, NULL); -#endif -} - -static void __hyp_text __set_host_arch_workaround_state(struct kvm_vcpu *vcpu) -{ -#ifdef CONFIG_ARM64_SSBD - /* - * If the guest has disabled the workaround, bring it back on. - */ - if (__needs_ssbd_off(vcpu) && - __hyp_this_cpu_read(arm64_ssbd_callback_required)) - arm_smccc_1_1_smc(ARM_SMCCC_ARCH_WORKAROUND_2, 1, NULL); -#endif -} - -/** - * Disable host events, enable guest events - */ -static bool __hyp_text __pmu_switch_to_guest(struct kvm_cpu_context *host_ctxt) -{ - struct kvm_host_data *host; - struct kvm_pmu_events *pmu; - - host = container_of(host_ctxt, struct kvm_host_data, host_ctxt); - pmu = &host->pmu_events; - - if (pmu->events_host) - write_sysreg(pmu->events_host, pmcntenclr_el0); - - if (pmu->events_guest) - write_sysreg(pmu->events_guest, pmcntenset_el0); - - return (pmu->events_host || pmu->events_guest); -} - -/** - * Disable guest events, enable host events - */ -static void __hyp_text __pmu_switch_to_host(struct kvm_cpu_context *host_ctxt) -{ - struct kvm_host_data *host; - struct kvm_pmu_events *pmu; - - host = container_of(host_ctxt, struct kvm_host_data, host_ctxt); - pmu = &host->pmu_events; - - if (pmu->events_guest) - write_sysreg(pmu->events_guest, pmcntenclr_el0); - - if (pmu->events_host) - write_sysreg(pmu->events_host, pmcntenset_el0); -} - -/* Switch to the guest for VHE systems running in EL2 */ -static int __kvm_vcpu_run_vhe(struct kvm_vcpu *vcpu) -{ - struct kvm_cpu_context *host_ctxt; - struct kvm_cpu_context *guest_ctxt; - u64 exit_code; - - host_ctxt = &__hyp_this_cpu_ptr(kvm_host_data)->host_ctxt; - host_ctxt->__hyp_running_vcpu = vcpu; - guest_ctxt = &vcpu->arch.ctxt; - - sysreg_save_host_state_vhe(host_ctxt); - - /* - * ARM erratum 1165522 requires us to configure both stage 1 and - * stage 2 translation for the guest context before we clear - * HCR_EL2.TGE. - * - * We have already configured the guest's stage 1 translation in - * kvm_vcpu_load_sysregs above. 
We must now call __activate_vm - * before __activate_traps, because __activate_vm configures - * stage 2 translation, and __activate_traps clear HCR_EL2.TGE - * (among other things). - */ - __activate_vm(vcpu->kvm); - __activate_traps(vcpu); - - sysreg_restore_guest_state_vhe(guest_ctxt); - __debug_switch_to_guest(vcpu); - - __set_guest_arch_workaround_state(vcpu); - - do { - /* Jump in the fire! */ - exit_code = __guest_enter(vcpu, host_ctxt); - - /* And we're baaack! */ - } while (fixup_guest_exit(vcpu, &exit_code)); - - __set_host_arch_workaround_state(vcpu); - - sysreg_save_guest_state_vhe(guest_ctxt); - - __deactivate_traps(vcpu); - - sysreg_restore_host_state_vhe(host_ctxt); - - if (vcpu->arch.flags & KVM_ARM64_FP_ENABLED) - __fpsimd_save_fpexc32(vcpu); - - __debug_switch_to_host(vcpu); - - return exit_code; -} -NOKPROBE_SYMBOL(__kvm_vcpu_run_vhe); - -int kvm_vcpu_run_vhe(struct kvm_vcpu *vcpu) -{ - int ret; - - local_daif_mask(); - - /* - * Having IRQs masked via PMR when entering the guest means the GIC - * will not signal the CPU of interrupts of lower priority, and the - * only way to get out will be via guest exceptions. - * Naturally, we want to avoid this. - * - * local_daif_mask() already sets GIC_PRIO_PSR_I_SET, we just need a - * dsb to ensure the redistributor is forwards EL2 IRQs to the CPU. - */ - pmr_sync(); - - ret = __kvm_vcpu_run_vhe(vcpu); - - /* - * local_daif_restore() takes care to properly restore PSTATE.DAIF - * and the GIC PMR if the host is using IRQ priorities. - */ - local_daif_restore(DAIF_PROCCTX_NOIRQ); - - /* - * When we exit from the guest we change a number of CPU configuration - * parameters, such as traps. Make sure these changes take effect - * before running the host or additional guests. - */ - isb(); - - return ret; -} - -/* Switch to the guest for legacy non-VHE systems */ -int __hyp_text __kvm_vcpu_run_nvhe(struct kvm_vcpu *vcpu) -{ - struct kvm_cpu_context *host_ctxt; - struct kvm_cpu_context *guest_ctxt; - bool pmu_switch_needed; - u64 exit_code; - - /* - * Having IRQs masked via PMR when entering the guest means the GIC - * will not signal the CPU of interrupts of lower priority, and the - * only way to get out will be via guest exceptions. - * Naturally, we want to avoid this. - */ - if (system_uses_irq_prio_masking()) { - gic_write_pmr(GIC_PRIO_IRQON | GIC_PRIO_PSR_I_SET); - pmr_sync(); - } - - vcpu = kern_hyp_va(vcpu); - - host_ctxt = &__hyp_this_cpu_ptr(kvm_host_data)->host_ctxt; - host_ctxt->__hyp_running_vcpu = vcpu; - guest_ctxt = &vcpu->arch.ctxt; - - pmu_switch_needed = __pmu_switch_to_guest(host_ctxt); - - __sysreg_save_state_nvhe(host_ctxt); - - /* - * We must restore the 32-bit state before the sysregs, thanks - * to erratum #852523 (Cortex-A57) or #853709 (Cortex-A72). - * - * Also, and in order to be able to deal with erratum #1319537 (A57) - * and #1319367 (A72), we must ensure that all VM-related sysreg are - * restored before we enable S2 translation. - */ - __sysreg32_restore_state(vcpu); - __sysreg_restore_state_nvhe(guest_ctxt); - - __activate_vm(kern_hyp_va(vcpu->kvm)); - __activate_traps(vcpu); - - __hyp_vgic_restore_state(vcpu); - __timer_enable_traps(vcpu); - - __debug_switch_to_guest(vcpu); - - __set_guest_arch_workaround_state(vcpu); - - do { - /* Jump in the fire! */ - exit_code = __guest_enter(vcpu, host_ctxt); - - /* And we're baaack! 
*/ - } while (fixup_guest_exit(vcpu, &exit_code)); - - __set_host_arch_workaround_state(vcpu); - - __sysreg_save_state_nvhe(guest_ctxt); - __sysreg32_save_state(vcpu); - __timer_disable_traps(vcpu); - __hyp_vgic_save_state(vcpu); - - __deactivate_traps(vcpu); - __deactivate_vm(vcpu); - - __sysreg_restore_state_nvhe(host_ctxt); - - if (vcpu->arch.flags & KVM_ARM64_FP_ENABLED) - __fpsimd_save_fpexc32(vcpu); - - /* - * This must come after restoring the host sysregs, since a non-VHE - * system may enable SPE here and make use of the TTBRs. - */ - __debug_switch_to_host(vcpu); - - if (pmu_switch_needed) - __pmu_switch_to_host(host_ctxt); - - /* Returning to host will clear PSR.I, remask PMR if needed */ - if (system_uses_irq_prio_masking()) - gic_write_pmr(GIC_PRIO_IRQOFF); - - return exit_code; -} - -static const char __hyp_panic_string[] = "HYP panic:\nPS:%08llx PC:%016llx ESR:%08llx\nFAR:%016llx HPFAR:%016llx PAR:%016llx\nVCPU:%p\n"; - -static void __hyp_text __hyp_call_panic_nvhe(u64 spsr, u64 elr, u64 par, - struct kvm_cpu_context *__host_ctxt) -{ - struct kvm_vcpu *vcpu; - unsigned long str_va; - - vcpu = __host_ctxt->__hyp_running_vcpu; - - if (read_sysreg(vttbr_el2)) { - __timer_disable_traps(vcpu); - __deactivate_traps(vcpu); - __deactivate_vm(vcpu); - __sysreg_restore_state_nvhe(__host_ctxt); - } - - /* - * Force the panic string to be loaded from the literal pool, - * making sure it is a kernel address and not a PC-relative - * reference. - */ - asm volatile("ldr %0, =__hyp_panic_string" : "=r" (str_va)); - - __hyp_do_panic(str_va, - spsr, elr, - read_sysreg(esr_el2), read_sysreg_el2(SYS_FAR), - read_sysreg(hpfar_el2), par, vcpu); -} - -static void __hyp_call_panic_vhe(u64 spsr, u64 elr, u64 par, - struct kvm_cpu_context *host_ctxt) -{ - struct kvm_vcpu *vcpu; - vcpu = host_ctxt->__hyp_running_vcpu; - - __deactivate_traps(vcpu); - sysreg_restore_host_state_vhe(host_ctxt); - - panic(__hyp_panic_string, - spsr, elr, - read_sysreg_el2(SYS_ESR), read_sysreg_el2(SYS_FAR), - read_sysreg(hpfar_el2), par, vcpu); -} -NOKPROBE_SYMBOL(__hyp_call_panic_vhe); - -void __hyp_text __noreturn hyp_panic(struct kvm_cpu_context *host_ctxt) -{ - u64 spsr = read_sysreg_el2(SYS_SPSR); - u64 elr = read_sysreg_el2(SYS_ELR); - u64 par = read_sysreg(par_el1); - - if (!has_vhe()) - __hyp_call_panic_nvhe(spsr, elr, par, host_ctxt); - else - __hyp_call_panic_vhe(spsr, elr, par, host_ctxt); - - unreachable(); -} diff --git a/arch/arm64/kvm/hyp/sysreg-sr.c b/arch/arm64/kvm/hyp/sysreg-sr.c deleted file mode 100644 index cc7e957f5b2c..000000000000 --- a/arch/arm64/kvm/hyp/sysreg-sr.c +++ /dev/null @@ -1,333 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * Copyright (C) 2012-2015 - ARM Ltd - * Author: Marc Zyngier <marc.zyngier@arm.com> - */ - -#include <linux/compiler.h> -#include <linux/kvm_host.h> - -#include <asm/kprobes.h> -#include <asm/kvm_asm.h> -#include <asm/kvm_emulate.h> -#include <asm/kvm_hyp.h> - -/* - * Non-VHE: Both host and guest must save everything. - * - * VHE: Host and guest must save mdscr_el1 and sp_el0 (and the PC and - * pstate, which are handled as part of the el2 return state) on every - * switch (sp_el0 is being dealt with in the assembly code). - * tpidr_el0 and tpidrro_el0 only need to be switched when going - * to host userspace or a different VCPU. EL1 registers only need to be - * switched when potentially going to run a different VCPU. The latter two - * classes are handled as part of kvm_arch_vcpu_load and kvm_arch_vcpu_put. 
- */ - -static void __hyp_text __sysreg_save_common_state(struct kvm_cpu_context *ctxt) -{ - ctxt->sys_regs[MDSCR_EL1] = read_sysreg(mdscr_el1); -} - -static void __hyp_text __sysreg_save_user_state(struct kvm_cpu_context *ctxt) -{ - ctxt->sys_regs[TPIDR_EL0] = read_sysreg(tpidr_el0); - ctxt->sys_regs[TPIDRRO_EL0] = read_sysreg(tpidrro_el0); -} - -static void __hyp_text __sysreg_save_el1_state(struct kvm_cpu_context *ctxt) -{ - ctxt->sys_regs[CSSELR_EL1] = read_sysreg(csselr_el1); - ctxt->sys_regs[SCTLR_EL1] = read_sysreg_el1(SYS_SCTLR); - ctxt->sys_regs[CPACR_EL1] = read_sysreg_el1(SYS_CPACR); - ctxt->sys_regs[TTBR0_EL1] = read_sysreg_el1(SYS_TTBR0); - ctxt->sys_regs[TTBR1_EL1] = read_sysreg_el1(SYS_TTBR1); - ctxt->sys_regs[TCR_EL1] = read_sysreg_el1(SYS_TCR); - ctxt->sys_regs[ESR_EL1] = read_sysreg_el1(SYS_ESR); - ctxt->sys_regs[AFSR0_EL1] = read_sysreg_el1(SYS_AFSR0); - ctxt->sys_regs[AFSR1_EL1] = read_sysreg_el1(SYS_AFSR1); - ctxt->sys_regs[FAR_EL1] = read_sysreg_el1(SYS_FAR); - ctxt->sys_regs[MAIR_EL1] = read_sysreg_el1(SYS_MAIR); - ctxt->sys_regs[VBAR_EL1] = read_sysreg_el1(SYS_VBAR); - ctxt->sys_regs[CONTEXTIDR_EL1] = read_sysreg_el1(SYS_CONTEXTIDR); - ctxt->sys_regs[AMAIR_EL1] = read_sysreg_el1(SYS_AMAIR); - ctxt->sys_regs[CNTKCTL_EL1] = read_sysreg_el1(SYS_CNTKCTL); - ctxt->sys_regs[PAR_EL1] = read_sysreg(par_el1); - ctxt->sys_regs[TPIDR_EL1] = read_sysreg(tpidr_el1); - - ctxt->gp_regs.sp_el1 = read_sysreg(sp_el1); - ctxt->gp_regs.elr_el1 = read_sysreg_el1(SYS_ELR); - ctxt->gp_regs.spsr[KVM_SPSR_EL1]= read_sysreg_el1(SYS_SPSR); -} - -static void __hyp_text __sysreg_save_el2_return_state(struct kvm_cpu_context *ctxt) -{ - ctxt->gp_regs.regs.pc = read_sysreg_el2(SYS_ELR); - ctxt->gp_regs.regs.pstate = read_sysreg_el2(SYS_SPSR); - - if (cpus_have_final_cap(ARM64_HAS_RAS_EXTN)) - ctxt->sys_regs[DISR_EL1] = read_sysreg_s(SYS_VDISR_EL2); -} - -void __hyp_text __sysreg_save_state_nvhe(struct kvm_cpu_context *ctxt) -{ - __sysreg_save_el1_state(ctxt); - __sysreg_save_common_state(ctxt); - __sysreg_save_user_state(ctxt); - __sysreg_save_el2_return_state(ctxt); -} - -void sysreg_save_host_state_vhe(struct kvm_cpu_context *ctxt) -{ - __sysreg_save_common_state(ctxt); -} -NOKPROBE_SYMBOL(sysreg_save_host_state_vhe); - -void sysreg_save_guest_state_vhe(struct kvm_cpu_context *ctxt) -{ - __sysreg_save_common_state(ctxt); - __sysreg_save_el2_return_state(ctxt); -} -NOKPROBE_SYMBOL(sysreg_save_guest_state_vhe); - -static void __hyp_text __sysreg_restore_common_state(struct kvm_cpu_context *ctxt) -{ - write_sysreg(ctxt->sys_regs[MDSCR_EL1], mdscr_el1); -} - -static void __hyp_text __sysreg_restore_user_state(struct kvm_cpu_context *ctxt) -{ - write_sysreg(ctxt->sys_regs[TPIDR_EL0], tpidr_el0); - write_sysreg(ctxt->sys_regs[TPIDRRO_EL0], tpidrro_el0); -} - -static void __hyp_text __sysreg_restore_el1_state(struct kvm_cpu_context *ctxt) -{ - write_sysreg(ctxt->sys_regs[MPIDR_EL1], vmpidr_el2); - write_sysreg(ctxt->sys_regs[CSSELR_EL1], csselr_el1); - - if (has_vhe() || - !cpus_have_final_cap(ARM64_WORKAROUND_SPECULATIVE_AT)) { - write_sysreg_el1(ctxt->sys_regs[SCTLR_EL1], SYS_SCTLR); - write_sysreg_el1(ctxt->sys_regs[TCR_EL1], SYS_TCR); - } else if (!ctxt->__hyp_running_vcpu) { - /* - * Must only be done for guest registers, hence the context - * test. We're coming from the host, so SCTLR.M is already - * set. Pairs with __activate_traps_nvhe(). 
- */ - write_sysreg_el1((ctxt->sys_regs[TCR_EL1] | - TCR_EPD1_MASK | TCR_EPD0_MASK), - SYS_TCR); - isb(); - } - - write_sysreg_el1(ctxt->sys_regs[CPACR_EL1], SYS_CPACR); - write_sysreg_el1(ctxt->sys_regs[TTBR0_EL1], SYS_TTBR0); - write_sysreg_el1(ctxt->sys_regs[TTBR1_EL1], SYS_TTBR1); - write_sysreg_el1(ctxt->sys_regs[ESR_EL1], SYS_ESR); - write_sysreg_el1(ctxt->sys_regs[AFSR0_EL1], SYS_AFSR0); - write_sysreg_el1(ctxt->sys_regs[AFSR1_EL1], SYS_AFSR1); - write_sysreg_el1(ctxt->sys_regs[FAR_EL1], SYS_FAR); - write_sysreg_el1(ctxt->sys_regs[MAIR_EL1], SYS_MAIR); - write_sysreg_el1(ctxt->sys_regs[VBAR_EL1], SYS_VBAR); - write_sysreg_el1(ctxt->sys_regs[CONTEXTIDR_EL1],SYS_CONTEXTIDR); - write_sysreg_el1(ctxt->sys_regs[AMAIR_EL1], SYS_AMAIR); - write_sysreg_el1(ctxt->sys_regs[CNTKCTL_EL1], SYS_CNTKCTL); - write_sysreg(ctxt->sys_regs[PAR_EL1], par_el1); - write_sysreg(ctxt->sys_regs[TPIDR_EL1], tpidr_el1); - - if (!has_vhe() && - cpus_have_final_cap(ARM64_WORKAROUND_SPECULATIVE_AT) && - ctxt->__hyp_running_vcpu) { - /* - * Must only be done for host registers, hence the context - * test. Pairs with __deactivate_traps_nvhe(). - */ - isb(); - /* - * At this stage, and thanks to the above isb(), S2 is - * deconfigured and disabled. We can now restore the host's - * S1 configuration: SCTLR, and only then TCR. - */ - write_sysreg_el1(ctxt->sys_regs[SCTLR_EL1], SYS_SCTLR); - isb(); - write_sysreg_el1(ctxt->sys_regs[TCR_EL1], SYS_TCR); - } - - write_sysreg(ctxt->gp_regs.sp_el1, sp_el1); - write_sysreg_el1(ctxt->gp_regs.elr_el1, SYS_ELR); - write_sysreg_el1(ctxt->gp_regs.spsr[KVM_SPSR_EL1],SYS_SPSR); -} - -static void __hyp_text -__sysreg_restore_el2_return_state(struct kvm_cpu_context *ctxt) -{ - u64 pstate = ctxt->gp_regs.regs.pstate; - u64 mode = pstate & PSR_AA32_MODE_MASK; - - /* - * Safety check to ensure we're setting the CPU up to enter the guest - * in a less privileged mode. - * - * If we are attempting a return to EL2 or higher in AArch64 state, - * program SPSR_EL2 with M=EL2h and the IL bit set which ensures that - * we'll take an illegal exception state exception immediately after - * the ERET to the guest. Attempts to return to AArch32 Hyp will - * result in an illegal exception return because EL2's execution state - * is determined by SCR_EL3.RW. 
- */ - if (!(mode & PSR_MODE32_BIT) && mode >= PSR_MODE_EL2t) - pstate = PSR_MODE_EL2h | PSR_IL_BIT; - - write_sysreg_el2(ctxt->gp_regs.regs.pc, SYS_ELR); - write_sysreg_el2(pstate, SYS_SPSR); - - if (cpus_have_final_cap(ARM64_HAS_RAS_EXTN)) - write_sysreg_s(ctxt->sys_regs[DISR_EL1], SYS_VDISR_EL2); -} - -void __hyp_text __sysreg_restore_state_nvhe(struct kvm_cpu_context *ctxt) -{ - __sysreg_restore_el1_state(ctxt); - __sysreg_restore_common_state(ctxt); - __sysreg_restore_user_state(ctxt); - __sysreg_restore_el2_return_state(ctxt); -} - -void sysreg_restore_host_state_vhe(struct kvm_cpu_context *ctxt) -{ - __sysreg_restore_common_state(ctxt); -} -NOKPROBE_SYMBOL(sysreg_restore_host_state_vhe); - -void sysreg_restore_guest_state_vhe(struct kvm_cpu_context *ctxt) -{ - __sysreg_restore_common_state(ctxt); - __sysreg_restore_el2_return_state(ctxt); -} -NOKPROBE_SYMBOL(sysreg_restore_guest_state_vhe); - -void __hyp_text __sysreg32_save_state(struct kvm_vcpu *vcpu) -{ - u64 *spsr, *sysreg; - - if (!vcpu_el1_is_32bit(vcpu)) - return; - - spsr = vcpu->arch.ctxt.gp_regs.spsr; - sysreg = vcpu->arch.ctxt.sys_regs; - - spsr[KVM_SPSR_ABT] = read_sysreg(spsr_abt); - spsr[KVM_SPSR_UND] = read_sysreg(spsr_und); - spsr[KVM_SPSR_IRQ] = read_sysreg(spsr_irq); - spsr[KVM_SPSR_FIQ] = read_sysreg(spsr_fiq); - - sysreg[DACR32_EL2] = read_sysreg(dacr32_el2); - sysreg[IFSR32_EL2] = read_sysreg(ifsr32_el2); - - if (has_vhe() || vcpu->arch.flags & KVM_ARM64_DEBUG_DIRTY) - sysreg[DBGVCR32_EL2] = read_sysreg(dbgvcr32_el2); -} - -void __hyp_text __sysreg32_restore_state(struct kvm_vcpu *vcpu) -{ - u64 *spsr, *sysreg; - - if (!vcpu_el1_is_32bit(vcpu)) - return; - - spsr = vcpu->arch.ctxt.gp_regs.spsr; - sysreg = vcpu->arch.ctxt.sys_regs; - - write_sysreg(spsr[KVM_SPSR_ABT], spsr_abt); - write_sysreg(spsr[KVM_SPSR_UND], spsr_und); - write_sysreg(spsr[KVM_SPSR_IRQ], spsr_irq); - write_sysreg(spsr[KVM_SPSR_FIQ], spsr_fiq); - - write_sysreg(sysreg[DACR32_EL2], dacr32_el2); - write_sysreg(sysreg[IFSR32_EL2], ifsr32_el2); - - if (has_vhe() || vcpu->arch.flags & KVM_ARM64_DEBUG_DIRTY) - write_sysreg(sysreg[DBGVCR32_EL2], dbgvcr32_el2); -} - -/** - * kvm_vcpu_load_sysregs - Load guest system registers to the physical CPU - * - * @vcpu: The VCPU pointer - * - * Load system registers that do not affect the host's execution, for - * example EL1 system registers on a VHE system where the host kernel - * runs at EL2. This function is called from KVM's vcpu_load() function - * and loading system register state early avoids having to load them on - * every entry to the VM. - */ -void kvm_vcpu_load_sysregs(struct kvm_vcpu *vcpu) -{ - struct kvm_cpu_context *guest_ctxt = &vcpu->arch.ctxt; - struct kvm_cpu_context *host_ctxt; - - if (!has_vhe()) - return; - - host_ctxt = &__hyp_this_cpu_ptr(kvm_host_data)->host_ctxt; - __sysreg_save_user_state(host_ctxt); - - /* - * Load guest EL1 and user state - * - * We must restore the 32-bit state before the sysregs, thanks - * to erratum #852523 (Cortex-A57) or #853709 (Cortex-A72). - */ - __sysreg32_restore_state(vcpu); - __sysreg_restore_user_state(guest_ctxt); - __sysreg_restore_el1_state(guest_ctxt); - - vcpu->arch.sysregs_loaded_on_cpu = true; - - activate_traps_vhe_load(vcpu); -} - -/** - * kvm_vcpu_put_sysregs - Restore host system registers to the physical CPU - * - * @vcpu: The VCPU pointer - * - * Save guest system registers that do not affect the host's execution, for - * example EL1 system registers on a VHE system where the host kernel - * runs at EL2. 
This function is called from KVM's vcpu_put() function - * and deferring saving system register state until we're no longer running the - * VCPU avoids having to save them on every exit from the VM. - */ -void kvm_vcpu_put_sysregs(struct kvm_vcpu *vcpu) -{ - struct kvm_cpu_context *guest_ctxt = &vcpu->arch.ctxt; - struct kvm_cpu_context *host_ctxt; - - if (!has_vhe()) - return; - - host_ctxt = &__hyp_this_cpu_ptr(kvm_host_data)->host_ctxt; - deactivate_traps_vhe_put(); - - __sysreg_save_el1_state(guest_ctxt); - __sysreg_save_user_state(guest_ctxt); - __sysreg32_save_state(vcpu); - - /* Restore host user state */ - __sysreg_restore_user_state(host_ctxt); - - vcpu->arch.sysregs_loaded_on_cpu = false; -} - -void __hyp_text __kvm_enable_ssbs(void) -{ - u64 tmp; - - asm volatile( - "mrs %0, sctlr_el2\n" - "orr %0, %0, %1\n" - "msr sctlr_el2, %0" - : "=&r" (tmp) : "L" (SCTLR_ELx_DSSBS)); -} diff --git a/arch/arm64/kvm/hyp/tlb.c b/arch/arm64/kvm/hyp/tlb.c deleted file mode 100644 index d063a576d511..000000000000 --- a/arch/arm64/kvm/hyp/tlb.c +++ /dev/null @@ -1,242 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * Copyright (C) 2015 - ARM Ltd - * Author: Marc Zyngier <marc.zyngier@arm.com> - */ - -#include <linux/irqflags.h> - -#include <asm/kvm_hyp.h> -#include <asm/kvm_mmu.h> -#include <asm/tlbflush.h> - -struct tlb_inv_context { - unsigned long flags; - u64 tcr; - u64 sctlr; -}; - -static void __hyp_text __tlb_switch_to_guest_vhe(struct kvm *kvm, - struct tlb_inv_context *cxt) -{ - u64 val; - - local_irq_save(cxt->flags); - - if (cpus_have_final_cap(ARM64_WORKAROUND_SPECULATIVE_AT)) { - /* - * For CPUs that are affected by ARM errata 1165522 or 1530923, - * we cannot trust stage-1 to be in a correct state at that - * point. Since we do not want to force a full load of the - * vcpu state, we prevent the EL1 page-table walker to - * allocate new TLBs. This is done by setting the EPD bits - * in the TCR_EL1 register. We also need to prevent it to - * allocate IPA->PA walks, so we enable the S1 MMU... - */ - val = cxt->tcr = read_sysreg_el1(SYS_TCR); - val |= TCR_EPD1_MASK | TCR_EPD0_MASK; - write_sysreg_el1(val, SYS_TCR); - val = cxt->sctlr = read_sysreg_el1(SYS_SCTLR); - val |= SCTLR_ELx_M; - write_sysreg_el1(val, SYS_SCTLR); - } - - /* - * With VHE enabled, we have HCR_EL2.{E2H,TGE} = {1,1}, and - * most TLB operations target EL2/EL0. In order to affect the - * guest TLBs (EL1/EL0), we need to change one of these two - * bits. Changing E2H is impossible (goodbye TTBR1_EL2), so - * let's flip TGE before executing the TLB operation. - * - * ARM erratum 1165522 requires some special handling (again), - * as we need to make sure both stages of translation are in - * place before clearing TGE. __load_guest_stage2() already - * has an ISB in order to deal with this. - */ - __load_guest_stage2(kvm); - val = read_sysreg(hcr_el2); - val &= ~HCR_TGE; - write_sysreg(val, hcr_el2); - isb(); -} - -static void __hyp_text __tlb_switch_to_guest_nvhe(struct kvm *kvm, - struct tlb_inv_context *cxt) -{ - if (cpus_have_final_cap(ARM64_WORKAROUND_SPECULATIVE_AT)) { - u64 val; - - /* - * For CPUs that are affected by ARM 1319367, we need to - * avoid a host Stage-1 walk while we have the guest's - * VMID set in the VTTBR in order to invalidate TLBs. - * We're guaranteed that the S1 MMU is enabled, so we can - * simply set the EPD bits to avoid any further TLB fill. 
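Condensed, the EPD trick that both speculative-AT workaround paths rely on looks like this (sketch only; the helper name is invented, the accessors and masks are the ones used in the hunk):

static u64 block_el1_s1_walks(void)
{
	u64 tcr = read_sysreg_el1(SYS_TCR);

	/* EPD0/EPD1 stop the EL1&0 walker from allocating new TLB entries */
	write_sysreg_el1(tcr | TCR_EPD1_MASK | TCR_EPD0_MASK, SYS_TCR);
	isb();

	return tcr;	/* the caller restores this once VTTBR_EL2 is cleared */
}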
- */ - val = cxt->tcr = read_sysreg_el1(SYS_TCR); - val |= TCR_EPD1_MASK | TCR_EPD0_MASK; - write_sysreg_el1(val, SYS_TCR); - isb(); - } - - /* __load_guest_stage2() includes an ISB for the workaround. */ - __load_guest_stage2(kvm); - asm(ALTERNATIVE("isb", "nop", ARM64_WORKAROUND_SPECULATIVE_AT)); -} - -static void __hyp_text __tlb_switch_to_guest(struct kvm *kvm, - struct tlb_inv_context *cxt) -{ - if (has_vhe()) - __tlb_switch_to_guest_vhe(kvm, cxt); - else - __tlb_switch_to_guest_nvhe(kvm, cxt); -} - -static void __hyp_text __tlb_switch_to_host_vhe(struct kvm *kvm, - struct tlb_inv_context *cxt) -{ - /* - * We're done with the TLB operation, let's restore the host's - * view of HCR_EL2. - */ - write_sysreg(0, vttbr_el2); - write_sysreg(HCR_HOST_VHE_FLAGS, hcr_el2); - isb(); - - if (cpus_have_final_cap(ARM64_WORKAROUND_SPECULATIVE_AT)) { - /* Restore the registers to what they were */ - write_sysreg_el1(cxt->tcr, SYS_TCR); - write_sysreg_el1(cxt->sctlr, SYS_SCTLR); - } - - local_irq_restore(cxt->flags); -} - -static void __hyp_text __tlb_switch_to_host_nvhe(struct kvm *kvm, - struct tlb_inv_context *cxt) -{ - write_sysreg(0, vttbr_el2); - - if (cpus_have_final_cap(ARM64_WORKAROUND_SPECULATIVE_AT)) { - /* Ensure write of the host VMID */ - isb(); - /* Restore the host's TCR_EL1 */ - write_sysreg_el1(cxt->tcr, SYS_TCR); - } -} - -static void __hyp_text __tlb_switch_to_host(struct kvm *kvm, - struct tlb_inv_context *cxt) -{ - if (has_vhe()) - __tlb_switch_to_host_vhe(kvm, cxt); - else - __tlb_switch_to_host_nvhe(kvm, cxt); -} - -void __hyp_text __kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa) -{ - struct tlb_inv_context cxt; - - dsb(ishst); - - /* Switch to requested VMID */ - kvm = kern_hyp_va(kvm); - __tlb_switch_to_guest(kvm, &cxt); - - /* - * We could do so much better if we had the VA as well. - * Instead, we invalidate Stage-2 for this IPA, and the - * whole of Stage-1. Weep... - */ - ipa >>= 12; - __tlbi(ipas2e1is, ipa); - - /* - * We have to ensure completion of the invalidation at Stage-2, - * since a table walk on another CPU could refill a TLB with a - * complete (S1 + S2) walk based on the old Stage-2 mapping if - * the Stage-1 invalidation happened first. - */ - dsb(ish); - __tlbi(vmalle1is); - dsb(ish); - isb(); - - /* - * If the host is running at EL1 and we have a VPIPT I-cache, - * then we must perform I-cache maintenance at EL2 in order for - * it to have an effect on the guest. Since the guest cannot hit - * I-cache lines allocated with a different VMID, we don't need - * to worry about junk out of guest reset (we nuke the I-cache on - * VMID rollover), but we do need to be careful when remapping - * executable pages for the same guest. This can happen when KSM - * takes a CoW fault on an executable page, copies the page into - * a page that was previously mapped in the guest and then needs - * to invalidate the guest view of the I-cache for that page - * from EL1. To solve this, we invalidate the entire I-cache when - * unmapping a page from a guest if we have a VPIPT I-cache but - * the host is running at EL1. As above, we could do better if - * we had the VA. - * - * The moral of this story is: if you have a VPIPT I-cache, then - * you should be running with VHE enabled. 
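Stripped of the commentary, the invalidation sequence in __kvm_tlb_flush_vmid_ipa() boils down to the following ordering (sketch; the IPA is shifted right by 12 because that is how the TLBI payload encodes the address):

	dsb(ishst);			/* complete prior PTE updates */
	__tlbi(ipas2e1is, ipa >> 12);	/* stage-2 entry for this IPA */
	dsb(ish);			/* S2 invalidation must finish first... */
	__tlbi(vmalle1is);		/* ...before dropping the whole of stage-1 */
	dsb(ish);
	isb();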
- */ - if (!has_vhe() && icache_is_vpipt()) - __flush_icache_all(); - - __tlb_switch_to_host(kvm, &cxt); -} - -void __hyp_text __kvm_tlb_flush_vmid(struct kvm *kvm) -{ - struct tlb_inv_context cxt; - - dsb(ishst); - - /* Switch to requested VMID */ - kvm = kern_hyp_va(kvm); - __tlb_switch_to_guest(kvm, &cxt); - - __tlbi(vmalls12e1is); - dsb(ish); - isb(); - - __tlb_switch_to_host(kvm, &cxt); -} - -void __hyp_text __kvm_tlb_flush_local_vmid(struct kvm_vcpu *vcpu) -{ - struct kvm *kvm = kern_hyp_va(kern_hyp_va(vcpu)->kvm); - struct tlb_inv_context cxt; - - /* Switch to requested VMID */ - __tlb_switch_to_guest(kvm, &cxt); - - __tlbi(vmalle1); - dsb(nsh); - isb(); - - __tlb_switch_to_host(kvm, &cxt); -} - -void __hyp_text __kvm_flush_vm_context(void) -{ - dsb(ishst); - __tlbi(alle1is); - - /* - * VIPT and PIPT caches are not affected by VMID, so no maintenance - * is necessary across a VMID rollover. - * - * VPIPT caches constrain lookup and maintenance to the active VMID, - * so we need to invalidate lines with a stale VMID to avoid an ABA - * race after multiple rollovers. - * - */ - if (icache_is_vpipt()) - asm volatile("ic ialluis"); - - dsb(ish); -} diff --git a/arch/arm64/kvm/hyp/vgic-v2-cpuif-proxy.c b/arch/arm64/kvm/hyp/vgic-v2-cpuif-proxy.c index 4f3a087e36d5..bd1bab551d48 100644 --- a/arch/arm64/kvm/hyp/vgic-v2-cpuif-proxy.c +++ b/arch/arm64/kvm/hyp/vgic-v2-cpuif-proxy.c @@ -13,7 +13,7 @@ #include <asm/kvm_hyp.h> #include <asm/kvm_mmu.h> -static bool __hyp_text __is_be(struct kvm_vcpu *vcpu) +static bool __is_be(struct kvm_vcpu *vcpu) { if (vcpu_mode_is_32bit(vcpu)) return !!(read_sysreg_el2(SYS_SPSR) & PSR_AA32_E_BIT); @@ -32,7 +32,7 @@ static bool __hyp_text __is_be(struct kvm_vcpu *vcpu) * 0: Not a GICV access * -1: Illegal GICV access successfully performed */ -int __hyp_text __vgic_v2_perform_cpuif_access(struct kvm_vcpu *vcpu) +int __vgic_v2_perform_cpuif_access(struct kvm_vcpu *vcpu) { struct kvm *kvm = kern_hyp_va(vcpu->kvm); struct vgic_dist *vgic = &kvm->arch.vgic; diff --git a/arch/arm64/kvm/hyp/vgic-v3-sr.c b/arch/arm64/kvm/hyp/vgic-v3-sr.c index 10ed539835c1..5a0073511efb 100644 --- a/arch/arm64/kvm/hyp/vgic-v3-sr.c +++ b/arch/arm64/kvm/hyp/vgic-v3-sr.c @@ -16,7 +16,7 @@ #define vtr_to_nr_pre_bits(v) ((((u32)(v) >> 26) & 7) + 1) #define vtr_to_nr_apr_regs(v) (1 << (vtr_to_nr_pre_bits(v) - 5)) -static u64 __hyp_text __gic_v3_get_lr(unsigned int lr) +static u64 __gic_v3_get_lr(unsigned int lr) { switch (lr & 0xf) { case 0: @@ -56,7 +56,7 @@ static u64 __hyp_text __gic_v3_get_lr(unsigned int lr) unreachable(); } -static void __hyp_text __gic_v3_set_lr(u64 val, int lr) +static void __gic_v3_set_lr(u64 val, int lr) { switch (lr & 0xf) { case 0: @@ -110,7 +110,7 @@ static void __hyp_text __gic_v3_set_lr(u64 val, int lr) } } -static void __hyp_text __vgic_v3_write_ap0rn(u32 val, int n) +static void __vgic_v3_write_ap0rn(u32 val, int n) { switch (n) { case 0: @@ -128,7 +128,7 @@ static void __hyp_text __vgic_v3_write_ap0rn(u32 val, int n) } } -static void __hyp_text __vgic_v3_write_ap1rn(u32 val, int n) +static void __vgic_v3_write_ap1rn(u32 val, int n) { switch (n) { case 0: @@ -146,7 +146,7 @@ static void __hyp_text __vgic_v3_write_ap1rn(u32 val, int n) } } -static u32 __hyp_text __vgic_v3_read_ap0rn(int n) +static u32 __vgic_v3_read_ap0rn(int n) { u32 val; @@ -170,7 +170,7 @@ static u32 __hyp_text __vgic_v3_read_ap0rn(int n) return val; } -static u32 __hyp_text __vgic_v3_read_ap1rn(int n) +static u32 __vgic_v3_read_ap1rn(int n) { u32 val; @@ -194,7 +194,7 @@ static u32 
__hyp_text __vgic_v3_read_ap1rn(int n) return val; } -void __hyp_text __vgic_v3_save_state(struct vgic_v3_cpu_if *cpu_if) +void __vgic_v3_save_state(struct vgic_v3_cpu_if *cpu_if) { u64 used_lrs = cpu_if->used_lrs; @@ -229,7 +229,7 @@ void __hyp_text __vgic_v3_save_state(struct vgic_v3_cpu_if *cpu_if) } } -void __hyp_text __vgic_v3_restore_state(struct vgic_v3_cpu_if *cpu_if) +void __vgic_v3_restore_state(struct vgic_v3_cpu_if *cpu_if) { u64 used_lrs = cpu_if->used_lrs; int i; @@ -255,7 +255,7 @@ void __hyp_text __vgic_v3_restore_state(struct vgic_v3_cpu_if *cpu_if) } } -void __hyp_text __vgic_v3_activate_traps(struct vgic_v3_cpu_if *cpu_if) +void __vgic_v3_activate_traps(struct vgic_v3_cpu_if *cpu_if) { /* * VFIQEn is RES1 if ICC_SRE_EL1.SRE is 1. This causes a @@ -302,7 +302,7 @@ void __hyp_text __vgic_v3_activate_traps(struct vgic_v3_cpu_if *cpu_if) write_gicreg(cpu_if->vgic_hcr, ICH_HCR_EL2); } -void __hyp_text __vgic_v3_deactivate_traps(struct vgic_v3_cpu_if *cpu_if) +void __vgic_v3_deactivate_traps(struct vgic_v3_cpu_if *cpu_if) { u64 val; @@ -328,7 +328,7 @@ void __hyp_text __vgic_v3_deactivate_traps(struct vgic_v3_cpu_if *cpu_if) write_gicreg(0, ICH_HCR_EL2); } -void __hyp_text __vgic_v3_save_aprs(struct vgic_v3_cpu_if *cpu_if) +void __vgic_v3_save_aprs(struct vgic_v3_cpu_if *cpu_if) { u64 val; u32 nr_pre_bits; @@ -361,7 +361,7 @@ void __hyp_text __vgic_v3_save_aprs(struct vgic_v3_cpu_if *cpu_if) } } -void __hyp_text __vgic_v3_restore_aprs(struct vgic_v3_cpu_if *cpu_if) +void __vgic_v3_restore_aprs(struct vgic_v3_cpu_if *cpu_if) { u64 val; u32 nr_pre_bits; @@ -394,7 +394,7 @@ void __hyp_text __vgic_v3_restore_aprs(struct vgic_v3_cpu_if *cpu_if) } } -void __hyp_text __vgic_v3_init_lrs(void) +void __vgic_v3_init_lrs(void) { int max_lr_idx = vtr_to_max_lr_idx(read_gicreg(ICH_VTR_EL2)); int i; @@ -403,30 +403,30 @@ void __hyp_text __vgic_v3_init_lrs(void) __gic_v3_set_lr(0, i); } -u64 __hyp_text __vgic_v3_get_ich_vtr_el2(void) +u64 __vgic_v3_get_ich_vtr_el2(void) { return read_gicreg(ICH_VTR_EL2); } -u64 __hyp_text __vgic_v3_read_vmcr(void) +u64 __vgic_v3_read_vmcr(void) { return read_gicreg(ICH_VMCR_EL2); } -void __hyp_text __vgic_v3_write_vmcr(u32 vmcr) +void __vgic_v3_write_vmcr(u32 vmcr) { write_gicreg(vmcr, ICH_VMCR_EL2); } -static int __hyp_text __vgic_v3_bpr_min(void) +static int __vgic_v3_bpr_min(void) { /* See Pseudocode for VPriorityGroup */ return 8 - vtr_to_nr_pre_bits(read_gicreg(ICH_VTR_EL2)); } -static int __hyp_text __vgic_v3_get_group(struct kvm_vcpu *vcpu) +static int __vgic_v3_get_group(struct kvm_vcpu *vcpu) { - u32 esr = kvm_vcpu_get_hsr(vcpu); + u32 esr = kvm_vcpu_get_esr(vcpu); u8 crm = (esr & ESR_ELx_SYS64_ISS_CRM_MASK) >> ESR_ELx_SYS64_ISS_CRM_SHIFT; return crm != 8; @@ -434,9 +434,8 @@ static int __hyp_text __vgic_v3_get_group(struct kvm_vcpu *vcpu) #define GICv3_IDLE_PRIORITY 0xff -static int __hyp_text __vgic_v3_highest_priority_lr(struct kvm_vcpu *vcpu, - u32 vmcr, - u64 *lr_val) +static int __vgic_v3_highest_priority_lr(struct kvm_vcpu *vcpu, u32 vmcr, + u64 *lr_val) { unsigned int used_lrs = vcpu->arch.vgic_cpu.vgic_v3.used_lrs; u8 priority = GICv3_IDLE_PRIORITY; @@ -474,8 +473,8 @@ static int __hyp_text __vgic_v3_highest_priority_lr(struct kvm_vcpu *vcpu, return lr; } -static int __hyp_text __vgic_v3_find_active_lr(struct kvm_vcpu *vcpu, - int intid, u64 *lr_val) +static int __vgic_v3_find_active_lr(struct kvm_vcpu *vcpu, int intid, + u64 *lr_val) { unsigned int used_lrs = vcpu->arch.vgic_cpu.vgic_v3.used_lrs; int i; @@ -494,7 +493,7 @@ static int 
__hyp_text __vgic_v3_find_active_lr(struct kvm_vcpu *vcpu, return -1; } -static int __hyp_text __vgic_v3_get_highest_active_priority(void) +static int __vgic_v3_get_highest_active_priority(void) { u8 nr_apr_regs = vtr_to_nr_apr_regs(read_gicreg(ICH_VTR_EL2)); u32 hap = 0; @@ -526,12 +525,12 @@ static int __hyp_text __vgic_v3_get_highest_active_priority(void) return GICv3_IDLE_PRIORITY; } -static unsigned int __hyp_text __vgic_v3_get_bpr0(u32 vmcr) +static unsigned int __vgic_v3_get_bpr0(u32 vmcr) { return (vmcr & ICH_VMCR_BPR0_MASK) >> ICH_VMCR_BPR0_SHIFT; } -static unsigned int __hyp_text __vgic_v3_get_bpr1(u32 vmcr) +static unsigned int __vgic_v3_get_bpr1(u32 vmcr) { unsigned int bpr; @@ -550,7 +549,7 @@ static unsigned int __hyp_text __vgic_v3_get_bpr1(u32 vmcr) * Convert a priority to a preemption level, taking the relevant BPR * into account by zeroing the sub-priority bits. */ -static u8 __hyp_text __vgic_v3_pri_to_pre(u8 pri, u32 vmcr, int grp) +static u8 __vgic_v3_pri_to_pre(u8 pri, u32 vmcr, int grp) { unsigned int bpr; @@ -568,7 +567,7 @@ static u8 __hyp_text __vgic_v3_pri_to_pre(u8 pri, u32 vmcr, int grp) * matter what the guest does with its BPR, we can always set/get the * same value of a priority. */ -static void __hyp_text __vgic_v3_set_active_priority(u8 pri, u32 vmcr, int grp) +static void __vgic_v3_set_active_priority(u8 pri, u32 vmcr, int grp) { u8 pre, ap; u32 val; @@ -587,7 +586,7 @@ static void __hyp_text __vgic_v3_set_active_priority(u8 pri, u32 vmcr, int grp) } } -static int __hyp_text __vgic_v3_clear_highest_active_priority(void) +static int __vgic_v3_clear_highest_active_priority(void) { u8 nr_apr_regs = vtr_to_nr_apr_regs(read_gicreg(ICH_VTR_EL2)); u32 hap = 0; @@ -625,7 +624,7 @@ static int __hyp_text __vgic_v3_clear_highest_active_priority(void) return GICv3_IDLE_PRIORITY; } -static void __hyp_text __vgic_v3_read_iar(struct kvm_vcpu *vcpu, u32 vmcr, int rt) +static void __vgic_v3_read_iar(struct kvm_vcpu *vcpu, u32 vmcr, int rt) { u64 lr_val; u8 lr_prio, pmr; @@ -661,7 +660,7 @@ spurious: vcpu_set_reg(vcpu, rt, ICC_IAR1_EL1_SPURIOUS); } -static void __hyp_text __vgic_v3_clear_active_lr(int lr, u64 lr_val) +static void __vgic_v3_clear_active_lr(int lr, u64 lr_val) { lr_val &= ~ICH_LR_ACTIVE_BIT; if (lr_val & ICH_LR_HW) { @@ -674,7 +673,7 @@ static void __hyp_text __vgic_v3_clear_active_lr(int lr, u64 lr_val) __gic_v3_set_lr(lr_val, lr); } -static void __hyp_text __vgic_v3_bump_eoicount(void) +static void __vgic_v3_bump_eoicount(void) { u32 hcr; @@ -683,8 +682,7 @@ static void __hyp_text __vgic_v3_bump_eoicount(void) write_gicreg(hcr, ICH_HCR_EL2); } -static void __hyp_text __vgic_v3_write_dir(struct kvm_vcpu *vcpu, - u32 vmcr, int rt) +static void __vgic_v3_write_dir(struct kvm_vcpu *vcpu, u32 vmcr, int rt) { u32 vid = vcpu_get_reg(vcpu, rt); u64 lr_val; @@ -707,7 +705,7 @@ static void __hyp_text __vgic_v3_write_dir(struct kvm_vcpu *vcpu, __vgic_v3_clear_active_lr(lr, lr_val); } -static void __hyp_text __vgic_v3_write_eoir(struct kvm_vcpu *vcpu, u32 vmcr, int rt) +static void __vgic_v3_write_eoir(struct kvm_vcpu *vcpu, u32 vmcr, int rt) { u32 vid = vcpu_get_reg(vcpu, rt); u64 lr_val; @@ -744,17 +742,17 @@ static void __hyp_text __vgic_v3_write_eoir(struct kvm_vcpu *vcpu, u32 vmcr, int __vgic_v3_clear_active_lr(lr, lr_val); } -static void __hyp_text __vgic_v3_read_igrpen0(struct kvm_vcpu *vcpu, u32 vmcr, int rt) +static void __vgic_v3_read_igrpen0(struct kvm_vcpu *vcpu, u32 vmcr, int rt) { vcpu_set_reg(vcpu, rt, !!(vmcr & ICH_VMCR_ENG0_MASK)); } -static void 
__hyp_text __vgic_v3_read_igrpen1(struct kvm_vcpu *vcpu, u32 vmcr, int rt) +static void __vgic_v3_read_igrpen1(struct kvm_vcpu *vcpu, u32 vmcr, int rt) { vcpu_set_reg(vcpu, rt, !!(vmcr & ICH_VMCR_ENG1_MASK)); } -static void __hyp_text __vgic_v3_write_igrpen0(struct kvm_vcpu *vcpu, u32 vmcr, int rt) +static void __vgic_v3_write_igrpen0(struct kvm_vcpu *vcpu, u32 vmcr, int rt) { u64 val = vcpu_get_reg(vcpu, rt); @@ -766,7 +764,7 @@ static void __hyp_text __vgic_v3_write_igrpen0(struct kvm_vcpu *vcpu, u32 vmcr, __vgic_v3_write_vmcr(vmcr); } -static void __hyp_text __vgic_v3_write_igrpen1(struct kvm_vcpu *vcpu, u32 vmcr, int rt) +static void __vgic_v3_write_igrpen1(struct kvm_vcpu *vcpu, u32 vmcr, int rt) { u64 val = vcpu_get_reg(vcpu, rt); @@ -778,17 +776,17 @@ static void __hyp_text __vgic_v3_write_igrpen1(struct kvm_vcpu *vcpu, u32 vmcr, __vgic_v3_write_vmcr(vmcr); } -static void __hyp_text __vgic_v3_read_bpr0(struct kvm_vcpu *vcpu, u32 vmcr, int rt) +static void __vgic_v3_read_bpr0(struct kvm_vcpu *vcpu, u32 vmcr, int rt) { vcpu_set_reg(vcpu, rt, __vgic_v3_get_bpr0(vmcr)); } -static void __hyp_text __vgic_v3_read_bpr1(struct kvm_vcpu *vcpu, u32 vmcr, int rt) +static void __vgic_v3_read_bpr1(struct kvm_vcpu *vcpu, u32 vmcr, int rt) { vcpu_set_reg(vcpu, rt, __vgic_v3_get_bpr1(vmcr)); } -static void __hyp_text __vgic_v3_write_bpr0(struct kvm_vcpu *vcpu, u32 vmcr, int rt) +static void __vgic_v3_write_bpr0(struct kvm_vcpu *vcpu, u32 vmcr, int rt) { u64 val = vcpu_get_reg(vcpu, rt); u8 bpr_min = __vgic_v3_bpr_min() - 1; @@ -805,7 +803,7 @@ static void __hyp_text __vgic_v3_write_bpr0(struct kvm_vcpu *vcpu, u32 vmcr, int __vgic_v3_write_vmcr(vmcr); } -static void __hyp_text __vgic_v3_write_bpr1(struct kvm_vcpu *vcpu, u32 vmcr, int rt) +static void __vgic_v3_write_bpr1(struct kvm_vcpu *vcpu, u32 vmcr, int rt) { u64 val = vcpu_get_reg(vcpu, rt); u8 bpr_min = __vgic_v3_bpr_min(); @@ -825,7 +823,7 @@ static void __hyp_text __vgic_v3_write_bpr1(struct kvm_vcpu *vcpu, u32 vmcr, int __vgic_v3_write_vmcr(vmcr); } -static void __hyp_text __vgic_v3_read_apxrn(struct kvm_vcpu *vcpu, int rt, int n) +static void __vgic_v3_read_apxrn(struct kvm_vcpu *vcpu, int rt, int n) { u32 val; @@ -837,7 +835,7 @@ static void __hyp_text __vgic_v3_read_apxrn(struct kvm_vcpu *vcpu, int rt, int n vcpu_set_reg(vcpu, rt, val); } -static void __hyp_text __vgic_v3_write_apxrn(struct kvm_vcpu *vcpu, int rt, int n) +static void __vgic_v3_write_apxrn(struct kvm_vcpu *vcpu, int rt, int n) { u32 val = vcpu_get_reg(vcpu, rt); @@ -847,56 +845,49 @@ static void __hyp_text __vgic_v3_write_apxrn(struct kvm_vcpu *vcpu, int rt, int __vgic_v3_write_ap1rn(val, n); } -static void __hyp_text __vgic_v3_read_apxr0(struct kvm_vcpu *vcpu, +static void __vgic_v3_read_apxr0(struct kvm_vcpu *vcpu, u32 vmcr, int rt) { __vgic_v3_read_apxrn(vcpu, rt, 0); } -static void __hyp_text __vgic_v3_read_apxr1(struct kvm_vcpu *vcpu, +static void __vgic_v3_read_apxr1(struct kvm_vcpu *vcpu, u32 vmcr, int rt) { __vgic_v3_read_apxrn(vcpu, rt, 1); } -static void __hyp_text __vgic_v3_read_apxr2(struct kvm_vcpu *vcpu, - u32 vmcr, int rt) +static void __vgic_v3_read_apxr2(struct kvm_vcpu *vcpu, u32 vmcr, int rt) { __vgic_v3_read_apxrn(vcpu, rt, 2); } -static void __hyp_text __vgic_v3_read_apxr3(struct kvm_vcpu *vcpu, - u32 vmcr, int rt) +static void __vgic_v3_read_apxr3(struct kvm_vcpu *vcpu, u32 vmcr, int rt) { __vgic_v3_read_apxrn(vcpu, rt, 3); } -static void __hyp_text __vgic_v3_write_apxr0(struct kvm_vcpu *vcpu, - u32 vmcr, int rt) +static void 
__vgic_v3_write_apxr0(struct kvm_vcpu *vcpu, u32 vmcr, int rt) { __vgic_v3_write_apxrn(vcpu, rt, 0); } -static void __hyp_text __vgic_v3_write_apxr1(struct kvm_vcpu *vcpu, - u32 vmcr, int rt) +static void __vgic_v3_write_apxr1(struct kvm_vcpu *vcpu, u32 vmcr, int rt) { __vgic_v3_write_apxrn(vcpu, rt, 1); } -static void __hyp_text __vgic_v3_write_apxr2(struct kvm_vcpu *vcpu, - u32 vmcr, int rt) +static void __vgic_v3_write_apxr2(struct kvm_vcpu *vcpu, u32 vmcr, int rt) { __vgic_v3_write_apxrn(vcpu, rt, 2); } -static void __hyp_text __vgic_v3_write_apxr3(struct kvm_vcpu *vcpu, - u32 vmcr, int rt) +static void __vgic_v3_write_apxr3(struct kvm_vcpu *vcpu, u32 vmcr, int rt) { __vgic_v3_write_apxrn(vcpu, rt, 3); } -static void __hyp_text __vgic_v3_read_hppir(struct kvm_vcpu *vcpu, - u32 vmcr, int rt) +static void __vgic_v3_read_hppir(struct kvm_vcpu *vcpu, u32 vmcr, int rt) { u64 lr_val; int lr, lr_grp, grp; @@ -915,16 +906,14 @@ spurious: vcpu_set_reg(vcpu, rt, lr_val & ICH_LR_VIRTUAL_ID_MASK); } -static void __hyp_text __vgic_v3_read_pmr(struct kvm_vcpu *vcpu, - u32 vmcr, int rt) +static void __vgic_v3_read_pmr(struct kvm_vcpu *vcpu, u32 vmcr, int rt) { vmcr &= ICH_VMCR_PMR_MASK; vmcr >>= ICH_VMCR_PMR_SHIFT; vcpu_set_reg(vcpu, rt, vmcr); } -static void __hyp_text __vgic_v3_write_pmr(struct kvm_vcpu *vcpu, - u32 vmcr, int rt) +static void __vgic_v3_write_pmr(struct kvm_vcpu *vcpu, u32 vmcr, int rt) { u32 val = vcpu_get_reg(vcpu, rt); @@ -936,15 +925,13 @@ static void __hyp_text __vgic_v3_write_pmr(struct kvm_vcpu *vcpu, write_gicreg(vmcr, ICH_VMCR_EL2); } -static void __hyp_text __vgic_v3_read_rpr(struct kvm_vcpu *vcpu, - u32 vmcr, int rt) +static void __vgic_v3_read_rpr(struct kvm_vcpu *vcpu, u32 vmcr, int rt) { u32 val = __vgic_v3_get_highest_active_priority(); vcpu_set_reg(vcpu, rt, val); } -static void __hyp_text __vgic_v3_read_ctlr(struct kvm_vcpu *vcpu, - u32 vmcr, int rt) +static void __vgic_v3_read_ctlr(struct kvm_vcpu *vcpu, u32 vmcr, int rt) { u32 vtr, val; @@ -965,8 +952,7 @@ static void __hyp_text __vgic_v3_read_ctlr(struct kvm_vcpu *vcpu, vcpu_set_reg(vcpu, rt, val); } -static void __hyp_text __vgic_v3_write_ctlr(struct kvm_vcpu *vcpu, - u32 vmcr, int rt) +static void __vgic_v3_write_ctlr(struct kvm_vcpu *vcpu, u32 vmcr, int rt) { u32 val = vcpu_get_reg(vcpu, rt); @@ -983,7 +969,7 @@ static void __hyp_text __vgic_v3_write_ctlr(struct kvm_vcpu *vcpu, write_gicreg(vmcr, ICH_VMCR_EL2); } -int __hyp_text __vgic_v3_perform_cpuif_access(struct kvm_vcpu *vcpu) +int __vgic_v3_perform_cpuif_access(struct kvm_vcpu *vcpu) { int rt; u32 esr; @@ -992,7 +978,7 @@ int __hyp_text __vgic_v3_perform_cpuif_access(struct kvm_vcpu *vcpu) bool is_read; u32 sysreg; - esr = kvm_vcpu_get_hsr(vcpu); + esr = kvm_vcpu_get_esr(vcpu); if (vcpu_mode_is_32bit(vcpu)) { if (!kvm_condition_valid(vcpu)) { __kvm_skip_instr(vcpu); diff --git a/arch/arm64/kvm/hyp/vhe/Makefile b/arch/arm64/kvm/hyp/vhe/Makefile new file mode 100644 index 000000000000..461e97c375cc --- /dev/null +++ b/arch/arm64/kvm/hyp/vhe/Makefile @@ -0,0 +1,11 @@ +# SPDX-License-Identifier: GPL-2.0 +# +# Makefile for Kernel-based Virtual Machine module, HYP/nVHE part +# + +asflags-y := -D__KVM_VHE_HYPERVISOR__ +ccflags-y := -D__KVM_VHE_HYPERVISOR__ + +obj-y := timer-sr.o sysreg-sr.o debug-sr.o switch.o tlb.o +obj-y += ../vgic-v3-sr.o ../aarch32.o ../vgic-v2-cpuif-proxy.o ../entry.o \ + ../fpsimd.o ../hyp-entry.o diff --git a/arch/arm64/kvm/hyp/vhe/debug-sr.c b/arch/arm64/kvm/hyp/vhe/debug-sr.c new file mode 100644 index 000000000000..f1e2e5a00933 --- 
/dev/null +++ b/arch/arm64/kvm/hyp/vhe/debug-sr.c @@ -0,0 +1,26 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2015 - ARM Ltd + * Author: Marc Zyngier <marc.zyngier@arm.com> + */ + +#include <hyp/debug-sr.h> + +#include <linux/kvm_host.h> + +#include <asm/kvm_hyp.h> + +void __debug_switch_to_guest(struct kvm_vcpu *vcpu) +{ + __debug_switch_to_guest_common(vcpu); +} + +void __debug_switch_to_host(struct kvm_vcpu *vcpu) +{ + __debug_switch_to_host_common(vcpu); +} + +u32 __kvm_get_mdcr_el2(void) +{ + return read_sysreg(mdcr_el2); +} diff --git a/arch/arm64/kvm/hyp/vhe/switch.c b/arch/arm64/kvm/hyp/vhe/switch.c new file mode 100644 index 000000000000..c52d714e0d75 --- /dev/null +++ b/arch/arm64/kvm/hyp/vhe/switch.c @@ -0,0 +1,219 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2015 - ARM Ltd + * Author: Marc Zyngier <marc.zyngier@arm.com> + */ + +#include <hyp/switch.h> + +#include <linux/arm-smccc.h> +#include <linux/kvm_host.h> +#include <linux/types.h> +#include <linux/jump_label.h> +#include <uapi/linux/psci.h> + +#include <kvm/arm_psci.h> + +#include <asm/barrier.h> +#include <asm/cpufeature.h> +#include <asm/kprobes.h> +#include <asm/kvm_asm.h> +#include <asm/kvm_emulate.h> +#include <asm/kvm_hyp.h> +#include <asm/kvm_mmu.h> +#include <asm/fpsimd.h> +#include <asm/debug-monitors.h> +#include <asm/processor.h> +#include <asm/thread_info.h> + +const char __hyp_panic_string[] = "HYP panic:\nPS:%08llx PC:%016llx ESR:%08llx\nFAR:%016llx HPFAR:%016llx PAR:%016llx\nVCPU:%p\n"; + +static void __activate_traps(struct kvm_vcpu *vcpu) +{ + u64 val; + + ___activate_traps(vcpu); + + val = read_sysreg(cpacr_el1); + val |= CPACR_EL1_TTA; + val &= ~CPACR_EL1_ZEN; + + /* + * With VHE (HCR.E2H == 1), accesses to CPACR_EL1 are routed to + * CPTR_EL2. In general, CPACR_EL1 has the same layout as CPTR_EL2, + * except for some missing controls, such as TAM. + * In this case, CPTR_EL2.TAM has the same position with or without + * VHE (HCR.E2H == 1) which allows us to use here the CPTR_EL2.TAM + * shift value for trapping the AMU accesses. + */ + + val |= CPTR_EL2_TAM; + + if (update_fp_enabled(vcpu)) { + if (vcpu_has_sve(vcpu)) + val |= CPACR_EL1_ZEN; + } else { + val &= ~CPACR_EL1_FPEN; + __activate_traps_fpsimd32(vcpu); + } + + write_sysreg(val, cpacr_el1); + + write_sysreg(kvm_get_hyp_vector(), vbar_el1); +} +NOKPROBE_SYMBOL(__activate_traps); + +static void __deactivate_traps(struct kvm_vcpu *vcpu) +{ + extern char vectors[]; /* kernel exception vectors */ + + ___deactivate_traps(vcpu); + + write_sysreg(HCR_HOST_VHE_FLAGS, hcr_el2); + + /* + * ARM errata 1165522 and 1530923 require the actual execution of the + * above before we can switch to the EL2/EL0 translation regime used by + * the host. 
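The ALTERNATIVE that follows is patched once CPU capabilities are finalised: unaffected CPUs keep a NOP, affected ones get an ISB. Its intent, written out in plain C, is roughly (sketch only):

	if (cpus_have_final_cap(ARM64_WORKAROUND_SPECULATIVE_AT))
		isb();	/* ensure the HCR_EL2 write has taken effect before CPACR_EL1/VBAR_EL1 are restored */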
+ */ + asm(ALTERNATIVE("nop", "isb", ARM64_WORKAROUND_SPECULATIVE_AT)); + + write_sysreg(CPACR_EL1_DEFAULT, cpacr_el1); + write_sysreg(vectors, vbar_el1); +} +NOKPROBE_SYMBOL(__deactivate_traps); + +void activate_traps_vhe_load(struct kvm_vcpu *vcpu) +{ + __activate_traps_common(vcpu); +} + +void deactivate_traps_vhe_put(void) +{ + u64 mdcr_el2 = read_sysreg(mdcr_el2); + + mdcr_el2 &= MDCR_EL2_HPMN_MASK | + MDCR_EL2_E2PB_MASK << MDCR_EL2_E2PB_SHIFT | + MDCR_EL2_TPMS; + + write_sysreg(mdcr_el2, mdcr_el2); + + __deactivate_traps_common(); +} + +/* Switch to the guest for VHE systems running in EL2 */ +static int __kvm_vcpu_run_vhe(struct kvm_vcpu *vcpu) +{ + struct kvm_cpu_context *host_ctxt; + struct kvm_cpu_context *guest_ctxt; + u64 exit_code; + + host_ctxt = &__hyp_this_cpu_ptr(kvm_host_data)->host_ctxt; + host_ctxt->__hyp_running_vcpu = vcpu; + guest_ctxt = &vcpu->arch.ctxt; + + sysreg_save_host_state_vhe(host_ctxt); + + /* + * ARM erratum 1165522 requires us to configure both stage 1 and + * stage 2 translation for the guest context before we clear + * HCR_EL2.TGE. + * + * We have already configured the guest's stage 1 translation in + * kvm_vcpu_load_sysregs_vhe above. We must now call __activate_vm + * before __activate_traps, because __activate_vm configures + * stage 2 translation, and __activate_traps clear HCR_EL2.TGE + * (among other things). + */ + __activate_vm(vcpu->arch.hw_mmu); + __activate_traps(vcpu); + + sysreg_restore_guest_state_vhe(guest_ctxt); + __debug_switch_to_guest(vcpu); + + __set_guest_arch_workaround_state(vcpu); + + do { + /* Jump in the fire! */ + exit_code = __guest_enter(vcpu, host_ctxt); + + /* And we're baaack! */ + } while (fixup_guest_exit(vcpu, &exit_code)); + + __set_host_arch_workaround_state(vcpu); + + sysreg_save_guest_state_vhe(guest_ctxt); + + __deactivate_traps(vcpu); + + sysreg_restore_host_state_vhe(host_ctxt); + + if (vcpu->arch.flags & KVM_ARM64_FP_ENABLED) + __fpsimd_save_fpexc32(vcpu); + + __debug_switch_to_host(vcpu); + + return exit_code; +} +NOKPROBE_SYMBOL(__kvm_vcpu_run_vhe); + +int __kvm_vcpu_run(struct kvm_vcpu *vcpu) +{ + int ret; + + local_daif_mask(); + + /* + * Having IRQs masked via PMR when entering the guest means the GIC + * will not signal the CPU of interrupts of lower priority, and the + * only way to get out will be via guest exceptions. + * Naturally, we want to avoid this. + * + * local_daif_mask() already sets GIC_PRIO_PSR_I_SET, we just need a + * dsb to ensure the redistributor is forwards EL2 IRQs to the CPU. + */ + pmr_sync(); + + ret = __kvm_vcpu_run_vhe(vcpu); + + /* + * local_daif_restore() takes care to properly restore PSTATE.DAIF + * and the GIC PMR if the host is using IRQ priorities. + */ + local_daif_restore(DAIF_PROCCTX_NOIRQ); + + /* + * When we exit from the guest we change a number of CPU configuration + * parameters, such as traps. Make sure these changes take effect + * before running the host or additional guests. 
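The ordering constraints spelled out in the comments above can be summarised as the following skeleton, condensed from __kvm_vcpu_run_vhe()/__kvm_vcpu_run() in this hunk (FP, debug and IRQ-priority handling omitted):

	sysreg_save_host_state_vhe(host_ctxt);
	__activate_vm(vcpu->arch.hw_mmu);	/* stage-2 in place first (erratum 1165522)... */
	__activate_traps(vcpu);			/* ...only then clear HCR_EL2.TGE */
	sysreg_restore_guest_state_vhe(guest_ctxt);

	do {
		exit_code = __guest_enter(vcpu, host_ctxt);
	} while (fixup_guest_exit(vcpu, &exit_code));

	sysreg_save_guest_state_vhe(guest_ctxt);
	__deactivate_traps(vcpu);
	sysreg_restore_host_state_vhe(host_ctxt);
	isb();					/* settle trap/config changes before the host runs */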
+ */ + isb(); + + return ret; +} + +static void __hyp_call_panic(u64 spsr, u64 elr, u64 par, + struct kvm_cpu_context *host_ctxt) +{ + struct kvm_vcpu *vcpu; + vcpu = host_ctxt->__hyp_running_vcpu; + + __deactivate_traps(vcpu); + sysreg_restore_host_state_vhe(host_ctxt); + + panic(__hyp_panic_string, + spsr, elr, + read_sysreg_el2(SYS_ESR), read_sysreg_el2(SYS_FAR), + read_sysreg(hpfar_el2), par, vcpu); +} +NOKPROBE_SYMBOL(__hyp_call_panic); + +void __noreturn hyp_panic(struct kvm_cpu_context *host_ctxt) +{ + u64 spsr = read_sysreg_el2(SYS_SPSR); + u64 elr = read_sysreg_el2(SYS_ELR); + u64 par = read_sysreg(par_el1); + + __hyp_call_panic(spsr, elr, par, host_ctxt); + unreachable(); +} diff --git a/arch/arm64/kvm/hyp/vhe/sysreg-sr.c b/arch/arm64/kvm/hyp/vhe/sysreg-sr.c new file mode 100644 index 000000000000..996471e4c138 --- /dev/null +++ b/arch/arm64/kvm/hyp/vhe/sysreg-sr.c @@ -0,0 +1,114 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2012-2015 - ARM Ltd + * Author: Marc Zyngier <marc.zyngier@arm.com> + */ + +#include <hyp/sysreg-sr.h> + +#include <linux/compiler.h> +#include <linux/kvm_host.h> + +#include <asm/kprobes.h> +#include <asm/kvm_asm.h> +#include <asm/kvm_emulate.h> +#include <asm/kvm_hyp.h> + +/* + * VHE: Host and guest must save mdscr_el1 and sp_el0 (and the PC and + * pstate, which are handled as part of the el2 return state) on every + * switch (sp_el0 is being dealt with in the assembly code). + * tpidr_el0 and tpidrro_el0 only need to be switched when going + * to host userspace or a different VCPU. EL1 registers only need to be + * switched when potentially going to run a different VCPU. The latter two + * classes are handled as part of kvm_arch_vcpu_load and kvm_arch_vcpu_put. + */ + +void sysreg_save_host_state_vhe(struct kvm_cpu_context *ctxt) +{ + __sysreg_save_common_state(ctxt); +} +NOKPROBE_SYMBOL(sysreg_save_host_state_vhe); + +void sysreg_save_guest_state_vhe(struct kvm_cpu_context *ctxt) +{ + __sysreg_save_common_state(ctxt); + __sysreg_save_el2_return_state(ctxt); +} +NOKPROBE_SYMBOL(sysreg_save_guest_state_vhe); + +void sysreg_restore_host_state_vhe(struct kvm_cpu_context *ctxt) +{ + __sysreg_restore_common_state(ctxt); +} +NOKPROBE_SYMBOL(sysreg_restore_host_state_vhe); + +void sysreg_restore_guest_state_vhe(struct kvm_cpu_context *ctxt) +{ + __sysreg_restore_common_state(ctxt); + __sysreg_restore_el2_return_state(ctxt); +} +NOKPROBE_SYMBOL(sysreg_restore_guest_state_vhe); + +/** + * kvm_vcpu_load_sysregs_vhe - Load guest system registers to the physical CPU + * + * @vcpu: The VCPU pointer + * + * Load system registers that do not affect the host's execution, for + * example EL1 system registers on a VHE system where the host kernel + * runs at EL2. This function is called from KVM's vcpu_load() function + * and loading system register state early avoids having to load them on + * every entry to the VM. + */ +void kvm_vcpu_load_sysregs_vhe(struct kvm_vcpu *vcpu) +{ + struct kvm_cpu_context *guest_ctxt = &vcpu->arch.ctxt; + struct kvm_cpu_context *host_ctxt; + + host_ctxt = &__hyp_this_cpu_ptr(kvm_host_data)->host_ctxt; + __sysreg_save_user_state(host_ctxt); + + /* + * Load guest EL1 and user state + * + * We must restore the 32-bit state before the sysregs, thanks + * to erratum #852523 (Cortex-A57) or #853709 (Cortex-A72). 
+ */ + __sysreg32_restore_state(vcpu); + __sysreg_restore_user_state(guest_ctxt); + __sysreg_restore_el1_state(guest_ctxt); + + vcpu->arch.sysregs_loaded_on_cpu = true; + + activate_traps_vhe_load(vcpu); +} + +/** + * kvm_vcpu_put_sysregs_vhe - Restore host system registers to the physical CPU + * + * @vcpu: The VCPU pointer + * + * Save guest system registers that do not affect the host's execution, for + * example EL1 system registers on a VHE system where the host kernel + * runs at EL2. This function is called from KVM's vcpu_put() function + * and deferring saving system register state until we're no longer running the + * VCPU avoids having to save them on every exit from the VM. + */ +void kvm_vcpu_put_sysregs_vhe(struct kvm_vcpu *vcpu) +{ + struct kvm_cpu_context *guest_ctxt = &vcpu->arch.ctxt; + struct kvm_cpu_context *host_ctxt; + + host_ctxt = &__hyp_this_cpu_ptr(kvm_host_data)->host_ctxt; + deactivate_traps_vhe_put(); + + __sysreg_save_el1_state(guest_ctxt); + __sysreg_save_user_state(guest_ctxt); + __sysreg32_save_state(vcpu); + + /* Restore host user state */ + __sysreg_restore_user_state(host_ctxt); + + vcpu->arch.sysregs_loaded_on_cpu = false; +} diff --git a/arch/arm64/kvm/hyp/vhe/timer-sr.c b/arch/arm64/kvm/hyp/vhe/timer-sr.c new file mode 100644 index 000000000000..4cda674a8be6 --- /dev/null +++ b/arch/arm64/kvm/hyp/vhe/timer-sr.c @@ -0,0 +1,12 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2012-2015 - ARM Ltd + * Author: Marc Zyngier <marc.zyngier@arm.com> + */ + +#include <asm/kvm_hyp.h> + +void __kvm_timer_set_cntvoff(u64 cntvoff) +{ + write_sysreg(cntvoff, cntvoff_el2); +} diff --git a/arch/arm64/kvm/hyp/vhe/tlb.c b/arch/arm64/kvm/hyp/vhe/tlb.c new file mode 100644 index 000000000000..fd7895945bbc --- /dev/null +++ b/arch/arm64/kvm/hyp/vhe/tlb.c @@ -0,0 +1,162 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2015 - ARM Ltd + * Author: Marc Zyngier <marc.zyngier@arm.com> + */ + +#include <linux/irqflags.h> + +#include <asm/kvm_hyp.h> +#include <asm/kvm_mmu.h> +#include <asm/tlbflush.h> + +struct tlb_inv_context { + unsigned long flags; + u64 tcr; + u64 sctlr; +}; + +static void __tlb_switch_to_guest(struct kvm_s2_mmu *mmu, + struct tlb_inv_context *cxt) +{ + u64 val; + + local_irq_save(cxt->flags); + + if (cpus_have_final_cap(ARM64_WORKAROUND_SPECULATIVE_AT)) { + /* + * For CPUs that are affected by ARM errata 1165522 or 1530923, + * we cannot trust stage-1 to be in a correct state at that + * point. Since we do not want to force a full load of the + * vcpu state, we prevent the EL1 page-table walker to + * allocate new TLBs. This is done by setting the EPD bits + * in the TCR_EL1 register. We also need to prevent it to + * allocate IPA->PA walks, so we enable the S1 MMU... + */ + val = cxt->tcr = read_sysreg_el1(SYS_TCR); + val |= TCR_EPD1_MASK | TCR_EPD0_MASK; + write_sysreg_el1(val, SYS_TCR); + val = cxt->sctlr = read_sysreg_el1(SYS_SCTLR); + val |= SCTLR_ELx_M; + write_sysreg_el1(val, SYS_SCTLR); + } + + /* + * With VHE enabled, we have HCR_EL2.{E2H,TGE} = {1,1}, and + * most TLB operations target EL2/EL0. In order to affect the + * guest TLBs (EL1/EL0), we need to change one of these two + * bits. Changing E2H is impossible (goodbye TTBR1_EL2), so + * let's flip TGE before executing the TLB operation. + * + * ARM erratum 1165522 requires some special handling (again), + * as we need to make sure both stages of translation are in + * place before clearing TGE. 
__load_guest_stage2() already + * has an ISB in order to deal with this. + */ + __load_guest_stage2(mmu); + val = read_sysreg(hcr_el2); + val &= ~HCR_TGE; + write_sysreg(val, hcr_el2); + isb(); +} + +static void __tlb_switch_to_host(struct tlb_inv_context *cxt) +{ + /* + * We're done with the TLB operation, let's restore the host's + * view of HCR_EL2. + */ + write_sysreg(0, vttbr_el2); + write_sysreg(HCR_HOST_VHE_FLAGS, hcr_el2); + isb(); + + if (cpus_have_final_cap(ARM64_WORKAROUND_SPECULATIVE_AT)) { + /* Restore the registers to what they were */ + write_sysreg_el1(cxt->tcr, SYS_TCR); + write_sysreg_el1(cxt->sctlr, SYS_SCTLR); + } + + local_irq_restore(cxt->flags); +} + +void __kvm_tlb_flush_vmid_ipa(struct kvm_s2_mmu *mmu, + phys_addr_t ipa, int level) +{ + struct tlb_inv_context cxt; + + dsb(ishst); + + /* Switch to requested VMID */ + __tlb_switch_to_guest(mmu, &cxt); + + /* + * We could do so much better if we had the VA as well. + * Instead, we invalidate Stage-2 for this IPA, and the + * whole of Stage-1. Weep... + */ + ipa >>= 12; + __tlbi_level(ipas2e1is, ipa, level); + + /* + * We have to ensure completion of the invalidation at Stage-2, + * since a table walk on another CPU could refill a TLB with a + * complete (S1 + S2) walk based on the old Stage-2 mapping if + * the Stage-1 invalidation happened first. + */ + dsb(ish); + __tlbi(vmalle1is); + dsb(ish); + isb(); + + __tlb_switch_to_host(&cxt); +} + +void __kvm_tlb_flush_vmid(struct kvm_s2_mmu *mmu) +{ + struct tlb_inv_context cxt; + + dsb(ishst); + + /* Switch to requested VMID */ + __tlb_switch_to_guest(mmu, &cxt); + + __tlbi(vmalls12e1is); + dsb(ish); + isb(); + + __tlb_switch_to_host(&cxt); +} + +void __kvm_tlb_flush_local_vmid(struct kvm_s2_mmu *mmu) +{ + struct tlb_inv_context cxt; + + /* Switch to requested VMID */ + __tlb_switch_to_guest(mmu, &cxt); + + __tlbi(vmalle1); + dsb(nsh); + isb(); + + __tlb_switch_to_host(&cxt); +} + +void __kvm_flush_vm_context(void) +{ + dsb(ishst); + __tlbi(alle1is); + + /* + * VIPT and PIPT caches are not affected by VMID, so no maintenance + * is necessary across a VMID rollover. + * + * VPIPT caches constrain lookup and maintenance to the active VMID, + * so we need to invalidate lines with a stale VMID to avoid an ABA + * race after multiple rollovers. + * + */ + if (icache_is_vpipt()) + asm volatile("ic ialluis"); + + dsb(ish); +} diff --git a/arch/arm64/kvm/inject_fault.c b/arch/arm64/kvm/inject_fault.c index e21fdd93027a..ebfdfc27b2bd 100644 --- a/arch/arm64/kvm/inject_fault.c +++ b/arch/arm64/kvm/inject_fault.c @@ -64,7 +64,7 @@ static void enter_exception64(struct kvm_vcpu *vcpu, unsigned long target_mode, case PSR_MODE_EL1h: vbar = vcpu_read_sys_reg(vcpu, VBAR_EL1); sctlr = vcpu_read_sys_reg(vcpu, SCTLR_EL1); - vcpu_write_elr_el1(vcpu, *vcpu_pc(vcpu)); + vcpu_write_sys_reg(vcpu, *vcpu_pc(vcpu), ELR_EL1); break; default: /* Don't do that */ diff --git a/arch/arm64/kvm/mmio.c b/arch/arm64/kvm/mmio.c index 158fbe682611..6a2826f1bf5e 100644 --- a/arch/arm64/kvm/mmio.c +++ b/arch/arm64/kvm/mmio.c @@ -146,12 +146,6 @@ int io_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa) return -ENOSYS; } - /* Page table accesses IO mem: tell guest to fix its TTBR */ - if (kvm_vcpu_dabt_iss1tw(vcpu)) { - kvm_inject_dabt(vcpu, kvm_vcpu_get_hfar(vcpu)); - return 1; - } - /* * Prepare MMIO operation. First decode the syndrome data we get * from the CPU. 
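The new level argument threads a translation-level hint down to the TLBI: callers that know which level they just zapped pass it, everyone else uses S2_NO_LEVEL_HINT. The two shapes this takes in the mmu.c hunks that follow, condensed (names from the diff):

	/* zapping a block mapping: the level is known */
	pmd_clear(pmd);
	kvm_tlb_flush_vmid_ipa(mmu, addr, S2_PMD_LEVEL);

	/* tearing down a table entry: no useful hint */
	stage2_pgd_clear(kvm, pgd);
	kvm_tlb_flush_vmid_ipa(mmu, addr, S2_NO_LEVEL_HINT);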
Then try if some in-kernel emulation feels diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c index 7a7ddc4558a7..0121ef2c7c8d 100644 --- a/arch/arm64/kvm/mmu.c +++ b/arch/arm64/kvm/mmu.c @@ -55,12 +55,13 @@ static bool memslot_is_logging(struct kvm_memory_slot *memslot) */ void kvm_flush_remote_tlbs(struct kvm *kvm) { - kvm_call_hyp(__kvm_tlb_flush_vmid, kvm); + kvm_call_hyp(__kvm_tlb_flush_vmid, &kvm->arch.mmu); } -static void kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa) +static void kvm_tlb_flush_vmid_ipa(struct kvm_s2_mmu *mmu, phys_addr_t ipa, + int level) { - kvm_call_hyp(__kvm_tlb_flush_vmid_ipa, kvm, ipa); + kvm_call_hyp(__kvm_tlb_flush_vmid_ipa, mmu, ipa, level); } /* @@ -90,74 +91,80 @@ static bool kvm_is_device_pfn(unsigned long pfn) /** * stage2_dissolve_pmd() - clear and flush huge PMD entry - * @kvm: pointer to kvm structure. + * @mmu: pointer to mmu structure to operate on * @addr: IPA * @pmd: pmd pointer for IPA * * Function clears a PMD entry, flushes addr 1st and 2nd stage TLBs. */ -static void stage2_dissolve_pmd(struct kvm *kvm, phys_addr_t addr, pmd_t *pmd) +static void stage2_dissolve_pmd(struct kvm_s2_mmu *mmu, phys_addr_t addr, pmd_t *pmd) { if (!pmd_thp_or_huge(*pmd)) return; pmd_clear(pmd); - kvm_tlb_flush_vmid_ipa(kvm, addr); + kvm_tlb_flush_vmid_ipa(mmu, addr, S2_PMD_LEVEL); put_page(virt_to_page(pmd)); } /** * stage2_dissolve_pud() - clear and flush huge PUD entry - * @kvm: pointer to kvm structure. + * @mmu: pointer to mmu structure to operate on * @addr: IPA * @pud: pud pointer for IPA * * Function clears a PUD entry, flushes addr 1st and 2nd stage TLBs. */ -static void stage2_dissolve_pud(struct kvm *kvm, phys_addr_t addr, pud_t *pudp) +static void stage2_dissolve_pud(struct kvm_s2_mmu *mmu, phys_addr_t addr, pud_t *pudp) { + struct kvm *kvm = mmu->kvm; + if (!stage2_pud_huge(kvm, *pudp)) return; stage2_pud_clear(kvm, pudp); - kvm_tlb_flush_vmid_ipa(kvm, addr); + kvm_tlb_flush_vmid_ipa(mmu, addr, S2_PUD_LEVEL); put_page(virt_to_page(pudp)); } -static void clear_stage2_pgd_entry(struct kvm *kvm, pgd_t *pgd, phys_addr_t addr) +static void clear_stage2_pgd_entry(struct kvm_s2_mmu *mmu, pgd_t *pgd, phys_addr_t addr) { + struct kvm *kvm = mmu->kvm; p4d_t *p4d_table __maybe_unused = stage2_p4d_offset(kvm, pgd, 0UL); stage2_pgd_clear(kvm, pgd); - kvm_tlb_flush_vmid_ipa(kvm, addr); + kvm_tlb_flush_vmid_ipa(mmu, addr, S2_NO_LEVEL_HINT); stage2_p4d_free(kvm, p4d_table); put_page(virt_to_page(pgd)); } -static void clear_stage2_p4d_entry(struct kvm *kvm, p4d_t *p4d, phys_addr_t addr) +static void clear_stage2_p4d_entry(struct kvm_s2_mmu *mmu, p4d_t *p4d, phys_addr_t addr) { + struct kvm *kvm = mmu->kvm; pud_t *pud_table __maybe_unused = stage2_pud_offset(kvm, p4d, 0); stage2_p4d_clear(kvm, p4d); - kvm_tlb_flush_vmid_ipa(kvm, addr); + kvm_tlb_flush_vmid_ipa(mmu, addr, S2_NO_LEVEL_HINT); stage2_pud_free(kvm, pud_table); put_page(virt_to_page(p4d)); } -static void clear_stage2_pud_entry(struct kvm *kvm, pud_t *pud, phys_addr_t addr) +static void clear_stage2_pud_entry(struct kvm_s2_mmu *mmu, pud_t *pud, phys_addr_t addr) { + struct kvm *kvm = mmu->kvm; pmd_t *pmd_table __maybe_unused = stage2_pmd_offset(kvm, pud, 0); + VM_BUG_ON(stage2_pud_huge(kvm, *pud)); stage2_pud_clear(kvm, pud); - kvm_tlb_flush_vmid_ipa(kvm, addr); + kvm_tlb_flush_vmid_ipa(mmu, addr, S2_NO_LEVEL_HINT); stage2_pmd_free(kvm, pmd_table); put_page(virt_to_page(pud)); } -static void clear_stage2_pmd_entry(struct kvm *kvm, pmd_t *pmd, phys_addr_t addr) +static void 
clear_stage2_pmd_entry(struct kvm_s2_mmu *mmu, pmd_t *pmd, phys_addr_t addr) { pte_t *pte_table = pte_offset_kernel(pmd, 0); VM_BUG_ON(pmd_thp_or_huge(*pmd)); pmd_clear(pmd); - kvm_tlb_flush_vmid_ipa(kvm, addr); + kvm_tlb_flush_vmid_ipa(mmu, addr, S2_NO_LEVEL_HINT); free_page((unsigned long)pte_table); put_page(virt_to_page(pmd)); } @@ -223,7 +230,7 @@ static inline void kvm_pgd_populate(pgd_t *pgdp, p4d_t *p4dp) * we then fully enforce cacheability of RAM, no matter what the guest * does. */ -static void unmap_stage2_ptes(struct kvm *kvm, pmd_t *pmd, +static void unmap_stage2_ptes(struct kvm_s2_mmu *mmu, pmd_t *pmd, phys_addr_t addr, phys_addr_t end) { phys_addr_t start_addr = addr; @@ -235,7 +242,7 @@ static void unmap_stage2_ptes(struct kvm *kvm, pmd_t *pmd, pte_t old_pte = *pte; kvm_set_pte(pte, __pte(0)); - kvm_tlb_flush_vmid_ipa(kvm, addr); + kvm_tlb_flush_vmid_ipa(mmu, addr, S2_PTE_LEVEL); /* No need to invalidate the cache for device mappings */ if (!kvm_is_device_pfn(pte_pfn(old_pte))) @@ -245,13 +252,14 @@ static void unmap_stage2_ptes(struct kvm *kvm, pmd_t *pmd, } } while (pte++, addr += PAGE_SIZE, addr != end); - if (stage2_pte_table_empty(kvm, start_pte)) - clear_stage2_pmd_entry(kvm, pmd, start_addr); + if (stage2_pte_table_empty(mmu->kvm, start_pte)) + clear_stage2_pmd_entry(mmu, pmd, start_addr); } -static void unmap_stage2_pmds(struct kvm *kvm, pud_t *pud, +static void unmap_stage2_pmds(struct kvm_s2_mmu *mmu, pud_t *pud, phys_addr_t addr, phys_addr_t end) { + struct kvm *kvm = mmu->kvm; phys_addr_t next, start_addr = addr; pmd_t *pmd, *start_pmd; @@ -263,24 +271,25 @@ static void unmap_stage2_pmds(struct kvm *kvm, pud_t *pud, pmd_t old_pmd = *pmd; pmd_clear(pmd); - kvm_tlb_flush_vmid_ipa(kvm, addr); + kvm_tlb_flush_vmid_ipa(mmu, addr, S2_PMD_LEVEL); kvm_flush_dcache_pmd(old_pmd); put_page(virt_to_page(pmd)); } else { - unmap_stage2_ptes(kvm, pmd, addr, next); + unmap_stage2_ptes(mmu, pmd, addr, next); } } } while (pmd++, addr = next, addr != end); if (stage2_pmd_table_empty(kvm, start_pmd)) - clear_stage2_pud_entry(kvm, pud, start_addr); + clear_stage2_pud_entry(mmu, pud, start_addr); } -static void unmap_stage2_puds(struct kvm *kvm, p4d_t *p4d, +static void unmap_stage2_puds(struct kvm_s2_mmu *mmu, p4d_t *p4d, phys_addr_t addr, phys_addr_t end) { + struct kvm *kvm = mmu->kvm; phys_addr_t next, start_addr = addr; pud_t *pud, *start_pud; @@ -292,22 +301,23 @@ static void unmap_stage2_puds(struct kvm *kvm, p4d_t *p4d, pud_t old_pud = *pud; stage2_pud_clear(kvm, pud); - kvm_tlb_flush_vmid_ipa(kvm, addr); + kvm_tlb_flush_vmid_ipa(mmu, addr, S2_PUD_LEVEL); kvm_flush_dcache_pud(old_pud); put_page(virt_to_page(pud)); } else { - unmap_stage2_pmds(kvm, pud, addr, next); + unmap_stage2_pmds(mmu, pud, addr, next); } } } while (pud++, addr = next, addr != end); if (stage2_pud_table_empty(kvm, start_pud)) - clear_stage2_p4d_entry(kvm, p4d, start_addr); + clear_stage2_p4d_entry(mmu, p4d, start_addr); } -static void unmap_stage2_p4ds(struct kvm *kvm, pgd_t *pgd, +static void unmap_stage2_p4ds(struct kvm_s2_mmu *mmu, pgd_t *pgd, phys_addr_t addr, phys_addr_t end) { + struct kvm *kvm = mmu->kvm; phys_addr_t next, start_addr = addr; p4d_t *p4d, *start_p4d; @@ -315,11 +325,11 @@ static void unmap_stage2_p4ds(struct kvm *kvm, pgd_t *pgd, do { next = stage2_p4d_addr_end(kvm, addr, end); if (!stage2_p4d_none(kvm, *p4d)) - unmap_stage2_puds(kvm, p4d, addr, next); + unmap_stage2_puds(mmu, p4d, addr, next); } while (p4d++, addr = next, addr != end); if (stage2_p4d_table_empty(kvm, start_p4d)) - 
clear_stage2_pgd_entry(kvm, pgd, start_addr); + clear_stage2_pgd_entry(mmu, pgd, start_addr); } /** @@ -333,8 +343,9 @@ static void unmap_stage2_p4ds(struct kvm *kvm, pgd_t *pgd, * destroying the VM), otherwise another faulting VCPU may come in and mess * with things behind our backs. */ -static void unmap_stage2_range(struct kvm *kvm, phys_addr_t start, u64 size) +static void unmap_stage2_range(struct kvm_s2_mmu *mmu, phys_addr_t start, u64 size) { + struct kvm *kvm = mmu->kvm; pgd_t *pgd; phys_addr_t addr = start, end = start + size; phys_addr_t next; @@ -342,18 +353,18 @@ static void unmap_stage2_range(struct kvm *kvm, phys_addr_t start, u64 size) assert_spin_locked(&kvm->mmu_lock); WARN_ON(size & ~PAGE_MASK); - pgd = kvm->arch.pgd + stage2_pgd_index(kvm, addr); + pgd = mmu->pgd + stage2_pgd_index(kvm, addr); do { /* * Make sure the page table is still active, as another thread * could have possibly freed the page table, while we released * the lock. */ - if (!READ_ONCE(kvm->arch.pgd)) + if (!READ_ONCE(mmu->pgd)) break; next = stage2_pgd_addr_end(kvm, addr, end); if (!stage2_pgd_none(kvm, *pgd)) - unmap_stage2_p4ds(kvm, pgd, addr, next); + unmap_stage2_p4ds(mmu, pgd, addr, next); /* * If the range is too large, release the kvm->mmu_lock * to prevent starvation and lockup detector warnings. @@ -363,7 +374,7 @@ static void unmap_stage2_range(struct kvm *kvm, phys_addr_t start, u64 size) } while (pgd++, addr = next, addr != end); } -static void stage2_flush_ptes(struct kvm *kvm, pmd_t *pmd, +static void stage2_flush_ptes(struct kvm_s2_mmu *mmu, pmd_t *pmd, phys_addr_t addr, phys_addr_t end) { pte_t *pte; @@ -375,9 +386,10 @@ static void stage2_flush_ptes(struct kvm *kvm, pmd_t *pmd, } while (pte++, addr += PAGE_SIZE, addr != end); } -static void stage2_flush_pmds(struct kvm *kvm, pud_t *pud, +static void stage2_flush_pmds(struct kvm_s2_mmu *mmu, pud_t *pud, phys_addr_t addr, phys_addr_t end) { + struct kvm *kvm = mmu->kvm; pmd_t *pmd; phys_addr_t next; @@ -388,14 +400,15 @@ static void stage2_flush_pmds(struct kvm *kvm, pud_t *pud, if (pmd_thp_or_huge(*pmd)) kvm_flush_dcache_pmd(*pmd); else - stage2_flush_ptes(kvm, pmd, addr, next); + stage2_flush_ptes(mmu, pmd, addr, next); } } while (pmd++, addr = next, addr != end); } -static void stage2_flush_puds(struct kvm *kvm, p4d_t *p4d, +static void stage2_flush_puds(struct kvm_s2_mmu *mmu, p4d_t *p4d, phys_addr_t addr, phys_addr_t end) { + struct kvm *kvm = mmu->kvm; pud_t *pud; phys_addr_t next; @@ -406,14 +419,15 @@ static void stage2_flush_puds(struct kvm *kvm, p4d_t *p4d, if (stage2_pud_huge(kvm, *pud)) kvm_flush_dcache_pud(*pud); else - stage2_flush_pmds(kvm, pud, addr, next); + stage2_flush_pmds(mmu, pud, addr, next); } } while (pud++, addr = next, addr != end); } -static void stage2_flush_p4ds(struct kvm *kvm, pgd_t *pgd, +static void stage2_flush_p4ds(struct kvm_s2_mmu *mmu, pgd_t *pgd, phys_addr_t addr, phys_addr_t end) { + struct kvm *kvm = mmu->kvm; p4d_t *p4d; phys_addr_t next; @@ -421,23 +435,24 @@ static void stage2_flush_p4ds(struct kvm *kvm, pgd_t *pgd, do { next = stage2_p4d_addr_end(kvm, addr, end); if (!stage2_p4d_none(kvm, *p4d)) - stage2_flush_puds(kvm, p4d, addr, next); + stage2_flush_puds(mmu, p4d, addr, next); } while (p4d++, addr = next, addr != end); } static void stage2_flush_memslot(struct kvm *kvm, struct kvm_memory_slot *memslot) { + struct kvm_s2_mmu *mmu = &kvm->arch.mmu; phys_addr_t addr = memslot->base_gfn << PAGE_SHIFT; phys_addr_t end = addr + PAGE_SIZE * memslot->npages; phys_addr_t next; pgd_t *pgd; - pgd = 
kvm->arch.pgd + stage2_pgd_index(kvm, addr); + pgd = mmu->pgd + stage2_pgd_index(kvm, addr); do { next = stage2_pgd_addr_end(kvm, addr, end); if (!stage2_pgd_none(kvm, *pgd)) - stage2_flush_p4ds(kvm, pgd, addr, next); + stage2_flush_p4ds(mmu, pgd, addr, next); if (next != end) cond_resched_lock(&kvm->mmu_lock); @@ -964,21 +979,23 @@ int create_hyp_exec_mappings(phys_addr_t phys_addr, size_t size, } /** - * kvm_alloc_stage2_pgd - allocate level-1 table for stage-2 translation. - * @kvm: The KVM struct pointer for the VM. + * kvm_init_stage2_mmu - Initialise a S2 MMU strucrure + * @kvm: The pointer to the KVM structure + * @mmu: The pointer to the s2 MMU structure * * Allocates only the stage-2 HW PGD level table(s) of size defined by - * stage2_pgd_size(kvm). + * stage2_pgd_size(mmu->kvm). * * Note we don't need locking here as this is only called when the VM is * created, which can only be done once. */ -int kvm_alloc_stage2_pgd(struct kvm *kvm) +int kvm_init_stage2_mmu(struct kvm *kvm, struct kvm_s2_mmu *mmu) { phys_addr_t pgd_phys; pgd_t *pgd; + int cpu; - if (kvm->arch.pgd != NULL) { + if (mmu->pgd != NULL) { kvm_err("kvm_arch already initialized?\n"); return -EINVAL; } @@ -992,8 +1009,20 @@ int kvm_alloc_stage2_pgd(struct kvm *kvm) if (WARN_ON(pgd_phys & ~kvm_vttbr_baddr_mask(kvm))) return -EINVAL; - kvm->arch.pgd = pgd; - kvm->arch.pgd_phys = pgd_phys; + mmu->last_vcpu_ran = alloc_percpu(typeof(*mmu->last_vcpu_ran)); + if (!mmu->last_vcpu_ran) { + free_pages_exact(pgd, stage2_pgd_size(kvm)); + return -ENOMEM; + } + + for_each_possible_cpu(cpu) + *per_cpu_ptr(mmu->last_vcpu_ran, cpu) = -1; + + mmu->kvm = kvm; + mmu->pgd = pgd; + mmu->pgd_phys = pgd_phys; + mmu->vmid.vmid_gen = 0; + return 0; } @@ -1032,7 +1061,7 @@ static void stage2_unmap_memslot(struct kvm *kvm, if (!(vma->vm_flags & VM_PFNMAP)) { gpa_t gpa = addr + (vm_start - memslot->userspace_addr); - unmap_stage2_range(kvm, gpa, vm_end - vm_start); + unmap_stage2_range(&kvm->arch.mmu, gpa, vm_end - vm_start); } hva = vm_end; } while (hva < reg_end); @@ -1064,39 +1093,34 @@ void stage2_unmap_vm(struct kvm *kvm) srcu_read_unlock(&kvm->srcu, idx); } -/** - * kvm_free_stage2_pgd - free all stage-2 tables - * @kvm: The KVM struct pointer for the VM. - * - * Walks the level-1 page table pointed to by kvm->arch.pgd and frees all - * underlying level-2 and level-3 tables before freeing the actual level-1 table - * and setting the struct pointer to NULL. 
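The per-MMU bookkeeping introduced by kvm_init_stage2_mmu() has to be torn down where the PGD is freed; the pairing, condensed from the two hunks around here (sketch, error paths omitted):

	/* init: one "last vcpu ran here" slot per possible CPU */
	mmu->last_vcpu_ran = alloc_percpu(typeof(*mmu->last_vcpu_ran));
	for_each_possible_cpu(cpu)
		*per_cpu_ptr(mmu->last_vcpu_ran, cpu) = -1;

	/* teardown: free the PGD pages, then the per-CPU array */
	free_pages_exact(pgd, stage2_pgd_size(kvm));
	free_percpu(mmu->last_vcpu_ran);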
- */ -void kvm_free_stage2_pgd(struct kvm *kvm) +void kvm_free_stage2_pgd(struct kvm_s2_mmu *mmu) { + struct kvm *kvm = mmu->kvm; void *pgd = NULL; spin_lock(&kvm->mmu_lock); - if (kvm->arch.pgd) { - unmap_stage2_range(kvm, 0, kvm_phys_size(kvm)); - pgd = READ_ONCE(kvm->arch.pgd); - kvm->arch.pgd = NULL; - kvm->arch.pgd_phys = 0; + if (mmu->pgd) { + unmap_stage2_range(mmu, 0, kvm_phys_size(kvm)); + pgd = READ_ONCE(mmu->pgd); + mmu->pgd = NULL; } spin_unlock(&kvm->mmu_lock); /* Free the HW pgd, one page at a time */ - if (pgd) + if (pgd) { free_pages_exact(pgd, stage2_pgd_size(kvm)); + free_percpu(mmu->last_vcpu_ran); + } } -static p4d_t *stage2_get_p4d(struct kvm *kvm, struct kvm_mmu_memory_cache *cache, +static p4d_t *stage2_get_p4d(struct kvm_s2_mmu *mmu, struct kvm_mmu_memory_cache *cache, phys_addr_t addr) { + struct kvm *kvm = mmu->kvm; pgd_t *pgd; p4d_t *p4d; - pgd = kvm->arch.pgd + stage2_pgd_index(kvm, addr); + pgd = mmu->pgd + stage2_pgd_index(kvm, addr); if (stage2_pgd_none(kvm, *pgd)) { if (!cache) return NULL; @@ -1108,13 +1132,14 @@ static p4d_t *stage2_get_p4d(struct kvm *kvm, struct kvm_mmu_memory_cache *cache return stage2_p4d_offset(kvm, pgd, addr); } -static pud_t *stage2_get_pud(struct kvm *kvm, struct kvm_mmu_memory_cache *cache, +static pud_t *stage2_get_pud(struct kvm_s2_mmu *mmu, struct kvm_mmu_memory_cache *cache, phys_addr_t addr) { + struct kvm *kvm = mmu->kvm; p4d_t *p4d; pud_t *pud; - p4d = stage2_get_p4d(kvm, cache, addr); + p4d = stage2_get_p4d(mmu, cache, addr); if (stage2_p4d_none(kvm, *p4d)) { if (!cache) return NULL; @@ -1126,13 +1151,14 @@ static pud_t *stage2_get_pud(struct kvm *kvm, struct kvm_mmu_memory_cache *cache return stage2_pud_offset(kvm, p4d, addr); } -static pmd_t *stage2_get_pmd(struct kvm *kvm, struct kvm_mmu_memory_cache *cache, +static pmd_t *stage2_get_pmd(struct kvm_s2_mmu *mmu, struct kvm_mmu_memory_cache *cache, phys_addr_t addr) { + struct kvm *kvm = mmu->kvm; pud_t *pud; pmd_t *pmd; - pud = stage2_get_pud(kvm, cache, addr); + pud = stage2_get_pud(mmu, cache, addr); if (!pud || stage2_pud_huge(kvm, *pud)) return NULL; @@ -1147,13 +1173,14 @@ static pmd_t *stage2_get_pmd(struct kvm *kvm, struct kvm_mmu_memory_cache *cache return stage2_pmd_offset(kvm, pud, addr); } -static int stage2_set_pmd_huge(struct kvm *kvm, struct kvm_mmu_memory_cache - *cache, phys_addr_t addr, const pmd_t *new_pmd) +static int stage2_set_pmd_huge(struct kvm_s2_mmu *mmu, + struct kvm_mmu_memory_cache *cache, + phys_addr_t addr, const pmd_t *new_pmd) { pmd_t *pmd, old_pmd; retry: - pmd = stage2_get_pmd(kvm, cache, addr); + pmd = stage2_get_pmd(mmu, cache, addr); VM_BUG_ON(!pmd); old_pmd = *pmd; @@ -1186,7 +1213,7 @@ retry: * get handled accordingly. 
*/ if (!pmd_thp_or_huge(old_pmd)) { - unmap_stage2_range(kvm, addr & S2_PMD_MASK, S2_PMD_SIZE); + unmap_stage2_range(mmu, addr & S2_PMD_MASK, S2_PMD_SIZE); goto retry; } /* @@ -1202,7 +1229,7 @@ retry: */ WARN_ON_ONCE(pmd_pfn(old_pmd) != pmd_pfn(*new_pmd)); pmd_clear(pmd); - kvm_tlb_flush_vmid_ipa(kvm, addr); + kvm_tlb_flush_vmid_ipa(mmu, addr, S2_PMD_LEVEL); } else { get_page(virt_to_page(pmd)); } @@ -1211,13 +1238,15 @@ retry: return 0; } -static int stage2_set_pud_huge(struct kvm *kvm, struct kvm_mmu_memory_cache *cache, +static int stage2_set_pud_huge(struct kvm_s2_mmu *mmu, + struct kvm_mmu_memory_cache *cache, phys_addr_t addr, const pud_t *new_pudp) { + struct kvm *kvm = mmu->kvm; pud_t *pudp, old_pud; retry: - pudp = stage2_get_pud(kvm, cache, addr); + pudp = stage2_get_pud(mmu, cache, addr); VM_BUG_ON(!pudp); old_pud = *pudp; @@ -1236,13 +1265,13 @@ retry: * the range for this block and retry. */ if (!stage2_pud_huge(kvm, old_pud)) { - unmap_stage2_range(kvm, addr & S2_PUD_MASK, S2_PUD_SIZE); + unmap_stage2_range(mmu, addr & S2_PUD_MASK, S2_PUD_SIZE); goto retry; } WARN_ON_ONCE(kvm_pud_pfn(old_pud) != kvm_pud_pfn(*new_pudp)); stage2_pud_clear(kvm, pudp); - kvm_tlb_flush_vmid_ipa(kvm, addr); + kvm_tlb_flush_vmid_ipa(mmu, addr, S2_PUD_LEVEL); } else { get_page(virt_to_page(pudp)); } @@ -1257,9 +1286,10 @@ retry: * leaf-entry is returned in the appropriate level variable - pudpp, * pmdpp, ptepp. */ -static bool stage2_get_leaf_entry(struct kvm *kvm, phys_addr_t addr, +static bool stage2_get_leaf_entry(struct kvm_s2_mmu *mmu, phys_addr_t addr, pud_t **pudpp, pmd_t **pmdpp, pte_t **ptepp) { + struct kvm *kvm = mmu->kvm; pud_t *pudp; pmd_t *pmdp; pte_t *ptep; @@ -1268,7 +1298,7 @@ static bool stage2_get_leaf_entry(struct kvm *kvm, phys_addr_t addr, *pmdpp = NULL; *ptepp = NULL; - pudp = stage2_get_pud(kvm, NULL, addr); + pudp = stage2_get_pud(mmu, NULL, addr); if (!pudp || stage2_pud_none(kvm, *pudp) || !stage2_pud_present(kvm, *pudp)) return false; @@ -1294,14 +1324,14 @@ static bool stage2_get_leaf_entry(struct kvm *kvm, phys_addr_t addr, return true; } -static bool stage2_is_exec(struct kvm *kvm, phys_addr_t addr, unsigned long sz) +static bool stage2_is_exec(struct kvm_s2_mmu *mmu, phys_addr_t addr, unsigned long sz) { pud_t *pudp; pmd_t *pmdp; pte_t *ptep; bool found; - found = stage2_get_leaf_entry(kvm, addr, &pudp, &pmdp, &ptep); + found = stage2_get_leaf_entry(mmu, addr, &pudp, &pmdp, &ptep); if (!found) return false; @@ -1313,10 +1343,12 @@ static bool stage2_is_exec(struct kvm *kvm, phys_addr_t addr, unsigned long sz) return sz == PAGE_SIZE && kvm_s2pte_exec(ptep); } -static int stage2_set_pte(struct kvm *kvm, struct kvm_mmu_memory_cache *cache, +static int stage2_set_pte(struct kvm_s2_mmu *mmu, + struct kvm_mmu_memory_cache *cache, phys_addr_t addr, const pte_t *new_pte, unsigned long flags) { + struct kvm *kvm = mmu->kvm; pud_t *pud; pmd_t *pmd; pte_t *pte, old_pte; @@ -1326,7 +1358,7 @@ static int stage2_set_pte(struct kvm *kvm, struct kvm_mmu_memory_cache *cache, VM_BUG_ON(logging_active && !cache); /* Create stage-2 page table mapping - Levels 0 and 1 */ - pud = stage2_get_pud(kvm, cache, addr); + pud = stage2_get_pud(mmu, cache, addr); if (!pud) { /* * Ignore calls from kvm_set_spte_hva for unallocated @@ -1340,7 +1372,7 @@ static int stage2_set_pte(struct kvm *kvm, struct kvm_mmu_memory_cache *cache, * on to allocate page. 
*/ if (logging_active) - stage2_dissolve_pud(kvm, addr, pud); + stage2_dissolve_pud(mmu, addr, pud); if (stage2_pud_none(kvm, *pud)) { if (!cache) @@ -1364,7 +1396,7 @@ static int stage2_set_pte(struct kvm *kvm, struct kvm_mmu_memory_cache *cache, * allocate page. */ if (logging_active) - stage2_dissolve_pmd(kvm, addr, pmd); + stage2_dissolve_pmd(mmu, addr, pmd); /* Create stage-2 page mappings - Level 2 */ if (pmd_none(*pmd)) { @@ -1388,7 +1420,7 @@ static int stage2_set_pte(struct kvm *kvm, struct kvm_mmu_memory_cache *cache, return 0; kvm_set_pte(pte, __pte(0)); - kvm_tlb_flush_vmid_ipa(kvm, addr); + kvm_tlb_flush_vmid_ipa(mmu, addr, S2_PTE_LEVEL); } else { get_page(virt_to_page(pte)); } @@ -1453,8 +1485,8 @@ int kvm_phys_addr_ioremap(struct kvm *kvm, phys_addr_t guest_ipa, if (ret) goto out; spin_lock(&kvm->mmu_lock); - ret = stage2_set_pte(kvm, &cache, addr, &pte, - KVM_S2PTE_FLAG_IS_IOMAP); + ret = stage2_set_pte(&kvm->arch.mmu, &cache, addr, &pte, + KVM_S2PTE_FLAG_IS_IOMAP); spin_unlock(&kvm->mmu_lock); if (ret) goto out; @@ -1493,9 +1525,10 @@ static void stage2_wp_ptes(pmd_t *pmd, phys_addr_t addr, phys_addr_t end) * @addr: range start address * @end: range end address */ -static void stage2_wp_pmds(struct kvm *kvm, pud_t *pud, +static void stage2_wp_pmds(struct kvm_s2_mmu *mmu, pud_t *pud, phys_addr_t addr, phys_addr_t end) { + struct kvm *kvm = mmu->kvm; pmd_t *pmd; phys_addr_t next; @@ -1516,13 +1549,14 @@ static void stage2_wp_pmds(struct kvm *kvm, pud_t *pud, /** * stage2_wp_puds - write protect P4D range - * @pgd: pointer to pgd entry + * @p4d: pointer to p4d entry * @addr: range start address * @end: range end address */ -static void stage2_wp_puds(struct kvm *kvm, p4d_t *p4d, +static void stage2_wp_puds(struct kvm_s2_mmu *mmu, p4d_t *p4d, phys_addr_t addr, phys_addr_t end) { + struct kvm *kvm = mmu->kvm; pud_t *pud; phys_addr_t next; @@ -1534,7 +1568,7 @@ static void stage2_wp_puds(struct kvm *kvm, p4d_t *p4d, if (!kvm_s2pud_readonly(pud)) kvm_set_s2pud_readonly(pud); } else { - stage2_wp_pmds(kvm, pud, addr, next); + stage2_wp_pmds(mmu, pud, addr, next); } } } while (pud++, addr = next, addr != end); @@ -1546,9 +1580,10 @@ static void stage2_wp_puds(struct kvm *kvm, p4d_t *p4d, * @addr: range start address * @end: range end address */ -static void stage2_wp_p4ds(struct kvm *kvm, pgd_t *pgd, +static void stage2_wp_p4ds(struct kvm_s2_mmu *mmu, pgd_t *pgd, phys_addr_t addr, phys_addr_t end) { + struct kvm *kvm = mmu->kvm; p4d_t *p4d; phys_addr_t next; @@ -1556,7 +1591,7 @@ static void stage2_wp_p4ds(struct kvm *kvm, pgd_t *pgd, do { next = stage2_p4d_addr_end(kvm, addr, end); if (!stage2_p4d_none(kvm, *p4d)) - stage2_wp_puds(kvm, p4d, addr, next); + stage2_wp_puds(mmu, p4d, addr, next); } while (p4d++, addr = next, addr != end); } @@ -1566,12 +1601,13 @@ static void stage2_wp_p4ds(struct kvm *kvm, pgd_t *pgd, * @addr: Start address of range * @end: End address of range */ -static void stage2_wp_range(struct kvm *kvm, phys_addr_t addr, phys_addr_t end) +static void stage2_wp_range(struct kvm_s2_mmu *mmu, phys_addr_t addr, phys_addr_t end) { + struct kvm *kvm = mmu->kvm; pgd_t *pgd; phys_addr_t next; - pgd = kvm->arch.pgd + stage2_pgd_index(kvm, addr); + pgd = mmu->pgd + stage2_pgd_index(kvm, addr); do { /* * Release kvm_mmu_lock periodically if the memory region is @@ -1583,11 +1619,11 @@ static void stage2_wp_range(struct kvm *kvm, phys_addr_t addr, phys_addr_t end) * the lock. 
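The stage2_wp_*() helpers above all follow the same descend-or-protect shape: a present block entry is made read-only in place, anything else is walked one level further down. A toy two-level model of that walk (tiny arrays and invented types, nothing like the real page-table layout):

#include <stdbool.h>
#include <stdio.h>

#define ENTRIES 4

struct toy_pte { bool present, writable; };

struct toy_pmd {
	bool present, huge, writable;        /* huge: block mapping, no lower level */
	struct toy_pte ptes[ENTRIES];        /* used only when !huge */
};

static void wp_ptes(struct toy_pte *ptes)
{
	for (int i = 0; i < ENTRIES; i++)
		if (ptes[i].present && ptes[i].writable)
			ptes[i].writable = false;
}

static void wp_pmds(struct toy_pmd *pmds)
{
	for (int i = 0; i < ENTRIES; i++) {
		if (!pmds[i].present)
			continue;
		if (pmds[i].huge) {
			if (pmds[i].writable)        /* protect the block in place */
				pmds[i].writable = false;
		} else {
			wp_ptes(pmds[i].ptes);       /* otherwise descend */
		}
	}
}

int main(void)
{
	struct toy_pmd pmds[ENTRIES] = {
		[0] = { .present = true, .huge = true, .writable = true },
		[1] = { .present = true,
			.ptes = { [2] = { .present = true, .writable = true } } },
	};

	wp_pmds(pmds);
	printf("block pmd0 writable=%d, pte[1][2] writable=%d\n",
	       pmds[0].writable, pmds[1].ptes[2].writable);
	return 0;
}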
*/ cond_resched_lock(&kvm->mmu_lock); - if (!READ_ONCE(kvm->arch.pgd)) + if (!READ_ONCE(mmu->pgd)) break; next = stage2_pgd_addr_end(kvm, addr, end); if (stage2_pgd_present(kvm, *pgd)) - stage2_wp_p4ds(kvm, pgd, addr, next); + stage2_wp_p4ds(mmu, pgd, addr, next); } while (pgd++, addr = next, addr != end); } @@ -1617,7 +1653,7 @@ void kvm_mmu_wp_memory_region(struct kvm *kvm, int slot) end = (memslot->base_gfn + memslot->npages) << PAGE_SHIFT; spin_lock(&kvm->mmu_lock); - stage2_wp_range(kvm, start, end); + stage2_wp_range(&kvm->arch.mmu, start, end); spin_unlock(&kvm->mmu_lock); kvm_flush_remote_tlbs(kvm); } @@ -1641,7 +1677,7 @@ static void kvm_mmu_write_protect_pt_masked(struct kvm *kvm, phys_addr_t start = (base_gfn + __ffs(mask)) << PAGE_SHIFT; phys_addr_t end = (base_gfn + __fls(mask) + 1) << PAGE_SHIFT; - stage2_wp_range(kvm, start, end); + stage2_wp_range(&kvm->arch.mmu, start, end); } /* @@ -1804,6 +1840,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, pgprot_t mem_type = PAGE_S2; bool logging_active = memslot_is_logging(memslot); unsigned long vma_pagesize, flags = 0; + struct kvm_s2_mmu *mmu = vcpu->arch.hw_mmu; write_fault = kvm_is_write_fault(vcpu); exec_fault = kvm_vcpu_trap_is_iabt(vcpu); @@ -1925,7 +1962,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, */ needs_exec = exec_fault || (fault_status == FSC_PERM && - stage2_is_exec(kvm, fault_ipa, vma_pagesize)); + stage2_is_exec(mmu, fault_ipa, vma_pagesize)); if (vma_pagesize == PUD_SIZE) { pud_t new_pud = kvm_pfn_pud(pfn, mem_type); @@ -1937,7 +1974,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, if (needs_exec) new_pud = kvm_s2pud_mkexec(new_pud); - ret = stage2_set_pud_huge(kvm, memcache, fault_ipa, &new_pud); + ret = stage2_set_pud_huge(mmu, memcache, fault_ipa, &new_pud); } else if (vma_pagesize == PMD_SIZE) { pmd_t new_pmd = kvm_pfn_pmd(pfn, mem_type); @@ -1949,7 +1986,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, if (needs_exec) new_pmd = kvm_s2pmd_mkexec(new_pmd); - ret = stage2_set_pmd_huge(kvm, memcache, fault_ipa, &new_pmd); + ret = stage2_set_pmd_huge(mmu, memcache, fault_ipa, &new_pmd); } else { pte_t new_pte = kvm_pfn_pte(pfn, mem_type); @@ -1961,7 +1998,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, if (needs_exec) new_pte = kvm_s2pte_mkexec(new_pte); - ret = stage2_set_pte(kvm, memcache, fault_ipa, &new_pte, flags); + ret = stage2_set_pte(mmu, memcache, fault_ipa, &new_pte, flags); } out_unlock: @@ -1990,7 +2027,7 @@ static void handle_access_fault(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa) spin_lock(&vcpu->kvm->mmu_lock); - if (!stage2_get_leaf_entry(vcpu->kvm, fault_ipa, &pud, &pmd, &pte)) + if (!stage2_get_leaf_entry(vcpu->arch.hw_mmu, fault_ipa, &pud, &pmd, &pte)) goto out; if (pud) { /* HugeTLB */ @@ -2040,21 +2077,18 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu) is_iabt = kvm_vcpu_trap_is_iabt(vcpu); /* Synchronous External Abort? */ - if (kvm_vcpu_dabt_isextabt(vcpu)) { + if (kvm_vcpu_abt_issea(vcpu)) { /* * For RAS the host kernel may handle this abort. * There is no need to pass the error into the guest. 
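kvm_mmu_write_protect_pt_masked() above turns one dirty-bitmap word into a single guest-physical range before calling stage2_wp_range(). The same arithmetic as a standalone program, with __ffs/__fls modelled by compiler builtins and an assumed PAGE_SHIFT of 12:

#include <stdint.h>
#include <stdio.h>

#define PAGE_SHIFT 12   /* assumed 4K pages for the example */

/* mask is never zero in the caller, so the builtins are well defined. */
static void mask_to_range(uint64_t base_gfn, uint64_t mask,
			  uint64_t *start, uint64_t *end)
{
	unsigned int first = __builtin_ctzll(mask);       /* plays the role of __ffs() */
	unsigned int last = 63 - __builtin_clzll(mask);   /* plays the role of __fls() */

	*start = (base_gfn + first) << PAGE_SHIFT;
	*end = (base_gfn + last + 1) << PAGE_SHIFT;
}

int main(void)
{
	uint64_t start, end;

	/* bits 3 and 9 dirty in a region whose first gfn is 0x1000 */
	mask_to_range(0x1000, (1ULL << 3) | (1ULL << 9), &start, &end);
	printf("write-protect [%#llx, %#llx)\n",
	       (unsigned long long)start, (unsigned long long)end);
	return 0;
}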
*/ - if (!kvm_handle_guest_sea(fault_ipa, kvm_vcpu_get_hsr(vcpu))) - return 1; - - if (unlikely(!is_iabt)) { + if (kvm_handle_guest_sea(fault_ipa, kvm_vcpu_get_esr(vcpu))) kvm_inject_vabt(vcpu); - return 1; - } + + return 1; } - trace_kvm_guest_fault(*vcpu_pc(vcpu), kvm_vcpu_get_hsr(vcpu), + trace_kvm_guest_fault(*vcpu_pc(vcpu), kvm_vcpu_get_esr(vcpu), kvm_vcpu_get_hfar(vcpu), fault_ipa); /* Check the stage-2 fault is trans. fault or write fault */ @@ -2063,7 +2097,7 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu) kvm_err("Unsupported FSC: EC=%#x xFSC=%#lx ESR_EL2=%#lx\n", kvm_vcpu_trap_get_class(vcpu), (unsigned long)kvm_vcpu_trap_get_fault(vcpu), - (unsigned long)kvm_vcpu_get_hsr(vcpu)); + (unsigned long)kvm_vcpu_get_esr(vcpu)); return -EFAULT; } @@ -2074,12 +2108,23 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu) hva = gfn_to_hva_memslot_prot(memslot, gfn, &writable); write_fault = kvm_is_write_fault(vcpu); if (kvm_is_error_hva(hva) || (write_fault && !writable)) { + /* + * The guest has put either its instructions or its page-tables + * somewhere it shouldn't have. Userspace won't be able to do + * anything about this (there's no syndrome for a start), so + * re-inject the abort back into the guest. + */ if (is_iabt) { - /* Prefetch Abort on I/O address */ ret = -ENOEXEC; goto out; } + if (kvm_vcpu_dabt_iss1tw(vcpu)) { + kvm_inject_dabt(vcpu, kvm_vcpu_get_hfar(vcpu)); + ret = 1; + goto out_unlock; + } + /* * Check for a cache maintenance operation. Since we * ended-up here, we know it is outside of any memory @@ -2090,7 +2135,7 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu) * So let's assume that the guest is just being * cautious, and skip the instruction. */ - if (kvm_vcpu_dabt_is_cm(vcpu)) { + if (kvm_is_error_hva(hva) && kvm_vcpu_dabt_is_cm(vcpu)) { kvm_skip_instr(vcpu, kvm_vcpu_trap_il_is32bit(vcpu)); ret = 1; goto out_unlock; @@ -2163,14 +2208,14 @@ static int handle_hva_to_gpa(struct kvm *kvm, static int kvm_unmap_hva_handler(struct kvm *kvm, gpa_t gpa, u64 size, void *data) { - unmap_stage2_range(kvm, gpa, size); + unmap_stage2_range(&kvm->arch.mmu, gpa, size); return 0; } int kvm_unmap_hva_range(struct kvm *kvm, unsigned long start, unsigned long end) { - if (!kvm->arch.pgd) + if (!kvm->arch.mmu.pgd) return 0; trace_kvm_unmap_hva_range(start, end); @@ -2190,7 +2235,7 @@ static int kvm_set_spte_handler(struct kvm *kvm, gpa_t gpa, u64 size, void *data * therefore stage2_set_pte() never needs to clear out a huge PMD * through this calling path. 
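The reworked out-of-memslot handling above distinguishes instruction fetches, stage-1 page-table walks and cache maintenance before anything is treated as I/O. A compact model of that routing decision; the enum, struct and function names are invented for the example, and the MMIO leg simply stands in for the rest of the function:

#include <stdbool.h>
#include <stdio.h>

enum action { FAIL_NOEXEC, INJECT_DABT, SKIP_INSN, EMULATE_MMIO };

struct abort_info {
	bool is_iabt;     /* instruction abort */
	bool is_s1ptw;    /* abort taken on a stage-1 page-table walk */
	bool is_cm;       /* cache maintenance operation */
};

static enum action route_unmapped_abort(const struct abort_info *a)
{
	if (a->is_iabt)
		return FAIL_NOEXEC;   /* guest fetched from an unmapped IPA */
	if (a->is_s1ptw)
		return INJECT_DABT;   /* its page tables point into a hole */
	if (a->is_cm)
		return SKIP_INSN;     /* CMO on non-memory: just move past it */
	return EMULATE_MMIO;          /* ordinary load/store: I/O emulation */
}

int main(void)
{
	struct abort_info a = { .is_s1ptw = true };

	printf("action = %d\n", route_unmapped_abort(&a));
	return 0;
}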
*/ - stage2_set_pte(kvm, NULL, gpa, pte, 0); + stage2_set_pte(&kvm->arch.mmu, NULL, gpa, pte, 0); return 0; } @@ -2201,7 +2246,7 @@ int kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte) kvm_pfn_t pfn = pte_pfn(pte); pte_t stage2_pte; - if (!kvm->arch.pgd) + if (!kvm->arch.mmu.pgd) return 0; trace_kvm_set_spte_hva(hva); @@ -2224,7 +2269,7 @@ static int kvm_age_hva_handler(struct kvm *kvm, gpa_t gpa, u64 size, void *data) pte_t *pte; WARN_ON(size != PAGE_SIZE && size != PMD_SIZE && size != PUD_SIZE); - if (!stage2_get_leaf_entry(kvm, gpa, &pud, &pmd, &pte)) + if (!stage2_get_leaf_entry(&kvm->arch.mmu, gpa, &pud, &pmd, &pte)) return 0; if (pud) @@ -2242,7 +2287,7 @@ static int kvm_test_age_hva_handler(struct kvm *kvm, gpa_t gpa, u64 size, void * pte_t *pte; WARN_ON(size != PAGE_SIZE && size != PMD_SIZE && size != PUD_SIZE); - if (!stage2_get_leaf_entry(kvm, gpa, &pud, &pmd, &pte)) + if (!stage2_get_leaf_entry(&kvm->arch.mmu, gpa, &pud, &pmd, &pte)) return 0; if (pud) @@ -2255,7 +2300,7 @@ static int kvm_test_age_hva_handler(struct kvm *kvm, gpa_t gpa, u64 size, void * int kvm_age_hva(struct kvm *kvm, unsigned long start, unsigned long end) { - if (!kvm->arch.pgd) + if (!kvm->arch.mmu.pgd) return 0; trace_kvm_age_hva(start, end); return handle_hva_to_gpa(kvm, start, end, kvm_age_hva_handler, NULL); @@ -2263,7 +2308,7 @@ int kvm_age_hva(struct kvm *kvm, unsigned long start, unsigned long end) int kvm_test_age_hva(struct kvm *kvm, unsigned long hva) { - if (!kvm->arch.pgd) + if (!kvm->arch.mmu.pgd) return 0; trace_kvm_test_age_hva(hva); return handle_hva_to_gpa(kvm, hva, hva + PAGE_SIZE, @@ -2476,7 +2521,7 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm, spin_lock(&kvm->mmu_lock); if (ret) - unmap_stage2_range(kvm, mem->guest_phys_addr, mem->memory_size); + unmap_stage2_range(&kvm->arch.mmu, mem->guest_phys_addr, mem->memory_size); else stage2_flush_memslot(kvm, memslot); spin_unlock(&kvm->mmu_lock); @@ -2495,7 +2540,7 @@ void kvm_arch_memslots_updated(struct kvm *kvm, u64 gen) void kvm_arch_flush_shadow_all(struct kvm *kvm) { - kvm_free_stage2_pgd(kvm); + kvm_free_stage2_pgd(&kvm->arch.mmu); } void kvm_arch_flush_shadow_memslot(struct kvm *kvm, @@ -2505,7 +2550,7 @@ void kvm_arch_flush_shadow_memslot(struct kvm *kvm, phys_addr_t size = slot->npages << PAGE_SHIFT; spin_lock(&kvm->mmu_lock); - unmap_stage2_range(kvm, gpa, size); + unmap_stage2_range(&kvm->arch.mmu, gpa, size); spin_unlock(&kvm->mmu_lock); } diff --git a/arch/arm64/kvm/regmap.c b/arch/arm64/kvm/regmap.c index a900181e3867..accc1d5fba61 100644 --- a/arch/arm64/kvm/regmap.c +++ b/arch/arm64/kvm/regmap.c @@ -100,7 +100,7 @@ static const unsigned long vcpu_reg_offsets[VCPU_NR_MODES][16] = { */ unsigned long *vcpu_reg32(const struct kvm_vcpu *vcpu, u8 reg_num) { - unsigned long *reg_array = (unsigned long *)&vcpu->arch.ctxt.gp_regs.regs; + unsigned long *reg_array = (unsigned long *)&vcpu->arch.ctxt.regs; unsigned long mode = *vcpu_cpsr(vcpu) & PSR_AA32_MODE_MASK; switch (mode) { @@ -147,8 +147,20 @@ unsigned long vcpu_read_spsr32(const struct kvm_vcpu *vcpu) { int spsr_idx = vcpu_spsr32_mode(vcpu); - if (!vcpu->arch.sysregs_loaded_on_cpu) - return vcpu_gp_regs(vcpu)->spsr[spsr_idx]; + if (!vcpu->arch.sysregs_loaded_on_cpu) { + switch (spsr_idx) { + case KVM_SPSR_SVC: + return __vcpu_sys_reg(vcpu, SPSR_EL1); + case KVM_SPSR_ABT: + return vcpu->arch.ctxt.spsr_abt; + case KVM_SPSR_UND: + return vcpu->arch.ctxt.spsr_und; + case KVM_SPSR_IRQ: + return vcpu->arch.ctxt.spsr_irq; + case KVM_SPSR_FIQ: + return 
vcpu->arch.ctxt.spsr_fiq; + } + } switch (spsr_idx) { case KVM_SPSR_SVC: @@ -171,7 +183,24 @@ void vcpu_write_spsr32(struct kvm_vcpu *vcpu, unsigned long v) int spsr_idx = vcpu_spsr32_mode(vcpu); if (!vcpu->arch.sysregs_loaded_on_cpu) { - vcpu_gp_regs(vcpu)->spsr[spsr_idx] = v; + switch (spsr_idx) { + case KVM_SPSR_SVC: + __vcpu_sys_reg(vcpu, SPSR_EL1) = v; + break; + case KVM_SPSR_ABT: + vcpu->arch.ctxt.spsr_abt = v; + break; + case KVM_SPSR_UND: + vcpu->arch.ctxt.spsr_und = v; + break; + case KVM_SPSR_IRQ: + vcpu->arch.ctxt.spsr_irq = v; + break; + case KVM_SPSR_FIQ: + vcpu->arch.ctxt.spsr_fiq = v; + break; + } + return; } diff --git a/arch/arm64/kvm/reset.c b/arch/arm64/kvm/reset.c index 6ed36be51b4b..ee33875c5c2a 100644 --- a/arch/arm64/kvm/reset.c +++ b/arch/arm64/kvm/reset.c @@ -42,6 +42,11 @@ static u32 kvm_ipa_limit; #define VCPU_RESET_PSTATE_SVC (PSR_AA32_MODE_SVC | PSR_AA32_A_BIT | \ PSR_AA32_I_BIT | PSR_AA32_F_BIT) +static bool system_has_full_ptr_auth(void) +{ + return system_supports_address_auth() && system_supports_generic_auth(); +} + /** * kvm_arch_vm_ioctl_check_extension * @@ -80,8 +85,7 @@ int kvm_arch_vm_ioctl_check_extension(struct kvm *kvm, long ext) break; case KVM_CAP_ARM_PTRAUTH_ADDRESS: case KVM_CAP_ARM_PTRAUTH_GENERIC: - r = has_vhe() && system_supports_address_auth() && - system_supports_generic_auth(); + r = system_has_full_ptr_auth(); break; default: r = 0; @@ -205,19 +209,14 @@ static void kvm_vcpu_reset_sve(struct kvm_vcpu *vcpu) static int kvm_vcpu_enable_ptrauth(struct kvm_vcpu *vcpu) { - /* Support ptrauth only if the system supports these capabilities. */ - if (!has_vhe()) - return -EINVAL; - - if (!system_supports_address_auth() || - !system_supports_generic_auth()) - return -EINVAL; /* * For now make sure that both address/generic pointer authentication - * features are requested by the userspace together. + * features are requested by the userspace together and the system + * supports these capabilities. 
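The regmap.c hunks above stop indexing a single spsr[] array and instead pick a dedicated field per banked mode, with the SVC copy living alongside the EL1 system registers. A user-space model of that selection; the context layout here is illustrative only:

#include <stdint.h>
#include <stdio.h>

enum spsr_idx { SPSR_SVC, SPSR_ABT, SPSR_UND, SPSR_IRQ, SPSR_FIQ };

struct cpu_ctxt {
	uint64_t spsr_el1;      /* doubles as the SVC banked copy */
	uint64_t spsr_abt;
	uint64_t spsr_und;
	uint64_t spsr_irq;
	uint64_t spsr_fiq;
};

static uint64_t *spsr_slot(struct cpu_ctxt *c, enum spsr_idx idx)
{
	switch (idx) {
	case SPSR_SVC: return &c->spsr_el1;
	case SPSR_ABT: return &c->spsr_abt;
	case SPSR_UND: return &c->spsr_und;
	case SPSR_IRQ: return &c->spsr_irq;
	case SPSR_FIQ: return &c->spsr_fiq;
	}
	return NULL;
}

static void write_spsr32(struct cpu_ctxt *c, enum spsr_idx idx, uint64_t v)
{
	*spsr_slot(c, idx) = v;
}

static uint64_t read_spsr32(struct cpu_ctxt *c, enum spsr_idx idx)
{
	return *spsr_slot(c, idx);
}

int main(void)
{
	struct cpu_ctxt c = { 0 };

	write_spsr32(&c, SPSR_IRQ, 0x1d3);
	printf("SPSR_irq = %#llx\n",
	       (unsigned long long)read_spsr32(&c, SPSR_IRQ));
	return 0;
}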
*/ if (!test_bit(KVM_ARM_VCPU_PTRAUTH_ADDRESS, vcpu->arch.features) || - !test_bit(KVM_ARM_VCPU_PTRAUTH_GENERIC, vcpu->arch.features)) + !test_bit(KVM_ARM_VCPU_PTRAUTH_GENERIC, vcpu->arch.features) || + !system_has_full_ptr_auth()) return -EINVAL; vcpu->arch.flags |= KVM_ARM64_GUEST_HAS_PTRAUTH; @@ -292,7 +291,7 @@ int kvm_reset_vcpu(struct kvm_vcpu *vcpu) /* Reset core registers */ memset(vcpu_gp_regs(vcpu), 0, sizeof(*vcpu_gp_regs(vcpu))); - vcpu_gp_regs(vcpu)->regs.pstate = pstate; + vcpu_gp_regs(vcpu)->pstate = pstate; /* Reset system registers */ kvm_reset_sys_regs(vcpu); diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index 138961d7ebe3..077293b5115f 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -94,6 +94,7 @@ static bool __vcpu_read_sys_reg_from_cpu(int reg, u64 *val) case TPIDR_EL1: *val = read_sysreg_s(SYS_TPIDR_EL1); break; case AMAIR_EL1: *val = read_sysreg_s(SYS_AMAIR_EL12); break; case CNTKCTL_EL1: *val = read_sysreg_s(SYS_CNTKCTL_EL12); break; + case ELR_EL1: *val = read_sysreg_s(SYS_ELR_EL12); break; case PAR_EL1: *val = read_sysreg_s(SYS_PAR_EL1); break; case DACR32_EL2: *val = read_sysreg_s(SYS_DACR32_EL2); break; case IFSR32_EL2: *val = read_sysreg_s(SYS_IFSR32_EL2); break; @@ -133,6 +134,7 @@ static bool __vcpu_write_sys_reg_to_cpu(u64 val, int reg) case TPIDR_EL1: write_sysreg_s(val, SYS_TPIDR_EL1); break; case AMAIR_EL1: write_sysreg_s(val, SYS_AMAIR_EL12); break; case CNTKCTL_EL1: write_sysreg_s(val, SYS_CNTKCTL_EL12); break; + case ELR_EL1: write_sysreg_s(val, SYS_ELR_EL12); break; case PAR_EL1: write_sysreg_s(val, SYS_PAR_EL1); break; case DACR32_EL2: write_sysreg_s(val, SYS_DACR32_EL2); break; case IFSR32_EL2: write_sysreg_s(val, SYS_IFSR32_EL2); break; @@ -242,6 +244,25 @@ static bool access_vm_reg(struct kvm_vcpu *vcpu, return true; } +static bool access_actlr(struct kvm_vcpu *vcpu, + struct sys_reg_params *p, + const struct sys_reg_desc *r) +{ + if (p->is_write) + return ignore_write(vcpu, p); + + p->regval = vcpu_read_sys_reg(vcpu, ACTLR_EL1); + + if (p->is_aarch32) { + if (r->Op2 & 2) + p->regval = upper_32_bits(p->regval); + else + p->regval = lower_32_bits(p->regval); + } + + return true; +} + /* * Trap handler for the GICv3 SGI generation system register. * Forward the request to the VGIC emulation. 
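kvm_vcpu_enable_ptrauth() above now folds its capability test into one check: userspace must request address and generic authentication together, and system_has_full_ptr_auth() must report both host capabilities. A stand-alone sketch of that both-or-nothing rule, with the host cpucaps reduced to plain booleans:

#include <stdbool.h>
#include <stdio.h>

struct vcpu_features {
	bool ptrauth_address;
	bool ptrauth_generic;
};

static bool system_address_auth = true;   /* pretend host capabilities */
static bool system_generic_auth = true;

static bool system_has_full_ptr_auth(void)
{
	return system_address_auth && system_generic_auth;
}

static int enable_ptrauth(const struct vcpu_features *f)
{
	/* Userspace must ask for both halves, and the host must have both. */
	if (!f->ptrauth_address || !f->ptrauth_generic ||
	    !system_has_full_ptr_auth())
		return -1;                /* -EINVAL in the kernel */
	return 0;
}

int main(void)
{
	struct vcpu_features both = { true, true };
	struct vcpu_features half = { true, false };

	printf("both requested: %d, only address: %d\n",
	       enable_ptrauth(&both), enable_ptrauth(&half));
	return 0;
}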
@@ -615,6 +636,12 @@ static void reset_amair_el1(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r) vcpu_write_sys_reg(vcpu, amair, AMAIR_EL1); } +static void reset_actlr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r) +{ + u64 actlr = read_sysreg(actlr_el1); + vcpu_write_sys_reg(vcpu, actlr, ACTLR_EL1); +} + static void reset_mpidr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r) { u64 mpidr; @@ -1518,6 +1545,7 @@ static const struct sys_reg_desc sys_reg_descs[] = { ID_UNALLOCATED(7,7), { SYS_DESC(SYS_SCTLR_EL1), access_vm_reg, reset_val, SCTLR_EL1, 0x00C50078 }, + { SYS_DESC(SYS_ACTLR_EL1), access_actlr, reset_actlr, ACTLR_EL1 }, { SYS_DESC(SYS_CPACR_EL1), NULL, reset_val, CPACR_EL1, 0 }, { SYS_DESC(SYS_ZCR_EL1), NULL, reset_val, ZCR_EL1, 0, .visibility = sve_visibility }, { SYS_DESC(SYS_TTBR0_EL1), access_vm_reg, reset_unknown, TTBR0_EL1 }, @@ -1957,6 +1985,8 @@ static const struct sys_reg_desc cp14_64_regs[] = { static const struct sys_reg_desc cp15_regs[] = { { Op1( 0), CRn( 0), CRm( 0), Op2( 1), access_ctr }, { Op1( 0), CRn( 1), CRm( 0), Op2( 0), access_vm_reg, NULL, c1_SCTLR }, + { Op1( 0), CRn( 1), CRm( 0), Op2( 1), access_actlr }, + { Op1( 0), CRn( 1), CRm( 0), Op2( 3), access_actlr }, { Op1( 0), CRn( 2), CRm( 0), Op2( 0), access_vm_reg, NULL, c2_TTBR0 }, { Op1( 0), CRn( 2), CRm( 0), Op2( 1), access_vm_reg, NULL, c2_TTBR1 }, { Op1( 0), CRn( 2), CRm( 0), Op2( 2), access_vm_reg, NULL, c2_TTBCR }, @@ -2109,36 +2139,6 @@ static int check_sysreg_table(const struct sys_reg_desc *table, unsigned int n, return 0; } -/* Target specific emulation tables */ -static struct kvm_sys_reg_target_table *target_tables[KVM_ARM_NUM_TARGETS]; - -void kvm_register_target_sys_reg_table(unsigned int target, - struct kvm_sys_reg_target_table *table) -{ - if (check_sysreg_table(table->table64.table, table->table64.num, false) || - check_sysreg_table(table->table32.table, table->table32.num, true)) - return; - - target_tables[target] = table; -} - -/* Get specific register table for this target. 
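access_actlr() above exposes the 64-bit ACTLR_EL1 to AArch32 as two halves, and the new cp15 rows with Op2 1 and 3 select which half; bit 1 of Op2 picks the upper 32 bits. The selection as plain arithmetic, with the encoding values taken from the hunk:

#include <stdint.h>
#include <stdio.h>

static uint32_t actlr_aarch32_view(uint64_t actlr_el1, unsigned int op2)
{
	if (op2 & 2)
		return (uint32_t)(actlr_el1 >> 32);   /* upper_32_bits() */
	return (uint32_t)actlr_el1;                   /* lower_32_bits() */
}

int main(void)
{
	uint64_t actlr = 0x1122334455667788ULL;

	printf("Op2=1 -> 0x%08x, Op2=3 -> 0x%08x\n",
	       actlr_aarch32_view(actlr, 1), actlr_aarch32_view(actlr, 3));
	return 0;
}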
*/ -static const struct sys_reg_desc *get_target_table(unsigned target, - bool mode_is_64, - size_t *num) -{ - struct kvm_sys_reg_target_table *table; - - table = target_tables[target]; - if (mode_is_64) { - *num = table->table64.num; - return table->table64.table; - } else { - *num = table->table32.num; - return table->table32.table; - } -} - static int match_sys_reg(const void *key, const void *elt) { const unsigned long pval = (unsigned long)key; @@ -2220,10 +2220,10 @@ static int emulate_cp(struct kvm_vcpu *vcpu, static void unhandled_cp_access(struct kvm_vcpu *vcpu, struct sys_reg_params *params) { - u8 hsr_ec = kvm_vcpu_trap_get_class(vcpu); + u8 esr_ec = kvm_vcpu_trap_get_class(vcpu); int cp = -1; - switch(hsr_ec) { + switch (esr_ec) { case ESR_ELx_EC_CP15_32: case ESR_ELx_EC_CP15_64: cp = 15; @@ -2249,22 +2249,20 @@ static void unhandled_cp_access(struct kvm_vcpu *vcpu, */ static int kvm_handle_cp_64(struct kvm_vcpu *vcpu, const struct sys_reg_desc *global, - size_t nr_global, - const struct sys_reg_desc *target_specific, - size_t nr_specific) + size_t nr_global) { struct sys_reg_params params; - u32 hsr = kvm_vcpu_get_hsr(vcpu); + u32 esr = kvm_vcpu_get_esr(vcpu); int Rt = kvm_vcpu_sys_get_rt(vcpu); - int Rt2 = (hsr >> 10) & 0x1f; + int Rt2 = (esr >> 10) & 0x1f; params.is_aarch32 = true; params.is_32bit = false; - params.CRm = (hsr >> 1) & 0xf; - params.is_write = ((hsr & 1) == 0); + params.CRm = (esr >> 1) & 0xf; + params.is_write = ((esr & 1) == 0); params.Op0 = 0; - params.Op1 = (hsr >> 16) & 0xf; + params.Op1 = (esr >> 16) & 0xf; params.Op2 = 0; params.CRn = 0; @@ -2278,14 +2276,11 @@ static int kvm_handle_cp_64(struct kvm_vcpu *vcpu, } /* - * Try to emulate the coprocessor access using the target - * specific table first, and using the global table afterwards. - * If either of the tables contains a handler, handle the + * If the table contains a handler, handle the * potential register operation in the case of a read and return * with success. 
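kvm_handle_cp_64() above pulls the access description straight out of the renamed ESR value; the shifts and masks below are copied from the hunk (Rt2, CRm, Op1 and the direction bit of the MCRR/MRRC trap syndrome), wrapped in a small decoder with an invented struct:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

struct cp64_access {
	unsigned int Op1, CRm, Rt, Rt2;
	bool is_write;
};

static struct cp64_access decode_cp64(uint32_t esr, unsigned int rt)
{
	struct cp64_access a;

	a.Rt = rt;                        /* first GPR of the 64-bit transfer */
	a.Rt2 = (esr >> 10) & 0x1f;       /* second GPR of the pair */
	a.CRm = (esr >> 1) & 0xf;
	a.Op1 = (esr >> 16) & 0xf;
	a.is_write = (esr & 1) == 0;      /* direction bit clear: write (MCRR) */
	return a;
}

int main(void)
{
	struct cp64_access a = decode_cp64(0x0003e6c1, 1);   /* arbitrary example value */

	printf("Op1=%u CRm=%u Rt2=%u write=%d\n", a.Op1, a.CRm, a.Rt2, a.is_write);
	return 0;
}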
*/ - if (!emulate_cp(vcpu, &params, target_specific, nr_specific) || - !emulate_cp(vcpu, &params, global, nr_global)) { + if (!emulate_cp(vcpu, &params, global, nr_global)) { /* Split up the value between registers for the read side */ if (!params.is_write) { vcpu_set_reg(vcpu, Rt, lower_32_bits(params.regval)); @@ -2306,26 +2301,23 @@ */ static int kvm_handle_cp_32(struct kvm_vcpu *vcpu, const struct sys_reg_desc *global, - size_t nr_global, - const struct sys_reg_desc *target_specific, - size_t nr_specific) + size_t nr_global) { struct sys_reg_params params; - u32 hsr = kvm_vcpu_get_hsr(vcpu); + u32 esr = kvm_vcpu_get_esr(vcpu); int Rt = kvm_vcpu_sys_get_rt(vcpu); params.is_aarch32 = true; params.is_32bit = true; - params.CRm = (hsr >> 1) & 0xf; + params.CRm = (esr >> 1) & 0xf; params.regval = vcpu_get_reg(vcpu, Rt); - params.is_write = ((hsr & 1) == 0); - params.CRn = (hsr >> 10) & 0xf; + params.is_write = ((esr & 1) == 0); + params.CRn = (esr >> 10) & 0xf; params.Op0 = 0; - params.Op1 = (hsr >> 14) & 0x7; - params.Op2 = (hsr >> 17) & 0x7; + params.Op1 = (esr >> 14) & 0x7; + params.Op2 = (esr >> 17) & 0x7; - if (!emulate_cp(vcpu, &params, target_specific, nr_specific) || - !emulate_cp(vcpu, &params, global, nr_global)) { + if (!emulate_cp(vcpu, &params, global, nr_global)) { if (!params.is_write) vcpu_set_reg(vcpu, Rt, params.regval); return 1; @@ -2337,38 +2329,22 @@ static int kvm_handle_cp_32(struct kvm_vcpu *vcpu, int kvm_handle_cp15_64(struct kvm_vcpu *vcpu) { - const struct sys_reg_desc *target_specific; - size_t num; - - target_specific = get_target_table(vcpu->arch.target, false, &num); - return kvm_handle_cp_64(vcpu, - cp15_64_regs, ARRAY_SIZE(cp15_64_regs), - target_specific, num); + return kvm_handle_cp_64(vcpu, cp15_64_regs, ARRAY_SIZE(cp15_64_regs)); } int kvm_handle_cp15_32(struct kvm_vcpu *vcpu) { - const struct sys_reg_desc *target_specific; - size_t num; - - target_specific = get_target_table(vcpu->arch.target, false, &num); - return kvm_handle_cp_32(vcpu, - cp15_regs, ARRAY_SIZE(cp15_regs), - target_specific, num); + return kvm_handle_cp_32(vcpu, cp15_regs, ARRAY_SIZE(cp15_regs)); } int kvm_handle_cp14_64(struct kvm_vcpu *vcpu) { - return kvm_handle_cp_64(vcpu, - cp14_64_regs, ARRAY_SIZE(cp14_64_regs), - NULL, 0); + return kvm_handle_cp_64(vcpu, cp14_64_regs, ARRAY_SIZE(cp14_64_regs)); } int kvm_handle_cp14_32(struct kvm_vcpu *vcpu) { - return kvm_handle_cp_32(vcpu, - cp14_regs, ARRAY_SIZE(cp14_regs), - NULL, 0); + return kvm_handle_cp_32(vcpu, cp14_regs, ARRAY_SIZE(cp14_regs)); } static bool is_imp_def_sys_reg(struct sys_reg_params *params) @@ -2380,15 +2356,9 @@ static bool is_imp_def_sys_reg(struct sys_reg_params *params) static int emulate_sys_reg(struct kvm_vcpu *vcpu, struct sys_reg_params *params) { - size_t num; - const struct sys_reg_desc *table, *r; + const struct sys_reg_desc *r; - table = get_target_table(vcpu->arch.target, true, &num); - - /* Search target-specific then generic table.
*/ - r = find_reg(params, table, num); - if (!r) - r = find_reg(params, sys_reg_descs, ARRAY_SIZE(sys_reg_descs)); + r = find_reg(params, sys_reg_descs, ARRAY_SIZE(sys_reg_descs)); if (likely(r)) { perform_access(vcpu, params, r); @@ -2403,14 +2373,20 @@ static int emulate_sys_reg(struct kvm_vcpu *vcpu, return 1; } -static void reset_sys_reg_descs(struct kvm_vcpu *vcpu, - const struct sys_reg_desc *table, size_t num) +/** + * kvm_reset_sys_regs - sets system registers to reset value + * @vcpu: The VCPU pointer + * + * This function finds the right table above and sets the registers on the + * virtual CPU struct to their architecturally defined reset values. + */ +void kvm_reset_sys_regs(struct kvm_vcpu *vcpu) { unsigned long i; - for (i = 0; i < num; i++) - if (table[i].reset) - table[i].reset(vcpu, &table[i]); + for (i = 0; i < ARRAY_SIZE(sys_reg_descs); i++) + if (sys_reg_descs[i].reset) + sys_reg_descs[i].reset(vcpu, &sys_reg_descs[i]); } /** @@ -2420,7 +2396,7 @@ static void reset_sys_reg_descs(struct kvm_vcpu *vcpu, int kvm_handle_sys_reg(struct kvm_vcpu *vcpu) { struct sys_reg_params params; - unsigned long esr = kvm_vcpu_get_hsr(vcpu); + unsigned long esr = kvm_vcpu_get_esr(vcpu); int Rt = kvm_vcpu_sys_get_rt(vcpu); int ret; @@ -2491,8 +2467,7 @@ const struct sys_reg_desc *find_reg_by_id(u64 id, static const struct sys_reg_desc *index_to_sys_reg_desc(struct kvm_vcpu *vcpu, u64 id) { - size_t num; - const struct sys_reg_desc *table, *r; + const struct sys_reg_desc *r; struct sys_reg_params params; /* We only do sys_reg for now. */ @@ -2502,10 +2477,7 @@ static const struct sys_reg_desc *index_to_sys_reg_desc(struct kvm_vcpu *vcpu, if (!index_to_params(id, &params)) return NULL; - table = get_target_table(vcpu->arch.target, true, &num); - r = find_reg(&params, table, num); - if (!r) - r = find_reg(&params, sys_reg_descs, ARRAY_SIZE(sys_reg_descs)); + r = find_reg(&params, sys_reg_descs, ARRAY_SIZE(sys_reg_descs)); /* Not saved in the sys_reg array and not otherwise accessible? */ if (r && !(r->reg || r->get_user)) @@ -2805,35 +2777,17 @@ static int walk_one_sys_reg(const struct kvm_vcpu *vcpu, /* Assumed ordered tables, see kvm_sys_reg_table_init. */ static int walk_sys_regs(struct kvm_vcpu *vcpu, u64 __user *uind) { - const struct sys_reg_desc *i1, *i2, *end1, *end2; + const struct sys_reg_desc *i2, *end2; unsigned int total = 0; - size_t num; int err; - /* We check for duplicates here, to allow arch-specific overrides. */ - i1 = get_target_table(vcpu->arch.target, true, &num); - end1 = i1 + num; i2 = sys_reg_descs; end2 = sys_reg_descs + ARRAY_SIZE(sys_reg_descs); - BUG_ON(i1 == end1 || i2 == end2); - - /* Walk carefully, as both tables may refer to the same register. */ - while (i1 || i2) { - int cmp = cmp_sys_reg(i1, i2); - /* target-specific overrides generic entry. */ - if (cmp <= 0) - err = walk_one_sys_reg(vcpu, i1, &uind, &total); - else - err = walk_one_sys_reg(vcpu, i2, &uind, &total); - + while (i2 != end2) { + err = walk_one_sys_reg(vcpu, i2++, &uind, &total); if (err) return err; - - if (cmp <= 0 && ++i1 == end1) - i1 = NULL; - if (cmp >= 0 && ++i2 == end2) - i2 = NULL; } return total; } @@ -2900,22 +2854,3 @@ void kvm_sys_reg_table_init(void) /* Clear all higher bits. */ cache_levels &= (1 << (i*3))-1; } - -/** - * kvm_reset_sys_regs - sets system registers to reset value - * @vcpu: The VCPU pointer - * - * This function finds the right table above and sets the registers on the - * virtual CPU struct to their architecturally defined reset values.
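With the target-specific tables gone, kvm_reset_sys_regs() above is a single pass over sys_reg_descs invoking each optional ->reset() hook. A toy version of that walk; the descriptors, callbacks and vCPU type are made up, and only the SCTLR_EL1 reset value is borrowed from the table above:

#include <stddef.h>
#include <stdio.h>

struct toy_vcpu { unsigned long regs[4]; };

struct reg_desc {
	const char *name;
	int index;
	void (*reset)(struct toy_vcpu *, const struct reg_desc *);
};

static void reset_zero(struct toy_vcpu *v, const struct reg_desc *d)
{
	v->regs[d->index] = 0;
}

static void reset_sctlr(struct toy_vcpu *v, const struct reg_desc *d)
{
	v->regs[d->index] = 0x00C50078;   /* the SCTLR_EL1 reset value in the table */
}

static const struct reg_desc descs[] = {
	{ "SCTLR_EL1", 0, reset_sctlr },
	{ "ACTLR_EL1", 1, reset_zero  },  /* the real hook reads the host's actlr_el1 */
	{ "TTBR0_EL1", 2, NULL        },  /* no reset hook: left untouched */
};

static void reset_all(struct toy_vcpu *v)
{
	for (size_t i = 0; i < sizeof(descs) / sizeof(descs[0]); i++)
		if (descs[i].reset)
			descs[i].reset(v, &descs[i]);
}

int main(void)
{
	struct toy_vcpu v = { { 1, 2, 3, 4 } };

	reset_all(&v);
	printf("SCTLR=%#lx ACTLR=%#lx TTBR0=%#lx\n", v.regs[0], v.regs[1], v.regs[2]);
	return 0;
}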
- */ -void kvm_reset_sys_regs(struct kvm_vcpu *vcpu) -{ - size_t num; - const struct sys_reg_desc *table; - - /* Generic chip reset first (so target could override). */ - reset_sys_reg_descs(vcpu, sys_reg_descs, ARRAY_SIZE(sys_reg_descs)); - - table = get_target_table(vcpu->arch.target, true, &num); - reset_sys_reg_descs(vcpu, table, num); -} diff --git a/arch/arm64/kvm/sys_regs_generic_v8.c b/arch/arm64/kvm/sys_regs_generic_v8.c deleted file mode 100644 index aa9d356451eb..000000000000 --- a/arch/arm64/kvm/sys_regs_generic_v8.c +++ /dev/null @@ -1,96 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * Copyright (C) 2012,2013 - ARM Ltd - * Author: Marc Zyngier <marc.zyngier@arm.com> - * - * Based on arch/arm/kvm/coproc_a15.c: - * Copyright (C) 2012 - Virtual Open Systems and Columbia University - * Authors: Rusty Russell <rusty@rustcorp.au> - * Christoffer Dall <c.dall@virtualopensystems.com> - */ -#include <linux/kvm_host.h> -#include <asm/cputype.h> -#include <asm/kvm_arm.h> -#include <asm/kvm_asm.h> -#include <asm/kvm_emulate.h> -#include <asm/kvm_coproc.h> -#include <asm/sysreg.h> -#include <linux/init.h> - -#include "sys_regs.h" - -static bool access_actlr(struct kvm_vcpu *vcpu, - struct sys_reg_params *p, - const struct sys_reg_desc *r) -{ - if (p->is_write) - return ignore_write(vcpu, p); - - p->regval = vcpu_read_sys_reg(vcpu, ACTLR_EL1); - - if (p->is_aarch32) { - if (r->Op2 & 2) - p->regval = upper_32_bits(p->regval); - else - p->regval = lower_32_bits(p->regval); - } - - return true; -} - -static void reset_actlr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r) -{ - __vcpu_sys_reg(vcpu, ACTLR_EL1) = read_sysreg(actlr_el1); -} - -/* - * Implementation specific sys-reg registers. - * Important: Must be sorted ascending by Op0, Op1, CRn, CRm, Op2 - */ -static const struct sys_reg_desc genericv8_sys_regs[] = { - { SYS_DESC(SYS_ACTLR_EL1), access_actlr, reset_actlr, ACTLR_EL1 }, -}; - -static const struct sys_reg_desc genericv8_cp15_regs[] = { - /* ACTLR */ - { Op1(0b000), CRn(0b0001), CRm(0b0000), Op2(0b001), - access_actlr }, - { Op1(0b000), CRn(0b0001), CRm(0b0000), Op2(0b011), - access_actlr }, -}; - -static struct kvm_sys_reg_target_table genericv8_target_table = { - .table64 = { - .table = genericv8_sys_regs, - .num = ARRAY_SIZE(genericv8_sys_regs), - }, - .table32 = { - .table = genericv8_cp15_regs, - .num = ARRAY_SIZE(genericv8_cp15_regs), - }, -}; - -static int __init sys_reg_genericv8_init(void) -{ - unsigned int i; - - for (i = 1; i < ARRAY_SIZE(genericv8_sys_regs); i++) - BUG_ON(cmp_sys_reg(&genericv8_sys_regs[i-1], - &genericv8_sys_regs[i]) >= 0); - - kvm_register_target_sys_reg_table(KVM_ARM_TARGET_AEM_V8, - &genericv8_target_table); - kvm_register_target_sys_reg_table(KVM_ARM_TARGET_FOUNDATION_V8, - &genericv8_target_table); - kvm_register_target_sys_reg_table(KVM_ARM_TARGET_CORTEX_A53, - &genericv8_target_table); - kvm_register_target_sys_reg_table(KVM_ARM_TARGET_CORTEX_A57, - &genericv8_target_table); - kvm_register_target_sys_reg_table(KVM_ARM_TARGET_XGENE_POTENZA, - &genericv8_target_table); - kvm_register_target_sys_reg_table(KVM_ARM_TARGET_GENERIC_V8, - &genericv8_target_table); - - return 0; -} -late_initcall(sys_reg_genericv8_init); diff --git a/arch/arm64/kvm/trace_arm.h b/arch/arm64/kvm/trace_arm.h index 4c71270cc097..4691053c5ee4 100644 --- a/arch/arm64/kvm/trace_arm.h +++ b/arch/arm64/kvm/trace_arm.h @@ -301,8 +301,8 @@ TRACE_EVENT(kvm_timer_save_state, ), TP_fast_assign( - __entry->ctl = ctx->cnt_ctl; - __entry->cval = ctx->cnt_cval; + 
__entry->ctl = timer_get_ctl(ctx); + __entry->cval = timer_get_cval(ctx); __entry->timer_idx = arch_timer_ctx_index(ctx); ), @@ -323,8 +323,8 @@ TRACE_EVENT(kvm_timer_restore_state, ), TP_fast_assign( - __entry->ctl = ctx->cnt_ctl; - __entry->cval = ctx->cnt_cval; + __entry->ctl = timer_get_ctl(ctx); + __entry->cval = timer_get_cval(ctx); __entry->timer_idx = arch_timer_ctx_index(ctx); ), diff --git a/arch/arm64/kvm/va_layout.c b/arch/arm64/kvm/va_layout.c index a4f48c1ac28c..e0404bcab019 100644 --- a/arch/arm64/kvm/va_layout.c +++ b/arch/arm64/kvm/va_layout.c @@ -48,7 +48,7 @@ __init void kvm_compute_layout(void) va_mask = GENMASK_ULL(tag_lsb - 1, 0); tag_val = hyp_va_msb; - if (tag_lsb != (vabits_actual - 1)) { + if (IS_ENABLED(CONFIG_RANDOMIZE_BASE) && tag_lsb != (vabits_actual - 1)) { /* We have some free bits to insert a random tag. */ tag_val |= get_random_long() & GENMASK_ULL(vabits_actual - 2, tag_lsb); } diff --git a/arch/arm64/kvm/vgic/vgic-irqfd.c b/arch/arm64/kvm/vgic/vgic-irqfd.c index d8cdfea5cc96..79f8899b234c 100644 --- a/arch/arm64/kvm/vgic/vgic-irqfd.c +++ b/arch/arm64/kvm/vgic/vgic-irqfd.c @@ -100,19 +100,33 @@ int kvm_set_msi(struct kvm_kernel_irq_routing_entry *e, /** * kvm_arch_set_irq_inatomic: fast-path for irqfd injection - * - * Currently only direct MSI injection is supported. */ int kvm_arch_set_irq_inatomic(struct kvm_kernel_irq_routing_entry *e, struct kvm *kvm, int irq_source_id, int level, bool line_status) { - if (e->type == KVM_IRQ_ROUTING_MSI && vgic_has_its(kvm) && level) { + if (!level) + return -EWOULDBLOCK; + + switch (e->type) { + case KVM_IRQ_ROUTING_MSI: { struct kvm_msi msi; + if (!vgic_has_its(kvm)) + break; + kvm_populate_msi(e, &msi); - if (!vgic_its_inject_cached_translation(kvm, &msi)) - return 0; + return vgic_its_inject_cached_translation(kvm, &msi); + } + + case KVM_IRQ_ROUTING_IRQCHIP: + /* + * Injecting SPIs is always possible in atomic context + * as long as the damn vgic is initialized. 
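kvm_compute_layout() above now mixes random bits into the hypervisor VA tag only when CONFIG_RANDOMIZE_BASE is enabled and there is room between tag_lsb and the top of the VA space. The masking arithmetic on its own, with a local GENMASK_ULL definition and a fixed stand-in for get_random_long() so the output is reproducible:

#include <stdint.h>
#include <stdio.h>

#define GENMASK_ULL(h, l) \
	(((~0ULL) << (l)) & (~0ULL >> (63 - (h))))

int main(void)
{
	unsigned int vabits_actual = 48;
	unsigned int tag_lsb = 40;                  /* lowest bit the tag may use here */
	uint64_t hyp_va_msb = 1ULL << (vabits_actual - 1);
	uint64_t rnd = 0x0123456789abcdefULL;       /* stand-in for get_random_long() */

	uint64_t va_mask = GENMASK_ULL(tag_lsb - 1, 0);
	uint64_t tag_val = hyp_va_msb;

	/* the kernel additionally requires CONFIG_RANDOMIZE_BASE for this step */
	if (tag_lsb != vabits_actual - 1)
		tag_val |= rnd & GENMASK_ULL(vabits_actual - 2, tag_lsb);

	printf("va_mask=%#llx tag_val=%#llx\n",
	       (unsigned long long)va_mask, (unsigned long long)tag_val);
	return 0;
}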
+ */ + if (unlikely(!vgic_initialized(kvm))) + break; + return vgic_irqfd_set_irq(e, kvm, irq_source_id, 1, line_status); } return -EWOULDBLOCK; diff --git a/arch/arm64/kvm/vgic/vgic-its.c b/arch/arm64/kvm/vgic/vgic-its.c index c012a52b19f5..40cbaca81333 100644 --- a/arch/arm64/kvm/vgic/vgic-its.c +++ b/arch/arm64/kvm/vgic/vgic-its.c @@ -757,9 +757,8 @@ int vgic_its_inject_cached_translation(struct kvm *kvm, struct kvm_msi *msi) db = (u64)msi->address_hi << 32 | msi->address_lo; irq = vgic_its_check_cache(kvm, db, msi->devid, msi->data); - if (!irq) - return -1; + return -EWOULDBLOCK; raw_spin_lock_irqsave(&irq->irq_lock, flags); irq->pending_latch = true; diff --git a/arch/arm64/kvm/vgic/vgic-mmio-v3.c b/arch/arm64/kvm/vgic/vgic-mmio-v3.c index d2339a2b9fb9..5c786b915cd3 100644 --- a/arch/arm64/kvm/vgic/vgic-mmio-v3.c +++ b/arch/arm64/kvm/vgic/vgic-mmio-v3.c @@ -389,7 +389,7 @@ u64 vgic_sanitise_outer_cacheability(u64 field) case GIC_BASER_CACHE_nC: return field; default: - return GIC_BASER_CACHE_nC; + return GIC_BASER_CACHE_SameAsInner; } } diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c index 8afb238ff335..f07333e86c2f 100644 --- a/arch/arm64/mm/fault.c +++ b/arch/arm64/mm/fault.c @@ -404,7 +404,8 @@ static void do_bad_area(unsigned long addr, unsigned int esr, struct pt_regs *re #define VM_FAULT_BADACCESS 0x020000 static vm_fault_t __do_page_fault(struct mm_struct *mm, unsigned long addr, - unsigned int mm_flags, unsigned long vm_flags) + unsigned int mm_flags, unsigned long vm_flags, + struct pt_regs *regs) { struct vm_area_struct *vma = find_vma(mm, addr); @@ -428,7 +429,7 @@ static vm_fault_t __do_page_fault(struct mm_struct *mm, unsigned long addr, */ if (!(vma->vm_flags & vm_flags)) return VM_FAULT_BADACCESS; - return handle_mm_fault(vma, addr & PAGE_MASK, mm_flags); + return handle_mm_fault(vma, addr & PAGE_MASK, mm_flags, regs); } static bool is_el0_instruction_abort(unsigned int esr) @@ -450,7 +451,7 @@ static int __kprobes do_page_fault(unsigned long addr, unsigned int esr, { const struct fault_info *inf; struct mm_struct *mm = current->mm; - vm_fault_t fault, major = 0; + vm_fault_t fault; unsigned long vm_flags = VM_ACCESS_FLAGS; unsigned int mm_flags = FAULT_FLAG_DEFAULT; @@ -516,8 +517,7 @@ retry: #endif } - fault = __do_page_fault(mm, addr, mm_flags, vm_flags); - major |= fault & VM_FAULT_MAJOR; + fault = __do_page_fault(mm, addr, mm_flags, vm_flags, regs); /* Quick path to respond to signals */ if (fault_signal_pending(fault, regs)) { @@ -538,25 +538,8 @@ retry: * Handle the "normal" (no error) case first. */ if (likely(!(fault & (VM_FAULT_ERROR | VM_FAULT_BADMAP | - VM_FAULT_BADACCESS)))) { - /* - * Major/minor page fault accounting is only done - * once. If we go through a retry, it is extremely - * likely that the page will be found in page cache at - * that point. 
- */ - if (major) { - current->maj_flt++; - perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, regs, - addr); - } else { - current->min_flt++; - perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, regs, - addr); - } - + VM_FAULT_BADACCESS)))) return 0; - } /* * If we are in kernel mode at this point, we have no context to diff --git a/arch/arm64/mm/numa.c b/arch/arm64/mm/numa.c index aafcee3e3f7e..73f8b49d485c 100644 --- a/arch/arm64/mm/numa.c +++ b/arch/arm64/mm/numa.c @@ -461,13 +461,3 @@ void __init arm64_numa_init(void) numa_init(dummy_numa_init); } - -/* - * We hope that we will be hotplugging memory on nodes we already know about, - * such that acpi_get_node() succeeds and we never fall back to this... - */ -int memory_add_physaddr_to_nid(u64 addr) -{ - pr_warn("Unknown node for memory at 0x%llx, assuming node 0\n", addr); - return 0; -} diff --git a/arch/csky/include/asm/segment.h b/arch/csky/include/asm/segment.h index db2640d5f575..79ede9b1a646 100644 --- a/arch/csky/include/asm/segment.h +++ b/arch/csky/include/asm/segment.h @@ -13,6 +13,6 @@ typedef struct { #define USER_DS ((mm_segment_t) { 0x80000000UL }) #define get_fs() (current_thread_info()->addr_limit) #define set_fs(x) (current_thread_info()->addr_limit = (x)) -#define segment_eq(a, b) ((a).seg == (b).seg) +#define uaccess_kernel() (get_fs().seg == KERNEL_DS.seg) #endif /* __ASM_CSKY_SEGMENT_H */ diff --git a/arch/csky/mm/fault.c b/arch/csky/mm/fault.c index b1dce9f2f04d..081b178b41b1 100644 --- a/arch/csky/mm/fault.c +++ b/arch/csky/mm/fault.c @@ -150,7 +150,8 @@ good_area: * make sure we exit gracefully rather than endlessly redo * the fault. */ - fault = handle_mm_fault(vma, address, write ? FAULT_FLAG_WRITE : 0); + fault = handle_mm_fault(vma, address, write ? FAULT_FLAG_WRITE : 0, + regs); if (unlikely(fault & VM_FAULT_ERROR)) { if (fault & VM_FAULT_OOM) goto out_of_memory; @@ -160,16 +161,6 @@ good_area: goto bad_area; BUG(); } - if (fault & VM_FAULT_MAJOR) { - tsk->maj_flt++; - perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, regs, - address); - } else { - tsk->min_flt++; - perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, regs, - address); - } - mmap_read_unlock(mm); return; diff --git a/arch/h8300/include/asm/segment.h b/arch/h8300/include/asm/segment.h index a407978f9f9f..37950725d9b9 100644 --- a/arch/h8300/include/asm/segment.h +++ b/arch/h8300/include/asm/segment.h @@ -33,7 +33,7 @@ static inline mm_segment_t get_fs(void) return USER_DS; } -#define segment_eq(a, b) ((a).seg == (b).seg) +#define uaccess_kernel() (get_fs().seg == KERNEL_DS.seg) #endif /* __ASSEMBLY__ */ diff --git a/arch/hexagon/mm/vm_fault.c b/arch/hexagon/mm/vm_fault.c index cd3808f96b93..ef32c5a84ff3 100644 --- a/arch/hexagon/mm/vm_fault.c +++ b/arch/hexagon/mm/vm_fault.c @@ -18,6 +18,7 @@ #include <linux/signal.h> #include <linux/extable.h> #include <linux/hardirq.h> +#include <linux/perf_event.h> /* * Decode of hardware exception sends us to one of several @@ -53,6 +54,8 @@ void do_page_fault(unsigned long address, long cause, struct pt_regs *regs) if (user_mode(regs)) flags |= FAULT_FLAG_USER; + + perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address); retry: mmap_read_lock(mm); vma = find_vma(mm, address); @@ -88,7 +91,7 @@ good_area: break; } - fault = handle_mm_fault(vma, address, flags); + fault = handle_mm_fault(vma, address, flags, regs); if (fault_signal_pending(fault, regs)) return; @@ -96,10 +99,6 @@ good_area: /* The most common case -- we are done. 
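The arm64 and csky hunks above, and the matching hexagon, ia64, m68k, microblaze, mips, nds32 and nios2 hunks that follow, all delete the per-architecture maj_flt/min_flt and PERF_COUNT_SW_PAGE_FAULTS_* bookkeeping: handle_mm_fault() now takes the register frame and the accounting happens once in common code. A toy model of that split; the types are stand-ins, not the kernel's:

#include <stdbool.h>
#include <stdio.h>

struct regs { int dummy; };                       /* stand-in for struct pt_regs */
struct task { unsigned long maj_flt, min_flt; };

static struct task current_task;

/* Stand-in for handle_mm_fault(vma, addr, flags, regs). */
static int core_handle_fault(unsigned long addr, bool major, struct regs *regs)
{
	(void)addr;               /* a real handler would resolve the VMA here */
	if (!regs)                /* in this model, NULL means "do not account" */
		return 0;
	if (major)
		current_task.maj_flt++;   /* plus a perf software event in the kernel */
	else
		current_task.min_flt++;
	return 0;
}

/* What an arch fault handler is left with: no accounting of its own. */
static void arch_do_page_fault(unsigned long addr, struct regs *regs)
{
	core_handle_fault(addr, addr & 1, regs);      /* "major" faked from the address */
}

int main(void)
{
	struct regs r;

	arch_do_page_fault(0x1000, &r);
	arch_do_page_fault(0x2001, &r);
	printf("min=%lu maj=%lu\n", current_task.min_flt, current_task.maj_flt);
	return 0;
}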
*/ if (likely(!(fault & VM_FAULT_ERROR))) { if (flags & FAULT_FLAG_ALLOW_RETRY) { - if (fault & VM_FAULT_MAJOR) - current->maj_flt++; - else - current->min_flt++; if (fault & VM_FAULT_RETRY) { flags |= FAULT_FLAG_TRIED; goto retry; diff --git a/arch/ia64/include/asm/uaccess.h b/arch/ia64/include/asm/uaccess.h index 8aa473a4b0f4..179243c3dfc7 100644 --- a/arch/ia64/include/asm/uaccess.h +++ b/arch/ia64/include/asm/uaccess.h @@ -50,7 +50,7 @@ #define get_fs() (current_thread_info()->addr_limit) #define set_fs(x) (current_thread_info()->addr_limit = (x)) -#define segment_eq(a, b) ((a).seg == (b).seg) +#define uaccess_kernel() (get_fs().seg == KERNEL_DS.seg) /* * When accessing user memory, we need to make sure the entire area really is in diff --git a/arch/ia64/kernel/syscalls/syscall.tbl b/arch/ia64/kernel/syscalls/syscall.tbl index ced9c83e47c9..f52a41f4c340 100644 --- a/arch/ia64/kernel/syscalls/syscall.tbl +++ b/arch/ia64/kernel/syscalls/syscall.tbl @@ -135,7 +135,7 @@ 123 common writev sys_writev 124 common pread64 sys_pread64 125 common pwrite64 sys_pwrite64 -126 common _sysctl sys_sysctl +126 common _sysctl sys_ni_syscall 127 common mmap sys_mmap 128 common munmap sys_munmap 129 common mlock sys_mlock diff --git a/arch/ia64/mm/fault.c b/arch/ia64/mm/fault.c index 3a4dec334cc5..cd9766d2b6e0 100644 --- a/arch/ia64/mm/fault.c +++ b/arch/ia64/mm/fault.c @@ -14,6 +14,7 @@ #include <linux/kdebug.h> #include <linux/prefetch.h> #include <linux/uaccess.h> +#include <linux/perf_event.h> #include <asm/processor.h> #include <asm/exception.h> @@ -105,6 +106,8 @@ ia64_do_page_fault (unsigned long address, unsigned long isr, struct pt_regs *re flags |= FAULT_FLAG_USER; if (mask & VM_WRITE) flags |= FAULT_FLAG_WRITE; + + perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address); retry: mmap_read_lock(mm); @@ -143,7 +146,7 @@ retry: * sure we exit gracefully rather than endlessly redo the * fault. 
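The segment_eq() to uaccess_kernel() conversions in these uaccess and segment headers replace an open-ended comparison of two segments with the one question callers actually ask, whether the current address limit is the kernel one. A user-space model of the new predicate; mm_segment_t and the two limits are simplified stand-ins:

#include <stdbool.h>
#include <stdio.h>

typedef struct { unsigned long seg; } mm_segment_t;

#define KERNEL_DS ((mm_segment_t){ ~0UL })
#define USER_DS   ((mm_segment_t){ 0x7fffffffUL })

static mm_segment_t addr_limit = { ~0UL };        /* pretend we start with the kernel limit */

static mm_segment_t get_fs(void) { return addr_limit; }
static void set_fs(mm_segment_t s) { addr_limit = s; }

/* New spelling of the old segment_eq(get_fs(), KERNEL_DS) test. */
static bool uaccess_kernel(void)
{
	return get_fs().seg == KERNEL_DS.seg;
}

int main(void)
{
	printf("kernel limit? %d\n", uaccess_kernel());
	set_fs(USER_DS);
	printf("kernel limit? %d\n", uaccess_kernel());
	return 0;
}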
*/ - fault = handle_mm_fault(vma, address, flags); + fault = handle_mm_fault(vma, address, flags, regs); if (fault_signal_pending(fault, regs)) return; @@ -166,10 +169,6 @@ retry: } if (flags & FAULT_FLAG_ALLOW_RETRY) { - if (fault & VM_FAULT_MAJOR) - current->maj_flt++; - else - current->min_flt++; if (fault & VM_FAULT_RETRY) { flags |= FAULT_FLAG_TRIED; diff --git a/arch/ia64/mm/numa.c b/arch/ia64/mm/numa.c index 5e1015eb6d0d..f34964271101 100644 --- a/arch/ia64/mm/numa.c +++ b/arch/ia64/mm/numa.c @@ -106,7 +106,5 @@ int memory_add_physaddr_to_nid(u64 addr) return 0; return nid; } - -EXPORT_SYMBOL_GPL(memory_add_physaddr_to_nid); #endif #endif diff --git a/arch/m68k/Kconfig b/arch/m68k/Kconfig index 6663f1741798..6f2f38d05772 100644 --- a/arch/m68k/Kconfig +++ b/arch/m68k/Kconfig @@ -16,6 +16,7 @@ config M68K select HAVE_DEBUG_BUGVERBOSE select GENERIC_IRQ_SHOW select GENERIC_ATOMIC64 + select NO_DMA if !MMU && !COLDFIRE select HAVE_UID16 select VIRT_TO_BUS select ARCH_HAVE_NMI_SAFE_CMPXCHG if RMW_INSNS @@ -59,9 +60,6 @@ config TIME_LOW_RES config NO_IOPORT_MAP def_bool y -config NO_DMA - def_bool (MMU && SUN3) || (!MMU && !COLDFIRE) - config ZONE_DMA bool default y diff --git a/arch/m68k/Kconfig.machine b/arch/m68k/Kconfig.machine index a82651d58af4..17e8c3a292d7 100644 --- a/arch/m68k/Kconfig.machine +++ b/arch/m68k/Kconfig.machine @@ -126,6 +126,7 @@ config SUN3 depends on MMU depends on !MMU_MOTOROLA select MMU_SUN3 if MMU + select NO_DMA select M68020 help This option enables support for the Sun 3 series of workstations diff --git a/arch/m68k/include/asm/segment.h b/arch/m68k/include/asm/segment.h index c6686559e9b7..2b5e68a71ef7 100644 --- a/arch/m68k/include/asm/segment.h +++ b/arch/m68k/include/asm/segment.h @@ -52,7 +52,7 @@ static inline void set_fs(mm_segment_t val) #define set_fs(x) (current_thread_info()->addr_limit = (x)) #endif -#define segment_eq(a, b) ((a).seg == (b).seg) +#define uaccess_kernel() (get_fs().seg == KERNEL_DS.seg) #endif /* __ASSEMBLY__ */ diff --git a/arch/m68k/include/asm/tlbflush.h b/arch/m68k/include/asm/tlbflush.h index 191e75a6bb24..5337bc2c262f 100644 --- a/arch/m68k/include/asm/tlbflush.h +++ b/arch/m68k/include/asm/tlbflush.h @@ -85,10 +85,10 @@ static inline void flush_tlb_mm(struct mm_struct *mm) static inline void flush_tlb_page(struct vm_area_struct *vma, unsigned long addr) { if (vma->vm_mm == current->active_mm) { - mm_segment_t old_fs = get_fs(); - set_fs(USER_DS); + mm_segment_t old_fs = force_uaccess_begin(); + __flush_tlb_one(addr); - set_fs(old_fs); + force_uaccess_end(old_fs); } } diff --git a/arch/m68k/kernel/syscalls/syscall.tbl b/arch/m68k/kernel/syscalls/syscall.tbl index 1a4822de7292..81fc799d8392 100644 --- a/arch/m68k/kernel/syscalls/syscall.tbl +++ b/arch/m68k/kernel/syscalls/syscall.tbl @@ -156,7 +156,7 @@ 146 common writev sys_writev 147 common getsid sys_getsid 148 common fdatasync sys_fdatasync -149 common _sysctl sys_sysctl +149 common _sysctl sys_ni_syscall 150 common mlock sys_mlock 151 common munlock sys_munlock 152 common mlockall sys_mlockall diff --git a/arch/m68k/mm/fault.c b/arch/m68k/mm/fault.c index 508abb63da67..795f483b1050 100644 --- a/arch/m68k/mm/fault.c +++ b/arch/m68k/mm/fault.c @@ -12,6 +12,7 @@ #include <linux/interrupt.h> #include <linux/module.h> #include <linux/uaccess.h> +#include <linux/perf_event.h> #include <asm/setup.h> #include <asm/traps.h> @@ -84,6 +85,8 @@ int do_page_fault(struct pt_regs *regs, unsigned long address, if (user_mode(regs)) flags |= FAULT_FLAG_USER; + + 
perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address); retry: mmap_read_lock(mm); @@ -134,7 +137,7 @@ good_area: * the fault. */ - fault = handle_mm_fault(vma, address, flags); + fault = handle_mm_fault(vma, address, flags, regs); pr_debug("handle_mm_fault returns %x\n", fault); if (fault_signal_pending(fault, regs)) @@ -150,16 +153,7 @@ good_area: BUG(); } - /* - * Major/minor page fault accounting is only done on the - * initial attempt. If we go through a retry, it is extremely - * likely that the page will be found in page cache at that point. - */ if (flags & FAULT_FLAG_ALLOW_RETRY) { - if (fault & VM_FAULT_MAJOR) - current->maj_flt++; - else - current->min_flt++; if (fault & VM_FAULT_RETRY) { flags |= FAULT_FLAG_TRIED; diff --git a/arch/microblaze/include/asm/uaccess.h b/arch/microblaze/include/asm/uaccess.h index 6723c56ec378..304b04ffea2f 100644 --- a/arch/microblaze/include/asm/uaccess.h +++ b/arch/microblaze/include/asm/uaccess.h @@ -41,7 +41,7 @@ # define get_fs() (current_thread_info()->addr_limit) # define set_fs(val) (current_thread_info()->addr_limit = (val)) -# define segment_eq(a, b) ((a).seg == (b).seg) +# define uaccess_kernel() (get_fs().seg == KERNEL_DS.seg) #ifndef CONFIG_MMU diff --git a/arch/microblaze/kernel/syscalls/syscall.tbl b/arch/microblaze/kernel/syscalls/syscall.tbl index a3f4be8e7238..b4e263916f41 100644 --- a/arch/microblaze/kernel/syscalls/syscall.tbl +++ b/arch/microblaze/kernel/syscalls/syscall.tbl @@ -156,7 +156,7 @@ 146 common writev sys_writev 147 common getsid sys_getsid 148 common fdatasync sys_fdatasync -149 common _sysctl sys_sysctl +149 common _sysctl sys_ni_syscall 150 common mlock sys_mlock 151 common munlock sys_munlock 152 common mlockall sys_mlockall diff --git a/arch/microblaze/mm/fault.c b/arch/microblaze/mm/fault.c index a2bfe587b491..b3fed2cecf84 100644 --- a/arch/microblaze/mm/fault.c +++ b/arch/microblaze/mm/fault.c @@ -28,6 +28,7 @@ #include <linux/mman.h> #include <linux/mm.h> #include <linux/interrupt.h> +#include <linux/perf_event.h> #include <asm/page.h> #include <asm/mmu.h> @@ -121,6 +122,8 @@ void do_page_fault(struct pt_regs *regs, unsigned long address, if (user_mode(regs)) flags |= FAULT_FLAG_USER; + perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address); + /* When running in the kernel we expect faults to occur only to * addresses in user space. All other faults represent errors in the * kernel and should generate an OOPS. Unfortunately, in the case of an @@ -214,7 +217,7 @@ good_area: * make sure we exit gracefully rather than endlessly redo * the fault. 
*/ - fault = handle_mm_fault(vma, address, flags); + fault = handle_mm_fault(vma, address, flags, regs); if (fault_signal_pending(fault, regs)) return; @@ -230,10 +233,6 @@ good_area: } if (flags & FAULT_FLAG_ALLOW_RETRY) { - if (unlikely(fault & VM_FAULT_MAJOR)) - current->maj_flt++; - else - current->min_flt++; if (fault & VM_FAULT_RETRY) { flags |= FAULT_FLAG_TRIED; diff --git a/arch/mips/configs/cu1000-neo_defconfig b/arch/mips/configs/cu1000-neo_defconfig index 6b471cdb16cf..e924c817f73d 100644 --- a/arch/mips/configs/cu1000-neo_defconfig +++ b/arch/mips/configs/cu1000-neo_defconfig @@ -17,7 +17,6 @@ CONFIG_CGROUP_CPUACCT=y CONFIG_NAMESPACES=y CONFIG_USER_NS=y CONFIG_CC_OPTIMIZE_FOR_SIZE=y -CONFIG_SYSCTL_SYSCALL=y CONFIG_KALLSYMS_ALL=y CONFIG_EMBEDDED=y # CONFIG_VM_EVENT_COUNTERS is not set diff --git a/arch/mips/include/asm/uaccess.h b/arch/mips/include/asm/uaccess.h index 62b298c50905..61fc01f177a6 100644 --- a/arch/mips/include/asm/uaccess.h +++ b/arch/mips/include/asm/uaccess.h @@ -72,7 +72,7 @@ extern u64 __ua_limit; #define get_fs() (current_thread_info()->addr_limit) #define set_fs(x) (current_thread_info()->addr_limit = (x)) -#define segment_eq(a, b) ((a).seg == (b).seg) +#define uaccess_kernel() (get_fs().seg == KERNEL_DS.seg) /* * eva_kernel_access() - determine whether kernel memory access on an EVA system diff --git a/arch/mips/include/asm/vdso/gettimeofday.h b/arch/mips/include/asm/vdso/gettimeofday.h index c63ddcaea54c..2203e2d0ae2a 100644 --- a/arch/mips/include/asm/vdso/gettimeofday.h +++ b/arch/mips/include/asm/vdso/gettimeofday.h @@ -167,7 +167,8 @@ static __always_inline u64 read_gic_count(const struct vdso_data *data) #endif -static __always_inline u64 __arch_get_hw_counter(s32 clock_mode) +static __always_inline u64 __arch_get_hw_counter(s32 clock_mode, + const struct vdso_data *vd) { #ifdef CONFIG_CSRC_R4K if (clock_mode == VDSO_CLOCKMODE_R4K) @@ -175,7 +176,7 @@ static __always_inline u64 __arch_get_hw_counter(s32 clock_mode) #endif #ifdef CONFIG_CLKSRC_MIPS_GIC if (clock_mode == VDSO_CLOCKMODE_GIC) - return read_gic_count(get_vdso_data()); + return read_gic_count(vd); #endif /* * Core checks mode already. 
So this raced against a concurrent diff --git a/arch/mips/kernel/syscalls/syscall_n32.tbl b/arch/mips/kernel/syscalls/syscall_n32.tbl index 6b4ee92e3aed..f9df9edb67a4 100644 --- a/arch/mips/kernel/syscalls/syscall_n32.tbl +++ b/arch/mips/kernel/syscalls/syscall_n32.tbl @@ -159,7 +159,7 @@ 149 n32 munlockall sys_munlockall 150 n32 vhangup sys_vhangup 151 n32 pivot_root sys_pivot_root -152 n32 _sysctl compat_sys_sysctl +152 n32 _sysctl sys_ni_syscall 153 n32 prctl sys_prctl 154 n32 adjtimex sys_adjtimex_time32 155 n32 setrlimit compat_sys_setrlimit diff --git a/arch/mips/kernel/syscalls/syscall_n64.tbl b/arch/mips/kernel/syscalls/syscall_n64.tbl index 391acbf425a0..557f9954a2b9 100644 --- a/arch/mips/kernel/syscalls/syscall_n64.tbl +++ b/arch/mips/kernel/syscalls/syscall_n64.tbl @@ -159,7 +159,7 @@ 149 n64 munlockall sys_munlockall 150 n64 vhangup sys_vhangup 151 n64 pivot_root sys_pivot_root -152 n64 _sysctl sys_sysctl +152 n64 _sysctl sys_ni_syscall 153 n64 prctl sys_prctl 154 n64 adjtimex sys_adjtimex 155 n64 setrlimit sys_setrlimit diff --git a/arch/mips/kernel/syscalls/syscall_o32.tbl b/arch/mips/kernel/syscalls/syscall_o32.tbl index 5727c5187508..195b43cf27c8 100644 --- a/arch/mips/kernel/syscalls/syscall_o32.tbl +++ b/arch/mips/kernel/syscalls/syscall_o32.tbl @@ -164,7 +164,7 @@ 150 o32 unused150 sys_ni_syscall 151 o32 getsid sys_getsid 152 o32 fdatasync sys_fdatasync -153 o32 _sysctl sys_sysctl compat_sys_sysctl +153 o32 _sysctl sys_ni_syscall 154 o32 mlock sys_mlock 155 o32 munlock sys_munlock 156 o32 mlockall sys_mlockall diff --git a/arch/mips/kernel/unaligned.c b/arch/mips/kernel/unaligned.c index 0adce604fa44..126a5f3f4e4c 100644 --- a/arch/mips/kernel/unaligned.c +++ b/arch/mips/kernel/unaligned.c @@ -191,17 +191,16 @@ static void emulate_load_store_insn(struct pt_regs *regs, * memory, so we need to "switch" the address limit to * user space, so that address check can work properly. 
*/ - seg = get_fs(); - set_fs(USER_DS); + seg = force_uaccess_begin(); switch (insn.spec3_format.func) { case lhe_op: if (!access_ok(addr, 2)) { - set_fs(seg); + force_uaccess_end(seg); goto sigbus; } LoadHWE(addr, value, res); if (res) { - set_fs(seg); + force_uaccess_end(seg); goto fault; } compute_return_epc(regs); @@ -209,12 +208,12 @@ static void emulate_load_store_insn(struct pt_regs *regs, break; case lwe_op: if (!access_ok(addr, 4)) { - set_fs(seg); + force_uaccess_end(seg); goto sigbus; } LoadWE(addr, value, res); if (res) { - set_fs(seg); + force_uaccess_end(seg); goto fault; } compute_return_epc(regs); @@ -222,12 +221,12 @@ static void emulate_load_store_insn(struct pt_regs *regs, break; case lhue_op: if (!access_ok(addr, 2)) { - set_fs(seg); + force_uaccess_end(seg); goto sigbus; } LoadHWUE(addr, value, res); if (res) { - set_fs(seg); + force_uaccess_end(seg); goto fault; } compute_return_epc(regs); @@ -235,35 +234,35 @@ static void emulate_load_store_insn(struct pt_regs *regs, break; case she_op: if (!access_ok(addr, 2)) { - set_fs(seg); + force_uaccess_end(seg); goto sigbus; } compute_return_epc(regs); value = regs->regs[insn.spec3_format.rt]; StoreHWE(addr, value, res); if (res) { - set_fs(seg); + force_uaccess_end(seg); goto fault; } break; case swe_op: if (!access_ok(addr, 4)) { - set_fs(seg); + force_uaccess_end(seg); goto sigbus; } compute_return_epc(regs); value = regs->regs[insn.spec3_format.rt]; StoreWE(addr, value, res); if (res) { - set_fs(seg); + force_uaccess_end(seg); goto fault; } break; default: - set_fs(seg); + force_uaccess_end(seg); goto sigill; } - set_fs(seg); + force_uaccess_end(seg); } #endif break; diff --git a/arch/mips/kvm/emulate.c b/arch/mips/kvm/emulate.c index 703782355318..d70c4f8e14e2 100644 --- a/arch/mips/kvm/emulate.c +++ b/arch/mips/kvm/emulate.c @@ -1935,7 +1935,7 @@ enum emulation_result kvm_mips_emulate_load(union mips_instruction inst, case lwu_op: vcpu->mmio_needed = 1; /* unsigned */ - /* fall through */ + fallthrough; #endif case lw_op: run->mmio.len = 4; diff --git a/arch/mips/kvm/vz.c b/arch/mips/kvm/vz.c index 3932f767e938..c299e5d6d69c 100644 --- a/arch/mips/kvm/vz.c +++ b/arch/mips/kvm/vz.c @@ -29,7 +29,9 @@ #include <linux/kvm_host.h> #include "interrupt.h" +#ifdef CONFIG_CPU_LOONGSON64 #include "loongson_regs.h" +#endif #include "trace.h" @@ -1142,7 +1144,6 @@ static enum emulation_result kvm_vz_gpsi_cache(union mips_instruction inst, #ifdef CONFIG_CPU_LOONGSON64 static enum emulation_result kvm_vz_gpsi_lwc2(union mips_instruction inst, u32 *opc, u32 cause, - struct kvm_run *run, struct kvm_vcpu *vcpu) { unsigned int rs, rd; @@ -1240,7 +1241,7 @@ static enum emulation_result kvm_trap_vz_handle_gpsi(u32 cause, u32 *opc, #endif #ifdef CONFIG_CPU_LOONGSON64 case lwc2_op: - er = kvm_vz_gpsi_lwc2(inst, opc, cause, run, vcpu); + er = kvm_vz_gpsi_lwc2(inst, opc, cause, vcpu); break; #endif case spec3_op: diff --git a/arch/mips/mm/fault.c b/arch/mips/mm/fault.c index 01b168a90434..7c871b14e74a 100644 --- a/arch/mips/mm/fault.c +++ b/arch/mips/mm/fault.c @@ -96,6 +96,8 @@ static void __kprobes __do_page_fault(struct pt_regs *regs, unsigned long write, if (user_mode(regs)) flags |= FAULT_FLAG_USER; + + perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address); retry: mmap_read_lock(mm); vma = find_vma(mm, address); @@ -152,12 +154,11 @@ good_area: * make sure we exit gracefully rather than endlessly redo * the fault. 
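The mips unaligned.c conversion above, like the m68k and nds32 ones earlier, replaces the open-coded seg = get_fs(); set_fs(USER_DS); ... set_fs(seg); sequence with force_uaccess_begin()/force_uaccess_end(), which bundle the save, override and restore. A self-contained model of that pairing and of the early-return discipline it still requires:

#include <stdio.h>

typedef struct { unsigned long seg; } mm_segment_t;

#define KERNEL_DS ((mm_segment_t){ ~0UL })
#define USER_DS   ((mm_segment_t){ 0x7fffffffUL })

static mm_segment_t addr_limit = { ~0UL };

static mm_segment_t get_fs(void) { return addr_limit; }
static void set_fs(mm_segment_t s) { addr_limit = s; }

static mm_segment_t force_uaccess_begin(void)
{
	mm_segment_t old = get_fs();

	set_fs(USER_DS);                 /* user limit in force from here on */
	return old;
}

static void force_uaccess_end(mm_segment_t old)
{
	set_fs(old);
}

/* Shape of the converted emulation code: every exit path restores. */
static int emulate_user_access(int fail)
{
	mm_segment_t seg = force_uaccess_begin();

	if (fail) {
		force_uaccess_end(seg);
		return -1;
	}
	/* ... touch user memory with the user limit active ... */
	force_uaccess_end(seg);
	return 0;
}

int main(void)
{
	printf("ok=%d, limit restored to %#lx\n",
	       emulate_user_access(0), get_fs().seg);
	return 0;
}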
*/ - fault = handle_mm_fault(vma, address, flags); + fault = handle_mm_fault(vma, address, flags, regs); if (fault_signal_pending(fault, regs)) return; - perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address); if (unlikely(fault & VM_FAULT_ERROR)) { if (fault & VM_FAULT_OOM) goto out_of_memory; @@ -168,15 +169,6 @@ good_area: BUG(); } if (flags & FAULT_FLAG_ALLOW_RETRY) { - if (fault & VM_FAULT_MAJOR) { - perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, - regs, address); - tsk->maj_flt++; - } else { - perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, - regs, address); - tsk->min_flt++; - } if (fault & VM_FAULT_RETRY) { flags |= FAULT_FLAG_TRIED; diff --git a/arch/nds32/include/asm/uaccess.h b/arch/nds32/include/asm/uaccess.h index 3a9219f53ee0..010ba5f1d7dd 100644 --- a/arch/nds32/include/asm/uaccess.h +++ b/arch/nds32/include/asm/uaccess.h @@ -44,7 +44,7 @@ static inline void set_fs(mm_segment_t fs) current_thread_info()->addr_limit = fs; } -#define segment_eq(a, b) ((a) == (b)) +#define uaccess_kernel() (get_fs() == KERNEL_DS) #define __range_ok(addr, size) (size <= get_fs() && addr <= (get_fs() -size)) diff --git a/arch/nds32/kernel/process.c b/arch/nds32/kernel/process.c index e85bbbadc0e7..e01ad5d17224 100644 --- a/arch/nds32/kernel/process.c +++ b/arch/nds32/kernel/process.c @@ -121,7 +121,7 @@ void show_regs(struct pt_regs *regs) regs->uregs[3], regs->uregs[2], regs->uregs[1], regs->uregs[0]); pr_info(" IRQs o%s Segment %s\n", interrupts_enabled(regs) ? "n" : "ff", - segment_eq(get_fs(), KERNEL_DS)? "kernel" : "user"); + uaccess_kernel() ? "kernel" : "user"); } EXPORT_SYMBOL(show_regs); diff --git a/arch/nds32/mm/alignment.c b/arch/nds32/mm/alignment.c index c8b9061a2ee3..1eb7ded6992b 100644 --- a/arch/nds32/mm/alignment.c +++ b/arch/nds32/mm/alignment.c @@ -512,7 +512,7 @@ int do_unaligned_access(unsigned long addr, struct pt_regs *regs) { unsigned long inst; int ret = -EFAULT; - mm_segment_t seg = get_fs(); + mm_segment_t seg; inst = get_inst(regs->ipc); @@ -520,13 +520,12 @@ int do_unaligned_access(unsigned long addr, struct pt_regs *regs) "Faulting addr: 0x%08lx, pc: 0x%08lx [inst: 0x%08lx ]\n", addr, regs->ipc, inst); - set_fs(USER_DS); - + seg = force_uaccess_begin(); if (inst & NDS32_16BIT_INSTRUCTION) ret = do_16((inst >> 16) & 0xffff, regs); else ret = do_32(inst, regs); - set_fs(seg); + force_uaccess_end(seg); return ret; } diff --git a/arch/nds32/mm/fault.c b/arch/nds32/mm/fault.c index 8fb73f6401a0..f02524eb6d56 100644 --- a/arch/nds32/mm/fault.c +++ b/arch/nds32/mm/fault.c @@ -121,6 +121,8 @@ void do_page_fault(unsigned long entry, unsigned long addr, if (unlikely(faulthandler_disabled() || !mm)) goto no_context; + perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, addr); + /* * As per x86, we may deadlock here. However, since the kernel only * validly references user space from well defined areas of the code, @@ -206,7 +208,7 @@ good_area: * the fault. */ - fault = handle_mm_fault(vma, addr, flags); + fault = handle_mm_fault(vma, addr, flags, regs); /* * If we need to retry but a fatal signal is pending, handle the @@ -228,22 +230,7 @@ good_area: goto bad_area; } - /* - * Major/minor page fault accounting is only done on the initial - * attempt. If we go through a retry, it is extremely likely that the - * page will be found in page cache at that point. 
- */ - perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, addr); if (flags & FAULT_FLAG_ALLOW_RETRY) { - if (fault & VM_FAULT_MAJOR) { - tsk->maj_flt++; - perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, - 1, regs, addr); - } else { - tsk->min_flt++; - perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, - 1, regs, addr); - } if (fault & VM_FAULT_RETRY) { flags |= FAULT_FLAG_TRIED; diff --git a/arch/nios2/include/asm/uaccess.h b/arch/nios2/include/asm/uaccess.h index e83f831a76f9..a741abbed6fb 100644 --- a/arch/nios2/include/asm/uaccess.h +++ b/arch/nios2/include/asm/uaccess.h @@ -30,7 +30,7 @@ #define get_fs() (current_thread_info()->addr_limit) #define set_fs(seg) (current_thread_info()->addr_limit = (seg)) -#define segment_eq(a, b) ((a).seg == (b).seg) +#define uaccess_kernel() (get_fs().seg == KERNEL_DS.seg) #define __access_ok(addr, len) \ (((signed long)(((long)get_fs().seg) & \ diff --git a/arch/nios2/mm/fault.c b/arch/nios2/mm/fault.c index 4112ef0e247e..9476feecf512 100644 --- a/arch/nios2/mm/fault.c +++ b/arch/nios2/mm/fault.c @@ -24,6 +24,7 @@ #include <linux/mm.h> #include <linux/extable.h> #include <linux/uaccess.h> +#include <linux/perf_event.h> #include <asm/mmu_context.h> #include <asm/traps.h> @@ -83,6 +84,8 @@ asmlinkage void do_page_fault(struct pt_regs *regs, unsigned long cause, if (user_mode(regs)) flags |= FAULT_FLAG_USER; + perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address); + if (!mmap_read_trylock(mm)) { if (!user_mode(regs) && !search_exception_tables(regs->ea)) goto bad_area_nosemaphore; @@ -131,7 +134,7 @@ good_area: * make sure we exit gracefully rather than endlessly redo * the fault. */ - fault = handle_mm_fault(vma, address, flags); + fault = handle_mm_fault(vma, address, flags, regs); if (fault_signal_pending(fault, regs)) return; @@ -146,16 +149,7 @@ good_area: BUG(); } - /* - * Major/minor page fault accounting is only done on the - * initial attempt. If we go through a retry, it is extremely - * likely that the page will be found in page cache at that point. - */ if (flags & FAULT_FLAG_ALLOW_RETRY) { - if (fault & VM_FAULT_MAJOR) - current->maj_flt++; - else - current->min_flt++; if (fault & VM_FAULT_RETRY) { flags |= FAULT_FLAG_TRIED; diff --git a/arch/openrisc/include/asm/io.h b/arch/openrisc/include/asm/io.h index db02fb2077d9..7d6b4a77b379 100644 --- a/arch/openrisc/include/asm/io.h +++ b/arch/openrisc/include/asm/io.h @@ -14,6 +14,8 @@ #ifndef __ASM_OPENRISC_IO_H #define __ASM_OPENRISC_IO_H +#include <linux/types.h> + /* * PCI: can we really do 0 here if we have no port IO? 
*/ @@ -25,9 +27,12 @@ #define PIO_OFFSET 0 #define PIO_MASK 0 -#include <asm-generic/io.h> - +#define ioremap ioremap void __iomem *ioremap(phys_addr_t offset, unsigned long size); + +#define iounmap iounmap extern void iounmap(void *addr); +#include <asm-generic/io.h> + #endif diff --git a/arch/openrisc/include/asm/uaccess.h b/arch/openrisc/include/asm/uaccess.h index 17c24f14615f..f0390211236b 100644 --- a/arch/openrisc/include/asm/uaccess.h +++ b/arch/openrisc/include/asm/uaccess.h @@ -43,21 +43,22 @@ #define get_fs() (current_thread_info()->addr_limit) #define set_fs(x) (current_thread_info()->addr_limit = (x)) -#define segment_eq(a, b) ((a) == (b)) +#define uaccess_kernel() (get_fs() == KERNEL_DS) /* Ensure that the range from addr to addr+size is all within the process' * address space */ -#define __range_ok(addr, size) (size <= get_fs() && addr <= (get_fs()-size)) +static inline int __range_ok(unsigned long addr, unsigned long size) +{ + const mm_segment_t fs = get_fs(); -/* Ensure that addr is below task's addr_limit */ -#define __addr_ok(addr) ((unsigned long) addr < get_fs()) + return size <= fs && addr <= (fs - size); +} #define access_ok(addr, size) \ ({ \ - unsigned long __ao_addr = (unsigned long)(addr); \ - unsigned long __ao_size = (unsigned long)(size); \ - __range_ok(__ao_addr, __ao_size); \ + __chk_user_ptr(addr); \ + __range_ok((unsigned long)(addr), (size)); \ }) /* @@ -100,7 +101,7 @@ extern long __put_user_bad(void); #define __put_user_check(x, ptr, size) \ ({ \ long __pu_err = -EFAULT; \ - __typeof__(*(ptr)) *__pu_addr = (ptr); \ + __typeof__(*(ptr)) __user *__pu_addr = (ptr); \ if (access_ok(__pu_addr, size)) \ __put_user_size((x), __pu_addr, (size), __pu_err); \ __pu_err; \ @@ -173,7 +174,7 @@ struct __large_struct { #define __get_user_check(x, ptr, size) \ ({ \ long __gu_err = -EFAULT, __gu_val = 0; \ - const __typeof__(*(ptr)) * __gu_addr = (ptr); \ + const __typeof__(*(ptr)) __user *__gu_addr = (ptr); \ if (access_ok(__gu_addr, size)) \ __get_user_size(__gu_val, __gu_addr, (size), __gu_err); \ (x) = (__force __typeof__(*(ptr)))__gu_val; \ @@ -241,17 +242,17 @@ raw_copy_from_user(void *to, const void __user *from, unsigned long size) return __copy_tofrom_user(to, (__force const void *)from, size); } static inline unsigned long -raw_copy_to_user(void *to, const void __user *from, unsigned long size) +raw_copy_to_user(void __user *to, const void *from, unsigned long size) { return __copy_tofrom_user((__force void *)to, from, size); } #define INLINE_COPY_FROM_USER #define INLINE_COPY_TO_USER -extern unsigned long __clear_user(void *addr, unsigned long size); +extern unsigned long __clear_user(void __user *addr, unsigned long size); static inline __must_check unsigned long -clear_user(void *addr, unsigned long size) +clear_user(void __user *addr, unsigned long size) { if (likely(access_ok(addr, size))) size = __clear_user(addr, size); diff --git a/arch/openrisc/kernel/setup.c b/arch/openrisc/kernel/setup.c index 8aa438e1f51f..b18e775f8be3 100644 --- a/arch/openrisc/kernel/setup.c +++ b/arch/openrisc/kernel/setup.c @@ -292,13 +292,15 @@ void __init setup_arch(char **cmdline_p) init_mm.brk = (unsigned long)_end; #ifdef CONFIG_BLK_DEV_INITRD - initrd_start = (unsigned long)&__initrd_start; - initrd_end = (unsigned long)&__initrd_end; if (initrd_start == initrd_end) { + printk(KERN_INFO "Initial ramdisk not found\n"); initrd_start = 0; initrd_end = 0; + } else { + printk(KERN_INFO "Initial ramdisk at: 0x%p (%lu bytes)\n", + (void *)(initrd_start), initrd_end - 
initrd_start); + initrd_below_start_ok = 1; } - initrd_below_start_ok = 1; #endif /* setup memblock allocator */ diff --git a/arch/openrisc/kernel/signal.c b/arch/openrisc/kernel/signal.c index 4f0754874d78..97804f21a40c 100644 --- a/arch/openrisc/kernel/signal.c +++ b/arch/openrisc/kernel/signal.c @@ -68,7 +68,7 @@ static int restore_sigcontext(struct pt_regs *regs, asmlinkage long _sys_rt_sigreturn(struct pt_regs *regs) { - struct rt_sigframe *frame = (struct rt_sigframe __user *)regs->sp; + struct rt_sigframe __user *frame = (struct rt_sigframe __user *)regs->sp; sigset_t set; /* @@ -76,7 +76,7 @@ asmlinkage long _sys_rt_sigreturn(struct pt_regs *regs) * then frame should be dword aligned here. If it's * not, then the user is trying to mess with us. */ - if (((long)frame) & 3) + if (((unsigned long)frame) & 3) goto badframe; if (!access_ok(frame, sizeof(*frame))) @@ -151,7 +151,7 @@ static inline void __user *get_sigframe(struct ksignal *ksig, static int setup_rt_frame(struct ksignal *ksig, sigset_t *set, struct pt_regs *regs) { - struct rt_sigframe *frame; + struct rt_sigframe __user *frame; unsigned long return_ip; int err = 0; @@ -181,10 +181,10 @@ static int setup_rt_frame(struct ksignal *ksig, sigset_t *set, l.ori r11,r0,__NR_sigreturn l.sys 1 */ - err |= __put_user(0xa960, (short *)(frame->retcode + 0)); - err |= __put_user(__NR_rt_sigreturn, (short *)(frame->retcode + 2)); - err |= __put_user(0x20000001, (unsigned long *)(frame->retcode + 4)); - err |= __put_user(0x15000000, (unsigned long *)(frame->retcode + 8)); + err |= __put_user(0xa960, (short __user *)(frame->retcode + 0)); + err |= __put_user(__NR_rt_sigreturn, (short __user *)(frame->retcode + 2)); + err |= __put_user(0x20000001, (unsigned long __user *)(frame->retcode + 4)); + err |= __put_user(0x15000000, (unsigned long __user *)(frame->retcode + 8)); if (err) return -EFAULT; diff --git a/arch/openrisc/kernel/smp.c b/arch/openrisc/kernel/smp.c index bd1e660bbc89..29c82ef2e207 100644 --- a/arch/openrisc/kernel/smp.c +++ b/arch/openrisc/kernel/smp.c @@ -219,30 +219,99 @@ static inline void ipi_flush_tlb_all(void *ignored) local_flush_tlb_all(); } +static inline void ipi_flush_tlb_mm(void *info) +{ + struct mm_struct *mm = (struct mm_struct *)info; + + local_flush_tlb_mm(mm); +} + +static void smp_flush_tlb_mm(struct cpumask *cmask, struct mm_struct *mm) +{ + unsigned int cpuid; + + if (cpumask_empty(cmask)) + return; + + cpuid = get_cpu(); + + if (cpumask_any_but(cmask, cpuid) >= nr_cpu_ids) { + /* local cpu is the only cpu present in cpumask */ + local_flush_tlb_mm(mm); + } else { + on_each_cpu_mask(cmask, ipi_flush_tlb_mm, mm, 1); + } + put_cpu(); +} + +struct flush_tlb_data { + unsigned long addr1; + unsigned long addr2; +}; + +static inline void ipi_flush_tlb_page(void *info) +{ + struct flush_tlb_data *fd = (struct flush_tlb_data *)info; + + local_flush_tlb_page(NULL, fd->addr1); +} + +static inline void ipi_flush_tlb_range(void *info) +{ + struct flush_tlb_data *fd = (struct flush_tlb_data *)info; + + local_flush_tlb_range(NULL, fd->addr1, fd->addr2); +} + +static void smp_flush_tlb_range(struct cpumask *cmask, unsigned long start, + unsigned long end) +{ + unsigned int cpuid; + + if (cpumask_empty(cmask)) + return; + + cpuid = get_cpu(); + + if (cpumask_any_but(cmask, cpuid) >= nr_cpu_ids) { + /* local cpu is the only cpu present in cpumask */ + if ((end - start) <= PAGE_SIZE) + local_flush_tlb_page(NULL, start); + else + local_flush_tlb_range(NULL, start, end); + } else { + struct flush_tlb_data fd; + + fd.addr1 = 
start; + fd.addr2 = end; + + if ((end - start) <= PAGE_SIZE) + on_each_cpu_mask(cmask, ipi_flush_tlb_page, &fd, 1); + else + on_each_cpu_mask(cmask, ipi_flush_tlb_range, &fd, 1); + } + put_cpu(); +} + void flush_tlb_all(void) { on_each_cpu(ipi_flush_tlb_all, NULL, 1); } -/* - * FIXME: implement proper functionality instead of flush_tlb_all. - * *But*, as things currently stands, the local_tlb_flush_* functions will - * all boil down to local_tlb_flush_all anyway. - */ void flush_tlb_mm(struct mm_struct *mm) { - on_each_cpu(ipi_flush_tlb_all, NULL, 1); + smp_flush_tlb_mm(mm_cpumask(mm), mm); } void flush_tlb_page(struct vm_area_struct *vma, unsigned long uaddr) { - on_each_cpu(ipi_flush_tlb_all, NULL, 1); + smp_flush_tlb_range(mm_cpumask(vma->vm_mm), uaddr, uaddr + PAGE_SIZE); } void flush_tlb_range(struct vm_area_struct *vma, unsigned long start, unsigned long end) { - on_each_cpu(ipi_flush_tlb_all, NULL, 1); + smp_flush_tlb_range(mm_cpumask(vma->vm_mm), start, end); } /* Instruction cache invalidate - performed on each cpu */ diff --git a/arch/openrisc/kernel/stacktrace.c b/arch/openrisc/kernel/stacktrace.c index 43f140a28bc7..54d38809e22c 100644 --- a/arch/openrisc/kernel/stacktrace.c +++ b/arch/openrisc/kernel/stacktrace.c @@ -13,6 +13,7 @@ #include <linux/export.h> #include <linux/sched.h> #include <linux/sched/debug.h> +#include <linux/sched/task_stack.h> #include <linux/stacktrace.h> #include <asm/processor.h> @@ -68,12 +69,25 @@ void save_stack_trace_tsk(struct task_struct *tsk, struct stack_trace *trace) { unsigned long *sp = NULL; + if (!try_get_task_stack(tsk)) + return; + if (tsk == current) sp = (unsigned long *) &sp; - else - sp = (unsigned long *) KSTK_ESP(tsk); + else { + unsigned long ksp; + + /* Locate stack from kernel context */ + ksp = task_thread_info(tsk)->ksp; + ksp += STACK_FRAME_OVERHEAD; /* redzone */ + ksp += sizeof(struct pt_regs); + + sp = (unsigned long *) ksp; + } unwind_stack(trace, sp, save_stack_address_nosched); + + put_task_stack(tsk); } EXPORT_SYMBOL_GPL(save_stack_trace_tsk); diff --git a/arch/openrisc/kernel/vmlinux.lds.S b/arch/openrisc/kernel/vmlinux.lds.S index 60449fd7f16f..22fbc5fb24b3 100644 --- a/arch/openrisc/kernel/vmlinux.lds.S +++ b/arch/openrisc/kernel/vmlinux.lds.S @@ -96,18 +96,6 @@ SECTIONS __init_end = .; - . = ALIGN(PAGE_SIZE); - .initrd : AT(ADDR(.initrd) - LOAD_OFFSET) - { - __initrd_start = .; - *(.initrd) - __initrd_end = .; - FILL (0); - . = ALIGN (PAGE_SIZE); - } - - __vmlinux_end = .; /* last address of the physical file */ - BSS_SECTION(0, 0, 0x20) _end = .; diff --git a/arch/openrisc/mm/fault.c b/arch/openrisc/mm/fault.c index d2224ccca294..ca97d9baab51 100644 --- a/arch/openrisc/mm/fault.c +++ b/arch/openrisc/mm/fault.c @@ -15,6 +15,7 @@ #include <linux/interrupt.h> #include <linux/extable.h> #include <linux/sched/signal.h> +#include <linux/perf_event.h> #include <linux/uaccess.h> #include <asm/siginfo.h> @@ -103,6 +104,8 @@ asmlinkage void do_page_fault(struct pt_regs *regs, unsigned long address, if (in_interrupt() || !mm) goto no_context; + perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address); + retry: mmap_read_lock(mm); vma = find_vma(mm, address); @@ -159,7 +162,7 @@ good_area: * the fault. 
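The openrisc stacktrace hunk above also starts pinning the target task's stack before walking it; without try_get_task_stack() the stack pages of an exiting task can be freed underneath the unwinder. Hedged sketch of just that guard (the walker body is left out):

#include <linux/sched.h>
#include <linux/sched/task_stack.h>

static void sketch_walk_task_stack(struct task_struct *tsk)
{
	if (!try_get_task_stack(tsk))
		return;			/* task already released its stack */

	/* ... unwind while the stack is pinned ... */

	put_task_stack(tsk);
}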
*/ - fault = handle_mm_fault(vma, address, flags); + fault = handle_mm_fault(vma, address, flags, regs); if (fault_signal_pending(fault, regs)) return; @@ -176,10 +179,6 @@ good_area: if (flags & FAULT_FLAG_ALLOW_RETRY) { /*RGD modeled on Cris */ - if (fault & VM_FAULT_MAJOR) - tsk->maj_flt++; - else - tsk->min_flt++; if (fault & VM_FAULT_RETRY) { flags |= FAULT_FLAG_TRIED; diff --git a/arch/openrisc/mm/tlb.c b/arch/openrisc/mm/tlb.c index 4b680aed8f5f..2b6feabf6381 100644 --- a/arch/openrisc/mm/tlb.c +++ b/arch/openrisc/mm/tlb.c @@ -137,21 +137,28 @@ void local_flush_tlb_mm(struct mm_struct *mm) void switch_mm(struct mm_struct *prev, struct mm_struct *next, struct task_struct *next_tsk) { + unsigned int cpu; + + if (unlikely(prev == next)) + return; + + cpu = smp_processor_id(); + + cpumask_clear_cpu(cpu, mm_cpumask(prev)); + cpumask_set_cpu(cpu, mm_cpumask(next)); + /* remember the pgd for the fault handlers * this is similar to the pgd register in some other CPU's. * we need our own copy of it because current and active_mm * might be invalid at points where we still need to derefer * the pgd. */ - current_pgd[smp_processor_id()] = next->pgd; + current_pgd[cpu] = next->pgd; /* We don't have context support implemented, so flush all * entries belonging to previous map */ - - if (prev != next) - local_flush_tlb_mm(prev); - + local_flush_tlb_mm(prev); } /* diff --git a/arch/parisc/include/asm/atomic.h b/arch/parisc/include/asm/atomic.h index 03862320f779..21b375c67e53 100644 --- a/arch/parisc/include/asm/atomic.h +++ b/arch/parisc/include/asm/atomic.h @@ -34,13 +34,13 @@ extern arch_spinlock_t __atomic_hash[ATOMIC_HASH_SIZE] __lock_aligned; /* Can't use raw_spin_lock_irq because of #include problems, so * this is the substitute */ #define _atomic_spin_lock_irqsave(l,f) do { \ - arch_spinlock_t *s = ATOMIC_HASH(l); \ + arch_spinlock_t *s = ATOMIC_HASH(l); \ local_irq_save(f); \ arch_spin_lock(s); \ } while(0) #define _atomic_spin_unlock_irqrestore(l,f) do { \ - arch_spinlock_t *s = ATOMIC_HASH(l); \ + arch_spinlock_t *s = ATOMIC_HASH(l); \ arch_spin_unlock(s); \ local_irq_restore(f); \ } while(0) @@ -85,7 +85,7 @@ static __inline__ void atomic_##op(int i, atomic_t *v) \ _atomic_spin_lock_irqsave(v, flags); \ v->counter c_op i; \ _atomic_spin_unlock_irqrestore(v, flags); \ -} \ +} #define ATOMIC_OP_RETURN(op, c_op) \ static __inline__ int atomic_##op##_return(int i, atomic_t *v) \ @@ -148,7 +148,7 @@ static __inline__ void atomic64_##op(s64 i, atomic64_t *v) \ _atomic_spin_lock_irqsave(v, flags); \ v->counter c_op i; \ _atomic_spin_unlock_irqrestore(v, flags); \ -} \ +} #define ATOMIC64_OP_RETURN(op, c_op) \ static __inline__ s64 atomic64_##op##_return(s64 i, atomic64_t *v) \ diff --git a/arch/parisc/include/asm/barrier.h b/arch/parisc/include/asm/barrier.h index dbaaca84f27f..640d46edf32e 100644 --- a/arch/parisc/include/asm/barrier.h +++ b/arch/parisc/include/asm/barrier.h @@ -26,6 +26,67 @@ #define __smp_rmb() mb() #define __smp_wmb() mb() +#define __smp_store_release(p, v) \ +do { \ + typeof(p) __p = (p); \ + union { typeof(*p) __val; char __c[1]; } __u = \ + { .__val = (__force typeof(*p)) (v) }; \ + compiletime_assert_atomic_type(*p); \ + switch (sizeof(*p)) { \ + case 1: \ + asm volatile("stb,ma %0,0(%1)" \ + : : "r"(*(__u8 *)__u.__c), "r"(__p) \ + : "memory"); \ + break; \ + case 2: \ + asm volatile("sth,ma %0,0(%1)" \ + : : "r"(*(__u16 *)__u.__c), "r"(__p) \ + : "memory"); \ + break; \ + case 4: \ + asm volatile("stw,ma %0,0(%1)" \ + : : "r"(*(__u32 *)__u.__c), "r"(__p) \ + : 
"memory"); \ + break; \ + case 8: \ + if (IS_ENABLED(CONFIG_64BIT)) \ + asm volatile("std,ma %0,0(%1)" \ + : : "r"(*(__u64 *)__u.__c), "r"(__p) \ + : "memory"); \ + break; \ + } \ +} while (0) + +#define __smp_load_acquire(p) \ +({ \ + union { typeof(*p) __val; char __c[1]; } __u; \ + typeof(p) __p = (p); \ + compiletime_assert_atomic_type(*p); \ + switch (sizeof(*p)) { \ + case 1: \ + asm volatile("ldb,ma 0(%1),%0" \ + : "=r"(*(__u8 *)__u.__c) : "r"(__p) \ + : "memory"); \ + break; \ + case 2: \ + asm volatile("ldh,ma 0(%1),%0" \ + : "=r"(*(__u16 *)__u.__c) : "r"(__p) \ + : "memory"); \ + break; \ + case 4: \ + asm volatile("ldw,ma 0(%1),%0" \ + : "=r"(*(__u32 *)__u.__c) : "r"(__p) \ + : "memory"); \ + break; \ + case 8: \ + if (IS_ENABLED(CONFIG_64BIT)) \ + asm volatile("ldd,ma 0(%1),%0" \ + : "=r"(*(__u64 *)__u.__c) : "r"(__p) \ + : "memory"); \ + break; \ + } \ + __u.__val; \ +}) #include <asm-generic/barrier.h> #endif /* !__ASSEMBLY__ */ diff --git a/arch/parisc/include/asm/io.h b/arch/parisc/include/asm/io.h index 116effe26143..45e20d38dc59 100644 --- a/arch/parisc/include/asm/io.h +++ b/arch/parisc/include/asm/io.h @@ -303,8 +303,8 @@ extern void outsl (unsigned long port, const void *src, unsigned long count); #define ioread64be ioread64be #define iowrite64 iowrite64 #define iowrite64be iowrite64be -extern u64 ioread64(void __iomem *addr); -extern u64 ioread64be(void __iomem *addr); +extern u64 ioread64(const void __iomem *addr); +extern u64 ioread64be(const void __iomem *addr); extern void iowrite64(u64 val, void __iomem *addr); extern void iowrite64be(u64 val, void __iomem *addr); diff --git a/arch/parisc/include/asm/pgalloc.h b/arch/parisc/include/asm/pgalloc.h index cc7ecc2ef55d..a6482b2ce0ea 100644 --- a/arch/parisc/include/asm/pgalloc.h +++ b/arch/parisc/include/asm/pgalloc.h @@ -10,6 +10,7 @@ #include <asm/cache.h> +#define __HAVE_ARCH_PMD_ALLOC_ONE #define __HAVE_ARCH_PMD_FREE #define __HAVE_ARCH_PGD_FREE #include <asm-generic/pgalloc.h> @@ -67,6 +68,11 @@ static inline void pud_populate(struct mm_struct *mm, pud_t *pud, pmd_t *pmd) (__u32)(__pa((unsigned long)pmd) >> PxD_VALUE_SHIFT))); } +static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long address) +{ + return (pmd_t *)__get_free_pages(GFP_PGTABLE_KERNEL, PMD_ORDER); +} + static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd) { if (pmd_flag(*pmd) & PxD_FLAG_ATTACHED) { diff --git a/arch/parisc/include/asm/uaccess.h b/arch/parisc/include/asm/uaccess.h index ebbb9ffe038c..ed2cd4fb479b 100644 --- a/arch/parisc/include/asm/uaccess.h +++ b/arch/parisc/include/asm/uaccess.h @@ -14,7 +14,7 @@ #define KERNEL_DS ((mm_segment_t){0}) #define USER_DS ((mm_segment_t){1}) -#define segment_eq(a, b) ((a).seg == (b).seg) +#define uaccess_kernel() (get_fs().seg == KERNEL_DS.seg) #define get_fs() (current_thread_info()->addr_limit) #define set_fs(x) (current_thread_info()->addr_limit = (x)) diff --git a/arch/parisc/kernel/ftrace.c b/arch/parisc/kernel/ftrace.c index 1df0f67ed667..4bab21c71055 100644 --- a/arch/parisc/kernel/ftrace.c +++ b/arch/parisc/kernel/ftrace.c @@ -64,7 +64,8 @@ void notrace __hot ftrace_function_trampoline(unsigned long parent, function_trace_op, regs); #ifdef CONFIG_FUNCTION_GRAPH_TRACER - if (ftrace_graph_return != (trace_func_graph_ret_t) ftrace_stub || + if (dereference_function_descriptor(ftrace_graph_return) != + dereference_function_descriptor(ftrace_stub) || ftrace_graph_entry != ftrace_graph_entry_stub) { unsigned long *parent_rp; diff --git a/arch/parisc/kernel/syscalls/syscall.tbl 
b/arch/parisc/kernel/syscalls/syscall.tbl index 292baabefade..def64d221cd4 100644 --- a/arch/parisc/kernel/syscalls/syscall.tbl +++ b/arch/parisc/kernel/syscalls/syscall.tbl @@ -163,7 +163,7 @@ 146 common writev sys_writev compat_sys_writev 147 common getsid sys_getsid 148 common fdatasync sys_fdatasync -149 common _sysctl sys_sysctl compat_sys_sysctl +149 common _sysctl sys_ni_syscall 150 common mlock sys_mlock 151 common munlock sys_munlock 152 common mlockall sys_mlockall diff --git a/arch/parisc/lib/iomap.c b/arch/parisc/lib/iomap.c index 0195aec657e2..ce400417d54e 100644 --- a/arch/parisc/lib/iomap.c +++ b/arch/parisc/lib/iomap.c @@ -43,13 +43,13 @@ #endif struct iomap_ops { - unsigned int (*read8)(void __iomem *); - unsigned int (*read16)(void __iomem *); - unsigned int (*read16be)(void __iomem *); - unsigned int (*read32)(void __iomem *); - unsigned int (*read32be)(void __iomem *); - u64 (*read64)(void __iomem *); - u64 (*read64be)(void __iomem *); + unsigned int (*read8)(const void __iomem *); + unsigned int (*read16)(const void __iomem *); + unsigned int (*read16be)(const void __iomem *); + unsigned int (*read32)(const void __iomem *); + unsigned int (*read32be)(const void __iomem *); + u64 (*read64)(const void __iomem *); + u64 (*read64be)(const void __iomem *); void (*write8)(u8, void __iomem *); void (*write16)(u16, void __iomem *); void (*write16be)(u16, void __iomem *); @@ -57,9 +57,9 @@ struct iomap_ops { void (*write32be)(u32, void __iomem *); void (*write64)(u64, void __iomem *); void (*write64be)(u64, void __iomem *); - void (*read8r)(void __iomem *, void *, unsigned long); - void (*read16r)(void __iomem *, void *, unsigned long); - void (*read32r)(void __iomem *, void *, unsigned long); + void (*read8r)(const void __iomem *, void *, unsigned long); + void (*read16r)(const void __iomem *, void *, unsigned long); + void (*read32r)(const void __iomem *, void *, unsigned long); void (*write8r)(void __iomem *, const void *, unsigned long); void (*write16r)(void __iomem *, const void *, unsigned long); void (*write32r)(void __iomem *, const void *, unsigned long); @@ -69,17 +69,17 @@ struct iomap_ops { #define ADDR2PORT(addr) ((unsigned long __force)(addr) & 0xffffff) -static unsigned int ioport_read8(void __iomem *addr) +static unsigned int ioport_read8(const void __iomem *addr) { return inb(ADDR2PORT(addr)); } -static unsigned int ioport_read16(void __iomem *addr) +static unsigned int ioport_read16(const void __iomem *addr) { return inw(ADDR2PORT(addr)); } -static unsigned int ioport_read32(void __iomem *addr) +static unsigned int ioport_read32(const void __iomem *addr) { return inl(ADDR2PORT(addr)); } @@ -99,17 +99,17 @@ static void ioport_write32(u32 datum, void __iomem *addr) outl(datum, ADDR2PORT(addr)); } -static void ioport_read8r(void __iomem *addr, void *dst, unsigned long count) +static void ioport_read8r(const void __iomem *addr, void *dst, unsigned long count) { insb(ADDR2PORT(addr), dst, count); } -static void ioport_read16r(void __iomem *addr, void *dst, unsigned long count) +static void ioport_read16r(const void __iomem *addr, void *dst, unsigned long count) { insw(ADDR2PORT(addr), dst, count); } -static void ioport_read32r(void __iomem *addr, void *dst, unsigned long count) +static void ioport_read32r(const void __iomem *addr, void *dst, unsigned long count) { insl(ADDR2PORT(addr), dst, count); } @@ -150,37 +150,37 @@ static const struct iomap_ops ioport_ops = { /* Legacy I/O memory ops */ -static unsigned int iomem_read8(void __iomem *addr) +static unsigned 
int iomem_read8(const void __iomem *addr) { return readb(addr); } -static unsigned int iomem_read16(void __iomem *addr) +static unsigned int iomem_read16(const void __iomem *addr) { return readw(addr); } -static unsigned int iomem_read16be(void __iomem *addr) +static unsigned int iomem_read16be(const void __iomem *addr) { return __raw_readw(addr); } -static unsigned int iomem_read32(void __iomem *addr) +static unsigned int iomem_read32(const void __iomem *addr) { return readl(addr); } -static unsigned int iomem_read32be(void __iomem *addr) +static unsigned int iomem_read32be(const void __iomem *addr) { return __raw_readl(addr); } -static u64 iomem_read64(void __iomem *addr) +static u64 iomem_read64(const void __iomem *addr) { return readq(addr); } -static u64 iomem_read64be(void __iomem *addr) +static u64 iomem_read64be(const void __iomem *addr) { return __raw_readq(addr); } @@ -220,7 +220,7 @@ static void iomem_write64be(u64 datum, void __iomem *addr) __raw_writel(datum, addr); } -static void iomem_read8r(void __iomem *addr, void *dst, unsigned long count) +static void iomem_read8r(const void __iomem *addr, void *dst, unsigned long count) { while (count--) { *(u8 *)dst = __raw_readb(addr); @@ -228,7 +228,7 @@ static void iomem_read8r(void __iomem *addr, void *dst, unsigned long count) } } -static void iomem_read16r(void __iomem *addr, void *dst, unsigned long count) +static void iomem_read16r(const void __iomem *addr, void *dst, unsigned long count) { while (count--) { *(u16 *)dst = __raw_readw(addr); @@ -236,7 +236,7 @@ static void iomem_read16r(void __iomem *addr, void *dst, unsigned long count) } } -static void iomem_read32r(void __iomem *addr, void *dst, unsigned long count) +static void iomem_read32r(const void __iomem *addr, void *dst, unsigned long count) { while (count--) { *(u32 *)dst = __raw_readl(addr); @@ -297,49 +297,49 @@ static const struct iomap_ops *iomap_ops[8] = { }; -unsigned int ioread8(void __iomem *addr) +unsigned int ioread8(const void __iomem *addr) { if (unlikely(INDIRECT_ADDR(addr))) return iomap_ops[ADDR_TO_REGION(addr)]->read8(addr); return *((u8 *)addr); } -unsigned int ioread16(void __iomem *addr) +unsigned int ioread16(const void __iomem *addr) { if (unlikely(INDIRECT_ADDR(addr))) return iomap_ops[ADDR_TO_REGION(addr)]->read16(addr); return le16_to_cpup((u16 *)addr); } -unsigned int ioread16be(void __iomem *addr) +unsigned int ioread16be(const void __iomem *addr) { if (unlikely(INDIRECT_ADDR(addr))) return iomap_ops[ADDR_TO_REGION(addr)]->read16be(addr); return *((u16 *)addr); } -unsigned int ioread32(void __iomem *addr) +unsigned int ioread32(const void __iomem *addr) { if (unlikely(INDIRECT_ADDR(addr))) return iomap_ops[ADDR_TO_REGION(addr)]->read32(addr); return le32_to_cpup((u32 *)addr); } -unsigned int ioread32be(void __iomem *addr) +unsigned int ioread32be(const void __iomem *addr) { if (unlikely(INDIRECT_ADDR(addr))) return iomap_ops[ADDR_TO_REGION(addr)]->read32be(addr); return *((u32 *)addr); } -u64 ioread64(void __iomem *addr) +u64 ioread64(const void __iomem *addr) { if (unlikely(INDIRECT_ADDR(addr))) return iomap_ops[ADDR_TO_REGION(addr)]->read64(addr); return le64_to_cpup((u64 *)addr); } -u64 ioread64be(void __iomem *addr) +u64 ioread64be(const void __iomem *addr) { if (unlikely(INDIRECT_ADDR(addr))) return iomap_ops[ADDR_TO_REGION(addr)]->read64be(addr); @@ -411,7 +411,7 @@ void iowrite64be(u64 datum, void __iomem *addr) /* Repeating interfaces */ -void ioread8_rep(void __iomem *addr, void *dst, unsigned long count) +void ioread8_rep(const 
void __iomem *addr, void *dst, unsigned long count) { if (unlikely(INDIRECT_ADDR(addr))) { iomap_ops[ADDR_TO_REGION(addr)]->read8r(addr, dst, count); @@ -423,7 +423,7 @@ void ioread8_rep(void __iomem *addr, void *dst, unsigned long count) } } -void ioread16_rep(void __iomem *addr, void *dst, unsigned long count) +void ioread16_rep(const void __iomem *addr, void *dst, unsigned long count) { if (unlikely(INDIRECT_ADDR(addr))) { iomap_ops[ADDR_TO_REGION(addr)]->read16r(addr, dst, count); @@ -435,7 +435,7 @@ void ioread16_rep(void __iomem *addr, void *dst, unsigned long count) } } -void ioread32_rep(void __iomem *addr, void *dst, unsigned long count) +void ioread32_rep(const void __iomem *addr, void *dst, unsigned long count) { if (unlikely(INDIRECT_ADDR(addr))) { iomap_ops[ADDR_TO_REGION(addr)]->read32r(addr, dst, count); diff --git a/arch/parisc/mm/fault.c b/arch/parisc/mm/fault.c index 66ac0719bd49..4bfe2da9fbe3 100644 --- a/arch/parisc/mm/fault.c +++ b/arch/parisc/mm/fault.c @@ -18,6 +18,7 @@ #include <linux/extable.h> #include <linux/uaccess.h> #include <linux/hugetlb.h> +#include <linux/perf_event.h> #include <asm/traps.h> @@ -281,6 +282,7 @@ void do_page_fault(struct pt_regs *regs, unsigned long code, acc_type = parisc_acctyp(code, regs->iir); if (acc_type & VM_WRITE) flags |= FAULT_FLAG_WRITE; + perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address); retry: mmap_read_lock(mm); vma = find_vma_prev(mm, address, &prev_vma); @@ -302,7 +304,7 @@ good_area: * fault. */ - fault = handle_mm_fault(vma, address, flags); + fault = handle_mm_fault(vma, address, flags, regs); if (fault_signal_pending(fault, regs)) return; @@ -323,10 +325,6 @@ good_area: BUG(); } if (flags & FAULT_FLAG_ALLOW_RETRY) { - if (fault & VM_FAULT_MAJOR) - current->maj_flt++; - else - current->min_flt++; if (fault & VM_FAULT_RETRY) { /* * No need to mmap_read_unlock(mm) as we would diff --git a/arch/powerpc/include/asm/kvm_book3s_uvmem.h b/arch/powerpc/include/asm/kvm_book3s_uvmem.h index 9cb7d8be2366..0a6319448cb6 100644 --- a/arch/powerpc/include/asm/kvm_book3s_uvmem.h +++ b/arch/powerpc/include/asm/kvm_book3s_uvmem.h @@ -23,6 +23,10 @@ int kvmppc_send_page_to_uv(struct kvm *kvm, unsigned long gfn); unsigned long kvmppc_h_svm_init_abort(struct kvm *kvm); void kvmppc_uvmem_drop_pages(const struct kvm_memory_slot *free, struct kvm *kvm, bool skip_page_out); +int kvmppc_uvmem_memslot_create(struct kvm *kvm, + const struct kvm_memory_slot *new); +void kvmppc_uvmem_memslot_delete(struct kvm *kvm, + const struct kvm_memory_slot *old); #else static inline int kvmppc_uvmem_init(void) { @@ -82,5 +86,15 @@ static inline int kvmppc_send_page_to_uv(struct kvm *kvm, unsigned long gfn) static inline void kvmppc_uvmem_drop_pages(const struct kvm_memory_slot *free, struct kvm *kvm, bool skip_page_out) { } + +static inline int kvmppc_uvmem_memslot_create(struct kvm *kvm, + const struct kvm_memory_slot *new) +{ + return H_UNSUPPORTED; +} + +static inline void kvmppc_uvmem_memslot_delete(struct kvm *kvm, + const struct kvm_memory_slot *old) { } + #endif /* CONFIG_PPC_UV */ #endif /* __ASM_KVM_BOOK3S_UVMEM_H__ */ diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h index ccf66b3a4c1d..0a056c64c317 100644 --- a/arch/powerpc/include/asm/kvm_ppc.h +++ b/arch/powerpc/include/asm/kvm_ppc.h @@ -59,7 +59,7 @@ enum xlate_readwrite { }; extern int kvmppc_vcpu_run(struct kvm_vcpu *vcpu); -extern int __kvmppc_vcpu_run(struct kvm_run *run, struct kvm_vcpu *vcpu); +extern int __kvmppc_vcpu_run(struct kvm_vcpu *vcpu); 
extern void kvmppc_handler_highmem(void); extern void kvmppc_dump_vcpu(struct kvm_vcpu *vcpu); diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h index 41419f1fc00f..88fb88491fe9 100644 --- a/arch/powerpc/include/asm/reg.h +++ b/arch/powerpc/include/asm/reg.h @@ -474,7 +474,8 @@ #ifndef SPRN_LPID #define SPRN_LPID 0x13F /* Logical Partition Identifier */ #endif -#define LPID_RSVD 0x3ff /* Reserved LPID for partn switching */ +#define LPID_RSVD_POWER7 0x3ff /* Reserved LPID for partn switching */ +#define LPID_RSVD 0xfff /* Reserved LPID for partn switching */ #define SPRN_HMER 0x150 /* Hypervisor maintenance exception reg */ #define HMER_DEBUG_TRIG (1ul << (63 - 17)) /* Debug trigger */ #define SPRN_HMEER 0x151 /* Hyp maintenance exception enable reg */ @@ -1362,6 +1363,7 @@ #define PVR_ARCH_206p 0x0f100003 #define PVR_ARCH_207 0x0f000004 #define PVR_ARCH_300 0x0f000005 +#define PVR_ARCH_31 0x0f000006 /* Macros for setting and retrieving special purpose registers */ #ifndef __ASSEMBLY__ diff --git a/arch/powerpc/include/asm/uaccess.h b/arch/powerpc/include/asm/uaccess.h index 64c04ab09112..00699903f1ef 100644 --- a/arch/powerpc/include/asm/uaccess.h +++ b/arch/powerpc/include/asm/uaccess.h @@ -38,8 +38,7 @@ static inline void set_fs(mm_segment_t fs) set_thread_flag(TIF_FSCHECK); } -#define segment_eq(a, b) ((a).seg == (b).seg) - +#define uaccess_kernel() (get_fs().seg == KERNEL_DS.seg) #define user_addr_max() (get_fs().seg) #ifdef __powerpc64__ diff --git a/arch/powerpc/kernel/iomap.c b/arch/powerpc/kernel/iomap.c index 5ac84efc6ede..9fe4fb3b08aa 100644 --- a/arch/powerpc/kernel/iomap.c +++ b/arch/powerpc/kernel/iomap.c @@ -15,23 +15,23 @@ * Here comes the ppc64 implementation of the IOMAP * interfaces. */ -unsigned int ioread8(void __iomem *addr) +unsigned int ioread8(const void __iomem *addr) { return readb(addr); } -unsigned int ioread16(void __iomem *addr) +unsigned int ioread16(const void __iomem *addr) { return readw(addr); } -unsigned int ioread16be(void __iomem *addr) +unsigned int ioread16be(const void __iomem *addr) { return readw_be(addr); } -unsigned int ioread32(void __iomem *addr) +unsigned int ioread32(const void __iomem *addr) { return readl(addr); } -unsigned int ioread32be(void __iomem *addr) +unsigned int ioread32be(const void __iomem *addr) { return readl_be(addr); } @@ -41,27 +41,27 @@ EXPORT_SYMBOL(ioread16be); EXPORT_SYMBOL(ioread32); EXPORT_SYMBOL(ioread32be); #ifdef __powerpc64__ -u64 ioread64(void __iomem *addr) +u64 ioread64(const void __iomem *addr) { return readq(addr); } -u64 ioread64_lo_hi(void __iomem *addr) +u64 ioread64_lo_hi(const void __iomem *addr) { return readq(addr); } -u64 ioread64_hi_lo(void __iomem *addr) +u64 ioread64_hi_lo(const void __iomem *addr) { return readq(addr); } -u64 ioread64be(void __iomem *addr) +u64 ioread64be(const void __iomem *addr) { return readq_be(addr); } -u64 ioread64be_lo_hi(void __iomem *addr) +u64 ioread64be_lo_hi(const void __iomem *addr) { return readq_be(addr); } -u64 ioread64be_hi_lo(void __iomem *addr) +u64 ioread64be_hi_lo(const void __iomem *addr) { return readq_be(addr); } @@ -139,15 +139,15 @@ EXPORT_SYMBOL(iowrite64be_hi_lo); * FIXME! We could make these do EEH handling if we really * wanted. Not clear if we do. 
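Most of the iomap churn in this region (alpha earlier, parisc and powerpc here) is a mechanical const-qualification of the read side: ioread8/16/32/64, their be/_rep variants and the parisc iomap_ops read hooks now take const void __iomem *, matching the fact that they never write through the cookie. For callers this only means a cast goes away; an illustrative, made-up driver helper:

#include <linux/io.h>

#define SKETCH_STATUS	0x04	/* hypothetical register offset */

static u32 sketch_read_status(const void __iomem *base)
{
	/* ioread32() accepts a const cookie after this series */
	return ioread32(base + SKETCH_STATUS);
}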
*/ -void ioread8_rep(void __iomem *addr, void *dst, unsigned long count) +void ioread8_rep(const void __iomem *addr, void *dst, unsigned long count) { readsb(addr, dst, count); } -void ioread16_rep(void __iomem *addr, void *dst, unsigned long count) +void ioread16_rep(const void __iomem *addr, void *dst, unsigned long count) { readsw(addr, dst, count); } -void ioread32_rep(void __iomem *addr, void *dst, unsigned long count) +void ioread32_rep(const void __iomem *addr, void *dst, unsigned long count) { readsl(addr, dst, count); } diff --git a/arch/powerpc/kernel/syscalls/syscall.tbl b/arch/powerpc/kernel/syscalls/syscall.tbl index be9f74546068..c2d737ff2e7b 100644 --- a/arch/powerpc/kernel/syscalls/syscall.tbl +++ b/arch/powerpc/kernel/syscalls/syscall.tbl @@ -197,7 +197,7 @@ 146 common writev sys_writev compat_sys_writev 147 common getsid sys_getsid 148 common fdatasync sys_fdatasync -149 nospu _sysctl sys_sysctl compat_sys_sysctl +149 nospu _sysctl sys_ni_syscall 150 common mlock sys_mlock 151 common munlock sys_munlock 152 common mlockall sys_mlockall diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c index 7c5a1812a1c3..38ea396a23d6 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_hv.c +++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c @@ -260,11 +260,15 @@ int kvmppc_mmu_hv_init(void) if (!mmu_has_feature(MMU_FTR_LOCKLESS_TLBIE)) return -EINVAL; - /* POWER7 has 10-bit LPIDs (12-bit in POWER8) */ host_lpid = 0; if (cpu_has_feature(CPU_FTR_HVMODE)) host_lpid = mfspr(SPRN_LPID); - rsvd_lpid = LPID_RSVD; + + /* POWER8 and above have 12-bit LPIDs (10-bit in POWER7) */ + if (cpu_has_feature(CPU_FTR_ARCH_207S)) + rsvd_lpid = LPID_RSVD; + else + rsvd_lpid = LPID_RSVD_POWER7; kvmppc_init_lpid(rsvd_lpid + 1); diff --git a/arch/powerpc/kvm/book3s_64_mmu_radix.c b/arch/powerpc/kvm/book3s_64_mmu_radix.c index 777aa5625d5f..22a677b18695 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_radix.c +++ b/arch/powerpc/kvm/book3s_64_mmu_radix.c @@ -161,7 +161,9 @@ int kvmppc_mmu_walk_radix_tree(struct kvm_vcpu *vcpu, gva_t eaddr, return -EINVAL; /* Read the entry from guest memory */ addr = base + (index * sizeof(rpte)); + vcpu->srcu_idx = srcu_read_lock(&kvm->srcu); ret = kvm_read_guest(kvm, addr, &rpte, sizeof(rpte)); + srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx); if (ret) { if (pte_ret_p) *pte_ret_p = addr; @@ -237,7 +239,9 @@ int kvmppc_mmu_radix_translate_table(struct kvm_vcpu *vcpu, gva_t eaddr, /* Read the table to find the root of the radix tree */ ptbl = (table & PRTB_MASK) + (table_index * sizeof(entry)); + vcpu->srcu_idx = srcu_read_lock(&kvm->srcu); ret = kvm_read_guest(kvm, ptbl, &entry, sizeof(entry)); + srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx); if (ret) return ret; diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index 0f83f39a2bd2..4ba06a2a306c 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -343,13 +343,18 @@ static void kvmppc_set_pvr_hv(struct kvm_vcpu *vcpu, u32 pvr) vcpu->arch.pvr = pvr; } +/* Dummy value used in computing PCR value below */ +#define PCR_ARCH_31 (PCR_ARCH_300 << 1) + static int kvmppc_set_arch_compat(struct kvm_vcpu *vcpu, u32 arch_compat) { unsigned long host_pcr_bit = 0, guest_pcr_bit = 0; struct kvmppc_vcore *vc = vcpu->arch.vcore; /* We can (emulate) our own architecture version and anything older */ - if (cpu_has_feature(CPU_FTR_ARCH_300)) + if (cpu_has_feature(CPU_FTR_ARCH_31)) + host_pcr_bit = PCR_ARCH_31; + else if (cpu_has_feature(CPU_FTR_ARCH_300)) host_pcr_bit = PCR_ARCH_300; else 
if (cpu_has_feature(CPU_FTR_ARCH_207S)) host_pcr_bit = PCR_ARCH_207; @@ -375,6 +380,9 @@ static int kvmppc_set_arch_compat(struct kvm_vcpu *vcpu, u32 arch_compat) case PVR_ARCH_300: guest_pcr_bit = PCR_ARCH_300; break; + case PVR_ARCH_31: + guest_pcr_bit = PCR_ARCH_31; + break; default: return -EINVAL; } @@ -2355,7 +2363,7 @@ static int kvmppc_core_vcpu_create_hv(struct kvm_vcpu *vcpu) * to trap and then we emulate them. */ vcpu->arch.hfscr = HFSCR_TAR | HFSCR_EBB | HFSCR_PM | HFSCR_BHRB | - HFSCR_DSCR | HFSCR_VECVSX | HFSCR_FP; + HFSCR_DSCR | HFSCR_VECVSX | HFSCR_FP | HFSCR_PREFIX; if (cpu_has_feature(CPU_FTR_HVMODE)) { vcpu->arch.hfscr &= mfspr(SPRN_HFSCR); if (cpu_has_feature(CPU_FTR_P9_TM_HV_ASSIST)) @@ -4552,16 +4560,14 @@ static void kvmppc_core_commit_memory_region_hv(struct kvm *kvm, switch (change) { case KVM_MR_CREATE: - if (kvmppc_uvmem_slot_init(kvm, new)) - return; - uv_register_mem_slot(kvm->arch.lpid, - new->base_gfn << PAGE_SHIFT, - new->npages * PAGE_SIZE, - 0, new->id); + /* + * @TODO kvmppc_uvmem_memslot_create() can fail and + * return error. Fix this. + */ + kvmppc_uvmem_memslot_create(kvm, new); break; case KVM_MR_DELETE: - uv_unregister_mem_slot(kvm->arch.lpid, old->id); - kvmppc_uvmem_slot_free(kvm, old); + kvmppc_uvmem_memslot_delete(kvm, old); break; default: /* TODO: Handle KVM_MR_MOVE */ diff --git a/arch/powerpc/kvm/book3s_hv_nested.c b/arch/powerpc/kvm/book3s_hv_nested.c index 2c849a65db77..6822d23a2da4 100644 --- a/arch/powerpc/kvm/book3s_hv_nested.c +++ b/arch/powerpc/kvm/book3s_hv_nested.c @@ -233,20 +233,21 @@ long kvmhv_enter_nested_guest(struct kvm_vcpu *vcpu) /* copy parameters in */ hv_ptr = kvmppc_get_gpr(vcpu, 4); + regs_ptr = kvmppc_get_gpr(vcpu, 5); + vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu); err = kvm_vcpu_read_guest(vcpu, hv_ptr, &l2_hv, - sizeof(struct hv_guest_state)); + sizeof(struct hv_guest_state)) || + kvm_vcpu_read_guest(vcpu, regs_ptr, &l2_regs, + sizeof(struct pt_regs)); + srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx); if (err) return H_PARAMETER; + if (kvmppc_need_byteswap(vcpu)) byteswap_hv_regs(&l2_hv); if (l2_hv.version != HV_GUEST_STATE_VERSION) return H_P2; - regs_ptr = kvmppc_get_gpr(vcpu, 5); - err = kvm_vcpu_read_guest(vcpu, regs_ptr, &l2_regs, - sizeof(struct pt_regs)); - if (err) - return H_PARAMETER; if (kvmppc_need_byteswap(vcpu)) byteswap_pt_regs(&l2_regs); if (l2_hv.vcpu_token >= NR_CPUS) @@ -323,12 +324,12 @@ long kvmhv_enter_nested_guest(struct kvm_vcpu *vcpu) byteswap_hv_regs(&l2_hv); byteswap_pt_regs(&l2_regs); } + vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu); err = kvm_vcpu_write_guest(vcpu, hv_ptr, &l2_hv, - sizeof(struct hv_guest_state)); - if (err) - return H_AUTHORITY; - err = kvm_vcpu_write_guest(vcpu, regs_ptr, &l2_regs, + sizeof(struct hv_guest_state)) || + kvm_vcpu_write_guest(vcpu, regs_ptr, &l2_regs, sizeof(struct pt_regs)); + srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx); if (err) return H_AUTHORITY; @@ -508,12 +509,16 @@ long kvmhv_copy_tofrom_guest_nested(struct kvm_vcpu *vcpu) goto not_found; /* Write what was loaded into our buffer back to the L1 guest */ + vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu); rc = kvm_vcpu_write_guest(vcpu, gp_to, buf, n); + srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx); if (rc) goto not_found; } else { /* Load the data to be stored from the L1 guest into our buf */ + vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu); rc = kvm_vcpu_read_guest(vcpu, gp_from, buf, n); + srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx); if (rc) goto not_found; 
@@ -548,9 +553,12 @@ static void kvmhv_update_ptbl_cache(struct kvm_nested_guest *gp) ret = -EFAULT; ptbl_addr = (kvm->arch.l1_ptcr & PRTB_MASK) + (gp->l1_lpid << 4); - if (gp->l1_lpid < (1ul << ((kvm->arch.l1_ptcr & PRTS_MASK) + 8))) + if (gp->l1_lpid < (1ul << ((kvm->arch.l1_ptcr & PRTS_MASK) + 8))) { + int srcu_idx = srcu_read_lock(&kvm->srcu); ret = kvm_read_guest(kvm, ptbl_addr, &ptbl_entry, sizeof(ptbl_entry)); + srcu_read_unlock(&kvm->srcu, srcu_idx); + } if (ret) { gp->l1_gr_to_hr = 0; gp->process_table = 0; diff --git a/arch/powerpc/kvm/book3s_hv_uvmem.c b/arch/powerpc/kvm/book3s_hv_uvmem.c index 6850bd04bcb9..7705d5557239 100644 --- a/arch/powerpc/kvm/book3s_hv_uvmem.c +++ b/arch/powerpc/kvm/book3s_hv_uvmem.c @@ -93,12 +93,133 @@ #include <asm/ultravisor.h> #include <asm/mman.h> #include <asm/kvm_ppc.h> +#include <asm/kvm_book3s_uvmem.h> static struct dev_pagemap kvmppc_uvmem_pgmap; static unsigned long *kvmppc_uvmem_bitmap; static DEFINE_SPINLOCK(kvmppc_uvmem_bitmap_lock); -#define KVMPPC_UVMEM_PFN (1UL << 63) +/* + * States of a GFN + * --------------- + * The GFN can be in one of the following states. + * + * (a) Secure - The GFN is secure. The GFN is associated with + * a Secure VM, the contents of the GFN is not accessible + * to the Hypervisor. This GFN can be backed by a secure-PFN, + * or can be backed by a normal-PFN with contents encrypted. + * The former is true when the GFN is paged-in into the + * ultravisor. The latter is true when the GFN is paged-out + * of the ultravisor. + * + * (b) Shared - The GFN is shared. The GFN is associated with a + * a secure VM. The contents of the GFN is accessible to + * Hypervisor. This GFN is backed by a normal-PFN and its + * content is un-encrypted. + * + * (c) Normal - The GFN is a normal. The GFN is associated with + * a normal VM. The contents of the GFN is accesible to + * the Hypervisor. Its content is never encrypted. + * + * States of a VM. + * --------------- + * + * Normal VM: A VM whose contents are always accessible to + * the hypervisor. All its GFNs are normal-GFNs. + * + * Secure VM: A VM whose contents are not accessible to the + * hypervisor without the VM's consent. Its GFNs are + * either Shared-GFN or Secure-GFNs. + * + * Transient VM: A Normal VM that is transitioning to secure VM. + * The transition starts on successful return of + * H_SVM_INIT_START, and ends on successful return + * of H_SVM_INIT_DONE. This transient VM, can have GFNs + * in any of the three states; i.e Secure-GFN, Shared-GFN, + * and Normal-GFN. The VM never executes in this state + * in supervisor-mode. + * + * Memory slot State. + * ----------------------------- + * The state of a memory slot mirrors the state of the + * VM the memory slot is associated with. + * + * VM State transition. + * -------------------- + * + * A VM always starts in Normal Mode. + * + * H_SVM_INIT_START moves the VM into transient state. During this + * time the Ultravisor may request some of its GFNs to be shared or + * secured. So its GFNs can be in one of the three GFN states. + * + * H_SVM_INIT_DONE moves the VM entirely from transient state to + * secure-state. At this point any left-over normal-GFNs are + * transitioned to Secure-GFN. + * + * H_SVM_INIT_ABORT moves the transient VM back to normal VM. + * All its GFNs are moved to Normal-GFNs. + * + * UV_TERMINATE transitions the secure-VM back to normal-VM. All + * the secure-GFN and shared-GFNs are tranistioned to normal-GFN + * Note: The contents of the normal-GFN is undefined at this point. 
+ * + * GFN state implementation: + * ------------------------- + * + * Secure GFN is associated with a secure-PFN; also called uvmem_pfn, + * when the GFN is paged-in. Its pfn[] has KVMPPC_GFN_UVMEM_PFN flag + * set, and contains the value of the secure-PFN. + * It is associated with a normal-PFN; also called mem_pfn, when + * the GFN is pagedout. Its pfn[] has KVMPPC_GFN_MEM_PFN flag set. + * The value of the normal-PFN is not tracked. + * + * Shared GFN is associated with a normal-PFN. Its pfn[] has + * KVMPPC_UVMEM_SHARED_PFN flag set. The value of the normal-PFN + * is not tracked. + * + * Normal GFN is associated with normal-PFN. Its pfn[] has + * no flag set. The value of the normal-PFN is not tracked. + * + * Life cycle of a GFN + * -------------------- + * + * -------------------------------------------------------------- + * | | Share | Unshare | SVM |H_SVM_INIT_DONE| + * | |operation |operation | abort/ | | + * | | | | terminate | | + * ------------------------------------------------------------- + * | | | | | | + * | Secure | Shared | Secure |Normal |Secure | + * | | | | | | + * | Shared | Shared | Secure |Normal |Shared | + * | | | | | | + * | Normal | Shared | Secure |Normal |Secure | + * -------------------------------------------------------------- + * + * Life cycle of a VM + * -------------------- + * + * -------------------------------------------------------------------- + * | | start | H_SVM_ |H_SVM_ |H_SVM_ |UV_SVM_ | + * | | VM |INIT_START|INIT_DONE|INIT_ABORT |TERMINATE | + * | | | | | | | + * --------- ---------------------------------------------------------- + * | | | | | | | + * | Normal | Normal | Transient|Error |Error |Normal | + * | | | | | | | + * | Secure | Error | Error |Error |Error |Normal | + * | | | | | | | + * |Transient| N/A | Error |Secure |Normal |Normal | + * -------------------------------------------------------------------- + */ + +#define KVMPPC_GFN_UVMEM_PFN (1UL << 63) +#define KVMPPC_GFN_MEM_PFN (1UL << 62) +#define KVMPPC_GFN_SHARED (1UL << 61) +#define KVMPPC_GFN_SECURE (KVMPPC_GFN_UVMEM_PFN | KVMPPC_GFN_MEM_PFN) +#define KVMPPC_GFN_FLAG_MASK (KVMPPC_GFN_SECURE | KVMPPC_GFN_SHARED) +#define KVMPPC_GFN_PFN_MASK (~KVMPPC_GFN_FLAG_MASK) struct kvmppc_uvmem_slot { struct list_head list; @@ -106,11 +227,11 @@ struct kvmppc_uvmem_slot { unsigned long base_pfn; unsigned long *pfns; }; - struct kvmppc_uvmem_page_pvt { struct kvm *kvm; unsigned long gpa; bool skip_page_out; + bool remove_gfn; }; bool kvmppc_uvmem_available(void) @@ -163,8 +284,8 @@ void kvmppc_uvmem_slot_free(struct kvm *kvm, const struct kvm_memory_slot *slot) mutex_unlock(&kvm->arch.uvmem_lock); } -static void kvmppc_uvmem_pfn_insert(unsigned long gfn, unsigned long uvmem_pfn, - struct kvm *kvm) +static void kvmppc_mark_gfn(unsigned long gfn, struct kvm *kvm, + unsigned long flag, unsigned long uvmem_pfn) { struct kvmppc_uvmem_slot *p; @@ -172,24 +293,41 @@ static void kvmppc_uvmem_pfn_insert(unsigned long gfn, unsigned long uvmem_pfn, if (gfn >= p->base_pfn && gfn < p->base_pfn + p->nr_pfns) { unsigned long index = gfn - p->base_pfn; - p->pfns[index] = uvmem_pfn | KVMPPC_UVMEM_PFN; + if (flag == KVMPPC_GFN_UVMEM_PFN) + p->pfns[index] = uvmem_pfn | flag; + else + p->pfns[index] = flag; return; } } } -static void kvmppc_uvmem_pfn_remove(unsigned long gfn, struct kvm *kvm) +/* mark the GFN as secure-GFN associated with @uvmem pfn device-PFN. 
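The helpers above encode one of the GFN states plus, for a paged-in secure GFN, the backing device PFN in a single pfns[] word, using the KVMPPC_GFN_* bits defined earlier in this hunk. A small decoding sketch built only on those masks (the function name is illustrative):

#include <linux/types.h>

static bool sketch_gfn_is_secure(unsigned long entry, unsigned long *uvmem_pfn)
{
	/* neither KVMPPC_GFN_UVMEM_PFN nor KVMPPC_GFN_MEM_PFN set: shared or normal */
	if (!(entry & KVMPPC_GFN_SECURE))
		return false;

	/* the PFN value is only tracked while the GFN is backed by device memory */
	if ((entry & KVMPPC_GFN_UVMEM_PFN) && uvmem_pfn)
		*uvmem_pfn = entry & KVMPPC_GFN_PFN_MASK;

	return true;
}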
*/ +static void kvmppc_gfn_secure_uvmem_pfn(unsigned long gfn, + unsigned long uvmem_pfn, struct kvm *kvm) { - struct kvmppc_uvmem_slot *p; + kvmppc_mark_gfn(gfn, kvm, KVMPPC_GFN_UVMEM_PFN, uvmem_pfn); +} - list_for_each_entry(p, &kvm->arch.uvmem_pfns, list) { - if (gfn >= p->base_pfn && gfn < p->base_pfn + p->nr_pfns) { - p->pfns[gfn - p->base_pfn] = 0; - return; - } - } +/* mark the GFN as secure-GFN associated with a memory-PFN. */ +static void kvmppc_gfn_secure_mem_pfn(unsigned long gfn, struct kvm *kvm) +{ + kvmppc_mark_gfn(gfn, kvm, KVMPPC_GFN_MEM_PFN, 0); +} + +/* mark the GFN as a shared GFN. */ +static void kvmppc_gfn_shared(unsigned long gfn, struct kvm *kvm) +{ + kvmppc_mark_gfn(gfn, kvm, KVMPPC_GFN_SHARED, 0); +} + +/* mark the GFN as a non-existent GFN. */ +static void kvmppc_gfn_remove(unsigned long gfn, struct kvm *kvm) +{ + kvmppc_mark_gfn(gfn, kvm, 0, 0); } +/* return true, if the GFN is a secure-GFN backed by a secure-PFN */ static bool kvmppc_gfn_is_uvmem_pfn(unsigned long gfn, struct kvm *kvm, unsigned long *uvmem_pfn) { @@ -199,10 +337,10 @@ static bool kvmppc_gfn_is_uvmem_pfn(unsigned long gfn, struct kvm *kvm, if (gfn >= p->base_pfn && gfn < p->base_pfn + p->nr_pfns) { unsigned long index = gfn - p->base_pfn; - if (p->pfns[index] & KVMPPC_UVMEM_PFN) { + if (p->pfns[index] & KVMPPC_GFN_UVMEM_PFN) { if (uvmem_pfn) *uvmem_pfn = p->pfns[index] & - ~KVMPPC_UVMEM_PFN; + KVMPPC_GFN_PFN_MASK; return true; } else return false; @@ -211,10 +349,114 @@ static bool kvmppc_gfn_is_uvmem_pfn(unsigned long gfn, struct kvm *kvm, return false; } +/* + * starting from *gfn search for the next available GFN that is not yet + * transitioned to a secure GFN. return the value of that GFN in *gfn. If a + * GFN is found, return true, else return false + * + * Must be called with kvm->arch.uvmem_lock held. + */ +static bool kvmppc_next_nontransitioned_gfn(const struct kvm_memory_slot *memslot, + struct kvm *kvm, unsigned long *gfn) +{ + struct kvmppc_uvmem_slot *p; + bool ret = false; + unsigned long i; + + list_for_each_entry(p, &kvm->arch.uvmem_pfns, list) + if (*gfn >= p->base_pfn && *gfn < p->base_pfn + p->nr_pfns) + break; + if (!p) + return ret; + /* + * The code below assumes, one to one correspondence between + * kvmppc_uvmem_slot and memslot. + */ + for (i = *gfn; i < p->base_pfn + p->nr_pfns; i++) { + unsigned long index = i - p->base_pfn; + + if (!(p->pfns[index] & KVMPPC_GFN_FLAG_MASK)) { + *gfn = i; + ret = true; + break; + } + } + return ret; +} + +static int kvmppc_memslot_page_merge(struct kvm *kvm, + const struct kvm_memory_slot *memslot, bool merge) +{ + unsigned long gfn = memslot->base_gfn; + unsigned long end, start = gfn_to_hva(kvm, gfn); + int ret = 0; + struct vm_area_struct *vma; + int merge_flag = (merge) ? 
MADV_MERGEABLE : MADV_UNMERGEABLE; + + if (kvm_is_error_hva(start)) + return H_STATE; + + end = start + (memslot->npages << PAGE_SHIFT); + + mmap_write_lock(kvm->mm); + do { + vma = find_vma_intersection(kvm->mm, start, end); + if (!vma) { + ret = H_STATE; + break; + } + ret = ksm_madvise(vma, vma->vm_start, vma->vm_end, + merge_flag, &vma->vm_flags); + if (ret) { + ret = H_STATE; + break; + } + start = vma->vm_end; + } while (end > vma->vm_end); + + mmap_write_unlock(kvm->mm); + return ret; +} + +static void __kvmppc_uvmem_memslot_delete(struct kvm *kvm, + const struct kvm_memory_slot *memslot) +{ + uv_unregister_mem_slot(kvm->arch.lpid, memslot->id); + kvmppc_uvmem_slot_free(kvm, memslot); + kvmppc_memslot_page_merge(kvm, memslot, true); +} + +static int __kvmppc_uvmem_memslot_create(struct kvm *kvm, + const struct kvm_memory_slot *memslot) +{ + int ret = H_PARAMETER; + + if (kvmppc_memslot_page_merge(kvm, memslot, false)) + return ret; + + if (kvmppc_uvmem_slot_init(kvm, memslot)) + goto out1; + + ret = uv_register_mem_slot(kvm->arch.lpid, + memslot->base_gfn << PAGE_SHIFT, + memslot->npages * PAGE_SIZE, + 0, memslot->id); + if (ret < 0) { + ret = H_PARAMETER; + goto out; + } + return 0; +out: + kvmppc_uvmem_slot_free(kvm, memslot); +out1: + kvmppc_memslot_page_merge(kvm, memslot, true); + return ret; +} + unsigned long kvmppc_h_svm_init_start(struct kvm *kvm) { struct kvm_memslots *slots; - struct kvm_memory_slot *memslot; + struct kvm_memory_slot *memslot, *m; int ret = H_SUCCESS; int srcu_idx; @@ -232,35 +474,117 @@ unsigned long kvmppc_h_svm_init_start(struct kvm *kvm) return H_AUTHORITY; srcu_idx = srcu_read_lock(&kvm->srcu); + + /* register the memslot */ slots = kvm_memslots(kvm); kvm_for_each_memslot(memslot, slots) { - if (kvmppc_uvmem_slot_init(kvm, memslot)) { - ret = H_PARAMETER; - goto out; - } - ret = uv_register_mem_slot(kvm->arch.lpid, - memslot->base_gfn << PAGE_SHIFT, - memslot->npages * PAGE_SIZE, - 0, memslot->id); - if (ret < 0) { - kvmppc_uvmem_slot_free(kvm, memslot); - ret = H_PARAMETER; - goto out; + ret = __kvmppc_uvmem_memslot_create(kvm, memslot); + if (ret) + break; + } + + if (ret) { + slots = kvm_memslots(kvm); + kvm_for_each_memslot(m, slots) { + if (m == memslot) + break; + __kvmppc_uvmem_memslot_delete(kvm, memslot); } } -out: + srcu_read_unlock(&kvm->srcu, srcu_idx); return ret; } -unsigned long kvmppc_h_svm_init_done(struct kvm *kvm) +/* + * Provision a new page on HV side and copy over the contents + * from secure memory using UV_PAGE_OUT uvcall. + * Caller must held kvm->arch.uvmem_lock. 
+ */ +static int __kvmppc_svm_page_out(struct vm_area_struct *vma, + unsigned long start, + unsigned long end, unsigned long page_shift, + struct kvm *kvm, unsigned long gpa) { - if (!(kvm->arch.secure_guest & KVMPPC_SECURE_INIT_START)) - return H_UNSUPPORTED; + unsigned long src_pfn, dst_pfn = 0; + struct migrate_vma mig; + struct page *dpage, *spage; + struct kvmppc_uvmem_page_pvt *pvt; + unsigned long pfn; + int ret = U_SUCCESS; - kvm->arch.secure_guest |= KVMPPC_SECURE_INIT_DONE; - pr_info("LPID %d went secure\n", kvm->arch.lpid); - return H_SUCCESS; + memset(&mig, 0, sizeof(mig)); + mig.vma = vma; + mig.start = start; + mig.end = end; + mig.src = &src_pfn; + mig.dst = &dst_pfn; + mig.pgmap_owner = &kvmppc_uvmem_pgmap; + mig.flags = MIGRATE_VMA_SELECT_DEVICE_PRIVATE; + + /* The requested page is already paged-out, nothing to do */ + if (!kvmppc_gfn_is_uvmem_pfn(gpa >> page_shift, kvm, NULL)) + return ret; + + ret = migrate_vma_setup(&mig); + if (ret) + return -1; + + spage = migrate_pfn_to_page(*mig.src); + if (!spage || !(*mig.src & MIGRATE_PFN_MIGRATE)) + goto out_finalize; + + if (!is_zone_device_page(spage)) + goto out_finalize; + + dpage = alloc_page_vma(GFP_HIGHUSER, vma, start); + if (!dpage) { + ret = -1; + goto out_finalize; + } + + lock_page(dpage); + pvt = spage->zone_device_data; + pfn = page_to_pfn(dpage); + + /* + * This function is used in two cases: + * - When HV touches a secure page, for which we do UV_PAGE_OUT + * - When a secure page is converted to shared page, we *get* + * the page to essentially unmap the device page. In this + * case we skip page-out. + */ + if (!pvt->skip_page_out) + ret = uv_page_out(kvm->arch.lpid, pfn << page_shift, + gpa, 0, page_shift); + + if (ret == U_SUCCESS) + *mig.dst = migrate_pfn(pfn) | MIGRATE_PFN_LOCKED; + else { + unlock_page(dpage); + __free_page(dpage); + goto out_finalize; + } + + migrate_vma_pages(&mig); + +out_finalize: + migrate_vma_finalize(&mig); + return ret; +} + +static inline int kvmppc_svm_page_out(struct vm_area_struct *vma, + unsigned long start, unsigned long end, + unsigned long page_shift, + struct kvm *kvm, unsigned long gpa) +{ + int ret; + + mutex_lock(&kvm->arch.uvmem_lock); + ret = __kvmppc_svm_page_out(vma, start, end, page_shift, kvm, gpa); + mutex_unlock(&kvm->arch.uvmem_lock); + + return ret; } /* @@ -271,33 +595,53 @@ unsigned long kvmppc_h_svm_init_done(struct kvm *kvm) * fault on them, do fault time migration to replace the device PTEs in * QEMU page table with normal PTEs from newly allocated pages. 
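The split-out __kvmppc_svm_page_out() above and the page-in path further down both drive the same migrate_vma sequence; the patch mainly changes where the uvmem_lock and mmap_lock sit around it. A stripped-down sketch of the call order using the fields the patch fills in (UV calls and most error handling omitted; the destination page is taken as a parameter here rather than allocated):

#include <linux/migrate.h>
#include <linux/mm.h>

static int sketch_migrate_one(struct vm_area_struct *vma, unsigned long start,
			      unsigned long end, struct page *dpage)
{
	unsigned long src_pfn = 0, dst_pfn = 0;
	struct migrate_vma mig = {
		.vma	= vma,
		.start	= start,
		.end	= end,
		.src	= &src_pfn,
		.dst	= &dst_pfn,
		.flags	= MIGRATE_VMA_SELECT_SYSTEM,
	};

	if (migrate_vma_setup(&mig))
		return -EAGAIN;

	if (!(*mig.src & MIGRATE_PFN_MIGRATE)) {
		migrate_vma_finalize(&mig);
		return -EAGAIN;
	}

	lock_page(dpage);
	*mig.dst = migrate_pfn(page_to_pfn(dpage)) | MIGRATE_PFN_LOCKED;

	migrate_vma_pages(&mig);
	migrate_vma_finalize(&mig);
	return 0;
}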
*/ -void kvmppc_uvmem_drop_pages(const struct kvm_memory_slot *free, +void kvmppc_uvmem_drop_pages(const struct kvm_memory_slot *slot, struct kvm *kvm, bool skip_page_out) { int i; struct kvmppc_uvmem_page_pvt *pvt; - unsigned long pfn, uvmem_pfn; - unsigned long gfn = free->base_gfn; + struct page *uvmem_page; + struct vm_area_struct *vma = NULL; + unsigned long uvmem_pfn, gfn; + unsigned long addr; - for (i = free->npages; i; --i, ++gfn) { - struct page *uvmem_page; + mmap_read_lock(kvm->mm); + + addr = slot->userspace_addr; + + gfn = slot->base_gfn; + for (i = slot->npages; i; --i, ++gfn, addr += PAGE_SIZE) { + + /* Fetch the VMA if addr is not in the latest fetched one */ + if (!vma || addr >= vma->vm_end) { + vma = find_vma_intersection(kvm->mm, addr, addr+1); + if (!vma) { + pr_err("Can't find VMA for gfn:0x%lx\n", gfn); + break; + } + } mutex_lock(&kvm->arch.uvmem_lock); - if (!kvmppc_gfn_is_uvmem_pfn(gfn, kvm, &uvmem_pfn)) { - mutex_unlock(&kvm->arch.uvmem_lock); - continue; + + if (kvmppc_gfn_is_uvmem_pfn(gfn, kvm, &uvmem_pfn)) { + uvmem_page = pfn_to_page(uvmem_pfn); + pvt = uvmem_page->zone_device_data; + pvt->skip_page_out = skip_page_out; + pvt->remove_gfn = true; + + if (__kvmppc_svm_page_out(vma, addr, addr + PAGE_SIZE, + PAGE_SHIFT, kvm, pvt->gpa)) + pr_err("Can't page out gpa:0x%lx addr:0x%lx\n", + pvt->gpa, addr); + } else { + /* Remove the shared flag if any */ + kvmppc_gfn_remove(gfn, kvm); } - uvmem_page = pfn_to_page(uvmem_pfn); - pvt = uvmem_page->zone_device_data; - pvt->skip_page_out = skip_page_out; mutex_unlock(&kvm->arch.uvmem_lock); - - pfn = gfn_to_pfn(kvm, gfn); - if (is_error_noslot_pfn(pfn)) - continue; - kvm_release_pfn_clean(pfn); } + + mmap_read_unlock(kvm->mm); } unsigned long kvmppc_h_svm_init_abort(struct kvm *kvm) @@ -360,7 +704,7 @@ static struct page *kvmppc_uvmem_get_page(unsigned long gpa, struct kvm *kvm) goto out_clear; uvmem_pfn = bit + pfn_first; - kvmppc_uvmem_pfn_insert(gpa >> PAGE_SHIFT, uvmem_pfn, kvm); + kvmppc_gfn_secure_uvmem_pfn(gpa >> PAGE_SHIFT, uvmem_pfn, kvm); pvt->gpa = gpa; pvt->kvm = kvm; @@ -379,13 +723,14 @@ out: } /* - * Alloc a PFN from private device memory pool and copy page from normal - * memory to secure memory using UV_PAGE_IN uvcall. + * Alloc a PFN from private device memory pool. If @pagein is true, + * copy page from normal memory to secure memory using UV_PAGE_IN uvcall. */ -static int -kvmppc_svm_page_in(struct vm_area_struct *vma, unsigned long start, - unsigned long end, unsigned long gpa, struct kvm *kvm, - unsigned long page_shift, bool *downgrade) +static int kvmppc_svm_page_in(struct vm_area_struct *vma, + unsigned long start, + unsigned long end, unsigned long gpa, struct kvm *kvm, + unsigned long page_shift, + bool pagein) { unsigned long src_pfn, dst_pfn = 0; struct migrate_vma mig; @@ -402,18 +747,6 @@ kvmppc_svm_page_in(struct vm_area_struct *vma, unsigned long start, mig.dst = &dst_pfn; mig.flags = MIGRATE_VMA_SELECT_SYSTEM; - /* - * We come here with mmap_lock write lock held just for - * ksm_madvise(), otherwise we only need read mmap_lock. - * Hence downgrade to read lock once ksm_madvise() is done. 
- */ - ret = ksm_madvise(vma, vma->vm_start, vma->vm_end, - MADV_UNMERGEABLE, &vma->vm_flags); - mmap_write_downgrade(kvm->mm); - *downgrade = true; - if (ret) - return ret; - ret = migrate_vma_setup(&mig); if (ret) return ret; @@ -429,11 +762,16 @@ kvmppc_svm_page_in(struct vm_area_struct *vma, unsigned long start, goto out_finalize; } - pfn = *mig.src >> MIGRATE_PFN_SHIFT; - spage = migrate_pfn_to_page(*mig.src); - if (spage) - uv_page_in(kvm->arch.lpid, pfn << page_shift, gpa, 0, - page_shift); + if (pagein) { + pfn = *mig.src >> MIGRATE_PFN_SHIFT; + spage = migrate_pfn_to_page(*mig.src); + if (spage) { + ret = uv_page_in(kvm->arch.lpid, pfn << page_shift, + gpa, 0, page_shift); + if (ret) + goto out_finalize; + } + } *mig.dst = migrate_pfn(page_to_pfn(dpage)) | MIGRATE_PFN_LOCKED; migrate_vma_pages(&mig); @@ -442,6 +780,80 @@ out_finalize: return ret; } +static int kvmppc_uv_migrate_mem_slot(struct kvm *kvm, + const struct kvm_memory_slot *memslot) +{ + unsigned long gfn = memslot->base_gfn; + struct vm_area_struct *vma; + unsigned long start, end; + int ret = 0; + + mmap_read_lock(kvm->mm); + mutex_lock(&kvm->arch.uvmem_lock); + while (kvmppc_next_nontransitioned_gfn(memslot, kvm, &gfn)) { + ret = H_STATE; + start = gfn_to_hva(kvm, gfn); + if (kvm_is_error_hva(start)) + break; + + end = start + (1UL << PAGE_SHIFT); + vma = find_vma_intersection(kvm->mm, start, end); + if (!vma || vma->vm_start > start || vma->vm_end < end) + break; + + ret = kvmppc_svm_page_in(vma, start, end, + (gfn << PAGE_SHIFT), kvm, PAGE_SHIFT, false); + if (ret) { + ret = H_STATE; + break; + } + + /* relinquish the cpu if needed */ + cond_resched(); + } + mutex_unlock(&kvm->arch.uvmem_lock); + mmap_read_unlock(kvm->mm); + return ret; +} + +unsigned long kvmppc_h_svm_init_done(struct kvm *kvm) +{ + struct kvm_memslots *slots; + struct kvm_memory_slot *memslot; + int srcu_idx; + long ret = H_SUCCESS; + + if (!(kvm->arch.secure_guest & KVMPPC_SECURE_INIT_START)) + return H_UNSUPPORTED; + + /* migrate any unmoved normal pfn to device pfns*/ + srcu_idx = srcu_read_lock(&kvm->srcu); + slots = kvm_memslots(kvm); + kvm_for_each_memslot(memslot, slots) { + ret = kvmppc_uv_migrate_mem_slot(kvm, memslot); + if (ret) { + /* + * The pages will remain transitioned. + * Its the callers responsibility to + * terminate the VM, which will undo + * all state of the VM. Till then + * this VM is in a erroneous state. + * Its KVMPPC_SECURE_INIT_DONE will + * remain unset. + */ + ret = H_STATE; + goto out; + } + } + + kvm->arch.secure_guest |= KVMPPC_SECURE_INIT_DONE; + pr_info("LPID %d went secure\n", kvm->arch.lpid); + +out: + srcu_read_unlock(&kvm->srcu, srcu_idx); + return ret; +} + /* * Shares the page with HV, thus making it a normal page. * @@ -451,8 +863,8 @@ out_finalize: * In the former case, uses dev_pagemap_ops.migrate_to_ram handler * to unmap the device page from QEMU's page tables. */ -static unsigned long -kvmppc_share_page(struct kvm *kvm, unsigned long gpa, unsigned long page_shift) +static unsigned long kvmppc_share_page(struct kvm *kvm, unsigned long gpa, + unsigned long page_shift) { int ret = H_PARAMETER; @@ -469,6 +881,11 @@ kvmppc_share_page(struct kvm *kvm, unsigned long gpa, unsigned long page_shift) uvmem_page = pfn_to_page(uvmem_pfn); pvt = uvmem_page->zone_device_data; pvt->skip_page_out = true; + /* + * do not drop the GFN. It is a valid GFN + * that is transitioned to a shared GFN. 
+ */ + pvt->remove_gfn = false; } retry: @@ -482,12 +899,16 @@ retry: uvmem_page = pfn_to_page(uvmem_pfn); pvt = uvmem_page->zone_device_data; pvt->skip_page_out = true; + pvt->remove_gfn = false; /* it continues to be a valid GFN */ kvm_release_pfn_clean(pfn); goto retry; } - if (!uv_page_in(kvm->arch.lpid, pfn << page_shift, gpa, 0, page_shift)) + if (!uv_page_in(kvm->arch.lpid, pfn << page_shift, gpa, 0, + page_shift)) { + kvmppc_gfn_shared(gfn, kvm); ret = H_SUCCESS; + } kvm_release_pfn_clean(pfn); mutex_unlock(&kvm->arch.uvmem_lock); out: @@ -501,11 +922,10 @@ out: * H_PAGE_IN_SHARED flag makes the page shared which means that the same * memory in is visible from both UV and HV. */ -unsigned long -kvmppc_h_svm_page_in(struct kvm *kvm, unsigned long gpa, - unsigned long flags, unsigned long page_shift) +unsigned long kvmppc_h_svm_page_in(struct kvm *kvm, unsigned long gpa, + unsigned long flags, + unsigned long page_shift) { - bool downgrade = false; unsigned long start, end; struct vm_area_struct *vma; int srcu_idx; @@ -526,7 +946,7 @@ kvmppc_h_svm_page_in(struct kvm *kvm, unsigned long gpa, ret = H_PARAMETER; srcu_idx = srcu_read_lock(&kvm->srcu); - mmap_write_lock(kvm->mm); + mmap_read_lock(kvm->mm); start = gfn_to_hva(kvm, gfn); if (kvm_is_error_hva(start)) @@ -542,97 +962,20 @@ kvmppc_h_svm_page_in(struct kvm *kvm, unsigned long gpa, if (!vma || vma->vm_start > start || vma->vm_end < end) goto out_unlock; - if (!kvmppc_svm_page_in(vma, start, end, gpa, kvm, page_shift, - &downgrade)) - ret = H_SUCCESS; + if (kvmppc_svm_page_in(vma, start, end, gpa, kvm, page_shift, + true)) + goto out_unlock; + + ret = H_SUCCESS; + out_unlock: mutex_unlock(&kvm->arch.uvmem_lock); out: - if (downgrade) - mmap_read_unlock(kvm->mm); - else - mmap_write_unlock(kvm->mm); + mmap_read_unlock(kvm->mm); srcu_read_unlock(&kvm->srcu, srcu_idx); return ret; } -/* - * Provision a new page on HV side and copy over the contents - * from secure memory using UV_PAGE_OUT uvcall. - */ -static int -kvmppc_svm_page_out(struct vm_area_struct *vma, unsigned long start, - unsigned long end, unsigned long page_shift, - struct kvm *kvm, unsigned long gpa) -{ - unsigned long src_pfn, dst_pfn = 0; - struct migrate_vma mig; - struct page *dpage, *spage; - struct kvmppc_uvmem_page_pvt *pvt; - unsigned long pfn; - int ret = U_SUCCESS; - - memset(&mig, 0, sizeof(mig)); - mig.vma = vma; - mig.start = start; - mig.end = end; - mig.src = &src_pfn; - mig.dst = &dst_pfn; - mig.pgmap_owner = &kvmppc_uvmem_pgmap; - mig.flags = MIGRATE_VMA_SELECT_DEVICE_PRIVATE; - - mutex_lock(&kvm->arch.uvmem_lock); - /* The requested page is already paged-out, nothing to do */ - if (!kvmppc_gfn_is_uvmem_pfn(gpa >> page_shift, kvm, NULL)) - goto out; - - ret = migrate_vma_setup(&mig); - if (ret) - goto out; - - spage = migrate_pfn_to_page(*mig.src); - if (!spage || !(*mig.src & MIGRATE_PFN_MIGRATE)) - goto out_finalize; - - if (!is_zone_device_page(spage)) - goto out_finalize; - - dpage = alloc_page_vma(GFP_HIGHUSER, vma, start); - if (!dpage) { - ret = -1; - goto out_finalize; - } - - lock_page(dpage); - pvt = spage->zone_device_data; - pfn = page_to_pfn(dpage); - - /* - * This function is used in two cases: - * - When HV touches a secure page, for which we do UV_PAGE_OUT - * - When a secure page is converted to shared page, we *get* - * the page to essentially unmap the device page. In this - * case we skip page-out. 
- */ - if (!pvt->skip_page_out) - ret = uv_page_out(kvm->arch.lpid, pfn << page_shift, - gpa, 0, page_shift); - - if (ret == U_SUCCESS) - *mig.dst = migrate_pfn(pfn) | MIGRATE_PFN_LOCKED; - else { - unlock_page(dpage); - __free_page(dpage); - goto out_finalize; - } - - migrate_vma_pages(&mig); -out_finalize: - migrate_vma_finalize(&mig); -out: - mutex_unlock(&kvm->arch.uvmem_lock); - return ret; -} /* * Fault handler callback that gets called when HV touches any page that @@ -657,7 +1000,8 @@ static vm_fault_t kvmppc_uvmem_migrate_to_ram(struct vm_fault *vmf) /* * Release the device PFN back to the pool * - * Gets called when secure page becomes a normal page during H_SVM_PAGE_OUT. + * Gets called when secure GFN tranistions from a secure-PFN + * to a normal PFN during H_SVM_PAGE_OUT. * Gets called with kvm->arch.uvmem_lock held. */ static void kvmppc_uvmem_page_free(struct page *page) @@ -672,7 +1016,10 @@ static void kvmppc_uvmem_page_free(struct page *page) pvt = page->zone_device_data; page->zone_device_data = NULL; - kvmppc_uvmem_pfn_remove(pvt->gpa >> PAGE_SHIFT, pvt->kvm); + if (pvt->remove_gfn) + kvmppc_gfn_remove(pvt->gpa >> PAGE_SHIFT, pvt->kvm); + else + kvmppc_gfn_secure_mem_pfn(pvt->gpa >> PAGE_SHIFT, pvt->kvm); kfree(pvt); } @@ -744,6 +1091,21 @@ out: return (ret == U_SUCCESS) ? RESUME_GUEST : -EFAULT; } +int kvmppc_uvmem_memslot_create(struct kvm *kvm, const struct kvm_memory_slot *new) +{ + int ret = __kvmppc_uvmem_memslot_create(kvm, new); + + if (!ret) + ret = kvmppc_uv_migrate_mem_slot(kvm, new); + + return ret; +} + +void kvmppc_uvmem_memslot_delete(struct kvm *kvm, const struct kvm_memory_slot *old) +{ + __kvmppc_uvmem_memslot_delete(kvm, old); +} + static u64 kvmppc_get_secmem_size(void) { struct device_node *np; diff --git a/arch/powerpc/kvm/book3s_interrupts.S b/arch/powerpc/kvm/book3s_interrupts.S index 607a9b99c334..25a3679fb590 100644 --- a/arch/powerpc/kvm/book3s_interrupts.S +++ b/arch/powerpc/kvm/book3s_interrupts.S @@ -55,8 +55,7 @@ ****************************************************************************/ /* Registers: - * r3: kvm_run pointer - * r4: vcpu pointer + * r3: vcpu pointer */ _GLOBAL(__kvmppc_vcpu_run) @@ -68,8 +67,8 @@ kvm_start_entry: /* Save host state to the stack */ PPC_STLU r1, -SWITCH_FRAME_SIZE(r1) - /* Save r3 (kvm_run) and r4 (vcpu) */ - SAVE_2GPRS(3, r1) + /* Save r3 (vcpu) */ + SAVE_GPR(3, r1) /* Save non-volatile registers (r14 - r31) */ SAVE_NVGPRS(r1) @@ -82,47 +81,46 @@ kvm_start_entry: PPC_STL r0, _LINK(r1) /* Load non-volatile guest state from the vcpu */ - VCPU_LOAD_NVGPRS(r4) + VCPU_LOAD_NVGPRS(r3) kvm_start_lightweight: /* Copy registers into shadow vcpu so we can access them in real mode */ - mr r3, r4 bl FUNC(kvmppc_copy_to_svcpu) nop - REST_GPR(4, r1) + REST_GPR(3, r1) #ifdef CONFIG_PPC_BOOK3S_64 /* Get the dcbz32 flag */ - PPC_LL r3, VCPU_HFLAGS(r4) - rldicl r3, r3, 0, 63 /* r3 &= 1 */ - stb r3, HSTATE_RESTORE_HID5(r13) + PPC_LL r0, VCPU_HFLAGS(r3) + rldicl r0, r0, 0, 63 /* r3 &= 1 */ + stb r0, HSTATE_RESTORE_HID5(r13) /* Load up guest SPRG3 value, since it's user readable */ - lwz r3, VCPU_SHAREDBE(r4) - cmpwi r3, 0 - ld r5, VCPU_SHARED(r4) + lbz r4, VCPU_SHAREDBE(r3) + cmpwi r4, 0 + ld r5, VCPU_SHARED(r3) beq sprg3_little_endian sprg3_big_endian: #ifdef __BIG_ENDIAN__ - ld r3, VCPU_SHARED_SPRG3(r5) + ld r4, VCPU_SHARED_SPRG3(r5) #else addi r5, r5, VCPU_SHARED_SPRG3 - ldbrx r3, 0, r5 + ldbrx r4, 0, r5 #endif b after_sprg3_load sprg3_little_endian: #ifdef __LITTLE_ENDIAN__ - ld r3, VCPU_SHARED_SPRG3(r5) + ld r4, 
VCPU_SHARED_SPRG3(r5) #else addi r5, r5, VCPU_SHARED_SPRG3 - ldbrx r3, 0, r5 + ldbrx r4, 0, r5 #endif after_sprg3_load: - mtspr SPRN_SPRG3, r3 + mtspr SPRN_SPRG3, r4 #endif /* CONFIG_PPC_BOOK3S_64 */ - PPC_LL r4, VCPU_SHADOW_MSR(r4) /* get shadow_msr */ + PPC_LL r4, VCPU_SHADOW_MSR(r3) /* get shadow_msr */ /* Jump to segment patching handler and into our guest */ bl FUNC(kvmppc_entry_trampoline) @@ -146,7 +144,7 @@ after_sprg3_load: * */ - PPC_LL r3, GPR4(r1) /* vcpu pointer */ + PPC_LL r3, GPR3(r1) /* vcpu pointer */ /* * kvmppc_copy_from_svcpu can clobber volatile registers, save @@ -169,7 +167,7 @@ after_sprg3_load: #endif /* CONFIG_PPC_BOOK3S_64 */ /* R7 = vcpu */ - PPC_LL r7, GPR4(r1) + PPC_LL r7, GPR3(r1) PPC_STL r14, VCPU_GPR(R14)(r7) PPC_STL r15, VCPU_GPR(R15)(r7) @@ -190,11 +188,11 @@ after_sprg3_load: PPC_STL r30, VCPU_GPR(R30)(r7) PPC_STL r31, VCPU_GPR(R31)(r7) - /* Pass the exit number as 3rd argument to kvmppc_handle_exit */ - lwz r5, VCPU_TRAP(r7) + /* Pass the exit number as 2nd argument to kvmppc_handle_exit */ + lwz r4, VCPU_TRAP(r7) - /* Restore r3 (kvm_run) and r4 (vcpu) */ - REST_2GPRS(3, r1) + /* Restore r3 (vcpu) */ + REST_GPR(3, r1) bl FUNC(kvmppc_handle_exit_pr) /* If RESUME_GUEST, get back in the loop */ @@ -223,11 +221,11 @@ kvm_loop_heavyweight: PPC_LL r4, _LINK(r1) PPC_STL r4, (PPC_LR_STKOFF + SWITCH_FRAME_SIZE)(r1) - /* Load vcpu and cpu_run */ - REST_2GPRS(3, r1) + /* Load vcpu */ + REST_GPR(3, r1) /* Load non-volatile guest state from the vcpu */ - VCPU_LOAD_NVGPRS(r4) + VCPU_LOAD_NVGPRS(r3) /* Jump back into the beginning of this function */ b kvm_start_lightweight @@ -235,7 +233,7 @@ kvm_loop_heavyweight: kvm_loop_lightweight: /* We'll need the vcpu pointer */ - REST_GPR(4, r1) + REST_GPR(3, r1) /* Jump back into the beginning of this function */ b kvm_start_lightweight diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c index ed12dfbf9bb5..88fac22fbf09 100644 --- a/arch/powerpc/kvm/book3s_pr.c +++ b/arch/powerpc/kvm/book3s_pr.c @@ -1151,9 +1151,9 @@ static int kvmppc_exit_pr_progint(struct kvm_vcpu *vcpu, unsigned int exit_nr) return r; } -int kvmppc_handle_exit_pr(struct kvm_run *run, struct kvm_vcpu *vcpu, - unsigned int exit_nr) +int kvmppc_handle_exit_pr(struct kvm_vcpu *vcpu, unsigned int exit_nr) { + struct kvm_run *run = vcpu->run; int r = RESUME_HOST; int s; @@ -1826,12 +1826,11 @@ static void kvmppc_core_vcpu_free_pr(struct kvm_vcpu *vcpu) static int kvmppc_vcpu_run_pr(struct kvm_vcpu *vcpu) { - struct kvm_run *run = vcpu->run; int ret; /* Check if we can run the vcpu at all */ if (!vcpu->arch.sane) { - run->exit_reason = KVM_EXIT_INTERNAL_ERROR; + vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR; ret = -EINVAL; goto out; } @@ -1858,7 +1857,7 @@ static int kvmppc_vcpu_run_pr(struct kvm_vcpu *vcpu) kvmppc_fix_ee_before_entry(); - ret = __kvmppc_vcpu_run(run, vcpu); + ret = __kvmppc_vcpu_run(vcpu); kvmppc_clear_debug(vcpu); diff --git a/arch/powerpc/kvm/book3s_rtas.c b/arch/powerpc/kvm/book3s_rtas.c index 26b25994c969..c5e677508d3b 100644 --- a/arch/powerpc/kvm/book3s_rtas.c +++ b/arch/powerpc/kvm/book3s_rtas.c @@ -229,7 +229,9 @@ int kvmppc_rtas_hcall(struct kvm_vcpu *vcpu) */ args_phys = kvmppc_get_gpr(vcpu, 4) & KVM_PAM; + vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu); rc = kvm_read_guest(vcpu->kvm, args_phys, &args, sizeof(args)); + srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx); if (rc) goto fail; diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c index c0d62a917e20..3e1c9f08e302 100644 --- 
a/arch/powerpc/kvm/booke.c +++ b/arch/powerpc/kvm/booke.c @@ -731,12 +731,11 @@ int kvmppc_core_check_requests(struct kvm_vcpu *vcpu) int kvmppc_vcpu_run(struct kvm_vcpu *vcpu) { - struct kvm_run *run = vcpu->run; int ret, s; struct debug_reg debug; if (!vcpu->arch.sane) { - run->exit_reason = KVM_EXIT_INTERNAL_ERROR; + vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR; return -EINVAL; } @@ -778,7 +777,7 @@ int kvmppc_vcpu_run(struct kvm_vcpu *vcpu) vcpu->arch.pgdir = vcpu->kvm->mm->pgd; kvmppc_fix_ee_before_entry(); - ret = __kvmppc_vcpu_run(run, vcpu); + ret = __kvmppc_vcpu_run(vcpu); /* No need for guest_exit. It's done in handle_exit. We also get here with interrupts enabled. */ @@ -982,9 +981,9 @@ static int kvmppc_resume_inst_load(struct kvm_vcpu *vcpu, * * Return value is in the form (errcode<<2 | RESUME_FLAG_HOST | RESUME_FLAG_NV) */ -int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu, - unsigned int exit_nr) +int kvmppc_handle_exit(struct kvm_vcpu *vcpu, unsigned int exit_nr) { + struct kvm_run *run = vcpu->run; int r = RESUME_HOST; int s; int idx; diff --git a/arch/powerpc/kvm/booke_interrupts.S b/arch/powerpc/kvm/booke_interrupts.S index 2e56ab5a5f55..6fa82efe833b 100644 --- a/arch/powerpc/kvm/booke_interrupts.S +++ b/arch/powerpc/kvm/booke_interrupts.S @@ -237,7 +237,7 @@ _GLOBAL(kvmppc_resume_host) /* Switch to kernel stack and jump to handler. */ LOAD_REG_ADDR(r3, kvmppc_handle_exit) mtctr r3 - lwz r3, HOST_RUN(r1) + mr r3, r4 lwz r2, HOST_R2(r1) mr r14, r4 /* Save vcpu pointer. */ @@ -337,15 +337,14 @@ heavyweight_exit: /* Registers: - * r3: kvm_run pointer - * r4: vcpu pointer + * r3: vcpu pointer */ _GLOBAL(__kvmppc_vcpu_run) stwu r1, -HOST_STACK_SIZE(r1) - stw r1, VCPU_HOST_STACK(r4) /* Save stack pointer to vcpu. */ + stw r1, VCPU_HOST_STACK(r3) /* Save stack pointer to vcpu. */ /* Save host state to stack. */ - stw r3, HOST_RUN(r1) + mr r4, r3 mflr r3 stw r3, HOST_STACK_LR(r1) mfcr r5 diff --git a/arch/powerpc/kvm/bookehv_interrupts.S b/arch/powerpc/kvm/bookehv_interrupts.S index c577ba4b3169..8262c14fc9e6 100644 --- a/arch/powerpc/kvm/bookehv_interrupts.S +++ b/arch/powerpc/kvm/bookehv_interrupts.S @@ -434,9 +434,10 @@ _GLOBAL(kvmppc_resume_host) #endif /* Switch to kernel stack and jump to handler. */ - PPC_LL r3, HOST_RUN(r1) + mr r3, r4 mr r5, r14 /* intno */ mr r14, r4 /* Save vcpu pointer. */ + mr r4, r5 bl kvmppc_handle_exit /* Restore vcpu pointer and the nonvolatiles we used. */ @@ -525,15 +526,14 @@ heavyweight_exit: blr /* Registers: - * r3: kvm_run pointer - * r4: vcpu pointer + * r3: vcpu pointer */ _GLOBAL(__kvmppc_vcpu_run) stwu r1, -HOST_STACK_SIZE(r1) - PPC_STL r1, VCPU_HOST_STACK(r4) /* Save stack pointer to vcpu. */ + PPC_STL r1, VCPU_HOST_STACK(r3) /* Save stack pointer to vcpu. */ /* Save host state to stack. 
*/ - PPC_STL r3, HOST_RUN(r1) + mr r4, r3 mflr r3 mfcr r5 PPC_STL r3, HOST_STACK_LR(r1) diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index aaa7b62f2f82..13999123b735 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -403,7 +403,10 @@ int kvmppc_ld(struct kvm_vcpu *vcpu, ulong *eaddr, int size, void *ptr, return EMULATE_DONE; } - if (kvm_read_guest(vcpu->kvm, pte.raddr, ptr, size)) + vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu); + rc = kvm_read_guest(vcpu->kvm, pte.raddr, ptr, size); + srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx); + if (rc) return EMULATE_DO_MMIO; return EMULATE_DONE; diff --git a/arch/powerpc/mm/book3s64/hash_utils.c b/arch/powerpc/mm/book3s64/hash_utils.c index 1478fceeb683..1da9dbba9217 100644 --- a/arch/powerpc/mm/book3s64/hash_utils.c +++ b/arch/powerpc/mm/book3s64/hash_utils.c @@ -1115,9 +1115,8 @@ void hash__early_init_mmu_secondary(void) && cpu_has_feature(CPU_FTR_HVMODE)) tlbiel_all(); -#ifdef CONFIG_PPC_MEM_KEYS - mtspr(SPRN_UAMOR, default_uamor); -#endif + if (IS_ENABLED(CONFIG_PPC_MEM_KEYS) && mmu_has_feature(MMU_FTR_PKEY)) + mtspr(SPRN_UAMOR, default_uamor); } #endif /* CONFIG_SMP */ diff --git a/arch/powerpc/mm/book3s64/pkeys.c b/arch/powerpc/mm/book3s64/pkeys.c index 69a6b87f2bb4..b1d091a97611 100644 --- a/arch/powerpc/mm/book3s64/pkeys.c +++ b/arch/powerpc/mm/book3s64/pkeys.c @@ -73,12 +73,6 @@ static int scan_pkey_feature(void) if (early_radix_enabled()) return 0; - /* - * Only P7 and above supports SPRN_AMR update with MSR[PR] = 1 - */ - if (!early_cpu_has_feature(CPU_FTR_ARCH_206)) - return 0; - ret = of_scan_flat_dt(dt_scan_storage_keys, &pkeys_total); if (ret == 0) { /* @@ -124,6 +118,12 @@ void __init pkey_early_init_devtree(void) __builtin_popcountl(ARCH_VM_PKEY_FLAGS >> VM_PKEY_SHIFT) != (sizeof(u64) * BITS_PER_BYTE)); + /* + * Only P7 and above supports SPRN_AMR update with MSR[PR] = 1 + */ + if (!early_cpu_has_feature(CPU_FTR_ARCH_206)) + return; + /* scan the device tree for pkey feature */ pkeys_total = scan_pkey_feature(); if (!pkeys_total) diff --git a/arch/powerpc/mm/copro_fault.c b/arch/powerpc/mm/copro_fault.c index b83abbead4a2..8acd00178956 100644 --- a/arch/powerpc/mm/copro_fault.c +++ b/arch/powerpc/mm/copro_fault.c @@ -64,7 +64,7 @@ int copro_handle_mm_fault(struct mm_struct *mm, unsigned long ea, } ret = 0; - *flt = handle_mm_fault(vma, ea, is_write ? FAULT_FLAG_WRITE : 0); + *flt = handle_mm_fault(vma, ea, is_write ? FAULT_FLAG_WRITE : 0, NULL); if (unlikely(*flt & VM_FAULT_ERROR)) { if (*flt & VM_FAULT_OOM) { ret = -ENOMEM; @@ -76,11 +76,6 @@ int copro_handle_mm_fault(struct mm_struct *mm, unsigned long ea, BUG(); } - if (*flt & VM_FAULT_MAJOR) - current->maj_flt++; - else - current->min_flt++; - out_unlock: mmap_read_unlock(mm); return ret; diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c index 925a7231abb3..0add963a849b 100644 --- a/arch/powerpc/mm/fault.c +++ b/arch/powerpc/mm/fault.c @@ -511,7 +511,7 @@ retry: * make sure we exit gracefully rather than endlessly redo * the fault. */ - fault = handle_mm_fault(vma, address, flags); + fault = handle_mm_fault(vma, address, flags, regs); major |= fault & VM_FAULT_MAJOR; @@ -537,14 +537,9 @@ retry: /* * Major/minor page fault accounting. 
*/ - if (major) { - current->maj_flt++; - perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, regs, address); + if (major) cmo_account_page_fault(); - } else { - current->min_flt++; - perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, regs, address); - } + return 0; } NOKPROBE_SYMBOL(__do_page_fault); diff --git a/arch/riscv/include/asm/uaccess.h b/arch/riscv/include/asm/uaccess.h index 8ce9d607b53d..f56c66b3f5fe 100644 --- a/arch/riscv/include/asm/uaccess.h +++ b/arch/riscv/include/asm/uaccess.h @@ -8,6 +8,8 @@ #ifndef _ASM_RISCV_UACCESS_H #define _ASM_RISCV_UACCESS_H +#include <asm/pgtable.h> /* for TASK_SIZE */ + /* * User space memory access functions */ @@ -62,11 +64,9 @@ static inline void set_fs(mm_segment_t fs) current_thread_info()->addr_limit = fs; } -#define segment_eq(a, b) ((a).seg == (b).seg) - +#define uaccess_kernel() (get_fs().seg == KERNEL_DS.seg) #define user_addr_max() (get_fs().seg) - /** * access_ok: - Checks if a user space pointer is valid * @addr: User space pointer to start of block to check diff --git a/arch/riscv/include/asm/vdso/gettimeofday.h b/arch/riscv/include/asm/vdso/gettimeofday.h index 3099362d9f26..f839f16e0d2a 100644 --- a/arch/riscv/include/asm/vdso/gettimeofday.h +++ b/arch/riscv/include/asm/vdso/gettimeofday.h @@ -60,7 +60,8 @@ int clock_getres_fallback(clockid_t _clkid, struct __kernel_timespec *_ts) return ret; } -static __always_inline u64 __arch_get_hw_counter(s32 clock_mode) +static __always_inline u64 __arch_get_hw_counter(s32 clock_mode, + const struct vdso_data *vd) { /* * The purpose of csr_read(CSR_TIME) is to trap the system into diff --git a/arch/riscv/kernel/head.S b/arch/riscv/kernel/head.S index d0c5c316e9bb..0a4e81b8dc79 100644 --- a/arch/riscv/kernel/head.S +++ b/arch/riscv/kernel/head.S @@ -77,16 +77,10 @@ relocate: csrw CSR_SATP, a0 .align 2 1: - /* Set trap vector to exception handler */ - la a0, handle_exception + /* Set trap vector to spin forever to help debug */ + la a0, .Lsecondary_park csrw CSR_TVEC, a0 - /* - * Set sup0 scratch register to 0, indicating to exception vector that - * we are presently executing in kernel. - */ - csrw CSR_SCRATCH, zero - /* Reload the global pointer */ .option push .option norelax @@ -144,9 +138,23 @@ secondary_start_common: la a0, swapper_pg_dir call relocate #endif + call setup_trap_vector tail smp_callin #endif /* CONFIG_SMP */ +.align 2 +setup_trap_vector: + /* Set trap vector to exception handler */ + la a0, handle_exception + csrw CSR_TVEC, a0 + + /* + * Set sup0 scratch register to 0, indicating to exception vector that + * we are presently executing in kernel. + */ + csrw CSR_SCRATCH, zero + ret + .Lsecondary_park: /* We lack SMP support or have too many harts, so park this hart */ wfi @@ -240,6 +248,7 @@ clear_bss_done: call relocate #endif /* CONFIG_MMU */ + call setup_trap_vector /* Restore C environment */ la tp, init_task sw zero, TASK_TI_CPU(tp) diff --git a/arch/riscv/mm/fault.c b/arch/riscv/mm/fault.c index 5873835a3e6b..716d64e36f83 100644 --- a/arch/riscv/mm/fault.c +++ b/arch/riscv/mm/fault.c @@ -109,7 +109,7 @@ good_area: * make sure we exit gracefully rather than endlessly redo * the fault. */ - fault = handle_mm_fault(vma, addr, flags); + fault = handle_mm_fault(vma, addr, flags, regs); /* * If we need to retry but a fatal signal is pending, handle the @@ -127,21 +127,7 @@ good_area: BUG(); } - /* - * Major/minor page fault accounting is only done on the - * initial attempt. 
If we go through a retry, it is extremely - * likely that the page will be found in page cache at that point. - */ if (flags & FAULT_FLAG_ALLOW_RETRY) { - if (fault & VM_FAULT_MAJOR) { - tsk->maj_flt++; - perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, - 1, regs, addr); - } else { - tsk->min_flt++; - perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, - 1, regs, addr); - } if (fault & VM_FAULT_RETRY) { flags |= FAULT_FLAG_TRIED; diff --git a/arch/s390/Kbuild b/arch/s390/Kbuild index e63940bb57cd..8b98c501142d 100644 --- a/arch/s390/Kbuild +++ b/arch/s390/Kbuild @@ -7,5 +7,4 @@ obj-$(CONFIG_S390_HYPFS_FS) += hypfs/ obj-$(CONFIG_APPLDATA_BASE) += appldata/ obj-y += net/ obj-$(CONFIG_PCI) += pci/ -obj-$(CONFIG_NUMA) += numa/ obj-$(CONFIG_ARCH_HAS_KEXEC_PURGATORY) += purgatory/ diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index 8c0b52940165..3d86e12e8e3c 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -126,7 +126,6 @@ config S390 select HAVE_ARCH_JUMP_LABEL_RELATIVE select HAVE_ARCH_KASAN select HAVE_ARCH_KASAN_VMALLOC - select CLOCKSOURCE_VALIDATE_LAST_CYCLE select CPU_NO_EFFICIENT_FFS if !HAVE_MARCH_Z9_109_FEATURES select HAVE_ARCH_SECCOMP_FILTER select HAVE_ARCH_SOFT_DIRTY @@ -766,6 +765,7 @@ config VFIO_AP def_tristate n prompt "VFIO support for AP devices" depends on S390_AP_IOMMU && VFIO_MDEV_DEVICE && KVM + depends on ZCRYPT help This driver grants access to Adjunct Processor (AP) devices via the VFIO mediated device interface. diff --git a/arch/s390/include/asm/atomic.h b/arch/s390/include/asm/atomic.h index cae473a7b6f7..11c5952e1afa 100644 --- a/arch/s390/include/asm/atomic.h +++ b/arch/s390/include/asm/atomic.h @@ -45,7 +45,11 @@ static inline int atomic_fetch_add(int i, atomic_t *v) static inline void atomic_add(int i, atomic_t *v) { #ifdef CONFIG_HAVE_MARCH_Z196_FEATURES - if (__builtin_constant_p(i) && (i > -129) && (i < 128)) { + /* + * Order of conditions is important to circumvent gcc 10 bug: + * https://gcc.gnu.org/pipermail/gcc-patches/2020-July/549318.html + */ + if ((i > -129) && (i < 128) && __builtin_constant_p(i)) { __atomic_add_const(i, &v->counter); return; } @@ -112,7 +116,11 @@ static inline s64 atomic64_fetch_add(s64 i, atomic64_t *v) static inline void atomic64_add(s64 i, atomic64_t *v) { #ifdef CONFIG_HAVE_MARCH_Z196_FEATURES - if (__builtin_constant_p(i) && (i > -129) && (i < 128)) { + /* + * Order of conditions is important to circumvent gcc 10 bug: + * https://gcc.gnu.org/pipermail/gcc-patches/2020-July/549318.html + */ + if ((i > -129) && (i < 128) && __builtin_constant_p(i)) { __atomic64_add_const(i, (long *)&v->counter); return; } diff --git a/arch/s390/include/asm/debug.h b/arch/s390/include/asm/debug.h index 17a26261f288..c1b82bcc017c 100644 --- a/arch/s390/include/asm/debug.h +++ b/arch/s390/include/asm/debug.h @@ -2,7 +2,7 @@ /* * S/390 debug facility * - * Copyright IBM Corp. 1999, 2000 + * Copyright IBM Corp. 
1999, 2020 */ #ifndef DEBUG_H #define DEBUG_H @@ -26,19 +26,14 @@ #define DEBUG_DATA(entry) (char *)(entry + 1) /* data is stored behind */ /* the entry information */ -#define __DEBUG_FEATURE_VERSION 2 /* version of debug feature */ +#define __DEBUG_FEATURE_VERSION 3 /* version of debug feature */ struct __debug_entry { - union { - struct { - unsigned long clock : 52; - unsigned long exception : 1; - unsigned long level : 3; - unsigned long cpuid : 8; - } fields; - unsigned long stck; - } id; + unsigned long clock : 60; + unsigned long exception : 1; + unsigned long level : 3; void *caller; + unsigned short cpu; } __packed; typedef struct __debug_entry debug_entry_t; diff --git a/arch/s390/include/asm/topology.h b/arch/s390/include/asm/topology.h index fbb507504a3b..3a0ac0c7a9a3 100644 --- a/arch/s390/include/asm/topology.h +++ b/arch/s390/include/asm/topology.h @@ -86,12 +86,6 @@ static inline const struct cpumask *cpumask_of_node(int node) #define pcibus_to_node(bus) __pcibus_to_node(bus) -#define node_distance(a, b) __node_distance(a, b) -static inline int __node_distance(int a, int b) -{ - return 0; -} - #else /* !CONFIG_NUMA */ #define numa_node_id numa_node_id diff --git a/arch/s390/include/asm/uaccess.h b/arch/s390/include/asm/uaccess.h index 324438889fe1..f09444d6aeab 100644 --- a/arch/s390/include/asm/uaccess.h +++ b/arch/s390/include/asm/uaccess.h @@ -32,7 +32,7 @@ #define USER_DS_SACF (3) #define get_fs() (current->thread.mm_segment) -#define segment_eq(a,b) (((a) & 2) == ((b) & 2)) +#define uaccess_kernel() ((get_fs() & 2) == KERNEL_DS) void set_fs(mm_segment_t fs); diff --git a/arch/s390/kernel/Makefile b/arch/s390/kernel/Makefile index a8f136943deb..efca70970761 100644 --- a/arch/s390/kernel/Makefile +++ b/arch/s390/kernel/Makefile @@ -49,6 +49,7 @@ CFLAGS_REMOVE_nospec-branch.o += $(CC_FLAGS_EXPOLINE) obj-$(CONFIG_MODULES) += module.o obj-$(CONFIG_SCHED_TOPOLOGY) += topology.o +obj-$(CONFIG_NUMA) += numa.o obj-$(CONFIG_AUDIT) += audit.o compat-obj-$(CONFIG_AUDIT) += compat_audit.o obj-$(CONFIG_COMPAT) += compat_linux.o compat_signal.o diff --git a/arch/s390/kernel/debug.c b/arch/s390/kernel/debug.c index beb4b44a11d1..b6619ae9a3e0 100644 --- a/arch/s390/kernel/debug.c +++ b/arch/s390/kernel/debug.c @@ -2,7 +2,7 @@ /* * S/390 debug facility * - * Copyright IBM Corp. 1999, 2012 + * Copyright IBM Corp. 
1999, 2020 * * Author(s): Michael Holzheu (holzheu@de.ibm.com), * Holger Smolinski (Holger.Smolinski@de.ibm.com) @@ -433,7 +433,7 @@ static int debug_format_entry(file_private_info_t *p_info) act_entry = (debug_entry_t *) ((char *)id_snap->areas[p_info->act_area] [p_info->act_page] + p_info->act_entry); - if (act_entry->id.stck == 0LL) + if (act_entry->clock == 0LL) goto out; /* empty entry */ if (view->header_proc) len += view->header_proc(id_snap, view, p_info->act_area, @@ -829,12 +829,17 @@ static inline debug_entry_t *get_active_entry(debug_info_t *id) static inline void debug_finish_entry(debug_info_t *id, debug_entry_t *active, int level, int exception) { - active->id.stck = get_tod_clock_fast() - - *(unsigned long long *) &tod_clock_base[1]; - active->id.fields.cpuid = smp_processor_id(); + unsigned char clk[STORE_CLOCK_EXT_SIZE]; + unsigned long timestamp; + + get_tod_clock_ext(clk); + timestamp = *(unsigned long *) &clk[0] >> 4; + timestamp -= TOD_UNIX_EPOCH >> 12; + active->clock = timestamp; + active->cpu = smp_processor_id(); active->caller = __builtin_return_address(0); - active->id.fields.exception = exception; - active->id.fields.level = level; + active->exception = exception; + active->level = level; proceed_active_entry(id); if (exception) proceed_active_area(id); @@ -1398,25 +1403,24 @@ static int debug_hex_ascii_format_fn(debug_info_t *id, struct debug_view *view, int debug_dflt_header_fn(debug_info_t *id, struct debug_view *view, int area, debug_entry_t *entry, char *out_buf) { - unsigned long base, sec, usec; + unsigned long sec, usec; unsigned long caller; unsigned int level; char *except_str; int rc = 0; - level = entry->id.fields.level; - base = (*(unsigned long *) &tod_clock_base[0]) >> 4; - sec = (entry->id.stck >> 12) + base - (TOD_UNIX_EPOCH >> 12); + level = entry->level; + sec = entry->clock; usec = do_div(sec, USEC_PER_SEC); - if (entry->id.fields.exception) + if (entry->exception) except_str = "*"; else except_str = "-"; caller = (unsigned long) entry->caller; - rc += sprintf(out_buf, "%02i %011ld:%06lu %1u %1s %02i %pK ", + rc += sprintf(out_buf, "%02i %011ld:%06lu %1u %1s %04u %pK ", area, sec, usec, level, except_str, - entry->id.fields.cpuid, (void *)caller); + entry->cpu, (void *)caller); return rc; } EXPORT_SYMBOL(debug_dflt_header_fn); diff --git a/arch/s390/numa/numa.c b/arch/s390/kernel/numa.c index 51c5a9f6e525..51c5a9f6e525 100644 --- a/arch/s390/numa/numa.c +++ b/arch/s390/kernel/numa.c diff --git a/arch/s390/kernel/syscalls/syscall.tbl b/arch/s390/kernel/syscalls/syscall.tbl index f1fda4375526..10456bc936fb 100644 --- a/arch/s390/kernel/syscalls/syscall.tbl +++ b/arch/s390/kernel/syscalls/syscall.tbl @@ -138,7 +138,7 @@ 146 common writev sys_writev compat_sys_writev 147 common getsid sys_getsid sys_getsid 148 common fdatasync sys_fdatasync sys_fdatasync -149 common _sysctl sys_sysctl compat_sys_sysctl +149 common _sysctl - - 150 common mlock sys_mlock sys_mlock 151 common munlock sys_munlock sys_munlock 152 common mlockall sys_mlockall sys_mlockall diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c index 1608fd99bbee..2f177298c663 100644 --- a/arch/s390/kvm/interrupt.c +++ b/arch/s390/kvm/interrupt.c @@ -2768,7 +2768,7 @@ static struct page *get_map_page(struct kvm *kvm, u64 uaddr) struct page *page = NULL; mmap_read_lock(kvm->mm); - get_user_pages_remote(NULL, kvm->mm, uaddr, 1, FOLL_WRITE, + get_user_pages_remote(kvm->mm, uaddr, 1, FOLL_WRITE, &page, NULL, NULL); mmap_read_unlock(kvm->mm); return page; diff --git 
a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 66da278a67fb..6b74b92c1a58 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -1892,7 +1892,7 @@ static long kvm_s390_set_skeys(struct kvm *kvm, struct kvm_s390_skeys *args) r = set_guest_storage_key(current->mm, hva, keys[i], 0); if (r) { - r = fixup_user_fault(current, current->mm, hva, + r = fixup_user_fault(current->mm, hva, FAULT_FLAG_WRITE, &unlocked); if (r) break; diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c index 2f721a923b54..cd74989ce0b0 100644 --- a/arch/s390/kvm/priv.c +++ b/arch/s390/kvm/priv.c @@ -273,7 +273,7 @@ retry: rc = get_guest_storage_key(current->mm, vmaddr, &key); if (rc) { - rc = fixup_user_fault(current, current->mm, vmaddr, + rc = fixup_user_fault(current->mm, vmaddr, FAULT_FLAG_WRITE, &unlocked); if (!rc) { mmap_read_unlock(current->mm); @@ -319,7 +319,7 @@ retry: mmap_read_lock(current->mm); rc = reset_guest_reference_bit(current->mm, vmaddr); if (rc < 0) { - rc = fixup_user_fault(current, current->mm, vmaddr, + rc = fixup_user_fault(current->mm, vmaddr, FAULT_FLAG_WRITE, &unlocked); if (!rc) { mmap_read_unlock(current->mm); @@ -390,7 +390,7 @@ static int handle_sske(struct kvm_vcpu *vcpu) m3 & SSKE_MC); if (rc < 0) { - rc = fixup_user_fault(current, current->mm, vmaddr, + rc = fixup_user_fault(current->mm, vmaddr, FAULT_FLAG_WRITE, &unlocked); rc = !rc ? -EAGAIN : rc; } @@ -1094,7 +1094,7 @@ static int handle_pfmf(struct kvm_vcpu *vcpu) rc = cond_set_guest_storage_key(current->mm, vmaddr, key, NULL, nq, mr, mc); if (rc < 0) { - rc = fixup_user_fault(current, current->mm, vmaddr, + rc = fixup_user_fault(current->mm, vmaddr, FAULT_FLAG_WRITE, &unlocked); rc = !rc ? -EAGAIN : rc; } diff --git a/arch/s390/lib/test_unwind.c b/arch/s390/lib/test_unwind.c index eb382ceaa116..7c988994931f 100644 --- a/arch/s390/lib/test_unwind.c +++ b/arch/s390/lib/test_unwind.c @@ -64,6 +64,7 @@ static noinline int test_unwind(struct task_struct *task, struct pt_regs *regs, break; if (state.reliable && !addr) { pr_err("unwind state reliable but addr is 0\n"); + kfree(bt); return -EINVAL; } sprint_symbol(sym, addr); diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c index aebf9183bedd..4c8c063bce5b 100644 --- a/arch/s390/mm/fault.c +++ b/arch/s390/mm/fault.c @@ -476,7 +476,7 @@ retry: * make sure we exit gracefully rather than endlessly redo * the fault. */ - fault = handle_mm_fault(vma, address, flags); + fault = handle_mm_fault(vma, address, flags, regs); if (fault_signal_pending(fault, regs)) { fault = VM_FAULT_SIGNAL; if (flags & FAULT_FLAG_RETRY_NOWAIT) @@ -486,21 +486,7 @@ retry: if (unlikely(fault & VM_FAULT_ERROR)) goto out_up; - /* - * Major/minor page fault accounting is only done on the - * initial attempt. If we go through a retry, it is extremely - * likely that the page will be found in page cache at that point. 
- */ if (flags & FAULT_FLAG_ALLOW_RETRY) { - if (fault & VM_FAULT_MAJOR) { - tsk->maj_flt++; - perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, - regs, address); - } else { - tsk->min_flt++; - perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, - regs, address); - } if (fault & VM_FAULT_RETRY) { if (IS_ENABLED(CONFIG_PGSTE) && gmap && (flags & FAULT_FLAG_RETRY_NOWAIT)) { diff --git a/arch/s390/mm/gmap.c b/arch/s390/mm/gmap.c index 190357ff86b3..373542ca1113 100644 --- a/arch/s390/mm/gmap.c +++ b/arch/s390/mm/gmap.c @@ -649,7 +649,7 @@ retry: rc = vmaddr; goto out_up; } - if (fixup_user_fault(current, gmap->mm, vmaddr, fault_flags, + if (fixup_user_fault(gmap->mm, vmaddr, fault_flags, &unlocked)) { rc = -EFAULT; goto out_up; @@ -879,7 +879,7 @@ static int gmap_pte_op_fixup(struct gmap *gmap, unsigned long gaddr, BUG_ON(gmap_is_shadow(gmap)); fault_flags = (prot == PROT_WRITE) ? FAULT_FLAG_WRITE : 0; - if (fixup_user_fault(current, mm, vmaddr, fault_flags, &unlocked)) + if (fixup_user_fault(mm, vmaddr, fault_flags, &unlocked)) return -EFAULT; if (unlocked) /* lost mmap_lock, caller has to retry __gmap_translate */ @@ -2485,23 +2485,36 @@ void gmap_sync_dirty_log_pmd(struct gmap *gmap, unsigned long bitmap[4], } EXPORT_SYMBOL_GPL(gmap_sync_dirty_log_pmd); +#ifdef CONFIG_TRANSPARENT_HUGEPAGE +static int thp_split_walk_pmd_entry(pmd_t *pmd, unsigned long addr, + unsigned long end, struct mm_walk *walk) +{ + struct vm_area_struct *vma = walk->vma; + + split_huge_pmd(vma, pmd, addr); + return 0; +} + +static const struct mm_walk_ops thp_split_walk_ops = { + .pmd_entry = thp_split_walk_pmd_entry, +}; + static inline void thp_split_mm(struct mm_struct *mm) { -#ifdef CONFIG_TRANSPARENT_HUGEPAGE struct vm_area_struct *vma; - unsigned long addr; for (vma = mm->mmap; vma != NULL; vma = vma->vm_next) { - for (addr = vma->vm_start; - addr < vma->vm_end; - addr += PAGE_SIZE) - follow_page(vma, addr, FOLL_SPLIT); vma->vm_flags &= ~VM_HUGEPAGE; vma->vm_flags |= VM_NOHUGEPAGE; + walk_page_vma(vma, &thp_split_walk_ops, NULL); } mm->def_flags |= VM_NOHUGEPAGE; -#endif } +#else +static inline void thp_split_mm(struct mm_struct *mm) +{ +} +#endif /* CONFIG_TRANSPARENT_HUGEPAGE */ /* * Remove all empty zero pages from the mapping for lazy refaulting diff --git a/arch/s390/numa/Makefile b/arch/s390/numa/Makefile deleted file mode 100644 index c89d26f4f77d..000000000000 --- a/arch/s390/numa/Makefile +++ /dev/null @@ -1,2 +0,0 @@ -# SPDX-License-Identifier: GPL-2.0 -obj-y += numa.o diff --git a/arch/sh/Kconfig b/arch/sh/Kconfig index 9fc2b010e938..d20927128fce 100644 --- a/arch/sh/Kconfig +++ b/arch/sh/Kconfig @@ -1,75 +1,77 @@ # SPDX-License-Identifier: GPL-2.0 config SUPERH def_bool y + select ARCH_32BIT_OFF_T + select ARCH_HAVE_CUSTOM_GPIO_H + select ARCH_HAVE_NMI_SAFE_CMPXCHG if (GUSA_RB || CPU_SH4A) select ARCH_HAS_BINFMT_FLAT if !MMU + select ARCH_HAS_GIGANTIC_PAGE + select ARCH_HAS_GCOV_PROFILE_ALL select ARCH_HAS_PTE_SPECIAL select ARCH_HAS_TICK_BROADCAST if GENERIC_CLOCKEVENTS_BROADCAST + select ARCH_HIBERNATION_POSSIBLE if MMU select ARCH_MIGHT_HAVE_PC_PARPORT - select HAVE_PATA_PLATFORM + select ARCH_WANT_IPC_PARSE_VERSION select CLKDEV_LOOKUP + select CPU_NO_EFFICIENT_FFS select DMA_DECLARE_COHERENT - select HAVE_IDE if HAS_IOPORT_MAP - select HAVE_OPROFILE + select GENERIC_ATOMIC64 + select GENERIC_CLOCKEVENTS + select GENERIC_CMOS_UPDATE if SH_SH03 || SH_DREAMCAST + select GENERIC_IDLE_POLL_SETUP + select GENERIC_IRQ_SHOW + select GENERIC_PCI_IOMAP if PCI + select GENERIC_SCHED_CLOCK + select 
GENERIC_STRNCPY_FROM_USER + select GENERIC_STRNLEN_USER + select GENERIC_SMP_IDLE_THREAD + select GUP_GET_PTE_LOW_HIGH if X2TLB + select HAVE_ARCH_AUDITSYSCALL + select HAVE_ARCH_KGDB + select HAVE_ARCH_SECCOMP_FILTER select HAVE_ARCH_TRACEHOOK - select HAVE_PERF_EVENTS + select HAVE_COPY_THREAD_TLS select HAVE_DEBUG_BUGVERBOSE - select HAVE_FAST_GUP if MMU - select ARCH_HAVE_CUSTOM_GPIO_H - select ARCH_HAVE_NMI_SAFE_CMPXCHG if (GUSA_RB || CPU_SH4A) - select ARCH_HAS_GCOV_PROFILE_ALL - select PERF_USE_VMALLOC select HAVE_DEBUG_KMEMLEAK - select HAVE_KERNEL_GZIP - select CPU_NO_EFFICIENT_FFS + select HAVE_DYNAMIC_FTRACE + select HAVE_FAST_GUP if MMU + select HAVE_FUNCTION_GRAPH_TRACER + select HAVE_FUNCTION_TRACER + select HAVE_FUTEX_CMPXCHG if FUTEX + select HAVE_FTRACE_MCOUNT_RECORD + select HAVE_HW_BREAKPOINT + select HAVE_IDE if HAS_IOPORT_MAP + select HAVE_IOREMAP_PROT if MMU && !X2TLB select HAVE_KERNEL_BZIP2 + select HAVE_KERNEL_GZIP select HAVE_KERNEL_LZMA - select HAVE_KERNEL_XZ select HAVE_KERNEL_LZO + select HAVE_KERNEL_XZ + select HAVE_KPROBES + select HAVE_KRETPROBES + select HAVE_MIXED_BREAKPOINTS_REGS + select HAVE_MOD_ARCH_SPECIFIC if DWARF_UNWINDER + select HAVE_NMI + select HAVE_OPROFILE + select HAVE_PATA_PLATFORM + select HAVE_PERF_EVENTS + select HAVE_REGS_AND_STACK_ACCESS_API select HAVE_UID16 - select ARCH_WANT_IPC_PARSE_VERSION + select HAVE_STACKPROTECTOR select HAVE_SYSCALL_TRACEPOINTS - select HAVE_REGS_AND_STACK_ACCESS_API - select MAY_HAVE_SPARSE_IRQ select IRQ_FORCED_THREADING - select RTC_LIB - select GENERIC_ATOMIC64 - select GENERIC_IRQ_SHOW - select GENERIC_SMP_IDLE_THREAD - select GENERIC_IDLE_POLL_SETUP - select GENERIC_CLOCKEVENTS - select GENERIC_CMOS_UPDATE if SH_SH03 || SH_DREAMCAST - select GENERIC_PCI_IOMAP if PCI - select GENERIC_SCHED_CLOCK - select GENERIC_STRNCPY_FROM_USER - select GENERIC_STRNLEN_USER - select HAVE_MOD_ARCH_SPECIFIC if DWARF_UNWINDER + select MAY_HAVE_SPARSE_IRQ select MODULES_USE_ELF_RELA + select NEED_SG_DMA_LENGTH + select NO_DMA if !MMU && !DMA_COHERENT select NO_GENERIC_PCI_IOPORT_MAP if PCI - select OLD_SIGSUSPEND select OLD_SIGACTION + select OLD_SIGSUSPEND select PCI_DOMAINS if PCI - select HAVE_ARCH_AUDITSYSCALL - select HAVE_FUTEX_CMPXCHG if FUTEX - select HAVE_NMI - select NEED_SG_DMA_LENGTH - select ARCH_HAS_GIGANTIC_PAGE - select ARCH_32BIT_OFF_T - select GUP_GET_PTE_LOW_HIGH if X2TLB - select HAVE_KPROBES - select HAVE_KRETPROBES - select HAVE_IOREMAP_PROT if MMU && !X2TLB - select HAVE_FUNCTION_TRACER - select HAVE_FTRACE_MCOUNT_RECORD - select HAVE_DYNAMIC_FTRACE - select ARCH_WANT_IPC_PARSE_VERSION - select HAVE_FUNCTION_GRAPH_TRACER - select HAVE_ARCH_KGDB - select HAVE_HW_BREAKPOINT - select HAVE_MIXED_BREAKPOINTS_REGS select PERF_EVENTS - select ARCH_HIBERNATION_POSSIBLE if MMU + select PERF_USE_VMALLOC + select RTC_LIB select SPARSE_IRQ - select HAVE_STACKPROTECTOR help The SuperH is a RISC processor targeted for use in embedded systems and consumer electronics; it was also used in the Sega Dreamcast @@ -123,8 +125,8 @@ config ARCH_HAS_ILOG2_U64 config NO_IOPORT_MAP def_bool !PCI - depends on !SH_CAYMAN && !SH_SH4202_MICRODEV && !SH_SHMIN && \ - !SH_HP6XX && !SH_SOLUTION_ENGINE + depends on !SH_SH4202_MICRODEV && !SH_SHMIN && !SH_HP6XX && \ + !SH_SOLUTION_ENGINE config IO_TRAPPED bool @@ -136,8 +138,10 @@ config DMA_COHERENT bool config DMA_NONCOHERENT - def_bool !DMA_COHERENT + def_bool !NO_DMA && !DMA_COHERENT + select ARCH_HAS_DMA_PREP_COHERENT select ARCH_HAS_SYNC_DMA_FOR_DEVICE + select 
DMA_DIRECT_REMAP config PGTABLE_LEVELS default 3 if X2TLB @@ -630,7 +634,7 @@ config SMP Y to "Enhanced Real Time Clock Support", below. See also <file:Documentation/admin-guide/lockup-watchdogs.rst> and the SMP-HOWTO - available at <http://www.tldp.org/docs.html#howto>. + available at <https://www.tldp.org/docs.html#howto>. If you don't know what to do here, say N. @@ -726,7 +730,6 @@ config ZERO_PAGE_OFFSET config BOOT_LINK_OFFSET hex default "0x00210000" if SH_SHMIN - default "0x00400000" if SH_CAYMAN default "0x00810000" if SH_7780_SOLUTION_ENGINE default "0x009e0000" if SH_TITAN default "0x01800000" if SH_SDK7780 diff --git a/arch/sh/Makefile b/arch/sh/Makefile index da9cf952f33c..2faebfd72eca 100644 --- a/arch/sh/Makefile +++ b/arch/sh/Makefile @@ -15,11 +15,7 @@ ifneq ($(SUBARCH),$(ARCH)) endif endif -ifeq ($(ARCH),sh) KBUILD_DEFCONFIG := shx3_defconfig -else -KBUILD_DEFCONFIG := cayman_defconfig -endif isa-y := any isa-$(CONFIG_SH_DSP) := sh @@ -143,7 +139,6 @@ machdir-$(CONFIG_SH_SH7763RDP) += mach-sh7763rdp machdir-$(CONFIG_SH_SH4202_MICRODEV) += mach-microdev machdir-$(CONFIG_SH_LANDISK) += mach-landisk machdir-$(CONFIG_SH_LBOX_RE2) += mach-lboxre2 -machdir-$(CONFIG_SH_CAYMAN) += mach-cayman machdir-$(CONFIG_SH_RSK) += mach-rsk ifneq ($(machdir-y),) diff --git a/arch/sh/boards/Kconfig b/arch/sh/boards/Kconfig index fb0ca0c1efe1..83bcb6d2daca 100644 --- a/arch/sh/boards/Kconfig +++ b/arch/sh/boards/Kconfig @@ -340,12 +340,6 @@ config SH_MAGIC_PANEL_R2 help Select Magic Panel R2 if configuring for Magic Panel R2. -config SH_CAYMAN - bool "Hitachi Cayman" - depends on CPU_SUBTYPE_SH5_101 || CPU_SUBTYPE_SH5_103 - select HAVE_PCI - select ARCH_MIGHT_HAVE_PC_SERIO - config SH_POLARIS bool "SMSC Polaris" select CPU_HAS_IPR_IRQ diff --git a/arch/sh/boards/board-sh2007.c b/arch/sh/boards/board-sh2007.c index ef9c87deeb08..6ea85e480851 100644 --- a/arch/sh/boards/board-sh2007.c +++ b/arch/sh/boards/board-sh2007.c @@ -126,14 +126,14 @@ static void __init sh2007_init_irq(void) */ static void __init sh2007_setup(char **cmdline_p) { - printk(KERN_INFO "SH-2007 Setup..."); + pr_info("SH-2007 Setup..."); /* setup wait control registers for area 5 */ __raw_writel(CS5BCR_D, CS5BCR); __raw_writel(CS5WCR_D, CS5WCR); __raw_writel(CS5PCR_D, CS5PCR); - printk(KERN_INFO " done.\n"); + pr_cont(" done.\n"); } /* diff --git a/arch/sh/boards/mach-cayman/Makefile b/arch/sh/boards/mach-cayman/Makefile deleted file mode 100644 index 775a4be57434..000000000000 --- a/arch/sh/boards/mach-cayman/Makefile +++ /dev/null @@ -1,5 +0,0 @@ -# SPDX-License-Identifier: GPL-2.0 -# -# Makefile for the Hitachi Cayman specific parts of the kernel -# -obj-y := setup.o irq.o panic.o diff --git a/arch/sh/boards/mach-cayman/irq.c b/arch/sh/boards/mach-cayman/irq.c deleted file mode 100644 index 0305d0b51730..000000000000 --- a/arch/sh/boards/mach-cayman/irq.c +++ /dev/null @@ -1,148 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * arch/sh/mach-cayman/irq.c - SH-5 Cayman Interrupt Support - * - * This file handles the board specific parts of the Cayman interrupt system - * - * Copyright (C) 2002 Stuart Menefy - */ -#include <linux/io.h> -#include <linux/irq.h> -#include <linux/interrupt.h> -#include <linux/signal.h> -#include <cpu/irq.h> -#include <asm/page.h> - -/* Setup for the SMSC FDC37C935 / LAN91C100FD */ -#define SMSC_IRQ IRQ_IRL1 - -/* Setup for PCI Bus 2, which transmits interrupts via the EPLD */ -#define PCI2_IRQ IRQ_IRL3 - -unsigned long epld_virt; - -#define EPLD_BASE 0x04002000 -#define EPLD_STATUS_BASE 
(epld_virt + 0x10) -#define EPLD_MASK_BASE (epld_virt + 0x20) - -/* Note the SMSC SuperIO chip and SMSC LAN chip interrupts are all muxed onto - the same SH-5 interrupt */ - -static irqreturn_t cayman_interrupt_smsc(int irq, void *dev_id) -{ - printk(KERN_INFO "CAYMAN: spurious SMSC interrupt\n"); - return IRQ_NONE; -} - -static irqreturn_t cayman_interrupt_pci2(int irq, void *dev_id) -{ - printk(KERN_INFO "CAYMAN: spurious PCI interrupt, IRQ %d\n", irq); - return IRQ_NONE; -} - -static void enable_cayman_irq(struct irq_data *data) -{ - unsigned int irq = data->irq; - unsigned long flags; - unsigned long mask; - unsigned int reg; - unsigned char bit; - - irq -= START_EXT_IRQS; - reg = EPLD_MASK_BASE + ((irq / 8) << 2); - bit = 1<<(irq % 8); - local_irq_save(flags); - mask = __raw_readl(reg); - mask |= bit; - __raw_writel(mask, reg); - local_irq_restore(flags); -} - -static void disable_cayman_irq(struct irq_data *data) -{ - unsigned int irq = data->irq; - unsigned long flags; - unsigned long mask; - unsigned int reg; - unsigned char bit; - - irq -= START_EXT_IRQS; - reg = EPLD_MASK_BASE + ((irq / 8) << 2); - bit = 1<<(irq % 8); - local_irq_save(flags); - mask = __raw_readl(reg); - mask &= ~bit; - __raw_writel(mask, reg); - local_irq_restore(flags); -} - -struct irq_chip cayman_irq_type = { - .name = "Cayman-IRQ", - .irq_unmask = enable_cayman_irq, - .irq_mask = disable_cayman_irq, -}; - -int cayman_irq_demux(int evt) -{ - int irq = intc_evt_to_irq[evt]; - - if (irq == SMSC_IRQ) { - unsigned long status; - int i; - - status = __raw_readl(EPLD_STATUS_BASE) & - __raw_readl(EPLD_MASK_BASE) & 0xff; - if (status == 0) { - irq = -1; - } else { - for (i=0; i<8; i++) { - if (status & (1<<i)) - break; - } - irq = START_EXT_IRQS + i; - } - } - - if (irq == PCI2_IRQ) { - unsigned long status; - int i; - - status = __raw_readl(EPLD_STATUS_BASE + 3 * sizeof(u32)) & - __raw_readl(EPLD_MASK_BASE + 3 * sizeof(u32)) & 0xff; - if (status == 0) { - irq = -1; - } else { - for (i=0; i<8; i++) { - if (status & (1<<i)) - break; - } - irq = START_EXT_IRQS + (3 * 8) + i; - } - } - - return irq; -} - -void init_cayman_irq(void) -{ - int i; - - epld_virt = (unsigned long)ioremap(EPLD_BASE, 1024); - if (!epld_virt) { - printk(KERN_ERR "Cayman IRQ: Unable to remap EPLD\n"); - return; - } - - for (i = 0; i < NR_EXT_IRQS; i++) { - irq_set_chip_and_handler(START_EXT_IRQS + i, - &cayman_irq_type, handle_level_irq); - } - - /* Setup the SMSC interrupt */ - if (request_irq(SMSC_IRQ, cayman_interrupt_smsc, 0, "Cayman SMSC Mux", - NULL)) - pr_err("Failed to register Cayman SMSC Mux interrupt\n"); - if (request_irq(PCI2_IRQ, cayman_interrupt_pci2, 0, "Cayman PCI2 Mux", - NULL)) - pr_err("Failed to register Cayman PCI2 Mux interrupt\n"); -} diff --git a/arch/sh/boards/mach-cayman/panic.c b/arch/sh/boards/mach-cayman/panic.c deleted file mode 100644 index cfc46314e7d9..000000000000 --- a/arch/sh/boards/mach-cayman/panic.c +++ /dev/null @@ -1,46 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * Copyright (C) 2003 Richard Curnow, SuperH UK Limited - */ - -#include <linux/kernel.h> -#include <linux/io.h> -#include <cpu/registers.h> - -/* THIS IS A PHYSICAL ADDRESS */ -#define HDSP2534_ADDR (0x04002100) - -static void poor_mans_delay(void) -{ - int i; - - for (i = 0; i < 2500000; i++) - cpu_relax(); -} - -static void show_value(unsigned long x) -{ - int i; - unsigned nibble; - for (i = 0; i < 8; i++) { - nibble = ((x >> (i * 4)) & 0xf); - - __raw_writeb(nibble + ((nibble > 9) ? 
55 : 48), - HDSP2534_ADDR + 0xe0 + ((7 - i) << 2)); - } -} - -void -panic_handler(unsigned long panicPC, unsigned long panicSSR, - unsigned long panicEXPEVT) -{ - while (1) { - /* This piece of code displays the PC on the LED display */ - show_value(panicPC); - poor_mans_delay(); - show_value(panicSSR); - poor_mans_delay(); - show_value(panicEXPEVT); - poor_mans_delay(); - } -} diff --git a/arch/sh/boards/mach-cayman/setup.c b/arch/sh/boards/mach-cayman/setup.c deleted file mode 100644 index 8ef76e288da0..000000000000 --- a/arch/sh/boards/mach-cayman/setup.c +++ /dev/null @@ -1,181 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * arch/sh/mach-cayman/setup.c - * - * SH5 Cayman support - * - * Copyright (C) 2002 David J. Mckay & Benedict Gaster - * Copyright (C) 2003 - 2007 Paul Mundt - */ -#include <linux/init.h> -#include <linux/io.h> -#include <linux/kernel.h> -#include <cpu/irq.h> - -/* - * Platform Dependent Interrupt Priorities. - */ - -/* Using defaults defined in irq.h */ -#define RES NO_PRIORITY /* Disabled */ -#define IR0 IRL0_PRIORITY /* IRLs */ -#define IR1 IRL1_PRIORITY -#define IR2 IRL2_PRIORITY -#define IR3 IRL3_PRIORITY -#define PCA INTA_PRIORITY /* PCI Ints */ -#define PCB INTB_PRIORITY -#define PCC INTC_PRIORITY -#define PCD INTD_PRIORITY -#define SER TOP_PRIORITY -#define ERR TOP_PRIORITY -#define PW0 TOP_PRIORITY -#define PW1 TOP_PRIORITY -#define PW2 TOP_PRIORITY -#define PW3 TOP_PRIORITY -#define DM0 NO_PRIORITY /* DMA Ints */ -#define DM1 NO_PRIORITY -#define DM2 NO_PRIORITY -#define DM3 NO_PRIORITY -#define DAE NO_PRIORITY -#define TU0 TIMER_PRIORITY /* TMU Ints */ -#define TU1 NO_PRIORITY -#define TU2 NO_PRIORITY -#define TI2 NO_PRIORITY -#define ATI NO_PRIORITY /* RTC Ints */ -#define PRI NO_PRIORITY -#define CUI RTC_PRIORITY -#define ERI SCIF_PRIORITY /* SCIF Ints */ -#define RXI SCIF_PRIORITY -#define BRI SCIF_PRIORITY -#define TXI SCIF_PRIORITY -#define ITI TOP_PRIORITY /* WDT Ints */ - -/* Setup for the SMSC FDC37C935 */ -#define SMSC_SUPERIO_BASE 0x04000000 -#define SMSC_CONFIG_PORT_ADDR 0x3f0 -#define SMSC_INDEX_PORT_ADDR SMSC_CONFIG_PORT_ADDR -#define SMSC_DATA_PORT_ADDR 0x3f1 - -#define SMSC_ENTER_CONFIG_KEY 0x55 -#define SMSC_EXIT_CONFIG_KEY 0xaa - -#define SMCS_LOGICAL_DEV_INDEX 0x07 -#define SMSC_DEVICE_ID_INDEX 0x20 -#define SMSC_DEVICE_REV_INDEX 0x21 -#define SMSC_ACTIVATE_INDEX 0x30 -#define SMSC_PRIMARY_BASE_INDEX 0x60 -#define SMSC_SECONDARY_BASE_INDEX 0x62 -#define SMSC_PRIMARY_INT_INDEX 0x70 -#define SMSC_SECONDARY_INT_INDEX 0x72 - -#define SMSC_IDE1_DEVICE 1 -#define SMSC_KEYBOARD_DEVICE 7 -#define SMSC_CONFIG_REGISTERS 8 - -#define SMSC_SUPERIO_READ_INDEXED(index) ({ \ - outb((index), SMSC_INDEX_PORT_ADDR); \ - inb(SMSC_DATA_PORT_ADDR); }) -#define SMSC_SUPERIO_WRITE_INDEXED(val, index) ({ \ - outb((index), SMSC_INDEX_PORT_ADDR); \ - outb((val), SMSC_DATA_PORT_ADDR); }) - -#define IDE1_PRIMARY_BASE 0x01f0 -#define IDE1_SECONDARY_BASE 0x03f6 - -unsigned long smsc_superio_virt; - -int platform_int_priority[NR_INTC_IRQS] = { - IR0, IR1, IR2, IR3, PCA, PCB, PCC, PCD, /* IRQ 0- 7 */ - RES, RES, RES, RES, SER, ERR, PW3, PW2, /* IRQ 8-15 */ - PW1, PW0, DM0, DM1, DM2, DM3, DAE, RES, /* IRQ 16-23 */ - RES, RES, RES, RES, RES, RES, RES, RES, /* IRQ 24-31 */ - TU0, TU1, TU2, TI2, ATI, PRI, CUI, ERI, /* IRQ 32-39 */ - RXI, BRI, TXI, RES, RES, RES, RES, RES, /* IRQ 40-47 */ - RES, RES, RES, RES, RES, RES, RES, RES, /* IRQ 48-55 */ - RES, RES, RES, RES, RES, RES, RES, ITI, /* IRQ 56-63 */ -}; - -static int __init smsc_superio_setup(void) -{ - unsigned char 
devid, devrev; - - smsc_superio_virt = (unsigned long)ioremap(SMSC_SUPERIO_BASE, 1024); - if (!smsc_superio_virt) { - panic("Unable to remap SMSC SuperIO\n"); - } - - /* Initially the chip is in run state */ - /* Put it into configuration state */ - outb(SMSC_ENTER_CONFIG_KEY, SMSC_CONFIG_PORT_ADDR); - outb(SMSC_ENTER_CONFIG_KEY, SMSC_CONFIG_PORT_ADDR); - - /* Read device ID info */ - devid = SMSC_SUPERIO_READ_INDEXED(SMSC_DEVICE_ID_INDEX); - devrev = SMSC_SUPERIO_READ_INDEXED(SMSC_DEVICE_REV_INDEX); - printk("SMSC SuperIO devid %02x rev %02x\n", devid, devrev); - - /* Select the keyboard device */ - SMSC_SUPERIO_WRITE_INDEXED(SMSC_KEYBOARD_DEVICE, SMCS_LOGICAL_DEV_INDEX); - - /* enable it */ - SMSC_SUPERIO_WRITE_INDEXED(1, SMSC_ACTIVATE_INDEX); - - /* Select the interrupts */ - /* On a PC keyboard is IRQ1, mouse is IRQ12 */ - SMSC_SUPERIO_WRITE_INDEXED(1, SMSC_PRIMARY_INT_INDEX); - SMSC_SUPERIO_WRITE_INDEXED(12, SMSC_SECONDARY_INT_INDEX); - - /* - * Only IDE1 exists on the Cayman - */ - - /* Power it on */ - SMSC_SUPERIO_WRITE_INDEXED(1 << SMSC_IDE1_DEVICE, 0x22); - - SMSC_SUPERIO_WRITE_INDEXED(SMSC_IDE1_DEVICE, SMCS_LOGICAL_DEV_INDEX); - SMSC_SUPERIO_WRITE_INDEXED(1, SMSC_ACTIVATE_INDEX); - - SMSC_SUPERIO_WRITE_INDEXED(IDE1_PRIMARY_BASE >> 8, - SMSC_PRIMARY_BASE_INDEX + 0); - SMSC_SUPERIO_WRITE_INDEXED(IDE1_PRIMARY_BASE & 0xff, - SMSC_PRIMARY_BASE_INDEX + 1); - - SMSC_SUPERIO_WRITE_INDEXED(IDE1_SECONDARY_BASE >> 8, - SMSC_SECONDARY_BASE_INDEX + 0); - SMSC_SUPERIO_WRITE_INDEXED(IDE1_SECONDARY_BASE & 0xff, - SMSC_SECONDARY_BASE_INDEX + 1); - - SMSC_SUPERIO_WRITE_INDEXED(14, SMSC_PRIMARY_INT_INDEX); - - SMSC_SUPERIO_WRITE_INDEXED(SMSC_CONFIG_REGISTERS, - SMCS_LOGICAL_DEV_INDEX); - - SMSC_SUPERIO_WRITE_INDEXED(0x00, 0xc2); /* GP42 = nIDE1_OE */ - SMSC_SUPERIO_WRITE_INDEXED(0x01, 0xc5); /* GP45 = IDE1_IRQ */ - SMSC_SUPERIO_WRITE_INDEXED(0x00, 0xc6); /* GP46 = nIOROP */ - SMSC_SUPERIO_WRITE_INDEXED(0x00, 0xc7); /* GP47 = nIOWOP */ - - /* Exit the configuration state */ - outb(SMSC_EXIT_CONFIG_KEY, SMSC_CONFIG_PORT_ADDR); - - return 0; -} -device_initcall(smsc_superio_setup); - -static void __iomem *cayman_ioport_map(unsigned long port, unsigned int len) -{ - if (port < 0x400) { - extern unsigned long smsc_superio_virt; - return (void __iomem *)((port << 2) | smsc_superio_virt); - } - - return (void __iomem *)port; -} - -extern void init_cayman_irq(void); - -static struct sh_machine_vector mv_cayman __initmv = { - .mv_name = "Hitachi Cayman", - .mv_ioport_map = cayman_ioport_map, - .mv_init_irq = init_cayman_irq, -}; diff --git a/arch/sh/boards/mach-landisk/setup.c b/arch/sh/boards/mach-landisk/setup.c index 16b4d8b0bb85..2c44b94f82fb 100644 --- a/arch/sh/boards/mach-landisk/setup.c +++ b/arch/sh/boards/mach-landisk/setup.c @@ -82,6 +82,9 @@ device_initcall(landisk_devices_setup); static void __init landisk_setup(char **cmdline_p) { + /* I/O port identity mapping */ + __set_io_port_base(0); + /* LED ON */ __raw_writeb(__raw_readb(PA_LED) | 0x03, PA_LED); diff --git a/arch/sh/configs/cayman_defconfig b/arch/sh/configs/cayman_defconfig deleted file mode 100644 index 911437c163c7..000000000000 --- a/arch/sh/configs/cayman_defconfig +++ /dev/null @@ -1,66 +0,0 @@ -CONFIG_POSIX_MQUEUE=y -CONFIG_LOG_BUF_SHIFT=14 -# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set -CONFIG_SLAB=y -CONFIG_MODULES=y -CONFIG_MODULE_UNLOAD=y -# CONFIG_BLK_DEV_BSG is not set -CONFIG_FORCE_MAX_ZONEORDER=11 -CONFIG_MEMORY_START=0x80000000 -CONFIG_MEMORY_SIZE=0x00400000 -CONFIG_FLATMEM_MANUAL=y -CONFIG_CACHE_OFF=y 
-CONFIG_SH_PCLK_FREQ=50000000 -CONFIG_HEARTBEAT=y -CONFIG_PREEMPT=y -CONFIG_NET=y -CONFIG_PACKET=y -CONFIG_UNIX=y -CONFIG_INET=y -CONFIG_IP_PNP=y -# CONFIG_IPV6 is not set -# CONFIG_FW_LOADER is not set -CONFIG_BLK_DEV_LOOP=y -CONFIG_BLK_DEV_RAM=y -CONFIG_SCSI=y -CONFIG_BLK_DEV_SD=y -CONFIG_SCSI_MULTI_LUN=y -CONFIG_SCSI_SPI_ATTRS=y -CONFIG_NETDEVICES=y -CONFIG_NET_ETHERNET=y -# CONFIG_INPUT_MOUSEDEV_PSAUX is not set -# CONFIG_INPUT_KEYBOARD is not set -# CONFIG_INPUT_MOUSE is not set -# CONFIG_SERIO is not set -CONFIG_HW_RANDOM=y -CONFIG_I2C=m -CONFIG_WATCHDOG=y -CONFIG_FB=y -CONFIG_FIRMWARE_EDID=y -CONFIG_FB_MODE_HELPERS=y -CONFIG_FB_SH_MOBILE_LCDC=m -CONFIG_FRAMEBUFFER_CONSOLE=y -CONFIG_FONTS=y -CONFIG_FONT_8x16=y -CONFIG_LOGO=y -# CONFIG_LOGO_LINUX_MONO is not set -# CONFIG_LOGO_LINUX_VGA16 is not set -# CONFIG_LOGO_LINUX_CLUT224 is not set -# CONFIG_LOGO_SUPERH_MONO is not set -# CONFIG_LOGO_SUPERH_VGA16 is not set -CONFIG_EXT2_FS=y -CONFIG_EXT3_FS=y -# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set -CONFIG_MINIX_FS=y -CONFIG_ROMFS_FS=y -CONFIG_NFS_FS=y -CONFIG_NFS_V3=y -CONFIG_ROOT_NFS=y -CONFIG_PARTITION_ADVANCED=y -CONFIG_MAGIC_SYSRQ=y -CONFIG_DEBUG_FS=y -CONFIG_DEBUG_KERNEL=y -CONFIG_DETECT_HUNG_TASK=y -CONFIG_SCHEDSTATS=y -CONFIG_FRAME_POINTER=y -# CONFIG_CRYPTO_ANSI_CPRNG is not set diff --git a/arch/sh/configs/dreamcast_defconfig b/arch/sh/configs/dreamcast_defconfig index ae067e0b15e3..6a82c7b8ff32 100644 --- a/arch/sh/configs/dreamcast_defconfig +++ b/arch/sh/configs/dreamcast_defconfig @@ -1,7 +1,6 @@ CONFIG_SYSVIPC=y CONFIG_BSD_PROCESS_ACCT=y CONFIG_LOG_BUF_SHIFT=14 -# CONFIG_SYSCTL_SYSCALL is not set CONFIG_SLAB=y CONFIG_PROFILING=y CONFIG_MODULES=y diff --git a/arch/sh/configs/espt_defconfig b/arch/sh/configs/espt_defconfig index a5b865a75d22..9a988c347e9d 100644 --- a/arch/sh/configs/espt_defconfig +++ b/arch/sh/configs/espt_defconfig @@ -5,7 +5,6 @@ CONFIG_LOG_BUF_SHIFT=14 CONFIG_NAMESPACES=y CONFIG_UTS_NS=y CONFIG_IPC_NS=y -# CONFIG_SYSCTL_SYSCALL is not set CONFIG_SLAB=y CONFIG_PROFILING=y CONFIG_OPROFILE=y diff --git a/arch/sh/configs/hp6xx_defconfig b/arch/sh/configs/hp6xx_defconfig index a92db6694ce2..70e6605d7f7e 100644 --- a/arch/sh/configs/hp6xx_defconfig +++ b/arch/sh/configs/hp6xx_defconfig @@ -3,7 +3,6 @@ CONFIG_IKCONFIG=y CONFIG_IKCONFIG_PROC=y CONFIG_LOG_BUF_SHIFT=14 # CONFIG_CC_OPTIMIZE_FOR_SIZE is not set -# CONFIG_SYSCTL_SYSCALL is not set CONFIG_SLAB=y # CONFIG_BLK_DEV_BSG is not set CONFIG_CPU_SUBTYPE_SH7709=y diff --git a/arch/sh/configs/landisk_defconfig b/arch/sh/configs/landisk_defconfig index 567af752b1bb..ba6ec042606f 100644 --- a/arch/sh/configs/landisk_defconfig +++ b/arch/sh/configs/landisk_defconfig @@ -1,6 +1,5 @@ CONFIG_SYSVIPC=y CONFIG_LOG_BUF_SHIFT=14 -# CONFIG_SYSCTL_SYSCALL is not set CONFIG_KALLSYMS_EXTRA_PASS=y CONFIG_SLAB=y CONFIG_MODULES=y diff --git a/arch/sh/configs/lboxre2_defconfig b/arch/sh/configs/lboxre2_defconfig index 10f6d371ce2c..05e4ac6fed5f 100644 --- a/arch/sh/configs/lboxre2_defconfig +++ b/arch/sh/configs/lboxre2_defconfig @@ -1,6 +1,5 @@ CONFIG_SYSVIPC=y CONFIG_LOG_BUF_SHIFT=14 -# CONFIG_SYSCTL_SYSCALL is not set CONFIG_KALLSYMS_EXTRA_PASS=y CONFIG_SLAB=y CONFIG_MODULES=y diff --git a/arch/sh/configs/microdev_defconfig b/arch/sh/configs/microdev_defconfig index ed84d1303acf..c65667d00313 100644 --- a/arch/sh/configs/microdev_defconfig +++ b/arch/sh/configs/microdev_defconfig @@ -2,7 +2,6 @@ CONFIG_BSD_PROCESS_ACCT=y CONFIG_LOG_BUF_SHIFT=14 CONFIG_BLK_DEV_INITRD=y # CONFIG_CC_OPTIMIZE_FOR_SIZE is not set -# 
CONFIG_SYSCTL_SYSCALL is not set CONFIG_SLAB=y # CONFIG_BLK_DEV_BSG is not set CONFIG_CPU_SUBTYPE_SH4_202=y diff --git a/arch/sh/configs/migor_defconfig b/arch/sh/configs/migor_defconfig index 37e9521a99e5..a24cf8cd2cea 100644 --- a/arch/sh/configs/migor_defconfig +++ b/arch/sh/configs/migor_defconfig @@ -4,7 +4,6 @@ CONFIG_IKCONFIG_PROC=y CONFIG_LOG_BUF_SHIFT=14 CONFIG_BLK_DEV_INITRD=y # CONFIG_CC_OPTIMIZE_FOR_SIZE is not set -# CONFIG_SYSCTL_SYSCALL is not set CONFIG_SLAB=y CONFIG_PROFILING=y CONFIG_OPROFILE=y diff --git a/arch/sh/configs/r7780mp_defconfig b/arch/sh/configs/r7780mp_defconfig index c97ec60cff27..e922659fdadb 100644 --- a/arch/sh/configs/r7780mp_defconfig +++ b/arch/sh/configs/r7780mp_defconfig @@ -3,7 +3,6 @@ CONFIG_BSD_PROCESS_ACCT=y CONFIG_IKCONFIG=y CONFIG_IKCONFIG_PROC=y CONFIG_LOG_BUF_SHIFT=14 -# CONFIG_SYSCTL_SYSCALL is not set # CONFIG_FUTEX is not set # CONFIG_EPOLL is not set CONFIG_SLAB=y diff --git a/arch/sh/configs/r7785rp_defconfig b/arch/sh/configs/r7785rp_defconfig index 55fce65eb454..5978866358ec 100644 --- a/arch/sh/configs/r7785rp_defconfig +++ b/arch/sh/configs/r7785rp_defconfig @@ -7,7 +7,6 @@ CONFIG_RCU_TRACE=y CONFIG_IKCONFIG=y CONFIG_IKCONFIG_PROC=y CONFIG_LOG_BUF_SHIFT=14 -# CONFIG_SYSCTL_SYSCALL is not set CONFIG_SLAB=y CONFIG_PROFILING=y CONFIG_OPROFILE=y diff --git a/arch/sh/configs/rts7751r2d1_defconfig b/arch/sh/configs/rts7751r2d1_defconfig index 6a3cfe08295f..fc9c22152b08 100644 --- a/arch/sh/configs/rts7751r2d1_defconfig +++ b/arch/sh/configs/rts7751r2d1_defconfig @@ -1,7 +1,6 @@ CONFIG_SYSVIPC=y CONFIG_LOG_BUF_SHIFT=14 # CONFIG_CC_OPTIMIZE_FOR_SIZE is not set -# CONFIG_SYSCTL_SYSCALL is not set CONFIG_SLAB=y CONFIG_PROFILING=y CONFIG_OPROFILE=y diff --git a/arch/sh/configs/rts7751r2dplus_defconfig b/arch/sh/configs/rts7751r2dplus_defconfig index 2b3d7d280672..ff3fd6787fd6 100644 --- a/arch/sh/configs/rts7751r2dplus_defconfig +++ b/arch/sh/configs/rts7751r2dplus_defconfig @@ -1,7 +1,6 @@ CONFIG_SYSVIPC=y CONFIG_LOG_BUF_SHIFT=14 # CONFIG_CC_OPTIMIZE_FOR_SIZE is not set -# CONFIG_SYSCTL_SYSCALL is not set CONFIG_SLAB=y CONFIG_PROFILING=y CONFIG_OPROFILE=y diff --git a/arch/sh/configs/se7206_defconfig b/arch/sh/configs/se7206_defconfig index 21a43f14ffac..ff5bb4489922 100644 --- a/arch/sh/configs/se7206_defconfig +++ b/arch/sh/configs/se7206_defconfig @@ -18,7 +18,6 @@ CONFIG_USER_NS=y CONFIG_PID_NS=y CONFIG_BLK_DEV_INITRD=y # CONFIG_UID16 is not set -# CONFIG_SYSCTL_SYSCALL is not set CONFIG_KALLSYMS_ALL=y # CONFIG_ELF_CORE is not set # CONFIG_COMPAT_BRK is not set diff --git a/arch/sh/configs/se7343_defconfig b/arch/sh/configs/se7343_defconfig index 4e794e719a28..5d6c19338ebf 100644 --- a/arch/sh/configs/se7343_defconfig +++ b/arch/sh/configs/se7343_defconfig @@ -2,7 +2,6 @@ CONFIG_SYSVIPC=y CONFIG_POSIX_MQUEUE=y CONFIG_LOG_BUF_SHIFT=14 -# CONFIG_SYSCTL_SYSCALL is not set # CONFIG_FUTEX is not set # CONFIG_EPOLL is not set # CONFIG_SHMEM is not set diff --git a/arch/sh/configs/se7619_defconfig b/arch/sh/configs/se7619_defconfig index 3264415a5931..71a672c30716 100644 --- a/arch/sh/configs/se7619_defconfig +++ b/arch/sh/configs/se7619_defconfig @@ -1,7 +1,6 @@ # CONFIG_LOCALVERSION_AUTO is not set CONFIG_LOG_BUF_SHIFT=14 # CONFIG_UID16 is not set -# CONFIG_SYSCTL_SYSCALL is not set # CONFIG_KALLSYMS is not set # CONFIG_HOTPLUG is not set # CONFIG_ELF_CORE is not set diff --git a/arch/sh/configs/se7705_defconfig b/arch/sh/configs/se7705_defconfig index 4496b94b7d88..ed00a6eeadf5 100644 --- a/arch/sh/configs/se7705_defconfig +++ 
b/arch/sh/configs/se7705_defconfig @@ -2,7 +2,6 @@ CONFIG_LOG_BUF_SHIFT=14 CONFIG_BLK_DEV_INITRD=y # CONFIG_CC_OPTIMIZE_FOR_SIZE is not set -# CONFIG_SYSCTL_SYSCALL is not set # CONFIG_KALLSYMS is not set # CONFIG_HOTPLUG is not set CONFIG_SLAB=y diff --git a/arch/sh/configs/se7750_defconfig b/arch/sh/configs/se7750_defconfig index b23f67542728..3f1c13799d79 100644 --- a/arch/sh/configs/se7750_defconfig +++ b/arch/sh/configs/se7750_defconfig @@ -5,7 +5,6 @@ CONFIG_IKCONFIG=y CONFIG_IKCONFIG_PROC=y CONFIG_LOG_BUF_SHIFT=14 # CONFIG_CC_OPTIMIZE_FOR_SIZE is not set -# CONFIG_SYSCTL_SYSCALL is not set # CONFIG_HOTPLUG is not set CONFIG_SLAB=y CONFIG_MODULES=y diff --git a/arch/sh/configs/se7751_defconfig b/arch/sh/configs/se7751_defconfig index 162343683937..4a024065bb75 100644 --- a/arch/sh/configs/se7751_defconfig +++ b/arch/sh/configs/se7751_defconfig @@ -3,7 +3,6 @@ CONFIG_BSD_PROCESS_ACCT=y CONFIG_LOG_BUF_SHIFT=14 CONFIG_BLK_DEV_INITRD=y # CONFIG_CC_OPTIMIZE_FOR_SIZE is not set -# CONFIG_SYSCTL_SYSCALL is not set # CONFIG_HOTPLUG is not set CONFIG_SLAB=y CONFIG_MODULES=y diff --git a/arch/sh/configs/secureedge5410_defconfig b/arch/sh/configs/secureedge5410_defconfig index 360592d63a2f..8422599cfb04 100644 --- a/arch/sh/configs/secureedge5410_defconfig +++ b/arch/sh/configs/secureedge5410_defconfig @@ -1,7 +1,6 @@ # CONFIG_SWAP is not set CONFIG_LOG_BUF_SHIFT=14 CONFIG_BLK_DEV_INITRD=y -# CONFIG_SYSCTL_SYSCALL is not set # CONFIG_HOTPLUG is not set CONFIG_SLAB=y # CONFIG_BLK_DEV_BSG is not set diff --git a/arch/sh/configs/sh03_defconfig b/arch/sh/configs/sh03_defconfig index 87db9a84b5ec..f0073ed39947 100644 --- a/arch/sh/configs/sh03_defconfig +++ b/arch/sh/configs/sh03_defconfig @@ -3,7 +3,6 @@ CONFIG_POSIX_MQUEUE=y CONFIG_BSD_PROCESS_ACCT=y CONFIG_LOG_BUF_SHIFT=14 CONFIG_BLK_DEV_INITRD=y -# CONFIG_SYSCTL_SYSCALL is not set CONFIG_SLAB=y CONFIG_PROFILING=y CONFIG_OPROFILE=m diff --git a/arch/sh/configs/sh7710voipgw_defconfig b/arch/sh/configs/sh7710voipgw_defconfig index 08426913c0e3..0d814770b07f 100644 --- a/arch/sh/configs/sh7710voipgw_defconfig +++ b/arch/sh/configs/sh7710voipgw_defconfig @@ -2,7 +2,6 @@ CONFIG_SYSVIPC=y CONFIG_POSIX_MQUEUE=y CONFIG_LOG_BUF_SHIFT=14 -# CONFIG_SYSCTL_SYSCALL is not set # CONFIG_FUTEX is not set # CONFIG_EPOLL is not set # CONFIG_SHMEM is not set diff --git a/arch/sh/configs/sh7757lcr_defconfig b/arch/sh/configs/sh7757lcr_defconfig index d0933a9b9799..a2700ab165af 100644 --- a/arch/sh/configs/sh7757lcr_defconfig +++ b/arch/sh/configs/sh7757lcr_defconfig @@ -8,7 +8,6 @@ CONFIG_TASK_XACCT=y CONFIG_TASK_IO_ACCOUNTING=y CONFIG_LOG_BUF_SHIFT=14 CONFIG_BLK_DEV_INITRD=y -# CONFIG_SYSCTL_SYSCALL is not set CONFIG_KALLSYMS_ALL=y CONFIG_SLAB=y CONFIG_MODULES=y diff --git a/arch/sh/configs/sh7763rdp_defconfig b/arch/sh/configs/sh7763rdp_defconfig index d0a0aa74cecf..26c5fd02c87a 100644 --- a/arch/sh/configs/sh7763rdp_defconfig +++ b/arch/sh/configs/sh7763rdp_defconfig @@ -5,7 +5,6 @@ CONFIG_LOG_BUF_SHIFT=14 CONFIG_NAMESPACES=y CONFIG_UTS_NS=y CONFIG_IPC_NS=y -# CONFIG_SYSCTL_SYSCALL is not set CONFIG_SLAB=y CONFIG_PROFILING=y CONFIG_OPROFILE=y diff --git a/arch/sh/configs/shmin_defconfig b/arch/sh/configs/shmin_defconfig index a27b129b93c5..c0b6f40d01cc 100644 --- a/arch/sh/configs/shmin_defconfig +++ b/arch/sh/configs/shmin_defconfig @@ -1,7 +1,6 @@ # CONFIG_SWAP is not set CONFIG_LOG_BUF_SHIFT=14 # CONFIG_UID16 is not set -# CONFIG_SYSCTL_SYSCALL is not set # CONFIG_KALLSYMS is not set # CONFIG_HOTPLUG is not set # CONFIG_BUG is not set diff --git 
a/arch/sh/configs/titan_defconfig b/arch/sh/configs/titan_defconfig index 4ec961ace688..ba887f1351be 100644 --- a/arch/sh/configs/titan_defconfig +++ b/arch/sh/configs/titan_defconfig @@ -6,7 +6,6 @@ CONFIG_IKCONFIG_PROC=y CONFIG_LOG_BUF_SHIFT=16 CONFIG_BLK_DEV_INITRD=y # CONFIG_CC_OPTIMIZE_FOR_SIZE is not set -# CONFIG_SYSCTL_SYSCALL is not set CONFIG_SLAB=y CONFIG_MODULES=y CONFIG_MODULE_UNLOAD=y diff --git a/arch/sh/drivers/pci/Makefile b/arch/sh/drivers/pci/Makefile index a5c1e9066f83..d313fd3ce709 100644 --- a/arch/sh/drivers/pci/Makefile +++ b/arch/sh/drivers/pci/Makefile @@ -25,4 +25,3 @@ obj-$(CONFIG_SH_7780_SOLUTION_ENGINE) += fixups-sdk7780.o obj-$(CONFIG_SH_TITAN) += fixups-titan.o obj-$(CONFIG_SH_LANDISK) += fixups-landisk.o obj-$(CONFIG_SH_LBOX_RE2) += fixups-rts7751r2d.o -obj-$(CONFIG_SH_CAYMAN) += fixups-cayman.o diff --git a/arch/sh/drivers/pci/common.c b/arch/sh/drivers/pci/common.c index fe163ecd0719..2fd2b77e12ce 100644 --- a/arch/sh/drivers/pci/common.c +++ b/arch/sh/drivers/pci/common.c @@ -54,7 +54,7 @@ int __init pci_is_66mhz_capable(struct pci_channel *hose, int cap66 = -1; u16 stat; - printk(KERN_INFO "PCI: Checking 66MHz capabilities...\n"); + pr_info("PCI: Checking 66MHz capabilities...\n"); for (pci_devfn = 0; pci_devfn < 0xff; pci_devfn++) { if (PCI_FUNC(pci_devfn)) @@ -134,7 +134,7 @@ unsigned int pcibios_handle_status_errors(unsigned long addr, pcibios_report_status(PCI_STATUS_REC_TARGET_ABORT | PCI_STATUS_SIG_TARGET_ABORT | PCI_STATUS_REC_MASTER_ABORT, 1); - printk("\n"); + pr_cont("\n"); cmd |= PCI_STATUS_REC_TARGET_ABORT; } @@ -143,7 +143,7 @@ unsigned int pcibios_handle_status_errors(unsigned long addr, printk(KERN_DEBUG "PCI: parity error detected: "); pcibios_report_status(PCI_STATUS_PARITY | PCI_STATUS_DETECTED_PARITY, 1); - printk("\n"); + pr_cont("\n"); cmd |= PCI_STATUS_PARITY | PCI_STATUS_DETECTED_PARITY; diff --git a/arch/sh/drivers/pci/fixups-cayman.c b/arch/sh/drivers/pci/fixups-cayman.c deleted file mode 100644 index c797bfbe2e98..000000000000 --- a/arch/sh/drivers/pci/fixups-cayman.c +++ /dev/null @@ -1,78 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -#include <linux/kernel.h> -#include <linux/init.h> -#include <linux/pci.h> -#include <linux/types.h> -#include <cpu/irq.h> -#include "pci-sh5.h" - -int pcibios_map_platform_irq(const struct pci_dev *dev, u8 slot, u8 pin) -{ - int result = -1; - - /* The complication here is that the PCI IRQ lines from the Cayman's 2 - 5V slots get into the CPU via a different path from the IRQ lines - from the 3 3.3V slots. Thus, we have to detect whether the card's - interrupts go via the 5V or 3.3V path, i.e. the 'bridge swizzling' - at the point where we cross from 5V to 3.3V is not the normal case. - - The added complication is that we don't know that the 5V slots are - always bus 2, because a card containing a PCI-PCI bridge may be - plugged into a 3.3V slot, and this changes the bus numbering. - - Also, the Cayman has an intermediate PCI bus that goes a custom - expansion board header (and to the secondary bridge). This bus has - never been used in practice. - - The 1ary onboard PCI-PCI bridge is device 3 on bus 0 - The 2ary onboard PCI-PCI bridge is device 0 on the 2ary bus of - the 1ary bridge. 
- */ - - struct slot_pin { - int slot; - int pin; - } path[4]; - int i=0; - - while (dev->bus->number > 0) { - - slot = path[i].slot = PCI_SLOT(dev->devfn); - pin = path[i].pin = pci_swizzle_interrupt_pin(dev, pin); - dev = dev->bus->self; - i++; - if (i > 3) panic("PCI path to root bus too long!\n"); - } - - slot = PCI_SLOT(dev->devfn); - /* This is the slot on bus 0 through which the device is eventually - reachable. */ - - /* Now work back up. */ - if ((slot < 3) || (i == 0)) { - /* Bus 0 (incl. PCI-PCI bridge itself) : perform the final - swizzle now. */ - result = IRQ_INTA + pci_swizzle_interrupt_pin(dev, pin) - 1; - } else { - i--; - slot = path[i].slot; - pin = path[i].pin; - if (slot > 0) { - panic("PCI expansion bus device found - not handled!\n"); - } else { - if (i > 0) { - /* 5V slots */ - i--; - slot = path[i].slot; - pin = path[i].pin; - /* 'pin' was swizzled earlier wrt slot, don't do it again. */ - result = IRQ_P2INTA + (pin - 1); - } else { - /* IRQ for 2ary PCI-PCI bridge : unused */ - result = -1; - } - } - } - - return result; -} diff --git a/arch/sh/drivers/pci/pci-sh7780.c b/arch/sh/drivers/pci/pci-sh7780.c index 287b3a68570c..9a624a6ee354 100644 --- a/arch/sh/drivers/pci/pci-sh7780.c +++ b/arch/sh/drivers/pci/pci-sh7780.c @@ -148,7 +148,7 @@ static irqreturn_t sh7780_pci_serr_irq(int irq, void *dev_id) printk(KERN_DEBUG "PCI: system error received: "); pcibios_report_status(PCI_STATUS_SIG_SYSTEM_ERROR, 1); - printk("\n"); + pr_cont("\n"); /* Deassert SERR */ __raw_writel(SH4_PCIINTM_SDIM, hose->reg_base + SH4_PCIINTM); @@ -179,7 +179,7 @@ static int __init sh7780_pci_setup_irqs(struct pci_channel *hose) ret = request_irq(hose->serr_irq, sh7780_pci_serr_irq, 0, "PCI SERR interrupt", hose); if (unlikely(ret)) { - printk(KERN_ERR "PCI: Failed hooking SERR IRQ\n"); + pr_err("PCI: Failed hooking SERR IRQ\n"); return ret; } @@ -250,7 +250,7 @@ static int __init sh7780_pci_init(void) const char *type; int ret, i; - printk(KERN_NOTICE "PCI: Starting initialization.\n"); + pr_notice("PCI: Starting initialization.\n"); chan->reg_base = 0xfe040000; @@ -270,7 +270,7 @@ static int __init sh7780_pci_init(void) id = __raw_readw(chan->reg_base + PCI_VENDOR_ID); if (id != PCI_VENDOR_ID_RENESAS) { - printk(KERN_ERR "PCI: Unknown vendor ID 0x%04x.\n", id); + pr_err("PCI: Unknown vendor ID 0x%04x.\n", id); return -ENODEV; } @@ -281,14 +281,13 @@ static int __init sh7780_pci_init(void) (id == PCI_DEVICE_ID_RENESAS_SH7785) ? "SH7785" : NULL; if (unlikely(!type)) { - printk(KERN_ERR "PCI: Found an unsupported Renesas host " - "controller, device id 0x%04x.\n", id); + pr_err("PCI: Found an unsupported Renesas host controller, device id 0x%04x.\n", + id); return -EINVAL; } - printk(KERN_NOTICE "PCI: Found a Renesas %s host " - "controller, revision %d.\n", type, - __raw_readb(chan->reg_base + PCI_REVISION_ID)); + pr_notice("PCI: Found a Renesas %s host controller, revision %d.\n", + type, __raw_readb(chan->reg_base + PCI_REVISION_ID)); /* * Now throw it in to register initialization mode and @@ -395,9 +394,9 @@ static int __init sh7780_pci_init(void) sh7780_pci66_init(chan); - printk(KERN_NOTICE "PCI: Running at %dMHz.\n", - (__raw_readw(chan->reg_base + PCI_STATUS) & PCI_STATUS_66MHZ) ? - 66 : 33); + pr_notice("PCI: Running at %dMHz.\n", + (__raw_readw(chan->reg_base + PCI_STATUS) & PCI_STATUS_66MHZ) + ? 
66 : 33); return 0; diff --git a/arch/sh/drivers/pci/pci.c b/arch/sh/drivers/pci/pci.c index c7784e156964..6ab0b7377f66 100644 --- a/arch/sh/drivers/pci/pci.c +++ b/arch/sh/drivers/pci/pci.c @@ -120,8 +120,7 @@ int register_pci_controller(struct pci_channel *hose) * Do not panic here but later - this might happen before console init. */ if (!hose->io_map_base) { - printk(KERN_WARNING - "registering PCI controller with io_map_base unset\n"); + pr_warn("registering PCI controller with io_map_base unset\n"); } /* @@ -145,7 +144,7 @@ out: for (--i; i >= 0; i--) release_resource(&hose->resources[i]); - printk(KERN_WARNING "Skipping PCI bus scan due to resource conflict\n"); + pr_warn("Skipping PCI bus scan due to resource conflict\n"); return -1; } @@ -213,8 +212,8 @@ pcibios_bus_report_status_early(struct pci_channel *hose, pci_devfn, PCI_STATUS, status & status_mask); if (warn) - printk("(%02x:%02x: %04X) ", current_bus, - pci_devfn, status); + pr_cont("(%02x:%02x: %04X) ", current_bus, pci_devfn, + status); } } @@ -249,7 +248,7 @@ pcibios_bus_report_status(struct pci_bus *bus, unsigned int status_mask, pci_write_config_word(dev, PCI_STATUS, status & status_mask); if (warn) - printk("(%s: %04X) ", pci_name(dev), status); + pr_cont("(%s: %04X) ", pci_name(dev), status); } list_for_each_entry(dev, &bus->devices, bus_list) diff --git a/arch/sh/include/asm/adc.h b/arch/sh/include/asm/adc.h index 99ec66849559..feccfe639e38 100644 --- a/arch/sh/include/asm/adc.h +++ b/arch/sh/include/asm/adc.h @@ -1,7 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0 */ #ifndef __ASM_ADC_H #define __ASM_ADC_H -#ifdef __KERNEL__ /* * Copyright (C) 2004 Andriy Skulysh */ @@ -10,5 +9,4 @@ int adc_single(unsigned int channel); -#endif /* __KERNEL__ */ #endif /* __ASM_ADC_H */ diff --git a/arch/sh/include/asm/addrspace.h b/arch/sh/include/asm/addrspace.h index 34bfbcddcce0..468fba333e89 100644 --- a/arch/sh/include/asm/addrspace.h +++ b/arch/sh/include/asm/addrspace.h @@ -7,8 +7,6 @@ #ifndef __ASM_SH_ADDRSPACE_H #define __ASM_SH_ADDRSPACE_H -#ifdef __KERNEL__ - #include <cpu/addrspace.h> /* If this CPU supports segmentation, hook up the helpers */ @@ -62,5 +60,4 @@ #define P3_ADDR_MAX P4SEG #endif -#endif /* __KERNEL__ */ #endif /* __ASM_SH_ADDRSPACE_H */ diff --git a/arch/sh/include/asm/bitops.h b/arch/sh/include/asm/bitops.h index 445dd14c448a..450b5854d7c6 100644 --- a/arch/sh/include/asm/bitops.h +++ b/arch/sh/include/asm/bitops.h @@ -2,8 +2,6 @@ #ifndef __ASM_SH_BITOPS_H #define __ASM_SH_BITOPS_H -#ifdef __KERNEL__ - #ifndef _LINUX_BITOPS_H #error only <linux/bitops.h> can be included directly #endif @@ -71,6 +69,4 @@ static inline unsigned long __ffs(unsigned long word) #include <asm-generic/bitops/__fls.h> #include <asm-generic/bitops/fls64.h> -#endif /* __KERNEL__ */ - #endif /* __ASM_SH_BITOPS_H */ diff --git a/arch/sh/include/asm/cache.h b/arch/sh/include/asm/cache.h index 2408ac4873aa..a293343456af 100644 --- a/arch/sh/include/asm/cache.h +++ b/arch/sh/include/asm/cache.h @@ -8,7 +8,6 @@ */ #ifndef __ASM_SH_CACHE_H #define __ASM_SH_CACHE_H -#ifdef __KERNEL__ #include <linux/init.h> #include <cpu/cache.h> @@ -44,5 +43,4 @@ struct cache_info { unsigned long flags; }; #endif /* __ASSEMBLY__ */ -#endif /* __KERNEL__ */ #endif /* __ASM_SH_CACHE_H */ diff --git a/arch/sh/include/asm/cacheflush.h b/arch/sh/include/asm/cacheflush.h index fe7400079b97..4486a865ff62 100644 --- a/arch/sh/include/asm/cacheflush.h +++ b/arch/sh/include/asm/cacheflush.h @@ -2,8 +2,6 @@ #ifndef __ASM_SH_CACHEFLUSH_H #define __ASM_SH_CACHEFLUSH_H 
-#ifdef __KERNEL__ - #include <linux/mm.h> /* @@ -109,5 +107,4 @@ static inline void *sh_cacheop_vaddr(void *vaddr) return vaddr; } -#endif /* __KERNEL__ */ #endif /* __ASM_SH_CACHEFLUSH_H */ diff --git a/arch/sh/include/asm/dma.h b/arch/sh/include/asm/dma.h index 4d5a21a891c0..17d23ae98c77 100644 --- a/arch/sh/include/asm/dma.h +++ b/arch/sh/include/asm/dma.h @@ -6,7 +6,6 @@ */ #ifndef __ASM_SH_DMA_H #define __ASM_SH_DMA_H -#ifdef __KERNEL__ #include <linux/spinlock.h> #include <linux/wait.h> @@ -144,5 +143,4 @@ extern int isa_dma_bridge_buggy; #define isa_dma_bridge_buggy (0) #endif -#endif /* __KERNEL__ */ #endif /* __ASM_SH_DMA_H */ diff --git a/arch/sh/include/asm/elf.h b/arch/sh/include/asm/elf.h index 7661fb5d548a..2862d6d1cb64 100644 --- a/arch/sh/include/asm/elf.h +++ b/arch/sh/include/asm/elf.h @@ -90,7 +90,6 @@ typedef struct user_fpu_struct elf_fpregset_t; #endif #define ELF_ARCH EM_SH -#ifdef __KERNEL__ /* * This is used to ensure we don't load something for the wrong architecture. */ @@ -209,5 +208,4 @@ do { \ NEW_AUX_ENT(AT_L2_CACHESHAPE, l2_cache_shape); \ } while (0) -#endif /* __KERNEL__ */ #endif /* __ASM_SH_ELF_H */ diff --git a/arch/sh/include/asm/freq.h b/arch/sh/include/asm/freq.h index 18133bf83738..87c23621b7ae 100644 --- a/arch/sh/include/asm/freq.h +++ b/arch/sh/include/asm/freq.h @@ -6,9 +6,7 @@ */ #ifndef __ASM_SH_FREQ_H #define __ASM_SH_FREQ_H -#ifdef __KERNEL__ #include <cpu/freq.h> -#endif /* __KERNEL__ */ #endif /* __ASM_SH_FREQ_H */ diff --git a/arch/sh/include/asm/futex.h b/arch/sh/include/asm/futex.h index b39cda09fb95..b70f3fce6ed7 100644 --- a/arch/sh/include/asm/futex.h +++ b/arch/sh/include/asm/futex.h @@ -2,8 +2,6 @@ #ifndef __ASM_SH_FUTEX_H #define __ASM_SH_FUTEX_H -#ifdef __KERNEL__ - #include <linux/futex.h> #include <linux/uaccess.h> #include <asm/errno.h> @@ -71,5 +69,4 @@ static inline int arch_futex_atomic_op_inuser(int op, u32 oparg, int *oval, return ret; } -#endif /* __KERNEL__ */ #endif /* __ASM_SH_FUTEX_H */ diff --git a/arch/sh/include/asm/io.h b/arch/sh/include/asm/io.h index ec587b583822..6d5c6463bc07 100644 --- a/arch/sh/include/asm/io.h +++ b/arch/sh/include/asm/io.h @@ -21,10 +21,8 @@ #include <linux/pgtable.h> #include <asm-generic/iomap.h> -#ifdef __KERNEL__ #define __IO_PREFIX generic #include <asm/io_generic.h> -#include <asm/io_trapped.h> #include <asm-generic/pci_iomap.h> #include <mach/mangle-port.h> @@ -244,125 +242,38 @@ unsigned long long poke_real_address_q(unsigned long long addr, #define phys_to_virt(address) (__va(address)) #endif -/* - * On 32-bit SH, we traditionally have the whole physical address space - * mapped at all times (as MIPS does), so "ioremap()" and "iounmap()" do - * not need to do anything but place the address in the proper segment. - * This is true for P1 and P2 addresses, as well as some P3 ones. - * However, most of the P3 addresses and newer cores using extended - * addressing need to map through page tables, so the ioremap() - * implementation becomes a bit more complicated. - * - * See arch/sh/mm/ioremap.c for additional notes on this. - * - * We cheat a bit and always return uncachable areas until we've fixed - * the drivers to handle caching properly. - * - * On the SH-5 the concept of segmentation in the 1:1 PXSEG sense simply - * doesn't exist, so everything must go through page tables. 
- */ #ifdef CONFIG_MMU +void iounmap(void __iomem *addr); void __iomem *__ioremap_caller(phys_addr_t offset, unsigned long size, pgprot_t prot, void *caller); -void iounmap(void __iomem *addr); - -static inline void __iomem * -__ioremap(phys_addr_t offset, unsigned long size, pgprot_t prot) -{ - return __ioremap_caller(offset, size, prot, __builtin_return_address(0)); -} - -static inline void __iomem * -__ioremap_29bit(phys_addr_t offset, unsigned long size, pgprot_t prot) -{ -#ifdef CONFIG_29BIT - phys_addr_t last_addr = offset + size - 1; - - /* - * For P1 and P2 space this is trivial, as everything is already - * mapped. Uncached access for P1 addresses are done through P2. - * In the P3 case or for addresses outside of the 29-bit space, - * mapping must be done by the PMB or by using page tables. - */ - if (likely(PXSEG(offset) < P3SEG && PXSEG(last_addr) < P3SEG)) { - u64 flags = pgprot_val(prot); - - /* - * Anything using the legacy PTEA space attributes needs - * to be kicked down to page table mappings. - */ - if (unlikely(flags & _PAGE_PCC_MASK)) - return NULL; - if (unlikely(flags & _PAGE_CACHABLE)) - return (void __iomem *)P1SEGADDR(offset); - - return (void __iomem *)P2SEGADDR(offset); - } - - /* P4 above the store queues are always mapped. */ - if (unlikely(offset >= P3_ADDR_MAX)) - return (void __iomem *)P4SEGADDR(offset); -#endif - - return NULL; -} - -static inline void __iomem * -__ioremap_mode(phys_addr_t offset, unsigned long size, pgprot_t prot) -{ - void __iomem *ret; - - ret = __ioremap_trapped(offset, size); - if (ret) - return ret; - - ret = __ioremap_29bit(offset, size, prot); - if (ret) - return ret; - - return __ioremap(offset, size, prot); -} -#else -#define __ioremap(offset, size, prot) ((void __iomem *)(offset)) -#define __ioremap_mode(offset, size, prot) ((void __iomem *)(offset)) -static inline void iounmap(void __iomem *addr) {} -#endif /* CONFIG_MMU */ static inline void __iomem *ioremap(phys_addr_t offset, unsigned long size) { - return __ioremap_mode(offset, size, PAGE_KERNEL_NOCACHE); + return __ioremap_caller(offset, size, PAGE_KERNEL_NOCACHE, + __builtin_return_address(0)); } static inline void __iomem * ioremap_cache(phys_addr_t offset, unsigned long size) { - return __ioremap_mode(offset, size, PAGE_KERNEL); + return __ioremap_caller(offset, size, PAGE_KERNEL, + __builtin_return_address(0)); } #define ioremap_cache ioremap_cache #ifdef CONFIG_HAVE_IOREMAP_PROT -static inline void __iomem * -ioremap_prot(phys_addr_t offset, unsigned long size, unsigned long flags) +static inline void __iomem *ioremap_prot(phys_addr_t offset, unsigned long size, + unsigned long flags) { - return __ioremap_mode(offset, size, __pgprot(flags)); + return __ioremap_caller(offset, size, __pgprot(flags), + __builtin_return_address(0)); } -#endif +#endif /* CONFIG_HAVE_IOREMAP_PROT */ -#ifdef CONFIG_IOREMAP_FIXED -extern void __iomem *ioremap_fixed(phys_addr_t, unsigned long, pgprot_t); -extern int iounmap_fixed(void __iomem *); -extern void ioremap_fixed_init(void); -#else -static inline void __iomem * -ioremap_fixed(phys_addr_t phys_addr, unsigned long size, pgprot_t prot) -{ - BUG(); - return NULL; -} - -static inline void ioremap_fixed_init(void) { } -static inline int iounmap_fixed(void __iomem *addr) { return -EINVAL; } -#endif +#else /* CONFIG_MMU */ +#define iounmap(addr) do { } while (0) +#define ioremap(offset, size) ((void __iomem *)(unsigned long)(offset)) +#endif /* CONFIG_MMU */ #define ioremap_uc ioremap @@ -381,6 +292,4 @@ static inline int iounmap_fixed(void 
__iomem *addr) { return -EINVAL; } int valid_phys_addr_range(phys_addr_t addr, size_t size); int valid_mmap_phys_addr_range(unsigned long pfn, size_t size); -#endif /* __KERNEL__ */ - #endif /* __ASM_SH_IO_H */ diff --git a/arch/sh/include/asm/kdebug.h b/arch/sh/include/asm/kdebug.h index 960545306afa..de8693fabb1d 100644 --- a/arch/sh/include/asm/kdebug.h +++ b/arch/sh/include/asm/kdebug.h @@ -12,8 +12,7 @@ enum die_val { }; /* arch/sh/kernel/dumpstack.c */ -extern void printk_address(unsigned long address, int reliable, - const char *loglvl); +extern void printk_address(unsigned long address, int reliable); extern void dump_mem(const char *str, const char *loglvl, unsigned long bottom, unsigned long top); diff --git a/arch/sh/include/asm/mmu_context.h b/arch/sh/include/asm/mmu_context.h index 48e67d544d53..f664e51e8a15 100644 --- a/arch/sh/include/asm/mmu_context.h +++ b/arch/sh/include/asm/mmu_context.h @@ -8,7 +8,6 @@ #ifndef __ASM_SH_MMU_CONTEXT_H #define __ASM_SH_MMU_CONTEXT_H -#ifdef __KERNEL__ #include <cpu/mmu_context.h> #include <asm/tlbflush.h> #include <linux/uaccess.h> @@ -177,5 +176,4 @@ static inline void disable_mmu(void) #define disable_mmu() do { } while (0) #endif -#endif /* __KERNEL__ */ #endif /* __ASM_SH_MMU_CONTEXT_H */ diff --git a/arch/sh/include/asm/mmzone.h b/arch/sh/include/asm/mmzone.h index cbaee1d1b673..6552a088dc97 100644 --- a/arch/sh/include/asm/mmzone.h +++ b/arch/sh/include/asm/mmzone.h @@ -2,8 +2,6 @@ #ifndef __ASM_SH_MMZONE_H #define __ASM_SH_MMZONE_H -#ifdef __KERNEL__ - #ifdef CONFIG_NEED_MULTIPLE_NODES #include <linux/numa.h> @@ -44,5 +42,4 @@ void __init __add_active_range(unsigned int nid, unsigned long start_pfn, /* arch/sh/mm/init.c */ void __init allocate_pgdat(unsigned int nid); -#endif /* __KERNEL__ */ #endif /* __ASM_SH_MMZONE_H */ diff --git a/arch/sh/include/asm/pci.h b/arch/sh/include/asm/pci.h index 10a36b1cf2ea..ad22e88c6657 100644 --- a/arch/sh/include/asm/pci.h +++ b/arch/sh/include/asm/pci.h @@ -2,8 +2,6 @@ #ifndef __ASM_SH_PCI_H #define __ASM_SH_PCI_H -#ifdef __KERNEL__ - /* Can be used to override the logic in pci_scan_bus for skipping already-configured bus numbers - to be used for buggy BIOSes or architectures with incomplete PCI setup by the loader */ @@ -96,6 +94,4 @@ static inline int pci_get_legacy_ide_irq(struct pci_dev *dev, int channel) return channel ? 
15 : 14; } -#endif /* __KERNEL__ */ #endif /* __ASM_SH_PCI_H */ - diff --git a/arch/sh/include/asm/processor_32.h b/arch/sh/include/asm/processor_32.h index d44409413418..aa92cc933889 100644 --- a/arch/sh/include/asm/processor_32.h +++ b/arch/sh/include/asm/processor_32.h @@ -8,7 +8,6 @@ #ifndef __ASM_SH_PROCESSOR_32_H #define __ASM_SH_PROCESSOR_32_H -#ifdef __KERNEL__ #include <linux/compiler.h> #include <linux/linkage.h> @@ -203,5 +202,4 @@ static inline void prefetchw(const void *x) } #endif -#endif /* __KERNEL__ */ #endif /* __ASM_SH_PROCESSOR_32_H */ diff --git a/arch/sh/include/asm/segment.h b/arch/sh/include/asm/segment.h index 33d1d28057cb..02e54a3335d6 100644 --- a/arch/sh/include/asm/segment.h +++ b/arch/sh/include/asm/segment.h @@ -24,8 +24,7 @@ typedef struct { #define USER_DS KERNEL_DS #endif -#define segment_eq(a, b) ((a).seg == (b).seg) - +#define uaccess_kernel() (get_fs().seg == KERNEL_DS.seg) #define get_fs() (current_thread_info()->addr_limit) #define set_fs(x) (current_thread_info()->addr_limit = (x)) diff --git a/arch/sh/include/asm/smc37c93x.h b/arch/sh/include/asm/smc37c93x.h index f054c30a171a..891f2f8f2fd0 100644 --- a/arch/sh/include/asm/smc37c93x.h +++ b/arch/sh/include/asm/smc37c93x.h @@ -112,8 +112,8 @@ typedef struct uart_reg { #define FCR_RFRES 0x0200 /* Receiver FIFO reset */ #define FCR_TFRES 0x0400 /* Transmitter FIFO reset */ #define FCR_DMA 0x0800 /* DMA mode select */ -#define FCR_RTL 0x4000 /* Receiver triger (LSB) */ -#define FCR_RTM 0x8000 /* Receiver triger (MSB) */ +#define FCR_RTL 0x4000 /* Receiver trigger (LSB) */ +#define FCR_RTM 0x8000 /* Receiver trigger (MSB) */ /* Line Control Register */ diff --git a/arch/sh/include/asm/sparsemem.h b/arch/sh/include/asm/sparsemem.h index 4eb899751e45..4703cbe23844 100644 --- a/arch/sh/include/asm/sparsemem.h +++ b/arch/sh/include/asm/sparsemem.h @@ -2,16 +2,11 @@ #ifndef __ASM_SH_SPARSEMEM_H #define __ASM_SH_SPARSEMEM_H -#ifdef __KERNEL__ /* * SECTION_SIZE_BITS 2^N: how big each section will be - * MAX_PHYSADDR_BITS 2^N: how much physical address space we have - * MAX_PHYSMEM_BITS 2^N: how much memory we can have in that space + * MAX_PHYSMEM_BITS 2^N: how much physical address space we have */ #define SECTION_SIZE_BITS 26 -#define MAX_PHYSADDR_BITS 32 #define MAX_PHYSMEM_BITS 32 -#endif - #endif /* __ASM_SH_SPARSEMEM_H */ diff --git a/arch/sh/include/asm/stacktrace.h b/arch/sh/include/asm/stacktrace.h index 50c173c0b9f5..4f98cdc64ec5 100644 --- a/arch/sh/include/asm/stacktrace.h +++ b/arch/sh/include/asm/stacktrace.h @@ -12,8 +12,6 @@ struct stacktrace_ops { void (*address)(void *data, unsigned long address, int reliable); - /* On negative return stop dumping */ - int (*stack)(void *data, char *name); }; void dump_trace(struct task_struct *tsk, struct pt_regs *regs, diff --git a/arch/sh/include/asm/string_32.h b/arch/sh/include/asm/string_32.h index 3558b1d7123e..a276b193d3b4 100644 --- a/arch/sh/include/asm/string_32.h +++ b/arch/sh/include/asm/string_32.h @@ -2,8 +2,6 @@ #ifndef __ASM_SH_STRING_H #define __ASM_SH_STRING_H -#ifdef __KERNEL__ - /* * Copyright (C) 1999 Niibe Yutaka * But consider these trivial functions to be public domain. 
@@ -28,32 +26,6 @@ static inline char *strcpy(char *__dest, const char *__src) return __xdest; } -#define __HAVE_ARCH_STRNCPY -static inline char *strncpy(char *__dest, const char *__src, size_t __n) -{ - register char *__xdest = __dest; - unsigned long __dummy; - - if (__n == 0) - return __xdest; - - __asm__ __volatile__( - "1:\n" - "mov.b @%1+, %2\n\t" - "mov.b %2, @%0\n\t" - "cmp/eq #0, %2\n\t" - "bt/s 2f\n\t" - " cmp/eq %5,%1\n\t" - "bf/s 1b\n\t" - " add #1, %0\n" - "2:" - : "=r" (__dest), "=r" (__src), "=&z" (__dummy) - : "0" (__dest), "1" (__src), "r" (__src+__n) - : "memory", "t"); - - return __xdest; -} - #define __HAVE_ARCH_STRCMP static inline int strcmp(const char *__cs, const char *__ct) { @@ -127,6 +99,4 @@ extern void *memchr(const void *__s, int __c, size_t __n); #define __HAVE_ARCH_STRLEN extern size_t strlen(const char *); -#endif /* __KERNEL__ */ - #endif /* __ASM_SH_STRING_H */ diff --git a/arch/sh/include/asm/syscall_32.h b/arch/sh/include/asm/syscall_32.h index 0b5b8e75edac..cb51a7528384 100644 --- a/arch/sh/include/asm/syscall_32.h +++ b/arch/sh/include/asm/syscall_32.h @@ -40,10 +40,7 @@ static inline void syscall_set_return_value(struct task_struct *task, struct pt_regs *regs, int error, long val) { - if (error) - regs->regs[0] = -error; - else - regs->regs[0] = val; + regs->regs[0] = (long) error ?: val; } static inline void syscall_get_arguments(struct task_struct *task, diff --git a/arch/sh/include/asm/syscalls_32.h b/arch/sh/include/asm/syscalls_32.h index 9f9faf63b48c..5c555b864fe0 100644 --- a/arch/sh/include/asm/syscalls_32.h +++ b/arch/sh/include/asm/syscalls_32.h @@ -2,8 +2,6 @@ #ifndef __ASM_SH_SYSCALLS_32_H #define __ASM_SH_SYSCALLS_32_H -#ifdef __KERNEL__ - #include <linux/compiler.h> #include <linux/linkage.h> #include <linux/types.h> @@ -26,5 +24,4 @@ asmlinkage void do_syscall_trace_leave(struct pt_regs *regs); asmlinkage void do_notify_resume(struct pt_regs *regs, unsigned int save_r0, unsigned long thread_info_flags); -#endif /* __KERNEL__ */ #endif /* __ASM_SH_SYSCALLS_32_H */ diff --git a/arch/sh/include/asm/thread_info.h b/arch/sh/include/asm/thread_info.h index 6404be69d5fa..243ea5150aa0 100644 --- a/arch/sh/include/asm/thread_info.h +++ b/arch/sh/include/asm/thread_info.h @@ -10,8 +10,6 @@ * Copyright (C) 2002 David Howells (dhowells@redhat.com) * - Incorporating suggestions made by Linus Torvalds and Dave Miller */ -#ifdef __KERNEL__ - #include <asm/page.h> /* @@ -170,7 +168,4 @@ static inline unsigned int get_thread_fault_code(void) } #endif /* !__ASSEMBLY__ */ - -#endif /* __KERNEL__ */ - #endif /* __ASM_SH_THREAD_INFO_H */ diff --git a/arch/sh/include/asm/uaccess_32.h b/arch/sh/include/asm/uaccess_32.h index 624cf55acc27..5d7ddc092afd 100644 --- a/arch/sh/include/asm/uaccess_32.h +++ b/arch/sh/include/asm/uaccess_32.h @@ -26,6 +26,9 @@ do { \ case 4: \ __get_user_asm(x, ptr, retval, "l"); \ break; \ + case 8: \ + __get_user_u64(x, ptr, retval); \ + break; \ default: \ __get_user_unknown(); \ break; \ @@ -66,6 +69,56 @@ do { \ extern void __get_user_unknown(void); +#if defined(CONFIG_CPU_LITTLE_ENDIAN) +#define __get_user_u64(x, addr, err) \ +({ \ +__asm__ __volatile__( \ + "1:\n\t" \ + "mov.l %2,%R1\n\t" \ + "mov.l %T2,%S1\n\t" \ + "2:\n" \ + ".section .fixup,\"ax\"\n" \ + "3:\n\t" \ + "mov #0,%R1\n\t" \ + "mov #0,%S1\n\t" \ + "mov.l 4f, %0\n\t" \ + "jmp @%0\n\t" \ + " mov %3, %0\n\t" \ + ".balign 4\n" \ + "4: .long 2b\n\t" \ + ".previous\n" \ + ".section __ex_table,\"a\"\n\t" \ + ".long 1b, 3b\n\t" \ + ".long 1b + 2, 3b\n\t" \ + 
".previous" \ + :"=&r" (err), "=&r" (x) \ + :"m" (__m(addr)), "i" (-EFAULT), "0" (err)); }) +#else +#define __get_user_u64(x, addr, err) \ +({ \ +__asm__ __volatile__( \ + "1:\n\t" \ + "mov.l %2,%S1\n\t" \ + "mov.l %T2,%R1\n\t" \ + "2:\n" \ + ".section .fixup,\"ax\"\n" \ + "3:\n\t" \ + "mov #0,%S1\n\t" \ + "mov #0,%R1\n\t" \ + "mov.l 4f, %0\n\t" \ + "jmp @%0\n\t" \ + " mov %3, %0\n\t" \ + ".balign 4\n" \ + "4: .long 2b\n\t" \ + ".previous\n" \ + ".section __ex_table,\"a\"\n\t" \ + ".long 1b, 3b\n\t" \ + ".long 1b + 2, 3b\n\t" \ + ".previous" \ + :"=&r" (err), "=&r" (x) \ + :"m" (__m(addr)), "i" (-EFAULT), "0" (err)); }) +#endif + #define __put_user_size(x,ptr,size,retval) \ do { \ retval = 0; \ diff --git a/arch/sh/include/asm/watchdog.h b/arch/sh/include/asm/watchdog.h index cecd0fc507f9..b9ca4c99f046 100644 --- a/arch/sh/include/asm/watchdog.h +++ b/arch/sh/include/asm/watchdog.h @@ -8,7 +8,6 @@ */ #ifndef __ASM_SH_WATCHDOG_H #define __ASM_SH_WATCHDOG_H -#ifdef __KERNEL__ #include <linux/types.h> #include <linux/io.h> @@ -157,5 +156,4 @@ static inline void sh_wdt_write_csr(__u8 val) __raw_writew((WTCSR_HIGH << 8) | (__u16)val, WTCSR); } #endif /* CONFIG_CPU_SUBTYPE_SH7785 || CONFIG_CPU_SUBTYPE_SH7780 */ -#endif /* __KERNEL__ */ #endif /* __ASM_SH_WATCHDOG_H */ diff --git a/arch/sh/kernel/Makefile b/arch/sh/kernel/Makefile index b0f5574b6228..aa0fbc9202b1 100644 --- a/arch/sh/kernel/Makefile +++ b/arch/sh/kernel/Makefile @@ -47,5 +47,3 @@ obj-$(CONFIG_DWARF_UNWINDER) += dwarf.o obj-$(CONFIG_PERF_EVENTS) += perf_event.o perf_callchain.o obj-$(CONFIG_DMA_NONCOHERENT) += dma-coherent.o obj-$(CONFIG_HAVE_HW_BREAKPOINT) += hw_breakpoint.o - -ccflags-y := -Werror diff --git a/arch/sh/kernel/disassemble.c b/arch/sh/kernel/disassemble.c index 845543780cc5..08e1af63edd9 100644 --- a/arch/sh/kernel/disassemble.c +++ b/arch/sh/kernel/disassemble.c @@ -376,148 +376,148 @@ static void print_sh_insn(u32 memaddr, u16 insn) } ok: - printk("%-8s ", op->name); + pr_cont("%-8s ", op->name); lastsp = (op->arg[0] == A_END); disp_pc = 0; for (n = 0; n < 6 && op->arg[n] != A_END; n++) { if (n && op->arg[1] != A_END) - printk(", "); + pr_cont(", "); switch (op->arg[n]) { case A_IMM: - printk("#%d", (char)(imm)); + pr_cont("#%d", (char)(imm)); break; case A_R0: - printk("r0"); + pr_cont("r0"); break; case A_REG_N: - printk("r%d", rn); + pr_cont("r%d", rn); break; case A_INC_N: - printk("@r%d+", rn); + pr_cont("@r%d+", rn); break; case A_DEC_N: - printk("@-r%d", rn); + pr_cont("@-r%d", rn); break; case A_IND_N: - printk("@r%d", rn); + pr_cont("@r%d", rn); break; case A_DISP_REG_N: - printk("@(%d,r%d)", imm, rn); + pr_cont("@(%d,r%d)", imm, rn); break; case A_REG_M: - printk("r%d", rm); + pr_cont("r%d", rm); break; case A_INC_M: - printk("@r%d+", rm); + pr_cont("@r%d+", rm); break; case A_DEC_M: - printk("@-r%d", rm); + pr_cont("@-r%d", rm); break; case A_IND_M: - printk("@r%d", rm); + pr_cont("@r%d", rm); break; case A_DISP_REG_M: - printk("@(%d,r%d)", imm, rm); + pr_cont("@(%d,r%d)", imm, rm); break; case A_REG_B: - printk("r%d_bank", rb); + pr_cont("r%d_bank", rb); break; case A_DISP_PC: disp_pc = 1; disp_pc_addr = imm + 4 + (memaddr & relmask); - printk("%08x <%pS>", disp_pc_addr, - (void *)disp_pc_addr); + pr_cont("%08x <%pS>", disp_pc_addr, + (void *)disp_pc_addr); break; case A_IND_R0_REG_N: - printk("@(r0,r%d)", rn); + pr_cont("@(r0,r%d)", rn); break; case A_IND_R0_REG_M: - printk("@(r0,r%d)", rm); + pr_cont("@(r0,r%d)", rm); break; case A_DISP_GBR: - printk("@(%d,gbr)",imm); + pr_cont("@(%d,gbr)", imm); 
break; case A_R0_GBR: - printk("@(r0,gbr)"); + pr_cont("@(r0,gbr)"); break; case A_BDISP12: case A_BDISP8: - printk("%08x", imm + memaddr); + pr_cont("%08x", imm + memaddr); break; case A_SR: - printk("sr"); + pr_cont("sr"); break; case A_GBR: - printk("gbr"); + pr_cont("gbr"); break; case A_VBR: - printk("vbr"); + pr_cont("vbr"); break; case A_SSR: - printk("ssr"); + pr_cont("ssr"); break; case A_SPC: - printk("spc"); + pr_cont("spc"); break; case A_MACH: - printk("mach"); + pr_cont("mach"); break; case A_MACL: - printk("macl"); + pr_cont("macl"); break; case A_PR: - printk("pr"); + pr_cont("pr"); break; case A_SGR: - printk("sgr"); + pr_cont("sgr"); break; case A_DBR: - printk("dbr"); + pr_cont("dbr"); break; case FD_REG_N: case F_REG_N: - printk("fr%d", rn); + pr_cont("fr%d", rn); break; case F_REG_M: - printk("fr%d", rm); + pr_cont("fr%d", rm); break; case DX_REG_N: if (rn & 1) { - printk("xd%d", rn & ~1); + pr_cont("xd%d", rn & ~1); break; } /* else, fall through */ case D_REG_N: - printk("dr%d", rn); + pr_cont("dr%d", rn); break; case DX_REG_M: if (rm & 1) { - printk("xd%d", rm & ~1); + pr_cont("xd%d", rm & ~1); break; } /* else, fall through */ case D_REG_M: - printk("dr%d", rm); + pr_cont("dr%d", rm); break; case FPSCR_M: case FPSCR_N: - printk("fpscr"); + pr_cont("fpscr"); break; case FPUL_M: case FPUL_N: - printk("fpul"); + pr_cont("fpul"); break; case F_FR0: - printk("fr0"); + pr_cont("fr0"); break; case V_REG_N: - printk("fv%d", rn*4); + pr_cont("fv%d", rn*4); break; case V_REG_M: - printk("fv%d", rm*4); + pr_cont("fv%d", rm*4); break; case XMTRX_M4: - printk("xmtrx"); + pr_cont("xmtrx"); break; default: return; @@ -532,7 +532,7 @@ static void print_sh_insn(u32 memaddr, u16 insn) else __get_user(val, (u32 *)disp_pc_addr); - printk(" ! %08x <%pS>", val, (void *)val); + pr_cont(" ! %08x <%pS>", val, (void *)val); } return; @@ -541,7 +541,7 @@ static void print_sh_insn(u32 memaddr, u16 insn) } - printk(".word 0x%x%x%x%x", nibs[0], nibs[1], nibs[2], nibs[3]); + pr_info(".word 0x%x%x%x%x", nibs[0], nibs[1], nibs[2], nibs[3]); } void show_code(struct pt_regs *regs) @@ -552,20 +552,21 @@ void show_code(struct pt_regs *regs) if (regs->pc & 0x1) return; - printk("Code:\n"); + pr_info("Code:\n"); for (i = -3 ; i < 6 ; i++) { unsigned short insn; if (__get_user(insn, pc + i)) { - printk(" (Bad address in pc)\n"); + pr_err(" (Bad address in pc)\n"); break; } - printk("%s%08lx: ", (i ? " ": "->"), (unsigned long)(pc + i)); + pr_info("%s%08lx: ", (i ? " " : "->"), + (unsigned long)(pc + i)); print_sh_insn((unsigned long)(pc + i), insn); - printk("\n"); + pr_cont("\n"); } - printk("\n"); + pr_info("\n"); } diff --git a/arch/sh/kernel/dma-coherent.c b/arch/sh/kernel/dma-coherent.c index d4811691b93c..cd46a9825e3c 100644 --- a/arch/sh/kernel/dma-coherent.c +++ b/arch/sh/kernel/dma-coherent.c @@ -3,60 +3,13 @@ * Copyright (C) 2004 - 2007 Paul Mundt */ #include <linux/mm.h> -#include <linux/init.h> #include <linux/dma-noncoherent.h> -#include <linux/module.h> #include <asm/cacheflush.h> #include <asm/addrspace.h> -void *arch_dma_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle, - gfp_t gfp, unsigned long attrs) +void arch_dma_prep_coherent(struct page *page, size_t size) { - void *ret, *ret_nocache; - int order = get_order(size); - - gfp |= __GFP_ZERO; - - ret = (void *)__get_free_pages(gfp, order); - if (!ret) - return NULL; - - /* - * Pages from the page allocator may have data present in - * cache. So flush the cache before using uncached memory. 
- */ - arch_sync_dma_for_device(virt_to_phys(ret), size, - DMA_BIDIRECTIONAL); - - ret_nocache = (void __force *)ioremap(virt_to_phys(ret), size); - if (!ret_nocache) { - free_pages((unsigned long)ret, order); - return NULL; - } - - split_page(pfn_to_page(virt_to_phys(ret) >> PAGE_SHIFT), order); - - *dma_handle = virt_to_phys(ret); - if (!WARN_ON(!dev)) - *dma_handle -= PFN_PHYS(dev->dma_pfn_offset); - - return ret_nocache; -} - -void arch_dma_free(struct device *dev, size_t size, void *vaddr, - dma_addr_t dma_handle, unsigned long attrs) -{ - int order = get_order(size); - unsigned long pfn = (dma_handle >> PAGE_SHIFT); - int k; - - if (!WARN_ON(!dev)) - pfn += dev->dma_pfn_offset; - - for (k = 0; k < (1 << order); k++) - __free_pages(pfn_to_page(pfn + k), 0); - - iounmap(vaddr); + __flush_purge_region(page_address(page), size); } void arch_sync_dma_for_device(phys_addr_t paddr, size_t size, diff --git a/arch/sh/kernel/dumpstack.c b/arch/sh/kernel/dumpstack.c index a13c045804ed..758a6c89e911 100644 --- a/arch/sh/kernel/dumpstack.c +++ b/arch/sh/kernel/dumpstack.c @@ -16,8 +16,8 @@ #include <asm/unwinder.h> #include <asm/stacktrace.h> -void dump_mem(const char *str, const char *loglvl, - unsigned long bottom, unsigned long top) +void dump_mem(const char *str, const char *loglvl, unsigned long bottom, + unsigned long top) { unsigned long p; int i; @@ -31,23 +31,23 @@ void dump_mem(const char *str, const char *loglvl, unsigned int val; if (p < bottom || p >= top) - printk("%s ", loglvl); + pr_cont(" "); else { if (__get_user(val, (unsigned int __user *)p)) { - printk("%s\n", loglvl); + pr_cont("\n"); return; } - printk("%s%08x ", loglvl, val); + pr_cont("%08x ", val); } } - printk("%s\n", loglvl); + pr_cont("\n"); } } -void printk_address(unsigned long address, int reliable, const char *loglvl) +void printk_address(unsigned long address, int reliable) { - printk("%s [<%p>] %s%pS\n", loglvl, (void *) address, - reliable ? "" : "? ", (void *) address); + pr_cont(" [<%px>] %s%pS\n", (void *) address, + reliable ? "" : "? ", (void *) address); } #ifdef CONFIG_FUNCTION_GRAPH_TRACER @@ -107,22 +107,16 @@ stack_reader_dump(struct task_struct *task, struct pt_regs *regs, } } -static int print_trace_stack(void *data, char *name) -{ - printk("%s <%s> ", (char *)data, name); - return 0; -} - /* * Print one address/symbol entries per line. */ static void print_trace_address(void *data, unsigned long addr, int reliable) { - printk_address(addr, reliable, (char *)data); + printk("%s", (char *)data); + printk_address(addr, reliable); } static const struct stacktrace_ops print_trace_ops = { - .stack = print_trace_stack, .address = print_trace_address, }; @@ -136,7 +130,7 @@ void show_trace(struct task_struct *tsk, unsigned long *sp, unwind_stack(tsk, regs, sp, &print_trace_ops, (void *)loglvl); - printk("%s\n", loglvl); + pr_cont("\n"); if (!tsk) tsk = current; diff --git a/arch/sh/kernel/entry-common.S b/arch/sh/kernel/entry-common.S index 9bac5bbb67f3..ad963104d22d 100644 --- a/arch/sh/kernel/entry-common.S +++ b/arch/sh/kernel/entry-common.S @@ -178,34 +178,6 @@ syscall_exit_work: bra resume_userspace nop - .align 2 -syscall_trace_entry: - ! Yes it is traced. - mov r15, r4 - mov.l 7f, r11 ! Call do_syscall_trace_enter which notifies - jsr @r11 ! superior (will chomp R[0-7]) - nop - mov.l r0, @(OFF_R0,r15) ! Save return value - ! Reload R0-R4 from kernel stack, where the - ! parent may have modified them using - ! ptrace(POKEUSR). (Note that R0-R2 are - ! 
reloaded from the kernel stack by syscall_call - ! below, so don't need to be reloaded here.) - ! This allows the parent to rewrite system calls - ! and args on the fly. - mov.l @(OFF_R4,r15), r4 ! arg0 - mov.l @(OFF_R5,r15), r5 - mov.l @(OFF_R6,r15), r6 - mov.l @(OFF_R7,r15), r7 ! arg3 - mov.l @(OFF_R3,r15), r3 ! syscall_nr - ! - mov.l 6f, r10 ! Number of syscalls - cmp/hs r10, r3 - bf syscall_call - mov #-ENOSYS, r0 - bra syscall_exit - mov.l r0, @(OFF_R0,r15) ! Return value - __restore_all: mov #OFF_SR, r0 mov.l @(r0,r15), r0 ! get status register @@ -388,6 +360,37 @@ syscall_exit: bf syscall_exit_work bra __restore_all nop + + .align 2 +syscall_trace_entry: + ! Yes it is traced. + mov r15, r4 + mov.l 7f, r11 ! Call do_syscall_trace_enter which notifies + jsr @r11 ! superior (will chomp R[0-7]) + nop + cmp/eq #-1, r0 + bt syscall_exit + mov.l r0, @(OFF_R0,r15) ! Save return value + ! Reload R0-R4 from kernel stack, where the + ! parent may have modified them using + ! ptrace(POKEUSR). (Note that R0-R2 are + ! reloaded from the kernel stack by syscall_call + ! below, so don't need to be reloaded here.) + ! This allows the parent to rewrite system calls + ! and args on the fly. + mov.l @(OFF_R4,r15), r4 ! arg0 + mov.l @(OFF_R5,r15), r5 + mov.l @(OFF_R6,r15), r6 + mov.l @(OFF_R7,r15), r7 ! arg3 + mov.l @(OFF_R3,r15), r3 ! syscall_nr + ! + mov.l 6f, r10 ! Number of syscalls + cmp/hs r10, r3 + bf syscall_call + mov #-ENOSYS, r0 + bra syscall_exit + mov.l r0, @(OFF_R0,r15) ! Return value + .align 2 #if !defined(CONFIG_CPU_SH2) 1: .long TRA diff --git a/arch/sh/kernel/io_trapped.c b/arch/sh/kernel/io_trapped.c index 037aab2708b7..004ad0130b10 100644 --- a/arch/sh/kernel/io_trapped.c +++ b/arch/sh/kernel/io_trapped.c @@ -102,7 +102,6 @@ int register_trapped_io(struct trapped_io *tiop) pr_warn("unable to install trapped io filter\n"); return -1; } -EXPORT_SYMBOL_GPL(register_trapped_io); void __iomem *match_trapped_io_handler(struct list_head *list, unsigned long offset, @@ -131,7 +130,6 @@ void __iomem *match_trapped_io_handler(struct list_head *list, spin_unlock_irqrestore(&trapped_lock, flags); return NULL; } -EXPORT_SYMBOL_GPL(match_trapped_io_handler); static struct trapped_io *lookup_tiop(unsigned long address) { diff --git a/arch/sh/kernel/iomap.c b/arch/sh/kernel/iomap.c index ef9e2c97cbb7..0a0dff4e66de 100644 --- a/arch/sh/kernel/iomap.c +++ b/arch/sh/kernel/iomap.c @@ -8,31 +8,31 @@ #include <linux/module.h> #include <linux/io.h> -unsigned int ioread8(void __iomem *addr) +unsigned int ioread8(const void __iomem *addr) { return readb(addr); } EXPORT_SYMBOL(ioread8); -unsigned int ioread16(void __iomem *addr) +unsigned int ioread16(const void __iomem *addr) { return readw(addr); } EXPORT_SYMBOL(ioread16); -unsigned int ioread16be(void __iomem *addr) +unsigned int ioread16be(const void __iomem *addr) { return be16_to_cpu(__raw_readw(addr)); } EXPORT_SYMBOL(ioread16be); -unsigned int ioread32(void __iomem *addr) +unsigned int ioread32(const void __iomem *addr) { return readl(addr); } EXPORT_SYMBOL(ioread32); -unsigned int ioread32be(void __iomem *addr) +unsigned int ioread32be(const void __iomem *addr) { return be32_to_cpu(__raw_readl(addr)); } @@ -74,7 +74,7 @@ EXPORT_SYMBOL(iowrite32be); * convert to CPU byte order. We write in "IO byte * order" (we also don't have IO barriers). 
*/ -static inline void mmio_insb(void __iomem *addr, u8 *dst, int count) +static inline void mmio_insb(const void __iomem *addr, u8 *dst, int count) { while (--count >= 0) { u8 data = __raw_readb(addr); @@ -83,7 +83,7 @@ static inline void mmio_insb(void __iomem *addr, u8 *dst, int count) } } -static inline void mmio_insw(void __iomem *addr, u16 *dst, int count) +static inline void mmio_insw(const void __iomem *addr, u16 *dst, int count) { while (--count >= 0) { u16 data = __raw_readw(addr); @@ -92,7 +92,7 @@ static inline void mmio_insw(void __iomem *addr, u16 *dst, int count) } } -static inline void mmio_insl(void __iomem *addr, u32 *dst, int count) +static inline void mmio_insl(const void __iomem *addr, u32 *dst, int count) { while (--count >= 0) { u32 data = __raw_readl(addr); @@ -125,19 +125,19 @@ static inline void mmio_outsl(void __iomem *addr, const u32 *src, int count) } } -void ioread8_rep(void __iomem *addr, void *dst, unsigned long count) +void ioread8_rep(const void __iomem *addr, void *dst, unsigned long count) { mmio_insb(addr, dst, count); } EXPORT_SYMBOL(ioread8_rep); -void ioread16_rep(void __iomem *addr, void *dst, unsigned long count) +void ioread16_rep(const void __iomem *addr, void *dst, unsigned long count) { mmio_insw(addr, dst, count); } EXPORT_SYMBOL(ioread16_rep); -void ioread32_rep(void __iomem *addr, void *dst, unsigned long count) +void ioread32_rep(const void __iomem *addr, void *dst, unsigned long count) { mmio_insl(addr, dst, count); } diff --git a/arch/sh/kernel/ioport.c b/arch/sh/kernel/ioport.c index 34f8cdbbcf0b..f39446a658bd 100644 --- a/arch/sh/kernel/ioport.c +++ b/arch/sh/kernel/ioport.c @@ -7,6 +7,7 @@ */ #include <linux/module.h> #include <linux/io.h> +#include <asm/io_trapped.h> unsigned long sh_io_port_base __read_mostly = -1; EXPORT_SYMBOL(sh_io_port_base); diff --git a/arch/sh/kernel/machvec.c b/arch/sh/kernel/machvec.c index 76bd8955d4fe..d606679a211e 100644 --- a/arch/sh/kernel/machvec.c +++ b/arch/sh/kernel/machvec.c @@ -65,10 +65,10 @@ static int __init early_parse_mv(char *from) mvp = get_mv_byname(mv_name); if (unlikely(!mvp)) { - printk("Available vectors:\n\n\t'%s', ", sh_mv.mv_name); + pr_info("Available vectors:\n\n\t'%s', ", sh_mv.mv_name); for_each_mv(mvp) - printk("'%s', ", mvp->mv_name); - printk("\n\n"); + pr_cont("'%s', ", mvp->mv_name); + pr_cont("\n\n"); panic("Failed to select machvec '%s' -- halting.\n", mv_name); } else @@ -105,7 +105,7 @@ void __init sh_mv_setup(void) sh_mv = *(struct sh_machine_vector *)&__machvec_start; } - printk(KERN_NOTICE "Booting machvec: %s\n", get_system_type()); + pr_notice("Booting machvec: %s\n", get_system_type()); /* * Manually walk the vec, fill in anything that the board hasn't yet diff --git a/arch/sh/kernel/perf_callchain.c b/arch/sh/kernel/perf_callchain.c index 6281f2fdf9ca..c9d3aa18732d 100644 --- a/arch/sh/kernel/perf_callchain.c +++ b/arch/sh/kernel/perf_callchain.c @@ -11,11 +11,6 @@ #include <asm/unwinder.h> #include <asm/ptrace.h> -static int callchain_stack(void *data, char *name) -{ - return 0; -} - static void callchain_address(void *data, unsigned long addr, int reliable) { struct perf_callchain_entry_ctx *entry = data; @@ -25,7 +20,6 @@ static void callchain_address(void *data, unsigned long addr, int reliable) } static const struct stacktrace_ops callchain_ops = { - .stack = callchain_stack, .address = callchain_address, }; diff --git a/arch/sh/kernel/process_32.c b/arch/sh/kernel/process_32.c index a432c364b13a..80a5d1c66a51 100644 --- a/arch/sh/kernel/process_32.c +++ 
b/arch/sh/kernel/process_32.c @@ -30,34 +30,30 @@ void show_regs(struct pt_regs * regs) { - printk("\n"); + pr_info("\n"); show_regs_print_info(KERN_DEFAULT); - printk("PC is at %pS\n", (void *)instruction_pointer(regs)); - printk("PR is at %pS\n", (void *)regs->pr); + pr_info("PC is at %pS\n", (void *)instruction_pointer(regs)); + pr_info("PR is at %pS\n", (void *)regs->pr); - printk("PC : %08lx SP : %08lx SR : %08lx ", - regs->pc, regs->regs[15], regs->sr); + pr_info("PC : %08lx SP : %08lx SR : %08lx ", regs->pc, + regs->regs[15], regs->sr); #ifdef CONFIG_MMU - printk("TEA : %08x\n", __raw_readl(MMU_TEA)); + pr_cont("TEA : %08x\n", __raw_readl(MMU_TEA)); #else - printk("\n"); + pr_cont("\n"); #endif - printk("R0 : %08lx R1 : %08lx R2 : %08lx R3 : %08lx\n", - regs->regs[0],regs->regs[1], - regs->regs[2],regs->regs[3]); - printk("R4 : %08lx R5 : %08lx R6 : %08lx R7 : %08lx\n", - regs->regs[4],regs->regs[5], - regs->regs[6],regs->regs[7]); - printk("R8 : %08lx R9 : %08lx R10 : %08lx R11 : %08lx\n", - regs->regs[8],regs->regs[9], - regs->regs[10],regs->regs[11]); - printk("R12 : %08lx R13 : %08lx R14 : %08lx\n", - regs->regs[12],regs->regs[13], - regs->regs[14]); - printk("MACH: %08lx MACL: %08lx GBR : %08lx PR : %08lx\n", - regs->mach, regs->macl, regs->gbr, regs->pr); + pr_info("R0 : %08lx R1 : %08lx R2 : %08lx R3 : %08lx\n", + regs->regs[0], regs->regs[1], regs->regs[2], regs->regs[3]); + pr_info("R4 : %08lx R5 : %08lx R6 : %08lx R7 : %08lx\n", + regs->regs[4], regs->regs[5], regs->regs[6], regs->regs[7]); + pr_info("R8 : %08lx R9 : %08lx R10 : %08lx R11 : %08lx\n", + regs->regs[8], regs->regs[9], regs->regs[10], regs->regs[11]); + pr_info("R12 : %08lx R13 : %08lx R14 : %08lx\n", + regs->regs[12], regs->regs[13], regs->regs[14]); + pr_info("MACH: %08lx MACL: %08lx GBR : %08lx PR : %08lx\n", + regs->mach, regs->macl, regs->gbr, regs->pr); show_trace(NULL, (unsigned long *)regs->regs[15], regs, KERN_DEFAULT); show_code(regs); diff --git a/arch/sh/kernel/ptrace_32.c b/arch/sh/kernel/ptrace_32.c index 609b7c917e6e..b05bf92f9c32 100644 --- a/arch/sh/kernel/ptrace_32.c +++ b/arch/sh/kernel/ptrace_32.c @@ -457,8 +457,6 @@ asmlinkage long do_syscall_trace_enter(struct pt_regs *regs) { long ret = 0; - secure_computing_strict(regs->regs[0]); - if (test_thread_flag(TIF_SYSCALL_TRACE) && tracehook_report_syscall_entry(regs)) /* @@ -468,6 +466,9 @@ asmlinkage long do_syscall_trace_enter(struct pt_regs *regs) */ ret = -1L; + if (secure_computing() == -1) + return -1; + if (unlikely(test_thread_flag(TIF_SYSCALL_TRACEPOINT))) trace_sys_enter(regs, regs->regs[0]); diff --git a/arch/sh/kernel/stacktrace.c b/arch/sh/kernel/stacktrace.c index 2950b19ad077..daf0b53ee066 100644 --- a/arch/sh/kernel/stacktrace.c +++ b/arch/sh/kernel/stacktrace.c @@ -15,11 +15,6 @@ #include <asm/ptrace.h> #include <asm/stacktrace.h> -static int save_stack_stack(void *data, char *name) -{ - return 0; -} - /* * Save stack-backtrace addresses into a stack_trace buffer. 
*/ @@ -40,7 +35,6 @@ static void save_stack_address(void *data, unsigned long addr, int reliable) } static const struct stacktrace_ops save_stack_ops = { - .stack = save_stack_stack, .address = save_stack_address, }; @@ -73,7 +67,6 @@ save_stack_address_nosched(void *data, unsigned long addr, int reliable) } static const struct stacktrace_ops save_stack_ops_nosched = { - .stack = save_stack_stack, .address = save_stack_address_nosched, }; diff --git a/arch/sh/kernel/syscalls/syscall.tbl b/arch/sh/kernel/syscalls/syscall.tbl index 96848db9659e..ae0a00beea5f 100644 --- a/arch/sh/kernel/syscalls/syscall.tbl +++ b/arch/sh/kernel/syscalls/syscall.tbl @@ -156,7 +156,7 @@ 146 common writev sys_writev 147 common getsid sys_getsid 148 common fdatasync sys_fdatasync -149 common _sysctl sys_sysctl +149 common _sysctl sys_ni_syscall 150 common mlock sys_mlock 151 common munlock sys_munlock 152 common mlockall sys_mlockall diff --git a/arch/sh/kernel/traps_32.c b/arch/sh/kernel/traps_32.c index 058c6181bb30..b62ad0ba2395 100644 --- a/arch/sh/kernel/traps_32.c +++ b/arch/sh/kernel/traps_32.c @@ -482,8 +482,6 @@ asmlinkage void do_address_error(struct pt_regs *regs, error_code = lookup_exception_vector(); #endif - oldfs = get_fs(); - if (user_mode(regs)) { int si_code = BUS_ADRERR; unsigned int user_action; @@ -491,13 +489,13 @@ asmlinkage void do_address_error(struct pt_regs *regs, local_irq_enable(); inc_unaligned_user_access(); - set_fs(USER_DS); + oldfs = force_uaccess_begin(); if (copy_from_user(&instruction, (insn_size_t *)(regs->pc & ~1), sizeof(instruction))) { - set_fs(oldfs); + force_uaccess_end(oldfs); goto uspace_segv; } - set_fs(oldfs); + force_uaccess_end(oldfs); /* shout about userspace fixups */ unaligned_fixups_notify(current, instruction, regs); @@ -520,11 +518,11 @@ fixup: goto uspace_segv; } - set_fs(USER_DS); + oldfs = force_uaccess_begin(); tmp = handle_unaligned_access(instruction, regs, &user_mem_access, 0, address); - set_fs(oldfs); + force_uaccess_end(oldfs); if (tmp == 0) return; /* sorted */ diff --git a/arch/sh/lib/Makefile b/arch/sh/lib/Makefile index d0abbe5e38b0..eb473d373ca4 100644 --- a/arch/sh/lib/Makefile +++ b/arch/sh/lib/Makefile @@ -30,5 +30,3 @@ memset-$(CONFIG_CPU_SH4) := memset-sh4.o lib-$(CONFIG_MMU) += copy_page.o __clear_user.o lib-$(CONFIG_MCOUNT) += mcount.o lib-y += $(memcpy-y) $(memset-y) $(udivsi3-y) - -ccflags-y := -Werror diff --git a/arch/sh/lib/delay.c b/arch/sh/lib/delay.c index 540e670dbafc..dad8e6a54906 100644 --- a/arch/sh/lib/delay.c +++ b/arch/sh/lib/delay.c @@ -29,7 +29,6 @@ void __delay(unsigned long loops) : "0" (loops) : "t"); } -EXPORT_SYMBOL(__delay); inline void __const_udelay(unsigned long xloops) { diff --git a/arch/sh/mm/Makefile b/arch/sh/mm/Makefile index 487da0ff03b3..f69ddc70b146 100644 --- a/arch/sh/mm/Makefile +++ b/arch/sh/mm/Makefile @@ -43,5 +43,3 @@ obj-$(CONFIG_UNCACHED_MAPPING) += uncached.o obj-$(CONFIG_HAVE_SRAM_POOL) += sram.o GCOV_PROFILE_pmb.o := n - -ccflags-y := -Werror diff --git a/arch/sh/mm/consistent.c b/arch/sh/mm/consistent.c index 3169a343a5ab..0de206c1acfe 100644 --- a/arch/sh/mm/consistent.c +++ b/arch/sh/mm/consistent.c @@ -57,8 +57,6 @@ int __init platform_resource_setup_memory(struct platform_device *pdev, return -ENOMEM; } - memset(buf, 0, memsize); - r->flags = IORESOURCE_MEM; r->start = dma_handle; r->end = r->start + memsize - 1; diff --git a/arch/sh/mm/fault.c b/arch/sh/mm/fault.c index fbe1f2fe9a8c..88a1f453d73e 100644 --- a/arch/sh/mm/fault.c +++ b/arch/sh/mm/fault.c @@ -208,13 +208,12 @@ 
show_fault_oops(struct pt_regs *regs, unsigned long address) if (!oops_may_print()) return; - printk(KERN_ALERT "PC:"); pr_alert("BUG: unable to handle kernel %s at %08lx\n", address < PAGE_SIZE ? "NULL pointer dereference" : "paging request", address); pr_alert("PC:"); - printk_address(regs->pc, 1, KERN_ALERT); + printk_address(regs->pc, 1); show_pte(NULL, address); } @@ -482,22 +481,13 @@ good_area: * make sure we exit gracefully rather than endlessly redo * the fault. */ - fault = handle_mm_fault(vma, address, flags); + fault = handle_mm_fault(vma, address, flags, regs); if (unlikely(fault & (VM_FAULT_RETRY | VM_FAULT_ERROR))) if (mm_fault_error(regs, error_code, address, fault)) return; if (flags & FAULT_FLAG_ALLOW_RETRY) { - if (fault & VM_FAULT_MAJOR) { - tsk->maj_flt++; - perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, - regs, address); - } else { - tsk->min_flt++; - perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, - regs, address); - } if (fault & VM_FAULT_RETRY) { flags |= FAULT_FLAG_TRIED; diff --git a/arch/sh/mm/init.c b/arch/sh/mm/init.c index 613de8096335..4735176ab811 100644 --- a/arch/sh/mm/init.c +++ b/arch/sh/mm/init.c @@ -29,6 +29,7 @@ #include <asm/cache.h> #include <asm/pgalloc.h> #include <linux/sizes.h> +#include "ioremap.h" pgd_t swapper_pg_dir[PTRS_PER_PGD]; @@ -425,15 +426,6 @@ int arch_add_memory(int nid, u64 start, u64 size, return ret; } -#ifdef CONFIG_NUMA -int memory_add_physaddr_to_nid(u64 addr) -{ - /* Node 0 for now.. */ - return 0; -} -EXPORT_SYMBOL_GPL(memory_add_physaddr_to_nid); -#endif - void arch_remove_memory(int nid, u64 start, u64 size, struct vmem_altmap *altmap) { diff --git a/arch/sh/mm/ioremap.c b/arch/sh/mm/ioremap.c index f6d02246d665..21342581144d 100644 --- a/arch/sh/mm/ioremap.c +++ b/arch/sh/mm/ioremap.c @@ -18,12 +18,59 @@ #include <linux/mm.h> #include <linux/pci.h> #include <linux/io.h> +#include <asm/io_trapped.h> #include <asm/page.h> #include <asm/pgalloc.h> #include <asm/addrspace.h> #include <asm/cacheflush.h> #include <asm/tlbflush.h> #include <asm/mmu.h> +#include "ioremap.h" + +/* + * On 32-bit SH, we traditionally have the whole physical address space mapped + * at all times (as MIPS does), so "ioremap()" and "iounmap()" do not need to do + * anything but place the address in the proper segment. This is true for P1 + * and P2 addresses, as well as some P3 ones. However, most of the P3 addresses + * and newer cores using extended addressing need to map through page tables, so + * the ioremap() implementation becomes a bit more complicated. + */ +#ifdef CONFIG_29BIT +static void __iomem * +__ioremap_29bit(phys_addr_t offset, unsigned long size, pgprot_t prot) +{ + phys_addr_t last_addr = offset + size - 1; + + /* + * For P1 and P2 space this is trivial, as everything is already + * mapped. Uncached access for P1 addresses are done through P2. + * In the P3 case or for addresses outside of the 29-bit space, + * mapping must be done by the PMB or by using page tables. + */ + if (likely(PXSEG(offset) < P3SEG && PXSEG(last_addr) < P3SEG)) { + u64 flags = pgprot_val(prot); + + /* + * Anything using the legacy PTEA space attributes needs + * to be kicked down to page table mappings. + */ + if (unlikely(flags & _PAGE_PCC_MASK)) + return NULL; + if (unlikely(flags & _PAGE_CACHABLE)) + return (void __iomem *)P1SEGADDR(offset); + + return (void __iomem *)P2SEGADDR(offset); + } + + /* P4 above the store queues are always mapped. 
*/ + if (unlikely(offset >= P3_ADDR_MAX)) + return (void __iomem *)P4SEGADDR(offset); + + return NULL; +} +#else +#define __ioremap_29bit(offset, size, prot) NULL +#endif /* CONFIG_29BIT */ /* * Remap an arbitrary physical address space into the kernel virtual @@ -42,6 +89,14 @@ __ioremap_caller(phys_addr_t phys_addr, unsigned long size, unsigned long offset, last_addr, addr, orig_addr; void __iomem *mapped; + mapped = __ioremap_trapped(phys_addr, size); + if (mapped) + return mapped; + + mapped = __ioremap_29bit(phys_addr, size, pgprot); + if (mapped) + return mapped; + /* Don't allow wraparound or zero size */ last_addr = phys_addr + size - 1; if (!size || last_addr < phys_addr) diff --git a/arch/sh/mm/ioremap.h b/arch/sh/mm/ioremap.h new file mode 100644 index 000000000000..f2544e721a35 --- /dev/null +++ b/arch/sh/mm/ioremap.h @@ -0,0 +1,23 @@ +#ifndef _SH_MM_IORMEMAP_H +#define _SH_MM_IORMEMAP_H 1 + +#ifdef CONFIG_IOREMAP_FIXED +void __iomem *ioremap_fixed(phys_addr_t, unsigned long, pgprot_t); +int iounmap_fixed(void __iomem *); +void ioremap_fixed_init(void); +#else +static inline void __iomem * +ioremap_fixed(phys_addr_t phys_addr, unsigned long size, pgprot_t prot) +{ + BUG(); + return NULL; +} +static inline void ioremap_fixed_init(void) +{ +} +static inline int iounmap_fixed(void __iomem *addr) +{ + return -EINVAL; +} +#endif /* CONFIG_IOREMAP_FIXED */ +#endif /* _SH_MM_IORMEMAP_H */ diff --git a/arch/sh/mm/ioremap_fixed.c b/arch/sh/mm/ioremap_fixed.c index aab3f82856bb..136113bcac25 100644 --- a/arch/sh/mm/ioremap_fixed.c +++ b/arch/sh/mm/ioremap_fixed.c @@ -23,6 +23,7 @@ #include <asm/tlbflush.h> #include <asm/mmu.h> #include <asm/mmu_context.h> +#include "ioremap.h" struct ioremap_map { void __iomem *addr; diff --git a/arch/sh/mm/pgtable.c b/arch/sh/mm/pgtable.c index 5c8f9247c3c2..cf7ce4b57359 100644 --- a/arch/sh/mm/pgtable.c +++ b/arch/sh/mm/pgtable.c @@ -2,8 +2,6 @@ #include <linux/mm.h> #include <linux/slab.h> -#define PGALLOC_GFP GFP_KERNEL | __GFP_ZERO - static struct kmem_cache *pgd_cachep; #if PAGETABLE_LEVELS > 2 static struct kmem_cache *pmd_cachep; @@ -13,6 +11,7 @@ void pgd_ctor(void *x) { pgd_t *pgd = x; + memset(pgd, 0, USER_PTRS_PER_PGD * sizeof(pgd_t)); memcpy(pgd + USER_PTRS_PER_PGD, swapper_pg_dir + USER_PTRS_PER_PGD, (PTRS_PER_PGD - USER_PTRS_PER_PGD) * sizeof(pgd_t)); @@ -32,7 +31,7 @@ void pgtable_cache_init(void) pgd_t *pgd_alloc(struct mm_struct *mm) { - return kmem_cache_alloc(pgd_cachep, PGALLOC_GFP); + return kmem_cache_alloc(pgd_cachep, GFP_KERNEL); } void pgd_free(struct mm_struct *mm, pgd_t *pgd) @@ -48,7 +47,7 @@ void pud_populate(struct mm_struct *mm, pud_t *pud, pmd_t *pmd) pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long address) { - return kmem_cache_alloc(pmd_cachep, PGALLOC_GFP); + return kmem_cache_alloc(pmd_cachep, GFP_KERNEL | __GFP_ZERO); } void pmd_free(struct mm_struct *mm, pmd_t *pmd) diff --git a/arch/sh/oprofile/backtrace.c b/arch/sh/oprofile/backtrace.c index f1205f92631d..cc16cf86cd92 100644 --- a/arch/sh/oprofile/backtrace.c +++ b/arch/sh/oprofile/backtrace.c @@ -19,12 +19,6 @@ #include <asm/sections.h> #include <asm/stacktrace.h> -static int backtrace_stack(void *data, char *name) -{ - /* Yes, we want all stacks */ - return 0; -} - static void backtrace_address(void *data, unsigned long addr, int reliable) { unsigned int *depth = data; @@ -34,7 +28,6 @@ static void backtrace_address(void *data, unsigned long addr, int reliable) } static struct stacktrace_ops backtrace_ops = { - .stack = backtrace_stack, .address = 
backtrace_address, }; diff --git a/arch/sh/tools/mach-types b/arch/sh/tools/mach-types index 569977e52c91..29e648855d50 100644 --- a/arch/sh/tools/mach-types +++ b/arch/sh/tools/mach-types @@ -46,7 +46,6 @@ X3PROTO SH_X3PROTO MAGICPANELR2 SH_MAGIC_PANEL_R2 R2D_PLUS RTS7751R2D_PLUS R2D_1 RTS7751R2D_1 -CAYMAN SH_CAYMAN SDK7780 SH_SDK7780 MIGOR SH_MIGOR RSK7201 SH_RSK7201 diff --git a/arch/sparc/include/asm/sparsemem.h b/arch/sparc/include/asm/sparsemem.h index 1dd1b61432db..aa9a676bc341 100644 --- a/arch/sparc/include/asm/sparsemem.h +++ b/arch/sparc/include/asm/sparsemem.h @@ -7,7 +7,6 @@ #include <asm/page.h> #define SECTION_SIZE_BITS 30 -#define MAX_PHYSADDR_BITS MAX_PHYS_ADDRESS_BITS #define MAX_PHYSMEM_BITS MAX_PHYS_ADDRESS_BITS #endif /* !(__KERNEL__) */ diff --git a/arch/sparc/include/asm/uaccess_32.h b/arch/sparc/include/asm/uaccess_32.h index d6d8413eca83..0a2d3ebc4bb8 100644 --- a/arch/sparc/include/asm/uaccess_32.h +++ b/arch/sparc/include/asm/uaccess_32.h @@ -28,7 +28,7 @@ #define get_fs() (current->thread.current_ds) #define set_fs(val) ((current->thread.current_ds) = (val)) -#define segment_eq(a, b) ((a).seg == (b).seg) +#define uaccess_kernel() (get_fs().seg == KERNEL_DS.seg) /* We have there a nice not-mapped page at PAGE_OFFSET - PAGE_SIZE, so that this test * can be fairly lightweight. diff --git a/arch/sparc/include/asm/uaccess_64.h b/arch/sparc/include/asm/uaccess_64.h index bf9d330073b2..698cf69f74e9 100644 --- a/arch/sparc/include/asm/uaccess_64.h +++ b/arch/sparc/include/asm/uaccess_64.h @@ -32,7 +32,7 @@ #define get_fs() ((mm_segment_t){(current_thread_info()->current_ds)}) -#define segment_eq(a, b) ((a).seg == (b).seg) +#define uaccess_kernel() (get_fs().seg == KERNEL_DS.seg) #define set_fs(val) \ do { \ diff --git a/arch/sparc/kernel/syscalls/syscall.tbl b/arch/sparc/kernel/syscalls/syscall.tbl index 46024e80ee86..4af114e84f20 100644 --- a/arch/sparc/kernel/syscalls/syscall.tbl +++ b/arch/sparc/kernel/syscalls/syscall.tbl @@ -300,7 +300,7 @@ 249 64 nanosleep sys_nanosleep 250 32 mremap sys_mremap 250 64 mremap sys_64_mremap -251 common _sysctl sys_sysctl compat_sys_sysctl +251 common _sysctl sys_ni_syscall 252 common getsid sys_getsid 253 common fdatasync sys_fdatasync 254 32 nfsservctl sys_ni_syscall sys_nis_syscall diff --git a/arch/sparc/mm/fault_32.c b/arch/sparc/mm/fault_32.c index cfef656eda0f..8071bfd72349 100644 --- a/arch/sparc/mm/fault_32.c +++ b/arch/sparc/mm/fault_32.c @@ -234,7 +234,7 @@ good_area: * make sure we exit gracefully rather than endlessly redo * the fault. 
*/ - fault = handle_mm_fault(vma, address, flags); + fault = handle_mm_fault(vma, address, flags, regs); if (fault_signal_pending(fault, regs)) return; @@ -250,15 +250,6 @@ good_area: } if (flags & FAULT_FLAG_ALLOW_RETRY) { - if (fault & VM_FAULT_MAJOR) { - current->maj_flt++; - perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, - 1, regs, address); - } else { - current->min_flt++; - perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, - 1, regs, address); - } if (fault & VM_FAULT_RETRY) { flags |= FAULT_FLAG_TRIED; @@ -410,7 +401,7 @@ good_area: if (!(vma->vm_flags & (VM_READ | VM_EXEC))) goto bad_area; } - switch (handle_mm_fault(vma, address, flags)) { + switch (handle_mm_fault(vma, address, flags, NULL)) { case VM_FAULT_SIGBUS: case VM_FAULT_OOM: goto do_sigbus; diff --git a/arch/sparc/mm/fault_64.c b/arch/sparc/mm/fault_64.c index a3806614e4dc..0a6bcc85fba7 100644 --- a/arch/sparc/mm/fault_64.c +++ b/arch/sparc/mm/fault_64.c @@ -422,7 +422,7 @@ good_area: goto bad_area; } - fault = handle_mm_fault(vma, address, flags); + fault = handle_mm_fault(vma, address, flags, regs); if (fault_signal_pending(fault, regs)) goto exit_exception; @@ -438,15 +438,6 @@ good_area: } if (flags & FAULT_FLAG_ALLOW_RETRY) { - if (fault & VM_FAULT_MAJOR) { - current->maj_flt++; - perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, - 1, regs, address); - } else { - current->min_flt++; - perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, - 1, regs, address); - } if (fault & VM_FAULT_RETRY) { flags |= FAULT_FLAG_TRIED; diff --git a/arch/um/Kconfig b/arch/um/Kconfig index ef69be17ff70..eb51fec75948 100644 --- a/arch/um/Kconfig +++ b/arch/um/Kconfig @@ -14,6 +14,7 @@ config UML select HAVE_FUTEX_CMPXCHG if FUTEX select HAVE_DEBUG_KMEMLEAK select HAVE_DEBUG_BUGVERBOSE + select NO_DMA select GENERIC_IRQ_SHOW select GENERIC_CPU_DEVICES select GENERIC_CLOCKEVENTS @@ -167,9 +168,6 @@ config MMAPPER This driver allows a host file to be used as emulated IO memory inside UML. 
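Note: every architecture touched around this point (sh and sparc above, um and x86 further down) drops the same open-coded major/minor fault bookkeeping, because handle_mm_fault() now takes a pt_regs pointer and the core mm does the accounting when regs is non-NULL; callers that must not be accounted pass NULL, as in the hunks above. A minimal, hedged sketch of that consolidated bookkeeping follows — the helper name is illustrative (not the actual function added on the mm side); the fields and perf events are the ones visible in the lines being deleted, assuming the usual kernel headers.

#include <linux/mm.h>
#include <linux/perf_event.h>
#include <linux/sched.h>

/*
 * Hedged sketch only: mirrors the per-arch code removed above so the
 * pattern is visible in one place. Not the in-tree mm helper.
 */
static void sketch_account_fault(struct pt_regs *regs, unsigned long address,
				 vm_fault_t fault)
{
	if (!regs)	/* callers passing NULL skip accounting entirely */
		return;

	if (fault & VM_FAULT_MAJOR) {
		current->maj_flt++;
		perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, regs, address);
	} else {
		current->min_flt++;
		perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, regs, address);
	}
}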
-config NO_DMA - def_bool y - config PGTABLE_LEVELS int default 3 if 3_LEVEL_PGTABLES diff --git a/arch/um/kernel/trap.c b/arch/um/kernel/trap.c index 2b3afa354a90..ad12f78bda7e 100644 --- a/arch/um/kernel/trap.c +++ b/arch/um/kernel/trap.c @@ -71,7 +71,7 @@ good_area: do { vm_fault_t fault; - fault = handle_mm_fault(vma, address, flags); + fault = handle_mm_fault(vma, address, flags, NULL); if ((fault & VM_FAULT_RETRY) && fatal_signal_pending(current)) goto out_nosemaphore; @@ -88,10 +88,6 @@ good_area: BUG(); } if (flags & FAULT_FLAG_ALLOW_RETRY) { - if (fault & VM_FAULT_MAJOR) - current->maj_flt++; - else - current->min_flt++; if (fault & VM_FAULT_RETRY) { flags |= FAULT_FLAG_TRIED; diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 9a2849527dd7..7101ac64bb20 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -209,6 +209,7 @@ config X86 select HAVE_PERF_REGS select HAVE_PERF_USER_STACK_DUMP select MMU_GATHER_RCU_TABLE_FREE if PARAVIRT + select HAVE_POSIX_CPU_TIMERS_TASK_WORK select HAVE_REGS_AND_STACK_ACCESS_API select HAVE_RELIABLE_STACKTRACE if X86_64 && (UNWINDER_FRAME_POINTER || UNWINDER_ORC) && STACK_VALIDATION select HAVE_FUNCTION_ARG_ACCESS_API diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S index 29b7d52143e9..df8c017e6161 100644 --- a/arch/x86/entry/entry_32.S +++ b/arch/x86/entry/entry_32.S @@ -449,8 +449,6 @@ .macro SWITCH_TO_KERNEL_STACK - ALTERNATIVE "", "jmp .Lend_\@", X86_FEATURE_XENPV - BUG_IF_WRONG_CR3 SWITCH_TO_KERNEL_CR3 scratch_reg=%eax @@ -599,8 +597,6 @@ */ .macro SWITCH_TO_ENTRY_STACK - ALTERNATIVE "", "jmp .Lend_\@", X86_FEATURE_XENPV - /* Bytes to copy */ movl $PTREGS_SIZE, %ecx @@ -872,17 +868,6 @@ SYM_ENTRY(__begin_SYSENTER_singlestep_region, SYM_L_GLOBAL, SYM_A_NONE) * will ignore all of the single-step traps generated in this range. */ -#ifdef CONFIG_XEN_PV -/* - * Xen doesn't set %esp to be precisely what the normal SYSENTER - * entry point expects, so fix it up before using the normal path. - */ -SYM_CODE_START(xen_sysenter_target) - addl $5*4, %esp /* remove xen-provided frame */ - jmp .Lsysenter_past_esp -SYM_CODE_END(xen_sysenter_target) -#endif - /* * 32-bit SYSENTER entry. * @@ -965,9 +950,8 @@ SYM_FUNC_START(entry_SYSENTER_32) movl %esp, %eax call do_SYSENTER_32 - /* XEN PV guests always use IRET path */ - ALTERNATIVE "testl %eax, %eax; jz .Lsyscall_32_done", \ - "jmp .Lsyscall_32_done", X86_FEATURE_XENPV + testl %eax, %eax + jz .Lsyscall_32_done STACKLEAK_ERASE @@ -1165,95 +1149,6 @@ SYM_FUNC_END(entry_INT80_32) #endif .endm -#ifdef CONFIG_PARAVIRT -SYM_CODE_START(native_iret) - iret - _ASM_EXTABLE(native_iret, asm_iret_error) -SYM_CODE_END(native_iret) -#endif - -#ifdef CONFIG_XEN_PV -/* - * See comment in entry_64.S for further explanation - * - * Note: This is not an actual IDT entry point. It's a XEN specific entry - * point and therefore named to match the 64-bit trampoline counterpart. - */ -SYM_FUNC_START(xen_asm_exc_xen_hypervisor_callback) - /* - * Check to see if we got the event in the critical - * region in xen_iret_direct, after we've reenabled - * events and checked for pending events. 
This simulates - * iret instruction's behaviour where it delivers a - * pending interrupt when enabling interrupts: - */ - cmpl $xen_iret_start_crit, (%esp) - jb 1f - cmpl $xen_iret_end_crit, (%esp) - jae 1f - call xen_iret_crit_fixup -1: - pushl $-1 /* orig_ax = -1 => not a system call */ - SAVE_ALL - ENCODE_FRAME_POINTER - - mov %esp, %eax - call xen_pv_evtchn_do_upcall - jmp handle_exception_return -SYM_FUNC_END(xen_asm_exc_xen_hypervisor_callback) - -/* - * Hypervisor uses this for application faults while it executes. - * We get here for two reasons: - * 1. Fault while reloading DS, ES, FS or GS - * 2. Fault while executing IRET - * Category 1 we fix up by reattempting the load, and zeroing the segment - * register if the load fails. - * Category 2 we fix up by jumping to do_iret_error. We cannot use the - * normal Linux return path in this case because if we use the IRET hypercall - * to pop the stack frame we end up in an infinite loop of failsafe callbacks. - * We distinguish between categories by maintaining a status value in EAX. - */ -SYM_FUNC_START(xen_failsafe_callback) - pushl %eax - movl $1, %eax -1: mov 4(%esp), %ds -2: mov 8(%esp), %es -3: mov 12(%esp), %fs -4: mov 16(%esp), %gs - /* EAX == 0 => Category 1 (Bad segment) - EAX != 0 => Category 2 (Bad IRET) */ - testl %eax, %eax - popl %eax - lea 16(%esp), %esp - jz 5f - jmp asm_iret_error -5: pushl $-1 /* orig_ax = -1 => not a system call */ - SAVE_ALL - ENCODE_FRAME_POINTER - jmp handle_exception_return - -.section .fixup, "ax" -6: xorl %eax, %eax - movl %eax, 4(%esp) - jmp 1b -7: xorl %eax, %eax - movl %eax, 8(%esp) - jmp 2b -8: xorl %eax, %eax - movl %eax, 12(%esp) - jmp 3b -9: xorl %eax, %eax - movl %eax, 16(%esp) - jmp 4b -.previous - _ASM_EXTABLE(1b, 6b) - _ASM_EXTABLE(2b, 7b) - _ASM_EXTABLE(3b, 8b) - _ASM_EXTABLE(4b, 9b) -SYM_FUNC_END(xen_failsafe_callback) -#endif /* CONFIG_XEN_PV */ - SYM_CODE_START_LOCAL_NOALIGN(handle_exception) /* the function address is in %gs's slot on the stack */ SAVE_ALL switch_stacks=1 skip_gs=1 unwind_espfix=1 diff --git a/arch/x86/entry/syscalls/syscall_32.tbl b/arch/x86/entry/syscalls/syscall_32.tbl index e31a75262c9c..9d1102873666 100644 --- a/arch/x86/entry/syscalls/syscall_32.tbl +++ b/arch/x86/entry/syscalls/syscall_32.tbl @@ -160,7 +160,7 @@ 146 i386 writev sys_writev compat_sys_writev 147 i386 getsid sys_getsid 148 i386 fdatasync sys_fdatasync -149 i386 _sysctl sys_sysctl compat_sys_sysctl +149 i386 _sysctl sys_ni_syscall 150 i386 mlock sys_mlock 151 i386 munlock sys_munlock 152 i386 mlockall sys_mlockall diff --git a/arch/x86/entry/syscalls/syscall_64.tbl b/arch/x86/entry/syscalls/syscall_64.tbl index 9d82078c949a..f30d6ae9a688 100644 --- a/arch/x86/entry/syscalls/syscall_64.tbl +++ b/arch/x86/entry/syscalls/syscall_64.tbl @@ -164,7 +164,7 @@ 153 common vhangup sys_vhangup 154 common modify_ldt sys_modify_ldt 155 common pivot_root sys_pivot_root -156 64 _sysctl sys_sysctl +156 64 _sysctl sys_ni_syscall 157 common prctl sys_prctl 158 common arch_prctl sys_arch_prctl 159 common adjtimex sys_adjtimex diff --git a/arch/x86/entry/vdso/vdso32/note.S b/arch/x86/entry/vdso/vdso32/note.S index e78047d119f6..2cbd39939dc6 100644 --- a/arch/x86/entry/vdso/vdso32/note.S +++ b/arch/x86/entry/vdso/vdso32/note.S @@ -16,33 +16,3 @@ ELFNOTE_START(Linux, 0, "a") ELFNOTE_END BUILD_SALT - -#ifdef CONFIG_XEN -/* - * Add a special note telling glibc's dynamic linker a fake hardware - * flavor that it will use to choose the search path for libraries in the - * same way it uses real hardware capabilities 
like "mmx". - * We supply "nosegneg" as the fake capability, to indicate that we - * do not like negative offsets in instructions using segment overrides, - * since we implement those inefficiently. This makes it possible to - * install libraries optimized to avoid those access patterns in someplace - * like /lib/i686/tls/nosegneg. Note that an /etc/ld.so.conf.d/file - * corresponding to the bits here is needed to make ldconfig work right. - * It should contain: - * hwcap 1 nosegneg - * to match the mapping of bit to name that we give here. - * - * At runtime, the fake hardware feature will be considered to be present - * if its bit is set in the mask word. So, we start with the mask 0, and - * at boot time we set VDSO_NOTE_NONEGSEG_BIT if running under Xen. - */ - -#include "../../xen/vdso.h" /* Defines VDSO_NOTE_NONEGSEG_BIT. */ - -ELFNOTE_START(GNU, 2, "a") - .long 1 /* ncaps */ -VDSO32_NOTE_MASK: /* Symbol used by arch/x86/xen/setup.c */ - .long 0 /* mask */ - .byte VDSO_NOTE_NONEGSEG_BIT; .asciz "nosegneg" /* bit, name */ -ELFNOTE_END -#endif diff --git a/arch/x86/events/rapl.c b/arch/x86/events/rapl.c index 68b38820b10e..67b411f7e8c4 100644 --- a/arch/x86/events/rapl.c +++ b/arch/x86/events/rapl.c @@ -130,11 +130,17 @@ struct rapl_pmus { struct rapl_pmu *pmus[]; }; +enum rapl_unit_quirk { + RAPL_UNIT_QUIRK_NONE, + RAPL_UNIT_QUIRK_INTEL_HSW, + RAPL_UNIT_QUIRK_INTEL_SPR, +}; + struct rapl_model { struct perf_msr *rapl_msrs; unsigned long events; unsigned int msr_power_unit; - bool apply_quirk; + enum rapl_unit_quirk unit_quirk; }; /* 1/2^hw_unit Joule */ @@ -612,14 +618,28 @@ static int rapl_check_hw_unit(struct rapl_model *rm) for (i = 0; i < NR_RAPL_DOMAINS; i++) rapl_hw_unit[i] = (msr_rapl_power_unit_bits >> 8) & 0x1FULL; + switch (rm->unit_quirk) { /* * DRAM domain on HSW server and KNL has fixed energy unit which can be * different than the unit from power unit MSR. See * "Intel Xeon Processor E5-1600 and E5-2600 v3 Product Families, V2 * of 2. Datasheet, September 2014, Reference Number: 330784-001 " */ - if (rm->apply_quirk) + case RAPL_UNIT_QUIRK_INTEL_HSW: + rapl_hw_unit[PERF_RAPL_RAM] = 16; + break; + /* + * SPR shares the same DRAM domain energy unit as HSW, plus it + * also has a fixed energy unit for Psys domain. 
+ */ + case RAPL_UNIT_QUIRK_INTEL_SPR: rapl_hw_unit[PERF_RAPL_RAM] = 16; + rapl_hw_unit[PERF_RAPL_PSYS] = 0; + break; + default: + break; + } + /* * Calculate the timer rate: @@ -665,7 +685,7 @@ static const struct attribute_group *rapl_attr_update[] = { &rapl_events_pkg_group, &rapl_events_ram_group, &rapl_events_gpu_group, - &rapl_events_gpu_group, + &rapl_events_psys_group, NULL, }; @@ -698,7 +718,6 @@ static struct rapl_model model_snb = { .events = BIT(PERF_RAPL_PP0) | BIT(PERF_RAPL_PKG) | BIT(PERF_RAPL_PP1), - .apply_quirk = false, .msr_power_unit = MSR_RAPL_POWER_UNIT, .rapl_msrs = intel_rapl_msrs, }; @@ -707,7 +726,6 @@ static struct rapl_model model_snbep = { .events = BIT(PERF_RAPL_PP0) | BIT(PERF_RAPL_PKG) | BIT(PERF_RAPL_RAM), - .apply_quirk = false, .msr_power_unit = MSR_RAPL_POWER_UNIT, .rapl_msrs = intel_rapl_msrs, }; @@ -717,7 +735,6 @@ static struct rapl_model model_hsw = { BIT(PERF_RAPL_PKG) | BIT(PERF_RAPL_RAM) | BIT(PERF_RAPL_PP1), - .apply_quirk = false, .msr_power_unit = MSR_RAPL_POWER_UNIT, .rapl_msrs = intel_rapl_msrs, }; @@ -726,7 +743,7 @@ static struct rapl_model model_hsx = { .events = BIT(PERF_RAPL_PP0) | BIT(PERF_RAPL_PKG) | BIT(PERF_RAPL_RAM), - .apply_quirk = true, + .unit_quirk = RAPL_UNIT_QUIRK_INTEL_HSW, .msr_power_unit = MSR_RAPL_POWER_UNIT, .rapl_msrs = intel_rapl_msrs, }; @@ -734,7 +751,7 @@ static struct rapl_model model_hsx = { static struct rapl_model model_knl = { .events = BIT(PERF_RAPL_PKG) | BIT(PERF_RAPL_RAM), - .apply_quirk = true, + .unit_quirk = RAPL_UNIT_QUIRK_INTEL_HSW, .msr_power_unit = MSR_RAPL_POWER_UNIT, .rapl_msrs = intel_rapl_msrs, }; @@ -745,14 +762,22 @@ static struct rapl_model model_skl = { BIT(PERF_RAPL_RAM) | BIT(PERF_RAPL_PP1) | BIT(PERF_RAPL_PSYS), - .apply_quirk = false, + .msr_power_unit = MSR_RAPL_POWER_UNIT, + .rapl_msrs = intel_rapl_msrs, +}; + +static struct rapl_model model_spr = { + .events = BIT(PERF_RAPL_PP0) | + BIT(PERF_RAPL_PKG) | + BIT(PERF_RAPL_RAM) | + BIT(PERF_RAPL_PSYS), + .unit_quirk = RAPL_UNIT_QUIRK_INTEL_SPR, .msr_power_unit = MSR_RAPL_POWER_UNIT, .rapl_msrs = intel_rapl_msrs, }; static struct rapl_model model_amd_fam17h = { .events = BIT(PERF_RAPL_PKG), - .apply_quirk = false, .msr_power_unit = MSR_AMD_RAPL_POWER_UNIT, .rapl_msrs = amd_rapl_msrs, }; @@ -787,6 +812,7 @@ static const struct x86_cpu_id rapl_model_match[] __initconst = { X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_X, &model_hsx), X86_MATCH_INTEL_FAM6_MODEL(COMETLAKE_L, &model_skl), X86_MATCH_INTEL_FAM6_MODEL(COMETLAKE, &model_skl), + X86_MATCH_INTEL_FAM6_MODEL(SAPPHIRERAPIDS_X, &model_spr), X86_MATCH_VENDOR_FAM(AMD, 0x17, &model_amd_fam17h), X86_MATCH_VENDOR_FAM(HYGON, 0x18, &model_amd_fam17h), {}, diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h index 60b944dd2df1..4f77b8f22e54 100644 --- a/arch/x86/include/asm/mshyperv.h +++ b/arch/x86/include/asm/mshyperv.h @@ -8,6 +8,7 @@ #include <asm/io.h> #include <asm/hyperv-tlfs.h> #include <asm/nospec-branch.h> +#include <asm/paravirt.h> typedef int (*hyperv_fill_flush_list_func)( struct hv_guest_mapping_flush_list *flush, @@ -54,6 +55,17 @@ typedef int (*hyperv_fill_flush_list_func)( vclocks_set_used(VDSO_CLOCKMODE_HVCLOCK); #define hv_get_raw_timer() rdtsc_ordered() +/* + * Reference to pv_ops must be inline so objtool + * detection of noinstr violations can work correctly. 
+ */ +static __always_inline void hv_setup_sched_clock(void *sched_clock) +{ +#ifdef CONFIG_PARAVIRT + pv_ops.time.sched_clock = sched_clock; +#endif +} + void hyperv_vector_handler(struct pt_regs *regs); static inline void hv_enable_stimer0_percpu_irq(int irq) {} diff --git a/arch/x86/include/asm/proto.h b/arch/x86/include/asm/proto.h index 6e81788a30c1..28996fe19301 100644 --- a/arch/x86/include/asm/proto.h +++ b/arch/x86/include/asm/proto.h @@ -25,7 +25,7 @@ void entry_SYSENTER_compat(void); void __end_entry_SYSENTER_compat(void); void entry_SYSCALL_compat(void); void entry_INT80_compat(void); -#if defined(CONFIG_X86_64) && defined(CONFIG_XEN_PV) +#ifdef CONFIG_XEN_PV void xen_entry_INT80_compat(void); #endif #endif diff --git a/arch/x86/include/asm/segment.h b/arch/x86/include/asm/segment.h index 6669164abadc..9646c300f128 100644 --- a/arch/x86/include/asm/segment.h +++ b/arch/x86/include/asm/segment.h @@ -301,7 +301,7 @@ static inline void vdso_read_cpunode(unsigned *cpu, unsigned *node) extern const char early_idt_handler_array[NUM_EXCEPTION_VECTORS][EARLY_IDT_HANDLER_SIZE]; extern void early_ignore_irq(void); -#if defined(CONFIG_X86_64) && defined(CONFIG_XEN_PV) +#ifdef CONFIG_XEN_PV extern const char xen_early_idt_handler_array[NUM_EXCEPTION_VECTORS][XEN_EARLY_IDT_HANDLER_SIZE]; #endif diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h index 2f3e8f2a958f..ecefaffd15d4 100644 --- a/arch/x86/include/asm/uaccess.h +++ b/arch/x86/include/asm/uaccess.h @@ -33,7 +33,7 @@ static inline void set_fs(mm_segment_t fs) set_thread_flag(TIF_FSCHECK); } -#define segment_eq(a, b) ((a).seg == (b).seg) +#define uaccess_kernel() (get_fs().seg == KERNEL_DS.seg) #define user_addr_max() (current->thread.addr_limit.seg) /* diff --git a/arch/x86/include/asm/vdso/gettimeofday.h b/arch/x86/include/asm/vdso/gettimeofday.h index fb81fea99093..df01d7349d79 100644 --- a/arch/x86/include/asm/vdso/gettimeofday.h +++ b/arch/x86/include/asm/vdso/gettimeofday.h @@ -241,7 +241,8 @@ static u64 vread_hvclock(void) } #endif -static inline u64 __arch_get_hw_counter(s32 clock_mode) +static inline u64 __arch_get_hw_counter(s32 clock_mode, + const struct vdso_data *vd) { if (likely(clock_mode == VDSO_CLOCKMODE_TSC)) return (u64)rdtsc_ordered(); diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c index d1175533d125..c3daf0aaa0ee 100644 --- a/arch/x86/kernel/alternative.c +++ b/arch/x86/kernel/alternative.c @@ -875,8 +875,6 @@ static void *__text_poke(void *addr, const void *opcode, size_t len) */ BUG_ON(!pages[0] || (cross_page_boundary && !pages[1])); - local_irq_save(flags); - /* * Map the page without the global bit, as TLB flushing is done with * flush_tlb_mm_range(), which is intended for non-global PTEs. 
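Note: the sparc and x86 uaccess headers in this diff replace the two-argument segment_eq() helper with a uaccess_kernel() predicate that expands to the same seg comparison. A minimal, hedged sketch of a caller after the conversion — the function name and the WARN_ON_ONCE() placement are made up for illustration:

#include <linux/bug.h>
#include <linux/uaccess.h>

/* Hedged sketch: caller is illustrative, not taken from the tree. */
static void sketch_assert_user_addr_limit(void)
{
	/*
	 * Formerly written as segment_eq(get_fs(), KERNEL_DS); the new
	 * macro hides the mm_segment_t comparison, e.g.
	 * (get_fs().seg == KERNEL_DS.seg) on x86 and sparc.
	 */
	WARN_ON_ONCE(uaccess_kernel());
}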
@@ -893,6 +891,8 @@ static void *__text_poke(void *addr, const void *opcode, size_t len) */ VM_BUG_ON(!ptep); + local_irq_save(flags); + pte = mk_pte(pages[0], pgprot); set_pte_at(poking_mm, poking_addr, ptep, pte); @@ -942,8 +942,8 @@ static void *__text_poke(void *addr, const void *opcode, size_t len) */ BUG_ON(memcmp(addr, opcode, len)); - pte_unmap_unlock(ptep, ptl); local_irq_restore(flags); + pte_unmap_unlock(ptep, ptl); return addr; } diff --git a/arch/x86/kernel/cpu/acrn.c b/arch/x86/kernel/cpu/acrn.c index 1da9b1c9a2db..0b2c03943ac6 100644 --- a/arch/x86/kernel/cpu/acrn.c +++ b/arch/x86/kernel/cpu/acrn.c @@ -11,14 +11,15 @@ #include <linux/interrupt.h> #include <asm/apic.h> +#include <asm/cpufeatures.h> #include <asm/desc.h> #include <asm/hypervisor.h> #include <asm/idtentry.h> #include <asm/irq_regs.h> -static uint32_t __init acrn_detect(void) +static u32 __init acrn_detect(void) { - return hypervisor_cpuid_base("ACRNACRNACRN\0\0", 0); + return hypervisor_cpuid_base("ACRNACRNACRN", 0); } static void __init acrn_init_platform(void) @@ -29,12 +30,7 @@ static void __init acrn_init_platform(void) static bool acrn_x2apic_available(void) { - /* - * x2apic is not supported for now. Future enablement will have to check - * X86_FEATURE_X2APIC to determine whether x2apic is supported in the - * guest. - */ - return false; + return boot_cpu_has(X86_FEATURE_X2APIC); } static void (*acrn_intr_handler)(void); diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index f0b743a2fe9c..d3f0db463f96 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -31,6 +31,7 @@ #include <asm/intel-family.h> #include <asm/e820/api.h> #include <asm/hypervisor.h> +#include <asm/tlbflush.h> #include "cpu.h" @@ -1549,7 +1550,12 @@ static ssize_t l1tf_show_state(char *buf) static ssize_t itlb_multihit_show_state(char *buf) { - if (itlb_multihit_kvm_mitigation) + if (!boot_cpu_has(X86_FEATURE_MSR_IA32_FEAT_CTL) || + !boot_cpu_has(X86_FEATURE_VMX)) + return sprintf(buf, "KVM: Mitigation: VMX unsupported\n"); + else if (!(cr4_read_shadow() & X86_CR4_VMXE)) + return sprintf(buf, "KVM: Mitigation: VMX disabled\n"); + else if (itlb_multihit_kvm_mitigation) return sprintf(buf, "KVM: Mitigation: Split huge pages\n"); else return sprintf(buf, "KVM: Vulnerable\n"); diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c index af94f05a5c66..31125448b174 100644 --- a/arch/x86/kernel/cpu/mshyperv.c +++ b/arch/x86/kernel/cpu/mshyperv.c @@ -361,13 +361,6 @@ static void __init ms_hyperv_init_platform(void) #endif } -void hv_setup_sched_clock(void *sched_clock) -{ -#ifdef CONFIG_PARAVIRT - pv_ops.time.sched_clock = sched_clock; -#endif -} - const __initconst struct hypervisor_x86 x86_hyper_ms_hyperv = { .name = "Microsoft Hyper-V", .detect = ms_hyperv_platform, diff --git a/arch/x86/kernel/crash.c b/arch/x86/kernel/crash.c index fd87b59452a3..a8f3af257e26 100644 --- a/arch/x86/kernel/crash.c +++ b/arch/x86/kernel/crash.c @@ -230,7 +230,7 @@ static int elf_header_exclude_ranges(struct crash_mem *cmem) int ret = 0; /* Exclude the low 1M because it is always reserved */ - ret = crash_exclude_mem_range(cmem, 0, 1<<20); + ret = crash_exclude_mem_range(cmem, 0, (1<<20)-1); if (ret) return ret; diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c index 7a2bf884fede..038e19c0019e 100644 --- a/arch/x86/kernel/fpu/xstate.c +++ b/arch/x86/kernel/fpu/xstate.c @@ -611,6 +611,10 @@ static void check_xstate_against_struct(int nr) * This essentially double-checks what 
the cpu told us about * how large the XSAVE buffer needs to be. We are recalculating * it to be safe. + * + * Dynamic XSAVE features allocate their own buffers and are not + * covered by these checks. Only the size of the buffer for task->fpu + * is checked here. */ static void do_extra_xstate_size_checks(void) { @@ -673,6 +677,33 @@ static unsigned int __init get_xsaves_size(void) return ebx; } +/* + * Get the total size of the enabled xstates without the dynamic supervisor + * features. + */ +static unsigned int __init get_xsaves_size_no_dynamic(void) +{ + u64 mask = xfeatures_mask_dynamic(); + unsigned int size; + + if (!mask) + return get_xsaves_size(); + + /* Disable dynamic features. */ + wrmsrl(MSR_IA32_XSS, xfeatures_mask_supervisor()); + + /* + * Ask the hardware what size is required of the buffer. + * This is the size required for the task->fpu buffer. + */ + size = get_xsaves_size(); + + /* Re-enable dynamic features so XSAVES will work on them again. */ + wrmsrl(MSR_IA32_XSS, xfeatures_mask_supervisor() | mask); + + return size; +} + static unsigned int __init get_xsave_size(void) { unsigned int eax, ebx, ecx, edx; @@ -710,7 +741,7 @@ static int __init init_xstate_size(void) xsave_size = get_xsave_size(); if (boot_cpu_has(X86_FEATURE_XSAVES)) - possible_xstate_size = get_xsaves_size(); + possible_xstate_size = get_xsaves_size_no_dynamic(); else possible_xstate_size = xsave_size; diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S index f66a6b90f954..7ed84c282233 100644 --- a/arch/x86/kernel/head_32.S +++ b/arch/x86/kernel/head_32.S @@ -134,38 +134,7 @@ SYM_CODE_START(startup_32) movl %eax,pa(initial_page_table+0xffc) #endif -#ifdef CONFIG_PARAVIRT - /* This is can only trip for a broken bootloader... */ - cmpw $0x207, pa(boot_params + BP_version) - jb .Ldefault_entry - - /* Paravirt-compatible boot parameters. Look to see what architecture - we're booting under. */ - movl pa(boot_params + BP_hardware_subarch), %eax - cmpl $num_subarch_entries, %eax - jae .Lbad_subarch - - movl pa(subarch_entries)(,%eax,4), %eax - subl $__PAGE_OFFSET, %eax - jmp *%eax - -.Lbad_subarch: -SYM_INNER_LABEL_ALIGN(xen_entry, SYM_L_WEAK) - /* Unknown implementation; there's really - nothing we can do at this point. */ - ud2a - - __INITDATA - -subarch_entries: - .long .Ldefault_entry /* normal x86/PC */ - .long xen_entry /* Xen hypervisor */ - .long .Ldefault_entry /* Moorestown MID */ -num_subarch_entries = (. - subarch_entries) / 4 -.previous -#else jmp .Ldefault_entry -#endif /* CONFIG_PARAVIRT */ SYM_CODE_END(startup_32) #ifdef CONFIG_HOTPLUG_CPU diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index d6f946707270..9afefe325acb 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -390,7 +390,7 @@ unsigned long x86_fsgsbase_read_task(struct task_struct *task, */ mutex_lock(&task->mm->context.lock); ldt = task->mm->context.ldt; - if (unlikely(idx >= ldt->nr_entries)) + if (unlikely(!ldt || idx >= ldt->nr_entries)) base = 0; else base = get_desc_base(ldt->entries + idx); diff --git a/arch/x86/kernel/tsc_msr.c b/arch/x86/kernel/tsc_msr.c index 46c72f2ec32f..6555a857a1e6 100644 --- a/arch/x86/kernel/tsc_msr.c +++ b/arch/x86/kernel/tsc_msr.c @@ -134,10 +134,15 @@ static const struct freq_desc freq_desc_ann = { .mask = 0x0f, }; -/* 24 MHz crystal? : 24 * 13 / 4 = 78 MHz */ +/* + * 24 MHz crystal? : 24 * 13 / 4 = 78 MHz + * Frequency step for Lightning Mountain SoC is fixed to 78 MHz, + * so all the frequency entries are 78000. 
+ */ static const struct freq_desc freq_desc_lgm = { .use_msr_plat = true, - .freqs = { 78000, 78000, 78000, 78000, 78000, 78000, 78000, 78000 }, + .freqs = { 78000, 78000, 78000, 78000, 78000, 78000, 78000, 78000, + 78000, 78000, 78000, 78000, 78000, 78000, 78000, 78000 }, .mask = 0x0f, }; diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c index fa873e3e6e90..3fd6eec202d7 100644 --- a/arch/x86/kvm/cpuid.c +++ b/arch/x86/kvm/cpuid.c @@ -370,7 +370,8 @@ void kvm_set_cpu_caps(void) kvm_cpu_cap_mask(CPUID_7_EDX, F(AVX512_4VNNIW) | F(AVX512_4FMAPS) | F(SPEC_CTRL) | F(SPEC_CTRL_SSBD) | F(ARCH_CAPABILITIES) | F(INTEL_STIBP) | - F(MD_CLEAR) | F(AVX512_VP2INTERSECT) | F(FSRM) + F(MD_CLEAR) | F(AVX512_VP2INTERSECT) | F(FSRM) | + F(SERIALIZE) ); /* TSC_ADJUST and ARCH_CAPABILITIES are emulated in software. */ diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c index af9cdb426dd2..814d3aee5cef 100644 --- a/arch/x86/kvm/hyperv.c +++ b/arch/x86/kvm/hyperv.c @@ -900,6 +900,7 @@ int kvm_hv_activate_synic(struct kvm_vcpu *vcpu, bool dont_zero_synic_pages) kvm_request_apicv_update(vcpu->kvm, false, APICV_INHIBIT_REASON_HYPERV); synic->active = true; synic->dont_zero_synic_pages = dont_zero_synic_pages; + synic->control = HV_SYNIC_CONTROL_ENABLE; return 0; } diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 232dd2f9a081..599d73206299 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -820,22 +820,22 @@ int kvm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) if ((cr0 & X86_CR0_PG) && !(cr0 & X86_CR0_PE)) return 1; - if (cr0 & X86_CR0_PG) { #ifdef CONFIG_X86_64 - if (!is_paging(vcpu) && (vcpu->arch.efer & EFER_LME)) { - int cs_db, cs_l; + if ((vcpu->arch.efer & EFER_LME) && !is_paging(vcpu) && + (cr0 & X86_CR0_PG)) { + int cs_db, cs_l; - if (!is_pae(vcpu)) - return 1; - kvm_x86_ops.get_cs_db_l_bits(vcpu, &cs_db, &cs_l); - if (cs_l) - return 1; - } else -#endif - if (is_pae(vcpu) && ((cr0 ^ old_cr0) & pdptr_bits) && - !load_pdptrs(vcpu, vcpu->arch.walk_mmu, kvm_read_cr3(vcpu))) + if (!is_pae(vcpu)) + return 1; + kvm_x86_ops.get_cs_db_l_bits(vcpu, &cs_db, &cs_l); + if (cs_l) return 1; } +#endif + if (!(vcpu->arch.efer & EFER_LME) && (cr0 & X86_CR0_PG) && + is_pae(vcpu) && ((cr0 ^ old_cr0) & pdptr_bits) && + !load_pdptrs(vcpu, vcpu->arch.walk_mmu, kvm_read_cr3(vcpu))) + return 1; if (!(cr0 & X86_CR0_PG) && kvm_read_cr4_bits(vcpu, X86_CR4_PCIDE)) return 1; diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index 0c7643d9f7cb..35f1498e9832 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c @@ -1139,7 +1139,7 @@ void do_user_addr_fault(struct pt_regs *regs, struct vm_area_struct *vma; struct task_struct *tsk; struct mm_struct *mm; - vm_fault_t fault, major = 0; + vm_fault_t fault; unsigned int flags = FAULT_FLAG_DEFAULT; tsk = current; @@ -1291,8 +1291,7 @@ good_area: * userland). The return to userland is identified whenever * FAULT_FLAG_USER|FAULT_FLAG_KILLABLE are both set in flags. */ - fault = handle_mm_fault(vma, address, flags); - major |= fault & VM_FAULT_MAJOR; + fault = handle_mm_fault(vma, address, flags, regs); /* Quick path to respond to signals */ if (fault_signal_pending(fault, regs)) { @@ -1319,18 +1318,6 @@ good_area: return; } - /* - * Major/minor page fault accounting. If any of the events - * returned VM_FAULT_MAJOR, we account it as a major fault. 
- */ - if (major) { - tsk->maj_flt++; - perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, regs, address); - } else { - tsk->min_flt++; - perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, regs, address); - } - check_v8086_mode(regs, address, tsk); } NOKPROBE_SYMBOL(do_user_addr_fault); diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index 3b246ae40c8f..a4ac13cc3fdc 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c @@ -1452,6 +1452,15 @@ static unsigned long probe_memory_block_size(void) goto done; } + /* + * Use max block size to minimize overhead on bare metal, where + * alignment for memory hotplug isn't a concern. + */ + if (!boot_cpu_has(X86_FEATURE_HYPERVISOR)) { + bz = MAX_BLOCK_SIZE; + goto done; + } + /* Find the largest allowed block size that aligns to memory end */ for (bz = MAX_BLOCK_SIZE; bz > MIN_MEMORY_BLOCK_SIZE; bz >>= 1) { if (IS_ALIGNED(boot_mem_end, bz)) diff --git a/arch/x86/mm/numa.c b/arch/x86/mm/numa.c index b05f45e5e8e2..aa76ec2d359b 100644 --- a/arch/x86/mm/numa.c +++ b/arch/x86/mm/numa.c @@ -929,5 +929,4 @@ int memory_add_physaddr_to_nid(u64 start) nid = numa_meminfo.blk[0].nid; return nid; } -EXPORT_SYMBOL_GPL(memory_add_physaddr_to_nid); #endif diff --git a/arch/x86/purgatory/Makefile b/arch/x86/purgatory/Makefile index 183ac60e5990..95ea17a9d20c 100644 --- a/arch/x86/purgatory/Makefile +++ b/arch/x86/purgatory/Makefile @@ -32,7 +32,7 @@ KCOV_INSTRUMENT := n # make up the standalone purgatory.ro PURGATORY_CFLAGS_REMOVE := -mcmodel=kernel -PURGATORY_CFLAGS := -mcmodel=large -ffreestanding -fno-zero-initialized-in-bss +PURGATORY_CFLAGS := -mcmodel=large -ffreestanding -fno-zero-initialized-in-bss -g0 PURGATORY_CFLAGS += $(DISABLE_STACKLEAK_PLUGIN) -DDISABLE_BRANCH_PROFILING PURGATORY_CFLAGS += -fno-stack-protector @@ -64,6 +64,9 @@ CFLAGS_sha256.o += $(PURGATORY_CFLAGS) CFLAGS_REMOVE_string.o += $(PURGATORY_CFLAGS_REMOVE) CFLAGS_string.o += $(PURGATORY_CFLAGS) +AFLAGS_REMOVE_setup-x86_$(BITS).o += -Wa,-gdwarf-2 +AFLAGS_REMOVE_entry64.o += -Wa,-gdwarf-2 + $(obj)/purgatory.ro: $(PURGATORY_OBJS) FORCE $(call if_changed,ld) diff --git a/arch/x86/xen/Kconfig b/arch/x86/xen/Kconfig index 1aded63a95cb..218acbd5c7a0 100644 --- a/arch/x86/xen/Kconfig +++ b/arch/x86/xen/Kconfig @@ -19,6 +19,7 @@ config XEN_PV bool "Xen PV guest support" default y depends on XEN + depends on X86_64 select PARAVIRT_XXL select XEN_HAVE_PVMMU select XEN_HAVE_VPMU @@ -50,7 +51,7 @@ config XEN_PVHVM_SMP config XEN_512GB bool "Limit Xen pv-domain memory to 512GB" - depends on XEN_PV && X86_64 + depends on XEN_PV default y help Limit paravirtualized user domains to 512GB of RAM. 
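Note: the init_64.c hunk above adds a bare-metal fast path to memory block sizing — if the CPUID hypervisor bit is clear (X86_FEATURE_HYPERVISOR), hotplug alignment is not a concern and the maximum block size is used outright; only guests fall through to the alignment scan. A condensed, hedged sketch of that selection logic, with the helper name and the size parameters made up for illustration (the in-tree code uses its own MAX_BLOCK_SIZE/MIN_MEMORY_BLOCK_SIZE constants):

#include <linux/kernel.h>	/* IS_ALIGNED() */
#include <asm/cpufeature.h>	/* boot_cpu_has(), X86_FEATURE_HYPERVISOR */

/* Hedged sketch: condensed from the hunk above, not the in-tree function. */
static unsigned long sketch_pick_block_size(unsigned long boot_mem_end,
					    unsigned long max_bz,
					    unsigned long min_bz)
{
	unsigned long bz;

	/* Bare metal: no hotplug alignment constraints, take the maximum. */
	if (!boot_cpu_has(X86_FEATURE_HYPERVISOR))
		return max_bz;

	/* Guest: largest power-of-two block that aligns to the memory end. */
	for (bz = max_bz; bz > min_bz; bz >>= 1)
		if (IS_ALIGNED(boot_mem_end, bz))
			break;

	return bz;
}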
diff --git a/arch/x86/xen/Makefile b/arch/x86/xen/Makefile index 5f1db522d06b..fc5c5ba4aacb 100644 --- a/arch/x86/xen/Makefile +++ b/arch/x86/xen/Makefile @@ -1,5 +1,5 @@ # SPDX-License-Identifier: GPL-2.0 -OBJECT_FILES_NON_STANDARD_xen-asm_$(BITS).o := y +OBJECT_FILES_NON_STANDARD_xen-asm.o := y ifdef CONFIG_FUNCTION_TRACER # Do not profile debug and lowlevel utilities @@ -33,7 +33,6 @@ obj-$(CONFIG_XEN_PV) += mmu_pv.o obj-$(CONFIG_XEN_PV) += irq.o obj-$(CONFIG_XEN_PV) += multicalls.o obj-$(CONFIG_XEN_PV) += xen-asm.o -obj-$(CONFIG_XEN_PV) += xen-asm_$(BITS).o obj-$(CONFIG_XEN_PVH) += enlighten_pvh.o diff --git a/arch/x86/xen/apic.c b/arch/x86/xen/apic.c index 1aff4ae65655..e82fd1910dae 100644 --- a/arch/x86/xen/apic.c +++ b/arch/x86/xen/apic.c @@ -60,10 +60,6 @@ static u32 xen_apic_read(u32 reg) if (reg == APIC_LVR) return 0x14; -#ifdef CONFIG_X86_32 - if (reg == APIC_LDR) - return SET_APIC_LOGICAL_ID(1UL << smp_processor_id()); -#endif if (reg != APIC_ID) return 0; @@ -129,14 +125,6 @@ static int xen_phys_pkg_id(int initial_apic_id, int index_msb) return initial_apic_id >> index_msb; } -#ifdef CONFIG_X86_32 -static int xen_x86_32_early_logical_apicid(int cpu) -{ - /* Match with APIC_LDR read. Otherwise setup_local_APIC complains. */ - return 1 << cpu; -} -#endif - static void xen_noop(void) { } @@ -199,11 +187,6 @@ static struct apic xen_pv_apic = { .icr_write = xen_apic_icr_write, .wait_icr_idle = xen_noop, .safe_wait_icr_idle = xen_safe_apic_wait_icr_idle, - -#ifdef CONFIG_X86_32 - /* generic_processor_info and setup_local_APIC. */ - .x86_32_early_logical_apicid = xen_x86_32_early_logical_apicid, -#endif }; static void __init xen_apic_check(void) diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c index 2aab43a13a8c..22e741e0b10c 100644 --- a/arch/x86/xen/enlighten_pv.c +++ b/arch/x86/xen/enlighten_pv.c @@ -119,14 +119,6 @@ static void __init xen_banner(void) printk(KERN_INFO "Xen version: %d.%d%s%s\n", version >> 16, version & 0xffff, extra.extraversion, xen_feature(XENFEAT_mmu_pt_update_preserve_ad) ? " (preserve-AD)" : ""); - -#ifdef CONFIG_X86_32 - pr_warn("WARNING! WARNING! WARNING! WARNING! WARNING! WARNING! WARNING!\n" - "Support for running as 32-bit PV-guest under Xen will soon be removed\n" - "from the Linux kernel!\n" - "Please use either a 64-bit kernel or switch to HVM or PVH mode!\n" - "WARNING! WARNING! WARNING! WARNING! WARNING! WARNING! 
WARNING!\n"); -#endif } static void __init xen_pv_init_platform(void) @@ -353,15 +345,13 @@ static void set_aliased_prot(void *v, pgprot_t prot) pte_t *ptep; pte_t pte; unsigned long pfn; - struct page *page; unsigned char dummy; + void *va; ptep = lookup_address((unsigned long)v, &level); BUG_ON(ptep == NULL); pfn = pte_pfn(*ptep); - page = pfn_to_page(pfn); - pte = pfn_pte(pfn, prot); /* @@ -391,14 +381,10 @@ static void set_aliased_prot(void *v, pgprot_t prot) if (HYPERVISOR_update_va_mapping((unsigned long)v, pte, 0)) BUG(); - if (!PageHighMem(page)) { - void *av = __va(PFN_PHYS(pfn)); + va = __va(PFN_PHYS(pfn)); - if (av != v) - if (HYPERVISOR_update_va_mapping((unsigned long)av, pte, 0)) - BUG(); - } else - kmap_flush_unused(); + if (va != v && HYPERVISOR_update_va_mapping((unsigned long)va, pte, 0)) + BUG(); preempt_enable(); } @@ -538,30 +524,12 @@ static void load_TLS_descriptor(struct thread_struct *t, static void xen_load_tls(struct thread_struct *t, unsigned int cpu) { /* - * XXX sleazy hack: If we're being called in a lazy-cpu zone - * and lazy gs handling is enabled, it means we're in a - * context switch, and %gs has just been saved. This means we - * can zero it out to prevent faults on exit from the - * hypervisor if the next process has no %gs. Either way, it - * has been saved, and the new value will get loaded properly. - * This will go away as soon as Xen has been modified to not - * save/restore %gs for normal hypercalls. - * - * On x86_64, this hack is not used for %gs, because gs points - * to KERNEL_GS_BASE (and uses it for PDA references), so we - * must not zero %gs on x86_64 - * - * For x86_64, we need to zero %fs, otherwise we may get an + * In lazy mode we need to zero %fs, otherwise we may get an * exception between the new %fs descriptor being loaded and * %fs being effectively cleared at __switch_to(). 
*/ - if (paravirt_get_lazy_mode() == PARAVIRT_LAZY_CPU) { -#ifdef CONFIG_X86_32 - lazy_load_gs(0); -#else + if (paravirt_get_lazy_mode() == PARAVIRT_LAZY_CPU) loadsegment(fs, 0); -#endif - } xen_mc_batch(); @@ -572,13 +540,11 @@ static void xen_load_tls(struct thread_struct *t, unsigned int cpu) xen_mc_issue(PARAVIRT_LAZY_CPU); } -#ifdef CONFIG_X86_64 static void xen_load_gs_index(unsigned int idx) { if (HYPERVISOR_set_segment_base(SEGBASE_GS_USER_SEL, idx)) BUG(); } -#endif static void xen_write_ldt_entry(struct desc_struct *dt, int entrynum, const void *ptr) @@ -597,7 +563,6 @@ static void xen_write_ldt_entry(struct desc_struct *dt, int entrynum, preempt_enable(); } -#ifdef CONFIG_X86_64 void noist_exc_debug(struct pt_regs *regs); DEFINE_IDTENTRY_RAW(xenpv_exc_nmi) @@ -697,7 +662,6 @@ static bool __ref get_trap_addr(void **addr, unsigned int ist) return true; } -#endif static int cvt_gate_to_trap(int vector, const gate_desc *val, struct trap_info *info) @@ -710,10 +674,8 @@ static int cvt_gate_to_trap(int vector, const gate_desc *val, info->vector = vector; addr = gate_offset(val); -#ifdef CONFIG_X86_64 if (!get_trap_addr((void **)&addr, val->bits.ist)) return 0; -#endif /* CONFIG_X86_64 */ info->address = addr; info->cs = gate_segment(val); @@ -958,15 +920,12 @@ static u64 xen_read_msr_safe(unsigned int msr, int *err) static int xen_write_msr_safe(unsigned int msr, unsigned low, unsigned high) { int ret; -#ifdef CONFIG_X86_64 unsigned int which; u64 base; -#endif ret = 0; switch (msr) { -#ifdef CONFIG_X86_64 case MSR_FS_BASE: which = SEGBASE_FS; goto set; case MSR_KERNEL_GS_BASE: which = SEGBASE_GS_USER; goto set; case MSR_GS_BASE: which = SEGBASE_GS_KERNEL; goto set; @@ -976,7 +935,6 @@ static int xen_write_msr_safe(unsigned int msr, unsigned low, unsigned high) if (HYPERVISOR_set_segment_base(which, base) != 0) ret = -EIO; break; -#endif case MSR_STAR: case MSR_CSTAR: @@ -1058,9 +1016,7 @@ void __init xen_setup_vcpu_info_placement(void) static const struct pv_info xen_info __initconst = { .shared_kernel_pmd = 0, -#ifdef CONFIG_X86_64 .extra_user_64bit_cs = FLAT_USER_CS64, -#endif .name = "Xen", }; @@ -1086,18 +1042,14 @@ static const struct pv_cpu_ops xen_cpu_ops __initconst = { .read_pmc = xen_read_pmc, .iret = xen_iret, -#ifdef CONFIG_X86_64 .usergs_sysret64 = xen_sysret64, -#endif .load_tr_desc = paravirt_nop, .set_ldt = xen_set_ldt, .load_gdt = xen_load_gdt, .load_idt = xen_load_idt, .load_tls = xen_load_tls, -#ifdef CONFIG_X86_64 .load_gs_index = xen_load_gs_index, -#endif .alloc_ldt = xen_alloc_ldt, .free_ldt = xen_free_ldt, @@ -1364,15 +1316,7 @@ asmlinkage __visible void __init xen_start_kernel(void) /* keep using Xen gdt for now; no urgent need to change it */ -#ifdef CONFIG_X86_32 - pv_info.kernel_rpl = 1; - if (xen_feature(XENFEAT_supervisor_mode_kernel)) - pv_info.kernel_rpl = 0; -#else pv_info.kernel_rpl = 0; -#endif - /* set the limit of our address space */ - xen_reserve_top(); /* * We used to do this in xen_arch_setup, but that is too late @@ -1384,12 +1328,6 @@ asmlinkage __visible void __init xen_start_kernel(void) if (rc != 0) xen_raw_printk("physdev_op failed %d\n", rc); -#ifdef CONFIG_X86_32 - /* set up basic CPUID stuff */ - cpu_detect(&new_cpu_data); - set_cpu_cap(&new_cpu_data, X86_FEATURE_FPU); - new_cpu_data.x86_capability[CPUID_1_EDX] = cpuid_edx(1); -#endif if (xen_start_info->mod_start) { if (xen_start_info->flags & SIF_MOD_START_PFN) @@ -1458,12 +1396,8 @@ asmlinkage __visible void __init xen_start_kernel(void) xen_efi_init(&boot_params); /* Start the 
world */ -#ifdef CONFIG_X86_32 - i386_start_kernel(); -#else cr4_init_shadow(); /* 32b kernel does this in i386_start_kernel() */ x86_64_start_reservations((char *)__pa_symbol(&boot_params)); -#endif } static int xen_cpu_up_prepare_pv(unsigned int cpu) diff --git a/arch/x86/xen/mmu_pv.c b/arch/x86/xen/mmu_pv.c index a58d9c69807a..3273c985d3dd 100644 --- a/arch/x86/xen/mmu_pv.c +++ b/arch/x86/xen/mmu_pv.c @@ -86,19 +86,8 @@ #include "mmu.h" #include "debugfs.h" -#ifdef CONFIG_X86_32 -/* - * Identity map, in addition to plain kernel map. This needs to be - * large enough to allocate page table pages to allocate the rest. - * Each page can map 2MB. - */ -#define LEVEL1_IDENT_ENTRIES (PTRS_PER_PTE * 4) -static RESERVE_BRK_ARRAY(pte_t, level1_ident_pgt, LEVEL1_IDENT_ENTRIES); -#endif -#ifdef CONFIG_X86_64 /* l3 pud for userspace vsyscall mapping */ static pud_t level3_user_vsyscall[PTRS_PER_PUD] __page_aligned_bss; -#endif /* CONFIG_X86_64 */ /* * Protects atomic reservation decrease/increase against concurrent increases. @@ -280,10 +269,7 @@ static inline void __xen_set_pte(pte_t *ptep, pte_t pteval) if (!xen_batched_set_pte(ptep, pteval)) { /* * Could call native_set_pte() here and trap and - * emulate the PTE write but with 32-bit guests this - * needs two traps (one for each of the two 32-bit - * words in the PTE) so do one hypercall directly - * instead. + * emulate the PTE write, but a hypercall is much cheaper. */ struct mmu_update u; @@ -439,26 +425,6 @@ static void xen_set_pud(pud_t *ptr, pud_t val) xen_set_pud_hyper(ptr, val); } -#ifdef CONFIG_X86_PAE -static void xen_set_pte_atomic(pte_t *ptep, pte_t pte) -{ - trace_xen_mmu_set_pte_atomic(ptep, pte); - __xen_set_pte(ptep, pte); -} - -static void xen_pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep) -{ - trace_xen_mmu_pte_clear(mm, addr, ptep); - __xen_set_pte(ptep, native_make_pte(0)); -} - -static void xen_pmd_clear(pmd_t *pmdp) -{ - trace_xen_mmu_pmd_clear(pmdp); - set_pmd(pmdp, __pmd(0)); -} -#endif /* CONFIG_X86_PAE */ - __visible pmd_t xen_make_pmd(pmdval_t pmd) { pmd = pte_pfn_to_mfn(pmd); @@ -466,7 +432,6 @@ __visible pmd_t xen_make_pmd(pmdval_t pmd) } PV_CALLEE_SAVE_REGS_THUNK(xen_make_pmd); -#ifdef CONFIG_X86_64 __visible pudval_t xen_pud_val(pud_t pud) { return pte_mfn_to_pfn(pud.pud); @@ -571,27 +536,27 @@ __visible p4d_t xen_make_p4d(p4dval_t p4d) } PV_CALLEE_SAVE_REGS_THUNK(xen_make_p4d); #endif /* CONFIG_PGTABLE_LEVELS >= 5 */ -#endif /* CONFIG_X86_64 */ -static int xen_pmd_walk(struct mm_struct *mm, pmd_t *pmd, - int (*func)(struct mm_struct *mm, struct page *, enum pt_level), - bool last, unsigned long limit) +static void xen_pmd_walk(struct mm_struct *mm, pmd_t *pmd, + void (*func)(struct mm_struct *mm, struct page *, + enum pt_level), + bool last, unsigned long limit) { - int i, nr, flush = 0; + int i, nr; nr = last ? pmd_index(limit) + 1 : PTRS_PER_PMD; for (i = 0; i < nr; i++) { if (!pmd_none(pmd[i])) - flush |= (*func)(mm, pmd_page(pmd[i]), PT_PTE); + (*func)(mm, pmd_page(pmd[i]), PT_PTE); } - return flush; } -static int xen_pud_walk(struct mm_struct *mm, pud_t *pud, - int (*func)(struct mm_struct *mm, struct page *, enum pt_level), - bool last, unsigned long limit) +static void xen_pud_walk(struct mm_struct *mm, pud_t *pud, + void (*func)(struct mm_struct *mm, struct page *, + enum pt_level), + bool last, unsigned long limit) { - int i, nr, flush = 0; + int i, nr; nr = last ? 
pud_index(limit) + 1 : PTRS_PER_PUD; for (i = 0; i < nr; i++) { @@ -602,29 +567,26 @@ static int xen_pud_walk(struct mm_struct *mm, pud_t *pud, pmd = pmd_offset(&pud[i], 0); if (PTRS_PER_PMD > 1) - flush |= (*func)(mm, virt_to_page(pmd), PT_PMD); - flush |= xen_pmd_walk(mm, pmd, func, - last && i == nr - 1, limit); + (*func)(mm, virt_to_page(pmd), PT_PMD); + xen_pmd_walk(mm, pmd, func, last && i == nr - 1, limit); } - return flush; } -static int xen_p4d_walk(struct mm_struct *mm, p4d_t *p4d, - int (*func)(struct mm_struct *mm, struct page *, enum pt_level), - bool last, unsigned long limit) +static void xen_p4d_walk(struct mm_struct *mm, p4d_t *p4d, + void (*func)(struct mm_struct *mm, struct page *, + enum pt_level), + bool last, unsigned long limit) { - int flush = 0; pud_t *pud; if (p4d_none(*p4d)) - return flush; + return; pud = pud_offset(p4d, 0); if (PTRS_PER_PUD > 1) - flush |= (*func)(mm, virt_to_page(pud), PT_PUD); - flush |= xen_pud_walk(mm, pud, func, last, limit); - return flush; + (*func)(mm, virt_to_page(pud), PT_PUD); + xen_pud_walk(mm, pud, func, last, limit); } /* @@ -636,32 +598,27 @@ static int xen_p4d_walk(struct mm_struct *mm, p4d_t *p4d, * will be STACK_TOP_MAX, but at boot we need to pin up to * FIXADDR_TOP. * - * For 32-bit the important bit is that we don't pin beyond there, - * because then we start getting into Xen's ptes. - * - * For 64-bit, we must skip the Xen hole in the middle of the address - * space, just after the big x86-64 virtual hole. + * We must skip the Xen hole in the middle of the address space, just after + * the big x86-64 virtual hole. */ -static int __xen_pgd_walk(struct mm_struct *mm, pgd_t *pgd, - int (*func)(struct mm_struct *mm, struct page *, - enum pt_level), - unsigned long limit) +static void __xen_pgd_walk(struct mm_struct *mm, pgd_t *pgd, + void (*func)(struct mm_struct *mm, struct page *, + enum pt_level), + unsigned long limit) { - int i, nr, flush = 0; + int i, nr; unsigned hole_low = 0, hole_high = 0; /* The limit is the last byte to be touched */ limit--; BUG_ON(limit >= FIXADDR_TOP); -#ifdef CONFIG_X86_64 /* * 64-bit has a great big hole in the middle of the address * space, which contains the Xen mappings. */ hole_low = pgd_index(GUARD_HOLE_BASE_ADDR); hole_high = pgd_index(GUARD_HOLE_END_ADDR); -#endif nr = pgd_index(limit) + 1; for (i = 0; i < nr; i++) { @@ -674,22 +631,20 @@ static int __xen_pgd_walk(struct mm_struct *mm, pgd_t *pgd, continue; p4d = p4d_offset(&pgd[i], 0); - flush |= xen_p4d_walk(mm, p4d, func, i == nr - 1, limit); + xen_p4d_walk(mm, p4d, func, i == nr - 1, limit); } /* Do the top level last, so that the callbacks can use it as a cue to do final things like tlb flushes. 
*/ - flush |= (*func)(mm, virt_to_page(pgd), PT_PGD); - - return flush; + (*func)(mm, virt_to_page(pgd), PT_PGD); } -static int xen_pgd_walk(struct mm_struct *mm, - int (*func)(struct mm_struct *mm, struct page *, - enum pt_level), - unsigned long limit) +static void xen_pgd_walk(struct mm_struct *mm, + void (*func)(struct mm_struct *mm, struct page *, + enum pt_level), + unsigned long limit) { - return __xen_pgd_walk(mm, mm->pgd, func, limit); + __xen_pgd_walk(mm, mm->pgd, func, limit); } /* If we're using split pte locks, then take the page's lock and @@ -722,26 +677,17 @@ static void xen_do_pin(unsigned level, unsigned long pfn) xen_extend_mmuext_op(&op); } -static int xen_pin_page(struct mm_struct *mm, struct page *page, - enum pt_level level) +static void xen_pin_page(struct mm_struct *mm, struct page *page, + enum pt_level level) { unsigned pgfl = TestSetPagePinned(page); - int flush; - - if (pgfl) - flush = 0; /* already pinned */ - else if (PageHighMem(page)) - /* kmaps need flushing if we found an unpinned - highpage */ - flush = 1; - else { + + if (!pgfl) { void *pt = lowmem_page_address(page); unsigned long pfn = page_to_pfn(page); struct multicall_space mcs = __xen_mc_entry(0); spinlock_t *ptl; - flush = 0; - /* * We need to hold the pagetable lock between the time * we make the pagetable RO and when we actually pin @@ -778,8 +724,6 @@ static int xen_pin_page(struct mm_struct *mm, struct page *page, xen_mc_callback(xen_pte_unlock, ptl); } } - - return flush; } /* This is called just after a mm has been created, but it has not @@ -787,39 +731,22 @@ static int xen_pin_page(struct mm_struct *mm, struct page *page, read-only, and can be pinned. */ static void __xen_pgd_pin(struct mm_struct *mm, pgd_t *pgd) { + pgd_t *user_pgd = xen_get_user_pgd(pgd); + trace_xen_mmu_pgd_pin(mm, pgd); xen_mc_batch(); - if (__xen_pgd_walk(mm, pgd, xen_pin_page, USER_LIMIT)) { - /* re-enable interrupts for flushing */ - xen_mc_issue(0); + __xen_pgd_walk(mm, pgd, xen_pin_page, USER_LIMIT); - kmap_flush_unused(); + xen_do_pin(MMUEXT_PIN_L4_TABLE, PFN_DOWN(__pa(pgd))); - xen_mc_batch(); + if (user_pgd) { + xen_pin_page(mm, virt_to_page(user_pgd), PT_PGD); + xen_do_pin(MMUEXT_PIN_L4_TABLE, + PFN_DOWN(__pa(user_pgd))); } -#ifdef CONFIG_X86_64 - { - pgd_t *user_pgd = xen_get_user_pgd(pgd); - - xen_do_pin(MMUEXT_PIN_L4_TABLE, PFN_DOWN(__pa(pgd))); - - if (user_pgd) { - xen_pin_page(mm, virt_to_page(user_pgd), PT_PGD); - xen_do_pin(MMUEXT_PIN_L4_TABLE, - PFN_DOWN(__pa(user_pgd))); - } - } -#else /* CONFIG_X86_32 */ -#ifdef CONFIG_X86_PAE - /* Need to make sure unshared kernel PMD is pinnable */ - xen_pin_page(mm, pgd_page(pgd[pgd_index(TASK_SIZE)]), - PT_PMD); -#endif - xen_do_pin(MMUEXT_PIN_L3_TABLE, PFN_DOWN(__pa(pgd))); -#endif /* CONFIG_X86_64 */ xen_mc_issue(0); } @@ -854,11 +781,10 @@ void xen_mm_pin_all(void) spin_unlock(&pgd_lock); } -static int __init xen_mark_pinned(struct mm_struct *mm, struct page *page, - enum pt_level level) +static void __init xen_mark_pinned(struct mm_struct *mm, struct page *page, + enum pt_level level) { SetPagePinned(page); - return 0; } /* @@ -870,18 +796,16 @@ static int __init xen_mark_pinned(struct mm_struct *mm, struct page *page, static void __init xen_after_bootmem(void) { static_branch_enable(&xen_struct_pages_ready); -#ifdef CONFIG_X86_64 SetPagePinned(virt_to_page(level3_user_vsyscall)); -#endif xen_pgd_walk(&init_mm, xen_mark_pinned, FIXADDR_TOP); } -static int xen_unpin_page(struct mm_struct *mm, struct page *page, - enum pt_level level) +static void 
xen_unpin_page(struct mm_struct *mm, struct page *page, + enum pt_level level) { unsigned pgfl = TestClearPagePinned(page); - if (pgfl && !PageHighMem(page)) { + if (pgfl) { void *pt = lowmem_page_address(page); unsigned long pfn = page_to_pfn(page); spinlock_t *ptl = NULL; @@ -912,36 +836,24 @@ static int xen_unpin_page(struct mm_struct *mm, struct page *page, xen_mc_callback(xen_pte_unlock, ptl); } } - - return 0; /* never need to flush on unpin */ } /* Release a pagetables pages back as normal RW */ static void __xen_pgd_unpin(struct mm_struct *mm, pgd_t *pgd) { + pgd_t *user_pgd = xen_get_user_pgd(pgd); + trace_xen_mmu_pgd_unpin(mm, pgd); xen_mc_batch(); xen_do_pin(MMUEXT_UNPIN_TABLE, PFN_DOWN(__pa(pgd))); -#ifdef CONFIG_X86_64 - { - pgd_t *user_pgd = xen_get_user_pgd(pgd); - - if (user_pgd) { - xen_do_pin(MMUEXT_UNPIN_TABLE, - PFN_DOWN(__pa(user_pgd))); - xen_unpin_page(mm, virt_to_page(user_pgd), PT_PGD); - } + if (user_pgd) { + xen_do_pin(MMUEXT_UNPIN_TABLE, + PFN_DOWN(__pa(user_pgd))); + xen_unpin_page(mm, virt_to_page(user_pgd), PT_PGD); } -#endif - -#ifdef CONFIG_X86_PAE - /* Need to make sure unshared kernel PMD is unpinned */ - xen_unpin_page(mm, pgd_page(pgd[pgd_index(TASK_SIZE)]), - PT_PMD); -#endif __xen_pgd_walk(mm, pgd, xen_unpin_page, USER_LIMIT); @@ -1089,7 +1001,6 @@ static void __init pin_pagetable_pfn(unsigned cmd, unsigned long pfn) BUG(); } -#ifdef CONFIG_X86_64 static void __init xen_cleanhighmap(unsigned long vaddr, unsigned long vaddr_end) { @@ -1273,17 +1184,15 @@ static void __init xen_pagetable_cleanhighmap(void) xen_cleanhighmap(addr, roundup(addr + size, PMD_SIZE * 2)); xen_start_info->pt_base = (unsigned long)__va(__pa(xen_start_info->pt_base)); } -#endif static void __init xen_pagetable_p2m_setup(void) { xen_vmalloc_p2m_tree(); -#ifdef CONFIG_X86_64 xen_pagetable_p2m_free(); xen_pagetable_cleanhighmap(); -#endif + /* And revector! Bye bye old array */ xen_start_info->mfn_list = (unsigned long)xen_p2m_addr; } @@ -1420,6 +1329,8 @@ static void __xen_write_cr3(bool kernel, unsigned long cr3) } static void xen_write_cr3(unsigned long cr3) { + pgd_t *user_pgd = xen_get_user_pgd(__va(cr3)); + BUG_ON(preemptible()); xen_mc_batch(); /* disables interrupts */ @@ -1430,20 +1341,14 @@ static void xen_write_cr3(unsigned long cr3) __xen_write_cr3(true, cr3); -#ifdef CONFIG_X86_64 - { - pgd_t *user_pgd = xen_get_user_pgd(__va(cr3)); - if (user_pgd) - __xen_write_cr3(false, __pa(user_pgd)); - else - __xen_write_cr3(false, 0); - } -#endif + if (user_pgd) + __xen_write_cr3(false, __pa(user_pgd)); + else + __xen_write_cr3(false, 0); xen_mc_issue(PARAVIRT_LAZY_CPU); /* interrupts restored */ } -#ifdef CONFIG_X86_64 /* * At the start of the day - when Xen launches a guest, it has already * built pagetables for the guest. 
We diligently look over them @@ -1478,49 +1383,39 @@ static void __init xen_write_cr3_init(unsigned long cr3) xen_mc_issue(PARAVIRT_LAZY_CPU); /* interrupts restored */ } -#endif static int xen_pgd_alloc(struct mm_struct *mm) { pgd_t *pgd = mm->pgd; - int ret = 0; + struct page *page = virt_to_page(pgd); + pgd_t *user_pgd; + int ret = -ENOMEM; BUG_ON(PagePinned(virt_to_page(pgd))); + BUG_ON(page->private != 0); -#ifdef CONFIG_X86_64 - { - struct page *page = virt_to_page(pgd); - pgd_t *user_pgd; + user_pgd = (pgd_t *)__get_free_page(GFP_KERNEL | __GFP_ZERO); + page->private = (unsigned long)user_pgd; - BUG_ON(page->private != 0); - - ret = -ENOMEM; - - user_pgd = (pgd_t *)__get_free_page(GFP_KERNEL | __GFP_ZERO); - page->private = (unsigned long)user_pgd; - - if (user_pgd != NULL) { + if (user_pgd != NULL) { #ifdef CONFIG_X86_VSYSCALL_EMULATION - user_pgd[pgd_index(VSYSCALL_ADDR)] = - __pgd(__pa(level3_user_vsyscall) | _PAGE_TABLE); + user_pgd[pgd_index(VSYSCALL_ADDR)] = + __pgd(__pa(level3_user_vsyscall) | _PAGE_TABLE); #endif - ret = 0; - } - - BUG_ON(PagePinned(virt_to_page(xen_get_user_pgd(pgd)))); + ret = 0; } -#endif + + BUG_ON(PagePinned(virt_to_page(xen_get_user_pgd(pgd)))); + return ret; } static void xen_pgd_free(struct mm_struct *mm, pgd_t *pgd) { -#ifdef CONFIG_X86_64 pgd_t *user_pgd = xen_get_user_pgd(pgd); if (user_pgd) free_page((unsigned long)user_pgd); -#endif } /* @@ -1539,7 +1434,6 @@ static void xen_pgd_free(struct mm_struct *mm, pgd_t *pgd) */ __visible pte_t xen_make_pte_init(pteval_t pte) { -#ifdef CONFIG_X86_64 unsigned long pfn; /* @@ -1553,7 +1447,7 @@ __visible pte_t xen_make_pte_init(pteval_t pte) pfn >= xen_start_info->first_p2m_pfn && pfn < xen_start_info->first_p2m_pfn + xen_start_info->nr_p2m_frames) pte &= ~_PAGE_RW; -#endif + pte = pte_pfn_to_mfn(pte); return native_make_pte(pte); } @@ -1561,13 +1455,6 @@ PV_CALLEE_SAVE_REGS_THUNK(xen_make_pte_init); static void __init xen_set_pte_init(pte_t *ptep, pte_t pte) { -#ifdef CONFIG_X86_32 - /* If there's an existing pte, then don't allow _PAGE_RW to be set */ - if (pte_mfn(pte) != INVALID_P2M_ENTRY - && pte_val_ma(*ptep) & _PAGE_PRESENT) - pte = __pte_ma(((pte_val_ma(*ptep) & _PAGE_RW) | ~_PAGE_RW) & - pte_val_ma(pte)); -#endif __xen_set_pte(ptep, pte); } @@ -1642,20 +1529,14 @@ static inline void xen_alloc_ptpage(struct mm_struct *mm, unsigned long pfn, if (static_branch_likely(&xen_struct_pages_ready)) SetPagePinned(page); - if (!PageHighMem(page)) { - xen_mc_batch(); + xen_mc_batch(); - __set_pfn_prot(pfn, PAGE_KERNEL_RO); + __set_pfn_prot(pfn, PAGE_KERNEL_RO); - if (level == PT_PTE && USE_SPLIT_PTE_PTLOCKS) - __pin_pagetable_pfn(MMUEXT_PIN_L1_TABLE, pfn); + if (level == PT_PTE && USE_SPLIT_PTE_PTLOCKS) + __pin_pagetable_pfn(MMUEXT_PIN_L1_TABLE, pfn); - xen_mc_issue(PARAVIRT_LAZY_MMU); - } else { - /* make sure there are no stray mappings of - this page */ - kmap_flush_unused(); - } + xen_mc_issue(PARAVIRT_LAZY_MMU); } } @@ -1678,16 +1559,15 @@ static inline void xen_release_ptpage(unsigned long pfn, unsigned level) trace_xen_mmu_release_ptpage(pfn, level, pinned); if (pinned) { - if (!PageHighMem(page)) { - xen_mc_batch(); + xen_mc_batch(); - if (level == PT_PTE && USE_SPLIT_PTE_PTLOCKS) - __pin_pagetable_pfn(MMUEXT_UNPIN_TABLE, pfn); + if (level == PT_PTE && USE_SPLIT_PTE_PTLOCKS) + __pin_pagetable_pfn(MMUEXT_UNPIN_TABLE, pfn); - __set_pfn_prot(pfn, PAGE_KERNEL); + __set_pfn_prot(pfn, PAGE_KERNEL); + + xen_mc_issue(PARAVIRT_LAZY_MMU); - xen_mc_issue(PARAVIRT_LAZY_MMU); - } ClearPagePinned(page); } } @@ 
-1702,7 +1582,6 @@ static void xen_release_pmd(unsigned long pfn) xen_release_ptpage(pfn, PT_PMD); } -#ifdef CONFIG_X86_64 static void xen_alloc_pud(struct mm_struct *mm, unsigned long pfn) { xen_alloc_ptpage(mm, pfn, PT_PUD); @@ -1712,20 +1591,6 @@ static void xen_release_pud(unsigned long pfn) { xen_release_ptpage(pfn, PT_PUD); } -#endif - -void __init xen_reserve_top(void) -{ -#ifdef CONFIG_X86_32 - unsigned long top = HYPERVISOR_VIRT_START; - struct xen_platform_parameters pp; - - if (HYPERVISOR_xen_version(XENVER_platform_parameters, &pp) == 0) - top = pp.virt_start; - - reserve_top_address(-top); -#endif /* CONFIG_X86_32 */ -} /* * Like __va(), but returns address in the kernel mapping (which is @@ -1733,11 +1598,7 @@ void __init xen_reserve_top(void) */ static void * __init __ka(phys_addr_t paddr) { -#ifdef CONFIG_X86_64 return (void *)(paddr + __START_KERNEL_map); -#else - return __va(paddr); -#endif } /* Convert a machine address to physical address */ @@ -1771,56 +1632,7 @@ static void __init set_page_prot(void *addr, pgprot_t prot) { return set_page_prot_flags(addr, prot, UVMF_NONE); } -#ifdef CONFIG_X86_32 -static void __init xen_map_identity_early(pmd_t *pmd, unsigned long max_pfn) -{ - unsigned pmdidx, pteidx; - unsigned ident_pte; - unsigned long pfn; - - level1_ident_pgt = extend_brk(sizeof(pte_t) * LEVEL1_IDENT_ENTRIES, - PAGE_SIZE); - - ident_pte = 0; - pfn = 0; - for (pmdidx = 0; pmdidx < PTRS_PER_PMD && pfn < max_pfn; pmdidx++) { - pte_t *pte_page; - - /* Reuse or allocate a page of ptes */ - if (pmd_present(pmd[pmdidx])) - pte_page = m2v(pmd[pmdidx].pmd); - else { - /* Check for free pte pages */ - if (ident_pte == LEVEL1_IDENT_ENTRIES) - break; - - pte_page = &level1_ident_pgt[ident_pte]; - ident_pte += PTRS_PER_PTE; - - pmd[pmdidx] = __pmd(__pa(pte_page) | _PAGE_TABLE); - } - - /* Install mappings */ - for (pteidx = 0; pteidx < PTRS_PER_PTE; pteidx++, pfn++) { - pte_t pte; - if (pfn > max_pfn_mapped) - max_pfn_mapped = pfn; - - if (!pte_none(pte_page[pteidx])) - continue; - - pte = pfn_pte(pfn, PAGE_KERNEL_EXEC); - pte_page[pteidx] = pte; - } - } - - for (pteidx = 0; pteidx < ident_pte; pteidx += PTRS_PER_PTE) - set_page_prot(&level1_ident_pgt[pteidx], PAGE_KERNEL_RO); - - set_page_prot(pmd, PAGE_KERNEL_RO); -} -#endif void __init xen_setup_machphys_mapping(void) { struct xen_machphys_mapping mapping; @@ -1831,13 +1643,8 @@ void __init xen_setup_machphys_mapping(void) } else { machine_to_phys_nr = MACH2PHYS_NR_ENTRIES; } -#ifdef CONFIG_X86_32 - WARN_ON((machine_to_phys_mapping + (machine_to_phys_nr - 1)) - < machine_to_phys_mapping); -#endif } -#ifdef CONFIG_X86_64 static void __init convert_pfn_mfn(void *v) { pte_t *pte = v; @@ -2168,105 +1975,6 @@ void __init xen_relocate_p2m(void) xen_start_info->nr_p2m_frames = n_frames; } -#else /* !CONFIG_X86_64 */ -static RESERVE_BRK_ARRAY(pmd_t, initial_kernel_pmd, PTRS_PER_PMD); -static RESERVE_BRK_ARRAY(pmd_t, swapper_kernel_pmd, PTRS_PER_PMD); -RESERVE_BRK(fixup_kernel_pmd, PAGE_SIZE); -RESERVE_BRK(fixup_kernel_pte, PAGE_SIZE); - -static void __init xen_write_cr3_init(unsigned long cr3) -{ - unsigned long pfn = PFN_DOWN(__pa(swapper_pg_dir)); - - BUG_ON(read_cr3_pa() != __pa(initial_page_table)); - BUG_ON(cr3 != __pa(swapper_pg_dir)); - - /* - * We are switching to swapper_pg_dir for the first time (from - * initial_page_table) and therefore need to mark that page - * read-only and then pin it. - * - * Xen disallows sharing of kernel PMDs for PAE - * guests. 
Therefore we must copy the kernel PMD from - * initial_page_table into a new kernel PMD to be used in - * swapper_pg_dir. - */ - swapper_kernel_pmd = - extend_brk(sizeof(pmd_t) * PTRS_PER_PMD, PAGE_SIZE); - copy_page(swapper_kernel_pmd, initial_kernel_pmd); - swapper_pg_dir[KERNEL_PGD_BOUNDARY] = - __pgd(__pa(swapper_kernel_pmd) | _PAGE_PRESENT); - set_page_prot(swapper_kernel_pmd, PAGE_KERNEL_RO); - - set_page_prot(swapper_pg_dir, PAGE_KERNEL_RO); - xen_write_cr3(cr3); - pin_pagetable_pfn(MMUEXT_PIN_L3_TABLE, pfn); - - pin_pagetable_pfn(MMUEXT_UNPIN_TABLE, - PFN_DOWN(__pa(initial_page_table))); - set_page_prot(initial_page_table, PAGE_KERNEL); - set_page_prot(initial_kernel_pmd, PAGE_KERNEL); - - pv_ops.mmu.write_cr3 = &xen_write_cr3; -} - -/* - * For 32 bit domains xen_start_info->pt_base is the pgd address which might be - * not the first page table in the page table pool. - * Iterate through the initial page tables to find the real page table base. - */ -static phys_addr_t __init xen_find_pt_base(pmd_t *pmd) -{ - phys_addr_t pt_base, paddr; - unsigned pmdidx; - - pt_base = min(__pa(xen_start_info->pt_base), __pa(pmd)); - - for (pmdidx = 0; pmdidx < PTRS_PER_PMD; pmdidx++) - if (pmd_present(pmd[pmdidx]) && !pmd_large(pmd[pmdidx])) { - paddr = m2p(pmd[pmdidx].pmd); - pt_base = min(pt_base, paddr); - } - - return pt_base; -} - -void __init xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn) -{ - pmd_t *kernel_pmd; - - kernel_pmd = m2v(pgd[KERNEL_PGD_BOUNDARY].pgd); - - xen_pt_base = xen_find_pt_base(kernel_pmd); - xen_pt_size = xen_start_info->nr_pt_frames * PAGE_SIZE; - - initial_kernel_pmd = - extend_brk(sizeof(pmd_t) * PTRS_PER_PMD, PAGE_SIZE); - - max_pfn_mapped = PFN_DOWN(xen_pt_base + xen_pt_size + 512 * 1024); - - copy_page(initial_kernel_pmd, kernel_pmd); - - xen_map_identity_early(initial_kernel_pmd, max_pfn); - - copy_page(initial_page_table, pgd); - initial_page_table[KERNEL_PGD_BOUNDARY] = - __pgd(__pa(initial_kernel_pmd) | _PAGE_PRESENT); - - set_page_prot(initial_kernel_pmd, PAGE_KERNEL_RO); - set_page_prot(initial_page_table, PAGE_KERNEL_RO); - set_page_prot(empty_zero_page, PAGE_KERNEL_RO); - - pin_pagetable_pfn(MMUEXT_UNPIN_TABLE, PFN_DOWN(__pa(pgd))); - - pin_pagetable_pfn(MMUEXT_PIN_L3_TABLE, - PFN_DOWN(__pa(initial_page_table))); - xen_write_cr3(__pa(initial_page_table)); - - memblock_reserve(xen_pt_base, xen_pt_size); -} -#endif /* CONFIG_X86_64 */ - void __init xen_reserve_special_pages(void) { phys_addr_t paddr; @@ -2300,12 +2008,7 @@ static void xen_set_fixmap(unsigned idx, phys_addr_t phys, pgprot_t prot) switch (idx) { case FIX_BTMAP_END ... FIX_BTMAP_BEGIN: -#ifdef CONFIG_X86_32 - case FIX_WP_TEST: -# ifdef CONFIG_HIGHMEM - case FIX_KMAP_BEGIN ... 
FIX_KMAP_END: -# endif -#elif defined(CONFIG_X86_VSYSCALL_EMULATION) +#ifdef CONFIG_X86_VSYSCALL_EMULATION case VSYSCALL_PAGE: #endif /* All local page mappings */ @@ -2357,9 +2060,7 @@ static void __init xen_post_allocator_init(void) pv_ops.mmu.set_pte = xen_set_pte; pv_ops.mmu.set_pmd = xen_set_pmd; pv_ops.mmu.set_pud = xen_set_pud; -#ifdef CONFIG_X86_64 pv_ops.mmu.set_p4d = xen_set_p4d; -#endif /* This will work as long as patching hasn't happened yet (which it hasn't) */ @@ -2367,15 +2068,11 @@ static void __init xen_post_allocator_init(void) pv_ops.mmu.alloc_pmd = xen_alloc_pmd; pv_ops.mmu.release_pte = xen_release_pte; pv_ops.mmu.release_pmd = xen_release_pmd; -#ifdef CONFIG_X86_64 pv_ops.mmu.alloc_pud = xen_alloc_pud; pv_ops.mmu.release_pud = xen_release_pud; -#endif pv_ops.mmu.make_pte = PV_CALLEE_SAVE(xen_make_pte); -#ifdef CONFIG_X86_64 pv_ops.mmu.write_cr3 = &xen_write_cr3; -#endif } static void xen_leave_lazy_mmu(void) @@ -2420,17 +2117,11 @@ static const struct pv_mmu_ops xen_mmu_ops __initconst = { .make_pte = PV_CALLEE_SAVE(xen_make_pte_init), .make_pgd = PV_CALLEE_SAVE(xen_make_pgd), -#ifdef CONFIG_X86_PAE - .set_pte_atomic = xen_set_pte_atomic, - .pte_clear = xen_pte_clear, - .pmd_clear = xen_pmd_clear, -#endif /* CONFIG_X86_PAE */ .set_pud = xen_set_pud_hyper, .make_pmd = PV_CALLEE_SAVE(xen_make_pmd), .pmd_val = PV_CALLEE_SAVE(xen_pmd_val), -#ifdef CONFIG_X86_64 .pud_val = PV_CALLEE_SAVE(xen_pud_val), .make_pud = PV_CALLEE_SAVE(xen_make_pud), .set_p4d = xen_set_p4d_hyper, @@ -2442,7 +2133,6 @@ static const struct pv_mmu_ops xen_mmu_ops __initconst = { .p4d_val = PV_CALLEE_SAVE(xen_p4d_val), .make_p4d = PV_CALLEE_SAVE(xen_make_p4d), #endif -#endif /* CONFIG_X86_64 */ .activate_mm = xen_activate_mm, .dup_mmap = xen_dup_mmap, diff --git a/arch/x86/xen/p2m.c b/arch/x86/xen/p2m.c index 0acba2c712ab..be4151f42611 100644 --- a/arch/x86/xen/p2m.c +++ b/arch/x86/xen/p2m.c @@ -379,12 +379,8 @@ static void __init xen_rebuild_p2m_list(unsigned long *p2m) if (type == P2M_TYPE_PFN || i < chunk) { /* Use initial p2m page contents. */ -#ifdef CONFIG_X86_64 mfns = alloc_p2m_page(); copy_page(mfns, xen_p2m_addr + pfn); -#else - mfns = xen_p2m_addr + pfn; -#endif ptep = populate_extra_pte((unsigned long)(p2m + pfn)); set_pte(ptep, pfn_pte(PFN_DOWN(__pa(mfns)), PAGE_KERNEL)); @@ -467,7 +463,7 @@ EXPORT_SYMBOL_GPL(get_phys_to_machine); * Allocate new pmd(s). It is checked whether the old pmd is still in place. * If not, nothing is changed. This is okay as the only reason for allocating * a new pmd is to replace p2m_missing_pte or p2m_identity_pte by a individual - * pmd. In case of PAE/x86-32 there are multiple pmds to allocate! + * pmd. 
*/ static pte_t *alloc_p2m_pmd(unsigned long addr, pte_t *pte_pg) { diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c index 3566e37241d7..7eab14d56369 100644 --- a/arch/x86/xen/setup.c +++ b/arch/x86/xen/setup.c @@ -32,7 +32,6 @@ #include <xen/features.h> #include <xen/hvc-console.h> #include "xen-ops.h" -#include "vdso.h" #include "mmu.h" #define GB(x) ((uint64_t)(x) * 1024 * 1024 * 1024) @@ -545,13 +544,10 @@ static unsigned long __init xen_get_pages_limit(void) { unsigned long limit; -#ifdef CONFIG_X86_32 - limit = GB(64) / PAGE_SIZE; -#else limit = MAXMEM / PAGE_SIZE; if (!xen_initial_domain() && xen_512gb_limit) limit = GB(512) / PAGE_SIZE; -#endif + return limit; } @@ -722,17 +718,8 @@ static void __init xen_reserve_xen_mfnlist(void) if (!xen_is_e820_reserved(start, size)) return; -#ifdef CONFIG_X86_32 - /* - * Relocating the p2m on 32 bit system to an arbitrary virtual address - * is not supported, so just give up. - */ - xen_raw_console_write("Xen hypervisor allocated p2m list conflicts with E820 map\n"); - BUG(); -#else xen_relocate_p2m(); memblock_free(start, size); -#endif } /** @@ -921,20 +908,6 @@ char * __init xen_memory_setup(void) return "Xen"; } -/* - * Set the bit indicating "nosegneg" library variants should be used. - * We only need to bother in pure 32-bit mode; compat 32-bit processes - * can have un-truncated segments, so wrapping around is allowed. - */ -static void __init fiddle_vdso(void) -{ -#ifdef CONFIG_X86_32 - u32 *mask = vdso_image_32.data + - vdso_image_32.sym_VDSO32_NOTE_MASK; - *mask |= 1 << VDSO_NOTE_NONEGSEG_BIT; -#endif -} - static int register_callback(unsigned type, const void *func) { struct callback_register callback = { @@ -951,11 +924,7 @@ void xen_enable_sysenter(void) int ret; unsigned sysenter_feature; -#ifdef CONFIG_X86_32 - sysenter_feature = X86_FEATURE_SEP; -#else sysenter_feature = X86_FEATURE_SYSENTER32; -#endif if (!boot_cpu_has(sysenter_feature)) return; @@ -967,7 +936,6 @@ void xen_enable_sysenter(void) void xen_enable_syscall(void) { -#ifdef CONFIG_X86_64 int ret; ret = register_callback(CALLBACKTYPE_syscall, xen_syscall_target); @@ -983,7 +951,6 @@ void xen_enable_syscall(void) if (ret != 0) setup_clear_cpu_cap(X86_FEATURE_SYSCALL32); } -#endif /* CONFIG_X86_64 */ } static void __init xen_pvmmu_arch_setup(void) @@ -1024,7 +991,6 @@ void __init xen_arch_setup(void) disable_cpuidle(); disable_cpufreq(); WARN_ON(xen_set_default_idle()); - fiddle_vdso(); #ifdef CONFIG_NUMA numa_off = 1; #endif diff --git a/arch/x86/xen/smp_pv.c b/arch/x86/xen/smp_pv.c index 47c8f4b444c9..c2ac319f11a4 100644 --- a/arch/x86/xen/smp_pv.c +++ b/arch/x86/xen/smp_pv.c @@ -211,15 +211,6 @@ static void __init xen_pv_smp_prepare_boot_cpu(void) * sure the old memory can be recycled. 
*/ make_lowmem_page_readwrite(xen_initial_gdt); -#ifdef CONFIG_X86_32 - /* - * Xen starts us with XEN_FLAT_RING1_DS, but linux code - * expects __USER_DS - */ - loadsegment(ds, __USER_DS); - loadsegment(es, __USER_DS); -#endif - xen_filter_cpu_maps(); xen_setup_vcpu_info_placement(); @@ -300,10 +291,6 @@ cpu_initialize_context(unsigned int cpu, struct task_struct *idle) gdt = get_cpu_gdt_rw(cpu); -#ifdef CONFIG_X86_32 - ctxt->user_regs.fs = __KERNEL_PERCPU; - ctxt->user_regs.gs = __KERNEL_STACK_CANARY; -#endif memset(&ctxt->fpu_ctxt, 0, sizeof(ctxt->fpu_ctxt)); /* @@ -341,12 +328,7 @@ cpu_initialize_context(unsigned int cpu, struct task_struct *idle) ctxt->kernel_ss = __KERNEL_DS; ctxt->kernel_sp = task_top_of_stack(idle); -#ifdef CONFIG_X86_32 - ctxt->event_callback_cs = __KERNEL_CS; - ctxt->failsafe_callback_cs = __KERNEL_CS; -#else ctxt->gs_base_kernel = per_cpu_offset(cpu); -#endif ctxt->event_callback_eip = (unsigned long)xen_asm_exc_xen_hypervisor_callback; ctxt->failsafe_callback_eip = diff --git a/arch/x86/xen/vdso.h b/arch/x86/xen/vdso.h deleted file mode 100644 index 873c54c488fe..000000000000 --- a/arch/x86/xen/vdso.h +++ /dev/null @@ -1,6 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ - -/* Bit used for the pseudo-hwcap for non-negative segments. We use - bit 1 to avoid bugs in some versions of glibc when bit 0 is - used; the choice is otherwise arbitrary. */ -#define VDSO_NOTE_NONEGSEG_BIT 1 diff --git a/arch/x86/xen/xen-asm.S b/arch/x86/xen/xen-asm.S index 508fe204520b..1cb0e84b9161 100644 --- a/arch/x86/xen/xen-asm.S +++ b/arch/x86/xen/xen-asm.S @@ -6,12 +6,18 @@ * operations here; the indirect forms are better handled in C. */ +#include <asm/errno.h> #include <asm/asm-offsets.h> #include <asm/percpu.h> #include <asm/processor-flags.h> -#include <asm/frame.h> +#include <asm/segment.h> +#include <asm/thread_info.h> #include <asm/asm.h> +#include <asm/frame.h> +#include <xen/interface/xen.h> + +#include <linux/init.h> #include <linux/linkage.h> /* @@ -76,11 +82,7 @@ SYM_FUNC_END(xen_save_fl_direct) */ SYM_FUNC_START(xen_restore_fl_direct) FRAME_BEGIN -#ifdef CONFIG_X86_64 testw $X86_EFLAGS_IF, %di -#else - testb $X86_EFLAGS_IF>>8, %ah -#endif setz PER_CPU_VAR(xen_vcpu_info) + XEN_vcpu_info_mask /* * Preempt here doesn't matter because that will deal with any @@ -104,15 +106,6 @@ SYM_FUNC_END(xen_restore_fl_direct) */ SYM_FUNC_START(check_events) FRAME_BEGIN -#ifdef CONFIG_X86_32 - push %eax - push %ecx - push %edx - call xen_force_evtchn_callback - pop %edx - pop %ecx - pop %eax -#else push %rax push %rcx push %rdx @@ -132,7 +125,6 @@ SYM_FUNC_START(check_events) pop %rdx pop %rcx pop %rax -#endif FRAME_END ret SYM_FUNC_END(check_events) @@ -151,3 +143,175 @@ SYM_FUNC_START(xen_read_cr2_direct) FRAME_END ret SYM_FUNC_END(xen_read_cr2_direct); + +.macro xen_pv_trap name +SYM_CODE_START(xen_\name) + pop %rcx + pop %r11 + jmp \name +SYM_CODE_END(xen_\name) +_ASM_NOKPROBE(xen_\name) +.endm + +xen_pv_trap asm_exc_divide_error +xen_pv_trap asm_xenpv_exc_debug +xen_pv_trap asm_exc_int3 +xen_pv_trap asm_xenpv_exc_nmi +xen_pv_trap asm_exc_overflow +xen_pv_trap asm_exc_bounds +xen_pv_trap asm_exc_invalid_op +xen_pv_trap asm_exc_device_not_available +xen_pv_trap asm_exc_double_fault +xen_pv_trap asm_exc_coproc_segment_overrun +xen_pv_trap asm_exc_invalid_tss +xen_pv_trap asm_exc_segment_not_present +xen_pv_trap asm_exc_stack_segment +xen_pv_trap asm_exc_general_protection +xen_pv_trap asm_exc_page_fault +xen_pv_trap asm_exc_spurious_interrupt_bug +xen_pv_trap asm_exc_coprocessor_error 
+xen_pv_trap asm_exc_alignment_check +#ifdef CONFIG_X86_MCE +xen_pv_trap asm_exc_machine_check +#endif /* CONFIG_X86_MCE */ +xen_pv_trap asm_exc_simd_coprocessor_error +#ifdef CONFIG_IA32_EMULATION +xen_pv_trap entry_INT80_compat +#endif +xen_pv_trap asm_exc_xen_hypervisor_callback + + __INIT +SYM_CODE_START(xen_early_idt_handler_array) + i = 0 + .rept NUM_EXCEPTION_VECTORS + pop %rcx + pop %r11 + jmp early_idt_handler_array + i*EARLY_IDT_HANDLER_SIZE + i = i + 1 + .fill xen_early_idt_handler_array + i*XEN_EARLY_IDT_HANDLER_SIZE - ., 1, 0xcc + .endr +SYM_CODE_END(xen_early_idt_handler_array) + __FINIT + +hypercall_iret = hypercall_page + __HYPERVISOR_iret * 32 +/* + * Xen64 iret frame: + * + * ss + * rsp + * rflags + * cs + * rip <-- standard iret frame + * + * flags + * + * rcx } + * r11 }<-- pushed by hypercall page + * rsp->rax } + */ +SYM_CODE_START(xen_iret) + pushq $0 + jmp hypercall_iret +SYM_CODE_END(xen_iret) + +SYM_CODE_START(xen_sysret64) + /* + * We're already on the usermode stack at this point, but + * still with the kernel gs, so we can easily switch back. + * + * tss.sp2 is scratch space. + */ + movq %rsp, PER_CPU_VAR(cpu_tss_rw + TSS_sp2) + movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp + + pushq $__USER_DS + pushq PER_CPU_VAR(cpu_tss_rw + TSS_sp2) + pushq %r11 + pushq $__USER_CS + pushq %rcx + + pushq $VGCF_in_syscall + jmp hypercall_iret +SYM_CODE_END(xen_sysret64) + +/* + * Xen handles syscall callbacks much like ordinary exceptions, which + * means we have: + * - kernel gs + * - kernel rsp + * - an iret-like stack frame on the stack (including rcx and r11): + * ss + * rsp + * rflags + * cs + * rip + * r11 + * rsp->rcx + */ + +/* Normal 64-bit system call target */ +SYM_FUNC_START(xen_syscall_target) + popq %rcx + popq %r11 + + /* + * Neither Xen nor the kernel really knows what the old SS and + * CS were. The kernel expects __USER_DS and __USER_CS, so + * report those values even though Xen will guess its own values. + */ + movq $__USER_DS, 4*8(%rsp) + movq $__USER_CS, 1*8(%rsp) + + jmp entry_SYSCALL_64_after_hwframe +SYM_FUNC_END(xen_syscall_target) + +#ifdef CONFIG_IA32_EMULATION + +/* 32-bit compat syscall target */ +SYM_FUNC_START(xen_syscall32_target) + popq %rcx + popq %r11 + + /* + * Neither Xen nor the kernel really knows what the old SS and + * CS were. The kernel expects __USER32_DS and __USER32_CS, so + * report those values even though Xen will guess its own values. + */ + movq $__USER32_DS, 4*8(%rsp) + movq $__USER32_CS, 1*8(%rsp) + + jmp entry_SYSCALL_compat_after_hwframe +SYM_FUNC_END(xen_syscall32_target) + +/* 32-bit compat sysenter target */ +SYM_FUNC_START(xen_sysenter_target) + /* + * NB: Xen is polite and clears TF from EFLAGS for us. This means + * that we don't need to guard against single step exceptions here. + */ + popq %rcx + popq %r11 + + /* + * Neither Xen nor the kernel really knows what the old SS and + * CS were. The kernel expects __USER32_DS and __USER32_CS, so + * report those values even though Xen will guess its own values. 
+ */ + movq $__USER32_DS, 4*8(%rsp) + movq $__USER32_CS, 1*8(%rsp) + + jmp entry_SYSENTER_compat_after_hwframe +SYM_FUNC_END(xen_sysenter_target) + +#else /* !CONFIG_IA32_EMULATION */ + +SYM_FUNC_START_ALIAS(xen_syscall32_target) +SYM_FUNC_START(xen_sysenter_target) + lea 16(%rsp), %rsp /* strip %rcx, %r11 */ + mov $-ENOSYS, %rax + pushq $0 + jmp hypercall_iret +SYM_FUNC_END(xen_sysenter_target) +SYM_FUNC_END_ALIAS(xen_syscall32_target) + +#endif /* CONFIG_IA32_EMULATION */ diff --git a/arch/x86/xen/xen-asm_32.S b/arch/x86/xen/xen-asm_32.S deleted file mode 100644 index 4757cec33abe..000000000000 --- a/arch/x86/xen/xen-asm_32.S +++ /dev/null @@ -1,185 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * Asm versions of Xen pv-ops, suitable for direct use. - * - * We only bother with direct forms (ie, vcpu in pda) of the - * operations here; the indirect forms are better handled in C. - */ - -#include <asm/thread_info.h> -#include <asm/processor-flags.h> -#include <asm/segment.h> -#include <asm/asm.h> - -#include <xen/interface/xen.h> - -#include <linux/linkage.h> - -/* Pseudo-flag used for virtual NMI, which we don't implement yet */ -#define XEN_EFLAGS_NMI 0x80000000 - -/* - * This is run where a normal iret would be run, with the same stack setup: - * 8: eflags - * 4: cs - * esp-> 0: eip - * - * This attempts to make sure that any pending events are dealt with - * on return to usermode, but there is a small window in which an - * event can happen just before entering usermode. If the nested - * interrupt ends up setting one of the TIF_WORK_MASK pending work - * flags, they will not be tested again before returning to - * usermode. This means that a process can end up with pending work, - * which will be unprocessed until the process enters and leaves the - * kernel again, which could be an unbounded amount of time. This - * means that a pending signal or reschedule event could be - * indefinitely delayed. - * - * The fix is to notice a nested interrupt in the critical window, and - * if one occurs, then fold the nested interrupt into the current - * interrupt stack frame, and re-process it iteratively rather than - * recursively. This means that it will exit via the normal path, and - * all pending work will be dealt with appropriately. - * - * Because the nested interrupt handler needs to deal with the current - * stack state in whatever form its in, we keep things simple by only - * using a single register which is pushed/popped on the stack. - */ - -.macro POP_FS -1: - popw %fs -.pushsection .fixup, "ax" -2: movw $0, (%esp) - jmp 1b -.popsection - _ASM_EXTABLE(1b,2b) -.endm - -SYM_CODE_START(xen_iret) - /* test eflags for special cases */ - testl $(X86_EFLAGS_VM | XEN_EFLAGS_NMI), 8(%esp) - jnz hyper_iret - - push %eax - ESP_OFFSET=4 # bytes pushed onto stack - - /* Store vcpu_info pointer for easy access */ -#ifdef CONFIG_SMP - pushw %fs - movl $(__KERNEL_PERCPU), %eax - movl %eax, %fs - movl %fs:xen_vcpu, %eax - POP_FS -#else - movl %ss:xen_vcpu, %eax -#endif - - /* check IF state we're restoring */ - testb $X86_EFLAGS_IF>>8, 8+1+ESP_OFFSET(%esp) - - /* - * Maybe enable events. Once this happens we could get a - * recursive event, so the critical region starts immediately - * afterwards. However, if that happens we don't end up - * resuming the code, so we don't have to be worried about - * being preempted to another CPU. 
- */ - setz %ss:XEN_vcpu_info_mask(%eax) -xen_iret_start_crit: - - /* check for unmasked and pending */ - cmpw $0x0001, %ss:XEN_vcpu_info_pending(%eax) - - /* - * If there's something pending, mask events again so we can - * jump back into exc_xen_hypervisor_callback. Otherwise do not - * touch XEN_vcpu_info_mask. - */ - jne 1f - movb $1, %ss:XEN_vcpu_info_mask(%eax) - -1: popl %eax - - /* - * From this point on the registers are restored and the stack - * updated, so we don't need to worry about it if we're - * preempted - */ -iret_restore_end: - - /* - * Jump to hypervisor_callback after fixing up the stack. - * Events are masked, so jumping out of the critical region is - * OK. - */ - je xen_asm_exc_xen_hypervisor_callback - -1: iret -xen_iret_end_crit: - _ASM_EXTABLE(1b, asm_iret_error) - -hyper_iret: - /* put this out of line since its very rarely used */ - jmp hypercall_page + __HYPERVISOR_iret * 32 -SYM_CODE_END(xen_iret) - - .globl xen_iret_start_crit, xen_iret_end_crit - -/* - * This is called by xen_asm_exc_xen_hypervisor_callback in entry_32.S when it sees - * that the EIP at the time of interrupt was between - * xen_iret_start_crit and xen_iret_end_crit. - * - * The stack format at this point is: - * ---------------- - * ss : (ss/esp may be present if we came from usermode) - * esp : - * eflags } outer exception info - * cs } - * eip } - * ---------------- - * eax : outer eax if it hasn't been restored - * ---------------- - * eflags } - * cs } nested exception info - * eip } - * return address : (into xen_asm_exc_xen_hypervisor_callback) - * - * In order to deliver the nested exception properly, we need to discard the - * nested exception frame such that when we handle the exception, we do it - * in the context of the outer exception rather than starting a new one. - * - * The only caveat is that if the outer eax hasn't been restored yet (i.e. - * it's still on stack), we need to restore its value here. -*/ -.pushsection .noinstr.text, "ax" -SYM_CODE_START(xen_iret_crit_fixup) - /* - * Paranoia: Make sure we're really coming from kernel space. - * One could imagine a case where userspace jumps into the - * critical range address, but just before the CPU delivers a - * PF, it decides to deliver an interrupt instead. Unlikely? - * Definitely. Easy to avoid? Yes. - */ - testb $2, 2*4(%esp) /* nested CS */ - jnz 2f - - /* - * If eip is before iret_restore_end then stack - * hasn't been restored yet. - */ - cmpl $iret_restore_end, 1*4(%esp) - jae 1f - - movl 4*4(%esp), %eax /* load outer EAX */ - ret $4*4 /* discard nested EIP, CS, and EFLAGS as - * well as the just restored EAX */ - -1: - ret $3*4 /* discard nested EIP, CS, and EFLAGS */ - -2: - ret -SYM_CODE_END(xen_iret_crit_fixup) -.popsection diff --git a/arch/x86/xen/xen-asm_64.S b/arch/x86/xen/xen-asm_64.S deleted file mode 100644 index aab1d99b2b48..000000000000 --- a/arch/x86/xen/xen-asm_64.S +++ /dev/null @@ -1,192 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * Asm versions of Xen pv-ops, suitable for direct use. - * - * We only bother with direct forms (ie, vcpu in pda) of the - * operations here; the indirect forms are better handled in C. 
- */ - -#include <asm/errno.h> -#include <asm/percpu.h> -#include <asm/processor-flags.h> -#include <asm/segment.h> -#include <asm/asm-offsets.h> -#include <asm/thread_info.h> -#include <asm/asm.h> - -#include <xen/interface/xen.h> - -#include <linux/init.h> -#include <linux/linkage.h> - -.macro xen_pv_trap name -SYM_CODE_START(xen_\name) - pop %rcx - pop %r11 - jmp \name -SYM_CODE_END(xen_\name) -_ASM_NOKPROBE(xen_\name) -.endm - -xen_pv_trap asm_exc_divide_error -xen_pv_trap asm_xenpv_exc_debug -xen_pv_trap asm_exc_int3 -xen_pv_trap asm_xenpv_exc_nmi -xen_pv_trap asm_exc_overflow -xen_pv_trap asm_exc_bounds -xen_pv_trap asm_exc_invalid_op -xen_pv_trap asm_exc_device_not_available -xen_pv_trap asm_exc_double_fault -xen_pv_trap asm_exc_coproc_segment_overrun -xen_pv_trap asm_exc_invalid_tss -xen_pv_trap asm_exc_segment_not_present -xen_pv_trap asm_exc_stack_segment -xen_pv_trap asm_exc_general_protection -xen_pv_trap asm_exc_page_fault -xen_pv_trap asm_exc_spurious_interrupt_bug -xen_pv_trap asm_exc_coprocessor_error -xen_pv_trap asm_exc_alignment_check -#ifdef CONFIG_X86_MCE -xen_pv_trap asm_exc_machine_check -#endif /* CONFIG_X86_MCE */ -xen_pv_trap asm_exc_simd_coprocessor_error -#ifdef CONFIG_IA32_EMULATION -xen_pv_trap entry_INT80_compat -#endif -xen_pv_trap asm_exc_xen_hypervisor_callback - - __INIT -SYM_CODE_START(xen_early_idt_handler_array) - i = 0 - .rept NUM_EXCEPTION_VECTORS - pop %rcx - pop %r11 - jmp early_idt_handler_array + i*EARLY_IDT_HANDLER_SIZE - i = i + 1 - .fill xen_early_idt_handler_array + i*XEN_EARLY_IDT_HANDLER_SIZE - ., 1, 0xcc - .endr -SYM_CODE_END(xen_early_idt_handler_array) - __FINIT - -hypercall_iret = hypercall_page + __HYPERVISOR_iret * 32 -/* - * Xen64 iret frame: - * - * ss - * rsp - * rflags - * cs - * rip <-- standard iret frame - * - * flags - * - * rcx } - * r11 }<-- pushed by hypercall page - * rsp->rax } - */ -SYM_CODE_START(xen_iret) - pushq $0 - jmp hypercall_iret -SYM_CODE_END(xen_iret) - -SYM_CODE_START(xen_sysret64) - /* - * We're already on the usermode stack at this point, but - * still with the kernel gs, so we can easily switch back. - * - * tss.sp2 is scratch space. - */ - movq %rsp, PER_CPU_VAR(cpu_tss_rw + TSS_sp2) - movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp - - pushq $__USER_DS - pushq PER_CPU_VAR(cpu_tss_rw + TSS_sp2) - pushq %r11 - pushq $__USER_CS - pushq %rcx - - pushq $VGCF_in_syscall - jmp hypercall_iret -SYM_CODE_END(xen_sysret64) - -/* - * Xen handles syscall callbacks much like ordinary exceptions, which - * means we have: - * - kernel gs - * - kernel rsp - * - an iret-like stack frame on the stack (including rcx and r11): - * ss - * rsp - * rflags - * cs - * rip - * r11 - * rsp->rcx - */ - -/* Normal 64-bit system call target */ -SYM_FUNC_START(xen_syscall_target) - popq %rcx - popq %r11 - - /* - * Neither Xen nor the kernel really knows what the old SS and - * CS were. The kernel expects __USER_DS and __USER_CS, so - * report those values even though Xen will guess its own values. - */ - movq $__USER_DS, 4*8(%rsp) - movq $__USER_CS, 1*8(%rsp) - - jmp entry_SYSCALL_64_after_hwframe -SYM_FUNC_END(xen_syscall_target) - -#ifdef CONFIG_IA32_EMULATION - -/* 32-bit compat syscall target */ -SYM_FUNC_START(xen_syscall32_target) - popq %rcx - popq %r11 - - /* - * Neither Xen nor the kernel really knows what the old SS and - * CS were. The kernel expects __USER32_DS and __USER32_CS, so - * report those values even though Xen will guess its own values. 
- */ - movq $__USER32_DS, 4*8(%rsp) - movq $__USER32_CS, 1*8(%rsp) - - jmp entry_SYSCALL_compat_after_hwframe -SYM_FUNC_END(xen_syscall32_target) - -/* 32-bit compat sysenter target */ -SYM_FUNC_START(xen_sysenter_target) - /* - * NB: Xen is polite and clears TF from EFLAGS for us. This means - * that we don't need to guard against single step exceptions here. - */ - popq %rcx - popq %r11 - - /* - * Neither Xen nor the kernel really knows what the old SS and - * CS were. The kernel expects __USER32_DS and __USER32_CS, so - * report those values even though Xen will guess its own values. - */ - movq $__USER32_DS, 4*8(%rsp) - movq $__USER32_CS, 1*8(%rsp) - - jmp entry_SYSENTER_compat_after_hwframe -SYM_FUNC_END(xen_sysenter_target) - -#else /* !CONFIG_IA32_EMULATION */ - -SYM_FUNC_START_ALIAS(xen_syscall32_target) -SYM_FUNC_START(xen_sysenter_target) - lea 16(%rsp), %rsp /* strip %rcx, %r11 */ - mov $-ENOSYS, %rax - pushq $0 - jmp hypercall_iret -SYM_FUNC_END(xen_sysenter_target) -SYM_FUNC_END_ALIAS(xen_syscall32_target) - -#endif /* CONFIG_IA32_EMULATION */ diff --git a/arch/x86/xen/xen-head.S b/arch/x86/xen/xen-head.S index 1ba601df3a37..2d7c8f34f56c 100644 --- a/arch/x86/xen/xen-head.S +++ b/arch/x86/xen/xen-head.S @@ -35,13 +35,8 @@ SYM_CODE_START(startup_xen) rep __ASM_SIZE(stos) mov %_ASM_SI, xen_start_info -#ifdef CONFIG_X86_64 mov initial_stack(%rip), %rsp -#else - mov initial_stack, %esp -#endif -#ifdef CONFIG_X86_64 /* Set up %gs. * * The base of %gs always points to fixed_percpu_data. If the @@ -53,7 +48,6 @@ SYM_CODE_START(startup_xen) movq $INIT_PER_CPU_VAR(fixed_percpu_data),%rax cdq wrmsr -#endif call xen_start_kernel SYM_CODE_END(startup_xen) diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h index 53b224fd6177..45d556f71858 100644 --- a/arch/x86/xen/xen-ops.h +++ b/arch/x86/xen/xen-ops.h @@ -33,7 +33,6 @@ void xen_setup_mfn_list_list(void); void xen_build_mfn_list_list(void); void xen_setup_machphys_mapping(void); void xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn); -void xen_reserve_top(void); void __init xen_reserve_special_pages(void); void __init xen_pt_check_e820(void); diff --git a/arch/xtensa/include/asm/uaccess.h b/arch/xtensa/include/asm/uaccess.h index e57f0d0a88d8..b9758119feca 100644 --- a/arch/xtensa/include/asm/uaccess.h +++ b/arch/xtensa/include/asm/uaccess.h @@ -35,7 +35,7 @@ #define get_fs() (current->thread.current_ds) #define set_fs(val) (current->thread.current_ds = (val)) -#define segment_eq(a, b) ((a).seg == (b).seg) +#define uaccess_kernel() (get_fs().seg == KERNEL_DS.seg) #define __kernel_ok (uaccess_kernel()) #define __user_ok(addr, size) \ diff --git a/arch/xtensa/kernel/syscalls/syscall.tbl b/arch/xtensa/kernel/syscalls/syscall.tbl index d216ccba42f7..6276e3c2d3fc 100644 --- a/arch/xtensa/kernel/syscalls/syscall.tbl +++ b/arch/xtensa/kernel/syscalls/syscall.tbl @@ -222,7 +222,7 @@ 204 common quotactl sys_quotactl # 205 was old nfsservctl 205 common nfsservctl sys_ni_syscall -206 common _sysctl sys_sysctl +206 common _sysctl sys_ni_syscall 207 common bdflush sys_bdflush 208 common uname sys_newuname 209 common sysinfo sys_sysinfo diff --git a/arch/xtensa/mm/fault.c b/arch/xtensa/mm/fault.c index c128dcc7c85b..7666408ce12a 100644 --- a/arch/xtensa/mm/fault.c +++ b/arch/xtensa/mm/fault.c @@ -72,6 +72,9 @@ void do_page_fault(struct pt_regs *regs) if (user_mode(regs)) flags |= FAULT_FLAG_USER; + + perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address); + retry: mmap_read_lock(mm); vma = find_vma(mm, address); @@ -107,7 
+110,7 @@ good_area:
 	 * make sure we exit gracefully rather than endlessly redo
 	 * the fault.
 	 */
-	fault = handle_mm_fault(vma, address, flags);
+	fault = handle_mm_fault(vma, address, flags, regs);
 	if (fault_signal_pending(fault, regs))
 		return;
@@ -122,10 +125,6 @@ good_area:
 			BUG();
 	}
 	if (flags & FAULT_FLAG_ALLOW_RETRY) {
-		if (fault & VM_FAULT_MAJOR)
-			current->maj_flt++;
-		else
-			current->min_flt++;
 		if (fault & VM_FAULT_RETRY) {
 			flags |= FAULT_FLAG_TRIED;
@@ -139,12 +138,6 @@ good_area:
 	}
 	mmap_read_unlock(mm);
-	perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address);
-	if (flags & VM_FAULT_MAJOR)
-		perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, regs, address);
-	else
-		perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, regs, address);
-
 	return;
 	/* Something tried to access memory that isn't in our memory map..
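The xtensa fault.c hunk above follows the cross-architecture rework in which handle_mm_fault() now receives the exception's pt_regs, so the core mm code performs the maj_flt/min_flt accounting and the PERF_COUNT_SW_PAGE_FAULTS_MAJ/MIN events itself; the architecture only emits the plain PERF_COUNT_SW_PAGE_FAULTS event once, before the retry loop. A minimal sketch of that calling pattern, using a hypothetical helper name (demo_fault_once) and assuming the usual kernel-internal fault-path context:

	#include <linux/mm.h>
	#include <linux/perf_event.h>
	#include <linux/ptrace.h>

	/*
	 * Sketch only: emit the software page-fault event once, then let
	 * handle_mm_fault() do the per-task major/minor accounting and the
	 * MAJ/MIN perf events via the regs argument it now takes.
	 */
	static vm_fault_t demo_fault_once(struct vm_area_struct *vma,
					  unsigned long address,
					  unsigned int flags,
					  struct pt_regs *regs)
	{
		perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address);
		return handle_mm_fault(vma, address, flags, regs);
	}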